diff --git a/out/checkpoint-16000/config.json b/out/checkpoint-16000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..16f06bb1cdbf882eb90d57ea1906b3790e298a3f --- /dev/null +++ b/out/checkpoint-16000/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "./models/checkpoint-10000", + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1877, + "pad_token_id": 1026, + "reorder_and_upcast_attn": false, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": false, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 50 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.41.2", + "use_cache": true, + "vocab_size": 6027 +} diff --git a/out/checkpoint-16000/generation_config.json b/out/checkpoint-16000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..51f4dbe1c89cfa9da69401685604ff16254d9d20 --- /dev/null +++ b/out/checkpoint-16000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 50256, + "eos_token_id": 50256, + "pad_token_id": 1026, + "transformers_version": "4.41.2" +} diff --git a/out/checkpoint-16000/model.safetensors b/out/checkpoint-16000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad36f984fcc7994284e1611843cc08bae76fb089 --- /dev/null +++ b/out/checkpoint-16000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:790d92381e9ac59216e77171ddbe07f0a1801bf7a2b92d973502b74c82d92d8f +size 364520064 diff --git a/out/checkpoint-16000/optimizer.pt b/out/checkpoint-16000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..94000e678ef29ee61f9b982428e317be33346dec --- /dev/null +++ b/out/checkpoint-16000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:575b6bd6a3caa7784c5ada6c62b42891bd7e2b3de474d707eee963ec0a1baff3 +size 729134010 diff --git a/out/checkpoint-16000/rng_state.pth b/out/checkpoint-16000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f1ec1b03fa5ec9d4777f0079e7b61b053b9076a2 --- /dev/null +++ b/out/checkpoint-16000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e65fc304061e243c3d875844eb1e2de46e5ebf63e7c98f252c80699f6db03b0 +size 14244 diff --git a/out/checkpoint-16000/scheduler.pt b/out/checkpoint-16000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec47bdceef13f3f466d5790a618f0cc537a2fc28 --- /dev/null +++ b/out/checkpoint-16000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3522b54b582fe1a5f40833ed1a75ccde72e3cb67b2e108505119d9a09a4655ae +size 1064 diff --git a/out/checkpoint-16000/special_tokens_map.json b/out/checkpoint-16000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1b9fa6207c25267215ce16bfacdcb9089df3e897 --- /dev/null +++ b/out/checkpoint-16000/special_tokens_map.json @@ -0,0 +1,9 @@ +{ + "pad_token": { + "content": "<|padding|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/out/checkpoint-16000/tokenizer.json b/out/checkpoint-16000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..2bf66a33fda75b69f9b1a9597987f418f5acfb49 --- /dev/null +++ b/out/checkpoint-16000/tokenizer.json @@ -0,0 +1,20279 @@ +{ + 
"version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|audio:0|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|audio:1|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|audio:2|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 3, + "content": "<|audio:3|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 4, + "content": "<|audio:4|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 5, + "content": "<|audio:5|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 6, + "content": "<|audio:6|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 7, + "content": "<|audio:7|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 8, + "content": "<|audio:8|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 9, + "content": "<|audio:9|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 10, + "content": "<|audio:10|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 11, + "content": "<|audio:11|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 12, + "content": "<|audio:12|>", + "single_word": false, + "lstrip": false, + "rstrip": 
false, + "normalized": false, + "special": true + }, + { + "id": 13, + "content": "<|audio:13|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 14, + "content": "<|audio:14|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 15, + "content": "<|audio:15|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 16, + "content": "<|audio:16|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 17, + "content": "<|audio:17|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 18, + "content": "<|audio:18|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 19, + "content": "<|audio:19|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 20, + "content": "<|audio:20|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 21, + "content": "<|audio:21|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 22, + "content": "<|audio:22|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 23, + "content": "<|audio:23|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 24, + "content": "<|audio:24|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 25, + "content": "<|audio:25|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 26, + "content": "<|audio:26|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 27, + "content": "<|audio:27|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 28, + "content": "<|audio:28|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 29, + "content": "<|audio:29|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 30, + "content": "<|audio:30|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 31, + "content": "<|audio:31|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 32, + "content": "<|audio:32|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 33, + "content": "<|audio:33|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 34, + "content": "<|audio:34|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 35, + "content": "<|audio:35|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 36, + "content": "<|audio:36|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 37, + "content": "<|audio:37|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 38, + "content": "<|audio:38|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 39, + "content": "<|audio:39|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 40, + "content": "<|audio:40|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 41, + "content": "<|audio:41|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 42, + "content": "<|audio:42|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 43, + "content": "<|audio:43|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 44, + "content": "<|audio:44|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 45, + "content": "<|audio:45|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 46, + "content": "<|audio:46|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 47, + "content": "<|audio:47|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 48, + "content": "<|audio:48|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 49, + "content": "<|audio:49|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 50, + "content": "<|audio:50|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 51, + "content": "<|audio:51|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 52, + "content": "<|audio:52|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 53, + "content": "<|audio:53|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 54, + "content": "<|audio:54|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 55, + "content": "<|audio:55|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 56, + "content": "<|audio:56|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 57, + "content": "<|audio:57|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 58, + "content": "<|audio:58|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 59, + "content": "<|audio:59|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 60, + "content": "<|audio:60|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 61, + "content": "<|audio:61|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 62, + "content": "<|audio:62|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 63, + "content": "<|audio:63|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 64, + "content": "<|audio:64|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 65, + "content": "<|audio:65|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 66, + "content": "<|audio:66|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 67, + "content": "<|audio:67|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 68, + "content": "<|audio:68|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 69, + "content": "<|audio:69|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 70, + "content": "<|audio:70|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 71, + "content": "<|audio:71|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 72, + "content": "<|audio:72|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 73, + "content": "<|audio:73|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 74, + "content": "<|audio:74|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 75, + "content": "<|audio:75|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 76, + "content": "<|audio:76|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 77, + "content": "<|audio:77|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 78, + "content": "<|audio:78|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 79, + "content": "<|audio:79|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 80, + "content": "<|audio:80|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 81, + "content": "<|audio:81|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 82, + "content": "<|audio:82|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 83, + "content": "<|audio:83|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 84, + "content": "<|audio:84|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 85, + "content": "<|audio:85|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 86, + "content": "<|audio:86|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 87, + "content": "<|audio:87|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 88, + "content": "<|audio:88|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 89, + "content": "<|audio:89|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 90, + "content": "<|audio:90|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 91, + "content": "<|audio:91|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 92, + "content": "<|audio:92|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 93, + "content": "<|audio:93|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 94, + "content": "<|audio:94|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 95, + "content": "<|audio:95|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 96, + "content": "<|audio:96|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 97, + "content": "<|audio:97|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 98, + "content": "<|audio:98|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 99, + "content": "<|audio:99|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 100, + "content": "<|audio:100|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 101, + "content": "<|audio:101|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 102, + "content": "<|audio:102|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 103, + "content": "<|audio:103|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 104, + "content": "<|audio:104|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 105, + "content": "<|audio:105|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 106, + "content": "<|audio:106|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 107, + "content": "<|audio:107|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 108, + "content": "<|audio:108|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 109, + "content": "<|audio:109|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 110, + "content": "<|audio:110|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 111, + "content": "<|audio:111|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 112, + "content": "<|audio:112|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 113, + "content": "<|audio:113|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 114, + "content": "<|audio:114|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 115, + "content": "<|audio:115|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 116, + "content": "<|audio:116|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 117, + "content": "<|audio:117|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 118, + "content": "<|audio:118|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 119, + "content": "<|audio:119|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 120, + "content": "<|audio:120|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 121, + "content": "<|audio:121|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 122, + "content": "<|audio:122|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 123, + "content": "<|audio:123|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 124, + "content": "<|audio:124|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 125, + "content": "<|audio:125|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 126, + "content": "<|audio:126|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127, + "content": "<|audio:127|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 128, + "content": "<|audio:128|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 129, + "content": "<|audio:129|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 130, + "content": "<|audio:130|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 131, + "content": "<|audio:131|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 132, + "content": "<|audio:132|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 133, + "content": "<|audio:133|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 134, + "content": "<|audio:134|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 135, + "content": "<|audio:135|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 136, + "content": "<|audio:136|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 137, + "content": "<|audio:137|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 138, + "content": "<|audio:138|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 139, + "content": "<|audio:139|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 140, + "content": "<|audio:140|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 141, + "content": "<|audio:141|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 142, + "content": "<|audio:142|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 143, + "content": "<|audio:143|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 144, + "content": "<|audio:144|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 145, + "content": "<|audio:145|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 146, + "content": "<|audio:146|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 147, + "content": "<|audio:147|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 148, + "content": "<|audio:148|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 149, + "content": "<|audio:149|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 150, + "content": "<|audio:150|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 151, + "content": "<|audio:151|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 152, + "content": "<|audio:152|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 153, + "content": "<|audio:153|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 154, + "content": "<|audio:154|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 155, + "content": 
"<|audio:155|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 156, + "content": "<|audio:156|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 157, + "content": "<|audio:157|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 158, + "content": "<|audio:158|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 159, + "content": "<|audio:159|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 160, + "content": "<|audio:160|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 161, + "content": "<|audio:161|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 162, + "content": "<|audio:162|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 163, + "content": "<|audio:163|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 164, + "content": "<|audio:164|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 165, + "content": "<|audio:165|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 166, + "content": "<|audio:166|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 167, + "content": "<|audio:167|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
168, + "content": "<|audio:168|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 169, + "content": "<|audio:169|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 170, + "content": "<|audio:170|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 171, + "content": "<|audio:171|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 172, + "content": "<|audio:172|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 173, + "content": "<|audio:173|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 174, + "content": "<|audio:174|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 175, + "content": "<|audio:175|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 176, + "content": "<|audio:176|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 177, + "content": "<|audio:177|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 178, + "content": "<|audio:178|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 179, + "content": "<|audio:179|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 180, + "content": "<|audio:180|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 181, + "content": "<|audio:181|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 182, + "content": "<|audio:182|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 183, + "content": "<|audio:183|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 184, + "content": "<|audio:184|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 185, + "content": "<|audio:185|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 186, + "content": "<|audio:186|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 187, + "content": "<|audio:187|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 188, + "content": "<|audio:188|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 189, + "content": "<|audio:189|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 190, + "content": "<|audio:190|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 191, + "content": "<|audio:191|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 192, + "content": "<|audio:192|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 193, + "content": "<|audio:193|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 194, + "content": "<|audio:194|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 195, + "content": "<|audio:195|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 196, + "content": "<|audio:196|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 197, + "content": "<|audio:197|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 198, + "content": "<|audio:198|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 199, + "content": "<|audio:199|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 200, + "content": "<|audio:200|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 201, + "content": "<|audio:201|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 202, + "content": "<|audio:202|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 203, + "content": "<|audio:203|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 204, + "content": "<|audio:204|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 205, + "content": "<|audio:205|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 206, + "content": "<|audio:206|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 207, + "content": "<|audio:207|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 208, + "content": "<|audio:208|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 209, + "content": "<|audio:209|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 210, + "content": "<|audio:210|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 211, + "content": "<|audio:211|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 212, + "content": "<|audio:212|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 213, + "content": "<|audio:213|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 214, + "content": "<|audio:214|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 215, + "content": "<|audio:215|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 216, + "content": "<|audio:216|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 217, + "content": "<|audio:217|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 218, + "content": "<|audio:218|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 219, + "content": "<|audio:219|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 220, + "content": "<|audio:220|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 221, + "content": "<|audio:221|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 222, + "content": "<|audio:222|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 223, + "content": "<|audio:223|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 224, + "content": "<|audio:224|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 225, + "content": "<|audio:225|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 226, + "content": "<|audio:226|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 227, + "content": "<|audio:227|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 228, + "content": "<|audio:228|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 229, + "content": "<|audio:229|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 230, + "content": "<|audio:230|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 231, + "content": "<|audio:231|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 232, + "content": "<|audio:232|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 233, + "content": "<|audio:233|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 234, + "content": "<|audio:234|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 235, + "content": "<|audio:235|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 236, + "content": "<|audio:236|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 237, + "content": "<|audio:237|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 238, + "content": "<|audio:238|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 239, + "content": "<|audio:239|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 240, + "content": "<|audio:240|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 241, + "content": "<|audio:241|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 242, + "content": "<|audio:242|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 243, + "content": "<|audio:243|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 244, + "content": "<|audio:244|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 245, + "content": "<|audio:245|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 246, + "content": "<|audio:246|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 247, + "content": "<|audio:247|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 248, + "content": "<|audio:248|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 249, + "content": "<|audio:249|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 250, + "content": "<|audio:250|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 251, + "content": "<|audio:251|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 252, + "content": "<|audio:252|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 253, + "content": "<|audio:253|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 254, + "content": "<|audio:254|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 255, + "content": "<|audio:255|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 256, + "content": "<|audio:256|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 257, + "content": "<|audio:257|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 258, + "content": 
"<|audio:258|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 259, + "content": "<|audio:259|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 260, + "content": "<|audio:260|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 261, + "content": "<|audio:261|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 262, + "content": "<|audio:262|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 263, + "content": "<|audio:263|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 264, + "content": "<|audio:264|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 265, + "content": "<|audio:265|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 266, + "content": "<|audio:266|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 267, + "content": "<|audio:267|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 268, + "content": "<|audio:268|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 269, + "content": "<|audio:269|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 270, + "content": "<|audio:270|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
271, + "content": "<|audio:271|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 272, + "content": "<|audio:272|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 273, + "content": "<|audio:273|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 274, + "content": "<|audio:274|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 275, + "content": "<|audio:275|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 276, + "content": "<|audio:276|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 277, + "content": "<|audio:277|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 278, + "content": "<|audio:278|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 279, + "content": "<|audio:279|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 280, + "content": "<|audio:280|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 281, + "content": "<|audio:281|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 282, + "content": "<|audio:282|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 283, + "content": "<|audio:283|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 284, + "content": "<|audio:284|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 285, + "content": "<|audio:285|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 286, + "content": "<|audio:286|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 287, + "content": "<|audio:287|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 288, + "content": "<|audio:288|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 289, + "content": "<|audio:289|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 290, + "content": "<|audio:290|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 291, + "content": "<|audio:291|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 292, + "content": "<|audio:292|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 293, + "content": "<|audio:293|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 294, + "content": "<|audio:294|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 295, + "content": "<|audio:295|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 296, + "content": "<|audio:296|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 297, + "content": "<|audio:297|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 298, + "content": "<|audio:298|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 299, + "content": "<|audio:299|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 300, + "content": "<|audio:300|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 301, + "content": "<|audio:301|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 302, + "content": "<|audio:302|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 303, + "content": "<|audio:303|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 304, + "content": "<|audio:304|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 305, + "content": "<|audio:305|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 306, + "content": "<|audio:306|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 307, + "content": "<|audio:307|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 308, + "content": "<|audio:308|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 309, + "content": "<|audio:309|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 310, + "content": "<|audio:310|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 311, + "content": "<|audio:311|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 312, + "content": "<|audio:312|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 313, + "content": "<|audio:313|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 314, + "content": "<|audio:314|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 315, + "content": "<|audio:315|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 316, + "content": "<|audio:316|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 317, + "content": "<|audio:317|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 318, + "content": "<|audio:318|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 319, + "content": "<|audio:319|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 320, + "content": "<|audio:320|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 321, + "content": "<|audio:321|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 322, + "content": "<|audio:322|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 323, + "content": "<|audio:323|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 324, + "content": "<|audio:324|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 325, + "content": "<|audio:325|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 326, + "content": "<|audio:326|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 327, + "content": "<|audio:327|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 328, + "content": "<|audio:328|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 329, + "content": "<|audio:329|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 330, + "content": "<|audio:330|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 331, + "content": "<|audio:331|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 332, + "content": "<|audio:332|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 333, + "content": "<|audio:333|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 334, + "content": "<|audio:334|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 335, + "content": "<|audio:335|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 336, + "content": "<|audio:336|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 337, + "content": "<|audio:337|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 338, + "content": "<|audio:338|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 339, + "content": "<|audio:339|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 340, + "content": "<|audio:340|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 341, + "content": "<|audio:341|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 342, + "content": "<|audio:342|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 343, + "content": "<|audio:343|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 344, + "content": "<|audio:344|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 345, + "content": "<|audio:345|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 346, + "content": "<|audio:346|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 347, + "content": "<|audio:347|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 348, + "content": "<|audio:348|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 349, + "content": "<|audio:349|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 350, + "content": "<|audio:350|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 351, + "content": "<|audio:351|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 352, + "content": "<|audio:352|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 353, + "content": "<|audio:353|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 354, + "content": "<|audio:354|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 355, + "content": "<|audio:355|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 356, + "content": "<|audio:356|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 357, + "content": "<|audio:357|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 358, + "content": "<|audio:358|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 359, + "content": "<|audio:359|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 360, + "content": "<|audio:360|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 361, + "content": 
"<|audio:361|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 362, + "content": "<|audio:362|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 363, + "content": "<|audio:363|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 364, + "content": "<|audio:364|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 365, + "content": "<|audio:365|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 366, + "content": "<|audio:366|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 367, + "content": "<|audio:367|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 368, + "content": "<|audio:368|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 369, + "content": "<|audio:369|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 370, + "content": "<|audio:370|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 371, + "content": "<|audio:371|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 372, + "content": "<|audio:372|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 373, + "content": "<|audio:373|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
374, + "content": "<|audio:374|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 375, + "content": "<|audio:375|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 376, + "content": "<|audio:376|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 377, + "content": "<|audio:377|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 378, + "content": "<|audio:378|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 379, + "content": "<|audio:379|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 380, + "content": "<|audio:380|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 381, + "content": "<|audio:381|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 382, + "content": "<|audio:382|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 383, + "content": "<|audio:383|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 384, + "content": "<|audio:384|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 385, + "content": "<|audio:385|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 386, + "content": "<|audio:386|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 387, + "content": "<|audio:387|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 388, + "content": "<|audio:388|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 389, + "content": "<|audio:389|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 390, + "content": "<|audio:390|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 391, + "content": "<|audio:391|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 392, + "content": "<|audio:392|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 393, + "content": "<|audio:393|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 394, + "content": "<|audio:394|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 395, + "content": "<|audio:395|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 396, + "content": "<|audio:396|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 397, + "content": "<|audio:397|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 398, + "content": "<|audio:398|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 399, + "content": "<|audio:399|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 400, + "content": "<|audio:400|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 401, + "content": "<|audio:401|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 402, + "content": "<|audio:402|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 403, + "content": "<|audio:403|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 404, + "content": "<|audio:404|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 405, + "content": "<|audio:405|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 406, + "content": "<|audio:406|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 407, + "content": "<|audio:407|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 408, + "content": "<|audio:408|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 409, + "content": "<|audio:409|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 410, + "content": "<|audio:410|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 411, + "content": "<|audio:411|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 412, + "content": "<|audio:412|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 413, + "content": "<|audio:413|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 414, + "content": "<|audio:414|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 415, + "content": "<|audio:415|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 416, + "content": "<|audio:416|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 417, + "content": "<|audio:417|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 418, + "content": "<|audio:418|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 419, + "content": "<|audio:419|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 420, + "content": "<|audio:420|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 421, + "content": "<|audio:421|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 422, + "content": "<|audio:422|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 423, + "content": "<|audio:423|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 424, + "content": "<|audio:424|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 425, + "content": "<|audio:425|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 426, + "content": "<|audio:426|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 427, + "content": "<|audio:427|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 428, + "content": "<|audio:428|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 429, + "content": "<|audio:429|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 430, + "content": "<|audio:430|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 431, + "content": "<|audio:431|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 432, + "content": "<|audio:432|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 433, + "content": "<|audio:433|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 434, + "content": "<|audio:434|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 435, + "content": "<|audio:435|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 436, + "content": "<|audio:436|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 437, + "content": "<|audio:437|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 438, + "content": "<|audio:438|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 439, + "content": "<|audio:439|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 440, + "content": "<|audio:440|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 441, + "content": "<|audio:441|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 442, + "content": "<|audio:442|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 443, + "content": "<|audio:443|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 444, + "content": "<|audio:444|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 445, + "content": "<|audio:445|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 446, + "content": "<|audio:446|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 447, + "content": "<|audio:447|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 448, + "content": "<|audio:448|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 449, + "content": "<|audio:449|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 450, + "content": "<|audio:450|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 451, + "content": "<|audio:451|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 452, + "content": "<|audio:452|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 453, + "content": "<|audio:453|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 454, + "content": "<|audio:454|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 455, + "content": "<|audio:455|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 456, + "content": "<|audio:456|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 457, + "content": "<|audio:457|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 458, + "content": "<|audio:458|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 459, + "content": "<|audio:459|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 460, + "content": "<|audio:460|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 461, + "content": "<|audio:461|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 462, + "content": "<|audio:462|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 463, + "content": "<|audio:463|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 464, + "content": 
"<|audio:464|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 465, + "content": "<|audio:465|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 466, + "content": "<|audio:466|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 467, + "content": "<|audio:467|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 468, + "content": "<|audio:468|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 469, + "content": "<|audio:469|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 470, + "content": "<|audio:470|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 471, + "content": "<|audio:471|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 472, + "content": "<|audio:472|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 473, + "content": "<|audio:473|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 474, + "content": "<|audio:474|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 475, + "content": "<|audio:475|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 476, + "content": "<|audio:476|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
477, + "content": "<|audio:477|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 478, + "content": "<|audio:478|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 479, + "content": "<|audio:479|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 480, + "content": "<|audio:480|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 481, + "content": "<|audio:481|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 482, + "content": "<|audio:482|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 483, + "content": "<|audio:483|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 484, + "content": "<|audio:484|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 485, + "content": "<|audio:485|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 486, + "content": "<|audio:486|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 487, + "content": "<|audio:487|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 488, + "content": "<|audio:488|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 489, + "content": "<|audio:489|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 490, + "content": "<|audio:490|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 491, + "content": "<|audio:491|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 492, + "content": "<|audio:492|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 493, + "content": "<|audio:493|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 494, + "content": "<|audio:494|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 495, + "content": "<|audio:495|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 496, + "content": "<|audio:496|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 497, + "content": "<|audio:497|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 498, + "content": "<|audio:498|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 499, + "content": "<|audio:499|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 500, + "content": "<|audio:500|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 501, + "content": "<|audio:501|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 502, + "content": "<|audio:502|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 503, + "content": "<|audio:503|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 504, + "content": "<|audio:504|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 505, + "content": "<|audio:505|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 506, + "content": "<|audio:506|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 507, + "content": "<|audio:507|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 508, + "content": "<|audio:508|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 509, + "content": "<|audio:509|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 510, + "content": "<|audio:510|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 511, + "content": "<|audio:511|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 512, + "content": "<|audio:512|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 513, + "content": "<|audio:513|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 514, + "content": "<|audio:514|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 515, + "content": "<|audio:515|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 516, + "content": "<|audio:516|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 517, + "content": "<|audio:517|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 518, + "content": "<|audio:518|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 519, + "content": "<|audio:519|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 520, + "content": "<|audio:520|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 521, + "content": "<|audio:521|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 522, + "content": "<|audio:522|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 523, + "content": "<|audio:523|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 524, + "content": "<|audio:524|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 525, + "content": "<|audio:525|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 526, + "content": "<|audio:526|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 527, + "content": "<|audio:527|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 528, + "content": "<|audio:528|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 529, + "content": "<|audio:529|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 530, + "content": "<|audio:530|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 531, + "content": "<|audio:531|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 532, + "content": "<|audio:532|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 533, + "content": "<|audio:533|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 534, + "content": "<|audio:534|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 535, + "content": "<|audio:535|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 536, + "content": "<|audio:536|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 537, + "content": "<|audio:537|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 538, + "content": "<|audio:538|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 539, + "content": "<|audio:539|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 540, + "content": "<|audio:540|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 541, + "content": "<|audio:541|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 542, + "content": "<|audio:542|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 543, + "content": "<|audio:543|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 544, + "content": "<|audio:544|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 545, + "content": "<|audio:545|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 546, + "content": "<|audio:546|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 547, + "content": "<|audio:547|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 548, + "content": "<|audio:548|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 549, + "content": "<|audio:549|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 550, + "content": "<|audio:550|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 551, + "content": "<|audio:551|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 552, + "content": "<|audio:552|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 553, + "content": "<|audio:553|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 554, + "content": "<|audio:554|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 555, + "content": "<|audio:555|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 556, + "content": "<|audio:556|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 557, + "content": "<|audio:557|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 558, + "content": "<|audio:558|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 559, + "content": "<|audio:559|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 560, + "content": "<|audio:560|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 561, + "content": "<|audio:561|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 562, + "content": "<|audio:562|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 563, + "content": "<|audio:563|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 564, + "content": "<|audio:564|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 565, + "content": "<|audio:565|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 566, + "content": "<|audio:566|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 567, + "content": 
"<|audio:567|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 568, + "content": "<|audio:568|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 569, + "content": "<|audio:569|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 570, + "content": "<|audio:570|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 571, + "content": "<|audio:571|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 572, + "content": "<|audio:572|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 573, + "content": "<|audio:573|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 574, + "content": "<|audio:574|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 575, + "content": "<|audio:575|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 576, + "content": "<|audio:576|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 577, + "content": "<|audio:577|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 578, + "content": "<|audio:578|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 579, + "content": "<|audio:579|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
580, + "content": "<|audio:580|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 581, + "content": "<|audio:581|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 582, + "content": "<|audio:582|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 583, + "content": "<|audio:583|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 584, + "content": "<|audio:584|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 585, + "content": "<|audio:585|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 586, + "content": "<|audio:586|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 587, + "content": "<|audio:587|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 588, + "content": "<|audio:588|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 589, + "content": "<|audio:589|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 590, + "content": "<|audio:590|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 591, + "content": "<|audio:591|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 592, + "content": "<|audio:592|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 593, + "content": "<|audio:593|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 594, + "content": "<|audio:594|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 595, + "content": "<|audio:595|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 596, + "content": "<|audio:596|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 597, + "content": "<|audio:597|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 598, + "content": "<|audio:598|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 599, + "content": "<|audio:599|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 600, + "content": "<|audio:600|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 601, + "content": "<|audio:601|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 602, + "content": "<|audio:602|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 603, + "content": "<|audio:603|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 604, + "content": "<|audio:604|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 605, + "content": "<|audio:605|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 606, + "content": "<|audio:606|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 607, + "content": "<|audio:607|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 608, + "content": "<|audio:608|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 609, + "content": "<|audio:609|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 610, + "content": "<|audio:610|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 611, + "content": "<|audio:611|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 612, + "content": "<|audio:612|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 613, + "content": "<|audio:613|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 614, + "content": "<|audio:614|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 615, + "content": "<|audio:615|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 616, + "content": "<|audio:616|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 617, + "content": "<|audio:617|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 618, + "content": "<|audio:618|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 619, + "content": "<|audio:619|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 620, + "content": "<|audio:620|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 621, + "content": "<|audio:621|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 622, + "content": "<|audio:622|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 623, + "content": "<|audio:623|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 624, + "content": "<|audio:624|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 625, + "content": "<|audio:625|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 626, + "content": "<|audio:626|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 627, + "content": "<|audio:627|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 628, + "content": "<|audio:628|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 629, + "content": "<|audio:629|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 630, + "content": "<|audio:630|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 631, + "content": "<|audio:631|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 632, + "content": "<|audio:632|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 633, + "content": "<|audio:633|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 634, + "content": "<|audio:634|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 635, + "content": "<|audio:635|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 636, + "content": "<|audio:636|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 637, + "content": "<|audio:637|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 638, + "content": "<|audio:638|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 639, + "content": "<|audio:639|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 640, + "content": "<|audio:640|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 641, + "content": "<|audio:641|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 642, + "content": "<|audio:642|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 643, + "content": "<|audio:643|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 644, + "content": "<|audio:644|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 645, + "content": "<|audio:645|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 646, + "content": "<|audio:646|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 647, + "content": "<|audio:647|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 648, + "content": "<|audio:648|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 649, + "content": "<|audio:649|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 650, + "content": "<|audio:650|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 651, + "content": "<|audio:651|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 652, + "content": "<|audio:652|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 653, + "content": "<|audio:653|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 654, + "content": "<|audio:654|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 655, + "content": "<|audio:655|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 656, + "content": "<|audio:656|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 657, + "content": "<|audio:657|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 658, + "content": "<|audio:658|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 659, + "content": "<|audio:659|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 660, + "content": "<|audio:660|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 661, + "content": "<|audio:661|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 662, + "content": "<|audio:662|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 663, + "content": "<|audio:663|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 664, + "content": "<|audio:664|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 665, + "content": "<|audio:665|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 666, + "content": "<|audio:666|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 667, + "content": "<|audio:667|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 668, + "content": "<|audio:668|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 669, + "content": "<|audio:669|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 670, + "content": 
"<|audio:670|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 671, + "content": "<|audio:671|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 672, + "content": "<|audio:672|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 673, + "content": "<|audio:673|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 674, + "content": "<|audio:674|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 675, + "content": "<|audio:675|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 676, + "content": "<|audio:676|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 677, + "content": "<|audio:677|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 678, + "content": "<|audio:678|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 679, + "content": "<|audio:679|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 680, + "content": "<|audio:680|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 681, + "content": "<|audio:681|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 682, + "content": "<|audio:682|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
683, + "content": "<|audio:683|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 684, + "content": "<|audio:684|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 685, + "content": "<|audio:685|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 686, + "content": "<|audio:686|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 687, + "content": "<|audio:687|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 688, + "content": "<|audio:688|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 689, + "content": "<|audio:689|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 690, + "content": "<|audio:690|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 691, + "content": "<|audio:691|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 692, + "content": "<|audio:692|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 693, + "content": "<|audio:693|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 694, + "content": "<|audio:694|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 695, + "content": "<|audio:695|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 696, + "content": "<|audio:696|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 697, + "content": "<|audio:697|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 698, + "content": "<|audio:698|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 699, + "content": "<|audio:699|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 700, + "content": "<|audio:700|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 701, + "content": "<|audio:701|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 702, + "content": "<|audio:702|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 703, + "content": "<|audio:703|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 704, + "content": "<|audio:704|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 705, + "content": "<|audio:705|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 706, + "content": "<|audio:706|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 707, + "content": "<|audio:707|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 708, + "content": "<|audio:708|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 709, + "content": "<|audio:709|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 710, + "content": "<|audio:710|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 711, + "content": "<|audio:711|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 712, + "content": "<|audio:712|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 713, + "content": "<|audio:713|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 714, + "content": "<|audio:714|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 715, + "content": "<|audio:715|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 716, + "content": "<|audio:716|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 717, + "content": "<|audio:717|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 718, + "content": "<|audio:718|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 719, + "content": "<|audio:719|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 720, + "content": "<|audio:720|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 721, + "content": "<|audio:721|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 722, + "content": "<|audio:722|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 723, + "content": "<|audio:723|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 724, + "content": "<|audio:724|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 725, + "content": "<|audio:725|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 726, + "content": "<|audio:726|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 727, + "content": "<|audio:727|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 728, + "content": "<|audio:728|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 729, + "content": "<|audio:729|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 730, + "content": "<|audio:730|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 731, + "content": "<|audio:731|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 732, + "content": "<|audio:732|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 733, + "content": "<|audio:733|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 734, + "content": "<|audio:734|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 735, + "content": "<|audio:735|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 736, + "content": "<|audio:736|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 737, + "content": "<|audio:737|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 738, + "content": "<|audio:738|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 739, + "content": "<|audio:739|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 740, + "content": "<|audio:740|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 741, + "content": "<|audio:741|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 742, + "content": "<|audio:742|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 743, + "content": "<|audio:743|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 744, + "content": "<|audio:744|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 745, + "content": "<|audio:745|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 746, + "content": "<|audio:746|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 747, + "content": "<|audio:747|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 748, + "content": "<|audio:748|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 749, + "content": "<|audio:749|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 750, + "content": "<|audio:750|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 751, + "content": "<|audio:751|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 752, + "content": "<|audio:752|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 753, + "content": "<|audio:753|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 754, + "content": "<|audio:754|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 755, + "content": "<|audio:755|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 756, + "content": "<|audio:756|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 757, + "content": "<|audio:757|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 758, + "content": "<|audio:758|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 759, + "content": "<|audio:759|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 760, + "content": "<|audio:760|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 761, + "content": "<|audio:761|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 762, + "content": "<|audio:762|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 763, + "content": "<|audio:763|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 764, + "content": "<|audio:764|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 765, + "content": "<|audio:765|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 766, + "content": "<|audio:766|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 767, + "content": "<|audio:767|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 768, + "content": "<|audio:768|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 769, + "content": "<|audio:769|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 770, + "content": "<|audio:770|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 771, + "content": "<|audio:771|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 772, + "content": "<|audio:772|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 773, + "content": 
"<|audio:773|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 774, + "content": "<|audio:774|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 775, + "content": "<|audio:775|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 776, + "content": "<|audio:776|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 777, + "content": "<|audio:777|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 778, + "content": "<|audio:778|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 779, + "content": "<|audio:779|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 780, + "content": "<|audio:780|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 781, + "content": "<|audio:781|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 782, + "content": "<|audio:782|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 783, + "content": "<|audio:783|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 784, + "content": "<|audio:784|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 785, + "content": "<|audio:785|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
786, + "content": "<|audio:786|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 787, + "content": "<|audio:787|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 788, + "content": "<|audio:788|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 789, + "content": "<|audio:789|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 790, + "content": "<|audio:790|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 791, + "content": "<|audio:791|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 792, + "content": "<|audio:792|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 793, + "content": "<|audio:793|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 794, + "content": "<|audio:794|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 795, + "content": "<|audio:795|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 796, + "content": "<|audio:796|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 797, + "content": "<|audio:797|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 798, + "content": "<|audio:798|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 799, + "content": "<|audio:799|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 800, + "content": "<|audio:800|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 801, + "content": "<|audio:801|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 802, + "content": "<|audio:802|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 803, + "content": "<|audio:803|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 804, + "content": "<|audio:804|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 805, + "content": "<|audio:805|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 806, + "content": "<|audio:806|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 807, + "content": "<|audio:807|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 808, + "content": "<|audio:808|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 809, + "content": "<|audio:809|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 810, + "content": "<|audio:810|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 811, + "content": "<|audio:811|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 812, + "content": "<|audio:812|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 813, + "content": "<|audio:813|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 814, + "content": "<|audio:814|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 815, + "content": "<|audio:815|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 816, + "content": "<|audio:816|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 817, + "content": "<|audio:817|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 818, + "content": "<|audio:818|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 819, + "content": "<|audio:819|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 820, + "content": "<|audio:820|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 821, + "content": "<|audio:821|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 822, + "content": "<|audio:822|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 823, + "content": "<|audio:823|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 824, + "content": "<|audio:824|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 825, + "content": "<|audio:825|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 826, + "content": "<|audio:826|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 827, + "content": "<|audio:827|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 828, + "content": "<|audio:828|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 829, + "content": "<|audio:829|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 830, + "content": "<|audio:830|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 831, + "content": "<|audio:831|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 832, + "content": "<|audio:832|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 833, + "content": "<|audio:833|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 834, + "content": "<|audio:834|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 835, + "content": "<|audio:835|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 836, + "content": "<|audio:836|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 837, + "content": "<|audio:837|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 838, + "content": "<|audio:838|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 839, + "content": "<|audio:839|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 840, + "content": "<|audio:840|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 841, + "content": "<|audio:841|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 842, + "content": "<|audio:842|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 843, + "content": "<|audio:843|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 844, + "content": "<|audio:844|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 845, + "content": "<|audio:845|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 846, + "content": "<|audio:846|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 847, + "content": "<|audio:847|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 848, + "content": "<|audio:848|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 849, + "content": "<|audio:849|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 850, + "content": "<|audio:850|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 851, + "content": "<|audio:851|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 852, + "content": "<|audio:852|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 853, + "content": "<|audio:853|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 854, + "content": "<|audio:854|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 855, + "content": "<|audio:855|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 856, + "content": "<|audio:856|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 857, + "content": "<|audio:857|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 858, + "content": "<|audio:858|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 859, + "content": "<|audio:859|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 860, + "content": "<|audio:860|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 861, + "content": "<|audio:861|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 862, + "content": "<|audio:862|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 863, + "content": "<|audio:863|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 864, + "content": "<|audio:864|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 865, + "content": "<|audio:865|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 866, + "content": "<|audio:866|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 867, + "content": "<|audio:867|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 868, + "content": "<|audio:868|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 869, + "content": "<|audio:869|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 870, + "content": "<|audio:870|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 871, + "content": "<|audio:871|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 872, + "content": "<|audio:872|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 873, + "content": "<|audio:873|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 874, + "content": "<|audio:874|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 875, + "content": "<|audio:875|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 876, + "content": 
"<|audio:876|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 877, + "content": "<|audio:877|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 878, + "content": "<|audio:878|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 879, + "content": "<|audio:879|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 880, + "content": "<|audio:880|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 881, + "content": "<|audio:881|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 882, + "content": "<|audio:882|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 883, + "content": "<|audio:883|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 884, + "content": "<|audio:884|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 885, + "content": "<|audio:885|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 886, + "content": "<|audio:886|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 887, + "content": "<|audio:887|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 888, + "content": "<|audio:888|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
889, + "content": "<|audio:889|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 890, + "content": "<|audio:890|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 891, + "content": "<|audio:891|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 892, + "content": "<|audio:892|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 893, + "content": "<|audio:893|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 894, + "content": "<|audio:894|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 895, + "content": "<|audio:895|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 896, + "content": "<|audio:896|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 897, + "content": "<|audio:897|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 898, + "content": "<|audio:898|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 899, + "content": "<|audio:899|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 900, + "content": "<|audio:900|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 901, + "content": "<|audio:901|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 902, + "content": "<|audio:902|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 903, + "content": "<|audio:903|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 904, + "content": "<|audio:904|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 905, + "content": "<|audio:905|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 906, + "content": "<|audio:906|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 907, + "content": "<|audio:907|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 908, + "content": "<|audio:908|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 909, + "content": "<|audio:909|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 910, + "content": "<|audio:910|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 911, + "content": "<|audio:911|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 912, + "content": "<|audio:912|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 913, + "content": "<|audio:913|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 914, + "content": "<|audio:914|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 915, + "content": "<|audio:915|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 916, + "content": "<|audio:916|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 917, + "content": "<|audio:917|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 918, + "content": "<|audio:918|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 919, + "content": "<|audio:919|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 920, + "content": "<|audio:920|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 921, + "content": "<|audio:921|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 922, + "content": "<|audio:922|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 923, + "content": "<|audio:923|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 924, + "content": "<|audio:924|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 925, + "content": "<|audio:925|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 926, + "content": "<|audio:926|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 927, + "content": "<|audio:927|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 928, + "content": "<|audio:928|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 929, + "content": "<|audio:929|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 930, + "content": "<|audio:930|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 931, + "content": "<|audio:931|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 932, + "content": "<|audio:932|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 933, + "content": "<|audio:933|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 934, + "content": "<|audio:934|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 935, + "content": "<|audio:935|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 936, + "content": "<|audio:936|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 937, + "content": "<|audio:937|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 938, + "content": "<|audio:938|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 939, + "content": "<|audio:939|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 940, + "content": "<|audio:940|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 941, + "content": "<|audio:941|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 942, + "content": "<|audio:942|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 943, + "content": "<|audio:943|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 944, + "content": "<|audio:944|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 945, + "content": "<|audio:945|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 946, + "content": "<|audio:946|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 947, + "content": "<|audio:947|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 948, + "content": "<|audio:948|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 949, + "content": "<|audio:949|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 950, + "content": "<|audio:950|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 951, + "content": "<|audio:951|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 952, + "content": "<|audio:952|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 953, + "content": "<|audio:953|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 954, + "content": "<|audio:954|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 955, + "content": "<|audio:955|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 956, + "content": "<|audio:956|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 957, + "content": "<|audio:957|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 958, + "content": "<|audio:958|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 959, + "content": "<|audio:959|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 960, + "content": "<|audio:960|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 961, + "content": "<|audio:961|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 962, + "content": "<|audio:962|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 963, + "content": "<|audio:963|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 964, + "content": "<|audio:964|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 965, + "content": "<|audio:965|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 966, + "content": "<|audio:966|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 967, + "content": "<|audio:967|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 968, + "content": "<|audio:968|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 969, + "content": "<|audio:969|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 970, + "content": "<|audio:970|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 971, + "content": "<|audio:971|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 972, + "content": "<|audio:972|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 973, + "content": "<|audio:973|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 974, + "content": "<|audio:974|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 975, + "content": "<|audio:975|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 976, + "content": "<|audio:976|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 977, + "content": "<|audio:977|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 978, + "content": "<|audio:978|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 979, + "content": 
"<|audio:979|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 980, + "content": "<|audio:980|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 981, + "content": "<|audio:981|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 982, + "content": "<|audio:982|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 983, + "content": "<|audio:983|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 984, + "content": "<|audio:984|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 985, + "content": "<|audio:985|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 986, + "content": "<|audio:986|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 987, + "content": "<|audio:987|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 988, + "content": "<|audio:988|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 989, + "content": "<|audio:989|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 990, + "content": "<|audio:990|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 991, + "content": "<|audio:991|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
992, + "content": "<|audio:992|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 993, + "content": "<|audio:993|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 994, + "content": "<|audio:994|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 995, + "content": "<|audio:995|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 996, + "content": "<|audio:996|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 997, + "content": "<|audio:997|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 998, + "content": "<|audio:998|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 999, + "content": "<|audio:999|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1000, + "content": "<|audio:1000|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1001, + "content": "<|audio:1001|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1002, + "content": "<|audio:1002|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1003, + "content": "<|audio:1003|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1004, + "content": "<|audio:1004|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 1005, + "content": "<|audio:1005|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1006, + "content": "<|audio:1006|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1007, + "content": "<|audio:1007|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1008, + "content": "<|audio:1008|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1009, + "content": "<|audio:1009|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1010, + "content": "<|audio:1010|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1011, + "content": "<|audio:1011|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1012, + "content": "<|audio:1012|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1013, + "content": "<|audio:1013|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1014, + "content": "<|audio:1014|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1015, + "content": "<|audio:1015|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1016, + "content": "<|audio:1016|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1017, + "content": "<|audio:1017|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1018, + "content": "<|audio:1018|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1019, + "content": "<|audio:1019|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1020, + "content": "<|audio:1020|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1021, + "content": "<|audio:1021|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1022, + "content": "<|audio:1022|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1023, + "content": "<|audio:1023|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1024, + "content": "<|startoftranscript|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1025, + "content": "<|endoftranscript|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1026, + "content": "<|padding|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFKC" + }, + "pre_tokenizer": { + "type": "Metaspace", + "replacement": "▁", + "prepend_scheme": "always", + "split": true + }, + "post_processor": null, + "decoder": { + "type": "Metaspace", + "replacement": "▁", + "prepend_scheme": "always", + "split": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + 
"vocab": { + "<|audio:0|>": 0, + "<|audio:1|>": 1, + "<|audio:2|>": 2, + "<|audio:3|>": 3, + "<|audio:4|>": 4, + "<|audio:5|>": 5, + "<|audio:6|>": 6, + "<|audio:7|>": 7, + "<|audio:8|>": 8, + "<|audio:9|>": 9, + "<|audio:10|>": 10, + "<|audio:11|>": 11, + "<|audio:12|>": 12, + "<|audio:13|>": 13, + "<|audio:14|>": 14, + "<|audio:15|>": 15, + "<|audio:16|>": 16, + "<|audio:17|>": 17, + "<|audio:18|>": 18, + "<|audio:19|>": 19, + "<|audio:20|>": 20, + "<|audio:21|>": 21, + "<|audio:22|>": 22, + "<|audio:23|>": 23, + "<|audio:24|>": 24, + "<|audio:25|>": 25, + "<|audio:26|>": 26, + "<|audio:27|>": 27, + "<|audio:28|>": 28, + "<|audio:29|>": 29, + "<|audio:30|>": 30, + "<|audio:31|>": 31, + "<|audio:32|>": 32, + "<|audio:33|>": 33, + "<|audio:34|>": 34, + "<|audio:35|>": 35, + "<|audio:36|>": 36, + "<|audio:37|>": 37, + "<|audio:38|>": 38, + "<|audio:39|>": 39, + "<|audio:40|>": 40, + "<|audio:41|>": 41, + "<|audio:42|>": 42, + "<|audio:43|>": 43, + "<|audio:44|>": 44, + "<|audio:45|>": 45, + "<|audio:46|>": 46, + "<|audio:47|>": 47, + "<|audio:48|>": 48, + "<|audio:49|>": 49, + "<|audio:50|>": 50, + "<|audio:51|>": 51, + "<|audio:52|>": 52, + "<|audio:53|>": 53, + "<|audio:54|>": 54, + "<|audio:55|>": 55, + "<|audio:56|>": 56, + "<|audio:57|>": 57, + "<|audio:58|>": 58, + "<|audio:59|>": 59, + "<|audio:60|>": 60, + "<|audio:61|>": 61, + "<|audio:62|>": 62, + "<|audio:63|>": 63, + "<|audio:64|>": 64, + "<|audio:65|>": 65, + "<|audio:66|>": 66, + "<|audio:67|>": 67, + "<|audio:68|>": 68, + "<|audio:69|>": 69, + "<|audio:70|>": 70, + "<|audio:71|>": 71, + "<|audio:72|>": 72, + "<|audio:73|>": 73, + "<|audio:74|>": 74, + "<|audio:75|>": 75, + "<|audio:76|>": 76, + "<|audio:77|>": 77, + "<|audio:78|>": 78, + "<|audio:79|>": 79, + "<|audio:80|>": 80, + "<|audio:81|>": 81, + "<|audio:82|>": 82, + "<|audio:83|>": 83, + "<|audio:84|>": 84, + "<|audio:85|>": 85, + "<|audio:86|>": 86, + "<|audio:87|>": 87, + "<|audio:88|>": 88, + "<|audio:89|>": 89, + "<|audio:90|>": 90, + 
"<|audio:91|>": 91, + "<|audio:92|>": 92, + "<|audio:93|>": 93, + "<|audio:94|>": 94, + "<|audio:95|>": 95, + "<|audio:96|>": 96, + "<|audio:97|>": 97, + "<|audio:98|>": 98, + "<|audio:99|>": 99, + "<|audio:100|>": 100, + "<|audio:101|>": 101, + "<|audio:102|>": 102, + "<|audio:103|>": 103, + "<|audio:104|>": 104, + "<|audio:105|>": 105, + "<|audio:106|>": 106, + "<|audio:107|>": 107, + "<|audio:108|>": 108, + "<|audio:109|>": 109, + "<|audio:110|>": 110, + "<|audio:111|>": 111, + "<|audio:112|>": 112, + "<|audio:113|>": 113, + "<|audio:114|>": 114, + "<|audio:115|>": 115, + "<|audio:116|>": 116, + "<|audio:117|>": 117, + "<|audio:118|>": 118, + "<|audio:119|>": 119, + "<|audio:120|>": 120, + "<|audio:121|>": 121, + "<|audio:122|>": 122, + "<|audio:123|>": 123, + "<|audio:124|>": 124, + "<|audio:125|>": 125, + "<|audio:126|>": 126, + "<|audio:127|>": 127, + "<|audio:128|>": 128, + "<|audio:129|>": 129, + "<|audio:130|>": 130, + "<|audio:131|>": 131, + "<|audio:132|>": 132, + "<|audio:133|>": 133, + "<|audio:134|>": 134, + "<|audio:135|>": 135, + "<|audio:136|>": 136, + "<|audio:137|>": 137, + "<|audio:138|>": 138, + "<|audio:139|>": 139, + "<|audio:140|>": 140, + "<|audio:141|>": 141, + "<|audio:142|>": 142, + "<|audio:143|>": 143, + "<|audio:144|>": 144, + "<|audio:145|>": 145, + "<|audio:146|>": 146, + "<|audio:147|>": 147, + "<|audio:148|>": 148, + "<|audio:149|>": 149, + "<|audio:150|>": 150, + "<|audio:151|>": 151, + "<|audio:152|>": 152, + "<|audio:153|>": 153, + "<|audio:154|>": 154, + "<|audio:155|>": 155, + "<|audio:156|>": 156, + "<|audio:157|>": 157, + "<|audio:158|>": 158, + "<|audio:159|>": 159, + "<|audio:160|>": 160, + "<|audio:161|>": 161, + "<|audio:162|>": 162, + "<|audio:163|>": 163, + "<|audio:164|>": 164, + "<|audio:165|>": 165, + "<|audio:166|>": 166, + "<|audio:167|>": 167, + "<|audio:168|>": 168, + "<|audio:169|>": 169, + "<|audio:170|>": 170, + "<|audio:171|>": 171, + "<|audio:172|>": 172, + "<|audio:173|>": 173, + "<|audio:174|>": 174, + 
"<|audio:175|>": 175, + "<|audio:176|>": 176, + "<|audio:177|>": 177, + "<|audio:178|>": 178, + "<|audio:179|>": 179, + "<|audio:180|>": 180, + "<|audio:181|>": 181, + "<|audio:182|>": 182, + "<|audio:183|>": 183, + "<|audio:184|>": 184, + "<|audio:185|>": 185, + "<|audio:186|>": 186, + "<|audio:187|>": 187, + "<|audio:188|>": 188, + "<|audio:189|>": 189, + "<|audio:190|>": 190, + "<|audio:191|>": 191, + "<|audio:192|>": 192, + "<|audio:193|>": 193, + "<|audio:194|>": 194, + "<|audio:195|>": 195, + "<|audio:196|>": 196, + "<|audio:197|>": 197, + "<|audio:198|>": 198, + "<|audio:199|>": 199, + "<|audio:200|>": 200, + "<|audio:201|>": 201, + "<|audio:202|>": 202, + "<|audio:203|>": 203, + "<|audio:204|>": 204, + "<|audio:205|>": 205, + "<|audio:206|>": 206, + "<|audio:207|>": 207, + "<|audio:208|>": 208, + "<|audio:209|>": 209, + "<|audio:210|>": 210, + "<|audio:211|>": 211, + "<|audio:212|>": 212, + "<|audio:213|>": 213, + "<|audio:214|>": 214, + "<|audio:215|>": 215, + "<|audio:216|>": 216, + "<|audio:217|>": 217, + "<|audio:218|>": 218, + "<|audio:219|>": 219, + "<|audio:220|>": 220, + "<|audio:221|>": 221, + "<|audio:222|>": 222, + "<|audio:223|>": 223, + "<|audio:224|>": 224, + "<|audio:225|>": 225, + "<|audio:226|>": 226, + "<|audio:227|>": 227, + "<|audio:228|>": 228, + "<|audio:229|>": 229, + "<|audio:230|>": 230, + "<|audio:231|>": 231, + "<|audio:232|>": 232, + "<|audio:233|>": 233, + "<|audio:234|>": 234, + "<|audio:235|>": 235, + "<|audio:236|>": 236, + "<|audio:237|>": 237, + "<|audio:238|>": 238, + "<|audio:239|>": 239, + "<|audio:240|>": 240, + "<|audio:241|>": 241, + "<|audio:242|>": 242, + "<|audio:243|>": 243, + "<|audio:244|>": 244, + "<|audio:245|>": 245, + "<|audio:246|>": 246, + "<|audio:247|>": 247, + "<|audio:248|>": 248, + "<|audio:249|>": 249, + "<|audio:250|>": 250, + "<|audio:251|>": 251, + "<|audio:252|>": 252, + "<|audio:253|>": 253, + "<|audio:254|>": 254, + "<|audio:255|>": 255, + "<|audio:256|>": 256, + "<|audio:257|>": 257, + 
"<|audio:258|>": 258, + "<|audio:259|>": 259, + "<|audio:260|>": 260, + "<|audio:261|>": 261, + "<|audio:262|>": 262, + "<|audio:263|>": 263, + "<|audio:264|>": 264, + "<|audio:265|>": 265, + "<|audio:266|>": 266, + "<|audio:267|>": 267, + "<|audio:268|>": 268, + "<|audio:269|>": 269, + "<|audio:270|>": 270, + "<|audio:271|>": 271, + "<|audio:272|>": 272, + "<|audio:273|>": 273, + "<|audio:274|>": 274, + "<|audio:275|>": 275, + "<|audio:276|>": 276, + "<|audio:277|>": 277, + "<|audio:278|>": 278, + "<|audio:279|>": 279, + "<|audio:280|>": 280, + "<|audio:281|>": 281, + "<|audio:282|>": 282, + "<|audio:283|>": 283, + "<|audio:284|>": 284, + "<|audio:285|>": 285, + "<|audio:286|>": 286, + "<|audio:287|>": 287, + "<|audio:288|>": 288, + "<|audio:289|>": 289, + "<|audio:290|>": 290, + "<|audio:291|>": 291, + "<|audio:292|>": 292, + "<|audio:293|>": 293, + "<|audio:294|>": 294, + "<|audio:295|>": 295, + "<|audio:296|>": 296, + "<|audio:297|>": 297, + "<|audio:298|>": 298, + "<|audio:299|>": 299, + "<|audio:300|>": 300, + "<|audio:301|>": 301, + "<|audio:302|>": 302, + "<|audio:303|>": 303, + "<|audio:304|>": 304, + "<|audio:305|>": 305, + "<|audio:306|>": 306, + "<|audio:307|>": 307, + "<|audio:308|>": 308, + "<|audio:309|>": 309, + "<|audio:310|>": 310, + "<|audio:311|>": 311, + "<|audio:312|>": 312, + "<|audio:313|>": 313, + "<|audio:314|>": 314, + "<|audio:315|>": 315, + "<|audio:316|>": 316, + "<|audio:317|>": 317, + "<|audio:318|>": 318, + "<|audio:319|>": 319, + "<|audio:320|>": 320, + "<|audio:321|>": 321, + "<|audio:322|>": 322, + "<|audio:323|>": 323, + "<|audio:324|>": 324, + "<|audio:325|>": 325, + "<|audio:326|>": 326, + "<|audio:327|>": 327, + "<|audio:328|>": 328, + "<|audio:329|>": 329, + "<|audio:330|>": 330, + "<|audio:331|>": 331, + "<|audio:332|>": 332, + "<|audio:333|>": 333, + "<|audio:334|>": 334, + "<|audio:335|>": 335, + "<|audio:336|>": 336, + "<|audio:337|>": 337, + "<|audio:338|>": 338, + "<|audio:339|>": 339, + "<|audio:340|>": 340, + 
"<|audio:341|>": 341, + "<|audio:342|>": 342, + "<|audio:343|>": 343, + "<|audio:344|>": 344, + "<|audio:345|>": 345, + "<|audio:346|>": 346, + "<|audio:347|>": 347, + "<|audio:348|>": 348, + "<|audio:349|>": 349, + "<|audio:350|>": 350, + "<|audio:351|>": 351, + "<|audio:352|>": 352, + "<|audio:353|>": 353, + "<|audio:354|>": 354, + "<|audio:355|>": 355, + "<|audio:356|>": 356, + "<|audio:357|>": 357, + "<|audio:358|>": 358, + "<|audio:359|>": 359, + "<|audio:360|>": 360, + "<|audio:361|>": 361, + "<|audio:362|>": 362, + "<|audio:363|>": 363, + "<|audio:364|>": 364, + "<|audio:365|>": 365, + "<|audio:366|>": 366, + "<|audio:367|>": 367, + "<|audio:368|>": 368, + "<|audio:369|>": 369, + "<|audio:370|>": 370, + "<|audio:371|>": 371, + "<|audio:372|>": 372, + "<|audio:373|>": 373, + "<|audio:374|>": 374, + "<|audio:375|>": 375, + "<|audio:376|>": 376, + "<|audio:377|>": 377, + "<|audio:378|>": 378, + "<|audio:379|>": 379, + "<|audio:380|>": 380, + "<|audio:381|>": 381, + "<|audio:382|>": 382, + "<|audio:383|>": 383, + "<|audio:384|>": 384, + "<|audio:385|>": 385, + "<|audio:386|>": 386, + "<|audio:387|>": 387, + "<|audio:388|>": 388, + "<|audio:389|>": 389, + "<|audio:390|>": 390, + "<|audio:391|>": 391, + "<|audio:392|>": 392, + "<|audio:393|>": 393, + "<|audio:394|>": 394, + "<|audio:395|>": 395, + "<|audio:396|>": 396, + "<|audio:397|>": 397, + "<|audio:398|>": 398, + "<|audio:399|>": 399, + "<|audio:400|>": 400, + "<|audio:401|>": 401, + "<|audio:402|>": 402, + "<|audio:403|>": 403, + "<|audio:404|>": 404, + "<|audio:405|>": 405, + "<|audio:406|>": 406, + "<|audio:407|>": 407, + "<|audio:408|>": 408, + "<|audio:409|>": 409, + "<|audio:410|>": 410, + "<|audio:411|>": 411, + "<|audio:412|>": 412, + "<|audio:413|>": 413, + "<|audio:414|>": 414, + "<|audio:415|>": 415, + "<|audio:416|>": 416, + "<|audio:417|>": 417, + "<|audio:418|>": 418, + "<|audio:419|>": 419, + "<|audio:420|>": 420, + "<|audio:421|>": 421, + "<|audio:422|>": 422, + "<|audio:423|>": 423, + 
"<|audio:424|>": 424, + "<|audio:425|>": 425, + "<|audio:426|>": 426, + "<|audio:427|>": 427, + "<|audio:428|>": 428, + "<|audio:429|>": 429, + "<|audio:430|>": 430, + "<|audio:431|>": 431, + "<|audio:432|>": 432, + "<|audio:433|>": 433, + "<|audio:434|>": 434, + "<|audio:435|>": 435, + "<|audio:436|>": 436, + "<|audio:437|>": 437, + "<|audio:438|>": 438, + "<|audio:439|>": 439, + "<|audio:440|>": 440, + "<|audio:441|>": 441, + "<|audio:442|>": 442, + "<|audio:443|>": 443, + "<|audio:444|>": 444, + "<|audio:445|>": 445, + "<|audio:446|>": 446, + "<|audio:447|>": 447, + "<|audio:448|>": 448, + "<|audio:449|>": 449, + "<|audio:450|>": 450, + "<|audio:451|>": 451, + "<|audio:452|>": 452, + "<|audio:453|>": 453, + "<|audio:454|>": 454, + "<|audio:455|>": 455, + "<|audio:456|>": 456, + "<|audio:457|>": 457, + "<|audio:458|>": 458, + "<|audio:459|>": 459, + "<|audio:460|>": 460, + "<|audio:461|>": 461, + "<|audio:462|>": 462, + "<|audio:463|>": 463, + "<|audio:464|>": 464, + "<|audio:465|>": 465, + "<|audio:466|>": 466, + "<|audio:467|>": 467, + "<|audio:468|>": 468, + "<|audio:469|>": 469, + "<|audio:470|>": 470, + "<|audio:471|>": 471, + "<|audio:472|>": 472, + "<|audio:473|>": 473, + "<|audio:474|>": 474, + "<|audio:475|>": 475, + "<|audio:476|>": 476, + "<|audio:477|>": 477, + "<|audio:478|>": 478, + "<|audio:479|>": 479, + "<|audio:480|>": 480, + "<|audio:481|>": 481, + "<|audio:482|>": 482, + "<|audio:483|>": 483, + "<|audio:484|>": 484, + "<|audio:485|>": 485, + "<|audio:486|>": 486, + "<|audio:487|>": 487, + "<|audio:488|>": 488, + "<|audio:489|>": 489, + "<|audio:490|>": 490, + "<|audio:491|>": 491, + "<|audio:492|>": 492, + "<|audio:493|>": 493, + "<|audio:494|>": 494, + "<|audio:495|>": 495, + "<|audio:496|>": 496, + "<|audio:497|>": 497, + "<|audio:498|>": 498, + "<|audio:499|>": 499, + "<|audio:500|>": 500, + "<|audio:501|>": 501, + "<|audio:502|>": 502, + "<|audio:503|>": 503, + "<|audio:504|>": 504, + "<|audio:505|>": 505, + "<|audio:506|>": 506, + 
"<|audio:507|>": 507, + "<|audio:508|>": 508, + "<|audio:509|>": 509, + "<|audio:510|>": 510, + "<|audio:511|>": 511, + "<|audio:512|>": 512, + "<|audio:513|>": 513, + "<|audio:514|>": 514, + "<|audio:515|>": 515, + "<|audio:516|>": 516, + "<|audio:517|>": 517, + "<|audio:518|>": 518, + "<|audio:519|>": 519, + "<|audio:520|>": 520, + "<|audio:521|>": 521, + "<|audio:522|>": 522, + "<|audio:523|>": 523, + "<|audio:524|>": 524, + "<|audio:525|>": 525, + "<|audio:526|>": 526, + "<|audio:527|>": 527, + "<|audio:528|>": 528, + "<|audio:529|>": 529, + "<|audio:530|>": 530, + "<|audio:531|>": 531, + "<|audio:532|>": 532, + "<|audio:533|>": 533, + "<|audio:534|>": 534, + "<|audio:535|>": 535, + "<|audio:536|>": 536, + "<|audio:537|>": 537, + "<|audio:538|>": 538, + "<|audio:539|>": 539, + "<|audio:540|>": 540, + "<|audio:541|>": 541, + "<|audio:542|>": 542, + "<|audio:543|>": 543, + "<|audio:544|>": 544, + "<|audio:545|>": 545, + "<|audio:546|>": 546, + "<|audio:547|>": 547, + "<|audio:548|>": 548, + "<|audio:549|>": 549, + "<|audio:550|>": 550, + "<|audio:551|>": 551, + "<|audio:552|>": 552, + "<|audio:553|>": 553, + "<|audio:554|>": 554, + "<|audio:555|>": 555, + "<|audio:556|>": 556, + "<|audio:557|>": 557, + "<|audio:558|>": 558, + "<|audio:559|>": 559, + "<|audio:560|>": 560, + "<|audio:561|>": 561, + "<|audio:562|>": 562, + "<|audio:563|>": 563, + "<|audio:564|>": 564, + "<|audio:565|>": 565, + "<|audio:566|>": 566, + "<|audio:567|>": 567, + "<|audio:568|>": 568, + "<|audio:569|>": 569, + "<|audio:570|>": 570, + "<|audio:571|>": 571, + "<|audio:572|>": 572, + "<|audio:573|>": 573, + "<|audio:574|>": 574, + "<|audio:575|>": 575, + "<|audio:576|>": 576, + "<|audio:577|>": 577, + "<|audio:578|>": 578, + "<|audio:579|>": 579, + "<|audio:580|>": 580, + "<|audio:581|>": 581, + "<|audio:582|>": 582, + "<|audio:583|>": 583, + "<|audio:584|>": 584, + "<|audio:585|>": 585, + "<|audio:586|>": 586, + "<|audio:587|>": 587, + "<|audio:588|>": 588, + "<|audio:589|>": 589, + 
"<|audio:590|>": 590, + "<|audio:591|>": 591, + "<|audio:592|>": 592, + "<|audio:593|>": 593, + "<|audio:594|>": 594, + "<|audio:595|>": 595, + "<|audio:596|>": 596, + "<|audio:597|>": 597, + "<|audio:598|>": 598, + "<|audio:599|>": 599, + "<|audio:600|>": 600, + "<|audio:601|>": 601, + "<|audio:602|>": 602, + "<|audio:603|>": 603, + "<|audio:604|>": 604, + "<|audio:605|>": 605, + "<|audio:606|>": 606, + "<|audio:607|>": 607, + "<|audio:608|>": 608, + "<|audio:609|>": 609, + "<|audio:610|>": 610, + "<|audio:611|>": 611, + "<|audio:612|>": 612, + "<|audio:613|>": 613, + "<|audio:614|>": 614, + "<|audio:615|>": 615, + "<|audio:616|>": 616, + "<|audio:617|>": 617, + "<|audio:618|>": 618, + "<|audio:619|>": 619, + "<|audio:620|>": 620, + "<|audio:621|>": 621, + "<|audio:622|>": 622, + "<|audio:623|>": 623, + "<|audio:624|>": 624, + "<|audio:625|>": 625, + "<|audio:626|>": 626, + "<|audio:627|>": 627, + "<|audio:628|>": 628, + "<|audio:629|>": 629, + "<|audio:630|>": 630, + "<|audio:631|>": 631, + "<|audio:632|>": 632, + "<|audio:633|>": 633, + "<|audio:634|>": 634, + "<|audio:635|>": 635, + "<|audio:636|>": 636, + "<|audio:637|>": 637, + "<|audio:638|>": 638, + "<|audio:639|>": 639, + "<|audio:640|>": 640, + "<|audio:641|>": 641, + "<|audio:642|>": 642, + "<|audio:643|>": 643, + "<|audio:644|>": 644, + "<|audio:645|>": 645, + "<|audio:646|>": 646, + "<|audio:647|>": 647, + "<|audio:648|>": 648, + "<|audio:649|>": 649, + "<|audio:650|>": 650, + "<|audio:651|>": 651, + "<|audio:652|>": 652, + "<|audio:653|>": 653, + "<|audio:654|>": 654, + "<|audio:655|>": 655, + "<|audio:656|>": 656, + "<|audio:657|>": 657, + "<|audio:658|>": 658, + "<|audio:659|>": 659, + "<|audio:660|>": 660, + "<|audio:661|>": 661, + "<|audio:662|>": 662, + "<|audio:663|>": 663, + "<|audio:664|>": 664, + "<|audio:665|>": 665, + "<|audio:666|>": 666, + "<|audio:667|>": 667, + "<|audio:668|>": 668, + "<|audio:669|>": 669, + "<|audio:670|>": 670, + "<|audio:671|>": 671, + "<|audio:672|>": 672, + 
"<|audio:673|>": 673, + "<|audio:674|>": 674, + "<|audio:675|>": 675, + "<|audio:676|>": 676, + "<|audio:677|>": 677, + "<|audio:678|>": 678, + "<|audio:679|>": 679, + "<|audio:680|>": 680, + "<|audio:681|>": 681, + "<|audio:682|>": 682, + "<|audio:683|>": 683, + "<|audio:684|>": 684, + "<|audio:685|>": 685, + "<|audio:686|>": 686, + "<|audio:687|>": 687, + "<|audio:688|>": 688, + "<|audio:689|>": 689, + "<|audio:690|>": 690, + "<|audio:691|>": 691, + "<|audio:692|>": 692, + "<|audio:693|>": 693, + "<|audio:694|>": 694, + "<|audio:695|>": 695, + "<|audio:696|>": 696, + "<|audio:697|>": 697, + "<|audio:698|>": 698, + "<|audio:699|>": 699, + "<|audio:700|>": 700, + "<|audio:701|>": 701, + "<|audio:702|>": 702, + "<|audio:703|>": 703, + "<|audio:704|>": 704, + "<|audio:705|>": 705, + "<|audio:706|>": 706, + "<|audio:707|>": 707, + "<|audio:708|>": 708, + "<|audio:709|>": 709, + "<|audio:710|>": 710, + "<|audio:711|>": 711, + "<|audio:712|>": 712, + "<|audio:713|>": 713, + "<|audio:714|>": 714, + "<|audio:715|>": 715, + "<|audio:716|>": 716, + "<|audio:717|>": 717, + "<|audio:718|>": 718, + "<|audio:719|>": 719, + "<|audio:720|>": 720, + "<|audio:721|>": 721, + "<|audio:722|>": 722, + "<|audio:723|>": 723, + "<|audio:724|>": 724, + "<|audio:725|>": 725, + "<|audio:726|>": 726, + "<|audio:727|>": 727, + "<|audio:728|>": 728, + "<|audio:729|>": 729, + "<|audio:730|>": 730, + "<|audio:731|>": 731, + "<|audio:732|>": 732, + "<|audio:733|>": 733, + "<|audio:734|>": 734, + "<|audio:735|>": 735, + "<|audio:736|>": 736, + "<|audio:737|>": 737, + "<|audio:738|>": 738, + "<|audio:739|>": 739, + "<|audio:740|>": 740, + "<|audio:741|>": 741, + "<|audio:742|>": 742, + "<|audio:743|>": 743, + "<|audio:744|>": 744, + "<|audio:745|>": 745, + "<|audio:746|>": 746, + "<|audio:747|>": 747, + "<|audio:748|>": 748, + "<|audio:749|>": 749, + "<|audio:750|>": 750, + "<|audio:751|>": 751, + "<|audio:752|>": 752, + "<|audio:753|>": 753, + "<|audio:754|>": 754, + "<|audio:755|>": 755, + 
"<|audio:756|>": 756, + "<|audio:757|>": 757, + "<|audio:758|>": 758, + "<|audio:759|>": 759, + "<|audio:760|>": 760, + "<|audio:761|>": 761, + "<|audio:762|>": 762, + "<|audio:763|>": 763, + "<|audio:764|>": 764, + "<|audio:765|>": 765, + "<|audio:766|>": 766, + "<|audio:767|>": 767, + "<|audio:768|>": 768, + "<|audio:769|>": 769, + "<|audio:770|>": 770, + "<|audio:771|>": 771, + "<|audio:772|>": 772, + "<|audio:773|>": 773, + "<|audio:774|>": 774, + "<|audio:775|>": 775, + "<|audio:776|>": 776, + "<|audio:777|>": 777, + "<|audio:778|>": 778, + "<|audio:779|>": 779, + "<|audio:780|>": 780, + "<|audio:781|>": 781, + "<|audio:782|>": 782, + "<|audio:783|>": 783, + "<|audio:784|>": 784, + "<|audio:785|>": 785, + "<|audio:786|>": 786, + "<|audio:787|>": 787, + "<|audio:788|>": 788, + "<|audio:789|>": 789, + "<|audio:790|>": 790, + "<|audio:791|>": 791, + "<|audio:792|>": 792, + "<|audio:793|>": 793, + "<|audio:794|>": 794, + "<|audio:795|>": 795, + "<|audio:796|>": 796, + "<|audio:797|>": 797, + "<|audio:798|>": 798, + "<|audio:799|>": 799, + "<|audio:800|>": 800, + "<|audio:801|>": 801, + "<|audio:802|>": 802, + "<|audio:803|>": 803, + "<|audio:804|>": 804, + "<|audio:805|>": 805, + "<|audio:806|>": 806, + "<|audio:807|>": 807, + "<|audio:808|>": 808, + "<|audio:809|>": 809, + "<|audio:810|>": 810, + "<|audio:811|>": 811, + "<|audio:812|>": 812, + "<|audio:813|>": 813, + "<|audio:814|>": 814, + "<|audio:815|>": 815, + "<|audio:816|>": 816, + "<|audio:817|>": 817, + "<|audio:818|>": 818, + "<|audio:819|>": 819, + "<|audio:820|>": 820, + "<|audio:821|>": 821, + "<|audio:822|>": 822, + "<|audio:823|>": 823, + "<|audio:824|>": 824, + "<|audio:825|>": 825, + "<|audio:826|>": 826, + "<|audio:827|>": 827, + "<|audio:828|>": 828, + "<|audio:829|>": 829, + "<|audio:830|>": 830, + "<|audio:831|>": 831, + "<|audio:832|>": 832, + "<|audio:833|>": 833, + "<|audio:834|>": 834, + "<|audio:835|>": 835, + "<|audio:836|>": 836, + "<|audio:837|>": 837, + "<|audio:838|>": 838, + 
"<|audio:839|>": 839, + "<|audio:840|>": 840, + "<|audio:841|>": 841, + "<|audio:842|>": 842, + "<|audio:843|>": 843, + "<|audio:844|>": 844, + "<|audio:845|>": 845, + "<|audio:846|>": 846, + "<|audio:847|>": 847, + "<|audio:848|>": 848, + "<|audio:849|>": 849, + "<|audio:850|>": 850, + "<|audio:851|>": 851, + "<|audio:852|>": 852, + "<|audio:853|>": 853, + "<|audio:854|>": 854, + "<|audio:855|>": 855, + "<|audio:856|>": 856, + "<|audio:857|>": 857, + "<|audio:858|>": 858, + "<|audio:859|>": 859, + "<|audio:860|>": 860, + "<|audio:861|>": 861, + "<|audio:862|>": 862, + "<|audio:863|>": 863, + "<|audio:864|>": 864, + "<|audio:865|>": 865, + "<|audio:866|>": 866, + "<|audio:867|>": 867, + "<|audio:868|>": 868, + "<|audio:869|>": 869, + "<|audio:870|>": 870, + "<|audio:871|>": 871, + "<|audio:872|>": 872, + "<|audio:873|>": 873, + "<|audio:874|>": 874, + "<|audio:875|>": 875, + "<|audio:876|>": 876, + "<|audio:877|>": 877, + "<|audio:878|>": 878, + "<|audio:879|>": 879, + "<|audio:880|>": 880, + "<|audio:881|>": 881, + "<|audio:882|>": 882, + "<|audio:883|>": 883, + "<|audio:884|>": 884, + "<|audio:885|>": 885, + "<|audio:886|>": 886, + "<|audio:887|>": 887, + "<|audio:888|>": 888, + "<|audio:889|>": 889, + "<|audio:890|>": 890, + "<|audio:891|>": 891, + "<|audio:892|>": 892, + "<|audio:893|>": 893, + "<|audio:894|>": 894, + "<|audio:895|>": 895, + "<|audio:896|>": 896, + "<|audio:897|>": 897, + "<|audio:898|>": 898, + "<|audio:899|>": 899, + "<|audio:900|>": 900, + "<|audio:901|>": 901, + "<|audio:902|>": 902, + "<|audio:903|>": 903, + "<|audio:904|>": 904, + "<|audio:905|>": 905, + "<|audio:906|>": 906, + "<|audio:907|>": 907, + "<|audio:908|>": 908, + "<|audio:909|>": 909, + "<|audio:910|>": 910, + "<|audio:911|>": 911, + "<|audio:912|>": 912, + "<|audio:913|>": 913, + "<|audio:914|>": 914, + "<|audio:915|>": 915, + "<|audio:916|>": 916, + "<|audio:917|>": 917, + "<|audio:918|>": 918, + "<|audio:919|>": 919, + "<|audio:920|>": 920, + "<|audio:921|>": 921, + 
"<|audio:922|>": 922, + "<|audio:923|>": 923, + "<|audio:924|>": 924, + "<|audio:925|>": 925, + "<|audio:926|>": 926, + "<|audio:927|>": 927, + "<|audio:928|>": 928, + "<|audio:929|>": 929, + "<|audio:930|>": 930, + "<|audio:931|>": 931, + "<|audio:932|>": 932, + "<|audio:933|>": 933, + "<|audio:934|>": 934, + "<|audio:935|>": 935, + "<|audio:936|>": 936, + "<|audio:937|>": 937, + "<|audio:938|>": 938, + "<|audio:939|>": 939, + "<|audio:940|>": 940, + "<|audio:941|>": 941, + "<|audio:942|>": 942, + "<|audio:943|>": 943, + "<|audio:944|>": 944, + "<|audio:945|>": 945, + "<|audio:946|>": 946, + "<|audio:947|>": 947, + "<|audio:948|>": 948, + "<|audio:949|>": 949, + "<|audio:950|>": 950, + "<|audio:951|>": 951, + "<|audio:952|>": 952, + "<|audio:953|>": 953, + "<|audio:954|>": 954, + "<|audio:955|>": 955, + "<|audio:956|>": 956, + "<|audio:957|>": 957, + "<|audio:958|>": 958, + "<|audio:959|>": 959, + "<|audio:960|>": 960, + "<|audio:961|>": 961, + "<|audio:962|>": 962, + "<|audio:963|>": 963, + "<|audio:964|>": 964, + "<|audio:965|>": 965, + "<|audio:966|>": 966, + "<|audio:967|>": 967, + "<|audio:968|>": 968, + "<|audio:969|>": 969, + "<|audio:970|>": 970, + "<|audio:971|>": 971, + "<|audio:972|>": 972, + "<|audio:973|>": 973, + "<|audio:974|>": 974, + "<|audio:975|>": 975, + "<|audio:976|>": 976, + "<|audio:977|>": 977, + "<|audio:978|>": 978, + "<|audio:979|>": 979, + "<|audio:980|>": 980, + "<|audio:981|>": 981, + "<|audio:982|>": 982, + "<|audio:983|>": 983, + "<|audio:984|>": 984, + "<|audio:985|>": 985, + "<|audio:986|>": 986, + "<|audio:987|>": 987, + "<|audio:988|>": 988, + "<|audio:989|>": 989, + "<|audio:990|>": 990, + "<|audio:991|>": 991, + "<|audio:992|>": 992, + "<|audio:993|>": 993, + "<|audio:994|>": 994, + "<|audio:995|>": 995, + "<|audio:996|>": 996, + "<|audio:997|>": 997, + "<|audio:998|>": 998, + "<|audio:999|>": 999, + "<|audio:1000|>": 1000, + "<|audio:1001|>": 1001, + "<|audio:1002|>": 1002, + "<|audio:1003|>": 1003, + "<|audio:1004|>": 1004, 
+ "<|audio:1005|>": 1005, + "<|audio:1006|>": 1006, + "<|audio:1007|>": 1007, + "<|audio:1008|>": 1008, + "<|audio:1009|>": 1009, + "<|audio:1010|>": 1010, + "<|audio:1011|>": 1011, + "<|audio:1012|>": 1012, + "<|audio:1013|>": 1013, + "<|audio:1014|>": 1014, + "<|audio:1015|>": 1015, + "<|audio:1016|>": 1016, + "<|audio:1017|>": 1017, + "<|audio:1018|>": 1018, + "<|audio:1019|>": 1019, + "<|audio:1020|>": 1020, + "<|audio:1021|>": 1021, + "<|audio:1022|>": 1022, + "<|audio:1023|>": 1023, + "<|startoftranscript|>": 1024, + "<|endoftranscript|>": 1025, + "<|padding|>": 1026, + "'": 1027, + "a": 1028, + "b": 1029, + "c": 1030, + "d": 1031, + "e": 1032, + "f": 1033, + "g": 1034, + "h": 1035, + "i": 1036, + "j": 1037, + "k": 1038, + "l": 1039, + "m": 1040, + "n": 1041, + "o": 1042, + "p": 1043, + "q": 1044, + "r": 1045, + "s": 1046, + "t": 1047, + "u": 1048, + "v": 1049, + "w": 1050, + "x": 1051, + "y": 1052, + "z": 1053, + "▁": 1054, + "▁t": 1055, + "he": 1056, + "▁a": 1057, + "▁the": 1058, + "in": 1059, + "▁s": 1060, + "▁w": 1061, + "▁o": 1062, + "re": 1063, + "nd": 1064, + "▁b": 1065, + "▁h": 1066, + "er": 1067, + "▁m": 1068, + "▁i": 1069, + "ou": 1070, + "▁c": 1071, + "▁f": 1072, + "at": 1073, + "ed": 1074, + "▁and": 1075, + "en": 1076, + "▁to": 1077, + "▁of": 1078, + "on": 1079, + "is": 1080, + "▁d": 1081, + "ing": 1082, + "▁th": 1083, + "▁p": 1084, + "▁he": 1085, + "or": 1086, + "▁l": 1087, + "es": 1088, + "▁in": 1089, + "ll": 1090, + "it": 1091, + "ar": 1092, + "as": 1093, + "an": 1094, + "▁n": 1095, + "▁g": 1096, + "om": 1097, + "▁be": 1098, + "▁ha": 1099, + "▁e": 1100, + "le": 1101, + "ot": 1102, + "▁y": 1103, + "ut": 1104, + "ow": 1105, + "ic": 1106, + "▁wh": 1107, + "▁it": 1108, + "ld": 1109, + "ve": 1110, + "▁that": 1111, + "ly": 1112, + "▁was": 1113, + "id": 1114, + "se": 1115, + "st": 1116, + "▁on": 1117, + "gh": 1118, + "ent": 1119, + "▁re": 1120, + "▁you": 1121, + "im": 1122, + "ce": 1123, + "▁u": 1124, + "ver": 1125, + "ion": 1126, + "▁as": 1127, + 
"et": 1128, + "▁for": 1129, + "ay": 1130, + "▁his": 1131, + "▁we": 1132, + "ith": 1133, + "al": 1134, + "ir": 1135, + "▁r": 1136, + "▁with": 1137, + "▁st": 1138, + "ad": 1139, + "ur": 1140, + "ght": 1141, + "▁an": 1142, + "▁her": 1143, + "▁not": 1144, + "▁is": 1145, + "▁had": 1146, + "ter": 1147, + "her": 1148, + "ac": 1149, + "am": 1150, + "▁at": 1151, + "oo": 1152, + "▁but": 1153, + "ould": 1154, + "▁she": 1155, + "▁k": 1156, + "▁se": 1157, + "▁sa": 1158, + "▁sh": 1159, + "▁fr": 1160, + "▁him": 1161, + "▁so": 1162, + "▁me": 1163, + "ill": 1164, + "ain": 1165, + "▁su": 1166, + "ight": 1167, + "ch": 1168, + "red": 1169, + "ct": 1170, + "all": 1171, + "ro": 1172, + "ke": 1173, + "ess": 1174, + "il": 1175, + "'s": 1176, + "ore": 1177, + "▁de": 1178, + "▁my": 1179, + "▁they": 1180, + "▁whe": 1181, + "▁all": 1182, + "ich": 1183, + "▁ne": 1184, + "ri": 1185, + "▁by": 1186, + "▁have": 1187, + "ome": 1188, + "pp": 1189, + "▁this": 1190, + "▁li": 1191, + "▁do": 1192, + "▁con": 1193, + "us": 1194, + "▁which": 1195, + "▁ch": 1196, + "ul": 1197, + "qu": 1198, + "▁j": 1199, + "▁up": 1200, + "▁said": 1201, + "▁from": 1202, + "ard": 1203, + "ge": 1204, + "▁or": 1205, + "▁v": 1206, + "▁one": 1207, + "▁no": 1208, + "th": 1209, + "▁ex": 1210, + "▁were": 1211, + "▁there": 1212, + "pe": 1213, + "and": 1214, + "est": 1215, + "▁man": 1216, + "▁who": 1217, + "ble": 1218, + "ie": 1219, + "▁al": 1220, + "ant": 1221, + "res": 1222, + "ous": 1223, + "ust": 1224, + "very": 1225, + "ation": 1226, + "▁fe": 1227, + "▁them": 1228, + "lf": 1229, + "▁when": 1230, + "nt": 1231, + "ame": 1232, + "ind": 1233, + "ra": 1234, + "▁go": 1235, + "ers": 1236, + "ast": 1237, + "fe": 1238, + "ood": 1239, + "▁kn": 1240, + "▁int": 1241, + "ist": 1242, + "▁are": 1243, + "art": 1244, + "out": 1245, + "▁would": 1246, + "▁le": 1247, + "▁what": 1248, + "os": 1249, + "▁their": 1250, + "ong": 1251, + "our": 1252, + "▁if": 1253, + "▁com": 1254, + "ound": 1255, + "▁ab": 1256, + "▁out": 1257, + "▁wor": 1258, + "em": 
1259, + "▁will": 1260, + "ak": 1261, + "▁mis": 1262, + "ate": 1263, + "ol": 1264, + "um": 1265, + "un": 1266, + "itt": 1267, + "ough": 1268, + "ked": 1269, + "ig": 1270, + "ap": 1271, + "one": 1272, + "▁been": 1273, + "own": 1274, + "ive": 1275, + "▁then": 1276, + "▁br": 1277, + "ven": 1278, + "if": 1279, + "▁ar": 1280, + "'t": 1281, + "self": 1282, + "▁tr": 1283, + "▁pl": 1284, + "▁ro": 1285, + "▁pr": 1286, + "ther": 1287, + "reat": 1288, + "▁un": 1289, + "▁af": 1290, + "▁sp": 1291, + "▁qu": 1292, + "▁pro": 1293, + "ity": 1294, + "hed": 1295, + "▁tw": 1296, + "▁ag": 1297, + "▁could": 1298, + "ost": 1299, + "ace": 1300, + "ort": 1301, + "ure": 1302, + "ake": 1303, + "▁am": 1304, + "ack": 1305, + "▁any": 1306, + "▁some": 1307, + "▁your": 1308, + "▁more": 1309, + "▁can": 1310, + "au": 1311, + "▁tim": 1312, + "ep": 1313, + "ag": 1314, + "▁en": 1315, + "ck": 1316, + "▁into": 1317, + "▁cl": 1318, + "ry": 1319, + "▁now": 1320, + "hing": 1321, + "nder": 1322, + "are": 1323, + "▁very": 1324, + "▁gr": 1325, + "el": 1326, + "ose": 1327, + "▁loo": 1328, + "▁bo": 1329, + "ved": 1330, + "op": 1331, + "▁other": 1332, + "▁did": 1333, + "ance": 1334, + "▁than": 1335, + "ittle": 1336, + "▁little": 1337, + "ine": 1338, + "ies": 1339, + "way": 1340, + "ite": 1341, + "▁like": 1342, + "ide": 1343, + "▁lo": 1344, + "ass": 1345, + "▁bl": 1346, + "able": 1347, + "urn": 1348, + "ought": 1349, + "▁know": 1350, + "other": 1351, + "▁time": 1352, + "▁im": 1353, + "▁dis": 1354, + "▁us": 1355, + "▁co": 1356, + "fore": 1357, + "▁how": 1358, + "▁te": 1359, + "ence": 1360, + "▁day": 1361, + "▁ad": 1362, + "ade": 1363, + "ice": 1364, + "▁about": 1365, + "▁see": 1366, + "▁over": 1367, + "pt": 1368, + "cc": 1369, + "▁too": 1370, + "ink": 1371, + "▁fl": 1372, + "wn": 1373, + "▁great": 1374, + "▁after": 1375, + "pl": 1376, + "de": 1377, + "▁per": 1378, + "ment": 1379, + "▁again": 1380, + "▁upon": 1381, + "▁hand": 1382, + "ab": 1383, + "▁has": 1384, + "ree": 1385, + "ish": 1386, + "ci": 1387, + "▁only": 
1388, + "ally": 1389, + "▁well": 1390, + "▁should": 1391, + "▁po": 1392, + "▁mar": 1393, + "ress": 1394, + "▁say": 1395, + "▁good": 1396, + "ather": 1397, + "▁two": 1398, + "ings": 1399, + "▁pe": 1400, + "ount": 1401, + "▁our": 1402, + "ire": 1403, + "ving": 1404, + "▁down": 1405, + "ars": 1406, + "ert": 1407, + "we": 1408, + "▁before": 1409, + "ile": 1410, + "ves": 1411, + "▁app": 1412, + "▁every": 1413, + "▁its": 1414, + "▁old": 1415, + "▁thr": 1416, + "▁mu": 1417, + "▁made": 1418, + "ied": 1419, + "ick": 1420, + "▁long": 1421, + "age": 1422, + "te": 1423, + "ft": 1424, + "▁where": 1425, + "ang": 1426, + "▁never": 1427, + "▁must": 1428, + "▁pre": 1429, + "▁sm": 1430, + "ful": 1431, + "▁such": 1432, + "ull": 1433, + "▁str": 1434, + "ions": 1435, + "▁off": 1436, + "▁sc": 1437, + "▁came": 1438, + "ious": 1439, + "ue": 1440, + "▁miss": 1441, + "ward": 1442, + "ild": 1443, + "▁fir": 1444, + "▁even": 1445, + "▁under": 1446, + "act": 1447, + "▁these": 1448, + "▁come": 1449, + "▁part": 1450, + "▁fo": 1451, + "ated": 1452, + "ness": 1453, + "▁rem": 1454, + "ord": 1455, + "▁bec": 1456, + "ty": 1457, + "▁may": 1458, + "▁much": 1459, + "▁think": 1460, + "per": 1461, + "▁way": 1462, + "▁mister": 1463, + "led": 1464, + "▁let": 1465, + "orn": 1466, + "▁ey": 1467, + "▁gl": 1468, + "▁cont": 1469, + "▁thought": 1470, + "▁look": 1471, + "ect": 1472, + "▁spe": 1473, + "ise": 1474, + "▁back": 1475, + "▁bet": 1476, + "ady": 1477, + "▁ye": 1478, + "ans": 1479, + "ach": 1480, + "▁here": 1481, + "▁just": 1482, + "ren": 1483, + "▁first": 1484, + "▁ho": 1485, + "▁own": 1486, + "▁des": 1487, + "▁ob": 1488, + "ried": 1489, + "ud": 1490, + "ary": 1491, + "▁went": 1492, + "▁mo": 1493, + "▁himself": 1494, + "▁men": 1495, + "air": 1496, + "cl": 1497, + "ave": 1498, + "ath": 1499, + "ff": 1500, + "▁sl": 1501, + "co": 1502, + "on't": 1503, + "llow": 1504, + "▁cr": 1505, + "▁res": 1506, + "▁i'": 1507, + "▁might": 1508, + "ily": 1509, + "▁seem": 1510, + "int": 1511, + "ip": 1512, + "▁beg": 1513, + 
"ouse": 1514, + "anc": 1515, + "n't": 1516, + "▁wat": 1517, + "▁through": 1518, + "▁comp": 1519, + "ber": 1520, + "▁away": 1521, + "▁car": 1522, + "▁em": 1523, + "▁get": 1524, + "▁imp": 1525, + "▁head": 1526, + "oss": 1527, + "▁life": 1528, + "▁bel": 1529, + "▁without": 1530, + "▁most": 1531, + "▁pass": 1532, + "▁make": 1533, + "▁cons": 1534, + "ened": 1535, + "▁som": 1536, + "▁turn": 1537, + "av": 1538, + "ng": 1539, + "▁shall": 1540, + "▁acc": 1541, + "▁those": 1542, + "▁pres": 1543, + "▁eyes": 1544, + "▁house": 1545, + "iz": 1546, + "▁somet": 1547, + "▁jo": 1548, + "▁still": 1549, + "▁call": 1550, + "▁night": 1551, + "hes": 1552, + "▁op": 1553, + "ause": 1554, + "▁wom": 1555, + "▁last": 1556, + "ks": 1557, + "less": 1558, + "ared": 1559, + "▁comm": 1560, + "▁don't": 1561, + "▁tell": 1562, + "▁ent": 1563, + "▁nothing": 1564, + "▁new": 1565, + "ign": 1566, + "▁take": 1567, + "▁being": 1568, + "▁many": 1569, + "▁word": 1570, + "ons": 1571, + "▁found": 1572, + "▁ret": 1573, + "ase": 1574, + "▁ear": 1575, + "▁while": 1576, + "▁att": 1577, + "ory": 1578, + "ix": 1579, + "▁ser": 1580, + "▁saw": 1581, + "▁put": 1582, + "ne": 1583, + "oth": 1584, + "iend": 1585, + "▁peop": 1586, + "▁wr": 1587, + "▁young": 1588, + "ark": 1589, + "dy": 1590, + "aking": 1591, + "les": 1592, + "▁count": 1593, + "▁once": 1594, + "▁friend": 1595, + "▁la": 1596, + "ens": 1597, + "▁people": 1598, + "pect": 1599, + "ors": 1600, + "fect": 1601, + "▁mat": 1602, + "ince": 1603, + "ible": 1604, + "ered": 1605, + "▁room": 1606, + "▁three": 1607, + "▁yet": 1608, + "ail": 1609, + "▁same": 1610, + "▁father": 1611, + "▁right": 1612, + "▁child": 1613, + "▁cour": 1614, + "igh": 1615, + "▁place": 1616, + "▁another": 1617, + "ult": 1618, + "iv": 1619, + "ition": 1620, + "▁ind": 1621, + "▁want": 1622, + "▁though": 1623, + "▁nor": 1624, + "▁far": 1625, + "▁king": 1626, + "▁happ": 1627, + "▁heart": 1628, + "▁face": 1629, + "▁end": 1630, + "▁ever": 1631, + "▁nat": 1632, + "thing": 1633, + "▁love": 1634, + "get": 
1635, + "▁took": 1636, + "▁dist": 1637, + "ever": 1638, + "ian": 1639, + "▁hu": 1640, + "ew": 1641, + "▁arm": 1642, + "▁inst": 1643, + "man": 1644, + "▁work": 1645, + "▁light": 1646, + "▁char": 1647, + "▁ple": 1648, + "ict": 1649, + "▁set": 1650, + "▁ac": 1651, + "▁looked": 1652, + "▁missus": 1653, + "▁asked": 1654, + "▁mind": 1655, + "▁yes": 1656, + "▁supp": 1657, + "▁inte": 1658, + "▁rep": 1659, + "cess": 1660, + "ently": 1661, + "▁left": 1662, + "gg": 1663, + "ertain": 1664, + "▁ke": 1665, + "ished": 1666, + "ub": 1667, + "▁pers": 1668, + "ways": 1669, + "▁things": 1670, + "alk": 1671, + "irl": 1672, + "▁mom": 1673, + "▁sir": 1674, + "▁wa": 1675, + "▁moment": 1676, + "ations": 1677, + "▁sat": 1678, + "sel": 1679, + "▁find": 1680, + "ower": 1681, + "ia": 1682, + "vent": 1683, + "rew": 1684, + "▁world": 1685, + "ject": 1686, + "▁give": 1687, + "▁cap": 1688, + "▁why": 1689, + "so": 1690, + "▁gu": 1691, + "▁mother": 1692, + "▁gen": 1693, + "▁sw": 1694, + "▁always": 1695, + "der": 1696, + "lt": 1697, + "ling": 1698, + "▁ans": 1699, + "pped": 1700, + "▁soon": 1701, + "▁act": 1702, + "▁form": 1703, + "▁el": 1704, + "dd": 1705, + "▁heard": 1706, + "ret": 1707, + "▁thing": 1708, + "▁something": 1709, + "▁seemed": 1710, + "▁sub": 1711, + "▁door": 1712, + "ange": 1713, + "▁girl": 1714, + "ced": 1715, + "▁appe": 1716, + "ither": 1717, + "▁wind": 1718, + "▁because": 1719, + "▁dif": 1720, + "▁mon": 1721, + "ss": 1722, + "▁going": 1723, + "▁told": 1724, + "orm": 1725, + "▁home": 1726, + "ained": 1727, + "▁got": 1728, + "▁war": 1729, + "▁god": 1730, + "aught": 1731, + "▁gi": 1732, + "▁eng": 1733, + "▁sur": 1734, + "ning": 1735, + "▁hands": 1736, + "▁woman": 1737, + "▁follow": 1738, + "land": 1739, + "aut": 1740, + "▁vo": 1741, + "▁feel": 1742, + "▁rel": 1743, + "▁poss": 1744, + "ched": 1745, + "ical": 1746, + "ple": 1747, + "ph": 1748, + "▁boy": 1749, + "▁return": 1750, + "▁reg": 1751, + "▁rest": 1752, + "ook": 1753, + "▁knew": 1754, + "ner": 1755, + "▁each": 1756, + "▁oh": 
1757, + "▁sil": 1758, + "▁kind": 1759, + "▁exp": 1760, + "▁ma": 1761, + "▁cle": 1762, + "▁hel": 1763, + "iver": 1764, + "ting": 1765, + "▁del": 1766, + "ual": 1767, + "▁inf": 1768, + "▁ass": 1769, + "▁water": 1770, + "▁conf": 1771, + "▁bre": 1772, + "▁wo": 1773, + "cept": 1774, + "▁belie": 1775, + "▁certain": 1776, + "▁against": 1777, + "▁hard": 1778, + "▁ph": 1779, + "row": 1780, + "▁unt": 1781, + "▁years": 1782, + "▁quite": 1783, + "▁side": 1784, + "iness": 1785, + "ined": 1786, + "▁near": 1787, + "▁hor": 1788, + "ters": 1789, + "ired": 1790, + "ool": 1791, + "▁four": 1792, + "▁few": 1793, + "▁done": 1794, + "ier": 1795, + "▁che": 1796, + "rest": 1797, + "ited": 1798, + "most": 1799, + "▁better": 1800, + "▁half": 1801, + "▁min": 1802, + "▁tre": 1803, + "ps": 1804, + "▁also": 1805, + "▁care": 1806, + "ock": 1807, + "uck": 1808, + "oub": 1809, + "▁began": 1810, + "ully": 1811, + "▁enough": 1812, + "ised": 1813, + "ru": 1814, + "▁having": 1815, + "▁seen": 1816, + "▁gener": 1817, + "▁lady": 1818, + "▁dra": 1819, + "▁hum": 1820, + "aps": 1821, + "ott": 1822, + "▁pur": 1823, + "aken": 1824, + "ross": 1825, + "ying": 1826, + "▁ter": 1827, + "▁hour": 1828, + "▁inde": 1829, + "ank": 1830, + "▁called": 1831, + "ial": 1832, + "ason": 1833, + "▁beh": 1834, + "▁does": 1835, + "▁whole": 1836, + "▁morn": 1837, + "▁turned": 1838, + "▁pleas": 1839, + "▁ste": 1840, + "▁ref": 1841, + "▁gave": 1842, + "ense": 1843, + "▁occ": 1844, + "ib": 1845, + "▁course": 1846, + "▁ins": 1847, + "ream": 1848, + "gether": 1849, + "uth": 1850, + "▁both": 1851, + "▁sou": 1852, + "▁cur": 1853, + "▁add": 1854, + "een": 1855, + "▁col": 1856, + "▁read": 1857, + "ween": 1858, + "selves": 1859, + "▁among": 1860, + "▁between": 1861, + "▁inc": 1862, + "▁keep": 1863, + "▁beaut": 1864, + "ular": 1865, + "▁poor": 1866, + "▁it's": 1867, + "▁sure": 1868, + "▁morning": 1869, + "▁white": 1870, + "ged": 1871, + "▁name": 1872, + "▁dear": 1873, + "▁toward": 1874, + "ute": 1875, + "▁small": 1876, + "▁whom": 1877, + 
"▁repl": 1878, + "▁sk": 1879, + "▁lar": 1880, + "▁felt": 1881, + "bo": 1882, + "osed": 1883, + "ating": 1884, + "▁myself": 1885, + "▁open": 1886, + "▁six": 1887, + "▁herself": 1888, + "▁however": 1889, + "▁bu": 1890, + "ond": 1891, + "aint": 1892, + "xt": 1893, + "▁fore": 1894, + "▁inter": 1895, + "▁ev": 1896, + "▁high": 1897, + "ction": 1898, + "▁hund": 1899, + "▁stood": 1900, + "▁hundred": 1901, + "aster": 1902, + "▁tra": 1903, + "▁show": 1904, + "▁sent": 1905, + "ife": 1906, + "▁round": 1907, + "▁sim": 1908, + "▁dr": 1909, + "▁gra": 1910, + "▁words": 1911, + "▁days": 1912, + "▁almost": 1913, + "ale": 1914, + "vel": 1915, + "▁point": 1916, + "ents": 1917, + "▁gre": 1918, + "▁eight": 1919, + "ces": 1920, + "ates": 1921, + "dden": 1922, + "▁fam": 1923, + "▁stand": 1924, + "▁bus": 1925, + "▁land": 1926, + "▁ed": 1927, + "▁mean": 1928, + "ung": 1929, + "haps": 1930, + "▁sun": 1931, + "ures": 1932, + "▁since": 1933, + "iet": 1934, + "ird": 1935, + "▁perhaps": 1936, + "ned": 1937, + "▁sle": 1938, + "iss": 1939, + "▁best": 1940, + "▁sudden": 1941, + "▁dark": 1942, + "▁replied": 1943, + "▁voice": 1944, + "▁met": 1945, + "▁anything": 1946, + "▁till": 1947, + "▁underst": 1948, + "▁bar": 1949, + "its": 1950, + "▁until": 1951, + "ins": 1952, + "oud": 1953, + "▁black": 1954, + "▁bro": 1955, + "▁hear": 1956, + "▁looking": 1957, + "▁cried": 1958, + "▁you'": 1959, + "▁fact": 1960, + "amp": 1961, + "▁prin": 1962, + "▁less": 1963, + "▁lay": 1964, + "▁next": 1965, + "▁law": 1966, + "up": 1967, + "▁power": 1968, + "▁prop": 1969, + "not": 1970, + "rent": 1971, + "▁brought": 1972, + "ately": 1973, + "enty": 1974, + "▁country": 1975, + "▁help": 1976, + "als": 1977, + "▁quest": 1978, + "med": 1979, + "▁use": 1980, + "▁vis": 1981, + "▁sn": 1982, + "▁i'm": 1983, + "fully": 1984, + "▁spo": 1985, + "▁together": 1986, + "▁need": 1987, + "▁air": 1988, + "▁adv": 1989, + "▁person": 1990, + "▁indeed": 1991, + "▁contin": 1992, + "▁unc": 1993, + "oney": 1994, + "▁gent": 1995, + "▁present": 1996, + 
"▁aw": 1997, + "▁par": 1998, + "ows": 1999, + "ured": 2000, + "▁full": 2001, + "tain": 2002, + "▁run": 2003, + "▁rather": 2004, + "▁ide": 2005, + "▁cond": 2006, + "nded": 2007, + "▁lat": 2008, + "▁sy": 2009, + "be": 2010, + "du": 2011, + "▁har": 2012, + "▁feet": 2013, + "▁fin": 2014, + "eter": 2015, + "▁fall": 2016, + "cei": 2017, + "▁five": 2018, + "▁mil": 2019, + "▁bed": 2020, + "oc": 2021, + "▁doct": 2022, + "▁interest": 2023, + "ressed": 2024, + "▁matter": 2025, + "▁lord": 2026, + "▁gone": 2027, + "▁es": 2028, + "fort": 2029, + "▁death": 2030, + "▁wife": 2031, + "▁serv": 2032, + "▁pat": 2033, + "ering": 2034, + "oubt": 2035, + "▁adm": 2036, + "▁talk": 2037, + "▁taken": 2038, + "▁art": 2039, + "▁tri": 2040, + "▁others": 2041, + "▁hope": 2042, + "ash": 2043, + "az": 2044, + "▁ext": 2045, + "▁cannot": 2046, + "ief": 2047, + "▁speak": 2048, + "▁lau": 2049, + "▁themselves": 2050, + "▁along": 2051, + "▁dire": 2052, + "ove": 2053, + "mb": 2054, + "pr": 2055, + "▁bes": 2056, + "▁cou": 2057, + "▁mor": 2058, + "ten": 2059, + "▁gentle": 2060, + "uring": 2061, + "▁fire": 2062, + "▁large": 2063, + "▁pol": 2064, + "▁cat": 2065, + "▁swe": 2066, + "ention": 2067, + "vers": 2068, + "▁thus": 2069, + "app": 2070, + "▁sec": 2071, + "▁play": 2072, + "▁real": 2073, + "▁prom": 2074, + "ments": 2075, + "wered": 2076, + "ield": 2077, + "ains": 2078, + "ison": 2079, + "ached": 2080, + "▁thou": 2081, + "▁reason": 2082, + "▁thous": 2083, + "iting": 2084, + "▁brother": 2085, + "akes": 2086, + "▁thousand": 2087, + "ont": 2088, + "▁money": 2089, + "▁remem": 2090, + "▁dep": 2091, + "▁answered": 2092, + "▁true": 2093, + "▁children": 2094, + "▁behind": 2095, + "oy": 2096, + "▁sound": 2097, + "ants": 2098, + "ably": 2099, + "▁wood": 2100, + "used": 2101, + "▁dec": 2102, + "▁whose": 2103, + "od": 2104, + "▁ele": 2105, + "▁twenty": 2106, + "▁ra": 2107, + "itu": 2108, + "▁believe": 2109, + "▁wonder": 2110, + "ene": 2111, + "▁inv": 2112, + "▁hon": 2113, + "aring": 2114, + "sh": 2115, + "ued": 2116, 
+ "▁suff": 2117, + "▁opp": 2118, + "▁doubt": 2119, + "▁rec": 2120, + "ton": 2121, + "▁hold": 2122, + "▁diffe": 2123, + "▁passed": 2124, + "▁cor": 2125, + "me": 2126, + "ided": 2127, + "ities": 2128, + "▁mer": 2129, + "▁sing": 2130, + "▁nature": 2131, + "▁alone": 2132, + "▁dead": 2133, + "▁pri": 2134, + "ken": 2135, + "lic": 2136, + "▁red": 2137, + "▁bur": 2138, + "aces": 2139, + "▁close": 2140, + "▁gold": 2141, + "▁start": 2142, + "▁hur": 2143, + "▁fur": 2144, + "og": 2145, + "ances": 2146, + "▁ask": 2147, + "▁doctor": 2148, + "▁son": 2149, + "▁ground": 2150, + "wer": 2151, + "ets": 2152, + "▁sea": 2153, + "▁strong": 2154, + "▁leave": 2155, + "▁compan": 2156, + "▁i'll": 2157, + "ery": 2158, + "cy": 2159, + "illed": 2160, + "ept": 2161, + "ides": 2162, + "tle": 2163, + "▁ce": 2164, + "▁obs": 2165, + "body": 2166, + "▁fell": 2167, + "▁sign": 2168, + "cond": 2169, + "▁mount": 2170, + "▁fair": 2171, + "▁given": 2172, + "▁therefore": 2173, + "ane": 2174, + "▁ir": 2175, + "▁deep": 2176, + "iful": 2177, + "fic": 2178, + "ys": 2179, + "▁often": 2180, + "▁body": 2181, + "unt": 2182, + "▁short": 2183, + "▁tem": 2184, + "▁fa": 2185, + "▁master": 2186, + "▁earth": 2187, + "▁pap": 2188, + "ceed": 2189, + "▁stre": 2190, + "▁second": 2191, + "▁fort": 2192, + "bed": 2193, + "gth": 2194, + "owed": 2195, + "▁horse": 2196, + "idd": 2197, + "▁mad": 2198, + "ually": 2199, + "▁pa": 2200, + "▁chr": 2201, + "▁order": 2202, + "▁ten": 2203, + "vered": 2204, + "▁const": 2205, + "▁wish": 2206, + "▁fif": 2207, + "▁eas": 2208, + "▁cir": 2209, + "▁dro": 2210, + "aim": 2211, + "hen": 2212, + "▁ca": 2213, + "▁really": 2214, + "read": 2215, + "ceived": 2216, + "▁ill": 2217, + "▁fear": 2218, + "osition": 2219, + "▁understand": 2220, + "▁spir": 2221, + "▁list": 2222, + "▁abs": 2223, + "▁spr": 2224, + "aced": 2225, + "▁question": 2226, + "anger": 2227, + "▁everything": 2228, + "aughter": 2229, + "▁aff": 2230, + "▁wall": 2231, + "▁coming": 2232, + "ching": 2233, + "ready": 2234, + "ider": 2235, + 
"▁above": 2236, + "▁prince": 2237, + "▁already": 2238, + "▁least": 2239, + "▁reco": 2240, + "▁expl": 2241, + "▁step": 2242, + "▁used": 2243, + "▁ru": 2244, + "▁itself": 2245, + "ister": 2246, + "▁necess": 2247, + "▁case": 2248, + "▁around": 2249, + "hn": 2250, + "▁soul": 2251, + "▁suddenly": 2252, + "ger": 2253, + "▁lad": 2254, + "▁evening": 2255, + "▁mag": 2256, + "▁general": 2257, + "▁num": 2258, + "imes": 2259, + "▁known": 2260, + "▁wal": 2261, + "▁quick": 2262, + "ized": 2263, + "▁mus": 2264, + "▁sch": 2265, + "▁captain": 2266, + "▁that's": 2267, + "ific": 2268, + "▁whether": 2269, + "▁lear": 2270, + "gn": 2271, + "▁within": 2272, + "men": 2273, + "▁live": 2274, + "vern": 2275, + "▁times": 2276, + "▁expect": 2277, + "▁state": 2278, + "▁friends": 2279, + "▁bring": 2280, + "▁sort": 2281, + "▁women": 2282, + "▁table": 2283, + "▁meet": 2284, + "▁john": 2285, + "▁circ": 2286, + "▁sum": 2287, + "▁returned": 2288, + "iled": 2289, + "▁dri": 2290, + "▁held": 2291, + "▁exc": 2292, + "▁big": 2293, + "▁says": 2294, + "▁perfect": 2295, + "▁lea": 2296, + "▁obser": 2297, + "▁else": 2298, + "▁during": 2299, + "ident": 2300, + "▁hus": 2301, + "ted": 2302, + "▁beautiful": 2303, + "▁clear": 2304, + "▁either": 2305, + "▁town": 2306, + "▁sight": 2307, + "▁lost": 2308, + "▁sleep": 2309, + "▁means": 2310, + "▁foot": 2311, + "▁cut": 2312, + "▁cal": 2313, + "▁kept": 2314, + "▁ran": 2315, + "ience": 2316, + "▁prof": 2317, + "tered": 2318, + "here": 2319, + "ety": 2320, + "▁fellow": 2321, + "▁can't": 2322, + "▁mist": 2323, + "▁past": 2324, + "▁dream": 2325, + "ages": 2326, + "▁became": 2327, + "▁pret": 2328, + "▁disc": 2329, + "▁bad": 2330, + "▁making": 2331, + "ution": 2332, + "▁object": 2333, + "▁towards": 2334, + "▁low": 2335, + "ught": 2336, + "▁dev": 2337, + "▁human": 2338, + "▁manner": 2339, + "▁strange": 2340, + "▁year": 2341, + "old": 2342, + "ient": 2343, + "ines": 2344, + "▁sever": 2345, + "mon": 2346, + "▁ann": 2347, + "airs": 2348, + "ches": 2349, + "▁city": 2350, + 
"▁sometimes": 2351, + "'d": 2352, + "▁rose": 2353, + "▁est": 2354, + "ility": 2355, + "▁walk": 2356, + "▁ready": 2357, + "▁pal": 2358, + "▁leg": 2359, + "▁road": 2360, + "ians": 2361, + "cious": 2362, + "▁corn": 2363, + "▁thy": 2364, + "▁cold": 2365, + "lly": 2366, + "iously": 2367, + "lish": 2368, + "▁stra": 2369, + "mer": 2370, + "▁bat": 2371, + "owing": 2372, + "iew": 2373, + "▁christ": 2374, + "▁squ": 2375, + "▁truth": 2376, + "cri": 2377, + "lled": 2378, + "▁thir": 2379, + "▁didn't": 2380, + "bert": 2381, + "▁soci": 2382, + "br": 2383, + "▁bit": 2384, + "▁subject": 2385, + "▁ship": 2386, + "▁mur": 2387, + "▁appro": 2388, + "▁pie": 2389, + "▁answer": 2390, + "▁free": 2391, + "▁business": 2392, + "▁ut": 2393, + "ape": 2394, + "▁appear": 2395, + "▁river": 2396, + "▁sto": 2397, + "▁cast": 2398, + "▁family": 2399, + "▁jud": 2400, + "▁excl": 2401, + "▁letter": 2402, + "ingly": 2403, + "rie": 2404, + "▁hair": 2405, + "ote": 2406, + "▁arms": 2407, + "▁become": 2408, + "ern": 2409, + "ouble": 2410, + "▁different": 2411, + "▁val": 2412, + "ffect": 2413, + "▁natur": 2414, + "▁possible": 2415, + "▁several": 2416, + "▁fine": 2417, + "ah": 2418, + "▁lead": 2419, + "▁forg": 2420, + "▁express": 2421, + "li": 2422, + "▁sus": 2423, + "▁glad": 2424, + "oon": 2425, + "▁arri": 2426, + "▁blood": 2427, + "itting": 2428, + "▁quiet": 2429, + "rence": 2430, + "▁idea": 2431, + "▁able": 2432, + "itted": 2433, + "ster": 2434, + "▁charac": 2435, + "▁begin": 2436, + "▁chur": 2437, + "▁tou": 2438, + "▁story": 2439, + "▁eye": 2440, + "band": 2441, + "ative": 2442, + "▁grand": 2443, + "▁consider": 2444, + "▁across": 2445, + "▁pen": 2446, + "▁except": 2447, + "▁fre": 2448, + "▁win": 2449, + "▁equ": 2450, + "eth": 2451, + "▁cent": 2452, + "isf": 2453, + "▁partic": 2454, + "▁diffic": 2455, + "▁window": 2456, + "▁surpr": 2457, + "llect": 2458, + "▁prov": 2459, + "▁direct": 2460, + "▁conc": 2461, + "ey": 2462, + "aw": 2463, + "▁govern": 2464, + "▁disco": 2465, + "▁wild": 2466, + "▁dog": 2467, + 
"▁flo": 2468, + "▁soft": 2469, + "teen": 2470, + "▁cross": 2471, + "ased": 2472, + "▁effect": 2473, + "▁sor": 2474, + "▁longer": 2475, + "▁hen": 2476, + "▁followed": 2477, + "▁sold": 2478, + "▁thee": 2479, + "▁pub": 2480, + "▁husband": 2481, + "ards": 2482, + "antly": 2483, + "by": 2484, + "▁ap": 2485, + "▁suppose": 2486, + "▁respect": 2487, + "ts": 2488, + "▁hast": 2489, + "▁sal": 2490, + "▁comple": 2491, + "▁heav": 2492, + "▁happy": 2493, + "▁rich": 2494, + "▁creat": 2495, + "une": 2496, + "▁taking": 2497, + "▁requ": 2498, + "▁stay": 2499, + "▁spoke": 2500, + "▁daughter": 2501, + "▁wee": 2502, + "▁ve": 2503, + "▁du": 2504, + "▁green": 2505, + "▁anim": 2506, + "▁din": 2507, + "'ll": 2508, + "▁bird": 2509, + "alth": 2510, + "▁mere": 2511, + "▁gard": 2512, + "ny": 2513, + "ley": 2514, + "▁possess": 2515, + "empt": 2516, + "▁reached": 2517, + "▁appeared": 2518, + "ov": 2519, + "▁exist": 2520, + "ination": 2521, + "▁pretty": 2522, + "▁remember": 2523, + "▁hea": 2524, + "▁opened": 2525, + "▁tom": 2526, + "anged": 2527, + "▁slow": 2528, + "▁imag": 2529, + "▁i've": 2530, + "ract": 2531, + "▁saying": 2532, + "king": 2533, + "utes": 2534, + "▁common": 2535, + "▁occas": 2536, + "▁book": 2537, + "▁rus": 2538, + "ames": 2539, + "ices": 2540, + "▁bright": 2541, + "ms": 2542, + "▁satisf": 2543, + "▁sense": 2544, + "▁fav": 2545, + "▁succ": 2546, + "ump": 2547, + "ising": 2548, + "▁lu": 2549, + "▁accord": 2550, + "tern": 2551, + "▁break": 2552, + "▁exper": 2553, + "▁month": 2554, + "use": 2555, + "▁dem": 2556, + "▁scar": 2557, + "▁continued": 2558, + "▁secret": 2559, + "▁church": 2560, + "▁tree": 2561, + "▁stri": 2562, + "▁carried": 2563, + "▁cry": 2564, + "nding": 2565, + "▁spirit": 2566, + "▁wanted": 2567, + "eric": 2568, + "▁certainly": 2569, + "▁command": 2570, + "▁dest": 2571, + "▁move": 2572, + "oun": 2573, + "▁sweet": 2574, + "▁street": 2575, + "▁ought": 2576, + "▁account": 2577, + "▁def": 2578, + "ham": 2579, + "▁prep": 2580, + "▁sens": 2581, + "▁esc": 2582, + "▁rock": 
2583, + "ots": 2584, + "▁decl": 2585, + "▁purp": 2586, + "riage": 2587, + "outh": 2588, + "owers": 2589, + "▁draw": 2590, + "▁eat": 2591, + "▁breat": 2592, + "▁character": 2593, + "ime": 2594, + "cul": 2595, + "medi": 2596, + "▁stud": 2597, + "▁school": 2598, + "itude": 2599, + "▁heaven": 2600, + "▁feeling": 2601, + "▁sad": 2602, + "▁regard": 2603, + "ement": 2604, + "▁pain": 2605, + "▁worth": 2606, + "▁bra": 2607, + "ney": 2608, + "▁dut": 2609, + "▁smo": 2610, + "aimed": 2611, + "▁trans": 2612, + "▁delight": 2613, + "▁quar": 2614, + "▁hung": 2615, + "▁mot": 2616, + "▁blue": 2617, + "▁hot": 2618, + "▁hill": 2619, + "▁div": 2620, + "umb": 2621, + "▁disapp": 2622, + "▁marg": 2623, + "▁laugh": 2624, + "idence": 2625, + "▁produ": 2626, + "▁success": 2627, + "ury": 2628, + "son": 2629, + "▁fast": 2630, + "▁english": 2631, + "▁dress": 2632, + "▁hat": 2633, + "▁terri": 2634, + "▁port": 2635, + "▁neither": 2636, + "▁court": 2637, + "▁seven": 2638, + "▁fight": 2639, + "▁princess": 2640, + "▁lived": 2641, + "▁view": 2642, + "▁immedi": 2643, + "▁self": 2644, + "▁var": 2645, + "▁hours": 2646, + "▁mill": 2647, + "▁sol": 2648, + "▁exam": 2649, + "▁tried": 2650, + "▁won't": 2651, + "▁entered": 2652, + "▁disp": 2653, + "to": 2654, + "ric": 2655, + "▁carry": 2656, + "▁import": 2657, + "▁ang": 2658, + "ze": 2659, + "ony": 2660, + "▁danger": 2661, + "ledge": 2662, + "▁offic": 2663, + "▁cause": 2664, + "▁none": 2665, + "▁forward": 2666, + "▁uncle": 2667, + "▁tor": 2668, + "▁det": 2669, + "ask": 2670, + "▁len": 2671, + "▁further": 2672, + "▁pay": 2673, + "▁added": 2674, + "▁front": 2675, + "ror": 2676, + "▁ge": 2677, + "▁particular": 2678, + "▁deal": 2679, + "▁prot": 2680, + "▁led": 2681, + "▁acqu": 2682, + "▁pray": 2683, + "▁eff": 2684, + "▁happened": 2685, + "▁chief": 2686, + "lect": 2687, + "▁walked": 2688, + "▁later": 2689, + "▁joy": 2690, + "iar": 2691, + "day": 2692, + "▁ord": 2693, + "▁alth": 2694, + "▁comfort": 2695, + "▁prob": 2696, + "▁maj": 2697, + "▁affect": 2698, + 
"▁public": 2699, + "▁bene": 2700, + "ening": 2701, + "▁although": 2702, + "gr": 2703, + "▁sho": 2704, + "▁fig": 2705, + "resh": 2706, + "▁fail": 2707, + "uct": 2708, + "ug": 2709, + "ality": 2710, + "▁mem": 2711, + "▁seems": 2712, + "▁yourself": 2713, + "ship": 2714, + "ead": 2715, + "iam": 2716, + "▁number": 2717, + "side": 2718, + "▁ah": 2719, + "▁doing": 2720, + "▁living": 2721, + "arent": 2722, + "▁desp": 2723, + "ize": 2724, + "oof": 2725, + "▁field": 2726, + "▁received": 2727, + "▁shad": 2728, + "▁bey": 2729, + "▁beyond": 2730, + "▁phil": 2731, + "▁line": 2732, + "▁visit": 2733, + "inct": 2734, + "rig": 2735, + "▁party": 2736, + "▁garden": 2737, + "▁je": 2738, + "▁mouth": 2739, + "▁hall": 2740, + "▁queen": 2741, + "▁boat": 2742, + "▁bear": 2743, + "▁americ": 2744, + "ism": 2745, + "▁gentleman": 2746, + "▁vi": 2747, + "irt": 2748, + "uff": 2749, + "▁laid": 2750, + "raid": 2751, + "▁occasion": 2752, + "▁entire": 2753, + "▁age": 2754, + "▁sister": 2755, + "▁clot": 2756, + "▁repe": 2757, + "ously": 2758, + "▁prison": 2759, + "▁accom": 2760, + "▁whis": 2761, + "▁nearly": 2762, + "▁trees": 2763, + "iling": 2764, + "iff": 2765, + "▁eighteen": 2766, + "bit": 2767, + "wards": 2768, + "▁early": 2769, + "▁tal": 2770, + "▁lab": 2771, + "▁forth": 2772, + "ming": 2773, + "ones": 2774, + "▁med": 2775, + "▁try": 2776, + "▁da": 2777, + "ilt": 2778, + "anced": 2779, + "▁princi": 2780, + "▁enem": 2781, + "▁thinking": 2782, + "▁chance": 2783, + "where": 2784, + "▁cre": 2785, + "▁minutes": 2786, + "▁anx": 2787, + "▁mary": 2788, + "▁pict": 2789, + "▁wait": 2790, + "▁vill": 2791, + "▁stren": 2792, + "▁afraid": 2793, + "▁crow": 2794, + "▁smile": 2795, + "▁late": 2796, + "▁england": 2797, + "▁pleasure": 2798, + "▁aunt": 2799, + "▁news": 2800, + "▁wis": 2801, + "▁fle": 2802, + "▁seeing": 2803, + "▁super": 2804, + "▁faith": 2805, + "▁rob": 2806, + "iment": 2807, + "oint": 2808, + "▁bill": 2809, + "lling": 2810, + "▁neigh": 2811, + "▁trouble": 2812, + "▁silence": 2813, + "▁plain": 2814, 
+ "▁there's": 2815, + "aret": 2816, + "pend": 2817, + "▁exclaimed": 2818, + "rench": 2819, + "gy": 2820, + "▁miles": 2821, + "ply": 2822, + "▁glass": 2823, + "▁drew": 2824, + "▁neighb": 2825, + "els": 2826, + "▁mine": 2827, + "▁pract": 2828, + "▁heavy": 2829, + "▁standing": 2830, + "▁sevent": 2831, + "▁shar": 2832, + "▁change": 2833, + "▁necessary": 2834, + "▁chap": 2835, + "▁purpose": 2836, + "▁inqu": 2837, + "▁natural": 2838, + "▁deter": 2839, + "icked": 2840, + "▁bott": 2841, + "▁hardly": 2842, + "▁bell": 2843, + "▁top": 2844, + "▁caught": 2845, + "fered": 2846, + "wh": 2847, + "ives": 2848, + "ounded": 2849, + "▁auth": 2850, + "▁circum": 2851, + "▁fing": 2852, + "▁stopped": 2853, + "uc": 2854, + "▁wit": 2855, + "ament": 2856, + "▁opin": 2857, + "▁av": 2858, + "▁priv": 2859, + "aining": 2860, + "▁instead": 2861, + "rupt": 2862, + "▁grew": 2863, + "▁loved": 2864, + "▁island": 2865, + "▁knight": 2866, + "▁ago": 2867, + "▁length": 2868, + "▁inn": 2869, + "▁peace": 2870, + "ls": 2871, + "inary": 2872, + "ior": 2873, + "ues": 2874, + "▁third": 2875, + "ush": 2876, + "▁beauty": 2877, + "▁hig": 2878, + "▁he's": 2879, + "the": 2880, + "form": 2881, + "head": 2882, + "ically": 2883, + "asp": 2884, + "ancy": 2885, + "▁determ": 2886, + "▁straight": 2887, + "▁cra": 2888, + "ining": 2889, + "pper": 2890, + "ler": 2891, + "▁infl": 2892, + "▁thor": 2893, + "▁convers": 2894, + "▁besides": 2895, + "▁position": 2896, + "▁thirty": 2897, + "▁den": 2898, + "rage": 2899, + "▁attention": 2900, + "ma": 2901, + "▁conv": 2902, + "ager": 2903, + "▁hist": 2904, + "ored": 2905, + "▁comes": 2906, + "aged": 2907, + "▁force": 2908, + "▁sitting": 2909, + "▁please": 2910, + "tend": 2911, + "iter": 2912, + "▁whatever": 2913, + "▁inform": 2914, + "▁hop": 2915, + "▁chair": 2916, + "▁build": 2917, + "▁bab": 2918, + "ustom": 2919, + "▁girls": 2920, + "▁rom": 2921, + "▁french": 2922, + "▁struck": 2923, + "▁pull": 2924, + "▁ast": 2925, + "▁lie": 2926, + "▁wrong": 2927, + "▁knowledge": 2928, + "▁grace": 
2929, + "▁scarce": 2930, + "ghed": 2931, + "▁resol": 2932, + "▁watch": 2933, + "▁thoughts": 2934, + "▁rid": 2935, + "▁attempt": 2936, + "▁fifty": 2937, + "▁rap": 2938, + "▁box": 2939, + "hood": 2940, + "▁getting": 2941, + "▁ver": 2942, + "▁fat": 2943, + "▁company": 2944, + "▁arr": 2945, + "▁crowd": 2946, + "▁burn": 2947, + "▁slight": 2948, + "▁class": 2949, + "▁south": 2950, + "▁die": 2951, + "▁exact": 2952, + "▁drink": 2953, + "▁enj": 2954, + "▁thick": 2955, + "▁dinner": 2956, + "▁save": 2957, + "▁maid": 2958, + "▁plan": 2959, + "▁saint": 2960, + "▁immediately": 2961, + "iers": 2962, + "▁born": 2963, + "ius": 2964, + "▁rev": 2965, + "▁tears": 2966, + "ists": 2967, + "▁treat": 2968, + "usion": 2969, + "▁meant": 2970, + "▁boys": 2971, + "pping": 2972, + "▁slowly": 2973, + "▁incl": 2974, + "▁lim": 2975, + "▁died": 2976, + "iced": 2977, + "▁compl": 2978, + "▁fool": 2979, + "▁forest": 2980, + "▁sugg": 2981, + "▁post": 2982, + "▁accept": 2983, + "▁result": 2984, + "▁author": 2985, + "ndon": 2986, + "ceive": 2987, + "▁suggest": 2988, + "cient": 2989, + "▁stone": 2990, + "▁fright": 2991, + "▁paper": 2992, + "▁conse": 2993, + "▁jour": 2994, + "▁ty": 2995, + "▁enc": 2996, + "▁quickly": 2997, + "▁contr": 2998, + "▁youth": 2999, + "▁send": 3000, + "▁vict": 3001, + "ified": 3002, + "▁belong": 3003, + "▁warm": 3004, + "▁fix": 3005, + "▁imposs": 3006, + "▁beside": 3007, + "▁er": 3008, + "▁tone": 3009, + "▁camp": 3010, + "▁desire": 3011, + "▁bound": 3012, + "▁makes": 3013, + "▁margaret": 3014, + "▁north": 3015, + "▁brown": 3016, + "▁moon": 3017, + "▁lips": 3018, + "▁placed": 3019, + "val": 3020, + "▁circumst": 3021, + "▁food": 3022, + "▁filled": 3023, + "ics": 3024, + "ift": 3025, + "ann": 3026, + "▁london": 3027, + "▁distance": 3028, + "ging": 3029, + "▁strength": 3030, + "▁id": 3031, + "▁floor": 3032, + "▁forget": 3033, + "▁obl": 3034, + "▁mid": 3035, + "ries": 3036, + "itions": 3037, + "bs": 3038, + "▁spring": 3039, + "▁you're": 3040, + "▁viol": 3041, + "▁jack": 3042, + 
"▁pock": 3043, + "ooks": 3044, + "▁following": 3045, + "▁sac": 3046, + "▁remained": 3047, + "arch": 3048, + "▁grow": 3049, + "▁snow": 3050, + "▁government": 3051, + "▁ball": 3052, + "▁hors": 3053, + "▁nar": 3054, + "aded": 3055, + "▁broken": 3056, + "▁laughed": 3057, + "▁descri": 3058, + "▁safe": 3059, + "itten": 3060, + "ively": 3061, + "▁profess": 3062, + "▁o'": 3063, + "amed": 3064, + "▁depart": 3065, + "▁easy": 3066, + "oured": 3067, + "▁und": 3068, + "▁coun": 3069, + "▁thank": 3070, + "▁knows": 3071, + "▁waiting": 3072, + "dom": 3073, + "ats": 3074, + "▁ger": 3075, + "▁van": 3076, + "▁anne": 3077, + "▁horses": 3078, + "ugg": 3079, + "▁dread": 3080, + "▁une": 3081, + "ges": 3082, + "acy": 3083, + "▁proceed": 3084, + "▁gaz": 3085, + "▁shout": 3086, + "▁started": 3087, + "ented": 3088, + "▁complete": 3089, + "ope": 3090, + "▁gall": 3091, + "dered": 3092, + "▁wide": 3093, + "ires": 3094, + "▁neck": 3095, + "asure": 3096, + "isted": 3097, + "▁service": 3098, + "▁piece": 3099, + "cially": 3100, + "ences": 3101, + "▁sail": 3102, + "▁palace": 3103, + "erv": 3104, + "▁guard": 3105, + "▁doll": 3106, + "▁talking": 3107, + "▁man's": 3108, + "▁lift": 3109, + "▁grave": 3110, + "▁week": 3111, + "let": 3112, + "▁impossible": 3113, + "▁effort": 3114, + "▁imm": 3115, + "▁army": 3116, + "well": 3117, + "▁difficult": 3118, + "und": 3119, + "▁fresh": 3120, + "▁fun": 3121, + "reme": 3122, + "▁stop": 3123, + "▁mess": 3124, + "▁gar": 3125, + "▁deg": 3126, + "▁incre": 3127, + "▁corner": 3128, + "▁society": 3129, + "▁weak": 3130, + "▁shut": 3131, + "▁hy": 3132, + "▁proper": 3133, + "aching": 3134, + "▁cloud": 3135, + "iddle": 3136, + "ivid": 3137, + "▁demand": 3138, + "▁nine": 3139, + "▁sit": 3140, + "▁recogn": 3141, + "▁beat": 3142, + "uss": 3143, + "▁turning": 3144, + "▁sky": 3145, + "▁opinion": 3146, + "▁single": 3147, + "pic": 3148, + "▁fly": 3149, + "▁lang": 3150, + "▁mass": 3151, + "cell": 3152, + "▁outside": 3153, + "▁kiss": 3154, + "▁trust": 3155, + "▁occup": 3156, + "▁evil": 
3157, + "▁below": 3158, + "▁appearance": 3159, + "uit": 3160, + "▁aftern": 3161, + "▁glo": 3162, + "▁gun": 3163, + "▁west": 3164, + "ency": 3165, + "par": 3166, + "▁showed": 3167, + "▁conversation": 3168, + "ises": 3169, + "▁conn": 3170, + "▁couldn't": 3171, + "▁running": 3172, + "▁mention": 3173, + "▁greater": 3174, + "▁music": 3175, + "▁breath": 3176, + "ases": 3177, + "▁nin": 3178, + "▁ant": 3179, + "arer": 3180, + "▁morrow": 3181, + "▁bank": 3182, + "▁espe": 3183, + "▁peter": 3184, + "ork": 3185, + "cial": 3186, + "▁presence": 3187, + "▁battle": 3188, + "▁winter": 3189, + "hered": 3190, + "▁probably": 3191, + "▁clothes": 3192, + "▁fash": 3193, + "▁mark": 3194, + "▁wished": 3195, + "vere": 3196, + "▁coll": 3197, + "▁emb": 3198, + "▁kne": 3199, + "▁married": 3200, + "▁arrived": 3201, + "▁pun": 3202, + "▁event": 3203, + "ushed": 3204, + "▁suffic": 3205, + "▁eager": 3206, + "▁former": 3207, + "▁giving": 3208, + "▁pop": 3209, + "▁sand": 3210, + "▁neg": 3211, + "▁usual": 3212, + "▁relig": 3213, + "▁simple": 3214, + "▁sym": 3215, + "itation": 3216, + "▁gro": 3217, + "ories": 3218, + "▁moved": 3219, + "▁months": 3220, + "▁speaking": 3221, + "▁pet": 3222, + "▁silent": 3223, + "▁cab": 3224, + "▁mountain": 3225, + "▁expression": 3226, + "gar": 3227, + "▁covered": 3228, + "▁hunt": 3229, + "▁afternoon": 3230, + "aped": 3231, + "▁occur": 3232, + "rief": 3233, + "▁states": 3234, + "▁z": 3235, + "str": 3236, + "▁loc": 3237, + "light": 3238, + "▁shore": 3239, + "che": 3240, + "▁easily": 3241, + "▁pale": 3242, + "unity": 3243, + "▁remark": 3244, + "▁phys": 3245, + "▁beginning": 3246, + "▁duty": 3247, + "▁chapter": 3248, + "▁influ": 3249, + "cho": 3250, + "▁concl": 3251, + "amb": 3252, + "▁instant": 3253, + "▁polit": 3254, + "zz": 3255, + "▁enjoy": 3256, + "▁sick": 3257, + "▁remain": 3258, + "uel": 3259, + "▁stream": 3260, + "▁figure": 3261, + "ald": 3262, + "▁tur": 3263, + "▁path": 3264, + "▁vol": 3265, + "▁minute": 3266, + "▁pleasant": 3267, + "▁scarcely": 3268, + "▁conscious": 
3269, + "▁terrible": 3270, + "▁kill": 3271, + "▁raised": 3272, + "▁fashion": 3273, + "▁twel": 3274, + "yal": 3275, + "▁leaving": 3276, + "▁twelve": 3277, + "ature": 3278, + "▁fut": 3279, + "▁threw": 3280, + "▁star": 3281, + "▁flowers": 3282, + "olog": 3283, + "▁trying": 3284, + "rib": 3285, + "▁sword": 3286, + "▁tall": 3287, + "▁marry": 3288, + "▁ben": 3289, + "▁expected": 3290, + "▁according": 3291, + "▁forty": 3292, + "▁stick": 3293, + "inal": 3294, + "▁guess": 3295, + "▁silver": 3296, + "▁iron": 3297, + "▁oblig": 3298, + "▁office": 3299, + "▁rapid": 3300, + "▁ladies": 3301, + "▁especially": 3302, + "ipped": 3303, + "orted": 3304, + "▁bread": 3305, + "ech": 3306, + "▁tender": 3307, + "orth": 3308, + "▁learned": 3309, + "▁books": 3310, + "▁isn't": 3311, + "▁surprise": 3312, + "▁write": 3313, + "▁purs": 3314, + "pered": 3315, + "▁written": 3316, + "▁killed": 3317, + "▁consequ": 3318, + "▁exh": 3319, + "▁places": 3320, + "▁condition": 3321, + "▁direction": 3322, + "▁cho": 3323, + "ulty": 3324, + "jo": 3325, + "mit": 3326, + "▁entirely": 3327, + "tering": 3328, + "▁enter": 3329, + "▁action": 3330, + "wise": 3331, + "▁suc": 3332, + "ibly": 3333, + "▁happiness": 3334, + "▁decided": 3335, + "▁golden": 3336, + "▁langu": 3337, + "eness": 3338, + "▁note": 3339, + "▁unless": 3340, + "uous": 3341, + "▁fal": 3342, + "aled": 3343, + "▁you'll": 3344, + "▁wonderful": 3345, + "ounds": 3346, + "ume": 3347, + "'re": 3348, + "▁shook": 3349, + "er's": 3350, + "oop": 3351, + "onel": 3352, + "▁perfectly": 3353, + "▁geor": 3354, + "ndered": 3355, + "▁broad": 3356, + "atic": 3357, + "▁closed": 3358, + "a's": 3359, + "▁spot": 3360, + "tended": 3361, + "▁latter": 3362, + "▁steps": 3363, + "▁merely": 3364, + "▁history": 3365, + "fer": 3366, + "▁wise": 3367, + "ishing": 3368, + "osing": 3369, + "▁middle": 3370, + "idered": 3371, + "▁understood": 3372, + "▁enemy": 3373, + "▁sole": 3374, + "llig": 3375, + "▁jew": 3376, + "▁simply": 3377, + "gan": 3378, + "▁conduct": 3379, + "▁tast": 3380, + 
"▁board": 3381, + "▁sav": 3382, + "▁wouldn't": 3383, + "▁shot": 3384, + "▁reply": 3385, + "▁changed": 3386, + "mn": 3387, + "▁grass": 3388, + "▁finally": 3389, + "▁admir": 3390, + "ital": 3391, + "▁sharp": 3392, + "itch": 3393, + "▁fortune": 3394, + "▁summer": 3395, + "▁experience": 3396, + "▁succeed": 3397, + "gress": 3398, + "uted": 3399, + "▁orig": 3400, + "retched": 3401, + "▁journey": 3402, + "▁excell": 3403, + "▁observed": 3404, + "ax": 3405, + "▁afterwards": 3406, + "fast": 3407, + "sy": 3408, + "▁bow": 3409, + "▁flat": 3410, + "▁persons": 3411, + "▁lean": 3412, + "▁earn": 3413, + "▁broke": 3414, + "▁mir": 3415, + "▁fit": 3416, + "osp": 3417, + "▁marriage": 3418, + "▁repres": 3419, + "io": 3420, + "▁lying": 3421, + "unk": 3422, + "▁trave": 3423, + "▁situ": 3424, + "▁listen": 3425, + "▁acquaint": 3426, + "▁ring": 3427, + "cience": 3428, + "▁faint": 3429, + "olute": 3430, + "▁calm": 3431, + "bered": 3432, + "▁lives": 3433, + "▁escape": 3434, + "▁beneath": 3435, + "ouses": 3436, + "▁clim": 3437, + "▁bless": 3438, + "▁repeated": 3439, + "▁pocket": 3440, + "ests": 3441, + "▁tail": 3442, + "▁passion": 3443, + "▁dick": 3444, + "▁ven": 3445, + "oses": 3446, + "clock": 3447, + "▁mut": 3448, + "▁becom": 3449, + "▁oper": 3450, + "▁o'clock": 3451, + "▁fish": 3452, + "▁lou": 3453, + "semb": 3454, + "▁prev": 3455, + "▁allowed": 3456, + "▁famil": 3457, + "hel": 3458, + "▁gate": 3459, + "▁spite": 3460, + "ivers": 3461, + "▁health": 3462, + "ission": 3463, + "▁ign": 3464, + "▁reach": 3465, + "▁cand": 3466, + "▁rain": 3467, + "▁empl": 3468, + "▁ban": 3469, + "▁strugg": 3470, + "▁firm": 3471, + "▁bitter": 3472, + "▁sorry": 3473, + "bing": 3474, + "▁father's": 3475, + "▁temper": 3476, + "▁madame": 3477, + "ples": 3478, + "▁furn": 3479, + "▁future": 3480, + "umed": 3481, + "▁nice": 3482, + "▁separ": 3483, + "▁presently": 3484, + "▁circumstances": 3485, + "▁connect": 3486, + "iding": 3487, + "▁sett": 3488, + "kes": 3489, + "▁loud": 3490, + "▁worse": 3491, + "▁wand": 3492, + 
"▁spread": 3493, + "▁i'd": 3494, + "▁letters": 3495, + "▁yellow": 3496, + "▁magn": 3497, + "▁passing": 3498, + "▁kit": 3499, + "▁pleased": 3500, + "▁darkness": 3501, + "▁remar": 3502, + "idden": 3503, + "come": 3504, + "▁tea": 3505, + "▁civ": 3506, + "▁apart": 3507, + "▁disappe": 3508, + "▁important": 3509, + "▁legs": 3510, + "▁nation": 3511, + "▁delic": 3512, + "▁dressed": 3513, + "▁game": 3514, + "▁walls": 3515, + "ec": 3516, + "▁dry": 3517, + "▁virt": 3518, + "▁dim": 3519, + "idently": 3520, + "rel": 3521, + "▁rub": 3522, + "▁absolute": 3523, + "▁blind": 3524, + "▁discovered": 3525, + "▁exactly": 3526, + "▁dam": 3527, + "otten": 3528, + "▁sorrow": 3529, + "my": 3530, + "▁cost": 3531, + "ference": 3532, + "▁employ": 3533, + "velop": 3534, + "▁cous": 3535, + "▁beast": 3536, + "▁spec": 3537, + "▁opport": 3538, + "▁ears": 3539, + "▁dropped": 3540, + "▁subst": 3541, + "▁chee": 3542, + "▁protect": 3543, + "ils": 3544, + "▁smiled": 3545, + "ina": 3546, + "▁resp": 3547, + "▁promise": 3548, + "▁bag": 3549, + "▁host": 3550, + "urs": 3551, + "▁creature": 3552, + "▁notice": 3553, + "▁knowing": 3554, + "▁heads": 3555, + "▁concer": 3556, + "▁seat": 3557, + "ishment": 3558, + "▁individ": 3559, + "▁existence": 3560, + "▁determined": 3561, + "lend": 3562, + "▁storm": 3563, + "roy": 3564, + "ours": 3565, + "▁conce": 3566, + "anging": 3567, + "▁fixed": 3568, + "▁press": 3569, + "▁major": 3570, + "oved": 3571, + "▁ves": 3572, + "iod": 3573, + "▁learn": 3574, + "▁motion": 3575, + "▁empt": 3576, + "▁leaves": 3577, + "▁bottom": 3578, + "▁arg": 3579, + "iety": 3580, + "▁nobody": 3581, + "▁pros": 3582, + "que": 3583, + "▁utter": 3584, + "▁pick": 3585, + "acked": 3586, + "▁intellig": 3587, + "▁hes": 3588, + "▁stir": 3589, + "▁prevent": 3590, + "▁assist": 3591, + "▁dom": 3592, + "▁disg": 3593, + "▁advant": 3594, + "erable": 3595, + "▁vent": 3596, + "ument": 3597, + "▁tired": 3598, + "rect": 3599, + "ashed": 3600, + "action": 3601, + "▁considered": 3602, + "▁wrote": 3603, + "▁houses": 
3604, + "▁suit": 3605, + "▁cheer": 3606, + "▁castle": 3607, + "▁pra": 3608, + "▁perform": 3609, + "ancing": 3610, + "▁clean": 3611, + "ruct": 3612, + "▁stro": 3613, + "▁frequ": 3614, + "▁drawing": 3615, + "▁luck": 3616, + "▁habit": 3617, + "idge": 3618, + "ell": 3619, + "▁ones": 3620, + "▁noble": 3621, + "▁splend": 3622, + "▁honor": 3623, + "zen": 3624, + "▁paid": 3625, + "▁speech": 3626, + "▁estab": 3627, + "▁ur": 3628, + "istr": 3629, + "▁individual": 3630, + "inite": 3631, + "▁vall": 3632, + "▁birds": 3633, + "rodu": 3634, + "▁dar": 3635, + "▁allow": 3636, + "▁confess": 3637, + "▁impress": 3638, + "▁propert": 3639, + "▁jane": 3640, + "▁song": 3641, + "▁various": 3642, + "▁narrow": 3643, + "▁moder": 3644, + "▁believed": 3645, + "ays": 3646, + "▁extra": 3647, + "▁pure": 3648, + "arily": 3649, + "▁period": 3650, + "▁shadow": 3651, + "▁somewh": 3652, + "▁mal": 3653, + "▁cott": 3654, + "▁extreme": 3655, + "▁judge": 3656, + "▁village": 3657, + "▁royal": 3658, + "▁somewhat": 3659, + "▁lower": 3660, + "▁ham": 3661, + "▁agree": 3662, + "▁remembered": 3663, + "▁aston": 3664, + "enth": 3665, + "▁declared": 3666, + "pan": 3667, + "▁train": 3668, + "▁parts": 3669, + "▁colonel": 3670, + "amber": 3671, + "▁breakfast": 3672, + "▁surely": 3673, + "▁sin": 3674, + "ayed": 3675, + "▁scene": 3676, + "go": 3677, + "▁greatest": 3678, + "▁influence": 3679, + "▁custom": 3680, + "itary": 3681, + "▁animal": 3682, + "▁sake": 3683, + "▁mod": 3684, + "▁soldiers": 3685, + "iny": 3686, + "▁ancient": 3687, + "▁drawn": 3688, + "▁evidently": 3689, + "▁ways": 3690, + "▁looks": 3691, + "▁revol": 3692, + "ator": 3693, + "anted": 3694, + "▁reflect": 3695, + "▁picture": 3696, + "▁likely": 3697, + "▁shr": 3698, + "▁laws": 3699, + "▁holding": 3700, + "▁difficulty": 3701, + "▁inj": 3702, + "▁mel": 3703, + "▁courage": 3704, + "nes": 3705, + "▁mort": 3706, + "▁troub": 3707, + "▁burst": 3708, + "▁angry": 3709, + "▁proud": 3710, + "gged": 3711, + "▁spoken": 3712, + "ision": 3713, + "▁desert": 3714, + 
"ption": 3715, + "▁comb": 3716, + "▁apparent": 3717, + "ring": 3718, + "▁watched": 3719, + "na": 3720, + "▁east": 3721, + "▁shop": 3722, + "▁agre": 3723, + "▁private": 3724, + "esty": 3725, + "▁jul": 3726, + "▁finished": 3727, + "▁anxious": 3728, + "otion": 3729, + "▁fifteen": 3730, + "▁social": 3731, + "under": 3732, + "▁dism": 3733, + "▁touch": 3734, + "▁wine": 3735, + "▁attack": 3736, + "▁ideas": 3737, + "▁george": 3738, + "af": 3739, + "rer": 3740, + "oose": 3741, + "▁space": 3742, + "▁scr": 3743, + "▁inside": 3744, + "▁gentlemen": 3745, + "▁civil": 3746, + "iently": 3747, + "▁formed": 3748, + "▁fol": 3749, + "▁goes": 3750, + "▁you've": 3751, + "▁thin": 3752, + "▁surf": 3753, + "▁servant": 3754, + "▁bal": 3755, + "▁cover": 3756, + "▁ourselves": 3757, + "▁fallen": 3758, + "▁henry": 3759, + "▁lot": 3760, + "ium": 3761, + "▁advent": 3762, + "▁carriage": 3763, + "▁baby": 3764, + "▁elect": 3765, + "▁tong": 3766, + "▁appre": 3767, + "▁everybody": 3768, + "uded": 3769, + "▁commun": 3770, + "▁ine": 3771, + "itive": 3772, + "▁waited": 3773, + "cise": 3774, + "▁grou": 3775, + "het": 3776, + "▁vain": 3777, + "▁impro": 3778, + "▁favor": 3779, + "erial": 3780, + "▁speed": 3781, + "▁windows": 3782, + "▁carefully": 3783, + "▁ice": 3784, + "▁noise": 3785, + "▁hero": 3786, + "▁jim": 3787, + "▁william": 3788, + "▁pecul": 3789, + "▁promised": 3790, + "▁walking": 3791, + "▁forgotten": 3792, + "▁obliged": 3793, + "▁earnest": 3794, + "▁main": 3795, + "▁lose": 3796, + "▁glance": 3797, + "▁vessel": 3798, + "▁grad": 3799, + "▁thro": 3800, + "▁bod": 3801, + "▁shoulder": 3802, + "▁meth": 3803, + "▁animals": 3804, + "▁noticed": 3805, + "ables": 3806, + "▁peculiar": 3807, + "▁fier": 3808, + "▁pot": 3809, + "▁quietly": 3810, + "▁cup": 3811, + "▁serious": 3812, + "▁tremb": 3813, + "▁generally": 3814, + "▁american": 3815, + "▁symp": 3816, + "ral": 3817, + "▁don": 3818, + "▁france": 3819, + "iction": 3820, + "▁property": 3821, + "▁shoulders": 3822, + "▁stranger": 3823, + "▁san": 3824, + 
"▁cow": 3825, + "▁what's": 3826, + "▁dust": 3827, + "▁affection": 3828, + "▁handsome": 3829, + "▁higher": 3830, + "iant": 3831, + "nday": 3832, + "▁wel": 3833, + "▁poet": 3834, + "▁sla": 3835, + "▁distinct": 3836, + "▁mam": 3837, + "▁pier": 3838, + "acing": 3839, + "ague": 3840, + "▁grown": 3841, + "uly": 3842, + "▁d'": 3843, + "▁chamber": 3844, + "▁desce": 3845, + "▁murm": 3846, + "stem": 3847, + "▁personal": 3848, + "▁fancy": 3849, + "▁offered": 3850, + "osite": 3851, + "onsie": 3852, + "▁built": 3853, + "▁edge": 3854, + "▁whispered": 3855, + "▁skin": 3856, + "▁pieces": 3857, + "itated": 3858, + "cher": 3859, + "osity": 3860, + "▁pit": 3861, + "▁contro": 3862, + "▁faces": 3863, + "▁spent": 3864, + "▁interrupt": 3865, + "how": 3866, + "isters": 3867, + "▁butter": 3868, + "▁develop": 3869, + "▁unk": 3870, + "hip": 3871, + "▁heat": 3872, + "▁fond": 3873, + "▁coat": 3874, + "▁touched": 3875, + "▁hol": 3876, + "ingu": 3877, + "▁pi": 3878, + "▁race": 3879, + "▁jump": 3880, + "▁surprised": 3881, + "oted": 3882, + "▁defe": 3883, + "enced": 3884, + "▁wasn't": 3885, + "▁wear": 3886, + "andon": 3887, + "▁fan": 3888, + "acher": 3889, + "▁arch": 3890, + "▁educ": 3891, + "▁brave": 3892, + "athered": 3893, + "▁eld": 3894, + "▁wealth": 3895, + "▁system": 3896, + "▁german": 3897, + "▁false": 3898, + "wood": 3899, + "▁dare": 3900, + "aked": 3901, + "▁cousin": 3902, + "▁fer": 3903, + "key": 3904, + "▁lin": 3905, + "▁intellect": 3906, + "▁prepared": 3907, + "▁fingers": 3908, + "▁surr": 3909, + "▁mountains": 3910, + "ipp": 3911, + "▁opportunity": 3912, + "aff": 3913, + "▁bare": 3914, + "▁dor": 3915, + "▁introdu": 3916, + "▁collect": 3917, + "▁lovely": 3918, + "▁rag": 3919, + "▁crown": 3920, + "▁matters": 3921, + "▁companion": 3922, + "▁weather": 3923, + "▁alar": 3924, + "▁innoc": 3925, + "▁ris": 3926, + "▁mix": 3927, + "▁lake": 3928, + "▁store": 3929, + "▁unh": 3930, + "▁meaning": 3931, + "▁memory": 3932, + "over": 3933, + "▁band": 3934, + "leep": 3935, + "▁finding": 3936, + "ee": 
3937, + "▁charge": 3938, + "▁grat": 3939, + "▁attract": 3940, + "▁gray": 3941, + "▁quarter": 3942, + "▁avo": 3943, + "▁greatly": 3944, + "▁mach": 3945, + "▁inh": 3946, + "▁asleep": 3947, + "▁paris": 3948, + "▁dav": 3949, + "▁alto": 3950, + "▁offer": 3951, + "▁opposite": 3952, + "ounced": 3953, + "erve": 3954, + "▁breast": 3955, + "nown": 3956, + "▁reading": 3957, + "▁altogether": 3958, + "▁writing": 3959, + "pected": 3960, + "▁degree": 3961, + "cing": 3962, + "night": 3963, + "▁exec": 3964, + "fortun": 3965, + "▁stat": 3966, + "▁feelings": 3967, + "▁hath": 3968, + "▁cook": 3969, + "▁rail": 3970, + "▁honour": 3971, + "ding": 3972, + "▁fate": 3973, + "▁por": 3974, + "▁frank": 3975, + "▁meeting": 3976, + "▁rough": 3977, + "▁alive": 3978, + "▁hide": 3979, + "ites": 3980, + "ilar": 3981, + "▁blow": 3982, + "▁cruel": 3983, + "raph": 3984, + "▁hurt": 3985, + "▁loss": 3986, + "▁thrown": 3987, + "▁caused": 3988, + "▁we'll": 3989, + "▁serve": 3990, + "▁duke": 3991, + "▁bent": 3992, + "▁united": 3993, + "▁seek": 3994, + "▁kingdom": 3995, + "▁situation": 3996, + "▁empty": 3997, + "ners": 3998, + "▁due": 3999, + "▁liked": 4000, + "▁swift": 4001, + "▁opening": 4002, + "▁servants": 4003, + "chen": 4004, + "oura": 4005, + "▁gh": 4006, + "▁suspic": 4007, + "▁freed": 4008, + "ointed": 4009, + "▁surface": 4010, + "cil": 4011, + "▁questions": 4012, + "▁ess": 4013, + "▁curious": 4014, + "▁constit": 4015, + "▁accompan": 4016, + "▁christian": 4017, + "▁fill": 4018, + "arest": 4019, + "▁satisfied": 4020, + "ron": 4021, + "▁sides": 4022, + "▁pity": 4023, + "▁reve": 4024, + "▁equal": 4025, + "▁height": 4026, + "▁ordered": 4027, + "osop": 4028, + "▁grey": 4029, + "▁listened": 4030, + "pet": 4031, + "▁rejo": 4032, + "▁capt": 4033, + "ibility": 4034, + "ob": 4035, + "▁mart": 4036, + "▁happen": 4037, + "▁hurried": 4038, + "▁dollars": 4039, + "▁language": 4040, + "▁ange": 4041, + "▁yours": 4042, + "▁supposed": 4043, + "▁laughing": 4044, + "▁settled": 4045, + "▁rode": 4046, + "▁perm": 4047, + 
"▁distingu": 4048, + "▁hurry": 4049, + "▁destroy": 4050, + "▁talked": 4051, + "▁lifted": 4052, + "ocr": 4053, + "▁square": 4054, + "▁value": 4055, + "▁taste": 4056, + "▁vast": 4057, + "▁king's": 4058, + "▁rul": 4059, + "▁roof": 4060, + "▁telling": 4061, + "▁study": 4062, + "▁ow": 4063, + "▁pan": 4064, + "▁bas": 4065, + "▁rising": 4066, + "▁sufficient": 4067, + "▁forced": 4068, + "▁rise": 4069, + "▁attend": 4070, + "▁philosop": 4071, + "▁nose": 4072, + "▁sixty": 4073, + "hest": 4074, + "▁pin": 4075, + "▁egg": 4076, + "▁amb": 4077, + "▁fault": 4078, + "bur": 4079, + "▁station": 4080, + "▁distur": 4081, + "▁regular": 4082, + "ille": 4083, + "▁pack": 4084, + "▁special": 4085, + "▁honest": 4086, + "▁building": 4087, + "▁season": 4088, + "▁shape": 4089, + "▁pride": 4090, + "▁smiling": 4091, + "like": 4092, + "▁orders": 4093, + "yn": 4094, + "▁woods": 4095, + "▁accompl": 4096, + "con": 4097, + "▁sam": 4098, + "▁usually": 4099, + "▁watching": 4100, + "▁sacri": 4101, + "erved": 4102, + "▁passage": 4103, + "▁material": 4104, + "▁valley": 4105, + "yr": 4106, + "▁stairs": 4107, + "▁libert": 4108, + "▁frightened": 4109, + "▁remarked": 4110, + "▁tit": 4111, + "▁wed": 4112, + "▁mistress": 4113, + "▁directly": 4114, + "▁suffer": 4115, + "▁gloom": 4116, + "▁lines": 4117, + "▁stock": 4118, + "▁justice": 4119, + "▁diam": 4120, + "ested": 4121, + "▁growing": 4122, + "▁doesn't": 4123, + "▁gathered": 4124, + "▁ordinary": 4125, + "uce": 4126, + "▁eur": 4127, + "▁unf": 4128, + "▁kitchen": 4129, + "▁threat": 4130, + "▁depend": 4131, + "▁weeks": 4132, + "▁despair": 4133, + "▁method": 4134, + "▁seized": 4135, + "▁discuss": 4136, + "▁exer": 4137, + "ify": 4138, + "▁flower": 4139, + "▁ignor": 4140, + "eer": 4141, + "ades": 4142, + "▁deb": 4143, + "eping": 4144, + "▁ale": 4145, + "▁yo": 4146, + "chief": 4147, + "▁supper": 4148, + "ik": 4149, + "▁bold": 4150, + "▁putting": 4151, + "▁nearer": 4152, + "uses": 4153, + "▁one's": 4154, + "▁ble": 4155, + "▁york": 4156, + "▁ende": 4157, + "▁affairs": 
4158, + "▁soldier": 4159, + "▁contrary": 4160, + "▁moving": 4161, + "▁streets": 4162, + "▁bir": 4163, + "rance": 4164, + "hens": 4165, + "▁cit": 4166, + "icated": 4167, + "▁catch": 4168, + "▁imagine": 4169, + "eds": 4170, + "▁march": 4171, + "▁search": 4172, + "ara": 4173, + "▁receive": 4174, + "imate": 4175, + "▁monsie": 4176, + "▁twice": 4177, + "▁papa": 4178, + "▁monsieur": 4179, + "▁reck": 4180, + "min": 4181, + "ude": 4182, + "▁process": 4183, + "▁hole": 4184, + "aly": 4185, + "lin": 4186, + "▁cro": 4187, + "▁favour": 4188, + "▁dign": 4189, + "▁working": 4190, + "▁harm": 4191, + "▁europe": 4192, + "antic": 4193, + "▁proved": 4194, + "ocked": 4195, + "▁prove": 4196, + "▁cler": 4197, + "▁lod": 4198, + "ception": 4199, + "▁pulled": 4200, + "▁arth": 4201, + "▁authority": 4202, + "▁haven": 4203, + "▁jer": 4204, + "▁uns": 4205, + "▁movement": 4206, + "usted": 4207, + "▁engaged": 4208, + "▁brothers": 4209, + "▁advantage": 4210, + "lished": 4211, + "ole": 4212, + "▁arthur": 4213, + "▁aut": 4214, + "▁stones": 4215, + "▁farm": 4216, + "▁difference": 4217, + "▁fart": 4218, + "▁aside": 4219, + "▁mas": 4220, + "▁observ": 4221, + "▁hence": 4222, + "▁possession": 4223, + "▁hills": 4224, + "▁fortun": 4225, + "uls": 4226, + "ails": 4227, + "▁instance": 4228, + "▁she's": 4229, + "▁ol": 4230, + "▁holy": 4231, + "▁flew": 4232, + "ky": 4233, + "▁color": 4234, + "▁rate": 4235, + "▁doors": 4236, + "▁busy": 4237, + "set": 4238, + "▁address": 4239, + "▁familiar": 4240, + "▁weight": 4241, + "▁aware": 4242, + "▁played": 4243, + "▁sympath": 4244, + "lls": 4245, + "▁solemn": 4246, + "▁liter": 4247, + "▁test": 4248, + "▁emper": 4249, + "▁indian": 4250, + "▁distant": 4251, + "▁interesting": 4252, + "▁bull": 4253, + "▁thorough": 4254, + "▁wore": 4255, + "▁worked": 4256, + "▁explained": 4257, + "▁excellent": 4258, + "▁splendid": 4259, + "▁tongue": 4260, + "▁di": 4261, + "▁pard": 4262, + "▁named": 4263, + "▁shame": 4264, + "▁franc": 4265, + "▁spect": 4266, + "▁moments": 4267, + "bers": 4268, + 
"▁wil": 4269, + "▁myster": 4270, + "▁seated": 4271, + "▁instantly": 4272, + "▁similar": 4273, + "▁endeav": 4274, + "▁measure": 4275, + "▁naturally": 4276, + "nds": 4277, + "▁suf": 4278, + "▁amount": 4279, + "▁imper": 4280, + "▁dogs": 4281, + "itable": 4282, + "▁brit": 4283, + "▁necessity": 4284, + "rid": 4285, + "ulous": 4286, + "▁confidence": 4287, + "den": 4288, + "▁parent": 4289, + "▁wid": 4290, + "▁vir": 4291, + "▁neverthe": 4292, + "▁agreed": 4293, + "▁nevertheless": 4294, + "unch": 4295, + "▁hearing": 4296, + "▁takes": 4297, + "▁aug": 4298, + "▁univers": 4299, + "enance": 4300, + "▁unw": 4301, + "▁earl": 4302, + "▁keeping": 4303, + "▁drive": 4304, + "▁produced": 4305, + "▁aud": 4306, + "on's": 4307, + "▁names": 4308, + "agn": 4309, + "▁disappeared": 4310, + "▁throw": 4311, + "▁president": 4312, + "▁gods": 4313, + "▁magic": 4314, + "▁represent": 4315, + "▁unknown": 4316, + "por": 4317, + "▁terror": 4318, + "▁haven't": 4319, + "asc": 4320, + "▁support": 4321, + "▁smoke": 4322, + "▁wicked": 4323, + "ker": 4324, + "▁works": 4325, + "▁artic": 4326, + "▁dull": 4327, + "▁yester": 4328, + "▁falling": 4329, + "▁worthy": 4330, + "▁liberty": 4331, + "ulation": 4332, + "▁design": 4333, + "▁wants": 4334, + "▁evidence": 4335, + "▁companions": 4336, + "▁spirits": 4337, + "▁coast": 4338, + "▁mighty": 4339, + "▁particularly": 4340, + "▁witness": 4341, + "▁discover": 4342, + "▁sought": 4343, + "▁span": 4344, + "'ve": 4345, + "▁rare": 4346, + "▁officers": 4347, + "lv": 4348, + "zy": 4349, + "▁yesterday": 4350, + "vey": 4351, + "cent": 4352, + "▁powers": 4353, + "▁yield": 4354, + "▁cool": 4355, + "▁organ": 4356, + "▁amaz": 4357, + "▁pointed": 4358, + "ford": 4359, + "▁claim": 4360, + "▁content": 4361, + "▁possibly": 4362, + "▁terms": 4363, + "▁trium": 4364, + "▁officer": 4365, + "▁persu": 4366, + "▁ceased": 4367, + "▁drove": 4368, + "▁occurred": 4369, + "▁gree": 4370, + "▁lies": 4371, + "▁otherwise": 4372, + "▁emperor": 4373, + "▁hom": 4374, + "▁stars": 4375, + "▁knees": 4376, + 
"▁triumph": 4377, + "ruction": 4378, + "▁paused": 4379, + "oms": 4380, + "▁required": 4381, + "▁failed": 4382, + "▁unhapp": 4383, + "▁diamond": 4384, + "▁rat": 4385, + "▁ali": 4386, + "▁double": 4387, + "▁forms": 4388, + "▁gives": 4389, + "▁finger": 4390, + "race": 4391, + "▁pair": 4392, + "alous": 4393, + "illa": 4394, + "▁bob": 4395, + "▁eliz": 4396, + "▁travel": 4397, + "▁carrying": 4398, + "▁gle": 4399, + "iles": 4400, + "▁teeth": 4401, + "esh": 4402, + "▁shown": 4403, + "▁fruit": 4404, + "▁waters": 4405, + "▁entertain": 4406, + "▁hearts": 4407, + "umn": 4408, + "▁labor": 4409, + "in't": 4410, + "▁pill": 4411, + "▁ener": 4412, + "soci": 4413, + "▁example": 4414, + "▁upper": 4415, + "▁foreign": 4416, + "▁moral": 4417, + "▁softly": 4418, + "rose": 4419, + "▁huge": 4420, + "▁charles": 4421, + "▁priest": 4422, + "▁excit": 4423, + "▁fet": 4424, + "▁mother's": 4425, + "▁possessed": 4426, + "▁cases": 4427, + "▁report": 4428, + "▁milk": 4429, + "▁affair": 4430, + "▁principle": 4431, + "▁inhab": 4432, + "▁freedom": 4433, + "▁proof": 4434, + "▁intended": 4435, + "▁satisfaction": 4436, + "▁shouted": 4437, + "isc": 4438, + "▁plat": 4439, + "▁bask": 4440, + "ental": 4441, + "▁group": 4442, + "▁farther": 4443, + "asm": 4444, + "▁unfortun": 4445, + "▁unto": 4446, + "▁singing": 4447, + "▁arrange": 4448, + "▁religion": 4449, + "▁ber": 4450, + "▁rocks": 4451, + "▁seventeen": 4452, + "▁der": 4453, + "▁james": 4454, + "▁buy": 4455, + "▁succeeded": 4456, + "▁rooms": 4457, + "▁leading": 4458, + "▁majesty": 4459, + "▁events": 4460, + "▁dance": 4461, + "▁paint": 4462, + "▁gently": 4463, + "acle": 4464, + "▁tele": 4465, + "▁pardon": 4466, + "using": 4467, + "▁drop": 4468, + "father": 4469, + "▁invent": 4470, + "▁key": 4471, + "▁mentioned": 4472, + "▁seventy": 4473, + "▁ros": 4474, + "▁suffering": 4475, + "▁record": 4476, + "▁cabin": 4477, + "road": 4478, + "▁diss": 4479, + "ival": 4480, + "▁demanded": 4481, + "▁excitement": 4482, + "▁associ": 4483, + "▁progress": 4484, + "angers": 
4485, + "▁curi": 4486, + "▁america": 4487, + "▁rule": 4488, + "▁bor": 4489, + "▁vig": 4490, + "lessly": 4491, + "▁clearly": 4492, + "▁bore": 4493, + "▁sheep": 4494, + "▁regret": 4495, + "▁neighbour": 4496, + "bly": 4497, + "iance": 4498, + "▁instinct": 4499, + "▁advice": 4500, + "▁awful": 4501, + "▁sen": 4502, + "▁fully": 4503, + "▁gather": 4504, + "▁papers": 4505, + "▁hidden": 4506, + "▁chest": 4507, + "▁birth": 4508, + "hy": 4509, + "pap": 4510, + "▁hither": 4511, + "▁stuff": 4512, + "▁impat": 4513, + "▁calling": 4514, + "▁fourth": 4515, + "▁dreadful": 4516, + "▁pos": 4517, + "▁grief": 4518, + "▁brill": 4519, + "▁powerful": 4520, + "▁presented": 4521, + "▁fairy": 4522, + "▁explain": 4523, + "▁shoot": 4524, + "▁prisoner": 4525, + "▁joined": 4526, + "▁afford": 4527, + "mond": 4528, + "attered": 4529, + "▁ing": 4530, + "iments": 4531, + "▁shel": 4532, + "▁prefer": 4533, + "▁considerable": 4534, + "▁obey": 4535, + "▁voices": 4536, + "▁interv": 4537, + "▁interested": 4538, + "▁virg": 4539, + "▁cred": 4540, + "▁card": 4541, + "▁ep": 4542, + "▁needed": 4543, + "▁pounds": 4544, + "▁conqu": 4545, + "▁clever": 4546, + "▁advanced": 4547, + "▁cord": 4548, + "ighed": 4549, + "▁undert": 4550, + "▁resolved": 4551, + "▁wag": 4552, + "istic": 4553, + "▁paul": 4554, + "▁excited": 4555, + "▁conditions": 4556, + "▁pictures": 4557, + "acious": 4558, + "▁shining": 4559, + "▁sunday": 4560, + "▁served": 4561, + "▁steam": 4562, + "▁police": 4563, + "▁sprang": 4564, + "sie": 4565, + "ora": 4566, + "ese": 4567, + "▁jes": 4568, + "▁nodd": 4569, + "▁salt": 4570, + "▁fields": 4571, + "▁cart": 4572, + "▁indians": 4573, + "▁fierce": 4574, + "dle": 4575, + "▁ride": 4576, + "▁desired": 4577, + "▁edward": 4578, + "▁importance": 4579, + "▁information": 4580, + "ture": 4581, + "▁hosp": 4582, + "▁memb": 4583, + "▁perceived": 4584, + "▁yard": 4585, + "▁crit": 4586, + "ternal": 4587, + "▁task": 4588, + "▁fold": 4589, + "rant": 4590, + "▁sooner": 4591, + "▁merch": 4592, + "▁absolutely": 4593, + 
"▁citiz": 4594, + "▁suffered": 4595, + "▁tight": 4596, + "▁dur": 4597, + "▁iss": 4598, + "illy": 4599, + "▁log": 4600, + "▁completely": 4601, + "hold": 4602, + "▁rad": 4603, + "▁share": 4604, + "▁willing": 4605, + "▁devil": 4606, + "▁ships": 4607, + "▁imagination": 4608, + "▁superior": 4609, + "com": 4610, + "ams": 4611, + "▁anybody": 4612, + "▁env": 4613, + "▁appl": 4614, + "▁drag": 4615, + "▁dawn": 4616, + "asped": 4617, + "▁occupied": 4618, + "▁curiosity": 4619, + "iest": 4620, + "▁sigh": 4621, + "▁fox": 4622, + "asant": 4623, + "▁myst": 4624, + "▁stead": 4625, + "ett": 4626, + "▁couple": 4627, + "▁type": 4628, + "▁extraord": 4629, + "▁apparently": 4630, + "▁welcome": 4631, + "▁daily": 4632, + "▁modern": 4633, + "iot": 4634, + "▁ain't": 4635, + "▁dying": 4636, + "llen": 4637, + "▁feat": 4638, + "▁accident": 4639, + "▁countenance": 4640, + "▁abandon": 4641, + "ortion": 4642, + "▁lock": 4643, + "▁crime": 4644, + "pir": 4645, + "▁mult": 4646, + "▁alas": 4647, + "▁refused": 4648, + "▁hate": 4649, + "▁dw": 4650, + "▁whenever": 4651, + "▁thanks": 4652, + "▁slave": 4653, + "▁regarded": 4654, + "▁suggested": 4655, + "ulf": 4656, + "▁actually": 4657, + "gment": 4658, + "▁size": 4659, + "reg": 4660, + "▁cult": 4661, + "▁kat": 4662, + "▁bodies": 4663, + "hus": 4664, + "▁bay": 4665, + "▁truly": 4666, + "▁flesh": 4667, + "ishop": 4668, + "▁smith": 4669, + "▁betr": 4670, + "with": 4671, + "▁wet": 4672, + "▁rapidly": 4673, + "gers": 4674, + "▁odd": 4675, + "asons": 4676, + "ette": 4677, + "▁club": 4678, + "abel": 4679, + "▁horror": 4680, + "▁mile": 4681, + "▁flight": 4682, + "▁crossed": 4683, + "▁professor": 4684, + "▁oce": 4685, + "▁worst": 4686, + "ization": 4687, + "▁rushed": 4688, + "▁science": 4689, + "▁brief": 4690, + "▁stepped": 4691, + "▁midst": 4692, + "ha": 4693, + "▁sour": 4694, + "▁maint": 4695, + "▁brain": 4696, + "▁cottage": 4697, + "▁expressed": 4698, + "▁equally": 4699, + "▁education": 4700, + "▁august": 4701, + "▁buck": 4702, + "▁nay": 4703, + "ids": 4704, + 
"▁tempt": 4705, + "▁inquir": 4706, + "▁foolish": 4707, + "▁taught": 4708, + "▁cop": 4709, + "▁dun": 4710, + "▁picked": 4711, + "▁elsie": 4712, + "▁lands": 4713, + "▁driven": 4714, + "▁political": 4715, + "mas": 4716, + "▁deck": 4717, + "▁resist": 4718, + "▁instr": 4719, + "▁bon": 4720, + "▁ken": 4721, + "ips": 4722, + "▁hotel": 4723, + "▁dangerous": 4724, + "ially": 4725, + "now": 4726, + "▁dozen": 4727, + "▁trade": 4728, + "▁points": 4729, + "▁ninet": 4730, + "ability": 4731, + "▁crim": 4732, + "▁relations": 4733, + "▁interp": 4734, + "▁barb": 4735, + "▁delighted": 4736, + "▁members": 4737, + "▁sisters": 4738, + "▁sty": 4739, + "▁anger": 4740, + "▁belief": 4741, + "▁asking": 4742, + "▁meat": 4743, + "▁displ": 4744, + "▁relief": 4745, + "ification": 4746, + "▁hunting": 4747, + "▁alex": 4748, + "aries": 4749, + "▁obst": 4750, + "▁behold": 4751, + "▁mistake": 4752, + "▁inquired": 4753, + "▁remarkable": 4754, + "▁origin": 4755, + "cked": 4756, + "▁nerv": 4757, + "acks": 4758, + "vert": 4759, + "rop": 4760, + "▁careful": 4761, + "▁wounded": 4762, + "ading": 4763, + "▁cere": 4764, + "▁enemies": 4765, + "▁gradually": 4766, + "▁interrupted": 4767, + "▁fis": 4768, + "▁stup": 4769, + "▁severe": 4770, + "▁keen": 4771, + "▁sixteen": 4772, + "kins": 4773, + "resp": 4774, + "▁worn": 4775, + "▁flour": 4776, + "▁sylv": 4777, + "▁control": 4778, + "kin": 4779, + "▁lone": 4780, + "asing": 4781, + "▁nap": 4782, + "▁assert": 4783, + "▁depth": 4784, + "▁kindly": 4785, + "▁murder": 4786, + "acity": 4787, + "▁eleven": 4788, + "▁invol": 4789, + "▁d'art": 4790, + "▁wings": 4791, + "▁oak": 4792, + "▁et": 4793, + "▁begun": 4794, + "▁dreams": 4795, + "while": 4796, + "▁moreover": 4797, + "▁exped": 4798, + "▁independ": 4799, + "▁buried": 4800, + "▁approached": 4801, + "agnan": 4802, + "▁d'artagnan": 4803, + "▁sex": 4804, + "▁saved": 4805, + "▁harry": 4806, + "▁physical": 4807, + "▁species": 4808, + "cer": 4809, + "oe": 4810, + "▁glory": 4811, + "▁creatures": 4812, + "▁newspap": 4813, + 
"▁sang": 4814, + "▁plenty": 4815, + "▁useful": 4816, + "▁shoes": 4817, + "▁hoped": 4818, + "▁frequently": 4819, + "▁saf": 4820, + "▁distr": 4821, + "▁princip": 4822, + "▁pu": 4823, + "y's": 4824, + "aunt": 4825, + "▁lover": 4826, + "▁famous": 4827, + "▁recollect": 4828, + "▁nur": 4829, + "▁grim": 4830, + "▁indif": 4831, + "▁charming": 4832, + "▁aim": 4833, + "▁loose": 4834, + "▁consciousness": 4835, + "▁mamma": 4836, + "▁enthus": 4837, + "▁slept": 4838, + "▁smooth": 4839, + "▁fighting": 4840, + "▁hyp": 4841, + "▁enthusi": 4842, + "▁dig": 4843, + "aling": 4844, + "▁stage": 4845, + "▁anyone": 4846, + "▁thrust": 4847, + "▁desper": 4848, + "▁tar": 4849, + "▁lamp": 4850, + "stone": 4851, + "▁stern": 4852, + "▁evident": 4853, + "▁meanwhile": 4854, + "▁forgive": 4855, + "▁accepted": 4856, + "▁ocean": 4857, + "▁tot": 4858, + "▁they're": 4859, + "▁wondered": 4860, + "▁playing": 4861, + "▁detect": 4862, + "▁hale": 4863, + "▁knife": 4864, + "ailed": 4865, + "▁closely": 4866, + "▁meas": 4867, + "▁proceeded": 4868, + "▁message": 4869, + "▁mour": 4870, + "▁fac": 4871, + "▁union": 4872, + "ustomed": 4873, + "hem": 4874, + "aming": 4875, + "▁exceed": 4876, + "▁feather": 4877, + "▁precious": 4878, + "▁century": 4879, + "▁unex": 4880, + "▁park": 4881, + "ication": 4882, + "▁everywhere": 4883, + "▁minds": 4884, + "▁extraordinary": 4885, + "▁arose": 4886, + "▁entrance": 4887, + "▁capital": 4888, + "▁recall": 4889, + "▁burning": 4890, + "▁magnific": 4891, + "oes": 4892, + "orious": 4893, + "stand": 4894, + "▁assemb": 4895, + "▁plant": 4896, + "▁neighbor": 4897, + "▁lest": 4898, + "uments": 4899, + "▁colle": 4900, + "▁virtue": 4901, + "▁bew": 4902, + "▁forb": 4903, + "▁retreat": 4904, + "▁capable": 4905, + "▁assured": 4906, + "▁constant": 4907, + "▁governor": 4908, + "▁increased": 4909, + "▁horn": 4910, + "▁removed": 4911, + "▁facts": 4912, + "▁absence": 4913, + "▁explan": 4914, + "▁ack": 4915, + "▁somebody": 4916, + "▁awa": 4917, + "▁admit": 4918, + "▁correct": 4919, + "▁forgot": 4920, 
+ "▁jealous": 4921, + "▁kissed": 4922, + "▁popular": 4923, + "▁hut": 4924, + "▁ug": 4925, + "pelled": 4926, + "▁grant": 4927, + "▁friendship": 4928, + "▁indign": 4929, + "▁sympathy": 4930, + "iable": 4931, + "erous": 4932, + "▁thom": 4933, + "▁alice": 4934, + "▁level": 4935, + "▁objects": 4936, + "▁pressed": 4937, + "▁sha": 4938, + "room": 4939, + "▁qual": 4940, + "▁begged": 4941, + "▁emp": 4942, + "▁hind": 4943, + "▁highest": 4944, + "▁clouds": 4945, + "▁ghost": 4946, + "▁acknow": 4947, + "oused": 4948, + "▁strike": 4949, + "▁wishes": 4950, + "▁becomes": 4951, + "▁trembling": 4952, + "▁nob": 4953, + "▁kindness": 4954, + "▁accordingly": 4955, + "▁throat": 4956, + "ration": 4957, + "▁fare": 4958, + "▁we're": 4959, + "▁stretched": 4960, + "▁frag": 4961, + "▁wheel": 4962, + "▁queer": 4963, + "▁grandfather": 4964, + "for": 4965, + "▁choose": 4966, + "▁helen": 4967, + "▁eighty": 4968, + "▁ly": 4969, + "▁miserable": 4970, + "▁contempt": 4971, + "igned": 4972, + "▁military": 4973, + "▁russ": 4974, + "▁basket": 4975, + "▁ahead": 4976, + "oops": 4977, + "ivered": 4978, + "▁listening": 4979, + "▁fro": 4980, + "▁larger": 4981, + "▁divine": 4982, + "iber": 4983, + "▁stories": 4984, + "anches": 4985, + "ushing": 4986, + "izing": 4987, + "▁treasure": 4988, + "▁excuse": 4989, + "▁innocent": 4990, + "▁aid": 4991, + "▁remind": 4992, + "▁slaves": 4993, + "rit": 4994, + "stairs": 4995, + "▁reward": 4996, + "ograph": 4997, + "▁manage": 4998, + "▁dish": 4999, + "▁throughout": 5000, + "▁waves": 5001, + "▁judgment": 5002, + "▁arrival": 5003, + "▁choice": 5004, + "▁unhappy": 5005, + "astic": 5006, + "▁blank": 5007, + "▁advance": 5008, + "▁informed": 5009, + "▁acquaintance": 5010, + "▁impression": 5011, + "▁mysterious": 5012, + "bb": 5013, + "▁ara": 5014, + "▁notes": 5015, + "▁hadn't": 5016, + "▁sell": 5017, + "▁comr": 5018, + "▁impl": 5019, + "▁indust": 5020, + "▁ended": 5021, + "▁lights": 5022, + "▁nurse": 5023, + "▁sout": 5024, + "▁bought": 5025, + "▁fred": 5026, + "▁marked": 5027, + 
"▁scream": 5028, + "mend": 5029, + "▁uneas": 5030, + "▁delicate": 5031, + "▁weary": 5032, + "estic": 5033, + "▁prompt": 5034, + "▁experi": 5035, + "▁hungry": 5036, + "▁flying": 5037, + "▁pow": 5038, + "▁bridge": 5039, + "▁join": 5040, + "▁visible": 5041, + "▁understanding": 5042, + "▁crying": 5043, + "▁avoid": 5044, + "▁tis": 5045, + "▁stiff": 5046, + "aches": 5047, + "▁restr": 5048, + "▁sounds": 5049, + "▁bowed": 5050, + "▁caut": 5051, + "▁goods": 5052, + "▁david": 5053, + "▁unable": 5054, + "▁you'd": 5055, + "hamed": 5056, + "▁bos": 5057, + "eral": 5058, + "▁ashamed": 5059, + "▁somewhere": 5060, + "▁infinite": 5061, + "ocks": 5062, + "▁dignity": 5063, + "▁gay": 5064, + "▁vic": 5065, + "▁amid": 5066, + "▁hollow": 5067, + "▁emotion": 5068, + "▁admitted": 5069, + "▁parents": 5070, + "▁wra": 5071, + "▁hint": 5072, + "▁temple": 5073, + "▁comfortable": 5074, + "▁intelligence": 5075, + "orous": 5076, + "▁bearing": 5077, + "▁hers": 5078, + "abeth": 5079, + "▁remains": 5080, + "▁contem": 5081, + "▁settle": 5082, + "▁immense": 5083, + "ffe": 5084, + "pher": 5085, + "▁cher": 5086, + "ldom": 5087, + "▁weap": 5088, + "ulated": 5089, + "▁lighted": 5090, + "gypt": 5091, + "▁adventure": 5092, + "▁thoroughly": 5093, + "▁egypt": 5094, + "ilst": 5095, + "anges": 5096, + "▁obt": 5097, + "▁friendly": 5098, + "▁reckon": 5099, + "▁stupid": 5100, + "▁fed": 5101, + "▁rome": 5102, + "▁meal": 5103, + "▁intention": 5104, + "▁returning": 5105, + "▁convin": 5106, + "▁coo": 5107, + "lection": 5108, + "▁ash": 5109, + "achel": 5110, + "▁rope": 5111, + "▁price": 5112, + "▁project": 5113, + "elt": 5114, + "rows": 5115, + "▁secure": 5116, + "▁escaped": 5117, + "▁hopes": 5118, + "▁elizabeth": 5119, + "▁safety": 5120, + "▁wound": 5121, + "▁sup": 5122, + "▁unus": 5123, + "onscious": 5124, + "▁horri": 5125, + "▁minister": 5126, + "▁ox": 5127, + "lla": 5128, + "ensive": 5129, + "▁helped": 5130, + "▁plainly": 5131, + "▁seldom": 5132, + "▁thinks": 5133, + "▁fellows": 5134, + "▁mood": 5135, + "▁pushed": 
5136, + "▁exhib": 5137, + "inging": 5138, + "▁thunder": 5139, + "aud": 5140, + "iana": 5141, + "▁fairly": 5142, + "▁elder": 5143, + "▁eggs": 5144, + "irm": 5145, + "▁maiden": 5146, + "mother": 5147, + "▁appears": 5148, + "▁cheeks": 5149, + "▁won": 5150, + "▁ease": 5151, + "▁redu": 5152, + "▁skill": 5153, + "▁extent": 5154, + "▁practice": 5155, + "▁religious": 5156, + "▁becoming": 5157, + "▁virgin": 5158, + "▁features": 5159, + "▁tied": 5160, + "▁whence": 5161, + "▁somehow": 5162, + "▁greet": 5163, + "▁faithful": 5164, + "▁concerned": 5165, + "▁theat": 5166, + "▁bishop": 5167, + "▁pink": 5168, + "▁eagerly": 5169, + "rees": 5170, + "▁eating": 5171, + "▁waste": 5172, + "▁rank": 5173, + "▁fem": 5174, + "▁bride": 5175, + "▁unl": 5176, + "otted": 5177, + "ceiving": 5178, + "▁trib": 5179, + "▁original": 5180, + "▁concerning": 5181, + "▁hab": 5182, + "▁accustomed": 5183, + "▁patient": 5184, + "▁recom": 5185, + "▁cell": 5186, + "ointment": 5187, + "▁arranged": 5188, + "ville": 5189, + "iture": 5190, + "▁wholly": 5191, + "▁older": 5192, + "▁colour": 5193, + "▁provided": 5194, + "▁ate": 5195, + "▁partly": 5196, + "▁mont": 5197, + "ology": 5198, + "▁prospect": 5199, + "▁ceremon": 5200, + "▁ze": 5201, + "▁laughter": 5202, + "▁fee": 5203, + "▁branches": 5204, + "▁fled": 5205, + "right": 5206, + "▁whilst": 5207, + "▁slipped": 5208, + "▁violent": 5209, + "▁inhabit": 5210, + "▁sons": 5211, + "▁engage": 5212, + "▁uncom": 5213, + "▁deeply": 5214, + "▁substance": 5215, + "▁tale": 5216, + "▁tiny": 5217, + "▁dan": 5218, + "▁ga": 5219, + "▁bee": 5220, + "▁yards": 5221, + "icks": 5222, + "▁hastily": 5223, + "held": 5224, + "▁wes": 5225, + "▁vague": 5226, + "▁amuse": 5227, + "▁mud": 5228, + "▁wolf": 5229, + "▁hans": 5230, + "illing": 5231, + "▁supply": 5232, + "▁silk": 5233, + "▁constantly": 5234, + "▁christmas": 5235, + "▁million": 5236, + "▁whisper": 5237, + "▁mental": 5238, + "▁washing": 5239, + "verse": 5240, + "▁cloth": 5241, + "▁baron": 5242, + "▁corresp": 5243, + "▁nodded": 5244, + 
"▁correspond": 5245, + "ka": 5246, + "▁hell": 5247, + "▁gain": 5248, + "▁rust": 5249, + "▁obtain": 5250, + "▁unconscious": 5251, + "▁struggle": 5252, + "▁established": 5253, + "▁lawy": 5254, + "ols": 5255, + "▁signs": 5256, + "▁uttered": 5257, + "▁roman": 5258, + "▁constitution": 5259, + "pes": 5260, + "▁cave": 5261, + "▁spare": 5262, + "▁quant": 5263, + "▁image": 5264, + "▁merry": 5265, + "▁treated": 5266, + "▁efforts": 5267, + "▁lonely": 5268, + "rated": 5269, + "▁nut": 5270, + "▁glanced": 5271, + "▁portion": 5272, + "itor": 5273, + "▁resemb": 5274, + "▁withd": 5275, + "▁mead": 5276, + "▁feast": 5277, + "▁prim": 5278, + "▁cliff": 5279, + "▁emer": 5280, + "▁proportion": 5281, + "▁consideration": 5282, + "▁haste": 5283, + "▁gaze": 5284, + "▁savage": 5285, + "▁crew": 5286, + "▁tower": 5287, + "▁lack": 5288, + "▁conscience": 5289, + "▁mercy": 5290, + "▁exha": 5291, + "▁consent": 5292, + "ators": 5293, + "urd": 5294, + "▁outl": 5295, + "▁clo": 5296, + "▁adop": 5297, + "▁amongst": 5298, + "▁hanging": 5299, + "▁circle": 5300, + "▁prepar": 5301, + "▁brilliant": 5302, + "fl": 5303, + "▁gained": 5304, + "▁row": 5305, + "▁troops": 5306, + "▁repro": 5307, + "▁ming": 5308, + "oul": 5309, + "▁dared": 5310, + "▁lion": 5311, + "▁joe": 5312, + "▁winds": 5313, + "▁bringing": 5314, + "▁anxiety": 5315, + "▁billy": 5316, + "▁consequence": 5317, + "fice": 5318, + "pse": 5319, + "▁fought": 5320, + "▁pred": 5321, + "▁scra": 5322, + "▁glim": 5323, + "▁victory": 5324, + "ped": 5325, + "▁rab": 5326, + "▁scot": 5327, + "▁obv": 5328, + "▁shock": 5329, + "chan": 5330, + "▁knock": 5331, + "ourse": 5332, + "▁handed": 5333, + "▁indul": 5334, + "▁patience": 5335, + "▁souther": 5336, + "▁jose": 5337, + "▁fever": 5338, + "▁rolled": 5339, + "icted": 5340, + "▁setting": 5341, + "▁profession": 5342, + "▁sylvia": 5343, + "▁hun": 5344, + "utions": 5345, + "▁feared": 5346, + "▁brand": 5347, + "▁boots": 5348, + "▁forehead": 5349, + "▁principles": 5350, + "▁sink": 5351, + "▁rig": 5352, + "aval": 5353, + 
"▁purch": 5354, + "▁gazed": 5355, + "▁employed": 5356, + "▁murmured": 5357, + "more": 5358, + "▁sar": 5359, + "ashing": 5360, + "ural": 5361, + "acles": 5362, + "▁trad": 5363, + "▁active": 5364, + "▁benef": 5365, + "▁bottle": 5366, + "▁rage": 5367, + "▁invest": 5368, + "▁lux": 5369, + "▁sank": 5370, + "▁hang": 5371, + "▁beard": 5372, + "ential": 5373, + "▁loving": 5374, + "▁native": 5375, + "▁instruct": 5376, + "▁waist": 5377, + "▁relation": 5378, + "▁discovery": 5379, + "▁melan": 5380, + "▁nervous": 5381, + "▁obtained": 5382, + "▁pig": 5383, + "▁sear": 5384, + "▁flag": 5385, + "▁trail": 5386, + "▁distinguished": 5387, + "▁stared": 5388, + "▁misery": 5389, + "▁print": 5390, + "▁guil": 5391, + "▁jumped": 5392, + "▁swim": 5393, + "▁approaching": 5394, + "▁suspicion": 5395, + "▁iv": 5396, + "▁managed": 5397, + "aker": 5398, + "▁teach": 5399, + "▁match": 5400, + "▁guilty": 5401, + "▁wretched": 5402, + "▁rum": 5403, + "▁compar": 5404, + "▁theory": 5405, + "▁sher": 5406, + "▁bree": 5407, + "▁kings": 5408, + "▁shone": 5409, + "atherine": 5410, + "▁throne": 5411, + "▁showing": 5412, + "aws": 5413, + "▁robin": 5414, + "▁embar": 5415, + "utation": 5416, + "▁woman's": 5417, + "▁addressed": 5418, + "▁protest": 5419, + "▁admiration": 5420, + "▁troubled": 5421, + "▁ugly": 5422, + "oom": 5423, + "erves": 5424, + "▁flung": 5425, + "▁subs": 5426, + "▁relie": 5427, + "▁thousands": 5428, + "nce": 5429, + "▁od": 5430, + "▁current": 5431, + "▁wooden": 5432, + "▁sacrifice": 5433, + "urity": 5434, + "cip": 5435, + "▁pear": 5436, + "▁farmer": 5437, + "▁needs": 5438, + "▁condem": 5439, + "▁member": 5440, + "▁bade": 5441, + "▁dancing": 5442, + "▁reasons": 5443, + "▁consult": 5444, + "▁swall": 5445, + "▁shadows": 5446, + "▁angel": 5447, + "▁nineteen": 5448, + "▁style": 5449, + "field": 5450, + "▁lan": 5451, + "▁manif": 5452, + "▁robert": 5453, + "▁grate": 5454, + "▁engine": 5455, + "▁wisdom": 5456, + "▁jesus": 5457, + "▁convent": 5458, + "▁preced": 5459, + "▁interests": 5460, + "▁trial": 
5461, + "bor": 5462, + "iven": 5463, + "▁nest": 5464, + "▁exch": 5465, + "▁voy": 5466, + "▁illust": 5467, + "▁worship": 5468, + "▁adam": 5469, + "▁phr": 5470, + "▁principal": 5471, + "▁hit": 5472, + "▁spend": 5473, + "▁stands": 5474, + "▁respons": 5475, + "▁ay": 5476, + "▁haw": 5477, + "▁whist": 5478, + "▁arrest": 5479, + "▁kinds": 5480, + "▁require": 5481, + "▁described": 5482, + "▁lit": 5483, + "▁precise": 5484, + "▁proposed": 5485, + "▁produce": 5486, + "▁utterly": 5487, + "ulse": 5488, + "▁novel": 5489, + "▁blame": 5490, + "▁credit": 5491, + "▁pause": 5492, + "osen": 5493, + "▁household": 5494, + "▁armed": 5495, + "▁follows": 5496, + "upon": 5497, + "▁approach": 5498, + "▁ninety": 5499, + "▁pir": 5500, + "▁flore": 5501, + "ivity": 5502, + "▁refuse": 5503, + "▁sensible": 5504, + "choly": 5505, + "▁national": 5506, + "▁grie": 5507, + "▁reven": 5508, + "▁let's": 5509, + "▁delightful": 5510, + "▁extremely": 5511, + "▁melancholy": 5512, + "uing": 5513, + "▁enorm": 5514, + "cles": 5515, + "▁slightly": 5516, + "▁sacred": 5517, + "▁recognized": 5518, + "▁mystery": 5519, + "▁gri": 5520, + "▁compre": 5521, + "▁distress": 5522, + "▁warri": 5523, + "▁useless": 5524, + "▁trif": 5525, + "▁mounted": 5526, + "▁philip": 5527, + "▁energy": 5528, + "▁explanation": 5529, + "▁cas": 5530, + "atory": 5531, + "▁pour": 5532, + "▁ric": 5533, + "▁chosen": 5534, + "▁everyone": 5535, + "umbled": 5536, + "▁apr": 5537, + "▁cam": 5538, + "▁proc": 5539, + "▁resumed": 5540, + "▁appreci": 5541, + "▁alexand": 5542, + "▁aven": 5543, + "▁wing": 5544, + "▁intense": 5545, + "▁highly": 5546, + "▁lucy": 5547, + "▁solid": 5548, + "▁departure": 5549, + "▁agreeable": 5550, + "▁exercise": 5551, + "apped": 5552, + "▁ward": 5553, + "▁bud": 5554, + "▁dwell": 5555, + "icate": 5556, + "▁dece": 5557, + "▁teacher": 5558, + "tending": 5559, + "▁max": 5560, + "▁request": 5561, + "▁unexpected": 5562, + "▁joseph": 5563, + "col": 5564, + "▁leap": 5565, + "▁victim": 5566, + "▁sighed": 5567, + "▁forces": 5568, + "chie": 
5569, + "▁feed": 5570, + "▁sport": 5571, + "▁drift": 5572, + "▁wedding": 5573, + "▁british": 5574, + "sec": 5575, + "▁attitude": 5576, + "▁vision": 5577, + "▁pipe": 5578, + "▁tow": 5579, + "▁halt": 5580, + "▁manners": 5581, + "▁tend": 5582, + "▁flood": 5583, + "▁commission": 5584, + "▁guide": 5585, + "▁observe": 5586, + "▁concern": 5587, + "▁rush": 5588, + "▁affected": 5589, + "fall": 5590, + "▁stret": 5591, + "▁coach": 5592, + "▁poison": 5593, + "▁directed": 5594, + "▁medic": 5595, + "▁gest": 5596, + "▁echo": 5597, + "▁younger": 5598, + "▁confusion": 5599, + "▁continue": 5600, + "▁parli": 5601, + "▁absor": 5602, + "▁centre": 5603, + "conom": 5604, + "▁horrible": 5605, + "rison": 5606, + "▁bol": 5607, + "▁bath": 5608, + "▁gown": 5609, + "▁bye": 5610, + "▁aloud": 5611, + "▁suppl": 5612, + "▁profound": 5613, + "▁err": 5614, + "▁cheerful": 5615, + "worth": 5616, + "▁sentence": 5617, + "▁mistaken": 5618, + "▁torn": 5619, + "▁figures": 5620, + "▁accompanied": 5621, + "▁catherine": 5622, + "▁econom": 5623, + "▁atm": 5624, + "▁shaking": 5625, + "umber": 5626, + "▁council": 5627, + "lot": 5628, + "▁asce": 5629, + "ilities": 5630, + "▁spar": 5631, + "▁ends": 5632, + "▁straw": 5633, + "▁knights": 5634, + "▁atmosp": 5635, + "▁shade": 5636, + "▁brow": 5637, + "▁spark": 5638, + "▁rested": 5639, + "▁sentiment": 5640, + "▁recovered": 5641, + "▁subjects": 5642, + "▁duties": 5643, + "▁composed": 5644, + "▁swept": 5645, + "▁reality": 5646, + "▁singular": 5647, + "▁transp": 5648, + "▁locked": 5649, + "▁louis": 5650, + "▁assistance": 5651, + "▁wake": 5652, + "rem": 5653, + "▁sovere": 5654, + "▁unp": 5655, + "▁loves": 5656, + "▁absurd": 5657, + "▁souls": 5658, + "▁immediate": 5659, + "▁riding": 5660, + "▁connection": 5661, + "▁cheek": 5662, + "▁magnificent": 5663, + "▁ere": 5664, + "▁sugar": 5665, + "▁plans": 5666, + "▁prud": 5667, + "▁dise": 5668, + "▁adj": 5669, + "▁leaning": 5670, + "▁surrounded": 5671, + "▁we've": 5672, + "▁orn": 5673, + "▁roll": 5674, + "▁proble": 5675, + 
"▁strict": 5676, + "▁awake": 5677, + "▁praise": 5678, + "▁convinced": 5679, + "▁rele": 5680, + "▁frame": 5681, + "▁breaking": 5682, + "▁curtain": 5683, + "▁stayed": 5684, + "▁divided": 5685, + "▁craw": 5686, + "▁inclined": 5687, + "▁previous": 5688, + "ault": 5689, + "omen": 5690, + "▁stair": 5691, + "▁sees": 5692, + "▁pron": 5693, + "board": 5694, + "▁complex": 5695, + "▁prayer": 5696, + "▁pierre": 5697, + "▁unfortunate": 5698, + "gs": 5699, + "▁genius": 5700, + "▁increase": 5701, + "▁sufficiently": 5702, + "▁banks": 5703, + "▁revolution": 5704, + "▁southern": 5705, + "ki": 5706, + "oke": 5707, + "▁aust": 5708, + "edy": 5709, + "▁ling": 5710, + "▁countess": 5711, + "▁sleeping": 5712, + "▁devoted": 5713, + "▁utmost": 5714, + "▁market": 5715, + "▁bosom": 5716, + "▁bark": 5717, + "▁cath": 5718, + "alt": 5719, + "char": 5720, + "▁clock": 5721, + "▁handker": 5722, + "▁admin": 5723, + "▁senses": 5724, + "▁ident": 5725, + "▁midnight": 5726, + "▁connected": 5727, + "▁permitted": 5728, + "▁hid": 5729, + "▁fil": 5730, + "▁faced": 5731, + "▁gift": 5732, + "▁chat": 5733, + "▁brid": 5734, + "▁norther": 5735, + "▁horiz": 5736, + "▁college": 5737, + "▁handkerchief": 5738, + "isions": 5739, + "▁rebe": 5740, + "▁polic": 5741, + "▁announced": 5742, + "ounce": 5743, + "▁nons": 5744, + "▁nurs": 5745, + "ales": 5746, + "▁fleet": 5747, + "▁ragged": 5748, + "▁coffe": 5749, + "▁parties": 5750, + "▁delay": 5751, + "▁sounded": 5752, + "▁cities": 5753, + "▁wash": 5754, + "▁appointed": 5755, + "▁nights": 5756, + "▁instit": 5757, + "▁god's": 5758, + "▁striking": 5759, + "▁guns": 5760, + "▁astonishment": 5761, + "▁merchant": 5762, + "▁parliament": 5763, + "nal": 5764, + "▁ax": 5765, + "atched": 5766, + "▁pil": 5767, + "▁page": 5768, + "iform": 5769, + "▁plate": 5770, + "▁thirst": 5771, + "▁negro": 5772, + "▁ruin": 5773, + "▁inhabitants": 5774, + "win": 5775, + "arf": 5776, + "▁rib": 5777, + "▁addition": 5778, + "▁argument": 5779, + "bour": 5780, + "▁tad": 5781, + "▁scen": 5782, + "▁guests": 
5783, + "▁wondering": 5784, + "▁acquainted": 5785, + "▁intent": 5786, + "pless": 5787, + "▁destroyed": 5788, + "▁coffee": 5789, + "inent": 5790, + "lebr": 5791, + "▁render": 5792, + "▁sob": 5793, + "▁demon": 5794, + "▁desir": 5795, + "uding": 5796, + "▁gets": 5797, + "▁assure": 5798, + "▁raise": 5799, + "▁sharply": 5800, + "▁privile": 5801, + "▁alarm": 5802, + "▁machine": 5803, + "fied": 5804, + "▁contract": 5805, + "▁deliber": 5806, + "▁drown": 5807, + "▁afterward": 5808, + "▁guest": 5809, + "▁conclusion": 5810, + "▁risk": 5811, + "▁ignorant": 5812, + "bury": 5813, + "kind": 5814, + "▁pian": 5815, + "an's": 5816, + "uries": 5817, + "▁soil": 5818, + "▁refer": 5819, + "▁commanded": 5820, + "▁practical": 5821, + "▁toss": 5822, + "▁offe": 5823, + "▁beheld": 5824, + "▁arist": 5825, + "▁quarters": 5826, + "▁degrees": 5827, + "▁fisher": 5828, + "▁nonsense": 5829, + "▁mc": 5830, + "isp": 5831, + "▁mechan": 5832, + "keep": 5833, + "▁doubtless": 5834, + "▁violence": 5835, + "▁neglect": 5836, + "▁folk": 5837, + "liness": 5838, + "▁bul": 5839, + "▁easter": 5840, + "▁loft": 5841, + "▁contained": 5842, + "▁reflection": 5843, + "▁celebr": 5844, + "▁leaf": 5845, + "▁concluded": 5846, + "▁district": 5847, + "iation": 5848, + "rs": 5849, + "▁scient": 5850, + "▁he'd": 5851, + "▁scorn": 5852, + "▁crack": 5853, + "▁steep": 5854, + "▁muttered": 5855, + "▁establish": 5856, + "▁darling": 5857, + "▁andrew": 5858, + "▁chim": 5859, + "quis": 5860, + "▁quality": 5861, + "▁polly": 5862, + "▁check": 5863, + "▁craft": 5864, + "▁travell": 5865, + "▁universal": 5866, + "inate": 5867, + "▁cig": 5868, + "atives": 5869, + "omp": 5870, + "uten": 5871, + "▁jac": 5872, + "▁job": 5873, + "▁subm": 5874, + "▁reader": 5875, + "▁leis": 5876, + "▁emph": 5877, + "▁surround": 5878, + "ox": 5879, + "pent": 5880, + "itate": 5881, + "▁extended": 5882, + "▁lev": 5883, + "▁overt": 5884, + "▁retired": 5885, + "▁puzz": 5886, + "uable": 5887, + "▁libr": 5888, + "▁chin": 5889, + "▁spl": 5890, + "▁realized": 5891, + 
"▁causes": 5892, + "▁punishment": 5893, + "▁physic": 5894, + "▁leisure": 5895, + "can": 5896, + "▁wave": 5897, + "▁shake": 5898, + "▁charm": 5899, + "▁belonged": 5900, + "mber": 5901, + "▁bones": 5902, + "▁gas": 5903, + "▁range": 5904, + "▁prec": 5905, + "▁smell": 5906, + "▁maybe": 5907, + "▁invited": 5908, + "▁troubles": 5909, + "▁tables": 5910, + "anch": 5911, + "icip": 5912, + "▁june": 5913, + "▁abo": 5914, + "▁ages": 5915, + "▁anywhere": 5916, + "ffin": 5917, + "▁drunk": 5918, + "▁properly": 5919, + "▁local": 5920, + "▁improve": 5921, + "▁atmosphere": 5922, + "▁dir": 5923, + "▁he'll": 5924, + "▁reb": 5925, + "▁rang": 5926, + "▁compass": 5927, + "▁lieuten": 5928, + "▁leaned": 5929, + "▁firmly": 5930, + "▁nations": 5931, + "▁hay": 5932, + "▁wept": 5933, + "▁ral": 5934, + "▁conven": 5935, + "▁uniform": 5936, + "▁julia": 5937, + "eem": 5938, + "rass": 5939, + "▁track": 5940, + "▁commer": 5941, + "▁bushes": 5942, + "▁obsc": 5943, + "▁sorts": 5944, + "▁difficulties": 5945, + "▁intellectual": 5946, + "▁introduced": 5947, + "mith": 5948, + "▁tro": 5949, + "iday": 5950, + "▁rendered": 5951, + "▁rout": 5952, + "add": 5953, + "▁plun": 5954, + "▁throwing": 5955, + "▁humble": 5956, + "▁polite": 5957, + "▁numerous": 5958, + "▁movements": 5959, + "▁successful": 5960, + "▁candle": 5961, + "▁separate": 5962, + "▁protection": 5963, + "▁thomas": 5964, + "▁enormous": 5965, + "▁unb": 5966, + "▁repub": 5967, + "▁sunsh": 5968, + "▁descended": 5969, + "▁unusual": 5970, + "ived": 5971, + "▁blaz": 5972, + "▁shows": 5973, + "▁simpl": 5974, + "▁cattle": 5975, + "▁crept": 5976, + "▁astonished": 5977, + "▁deserted": 5978, + "▁lap": 5979, + "arse": 5980, + "▁nearest": 5981, + "udes": 5982, + "▁entering": 5983, + "▁ideal": 5984, + "standing": 5985, + "nders": 5986, + "▁sore": 5987, + "aine": 5988, + "▁clos": 5989, + "▁ours": 5990, + "▁wherever": 5991, + "▁term": 5992, + "▁visited": 5993, + "▁calcul": 5994, + "ds": 5995, + "▁base": 5996, + "▁gates": 5997, + "▁stamp": 5998, + "▁liber": 5999, + 
"▁official": 6000, + "▁erect": 6001, + "▁alt": 6002, + "elia": 6003, + "▁harmon": 6004, + "▁painful": 6005, + "▁burned": 6006, + "▁republic": 6007, + "uer": 6008, + "▁lately": 6009, + "▁ital": 6010, + "amm": 6011, + "▁tear": 6012, + "▁actions": 6013, + "▁final": 6014, + "▁startled": 6015, + "▁sensation": 6016, + "▁fatal": 6017, + "olic": 6018, + "▁flash": 6019, + "▁appet": 6020, + "▁stronger": 6021, + "▁numbers": 6022, + "▁gratitude": 6023, + "▁female": 6024, + "▁western": 6025, + "lest": 6026 + }, + "merges": [ + "▁ t", + "h e", + "▁ a", + "▁t he", + "i n", + "▁ s", + "▁ w", + "▁ o", + "r e", + "n d", + "▁ b", + "▁ h", + "e r", + "▁ m", + "▁ i", + "o u", + "▁ c", + "▁ f", + "a t", + "e d", + "▁a nd", + "e n", + "▁t o", + "▁o f", + "o n", + "i s", + "▁ d", + "in g", + "▁t h", + "▁ p", + "▁ he", + "o r", + "▁ l", + "e s", + "▁ in", + "l l", + "i t", + "a r", + "a s", + "a n", + "▁ n", + "▁ g", + "o m", + "▁b e", + "▁h a", + "▁ e", + "l e", + "o t", + "▁ y", + "u t", + "o w", + "i c", + "▁w h", + "▁i t", + "l d", + "v e", + "▁th at", + "l y", + "▁w as", + "i d", + "s e", + "s t", + "▁o n", + "g h", + "en t", + "▁ re", + "▁y ou", + "i m", + "c e", + "▁ u", + "v er", + "i on", + "▁a s", + "e t", + "▁f or", + "a y", + "▁h is", + "▁w e", + "it h", + "a l", + "i r", + "▁ r", + "▁w ith", + "▁s t", + "a d", + "u r", + "gh t", + "▁a n", + "▁he r", + "▁n ot", + "▁i s", + "▁ha d", + "t er", + "he r", + "a c", + "a m", + "▁a t", + "o o", + "▁b ut", + "ou ld", + "▁s he", + "▁ k", + "▁s e", + "▁s a", + "▁s h", + "▁f r", + "▁h im", + "▁s o", + "▁m e", + "i ll", + "a in", + "▁s u", + "i ght", + "c h", + "re d", + "c t", + "a ll", + "r o", + "k e", + "es s", + "i l", + "' s", + "o re", + "▁d e", + "▁m y", + "▁the y", + "▁w he", + "▁a ll", + "ic h", + "▁n e", + "r i", + "▁b y", + "▁ha ve", + "om e", + "p p", + "▁th is", + "▁l i", + "▁d o", + "▁c on", + "u s", + "▁wh ich", + "▁c h", + "u l", + "q u", + "▁ j", + "▁u p", + "▁sa id", + "▁fr om", + "ar d", + "g e", + "▁o r", + "▁ v", + 
"▁on e", + "▁n o", + "t h", + "▁e x", + "▁we re", + "▁the re", + "p e", + "a nd", + "es t", + "▁m an", + "▁wh o", + "b le", + "i e", + "▁a l", + "an t", + "re s", + "ou s", + "u st", + "ver y", + "at ion", + "▁f e", + "▁the m", + "l f", + "▁whe n", + "n t", + "am e", + "in d", + "r a", + "▁g o", + "er s", + "as t", + "f e", + "oo d", + "▁k n", + "▁in t", + "is t", + "▁a re", + "ar t", + "ou t", + "▁w ould", + "▁l e", + "▁wh at", + "o s", + "▁the ir", + "on g", + "ou r", + "▁i f", + "▁c om", + "ou nd", + "▁a b", + "▁o ut", + "▁w or", + "e m", + "▁w ill", + "a k", + "▁m is", + "at e", + "o l", + "u m", + "u n", + "it t", + "ou gh", + "k ed", + "i g", + "a p", + "on e", + "▁be en", + "ow n", + "i ve", + "▁the n", + "▁b r", + "v en", + "i f", + "▁a r", + "' t", + "se lf", + "▁t r", + "▁p l", + "▁r o", + "▁p r", + "t her", + "re at", + "▁u n", + "▁a f", + "▁s p", + "▁ qu", + "▁p ro", + "it y", + "he d", + "▁t w", + "▁a g", + "▁c ould", + "o st", + "a ce", + "or t", + "u re", + "a ke", + "▁a m", + "ac k", + "▁an y", + "▁s ome", + "▁you r", + "▁m ore", + "▁c an", + "a u", + "▁t im", + "e p", + "a g", + "▁ en", + "c k", + "▁int o", + "▁c l", + "r y", + "▁n ow", + "h ing", + "nd er", + "a re", + "▁ very", + "▁g r", + "e l", + "o se", + "▁l oo", + "▁b o", + "v ed", + "o p", + "▁o ther", + "▁d id", + "an ce", + "▁th an", + "itt le", + "▁l ittle", + "in e", + "i es", + "w ay", + "it e", + "▁li ke", + "id e", + "▁l o", + "as s", + "▁b l", + "a ble", + "ur n", + "ou ght", + "▁kn ow", + "ot her", + "▁tim e", + "▁i m", + "▁d is", + "▁u s", + "▁c o", + "f ore", + "▁h ow", + "▁t e", + "en ce", + "▁d ay", + "▁a d", + "ad e", + "ic e", + "▁ab out", + "▁se e", + "▁o ver", + "p t", + "c c", + "▁to o", + "in k", + "▁f l", + "w n", + "▁g reat", + "▁af ter", + "p l", + "d e", + "▁p er", + "m ent", + "▁ag ain", + "▁up on", + "▁ha nd", + "a b", + "▁h as", + "re e", + "is h", + "c i", + "▁on ly", + "all y", + "▁we ll", + "▁sh ould", + "▁p o", + "▁m ar", + "res s", + "▁s ay", + "▁g ood", + "at 
her", + "▁tw o", + "ing s", + "▁p e", + "ou nt", + "▁o ur", + "i re", + "v ing", + "▁d own", + "ar s", + "er t", + "w e", + "▁be fore", + "i le", + "v es", + "▁a pp", + "▁e very", + "▁it s", + "▁o ld", + "▁th r", + "▁m u", + "▁m ade", + "i ed", + "ic k", + "▁l ong", + "a ge", + "t e", + "f t", + "▁whe re", + "an g", + "▁ne ver", + "▁m ust", + "▁p re", + "▁s m", + "f ul", + "▁su ch", + "u ll", + "▁st r", + "ion s", + "▁of f", + "▁s c", + "▁c ame", + "i ous", + "u e", + "▁mis s", + "w ard", + "i ld", + "▁f ir", + "▁e ven", + "▁u nder", + "ac t", + "▁the se", + "▁c ome", + "▁p art", + "▁f o", + "at ed", + "n ess", + "▁re m", + "or d", + "▁be c", + "t y", + "▁m ay", + "▁mu ch", + "▁th ink", + "p er", + "▁w ay", + "▁mis ter", + "l ed", + "▁l et", + "or n", + "▁e y", + "▁g l", + "▁con t", + "▁th ought", + "▁loo k", + "e ct", + "▁s pe", + "is e", + "▁b ack", + "▁be t", + "ad y", + "▁y e", + "an s", + "ac h", + "▁he re", + "▁j ust", + "re n", + "▁fir st", + "▁h o", + "▁o wn", + "▁d es", + "▁o b", + "ri ed", + "u d", + "ar y", + "▁w ent", + "▁m o", + "▁him self", + "▁m en", + "a ir", + "c l", + "a ve", + "at h", + "f f", + "▁s l", + "c o", + "on 't", + "ll ow", + "▁c r", + "▁re s", + "▁i '", + "▁m ight", + "i ly", + "▁se em", + "in t", + "i p", + "▁be g", + "ou se", + "an c", + "n 't", + "▁w at", + "▁thr ough", + "▁com p", + "b er", + "▁a way", + "▁c ar", + "▁e m", + "▁g et", + "▁im p", + "▁he ad", + "os s", + "▁li fe", + "▁be l", + "▁with out", + "▁m ost", + "▁p ass", + "▁m ake", + "▁con s", + "en ed", + "▁s om", + "▁t urn", + "a v", + "n g", + "▁sh all", + "▁a cc", + "▁th ose", + "▁p res", + "▁ey es", + "▁h ouse", + "i z", + "▁som et", + "▁j o", + "▁st ill", + "▁c all", + "▁n ight", + "he s", + "▁o p", + "au se", + "▁w om", + "▁l ast", + "k s", + "l ess", + "a red", + "▁com m", + "▁d on't", + "▁te ll", + "▁ ent", + "▁not hing", + "▁ne w", + "ig n", + "▁t ake", + "▁be ing", + "▁man y", + "▁wor d", + "on s", + "▁f ound", + "▁re t", + "as e", + "▁e ar", + "▁wh ile", + "▁at 
t", + "or y", + "i x", + "▁s er", + "▁sa w", + "▁p ut", + "n e", + "ot h", + "ie nd", + "▁pe op", + "▁w r", + "▁you ng", + "ar k", + "d y", + "ak ing", + "l es", + "▁c ount", + "▁on ce", + "▁fr iend", + "▁l a", + "en s", + "▁peop le", + "pe ct", + "or s", + "fe ct", + "▁m at", + "in ce", + "i ble", + "e red", + "▁ro om", + "▁th ree", + "▁y et", + "a il", + "▁s ame", + "▁f ather", + "▁r ight", + "▁ch ild", + "▁c our", + "i gh", + "▁pl ace", + "▁an other", + "ul t", + "i v", + "it ion", + "▁in d", + "▁w ant", + "▁th ough", + "▁n or", + "▁f ar", + "▁k ing", + "▁ha pp", + "▁he art", + "▁f ace", + "▁e nd", + "▁e ver", + "▁n at", + "th ing", + "▁lo ve", + "g et", + "▁too k", + "▁d ist", + "e ver", + "i an", + "▁h u", + "e w", + "▁ar m", + "▁in st", + "m an", + "▁wor k", + "▁l ight", + "▁ch ar", + "▁p le", + "ic t", + "▁s et", + "▁a c", + "▁loo ked", + "▁miss us", + "▁as ked", + "▁m ind", + "▁y es", + "▁su pp", + "▁int e", + "▁re p", + "c ess", + "ent ly", + "▁le ft", + "g g", + "ert ain", + "▁k e", + "is hed", + "u b", + "▁p ers", + "way s", + "▁th ings", + "al k", + "ir l", + "▁m om", + "▁s ir", + "▁w a", + "▁mom ent", + "ation s", + "▁s at", + "se l", + "▁f ind", + "ow er", + "i a", + "v ent", + "re w", + "▁wor ld", + "j ect", + "▁g ive", + "▁c ap", + "▁wh y", + "s o", + "▁g u", + "▁m other", + "▁g en", + "▁s w", + "▁al ways", + "d er", + "l t", + "l ing", + "▁an s", + "pp ed", + "▁so on", + "▁a ct", + "▁for m", + "▁e l", + "d d", + "▁he ard", + "re t", + "▁th ing", + "▁somet hing", + "▁seem ed", + "▁su b", + "▁do or", + "an ge", + "▁g irl", + "c ed", + "▁app e", + "it her", + "▁w ind", + "▁bec ause", + "▁d if", + "▁m on", + "s s", + "▁go ing", + "▁to ld", + "or m", + "▁h ome", + "ain ed", + "▁g ot", + "▁w ar", + "▁go d", + "au ght", + "▁g i", + "▁en g", + "▁s ur", + "n ing", + "▁hand s", + "▁wom an", + "▁fo llow", + "l and", + "a ut", + "▁v o", + "▁fe el", + "▁re l", + "▁p oss", + "c hed", + "ic al", + "p le", + "p h", + "▁bo y", + "▁ret urn", + "▁re g", + "▁re st", + 
"oo k", + "▁kn ew", + "n er", + "▁e ach", + "▁o h", + "▁s il", + "▁k ind", + "▁ex p", + "▁m a", + "▁c le", + "▁he l", + "i ver", + "t ing", + "▁de l", + "u al", + "▁in f", + "▁as s", + "▁wat er", + "▁con f", + "▁b re", + "▁w o", + "ce pt", + "▁bel ie", + "▁c ertain", + "▁again st", + "▁h ard", + "▁p h", + "r ow", + "▁u nt", + "▁ye ars", + "▁qu ite", + "▁s ide", + "in ess", + "in ed", + "▁ne ar", + "▁h or", + "ter s", + "i red", + "oo l", + "▁f our", + "▁fe w", + "▁d one", + "i er", + "▁c he", + "re st", + "it ed", + "m ost", + "▁bet ter", + "▁ha lf", + "▁m in", + "▁t re", + "p s", + "▁al so", + "▁c are", + "o ck", + "u ck", + "ou b", + "▁beg an", + "ull y", + "▁en ough", + "is ed", + "r u", + "▁ha ving", + "▁se en", + "▁gen er", + "▁l ady", + "▁d ra", + "▁h um", + "ap s", + "ot t", + "▁p ur", + "ak en", + "ro ss", + "y ing", + "▁t er", + "▁h our", + "▁in de", + "an k", + "▁call ed", + "i al", + "as on", + "▁be h", + "▁do es", + "▁who le", + "▁m orn", + "▁turn ed", + "▁ple as", + "▁st e", + "▁re f", + "▁g ave", + "en se", + "▁o cc", + "i b", + "▁cour se", + "▁in s", + "re am", + "get her", + "ut h", + "▁b oth", + "▁s ou", + "▁c ur", + "▁ad d", + "e en", + "▁c ol", + "▁re ad", + "we en", + "sel ves", + "▁am ong", + "▁bet ween", + "▁in c", + "▁ke ep", + "▁be aut", + "ul ar", + "▁po or", + "▁it 's", + "▁su re", + "▁morn ing", + "▁wh ite", + "g ed", + "▁n ame", + "▁de ar", + "▁to ward", + "ut e", + "▁sm all", + "▁wh om", + "▁re pl", + "▁s k", + "▁l ar", + "▁fe lt", + "b o", + "os ed", + "at ing", + "▁my self", + "▁op en", + "▁s ix", + "▁her self", + "▁how ever", + "▁b u", + "o nd", + "ain t", + "x t", + "▁f ore", + "▁in ter", + "▁e v", + "▁h igh", + "ct ion", + "▁hu nd", + "▁st ood", + "▁hund red", + "as ter", + "▁t ra", + "▁sh ow", + "▁s ent", + "i fe", + "▁r ound", + "▁s im", + "▁d r", + "▁g ra", + "▁word s", + "▁day s", + "▁al most", + "a le", + "ve l", + "▁po int", + "ent s", + "▁g re", + "▁e ight", + "c es", + "at es", + "dd en", + "▁f am", + "▁st and", + "▁b us", 
+ "▁l and", + "▁ ed", + "▁me an", + "un g", + "h aps", + "▁su n", + "u res", + "▁s ince", + "i et", + "ir d", + "▁per haps", + "n ed", + "▁s le", + "is s", + "▁b est", + "▁su dden", + "▁d ark", + "▁repl ied", + "▁vo ice", + "▁m et", + "▁any thing", + "▁t ill", + "▁under st", + "▁b ar", + "it s", + "▁unt il", + "in s", + "ou d", + "▁bl ack", + "▁b ro", + "▁he ar", + "▁look ing", + "▁c ried", + "▁you '", + "▁f act", + "am p", + "▁pr in", + "▁l ess", + "▁l ay", + "▁ne xt", + "▁la w", + "u p", + "▁p ower", + "▁pro p", + "n ot", + "re nt", + "▁br ought", + "ate ly", + "ent y", + "▁count ry", + "▁hel p", + "al s", + "▁qu est", + "m ed", + "▁u se", + "▁v is", + "▁s n", + "▁i' m", + "f ully", + "▁sp o", + "▁to gether", + "▁ne ed", + "▁a ir", + "▁ad v", + "▁pers on", + "▁inde ed", + "▁cont in", + "▁un c", + "one y", + "▁g ent", + "▁pres ent", + "▁a w", + "▁p ar", + "ow s", + "u red", + "▁f ull", + "t ain", + "▁r un", + "▁r ather", + "▁i de", + "▁co nd", + "nd ed", + "▁l at", + "▁s y", + "b e", + "d u", + "▁h ar", + "▁fe et", + "▁f in", + "et er", + "▁f all", + "ce i", + "▁f ive", + "▁m il", + "▁b ed", + "o c", + "▁do ct", + "▁inte rest", + "ress ed", + "▁mat ter", + "▁l ord", + "▁g one", + "▁ es", + "f ort", + "▁de ath", + "▁w ife", + "▁ser v", + "▁p at", + "er ing", + "oub t", + "▁ad m", + "▁t alk", + "▁t aken", + "▁ar t", + "▁t ri", + "▁other s", + "▁ho pe", + "as h", + "a z", + "▁ex t", + "▁can not", + "ie f", + "▁spe ak", + "▁l au", + "▁them selves", + "▁al ong", + "▁d ire", + "o ve", + "m b", + "p r", + "▁b es", + "▁c ou", + "▁m or", + "t en", + "▁gent le", + "ur ing", + "▁f ire", + "▁lar ge", + "▁p ol", + "▁c at", + "▁s we", + "ent ion", + "ver s", + "▁th us", + "a pp", + "▁se c", + "▁pl ay", + "▁re al", + "▁pr om", + "ment s", + "we red", + "ie ld", + "ain s", + "is on", + "ac hed", + "▁th ou", + "▁re ason", + "▁th ous", + "it ing", + "▁br other", + "ak es", + "▁thous and", + "on t", + "▁m oney", + "▁rem em", + "▁de p", + "▁ans wered", + "▁tr ue", + "▁child ren", + 
"▁beh ind", + "o y", + "▁s ound", + "ant s", + "ab ly", + "▁w ood", + "us ed", + "▁de c", + "▁who se", + "o d", + "▁e le", + "▁tw enty", + "▁r a", + "it u", + "▁belie ve", + "▁wo nder", + "en e", + "▁in v", + "▁h on", + "ar ing", + "s h", + "u ed", + "▁su ff", + "▁o pp", + "▁d oubt", + "▁re c", + "t on", + "▁ho ld", + "▁dif fe", + "▁pass ed", + "▁c or", + "m e", + "id ed", + "it ies", + "▁m er", + "▁s ing", + "▁nat ure", + "▁al one", + "▁de ad", + "▁p ri", + "k en", + "l ic", + "▁re d", + "▁b ur", + "ac es", + "▁cl ose", + "▁go ld", + "▁st art", + "▁h ur", + "▁f ur", + "o g", + "anc es", + "▁as k", + "▁doct or", + "▁s on", + "▁gr ound", + "w er", + "et s", + "▁se a", + "▁str ong", + "▁le ave", + "▁comp an", + "▁i' ll", + "er y", + "c y", + "ill ed", + "ep t", + "id es", + "t le", + "▁c e", + "▁ob s", + "bo dy", + "▁fe ll", + "▁s ign", + "co nd", + "▁m ount", + "▁f air", + "▁gi ven", + "▁there fore", + "an e", + "▁i r", + "▁de ep", + "if ul", + "f ic", + "y s", + "▁of ten", + "▁bo dy", + "u nt", + "▁sh ort", + "▁t em", + "▁f a", + "▁m aster", + "▁ear th", + "▁p ap", + "ce ed", + "▁st re", + "▁se cond", + "▁for t", + "b ed", + "g th", + "ow ed", + "▁hor se", + "id d", + "▁m ad", + "u ally", + "▁p a", + "▁ch r", + "▁or der", + "▁t en", + "ve red", + "▁con st", + "▁w ish", + "▁f if", + "▁e as", + "▁c ir", + "▁d ro", + "a im", + "he n", + "▁c a", + "▁re ally", + "re ad", + "cei ved", + "▁i ll", + "▁fe ar", + "os ition", + "▁underst and", + "▁sp ir", + "▁l ist", + "▁ab s", + "▁sp r", + "ac ed", + "▁quest ion", + "ang er", + "▁every thing", + "aught er", + "▁af f", + "▁w all", + "▁com ing", + "ch ing", + "re ady", + "id er", + "▁ab ove", + "▁pr ince", + "▁al ready", + "▁le ast", + "▁re co", + "▁ex pl", + "▁st ep", + "▁us ed", + "▁r u", + "▁it self", + "is ter", + "▁ne cess", + "▁c ase", + "▁ar ound", + "h n", + "▁sou l", + "▁sudden ly", + "g er", + "▁l ad", + "▁even ing", + "▁m ag", + "▁gener al", + "▁n um", + "im es", + "▁kn own", + "▁w al", + "▁qu ick", + "iz ed", + "▁m 
us", + "▁s ch", + "▁cap tain", + "▁that 's", + "if ic", + "▁whe ther", + "▁le ar", + "g n", + "▁with in", + "m en", + "▁li ve", + "ver n", + "▁tim es", + "▁ex pect", + "▁st ate", + "▁friend s", + "▁br ing", + "▁s ort", + "▁wom en", + "▁t able", + "▁me et", + "▁jo hn", + "▁cir c", + "▁su m", + "▁return ed", + "il ed", + "▁d ri", + "▁he ld", + "▁ex c", + "▁b ig", + "▁say s", + "▁per fect", + "▁le a", + "▁obs er", + "▁el se", + "▁d uring", + "id ent", + "▁h us", + "t ed", + "▁beaut iful", + "▁cle ar", + "▁e ither", + "▁to wn", + "▁s ight", + "▁l ost", + "▁sle ep", + "▁me ans", + "▁fo ot", + "▁c ut", + "▁c al", + "▁k ept", + "▁r an", + "i ence", + "▁pro f", + "te red", + "he re", + "et y", + "▁fe llow", + "▁can 't", + "▁m ist", + "▁p ast", + "▁d ream", + "ag es", + "▁bec ame", + "▁pre t", + "▁dis c", + "▁b ad", + "▁m aking", + "ut ion", + "▁ob ject", + "▁toward s", + "▁l ow", + "u ght", + "▁de v", + "▁hum an", + "▁man ner", + "▁str ange", + "▁ye ar", + "o ld", + "i ent", + "in es", + "▁se ver", + "m on", + "▁an n", + "air s", + "c hes", + "▁c ity", + "▁somet imes", + "' d", + "▁ro se", + "▁ est", + "il ity", + "▁w alk", + "▁re ady", + "▁p al", + "▁le g", + "▁ro ad", + "i ans", + "ci ous", + "▁c orn", + "▁th y", + "▁co ld", + "ll y", + "ious ly", + "l ish", + "▁st ra", + "m er", + "▁b at", + "ow ing", + "ie w", + "▁chr ist", + "▁s qu", + "▁tr uth", + "c ri", + "ll ed", + "▁th ir", + "▁did n't", + "b ert", + "▁so ci", + "b r", + "▁b it", + "▁sub ject", + "▁sh ip", + "▁m ur", + "▁app ro", + "▁p ie", + "▁ans wer", + "▁f ree", + "▁bus iness", + "▁ ut", + "a pe", + "▁appe ar", + "▁r iver", + "▁st o", + "▁c ast", + "▁fam ily", + "▁j ud", + "▁ex cl", + "▁let ter", + "ing ly", + "ri e", + "▁ha ir", + "ot e", + "▁arm s", + "▁bec ome", + "er n", + "ou ble", + "▁diffe rent", + "▁v al", + "f fect", + "▁nat ur", + "▁poss ible", + "▁sever al", + "▁f ine", + "a h", + "▁le ad", + "▁for g", + "▁exp ress", + "l i", + "▁su s", + "▁gl ad", + "o on", + "▁ar ri", + "▁bl ood", + "itt ing", + 
"▁qu iet", + "ren ce", + "▁ide a", + "▁a ble", + "itt ed", + "st er", + "▁char ac", + "▁beg in", + "▁ch ur", + "▁t ou", + "▁st ory", + "▁ey e", + "b and", + "at ive", + "▁gr and", + "▁cons ider", + "▁ac ross", + "▁p en", + "▁ex cept", + "▁f re", + "▁w in", + "▁e qu", + "et h", + "▁c ent", + "is f", + "▁part ic", + "▁dif fic", + "▁wind ow", + "▁sur pr", + "ll ect", + "▁pro v", + "▁dire ct", + "▁con c", + "e y", + "a w", + "▁go vern", + "▁dis co", + "▁w ild", + "▁do g", + "▁fl o", + "▁so ft", + "te en", + "▁c ross", + "as ed", + "▁e ffect", + "▁s or", + "▁long er", + "▁he n", + "▁follow ed", + "▁so ld", + "▁the e", + "▁p ub", + "▁hus band", + "ard s", + "ant ly", + "b y", + "▁a p", + "▁supp ose", + "▁res pect", + "t s", + "▁h ast", + "▁s al", + "▁comp le", + "▁he av", + "▁happ y", + "▁r ich", + "▁c reat", + "un e", + "▁t aking", + "▁re qu", + "▁st ay", + "▁spo ke", + "▁d aughter", + "▁we e", + "▁ ve", + "▁d u", + "▁gre en", + "▁an im", + "▁d in", + "' ll", + "▁b ird", + "al th", + "▁me re", + "▁g ard", + "n y", + "le y", + "▁poss ess", + "em pt", + "▁re ached", + "▁appe ared", + "o v", + "▁ex ist", + "in ation", + "▁pret ty", + "▁remem ber", + "▁he a", + "▁op ened", + "▁to m", + "ang ed", + "▁sl ow", + "▁im ag", + "▁i' ve", + "r act", + "▁say ing", + "k ing", + "ut es", + "▁comm on", + "▁occ as", + "▁b ook", + "▁r us", + "am es", + "ic es", + "▁br ight", + "m s", + "▁sat isf", + "▁s ense", + "▁f av", + "▁su cc", + "um p", + "is ing", + "▁l u", + "▁acc ord", + "ter n", + "▁bre ak", + "▁ex per", + "▁mon th", + "u se", + "▁de m", + "▁sc ar", + "▁contin ued", + "▁sec ret", + "▁chur ch", + "▁t ree", + "▁st ri", + "▁car ried", + "▁c ry", + "nd ing", + "▁spir it", + "▁want ed", + "er ic", + "▁certain ly", + "▁comm and", + "▁d est", + "▁mo ve", + "ou n", + "▁swe et", + "▁stre et", + "▁o ught", + "▁acc ount", + "▁de f", + "h am", + "▁pre p", + "▁s ens", + "▁es c", + "▁ro ck", + "ot s", + "▁de cl", + "▁pur p", + "ri age", + "ou th", + "ow ers", + "▁dra w", + "▁e at", + "▁b 
reat", + "▁charac ter", + "im e", + "c ul", + "med i", + "▁st ud", + "▁sch ool", + "itu de", + "▁hea ven", + "▁feel ing", + "▁s ad", + "▁reg ard", + "em ent", + "▁p ain", + "▁wor th", + "▁b ra", + "ne y", + "▁d ut", + "▁sm o", + "aim ed", + "▁tr ans", + "▁del ight", + "▁qu ar", + "▁h ung", + "▁m ot", + "▁bl ue", + "▁h ot", + "▁h ill", + "▁d iv", + "um b", + "▁dis app", + "▁mar g", + "▁lau gh", + "id ence", + "▁pro du", + "▁succ ess", + "ur y", + "s on", + "▁f ast", + "▁eng lish", + "▁d ress", + "▁h at", + "▁ter ri", + "▁p ort", + "▁ne ither", + "▁cour t", + "▁se ven", + "▁f ight", + "▁prin cess", + "▁li ved", + "▁v iew", + "▁im medi", + "▁se lf", + "▁v ar", + "▁hour s", + "▁m ill", + "▁so l", + "▁ex am", + "▁t ried", + "▁w on't", + "▁ent ered", + "▁dis p", + "t o", + "r ic", + "▁car ry", + "▁imp ort", + "▁an g", + "z e", + "on y", + "▁d anger", + "led ge", + "▁off ic", + "▁c ause", + "▁n one", + "▁for ward", + "▁unc le", + "▁to r", + "▁d et", + "as k", + "▁l en", + "▁fur ther", + "▁p ay", + "▁add ed", + "▁fr ont", + "r or", + "▁g e", + "▁partic ular", + "▁de al", + "▁pr ot", + "▁l ed", + "▁ac qu", + "▁pr ay", + "▁e ff", + "▁happ ened", + "▁ch ief", + "le ct", + "▁wal ked", + "▁lat er", + "▁jo y", + "i ar", + "d ay", + "▁or d", + "▁al th", + "▁com fort", + "▁pro b", + "▁ma j", + "▁af fect", + "▁pub lic", + "▁b ene", + "en ing", + "▁alth ough", + "g r", + "▁sh o", + "▁f ig", + "res h", + "▁f ail", + "u ct", + "u g", + "al ity", + "▁me m", + "▁seem s", + "▁your self", + "sh ip", + "e ad", + "i am", + "▁num ber", + "s ide", + "▁a h", + "▁do ing", + "▁li ving", + "are nt", + "▁des p", + "iz e", + "oo f", + "▁f ield", + "▁re ceived", + "▁sh ad", + "▁be y", + "▁bey ond", + "▁ph il", + "▁l ine", + "▁vis it", + "in ct", + "ri g", + "▁part y", + "▁gard en", + "▁j e", + "▁m outh", + "▁ha ll", + "▁qu een", + "▁bo at", + "▁be ar", + "▁am eric", + "is m", + "▁gentle man", + "▁v i", + "ir t", + "u ff", + "▁la id", + "ra id", + "▁occas ion", + "▁ent ire", + "▁a ge", + "▁s ister", 
+ "▁cl ot", + "▁re pe", + "ous ly", + "▁pr ison", + "▁acc om", + "▁wh is", + "▁near ly", + "▁tre es", + "il ing", + "if f", + "▁eight een", + "b it", + "ward s", + "▁ear ly", + "▁t al", + "▁l ab", + "▁for th", + "m ing", + "on es", + "▁m ed", + "▁tr y", + "▁d a", + "il t", + "anc ed", + "▁prin ci", + "▁en em", + "▁think ing", + "▁ch ance", + "w here", + "▁c re", + "▁min utes", + "▁an x", + "▁mar y", + "▁p ict", + "▁wa it", + "▁v ill", + "▁st ren", + "▁af raid", + "▁cr ow", + "▁sm ile", + "▁l ate", + "▁eng land", + "▁pleas ure", + "▁a unt", + "▁new s", + "▁w is", + "▁f le", + "▁see ing", + "▁su per", + "▁fa ith", + "▁ro b", + "im ent", + "o int", + "▁b ill", + "ll ing", + "▁ne igh", + "▁tr ouble", + "▁sil ence", + "▁pl ain", + "▁there 's", + "are t", + "pe nd", + "▁excl aimed", + "ren ch", + "g y", + "▁mil es", + "p ly", + "▁gl ass", + "▁d rew", + "▁neigh b", + "el s", + "▁m ine", + "▁pr act", + "▁heav y", + "▁stand ing", + "▁se vent", + "▁sh ar", + "▁ch ange", + "▁necess ary", + "▁ch ap", + "▁purp ose", + "▁in qu", + "▁natur al", + "▁d eter", + "ic ked", + "▁b ott", + "▁hard ly", + "▁be ll", + "▁to p", + "▁c aught", + "fe red", + "w h", + "i ves", + "ound ed", + "▁a uth", + "▁circ um", + "▁f ing", + "▁sto pped", + "u c", + "▁w it", + "am ent", + "▁op in", + "▁a v", + "▁pri v", + "ain ing", + "▁inst ead", + "ru pt", + "▁g rew", + "▁lo ved", + "▁is land", + "▁kn ight", + "▁ag o", + "▁len gth", + "▁in n", + "▁pe ace", + "l s", + "in ary", + "i or", + "u es", + "▁th ird", + "us h", + "▁beaut y", + "▁h ig", + "▁he 's", + "t he", + "f orm", + "he ad", + "ic ally", + "as p", + "anc y", + "▁deter m", + "▁stra ight", + "▁c ra", + "in ing", + "pp er", + "l er", + "▁inf l", + "▁th or", + "▁con vers", + "▁bes ides", + "▁p osition", + "▁thir ty", + "▁d en", + "ra ge", + "▁att ention", + "m a", + "▁con v", + "ag er", + "▁his t", + "o red", + "▁com es", + "ag ed", + "▁for ce", + "▁s itting", + "▁ple ase", + "te nd", + "it er", + "▁what ever", + "▁inf orm", + "▁h op", + "▁ch air", 
+ "▁bu ild", + "▁b ab", + "ust om", + "▁girl s", + "▁r om", + "▁f rench", + "▁str uck", + "▁p ull", + "▁a st", + "▁li e", + "▁wr ong", + "▁know ledge", + "▁gra ce", + "▁scar ce", + "g hed", + "▁res ol", + "▁wat ch", + "▁thought s", + "▁r id", + "▁att empt", + "▁fif ty", + "▁r ap", + "▁bo x", + "h ood", + "▁get ting", + "▁ ver", + "▁f at", + "▁compan y", + "▁ar r", + "▁crow d", + "▁b urn", + "▁sl ight", + "▁cl ass", + "▁sou th", + "▁d ie", + "▁ex act", + "▁dr ink", + "▁en j", + "▁th ick", + "▁din ner", + "▁sa ve", + "▁ma id", + "▁pl an", + "▁sa int", + "▁immedi ately", + "i ers", + "▁b orn", + "i us", + "▁re v", + "▁te ars", + "ist s", + "▁t reat", + "us ion", + "▁me ant", + "▁boy s", + "pp ing", + "▁slow ly", + "▁in cl", + "▁l im", + "▁d ied", + "ic ed", + "▁com pl", + "▁f ool", + "▁fore st", + "▁su gg", + "▁p ost", + "▁ac cept", + "▁res ult", + "▁auth or", + "nd on", + "ce ive", + "▁sugg est", + "ci ent", + "▁st one", + "▁fr ight", + "▁pap er", + "▁con se", + "▁j our", + "▁t y", + "▁en c", + "▁quick ly", + "▁cont r", + "▁you th", + "▁se nd", + "▁v ict", + "if ied", + "▁bel ong", + "▁war m", + "▁f ix", + "▁imp oss", + "▁bes ide", + "▁ er", + "▁to ne", + "▁c amp", + "▁des ire", + "▁b ound", + "▁m akes", + "▁marg aret", + "▁nor th", + "▁br own", + "▁mo on", + "▁li ps", + "▁pl aced", + "v al", + "▁circum st", + "▁f ood", + "▁f illed", + "ic s", + "if t", + "an n", + "▁lo ndon", + "▁dist ance", + "g ing", + "▁stren gth", + "▁i d", + "▁flo or", + "▁for get", + "▁ob l", + "▁m id", + "ri es", + "it ions", + "b s", + "▁spr ing", + "▁you' re", + "▁vi ol", + "▁j ack", + "▁po ck", + "oo ks", + "▁follow ing", + "▁s ac", + "▁rem ained", + "ar ch", + "▁gr ow", + "▁sn ow", + "▁govern ment", + "▁b all", + "▁h ors", + "▁n ar", + "ad ed", + "▁bro ken", + "▁lau ghed", + "▁des cri", + "▁sa fe", + "itt en", + "ive ly", + "▁prof ess", + "▁o '", + "am ed", + "▁dep art", + "▁eas y", + "ou red", + "▁u nd", + "▁cou n", + "▁than k", + "▁know s", + "▁wa iting", + "d om", + "at s", + "▁g er", 
+ "▁v an", + "▁an ne", + "▁hors es", + "u gg", + "▁d read", + "▁un e", + "g es", + "ac y", + "▁pro ceed", + "▁g az", + "▁sh out", + "▁start ed", + "ent ed", + "▁comple te", + "o pe", + "▁g all", + "de red", + "▁w ide", + "i res", + "▁ne ck", + "as ure", + "ist ed", + "▁serv ice", + "▁pie ce", + "ci ally", + "en ces", + "▁sa il", + "▁pal ace", + "er v", + "▁gu ard", + "▁do ll", + "▁talk ing", + "▁man 's", + "▁li ft", + "▁gra ve", + "▁wee k", + "le t", + "▁imposs ible", + "▁eff ort", + "▁im m", + "▁arm y", + "we ll", + "▁diffic ult", + "u nd", + "▁f resh", + "▁f un", + "re me", + "▁st op", + "▁m ess", + "▁g ar", + "▁de g", + "▁inc re", + "▁corn er", + "▁soci ety", + "▁we ak", + "▁sh ut", + "▁h y", + "▁pro per", + "ac hing", + "▁cl oud", + "idd le", + "iv id", + "▁dem and", + "▁n ine", + "▁s it", + "▁reco gn", + "▁be at", + "us s", + "▁turn ing", + "▁sk y", + "▁opin ion", + "▁sing le", + "p ic", + "▁f ly", + "▁l ang", + "▁m ass", + "ce ll", + "▁out side", + "▁k iss", + "▁tr ust", + "▁occ up", + "▁ev il", + "▁bel ow", + "▁appear ance", + "u it", + "▁after n", + "▁gl o", + "▁g un", + "▁w est", + "en cy", + "p ar", + "▁show ed", + "▁convers ation", + "is es", + "▁con n", + "▁could n't", + "▁run ning", + "▁m ention", + "▁great er", + "▁mus ic", + "▁breat h", + "as es", + "▁n in", + "▁an t", + "are r", + "▁mor row", + "▁b ank", + "▁es pe", + "▁p eter", + "or k", + "ci al", + "▁pres ence", + "▁bat tle", + "▁win ter", + "he red", + "▁prob ably", + "▁clot hes", + "▁f ash", + "▁mar k", + "▁w ished", + "ve re", + "▁co ll", + "▁em b", + "▁kn e", + "▁mar ried", + "▁arri ved", + "▁p un", + "▁e vent", + "us hed", + "▁suff ic", + "▁e ager", + "▁form er", + "▁gi ving", + "▁p op", + "▁sa nd", + "▁ne g", + "▁us ual", + "▁rel ig", + "▁sim ple", + "▁sy m", + "it ation", + "▁g ro", + "or ies", + "▁mo ved", + "▁month s", + "▁spe aking", + "▁p et", + "▁sil ent", + "▁c ab", + "▁mount ain", + "▁express ion", + "g ar", + "▁co vered", + "▁hu nt", + "▁aftern oon", + "ap ed", + "▁occ ur", + "rie 
f", + "▁st ates", + "▁ z", + "st r", + "▁lo c", + "l ight", + "▁sh ore", + "c he", + "▁eas ily", + "▁p ale", + "un ity", + "▁rem ark", + "▁ph ys", + "▁begin ning", + "▁dut y", + "▁chap ter", + "▁infl u", + "ch o", + "▁con cl", + "am b", + "▁inst ant", + "▁pol it", + "z z", + "▁enj oy", + "▁s ick", + "▁rem ain", + "u el", + "▁st ream", + "▁fig ure", + "a ld", + "▁t ur", + "▁p ath", + "▁v ol", + "▁min ute", + "▁pleas ant", + "▁scarce ly", + "▁cons cious", + "▁terri ble", + "▁k ill", + "▁ra ised", + "▁fash ion", + "▁tw el", + "y al", + "▁lea ving", + "▁twel ve", + "at ure", + "▁f ut", + "▁th rew", + "▁st ar", + "▁fl owers", + "ol og", + "▁tr ying", + "ri b", + "▁sw ord", + "▁t all", + "▁mar ry", + "▁b en", + "▁expect ed", + "▁accord ing", + "▁for ty", + "▁st ick", + "in al", + "▁gu ess", + "▁sil ver", + "▁ir on", + "▁obl ig", + "▁off ice", + "▁rap id", + "▁lad ies", + "▁espe cially", + "i pped", + "ort ed", + "▁bre ad", + "e ch", + "▁te nder", + "or th", + "▁lear ned", + "▁b ooks", + "▁is n't", + "▁surpr ise", + "▁wr ite", + "▁pur s", + "pe red", + "▁wr itten", + "▁k illed", + "▁conse qu", + "▁ex h", + "▁pl aces", + "▁cond ition", + "▁dire ction", + "▁ch o", + "ul ty", + "j o", + "m it", + "▁entire ly", + "ter ing", + "▁ent er", + "▁act ion", + "w ise", + "▁su c", + "ib ly", + "▁happ iness", + "▁dec ided", + "▁gold en", + "▁lang u", + "en ess", + "▁not e", + "▁un less", + "u ous", + "▁f al", + "al ed", + "▁you' ll", + "▁wonder ful", + "ound s", + "um e", + "' re", + "▁sh ook", + "er 's", + "oo p", + "one l", + "▁perfect ly", + "▁ge or", + "nd ered", + "▁bro ad", + "at ic", + "▁cl osed", + "a 's", + "▁sp ot", + "te nded", + "▁lat ter", + "▁step s", + "▁mere ly", + "▁hist ory", + "f er", + "▁w ise", + "is hing", + "os ing", + "▁m iddle", + "ide red", + "▁underst ood", + "▁enem y", + "▁so le", + "ll ig", + "▁j ew", + "▁sim ply", + "g an", + "▁cond uct", + "▁t ast", + "▁bo ard", + "▁sa v", + "▁would n't", + "▁sh ot", + "▁rep ly", + "▁ch anged", + "m n", + "▁gr ass", + 
"▁fin ally", + "▁adm ir", + "it al", + "▁shar p", + "it ch", + "▁fort une", + "▁sum mer", + "▁exper ience", + "▁suc ceed", + "g ress", + "ut ed", + "▁o rig", + "ret ched", + "▁jour ney", + "▁ex cell", + "▁obser ved", + "a x", + "▁after wards", + "f ast", + "s y", + "▁b ow", + "▁fl at", + "▁pers ons", + "▁le an", + "▁ear n", + "▁bro ke", + "▁m ir", + "▁f it", + "os p", + "▁mar riage", + "▁rep res", + "i o", + "▁l ying", + "un k", + "▁tra ve", + "▁s itu", + "▁list en", + "▁acqu aint", + "▁r ing", + "ci ence", + "▁f aint", + "ol ute", + "▁cal m", + "b ered", + "▁li ves", + "▁esc ape", + "▁bene ath", + "ous es", + "▁cl im", + "▁bl ess", + "▁repe ated", + "▁pock et", + "est s", + "▁t ail", + "▁pass ion", + "▁d ick", + "▁v en", + "os es", + "cl ock", + "▁m ut", + "▁bec om", + "▁o per", + "▁o' clock", + "▁f ish", + "▁l ou", + "se mb", + "▁pre v", + "▁all owed", + "▁fam il", + "he l", + "▁g ate", + "▁sp ite", + "iver s", + "▁he alth", + "iss ion", + "▁i gn", + "▁re ach", + "▁c and", + "▁r ain", + "▁em pl", + "▁b an", + "▁str ugg", + "▁fir m", + "▁bit ter", + "▁sor ry", + "b ing", + "▁father 's", + "▁tem per", + "▁mad ame", + "pl es", + "▁f urn", + "▁fut ure", + "um ed", + "▁n ice", + "▁se par", + "▁pres ently", + "▁circumst ances", + "▁conn ect", + "id ing", + "▁set t", + "k es", + "▁l oud", + "▁wor se", + "▁w and", + "▁sp read", + "▁i' d", + "▁let ters", + "▁ye llow", + "▁mag n", + "▁pass ing", + "▁k it", + "▁pleas ed", + "▁dark ness", + "▁rem ar", + "idd en", + "c ome", + "▁te a", + "▁c iv", + "▁ap art", + "▁disapp e", + "▁import ant", + "▁leg s", + "▁n ation", + "▁del ic", + "▁d ressed", + "▁g ame", + "▁wall s", + "e c", + "▁d ry", + "▁v irt", + "▁d im", + "id ently", + "re l", + "▁r ub", + "▁abs olute", + "▁bl ind", + "▁disco vered", + "▁exact ly", + "▁d am", + "ott en", + "▁sor row", + "m y", + "▁c ost", + "fe rence", + "▁empl oy", + "vel op", + "▁c ous", + "▁be ast", + "▁spe c", + "▁opp ort", + "▁e ars", + "▁dro pped", + "▁sub st", + "▁che e", + "▁prot ect", + "il 
s", + "▁sm iled", + "in a", + "▁res p", + "▁prom ise", + "▁b ag", + "▁h ost", + "ur s", + "▁creat ure", + "▁not ice", + "▁know ing", + "▁head s", + "▁conc er", + "▁se at", + "ish ment", + "▁ind ivid", + "▁exist ence", + "▁determ ined", + "le nd", + "▁st orm", + "ro y", + "our s", + "▁con ce", + "ang ing", + "▁fix ed", + "▁p ress", + "▁maj or", + "o ved", + "▁v es", + "i od", + "▁lear n", + "▁mot ion", + "▁em pt", + "▁lea ves", + "▁bott om", + "▁ar g", + "iet y", + "▁no body", + "▁pro s", + "qu e", + "▁ut ter", + "▁p ick", + "ac ked", + "▁inte llig", + "▁he s", + "▁st ir", + "▁pre vent", + "▁ass ist", + "▁d om", + "▁dis g", + "▁adv ant", + "er able", + "▁v ent", + "um ent", + "▁t ired", + "re ct", + "as hed", + "act ion", + "▁cons idered", + "▁wr ote", + "▁h ouses", + "▁su it", + "▁che er", + "▁cast le", + "▁p ra", + "▁per form", + "anc ing", + "▁cle an", + "ru ct", + "▁st ro", + "▁fre qu", + "▁draw ing", + "▁l uck", + "▁ha bit", + "id ge", + "e ll", + "▁on es", + "▁no ble", + "▁sp lend", + "▁hon or", + "z en", + "▁pa id", + "▁spe ech", + "▁est ab", + "▁u r", + "ist r", + "▁individ ual", + "in ite", + "▁v all", + "▁bird s", + "ro du", + "▁d ar", + "▁all ow", + "▁conf ess", + "▁imp ress", + "▁prop ert", + "▁j ane", + "▁s ong", + "▁var ious", + "▁nar row", + "▁mo der", + "▁belie ved", + "ay s", + "▁ext ra", + "▁p ure", + "ar ily", + "▁per iod", + "▁shad ow", + "▁some wh", + "▁m al", + "▁c ott", + "▁ext reme", + "▁jud ge", + "▁vill age", + "▁ro yal", + "▁somewh at", + "▁l ower", + "▁ha m", + "▁ag ree", + "▁remem bered", + "▁ast on", + "ent h", + "▁decl ared", + "p an", + "▁tr ain", + "▁part s", + "▁col onel", + "am ber", + "▁break fast", + "▁sure ly", + "▁s in", + "ay ed", + "▁sc ene", + "g o", + "▁great est", + "▁influ ence", + "▁c ustom", + "it ary", + "▁anim al", + "▁sa ke", + "▁mo d", + "▁sold iers", + "in y", + "▁an cient", + "▁dra wn", + "▁ev idently", + "▁way s", + "▁look s", + "▁rev ol", + "at or", + "ant ed", + "▁ref lect", + "▁pict ure", + "▁like ly", + "▁sh 
r", + "▁law s", + "▁hold ing", + "▁diffic ulty", + "▁in j", + "▁me l", + "▁cou rage", + "n es", + "▁m ort", + "▁tr oub", + "▁bur st", + "▁ang ry", + "▁pr oud", + "gg ed", + "▁spo ken", + "is ion", + "▁des ert", + "pt ion", + "▁com b", + "▁app arent", + "r ing", + "▁wat ched", + "n a", + "▁e ast", + "▁sh op", + "▁ag re", + "▁priv ate", + "est y", + "▁j ul", + "▁fin ished", + "▁anx ious", + "ot ion", + "▁fif teen", + "▁soci al", + "u nder", + "▁dis m", + "▁tou ch", + "▁w ine", + "▁att ack", + "▁ide as", + "▁geor ge", + "a f", + "re r", + "oo se", + "▁sp ace", + "▁sc r", + "▁ins ide", + "▁gentle men", + "▁civ il", + "i ently", + "▁form ed", + "▁f ol", + "▁go es", + "▁you' ve", + "▁th in", + "▁sur f", + "▁serv ant", + "▁b al", + "▁co ver", + "▁our selves", + "▁fall en", + "▁hen ry", + "▁l ot", + "i um", + "▁ad vent", + "▁car riage", + "▁bab y", + "▁ele ct", + "▁to ng", + "▁app re", + "▁every body", + "ud ed", + "▁comm un", + "▁in e", + "it ive", + "▁wa ited", + "c ise", + "▁gr ou", + "he t", + "▁v ain", + "▁imp ro", + "▁fav or", + "er ial", + "▁spe ed", + "▁wind ows", + "▁care fully", + "▁i ce", + "▁no ise", + "▁her o", + "▁j im", + "▁will iam", + "▁pe cul", + "▁prom ised", + "▁walk ing", + "▁forg otten", + "▁oblig ed", + "▁earn est", + "▁m ain", + "▁l ose", + "▁gl ance", + "▁ves sel", + "▁gr ad", + "▁th ro", + "▁bo d", + "▁should er", + "▁met h", + "▁anim als", + "▁not iced", + "ab les", + "▁pecul iar", + "▁f ier", + "▁p ot", + "▁quiet ly", + "▁c up", + "▁ser ious", + "▁tre mb", + "▁gener ally", + "▁americ an", + "▁sym p", + "r al", + "▁d on", + "▁fr ance", + "ict ion", + "▁propert y", + "▁should ers", + "▁str anger", + "▁s an", + "▁c ow", + "▁what 's", + "▁d ust", + "▁affect ion", + "▁hands ome", + "▁hig her", + "i ant", + "nd ay", + "▁we l", + "▁po et", + "▁sl a", + "▁dist inct", + "▁m am", + "▁p ier", + "ac ing", + "ag ue", + "▁gr own", + "u ly", + "▁d '", + "▁ch amber", + "▁des ce", + "▁mur m", + "st em", + "▁person al", + "▁f ancy", + "▁of fered", + "os ite", + 
"ons ie", + "▁bu ilt", + "▁ed ge", + "▁whis pered", + "▁sk in", + "▁pie ces", + "it ated", + "c her", + "os ity", + "▁p it", + "▁cont ro", + "▁f aces", + "▁sp ent", + "▁inter rupt", + "h ow", + "is ters", + "▁but ter", + "▁de velop", + "▁un k", + "h ip", + "▁he at", + "▁fo nd", + "▁co at", + "▁tou ched", + "▁h ol", + "ing u", + "▁p i", + "▁r ace", + "▁j ump", + "▁surpr ised", + "ot ed", + "▁de fe", + "en ced", + "▁was n't", + "▁we ar", + "and on", + "▁f an", + "ac her", + "▁ar ch", + "▁ed uc", + "▁bra ve", + "at hered", + "▁e ld", + "▁we alth", + "▁sy stem", + "▁ger man", + "▁fal se", + "w ood", + "▁d are", + "ak ed", + "▁cous in", + "▁f er", + "ke y", + "▁l in", + "▁inte llect", + "▁prep ared", + "▁fing ers", + "▁sur r", + "▁mount ains", + "i pp", + "▁opport unity", + "a ff", + "▁b are", + "▁d or", + "▁int rodu", + "▁co llect", + "▁love ly", + "▁r ag", + "▁cr own", + "▁mat ters", + "▁compan ion", + "▁we ather", + "▁al ar", + "▁inn oc", + "▁r is", + "▁m ix", + "▁l ake", + "▁st ore", + "▁un h", + "▁mean ing", + "▁mem ory", + "o ver", + "▁b and", + "le ep", + "▁find ing", + "e e", + "▁char ge", + "▁gr at", + "▁att ract", + "▁gr ay", + "▁quar ter", + "▁av o", + "▁great ly", + "▁m ach", + "▁in h", + "▁as leep", + "▁par is", + "▁d av", + "▁al to", + "▁off er", + "▁opp osite", + "oun ced", + "er ve", + "▁bre ast", + "n own", + "▁read ing", + "▁alto gether", + "▁wr iting", + "pect ed", + "▁deg ree", + "c ing", + "n ight", + "▁ex ec", + "fort un", + "▁st at", + "▁feel ings", + "▁h ath", + "▁c ook", + "▁r ail", + "▁hon our", + "d ing", + "▁f ate", + "▁p or", + "▁fr ank", + "▁meet ing", + "▁r ough", + "▁al ive", + "▁h ide", + "it es", + "il ar", + "▁bl ow", + "▁cr uel", + "ra ph", + "▁hur t", + "▁l oss", + "▁thr own", + "▁ca used", + "▁we 'll", + "▁ser ve", + "▁du ke", + "▁b ent", + "▁un ited", + "▁see k", + "▁king dom", + "▁situ ation", + "▁empt y", + "n ers", + "▁d ue", + "▁li ked", + "▁sw ift", + "▁open ing", + "▁serv ants", + "c hen", + "ou ra", + "▁g h", + "▁sus pic", + 
"▁fre ed", + "oint ed", + "▁surf ace", + "c il", + "▁quest ions", + "▁ ess", + "▁cur ious", + "▁const it", + "▁accom pan", + "▁christ ian", + "▁f ill", + "are st", + "▁satisf ied", + "r on", + "▁s ides", + "▁p ity", + "▁re ve", + "▁equ al", + "▁he ight", + "▁or dered", + "os op", + "▁gre y", + "▁list ened", + "p et", + "▁re jo", + "▁cap t", + "ib ility", + "o b", + "▁m art", + "▁happ en", + "▁hur ried", + "▁doll ars", + "▁langu age", + "▁an ge", + "▁your s", + "▁supp osed", + "▁laugh ing", + "▁sett led", + "▁ro de", + "▁per m", + "▁dist ingu", + "▁hur ry", + "▁dest roy", + "▁tal ked", + "▁lift ed", + "oc r", + "▁squ are", + "▁val ue", + "▁tast e", + "▁v ast", + "▁king 's", + "▁r ul", + "▁r oof", + "▁tell ing", + "▁stud y", + "▁o w", + "▁p an", + "▁b as", + "▁r ising", + "▁suffic ient", + "▁for ced", + "▁r ise", + "▁at tend", + "▁phil osop", + "▁no se", + "▁six ty", + "he st", + "▁p in", + "▁e gg", + "▁am b", + "▁fa ult", + "b ur", + "▁st ation", + "▁dist ur", + "▁reg ular", + "ill e", + "▁p ack", + "▁spe cial", + "▁hon est", + "▁build ing", + "▁se ason", + "▁sh ape", + "▁pr ide", + "▁sm iling", + "li ke", + "▁ord ers", + "y n", + "▁wood s", + "▁accom pl", + "c on", + "▁s am", + "▁us ually", + "▁wat ching", + "▁sac ri", + "er ved", + "▁pass age", + "▁mat erial", + "▁vall ey", + "y r", + "▁st airs", + "▁li bert", + "▁fright ened", + "▁remar ked", + "▁t it", + "▁w ed", + "▁mist ress", + "▁direct ly", + "▁suff er", + "▁glo om", + "▁l ines", + "▁st ock", + "▁just ice", + "▁d iam", + "est ed", + "▁gr owing", + "▁does n't", + "▁g athered", + "▁ord inary", + "u ce", + "▁e ur", + "▁un f", + "▁kit chen", + "▁th reat", + "▁de pend", + "▁wee ks", + "▁desp air", + "▁meth od", + "▁se ized", + "▁disc uss", + "▁ex er", + "if y", + "▁fl ower", + "▁ign or", + "e er", + "ad es", + "▁de b", + "ep ing", + "▁a le", + "▁y o", + "ch ief", + "▁supp er", + "i k", + "▁bo ld", + "▁put ting", + "▁ne arer", + "us es", + "▁one 's", + "▁b le", + "▁y ork", + "▁end e", + "▁aff airs", + "▁sold ier", 
+ "▁contr ary", + "▁mo ving", + "▁stre ets", + "▁b ir", + "r ance", + "hen s", + "▁c it", + "ic ated", + "▁cat ch", + "▁imag ine", + "ed s", + "▁mar ch", + "▁se arch", + "ar a", + "▁re ceive", + "im ate", + "▁m onsie", + "▁tw ice", + "▁pap a", + "▁monsie ur", + "▁re ck", + "m in", + "u de", + "▁pro cess", + "▁ho le", + "a ly", + "l in", + "▁c ro", + "▁fav our", + "▁d ign", + "▁work ing", + "▁har m", + "▁eur ope", + "ant ic", + "▁pro ved", + "oc ked", + "▁pro ve", + "▁cl er", + "▁lo d", + "cept ion", + "▁pull ed", + "▁ar th", + "▁author ity", + "▁ha ven", + "▁j er", + "▁un s", + "▁move ment", + "ust ed", + "▁eng aged", + "▁brother s", + "▁advant age", + "l ished", + "o le", + "▁arth ur", + "▁a ut", + "▁st ones", + "▁far m", + "▁diffe rence", + "▁f art", + "▁as ide", + "▁m as", + "▁obser v", + "▁hen ce", + "▁possess ion", + "▁hill s", + "▁fort un", + "ul s", + "ail s", + "▁inst ance", + "▁she 's", + "▁o l", + "▁ho ly", + "▁fle w", + "k y", + "▁col or", + "▁r ate", + "▁do ors", + "▁bus y", + "se t", + "▁add ress", + "▁famil iar", + "▁we ight", + "▁aw are", + "▁play ed", + "▁symp ath", + "ll s", + "▁sole mn", + "▁l iter", + "▁t est", + "▁em per", + "▁ind ian", + "▁dist ant", + "▁interest ing", + "▁b ull", + "▁thor ough", + "▁w ore", + "▁wor ked", + "▁expl ained", + "▁excell ent", + "▁splend id", + "▁tong ue", + "▁d i", + "▁p ard", + "▁n amed", + "▁sh ame", + "▁fr anc", + "▁spe ct", + "▁moment s", + "b ers", + "▁w il", + "▁my ster", + "▁se ated", + "▁inst antly", + "▁sim ilar", + "▁ende av", + "▁me asure", + "▁natur ally", + "nd s", + "▁su f", + "▁am ount", + "▁im per", + "▁dog s", + "it able", + "▁br it", + "▁necess ity", + "r id", + "ul ous", + "▁conf idence", + "d en", + "▁p arent", + "▁w id", + "▁v ir", + "▁never the", + "▁agre ed", + "▁neverthe less", + "un ch", + "▁hear ing", + "▁t akes", + "▁a ug", + "▁un ivers", + "en ance", + "▁un w", + "▁ear l", + "▁keep ing", + "▁dri ve", + "▁produ ced", + "▁a ud", + "on 's", + "▁n ames", + "ag n", + "▁disappe ared", + "▁thr 
ow", + "▁pres ident", + "▁god s", + "▁mag ic", + "▁repres ent", + "▁unk nown", + "p or", + "▁ter ror", + "▁haven 't", + "as c", + "▁supp ort", + "▁smo ke", + "▁w icked", + "k er", + "▁wor ks", + "▁art ic", + "▁d ull", + "▁yes ter", + "▁fall ing", + "▁worth y", + "▁libert y", + "ul ation", + "▁des ign", + "▁want s", + "▁ev idence", + "▁compan ions", + "▁spir its", + "▁co ast", + "▁might y", + "▁particular ly", + "▁wit ness", + "▁disco ver", + "▁s ought", + "▁sp an", + "' ve", + "▁r are", + "▁offic ers", + "l v", + "z y", + "▁yester day", + "ve y", + "c ent", + "▁p owers", + "▁y ield", + "▁c ool", + "▁or gan", + "▁am az", + "▁point ed", + "f ord", + "▁cl aim", + "▁cont ent", + "▁poss ibly", + "▁ter ms", + "▁tri um", + "▁offic er", + "▁pers u", + "▁ce ased", + "▁dro ve", + "▁occur red", + "▁g ree", + "▁li es", + "▁other wise", + "▁emper or", + "▁h om", + "▁st ars", + "▁kne es", + "▁trium ph", + "ru ction", + "▁pa used", + "om s", + "▁requ ired", + "▁fail ed", + "▁unh app", + "▁diam ond", + "▁r at", + "▁al i", + "▁d ouble", + "▁form s", + "▁gi ves", + "▁fing er", + "ra ce", + "▁p air", + "al ous", + "ill a", + "▁bo b", + "▁el iz", + "▁tra vel", + "▁carry ing", + "▁g le", + "il es", + "▁te eth", + "es h", + "▁sh own", + "▁fr uit", + "▁wat ers", + "▁ent ertain", + "▁heart s", + "um n", + "▁lab or", + "in 't", + "▁p ill", + "▁en er", + "so ci", + "▁exam ple", + "▁u pper", + "▁fore ign", + "▁mor al", + "▁soft ly", + "ro se", + "▁hu ge", + "▁char les", + "▁pri est", + "▁exc it", + "▁f et", + "▁mother 's", + "▁possess ed", + "▁c ases", + "▁rep ort", + "▁mil k", + "▁aff air", + "▁princi ple", + "▁inh ab", + "▁freed om", + "▁pr oof", + "▁inte nded", + "▁satisf action", + "▁shout ed", + "is c", + "▁pl at", + "▁b ask", + "ent al", + "▁grou p", + "▁fart her", + "as m", + "▁un fortun", + "▁unt o", + "▁sing ing", + "▁arr ange", + "▁relig ion", + "▁b er", + "▁rock s", + "▁sevent een", + "▁d er", + "▁j ames", + "▁bu y", + "▁succeed ed", + "▁room s", + "▁lead ing", + "▁maj esty", + 
"▁event s", + "▁d ance", + "▁p aint", + "▁g ently", + "ac le", + "▁te le", + "▁pard on", + "us ing", + "▁dro p", + "f ather", + "▁in vent", + "▁ke y", + "▁mention ed", + "▁sevent y", + "▁r os", + "▁suff ering", + "▁rec ord", + "▁cab in", + "ro ad", + "▁dis s", + "iv al", + "▁demand ed", + "▁excit ement", + "▁as soci", + "▁pro gress", + "ang ers", + "▁cur i", + "▁americ a", + "▁ru le", + "▁b or", + "▁v ig", + "less ly", + "▁clear ly", + "▁b ore", + "▁she ep", + "▁reg ret", + "▁neighb our", + "b ly", + "i ance", + "▁inst inct", + "▁adv ice", + "▁aw ful", + "▁s en", + "▁f ully", + "▁g ather", + "▁pap ers", + "▁h idden", + "▁che st", + "▁bir th", + "h y", + "p ap", + "▁h ither", + "▁st uff", + "▁imp at", + "▁call ing", + "▁four th", + "▁dread ful", + "▁p os", + "▁g rief", + "▁br ill", + "▁power ful", + "▁present ed", + "▁fair y", + "▁expl ain", + "▁sho ot", + "▁prison er", + "▁jo ined", + "▁aff ord", + "m ond", + "at tered", + "▁ ing", + "im ents", + "▁she l", + "▁pre fer", + "▁consider able", + "▁ob ey", + "▁vo ices", + "▁inter v", + "▁interest ed", + "▁vir g", + "▁c red", + "▁c ard", + "▁e p", + "▁need ed", + "▁p ounds", + "▁con qu", + "▁cle ver", + "▁adv anced", + "▁c ord", + "ig hed", + "▁under t", + "▁resol ved", + "▁w ag", + "ist ic", + "▁pa ul", + "▁exc ited", + "▁cond itions", + "▁pict ures", + "ac ious", + "▁sh ining", + "▁su nday", + "▁ser ved", + "▁ste am", + "▁pol ice", + "▁spr ang", + "s ie", + "or a", + "es e", + "▁j es", + "▁no dd", + "▁sal t", + "▁field s", + "▁c art", + "▁ind ians", + "▁fier ce", + "d le", + "▁r ide", + "▁des ired", + "▁ed ward", + "▁import ance", + "▁inform ation", + "t ure", + "▁h osp", + "▁me mb", + "▁per ceived", + "▁y ard", + "▁cr it", + "tern al", + "▁t ask", + "▁fo ld", + "r ant", + "▁soon er", + "▁mer ch", + "▁absolute ly", + "▁cit iz", + "▁suf fered", + "▁t ight", + "▁d ur", + "▁is s", + "ill y", + "▁lo g", + "▁complete ly", + "h old", + "▁r ad", + "▁sh are", + "▁will ing", + "▁dev il", + "▁ship s", + "▁imag ination", + 
"▁super ior", + "c om", + "am s", + "▁any body", + "▁en v", + "▁app l", + "▁dra g", + "▁da wn", + "asp ed", + "▁occup ied", + "▁curi osity", + "i est", + "▁s igh", + "▁fo x", + "as ant", + "▁my st", + "▁ste ad", + "et t", + "▁cou ple", + "▁ty pe", + "▁extra ord", + "▁apparent ly", + "▁wel come", + "▁da ily", + "▁moder n", + "i ot", + "▁a in't", + "▁d ying", + "ll en", + "▁fe at", + "▁acc ident", + "▁count enance", + "▁ab andon", + "ort ion", + "▁lo ck", + "▁cr ime", + "p ir", + "▁m ult", + "▁al as", + "▁ref used", + "▁h ate", + "▁d w", + "▁when ever", + "▁than ks", + "▁sl ave", + "▁regard ed", + "▁suggest ed", + "ul f", + "▁act ually", + "g ment", + "▁s ize", + "re g", + "▁c ult", + "▁k at", + "▁bod ies", + "h us", + "▁b ay", + "▁tr uly", + "▁fl esh", + "ish op", + "▁sm ith", + "▁bet r", + "w ith", + "▁w et", + "▁rapid ly", + "g ers", + "▁o dd", + "as ons", + "et te", + "▁cl ub", + "ab el", + "▁hor ror", + "▁m ile", + "▁fl ight", + "▁cross ed", + "▁profess or", + "▁o ce", + "▁wor st", + "iz ation", + "▁rus hed", + "▁s cience", + "▁b rief", + "▁ste pped", + "▁mid st", + "h a", + "▁s our", + "▁m aint", + "▁br ain", + "▁cott age", + "▁exp ressed", + "▁equ ally", + "▁educ ation", + "▁aug ust", + "▁b uck", + "▁n ay", + "id s", + "▁tem pt", + "▁inqu ir", + "▁fool ish", + "▁t aught", + "▁c op", + "▁d un", + "▁p icked", + "▁el sie", + "▁land s", + "▁dri ven", + "▁polit ical", + "m as", + "▁de ck", + "▁res ist", + "▁inst r", + "▁b on", + "▁k en", + "ip s", + "▁hot el", + "▁danger ous", + "i ally", + "n ow", + "▁do zen", + "▁tr ade", + "▁point s", + "▁nin et", + "ab ility", + "▁cr im", + "▁rel ations", + "▁inter p", + "▁bar b", + "▁delight ed", + "▁memb ers", + "▁s isters", + "▁st y", + "▁an ger", + "▁belie f", + "▁ask ing", + "▁me at", + "▁dis pl", + "▁rel ief", + "ific ation", + "▁hunt ing", + "▁ale x", + "ar ies", + "▁ob st", + "▁beh old", + "▁mist ake", + "▁inqu ired", + "▁remark able", + "▁orig in", + "c ked", + "▁n erv", + "ack s", + "ver t", + "ro p", + "▁care ful", + 
"▁w ounded", + "ad ing", + "▁ce re", + "▁enem ies", + "▁grad ually", + "▁interrupt ed", + "▁f is", + "▁st up", + "▁se vere", + "▁ke en", + "▁six teen", + "k ins", + "res p", + "▁wor n", + "▁fl our", + "▁sy lv", + "▁contro l", + "k in", + "▁l one", + "as ing", + "▁n ap", + "▁ass ert", + "▁dep th", + "▁kind ly", + "▁mur der", + "ac ity", + "▁ele ven", + "▁inv ol", + "▁d' art", + "▁w ings", + "▁o ak", + "▁e t", + "▁beg un", + "▁dream s", + "wh ile", + "▁more over", + "▁exp ed", + "▁inde pend", + "▁bur ied", + "▁appro ached", + "agn an", + "▁d'art agnan", + "▁se x", + "▁sa ved", + "▁har ry", + "▁phys ical", + "▁spec ies", + "c er", + "o e", + "▁gl ory", + "▁creat ures", + "▁news pap", + "▁s ang", + "▁pl enty", + "▁use ful", + "▁sho es", + "▁hop ed", + "▁frequ ently", + "▁sa f", + "▁dist r", + "▁princi p", + "▁p u", + "y 's", + "au nt", + "▁lo ver", + "▁fam ous", + "▁reco llect", + "▁n ur", + "▁gr im", + "▁ind if", + "▁char ming", + "▁a im", + "▁loo se", + "▁conscious ness", + "▁mam ma", + "▁ent hus", + "▁sle pt", + "▁smo oth", + "▁fight ing", + "▁hy p", + "▁enthus i", + "▁d ig", + "al ing", + "▁st age", + "▁any one", + "▁thr ust", + "▁des per", + "▁t ar", + "▁l amp", + "st one", + "▁st ern", + "▁ev ident", + "▁mean while", + "▁forg ive", + "▁accept ed", + "▁oce an", + "▁to t", + "▁they 're", + "▁wo ndered", + "▁play ing", + "▁det ect", + "▁ha le", + "▁kn ife", + "ail ed", + "▁close ly", + "▁me as", + "▁proceed ed", + "▁mess age", + "▁m our", + "▁f ac", + "▁un ion", + "ustom ed", + "he m", + "am ing", + "▁ex ceed", + "▁fe ather", + "▁pre cious", + "▁cent ury", + "▁une x", + "▁p ark", + "ic ation", + "▁every where", + "▁mind s", + "▁extraord inary", + "▁a rose", + "▁ent rance", + "▁cap ital", + "▁rec all", + "▁burn ing", + "▁magn ific", + "o es", + "or ious", + "st and", + "▁as semb", + "▁pl ant", + "▁neighb or", + "▁l est", + "um ents", + "▁coll e", + "▁virt ue", + "▁be w", + "▁for b", + "▁ret reat", + "▁cap able", + "▁ass ured", + "▁const ant", + "▁govern or", + 
"▁incre ased", + "▁h orn", + "▁rem oved", + "▁fact s", + "▁abs ence", + "▁expl an", + "▁a ck", + "▁some body", + "▁aw a", + "▁adm it", + "▁cor rect", + "▁forg ot", + "▁je alous", + "▁kiss ed", + "▁pop ular", + "▁h ut", + "▁u g", + "pe lled", + "▁gr ant", + "▁friend ship", + "▁ind ign", + "▁sympath y", + "i able", + "er ous", + "▁th om", + "▁al ice", + "▁le vel", + "▁object s", + "▁p ressed", + "▁sh a", + "ro om", + "▁qu al", + "▁beg ged", + "▁em p", + "▁h ind", + "▁hig hest", + "▁cloud s", + "▁gh ost", + "▁ack now", + "ous ed", + "▁stri ke", + "▁wis hes", + "▁becom es", + "▁tremb ling", + "▁no b", + "▁kind ness", + "▁accord ingly", + "▁thro at", + "r ation", + "▁f are", + "▁we 're", + "▁st retched", + "▁fr ag", + "▁whe el", + "▁qu eer", + "▁grand father", + "f or", + "▁ch oose", + "▁hel en", + "▁eight y", + "▁l y", + "▁mis erable", + "▁cont empt", + "ign ed", + "▁mil itary", + "▁rus s", + "▁bask et", + "▁a head", + "oo ps", + "ive red", + "▁list ening", + "▁fr o", + "▁lar ger", + "▁div ine", + "i ber", + "▁st ories", + "anc hes", + "us hing", + "iz ing", + "▁tre asure", + "▁exc use", + "▁innoc ent", + "▁a id", + "▁rem ind", + "▁sla ves", + "r it", + "st airs", + "▁re ward", + "og raph", + "▁man age", + "▁dis h", + "▁through out", + "▁wa ves", + "▁jud gment", + "▁arri val", + "▁cho ice", + "▁unhapp y", + "ast ic", + "▁bl ank", + "▁adv ance", + "▁inform ed", + "▁acquaint ance", + "▁impress ion", + "▁myster ious", + "b b", + "▁a ra", + "▁not es", + "▁had n't", + "▁se ll", + "▁com r", + "▁im pl", + "▁ind ust", + "▁end ed", + "▁light s", + "▁nur se", + "▁s out", + "▁b ought", + "▁f red", + "▁mar ked", + "▁sc ream", + "me nd", + "▁une as", + "▁delic ate", + "▁we ary", + "est ic", + "▁prom pt", + "▁exper i", + "▁hung ry", + "▁fly ing", + "▁p ow", + "▁br idge", + "▁jo in", + "▁vis ible", + "▁understand ing", + "▁cry ing", + "▁avo id", + "▁t is", + "▁st iff", + "ac hes", + "▁rest r", + "▁sound s", + "▁b owed", + "▁c aut", + "▁good s", + "▁dav id", + "▁un able", + "▁you' d", 
+ "ham ed", + "▁b os", + "er al", + "▁as hamed", + "▁some where", + "▁inf inite", + "ock s", + "▁dign ity", + "▁g ay", + "▁v ic", + "▁am id", + "▁ho llow", + "▁em otion", + "▁adm itted", + "▁parent s", + "▁w ra", + "▁h int", + "▁tem ple", + "▁comfort able", + "▁intellig ence", + "or ous", + "▁be aring", + "▁her s", + "ab eth", + "▁rem ains", + "▁cont em", + "▁set tle", + "▁imm ense", + "f fe", + "p her", + "▁c her", + "ld om", + "▁we ap", + "ul ated", + "▁light ed", + "gy pt", + "▁advent ure", + "▁thorough ly", + "▁e gypt", + "il st", + "ang es", + "▁ob t", + "▁friend ly", + "▁reck on", + "▁stup id", + "▁f ed", + "▁r ome", + "▁me al", + "▁int ention", + "▁return ing", + "▁conv in", + "▁c oo", + "le ction", + "▁as h", + "ac hel", + "▁ro pe", + "▁pr ice", + "▁pro ject", + "el t", + "row s", + "▁sec ure", + "▁esc aped", + "▁hop es", + "▁eliz abeth", + "▁saf ety", + "▁w ound", + "▁su p", + "▁un us", + "ons cious", + "▁hor ri", + "▁min ister", + "▁o x", + "ll a", + "ens ive", + "▁help ed", + "▁plain ly", + "▁se ldom", + "▁think s", + "▁fellow s", + "▁m ood", + "▁p ushed", + "▁exh ib", + "ing ing", + "▁th under", + "au d", + "ian a", + "▁fair ly", + "▁eld er", + "▁egg s", + "ir m", + "▁maid en", + "m other", + "▁appe ars", + "▁chee ks", + "▁w on", + "▁e ase", + "▁re du", + "▁sk ill", + "▁ext ent", + "▁pract ice", + "▁relig ious", + "▁becom ing", + "▁virg in", + "▁feat ures", + "▁t ied", + "▁when ce", + "▁some how", + "▁gre et", + "▁faith ful", + "▁concer ned", + "▁the at", + "▁b ishop", + "▁p ink", + "▁eager ly", + "re es", + "▁e ating", + "▁was te", + "▁r ank", + "▁fe m", + "▁br ide", + "▁un l", + "ott ed", + "cei ving", + "▁tri b", + "▁orig inal", + "▁concer ning", + "▁ha b", + "▁acc ustomed", + "▁pat ient", + "▁rec om", + "▁ce ll", + "oint ment", + "▁arr anged", + "v ille", + "it ure", + "▁who lly", + "▁old er", + "▁col our", + "▁prov ided", + "▁at e", + "▁part ly", + "▁mon t", + "olog y", + "▁pros pect", + "▁cere mon", + "▁ ze", + "▁l aughter", + "▁fe e", + "▁br 
anches", + "▁fl ed", + "r ight", + "▁wh ilst", + "▁sl ipped", + "▁viol ent", + "▁inhab it", + "▁s ons", + "▁eng age", + "▁unc om", + "▁deep ly", + "▁subst ance", + "▁t ale", + "▁t iny", + "▁d an", + "▁g a", + "▁be e", + "▁y ards", + "ick s", + "▁hast ily", + "he ld", + "▁w es", + "▁v ague", + "▁am use", + "▁mu d", + "▁wo lf", + "▁h ans", + "ill ing", + "▁supp ly", + "▁sil k", + "▁const antly", + "▁christ mas", + "▁mill ion", + "▁whis per", + "▁m ental", + "▁was hing", + "ver se", + "▁cl oth", + "▁bar on", + "▁cor resp", + "▁nodd ed", + "▁corresp ond", + "k a", + "▁he ll", + "▁g ain", + "▁r ust", + "▁ob tain", + "▁unc onscious", + "▁strugg le", + "▁estab lished", + "▁law y", + "ol s", + "▁sign s", + "▁ut tered", + "▁rom an", + "▁constit ution", + "p es", + "▁c ave", + "▁sp are", + "▁qu ant", + "▁im age", + "▁mer ry", + "▁treat ed", + "▁effort s", + "▁lone ly", + "r ated", + "▁n ut", + "▁gl anced", + "▁port ion", + "it or", + "▁re semb", + "▁with d", + "▁me ad", + "▁fe ast", + "▁pr im", + "▁cl iff", + "▁em er", + "▁prop ortion", + "▁consider ation", + "▁hast e", + "▁gaz e", + "▁sav age", + "▁c rew", + "▁to wer", + "▁l ack", + "▁cons cience", + "▁mer cy", + "▁exh a", + "▁cons ent", + "at ors", + "ur d", + "▁out l", + "▁cl o", + "▁ad op", + "▁among st", + "▁h anging", + "▁circ le", + "▁prep ar", + "▁brill iant", + "f l", + "▁g ained", + "▁r ow", + "▁tr oops", + "▁rep ro", + "▁m ing", + "ou l", + "▁d ared", + "▁l ion", + "▁jo e", + "▁wind s", + "▁bring ing", + "▁anx iety", + "▁bill y", + "▁consequ ence", + "f ice", + "p se", + "▁f ought", + "▁p red", + "▁sc ra", + "▁gl im", + "▁vict ory", + "p ed", + "▁r ab", + "▁sc ot", + "▁ob v", + "▁sh ock", + "ch an", + "▁kn ock", + "our se", + "▁hand ed", + "▁ind ul", + "▁pat ience", + "▁sout her", + "▁j ose", + "▁fe ver", + "▁ro lled", + "ict ed", + "▁set ting", + "▁profess ion", + "▁sylv ia", + "▁h un", + "ut ions", + "▁fe ared", + "▁br and", + "▁bo ots", + "▁fore head", + "▁princi ples", + "▁s ink", + "▁r ig", + "av al", + "▁pur 
ch", + "▁gaz ed", + "▁employ ed", + "▁murm ured", + "m ore", + "▁s ar", + "as hing", + "ur al", + "ac les", + "▁tr ad", + "▁act ive", + "▁bene f", + "▁bott le", + "▁r age", + "▁inv est", + "▁lu x", + "▁s ank", + "▁h ang", + "▁be ard", + "ent ial", + "▁lo ving", + "▁nat ive", + "▁inst ruct", + "▁wa ist", + "▁rel ation", + "▁disco very", + "▁mel an", + "▁nerv ous", + "▁obt ained", + "▁p ig", + "▁se ar", + "▁fl ag", + "▁tra il", + "▁distingu ished", + "▁st ared", + "▁mis ery", + "▁pr int", + "▁gu il", + "▁jump ed", + "▁sw im", + "▁appro aching", + "▁suspic ion", + "▁i v", + "▁man aged", + "ak er", + "▁te ach", + "▁mat ch", + "▁guil ty", + "▁w retched", + "▁r um", + "▁comp ar", + "▁the ory", + "▁s her", + "▁b ree", + "▁k ings", + "▁sh one", + "ather ine", + "▁thr one", + "▁show ing", + "aw s", + "▁rob in", + "▁emb ar", + "ut ation", + "▁woman 's", + "▁add ressed", + "▁prot est", + "▁admir ation", + "▁troub led", + "▁ug ly", + "o om", + "er ves", + "▁fl ung", + "▁sub s", + "▁rel ie", + "▁thousand s", + "n ce", + "▁o d", + "▁cur rent", + "▁wood en", + "▁sacri fice", + "ur ity", + "ci p", + "▁pe ar", + "▁far mer", + "▁need s", + "▁cond em", + "▁mem ber", + "▁b ade", + "▁d ancing", + "▁re asons", + "▁cons ult", + "▁sw all", + "▁shad ows", + "▁ange l", + "▁ninet een", + "▁sty le", + "f ield", + "▁l an", + "▁man if", + "▁ro bert", + "▁gr ate", + "▁eng ine", + "▁wis dom", + "▁jes us", + "▁con vent", + "▁pre ced", + "▁interest s", + "▁tri al", + "b or", + "i ven", + "▁n est", + "▁ex ch", + "▁vo y", + "▁ill ust", + "▁wor ship", + "▁ad am", + "▁ph r", + "▁princip al", + "▁h it", + "▁spe nd", + "▁stand s", + "▁resp ons", + "▁a y", + "▁ha w", + "▁wh ist", + "▁ar rest", + "▁kind s", + "▁requ ire", + "▁descri bed", + "▁l it", + "▁pre cise", + "▁prop osed", + "▁produ ce", + "▁utter ly", + "ul se", + "▁no vel", + "▁bl ame", + "▁cred it", + "▁p ause", + "os en", + "▁house hold", + "▁arm ed", + "▁follow s", + "up on", + "▁appro ach", + "▁nin ety", + "▁p ir", + "▁fl ore", + "iv ity", + 
"▁ref use", + "▁sens ible", + "cho ly", + "▁nation al", + "▁g rie", + "▁re ven", + "▁let 's", + "▁delight ful", + "▁extreme ly", + "▁melan choly", + "u ing", + "▁en orm", + "cl es", + "▁slight ly", + "▁sac red", + "▁recogn ized", + "▁myst ery", + "▁g ri", + "▁comp re", + "▁dist ress", + "▁war ri", + "▁use less", + "▁tri f", + "▁mount ed", + "▁phil ip", + "▁ener gy", + "▁explan ation", + "▁c as", + "at ory", + "▁p our", + "▁r ic", + "▁ch osen", + "▁every one", + "umb led", + "▁a pr", + "▁c am", + "▁pro c", + "▁res umed", + "▁appre ci", + "▁alex and", + "▁a ven", + "▁w ing", + "▁int ense", + "▁high ly", + "▁lu cy", + "▁sol id", + "▁depart ure", + "▁agree able", + "▁exer cise", + "a pped", + "▁w ard", + "▁b ud", + "▁d well", + "ic ate", + "▁de ce", + "▁te acher", + "te nding", + "▁ma x", + "▁requ est", + "▁unex pected", + "▁jose ph", + "c ol", + "▁le ap", + "▁vict im", + "▁s ighed", + "▁for ces", + "ch ie", + "▁fe ed", + "▁sp ort", + "▁dri ft", + "▁wed ding", + "▁brit ish", + "se c", + "▁att itude", + "▁vis ion", + "▁pi pe", + "▁to w", + "▁ha lt", + "▁man ners", + "▁te nd", + "▁fl ood", + "▁comm ission", + "▁gu ide", + "▁obser ve", + "▁conc ern", + "▁rus h", + "▁affect ed", + "f all", + "▁st ret", + "▁co ach", + "▁po ison", + "▁direct ed", + "▁med ic", + "▁g est", + "▁e cho", + "▁young er", + "▁conf usion", + "▁contin ue", + "▁par li", + "▁abs or", + "▁cent re", + "con om", + "▁horri ble", + "r ison", + "▁b ol", + "▁b ath", + "▁g own", + "▁by e", + "▁al oud", + "▁supp l", + "▁prof ound", + "▁er r", + "▁cheer ful", + "w orth", + "▁sent ence", + "▁mist aken", + "▁tor n", + "▁fig ures", + "▁accompan ied", + "▁c atherine", + "▁e conom", + "▁at m", + "▁sh aking", + "um ber", + "▁coun cil", + "l ot", + "▁as ce", + "il ities", + "▁sp ar", + "▁end s", + "▁stra w", + "▁knight s", + "▁atm osp", + "▁sh ade", + "▁br ow", + "▁sp ark", + "▁rest ed", + "▁sent iment", + "▁reco vered", + "▁subject s", + "▁dut ies", + "▁comp osed", + "▁sw ept", + "▁real ity", + "▁sing ular", + "▁trans 
p", + "▁loc ked", + "▁lou is", + "▁assist ance", + "▁w ake", + "re m", + "▁so vere", + "▁un p", + "▁lo ves", + "▁abs urd", + "▁soul s", + "▁immedi ate", + "▁rid ing", + "▁connect ion", + "▁chee k", + "▁magnific ent", + "▁e re", + "▁su gar", + "▁pl ans", + "▁pr ud", + "▁dis e", + "▁ad j", + "▁lean ing", + "▁surr ounded", + "▁we 've", + "▁or n", + "▁ro ll", + "▁pro ble", + "▁str ict", + "▁aw ake", + "▁pra ise", + "▁convin ced", + "▁re le", + "▁fr ame", + "▁bre aking", + "▁cur tain", + "▁stay ed", + "▁div ided", + "▁cra w", + "▁incl ined", + "▁prev ious", + "a ult", + "om en", + "▁st air", + "▁se es", + "▁pr on", + "bo ard", + "▁comple x", + "▁pray er", + "▁pier re", + "▁unfortun ate", + "g s", + "▁gen ius", + "▁incre ase", + "▁suffic iently", + "▁ban ks", + "▁revol ution", + "▁souther n", + "k i", + "o ke", + "▁a ust", + "ed y", + "▁l ing", + "▁count ess", + "▁sleep ing", + "▁dev oted", + "▁ut most", + "▁mark et", + "▁bos om", + "▁b ark", + "▁c ath", + "al t", + "ch ar", + "▁cl ock", + "▁hand ker", + "▁adm in", + "▁sens es", + "▁id ent", + "▁mid night", + "▁connect ed", + "▁perm itted", + "▁h id", + "▁f il", + "▁f aced", + "▁g ift", + "▁ch at", + "▁br id", + "▁nor ther", + "▁hor iz", + "▁colle ge", + "▁handker chief", + "is ions", + "▁re be", + "▁pol ic", + "▁ann ounced", + "oun ce", + "▁n ons", + "▁n urs", + "al es", + "▁fle et", + "▁rag ged", + "▁co ffe", + "▁part ies", + "▁del ay", + "▁sound ed", + "▁c ities", + "▁was h", + "▁app ointed", + "▁night s", + "▁inst it", + "▁god 's", + "▁stri king", + "▁gun s", + "▁aston ishment", + "▁merch ant", + "▁parli ament", + "n al", + "▁a x", + "at ched", + "▁p il", + "▁p age", + "if orm", + "▁pl ate", + "▁thir st", + "▁neg ro", + "▁ru in", + "▁inhabit ants", + "w in", + "ar f", + "▁r ib", + "▁add ition", + "▁arg ument", + "b our", + "▁t ad", + "▁sc en", + "▁gu ests", + "▁wonder ing", + "▁acquaint ed", + "▁int ent", + "pl ess", + "▁destroy ed", + "▁coffe e", + "in ent", + "le br", + "▁re nder", + "▁so b", + "▁de mon", + "▁des 
ir", + "ud ing", + "▁get s", + "▁ass ure", + "▁ra ise", + "▁shar ply", + "▁priv ile", + "▁alar m", + "▁mach ine", + "f ied", + "▁cont ract", + "▁del iber", + "▁dr own", + "▁after ward", + "▁gu est", + "▁concl usion", + "▁ris k", + "▁ignor ant", + "b ury", + "k ind", + "▁p ian", + "an 's", + "ur ies", + "▁so il", + "▁ref er", + "▁command ed", + "▁pract ical", + "▁to ss", + "▁of fe", + "▁be held", + "▁ar ist", + "▁quar ters", + "▁deg rees", + "▁fis her", + "▁nons ense", + "▁m c", + "is p", + "▁me chan", + "ke ep", + "▁doubt less", + "▁viol ence", + "▁neg lect", + "▁fol k", + "l iness", + "▁b ul", + "▁e aster", + "▁lo ft", + "▁cont ained", + "▁ref lection", + "▁ce lebr", + "▁lea f", + "▁concl uded", + "▁distr ict", + "i ation", + "r s", + "▁s cient", + "▁he 'd", + "▁sc orn", + "▁cr ack", + "▁ste ep", + "▁mut tered", + "▁estab lish", + "▁dar ling", + "▁and rew", + "▁ch im", + "qu is", + "▁qu ality", + "▁po lly", + "▁che ck", + "▁cra ft", + "▁trave ll", + "▁univers al", + "in ate", + "▁c ig", + "at ives", + "om p", + "ut en", + "▁j ac", + "▁jo b", + "▁sub m", + "▁read er", + "▁le is", + "▁em ph", + "▁surr ound", + "o x", + "p ent", + "it ate", + "▁ex tended", + "▁le v", + "▁over t", + "▁ret ired", + "▁pu zz", + "u able", + "▁li br", + "▁ch in", + "▁sp l", + "▁real ized", + "▁ca uses", + "▁pun ishment", + "▁phys ic", + "▁leis ure", + "c an", + "▁w ave", + "▁sh ake", + "▁char m", + "▁belong ed", + "m ber", + "▁b ones", + "▁g as", + "▁r ange", + "▁pre c", + "▁sm ell", + "▁may be", + "▁inv ited", + "▁troub les", + "▁t ables", + "an ch", + "ic ip", + "▁j une", + "▁ab o", + "▁ag es", + "▁any where", + "ff in", + "▁dr unk", + "▁proper ly", + "▁loc al", + "▁impro ve", + "▁atmosp here", + "▁d ir", + "▁he 'll", + "▁re b", + "▁r ang", + "▁comp ass", + "▁lie uten", + "▁lean ed", + "▁firm ly", + "▁n ations", + "▁ha y", + "▁we pt", + "▁r al", + "▁con ven", + "▁un iform", + "▁jul ia", + "e em", + "r ass", + "▁tr ack", + "▁comm er", + "▁bus hes", + "▁obs c", + "▁sort s", + "▁difficult 
ies", + "▁intellect ual", + "▁introdu ced", + "m ith", + "▁t ro", + "id ay", + "▁re ndered", + "▁r out", + "ad d", + "▁pl un", + "▁thr owing", + "▁hum ble", + "▁pol ite", + "▁num erous", + "▁move ments", + "▁success ful", + "▁cand le", + "▁separ ate", + "▁protect ion", + "▁thom as", + "▁enorm ous", + "▁un b", + "▁rep ub", + "▁sun sh", + "▁desce nded", + "▁unus ual", + "i ved", + "▁bl az", + "▁show s", + "▁sim pl", + "▁cat tle", + "▁cre pt", + "▁aston ished", + "▁desert ed", + "▁l ap", + "ar se", + "▁ne arest", + "ud es", + "▁ent ering", + "▁ide al", + "stand ing", + "nd ers", + "▁so re", + "ain e", + "▁cl os", + "▁our s", + "▁where ver", + "▁ter m", + "▁vis ited", + "▁cal cul", + "d s", + "▁b ase", + "▁g ates", + "▁st amp", + "▁li ber", + "▁offic ial", + "▁e rect", + "▁al t", + "el ia", + "▁har mon", + "▁pain ful", + "▁burn ed", + "▁repub lic", + "u er", + "▁l ately", + "▁it al", + "am m", + "▁te ar", + "▁act ions", + "▁fin al", + "▁start led", + "▁sens ation", + "▁fat al", + "ol ic", + "▁fl ash", + "▁app et", + "▁strong er", + "▁num bers", + "▁grat itude", + "▁fem ale", + "▁wes tern", + "l est" + ] + } +} \ No newline at end of file diff --git a/out/checkpoint-16000/tokenizer_config.json b/out/checkpoint-16000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0073e6415da746fc5c44a52e02785cb94510efa4 --- /dev/null +++ b/out/checkpoint-16000/tokenizer_config.json @@ -0,0 +1,9253 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|audio:0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|audio:1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|audio:2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "<|audio:3|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "<|audio:4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "5": { + "content": "<|audio:5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "6": { + "content": "<|audio:6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "7": { + "content": "<|audio:7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "8": { + "content": "<|audio:8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "9": { + "content": "<|audio:9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "10": { + "content": "<|audio:10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "11": { + "content": "<|audio:11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "12": { + "content": "<|audio:12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "13": { + "content": "<|audio:13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "14": { + "content": "<|audio:14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "15": { + "content": "<|audio:15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "16": { + "content": "<|audio:16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "17": { + "content": 
"<|audio:17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "18": { + "content": "<|audio:18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "19": { + "content": "<|audio:19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "20": { + "content": "<|audio:20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21": { + "content": "<|audio:21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "22": { + "content": "<|audio:22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "23": { + "content": "<|audio:23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "24": { + "content": "<|audio:24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "25": { + "content": "<|audio:25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "26": { + "content": "<|audio:26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "27": { + "content": "<|audio:27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "28": { + "content": "<|audio:28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "29": { + "content": "<|audio:29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "30": { + "content": "<|audio:30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "31": { + "content": "<|audio:31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32": { + "content": "<|audio:32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "33": { + "content": "<|audio:33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "34": { + "content": "<|audio:34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "35": { + "content": "<|audio:35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "36": { + "content": "<|audio:36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "37": { + "content": "<|audio:37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "38": { + "content": "<|audio:38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "39": { + "content": "<|audio:39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "40": { + "content": "<|audio:40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "41": { + "content": "<|audio:41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "42": { + "content": "<|audio:42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "43": { + "content": "<|audio:43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "44": { + "content": "<|audio:44|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "45": { + "content": "<|audio:45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "46": { + "content": "<|audio:46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "47": { + "content": "<|audio:47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "48": { + "content": "<|audio:48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "49": { + "content": "<|audio:49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "50": { + "content": "<|audio:50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "51": { + "content": "<|audio:51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "52": { + "content": "<|audio:52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "53": { + "content": "<|audio:53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "54": { + "content": "<|audio:54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "55": { + "content": "<|audio:55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "56": { + "content": "<|audio:56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "57": { + "content": "<|audio:57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "58": { + "content": "<|audio:58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "59": { + "content": "<|audio:59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "60": { + "content": "<|audio:60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "61": { + "content": "<|audio:61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "62": { + "content": "<|audio:62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "63": { + "content": "<|audio:63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "64": { + "content": "<|audio:64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "65": { + "content": "<|audio:65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "66": { + "content": "<|audio:66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "67": { + "content": "<|audio:67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "68": { + "content": "<|audio:68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "69": { + "content": "<|audio:69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "70": { + "content": "<|audio:70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "71": { + "content": "<|audio:71|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "72": { + "content": "<|audio:72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "73": { + "content": "<|audio:73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "74": { + "content": "<|audio:74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "75": { + "content": "<|audio:75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "76": { + "content": "<|audio:76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "77": { + "content": "<|audio:77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "78": { + "content": "<|audio:78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "79": { + "content": "<|audio:79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "80": { + "content": "<|audio:80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "81": { + "content": "<|audio:81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "82": { + "content": "<|audio:82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "83": { + "content": "<|audio:83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "84": { + "content": "<|audio:84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"85": { + "content": "<|audio:85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "86": { + "content": "<|audio:86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "87": { + "content": "<|audio:87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "88": { + "content": "<|audio:88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "89": { + "content": "<|audio:89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "90": { + "content": "<|audio:90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "91": { + "content": "<|audio:91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "92": { + "content": "<|audio:92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "93": { + "content": "<|audio:93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "94": { + "content": "<|audio:94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "95": { + "content": "<|audio:95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "96": { + "content": "<|audio:96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "97": { + "content": "<|audio:97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "98": { + "content": "<|audio:98|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "99": { + "content": "<|audio:99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100": { + "content": "<|audio:100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101": { + "content": "<|audio:101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "102": { + "content": "<|audio:102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "103": { + "content": "<|audio:103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "104": { + "content": "<|audio:104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "105": { + "content": "<|audio:105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "106": { + "content": "<|audio:106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "<|audio:107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "<|audio:108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "109": { + "content": "<|audio:109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "110": { + "content": "<|audio:110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "111": { + "content": "<|audio:111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, 
+ "112": { + "content": "<|audio:112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "113": { + "content": "<|audio:113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "114": { + "content": "<|audio:114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "115": { + "content": "<|audio:115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "116": { + "content": "<|audio:116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "117": { + "content": "<|audio:117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "118": { + "content": "<|audio:118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "119": { + "content": "<|audio:119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "120": { + "content": "<|audio:120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "121": { + "content": "<|audio:121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "122": { + "content": "<|audio:122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "123": { + "content": "<|audio:123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "124": { + "content": "<|audio:124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "125": { + "content": "<|audio:125|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "126": { + "content": "<|audio:126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "127": { + "content": "<|audio:127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128": { + "content": "<|audio:128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "129": { + "content": "<|audio:129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "130": { + "content": "<|audio:130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "131": { + "content": "<|audio:131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "132": { + "content": "<|audio:132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "133": { + "content": "<|audio:133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "134": { + "content": "<|audio:134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "135": { + "content": "<|audio:135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "136": { + "content": "<|audio:136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "137": { + "content": "<|audio:137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "138": { + "content": "<|audio:138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "139": { + "content": "<|audio:139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "140": { + "content": "<|audio:140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "141": { + "content": "<|audio:141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "142": { + "content": "<|audio:142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "143": { + "content": "<|audio:143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "144": { + "content": "<|audio:144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "145": { + "content": "<|audio:145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "146": { + "content": "<|audio:146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "147": { + "content": "<|audio:147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "148": { + "content": "<|audio:148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "149": { + "content": "<|audio:149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "150": { + "content": "<|audio:150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151": { + "content": "<|audio:151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "152": { + "content": 
"<|audio:152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "153": { + "content": "<|audio:153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "154": { + "content": "<|audio:154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "155": { + "content": "<|audio:155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "156": { + "content": "<|audio:156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "157": { + "content": "<|audio:157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "158": { + "content": "<|audio:158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "159": { + "content": "<|audio:159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "160": { + "content": "<|audio:160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "161": { + "content": "<|audio:161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "162": { + "content": "<|audio:162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "163": { + "content": "<|audio:163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "164": { + "content": "<|audio:164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "165": { + "content": "<|audio:165|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "166": { + "content": "<|audio:166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "167": { + "content": "<|audio:167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "168": { + "content": "<|audio:168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "169": { + "content": "<|audio:169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "170": { + "content": "<|audio:170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "171": { + "content": "<|audio:171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "172": { + "content": "<|audio:172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "173": { + "content": "<|audio:173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "174": { + "content": "<|audio:174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "175": { + "content": "<|audio:175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "176": { + "content": "<|audio:176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "177": { + "content": "<|audio:177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "178": { + "content": "<|audio:178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "179": { + "content": "<|audio:179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "180": { + "content": "<|audio:180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "181": { + "content": "<|audio:181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "182": { + "content": "<|audio:182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "183": { + "content": "<|audio:183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "184": { + "content": "<|audio:184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "185": { + "content": "<|audio:185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "186": { + "content": "<|audio:186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "187": { + "content": "<|audio:187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "188": { + "content": "<|audio:188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "189": { + "content": "<|audio:189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "190": { + "content": "<|audio:190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "191": { + "content": "<|audio:191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "192": { + "content": "<|audio:192|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "193": { + "content": "<|audio:193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "194": { + "content": "<|audio:194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "195": { + "content": "<|audio:195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "196": { + "content": "<|audio:196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "197": { + "content": "<|audio:197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "198": { + "content": "<|audio:198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "199": { + "content": "<|audio:199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200": { + "content": "<|audio:200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "201": { + "content": "<|audio:201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "202": { + "content": "<|audio:202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "203": { + "content": "<|audio:203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "204": { + "content": "<|audio:204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "205": { + "content": "<|audio:205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "206": { + "content": "<|audio:206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "207": { + "content": "<|audio:207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "208": { + "content": "<|audio:208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "209": { + "content": "<|audio:209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "210": { + "content": "<|audio:210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "211": { + "content": "<|audio:211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "212": { + "content": "<|audio:212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "213": { + "content": "<|audio:213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "214": { + "content": "<|audio:214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "215": { + "content": "<|audio:215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "216": { + "content": "<|audio:216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "217": { + "content": "<|audio:217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "218": { + "content": "<|audio:218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "219": { + "content": 
"<|audio:219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "220": { + "content": "<|audio:220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "221": { + "content": "<|audio:221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "222": { + "content": "<|audio:222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "223": { + "content": "<|audio:223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "224": { + "content": "<|audio:224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "225": { + "content": "<|audio:225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "226": { + "content": "<|audio:226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "227": { + "content": "<|audio:227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "228": { + "content": "<|audio:228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "229": { + "content": "<|audio:229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "230": { + "content": "<|audio:230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "231": { + "content": "<|audio:231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "232": { + "content": "<|audio:232|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "233": { + "content": "<|audio:233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "234": { + "content": "<|audio:234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "235": { + "content": "<|audio:235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "236": { + "content": "<|audio:236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "237": { + "content": "<|audio:237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "238": { + "content": "<|audio:238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "239": { + "content": "<|audio:239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "240": { + "content": "<|audio:240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "241": { + "content": "<|audio:241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "242": { + "content": "<|audio:242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "243": { + "content": "<|audio:243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "244": { + "content": "<|audio:244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "245": { + "content": "<|audio:245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "246": { + "content": "<|audio:246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "247": { + "content": "<|audio:247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "248": { + "content": "<|audio:248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "249": { + "content": "<|audio:249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250": { + "content": "<|audio:250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "251": { + "content": "<|audio:251|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "252": { + "content": "<|audio:252|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "253": { + "content": "<|audio:253|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "254": { + "content": "<|audio:254|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255": { + "content": "<|audio:255|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256": { + "content": "<|audio:256|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "257": { + "content": "<|audio:257|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "258": { + "content": "<|audio:258|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "259": { + "content": "<|audio:259|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "260": { + "content": "<|audio:260|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "261": { + "content": "<|audio:261|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "262": { + "content": "<|audio:262|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "263": { + "content": "<|audio:263|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "264": { + "content": "<|audio:264|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "265": { + "content": "<|audio:265|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "266": { + "content": "<|audio:266|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "267": { + "content": "<|audio:267|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "268": { + "content": "<|audio:268|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "269": { + "content": "<|audio:269|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "270": { + "content": "<|audio:270|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "271": { + "content": "<|audio:271|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "272": { + "content": "<|audio:272|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "273": { + "content": "<|audio:273|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "274": { + "content": "<|audio:274|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "275": { + "content": "<|audio:275|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "276": { + "content": "<|audio:276|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "277": { + "content": "<|audio:277|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "278": { + "content": "<|audio:278|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "279": { + "content": "<|audio:279|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "280": { + "content": "<|audio:280|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "281": { + "content": "<|audio:281|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "282": { + "content": "<|audio:282|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "283": { + "content": "<|audio:283|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "284": { + "content": "<|audio:284|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "285": { + "content": "<|audio:285|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "286": { + "content": 
"<|audio:286|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "287": { + "content": "<|audio:287|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "288": { + "content": "<|audio:288|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "289": { + "content": "<|audio:289|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "290": { + "content": "<|audio:290|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "291": { + "content": "<|audio:291|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "292": { + "content": "<|audio:292|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "293": { + "content": "<|audio:293|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "294": { + "content": "<|audio:294|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "295": { + "content": "<|audio:295|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "296": { + "content": "<|audio:296|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "297": { + "content": "<|audio:297|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "298": { + "content": "<|audio:298|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "299": { + "content": "<|audio:299|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "300": { + "content": "<|audio:300|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "301": { + "content": "<|audio:301|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "302": { + "content": "<|audio:302|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "303": { + "content": "<|audio:303|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "304": { + "content": "<|audio:304|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "305": { + "content": "<|audio:305|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "306": { + "content": "<|audio:306|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "307": { + "content": "<|audio:307|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "308": { + "content": "<|audio:308|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "309": { + "content": "<|audio:309|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "310": { + "content": "<|audio:310|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "311": { + "content": "<|audio:311|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "312": { + "content": "<|audio:312|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "313": { + "content": "<|audio:313|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "314": { + "content": "<|audio:314|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "315": { + "content": "<|audio:315|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "316": { + "content": "<|audio:316|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "317": { + "content": "<|audio:317|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "318": { + "content": "<|audio:318|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "319": { + "content": "<|audio:319|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "320": { + "content": "<|audio:320|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "321": { + "content": "<|audio:321|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "322": { + "content": "<|audio:322|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "323": { + "content": "<|audio:323|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "324": { + "content": "<|audio:324|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "325": { + "content": "<|audio:325|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "326": { + "content": "<|audio:326|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "327": { + "content": "<|audio:327|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "328": { + "content": "<|audio:328|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "329": { + "content": "<|audio:329|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "330": { + "content": "<|audio:330|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "331": { + "content": "<|audio:331|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "332": { + "content": "<|audio:332|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "333": { + "content": "<|audio:333|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "334": { + "content": "<|audio:334|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "335": { + "content": "<|audio:335|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "336": { + "content": "<|audio:336|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "337": { + "content": "<|audio:337|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "338": { + "content": "<|audio:338|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "339": { + "content": "<|audio:339|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "340": { + "content": "<|audio:340|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "341": { + "content": "<|audio:341|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "342": { + "content": "<|audio:342|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "343": { + "content": "<|audio:343|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "344": { + "content": "<|audio:344|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "345": { + "content": "<|audio:345|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "346": { + "content": "<|audio:346|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "347": { + "content": "<|audio:347|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "348": { + "content": "<|audio:348|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "349": { + "content": "<|audio:349|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "350": { + "content": "<|audio:350|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "351": { + "content": "<|audio:351|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "352": { + "content": "<|audio:352|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "353": { + "content": 
"<|audio:353|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "354": { + "content": "<|audio:354|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "355": { + "content": "<|audio:355|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "356": { + "content": "<|audio:356|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "357": { + "content": "<|audio:357|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "358": { + "content": "<|audio:358|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "359": { + "content": "<|audio:359|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "360": { + "content": "<|audio:360|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "361": { + "content": "<|audio:361|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "362": { + "content": "<|audio:362|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "363": { + "content": "<|audio:363|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "364": { + "content": "<|audio:364|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "365": { + "content": "<|audio:365|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "366": { + "content": "<|audio:366|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "367": { + "content": "<|audio:367|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "368": { + "content": "<|audio:368|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "369": { + "content": "<|audio:369|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "370": { + "content": "<|audio:370|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "371": { + "content": "<|audio:371|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "372": { + "content": "<|audio:372|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "373": { + "content": "<|audio:373|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "374": { + "content": "<|audio:374|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "375": { + "content": "<|audio:375|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "376": { + "content": "<|audio:376|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "377": { + "content": "<|audio:377|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "378": { + "content": "<|audio:378|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "379": { + "content": "<|audio:379|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "380": { + "content": "<|audio:380|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "381": { + "content": "<|audio:381|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "382": { + "content": "<|audio:382|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "383": { + "content": "<|audio:383|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "384": { + "content": "<|audio:384|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "385": { + "content": "<|audio:385|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "386": { + "content": "<|audio:386|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "387": { + "content": "<|audio:387|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "388": { + "content": "<|audio:388|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "389": { + "content": "<|audio:389|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "390": { + "content": "<|audio:390|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "391": { + "content": "<|audio:391|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "392": { + "content": "<|audio:392|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "393": { + "content": "<|audio:393|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "394": { + "content": "<|audio:394|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "395": { + "content": "<|audio:395|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "396": { + "content": "<|audio:396|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "397": { + "content": "<|audio:397|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "398": { + "content": "<|audio:398|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "399": { + "content": "<|audio:399|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "400": { + "content": "<|audio:400|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "401": { + "content": "<|audio:401|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "402": { + "content": "<|audio:402|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "403": { + "content": "<|audio:403|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "404": { + "content": "<|audio:404|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "405": { + "content": "<|audio:405|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "406": { + "content": "<|audio:406|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "407": { + "content": "<|audio:407|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "408": { + "content": "<|audio:408|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "409": { + "content": "<|audio:409|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "410": { + "content": "<|audio:410|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "411": { + "content": "<|audio:411|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "412": { + "content": "<|audio:412|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "413": { + "content": "<|audio:413|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "414": { + "content": "<|audio:414|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "415": { + "content": "<|audio:415|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "416": { + "content": "<|audio:416|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "417": { + "content": "<|audio:417|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "418": { + "content": "<|audio:418|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "419": { + "content": "<|audio:419|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "420": { + "content": 
"<|audio:420|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "421": { + "content": "<|audio:421|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "422": { + "content": "<|audio:422|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "423": { + "content": "<|audio:423|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "424": { + "content": "<|audio:424|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "425": { + "content": "<|audio:425|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "426": { + "content": "<|audio:426|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "427": { + "content": "<|audio:427|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "428": { + "content": "<|audio:428|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "429": { + "content": "<|audio:429|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "430": { + "content": "<|audio:430|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "431": { + "content": "<|audio:431|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "432": { + "content": "<|audio:432|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "433": { + "content": "<|audio:433|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "434": { + "content": "<|audio:434|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "435": { + "content": "<|audio:435|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "436": { + "content": "<|audio:436|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "437": { + "content": "<|audio:437|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "438": { + "content": "<|audio:438|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "439": { + "content": "<|audio:439|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "440": { + "content": "<|audio:440|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "441": { + "content": "<|audio:441|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "442": { + "content": "<|audio:442|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "443": { + "content": "<|audio:443|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "444": { + "content": "<|audio:444|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "445": { + "content": "<|audio:445|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "446": { + "content": "<|audio:446|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "447": { + "content": "<|audio:447|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "448": { + "content": "<|audio:448|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "449": { + "content": "<|audio:449|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "450": { + "content": "<|audio:450|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "451": { + "content": "<|audio:451|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "452": { + "content": "<|audio:452|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "453": { + "content": "<|audio:453|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "454": { + "content": "<|audio:454|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "455": { + "content": "<|audio:455|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "456": { + "content": "<|audio:456|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "457": { + "content": "<|audio:457|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "458": { + "content": "<|audio:458|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "459": { + "content": "<|audio:459|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "460": { + "content": "<|audio:460|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "461": { + "content": "<|audio:461|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "462": { + "content": "<|audio:462|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "463": { + "content": "<|audio:463|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "464": { + "content": "<|audio:464|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "465": { + "content": "<|audio:465|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "466": { + "content": "<|audio:466|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "467": { + "content": "<|audio:467|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "468": { + "content": "<|audio:468|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "469": { + "content": "<|audio:469|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "470": { + "content": "<|audio:470|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "471": { + "content": "<|audio:471|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "472": { + "content": "<|audio:472|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "473": { + "content": "<|audio:473|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "474": { + "content": "<|audio:474|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "475": { + "content": "<|audio:475|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "476": { + "content": "<|audio:476|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "477": { + "content": "<|audio:477|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "478": { + "content": "<|audio:478|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "479": { + "content": "<|audio:479|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "480": { + "content": "<|audio:480|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "481": { + "content": "<|audio:481|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "482": { + "content": "<|audio:482|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "483": { + "content": "<|audio:483|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "484": { + "content": "<|audio:484|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "485": { + "content": "<|audio:485|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "486": { + "content": "<|audio:486|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "487": { + "content": 
"<|audio:487|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "488": { + "content": "<|audio:488|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "489": { + "content": "<|audio:489|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "490": { + "content": "<|audio:490|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "491": { + "content": "<|audio:491|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "492": { + "content": "<|audio:492|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "493": { + "content": "<|audio:493|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "494": { + "content": "<|audio:494|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "495": { + "content": "<|audio:495|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "496": { + "content": "<|audio:496|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "497": { + "content": "<|audio:497|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "498": { + "content": "<|audio:498|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "499": { + "content": "<|audio:499|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "500": { + "content": "<|audio:500|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "501": { + "content": "<|audio:501|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "502": { + "content": "<|audio:502|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "503": { + "content": "<|audio:503|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "504": { + "content": "<|audio:504|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "505": { + "content": "<|audio:505|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "506": { + "content": "<|audio:506|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "507": { + "content": "<|audio:507|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "508": { + "content": "<|audio:508|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "509": { + "content": "<|audio:509|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "510": { + "content": "<|audio:510|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "511": { + "content": "<|audio:511|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "512": { + "content": "<|audio:512|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "513": { + "content": "<|audio:513|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "514": { + "content": "<|audio:514|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "515": { + "content": "<|audio:515|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "516": { + "content": "<|audio:516|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "517": { + "content": "<|audio:517|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "518": { + "content": "<|audio:518|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "519": { + "content": "<|audio:519|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "520": { + "content": "<|audio:520|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "521": { + "content": "<|audio:521|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "522": { + "content": "<|audio:522|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "523": { + "content": "<|audio:523|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "524": { + "content": "<|audio:524|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "525": { + "content": "<|audio:525|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "526": { + "content": "<|audio:526|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "527": { + "content": "<|audio:527|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "528": { + "content": "<|audio:528|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "529": { + "content": "<|audio:529|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "530": { + "content": "<|audio:530|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "531": { + "content": "<|audio:531|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "532": { + "content": "<|audio:532|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "533": { + "content": "<|audio:533|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "534": { + "content": "<|audio:534|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "535": { + "content": "<|audio:535|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "536": { + "content": "<|audio:536|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "537": { + "content": "<|audio:537|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "538": { + "content": "<|audio:538|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "539": { + "content": "<|audio:539|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "540": { + "content": "<|audio:540|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "541": { + "content": "<|audio:541|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "542": { + "content": "<|audio:542|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "543": { + "content": "<|audio:543|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "544": { + "content": "<|audio:544|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "545": { + "content": "<|audio:545|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "546": { + "content": "<|audio:546|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "547": { + "content": "<|audio:547|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "548": { + "content": "<|audio:548|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "549": { + "content": "<|audio:549|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "550": { + "content": "<|audio:550|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "551": { + "content": "<|audio:551|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "552": { + "content": "<|audio:552|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "553": { + "content": "<|audio:553|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "554": { + "content": 
"<|audio:554|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "555": { + "content": "<|audio:555|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "556": { + "content": "<|audio:556|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "557": { + "content": "<|audio:557|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "558": { + "content": "<|audio:558|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "559": { + "content": "<|audio:559|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "560": { + "content": "<|audio:560|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "561": { + "content": "<|audio:561|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "562": { + "content": "<|audio:562|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "563": { + "content": "<|audio:563|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "564": { + "content": "<|audio:564|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "565": { + "content": "<|audio:565|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "566": { + "content": "<|audio:566|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "567": { + "content": "<|audio:567|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "568": { + "content": "<|audio:568|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "569": { + "content": "<|audio:569|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "570": { + "content": "<|audio:570|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "571": { + "content": "<|audio:571|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "572": { + "content": "<|audio:572|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "573": { + "content": "<|audio:573|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "574": { + "content": "<|audio:574|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "575": { + "content": "<|audio:575|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "576": { + "content": "<|audio:576|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "577": { + "content": "<|audio:577|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "578": { + "content": "<|audio:578|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "579": { + "content": "<|audio:579|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "580": { + "content": "<|audio:580|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "581": { + "content": "<|audio:581|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "582": { + "content": "<|audio:582|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "583": { + "content": "<|audio:583|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "584": { + "content": "<|audio:584|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "585": { + "content": "<|audio:585|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "586": { + "content": "<|audio:586|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "587": { + "content": "<|audio:587|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "588": { + "content": "<|audio:588|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "589": { + "content": "<|audio:589|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "590": { + "content": "<|audio:590|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "591": { + "content": "<|audio:591|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "592": { + "content": "<|audio:592|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "593": { + "content": "<|audio:593|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "594": { + "content": "<|audio:594|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "595": { + "content": "<|audio:595|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "596": { + "content": "<|audio:596|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "597": { + "content": "<|audio:597|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "598": { + "content": "<|audio:598|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "599": { + "content": "<|audio:599|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "600": { + "content": "<|audio:600|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "601": { + "content": "<|audio:601|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "602": { + "content": "<|audio:602|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "603": { + "content": "<|audio:603|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "604": { + "content": "<|audio:604|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "605": { + "content": "<|audio:605|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "606": { + "content": "<|audio:606|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "607": { + "content": "<|audio:607|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "608": { + "content": "<|audio:608|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "609": { + "content": "<|audio:609|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "610": { + "content": "<|audio:610|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "611": { + "content": "<|audio:611|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "612": { + "content": "<|audio:612|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "613": { + "content": "<|audio:613|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "614": { + "content": "<|audio:614|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "615": { + "content": "<|audio:615|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "616": { + "content": "<|audio:616|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "617": { + "content": "<|audio:617|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "618": { + "content": "<|audio:618|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "619": { + "content": "<|audio:619|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "620": { + "content": "<|audio:620|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "621": { + "content": 
"<|audio:621|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "622": { + "content": "<|audio:622|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "623": { + "content": "<|audio:623|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "624": { + "content": "<|audio:624|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "625": { + "content": "<|audio:625|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "626": { + "content": "<|audio:626|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "627": { + "content": "<|audio:627|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "628": { + "content": "<|audio:628|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "629": { + "content": "<|audio:629|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "630": { + "content": "<|audio:630|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "631": { + "content": "<|audio:631|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "632": { + "content": "<|audio:632|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "633": { + "content": "<|audio:633|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "634": { + "content": "<|audio:634|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "635": { + "content": "<|audio:635|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "636": { + "content": "<|audio:636|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "637": { + "content": "<|audio:637|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "638": { + "content": "<|audio:638|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "639": { + "content": "<|audio:639|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "640": { + "content": "<|audio:640|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "641": { + "content": "<|audio:641|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "642": { + "content": "<|audio:642|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "643": { + "content": "<|audio:643|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "644": { + "content": "<|audio:644|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "645": { + "content": "<|audio:645|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "646": { + "content": "<|audio:646|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "647": { + "content": "<|audio:647|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "648": { + "content": "<|audio:648|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "649": { + "content": "<|audio:649|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "650": { + "content": "<|audio:650|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "651": { + "content": "<|audio:651|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "652": { + "content": "<|audio:652|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "653": { + "content": "<|audio:653|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "654": { + "content": "<|audio:654|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "655": { + "content": "<|audio:655|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "656": { + "content": "<|audio:656|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "657": { + "content": "<|audio:657|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "658": { + "content": "<|audio:658|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "659": { + "content": "<|audio:659|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "660": { + "content": "<|audio:660|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "661": { + "content": "<|audio:661|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "662": { + "content": "<|audio:662|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "663": { + "content": "<|audio:663|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "664": { + "content": "<|audio:664|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "665": { + "content": "<|audio:665|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "666": { + "content": "<|audio:666|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "667": { + "content": "<|audio:667|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "668": { + "content": "<|audio:668|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "669": { + "content": "<|audio:669|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "670": { + "content": "<|audio:670|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "671": { + "content": "<|audio:671|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "672": { + "content": "<|audio:672|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "673": { + "content": "<|audio:673|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "674": { + "content": "<|audio:674|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "675": { + "content": "<|audio:675|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "676": { + "content": "<|audio:676|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "677": { + "content": "<|audio:677|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "678": { + "content": "<|audio:678|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "679": { + "content": "<|audio:679|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "680": { + "content": "<|audio:680|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "681": { + "content": "<|audio:681|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "682": { + "content": "<|audio:682|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "683": { + "content": "<|audio:683|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "684": { + "content": "<|audio:684|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "685": { + "content": "<|audio:685|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "686": { + "content": "<|audio:686|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "687": { + "content": "<|audio:687|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "688": { + "content": 
"<|audio:688|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "689": { + "content": "<|audio:689|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "690": { + "content": "<|audio:690|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "691": { + "content": "<|audio:691|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "692": { + "content": "<|audio:692|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "693": { + "content": "<|audio:693|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "694": { + "content": "<|audio:694|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "695": { + "content": "<|audio:695|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "696": { + "content": "<|audio:696|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "697": { + "content": "<|audio:697|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "698": { + "content": "<|audio:698|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "699": { + "content": "<|audio:699|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "700": { + "content": "<|audio:700|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "701": { + "content": "<|audio:701|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "702": { + "content": "<|audio:702|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "703": { + "content": "<|audio:703|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "704": { + "content": "<|audio:704|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "705": { + "content": "<|audio:705|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "706": { + "content": "<|audio:706|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "707": { + "content": "<|audio:707|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "708": { + "content": "<|audio:708|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "709": { + "content": "<|audio:709|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "710": { + "content": "<|audio:710|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "711": { + "content": "<|audio:711|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "712": { + "content": "<|audio:712|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "713": { + "content": "<|audio:713|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "714": { + "content": "<|audio:714|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "715": { + "content": "<|audio:715|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "716": { + "content": "<|audio:716|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "717": { + "content": "<|audio:717|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "718": { + "content": "<|audio:718|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "719": { + "content": "<|audio:719|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "720": { + "content": "<|audio:720|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "721": { + "content": "<|audio:721|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "722": { + "content": "<|audio:722|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "723": { + "content": "<|audio:723|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "724": { + "content": "<|audio:724|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "725": { + "content": "<|audio:725|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "726": { + "content": "<|audio:726|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "727": { + "content": "<|audio:727|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "728": { + "content": "<|audio:728|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "729": { + "content": "<|audio:729|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "730": { + "content": "<|audio:730|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "731": { + "content": "<|audio:731|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "732": { + "content": "<|audio:732|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "733": { + "content": "<|audio:733|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "734": { + "content": "<|audio:734|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "735": { + "content": "<|audio:735|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "736": { + "content": "<|audio:736|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "737": { + "content": "<|audio:737|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "738": { + "content": "<|audio:738|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "739": { + "content": "<|audio:739|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "740": { + "content": "<|audio:740|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "741": { + "content": "<|audio:741|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "742": { + "content": "<|audio:742|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "743": { + "content": "<|audio:743|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "744": { + "content": "<|audio:744|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "745": { + "content": "<|audio:745|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "746": { + "content": "<|audio:746|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "747": { + "content": "<|audio:747|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "748": { + "content": "<|audio:748|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "749": { + "content": "<|audio:749|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "750": { + "content": "<|audio:750|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "751": { + "content": "<|audio:751|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "752": { + "content": "<|audio:752|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "753": { + "content": "<|audio:753|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "754": { + "content": "<|audio:754|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "755": { + "content": 
"<|audio:755|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "756": { + "content": "<|audio:756|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "757": { + "content": "<|audio:757|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "758": { + "content": "<|audio:758|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "759": { + "content": "<|audio:759|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "760": { + "content": "<|audio:760|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "761": { + "content": "<|audio:761|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "762": { + "content": "<|audio:762|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "763": { + "content": "<|audio:763|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "764": { + "content": "<|audio:764|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "765": { + "content": "<|audio:765|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "766": { + "content": "<|audio:766|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "767": { + "content": "<|audio:767|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "768": { + "content": "<|audio:768|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "769": { + "content": "<|audio:769|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "770": { + "content": "<|audio:770|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "771": { + "content": "<|audio:771|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "772": { + "content": "<|audio:772|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "773": { + "content": "<|audio:773|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "774": { + "content": "<|audio:774|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "775": { + "content": "<|audio:775|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "776": { + "content": "<|audio:776|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "777": { + "content": "<|audio:777|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "778": { + "content": "<|audio:778|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "779": { + "content": "<|audio:779|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "780": { + "content": "<|audio:780|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "781": { + "content": "<|audio:781|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "782": { + "content": "<|audio:782|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "783": { + "content": "<|audio:783|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "784": { + "content": "<|audio:784|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "785": { + "content": "<|audio:785|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "786": { + "content": "<|audio:786|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "787": { + "content": "<|audio:787|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "788": { + "content": "<|audio:788|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "789": { + "content": "<|audio:789|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "790": { + "content": "<|audio:790|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "791": { + "content": "<|audio:791|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "792": { + "content": "<|audio:792|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "793": { + "content": "<|audio:793|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "794": { + "content": "<|audio:794|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "795": { + "content": "<|audio:795|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "796": { + "content": "<|audio:796|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "797": { + "content": "<|audio:797|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "798": { + "content": "<|audio:798|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "799": { + "content": "<|audio:799|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "800": { + "content": "<|audio:800|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "801": { + "content": "<|audio:801|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "802": { + "content": "<|audio:802|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "803": { + "content": "<|audio:803|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "804": { + "content": "<|audio:804|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "805": { + "content": "<|audio:805|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "806": { + "content": "<|audio:806|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "807": { + "content": "<|audio:807|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "808": { + "content": "<|audio:808|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "809": { + "content": "<|audio:809|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "810": { + "content": "<|audio:810|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "811": { + "content": "<|audio:811|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "812": { + "content": "<|audio:812|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "813": { + "content": "<|audio:813|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "814": { + "content": "<|audio:814|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "815": { + "content": "<|audio:815|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "816": { + "content": "<|audio:816|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "817": { + "content": "<|audio:817|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "818": { + "content": "<|audio:818|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "819": { + "content": "<|audio:819|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "820": { + "content": "<|audio:820|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "821": { + "content": "<|audio:821|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "822": { + "content": 
"<|audio:822|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "823": { + "content": "<|audio:823|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "824": { + "content": "<|audio:824|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "825": { + "content": "<|audio:825|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "826": { + "content": "<|audio:826|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "827": { + "content": "<|audio:827|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "828": { + "content": "<|audio:828|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "829": { + "content": "<|audio:829|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "830": { + "content": "<|audio:830|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "831": { + "content": "<|audio:831|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "832": { + "content": "<|audio:832|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "833": { + "content": "<|audio:833|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "834": { + "content": "<|audio:834|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "835": { + "content": "<|audio:835|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "836": { + "content": "<|audio:836|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "837": { + "content": "<|audio:837|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "838": { + "content": "<|audio:838|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "839": { + "content": "<|audio:839|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "840": { + "content": "<|audio:840|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "841": { + "content": "<|audio:841|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "842": { + "content": "<|audio:842|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "843": { + "content": "<|audio:843|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "844": { + "content": "<|audio:844|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "845": { + "content": "<|audio:845|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "846": { + "content": "<|audio:846|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "847": { + "content": "<|audio:847|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "848": { + "content": "<|audio:848|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "849": { + "content": "<|audio:849|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "850": { + "content": "<|audio:850|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "851": { + "content": "<|audio:851|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "852": { + "content": "<|audio:852|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "853": { + "content": "<|audio:853|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "854": { + "content": "<|audio:854|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "855": { + "content": "<|audio:855|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "856": { + "content": "<|audio:856|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "857": { + "content": "<|audio:857|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "858": { + "content": "<|audio:858|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "859": { + "content": "<|audio:859|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "860": { + "content": "<|audio:860|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "861": { + "content": "<|audio:861|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "862": { + "content": "<|audio:862|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "863": { + "content": "<|audio:863|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "864": { + "content": "<|audio:864|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "865": { + "content": "<|audio:865|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "866": { + "content": "<|audio:866|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "867": { + "content": "<|audio:867|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "868": { + "content": "<|audio:868|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "869": { + "content": "<|audio:869|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "870": { + "content": "<|audio:870|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "871": { + "content": "<|audio:871|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "872": { + "content": "<|audio:872|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "873": { + "content": "<|audio:873|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "874": { + "content": "<|audio:874|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "875": { + "content": "<|audio:875|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "876": { + "content": "<|audio:876|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "877": { + "content": "<|audio:877|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "878": { + "content": "<|audio:878|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "879": { + "content": "<|audio:879|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "880": { + "content": "<|audio:880|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "881": { + "content": "<|audio:881|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "882": { + "content": "<|audio:882|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "883": { + "content": "<|audio:883|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "884": { + "content": "<|audio:884|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "885": { + "content": "<|audio:885|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "886": { + "content": "<|audio:886|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "887": { + "content": "<|audio:887|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "888": { + "content": "<|audio:888|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "889": { + "content": 
"<|audio:889|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "890": { + "content": "<|audio:890|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "891": { + "content": "<|audio:891|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "892": { + "content": "<|audio:892|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "893": { + "content": "<|audio:893|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "894": { + "content": "<|audio:894|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "895": { + "content": "<|audio:895|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "896": { + "content": "<|audio:896|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "897": { + "content": "<|audio:897|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "898": { + "content": "<|audio:898|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "899": { + "content": "<|audio:899|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "900": { + "content": "<|audio:900|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "901": { + "content": "<|audio:901|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "902": { + "content": "<|audio:902|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "903": { + "content": "<|audio:903|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "904": { + "content": "<|audio:904|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "905": { + "content": "<|audio:905|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "906": { + "content": "<|audio:906|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "907": { + "content": "<|audio:907|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "908": { + "content": "<|audio:908|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "909": { + "content": "<|audio:909|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "910": { + "content": "<|audio:910|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "911": { + "content": "<|audio:911|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "912": { + "content": "<|audio:912|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "913": { + "content": "<|audio:913|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "914": { + "content": "<|audio:914|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "915": { + "content": "<|audio:915|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "916": { + "content": "<|audio:916|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "917": { + "content": "<|audio:917|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "918": { + "content": "<|audio:918|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "919": { + "content": "<|audio:919|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "920": { + "content": "<|audio:920|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "921": { + "content": "<|audio:921|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "922": { + "content": "<|audio:922|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "923": { + "content": "<|audio:923|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "924": { + "content": "<|audio:924|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "925": { + "content": "<|audio:925|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "926": { + "content": "<|audio:926|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "927": { + "content": "<|audio:927|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "928": { + "content": "<|audio:928|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "929": { + "content": "<|audio:929|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "930": { + "content": "<|audio:930|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "931": { + "content": "<|audio:931|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "932": { + "content": "<|audio:932|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "933": { + "content": "<|audio:933|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "934": { + "content": "<|audio:934|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "935": { + "content": "<|audio:935|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "936": { + "content": "<|audio:936|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "937": { + "content": "<|audio:937|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "938": { + "content": "<|audio:938|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "939": { + "content": "<|audio:939|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "940": { + "content": "<|audio:940|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "941": { + "content": "<|audio:941|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "942": { + "content": "<|audio:942|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "943": { + "content": "<|audio:943|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "944": { + "content": "<|audio:944|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "945": { + "content": "<|audio:945|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "946": { + "content": "<|audio:946|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "947": { + "content": "<|audio:947|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "948": { + "content": "<|audio:948|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "949": { + "content": "<|audio:949|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "950": { + "content": "<|audio:950|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "951": { + "content": "<|audio:951|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "952": { + "content": "<|audio:952|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "953": { + "content": "<|audio:953|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "954": { + "content": "<|audio:954|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "955": { + "content": "<|audio:955|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "956": { + "content": 
"<|audio:956|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "957": { + "content": "<|audio:957|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "958": { + "content": "<|audio:958|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "959": { + "content": "<|audio:959|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "960": { + "content": "<|audio:960|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "961": { + "content": "<|audio:961|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "962": { + "content": "<|audio:962|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "963": { + "content": "<|audio:963|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "964": { + "content": "<|audio:964|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "965": { + "content": "<|audio:965|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "966": { + "content": "<|audio:966|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "967": { + "content": "<|audio:967|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "968": { + "content": "<|audio:968|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "969": { + "content": "<|audio:969|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "970": { + "content": "<|audio:970|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "971": { + "content": "<|audio:971|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "972": { + "content": "<|audio:972|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "973": { + "content": "<|audio:973|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "974": { + "content": "<|audio:974|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "975": { + "content": "<|audio:975|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "976": { + "content": "<|audio:976|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "977": { + "content": "<|audio:977|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "978": { + "content": "<|audio:978|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "979": { + "content": "<|audio:979|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "980": { + "content": "<|audio:980|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "981": { + "content": "<|audio:981|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "982": { + "content": "<|audio:982|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "983": { + "content": "<|audio:983|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "984": { + "content": "<|audio:984|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "985": { + "content": "<|audio:985|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "986": { + "content": "<|audio:986|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "987": { + "content": "<|audio:987|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "988": { + "content": "<|audio:988|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "989": { + "content": "<|audio:989|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "990": { + "content": "<|audio:990|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "991": { + "content": "<|audio:991|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "992": { + "content": "<|audio:992|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "993": { + "content": "<|audio:993|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "994": { + "content": "<|audio:994|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "995": { + "content": "<|audio:995|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "996": { + "content": "<|audio:996|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "997": { + "content": "<|audio:997|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "998": { + "content": "<|audio:998|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "999": { + "content": "<|audio:999|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1000": { + "content": "<|audio:1000|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1001": { + "content": "<|audio:1001|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1002": { + "content": "<|audio:1002|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1003": { + "content": "<|audio:1003|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1004": { + "content": "<|audio:1004|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1005": { + "content": "<|audio:1005|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1006": { + "content": "<|audio:1006|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1007": { + "content": "<|audio:1007|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1008": { + "content": "<|audio:1008|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1009": { + "content": "<|audio:1009|>", + "lstrip": false, + "normalized": false, + "rstrip": false, 
+ "single_word": false, + "special": true + }, + "1010": { + "content": "<|audio:1010|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1011": { + "content": "<|audio:1011|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1012": { + "content": "<|audio:1012|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1013": { + "content": "<|audio:1013|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1014": { + "content": "<|audio:1014|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1015": { + "content": "<|audio:1015|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1016": { + "content": "<|audio:1016|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1017": { + "content": "<|audio:1017|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1018": { + "content": "<|audio:1018|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1019": { + "content": "<|audio:1019|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1020": { + "content": "<|audio:1020|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1021": { + "content": "<|audio:1021|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1022": { + "content": "<|audio:1022|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "1023": { + "content": "<|audio:1023|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1024": { + "content": "<|startoftranscript|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1025": { + "content": "<|endoftranscript|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1026": { + "content": "<|padding|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "clean_up_tokenization_spaces": true, + "model_max_length": 1877, + "pad_token": "<|padding|>", + "special_tokens": [ + "<|audio:0|>", + "<|audio:1|>", + "<|audio:2|>", + "<|audio:3|>", + "<|audio:4|>", + "<|audio:5|>", + "<|audio:6|>", + "<|audio:7|>", + "<|audio:8|>", + "<|audio:9|>", + "<|audio:10|>", + "<|audio:11|>", + "<|audio:12|>", + "<|audio:13|>", + "<|audio:14|>", + "<|audio:15|>", + "<|audio:16|>", + "<|audio:17|>", + "<|audio:18|>", + "<|audio:19|>", + "<|audio:20|>", + "<|audio:21|>", + "<|audio:22|>", + "<|audio:23|>", + "<|audio:24|>", + "<|audio:25|>", + "<|audio:26|>", + "<|audio:27|>", + "<|audio:28|>", + "<|audio:29|>", + "<|audio:30|>", + "<|audio:31|>", + "<|audio:32|>", + "<|audio:33|>", + "<|audio:34|>", + "<|audio:35|>", + "<|audio:36|>", + "<|audio:37|>", + "<|audio:38|>", + "<|audio:39|>", + "<|audio:40|>", + "<|audio:41|>", + "<|audio:42|>", + "<|audio:43|>", + "<|audio:44|>", + "<|audio:45|>", + "<|audio:46|>", + "<|audio:47|>", + "<|audio:48|>", + "<|audio:49|>", + "<|audio:50|>", + "<|audio:51|>", + "<|audio:52|>", + "<|audio:53|>", + "<|audio:54|>", + "<|audio:55|>", + "<|audio:56|>", + "<|audio:57|>", + "<|audio:58|>", + "<|audio:59|>", + "<|audio:60|>", + "<|audio:61|>", + "<|audio:62|>", + "<|audio:63|>", + "<|audio:64|>", + "<|audio:65|>", + "<|audio:66|>", + "<|audio:67|>", + "<|audio:68|>", + "<|audio:69|>", + 
"<|audio:70|>", + "<|audio:71|>", + "<|audio:72|>", + "<|audio:73|>", + "<|audio:74|>", + "<|audio:75|>", + "<|audio:76|>", + "<|audio:77|>", + "<|audio:78|>", + "<|audio:79|>", + "<|audio:80|>", + "<|audio:81|>", + "<|audio:82|>", + "<|audio:83|>", + "<|audio:84|>", + "<|audio:85|>", + "<|audio:86|>", + "<|audio:87|>", + "<|audio:88|>", + "<|audio:89|>", + "<|audio:90|>", + "<|audio:91|>", + "<|audio:92|>", + "<|audio:93|>", + "<|audio:94|>", + "<|audio:95|>", + "<|audio:96|>", + "<|audio:97|>", + "<|audio:98|>", + "<|audio:99|>", + "<|audio:100|>", + "<|audio:101|>", + "<|audio:102|>", + "<|audio:103|>", + "<|audio:104|>", + "<|audio:105|>", + "<|audio:106|>", + "<|audio:107|>", + "<|audio:108|>", + "<|audio:109|>", + "<|audio:110|>", + "<|audio:111|>", + "<|audio:112|>", + "<|audio:113|>", + "<|audio:114|>", + "<|audio:115|>", + "<|audio:116|>", + "<|audio:117|>", + "<|audio:118|>", + "<|audio:119|>", + "<|audio:120|>", + "<|audio:121|>", + "<|audio:122|>", + "<|audio:123|>", + "<|audio:124|>", + "<|audio:125|>", + "<|audio:126|>", + "<|audio:127|>", + "<|audio:128|>", + "<|audio:129|>", + "<|audio:130|>", + "<|audio:131|>", + "<|audio:132|>", + "<|audio:133|>", + "<|audio:134|>", + "<|audio:135|>", + "<|audio:136|>", + "<|audio:137|>", + "<|audio:138|>", + "<|audio:139|>", + "<|audio:140|>", + "<|audio:141|>", + "<|audio:142|>", + "<|audio:143|>", + "<|audio:144|>", + "<|audio:145|>", + "<|audio:146|>", + "<|audio:147|>", + "<|audio:148|>", + "<|audio:149|>", + "<|audio:150|>", + "<|audio:151|>", + "<|audio:152|>", + "<|audio:153|>", + "<|audio:154|>", + "<|audio:155|>", + "<|audio:156|>", + "<|audio:157|>", + "<|audio:158|>", + "<|audio:159|>", + "<|audio:160|>", + "<|audio:161|>", + "<|audio:162|>", + "<|audio:163|>", + "<|audio:164|>", + "<|audio:165|>", + "<|audio:166|>", + "<|audio:167|>", + "<|audio:168|>", + "<|audio:169|>", + "<|audio:170|>", + "<|audio:171|>", + "<|audio:172|>", + "<|audio:173|>", + "<|audio:174|>", + "<|audio:175|>", + 
"<|audio:176|>", + "<|audio:177|>", + "<|audio:178|>", + "<|audio:179|>", + "<|audio:180|>", + "<|audio:181|>", + "<|audio:182|>", + "<|audio:183|>", + "<|audio:184|>", + "<|audio:185|>", + "<|audio:186|>", + "<|audio:187|>", + "<|audio:188|>", + "<|audio:189|>", + "<|audio:190|>", + "<|audio:191|>", + "<|audio:192|>", + "<|audio:193|>", + "<|audio:194|>", + "<|audio:195|>", + "<|audio:196|>", + "<|audio:197|>", + "<|audio:198|>", + "<|audio:199|>", + "<|audio:200|>", + "<|audio:201|>", + "<|audio:202|>", + "<|audio:203|>", + "<|audio:204|>", + "<|audio:205|>", + "<|audio:206|>", + "<|audio:207|>", + "<|audio:208|>", + "<|audio:209|>", + "<|audio:210|>", + "<|audio:211|>", + "<|audio:212|>", + "<|audio:213|>", + "<|audio:214|>", + "<|audio:215|>", + "<|audio:216|>", + "<|audio:217|>", + "<|audio:218|>", + "<|audio:219|>", + "<|audio:220|>", + "<|audio:221|>", + "<|audio:222|>", + "<|audio:223|>", + "<|audio:224|>", + "<|audio:225|>", + "<|audio:226|>", + "<|audio:227|>", + "<|audio:228|>", + "<|audio:229|>", + "<|audio:230|>", + "<|audio:231|>", + "<|audio:232|>", + "<|audio:233|>", + "<|audio:234|>", + "<|audio:235|>", + "<|audio:236|>", + "<|audio:237|>", + "<|audio:238|>", + "<|audio:239|>", + "<|audio:240|>", + "<|audio:241|>", + "<|audio:242|>", + "<|audio:243|>", + "<|audio:244|>", + "<|audio:245|>", + "<|audio:246|>", + "<|audio:247|>", + "<|audio:248|>", + "<|audio:249|>", + "<|audio:250|>", + "<|audio:251|>", + "<|audio:252|>", + "<|audio:253|>", + "<|audio:254|>", + "<|audio:255|>", + "<|audio:256|>", + "<|audio:257|>", + "<|audio:258|>", + "<|audio:259|>", + "<|audio:260|>", + "<|audio:261|>", + "<|audio:262|>", + "<|audio:263|>", + "<|audio:264|>", + "<|audio:265|>", + "<|audio:266|>", + "<|audio:267|>", + "<|audio:268|>", + "<|audio:269|>", + "<|audio:270|>", + "<|audio:271|>", + "<|audio:272|>", + "<|audio:273|>", + "<|audio:274|>", + "<|audio:275|>", + "<|audio:276|>", + "<|audio:277|>", + "<|audio:278|>", + "<|audio:279|>", + "<|audio:280|>", + 
"<|audio:281|>", + "<|audio:282|>", + "<|audio:283|>", + "<|audio:284|>", + "<|audio:285|>", + "<|audio:286|>", + "<|audio:287|>", + "<|audio:288|>", + "<|audio:289|>", + "<|audio:290|>", + "<|audio:291|>", + "<|audio:292|>", + "<|audio:293|>", + "<|audio:294|>", + "<|audio:295|>", + "<|audio:296|>", + "<|audio:297|>", + "<|audio:298|>", + "<|audio:299|>", + "<|audio:300|>", + "<|audio:301|>", + "<|audio:302|>", + "<|audio:303|>", + "<|audio:304|>", + "<|audio:305|>", + "<|audio:306|>", + "<|audio:307|>", + "<|audio:308|>", + "<|audio:309|>", + "<|audio:310|>", + "<|audio:311|>", + "<|audio:312|>", + "<|audio:313|>", + "<|audio:314|>", + "<|audio:315|>", + "<|audio:316|>", + "<|audio:317|>", + "<|audio:318|>", + "<|audio:319|>", + "<|audio:320|>", + "<|audio:321|>", + "<|audio:322|>", + "<|audio:323|>", + "<|audio:324|>", + "<|audio:325|>", + "<|audio:326|>", + "<|audio:327|>", + "<|audio:328|>", + "<|audio:329|>", + "<|audio:330|>", + "<|audio:331|>", + "<|audio:332|>", + "<|audio:333|>", + "<|audio:334|>", + "<|audio:335|>", + "<|audio:336|>", + "<|audio:337|>", + "<|audio:338|>", + "<|audio:339|>", + "<|audio:340|>", + "<|audio:341|>", + "<|audio:342|>", + "<|audio:343|>", + "<|audio:344|>", + "<|audio:345|>", + "<|audio:346|>", + "<|audio:347|>", + "<|audio:348|>", + "<|audio:349|>", + "<|audio:350|>", + "<|audio:351|>", + "<|audio:352|>", + "<|audio:353|>", + "<|audio:354|>", + "<|audio:355|>", + "<|audio:356|>", + "<|audio:357|>", + "<|audio:358|>", + "<|audio:359|>", + "<|audio:360|>", + "<|audio:361|>", + "<|audio:362|>", + "<|audio:363|>", + "<|audio:364|>", + "<|audio:365|>", + "<|audio:366|>", + "<|audio:367|>", + "<|audio:368|>", + "<|audio:369|>", + "<|audio:370|>", + "<|audio:371|>", + "<|audio:372|>", + "<|audio:373|>", + "<|audio:374|>", + "<|audio:375|>", + "<|audio:376|>", + "<|audio:377|>", + "<|audio:378|>", + "<|audio:379|>", + "<|audio:380|>", + "<|audio:381|>", + "<|audio:382|>", + "<|audio:383|>", + "<|audio:384|>", + "<|audio:385|>", + 
"<|audio:386|>", + "<|audio:387|>", + "<|audio:388|>", + "<|audio:389|>", + "<|audio:390|>", + "<|audio:391|>", + "<|audio:392|>", + "<|audio:393|>", + "<|audio:394|>", + "<|audio:395|>", + "<|audio:396|>", + "<|audio:397|>", + "<|audio:398|>", + "<|audio:399|>", + "<|audio:400|>", + "<|audio:401|>", + "<|audio:402|>", + "<|audio:403|>", + "<|audio:404|>", + "<|audio:405|>", + "<|audio:406|>", + "<|audio:407|>", + "<|audio:408|>", + "<|audio:409|>", + "<|audio:410|>", + "<|audio:411|>", + "<|audio:412|>", + "<|audio:413|>", + "<|audio:414|>", + "<|audio:415|>", + "<|audio:416|>", + "<|audio:417|>", + "<|audio:418|>", + "<|audio:419|>", + "<|audio:420|>", + "<|audio:421|>", + "<|audio:422|>", + "<|audio:423|>", + "<|audio:424|>", + "<|audio:425|>", + "<|audio:426|>", + "<|audio:427|>", + "<|audio:428|>", + "<|audio:429|>", + "<|audio:430|>", + "<|audio:431|>", + "<|audio:432|>", + "<|audio:433|>", + "<|audio:434|>", + "<|audio:435|>", + "<|audio:436|>", + "<|audio:437|>", + "<|audio:438|>", + "<|audio:439|>", + "<|audio:440|>", + "<|audio:441|>", + "<|audio:442|>", + "<|audio:443|>", + "<|audio:444|>", + "<|audio:445|>", + "<|audio:446|>", + "<|audio:447|>", + "<|audio:448|>", + "<|audio:449|>", + "<|audio:450|>", + "<|audio:451|>", + "<|audio:452|>", + "<|audio:453|>", + "<|audio:454|>", + "<|audio:455|>", + "<|audio:456|>", + "<|audio:457|>", + "<|audio:458|>", + "<|audio:459|>", + "<|audio:460|>", + "<|audio:461|>", + "<|audio:462|>", + "<|audio:463|>", + "<|audio:464|>", + "<|audio:465|>", + "<|audio:466|>", + "<|audio:467|>", + "<|audio:468|>", + "<|audio:469|>", + "<|audio:470|>", + "<|audio:471|>", + "<|audio:472|>", + "<|audio:473|>", + "<|audio:474|>", + "<|audio:475|>", + "<|audio:476|>", + "<|audio:477|>", + "<|audio:478|>", + "<|audio:479|>", + "<|audio:480|>", + "<|audio:481|>", + "<|audio:482|>", + "<|audio:483|>", + "<|audio:484|>", + "<|audio:485|>", + "<|audio:486|>", + "<|audio:487|>", + "<|audio:488|>", + "<|audio:489|>", + "<|audio:490|>", + 
"<|audio:491|>", + "<|audio:492|>", + "<|audio:493|>", + "<|audio:494|>", + "<|audio:495|>", + "<|audio:496|>", + "<|audio:497|>", + "<|audio:498|>", + "<|audio:499|>", + "<|audio:500|>", + "<|audio:501|>", + "<|audio:502|>", + "<|audio:503|>", + "<|audio:504|>", + "<|audio:505|>", + "<|audio:506|>", + "<|audio:507|>", + "<|audio:508|>", + "<|audio:509|>", + "<|audio:510|>", + "<|audio:511|>", + "<|audio:512|>", + "<|audio:513|>", + "<|audio:514|>", + "<|audio:515|>", + "<|audio:516|>", + "<|audio:517|>", + "<|audio:518|>", + "<|audio:519|>", + "<|audio:520|>", + "<|audio:521|>", + "<|audio:522|>", + "<|audio:523|>", + "<|audio:524|>", + "<|audio:525|>", + "<|audio:526|>", + "<|audio:527|>", + "<|audio:528|>", + "<|audio:529|>", + "<|audio:530|>", + "<|audio:531|>", + "<|audio:532|>", + "<|audio:533|>", + "<|audio:534|>", + "<|audio:535|>", + "<|audio:536|>", + "<|audio:537|>", + "<|audio:538|>", + "<|audio:539|>", + "<|audio:540|>", + "<|audio:541|>", + "<|audio:542|>", + "<|audio:543|>", + "<|audio:544|>", + "<|audio:545|>", + "<|audio:546|>", + "<|audio:547|>", + "<|audio:548|>", + "<|audio:549|>", + "<|audio:550|>", + "<|audio:551|>", + "<|audio:552|>", + "<|audio:553|>", + "<|audio:554|>", + "<|audio:555|>", + "<|audio:556|>", + "<|audio:557|>", + "<|audio:558|>", + "<|audio:559|>", + "<|audio:560|>", + "<|audio:561|>", + "<|audio:562|>", + "<|audio:563|>", + "<|audio:564|>", + "<|audio:565|>", + "<|audio:566|>", + "<|audio:567|>", + "<|audio:568|>", + "<|audio:569|>", + "<|audio:570|>", + "<|audio:571|>", + "<|audio:572|>", + "<|audio:573|>", + "<|audio:574|>", + "<|audio:575|>", + "<|audio:576|>", + "<|audio:577|>", + "<|audio:578|>", + "<|audio:579|>", + "<|audio:580|>", + "<|audio:581|>", + "<|audio:582|>", + "<|audio:583|>", + "<|audio:584|>", + "<|audio:585|>", + "<|audio:586|>", + "<|audio:587|>", + "<|audio:588|>", + "<|audio:589|>", + "<|audio:590|>", + "<|audio:591|>", + "<|audio:592|>", + "<|audio:593|>", + "<|audio:594|>", + "<|audio:595|>", + 
"<|audio:596|>", + "<|audio:597|>", + "<|audio:598|>", + "<|audio:599|>", + "<|audio:600|>", + "<|audio:601|>", + "<|audio:602|>", + "<|audio:603|>", + "<|audio:604|>", + "<|audio:605|>", + "<|audio:606|>", + "<|audio:607|>", + "<|audio:608|>", + "<|audio:609|>", + "<|audio:610|>", + "<|audio:611|>", + "<|audio:612|>", + "<|audio:613|>", + "<|audio:614|>", + "<|audio:615|>", + "<|audio:616|>", + "<|audio:617|>", + "<|audio:618|>", + "<|audio:619|>", + "<|audio:620|>", + "<|audio:621|>", + "<|audio:622|>", + "<|audio:623|>", + "<|audio:624|>", + "<|audio:625|>", + "<|audio:626|>", + "<|audio:627|>", + "<|audio:628|>", + "<|audio:629|>", + "<|audio:630|>", + "<|audio:631|>", + "<|audio:632|>", + "<|audio:633|>", + "<|audio:634|>", + "<|audio:635|>", + "<|audio:636|>", + "<|audio:637|>", + "<|audio:638|>", + "<|audio:639|>", + "<|audio:640|>", + "<|audio:641|>", + "<|audio:642|>", + "<|audio:643|>", + "<|audio:644|>", + "<|audio:645|>", + "<|audio:646|>", + "<|audio:647|>", + "<|audio:648|>", + "<|audio:649|>", + "<|audio:650|>", + "<|audio:651|>", + "<|audio:652|>", + "<|audio:653|>", + "<|audio:654|>", + "<|audio:655|>", + "<|audio:656|>", + "<|audio:657|>", + "<|audio:658|>", + "<|audio:659|>", + "<|audio:660|>", + "<|audio:661|>", + "<|audio:662|>", + "<|audio:663|>", + "<|audio:664|>", + "<|audio:665|>", + "<|audio:666|>", + "<|audio:667|>", + "<|audio:668|>", + "<|audio:669|>", + "<|audio:670|>", + "<|audio:671|>", + "<|audio:672|>", + "<|audio:673|>", + "<|audio:674|>", + "<|audio:675|>", + "<|audio:676|>", + "<|audio:677|>", + "<|audio:678|>", + "<|audio:679|>", + "<|audio:680|>", + "<|audio:681|>", + "<|audio:682|>", + "<|audio:683|>", + "<|audio:684|>", + "<|audio:685|>", + "<|audio:686|>", + "<|audio:687|>", + "<|audio:688|>", + "<|audio:689|>", + "<|audio:690|>", + "<|audio:691|>", + "<|audio:692|>", + "<|audio:693|>", + "<|audio:694|>", + "<|audio:695|>", + "<|audio:696|>", + "<|audio:697|>", + "<|audio:698|>", + "<|audio:699|>", + "<|audio:700|>", + 
"<|audio:701|>", + "<|audio:702|>", + "<|audio:703|>", + "<|audio:704|>", + "<|audio:705|>", + "<|audio:706|>", + "<|audio:707|>", + "<|audio:708|>", + "<|audio:709|>", + "<|audio:710|>", + "<|audio:711|>", + "<|audio:712|>", + "<|audio:713|>", + "<|audio:714|>", + "<|audio:715|>", + "<|audio:716|>", + "<|audio:717|>", + "<|audio:718|>", + "<|audio:719|>", + "<|audio:720|>", + "<|audio:721|>", + "<|audio:722|>", + "<|audio:723|>", + "<|audio:724|>", + "<|audio:725|>", + "<|audio:726|>", + "<|audio:727|>", + "<|audio:728|>", + "<|audio:729|>", + "<|audio:730|>", + "<|audio:731|>", + "<|audio:732|>", + "<|audio:733|>", + "<|audio:734|>", + "<|audio:735|>", + "<|audio:736|>", + "<|audio:737|>", + "<|audio:738|>", + "<|audio:739|>", + "<|audio:740|>", + "<|audio:741|>", + "<|audio:742|>", + "<|audio:743|>", + "<|audio:744|>", + "<|audio:745|>", + "<|audio:746|>", + "<|audio:747|>", + "<|audio:748|>", + "<|audio:749|>", + "<|audio:750|>", + "<|audio:751|>", + "<|audio:752|>", + "<|audio:753|>", + "<|audio:754|>", + "<|audio:755|>", + "<|audio:756|>", + "<|audio:757|>", + "<|audio:758|>", + "<|audio:759|>", + "<|audio:760|>", + "<|audio:761|>", + "<|audio:762|>", + "<|audio:763|>", + "<|audio:764|>", + "<|audio:765|>", + "<|audio:766|>", + "<|audio:767|>", + "<|audio:768|>", + "<|audio:769|>", + "<|audio:770|>", + "<|audio:771|>", + "<|audio:772|>", + "<|audio:773|>", + "<|audio:774|>", + "<|audio:775|>", + "<|audio:776|>", + "<|audio:777|>", + "<|audio:778|>", + "<|audio:779|>", + "<|audio:780|>", + "<|audio:781|>", + "<|audio:782|>", + "<|audio:783|>", + "<|audio:784|>", + "<|audio:785|>", + "<|audio:786|>", + "<|audio:787|>", + "<|audio:788|>", + "<|audio:789|>", + "<|audio:790|>", + "<|audio:791|>", + "<|audio:792|>", + "<|audio:793|>", + "<|audio:794|>", + "<|audio:795|>", + "<|audio:796|>", + "<|audio:797|>", + "<|audio:798|>", + "<|audio:799|>", + "<|audio:800|>", + "<|audio:801|>", + "<|audio:802|>", + "<|audio:803|>", + "<|audio:804|>", + "<|audio:805|>", + 
"<|audio:806|>", + "<|audio:807|>", + "<|audio:808|>", + "<|audio:809|>", + "<|audio:810|>", + "<|audio:811|>", + "<|audio:812|>", + "<|audio:813|>", + "<|audio:814|>", + "<|audio:815|>", + "<|audio:816|>", + "<|audio:817|>", + "<|audio:818|>", + "<|audio:819|>", + "<|audio:820|>", + "<|audio:821|>", + "<|audio:822|>", + "<|audio:823|>", + "<|audio:824|>", + "<|audio:825|>", + "<|audio:826|>", + "<|audio:827|>", + "<|audio:828|>", + "<|audio:829|>", + "<|audio:830|>", + "<|audio:831|>", + "<|audio:832|>", + "<|audio:833|>", + "<|audio:834|>", + "<|audio:835|>", + "<|audio:836|>", + "<|audio:837|>", + "<|audio:838|>", + "<|audio:839|>", + "<|audio:840|>", + "<|audio:841|>", + "<|audio:842|>", + "<|audio:843|>", + "<|audio:844|>", + "<|audio:845|>", + "<|audio:846|>", + "<|audio:847|>", + "<|audio:848|>", + "<|audio:849|>", + "<|audio:850|>", + "<|audio:851|>", + "<|audio:852|>", + "<|audio:853|>", + "<|audio:854|>", + "<|audio:855|>", + "<|audio:856|>", + "<|audio:857|>", + "<|audio:858|>", + "<|audio:859|>", + "<|audio:860|>", + "<|audio:861|>", + "<|audio:862|>", + "<|audio:863|>", + "<|audio:864|>", + "<|audio:865|>", + "<|audio:866|>", + "<|audio:867|>", + "<|audio:868|>", + "<|audio:869|>", + "<|audio:870|>", + "<|audio:871|>", + "<|audio:872|>", + "<|audio:873|>", + "<|audio:874|>", + "<|audio:875|>", + "<|audio:876|>", + "<|audio:877|>", + "<|audio:878|>", + "<|audio:879|>", + "<|audio:880|>", + "<|audio:881|>", + "<|audio:882|>", + "<|audio:883|>", + "<|audio:884|>", + "<|audio:885|>", + "<|audio:886|>", + "<|audio:887|>", + "<|audio:888|>", + "<|audio:889|>", + "<|audio:890|>", + "<|audio:891|>", + "<|audio:892|>", + "<|audio:893|>", + "<|audio:894|>", + "<|audio:895|>", + "<|audio:896|>", + "<|audio:897|>", + "<|audio:898|>", + "<|audio:899|>", + "<|audio:900|>", + "<|audio:901|>", + "<|audio:902|>", + "<|audio:903|>", + "<|audio:904|>", + "<|audio:905|>", + "<|audio:906|>", + "<|audio:907|>", + "<|audio:908|>", + "<|audio:909|>", + "<|audio:910|>", + 
"<|audio:911|>", + "<|audio:912|>", + "<|audio:913|>", + "<|audio:914|>", + "<|audio:915|>", + "<|audio:916|>", + "<|audio:917|>", + "<|audio:918|>", + "<|audio:919|>", + "<|audio:920|>", + "<|audio:921|>", + "<|audio:922|>", + "<|audio:923|>", + "<|audio:924|>", + "<|audio:925|>", + "<|audio:926|>", + "<|audio:927|>", + "<|audio:928|>", + "<|audio:929|>", + "<|audio:930|>", + "<|audio:931|>", + "<|audio:932|>", + "<|audio:933|>", + "<|audio:934|>", + "<|audio:935|>", + "<|audio:936|>", + "<|audio:937|>", + "<|audio:938|>", + "<|audio:939|>", + "<|audio:940|>", + "<|audio:941|>", + "<|audio:942|>", + "<|audio:943|>", + "<|audio:944|>", + "<|audio:945|>", + "<|audio:946|>", + "<|audio:947|>", + "<|audio:948|>", + "<|audio:949|>", + "<|audio:950|>", + "<|audio:951|>", + "<|audio:952|>", + "<|audio:953|>", + "<|audio:954|>", + "<|audio:955|>", + "<|audio:956|>", + "<|audio:957|>", + "<|audio:958|>", + "<|audio:959|>", + "<|audio:960|>", + "<|audio:961|>", + "<|audio:962|>", + "<|audio:963|>", + "<|audio:964|>", + "<|audio:965|>", + "<|audio:966|>", + "<|audio:967|>", + "<|audio:968|>", + "<|audio:969|>", + "<|audio:970|>", + "<|audio:971|>", + "<|audio:972|>", + "<|audio:973|>", + "<|audio:974|>", + "<|audio:975|>", + "<|audio:976|>", + "<|audio:977|>", + "<|audio:978|>", + "<|audio:979|>", + "<|audio:980|>", + "<|audio:981|>", + "<|audio:982|>", + "<|audio:983|>", + "<|audio:984|>", + "<|audio:985|>", + "<|audio:986|>", + "<|audio:987|>", + "<|audio:988|>", + "<|audio:989|>", + "<|audio:990|>", + "<|audio:991|>", + "<|audio:992|>", + "<|audio:993|>", + "<|audio:994|>", + "<|audio:995|>", + "<|audio:996|>", + "<|audio:997|>", + "<|audio:998|>", + "<|audio:999|>", + "<|audio:1000|>", + "<|audio:1001|>", + "<|audio:1002|>", + "<|audio:1003|>", + "<|audio:1004|>", + "<|audio:1005|>", + "<|audio:1006|>", + "<|audio:1007|>", + "<|audio:1008|>", + "<|audio:1009|>", + "<|audio:1010|>", + "<|audio:1011|>", + "<|audio:1012|>", + "<|audio:1013|>", + "<|audio:1014|>", + 
"<|audio:1015|>", + "<|audio:1016|>", + "<|audio:1017|>", + "<|audio:1018|>", + "<|audio:1019|>", + "<|audio:1020|>", + "<|audio:1021|>", + "<|audio:1022|>", + "<|audio:1023|>", + "<|startoftranscript|>", + "<|endoftranscript|>", + "<|padding|>" + ], + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/out/checkpoint-16000/trainer_state.json b/out/checkpoint-16000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0ac6b21da1bf07156de8c9b7dad64560a26af347 --- /dev/null +++ b/out/checkpoint-16000/trainer_state.json @@ -0,0 +1,112161 @@ +{ + "best_metric": 2.3791537284851074, + "best_model_checkpoint": "./out/checkpoint-16000", + "epoch": 1.2912597853280607, + "eval_steps": 1000, + "global_step": 16000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 8.07037365830038e-05, + "grad_norm": 0.8911969065666199, + "learning_rate": 2.0000000000000003e-06, + "loss": 2.6759, + "step": 1 + }, + { + "epoch": 0.0001614074731660076, + "grad_norm": 0.8724873661994934, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7001, + "step": 2 + }, + { + "epoch": 0.00024211120974901139, + "grad_norm": 0.9050428867340088, + "learning_rate": 6e-06, + "loss": 2.6291, + "step": 3 + }, + { + "epoch": 0.0003228149463320152, + "grad_norm": 0.9249712824821472, + "learning_rate": 8.000000000000001e-06, + "loss": 2.7174, + "step": 4 + }, + { + "epoch": 0.000403518682915019, + "grad_norm": 0.9102846384048462, + "learning_rate": 1e-05, + "loss": 2.6831, + "step": 5 + }, + { + "epoch": 0.00048422241949802277, + "grad_norm": 0.9129141569137573, + "learning_rate": 1.2e-05, + "loss": 2.684, + "step": 6 + }, + { + "epoch": 0.0005649261560810266, + "grad_norm": 0.8648065328598022, + "learning_rate": 1.4000000000000001e-05, + "loss": 2.6488, + "step": 7 + }, + { + "epoch": 0.0006456298926640304, + "grad_norm": 0.8677545785903931, + "learning_rate": 1.6000000000000003e-05, + 
"loss": 2.7143, + "step": 8 + }, + { + "epoch": 0.0007263336292470342, + "grad_norm": 0.919029712677002, + "learning_rate": 1.8e-05, + "loss": 2.631, + "step": 9 + }, + { + "epoch": 0.000807037365830038, + "grad_norm": 0.9289683103561401, + "learning_rate": 2e-05, + "loss": 2.6564, + "step": 10 + }, + { + "epoch": 0.0008877411024130417, + "grad_norm": 0.8810267448425293, + "learning_rate": 2.2000000000000003e-05, + "loss": 2.6395, + "step": 11 + }, + { + "epoch": 0.0009684448389960455, + "grad_norm": 0.8185754418373108, + "learning_rate": 2.4e-05, + "loss": 2.6871, + "step": 12 + }, + { + "epoch": 0.0010491485755790492, + "grad_norm": 0.9476913213729858, + "learning_rate": 2.6000000000000002e-05, + "loss": 2.7011, + "step": 13 + }, + { + "epoch": 0.0011298523121620531, + "grad_norm": 0.9616057872772217, + "learning_rate": 2.8000000000000003e-05, + "loss": 2.7373, + "step": 14 + }, + { + "epoch": 0.0012105560487450568, + "grad_norm": 0.9429686665534973, + "learning_rate": 3e-05, + "loss": 2.7556, + "step": 15 + }, + { + "epoch": 0.0012912597853280607, + "grad_norm": 1.0331422090530396, + "learning_rate": 3.2000000000000005e-05, + "loss": 2.7756, + "step": 16 + }, + { + "epoch": 0.0013719635219110644, + "grad_norm": 0.906057596206665, + "learning_rate": 3.4000000000000007e-05, + "loss": 2.7053, + "step": 17 + }, + { + "epoch": 0.0014526672584940683, + "grad_norm": 0.8677626252174377, + "learning_rate": 3.6e-05, + "loss": 2.7012, + "step": 18 + }, + { + "epoch": 0.001533370995077072, + "grad_norm": 0.9378079175949097, + "learning_rate": 3.8e-05, + "loss": 2.6786, + "step": 19 + }, + { + "epoch": 0.001614074731660076, + "grad_norm": 1.0333882570266724, + "learning_rate": 4e-05, + "loss": 2.689, + "step": 20 + }, + { + "epoch": 0.0016947784682430796, + "grad_norm": 0.9435378909111023, + "learning_rate": 4.2e-05, + "loss": 2.7084, + "step": 21 + }, + { + "epoch": 0.0017754822048260835, + "grad_norm": 0.9530225396156311, + "learning_rate": 4.4000000000000006e-05, + 
"loss": 2.7039, + "step": 22 + }, + { + "epoch": 0.0018561859414090872, + "grad_norm": 1.0154749155044556, + "learning_rate": 4.600000000000001e-05, + "loss": 2.6623, + "step": 23 + }, + { + "epoch": 0.001936889677992091, + "grad_norm": 1.0341671705245972, + "learning_rate": 4.8e-05, + "loss": 2.7072, + "step": 24 + }, + { + "epoch": 0.002017593414575095, + "grad_norm": 0.9185739159584045, + "learning_rate": 5e-05, + "loss": 2.6595, + "step": 25 + }, + { + "epoch": 0.0020982971511580985, + "grad_norm": 1.060390591621399, + "learning_rate": 5.2000000000000004e-05, + "loss": 2.7045, + "step": 26 + }, + { + "epoch": 0.0021790008877411024, + "grad_norm": 0.9720118641853333, + "learning_rate": 5.4000000000000005e-05, + "loss": 2.6513, + "step": 27 + }, + { + "epoch": 0.0022597046243241063, + "grad_norm": 0.9426784515380859, + "learning_rate": 5.6000000000000006e-05, + "loss": 2.6541, + "step": 28 + }, + { + "epoch": 0.00234040836090711, + "grad_norm": 0.9736170768737793, + "learning_rate": 5.8e-05, + "loss": 2.7324, + "step": 29 + }, + { + "epoch": 0.0024211120974901136, + "grad_norm": 0.9831354022026062, + "learning_rate": 6e-05, + "loss": 2.6651, + "step": 30 + }, + { + "epoch": 0.0025018158340731175, + "grad_norm": 1.0222605466842651, + "learning_rate": 6.2e-05, + "loss": 2.7375, + "step": 31 + }, + { + "epoch": 0.0025825195706561214, + "grad_norm": 0.9182235598564148, + "learning_rate": 6.400000000000001e-05, + "loss": 2.7142, + "step": 32 + }, + { + "epoch": 0.0026632233072391254, + "grad_norm": 1.0200958251953125, + "learning_rate": 6.6e-05, + "loss": 2.6785, + "step": 33 + }, + { + "epoch": 0.002743927043822129, + "grad_norm": 1.0153381824493408, + "learning_rate": 6.800000000000001e-05, + "loss": 2.6737, + "step": 34 + }, + { + "epoch": 0.0028246307804051327, + "grad_norm": 0.8998087644577026, + "learning_rate": 7e-05, + "loss": 2.7594, + "step": 35 + }, + { + "epoch": 0.0029053345169881366, + "grad_norm": 0.9005621671676636, + "learning_rate": 7.2e-05, + 
"loss": 2.713, + "step": 36 + }, + { + "epoch": 0.0029860382535711405, + "grad_norm": 1.0165663957595825, + "learning_rate": 7.4e-05, + "loss": 2.7197, + "step": 37 + }, + { + "epoch": 0.003066741990154144, + "grad_norm": 1.0011894702911377, + "learning_rate": 7.6e-05, + "loss": 2.6315, + "step": 38 + }, + { + "epoch": 0.003147445726737148, + "grad_norm": 1.141209602355957, + "learning_rate": 7.800000000000001e-05, + "loss": 2.7249, + "step": 39 + }, + { + "epoch": 0.003228149463320152, + "grad_norm": 0.9114719033241272, + "learning_rate": 8e-05, + "loss": 2.7039, + "step": 40 + }, + { + "epoch": 0.0033088531999031557, + "grad_norm": 1.0193392038345337, + "learning_rate": 8.2e-05, + "loss": 2.6501, + "step": 41 + }, + { + "epoch": 0.003389556936486159, + "grad_norm": 0.9458270072937012, + "learning_rate": 8.4e-05, + "loss": 2.725, + "step": 42 + }, + { + "epoch": 0.003470260673069163, + "grad_norm": 0.9667492508888245, + "learning_rate": 8.6e-05, + "loss": 2.7232, + "step": 43 + }, + { + "epoch": 0.003550964409652167, + "grad_norm": 0.9987972378730774, + "learning_rate": 8.800000000000001e-05, + "loss": 2.6554, + "step": 44 + }, + { + "epoch": 0.003631668146235171, + "grad_norm": 1.0166393518447876, + "learning_rate": 9e-05, + "loss": 2.7291, + "step": 45 + }, + { + "epoch": 0.0037123718828181744, + "grad_norm": 0.9557009935379028, + "learning_rate": 9.200000000000001e-05, + "loss": 2.7194, + "step": 46 + }, + { + "epoch": 0.0037930756194011783, + "grad_norm": 0.9575492143630981, + "learning_rate": 9.4e-05, + "loss": 2.6671, + "step": 47 + }, + { + "epoch": 0.003873779355984182, + "grad_norm": 0.9614555239677429, + "learning_rate": 9.6e-05, + "loss": 2.6865, + "step": 48 + }, + { + "epoch": 0.003954483092567186, + "grad_norm": 0.9245515465736389, + "learning_rate": 9.8e-05, + "loss": 2.7821, + "step": 49 + }, + { + "epoch": 0.00403518682915019, + "grad_norm": 0.9756044745445251, + "learning_rate": 0.0001, + "loss": 2.7608, + "step": 50 + }, + { + "epoch": 
0.0041158905657331935, + "grad_norm": 0.95787513256073, + "learning_rate": 0.00010200000000000001, + "loss": 2.6458, + "step": 51 + }, + { + "epoch": 0.004196594302316197, + "grad_norm": 1.0102490186691284, + "learning_rate": 0.00010400000000000001, + "loss": 2.7835, + "step": 52 + }, + { + "epoch": 0.004277298038899201, + "grad_norm": 0.9676176309585571, + "learning_rate": 0.00010600000000000002, + "loss": 2.702, + "step": 53 + }, + { + "epoch": 0.004358001775482205, + "grad_norm": 0.9724096655845642, + "learning_rate": 0.00010800000000000001, + "loss": 2.714, + "step": 54 + }, + { + "epoch": 0.004438705512065208, + "grad_norm": 0.9482994675636292, + "learning_rate": 0.00011000000000000002, + "loss": 2.8069, + "step": 55 + }, + { + "epoch": 0.0045194092486482125, + "grad_norm": 0.9886480569839478, + "learning_rate": 0.00011200000000000001, + "loss": 2.7468, + "step": 56 + }, + { + "epoch": 0.004600112985231216, + "grad_norm": 0.9696247577667236, + "learning_rate": 0.00011399999999999999, + "loss": 2.7486, + "step": 57 + }, + { + "epoch": 0.00468081672181422, + "grad_norm": 1.0638912916183472, + "learning_rate": 0.000116, + "loss": 2.7747, + "step": 58 + }, + { + "epoch": 0.004761520458397224, + "grad_norm": 1.016483187675476, + "learning_rate": 0.000118, + "loss": 2.6925, + "step": 59 + }, + { + "epoch": 0.004842224194980227, + "grad_norm": 1.0298779010772705, + "learning_rate": 0.00012, + "loss": 2.7487, + "step": 60 + }, + { + "epoch": 0.004922927931563232, + "grad_norm": 1.1082268953323364, + "learning_rate": 0.000122, + "loss": 2.7697, + "step": 61 + }, + { + "epoch": 0.005003631668146235, + "grad_norm": 0.9202101826667786, + "learning_rate": 0.000124, + "loss": 2.7429, + "step": 62 + }, + { + "epoch": 0.0050843354047292386, + "grad_norm": 1.0140503644943237, + "learning_rate": 0.000126, + "loss": 2.7492, + "step": 63 + }, + { + "epoch": 0.005165039141312243, + "grad_norm": 1.0689163208007812, + "learning_rate": 0.00012800000000000002, + "loss": 2.7353, + 
"step": 64 + }, + { + "epoch": 0.005245742877895246, + "grad_norm": 0.9947141408920288, + "learning_rate": 0.00013000000000000002, + "loss": 2.7385, + "step": 65 + }, + { + "epoch": 0.005326446614478251, + "grad_norm": 1.2034410238265991, + "learning_rate": 0.000132, + "loss": 2.7632, + "step": 66 + }, + { + "epoch": 0.005407150351061254, + "grad_norm": 0.9450412392616272, + "learning_rate": 0.000134, + "loss": 2.7547, + "step": 67 + }, + { + "epoch": 0.005487854087644258, + "grad_norm": 1.1818269491195679, + "learning_rate": 0.00013600000000000003, + "loss": 2.7663, + "step": 68 + }, + { + "epoch": 0.005568557824227262, + "grad_norm": 1.003347396850586, + "learning_rate": 0.000138, + "loss": 2.7299, + "step": 69 + }, + { + "epoch": 0.0056492615608102655, + "grad_norm": 1.0105760097503662, + "learning_rate": 0.00014, + "loss": 2.7261, + "step": 70 + }, + { + "epoch": 0.005729965297393269, + "grad_norm": 0.9459090232849121, + "learning_rate": 0.000142, + "loss": 2.7237, + "step": 71 + }, + { + "epoch": 0.005810669033976273, + "grad_norm": 0.9716219305992126, + "learning_rate": 0.000144, + "loss": 2.8175, + "step": 72 + }, + { + "epoch": 0.005891372770559277, + "grad_norm": 0.9968419075012207, + "learning_rate": 0.000146, + "loss": 2.7828, + "step": 73 + }, + { + "epoch": 0.005972076507142281, + "grad_norm": 1.099680781364441, + "learning_rate": 0.000148, + "loss": 2.7111, + "step": 74 + }, + { + "epoch": 0.0060527802437252845, + "grad_norm": 1.004846453666687, + "learning_rate": 0.00015000000000000001, + "loss": 2.7508, + "step": 75 + }, + { + "epoch": 0.006133483980308288, + "grad_norm": 1.0568128824234009, + "learning_rate": 0.000152, + "loss": 2.7341, + "step": 76 + }, + { + "epoch": 0.006214187716891292, + "grad_norm": 0.9871000051498413, + "learning_rate": 0.000154, + "loss": 2.7831, + "step": 77 + }, + { + "epoch": 0.006294891453474296, + "grad_norm": 1.005947232246399, + "learning_rate": 0.00015600000000000002, + "loss": 2.6798, + "step": 78 + }, + { + 
"epoch": 0.006375595190057299, + "grad_norm": 0.9984713792800903, + "learning_rate": 0.00015800000000000002, + "loss": 2.8126, + "step": 79 + }, + { + "epoch": 0.006456298926640304, + "grad_norm": 0.9805751442909241, + "learning_rate": 0.00016, + "loss": 2.7826, + "step": 80 + }, + { + "epoch": 0.006537002663223307, + "grad_norm": 1.02998685836792, + "learning_rate": 0.000162, + "loss": 2.7636, + "step": 81 + }, + { + "epoch": 0.006617706399806311, + "grad_norm": 1.0790135860443115, + "learning_rate": 0.000164, + "loss": 2.7809, + "step": 82 + }, + { + "epoch": 0.006698410136389315, + "grad_norm": 1.1058307886123657, + "learning_rate": 0.000166, + "loss": 2.787, + "step": 83 + }, + { + "epoch": 0.006779113872972318, + "grad_norm": 1.0199624300003052, + "learning_rate": 0.000168, + "loss": 2.7171, + "step": 84 + }, + { + "epoch": 0.006859817609555323, + "grad_norm": 1.006494402885437, + "learning_rate": 0.00017, + "loss": 2.7791, + "step": 85 + }, + { + "epoch": 0.006940521346138326, + "grad_norm": 0.9672449827194214, + "learning_rate": 0.000172, + "loss": 2.6929, + "step": 86 + }, + { + "epoch": 0.00702122508272133, + "grad_norm": 0.9747781157493591, + "learning_rate": 0.000174, + "loss": 2.7676, + "step": 87 + }, + { + "epoch": 0.007101928819304334, + "grad_norm": 0.9193839430809021, + "learning_rate": 0.00017600000000000002, + "loss": 2.7124, + "step": 88 + }, + { + "epoch": 0.0071826325558873375, + "grad_norm": 1.078499436378479, + "learning_rate": 0.00017800000000000002, + "loss": 2.8018, + "step": 89 + }, + { + "epoch": 0.007263336292470342, + "grad_norm": 1.070957899093628, + "learning_rate": 0.00018, + "loss": 2.7889, + "step": 90 + }, + { + "epoch": 0.007344040029053345, + "grad_norm": 1.160942554473877, + "learning_rate": 0.000182, + "loss": 2.8026, + "step": 91 + }, + { + "epoch": 0.007424743765636349, + "grad_norm": 0.9988501071929932, + "learning_rate": 0.00018400000000000003, + "loss": 2.7746, + "step": 92 + }, + { + "epoch": 0.007505447502219353, + 
"grad_norm": 1.0882319211959839, + "learning_rate": 0.00018600000000000002, + "loss": 2.8105, + "step": 93 + }, + { + "epoch": 0.0075861512388023565, + "grad_norm": 1.1882357597351074, + "learning_rate": 0.000188, + "loss": 2.8294, + "step": 94 + }, + { + "epoch": 0.00766685497538536, + "grad_norm": 1.0761829614639282, + "learning_rate": 0.00019, + "loss": 2.7846, + "step": 95 + }, + { + "epoch": 0.007747558711968364, + "grad_norm": 1.0665982961654663, + "learning_rate": 0.000192, + "loss": 2.8542, + "step": 96 + }, + { + "epoch": 0.007828262448551369, + "grad_norm": 1.206127405166626, + "learning_rate": 0.000194, + "loss": 2.7711, + "step": 97 + }, + { + "epoch": 0.007908966185134371, + "grad_norm": 1.095150113105774, + "learning_rate": 0.000196, + "loss": 2.732, + "step": 98 + }, + { + "epoch": 0.007989669921717376, + "grad_norm": 1.118348240852356, + "learning_rate": 0.00019800000000000002, + "loss": 2.7736, + "step": 99 + }, + { + "epoch": 0.00807037365830038, + "grad_norm": 1.0646461248397827, + "learning_rate": 0.0002, + "loss": 2.8584, + "step": 100 + }, + { + "epoch": 0.008151077394883383, + "grad_norm": 1.0387661457061768, + "learning_rate": 0.0001999999987538693, + "loss": 2.7961, + "step": 101 + }, + { + "epoch": 0.008231781131466387, + "grad_norm": 1.1905474662780762, + "learning_rate": 0.00019999999501547723, + "loss": 2.8615, + "step": 102 + }, + { + "epoch": 0.008312484868049391, + "grad_norm": 0.9630722999572754, + "learning_rate": 0.0001999999887848239, + "loss": 2.8076, + "step": 103 + }, + { + "epoch": 0.008393188604632394, + "grad_norm": 1.1034537553787231, + "learning_rate": 0.00019999998006190942, + "loss": 2.8402, + "step": 104 + }, + { + "epoch": 0.008473892341215398, + "grad_norm": 1.0679295063018799, + "learning_rate": 0.00019999996884673403, + "loss": 2.7948, + "step": 105 + }, + { + "epoch": 0.008554596077798403, + "grad_norm": 1.0108860731124878, + "learning_rate": 0.00019999995513929802, + "loss": 2.7996, + "step": 106 + }, + { + 
"epoch": 0.008635299814381405, + "grad_norm": 1.3762084245681763, + "learning_rate": 0.0001999999389396017, + "loss": 2.8023, + "step": 107 + }, + { + "epoch": 0.00871600355096441, + "grad_norm": 1.1320533752441406, + "learning_rate": 0.00019999992024764555, + "loss": 2.793, + "step": 108 + }, + { + "epoch": 0.008796707287547414, + "grad_norm": 1.1752389669418335, + "learning_rate": 0.00019999989906342998, + "loss": 2.8274, + "step": 109 + }, + { + "epoch": 0.008877411024130416, + "grad_norm": 1.2734956741333008, + "learning_rate": 0.00019999987538695552, + "loss": 2.8017, + "step": 110 + }, + { + "epoch": 0.00895811476071342, + "grad_norm": 1.3703055381774902, + "learning_rate": 0.00019999984921822273, + "loss": 2.8699, + "step": 111 + }, + { + "epoch": 0.009038818497296425, + "grad_norm": 1.0079127550125122, + "learning_rate": 0.0001999998205572323, + "loss": 2.8845, + "step": 112 + }, + { + "epoch": 0.00911952223387943, + "grad_norm": 1.28025484085083, + "learning_rate": 0.000199999789403985, + "loss": 2.8636, + "step": 113 + }, + { + "epoch": 0.009200225970462432, + "grad_norm": 1.1057093143463135, + "learning_rate": 0.00019999975575848148, + "loss": 2.8484, + "step": 114 + }, + { + "epoch": 0.009280929707045436, + "grad_norm": 1.0874677896499634, + "learning_rate": 0.00019999971962072265, + "loss": 2.7314, + "step": 115 + }, + { + "epoch": 0.00936163344362844, + "grad_norm": 1.0909658670425415, + "learning_rate": 0.00019999968099070943, + "loss": 2.7827, + "step": 116 + }, + { + "epoch": 0.009442337180211443, + "grad_norm": 1.0881624221801758, + "learning_rate": 0.00019999963986844273, + "loss": 2.827, + "step": 117 + }, + { + "epoch": 0.009523040916794448, + "grad_norm": 1.2498180866241455, + "learning_rate": 0.00019999959625392362, + "loss": 2.8695, + "step": 118 + }, + { + "epoch": 0.009603744653377452, + "grad_norm": 1.1344549655914307, + "learning_rate": 0.00019999955014715317, + "loss": 2.8079, + "step": 119 + }, + { + "epoch": 0.009684448389960455, + 
"grad_norm": 1.032563328742981, + "learning_rate": 0.00019999950154813253, + "loss": 2.7787, + "step": 120 + }, + { + "epoch": 0.009765152126543459, + "grad_norm": 0.9630110263824463, + "learning_rate": 0.0001999994504568629, + "loss": 2.8103, + "step": 121 + }, + { + "epoch": 0.009845855863126463, + "grad_norm": 1.0418641567230225, + "learning_rate": 0.0001999993968733456, + "loss": 2.8679, + "step": 122 + }, + { + "epoch": 0.009926559599709466, + "grad_norm": 0.9797310829162598, + "learning_rate": 0.00019999934079758188, + "loss": 2.7792, + "step": 123 + }, + { + "epoch": 0.01000726333629247, + "grad_norm": 1.0494028329849243, + "learning_rate": 0.00019999928222957323, + "loss": 2.8007, + "step": 124 + }, + { + "epoch": 0.010087967072875475, + "grad_norm": 1.1570640802383423, + "learning_rate": 0.00019999922116932105, + "loss": 2.8331, + "step": 125 + }, + { + "epoch": 0.010168670809458477, + "grad_norm": 1.2753098011016846, + "learning_rate": 0.00019999915761682684, + "loss": 2.8533, + "step": 126 + }, + { + "epoch": 0.010249374546041481, + "grad_norm": 0.9804013967514038, + "learning_rate": 0.00019999909157209227, + "loss": 2.841, + "step": 127 + }, + { + "epoch": 0.010330078282624486, + "grad_norm": 1.320839285850525, + "learning_rate": 0.00019999902303511892, + "loss": 2.8738, + "step": 128 + }, + { + "epoch": 0.01041078201920749, + "grad_norm": 1.1105059385299683, + "learning_rate": 0.0001999989520059085, + "loss": 2.8458, + "step": 129 + }, + { + "epoch": 0.010491485755790493, + "grad_norm": 1.2869762182235718, + "learning_rate": 0.0001999988784844628, + "loss": 2.7951, + "step": 130 + }, + { + "epoch": 0.010572189492373497, + "grad_norm": 1.1609153747558594, + "learning_rate": 0.00019999880247078368, + "loss": 2.8147, + "step": 131 + }, + { + "epoch": 0.010652893228956501, + "grad_norm": 1.066728115081787, + "learning_rate": 0.00019999872396487297, + "loss": 2.863, + "step": 132 + }, + { + "epoch": 0.010733596965539504, + "grad_norm": 1.2868720293045044, + 
"learning_rate": 0.0001999986429667327, + "loss": 2.7765, + "step": 133 + }, + { + "epoch": 0.010814300702122508, + "grad_norm": 1.0064955949783325, + "learning_rate": 0.00019999855947636485, + "loss": 2.7834, + "step": 134 + }, + { + "epoch": 0.010895004438705513, + "grad_norm": 1.146589756011963, + "learning_rate": 0.00019999847349377143, + "loss": 2.7966, + "step": 135 + }, + { + "epoch": 0.010975708175288515, + "grad_norm": 0.9831073880195618, + "learning_rate": 0.0001999983850189547, + "loss": 2.8877, + "step": 136 + }, + { + "epoch": 0.01105641191187152, + "grad_norm": 1.1690322160720825, + "learning_rate": 0.0001999982940519168, + "loss": 2.8514, + "step": 137 + }, + { + "epoch": 0.011137115648454524, + "grad_norm": 1.0014944076538086, + "learning_rate": 0.00019999820059266003, + "loss": 2.7846, + "step": 138 + }, + { + "epoch": 0.011217819385037527, + "grad_norm": 0.9581566452980042, + "learning_rate": 0.0001999981046411867, + "loss": 2.7907, + "step": 139 + }, + { + "epoch": 0.011298523121620531, + "grad_norm": 1.1300675868988037, + "learning_rate": 0.00019999800619749922, + "loss": 2.8099, + "step": 140 + }, + { + "epoch": 0.011379226858203535, + "grad_norm": 0.9845526814460754, + "learning_rate": 0.0001999979052616, + "loss": 2.8607, + "step": 141 + }, + { + "epoch": 0.011459930594786538, + "grad_norm": 1.0781387090682983, + "learning_rate": 0.0001999978018334916, + "loss": 2.831, + "step": 142 + }, + { + "epoch": 0.011540634331369542, + "grad_norm": 1.1142648458480835, + "learning_rate": 0.00019999769591317658, + "loss": 2.9194, + "step": 143 + }, + { + "epoch": 0.011621338067952547, + "grad_norm": 0.9972650408744812, + "learning_rate": 0.00019999758750065757, + "loss": 2.8253, + "step": 144 + }, + { + "epoch": 0.01170204180453555, + "grad_norm": 1.040738582611084, + "learning_rate": 0.0001999974765959373, + "loss": 2.7378, + "step": 145 + }, + { + "epoch": 0.011782745541118553, + "grad_norm": 0.9824327826499939, + "learning_rate": 
0.00019999736319901848, + "loss": 2.8263, + "step": 146 + }, + { + "epoch": 0.011863449277701558, + "grad_norm": 1.0531679391860962, + "learning_rate": 0.00019999724730990402, + "loss": 2.7975, + "step": 147 + }, + { + "epoch": 0.011944153014284562, + "grad_norm": 1.0699561834335327, + "learning_rate": 0.0001999971289285967, + "loss": 2.8199, + "step": 148 + }, + { + "epoch": 0.012024856750867565, + "grad_norm": 1.0203633308410645, + "learning_rate": 0.0001999970080550996, + "loss": 2.8479, + "step": 149 + }, + { + "epoch": 0.012105560487450569, + "grad_norm": 1.035589575767517, + "learning_rate": 0.00019999688468941564, + "loss": 2.8263, + "step": 150 + }, + { + "epoch": 0.012186264224033573, + "grad_norm": 0.9706670641899109, + "learning_rate": 0.00019999675883154792, + "loss": 2.8324, + "step": 151 + }, + { + "epoch": 0.012266967960616576, + "grad_norm": 1.1565446853637695, + "learning_rate": 0.00019999663048149958, + "loss": 2.8098, + "step": 152 + }, + { + "epoch": 0.01234767169719958, + "grad_norm": 1.025796890258789, + "learning_rate": 0.0001999964996392738, + "loss": 2.7906, + "step": 153 + }, + { + "epoch": 0.012428375433782585, + "grad_norm": 1.117438554763794, + "learning_rate": 0.00019999636630487386, + "loss": 2.8276, + "step": 154 + }, + { + "epoch": 0.012509079170365587, + "grad_norm": 1.025159478187561, + "learning_rate": 0.00019999623047830308, + "loss": 2.8089, + "step": 155 + }, + { + "epoch": 0.012589782906948592, + "grad_norm": 1.007582664489746, + "learning_rate": 0.00019999609215956487, + "loss": 2.8147, + "step": 156 + }, + { + "epoch": 0.012670486643531596, + "grad_norm": 1.0504885911941528, + "learning_rate": 0.0001999959513486626, + "loss": 2.8329, + "step": 157 + }, + { + "epoch": 0.012751190380114599, + "grad_norm": 0.918382465839386, + "learning_rate": 0.00019999580804559987, + "loss": 2.878, + "step": 158 + }, + { + "epoch": 0.012831894116697603, + "grad_norm": 0.9397236704826355, + "learning_rate": 0.0001999956622503802, + "loss": 
2.8254, + "step": 159 + }, + { + "epoch": 0.012912597853280607, + "grad_norm": 0.9985697269439697, + "learning_rate": 0.00019999551396300723, + "loss": 2.8417, + "step": 160 + }, + { + "epoch": 0.01299330158986361, + "grad_norm": 0.9866878390312195, + "learning_rate": 0.00019999536318348465, + "loss": 2.7524, + "step": 161 + }, + { + "epoch": 0.013074005326446614, + "grad_norm": 1.0707440376281738, + "learning_rate": 0.00019999520991181627, + "loss": 2.8171, + "step": 162 + }, + { + "epoch": 0.013154709063029619, + "grad_norm": 0.9359755516052246, + "learning_rate": 0.00019999505414800583, + "loss": 2.8463, + "step": 163 + }, + { + "epoch": 0.013235412799612623, + "grad_norm": 1.056647777557373, + "learning_rate": 0.00019999489589205726, + "loss": 2.8602, + "step": 164 + }, + { + "epoch": 0.013316116536195625, + "grad_norm": 0.975370466709137, + "learning_rate": 0.0001999947351439745, + "loss": 2.8292, + "step": 165 + }, + { + "epoch": 0.01339682027277863, + "grad_norm": 0.9241237044334412, + "learning_rate": 0.00019999457190376157, + "loss": 2.7827, + "step": 166 + }, + { + "epoch": 0.013477524009361634, + "grad_norm": 0.9478302001953125, + "learning_rate": 0.00019999440617142247, + "loss": 2.7708, + "step": 167 + }, + { + "epoch": 0.013558227745944637, + "grad_norm": 0.9804863333702087, + "learning_rate": 0.00019999423794696142, + "loss": 2.7696, + "step": 168 + }, + { + "epoch": 0.013638931482527641, + "grad_norm": 0.9764013886451721, + "learning_rate": 0.00019999406723038255, + "loss": 2.8521, + "step": 169 + }, + { + "epoch": 0.013719635219110645, + "grad_norm": 1.026532769203186, + "learning_rate": 0.00019999389402169016, + "loss": 2.8507, + "step": 170 + }, + { + "epoch": 0.013800338955693648, + "grad_norm": 0.9983204007148743, + "learning_rate": 0.00019999371832088854, + "loss": 2.8761, + "step": 171 + }, + { + "epoch": 0.013881042692276652, + "grad_norm": 0.9914593696594238, + "learning_rate": 0.00019999354012798206, + "loss": 2.8723, + "step": 172 + }, + 
{ + "epoch": 0.013961746428859657, + "grad_norm": 1.066962718963623, + "learning_rate": 0.00019999335944297517, + "loss": 2.8635, + "step": 173 + }, + { + "epoch": 0.01404245016544266, + "grad_norm": 1.0848973989486694, + "learning_rate": 0.0001999931762658724, + "loss": 2.8645, + "step": 174 + }, + { + "epoch": 0.014123153902025664, + "grad_norm": 1.0245702266693115, + "learning_rate": 0.0001999929905966783, + "loss": 2.8463, + "step": 175 + }, + { + "epoch": 0.014203857638608668, + "grad_norm": 1.2363669872283936, + "learning_rate": 0.00019999280243539747, + "loss": 2.8345, + "step": 176 + }, + { + "epoch": 0.01428456137519167, + "grad_norm": 1.0224756002426147, + "learning_rate": 0.0001999926117820346, + "loss": 2.8309, + "step": 177 + }, + { + "epoch": 0.014365265111774675, + "grad_norm": 1.0882402658462524, + "learning_rate": 0.0001999924186365945, + "loss": 2.8619, + "step": 178 + }, + { + "epoch": 0.01444596884835768, + "grad_norm": 1.0384254455566406, + "learning_rate": 0.00019999222299908192, + "loss": 2.8477, + "step": 179 + }, + { + "epoch": 0.014526672584940684, + "grad_norm": 0.9662587642669678, + "learning_rate": 0.00019999202486950177, + "loss": 2.8087, + "step": 180 + }, + { + "epoch": 0.014607376321523686, + "grad_norm": 0.9086892604827881, + "learning_rate": 0.000199991824247859, + "loss": 2.7688, + "step": 181 + }, + { + "epoch": 0.01468808005810669, + "grad_norm": 1.004185676574707, + "learning_rate": 0.00019999162113415854, + "loss": 2.8237, + "step": 182 + }, + { + "epoch": 0.014768783794689695, + "grad_norm": 0.997965395450592, + "learning_rate": 0.00019999141552840552, + "loss": 2.8228, + "step": 183 + }, + { + "epoch": 0.014849487531272697, + "grad_norm": 0.9844975471496582, + "learning_rate": 0.00019999120743060503, + "loss": 2.8582, + "step": 184 + }, + { + "epoch": 0.014930191267855702, + "grad_norm": 1.0531272888183594, + "learning_rate": 0.00019999099684076232, + "loss": 2.8571, + "step": 185 + }, + { + "epoch": 0.015010895004438706, + 
"grad_norm": 1.1178920269012451, + "learning_rate": 0.00019999078375888257, + "loss": 2.85, + "step": 186 + }, + { + "epoch": 0.015091598741021709, + "grad_norm": 1.0773903131484985, + "learning_rate": 0.0001999905681849711, + "loss": 2.826, + "step": 187 + }, + { + "epoch": 0.015172302477604713, + "grad_norm": 1.1573486328125, + "learning_rate": 0.00019999035011903325, + "loss": 2.8866, + "step": 188 + }, + { + "epoch": 0.015253006214187717, + "grad_norm": 1.0401980876922607, + "learning_rate": 0.00019999012956107456, + "loss": 2.788, + "step": 189 + }, + { + "epoch": 0.01533370995077072, + "grad_norm": 1.0150686502456665, + "learning_rate": 0.00019998990651110045, + "loss": 2.8542, + "step": 190 + }, + { + "epoch": 0.015414413687353724, + "grad_norm": 1.1902797222137451, + "learning_rate": 0.0001999896809691165, + "loss": 2.9209, + "step": 191 + }, + { + "epoch": 0.015495117423936729, + "grad_norm": 1.0177555084228516, + "learning_rate": 0.0001999894529351283, + "loss": 2.7852, + "step": 192 + }, + { + "epoch": 0.015575821160519731, + "grad_norm": 1.062322974205017, + "learning_rate": 0.00019998922240914159, + "loss": 2.8328, + "step": 193 + }, + { + "epoch": 0.015656524897102737, + "grad_norm": 1.0937334299087524, + "learning_rate": 0.00019998898939116205, + "loss": 2.8069, + "step": 194 + }, + { + "epoch": 0.015737228633685738, + "grad_norm": 0.9553198218345642, + "learning_rate": 0.00019998875388119554, + "loss": 2.8402, + "step": 195 + }, + { + "epoch": 0.015817932370268743, + "grad_norm": 1.1802356243133545, + "learning_rate": 0.0001999885158792479, + "loss": 2.945, + "step": 196 + }, + { + "epoch": 0.015898636106851747, + "grad_norm": 1.160346269607544, + "learning_rate": 0.0001999882753853251, + "loss": 2.8341, + "step": 197 + }, + { + "epoch": 0.01597933984343475, + "grad_norm": 1.0379278659820557, + "learning_rate": 0.00019998803239943305, + "loss": 2.898, + "step": 198 + }, + { + "epoch": 0.016060043580017756, + "grad_norm": 1.2022395133972168, + 
"learning_rate": 0.00019998778692157792, + "loss": 2.8302, + "step": 199 + }, + { + "epoch": 0.01614074731660076, + "grad_norm": 1.057017207145691, + "learning_rate": 0.00019998753895176575, + "loss": 2.8474, + "step": 200 + }, + { + "epoch": 0.01622145105318376, + "grad_norm": 0.9299072027206421, + "learning_rate": 0.00019998728849000271, + "loss": 2.8266, + "step": 201 + }, + { + "epoch": 0.016302154789766765, + "grad_norm": 1.0296592712402344, + "learning_rate": 0.00019998703553629512, + "loss": 2.8106, + "step": 202 + }, + { + "epoch": 0.01638285852634977, + "grad_norm": 0.9641671180725098, + "learning_rate": 0.0001999867800906492, + "loss": 2.8089, + "step": 203 + }, + { + "epoch": 0.016463562262932774, + "grad_norm": 0.9951125383377075, + "learning_rate": 0.00019998652215307136, + "loss": 2.813, + "step": 204 + }, + { + "epoch": 0.016544265999515778, + "grad_norm": 1.0089969635009766, + "learning_rate": 0.00019998626172356804, + "loss": 2.8021, + "step": 205 + }, + { + "epoch": 0.016624969736098782, + "grad_norm": 0.9916231632232666, + "learning_rate": 0.00019998599880214566, + "loss": 2.8455, + "step": 206 + }, + { + "epoch": 0.016705673472681787, + "grad_norm": 0.9612492322921753, + "learning_rate": 0.00019998573338881088, + "loss": 2.8653, + "step": 207 + }, + { + "epoch": 0.016786377209264788, + "grad_norm": 0.984578013420105, + "learning_rate": 0.00019998546548357022, + "loss": 2.8359, + "step": 208 + }, + { + "epoch": 0.016867080945847792, + "grad_norm": 0.9457565546035767, + "learning_rate": 0.0001999851950864304, + "loss": 2.8507, + "step": 209 + }, + { + "epoch": 0.016947784682430796, + "grad_norm": 1.0219026803970337, + "learning_rate": 0.00019998492219739817, + "loss": 2.8326, + "step": 210 + }, + { + "epoch": 0.0170284884190138, + "grad_norm": 0.971570611000061, + "learning_rate": 0.00019998464681648032, + "loss": 2.8079, + "step": 211 + }, + { + "epoch": 0.017109192155596805, + "grad_norm": 0.9731320738792419, + "learning_rate": 
0.00019998436894368368, + "loss": 2.8536, + "step": 212 + }, + { + "epoch": 0.01718989589217981, + "grad_norm": 1.0519105195999146, + "learning_rate": 0.00019998408857901525, + "loss": 2.8589, + "step": 213 + }, + { + "epoch": 0.01727059962876281, + "grad_norm": 0.9725883603096008, + "learning_rate": 0.00019998380572248194, + "loss": 2.7937, + "step": 214 + }, + { + "epoch": 0.017351303365345815, + "grad_norm": 1.0397064685821533, + "learning_rate": 0.00019998352037409084, + "loss": 2.9145, + "step": 215 + }, + { + "epoch": 0.01743200710192882, + "grad_norm": 0.9094852209091187, + "learning_rate": 0.00019998323253384904, + "loss": 2.7692, + "step": 216 + }, + { + "epoch": 0.017512710838511823, + "grad_norm": 0.941646158695221, + "learning_rate": 0.00019998294220176374, + "loss": 2.7975, + "step": 217 + }, + { + "epoch": 0.017593414575094828, + "grad_norm": 0.9939892888069153, + "learning_rate": 0.00019998264937784216, + "loss": 2.8421, + "step": 218 + }, + { + "epoch": 0.017674118311677832, + "grad_norm": 0.8985795378684998, + "learning_rate": 0.0001999823540620916, + "loss": 2.8146, + "step": 219 + }, + { + "epoch": 0.017754822048260833, + "grad_norm": 1.0436078310012817, + "learning_rate": 0.00019998205625451943, + "loss": 2.8416, + "step": 220 + }, + { + "epoch": 0.017835525784843837, + "grad_norm": 0.9941675066947937, + "learning_rate": 0.00019998175595513305, + "loss": 2.8723, + "step": 221 + }, + { + "epoch": 0.01791622952142684, + "grad_norm": 0.9203903675079346, + "learning_rate": 0.00019998145316393995, + "loss": 2.7791, + "step": 222 + }, + { + "epoch": 0.017996933258009846, + "grad_norm": 0.9325969815254211, + "learning_rate": 0.00019998114788094768, + "loss": 2.8664, + "step": 223 + }, + { + "epoch": 0.01807763699459285, + "grad_norm": 0.9483599662780762, + "learning_rate": 0.00019998084010616388, + "loss": 2.7782, + "step": 224 + }, + { + "epoch": 0.018158340731175854, + "grad_norm": 0.9555078744888306, + "learning_rate": 0.00019998052983959615, + 
"loss": 2.7771, + "step": 225 + }, + { + "epoch": 0.01823904446775886, + "grad_norm": 0.9452421069145203, + "learning_rate": 0.00019998021708125233, + "loss": 2.8878, + "step": 226 + }, + { + "epoch": 0.01831974820434186, + "grad_norm": 0.9784894585609436, + "learning_rate": 0.00019997990183114007, + "loss": 2.8382, + "step": 227 + }, + { + "epoch": 0.018400451940924864, + "grad_norm": 1.0844931602478027, + "learning_rate": 0.00019997958408926735, + "loss": 2.8015, + "step": 228 + }, + { + "epoch": 0.01848115567750787, + "grad_norm": 1.0416710376739502, + "learning_rate": 0.00019997926385564207, + "loss": 2.8364, + "step": 229 + }, + { + "epoch": 0.018561859414090873, + "grad_norm": 0.9213813543319702, + "learning_rate": 0.00019997894113027215, + "loss": 2.8489, + "step": 230 + }, + { + "epoch": 0.018642563150673877, + "grad_norm": 1.0186388492584229, + "learning_rate": 0.00019997861591316567, + "loss": 2.914, + "step": 231 + }, + { + "epoch": 0.01872326688725688, + "grad_norm": 1.0032236576080322, + "learning_rate": 0.00019997828820433072, + "loss": 2.8733, + "step": 232 + }, + { + "epoch": 0.018803970623839882, + "grad_norm": 0.9783569574356079, + "learning_rate": 0.0001999779580037755, + "loss": 2.851, + "step": 233 + }, + { + "epoch": 0.018884674360422887, + "grad_norm": 0.8471441268920898, + "learning_rate": 0.00019997762531150825, + "loss": 2.7923, + "step": 234 + }, + { + "epoch": 0.01896537809700589, + "grad_norm": 0.8912937641143799, + "learning_rate": 0.00019997729012753717, + "loss": 2.8725, + "step": 235 + }, + { + "epoch": 0.019046081833588895, + "grad_norm": 1.2453325986862183, + "learning_rate": 0.00019997695245187075, + "loss": 2.9292, + "step": 236 + }, + { + "epoch": 0.0191267855701719, + "grad_norm": 0.8870908617973328, + "learning_rate": 0.0001999766122845173, + "loss": 2.8008, + "step": 237 + }, + { + "epoch": 0.019207489306754904, + "grad_norm": 1.0679768323898315, + "learning_rate": 0.0001999762696254853, + "loss": 2.8919, + "step": 238 + }, 
+ { + "epoch": 0.01928819304333791, + "grad_norm": 0.9769917130470276, + "learning_rate": 0.00019997592447478337, + "loss": 2.7937, + "step": 239 + }, + { + "epoch": 0.01936889677992091, + "grad_norm": 1.066183090209961, + "learning_rate": 0.00019997557683242004, + "loss": 2.8375, + "step": 240 + }, + { + "epoch": 0.019449600516503913, + "grad_norm": 0.9834103584289551, + "learning_rate": 0.000199975226698404, + "loss": 2.8577, + "step": 241 + }, + { + "epoch": 0.019530304253086918, + "grad_norm": 1.102211833000183, + "learning_rate": 0.00019997487407274396, + "loss": 2.8466, + "step": 242 + }, + { + "epoch": 0.019611007989669922, + "grad_norm": 0.9936226606369019, + "learning_rate": 0.00019997451895544872, + "loss": 2.7729, + "step": 243 + }, + { + "epoch": 0.019691711726252926, + "grad_norm": 1.0995992422103882, + "learning_rate": 0.00019997416134652713, + "loss": 2.8425, + "step": 244 + }, + { + "epoch": 0.01977241546283593, + "grad_norm": 0.94181889295578, + "learning_rate": 0.00019997380124598814, + "loss": 2.8495, + "step": 245 + }, + { + "epoch": 0.01985311919941893, + "grad_norm": 0.9791487455368042, + "learning_rate": 0.00019997343865384067, + "loss": 2.8919, + "step": 246 + }, + { + "epoch": 0.019933822936001936, + "grad_norm": 0.9173399209976196, + "learning_rate": 0.00019997307357009375, + "loss": 2.8593, + "step": 247 + }, + { + "epoch": 0.02001452667258494, + "grad_norm": 0.9675281047821045, + "learning_rate": 0.00019997270599475653, + "loss": 2.8226, + "step": 248 + }, + { + "epoch": 0.020095230409167945, + "grad_norm": 0.8928244113922119, + "learning_rate": 0.00019997233592783812, + "loss": 2.8296, + "step": 249 + }, + { + "epoch": 0.02017593414575095, + "grad_norm": 0.928601861000061, + "learning_rate": 0.0001999719633693478, + "loss": 2.8399, + "step": 250 + }, + { + "epoch": 0.020256637882333953, + "grad_norm": 0.9378123879432678, + "learning_rate": 0.00019997158831929482, + "loss": 2.8711, + "step": 251 + }, + { + "epoch": 0.020337341618916954, 
+ "grad_norm": 0.9041047692298889, + "learning_rate": 0.00019997121077768853, + "loss": 2.8338, + "step": 252 + }, + { + "epoch": 0.02041804535549996, + "grad_norm": 0.9673274755477905, + "learning_rate": 0.00019997083074453832, + "loss": 2.8556, + "step": 253 + }, + { + "epoch": 0.020498749092082963, + "grad_norm": 0.9204083681106567, + "learning_rate": 0.0001999704482198537, + "loss": 2.7954, + "step": 254 + }, + { + "epoch": 0.020579452828665967, + "grad_norm": 0.9267606735229492, + "learning_rate": 0.00019997006320364417, + "loss": 2.8656, + "step": 255 + }, + { + "epoch": 0.02066015656524897, + "grad_norm": 0.9562919735908508, + "learning_rate": 0.00019996967569591936, + "loss": 2.8406, + "step": 256 + }, + { + "epoch": 0.020740860301831976, + "grad_norm": 0.9065950512886047, + "learning_rate": 0.0001999692856966889, + "loss": 2.7856, + "step": 257 + }, + { + "epoch": 0.02082156403841498, + "grad_norm": 0.9136463403701782, + "learning_rate": 0.0001999688932059625, + "loss": 2.8083, + "step": 258 + }, + { + "epoch": 0.02090226777499798, + "grad_norm": 0.9785570502281189, + "learning_rate": 0.00019996849822374998, + "loss": 2.7984, + "step": 259 + }, + { + "epoch": 0.020982971511580985, + "grad_norm": 0.9549168348312378, + "learning_rate": 0.00019996810075006117, + "loss": 2.8048, + "step": 260 + }, + { + "epoch": 0.02106367524816399, + "grad_norm": 0.8923975825309753, + "learning_rate": 0.00019996770078490594, + "loss": 2.8559, + "step": 261 + }, + { + "epoch": 0.021144378984746994, + "grad_norm": 0.9516206383705139, + "learning_rate": 0.0001999672983282943, + "loss": 2.9171, + "step": 262 + }, + { + "epoch": 0.02122508272133, + "grad_norm": 0.9101666808128357, + "learning_rate": 0.0001999668933802363, + "loss": 2.8746, + "step": 263 + }, + { + "epoch": 0.021305786457913003, + "grad_norm": 0.9081267714500427, + "learning_rate": 0.00019996648594074195, + "loss": 2.8637, + "step": 264 + }, + { + "epoch": 0.021386490194496004, + "grad_norm": 1.0048178434371948, + 
"learning_rate": 0.0001999660760098215, + "loss": 2.8783, + "step": 265 + }, + { + "epoch": 0.021467193931079008, + "grad_norm": 0.9625924229621887, + "learning_rate": 0.0001999656635874851, + "loss": 2.8226, + "step": 266 + }, + { + "epoch": 0.021547897667662012, + "grad_norm": 0.9911805391311646, + "learning_rate": 0.00019996524867374306, + "loss": 2.8135, + "step": 267 + }, + { + "epoch": 0.021628601404245017, + "grad_norm": 0.8920134902000427, + "learning_rate": 0.00019996483126860572, + "loss": 2.7934, + "step": 268 + }, + { + "epoch": 0.02170930514082802, + "grad_norm": 1.0806514024734497, + "learning_rate": 0.00019996441137208346, + "loss": 2.8435, + "step": 269 + }, + { + "epoch": 0.021790008877411025, + "grad_norm": 0.9426547884941101, + "learning_rate": 0.00019996398898418675, + "loss": 2.7919, + "step": 270 + }, + { + "epoch": 0.021870712613994026, + "grad_norm": 0.9893020987510681, + "learning_rate": 0.00019996356410492615, + "loss": 2.8616, + "step": 271 + }, + { + "epoch": 0.02195141635057703, + "grad_norm": 1.0196046829223633, + "learning_rate": 0.00019996313673431218, + "loss": 2.8101, + "step": 272 + }, + { + "epoch": 0.022032120087160035, + "grad_norm": 0.9556699991226196, + "learning_rate": 0.00019996270687235558, + "loss": 2.8669, + "step": 273 + }, + { + "epoch": 0.02211282382374304, + "grad_norm": 0.8985902667045593, + "learning_rate": 0.00019996227451906702, + "loss": 2.8078, + "step": 274 + }, + { + "epoch": 0.022193527560326044, + "grad_norm": 1.0198246240615845, + "learning_rate": 0.00019996183967445726, + "loss": 2.8314, + "step": 275 + }, + { + "epoch": 0.022274231296909048, + "grad_norm": 0.9360179901123047, + "learning_rate": 0.00019996140233853715, + "loss": 2.7969, + "step": 276 + }, + { + "epoch": 0.022354935033492052, + "grad_norm": 1.0250160694122314, + "learning_rate": 0.00019996096251131759, + "loss": 2.7897, + "step": 277 + }, + { + "epoch": 0.022435638770075053, + "grad_norm": 0.934582531452179, + "learning_rate": 
0.00019996052019280954, + "loss": 2.8667, + "step": 278 + }, + { + "epoch": 0.022516342506658057, + "grad_norm": 0.9394461512565613, + "learning_rate": 0.00019996007538302407, + "loss": 2.7681, + "step": 279 + }, + { + "epoch": 0.022597046243241062, + "grad_norm": 0.9468861222267151, + "learning_rate": 0.00019995962808197216, + "loss": 2.7709, + "step": 280 + }, + { + "epoch": 0.022677749979824066, + "grad_norm": 0.9798515439033508, + "learning_rate": 0.00019995917828966506, + "loss": 2.8274, + "step": 281 + }, + { + "epoch": 0.02275845371640707, + "grad_norm": 1.0403941869735718, + "learning_rate": 0.00019995872600611395, + "loss": 2.8897, + "step": 282 + }, + { + "epoch": 0.022839157452990075, + "grad_norm": 0.9795030951499939, + "learning_rate": 0.00019995827123133006, + "loss": 2.8792, + "step": 283 + }, + { + "epoch": 0.022919861189573076, + "grad_norm": 0.9162538647651672, + "learning_rate": 0.00019995781396532479, + "loss": 2.8339, + "step": 284 + }, + { + "epoch": 0.02300056492615608, + "grad_norm": 1.0864707231521606, + "learning_rate": 0.00019995735420810947, + "loss": 2.8599, + "step": 285 + }, + { + "epoch": 0.023081268662739084, + "grad_norm": 0.9181776642799377, + "learning_rate": 0.0001999568919596956, + "loss": 2.8736, + "step": 286 + }, + { + "epoch": 0.02316197239932209, + "grad_norm": 0.8880531191825867, + "learning_rate": 0.00019995642722009472, + "loss": 2.8215, + "step": 287 + }, + { + "epoch": 0.023242676135905093, + "grad_norm": 0.9287240505218506, + "learning_rate": 0.00019995595998931835, + "loss": 2.844, + "step": 288 + }, + { + "epoch": 0.023323379872488097, + "grad_norm": 0.886894941329956, + "learning_rate": 0.0001999554902673782, + "loss": 2.8319, + "step": 289 + }, + { + "epoch": 0.0234040836090711, + "grad_norm": 0.9564458131790161, + "learning_rate": 0.0001999550180542859, + "loss": 2.8126, + "step": 290 + }, + { + "epoch": 0.023484787345654103, + "grad_norm": 0.8745970726013184, + "learning_rate": 0.00019995454335005334, + "loss": 
2.8344, + "step": 291 + }, + { + "epoch": 0.023565491082237107, + "grad_norm": 1.0343137979507446, + "learning_rate": 0.00019995406615469217, + "loss": 2.8498, + "step": 292 + }, + { + "epoch": 0.02364619481882011, + "grad_norm": 0.9951575994491577, + "learning_rate": 0.0001999535864682145, + "loss": 2.8655, + "step": 293 + }, + { + "epoch": 0.023726898555403116, + "grad_norm": 0.8457592725753784, + "learning_rate": 0.0001999531042906321, + "loss": 2.8189, + "step": 294 + }, + { + "epoch": 0.02380760229198612, + "grad_norm": 0.9126954674720764, + "learning_rate": 0.00019995261962195708, + "loss": 2.8272, + "step": 295 + }, + { + "epoch": 0.023888306028569124, + "grad_norm": 1.0171937942504883, + "learning_rate": 0.0001999521324622015, + "loss": 2.869, + "step": 296 + }, + { + "epoch": 0.023969009765152125, + "grad_norm": 0.9887226223945618, + "learning_rate": 0.00019995164281137753, + "loss": 2.7643, + "step": 297 + }, + { + "epoch": 0.02404971350173513, + "grad_norm": 1.4240798950195312, + "learning_rate": 0.00019995115066949733, + "loss": 2.8332, + "step": 298 + }, + { + "epoch": 0.024130417238318134, + "grad_norm": 0.9856921434402466, + "learning_rate": 0.00019995065603657316, + "loss": 2.8283, + "step": 299 + }, + { + "epoch": 0.024211120974901138, + "grad_norm": 0.997164785861969, + "learning_rate": 0.0001999501589126174, + "loss": 2.9164, + "step": 300 + }, + { + "epoch": 0.024291824711484142, + "grad_norm": 1.6480412483215332, + "learning_rate": 0.00019994965929764238, + "loss": 2.8941, + "step": 301 + }, + { + "epoch": 0.024372528448067147, + "grad_norm": 1.1590758562088013, + "learning_rate": 0.0001999491571916606, + "loss": 2.8127, + "step": 302 + }, + { + "epoch": 0.024453232184650148, + "grad_norm": 1.1228376626968384, + "learning_rate": 0.00019994865259468454, + "loss": 2.8439, + "step": 303 + }, + { + "epoch": 0.024533935921233152, + "grad_norm": 1.0426349639892578, + "learning_rate": 0.0001999481455067268, + "loss": 2.8671, + "step": 304 + }, + { + 
"epoch": 0.024614639657816156, + "grad_norm": 1.0911917686462402, + "learning_rate": 0.00019994763592779996, + "loss": 2.8297, + "step": 305 + }, + { + "epoch": 0.02469534339439916, + "grad_norm": 1.0493195056915283, + "learning_rate": 0.00019994712385791683, + "loss": 2.7996, + "step": 306 + }, + { + "epoch": 0.024776047130982165, + "grad_norm": 0.9275023341178894, + "learning_rate": 0.00019994660929709008, + "loss": 2.7949, + "step": 307 + }, + { + "epoch": 0.02485675086756517, + "grad_norm": 1.1074799299240112, + "learning_rate": 0.00019994609224533255, + "loss": 2.8364, + "step": 308 + }, + { + "epoch": 0.024937454604148174, + "grad_norm": 0.9189429879188538, + "learning_rate": 0.00019994557270265717, + "loss": 2.8293, + "step": 309 + }, + { + "epoch": 0.025018158340731175, + "grad_norm": 0.9577780961990356, + "learning_rate": 0.00019994505066907683, + "loss": 2.8295, + "step": 310 + }, + { + "epoch": 0.02509886207731418, + "grad_norm": 1.0707277059555054, + "learning_rate": 0.0001999445261446046, + "loss": 2.795, + "step": 311 + }, + { + "epoch": 0.025179565813897183, + "grad_norm": 0.9211257696151733, + "learning_rate": 0.0001999439991292535, + "loss": 2.8355, + "step": 312 + }, + { + "epoch": 0.025260269550480188, + "grad_norm": 0.987779438495636, + "learning_rate": 0.00019994346962303667, + "loss": 2.8175, + "step": 313 + }, + { + "epoch": 0.025340973287063192, + "grad_norm": 0.9317128658294678, + "learning_rate": 0.00019994293762596734, + "loss": 2.8205, + "step": 314 + }, + { + "epoch": 0.025421677023646196, + "grad_norm": 0.8989154100418091, + "learning_rate": 0.00019994240313805873, + "loss": 2.8257, + "step": 315 + }, + { + "epoch": 0.025502380760229197, + "grad_norm": 0.8391042351722717, + "learning_rate": 0.00019994186615932423, + "loss": 2.8105, + "step": 316 + }, + { + "epoch": 0.0255830844968122, + "grad_norm": 0.8908089995384216, + "learning_rate": 0.00019994132668977715, + "loss": 2.7894, + "step": 317 + }, + { + "epoch": 0.025663788233395206, + 
"grad_norm": 0.8666881322860718, + "learning_rate": 0.00019994078472943097, + "loss": 2.7934, + "step": 318 + }, + { + "epoch": 0.02574449196997821, + "grad_norm": 0.8834616541862488, + "learning_rate": 0.00019994024027829914, + "loss": 2.8166, + "step": 319 + }, + { + "epoch": 0.025825195706561214, + "grad_norm": 0.9831370115280151, + "learning_rate": 0.00019993969333639532, + "loss": 2.889, + "step": 320 + }, + { + "epoch": 0.02590589944314422, + "grad_norm": 0.9171644449234009, + "learning_rate": 0.00019993914390373308, + "loss": 2.8582, + "step": 321 + }, + { + "epoch": 0.02598660317972722, + "grad_norm": 0.9624861478805542, + "learning_rate": 0.00019993859198032615, + "loss": 2.8574, + "step": 322 + }, + { + "epoch": 0.026067306916310224, + "grad_norm": 0.8826586008071899, + "learning_rate": 0.00019993803756618826, + "loss": 2.8544, + "step": 323 + }, + { + "epoch": 0.02614801065289323, + "grad_norm": 0.9286447763442993, + "learning_rate": 0.0001999374806613332, + "loss": 2.7937, + "step": 324 + }, + { + "epoch": 0.026228714389476233, + "grad_norm": 0.9901685118675232, + "learning_rate": 0.00019993692126577493, + "loss": 2.7654, + "step": 325 + }, + { + "epoch": 0.026309418126059237, + "grad_norm": 0.9624341130256653, + "learning_rate": 0.00019993635937952734, + "loss": 2.8804, + "step": 326 + }, + { + "epoch": 0.02639012186264224, + "grad_norm": 0.8867596387863159, + "learning_rate": 0.0001999357950026044, + "loss": 2.8254, + "step": 327 + }, + { + "epoch": 0.026470825599225246, + "grad_norm": 0.9243817925453186, + "learning_rate": 0.00019993522813502022, + "loss": 2.8177, + "step": 328 + }, + { + "epoch": 0.026551529335808247, + "grad_norm": 0.9322247505187988, + "learning_rate": 0.00019993465877678895, + "loss": 2.9023, + "step": 329 + }, + { + "epoch": 0.02663223307239125, + "grad_norm": 0.8768174648284912, + "learning_rate": 0.00019993408692792474, + "loss": 2.8184, + "step": 330 + }, + { + "epoch": 0.026712936808974255, + "grad_norm": 0.9436870813369751, 
+ "learning_rate": 0.00019993351258844184, + "loss": 2.8319, + "step": 331 + }, + { + "epoch": 0.02679364054555726, + "grad_norm": 0.9970327019691467, + "learning_rate": 0.0001999329357583546, + "loss": 2.7946, + "step": 332 + }, + { + "epoch": 0.026874344282140264, + "grad_norm": 0.9100088477134705, + "learning_rate": 0.00019993235643767736, + "loss": 2.782, + "step": 333 + }, + { + "epoch": 0.02695504801872327, + "grad_norm": 0.9693402051925659, + "learning_rate": 0.00019993177462642456, + "loss": 2.8182, + "step": 334 + }, + { + "epoch": 0.02703575175530627, + "grad_norm": 0.8761965036392212, + "learning_rate": 0.00019993119032461073, + "loss": 2.8058, + "step": 335 + }, + { + "epoch": 0.027116455491889273, + "grad_norm": 1.0699270963668823, + "learning_rate": 0.00019993060353225043, + "loss": 2.9211, + "step": 336 + }, + { + "epoch": 0.027197159228472278, + "grad_norm": 1.0094172954559326, + "learning_rate": 0.00019993001424935822, + "loss": 2.8837, + "step": 337 + }, + { + "epoch": 0.027277862965055282, + "grad_norm": 0.9683573842048645, + "learning_rate": 0.00019992942247594887, + "loss": 2.8523, + "step": 338 + }, + { + "epoch": 0.027358566701638286, + "grad_norm": 1.3243813514709473, + "learning_rate": 0.00019992882821203708, + "loss": 2.7891, + "step": 339 + }, + { + "epoch": 0.02743927043822129, + "grad_norm": 1.0227056741714478, + "learning_rate": 0.0001999282314576377, + "loss": 2.8396, + "step": 340 + }, + { + "epoch": 0.027519974174804295, + "grad_norm": 1.03257417678833, + "learning_rate": 0.00019992763221276556, + "loss": 2.824, + "step": 341 + }, + { + "epoch": 0.027600677911387296, + "grad_norm": 0.86456698179245, + "learning_rate": 0.00019992703047743562, + "loss": 2.8006, + "step": 342 + }, + { + "epoch": 0.0276813816479703, + "grad_norm": 0.965339720249176, + "learning_rate": 0.00019992642625166286, + "loss": 2.8658, + "step": 343 + }, + { + "epoch": 0.027762085384553305, + "grad_norm": 1.0028942823410034, + "learning_rate": 
0.00019992581953546236, + "loss": 2.8311, + "step": 344 + }, + { + "epoch": 0.02784278912113631, + "grad_norm": 0.984307050704956, + "learning_rate": 0.0001999252103288492, + "loss": 2.8748, + "step": 345 + }, + { + "epoch": 0.027923492857719313, + "grad_norm": 0.9405032396316528, + "learning_rate": 0.00019992459863183858, + "loss": 2.8371, + "step": 346 + }, + { + "epoch": 0.028004196594302318, + "grad_norm": 0.9867002367973328, + "learning_rate": 0.0001999239844444458, + "loss": 2.7914, + "step": 347 + }, + { + "epoch": 0.02808490033088532, + "grad_norm": 0.9224951267242432, + "learning_rate": 0.00019992336776668613, + "loss": 2.7986, + "step": 348 + }, + { + "epoch": 0.028165604067468323, + "grad_norm": 1.002838134765625, + "learning_rate": 0.0001999227485985749, + "loss": 2.8207, + "step": 349 + }, + { + "epoch": 0.028246307804051327, + "grad_norm": 0.8922045826911926, + "learning_rate": 0.00019992212694012757, + "loss": 2.8264, + "step": 350 + }, + { + "epoch": 0.02832701154063433, + "grad_norm": 1.0860323905944824, + "learning_rate": 0.00019992150279135964, + "loss": 2.8778, + "step": 351 + }, + { + "epoch": 0.028407715277217336, + "grad_norm": 1.0995604991912842, + "learning_rate": 0.0001999208761522867, + "loss": 2.8599, + "step": 352 + }, + { + "epoch": 0.02848841901380034, + "grad_norm": 0.8741658926010132, + "learning_rate": 0.0001999202470229243, + "loss": 2.7757, + "step": 353 + }, + { + "epoch": 0.02856912275038334, + "grad_norm": 0.9142587184906006, + "learning_rate": 0.00019991961540328815, + "loss": 2.8235, + "step": 354 + }, + { + "epoch": 0.028649826486966345, + "grad_norm": 1.0000953674316406, + "learning_rate": 0.000199918981293394, + "loss": 2.8, + "step": 355 + }, + { + "epoch": 0.02873053022354935, + "grad_norm": 0.9416046738624573, + "learning_rate": 0.00019991834469325763, + "loss": 2.7941, + "step": 356 + }, + { + "epoch": 0.028811233960132354, + "grad_norm": 0.9135935306549072, + "learning_rate": 0.00019991770560289496, + "loss": 2.8315, 
+ "step": 357 + }, + { + "epoch": 0.02889193769671536, + "grad_norm": 0.8867244124412537, + "learning_rate": 0.00019991706402232184, + "loss": 2.8649, + "step": 358 + }, + { + "epoch": 0.028972641433298363, + "grad_norm": 0.9360243678092957, + "learning_rate": 0.00019991641995155431, + "loss": 2.7556, + "step": 359 + }, + { + "epoch": 0.029053345169881367, + "grad_norm": 0.8903766870498657, + "learning_rate": 0.00019991577339060842, + "loss": 2.8379, + "step": 360 + }, + { + "epoch": 0.029134048906464368, + "grad_norm": 1.0178784132003784, + "learning_rate": 0.00019991512433950023, + "loss": 2.8045, + "step": 361 + }, + { + "epoch": 0.029214752643047372, + "grad_norm": 0.9318631887435913, + "learning_rate": 0.000199914472798246, + "loss": 2.823, + "step": 362 + }, + { + "epoch": 0.029295456379630377, + "grad_norm": 0.9384647011756897, + "learning_rate": 0.00019991381876686195, + "loss": 2.9379, + "step": 363 + }, + { + "epoch": 0.02937616011621338, + "grad_norm": 0.9318633675575256, + "learning_rate": 0.00019991316224536433, + "loss": 2.8222, + "step": 364 + }, + { + "epoch": 0.029456863852796385, + "grad_norm": 0.8653938174247742, + "learning_rate": 0.00019991250323376952, + "loss": 2.8447, + "step": 365 + }, + { + "epoch": 0.02953756758937939, + "grad_norm": 0.8997991681098938, + "learning_rate": 0.00019991184173209398, + "loss": 2.8523, + "step": 366 + }, + { + "epoch": 0.02961827132596239, + "grad_norm": 0.8587092161178589, + "learning_rate": 0.00019991117774035416, + "loss": 2.8141, + "step": 367 + }, + { + "epoch": 0.029698975062545395, + "grad_norm": 0.8740741014480591, + "learning_rate": 0.00019991051125856663, + "loss": 2.7487, + "step": 368 + }, + { + "epoch": 0.0297796787991284, + "grad_norm": 0.9099416732788086, + "learning_rate": 0.00019990984228674798, + "loss": 2.834, + "step": 369 + }, + { + "epoch": 0.029860382535711404, + "grad_norm": 0.8675365447998047, + "learning_rate": 0.0001999091708249149, + "loss": 2.8259, + "step": 370 + }, + { + "epoch": 
0.029941086272294408, + "grad_norm": 1.0141092538833618, + "learning_rate": 0.00019990849687308412, + "loss": 2.8369, + "step": 371 + }, + { + "epoch": 0.030021790008877412, + "grad_norm": 0.849155604839325, + "learning_rate": 0.00019990782043127243, + "loss": 2.7505, + "step": 372 + }, + { + "epoch": 0.030102493745460413, + "grad_norm": 1.073754072189331, + "learning_rate": 0.0001999071414994967, + "loss": 2.8939, + "step": 373 + }, + { + "epoch": 0.030183197482043417, + "grad_norm": 0.8615279197692871, + "learning_rate": 0.00019990646007777383, + "loss": 2.7662, + "step": 374 + }, + { + "epoch": 0.030263901218626422, + "grad_norm": 0.8803398609161377, + "learning_rate": 0.0001999057761661208, + "loss": 2.7992, + "step": 375 + }, + { + "epoch": 0.030344604955209426, + "grad_norm": 0.8901834487915039, + "learning_rate": 0.00019990508976455473, + "loss": 2.8222, + "step": 376 + }, + { + "epoch": 0.03042530869179243, + "grad_norm": 0.9443284869194031, + "learning_rate": 0.00019990440087309263, + "loss": 2.8326, + "step": 377 + }, + { + "epoch": 0.030506012428375435, + "grad_norm": 0.9122868180274963, + "learning_rate": 0.0001999037094917517, + "loss": 2.7653, + "step": 378 + }, + { + "epoch": 0.03058671616495844, + "grad_norm": 0.8764635920524597, + "learning_rate": 0.0001999030156205492, + "loss": 2.7813, + "step": 379 + }, + { + "epoch": 0.03066741990154144, + "grad_norm": 0.8466865420341492, + "learning_rate": 0.0001999023192595024, + "loss": 2.8338, + "step": 380 + }, + { + "epoch": 0.030748123638124444, + "grad_norm": 0.8833961486816406, + "learning_rate": 0.00019990162040862863, + "loss": 2.78, + "step": 381 + }, + { + "epoch": 0.03082882737470745, + "grad_norm": 1.0298357009887695, + "learning_rate": 0.00019990091906794537, + "loss": 2.8059, + "step": 382 + }, + { + "epoch": 0.030909531111290453, + "grad_norm": 0.8651318550109863, + "learning_rate": 0.00019990021523747005, + "loss": 2.8608, + "step": 383 + }, + { + "epoch": 0.030990234847873457, + "grad_norm": 
1.0262864828109741, + "learning_rate": 0.0001998995089172202, + "loss": 2.8226, + "step": 384 + }, + { + "epoch": 0.03107093858445646, + "grad_norm": 0.9266276955604553, + "learning_rate": 0.00019989880010721348, + "loss": 2.9414, + "step": 385 + }, + { + "epoch": 0.031151642321039463, + "grad_norm": 0.8762117028236389, + "learning_rate": 0.00019989808880746749, + "loss": 2.8023, + "step": 386 + }, + { + "epoch": 0.031232346057622467, + "grad_norm": 0.8531816601753235, + "learning_rate": 0.00019989737501800004, + "loss": 2.777, + "step": 387 + }, + { + "epoch": 0.031313049794205475, + "grad_norm": 0.8999545574188232, + "learning_rate": 0.0001998966587388288, + "loss": 2.8656, + "step": 388 + }, + { + "epoch": 0.03139375353078847, + "grad_norm": 0.932248055934906, + "learning_rate": 0.00019989593996997177, + "loss": 2.8212, + "step": 389 + }, + { + "epoch": 0.031474457267371476, + "grad_norm": 0.9059134125709534, + "learning_rate": 0.00019989521871144672, + "loss": 2.7945, + "step": 390 + }, + { + "epoch": 0.03155516100395448, + "grad_norm": 0.9323028922080994, + "learning_rate": 0.00019989449496327172, + "loss": 2.8338, + "step": 391 + }, + { + "epoch": 0.031635864740537485, + "grad_norm": 0.9141251444816589, + "learning_rate": 0.0001998937687254648, + "loss": 2.7935, + "step": 392 + }, + { + "epoch": 0.03171656847712049, + "grad_norm": 1.0026880502700806, + "learning_rate": 0.000199893039998044, + "loss": 2.8811, + "step": 393 + }, + { + "epoch": 0.031797272213703494, + "grad_norm": 1.0178622007369995, + "learning_rate": 0.00019989230878102756, + "loss": 2.9003, + "step": 394 + }, + { + "epoch": 0.0318779759502865, + "grad_norm": 0.9111912846565247, + "learning_rate": 0.00019989157507443363, + "loss": 2.8399, + "step": 395 + }, + { + "epoch": 0.0319586796868695, + "grad_norm": 1.054563283920288, + "learning_rate": 0.00019989083887828052, + "loss": 2.9088, + "step": 396 + }, + { + "epoch": 0.03203938342345251, + "grad_norm": 0.9459816217422485, + "learning_rate": 
0.00019989010019258663, + "loss": 2.805, + "step": 397 + }, + { + "epoch": 0.03212008716003551, + "grad_norm": 1.0139873027801514, + "learning_rate": 0.00019988935901737033, + "loss": 2.8452, + "step": 398 + }, + { + "epoch": 0.032200790896618516, + "grad_norm": 0.986325204372406, + "learning_rate": 0.00019988861535265006, + "loss": 2.8311, + "step": 399 + }, + { + "epoch": 0.03228149463320152, + "grad_norm": 0.9565223455429077, + "learning_rate": 0.00019988786919844436, + "loss": 2.7766, + "step": 400 + }, + { + "epoch": 0.032362198369784524, + "grad_norm": 0.8901559710502625, + "learning_rate": 0.0001998871205547719, + "loss": 2.7966, + "step": 401 + }, + { + "epoch": 0.03244290210636752, + "grad_norm": 1.0959528684616089, + "learning_rate": 0.00019988636942165123, + "loss": 2.8377, + "step": 402 + }, + { + "epoch": 0.032523605842950526, + "grad_norm": 1.0768988132476807, + "learning_rate": 0.00019988561579910118, + "loss": 2.8267, + "step": 403 + }, + { + "epoch": 0.03260430957953353, + "grad_norm": 0.9563855528831482, + "learning_rate": 0.00019988485968714048, + "loss": 2.8459, + "step": 404 + }, + { + "epoch": 0.032685013316116535, + "grad_norm": 0.930927038192749, + "learning_rate": 0.00019988410108578796, + "loss": 2.8053, + "step": 405 + }, + { + "epoch": 0.03276571705269954, + "grad_norm": 1.0658363103866577, + "learning_rate": 0.00019988333999506255, + "loss": 2.8512, + "step": 406 + }, + { + "epoch": 0.03284642078928254, + "grad_norm": 0.9258090257644653, + "learning_rate": 0.0001998825764149832, + "loss": 2.8541, + "step": 407 + }, + { + "epoch": 0.03292712452586555, + "grad_norm": 1.18158757686615, + "learning_rate": 0.00019988181034556895, + "loss": 2.8838, + "step": 408 + }, + { + "epoch": 0.03300782826244855, + "grad_norm": 0.9506754875183105, + "learning_rate": 0.00019988104178683891, + "loss": 2.7733, + "step": 409 + }, + { + "epoch": 0.033088531999031556, + "grad_norm": 0.9559460282325745, + "learning_rate": 0.0001998802707388122, + "loss": 
2.9259, + "step": 410 + }, + { + "epoch": 0.03316923573561456, + "grad_norm": 0.9322298765182495, + "learning_rate": 0.00019987949720150808, + "loss": 2.8318, + "step": 411 + }, + { + "epoch": 0.033249939472197565, + "grad_norm": 0.9226691722869873, + "learning_rate": 0.00019987872117494576, + "loss": 2.9063, + "step": 412 + }, + { + "epoch": 0.03333064320878057, + "grad_norm": 1.0543674230575562, + "learning_rate": 0.00019987794265914464, + "loss": 2.7877, + "step": 413 + }, + { + "epoch": 0.033411346945363574, + "grad_norm": 0.989986002445221, + "learning_rate": 0.00019987716165412408, + "loss": 2.8354, + "step": 414 + }, + { + "epoch": 0.03349205068194657, + "grad_norm": 0.8703451752662659, + "learning_rate": 0.0001998763781599036, + "loss": 2.8127, + "step": 415 + }, + { + "epoch": 0.033572754418529575, + "grad_norm": 0.974943220615387, + "learning_rate": 0.0001998755921765027, + "loss": 2.9272, + "step": 416 + }, + { + "epoch": 0.03365345815511258, + "grad_norm": 0.8714169859886169, + "learning_rate": 0.000199874803703941, + "loss": 2.8027, + "step": 417 + }, + { + "epoch": 0.033734161891695584, + "grad_norm": 0.9251161217689514, + "learning_rate": 0.00019987401274223804, + "loss": 2.8186, + "step": 418 + }, + { + "epoch": 0.03381486562827859, + "grad_norm": 0.9657236933708191, + "learning_rate": 0.00019987321929141366, + "loss": 2.8297, + "step": 419 + }, + { + "epoch": 0.03389556936486159, + "grad_norm": 0.9022002816200256, + "learning_rate": 0.00019987242335148757, + "loss": 2.881, + "step": 420 + }, + { + "epoch": 0.0339762731014446, + "grad_norm": 0.9479621052742004, + "learning_rate": 0.0001998716249224796, + "loss": 2.8288, + "step": 421 + }, + { + "epoch": 0.0340569768380276, + "grad_norm": 0.9458955526351929, + "learning_rate": 0.00019987082400440968, + "loss": 2.8861, + "step": 422 + }, + { + "epoch": 0.034137680574610606, + "grad_norm": 0.9444572329521179, + "learning_rate": 0.0001998700205972978, + "loss": 2.8877, + "step": 423 + }, + { + "epoch": 
0.03421838431119361, + "grad_norm": 0.9263925552368164, + "learning_rate": 0.00019986921470116392, + "loss": 2.8028, + "step": 424 + }, + { + "epoch": 0.034299088047776614, + "grad_norm": 1.0690566301345825, + "learning_rate": 0.00019986840631602812, + "loss": 2.882, + "step": 425 + }, + { + "epoch": 0.03437979178435962, + "grad_norm": 0.8999007940292358, + "learning_rate": 0.0001998675954419106, + "loss": 2.8179, + "step": 426 + }, + { + "epoch": 0.03446049552094262, + "grad_norm": 0.894395112991333, + "learning_rate": 0.00019986678207883153, + "loss": 2.814, + "step": 427 + }, + { + "epoch": 0.03454119925752562, + "grad_norm": 0.8621550798416138, + "learning_rate": 0.00019986596622681123, + "loss": 2.7584, + "step": 428 + }, + { + "epoch": 0.034621902994108625, + "grad_norm": 0.9452527165412903, + "learning_rate": 0.00019986514788587, + "loss": 2.8949, + "step": 429 + }, + { + "epoch": 0.03470260673069163, + "grad_norm": 0.8973272442817688, + "learning_rate": 0.0001998643270560282, + "loss": 2.868, + "step": 430 + }, + { + "epoch": 0.034783310467274633, + "grad_norm": 0.9887418150901794, + "learning_rate": 0.00019986350373730634, + "loss": 2.8009, + "step": 431 + }, + { + "epoch": 0.03486401420385764, + "grad_norm": 0.9449994564056396, + "learning_rate": 0.0001998626779297249, + "loss": 2.8305, + "step": 432 + }, + { + "epoch": 0.03494471794044064, + "grad_norm": 1.052871823310852, + "learning_rate": 0.0001998618496333045, + "loss": 2.8136, + "step": 433 + }, + { + "epoch": 0.035025421677023647, + "grad_norm": 0.9600724577903748, + "learning_rate": 0.00019986101884806576, + "loss": 2.7857, + "step": 434 + }, + { + "epoch": 0.03510612541360665, + "grad_norm": 0.874043345451355, + "learning_rate": 0.00019986018557402942, + "loss": 2.8524, + "step": 435 + }, + { + "epoch": 0.035186829150189655, + "grad_norm": 0.9810616374015808, + "learning_rate": 0.0001998593498112162, + "loss": 2.7506, + "step": 436 + }, + { + "epoch": 0.03526753288677266, + "grad_norm": 
0.9163016080856323, + "learning_rate": 0.00019985851155964693, + "loss": 2.798, + "step": 437 + }, + { + "epoch": 0.035348236623355664, + "grad_norm": 1.0688380002975464, + "learning_rate": 0.00019985767081934252, + "loss": 2.8916, + "step": 438 + }, + { + "epoch": 0.03542894035993867, + "grad_norm": 0.925020158290863, + "learning_rate": 0.00019985682759032393, + "loss": 2.8017, + "step": 439 + }, + { + "epoch": 0.035509644096521666, + "grad_norm": 0.9429430961608887, + "learning_rate": 0.0001998559818726122, + "loss": 2.837, + "step": 440 + }, + { + "epoch": 0.03559034783310467, + "grad_norm": 0.9135627150535583, + "learning_rate": 0.00019985513366622832, + "loss": 2.8423, + "step": 441 + }, + { + "epoch": 0.035671051569687674, + "grad_norm": 0.9218924045562744, + "learning_rate": 0.00019985428297119353, + "loss": 2.854, + "step": 442 + }, + { + "epoch": 0.03575175530627068, + "grad_norm": 0.9307878613471985, + "learning_rate": 0.00019985342978752897, + "loss": 2.8591, + "step": 443 + }, + { + "epoch": 0.03583245904285368, + "grad_norm": 0.935394287109375, + "learning_rate": 0.00019985257411525592, + "loss": 2.8388, + "step": 444 + }, + { + "epoch": 0.03591316277943669, + "grad_norm": 0.890959620475769, + "learning_rate": 0.0001998517159543957, + "loss": 2.78, + "step": 445 + }, + { + "epoch": 0.03599386651601969, + "grad_norm": 1.110924482345581, + "learning_rate": 0.0001998508553049697, + "loss": 2.8117, + "step": 446 + }, + { + "epoch": 0.036074570252602696, + "grad_norm": 0.8774176239967346, + "learning_rate": 0.0001998499921669994, + "loss": 2.8368, + "step": 447 + }, + { + "epoch": 0.0361552739891857, + "grad_norm": 0.9766948819160461, + "learning_rate": 0.00019984912654050625, + "loss": 2.764, + "step": 448 + }, + { + "epoch": 0.036235977725768705, + "grad_norm": 1.1439398527145386, + "learning_rate": 0.00019984825842551187, + "loss": 2.84, + "step": 449 + }, + { + "epoch": 0.03631668146235171, + "grad_norm": 0.8995118737220764, + "learning_rate": 
0.0001998473878220379, + "loss": 2.834, + "step": 450 + }, + { + "epoch": 0.03639738519893471, + "grad_norm": 0.9810060858726501, + "learning_rate": 0.000199846514730106, + "loss": 2.9338, + "step": 451 + }, + { + "epoch": 0.03647808893551772, + "grad_norm": 1.0862053632736206, + "learning_rate": 0.00019984563914973795, + "loss": 2.837, + "step": 452 + }, + { + "epoch": 0.036558792672100715, + "grad_norm": 0.9456702470779419, + "learning_rate": 0.0001998447610809556, + "loss": 2.7664, + "step": 453 + }, + { + "epoch": 0.03663949640868372, + "grad_norm": 1.0714432001113892, + "learning_rate": 0.0001998438805237808, + "loss": 2.8339, + "step": 454 + }, + { + "epoch": 0.036720200145266724, + "grad_norm": 0.89134281873703, + "learning_rate": 0.00019984299747823547, + "loss": 2.7818, + "step": 455 + }, + { + "epoch": 0.03680090388184973, + "grad_norm": 0.869742214679718, + "learning_rate": 0.0001998421119443417, + "loss": 2.7916, + "step": 456 + }, + { + "epoch": 0.03688160761843273, + "grad_norm": 0.9307265281677246, + "learning_rate": 0.00019984122392212149, + "loss": 2.8485, + "step": 457 + }, + { + "epoch": 0.03696231135501574, + "grad_norm": 0.900215744972229, + "learning_rate": 0.00019984033341159698, + "loss": 2.8536, + "step": 458 + }, + { + "epoch": 0.03704301509159874, + "grad_norm": 0.8679699897766113, + "learning_rate": 0.00019983944041279038, + "loss": 2.8344, + "step": 459 + }, + { + "epoch": 0.037123718828181745, + "grad_norm": 0.9540488719940186, + "learning_rate": 0.00019983854492572394, + "loss": 2.873, + "step": 460 + }, + { + "epoch": 0.03720442256476475, + "grad_norm": 0.8697962760925293, + "learning_rate": 0.00019983764695042, + "loss": 2.8122, + "step": 461 + }, + { + "epoch": 0.037285126301347754, + "grad_norm": 0.9534483551979065, + "learning_rate": 0.0001998367464869009, + "loss": 2.8842, + "step": 462 + }, + { + "epoch": 0.03736583003793076, + "grad_norm": 0.8402275443077087, + "learning_rate": 0.00019983584353518911, + "loss": 2.8135, + 
"step": 463 + }, + { + "epoch": 0.03744653377451376, + "grad_norm": 0.8226146697998047, + "learning_rate": 0.0001998349380953071, + "loss": 2.8036, + "step": 464 + }, + { + "epoch": 0.03752723751109677, + "grad_norm": 0.9292199611663818, + "learning_rate": 0.0001998340301672775, + "loss": 2.7887, + "step": 465 + }, + { + "epoch": 0.037607941247679764, + "grad_norm": 0.9035555124282837, + "learning_rate": 0.0001998331197511229, + "loss": 2.7851, + "step": 466 + }, + { + "epoch": 0.03768864498426277, + "grad_norm": 0.9411706328392029, + "learning_rate": 0.00019983220684686596, + "loss": 2.7782, + "step": 467 + }, + { + "epoch": 0.03776934872084577, + "grad_norm": 0.9867696166038513, + "learning_rate": 0.0001998312914545295, + "loss": 2.8125, + "step": 468 + }, + { + "epoch": 0.03785005245742878, + "grad_norm": 0.9683675169944763, + "learning_rate": 0.00019983037357413624, + "loss": 2.8325, + "step": 469 + }, + { + "epoch": 0.03793075619401178, + "grad_norm": 0.963941752910614, + "learning_rate": 0.00019982945320570913, + "loss": 2.8281, + "step": 470 + }, + { + "epoch": 0.038011459930594786, + "grad_norm": 0.9812459349632263, + "learning_rate": 0.0001998285303492711, + "loss": 2.765, + "step": 471 + }, + { + "epoch": 0.03809216366717779, + "grad_norm": 0.9681405425071716, + "learning_rate": 0.00019982760500484516, + "loss": 2.8882, + "step": 472 + }, + { + "epoch": 0.038172867403760795, + "grad_norm": 0.8983948826789856, + "learning_rate": 0.00019982667717245432, + "loss": 2.8182, + "step": 473 + }, + { + "epoch": 0.0382535711403438, + "grad_norm": 0.9875261783599854, + "learning_rate": 0.00019982574685212178, + "loss": 2.8072, + "step": 474 + }, + { + "epoch": 0.038334274876926804, + "grad_norm": 0.8889442086219788, + "learning_rate": 0.00019982481404387064, + "loss": 2.8635, + "step": 475 + }, + { + "epoch": 0.03841497861350981, + "grad_norm": 0.8904242515563965, + "learning_rate": 0.00019982387874772418, + "loss": 2.829, + "step": 476 + }, + { + "epoch": 
0.03849568235009281, + "grad_norm": 1.0182000398635864, + "learning_rate": 0.00019982294096370574, + "loss": 2.8552, + "step": 477 + }, + { + "epoch": 0.03857638608667582, + "grad_norm": 0.9867151975631714, + "learning_rate": 0.00019982200069183867, + "loss": 2.8201, + "step": 478 + }, + { + "epoch": 0.038657089823258814, + "grad_norm": 0.9785345196723938, + "learning_rate": 0.0001998210579321464, + "loss": 2.8652, + "step": 479 + }, + { + "epoch": 0.03873779355984182, + "grad_norm": 0.9696915149688721, + "learning_rate": 0.00019982011268465243, + "loss": 2.8276, + "step": 480 + }, + { + "epoch": 0.03881849729642482, + "grad_norm": 0.9257470965385437, + "learning_rate": 0.00019981916494938033, + "loss": 2.8321, + "step": 481 + }, + { + "epoch": 0.03889920103300783, + "grad_norm": 0.9394895434379578, + "learning_rate": 0.00019981821472635369, + "loss": 2.8747, + "step": 482 + }, + { + "epoch": 0.03897990476959083, + "grad_norm": 0.9888504147529602, + "learning_rate": 0.00019981726201559626, + "loss": 2.8201, + "step": 483 + }, + { + "epoch": 0.039060608506173836, + "grad_norm": 0.8957003951072693, + "learning_rate": 0.0001998163068171317, + "loss": 2.8255, + "step": 484 + }, + { + "epoch": 0.03914131224275684, + "grad_norm": 0.9792008996009827, + "learning_rate": 0.00019981534913098383, + "loss": 2.7985, + "step": 485 + }, + { + "epoch": 0.039222015979339844, + "grad_norm": 0.8689060211181641, + "learning_rate": 0.00019981438895717656, + "loss": 2.7945, + "step": 486 + }, + { + "epoch": 0.03930271971592285, + "grad_norm": 0.9932593703269958, + "learning_rate": 0.0001998134262957338, + "loss": 2.9041, + "step": 487 + }, + { + "epoch": 0.03938342345250585, + "grad_norm": 0.8496069312095642, + "learning_rate": 0.00019981246114667955, + "loss": 2.8433, + "step": 488 + }, + { + "epoch": 0.03946412718908886, + "grad_norm": 0.8484126925468445, + "learning_rate": 0.00019981149351003786, + "loss": 2.7872, + "step": 489 + }, + { + "epoch": 0.03954483092567186, + "grad_norm": 
0.9208858013153076, + "learning_rate": 0.00019981052338583283, + "loss": 2.7776, + "step": 490 + }, + { + "epoch": 0.03962553466225486, + "grad_norm": 0.9305418729782104, + "learning_rate": 0.00019980955077408865, + "loss": 2.7851, + "step": 491 + }, + { + "epoch": 0.03970623839883786, + "grad_norm": 0.9803212881088257, + "learning_rate": 0.00019980857567482955, + "loss": 2.8469, + "step": 492 + }, + { + "epoch": 0.03978694213542087, + "grad_norm": 0.9165790677070618, + "learning_rate": 0.00019980759808807985, + "loss": 2.8513, + "step": 493 + }, + { + "epoch": 0.03986764587200387, + "grad_norm": 0.9153794050216675, + "learning_rate": 0.00019980661801386393, + "loss": 2.8322, + "step": 494 + }, + { + "epoch": 0.039948349608586876, + "grad_norm": 0.89347904920578, + "learning_rate": 0.00019980563545220616, + "loss": 2.8316, + "step": 495 + }, + { + "epoch": 0.04002905334516988, + "grad_norm": 0.9882236123085022, + "learning_rate": 0.00019980465040313105, + "loss": 2.7471, + "step": 496 + }, + { + "epoch": 0.040109757081752885, + "grad_norm": 0.9391099810600281, + "learning_rate": 0.00019980366286666322, + "loss": 2.8182, + "step": 497 + }, + { + "epoch": 0.04019046081833589, + "grad_norm": 1.0155293941497803, + "learning_rate": 0.00019980267284282717, + "loss": 2.8721, + "step": 498 + }, + { + "epoch": 0.040271164554918894, + "grad_norm": 0.9952930212020874, + "learning_rate": 0.00019980168033164765, + "loss": 2.8538, + "step": 499 + }, + { + "epoch": 0.0403518682915019, + "grad_norm": 0.8385666608810425, + "learning_rate": 0.00019980068533314934, + "loss": 2.8242, + "step": 500 + }, + { + "epoch": 0.0404325720280849, + "grad_norm": 0.8747559785842896, + "learning_rate": 0.0001997996878473571, + "loss": 2.7908, + "step": 501 + }, + { + "epoch": 0.04051327576466791, + "grad_norm": 0.9267926216125488, + "learning_rate": 0.00019979868787429575, + "loss": 2.8359, + "step": 502 + }, + { + "epoch": 0.04059397950125091, + "grad_norm": 0.8194155693054199, + "learning_rate": 
0.00019979768541399022, + "loss": 2.8161, + "step": 503 + }, + { + "epoch": 0.04067468323783391, + "grad_norm": 0.8923258185386658, + "learning_rate": 0.00019979668046646548, + "loss": 2.7547, + "step": 504 + }, + { + "epoch": 0.04075538697441691, + "grad_norm": 0.8965646028518677, + "learning_rate": 0.00019979567303174663, + "loss": 2.8432, + "step": 505 + }, + { + "epoch": 0.04083609071099992, + "grad_norm": 0.814481794834137, + "learning_rate": 0.0001997946631098587, + "loss": 2.8327, + "step": 506 + }, + { + "epoch": 0.04091679444758292, + "grad_norm": 0.8806928396224976, + "learning_rate": 0.00019979365070082694, + "loss": 2.8573, + "step": 507 + }, + { + "epoch": 0.040997498184165926, + "grad_norm": 0.8546919822692871, + "learning_rate": 0.00019979263580467653, + "loss": 2.8618, + "step": 508 + }, + { + "epoch": 0.04107820192074893, + "grad_norm": 0.8557277321815491, + "learning_rate": 0.00019979161842143274, + "loss": 2.8454, + "step": 509 + }, + { + "epoch": 0.041158905657331935, + "grad_norm": 0.9153180122375488, + "learning_rate": 0.00019979059855112098, + "loss": 2.8027, + "step": 510 + }, + { + "epoch": 0.04123960939391494, + "grad_norm": 0.8616741895675659, + "learning_rate": 0.00019978957619376666, + "loss": 2.7628, + "step": 511 + }, + { + "epoch": 0.04132031313049794, + "grad_norm": 0.8777137398719788, + "learning_rate": 0.00019978855134939524, + "loss": 2.8443, + "step": 512 + }, + { + "epoch": 0.04140101686708095, + "grad_norm": 0.852100133895874, + "learning_rate": 0.0001997875240180323, + "loss": 2.8125, + "step": 513 + }, + { + "epoch": 0.04148172060366395, + "grad_norm": 0.8470742702484131, + "learning_rate": 0.00019978649419970338, + "loss": 2.8139, + "step": 514 + }, + { + "epoch": 0.041562424340246956, + "grad_norm": 0.8890305161476135, + "learning_rate": 0.0001997854618944342, + "loss": 2.8633, + "step": 515 + }, + { + "epoch": 0.04164312807682996, + "grad_norm": 0.8893599510192871, + "learning_rate": 0.00019978442710225043, + "loss": 
2.8066, + "step": 516 + }, + { + "epoch": 0.04172383181341296, + "grad_norm": 0.9093891382217407, + "learning_rate": 0.00019978338982317792, + "loss": 2.8026, + "step": 517 + }, + { + "epoch": 0.04180453554999596, + "grad_norm": 0.9775434136390686, + "learning_rate": 0.00019978235005724252, + "loss": 2.849, + "step": 518 + }, + { + "epoch": 0.04188523928657897, + "grad_norm": 1.0014091730117798, + "learning_rate": 0.00019978130780447012, + "loss": 2.8572, + "step": 519 + }, + { + "epoch": 0.04196594302316197, + "grad_norm": 0.8487632870674133, + "learning_rate": 0.00019978026306488668, + "loss": 2.7611, + "step": 520 + }, + { + "epoch": 0.042046646759744975, + "grad_norm": 0.86592698097229, + "learning_rate": 0.00019977921583851825, + "loss": 2.7616, + "step": 521 + }, + { + "epoch": 0.04212735049632798, + "grad_norm": 1.0285916328430176, + "learning_rate": 0.00019977816612539093, + "loss": 2.8049, + "step": 522 + }, + { + "epoch": 0.042208054232910984, + "grad_norm": 0.9716495871543884, + "learning_rate": 0.00019977711392553092, + "loss": 2.8459, + "step": 523 + }, + { + "epoch": 0.04228875796949399, + "grad_norm": 0.8842264413833618, + "learning_rate": 0.0001997760592389644, + "loss": 2.7934, + "step": 524 + }, + { + "epoch": 0.04236946170607699, + "grad_norm": 0.8839964866638184, + "learning_rate": 0.00019977500206571765, + "loss": 2.8135, + "step": 525 + }, + { + "epoch": 0.04245016544266, + "grad_norm": 0.870331346988678, + "learning_rate": 0.00019977394240581705, + "loss": 2.8684, + "step": 526 + }, + { + "epoch": 0.042530869179243, + "grad_norm": 0.8844720125198364, + "learning_rate": 0.000199772880259289, + "loss": 2.7867, + "step": 527 + }, + { + "epoch": 0.042611572915826006, + "grad_norm": 0.9353455901145935, + "learning_rate": 0.00019977181562615994, + "loss": 2.8051, + "step": 528 + }, + { + "epoch": 0.04269227665240901, + "grad_norm": 0.9530816078186035, + "learning_rate": 0.00019977074850645646, + "loss": 2.7915, + "step": 529 + }, + { + "epoch": 
0.04277298038899201, + "grad_norm": 0.8984190821647644, + "learning_rate": 0.00019976967890020507, + "loss": 2.7957, + "step": 530 + }, + { + "epoch": 0.04285368412557501, + "grad_norm": 0.9146613478660583, + "learning_rate": 0.00019976860680743252, + "loss": 2.9053, + "step": 531 + }, + { + "epoch": 0.042934387862158016, + "grad_norm": 0.9228026866912842, + "learning_rate": 0.0001997675322281655, + "loss": 2.8578, + "step": 532 + }, + { + "epoch": 0.04301509159874102, + "grad_norm": 0.8266343474388123, + "learning_rate": 0.0001997664551624308, + "loss": 2.7393, + "step": 533 + }, + { + "epoch": 0.043095795335324025, + "grad_norm": 0.9197628498077393, + "learning_rate": 0.0001997653756102552, + "loss": 2.8828, + "step": 534 + }, + { + "epoch": 0.04317649907190703, + "grad_norm": 0.9145991802215576, + "learning_rate": 0.00019976429357166566, + "loss": 2.7767, + "step": 535 + }, + { + "epoch": 0.04325720280849003, + "grad_norm": 0.9123281240463257, + "learning_rate": 0.00019976320904668913, + "loss": 2.7993, + "step": 536 + }, + { + "epoch": 0.04333790654507304, + "grad_norm": 0.8597636818885803, + "learning_rate": 0.00019976212203535266, + "loss": 2.8148, + "step": 537 + }, + { + "epoch": 0.04341861028165604, + "grad_norm": 0.8963296413421631, + "learning_rate": 0.00019976103253768334, + "loss": 2.7722, + "step": 538 + }, + { + "epoch": 0.043499314018239046, + "grad_norm": 0.9480688571929932, + "learning_rate": 0.0001997599405537083, + "loss": 2.8038, + "step": 539 + }, + { + "epoch": 0.04358001775482205, + "grad_norm": 0.8115736842155457, + "learning_rate": 0.00019975884608345476, + "loss": 2.8069, + "step": 540 + }, + { + "epoch": 0.043660721491405055, + "grad_norm": 0.9642506837844849, + "learning_rate": 0.00019975774912695, + "loss": 2.8703, + "step": 541 + }, + { + "epoch": 0.04374142522798805, + "grad_norm": 0.9638697504997253, + "learning_rate": 0.0001997566496842214, + "loss": 2.8223, + "step": 542 + }, + { + "epoch": 0.04382212896457106, + "grad_norm": 
0.9478490352630615, + "learning_rate": 0.00019975554775529628, + "loss": 2.8164, + "step": 543 + }, + { + "epoch": 0.04390283270115406, + "grad_norm": 1.1771583557128906, + "learning_rate": 0.00019975444334020215, + "loss": 2.7969, + "step": 544 + }, + { + "epoch": 0.043983536437737066, + "grad_norm": 0.9597339034080505, + "learning_rate": 0.00019975333643896655, + "loss": 2.8025, + "step": 545 + }, + { + "epoch": 0.04406424017432007, + "grad_norm": 0.981595516204834, + "learning_rate": 0.00019975222705161704, + "loss": 2.7994, + "step": 546 + }, + { + "epoch": 0.044144943910903074, + "grad_norm": 0.9581133723258972, + "learning_rate": 0.00019975111517818127, + "loss": 2.802, + "step": 547 + }, + { + "epoch": 0.04422564764748608, + "grad_norm": 0.8643878698348999, + "learning_rate": 0.00019975000081868697, + "loss": 2.7958, + "step": 548 + }, + { + "epoch": 0.04430635138406908, + "grad_norm": 1.2188652753829956, + "learning_rate": 0.0001997488839731619, + "loss": 2.8786, + "step": 549 + }, + { + "epoch": 0.04438705512065209, + "grad_norm": 0.9138071537017822, + "learning_rate": 0.00019974776464163387, + "loss": 2.809, + "step": 550 + }, + { + "epoch": 0.04446775885723509, + "grad_norm": 0.9604587554931641, + "learning_rate": 0.00019974664282413083, + "loss": 2.8009, + "step": 551 + }, + { + "epoch": 0.044548462593818096, + "grad_norm": 1.0271116495132446, + "learning_rate": 0.00019974551852068072, + "loss": 2.8689, + "step": 552 + }, + { + "epoch": 0.0446291663304011, + "grad_norm": 0.9330877065658569, + "learning_rate": 0.00019974439173131155, + "loss": 2.7613, + "step": 553 + }, + { + "epoch": 0.044709870066984105, + "grad_norm": 0.9549325108528137, + "learning_rate": 0.00019974326245605136, + "loss": 2.8314, + "step": 554 + }, + { + "epoch": 0.0447905738035671, + "grad_norm": 0.8928439021110535, + "learning_rate": 0.00019974213069492836, + "loss": 2.8097, + "step": 555 + }, + { + "epoch": 0.044871277540150106, + "grad_norm": 0.8705076575279236, + 
"learning_rate": 0.00019974099644797075, + "loss": 2.8112, + "step": 556 + }, + { + "epoch": 0.04495198127673311, + "grad_norm": 0.988345742225647, + "learning_rate": 0.00019973985971520676, + "loss": 2.7648, + "step": 557 + }, + { + "epoch": 0.045032685013316115, + "grad_norm": 0.9161957502365112, + "learning_rate": 0.00019973872049666475, + "loss": 2.8691, + "step": 558 + }, + { + "epoch": 0.04511338874989912, + "grad_norm": 0.8404076099395752, + "learning_rate": 0.00019973757879237312, + "loss": 2.7708, + "step": 559 + }, + { + "epoch": 0.045194092486482124, + "grad_norm": 1.05247962474823, + "learning_rate": 0.0001997364346023603, + "loss": 2.8638, + "step": 560 + }, + { + "epoch": 0.04527479622306513, + "grad_norm": 0.9235066175460815, + "learning_rate": 0.00019973528792665483, + "loss": 2.7876, + "step": 561 + }, + { + "epoch": 0.04535549995964813, + "grad_norm": 1.220075249671936, + "learning_rate": 0.00019973413876528526, + "loss": 2.8563, + "step": 562 + }, + { + "epoch": 0.04543620369623114, + "grad_norm": 0.9098384976387024, + "learning_rate": 0.00019973298711828025, + "loss": 2.8427, + "step": 563 + }, + { + "epoch": 0.04551690743281414, + "grad_norm": 0.8792217969894409, + "learning_rate": 0.00019973183298566848, + "loss": 2.8673, + "step": 564 + }, + { + "epoch": 0.045597611169397145, + "grad_norm": 0.9895235896110535, + "learning_rate": 0.00019973067636747875, + "loss": 2.8262, + "step": 565 + }, + { + "epoch": 0.04567831490598015, + "grad_norm": 0.9191479086875916, + "learning_rate": 0.00019972951726373984, + "loss": 2.8005, + "step": 566 + }, + { + "epoch": 0.045759018642563154, + "grad_norm": 0.9631491899490356, + "learning_rate": 0.0001997283556744807, + "loss": 2.8438, + "step": 567 + }, + { + "epoch": 0.04583972237914615, + "grad_norm": 0.8302746415138245, + "learning_rate": 0.00019972719159973024, + "loss": 2.8221, + "step": 568 + }, + { + "epoch": 0.045920426115729156, + "grad_norm": 0.8238534927368164, + "learning_rate": 
0.00019972602503951748, + "loss": 2.7674, + "step": 569 + }, + { + "epoch": 0.04600112985231216, + "grad_norm": 0.9675811529159546, + "learning_rate": 0.00019972485599387146, + "loss": 2.8457, + "step": 570 + }, + { + "epoch": 0.046081833588895164, + "grad_norm": 0.8663914203643799, + "learning_rate": 0.00019972368446282134, + "loss": 2.7851, + "step": 571 + }, + { + "epoch": 0.04616253732547817, + "grad_norm": 0.9904592633247375, + "learning_rate": 0.00019972251044639636, + "loss": 2.8792, + "step": 572 + }, + { + "epoch": 0.04624324106206117, + "grad_norm": 0.907600462436676, + "learning_rate": 0.0001997213339446257, + "loss": 2.7991, + "step": 573 + }, + { + "epoch": 0.04632394479864418, + "grad_norm": 0.871362566947937, + "learning_rate": 0.00019972015495753876, + "loss": 2.7959, + "step": 574 + }, + { + "epoch": 0.04640464853522718, + "grad_norm": 0.9664937853813171, + "learning_rate": 0.00019971897348516486, + "loss": 2.7847, + "step": 575 + }, + { + "epoch": 0.046485352271810186, + "grad_norm": 1.0670619010925293, + "learning_rate": 0.0001997177895275335, + "loss": 2.8864, + "step": 576 + }, + { + "epoch": 0.04656605600839319, + "grad_norm": 0.9281025528907776, + "learning_rate": 0.00019971660308467414, + "loss": 2.8568, + "step": 577 + }, + { + "epoch": 0.046646759744976195, + "grad_norm": 0.8964822888374329, + "learning_rate": 0.00019971541415661639, + "loss": 2.7246, + "step": 578 + }, + { + "epoch": 0.0467274634815592, + "grad_norm": 0.8921917676925659, + "learning_rate": 0.00019971422274338985, + "loss": 2.8513, + "step": 579 + }, + { + "epoch": 0.0468081672181422, + "grad_norm": 0.9550159573554993, + "learning_rate": 0.0001997130288450242, + "loss": 2.7615, + "step": 580 + }, + { + "epoch": 0.0468888709547252, + "grad_norm": 0.9330170154571533, + "learning_rate": 0.00019971183246154925, + "loss": 2.9017, + "step": 581 + }, + { + "epoch": 0.046969574691308205, + "grad_norm": 0.9125271439552307, + "learning_rate": 0.00019971063359299477, + "loss": 
2.8263, + "step": 582 + }, + { + "epoch": 0.04705027842789121, + "grad_norm": 1.0005927085876465, + "learning_rate": 0.00019970943223939066, + "loss": 2.8371, + "step": 583 + }, + { + "epoch": 0.047130982164474214, + "grad_norm": 1.0333613157272339, + "learning_rate": 0.00019970822840076685, + "loss": 2.8275, + "step": 584 + }, + { + "epoch": 0.04721168590105722, + "grad_norm": 0.8684708476066589, + "learning_rate": 0.00019970702207715334, + "loss": 2.8343, + "step": 585 + }, + { + "epoch": 0.04729238963764022, + "grad_norm": 1.1112761497497559, + "learning_rate": 0.00019970581326858025, + "loss": 2.9012, + "step": 586 + }, + { + "epoch": 0.04737309337422323, + "grad_norm": 1.0187962055206299, + "learning_rate": 0.00019970460197507763, + "loss": 2.8423, + "step": 587 + }, + { + "epoch": 0.04745379711080623, + "grad_norm": 0.9802024960517883, + "learning_rate": 0.00019970338819667567, + "loss": 2.867, + "step": 588 + }, + { + "epoch": 0.047534500847389236, + "grad_norm": 0.9825551509857178, + "learning_rate": 0.00019970217193340467, + "loss": 2.8359, + "step": 589 + }, + { + "epoch": 0.04761520458397224, + "grad_norm": 1.1399210691452026, + "learning_rate": 0.00019970095318529494, + "loss": 2.8356, + "step": 590 + }, + { + "epoch": 0.047695908320555244, + "grad_norm": 1.0373995304107666, + "learning_rate": 0.00019969973195237684, + "loss": 2.8005, + "step": 591 + }, + { + "epoch": 0.04777661205713825, + "grad_norm": 1.133596420288086, + "learning_rate": 0.00019969850823468077, + "loss": 2.8778, + "step": 592 + }, + { + "epoch": 0.047857315793721246, + "grad_norm": 1.0187327861785889, + "learning_rate": 0.00019969728203223728, + "loss": 2.8291, + "step": 593 + }, + { + "epoch": 0.04793801953030425, + "grad_norm": 1.0588128566741943, + "learning_rate": 0.00019969605334507688, + "loss": 2.9396, + "step": 594 + }, + { + "epoch": 0.048018723266887255, + "grad_norm": 0.8783230781555176, + "learning_rate": 0.00019969482217323026, + "loss": 2.8076, + "step": 595 + }, + { + 
"epoch": 0.04809942700347026, + "grad_norm": 1.0500195026397705, + "learning_rate": 0.00019969358851672805, + "loss": 2.9099, + "step": 596 + }, + { + "epoch": 0.04818013074005326, + "grad_norm": 0.9523593187332153, + "learning_rate": 0.000199692352375601, + "loss": 2.7448, + "step": 597 + }, + { + "epoch": 0.04826083447663627, + "grad_norm": 1.0008500814437866, + "learning_rate": 0.00019969111374987995, + "loss": 2.8212, + "step": 598 + }, + { + "epoch": 0.04834153821321927, + "grad_norm": 0.8992626070976257, + "learning_rate": 0.00019968987263959575, + "loss": 2.8698, + "step": 599 + }, + { + "epoch": 0.048422241949802276, + "grad_norm": 0.9914852380752563, + "learning_rate": 0.00019968862904477935, + "loss": 2.8221, + "step": 600 + }, + { + "epoch": 0.04850294568638528, + "grad_norm": 0.9633241295814514, + "learning_rate": 0.00019968738296546168, + "loss": 2.8835, + "step": 601 + }, + { + "epoch": 0.048583649422968285, + "grad_norm": 1.055831789970398, + "learning_rate": 0.00019968613440167387, + "loss": 2.8781, + "step": 602 + }, + { + "epoch": 0.04866435315955129, + "grad_norm": 0.913856029510498, + "learning_rate": 0.000199684883353447, + "loss": 2.7863, + "step": 603 + }, + { + "epoch": 0.048745056896134294, + "grad_norm": 0.8429243564605713, + "learning_rate": 0.00019968362982081226, + "loss": 2.7753, + "step": 604 + }, + { + "epoch": 0.0488257606327173, + "grad_norm": 0.9324761629104614, + "learning_rate": 0.0001996823738038009, + "loss": 2.8058, + "step": 605 + }, + { + "epoch": 0.048906464369300295, + "grad_norm": 1.0004981756210327, + "learning_rate": 0.0001996811153024442, + "loss": 2.8537, + "step": 606 + }, + { + "epoch": 0.0489871681058833, + "grad_norm": 0.9438043236732483, + "learning_rate": 0.00019967985431677354, + "loss": 2.8828, + "step": 607 + }, + { + "epoch": 0.049067871842466304, + "grad_norm": 0.9359340071678162, + "learning_rate": 0.00019967859084682034, + "loss": 2.8149, + "step": 608 + }, + { + "epoch": 0.04914857557904931, + 
"grad_norm": 1.0400227308273315, + "learning_rate": 0.00019967732489261609, + "loss": 2.8489, + "step": 609 + }, + { + "epoch": 0.04922927931563231, + "grad_norm": 0.8978031277656555, + "learning_rate": 0.00019967605645419237, + "loss": 2.8599, + "step": 610 + }, + { + "epoch": 0.04930998305221532, + "grad_norm": 0.9982689619064331, + "learning_rate": 0.00019967478553158073, + "loss": 2.9024, + "step": 611 + }, + { + "epoch": 0.04939068678879832, + "grad_norm": 1.0695222616195679, + "learning_rate": 0.00019967351212481292, + "loss": 2.8483, + "step": 612 + }, + { + "epoch": 0.049471390525381326, + "grad_norm": 1.0615525245666504, + "learning_rate": 0.0001996722362339206, + "loss": 2.806, + "step": 613 + }, + { + "epoch": 0.04955209426196433, + "grad_norm": 0.9624890089035034, + "learning_rate": 0.0001996709578589356, + "loss": 2.8641, + "step": 614 + }, + { + "epoch": 0.049632797998547334, + "grad_norm": 0.9156595468521118, + "learning_rate": 0.00019966967699988985, + "loss": 2.7991, + "step": 615 + }, + { + "epoch": 0.04971350173513034, + "grad_norm": 0.8687645196914673, + "learning_rate": 0.00019966839365681517, + "loss": 2.774, + "step": 616 + }, + { + "epoch": 0.04979420547171334, + "grad_norm": 0.9175437688827515, + "learning_rate": 0.00019966710782974359, + "loss": 2.8064, + "step": 617 + }, + { + "epoch": 0.04987490920829635, + "grad_norm": 0.8897463083267212, + "learning_rate": 0.00019966581951870715, + "loss": 2.8487, + "step": 618 + }, + { + "epoch": 0.049955612944879345, + "grad_norm": 0.8908397555351257, + "learning_rate": 0.00019966452872373795, + "loss": 2.8523, + "step": 619 + }, + { + "epoch": 0.05003631668146235, + "grad_norm": 0.95484858751297, + "learning_rate": 0.00019966323544486818, + "loss": 2.8471, + "step": 620 + }, + { + "epoch": 0.050117020418045354, + "grad_norm": 0.9995831251144409, + "learning_rate": 0.00019966193968213008, + "loss": 2.8341, + "step": 621 + }, + { + "epoch": 0.05019772415462836, + "grad_norm": 0.8731706142425537, + 
"learning_rate": 0.00019966064143555587, + "loss": 2.8491, + "step": 622 + }, + { + "epoch": 0.05027842789121136, + "grad_norm": 0.9213298559188843, + "learning_rate": 0.000199659340705178, + "loss": 2.8256, + "step": 623 + }, + { + "epoch": 0.050359131627794367, + "grad_norm": 0.9565179347991943, + "learning_rate": 0.00019965803749102885, + "loss": 2.8177, + "step": 624 + }, + { + "epoch": 0.05043983536437737, + "grad_norm": 1.0076881647109985, + "learning_rate": 0.00019965673179314086, + "loss": 2.7812, + "step": 625 + }, + { + "epoch": 0.050520539100960375, + "grad_norm": 0.989647388458252, + "learning_rate": 0.00019965542361154666, + "loss": 2.9226, + "step": 626 + }, + { + "epoch": 0.05060124283754338, + "grad_norm": 0.9671580791473389, + "learning_rate": 0.00019965411294627878, + "loss": 2.8204, + "step": 627 + }, + { + "epoch": 0.050681946574126384, + "grad_norm": 0.9275986552238464, + "learning_rate": 0.00019965279979736989, + "loss": 2.8481, + "step": 628 + }, + { + "epoch": 0.05076265031070939, + "grad_norm": 0.9949543476104736, + "learning_rate": 0.00019965148416485273, + "loss": 2.8606, + "step": 629 + }, + { + "epoch": 0.05084335404729239, + "grad_norm": 0.9506482481956482, + "learning_rate": 0.0001996501660487601, + "loss": 2.8088, + "step": 630 + }, + { + "epoch": 0.0509240577838754, + "grad_norm": 0.9147887229919434, + "learning_rate": 0.00019964884544912488, + "loss": 2.7997, + "step": 631 + }, + { + "epoch": 0.051004761520458394, + "grad_norm": 0.8964840769767761, + "learning_rate": 0.00019964752236597993, + "loss": 2.8342, + "step": 632 + }, + { + "epoch": 0.0510854652570414, + "grad_norm": 0.931811511516571, + "learning_rate": 0.00019964619679935824, + "loss": 2.8229, + "step": 633 + }, + { + "epoch": 0.0511661689936244, + "grad_norm": 0.8634423017501831, + "learning_rate": 0.00019964486874929282, + "loss": 2.803, + "step": 634 + }, + { + "epoch": 0.05124687273020741, + "grad_norm": 0.892223596572876, + "learning_rate": 0.00019964353821581683, + 
"loss": 2.802, + "step": 635 + }, + { + "epoch": 0.05132757646679041, + "grad_norm": 0.8373630046844482, + "learning_rate": 0.00019964220519896338, + "loss": 2.7693, + "step": 636 + }, + { + "epoch": 0.051408280203373416, + "grad_norm": 0.8729730248451233, + "learning_rate": 0.0001996408696987657, + "loss": 2.8467, + "step": 637 + }, + { + "epoch": 0.05148898393995642, + "grad_norm": 0.8994413614273071, + "learning_rate": 0.0001996395317152571, + "loss": 2.8837, + "step": 638 + }, + { + "epoch": 0.051569687676539425, + "grad_norm": 0.9146113395690918, + "learning_rate": 0.0001996381912484709, + "loss": 2.8189, + "step": 639 + }, + { + "epoch": 0.05165039141312243, + "grad_norm": 0.9330562353134155, + "learning_rate": 0.00019963684829844052, + "loss": 2.7873, + "step": 640 + }, + { + "epoch": 0.05173109514970543, + "grad_norm": 0.9076224565505981, + "learning_rate": 0.00019963550286519944, + "loss": 2.802, + "step": 641 + }, + { + "epoch": 0.05181179888628844, + "grad_norm": 0.9580704569816589, + "learning_rate": 0.00019963415494878115, + "loss": 2.8173, + "step": 642 + }, + { + "epoch": 0.05189250262287144, + "grad_norm": 0.9291248917579651, + "learning_rate": 0.00019963280454921928, + "loss": 2.7866, + "step": 643 + }, + { + "epoch": 0.05197320635945444, + "grad_norm": 0.9815296530723572, + "learning_rate": 0.0001996314516665475, + "loss": 2.7903, + "step": 644 + }, + { + "epoch": 0.052053910096037444, + "grad_norm": 0.9461820721626282, + "learning_rate": 0.00019963009630079949, + "loss": 2.7854, + "step": 645 + }, + { + "epoch": 0.05213461383262045, + "grad_norm": 0.9660771489143372, + "learning_rate": 0.00019962873845200908, + "loss": 2.9187, + "step": 646 + }, + { + "epoch": 0.05221531756920345, + "grad_norm": 0.8987802863121033, + "learning_rate": 0.00019962737812021002, + "loss": 2.8854, + "step": 647 + }, + { + "epoch": 0.05229602130578646, + "grad_norm": 0.9810429215431213, + "learning_rate": 0.0001996260153054363, + "loss": 2.8974, + "step": 648 + }, + { + 
"epoch": 0.05237672504236946, + "grad_norm": 0.8185738325119019, + "learning_rate": 0.00019962465000772183, + "loss": 2.797, + "step": 649 + }, + { + "epoch": 0.052457428778952465, + "grad_norm": 0.8976237773895264, + "learning_rate": 0.0001996232822271007, + "loss": 2.8557, + "step": 650 + }, + { + "epoch": 0.05253813251553547, + "grad_norm": 0.8591496348381042, + "learning_rate": 0.0001996219119636069, + "loss": 2.8521, + "step": 651 + }, + { + "epoch": 0.052618836252118474, + "grad_norm": 0.8907031416893005, + "learning_rate": 0.00019962053921727472, + "loss": 2.8117, + "step": 652 + }, + { + "epoch": 0.05269953998870148, + "grad_norm": 0.9034241437911987, + "learning_rate": 0.00019961916398813823, + "loss": 2.741, + "step": 653 + }, + { + "epoch": 0.05278024372528448, + "grad_norm": 0.8284802436828613, + "learning_rate": 0.00019961778627623176, + "loss": 2.776, + "step": 654 + }, + { + "epoch": 0.05286094746186749, + "grad_norm": 0.8459529876708984, + "learning_rate": 0.00019961640608158967, + "loss": 2.8027, + "step": 655 + }, + { + "epoch": 0.05294165119845049, + "grad_norm": 0.9720042943954468, + "learning_rate": 0.00019961502340424636, + "loss": 2.9086, + "step": 656 + }, + { + "epoch": 0.05302235493503349, + "grad_norm": 0.8581427335739136, + "learning_rate": 0.00019961363824423626, + "loss": 2.8347, + "step": 657 + }, + { + "epoch": 0.05310305867161649, + "grad_norm": 0.9545331597328186, + "learning_rate": 0.00019961225060159386, + "loss": 2.828, + "step": 658 + }, + { + "epoch": 0.0531837624081995, + "grad_norm": 1.0303562879562378, + "learning_rate": 0.00019961086047635385, + "loss": 2.8461, + "step": 659 + }, + { + "epoch": 0.0532644661447825, + "grad_norm": 0.86605304479599, + "learning_rate": 0.0001996094678685508, + "loss": 2.8355, + "step": 660 + }, + { + "epoch": 0.053345169881365506, + "grad_norm": 0.8146334886550903, + "learning_rate": 0.0001996080727782194, + "loss": 2.8638, + "step": 661 + }, + { + "epoch": 0.05342587361794851, + "grad_norm": 
0.9434560537338257, + "learning_rate": 0.00019960667520539446, + "loss": 2.8196, + "step": 662 + }, + { + "epoch": 0.053506577354531515, + "grad_norm": 0.9362602829933167, + "learning_rate": 0.00019960527515011084, + "loss": 2.8452, + "step": 663 + }, + { + "epoch": 0.05358728109111452, + "grad_norm": 0.828713059425354, + "learning_rate": 0.00019960387261240334, + "loss": 2.8079, + "step": 664 + }, + { + "epoch": 0.053667984827697524, + "grad_norm": 0.8610214591026306, + "learning_rate": 0.00019960246759230697, + "loss": 2.8197, + "step": 665 + }, + { + "epoch": 0.05374868856428053, + "grad_norm": 0.8913124799728394, + "learning_rate": 0.00019960106008985674, + "loss": 2.8392, + "step": 666 + }, + { + "epoch": 0.05382939230086353, + "grad_norm": 0.8109759092330933, + "learning_rate": 0.00019959965010508778, + "loss": 2.7961, + "step": 667 + }, + { + "epoch": 0.05391009603744654, + "grad_norm": 0.8714832663536072, + "learning_rate": 0.00019959823763803514, + "loss": 2.7984, + "step": 668 + }, + { + "epoch": 0.05399079977402954, + "grad_norm": 0.9008125066757202, + "learning_rate": 0.00019959682268873408, + "loss": 2.8319, + "step": 669 + }, + { + "epoch": 0.05407150351061254, + "grad_norm": 0.8718584775924683, + "learning_rate": 0.00019959540525721985, + "loss": 2.7973, + "step": 670 + }, + { + "epoch": 0.05415220724719554, + "grad_norm": 0.8666327595710754, + "learning_rate": 0.00019959398534352774, + "loss": 2.8296, + "step": 671 + }, + { + "epoch": 0.05423291098377855, + "grad_norm": 0.9755229949951172, + "learning_rate": 0.00019959256294769322, + "loss": 2.8358, + "step": 672 + }, + { + "epoch": 0.05431361472036155, + "grad_norm": 1.193708062171936, + "learning_rate": 0.0001995911380697517, + "loss": 2.7672, + "step": 673 + }, + { + "epoch": 0.054394318456944556, + "grad_norm": 0.9104088544845581, + "learning_rate": 0.00019958971070973866, + "loss": 2.8389, + "step": 674 + }, + { + "epoch": 0.05447502219352756, + "grad_norm": 0.9266251921653748, + 
"learning_rate": 0.0001995882808676897, + "loss": 2.8226, + "step": 675 + }, + { + "epoch": 0.054555725930110564, + "grad_norm": 1.1161282062530518, + "learning_rate": 0.00019958684854364046, + "loss": 2.8236, + "step": 676 + }, + { + "epoch": 0.05463642966669357, + "grad_norm": 0.9200586080551147, + "learning_rate": 0.00019958541373762666, + "loss": 2.8074, + "step": 677 + }, + { + "epoch": 0.05471713340327657, + "grad_norm": 1.0372560024261475, + "learning_rate": 0.000199583976449684, + "loss": 2.815, + "step": 678 + }, + { + "epoch": 0.05479783713985958, + "grad_norm": 0.8822301030158997, + "learning_rate": 0.0001995825366798483, + "loss": 2.7985, + "step": 679 + }, + { + "epoch": 0.05487854087644258, + "grad_norm": 0.9226076006889343, + "learning_rate": 0.00019958109442815553, + "loss": 2.7649, + "step": 680 + }, + { + "epoch": 0.054959244613025586, + "grad_norm": 0.8769479990005493, + "learning_rate": 0.00019957964969464156, + "loss": 2.8483, + "step": 681 + }, + { + "epoch": 0.05503994834960859, + "grad_norm": 0.8601027727127075, + "learning_rate": 0.0001995782024793424, + "loss": 2.8072, + "step": 682 + }, + { + "epoch": 0.05512065208619159, + "grad_norm": 0.9684911370277405, + "learning_rate": 0.00019957675278229416, + "loss": 2.8693, + "step": 683 + }, + { + "epoch": 0.05520135582277459, + "grad_norm": 0.9119890928268433, + "learning_rate": 0.00019957530060353294, + "loss": 2.853, + "step": 684 + }, + { + "epoch": 0.055282059559357596, + "grad_norm": 0.9588247537612915, + "learning_rate": 0.0001995738459430949, + "loss": 2.8435, + "step": 685 + }, + { + "epoch": 0.0553627632959406, + "grad_norm": 0.8317441940307617, + "learning_rate": 0.00019957238880101636, + "loss": 2.8208, + "step": 686 + }, + { + "epoch": 0.055443467032523605, + "grad_norm": 0.92695152759552, + "learning_rate": 0.00019957092917733361, + "loss": 2.8378, + "step": 687 + }, + { + "epoch": 0.05552417076910661, + "grad_norm": 0.8908315300941467, + "learning_rate": 0.00019956946707208305, + 
"loss": 2.8041, + "step": 688 + }, + { + "epoch": 0.055604874505689614, + "grad_norm": 0.9787055253982544, + "learning_rate": 0.00019956800248530107, + "loss": 2.8604, + "step": 689 + }, + { + "epoch": 0.05568557824227262, + "grad_norm": 0.8707631826400757, + "learning_rate": 0.00019956653541702415, + "loss": 2.7763, + "step": 690 + }, + { + "epoch": 0.05576628197885562, + "grad_norm": 1.0059715509414673, + "learning_rate": 0.00019956506586728896, + "loss": 2.8267, + "step": 691 + }, + { + "epoch": 0.05584698571543863, + "grad_norm": 0.88490891456604, + "learning_rate": 0.00019956359383613203, + "loss": 2.8278, + "step": 692 + }, + { + "epoch": 0.05592768945202163, + "grad_norm": 0.9527923464775085, + "learning_rate": 0.00019956211932359007, + "loss": 2.8251, + "step": 693 + }, + { + "epoch": 0.056008393188604635, + "grad_norm": 0.9612617492675781, + "learning_rate": 0.00019956064232969987, + "loss": 2.8148, + "step": 694 + }, + { + "epoch": 0.05608909692518763, + "grad_norm": 0.9261285066604614, + "learning_rate": 0.0001995591628544982, + "loss": 2.8176, + "step": 695 + }, + { + "epoch": 0.05616980066177064, + "grad_norm": 0.9766250252723694, + "learning_rate": 0.0001995576808980219, + "loss": 2.7968, + "step": 696 + }, + { + "epoch": 0.05625050439835364, + "grad_norm": 0.9287495017051697, + "learning_rate": 0.00019955619646030802, + "loss": 2.7679, + "step": 697 + }, + { + "epoch": 0.056331208134936646, + "grad_norm": 0.9182924032211304, + "learning_rate": 0.00019955470954139345, + "loss": 2.8295, + "step": 698 + }, + { + "epoch": 0.05641191187151965, + "grad_norm": 0.8650663495063782, + "learning_rate": 0.00019955322014131524, + "loss": 2.7928, + "step": 699 + }, + { + "epoch": 0.056492615608102655, + "grad_norm": 0.9543934464454651, + "learning_rate": 0.00019955172826011062, + "loss": 2.8049, + "step": 700 + }, + { + "epoch": 0.05657331934468566, + "grad_norm": 0.9060636162757874, + "learning_rate": 0.00019955023389781664, + "loss": 2.871, + "step": 701 + }, + 
{ + "epoch": 0.05665402308126866, + "grad_norm": 0.9824137091636658, + "learning_rate": 0.00019954873705447065, + "loss": 2.816, + "step": 702 + }, + { + "epoch": 0.05673472681785167, + "grad_norm": 0.8831053972244263, + "learning_rate": 0.00019954723773010988, + "loss": 2.8207, + "step": 703 + }, + { + "epoch": 0.05681543055443467, + "grad_norm": 0.9603390693664551, + "learning_rate": 0.00019954573592477173, + "loss": 2.831, + "step": 704 + }, + { + "epoch": 0.056896134291017676, + "grad_norm": 0.911556601524353, + "learning_rate": 0.00019954423163849364, + "loss": 2.7679, + "step": 705 + }, + { + "epoch": 0.05697683802760068, + "grad_norm": 0.8558745384216309, + "learning_rate": 0.00019954272487131305, + "loss": 2.7934, + "step": 706 + }, + { + "epoch": 0.057057541764183685, + "grad_norm": 1.0175282955169678, + "learning_rate": 0.00019954121562326758, + "loss": 2.905, + "step": 707 + }, + { + "epoch": 0.05713824550076668, + "grad_norm": 0.9480875730514526, + "learning_rate": 0.00019953970389439483, + "loss": 2.85, + "step": 708 + }, + { + "epoch": 0.05721894923734969, + "grad_norm": 0.9271003603935242, + "learning_rate": 0.0001995381896847324, + "loss": 2.8237, + "step": 709 + }, + { + "epoch": 0.05729965297393269, + "grad_norm": 0.8439653515815735, + "learning_rate": 0.00019953667299431815, + "loss": 2.821, + "step": 710 + }, + { + "epoch": 0.057380356710515695, + "grad_norm": 0.9750552177429199, + "learning_rate": 0.0001995351538231898, + "loss": 2.8613, + "step": 711 + }, + { + "epoch": 0.0574610604470987, + "grad_norm": 0.9409266710281372, + "learning_rate": 0.0001995336321713852, + "loss": 2.7876, + "step": 712 + }, + { + "epoch": 0.057541764183681704, + "grad_norm": 0.811138927936554, + "learning_rate": 0.00019953210803894233, + "loss": 2.7957, + "step": 713 + }, + { + "epoch": 0.05762246792026471, + "grad_norm": 0.9504825472831726, + "learning_rate": 0.00019953058142589916, + "loss": 2.8536, + "step": 714 + }, + { + "epoch": 0.05770317165684771, + 
"grad_norm": 0.8183554410934448, + "learning_rate": 0.00019952905233229368, + "loss": 2.7697, + "step": 715 + }, + { + "epoch": 0.05778387539343072, + "grad_norm": 1.1146113872528076, + "learning_rate": 0.0001995275207581641, + "loss": 2.8629, + "step": 716 + }, + { + "epoch": 0.05786457913001372, + "grad_norm": 0.8797986507415771, + "learning_rate": 0.00019952598670354852, + "loss": 2.7962, + "step": 717 + }, + { + "epoch": 0.057945282866596726, + "grad_norm": 0.8771101832389832, + "learning_rate": 0.00019952445016848517, + "loss": 2.8323, + "step": 718 + }, + { + "epoch": 0.05802598660317973, + "grad_norm": 0.9003355503082275, + "learning_rate": 0.00019952291115301235, + "loss": 2.777, + "step": 719 + }, + { + "epoch": 0.058106690339762734, + "grad_norm": 0.846125602722168, + "learning_rate": 0.00019952136965716846, + "loss": 2.7875, + "step": 720 + }, + { + "epoch": 0.05818739407634573, + "grad_norm": 0.908833920955658, + "learning_rate": 0.00019951982568099187, + "loss": 2.7975, + "step": 721 + }, + { + "epoch": 0.058268097812928736, + "grad_norm": 0.8616230487823486, + "learning_rate": 0.00019951827922452106, + "loss": 2.7486, + "step": 722 + }, + { + "epoch": 0.05834880154951174, + "grad_norm": 0.8791850805282593, + "learning_rate": 0.00019951673028779462, + "loss": 2.8301, + "step": 723 + }, + { + "epoch": 0.058429505286094745, + "grad_norm": 0.9437321424484253, + "learning_rate": 0.00019951517887085112, + "loss": 2.7956, + "step": 724 + }, + { + "epoch": 0.05851020902267775, + "grad_norm": 0.9263394474983215, + "learning_rate": 0.00019951362497372922, + "loss": 2.867, + "step": 725 + }, + { + "epoch": 0.05859091275926075, + "grad_norm": 0.9442462921142578, + "learning_rate": 0.00019951206859646764, + "loss": 2.8447, + "step": 726 + }, + { + "epoch": 0.05867161649584376, + "grad_norm": 0.9286711812019348, + "learning_rate": 0.0001995105097391052, + "loss": 2.7588, + "step": 727 + }, + { + "epoch": 0.05875232023242676, + "grad_norm": 0.9338774085044861, + 
"learning_rate": 0.00019950894840168072, + "loss": 2.7394, + "step": 728 + }, + { + "epoch": 0.058833023969009766, + "grad_norm": 0.8880760073661804, + "learning_rate": 0.00019950738458423314, + "loss": 2.7949, + "step": 729 + }, + { + "epoch": 0.05891372770559277, + "grad_norm": 1.0091183185577393, + "learning_rate": 0.00019950581828680143, + "loss": 2.8633, + "step": 730 + }, + { + "epoch": 0.058994431442175775, + "grad_norm": 0.8657729625701904, + "learning_rate": 0.0001995042495094246, + "loss": 2.8649, + "step": 731 + }, + { + "epoch": 0.05907513517875878, + "grad_norm": 1.0084047317504883, + "learning_rate": 0.00019950267825214176, + "loss": 2.8422, + "step": 732 + }, + { + "epoch": 0.059155838915341784, + "grad_norm": 0.9096506237983704, + "learning_rate": 0.00019950110451499208, + "loss": 2.7908, + "step": 733 + }, + { + "epoch": 0.05923654265192478, + "grad_norm": 1.1338937282562256, + "learning_rate": 0.0001994995282980148, + "loss": 2.8093, + "step": 734 + }, + { + "epoch": 0.059317246388507786, + "grad_norm": 0.8813811540603638, + "learning_rate": 0.00019949794960124915, + "loss": 2.8866, + "step": 735 + }, + { + "epoch": 0.05939795012509079, + "grad_norm": 0.8457592129707336, + "learning_rate": 0.00019949636842473453, + "loss": 2.7744, + "step": 736 + }, + { + "epoch": 0.059478653861673794, + "grad_norm": 0.8731856346130371, + "learning_rate": 0.0001994947847685103, + "loss": 2.7822, + "step": 737 + }, + { + "epoch": 0.0595593575982568, + "grad_norm": 0.8915185332298279, + "learning_rate": 0.00019949319863261597, + "loss": 2.773, + "step": 738 + }, + { + "epoch": 0.0596400613348398, + "grad_norm": 0.9478987455368042, + "learning_rate": 0.00019949161001709106, + "loss": 2.8462, + "step": 739 + }, + { + "epoch": 0.05972076507142281, + "grad_norm": 0.8903716206550598, + "learning_rate": 0.00019949001892197515, + "loss": 2.7741, + "step": 740 + }, + { + "epoch": 0.05980146880800581, + "grad_norm": 0.8870117664337158, + "learning_rate": 
0.00019948842534730786, + "loss": 2.8255, + "step": 741 + }, + { + "epoch": 0.059882172544588816, + "grad_norm": 1.0766080617904663, + "learning_rate": 0.00019948682929312898, + "loss": 2.8865, + "step": 742 + }, + { + "epoch": 0.05996287628117182, + "grad_norm": 0.846447229385376, + "learning_rate": 0.00019948523075947824, + "loss": 2.8441, + "step": 743 + }, + { + "epoch": 0.060043580017754825, + "grad_norm": 0.9847991466522217, + "learning_rate": 0.00019948362974639552, + "loss": 2.8099, + "step": 744 + }, + { + "epoch": 0.06012428375433783, + "grad_norm": 0.9170514941215515, + "learning_rate": 0.00019948202625392068, + "loss": 2.8797, + "step": 745 + }, + { + "epoch": 0.060204987490920826, + "grad_norm": 0.8564898371696472, + "learning_rate": 0.0001994804202820937, + "loss": 2.7993, + "step": 746 + }, + { + "epoch": 0.06028569122750383, + "grad_norm": 0.8527392148971558, + "learning_rate": 0.00019947881183095457, + "loss": 2.7816, + "step": 747 + }, + { + "epoch": 0.060366394964086835, + "grad_norm": 0.9170876145362854, + "learning_rate": 0.00019947720090054342, + "loss": 2.8031, + "step": 748 + }, + { + "epoch": 0.06044709870066984, + "grad_norm": 0.8891414403915405, + "learning_rate": 0.0001994755874909004, + "loss": 2.8072, + "step": 749 + }, + { + "epoch": 0.060527802437252844, + "grad_norm": 0.8853670358657837, + "learning_rate": 0.0001994739716020657, + "loss": 2.8857, + "step": 750 + }, + { + "epoch": 0.06060850617383585, + "grad_norm": 0.9011211395263672, + "learning_rate": 0.0001994723532340796, + "loss": 2.8519, + "step": 751 + }, + { + "epoch": 0.06068920991041885, + "grad_norm": 0.8843330144882202, + "learning_rate": 0.00019947073238698243, + "loss": 2.7882, + "step": 752 + }, + { + "epoch": 0.06076991364700186, + "grad_norm": 0.8712944984436035, + "learning_rate": 0.00019946910906081463, + "loss": 2.791, + "step": 753 + }, + { + "epoch": 0.06085061738358486, + "grad_norm": 0.8296090364456177, + "learning_rate": 0.00019946748325561656, + "loss": 
2.8073, + "step": 754 + }, + { + "epoch": 0.060931321120167865, + "grad_norm": 0.9239117503166199, + "learning_rate": 0.00019946585497142885, + "loss": 2.8209, + "step": 755 + }, + { + "epoch": 0.06101202485675087, + "grad_norm": 0.8885170221328735, + "learning_rate": 0.000199464224208292, + "loss": 2.8391, + "step": 756 + }, + { + "epoch": 0.061092728593333874, + "grad_norm": 0.933720588684082, + "learning_rate": 0.0001994625909662467, + "loss": 2.7635, + "step": 757 + }, + { + "epoch": 0.06117343232991688, + "grad_norm": 0.9751253724098206, + "learning_rate": 0.00019946095524533362, + "loss": 2.7933, + "step": 758 + }, + { + "epoch": 0.061254136066499876, + "grad_norm": 0.9469670057296753, + "learning_rate": 0.00019945931704559353, + "loss": 2.7652, + "step": 759 + }, + { + "epoch": 0.06133483980308288, + "grad_norm": 0.8559684157371521, + "learning_rate": 0.00019945767636706728, + "loss": 2.8258, + "step": 760 + }, + { + "epoch": 0.061415543539665884, + "grad_norm": 1.021478295326233, + "learning_rate": 0.00019945603320979574, + "loss": 2.8047, + "step": 761 + }, + { + "epoch": 0.06149624727624889, + "grad_norm": 0.8421681523323059, + "learning_rate": 0.00019945438757381986, + "loss": 2.8233, + "step": 762 + }, + { + "epoch": 0.06157695101283189, + "grad_norm": 0.900654137134552, + "learning_rate": 0.0001994527394591807, + "loss": 2.7591, + "step": 763 + }, + { + "epoch": 0.0616576547494149, + "grad_norm": 0.878300666809082, + "learning_rate": 0.0001994510888659193, + "loss": 2.715, + "step": 764 + }, + { + "epoch": 0.0617383584859979, + "grad_norm": 0.9170855283737183, + "learning_rate": 0.00019944943579407678, + "loss": 2.8604, + "step": 765 + }, + { + "epoch": 0.061819062222580906, + "grad_norm": 0.8532859683036804, + "learning_rate": 0.00019944778024369434, + "loss": 2.8124, + "step": 766 + }, + { + "epoch": 0.06189976595916391, + "grad_norm": 0.8549049496650696, + "learning_rate": 0.00019944612221481332, + "loss": 2.8066, + "step": 767 + }, + { + "epoch": 
0.061980469695746915, + "grad_norm": 0.9602857828140259, + "learning_rate": 0.00019944446170747492, + "loss": 2.8424, + "step": 768 + }, + { + "epoch": 0.06206117343232992, + "grad_norm": 0.910953164100647, + "learning_rate": 0.0001994427987217206, + "loss": 2.8093, + "step": 769 + }, + { + "epoch": 0.06214187716891292, + "grad_norm": 0.8536386489868164, + "learning_rate": 0.0001994411332575918, + "loss": 2.802, + "step": 770 + }, + { + "epoch": 0.06222258090549593, + "grad_norm": 0.9166232347488403, + "learning_rate": 0.00019943946531513, + "loss": 2.783, + "step": 771 + }, + { + "epoch": 0.062303284642078925, + "grad_norm": 0.9954056739807129, + "learning_rate": 0.00019943779489437678, + "loss": 2.8198, + "step": 772 + }, + { + "epoch": 0.06238398837866193, + "grad_norm": 0.8527171015739441, + "learning_rate": 0.0001994361219953738, + "loss": 2.8159, + "step": 773 + }, + { + "epoch": 0.062464692115244934, + "grad_norm": 0.8951592445373535, + "learning_rate": 0.00019943444661816274, + "loss": 2.7969, + "step": 774 + }, + { + "epoch": 0.06254539585182795, + "grad_norm": 0.9348207116127014, + "learning_rate": 0.00019943276876278532, + "loss": 2.8403, + "step": 775 + }, + { + "epoch": 0.06262609958841095, + "grad_norm": 0.866318941116333, + "learning_rate": 0.00019943108842928342, + "loss": 2.7886, + "step": 776 + }, + { + "epoch": 0.06270680332499395, + "grad_norm": 0.8571285605430603, + "learning_rate": 0.00019942940561769884, + "loss": 2.771, + "step": 777 + }, + { + "epoch": 0.06278750706157694, + "grad_norm": 0.8384295105934143, + "learning_rate": 0.00019942772032807357, + "loss": 2.7885, + "step": 778 + }, + { + "epoch": 0.06286821079815995, + "grad_norm": 0.9934808611869812, + "learning_rate": 0.00019942603256044961, + "loss": 2.8399, + "step": 779 + }, + { + "epoch": 0.06294891453474295, + "grad_norm": 0.8275915384292603, + "learning_rate": 0.00019942434231486902, + "loss": 2.8983, + "step": 780 + }, + { + "epoch": 0.06302961827132596, + "grad_norm": 
0.9073596000671387, + "learning_rate": 0.0001994226495913739, + "loss": 2.7886, + "step": 781 + }, + { + "epoch": 0.06311032200790896, + "grad_norm": 0.9091461300849915, + "learning_rate": 0.00019942095439000646, + "loss": 2.814, + "step": 782 + }, + { + "epoch": 0.06319102574449197, + "grad_norm": 0.9356934428215027, + "learning_rate": 0.000199419256710809, + "loss": 2.8238, + "step": 783 + }, + { + "epoch": 0.06327172948107497, + "grad_norm": 0.883514940738678, + "learning_rate": 0.00019941755655382374, + "loss": 2.7912, + "step": 784 + }, + { + "epoch": 0.06335243321765797, + "grad_norm": 0.8770506381988525, + "learning_rate": 0.00019941585391909308, + "loss": 2.7774, + "step": 785 + }, + { + "epoch": 0.06343313695424098, + "grad_norm": 0.8891726136207581, + "learning_rate": 0.00019941414880665948, + "loss": 2.7975, + "step": 786 + }, + { + "epoch": 0.06351384069082398, + "grad_norm": 0.9280585050582886, + "learning_rate": 0.00019941244121656545, + "loss": 2.9468, + "step": 787 + }, + { + "epoch": 0.06359454442740699, + "grad_norm": 0.8545510768890381, + "learning_rate": 0.00019941073114885347, + "loss": 2.8165, + "step": 788 + }, + { + "epoch": 0.06367524816398999, + "grad_norm": 0.8631312847137451, + "learning_rate": 0.0001994090186035662, + "loss": 2.7955, + "step": 789 + }, + { + "epoch": 0.063755951900573, + "grad_norm": 0.8883851170539856, + "learning_rate": 0.00019940730358074634, + "loss": 2.7828, + "step": 790 + }, + { + "epoch": 0.063836655637156, + "grad_norm": 0.8421074748039246, + "learning_rate": 0.00019940558608043664, + "loss": 2.7999, + "step": 791 + }, + { + "epoch": 0.063917359373739, + "grad_norm": 0.918134868144989, + "learning_rate": 0.0001994038661026799, + "loss": 2.7888, + "step": 792 + }, + { + "epoch": 0.06399806311032201, + "grad_norm": 0.8513637781143188, + "learning_rate": 0.00019940214364751896, + "loss": 2.7719, + "step": 793 + }, + { + "epoch": 0.06407876684690501, + "grad_norm": 0.9181898236274719, + "learning_rate": 
0.00019940041871499675, + "loss": 2.8345, + "step": 794 + }, + { + "epoch": 0.06415947058348802, + "grad_norm": 0.8129134774208069, + "learning_rate": 0.00019939869130515626, + "loss": 2.7316, + "step": 795 + }, + { + "epoch": 0.06424017432007102, + "grad_norm": 0.8782191872596741, + "learning_rate": 0.00019939696141804057, + "loss": 2.7852, + "step": 796 + }, + { + "epoch": 0.06432087805665403, + "grad_norm": 0.9064851403236389, + "learning_rate": 0.00019939522905369276, + "loss": 2.8105, + "step": 797 + }, + { + "epoch": 0.06440158179323703, + "grad_norm": 0.9888454675674438, + "learning_rate": 0.00019939349421215603, + "loss": 2.8496, + "step": 798 + }, + { + "epoch": 0.06448228552982004, + "grad_norm": 0.8717427253723145, + "learning_rate": 0.0001993917568934736, + "loss": 2.8227, + "step": 799 + }, + { + "epoch": 0.06456298926640304, + "grad_norm": 0.922980010509491, + "learning_rate": 0.0001993900170976888, + "loss": 2.8571, + "step": 800 + }, + { + "epoch": 0.06464369300298604, + "grad_norm": 0.8311850428581238, + "learning_rate": 0.00019938827482484492, + "loss": 2.7905, + "step": 801 + }, + { + "epoch": 0.06472439673956905, + "grad_norm": 0.9274900555610657, + "learning_rate": 0.0001993865300749855, + "loss": 2.8526, + "step": 802 + }, + { + "epoch": 0.06480510047615205, + "grad_norm": 0.9072165489196777, + "learning_rate": 0.00019938478284815388, + "loss": 2.8384, + "step": 803 + }, + { + "epoch": 0.06488580421273504, + "grad_norm": 0.854099452495575, + "learning_rate": 0.0001993830331443937, + "loss": 2.8459, + "step": 804 + }, + { + "epoch": 0.06496650794931805, + "grad_norm": 0.824126660823822, + "learning_rate": 0.00019938128096374854, + "loss": 2.7845, + "step": 805 + }, + { + "epoch": 0.06504721168590105, + "grad_norm": 0.8570442795753479, + "learning_rate": 0.0001993795263062621, + "loss": 2.8446, + "step": 806 + }, + { + "epoch": 0.06512791542248406, + "grad_norm": 0.8998628854751587, + "learning_rate": 0.00019937776917197805, + "loss": 2.8604, + 
"step": 807 + }, + { + "epoch": 0.06520861915906706, + "grad_norm": 0.9189189076423645, + "learning_rate": 0.00019937600956094023, + "loss": 2.7866, + "step": 808 + }, + { + "epoch": 0.06528932289565006, + "grad_norm": 0.9471604824066162, + "learning_rate": 0.00019937424747319248, + "loss": 2.7619, + "step": 809 + }, + { + "epoch": 0.06537002663223307, + "grad_norm": 0.8507755994796753, + "learning_rate": 0.00019937248290877874, + "loss": 2.8259, + "step": 810 + }, + { + "epoch": 0.06545073036881607, + "grad_norm": 0.8800963759422302, + "learning_rate": 0.00019937071586774292, + "loss": 2.827, + "step": 811 + }, + { + "epoch": 0.06553143410539908, + "grad_norm": 0.8851124048233032, + "learning_rate": 0.00019936894635012915, + "loss": 2.793, + "step": 812 + }, + { + "epoch": 0.06561213784198208, + "grad_norm": 0.88127601146698, + "learning_rate": 0.00019936717435598144, + "loss": 2.8885, + "step": 813 + }, + { + "epoch": 0.06569284157856509, + "grad_norm": 0.9115073084831238, + "learning_rate": 0.000199365399885344, + "loss": 2.8278, + "step": 814 + }, + { + "epoch": 0.06577354531514809, + "grad_norm": 0.8722662925720215, + "learning_rate": 0.00019936362293826107, + "loss": 2.8125, + "step": 815 + }, + { + "epoch": 0.0658542490517311, + "grad_norm": 0.8332365155220032, + "learning_rate": 0.0001993618435147769, + "loss": 2.7682, + "step": 816 + }, + { + "epoch": 0.0659349527883141, + "grad_norm": 0.9524003863334656, + "learning_rate": 0.0001993600616149359, + "loss": 2.8166, + "step": 817 + }, + { + "epoch": 0.0660156565248971, + "grad_norm": 0.8402767181396484, + "learning_rate": 0.0001993582772387824, + "loss": 2.8192, + "step": 818 + }, + { + "epoch": 0.06609636026148011, + "grad_norm": 0.8589913249015808, + "learning_rate": 0.0001993564903863609, + "loss": 2.7785, + "step": 819 + }, + { + "epoch": 0.06617706399806311, + "grad_norm": 1.034550428390503, + "learning_rate": 0.00019935470105771598, + "loss": 2.8407, + "step": 820 + }, + { + "epoch": 
0.06625776773464612, + "grad_norm": 0.856490969657898, + "learning_rate": 0.0001993529092528921, + "loss": 2.794, + "step": 821 + }, + { + "epoch": 0.06633847147122912, + "grad_norm": 0.897498369216919, + "learning_rate": 0.0001993511149719341, + "loss": 2.7959, + "step": 822 + }, + { + "epoch": 0.06641917520781213, + "grad_norm": 0.8495277166366577, + "learning_rate": 0.00019934931821488658, + "loss": 2.783, + "step": 823 + }, + { + "epoch": 0.06649987894439513, + "grad_norm": 0.8362239599227905, + "learning_rate": 0.00019934751898179436, + "loss": 2.8628, + "step": 824 + }, + { + "epoch": 0.06658058268097813, + "grad_norm": 0.8702061176300049, + "learning_rate": 0.00019934571727270225, + "loss": 2.7878, + "step": 825 + }, + { + "epoch": 0.06666128641756114, + "grad_norm": 0.8341560363769531, + "learning_rate": 0.0001993439130876552, + "loss": 2.7345, + "step": 826 + }, + { + "epoch": 0.06674199015414414, + "grad_norm": 0.880181074142456, + "learning_rate": 0.00019934210642669813, + "loss": 2.7789, + "step": 827 + }, + { + "epoch": 0.06682269389072715, + "grad_norm": 0.9088126420974731, + "learning_rate": 0.00019934029728987607, + "loss": 2.7893, + "step": 828 + }, + { + "epoch": 0.06690339762731014, + "grad_norm": 0.8087106347084045, + "learning_rate": 0.00019933848567723416, + "loss": 2.7967, + "step": 829 + }, + { + "epoch": 0.06698410136389314, + "grad_norm": 0.8970876336097717, + "learning_rate": 0.00019933667158881745, + "loss": 2.8837, + "step": 830 + }, + { + "epoch": 0.06706480510047615, + "grad_norm": 0.9344804883003235, + "learning_rate": 0.00019933485502467128, + "loss": 2.7754, + "step": 831 + }, + { + "epoch": 0.06714550883705915, + "grad_norm": 0.8119301795959473, + "learning_rate": 0.00019933303598484084, + "loss": 2.7919, + "step": 832 + }, + { + "epoch": 0.06722621257364216, + "grad_norm": 0.9370681047439575, + "learning_rate": 0.00019933121446937148, + "loss": 2.8011, + "step": 833 + }, + { + "epoch": 0.06730691631022516, + "grad_norm": 
0.8358973264694214, + "learning_rate": 0.00019932939047830858, + "loss": 2.8339, + "step": 834 + }, + { + "epoch": 0.06738762004680816, + "grad_norm": 0.8565972447395325, + "learning_rate": 0.00019932756401169765, + "loss": 2.8269, + "step": 835 + }, + { + "epoch": 0.06746832378339117, + "grad_norm": 0.8405514359474182, + "learning_rate": 0.00019932573506958417, + "loss": 2.7621, + "step": 836 + }, + { + "epoch": 0.06754902751997417, + "grad_norm": 0.8217617869377136, + "learning_rate": 0.00019932390365201373, + "loss": 2.8363, + "step": 837 + }, + { + "epoch": 0.06762973125655718, + "grad_norm": 0.9121438264846802, + "learning_rate": 0.00019932206975903198, + "loss": 2.8033, + "step": 838 + }, + { + "epoch": 0.06771043499314018, + "grad_norm": 0.9113054871559143, + "learning_rate": 0.00019932023339068464, + "loss": 2.8696, + "step": 839 + }, + { + "epoch": 0.06779113872972319, + "grad_norm": 0.8638293743133545, + "learning_rate": 0.00019931839454701743, + "loss": 2.8008, + "step": 840 + }, + { + "epoch": 0.06787184246630619, + "grad_norm": 0.862932562828064, + "learning_rate": 0.0001993165532280762, + "loss": 2.8092, + "step": 841 + }, + { + "epoch": 0.0679525462028892, + "grad_norm": 0.9089607000350952, + "learning_rate": 0.00019931470943390685, + "loss": 2.8921, + "step": 842 + }, + { + "epoch": 0.0680332499394722, + "grad_norm": 0.9233555793762207, + "learning_rate": 0.00019931286316455537, + "loss": 2.9025, + "step": 843 + }, + { + "epoch": 0.0681139536760552, + "grad_norm": 0.9403017163276672, + "learning_rate": 0.0001993110144200677, + "loss": 2.7875, + "step": 844 + }, + { + "epoch": 0.06819465741263821, + "grad_norm": 0.9194290637969971, + "learning_rate": 0.00019930916320048996, + "loss": 2.8254, + "step": 845 + }, + { + "epoch": 0.06827536114922121, + "grad_norm": 0.8238688111305237, + "learning_rate": 0.00019930730950586828, + "loss": 2.82, + "step": 846 + }, + { + "epoch": 0.06835606488580422, + "grad_norm": 0.8560660481452942, + "learning_rate": 
0.00019930545333624885, + "loss": 2.8516, + "step": 847 + }, + { + "epoch": 0.06843676862238722, + "grad_norm": 0.9127222895622253, + "learning_rate": 0.0001993035946916779, + "loss": 2.7674, + "step": 848 + }, + { + "epoch": 0.06851747235897022, + "grad_norm": 0.8679420948028564, + "learning_rate": 0.00019930173357220182, + "loss": 2.777, + "step": 849 + }, + { + "epoch": 0.06859817609555323, + "grad_norm": 0.9686945676803589, + "learning_rate": 0.00019929986997786699, + "loss": 2.7841, + "step": 850 + }, + { + "epoch": 0.06867887983213623, + "grad_norm": 0.8366333246231079, + "learning_rate": 0.00019929800390871977, + "loss": 2.7993, + "step": 851 + }, + { + "epoch": 0.06875958356871924, + "grad_norm": 0.8374585509300232, + "learning_rate": 0.00019929613536480675, + "loss": 2.7545, + "step": 852 + }, + { + "epoch": 0.06884028730530224, + "grad_norm": 0.9843763709068298, + "learning_rate": 0.00019929426434617451, + "loss": 2.8118, + "step": 853 + }, + { + "epoch": 0.06892099104188525, + "grad_norm": 0.8093454241752625, + "learning_rate": 0.0001992923908528696, + "loss": 2.7301, + "step": 854 + }, + { + "epoch": 0.06900169477846824, + "grad_norm": 0.8374418020248413, + "learning_rate": 0.00019929051488493877, + "loss": 2.7745, + "step": 855 + }, + { + "epoch": 0.06908239851505124, + "grad_norm": 0.869965136051178, + "learning_rate": 0.00019928863644242875, + "loss": 2.7637, + "step": 856 + }, + { + "epoch": 0.06916310225163425, + "grad_norm": 0.9280590415000916, + "learning_rate": 0.00019928675552538638, + "loss": 2.7792, + "step": 857 + }, + { + "epoch": 0.06924380598821725, + "grad_norm": 0.8624193668365479, + "learning_rate": 0.00019928487213385852, + "loss": 2.7755, + "step": 858 + }, + { + "epoch": 0.06932450972480025, + "grad_norm": 0.8379972577095032, + "learning_rate": 0.00019928298626789212, + "loss": 2.8563, + "step": 859 + }, + { + "epoch": 0.06940521346138326, + "grad_norm": 0.9272914528846741, + "learning_rate": 0.00019928109792753418, + "loss": 2.836, 
+ "step": 860 + }, + { + "epoch": 0.06948591719796626, + "grad_norm": 0.9239040613174438, + "learning_rate": 0.00019927920711283175, + "loss": 2.7999, + "step": 861 + }, + { + "epoch": 0.06956662093454927, + "grad_norm": 0.9125113487243652, + "learning_rate": 0.00019927731382383195, + "loss": 2.8494, + "step": 862 + }, + { + "epoch": 0.06964732467113227, + "grad_norm": 0.8782855868339539, + "learning_rate": 0.00019927541806058198, + "loss": 2.767, + "step": 863 + }, + { + "epoch": 0.06972802840771528, + "grad_norm": 0.8815447092056274, + "learning_rate": 0.00019927351982312907, + "loss": 2.7877, + "step": 864 + }, + { + "epoch": 0.06980873214429828, + "grad_norm": 0.8555476069450378, + "learning_rate": 0.00019927161911152056, + "loss": 2.8057, + "step": 865 + }, + { + "epoch": 0.06988943588088128, + "grad_norm": 0.8562924265861511, + "learning_rate": 0.00019926971592580382, + "loss": 2.8049, + "step": 866 + }, + { + "epoch": 0.06997013961746429, + "grad_norm": 0.846503734588623, + "learning_rate": 0.00019926781026602625, + "loss": 2.8545, + "step": 867 + }, + { + "epoch": 0.07005084335404729, + "grad_norm": 0.8439623713493347, + "learning_rate": 0.00019926590213223535, + "loss": 2.7451, + "step": 868 + }, + { + "epoch": 0.0701315470906303, + "grad_norm": 0.8471730351448059, + "learning_rate": 0.00019926399152447868, + "loss": 2.7879, + "step": 869 + }, + { + "epoch": 0.0702122508272133, + "grad_norm": 0.8721400499343872, + "learning_rate": 0.00019926207844280387, + "loss": 2.8594, + "step": 870 + }, + { + "epoch": 0.0702929545637963, + "grad_norm": 0.8110925555229187, + "learning_rate": 0.0001992601628872586, + "loss": 2.7789, + "step": 871 + }, + { + "epoch": 0.07037365830037931, + "grad_norm": 0.9593119025230408, + "learning_rate": 0.0001992582448578906, + "loss": 2.8792, + "step": 872 + }, + { + "epoch": 0.07045436203696231, + "grad_norm": 0.8553354144096375, + "learning_rate": 0.00019925632435474765, + "loss": 2.8056, + "step": 873 + }, + { + "epoch": 
0.07053506577354532, + "grad_norm": 0.8062612414360046, + "learning_rate": 0.00019925440137787768, + "loss": 2.7762, + "step": 874 + }, + { + "epoch": 0.07061576951012832, + "grad_norm": 0.8264921307563782, + "learning_rate": 0.00019925247592732858, + "loss": 2.8435, + "step": 875 + }, + { + "epoch": 0.07069647324671133, + "grad_norm": 0.7770401835441589, + "learning_rate": 0.00019925054800314828, + "loss": 2.7846, + "step": 876 + }, + { + "epoch": 0.07077717698329433, + "grad_norm": 0.8426765203475952, + "learning_rate": 0.0001992486176053849, + "loss": 2.782, + "step": 877 + }, + { + "epoch": 0.07085788071987734, + "grad_norm": 0.855330228805542, + "learning_rate": 0.00019924668473408655, + "loss": 2.8051, + "step": 878 + }, + { + "epoch": 0.07093858445646034, + "grad_norm": 0.8762049674987793, + "learning_rate": 0.00019924474938930135, + "loss": 2.7634, + "step": 879 + }, + { + "epoch": 0.07101928819304333, + "grad_norm": 0.9226812124252319, + "learning_rate": 0.0001992428115710776, + "loss": 2.8342, + "step": 880 + }, + { + "epoch": 0.07109999192962634, + "grad_norm": 0.9031660556793213, + "learning_rate": 0.00019924087127946353, + "loss": 2.7953, + "step": 881 + }, + { + "epoch": 0.07118069566620934, + "grad_norm": 1.0151792764663696, + "learning_rate": 0.00019923892851450757, + "loss": 2.8225, + "step": 882 + }, + { + "epoch": 0.07126139940279234, + "grad_norm": 0.9805678725242615, + "learning_rate": 0.00019923698327625806, + "loss": 2.7727, + "step": 883 + }, + { + "epoch": 0.07134210313937535, + "grad_norm": 0.8831729888916016, + "learning_rate": 0.00019923503556476356, + "loss": 2.7682, + "step": 884 + }, + { + "epoch": 0.07142280687595835, + "grad_norm": 1.0311404466629028, + "learning_rate": 0.00019923308538007253, + "loss": 2.8422, + "step": 885 + }, + { + "epoch": 0.07150351061254136, + "grad_norm": 0.8143388628959656, + "learning_rate": 0.0001992311327222336, + "loss": 2.7876, + "step": 886 + }, + { + "epoch": 0.07158421434912436, + "grad_norm": 
0.877017617225647, + "learning_rate": 0.00019922917759129552, + "loss": 2.7486, + "step": 887 + }, + { + "epoch": 0.07166491808570737, + "grad_norm": 0.930646538734436, + "learning_rate": 0.0001992272199873069, + "loss": 2.8022, + "step": 888 + }, + { + "epoch": 0.07174562182229037, + "grad_norm": 0.934753954410553, + "learning_rate": 0.00019922525991031655, + "loss": 2.8485, + "step": 889 + }, + { + "epoch": 0.07182632555887337, + "grad_norm": 0.9564220905303955, + "learning_rate": 0.00019922329736037339, + "loss": 2.761, + "step": 890 + }, + { + "epoch": 0.07190702929545638, + "grad_norm": 0.9457311630249023, + "learning_rate": 0.00019922133233752626, + "loss": 2.8279, + "step": 891 + }, + { + "epoch": 0.07198773303203938, + "grad_norm": 0.9385658502578735, + "learning_rate": 0.0001992193648418242, + "loss": 2.8222, + "step": 892 + }, + { + "epoch": 0.07206843676862239, + "grad_norm": 1.0157524347305298, + "learning_rate": 0.00019921739487331616, + "loss": 2.9166, + "step": 893 + }, + { + "epoch": 0.07214914050520539, + "grad_norm": 0.9143860340118408, + "learning_rate": 0.00019921542243205132, + "loss": 2.8139, + "step": 894 + }, + { + "epoch": 0.0722298442417884, + "grad_norm": 0.8769320249557495, + "learning_rate": 0.00019921344751807878, + "loss": 2.8023, + "step": 895 + }, + { + "epoch": 0.0723105479783714, + "grad_norm": 0.9647517204284668, + "learning_rate": 0.0001992114701314478, + "loss": 2.8872, + "step": 896 + }, + { + "epoch": 0.0723912517149544, + "grad_norm": 1.025978446006775, + "learning_rate": 0.00019920949027220762, + "loss": 2.837, + "step": 897 + }, + { + "epoch": 0.07247195545153741, + "grad_norm": 0.8848521113395691, + "learning_rate": 0.0001992075079404076, + "loss": 2.7498, + "step": 898 + }, + { + "epoch": 0.07255265918812041, + "grad_norm": 0.9395595788955688, + "learning_rate": 0.0001992055231360972, + "loss": 2.8752, + "step": 899 + }, + { + "epoch": 0.07263336292470342, + "grad_norm": 0.8711572885513306, + "learning_rate": 
0.00019920353585932578, + "loss": 2.8608, + "step": 900 + }, + { + "epoch": 0.07271406666128642, + "grad_norm": 0.8606846332550049, + "learning_rate": 0.00019920154611014295, + "loss": 2.829, + "step": 901 + }, + { + "epoch": 0.07279477039786943, + "grad_norm": 0.859354555606842, + "learning_rate": 0.0001991995538885983, + "loss": 2.8102, + "step": 902 + }, + { + "epoch": 0.07287547413445243, + "grad_norm": 0.9063243865966797, + "learning_rate": 0.00019919755919474143, + "loss": 2.8509, + "step": 903 + }, + { + "epoch": 0.07295617787103544, + "grad_norm": 0.8321940898895264, + "learning_rate": 0.00019919556202862207, + "loss": 2.796, + "step": 904 + }, + { + "epoch": 0.07303688160761844, + "grad_norm": 0.8875191807746887, + "learning_rate": 0.00019919356239029003, + "loss": 2.8672, + "step": 905 + }, + { + "epoch": 0.07311758534420143, + "grad_norm": 0.9028071165084839, + "learning_rate": 0.0001991915602797951, + "loss": 2.8926, + "step": 906 + }, + { + "epoch": 0.07319828908078443, + "grad_norm": 0.9449291825294495, + "learning_rate": 0.0001991895556971872, + "loss": 2.8159, + "step": 907 + }, + { + "epoch": 0.07327899281736744, + "grad_norm": 0.871576189994812, + "learning_rate": 0.0001991875486425163, + "loss": 2.8162, + "step": 908 + }, + { + "epoch": 0.07335969655395044, + "grad_norm": 0.818423330783844, + "learning_rate": 0.0001991855391158324, + "loss": 2.8882, + "step": 909 + }, + { + "epoch": 0.07344040029053345, + "grad_norm": 0.8802343606948853, + "learning_rate": 0.0001991835271171856, + "loss": 2.8245, + "step": 910 + }, + { + "epoch": 0.07352110402711645, + "grad_norm": 0.916023313999176, + "learning_rate": 0.000199181512646626, + "loss": 2.8966, + "step": 911 + }, + { + "epoch": 0.07360180776369946, + "grad_norm": 1.0663317441940308, + "learning_rate": 0.0001991794957042039, + "loss": 2.7736, + "step": 912 + }, + { + "epoch": 0.07368251150028246, + "grad_norm": 0.9212445616722107, + "learning_rate": 0.00019917747628996947, + "loss": 2.7924, + "step": 
913 + }, + { + "epoch": 0.07376321523686546, + "grad_norm": 0.9785256385803223, + "learning_rate": 0.00019917545440397308, + "loss": 2.8021, + "step": 914 + }, + { + "epoch": 0.07384391897344847, + "grad_norm": 0.8510444760322571, + "learning_rate": 0.00019917343004626514, + "loss": 2.7991, + "step": 915 + }, + { + "epoch": 0.07392462271003147, + "grad_norm": 0.8967106342315674, + "learning_rate": 0.0001991714032168961, + "loss": 2.8838, + "step": 916 + }, + { + "epoch": 0.07400532644661448, + "grad_norm": 0.8940563797950745, + "learning_rate": 0.0001991693739159164, + "loss": 2.8124, + "step": 917 + }, + { + "epoch": 0.07408603018319748, + "grad_norm": 0.9270479679107666, + "learning_rate": 0.0001991673421433767, + "loss": 2.7627, + "step": 918 + }, + { + "epoch": 0.07416673391978049, + "grad_norm": 0.905805230140686, + "learning_rate": 0.0001991653078993276, + "loss": 2.781, + "step": 919 + }, + { + "epoch": 0.07424743765636349, + "grad_norm": 0.9295129179954529, + "learning_rate": 0.00019916327118381982, + "loss": 2.8332, + "step": 920 + }, + { + "epoch": 0.0743281413929465, + "grad_norm": 0.863331139087677, + "learning_rate": 0.00019916123199690408, + "loss": 2.8489, + "step": 921 + }, + { + "epoch": 0.0744088451295295, + "grad_norm": 0.9966896772384644, + "learning_rate": 0.00019915919033863127, + "loss": 2.9107, + "step": 922 + }, + { + "epoch": 0.0744895488661125, + "grad_norm": 0.8921390771865845, + "learning_rate": 0.00019915714620905218, + "loss": 2.7668, + "step": 923 + }, + { + "epoch": 0.07457025260269551, + "grad_norm": 0.9378434419631958, + "learning_rate": 0.00019915509960821782, + "loss": 2.8305, + "step": 924 + }, + { + "epoch": 0.07465095633927851, + "grad_norm": 1.0351817607879639, + "learning_rate": 0.0001991530505361792, + "loss": 2.9412, + "step": 925 + }, + { + "epoch": 0.07473166007586152, + "grad_norm": 0.7995476722717285, + "learning_rate": 0.0001991509989929874, + "loss": 2.7872, + "step": 926 + }, + { + "epoch": 0.07481236381244452, + 
"grad_norm": 0.858830988407135, + "learning_rate": 0.0001991489449786935, + "loss": 2.7775, + "step": 927 + }, + { + "epoch": 0.07489306754902753, + "grad_norm": 1.1254682540893555, + "learning_rate": 0.00019914688849334867, + "loss": 2.7913, + "step": 928 + }, + { + "epoch": 0.07497377128561053, + "grad_norm": 0.9475330710411072, + "learning_rate": 0.00019914482953700428, + "loss": 2.7945, + "step": 929 + }, + { + "epoch": 0.07505447502219353, + "grad_norm": 0.8427290916442871, + "learning_rate": 0.00019914276810971152, + "loss": 2.8297, + "step": 930 + }, + { + "epoch": 0.07513517875877652, + "grad_norm": 0.9308956265449524, + "learning_rate": 0.00019914070421152183, + "loss": 2.8534, + "step": 931 + }, + { + "epoch": 0.07521588249535953, + "grad_norm": 0.9264787435531616, + "learning_rate": 0.00019913863784248664, + "loss": 2.7959, + "step": 932 + }, + { + "epoch": 0.07529658623194253, + "grad_norm": 0.8432087302207947, + "learning_rate": 0.00019913656900265742, + "loss": 2.8479, + "step": 933 + }, + { + "epoch": 0.07537728996852554, + "grad_norm": 0.8237274885177612, + "learning_rate": 0.0001991344976920858, + "loss": 2.782, + "step": 934 + }, + { + "epoch": 0.07545799370510854, + "grad_norm": 0.8143243789672852, + "learning_rate": 0.0001991324239108233, + "loss": 2.7567, + "step": 935 + }, + { + "epoch": 0.07553869744169155, + "grad_norm": 0.8824434280395508, + "learning_rate": 0.0001991303476589217, + "loss": 2.7971, + "step": 936 + }, + { + "epoch": 0.07561940117827455, + "grad_norm": 0.8202407360076904, + "learning_rate": 0.00019912826893643272, + "loss": 2.7825, + "step": 937 + }, + { + "epoch": 0.07570010491485755, + "grad_norm": 0.8001337647438049, + "learning_rate": 0.00019912618774340813, + "loss": 2.8294, + "step": 938 + }, + { + "epoch": 0.07578080865144056, + "grad_norm": 0.8875572085380554, + "learning_rate": 0.00019912410407989982, + "loss": 2.8013, + "step": 939 + }, + { + "epoch": 0.07586151238802356, + "grad_norm": 0.8676280379295349, + 
"learning_rate": 0.0001991220179459597, + "loss": 2.767, + "step": 940 + }, + { + "epoch": 0.07594221612460657, + "grad_norm": 0.9767136573791504, + "learning_rate": 0.00019911992934163982, + "loss": 2.8315, + "step": 941 + }, + { + "epoch": 0.07602291986118957, + "grad_norm": 0.8690733909606934, + "learning_rate": 0.0001991178382669922, + "loss": 2.8042, + "step": 942 + }, + { + "epoch": 0.07610362359777258, + "grad_norm": 0.862978458404541, + "learning_rate": 0.00019911574472206893, + "loss": 2.8243, + "step": 943 + }, + { + "epoch": 0.07618432733435558, + "grad_norm": 0.9116127490997314, + "learning_rate": 0.00019911364870692225, + "loss": 2.7377, + "step": 944 + }, + { + "epoch": 0.07626503107093859, + "grad_norm": 0.8765420317649841, + "learning_rate": 0.00019911155022160433, + "loss": 2.7673, + "step": 945 + }, + { + "epoch": 0.07634573480752159, + "grad_norm": 0.8229342699050903, + "learning_rate": 0.0001991094492661675, + "loss": 2.7749, + "step": 946 + }, + { + "epoch": 0.0764264385441046, + "grad_norm": 0.8340098261833191, + "learning_rate": 0.00019910734584066412, + "loss": 2.7871, + "step": 947 + }, + { + "epoch": 0.0765071422806876, + "grad_norm": 0.8116940259933472, + "learning_rate": 0.0001991052399451466, + "loss": 2.8202, + "step": 948 + }, + { + "epoch": 0.0765878460172706, + "grad_norm": 0.8730412721633911, + "learning_rate": 0.00019910313157966747, + "loss": 2.8661, + "step": 949 + }, + { + "epoch": 0.07666854975385361, + "grad_norm": 0.8272213339805603, + "learning_rate": 0.0001991010207442792, + "loss": 2.8352, + "step": 950 + }, + { + "epoch": 0.07674925349043661, + "grad_norm": 0.8586944937705994, + "learning_rate": 0.0001990989074390345, + "loss": 2.8018, + "step": 951 + }, + { + "epoch": 0.07682995722701962, + "grad_norm": 0.81830894947052, + "learning_rate": 0.00019909679166398592, + "loss": 2.8154, + "step": 952 + }, + { + "epoch": 0.07691066096360262, + "grad_norm": 0.8158484101295471, + "learning_rate": 0.00019909467341918627, + 
"loss": 2.7618, + "step": 953 + }, + { + "epoch": 0.07699136470018562, + "grad_norm": 0.816834032535553, + "learning_rate": 0.00019909255270468833, + "loss": 2.8125, + "step": 954 + }, + { + "epoch": 0.07707206843676863, + "grad_norm": 0.944790780544281, + "learning_rate": 0.00019909042952054496, + "loss": 2.8054, + "step": 955 + }, + { + "epoch": 0.07715277217335163, + "grad_norm": 0.9281302690505981, + "learning_rate": 0.00019908830386680904, + "loss": 2.8724, + "step": 956 + }, + { + "epoch": 0.07723347590993462, + "grad_norm": 0.8850300908088684, + "learning_rate": 0.00019908617574353356, + "loss": 2.7906, + "step": 957 + }, + { + "epoch": 0.07731417964651763, + "grad_norm": 0.8997938632965088, + "learning_rate": 0.00019908404515077158, + "loss": 2.7814, + "step": 958 + }, + { + "epoch": 0.07739488338310063, + "grad_norm": 0.8814194798469543, + "learning_rate": 0.0001990819120885762, + "loss": 2.7423, + "step": 959 + }, + { + "epoch": 0.07747558711968364, + "grad_norm": 0.8759928345680237, + "learning_rate": 0.00019907977655700054, + "loss": 2.7803, + "step": 960 + }, + { + "epoch": 0.07755629085626664, + "grad_norm": 0.8439476490020752, + "learning_rate": 0.00019907763855609787, + "loss": 2.8277, + "step": 961 + }, + { + "epoch": 0.07763699459284965, + "grad_norm": 0.8745121955871582, + "learning_rate": 0.00019907549808592144, + "loss": 2.8152, + "step": 962 + }, + { + "epoch": 0.07771769832943265, + "grad_norm": 1.0439598560333252, + "learning_rate": 0.00019907335514652465, + "loss": 2.7882, + "step": 963 + }, + { + "epoch": 0.07779840206601565, + "grad_norm": 0.9516503810882568, + "learning_rate": 0.00019907120973796082, + "loss": 2.8555, + "step": 964 + }, + { + "epoch": 0.07787910580259866, + "grad_norm": 0.928717315196991, + "learning_rate": 0.0001990690618602835, + "loss": 2.8214, + "step": 965 + }, + { + "epoch": 0.07795980953918166, + "grad_norm": 0.7923071384429932, + "learning_rate": 0.00019906691151354617, + "loss": 2.8153, + "step": 966 + }, + { + 
"epoch": 0.07804051327576467, + "grad_norm": 0.8783324956893921, + "learning_rate": 0.00019906475869780246, + "loss": 2.7691, + "step": 967 + }, + { + "epoch": 0.07812121701234767, + "grad_norm": 0.8974801301956177, + "learning_rate": 0.000199062603413106, + "loss": 2.8156, + "step": 968 + }, + { + "epoch": 0.07820192074893068, + "grad_norm": 0.9304391741752625, + "learning_rate": 0.00019906044565951052, + "loss": 2.8489, + "step": 969 + }, + { + "epoch": 0.07828262448551368, + "grad_norm": 0.8351098895072937, + "learning_rate": 0.00019905828543706976, + "loss": 2.7744, + "step": 970 + }, + { + "epoch": 0.07836332822209668, + "grad_norm": 0.8634265065193176, + "learning_rate": 0.0001990561227458376, + "loss": 2.8193, + "step": 971 + }, + { + "epoch": 0.07844403195867969, + "grad_norm": 0.8969653248786926, + "learning_rate": 0.00019905395758586792, + "loss": 2.7548, + "step": 972 + }, + { + "epoch": 0.07852473569526269, + "grad_norm": 0.8964852094650269, + "learning_rate": 0.0001990517899572147, + "loss": 2.8037, + "step": 973 + }, + { + "epoch": 0.0786054394318457, + "grad_norm": 0.8567596077919006, + "learning_rate": 0.00019904961985993196, + "loss": 2.7942, + "step": 974 + }, + { + "epoch": 0.0786861431684287, + "grad_norm": 0.8275273442268372, + "learning_rate": 0.00019904744729407374, + "loss": 2.8359, + "step": 975 + }, + { + "epoch": 0.0787668469050117, + "grad_norm": 0.9458810091018677, + "learning_rate": 0.00019904527225969424, + "loss": 2.8354, + "step": 976 + }, + { + "epoch": 0.07884755064159471, + "grad_norm": 0.8690593838691711, + "learning_rate": 0.00019904309475684767, + "loss": 2.7894, + "step": 977 + }, + { + "epoch": 0.07892825437817771, + "grad_norm": 0.810279130935669, + "learning_rate": 0.00019904091478558823, + "loss": 2.7939, + "step": 978 + }, + { + "epoch": 0.07900895811476072, + "grad_norm": 0.8779012560844421, + "learning_rate": 0.0001990387323459703, + "loss": 2.7551, + "step": 979 + }, + { + "epoch": 0.07908966185134372, + "grad_norm": 
0.7936381101608276, + "learning_rate": 0.00019903654743804833, + "loss": 2.814, + "step": 980 + }, + { + "epoch": 0.07917036558792673, + "grad_norm": 0.9567989110946655, + "learning_rate": 0.00019903436006187667, + "loss": 2.7715, + "step": 981 + }, + { + "epoch": 0.07925106932450972, + "grad_norm": 0.9250255823135376, + "learning_rate": 0.00019903217021750987, + "loss": 2.8967, + "step": 982 + }, + { + "epoch": 0.07933177306109272, + "grad_norm": 0.8342804312705994, + "learning_rate": 0.00019902997790500256, + "loss": 2.7728, + "step": 983 + }, + { + "epoch": 0.07941247679767573, + "grad_norm": 0.8321473598480225, + "learning_rate": 0.00019902778312440932, + "loss": 2.8479, + "step": 984 + }, + { + "epoch": 0.07949318053425873, + "grad_norm": 0.894727885723114, + "learning_rate": 0.00019902558587578484, + "loss": 2.8211, + "step": 985 + }, + { + "epoch": 0.07957388427084174, + "grad_norm": 0.8093457221984863, + "learning_rate": 0.0001990233861591839, + "loss": 2.7481, + "step": 986 + }, + { + "epoch": 0.07965458800742474, + "grad_norm": 0.8626284599304199, + "learning_rate": 0.00019902118397466132, + "loss": 2.8368, + "step": 987 + }, + { + "epoch": 0.07973529174400774, + "grad_norm": 0.799648642539978, + "learning_rate": 0.00019901897932227204, + "loss": 2.8713, + "step": 988 + }, + { + "epoch": 0.07981599548059075, + "grad_norm": 0.9658265709877014, + "learning_rate": 0.00019901677220207092, + "loss": 2.7284, + "step": 989 + }, + { + "epoch": 0.07989669921717375, + "grad_norm": 0.877299427986145, + "learning_rate": 0.00019901456261411303, + "loss": 2.7916, + "step": 990 + }, + { + "epoch": 0.07997740295375676, + "grad_norm": 0.926450252532959, + "learning_rate": 0.00019901235055845337, + "loss": 2.8207, + "step": 991 + }, + { + "epoch": 0.08005810669033976, + "grad_norm": 0.8858455419540405, + "learning_rate": 0.00019901013603514716, + "loss": 2.795, + "step": 992 + }, + { + "epoch": 0.08013881042692277, + "grad_norm": 0.8619922995567322, + "learning_rate": 
0.0001990079190442495, + "loss": 2.8163, + "step": 993 + }, + { + "epoch": 0.08021951416350577, + "grad_norm": 0.859200656414032, + "learning_rate": 0.00019900569958581572, + "loss": 2.7715, + "step": 994 + }, + { + "epoch": 0.08030021790008877, + "grad_norm": 0.8346282839775085, + "learning_rate": 0.0001990034776599011, + "loss": 2.8312, + "step": 995 + }, + { + "epoch": 0.08038092163667178, + "grad_norm": 0.9188725352287292, + "learning_rate": 0.00019900125326656102, + "loss": 2.799, + "step": 996 + }, + { + "epoch": 0.08046162537325478, + "grad_norm": 0.8548648953437805, + "learning_rate": 0.00019899902640585092, + "loss": 2.7778, + "step": 997 + }, + { + "epoch": 0.08054232910983779, + "grad_norm": 0.8883183002471924, + "learning_rate": 0.00019899679707782624, + "loss": 2.809, + "step": 998 + }, + { + "epoch": 0.08062303284642079, + "grad_norm": 0.8915852308273315, + "learning_rate": 0.00019899456528254267, + "loss": 2.8309, + "step": 999 + }, + { + "epoch": 0.0807037365830038, + "grad_norm": 0.8092094659805298, + "learning_rate": 0.00019899233102005573, + "loss": 2.7753, + "step": 1000 + }, + { + "epoch": 0.0807037365830038, + "eval_loss": 2.7104671001434326, + "eval_runtime": 773.7354, + "eval_samples_per_second": 3.386, + "eval_steps_per_second": 0.565, + "step": 1000 + }, + { + "epoch": 0.0807844403195868, + "grad_norm": 0.8744900226593018, + "learning_rate": 0.00019899009429042114, + "loss": 2.7948, + "step": 1001 + }, + { + "epoch": 0.0808651440561698, + "grad_norm": 0.8749974370002747, + "learning_rate": 0.0001989878550936946, + "loss": 2.7609, + "step": 1002 + }, + { + "epoch": 0.08094584779275281, + "grad_norm": 0.8622820377349854, + "learning_rate": 0.000198985613429932, + "loss": 2.8023, + "step": 1003 + }, + { + "epoch": 0.08102655152933581, + "grad_norm": 0.9404367208480835, + "learning_rate": 0.00019898336929918915, + "loss": 2.7992, + "step": 1004 + }, + { + "epoch": 0.08110725526591882, + "grad_norm": 0.8846708536148071, + "learning_rate": 
0.000198981122701522, + "loss": 2.8084, + "step": 1005 + }, + { + "epoch": 0.08118795900250182, + "grad_norm": 0.8105908036231995, + "learning_rate": 0.0001989788736369865, + "loss": 2.8504, + "step": 1006 + }, + { + "epoch": 0.08126866273908483, + "grad_norm": 1.0107187032699585, + "learning_rate": 0.0001989766221056388, + "loss": 2.7935, + "step": 1007 + }, + { + "epoch": 0.08134936647566782, + "grad_norm": 0.7825451493263245, + "learning_rate": 0.0001989743681075349, + "loss": 2.8024, + "step": 1008 + }, + { + "epoch": 0.08143007021225082, + "grad_norm": 0.8478613495826721, + "learning_rate": 0.000198972111642731, + "loss": 2.8645, + "step": 1009 + }, + { + "epoch": 0.08151077394883383, + "grad_norm": 0.8432144522666931, + "learning_rate": 0.0001989698527112834, + "loss": 2.8469, + "step": 1010 + }, + { + "epoch": 0.08159147768541683, + "grad_norm": 0.8147936463356018, + "learning_rate": 0.00019896759131324835, + "loss": 2.7799, + "step": 1011 + }, + { + "epoch": 0.08167218142199983, + "grad_norm": 0.8446993827819824, + "learning_rate": 0.00019896532744868224, + "loss": 2.7685, + "step": 1012 + }, + { + "epoch": 0.08175288515858284, + "grad_norm": 0.7635807394981384, + "learning_rate": 0.00019896306111764146, + "loss": 2.7823, + "step": 1013 + }, + { + "epoch": 0.08183358889516584, + "grad_norm": 0.8272855877876282, + "learning_rate": 0.00019896079232018253, + "loss": 2.7877, + "step": 1014 + }, + { + "epoch": 0.08191429263174885, + "grad_norm": 0.8079700469970703, + "learning_rate": 0.00019895852105636193, + "loss": 2.7849, + "step": 1015 + }, + { + "epoch": 0.08199499636833185, + "grad_norm": 0.8518063426017761, + "learning_rate": 0.0001989562473262363, + "loss": 2.8622, + "step": 1016 + }, + { + "epoch": 0.08207570010491486, + "grad_norm": 0.8646622896194458, + "learning_rate": 0.00019895397112986235, + "loss": 2.8224, + "step": 1017 + }, + { + "epoch": 0.08215640384149786, + "grad_norm": 0.8764398097991943, + "learning_rate": 0.00019895169246729672, + 
"loss": 2.938, + "step": 1018 + }, + { + "epoch": 0.08223710757808086, + "grad_norm": 0.8304057717323303, + "learning_rate": 0.0001989494113385963, + "loss": 2.7586, + "step": 1019 + }, + { + "epoch": 0.08231781131466387, + "grad_norm": 0.8569272756576538, + "learning_rate": 0.00019894712774381787, + "loss": 2.7803, + "step": 1020 + }, + { + "epoch": 0.08239851505124687, + "grad_norm": 0.8788578510284424, + "learning_rate": 0.00019894484168301836, + "loss": 2.8138, + "step": 1021 + }, + { + "epoch": 0.08247921878782988, + "grad_norm": 0.9113569855690002, + "learning_rate": 0.0001989425531562548, + "loss": 2.8023, + "step": 1022 + }, + { + "epoch": 0.08255992252441288, + "grad_norm": 0.8630590438842773, + "learning_rate": 0.00019894026216358413, + "loss": 2.791, + "step": 1023 + }, + { + "epoch": 0.08264062626099589, + "grad_norm": 0.8691157698631287, + "learning_rate": 0.00019893796870506348, + "loss": 2.811, + "step": 1024 + }, + { + "epoch": 0.08272132999757889, + "grad_norm": 0.9078284502029419, + "learning_rate": 0.00019893567278075007, + "loss": 2.8282, + "step": 1025 + }, + { + "epoch": 0.0828020337341619, + "grad_norm": 0.867511510848999, + "learning_rate": 0.00019893337439070105, + "loss": 2.7862, + "step": 1026 + }, + { + "epoch": 0.0828827374707449, + "grad_norm": 0.8016698360443115, + "learning_rate": 0.00019893107353497372, + "loss": 2.8083, + "step": 1027 + }, + { + "epoch": 0.0829634412073279, + "grad_norm": 0.8583545684814453, + "learning_rate": 0.00019892877021362543, + "loss": 2.8041, + "step": 1028 + }, + { + "epoch": 0.08304414494391091, + "grad_norm": 0.8302493691444397, + "learning_rate": 0.0001989264644267136, + "loss": 2.7866, + "step": 1029 + }, + { + "epoch": 0.08312484868049391, + "grad_norm": 0.9628411531448364, + "learning_rate": 0.00019892415617429567, + "loss": 2.8187, + "step": 1030 + }, + { + "epoch": 0.08320555241707692, + "grad_norm": 0.874840259552002, + "learning_rate": 0.0001989218454564292, + "loss": 2.7475, + "step": 1031 + }, 
+ { + "epoch": 0.08328625615365992, + "grad_norm": 0.8641294836997986, + "learning_rate": 0.0001989195322731717, + "loss": 2.7795, + "step": 1032 + }, + { + "epoch": 0.08336695989024291, + "grad_norm": 0.8219757080078125, + "learning_rate": 0.0001989172166245809, + "loss": 2.7683, + "step": 1033 + }, + { + "epoch": 0.08344766362682592, + "grad_norm": 0.7905694246292114, + "learning_rate": 0.00019891489851071455, + "loss": 2.7668, + "step": 1034 + }, + { + "epoch": 0.08352836736340892, + "grad_norm": 0.8180816173553467, + "learning_rate": 0.0001989125779316303, + "loss": 2.7661, + "step": 1035 + }, + { + "epoch": 0.08360907109999192, + "grad_norm": 0.8337293267250061, + "learning_rate": 0.00019891025488738605, + "loss": 2.7823, + "step": 1036 + }, + { + "epoch": 0.08368977483657493, + "grad_norm": 0.9673140048980713, + "learning_rate": 0.00019890792937803973, + "loss": 2.8164, + "step": 1037 + }, + { + "epoch": 0.08377047857315793, + "grad_norm": 0.8810501098632812, + "learning_rate": 0.00019890560140364922, + "loss": 2.7904, + "step": 1038 + }, + { + "epoch": 0.08385118230974094, + "grad_norm": 0.9507614374160767, + "learning_rate": 0.0001989032709642726, + "loss": 2.7928, + "step": 1039 + }, + { + "epoch": 0.08393188604632394, + "grad_norm": 0.953738808631897, + "learning_rate": 0.00019890093805996793, + "loss": 2.7922, + "step": 1040 + }, + { + "epoch": 0.08401258978290695, + "grad_norm": 0.8079931139945984, + "learning_rate": 0.00019889860269079336, + "loss": 2.7909, + "step": 1041 + }, + { + "epoch": 0.08409329351948995, + "grad_norm": 1.0330647230148315, + "learning_rate": 0.0001988962648568071, + "loss": 2.7526, + "step": 1042 + }, + { + "epoch": 0.08417399725607295, + "grad_norm": 0.8988988399505615, + "learning_rate": 0.00019889392455806738, + "loss": 2.7471, + "step": 1043 + }, + { + "epoch": 0.08425470099265596, + "grad_norm": 0.7986348271369934, + "learning_rate": 0.00019889158179463255, + "loss": 2.7208, + "step": 1044 + }, + { + "epoch": 
0.08433540472923896, + "grad_norm": 0.9231631755828857, + "learning_rate": 0.000198889236566561, + "loss": 2.7953, + "step": 1045 + }, + { + "epoch": 0.08441610846582197, + "grad_norm": 0.8438155055046082, + "learning_rate": 0.00019888688887391117, + "loss": 2.8006, + "step": 1046 + }, + { + "epoch": 0.08449681220240497, + "grad_norm": 0.8915219306945801, + "learning_rate": 0.0001988845387167416, + "loss": 2.8184, + "step": 1047 + }, + { + "epoch": 0.08457751593898798, + "grad_norm": 0.924401581287384, + "learning_rate": 0.0001988821860951108, + "loss": 2.8411, + "step": 1048 + }, + { + "epoch": 0.08465821967557098, + "grad_norm": 0.8144630193710327, + "learning_rate": 0.00019887983100907745, + "loss": 2.8258, + "step": 1049 + }, + { + "epoch": 0.08473892341215399, + "grad_norm": 0.9974459409713745, + "learning_rate": 0.00019887747345870028, + "loss": 2.7567, + "step": 1050 + }, + { + "epoch": 0.08481962714873699, + "grad_norm": 0.944526195526123, + "learning_rate": 0.00019887511344403796, + "loss": 2.8657, + "step": 1051 + }, + { + "epoch": 0.08490033088532, + "grad_norm": 0.8204831480979919, + "learning_rate": 0.00019887275096514936, + "loss": 2.8054, + "step": 1052 + }, + { + "epoch": 0.084981034621903, + "grad_norm": 0.8855900168418884, + "learning_rate": 0.00019887038602209336, + "loss": 2.8019, + "step": 1053 + }, + { + "epoch": 0.085061738358486, + "grad_norm": 0.9025108814239502, + "learning_rate": 0.0001988680186149289, + "loss": 2.7934, + "step": 1054 + }, + { + "epoch": 0.08514244209506901, + "grad_norm": 0.8486441373825073, + "learning_rate": 0.00019886564874371494, + "loss": 2.809, + "step": 1055 + }, + { + "epoch": 0.08522314583165201, + "grad_norm": 0.778364896774292, + "learning_rate": 0.00019886327640851058, + "loss": 2.7783, + "step": 1056 + }, + { + "epoch": 0.08530384956823502, + "grad_norm": 0.8515299558639526, + "learning_rate": 0.00019886090160937497, + "loss": 2.8122, + "step": 1057 + }, + { + "epoch": 0.08538455330481802, + "grad_norm": 
0.8466131091117859, + "learning_rate": 0.00019885852434636724, + "loss": 2.7798, + "step": 1058 + }, + { + "epoch": 0.08546525704140101, + "grad_norm": 0.8856541514396667, + "learning_rate": 0.00019885614461954667, + "loss": 2.8033, + "step": 1059 + }, + { + "epoch": 0.08554596077798401, + "grad_norm": 0.8853924870491028, + "learning_rate": 0.00019885376242897258, + "loss": 2.8368, + "step": 1060 + }, + { + "epoch": 0.08562666451456702, + "grad_norm": 0.7858660221099854, + "learning_rate": 0.0001988513777747043, + "loss": 2.7806, + "step": 1061 + }, + { + "epoch": 0.08570736825115002, + "grad_norm": 0.8601513504981995, + "learning_rate": 0.0001988489906568013, + "loss": 2.8434, + "step": 1062 + }, + { + "epoch": 0.08578807198773303, + "grad_norm": 0.9126001596450806, + "learning_rate": 0.00019884660107532306, + "loss": 2.8469, + "step": 1063 + }, + { + "epoch": 0.08586877572431603, + "grad_norm": 0.9016061425209045, + "learning_rate": 0.00019884420903032912, + "loss": 2.7907, + "step": 1064 + }, + { + "epoch": 0.08594947946089904, + "grad_norm": 0.9134494066238403, + "learning_rate": 0.00019884181452187915, + "loss": 2.8426, + "step": 1065 + }, + { + "epoch": 0.08603018319748204, + "grad_norm": 0.8891138434410095, + "learning_rate": 0.00019883941755003272, + "loss": 2.8092, + "step": 1066 + }, + { + "epoch": 0.08611088693406505, + "grad_norm": 0.822884202003479, + "learning_rate": 0.0001988370181148497, + "loss": 2.8454, + "step": 1067 + }, + { + "epoch": 0.08619159067064805, + "grad_norm": 0.8341901898384094, + "learning_rate": 0.0001988346162163898, + "loss": 2.8027, + "step": 1068 + }, + { + "epoch": 0.08627229440723105, + "grad_norm": 0.8653229475021362, + "learning_rate": 0.00019883221185471291, + "loss": 2.7487, + "step": 1069 + }, + { + "epoch": 0.08635299814381406, + "grad_norm": 0.8065966367721558, + "learning_rate": 0.00019882980502987894, + "loss": 2.7847, + "step": 1070 + }, + { + "epoch": 0.08643370188039706, + "grad_norm": 0.9106903076171875, + 
"learning_rate": 0.0001988273957419479, + "loss": 2.7962, + "step": 1071 + }, + { + "epoch": 0.08651440561698007, + "grad_norm": 0.953815221786499, + "learning_rate": 0.0001988249839909798, + "loss": 2.8168, + "step": 1072 + }, + { + "epoch": 0.08659510935356307, + "grad_norm": 0.8642842173576355, + "learning_rate": 0.00019882256977703477, + "loss": 2.8205, + "step": 1073 + }, + { + "epoch": 0.08667581309014608, + "grad_norm": 0.8500350117683411, + "learning_rate": 0.000198820153100173, + "loss": 2.8798, + "step": 1074 + }, + { + "epoch": 0.08675651682672908, + "grad_norm": 0.9212989807128906, + "learning_rate": 0.00019881773396045467, + "loss": 2.8088, + "step": 1075 + }, + { + "epoch": 0.08683722056331208, + "grad_norm": 0.8897970914840698, + "learning_rate": 0.0001988153123579401, + "loss": 2.7983, + "step": 1076 + }, + { + "epoch": 0.08691792429989509, + "grad_norm": 0.7942636609077454, + "learning_rate": 0.00019881288829268968, + "loss": 2.7711, + "step": 1077 + }, + { + "epoch": 0.08699862803647809, + "grad_norm": 0.8286700248718262, + "learning_rate": 0.00019881046176476374, + "loss": 2.7995, + "step": 1078 + }, + { + "epoch": 0.0870793317730611, + "grad_norm": 0.9436343908309937, + "learning_rate": 0.00019880803277422281, + "loss": 2.8399, + "step": 1079 + }, + { + "epoch": 0.0871600355096441, + "grad_norm": 0.9592518210411072, + "learning_rate": 0.00019880560132112742, + "loss": 2.7888, + "step": 1080 + }, + { + "epoch": 0.0872407392462271, + "grad_norm": 0.8956589698791504, + "learning_rate": 0.00019880316740553816, + "loss": 2.7635, + "step": 1081 + }, + { + "epoch": 0.08732144298281011, + "grad_norm": 1.055312156677246, + "learning_rate": 0.00019880073102751574, + "loss": 2.7778, + "step": 1082 + }, + { + "epoch": 0.08740214671939311, + "grad_norm": 0.783273458480835, + "learning_rate": 0.00019879829218712075, + "loss": 2.735, + "step": 1083 + }, + { + "epoch": 0.0874828504559761, + "grad_norm": 0.8315421938896179, + "learning_rate": 
0.00019879585088441413, + "loss": 2.7973, + "step": 1084 + }, + { + "epoch": 0.08756355419255911, + "grad_norm": 0.9550945162773132, + "learning_rate": 0.00019879340711945662, + "loss": 2.8083, + "step": 1085 + }, + { + "epoch": 0.08764425792914211, + "grad_norm": 0.9579277634620667, + "learning_rate": 0.00019879096089230915, + "loss": 2.7411, + "step": 1086 + }, + { + "epoch": 0.08772496166572512, + "grad_norm": 0.8602219223976135, + "learning_rate": 0.0001987885122030327, + "loss": 2.7461, + "step": 1087 + }, + { + "epoch": 0.08780566540230812, + "grad_norm": 0.9749068021774292, + "learning_rate": 0.00019878606105168829, + "loss": 2.7701, + "step": 1088 + }, + { + "epoch": 0.08788636913889113, + "grad_norm": 0.8128982186317444, + "learning_rate": 0.00019878360743833703, + "loss": 2.7949, + "step": 1089 + }, + { + "epoch": 0.08796707287547413, + "grad_norm": 0.9177080988883972, + "learning_rate": 0.00019878115136304003, + "loss": 2.7471, + "step": 1090 + }, + { + "epoch": 0.08804777661205714, + "grad_norm": 0.9052132368087769, + "learning_rate": 0.0001987786928258585, + "loss": 2.8356, + "step": 1091 + }, + { + "epoch": 0.08812848034864014, + "grad_norm": 0.8972994089126587, + "learning_rate": 0.00019877623182685378, + "loss": 2.8304, + "step": 1092 + }, + { + "epoch": 0.08820918408522314, + "grad_norm": 0.861251950263977, + "learning_rate": 0.0001987737683660871, + "loss": 2.8436, + "step": 1093 + }, + { + "epoch": 0.08828988782180615, + "grad_norm": 0.9139869809150696, + "learning_rate": 0.00019877130244361996, + "loss": 2.7583, + "step": 1094 + }, + { + "epoch": 0.08837059155838915, + "grad_norm": 0.8441170454025269, + "learning_rate": 0.00019876883405951377, + "loss": 2.7508, + "step": 1095 + }, + { + "epoch": 0.08845129529497216, + "grad_norm": 0.8624769449234009, + "learning_rate": 0.00019876636321383004, + "loss": 2.8003, + "step": 1096 + }, + { + "epoch": 0.08853199903155516, + "grad_norm": 0.9033877849578857, + "learning_rate": 0.00019876388990663037, + 
"loss": 2.7934, + "step": 1097 + }, + { + "epoch": 0.08861270276813817, + "grad_norm": 0.9492632746696472, + "learning_rate": 0.0001987614141379764, + "loss": 2.7852, + "step": 1098 + }, + { + "epoch": 0.08869340650472117, + "grad_norm": 0.9004682302474976, + "learning_rate": 0.00019875893590792982, + "loss": 2.7518, + "step": 1099 + }, + { + "epoch": 0.08877411024130417, + "grad_norm": 0.8352272510528564, + "learning_rate": 0.0001987564552165524, + "loss": 2.8035, + "step": 1100 + }, + { + "epoch": 0.08885481397788718, + "grad_norm": 0.8488562107086182, + "learning_rate": 0.00019875397206390593, + "loss": 2.7672, + "step": 1101 + }, + { + "epoch": 0.08893551771447018, + "grad_norm": 0.9450985193252563, + "learning_rate": 0.00019875148645005238, + "loss": 2.7558, + "step": 1102 + }, + { + "epoch": 0.08901622145105319, + "grad_norm": 0.9203561544418335, + "learning_rate": 0.0001987489983750536, + "loss": 2.7983, + "step": 1103 + }, + { + "epoch": 0.08909692518763619, + "grad_norm": 0.8761897087097168, + "learning_rate": 0.0001987465078389717, + "loss": 2.7536, + "step": 1104 + }, + { + "epoch": 0.0891776289242192, + "grad_norm": 0.9064637422561646, + "learning_rate": 0.00019874401484186867, + "loss": 2.8104, + "step": 1105 + }, + { + "epoch": 0.0892583326608022, + "grad_norm": 0.8394999504089355, + "learning_rate": 0.00019874151938380666, + "loss": 2.7459, + "step": 1106 + }, + { + "epoch": 0.0893390363973852, + "grad_norm": 0.8782099485397339, + "learning_rate": 0.00019873902146484785, + "loss": 2.8675, + "step": 1107 + }, + { + "epoch": 0.08941974013396821, + "grad_norm": 0.8564850091934204, + "learning_rate": 0.00019873652108505458, + "loss": 2.8561, + "step": 1108 + }, + { + "epoch": 0.08950044387055121, + "grad_norm": 0.8343809843063354, + "learning_rate": 0.0001987340182444891, + "loss": 2.8406, + "step": 1109 + }, + { + "epoch": 0.0895811476071342, + "grad_norm": 1.096273422241211, + "learning_rate": 0.00019873151294321376, + "loss": 2.8264, + "step": 1110 + 
}, + { + "epoch": 0.08966185134371721, + "grad_norm": 0.8654618263244629, + "learning_rate": 0.00019872900518129103, + "loss": 2.7956, + "step": 1111 + }, + { + "epoch": 0.08974255508030021, + "grad_norm": 0.8868138194084167, + "learning_rate": 0.00019872649495878344, + "loss": 2.8028, + "step": 1112 + }, + { + "epoch": 0.08982325881688322, + "grad_norm": 0.8139104843139648, + "learning_rate": 0.00019872398227575348, + "loss": 2.7502, + "step": 1113 + }, + { + "epoch": 0.08990396255346622, + "grad_norm": 0.8277762532234192, + "learning_rate": 0.00019872146713226384, + "loss": 2.7913, + "step": 1114 + }, + { + "epoch": 0.08998466629004923, + "grad_norm": 0.8470397591590881, + "learning_rate": 0.00019871894952837717, + "loss": 2.7982, + "step": 1115 + }, + { + "epoch": 0.09006537002663223, + "grad_norm": 0.8424760103225708, + "learning_rate": 0.00019871642946415625, + "loss": 2.8067, + "step": 1116 + }, + { + "epoch": 0.09014607376321523, + "grad_norm": 0.8253894448280334, + "learning_rate": 0.00019871390693966382, + "loss": 2.8339, + "step": 1117 + }, + { + "epoch": 0.09022677749979824, + "grad_norm": 0.8120691776275635, + "learning_rate": 0.00019871138195496282, + "loss": 2.7938, + "step": 1118 + }, + { + "epoch": 0.09030748123638124, + "grad_norm": 0.920189619064331, + "learning_rate": 0.00019870885451011617, + "loss": 2.8083, + "step": 1119 + }, + { + "epoch": 0.09038818497296425, + "grad_norm": 0.8990969657897949, + "learning_rate": 0.0001987063246051868, + "loss": 2.7481, + "step": 1120 + }, + { + "epoch": 0.09046888870954725, + "grad_norm": 0.8280801773071289, + "learning_rate": 0.0001987037922402378, + "loss": 2.8536, + "step": 1121 + }, + { + "epoch": 0.09054959244613026, + "grad_norm": 0.8510503768920898, + "learning_rate": 0.0001987012574153323, + "loss": 2.758, + "step": 1122 + }, + { + "epoch": 0.09063029618271326, + "grad_norm": 0.9103946685791016, + "learning_rate": 0.00019869872013053344, + "loss": 2.7594, + "step": 1123 + }, + { + "epoch": 
0.09071099991929626, + "grad_norm": 0.804916262626648, + "learning_rate": 0.00019869618038590448, + "loss": 2.7489, + "step": 1124 + }, + { + "epoch": 0.09079170365587927, + "grad_norm": 0.7542802095413208, + "learning_rate": 0.00019869363818150867, + "loss": 2.76, + "step": 1125 + }, + { + "epoch": 0.09087240739246227, + "grad_norm": 0.7725108861923218, + "learning_rate": 0.00019869109351740947, + "loss": 2.8124, + "step": 1126 + }, + { + "epoch": 0.09095311112904528, + "grad_norm": 0.8533692955970764, + "learning_rate": 0.0001986885463936702, + "loss": 2.8499, + "step": 1127 + }, + { + "epoch": 0.09103381486562828, + "grad_norm": 0.8351541757583618, + "learning_rate": 0.0001986859968103544, + "loss": 2.8075, + "step": 1128 + }, + { + "epoch": 0.09111451860221129, + "grad_norm": 0.8780044913291931, + "learning_rate": 0.0001986834447675256, + "loss": 2.7587, + "step": 1129 + }, + { + "epoch": 0.09119522233879429, + "grad_norm": 0.9587519764900208, + "learning_rate": 0.00019868089026524736, + "loss": 2.8069, + "step": 1130 + }, + { + "epoch": 0.0912759260753773, + "grad_norm": 0.8285651206970215, + "learning_rate": 0.00019867833330358342, + "loss": 2.8209, + "step": 1131 + }, + { + "epoch": 0.0913566298119603, + "grad_norm": 0.8589211106300354, + "learning_rate": 0.00019867577388259745, + "loss": 2.8144, + "step": 1132 + }, + { + "epoch": 0.0914373335485433, + "grad_norm": 0.8740364909172058, + "learning_rate": 0.00019867321200235324, + "loss": 2.858, + "step": 1133 + }, + { + "epoch": 0.09151803728512631, + "grad_norm": 0.8368108868598938, + "learning_rate": 0.00019867064766291467, + "loss": 2.7997, + "step": 1134 + }, + { + "epoch": 0.0915987410217093, + "grad_norm": 0.8243690133094788, + "learning_rate": 0.00019866808086434564, + "loss": 2.7925, + "step": 1135 + }, + { + "epoch": 0.0916794447582923, + "grad_norm": 0.8296996355056763, + "learning_rate": 0.0001986655116067101, + "loss": 2.7953, + "step": 1136 + }, + { + "epoch": 0.09176014849487531, + "grad_norm": 
0.9255942702293396, + "learning_rate": 0.0001986629398900721, + "loss": 2.844, + "step": 1137 + }, + { + "epoch": 0.09184085223145831, + "grad_norm": 0.7498174905776978, + "learning_rate": 0.00019866036571449574, + "loss": 2.7372, + "step": 1138 + }, + { + "epoch": 0.09192155596804132, + "grad_norm": 0.8170139193534851, + "learning_rate": 0.00019865778908004513, + "loss": 2.7656, + "step": 1139 + }, + { + "epoch": 0.09200225970462432, + "grad_norm": 0.8858106732368469, + "learning_rate": 0.00019865520998678458, + "loss": 2.7657, + "step": 1140 + }, + { + "epoch": 0.09208296344120732, + "grad_norm": 0.8789847493171692, + "learning_rate": 0.00019865262843477826, + "loss": 2.8419, + "step": 1141 + }, + { + "epoch": 0.09216366717779033, + "grad_norm": 0.8433314561843872, + "learning_rate": 0.00019865004442409058, + "loss": 2.7981, + "step": 1142 + }, + { + "epoch": 0.09224437091437333, + "grad_norm": 0.8822595477104187, + "learning_rate": 0.0001986474579547859, + "loss": 2.8368, + "step": 1143 + }, + { + "epoch": 0.09232507465095634, + "grad_norm": 0.9067013263702393, + "learning_rate": 0.00019864486902692872, + "loss": 2.7807, + "step": 1144 + }, + { + "epoch": 0.09240577838753934, + "grad_norm": 0.9551558494567871, + "learning_rate": 0.00019864227764058355, + "loss": 2.7617, + "step": 1145 + }, + { + "epoch": 0.09248648212412235, + "grad_norm": 0.8337206244468689, + "learning_rate": 0.00019863968379581494, + "loss": 2.8289, + "step": 1146 + }, + { + "epoch": 0.09256718586070535, + "grad_norm": 0.952702522277832, + "learning_rate": 0.0001986370874926876, + "loss": 2.8508, + "step": 1147 + }, + { + "epoch": 0.09264788959728835, + "grad_norm": 0.8586699366569519, + "learning_rate": 0.00019863448873126615, + "loss": 2.8784, + "step": 1148 + }, + { + "epoch": 0.09272859333387136, + "grad_norm": 0.7625309228897095, + "learning_rate": 0.00019863188751161544, + "loss": 2.7936, + "step": 1149 + }, + { + "epoch": 0.09280929707045436, + "grad_norm": 0.8912700414657593, + 
"learning_rate": 0.0001986292838338003, + "loss": 2.8745, + "step": 1150 + }, + { + "epoch": 0.09289000080703737, + "grad_norm": 0.8618904948234558, + "learning_rate": 0.00019862667769788553, + "loss": 2.8086, + "step": 1151 + }, + { + "epoch": 0.09297070454362037, + "grad_norm": 1.0013352632522583, + "learning_rate": 0.00019862406910393617, + "loss": 2.8211, + "step": 1152 + }, + { + "epoch": 0.09305140828020338, + "grad_norm": 0.7922475337982178, + "learning_rate": 0.0001986214580520172, + "loss": 2.7668, + "step": 1153 + }, + { + "epoch": 0.09313211201678638, + "grad_norm": 0.9490330815315247, + "learning_rate": 0.00019861884454219365, + "loss": 2.7571, + "step": 1154 + }, + { + "epoch": 0.09321281575336939, + "grad_norm": 0.8780270218849182, + "learning_rate": 0.00019861622857453076, + "loss": 2.7598, + "step": 1155 + }, + { + "epoch": 0.09329351948995239, + "grad_norm": 0.9220066070556641, + "learning_rate": 0.00019861361014909365, + "loss": 2.7609, + "step": 1156 + }, + { + "epoch": 0.0933742232265354, + "grad_norm": 0.8299020528793335, + "learning_rate": 0.0001986109892659476, + "loss": 2.8655, + "step": 1157 + }, + { + "epoch": 0.0934549269631184, + "grad_norm": 0.9700348377227783, + "learning_rate": 0.0001986083659251579, + "loss": 2.8597, + "step": 1158 + }, + { + "epoch": 0.0935356306997014, + "grad_norm": 0.8820784687995911, + "learning_rate": 0.00019860574012679001, + "loss": 2.8776, + "step": 1159 + }, + { + "epoch": 0.0936163344362844, + "grad_norm": 0.8134172558784485, + "learning_rate": 0.0001986031118709093, + "loss": 2.8163, + "step": 1160 + }, + { + "epoch": 0.0936970381728674, + "grad_norm": 0.885974109172821, + "learning_rate": 0.00019860048115758123, + "loss": 2.752, + "step": 1161 + }, + { + "epoch": 0.0937777419094504, + "grad_norm": 0.9650186896324158, + "learning_rate": 0.0001985978479868715, + "loss": 2.7587, + "step": 1162 + }, + { + "epoch": 0.0938584456460334, + "grad_norm": 0.8550445437431335, + "learning_rate": 
0.00019859521235884563, + "loss": 2.7887, + "step": 1163 + }, + { + "epoch": 0.09393914938261641, + "grad_norm": 0.9686560034751892, + "learning_rate": 0.00019859257427356933, + "loss": 2.7974, + "step": 1164 + }, + { + "epoch": 0.09401985311919941, + "grad_norm": 0.9185387492179871, + "learning_rate": 0.00019858993373110837, + "loss": 2.7933, + "step": 1165 + }, + { + "epoch": 0.09410055685578242, + "grad_norm": 0.9549610018730164, + "learning_rate": 0.00019858729073152852, + "loss": 2.7698, + "step": 1166 + }, + { + "epoch": 0.09418126059236542, + "grad_norm": 1.0523492097854614, + "learning_rate": 0.0001985846452748957, + "loss": 2.7215, + "step": 1167 + }, + { + "epoch": 0.09426196432894843, + "grad_norm": 0.8551118969917297, + "learning_rate": 0.00019858199736127582, + "loss": 2.805, + "step": 1168 + }, + { + "epoch": 0.09434266806553143, + "grad_norm": 1.021374225616455, + "learning_rate": 0.0001985793469907349, + "loss": 2.794, + "step": 1169 + }, + { + "epoch": 0.09442337180211444, + "grad_norm": 0.8745501041412354, + "learning_rate": 0.0001985766941633389, + "loss": 2.7793, + "step": 1170 + }, + { + "epoch": 0.09450407553869744, + "grad_norm": 0.7426434755325317, + "learning_rate": 0.00019857403887915402, + "loss": 2.7808, + "step": 1171 + }, + { + "epoch": 0.09458477927528045, + "grad_norm": 0.9183726906776428, + "learning_rate": 0.0001985713811382464, + "loss": 2.8001, + "step": 1172 + }, + { + "epoch": 0.09466548301186345, + "grad_norm": 0.8136709928512573, + "learning_rate": 0.00019856872094068233, + "loss": 2.7394, + "step": 1173 + }, + { + "epoch": 0.09474618674844645, + "grad_norm": 0.9399348497390747, + "learning_rate": 0.00019856605828652807, + "loss": 2.7733, + "step": 1174 + }, + { + "epoch": 0.09482689048502946, + "grad_norm": 0.8233176469802856, + "learning_rate": 0.00019856339317584997, + "loss": 2.7672, + "step": 1175 + }, + { + "epoch": 0.09490759422161246, + "grad_norm": 0.9157048463821411, + "learning_rate": 0.00019856072560871447, + 
"loss": 2.7992, + "step": 1176 + }, + { + "epoch": 0.09498829795819547, + "grad_norm": 0.8729545474052429, + "learning_rate": 0.00019855805558518803, + "loss": 2.749, + "step": 1177 + }, + { + "epoch": 0.09506900169477847, + "grad_norm": 0.8592300415039062, + "learning_rate": 0.00019855538310533722, + "loss": 2.7257, + "step": 1178 + }, + { + "epoch": 0.09514970543136148, + "grad_norm": 0.8470803499221802, + "learning_rate": 0.00019855270816922867, + "loss": 2.7479, + "step": 1179 + }, + { + "epoch": 0.09523040916794448, + "grad_norm": 0.8538667559623718, + "learning_rate": 0.00019855003077692897, + "loss": 2.7576, + "step": 1180 + }, + { + "epoch": 0.09531111290452748, + "grad_norm": 0.8890984654426575, + "learning_rate": 0.0001985473509285049, + "loss": 2.7961, + "step": 1181 + }, + { + "epoch": 0.09539181664111049, + "grad_norm": 0.7769411206245422, + "learning_rate": 0.00019854466862402324, + "loss": 2.8087, + "step": 1182 + }, + { + "epoch": 0.09547252037769349, + "grad_norm": 0.8892520666122437, + "learning_rate": 0.00019854198386355085, + "loss": 2.7935, + "step": 1183 + }, + { + "epoch": 0.0955532241142765, + "grad_norm": 0.8675585389137268, + "learning_rate": 0.00019853929664715464, + "loss": 2.833, + "step": 1184 + }, + { + "epoch": 0.0956339278508595, + "grad_norm": 0.8053853511810303, + "learning_rate": 0.00019853660697490154, + "loss": 2.8002, + "step": 1185 + }, + { + "epoch": 0.09571463158744249, + "grad_norm": 0.9237198829650879, + "learning_rate": 0.00019853391484685865, + "loss": 2.8281, + "step": 1186 + }, + { + "epoch": 0.0957953353240255, + "grad_norm": 0.8432926535606384, + "learning_rate": 0.000198531220263093, + "loss": 2.8131, + "step": 1187 + }, + { + "epoch": 0.0958760390606085, + "grad_norm": 0.796380341053009, + "learning_rate": 0.0001985285232236718, + "loss": 2.753, + "step": 1188 + }, + { + "epoch": 0.0959567427971915, + "grad_norm": 0.9183037281036377, + "learning_rate": 0.00019852582372866225, + "loss": 2.7625, + "step": 1189 + }, 
+ { + "epoch": 0.09603744653377451, + "grad_norm": 0.8194435238838196, + "learning_rate": 0.0001985231217781316, + "loss": 2.7906, + "step": 1190 + }, + { + "epoch": 0.09611815027035751, + "grad_norm": 0.8430871367454529, + "learning_rate": 0.00019852041737214725, + "loss": 2.8457, + "step": 1191 + }, + { + "epoch": 0.09619885400694052, + "grad_norm": 1.0237345695495605, + "learning_rate": 0.0001985177105107765, + "loss": 2.789, + "step": 1192 + }, + { + "epoch": 0.09627955774352352, + "grad_norm": 0.8721581101417542, + "learning_rate": 0.00019851500119408692, + "loss": 2.7187, + "step": 1193 + }, + { + "epoch": 0.09636026148010653, + "grad_norm": 0.8089142441749573, + "learning_rate": 0.00019851228942214603, + "loss": 2.7544, + "step": 1194 + }, + { + "epoch": 0.09644096521668953, + "grad_norm": 1.1076842546463013, + "learning_rate": 0.0001985095751950213, + "loss": 2.7859, + "step": 1195 + }, + { + "epoch": 0.09652166895327254, + "grad_norm": 0.84585040807724, + "learning_rate": 0.0001985068585127805, + "loss": 2.8005, + "step": 1196 + }, + { + "epoch": 0.09660237268985554, + "grad_norm": 0.8231167197227478, + "learning_rate": 0.00019850413937549127, + "loss": 2.8561, + "step": 1197 + }, + { + "epoch": 0.09668307642643854, + "grad_norm": 1.0028103590011597, + "learning_rate": 0.00019850141778322136, + "loss": 2.8049, + "step": 1198 + }, + { + "epoch": 0.09676378016302155, + "grad_norm": 0.8575148582458496, + "learning_rate": 0.0001984986937360387, + "loss": 2.7723, + "step": 1199 + }, + { + "epoch": 0.09684448389960455, + "grad_norm": 0.8567116260528564, + "learning_rate": 0.00019849596723401107, + "loss": 2.7418, + "step": 1200 + }, + { + "epoch": 0.09692518763618756, + "grad_norm": 1.1159218549728394, + "learning_rate": 0.00019849323827720645, + "loss": 2.8352, + "step": 1201 + }, + { + "epoch": 0.09700589137277056, + "grad_norm": 0.849656879901886, + "learning_rate": 0.0001984905068656929, + "loss": 2.7875, + "step": 1202 + }, + { + "epoch": 
0.09708659510935357, + "grad_norm": 0.8479150533676147, + "learning_rate": 0.00019848777299953847, + "loss": 2.7828, + "step": 1203 + }, + { + "epoch": 0.09716729884593657, + "grad_norm": 0.9143954515457153, + "learning_rate": 0.00019848503667881125, + "loss": 2.7978, + "step": 1204 + }, + { + "epoch": 0.09724800258251957, + "grad_norm": 0.8162297010421753, + "learning_rate": 0.0001984822979035795, + "loss": 2.7621, + "step": 1205 + }, + { + "epoch": 0.09732870631910258, + "grad_norm": 0.8625509142875671, + "learning_rate": 0.00019847955667391144, + "loss": 2.7484, + "step": 1206 + }, + { + "epoch": 0.09740941005568558, + "grad_norm": 0.8485168218612671, + "learning_rate": 0.00019847681298987543, + "loss": 2.7599, + "step": 1207 + }, + { + "epoch": 0.09749011379226859, + "grad_norm": 0.8962678909301758, + "learning_rate": 0.00019847406685153976, + "loss": 2.7753, + "step": 1208 + }, + { + "epoch": 0.09757081752885159, + "grad_norm": 0.8890791535377502, + "learning_rate": 0.00019847131825897297, + "loss": 2.7635, + "step": 1209 + }, + { + "epoch": 0.0976515212654346, + "grad_norm": 0.8461710810661316, + "learning_rate": 0.00019846856721224355, + "loss": 2.796, + "step": 1210 + }, + { + "epoch": 0.0977322250020176, + "grad_norm": 0.912738025188446, + "learning_rate": 0.00019846581371141996, + "loss": 2.7889, + "step": 1211 + }, + { + "epoch": 0.09781292873860059, + "grad_norm": 0.8530749082565308, + "learning_rate": 0.00019846305775657097, + "loss": 2.8298, + "step": 1212 + }, + { + "epoch": 0.0978936324751836, + "grad_norm": 0.8890148401260376, + "learning_rate": 0.00019846029934776516, + "loss": 2.7491, + "step": 1213 + }, + { + "epoch": 0.0979743362117666, + "grad_norm": 0.8936887979507446, + "learning_rate": 0.0001984575384850713, + "loss": 2.7759, + "step": 1214 + }, + { + "epoch": 0.0980550399483496, + "grad_norm": 0.7811321020126343, + "learning_rate": 0.00019845477516855823, + "loss": 2.8126, + "step": 1215 + }, + { + "epoch": 0.09813574368493261, + 
"grad_norm": 0.8751768469810486, + "learning_rate": 0.00019845200939829484, + "loss": 2.792, + "step": 1216 + }, + { + "epoch": 0.09821644742151561, + "grad_norm": 0.8749501705169678, + "learning_rate": 0.00019844924117434998, + "loss": 2.7818, + "step": 1217 + }, + { + "epoch": 0.09829715115809862, + "grad_norm": 0.8130955100059509, + "learning_rate": 0.0001984464704967927, + "loss": 2.8581, + "step": 1218 + }, + { + "epoch": 0.09837785489468162, + "grad_norm": 0.8158220648765564, + "learning_rate": 0.00019844369736569196, + "loss": 2.7704, + "step": 1219 + }, + { + "epoch": 0.09845855863126463, + "grad_norm": 0.9351849555969238, + "learning_rate": 0.00019844092178111702, + "loss": 2.7857, + "step": 1220 + }, + { + "epoch": 0.09853926236784763, + "grad_norm": 0.8373914957046509, + "learning_rate": 0.00019843814374313697, + "loss": 2.8217, + "step": 1221 + }, + { + "epoch": 0.09861996610443063, + "grad_norm": 0.8919960856437683, + "learning_rate": 0.00019843536325182104, + "loss": 2.7914, + "step": 1222 + }, + { + "epoch": 0.09870066984101364, + "grad_norm": 0.9994316697120667, + "learning_rate": 0.00019843258030723858, + "loss": 2.7981, + "step": 1223 + }, + { + "epoch": 0.09878137357759664, + "grad_norm": 0.8144915699958801, + "learning_rate": 0.0001984297949094589, + "loss": 2.811, + "step": 1224 + }, + { + "epoch": 0.09886207731417965, + "grad_norm": 0.8957876563072205, + "learning_rate": 0.0001984270070585514, + "loss": 2.7752, + "step": 1225 + }, + { + "epoch": 0.09894278105076265, + "grad_norm": 0.9426520466804504, + "learning_rate": 0.0001984242167545856, + "loss": 2.8139, + "step": 1226 + }, + { + "epoch": 0.09902348478734566, + "grad_norm": 0.888769268989563, + "learning_rate": 0.00019842142399763106, + "loss": 2.8305, + "step": 1227 + }, + { + "epoch": 0.09910418852392866, + "grad_norm": 0.9497748613357544, + "learning_rate": 0.00019841862878775736, + "loss": 2.748, + "step": 1228 + }, + { + "epoch": 0.09918489226051166, + "grad_norm": 
0.8715065717697144, + "learning_rate": 0.00019841583112503416, + "loss": 2.7794, + "step": 1229 + }, + { + "epoch": 0.09926559599709467, + "grad_norm": 0.875599205493927, + "learning_rate": 0.00019841303100953116, + "loss": 2.8016, + "step": 1230 + }, + { + "epoch": 0.09934629973367767, + "grad_norm": 0.8631919622421265, + "learning_rate": 0.0001984102284413182, + "loss": 2.8239, + "step": 1231 + }, + { + "epoch": 0.09942700347026068, + "grad_norm": 0.9028074741363525, + "learning_rate": 0.0001984074234204651, + "loss": 2.8372, + "step": 1232 + }, + { + "epoch": 0.09950770720684368, + "grad_norm": 0.890933096408844, + "learning_rate": 0.00019840461594704175, + "loss": 2.799, + "step": 1233 + }, + { + "epoch": 0.09958841094342669, + "grad_norm": 0.9626480340957642, + "learning_rate": 0.00019840180602111816, + "loss": 2.8207, + "step": 1234 + }, + { + "epoch": 0.09966911468000969, + "grad_norm": 0.798394501209259, + "learning_rate": 0.00019839899364276433, + "loss": 2.7784, + "step": 1235 + }, + { + "epoch": 0.0997498184165927, + "grad_norm": 0.8246447443962097, + "learning_rate": 0.00019839617881205036, + "loss": 2.8193, + "step": 1236 + }, + { + "epoch": 0.09983052215317569, + "grad_norm": 0.8315989375114441, + "learning_rate": 0.0001983933615290464, + "loss": 2.8036, + "step": 1237 + }, + { + "epoch": 0.09991122588975869, + "grad_norm": 0.8889075517654419, + "learning_rate": 0.00019839054179382267, + "loss": 2.7606, + "step": 1238 + }, + { + "epoch": 0.0999919296263417, + "grad_norm": 0.7558645009994507, + "learning_rate": 0.00019838771960644942, + "loss": 2.7666, + "step": 1239 + }, + { + "epoch": 0.1000726333629247, + "grad_norm": 0.8876601457595825, + "learning_rate": 0.00019838489496699704, + "loss": 2.8778, + "step": 1240 + }, + { + "epoch": 0.1001533370995077, + "grad_norm": 0.8609516620635986, + "learning_rate": 0.00019838206787553588, + "loss": 2.8189, + "step": 1241 + }, + { + "epoch": 0.10023404083609071, + "grad_norm": 0.8521148562431335, + 
"learning_rate": 0.00019837923833213644, + "loss": 2.8159, + "step": 1242 + }, + { + "epoch": 0.10031474457267371, + "grad_norm": 0.9155359268188477, + "learning_rate": 0.0001983764063368692, + "loss": 2.8351, + "step": 1243 + }, + { + "epoch": 0.10039544830925672, + "grad_norm": 0.8595378398895264, + "learning_rate": 0.00019837357188980475, + "loss": 2.8447, + "step": 1244 + }, + { + "epoch": 0.10047615204583972, + "grad_norm": 0.900244951248169, + "learning_rate": 0.00019837073499101373, + "loss": 2.8646, + "step": 1245 + }, + { + "epoch": 0.10055685578242272, + "grad_norm": 0.8404260277748108, + "learning_rate": 0.00019836789564056689, + "loss": 2.7824, + "step": 1246 + }, + { + "epoch": 0.10063755951900573, + "grad_norm": 0.8776196241378784, + "learning_rate": 0.0001983650538385349, + "loss": 2.8045, + "step": 1247 + }, + { + "epoch": 0.10071826325558873, + "grad_norm": 0.8889327049255371, + "learning_rate": 0.00019836220958498868, + "loss": 2.7967, + "step": 1248 + }, + { + "epoch": 0.10079896699217174, + "grad_norm": 0.8905191421508789, + "learning_rate": 0.00019835936287999906, + "loss": 2.8167, + "step": 1249 + }, + { + "epoch": 0.10087967072875474, + "grad_norm": 0.839970052242279, + "learning_rate": 0.000198356513723637, + "loss": 2.8643, + "step": 1250 + }, + { + "epoch": 0.10096037446533775, + "grad_norm": 0.7989531755447388, + "learning_rate": 0.00019835366211597353, + "loss": 2.8493, + "step": 1251 + }, + { + "epoch": 0.10104107820192075, + "grad_norm": 0.7960095405578613, + "learning_rate": 0.0001983508080570797, + "loss": 2.7377, + "step": 1252 + }, + { + "epoch": 0.10112178193850375, + "grad_norm": 0.7989903092384338, + "learning_rate": 0.00019834795154702661, + "loss": 2.7409, + "step": 1253 + }, + { + "epoch": 0.10120248567508676, + "grad_norm": 0.8557813167572021, + "learning_rate": 0.0001983450925858855, + "loss": 2.7945, + "step": 1254 + }, + { + "epoch": 0.10128318941166976, + "grad_norm": 0.948357880115509, + "learning_rate": 
0.0001983422311737276, + "loss": 2.826, + "step": 1255 + }, + { + "epoch": 0.10136389314825277, + "grad_norm": 0.8356020450592041, + "learning_rate": 0.00019833936731062423, + "loss": 2.8157, + "step": 1256 + }, + { + "epoch": 0.10144459688483577, + "grad_norm": 0.8199872970581055, + "learning_rate": 0.00019833650099664678, + "loss": 2.7273, + "step": 1257 + }, + { + "epoch": 0.10152530062141878, + "grad_norm": 0.8178466558456421, + "learning_rate": 0.00019833363223186669, + "loss": 2.7513, + "step": 1258 + }, + { + "epoch": 0.10160600435800178, + "grad_norm": 0.8165889978408813, + "learning_rate": 0.00019833076101635538, + "loss": 2.7689, + "step": 1259 + }, + { + "epoch": 0.10168670809458479, + "grad_norm": 0.8240275979042053, + "learning_rate": 0.0001983278873501845, + "loss": 2.7477, + "step": 1260 + }, + { + "epoch": 0.10176741183116779, + "grad_norm": 0.8470584750175476, + "learning_rate": 0.00019832501123342563, + "loss": 2.7414, + "step": 1261 + }, + { + "epoch": 0.1018481155677508, + "grad_norm": 0.819063663482666, + "learning_rate": 0.00019832213266615046, + "loss": 2.7335, + "step": 1262 + }, + { + "epoch": 0.10192881930433378, + "grad_norm": 0.8045673370361328, + "learning_rate": 0.00019831925164843071, + "loss": 2.8141, + "step": 1263 + }, + { + "epoch": 0.10200952304091679, + "grad_norm": 0.7827214598655701, + "learning_rate": 0.00019831636818033824, + "loss": 2.7549, + "step": 1264 + }, + { + "epoch": 0.10209022677749979, + "grad_norm": 0.9596436619758606, + "learning_rate": 0.00019831348226194485, + "loss": 2.7327, + "step": 1265 + }, + { + "epoch": 0.1021709305140828, + "grad_norm": 0.826909601688385, + "learning_rate": 0.0001983105938933225, + "loss": 2.7166, + "step": 1266 + }, + { + "epoch": 0.1022516342506658, + "grad_norm": 0.8060985207557678, + "learning_rate": 0.00019830770307454313, + "loss": 2.7514, + "step": 1267 + }, + { + "epoch": 0.1023323379872488, + "grad_norm": 0.8257390856742859, + "learning_rate": 0.00019830480980567887, + "loss": 
2.77, + "step": 1268 + }, + { + "epoch": 0.10241304172383181, + "grad_norm": 0.844406008720398, + "learning_rate": 0.00019830191408680173, + "loss": 2.8548, + "step": 1269 + }, + { + "epoch": 0.10249374546041481, + "grad_norm": 0.84171462059021, + "learning_rate": 0.00019829901591798398, + "loss": 2.7404, + "step": 1270 + }, + { + "epoch": 0.10257444919699782, + "grad_norm": 0.8084118962287903, + "learning_rate": 0.00019829611529929774, + "loss": 2.8078, + "step": 1271 + }, + { + "epoch": 0.10265515293358082, + "grad_norm": 0.8273561000823975, + "learning_rate": 0.00019829321223081538, + "loss": 2.787, + "step": 1272 + }, + { + "epoch": 0.10273585667016383, + "grad_norm": 0.799098551273346, + "learning_rate": 0.00019829030671260925, + "loss": 2.7563, + "step": 1273 + }, + { + "epoch": 0.10281656040674683, + "grad_norm": 0.885866105556488, + "learning_rate": 0.00019828739874475172, + "loss": 2.7313, + "step": 1274 + }, + { + "epoch": 0.10289726414332984, + "grad_norm": 0.7702760696411133, + "learning_rate": 0.00019828448832731529, + "loss": 2.7919, + "step": 1275 + }, + { + "epoch": 0.10297796787991284, + "grad_norm": 0.7577444911003113, + "learning_rate": 0.0001982815754603725, + "loss": 2.7149, + "step": 1276 + }, + { + "epoch": 0.10305867161649584, + "grad_norm": 0.8439713716506958, + "learning_rate": 0.00019827866014399592, + "loss": 2.7881, + "step": 1277 + }, + { + "epoch": 0.10313937535307885, + "grad_norm": 0.8504937291145325, + "learning_rate": 0.00019827574237825827, + "loss": 2.7611, + "step": 1278 + }, + { + "epoch": 0.10322007908966185, + "grad_norm": 0.7775665521621704, + "learning_rate": 0.00019827282216323218, + "loss": 2.7312, + "step": 1279 + }, + { + "epoch": 0.10330078282624486, + "grad_norm": 0.8671591281890869, + "learning_rate": 0.00019826989949899048, + "loss": 2.836, + "step": 1280 + }, + { + "epoch": 0.10338148656282786, + "grad_norm": 0.9308713674545288, + "learning_rate": 0.00019826697438560603, + "loss": 2.7494, + "step": 1281 + }, + { + 
"epoch": 0.10346219029941087, + "grad_norm": 0.9145268797874451, + "learning_rate": 0.0001982640468231517, + "loss": 2.8054, + "step": 1282 + }, + { + "epoch": 0.10354289403599387, + "grad_norm": 0.8150805234909058, + "learning_rate": 0.00019826111681170043, + "loss": 2.7879, + "step": 1283 + }, + { + "epoch": 0.10362359777257688, + "grad_norm": 0.8576685786247253, + "learning_rate": 0.00019825818435132531, + "loss": 2.8184, + "step": 1284 + }, + { + "epoch": 0.10370430150915988, + "grad_norm": 0.8838599920272827, + "learning_rate": 0.00019825524944209937, + "loss": 2.7838, + "step": 1285 + }, + { + "epoch": 0.10378500524574288, + "grad_norm": 0.9119304418563843, + "learning_rate": 0.00019825231208409576, + "loss": 2.8392, + "step": 1286 + }, + { + "epoch": 0.10386570898232589, + "grad_norm": 0.8112398982048035, + "learning_rate": 0.00019824937227738771, + "loss": 2.7844, + "step": 1287 + }, + { + "epoch": 0.10394641271890888, + "grad_norm": 0.8714308738708496, + "learning_rate": 0.00019824643002204847, + "loss": 2.7765, + "step": 1288 + }, + { + "epoch": 0.10402711645549188, + "grad_norm": 0.8733358979225159, + "learning_rate": 0.00019824348531815138, + "loss": 2.771, + "step": 1289 + }, + { + "epoch": 0.10410782019207489, + "grad_norm": 0.8218281269073486, + "learning_rate": 0.00019824053816576981, + "loss": 2.8099, + "step": 1290 + }, + { + "epoch": 0.10418852392865789, + "grad_norm": 0.8647308945655823, + "learning_rate": 0.00019823758856497725, + "loss": 2.7738, + "step": 1291 + }, + { + "epoch": 0.1042692276652409, + "grad_norm": 0.8358582854270935, + "learning_rate": 0.00019823463651584718, + "loss": 2.8021, + "step": 1292 + }, + { + "epoch": 0.1043499314018239, + "grad_norm": 0.7943673133850098, + "learning_rate": 0.00019823168201845318, + "loss": 2.8293, + "step": 1293 + }, + { + "epoch": 0.1044306351384069, + "grad_norm": 0.8501425981521606, + "learning_rate": 0.0001982287250728689, + "loss": 2.7701, + "step": 1294 + }, + { + "epoch": 0.10451133887498991, 
+ "grad_norm": 0.8503665328025818, + "learning_rate": 0.00019822576567916797, + "loss": 2.7881, + "step": 1295 + }, + { + "epoch": 0.10459204261157291, + "grad_norm": 0.9687628149986267, + "learning_rate": 0.0001982228038374242, + "loss": 2.7623, + "step": 1296 + }, + { + "epoch": 0.10467274634815592, + "grad_norm": 0.8034376502037048, + "learning_rate": 0.00019821983954771146, + "loss": 2.8072, + "step": 1297 + }, + { + "epoch": 0.10475345008473892, + "grad_norm": 0.817135214805603, + "learning_rate": 0.00019821687281010352, + "loss": 2.7572, + "step": 1298 + }, + { + "epoch": 0.10483415382132193, + "grad_norm": 0.7961457371711731, + "learning_rate": 0.0001982139036246744, + "loss": 2.8405, + "step": 1299 + }, + { + "epoch": 0.10491485755790493, + "grad_norm": 0.7572407722473145, + "learning_rate": 0.00019821093199149804, + "loss": 2.7495, + "step": 1300 + }, + { + "epoch": 0.10499556129448794, + "grad_norm": 0.7990664839744568, + "learning_rate": 0.00019820795791064856, + "loss": 2.7567, + "step": 1301 + }, + { + "epoch": 0.10507626503107094, + "grad_norm": 0.8197236061096191, + "learning_rate": 0.0001982049813822, + "loss": 2.7807, + "step": 1302 + }, + { + "epoch": 0.10515696876765394, + "grad_norm": 0.9491304159164429, + "learning_rate": 0.00019820200240622664, + "loss": 2.8531, + "step": 1303 + }, + { + "epoch": 0.10523767250423695, + "grad_norm": 0.8143845200538635, + "learning_rate": 0.00019819902098280268, + "loss": 2.7542, + "step": 1304 + }, + { + "epoch": 0.10531837624081995, + "grad_norm": 0.9055941104888916, + "learning_rate": 0.0001981960371120024, + "loss": 2.863, + "step": 1305 + }, + { + "epoch": 0.10539907997740296, + "grad_norm": 0.7804721593856812, + "learning_rate": 0.0001981930507939002, + "loss": 2.8213, + "step": 1306 + }, + { + "epoch": 0.10547978371398596, + "grad_norm": 0.8375318050384521, + "learning_rate": 0.00019819006202857046, + "loss": 2.8222, + "step": 1307 + }, + { + "epoch": 0.10556048745056897, + "grad_norm": 
0.9145569801330566, + "learning_rate": 0.00019818707081608773, + "loss": 2.805, + "step": 1308 + }, + { + "epoch": 0.10564119118715197, + "grad_norm": 0.7899324893951416, + "learning_rate": 0.00019818407715652654, + "loss": 2.8246, + "step": 1309 + }, + { + "epoch": 0.10572189492373497, + "grad_norm": 0.7843480110168457, + "learning_rate": 0.0001981810810499615, + "loss": 2.7909, + "step": 1310 + }, + { + "epoch": 0.10580259866031798, + "grad_norm": 0.8071008920669556, + "learning_rate": 0.00019817808249646723, + "loss": 2.7434, + "step": 1311 + }, + { + "epoch": 0.10588330239690098, + "grad_norm": 0.8682011961936951, + "learning_rate": 0.0001981750814961185, + "loss": 2.8387, + "step": 1312 + }, + { + "epoch": 0.10596400613348399, + "grad_norm": 0.7501091361045837, + "learning_rate": 0.0001981720780489902, + "loss": 2.7633, + "step": 1313 + }, + { + "epoch": 0.10604470987006698, + "grad_norm": 0.9259567856788635, + "learning_rate": 0.000198169072155157, + "loss": 2.8309, + "step": 1314 + }, + { + "epoch": 0.10612541360664998, + "grad_norm": 0.8018674254417419, + "learning_rate": 0.00019816606381469393, + "loss": 2.8647, + "step": 1315 + }, + { + "epoch": 0.10620611734323299, + "grad_norm": 0.8218088746070862, + "learning_rate": 0.00019816305302767595, + "loss": 2.823, + "step": 1316 + }, + { + "epoch": 0.10628682107981599, + "grad_norm": 0.812125027179718, + "learning_rate": 0.00019816003979417808, + "loss": 2.7216, + "step": 1317 + }, + { + "epoch": 0.106367524816399, + "grad_norm": 0.787407636642456, + "learning_rate": 0.0001981570241142754, + "loss": 2.7639, + "step": 1318 + }, + { + "epoch": 0.106448228552982, + "grad_norm": 0.7982528805732727, + "learning_rate": 0.00019815400598804312, + "loss": 2.8597, + "step": 1319 + }, + { + "epoch": 0.106528932289565, + "grad_norm": 0.8490404486656189, + "learning_rate": 0.00019815098541555646, + "loss": 2.7947, + "step": 1320 + }, + { + "epoch": 0.10660963602614801, + "grad_norm": 0.8743172883987427, + "learning_rate": 
0.00019814796239689064, + "loss": 2.8674, + "step": 1321 + }, + { + "epoch": 0.10669033976273101, + "grad_norm": 0.8338125348091125, + "learning_rate": 0.00019814493693212106, + "loss": 2.781, + "step": 1322 + }, + { + "epoch": 0.10677104349931402, + "grad_norm": 0.871516764163971, + "learning_rate": 0.00019814190902132307, + "loss": 2.8742, + "step": 1323 + }, + { + "epoch": 0.10685174723589702, + "grad_norm": 0.8935555815696716, + "learning_rate": 0.00019813887866457216, + "loss": 2.7991, + "step": 1324 + }, + { + "epoch": 0.10693245097248003, + "grad_norm": 0.840067446231842, + "learning_rate": 0.00019813584586194388, + "loss": 2.7922, + "step": 1325 + }, + { + "epoch": 0.10701315470906303, + "grad_norm": 0.7919262647628784, + "learning_rate": 0.0001981328106135138, + "loss": 2.7912, + "step": 1326 + }, + { + "epoch": 0.10709385844564603, + "grad_norm": 0.7974550127983093, + "learning_rate": 0.00019812977291935752, + "loss": 2.8497, + "step": 1327 + }, + { + "epoch": 0.10717456218222904, + "grad_norm": 0.9126157164573669, + "learning_rate": 0.00019812673277955082, + "loss": 2.7698, + "step": 1328 + }, + { + "epoch": 0.10725526591881204, + "grad_norm": 0.8329752683639526, + "learning_rate": 0.0001981236901941694, + "loss": 2.8366, + "step": 1329 + }, + { + "epoch": 0.10733596965539505, + "grad_norm": 0.8313524127006531, + "learning_rate": 0.00019812064516328915, + "loss": 2.6863, + "step": 1330 + }, + { + "epoch": 0.10741667339197805, + "grad_norm": 0.8917783498764038, + "learning_rate": 0.0001981175976869859, + "loss": 2.7817, + "step": 1331 + }, + { + "epoch": 0.10749737712856106, + "grad_norm": 0.8370450735092163, + "learning_rate": 0.00019811454776533566, + "loss": 2.837, + "step": 1332 + }, + { + "epoch": 0.10757808086514406, + "grad_norm": 0.8415676355361938, + "learning_rate": 0.00019811149539841443, + "loss": 2.7399, + "step": 1333 + }, + { + "epoch": 0.10765878460172706, + "grad_norm": 0.8576632142066956, + "learning_rate": 0.00019810844058629825, + 
"loss": 2.7747, + "step": 1334 + }, + { + "epoch": 0.10773948833831007, + "grad_norm": 0.8943549394607544, + "learning_rate": 0.00019810538332906328, + "loss": 2.7368, + "step": 1335 + }, + { + "epoch": 0.10782019207489307, + "grad_norm": 0.8878718018531799, + "learning_rate": 0.00019810232362678568, + "loss": 2.7907, + "step": 1336 + }, + { + "epoch": 0.10790089581147608, + "grad_norm": 0.8131409287452698, + "learning_rate": 0.00019809926147954174, + "loss": 2.7782, + "step": 1337 + }, + { + "epoch": 0.10798159954805908, + "grad_norm": 0.8733747005462646, + "learning_rate": 0.0001980961968874078, + "loss": 2.8552, + "step": 1338 + }, + { + "epoch": 0.10806230328464207, + "grad_norm": 0.8997320532798767, + "learning_rate": 0.0001980931298504602, + "loss": 2.8452, + "step": 1339 + }, + { + "epoch": 0.10814300702122508, + "grad_norm": 0.8400282263755798, + "learning_rate": 0.00019809006036877538, + "loss": 2.786, + "step": 1340 + }, + { + "epoch": 0.10822371075780808, + "grad_norm": 0.8173925280570984, + "learning_rate": 0.00019808698844242983, + "loss": 2.8363, + "step": 1341 + }, + { + "epoch": 0.10830441449439109, + "grad_norm": 0.872278094291687, + "learning_rate": 0.00019808391407150015, + "loss": 2.7789, + "step": 1342 + }, + { + "epoch": 0.10838511823097409, + "grad_norm": 0.8939952254295349, + "learning_rate": 0.00019808083725606293, + "loss": 2.7453, + "step": 1343 + }, + { + "epoch": 0.1084658219675571, + "grad_norm": 0.8351218104362488, + "learning_rate": 0.00019807775799619484, + "loss": 2.8004, + "step": 1344 + }, + { + "epoch": 0.1085465257041401, + "grad_norm": 0.8381102681159973, + "learning_rate": 0.00019807467629197266, + "loss": 2.8155, + "step": 1345 + }, + { + "epoch": 0.1086272294407231, + "grad_norm": 0.869458019733429, + "learning_rate": 0.00019807159214347317, + "loss": 2.8219, + "step": 1346 + }, + { + "epoch": 0.10870793317730611, + "grad_norm": 0.8251017928123474, + "learning_rate": 0.00019806850555077326, + "loss": 2.7978, + "step": 1347 
+ }, + { + "epoch": 0.10878863691388911, + "grad_norm": 0.8056492209434509, + "learning_rate": 0.0001980654165139498, + "loss": 2.7994, + "step": 1348 + }, + { + "epoch": 0.10886934065047212, + "grad_norm": 0.9566174745559692, + "learning_rate": 0.00019806232503307984, + "loss": 2.794, + "step": 1349 + }, + { + "epoch": 0.10895004438705512, + "grad_norm": 0.7891408801078796, + "learning_rate": 0.0001980592311082404, + "loss": 2.7134, + "step": 1350 + }, + { + "epoch": 0.10903074812363812, + "grad_norm": 0.8894741535186768, + "learning_rate": 0.00019805613473950862, + "loss": 2.7829, + "step": 1351 + }, + { + "epoch": 0.10911145186022113, + "grad_norm": 0.893086850643158, + "learning_rate": 0.0001980530359269616, + "loss": 2.7475, + "step": 1352 + }, + { + "epoch": 0.10919215559680413, + "grad_norm": 0.8758537173271179, + "learning_rate": 0.00019804993467067666, + "loss": 2.8715, + "step": 1353 + }, + { + "epoch": 0.10927285933338714, + "grad_norm": 0.9304648041725159, + "learning_rate": 0.00019804683097073098, + "loss": 2.8051, + "step": 1354 + }, + { + "epoch": 0.10935356306997014, + "grad_norm": 0.8465876579284668, + "learning_rate": 0.00019804372482720202, + "loss": 2.7879, + "step": 1355 + }, + { + "epoch": 0.10943426680655315, + "grad_norm": 0.8485612273216248, + "learning_rate": 0.00019804061624016713, + "loss": 2.7783, + "step": 1356 + }, + { + "epoch": 0.10951497054313615, + "grad_norm": 0.835630476474762, + "learning_rate": 0.0001980375052097038, + "loss": 2.8116, + "step": 1357 + }, + { + "epoch": 0.10959567427971915, + "grad_norm": 0.8404836058616638, + "learning_rate": 0.00019803439173588956, + "loss": 2.8257, + "step": 1358 + }, + { + "epoch": 0.10967637801630216, + "grad_norm": 0.8048505783081055, + "learning_rate": 0.00019803127581880206, + "loss": 2.7762, + "step": 1359 + }, + { + "epoch": 0.10975708175288516, + "grad_norm": 0.8481776118278503, + "learning_rate": 0.00019802815745851885, + "loss": 2.8243, + "step": 1360 + }, + { + "epoch": 
0.10983778548946817, + "grad_norm": 0.8565996885299683, + "learning_rate": 0.00019802503665511775, + "loss": 2.7958, + "step": 1361 + }, + { + "epoch": 0.10991848922605117, + "grad_norm": 0.8867515921592712, + "learning_rate": 0.0001980219134086765, + "loss": 2.7973, + "step": 1362 + }, + { + "epoch": 0.10999919296263418, + "grad_norm": 0.8459765911102295, + "learning_rate": 0.0001980187877192729, + "loss": 2.848, + "step": 1363 + }, + { + "epoch": 0.11007989669921718, + "grad_norm": 0.7929832339286804, + "learning_rate": 0.0001980156595869849, + "loss": 2.8583, + "step": 1364 + }, + { + "epoch": 0.11016060043580017, + "grad_norm": 0.8475651741027832, + "learning_rate": 0.00019801252901189043, + "loss": 2.8436, + "step": 1365 + }, + { + "epoch": 0.11024130417238318, + "grad_norm": 0.8545576333999634, + "learning_rate": 0.00019800939599406755, + "loss": 2.7457, + "step": 1366 + }, + { + "epoch": 0.11032200790896618, + "grad_norm": 1.0093715190887451, + "learning_rate": 0.00019800626053359435, + "loss": 2.8198, + "step": 1367 + }, + { + "epoch": 0.11040271164554918, + "grad_norm": 0.8728145956993103, + "learning_rate": 0.0001980031226305489, + "loss": 2.7794, + "step": 1368 + }, + { + "epoch": 0.11048341538213219, + "grad_norm": 0.8538581728935242, + "learning_rate": 0.00019799998228500946, + "loss": 2.8018, + "step": 1369 + }, + { + "epoch": 0.11056411911871519, + "grad_norm": 0.9452785849571228, + "learning_rate": 0.00019799683949705432, + "loss": 2.8173, + "step": 1370 + }, + { + "epoch": 0.1106448228552982, + "grad_norm": 0.806508481502533, + "learning_rate": 0.00019799369426676174, + "loss": 2.8192, + "step": 1371 + }, + { + "epoch": 0.1107255265918812, + "grad_norm": 0.8952856063842773, + "learning_rate": 0.00019799054659421018, + "loss": 2.8072, + "step": 1372 + }, + { + "epoch": 0.1108062303284642, + "grad_norm": 0.8863561749458313, + "learning_rate": 0.00019798739647947802, + "loss": 2.7836, + "step": 1373 + }, + { + "epoch": 0.11088693406504721, + 
"grad_norm": 0.8544357419013977, + "learning_rate": 0.00019798424392264378, + "loss": 2.7714, + "step": 1374 + }, + { + "epoch": 0.11096763780163021, + "grad_norm": 0.807546854019165, + "learning_rate": 0.00019798108892378607, + "loss": 2.7635, + "step": 1375 + }, + { + "epoch": 0.11104834153821322, + "grad_norm": 0.8198233246803284, + "learning_rate": 0.0001979779314829835, + "loss": 2.8253, + "step": 1376 + }, + { + "epoch": 0.11112904527479622, + "grad_norm": 0.9268671870231628, + "learning_rate": 0.00019797477160031477, + "loss": 2.8007, + "step": 1377 + }, + { + "epoch": 0.11120974901137923, + "grad_norm": 0.8547680974006653, + "learning_rate": 0.0001979716092758586, + "loss": 2.7749, + "step": 1378 + }, + { + "epoch": 0.11129045274796223, + "grad_norm": 0.8052394390106201, + "learning_rate": 0.00019796844450969384, + "loss": 2.763, + "step": 1379 + }, + { + "epoch": 0.11137115648454524, + "grad_norm": 0.8291144371032715, + "learning_rate": 0.00019796527730189936, + "loss": 2.8053, + "step": 1380 + }, + { + "epoch": 0.11145186022112824, + "grad_norm": 0.8114006519317627, + "learning_rate": 0.00019796210765255404, + "loss": 2.8047, + "step": 1381 + }, + { + "epoch": 0.11153256395771124, + "grad_norm": 0.9326293468475342, + "learning_rate": 0.00019795893556173697, + "loss": 2.8199, + "step": 1382 + }, + { + "epoch": 0.11161326769429425, + "grad_norm": 0.7702555656433105, + "learning_rate": 0.00019795576102952714, + "loss": 2.7909, + "step": 1383 + }, + { + "epoch": 0.11169397143087725, + "grad_norm": 0.8115492463111877, + "learning_rate": 0.0001979525840560037, + "loss": 2.748, + "step": 1384 + }, + { + "epoch": 0.11177467516746026, + "grad_norm": 0.8926187753677368, + "learning_rate": 0.0001979494046412458, + "loss": 2.7791, + "step": 1385 + }, + { + "epoch": 0.11185537890404326, + "grad_norm": 0.8549754023551941, + "learning_rate": 0.0001979462227853327, + "loss": 2.7989, + "step": 1386 + }, + { + "epoch": 0.11193608264062627, + "grad_norm": 
0.8625262975692749, + "learning_rate": 0.0001979430384883437, + "loss": 2.7202, + "step": 1387 + }, + { + "epoch": 0.11201678637720927, + "grad_norm": 0.8134698867797852, + "learning_rate": 0.00019793985175035813, + "loss": 2.8008, + "step": 1388 + }, + { + "epoch": 0.11209749011379228, + "grad_norm": 0.8546617031097412, + "learning_rate": 0.00019793666257145547, + "loss": 2.8076, + "step": 1389 + }, + { + "epoch": 0.11217819385037527, + "grad_norm": 0.8003748059272766, + "learning_rate": 0.00019793347095171514, + "loss": 2.826, + "step": 1390 + }, + { + "epoch": 0.11225889758695827, + "grad_norm": 0.8116614818572998, + "learning_rate": 0.00019793027689121674, + "loss": 2.7096, + "step": 1391 + }, + { + "epoch": 0.11233960132354127, + "grad_norm": 0.7785829901695251, + "learning_rate": 0.00019792708039003984, + "loss": 2.748, + "step": 1392 + }, + { + "epoch": 0.11242030506012428, + "grad_norm": 0.7999277710914612, + "learning_rate": 0.0001979238814482641, + "loss": 2.7671, + "step": 1393 + }, + { + "epoch": 0.11250100879670728, + "grad_norm": 0.8862190842628479, + "learning_rate": 0.00019792068006596925, + "loss": 2.8484, + "step": 1394 + }, + { + "epoch": 0.11258171253329029, + "grad_norm": 0.8747627139091492, + "learning_rate": 0.00019791747624323512, + "loss": 2.7477, + "step": 1395 + }, + { + "epoch": 0.11266241626987329, + "grad_norm": 0.8280831575393677, + "learning_rate": 0.0001979142699801415, + "loss": 2.87, + "step": 1396 + }, + { + "epoch": 0.1127431200064563, + "grad_norm": 0.8069074153900146, + "learning_rate": 0.00019791106127676832, + "loss": 2.7724, + "step": 1397 + }, + { + "epoch": 0.1128238237430393, + "grad_norm": 0.8253301382064819, + "learning_rate": 0.00019790785013319557, + "loss": 2.7351, + "step": 1398 + }, + { + "epoch": 0.1129045274796223, + "grad_norm": 0.8298853635787964, + "learning_rate": 0.00019790463654950323, + "loss": 2.7709, + "step": 1399 + }, + { + "epoch": 0.11298523121620531, + "grad_norm": 0.7796407341957092, + 
"learning_rate": 0.0001979014205257715, + "loss": 2.7766, + "step": 1400 + }, + { + "epoch": 0.11306593495278831, + "grad_norm": 0.8922166228294373, + "learning_rate": 0.00019789820206208037, + "loss": 2.8473, + "step": 1401 + }, + { + "epoch": 0.11314663868937132, + "grad_norm": 0.7763219475746155, + "learning_rate": 0.00019789498115851015, + "loss": 2.8629, + "step": 1402 + }, + { + "epoch": 0.11322734242595432, + "grad_norm": 0.8679928779602051, + "learning_rate": 0.0001978917578151411, + "loss": 2.8017, + "step": 1403 + }, + { + "epoch": 0.11330804616253733, + "grad_norm": 0.8491933941841125, + "learning_rate": 0.00019788853203205357, + "loss": 2.7156, + "step": 1404 + }, + { + "epoch": 0.11338874989912033, + "grad_norm": 0.8271194696426392, + "learning_rate": 0.00019788530380932792, + "loss": 2.7892, + "step": 1405 + }, + { + "epoch": 0.11346945363570334, + "grad_norm": 0.9224163293838501, + "learning_rate": 0.00019788207314704463, + "loss": 2.7824, + "step": 1406 + }, + { + "epoch": 0.11355015737228634, + "grad_norm": 0.7662777900695801, + "learning_rate": 0.00019787884004528422, + "loss": 2.7364, + "step": 1407 + }, + { + "epoch": 0.11363086110886934, + "grad_norm": 0.8750362396240234, + "learning_rate": 0.00019787560450412728, + "loss": 2.7546, + "step": 1408 + }, + { + "epoch": 0.11371156484545235, + "grad_norm": 0.9158821105957031, + "learning_rate": 0.0001978723665236544, + "loss": 2.8304, + "step": 1409 + }, + { + "epoch": 0.11379226858203535, + "grad_norm": 0.8291050791740417, + "learning_rate": 0.0001978691261039463, + "loss": 2.758, + "step": 1410 + }, + { + "epoch": 0.11387297231861836, + "grad_norm": 0.801886796951294, + "learning_rate": 0.00019786588324508374, + "loss": 2.7805, + "step": 1411 + }, + { + "epoch": 0.11395367605520136, + "grad_norm": 0.8140222430229187, + "learning_rate": 0.00019786263794714757, + "loss": 2.8155, + "step": 1412 + }, + { + "epoch": 0.11403437979178437, + "grad_norm": 0.7747580409049988, + "learning_rate": 
0.00019785939021021865, + "loss": 2.778, + "step": 1413 + }, + { + "epoch": 0.11411508352836737, + "grad_norm": 0.8954138159751892, + "learning_rate": 0.0001978561400343779, + "loss": 2.7756, + "step": 1414 + }, + { + "epoch": 0.11419578726495037, + "grad_norm": 0.9038921594619751, + "learning_rate": 0.00019785288741970634, + "loss": 2.7181, + "step": 1415 + }, + { + "epoch": 0.11427649100153336, + "grad_norm": 0.8284393548965454, + "learning_rate": 0.000197849632366285, + "loss": 2.7467, + "step": 1416 + }, + { + "epoch": 0.11435719473811637, + "grad_norm": 0.8996441960334778, + "learning_rate": 0.00019784637487419514, + "loss": 2.7918, + "step": 1417 + }, + { + "epoch": 0.11443789847469937, + "grad_norm": 0.9868448376655579, + "learning_rate": 0.00019784311494351777, + "loss": 2.7687, + "step": 1418 + }, + { + "epoch": 0.11451860221128238, + "grad_norm": 0.8491402864456177, + "learning_rate": 0.0001978398525743342, + "loss": 2.8492, + "step": 1419 + }, + { + "epoch": 0.11459930594786538, + "grad_norm": 1.06125807762146, + "learning_rate": 0.0001978365877667258, + "loss": 2.8041, + "step": 1420 + }, + { + "epoch": 0.11468000968444839, + "grad_norm": 0.8194011449813843, + "learning_rate": 0.00019783332052077386, + "loss": 2.7109, + "step": 1421 + }, + { + "epoch": 0.11476071342103139, + "grad_norm": 0.972620964050293, + "learning_rate": 0.00019783005083655984, + "loss": 2.8107, + "step": 1422 + }, + { + "epoch": 0.1148414171576144, + "grad_norm": 0.925410270690918, + "learning_rate": 0.0001978267787141652, + "loss": 2.7603, + "step": 1423 + }, + { + "epoch": 0.1149221208941974, + "grad_norm": 0.920156717300415, + "learning_rate": 0.00019782350415367152, + "loss": 2.7644, + "step": 1424 + }, + { + "epoch": 0.1150028246307804, + "grad_norm": 0.8617576360702515, + "learning_rate": 0.00019782022715516043, + "loss": 2.769, + "step": 1425 + }, + { + "epoch": 0.11508352836736341, + "grad_norm": 1.0987342596054077, + "learning_rate": 0.00019781694771871356, + "loss": 
2.8224, + "step": 1426 + }, + { + "epoch": 0.11516423210394641, + "grad_norm": 0.8418076634407043, + "learning_rate": 0.00019781366584441264, + "loss": 2.7947, + "step": 1427 + }, + { + "epoch": 0.11524493584052942, + "grad_norm": 0.8010901808738708, + "learning_rate": 0.0001978103815323395, + "loss": 2.733, + "step": 1428 + }, + { + "epoch": 0.11532563957711242, + "grad_norm": 0.8649042844772339, + "learning_rate": 0.00019780709478257598, + "loss": 2.7681, + "step": 1429 + }, + { + "epoch": 0.11540634331369543, + "grad_norm": 0.7728127837181091, + "learning_rate": 0.00019780380559520397, + "loss": 2.7795, + "step": 1430 + }, + { + "epoch": 0.11548704705027843, + "grad_norm": 0.7770940065383911, + "learning_rate": 0.00019780051397030545, + "loss": 2.743, + "step": 1431 + }, + { + "epoch": 0.11556775078686143, + "grad_norm": 0.8341890573501587, + "learning_rate": 0.0001977972199079625, + "loss": 2.8047, + "step": 1432 + }, + { + "epoch": 0.11564845452344444, + "grad_norm": 0.7894187569618225, + "learning_rate": 0.00019779392340825717, + "loss": 2.7757, + "step": 1433 + }, + { + "epoch": 0.11572915826002744, + "grad_norm": 0.8002873063087463, + "learning_rate": 0.00019779062447127164, + "loss": 2.7816, + "step": 1434 + }, + { + "epoch": 0.11580986199661045, + "grad_norm": 0.8256075978279114, + "learning_rate": 0.0001977873230970881, + "loss": 2.7839, + "step": 1435 + }, + { + "epoch": 0.11589056573319345, + "grad_norm": 0.8695322871208191, + "learning_rate": 0.0001977840192857889, + "loss": 2.746, + "step": 1436 + }, + { + "epoch": 0.11597126946977646, + "grad_norm": 0.767425537109375, + "learning_rate": 0.00019778071303745628, + "loss": 2.797, + "step": 1437 + }, + { + "epoch": 0.11605197320635946, + "grad_norm": 0.8263241052627563, + "learning_rate": 0.0001977774043521727, + "loss": 2.7702, + "step": 1438 + }, + { + "epoch": 0.11613267694294246, + "grad_norm": 0.8108638525009155, + "learning_rate": 0.0001977740932300206, + "loss": 2.6981, + "step": 1439 + }, + { + 
"epoch": 0.11621338067952547, + "grad_norm": 0.7945007681846619, + "learning_rate": 0.00019777077967108255, + "loss": 2.7357, + "step": 1440 + }, + { + "epoch": 0.11629408441610846, + "grad_norm": 0.8480326533317566, + "learning_rate": 0.00019776746367544107, + "loss": 2.8563, + "step": 1441 + }, + { + "epoch": 0.11637478815269146, + "grad_norm": 0.8202071785926819, + "learning_rate": 0.00019776414524317882, + "loss": 2.7955, + "step": 1442 + }, + { + "epoch": 0.11645549188927447, + "grad_norm": 0.8202874660491943, + "learning_rate": 0.00019776082437437852, + "loss": 2.765, + "step": 1443 + }, + { + "epoch": 0.11653619562585747, + "grad_norm": 0.8053051829338074, + "learning_rate": 0.00019775750106912294, + "loss": 2.6866, + "step": 1444 + }, + { + "epoch": 0.11661689936244048, + "grad_norm": 0.831968367099762, + "learning_rate": 0.00019775417532749486, + "loss": 2.7022, + "step": 1445 + }, + { + "epoch": 0.11669760309902348, + "grad_norm": 0.8903129696846008, + "learning_rate": 0.00019775084714957725, + "loss": 2.7308, + "step": 1446 + }, + { + "epoch": 0.11677830683560649, + "grad_norm": 0.8178622722625732, + "learning_rate": 0.000197747516535453, + "loss": 2.7446, + "step": 1447 + }, + { + "epoch": 0.11685901057218949, + "grad_norm": 0.8270576596260071, + "learning_rate": 0.00019774418348520508, + "loss": 2.7716, + "step": 1448 + }, + { + "epoch": 0.1169397143087725, + "grad_norm": 0.7965807914733887, + "learning_rate": 0.00019774084799891662, + "loss": 2.7305, + "step": 1449 + }, + { + "epoch": 0.1170204180453555, + "grad_norm": 0.8499472737312317, + "learning_rate": 0.00019773751007667073, + "loss": 2.7584, + "step": 1450 + }, + { + "epoch": 0.1171011217819385, + "grad_norm": 0.8961663842201233, + "learning_rate": 0.0001977341697185506, + "loss": 2.7729, + "step": 1451 + }, + { + "epoch": 0.1171818255185215, + "grad_norm": 1.0203527212142944, + "learning_rate": 0.0001977308269246395, + "loss": 2.727, + "step": 1452 + }, + { + "epoch": 0.11726252925510451, + 
"grad_norm": 0.953289806842804, + "learning_rate": 0.0001977274816950207, + "loss": 2.8158, + "step": 1453 + }, + { + "epoch": 0.11734323299168752, + "grad_norm": 1.0064597129821777, + "learning_rate": 0.0001977241340297776, + "loss": 2.8743, + "step": 1454 + }, + { + "epoch": 0.11742393672827052, + "grad_norm": 0.8541988730430603, + "learning_rate": 0.00019772078392899363, + "loss": 2.8532, + "step": 1455 + }, + { + "epoch": 0.11750464046485352, + "grad_norm": 0.8351433873176575, + "learning_rate": 0.00019771743139275228, + "loss": 2.7749, + "step": 1456 + }, + { + "epoch": 0.11758534420143653, + "grad_norm": 0.9555812478065491, + "learning_rate": 0.00019771407642113712, + "loss": 2.7408, + "step": 1457 + }, + { + "epoch": 0.11766604793801953, + "grad_norm": 0.7943894267082214, + "learning_rate": 0.0001977107190142317, + "loss": 2.7265, + "step": 1458 + }, + { + "epoch": 0.11774675167460254, + "grad_norm": 0.8636460900306702, + "learning_rate": 0.0001977073591721198, + "loss": 2.8178, + "step": 1459 + }, + { + "epoch": 0.11782745541118554, + "grad_norm": 0.8673834800720215, + "learning_rate": 0.00019770399689488506, + "loss": 2.7928, + "step": 1460 + }, + { + "epoch": 0.11790815914776855, + "grad_norm": 0.9463722705841064, + "learning_rate": 0.00019770063218261133, + "loss": 2.7448, + "step": 1461 + }, + { + "epoch": 0.11798886288435155, + "grad_norm": 0.8429726362228394, + "learning_rate": 0.00019769726503538246, + "loss": 2.7564, + "step": 1462 + }, + { + "epoch": 0.11806956662093455, + "grad_norm": 0.9412201642990112, + "learning_rate": 0.00019769389545328236, + "loss": 2.793, + "step": 1463 + }, + { + "epoch": 0.11815027035751756, + "grad_norm": 0.9112111926078796, + "learning_rate": 0.000197690523436395, + "loss": 2.7787, + "step": 1464 + }, + { + "epoch": 0.11823097409410056, + "grad_norm": 0.8417023420333862, + "learning_rate": 0.00019768714898480444, + "loss": 2.7654, + "step": 1465 + }, + { + "epoch": 0.11831167783068357, + "grad_norm": 
0.8275290727615356, + "learning_rate": 0.00019768377209859476, + "loss": 2.7914, + "step": 1466 + }, + { + "epoch": 0.11839238156726656, + "grad_norm": 0.8113142848014832, + "learning_rate": 0.00019768039277785017, + "loss": 2.7516, + "step": 1467 + }, + { + "epoch": 0.11847308530384956, + "grad_norm": 0.8655288219451904, + "learning_rate": 0.0001976770110226548, + "loss": 2.8158, + "step": 1468 + }, + { + "epoch": 0.11855378904043257, + "grad_norm": 0.8063547611236572, + "learning_rate": 0.000197673626833093, + "loss": 2.7624, + "step": 1469 + }, + { + "epoch": 0.11863449277701557, + "grad_norm": 0.843772292137146, + "learning_rate": 0.00019767024020924908, + "loss": 2.86, + "step": 1470 + }, + { + "epoch": 0.11871519651359858, + "grad_norm": 0.7942481637001038, + "learning_rate": 0.0001976668511512075, + "loss": 2.758, + "step": 1471 + }, + { + "epoch": 0.11879590025018158, + "grad_norm": 0.841275155544281, + "learning_rate": 0.00019766345965905268, + "loss": 2.8014, + "step": 1472 + }, + { + "epoch": 0.11887660398676458, + "grad_norm": 0.8003600835800171, + "learning_rate": 0.00019766006573286915, + "loss": 2.7829, + "step": 1473 + }, + { + "epoch": 0.11895730772334759, + "grad_norm": 0.8437239527702332, + "learning_rate": 0.00019765666937274147, + "loss": 2.7706, + "step": 1474 + }, + { + "epoch": 0.11903801145993059, + "grad_norm": 0.8118240833282471, + "learning_rate": 0.00019765327057875433, + "loss": 2.8185, + "step": 1475 + }, + { + "epoch": 0.1191187151965136, + "grad_norm": 0.8051649928092957, + "learning_rate": 0.00019764986935099244, + "loss": 2.7676, + "step": 1476 + }, + { + "epoch": 0.1191994189330966, + "grad_norm": 0.7786862850189209, + "learning_rate": 0.00019764646568954053, + "loss": 2.8069, + "step": 1477 + }, + { + "epoch": 0.1192801226696796, + "grad_norm": 0.8199592232704163, + "learning_rate": 0.0001976430595944834, + "loss": 2.7718, + "step": 1478 + }, + { + "epoch": 0.11936082640626261, + "grad_norm": 0.8696652054786682, + 
"learning_rate": 0.00019763965106590604, + "loss": 2.7682, + "step": 1479 + }, + { + "epoch": 0.11944153014284561, + "grad_norm": 0.7993931174278259, + "learning_rate": 0.00019763624010389334, + "loss": 2.7607, + "step": 1480 + }, + { + "epoch": 0.11952223387942862, + "grad_norm": 0.8107055425643921, + "learning_rate": 0.0001976328267085303, + "loss": 2.7885, + "step": 1481 + }, + { + "epoch": 0.11960293761601162, + "grad_norm": 0.8189423084259033, + "learning_rate": 0.000197629410879902, + "loss": 2.7332, + "step": 1482 + }, + { + "epoch": 0.11968364135259463, + "grad_norm": 0.9134814143180847, + "learning_rate": 0.0001976259926180936, + "loss": 2.7691, + "step": 1483 + }, + { + "epoch": 0.11976434508917763, + "grad_norm": 0.8642883896827698, + "learning_rate": 0.00019762257192319023, + "loss": 2.7876, + "step": 1484 + }, + { + "epoch": 0.11984504882576064, + "grad_norm": 0.7411352396011353, + "learning_rate": 0.0001976191487952772, + "loss": 2.7577, + "step": 1485 + }, + { + "epoch": 0.11992575256234364, + "grad_norm": 0.7741669416427612, + "learning_rate": 0.00019761572323443978, + "loss": 2.8005, + "step": 1486 + }, + { + "epoch": 0.12000645629892664, + "grad_norm": 0.8195405602455139, + "learning_rate": 0.0001976122952407634, + "loss": 2.7421, + "step": 1487 + }, + { + "epoch": 0.12008716003550965, + "grad_norm": 0.8355886936187744, + "learning_rate": 0.00019760886481433345, + "loss": 2.8156, + "step": 1488 + }, + { + "epoch": 0.12016786377209265, + "grad_norm": 0.8321093916893005, + "learning_rate": 0.00019760543195523542, + "loss": 2.7261, + "step": 1489 + }, + { + "epoch": 0.12024856750867566, + "grad_norm": 0.7792446613311768, + "learning_rate": 0.0001976019966635549, + "loss": 2.7319, + "step": 1490 + }, + { + "epoch": 0.12032927124525866, + "grad_norm": 0.770535409450531, + "learning_rate": 0.00019759855893937748, + "loss": 2.7727, + "step": 1491 + }, + { + "epoch": 0.12040997498184165, + "grad_norm": 0.8168532252311707, + "learning_rate": 
0.00019759511878278887, + "loss": 2.7763, + "step": 1492 + }, + { + "epoch": 0.12049067871842466, + "grad_norm": 0.8395755290985107, + "learning_rate": 0.00019759167619387476, + "loss": 2.8382, + "step": 1493 + }, + { + "epoch": 0.12057138245500766, + "grad_norm": 0.8682762384414673, + "learning_rate": 0.00019758823117272097, + "loss": 2.8056, + "step": 1494 + }, + { + "epoch": 0.12065208619159067, + "grad_norm": 0.815192699432373, + "learning_rate": 0.00019758478371941337, + "loss": 2.7602, + "step": 1495 + }, + { + "epoch": 0.12073278992817367, + "grad_norm": 0.7919273376464844, + "learning_rate": 0.00019758133383403786, + "loss": 2.7989, + "step": 1496 + }, + { + "epoch": 0.12081349366475667, + "grad_norm": 1.004387378692627, + "learning_rate": 0.00019757788151668045, + "loss": 2.7765, + "step": 1497 + }, + { + "epoch": 0.12089419740133968, + "grad_norm": 1.0032062530517578, + "learning_rate": 0.00019757442676742715, + "loss": 2.7751, + "step": 1498 + }, + { + "epoch": 0.12097490113792268, + "grad_norm": 0.8797723054885864, + "learning_rate": 0.00019757096958636407, + "loss": 2.7798, + "step": 1499 + }, + { + "epoch": 0.12105560487450569, + "grad_norm": 0.9239820241928101, + "learning_rate": 0.0001975675099735774, + "loss": 2.7976, + "step": 1500 + }, + { + "epoch": 0.12113630861108869, + "grad_norm": 0.9903601408004761, + "learning_rate": 0.00019756404792915328, + "loss": 2.7891, + "step": 1501 + }, + { + "epoch": 0.1212170123476717, + "grad_norm": 0.8402895331382751, + "learning_rate": 0.0001975605834531781, + "loss": 2.8037, + "step": 1502 + }, + { + "epoch": 0.1212977160842547, + "grad_norm": 0.8986102342605591, + "learning_rate": 0.00019755711654573813, + "loss": 2.8375, + "step": 1503 + }, + { + "epoch": 0.1213784198208377, + "grad_norm": 0.8795471787452698, + "learning_rate": 0.0001975536472069198, + "loss": 2.7916, + "step": 1504 + }, + { + "epoch": 0.12145912355742071, + "grad_norm": 0.866278350353241, + "learning_rate": 0.00019755017543680962, + 
"loss": 2.7884, + "step": 1505 + }, + { + "epoch": 0.12153982729400371, + "grad_norm": 0.7877952456474304, + "learning_rate": 0.00019754670123549398, + "loss": 2.7659, + "step": 1506 + }, + { + "epoch": 0.12162053103058672, + "grad_norm": 0.857155978679657, + "learning_rate": 0.00019754322460305962, + "loss": 2.8029, + "step": 1507 + }, + { + "epoch": 0.12170123476716972, + "grad_norm": 0.8323284387588501, + "learning_rate": 0.00019753974553959314, + "loss": 2.7764, + "step": 1508 + }, + { + "epoch": 0.12178193850375273, + "grad_norm": 0.8557485938072205, + "learning_rate": 0.00019753626404518117, + "loss": 2.7448, + "step": 1509 + }, + { + "epoch": 0.12186264224033573, + "grad_norm": 0.8026818037033081, + "learning_rate": 0.00019753278011991058, + "loss": 2.7323, + "step": 1510 + }, + { + "epoch": 0.12194334597691874, + "grad_norm": 0.8578904271125793, + "learning_rate": 0.00019752929376386816, + "loss": 2.759, + "step": 1511 + }, + { + "epoch": 0.12202404971350174, + "grad_norm": 0.8617175221443176, + "learning_rate": 0.00019752580497714076, + "loss": 2.7641, + "step": 1512 + }, + { + "epoch": 0.12210475345008474, + "grad_norm": 0.8261943459510803, + "learning_rate": 0.00019752231375981538, + "loss": 2.7554, + "step": 1513 + }, + { + "epoch": 0.12218545718666775, + "grad_norm": 0.9984099268913269, + "learning_rate": 0.00019751882011197902, + "loss": 2.763, + "step": 1514 + }, + { + "epoch": 0.12226616092325075, + "grad_norm": 0.8014064431190491, + "learning_rate": 0.00019751532403371874, + "loss": 2.8083, + "step": 1515 + }, + { + "epoch": 0.12234686465983376, + "grad_norm": 0.9276653528213501, + "learning_rate": 0.0001975118255251217, + "loss": 2.8055, + "step": 1516 + }, + { + "epoch": 0.12242756839641676, + "grad_norm": 0.9365193843841553, + "learning_rate": 0.00019750832458627503, + "loss": 2.7397, + "step": 1517 + }, + { + "epoch": 0.12250827213299975, + "grad_norm": 0.8952646851539612, + "learning_rate": 0.00019750482121726605, + "loss": 2.8305, + "step": 
1518 + }, + { + "epoch": 0.12258897586958276, + "grad_norm": 0.8395531177520752, + "learning_rate": 0.00019750131541818204, + "loss": 2.7852, + "step": 1519 + }, + { + "epoch": 0.12266967960616576, + "grad_norm": 0.8123572468757629, + "learning_rate": 0.0001974978071891104, + "loss": 2.831, + "step": 1520 + }, + { + "epoch": 0.12275038334274876, + "grad_norm": 0.8716141581535339, + "learning_rate": 0.00019749429653013851, + "loss": 2.8012, + "step": 1521 + }, + { + "epoch": 0.12283108707933177, + "grad_norm": 0.7848379611968994, + "learning_rate": 0.0001974907834413539, + "loss": 2.7812, + "step": 1522 + }, + { + "epoch": 0.12291179081591477, + "grad_norm": 0.834072470664978, + "learning_rate": 0.00019748726792284414, + "loss": 2.7442, + "step": 1523 + }, + { + "epoch": 0.12299249455249778, + "grad_norm": 0.8377225399017334, + "learning_rate": 0.0001974837499746968, + "loss": 2.7967, + "step": 1524 + }, + { + "epoch": 0.12307319828908078, + "grad_norm": 0.8809494376182556, + "learning_rate": 0.0001974802295969996, + "loss": 2.8042, + "step": 1525 + }, + { + "epoch": 0.12315390202566379, + "grad_norm": 0.8504741787910461, + "learning_rate": 0.00019747670678984028, + "loss": 2.7909, + "step": 1526 + }, + { + "epoch": 0.12323460576224679, + "grad_norm": 0.9444355368614197, + "learning_rate": 0.00019747318155330663, + "loss": 2.8567, + "step": 1527 + }, + { + "epoch": 0.1233153094988298, + "grad_norm": 0.859166145324707, + "learning_rate": 0.00019746965388748645, + "loss": 2.8305, + "step": 1528 + }, + { + "epoch": 0.1233960132354128, + "grad_norm": 0.8431086540222168, + "learning_rate": 0.00019746612379246777, + "loss": 2.7799, + "step": 1529 + }, + { + "epoch": 0.1234767169719958, + "grad_norm": 0.8872438669204712, + "learning_rate": 0.00019746259126833846, + "loss": 2.8413, + "step": 1530 + }, + { + "epoch": 0.12355742070857881, + "grad_norm": 0.8698925375938416, + "learning_rate": 0.0001974590563151866, + "loss": 2.8446, + "step": 1531 + }, + { + "epoch": 
0.12363812444516181, + "grad_norm": 0.8926429152488708, + "learning_rate": 0.0001974555189331003, + "loss": 2.7859, + "step": 1532 + }, + { + "epoch": 0.12371882818174482, + "grad_norm": 0.8089048862457275, + "learning_rate": 0.00019745197912216775, + "loss": 2.7985, + "step": 1533 + }, + { + "epoch": 0.12379953191832782, + "grad_norm": 0.8180400729179382, + "learning_rate": 0.0001974484368824771, + "loss": 2.7587, + "step": 1534 + }, + { + "epoch": 0.12388023565491083, + "grad_norm": 0.9584212303161621, + "learning_rate": 0.00019744489221411668, + "loss": 2.766, + "step": 1535 + }, + { + "epoch": 0.12396093939149383, + "grad_norm": 0.8425920009613037, + "learning_rate": 0.00019744134511717485, + "loss": 2.8125, + "step": 1536 + }, + { + "epoch": 0.12404164312807683, + "grad_norm": 0.9109299182891846, + "learning_rate": 0.00019743779559173996, + "loss": 2.8613, + "step": 1537 + }, + { + "epoch": 0.12412234686465984, + "grad_norm": 0.8840214610099792, + "learning_rate": 0.0001974342436379005, + "loss": 2.7603, + "step": 1538 + }, + { + "epoch": 0.12420305060124284, + "grad_norm": 0.8128962516784668, + "learning_rate": 0.00019743068925574502, + "loss": 2.7593, + "step": 1539 + }, + { + "epoch": 0.12428375433782585, + "grad_norm": 0.8150052428245544, + "learning_rate": 0.00019742713244536204, + "loss": 2.8099, + "step": 1540 + }, + { + "epoch": 0.12436445807440885, + "grad_norm": 0.8442968130111694, + "learning_rate": 0.00019742357320684027, + "loss": 2.7746, + "step": 1541 + }, + { + "epoch": 0.12444516181099186, + "grad_norm": 0.9347402453422546, + "learning_rate": 0.00019742001154026838, + "loss": 2.8247, + "step": 1542 + }, + { + "epoch": 0.12452586554757485, + "grad_norm": 0.8305966854095459, + "learning_rate": 0.00019741644744573512, + "loss": 2.7398, + "step": 1543 + }, + { + "epoch": 0.12460656928415785, + "grad_norm": 0.8811129927635193, + "learning_rate": 0.00019741288092332935, + "loss": 2.8014, + "step": 1544 + }, + { + "epoch": 0.12468727302074085, + 
"grad_norm": 1.0287303924560547, + "learning_rate": 0.00019740931197313996, + "loss": 2.8449, + "step": 1545 + }, + { + "epoch": 0.12476797675732386, + "grad_norm": 0.8499771356582642, + "learning_rate": 0.00019740574059525588, + "loss": 2.7845, + "step": 1546 + }, + { + "epoch": 0.12484868049390686, + "grad_norm": 0.8110969066619873, + "learning_rate": 0.00019740216678976614, + "loss": 2.7565, + "step": 1547 + }, + { + "epoch": 0.12492938423048987, + "grad_norm": 0.8530771136283875, + "learning_rate": 0.00019739859055675977, + "loss": 2.8098, + "step": 1548 + }, + { + "epoch": 0.12501008796707289, + "grad_norm": 0.8483901619911194, + "learning_rate": 0.00019739501189632591, + "loss": 2.812, + "step": 1549 + }, + { + "epoch": 0.1250907917036559, + "grad_norm": 0.7894467711448669, + "learning_rate": 0.00019739143080855378, + "loss": 2.8576, + "step": 1550 + }, + { + "epoch": 0.1251714954402389, + "grad_norm": 0.8270247578620911, + "learning_rate": 0.0001973878472935326, + "loss": 2.7613, + "step": 1551 + }, + { + "epoch": 0.1252521991768219, + "grad_norm": 0.8496212959289551, + "learning_rate": 0.00019738426135135174, + "loss": 2.8375, + "step": 1552 + }, + { + "epoch": 0.1253329029134049, + "grad_norm": 0.8465524911880493, + "learning_rate": 0.00019738067298210045, + "loss": 2.8023, + "step": 1553 + }, + { + "epoch": 0.1254136066499879, + "grad_norm": 0.7843824028968811, + "learning_rate": 0.00019737708218586826, + "loss": 2.7424, + "step": 1554 + }, + { + "epoch": 0.1254943103865709, + "grad_norm": 0.8310040235519409, + "learning_rate": 0.00019737348896274462, + "loss": 2.7608, + "step": 1555 + }, + { + "epoch": 0.1255750141231539, + "grad_norm": 0.7895017266273499, + "learning_rate": 0.00019736989331281914, + "loss": 2.7549, + "step": 1556 + }, + { + "epoch": 0.1256557178597369, + "grad_norm": 0.8140431642532349, + "learning_rate": 0.00019736629523618138, + "loss": 2.802, + "step": 1557 + }, + { + "epoch": 0.1257364215963199, + "grad_norm": 0.8026889562606812, + 
"learning_rate": 0.000197362694732921, + "loss": 2.7758, + "step": 1558 + }, + { + "epoch": 0.1258171253329029, + "grad_norm": 0.8018048405647278, + "learning_rate": 0.0001973590918031278, + "loss": 2.7729, + "step": 1559 + }, + { + "epoch": 0.1258978290694859, + "grad_norm": 0.8394612073898315, + "learning_rate": 0.00019735548644689147, + "loss": 2.7692, + "step": 1560 + }, + { + "epoch": 0.1259785328060689, + "grad_norm": 0.819804310798645, + "learning_rate": 0.00019735187866430198, + "loss": 2.6933, + "step": 1561 + }, + { + "epoch": 0.12605923654265191, + "grad_norm": 0.8094257116317749, + "learning_rate": 0.0001973482684554492, + "loss": 2.7722, + "step": 1562 + }, + { + "epoch": 0.12613994027923492, + "grad_norm": 0.8647315502166748, + "learning_rate": 0.00019734465582042305, + "loss": 2.787, + "step": 1563 + }, + { + "epoch": 0.12622064401581792, + "grad_norm": 0.8439335823059082, + "learning_rate": 0.00019734104075931367, + "loss": 2.8, + "step": 1564 + }, + { + "epoch": 0.12630134775240093, + "grad_norm": 0.852480947971344, + "learning_rate": 0.00019733742327221105, + "loss": 2.8656, + "step": 1565 + }, + { + "epoch": 0.12638205148898393, + "grad_norm": 0.813846230506897, + "learning_rate": 0.00019733380335920542, + "loss": 2.7733, + "step": 1566 + }, + { + "epoch": 0.12646275522556694, + "grad_norm": 0.7860896587371826, + "learning_rate": 0.00019733018102038698, + "loss": 2.8201, + "step": 1567 + }, + { + "epoch": 0.12654345896214994, + "grad_norm": 0.7857748866081238, + "learning_rate": 0.00019732655625584602, + "loss": 2.8726, + "step": 1568 + }, + { + "epoch": 0.12662416269873294, + "grad_norm": 0.8152899146080017, + "learning_rate": 0.00019732292906567286, + "loss": 2.7738, + "step": 1569 + }, + { + "epoch": 0.12670486643531595, + "grad_norm": 0.8281696438789368, + "learning_rate": 0.00019731929944995788, + "loss": 2.7966, + "step": 1570 + }, + { + "epoch": 0.12678557017189895, + "grad_norm": 0.8070773482322693, + "learning_rate": 
0.00019731566740879158, + "loss": 2.6988, + "step": 1571 + }, + { + "epoch": 0.12686627390848196, + "grad_norm": 0.7859680652618408, + "learning_rate": 0.00019731203294226445, + "loss": 2.7241, + "step": 1572 + }, + { + "epoch": 0.12694697764506496, + "grad_norm": 0.7753982543945312, + "learning_rate": 0.0001973083960504671, + "loss": 2.7621, + "step": 1573 + }, + { + "epoch": 0.12702768138164797, + "grad_norm": 0.8063471913337708, + "learning_rate": 0.00019730475673349014, + "loss": 2.7298, + "step": 1574 + }, + { + "epoch": 0.12710838511823097, + "grad_norm": 0.7943962812423706, + "learning_rate": 0.0001973011149914243, + "loss": 2.7714, + "step": 1575 + }, + { + "epoch": 0.12718908885481398, + "grad_norm": 0.8297483325004578, + "learning_rate": 0.00019729747082436033, + "loss": 2.7743, + "step": 1576 + }, + { + "epoch": 0.12726979259139698, + "grad_norm": 0.8728111386299133, + "learning_rate": 0.000197293824232389, + "loss": 2.8251, + "step": 1577 + }, + { + "epoch": 0.12735049632797998, + "grad_norm": 0.8762480020523071, + "learning_rate": 0.00019729017521560128, + "loss": 2.8036, + "step": 1578 + }, + { + "epoch": 0.127431200064563, + "grad_norm": 0.9266185164451599, + "learning_rate": 0.00019728652377408806, + "loss": 2.7335, + "step": 1579 + }, + { + "epoch": 0.127511903801146, + "grad_norm": 0.9289839267730713, + "learning_rate": 0.00019728286990794037, + "loss": 2.7715, + "step": 1580 + }, + { + "epoch": 0.127592607537729, + "grad_norm": 0.8811823725700378, + "learning_rate": 0.0001972792136172493, + "loss": 2.7389, + "step": 1581 + }, + { + "epoch": 0.127673311274312, + "grad_norm": 0.8174294233322144, + "learning_rate": 0.00019727555490210588, + "loss": 2.7483, + "step": 1582 + }, + { + "epoch": 0.127754015010895, + "grad_norm": 0.8254107236862183, + "learning_rate": 0.00019727189376260137, + "loss": 2.7897, + "step": 1583 + }, + { + "epoch": 0.127834718747478, + "grad_norm": 0.8478763699531555, + "learning_rate": 0.000197268230198827, + "loss": 2.7394, 
+ "step": 1584 + }, + { + "epoch": 0.12791542248406101, + "grad_norm": 0.8356192111968994, + "learning_rate": 0.00019726456421087404, + "loss": 2.7518, + "step": 1585 + }, + { + "epoch": 0.12799612622064402, + "grad_norm": 0.8523107767105103, + "learning_rate": 0.00019726089579883392, + "loss": 2.7893, + "step": 1586 + }, + { + "epoch": 0.12807682995722702, + "grad_norm": 0.9048579931259155, + "learning_rate": 0.00019725722496279804, + "loss": 2.7488, + "step": 1587 + }, + { + "epoch": 0.12815753369381003, + "grad_norm": 0.8242251873016357, + "learning_rate": 0.00019725355170285787, + "loss": 2.7544, + "step": 1588 + }, + { + "epoch": 0.12823823743039303, + "grad_norm": 0.8343983888626099, + "learning_rate": 0.00019724987601910497, + "loss": 2.7317, + "step": 1589 + }, + { + "epoch": 0.12831894116697604, + "grad_norm": 0.8084509372711182, + "learning_rate": 0.00019724619791163095, + "loss": 2.7822, + "step": 1590 + }, + { + "epoch": 0.12839964490355904, + "grad_norm": 0.8397380113601685, + "learning_rate": 0.00019724251738052745, + "loss": 2.8188, + "step": 1591 + }, + { + "epoch": 0.12848034864014204, + "grad_norm": 0.8558558821678162, + "learning_rate": 0.00019723883442588624, + "loss": 2.7623, + "step": 1592 + }, + { + "epoch": 0.12856105237672505, + "grad_norm": 0.7602639198303223, + "learning_rate": 0.0001972351490477991, + "loss": 2.7932, + "step": 1593 + }, + { + "epoch": 0.12864175611330805, + "grad_norm": 0.8379851579666138, + "learning_rate": 0.00019723146124635786, + "loss": 2.8296, + "step": 1594 + }, + { + "epoch": 0.12872245984989106, + "grad_norm": 0.8454548716545105, + "learning_rate": 0.00019722777102165444, + "loss": 2.8192, + "step": 1595 + }, + { + "epoch": 0.12880316358647406, + "grad_norm": 0.8344082832336426, + "learning_rate": 0.0001972240783737808, + "loss": 2.7628, + "step": 1596 + }, + { + "epoch": 0.12888386732305707, + "grad_norm": 0.809093713760376, + "learning_rate": 0.000197220383302829, + "loss": 2.8055, + "step": 1597 + }, + { + 
"epoch": 0.12896457105964007, + "grad_norm": 0.7909694910049438, + "learning_rate": 0.0001972166858088911, + "loss": 2.7292, + "step": 1598 + }, + { + "epoch": 0.12904527479622308, + "grad_norm": 0.8350280523300171, + "learning_rate": 0.00019721298589205928, + "loss": 2.7671, + "step": 1599 + }, + { + "epoch": 0.12912597853280608, + "grad_norm": 0.7857616543769836, + "learning_rate": 0.00019720928355242568, + "loss": 2.729, + "step": 1600 + }, + { + "epoch": 0.12920668226938908, + "grad_norm": 0.7899746298789978, + "learning_rate": 0.0001972055787900827, + "loss": 2.8023, + "step": 1601 + }, + { + "epoch": 0.1292873860059721, + "grad_norm": 0.8604246377944946, + "learning_rate": 0.00019720187160512256, + "loss": 2.749, + "step": 1602 + }, + { + "epoch": 0.1293680897425551, + "grad_norm": 0.8517864942550659, + "learning_rate": 0.0001971981619976377, + "loss": 2.7203, + "step": 1603 + }, + { + "epoch": 0.1294487934791381, + "grad_norm": 0.8860471248626709, + "learning_rate": 0.00019719444996772056, + "loss": 2.7372, + "step": 1604 + }, + { + "epoch": 0.1295294972157211, + "grad_norm": 0.8355888724327087, + "learning_rate": 0.00019719073551546367, + "loss": 2.7284, + "step": 1605 + }, + { + "epoch": 0.1296102009523041, + "grad_norm": 0.7998479604721069, + "learning_rate": 0.00019718701864095955, + "loss": 2.7726, + "step": 1606 + }, + { + "epoch": 0.12969090468888708, + "grad_norm": 0.8564549088478088, + "learning_rate": 0.00019718329934430092, + "loss": 2.7334, + "step": 1607 + }, + { + "epoch": 0.1297716084254701, + "grad_norm": 0.8594443798065186, + "learning_rate": 0.00019717957762558044, + "loss": 2.7865, + "step": 1608 + }, + { + "epoch": 0.1298523121620531, + "grad_norm": 0.804553210735321, + "learning_rate": 0.00019717585348489082, + "loss": 2.8094, + "step": 1609 + }, + { + "epoch": 0.1299330158986361, + "grad_norm": 0.7892553806304932, + "learning_rate": 0.0001971721269223249, + "loss": 2.7969, + "step": 1610 + }, + { + "epoch": 0.1300137196352191, + 
"grad_norm": 0.8703331351280212, + "learning_rate": 0.0001971683979379756, + "loss": 2.8192, + "step": 1611 + }, + { + "epoch": 0.1300944233718021, + "grad_norm": 0.8176589012145996, + "learning_rate": 0.00019716466653193582, + "loss": 2.7902, + "step": 1612 + }, + { + "epoch": 0.1301751271083851, + "grad_norm": 0.8305137157440186, + "learning_rate": 0.00019716093270429855, + "loss": 2.8202, + "step": 1613 + }, + { + "epoch": 0.1302558308449681, + "grad_norm": 0.8261505365371704, + "learning_rate": 0.00019715719645515688, + "loss": 2.7905, + "step": 1614 + }, + { + "epoch": 0.13033653458155112, + "grad_norm": 0.9465535879135132, + "learning_rate": 0.00019715345778460389, + "loss": 2.7965, + "step": 1615 + }, + { + "epoch": 0.13041723831813412, + "grad_norm": 0.8847100138664246, + "learning_rate": 0.00019714971669273275, + "loss": 2.8177, + "step": 1616 + }, + { + "epoch": 0.13049794205471713, + "grad_norm": 0.9768328666687012, + "learning_rate": 0.0001971459731796367, + "loss": 2.7668, + "step": 1617 + }, + { + "epoch": 0.13057864579130013, + "grad_norm": 0.7498586177825928, + "learning_rate": 0.0001971422272454091, + "loss": 2.761, + "step": 1618 + }, + { + "epoch": 0.13065934952788313, + "grad_norm": 1.0455373525619507, + "learning_rate": 0.00019713847889014325, + "loss": 2.7652, + "step": 1619 + }, + { + "epoch": 0.13074005326446614, + "grad_norm": 0.8484631180763245, + "learning_rate": 0.00019713472811393258, + "loss": 2.7858, + "step": 1620 + }, + { + "epoch": 0.13082075700104914, + "grad_norm": 0.8190686702728271, + "learning_rate": 0.00019713097491687057, + "loss": 2.7217, + "step": 1621 + }, + { + "epoch": 0.13090146073763215, + "grad_norm": 0.8866000175476074, + "learning_rate": 0.00019712721929905077, + "loss": 2.7868, + "step": 1622 + }, + { + "epoch": 0.13098216447421515, + "grad_norm": 0.8026713132858276, + "learning_rate": 0.00019712346126056677, + "loss": 2.7276, + "step": 1623 + }, + { + "epoch": 0.13106286821079816, + "grad_norm": 
0.8306462168693542, + "learning_rate": 0.00019711970080151225, + "loss": 2.7747, + "step": 1624 + }, + { + "epoch": 0.13114357194738116, + "grad_norm": 0.8276618123054504, + "learning_rate": 0.0001971159379219809, + "loss": 2.7146, + "step": 1625 + }, + { + "epoch": 0.13122427568396416, + "grad_norm": 0.9749011993408203, + "learning_rate": 0.00019711217262206648, + "loss": 2.8731, + "step": 1626 + }, + { + "epoch": 0.13130497942054717, + "grad_norm": 0.828484058380127, + "learning_rate": 0.00019710840490186292, + "loss": 2.803, + "step": 1627 + }, + { + "epoch": 0.13138568315713017, + "grad_norm": 0.8095957636833191, + "learning_rate": 0.00019710463476146402, + "loss": 2.7751, + "step": 1628 + }, + { + "epoch": 0.13146638689371318, + "grad_norm": 0.8731853365898132, + "learning_rate": 0.0001971008622009638, + "loss": 2.8274, + "step": 1629 + }, + { + "epoch": 0.13154709063029618, + "grad_norm": 0.8180200457572937, + "learning_rate": 0.00019709708722045628, + "loss": 2.813, + "step": 1630 + }, + { + "epoch": 0.13162779436687919, + "grad_norm": 0.7740067839622498, + "learning_rate": 0.00019709330982003553, + "loss": 2.7319, + "step": 1631 + }, + { + "epoch": 0.1317084981034622, + "grad_norm": 0.8439326882362366, + "learning_rate": 0.0001970895299997957, + "loss": 2.8182, + "step": 1632 + }, + { + "epoch": 0.1317892018400452, + "grad_norm": 0.8254802823066711, + "learning_rate": 0.000197085747759831, + "loss": 2.7874, + "step": 1633 + }, + { + "epoch": 0.1318699055766282, + "grad_norm": 0.8128175139427185, + "learning_rate": 0.00019708196310023562, + "loss": 2.8125, + "step": 1634 + }, + { + "epoch": 0.1319506093132112, + "grad_norm": 0.8664820790290833, + "learning_rate": 0.00019707817602110402, + "loss": 2.8446, + "step": 1635 + }, + { + "epoch": 0.1320313130497942, + "grad_norm": 0.8101332783699036, + "learning_rate": 0.00019707438652253044, + "loss": 2.8027, + "step": 1636 + }, + { + "epoch": 0.1321120167863772, + "grad_norm": 0.8296725153923035, + 
"learning_rate": 0.00019707059460460945, + "loss": 2.7677, + "step": 1637 + }, + { + "epoch": 0.13219272052296022, + "grad_norm": 0.7321150898933411, + "learning_rate": 0.0001970668002674355, + "loss": 2.6991, + "step": 1638 + }, + { + "epoch": 0.13227342425954322, + "grad_norm": 0.8321375250816345, + "learning_rate": 0.0001970630035111031, + "loss": 2.6948, + "step": 1639 + }, + { + "epoch": 0.13235412799612623, + "grad_norm": 0.7622714042663574, + "learning_rate": 0.00019705920433570694, + "loss": 2.6957, + "step": 1640 + }, + { + "epoch": 0.13243483173270923, + "grad_norm": 0.8413416147232056, + "learning_rate": 0.00019705540274134173, + "loss": 2.7277, + "step": 1641 + }, + { + "epoch": 0.13251553546929223, + "grad_norm": 0.8798941373825073, + "learning_rate": 0.00019705159872810218, + "loss": 2.7699, + "step": 1642 + }, + { + "epoch": 0.13259623920587524, + "grad_norm": 0.788287341594696, + "learning_rate": 0.00019704779229608304, + "loss": 2.7933, + "step": 1643 + }, + { + "epoch": 0.13267694294245824, + "grad_norm": 0.8547430634498596, + "learning_rate": 0.00019704398344537927, + "loss": 2.7706, + "step": 1644 + }, + { + "epoch": 0.13275764667904125, + "grad_norm": 0.8474008440971375, + "learning_rate": 0.00019704017217608575, + "loss": 2.8005, + "step": 1645 + }, + { + "epoch": 0.13283835041562425, + "grad_norm": 0.8636945486068726, + "learning_rate": 0.00019703635848829747, + "loss": 2.8241, + "step": 1646 + }, + { + "epoch": 0.13291905415220726, + "grad_norm": 0.8158168792724609, + "learning_rate": 0.00019703254238210947, + "loss": 2.7576, + "step": 1647 + }, + { + "epoch": 0.13299975788879026, + "grad_norm": 0.8420887589454651, + "learning_rate": 0.0001970287238576169, + "loss": 2.7677, + "step": 1648 + }, + { + "epoch": 0.13308046162537326, + "grad_norm": 0.7910059690475464, + "learning_rate": 0.00019702490291491486, + "loss": 2.7807, + "step": 1649 + }, + { + "epoch": 0.13316116536195627, + "grad_norm": 0.8308143615722656, + "learning_rate": 
0.00019702107955409863, + "loss": 2.7698, + "step": 1650 + }, + { + "epoch": 0.13324186909853927, + "grad_norm": 0.8215764760971069, + "learning_rate": 0.00019701725377526349, + "loss": 2.8263, + "step": 1651 + }, + { + "epoch": 0.13332257283512228, + "grad_norm": 0.8780504465103149, + "learning_rate": 0.00019701342557850476, + "loss": 2.8032, + "step": 1652 + }, + { + "epoch": 0.13340327657170528, + "grad_norm": 0.8125136494636536, + "learning_rate": 0.0001970095949639179, + "loss": 2.8317, + "step": 1653 + }, + { + "epoch": 0.13348398030828829, + "grad_norm": 0.8170902132987976, + "learning_rate": 0.00019700576193159831, + "loss": 2.7528, + "step": 1654 + }, + { + "epoch": 0.1335646840448713, + "grad_norm": 0.8318637013435364, + "learning_rate": 0.00019700192648164157, + "loss": 2.7963, + "step": 1655 + }, + { + "epoch": 0.1336453877814543, + "grad_norm": 0.8445270657539368, + "learning_rate": 0.00019699808861414327, + "loss": 2.772, + "step": 1656 + }, + { + "epoch": 0.1337260915180373, + "grad_norm": 0.7908959984779358, + "learning_rate": 0.00019699424832919906, + "loss": 2.7528, + "step": 1657 + }, + { + "epoch": 0.13380679525462028, + "grad_norm": 0.8153900504112244, + "learning_rate": 0.00019699040562690462, + "loss": 2.7643, + "step": 1658 + }, + { + "epoch": 0.13388749899120328, + "grad_norm": 0.86302250623703, + "learning_rate": 0.0001969865605073557, + "loss": 2.8037, + "step": 1659 + }, + { + "epoch": 0.13396820272778628, + "grad_norm": 0.8373419046401978, + "learning_rate": 0.0001969827129706482, + "loss": 2.7647, + "step": 1660 + }, + { + "epoch": 0.1340489064643693, + "grad_norm": 0.8166481852531433, + "learning_rate": 0.00019697886301687798, + "loss": 2.8333, + "step": 1661 + }, + { + "epoch": 0.1341296102009523, + "grad_norm": 0.7807812094688416, + "learning_rate": 0.00019697501064614098, + "loss": 2.7495, + "step": 1662 + }, + { + "epoch": 0.1342103139375353, + "grad_norm": 0.8375338315963745, + "learning_rate": 0.00019697115585853324, + "loss": 
2.7518, + "step": 1663 + }, + { + "epoch": 0.1342910176741183, + "grad_norm": 0.7392182350158691, + "learning_rate": 0.00019696729865415077, + "loss": 2.758, + "step": 1664 + }, + { + "epoch": 0.1343717214107013, + "grad_norm": 0.8041971921920776, + "learning_rate": 0.00019696343903308978, + "loss": 2.7485, + "step": 1665 + }, + { + "epoch": 0.1344524251472843, + "grad_norm": 0.789310097694397, + "learning_rate": 0.00019695957699544643, + "loss": 2.8179, + "step": 1666 + }, + { + "epoch": 0.13453312888386731, + "grad_norm": 0.7643609642982483, + "learning_rate": 0.00019695571254131693, + "loss": 2.7791, + "step": 1667 + }, + { + "epoch": 0.13461383262045032, + "grad_norm": 0.8284661769866943, + "learning_rate": 0.00019695184567079766, + "loss": 2.717, + "step": 1668 + }, + { + "epoch": 0.13469453635703332, + "grad_norm": 0.7620903253555298, + "learning_rate": 0.00019694797638398494, + "loss": 2.7808, + "step": 1669 + }, + { + "epoch": 0.13477524009361633, + "grad_norm": 0.9123913645744324, + "learning_rate": 0.00019694410468097524, + "loss": 2.7648, + "step": 1670 + }, + { + "epoch": 0.13485594383019933, + "grad_norm": 0.735518217086792, + "learning_rate": 0.000196940230561865, + "loss": 2.7653, + "step": 1671 + }, + { + "epoch": 0.13493664756678234, + "grad_norm": 0.8363413214683533, + "learning_rate": 0.00019693635402675085, + "loss": 2.766, + "step": 1672 + }, + { + "epoch": 0.13501735130336534, + "grad_norm": 0.8206491470336914, + "learning_rate": 0.00019693247507572936, + "loss": 2.7829, + "step": 1673 + }, + { + "epoch": 0.13509805503994834, + "grad_norm": 0.7726099491119385, + "learning_rate": 0.0001969285937088972, + "loss": 2.7381, + "step": 1674 + }, + { + "epoch": 0.13517875877653135, + "grad_norm": 0.8970316052436829, + "learning_rate": 0.0001969247099263511, + "loss": 2.7836, + "step": 1675 + }, + { + "epoch": 0.13525946251311435, + "grad_norm": 0.7966172099113464, + "learning_rate": 0.00019692082372818788, + "loss": 2.7135, + "step": 1676 + }, + { + 
"epoch": 0.13534016624969736, + "grad_norm": 0.8583024740219116, + "learning_rate": 0.00019691693511450438, + "loss": 2.7908, + "step": 1677 + }, + { + "epoch": 0.13542086998628036, + "grad_norm": 0.9430457353591919, + "learning_rate": 0.0001969130440853975, + "loss": 2.7311, + "step": 1678 + }, + { + "epoch": 0.13550157372286337, + "grad_norm": 0.8066009879112244, + "learning_rate": 0.00019690915064096424, + "loss": 2.7039, + "step": 1679 + }, + { + "epoch": 0.13558227745944637, + "grad_norm": 1.0169655084609985, + "learning_rate": 0.0001969052547813016, + "loss": 2.7832, + "step": 1680 + }, + { + "epoch": 0.13566298119602938, + "grad_norm": 0.8606080412864685, + "learning_rate": 0.00019690135650650672, + "loss": 2.751, + "step": 1681 + }, + { + "epoch": 0.13574368493261238, + "grad_norm": 0.8625333905220032, + "learning_rate": 0.00019689745581667674, + "loss": 2.761, + "step": 1682 + }, + { + "epoch": 0.13582438866919538, + "grad_norm": 0.9304285645484924, + "learning_rate": 0.00019689355271190886, + "loss": 2.7566, + "step": 1683 + }, + { + "epoch": 0.1359050924057784, + "grad_norm": 0.793397068977356, + "learning_rate": 0.00019688964719230035, + "loss": 2.7648, + "step": 1684 + }, + { + "epoch": 0.1359857961423614, + "grad_norm": 0.8496749401092529, + "learning_rate": 0.00019688573925794858, + "loss": 2.7461, + "step": 1685 + }, + { + "epoch": 0.1360664998789444, + "grad_norm": 0.7807914018630981, + "learning_rate": 0.0001968818289089509, + "loss": 2.8266, + "step": 1686 + }, + { + "epoch": 0.1361472036155274, + "grad_norm": 0.8186607956886292, + "learning_rate": 0.0001968779161454048, + "loss": 2.8447, + "step": 1687 + }, + { + "epoch": 0.1362279073521104, + "grad_norm": 0.8007118701934814, + "learning_rate": 0.0001968740009674078, + "loss": 2.7888, + "step": 1688 + }, + { + "epoch": 0.1363086110886934, + "grad_norm": 0.8735570311546326, + "learning_rate": 0.00019687008337505749, + "loss": 2.7152, + "step": 1689 + }, + { + "epoch": 0.13638931482527641, + 
"grad_norm": 0.8546476364135742, + "learning_rate": 0.00019686616336845144, + "loss": 2.8113, + "step": 1690 + }, + { + "epoch": 0.13647001856185942, + "grad_norm": 0.9156736135482788, + "learning_rate": 0.0001968622409476874, + "loss": 2.7561, + "step": 1691 + }, + { + "epoch": 0.13655072229844242, + "grad_norm": 0.8091925382614136, + "learning_rate": 0.0001968583161128631, + "loss": 2.7384, + "step": 1692 + }, + { + "epoch": 0.13663142603502543, + "grad_norm": 0.7871039509773254, + "learning_rate": 0.0001968543888640764, + "loss": 2.7138, + "step": 1693 + }, + { + "epoch": 0.13671212977160843, + "grad_norm": 0.9537062048912048, + "learning_rate": 0.00019685045920142516, + "loss": 2.7726, + "step": 1694 + }, + { + "epoch": 0.13679283350819144, + "grad_norm": 0.8663280010223389, + "learning_rate": 0.00019684652712500728, + "loss": 2.7509, + "step": 1695 + }, + { + "epoch": 0.13687353724477444, + "grad_norm": 0.8717214465141296, + "learning_rate": 0.0001968425926349208, + "loss": 2.791, + "step": 1696 + }, + { + "epoch": 0.13695424098135744, + "grad_norm": 0.8942584991455078, + "learning_rate": 0.00019683865573126374, + "loss": 2.77, + "step": 1697 + }, + { + "epoch": 0.13703494471794045, + "grad_norm": 0.8243421316146851, + "learning_rate": 0.00019683471641413424, + "loss": 2.8063, + "step": 1698 + }, + { + "epoch": 0.13711564845452345, + "grad_norm": 0.8618699908256531, + "learning_rate": 0.0001968307746836305, + "loss": 2.6872, + "step": 1699 + }, + { + "epoch": 0.13719635219110646, + "grad_norm": 0.7931695580482483, + "learning_rate": 0.00019682683053985072, + "loss": 2.7495, + "step": 1700 + }, + { + "epoch": 0.13727705592768946, + "grad_norm": 0.7549482583999634, + "learning_rate": 0.00019682288398289324, + "loss": 2.7543, + "step": 1701 + }, + { + "epoch": 0.13735775966427247, + "grad_norm": 0.7953789234161377, + "learning_rate": 0.00019681893501285636, + "loss": 2.6895, + "step": 1702 + }, + { + "epoch": 0.13743846340085547, + "grad_norm": 
0.7916574478149414, + "learning_rate": 0.00019681498362983857, + "loss": 2.819, + "step": 1703 + }, + { + "epoch": 0.13751916713743847, + "grad_norm": 0.7986735105514526, + "learning_rate": 0.0001968110298339383, + "loss": 2.8062, + "step": 1704 + }, + { + "epoch": 0.13759987087402148, + "grad_norm": 0.8601658940315247, + "learning_rate": 0.00019680707362525407, + "loss": 2.7625, + "step": 1705 + }, + { + "epoch": 0.13768057461060448, + "grad_norm": 0.8888362050056458, + "learning_rate": 0.00019680311500388454, + "loss": 2.7747, + "step": 1706 + }, + { + "epoch": 0.1377612783471875, + "grad_norm": 0.7762896418571472, + "learning_rate": 0.00019679915396992833, + "loss": 2.7959, + "step": 1707 + }, + { + "epoch": 0.1378419820837705, + "grad_norm": 0.8942253589630127, + "learning_rate": 0.00019679519052348416, + "loss": 2.7717, + "step": 1708 + }, + { + "epoch": 0.13792268582035347, + "grad_norm": 0.8388909697532654, + "learning_rate": 0.00019679122466465082, + "loss": 2.7448, + "step": 1709 + }, + { + "epoch": 0.13800338955693647, + "grad_norm": 0.8826024532318115, + "learning_rate": 0.00019678725639352712, + "loss": 2.7307, + "step": 1710 + }, + { + "epoch": 0.13808409329351948, + "grad_norm": 0.8972313404083252, + "learning_rate": 0.00019678328571021204, + "loss": 2.7619, + "step": 1711 + }, + { + "epoch": 0.13816479703010248, + "grad_norm": 0.9373044371604919, + "learning_rate": 0.00019677931261480444, + "loss": 2.7664, + "step": 1712 + }, + { + "epoch": 0.1382455007666855, + "grad_norm": 0.8060994148254395, + "learning_rate": 0.00019677533710740343, + "loss": 2.7707, + "step": 1713 + }, + { + "epoch": 0.1383262045032685, + "grad_norm": 0.8324100971221924, + "learning_rate": 0.000196771359188108, + "loss": 2.8249, + "step": 1714 + }, + { + "epoch": 0.1384069082398515, + "grad_norm": 0.879176676273346, + "learning_rate": 0.00019676737885701738, + "loss": 2.7767, + "step": 1715 + }, + { + "epoch": 0.1384876119764345, + "grad_norm": 0.8823966979980469, + 
"learning_rate": 0.0001967633961142307, + "loss": 2.791, + "step": 1716 + }, + { + "epoch": 0.1385683157130175, + "grad_norm": 0.8176039457321167, + "learning_rate": 0.00019675941095984728, + "loss": 2.8225, + "step": 1717 + }, + { + "epoch": 0.1386490194496005, + "grad_norm": 0.8005076050758362, + "learning_rate": 0.00019675542339396635, + "loss": 2.8175, + "step": 1718 + }, + { + "epoch": 0.1387297231861835, + "grad_norm": 0.800854504108429, + "learning_rate": 0.0001967514334166874, + "loss": 2.8226, + "step": 1719 + }, + { + "epoch": 0.13881042692276652, + "grad_norm": 0.7941261529922485, + "learning_rate": 0.00019674744102810978, + "loss": 2.7488, + "step": 1720 + }, + { + "epoch": 0.13889113065934952, + "grad_norm": 0.7955947518348694, + "learning_rate": 0.00019674344622833302, + "loss": 2.7749, + "step": 1721 + }, + { + "epoch": 0.13897183439593253, + "grad_norm": 0.8353856205940247, + "learning_rate": 0.00019673944901745674, + "loss": 2.7982, + "step": 1722 + }, + { + "epoch": 0.13905253813251553, + "grad_norm": 0.8711503744125366, + "learning_rate": 0.00019673544939558047, + "loss": 2.8007, + "step": 1723 + }, + { + "epoch": 0.13913324186909853, + "grad_norm": 0.8525274991989136, + "learning_rate": 0.00019673144736280396, + "loss": 2.7423, + "step": 1724 + }, + { + "epoch": 0.13921394560568154, + "grad_norm": 0.8143991231918335, + "learning_rate": 0.0001967274429192269, + "loss": 2.7752, + "step": 1725 + }, + { + "epoch": 0.13929464934226454, + "grad_norm": 0.8508228063583374, + "learning_rate": 0.00019672343606494912, + "loss": 2.7422, + "step": 1726 + }, + { + "epoch": 0.13937535307884755, + "grad_norm": 0.8320932984352112, + "learning_rate": 0.0001967194268000705, + "loss": 2.7598, + "step": 1727 + }, + { + "epoch": 0.13945605681543055, + "grad_norm": 0.8233908414840698, + "learning_rate": 0.00019671541512469092, + "loss": 2.7834, + "step": 1728 + }, + { + "epoch": 0.13953676055201356, + "grad_norm": 0.8097162246704102, + "learning_rate": 
0.00019671140103891038, + "loss": 2.7856, + "step": 1729 + }, + { + "epoch": 0.13961746428859656, + "grad_norm": 0.9043141007423401, + "learning_rate": 0.0001967073845428289, + "loss": 2.8047, + "step": 1730 + }, + { + "epoch": 0.13969816802517956, + "grad_norm": 0.9118517637252808, + "learning_rate": 0.00019670336563654662, + "loss": 2.789, + "step": 1731 + }, + { + "epoch": 0.13977887176176257, + "grad_norm": 0.8016074895858765, + "learning_rate": 0.00019669934432016368, + "loss": 2.7506, + "step": 1732 + }, + { + "epoch": 0.13985957549834557, + "grad_norm": 0.8376848697662354, + "learning_rate": 0.0001966953205937803, + "loss": 2.7832, + "step": 1733 + }, + { + "epoch": 0.13994027923492858, + "grad_norm": 0.8511834144592285, + "learning_rate": 0.0001966912944574968, + "loss": 2.7564, + "step": 1734 + }, + { + "epoch": 0.14002098297151158, + "grad_norm": 0.7796351909637451, + "learning_rate": 0.00019668726591141344, + "loss": 2.7489, + "step": 1735 + }, + { + "epoch": 0.14010168670809459, + "grad_norm": 0.8204767107963562, + "learning_rate": 0.00019668323495563068, + "loss": 2.7634, + "step": 1736 + }, + { + "epoch": 0.1401823904446776, + "grad_norm": 0.9049975872039795, + "learning_rate": 0.000196679201590249, + "loss": 2.7863, + "step": 1737 + }, + { + "epoch": 0.1402630941812606, + "grad_norm": 0.7473673224449158, + "learning_rate": 0.0001966751658153689, + "loss": 2.7557, + "step": 1738 + }, + { + "epoch": 0.1403437979178436, + "grad_norm": 0.7765525579452515, + "learning_rate": 0.0001966711276310909, + "loss": 2.7865, + "step": 1739 + }, + { + "epoch": 0.1404245016544266, + "grad_norm": 0.8766517043113708, + "learning_rate": 0.00019666708703751576, + "loss": 2.7873, + "step": 1740 + }, + { + "epoch": 0.1405052053910096, + "grad_norm": 0.8351505994796753, + "learning_rate": 0.00019666304403474408, + "loss": 2.7355, + "step": 1741 + }, + { + "epoch": 0.1405859091275926, + "grad_norm": 0.7612324953079224, + "learning_rate": 0.00019665899862287667, + "loss": 
2.7608, + "step": 1742 + }, + { + "epoch": 0.14066661286417562, + "grad_norm": 0.894249439239502, + "learning_rate": 0.00019665495080201434, + "loss": 2.7469, + "step": 1743 + }, + { + "epoch": 0.14074731660075862, + "grad_norm": 0.8528907895088196, + "learning_rate": 0.00019665090057225803, + "loss": 2.773, + "step": 1744 + }, + { + "epoch": 0.14082802033734163, + "grad_norm": 0.7718498706817627, + "learning_rate": 0.00019664684793370855, + "loss": 2.8045, + "step": 1745 + }, + { + "epoch": 0.14090872407392463, + "grad_norm": 0.8013718128204346, + "learning_rate": 0.00019664279288646706, + "loss": 2.7665, + "step": 1746 + }, + { + "epoch": 0.14098942781050763, + "grad_norm": 0.828803539276123, + "learning_rate": 0.00019663873543063448, + "loss": 2.7846, + "step": 1747 + }, + { + "epoch": 0.14107013154709064, + "grad_norm": 0.8349393606185913, + "learning_rate": 0.00019663467556631204, + "loss": 2.7405, + "step": 1748 + }, + { + "epoch": 0.14115083528367364, + "grad_norm": 0.8273345232009888, + "learning_rate": 0.00019663061329360085, + "loss": 2.7578, + "step": 1749 + }, + { + "epoch": 0.14123153902025665, + "grad_norm": 0.7989444136619568, + "learning_rate": 0.0001966265486126022, + "loss": 2.739, + "step": 1750 + }, + { + "epoch": 0.14131224275683965, + "grad_norm": 0.8690519332885742, + "learning_rate": 0.00019662248152341736, + "loss": 2.7566, + "step": 1751 + }, + { + "epoch": 0.14139294649342266, + "grad_norm": 0.8453623056411743, + "learning_rate": 0.0001966184120261477, + "loss": 2.8572, + "step": 1752 + }, + { + "epoch": 0.14147365023000566, + "grad_norm": 0.8396254777908325, + "learning_rate": 0.00019661434012089468, + "loss": 2.786, + "step": 1753 + }, + { + "epoch": 0.14155435396658866, + "grad_norm": 0.7643738389015198, + "learning_rate": 0.00019661026580775973, + "loss": 2.8193, + "step": 1754 + }, + { + "epoch": 0.14163505770317167, + "grad_norm": 0.8124154806137085, + "learning_rate": 0.00019660618908684443, + "loss": 2.7754, + "step": 1755 + }, + 
{ + "epoch": 0.14171576143975467, + "grad_norm": 0.8620683550834656, + "learning_rate": 0.00019660210995825036, + "loss": 2.7827, + "step": 1756 + }, + { + "epoch": 0.14179646517633768, + "grad_norm": 0.8241196274757385, + "learning_rate": 0.0001965980284220792, + "loss": 2.7573, + "step": 1757 + }, + { + "epoch": 0.14187716891292068, + "grad_norm": 0.8264089822769165, + "learning_rate": 0.00019659394447843262, + "loss": 2.8214, + "step": 1758 + }, + { + "epoch": 0.14195787264950369, + "grad_norm": 0.9129722118377686, + "learning_rate": 0.00019658985812741247, + "loss": 2.7962, + "step": 1759 + }, + { + "epoch": 0.14203857638608666, + "grad_norm": 0.7976365089416504, + "learning_rate": 0.00019658576936912057, + "loss": 2.7534, + "step": 1760 + }, + { + "epoch": 0.14211928012266967, + "grad_norm": 0.7587228417396545, + "learning_rate": 0.00019658167820365882, + "loss": 2.7083, + "step": 1761 + }, + { + "epoch": 0.14219998385925267, + "grad_norm": 0.757882833480835, + "learning_rate": 0.00019657758463112918, + "loss": 2.7135, + "step": 1762 + }, + { + "epoch": 0.14228068759583568, + "grad_norm": 0.8541501760482788, + "learning_rate": 0.00019657348865163369, + "loss": 2.7833, + "step": 1763 + }, + { + "epoch": 0.14236139133241868, + "grad_norm": 0.7708966135978699, + "learning_rate": 0.00019656939026527442, + "loss": 2.7128, + "step": 1764 + }, + { + "epoch": 0.14244209506900168, + "grad_norm": 0.8733000159263611, + "learning_rate": 0.00019656528947215347, + "loss": 2.7597, + "step": 1765 + }, + { + "epoch": 0.1425227988055847, + "grad_norm": 0.7913360595703125, + "learning_rate": 0.0001965611862723731, + "loss": 2.7681, + "step": 1766 + }, + { + "epoch": 0.1426035025421677, + "grad_norm": 0.8692380785942078, + "learning_rate": 0.00019655708066603555, + "loss": 2.7587, + "step": 1767 + }, + { + "epoch": 0.1426842062787507, + "grad_norm": 0.8231006860733032, + "learning_rate": 0.00019655297265324317, + "loss": 2.772, + "step": 1768 + }, + { + "epoch": 
0.1427649100153337, + "grad_norm": 0.7373722791671753, + "learning_rate": 0.0001965488622340983, + "loss": 2.7875, + "step": 1769 + }, + { + "epoch": 0.1428456137519167, + "grad_norm": 0.8614751696586609, + "learning_rate": 0.0001965447494087034, + "loss": 2.7962, + "step": 1770 + }, + { + "epoch": 0.1429263174884997, + "grad_norm": 0.8336494565010071, + "learning_rate": 0.000196540634177161, + "loss": 2.7072, + "step": 1771 + }, + { + "epoch": 0.14300702122508271, + "grad_norm": 0.844292163848877, + "learning_rate": 0.00019653651653957362, + "loss": 2.8043, + "step": 1772 + }, + { + "epoch": 0.14308772496166572, + "grad_norm": 0.7366824150085449, + "learning_rate": 0.0001965323964960439, + "loss": 2.7296, + "step": 1773 + }, + { + "epoch": 0.14316842869824872, + "grad_norm": 0.75767982006073, + "learning_rate": 0.0001965282740466745, + "loss": 2.7946, + "step": 1774 + }, + { + "epoch": 0.14324913243483173, + "grad_norm": 0.8361382484436035, + "learning_rate": 0.00019652414919156823, + "loss": 2.7232, + "step": 1775 + }, + { + "epoch": 0.14332983617141473, + "grad_norm": 0.8473719358444214, + "learning_rate": 0.0001965200219308278, + "loss": 2.774, + "step": 1776 + }, + { + "epoch": 0.14341053990799774, + "grad_norm": 0.7446423172950745, + "learning_rate": 0.00019651589226455613, + "loss": 2.7439, + "step": 1777 + }, + { + "epoch": 0.14349124364458074, + "grad_norm": 0.8332851529121399, + "learning_rate": 0.00019651176019285616, + "loss": 2.7891, + "step": 1778 + }, + { + "epoch": 0.14357194738116374, + "grad_norm": 0.885313868522644, + "learning_rate": 0.0001965076257158308, + "loss": 2.7677, + "step": 1779 + }, + { + "epoch": 0.14365265111774675, + "grad_norm": 0.8506965637207031, + "learning_rate": 0.00019650348883358315, + "loss": 2.8112, + "step": 1780 + }, + { + "epoch": 0.14373335485432975, + "grad_norm": 0.8415799736976624, + "learning_rate": 0.0001964993495462163, + "loss": 2.8242, + "step": 1781 + }, + { + "epoch": 0.14381405859091276, + "grad_norm": 
0.8501513004302979, + "learning_rate": 0.00019649520785383338, + "loss": 2.8352, + "step": 1782 + }, + { + "epoch": 0.14389476232749576, + "grad_norm": 0.7839778065681458, + "learning_rate": 0.00019649106375653767, + "loss": 2.7194, + "step": 1783 + }, + { + "epoch": 0.14397546606407877, + "grad_norm": 0.8013346195220947, + "learning_rate": 0.00019648691725443243, + "loss": 2.7665, + "step": 1784 + }, + { + "epoch": 0.14405616980066177, + "grad_norm": 1.0338317155838013, + "learning_rate": 0.00019648276834762095, + "loss": 2.8599, + "step": 1785 + }, + { + "epoch": 0.14413687353724478, + "grad_norm": 0.898417592048645, + "learning_rate": 0.0001964786170362067, + "loss": 2.7192, + "step": 1786 + }, + { + "epoch": 0.14421757727382778, + "grad_norm": 0.8876320123672485, + "learning_rate": 0.00019647446332029313, + "loss": 2.7722, + "step": 1787 + }, + { + "epoch": 0.14429828101041078, + "grad_norm": 0.819461464881897, + "learning_rate": 0.00019647030719998373, + "loss": 2.7698, + "step": 1788 + }, + { + "epoch": 0.1443789847469938, + "grad_norm": 0.848380446434021, + "learning_rate": 0.0001964661486753821, + "loss": 2.7894, + "step": 1789 + }, + { + "epoch": 0.1444596884835768, + "grad_norm": 0.8343753814697266, + "learning_rate": 0.0001964619877465919, + "loss": 2.699, + "step": 1790 + }, + { + "epoch": 0.1445403922201598, + "grad_norm": 0.8718340396881104, + "learning_rate": 0.0001964578244137168, + "loss": 2.7313, + "step": 1791 + }, + { + "epoch": 0.1446210959567428, + "grad_norm": 0.866122841835022, + "learning_rate": 0.00019645365867686056, + "loss": 2.7112, + "step": 1792 + }, + { + "epoch": 0.1447017996933258, + "grad_norm": 0.8351789712905884, + "learning_rate": 0.000196449490536127, + "loss": 2.7765, + "step": 1793 + }, + { + "epoch": 0.1447825034299088, + "grad_norm": 0.8628408312797546, + "learning_rate": 0.00019644531999162004, + "loss": 2.7375, + "step": 1794 + }, + { + "epoch": 0.14486320716649181, + "grad_norm": 0.8414484858512878, + "learning_rate": 
0.00019644114704344358, + "loss": 2.7502, + "step": 1795 + }, + { + "epoch": 0.14494391090307482, + "grad_norm": 0.9092586636543274, + "learning_rate": 0.00019643697169170166, + "loss": 2.7714, + "step": 1796 + }, + { + "epoch": 0.14502461463965782, + "grad_norm": 0.8458060622215271, + "learning_rate": 0.0001964327939364983, + "loss": 2.8376, + "step": 1797 + }, + { + "epoch": 0.14510531837624083, + "grad_norm": 0.8150759935379028, + "learning_rate": 0.00019642861377793764, + "loss": 2.7147, + "step": 1798 + }, + { + "epoch": 0.14518602211282383, + "grad_norm": 0.9008790850639343, + "learning_rate": 0.00019642443121612387, + "loss": 2.7786, + "step": 1799 + }, + { + "epoch": 0.14526672584940684, + "grad_norm": 0.848671555519104, + "learning_rate": 0.00019642024625116117, + "loss": 2.7813, + "step": 1800 + }, + { + "epoch": 0.14534742958598984, + "grad_norm": 0.8035007119178772, + "learning_rate": 0.00019641605888315393, + "loss": 2.7988, + "step": 1801 + }, + { + "epoch": 0.14542813332257284, + "grad_norm": 0.8210242390632629, + "learning_rate": 0.00019641186911220645, + "loss": 2.8451, + "step": 1802 + }, + { + "epoch": 0.14550883705915585, + "grad_norm": 0.8852066397666931, + "learning_rate": 0.00019640767693842318, + "loss": 2.7492, + "step": 1803 + }, + { + "epoch": 0.14558954079573885, + "grad_norm": 0.8421196937561035, + "learning_rate": 0.0001964034823619086, + "loss": 2.759, + "step": 1804 + }, + { + "epoch": 0.14567024453232186, + "grad_norm": 0.8166298866271973, + "learning_rate": 0.00019639928538276724, + "loss": 2.7942, + "step": 1805 + }, + { + "epoch": 0.14575094826890486, + "grad_norm": 0.8502809405326843, + "learning_rate": 0.00019639508600110368, + "loss": 2.7829, + "step": 1806 + }, + { + "epoch": 0.14583165200548787, + "grad_norm": 0.8371078372001648, + "learning_rate": 0.0001963908842170226, + "loss": 2.7168, + "step": 1807 + }, + { + "epoch": 0.14591235574207087, + "grad_norm": 0.8148230910301208, + "learning_rate": 0.0001963866800306287, + 
"loss": 2.7706, + "step": 1808 + }, + { + "epoch": 0.14599305947865387, + "grad_norm": 0.8984564542770386, + "learning_rate": 0.0001963824734420268, + "loss": 2.7761, + "step": 1809 + }, + { + "epoch": 0.14607376321523688, + "grad_norm": 0.9357183575630188, + "learning_rate": 0.00019637826445132172, + "loss": 2.7738, + "step": 1810 + }, + { + "epoch": 0.14615446695181986, + "grad_norm": 0.8545449376106262, + "learning_rate": 0.00019637405305861834, + "loss": 2.772, + "step": 1811 + }, + { + "epoch": 0.14623517068840286, + "grad_norm": 1.1674948930740356, + "learning_rate": 0.00019636983926402165, + "loss": 2.8988, + "step": 1812 + }, + { + "epoch": 0.14631587442498586, + "grad_norm": 0.7875451445579529, + "learning_rate": 0.00019636562306763665, + "loss": 2.7053, + "step": 1813 + }, + { + "epoch": 0.14639657816156887, + "grad_norm": 0.8980962038040161, + "learning_rate": 0.0001963614044695684, + "loss": 2.7731, + "step": 1814 + }, + { + "epoch": 0.14647728189815187, + "grad_norm": 0.8403381705284119, + "learning_rate": 0.00019635718346992207, + "loss": 2.8555, + "step": 1815 + }, + { + "epoch": 0.14655798563473488, + "grad_norm": 0.8736433982849121, + "learning_rate": 0.00019635296006880284, + "loss": 2.7918, + "step": 1816 + }, + { + "epoch": 0.14663868937131788, + "grad_norm": 0.8604151606559753, + "learning_rate": 0.000196348734266316, + "loss": 2.7493, + "step": 1817 + }, + { + "epoch": 0.1467193931079009, + "grad_norm": 0.8329424262046814, + "learning_rate": 0.00019634450606256681, + "loss": 2.7348, + "step": 1818 + }, + { + "epoch": 0.1468000968444839, + "grad_norm": 0.9835913181304932, + "learning_rate": 0.0001963402754576607, + "loss": 2.7651, + "step": 1819 + }, + { + "epoch": 0.1468808005810669, + "grad_norm": 0.7968378067016602, + "learning_rate": 0.0001963360424517031, + "loss": 2.7672, + "step": 1820 + }, + { + "epoch": 0.1469615043176499, + "grad_norm": 0.8012512922286987, + "learning_rate": 0.00019633180704479948, + "loss": 2.8022, + "step": 1821 + 
}, + { + "epoch": 0.1470422080542329, + "grad_norm": 0.7656376957893372, + "learning_rate": 0.0001963275692370554, + "loss": 2.7561, + "step": 1822 + }, + { + "epoch": 0.1471229117908159, + "grad_norm": 0.8030453324317932, + "learning_rate": 0.00019632332902857656, + "loss": 2.8048, + "step": 1823 + }, + { + "epoch": 0.1472036155273989, + "grad_norm": 0.8050903677940369, + "learning_rate": 0.0001963190864194685, + "loss": 2.7846, + "step": 1824 + }, + { + "epoch": 0.14728431926398192, + "grad_norm": 0.8001886606216431, + "learning_rate": 0.00019631484140983705, + "loss": 2.7382, + "step": 1825 + }, + { + "epoch": 0.14736502300056492, + "grad_norm": 0.8589862585067749, + "learning_rate": 0.00019631059399978796, + "loss": 2.8376, + "step": 1826 + }, + { + "epoch": 0.14744572673714793, + "grad_norm": 0.86325603723526, + "learning_rate": 0.00019630634418942714, + "loss": 2.7643, + "step": 1827 + }, + { + "epoch": 0.14752643047373093, + "grad_norm": 0.7893280386924744, + "learning_rate": 0.00019630209197886046, + "loss": 2.713, + "step": 1828 + }, + { + "epoch": 0.14760713421031393, + "grad_norm": 0.8890528082847595, + "learning_rate": 0.00019629783736819394, + "loss": 2.7435, + "step": 1829 + }, + { + "epoch": 0.14768783794689694, + "grad_norm": 0.794924795627594, + "learning_rate": 0.00019629358035753357, + "loss": 2.7703, + "step": 1830 + }, + { + "epoch": 0.14776854168347994, + "grad_norm": 0.7712973952293396, + "learning_rate": 0.00019628932094698545, + "loss": 2.7487, + "step": 1831 + }, + { + "epoch": 0.14784924542006295, + "grad_norm": 0.7810670137405396, + "learning_rate": 0.00019628505913665576, + "loss": 2.7687, + "step": 1832 + }, + { + "epoch": 0.14792994915664595, + "grad_norm": 0.8331059813499451, + "learning_rate": 0.0001962807949266507, + "loss": 2.7166, + "step": 1833 + }, + { + "epoch": 0.14801065289322896, + "grad_norm": 0.8983452916145325, + "learning_rate": 0.00019627652831707656, + "loss": 2.8096, + "step": 1834 + }, + { + "epoch": 
0.14809135662981196, + "grad_norm": 0.8387179374694824, + "learning_rate": 0.00019627225930803963, + "loss": 2.8252, + "step": 1835 + }, + { + "epoch": 0.14817206036639496, + "grad_norm": 0.8619294762611389, + "learning_rate": 0.0001962679878996464, + "loss": 2.7623, + "step": 1836 + }, + { + "epoch": 0.14825276410297797, + "grad_norm": 0.8195026516914368, + "learning_rate": 0.0001962637140920032, + "loss": 2.7295, + "step": 1837 + }, + { + "epoch": 0.14833346783956097, + "grad_norm": 0.806216835975647, + "learning_rate": 0.00019625943788521664, + "loss": 2.7184, + "step": 1838 + }, + { + "epoch": 0.14841417157614398, + "grad_norm": 0.7758379578590393, + "learning_rate": 0.00019625515927939327, + "loss": 2.7675, + "step": 1839 + }, + { + "epoch": 0.14849487531272698, + "grad_norm": 0.7617168426513672, + "learning_rate": 0.0001962508782746397, + "loss": 2.8041, + "step": 1840 + }, + { + "epoch": 0.14857557904930999, + "grad_norm": 0.9630066156387329, + "learning_rate": 0.00019624659487106264, + "loss": 2.814, + "step": 1841 + }, + { + "epoch": 0.148656282785893, + "grad_norm": 0.7656112313270569, + "learning_rate": 0.00019624230906876888, + "loss": 2.7564, + "step": 1842 + }, + { + "epoch": 0.148736986522476, + "grad_norm": 0.9394779801368713, + "learning_rate": 0.0001962380208678652, + "loss": 2.7958, + "step": 1843 + }, + { + "epoch": 0.148817690259059, + "grad_norm": 0.7647004127502441, + "learning_rate": 0.00019623373026845842, + "loss": 2.72, + "step": 1844 + }, + { + "epoch": 0.148898393995642, + "grad_norm": 0.809079647064209, + "learning_rate": 0.00019622943727065555, + "loss": 2.7732, + "step": 1845 + }, + { + "epoch": 0.148979097732225, + "grad_norm": 0.8241337537765503, + "learning_rate": 0.00019622514187456357, + "loss": 2.759, + "step": 1846 + }, + { + "epoch": 0.149059801468808, + "grad_norm": 0.8979619145393372, + "learning_rate": 0.00019622084408028948, + "loss": 2.8307, + "step": 1847 + }, + { + "epoch": 0.14914050520539102, + "grad_norm": 
0.8058865666389465, + "learning_rate": 0.00019621654388794047, + "loss": 2.807, + "step": 1848 + }, + { + "epoch": 0.14922120894197402, + "grad_norm": 0.81967693567276, + "learning_rate": 0.00019621224129762364, + "loss": 2.7762, + "step": 1849 + }, + { + "epoch": 0.14930191267855702, + "grad_norm": 0.7385755777359009, + "learning_rate": 0.0001962079363094463, + "loss": 2.7854, + "step": 1850 + }, + { + "epoch": 0.14938261641514003, + "grad_norm": 0.8585657477378845, + "learning_rate": 0.00019620362892351566, + "loss": 2.7781, + "step": 1851 + }, + { + "epoch": 0.14946332015172303, + "grad_norm": 0.8328986763954163, + "learning_rate": 0.00019619931913993912, + "loss": 2.8245, + "step": 1852 + }, + { + "epoch": 0.14954402388830604, + "grad_norm": 0.749727189540863, + "learning_rate": 0.0001961950069588241, + "loss": 2.8049, + "step": 1853 + }, + { + "epoch": 0.14962472762488904, + "grad_norm": 0.7886502742767334, + "learning_rate": 0.00019619069238027803, + "loss": 2.7521, + "step": 1854 + }, + { + "epoch": 0.14970543136147205, + "grad_norm": 0.816137433052063, + "learning_rate": 0.00019618637540440848, + "loss": 2.8383, + "step": 1855 + }, + { + "epoch": 0.14978613509805505, + "grad_norm": 0.80442214012146, + "learning_rate": 0.000196182056031323, + "loss": 2.7227, + "step": 1856 + }, + { + "epoch": 0.14986683883463806, + "grad_norm": 0.7605221271514893, + "learning_rate": 0.00019617773426112924, + "loss": 2.7494, + "step": 1857 + }, + { + "epoch": 0.14994754257122106, + "grad_norm": 0.8745137453079224, + "learning_rate": 0.00019617341009393497, + "loss": 2.6978, + "step": 1858 + }, + { + "epoch": 0.15002824630780406, + "grad_norm": 0.8151741623878479, + "learning_rate": 0.00019616908352984789, + "loss": 2.7817, + "step": 1859 + }, + { + "epoch": 0.15010895004438707, + "grad_norm": 0.773876428604126, + "learning_rate": 0.0001961647545689759, + "loss": 2.812, + "step": 1860 + }, + { + "epoch": 0.15018965378097007, + "grad_norm": 0.8216966390609741, + 
"learning_rate": 0.00019616042321142683, + "loss": 2.8181, + "step": 1861 + }, + { + "epoch": 0.15027035751755305, + "grad_norm": 0.8097409605979919, + "learning_rate": 0.00019615608945730862, + "loss": 2.8336, + "step": 1862 + }, + { + "epoch": 0.15035106125413605, + "grad_norm": 0.8085697293281555, + "learning_rate": 0.00019615175330672932, + "loss": 2.8176, + "step": 1863 + }, + { + "epoch": 0.15043176499071906, + "grad_norm": 0.7658133506774902, + "learning_rate": 0.00019614741475979701, + "loss": 2.7543, + "step": 1864 + }, + { + "epoch": 0.15051246872730206, + "grad_norm": 0.7193909883499146, + "learning_rate": 0.00019614307381661978, + "loss": 2.7475, + "step": 1865 + }, + { + "epoch": 0.15059317246388507, + "grad_norm": 0.835608959197998, + "learning_rate": 0.0001961387304773058, + "loss": 2.8017, + "step": 1866 + }, + { + "epoch": 0.15067387620046807, + "grad_norm": 0.7898489832878113, + "learning_rate": 0.0001961343847419634, + "loss": 2.7613, + "step": 1867 + }, + { + "epoch": 0.15075457993705108, + "grad_norm": 0.8031982183456421, + "learning_rate": 0.0001961300366107008, + "loss": 2.7442, + "step": 1868 + }, + { + "epoch": 0.15083528367363408, + "grad_norm": 0.8427363634109497, + "learning_rate": 0.00019612568608362642, + "loss": 2.8095, + "step": 1869 + }, + { + "epoch": 0.15091598741021708, + "grad_norm": 0.8282802700996399, + "learning_rate": 0.00019612133316084863, + "loss": 2.7216, + "step": 1870 + }, + { + "epoch": 0.1509966911468001, + "grad_norm": 0.7799758911132812, + "learning_rate": 0.000196116977842476, + "loss": 2.793, + "step": 1871 + }, + { + "epoch": 0.1510773948833831, + "grad_norm": 0.8151525259017944, + "learning_rate": 0.00019611262012861702, + "loss": 2.7641, + "step": 1872 + }, + { + "epoch": 0.1511580986199661, + "grad_norm": 0.7926812767982483, + "learning_rate": 0.0001961082600193803, + "loss": 2.7523, + "step": 1873 + }, + { + "epoch": 0.1512388023565491, + "grad_norm": 0.8737135529518127, + "learning_rate": 
0.0001961038975148745, + "loss": 2.7965, + "step": 1874 + }, + { + "epoch": 0.1513195060931321, + "grad_norm": 0.7948090434074402, + "learning_rate": 0.00019609953261520837, + "loss": 2.7737, + "step": 1875 + }, + { + "epoch": 0.1514002098297151, + "grad_norm": 0.8161277770996094, + "learning_rate": 0.0001960951653204907, + "loss": 2.7423, + "step": 1876 + }, + { + "epoch": 0.15148091356629811, + "grad_norm": 0.8904973864555359, + "learning_rate": 0.00019609079563083026, + "loss": 2.7066, + "step": 1877 + }, + { + "epoch": 0.15156161730288112, + "grad_norm": 0.8107061982154846, + "learning_rate": 0.00019608642354633604, + "loss": 2.7939, + "step": 1878 + }, + { + "epoch": 0.15164232103946412, + "grad_norm": 0.8410987854003906, + "learning_rate": 0.00019608204906711694, + "loss": 2.7521, + "step": 1879 + }, + { + "epoch": 0.15172302477604713, + "grad_norm": 0.8336483836174011, + "learning_rate": 0.0001960776721932821, + "loss": 2.7613, + "step": 1880 + }, + { + "epoch": 0.15180372851263013, + "grad_norm": 0.730549156665802, + "learning_rate": 0.00019607329292494044, + "loss": 2.8019, + "step": 1881 + }, + { + "epoch": 0.15188443224921314, + "grad_norm": 0.7543070912361145, + "learning_rate": 0.0001960689112622012, + "loss": 2.6907, + "step": 1882 + }, + { + "epoch": 0.15196513598579614, + "grad_norm": 0.848414421081543, + "learning_rate": 0.00019606452720517359, + "loss": 2.7278, + "step": 1883 + }, + { + "epoch": 0.15204583972237914, + "grad_norm": 0.8331718444824219, + "learning_rate": 0.00019606014075396682, + "loss": 2.6994, + "step": 1884 + }, + { + "epoch": 0.15212654345896215, + "grad_norm": 0.9192764759063721, + "learning_rate": 0.00019605575190869025, + "loss": 2.7095, + "step": 1885 + }, + { + "epoch": 0.15220724719554515, + "grad_norm": 0.8377116322517395, + "learning_rate": 0.00019605136066945324, + "loss": 2.7925, + "step": 1886 + }, + { + "epoch": 0.15228795093212816, + "grad_norm": 0.7302869558334351, + "learning_rate": 0.00019604696703636525, + 
"loss": 2.7286, + "step": 1887 + }, + { + "epoch": 0.15236865466871116, + "grad_norm": 0.7972438335418701, + "learning_rate": 0.00019604257100953577, + "loss": 2.7732, + "step": 1888 + }, + { + "epoch": 0.15244935840529417, + "grad_norm": 1.0350826978683472, + "learning_rate": 0.00019603817258907435, + "loss": 2.8211, + "step": 1889 + }, + { + "epoch": 0.15253006214187717, + "grad_norm": 0.782755970954895, + "learning_rate": 0.00019603377177509067, + "loss": 2.8489, + "step": 1890 + }, + { + "epoch": 0.15261076587846018, + "grad_norm": 0.9072603583335876, + "learning_rate": 0.0001960293685676943, + "loss": 2.7764, + "step": 1891 + }, + { + "epoch": 0.15269146961504318, + "grad_norm": 0.7878704071044922, + "learning_rate": 0.0001960249629669951, + "loss": 2.7494, + "step": 1892 + }, + { + "epoch": 0.15277217335162618, + "grad_norm": 0.8770418167114258, + "learning_rate": 0.00019602055497310278, + "loss": 2.7318, + "step": 1893 + }, + { + "epoch": 0.1528528770882092, + "grad_norm": 0.8004975914955139, + "learning_rate": 0.00019601614458612723, + "loss": 2.7272, + "step": 1894 + }, + { + "epoch": 0.1529335808247922, + "grad_norm": 0.8511070013046265, + "learning_rate": 0.00019601173180617835, + "loss": 2.7876, + "step": 1895 + }, + { + "epoch": 0.1530142845613752, + "grad_norm": 0.7946128845214844, + "learning_rate": 0.00019600731663336617, + "loss": 2.7435, + "step": 1896 + }, + { + "epoch": 0.1530949882979582, + "grad_norm": 0.8155317902565002, + "learning_rate": 0.00019600289906780067, + "loss": 2.7642, + "step": 1897 + }, + { + "epoch": 0.1531756920345412, + "grad_norm": 0.8086098432540894, + "learning_rate": 0.000195998479109592, + "loss": 2.7358, + "step": 1898 + }, + { + "epoch": 0.1532563957711242, + "grad_norm": 0.8698278665542603, + "learning_rate": 0.00019599405675885026, + "loss": 2.725, + "step": 1899 + }, + { + "epoch": 0.15333709950770721, + "grad_norm": 0.8756006360054016, + "learning_rate": 0.00019598963201568573, + "loss": 2.7209, + "step": 1900 + }, 
+ { + "epoch": 0.15341780324429022, + "grad_norm": 0.7984628081321716, + "learning_rate": 0.0001959852048802086, + "loss": 2.7685, + "step": 1901 + }, + { + "epoch": 0.15349850698087322, + "grad_norm": 0.8244056105613708, + "learning_rate": 0.0001959807753525293, + "loss": 2.7692, + "step": 1902 + }, + { + "epoch": 0.15357921071745623, + "grad_norm": 0.8577731251716614, + "learning_rate": 0.00019597634343275814, + "loss": 2.7571, + "step": 1903 + }, + { + "epoch": 0.15365991445403923, + "grad_norm": 0.8410975933074951, + "learning_rate": 0.00019597190912100566, + "loss": 2.7862, + "step": 1904 + }, + { + "epoch": 0.15374061819062224, + "grad_norm": 0.9094158411026001, + "learning_rate": 0.0001959674724173823, + "loss": 2.7655, + "step": 1905 + }, + { + "epoch": 0.15382132192720524, + "grad_norm": 0.8375208973884583, + "learning_rate": 0.00019596303332199868, + "loss": 2.8129, + "step": 1906 + }, + { + "epoch": 0.15390202566378824, + "grad_norm": 0.8335977792739868, + "learning_rate": 0.00019595859183496543, + "loss": 2.7835, + "step": 1907 + }, + { + "epoch": 0.15398272940037125, + "grad_norm": 0.7973531484603882, + "learning_rate": 0.0001959541479563932, + "loss": 2.7785, + "step": 1908 + }, + { + "epoch": 0.15406343313695425, + "grad_norm": 0.7808824181556702, + "learning_rate": 0.0001959497016863928, + "loss": 2.7862, + "step": 1909 + }, + { + "epoch": 0.15414413687353726, + "grad_norm": 0.853824257850647, + "learning_rate": 0.00019594525302507504, + "loss": 2.6721, + "step": 1910 + }, + { + "epoch": 0.15422484061012026, + "grad_norm": 0.8589324355125427, + "learning_rate": 0.00019594080197255073, + "loss": 2.7948, + "step": 1911 + }, + { + "epoch": 0.15430554434670327, + "grad_norm": 0.7951898574829102, + "learning_rate": 0.00019593634852893086, + "loss": 2.7903, + "step": 1912 + }, + { + "epoch": 0.15438624808328624, + "grad_norm": 0.8333349227905273, + "learning_rate": 0.0001959318926943264, + "loss": 2.8073, + "step": 1913 + }, + { + "epoch": 
0.15446695181986925, + "grad_norm": 0.8552380800247192, + "learning_rate": 0.0001959274344688484, + "loss": 2.8199, + "step": 1914 + }, + { + "epoch": 0.15454765555645225, + "grad_norm": 0.8356214165687561, + "learning_rate": 0.000195922973852608, + "loss": 2.7985, + "step": 1915 + }, + { + "epoch": 0.15462835929303526, + "grad_norm": 0.7167248725891113, + "learning_rate": 0.00019591851084571634, + "loss": 2.6802, + "step": 1916 + }, + { + "epoch": 0.15470906302961826, + "grad_norm": 0.7980726361274719, + "learning_rate": 0.00019591404544828464, + "loss": 2.692, + "step": 1917 + }, + { + "epoch": 0.15478976676620126, + "grad_norm": 0.7766004800796509, + "learning_rate": 0.00019590957766042424, + "loss": 2.7219, + "step": 1918 + }, + { + "epoch": 0.15487047050278427, + "grad_norm": 0.828852653503418, + "learning_rate": 0.0001959051074822464, + "loss": 2.7369, + "step": 1919 + }, + { + "epoch": 0.15495117423936727, + "grad_norm": 0.7818129062652588, + "learning_rate": 0.0001959006349138626, + "loss": 2.7778, + "step": 1920 + }, + { + "epoch": 0.15503187797595028, + "grad_norm": 0.8428593873977661, + "learning_rate": 0.00019589615995538432, + "loss": 2.8257, + "step": 1921 + }, + { + "epoch": 0.15511258171253328, + "grad_norm": 0.8756616115570068, + "learning_rate": 0.00019589168260692307, + "loss": 2.7692, + "step": 1922 + }, + { + "epoch": 0.15519328544911629, + "grad_norm": 0.7802519202232361, + "learning_rate": 0.0001958872028685904, + "loss": 2.7811, + "step": 1923 + }, + { + "epoch": 0.1552739891856993, + "grad_norm": 0.7787032723426819, + "learning_rate": 0.00019588272074049797, + "loss": 2.7546, + "step": 1924 + }, + { + "epoch": 0.1553546929222823, + "grad_norm": 0.848479151725769, + "learning_rate": 0.0001958782362227575, + "loss": 2.7759, + "step": 1925 + }, + { + "epoch": 0.1554353966588653, + "grad_norm": 0.8331353664398193, + "learning_rate": 0.00019587374931548076, + "loss": 2.7881, + "step": 1926 + }, + { + "epoch": 0.1555161003954483, + "grad_norm": 
0.8646424412727356, + "learning_rate": 0.00019586926001877958, + "loss": 2.8059, + "step": 1927 + }, + { + "epoch": 0.1555968041320313, + "grad_norm": 0.912253737449646, + "learning_rate": 0.00019586476833276584, + "loss": 2.7446, + "step": 1928 + }, + { + "epoch": 0.1556775078686143, + "grad_norm": 0.9256471395492554, + "learning_rate": 0.00019586027425755147, + "loss": 2.8, + "step": 1929 + }, + { + "epoch": 0.15575821160519732, + "grad_norm": 1.0984607934951782, + "learning_rate": 0.0001958557777932485, + "loss": 2.7759, + "step": 1930 + }, + { + "epoch": 0.15583891534178032, + "grad_norm": 0.8736081123352051, + "learning_rate": 0.00019585127893996895, + "loss": 2.7464, + "step": 1931 + }, + { + "epoch": 0.15591961907836333, + "grad_norm": 0.932538628578186, + "learning_rate": 0.00019584677769782498, + "loss": 2.7874, + "step": 1932 + }, + { + "epoch": 0.15600032281494633, + "grad_norm": 0.9742087125778198, + "learning_rate": 0.0001958422740669288, + "loss": 2.7727, + "step": 1933 + }, + { + "epoch": 0.15608102655152933, + "grad_norm": 0.8975874781608582, + "learning_rate": 0.00019583776804739256, + "loss": 2.7812, + "step": 1934 + }, + { + "epoch": 0.15616173028811234, + "grad_norm": 0.9380232691764832, + "learning_rate": 0.00019583325963932864, + "loss": 2.7284, + "step": 1935 + }, + { + "epoch": 0.15624243402469534, + "grad_norm": 0.8332872986793518, + "learning_rate": 0.00019582874884284938, + "loss": 2.7792, + "step": 1936 + }, + { + "epoch": 0.15632313776127835, + "grad_norm": 1.0017194747924805, + "learning_rate": 0.0001958242356580672, + "loss": 2.7187, + "step": 1937 + }, + { + "epoch": 0.15640384149786135, + "grad_norm": 0.9433515667915344, + "learning_rate": 0.0001958197200850946, + "loss": 2.8394, + "step": 1938 + }, + { + "epoch": 0.15648454523444436, + "grad_norm": 0.8781030178070068, + "learning_rate": 0.00019581520212404407, + "loss": 2.7667, + "step": 1939 + }, + { + "epoch": 0.15656524897102736, + "grad_norm": 0.895656168460846, + 
"learning_rate": 0.00019581068177502826, + "loss": 2.799, + "step": 1940 + }, + { + "epoch": 0.15664595270761036, + "grad_norm": 0.8336960673332214, + "learning_rate": 0.0001958061590381598, + "loss": 2.8152, + "step": 1941 + }, + { + "epoch": 0.15672665644419337, + "grad_norm": 0.9184536337852478, + "learning_rate": 0.00019580163391355143, + "loss": 2.7746, + "step": 1942 + }, + { + "epoch": 0.15680736018077637, + "grad_norm": 0.8564908504486084, + "learning_rate": 0.00019579710640131587, + "loss": 2.7674, + "step": 1943 + }, + { + "epoch": 0.15688806391735938, + "grad_norm": 0.7491608262062073, + "learning_rate": 0.00019579257650156605, + "loss": 2.7665, + "step": 1944 + }, + { + "epoch": 0.15696876765394238, + "grad_norm": 0.9165031313896179, + "learning_rate": 0.00019578804421441478, + "loss": 2.7343, + "step": 1945 + }, + { + "epoch": 0.15704947139052539, + "grad_norm": 0.8413978815078735, + "learning_rate": 0.00019578350953997512, + "loss": 2.7503, + "step": 1946 + }, + { + "epoch": 0.1571301751271084, + "grad_norm": 0.7820419073104858, + "learning_rate": 0.00019577897247835993, + "loss": 2.7535, + "step": 1947 + }, + { + "epoch": 0.1572108788636914, + "grad_norm": 0.8134996294975281, + "learning_rate": 0.00019577443302968246, + "loss": 2.7504, + "step": 1948 + }, + { + "epoch": 0.1572915826002744, + "grad_norm": 0.8201301097869873, + "learning_rate": 0.00019576989119405574, + "loss": 2.6927, + "step": 1949 + }, + { + "epoch": 0.1573722863368574, + "grad_norm": 0.8343217372894287, + "learning_rate": 0.00019576534697159296, + "loss": 2.7742, + "step": 1950 + }, + { + "epoch": 0.1574529900734404, + "grad_norm": 0.8161751627922058, + "learning_rate": 0.0001957608003624074, + "loss": 2.8236, + "step": 1951 + }, + { + "epoch": 0.1575336938100234, + "grad_norm": 0.8626808524131775, + "learning_rate": 0.00019575625136661242, + "loss": 2.7305, + "step": 1952 + }, + { + "epoch": 0.15761439754660642, + "grad_norm": 0.8238986730575562, + "learning_rate": 
0.0001957516999843213, + "loss": 2.7641, + "step": 1953 + }, + { + "epoch": 0.15769510128318942, + "grad_norm": 0.7806095480918884, + "learning_rate": 0.00019574714621564755, + "loss": 2.7155, + "step": 1954 + }, + { + "epoch": 0.15777580501977242, + "grad_norm": 0.8137761950492859, + "learning_rate": 0.0001957425900607046, + "loss": 2.7529, + "step": 1955 + }, + { + "epoch": 0.15785650875635543, + "grad_norm": 0.8383988738059998, + "learning_rate": 0.00019573803151960606, + "loss": 2.7726, + "step": 1956 + }, + { + "epoch": 0.15793721249293843, + "grad_norm": 0.8734413385391235, + "learning_rate": 0.00019573347059246549, + "loss": 2.8563, + "step": 1957 + }, + { + "epoch": 0.15801791622952144, + "grad_norm": 0.8018438816070557, + "learning_rate": 0.0001957289072793966, + "loss": 2.8031, + "step": 1958 + }, + { + "epoch": 0.15809861996610444, + "grad_norm": 0.8175764083862305, + "learning_rate": 0.0001957243415805131, + "loss": 2.7824, + "step": 1959 + }, + { + "epoch": 0.15817932370268745, + "grad_norm": 0.7642164826393127, + "learning_rate": 0.00019571977349592878, + "loss": 2.7666, + "step": 1960 + }, + { + "epoch": 0.15826002743927045, + "grad_norm": 0.7584841847419739, + "learning_rate": 0.0001957152030257575, + "loss": 2.7211, + "step": 1961 + }, + { + "epoch": 0.15834073117585346, + "grad_norm": 0.822610080242157, + "learning_rate": 0.00019571063017011312, + "loss": 2.7025, + "step": 1962 + }, + { + "epoch": 0.15842143491243646, + "grad_norm": 0.7553817629814148, + "learning_rate": 0.00019570605492910968, + "loss": 2.8122, + "step": 1963 + }, + { + "epoch": 0.15850213864901944, + "grad_norm": 0.7224497199058533, + "learning_rate": 0.0001957014773028612, + "loss": 2.7613, + "step": 1964 + }, + { + "epoch": 0.15858284238560244, + "grad_norm": 0.8563623428344727, + "learning_rate": 0.00019569689729148168, + "loss": 2.8005, + "step": 1965 + }, + { + "epoch": 0.15866354612218544, + "grad_norm": 0.7665508389472961, + "learning_rate": 0.00019569231489508537, + 
"loss": 2.7387, + "step": 1966 + }, + { + "epoch": 0.15874424985876845, + "grad_norm": 0.7788479328155518, + "learning_rate": 0.0001956877301137864, + "loss": 2.7229, + "step": 1967 + }, + { + "epoch": 0.15882495359535145, + "grad_norm": 0.7326748371124268, + "learning_rate": 0.00019568314294769908, + "loss": 2.7728, + "step": 1968 + }, + { + "epoch": 0.15890565733193446, + "grad_norm": 0.790492594242096, + "learning_rate": 0.00019567855339693772, + "loss": 2.7809, + "step": 1969 + }, + { + "epoch": 0.15898636106851746, + "grad_norm": 0.8026898503303528, + "learning_rate": 0.0001956739614616167, + "loss": 2.7267, + "step": 1970 + }, + { + "epoch": 0.15906706480510047, + "grad_norm": 0.7963770627975464, + "learning_rate": 0.00019566936714185046, + "loss": 2.7161, + "step": 1971 + }, + { + "epoch": 0.15914776854168347, + "grad_norm": 0.7708200216293335, + "learning_rate": 0.00019566477043775354, + "loss": 2.7223, + "step": 1972 + }, + { + "epoch": 0.15922847227826648, + "grad_norm": 0.8036624789237976, + "learning_rate": 0.00019566017134944042, + "loss": 2.7644, + "step": 1973 + }, + { + "epoch": 0.15930917601484948, + "grad_norm": 0.8221341967582703, + "learning_rate": 0.00019565556987702581, + "loss": 2.7629, + "step": 1974 + }, + { + "epoch": 0.15938987975143248, + "grad_norm": 0.7685462832450867, + "learning_rate": 0.00019565096602062435, + "loss": 2.8016, + "step": 1975 + }, + { + "epoch": 0.1594705834880155, + "grad_norm": 0.8173574209213257, + "learning_rate": 0.00019564635978035075, + "loss": 2.761, + "step": 1976 + }, + { + "epoch": 0.1595512872245985, + "grad_norm": 0.7567519545555115, + "learning_rate": 0.00019564175115631988, + "loss": 2.7794, + "step": 1977 + }, + { + "epoch": 0.1596319909611815, + "grad_norm": 0.8754587173461914, + "learning_rate": 0.00019563714014864654, + "loss": 2.7769, + "step": 1978 + }, + { + "epoch": 0.1597126946977645, + "grad_norm": 0.753871738910675, + "learning_rate": 0.00019563252675744569, + "loss": 2.7489, + "step": 1979 + 
}, + { + "epoch": 0.1597933984343475, + "grad_norm": 0.777103841304779, + "learning_rate": 0.00019562791098283225, + "loss": 2.7667, + "step": 1980 + }, + { + "epoch": 0.1598741021709305, + "grad_norm": 0.8227293491363525, + "learning_rate": 0.00019562329282492131, + "loss": 2.7904, + "step": 1981 + }, + { + "epoch": 0.15995480590751351, + "grad_norm": 0.7595541477203369, + "learning_rate": 0.00019561867228382797, + "loss": 2.7654, + "step": 1982 + }, + { + "epoch": 0.16003550964409652, + "grad_norm": 0.8330550789833069, + "learning_rate": 0.00019561404935966733, + "loss": 2.7533, + "step": 1983 + }, + { + "epoch": 0.16011621338067952, + "grad_norm": 0.8213297128677368, + "learning_rate": 0.0001956094240525547, + "loss": 2.8103, + "step": 1984 + }, + { + "epoch": 0.16019691711726253, + "grad_norm": 0.8046056628227234, + "learning_rate": 0.00019560479636260527, + "loss": 2.7666, + "step": 1985 + }, + { + "epoch": 0.16027762085384553, + "grad_norm": 0.7886037230491638, + "learning_rate": 0.0001956001662899344, + "loss": 2.7066, + "step": 1986 + }, + { + "epoch": 0.16035832459042854, + "grad_norm": 0.8300043940544128, + "learning_rate": 0.00019559553383465748, + "loss": 2.7617, + "step": 1987 + }, + { + "epoch": 0.16043902832701154, + "grad_norm": 0.7963815331459045, + "learning_rate": 0.00019559089899688994, + "loss": 2.6891, + "step": 1988 + }, + { + "epoch": 0.16051973206359454, + "grad_norm": 0.7794002294540405, + "learning_rate": 0.00019558626177674734, + "loss": 2.8012, + "step": 1989 + }, + { + "epoch": 0.16060043580017755, + "grad_norm": 0.8345863819122314, + "learning_rate": 0.00019558162217434526, + "loss": 2.7715, + "step": 1990 + }, + { + "epoch": 0.16068113953676055, + "grad_norm": 0.8883393406867981, + "learning_rate": 0.00019557698018979927, + "loss": 2.7863, + "step": 1991 + }, + { + "epoch": 0.16076184327334356, + "grad_norm": 0.8069450259208679, + "learning_rate": 0.0001955723358232251, + "loss": 2.759, + "step": 1992 + }, + { + "epoch": 
0.16084254700992656, + "grad_norm": 0.9014191031455994, + "learning_rate": 0.00019556768907473852, + "loss": 2.711, + "step": 1993 + }, + { + "epoch": 0.16092325074650957, + "grad_norm": 0.8429470658302307, + "learning_rate": 0.0001955630399444553, + "loss": 2.6936, + "step": 1994 + }, + { + "epoch": 0.16100395448309257, + "grad_norm": 0.7859500050544739, + "learning_rate": 0.00019555838843249128, + "loss": 2.7343, + "step": 1995 + }, + { + "epoch": 0.16108465821967557, + "grad_norm": 0.8068249821662903, + "learning_rate": 0.00019555373453896245, + "loss": 2.7492, + "step": 1996 + }, + { + "epoch": 0.16116536195625858, + "grad_norm": 0.8194023370742798, + "learning_rate": 0.00019554907826398478, + "loss": 2.7265, + "step": 1997 + }, + { + "epoch": 0.16124606569284158, + "grad_norm": 0.8139404654502869, + "learning_rate": 0.00019554441960767434, + "loss": 2.7311, + "step": 1998 + }, + { + "epoch": 0.1613267694294246, + "grad_norm": 0.8210673928260803, + "learning_rate": 0.00019553975857014718, + "loss": 2.7095, + "step": 1999 + }, + { + "epoch": 0.1614074731660076, + "grad_norm": 0.8615561723709106, + "learning_rate": 0.0001955350951515195, + "loss": 2.7458, + "step": 2000 + }, + { + "epoch": 0.1614074731660076, + "eval_loss": 2.6739437580108643, + "eval_runtime": 813.8274, + "eval_samples_per_second": 3.219, + "eval_steps_per_second": 0.537, + "step": 2000 + }, + { + "epoch": 0.1614881769025906, + "grad_norm": 0.8945594429969788, + "learning_rate": 0.0001955304293519075, + "loss": 2.776, + "step": 2001 + }, + { + "epoch": 0.1615688806391736, + "grad_norm": 0.7943438291549683, + "learning_rate": 0.00019552576117142748, + "loss": 2.7484, + "step": 2002 + }, + { + "epoch": 0.1616495843757566, + "grad_norm": 0.8264374136924744, + "learning_rate": 0.00019552109061019582, + "loss": 2.7725, + "step": 2003 + }, + { + "epoch": 0.1617302881123396, + "grad_norm": 0.7591681480407715, + "learning_rate": 0.00019551641766832887, + "loss": 2.7217, + "step": 2004 + }, + { + 
"epoch": 0.16181099184892261, + "grad_norm": 0.8275293707847595, + "learning_rate": 0.0001955117423459431, + "loss": 2.7279, + "step": 2005 + }, + { + "epoch": 0.16189169558550562, + "grad_norm": 0.8109650611877441, + "learning_rate": 0.00019550706464315504, + "loss": 2.8111, + "step": 2006 + }, + { + "epoch": 0.16197239932208862, + "grad_norm": 0.8710397481918335, + "learning_rate": 0.00019550238456008127, + "loss": 2.7166, + "step": 2007 + }, + { + "epoch": 0.16205310305867163, + "grad_norm": 0.8569270968437195, + "learning_rate": 0.00019549770209683845, + "loss": 2.7739, + "step": 2008 + }, + { + "epoch": 0.16213380679525463, + "grad_norm": 0.7927817702293396, + "learning_rate": 0.00019549301725354325, + "loss": 2.7154, + "step": 2009 + }, + { + "epoch": 0.16221451053183764, + "grad_norm": 0.7576590776443481, + "learning_rate": 0.00019548833003031244, + "loss": 2.7276, + "step": 2010 + }, + { + "epoch": 0.16229521426842064, + "grad_norm": 0.8092780709266663, + "learning_rate": 0.00019548364042726283, + "loss": 2.7494, + "step": 2011 + }, + { + "epoch": 0.16237591800500364, + "grad_norm": 0.7643424868583679, + "learning_rate": 0.0001954789484445113, + "loss": 2.7877, + "step": 2012 + }, + { + "epoch": 0.16245662174158665, + "grad_norm": 0.8235166072845459, + "learning_rate": 0.0001954742540821748, + "loss": 2.7884, + "step": 2013 + }, + { + "epoch": 0.16253732547816965, + "grad_norm": 0.9297853708267212, + "learning_rate": 0.00019546955734037034, + "loss": 2.765, + "step": 2014 + }, + { + "epoch": 0.16261802921475263, + "grad_norm": 0.7778275609016418, + "learning_rate": 0.0001954648582192149, + "loss": 2.7178, + "step": 2015 + }, + { + "epoch": 0.16269873295133563, + "grad_norm": 0.8767017126083374, + "learning_rate": 0.00019546015671882566, + "loss": 2.8254, + "step": 2016 + }, + { + "epoch": 0.16277943668791864, + "grad_norm": 0.7870603203773499, + "learning_rate": 0.0001954554528393198, + "loss": 2.797, + "step": 2017 + }, + { + "epoch": 0.16286014042450164, 
+ "grad_norm": 0.8112391233444214, + "learning_rate": 0.00019545074658081454, + "loss": 2.8562, + "step": 2018 + }, + { + "epoch": 0.16294084416108465, + "grad_norm": 0.8216677308082581, + "learning_rate": 0.00019544603794342713, + "loss": 2.7894, + "step": 2019 + }, + { + "epoch": 0.16302154789766765, + "grad_norm": 0.8445515632629395, + "learning_rate": 0.00019544132692727497, + "loss": 2.8618, + "step": 2020 + }, + { + "epoch": 0.16310225163425066, + "grad_norm": 0.8275444507598877, + "learning_rate": 0.00019543661353247548, + "loss": 2.8087, + "step": 2021 + }, + { + "epoch": 0.16318295537083366, + "grad_norm": 0.8142833709716797, + "learning_rate": 0.00019543189775914608, + "loss": 2.8075, + "step": 2022 + }, + { + "epoch": 0.16326365910741666, + "grad_norm": 0.8182976245880127, + "learning_rate": 0.0001954271796074043, + "loss": 2.8312, + "step": 2023 + }, + { + "epoch": 0.16334436284399967, + "grad_norm": 0.7629228234291077, + "learning_rate": 0.0001954224590773678, + "loss": 2.7191, + "step": 2024 + }, + { + "epoch": 0.16342506658058267, + "grad_norm": 0.8630000948905945, + "learning_rate": 0.00019541773616915418, + "loss": 2.8013, + "step": 2025 + }, + { + "epoch": 0.16350577031716568, + "grad_norm": 0.8917906880378723, + "learning_rate": 0.00019541301088288115, + "loss": 2.7573, + "step": 2026 + }, + { + "epoch": 0.16358647405374868, + "grad_norm": 0.8641694188117981, + "learning_rate": 0.00019540828321866648, + "loss": 2.7509, + "step": 2027 + }, + { + "epoch": 0.16366717779033169, + "grad_norm": 0.7687639594078064, + "learning_rate": 0.00019540355317662798, + "loss": 2.7266, + "step": 2028 + }, + { + "epoch": 0.1637478815269147, + "grad_norm": 0.7870400547981262, + "learning_rate": 0.00019539882075688355, + "loss": 2.8217, + "step": 2029 + }, + { + "epoch": 0.1638285852634977, + "grad_norm": 0.9373054504394531, + "learning_rate": 0.0001953940859595511, + "loss": 2.7562, + "step": 2030 + }, + { + "epoch": 0.1639092890000807, + "grad_norm": 
0.7941255569458008, + "learning_rate": 0.00019538934878474872, + "loss": 2.7553, + "step": 2031 + }, + { + "epoch": 0.1639899927366637, + "grad_norm": 0.735977053642273, + "learning_rate": 0.00019538460923259438, + "loss": 2.7058, + "step": 2032 + }, + { + "epoch": 0.1640706964732467, + "grad_norm": 0.7812782526016235, + "learning_rate": 0.00019537986730320625, + "loss": 2.7885, + "step": 2033 + }, + { + "epoch": 0.1641514002098297, + "grad_norm": 1.1534128189086914, + "learning_rate": 0.0001953751229967025, + "loss": 2.7139, + "step": 2034 + }, + { + "epoch": 0.16423210394641272, + "grad_norm": 0.9139814972877502, + "learning_rate": 0.00019537037631320135, + "loss": 2.7869, + "step": 2035 + }, + { + "epoch": 0.16431280768299572, + "grad_norm": 0.8330421447753906, + "learning_rate": 0.00019536562725282116, + "loss": 2.7491, + "step": 2036 + }, + { + "epoch": 0.16439351141957873, + "grad_norm": 0.9040594696998596, + "learning_rate": 0.00019536087581568026, + "loss": 2.7637, + "step": 2037 + }, + { + "epoch": 0.16447421515616173, + "grad_norm": 0.9158666729927063, + "learning_rate": 0.00019535612200189705, + "loss": 2.7709, + "step": 2038 + }, + { + "epoch": 0.16455491889274473, + "grad_norm": 0.8668088912963867, + "learning_rate": 0.00019535136581158997, + "loss": 2.7994, + "step": 2039 + }, + { + "epoch": 0.16463562262932774, + "grad_norm": 0.9179345369338989, + "learning_rate": 0.00019534660724487764, + "loss": 2.747, + "step": 2040 + }, + { + "epoch": 0.16471632636591074, + "grad_norm": 0.9690881967544556, + "learning_rate": 0.00019534184630187862, + "loss": 2.742, + "step": 2041 + }, + { + "epoch": 0.16479703010249375, + "grad_norm": 0.8478729724884033, + "learning_rate": 0.00019533708298271157, + "loss": 2.7824, + "step": 2042 + }, + { + "epoch": 0.16487773383907675, + "grad_norm": 0.8286584615707397, + "learning_rate": 0.00019533231728749518, + "loss": 2.7263, + "step": 2043 + }, + { + "epoch": 0.16495843757565976, + "grad_norm": 0.8095324039459229, + 
"learning_rate": 0.00019532754921634826, + "loss": 2.7845, + "step": 2044 + }, + { + "epoch": 0.16503914131224276, + "grad_norm": 0.9552872776985168, + "learning_rate": 0.0001953227787693896, + "loss": 2.7676, + "step": 2045 + }, + { + "epoch": 0.16511984504882576, + "grad_norm": 1.021515130996704, + "learning_rate": 0.00019531800594673815, + "loss": 2.784, + "step": 2046 + }, + { + "epoch": 0.16520054878540877, + "grad_norm": 0.7847293019294739, + "learning_rate": 0.00019531323074851276, + "loss": 2.7319, + "step": 2047 + }, + { + "epoch": 0.16528125252199177, + "grad_norm": 0.7803899049758911, + "learning_rate": 0.0001953084531748326, + "loss": 2.8321, + "step": 2048 + }, + { + "epoch": 0.16536195625857478, + "grad_norm": 0.8687692880630493, + "learning_rate": 0.0001953036732258166, + "loss": 2.763, + "step": 2049 + }, + { + "epoch": 0.16544265999515778, + "grad_norm": 0.8212031126022339, + "learning_rate": 0.00019529889090158392, + "loss": 2.7262, + "step": 2050 + }, + { + "epoch": 0.16552336373174079, + "grad_norm": 0.8460689187049866, + "learning_rate": 0.0001952941062022538, + "loss": 2.8018, + "step": 2051 + }, + { + "epoch": 0.1656040674683238, + "grad_norm": 0.9189361929893494, + "learning_rate": 0.00019528931912794547, + "loss": 2.8079, + "step": 2052 + }, + { + "epoch": 0.1656847712049068, + "grad_norm": 0.9529987573623657, + "learning_rate": 0.00019528452967877816, + "loss": 2.8015, + "step": 2053 + }, + { + "epoch": 0.1657654749414898, + "grad_norm": 0.8468493223190308, + "learning_rate": 0.00019527973785487133, + "loss": 2.8013, + "step": 2054 + }, + { + "epoch": 0.1658461786780728, + "grad_norm": 0.8150945901870728, + "learning_rate": 0.00019527494365634436, + "loss": 2.7975, + "step": 2055 + }, + { + "epoch": 0.1659268824146558, + "grad_norm": 0.814942479133606, + "learning_rate": 0.00019527014708331674, + "loss": 2.7503, + "step": 2056 + }, + { + "epoch": 0.1660075861512388, + "grad_norm": 0.7841517329216003, + "learning_rate": 
0.000195265348135908, + "loss": 2.7921, + "step": 2057 + }, + { + "epoch": 0.16608828988782182, + "grad_norm": 0.7603738903999329, + "learning_rate": 0.0001952605468142378, + "loss": 2.7658, + "step": 2058 + }, + { + "epoch": 0.16616899362440482, + "grad_norm": 0.8460882902145386, + "learning_rate": 0.00019525574311842574, + "loss": 2.7644, + "step": 2059 + }, + { + "epoch": 0.16624969736098782, + "grad_norm": 0.8633555173873901, + "learning_rate": 0.00019525093704859156, + "loss": 2.7956, + "step": 2060 + }, + { + "epoch": 0.16633040109757083, + "grad_norm": 0.7700977325439453, + "learning_rate": 0.00019524612860485503, + "loss": 2.7103, + "step": 2061 + }, + { + "epoch": 0.16641110483415383, + "grad_norm": 0.888770580291748, + "learning_rate": 0.00019524131778733602, + "loss": 2.7325, + "step": 2062 + }, + { + "epoch": 0.16649180857073684, + "grad_norm": 0.8338149189949036, + "learning_rate": 0.00019523650459615438, + "loss": 2.7533, + "step": 2063 + }, + { + "epoch": 0.16657251230731984, + "grad_norm": 0.7723987698554993, + "learning_rate": 0.0001952316890314301, + "loss": 2.7316, + "step": 2064 + }, + { + "epoch": 0.16665321604390285, + "grad_norm": 0.8952934145927429, + "learning_rate": 0.0001952268710932832, + "loss": 2.7825, + "step": 2065 + }, + { + "epoch": 0.16673391978048582, + "grad_norm": 0.8201496601104736, + "learning_rate": 0.00019522205078183378, + "loss": 2.7162, + "step": 2066 + }, + { + "epoch": 0.16681462351706883, + "grad_norm": 0.7733781337738037, + "learning_rate": 0.00019521722809720188, + "loss": 2.7834, + "step": 2067 + }, + { + "epoch": 0.16689532725365183, + "grad_norm": 0.8285118937492371, + "learning_rate": 0.0001952124030395078, + "loss": 2.8475, + "step": 2068 + }, + { + "epoch": 0.16697603099023484, + "grad_norm": 0.84097820520401, + "learning_rate": 0.00019520757560887174, + "loss": 2.784, + "step": 2069 + }, + { + "epoch": 0.16705673472681784, + "grad_norm": 0.7336563467979431, + "learning_rate": 0.000195202745805414, + "loss": 
2.7663, + "step": 2070 + }, + { + "epoch": 0.16713743846340084, + "grad_norm": 0.8359388113021851, + "learning_rate": 0.000195197913629255, + "loss": 2.7931, + "step": 2071 + }, + { + "epoch": 0.16721814219998385, + "grad_norm": 0.8272559642791748, + "learning_rate": 0.0001951930790805151, + "loss": 2.8578, + "step": 2072 + }, + { + "epoch": 0.16729884593656685, + "grad_norm": 0.7970743179321289, + "learning_rate": 0.00019518824215931487, + "loss": 2.8148, + "step": 2073 + }, + { + "epoch": 0.16737954967314986, + "grad_norm": 0.856200098991394, + "learning_rate": 0.00019518340286577482, + "loss": 2.8067, + "step": 2074 + }, + { + "epoch": 0.16746025340973286, + "grad_norm": 0.7581893801689148, + "learning_rate": 0.00019517856120001556, + "loss": 2.7339, + "step": 2075 + }, + { + "epoch": 0.16754095714631587, + "grad_norm": 0.8488386869430542, + "learning_rate": 0.00019517371716215774, + "loss": 2.7332, + "step": 2076 + }, + { + "epoch": 0.16762166088289887, + "grad_norm": 0.7488275170326233, + "learning_rate": 0.00019516887075232212, + "loss": 2.7734, + "step": 2077 + }, + { + "epoch": 0.16770236461948188, + "grad_norm": 0.9173932075500488, + "learning_rate": 0.00019516402197062945, + "loss": 2.7792, + "step": 2078 + }, + { + "epoch": 0.16778306835606488, + "grad_norm": 0.8200702667236328, + "learning_rate": 0.0001951591708172006, + "loss": 2.8046, + "step": 2079 + }, + { + "epoch": 0.16786377209264788, + "grad_norm": 0.8270781636238098, + "learning_rate": 0.00019515431729215642, + "loss": 2.7467, + "step": 2080 + }, + { + "epoch": 0.1679444758292309, + "grad_norm": 0.8660609722137451, + "learning_rate": 0.00019514946139561799, + "loss": 2.8169, + "step": 2081 + }, + { + "epoch": 0.1680251795658139, + "grad_norm": 0.78753262758255, + "learning_rate": 0.0001951446031277062, + "loss": 2.7388, + "step": 2082 + }, + { + "epoch": 0.1681058833023969, + "grad_norm": 0.791593074798584, + "learning_rate": 0.00019513974248854224, + "loss": 2.8776, + "step": 2083 + }, + { + 
"epoch": 0.1681865870389799, + "grad_norm": 0.7883535623550415, + "learning_rate": 0.0001951348794782472, + "loss": 2.78, + "step": 2084 + }, + { + "epoch": 0.1682672907755629, + "grad_norm": 0.7877013087272644, + "learning_rate": 0.00019513001409694224, + "loss": 2.7559, + "step": 2085 + }, + { + "epoch": 0.1683479945121459, + "grad_norm": 0.8838450908660889, + "learning_rate": 0.00019512514634474864, + "loss": 2.7489, + "step": 2086 + }, + { + "epoch": 0.16842869824872891, + "grad_norm": 0.7751588821411133, + "learning_rate": 0.00019512027622178775, + "loss": 2.6832, + "step": 2087 + }, + { + "epoch": 0.16850940198531192, + "grad_norm": 0.90345299243927, + "learning_rate": 0.00019511540372818095, + "loss": 2.8189, + "step": 2088 + }, + { + "epoch": 0.16859010572189492, + "grad_norm": 0.7820938229560852, + "learning_rate": 0.00019511052886404966, + "loss": 2.7655, + "step": 2089 + }, + { + "epoch": 0.16867080945847793, + "grad_norm": 0.8250375986099243, + "learning_rate": 0.00019510565162951537, + "loss": 2.7866, + "step": 2090 + }, + { + "epoch": 0.16875151319506093, + "grad_norm": 0.8063845634460449, + "learning_rate": 0.00019510077202469962, + "loss": 2.7774, + "step": 2091 + }, + { + "epoch": 0.16883221693164394, + "grad_norm": 0.7627965807914734, + "learning_rate": 0.00019509589004972403, + "loss": 2.7201, + "step": 2092 + }, + { + "epoch": 0.16891292066822694, + "grad_norm": 0.8392470479011536, + "learning_rate": 0.00019509100570471027, + "loss": 2.7613, + "step": 2093 + }, + { + "epoch": 0.16899362440480994, + "grad_norm": 0.7807552814483643, + "learning_rate": 0.0001950861189897801, + "loss": 2.7451, + "step": 2094 + }, + { + "epoch": 0.16907432814139295, + "grad_norm": 0.7829259634017944, + "learning_rate": 0.00019508122990505528, + "loss": 2.7128, + "step": 2095 + }, + { + "epoch": 0.16915503187797595, + "grad_norm": 0.7793046832084656, + "learning_rate": 0.00019507633845065766, + "loss": 2.7849, + "step": 2096 + }, + { + "epoch": 0.16923573561455896, + 
"grad_norm": 0.869752824306488, + "learning_rate": 0.00019507144462670915, + "loss": 2.7882, + "step": 2097 + }, + { + "epoch": 0.16931643935114196, + "grad_norm": 0.7550783753395081, + "learning_rate": 0.00019506654843333174, + "loss": 2.7211, + "step": 2098 + }, + { + "epoch": 0.16939714308772497, + "grad_norm": 0.8364891409873962, + "learning_rate": 0.0001950616498706474, + "loss": 2.7171, + "step": 2099 + }, + { + "epoch": 0.16947784682430797, + "grad_norm": 0.8026537299156189, + "learning_rate": 0.0001950567489387783, + "loss": 2.8362, + "step": 2100 + }, + { + "epoch": 0.16955855056089097, + "grad_norm": 0.8073398470878601, + "learning_rate": 0.00019505184563784652, + "loss": 2.7635, + "step": 2101 + }, + { + "epoch": 0.16963925429747398, + "grad_norm": 0.8168368935585022, + "learning_rate": 0.00019504693996797424, + "loss": 2.7553, + "step": 2102 + }, + { + "epoch": 0.16971995803405698, + "grad_norm": 0.7933681011199951, + "learning_rate": 0.0001950420319292838, + "loss": 2.7887, + "step": 2103 + }, + { + "epoch": 0.16980066177064, + "grad_norm": 0.8326540589332581, + "learning_rate": 0.00019503712152189748, + "loss": 2.7844, + "step": 2104 + }, + { + "epoch": 0.169881365507223, + "grad_norm": 0.8357202410697937, + "learning_rate": 0.00019503220874593765, + "loss": 2.7744, + "step": 2105 + }, + { + "epoch": 0.169962069243806, + "grad_norm": 0.8541022539138794, + "learning_rate": 0.00019502729360152676, + "loss": 2.7867, + "step": 2106 + }, + { + "epoch": 0.170042772980389, + "grad_norm": 0.8338841795921326, + "learning_rate": 0.0001950223760887873, + "loss": 2.7208, + "step": 2107 + }, + { + "epoch": 0.170123476716972, + "grad_norm": 0.8824255466461182, + "learning_rate": 0.00019501745620784187, + "loss": 2.7658, + "step": 2108 + }, + { + "epoch": 0.170204180453555, + "grad_norm": 0.7710463404655457, + "learning_rate": 0.00019501253395881306, + "loss": 2.7167, + "step": 2109 + }, + { + "epoch": 0.17028488419013801, + "grad_norm": 0.7740076184272766, + 
"learning_rate": 0.0001950076093418235, + "loss": 2.7251, + "step": 2110 + }, + { + "epoch": 0.17036558792672102, + "grad_norm": 0.8258434534072876, + "learning_rate": 0.00019500268235699597, + "loss": 2.7533, + "step": 2111 + }, + { + "epoch": 0.17044629166330402, + "grad_norm": 0.8347997069358826, + "learning_rate": 0.00019499775300445326, + "loss": 2.7372, + "step": 2112 + }, + { + "epoch": 0.17052699539988703, + "grad_norm": 0.8246529698371887, + "learning_rate": 0.00019499282128431823, + "loss": 2.7458, + "step": 2113 + }, + { + "epoch": 0.17060769913647003, + "grad_norm": 0.8510704040527344, + "learning_rate": 0.00019498788719671378, + "loss": 2.8144, + "step": 2114 + }, + { + "epoch": 0.17068840287305304, + "grad_norm": 0.7793454527854919, + "learning_rate": 0.00019498295074176286, + "loss": 2.7927, + "step": 2115 + }, + { + "epoch": 0.17076910660963604, + "grad_norm": 0.7888665199279785, + "learning_rate": 0.00019497801191958853, + "loss": 2.7156, + "step": 2116 + }, + { + "epoch": 0.17084981034621902, + "grad_norm": 0.8502812385559082, + "learning_rate": 0.00019497307073031386, + "loss": 2.7906, + "step": 2117 + }, + { + "epoch": 0.17093051408280202, + "grad_norm": 0.8376502990722656, + "learning_rate": 0.00019496812717406203, + "loss": 2.7354, + "step": 2118 + }, + { + "epoch": 0.17101121781938503, + "grad_norm": 0.7974401116371155, + "learning_rate": 0.0001949631812509562, + "loss": 2.7755, + "step": 2119 + }, + { + "epoch": 0.17109192155596803, + "grad_norm": 0.7760190963745117, + "learning_rate": 0.00019495823296111965, + "loss": 2.7694, + "step": 2120 + }, + { + "epoch": 0.17117262529255103, + "grad_norm": 0.7721701860427856, + "learning_rate": 0.00019495328230467575, + "loss": 2.7474, + "step": 2121 + }, + { + "epoch": 0.17125332902913404, + "grad_norm": 0.7360577583312988, + "learning_rate": 0.0001949483292817478, + "loss": 2.8044, + "step": 2122 + }, + { + "epoch": 0.17133403276571704, + "grad_norm": 0.7536107301712036, + "learning_rate": 
0.0001949433738924593, + "loss": 2.8165, + "step": 2123 + }, + { + "epoch": 0.17141473650230005, + "grad_norm": 0.7668276429176331, + "learning_rate": 0.00019493841613693375, + "loss": 2.7964, + "step": 2124 + }, + { + "epoch": 0.17149544023888305, + "grad_norm": 0.8323161602020264, + "learning_rate": 0.0001949334560152947, + "loss": 2.7395, + "step": 2125 + }, + { + "epoch": 0.17157614397546606, + "grad_norm": 0.8132179975509644, + "learning_rate": 0.00019492849352766576, + "loss": 2.7511, + "step": 2126 + }, + { + "epoch": 0.17165684771204906, + "grad_norm": 0.7806998491287231, + "learning_rate": 0.0001949235286741706, + "loss": 2.7649, + "step": 2127 + }, + { + "epoch": 0.17173755144863206, + "grad_norm": 0.8315939903259277, + "learning_rate": 0.00019491856145493298, + "loss": 2.7742, + "step": 2128 + }, + { + "epoch": 0.17181825518521507, + "grad_norm": 0.8368063569068909, + "learning_rate": 0.00019491359187007672, + "loss": 2.7667, + "step": 2129 + }, + { + "epoch": 0.17189895892179807, + "grad_norm": 0.9183431267738342, + "learning_rate": 0.0001949086199197256, + "loss": 2.7444, + "step": 2130 + }, + { + "epoch": 0.17197966265838108, + "grad_norm": 0.7824065089225769, + "learning_rate": 0.0001949036456040036, + "loss": 2.7455, + "step": 2131 + }, + { + "epoch": 0.17206036639496408, + "grad_norm": 0.777974009513855, + "learning_rate": 0.00019489866892303468, + "loss": 2.7466, + "step": 2132 + }, + { + "epoch": 0.17214107013154709, + "grad_norm": 0.8068816065788269, + "learning_rate": 0.00019489368987694286, + "loss": 2.7081, + "step": 2133 + }, + { + "epoch": 0.1722217738681301, + "grad_norm": 0.8757622838020325, + "learning_rate": 0.00019488870846585222, + "loss": 2.8005, + "step": 2134 + }, + { + "epoch": 0.1723024776047131, + "grad_norm": 0.7967162728309631, + "learning_rate": 0.00019488372468988693, + "loss": 2.7737, + "step": 2135 + }, + { + "epoch": 0.1723831813412961, + "grad_norm": 0.7700283527374268, + "learning_rate": 0.00019487873854917117, + 
"loss": 2.7431, + "step": 2136 + }, + { + "epoch": 0.1724638850778791, + "grad_norm": 0.8259130716323853, + "learning_rate": 0.00019487375004382927, + "loss": 2.7635, + "step": 2137 + }, + { + "epoch": 0.1725445888144621, + "grad_norm": 0.8253815770149231, + "learning_rate": 0.0001948687591739855, + "loss": 2.7046, + "step": 2138 + }, + { + "epoch": 0.1726252925510451, + "grad_norm": 0.8087987303733826, + "learning_rate": 0.00019486376593976426, + "loss": 2.7728, + "step": 2139 + }, + { + "epoch": 0.17270599628762812, + "grad_norm": 0.8437588214874268, + "learning_rate": 0.00019485877034128998, + "loss": 2.7606, + "step": 2140 + }, + { + "epoch": 0.17278670002421112, + "grad_norm": 0.8416075110435486, + "learning_rate": 0.00019485377237868723, + "loss": 2.7396, + "step": 2141 + }, + { + "epoch": 0.17286740376079412, + "grad_norm": 0.784275472164154, + "learning_rate": 0.00019484877205208046, + "loss": 2.766, + "step": 2142 + }, + { + "epoch": 0.17294810749737713, + "grad_norm": 0.8082472681999207, + "learning_rate": 0.0001948437693615944, + "loss": 2.8, + "step": 2143 + }, + { + "epoch": 0.17302881123396013, + "grad_norm": 0.8904329538345337, + "learning_rate": 0.00019483876430735365, + "loss": 2.6579, + "step": 2144 + }, + { + "epoch": 0.17310951497054314, + "grad_norm": 0.7864851355552673, + "learning_rate": 0.000194833756889483, + "loss": 2.8231, + "step": 2145 + }, + { + "epoch": 0.17319021870712614, + "grad_norm": 0.7445049285888672, + "learning_rate": 0.00019482874710810723, + "loss": 2.7498, + "step": 2146 + }, + { + "epoch": 0.17327092244370915, + "grad_norm": 0.8266116380691528, + "learning_rate": 0.00019482373496335117, + "loss": 2.7152, + "step": 2147 + }, + { + "epoch": 0.17335162618029215, + "grad_norm": 0.7712300419807434, + "learning_rate": 0.0001948187204553398, + "loss": 2.7751, + "step": 2148 + }, + { + "epoch": 0.17343232991687516, + "grad_norm": 0.7472708225250244, + "learning_rate": 0.00019481370358419807, + "loss": 2.7397, + "step": 2149 + }, 
+ { + "epoch": 0.17351303365345816, + "grad_norm": 0.763454020023346, + "learning_rate": 0.00019480868435005095, + "loss": 2.7174, + "step": 2150 + }, + { + "epoch": 0.17359373739004116, + "grad_norm": 0.8187674283981323, + "learning_rate": 0.00019480366275302362, + "loss": 2.8424, + "step": 2151 + }, + { + "epoch": 0.17367444112662417, + "grad_norm": 0.8183228373527527, + "learning_rate": 0.0001947986387932412, + "loss": 2.7351, + "step": 2152 + }, + { + "epoch": 0.17375514486320717, + "grad_norm": 0.807231605052948, + "learning_rate": 0.00019479361247082884, + "loss": 2.8054, + "step": 2153 + }, + { + "epoch": 0.17383584859979018, + "grad_norm": 0.8383626341819763, + "learning_rate": 0.00019478858378591194, + "loss": 2.7181, + "step": 2154 + }, + { + "epoch": 0.17391655233637318, + "grad_norm": 0.8330298662185669, + "learning_rate": 0.0001947835527386157, + "loss": 2.748, + "step": 2155 + }, + { + "epoch": 0.17399725607295619, + "grad_norm": 0.8433073163032532, + "learning_rate": 0.0001947785193290656, + "loss": 2.8115, + "step": 2156 + }, + { + "epoch": 0.1740779598095392, + "grad_norm": 0.8873384594917297, + "learning_rate": 0.000194773483557387, + "loss": 2.8288, + "step": 2157 + }, + { + "epoch": 0.1741586635461222, + "grad_norm": 0.8399423360824585, + "learning_rate": 0.00019476844542370546, + "loss": 2.7514, + "step": 2158 + }, + { + "epoch": 0.1742393672827052, + "grad_norm": 0.7808830738067627, + "learning_rate": 0.00019476340492814655, + "loss": 2.7003, + "step": 2159 + }, + { + "epoch": 0.1743200710192882, + "grad_norm": 0.8268750905990601, + "learning_rate": 0.00019475836207083589, + "loss": 2.7961, + "step": 2160 + }, + { + "epoch": 0.1744007747558712, + "grad_norm": 0.9144260883331299, + "learning_rate": 0.0001947533168518991, + "loss": 2.769, + "step": 2161 + }, + { + "epoch": 0.1744814784924542, + "grad_norm": 0.8409113883972168, + "learning_rate": 0.000194748269271462, + "loss": 2.8004, + "step": 2162 + }, + { + "epoch": 0.17456218222903722, + 
"grad_norm": 0.8747037649154663, + "learning_rate": 0.00019474321932965035, + "loss": 2.7602, + "step": 2163 + }, + { + "epoch": 0.17464288596562022, + "grad_norm": 0.8582575917243958, + "learning_rate": 0.00019473816702659, + "loss": 2.7292, + "step": 2164 + }, + { + "epoch": 0.17472358970220322, + "grad_norm": 0.7402843832969666, + "learning_rate": 0.0001947331123624069, + "loss": 2.7287, + "step": 2165 + }, + { + "epoch": 0.17480429343878623, + "grad_norm": 0.8019410967826843, + "learning_rate": 0.000194728055337227, + "loss": 2.7451, + "step": 2166 + }, + { + "epoch": 0.17488499717536923, + "grad_norm": 0.9137046337127686, + "learning_rate": 0.0001947229959511763, + "loss": 2.808, + "step": 2167 + }, + { + "epoch": 0.1749657009119522, + "grad_norm": 0.7539177536964417, + "learning_rate": 0.000194717934204381, + "loss": 2.7031, + "step": 2168 + }, + { + "epoch": 0.17504640464853521, + "grad_norm": 0.8611089587211609, + "learning_rate": 0.00019471287009696715, + "loss": 2.8751, + "step": 2169 + }, + { + "epoch": 0.17512710838511822, + "grad_norm": 0.906134843826294, + "learning_rate": 0.000194707803629061, + "loss": 2.9163, + "step": 2170 + }, + { + "epoch": 0.17520781212170122, + "grad_norm": 0.8066667318344116, + "learning_rate": 0.00019470273480078879, + "loss": 2.7549, + "step": 2171 + }, + { + "epoch": 0.17528851585828423, + "grad_norm": 0.7962325215339661, + "learning_rate": 0.00019469766361227692, + "loss": 2.7964, + "step": 2172 + }, + { + "epoch": 0.17536921959486723, + "grad_norm": 0.7802287340164185, + "learning_rate": 0.0001946925900636517, + "loss": 2.7022, + "step": 2173 + }, + { + "epoch": 0.17544992333145024, + "grad_norm": 0.783478319644928, + "learning_rate": 0.0001946875141550396, + "loss": 2.7798, + "step": 2174 + }, + { + "epoch": 0.17553062706803324, + "grad_norm": 0.8006815314292908, + "learning_rate": 0.00019468243588656713, + "loss": 2.7345, + "step": 2175 + }, + { + "epoch": 0.17561133080461624, + "grad_norm": 0.7566428184509277, + 
"learning_rate": 0.00019467735525836085, + "loss": 2.7822, + "step": 2176 + }, + { + "epoch": 0.17569203454119925, + "grad_norm": 0.772282600402832, + "learning_rate": 0.0001946722722705474, + "loss": 2.7346, + "step": 2177 + }, + { + "epoch": 0.17577273827778225, + "grad_norm": 0.7808345556259155, + "learning_rate": 0.00019466718692325347, + "loss": 2.755, + "step": 2178 + }, + { + "epoch": 0.17585344201436526, + "grad_norm": 0.8150362372398376, + "learning_rate": 0.00019466209921660576, + "loss": 2.7691, + "step": 2179 + }, + { + "epoch": 0.17593414575094826, + "grad_norm": 0.7952939867973328, + "learning_rate": 0.0001946570091507311, + "loss": 2.8175, + "step": 2180 + }, + { + "epoch": 0.17601484948753127, + "grad_norm": 0.8211334347724915, + "learning_rate": 0.00019465191672575634, + "loss": 2.7561, + "step": 2181 + }, + { + "epoch": 0.17609555322411427, + "grad_norm": 0.7726178765296936, + "learning_rate": 0.00019464682194180838, + "loss": 2.7435, + "step": 2182 + }, + { + "epoch": 0.17617625696069728, + "grad_norm": 0.7614372372627258, + "learning_rate": 0.00019464172479901422, + "loss": 2.7301, + "step": 2183 + }, + { + "epoch": 0.17625696069728028, + "grad_norm": 0.7818898558616638, + "learning_rate": 0.00019463662529750083, + "loss": 2.6964, + "step": 2184 + }, + { + "epoch": 0.17633766443386328, + "grad_norm": 0.7849796414375305, + "learning_rate": 0.0001946315234373954, + "loss": 2.7431, + "step": 2185 + }, + { + "epoch": 0.1764183681704463, + "grad_norm": 0.7939459085464478, + "learning_rate": 0.00019462641921882506, + "loss": 2.7126, + "step": 2186 + }, + { + "epoch": 0.1764990719070293, + "grad_norm": 0.8391629457473755, + "learning_rate": 0.00019462131264191696, + "loss": 2.8394, + "step": 2187 + }, + { + "epoch": 0.1765797756436123, + "grad_norm": 0.7548067569732666, + "learning_rate": 0.0001946162037067984, + "loss": 2.7315, + "step": 2188 + }, + { + "epoch": 0.1766604793801953, + "grad_norm": 0.8278634548187256, + "learning_rate": 
0.00019461109241359674, + "loss": 2.8298, + "step": 2189 + }, + { + "epoch": 0.1767411831167783, + "grad_norm": 0.8275949954986572, + "learning_rate": 0.00019460597876243933, + "loss": 2.8072, + "step": 2190 + }, + { + "epoch": 0.1768218868533613, + "grad_norm": 0.7720363140106201, + "learning_rate": 0.00019460086275345363, + "loss": 2.7478, + "step": 2191 + }, + { + "epoch": 0.17690259058994431, + "grad_norm": 0.7795925140380859, + "learning_rate": 0.00019459574438676714, + "loss": 2.7633, + "step": 2192 + }, + { + "epoch": 0.17698329432652732, + "grad_norm": 0.7722043991088867, + "learning_rate": 0.00019459062366250743, + "loss": 2.8001, + "step": 2193 + }, + { + "epoch": 0.17706399806311032, + "grad_norm": 0.8560587763786316, + "learning_rate": 0.00019458550058080212, + "loss": 2.7494, + "step": 2194 + }, + { + "epoch": 0.17714470179969333, + "grad_norm": 0.7473754286766052, + "learning_rate": 0.00019458037514177886, + "loss": 2.7112, + "step": 2195 + }, + { + "epoch": 0.17722540553627633, + "grad_norm": 0.7625827789306641, + "learning_rate": 0.00019457524734556542, + "loss": 2.7496, + "step": 2196 + }, + { + "epoch": 0.17730610927285934, + "grad_norm": 0.7809351682662964, + "learning_rate": 0.00019457011719228962, + "loss": 2.7764, + "step": 2197 + }, + { + "epoch": 0.17738681300944234, + "grad_norm": 0.7846190333366394, + "learning_rate": 0.00019456498468207927, + "loss": 2.7189, + "step": 2198 + }, + { + "epoch": 0.17746751674602534, + "grad_norm": 0.7919551134109497, + "learning_rate": 0.0001945598498150623, + "loss": 2.7798, + "step": 2199 + }, + { + "epoch": 0.17754822048260835, + "grad_norm": 0.796183705329895, + "learning_rate": 0.0001945547125913667, + "loss": 2.7498, + "step": 2200 + }, + { + "epoch": 0.17762892421919135, + "grad_norm": 0.791668176651001, + "learning_rate": 0.0001945495730111205, + "loss": 2.7638, + "step": 2201 + }, + { + "epoch": 0.17770962795577436, + "grad_norm": 0.8303191661834717, + "learning_rate": 0.0001945444310744518, + 
"loss": 2.8079, + "step": 2202 + }, + { + "epoch": 0.17779033169235736, + "grad_norm": 0.8245917558670044, + "learning_rate": 0.00019453928678148872, + "loss": 2.7222, + "step": 2203 + }, + { + "epoch": 0.17787103542894037, + "grad_norm": 0.793456494808197, + "learning_rate": 0.0001945341401323595, + "loss": 2.8532, + "step": 2204 + }, + { + "epoch": 0.17795173916552337, + "grad_norm": 0.7574856877326965, + "learning_rate": 0.00019452899112719235, + "loss": 2.7361, + "step": 2205 + }, + { + "epoch": 0.17803244290210637, + "grad_norm": 0.7748556733131409, + "learning_rate": 0.0001945238397661157, + "loss": 2.7423, + "step": 2206 + }, + { + "epoch": 0.17811314663868938, + "grad_norm": 0.8973588347434998, + "learning_rate": 0.00019451868604925782, + "loss": 2.7604, + "step": 2207 + }, + { + "epoch": 0.17819385037527238, + "grad_norm": 0.7613589763641357, + "learning_rate": 0.00019451352997674722, + "loss": 2.7168, + "step": 2208 + }, + { + "epoch": 0.1782745541118554, + "grad_norm": 0.8152763247489929, + "learning_rate": 0.00019450837154871243, + "loss": 2.7904, + "step": 2209 + }, + { + "epoch": 0.1783552578484384, + "grad_norm": 0.8115083575248718, + "learning_rate": 0.00019450321076528194, + "loss": 2.7595, + "step": 2210 + }, + { + "epoch": 0.1784359615850214, + "grad_norm": 0.772665798664093, + "learning_rate": 0.00019449804762658438, + "loss": 2.7125, + "step": 2211 + }, + { + "epoch": 0.1785166653216044, + "grad_norm": 0.8002723455429077, + "learning_rate": 0.0001944928821327485, + "loss": 2.8121, + "step": 2212 + }, + { + "epoch": 0.1785973690581874, + "grad_norm": 0.8354858160018921, + "learning_rate": 0.00019448771428390296, + "loss": 2.8662, + "step": 2213 + }, + { + "epoch": 0.1786780727947704, + "grad_norm": 0.7799130082130432, + "learning_rate": 0.0001944825440801766, + "loss": 2.7247, + "step": 2214 + }, + { + "epoch": 0.1787587765313534, + "grad_norm": 0.810265302658081, + "learning_rate": 0.00019447737152169828, + "loss": 2.7095, + "step": 2215 + }, + 
{ + "epoch": 0.17883948026793642, + "grad_norm": 0.8305599093437195, + "learning_rate": 0.00019447219660859687, + "loss": 2.7448, + "step": 2216 + }, + { + "epoch": 0.17892018400451942, + "grad_norm": 0.7899554371833801, + "learning_rate": 0.00019446701934100138, + "loss": 2.7295, + "step": 2217 + }, + { + "epoch": 0.17900088774110243, + "grad_norm": 0.7675672173500061, + "learning_rate": 0.00019446183971904082, + "loss": 2.7236, + "step": 2218 + }, + { + "epoch": 0.1790815914776854, + "grad_norm": 0.8717279434204102, + "learning_rate": 0.0001944566577428443, + "loss": 2.8044, + "step": 2219 + }, + { + "epoch": 0.1791622952142684, + "grad_norm": 0.8151431679725647, + "learning_rate": 0.00019445147341254094, + "loss": 2.7753, + "step": 2220 + }, + { + "epoch": 0.1792429989508514, + "grad_norm": 0.8481619358062744, + "learning_rate": 0.00019444628672825998, + "loss": 2.7954, + "step": 2221 + }, + { + "epoch": 0.17932370268743442, + "grad_norm": 0.8133199214935303, + "learning_rate": 0.00019444109769013065, + "loss": 2.7235, + "step": 2222 + }, + { + "epoch": 0.17940440642401742, + "grad_norm": 0.8250097036361694, + "learning_rate": 0.00019443590629828232, + "loss": 2.8352, + "step": 2223 + }, + { + "epoch": 0.17948511016060043, + "grad_norm": 0.8279787302017212, + "learning_rate": 0.00019443071255284433, + "loss": 2.7513, + "step": 2224 + }, + { + "epoch": 0.17956581389718343, + "grad_norm": 0.7781538963317871, + "learning_rate": 0.00019442551645394612, + "loss": 2.7239, + "step": 2225 + }, + { + "epoch": 0.17964651763376643, + "grad_norm": 0.7718615531921387, + "learning_rate": 0.00019442031800171727, + "loss": 2.7387, + "step": 2226 + }, + { + "epoch": 0.17972722137034944, + "grad_norm": 0.7704512476921082, + "learning_rate": 0.00019441511719628724, + "loss": 2.792, + "step": 2227 + }, + { + "epoch": 0.17980792510693244, + "grad_norm": 0.8290835618972778, + "learning_rate": 0.00019440991403778566, + "loss": 2.7745, + "step": 2228 + }, + { + "epoch": 
0.17988862884351545, + "grad_norm": 0.8408392667770386, + "learning_rate": 0.00019440470852634227, + "loss": 2.7688, + "step": 2229 + }, + { + "epoch": 0.17996933258009845, + "grad_norm": 0.8503465056419373, + "learning_rate": 0.00019439950066208676, + "loss": 2.6747, + "step": 2230 + }, + { + "epoch": 0.18005003631668146, + "grad_norm": 0.8213364481925964, + "learning_rate": 0.0001943942904451489, + "loss": 2.7212, + "step": 2231 + }, + { + "epoch": 0.18013074005326446, + "grad_norm": 0.8511209487915039, + "learning_rate": 0.0001943890778756586, + "loss": 2.701, + "step": 2232 + }, + { + "epoch": 0.18021144378984746, + "grad_norm": 0.8034417033195496, + "learning_rate": 0.00019438386295374577, + "loss": 2.7029, + "step": 2233 + }, + { + "epoch": 0.18029214752643047, + "grad_norm": 0.7603715658187866, + "learning_rate": 0.0001943786456795403, + "loss": 2.7201, + "step": 2234 + }, + { + "epoch": 0.18037285126301347, + "grad_norm": 0.9210647940635681, + "learning_rate": 0.0001943734260531723, + "loss": 2.7847, + "step": 2235 + }, + { + "epoch": 0.18045355499959648, + "grad_norm": 0.7429665923118591, + "learning_rate": 0.00019436820407477186, + "loss": 2.7493, + "step": 2236 + }, + { + "epoch": 0.18053425873617948, + "grad_norm": 0.8290510773658752, + "learning_rate": 0.00019436297974446905, + "loss": 2.7711, + "step": 2237 + }, + { + "epoch": 0.18061496247276249, + "grad_norm": 0.7593570947647095, + "learning_rate": 0.0001943577530623941, + "loss": 2.7539, + "step": 2238 + }, + { + "epoch": 0.1806956662093455, + "grad_norm": 0.8222225308418274, + "learning_rate": 0.00019435252402867734, + "loss": 2.7703, + "step": 2239 + }, + { + "epoch": 0.1807763699459285, + "grad_norm": 0.8280842900276184, + "learning_rate": 0.00019434729264344898, + "loss": 2.7966, + "step": 2240 + }, + { + "epoch": 0.1808570736825115, + "grad_norm": 0.8258495926856995, + "learning_rate": 0.00019434205890683952, + "loss": 2.759, + "step": 2241 + }, + { + "epoch": 0.1809377774190945, + 
"grad_norm": 0.8294420838356018, + "learning_rate": 0.00019433682281897932, + "loss": 2.6996, + "step": 2242 + }, + { + "epoch": 0.1810184811556775, + "grad_norm": 0.8258811235427856, + "learning_rate": 0.0001943315843799989, + "loss": 2.774, + "step": 2243 + }, + { + "epoch": 0.1810991848922605, + "grad_norm": 0.8035838007926941, + "learning_rate": 0.0001943263435900288, + "loss": 2.7806, + "step": 2244 + }, + { + "epoch": 0.18117988862884352, + "grad_norm": 0.7900332808494568, + "learning_rate": 0.00019432110044919964, + "loss": 2.7462, + "step": 2245 + }, + { + "epoch": 0.18126059236542652, + "grad_norm": 0.8126730918884277, + "learning_rate": 0.00019431585495764212, + "loss": 2.6913, + "step": 2246 + }, + { + "epoch": 0.18134129610200952, + "grad_norm": 0.8411321043968201, + "learning_rate": 0.00019431060711548695, + "loss": 2.7503, + "step": 2247 + }, + { + "epoch": 0.18142199983859253, + "grad_norm": 0.7712867856025696, + "learning_rate": 0.0001943053569228649, + "loss": 2.7703, + "step": 2248 + }, + { + "epoch": 0.18150270357517553, + "grad_norm": 0.9093566536903381, + "learning_rate": 0.00019430010437990688, + "loss": 2.7838, + "step": 2249 + }, + { + "epoch": 0.18158340731175854, + "grad_norm": 0.8184913396835327, + "learning_rate": 0.00019429484948674372, + "loss": 2.8167, + "step": 2250 + }, + { + "epoch": 0.18166411104834154, + "grad_norm": 0.7215915322303772, + "learning_rate": 0.00019428959224350643, + "loss": 2.739, + "step": 2251 + }, + { + "epoch": 0.18174481478492455, + "grad_norm": 0.7842726111412048, + "learning_rate": 0.000194284332650326, + "loss": 2.8547, + "step": 2252 + }, + { + "epoch": 0.18182551852150755, + "grad_norm": 0.7758263349533081, + "learning_rate": 0.00019427907070733357, + "loss": 2.7746, + "step": 2253 + }, + { + "epoch": 0.18190622225809056, + "grad_norm": 0.7710500359535217, + "learning_rate": 0.00019427380641466027, + "loss": 2.7415, + "step": 2254 + }, + { + "epoch": 0.18198692599467356, + "grad_norm": 0.8233851194381714, 
+ "learning_rate": 0.00019426853977243724, + "loss": 2.7471, + "step": 2255 + }, + { + "epoch": 0.18206762973125656, + "grad_norm": 0.7856284379959106, + "learning_rate": 0.00019426327078079578, + "loss": 2.6892, + "step": 2256 + }, + { + "epoch": 0.18214833346783957, + "grad_norm": 0.7978290915489197, + "learning_rate": 0.00019425799943986722, + "loss": 2.7346, + "step": 2257 + }, + { + "epoch": 0.18222903720442257, + "grad_norm": 0.8339362740516663, + "learning_rate": 0.00019425272574978293, + "loss": 2.7403, + "step": 2258 + }, + { + "epoch": 0.18230974094100558, + "grad_norm": 0.8035171031951904, + "learning_rate": 0.0001942474497106743, + "loss": 2.7444, + "step": 2259 + }, + { + "epoch": 0.18239044467758858, + "grad_norm": 0.7950475811958313, + "learning_rate": 0.0001942421713226729, + "loss": 2.7218, + "step": 2260 + }, + { + "epoch": 0.18247114841417159, + "grad_norm": 0.8439741730690002, + "learning_rate": 0.00019423689058591022, + "loss": 2.7498, + "step": 2261 + }, + { + "epoch": 0.1825518521507546, + "grad_norm": 0.8585919737815857, + "learning_rate": 0.00019423160750051789, + "loss": 2.7459, + "step": 2262 + }, + { + "epoch": 0.1826325558873376, + "grad_norm": 0.857276201248169, + "learning_rate": 0.00019422632206662755, + "loss": 2.8404, + "step": 2263 + }, + { + "epoch": 0.1827132596239206, + "grad_norm": 0.7692707777023315, + "learning_rate": 0.000194221034284371, + "loss": 2.8069, + "step": 2264 + }, + { + "epoch": 0.1827939633605036, + "grad_norm": 0.9107782244682312, + "learning_rate": 0.00019421574415387998, + "loss": 2.7554, + "step": 2265 + }, + { + "epoch": 0.1828746670970866, + "grad_norm": 0.763300895690918, + "learning_rate": 0.00019421045167528628, + "loss": 2.8031, + "step": 2266 + }, + { + "epoch": 0.1829553708336696, + "grad_norm": 0.8625530004501343, + "learning_rate": 0.0001942051568487219, + "loss": 2.7622, + "step": 2267 + }, + { + "epoch": 0.18303607457025262, + "grad_norm": 0.8483080863952637, + "learning_rate": 
0.00019419985967431875, + "loss": 2.7726, + "step": 2268 + }, + { + "epoch": 0.18311677830683562, + "grad_norm": 0.8295309543609619, + "learning_rate": 0.00019419456015220884, + "loss": 2.7676, + "step": 2269 + }, + { + "epoch": 0.1831974820434186, + "grad_norm": 0.812976062297821, + "learning_rate": 0.0001941892582825243, + "loss": 2.745, + "step": 2270 + }, + { + "epoch": 0.1832781857800016, + "grad_norm": 0.799846351146698, + "learning_rate": 0.00019418395406539717, + "loss": 2.7474, + "step": 2271 + }, + { + "epoch": 0.1833588895165846, + "grad_norm": 0.7825174331665039, + "learning_rate": 0.00019417864750095976, + "loss": 2.7982, + "step": 2272 + }, + { + "epoch": 0.1834395932531676, + "grad_norm": 0.8331060409545898, + "learning_rate": 0.00019417333858934424, + "loss": 2.7279, + "step": 2273 + }, + { + "epoch": 0.18352029698975061, + "grad_norm": 0.8579809665679932, + "learning_rate": 0.00019416802733068295, + "loss": 2.7425, + "step": 2274 + }, + { + "epoch": 0.18360100072633362, + "grad_norm": 0.8643589019775391, + "learning_rate": 0.0001941627137251083, + "loss": 2.7369, + "step": 2275 + }, + { + "epoch": 0.18368170446291662, + "grad_norm": 0.9086846113204956, + "learning_rate": 0.00019415739777275265, + "loss": 2.7681, + "step": 2276 + }, + { + "epoch": 0.18376240819949963, + "grad_norm": 0.8442896604537964, + "learning_rate": 0.00019415207947374853, + "loss": 2.7733, + "step": 2277 + }, + { + "epoch": 0.18384311193608263, + "grad_norm": 0.7858592867851257, + "learning_rate": 0.00019414675882822846, + "loss": 2.7726, + "step": 2278 + }, + { + "epoch": 0.18392381567266564, + "grad_norm": 0.8191118240356445, + "learning_rate": 0.00019414143583632503, + "loss": 2.8142, + "step": 2279 + }, + { + "epoch": 0.18400451940924864, + "grad_norm": 0.8093815445899963, + "learning_rate": 0.00019413611049817097, + "loss": 2.7068, + "step": 2280 + }, + { + "epoch": 0.18408522314583164, + "grad_norm": 0.80247563123703, + "learning_rate": 0.00019413078281389895, + "loss": 
2.7459, + "step": 2281 + }, + { + "epoch": 0.18416592688241465, + "grad_norm": 0.8200877904891968, + "learning_rate": 0.00019412545278364176, + "loss": 2.6963, + "step": 2282 + }, + { + "epoch": 0.18424663061899765, + "grad_norm": 0.870662271976471, + "learning_rate": 0.00019412012040753224, + "loss": 2.8636, + "step": 2283 + }, + { + "epoch": 0.18432733435558066, + "grad_norm": 0.7626601457595825, + "learning_rate": 0.00019411478568570332, + "loss": 2.8082, + "step": 2284 + }, + { + "epoch": 0.18440803809216366, + "grad_norm": 0.7492787837982178, + "learning_rate": 0.00019410944861828787, + "loss": 2.7231, + "step": 2285 + }, + { + "epoch": 0.18448874182874667, + "grad_norm": 0.8172419667243958, + "learning_rate": 0.000194104109205419, + "loss": 2.7054, + "step": 2286 + }, + { + "epoch": 0.18456944556532967, + "grad_norm": 0.7749670147895813, + "learning_rate": 0.0001940987674472297, + "loss": 2.6907, + "step": 2287 + }, + { + "epoch": 0.18465014930191267, + "grad_norm": 0.8855465054512024, + "learning_rate": 0.00019409342334385316, + "loss": 2.7439, + "step": 2288 + }, + { + "epoch": 0.18473085303849568, + "grad_norm": 0.8066419363021851, + "learning_rate": 0.00019408807689542257, + "loss": 2.7126, + "step": 2289 + }, + { + "epoch": 0.18481155677507868, + "grad_norm": 0.7759004235267639, + "learning_rate": 0.00019408272810207114, + "loss": 2.7207, + "step": 2290 + }, + { + "epoch": 0.1848922605116617, + "grad_norm": 0.8593513369560242, + "learning_rate": 0.00019407737696393215, + "loss": 2.7375, + "step": 2291 + }, + { + "epoch": 0.1849729642482447, + "grad_norm": 0.8154759407043457, + "learning_rate": 0.00019407202348113904, + "loss": 2.7608, + "step": 2292 + }, + { + "epoch": 0.1850536679848277, + "grad_norm": 0.7912892699241638, + "learning_rate": 0.0001940666676538252, + "loss": 2.7886, + "step": 2293 + }, + { + "epoch": 0.1851343717214107, + "grad_norm": 0.9184576272964478, + "learning_rate": 0.0001940613094821241, + "loss": 2.7867, + "step": 2294 + }, + { + 
"epoch": 0.1852150754579937, + "grad_norm": 0.8114588856697083, + "learning_rate": 0.0001940559489661693, + "loss": 2.8105, + "step": 2295 + }, + { + "epoch": 0.1852957791945767, + "grad_norm": 0.7681595683097839, + "learning_rate": 0.00019405058610609438, + "loss": 2.7707, + "step": 2296 + }, + { + "epoch": 0.18537648293115971, + "grad_norm": 0.7719643712043762, + "learning_rate": 0.000194045220902033, + "loss": 2.6767, + "step": 2297 + }, + { + "epoch": 0.18545718666774272, + "grad_norm": 0.7602487206459045, + "learning_rate": 0.00019403985335411888, + "loss": 2.7698, + "step": 2298 + }, + { + "epoch": 0.18553789040432572, + "grad_norm": 0.8044554591178894, + "learning_rate": 0.00019403448346248578, + "loss": 2.7578, + "step": 2299 + }, + { + "epoch": 0.18561859414090873, + "grad_norm": 0.7830328345298767, + "learning_rate": 0.00019402911122726757, + "loss": 2.7113, + "step": 2300 + }, + { + "epoch": 0.18569929787749173, + "grad_norm": 0.7793100476264954, + "learning_rate": 0.0001940237366485981, + "loss": 2.7388, + "step": 2301 + }, + { + "epoch": 0.18578000161407474, + "grad_norm": 0.9127374887466431, + "learning_rate": 0.00019401835972661133, + "loss": 2.7459, + "step": 2302 + }, + { + "epoch": 0.18586070535065774, + "grad_norm": 0.8007177114486694, + "learning_rate": 0.00019401298046144128, + "loss": 2.776, + "step": 2303 + }, + { + "epoch": 0.18594140908724074, + "grad_norm": 0.7384614944458008, + "learning_rate": 0.000194007598853222, + "loss": 2.6819, + "step": 2304 + }, + { + "epoch": 0.18602211282382375, + "grad_norm": 0.798909068107605, + "learning_rate": 0.0001940022149020876, + "loss": 2.7218, + "step": 2305 + }, + { + "epoch": 0.18610281656040675, + "grad_norm": 0.8388963341712952, + "learning_rate": 0.0001939968286081723, + "loss": 2.8248, + "step": 2306 + }, + { + "epoch": 0.18618352029698976, + "grad_norm": 0.8411754369735718, + "learning_rate": 0.0001939914399716103, + "loss": 2.7575, + "step": 2307 + }, + { + "epoch": 0.18626422403357276, + 
"grad_norm": 0.7936103343963623, + "learning_rate": 0.00019398604899253594, + "loss": 2.7488, + "step": 2308 + }, + { + "epoch": 0.18634492777015577, + "grad_norm": 0.7913734912872314, + "learning_rate": 0.00019398065567108357, + "loss": 2.7963, + "step": 2309 + }, + { + "epoch": 0.18642563150673877, + "grad_norm": 0.8341575860977173, + "learning_rate": 0.00019397526000738754, + "loss": 2.7698, + "step": 2310 + }, + { + "epoch": 0.18650633524332177, + "grad_norm": 0.8323128819465637, + "learning_rate": 0.00019396986200158244, + "loss": 2.7218, + "step": 2311 + }, + { + "epoch": 0.18658703897990478, + "grad_norm": 0.748073160648346, + "learning_rate": 0.0001939644616538027, + "loss": 2.7798, + "step": 2312 + }, + { + "epoch": 0.18666774271648778, + "grad_norm": 0.8166958689689636, + "learning_rate": 0.00019395905896418296, + "loss": 2.661, + "step": 2313 + }, + { + "epoch": 0.1867484464530708, + "grad_norm": 0.796791672706604, + "learning_rate": 0.00019395365393285786, + "loss": 2.7297, + "step": 2314 + }, + { + "epoch": 0.1868291501896538, + "grad_norm": 0.7851170897483826, + "learning_rate": 0.0001939482465599621, + "loss": 2.7798, + "step": 2315 + }, + { + "epoch": 0.1869098539262368, + "grad_norm": 0.7545836567878723, + "learning_rate": 0.00019394283684563045, + "loss": 2.7327, + "step": 2316 + }, + { + "epoch": 0.1869905576628198, + "grad_norm": 0.8100360631942749, + "learning_rate": 0.00019393742478999776, + "loss": 2.7901, + "step": 2317 + }, + { + "epoch": 0.1870712613994028, + "grad_norm": 0.7874314785003662, + "learning_rate": 0.00019393201039319887, + "loss": 2.7597, + "step": 2318 + }, + { + "epoch": 0.1871519651359858, + "grad_norm": 0.7698730826377869, + "learning_rate": 0.00019392659365536876, + "loss": 2.7327, + "step": 2319 + }, + { + "epoch": 0.1872326688725688, + "grad_norm": 0.7417994141578674, + "learning_rate": 0.0001939211745766424, + "loss": 2.7413, + "step": 2320 + }, + { + "epoch": 0.1873133726091518, + "grad_norm": 0.7823258638381958, + 
"learning_rate": 0.00019391575315715485, + "loss": 2.7577, + "step": 2321 + }, + { + "epoch": 0.1873940763457348, + "grad_norm": 0.82382732629776, + "learning_rate": 0.00019391032939704124, + "loss": 2.7769, + "step": 2322 + }, + { + "epoch": 0.1874747800823178, + "grad_norm": 0.8405026197433472, + "learning_rate": 0.0001939049032964367, + "loss": 2.8402, + "step": 2323 + }, + { + "epoch": 0.1875554838189008, + "grad_norm": 0.8307906985282898, + "learning_rate": 0.00019389947485547654, + "loss": 2.7642, + "step": 2324 + }, + { + "epoch": 0.1876361875554838, + "grad_norm": 0.8618248701095581, + "learning_rate": 0.000193894044074296, + "loss": 2.7853, + "step": 2325 + }, + { + "epoch": 0.1877168912920668, + "grad_norm": 0.8040831685066223, + "learning_rate": 0.00019388861095303046, + "loss": 2.7467, + "step": 2326 + }, + { + "epoch": 0.18779759502864982, + "grad_norm": 0.7723637223243713, + "learning_rate": 0.0001938831754918153, + "loss": 2.7222, + "step": 2327 + }, + { + "epoch": 0.18787829876523282, + "grad_norm": 0.8189084529876709, + "learning_rate": 0.000193877737690786, + "loss": 2.7857, + "step": 2328 + }, + { + "epoch": 0.18795900250181583, + "grad_norm": 0.8335791826248169, + "learning_rate": 0.00019387229755007805, + "loss": 2.6997, + "step": 2329 + }, + { + "epoch": 0.18803970623839883, + "grad_norm": 0.7732782959938049, + "learning_rate": 0.00019386685506982707, + "loss": 2.7155, + "step": 2330 + }, + { + "epoch": 0.18812040997498183, + "grad_norm": 0.8262906670570374, + "learning_rate": 0.0001938614102501687, + "loss": 2.7638, + "step": 2331 + }, + { + "epoch": 0.18820111371156484, + "grad_norm": 0.7969058156013489, + "learning_rate": 0.00019385596309123862, + "loss": 2.7363, + "step": 2332 + }, + { + "epoch": 0.18828181744814784, + "grad_norm": 0.7834853529930115, + "learning_rate": 0.0001938505135931726, + "loss": 2.7205, + "step": 2333 + }, + { + "epoch": 0.18836252118473085, + "grad_norm": 0.748481810092926, + "learning_rate": 
0.00019384506175610647, + "loss": 2.7759, + "step": 2334 + }, + { + "epoch": 0.18844322492131385, + "grad_norm": 0.8137786984443665, + "learning_rate": 0.00019383960758017604, + "loss": 2.828, + "step": 2335 + }, + { + "epoch": 0.18852392865789686, + "grad_norm": 0.8065745234489441, + "learning_rate": 0.00019383415106551734, + "loss": 2.7408, + "step": 2336 + }, + { + "epoch": 0.18860463239447986, + "grad_norm": 0.768643856048584, + "learning_rate": 0.0001938286922122663, + "loss": 2.6503, + "step": 2337 + }, + { + "epoch": 0.18868533613106286, + "grad_norm": 0.7677921652793884, + "learning_rate": 0.00019382323102055897, + "loss": 2.7088, + "step": 2338 + }, + { + "epoch": 0.18876603986764587, + "grad_norm": 0.7648717164993286, + "learning_rate": 0.0001938177674905315, + "loss": 2.7015, + "step": 2339 + }, + { + "epoch": 0.18884674360422887, + "grad_norm": 0.7517116665840149, + "learning_rate": 0.00019381230162231997, + "loss": 2.7095, + "step": 2340 + }, + { + "epoch": 0.18892744734081188, + "grad_norm": 0.8147841691970825, + "learning_rate": 0.00019380683341606067, + "loss": 2.8563, + "step": 2341 + }, + { + "epoch": 0.18900815107739488, + "grad_norm": 0.7849822640419006, + "learning_rate": 0.00019380136287188988, + "loss": 2.7432, + "step": 2342 + }, + { + "epoch": 0.18908885481397789, + "grad_norm": 0.813811719417572, + "learning_rate": 0.0001937958899899439, + "loss": 2.7419, + "step": 2343 + }, + { + "epoch": 0.1891695585505609, + "grad_norm": 0.8142707943916321, + "learning_rate": 0.00019379041477035923, + "loss": 2.7658, + "step": 2344 + }, + { + "epoch": 0.1892502622871439, + "grad_norm": 0.7594506740570068, + "learning_rate": 0.00019378493721327217, + "loss": 2.7298, + "step": 2345 + }, + { + "epoch": 0.1893309660237269, + "grad_norm": 0.8374232053756714, + "learning_rate": 0.00019377945731881936, + "loss": 2.8112, + "step": 2346 + }, + { + "epoch": 0.1894116697603099, + "grad_norm": 0.783608615398407, + "learning_rate": 0.00019377397508713734, + "loss": 
2.8168, + "step": 2347 + }, + { + "epoch": 0.1894923734968929, + "grad_norm": 0.720214307308197, + "learning_rate": 0.0001937684905183627, + "loss": 2.7516, + "step": 2348 + }, + { + "epoch": 0.1895730772334759, + "grad_norm": 0.7939600944519043, + "learning_rate": 0.0001937630036126322, + "loss": 2.7609, + "step": 2349 + }, + { + "epoch": 0.18965378097005892, + "grad_norm": 0.787315309047699, + "learning_rate": 0.00019375751437008252, + "loss": 2.758, + "step": 2350 + }, + { + "epoch": 0.18973448470664192, + "grad_norm": 0.7862411141395569, + "learning_rate": 0.00019375202279085053, + "loss": 2.6866, + "step": 2351 + }, + { + "epoch": 0.18981518844322492, + "grad_norm": 0.8651136159896851, + "learning_rate": 0.000193746528875073, + "loss": 2.7488, + "step": 2352 + }, + { + "epoch": 0.18989589217980793, + "grad_norm": 0.8150602579116821, + "learning_rate": 0.00019374103262288696, + "loss": 2.7417, + "step": 2353 + }, + { + "epoch": 0.18997659591639093, + "grad_norm": 0.9053540229797363, + "learning_rate": 0.00019373553403442934, + "loss": 2.7587, + "step": 2354 + }, + { + "epoch": 0.19005729965297394, + "grad_norm": 0.8775703310966492, + "learning_rate": 0.0001937300331098372, + "loss": 2.733, + "step": 2355 + }, + { + "epoch": 0.19013800338955694, + "grad_norm": 0.7714357972145081, + "learning_rate": 0.0001937245298492476, + "loss": 2.7595, + "step": 2356 + }, + { + "epoch": 0.19021870712613995, + "grad_norm": 0.8648017048835754, + "learning_rate": 0.0001937190242527977, + "loss": 2.7944, + "step": 2357 + }, + { + "epoch": 0.19029941086272295, + "grad_norm": 0.9367388486862183, + "learning_rate": 0.00019371351632062477, + "loss": 2.7902, + "step": 2358 + }, + { + "epoch": 0.19038011459930596, + "grad_norm": 0.8116368651390076, + "learning_rate": 0.00019370800605286604, + "loss": 2.7291, + "step": 2359 + }, + { + "epoch": 0.19046081833588896, + "grad_norm": 0.7892753481864929, + "learning_rate": 0.00019370249344965882, + "loss": 2.8192, + "step": 2360 + }, + { + 
"epoch": 0.19054152207247196, + "grad_norm": 0.8109372854232788, + "learning_rate": 0.00019369697851114056, + "loss": 2.6982, + "step": 2361 + }, + { + "epoch": 0.19062222580905497, + "grad_norm": 0.8756314516067505, + "learning_rate": 0.00019369146123744864, + "loss": 2.744, + "step": 2362 + }, + { + "epoch": 0.19070292954563797, + "grad_norm": 0.7400399446487427, + "learning_rate": 0.00019368594162872058, + "loss": 2.7328, + "step": 2363 + }, + { + "epoch": 0.19078363328222098, + "grad_norm": 0.8223158717155457, + "learning_rate": 0.000193680419685094, + "loss": 2.7614, + "step": 2364 + }, + { + "epoch": 0.19086433701880398, + "grad_norm": 0.7350139617919922, + "learning_rate": 0.00019367489540670645, + "loss": 2.7074, + "step": 2365 + }, + { + "epoch": 0.19094504075538699, + "grad_norm": 0.7915631532669067, + "learning_rate": 0.00019366936879369563, + "loss": 2.7835, + "step": 2366 + }, + { + "epoch": 0.19102574449197, + "grad_norm": 0.7765628099441528, + "learning_rate": 0.00019366383984619932, + "loss": 2.765, + "step": 2367 + }, + { + "epoch": 0.191106448228553, + "grad_norm": 0.8127059936523438, + "learning_rate": 0.00019365830856435525, + "loss": 2.7753, + "step": 2368 + }, + { + "epoch": 0.191187151965136, + "grad_norm": 0.8652897477149963, + "learning_rate": 0.0001936527749483013, + "loss": 2.7137, + "step": 2369 + }, + { + "epoch": 0.191267855701719, + "grad_norm": 0.8086774945259094, + "learning_rate": 0.00019364723899817541, + "loss": 2.7209, + "step": 2370 + }, + { + "epoch": 0.191348559438302, + "grad_norm": 0.7965098023414612, + "learning_rate": 0.00019364170071411554, + "loss": 2.786, + "step": 2371 + }, + { + "epoch": 0.19142926317488498, + "grad_norm": 0.7954064607620239, + "learning_rate": 0.00019363616009625967, + "loss": 2.7508, + "step": 2372 + }, + { + "epoch": 0.191509966911468, + "grad_norm": 0.7835928201675415, + "learning_rate": 0.00019363061714474595, + "loss": 2.7423, + "step": 2373 + }, + { + "epoch": 0.191590670648051, + "grad_norm": 
0.8720580339431763, + "learning_rate": 0.0001936250718597125, + "loss": 2.7877, + "step": 2374 + }, + { + "epoch": 0.191671374384634, + "grad_norm": 0.836066484451294, + "learning_rate": 0.00019361952424129747, + "loss": 2.8456, + "step": 2375 + }, + { + "epoch": 0.191752078121217, + "grad_norm": 0.793666660785675, + "learning_rate": 0.00019361397428963923, + "loss": 2.786, + "step": 2376 + }, + { + "epoch": 0.1918327818578, + "grad_norm": 0.8573217391967773, + "learning_rate": 0.000193608422004876, + "loss": 2.7569, + "step": 2377 + }, + { + "epoch": 0.191913485594383, + "grad_norm": 0.81243896484375, + "learning_rate": 0.00019360286738714623, + "loss": 2.771, + "step": 2378 + }, + { + "epoch": 0.19199418933096601, + "grad_norm": 0.7449626326560974, + "learning_rate": 0.00019359731043658832, + "loss": 2.7479, + "step": 2379 + }, + { + "epoch": 0.19207489306754902, + "grad_norm": 0.8124165534973145, + "learning_rate": 0.00019359175115334076, + "loss": 2.7602, + "step": 2380 + }, + { + "epoch": 0.19215559680413202, + "grad_norm": 0.7786986827850342, + "learning_rate": 0.00019358618953754211, + "loss": 2.6926, + "step": 2381 + }, + { + "epoch": 0.19223630054071503, + "grad_norm": 0.7987258434295654, + "learning_rate": 0.000193580625589331, + "loss": 2.7573, + "step": 2382 + }, + { + "epoch": 0.19231700427729803, + "grad_norm": 0.8236463665962219, + "learning_rate": 0.00019357505930884606, + "loss": 2.6755, + "step": 2383 + }, + { + "epoch": 0.19239770801388104, + "grad_norm": 0.8285779356956482, + "learning_rate": 0.00019356949069622602, + "loss": 2.7658, + "step": 2384 + }, + { + "epoch": 0.19247841175046404, + "grad_norm": 0.7823960781097412, + "learning_rate": 0.0001935639197516097, + "loss": 2.7404, + "step": 2385 + }, + { + "epoch": 0.19255911548704704, + "grad_norm": 0.968638002872467, + "learning_rate": 0.00019355834647513591, + "loss": 2.7836, + "step": 2386 + }, + { + "epoch": 0.19263981922363005, + "grad_norm": 0.8170328736305237, + "learning_rate": 
0.00019355277086694357, + "loss": 2.7816, + "step": 2387 + }, + { + "epoch": 0.19272052296021305, + "grad_norm": 0.8342583179473877, + "learning_rate": 0.00019354719292717163, + "loss": 2.8204, + "step": 2388 + }, + { + "epoch": 0.19280122669679606, + "grad_norm": 0.8160435557365417, + "learning_rate": 0.0001935416126559591, + "loss": 2.6938, + "step": 2389 + }, + { + "epoch": 0.19288193043337906, + "grad_norm": 0.7888174653053284, + "learning_rate": 0.00019353603005344504, + "loss": 2.6804, + "step": 2390 + }, + { + "epoch": 0.19296263416996207, + "grad_norm": 0.8389205932617188, + "learning_rate": 0.00019353044511976865, + "loss": 2.7571, + "step": 2391 + }, + { + "epoch": 0.19304333790654507, + "grad_norm": 0.7920562028884888, + "learning_rate": 0.00019352485785506906, + "loss": 2.7174, + "step": 2392 + }, + { + "epoch": 0.19312404164312807, + "grad_norm": 0.7853459715843201, + "learning_rate": 0.00019351926825948555, + "loss": 2.7626, + "step": 2393 + }, + { + "epoch": 0.19320474537971108, + "grad_norm": 0.9109459519386292, + "learning_rate": 0.0001935136763331574, + "loss": 2.7568, + "step": 2394 + }, + { + "epoch": 0.19328544911629408, + "grad_norm": 0.7983853816986084, + "learning_rate": 0.00019350808207622397, + "loss": 2.7412, + "step": 2395 + }, + { + "epoch": 0.1933661528528771, + "grad_norm": 0.7416854500770569, + "learning_rate": 0.00019350248548882472, + "loss": 2.7335, + "step": 2396 + }, + { + "epoch": 0.1934468565894601, + "grad_norm": 0.7305171489715576, + "learning_rate": 0.0001934968865710991, + "loss": 2.7295, + "step": 2397 + }, + { + "epoch": 0.1935275603260431, + "grad_norm": 0.7717033624649048, + "learning_rate": 0.0001934912853231867, + "loss": 2.7568, + "step": 2398 + }, + { + "epoch": 0.1936082640626261, + "grad_norm": 0.7833831906318665, + "learning_rate": 0.00019348568174522705, + "loss": 2.736, + "step": 2399 + }, + { + "epoch": 0.1936889677992091, + "grad_norm": 0.872831404209137, + "learning_rate": 0.00019348007583735983, + "loss": 
2.7719, + "step": 2400 + }, + { + "epoch": 0.1937696715357921, + "grad_norm": 0.8389193415641785, + "learning_rate": 0.0001934744675997248, + "loss": 2.7572, + "step": 2401 + }, + { + "epoch": 0.19385037527237511, + "grad_norm": 0.8442249298095703, + "learning_rate": 0.00019346885703246165, + "loss": 2.8117, + "step": 2402 + }, + { + "epoch": 0.19393107900895812, + "grad_norm": 0.8451170325279236, + "learning_rate": 0.00019346324413571027, + "loss": 2.7216, + "step": 2403 + }, + { + "epoch": 0.19401178274554112, + "grad_norm": 0.898529052734375, + "learning_rate": 0.00019345762890961052, + "loss": 2.8119, + "step": 2404 + }, + { + "epoch": 0.19409248648212413, + "grad_norm": 0.8302313685417175, + "learning_rate": 0.00019345201135430236, + "loss": 2.76, + "step": 2405 + }, + { + "epoch": 0.19417319021870713, + "grad_norm": 0.8975207209587097, + "learning_rate": 0.00019344639146992582, + "loss": 2.8043, + "step": 2406 + }, + { + "epoch": 0.19425389395529014, + "grad_norm": 0.8972581028938293, + "learning_rate": 0.0001934407692566209, + "loss": 2.7487, + "step": 2407 + }, + { + "epoch": 0.19433459769187314, + "grad_norm": 0.8311447501182556, + "learning_rate": 0.00019343514471452776, + "loss": 2.7653, + "step": 2408 + }, + { + "epoch": 0.19441530142845614, + "grad_norm": 0.8336243033409119, + "learning_rate": 0.0001934295178437866, + "loss": 2.753, + "step": 2409 + }, + { + "epoch": 0.19449600516503915, + "grad_norm": 0.8339207172393799, + "learning_rate": 0.0001934238886445376, + "loss": 2.7643, + "step": 2410 + }, + { + "epoch": 0.19457670890162215, + "grad_norm": 0.906074583530426, + "learning_rate": 0.0001934182571169211, + "loss": 2.7777, + "step": 2411 + }, + { + "epoch": 0.19465741263820516, + "grad_norm": 0.8759943246841431, + "learning_rate": 0.00019341262326107742, + "loss": 2.77, + "step": 2412 + }, + { + "epoch": 0.19473811637478816, + "grad_norm": 0.8399369716644287, + "learning_rate": 0.00019340698707714699, + "loss": 2.752, + "step": 2413 + }, + { + 
"epoch": 0.19481882011137117, + "grad_norm": 0.8551808595657349, + "learning_rate": 0.00019340134856527026, + "loss": 2.6727, + "step": 2414 + }, + { + "epoch": 0.19489952384795417, + "grad_norm": 0.7660732865333557, + "learning_rate": 0.00019339570772558778, + "loss": 2.7491, + "step": 2415 + }, + { + "epoch": 0.19498022758453717, + "grad_norm": 0.8257685303688049, + "learning_rate": 0.00019339006455824015, + "loss": 2.7584, + "step": 2416 + }, + { + "epoch": 0.19506093132112018, + "grad_norm": 0.797275960445404, + "learning_rate": 0.00019338441906336794, + "loss": 2.7051, + "step": 2417 + }, + { + "epoch": 0.19514163505770318, + "grad_norm": 0.8311913013458252, + "learning_rate": 0.00019337877124111193, + "loss": 2.8084, + "step": 2418 + }, + { + "epoch": 0.1952223387942862, + "grad_norm": 0.7995893359184265, + "learning_rate": 0.0001933731210916128, + "loss": 2.7556, + "step": 2419 + }, + { + "epoch": 0.1953030425308692, + "grad_norm": 0.792850136756897, + "learning_rate": 0.00019336746861501147, + "loss": 2.7289, + "step": 2420 + }, + { + "epoch": 0.1953837462674522, + "grad_norm": 0.8058848977088928, + "learning_rate": 0.00019336181381144873, + "loss": 2.7394, + "step": 2421 + }, + { + "epoch": 0.1954644500040352, + "grad_norm": 0.8267124891281128, + "learning_rate": 0.00019335615668106555, + "loss": 2.771, + "step": 2422 + }, + { + "epoch": 0.19554515374061818, + "grad_norm": 0.7641060948371887, + "learning_rate": 0.00019335049722400292, + "loss": 2.7311, + "step": 2423 + }, + { + "epoch": 0.19562585747720118, + "grad_norm": 0.8023245930671692, + "learning_rate": 0.00019334483544040186, + "loss": 2.7658, + "step": 2424 + }, + { + "epoch": 0.19570656121378419, + "grad_norm": 0.8341927528381348, + "learning_rate": 0.00019333917133040348, + "loss": 2.7476, + "step": 2425 + }, + { + "epoch": 0.1957872649503672, + "grad_norm": 0.7985726594924927, + "learning_rate": 0.000193333504894149, + "loss": 2.7362, + "step": 2426 + }, + { + "epoch": 0.1958679686869502, + 
"grad_norm": 0.7267594933509827, + "learning_rate": 0.0001933278361317796, + "loss": 2.6875, + "step": 2427 + }, + { + "epoch": 0.1959486724235332, + "grad_norm": 0.8292990326881409, + "learning_rate": 0.00019332216504343652, + "loss": 2.7619, + "step": 2428 + }, + { + "epoch": 0.1960293761601162, + "grad_norm": 0.7549588680267334, + "learning_rate": 0.00019331649162926116, + "loss": 2.7385, + "step": 2429 + }, + { + "epoch": 0.1961100798966992, + "grad_norm": 0.7688446640968323, + "learning_rate": 0.0001933108158893949, + "loss": 2.7544, + "step": 2430 + }, + { + "epoch": 0.1961907836332822, + "grad_norm": 0.8168436884880066, + "learning_rate": 0.00019330513782397918, + "loss": 2.8013, + "step": 2431 + }, + { + "epoch": 0.19627148736986522, + "grad_norm": 0.8405759334564209, + "learning_rate": 0.00019329945743315556, + "loss": 2.7299, + "step": 2432 + }, + { + "epoch": 0.19635219110644822, + "grad_norm": 0.79430091381073, + "learning_rate": 0.00019329377471706554, + "loss": 2.7293, + "step": 2433 + }, + { + "epoch": 0.19643289484303122, + "grad_norm": 0.8428656458854675, + "learning_rate": 0.0001932880896758508, + "loss": 2.8211, + "step": 2434 + }, + { + "epoch": 0.19651359857961423, + "grad_norm": 0.7883139252662659, + "learning_rate": 0.00019328240230965298, + "loss": 2.6943, + "step": 2435 + }, + { + "epoch": 0.19659430231619723, + "grad_norm": 0.7539335489273071, + "learning_rate": 0.00019327671261861387, + "loss": 2.6926, + "step": 2436 + }, + { + "epoch": 0.19667500605278024, + "grad_norm": 0.9986057281494141, + "learning_rate": 0.00019327102060287524, + "loss": 2.7851, + "step": 2437 + }, + { + "epoch": 0.19675570978936324, + "grad_norm": 0.7716113924980164, + "learning_rate": 0.000193265326262579, + "loss": 2.752, + "step": 2438 + }, + { + "epoch": 0.19683641352594625, + "grad_norm": 0.9134296774864197, + "learning_rate": 0.000193259629597867, + "loss": 2.7698, + "step": 2439 + }, + { + "epoch": 0.19691711726252925, + "grad_norm": 0.7966345548629761, + 
"learning_rate": 0.00019325393060888124, + "loss": 2.7839, + "step": 2440 + }, + { + "epoch": 0.19699782099911226, + "grad_norm": 0.8051251173019409, + "learning_rate": 0.0001932482292957638, + "loss": 2.7322, + "step": 2441 + }, + { + "epoch": 0.19707852473569526, + "grad_norm": 0.843169629573822, + "learning_rate": 0.0001932425256586567, + "loss": 2.8263, + "step": 2442 + }, + { + "epoch": 0.19715922847227826, + "grad_norm": 0.7552370429039001, + "learning_rate": 0.00019323681969770213, + "loss": 2.7342, + "step": 2443 + }, + { + "epoch": 0.19723993220886127, + "grad_norm": 0.844473123550415, + "learning_rate": 0.0001932311114130423, + "loss": 2.776, + "step": 2444 + }, + { + "epoch": 0.19732063594544427, + "grad_norm": 0.8002473711967468, + "learning_rate": 0.00019322540080481945, + "loss": 2.7382, + "step": 2445 + }, + { + "epoch": 0.19740133968202728, + "grad_norm": 0.8564329147338867, + "learning_rate": 0.00019321968787317594, + "loss": 2.7592, + "step": 2446 + }, + { + "epoch": 0.19748204341861028, + "grad_norm": 0.7853825688362122, + "learning_rate": 0.00019321397261825408, + "loss": 2.7101, + "step": 2447 + }, + { + "epoch": 0.19756274715519329, + "grad_norm": 0.8482939004898071, + "learning_rate": 0.0001932082550401964, + "loss": 2.7891, + "step": 2448 + }, + { + "epoch": 0.1976434508917763, + "grad_norm": 0.8361770510673523, + "learning_rate": 0.00019320253513914536, + "loss": 2.7341, + "step": 2449 + }, + { + "epoch": 0.1977241546283593, + "grad_norm": 0.7814618945121765, + "learning_rate": 0.0001931968129152435, + "loss": 2.771, + "step": 2450 + }, + { + "epoch": 0.1978048583649423, + "grad_norm": 0.7588146924972534, + "learning_rate": 0.00019319108836863343, + "loss": 2.7577, + "step": 2451 + }, + { + "epoch": 0.1978855621015253, + "grad_norm": 0.9184895157814026, + "learning_rate": 0.00019318536149945785, + "loss": 2.7711, + "step": 2452 + }, + { + "epoch": 0.1979662658381083, + "grad_norm": 0.8454298973083496, + "learning_rate": 
0.00019317963230785947, + "loss": 2.7748, + "step": 2453 + }, + { + "epoch": 0.1980469695746913, + "grad_norm": 0.7662420868873596, + "learning_rate": 0.0001931739007939811, + "loss": 2.7704, + "step": 2454 + }, + { + "epoch": 0.19812767331127432, + "grad_norm": 0.837888777256012, + "learning_rate": 0.0001931681669579655, + "loss": 2.7613, + "step": 2455 + }, + { + "epoch": 0.19820837704785732, + "grad_norm": 0.7835226058959961, + "learning_rate": 0.0001931624307999557, + "loss": 2.6888, + "step": 2456 + }, + { + "epoch": 0.19828908078444032, + "grad_norm": 0.8491464257240295, + "learning_rate": 0.00019315669232009456, + "loss": 2.7521, + "step": 2457 + }, + { + "epoch": 0.19836978452102333, + "grad_norm": 0.7590088248252869, + "learning_rate": 0.00019315095151852516, + "loss": 2.7441, + "step": 2458 + }, + { + "epoch": 0.19845048825760633, + "grad_norm": 0.9316127300262451, + "learning_rate": 0.00019314520839539052, + "loss": 2.786, + "step": 2459 + }, + { + "epoch": 0.19853119199418934, + "grad_norm": 0.7819615006446838, + "learning_rate": 0.0001931394629508338, + "loss": 2.7003, + "step": 2460 + }, + { + "epoch": 0.19861189573077234, + "grad_norm": 0.7675932049751282, + "learning_rate": 0.0001931337151849982, + "loss": 2.7065, + "step": 2461 + }, + { + "epoch": 0.19869259946735535, + "grad_norm": 0.7797678112983704, + "learning_rate": 0.000193127965098027, + "loss": 2.7605, + "step": 2462 + }, + { + "epoch": 0.19877330320393835, + "grad_norm": 0.789544403553009, + "learning_rate": 0.00019312221269006345, + "loss": 2.7913, + "step": 2463 + }, + { + "epoch": 0.19885400694052136, + "grad_norm": 0.9594957232475281, + "learning_rate": 0.00019311645796125094, + "loss": 2.785, + "step": 2464 + }, + { + "epoch": 0.19893471067710436, + "grad_norm": 0.8154739141464233, + "learning_rate": 0.00019311070091173287, + "loss": 2.6716, + "step": 2465 + }, + { + "epoch": 0.19901541441368736, + "grad_norm": 0.9042142629623413, + "learning_rate": 0.00019310494154165274, + "loss": 
2.734, + "step": 2466 + }, + { + "epoch": 0.19909611815027037, + "grad_norm": 0.7803483605384827, + "learning_rate": 0.0001930991798511541, + "loss": 2.7052, + "step": 2467 + }, + { + "epoch": 0.19917682188685337, + "grad_norm": 0.7917614579200745, + "learning_rate": 0.00019309341584038055, + "loss": 2.728, + "step": 2468 + }, + { + "epoch": 0.19925752562343638, + "grad_norm": 0.8295063376426697, + "learning_rate": 0.00019308764950947568, + "loss": 2.7496, + "step": 2469 + }, + { + "epoch": 0.19933822936001938, + "grad_norm": 0.790831983089447, + "learning_rate": 0.0001930818808585833, + "loss": 2.7356, + "step": 2470 + }, + { + "epoch": 0.19941893309660239, + "grad_norm": 0.8527843952178955, + "learning_rate": 0.0001930761098878471, + "loss": 2.718, + "step": 2471 + }, + { + "epoch": 0.1994996368331854, + "grad_norm": 0.8518494367599487, + "learning_rate": 0.00019307033659741096, + "loss": 2.7189, + "step": 2472 + }, + { + "epoch": 0.1995803405697684, + "grad_norm": 0.8027220368385315, + "learning_rate": 0.00019306456098741872, + "loss": 2.7272, + "step": 2473 + }, + { + "epoch": 0.19966104430635137, + "grad_norm": 0.7516468167304993, + "learning_rate": 0.00019305878305801434, + "loss": 2.798, + "step": 2474 + }, + { + "epoch": 0.19974174804293438, + "grad_norm": 0.7676397562026978, + "learning_rate": 0.00019305300280934187, + "loss": 2.8076, + "step": 2475 + }, + { + "epoch": 0.19982245177951738, + "grad_norm": 0.8237762451171875, + "learning_rate": 0.00019304722024154528, + "loss": 2.6998, + "step": 2476 + }, + { + "epoch": 0.19990315551610038, + "grad_norm": 0.8397759199142456, + "learning_rate": 0.0001930414353547688, + "loss": 2.806, + "step": 2477 + }, + { + "epoch": 0.1999838592526834, + "grad_norm": 0.8911117911338806, + "learning_rate": 0.00019303564814915645, + "loss": 2.7566, + "step": 2478 + }, + { + "epoch": 0.2000645629892664, + "grad_norm": 0.765404999256134, + "learning_rate": 0.00019302985862485264, + "loss": 2.7363, + "step": 2479 + }, + { + 
"epoch": 0.2001452667258494, + "grad_norm": 0.7898589372634888, + "learning_rate": 0.0001930240667820015, + "loss": 2.7007, + "step": 2480 + }, + { + "epoch": 0.2002259704624324, + "grad_norm": 0.7581521272659302, + "learning_rate": 0.0001930182726207475, + "loss": 2.7508, + "step": 2481 + }, + { + "epoch": 0.2003066741990154, + "grad_norm": 0.8179795742034912, + "learning_rate": 0.00019301247614123495, + "loss": 2.7327, + "step": 2482 + }, + { + "epoch": 0.2003873779355984, + "grad_norm": 0.8103611469268799, + "learning_rate": 0.00019300667734360838, + "loss": 2.7869, + "step": 2483 + }, + { + "epoch": 0.20046808167218141, + "grad_norm": 0.7368054389953613, + "learning_rate": 0.0001930008762280123, + "loss": 2.73, + "step": 2484 + }, + { + "epoch": 0.20054878540876442, + "grad_norm": 0.7679662108421326, + "learning_rate": 0.00019299507279459127, + "loss": 2.7905, + "step": 2485 + }, + { + "epoch": 0.20062948914534742, + "grad_norm": 0.7783839702606201, + "learning_rate": 0.0001929892670434899, + "loss": 2.6816, + "step": 2486 + }, + { + "epoch": 0.20071019288193043, + "grad_norm": 0.7575809359550476, + "learning_rate": 0.00019298345897485298, + "loss": 2.7351, + "step": 2487 + }, + { + "epoch": 0.20079089661851343, + "grad_norm": 0.7674959301948547, + "learning_rate": 0.00019297764858882514, + "loss": 2.7682, + "step": 2488 + }, + { + "epoch": 0.20087160035509644, + "grad_norm": 0.7972592115402222, + "learning_rate": 0.00019297183588555127, + "loss": 2.782, + "step": 2489 + }, + { + "epoch": 0.20095230409167944, + "grad_norm": 0.8417105674743652, + "learning_rate": 0.00019296602086517624, + "loss": 2.8173, + "step": 2490 + }, + { + "epoch": 0.20103300782826244, + "grad_norm": 0.7194239497184753, + "learning_rate": 0.00019296020352784496, + "loss": 2.7735, + "step": 2491 + }, + { + "epoch": 0.20111371156484545, + "grad_norm": 0.801895022392273, + "learning_rate": 0.00019295438387370237, + "loss": 2.7018, + "step": 2492 + }, + { + "epoch": 0.20119441530142845, + 
"grad_norm": 0.900943398475647, + "learning_rate": 0.0001929485619028936, + "loss": 2.77, + "step": 2493 + }, + { + "epoch": 0.20127511903801146, + "grad_norm": 0.7882106304168701, + "learning_rate": 0.00019294273761556366, + "loss": 2.7195, + "step": 2494 + }, + { + "epoch": 0.20135582277459446, + "grad_norm": 0.7471950054168701, + "learning_rate": 0.00019293691101185775, + "loss": 2.7346, + "step": 2495 + }, + { + "epoch": 0.20143652651117747, + "grad_norm": 0.7498352527618408, + "learning_rate": 0.00019293108209192104, + "loss": 2.7255, + "step": 2496 + }, + { + "epoch": 0.20151723024776047, + "grad_norm": 0.8233164548873901, + "learning_rate": 0.0001929252508558989, + "loss": 2.8253, + "step": 2497 + }, + { + "epoch": 0.20159793398434347, + "grad_norm": 0.7533289790153503, + "learning_rate": 0.00019291941730393658, + "loss": 2.7487, + "step": 2498 + }, + { + "epoch": 0.20167863772092648, + "grad_norm": 0.7372691035270691, + "learning_rate": 0.0001929135814361795, + "loss": 2.6799, + "step": 2499 + }, + { + "epoch": 0.20175934145750948, + "grad_norm": 0.7760890126228333, + "learning_rate": 0.00019290774325277305, + "loss": 2.8366, + "step": 2500 + }, + { + "epoch": 0.2018400451940925, + "grad_norm": 0.7653746008872986, + "learning_rate": 0.0001929019027538628, + "loss": 2.7413, + "step": 2501 + }, + { + "epoch": 0.2019207489306755, + "grad_norm": 0.7364951372146606, + "learning_rate": 0.0001928960599395943, + "loss": 2.7405, + "step": 2502 + }, + { + "epoch": 0.2020014526672585, + "grad_norm": 0.8317872285842896, + "learning_rate": 0.00019289021481011314, + "loss": 2.7186, + "step": 2503 + }, + { + "epoch": 0.2020821564038415, + "grad_norm": 0.8325691223144531, + "learning_rate": 0.00019288436736556502, + "loss": 2.7305, + "step": 2504 + }, + { + "epoch": 0.2021628601404245, + "grad_norm": 0.7674683332443237, + "learning_rate": 0.00019287851760609566, + "loss": 2.7171, + "step": 2505 + }, + { + "epoch": 0.2022435638770075, + "grad_norm": 0.8043155074119568, + 
"learning_rate": 0.00019287266553185084, + "loss": 2.7425, + "step": 2506 + }, + { + "epoch": 0.2023242676135905, + "grad_norm": 0.8522058725357056, + "learning_rate": 0.00019286681114297642, + "loss": 2.7764, + "step": 2507 + }, + { + "epoch": 0.20240497135017352, + "grad_norm": 0.7700086236000061, + "learning_rate": 0.00019286095443961832, + "loss": 2.7499, + "step": 2508 + }, + { + "epoch": 0.20248567508675652, + "grad_norm": 0.8078013062477112, + "learning_rate": 0.0001928550954219225, + "loss": 2.7863, + "step": 2509 + }, + { + "epoch": 0.20256637882333953, + "grad_norm": 0.7431712746620178, + "learning_rate": 0.00019284923409003496, + "loss": 2.8296, + "step": 2510 + }, + { + "epoch": 0.20264708255992253, + "grad_norm": 0.753754734992981, + "learning_rate": 0.00019284337044410182, + "loss": 2.722, + "step": 2511 + }, + { + "epoch": 0.20272778629650554, + "grad_norm": 0.8117631077766418, + "learning_rate": 0.00019283750448426918, + "loss": 2.7718, + "step": 2512 + }, + { + "epoch": 0.20280849003308854, + "grad_norm": 0.9149020910263062, + "learning_rate": 0.00019283163621068325, + "loss": 2.7416, + "step": 2513 + }, + { + "epoch": 0.20288919376967154, + "grad_norm": 0.8240262866020203, + "learning_rate": 0.0001928257656234903, + "loss": 2.811, + "step": 2514 + }, + { + "epoch": 0.20296989750625455, + "grad_norm": 0.7394035458564758, + "learning_rate": 0.00019281989272283657, + "loss": 2.7345, + "step": 2515 + }, + { + "epoch": 0.20305060124283755, + "grad_norm": 0.7827345132827759, + "learning_rate": 0.00019281401750886854, + "loss": 2.7955, + "step": 2516 + }, + { + "epoch": 0.20313130497942056, + "grad_norm": 0.7482333183288574, + "learning_rate": 0.00019280813998173252, + "loss": 2.6963, + "step": 2517 + }, + { + "epoch": 0.20321200871600356, + "grad_norm": 0.8187180757522583, + "learning_rate": 0.00019280226014157509, + "loss": 2.7413, + "step": 2518 + }, + { + "epoch": 0.20329271245258657, + "grad_norm": 0.7708666920661926, + "learning_rate": 
0.00019279637798854274, + "loss": 2.7636, + "step": 2519 + }, + { + "epoch": 0.20337341618916957, + "grad_norm": 0.7414180040359497, + "learning_rate": 0.00019279049352278208, + "loss": 2.7321, + "step": 2520 + }, + { + "epoch": 0.20345411992575257, + "grad_norm": 0.8172248601913452, + "learning_rate": 0.00019278460674443975, + "loss": 2.8026, + "step": 2521 + }, + { + "epoch": 0.20353482366233558, + "grad_norm": 0.7463089227676392, + "learning_rate": 0.0001927787176536625, + "loss": 2.74, + "step": 2522 + }, + { + "epoch": 0.20361552739891858, + "grad_norm": 0.7684210538864136, + "learning_rate": 0.00019277282625059704, + "loss": 2.782, + "step": 2523 + }, + { + "epoch": 0.2036962311355016, + "grad_norm": 0.9246797561645508, + "learning_rate": 0.00019276693253539027, + "loss": 2.8546, + "step": 2524 + }, + { + "epoch": 0.20377693487208456, + "grad_norm": 0.753753125667572, + "learning_rate": 0.00019276103650818906, + "loss": 2.7422, + "step": 2525 + }, + { + "epoch": 0.20385763860866757, + "grad_norm": 0.7461897134780884, + "learning_rate": 0.00019275513816914032, + "loss": 2.7575, + "step": 2526 + }, + { + "epoch": 0.20393834234525057, + "grad_norm": 0.7555257081985474, + "learning_rate": 0.00019274923751839106, + "loss": 2.7423, + "step": 2527 + }, + { + "epoch": 0.20401904608183358, + "grad_norm": 0.7628511786460876, + "learning_rate": 0.00019274333455608837, + "loss": 2.7386, + "step": 2528 + }, + { + "epoch": 0.20409974981841658, + "grad_norm": 0.7529371976852417, + "learning_rate": 0.00019273742928237937, + "loss": 2.6852, + "step": 2529 + }, + { + "epoch": 0.20418045355499959, + "grad_norm": 0.7466779351234436, + "learning_rate": 0.00019273152169741118, + "loss": 2.6996, + "step": 2530 + }, + { + "epoch": 0.2042611572915826, + "grad_norm": 0.7916153073310852, + "learning_rate": 0.0001927256118013311, + "loss": 2.7644, + "step": 2531 + }, + { + "epoch": 0.2043418610281656, + "grad_norm": 0.7662972211837769, + "learning_rate": 0.00019271969959428636, + 
"loss": 2.7497, + "step": 2532 + }, + { + "epoch": 0.2044225647647486, + "grad_norm": 0.8244680166244507, + "learning_rate": 0.00019271378507642432, + "loss": 2.7598, + "step": 2533 + }, + { + "epoch": 0.2045032685013316, + "grad_norm": 0.7721532583236694, + "learning_rate": 0.00019270786824789244, + "loss": 2.7303, + "step": 2534 + }, + { + "epoch": 0.2045839722379146, + "grad_norm": 0.7598209381103516, + "learning_rate": 0.0001927019491088381, + "loss": 2.734, + "step": 2535 + }, + { + "epoch": 0.2046646759744976, + "grad_norm": 0.7778685092926025, + "learning_rate": 0.00019269602765940887, + "loss": 2.7113, + "step": 2536 + }, + { + "epoch": 0.20474537971108062, + "grad_norm": 0.7447141408920288, + "learning_rate": 0.00019269010389975235, + "loss": 2.7205, + "step": 2537 + }, + { + "epoch": 0.20482608344766362, + "grad_norm": 0.8066664338111877, + "learning_rate": 0.00019268417783001613, + "loss": 2.7637, + "step": 2538 + }, + { + "epoch": 0.20490678718424662, + "grad_norm": 0.7055318355560303, + "learning_rate": 0.00019267824945034794, + "loss": 2.6936, + "step": 2539 + }, + { + "epoch": 0.20498749092082963, + "grad_norm": 0.832647979259491, + "learning_rate": 0.0001926723187608955, + "loss": 2.7423, + "step": 2540 + }, + { + "epoch": 0.20506819465741263, + "grad_norm": 0.7316983938217163, + "learning_rate": 0.0001926663857618066, + "loss": 2.7136, + "step": 2541 + }, + { + "epoch": 0.20514889839399564, + "grad_norm": 0.8115554451942444, + "learning_rate": 0.00019266045045322915, + "loss": 2.6964, + "step": 2542 + }, + { + "epoch": 0.20522960213057864, + "grad_norm": 0.802573025226593, + "learning_rate": 0.00019265451283531108, + "loss": 2.7989, + "step": 2543 + }, + { + "epoch": 0.20531030586716165, + "grad_norm": 0.7073348164558411, + "learning_rate": 0.00019264857290820033, + "loss": 2.7399, + "step": 2544 + }, + { + "epoch": 0.20539100960374465, + "grad_norm": 0.7749258279800415, + "learning_rate": 0.00019264263067204495, + "loss": 2.7321, + "step": 2545 + 
}, + { + "epoch": 0.20547171334032766, + "grad_norm": 0.7473557591438293, + "learning_rate": 0.00019263668612699305, + "loss": 2.7774, + "step": 2546 + }, + { + "epoch": 0.20555241707691066, + "grad_norm": 0.8073423504829407, + "learning_rate": 0.0001926307392731928, + "loss": 2.7429, + "step": 2547 + }, + { + "epoch": 0.20563312081349366, + "grad_norm": 0.9106586575508118, + "learning_rate": 0.00019262479011079235, + "loss": 2.7972, + "step": 2548 + }, + { + "epoch": 0.20571382455007667, + "grad_norm": 0.7975970506668091, + "learning_rate": 0.00019261883863994002, + "loss": 2.7561, + "step": 2549 + }, + { + "epoch": 0.20579452828665967, + "grad_norm": 0.8967030048370361, + "learning_rate": 0.00019261288486078414, + "loss": 2.7368, + "step": 2550 + }, + { + "epoch": 0.20587523202324268, + "grad_norm": 0.7157345414161682, + "learning_rate": 0.00019260692877347304, + "loss": 2.7329, + "step": 2551 + }, + { + "epoch": 0.20595593575982568, + "grad_norm": 0.8758620619773865, + "learning_rate": 0.00019260097037815524, + "loss": 2.7522, + "step": 2552 + }, + { + "epoch": 0.20603663949640869, + "grad_norm": 0.7948124408721924, + "learning_rate": 0.00019259500967497916, + "loss": 2.7675, + "step": 2553 + }, + { + "epoch": 0.2061173432329917, + "grad_norm": 0.8233941197395325, + "learning_rate": 0.00019258904666409344, + "loss": 2.7728, + "step": 2554 + }, + { + "epoch": 0.2061980469695747, + "grad_norm": 0.8084299564361572, + "learning_rate": 0.0001925830813456466, + "loss": 2.7728, + "step": 2555 + }, + { + "epoch": 0.2062787507061577, + "grad_norm": 0.8004557490348816, + "learning_rate": 0.00019257711371978737, + "loss": 2.7783, + "step": 2556 + }, + { + "epoch": 0.2063594544427407, + "grad_norm": 0.7999755144119263, + "learning_rate": 0.0001925711437866645, + "loss": 2.7632, + "step": 2557 + }, + { + "epoch": 0.2064401581793237, + "grad_norm": 0.7317264080047607, + "learning_rate": 0.0001925651715464267, + "loss": 2.7101, + "step": 2558 + }, + { + "epoch": 
0.2065208619159067, + "grad_norm": 0.7906385660171509, + "learning_rate": 0.00019255919699922287, + "loss": 2.7258, + "step": 2559 + }, + { + "epoch": 0.20660156565248972, + "grad_norm": 0.7932917475700378, + "learning_rate": 0.0001925532201452019, + "loss": 2.7714, + "step": 2560 + }, + { + "epoch": 0.20668226938907272, + "grad_norm": 0.8039286732673645, + "learning_rate": 0.00019254724098451275, + "loss": 2.7469, + "step": 2561 + }, + { + "epoch": 0.20676297312565572, + "grad_norm": 0.79400634765625, + "learning_rate": 0.00019254125951730444, + "loss": 2.7499, + "step": 2562 + }, + { + "epoch": 0.20684367686223873, + "grad_norm": 0.8072263598442078, + "learning_rate": 0.00019253527574372603, + "loss": 2.7805, + "step": 2563 + }, + { + "epoch": 0.20692438059882173, + "grad_norm": 0.7117579579353333, + "learning_rate": 0.00019252928966392667, + "loss": 2.7321, + "step": 2564 + }, + { + "epoch": 0.20700508433540474, + "grad_norm": 0.7080324292182922, + "learning_rate": 0.00019252330127805554, + "loss": 2.7225, + "step": 2565 + }, + { + "epoch": 0.20708578807198774, + "grad_norm": 0.7276670336723328, + "learning_rate": 0.00019251731058626186, + "loss": 2.7592, + "step": 2566 + }, + { + "epoch": 0.20716649180857075, + "grad_norm": 0.8030811548233032, + "learning_rate": 0.00019251131758869495, + "loss": 2.7184, + "step": 2567 + }, + { + "epoch": 0.20724719554515375, + "grad_norm": 0.7808283567428589, + "learning_rate": 0.0001925053222855042, + "loss": 2.7504, + "step": 2568 + }, + { + "epoch": 0.20732789928173675, + "grad_norm": 0.783225953578949, + "learning_rate": 0.00019249932467683902, + "loss": 2.7125, + "step": 2569 + }, + { + "epoch": 0.20740860301831976, + "grad_norm": 0.7440134286880493, + "learning_rate": 0.00019249332476284887, + "loss": 2.7938, + "step": 2570 + }, + { + "epoch": 0.20748930675490276, + "grad_norm": 0.8729553818702698, + "learning_rate": 0.00019248732254368328, + "loss": 2.8338, + "step": 2571 + }, + { + "epoch": 0.20757001049148577, + 
"grad_norm": 0.8170497417449951, + "learning_rate": 0.0001924813180194918, + "loss": 2.7254, + "step": 2572 + }, + { + "epoch": 0.20765071422806877, + "grad_norm": 0.733220100402832, + "learning_rate": 0.00019247531119042418, + "loss": 2.6401, + "step": 2573 + }, + { + "epoch": 0.20773141796465178, + "grad_norm": 0.7247937917709351, + "learning_rate": 0.00019246930205663008, + "loss": 2.736, + "step": 2574 + }, + { + "epoch": 0.20781212170123478, + "grad_norm": 0.7880212068557739, + "learning_rate": 0.00019246329061825925, + "loss": 2.7173, + "step": 2575 + }, + { + "epoch": 0.20789282543781776, + "grad_norm": 0.820808470249176, + "learning_rate": 0.00019245727687546149, + "loss": 2.7331, + "step": 2576 + }, + { + "epoch": 0.20797352917440076, + "grad_norm": 0.8605412840843201, + "learning_rate": 0.00019245126082838673, + "loss": 2.761, + "step": 2577 + }, + { + "epoch": 0.20805423291098377, + "grad_norm": 0.763506293296814, + "learning_rate": 0.00019244524247718486, + "loss": 2.7053, + "step": 2578 + }, + { + "epoch": 0.20813493664756677, + "grad_norm": 0.8428114652633667, + "learning_rate": 0.00019243922182200592, + "loss": 2.724, + "step": 2579 + }, + { + "epoch": 0.20821564038414977, + "grad_norm": 0.821986734867096, + "learning_rate": 0.0001924331988629999, + "loss": 2.7615, + "step": 2580 + }, + { + "epoch": 0.20829634412073278, + "grad_norm": 0.8177430629730225, + "learning_rate": 0.00019242717360031693, + "loss": 2.7012, + "step": 2581 + }, + { + "epoch": 0.20837704785731578, + "grad_norm": 0.7584180235862732, + "learning_rate": 0.00019242114603410724, + "loss": 2.7372, + "step": 2582 + }, + { + "epoch": 0.2084577515938988, + "grad_norm": 0.9384645223617554, + "learning_rate": 0.00019241511616452096, + "loss": 2.695, + "step": 2583 + }, + { + "epoch": 0.2085384553304818, + "grad_norm": 0.8518964648246765, + "learning_rate": 0.00019240908399170844, + "loss": 2.8216, + "step": 2584 + }, + { + "epoch": 0.2086191590670648, + "grad_norm": 0.9082949161529541, + 
"learning_rate": 0.00019240304951581995, + "loss": 2.777, + "step": 2585 + }, + { + "epoch": 0.2086998628036478, + "grad_norm": 0.7906371355056763, + "learning_rate": 0.00019239701273700597, + "loss": 2.7083, + "step": 2586 + }, + { + "epoch": 0.2087805665402308, + "grad_norm": 0.7711954712867737, + "learning_rate": 0.00019239097365541686, + "loss": 2.6907, + "step": 2587 + }, + { + "epoch": 0.2088612702768138, + "grad_norm": 0.8155506253242493, + "learning_rate": 0.0001923849322712032, + "loss": 2.7602, + "step": 2588 + }, + { + "epoch": 0.20894197401339681, + "grad_norm": 0.8843441009521484, + "learning_rate": 0.0001923788885845155, + "loss": 2.7525, + "step": 2589 + }, + { + "epoch": 0.20902267774997982, + "grad_norm": 0.7336379289627075, + "learning_rate": 0.00019237284259550444, + "loss": 2.731, + "step": 2590 + }, + { + "epoch": 0.20910338148656282, + "grad_norm": 0.8261263370513916, + "learning_rate": 0.00019236679430432066, + "loss": 2.6493, + "step": 2591 + }, + { + "epoch": 0.20918408522314583, + "grad_norm": 0.7716216444969177, + "learning_rate": 0.00019236074371111497, + "loss": 2.7775, + "step": 2592 + }, + { + "epoch": 0.20926478895972883, + "grad_norm": 0.8390100598335266, + "learning_rate": 0.00019235469081603808, + "loss": 2.7532, + "step": 2593 + }, + { + "epoch": 0.20934549269631184, + "grad_norm": 0.8388446569442749, + "learning_rate": 0.00019234863561924087, + "loss": 2.8171, + "step": 2594 + }, + { + "epoch": 0.20942619643289484, + "grad_norm": 0.8003209829330444, + "learning_rate": 0.00019234257812087425, + "loss": 2.7385, + "step": 2595 + }, + { + "epoch": 0.20950690016947784, + "grad_norm": 0.8008458018302917, + "learning_rate": 0.00019233651832108918, + "loss": 2.7366, + "step": 2596 + }, + { + "epoch": 0.20958760390606085, + "grad_norm": 0.7701897025108337, + "learning_rate": 0.00019233045622003676, + "loss": 2.69, + "step": 2597 + }, + { + "epoch": 0.20966830764264385, + "grad_norm": 0.8106730580329895, + "learning_rate": 
0.00019232439181786796, + "loss": 2.6911, + "step": 2598 + }, + { + "epoch": 0.20974901137922686, + "grad_norm": 0.9580766558647156, + "learning_rate": 0.00019231832511473401, + "loss": 2.7663, + "step": 2599 + }, + { + "epoch": 0.20982971511580986, + "grad_norm": 0.7851876616477966, + "learning_rate": 0.0001923122561107861, + "loss": 2.7632, + "step": 2600 + }, + { + "epoch": 0.20991041885239287, + "grad_norm": 0.8160942196846008, + "learning_rate": 0.0001923061848061754, + "loss": 2.8533, + "step": 2601 + }, + { + "epoch": 0.20999112258897587, + "grad_norm": 0.8540663719177246, + "learning_rate": 0.00019230011120105334, + "loss": 2.7083, + "step": 2602 + }, + { + "epoch": 0.21007182632555887, + "grad_norm": 0.8273833394050598, + "learning_rate": 0.0001922940352955712, + "loss": 2.7916, + "step": 2603 + }, + { + "epoch": 0.21015253006214188, + "grad_norm": 0.8394255638122559, + "learning_rate": 0.00019228795708988046, + "loss": 2.8561, + "step": 2604 + }, + { + "epoch": 0.21023323379872488, + "grad_norm": 0.8291410803794861, + "learning_rate": 0.00019228187658413258, + "loss": 2.7462, + "step": 2605 + }, + { + "epoch": 0.2103139375353079, + "grad_norm": 0.7984235286712646, + "learning_rate": 0.00019227579377847912, + "loss": 2.7459, + "step": 2606 + }, + { + "epoch": 0.2103946412718909, + "grad_norm": 0.8343340158462524, + "learning_rate": 0.00019226970867307163, + "loss": 2.6963, + "step": 2607 + }, + { + "epoch": 0.2104753450084739, + "grad_norm": 0.6982808709144592, + "learning_rate": 0.00019226362126806184, + "loss": 2.7333, + "step": 2608 + }, + { + "epoch": 0.2105560487450569, + "grad_norm": 0.8039572834968567, + "learning_rate": 0.0001922575315636014, + "loss": 2.7253, + "step": 2609 + }, + { + "epoch": 0.2106367524816399, + "grad_norm": 0.8708705902099609, + "learning_rate": 0.00019225143955984214, + "loss": 2.7555, + "step": 2610 + }, + { + "epoch": 0.2107174562182229, + "grad_norm": 0.8773347735404968, + "learning_rate": 0.00019224534525693585, + "loss": 
2.7598, + "step": 2611 + }, + { + "epoch": 0.2107981599548059, + "grad_norm": 0.8151054978370667, + "learning_rate": 0.0001922392486550344, + "loss": 2.7398, + "step": 2612 + }, + { + "epoch": 0.21087886369138892, + "grad_norm": 0.7922329306602478, + "learning_rate": 0.0001922331497542898, + "loss": 2.7296, + "step": 2613 + }, + { + "epoch": 0.21095956742797192, + "grad_norm": 0.7536506652832031, + "learning_rate": 0.00019222704855485396, + "loss": 2.7897, + "step": 2614 + }, + { + "epoch": 0.21104027116455493, + "grad_norm": 0.7539274096488953, + "learning_rate": 0.000192220945056879, + "loss": 2.7809, + "step": 2615 + }, + { + "epoch": 0.21112097490113793, + "grad_norm": 0.7737646698951721, + "learning_rate": 0.00019221483926051705, + "loss": 2.7195, + "step": 2616 + }, + { + "epoch": 0.21120167863772094, + "grad_norm": 0.7421913743019104, + "learning_rate": 0.00019220873116592024, + "loss": 2.6817, + "step": 2617 + }, + { + "epoch": 0.21128238237430394, + "grad_norm": 0.7872927784919739, + "learning_rate": 0.0001922026207732408, + "loss": 2.7379, + "step": 2618 + }, + { + "epoch": 0.21136308611088694, + "grad_norm": 0.7950671315193176, + "learning_rate": 0.00019219650808263104, + "loss": 2.7135, + "step": 2619 + }, + { + "epoch": 0.21144378984746995, + "grad_norm": 0.7711792588233948, + "learning_rate": 0.0001921903930942433, + "loss": 2.7021, + "step": 2620 + }, + { + "epoch": 0.21152449358405295, + "grad_norm": 0.9030743837356567, + "learning_rate": 0.00019218427580822996, + "loss": 2.8083, + "step": 2621 + }, + { + "epoch": 0.21160519732063596, + "grad_norm": 0.8191907405853271, + "learning_rate": 0.0001921781562247435, + "loss": 2.6998, + "step": 2622 + }, + { + "epoch": 0.21168590105721896, + "grad_norm": 0.7883538603782654, + "learning_rate": 0.00019217203434393644, + "loss": 2.7573, + "step": 2623 + }, + { + "epoch": 0.21176660479380197, + "grad_norm": 0.7565868496894836, + "learning_rate": 0.00019216591016596134, + "loss": 2.7725, + "step": 2624 + }, + { 
+ "epoch": 0.21184730853038497, + "grad_norm": 0.8579828143119812, + "learning_rate": 0.00019215978369097086, + "loss": 2.7529, + "step": 2625 + }, + { + "epoch": 0.21192801226696797, + "grad_norm": 0.7835422158241272, + "learning_rate": 0.0001921536549191176, + "loss": 2.6926, + "step": 2626 + }, + { + "epoch": 0.21200871600355095, + "grad_norm": 0.8041907548904419, + "learning_rate": 0.00019214752385055442, + "loss": 2.7541, + "step": 2627 + }, + { + "epoch": 0.21208941974013396, + "grad_norm": 0.7754014730453491, + "learning_rate": 0.00019214139048543406, + "loss": 2.6807, + "step": 2628 + }, + { + "epoch": 0.21217012347671696, + "grad_norm": 0.8222344517707825, + "learning_rate": 0.00019213525482390936, + "loss": 2.7339, + "step": 2629 + }, + { + "epoch": 0.21225082721329996, + "grad_norm": 0.8083673715591431, + "learning_rate": 0.0001921291168661333, + "loss": 2.739, + "step": 2630 + }, + { + "epoch": 0.21233153094988297, + "grad_norm": 0.8039100766181946, + "learning_rate": 0.0001921229766122588, + "loss": 2.7372, + "step": 2631 + }, + { + "epoch": 0.21241223468646597, + "grad_norm": 0.7513072490692139, + "learning_rate": 0.00019211683406243892, + "loss": 2.7284, + "step": 2632 + }, + { + "epoch": 0.21249293842304898, + "grad_norm": 0.7653890252113342, + "learning_rate": 0.00019211068921682673, + "loss": 2.6911, + "step": 2633 + }, + { + "epoch": 0.21257364215963198, + "grad_norm": 0.7210217714309692, + "learning_rate": 0.00019210454207557542, + "loss": 2.6989, + "step": 2634 + }, + { + "epoch": 0.21265434589621499, + "grad_norm": 0.7389202117919922, + "learning_rate": 0.00019209839263883814, + "loss": 2.7016, + "step": 2635 + }, + { + "epoch": 0.212735049632798, + "grad_norm": 0.8069031238555908, + "learning_rate": 0.00019209224090676813, + "loss": 2.8213, + "step": 2636 + }, + { + "epoch": 0.212815753369381, + "grad_norm": 0.8019161224365234, + "learning_rate": 0.00019208608687951877, + "loss": 2.7413, + "step": 2637 + }, + { + "epoch": 0.212896457105964, + 
"grad_norm": 0.775572657585144, + "learning_rate": 0.00019207993055724343, + "loss": 2.7016, + "step": 2638 + }, + { + "epoch": 0.212977160842547, + "grad_norm": 0.7482941746711731, + "learning_rate": 0.0001920737719400955, + "loss": 2.7991, + "step": 2639 + }, + { + "epoch": 0.21305786457913, + "grad_norm": 0.8467636704444885, + "learning_rate": 0.0001920676110282285, + "loss": 2.7401, + "step": 2640 + }, + { + "epoch": 0.213138568315713, + "grad_norm": 0.8726305365562439, + "learning_rate": 0.00019206144782179597, + "loss": 2.7599, + "step": 2641 + }, + { + "epoch": 0.21321927205229602, + "grad_norm": 0.740527868270874, + "learning_rate": 0.00019205528232095148, + "loss": 2.7326, + "step": 2642 + }, + { + "epoch": 0.21329997578887902, + "grad_norm": 0.7932354211807251, + "learning_rate": 0.00019204911452584873, + "loss": 2.7873, + "step": 2643 + }, + { + "epoch": 0.21338067952546202, + "grad_norm": 0.7994125485420227, + "learning_rate": 0.00019204294443664143, + "loss": 2.7305, + "step": 2644 + }, + { + "epoch": 0.21346138326204503, + "grad_norm": 0.880557656288147, + "learning_rate": 0.00019203677205348338, + "loss": 2.7295, + "step": 2645 + }, + { + "epoch": 0.21354208699862803, + "grad_norm": 0.8269557952880859, + "learning_rate": 0.00019203059737652836, + "loss": 2.765, + "step": 2646 + }, + { + "epoch": 0.21362279073521104, + "grad_norm": 0.8732784986495972, + "learning_rate": 0.00019202442040593026, + "loss": 2.6742, + "step": 2647 + }, + { + "epoch": 0.21370349447179404, + "grad_norm": 0.7921704649925232, + "learning_rate": 0.0001920182411418431, + "loss": 2.7144, + "step": 2648 + }, + { + "epoch": 0.21378419820837705, + "grad_norm": 0.8097628355026245, + "learning_rate": 0.00019201205958442082, + "loss": 2.7513, + "step": 2649 + }, + { + "epoch": 0.21386490194496005, + "grad_norm": 0.8230542540550232, + "learning_rate": 0.00019200587573381744, + "loss": 2.7648, + "step": 2650 + }, + { + "epoch": 0.21394560568154306, + "grad_norm": 0.7719153761863708, + 
"learning_rate": 0.0001919996895901872, + "loss": 2.7637, + "step": 2651 + }, + { + "epoch": 0.21402630941812606, + "grad_norm": 0.9022669792175293, + "learning_rate": 0.00019199350115368415, + "loss": 2.7707, + "step": 2652 + }, + { + "epoch": 0.21410701315470906, + "grad_norm": 0.8111257553100586, + "learning_rate": 0.00019198731042446263, + "loss": 2.7423, + "step": 2653 + }, + { + "epoch": 0.21418771689129207, + "grad_norm": 0.7534981966018677, + "learning_rate": 0.00019198111740267683, + "loss": 2.7474, + "step": 2654 + }, + { + "epoch": 0.21426842062787507, + "grad_norm": 0.761411190032959, + "learning_rate": 0.00019197492208848117, + "loss": 2.7541, + "step": 2655 + }, + { + "epoch": 0.21434912436445808, + "grad_norm": 0.8076324462890625, + "learning_rate": 0.00019196872448203002, + "loss": 2.7198, + "step": 2656 + }, + { + "epoch": 0.21442982810104108, + "grad_norm": 0.7987746000289917, + "learning_rate": 0.00019196252458347784, + "loss": 2.7164, + "step": 2657 + }, + { + "epoch": 0.21451053183762409, + "grad_norm": 0.7581545114517212, + "learning_rate": 0.0001919563223929792, + "loss": 2.6837, + "step": 2658 + }, + { + "epoch": 0.2145912355742071, + "grad_norm": 0.8773601055145264, + "learning_rate": 0.00019195011791068857, + "loss": 2.8248, + "step": 2659 + }, + { + "epoch": 0.2146719393107901, + "grad_norm": 0.7027503252029419, + "learning_rate": 0.00019194391113676066, + "loss": 2.6726, + "step": 2660 + }, + { + "epoch": 0.2147526430473731, + "grad_norm": 0.8650866746902466, + "learning_rate": 0.00019193770207135015, + "loss": 2.7348, + "step": 2661 + }, + { + "epoch": 0.2148333467839561, + "grad_norm": 0.8521862030029297, + "learning_rate": 0.0001919314907146118, + "loss": 2.7409, + "step": 2662 + }, + { + "epoch": 0.2149140505205391, + "grad_norm": 0.8098535537719727, + "learning_rate": 0.00019192527706670033, + "loss": 2.7615, + "step": 2663 + }, + { + "epoch": 0.2149947542571221, + "grad_norm": 0.7396193146705627, + "learning_rate": 
0.0001919190611277707, + "loss": 2.7191, + "step": 2664 + }, + { + "epoch": 0.21507545799370512, + "grad_norm": 0.8245799541473389, + "learning_rate": 0.00019191284289797776, + "loss": 2.7429, + "step": 2665 + }, + { + "epoch": 0.21515616173028812, + "grad_norm": 0.791646420955658, + "learning_rate": 0.00019190662237747656, + "loss": 2.7197, + "step": 2666 + }, + { + "epoch": 0.21523686546687112, + "grad_norm": 0.7850802540779114, + "learning_rate": 0.00019190039956642205, + "loss": 2.7353, + "step": 2667 + }, + { + "epoch": 0.21531756920345413, + "grad_norm": 0.7657971978187561, + "learning_rate": 0.00019189417446496937, + "loss": 2.7083, + "step": 2668 + }, + { + "epoch": 0.21539827294003713, + "grad_norm": 0.7704403400421143, + "learning_rate": 0.00019188794707327363, + "loss": 2.7813, + "step": 2669 + }, + { + "epoch": 0.21547897667662014, + "grad_norm": 0.7345917224884033, + "learning_rate": 0.00019188171739149005, + "loss": 2.7098, + "step": 2670 + }, + { + "epoch": 0.21555968041320314, + "grad_norm": 0.728831946849823, + "learning_rate": 0.00019187548541977392, + "loss": 2.6745, + "step": 2671 + }, + { + "epoch": 0.21564038414978615, + "grad_norm": 0.8079627156257629, + "learning_rate": 0.0001918692511582805, + "loss": 2.6427, + "step": 2672 + }, + { + "epoch": 0.21572108788636915, + "grad_norm": 0.766808032989502, + "learning_rate": 0.0001918630146071652, + "loss": 2.6956, + "step": 2673 + }, + { + "epoch": 0.21580179162295215, + "grad_norm": 0.7555391192436218, + "learning_rate": 0.00019185677576658345, + "loss": 2.6499, + "step": 2674 + }, + { + "epoch": 0.21588249535953516, + "grad_norm": 0.7740229964256287, + "learning_rate": 0.00019185053463669074, + "loss": 2.7685, + "step": 2675 + }, + { + "epoch": 0.21596319909611816, + "grad_norm": 0.8272803425788879, + "learning_rate": 0.00019184429121764257, + "loss": 2.7272, + "step": 2676 + }, + { + "epoch": 0.21604390283270117, + "grad_norm": 0.870625376701355, + "learning_rate": 0.00019183804550959463, + 
"loss": 2.7509, + "step": 2677 + }, + { + "epoch": 0.21612460656928414, + "grad_norm": 0.8021238446235657, + "learning_rate": 0.0001918317975127025, + "loss": 2.7058, + "step": 2678 + }, + { + "epoch": 0.21620531030586715, + "grad_norm": 0.729918897151947, + "learning_rate": 0.00019182554722712192, + "loss": 2.6145, + "step": 2679 + }, + { + "epoch": 0.21628601404245015, + "grad_norm": 0.7658380270004272, + "learning_rate": 0.00019181929465300867, + "loss": 2.712, + "step": 2680 + }, + { + "epoch": 0.21636671777903316, + "grad_norm": 0.7702174186706543, + "learning_rate": 0.00019181303979051858, + "loss": 2.8257, + "step": 2681 + }, + { + "epoch": 0.21644742151561616, + "grad_norm": 0.7782231569290161, + "learning_rate": 0.00019180678263980755, + "loss": 2.8226, + "step": 2682 + }, + { + "epoch": 0.21652812525219917, + "grad_norm": 0.7448495626449585, + "learning_rate": 0.0001918005232010315, + "loss": 2.7877, + "step": 2683 + }, + { + "epoch": 0.21660882898878217, + "grad_norm": 0.7273527979850769, + "learning_rate": 0.00019179426147434647, + "loss": 2.7169, + "step": 2684 + }, + { + "epoch": 0.21668953272536517, + "grad_norm": 0.7730992436408997, + "learning_rate": 0.00019178799745990846, + "loss": 2.717, + "step": 2685 + }, + { + "epoch": 0.21677023646194818, + "grad_norm": 0.7709231376647949, + "learning_rate": 0.0001917817311578736, + "loss": 2.7676, + "step": 2686 + }, + { + "epoch": 0.21685094019853118, + "grad_norm": 0.7825181484222412, + "learning_rate": 0.00019177546256839812, + "loss": 2.7473, + "step": 2687 + }, + { + "epoch": 0.2169316439351142, + "grad_norm": 0.8133581280708313, + "learning_rate": 0.0001917691916916382, + "loss": 2.7242, + "step": 2688 + }, + { + "epoch": 0.2170123476716972, + "grad_norm": 0.7833015322685242, + "learning_rate": 0.00019176291852775011, + "loss": 2.8128, + "step": 2689 + }, + { + "epoch": 0.2170930514082802, + "grad_norm": 0.7423487305641174, + "learning_rate": 0.00019175664307689028, + "loss": 2.6999, + "step": 2690 + 
}, + { + "epoch": 0.2171737551448632, + "grad_norm": 0.7881289124488831, + "learning_rate": 0.000191750365339215, + "loss": 2.7349, + "step": 2691 + }, + { + "epoch": 0.2172544588814462, + "grad_norm": 0.8316197395324707, + "learning_rate": 0.00019174408531488077, + "loss": 2.7654, + "step": 2692 + }, + { + "epoch": 0.2173351626180292, + "grad_norm": 0.7589917778968811, + "learning_rate": 0.00019173780300404413, + "loss": 2.6815, + "step": 2693 + }, + { + "epoch": 0.21741586635461221, + "grad_norm": 0.7752439975738525, + "learning_rate": 0.00019173151840686163, + "loss": 2.7804, + "step": 2694 + }, + { + "epoch": 0.21749657009119522, + "grad_norm": 0.8156552910804749, + "learning_rate": 0.0001917252315234899, + "loss": 2.7325, + "step": 2695 + }, + { + "epoch": 0.21757727382777822, + "grad_norm": 0.8886982798576355, + "learning_rate": 0.00019171894235408564, + "loss": 2.7257, + "step": 2696 + }, + { + "epoch": 0.21765797756436123, + "grad_norm": 0.8270704746246338, + "learning_rate": 0.00019171265089880558, + "loss": 2.7357, + "step": 2697 + }, + { + "epoch": 0.21773868130094423, + "grad_norm": 0.807700514793396, + "learning_rate": 0.00019170635715780651, + "loss": 2.7488, + "step": 2698 + }, + { + "epoch": 0.21781938503752724, + "grad_norm": 0.8195288181304932, + "learning_rate": 0.00019170006113124533, + "loss": 2.7048, + "step": 2699 + }, + { + "epoch": 0.21790008877411024, + "grad_norm": 0.817097008228302, + "learning_rate": 0.00019169376281927888, + "loss": 2.7148, + "step": 2700 + }, + { + "epoch": 0.21798079251069324, + "grad_norm": 0.8415588140487671, + "learning_rate": 0.0001916874622220642, + "loss": 2.7376, + "step": 2701 + }, + { + "epoch": 0.21806149624727625, + "grad_norm": 0.8004198670387268, + "learning_rate": 0.00019168115933975826, + "loss": 2.7145, + "step": 2702 + }, + { + "epoch": 0.21814219998385925, + "grad_norm": 0.8167368769645691, + "learning_rate": 0.0001916748541725182, + "loss": 2.6923, + "step": 2703 + }, + { + "epoch": 
0.21822290372044226, + "grad_norm": 0.8877980709075928, + "learning_rate": 0.0001916685467205011, + "loss": 2.8232, + "step": 2704 + }, + { + "epoch": 0.21830360745702526, + "grad_norm": 0.7835622429847717, + "learning_rate": 0.00019166223698386422, + "loss": 2.7797, + "step": 2705 + }, + { + "epoch": 0.21838431119360827, + "grad_norm": 0.8023552894592285, + "learning_rate": 0.00019165592496276477, + "loss": 2.6697, + "step": 2706 + }, + { + "epoch": 0.21846501493019127, + "grad_norm": 0.8549069166183472, + "learning_rate": 0.00019164961065736008, + "loss": 2.729, + "step": 2707 + }, + { + "epoch": 0.21854571866677427, + "grad_norm": 0.8561950325965881, + "learning_rate": 0.00019164329406780753, + "loss": 2.772, + "step": 2708 + }, + { + "epoch": 0.21862642240335728, + "grad_norm": 0.6979276537895203, + "learning_rate": 0.00019163697519426453, + "loss": 2.7195, + "step": 2709 + }, + { + "epoch": 0.21870712613994028, + "grad_norm": 0.7659175395965576, + "learning_rate": 0.00019163065403688856, + "loss": 2.7742, + "step": 2710 + }, + { + "epoch": 0.2187878298765233, + "grad_norm": 0.8621466755867004, + "learning_rate": 0.00019162433059583718, + "loss": 2.721, + "step": 2711 + }, + { + "epoch": 0.2188685336131063, + "grad_norm": 0.8086833357810974, + "learning_rate": 0.00019161800487126795, + "loss": 2.7356, + "step": 2712 + }, + { + "epoch": 0.2189492373496893, + "grad_norm": 0.816215455532074, + "learning_rate": 0.00019161167686333855, + "loss": 2.7159, + "step": 2713 + }, + { + "epoch": 0.2190299410862723, + "grad_norm": 0.9180822968482971, + "learning_rate": 0.0001916053465722067, + "loss": 2.7162, + "step": 2714 + }, + { + "epoch": 0.2191106448228553, + "grad_norm": 0.7547199130058289, + "learning_rate": 0.00019159901399803014, + "loss": 2.7338, + "step": 2715 + }, + { + "epoch": 0.2191913485594383, + "grad_norm": 0.7380769848823547, + "learning_rate": 0.00019159267914096675, + "loss": 2.7149, + "step": 2716 + }, + { + "epoch": 0.2192720522960213, + "grad_norm": 
0.7242285013198853, + "learning_rate": 0.00019158634200117433, + "loss": 2.724, + "step": 2717 + }, + { + "epoch": 0.21935275603260432, + "grad_norm": 0.8400316834449768, + "learning_rate": 0.00019158000257881087, + "loss": 2.7528, + "step": 2718 + }, + { + "epoch": 0.21943345976918732, + "grad_norm": 0.8437172770500183, + "learning_rate": 0.00019157366087403435, + "loss": 2.7872, + "step": 2719 + }, + { + "epoch": 0.21951416350577033, + "grad_norm": 0.7428301572799683, + "learning_rate": 0.00019156731688700282, + "loss": 2.6831, + "step": 2720 + }, + { + "epoch": 0.21959486724235333, + "grad_norm": 0.7589641213417053, + "learning_rate": 0.00019156097061787445, + "loss": 2.7105, + "step": 2721 + }, + { + "epoch": 0.21967557097893634, + "grad_norm": 0.7607305645942688, + "learning_rate": 0.00019155462206680727, + "loss": 2.7913, + "step": 2722 + }, + { + "epoch": 0.21975627471551934, + "grad_norm": 0.7455689311027527, + "learning_rate": 0.00019154827123395963, + "loss": 2.6321, + "step": 2723 + }, + { + "epoch": 0.21983697845210234, + "grad_norm": 0.7860318422317505, + "learning_rate": 0.00019154191811948974, + "loss": 2.7907, + "step": 2724 + }, + { + "epoch": 0.21991768218868535, + "grad_norm": 0.8101385235786438, + "learning_rate": 0.00019153556272355596, + "loss": 2.7682, + "step": 2725 + }, + { + "epoch": 0.21999838592526835, + "grad_norm": 0.7437283396720886, + "learning_rate": 0.00019152920504631667, + "loss": 2.7271, + "step": 2726 + }, + { + "epoch": 0.22007908966185136, + "grad_norm": 0.7390851974487305, + "learning_rate": 0.00019152284508793034, + "loss": 2.7492, + "step": 2727 + }, + { + "epoch": 0.22015979339843436, + "grad_norm": 0.9074966311454773, + "learning_rate": 0.0001915164828485555, + "loss": 2.8076, + "step": 2728 + }, + { + "epoch": 0.22024049713501734, + "grad_norm": 0.7644218802452087, + "learning_rate": 0.00019151011832835063, + "loss": 2.7238, + "step": 2729 + }, + { + "epoch": 0.22032120087160034, + "grad_norm": 0.823567807674408, + 
"learning_rate": 0.0001915037515274744, + "loss": 2.7701, + "step": 2730 + }, + { + "epoch": 0.22040190460818335, + "grad_norm": 0.7601858377456665, + "learning_rate": 0.00019149738244608552, + "loss": 2.6981, + "step": 2731 + }, + { + "epoch": 0.22048260834476635, + "grad_norm": 0.8242961764335632, + "learning_rate": 0.00019149101108434269, + "loss": 2.6916, + "step": 2732 + }, + { + "epoch": 0.22056331208134936, + "grad_norm": 0.7970656156539917, + "learning_rate": 0.0001914846374424047, + "loss": 2.7858, + "step": 2733 + }, + { + "epoch": 0.22064401581793236, + "grad_norm": 0.7844050526618958, + "learning_rate": 0.0001914782615204304, + "loss": 2.6782, + "step": 2734 + }, + { + "epoch": 0.22072471955451536, + "grad_norm": 0.7965044975280762, + "learning_rate": 0.00019147188331857868, + "loss": 2.7563, + "step": 2735 + }, + { + "epoch": 0.22080542329109837, + "grad_norm": 0.8189071416854858, + "learning_rate": 0.00019146550283700856, + "loss": 2.7587, + "step": 2736 + }, + { + "epoch": 0.22088612702768137, + "grad_norm": 0.7610960602760315, + "learning_rate": 0.00019145912007587898, + "loss": 2.663, + "step": 2737 + }, + { + "epoch": 0.22096683076426438, + "grad_norm": 0.7642313838005066, + "learning_rate": 0.00019145273503534907, + "loss": 2.78, + "step": 2738 + }, + { + "epoch": 0.22104753450084738, + "grad_norm": 0.7699539065361023, + "learning_rate": 0.0001914463477155779, + "loss": 2.7429, + "step": 2739 + }, + { + "epoch": 0.22112823823743039, + "grad_norm": 0.7674413919448853, + "learning_rate": 0.00019143995811672477, + "loss": 2.7048, + "step": 2740 + }, + { + "epoch": 0.2212089419740134, + "grad_norm": 0.7871866226196289, + "learning_rate": 0.00019143356623894882, + "loss": 2.7769, + "step": 2741 + }, + { + "epoch": 0.2212896457105964, + "grad_norm": 0.8453468680381775, + "learning_rate": 0.00019142717208240937, + "loss": 2.7677, + "step": 2742 + }, + { + "epoch": 0.2213703494471794, + "grad_norm": 0.8050780892372131, + "learning_rate": 
0.00019142077564726582, + "loss": 2.7809, + "step": 2743 + }, + { + "epoch": 0.2214510531837624, + "grad_norm": 0.811287522315979, + "learning_rate": 0.0001914143769336776, + "loss": 2.7201, + "step": 2744 + }, + { + "epoch": 0.2215317569203454, + "grad_norm": 0.823106050491333, + "learning_rate": 0.00019140797594180412, + "loss": 2.7371, + "step": 2745 + }, + { + "epoch": 0.2216124606569284, + "grad_norm": 0.778126060962677, + "learning_rate": 0.0001914015726718049, + "loss": 2.6925, + "step": 2746 + }, + { + "epoch": 0.22169316439351142, + "grad_norm": 0.8240278959274292, + "learning_rate": 0.0001913951671238396, + "loss": 2.7227, + "step": 2747 + }, + { + "epoch": 0.22177386813009442, + "grad_norm": 0.8061805963516235, + "learning_rate": 0.0001913887592980678, + "loss": 2.7092, + "step": 2748 + }, + { + "epoch": 0.22185457186667742, + "grad_norm": 0.9111800789833069, + "learning_rate": 0.00019138234919464925, + "loss": 2.7364, + "step": 2749 + }, + { + "epoch": 0.22193527560326043, + "grad_norm": 0.8154863715171814, + "learning_rate": 0.0001913759368137437, + "loss": 2.6983, + "step": 2750 + }, + { + "epoch": 0.22201597933984343, + "grad_norm": 0.8547734022140503, + "learning_rate": 0.0001913695221555109, + "loss": 2.7016, + "step": 2751 + }, + { + "epoch": 0.22209668307642644, + "grad_norm": 0.7488531470298767, + "learning_rate": 0.00019136310522011079, + "loss": 2.6641, + "step": 2752 + }, + { + "epoch": 0.22217738681300944, + "grad_norm": 0.9118027091026306, + "learning_rate": 0.00019135668600770326, + "loss": 2.6965, + "step": 2753 + }, + { + "epoch": 0.22225809054959245, + "grad_norm": 0.7629117369651794, + "learning_rate": 0.00019135026451844834, + "loss": 2.7836, + "step": 2754 + }, + { + "epoch": 0.22233879428617545, + "grad_norm": 0.8081222176551819, + "learning_rate": 0.000191343840752506, + "loss": 2.7339, + "step": 2755 + }, + { + "epoch": 0.22241949802275846, + "grad_norm": 0.9143899083137512, + "learning_rate": 0.00019133741471003636, + "loss": 
2.7051, + "step": 2756 + }, + { + "epoch": 0.22250020175934146, + "grad_norm": 0.8096790909767151, + "learning_rate": 0.00019133098639119962, + "loss": 2.6884, + "step": 2757 + }, + { + "epoch": 0.22258090549592446, + "grad_norm": 0.7959297895431519, + "learning_rate": 0.00019132455579615597, + "loss": 2.7127, + "step": 2758 + }, + { + "epoch": 0.22266160923250747, + "grad_norm": 0.7111356854438782, + "learning_rate": 0.00019131812292506563, + "loss": 2.7418, + "step": 2759 + }, + { + "epoch": 0.22274231296909047, + "grad_norm": 0.7584012150764465, + "learning_rate": 0.00019131168777808898, + "loss": 2.6705, + "step": 2760 + }, + { + "epoch": 0.22282301670567348, + "grad_norm": 0.7646663784980774, + "learning_rate": 0.0001913052503553864, + "loss": 2.7166, + "step": 2761 + }, + { + "epoch": 0.22290372044225648, + "grad_norm": 0.7643954157829285, + "learning_rate": 0.00019129881065711827, + "loss": 2.7967, + "step": 2762 + }, + { + "epoch": 0.22298442417883949, + "grad_norm": 0.7591429948806763, + "learning_rate": 0.0001912923686834451, + "loss": 2.6611, + "step": 2763 + }, + { + "epoch": 0.2230651279154225, + "grad_norm": 0.7182386517524719, + "learning_rate": 0.00019128592443452749, + "loss": 2.6808, + "step": 2764 + }, + { + "epoch": 0.2231458316520055, + "grad_norm": 0.7689648270606995, + "learning_rate": 0.00019127947791052602, + "loss": 2.7288, + "step": 2765 + }, + { + "epoch": 0.2232265353885885, + "grad_norm": 0.7851321697235107, + "learning_rate": 0.00019127302911160136, + "loss": 2.7227, + "step": 2766 + }, + { + "epoch": 0.2233072391251715, + "grad_norm": 0.8419411182403564, + "learning_rate": 0.00019126657803791424, + "loss": 2.7397, + "step": 2767 + }, + { + "epoch": 0.2233879428617545, + "grad_norm": 0.7657596468925476, + "learning_rate": 0.0001912601246896254, + "loss": 2.7223, + "step": 2768 + }, + { + "epoch": 0.2234686465983375, + "grad_norm": 0.8033619523048401, + "learning_rate": 0.00019125366906689567, + "loss": 2.7256, + "step": 2769 + }, + { 
+ "epoch": 0.22354935033492052, + "grad_norm": 0.7784682512283325, + "learning_rate": 0.00019124721116988601, + "loss": 2.7692, + "step": 2770 + }, + { + "epoch": 0.22363005407150352, + "grad_norm": 0.7842707633972168, + "learning_rate": 0.00019124075099875731, + "loss": 2.7707, + "step": 2771 + }, + { + "epoch": 0.22371075780808652, + "grad_norm": 0.7864845395088196, + "learning_rate": 0.0001912342885536706, + "loss": 2.6912, + "step": 2772 + }, + { + "epoch": 0.22379146154466953, + "grad_norm": 0.8544312715530396, + "learning_rate": 0.0001912278238347869, + "loss": 2.8345, + "step": 2773 + }, + { + "epoch": 0.22387216528125253, + "grad_norm": 0.7210882306098938, + "learning_rate": 0.0001912213568422674, + "loss": 2.6933, + "step": 2774 + }, + { + "epoch": 0.22395286901783554, + "grad_norm": 0.8877022862434387, + "learning_rate": 0.00019121488757627318, + "loss": 2.7583, + "step": 2775 + }, + { + "epoch": 0.22403357275441854, + "grad_norm": 0.902886688709259, + "learning_rate": 0.00019120841603696554, + "loss": 2.8, + "step": 2776 + }, + { + "epoch": 0.22411427649100155, + "grad_norm": 0.771294355392456, + "learning_rate": 0.0001912019422245058, + "loss": 2.7712, + "step": 2777 + }, + { + "epoch": 0.22419498022758455, + "grad_norm": 0.7973463535308838, + "learning_rate": 0.0001911954661390552, + "loss": 2.7368, + "step": 2778 + }, + { + "epoch": 0.22427568396416755, + "grad_norm": 0.776836633682251, + "learning_rate": 0.00019118898778077524, + "loss": 2.7126, + "step": 2779 + }, + { + "epoch": 0.22435638770075053, + "grad_norm": 0.8286641240119934, + "learning_rate": 0.00019118250714982731, + "loss": 2.7148, + "step": 2780 + }, + { + "epoch": 0.22443709143733354, + "grad_norm": 0.7848700284957886, + "learning_rate": 0.00019117602424637294, + "loss": 2.7284, + "step": 2781 + }, + { + "epoch": 0.22451779517391654, + "grad_norm": 0.7658216953277588, + "learning_rate": 0.0001911695390705737, + "loss": 2.7186, + "step": 2782 + }, + { + "epoch": 0.22459849891049954, + 
"grad_norm": 0.7596792578697205, + "learning_rate": 0.00019116305162259124, + "loss": 2.6854, + "step": 2783 + }, + { + "epoch": 0.22467920264708255, + "grad_norm": 0.7901157140731812, + "learning_rate": 0.00019115656190258726, + "loss": 2.7347, + "step": 2784 + }, + { + "epoch": 0.22475990638366555, + "grad_norm": 0.7499287128448486, + "learning_rate": 0.00019115006991072346, + "loss": 2.7219, + "step": 2785 + }, + { + "epoch": 0.22484061012024856, + "grad_norm": 0.7427374124526978, + "learning_rate": 0.00019114357564716162, + "loss": 2.7147, + "step": 2786 + }, + { + "epoch": 0.22492131385683156, + "grad_norm": 0.8305855393409729, + "learning_rate": 0.00019113707911206363, + "loss": 2.7587, + "step": 2787 + }, + { + "epoch": 0.22500201759341457, + "grad_norm": 0.8266459703445435, + "learning_rate": 0.00019113058030559142, + "loss": 2.7275, + "step": 2788 + }, + { + "epoch": 0.22508272132999757, + "grad_norm": 0.7338323593139648, + "learning_rate": 0.0001911240792279069, + "loss": 2.762, + "step": 2789 + }, + { + "epoch": 0.22516342506658057, + "grad_norm": 0.7653434872627258, + "learning_rate": 0.00019111757587917216, + "loss": 2.6715, + "step": 2790 + }, + { + "epoch": 0.22524412880316358, + "grad_norm": 0.76301509141922, + "learning_rate": 0.00019111107025954923, + "loss": 2.698, + "step": 2791 + }, + { + "epoch": 0.22532483253974658, + "grad_norm": 0.7810547947883606, + "learning_rate": 0.00019110456236920024, + "loss": 2.7295, + "step": 2792 + }, + { + "epoch": 0.2254055362763296, + "grad_norm": 0.7885214686393738, + "learning_rate": 0.00019109805220828742, + "loss": 2.7724, + "step": 2793 + }, + { + "epoch": 0.2254862400129126, + "grad_norm": 0.8087031841278076, + "learning_rate": 0.00019109153977697301, + "loss": 2.7888, + "step": 2794 + }, + { + "epoch": 0.2255669437494956, + "grad_norm": 0.795101523399353, + "learning_rate": 0.00019108502507541933, + "loss": 2.6815, + "step": 2795 + }, + { + "epoch": 0.2256476474860786, + "grad_norm": 0.8337482213973999, 
+ "learning_rate": 0.0001910785081037887, + "loss": 2.8192, + "step": 2796 + }, + { + "epoch": 0.2257283512226616, + "grad_norm": 0.8357288241386414, + "learning_rate": 0.00019107198886224357, + "loss": 2.7867, + "step": 2797 + }, + { + "epoch": 0.2258090549592446, + "grad_norm": 0.80678391456604, + "learning_rate": 0.00019106546735094644, + "loss": 2.7313, + "step": 2798 + }, + { + "epoch": 0.2258897586958276, + "grad_norm": 0.7481401562690735, + "learning_rate": 0.00019105894357005979, + "loss": 2.7073, + "step": 2799 + }, + { + "epoch": 0.22597046243241062, + "grad_norm": 0.8025074005126953, + "learning_rate": 0.00019105241751974622, + "loss": 2.6922, + "step": 2800 + }, + { + "epoch": 0.22605116616899362, + "grad_norm": 0.7308986186981201, + "learning_rate": 0.00019104588920016842, + "loss": 2.7511, + "step": 2801 + }, + { + "epoch": 0.22613186990557663, + "grad_norm": 0.7727689146995544, + "learning_rate": 0.00019103935861148905, + "loss": 2.707, + "step": 2802 + }, + { + "epoch": 0.22621257364215963, + "grad_norm": 0.8611076474189758, + "learning_rate": 0.0001910328257538709, + "loss": 2.8494, + "step": 2803 + }, + { + "epoch": 0.22629327737874264, + "grad_norm": 0.8487605452537537, + "learning_rate": 0.00019102629062747677, + "loss": 2.7698, + "step": 2804 + }, + { + "epoch": 0.22637398111532564, + "grad_norm": 0.7495502233505249, + "learning_rate": 0.00019101975323246952, + "loss": 2.7091, + "step": 2805 + }, + { + "epoch": 0.22645468485190864, + "grad_norm": 0.7334234118461609, + "learning_rate": 0.0001910132135690121, + "loss": 2.7375, + "step": 2806 + }, + { + "epoch": 0.22653538858849165, + "grad_norm": 0.879912257194519, + "learning_rate": 0.00019100667163726747, + "loss": 2.7278, + "step": 2807 + }, + { + "epoch": 0.22661609232507465, + "grad_norm": 0.8087306618690491, + "learning_rate": 0.0001910001274373987, + "loss": 2.8065, + "step": 2808 + }, + { + "epoch": 0.22669679606165766, + "grad_norm": 0.7548169493675232, + "learning_rate": 
0.00019099358096956887, + "loss": 2.7235, + "step": 2809 + }, + { + "epoch": 0.22677749979824066, + "grad_norm": 0.7505785822868347, + "learning_rate": 0.00019098703223394118, + "loss": 2.6633, + "step": 2810 + }, + { + "epoch": 0.22685820353482367, + "grad_norm": 0.829075813293457, + "learning_rate": 0.00019098048123067875, + "loss": 2.7389, + "step": 2811 + }, + { + "epoch": 0.22693890727140667, + "grad_norm": 0.7731673121452332, + "learning_rate": 0.00019097392795994493, + "loss": 2.7639, + "step": 2812 + }, + { + "epoch": 0.22701961100798967, + "grad_norm": 0.7389004826545715, + "learning_rate": 0.00019096737242190303, + "loss": 2.717, + "step": 2813 + }, + { + "epoch": 0.22710031474457268, + "grad_norm": 0.7520460486412048, + "learning_rate": 0.0001909608146167164, + "loss": 2.7203, + "step": 2814 + }, + { + "epoch": 0.22718101848115568, + "grad_norm": 0.7272354364395142, + "learning_rate": 0.00019095425454454849, + "loss": 2.7306, + "step": 2815 + }, + { + "epoch": 0.2272617222177387, + "grad_norm": 0.7593528032302856, + "learning_rate": 0.00019094769220556282, + "loss": 2.7565, + "step": 2816 + }, + { + "epoch": 0.2273424259543217, + "grad_norm": 0.7312695384025574, + "learning_rate": 0.0001909411275999229, + "loss": 2.744, + "step": 2817 + }, + { + "epoch": 0.2274231296909047, + "grad_norm": 0.7483308911323547, + "learning_rate": 0.00019093456072779238, + "loss": 2.7938, + "step": 2818 + }, + { + "epoch": 0.2275038334274877, + "grad_norm": 0.8515620231628418, + "learning_rate": 0.00019092799158933486, + "loss": 2.7392, + "step": 2819 + }, + { + "epoch": 0.2275845371640707, + "grad_norm": 0.7119776606559753, + "learning_rate": 0.00019092142018471415, + "loss": 2.6985, + "step": 2820 + }, + { + "epoch": 0.2276652409006537, + "grad_norm": 0.7549445033073425, + "learning_rate": 0.00019091484651409394, + "loss": 2.7621, + "step": 2821 + }, + { + "epoch": 0.2277459446372367, + "grad_norm": 0.8728097081184387, + "learning_rate": 0.00019090827057763814, + "loss": 
2.8321, + "step": 2822 + }, + { + "epoch": 0.22782664837381972, + "grad_norm": 0.755043089389801, + "learning_rate": 0.00019090169237551057, + "loss": 2.7341, + "step": 2823 + }, + { + "epoch": 0.22790735211040272, + "grad_norm": 0.7949401140213013, + "learning_rate": 0.00019089511190787523, + "loss": 2.7646, + "step": 2824 + }, + { + "epoch": 0.22798805584698573, + "grad_norm": 0.8027622103691101, + "learning_rate": 0.00019088852917489607, + "loss": 2.7606, + "step": 2825 + }, + { + "epoch": 0.22806875958356873, + "grad_norm": 0.8609418869018555, + "learning_rate": 0.0001908819441767372, + "loss": 2.7433, + "step": 2826 + }, + { + "epoch": 0.22814946332015174, + "grad_norm": 0.8021805882453918, + "learning_rate": 0.00019087535691356271, + "loss": 2.7723, + "step": 2827 + }, + { + "epoch": 0.22823016705673474, + "grad_norm": 0.8104252219200134, + "learning_rate": 0.00019086876738553675, + "loss": 2.7229, + "step": 2828 + }, + { + "epoch": 0.22831087079331774, + "grad_norm": 0.8714433908462524, + "learning_rate": 0.00019086217559282362, + "loss": 2.75, + "step": 2829 + }, + { + "epoch": 0.22839157452990075, + "grad_norm": 0.7598714828491211, + "learning_rate": 0.0001908555815355875, + "loss": 2.6979, + "step": 2830 + }, + { + "epoch": 0.22847227826648372, + "grad_norm": 0.859708309173584, + "learning_rate": 0.00019084898521399283, + "loss": 2.7863, + "step": 2831 + }, + { + "epoch": 0.22855298200306673, + "grad_norm": 0.7798011302947998, + "learning_rate": 0.00019084238662820397, + "loss": 2.7623, + "step": 2832 + }, + { + "epoch": 0.22863368573964973, + "grad_norm": 0.7869576811790466, + "learning_rate": 0.00019083578577838535, + "loss": 2.7341, + "step": 2833 + }, + { + "epoch": 0.22871438947623274, + "grad_norm": 0.7486738562583923, + "learning_rate": 0.0001908291826647015, + "loss": 2.7615, + "step": 2834 + }, + { + "epoch": 0.22879509321281574, + "grad_norm": 0.8270190954208374, + "learning_rate": 0.00019082257728731704, + "loss": 2.7515, + "step": 2835 + }, + 
{ + "epoch": 0.22887579694939875, + "grad_norm": 0.9060254693031311, + "learning_rate": 0.00019081596964639648, + "loss": 2.874, + "step": 2836 + }, + { + "epoch": 0.22895650068598175, + "grad_norm": 0.7802320122718811, + "learning_rate": 0.00019080935974210458, + "loss": 2.7224, + "step": 2837 + }, + { + "epoch": 0.22903720442256476, + "grad_norm": 0.9513018131256104, + "learning_rate": 0.00019080274757460607, + "loss": 2.7168, + "step": 2838 + }, + { + "epoch": 0.22911790815914776, + "grad_norm": 0.7139711976051331, + "learning_rate": 0.0001907961331440657, + "loss": 2.676, + "step": 2839 + }, + { + "epoch": 0.22919861189573076, + "grad_norm": 0.8635632395744324, + "learning_rate": 0.00019078951645064838, + "loss": 2.6979, + "step": 2840 + }, + { + "epoch": 0.22927931563231377, + "grad_norm": 0.8823218941688538, + "learning_rate": 0.000190782897494519, + "loss": 2.7345, + "step": 2841 + }, + { + "epoch": 0.22936001936889677, + "grad_norm": 0.8139359354972839, + "learning_rate": 0.00019077627627584246, + "loss": 2.6988, + "step": 2842 + }, + { + "epoch": 0.22944072310547978, + "grad_norm": 0.8935994505882263, + "learning_rate": 0.00019076965279478383, + "loss": 2.7706, + "step": 2843 + }, + { + "epoch": 0.22952142684206278, + "grad_norm": 0.8362705111503601, + "learning_rate": 0.00019076302705150816, + "loss": 2.7593, + "step": 2844 + }, + { + "epoch": 0.22960213057864579, + "grad_norm": 0.7534157633781433, + "learning_rate": 0.00019075639904618066, + "loss": 2.7501, + "step": 2845 + }, + { + "epoch": 0.2296828343152288, + "grad_norm": 0.8826640248298645, + "learning_rate": 0.00019074976877896642, + "loss": 2.7758, + "step": 2846 + }, + { + "epoch": 0.2297635380518118, + "grad_norm": 0.8395571112632751, + "learning_rate": 0.0001907431362500307, + "loss": 2.7625, + "step": 2847 + }, + { + "epoch": 0.2298442417883948, + "grad_norm": 0.7927684783935547, + "learning_rate": 0.00019073650145953885, + "loss": 2.7392, + "step": 2848 + }, + { + "epoch": 0.2299249455249778, 
+ "grad_norm": 0.823208749294281, + "learning_rate": 0.00019072986440765618, + "loss": 2.7259, + "step": 2849 + }, + { + "epoch": 0.2300056492615608, + "grad_norm": 0.889416515827179, + "learning_rate": 0.00019072322509454815, + "loss": 2.7539, + "step": 2850 + }, + { + "epoch": 0.2300863529981438, + "grad_norm": 0.7957748770713806, + "learning_rate": 0.0001907165835203802, + "loss": 2.7756, + "step": 2851 + }, + { + "epoch": 0.23016705673472682, + "grad_norm": 0.7924029231071472, + "learning_rate": 0.00019070993968531782, + "loss": 2.7439, + "step": 2852 + }, + { + "epoch": 0.23024776047130982, + "grad_norm": 0.7811052799224854, + "learning_rate": 0.0001907032935895266, + "loss": 2.7479, + "step": 2853 + }, + { + "epoch": 0.23032846420789282, + "grad_norm": 0.7973877191543579, + "learning_rate": 0.00019069664523317225, + "loss": 2.7502, + "step": 2854 + }, + { + "epoch": 0.23040916794447583, + "grad_norm": 0.7524267435073853, + "learning_rate": 0.0001906899946164204, + "loss": 2.75, + "step": 2855 + }, + { + "epoch": 0.23048987168105883, + "grad_norm": 0.7594791054725647, + "learning_rate": 0.00019068334173943683, + "loss": 2.6534, + "step": 2856 + }, + { + "epoch": 0.23057057541764184, + "grad_norm": 0.7253785729408264, + "learning_rate": 0.00019067668660238733, + "loss": 2.7246, + "step": 2857 + }, + { + "epoch": 0.23065127915422484, + "grad_norm": 0.788737416267395, + "learning_rate": 0.00019067002920543775, + "loss": 2.757, + "step": 2858 + }, + { + "epoch": 0.23073198289080785, + "grad_norm": 0.7577618956565857, + "learning_rate": 0.00019066336954875403, + "loss": 2.674, + "step": 2859 + }, + { + "epoch": 0.23081268662739085, + "grad_norm": 0.7682929635047913, + "learning_rate": 0.0001906567076325022, + "loss": 2.8193, + "step": 2860 + }, + { + "epoch": 0.23089339036397385, + "grad_norm": 0.7742112874984741, + "learning_rate": 0.00019065004345684817, + "loss": 2.6969, + "step": 2861 + }, + { + "epoch": 0.23097409410055686, + "grad_norm": 0.7981678247451782, + 
"learning_rate": 0.00019064337702195814, + "loss": 2.7681, + "step": 2862 + }, + { + "epoch": 0.23105479783713986, + "grad_norm": 0.7608500123023987, + "learning_rate": 0.00019063670832799817, + "loss": 2.7459, + "step": 2863 + }, + { + "epoch": 0.23113550157372287, + "grad_norm": 0.7563463449478149, + "learning_rate": 0.00019063003737513455, + "loss": 2.7678, + "step": 2864 + }, + { + "epoch": 0.23121620531030587, + "grad_norm": 0.7915034890174866, + "learning_rate": 0.00019062336416353343, + "loss": 2.7577, + "step": 2865 + }, + { + "epoch": 0.23129690904688888, + "grad_norm": 0.7229592204093933, + "learning_rate": 0.00019061668869336122, + "loss": 2.7308, + "step": 2866 + }, + { + "epoch": 0.23137761278347188, + "grad_norm": 0.7910905480384827, + "learning_rate": 0.00019061001096478425, + "loss": 2.7571, + "step": 2867 + }, + { + "epoch": 0.23145831652005489, + "grad_norm": 0.8474656939506531, + "learning_rate": 0.00019060333097796895, + "loss": 2.7011, + "step": 2868 + }, + { + "epoch": 0.2315390202566379, + "grad_norm": 0.8005419373512268, + "learning_rate": 0.00019059664873308178, + "loss": 2.7441, + "step": 2869 + }, + { + "epoch": 0.2316197239932209, + "grad_norm": 0.7728021740913391, + "learning_rate": 0.00019058996423028935, + "loss": 2.7753, + "step": 2870 + }, + { + "epoch": 0.2317004277298039, + "grad_norm": 0.7338094115257263, + "learning_rate": 0.00019058327746975816, + "loss": 2.7009, + "step": 2871 + }, + { + "epoch": 0.2317811314663869, + "grad_norm": 0.7746245265007019, + "learning_rate": 0.00019057658845165494, + "loss": 2.6938, + "step": 2872 + }, + { + "epoch": 0.2318618352029699, + "grad_norm": 0.7474356293678284, + "learning_rate": 0.00019056989717614636, + "loss": 2.7161, + "step": 2873 + }, + { + "epoch": 0.2319425389395529, + "grad_norm": 0.9540585279464722, + "learning_rate": 0.00019056320364339917, + "loss": 2.7753, + "step": 2874 + }, + { + "epoch": 0.23202324267613592, + "grad_norm": 0.799726665019989, + "learning_rate": 
0.00019055650785358024, + "loss": 2.7301, + "step": 2875 + }, + { + "epoch": 0.23210394641271892, + "grad_norm": 0.8087828159332275, + "learning_rate": 0.0001905498098068564, + "loss": 2.7305, + "step": 2876 + }, + { + "epoch": 0.23218465014930192, + "grad_norm": 0.8177600502967834, + "learning_rate": 0.00019054310950339457, + "loss": 2.7462, + "step": 2877 + }, + { + "epoch": 0.23226535388588493, + "grad_norm": 0.7106238603591919, + "learning_rate": 0.00019053640694336181, + "loss": 2.7183, + "step": 2878 + }, + { + "epoch": 0.23234605762246793, + "grad_norm": 0.884185791015625, + "learning_rate": 0.00019052970212692514, + "loss": 2.7549, + "step": 2879 + }, + { + "epoch": 0.23242676135905094, + "grad_norm": 0.7532132267951965, + "learning_rate": 0.00019052299505425163, + "loss": 2.7524, + "step": 2880 + }, + { + "epoch": 0.23250746509563394, + "grad_norm": 0.7295021414756775, + "learning_rate": 0.00019051628572550842, + "loss": 2.6928, + "step": 2881 + }, + { + "epoch": 0.23258816883221692, + "grad_norm": 0.8475896716117859, + "learning_rate": 0.00019050957414086278, + "loss": 2.7138, + "step": 2882 + }, + { + "epoch": 0.23266887256879992, + "grad_norm": 0.7219378352165222, + "learning_rate": 0.00019050286030048198, + "loss": 2.7034, + "step": 2883 + }, + { + "epoch": 0.23274957630538293, + "grad_norm": 0.8410176634788513, + "learning_rate": 0.0001904961442045333, + "loss": 2.7413, + "step": 2884 + }, + { + "epoch": 0.23283028004196593, + "grad_norm": 0.7792301177978516, + "learning_rate": 0.00019048942585318414, + "loss": 2.6771, + "step": 2885 + }, + { + "epoch": 0.23291098377854894, + "grad_norm": 0.7457073926925659, + "learning_rate": 0.00019048270524660196, + "loss": 2.7325, + "step": 2886 + }, + { + "epoch": 0.23299168751513194, + "grad_norm": 0.8258858323097229, + "learning_rate": 0.00019047598238495424, + "loss": 2.7434, + "step": 2887 + }, + { + "epoch": 0.23307239125171494, + "grad_norm": 0.8188657164573669, + "learning_rate": 0.00019046925726840853, + 
"loss": 2.732, + "step": 2888 + }, + { + "epoch": 0.23315309498829795, + "grad_norm": 0.8084142208099365, + "learning_rate": 0.00019046252989713246, + "loss": 2.7537, + "step": 2889 + }, + { + "epoch": 0.23323379872488095, + "grad_norm": 0.75553297996521, + "learning_rate": 0.00019045580027129364, + "loss": 2.6685, + "step": 2890 + }, + { + "epoch": 0.23331450246146396, + "grad_norm": 0.8145995736122131, + "learning_rate": 0.00019044906839105986, + "loss": 2.7654, + "step": 2891 + }, + { + "epoch": 0.23339520619804696, + "grad_norm": 0.8433949947357178, + "learning_rate": 0.0001904423342565988, + "loss": 2.7713, + "step": 2892 + }, + { + "epoch": 0.23347590993462997, + "grad_norm": 0.7826054096221924, + "learning_rate": 0.0001904355978680784, + "loss": 2.7108, + "step": 2893 + }, + { + "epoch": 0.23355661367121297, + "grad_norm": 0.7281686663627625, + "learning_rate": 0.0001904288592256665, + "loss": 2.7606, + "step": 2894 + }, + { + "epoch": 0.23363731740779597, + "grad_norm": 0.8282813429832458, + "learning_rate": 0.00019042211832953103, + "loss": 2.6662, + "step": 2895 + }, + { + "epoch": 0.23371802114437898, + "grad_norm": 0.8227263689041138, + "learning_rate": 0.00019041537517984, + "loss": 2.7493, + "step": 2896 + }, + { + "epoch": 0.23379872488096198, + "grad_norm": 0.839350700378418, + "learning_rate": 0.0001904086297767615, + "loss": 2.7258, + "step": 2897 + }, + { + "epoch": 0.233879428617545, + "grad_norm": 0.713231086730957, + "learning_rate": 0.00019040188212046357, + "loss": 2.6722, + "step": 2898 + }, + { + "epoch": 0.233960132354128, + "grad_norm": 0.8314552903175354, + "learning_rate": 0.00019039513221111447, + "loss": 2.8509, + "step": 2899 + }, + { + "epoch": 0.234040836090711, + "grad_norm": 0.8885688781738281, + "learning_rate": 0.0001903883800488824, + "loss": 2.7608, + "step": 2900 + }, + { + "epoch": 0.234121539827294, + "grad_norm": 0.755308210849762, + "learning_rate": 0.00019038162563393555, + "loss": 2.7065, + "step": 2901 + }, + { + 
"epoch": 0.234202243563877, + "grad_norm": 0.7436641454696655, + "learning_rate": 0.00019037486896644236, + "loss": 2.6865, + "step": 2902 + }, + { + "epoch": 0.23428294730046, + "grad_norm": 0.7861987948417664, + "learning_rate": 0.0001903681100465712, + "loss": 2.7238, + "step": 2903 + }, + { + "epoch": 0.234363651037043, + "grad_norm": 0.7481045126914978, + "learning_rate": 0.0001903613488744905, + "loss": 2.7038, + "step": 2904 + }, + { + "epoch": 0.23444435477362602, + "grad_norm": 0.790765106678009, + "learning_rate": 0.0001903545854503688, + "loss": 2.6865, + "step": 2905 + }, + { + "epoch": 0.23452505851020902, + "grad_norm": 0.8594793677330017, + "learning_rate": 0.0001903478197743746, + "loss": 2.7324, + "step": 2906 + }, + { + "epoch": 0.23460576224679203, + "grad_norm": 0.7504310011863708, + "learning_rate": 0.00019034105184667662, + "loss": 2.6535, + "step": 2907 + }, + { + "epoch": 0.23468646598337503, + "grad_norm": 0.7824578881263733, + "learning_rate": 0.00019033428166744342, + "loss": 2.7113, + "step": 2908 + }, + { + "epoch": 0.23476716971995804, + "grad_norm": 0.7766899466514587, + "learning_rate": 0.0001903275092368438, + "loss": 2.6907, + "step": 2909 + }, + { + "epoch": 0.23484787345654104, + "grad_norm": 0.8082600235939026, + "learning_rate": 0.00019032073455504657, + "loss": 2.6781, + "step": 2910 + }, + { + "epoch": 0.23492857719312404, + "grad_norm": 0.7790517210960388, + "learning_rate": 0.0001903139576222205, + "loss": 2.7277, + "step": 2911 + }, + { + "epoch": 0.23500928092970705, + "grad_norm": 0.7449578046798706, + "learning_rate": 0.00019030717843853453, + "loss": 2.7078, + "step": 2912 + }, + { + "epoch": 0.23508998466629005, + "grad_norm": 0.7931632399559021, + "learning_rate": 0.0001903003970041576, + "loss": 2.7165, + "step": 2913 + }, + { + "epoch": 0.23517068840287306, + "grad_norm": 0.7970653176307678, + "learning_rate": 0.00019029361331925873, + "loss": 2.7993, + "step": 2914 + }, + { + "epoch": 0.23525139213945606, + 
"grad_norm": 0.8497335314750671, + "learning_rate": 0.00019028682738400697, + "loss": 2.7564, + "step": 2915 + }, + { + "epoch": 0.23533209587603907, + "grad_norm": 0.7840128540992737, + "learning_rate": 0.0001902800391985715, + "loss": 2.7546, + "step": 2916 + }, + { + "epoch": 0.23541279961262207, + "grad_norm": 0.8237372636795044, + "learning_rate": 0.00019027324876312146, + "loss": 2.7507, + "step": 2917 + }, + { + "epoch": 0.23549350334920507, + "grad_norm": 0.8445321917533875, + "learning_rate": 0.00019026645607782603, + "loss": 2.7287, + "step": 2918 + }, + { + "epoch": 0.23557420708578808, + "grad_norm": 0.8380417227745056, + "learning_rate": 0.0001902596611428546, + "loss": 2.7778, + "step": 2919 + }, + { + "epoch": 0.23565491082237108, + "grad_norm": 0.7989064455032349, + "learning_rate": 0.00019025286395837646, + "loss": 2.7254, + "step": 2920 + }, + { + "epoch": 0.2357356145589541, + "grad_norm": 0.8223496079444885, + "learning_rate": 0.00019024606452456102, + "loss": 2.7028, + "step": 2921 + }, + { + "epoch": 0.2358163182955371, + "grad_norm": 0.8090229630470276, + "learning_rate": 0.00019023926284157775, + "loss": 2.6911, + "step": 2922 + }, + { + "epoch": 0.2358970220321201, + "grad_norm": 0.7556560635566711, + "learning_rate": 0.00019023245890959615, + "loss": 2.7183, + "step": 2923 + }, + { + "epoch": 0.2359777257687031, + "grad_norm": 0.7907983660697937, + "learning_rate": 0.00019022565272878582, + "loss": 2.6805, + "step": 2924 + }, + { + "epoch": 0.2360584295052861, + "grad_norm": 0.9404142498970032, + "learning_rate": 0.0001902188442993164, + "loss": 2.8081, + "step": 2925 + }, + { + "epoch": 0.2361391332418691, + "grad_norm": 0.8349069952964783, + "learning_rate": 0.0001902120336213575, + "loss": 2.8329, + "step": 2926 + }, + { + "epoch": 0.2362198369784521, + "grad_norm": 0.8557522892951965, + "learning_rate": 0.00019020522069507892, + "loss": 2.704, + "step": 2927 + }, + { + "epoch": 0.23630054071503512, + "grad_norm": 0.7557278275489807, + 
"learning_rate": 0.00019019840552065044, + "loss": 2.7071, + "step": 2928 + }, + { + "epoch": 0.23638124445161812, + "grad_norm": 0.8810723423957825, + "learning_rate": 0.00019019158809824193, + "loss": 2.7535, + "step": 2929 + }, + { + "epoch": 0.23646194818820113, + "grad_norm": 0.7845562100410461, + "learning_rate": 0.00019018476842802326, + "loss": 2.7254, + "step": 2930 + }, + { + "epoch": 0.23654265192478413, + "grad_norm": 0.7566044926643372, + "learning_rate": 0.00019017794651016444, + "loss": 2.7295, + "step": 2931 + }, + { + "epoch": 0.23662335566136714, + "grad_norm": 0.8083382248878479, + "learning_rate": 0.00019017112234483545, + "loss": 2.7305, + "step": 2932 + }, + { + "epoch": 0.2367040593979501, + "grad_norm": 0.7924187183380127, + "learning_rate": 0.00019016429593220638, + "loss": 2.7659, + "step": 2933 + }, + { + "epoch": 0.23678476313453312, + "grad_norm": 0.8400307297706604, + "learning_rate": 0.00019015746727244737, + "loss": 2.7293, + "step": 2934 + }, + { + "epoch": 0.23686546687111612, + "grad_norm": 0.6931199431419373, + "learning_rate": 0.0001901506363657286, + "loss": 2.7189, + "step": 2935 + }, + { + "epoch": 0.23694617060769912, + "grad_norm": 0.8263585567474365, + "learning_rate": 0.0001901438032122203, + "loss": 2.7368, + "step": 2936 + }, + { + "epoch": 0.23702687434428213, + "grad_norm": 0.8001893162727356, + "learning_rate": 0.0001901369678120928, + "loss": 2.7793, + "step": 2937 + }, + { + "epoch": 0.23710757808086513, + "grad_norm": 0.7724235653877258, + "learning_rate": 0.00019013013016551644, + "loss": 2.717, + "step": 2938 + }, + { + "epoch": 0.23718828181744814, + "grad_norm": 0.7617147564888, + "learning_rate": 0.00019012329027266164, + "loss": 2.7275, + "step": 2939 + }, + { + "epoch": 0.23726898555403114, + "grad_norm": 0.80738765001297, + "learning_rate": 0.00019011644813369884, + "loss": 2.7444, + "step": 2940 + }, + { + "epoch": 0.23734968929061415, + "grad_norm": 0.7885528802871704, + "learning_rate": 
0.00019010960374879861, + "loss": 2.7377, + "step": 2941 + }, + { + "epoch": 0.23743039302719715, + "grad_norm": 0.720268964767456, + "learning_rate": 0.00019010275711813147, + "loss": 2.6897, + "step": 2942 + }, + { + "epoch": 0.23751109676378016, + "grad_norm": 0.7532111406326294, + "learning_rate": 0.00019009590824186815, + "loss": 2.8117, + "step": 2943 + }, + { + "epoch": 0.23759180050036316, + "grad_norm": 0.780777633190155, + "learning_rate": 0.00019008905712017925, + "loss": 2.7565, + "step": 2944 + }, + { + "epoch": 0.23767250423694616, + "grad_norm": 0.8721919059753418, + "learning_rate": 0.00019008220375323553, + "loss": 2.801, + "step": 2945 + }, + { + "epoch": 0.23775320797352917, + "grad_norm": 0.8258914947509766, + "learning_rate": 0.00019007534814120786, + "loss": 2.7696, + "step": 2946 + }, + { + "epoch": 0.23783391171011217, + "grad_norm": 0.7292730808258057, + "learning_rate": 0.00019006849028426704, + "loss": 2.7512, + "step": 2947 + }, + { + "epoch": 0.23791461544669518, + "grad_norm": 0.7789164185523987, + "learning_rate": 0.00019006163018258398, + "loss": 2.7489, + "step": 2948 + }, + { + "epoch": 0.23799531918327818, + "grad_norm": 0.8049725294113159, + "learning_rate": 0.00019005476783632967, + "loss": 2.672, + "step": 2949 + }, + { + "epoch": 0.23807602291986119, + "grad_norm": 0.7440119981765747, + "learning_rate": 0.00019004790324567519, + "loss": 2.7208, + "step": 2950 + }, + { + "epoch": 0.2381567266564442, + "grad_norm": 0.7695925235748291, + "learning_rate": 0.00019004103641079154, + "loss": 2.7816, + "step": 2951 + }, + { + "epoch": 0.2382374303930272, + "grad_norm": 0.7623234391212463, + "learning_rate": 0.00019003416733184988, + "loss": 2.7034, + "step": 2952 + }, + { + "epoch": 0.2383181341296102, + "grad_norm": 0.8136502504348755, + "learning_rate": 0.00019002729600902141, + "loss": 2.7638, + "step": 2953 + }, + { + "epoch": 0.2383988378661932, + "grad_norm": 0.7813066840171814, + "learning_rate": 0.00019002042244247743, + 
"loss": 2.7606, + "step": 2954 + }, + { + "epoch": 0.2384795416027762, + "grad_norm": 0.7863059043884277, + "learning_rate": 0.0001900135466323892, + "loss": 2.7219, + "step": 2955 + }, + { + "epoch": 0.2385602453393592, + "grad_norm": 0.8712359070777893, + "learning_rate": 0.00019000666857892806, + "loss": 2.7485, + "step": 2956 + }, + { + "epoch": 0.23864094907594222, + "grad_norm": 0.8130611777305603, + "learning_rate": 0.00018999978828226547, + "loss": 2.7195, + "step": 2957 + }, + { + "epoch": 0.23872165281252522, + "grad_norm": 0.759503960609436, + "learning_rate": 0.00018999290574257292, + "loss": 2.6856, + "step": 2958 + }, + { + "epoch": 0.23880235654910822, + "grad_norm": 0.7490882277488708, + "learning_rate": 0.0001899860209600219, + "loss": 2.7587, + "step": 2959 + }, + { + "epoch": 0.23888306028569123, + "grad_norm": 0.8111297488212585, + "learning_rate": 0.000189979133934784, + "loss": 2.7688, + "step": 2960 + }, + { + "epoch": 0.23896376402227423, + "grad_norm": 0.844894289970398, + "learning_rate": 0.0001899722446670309, + "loss": 2.7706, + "step": 2961 + }, + { + "epoch": 0.23904446775885724, + "grad_norm": 0.7875459790229797, + "learning_rate": 0.00018996535315693423, + "loss": 2.7535, + "step": 2962 + }, + { + "epoch": 0.23912517149544024, + "grad_norm": 0.7768518328666687, + "learning_rate": 0.0001899584594046658, + "loss": 2.7268, + "step": 2963 + }, + { + "epoch": 0.23920587523202325, + "grad_norm": 0.8645716309547424, + "learning_rate": 0.00018995156341039744, + "loss": 2.7856, + "step": 2964 + }, + { + "epoch": 0.23928657896860625, + "grad_norm": 0.7816600799560547, + "learning_rate": 0.00018994466517430097, + "loss": 2.757, + "step": 2965 + }, + { + "epoch": 0.23936728270518925, + "grad_norm": 0.7967644333839417, + "learning_rate": 0.00018993776469654832, + "loss": 2.7021, + "step": 2966 + }, + { + "epoch": 0.23944798644177226, + "grad_norm": 0.800589919090271, + "learning_rate": 0.00018993086197731146, + "loss": 2.6838, + "step": 2967 + }, 
+ { + "epoch": 0.23952869017835526, + "grad_norm": 0.7658529281616211, + "learning_rate": 0.00018992395701676246, + "loss": 2.6992, + "step": 2968 + }, + { + "epoch": 0.23960939391493827, + "grad_norm": 0.848456621170044, + "learning_rate": 0.00018991704981507338, + "loss": 2.7249, + "step": 2969 + }, + { + "epoch": 0.23969009765152127, + "grad_norm": 0.7365427017211914, + "learning_rate": 0.00018991014037241638, + "loss": 2.7044, + "step": 2970 + }, + { + "epoch": 0.23977080138810428, + "grad_norm": 0.8026351928710938, + "learning_rate": 0.00018990322868896365, + "loss": 2.7409, + "step": 2971 + }, + { + "epoch": 0.23985150512468728, + "grad_norm": 0.788646936416626, + "learning_rate": 0.00018989631476488744, + "loss": 2.7331, + "step": 2972 + }, + { + "epoch": 0.23993220886127029, + "grad_norm": 0.8388644456863403, + "learning_rate": 0.00018988939860036007, + "loss": 2.7478, + "step": 2973 + }, + { + "epoch": 0.2400129125978533, + "grad_norm": 0.7479026913642883, + "learning_rate": 0.00018988248019555394, + "loss": 2.7248, + "step": 2974 + }, + { + "epoch": 0.2400936163344363, + "grad_norm": 0.7313364744186401, + "learning_rate": 0.00018987555955064144, + "loss": 2.7323, + "step": 2975 + }, + { + "epoch": 0.2401743200710193, + "grad_norm": 0.7858260273933411, + "learning_rate": 0.00018986863666579505, + "loss": 2.6845, + "step": 2976 + }, + { + "epoch": 0.2402550238076023, + "grad_norm": 0.8090949654579163, + "learning_rate": 0.00018986171154118732, + "loss": 2.8094, + "step": 2977 + }, + { + "epoch": 0.2403357275441853, + "grad_norm": 0.7917135953903198, + "learning_rate": 0.00018985478417699085, + "loss": 2.7106, + "step": 2978 + }, + { + "epoch": 0.2404164312807683, + "grad_norm": 0.8192126154899597, + "learning_rate": 0.00018984785457337825, + "loss": 2.7729, + "step": 2979 + }, + { + "epoch": 0.24049713501735132, + "grad_norm": 0.797922670841217, + "learning_rate": 0.00018984092273052226, + "loss": 2.7747, + "step": 2980 + }, + { + "epoch": 
0.24057783875393432, + "grad_norm": 0.9050948023796082, + "learning_rate": 0.00018983398864859564, + "loss": 2.7453, + "step": 2981 + }, + { + "epoch": 0.24065854249051732, + "grad_norm": 0.7827617526054382, + "learning_rate": 0.0001898270523277712, + "loss": 2.7371, + "step": 2982 + }, + { + "epoch": 0.24073924622710033, + "grad_norm": 0.7530156373977661, + "learning_rate": 0.0001898201137682218, + "loss": 2.7397, + "step": 2983 + }, + { + "epoch": 0.2408199499636833, + "grad_norm": 0.7989545464515686, + "learning_rate": 0.00018981317297012034, + "loss": 2.7532, + "step": 2984 + }, + { + "epoch": 0.2409006537002663, + "grad_norm": 0.7501168847084045, + "learning_rate": 0.00018980622993363988, + "loss": 2.7395, + "step": 2985 + }, + { + "epoch": 0.2409813574368493, + "grad_norm": 0.8073468208312988, + "learning_rate": 0.0001897992846589534, + "loss": 2.7673, + "step": 2986 + }, + { + "epoch": 0.24106206117343232, + "grad_norm": 0.9155512452125549, + "learning_rate": 0.00018979233714623401, + "loss": 2.6608, + "step": 2987 + }, + { + "epoch": 0.24114276491001532, + "grad_norm": 0.7461311221122742, + "learning_rate": 0.00018978538739565485, + "loss": 2.7657, + "step": 2988 + }, + { + "epoch": 0.24122346864659833, + "grad_norm": 0.8011443018913269, + "learning_rate": 0.00018977843540738914, + "loss": 2.7363, + "step": 2989 + }, + { + "epoch": 0.24130417238318133, + "grad_norm": 0.7602998614311218, + "learning_rate": 0.0001897714811816101, + "loss": 2.7285, + "step": 2990 + }, + { + "epoch": 0.24138487611976434, + "grad_norm": 0.8283531069755554, + "learning_rate": 0.00018976452471849116, + "loss": 2.7614, + "step": 2991 + }, + { + "epoch": 0.24146557985634734, + "grad_norm": 0.7358889579772949, + "learning_rate": 0.00018975756601820556, + "loss": 2.7429, + "step": 2992 + }, + { + "epoch": 0.24154628359293034, + "grad_norm": 0.7749240398406982, + "learning_rate": 0.0001897506050809268, + "loss": 2.6884, + "step": 2993 + }, + { + "epoch": 0.24162698732951335, + 
"grad_norm": 0.7529963254928589, + "learning_rate": 0.00018974364190682837, + "loss": 2.7619, + "step": 2994 + }, + { + "epoch": 0.24170769106609635, + "grad_norm": 0.7946054935455322, + "learning_rate": 0.00018973667649608376, + "loss": 2.7403, + "step": 2995 + }, + { + "epoch": 0.24178839480267936, + "grad_norm": 0.735870897769928, + "learning_rate": 0.0001897297088488666, + "loss": 2.7158, + "step": 2996 + }, + { + "epoch": 0.24186909853926236, + "grad_norm": 0.8409188985824585, + "learning_rate": 0.00018972273896535055, + "loss": 2.768, + "step": 2997 + }, + { + "epoch": 0.24194980227584537, + "grad_norm": 0.8351938724517822, + "learning_rate": 0.0001897157668457093, + "loss": 2.7548, + "step": 2998 + }, + { + "epoch": 0.24203050601242837, + "grad_norm": 0.8339046239852905, + "learning_rate": 0.00018970879249011663, + "loss": 2.7842, + "step": 2999 + }, + { + "epoch": 0.24211120974901137, + "grad_norm": 0.8092730641365051, + "learning_rate": 0.00018970181589874637, + "loss": 2.7141, + "step": 3000 + }, + { + "epoch": 0.24211120974901137, + "eval_loss": 2.643277406692505, + "eval_runtime": 784.7512, + "eval_samples_per_second": 3.339, + "eval_steps_per_second": 0.557, + "step": 3000 + }, + { + "epoch": 0.24219191348559438, + "grad_norm": 0.8014447093009949, + "learning_rate": 0.00018969483707177235, + "loss": 2.7341, + "step": 3001 + }, + { + "epoch": 0.24227261722217738, + "grad_norm": 0.744153618812561, + "learning_rate": 0.00018968785600936855, + "loss": 2.678, + "step": 3002 + }, + { + "epoch": 0.2423533209587604, + "grad_norm": 0.7264240384101868, + "learning_rate": 0.0001896808727117089, + "loss": 2.7321, + "step": 3003 + }, + { + "epoch": 0.2424340246953434, + "grad_norm": 0.8214067220687866, + "learning_rate": 0.00018967388717896748, + "loss": 2.7311, + "step": 3004 + }, + { + "epoch": 0.2425147284319264, + "grad_norm": 0.7871330976486206, + "learning_rate": 0.00018966689941131838, + "loss": 2.7184, + "step": 3005 + }, + { + "epoch": 0.2425954321685094, 
+ "grad_norm": 0.7301360964775085, + "learning_rate": 0.00018965990940893575, + "loss": 2.7039, + "step": 3006 + }, + { + "epoch": 0.2426761359050924, + "grad_norm": 0.8290385603904724, + "learning_rate": 0.00018965291717199382, + "loss": 2.7848, + "step": 3007 + }, + { + "epoch": 0.2427568396416754, + "grad_norm": 0.7465909123420715, + "learning_rate": 0.00018964592270066683, + "loss": 2.7271, + "step": 3008 + }, + { + "epoch": 0.2428375433782584, + "grad_norm": 0.7992933988571167, + "learning_rate": 0.00018963892599512913, + "loss": 2.7749, + "step": 3009 + }, + { + "epoch": 0.24291824711484142, + "grad_norm": 0.7879100441932678, + "learning_rate": 0.00018963192705555507, + "loss": 2.6844, + "step": 3010 + }, + { + "epoch": 0.24299895085142442, + "grad_norm": 0.7895401120185852, + "learning_rate": 0.00018962492588211905, + "loss": 2.725, + "step": 3011 + }, + { + "epoch": 0.24307965458800743, + "grad_norm": 0.7699374556541443, + "learning_rate": 0.00018961792247499564, + "loss": 2.7408, + "step": 3012 + }, + { + "epoch": 0.24316035832459043, + "grad_norm": 0.828372597694397, + "learning_rate": 0.0001896109168343593, + "loss": 2.7527, + "step": 3013 + }, + { + "epoch": 0.24324106206117344, + "grad_norm": 0.7611951231956482, + "learning_rate": 0.0001896039089603847, + "loss": 2.7294, + "step": 3014 + }, + { + "epoch": 0.24332176579775644, + "grad_norm": 0.8214892148971558, + "learning_rate": 0.00018959689885324646, + "loss": 2.6931, + "step": 3015 + }, + { + "epoch": 0.24340246953433944, + "grad_norm": 0.7472538352012634, + "learning_rate": 0.00018958988651311928, + "loss": 2.7316, + "step": 3016 + }, + { + "epoch": 0.24348317327092245, + "grad_norm": 0.7574933171272278, + "learning_rate": 0.00018958287194017795, + "loss": 2.7764, + "step": 3017 + }, + { + "epoch": 0.24356387700750545, + "grad_norm": 0.739152729511261, + "learning_rate": 0.00018957585513459723, + "loss": 2.7949, + "step": 3018 + }, + { + "epoch": 0.24364458074408846, + "grad_norm": 
0.824097752571106, + "learning_rate": 0.00018956883609655208, + "loss": 2.6612, + "step": 3019 + }, + { + "epoch": 0.24372528448067146, + "grad_norm": 0.7891144156455994, + "learning_rate": 0.00018956181482621744, + "loss": 2.7139, + "step": 3020 + }, + { + "epoch": 0.24380598821725447, + "grad_norm": 0.7364415526390076, + "learning_rate": 0.0001895547913237682, + "loss": 2.6984, + "step": 3021 + }, + { + "epoch": 0.24388669195383747, + "grad_norm": 0.7631362080574036, + "learning_rate": 0.0001895477655893795, + "loss": 2.7015, + "step": 3022 + }, + { + "epoch": 0.24396739569042047, + "grad_norm": 0.780541181564331, + "learning_rate": 0.00018954073762322637, + "loss": 2.7716, + "step": 3023 + }, + { + "epoch": 0.24404809942700348, + "grad_norm": 0.7877349853515625, + "learning_rate": 0.00018953370742548403, + "loss": 2.6654, + "step": 3024 + }, + { + "epoch": 0.24412880316358648, + "grad_norm": 0.7786216139793396, + "learning_rate": 0.00018952667499632763, + "loss": 2.7491, + "step": 3025 + }, + { + "epoch": 0.2442095069001695, + "grad_norm": 0.8207663893699646, + "learning_rate": 0.00018951964033593247, + "loss": 2.7212, + "step": 3026 + }, + { + "epoch": 0.2442902106367525, + "grad_norm": 0.8271831274032593, + "learning_rate": 0.00018951260344447386, + "loss": 2.7456, + "step": 3027 + }, + { + "epoch": 0.2443709143733355, + "grad_norm": 0.7610505819320679, + "learning_rate": 0.00018950556432212722, + "loss": 2.7472, + "step": 3028 + }, + { + "epoch": 0.2444516181099185, + "grad_norm": 0.7521701455116272, + "learning_rate": 0.00018949852296906792, + "loss": 2.7263, + "step": 3029 + }, + { + "epoch": 0.2445323218465015, + "grad_norm": 0.7518337965011597, + "learning_rate": 0.00018949147938547144, + "loss": 2.7069, + "step": 3030 + }, + { + "epoch": 0.2446130255830845, + "grad_norm": 0.7823107838630676, + "learning_rate": 0.00018948443357151343, + "loss": 2.7858, + "step": 3031 + }, + { + "epoch": 0.2446937293196675, + "grad_norm": 0.733132004737854, + 
"learning_rate": 0.00018947738552736938, + "loss": 2.7194, + "step": 3032 + }, + { + "epoch": 0.24477443305625052, + "grad_norm": 0.7756488919258118, + "learning_rate": 0.00018947033525321501, + "loss": 2.7299, + "step": 3033 + }, + { + "epoch": 0.24485513679283352, + "grad_norm": 0.7971112728118896, + "learning_rate": 0.00018946328274922598, + "loss": 2.7474, + "step": 3034 + }, + { + "epoch": 0.2449358405294165, + "grad_norm": 0.7871260643005371, + "learning_rate": 0.0001894562280155781, + "loss": 2.6994, + "step": 3035 + }, + { + "epoch": 0.2450165442659995, + "grad_norm": 0.7431116104125977, + "learning_rate": 0.00018944917105244717, + "loss": 2.6834, + "step": 3036 + }, + { + "epoch": 0.2450972480025825, + "grad_norm": 0.7372273206710815, + "learning_rate": 0.00018944211186000906, + "loss": 2.6988, + "step": 3037 + }, + { + "epoch": 0.2451779517391655, + "grad_norm": 0.8161508440971375, + "learning_rate": 0.00018943505043843975, + "loss": 2.7595, + "step": 3038 + }, + { + "epoch": 0.24525865547574852, + "grad_norm": 0.8062586784362793, + "learning_rate": 0.00018942798678791518, + "loss": 2.6893, + "step": 3039 + }, + { + "epoch": 0.24533935921233152, + "grad_norm": 0.824023425579071, + "learning_rate": 0.0001894209209086114, + "loss": 2.7188, + "step": 3040 + }, + { + "epoch": 0.24542006294891452, + "grad_norm": 0.740466833114624, + "learning_rate": 0.00018941385280070455, + "loss": 2.674, + "step": 3041 + }, + { + "epoch": 0.24550076668549753, + "grad_norm": 0.8543577194213867, + "learning_rate": 0.00018940678246437073, + "loss": 2.7423, + "step": 3042 + }, + { + "epoch": 0.24558147042208053, + "grad_norm": 0.7059324979782104, + "learning_rate": 0.0001893997098997862, + "loss": 2.6669, + "step": 3043 + }, + { + "epoch": 0.24566217415866354, + "grad_norm": 0.7739956974983215, + "learning_rate": 0.00018939263510712721, + "loss": 2.7118, + "step": 3044 + }, + { + "epoch": 0.24574287789524654, + "grad_norm": 0.7701205611228943, + "learning_rate": 
0.00018938555808657007, + "loss": 2.7653, + "step": 3045 + }, + { + "epoch": 0.24582358163182955, + "grad_norm": 0.7243000864982605, + "learning_rate": 0.00018937847883829115, + "loss": 2.6789, + "step": 3046 + }, + { + "epoch": 0.24590428536841255, + "grad_norm": 0.7645598649978638, + "learning_rate": 0.00018937139736246693, + "loss": 2.7108, + "step": 3047 + }, + { + "epoch": 0.24598498910499556, + "grad_norm": 0.7544745802879333, + "learning_rate": 0.00018936431365927385, + "loss": 2.6958, + "step": 3048 + }, + { + "epoch": 0.24606569284157856, + "grad_norm": 0.709282398223877, + "learning_rate": 0.00018935722772888848, + "loss": 2.6728, + "step": 3049 + }, + { + "epoch": 0.24614639657816156, + "grad_norm": 0.7524243593215942, + "learning_rate": 0.00018935013957148742, + "loss": 2.7283, + "step": 3050 + }, + { + "epoch": 0.24622710031474457, + "grad_norm": 0.7959655523300171, + "learning_rate": 0.0001893430491872473, + "loss": 2.7384, + "step": 3051 + }, + { + "epoch": 0.24630780405132757, + "grad_norm": 0.7252553105354309, + "learning_rate": 0.00018933595657634486, + "loss": 2.7226, + "step": 3052 + }, + { + "epoch": 0.24638850778791058, + "grad_norm": 0.7387316226959229, + "learning_rate": 0.00018932886173895686, + "loss": 2.7546, + "step": 3053 + }, + { + "epoch": 0.24646921152449358, + "grad_norm": 0.804856538772583, + "learning_rate": 0.0001893217646752601, + "loss": 2.7321, + "step": 3054 + }, + { + "epoch": 0.24654991526107659, + "grad_norm": 0.6929069757461548, + "learning_rate": 0.0001893146653854315, + "loss": 2.6735, + "step": 3055 + }, + { + "epoch": 0.2466306189976596, + "grad_norm": 0.7076159715652466, + "learning_rate": 0.00018930756386964794, + "loss": 2.7368, + "step": 3056 + }, + { + "epoch": 0.2467113227342426, + "grad_norm": 0.7522851228713989, + "learning_rate": 0.00018930046012808648, + "loss": 2.7448, + "step": 3057 + }, + { + "epoch": 0.2467920264708256, + "grad_norm": 0.8347200155258179, + "learning_rate": 0.00018929335416092408, + 
"loss": 2.6837, + "step": 3058 + }, + { + "epoch": 0.2468727302074086, + "grad_norm": 0.737503707408905, + "learning_rate": 0.00018928624596833786, + "loss": 2.693, + "step": 3059 + }, + { + "epoch": 0.2469534339439916, + "grad_norm": 0.7836787104606628, + "learning_rate": 0.00018927913555050503, + "loss": 2.7335, + "step": 3060 + }, + { + "epoch": 0.2470341376805746, + "grad_norm": 0.7823840975761414, + "learning_rate": 0.00018927202290760278, + "loss": 2.6736, + "step": 3061 + }, + { + "epoch": 0.24711484141715762, + "grad_norm": 0.7894529700279236, + "learning_rate": 0.00018926490803980833, + "loss": 2.7112, + "step": 3062 + }, + { + "epoch": 0.24719554515374062, + "grad_norm": 0.8289024829864502, + "learning_rate": 0.000189257790947299, + "loss": 2.7667, + "step": 3063 + }, + { + "epoch": 0.24727624889032362, + "grad_norm": 0.70560222864151, + "learning_rate": 0.00018925067163025227, + "loss": 2.6946, + "step": 3064 + }, + { + "epoch": 0.24735695262690663, + "grad_norm": 0.6954196095466614, + "learning_rate": 0.00018924355008884548, + "loss": 2.7237, + "step": 3065 + }, + { + "epoch": 0.24743765636348963, + "grad_norm": 0.7975121736526489, + "learning_rate": 0.0001892364263232561, + "loss": 2.6392, + "step": 3066 + }, + { + "epoch": 0.24751836010007264, + "grad_norm": 0.777350902557373, + "learning_rate": 0.00018922930033366174, + "loss": 2.7284, + "step": 3067 + }, + { + "epoch": 0.24759906383665564, + "grad_norm": 0.738240659236908, + "learning_rate": 0.00018922217212023995, + "loss": 2.6884, + "step": 3068 + }, + { + "epoch": 0.24767976757323865, + "grad_norm": 0.8077268600463867, + "learning_rate": 0.0001892150416831684, + "loss": 2.7205, + "step": 3069 + }, + { + "epoch": 0.24776047130982165, + "grad_norm": 0.8108188509941101, + "learning_rate": 0.00018920790902262483, + "loss": 2.7592, + "step": 3070 + }, + { + "epoch": 0.24784117504640465, + "grad_norm": 0.7842642664909363, + "learning_rate": 0.00018920077413878695, + "loss": 2.7474, + "step": 3071 + }, 
+ { + "epoch": 0.24792187878298766, + "grad_norm": 0.7644543051719666, + "learning_rate": 0.0001891936370318326, + "loss": 2.7179, + "step": 3072 + }, + { + "epoch": 0.24800258251957066, + "grad_norm": 0.7761854529380798, + "learning_rate": 0.00018918649770193965, + "loss": 2.71, + "step": 3073 + }, + { + "epoch": 0.24808328625615367, + "grad_norm": 0.7724074125289917, + "learning_rate": 0.00018917935614928607, + "loss": 2.7359, + "step": 3074 + }, + { + "epoch": 0.24816398999273667, + "grad_norm": 0.7360609173774719, + "learning_rate": 0.0001891722123740498, + "loss": 2.7342, + "step": 3075 + }, + { + "epoch": 0.24824469372931968, + "grad_norm": 0.757561206817627, + "learning_rate": 0.00018916506637640894, + "loss": 2.7647, + "step": 3076 + }, + { + "epoch": 0.24832539746590268, + "grad_norm": 0.7180947065353394, + "learning_rate": 0.00018915791815654148, + "loss": 2.6771, + "step": 3077 + }, + { + "epoch": 0.24840610120248569, + "grad_norm": 0.7219653129577637, + "learning_rate": 0.0001891507677146257, + "loss": 2.7772, + "step": 3078 + }, + { + "epoch": 0.2484868049390687, + "grad_norm": 0.749113917350769, + "learning_rate": 0.0001891436150508397, + "loss": 2.6996, + "step": 3079 + }, + { + "epoch": 0.2485675086756517, + "grad_norm": 0.766180157661438, + "learning_rate": 0.00018913646016536183, + "loss": 2.7896, + "step": 3080 + }, + { + "epoch": 0.2486482124122347, + "grad_norm": 0.7672411799430847, + "learning_rate": 0.00018912930305837032, + "loss": 2.7307, + "step": 3081 + }, + { + "epoch": 0.2487289161488177, + "grad_norm": 0.7639018297195435, + "learning_rate": 0.00018912214373004364, + "loss": 2.6569, + "step": 3082 + }, + { + "epoch": 0.2488096198854007, + "grad_norm": 0.8935483694076538, + "learning_rate": 0.00018911498218056013, + "loss": 2.6897, + "step": 3083 + }, + { + "epoch": 0.2488903236219837, + "grad_norm": 0.8506368398666382, + "learning_rate": 0.00018910781841009836, + "loss": 2.778, + "step": 3084 + }, + { + "epoch": 0.24897102735856672, + 
"grad_norm": 0.8026999235153198, + "learning_rate": 0.0001891006524188368, + "loss": 2.7799, + "step": 3085 + }, + { + "epoch": 0.2490517310951497, + "grad_norm": 0.784637987613678, + "learning_rate": 0.00018909348420695406, + "loss": 2.673, + "step": 3086 + }, + { + "epoch": 0.2491324348317327, + "grad_norm": 0.8949337601661682, + "learning_rate": 0.00018908631377462882, + "loss": 2.7726, + "step": 3087 + }, + { + "epoch": 0.2492131385683157, + "grad_norm": 0.73841792345047, + "learning_rate": 0.00018907914112203974, + "loss": 2.7403, + "step": 3088 + }, + { + "epoch": 0.2492938423048987, + "grad_norm": 0.7305924296379089, + "learning_rate": 0.00018907196624936564, + "loss": 2.6713, + "step": 3089 + }, + { + "epoch": 0.2493745460414817, + "grad_norm": 0.7707394361495972, + "learning_rate": 0.0001890647891567853, + "loss": 2.7306, + "step": 3090 + }, + { + "epoch": 0.2494552497780647, + "grad_norm": 0.8691473603248596, + "learning_rate": 0.00018905760984447759, + "loss": 2.6775, + "step": 3091 + }, + { + "epoch": 0.24953595351464772, + "grad_norm": 0.7466028332710266, + "learning_rate": 0.00018905042831262144, + "loss": 2.7196, + "step": 3092 + }, + { + "epoch": 0.24961665725123072, + "grad_norm": 0.7785150408744812, + "learning_rate": 0.0001890432445613958, + "loss": 2.7099, + "step": 3093 + }, + { + "epoch": 0.24969736098781373, + "grad_norm": 0.7775028347969055, + "learning_rate": 0.0001890360585909798, + "loss": 2.698, + "step": 3094 + }, + { + "epoch": 0.24977806472439673, + "grad_norm": 0.829257071018219, + "learning_rate": 0.00018902887040155245, + "loss": 2.711, + "step": 3095 + }, + { + "epoch": 0.24985876846097974, + "grad_norm": 0.8492234945297241, + "learning_rate": 0.00018902167999329295, + "loss": 2.7164, + "step": 3096 + }, + { + "epoch": 0.24993947219756274, + "grad_norm": 0.7332174777984619, + "learning_rate": 0.00018901448736638045, + "loss": 2.6925, + "step": 3097 + }, + { + "epoch": 0.25002017593414577, + "grad_norm": 0.7494251728057861, + 
"learning_rate": 0.00018900729252099426, + "loss": 2.6899, + "step": 3098 + }, + { + "epoch": 0.25010087967072875, + "grad_norm": 0.7760747075080872, + "learning_rate": 0.00018900009545731367, + "loss": 2.6626, + "step": 3099 + }, + { + "epoch": 0.2501815834073118, + "grad_norm": 0.7270001173019409, + "learning_rate": 0.00018899289617551804, + "loss": 2.7338, + "step": 3100 + }, + { + "epoch": 0.25026228714389476, + "grad_norm": 0.7832693457603455, + "learning_rate": 0.0001889856946757868, + "loss": 2.6668, + "step": 3101 + }, + { + "epoch": 0.2503429908804778, + "grad_norm": 0.8833239674568176, + "learning_rate": 0.00018897849095829945, + "loss": 2.7219, + "step": 3102 + }, + { + "epoch": 0.25042369461706077, + "grad_norm": 0.8144814372062683, + "learning_rate": 0.0001889712850232355, + "loss": 2.724, + "step": 3103 + }, + { + "epoch": 0.2505043983536438, + "grad_norm": 0.9466180801391602, + "learning_rate": 0.0001889640768707746, + "loss": 2.7499, + "step": 3104 + }, + { + "epoch": 0.2505851020902268, + "grad_norm": 0.926292359828949, + "learning_rate": 0.00018895686650109632, + "loss": 2.7391, + "step": 3105 + }, + { + "epoch": 0.2506658058268098, + "grad_norm": 0.8214002251625061, + "learning_rate": 0.00018894965391438038, + "loss": 2.7546, + "step": 3106 + }, + { + "epoch": 0.2507465095633928, + "grad_norm": 0.9021030068397522, + "learning_rate": 0.00018894243911080655, + "loss": 2.7188, + "step": 3107 + }, + { + "epoch": 0.2508272132999758, + "grad_norm": 0.778366208076477, + "learning_rate": 0.00018893522209055465, + "loss": 2.7852, + "step": 3108 + }, + { + "epoch": 0.2509079170365588, + "grad_norm": 0.8780209422111511, + "learning_rate": 0.00018892800285380456, + "loss": 2.7344, + "step": 3109 + }, + { + "epoch": 0.2509886207731418, + "grad_norm": 0.7581839561462402, + "learning_rate": 0.00018892078140073614, + "loss": 2.6697, + "step": 3110 + }, + { + "epoch": 0.2510693245097248, + "grad_norm": 0.7818635702133179, + "learning_rate": 
0.00018891355773152944, + "loss": 2.6969, + "step": 3111 + }, + { + "epoch": 0.2511500282463078, + "grad_norm": 0.7528424859046936, + "learning_rate": 0.0001889063318463644, + "loss": 2.7359, + "step": 3112 + }, + { + "epoch": 0.2512307319828908, + "grad_norm": 0.8274288773536682, + "learning_rate": 0.0001888991037454212, + "loss": 2.7124, + "step": 3113 + }, + { + "epoch": 0.2513114357194738, + "grad_norm": 0.7186813354492188, + "learning_rate": 0.00018889187342888, + "loss": 2.7037, + "step": 3114 + }, + { + "epoch": 0.2513921394560568, + "grad_norm": 0.7458071112632751, + "learning_rate": 0.00018888464089692088, + "loss": 2.7178, + "step": 3115 + }, + { + "epoch": 0.2514728431926398, + "grad_norm": 0.7814257740974426, + "learning_rate": 0.00018887740614972418, + "loss": 2.7554, + "step": 3116 + }, + { + "epoch": 0.2515535469292228, + "grad_norm": 0.7706831097602844, + "learning_rate": 0.0001888701691874702, + "loss": 2.7441, + "step": 3117 + }, + { + "epoch": 0.2516342506658058, + "grad_norm": 0.8177775740623474, + "learning_rate": 0.0001888629300103393, + "loss": 2.7257, + "step": 3118 + }, + { + "epoch": 0.25171495440238884, + "grad_norm": 0.791097104549408, + "learning_rate": 0.00018885568861851188, + "loss": 2.6937, + "step": 3119 + }, + { + "epoch": 0.2517956581389718, + "grad_norm": 0.7521430850028992, + "learning_rate": 0.00018884844501216845, + "loss": 2.7723, + "step": 3120 + }, + { + "epoch": 0.25187636187555484, + "grad_norm": 0.8119359016418457, + "learning_rate": 0.00018884119919148948, + "loss": 2.7573, + "step": 3121 + }, + { + "epoch": 0.2519570656121378, + "grad_norm": 0.7579830288887024, + "learning_rate": 0.00018883395115665562, + "loss": 2.6943, + "step": 3122 + }, + { + "epoch": 0.25203776934872085, + "grad_norm": 0.7718791365623474, + "learning_rate": 0.00018882670090784748, + "loss": 2.6911, + "step": 3123 + }, + { + "epoch": 0.25211847308530383, + "grad_norm": 0.7718087434768677, + "learning_rate": 0.00018881944844524576, + "loss": 
2.7505, + "step": 3124 + }, + { + "epoch": 0.25219917682188686, + "grad_norm": 0.7696875333786011, + "learning_rate": 0.0001888121937690312, + "loss": 2.7272, + "step": 3125 + }, + { + "epoch": 0.25227988055846984, + "grad_norm": 0.8082131743431091, + "learning_rate": 0.00018880493687938464, + "loss": 2.6677, + "step": 3126 + }, + { + "epoch": 0.25236058429505287, + "grad_norm": 0.857224702835083, + "learning_rate": 0.00018879767777648686, + "loss": 2.7237, + "step": 3127 + }, + { + "epoch": 0.25244128803163585, + "grad_norm": 0.8135749697685242, + "learning_rate": 0.00018879041646051886, + "loss": 2.7298, + "step": 3128 + }, + { + "epoch": 0.2525219917682189, + "grad_norm": 0.7772457003593445, + "learning_rate": 0.0001887831529316616, + "loss": 2.7723, + "step": 3129 + }, + { + "epoch": 0.25260269550480186, + "grad_norm": 0.795555055141449, + "learning_rate": 0.00018877588719009607, + "loss": 2.7207, + "step": 3130 + }, + { + "epoch": 0.2526833992413849, + "grad_norm": 0.7677939534187317, + "learning_rate": 0.00018876861923600337, + "loss": 2.6649, + "step": 3131 + }, + { + "epoch": 0.25276410297796786, + "grad_norm": 0.7706151008605957, + "learning_rate": 0.00018876134906956464, + "loss": 2.7154, + "step": 3132 + }, + { + "epoch": 0.2528448067145509, + "grad_norm": 0.8230584859848022, + "learning_rate": 0.00018875407669096105, + "loss": 2.7871, + "step": 3133 + }, + { + "epoch": 0.2529255104511339, + "grad_norm": 0.7037158608436584, + "learning_rate": 0.0001887468021003739, + "loss": 2.669, + "step": 3134 + }, + { + "epoch": 0.2530062141877169, + "grad_norm": 0.8485400080680847, + "learning_rate": 0.00018873952529798441, + "loss": 2.7517, + "step": 3135 + }, + { + "epoch": 0.2530869179242999, + "grad_norm": 0.7803399562835693, + "learning_rate": 0.000188732246283974, + "loss": 2.6987, + "step": 3136 + }, + { + "epoch": 0.2531676216608829, + "grad_norm": 0.7884016633033752, + "learning_rate": 0.0001887249650585241, + "loss": 2.7348, + "step": 3137 + }, + { + 
"epoch": 0.2532483253974659, + "grad_norm": 0.7794530987739563, + "learning_rate": 0.0001887176816218161, + "loss": 2.6934, + "step": 3138 + }, + { + "epoch": 0.2533290291340489, + "grad_norm": 0.7905173301696777, + "learning_rate": 0.00018871039597403156, + "loss": 2.714, + "step": 3139 + }, + { + "epoch": 0.2534097328706319, + "grad_norm": 0.7857949137687683, + "learning_rate": 0.0001887031081153521, + "loss": 2.7591, + "step": 3140 + }, + { + "epoch": 0.25349043660721493, + "grad_norm": 0.8602419495582581, + "learning_rate": 0.00018869581804595927, + "loss": 2.7819, + "step": 3141 + }, + { + "epoch": 0.2535711403437979, + "grad_norm": 0.7845202088356018, + "learning_rate": 0.00018868852576603483, + "loss": 2.6796, + "step": 3142 + }, + { + "epoch": 0.25365184408038094, + "grad_norm": 0.7600612640380859, + "learning_rate": 0.00018868123127576048, + "loss": 2.6785, + "step": 3143 + }, + { + "epoch": 0.2537325478169639, + "grad_norm": 0.7731521725654602, + "learning_rate": 0.000188673934575318, + "loss": 2.7435, + "step": 3144 + }, + { + "epoch": 0.25381325155354695, + "grad_norm": 0.8214225172996521, + "learning_rate": 0.0001886666356648893, + "loss": 2.7264, + "step": 3145 + }, + { + "epoch": 0.2538939552901299, + "grad_norm": 0.7623010277748108, + "learning_rate": 0.00018865933454465628, + "loss": 2.73, + "step": 3146 + }, + { + "epoch": 0.25397465902671296, + "grad_norm": 0.7864633798599243, + "learning_rate": 0.00018865203121480088, + "loss": 2.7654, + "step": 3147 + }, + { + "epoch": 0.25405536276329593, + "grad_norm": 0.7654051780700684, + "learning_rate": 0.0001886447256755051, + "loss": 2.7171, + "step": 3148 + }, + { + "epoch": 0.25413606649987897, + "grad_norm": 0.8045486211776733, + "learning_rate": 0.0001886374179269511, + "loss": 2.7385, + "step": 3149 + }, + { + "epoch": 0.25421677023646194, + "grad_norm": 0.8504971861839294, + "learning_rate": 0.0001886301079693209, + "loss": 2.6719, + "step": 3150 + }, + { + "epoch": 0.254297473973045, + 
"grad_norm": 0.771538496017456, + "learning_rate": 0.0001886227958027967, + "loss": 2.6707, + "step": 3151 + }, + { + "epoch": 0.25437817770962795, + "grad_norm": 0.8472220301628113, + "learning_rate": 0.0001886154814275608, + "loss": 2.7201, + "step": 3152 + }, + { + "epoch": 0.254458881446211, + "grad_norm": 0.7639158368110657, + "learning_rate": 0.00018860816484379545, + "loss": 2.76, + "step": 3153 + }, + { + "epoch": 0.25453958518279396, + "grad_norm": 0.8042064905166626, + "learning_rate": 0.000188600846051683, + "loss": 2.6862, + "step": 3154 + }, + { + "epoch": 0.254620288919377, + "grad_norm": 0.7481087446212769, + "learning_rate": 0.0001885935250514059, + "loss": 2.7394, + "step": 3155 + }, + { + "epoch": 0.25470099265595997, + "grad_norm": 0.7826097011566162, + "learning_rate": 0.00018858620184314653, + "loss": 2.596, + "step": 3156 + }, + { + "epoch": 0.254781696392543, + "grad_norm": 0.7477610111236572, + "learning_rate": 0.00018857887642708743, + "loss": 2.7385, + "step": 3157 + }, + { + "epoch": 0.254862400129126, + "grad_norm": 0.7347466945648193, + "learning_rate": 0.00018857154880341122, + "loss": 2.722, + "step": 3158 + }, + { + "epoch": 0.254943103865709, + "grad_norm": 0.7853806018829346, + "learning_rate": 0.00018856421897230048, + "loss": 2.7675, + "step": 3159 + }, + { + "epoch": 0.255023807602292, + "grad_norm": 0.7497034072875977, + "learning_rate": 0.0001885568869339379, + "loss": 2.6882, + "step": 3160 + }, + { + "epoch": 0.255104511338875, + "grad_norm": 0.7932263612747192, + "learning_rate": 0.0001885495526885062, + "loss": 2.7938, + "step": 3161 + }, + { + "epoch": 0.255185215075458, + "grad_norm": 0.7776823043823242, + "learning_rate": 0.00018854221623618815, + "loss": 2.6955, + "step": 3162 + }, + { + "epoch": 0.25526591881204097, + "grad_norm": 0.7564878463745117, + "learning_rate": 0.00018853487757716666, + "loss": 2.7644, + "step": 3163 + }, + { + "epoch": 0.255346622548624, + "grad_norm": 0.836270809173584, + "learning_rate": 
0.00018852753671162454, + "loss": 2.7119, + "step": 3164 + }, + { + "epoch": 0.255427326285207, + "grad_norm": 0.7540388703346252, + "learning_rate": 0.00018852019363974485, + "loss": 2.797, + "step": 3165 + }, + { + "epoch": 0.25550803002179, + "grad_norm": 0.7943860292434692, + "learning_rate": 0.0001885128483617105, + "loss": 2.7973, + "step": 3166 + }, + { + "epoch": 0.255588733758373, + "grad_norm": 0.7743831276893616, + "learning_rate": 0.00018850550087770463, + "loss": 2.7403, + "step": 3167 + }, + { + "epoch": 0.255669437494956, + "grad_norm": 0.7593801021575928, + "learning_rate": 0.00018849815118791028, + "loss": 2.7203, + "step": 3168 + }, + { + "epoch": 0.255750141231539, + "grad_norm": 0.7663586139678955, + "learning_rate": 0.00018849079929251068, + "loss": 2.7481, + "step": 3169 + }, + { + "epoch": 0.25583084496812203, + "grad_norm": 0.7218170166015625, + "learning_rate": 0.00018848344519168905, + "loss": 2.6698, + "step": 3170 + }, + { + "epoch": 0.255911548704705, + "grad_norm": 0.8374441266059875, + "learning_rate": 0.00018847608888562868, + "loss": 2.8121, + "step": 3171 + }, + { + "epoch": 0.25599225244128804, + "grad_norm": 0.7488373517990112, + "learning_rate": 0.00018846873037451286, + "loss": 2.6871, + "step": 3172 + }, + { + "epoch": 0.256072956177871, + "grad_norm": 0.7513325810432434, + "learning_rate": 0.00018846136965852505, + "loss": 2.6924, + "step": 3173 + }, + { + "epoch": 0.25615365991445405, + "grad_norm": 0.7467690706253052, + "learning_rate": 0.00018845400673784865, + "loss": 2.714, + "step": 3174 + }, + { + "epoch": 0.256234363651037, + "grad_norm": 0.7717954516410828, + "learning_rate": 0.0001884466416126672, + "loss": 2.6679, + "step": 3175 + }, + { + "epoch": 0.25631506738762005, + "grad_norm": 0.7086547613143921, + "learning_rate": 0.0001884392742831642, + "loss": 2.7046, + "step": 3176 + }, + { + "epoch": 0.25639577112420303, + "grad_norm": 0.7024885416030884, + "learning_rate": 0.00018843190474952337, + "loss": 2.6724, + 
"step": 3177 + }, + { + "epoch": 0.25647647486078606, + "grad_norm": 0.8376390933990479, + "learning_rate": 0.00018842453301192827, + "loss": 2.7818, + "step": 3178 + }, + { + "epoch": 0.25655717859736904, + "grad_norm": 0.8190221190452576, + "learning_rate": 0.00018841715907056265, + "loss": 2.7455, + "step": 3179 + }, + { + "epoch": 0.25663788233395207, + "grad_norm": 0.8029047846794128, + "learning_rate": 0.0001884097829256103, + "loss": 2.7102, + "step": 3180 + }, + { + "epoch": 0.25671858607053505, + "grad_norm": 0.7467923760414124, + "learning_rate": 0.00018840240457725508, + "loss": 2.7051, + "step": 3181 + }, + { + "epoch": 0.2567992898071181, + "grad_norm": 0.7850394248962402, + "learning_rate": 0.00018839502402568086, + "loss": 2.6826, + "step": 3182 + }, + { + "epoch": 0.25687999354370106, + "grad_norm": 0.7144927978515625, + "learning_rate": 0.00018838764127107155, + "loss": 2.6694, + "step": 3183 + }, + { + "epoch": 0.2569606972802841, + "grad_norm": 0.7580311894416809, + "learning_rate": 0.0001883802563136112, + "loss": 2.7191, + "step": 3184 + }, + { + "epoch": 0.25704140101686707, + "grad_norm": 0.7366482615470886, + "learning_rate": 0.0001883728691534838, + "loss": 2.7175, + "step": 3185 + }, + { + "epoch": 0.2571221047534501, + "grad_norm": 0.6961715817451477, + "learning_rate": 0.0001883654797908735, + "loss": 2.7705, + "step": 3186 + }, + { + "epoch": 0.2572028084900331, + "grad_norm": 0.7473716735839844, + "learning_rate": 0.00018835808822596445, + "loss": 2.707, + "step": 3187 + }, + { + "epoch": 0.2572835122266161, + "grad_norm": 0.8376151919364929, + "learning_rate": 0.00018835069445894087, + "loss": 2.7424, + "step": 3188 + }, + { + "epoch": 0.2573642159631991, + "grad_norm": 0.7950237393379211, + "learning_rate": 0.00018834329848998706, + "loss": 2.7593, + "step": 3189 + }, + { + "epoch": 0.2574449196997821, + "grad_norm": 0.7637122869491577, + "learning_rate": 0.0001883359003192873, + "loss": 2.6708, + "step": 3190 + }, + { + "epoch": 
0.2575256234363651, + "grad_norm": 0.709516704082489, + "learning_rate": 0.00018832849994702597, + "loss": 2.6988, + "step": 3191 + }, + { + "epoch": 0.2576063271729481, + "grad_norm": 0.7465435266494751, + "learning_rate": 0.00018832109737338757, + "loss": 2.7183, + "step": 3192 + }, + { + "epoch": 0.2576870309095311, + "grad_norm": 0.7619186043739319, + "learning_rate": 0.00018831369259855653, + "loss": 2.6833, + "step": 3193 + }, + { + "epoch": 0.25776773464611413, + "grad_norm": 0.7501961588859558, + "learning_rate": 0.0001883062856227174, + "loss": 2.725, + "step": 3194 + }, + { + "epoch": 0.2578484383826971, + "grad_norm": 0.7720133066177368, + "learning_rate": 0.00018829887644605483, + "loss": 2.7988, + "step": 3195 + }, + { + "epoch": 0.25792914211928014, + "grad_norm": 0.7253942489624023, + "learning_rate": 0.00018829146506875344, + "loss": 2.6999, + "step": 3196 + }, + { + "epoch": 0.2580098458558631, + "grad_norm": 0.7759599685668945, + "learning_rate": 0.00018828405149099792, + "loss": 2.6831, + "step": 3197 + }, + { + "epoch": 0.25809054959244615, + "grad_norm": 0.7250547409057617, + "learning_rate": 0.0001882766357129731, + "loss": 2.6742, + "step": 3198 + }, + { + "epoch": 0.2581712533290291, + "grad_norm": 0.7565183043479919, + "learning_rate": 0.00018826921773486372, + "loss": 2.6777, + "step": 3199 + }, + { + "epoch": 0.25825195706561216, + "grad_norm": 0.7183675169944763, + "learning_rate": 0.0001882617975568547, + "loss": 2.6743, + "step": 3200 + }, + { + "epoch": 0.25833266080219514, + "grad_norm": 0.7021663784980774, + "learning_rate": 0.00018825437517913098, + "loss": 2.727, + "step": 3201 + }, + { + "epoch": 0.25841336453877817, + "grad_norm": 0.7406932711601257, + "learning_rate": 0.00018824695060187753, + "loss": 2.7448, + "step": 3202 + }, + { + "epoch": 0.25849406827536114, + "grad_norm": 0.7766773104667664, + "learning_rate": 0.0001882395238252794, + "loss": 2.69, + "step": 3203 + }, + { + "epoch": 0.2585747720119442, + "grad_norm": 
0.7483372688293457, + "learning_rate": 0.00018823209484952164, + "loss": 2.6611, + "step": 3204 + }, + { + "epoch": 0.25865547574852715, + "grad_norm": 0.781831681728363, + "learning_rate": 0.0001882246636747895, + "loss": 2.7292, + "step": 3205 + }, + { + "epoch": 0.2587361794851102, + "grad_norm": 0.7188203930854797, + "learning_rate": 0.00018821723030126806, + "loss": 2.718, + "step": 3206 + }, + { + "epoch": 0.25881688322169316, + "grad_norm": 0.7332054972648621, + "learning_rate": 0.00018820979472914263, + "loss": 2.6492, + "step": 3207 + }, + { + "epoch": 0.2588975869582762, + "grad_norm": 0.7044041156768799, + "learning_rate": 0.00018820235695859858, + "loss": 2.7047, + "step": 3208 + }, + { + "epoch": 0.25897829069485917, + "grad_norm": 0.8651862740516663, + "learning_rate": 0.00018819491698982121, + "loss": 2.6301, + "step": 3209 + }, + { + "epoch": 0.2590589944314422, + "grad_norm": 0.8118106126785278, + "learning_rate": 0.00018818747482299598, + "loss": 2.6522, + "step": 3210 + }, + { + "epoch": 0.2591396981680252, + "grad_norm": 0.7239218354225159, + "learning_rate": 0.00018818003045830832, + "loss": 2.7058, + "step": 3211 + }, + { + "epoch": 0.2592204019046082, + "grad_norm": 0.8557687997817993, + "learning_rate": 0.00018817258389594382, + "loss": 2.7125, + "step": 3212 + }, + { + "epoch": 0.2593011056411912, + "grad_norm": 0.7685148119926453, + "learning_rate": 0.00018816513513608801, + "loss": 2.7516, + "step": 3213 + }, + { + "epoch": 0.25938180937777416, + "grad_norm": 0.7497698664665222, + "learning_rate": 0.00018815768417892664, + "loss": 2.6536, + "step": 3214 + }, + { + "epoch": 0.2594625131143572, + "grad_norm": 0.7041923403739929, + "learning_rate": 0.0001881502310246453, + "loss": 2.7031, + "step": 3215 + }, + { + "epoch": 0.2595432168509402, + "grad_norm": 0.7815428376197815, + "learning_rate": 0.00018814277567342976, + "loss": 2.7291, + "step": 3216 + }, + { + "epoch": 0.2596239205875232, + "grad_norm": 0.7285065650939941, + 
"learning_rate": 0.00018813531812546583, + "loss": 2.7712, + "step": 3217 + }, + { + "epoch": 0.2597046243241062, + "grad_norm": 0.7606547474861145, + "learning_rate": 0.0001881278583809394, + "loss": 2.6714, + "step": 3218 + }, + { + "epoch": 0.2597853280606892, + "grad_norm": 0.7166680097579956, + "learning_rate": 0.00018812039644003638, + "loss": 2.7147, + "step": 3219 + }, + { + "epoch": 0.2598660317972722, + "grad_norm": 0.8977978229522705, + "learning_rate": 0.0001881129323029427, + "loss": 2.7743, + "step": 3220 + }, + { + "epoch": 0.2599467355338552, + "grad_norm": 0.7447277307510376, + "learning_rate": 0.00018810546596984446, + "loss": 2.7049, + "step": 3221 + }, + { + "epoch": 0.2600274392704382, + "grad_norm": 0.7343515157699585, + "learning_rate": 0.00018809799744092768, + "loss": 2.6999, + "step": 3222 + }, + { + "epoch": 0.26010814300702123, + "grad_norm": 0.7303341627120972, + "learning_rate": 0.00018809052671637852, + "loss": 2.7222, + "step": 3223 + }, + { + "epoch": 0.2601888467436042, + "grad_norm": 0.7412950396537781, + "learning_rate": 0.00018808305379638314, + "loss": 2.6957, + "step": 3224 + }, + { + "epoch": 0.26026955048018724, + "grad_norm": 0.7495343089103699, + "learning_rate": 0.00018807557868112781, + "loss": 2.7123, + "step": 3225 + }, + { + "epoch": 0.2603502542167702, + "grad_norm": 0.8137524724006653, + "learning_rate": 0.00018806810137079886, + "loss": 2.7191, + "step": 3226 + }, + { + "epoch": 0.26043095795335325, + "grad_norm": 0.786374568939209, + "learning_rate": 0.0001880606218655826, + "loss": 2.7237, + "step": 3227 + }, + { + "epoch": 0.2605116616899362, + "grad_norm": 0.9969484806060791, + "learning_rate": 0.00018805314016566543, + "loss": 2.7603, + "step": 3228 + }, + { + "epoch": 0.26059236542651926, + "grad_norm": 0.8132432103157043, + "learning_rate": 0.00018804565627123386, + "loss": 2.6807, + "step": 3229 + }, + { + "epoch": 0.26067306916310223, + "grad_norm": 0.7604904174804688, + "learning_rate": 
0.00018803817018247436, + "loss": 2.7105, + "step": 3230 + }, + { + "epoch": 0.26075377289968527, + "grad_norm": 0.743505597114563, + "learning_rate": 0.00018803068189957354, + "loss": 2.7152, + "step": 3231 + }, + { + "epoch": 0.26083447663626824, + "grad_norm": 0.7780006527900696, + "learning_rate": 0.000188023191422718, + "loss": 2.7043, + "step": 3232 + }, + { + "epoch": 0.2609151803728513, + "grad_norm": 0.7683089375495911, + "learning_rate": 0.00018801569875209447, + "loss": 2.7033, + "step": 3233 + }, + { + "epoch": 0.26099588410943425, + "grad_norm": 0.7540118098258972, + "learning_rate": 0.0001880082038878896, + "loss": 2.7121, + "step": 3234 + }, + { + "epoch": 0.2610765878460173, + "grad_norm": 0.7509592771530151, + "learning_rate": 0.00018800070683029025, + "loss": 2.6575, + "step": 3235 + }, + { + "epoch": 0.26115729158260026, + "grad_norm": 0.8015461564064026, + "learning_rate": 0.00018799320757948327, + "loss": 2.6956, + "step": 3236 + }, + { + "epoch": 0.2612379953191833, + "grad_norm": 0.7586383819580078, + "learning_rate": 0.00018798570613565553, + "loss": 2.6719, + "step": 3237 + }, + { + "epoch": 0.26131869905576627, + "grad_norm": 0.7833155989646912, + "learning_rate": 0.000187978202498994, + "loss": 2.7317, + "step": 3238 + }, + { + "epoch": 0.2613994027923493, + "grad_norm": 0.7976018786430359, + "learning_rate": 0.00018797069666968565, + "loss": 2.7514, + "step": 3239 + }, + { + "epoch": 0.2614801065289323, + "grad_norm": 0.8388968706130981, + "learning_rate": 0.00018796318864791763, + "loss": 2.6845, + "step": 3240 + }, + { + "epoch": 0.2615608102655153, + "grad_norm": 0.8082842230796814, + "learning_rate": 0.00018795567843387701, + "loss": 2.7204, + "step": 3241 + }, + { + "epoch": 0.2616415140020983, + "grad_norm": 0.7514800429344177, + "learning_rate": 0.00018794816602775094, + "loss": 2.7117, + "step": 3242 + }, + { + "epoch": 0.2617222177386813, + "grad_norm": 0.8676564693450928, + "learning_rate": 0.00018794065142972664, + "loss": 
2.6596, + "step": 3243 + }, + { + "epoch": 0.2618029214752643, + "grad_norm": 0.7449865341186523, + "learning_rate": 0.0001879331346399915, + "loss": 2.7089, + "step": 3244 + }, + { + "epoch": 0.2618836252118473, + "grad_norm": 0.8020811676979065, + "learning_rate": 0.00018792561565873274, + "loss": 2.7293, + "step": 3245 + }, + { + "epoch": 0.2619643289484303, + "grad_norm": 0.7961642146110535, + "learning_rate": 0.00018791809448613783, + "loss": 2.7269, + "step": 3246 + }, + { + "epoch": 0.26204503268501333, + "grad_norm": 0.7842351198196411, + "learning_rate": 0.00018791057112239415, + "loss": 2.6773, + "step": 3247 + }, + { + "epoch": 0.2621257364215963, + "grad_norm": 0.7494246959686279, + "learning_rate": 0.00018790304556768925, + "loss": 2.7317, + "step": 3248 + }, + { + "epoch": 0.26220644015817934, + "grad_norm": 0.7822836637496948, + "learning_rate": 0.0001878955178222107, + "loss": 2.6834, + "step": 3249 + }, + { + "epoch": 0.2622871438947623, + "grad_norm": 0.8432494401931763, + "learning_rate": 0.00018788798788614607, + "loss": 2.7048, + "step": 3250 + }, + { + "epoch": 0.26236784763134535, + "grad_norm": 0.9599446058273315, + "learning_rate": 0.000187880455759683, + "loss": 2.7793, + "step": 3251 + }, + { + "epoch": 0.26244855136792833, + "grad_norm": 0.8097226023674011, + "learning_rate": 0.00018787292144300928, + "loss": 2.7177, + "step": 3252 + }, + { + "epoch": 0.26252925510451136, + "grad_norm": 0.8423499464988708, + "learning_rate": 0.00018786538493631265, + "loss": 2.7265, + "step": 3253 + }, + { + "epoch": 0.26260995884109434, + "grad_norm": 0.7388847470283508, + "learning_rate": 0.00018785784623978095, + "loss": 2.6778, + "step": 3254 + }, + { + "epoch": 0.26269066257767737, + "grad_norm": 0.766368567943573, + "learning_rate": 0.0001878503053536021, + "loss": 2.654, + "step": 3255 + }, + { + "epoch": 0.26277136631426035, + "grad_norm": 0.8181266188621521, + "learning_rate": 0.00018784276227796394, + "loss": 2.7568, + "step": 3256 + }, + { + 
"epoch": 0.2628520700508434, + "grad_norm": 0.8235312104225159, + "learning_rate": 0.00018783521701305452, + "loss": 2.7317, + "step": 3257 + }, + { + "epoch": 0.26293277378742635, + "grad_norm": 0.7103183269500732, + "learning_rate": 0.00018782766955906195, + "loss": 2.6919, + "step": 3258 + }, + { + "epoch": 0.2630134775240094, + "grad_norm": 0.7202538251876831, + "learning_rate": 0.0001878201199161742, + "loss": 2.7179, + "step": 3259 + }, + { + "epoch": 0.26309418126059236, + "grad_norm": 0.8402286171913147, + "learning_rate": 0.00018781256808457952, + "loss": 2.7789, + "step": 3260 + }, + { + "epoch": 0.2631748849971754, + "grad_norm": 0.8136829137802124, + "learning_rate": 0.00018780501406446613, + "loss": 2.6872, + "step": 3261 + }, + { + "epoch": 0.26325558873375837, + "grad_norm": 0.8017000555992126, + "learning_rate": 0.00018779745785602224, + "loss": 2.7527, + "step": 3262 + }, + { + "epoch": 0.2633362924703414, + "grad_norm": 0.7880774140357971, + "learning_rate": 0.00018778989945943619, + "loss": 2.7348, + "step": 3263 + }, + { + "epoch": 0.2634169962069244, + "grad_norm": 0.7402438521385193, + "learning_rate": 0.00018778233887489635, + "loss": 2.6946, + "step": 3264 + }, + { + "epoch": 0.26349769994350736, + "grad_norm": 0.7450907230377197, + "learning_rate": 0.0001877747761025912, + "loss": 2.7502, + "step": 3265 + }, + { + "epoch": 0.2635784036800904, + "grad_norm": 0.7504056692123413, + "learning_rate": 0.00018776721114270917, + "loss": 2.832, + "step": 3266 + }, + { + "epoch": 0.26365910741667337, + "grad_norm": 0.7710226774215698, + "learning_rate": 0.00018775964399543878, + "loss": 2.6895, + "step": 3267 + }, + { + "epoch": 0.2637398111532564, + "grad_norm": 0.769927978515625, + "learning_rate": 0.00018775207466096867, + "loss": 2.6801, + "step": 3268 + }, + { + "epoch": 0.2638205148898394, + "grad_norm": 0.7210869193077087, + "learning_rate": 0.0001877445031394875, + "loss": 2.6966, + "step": 3269 + }, + { + "epoch": 0.2639012186264224, + 
"grad_norm": 0.7731119990348816, + "learning_rate": 0.00018773692943118393, + "loss": 2.6965, + "step": 3270 + }, + { + "epoch": 0.2639819223630054, + "grad_norm": 0.7539728283882141, + "learning_rate": 0.00018772935353624672, + "loss": 2.753, + "step": 3271 + }, + { + "epoch": 0.2640626260995884, + "grad_norm": 0.7993821501731873, + "learning_rate": 0.00018772177545486472, + "loss": 2.7177, + "step": 3272 + }, + { + "epoch": 0.2641433298361714, + "grad_norm": 0.7880005240440369, + "learning_rate": 0.00018771419518722672, + "loss": 2.6854, + "step": 3273 + }, + { + "epoch": 0.2642240335727544, + "grad_norm": 0.8079188466072083, + "learning_rate": 0.0001877066127335217, + "loss": 2.734, + "step": 3274 + }, + { + "epoch": 0.2643047373093374, + "grad_norm": 0.8241428732872009, + "learning_rate": 0.00018769902809393865, + "loss": 2.7156, + "step": 3275 + }, + { + "epoch": 0.26438544104592043, + "grad_norm": 0.8007158041000366, + "learning_rate": 0.00018769144126866657, + "loss": 2.693, + "step": 3276 + }, + { + "epoch": 0.2644661447825034, + "grad_norm": 0.8360451459884644, + "learning_rate": 0.00018768385225789456, + "loss": 2.6919, + "step": 3277 + }, + { + "epoch": 0.26454684851908644, + "grad_norm": 0.7596627473831177, + "learning_rate": 0.00018767626106181172, + "loss": 2.7861, + "step": 3278 + }, + { + "epoch": 0.2646275522556694, + "grad_norm": 0.7469248175621033, + "learning_rate": 0.00018766866768060727, + "loss": 2.7305, + "step": 3279 + }, + { + "epoch": 0.26470825599225245, + "grad_norm": 0.7103936076164246, + "learning_rate": 0.00018766107211447045, + "loss": 2.6456, + "step": 3280 + }, + { + "epoch": 0.2647889597288354, + "grad_norm": 0.7595266103744507, + "learning_rate": 0.00018765347436359056, + "loss": 2.7235, + "step": 3281 + }, + { + "epoch": 0.26486966346541846, + "grad_norm": 0.786648154258728, + "learning_rate": 0.00018764587442815698, + "loss": 2.7182, + "step": 3282 + }, + { + "epoch": 0.26495036720200144, + "grad_norm": 0.7152618169784546, + 
"learning_rate": 0.00018763827230835908, + "loss": 2.6842, + "step": 3283 + }, + { + "epoch": 0.26503107093858447, + "grad_norm": 0.89169842004776, + "learning_rate": 0.00018763066800438636, + "loss": 2.7661, + "step": 3284 + }, + { + "epoch": 0.26511177467516744, + "grad_norm": 0.8148171305656433, + "learning_rate": 0.00018762306151642833, + "loss": 2.7264, + "step": 3285 + }, + { + "epoch": 0.2651924784117505, + "grad_norm": 0.8070533871650696, + "learning_rate": 0.00018761545284467454, + "loss": 2.7425, + "step": 3286 + }, + { + "epoch": 0.26527318214833345, + "grad_norm": 0.8536118268966675, + "learning_rate": 0.00018760784198931465, + "loss": 2.702, + "step": 3287 + }, + { + "epoch": 0.2653538858849165, + "grad_norm": 0.7422329783439636, + "learning_rate": 0.00018760022895053833, + "loss": 2.6913, + "step": 3288 + }, + { + "epoch": 0.26543458962149946, + "grad_norm": 0.7415527105331421, + "learning_rate": 0.0001875926137285353, + "loss": 2.6472, + "step": 3289 + }, + { + "epoch": 0.2655152933580825, + "grad_norm": 0.8432031273841858, + "learning_rate": 0.00018758499632349538, + "loss": 2.7506, + "step": 3290 + }, + { + "epoch": 0.26559599709466547, + "grad_norm": 0.8113259077072144, + "learning_rate": 0.0001875773767356084, + "loss": 2.6866, + "step": 3291 + }, + { + "epoch": 0.2656767008312485, + "grad_norm": 0.7898122668266296, + "learning_rate": 0.00018756975496506424, + "loss": 2.6516, + "step": 3292 + }, + { + "epoch": 0.2657574045678315, + "grad_norm": 0.7627275586128235, + "learning_rate": 0.0001875621310120529, + "loss": 2.7065, + "step": 3293 + }, + { + "epoch": 0.2658381083044145, + "grad_norm": 0.8227291107177734, + "learning_rate": 0.00018755450487676435, + "loss": 2.7614, + "step": 3294 + }, + { + "epoch": 0.2659188120409975, + "grad_norm": 0.8162109851837158, + "learning_rate": 0.00018754687655938868, + "loss": 2.7924, + "step": 3295 + }, + { + "epoch": 0.2659995157775805, + "grad_norm": 0.7231846451759338, + "learning_rate": 
0.00018753924606011602, + "loss": 2.7505, + "step": 3296 + }, + { + "epoch": 0.2660802195141635, + "grad_norm": 0.8635944724082947, + "learning_rate": 0.00018753161337913647, + "loss": 2.7505, + "step": 3297 + }, + { + "epoch": 0.26616092325074653, + "grad_norm": 0.8131890892982483, + "learning_rate": 0.00018752397851664031, + "loss": 2.7872, + "step": 3298 + }, + { + "epoch": 0.2662416269873295, + "grad_norm": 0.7336695790290833, + "learning_rate": 0.00018751634147281786, + "loss": 2.7517, + "step": 3299 + }, + { + "epoch": 0.26632233072391254, + "grad_norm": 0.7541754841804504, + "learning_rate": 0.00018750870224785939, + "loss": 2.7807, + "step": 3300 + }, + { + "epoch": 0.2664030344604955, + "grad_norm": 0.9347110390663147, + "learning_rate": 0.0001875010608419553, + "loss": 2.6954, + "step": 3301 + }, + { + "epoch": 0.26648373819707855, + "grad_norm": 0.7591213583946228, + "learning_rate": 0.00018749341725529604, + "loss": 2.7019, + "step": 3302 + }, + { + "epoch": 0.2665644419336615, + "grad_norm": 0.811527669429779, + "learning_rate": 0.00018748577148807211, + "loss": 2.7123, + "step": 3303 + }, + { + "epoch": 0.26664514567024455, + "grad_norm": 0.7419980764389038, + "learning_rate": 0.00018747812354047408, + "loss": 2.7383, + "step": 3304 + }, + { + "epoch": 0.26672584940682753, + "grad_norm": 0.7801192402839661, + "learning_rate": 0.00018747047341269256, + "loss": 2.7245, + "step": 3305 + }, + { + "epoch": 0.26680655314341056, + "grad_norm": 0.7392756938934326, + "learning_rate": 0.00018746282110491816, + "loss": 2.6992, + "step": 3306 + }, + { + "epoch": 0.26688725687999354, + "grad_norm": 0.7085927724838257, + "learning_rate": 0.00018745516661734161, + "loss": 2.739, + "step": 3307 + }, + { + "epoch": 0.26696796061657657, + "grad_norm": 0.7218676209449768, + "learning_rate": 0.00018744750995015373, + "loss": 2.7091, + "step": 3308 + }, + { + "epoch": 0.26704866435315955, + "grad_norm": 0.847872257232666, + "learning_rate": 0.0001874398511035453, + 
"loss": 2.699, + "step": 3309 + }, + { + "epoch": 0.2671293680897426, + "grad_norm": 0.8280770778656006, + "learning_rate": 0.00018743219007770723, + "loss": 2.763, + "step": 3310 + }, + { + "epoch": 0.26721007182632556, + "grad_norm": 0.7271165251731873, + "learning_rate": 0.0001874245268728304, + "loss": 2.7219, + "step": 3311 + }, + { + "epoch": 0.2672907755629086, + "grad_norm": 0.7342363595962524, + "learning_rate": 0.00018741686148910586, + "loss": 2.6765, + "step": 3312 + }, + { + "epoch": 0.26737147929949157, + "grad_norm": 0.7260174751281738, + "learning_rate": 0.0001874091939267246, + "loss": 2.7003, + "step": 3313 + }, + { + "epoch": 0.2674521830360746, + "grad_norm": 0.742494523525238, + "learning_rate": 0.00018740152418587775, + "loss": 2.7371, + "step": 3314 + }, + { + "epoch": 0.2675328867726576, + "grad_norm": 0.7238131165504456, + "learning_rate": 0.00018739385226675646, + "loss": 2.7486, + "step": 3315 + }, + { + "epoch": 0.26761359050924055, + "grad_norm": 0.7329363226890564, + "learning_rate": 0.0001873861781695519, + "loss": 2.6414, + "step": 3316 + }, + { + "epoch": 0.2676942942458236, + "grad_norm": 0.7078117728233337, + "learning_rate": 0.00018737850189445534, + "loss": 2.7271, + "step": 3317 + }, + { + "epoch": 0.26777499798240656, + "grad_norm": 0.7945309281349182, + "learning_rate": 0.00018737082344165814, + "loss": 2.7323, + "step": 3318 + }, + { + "epoch": 0.2678557017189896, + "grad_norm": 0.7510890364646912, + "learning_rate": 0.0001873631428113516, + "loss": 2.6563, + "step": 3319 + }, + { + "epoch": 0.26793640545557257, + "grad_norm": 0.7790820002555847, + "learning_rate": 0.0001873554600037272, + "loss": 2.7445, + "step": 3320 + }, + { + "epoch": 0.2680171091921556, + "grad_norm": 0.7689393162727356, + "learning_rate": 0.00018734777501897636, + "loss": 2.669, + "step": 3321 + }, + { + "epoch": 0.2680978129287386, + "grad_norm": 0.8227118253707886, + "learning_rate": 0.00018734008785729065, + "loss": 2.7279, + "step": 3322 + }, + { 
+ "epoch": 0.2681785166653216, + "grad_norm": 0.7551290392875671, + "learning_rate": 0.00018733239851886162, + "loss": 2.6864, + "step": 3323 + }, + { + "epoch": 0.2682592204019046, + "grad_norm": 0.8572004437446594, + "learning_rate": 0.00018732470700388097, + "loss": 2.8159, + "step": 3324 + }, + { + "epoch": 0.2683399241384876, + "grad_norm": 0.7509044408798218, + "learning_rate": 0.00018731701331254033, + "loss": 2.7698, + "step": 3325 + }, + { + "epoch": 0.2684206278750706, + "grad_norm": 0.8474129438400269, + "learning_rate": 0.00018730931744503148, + "loss": 2.6745, + "step": 3326 + }, + { + "epoch": 0.2685013316116536, + "grad_norm": 0.8310953378677368, + "learning_rate": 0.00018730161940154618, + "loss": 2.712, + "step": 3327 + }, + { + "epoch": 0.2685820353482366, + "grad_norm": 0.8820717334747314, + "learning_rate": 0.00018729391918227632, + "loss": 2.7776, + "step": 3328 + }, + { + "epoch": 0.26866273908481964, + "grad_norm": 0.8827663064002991, + "learning_rate": 0.00018728621678741384, + "loss": 2.7115, + "step": 3329 + }, + { + "epoch": 0.2687434428214026, + "grad_norm": 0.7896323800086975, + "learning_rate": 0.00018727851221715064, + "loss": 2.6799, + "step": 3330 + }, + { + "epoch": 0.26882414655798564, + "grad_norm": 0.7775614261627197, + "learning_rate": 0.0001872708054716788, + "loss": 2.7021, + "step": 3331 + }, + { + "epoch": 0.2689048502945686, + "grad_norm": 0.8150187134742737, + "learning_rate": 0.0001872630965511903, + "loss": 2.679, + "step": 3332 + }, + { + "epoch": 0.26898555403115165, + "grad_norm": 0.7821844220161438, + "learning_rate": 0.00018725538545587736, + "loss": 2.7067, + "step": 3333 + }, + { + "epoch": 0.26906625776773463, + "grad_norm": 0.8390234112739563, + "learning_rate": 0.00018724767218593216, + "loss": 2.7133, + "step": 3334 + }, + { + "epoch": 0.26914696150431766, + "grad_norm": 0.8150694370269775, + "learning_rate": 0.00018723995674154687, + "loss": 2.7022, + "step": 3335 + }, + { + "epoch": 0.26922766524090064, + 
"grad_norm": 0.7473872900009155, + "learning_rate": 0.0001872322391229138, + "loss": 2.7268, + "step": 3336 + }, + { + "epoch": 0.26930836897748367, + "grad_norm": 0.7591951489448547, + "learning_rate": 0.0001872245193302253, + "loss": 2.7516, + "step": 3337 + }, + { + "epoch": 0.26938907271406665, + "grad_norm": 0.7914662957191467, + "learning_rate": 0.00018721679736367382, + "loss": 2.6613, + "step": 3338 + }, + { + "epoch": 0.2694697764506497, + "grad_norm": 0.7823428511619568, + "learning_rate": 0.00018720907322345172, + "loss": 2.6661, + "step": 3339 + }, + { + "epoch": 0.26955048018723266, + "grad_norm": 0.8428264260292053, + "learning_rate": 0.00018720134690975156, + "loss": 2.672, + "step": 3340 + }, + { + "epoch": 0.2696311839238157, + "grad_norm": 0.71320641040802, + "learning_rate": 0.00018719361842276587, + "loss": 2.7326, + "step": 3341 + }, + { + "epoch": 0.26971188766039866, + "grad_norm": 0.7972821593284607, + "learning_rate": 0.00018718588776268731, + "loss": 2.7182, + "step": 3342 + }, + { + "epoch": 0.2697925913969817, + "grad_norm": 0.7924500107765198, + "learning_rate": 0.0001871781549297085, + "loss": 2.7308, + "step": 3343 + }, + { + "epoch": 0.2698732951335647, + "grad_norm": 0.7668356895446777, + "learning_rate": 0.0001871704199240222, + "loss": 2.678, + "step": 3344 + }, + { + "epoch": 0.2699539988701477, + "grad_norm": 0.866973876953125, + "learning_rate": 0.00018716268274582114, + "loss": 2.7802, + "step": 3345 + }, + { + "epoch": 0.2700347026067307, + "grad_norm": 0.7709557414054871, + "learning_rate": 0.0001871549433952982, + "loss": 2.7418, + "step": 3346 + }, + { + "epoch": 0.2701154063433137, + "grad_norm": 0.7707573771476746, + "learning_rate": 0.00018714720187264626, + "loss": 2.7486, + "step": 3347 + }, + { + "epoch": 0.2701961100798967, + "grad_norm": 0.8007768392562866, + "learning_rate": 0.00018713945817805822, + "loss": 2.7106, + "step": 3348 + }, + { + "epoch": 0.2702768138164797, + "grad_norm": 0.7239583134651184, + 
"learning_rate": 0.0001871317123117271, + "loss": 2.7209, + "step": 3349 + }, + { + "epoch": 0.2703575175530627, + "grad_norm": 0.775104820728302, + "learning_rate": 0.00018712396427384594, + "loss": 2.6503, + "step": 3350 + }, + { + "epoch": 0.27043822128964573, + "grad_norm": 0.7492741346359253, + "learning_rate": 0.0001871162140646079, + "loss": 2.699, + "step": 3351 + }, + { + "epoch": 0.2705189250262287, + "grad_norm": 0.7550846338272095, + "learning_rate": 0.00018710846168420604, + "loss": 2.7458, + "step": 3352 + }, + { + "epoch": 0.27059962876281174, + "grad_norm": 0.807996928691864, + "learning_rate": 0.0001871007071328336, + "loss": 2.7604, + "step": 3353 + }, + { + "epoch": 0.2706803324993947, + "grad_norm": 0.7381845116615295, + "learning_rate": 0.00018709295041068386, + "loss": 2.6833, + "step": 3354 + }, + { + "epoch": 0.27076103623597775, + "grad_norm": 0.7542420625686646, + "learning_rate": 0.00018708519151795016, + "loss": 2.6462, + "step": 3355 + }, + { + "epoch": 0.2708417399725607, + "grad_norm": 0.7675846219062805, + "learning_rate": 0.00018707743045482582, + "loss": 2.7068, + "step": 3356 + }, + { + "epoch": 0.27092244370914376, + "grad_norm": 0.7437357902526855, + "learning_rate": 0.0001870696672215043, + "loss": 2.73, + "step": 3357 + }, + { + "epoch": 0.27100314744572673, + "grad_norm": 0.7880852222442627, + "learning_rate": 0.00018706190181817903, + "loss": 2.759, + "step": 3358 + }, + { + "epoch": 0.27108385118230977, + "grad_norm": 0.7403178811073303, + "learning_rate": 0.00018705413424504363, + "loss": 2.7538, + "step": 3359 + }, + { + "epoch": 0.27116455491889274, + "grad_norm": 0.7601225972175598, + "learning_rate": 0.00018704636450229164, + "loss": 2.7331, + "step": 3360 + }, + { + "epoch": 0.2712452586554758, + "grad_norm": 0.7810701727867126, + "learning_rate": 0.0001870385925901167, + "loss": 2.7736, + "step": 3361 + }, + { + "epoch": 0.27132596239205875, + "grad_norm": 0.8934530019760132, + "learning_rate": 0.0001870308185087125, 
+ "loss": 2.7214, + "step": 3362 + }, + { + "epoch": 0.2714066661286418, + "grad_norm": 0.7468441128730774, + "learning_rate": 0.0001870230422582728, + "loss": 2.6957, + "step": 3363 + }, + { + "epoch": 0.27148736986522476, + "grad_norm": 0.7643293142318726, + "learning_rate": 0.00018701526383899144, + "loss": 2.6773, + "step": 3364 + }, + { + "epoch": 0.2715680736018078, + "grad_norm": 0.7602033615112305, + "learning_rate": 0.0001870074832510622, + "loss": 2.7095, + "step": 3365 + }, + { + "epoch": 0.27164877733839077, + "grad_norm": 0.772065281867981, + "learning_rate": 0.00018699970049467908, + "loss": 2.6753, + "step": 3366 + }, + { + "epoch": 0.27172948107497374, + "grad_norm": 0.7718359231948853, + "learning_rate": 0.00018699191557003598, + "loss": 2.6857, + "step": 3367 + }, + { + "epoch": 0.2718101848115568, + "grad_norm": 0.8207093477249146, + "learning_rate": 0.00018698412847732693, + "loss": 2.7549, + "step": 3368 + }, + { + "epoch": 0.27189088854813975, + "grad_norm": 0.7393590807914734, + "learning_rate": 0.00018697633921674605, + "loss": 2.6884, + "step": 3369 + }, + { + "epoch": 0.2719715922847228, + "grad_norm": 0.7955869436264038, + "learning_rate": 0.0001869685477884874, + "loss": 2.708, + "step": 3370 + }, + { + "epoch": 0.27205229602130576, + "grad_norm": 0.7392188906669617, + "learning_rate": 0.00018696075419274527, + "loss": 2.717, + "step": 3371 + }, + { + "epoch": 0.2721329997578888, + "grad_norm": 0.800204873085022, + "learning_rate": 0.00018695295842971376, + "loss": 2.7184, + "step": 3372 + }, + { + "epoch": 0.27221370349447177, + "grad_norm": 0.8195740580558777, + "learning_rate": 0.00018694516049958725, + "loss": 2.6865, + "step": 3373 + }, + { + "epoch": 0.2722944072310548, + "grad_norm": 0.8617578148841858, + "learning_rate": 0.00018693736040256007, + "loss": 2.7098, + "step": 3374 + }, + { + "epoch": 0.2723751109676378, + "grad_norm": 0.8184413909912109, + "learning_rate": 0.00018692955813882662, + "loss": 2.7449, + "step": 3375 + }, 
+ { + "epoch": 0.2724558147042208, + "grad_norm": 0.990275502204895, + "learning_rate": 0.00018692175370858133, + "loss": 2.7891, + "step": 3376 + }, + { + "epoch": 0.2725365184408038, + "grad_norm": 0.7857810854911804, + "learning_rate": 0.0001869139471120187, + "loss": 2.6884, + "step": 3377 + }, + { + "epoch": 0.2726172221773868, + "grad_norm": 0.8040915131568909, + "learning_rate": 0.00018690613834933335, + "loss": 2.7047, + "step": 3378 + }, + { + "epoch": 0.2726979259139698, + "grad_norm": 0.7512348294258118, + "learning_rate": 0.00018689832742071983, + "loss": 2.6898, + "step": 3379 + }, + { + "epoch": 0.27277862965055283, + "grad_norm": 0.6781859397888184, + "learning_rate": 0.00018689051432637288, + "loss": 2.6396, + "step": 3380 + }, + { + "epoch": 0.2728593333871358, + "grad_norm": 0.7858247756958008, + "learning_rate": 0.00018688269906648716, + "loss": 2.6785, + "step": 3381 + }, + { + "epoch": 0.27294003712371884, + "grad_norm": 0.7342140674591064, + "learning_rate": 0.00018687488164125744, + "loss": 2.6778, + "step": 3382 + }, + { + "epoch": 0.2730207408603018, + "grad_norm": 0.8113372921943665, + "learning_rate": 0.00018686706205087858, + "loss": 2.6982, + "step": 3383 + }, + { + "epoch": 0.27310144459688485, + "grad_norm": 0.7904205918312073, + "learning_rate": 0.0001868592402955455, + "loss": 2.7891, + "step": 3384 + }, + { + "epoch": 0.2731821483334678, + "grad_norm": 0.7274135947227478, + "learning_rate": 0.00018685141637545308, + "loss": 2.6908, + "step": 3385 + }, + { + "epoch": 0.27326285207005085, + "grad_norm": 0.7675744295120239, + "learning_rate": 0.0001868435902907963, + "loss": 2.6987, + "step": 3386 + }, + { + "epoch": 0.27334355580663383, + "grad_norm": 0.8085030913352966, + "learning_rate": 0.00018683576204177026, + "loss": 2.7798, + "step": 3387 + }, + { + "epoch": 0.27342425954321686, + "grad_norm": 0.7498135566711426, + "learning_rate": 0.00018682793162857006, + "loss": 2.7216, + "step": 3388 + }, + { + "epoch": 
0.27350496327979984, + "grad_norm": 0.900741696357727, + "learning_rate": 0.0001868200990513908, + "loss": 2.6871, + "step": 3389 + }, + { + "epoch": 0.27358566701638287, + "grad_norm": 0.7948571443557739, + "learning_rate": 0.00018681226431042772, + "loss": 2.6985, + "step": 3390 + }, + { + "epoch": 0.27366637075296585, + "grad_norm": 0.8739100098609924, + "learning_rate": 0.00018680442740587612, + "loss": 2.6922, + "step": 3391 + }, + { + "epoch": 0.2737470744895489, + "grad_norm": 0.730084240436554, + "learning_rate": 0.00018679658833793125, + "loss": 2.7029, + "step": 3392 + }, + { + "epoch": 0.27382777822613186, + "grad_norm": 0.7560603022575378, + "learning_rate": 0.00018678874710678853, + "loss": 2.7429, + "step": 3393 + }, + { + "epoch": 0.2739084819627149, + "grad_norm": 0.8331460356712341, + "learning_rate": 0.00018678090371264334, + "loss": 2.7157, + "step": 3394 + }, + { + "epoch": 0.27398918569929787, + "grad_norm": 0.8070168495178223, + "learning_rate": 0.00018677305815569122, + "loss": 2.7629, + "step": 3395 + }, + { + "epoch": 0.2740698894358809, + "grad_norm": 0.7922534346580505, + "learning_rate": 0.00018676521043612762, + "loss": 2.7159, + "step": 3396 + }, + { + "epoch": 0.2741505931724639, + "grad_norm": 0.7838901281356812, + "learning_rate": 0.0001867573605541482, + "loss": 2.6721, + "step": 3397 + }, + { + "epoch": 0.2742312969090469, + "grad_norm": 0.8912512063980103, + "learning_rate": 0.00018674950850994856, + "loss": 2.7243, + "step": 3398 + }, + { + "epoch": 0.2743120006456299, + "grad_norm": 0.7205448150634766, + "learning_rate": 0.0001867416543037244, + "loss": 2.7152, + "step": 3399 + }, + { + "epoch": 0.2743927043822129, + "grad_norm": 0.6992877721786499, + "learning_rate": 0.00018673379793567146, + "loss": 2.7183, + "step": 3400 + }, + { + "epoch": 0.2744734081187959, + "grad_norm": 0.8009448051452637, + "learning_rate": 0.00018672593940598556, + "loss": 2.715, + "step": 3401 + }, + { + "epoch": 0.2745541118553789, + "grad_norm": 
0.7812647819519043, + "learning_rate": 0.0001867180787148626, + "loss": 2.7579, + "step": 3402 + }, + { + "epoch": 0.2746348155919619, + "grad_norm": 0.7300555109977722, + "learning_rate": 0.00018671021586249835, + "loss": 2.694, + "step": 3403 + }, + { + "epoch": 0.27471551932854493, + "grad_norm": 0.8082736134529114, + "learning_rate": 0.00018670235084908887, + "loss": 2.768, + "step": 3404 + }, + { + "epoch": 0.2747962230651279, + "grad_norm": 0.7729581594467163, + "learning_rate": 0.0001866944836748302, + "loss": 2.7256, + "step": 3405 + }, + { + "epoch": 0.27487692680171094, + "grad_norm": 0.8113458752632141, + "learning_rate": 0.00018668661433991835, + "loss": 2.6692, + "step": 3406 + }, + { + "epoch": 0.2749576305382939, + "grad_norm": 0.7757337689399719, + "learning_rate": 0.00018667874284454948, + "loss": 2.6769, + "step": 3407 + }, + { + "epoch": 0.27503833427487695, + "grad_norm": 0.7896093726158142, + "learning_rate": 0.00018667086918891976, + "loss": 2.7118, + "step": 3408 + }, + { + "epoch": 0.2751190380114599, + "grad_norm": 0.7764071822166443, + "learning_rate": 0.00018666299337322543, + "loss": 2.7284, + "step": 3409 + }, + { + "epoch": 0.27519974174804296, + "grad_norm": 0.794815182685852, + "learning_rate": 0.00018665511539766273, + "loss": 2.7232, + "step": 3410 + }, + { + "epoch": 0.27528044548462594, + "grad_norm": 0.8134122490882874, + "learning_rate": 0.0001866472352624281, + "loss": 2.7023, + "step": 3411 + }, + { + "epoch": 0.27536114922120897, + "grad_norm": 0.7654025554656982, + "learning_rate": 0.00018663935296771782, + "loss": 2.7002, + "step": 3412 + }, + { + "epoch": 0.27544185295779194, + "grad_norm": 0.6930806636810303, + "learning_rate": 0.0001866314685137284, + "loss": 2.6764, + "step": 3413 + }, + { + "epoch": 0.275522556694375, + "grad_norm": 0.7535184621810913, + "learning_rate": 0.00018662358190065631, + "loss": 2.6657, + "step": 3414 + }, + { + "epoch": 0.27560326043095795, + "grad_norm": 0.7775620818138123, + 
"learning_rate": 0.00018661569312869816, + "loss": 2.6931, + "step": 3415 + }, + { + "epoch": 0.275683964167541, + "grad_norm": 0.7209072113037109, + "learning_rate": 0.00018660780219805048, + "loss": 2.7293, + "step": 3416 + }, + { + "epoch": 0.27576466790412396, + "grad_norm": 0.7182055711746216, + "learning_rate": 0.00018659990910891, + "loss": 2.6561, + "step": 3417 + }, + { + "epoch": 0.27584537164070694, + "grad_norm": 0.7130969166755676, + "learning_rate": 0.00018659201386147338, + "loss": 2.7156, + "step": 3418 + }, + { + "epoch": 0.27592607537728997, + "grad_norm": 0.7296265959739685, + "learning_rate": 0.00018658411645593745, + "loss": 2.6894, + "step": 3419 + }, + { + "epoch": 0.27600677911387295, + "grad_norm": 0.7707972526550293, + "learning_rate": 0.000186576216892499, + "loss": 2.7528, + "step": 3420 + }, + { + "epoch": 0.276087482850456, + "grad_norm": 0.6945170164108276, + "learning_rate": 0.0001865683151713549, + "loss": 2.6762, + "step": 3421 + }, + { + "epoch": 0.27616818658703896, + "grad_norm": 0.7664114236831665, + "learning_rate": 0.0001865604112927021, + "loss": 2.7212, + "step": 3422 + }, + { + "epoch": 0.276248890323622, + "grad_norm": 0.6950399875640869, + "learning_rate": 0.0001865525052567376, + "loss": 2.7035, + "step": 3423 + }, + { + "epoch": 0.27632959406020496, + "grad_norm": 0.7307506799697876, + "learning_rate": 0.00018654459706365838, + "loss": 2.7296, + "step": 3424 + }, + { + "epoch": 0.276410297796788, + "grad_norm": 0.720912516117096, + "learning_rate": 0.0001865366867136616, + "loss": 2.6884, + "step": 3425 + }, + { + "epoch": 0.276491001533371, + "grad_norm": 0.7581072449684143, + "learning_rate": 0.00018652877420694436, + "loss": 2.705, + "step": 3426 + }, + { + "epoch": 0.276571705269954, + "grad_norm": 0.7473136186599731, + "learning_rate": 0.0001865208595437039, + "loss": 2.7316, + "step": 3427 + }, + { + "epoch": 0.276652409006537, + "grad_norm": 0.7272855639457703, + "learning_rate": 0.00018651294272413745, + 
"loss": 2.6834, + "step": 3428 + }, + { + "epoch": 0.27673311274312, + "grad_norm": 0.7046366930007935, + "learning_rate": 0.0001865050237484423, + "loss": 2.6491, + "step": 3429 + }, + { + "epoch": 0.276813816479703, + "grad_norm": 0.7521376609802246, + "learning_rate": 0.00018649710261681586, + "loss": 2.708, + "step": 3430 + }, + { + "epoch": 0.276894520216286, + "grad_norm": 0.7372453808784485, + "learning_rate": 0.0001864891793294555, + "loss": 2.682, + "step": 3431 + }, + { + "epoch": 0.276975223952869, + "grad_norm": 0.7381749749183655, + "learning_rate": 0.0001864812538865587, + "loss": 2.7526, + "step": 3432 + }, + { + "epoch": 0.27705592768945203, + "grad_norm": 0.7891514301300049, + "learning_rate": 0.00018647332628832298, + "loss": 2.6904, + "step": 3433 + }, + { + "epoch": 0.277136631426035, + "grad_norm": 0.7942724823951721, + "learning_rate": 0.00018646539653494596, + "loss": 2.7873, + "step": 3434 + }, + { + "epoch": 0.27721733516261804, + "grad_norm": 0.7365398406982422, + "learning_rate": 0.0001864574646266252, + "loss": 2.6684, + "step": 3435 + }, + { + "epoch": 0.277298038899201, + "grad_norm": 0.7802249193191528, + "learning_rate": 0.00018644953056355846, + "loss": 2.7152, + "step": 3436 + }, + { + "epoch": 0.27737874263578405, + "grad_norm": 0.7801448106765747, + "learning_rate": 0.0001864415943459434, + "loss": 2.7034, + "step": 3437 + }, + { + "epoch": 0.277459446372367, + "grad_norm": 0.7722738981246948, + "learning_rate": 0.00018643365597397786, + "loss": 2.7135, + "step": 3438 + }, + { + "epoch": 0.27754015010895006, + "grad_norm": 0.7847445011138916, + "learning_rate": 0.00018642571544785967, + "loss": 2.6999, + "step": 3439 + }, + { + "epoch": 0.27762085384553303, + "grad_norm": 0.7226125597953796, + "learning_rate": 0.00018641777276778675, + "loss": 2.7613, + "step": 3440 + }, + { + "epoch": 0.27770155758211607, + "grad_norm": 0.713188111782074, + "learning_rate": 0.000186409827933957, + "loss": 2.6953, + "step": 3441 + }, + { + 
"epoch": 0.27778226131869904, + "grad_norm": 0.7308298349380493, + "learning_rate": 0.0001864018809465685, + "loss": 2.7045, + "step": 3442 + }, + { + "epoch": 0.2778629650552821, + "grad_norm": 0.7606719732284546, + "learning_rate": 0.00018639393180581925, + "loss": 2.7883, + "step": 3443 + }, + { + "epoch": 0.27794366879186505, + "grad_norm": 0.7583296895027161, + "learning_rate": 0.00018638598051190738, + "loss": 2.6734, + "step": 3444 + }, + { + "epoch": 0.2780243725284481, + "grad_norm": 0.7147012948989868, + "learning_rate": 0.00018637802706503108, + "loss": 2.7223, + "step": 3445 + }, + { + "epoch": 0.27810507626503106, + "grad_norm": 0.7812997102737427, + "learning_rate": 0.00018637007146538853, + "loss": 2.7277, + "step": 3446 + }, + { + "epoch": 0.2781857800016141, + "grad_norm": 0.7460772395133972, + "learning_rate": 0.000186362113713178, + "loss": 2.6875, + "step": 3447 + }, + { + "epoch": 0.27826648373819707, + "grad_norm": 0.7359143495559692, + "learning_rate": 0.0001863541538085979, + "loss": 2.7122, + "step": 3448 + }, + { + "epoch": 0.2783471874747801, + "grad_norm": 0.7122978568077087, + "learning_rate": 0.00018634619175184655, + "loss": 2.6381, + "step": 3449 + }, + { + "epoch": 0.2784278912113631, + "grad_norm": 0.6965885758399963, + "learning_rate": 0.00018633822754312234, + "loss": 2.6957, + "step": 3450 + }, + { + "epoch": 0.2785085949479461, + "grad_norm": 0.7737082242965698, + "learning_rate": 0.00018633026118262385, + "loss": 2.7579, + "step": 3451 + }, + { + "epoch": 0.2785892986845291, + "grad_norm": 0.6925420165061951, + "learning_rate": 0.00018632229267054958, + "loss": 2.6226, + "step": 3452 + }, + { + "epoch": 0.2786700024211121, + "grad_norm": 0.7496356964111328, + "learning_rate": 0.0001863143220070981, + "loss": 2.7059, + "step": 3453 + }, + { + "epoch": 0.2787507061576951, + "grad_norm": 0.7066817283630371, + "learning_rate": 0.0001863063491924681, + "loss": 2.681, + "step": 3454 + }, + { + "epoch": 0.2788314098942781, + 
"grad_norm": 0.8143237829208374, + "learning_rate": 0.0001862983742268583, + "loss": 2.6698, + "step": 3455 + }, + { + "epoch": 0.2789121136308611, + "grad_norm": 0.7518483996391296, + "learning_rate": 0.00018629039711046737, + "loss": 2.7041, + "step": 3456 + }, + { + "epoch": 0.27899281736744413, + "grad_norm": 0.8756366968154907, + "learning_rate": 0.00018628241784349422, + "loss": 2.7547, + "step": 3457 + }, + { + "epoch": 0.2790735211040271, + "grad_norm": 0.8709446787834167, + "learning_rate": 0.0001862744364261377, + "loss": 2.7068, + "step": 3458 + }, + { + "epoch": 0.27915422484061014, + "grad_norm": 0.8121913075447083, + "learning_rate": 0.00018626645285859666, + "loss": 2.673, + "step": 3459 + }, + { + "epoch": 0.2792349285771931, + "grad_norm": 0.7685909271240234, + "learning_rate": 0.00018625846714107012, + "loss": 2.7389, + "step": 3460 + }, + { + "epoch": 0.27931563231377615, + "grad_norm": 0.7098073363304138, + "learning_rate": 0.0001862504792737571, + "loss": 2.6942, + "step": 3461 + }, + { + "epoch": 0.27939633605035913, + "grad_norm": 0.7718049883842468, + "learning_rate": 0.00018624248925685666, + "loss": 2.7359, + "step": 3462 + }, + { + "epoch": 0.27947703978694216, + "grad_norm": 0.7912909984588623, + "learning_rate": 0.00018623449709056797, + "loss": 2.6658, + "step": 3463 + }, + { + "epoch": 0.27955774352352514, + "grad_norm": 0.7255454659461975, + "learning_rate": 0.0001862265027750902, + "loss": 2.771, + "step": 3464 + }, + { + "epoch": 0.27963844726010817, + "grad_norm": 0.7542218565940857, + "learning_rate": 0.00018621850631062254, + "loss": 2.6741, + "step": 3465 + }, + { + "epoch": 0.27971915099669115, + "grad_norm": 0.8386052846908569, + "learning_rate": 0.00018621050769736437, + "loss": 2.67, + "step": 3466 + }, + { + "epoch": 0.2797998547332742, + "grad_norm": 0.8563781976699829, + "learning_rate": 0.00018620250693551495, + "loss": 2.7461, + "step": 3467 + }, + { + "epoch": 0.27988055846985715, + "grad_norm": 0.7490699291229248, + 
"learning_rate": 0.00018619450402527376, + "loss": 2.6863, + "step": 3468 + }, + { + "epoch": 0.27996126220644013, + "grad_norm": 0.8008999824523926, + "learning_rate": 0.00018618649896684017, + "loss": 2.7769, + "step": 3469 + }, + { + "epoch": 0.28004196594302316, + "grad_norm": 0.7678235769271851, + "learning_rate": 0.00018617849176041378, + "loss": 2.7237, + "step": 3470 + }, + { + "epoch": 0.28012266967960614, + "grad_norm": 0.8774877786636353, + "learning_rate": 0.00018617048240619408, + "loss": 2.7502, + "step": 3471 + }, + { + "epoch": 0.28020337341618917, + "grad_norm": 0.8150283098220825, + "learning_rate": 0.00018616247090438073, + "loss": 2.6941, + "step": 3472 + }, + { + "epoch": 0.28028407715277215, + "grad_norm": 0.7330089807510376, + "learning_rate": 0.00018615445725517332, + "loss": 2.7002, + "step": 3473 + }, + { + "epoch": 0.2803647808893552, + "grad_norm": 0.748275101184845, + "learning_rate": 0.00018614644145877168, + "loss": 2.6996, + "step": 3474 + }, + { + "epoch": 0.28044548462593816, + "grad_norm": 0.7718296647071838, + "learning_rate": 0.0001861384235153755, + "loss": 2.7333, + "step": 3475 + }, + { + "epoch": 0.2805261883625212, + "grad_norm": 0.7751123309135437, + "learning_rate": 0.00018613040342518465, + "loss": 2.7362, + "step": 3476 + }, + { + "epoch": 0.28060689209910417, + "grad_norm": 0.70979243516922, + "learning_rate": 0.000186122381188399, + "loss": 2.6651, + "step": 3477 + }, + { + "epoch": 0.2806875958356872, + "grad_norm": 0.9607138633728027, + "learning_rate": 0.00018611435680521848, + "loss": 2.7779, + "step": 3478 + }, + { + "epoch": 0.2807682995722702, + "grad_norm": 0.709671676158905, + "learning_rate": 0.0001861063302758431, + "loss": 2.6994, + "step": 3479 + }, + { + "epoch": 0.2808490033088532, + "grad_norm": 0.8765757083892822, + "learning_rate": 0.00018609830160047283, + "loss": 2.7107, + "step": 3480 + }, + { + "epoch": 0.2809297070454362, + "grad_norm": 0.7996764183044434, + "learning_rate": 
0.0001860902707793079, + "loss": 2.7921, + "step": 3481 + }, + { + "epoch": 0.2810104107820192, + "grad_norm": 0.7094513177871704, + "learning_rate": 0.0001860822378125483, + "loss": 2.7211, + "step": 3482 + }, + { + "epoch": 0.2810911145186022, + "grad_norm": 0.8068607449531555, + "learning_rate": 0.0001860742027003944, + "loss": 2.675, + "step": 3483 + }, + { + "epoch": 0.2811718182551852, + "grad_norm": 0.7737938165664673, + "learning_rate": 0.00018606616544304628, + "loss": 2.7538, + "step": 3484 + }, + { + "epoch": 0.2812525219917682, + "grad_norm": 0.7979975342750549, + "learning_rate": 0.0001860581260407044, + "loss": 2.7894, + "step": 3485 + }, + { + "epoch": 0.28133322572835123, + "grad_norm": 0.7671655416488647, + "learning_rate": 0.00018605008449356904, + "loss": 2.7097, + "step": 3486 + }, + { + "epoch": 0.2814139294649342, + "grad_norm": 0.7284159064292908, + "learning_rate": 0.00018604204080184062, + "loss": 2.7447, + "step": 3487 + }, + { + "epoch": 0.28149463320151724, + "grad_norm": 0.7425351142883301, + "learning_rate": 0.00018603399496571968, + "loss": 2.7302, + "step": 3488 + }, + { + "epoch": 0.2815753369381002, + "grad_norm": 0.7709810733795166, + "learning_rate": 0.00018602594698540663, + "loss": 2.6979, + "step": 3489 + }, + { + "epoch": 0.28165604067468325, + "grad_norm": 0.744628369808197, + "learning_rate": 0.00018601789686110214, + "loss": 2.7279, + "step": 3490 + }, + { + "epoch": 0.2817367444112662, + "grad_norm": 0.7679976224899292, + "learning_rate": 0.00018600984459300678, + "loss": 2.6862, + "step": 3491 + }, + { + "epoch": 0.28181744814784926, + "grad_norm": 0.7923497557640076, + "learning_rate": 0.0001860017901813213, + "loss": 2.6975, + "step": 3492 + }, + { + "epoch": 0.28189815188443224, + "grad_norm": 0.7896692156791687, + "learning_rate": 0.00018599373362624636, + "loss": 2.7052, + "step": 3493 + }, + { + "epoch": 0.28197885562101527, + "grad_norm": 0.7913276553153992, + "learning_rate": 0.00018598567492798284, + "loss": 
2.7233, + "step": 3494 + }, + { + "epoch": 0.28205955935759824, + "grad_norm": 0.7385257482528687, + "learning_rate": 0.00018597761408673146, + "loss": 2.7616, + "step": 3495 + }, + { + "epoch": 0.2821402630941813, + "grad_norm": 0.7181909084320068, + "learning_rate": 0.00018596955110269323, + "loss": 2.718, + "step": 3496 + }, + { + "epoch": 0.28222096683076425, + "grad_norm": 0.8313151597976685, + "learning_rate": 0.00018596148597606907, + "loss": 2.6775, + "step": 3497 + }, + { + "epoch": 0.2823016705673473, + "grad_norm": 0.7235481142997742, + "learning_rate": 0.00018595341870705995, + "loss": 2.7085, + "step": 3498 + }, + { + "epoch": 0.28238237430393026, + "grad_norm": 0.7092145085334778, + "learning_rate": 0.00018594534929586697, + "loss": 2.7167, + "step": 3499 + }, + { + "epoch": 0.2824630780405133, + "grad_norm": 0.7929207682609558, + "learning_rate": 0.0001859372777426912, + "loss": 2.663, + "step": 3500 + }, + { + "epoch": 0.28254378177709627, + "grad_norm": 0.7488871216773987, + "learning_rate": 0.00018592920404773383, + "loss": 2.7911, + "step": 3501 + }, + { + "epoch": 0.2826244855136793, + "grad_norm": 0.8230419158935547, + "learning_rate": 0.0001859211282111961, + "loss": 2.754, + "step": 3502 + }, + { + "epoch": 0.2827051892502623, + "grad_norm": 0.731971025466919, + "learning_rate": 0.00018591305023327924, + "loss": 2.7142, + "step": 3503 + }, + { + "epoch": 0.2827858929868453, + "grad_norm": 0.8159881234169006, + "learning_rate": 0.00018590497011418457, + "loss": 2.7046, + "step": 3504 + }, + { + "epoch": 0.2828665967234283, + "grad_norm": 0.750266432762146, + "learning_rate": 0.0001858968878541135, + "loss": 2.6951, + "step": 3505 + }, + { + "epoch": 0.2829473004600113, + "grad_norm": 0.7750049233436584, + "learning_rate": 0.00018588880345326748, + "loss": 2.6958, + "step": 3506 + }, + { + "epoch": 0.2830280041965943, + "grad_norm": 0.8559218049049377, + "learning_rate": 0.00018588071691184795, + "loss": 2.7205, + "step": 3507 + }, + { + 
"epoch": 0.28310870793317733, + "grad_norm": 0.7334830164909363, + "learning_rate": 0.00018587262823005642, + "loss": 2.7134, + "step": 3508 + }, + { + "epoch": 0.2831894116697603, + "grad_norm": 0.8749497532844543, + "learning_rate": 0.00018586453740809456, + "loss": 2.6811, + "step": 3509 + }, + { + "epoch": 0.28327011540634334, + "grad_norm": 0.8800753355026245, + "learning_rate": 0.00018585644444616396, + "loss": 2.7427, + "step": 3510 + }, + { + "epoch": 0.2833508191429263, + "grad_norm": 0.8666185736656189, + "learning_rate": 0.00018584834934446632, + "loss": 2.6828, + "step": 3511 + }, + { + "epoch": 0.28343152287950935, + "grad_norm": 0.7451635003089905, + "learning_rate": 0.00018584025210320343, + "loss": 2.6784, + "step": 3512 + }, + { + "epoch": 0.2835122266160923, + "grad_norm": 0.8512656688690186, + "learning_rate": 0.00018583215272257708, + "loss": 2.7762, + "step": 3513 + }, + { + "epoch": 0.28359293035267535, + "grad_norm": 0.9298297166824341, + "learning_rate": 0.00018582405120278907, + "loss": 2.7714, + "step": 3514 + }, + { + "epoch": 0.28367363408925833, + "grad_norm": 0.7968065738677979, + "learning_rate": 0.0001858159475440414, + "loss": 2.7286, + "step": 3515 + }, + { + "epoch": 0.28375433782584136, + "grad_norm": 0.7381564378738403, + "learning_rate": 0.00018580784174653596, + "loss": 2.6697, + "step": 3516 + }, + { + "epoch": 0.28383504156242434, + "grad_norm": 0.8199222683906555, + "learning_rate": 0.00018579973381047481, + "loss": 2.7463, + "step": 3517 + }, + { + "epoch": 0.28391574529900737, + "grad_norm": 0.8022071123123169, + "learning_rate": 0.00018579162373606002, + "loss": 2.6898, + "step": 3518 + }, + { + "epoch": 0.28399644903559035, + "grad_norm": 0.7899700999259949, + "learning_rate": 0.0001857835115234937, + "loss": 2.7074, + "step": 3519 + }, + { + "epoch": 0.2840771527721733, + "grad_norm": 0.7237183451652527, + "learning_rate": 0.00018577539717297805, + "loss": 2.6699, + "step": 3520 + }, + { + "epoch": 0.28415785650875636, 
+ "grad_norm": 0.7627314329147339, + "learning_rate": 0.00018576728068471526, + "loss": 2.7745, + "step": 3521 + }, + { + "epoch": 0.28423856024533933, + "grad_norm": 0.7301654815673828, + "learning_rate": 0.00018575916205890766, + "loss": 2.7191, + "step": 3522 + }, + { + "epoch": 0.28431926398192237, + "grad_norm": 0.7441647052764893, + "learning_rate": 0.00018575104129575753, + "loss": 2.7529, + "step": 3523 + }, + { + "epoch": 0.28439996771850534, + "grad_norm": 0.7715914249420166, + "learning_rate": 0.0001857429183954673, + "loss": 2.6893, + "step": 3524 + }, + { + "epoch": 0.2844806714550884, + "grad_norm": 0.7464057207107544, + "learning_rate": 0.00018573479335823944, + "loss": 2.7169, + "step": 3525 + }, + { + "epoch": 0.28456137519167135, + "grad_norm": 0.753198504447937, + "learning_rate": 0.00018572666618427638, + "loss": 2.7144, + "step": 3526 + }, + { + "epoch": 0.2846420789282544, + "grad_norm": 0.7681953310966492, + "learning_rate": 0.00018571853687378073, + "loss": 2.709, + "step": 3527 + }, + { + "epoch": 0.28472278266483736, + "grad_norm": 0.7591876983642578, + "learning_rate": 0.0001857104054269551, + "loss": 2.7519, + "step": 3528 + }, + { + "epoch": 0.2848034864014204, + "grad_norm": 0.7417709827423096, + "learning_rate": 0.00018570227184400205, + "loss": 2.6756, + "step": 3529 + }, + { + "epoch": 0.28488419013800337, + "grad_norm": 0.7641329169273376, + "learning_rate": 0.0001856941361251244, + "loss": 2.6614, + "step": 3530 + }, + { + "epoch": 0.2849648938745864, + "grad_norm": 0.7813490033149719, + "learning_rate": 0.0001856859982705249, + "loss": 2.7145, + "step": 3531 + }, + { + "epoch": 0.2850455976111694, + "grad_norm": 0.7777202129364014, + "learning_rate": 0.00018567785828040628, + "loss": 2.7015, + "step": 3532 + }, + { + "epoch": 0.2851263013477524, + "grad_norm": 0.7647144794464111, + "learning_rate": 0.0001856697161549715, + "loss": 2.7311, + "step": 3533 + }, + { + "epoch": 0.2852070050843354, + "grad_norm": 0.7477256655693054, + 
"learning_rate": 0.00018566157189442342, + "loss": 2.6832, + "step": 3534 + }, + { + "epoch": 0.2852877088209184, + "grad_norm": 0.7037049531936646, + "learning_rate": 0.00018565342549896506, + "loss": 2.6942, + "step": 3535 + }, + { + "epoch": 0.2853684125575014, + "grad_norm": 0.7309197783470154, + "learning_rate": 0.00018564527696879945, + "loss": 2.6797, + "step": 3536 + }, + { + "epoch": 0.2854491162940844, + "grad_norm": 0.798075795173645, + "learning_rate": 0.00018563712630412967, + "loss": 2.6926, + "step": 3537 + }, + { + "epoch": 0.2855298200306674, + "grad_norm": 0.7831682562828064, + "learning_rate": 0.0001856289735051588, + "loss": 2.7537, + "step": 3538 + }, + { + "epoch": 0.28561052376725043, + "grad_norm": 0.7983096241950989, + "learning_rate": 0.0001856208185720901, + "loss": 2.7037, + "step": 3539 + }, + { + "epoch": 0.2856912275038334, + "grad_norm": 0.7250573635101318, + "learning_rate": 0.00018561266150512678, + "loss": 2.7282, + "step": 3540 + }, + { + "epoch": 0.28577193124041644, + "grad_norm": 0.7800211906433105, + "learning_rate": 0.00018560450230447218, + "loss": 2.6541, + "step": 3541 + }, + { + "epoch": 0.2858526349769994, + "grad_norm": 0.7624209523200989, + "learning_rate": 0.00018559634097032953, + "loss": 2.7041, + "step": 3542 + }, + { + "epoch": 0.28593333871358245, + "grad_norm": 0.7212036848068237, + "learning_rate": 0.0001855881775029024, + "loss": 2.7287, + "step": 3543 + }, + { + "epoch": 0.28601404245016543, + "grad_norm": 0.7774164080619812, + "learning_rate": 0.00018558001190239408, + "loss": 2.6515, + "step": 3544 + }, + { + "epoch": 0.28609474618674846, + "grad_norm": 0.7169588208198547, + "learning_rate": 0.0001855718441690082, + "loss": 2.7111, + "step": 3545 + }, + { + "epoch": 0.28617544992333144, + "grad_norm": 0.7473909258842468, + "learning_rate": 0.00018556367430294827, + "loss": 2.7405, + "step": 3546 + }, + { + "epoch": 0.28625615365991447, + "grad_norm": 0.7213929295539856, + "learning_rate": 
0.0001855555023044179, + "loss": 2.7336, + "step": 3547 + }, + { + "epoch": 0.28633685739649745, + "grad_norm": 0.701816201210022, + "learning_rate": 0.00018554732817362078, + "loss": 2.721, + "step": 3548 + }, + { + "epoch": 0.2864175611330805, + "grad_norm": 0.8158134818077087, + "learning_rate": 0.00018553915191076064, + "loss": 2.6979, + "step": 3549 + }, + { + "epoch": 0.28649826486966345, + "grad_norm": 0.7303084135055542, + "learning_rate": 0.00018553097351604118, + "loss": 2.6734, + "step": 3550 + }, + { + "epoch": 0.2865789686062465, + "grad_norm": 0.8140435814857483, + "learning_rate": 0.00018552279298966634, + "loss": 2.6832, + "step": 3551 + }, + { + "epoch": 0.28665967234282946, + "grad_norm": 0.7024678587913513, + "learning_rate": 0.00018551461033183988, + "loss": 2.7118, + "step": 3552 + }, + { + "epoch": 0.2867403760794125, + "grad_norm": 0.7277806401252747, + "learning_rate": 0.00018550642554276582, + "loss": 2.6362, + "step": 3553 + }, + { + "epoch": 0.28682107981599547, + "grad_norm": 0.8376575112342834, + "learning_rate": 0.00018549823862264812, + "loss": 2.744, + "step": 3554 + }, + { + "epoch": 0.2869017835525785, + "grad_norm": 0.712195098400116, + "learning_rate": 0.00018549004957169082, + "loss": 2.6715, + "step": 3555 + }, + { + "epoch": 0.2869824872891615, + "grad_norm": 0.7511523962020874, + "learning_rate": 0.00018548185839009805, + "loss": 2.7655, + "step": 3556 + }, + { + "epoch": 0.2870631910257445, + "grad_norm": 0.7397211790084839, + "learning_rate": 0.00018547366507807388, + "loss": 2.6813, + "step": 3557 + }, + { + "epoch": 0.2871438947623275, + "grad_norm": 0.6926341652870178, + "learning_rate": 0.00018546546963582253, + "loss": 2.6477, + "step": 3558 + }, + { + "epoch": 0.2872245984989105, + "grad_norm": 0.7776244878768921, + "learning_rate": 0.00018545727206354827, + "loss": 2.6979, + "step": 3559 + }, + { + "epoch": 0.2873053022354935, + "grad_norm": 0.7639400959014893, + "learning_rate": 0.00018544907236145542, + "loss": 
2.6913, + "step": 3560 + }, + { + "epoch": 0.28738600597207653, + "grad_norm": 0.7738329768180847, + "learning_rate": 0.0001854408705297483, + "loss": 2.7231, + "step": 3561 + }, + { + "epoch": 0.2874667097086595, + "grad_norm": 0.7182422876358032, + "learning_rate": 0.00018543266656863137, + "loss": 2.718, + "step": 3562 + }, + { + "epoch": 0.28754741344524254, + "grad_norm": 0.7257261276245117, + "learning_rate": 0.00018542446047830903, + "loss": 2.7354, + "step": 3563 + }, + { + "epoch": 0.2876281171818255, + "grad_norm": 0.7761391997337341, + "learning_rate": 0.00018541625225898588, + "loss": 2.705, + "step": 3564 + }, + { + "epoch": 0.28770882091840855, + "grad_norm": 0.9272314310073853, + "learning_rate": 0.0001854080419108664, + "loss": 2.7278, + "step": 3565 + }, + { + "epoch": 0.2877895246549915, + "grad_norm": 0.7622589468955994, + "learning_rate": 0.00018539982943415527, + "loss": 2.7224, + "step": 3566 + }, + { + "epoch": 0.28787022839157456, + "grad_norm": 0.725349485874176, + "learning_rate": 0.0001853916148290572, + "loss": 2.6782, + "step": 3567 + }, + { + "epoch": 0.28795093212815753, + "grad_norm": 0.776242733001709, + "learning_rate": 0.0001853833980957768, + "loss": 2.6467, + "step": 3568 + }, + { + "epoch": 0.28803163586474057, + "grad_norm": 0.8461112976074219, + "learning_rate": 0.00018537517923451896, + "loss": 2.6763, + "step": 3569 + }, + { + "epoch": 0.28811233960132354, + "grad_norm": 0.8161221742630005, + "learning_rate": 0.00018536695824548848, + "loss": 2.7057, + "step": 3570 + }, + { + "epoch": 0.2881930433379065, + "grad_norm": 0.7404211759567261, + "learning_rate": 0.00018535873512889024, + "loss": 2.7083, + "step": 3571 + }, + { + "epoch": 0.28827374707448955, + "grad_norm": 0.831042468547821, + "learning_rate": 0.00018535050988492918, + "loss": 2.6121, + "step": 3572 + }, + { + "epoch": 0.2883544508110725, + "grad_norm": 0.7286352515220642, + "learning_rate": 0.00018534228251381035, + "loss": 2.7165, + "step": 3573 + }, + { + 
"epoch": 0.28843515454765556, + "grad_norm": 0.7951883673667908, + "learning_rate": 0.00018533405301573872, + "loss": 2.6794, + "step": 3574 + }, + { + "epoch": 0.28851585828423854, + "grad_norm": 0.7431079149246216, + "learning_rate": 0.00018532582139091944, + "loss": 2.6758, + "step": 3575 + }, + { + "epoch": 0.28859656202082157, + "grad_norm": 0.7408809065818787, + "learning_rate": 0.0001853175876395576, + "loss": 2.6901, + "step": 3576 + }, + { + "epoch": 0.28867726575740454, + "grad_norm": 0.7428708672523499, + "learning_rate": 0.00018530935176185848, + "loss": 2.6679, + "step": 3577 + }, + { + "epoch": 0.2887579694939876, + "grad_norm": 0.7670302987098694, + "learning_rate": 0.00018530111375802735, + "loss": 2.7306, + "step": 3578 + }, + { + "epoch": 0.28883867323057055, + "grad_norm": 0.7582474946975708, + "learning_rate": 0.00018529287362826943, + "loss": 2.7715, + "step": 3579 + }, + { + "epoch": 0.2889193769671536, + "grad_norm": 0.750973105430603, + "learning_rate": 0.0001852846313727902, + "loss": 2.7147, + "step": 3580 + }, + { + "epoch": 0.28900008070373656, + "grad_norm": 0.771854043006897, + "learning_rate": 0.00018527638699179498, + "loss": 2.6874, + "step": 3581 + }, + { + "epoch": 0.2890807844403196, + "grad_norm": 0.785469651222229, + "learning_rate": 0.00018526814048548928, + "loss": 2.6858, + "step": 3582 + }, + { + "epoch": 0.28916148817690257, + "grad_norm": 0.7601101398468018, + "learning_rate": 0.00018525989185407864, + "loss": 2.6927, + "step": 3583 + }, + { + "epoch": 0.2892421919134856, + "grad_norm": 0.7313411831855774, + "learning_rate": 0.00018525164109776861, + "loss": 2.6813, + "step": 3584 + }, + { + "epoch": 0.2893228956500686, + "grad_norm": 0.7471718192100525, + "learning_rate": 0.00018524338821676483, + "loss": 2.6791, + "step": 3585 + }, + { + "epoch": 0.2894035993866516, + "grad_norm": 0.7615204453468323, + "learning_rate": 0.00018523513321127302, + "loss": 2.7767, + "step": 3586 + }, + { + "epoch": 0.2894843031232346, + 
"grad_norm": 0.766793966293335, + "learning_rate": 0.00018522687608149886, + "loss": 2.664, + "step": 3587 + }, + { + "epoch": 0.2895650068598176, + "grad_norm": 0.7897932529449463, + "learning_rate": 0.00018521861682764816, + "loss": 2.7148, + "step": 3588 + }, + { + "epoch": 0.2896457105964006, + "grad_norm": 0.7366818785667419, + "learning_rate": 0.00018521035544992679, + "loss": 2.69, + "step": 3589 + }, + { + "epoch": 0.28972641433298363, + "grad_norm": 0.7503829598426819, + "learning_rate": 0.00018520209194854058, + "loss": 2.7141, + "step": 3590 + }, + { + "epoch": 0.2898071180695666, + "grad_norm": 0.8064351081848145, + "learning_rate": 0.00018519382632369556, + "loss": 2.6738, + "step": 3591 + }, + { + "epoch": 0.28988782180614964, + "grad_norm": 0.7364048361778259, + "learning_rate": 0.00018518555857559768, + "loss": 2.6731, + "step": 3592 + }, + { + "epoch": 0.2899685255427326, + "grad_norm": 0.7065430283546448, + "learning_rate": 0.00018517728870445297, + "loss": 2.7314, + "step": 3593 + }, + { + "epoch": 0.29004922927931565, + "grad_norm": 0.8233428001403809, + "learning_rate": 0.0001851690167104676, + "loss": 2.727, + "step": 3594 + }, + { + "epoch": 0.2901299330158986, + "grad_norm": 0.7563758492469788, + "learning_rate": 0.00018516074259384768, + "loss": 2.665, + "step": 3595 + }, + { + "epoch": 0.29021063675248165, + "grad_norm": 0.7451249361038208, + "learning_rate": 0.00018515246635479943, + "loss": 2.7686, + "step": 3596 + }, + { + "epoch": 0.29029134048906463, + "grad_norm": 0.7374305725097656, + "learning_rate": 0.00018514418799352918, + "loss": 2.6466, + "step": 3597 + }, + { + "epoch": 0.29037204422564766, + "grad_norm": 0.7596983909606934, + "learning_rate": 0.00018513590751024315, + "loss": 2.6763, + "step": 3598 + }, + { + "epoch": 0.29045274796223064, + "grad_norm": 0.7808190584182739, + "learning_rate": 0.0001851276249051478, + "loss": 2.7362, + "step": 3599 + }, + { + "epoch": 0.29053345169881367, + "grad_norm": 0.765785276889801, + 
"learning_rate": 0.00018511934017844948, + "loss": 2.7049, + "step": 3600 + }, + { + "epoch": 0.29061415543539665, + "grad_norm": 0.7503563165664673, + "learning_rate": 0.0001851110533303547, + "loss": 2.6262, + "step": 3601 + }, + { + "epoch": 0.2906948591719797, + "grad_norm": 0.7287782430648804, + "learning_rate": 0.00018510276436107, + "loss": 2.7076, + "step": 3602 + }, + { + "epoch": 0.29077556290856266, + "grad_norm": 0.7748721837997437, + "learning_rate": 0.00018509447327080193, + "loss": 2.6945, + "step": 3603 + }, + { + "epoch": 0.2908562666451457, + "grad_norm": 0.7482423186302185, + "learning_rate": 0.00018508618005975714, + "loss": 2.7326, + "step": 3604 + }, + { + "epoch": 0.29093697038172867, + "grad_norm": 0.7708765864372253, + "learning_rate": 0.00018507788472814238, + "loss": 2.7602, + "step": 3605 + }, + { + "epoch": 0.2910176741183117, + "grad_norm": 0.7308060526847839, + "learning_rate": 0.0001850695872761643, + "loss": 2.6735, + "step": 3606 + }, + { + "epoch": 0.2910983778548947, + "grad_norm": 0.7512951493263245, + "learning_rate": 0.00018506128770402972, + "loss": 2.6877, + "step": 3607 + }, + { + "epoch": 0.2911790815914777, + "grad_norm": 0.6806616187095642, + "learning_rate": 0.00018505298601194552, + "loss": 2.6689, + "step": 3608 + }, + { + "epoch": 0.2912597853280607, + "grad_norm": 0.7825661301612854, + "learning_rate": 0.00018504468220011857, + "loss": 2.7108, + "step": 3609 + }, + { + "epoch": 0.2913404890646437, + "grad_norm": 0.8243381977081299, + "learning_rate": 0.00018503637626875584, + "loss": 2.6789, + "step": 3610 + }, + { + "epoch": 0.2914211928012267, + "grad_norm": 0.745012640953064, + "learning_rate": 0.00018502806821806429, + "loss": 2.7658, + "step": 3611 + }, + { + "epoch": 0.2915018965378097, + "grad_norm": 0.7091341018676758, + "learning_rate": 0.00018501975804825104, + "loss": 2.7046, + "step": 3612 + }, + { + "epoch": 0.2915826002743927, + "grad_norm": 0.729026734828949, + "learning_rate": 0.0001850114457595232, 
+ "loss": 2.6692, + "step": 3613 + }, + { + "epoch": 0.29166330401097573, + "grad_norm": 0.8098071813583374, + "learning_rate": 0.00018500313135208786, + "loss": 2.712, + "step": 3614 + }, + { + "epoch": 0.2917440077475587, + "grad_norm": 0.7387483716011047, + "learning_rate": 0.0001849948148261523, + "loss": 2.6705, + "step": 3615 + }, + { + "epoch": 0.29182471148414174, + "grad_norm": 0.7904576659202576, + "learning_rate": 0.0001849864961819238, + "loss": 2.5969, + "step": 3616 + }, + { + "epoch": 0.2919054152207247, + "grad_norm": 0.7560681700706482, + "learning_rate": 0.00018497817541960964, + "loss": 2.6971, + "step": 3617 + }, + { + "epoch": 0.29198611895730775, + "grad_norm": 0.8488430976867676, + "learning_rate": 0.00018496985253941723, + "loss": 2.7367, + "step": 3618 + }, + { + "epoch": 0.2920668226938907, + "grad_norm": 0.7641268372535706, + "learning_rate": 0.00018496152754155399, + "loss": 2.6948, + "step": 3619 + }, + { + "epoch": 0.29214752643047376, + "grad_norm": 0.7219721674919128, + "learning_rate": 0.00018495320042622736, + "loss": 2.7225, + "step": 3620 + }, + { + "epoch": 0.29222823016705674, + "grad_norm": 0.7583872675895691, + "learning_rate": 0.00018494487119364493, + "loss": 2.7335, + "step": 3621 + }, + { + "epoch": 0.2923089339036397, + "grad_norm": 0.7771418690681458, + "learning_rate": 0.00018493653984401424, + "loss": 2.6712, + "step": 3622 + }, + { + "epoch": 0.29238963764022274, + "grad_norm": 0.7537891268730164, + "learning_rate": 0.00018492820637754296, + "loss": 2.7282, + "step": 3623 + }, + { + "epoch": 0.2924703413768057, + "grad_norm": 0.7334226965904236, + "learning_rate": 0.00018491987079443875, + "loss": 2.7072, + "step": 3624 + }, + { + "epoch": 0.29255104511338875, + "grad_norm": 0.7768076658248901, + "learning_rate": 0.00018491153309490942, + "loss": 2.7176, + "step": 3625 + }, + { + "epoch": 0.29263174884997173, + "grad_norm": 0.6831281185150146, + "learning_rate": 0.0001849031932791627, + "loss": 2.6982, + "step": 3626 
+ }, + { + "epoch": 0.29271245258655476, + "grad_norm": 0.7150557637214661, + "learning_rate": 0.00018489485134740648, + "loss": 2.7325, + "step": 3627 + }, + { + "epoch": 0.29279315632313774, + "grad_norm": 0.782667338848114, + "learning_rate": 0.00018488650729984863, + "loss": 2.7146, + "step": 3628 + }, + { + "epoch": 0.29287386005972077, + "grad_norm": 0.7718524932861328, + "learning_rate": 0.0001848781611366971, + "loss": 2.746, + "step": 3629 + }, + { + "epoch": 0.29295456379630375, + "grad_norm": 0.7066439390182495, + "learning_rate": 0.00018486981285815998, + "loss": 2.7497, + "step": 3630 + }, + { + "epoch": 0.2930352675328868, + "grad_norm": 0.7705665826797485, + "learning_rate": 0.00018486146246444522, + "loss": 2.6448, + "step": 3631 + }, + { + "epoch": 0.29311597126946976, + "grad_norm": 0.7334863543510437, + "learning_rate": 0.000184853109955761, + "loss": 2.6931, + "step": 3632 + }, + { + "epoch": 0.2931966750060528, + "grad_norm": 0.7903133630752563, + "learning_rate": 0.0001848447553323155, + "loss": 2.6954, + "step": 3633 + }, + { + "epoch": 0.29327737874263576, + "grad_norm": 0.6821191310882568, + "learning_rate": 0.00018483639859431689, + "loss": 2.6165, + "step": 3634 + }, + { + "epoch": 0.2933580824792188, + "grad_norm": 0.7187811136245728, + "learning_rate": 0.00018482803974197344, + "loss": 2.6387, + "step": 3635 + }, + { + "epoch": 0.2934387862158018, + "grad_norm": 0.7429843544960022, + "learning_rate": 0.00018481967877549354, + "loss": 2.6848, + "step": 3636 + }, + { + "epoch": 0.2935194899523848, + "grad_norm": 0.7431524395942688, + "learning_rate": 0.0001848113156950855, + "loss": 2.7044, + "step": 3637 + }, + { + "epoch": 0.2936001936889678, + "grad_norm": 0.7008687853813171, + "learning_rate": 0.00018480295050095778, + "loss": 2.6922, + "step": 3638 + }, + { + "epoch": 0.2936808974255508, + "grad_norm": 0.7106652855873108, + "learning_rate": 0.00018479458319331884, + "loss": 2.6845, + "step": 3639 + }, + { + "epoch": 
0.2937616011621338, + "grad_norm": 0.7288951873779297, + "learning_rate": 0.00018478621377237723, + "loss": 2.7017, + "step": 3640 + }, + { + "epoch": 0.2938423048987168, + "grad_norm": 0.7228607535362244, + "learning_rate": 0.00018477784223834155, + "loss": 2.7449, + "step": 3641 + }, + { + "epoch": 0.2939230086352998, + "grad_norm": 0.7180825471878052, + "learning_rate": 0.00018476946859142043, + "loss": 2.7291, + "step": 3642 + }, + { + "epoch": 0.29400371237188283, + "grad_norm": 0.7854947447776794, + "learning_rate": 0.00018476109283182258, + "loss": 2.7619, + "step": 3643 + }, + { + "epoch": 0.2940844161084658, + "grad_norm": 0.7871318459510803, + "learning_rate": 0.00018475271495975673, + "loss": 2.6695, + "step": 3644 + }, + { + "epoch": 0.29416511984504884, + "grad_norm": 0.7813127636909485, + "learning_rate": 0.00018474433497543165, + "loss": 2.735, + "step": 3645 + }, + { + "epoch": 0.2942458235816318, + "grad_norm": 0.7835291028022766, + "learning_rate": 0.00018473595287905623, + "loss": 2.7336, + "step": 3646 + }, + { + "epoch": 0.29432652731821485, + "grad_norm": 0.6970148682594299, + "learning_rate": 0.00018472756867083935, + "loss": 2.6912, + "step": 3647 + }, + { + "epoch": 0.2944072310547978, + "grad_norm": 0.7968462109565735, + "learning_rate": 0.00018471918235098998, + "loss": 2.6889, + "step": 3648 + }, + { + "epoch": 0.29448793479138086, + "grad_norm": 0.7011313438415527, + "learning_rate": 0.00018471079391971714, + "loss": 2.6989, + "step": 3649 + }, + { + "epoch": 0.29456863852796383, + "grad_norm": 0.8047335743904114, + "learning_rate": 0.00018470240337722991, + "loss": 2.6827, + "step": 3650 + }, + { + "epoch": 0.29464934226454687, + "grad_norm": 0.7446332573890686, + "learning_rate": 0.00018469401072373733, + "loss": 2.7089, + "step": 3651 + }, + { + "epoch": 0.29473004600112984, + "grad_norm": 0.7610359191894531, + "learning_rate": 0.00018468561595944862, + "loss": 2.6766, + "step": 3652 + }, + { + "epoch": 0.2948107497377129, + 
"grad_norm": 0.7705755233764648, + "learning_rate": 0.000184677219084573, + "loss": 2.7445, + "step": 3653 + }, + { + "epoch": 0.29489145347429585, + "grad_norm": 0.7466446757316589, + "learning_rate": 0.00018466882009931973, + "loss": 2.726, + "step": 3654 + }, + { + "epoch": 0.2949721572108789, + "grad_norm": 0.7912059426307678, + "learning_rate": 0.00018466041900389813, + "loss": 2.6865, + "step": 3655 + }, + { + "epoch": 0.29505286094746186, + "grad_norm": 0.722588837146759, + "learning_rate": 0.00018465201579851757, + "loss": 2.7039, + "step": 3656 + }, + { + "epoch": 0.2951335646840449, + "grad_norm": 0.739311933517456, + "learning_rate": 0.00018464361048338752, + "loss": 2.6991, + "step": 3657 + }, + { + "epoch": 0.29521426842062787, + "grad_norm": 0.7784128785133362, + "learning_rate": 0.00018463520305871743, + "loss": 2.753, + "step": 3658 + }, + { + "epoch": 0.2952949721572109, + "grad_norm": 0.8261777758598328, + "learning_rate": 0.00018462679352471682, + "loss": 2.7257, + "step": 3659 + }, + { + "epoch": 0.2953756758937939, + "grad_norm": 0.7510927319526672, + "learning_rate": 0.0001846183818815953, + "loss": 2.6981, + "step": 3660 + }, + { + "epoch": 0.2954563796303769, + "grad_norm": 0.7403035163879395, + "learning_rate": 0.00018460996812956254, + "loss": 2.744, + "step": 3661 + }, + { + "epoch": 0.2955370833669599, + "grad_norm": 0.7927733063697815, + "learning_rate": 0.00018460155226882817, + "loss": 2.6304, + "step": 3662 + }, + { + "epoch": 0.2956177871035429, + "grad_norm": 0.7923495769500732, + "learning_rate": 0.000184593134299602, + "loss": 2.7882, + "step": 3663 + }, + { + "epoch": 0.2956984908401259, + "grad_norm": 0.7639210224151611, + "learning_rate": 0.00018458471422209377, + "loss": 2.7171, + "step": 3664 + }, + { + "epoch": 0.2957791945767089, + "grad_norm": 0.736652672290802, + "learning_rate": 0.00018457629203651337, + "loss": 2.7479, + "step": 3665 + }, + { + "epoch": 0.2958598983132919, + "grad_norm": 0.7718610763549805, + 
"learning_rate": 0.00018456786774307066, + "loss": 2.7135, + "step": 3666 + }, + { + "epoch": 0.29594060204987493, + "grad_norm": 0.7711780071258545, + "learning_rate": 0.00018455944134197565, + "loss": 2.6867, + "step": 3667 + }, + { + "epoch": 0.2960213057864579, + "grad_norm": 0.7202491760253906, + "learning_rate": 0.0001845510128334383, + "loss": 2.6657, + "step": 3668 + }, + { + "epoch": 0.29610200952304094, + "grad_norm": 0.8155657649040222, + "learning_rate": 0.00018454258221766869, + "loss": 2.7342, + "step": 3669 + }, + { + "epoch": 0.2961827132596239, + "grad_norm": 0.7972069382667542, + "learning_rate": 0.00018453414949487696, + "loss": 2.7351, + "step": 3670 + }, + { + "epoch": 0.29626341699620695, + "grad_norm": 0.8645625710487366, + "learning_rate": 0.00018452571466527325, + "loss": 2.6778, + "step": 3671 + }, + { + "epoch": 0.29634412073278993, + "grad_norm": 0.7410334944725037, + "learning_rate": 0.00018451727772906775, + "loss": 2.7228, + "step": 3672 + }, + { + "epoch": 0.2964248244693729, + "grad_norm": 0.7845733165740967, + "learning_rate": 0.0001845088386864708, + "loss": 2.7068, + "step": 3673 + }, + { + "epoch": 0.29650552820595594, + "grad_norm": 0.7709881067276001, + "learning_rate": 0.00018450039753769266, + "loss": 2.676, + "step": 3674 + }, + { + "epoch": 0.2965862319425389, + "grad_norm": 0.7214749455451965, + "learning_rate": 0.00018449195428294371, + "loss": 2.6488, + "step": 3675 + }, + { + "epoch": 0.29666693567912195, + "grad_norm": 0.7467561960220337, + "learning_rate": 0.00018448350892243443, + "loss": 2.7262, + "step": 3676 + }, + { + "epoch": 0.2967476394157049, + "grad_norm": 0.8412678241729736, + "learning_rate": 0.00018447506145637522, + "loss": 2.7898, + "step": 3677 + }, + { + "epoch": 0.29682834315228795, + "grad_norm": 0.7130109071731567, + "learning_rate": 0.00018446661188497668, + "loss": 2.7344, + "step": 3678 + }, + { + "epoch": 0.29690904688887093, + "grad_norm": 0.7807374000549316, + "learning_rate": 
0.00018445816020844937, + "loss": 2.7198, + "step": 3679 + }, + { + "epoch": 0.29698975062545396, + "grad_norm": 0.8497760891914368, + "learning_rate": 0.00018444970642700394, + "loss": 2.7479, + "step": 3680 + }, + { + "epoch": 0.29707045436203694, + "grad_norm": 0.6827178001403809, + "learning_rate": 0.0001844412505408511, + "loss": 2.727, + "step": 3681 + }, + { + "epoch": 0.29715115809861997, + "grad_norm": 0.8063304424285889, + "learning_rate": 0.00018443279255020152, + "loss": 2.7896, + "step": 3682 + }, + { + "epoch": 0.29723186183520295, + "grad_norm": 0.7759353518486023, + "learning_rate": 0.00018442433245526604, + "loss": 2.7014, + "step": 3683 + }, + { + "epoch": 0.297312565571786, + "grad_norm": 0.7380958199501038, + "learning_rate": 0.00018441587025625554, + "loss": 2.6665, + "step": 3684 + }, + { + "epoch": 0.29739326930836896, + "grad_norm": 0.7623556852340698, + "learning_rate": 0.00018440740595338087, + "loss": 2.6955, + "step": 3685 + }, + { + "epoch": 0.297473973044952, + "grad_norm": 0.8204537630081177, + "learning_rate": 0.000184398939546853, + "loss": 2.6854, + "step": 3686 + }, + { + "epoch": 0.29755467678153497, + "grad_norm": 0.7346726655960083, + "learning_rate": 0.00018439047103688293, + "loss": 2.6664, + "step": 3687 + }, + { + "epoch": 0.297635380518118, + "grad_norm": 0.777860701084137, + "learning_rate": 0.00018438200042368173, + "loss": 2.6423, + "step": 3688 + }, + { + "epoch": 0.297716084254701, + "grad_norm": 0.7331553101539612, + "learning_rate": 0.00018437352770746054, + "loss": 2.6137, + "step": 3689 + }, + { + "epoch": 0.297796787991284, + "grad_norm": 0.7634466290473938, + "learning_rate": 0.00018436505288843043, + "loss": 2.7266, + "step": 3690 + }, + { + "epoch": 0.297877491727867, + "grad_norm": 0.8151016235351562, + "learning_rate": 0.00018435657596680268, + "loss": 2.7373, + "step": 3691 + }, + { + "epoch": 0.29795819546445, + "grad_norm": 0.7806773781776428, + "learning_rate": 0.00018434809694278857, + "loss": 2.7011, + 
"step": 3692 + }, + { + "epoch": 0.298038899201033, + "grad_norm": 0.7575243711471558, + "learning_rate": 0.00018433961581659935, + "loss": 2.6601, + "step": 3693 + }, + { + "epoch": 0.298119602937616, + "grad_norm": 0.7527276873588562, + "learning_rate": 0.00018433113258844647, + "loss": 2.6864, + "step": 3694 + }, + { + "epoch": 0.298200306674199, + "grad_norm": 0.8024318218231201, + "learning_rate": 0.0001843226472585413, + "loss": 2.728, + "step": 3695 + }, + { + "epoch": 0.29828101041078203, + "grad_norm": 0.7549982666969299, + "learning_rate": 0.0001843141598270954, + "loss": 2.6834, + "step": 3696 + }, + { + "epoch": 0.298361714147365, + "grad_norm": 0.7699971199035645, + "learning_rate": 0.0001843056702943202, + "loss": 2.7209, + "step": 3697 + }, + { + "epoch": 0.29844241788394804, + "grad_norm": 0.823842465877533, + "learning_rate": 0.0001842971786604273, + "loss": 2.6924, + "step": 3698 + }, + { + "epoch": 0.298523121620531, + "grad_norm": 0.7645791172981262, + "learning_rate": 0.00018428868492562837, + "loss": 2.6821, + "step": 3699 + }, + { + "epoch": 0.29860382535711405, + "grad_norm": 0.7530989050865173, + "learning_rate": 0.00018428018909013506, + "loss": 2.7592, + "step": 3700 + }, + { + "epoch": 0.298684529093697, + "grad_norm": 0.7958168387413025, + "learning_rate": 0.00018427169115415914, + "loss": 2.6925, + "step": 3701 + }, + { + "epoch": 0.29876523283028006, + "grad_norm": 0.7777522802352905, + "learning_rate": 0.00018426319111791242, + "loss": 2.6757, + "step": 3702 + }, + { + "epoch": 0.29884593656686304, + "grad_norm": 0.7418079972267151, + "learning_rate": 0.00018425468898160667, + "loss": 2.6445, + "step": 3703 + }, + { + "epoch": 0.29892664030344607, + "grad_norm": 0.7591132521629333, + "learning_rate": 0.00018424618474545382, + "loss": 2.7157, + "step": 3704 + }, + { + "epoch": 0.29900734404002904, + "grad_norm": 0.7591627836227417, + "learning_rate": 0.00018423767840966586, + "loss": 2.6691, + "step": 3705 + }, + { + "epoch": 
0.2990880477766121, + "grad_norm": 0.7934779524803162, + "learning_rate": 0.00018422916997445476, + "loss": 2.7262, + "step": 3706 + }, + { + "epoch": 0.29916875151319505, + "grad_norm": 0.7964254021644592, + "learning_rate": 0.00018422065944003252, + "loss": 2.6196, + "step": 3707 + }, + { + "epoch": 0.2992494552497781, + "grad_norm": 0.7448374032974243, + "learning_rate": 0.0001842121468066113, + "loss": 2.6732, + "step": 3708 + }, + { + "epoch": 0.29933015898636106, + "grad_norm": 0.7813000679016113, + "learning_rate": 0.00018420363207440329, + "loss": 2.6978, + "step": 3709 + }, + { + "epoch": 0.2994108627229441, + "grad_norm": 0.7760851979255676, + "learning_rate": 0.00018419511524362064, + "loss": 2.7466, + "step": 3710 + }, + { + "epoch": 0.29949156645952707, + "grad_norm": 0.7786797881126404, + "learning_rate": 0.00018418659631447564, + "loss": 2.7044, + "step": 3711 + }, + { + "epoch": 0.2995722701961101, + "grad_norm": 0.7860158085823059, + "learning_rate": 0.00018417807528718055, + "loss": 2.6587, + "step": 3712 + }, + { + "epoch": 0.2996529739326931, + "grad_norm": 0.8327339291572571, + "learning_rate": 0.0001841695521619478, + "loss": 2.7112, + "step": 3713 + }, + { + "epoch": 0.2997336776692761, + "grad_norm": 0.7535735368728638, + "learning_rate": 0.00018416102693898982, + "loss": 2.726, + "step": 3714 + }, + { + "epoch": 0.2998143814058591, + "grad_norm": 0.7781090140342712, + "learning_rate": 0.000184152499618519, + "loss": 2.7238, + "step": 3715 + }, + { + "epoch": 0.2998950851424421, + "grad_norm": 0.7700545191764832, + "learning_rate": 0.00018414397020074795, + "loss": 2.7081, + "step": 3716 + }, + { + "epoch": 0.2999757888790251, + "grad_norm": 0.7578303217887878, + "learning_rate": 0.0001841354386858892, + "loss": 2.6591, + "step": 3717 + }, + { + "epoch": 0.30005649261560813, + "grad_norm": 0.7506501078605652, + "learning_rate": 0.00018412690507415538, + "loss": 2.6551, + "step": 3718 + }, + { + "epoch": 0.3001371963521911, + "grad_norm": 
0.7869547009468079, + "learning_rate": 0.00018411836936575918, + "loss": 2.7169, + "step": 3719 + }, + { + "epoch": 0.30021790008877414, + "grad_norm": 0.7547428607940674, + "learning_rate": 0.00018410983156091332, + "loss": 2.7498, + "step": 3720 + }, + { + "epoch": 0.3002986038253571, + "grad_norm": 0.7829383015632629, + "learning_rate": 0.0001841012916598306, + "loss": 2.6885, + "step": 3721 + }, + { + "epoch": 0.30037930756194015, + "grad_norm": 0.8469082117080688, + "learning_rate": 0.00018409274966272386, + "loss": 2.7594, + "step": 3722 + }, + { + "epoch": 0.3004600112985231, + "grad_norm": 0.7690171599388123, + "learning_rate": 0.00018408420556980596, + "loss": 2.7892, + "step": 3723 + }, + { + "epoch": 0.3005407150351061, + "grad_norm": 0.7295899987220764, + "learning_rate": 0.00018407565938128987, + "loss": 2.7023, + "step": 3724 + }, + { + "epoch": 0.30062141877168913, + "grad_norm": 0.7249528169631958, + "learning_rate": 0.00018406711109738856, + "loss": 2.7135, + "step": 3725 + }, + { + "epoch": 0.3007021225082721, + "grad_norm": 0.7237234711647034, + "learning_rate": 0.0001840585607183151, + "loss": 2.6117, + "step": 3726 + }, + { + "epoch": 0.30078282624485514, + "grad_norm": 0.7426557540893555, + "learning_rate": 0.00018405000824428256, + "loss": 2.7202, + "step": 3727 + }, + { + "epoch": 0.3008635299814381, + "grad_norm": 0.7572938799858093, + "learning_rate": 0.00018404145367550414, + "loss": 2.7373, + "step": 3728 + }, + { + "epoch": 0.30094423371802115, + "grad_norm": 0.7198675274848938, + "learning_rate": 0.00018403289701219295, + "loss": 2.6675, + "step": 3729 + }, + { + "epoch": 0.3010249374546041, + "grad_norm": 0.722532331943512, + "learning_rate": 0.00018402433825456235, + "loss": 2.6933, + "step": 3730 + }, + { + "epoch": 0.30110564119118716, + "grad_norm": 0.7621530890464783, + "learning_rate": 0.0001840157774028256, + "loss": 2.6951, + "step": 3731 + }, + { + "epoch": 0.30118634492777013, + "grad_norm": 0.7435615062713623, + 
"learning_rate": 0.00018400721445719604, + "loss": 2.7323, + "step": 3732 + }, + { + "epoch": 0.30126704866435317, + "grad_norm": 0.7233619689941406, + "learning_rate": 0.00018399864941788708, + "loss": 2.6789, + "step": 3733 + }, + { + "epoch": 0.30134775240093614, + "grad_norm": 0.7421496510505676, + "learning_rate": 0.00018399008228511224, + "loss": 2.72, + "step": 3734 + }, + { + "epoch": 0.3014284561375192, + "grad_norm": 0.7250909805297852, + "learning_rate": 0.000183981513059085, + "loss": 2.6717, + "step": 3735 + }, + { + "epoch": 0.30150915987410215, + "grad_norm": 0.7642899751663208, + "learning_rate": 0.0001839729417400189, + "loss": 2.6823, + "step": 3736 + }, + { + "epoch": 0.3015898636106852, + "grad_norm": 0.7434508204460144, + "learning_rate": 0.00018396436832812758, + "loss": 2.6441, + "step": 3737 + }, + { + "epoch": 0.30167056734726816, + "grad_norm": 0.7163311839103699, + "learning_rate": 0.00018395579282362473, + "loss": 2.6736, + "step": 3738 + }, + { + "epoch": 0.3017512710838512, + "grad_norm": 0.6936792731285095, + "learning_rate": 0.00018394721522672404, + "loss": 2.6792, + "step": 3739 + }, + { + "epoch": 0.30183197482043417, + "grad_norm": 0.7791975736618042, + "learning_rate": 0.0001839386355376393, + "loss": 2.653, + "step": 3740 + }, + { + "epoch": 0.3019126785570172, + "grad_norm": 0.7902694940567017, + "learning_rate": 0.00018393005375658437, + "loss": 2.7448, + "step": 3741 + }, + { + "epoch": 0.3019933822936002, + "grad_norm": 0.7405624389648438, + "learning_rate": 0.0001839214698837731, + "loss": 2.6977, + "step": 3742 + }, + { + "epoch": 0.3020740860301832, + "grad_norm": 0.8033632040023804, + "learning_rate": 0.00018391288391941943, + "loss": 2.7468, + "step": 3743 + }, + { + "epoch": 0.3021547897667662, + "grad_norm": 0.8148884177207947, + "learning_rate": 0.00018390429586373735, + "loss": 2.6992, + "step": 3744 + }, + { + "epoch": 0.3022354935033492, + "grad_norm": 0.7633625268936157, + "learning_rate": 
0.00018389570571694089, + "loss": 2.6604, + "step": 3745 + }, + { + "epoch": 0.3023161972399322, + "grad_norm": 0.8687180876731873, + "learning_rate": 0.00018388711347924413, + "loss": 2.6808, + "step": 3746 + }, + { + "epoch": 0.3023969009765152, + "grad_norm": 0.6974104046821594, + "learning_rate": 0.0001838785191508612, + "loss": 2.7613, + "step": 3747 + }, + { + "epoch": 0.3024776047130982, + "grad_norm": 0.7919288873672485, + "learning_rate": 0.00018386992273200633, + "loss": 2.664, + "step": 3748 + }, + { + "epoch": 0.30255830844968123, + "grad_norm": 0.7708829045295715, + "learning_rate": 0.00018386132422289374, + "loss": 2.7703, + "step": 3749 + }, + { + "epoch": 0.3026390121862642, + "grad_norm": 0.7099813222885132, + "learning_rate": 0.00018385272362373775, + "loss": 2.6485, + "step": 3750 + }, + { + "epoch": 0.30271971592284724, + "grad_norm": 0.7629622220993042, + "learning_rate": 0.0001838441209347527, + "loss": 2.7339, + "step": 3751 + }, + { + "epoch": 0.3028004196594302, + "grad_norm": 0.727275550365448, + "learning_rate": 0.00018383551615615295, + "loss": 2.7194, + "step": 3752 + }, + { + "epoch": 0.30288112339601325, + "grad_norm": 0.7158832550048828, + "learning_rate": 0.00018382690928815302, + "loss": 2.6698, + "step": 3753 + }, + { + "epoch": 0.30296182713259623, + "grad_norm": 0.8075565099716187, + "learning_rate": 0.00018381830033096735, + "loss": 2.7198, + "step": 3754 + }, + { + "epoch": 0.30304253086917926, + "grad_norm": 0.7949094176292419, + "learning_rate": 0.00018380968928481057, + "loss": 2.7048, + "step": 3755 + }, + { + "epoch": 0.30312323460576224, + "grad_norm": 0.7009503841400146, + "learning_rate": 0.00018380107614989724, + "loss": 2.709, + "step": 3756 + }, + { + "epoch": 0.30320393834234527, + "grad_norm": 0.668574869632721, + "learning_rate": 0.00018379246092644204, + "loss": 2.6515, + "step": 3757 + }, + { + "epoch": 0.30328464207892825, + "grad_norm": 0.7470806241035461, + "learning_rate": 0.00018378384361465968, + "loss": 
2.7577, + "step": 3758 + }, + { + "epoch": 0.3033653458155113, + "grad_norm": 0.7529913783073425, + "learning_rate": 0.0001837752242147649, + "loss": 2.7189, + "step": 3759 + }, + { + "epoch": 0.30344604955209425, + "grad_norm": 0.7373302578926086, + "learning_rate": 0.00018376660272697258, + "loss": 2.7197, + "step": 3760 + }, + { + "epoch": 0.3035267532886773, + "grad_norm": 0.7650466561317444, + "learning_rate": 0.0001837579791514975, + "loss": 2.6613, + "step": 3761 + }, + { + "epoch": 0.30360745702526026, + "grad_norm": 0.775209903717041, + "learning_rate": 0.00018374935348855468, + "loss": 2.6454, + "step": 3762 + }, + { + "epoch": 0.3036881607618433, + "grad_norm": 0.7049290537834167, + "learning_rate": 0.00018374072573835903, + "loss": 2.6663, + "step": 3763 + }, + { + "epoch": 0.30376886449842627, + "grad_norm": 0.7060630917549133, + "learning_rate": 0.0001837320959011256, + "loss": 2.6908, + "step": 3764 + }, + { + "epoch": 0.3038495682350093, + "grad_norm": 0.7561464905738831, + "learning_rate": 0.00018372346397706944, + "loss": 2.673, + "step": 3765 + }, + { + "epoch": 0.3039302719715923, + "grad_norm": 0.7293568849563599, + "learning_rate": 0.0001837148299664057, + "loss": 2.6431, + "step": 3766 + }, + { + "epoch": 0.3040109757081753, + "grad_norm": 0.8460379838943481, + "learning_rate": 0.00018370619386934962, + "loss": 2.7493, + "step": 3767 + }, + { + "epoch": 0.3040916794447583, + "grad_norm": 0.8136082291603088, + "learning_rate": 0.00018369755568611632, + "loss": 2.7298, + "step": 3768 + }, + { + "epoch": 0.3041723831813413, + "grad_norm": 0.6916636824607849, + "learning_rate": 0.00018368891541692116, + "loss": 2.7173, + "step": 3769 + }, + { + "epoch": 0.3042530869179243, + "grad_norm": 0.7547643780708313, + "learning_rate": 0.0001836802730619795, + "loss": 2.6343, + "step": 3770 + }, + { + "epoch": 0.30433379065450733, + "grad_norm": 0.7439205050468445, + "learning_rate": 0.00018367162862150665, + "loss": 2.6627, + "step": 3771 + }, + { + 
"epoch": 0.3044144943910903, + "grad_norm": 0.7781087756156921, + "learning_rate": 0.0001836629820957181, + "loss": 2.7223, + "step": 3772 + }, + { + "epoch": 0.30449519812767334, + "grad_norm": 0.7876880764961243, + "learning_rate": 0.00018365433348482935, + "loss": 2.7139, + "step": 3773 + }, + { + "epoch": 0.3045759018642563, + "grad_norm": 0.7571346163749695, + "learning_rate": 0.00018364568278905595, + "loss": 2.6939, + "step": 3774 + }, + { + "epoch": 0.3046566056008393, + "grad_norm": 0.9011813402175903, + "learning_rate": 0.00018363703000861346, + "loss": 2.7516, + "step": 3775 + }, + { + "epoch": 0.3047373093374223, + "grad_norm": 0.7809761762619019, + "learning_rate": 0.00018362837514371755, + "loss": 2.7587, + "step": 3776 + }, + { + "epoch": 0.3048180130740053, + "grad_norm": 0.7486867308616638, + "learning_rate": 0.00018361971819458393, + "loss": 2.6617, + "step": 3777 + }, + { + "epoch": 0.30489871681058833, + "grad_norm": 0.7434267401695251, + "learning_rate": 0.00018361105916142836, + "loss": 2.7328, + "step": 3778 + }, + { + "epoch": 0.3049794205471713, + "grad_norm": 0.7895822525024414, + "learning_rate": 0.0001836023980444666, + "loss": 2.7038, + "step": 3779 + }, + { + "epoch": 0.30506012428375434, + "grad_norm": 0.7329267263412476, + "learning_rate": 0.00018359373484391458, + "loss": 2.6533, + "step": 3780 + }, + { + "epoch": 0.3051408280203373, + "grad_norm": 0.7578477263450623, + "learning_rate": 0.00018358506955998817, + "loss": 2.723, + "step": 3781 + }, + { + "epoch": 0.30522153175692035, + "grad_norm": 0.7174215316772461, + "learning_rate": 0.0001835764021929033, + "loss": 2.7665, + "step": 3782 + }, + { + "epoch": 0.3053022354935033, + "grad_norm": 0.7261673808097839, + "learning_rate": 0.00018356773274287605, + "loss": 2.7239, + "step": 3783 + }, + { + "epoch": 0.30538293923008636, + "grad_norm": 0.7550768852233887, + "learning_rate": 0.00018355906121012244, + "loss": 2.6952, + "step": 3784 + }, + { + "epoch": 0.30546364296666934, + 
"grad_norm": 0.7805373668670654, + "learning_rate": 0.0001835503875948586, + "loss": 2.6453, + "step": 3785 + }, + { + "epoch": 0.30554434670325237, + "grad_norm": 0.7753674983978271, + "learning_rate": 0.0001835417118973007, + "loss": 2.7188, + "step": 3786 + }, + { + "epoch": 0.30562505043983534, + "grad_norm": 0.719774603843689, + "learning_rate": 0.00018353303411766496, + "loss": 2.69, + "step": 3787 + }, + { + "epoch": 0.3057057541764184, + "grad_norm": 0.786780059337616, + "learning_rate": 0.00018352435425616763, + "loss": 2.7015, + "step": 3788 + }, + { + "epoch": 0.30578645791300135, + "grad_norm": 0.7481613159179688, + "learning_rate": 0.00018351567231302508, + "loss": 2.6267, + "step": 3789 + }, + { + "epoch": 0.3058671616495844, + "grad_norm": 0.8138384222984314, + "learning_rate": 0.00018350698828845365, + "loss": 2.7301, + "step": 3790 + }, + { + "epoch": 0.30594786538616736, + "grad_norm": 0.7911081314086914, + "learning_rate": 0.00018349830218266982, + "loss": 2.6661, + "step": 3791 + }, + { + "epoch": 0.3060285691227504, + "grad_norm": 0.763179361820221, + "learning_rate": 0.00018348961399588997, + "loss": 2.6509, + "step": 3792 + }, + { + "epoch": 0.30610927285933337, + "grad_norm": 0.8214982748031616, + "learning_rate": 0.00018348092372833072, + "loss": 2.6951, + "step": 3793 + }, + { + "epoch": 0.3061899765959164, + "grad_norm": 0.7271003127098083, + "learning_rate": 0.00018347223138020865, + "loss": 2.7227, + "step": 3794 + }, + { + "epoch": 0.3062706803324994, + "grad_norm": 0.7727730870246887, + "learning_rate": 0.00018346353695174037, + "loss": 2.721, + "step": 3795 + }, + { + "epoch": 0.3063513840690824, + "grad_norm": 0.844895601272583, + "learning_rate": 0.00018345484044314257, + "loss": 2.6757, + "step": 3796 + }, + { + "epoch": 0.3064320878056654, + "grad_norm": 0.7409898638725281, + "learning_rate": 0.00018344614185463197, + "loss": 2.6798, + "step": 3797 + }, + { + "epoch": 0.3065127915422484, + "grad_norm": 0.8284425139427185, + 
"learning_rate": 0.00018343744118642542, + "loss": 2.7573, + "step": 3798 + }, + { + "epoch": 0.3065934952788314, + "grad_norm": 0.7535427808761597, + "learning_rate": 0.00018342873843873973, + "loss": 2.7026, + "step": 3799 + }, + { + "epoch": 0.30667419901541443, + "grad_norm": 0.8013898730278015, + "learning_rate": 0.00018342003361179176, + "loss": 2.7331, + "step": 3800 + }, + { + "epoch": 0.3067549027519974, + "grad_norm": 0.7458386421203613, + "learning_rate": 0.0001834113267057985, + "loss": 2.6976, + "step": 3801 + }, + { + "epoch": 0.30683560648858044, + "grad_norm": 0.8333673477172852, + "learning_rate": 0.00018340261772097695, + "loss": 2.7064, + "step": 3802 + }, + { + "epoch": 0.3069163102251634, + "grad_norm": 0.7273485064506531, + "learning_rate": 0.00018339390665754414, + "loss": 2.6619, + "step": 3803 + }, + { + "epoch": 0.30699701396174645, + "grad_norm": 0.8199014067649841, + "learning_rate": 0.0001833851935157172, + "loss": 2.654, + "step": 3804 + }, + { + "epoch": 0.3070777176983294, + "grad_norm": 0.780197024345398, + "learning_rate": 0.00018337647829571324, + "loss": 2.6814, + "step": 3805 + }, + { + "epoch": 0.30715842143491245, + "grad_norm": 0.7214049100875854, + "learning_rate": 0.0001833677609977495, + "loss": 2.709, + "step": 3806 + }, + { + "epoch": 0.30723912517149543, + "grad_norm": 0.7680457830429077, + "learning_rate": 0.00018335904162204326, + "loss": 2.6628, + "step": 3807 + }, + { + "epoch": 0.30731982890807846, + "grad_norm": 0.760728120803833, + "learning_rate": 0.00018335032016881178, + "loss": 2.7005, + "step": 3808 + }, + { + "epoch": 0.30740053264466144, + "grad_norm": 0.7631687521934509, + "learning_rate": 0.00018334159663827243, + "loss": 2.7012, + "step": 3809 + }, + { + "epoch": 0.30748123638124447, + "grad_norm": 0.7515785694122314, + "learning_rate": 0.00018333287103064266, + "loss": 2.7062, + "step": 3810 + }, + { + "epoch": 0.30756194011782745, + "grad_norm": 0.804500162601471, + "learning_rate": 
0.00018332414334613987, + "loss": 2.7888, + "step": 3811 + }, + { + "epoch": 0.3076426438544105, + "grad_norm": 0.7551451325416565, + "learning_rate": 0.00018331541358498164, + "loss": 2.6345, + "step": 3812 + }, + { + "epoch": 0.30772334759099346, + "grad_norm": 0.7342958450317383, + "learning_rate": 0.0001833066817473855, + "loss": 2.6601, + "step": 3813 + }, + { + "epoch": 0.3078040513275765, + "grad_norm": 0.8059296607971191, + "learning_rate": 0.0001832979478335691, + "loss": 2.7694, + "step": 3814 + }, + { + "epoch": 0.30788475506415947, + "grad_norm": 0.7037352919578552, + "learning_rate": 0.0001832892118437501, + "loss": 2.6788, + "step": 3815 + }, + { + "epoch": 0.3079654588007425, + "grad_norm": 0.759509801864624, + "learning_rate": 0.0001832804737781462, + "loss": 2.7115, + "step": 3816 + }, + { + "epoch": 0.3080461625373255, + "grad_norm": 0.7911720871925354, + "learning_rate": 0.00018327173363697524, + "loss": 2.6676, + "step": 3817 + }, + { + "epoch": 0.3081268662739085, + "grad_norm": 0.7592991590499878, + "learning_rate": 0.00018326299142045496, + "loss": 2.7245, + "step": 3818 + }, + { + "epoch": 0.3082075700104915, + "grad_norm": 0.7620227932929993, + "learning_rate": 0.00018325424712880333, + "loss": 2.7224, + "step": 3819 + }, + { + "epoch": 0.3082882737470745, + "grad_norm": 0.7834638953208923, + "learning_rate": 0.0001832455007622382, + "loss": 2.7469, + "step": 3820 + }, + { + "epoch": 0.3083689774836575, + "grad_norm": 0.7765992879867554, + "learning_rate": 0.00018323675232097757, + "loss": 2.7193, + "step": 3821 + }, + { + "epoch": 0.3084496812202405, + "grad_norm": 0.7334728837013245, + "learning_rate": 0.00018322800180523949, + "loss": 2.667, + "step": 3822 + }, + { + "epoch": 0.3085303849568235, + "grad_norm": 0.7674607634544373, + "learning_rate": 0.00018321924921524207, + "loss": 2.6479, + "step": 3823 + }, + { + "epoch": 0.30861108869340653, + "grad_norm": 0.7616469860076904, + "learning_rate": 0.0001832104945512034, + "loss": 2.6535, 
+ "step": 3824 + }, + { + "epoch": 0.3086917924299895, + "grad_norm": 0.7693164944648743, + "learning_rate": 0.00018320173781334172, + "loss": 2.7616, + "step": 3825 + }, + { + "epoch": 0.3087724961665725, + "grad_norm": 0.7099221348762512, + "learning_rate": 0.0001831929790018752, + "loss": 2.6729, + "step": 3826 + }, + { + "epoch": 0.3088531999031555, + "grad_norm": 0.7389346957206726, + "learning_rate": 0.00018318421811702222, + "loss": 2.6396, + "step": 3827 + }, + { + "epoch": 0.3089339036397385, + "grad_norm": 0.8302628397941589, + "learning_rate": 0.00018317545515900106, + "loss": 2.6786, + "step": 3828 + }, + { + "epoch": 0.3090146073763215, + "grad_norm": 0.7441998720169067, + "learning_rate": 0.00018316669012803015, + "loss": 2.6769, + "step": 3829 + }, + { + "epoch": 0.3090953111129045, + "grad_norm": 0.8454675674438477, + "learning_rate": 0.00018315792302432788, + "loss": 2.7275, + "step": 3830 + }, + { + "epoch": 0.30917601484948753, + "grad_norm": 0.8129739761352539, + "learning_rate": 0.00018314915384811282, + "loss": 2.7603, + "step": 3831 + }, + { + "epoch": 0.3092567185860705, + "grad_norm": 0.7525617480278015, + "learning_rate": 0.00018314038259960349, + "loss": 2.7156, + "step": 3832 + }, + { + "epoch": 0.30933742232265354, + "grad_norm": 0.7319022417068481, + "learning_rate": 0.0001831316092790185, + "loss": 2.676, + "step": 3833 + }, + { + "epoch": 0.3094181260592365, + "grad_norm": 0.7767768502235413, + "learning_rate": 0.00018312283388657646, + "loss": 2.7022, + "step": 3834 + }, + { + "epoch": 0.30949882979581955, + "grad_norm": 0.709293007850647, + "learning_rate": 0.00018311405642249616, + "loss": 2.6241, + "step": 3835 + }, + { + "epoch": 0.30957953353240253, + "grad_norm": 0.715360701084137, + "learning_rate": 0.0001831052768869963, + "loss": 2.6777, + "step": 3836 + }, + { + "epoch": 0.30966023726898556, + "grad_norm": 0.7361319065093994, + "learning_rate": 0.0001830964952802957, + "loss": 2.6539, + "step": 3837 + }, + { + "epoch": 
0.30974094100556854, + "grad_norm": 0.7243087291717529, + "learning_rate": 0.0001830877116026132, + "loss": 2.7506, + "step": 3838 + }, + { + "epoch": 0.30982164474215157, + "grad_norm": 0.7361106872558594, + "learning_rate": 0.00018307892585416776, + "loss": 2.697, + "step": 3839 + }, + { + "epoch": 0.30990234847873455, + "grad_norm": 0.7541893720626831, + "learning_rate": 0.00018307013803517833, + "loss": 2.694, + "step": 3840 + }, + { + "epoch": 0.3099830522153176, + "grad_norm": 0.7235575914382935, + "learning_rate": 0.00018306134814586388, + "loss": 2.6711, + "step": 3841 + }, + { + "epoch": 0.31006375595190055, + "grad_norm": 0.7868196368217468, + "learning_rate": 0.00018305255618644354, + "loss": 2.7177, + "step": 3842 + }, + { + "epoch": 0.3101444596884836, + "grad_norm": 0.8074443340301514, + "learning_rate": 0.00018304376215713637, + "loss": 2.7293, + "step": 3843 + }, + { + "epoch": 0.31022516342506656, + "grad_norm": 0.6993385553359985, + "learning_rate": 0.00018303496605816158, + "loss": 2.6942, + "step": 3844 + }, + { + "epoch": 0.3103058671616496, + "grad_norm": 0.7272824645042419, + "learning_rate": 0.00018302616788973839, + "loss": 2.7093, + "step": 3845 + }, + { + "epoch": 0.31038657089823257, + "grad_norm": 0.7496963143348694, + "learning_rate": 0.00018301736765208605, + "loss": 2.7096, + "step": 3846 + }, + { + "epoch": 0.3104672746348156, + "grad_norm": 0.7407644987106323, + "learning_rate": 0.00018300856534542387, + "loss": 2.6956, + "step": 3847 + }, + { + "epoch": 0.3105479783713986, + "grad_norm": 0.742382287979126, + "learning_rate": 0.00018299976096997132, + "loss": 2.6744, + "step": 3848 + }, + { + "epoch": 0.3106286821079816, + "grad_norm": 0.7314567565917969, + "learning_rate": 0.0001829909545259477, + "loss": 2.7544, + "step": 3849 + }, + { + "epoch": 0.3107093858445646, + "grad_norm": 0.7550896406173706, + "learning_rate": 0.0001829821460135726, + "loss": 2.714, + "step": 3850 + }, + { + "epoch": 0.3107900895811476, + "grad_norm": 
0.7496031522750854, + "learning_rate": 0.00018297333543306548, + "loss": 2.6718, + "step": 3851 + }, + { + "epoch": 0.3108707933177306, + "grad_norm": 0.7600073218345642, + "learning_rate": 0.00018296452278464596, + "loss": 2.7141, + "step": 3852 + }, + { + "epoch": 0.31095149705431363, + "grad_norm": 0.7242388129234314, + "learning_rate": 0.00018295570806853366, + "loss": 2.7407, + "step": 3853 + }, + { + "epoch": 0.3110322007908966, + "grad_norm": 0.723874568939209, + "learning_rate": 0.00018294689128494824, + "loss": 2.7253, + "step": 3854 + }, + { + "epoch": 0.31111290452747964, + "grad_norm": 0.7902834415435791, + "learning_rate": 0.00018293807243410947, + "loss": 2.7118, + "step": 3855 + }, + { + "epoch": 0.3111936082640626, + "grad_norm": 0.7676794528961182, + "learning_rate": 0.00018292925151623717, + "loss": 2.684, + "step": 3856 + }, + { + "epoch": 0.31127431200064565, + "grad_norm": 0.767431378364563, + "learning_rate": 0.0001829204285315511, + "loss": 2.6936, + "step": 3857 + }, + { + "epoch": 0.3113550157372286, + "grad_norm": 0.7802234888076782, + "learning_rate": 0.00018291160348027122, + "loss": 2.7181, + "step": 3858 + }, + { + "epoch": 0.31143571947381166, + "grad_norm": 0.7823610305786133, + "learning_rate": 0.00018290277636261743, + "loss": 2.7014, + "step": 3859 + }, + { + "epoch": 0.31151642321039463, + "grad_norm": 0.8199869394302368, + "learning_rate": 0.00018289394717880978, + "loss": 2.73, + "step": 3860 + }, + { + "epoch": 0.31159712694697766, + "grad_norm": 0.7725761532783508, + "learning_rate": 0.00018288511592906822, + "loss": 2.6978, + "step": 3861 + }, + { + "epoch": 0.31167783068356064, + "grad_norm": 0.752034068107605, + "learning_rate": 0.00018287628261361296, + "loss": 2.6635, + "step": 3862 + }, + { + "epoch": 0.3117585344201437, + "grad_norm": 0.7961714267730713, + "learning_rate": 0.0001828674472326641, + "loss": 2.7047, + "step": 3863 + }, + { + "epoch": 0.31183923815672665, + "grad_norm": 0.7413069605827332, + 
"learning_rate": 0.00018285860978644182, + "loss": 2.6872, + "step": 3864 + }, + { + "epoch": 0.3119199418933097, + "grad_norm": 0.8943146467208862, + "learning_rate": 0.00018284977027516636, + "loss": 2.7611, + "step": 3865 + }, + { + "epoch": 0.31200064562989266, + "grad_norm": 0.7663856744766235, + "learning_rate": 0.0001828409286990581, + "loss": 2.7541, + "step": 3866 + }, + { + "epoch": 0.3120813493664757, + "grad_norm": 0.7557348608970642, + "learning_rate": 0.00018283208505833731, + "loss": 2.6633, + "step": 3867 + }, + { + "epoch": 0.31216205310305867, + "grad_norm": 0.7690094113349915, + "learning_rate": 0.00018282323935322445, + "loss": 2.7117, + "step": 3868 + }, + { + "epoch": 0.3122427568396417, + "grad_norm": 0.8059033751487732, + "learning_rate": 0.00018281439158393997, + "loss": 2.6743, + "step": 3869 + }, + { + "epoch": 0.3123234605762247, + "grad_norm": 0.7877150774002075, + "learning_rate": 0.00018280554175070438, + "loss": 2.6546, + "step": 3870 + }, + { + "epoch": 0.3124041643128077, + "grad_norm": 0.799670934677124, + "learning_rate": 0.0001827966898537382, + "loss": 2.7184, + "step": 3871 + }, + { + "epoch": 0.3124848680493907, + "grad_norm": 0.8353915214538574, + "learning_rate": 0.0001827878358932621, + "loss": 2.7235, + "step": 3872 + }, + { + "epoch": 0.3125655717859737, + "grad_norm": 0.7954776883125305, + "learning_rate": 0.00018277897986949672, + "loss": 2.5992, + "step": 3873 + }, + { + "epoch": 0.3126462755225567, + "grad_norm": 0.7959856986999512, + "learning_rate": 0.00018277012178266277, + "loss": 2.6877, + "step": 3874 + }, + { + "epoch": 0.3127269792591397, + "grad_norm": 0.8220208883285522, + "learning_rate": 0.00018276126163298102, + "loss": 2.6891, + "step": 3875 + }, + { + "epoch": 0.3128076829957227, + "grad_norm": 0.7827965021133423, + "learning_rate": 0.0001827523994206723, + "loss": 2.7271, + "step": 3876 + }, + { + "epoch": 0.3128883867323057, + "grad_norm": 0.764369010925293, + "learning_rate": 0.00018274353514595746, 
+ "loss": 2.6661, + "step": 3877 + }, + { + "epoch": 0.3129690904688887, + "grad_norm": 0.7440944314002991, + "learning_rate": 0.00018273466880905744, + "loss": 2.6621, + "step": 3878 + }, + { + "epoch": 0.3130497942054717, + "grad_norm": 0.8544813394546509, + "learning_rate": 0.00018272580041019319, + "loss": 2.7168, + "step": 3879 + }, + { + "epoch": 0.3131304979420547, + "grad_norm": 0.7232592701911926, + "learning_rate": 0.00018271692994958577, + "loss": 2.6666, + "step": 3880 + }, + { + "epoch": 0.3132112016786377, + "grad_norm": 0.750525712966919, + "learning_rate": 0.00018270805742745617, + "loss": 2.6984, + "step": 3881 + }, + { + "epoch": 0.31329190541522073, + "grad_norm": 0.8195550441741943, + "learning_rate": 0.00018269918284402565, + "loss": 2.7183, + "step": 3882 + }, + { + "epoch": 0.3133726091518037, + "grad_norm": 0.7695632576942444, + "learning_rate": 0.0001826903061995153, + "loss": 2.7092, + "step": 3883 + }, + { + "epoch": 0.31345331288838674, + "grad_norm": 0.7631582617759705, + "learning_rate": 0.0001826814274941463, + "loss": 2.7061, + "step": 3884 + }, + { + "epoch": 0.3135340166249697, + "grad_norm": 0.8318471908569336, + "learning_rate": 0.0001826725467281401, + "loss": 2.694, + "step": 3885 + }, + { + "epoch": 0.31361472036155275, + "grad_norm": 0.7313492298126221, + "learning_rate": 0.00018266366390171784, + "loss": 2.6729, + "step": 3886 + }, + { + "epoch": 0.3136954240981357, + "grad_norm": 0.7508631944656372, + "learning_rate": 0.00018265477901510105, + "loss": 2.731, + "step": 3887 + }, + { + "epoch": 0.31377612783471875, + "grad_norm": 0.8106402158737183, + "learning_rate": 0.00018264589206851107, + "loss": 2.7113, + "step": 3888 + }, + { + "epoch": 0.31385683157130173, + "grad_norm": 0.771542489528656, + "learning_rate": 0.00018263700306216945, + "loss": 2.644, + "step": 3889 + }, + { + "epoch": 0.31393753530788476, + "grad_norm": 0.812441885471344, + "learning_rate": 0.00018262811199629768, + "loss": 2.6889, + "step": 3890 + }, + 
{ + "epoch": 0.31401823904446774, + "grad_norm": 0.8231199979782104, + "learning_rate": 0.00018261921887111738, + "loss": 2.6466, + "step": 3891 + }, + { + "epoch": 0.31409894278105077, + "grad_norm": 0.7492454051971436, + "learning_rate": 0.00018261032368685012, + "loss": 2.6693, + "step": 3892 + }, + { + "epoch": 0.31417964651763375, + "grad_norm": 0.7651814222335815, + "learning_rate": 0.00018260142644371772, + "loss": 2.6569, + "step": 3893 + }, + { + "epoch": 0.3142603502542168, + "grad_norm": 0.7504465579986572, + "learning_rate": 0.0001825925271419418, + "loss": 2.684, + "step": 3894 + }, + { + "epoch": 0.31434105399079976, + "grad_norm": 0.749650239944458, + "learning_rate": 0.00018258362578174424, + "loss": 2.6482, + "step": 3895 + }, + { + "epoch": 0.3144217577273828, + "grad_norm": 0.8445256352424622, + "learning_rate": 0.00018257472236334686, + "loss": 2.727, + "step": 3896 + }, + { + "epoch": 0.31450246146396577, + "grad_norm": 0.7628257870674133, + "learning_rate": 0.0001825658168869715, + "loss": 2.7314, + "step": 3897 + }, + { + "epoch": 0.3145831652005488, + "grad_norm": 0.7738446593284607, + "learning_rate": 0.00018255690935284019, + "loss": 2.7478, + "step": 3898 + }, + { + "epoch": 0.3146638689371318, + "grad_norm": 0.7578958868980408, + "learning_rate": 0.00018254799976117486, + "loss": 2.6922, + "step": 3899 + }, + { + "epoch": 0.3147445726737148, + "grad_norm": 0.8367362022399902, + "learning_rate": 0.00018253908811219764, + "loss": 2.7347, + "step": 3900 + }, + { + "epoch": 0.3148252764102978, + "grad_norm": 0.7530354857444763, + "learning_rate": 0.00018253017440613057, + "loss": 2.7151, + "step": 3901 + }, + { + "epoch": 0.3149059801468808, + "grad_norm": 0.7168053388595581, + "learning_rate": 0.00018252125864319578, + "loss": 2.7072, + "step": 3902 + }, + { + "epoch": 0.3149866838834638, + "grad_norm": 0.7480056285858154, + "learning_rate": 0.00018251234082361555, + "loss": 2.6489, + "step": 3903 + }, + { + "epoch": 0.3150673876200468, + 
"grad_norm": 0.8563880324363708, + "learning_rate": 0.0001825034209476121, + "loss": 2.7384, + "step": 3904 + }, + { + "epoch": 0.3151480913566298, + "grad_norm": 0.7959346771240234, + "learning_rate": 0.0001824944990154077, + "loss": 2.631, + "step": 3905 + }, + { + "epoch": 0.31522879509321283, + "grad_norm": 0.7385980486869812, + "learning_rate": 0.00018248557502722476, + "loss": 2.7394, + "step": 3906 + }, + { + "epoch": 0.3153094988297958, + "grad_norm": 0.7682650685310364, + "learning_rate": 0.00018247664898328567, + "loss": 2.7327, + "step": 3907 + }, + { + "epoch": 0.31539020256637884, + "grad_norm": 0.7720316648483276, + "learning_rate": 0.0001824677208838129, + "loss": 2.6442, + "step": 3908 + }, + { + "epoch": 0.3154709063029618, + "grad_norm": 0.7927379608154297, + "learning_rate": 0.00018245879072902895, + "loss": 2.7738, + "step": 3909 + }, + { + "epoch": 0.31555161003954485, + "grad_norm": 0.7506012916564941, + "learning_rate": 0.00018244985851915637, + "loss": 2.6825, + "step": 3910 + }, + { + "epoch": 0.3156323137761278, + "grad_norm": 0.6996353268623352, + "learning_rate": 0.00018244092425441781, + "loss": 2.6783, + "step": 3911 + }, + { + "epoch": 0.31571301751271086, + "grad_norm": 0.8039344549179077, + "learning_rate": 0.00018243198793503588, + "loss": 2.7628, + "step": 3912 + }, + { + "epoch": 0.31579372124929384, + "grad_norm": 0.7890963554382324, + "learning_rate": 0.0001824230495612334, + "loss": 2.7512, + "step": 3913 + }, + { + "epoch": 0.31587442498587687, + "grad_norm": 0.7470870614051819, + "learning_rate": 0.00018241410913323301, + "loss": 2.7058, + "step": 3914 + }, + { + "epoch": 0.31595512872245984, + "grad_norm": 0.7056336402893066, + "learning_rate": 0.0001824051666512576, + "loss": 2.6091, + "step": 3915 + }, + { + "epoch": 0.3160358324590429, + "grad_norm": 0.7818490862846375, + "learning_rate": 0.00018239622211553002, + "loss": 2.7509, + "step": 3916 + }, + { + "epoch": 0.31611653619562585, + "grad_norm": 0.7590607404708862, + 
"learning_rate": 0.0001823872755262732, + "loss": 2.7238, + "step": 3917 + }, + { + "epoch": 0.3161972399322089, + "grad_norm": 0.7157841920852661, + "learning_rate": 0.00018237832688371014, + "loss": 2.6639, + "step": 3918 + }, + { + "epoch": 0.31627794366879186, + "grad_norm": 0.7515804171562195, + "learning_rate": 0.00018236937618806382, + "loss": 2.6973, + "step": 3919 + }, + { + "epoch": 0.3163586474053749, + "grad_norm": 0.6691949963569641, + "learning_rate": 0.00018236042343955733, + "loss": 2.727, + "step": 3920 + }, + { + "epoch": 0.31643935114195787, + "grad_norm": 0.8122327327728271, + "learning_rate": 0.0001823514686384138, + "loss": 2.7513, + "step": 3921 + }, + { + "epoch": 0.3165200548785409, + "grad_norm": 0.7813653349876404, + "learning_rate": 0.0001823425117848564, + "loss": 2.7037, + "step": 3922 + }, + { + "epoch": 0.3166007586151239, + "grad_norm": 0.6869354844093323, + "learning_rate": 0.00018233355287910834, + "loss": 2.693, + "step": 3923 + }, + { + "epoch": 0.3166814623517069, + "grad_norm": 0.7773037552833557, + "learning_rate": 0.00018232459192139296, + "loss": 2.687, + "step": 3924 + }, + { + "epoch": 0.3167621660882899, + "grad_norm": 0.7644256949424744, + "learning_rate": 0.00018231562891193352, + "loss": 2.6753, + "step": 3925 + }, + { + "epoch": 0.3168428698248729, + "grad_norm": 0.8427005410194397, + "learning_rate": 0.00018230666385095343, + "loss": 2.6641, + "step": 3926 + }, + { + "epoch": 0.3169235735614559, + "grad_norm": 0.7194599509239197, + "learning_rate": 0.0001822976967386761, + "loss": 2.7091, + "step": 3927 + }, + { + "epoch": 0.3170042772980389, + "grad_norm": 0.7710655331611633, + "learning_rate": 0.00018228872757532512, + "loss": 2.6938, + "step": 3928 + }, + { + "epoch": 0.3170849810346219, + "grad_norm": 0.8003759980201721, + "learning_rate": 0.0001822797563611239, + "loss": 2.7019, + "step": 3929 + }, + { + "epoch": 0.3171656847712049, + "grad_norm": 0.7960470914840698, + "learning_rate": 0.00018227078309629606, + 
"loss": 2.661, + "step": 3930 + }, + { + "epoch": 0.3172463885077879, + "grad_norm": 0.7731126546859741, + "learning_rate": 0.00018226180778106526, + "loss": 2.7023, + "step": 3931 + }, + { + "epoch": 0.3173270922443709, + "grad_norm": 0.7561383843421936, + "learning_rate": 0.00018225283041565515, + "loss": 2.6768, + "step": 3932 + }, + { + "epoch": 0.3174077959809539, + "grad_norm": 0.7578409910202026, + "learning_rate": 0.0001822438510002895, + "loss": 2.7145, + "step": 3933 + }, + { + "epoch": 0.3174884997175369, + "grad_norm": 0.7901952862739563, + "learning_rate": 0.00018223486953519214, + "loss": 2.7121, + "step": 3934 + }, + { + "epoch": 0.31756920345411993, + "grad_norm": 0.82305908203125, + "learning_rate": 0.0001822258860205868, + "loss": 2.7553, + "step": 3935 + }, + { + "epoch": 0.3176499071907029, + "grad_norm": 0.748055636882782, + "learning_rate": 0.0001822169004566975, + "loss": 2.7236, + "step": 3936 + }, + { + "epoch": 0.31773061092728594, + "grad_norm": 0.7981358766555786, + "learning_rate": 0.0001822079128437481, + "loss": 2.7444, + "step": 3937 + }, + { + "epoch": 0.3178113146638689, + "grad_norm": 0.7938945889472961, + "learning_rate": 0.0001821989231819626, + "loss": 2.7512, + "step": 3938 + }, + { + "epoch": 0.31789201840045195, + "grad_norm": 0.7250397205352783, + "learning_rate": 0.0001821899314715651, + "loss": 2.6843, + "step": 3939 + }, + { + "epoch": 0.3179727221370349, + "grad_norm": 0.8844723701477051, + "learning_rate": 0.00018218093771277965, + "loss": 2.6295, + "step": 3940 + }, + { + "epoch": 0.31805342587361796, + "grad_norm": 0.7545698881149292, + "learning_rate": 0.0001821719419058304, + "loss": 2.7478, + "step": 3941 + }, + { + "epoch": 0.31813412961020093, + "grad_norm": 0.7254738807678223, + "learning_rate": 0.00018216294405094157, + "loss": 2.665, + "step": 3942 + }, + { + "epoch": 0.31821483334678397, + "grad_norm": 0.7664754390716553, + "learning_rate": 0.00018215394414833737, + "loss": 2.7431, + "step": 3943 + }, + { + 
"epoch": 0.31829553708336694, + "grad_norm": 0.8250303864479065, + "learning_rate": 0.00018214494219824217, + "loss": 2.6957, + "step": 3944 + }, + { + "epoch": 0.31837624081995, + "grad_norm": 0.7425532341003418, + "learning_rate": 0.00018213593820088026, + "loss": 2.666, + "step": 3945 + }, + { + "epoch": 0.31845694455653295, + "grad_norm": 0.6943121552467346, + "learning_rate": 0.00018212693215647604, + "loss": 2.716, + "step": 3946 + }, + { + "epoch": 0.318537648293116, + "grad_norm": 0.732829213142395, + "learning_rate": 0.00018211792406525403, + "loss": 2.6557, + "step": 3947 + }, + { + "epoch": 0.31861835202969896, + "grad_norm": 0.7666537165641785, + "learning_rate": 0.00018210891392743866, + "loss": 2.7275, + "step": 3948 + }, + { + "epoch": 0.318699055766282, + "grad_norm": 0.7652621865272522, + "learning_rate": 0.00018209990174325455, + "loss": 2.6372, + "step": 3949 + }, + { + "epoch": 0.31877975950286497, + "grad_norm": 0.7416055202484131, + "learning_rate": 0.00018209088751292626, + "loss": 2.6688, + "step": 3950 + }, + { + "epoch": 0.318860463239448, + "grad_norm": 0.7504609227180481, + "learning_rate": 0.00018208187123667848, + "loss": 2.6912, + "step": 3951 + }, + { + "epoch": 0.318941166976031, + "grad_norm": 0.7308809757232666, + "learning_rate": 0.00018207285291473588, + "loss": 2.7272, + "step": 3952 + }, + { + "epoch": 0.319021870712614, + "grad_norm": 0.8031618595123291, + "learning_rate": 0.00018206383254732326, + "loss": 2.7354, + "step": 3953 + }, + { + "epoch": 0.319102574449197, + "grad_norm": 0.81386798620224, + "learning_rate": 0.00018205481013466542, + "loss": 2.676, + "step": 3954 + }, + { + "epoch": 0.31918327818578, + "grad_norm": 0.7845911383628845, + "learning_rate": 0.0001820457856769872, + "loss": 2.7094, + "step": 3955 + }, + { + "epoch": 0.319263981922363, + "grad_norm": 0.7189298272132874, + "learning_rate": 0.00018203675917451357, + "loss": 2.6764, + "step": 3956 + }, + { + "epoch": 0.319344685658946, + "grad_norm": 
0.8253228664398193, + "learning_rate": 0.00018202773062746944, + "loss": 2.6805, + "step": 3957 + }, + { + "epoch": 0.319425389395529, + "grad_norm": 0.7965289950370789, + "learning_rate": 0.0001820187000360798, + "loss": 2.7148, + "step": 3958 + }, + { + "epoch": 0.31950609313211203, + "grad_norm": 0.7505398988723755, + "learning_rate": 0.0001820096674005698, + "loss": 2.6732, + "step": 3959 + }, + { + "epoch": 0.319586796868695, + "grad_norm": 0.7554877400398254, + "learning_rate": 0.0001820006327211645, + "loss": 2.7467, + "step": 3960 + }, + { + "epoch": 0.31966750060527804, + "grad_norm": 0.7836194038391113, + "learning_rate": 0.00018199159599808907, + "loss": 2.7252, + "step": 3961 + }, + { + "epoch": 0.319748204341861, + "grad_norm": 0.7967261672019958, + "learning_rate": 0.00018198255723156877, + "loss": 2.6814, + "step": 3962 + }, + { + "epoch": 0.31982890807844405, + "grad_norm": 0.7411713600158691, + "learning_rate": 0.00018197351642182882, + "loss": 2.6928, + "step": 3963 + }, + { + "epoch": 0.31990961181502703, + "grad_norm": 0.6961422562599182, + "learning_rate": 0.00018196447356909454, + "loss": 2.6651, + "step": 3964 + }, + { + "epoch": 0.31999031555161006, + "grad_norm": 0.7245771884918213, + "learning_rate": 0.00018195542867359134, + "loss": 2.6726, + "step": 3965 + }, + { + "epoch": 0.32007101928819304, + "grad_norm": 0.784654974937439, + "learning_rate": 0.00018194638173554462, + "loss": 2.6829, + "step": 3966 + }, + { + "epoch": 0.32015172302477607, + "grad_norm": 0.7373329997062683, + "learning_rate": 0.00018193733275517985, + "loss": 2.6481, + "step": 3967 + }, + { + "epoch": 0.32023242676135905, + "grad_norm": 0.7878682613372803, + "learning_rate": 0.00018192828173272258, + "loss": 2.6701, + "step": 3968 + }, + { + "epoch": 0.3203131304979421, + "grad_norm": 0.759676992893219, + "learning_rate": 0.00018191922866839835, + "loss": 2.7218, + "step": 3969 + }, + { + "epoch": 0.32039383423452505, + "grad_norm": 0.7923088669776917, + 
"learning_rate": 0.00018191017356243282, + "loss": 2.6841, + "step": 3970 + }, + { + "epoch": 0.3204745379711081, + "grad_norm": 0.7084882855415344, + "learning_rate": 0.00018190111641505164, + "loss": 2.7167, + "step": 3971 + }, + { + "epoch": 0.32055524170769106, + "grad_norm": 0.7166235446929932, + "learning_rate": 0.00018189205722648054, + "loss": 2.6647, + "step": 3972 + }, + { + "epoch": 0.3206359454442741, + "grad_norm": 0.7997722029685974, + "learning_rate": 0.0001818829959969453, + "loss": 2.7199, + "step": 3973 + }, + { + "epoch": 0.32071664918085707, + "grad_norm": 0.8309516310691833, + "learning_rate": 0.0001818739327266718, + "loss": 2.8006, + "step": 3974 + }, + { + "epoch": 0.3207973529174401, + "grad_norm": 0.7164002656936646, + "learning_rate": 0.00018186486741588582, + "loss": 2.6258, + "step": 3975 + }, + { + "epoch": 0.3208780566540231, + "grad_norm": 0.7715865969657898, + "learning_rate": 0.0001818558000648134, + "loss": 2.7034, + "step": 3976 + }, + { + "epoch": 0.3209587603906061, + "grad_norm": 0.7806593775749207, + "learning_rate": 0.0001818467306736804, + "loss": 2.6758, + "step": 3977 + }, + { + "epoch": 0.3210394641271891, + "grad_norm": 0.8026594519615173, + "learning_rate": 0.00018183765924271298, + "loss": 2.6976, + "step": 3978 + }, + { + "epoch": 0.32112016786377207, + "grad_norm": 0.7971245050430298, + "learning_rate": 0.00018182858577213716, + "loss": 2.7312, + "step": 3979 + }, + { + "epoch": 0.3212008716003551, + "grad_norm": 0.7347297072410583, + "learning_rate": 0.00018181951026217908, + "loss": 2.6664, + "step": 3980 + }, + { + "epoch": 0.3212815753369381, + "grad_norm": 0.7929779291152954, + "learning_rate": 0.0001818104327130649, + "loss": 2.6603, + "step": 3981 + }, + { + "epoch": 0.3213622790735211, + "grad_norm": 0.7465224862098694, + "learning_rate": 0.00018180135312502089, + "loss": 2.6566, + "step": 3982 + }, + { + "epoch": 0.3214429828101041, + "grad_norm": 0.7114695906639099, + "learning_rate": 
0.00018179227149827334, + "loss": 2.6492, + "step": 3983 + }, + { + "epoch": 0.3215236865466871, + "grad_norm": 0.7179337739944458, + "learning_rate": 0.00018178318783304857, + "loss": 2.6778, + "step": 3984 + }, + { + "epoch": 0.3216043902832701, + "grad_norm": 0.7182629704475403, + "learning_rate": 0.000181774102129573, + "loss": 2.7057, + "step": 3985 + }, + { + "epoch": 0.3216850940198531, + "grad_norm": 0.7383119463920593, + "learning_rate": 0.000181765014388073, + "loss": 2.6633, + "step": 3986 + }, + { + "epoch": 0.3217657977564361, + "grad_norm": 0.7340527176856995, + "learning_rate": 0.00018175592460877512, + "loss": 2.6838, + "step": 3987 + }, + { + "epoch": 0.32184650149301913, + "grad_norm": 0.7934359312057495, + "learning_rate": 0.00018174683279190593, + "loss": 2.6795, + "step": 3988 + }, + { + "epoch": 0.3219272052296021, + "grad_norm": 0.6960840821266174, + "learning_rate": 0.00018173773893769192, + "loss": 2.6669, + "step": 3989 + }, + { + "epoch": 0.32200790896618514, + "grad_norm": 0.7513574361801147, + "learning_rate": 0.00018172864304635985, + "loss": 2.6744, + "step": 3990 + }, + { + "epoch": 0.3220886127027681, + "grad_norm": 0.7516636848449707, + "learning_rate": 0.00018171954511813629, + "loss": 2.6652, + "step": 3991 + }, + { + "epoch": 0.32216931643935115, + "grad_norm": 0.7817716002464294, + "learning_rate": 0.00018171044515324808, + "loss": 2.6671, + "step": 3992 + }, + { + "epoch": 0.3222500201759341, + "grad_norm": 0.6859925389289856, + "learning_rate": 0.000181701343151922, + "loss": 2.6984, + "step": 3993 + }, + { + "epoch": 0.32233072391251716, + "grad_norm": 0.7669627666473389, + "learning_rate": 0.00018169223911438485, + "loss": 2.7102, + "step": 3994 + }, + { + "epoch": 0.32241142764910014, + "grad_norm": 0.784724235534668, + "learning_rate": 0.00018168313304086357, + "loss": 2.7413, + "step": 3995 + }, + { + "epoch": 0.32249213138568317, + "grad_norm": 0.7341497540473938, + "learning_rate": 0.00018167402493158509, + "loss": 
2.706, + "step": 3996 + }, + { + "epoch": 0.32257283512226614, + "grad_norm": 0.7975730299949646, + "learning_rate": 0.00018166491478677641, + "loss": 2.6896, + "step": 3997 + }, + { + "epoch": 0.3226535388588492, + "grad_norm": 0.8138537406921387, + "learning_rate": 0.00018165580260666458, + "loss": 2.6986, + "step": 3998 + }, + { + "epoch": 0.32273424259543215, + "grad_norm": 0.6734997034072876, + "learning_rate": 0.0001816466883914767, + "loss": 2.6686, + "step": 3999 + }, + { + "epoch": 0.3228149463320152, + "grad_norm": 0.7742779850959778, + "learning_rate": 0.00018163757214143992, + "loss": 2.7222, + "step": 4000 + }, + { + "epoch": 0.3228149463320152, + "eval_loss": 2.615234375, + "eval_runtime": 783.0394, + "eval_samples_per_second": 3.346, + "eval_steps_per_second": 0.558, + "step": 4000 + }, + { + "epoch": 0.32289565006859816, + "grad_norm": 0.7654715180397034, + "learning_rate": 0.00018162845385678145, + "loss": 2.7016, + "step": 4001 + }, + { + "epoch": 0.3229763538051812, + "grad_norm": 0.8698763251304626, + "learning_rate": 0.0001816193335377285, + "loss": 2.6709, + "step": 4002 + }, + { + "epoch": 0.32305705754176417, + "grad_norm": 0.758056640625, + "learning_rate": 0.00018161021118450843, + "loss": 2.7277, + "step": 4003 + }, + { + "epoch": 0.3231377612783472, + "grad_norm": 0.7462654113769531, + "learning_rate": 0.00018160108679734856, + "loss": 2.623, + "step": 4004 + }, + { + "epoch": 0.3232184650149302, + "grad_norm": 0.7274953722953796, + "learning_rate": 0.00018159196037647628, + "loss": 2.6875, + "step": 4005 + }, + { + "epoch": 0.3232991687515132, + "grad_norm": 0.7737346887588501, + "learning_rate": 0.0001815828319221191, + "loss": 2.6967, + "step": 4006 + }, + { + "epoch": 0.3233798724880962, + "grad_norm": 0.7793172001838684, + "learning_rate": 0.00018157370143450448, + "loss": 2.724, + "step": 4007 + }, + { + "epoch": 0.3234605762246792, + "grad_norm": 0.7791805863380432, + "learning_rate": 0.00018156456891385995, + "loss": 2.6653, + 
"step": 4008 + }, + { + "epoch": 0.3235412799612622, + "grad_norm": 0.7225624918937683, + "learning_rate": 0.0001815554343604132, + "loss": 2.745, + "step": 4009 + }, + { + "epoch": 0.32362198369784523, + "grad_norm": 0.6958494782447815, + "learning_rate": 0.0001815462977743918, + "loss": 2.6856, + "step": 4010 + }, + { + "epoch": 0.3237026874344282, + "grad_norm": 0.7572030425071716, + "learning_rate": 0.0001815371591560235, + "loss": 2.7053, + "step": 4011 + }, + { + "epoch": 0.32378339117101124, + "grad_norm": 0.7133952975273132, + "learning_rate": 0.00018152801850553605, + "loss": 2.6984, + "step": 4012 + }, + { + "epoch": 0.3238640949075942, + "grad_norm": 0.7598705291748047, + "learning_rate": 0.00018151887582315728, + "loss": 2.6632, + "step": 4013 + }, + { + "epoch": 0.32394479864417725, + "grad_norm": 0.7670698165893555, + "learning_rate": 0.00018150973110911503, + "loss": 2.7035, + "step": 4014 + }, + { + "epoch": 0.3240255023807602, + "grad_norm": 0.7547060251235962, + "learning_rate": 0.00018150058436363723, + "loss": 2.6531, + "step": 4015 + }, + { + "epoch": 0.32410620611734325, + "grad_norm": 0.7943035364151001, + "learning_rate": 0.00018149143558695178, + "loss": 2.766, + "step": 4016 + }, + { + "epoch": 0.32418690985392623, + "grad_norm": 0.864356517791748, + "learning_rate": 0.00018148228477928675, + "loss": 2.7134, + "step": 4017 + }, + { + "epoch": 0.32426761359050926, + "grad_norm": 0.7773902416229248, + "learning_rate": 0.00018147313194087018, + "loss": 2.6948, + "step": 4018 + }, + { + "epoch": 0.32434831732709224, + "grad_norm": 0.839131772518158, + "learning_rate": 0.0001814639770719302, + "loss": 2.7393, + "step": 4019 + }, + { + "epoch": 0.32442902106367527, + "grad_norm": 0.807837963104248, + "learning_rate": 0.00018145482017269498, + "loss": 2.7835, + "step": 4020 + }, + { + "epoch": 0.32450972480025825, + "grad_norm": 0.7133228182792664, + "learning_rate": 0.00018144566124339272, + "loss": 2.6859, + "step": 4021 + }, + { + "epoch": 
0.3245904285368413, + "grad_norm": 0.8450621962547302, + "learning_rate": 0.00018143650028425162, + "loss": 2.7548, + "step": 4022 + }, + { + "epoch": 0.32467113227342426, + "grad_norm": 0.8594980835914612, + "learning_rate": 0.00018142733729550013, + "loss": 2.6636, + "step": 4023 + }, + { + "epoch": 0.3247518360100073, + "grad_norm": 0.7134621739387512, + "learning_rate": 0.0001814181722773665, + "loss": 2.6501, + "step": 4024 + }, + { + "epoch": 0.32483253974659027, + "grad_norm": 0.8630430698394775, + "learning_rate": 0.0001814090052300792, + "loss": 2.6994, + "step": 4025 + }, + { + "epoch": 0.3249132434831733, + "grad_norm": 0.7044873237609863, + "learning_rate": 0.00018139983615386666, + "loss": 2.6603, + "step": 4026 + }, + { + "epoch": 0.3249939472197563, + "grad_norm": 0.6896052360534668, + "learning_rate": 0.00018139066504895744, + "loss": 2.6649, + "step": 4027 + }, + { + "epoch": 0.3250746509563393, + "grad_norm": 0.802855372428894, + "learning_rate": 0.00018138149191558012, + "loss": 2.7067, + "step": 4028 + }, + { + "epoch": 0.3251553546929223, + "grad_norm": 0.7555437088012695, + "learning_rate": 0.00018137231675396324, + "loss": 2.6471, + "step": 4029 + }, + { + "epoch": 0.32523605842950526, + "grad_norm": 0.6846967339515686, + "learning_rate": 0.00018136313956433552, + "loss": 2.6774, + "step": 4030 + }, + { + "epoch": 0.3253167621660883, + "grad_norm": 0.7435858249664307, + "learning_rate": 0.0001813539603469257, + "loss": 2.7135, + "step": 4031 + }, + { + "epoch": 0.32539746590267127, + "grad_norm": 0.7669098377227783, + "learning_rate": 0.00018134477910196253, + "loss": 2.7014, + "step": 4032 + }, + { + "epoch": 0.3254781696392543, + "grad_norm": 0.7797521352767944, + "learning_rate": 0.00018133559582967482, + "loss": 2.7229, + "step": 4033 + }, + { + "epoch": 0.3255588733758373, + "grad_norm": 0.7377886176109314, + "learning_rate": 0.00018132641053029142, + "loss": 2.7196, + "step": 4034 + }, + { + "epoch": 0.3256395771124203, + "grad_norm": 
0.7387986779212952, + "learning_rate": 0.0001813172232040413, + "loss": 2.687, + "step": 4035 + }, + { + "epoch": 0.3257202808490033, + "grad_norm": 0.7276624441146851, + "learning_rate": 0.0001813080338511534, + "loss": 2.6954, + "step": 4036 + }, + { + "epoch": 0.3258009845855863, + "grad_norm": 0.7929670214653015, + "learning_rate": 0.00018129884247185683, + "loss": 2.7431, + "step": 4037 + }, + { + "epoch": 0.3258816883221693, + "grad_norm": 0.7896441221237183, + "learning_rate": 0.0001812896490663805, + "loss": 2.6823, + "step": 4038 + }, + { + "epoch": 0.3259623920587523, + "grad_norm": 0.8642957210540771, + "learning_rate": 0.00018128045363495368, + "loss": 2.7334, + "step": 4039 + }, + { + "epoch": 0.3260430957953353, + "grad_norm": 0.7156081795692444, + "learning_rate": 0.00018127125617780542, + "loss": 2.6886, + "step": 4040 + }, + { + "epoch": 0.32612379953191833, + "grad_norm": 0.8260853290557861, + "learning_rate": 0.00018126205669516507, + "loss": 2.6802, + "step": 4041 + }, + { + "epoch": 0.3262045032685013, + "grad_norm": 0.6853542327880859, + "learning_rate": 0.00018125285518726182, + "loss": 2.6392, + "step": 4042 + }, + { + "epoch": 0.32628520700508434, + "grad_norm": 0.7574017643928528, + "learning_rate": 0.00018124365165432505, + "loss": 2.7412, + "step": 4043 + }, + { + "epoch": 0.3263659107416673, + "grad_norm": 0.8656191825866699, + "learning_rate": 0.00018123444609658408, + "loss": 2.6903, + "step": 4044 + }, + { + "epoch": 0.32644661447825035, + "grad_norm": 0.7443257570266724, + "learning_rate": 0.00018122523851426837, + "loss": 2.682, + "step": 4045 + }, + { + "epoch": 0.32652731821483333, + "grad_norm": 0.7222229242324829, + "learning_rate": 0.0001812160289076074, + "loss": 2.6196, + "step": 4046 + }, + { + "epoch": 0.32660802195141636, + "grad_norm": 0.8531985878944397, + "learning_rate": 0.00018120681727683066, + "loss": 2.6777, + "step": 4047 + }, + { + "epoch": 0.32668872568799934, + "grad_norm": 0.7380290627479553, + 
"learning_rate": 0.0001811976036221678, + "loss": 2.6847, + "step": 4048 + }, + { + "epoch": 0.32676942942458237, + "grad_norm": 0.7250707149505615, + "learning_rate": 0.00018118838794384837, + "loss": 2.6846, + "step": 4049 + }, + { + "epoch": 0.32685013316116535, + "grad_norm": 0.763504147529602, + "learning_rate": 0.00018117917024210208, + "loss": 2.69, + "step": 4050 + }, + { + "epoch": 0.3269308368977484, + "grad_norm": 0.7740737795829773, + "learning_rate": 0.00018116995051715867, + "loss": 2.6945, + "step": 4051 + }, + { + "epoch": 0.32701154063433135, + "grad_norm": 0.7777624726295471, + "learning_rate": 0.00018116072876924792, + "loss": 2.6918, + "step": 4052 + }, + { + "epoch": 0.3270922443709144, + "grad_norm": 0.7957910895347595, + "learning_rate": 0.0001811515049985997, + "loss": 2.7237, + "step": 4053 + }, + { + "epoch": 0.32717294810749736, + "grad_norm": 0.7828991413116455, + "learning_rate": 0.00018114227920544375, + "loss": 2.7008, + "step": 4054 + }, + { + "epoch": 0.3272536518440804, + "grad_norm": 0.6695161461830139, + "learning_rate": 0.00018113305139001016, + "loss": 2.7311, + "step": 4055 + }, + { + "epoch": 0.32733435558066337, + "grad_norm": 0.7693436145782471, + "learning_rate": 0.00018112382155252883, + "loss": 2.7102, + "step": 4056 + }, + { + "epoch": 0.3274150593172464, + "grad_norm": 0.7520042657852173, + "learning_rate": 0.0001811145896932298, + "loss": 2.6455, + "step": 4057 + }, + { + "epoch": 0.3274957630538294, + "grad_norm": 0.786834716796875, + "learning_rate": 0.00018110535581234317, + "loss": 2.6965, + "step": 4058 + }, + { + "epoch": 0.3275764667904124, + "grad_norm": 0.742001473903656, + "learning_rate": 0.00018109611991009905, + "loss": 2.7341, + "step": 4059 + }, + { + "epoch": 0.3276571705269954, + "grad_norm": 0.813522219657898, + "learning_rate": 0.00018108688198672766, + "loss": 2.8116, + "step": 4060 + }, + { + "epoch": 0.3277378742635784, + "grad_norm": 0.7611314058303833, + "learning_rate": 0.00018107764204245916, 
+ "loss": 2.6741, + "step": 4061 + }, + { + "epoch": 0.3278185780001614, + "grad_norm": 0.7285993695259094, + "learning_rate": 0.00018106840007752392, + "loss": 2.671, + "step": 4062 + }, + { + "epoch": 0.32789928173674443, + "grad_norm": 0.773151695728302, + "learning_rate": 0.0001810591560921522, + "loss": 2.7106, + "step": 4063 + }, + { + "epoch": 0.3279799854733274, + "grad_norm": 0.7448920011520386, + "learning_rate": 0.00018104991008657445, + "loss": 2.7176, + "step": 4064 + }, + { + "epoch": 0.32806068920991044, + "grad_norm": 0.7088467478752136, + "learning_rate": 0.0001810406620610211, + "loss": 2.7085, + "step": 4065 + }, + { + "epoch": 0.3281413929464934, + "grad_norm": 0.7507789731025696, + "learning_rate": 0.00018103141201572255, + "loss": 2.7361, + "step": 4066 + }, + { + "epoch": 0.32822209668307645, + "grad_norm": 0.7065643072128296, + "learning_rate": 0.00018102215995090943, + "loss": 2.6573, + "step": 4067 + }, + { + "epoch": 0.3283028004196594, + "grad_norm": 0.6888713836669922, + "learning_rate": 0.0001810129058668123, + "loss": 2.6699, + "step": 4068 + }, + { + "epoch": 0.32838350415624246, + "grad_norm": 0.736347496509552, + "learning_rate": 0.00018100364976366174, + "loss": 2.7089, + "step": 4069 + }, + { + "epoch": 0.32846420789282543, + "grad_norm": 0.6854562759399414, + "learning_rate": 0.0001809943916416885, + "loss": 2.7051, + "step": 4070 + }, + { + "epoch": 0.32854491162940846, + "grad_norm": 0.7481048107147217, + "learning_rate": 0.0001809851315011233, + "loss": 2.7428, + "step": 4071 + }, + { + "epoch": 0.32862561536599144, + "grad_norm": 0.7600961923599243, + "learning_rate": 0.0001809758693421969, + "loss": 2.7153, + "step": 4072 + }, + { + "epoch": 0.3287063191025745, + "grad_norm": 0.7545063495635986, + "learning_rate": 0.00018096660516514024, + "loss": 2.6736, + "step": 4073 + }, + { + "epoch": 0.32878702283915745, + "grad_norm": 0.7967175841331482, + "learning_rate": 0.0001809573389701841, + "loss": 2.6711, + "step": 4074 + }, 
+ { + "epoch": 0.3288677265757405, + "grad_norm": 0.7115446925163269, + "learning_rate": 0.00018094807075755943, + "loss": 2.6761, + "step": 4075 + }, + { + "epoch": 0.32894843031232346, + "grad_norm": 0.8230876326560974, + "learning_rate": 0.00018093880052749725, + "loss": 2.6749, + "step": 4076 + }, + { + "epoch": 0.3290291340489065, + "grad_norm": 0.8549706935882568, + "learning_rate": 0.00018092952828022856, + "loss": 2.7084, + "step": 4077 + }, + { + "epoch": 0.32910983778548947, + "grad_norm": 0.7379534244537354, + "learning_rate": 0.00018092025401598448, + "loss": 2.7241, + "step": 4078 + }, + { + "epoch": 0.3291905415220725, + "grad_norm": 0.7659998536109924, + "learning_rate": 0.00018091097773499616, + "loss": 2.7108, + "step": 4079 + }, + { + "epoch": 0.3292712452586555, + "grad_norm": 0.8074536323547363, + "learning_rate": 0.00018090169943749476, + "loss": 2.676, + "step": 4080 + }, + { + "epoch": 0.32935194899523845, + "grad_norm": 0.7588536143302917, + "learning_rate": 0.00018089241912371153, + "loss": 2.639, + "step": 4081 + }, + { + "epoch": 0.3294326527318215, + "grad_norm": 0.7510811686515808, + "learning_rate": 0.00018088313679387775, + "loss": 2.6722, + "step": 4082 + }, + { + "epoch": 0.32951335646840446, + "grad_norm": 0.7538900971412659, + "learning_rate": 0.0001808738524482248, + "loss": 2.6917, + "step": 4083 + }, + { + "epoch": 0.3295940602049875, + "grad_norm": 0.8071155548095703, + "learning_rate": 0.00018086456608698402, + "loss": 2.6964, + "step": 4084 + }, + { + "epoch": 0.32967476394157047, + "grad_norm": 0.7778098583221436, + "learning_rate": 0.00018085527771038686, + "loss": 2.7301, + "step": 4085 + }, + { + "epoch": 0.3297554676781535, + "grad_norm": 0.7717564702033997, + "learning_rate": 0.00018084598731866485, + "loss": 2.7484, + "step": 4086 + }, + { + "epoch": 0.3298361714147365, + "grad_norm": 0.7361736297607422, + "learning_rate": 0.00018083669491204948, + "loss": 2.6299, + "step": 4087 + }, + { + "epoch": 0.3299168751513195, 
+ "grad_norm": 0.736681342124939, + "learning_rate": 0.00018082740049077238, + "loss": 2.7521, + "step": 4088 + }, + { + "epoch": 0.3299975788879025, + "grad_norm": 0.8011857867240906, + "learning_rate": 0.00018081810405506517, + "loss": 2.724, + "step": 4089 + }, + { + "epoch": 0.3300782826244855, + "grad_norm": 0.7741932272911072, + "learning_rate": 0.00018080880560515956, + "loss": 2.6766, + "step": 4090 + }, + { + "epoch": 0.3301589863610685, + "grad_norm": 0.7321778535842896, + "learning_rate": 0.00018079950514128724, + "loss": 2.6614, + "step": 4091 + }, + { + "epoch": 0.33023969009765153, + "grad_norm": 0.7916514277458191, + "learning_rate": 0.00018079020266368006, + "loss": 2.7177, + "step": 4092 + }, + { + "epoch": 0.3303203938342345, + "grad_norm": 0.7961388826370239, + "learning_rate": 0.00018078089817256986, + "loss": 2.6671, + "step": 4093 + }, + { + "epoch": 0.33040109757081754, + "grad_norm": 0.7167038321495056, + "learning_rate": 0.0001807715916681885, + "loss": 2.6989, + "step": 4094 + }, + { + "epoch": 0.3304818013074005, + "grad_norm": 0.6924864649772644, + "learning_rate": 0.00018076228315076794, + "loss": 2.6484, + "step": 4095 + }, + { + "epoch": 0.33056250504398355, + "grad_norm": 0.777881383895874, + "learning_rate": 0.00018075297262054013, + "loss": 2.6498, + "step": 4096 + }, + { + "epoch": 0.3306432087805665, + "grad_norm": 0.7878376841545105, + "learning_rate": 0.0001807436600777372, + "loss": 2.7745, + "step": 4097 + }, + { + "epoch": 0.33072391251714955, + "grad_norm": 0.8418465256690979, + "learning_rate": 0.0001807343455225912, + "loss": 2.7195, + "step": 4098 + }, + { + "epoch": 0.33080461625373253, + "grad_norm": 0.7780830264091492, + "learning_rate": 0.00018072502895533424, + "loss": 2.6652, + "step": 4099 + }, + { + "epoch": 0.33088531999031556, + "grad_norm": 0.7102445960044861, + "learning_rate": 0.00018071571037619853, + "loss": 2.6618, + "step": 4100 + }, + { + "epoch": 0.33096602372689854, + "grad_norm": 0.7028098106384277, 
+ "learning_rate": 0.00018070638978541633, + "loss": 2.7114, + "step": 4101 + }, + { + "epoch": 0.33104672746348157, + "grad_norm": 0.7529525756835938, + "learning_rate": 0.00018069706718321996, + "loss": 2.7231, + "step": 4102 + }, + { + "epoch": 0.33112743120006455, + "grad_norm": 0.7404564023017883, + "learning_rate": 0.0001806877425698417, + "loss": 2.6564, + "step": 4103 + }, + { + "epoch": 0.3312081349366476, + "grad_norm": 0.7725130319595337, + "learning_rate": 0.00018067841594551401, + "loss": 2.677, + "step": 4104 + }, + { + "epoch": 0.33128883867323056, + "grad_norm": 0.7616425156593323, + "learning_rate": 0.00018066908731046927, + "loss": 2.6586, + "step": 4105 + }, + { + "epoch": 0.3313695424098136, + "grad_norm": 0.7318183779716492, + "learning_rate": 0.00018065975666494002, + "loss": 2.6624, + "step": 4106 + }, + { + "epoch": 0.33145024614639657, + "grad_norm": 0.7012802958488464, + "learning_rate": 0.00018065042400915878, + "loss": 2.6663, + "step": 4107 + }, + { + "epoch": 0.3315309498829796, + "grad_norm": 0.815226674079895, + "learning_rate": 0.00018064108934335814, + "loss": 2.7248, + "step": 4108 + }, + { + "epoch": 0.3316116536195626, + "grad_norm": 0.68972247838974, + "learning_rate": 0.00018063175266777077, + "loss": 2.6961, + "step": 4109 + }, + { + "epoch": 0.3316923573561456, + "grad_norm": 0.7563794255256653, + "learning_rate": 0.00018062241398262937, + "loss": 2.6526, + "step": 4110 + }, + { + "epoch": 0.3317730610927286, + "grad_norm": 0.7878836989402771, + "learning_rate": 0.00018061307328816662, + "loss": 2.7316, + "step": 4111 + }, + { + "epoch": 0.3318537648293116, + "grad_norm": 0.7189129590988159, + "learning_rate": 0.00018060373058461537, + "loss": 2.6577, + "step": 4112 + }, + { + "epoch": 0.3319344685658946, + "grad_norm": 0.7517561912536621, + "learning_rate": 0.00018059438587220847, + "loss": 2.668, + "step": 4113 + }, + { + "epoch": 0.3320151723024776, + "grad_norm": 0.7602595686912537, + "learning_rate": 
0.00018058503915117878, + "loss": 2.6741, + "step": 4114 + }, + { + "epoch": 0.3320958760390606, + "grad_norm": 0.7702187299728394, + "learning_rate": 0.00018057569042175927, + "loss": 2.7082, + "step": 4115 + }, + { + "epoch": 0.33217657977564363, + "grad_norm": 0.7289660573005676, + "learning_rate": 0.00018056633968418294, + "loss": 2.6728, + "step": 4116 + }, + { + "epoch": 0.3322572835122266, + "grad_norm": 0.6936683654785156, + "learning_rate": 0.0001805569869386828, + "loss": 2.6735, + "step": 4117 + }, + { + "epoch": 0.33233798724880964, + "grad_norm": 0.7128138542175293, + "learning_rate": 0.000180547632185492, + "loss": 2.646, + "step": 4118 + }, + { + "epoch": 0.3324186909853926, + "grad_norm": 0.7234248518943787, + "learning_rate": 0.00018053827542484363, + "loss": 2.6497, + "step": 4119 + }, + { + "epoch": 0.33249939472197565, + "grad_norm": 0.7084202170372009, + "learning_rate": 0.0001805289166569709, + "loss": 2.6328, + "step": 4120 + }, + { + "epoch": 0.3325800984585586, + "grad_norm": 0.8068051934242249, + "learning_rate": 0.00018051955588210708, + "loss": 2.6576, + "step": 4121 + }, + { + "epoch": 0.33266080219514166, + "grad_norm": 0.787680447101593, + "learning_rate": 0.00018051019310048544, + "loss": 2.7091, + "step": 4122 + }, + { + "epoch": 0.33274150593172463, + "grad_norm": 0.698946475982666, + "learning_rate": 0.00018050082831233931, + "loss": 2.6657, + "step": 4123 + }, + { + "epoch": 0.33282220966830767, + "grad_norm": 0.7946122288703918, + "learning_rate": 0.00018049146151790215, + "loss": 2.6981, + "step": 4124 + }, + { + "epoch": 0.33290291340489064, + "grad_norm": 0.8025123476982117, + "learning_rate": 0.00018048209271740736, + "loss": 2.6878, + "step": 4125 + }, + { + "epoch": 0.3329836171414737, + "grad_norm": 0.7493376135826111, + "learning_rate": 0.0001804727219110884, + "loss": 2.6556, + "step": 4126 + }, + { + "epoch": 0.33306432087805665, + "grad_norm": 0.7143186926841736, + "learning_rate": 0.00018046334909917886, + "loss": 
2.6879, + "step": 4127 + }, + { + "epoch": 0.3331450246146397, + "grad_norm": 0.7375641465187073, + "learning_rate": 0.00018045397428191235, + "loss": 2.6817, + "step": 4128 + }, + { + "epoch": 0.33322572835122266, + "grad_norm": 0.7201291918754578, + "learning_rate": 0.00018044459745952248, + "loss": 2.6765, + "step": 4129 + }, + { + "epoch": 0.3333064320878057, + "grad_norm": 0.7924519777297974, + "learning_rate": 0.00018043521863224296, + "loss": 2.7748, + "step": 4130 + }, + { + "epoch": 0.33338713582438867, + "grad_norm": 0.7773354053497314, + "learning_rate": 0.00018042583780030752, + "loss": 2.6839, + "step": 4131 + }, + { + "epoch": 0.33346783956097165, + "grad_norm": 0.7527397274971008, + "learning_rate": 0.00018041645496394998, + "loss": 2.6749, + "step": 4132 + }, + { + "epoch": 0.3335485432975547, + "grad_norm": 0.7329208254814148, + "learning_rate": 0.00018040707012340418, + "loss": 2.7535, + "step": 4133 + }, + { + "epoch": 0.33362924703413765, + "grad_norm": 0.7637773752212524, + "learning_rate": 0.00018039768327890397, + "loss": 2.632, + "step": 4134 + }, + { + "epoch": 0.3337099507707207, + "grad_norm": 0.823623776435852, + "learning_rate": 0.00018038829443068333, + "loss": 2.7122, + "step": 4135 + }, + { + "epoch": 0.33379065450730366, + "grad_norm": 0.8040826916694641, + "learning_rate": 0.00018037890357897632, + "loss": 2.7197, + "step": 4136 + }, + { + "epoch": 0.3338713582438867, + "grad_norm": 0.7483998537063599, + "learning_rate": 0.00018036951072401686, + "loss": 2.6535, + "step": 4137 + }, + { + "epoch": 0.33395206198046967, + "grad_norm": 0.8141106367111206, + "learning_rate": 0.00018036011586603914, + "loss": 2.7127, + "step": 4138 + }, + { + "epoch": 0.3340327657170527, + "grad_norm": 0.7226041555404663, + "learning_rate": 0.00018035071900527724, + "loss": 2.6846, + "step": 4139 + }, + { + "epoch": 0.3341134694536357, + "grad_norm": 0.7624794840812683, + "learning_rate": 0.00018034132014196541, + "loss": 2.6725, + "step": 4140 + }, + { 
+ "epoch": 0.3341941731902187, + "grad_norm": 0.7299962043762207, + "learning_rate": 0.00018033191927633785, + "loss": 2.6728, + "step": 4141 + }, + { + "epoch": 0.3342748769268017, + "grad_norm": 0.7920462489128113, + "learning_rate": 0.0001803225164086289, + "loss": 2.6544, + "step": 4142 + }, + { + "epoch": 0.3343555806633847, + "grad_norm": 0.7469778656959534, + "learning_rate": 0.00018031311153907282, + "loss": 2.7356, + "step": 4143 + }, + { + "epoch": 0.3344362843999677, + "grad_norm": 0.8831696510314941, + "learning_rate": 0.0001803037046679041, + "loss": 2.6584, + "step": 4144 + }, + { + "epoch": 0.33451698813655073, + "grad_norm": 0.8047679662704468, + "learning_rate": 0.00018029429579535715, + "loss": 2.6213, + "step": 4145 + }, + { + "epoch": 0.3345976918731337, + "grad_norm": 0.7109517455101013, + "learning_rate": 0.00018028488492166645, + "loss": 2.6622, + "step": 4146 + }, + { + "epoch": 0.33467839560971674, + "grad_norm": 0.7240141034126282, + "learning_rate": 0.0001802754720470665, + "loss": 2.6794, + "step": 4147 + }, + { + "epoch": 0.3347590993462997, + "grad_norm": 0.7292990684509277, + "learning_rate": 0.000180266057171792, + "loss": 2.6079, + "step": 4148 + }, + { + "epoch": 0.33483980308288275, + "grad_norm": 0.8055328130722046, + "learning_rate": 0.00018025664029607756, + "loss": 2.7044, + "step": 4149 + }, + { + "epoch": 0.3349205068194657, + "grad_norm": 0.8348979949951172, + "learning_rate": 0.00018024722142015781, + "loss": 2.6757, + "step": 4150 + }, + { + "epoch": 0.33500121055604876, + "grad_norm": 0.7797044515609741, + "learning_rate": 0.00018023780054426754, + "loss": 2.7125, + "step": 4151 + }, + { + "epoch": 0.33508191429263173, + "grad_norm": 0.802442729473114, + "learning_rate": 0.00018022837766864153, + "loss": 2.7121, + "step": 4152 + }, + { + "epoch": 0.33516261802921476, + "grad_norm": 0.7248829007148743, + "learning_rate": 0.00018021895279351463, + "loss": 2.7344, + "step": 4153 + }, + { + "epoch": 0.33524332176579774, + 
"grad_norm": 0.7458582520484924, + "learning_rate": 0.00018020952591912175, + "loss": 2.665, + "step": 4154 + }, + { + "epoch": 0.3353240255023808, + "grad_norm": 0.8153703808784485, + "learning_rate": 0.0001802000970456978, + "loss": 2.7416, + "step": 4155 + }, + { + "epoch": 0.33540472923896375, + "grad_norm": 0.7583708763122559, + "learning_rate": 0.00018019066617347779, + "loss": 2.7002, + "step": 4156 + }, + { + "epoch": 0.3354854329755468, + "grad_norm": 0.7522469162940979, + "learning_rate": 0.00018018123330269678, + "loss": 2.7196, + "step": 4157 + }, + { + "epoch": 0.33556613671212976, + "grad_norm": 0.7386923432350159, + "learning_rate": 0.00018017179843358983, + "loss": 2.6947, + "step": 4158 + }, + { + "epoch": 0.3356468404487128, + "grad_norm": 0.7366231083869934, + "learning_rate": 0.00018016236156639205, + "loss": 2.7377, + "step": 4159 + }, + { + "epoch": 0.33572754418529577, + "grad_norm": 0.7727232575416565, + "learning_rate": 0.00018015292270133872, + "loss": 2.7566, + "step": 4160 + }, + { + "epoch": 0.3358082479218788, + "grad_norm": 0.6781843304634094, + "learning_rate": 0.000180143481838665, + "loss": 2.6796, + "step": 4161 + }, + { + "epoch": 0.3358889516584618, + "grad_norm": 0.7036039233207703, + "learning_rate": 0.00018013403897860624, + "loss": 2.7012, + "step": 4162 + }, + { + "epoch": 0.3359696553950448, + "grad_norm": 0.8252625465393066, + "learning_rate": 0.00018012459412139776, + "loss": 2.6613, + "step": 4163 + }, + { + "epoch": 0.3360503591316278, + "grad_norm": 0.6924486756324768, + "learning_rate": 0.00018011514726727493, + "loss": 2.6425, + "step": 4164 + }, + { + "epoch": 0.3361310628682108, + "grad_norm": 0.7735962271690369, + "learning_rate": 0.0001801056984164732, + "loss": 2.7235, + "step": 4165 + }, + { + "epoch": 0.3362117666047938, + "grad_norm": 0.7439951300621033, + "learning_rate": 0.0001800962475692281, + "loss": 2.7428, + "step": 4166 + }, + { + "epoch": 0.3362924703413768, + "grad_norm": 0.6830539107322693, + 
"learning_rate": 0.0001800867947257751, + "loss": 2.5907, + "step": 4167 + }, + { + "epoch": 0.3363731740779598, + "grad_norm": 0.8355144262313843, + "learning_rate": 0.00018007733988634986, + "loss": 2.6978, + "step": 4168 + }, + { + "epoch": 0.33645387781454283, + "grad_norm": 0.6880978941917419, + "learning_rate": 0.00018006788305118798, + "loss": 2.6934, + "step": 4169 + }, + { + "epoch": 0.3365345815511258, + "grad_norm": 0.762709379196167, + "learning_rate": 0.0001800584242205251, + "loss": 2.684, + "step": 4170 + }, + { + "epoch": 0.33661528528770884, + "grad_norm": 0.7543070912361145, + "learning_rate": 0.0001800489633945971, + "loss": 2.6857, + "step": 4171 + }, + { + "epoch": 0.3366959890242918, + "grad_norm": 0.787651777267456, + "learning_rate": 0.00018003950057363964, + "loss": 2.6979, + "step": 4172 + }, + { + "epoch": 0.33677669276087485, + "grad_norm": 0.7831481099128723, + "learning_rate": 0.00018003003575788856, + "loss": 2.7158, + "step": 4173 + }, + { + "epoch": 0.33685739649745783, + "grad_norm": 0.844904363155365, + "learning_rate": 0.00018002056894757986, + "loss": 2.6459, + "step": 4174 + }, + { + "epoch": 0.33693810023404086, + "grad_norm": 0.7529420852661133, + "learning_rate": 0.00018001110014294937, + "loss": 2.685, + "step": 4175 + }, + { + "epoch": 0.33701880397062384, + "grad_norm": 0.776719868183136, + "learning_rate": 0.0001800016293442331, + "loss": 2.6353, + "step": 4176 + }, + { + "epoch": 0.33709950770720687, + "grad_norm": 0.7988671660423279, + "learning_rate": 0.00017999215655166716, + "loss": 2.7241, + "step": 4177 + }, + { + "epoch": 0.33718021144378985, + "grad_norm": 0.7190617918968201, + "learning_rate": 0.00017998268176548752, + "loss": 2.7278, + "step": 4178 + }, + { + "epoch": 0.3372609151803729, + "grad_norm": 0.8337060809135437, + "learning_rate": 0.0001799732049859304, + "loss": 2.7059, + "step": 4179 + }, + { + "epoch": 0.33734161891695585, + "grad_norm": 0.7547435164451599, + "learning_rate": 
0.0001799637262132319, + "loss": 2.7782, + "step": 4180 + }, + { + "epoch": 0.3374223226535389, + "grad_norm": 0.8067883253097534, + "learning_rate": 0.0001799542454476284, + "loss": 2.7978, + "step": 4181 + }, + { + "epoch": 0.33750302639012186, + "grad_norm": 0.7451581358909607, + "learning_rate": 0.00017994476268935609, + "loss": 2.6931, + "step": 4182 + }, + { + "epoch": 0.33758373012670484, + "grad_norm": 0.7521898746490479, + "learning_rate": 0.00017993527793865125, + "loss": 2.6939, + "step": 4183 + }, + { + "epoch": 0.33766443386328787, + "grad_norm": 0.7608996033668518, + "learning_rate": 0.0001799257911957504, + "loss": 2.715, + "step": 4184 + }, + { + "epoch": 0.33774513759987085, + "grad_norm": 0.7459948658943176, + "learning_rate": 0.00017991630246088987, + "loss": 2.6951, + "step": 4185 + }, + { + "epoch": 0.3378258413364539, + "grad_norm": 0.7549717426300049, + "learning_rate": 0.00017990681173430618, + "loss": 2.7353, + "step": 4186 + }, + { + "epoch": 0.33790654507303686, + "grad_norm": 0.7234344482421875, + "learning_rate": 0.0001798973190162359, + "loss": 2.6491, + "step": 4187 + }, + { + "epoch": 0.3379872488096199, + "grad_norm": 0.7652330994606018, + "learning_rate": 0.00017988782430691553, + "loss": 2.765, + "step": 4188 + }, + { + "epoch": 0.33806795254620287, + "grad_norm": 0.742953360080719, + "learning_rate": 0.00017987832760658177, + "loss": 2.7079, + "step": 4189 + }, + { + "epoch": 0.3381486562827859, + "grad_norm": 0.7440767288208008, + "learning_rate": 0.00017986882891547125, + "loss": 2.6751, + "step": 4190 + }, + { + "epoch": 0.3382293600193689, + "grad_norm": 0.7141925096511841, + "learning_rate": 0.00017985932823382078, + "loss": 2.6249, + "step": 4191 + }, + { + "epoch": 0.3383100637559519, + "grad_norm": 0.7200489044189453, + "learning_rate": 0.00017984982556186707, + "loss": 2.6811, + "step": 4192 + }, + { + "epoch": 0.3383907674925349, + "grad_norm": 0.7677409648895264, + "learning_rate": 0.00017984032089984696, + "loss": 
2.6641, + "step": 4193 + }, + { + "epoch": 0.3384714712291179, + "grad_norm": 0.7386545538902283, + "learning_rate": 0.00017983081424799741, + "loss": 2.6504, + "step": 4194 + }, + { + "epoch": 0.3385521749657009, + "grad_norm": 0.7528583407402039, + "learning_rate": 0.00017982130560655526, + "loss": 2.6422, + "step": 4195 + }, + { + "epoch": 0.3386328787022839, + "grad_norm": 0.7339407801628113, + "learning_rate": 0.0001798117949757575, + "loss": 2.7047, + "step": 4196 + }, + { + "epoch": 0.3387135824388669, + "grad_norm": 0.7655882239341736, + "learning_rate": 0.00017980228235584117, + "loss": 2.7644, + "step": 4197 + }, + { + "epoch": 0.33879428617544993, + "grad_norm": 0.7602109909057617, + "learning_rate": 0.00017979276774704342, + "loss": 2.697, + "step": 4198 + }, + { + "epoch": 0.3388749899120329, + "grad_norm": 0.7188911437988281, + "learning_rate": 0.00017978325114960126, + "loss": 2.7147, + "step": 4199 + }, + { + "epoch": 0.33895569364861594, + "grad_norm": 0.7672597765922546, + "learning_rate": 0.00017977373256375194, + "loss": 2.6558, + "step": 4200 + }, + { + "epoch": 0.3390363973851989, + "grad_norm": 0.784187912940979, + "learning_rate": 0.0001797642119897327, + "loss": 2.7005, + "step": 4201 + }, + { + "epoch": 0.33911710112178195, + "grad_norm": 0.7359703779220581, + "learning_rate": 0.00017975468942778075, + "loss": 2.6578, + "step": 4202 + }, + { + "epoch": 0.3391978048583649, + "grad_norm": 0.7776080965995789, + "learning_rate": 0.00017974516487813345, + "loss": 2.6747, + "step": 4203 + }, + { + "epoch": 0.33927850859494796, + "grad_norm": 0.6934135556221008, + "learning_rate": 0.00017973563834102824, + "loss": 2.6335, + "step": 4204 + }, + { + "epoch": 0.33935921233153094, + "grad_norm": 0.7715818881988525, + "learning_rate": 0.00017972610981670245, + "loss": 2.6062, + "step": 4205 + }, + { + "epoch": 0.33943991606811397, + "grad_norm": 0.7466367483139038, + "learning_rate": 0.0001797165793053936, + "loss": 2.7243, + "step": 4206 + }, + { + 
"epoch": 0.33952061980469694, + "grad_norm": 0.7485085129737854, + "learning_rate": 0.00017970704680733926, + "loss": 2.6603, + "step": 4207 + }, + { + "epoch": 0.33960132354128, + "grad_norm": 0.7365782856941223, + "learning_rate": 0.0001796975123227769, + "loss": 2.7179, + "step": 4208 + }, + { + "epoch": 0.33968202727786295, + "grad_norm": 0.8405506014823914, + "learning_rate": 0.00017968797585194422, + "loss": 2.7413, + "step": 4209 + }, + { + "epoch": 0.339762731014446, + "grad_norm": 0.8227888941764832, + "learning_rate": 0.00017967843739507888, + "loss": 2.6814, + "step": 4210 + }, + { + "epoch": 0.33984343475102896, + "grad_norm": 0.8247283697128296, + "learning_rate": 0.0001796688969524186, + "loss": 2.6802, + "step": 4211 + }, + { + "epoch": 0.339924138487612, + "grad_norm": 0.7639476656913757, + "learning_rate": 0.00017965935452420116, + "loss": 2.7422, + "step": 4212 + }, + { + "epoch": 0.34000484222419497, + "grad_norm": 0.7846776247024536, + "learning_rate": 0.00017964981011066436, + "loss": 2.7443, + "step": 4213 + }, + { + "epoch": 0.340085545960778, + "grad_norm": 0.7593334913253784, + "learning_rate": 0.00017964026371204608, + "loss": 2.7179, + "step": 4214 + }, + { + "epoch": 0.340166249697361, + "grad_norm": 0.7878177165985107, + "learning_rate": 0.00017963071532858425, + "loss": 2.7118, + "step": 4215 + }, + { + "epoch": 0.340246953433944, + "grad_norm": 0.7728220224380493, + "learning_rate": 0.00017962116496051685, + "loss": 2.6646, + "step": 4216 + }, + { + "epoch": 0.340327657170527, + "grad_norm": 0.8419308066368103, + "learning_rate": 0.00017961161260808187, + "loss": 2.7829, + "step": 4217 + }, + { + "epoch": 0.34040836090711, + "grad_norm": 0.7066153883934021, + "learning_rate": 0.0001796020582715174, + "loss": 2.6498, + "step": 4218 + }, + { + "epoch": 0.340489064643693, + "grad_norm": 0.7976264953613281, + "learning_rate": 0.00017959250195106156, + "loss": 2.7496, + "step": 4219 + }, + { + "epoch": 0.34056976838027603, + "grad_norm": 
0.736595630645752, + "learning_rate": 0.0001795829436469525, + "loss": 2.6497, + "step": 4220 + }, + { + "epoch": 0.340650472116859, + "grad_norm": 0.818550705909729, + "learning_rate": 0.0001795733833594285, + "loss": 2.6793, + "step": 4221 + }, + { + "epoch": 0.34073117585344204, + "grad_norm": 0.7712778449058533, + "learning_rate": 0.00017956382108872773, + "loss": 2.6215, + "step": 4222 + }, + { + "epoch": 0.340811879590025, + "grad_norm": 0.746306300163269, + "learning_rate": 0.00017955425683508858, + "loss": 2.7372, + "step": 4223 + }, + { + "epoch": 0.34089258332660805, + "grad_norm": 0.7269306778907776, + "learning_rate": 0.00017954469059874937, + "loss": 2.6438, + "step": 4224 + }, + { + "epoch": 0.340973287063191, + "grad_norm": 0.7426211833953857, + "learning_rate": 0.00017953512237994855, + "loss": 2.6539, + "step": 4225 + }, + { + "epoch": 0.34105399079977405, + "grad_norm": 0.7269948124885559, + "learning_rate": 0.0001795255521789246, + "loss": 2.6833, + "step": 4226 + }, + { + "epoch": 0.34113469453635703, + "grad_norm": 0.7279343605041504, + "learning_rate": 0.00017951597999591598, + "loss": 2.7011, + "step": 4227 + }, + { + "epoch": 0.34121539827294006, + "grad_norm": 0.7554663419723511, + "learning_rate": 0.0001795064058311613, + "loss": 2.7036, + "step": 4228 + }, + { + "epoch": 0.34129610200952304, + "grad_norm": 0.7516502141952515, + "learning_rate": 0.00017949682968489912, + "loss": 2.6699, + "step": 4229 + }, + { + "epoch": 0.34137680574610607, + "grad_norm": 0.7931745052337646, + "learning_rate": 0.00017948725155736818, + "loss": 2.6655, + "step": 4230 + }, + { + "epoch": 0.34145750948268905, + "grad_norm": 0.6981344223022461, + "learning_rate": 0.0001794776714488071, + "loss": 2.6987, + "step": 4231 + }, + { + "epoch": 0.3415382132192721, + "grad_norm": 0.7513911724090576, + "learning_rate": 0.00017946808935945474, + "loss": 2.6985, + "step": 4232 + }, + { + "epoch": 0.34161891695585506, + "grad_norm": 0.7373185753822327, + "learning_rate": 
0.00017945850528954983, + "loss": 2.7269, + "step": 4233 + }, + { + "epoch": 0.34169962069243803, + "grad_norm": 0.6990259289741516, + "learning_rate": 0.0001794489192393313, + "loss": 2.6763, + "step": 4234 + }, + { + "epoch": 0.34178032442902107, + "grad_norm": 0.7661817669868469, + "learning_rate": 0.00017943933120903797, + "loss": 2.7057, + "step": 4235 + }, + { + "epoch": 0.34186102816560404, + "grad_norm": 0.7570027112960815, + "learning_rate": 0.0001794297411989089, + "loss": 2.7358, + "step": 4236 + }, + { + "epoch": 0.3419417319021871, + "grad_norm": 0.7751824855804443, + "learning_rate": 0.000179420149209183, + "loss": 2.6771, + "step": 4237 + }, + { + "epoch": 0.34202243563877005, + "grad_norm": 0.8028360605239868, + "learning_rate": 0.0001794105552400994, + "loss": 2.6399, + "step": 4238 + }, + { + "epoch": 0.3421031393753531, + "grad_norm": 0.7398171424865723, + "learning_rate": 0.00017940095929189716, + "loss": 2.6532, + "step": 4239 + }, + { + "epoch": 0.34218384311193606, + "grad_norm": 0.8300225138664246, + "learning_rate": 0.0001793913613648155, + "loss": 2.6798, + "step": 4240 + }, + { + "epoch": 0.3422645468485191, + "grad_norm": 0.7501145005226135, + "learning_rate": 0.00017938176145909356, + "loss": 2.7132, + "step": 4241 + }, + { + "epoch": 0.34234525058510207, + "grad_norm": 0.7178483605384827, + "learning_rate": 0.00017937215957497063, + "loss": 2.7172, + "step": 4242 + }, + { + "epoch": 0.3424259543216851, + "grad_norm": 0.7207306027412415, + "learning_rate": 0.00017936255571268599, + "loss": 2.629, + "step": 4243 + }, + { + "epoch": 0.3425066580582681, + "grad_norm": 0.7339839935302734, + "learning_rate": 0.00017935294987247899, + "loss": 2.6262, + "step": 4244 + }, + { + "epoch": 0.3425873617948511, + "grad_norm": 0.6977292895317078, + "learning_rate": 0.00017934334205458907, + "loss": 2.6949, + "step": 4245 + }, + { + "epoch": 0.3426680655314341, + "grad_norm": 0.7368096113204956, + "learning_rate": 0.00017933373225925564, + "loss": 
2.681, + "step": 4246 + }, + { + "epoch": 0.3427487692680171, + "grad_norm": 0.7234459519386292, + "learning_rate": 0.00017932412048671825, + "loss": 2.6891, + "step": 4247 + }, + { + "epoch": 0.3428294730046001, + "grad_norm": 0.7659995555877686, + "learning_rate": 0.00017931450673721642, + "loss": 2.7394, + "step": 4248 + }, + { + "epoch": 0.3429101767411831, + "grad_norm": 0.7799893617630005, + "learning_rate": 0.00017930489101098974, + "loss": 2.7707, + "step": 4249 + }, + { + "epoch": 0.3429908804777661, + "grad_norm": 0.7063946723937988, + "learning_rate": 0.00017929527330827786, + "loss": 2.6573, + "step": 4250 + }, + { + "epoch": 0.34307158421434913, + "grad_norm": 0.7090561389923096, + "learning_rate": 0.0001792856536293205, + "loss": 2.7095, + "step": 4251 + }, + { + "epoch": 0.3431522879509321, + "grad_norm": 0.8020029067993164, + "learning_rate": 0.0001792760319743574, + "loss": 2.6905, + "step": 4252 + }, + { + "epoch": 0.34323299168751514, + "grad_norm": 0.7221484780311584, + "learning_rate": 0.00017926640834362836, + "loss": 2.6853, + "step": 4253 + }, + { + "epoch": 0.3433136954240981, + "grad_norm": 0.7102623581886292, + "learning_rate": 0.00017925678273737324, + "loss": 2.6821, + "step": 4254 + }, + { + "epoch": 0.34339439916068115, + "grad_norm": 0.7702807784080505, + "learning_rate": 0.00017924715515583187, + "loss": 2.6986, + "step": 4255 + }, + { + "epoch": 0.34347510289726413, + "grad_norm": 0.7938152551651001, + "learning_rate": 0.00017923752559924425, + "loss": 2.7162, + "step": 4256 + }, + { + "epoch": 0.34355580663384716, + "grad_norm": 0.7340937852859497, + "learning_rate": 0.00017922789406785036, + "loss": 2.6904, + "step": 4257 + }, + { + "epoch": 0.34363651037043014, + "grad_norm": 0.7010839581489563, + "learning_rate": 0.00017921826056189026, + "loss": 2.6969, + "step": 4258 + }, + { + "epoch": 0.34371721410701317, + "grad_norm": 0.758178174495697, + "learning_rate": 0.00017920862508160403, + "loss": 2.6391, + "step": 4259 + }, + { + 
"epoch": 0.34379791784359615, + "grad_norm": 0.7861726880073547, + "learning_rate": 0.0001791989876272318, + "loss": 2.7088, + "step": 4260 + }, + { + "epoch": 0.3438786215801792, + "grad_norm": 0.6764364242553711, + "learning_rate": 0.00017918934819901377, + "loss": 2.6221, + "step": 4261 + }, + { + "epoch": 0.34395932531676215, + "grad_norm": 0.76728355884552, + "learning_rate": 0.00017917970679719018, + "loss": 2.6854, + "step": 4262 + }, + { + "epoch": 0.3440400290533452, + "grad_norm": 0.7161166071891785, + "learning_rate": 0.00017917006342200133, + "loss": 2.7048, + "step": 4263 + }, + { + "epoch": 0.34412073278992816, + "grad_norm": 0.7182073593139648, + "learning_rate": 0.00017916041807368753, + "loss": 2.7559, + "step": 4264 + }, + { + "epoch": 0.3442014365265112, + "grad_norm": 0.832258403301239, + "learning_rate": 0.0001791507707524892, + "loss": 2.6743, + "step": 4265 + }, + { + "epoch": 0.34428214026309417, + "grad_norm": 0.7048495411872864, + "learning_rate": 0.00017914112145864675, + "loss": 2.693, + "step": 4266 + }, + { + "epoch": 0.3443628439996772, + "grad_norm": 0.7475518584251404, + "learning_rate": 0.00017913147019240068, + "loss": 2.6881, + "step": 4267 + }, + { + "epoch": 0.3444435477362602, + "grad_norm": 0.72830730676651, + "learning_rate": 0.00017912181695399154, + "loss": 2.659, + "step": 4268 + }, + { + "epoch": 0.3445242514728432, + "grad_norm": 0.7183662056922913, + "learning_rate": 0.00017911216174365988, + "loss": 2.6611, + "step": 4269 + }, + { + "epoch": 0.3446049552094262, + "grad_norm": 0.7487103343009949, + "learning_rate": 0.0001791025045616463, + "loss": 2.6518, + "step": 4270 + }, + { + "epoch": 0.3446856589460092, + "grad_norm": 0.7733812928199768, + "learning_rate": 0.0001790928454081916, + "loss": 2.6359, + "step": 4271 + }, + { + "epoch": 0.3447663626825922, + "grad_norm": 0.7774991393089294, + "learning_rate": 0.00017908318428353642, + "loss": 2.6654, + "step": 4272 + }, + { + "epoch": 0.34484706641917523, + 
"grad_norm": 0.6882895827293396, + "learning_rate": 0.00017907352118792157, + "loss": 2.686, + "step": 4273 + }, + { + "epoch": 0.3449277701557582, + "grad_norm": 0.7571535110473633, + "learning_rate": 0.00017906385612158785, + "loss": 2.7108, + "step": 4274 + }, + { + "epoch": 0.34500847389234124, + "grad_norm": 0.7324517369270325, + "learning_rate": 0.00017905418908477615, + "loss": 2.6663, + "step": 4275 + }, + { + "epoch": 0.3450891776289242, + "grad_norm": 0.7476221919059753, + "learning_rate": 0.00017904452007772744, + "loss": 2.7202, + "step": 4276 + }, + { + "epoch": 0.34516988136550725, + "grad_norm": 0.7648386359214783, + "learning_rate": 0.00017903484910068268, + "loss": 2.6759, + "step": 4277 + }, + { + "epoch": 0.3452505851020902, + "grad_norm": 0.7375434637069702, + "learning_rate": 0.00017902517615388282, + "loss": 2.6603, + "step": 4278 + }, + { + "epoch": 0.34533128883867326, + "grad_norm": 0.7248519062995911, + "learning_rate": 0.00017901550123756906, + "loss": 2.7147, + "step": 4279 + }, + { + "epoch": 0.34541199257525623, + "grad_norm": 0.7264916896820068, + "learning_rate": 0.0001790058243519824, + "loss": 2.6992, + "step": 4280 + }, + { + "epoch": 0.34549269631183926, + "grad_norm": 0.8370026350021362, + "learning_rate": 0.0001789961454973641, + "loss": 2.7114, + "step": 4281 + }, + { + "epoch": 0.34557340004842224, + "grad_norm": 0.72071373462677, + "learning_rate": 0.00017898646467395538, + "loss": 2.6957, + "step": 4282 + }, + { + "epoch": 0.3456541037850053, + "grad_norm": 0.7355397343635559, + "learning_rate": 0.0001789767818819975, + "loss": 2.6744, + "step": 4283 + }, + { + "epoch": 0.34573480752158825, + "grad_norm": 0.734756588935852, + "learning_rate": 0.00017896709712173173, + "loss": 2.726, + "step": 4284 + }, + { + "epoch": 0.3458155112581712, + "grad_norm": 0.7890543341636658, + "learning_rate": 0.00017895741039339945, + "loss": 2.6726, + "step": 4285 + }, + { + "epoch": 0.34589621499475426, + "grad_norm": 0.7768735885620117, + 
"learning_rate": 0.00017894772169724216, + "loss": 2.7617, + "step": 4286 + }, + { + "epoch": 0.34597691873133724, + "grad_norm": 0.7306547164916992, + "learning_rate": 0.00017893803103350125, + "loss": 2.6253, + "step": 4287 + }, + { + "epoch": 0.34605762246792027, + "grad_norm": 0.767066478729248, + "learning_rate": 0.00017892833840241828, + "loss": 2.6522, + "step": 4288 + }, + { + "epoch": 0.34613832620450324, + "grad_norm": 0.7018097639083862, + "learning_rate": 0.00017891864380423477, + "loss": 2.7111, + "step": 4289 + }, + { + "epoch": 0.3462190299410863, + "grad_norm": 0.7305615544319153, + "learning_rate": 0.00017890894723919236, + "loss": 2.6924, + "step": 4290 + }, + { + "epoch": 0.34629973367766925, + "grad_norm": 0.7588002681732178, + "learning_rate": 0.00017889924870753275, + "loss": 2.6952, + "step": 4291 + }, + { + "epoch": 0.3463804374142523, + "grad_norm": 0.7162861824035645, + "learning_rate": 0.0001788895482094976, + "loss": 2.6239, + "step": 4292 + }, + { + "epoch": 0.34646114115083526, + "grad_norm": 0.7494024634361267, + "learning_rate": 0.00017887984574532868, + "loss": 2.6763, + "step": 4293 + }, + { + "epoch": 0.3465418448874183, + "grad_norm": 0.7100037336349487, + "learning_rate": 0.0001788701413152678, + "loss": 2.6378, + "step": 4294 + }, + { + "epoch": 0.34662254862400127, + "grad_norm": 0.7316900491714478, + "learning_rate": 0.00017886043491955684, + "loss": 2.7001, + "step": 4295 + }, + { + "epoch": 0.3467032523605843, + "grad_norm": 0.8467028737068176, + "learning_rate": 0.00017885072655843772, + "loss": 2.7536, + "step": 4296 + }, + { + "epoch": 0.3467839560971673, + "grad_norm": 0.7248796820640564, + "learning_rate": 0.00017884101623215237, + "loss": 2.6956, + "step": 4297 + }, + { + "epoch": 0.3468646598337503, + "grad_norm": 0.7183107137680054, + "learning_rate": 0.0001788313039409428, + "loss": 2.743, + "step": 4298 + }, + { + "epoch": 0.3469453635703333, + "grad_norm": 0.6835163831710815, + "learning_rate": 
0.00017882158968505105, + "loss": 2.7016, + "step": 4299 + }, + { + "epoch": 0.3470260673069163, + "grad_norm": 0.7973365783691406, + "learning_rate": 0.00017881187346471925, + "loss": 2.6927, + "step": 4300 + }, + { + "epoch": 0.3471067710434993, + "grad_norm": 0.700040876865387, + "learning_rate": 0.00017880215528018954, + "loss": 2.6961, + "step": 4301 + }, + { + "epoch": 0.34718747478008233, + "grad_norm": 0.8180583119392395, + "learning_rate": 0.00017879243513170415, + "loss": 2.642, + "step": 4302 + }, + { + "epoch": 0.3472681785166653, + "grad_norm": 0.7134599685668945, + "learning_rate": 0.0001787827130195053, + "loss": 2.6901, + "step": 4303 + }, + { + "epoch": 0.34734888225324834, + "grad_norm": 0.767998218536377, + "learning_rate": 0.0001787729889438353, + "loss": 2.6472, + "step": 4304 + }, + { + "epoch": 0.3474295859898313, + "grad_norm": 0.7260780930519104, + "learning_rate": 0.0001787632629049365, + "loss": 2.6791, + "step": 4305 + }, + { + "epoch": 0.34751028972641435, + "grad_norm": 0.6918236613273621, + "learning_rate": 0.00017875353490305132, + "loss": 2.6596, + "step": 4306 + }, + { + "epoch": 0.3475909934629973, + "grad_norm": 0.7734197974205017, + "learning_rate": 0.00017874380493842216, + "loss": 2.6402, + "step": 4307 + }, + { + "epoch": 0.34767169719958035, + "grad_norm": 0.7051037549972534, + "learning_rate": 0.00017873407301129154, + "loss": 2.7517, + "step": 4308 + }, + { + "epoch": 0.34775240093616333, + "grad_norm": 0.7026919722557068, + "learning_rate": 0.00017872433912190203, + "loss": 2.7058, + "step": 4309 + }, + { + "epoch": 0.34783310467274636, + "grad_norm": 0.7248546481132507, + "learning_rate": 0.00017871460327049618, + "loss": 2.666, + "step": 4310 + }, + { + "epoch": 0.34791380840932934, + "grad_norm": 0.7348842620849609, + "learning_rate": 0.0001787048654573167, + "loss": 2.7712, + "step": 4311 + }, + { + "epoch": 0.34799451214591237, + "grad_norm": 0.7923693656921387, + "learning_rate": 0.00017869512568260618, + "loss": 
2.6469, + "step": 4312 + }, + { + "epoch": 0.34807521588249535, + "grad_norm": 0.7604066729545593, + "learning_rate": 0.00017868538394660743, + "loss": 2.7152, + "step": 4313 + }, + { + "epoch": 0.3481559196190784, + "grad_norm": 0.6811137795448303, + "learning_rate": 0.00017867564024956324, + "loss": 2.715, + "step": 4314 + }, + { + "epoch": 0.34823662335566136, + "grad_norm": 0.7292799353599548, + "learning_rate": 0.00017866589459171643, + "loss": 2.6374, + "step": 4315 + }, + { + "epoch": 0.3483173270922444, + "grad_norm": 0.6961250901222229, + "learning_rate": 0.0001786561469733099, + "loss": 2.6592, + "step": 4316 + }, + { + "epoch": 0.34839803082882737, + "grad_norm": 0.7447086572647095, + "learning_rate": 0.00017864639739458658, + "loss": 2.6965, + "step": 4317 + }, + { + "epoch": 0.3484787345654104, + "grad_norm": 0.7107378244400024, + "learning_rate": 0.00017863664585578942, + "loss": 2.7057, + "step": 4318 + }, + { + "epoch": 0.3485594383019934, + "grad_norm": 0.7372235655784607, + "learning_rate": 0.00017862689235716153, + "loss": 2.6289, + "step": 4319 + }, + { + "epoch": 0.3486401420385764, + "grad_norm": 0.7360481023788452, + "learning_rate": 0.00017861713689894593, + "loss": 2.7208, + "step": 4320 + }, + { + "epoch": 0.3487208457751594, + "grad_norm": 0.7378106713294983, + "learning_rate": 0.00017860737948138575, + "loss": 2.6836, + "step": 4321 + }, + { + "epoch": 0.3488015495117424, + "grad_norm": 0.7110548615455627, + "learning_rate": 0.00017859762010472423, + "loss": 2.6941, + "step": 4322 + }, + { + "epoch": 0.3488822532483254, + "grad_norm": 0.7419706583023071, + "learning_rate": 0.00017858785876920455, + "loss": 2.6591, + "step": 4323 + }, + { + "epoch": 0.3489629569849084, + "grad_norm": 0.7759542465209961, + "learning_rate": 0.00017857809547506997, + "loss": 2.6966, + "step": 4324 + }, + { + "epoch": 0.3490436607214914, + "grad_norm": 0.7894207239151001, + "learning_rate": 0.0001785683302225639, + "loss": 2.7298, + "step": 4325 + }, + { + 
"epoch": 0.34912436445807443, + "grad_norm": 0.7342399954795837, + "learning_rate": 0.0001785585630119296, + "loss": 2.6998, + "step": 4326 + }, + { + "epoch": 0.3492050681946574, + "grad_norm": 0.8684173822402954, + "learning_rate": 0.0001785487938434106, + "loss": 2.7179, + "step": 4327 + }, + { + "epoch": 0.34928577193124044, + "grad_norm": 0.7557523846626282, + "learning_rate": 0.00017853902271725033, + "loss": 2.7081, + "step": 4328 + }, + { + "epoch": 0.3493664756678234, + "grad_norm": 0.7910173535346985, + "learning_rate": 0.0001785292496336923, + "loss": 2.718, + "step": 4329 + }, + { + "epoch": 0.34944717940440645, + "grad_norm": 0.7878917455673218, + "learning_rate": 0.00017851947459298007, + "loss": 2.674, + "step": 4330 + }, + { + "epoch": 0.3495278831409894, + "grad_norm": 0.7290656566619873, + "learning_rate": 0.0001785096975953573, + "loss": 2.6962, + "step": 4331 + }, + { + "epoch": 0.34960858687757246, + "grad_norm": 0.8465737104415894, + "learning_rate": 0.00017849991864106763, + "loss": 2.6793, + "step": 4332 + }, + { + "epoch": 0.34968929061415543, + "grad_norm": 0.7183132171630859, + "learning_rate": 0.0001784901377303548, + "loss": 2.6902, + "step": 4333 + }, + { + "epoch": 0.34976999435073847, + "grad_norm": 0.7535461783409119, + "learning_rate": 0.00017848035486346255, + "loss": 2.7153, + "step": 4334 + }, + { + "epoch": 0.34985069808732144, + "grad_norm": 0.778734028339386, + "learning_rate": 0.0001784705700406347, + "loss": 2.6316, + "step": 4335 + }, + { + "epoch": 0.3499314018239044, + "grad_norm": 0.6937401294708252, + "learning_rate": 0.00017846078326211516, + "loss": 2.6902, + "step": 4336 + }, + { + "epoch": 0.35001210556048745, + "grad_norm": 0.7450751066207886, + "learning_rate": 0.00017845099452814774, + "loss": 2.6898, + "step": 4337 + }, + { + "epoch": 0.35009280929707043, + "grad_norm": 0.7535614967346191, + "learning_rate": 0.0001784412038389765, + "loss": 2.6969, + "step": 4338 + }, + { + "epoch": 0.35017351303365346, + 
"grad_norm": 0.6971385478973389, + "learning_rate": 0.00017843141119484543, + "loss": 2.6517, + "step": 4339 + }, + { + "epoch": 0.35025421677023644, + "grad_norm": 0.7233202457427979, + "learning_rate": 0.00017842161659599858, + "loss": 2.7332, + "step": 4340 + }, + { + "epoch": 0.35033492050681947, + "grad_norm": 0.7870340347290039, + "learning_rate": 0.00017841182004268, + "loss": 2.6485, + "step": 4341 + }, + { + "epoch": 0.35041562424340245, + "grad_norm": 0.7387053966522217, + "learning_rate": 0.0001784020215351339, + "loss": 2.6945, + "step": 4342 + }, + { + "epoch": 0.3504963279799855, + "grad_norm": 0.8357887268066406, + "learning_rate": 0.00017839222107360453, + "loss": 2.703, + "step": 4343 + }, + { + "epoch": 0.35057703171656845, + "grad_norm": 0.7197332978248596, + "learning_rate": 0.000178382418658336, + "loss": 2.6649, + "step": 4344 + }, + { + "epoch": 0.3506577354531515, + "grad_norm": 0.7416980862617493, + "learning_rate": 0.0001783726142895728, + "loss": 2.7393, + "step": 4345 + }, + { + "epoch": 0.35073843918973446, + "grad_norm": 0.6807832717895508, + "learning_rate": 0.00017836280796755912, + "loss": 2.6619, + "step": 4346 + }, + { + "epoch": 0.3508191429263175, + "grad_norm": 0.6858795285224915, + "learning_rate": 0.00017835299969253945, + "loss": 2.6266, + "step": 4347 + }, + { + "epoch": 0.35089984666290047, + "grad_norm": 0.8432363867759705, + "learning_rate": 0.0001783431894647582, + "loss": 2.6534, + "step": 4348 + }, + { + "epoch": 0.3509805503994835, + "grad_norm": 0.7240749001502991, + "learning_rate": 0.0001783333772844599, + "loss": 2.6851, + "step": 4349 + }, + { + "epoch": 0.3510612541360665, + "grad_norm": 0.7814531326293945, + "learning_rate": 0.00017832356315188906, + "loss": 2.7085, + "step": 4350 + }, + { + "epoch": 0.3511419578726495, + "grad_norm": 0.6989716291427612, + "learning_rate": 0.00017831374706729026, + "loss": 2.6674, + "step": 4351 + }, + { + "epoch": 0.3512226616092325, + "grad_norm": 0.7118446230888367, + 
"learning_rate": 0.0001783039290309082, + "loss": 2.6837, + "step": 4352 + }, + { + "epoch": 0.3513033653458155, + "grad_norm": 0.7641892433166504, + "learning_rate": 0.00017829410904298754, + "loss": 2.6415, + "step": 4353 + }, + { + "epoch": 0.3513840690823985, + "grad_norm": 0.6975794434547424, + "learning_rate": 0.000178284287103773, + "loss": 2.6679, + "step": 4354 + }, + { + "epoch": 0.35146477281898153, + "grad_norm": 0.7192546725273132, + "learning_rate": 0.00017827446321350943, + "loss": 2.6539, + "step": 4355 + }, + { + "epoch": 0.3515454765555645, + "grad_norm": 0.8749549388885498, + "learning_rate": 0.00017826463737244155, + "loss": 2.7254, + "step": 4356 + }, + { + "epoch": 0.35162618029214754, + "grad_norm": 0.8509732484817505, + "learning_rate": 0.0001782548095808144, + "loss": 2.7679, + "step": 4357 + }, + { + "epoch": 0.3517068840287305, + "grad_norm": 0.7647901773452759, + "learning_rate": 0.00017824497983887278, + "loss": 2.7049, + "step": 4358 + }, + { + "epoch": 0.35178758776531355, + "grad_norm": 0.7551973462104797, + "learning_rate": 0.00017823514814686178, + "loss": 2.7086, + "step": 4359 + }, + { + "epoch": 0.3518682915018965, + "grad_norm": 0.730140209197998, + "learning_rate": 0.00017822531450502633, + "loss": 2.6334, + "step": 4360 + }, + { + "epoch": 0.35194899523847956, + "grad_norm": 0.8210160136222839, + "learning_rate": 0.00017821547891361158, + "loss": 2.7248, + "step": 4361 + }, + { + "epoch": 0.35202969897506253, + "grad_norm": 0.761972963809967, + "learning_rate": 0.00017820564137286264, + "loss": 2.6502, + "step": 4362 + }, + { + "epoch": 0.35211040271164556, + "grad_norm": 0.7564061284065247, + "learning_rate": 0.00017819580188302466, + "loss": 2.6795, + "step": 4363 + }, + { + "epoch": 0.35219110644822854, + "grad_norm": 0.7382947206497192, + "learning_rate": 0.00017818596044434293, + "loss": 2.6754, + "step": 4364 + }, + { + "epoch": 0.3522718101848116, + "grad_norm": 0.737194836139679, + "learning_rate": 
0.00017817611705706266, + "loss": 2.7098, + "step": 4365 + }, + { + "epoch": 0.35235251392139455, + "grad_norm": 0.7183281779289246, + "learning_rate": 0.0001781662717214292, + "loss": 2.6528, + "step": 4366 + }, + { + "epoch": 0.3524332176579776, + "grad_norm": 0.7785990238189697, + "learning_rate": 0.00017815642443768794, + "loss": 2.6419, + "step": 4367 + }, + { + "epoch": 0.35251392139456056, + "grad_norm": 0.7114452719688416, + "learning_rate": 0.00017814657520608427, + "loss": 2.7088, + "step": 4368 + }, + { + "epoch": 0.3525946251311436, + "grad_norm": 0.746969997882843, + "learning_rate": 0.00017813672402686365, + "loss": 2.7199, + "step": 4369 + }, + { + "epoch": 0.35267532886772657, + "grad_norm": 0.7700605988502502, + "learning_rate": 0.00017812687090027165, + "loss": 2.6713, + "step": 4370 + }, + { + "epoch": 0.3527560326043096, + "grad_norm": 0.7733504772186279, + "learning_rate": 0.0001781170158265538, + "loss": 2.6916, + "step": 4371 + }, + { + "epoch": 0.3528367363408926, + "grad_norm": 0.7769689559936523, + "learning_rate": 0.00017810715880595566, + "loss": 2.7787, + "step": 4372 + }, + { + "epoch": 0.3529174400774756, + "grad_norm": 0.7538996934890747, + "learning_rate": 0.000178097299838723, + "loss": 2.6964, + "step": 4373 + }, + { + "epoch": 0.3529981438140586, + "grad_norm": 0.7777890563011169, + "learning_rate": 0.00017808743892510146, + "loss": 2.6882, + "step": 4374 + }, + { + "epoch": 0.3530788475506416, + "grad_norm": 0.8331751823425293, + "learning_rate": 0.00017807757606533683, + "loss": 2.7113, + "step": 4375 + }, + { + "epoch": 0.3531595512872246, + "grad_norm": 0.8039207458496094, + "learning_rate": 0.00017806771125967492, + "loss": 2.6694, + "step": 4376 + }, + { + "epoch": 0.3532402550238076, + "grad_norm": 0.7727575898170471, + "learning_rate": 0.00017805784450836154, + "loss": 2.6639, + "step": 4377 + }, + { + "epoch": 0.3533209587603906, + "grad_norm": 0.8247967958450317, + "learning_rate": 0.00017804797581164264, + "loss": 
2.6539, + "step": 4378 + }, + { + "epoch": 0.35340166249697363, + "grad_norm": 0.7574009299278259, + "learning_rate": 0.0001780381051697642, + "loss": 2.7163, + "step": 4379 + }, + { + "epoch": 0.3534823662335566, + "grad_norm": 0.7304368615150452, + "learning_rate": 0.0001780282325829721, + "loss": 2.5759, + "step": 4380 + }, + { + "epoch": 0.35356306997013964, + "grad_norm": 0.7133963704109192, + "learning_rate": 0.00017801835805151257, + "loss": 2.7008, + "step": 4381 + }, + { + "epoch": 0.3536437737067226, + "grad_norm": 0.7525407075881958, + "learning_rate": 0.00017800848157563157, + "loss": 2.6785, + "step": 4382 + }, + { + "epoch": 0.35372447744330565, + "grad_norm": 0.7306779623031616, + "learning_rate": 0.00017799860315557528, + "loss": 2.6454, + "step": 4383 + }, + { + "epoch": 0.35380518117988863, + "grad_norm": 0.6657043695449829, + "learning_rate": 0.00017798872279158994, + "loss": 2.708, + "step": 4384 + }, + { + "epoch": 0.35388588491647166, + "grad_norm": 0.7655978202819824, + "learning_rate": 0.00017797884048392177, + "loss": 2.727, + "step": 4385 + }, + { + "epoch": 0.35396658865305464, + "grad_norm": 0.6802939176559448, + "learning_rate": 0.00017796895623281702, + "loss": 2.659, + "step": 4386 + }, + { + "epoch": 0.3540472923896376, + "grad_norm": 0.7191160917282104, + "learning_rate": 0.00017795907003852207, + "loss": 2.6335, + "step": 4387 + }, + { + "epoch": 0.35412799612622065, + "grad_norm": 0.7771886587142944, + "learning_rate": 0.00017794918190128337, + "loss": 2.6658, + "step": 4388 + }, + { + "epoch": 0.3542086998628036, + "grad_norm": 0.7133512496948242, + "learning_rate": 0.00017793929182134723, + "loss": 2.6701, + "step": 4389 + }, + { + "epoch": 0.35428940359938665, + "grad_norm": 0.7795221209526062, + "learning_rate": 0.00017792939979896022, + "loss": 2.6932, + "step": 4390 + }, + { + "epoch": 0.35437010733596963, + "grad_norm": 0.726767897605896, + "learning_rate": 0.00017791950583436887, + "loss": 2.676, + "step": 4391 + }, + { + 
"epoch": 0.35445081107255266, + "grad_norm": 0.7447288632392883, + "learning_rate": 0.00017790960992781972, + "loss": 2.7195, + "step": 4392 + }, + { + "epoch": 0.35453151480913564, + "grad_norm": 0.8053649663925171, + "learning_rate": 0.0001778997120795595, + "loss": 2.6851, + "step": 4393 + }, + { + "epoch": 0.35461221854571867, + "grad_norm": 0.7258884906768799, + "learning_rate": 0.00017788981228983474, + "loss": 2.6819, + "step": 4394 + }, + { + "epoch": 0.35469292228230165, + "grad_norm": 0.7279395461082458, + "learning_rate": 0.0001778799105588923, + "loss": 2.6954, + "step": 4395 + }, + { + "epoch": 0.3547736260188847, + "grad_norm": 0.7372962236404419, + "learning_rate": 0.0001778700068869789, + "loss": 2.7049, + "step": 4396 + }, + { + "epoch": 0.35485432975546766, + "grad_norm": 0.712003767490387, + "learning_rate": 0.00017786010127434135, + "loss": 2.7413, + "step": 4397 + }, + { + "epoch": 0.3549350334920507, + "grad_norm": 0.7487424612045288, + "learning_rate": 0.0001778501937212266, + "loss": 2.7231, + "step": 4398 + }, + { + "epoch": 0.35501573722863367, + "grad_norm": 0.73053377866745, + "learning_rate": 0.00017784028422788146, + "loss": 2.7029, + "step": 4399 + }, + { + "epoch": 0.3550964409652167, + "grad_norm": 0.697062611579895, + "learning_rate": 0.00017783037279455298, + "loss": 2.7139, + "step": 4400 + }, + { + "epoch": 0.3551771447017997, + "grad_norm": 0.7750880718231201, + "learning_rate": 0.00017782045942148819, + "loss": 2.6601, + "step": 4401 + }, + { + "epoch": 0.3552578484383827, + "grad_norm": 0.7124977111816406, + "learning_rate": 0.00017781054410893413, + "loss": 2.6119, + "step": 4402 + }, + { + "epoch": 0.3553385521749657, + "grad_norm": 0.7773111462593079, + "learning_rate": 0.00017780062685713785, + "loss": 2.7181, + "step": 4403 + }, + { + "epoch": 0.3554192559115487, + "grad_norm": 0.7282142639160156, + "learning_rate": 0.00017779070766634663, + "loss": 2.7141, + "step": 4404 + }, + { + "epoch": 0.3554999596481317, + 
"grad_norm": 0.8578598499298096, + "learning_rate": 0.0001777807865368076, + "loss": 2.7628, + "step": 4405 + }, + { + "epoch": 0.3555806633847147, + "grad_norm": 0.7126399874687195, + "learning_rate": 0.00017777086346876809, + "loss": 2.6914, + "step": 4406 + }, + { + "epoch": 0.3556613671212977, + "grad_norm": 0.8026365637779236, + "learning_rate": 0.00017776093846247533, + "loss": 2.7059, + "step": 4407 + }, + { + "epoch": 0.35574207085788073, + "grad_norm": 0.7839884161949158, + "learning_rate": 0.0001777510115181767, + "loss": 2.7265, + "step": 4408 + }, + { + "epoch": 0.3558227745944637, + "grad_norm": 0.7498767971992493, + "learning_rate": 0.00017774108263611966, + "loss": 2.7201, + "step": 4409 + }, + { + "epoch": 0.35590347833104674, + "grad_norm": 0.6996301412582397, + "learning_rate": 0.0001777311518165516, + "loss": 2.6271, + "step": 4410 + }, + { + "epoch": 0.3559841820676297, + "grad_norm": 0.7721461057662964, + "learning_rate": 0.00017772121905972003, + "loss": 2.6739, + "step": 4411 + }, + { + "epoch": 0.35606488580421275, + "grad_norm": 0.8018803000450134, + "learning_rate": 0.00017771128436587256, + "loss": 2.7092, + "step": 4412 + }, + { + "epoch": 0.3561455895407957, + "grad_norm": 0.7185639142990112, + "learning_rate": 0.0001777013477352567, + "loss": 2.6996, + "step": 4413 + }, + { + "epoch": 0.35622629327737876, + "grad_norm": 0.7218519449234009, + "learning_rate": 0.0001776914091681202, + "loss": 2.6555, + "step": 4414 + }, + { + "epoch": 0.35630699701396173, + "grad_norm": 0.7234479188919067, + "learning_rate": 0.00017768146866471062, + "loss": 2.6762, + "step": 4415 + }, + { + "epoch": 0.35638770075054477, + "grad_norm": 0.6723350286483765, + "learning_rate": 0.00017767152622527582, + "loss": 2.6272, + "step": 4416 + }, + { + "epoch": 0.35646840448712774, + "grad_norm": 0.7281947731971741, + "learning_rate": 0.00017766158185006356, + "loss": 2.7216, + "step": 4417 + }, + { + "epoch": 0.3565491082237108, + "grad_norm": 0.8350874781608582, + 
"learning_rate": 0.00017765163553932166, + "loss": 2.6619, + "step": 4418 + }, + { + "epoch": 0.35662981196029375, + "grad_norm": 0.7454007267951965, + "learning_rate": 0.00017764168729329801, + "loss": 2.6623, + "step": 4419 + }, + { + "epoch": 0.3567105156968768, + "grad_norm": 0.7419041395187378, + "learning_rate": 0.00017763173711224058, + "loss": 2.6773, + "step": 4420 + }, + { + "epoch": 0.35679121943345976, + "grad_norm": 0.7965987920761108, + "learning_rate": 0.0001776217849963973, + "loss": 2.6426, + "step": 4421 + }, + { + "epoch": 0.3568719231700428, + "grad_norm": 0.7093302607536316, + "learning_rate": 0.00017761183094601622, + "loss": 2.6745, + "step": 4422 + }, + { + "epoch": 0.35695262690662577, + "grad_norm": 0.7937216758728027, + "learning_rate": 0.00017760187496134548, + "loss": 2.7275, + "step": 4423 + }, + { + "epoch": 0.3570333306432088, + "grad_norm": 0.9185259938240051, + "learning_rate": 0.00017759191704263313, + "loss": 2.7055, + "step": 4424 + }, + { + "epoch": 0.3571140343797918, + "grad_norm": 0.7365124821662903, + "learning_rate": 0.00017758195719012743, + "loss": 2.6504, + "step": 4425 + }, + { + "epoch": 0.3571947381163748, + "grad_norm": 0.6992416977882385, + "learning_rate": 0.0001775719954040765, + "loss": 2.6684, + "step": 4426 + }, + { + "epoch": 0.3572754418529578, + "grad_norm": 0.7742372751235962, + "learning_rate": 0.00017756203168472866, + "loss": 2.6877, + "step": 4427 + }, + { + "epoch": 0.3573561455895408, + "grad_norm": 0.7448472380638123, + "learning_rate": 0.0001775520660323323, + "loss": 2.7027, + "step": 4428 + }, + { + "epoch": 0.3574368493261238, + "grad_norm": 0.7201915979385376, + "learning_rate": 0.00017754209844713569, + "loss": 2.7046, + "step": 4429 + }, + { + "epoch": 0.3575175530627068, + "grad_norm": 0.6675081253051758, + "learning_rate": 0.0001775321289293873, + "loss": 2.6503, + "step": 4430 + }, + { + "epoch": 0.3575982567992898, + "grad_norm": 0.7252706289291382, + "learning_rate": 
0.0001775221574793356, + "loss": 2.6053, + "step": 4431 + }, + { + "epoch": 0.35767896053587284, + "grad_norm": 0.7134702801704407, + "learning_rate": 0.00017751218409722906, + "loss": 2.6857, + "step": 4432 + }, + { + "epoch": 0.3577596642724558, + "grad_norm": 0.7074102163314819, + "learning_rate": 0.0001775022087833163, + "loss": 2.6871, + "step": 4433 + }, + { + "epoch": 0.35784036800903885, + "grad_norm": 0.693520724773407, + "learning_rate": 0.00017749223153784588, + "loss": 2.6629, + "step": 4434 + }, + { + "epoch": 0.3579210717456218, + "grad_norm": 0.6933221817016602, + "learning_rate": 0.0001774822523610665, + "loss": 2.6793, + "step": 4435 + }, + { + "epoch": 0.35800177548220485, + "grad_norm": 0.75307297706604, + "learning_rate": 0.00017747227125322685, + "loss": 2.7012, + "step": 4436 + }, + { + "epoch": 0.35808247921878783, + "grad_norm": 0.7732915282249451, + "learning_rate": 0.0001774622882145757, + "loss": 2.6908, + "step": 4437 + }, + { + "epoch": 0.3581631829553708, + "grad_norm": 0.7067054510116577, + "learning_rate": 0.0001774523032453618, + "loss": 2.7494, + "step": 4438 + }, + { + "epoch": 0.35824388669195384, + "grad_norm": 0.7412838935852051, + "learning_rate": 0.00017744231634583406, + "loss": 2.6734, + "step": 4439 + }, + { + "epoch": 0.3583245904285368, + "grad_norm": 0.7663930654525757, + "learning_rate": 0.00017743232751624136, + "loss": 2.6952, + "step": 4440 + }, + { + "epoch": 0.35840529416511985, + "grad_norm": 0.70650714635849, + "learning_rate": 0.00017742233675683268, + "loss": 2.6806, + "step": 4441 + }, + { + "epoch": 0.3584859979017028, + "grad_norm": 0.698310375213623, + "learning_rate": 0.00017741234406785692, + "loss": 2.6471, + "step": 4442 + }, + { + "epoch": 0.35856670163828586, + "grad_norm": 0.7274026274681091, + "learning_rate": 0.00017740234944956323, + "loss": 2.6688, + "step": 4443 + }, + { + "epoch": 0.35864740537486883, + "grad_norm": 0.6944074034690857, + "learning_rate": 0.00017739235290220067, + "loss": 
2.6954, + "step": 4444 + }, + { + "epoch": 0.35872810911145186, + "grad_norm": 0.841995358467102, + "learning_rate": 0.00017738235442601834, + "loss": 2.7169, + "step": 4445 + }, + { + "epoch": 0.35880881284803484, + "grad_norm": 0.74863201379776, + "learning_rate": 0.00017737235402126545, + "loss": 2.6534, + "step": 4446 + }, + { + "epoch": 0.3588895165846179, + "grad_norm": 0.7260422110557556, + "learning_rate": 0.00017736235168819126, + "loss": 2.6266, + "step": 4447 + }, + { + "epoch": 0.35897022032120085, + "grad_norm": 0.7450951337814331, + "learning_rate": 0.00017735234742704504, + "loss": 2.7328, + "step": 4448 + }, + { + "epoch": 0.3590509240577839, + "grad_norm": 0.6942493319511414, + "learning_rate": 0.00017734234123807614, + "loss": 2.7219, + "step": 4449 + }, + { + "epoch": 0.35913162779436686, + "grad_norm": 0.7676761746406555, + "learning_rate": 0.00017733233312153393, + "loss": 2.6594, + "step": 4450 + }, + { + "epoch": 0.3592123315309499, + "grad_norm": 0.7446104288101196, + "learning_rate": 0.00017732232307766778, + "loss": 2.6877, + "step": 4451 + }, + { + "epoch": 0.35929303526753287, + "grad_norm": 0.7551130056381226, + "learning_rate": 0.00017731231110672727, + "loss": 2.672, + "step": 4452 + }, + { + "epoch": 0.3593737390041159, + "grad_norm": 0.6876464486122131, + "learning_rate": 0.00017730229720896182, + "loss": 2.6658, + "step": 4453 + }, + { + "epoch": 0.3594544427406989, + "grad_norm": 0.6992844343185425, + "learning_rate": 0.00017729228138462107, + "loss": 2.6805, + "step": 4454 + }, + { + "epoch": 0.3595351464772819, + "grad_norm": 0.8437497615814209, + "learning_rate": 0.00017728226363395466, + "loss": 2.6884, + "step": 4455 + }, + { + "epoch": 0.3596158502138649, + "grad_norm": 0.7669322490692139, + "learning_rate": 0.00017727224395721217, + "loss": 2.6432, + "step": 4456 + }, + { + "epoch": 0.3596965539504479, + "grad_norm": 0.7613428831100464, + "learning_rate": 0.0001772622223546434, + "loss": 2.6124, + "step": 4457 + }, + { + 
"epoch": 0.3597772576870309, + "grad_norm": 0.719932496547699, + "learning_rate": 0.00017725219882649807, + "loss": 2.6623, + "step": 4458 + }, + { + "epoch": 0.3598579614236139, + "grad_norm": 0.7650800347328186, + "learning_rate": 0.000177242173373026, + "loss": 2.7551, + "step": 4459 + }, + { + "epoch": 0.3599386651601969, + "grad_norm": 0.7423754930496216, + "learning_rate": 0.0001772321459944771, + "loss": 2.7375, + "step": 4460 + }, + { + "epoch": 0.36001936889677993, + "grad_norm": 0.7602835297584534, + "learning_rate": 0.0001772221166911012, + "loss": 2.7086, + "step": 4461 + }, + { + "epoch": 0.3601000726333629, + "grad_norm": 0.7246943712234497, + "learning_rate": 0.00017721208546314827, + "loss": 2.7068, + "step": 4462 + }, + { + "epoch": 0.36018077636994594, + "grad_norm": 0.715965211391449, + "learning_rate": 0.00017720205231086837, + "loss": 2.689, + "step": 4463 + }, + { + "epoch": 0.3602614801065289, + "grad_norm": 0.7696218490600586, + "learning_rate": 0.00017719201723451151, + "loss": 2.611, + "step": 4464 + }, + { + "epoch": 0.36034218384311195, + "grad_norm": 0.7599236369132996, + "learning_rate": 0.00017718198023432779, + "loss": 2.6504, + "step": 4465 + }, + { + "epoch": 0.36042288757969493, + "grad_norm": 0.7674956321716309, + "learning_rate": 0.0001771719413105674, + "loss": 2.7559, + "step": 4466 + }, + { + "epoch": 0.36050359131627796, + "grad_norm": 0.7263289093971252, + "learning_rate": 0.00017716190046348045, + "loss": 2.6822, + "step": 4467 + }, + { + "epoch": 0.36058429505286094, + "grad_norm": 0.7564195990562439, + "learning_rate": 0.0001771518576933173, + "loss": 2.7319, + "step": 4468 + }, + { + "epoch": 0.36066499878944397, + "grad_norm": 0.7291253805160522, + "learning_rate": 0.00017714181300032813, + "loss": 2.704, + "step": 4469 + }, + { + "epoch": 0.36074570252602695, + "grad_norm": 0.7354169487953186, + "learning_rate": 0.00017713176638476332, + "loss": 2.6344, + "step": 4470 + }, + { + "epoch": 0.36082640626261, + 
"grad_norm": 0.7104110717773438, + "learning_rate": 0.0001771217178468733, + "loss": 2.665, + "step": 4471 + }, + { + "epoch": 0.36090710999919295, + "grad_norm": 0.6913934350013733, + "learning_rate": 0.00017711166738690847, + "loss": 2.6674, + "step": 4472 + }, + { + "epoch": 0.360987813735776, + "grad_norm": 0.7999634742736816, + "learning_rate": 0.0001771016150051193, + "loss": 2.6847, + "step": 4473 + }, + { + "epoch": 0.36106851747235896, + "grad_norm": 0.7878915667533875, + "learning_rate": 0.00017709156070175634, + "loss": 2.7125, + "step": 4474 + }, + { + "epoch": 0.361149221208942, + "grad_norm": 0.7145688533782959, + "learning_rate": 0.00017708150447707017, + "loss": 2.6863, + "step": 4475 + }, + { + "epoch": 0.36122992494552497, + "grad_norm": 0.7518604397773743, + "learning_rate": 0.00017707144633131143, + "loss": 2.6616, + "step": 4476 + }, + { + "epoch": 0.361310628682108, + "grad_norm": 0.735634982585907, + "learning_rate": 0.0001770613862647308, + "loss": 2.6315, + "step": 4477 + }, + { + "epoch": 0.361391332418691, + "grad_norm": 0.7925180196762085, + "learning_rate": 0.00017705132427757895, + "loss": 2.6951, + "step": 4478 + }, + { + "epoch": 0.361472036155274, + "grad_norm": 0.6949547529220581, + "learning_rate": 0.00017704126037010667, + "loss": 2.6934, + "step": 4479 + }, + { + "epoch": 0.361552739891857, + "grad_norm": 0.7233577966690063, + "learning_rate": 0.00017703119454256483, + "loss": 2.6773, + "step": 4480 + }, + { + "epoch": 0.36163344362844, + "grad_norm": 0.7303269505500793, + "learning_rate": 0.00017702112679520424, + "loss": 2.6351, + "step": 4481 + }, + { + "epoch": 0.361714147365023, + "grad_norm": 0.7620660066604614, + "learning_rate": 0.00017701105712827583, + "loss": 2.6748, + "step": 4482 + }, + { + "epoch": 0.36179485110160603, + "grad_norm": 0.7744965553283691, + "learning_rate": 0.00017700098554203057, + "loss": 2.7013, + "step": 4483 + }, + { + "epoch": 0.361875554838189, + "grad_norm": 0.8017357587814331, + 
"learning_rate": 0.00017699091203671947, + "loss": 2.7273, + "step": 4484 + }, + { + "epoch": 0.36195625857477204, + "grad_norm": 0.8014432191848755, + "learning_rate": 0.0001769808366125936, + "loss": 2.6864, + "step": 4485 + }, + { + "epoch": 0.362036962311355, + "grad_norm": 0.6914888620376587, + "learning_rate": 0.00017697075926990406, + "loss": 2.6851, + "step": 4486 + }, + { + "epoch": 0.36211766604793805, + "grad_norm": 0.7472698092460632, + "learning_rate": 0.00017696068000890196, + "loss": 2.695, + "step": 4487 + }, + { + "epoch": 0.362198369784521, + "grad_norm": 0.7506285309791565, + "learning_rate": 0.00017695059882983855, + "loss": 2.7055, + "step": 4488 + }, + { + "epoch": 0.362279073521104, + "grad_norm": 0.7501141428947449, + "learning_rate": 0.00017694051573296507, + "loss": 2.7109, + "step": 4489 + }, + { + "epoch": 0.36235977725768703, + "grad_norm": 0.6654670834541321, + "learning_rate": 0.00017693043071853284, + "loss": 2.6165, + "step": 4490 + }, + { + "epoch": 0.36244048099427, + "grad_norm": 0.7894664406776428, + "learning_rate": 0.00017692034378679315, + "loss": 2.7274, + "step": 4491 + }, + { + "epoch": 0.36252118473085304, + "grad_norm": 0.7206711173057556, + "learning_rate": 0.00017691025493799743, + "loss": 2.7047, + "step": 4492 + }, + { + "epoch": 0.362601888467436, + "grad_norm": 0.7656282186508179, + "learning_rate": 0.00017690016417239708, + "loss": 2.696, + "step": 4493 + }, + { + "epoch": 0.36268259220401905, + "grad_norm": 0.7357437610626221, + "learning_rate": 0.00017689007149024362, + "loss": 2.7279, + "step": 4494 + }, + { + "epoch": 0.362763295940602, + "grad_norm": 0.7262146472930908, + "learning_rate": 0.00017687997689178864, + "loss": 2.6964, + "step": 4495 + }, + { + "epoch": 0.36284399967718506, + "grad_norm": 0.7839891910552979, + "learning_rate": 0.00017686988037728365, + "loss": 2.651, + "step": 4496 + }, + { + "epoch": 0.36292470341376803, + "grad_norm": 0.7150306105613708, + "learning_rate": 0.00017685978194698028, 
+ "loss": 2.6481, + "step": 4497 + }, + { + "epoch": 0.36300540715035107, + "grad_norm": 0.7144685387611389, + "learning_rate": 0.00017684968160113025, + "loss": 2.7169, + "step": 4498 + }, + { + "epoch": 0.36308611088693404, + "grad_norm": 0.7593061327934265, + "learning_rate": 0.00017683957933998525, + "loss": 2.7543, + "step": 4499 + }, + { + "epoch": 0.3631668146235171, + "grad_norm": 0.7301446199417114, + "learning_rate": 0.00017682947516379707, + "loss": 2.6806, + "step": 4500 + }, + { + "epoch": 0.36324751836010005, + "grad_norm": 0.7314243316650391, + "learning_rate": 0.00017681936907281757, + "loss": 2.7227, + "step": 4501 + }, + { + "epoch": 0.3633282220966831, + "grad_norm": 0.7695817351341248, + "learning_rate": 0.00017680926106729852, + "loss": 2.7229, + "step": 4502 + }, + { + "epoch": 0.36340892583326606, + "grad_norm": 0.6885762810707092, + "learning_rate": 0.00017679915114749198, + "loss": 2.7246, + "step": 4503 + }, + { + "epoch": 0.3634896295698491, + "grad_norm": 0.6893608570098877, + "learning_rate": 0.0001767890393136498, + "loss": 2.6572, + "step": 4504 + }, + { + "epoch": 0.36357033330643207, + "grad_norm": 0.7011978626251221, + "learning_rate": 0.00017677892556602402, + "loss": 2.6775, + "step": 4505 + }, + { + "epoch": 0.3636510370430151, + "grad_norm": 0.6693406105041504, + "learning_rate": 0.00017676880990486672, + "loss": 2.6183, + "step": 4506 + }, + { + "epoch": 0.3637317407795981, + "grad_norm": 0.7023048996925354, + "learning_rate": 0.00017675869233043002, + "loss": 2.6772, + "step": 4507 + }, + { + "epoch": 0.3638124445161811, + "grad_norm": 0.6903806328773499, + "learning_rate": 0.00017674857284296605, + "loss": 2.6486, + "step": 4508 + }, + { + "epoch": 0.3638931482527641, + "grad_norm": 0.6799258589744568, + "learning_rate": 0.000176738451442727, + "loss": 2.6305, + "step": 4509 + }, + { + "epoch": 0.3639738519893471, + "grad_norm": 0.7935682535171509, + "learning_rate": 0.00017672832812996517, + "loss": 2.7365, + "step": 4510 + 
}, + { + "epoch": 0.3640545557259301, + "grad_norm": 0.7593684196472168, + "learning_rate": 0.00017671820290493284, + "loss": 2.7029, + "step": 4511 + }, + { + "epoch": 0.36413525946251313, + "grad_norm": 0.7185288667678833, + "learning_rate": 0.00017670807576788234, + "loss": 2.6646, + "step": 4512 + }, + { + "epoch": 0.3642159631990961, + "grad_norm": 0.7260291576385498, + "learning_rate": 0.00017669794671906606, + "loss": 2.6615, + "step": 4513 + }, + { + "epoch": 0.36429666693567914, + "grad_norm": 0.6933417916297913, + "learning_rate": 0.00017668781575873646, + "loss": 2.6678, + "step": 4514 + }, + { + "epoch": 0.3643773706722621, + "grad_norm": 0.7657343149185181, + "learning_rate": 0.00017667768288714603, + "loss": 2.7155, + "step": 4515 + }, + { + "epoch": 0.36445807440884515, + "grad_norm": 0.7326949834823608, + "learning_rate": 0.0001766675481045473, + "loss": 2.732, + "step": 4516 + }, + { + "epoch": 0.3645387781454281, + "grad_norm": 0.7370324730873108, + "learning_rate": 0.0001766574114111929, + "loss": 2.6124, + "step": 4517 + }, + { + "epoch": 0.36461948188201115, + "grad_norm": 0.7280072569847107, + "learning_rate": 0.00017664727280733536, + "loss": 2.6793, + "step": 4518 + }, + { + "epoch": 0.36470018561859413, + "grad_norm": 0.7174237370491028, + "learning_rate": 0.00017663713229322748, + "loss": 2.629, + "step": 4519 + }, + { + "epoch": 0.36478088935517716, + "grad_norm": 0.6660771369934082, + "learning_rate": 0.0001766269898691219, + "loss": 2.6862, + "step": 4520 + }, + { + "epoch": 0.36486159309176014, + "grad_norm": 0.7024446725845337, + "learning_rate": 0.00017661684553527143, + "loss": 2.6602, + "step": 4521 + }, + { + "epoch": 0.36494229682834317, + "grad_norm": 0.7419618964195251, + "learning_rate": 0.0001766066992919289, + "loss": 2.6904, + "step": 4522 + }, + { + "epoch": 0.36502300056492615, + "grad_norm": 0.7425804138183594, + "learning_rate": 0.00017659655113934716, + "loss": 2.7312, + "step": 4523 + }, + { + "epoch": 
0.3651037043015092, + "grad_norm": 0.7117013931274414, + "learning_rate": 0.00017658640107777915, + "loss": 2.6411, + "step": 4524 + }, + { + "epoch": 0.36518440803809216, + "grad_norm": 0.719613254070282, + "learning_rate": 0.00017657624910747782, + "loss": 2.6799, + "step": 4525 + }, + { + "epoch": 0.3652651117746752, + "grad_norm": 0.7654159665107727, + "learning_rate": 0.0001765660952286962, + "loss": 2.6675, + "step": 4526 + }, + { + "epoch": 0.36534581551125817, + "grad_norm": 0.7111814022064209, + "learning_rate": 0.00017655593944168734, + "loss": 2.6717, + "step": 4527 + }, + { + "epoch": 0.3654265192478412, + "grad_norm": 0.7494712471961975, + "learning_rate": 0.00017654578174670436, + "loss": 2.7181, + "step": 4528 + }, + { + "epoch": 0.3655072229844242, + "grad_norm": 0.8062291145324707, + "learning_rate": 0.0001765356221440004, + "loss": 2.6563, + "step": 4529 + }, + { + "epoch": 0.3655879267210072, + "grad_norm": 0.7923303842544556, + "learning_rate": 0.00017652546063382866, + "loss": 2.6295, + "step": 4530 + }, + { + "epoch": 0.3656686304575902, + "grad_norm": 0.7417340278625488, + "learning_rate": 0.00017651529721644238, + "loss": 2.6727, + "step": 4531 + }, + { + "epoch": 0.3657493341941732, + "grad_norm": 0.7326166033744812, + "learning_rate": 0.0001765051318920949, + "loss": 2.702, + "step": 4532 + }, + { + "epoch": 0.3658300379307562, + "grad_norm": 0.8133745193481445, + "learning_rate": 0.00017649496466103957, + "loss": 2.7157, + "step": 4533 + }, + { + "epoch": 0.3659107416673392, + "grad_norm": 0.710502564907074, + "learning_rate": 0.00017648479552352973, + "loss": 2.6668, + "step": 4534 + }, + { + "epoch": 0.3659914454039222, + "grad_norm": 0.6947012543678284, + "learning_rate": 0.00017647462447981885, + "loss": 2.6865, + "step": 4535 + }, + { + "epoch": 0.36607214914050523, + "grad_norm": 0.8432720899581909, + "learning_rate": 0.0001764644515301604, + "loss": 2.6226, + "step": 4536 + }, + { + "epoch": 0.3661528528770882, + "grad_norm": 
0.7321269512176514, + "learning_rate": 0.00017645427667480802, + "loss": 2.662, + "step": 4537 + }, + { + "epoch": 0.36623355661367124, + "grad_norm": 0.8099743723869324, + "learning_rate": 0.00017644409991401515, + "loss": 2.6853, + "step": 4538 + }, + { + "epoch": 0.3663142603502542, + "grad_norm": 0.6885355114936829, + "learning_rate": 0.0001764339212480355, + "loss": 2.6672, + "step": 4539 + }, + { + "epoch": 0.3663949640868372, + "grad_norm": 0.911396324634552, + "learning_rate": 0.00017642374067712276, + "loss": 2.5778, + "step": 4540 + }, + { + "epoch": 0.3664756678234202, + "grad_norm": 0.7461941838264465, + "learning_rate": 0.0001764135582015306, + "loss": 2.6629, + "step": 4541 + }, + { + "epoch": 0.3665563715600032, + "grad_norm": 0.772741436958313, + "learning_rate": 0.0001764033738215128, + "loss": 2.725, + "step": 4542 + }, + { + "epoch": 0.36663707529658623, + "grad_norm": 0.7256152629852295, + "learning_rate": 0.0001763931875373232, + "loss": 2.6439, + "step": 4543 + }, + { + "epoch": 0.3667177790331692, + "grad_norm": 0.8089167475700378, + "learning_rate": 0.0001763829993492157, + "loss": 2.5972, + "step": 4544 + }, + { + "epoch": 0.36679848276975224, + "grad_norm": 0.7115232944488525, + "learning_rate": 0.0001763728092574442, + "loss": 2.633, + "step": 4545 + }, + { + "epoch": 0.3668791865063352, + "grad_norm": 0.7189347147941589, + "learning_rate": 0.00017636261726226266, + "loss": 2.619, + "step": 4546 + }, + { + "epoch": 0.36695989024291825, + "grad_norm": 0.7667742967605591, + "learning_rate": 0.00017635242336392506, + "loss": 2.667, + "step": 4547 + }, + { + "epoch": 0.36704059397950123, + "grad_norm": 0.7982457876205444, + "learning_rate": 0.00017634222756268545, + "loss": 2.6667, + "step": 4548 + }, + { + "epoch": 0.36712129771608426, + "grad_norm": 0.7465574145317078, + "learning_rate": 0.00017633202985879804, + "loss": 2.6436, + "step": 4549 + }, + { + "epoch": 0.36720200145266724, + "grad_norm": 0.7297804951667786, + "learning_rate": 
0.00017632183025251686, + "loss": 2.6464, + "step": 4550 + }, + { + "epoch": 0.36728270518925027, + "grad_norm": 0.6885054111480713, + "learning_rate": 0.0001763116287440962, + "loss": 2.6742, + "step": 4551 + }, + { + "epoch": 0.36736340892583325, + "grad_norm": 0.7341574430465698, + "learning_rate": 0.00017630142533379023, + "loss": 2.6688, + "step": 4552 + }, + { + "epoch": 0.3674441126624163, + "grad_norm": 0.8565430045127869, + "learning_rate": 0.0001762912200218533, + "loss": 2.6889, + "step": 4553 + }, + { + "epoch": 0.36752481639899925, + "grad_norm": 0.7509489059448242, + "learning_rate": 0.00017628101280853974, + "loss": 2.6177, + "step": 4554 + }, + { + "epoch": 0.3676055201355823, + "grad_norm": 0.8128334879875183, + "learning_rate": 0.00017627080369410396, + "loss": 2.7301, + "step": 4555 + }, + { + "epoch": 0.36768622387216526, + "grad_norm": 0.7511637210845947, + "learning_rate": 0.00017626059267880035, + "loss": 2.7327, + "step": 4556 + }, + { + "epoch": 0.3677669276087483, + "grad_norm": 0.8350822925567627, + "learning_rate": 0.00017625037976288347, + "loss": 2.6073, + "step": 4557 + }, + { + "epoch": 0.36784763134533127, + "grad_norm": 0.7743313312530518, + "learning_rate": 0.00017624016494660776, + "loss": 2.7055, + "step": 4558 + }, + { + "epoch": 0.3679283350819143, + "grad_norm": 0.8196439146995544, + "learning_rate": 0.00017622994823022787, + "loss": 2.6565, + "step": 4559 + }, + { + "epoch": 0.3680090388184973, + "grad_norm": 0.7223393321037292, + "learning_rate": 0.00017621972961399837, + "loss": 2.68, + "step": 4560 + }, + { + "epoch": 0.3680897425550803, + "grad_norm": 0.7215418219566345, + "learning_rate": 0.000176209509098174, + "loss": 2.6627, + "step": 4561 + }, + { + "epoch": 0.3681704462916633, + "grad_norm": 0.8050473928451538, + "learning_rate": 0.00017619928668300946, + "loss": 2.5802, + "step": 4562 + }, + { + "epoch": 0.3682511500282463, + "grad_norm": 0.7452750205993652, + "learning_rate": 0.00017618906236875948, + "loss": 
2.6524, + "step": 4563 + }, + { + "epoch": 0.3683318537648293, + "grad_norm": 0.7950742244720459, + "learning_rate": 0.00017617883615567888, + "loss": 2.6371, + "step": 4564 + }, + { + "epoch": 0.36841255750141233, + "grad_norm": 0.7185397744178772, + "learning_rate": 0.00017616860804402261, + "loss": 2.6531, + "step": 4565 + }, + { + "epoch": 0.3684932612379953, + "grad_norm": 0.7480553388595581, + "learning_rate": 0.0001761583780340455, + "loss": 2.6727, + "step": 4566 + }, + { + "epoch": 0.36857396497457834, + "grad_norm": 0.7740724086761475, + "learning_rate": 0.00017614814612600251, + "loss": 2.6095, + "step": 4567 + }, + { + "epoch": 0.3686546687111613, + "grad_norm": 0.9159810543060303, + "learning_rate": 0.00017613791232014866, + "loss": 2.7039, + "step": 4568 + }, + { + "epoch": 0.36873537244774435, + "grad_norm": 0.7478305697441101, + "learning_rate": 0.00017612767661673905, + "loss": 2.6307, + "step": 4569 + }, + { + "epoch": 0.3688160761843273, + "grad_norm": 0.9154726266860962, + "learning_rate": 0.00017611743901602874, + "loss": 2.675, + "step": 4570 + }, + { + "epoch": 0.36889677992091036, + "grad_norm": 0.7903287410736084, + "learning_rate": 0.0001761071995182728, + "loss": 2.6938, + "step": 4571 + }, + { + "epoch": 0.36897748365749333, + "grad_norm": 0.7919119596481323, + "learning_rate": 0.0001760969581237266, + "loss": 2.7092, + "step": 4572 + }, + { + "epoch": 0.36905818739407636, + "grad_norm": 0.8052253723144531, + "learning_rate": 0.00017608671483264522, + "loss": 2.6914, + "step": 4573 + }, + { + "epoch": 0.36913889113065934, + "grad_norm": 0.7660435438156128, + "learning_rate": 0.00017607646964528403, + "loss": 2.674, + "step": 4574 + }, + { + "epoch": 0.3692195948672424, + "grad_norm": 0.8554383516311646, + "learning_rate": 0.00017606622256189836, + "loss": 2.6792, + "step": 4575 + }, + { + "epoch": 0.36930029860382535, + "grad_norm": 0.7719140648841858, + "learning_rate": 0.00017605597358274358, + "loss": 2.6836, + "step": 4576 + }, + { + 
"epoch": 0.3693810023404084, + "grad_norm": 0.733068585395813, + "learning_rate": 0.00017604572270807513, + "loss": 2.6496, + "step": 4577 + }, + { + "epoch": 0.36946170607699136, + "grad_norm": 0.7622445225715637, + "learning_rate": 0.00017603546993814849, + "loss": 2.7097, + "step": 4578 + }, + { + "epoch": 0.3695424098135744, + "grad_norm": 0.7326679825782776, + "learning_rate": 0.00017602521527321913, + "loss": 2.6786, + "step": 4579 + }, + { + "epoch": 0.36962311355015737, + "grad_norm": 0.7579432129859924, + "learning_rate": 0.00017601495871354272, + "loss": 2.6618, + "step": 4580 + }, + { + "epoch": 0.3697038172867404, + "grad_norm": 0.8812715411186218, + "learning_rate": 0.00017600470025937485, + "loss": 2.6942, + "step": 4581 + }, + { + "epoch": 0.3697845210233234, + "grad_norm": 0.7230449318885803, + "learning_rate": 0.00017599443991097116, + "loss": 2.6374, + "step": 4582 + }, + { + "epoch": 0.3698652247599064, + "grad_norm": 0.8347739577293396, + "learning_rate": 0.00017598417766858735, + "loss": 2.6653, + "step": 4583 + }, + { + "epoch": 0.3699459284964894, + "grad_norm": 0.7826598882675171, + "learning_rate": 0.0001759739135324792, + "loss": 2.6342, + "step": 4584 + }, + { + "epoch": 0.3700266322330724, + "grad_norm": 0.749060332775116, + "learning_rate": 0.00017596364750290254, + "loss": 2.7256, + "step": 4585 + }, + { + "epoch": 0.3701073359696554, + "grad_norm": 0.7470815181732178, + "learning_rate": 0.00017595337958011323, + "loss": 2.6485, + "step": 4586 + }, + { + "epoch": 0.3701880397062384, + "grad_norm": 0.7251530289649963, + "learning_rate": 0.00017594310976436716, + "loss": 2.6613, + "step": 4587 + }, + { + "epoch": 0.3702687434428214, + "grad_norm": 0.7143718004226685, + "learning_rate": 0.00017593283805592027, + "loss": 2.6101, + "step": 4588 + }, + { + "epoch": 0.37034944717940443, + "grad_norm": 0.7378203272819519, + "learning_rate": 0.00017592256445502855, + "loss": 2.6735, + "step": 4589 + }, + { + "epoch": 0.3704301509159874, + 
"grad_norm": 0.7193629741668701, + "learning_rate": 0.00017591228896194808, + "loss": 2.719, + "step": 4590 + }, + { + "epoch": 0.3705108546525704, + "grad_norm": 0.7377258539199829, + "learning_rate": 0.00017590201157693494, + "loss": 2.6789, + "step": 4591 + }, + { + "epoch": 0.3705915583891534, + "grad_norm": 0.7468351721763611, + "learning_rate": 0.00017589173230024522, + "loss": 2.6389, + "step": 4592 + }, + { + "epoch": 0.3706722621257364, + "grad_norm": 0.7612246870994568, + "learning_rate": 0.0001758814511321352, + "loss": 2.7045, + "step": 4593 + }, + { + "epoch": 0.37075296586231943, + "grad_norm": 0.7603838443756104, + "learning_rate": 0.00017587116807286102, + "loss": 2.7323, + "step": 4594 + }, + { + "epoch": 0.3708336695989024, + "grad_norm": 0.7436477541923523, + "learning_rate": 0.000175860883122679, + "loss": 2.7331, + "step": 4595 + }, + { + "epoch": 0.37091437333548544, + "grad_norm": 0.7004369497299194, + "learning_rate": 0.0001758505962818455, + "loss": 2.6418, + "step": 4596 + }, + { + "epoch": 0.3709950770720684, + "grad_norm": 0.711980938911438, + "learning_rate": 0.00017584030755061683, + "loss": 2.6184, + "step": 4597 + }, + { + "epoch": 0.37107578080865145, + "grad_norm": 0.6999367475509644, + "learning_rate": 0.0001758300169292495, + "loss": 2.6584, + "step": 4598 + }, + { + "epoch": 0.3711564845452344, + "grad_norm": 0.6755785942077637, + "learning_rate": 0.0001758197244179999, + "loss": 2.664, + "step": 4599 + }, + { + "epoch": 0.37123718828181745, + "grad_norm": 0.7174055576324463, + "learning_rate": 0.00017580943001712455, + "loss": 2.6821, + "step": 4600 + }, + { + "epoch": 0.37131789201840043, + "grad_norm": 0.8218933343887329, + "learning_rate": 0.00017579913372688005, + "loss": 2.6355, + "step": 4601 + }, + { + "epoch": 0.37139859575498346, + "grad_norm": 0.7417960166931152, + "learning_rate": 0.000175788835547523, + "loss": 2.7226, + "step": 4602 + }, + { + "epoch": 0.37147929949156644, + "grad_norm": 0.824421763420105, + 
"learning_rate": 0.00017577853547931006, + "loss": 2.6526, + "step": 4603 + }, + { + "epoch": 0.37156000322814947, + "grad_norm": 0.7391949892044067, + "learning_rate": 0.00017576823352249794, + "loss": 2.6702, + "step": 4604 + }, + { + "epoch": 0.37164070696473245, + "grad_norm": 0.7890247106552124, + "learning_rate": 0.00017575792967734337, + "loss": 2.7281, + "step": 4605 + }, + { + "epoch": 0.3717214107013155, + "grad_norm": 0.785527765750885, + "learning_rate": 0.00017574762394410317, + "loss": 2.6728, + "step": 4606 + }, + { + "epoch": 0.37180211443789846, + "grad_norm": 0.7195863127708435, + "learning_rate": 0.00017573731632303415, + "loss": 2.6329, + "step": 4607 + }, + { + "epoch": 0.3718828181744815, + "grad_norm": 0.7896780371665955, + "learning_rate": 0.0001757270068143932, + "loss": 2.6776, + "step": 4608 + }, + { + "epoch": 0.37196352191106447, + "grad_norm": 0.7568275332450867, + "learning_rate": 0.00017571669541843735, + "loss": 2.6668, + "step": 4609 + }, + { + "epoch": 0.3720442256476475, + "grad_norm": 0.7923939228057861, + "learning_rate": 0.00017570638213542348, + "loss": 2.7033, + "step": 4610 + }, + { + "epoch": 0.3721249293842305, + "grad_norm": 0.7586569786071777, + "learning_rate": 0.00017569606696560868, + "loss": 2.7286, + "step": 4611 + }, + { + "epoch": 0.3722056331208135, + "grad_norm": 0.8222009539604187, + "learning_rate": 0.00017568574990925004, + "loss": 2.6448, + "step": 4612 + }, + { + "epoch": 0.3722863368573965, + "grad_norm": 0.7144019603729248, + "learning_rate": 0.00017567543096660466, + "loss": 2.6671, + "step": 4613 + }, + { + "epoch": 0.3723670405939795, + "grad_norm": 0.7602240443229675, + "learning_rate": 0.00017566511013792973, + "loss": 2.6492, + "step": 4614 + }, + { + "epoch": 0.3724477443305625, + "grad_norm": 0.7949689626693726, + "learning_rate": 0.00017565478742348245, + "loss": 2.7002, + "step": 4615 + }, + { + "epoch": 0.3725284480671455, + "grad_norm": 0.6922519207000732, + "learning_rate": 
0.00017564446282352012, + "loss": 2.6917, + "step": 4616 + }, + { + "epoch": 0.3726091518037285, + "grad_norm": 0.7382915616035461, + "learning_rate": 0.0001756341363383, + "loss": 2.6375, + "step": 4617 + }, + { + "epoch": 0.37268985554031153, + "grad_norm": 0.7511888742446899, + "learning_rate": 0.00017562380796807956, + "loss": 2.6823, + "step": 4618 + }, + { + "epoch": 0.3727705592768945, + "grad_norm": 0.7273457646369934, + "learning_rate": 0.00017561347771311608, + "loss": 2.6124, + "step": 4619 + }, + { + "epoch": 0.37285126301347754, + "grad_norm": 0.689440131187439, + "learning_rate": 0.0001756031455736671, + "loss": 2.6931, + "step": 4620 + }, + { + "epoch": 0.3729319667500605, + "grad_norm": 0.7755659222602844, + "learning_rate": 0.00017559281154999013, + "loss": 2.6273, + "step": 4621 + }, + { + "epoch": 0.37301267048664355, + "grad_norm": 0.6940193176269531, + "learning_rate": 0.00017558247564234265, + "loss": 2.641, + "step": 4622 + }, + { + "epoch": 0.3730933742232265, + "grad_norm": 0.7387529015541077, + "learning_rate": 0.00017557213785098232, + "loss": 2.7229, + "step": 4623 + }, + { + "epoch": 0.37317407795980956, + "grad_norm": 0.6807727217674255, + "learning_rate": 0.00017556179817616678, + "loss": 2.6469, + "step": 4624 + }, + { + "epoch": 0.37325478169639253, + "grad_norm": 0.7203819751739502, + "learning_rate": 0.0001755514566181537, + "loss": 2.6239, + "step": 4625 + }, + { + "epoch": 0.37333548543297557, + "grad_norm": 0.9345876574516296, + "learning_rate": 0.0001755411131772008, + "loss": 2.7154, + "step": 4626 + }, + { + "epoch": 0.37341618916955854, + "grad_norm": 0.6787357330322266, + "learning_rate": 0.00017553076785356594, + "loss": 2.6374, + "step": 4627 + }, + { + "epoch": 0.3734968929061416, + "grad_norm": 0.7153670191764832, + "learning_rate": 0.0001755204206475069, + "loss": 2.6734, + "step": 4628 + }, + { + "epoch": 0.37357759664272455, + "grad_norm": 0.736464262008667, + "learning_rate": 0.00017551007155928154, + "loss": 
2.7241, + "step": 4629 + }, + { + "epoch": 0.3736583003793076, + "grad_norm": 0.7134939432144165, + "learning_rate": 0.0001754997205891478, + "loss": 2.682, + "step": 4630 + }, + { + "epoch": 0.37373900411589056, + "grad_norm": 0.7071199417114258, + "learning_rate": 0.0001754893677373637, + "loss": 2.7361, + "step": 4631 + }, + { + "epoch": 0.3738197078524736, + "grad_norm": 0.7040621638298035, + "learning_rate": 0.00017547901300418722, + "loss": 2.7031, + "step": 4632 + }, + { + "epoch": 0.37390041158905657, + "grad_norm": 0.7179287075996399, + "learning_rate": 0.00017546865638987642, + "loss": 2.6755, + "step": 4633 + }, + { + "epoch": 0.3739811153256396, + "grad_norm": 0.7579259276390076, + "learning_rate": 0.00017545829789468944, + "loss": 2.6514, + "step": 4634 + }, + { + "epoch": 0.3740618190622226, + "grad_norm": 0.7825835347175598, + "learning_rate": 0.0001754479375188844, + "loss": 2.6876, + "step": 4635 + }, + { + "epoch": 0.3741425227988056, + "grad_norm": 0.7913421988487244, + "learning_rate": 0.00017543757526271956, + "loss": 2.7153, + "step": 4636 + }, + { + "epoch": 0.3742232265353886, + "grad_norm": 0.7766042947769165, + "learning_rate": 0.00017542721112645313, + "loss": 2.645, + "step": 4637 + }, + { + "epoch": 0.3743039302719716, + "grad_norm": 0.7363953590393066, + "learning_rate": 0.00017541684511034343, + "loss": 2.6376, + "step": 4638 + }, + { + "epoch": 0.3743846340085546, + "grad_norm": 0.6928617358207703, + "learning_rate": 0.00017540647721464881, + "loss": 2.6882, + "step": 4639 + }, + { + "epoch": 0.3744653377451376, + "grad_norm": 0.7832257747650146, + "learning_rate": 0.0001753961074396277, + "loss": 2.7305, + "step": 4640 + }, + { + "epoch": 0.3745460414817206, + "grad_norm": 0.7180350422859192, + "learning_rate": 0.00017538573578553844, + "loss": 2.6783, + "step": 4641 + }, + { + "epoch": 0.3746267452183036, + "grad_norm": 0.718209981918335, + "learning_rate": 0.00017537536225263964, + "loss": 2.6961, + "step": 4642 + }, + { + 
"epoch": 0.3747074489548866, + "grad_norm": 0.7056655287742615, + "learning_rate": 0.00017536498684118975, + "loss": 2.7096, + "step": 4643 + }, + { + "epoch": 0.3747881526914696, + "grad_norm": 0.8004828691482544, + "learning_rate": 0.0001753546095514474, + "loss": 2.7168, + "step": 4644 + }, + { + "epoch": 0.3748688564280526, + "grad_norm": 0.7630821466445923, + "learning_rate": 0.0001753442303836712, + "loss": 2.7091, + "step": 4645 + }, + { + "epoch": 0.3749495601646356, + "grad_norm": 0.7539668083190918, + "learning_rate": 0.0001753338493381198, + "loss": 2.651, + "step": 4646 + }, + { + "epoch": 0.37503026390121863, + "grad_norm": 0.7243319749832153, + "learning_rate": 0.000175323466415052, + "loss": 2.6765, + "step": 4647 + }, + { + "epoch": 0.3751109676378016, + "grad_norm": 0.8906281590461731, + "learning_rate": 0.00017531308161472647, + "loss": 2.5938, + "step": 4648 + }, + { + "epoch": 0.37519167137438464, + "grad_norm": 0.787966251373291, + "learning_rate": 0.0001753026949374021, + "loss": 2.6011, + "step": 4649 + }, + { + "epoch": 0.3752723751109676, + "grad_norm": 0.7763915061950684, + "learning_rate": 0.00017529230638333772, + "loss": 2.7197, + "step": 4650 + }, + { + "epoch": 0.37535307884755065, + "grad_norm": 0.7717103362083435, + "learning_rate": 0.00017528191595279224, + "loss": 2.6605, + "step": 4651 + }, + { + "epoch": 0.3754337825841336, + "grad_norm": 0.7340055108070374, + "learning_rate": 0.00017527152364602464, + "loss": 2.6856, + "step": 4652 + }, + { + "epoch": 0.37551448632071666, + "grad_norm": 0.7805169820785522, + "learning_rate": 0.0001752611294632939, + "loss": 2.7088, + "step": 4653 + }, + { + "epoch": 0.37559519005729963, + "grad_norm": 0.7894891500473022, + "learning_rate": 0.00017525073340485912, + "loss": 2.6691, + "step": 4654 + }, + { + "epoch": 0.37567589379388266, + "grad_norm": 0.7627872824668884, + "learning_rate": 0.0001752403354709793, + "loss": 2.6536, + "step": 4655 + }, + { + "epoch": 0.37575659753046564, + 
"grad_norm": 0.8097225427627563, + "learning_rate": 0.00017522993566191367, + "loss": 2.7108, + "step": 4656 + }, + { + "epoch": 0.3758373012670487, + "grad_norm": 0.834449827671051, + "learning_rate": 0.00017521953397792137, + "loss": 2.7565, + "step": 4657 + }, + { + "epoch": 0.37591800500363165, + "grad_norm": 0.7924147844314575, + "learning_rate": 0.00017520913041926166, + "loss": 2.7101, + "step": 4658 + }, + { + "epoch": 0.3759987087402147, + "grad_norm": 0.7407249808311462, + "learning_rate": 0.00017519872498619385, + "loss": 2.6501, + "step": 4659 + }, + { + "epoch": 0.37607941247679766, + "grad_norm": 0.7251791954040527, + "learning_rate": 0.0001751883176789772, + "loss": 2.6786, + "step": 4660 + }, + { + "epoch": 0.3761601162133807, + "grad_norm": 0.7120431661605835, + "learning_rate": 0.00017517790849787116, + "loss": 2.7244, + "step": 4661 + }, + { + "epoch": 0.37624081994996367, + "grad_norm": 0.724836528301239, + "learning_rate": 0.00017516749744313513, + "loss": 2.7099, + "step": 4662 + }, + { + "epoch": 0.3763215236865467, + "grad_norm": 0.7788939476013184, + "learning_rate": 0.00017515708451502855, + "loss": 2.6206, + "step": 4663 + }, + { + "epoch": 0.3764022274231297, + "grad_norm": 0.7518914341926575, + "learning_rate": 0.00017514666971381099, + "loss": 2.7505, + "step": 4664 + }, + { + "epoch": 0.3764829311597127, + "grad_norm": 0.8004730939865112, + "learning_rate": 0.00017513625303974194, + "loss": 2.6119, + "step": 4665 + }, + { + "epoch": 0.3765636348962957, + "grad_norm": 0.7661109566688538, + "learning_rate": 0.00017512583449308107, + "loss": 2.724, + "step": 4666 + }, + { + "epoch": 0.3766443386328787, + "grad_norm": 0.7669692635536194, + "learning_rate": 0.00017511541407408805, + "loss": 2.7109, + "step": 4667 + }, + { + "epoch": 0.3767250423694617, + "grad_norm": 0.738608181476593, + "learning_rate": 0.00017510499178302253, + "loss": 2.6642, + "step": 4668 + }, + { + "epoch": 0.3768057461060447, + "grad_norm": 0.7194661498069763, + 
"learning_rate": 0.00017509456762014432, + "loss": 2.6906, + "step": 4669 + }, + { + "epoch": 0.3768864498426277, + "grad_norm": 0.7025040984153748, + "learning_rate": 0.00017508414158571314, + "loss": 2.6596, + "step": 4670 + }, + { + "epoch": 0.37696715357921073, + "grad_norm": 0.7756575345993042, + "learning_rate": 0.00017507371367998892, + "loss": 2.7114, + "step": 4671 + }, + { + "epoch": 0.3770478573157937, + "grad_norm": 0.834966778755188, + "learning_rate": 0.00017506328390323148, + "loss": 2.7554, + "step": 4672 + }, + { + "epoch": 0.37712856105237674, + "grad_norm": 0.6997280120849609, + "learning_rate": 0.0001750528522557008, + "loss": 2.6285, + "step": 4673 + }, + { + "epoch": 0.3772092647889597, + "grad_norm": 0.7101716995239258, + "learning_rate": 0.0001750424187376569, + "loss": 2.6465, + "step": 4674 + }, + { + "epoch": 0.37728996852554275, + "grad_norm": 0.6577222347259521, + "learning_rate": 0.0001750319833493597, + "loss": 2.6372, + "step": 4675 + }, + { + "epoch": 0.37737067226212573, + "grad_norm": 0.7402529120445251, + "learning_rate": 0.00017502154609106937, + "loss": 2.6464, + "step": 4676 + }, + { + "epoch": 0.37745137599870876, + "grad_norm": 0.6858490705490112, + "learning_rate": 0.00017501110696304596, + "loss": 2.6141, + "step": 4677 + }, + { + "epoch": 0.37753207973529174, + "grad_norm": 0.729468822479248, + "learning_rate": 0.0001750006659655497, + "loss": 2.6671, + "step": 4678 + }, + { + "epoch": 0.37761278347187477, + "grad_norm": 0.7197559475898743, + "learning_rate": 0.0001749902230988408, + "loss": 2.6462, + "step": 4679 + }, + { + "epoch": 0.37769348720845775, + "grad_norm": 0.7171144485473633, + "learning_rate": 0.00017497977836317957, + "loss": 2.6427, + "step": 4680 + }, + { + "epoch": 0.3777741909450408, + "grad_norm": 0.7423805594444275, + "learning_rate": 0.00017496933175882617, + "loss": 2.662, + "step": 4681 + }, + { + "epoch": 0.37785489468162375, + "grad_norm": 0.7498061060905457, + "learning_rate": 
0.0001749588832860411, + "loss": 2.6243, + "step": 4682 + }, + { + "epoch": 0.3779355984182068, + "grad_norm": 0.7706165909767151, + "learning_rate": 0.0001749484329450847, + "loss": 2.6928, + "step": 4683 + }, + { + "epoch": 0.37801630215478976, + "grad_norm": 0.723363995552063, + "learning_rate": 0.00017493798073621745, + "loss": 2.6787, + "step": 4684 + }, + { + "epoch": 0.3780970058913728, + "grad_norm": 0.7444875836372375, + "learning_rate": 0.00017492752665969983, + "loss": 2.6789, + "step": 4685 + }, + { + "epoch": 0.37817770962795577, + "grad_norm": 0.6946491599082947, + "learning_rate": 0.00017491707071579237, + "loss": 2.6761, + "step": 4686 + }, + { + "epoch": 0.3782584133645388, + "grad_norm": 0.7171412706375122, + "learning_rate": 0.00017490661290475568, + "loss": 2.6788, + "step": 4687 + }, + { + "epoch": 0.3783391171011218, + "grad_norm": 0.7503272891044617, + "learning_rate": 0.00017489615322685038, + "loss": 2.7057, + "step": 4688 + }, + { + "epoch": 0.3784198208377048, + "grad_norm": 0.7458747625350952, + "learning_rate": 0.00017488569168233714, + "loss": 2.6857, + "step": 4689 + }, + { + "epoch": 0.3785005245742878, + "grad_norm": 0.7030516266822815, + "learning_rate": 0.0001748752282714768, + "loss": 2.6522, + "step": 4690 + }, + { + "epoch": 0.3785812283108708, + "grad_norm": 0.7717545628547668, + "learning_rate": 0.00017486476299452994, + "loss": 2.6527, + "step": 4691 + }, + { + "epoch": 0.3786619320474538, + "grad_norm": 0.6788322925567627, + "learning_rate": 0.0001748542958517575, + "loss": 2.6362, + "step": 4692 + }, + { + "epoch": 0.3787426357840368, + "grad_norm": 0.8518630266189575, + "learning_rate": 0.0001748438268434204, + "loss": 2.6812, + "step": 4693 + }, + { + "epoch": 0.3788233395206198, + "grad_norm": 0.7167141437530518, + "learning_rate": 0.00017483335596977945, + "loss": 2.6414, + "step": 4694 + }, + { + "epoch": 0.3789040432572028, + "grad_norm": 0.7748053073883057, + "learning_rate": 0.00017482288323109567, + "loss": 
2.7291, + "step": 4695 + }, + { + "epoch": 0.3789847469937858, + "grad_norm": 0.7203041911125183, + "learning_rate": 0.00017481240862763002, + "loss": 2.6957, + "step": 4696 + }, + { + "epoch": 0.3790654507303688, + "grad_norm": 0.7973119020462036, + "learning_rate": 0.00017480193215964362, + "loss": 2.7456, + "step": 4697 + }, + { + "epoch": 0.3791461544669518, + "grad_norm": 0.7851223945617676, + "learning_rate": 0.00017479145382739755, + "loss": 2.6525, + "step": 4698 + }, + { + "epoch": 0.3792268582035348, + "grad_norm": 0.7012068629264832, + "learning_rate": 0.0001747809736311529, + "loss": 2.6662, + "step": 4699 + }, + { + "epoch": 0.37930756194011783, + "grad_norm": 0.7266128659248352, + "learning_rate": 0.00017477049157117093, + "loss": 2.5853, + "step": 4700 + }, + { + "epoch": 0.3793882656767008, + "grad_norm": 0.7264416217803955, + "learning_rate": 0.00017476000764771285, + "loss": 2.6972, + "step": 4701 + }, + { + "epoch": 0.37946896941328384, + "grad_norm": 0.797709047794342, + "learning_rate": 0.00017474952186103995, + "loss": 2.6997, + "step": 4702 + }, + { + "epoch": 0.3795496731498668, + "grad_norm": 0.7552568912506104, + "learning_rate": 0.00017473903421141358, + "loss": 2.7178, + "step": 4703 + }, + { + "epoch": 0.37963037688644985, + "grad_norm": 0.7611108422279358, + "learning_rate": 0.0001747285446990951, + "loss": 2.6997, + "step": 4704 + }, + { + "epoch": 0.3797110806230328, + "grad_norm": 0.8081753253936768, + "learning_rate": 0.00017471805332434595, + "loss": 2.7242, + "step": 4705 + }, + { + "epoch": 0.37979178435961586, + "grad_norm": 0.728301465511322, + "learning_rate": 0.0001747075600874276, + "loss": 2.5885, + "step": 4706 + }, + { + "epoch": 0.37987248809619883, + "grad_norm": 0.7548539638519287, + "learning_rate": 0.00017469706498860155, + "loss": 2.7038, + "step": 4707 + }, + { + "epoch": 0.37995319183278187, + "grad_norm": 0.7054354548454285, + "learning_rate": 0.00017468656802812938, + "loss": 2.6566, + "step": 4708 + }, + { + 
"epoch": 0.38003389556936484, + "grad_norm": 0.7231585383415222, + "learning_rate": 0.0001746760692062727, + "loss": 2.6564, + "step": 4709 + }, + { + "epoch": 0.3801145993059479, + "grad_norm": 0.6931934952735901, + "learning_rate": 0.00017466556852329318, + "loss": 2.6403, + "step": 4710 + }, + { + "epoch": 0.38019530304253085, + "grad_norm": 0.7882393598556519, + "learning_rate": 0.00017465506597945255, + "loss": 2.6337, + "step": 4711 + }, + { + "epoch": 0.3802760067791139, + "grad_norm": 0.7015109658241272, + "learning_rate": 0.0001746445615750125, + "loss": 2.6742, + "step": 4712 + }, + { + "epoch": 0.38035671051569686, + "grad_norm": 0.7653505802154541, + "learning_rate": 0.0001746340553102348, + "loss": 2.6742, + "step": 4713 + }, + { + "epoch": 0.3804374142522799, + "grad_norm": 0.7166270613670349, + "learning_rate": 0.0001746235471853814, + "loss": 2.5995, + "step": 4714 + }, + { + "epoch": 0.38051811798886287, + "grad_norm": 0.7612236738204956, + "learning_rate": 0.0001746130372007141, + "loss": 2.7595, + "step": 4715 + }, + { + "epoch": 0.3805988217254459, + "grad_norm": 0.6783852577209473, + "learning_rate": 0.00017460252535649493, + "loss": 2.6156, + "step": 4716 + }, + { + "epoch": 0.3806795254620289, + "grad_norm": 0.7495827078819275, + "learning_rate": 0.00017459201165298578, + "loss": 2.6847, + "step": 4717 + }, + { + "epoch": 0.3807602291986119, + "grad_norm": 0.814798891544342, + "learning_rate": 0.0001745814960904487, + "loss": 2.6211, + "step": 4718 + }, + { + "epoch": 0.3808409329351949, + "grad_norm": 0.7541367411613464, + "learning_rate": 0.0001745709786691458, + "loss": 2.6214, + "step": 4719 + }, + { + "epoch": 0.3809216366717779, + "grad_norm": 0.7065702676773071, + "learning_rate": 0.00017456045938933921, + "loss": 2.6699, + "step": 4720 + }, + { + "epoch": 0.3810023404083609, + "grad_norm": 0.751960813999176, + "learning_rate": 0.000174549938251291, + "loss": 2.6085, + "step": 4721 + }, + { + "epoch": 0.3810830441449439, + "grad_norm": 
0.72068190574646, + "learning_rate": 0.00017453941525526353, + "loss": 2.6201, + "step": 4722 + }, + { + "epoch": 0.3811637478815269, + "grad_norm": 0.7201167941093445, + "learning_rate": 0.00017452889040151892, + "loss": 2.6775, + "step": 4723 + }, + { + "epoch": 0.38124445161810994, + "grad_norm": 0.7904958128929138, + "learning_rate": 0.00017451836369031956, + "loss": 2.7217, + "step": 4724 + }, + { + "epoch": 0.3813251553546929, + "grad_norm": 0.7096366882324219, + "learning_rate": 0.0001745078351219278, + "loss": 2.7004, + "step": 4725 + }, + { + "epoch": 0.38140585909127594, + "grad_norm": 0.6812441945075989, + "learning_rate": 0.00017449730469660602, + "loss": 2.6555, + "step": 4726 + }, + { + "epoch": 0.3814865628278589, + "grad_norm": 0.8037428855895996, + "learning_rate": 0.00017448677241461665, + "loss": 2.7094, + "step": 4727 + }, + { + "epoch": 0.38156726656444195, + "grad_norm": 0.7282679677009583, + "learning_rate": 0.00017447623827622223, + "loss": 2.6699, + "step": 4728 + }, + { + "epoch": 0.38164797030102493, + "grad_norm": 0.745705783367157, + "learning_rate": 0.00017446570228168523, + "loss": 2.6098, + "step": 4729 + }, + { + "epoch": 0.38172867403760796, + "grad_norm": 0.7098714113235474, + "learning_rate": 0.00017445516443126828, + "loss": 2.6628, + "step": 4730 + }, + { + "epoch": 0.38180937777419094, + "grad_norm": 0.7376620769500732, + "learning_rate": 0.00017444462472523405, + "loss": 2.7086, + "step": 4731 + }, + { + "epoch": 0.38189008151077397, + "grad_norm": 0.717800498008728, + "learning_rate": 0.00017443408316384512, + "loss": 2.6582, + "step": 4732 + }, + { + "epoch": 0.38197078524735695, + "grad_norm": 0.7061530947685242, + "learning_rate": 0.00017442353974736428, + "loss": 2.6817, + "step": 4733 + }, + { + "epoch": 0.38205148898394, + "grad_norm": 0.744667112827301, + "learning_rate": 0.0001744129944760543, + "loss": 2.6649, + "step": 4734 + }, + { + "epoch": 0.38213219272052296, + "grad_norm": 0.7302529215812683, + 
"learning_rate": 0.00017440244735017797, + "loss": 2.7313, + "step": 4735 + }, + { + "epoch": 0.382212896457106, + "grad_norm": 0.6845258474349976, + "learning_rate": 0.00017439189836999816, + "loss": 2.637, + "step": 4736 + }, + { + "epoch": 0.38229360019368896, + "grad_norm": 0.7060490250587463, + "learning_rate": 0.0001743813475357778, + "loss": 2.6674, + "step": 4737 + }, + { + "epoch": 0.382374303930272, + "grad_norm": 0.7146841287612915, + "learning_rate": 0.00017437079484777977, + "loss": 2.6607, + "step": 4738 + }, + { + "epoch": 0.382455007666855, + "grad_norm": 0.7107662558555603, + "learning_rate": 0.00017436024030626719, + "loss": 2.6777, + "step": 4739 + }, + { + "epoch": 0.382535711403438, + "grad_norm": 0.7356777191162109, + "learning_rate": 0.00017434968391150303, + "loss": 2.5801, + "step": 4740 + }, + { + "epoch": 0.382616415140021, + "grad_norm": 0.6839054226875305, + "learning_rate": 0.00017433912566375037, + "loss": 2.6319, + "step": 4741 + }, + { + "epoch": 0.382697118876604, + "grad_norm": 0.7049627900123596, + "learning_rate": 0.00017432856556327236, + "loss": 2.741, + "step": 4742 + }, + { + "epoch": 0.382777822613187, + "grad_norm": 0.7926551103591919, + "learning_rate": 0.00017431800361033224, + "loss": 2.64, + "step": 4743 + }, + { + "epoch": 0.38285852634976997, + "grad_norm": 0.734272301197052, + "learning_rate": 0.0001743074398051932, + "loss": 2.6575, + "step": 4744 + }, + { + "epoch": 0.382939230086353, + "grad_norm": 0.6959543824195862, + "learning_rate": 0.00017429687414811847, + "loss": 2.664, + "step": 4745 + }, + { + "epoch": 0.383019933822936, + "grad_norm": 0.7258255481719971, + "learning_rate": 0.00017428630663937148, + "loss": 2.6597, + "step": 4746 + }, + { + "epoch": 0.383100637559519, + "grad_norm": 0.8067473769187927, + "learning_rate": 0.0001742757372792155, + "loss": 2.6798, + "step": 4747 + }, + { + "epoch": 0.383181341296102, + "grad_norm": 0.7000626921653748, + "learning_rate": 0.000174265166067914, + "loss": 
2.6561, + "step": 4748 + }, + { + "epoch": 0.383262045032685, + "grad_norm": 0.818914532661438, + "learning_rate": 0.00017425459300573045, + "loss": 2.6491, + "step": 4749 + }, + { + "epoch": 0.383342748769268, + "grad_norm": 0.7060543298721313, + "learning_rate": 0.00017424401809292833, + "loss": 2.6825, + "step": 4750 + }, + { + "epoch": 0.383423452505851, + "grad_norm": 0.893488883972168, + "learning_rate": 0.0001742334413297712, + "loss": 2.7201, + "step": 4751 + }, + { + "epoch": 0.383504156242434, + "grad_norm": 0.8131078481674194, + "learning_rate": 0.00017422286271652265, + "loss": 2.7828, + "step": 4752 + }, + { + "epoch": 0.38358485997901703, + "grad_norm": 0.7735587954521179, + "learning_rate": 0.00017421228225344634, + "loss": 2.6489, + "step": 4753 + }, + { + "epoch": 0.3836655637156, + "grad_norm": 0.713800311088562, + "learning_rate": 0.000174201699940806, + "loss": 2.6686, + "step": 4754 + }, + { + "epoch": 0.38374626745218304, + "grad_norm": 0.8246580362319946, + "learning_rate": 0.00017419111577886528, + "loss": 2.6771, + "step": 4755 + }, + { + "epoch": 0.383826971188766, + "grad_norm": 0.694542646408081, + "learning_rate": 0.00017418052976788805, + "loss": 2.6632, + "step": 4756 + }, + { + "epoch": 0.38390767492534905, + "grad_norm": 0.7200453281402588, + "learning_rate": 0.0001741699419081381, + "loss": 2.6386, + "step": 4757 + }, + { + "epoch": 0.38398837866193203, + "grad_norm": 0.7002073526382446, + "learning_rate": 0.00017415935219987933, + "loss": 2.6399, + "step": 4758 + }, + { + "epoch": 0.38406908239851506, + "grad_norm": 0.7056967616081238, + "learning_rate": 0.00017414876064337565, + "loss": 2.7048, + "step": 4759 + }, + { + "epoch": 0.38414978613509804, + "grad_norm": 0.7406448721885681, + "learning_rate": 0.000174138167238891, + "loss": 2.6256, + "step": 4760 + }, + { + "epoch": 0.38423048987168107, + "grad_norm": 0.7280529737472534, + "learning_rate": 0.00017412757198668945, + "loss": 2.6393, + "step": 4761 + }, + { + "epoch": 
0.38431119360826405, + "grad_norm": 0.7626908421516418, + "learning_rate": 0.00017411697488703502, + "loss": 2.6717, + "step": 4762 + }, + { + "epoch": 0.3843918973448471, + "grad_norm": 0.716345489025116, + "learning_rate": 0.00017410637594019184, + "loss": 2.6457, + "step": 4763 + }, + { + "epoch": 0.38447260108143005, + "grad_norm": 0.8825077414512634, + "learning_rate": 0.00017409577514642405, + "loss": 2.7042, + "step": 4764 + }, + { + "epoch": 0.3845533048180131, + "grad_norm": 0.7301186919212341, + "learning_rate": 0.00017408517250599585, + "loss": 2.7065, + "step": 4765 + }, + { + "epoch": 0.38463400855459606, + "grad_norm": 0.8235788345336914, + "learning_rate": 0.0001740745680191715, + "loss": 2.6315, + "step": 4766 + }, + { + "epoch": 0.3847147122911791, + "grad_norm": 0.7355515956878662, + "learning_rate": 0.00017406396168621527, + "loss": 2.6939, + "step": 4767 + }, + { + "epoch": 0.38479541602776207, + "grad_norm": 0.6781682372093201, + "learning_rate": 0.0001740533535073915, + "loss": 2.6071, + "step": 4768 + }, + { + "epoch": 0.3848761197643451, + "grad_norm": 0.801191508769989, + "learning_rate": 0.0001740427434829646, + "loss": 2.6635, + "step": 4769 + }, + { + "epoch": 0.3849568235009281, + "grad_norm": 0.759682297706604, + "learning_rate": 0.00017403213161319903, + "loss": 2.6823, + "step": 4770 + }, + { + "epoch": 0.3850375272375111, + "grad_norm": 0.806498110294342, + "learning_rate": 0.00017402151789835916, + "loss": 2.7111, + "step": 4771 + }, + { + "epoch": 0.3851182309740941, + "grad_norm": 0.7677996158599854, + "learning_rate": 0.00017401090233870958, + "loss": 2.6701, + "step": 4772 + }, + { + "epoch": 0.3851989347106771, + "grad_norm": 0.7449933290481567, + "learning_rate": 0.00017400028493451487, + "loss": 2.7037, + "step": 4773 + }, + { + "epoch": 0.3852796384472601, + "grad_norm": 0.7506107091903687, + "learning_rate": 0.0001739896656860396, + "loss": 2.6587, + "step": 4774 + }, + { + "epoch": 0.38536034218384313, + "grad_norm": 
0.8781036734580994, + "learning_rate": 0.00017397904459354844, + "loss": 2.7634, + "step": 4775 + }, + { + "epoch": 0.3854410459204261, + "grad_norm": 0.7067514657974243, + "learning_rate": 0.0001739684216573061, + "loss": 2.638, + "step": 4776 + }, + { + "epoch": 0.38552174965700914, + "grad_norm": 0.7742886543273926, + "learning_rate": 0.00017395779687757735, + "loss": 2.7043, + "step": 4777 + }, + { + "epoch": 0.3856024533935921, + "grad_norm": 0.7348291277885437, + "learning_rate": 0.00017394717025462697, + "loss": 2.7404, + "step": 4778 + }, + { + "epoch": 0.38568315713017515, + "grad_norm": 0.7449346780776978, + "learning_rate": 0.00017393654178871984, + "loss": 2.631, + "step": 4779 + }, + { + "epoch": 0.3857638608667581, + "grad_norm": 0.7191200256347656, + "learning_rate": 0.00017392591148012078, + "loss": 2.6776, + "step": 4780 + }, + { + "epoch": 0.38584456460334116, + "grad_norm": 0.7055533528327942, + "learning_rate": 0.00017391527932909476, + "loss": 2.6219, + "step": 4781 + }, + { + "epoch": 0.38592526833992413, + "grad_norm": 0.73755943775177, + "learning_rate": 0.0001739046453359068, + "loss": 2.6692, + "step": 4782 + }, + { + "epoch": 0.38600597207650716, + "grad_norm": 0.7469369769096375, + "learning_rate": 0.00017389400950082185, + "loss": 2.6572, + "step": 4783 + }, + { + "epoch": 0.38608667581309014, + "grad_norm": 0.7552534341812134, + "learning_rate": 0.00017388337182410504, + "loss": 2.6853, + "step": 4784 + }, + { + "epoch": 0.3861673795496732, + "grad_norm": 0.7453532814979553, + "learning_rate": 0.00017387273230602145, + "loss": 2.6601, + "step": 4785 + }, + { + "epoch": 0.38624808328625615, + "grad_norm": 0.7259301543235779, + "learning_rate": 0.0001738620909468363, + "loss": 2.6997, + "step": 4786 + }, + { + "epoch": 0.3863287870228392, + "grad_norm": 0.6970019936561584, + "learning_rate": 0.00017385144774681476, + "loss": 2.7497, + "step": 4787 + }, + { + "epoch": 0.38640949075942216, + "grad_norm": 0.7172032594680786, + 
"learning_rate": 0.00017384080270622208, + "loss": 2.7182, + "step": 4788 + }, + { + "epoch": 0.3864901944960052, + "grad_norm": 0.7184371948242188, + "learning_rate": 0.00017383015582532357, + "loss": 2.6358, + "step": 4789 + }, + { + "epoch": 0.38657089823258817, + "grad_norm": 0.7302096486091614, + "learning_rate": 0.00017381950710438458, + "loss": 2.6066, + "step": 4790 + }, + { + "epoch": 0.3866516019691712, + "grad_norm": 0.7043540477752686, + "learning_rate": 0.00017380885654367053, + "loss": 2.699, + "step": 4791 + }, + { + "epoch": 0.3867323057057542, + "grad_norm": 0.6919732689857483, + "learning_rate": 0.0001737982041434468, + "loss": 2.6025, + "step": 4792 + }, + { + "epoch": 0.3868130094423372, + "grad_norm": 0.7277705669403076, + "learning_rate": 0.00017378754990397894, + "loss": 2.6764, + "step": 4793 + }, + { + "epoch": 0.3868937131789202, + "grad_norm": 0.7546190619468689, + "learning_rate": 0.00017377689382553247, + "loss": 2.5865, + "step": 4794 + }, + { + "epoch": 0.38697441691550316, + "grad_norm": 0.7636401653289795, + "learning_rate": 0.00017376623590837294, + "loss": 2.6488, + "step": 4795 + }, + { + "epoch": 0.3870551206520862, + "grad_norm": 0.6945658922195435, + "learning_rate": 0.00017375557615276595, + "loss": 2.6739, + "step": 4796 + }, + { + "epoch": 0.38713582438866917, + "grad_norm": 0.7503637075424194, + "learning_rate": 0.00017374491455897722, + "loss": 2.6854, + "step": 4797 + }, + { + "epoch": 0.3872165281252522, + "grad_norm": 0.7457373142242432, + "learning_rate": 0.00017373425112727247, + "loss": 2.6659, + "step": 4798 + }, + { + "epoch": 0.3872972318618352, + "grad_norm": 0.7742534875869751, + "learning_rate": 0.0001737235858579174, + "loss": 2.6461, + "step": 4799 + }, + { + "epoch": 0.3873779355984182, + "grad_norm": 0.7397909760475159, + "learning_rate": 0.0001737129187511779, + "loss": 2.6779, + "step": 4800 + }, + { + "epoch": 0.3874586393350012, + "grad_norm": 0.7922031879425049, + "learning_rate": 
0.00017370224980731974, + "loss": 2.6417, + "step": 4801 + }, + { + "epoch": 0.3875393430715842, + "grad_norm": 0.8503968715667725, + "learning_rate": 0.00017369157902660887, + "loss": 2.7063, + "step": 4802 + }, + { + "epoch": 0.3876200468081672, + "grad_norm": 0.7143701314926147, + "learning_rate": 0.00017368090640931125, + "loss": 2.6152, + "step": 4803 + }, + { + "epoch": 0.38770075054475023, + "grad_norm": 0.8016753196716309, + "learning_rate": 0.0001736702319556928, + "loss": 2.6005, + "step": 4804 + }, + { + "epoch": 0.3877814542813332, + "grad_norm": 0.7329538464546204, + "learning_rate": 0.00017365955566601962, + "loss": 2.6027, + "step": 4805 + }, + { + "epoch": 0.38786215801791624, + "grad_norm": 0.7005148530006409, + "learning_rate": 0.00017364887754055773, + "loss": 2.6585, + "step": 4806 + }, + { + "epoch": 0.3879428617544992, + "grad_norm": 0.7092769145965576, + "learning_rate": 0.00017363819757957333, + "loss": 2.6763, + "step": 4807 + }, + { + "epoch": 0.38802356549108225, + "grad_norm": 0.7475202679634094, + "learning_rate": 0.0001736275157833325, + "loss": 2.5969, + "step": 4808 + }, + { + "epoch": 0.3881042692276652, + "grad_norm": 0.822496235370636, + "learning_rate": 0.0001736168321521016, + "loss": 2.6758, + "step": 4809 + }, + { + "epoch": 0.38818497296424825, + "grad_norm": 0.7756842374801636, + "learning_rate": 0.0001736061466861467, + "loss": 2.6676, + "step": 4810 + }, + { + "epoch": 0.38826567670083123, + "grad_norm": 0.7192497849464417, + "learning_rate": 0.00017359545938573428, + "loss": 2.7045, + "step": 4811 + }, + { + "epoch": 0.38834638043741426, + "grad_norm": 0.7064149379730225, + "learning_rate": 0.00017358477025113063, + "loss": 2.6169, + "step": 4812 + }, + { + "epoch": 0.38842708417399724, + "grad_norm": 0.7297258973121643, + "learning_rate": 0.00017357407928260215, + "loss": 2.612, + "step": 4813 + }, + { + "epoch": 0.38850778791058027, + "grad_norm": 0.7011935114860535, + "learning_rate": 0.00017356338648041528, + "loss": 
2.6507, + "step": 4814 + }, + { + "epoch": 0.38858849164716325, + "grad_norm": 0.7647256255149841, + "learning_rate": 0.00017355269184483651, + "loss": 2.6838, + "step": 4815 + }, + { + "epoch": 0.3886691953837463, + "grad_norm": 0.690182089805603, + "learning_rate": 0.0001735419953761324, + "loss": 2.6996, + "step": 4816 + }, + { + "epoch": 0.38874989912032926, + "grad_norm": 0.7142173647880554, + "learning_rate": 0.00017353129707456955, + "loss": 2.6705, + "step": 4817 + }, + { + "epoch": 0.3888306028569123, + "grad_norm": 0.801369309425354, + "learning_rate": 0.00017352059694041456, + "loss": 2.7002, + "step": 4818 + }, + { + "epoch": 0.38891130659349527, + "grad_norm": 0.7021649479866028, + "learning_rate": 0.0001735098949739341, + "loss": 2.7042, + "step": 4819 + }, + { + "epoch": 0.3889920103300783, + "grad_norm": 0.6802586317062378, + "learning_rate": 0.00017349919117539488, + "loss": 2.7186, + "step": 4820 + }, + { + "epoch": 0.3890727140666613, + "grad_norm": 0.7723212838172913, + "learning_rate": 0.0001734884855450637, + "loss": 2.608, + "step": 4821 + }, + { + "epoch": 0.3891534178032443, + "grad_norm": 0.7037193179130554, + "learning_rate": 0.00017347777808320735, + "loss": 2.6198, + "step": 4822 + }, + { + "epoch": 0.3892341215398273, + "grad_norm": 0.7172731161117554, + "learning_rate": 0.00017346706879009272, + "loss": 2.7037, + "step": 4823 + }, + { + "epoch": 0.3893148252764103, + "grad_norm": 0.7421539425849915, + "learning_rate": 0.00017345635766598667, + "loss": 2.6619, + "step": 4824 + }, + { + "epoch": 0.3893955290129933, + "grad_norm": 0.7587071061134338, + "learning_rate": 0.0001734456447111562, + "loss": 2.6229, + "step": 4825 + }, + { + "epoch": 0.3894762327495763, + "grad_norm": 0.6981459259986877, + "learning_rate": 0.00017343492992586822, + "loss": 2.5927, + "step": 4826 + }, + { + "epoch": 0.3895569364861593, + "grad_norm": 0.7628491520881653, + "learning_rate": 0.00017342421331038987, + "loss": 2.7047, + "step": 4827 + }, + { + 
"epoch": 0.38963764022274233, + "grad_norm": 0.8005064129829407, + "learning_rate": 0.00017341349486498818, + "loss": 2.6918, + "step": 4828 + }, + { + "epoch": 0.3897183439593253, + "grad_norm": 0.7756431102752686, + "learning_rate": 0.0001734027745899303, + "loss": 2.6621, + "step": 4829 + }, + { + "epoch": 0.38979904769590834, + "grad_norm": 0.7317833304405212, + "learning_rate": 0.00017339205248548338, + "loss": 2.7134, + "step": 4830 + }, + { + "epoch": 0.3898797514324913, + "grad_norm": 0.7293959259986877, + "learning_rate": 0.0001733813285519147, + "loss": 2.6865, + "step": 4831 + }, + { + "epoch": 0.38996045516907435, + "grad_norm": 0.7120299935340881, + "learning_rate": 0.00017337060278949147, + "loss": 2.6915, + "step": 4832 + }, + { + "epoch": 0.3900411589056573, + "grad_norm": 0.7255397439002991, + "learning_rate": 0.00017335987519848103, + "loss": 2.6671, + "step": 4833 + }, + { + "epoch": 0.39012186264224036, + "grad_norm": 0.7849408388137817, + "learning_rate": 0.0001733491457791507, + "loss": 2.6301, + "step": 4834 + }, + { + "epoch": 0.39020256637882333, + "grad_norm": 0.6998472809791565, + "learning_rate": 0.00017333841453176797, + "loss": 2.6587, + "step": 4835 + }, + { + "epoch": 0.39028327011540637, + "grad_norm": 0.7530023455619812, + "learning_rate": 0.00017332768145660024, + "loss": 2.7011, + "step": 4836 + }, + { + "epoch": 0.39036397385198934, + "grad_norm": 0.7251207828521729, + "learning_rate": 0.00017331694655391497, + "loss": 2.6416, + "step": 4837 + }, + { + "epoch": 0.3904446775885724, + "grad_norm": 0.7016854882240295, + "learning_rate": 0.00017330620982397975, + "loss": 2.7224, + "step": 4838 + }, + { + "epoch": 0.39052538132515535, + "grad_norm": 0.7253310084342957, + "learning_rate": 0.00017329547126706217, + "loss": 2.6747, + "step": 4839 + }, + { + "epoch": 0.3906060850617384, + "grad_norm": 0.7114601731300354, + "learning_rate": 0.00017328473088342987, + "loss": 2.6654, + "step": 4840 + }, + { + "epoch": 0.39068678879832136, + 
"grad_norm": 0.7773289680480957, + "learning_rate": 0.00017327398867335048, + "loss": 2.6625, + "step": 4841 + }, + { + "epoch": 0.3907674925349044, + "grad_norm": 0.7541868686676025, + "learning_rate": 0.00017326324463709175, + "loss": 2.667, + "step": 4842 + }, + { + "epoch": 0.39084819627148737, + "grad_norm": 0.8095890283584595, + "learning_rate": 0.00017325249877492147, + "loss": 2.706, + "step": 4843 + }, + { + "epoch": 0.3909289000080704, + "grad_norm": 0.7019474506378174, + "learning_rate": 0.00017324175108710742, + "loss": 2.6125, + "step": 4844 + }, + { + "epoch": 0.3910096037446534, + "grad_norm": 0.7055396437644958, + "learning_rate": 0.00017323100157391746, + "loss": 2.6373, + "step": 4845 + }, + { + "epoch": 0.39109030748123635, + "grad_norm": 0.7332476377487183, + "learning_rate": 0.00017322025023561955, + "loss": 2.6559, + "step": 4846 + }, + { + "epoch": 0.3911710112178194, + "grad_norm": 0.7740387916564941, + "learning_rate": 0.00017320949707248158, + "loss": 2.7341, + "step": 4847 + }, + { + "epoch": 0.39125171495440236, + "grad_norm": 0.7371044754981995, + "learning_rate": 0.0001731987420847716, + "loss": 2.7318, + "step": 4848 + }, + { + "epoch": 0.3913324186909854, + "grad_norm": 0.7897786498069763, + "learning_rate": 0.00017318798527275758, + "loss": 2.6759, + "step": 4849 + }, + { + "epoch": 0.39141312242756837, + "grad_norm": 0.7149896621704102, + "learning_rate": 0.0001731772266367077, + "loss": 2.7097, + "step": 4850 + }, + { + "epoch": 0.3914938261641514, + "grad_norm": 0.7824358344078064, + "learning_rate": 0.00017316646617689002, + "loss": 2.6376, + "step": 4851 + }, + { + "epoch": 0.3915745299007344, + "grad_norm": 0.7704496383666992, + "learning_rate": 0.00017315570389357272, + "loss": 2.6539, + "step": 4852 + }, + { + "epoch": 0.3916552336373174, + "grad_norm": 0.7489706873893738, + "learning_rate": 0.00017314493978702407, + "loss": 2.6716, + "step": 4853 + }, + { + "epoch": 0.3917359373739004, + "grad_norm": 0.7368690967559814, + 
"learning_rate": 0.00017313417385751234, + "loss": 2.7171, + "step": 4854 + }, + { + "epoch": 0.3918166411104834, + "grad_norm": 0.7215858697891235, + "learning_rate": 0.00017312340610530579, + "loss": 2.6306, + "step": 4855 + }, + { + "epoch": 0.3918973448470664, + "grad_norm": 0.7622217535972595, + "learning_rate": 0.00017311263653067285, + "loss": 2.6089, + "step": 4856 + }, + { + "epoch": 0.39197804858364943, + "grad_norm": 0.7317889332771301, + "learning_rate": 0.00017310186513388185, + "loss": 2.6831, + "step": 4857 + }, + { + "epoch": 0.3920587523202324, + "grad_norm": 0.894185483455658, + "learning_rate": 0.0001730910919152013, + "loss": 2.684, + "step": 4858 + }, + { + "epoch": 0.39213945605681544, + "grad_norm": 0.7313157916069031, + "learning_rate": 0.00017308031687489968, + "loss": 2.6465, + "step": 4859 + }, + { + "epoch": 0.3922201597933984, + "grad_norm": 0.7765825390815735, + "learning_rate": 0.00017306954001324552, + "loss": 2.6526, + "step": 4860 + }, + { + "epoch": 0.39230086352998145, + "grad_norm": 0.7171424031257629, + "learning_rate": 0.00017305876133050742, + "loss": 2.6212, + "step": 4861 + }, + { + "epoch": 0.3923815672665644, + "grad_norm": 0.7215112447738647, + "learning_rate": 0.000173047980826954, + "loss": 2.6329, + "step": 4862 + }, + { + "epoch": 0.39246227100314746, + "grad_norm": 0.7393578886985779, + "learning_rate": 0.00017303719850285396, + "loss": 2.7264, + "step": 4863 + }, + { + "epoch": 0.39254297473973043, + "grad_norm": 0.7620136737823486, + "learning_rate": 0.00017302641435847603, + "loss": 2.6686, + "step": 4864 + }, + { + "epoch": 0.39262367847631346, + "grad_norm": 0.7290963530540466, + "learning_rate": 0.00017301562839408893, + "loss": 2.578, + "step": 4865 + }, + { + "epoch": 0.39270438221289644, + "grad_norm": 0.6978541612625122, + "learning_rate": 0.00017300484060996153, + "loss": 2.6783, + "step": 4866 + }, + { + "epoch": 0.3927850859494795, + "grad_norm": 0.7212007641792297, + "learning_rate": 
0.00017299405100636264, + "loss": 2.6282, + "step": 4867 + }, + { + "epoch": 0.39286578968606245, + "grad_norm": 0.757324755191803, + "learning_rate": 0.0001729832595835612, + "loss": 2.6933, + "step": 4868 + }, + { + "epoch": 0.3929464934226455, + "grad_norm": 0.7052869200706482, + "learning_rate": 0.00017297246634182618, + "loss": 2.7152, + "step": 4869 + }, + { + "epoch": 0.39302719715922846, + "grad_norm": 0.7326259016990662, + "learning_rate": 0.0001729616712814265, + "loss": 2.6792, + "step": 4870 + }, + { + "epoch": 0.3931079008958115, + "grad_norm": 0.7540302276611328, + "learning_rate": 0.00017295087440263128, + "loss": 2.6621, + "step": 4871 + }, + { + "epoch": 0.39318860463239447, + "grad_norm": 0.765454888343811, + "learning_rate": 0.00017294007570570956, + "loss": 2.7049, + "step": 4872 + }, + { + "epoch": 0.3932693083689775, + "grad_norm": 0.7303065061569214, + "learning_rate": 0.0001729292751909305, + "loss": 2.6867, + "step": 4873 + }, + { + "epoch": 0.3933500121055605, + "grad_norm": 0.7049854397773743, + "learning_rate": 0.00017291847285856325, + "loss": 2.7052, + "step": 4874 + }, + { + "epoch": 0.3934307158421435, + "grad_norm": 0.7199053764343262, + "learning_rate": 0.00017290766870887704, + "loss": 2.7195, + "step": 4875 + }, + { + "epoch": 0.3935114195787265, + "grad_norm": 0.7536180019378662, + "learning_rate": 0.00017289686274214118, + "loss": 2.6861, + "step": 4876 + }, + { + "epoch": 0.3935921233153095, + "grad_norm": 0.7295238971710205, + "learning_rate": 0.00017288605495862492, + "loss": 2.6684, + "step": 4877 + }, + { + "epoch": 0.3936728270518925, + "grad_norm": 0.7575719952583313, + "learning_rate": 0.00017287524535859763, + "loss": 2.6439, + "step": 4878 + }, + { + "epoch": 0.3937535307884755, + "grad_norm": 0.678909182548523, + "learning_rate": 0.00017286443394232874, + "loss": 2.6562, + "step": 4879 + }, + { + "epoch": 0.3938342345250585, + "grad_norm": 0.6908892393112183, + "learning_rate": 0.00017285362071008768, + "loss": 
2.6364, + "step": 4880 + }, + { + "epoch": 0.39391493826164153, + "grad_norm": 0.7414079904556274, + "learning_rate": 0.00017284280566214397, + "loss": 2.5872, + "step": 4881 + }, + { + "epoch": 0.3939956419982245, + "grad_norm": 0.6824749112129211, + "learning_rate": 0.0001728319887987671, + "loss": 2.641, + "step": 4882 + }, + { + "epoch": 0.39407634573480754, + "grad_norm": 0.6908513903617859, + "learning_rate": 0.0001728211701202267, + "loss": 2.6977, + "step": 4883 + }, + { + "epoch": 0.3941570494713905, + "grad_norm": 0.7214735746383667, + "learning_rate": 0.0001728103496267924, + "loss": 2.5826, + "step": 4884 + }, + { + "epoch": 0.39423775320797355, + "grad_norm": 0.812781572341919, + "learning_rate": 0.00017279952731873385, + "loss": 2.6806, + "step": 4885 + }, + { + "epoch": 0.39431845694455653, + "grad_norm": 0.7610746026039124, + "learning_rate": 0.00017278870319632078, + "loss": 2.6046, + "step": 4886 + }, + { + "epoch": 0.39439916068113956, + "grad_norm": 0.7151652574539185, + "learning_rate": 0.00017277787725982293, + "loss": 2.6543, + "step": 4887 + }, + { + "epoch": 0.39447986441772254, + "grad_norm": 0.7293612360954285, + "learning_rate": 0.00017276704950951017, + "loss": 2.6384, + "step": 4888 + }, + { + "epoch": 0.39456056815430557, + "grad_norm": 0.8138254284858704, + "learning_rate": 0.00017275621994565233, + "loss": 2.7208, + "step": 4889 + }, + { + "epoch": 0.39464127189088855, + "grad_norm": 0.7557196021080017, + "learning_rate": 0.00017274538856851924, + "loss": 2.6571, + "step": 4890 + }, + { + "epoch": 0.3947219756274716, + "grad_norm": 0.7297266721725464, + "learning_rate": 0.00017273455537838097, + "loss": 2.6222, + "step": 4891 + }, + { + "epoch": 0.39480267936405455, + "grad_norm": 0.7838431596755981, + "learning_rate": 0.00017272372037550743, + "loss": 2.782, + "step": 4892 + }, + { + "epoch": 0.3948833831006376, + "grad_norm": 0.7799673676490784, + "learning_rate": 0.00017271288356016866, + "loss": 2.6658, + "step": 4893 + }, + { + 
"epoch": 0.39496408683722056, + "grad_norm": 0.8495545387268066, + "learning_rate": 0.0001727020449326348, + "loss": 2.6552, + "step": 4894 + }, + { + "epoch": 0.3950447905738036, + "grad_norm": 0.7317770719528198, + "learning_rate": 0.00017269120449317588, + "loss": 2.6616, + "step": 4895 + }, + { + "epoch": 0.39512549431038657, + "grad_norm": 0.7518885731697083, + "learning_rate": 0.00017268036224206217, + "loss": 2.6864, + "step": 4896 + }, + { + "epoch": 0.39520619804696955, + "grad_norm": 0.83487468957901, + "learning_rate": 0.00017266951817956382, + "loss": 2.7535, + "step": 4897 + }, + { + "epoch": 0.3952869017835526, + "grad_norm": 0.7440658211708069, + "learning_rate": 0.00017265867230595113, + "loss": 2.6584, + "step": 4898 + }, + { + "epoch": 0.39536760552013556, + "grad_norm": 0.7060485482215881, + "learning_rate": 0.00017264782462149438, + "loss": 2.6892, + "step": 4899 + }, + { + "epoch": 0.3954483092567186, + "grad_norm": 0.8410428166389465, + "learning_rate": 0.00017263697512646394, + "loss": 2.6425, + "step": 4900 + }, + { + "epoch": 0.39552901299330157, + "grad_norm": 0.757046639919281, + "learning_rate": 0.0001726261238211302, + "loss": 2.6159, + "step": 4901 + }, + { + "epoch": 0.3956097167298846, + "grad_norm": 0.7288908958435059, + "learning_rate": 0.00017261527070576365, + "loss": 2.6753, + "step": 4902 + }, + { + "epoch": 0.3956904204664676, + "grad_norm": 0.8194541335105896, + "learning_rate": 0.0001726044157806347, + "loss": 2.6673, + "step": 4903 + }, + { + "epoch": 0.3957711242030506, + "grad_norm": 0.7957740426063538, + "learning_rate": 0.00017259355904601393, + "loss": 2.6662, + "step": 4904 + }, + { + "epoch": 0.3958518279396336, + "grad_norm": 0.8790122270584106, + "learning_rate": 0.0001725827005021719, + "loss": 2.7513, + "step": 4905 + }, + { + "epoch": 0.3959325316762166, + "grad_norm": 0.7674984335899353, + "learning_rate": 0.00017257184014937924, + "loss": 2.6375, + "step": 4906 + }, + { + "epoch": 0.3960132354127996, + 
"grad_norm": 0.7250992655754089, + "learning_rate": 0.00017256097798790663, + "loss": 2.63, + "step": 4907 + }, + { + "epoch": 0.3960939391493826, + "grad_norm": 0.8578312397003174, + "learning_rate": 0.00017255011401802475, + "loss": 2.702, + "step": 4908 + }, + { + "epoch": 0.3961746428859656, + "grad_norm": 0.7365253567695618, + "learning_rate": 0.00017253924824000438, + "loss": 2.6156, + "step": 4909 + }, + { + "epoch": 0.39625534662254863, + "grad_norm": 0.7148925065994263, + "learning_rate": 0.00017252838065411633, + "loss": 2.6658, + "step": 4910 + }, + { + "epoch": 0.3963360503591316, + "grad_norm": 0.7517829537391663, + "learning_rate": 0.00017251751126063148, + "loss": 2.6347, + "step": 4911 + }, + { + "epoch": 0.39641675409571464, + "grad_norm": 0.7880864143371582, + "learning_rate": 0.00017250664005982066, + "loss": 2.7045, + "step": 4912 + }, + { + "epoch": 0.3964974578322976, + "grad_norm": 0.7460693120956421, + "learning_rate": 0.00017249576705195482, + "loss": 2.6976, + "step": 4913 + }, + { + "epoch": 0.39657816156888065, + "grad_norm": 0.7179895043373108, + "learning_rate": 0.00017248489223730496, + "loss": 2.6366, + "step": 4914 + }, + { + "epoch": 0.3966588653054636, + "grad_norm": 0.7737421989440918, + "learning_rate": 0.00017247401561614213, + "loss": 2.7116, + "step": 4915 + }, + { + "epoch": 0.39673956904204666, + "grad_norm": 0.8561483025550842, + "learning_rate": 0.0001724631371887374, + "loss": 2.6591, + "step": 4916 + }, + { + "epoch": 0.39682027277862963, + "grad_norm": 0.7616356611251831, + "learning_rate": 0.00017245225695536182, + "loss": 2.6436, + "step": 4917 + }, + { + "epoch": 0.39690097651521267, + "grad_norm": 0.7754645943641663, + "learning_rate": 0.0001724413749162866, + "loss": 2.6699, + "step": 4918 + }, + { + "epoch": 0.39698168025179564, + "grad_norm": 0.800165593624115, + "learning_rate": 0.000172430491071783, + "loss": 2.7155, + "step": 4919 + }, + { + "epoch": 0.3970623839883787, + "grad_norm": 0.8448799848556519, + 
"learning_rate": 0.00017241960542212223, + "loss": 2.6991, + "step": 4920 + }, + { + "epoch": 0.39714308772496165, + "grad_norm": 0.7106496095657349, + "learning_rate": 0.00017240871796757556, + "loss": 2.628, + "step": 4921 + }, + { + "epoch": 0.3972237914615447, + "grad_norm": 0.7332959175109863, + "learning_rate": 0.00017239782870841436, + "loss": 2.6159, + "step": 4922 + }, + { + "epoch": 0.39730449519812766, + "grad_norm": 0.7573551535606384, + "learning_rate": 0.00017238693764491002, + "loss": 2.67, + "step": 4923 + }, + { + "epoch": 0.3973851989347107, + "grad_norm": 0.7833136320114136, + "learning_rate": 0.00017237604477733399, + "loss": 2.7276, + "step": 4924 + }, + { + "epoch": 0.39746590267129367, + "grad_norm": 0.7233073711395264, + "learning_rate": 0.00017236515010595773, + "loss": 2.6654, + "step": 4925 + }, + { + "epoch": 0.3975466064078767, + "grad_norm": 0.7920324206352234, + "learning_rate": 0.00017235425363105273, + "loss": 2.7611, + "step": 4926 + }, + { + "epoch": 0.3976273101444597, + "grad_norm": 0.7096883058547974, + "learning_rate": 0.00017234335535289063, + "loss": 2.687, + "step": 4927 + }, + { + "epoch": 0.3977080138810427, + "grad_norm": 0.7231960296630859, + "learning_rate": 0.000172332455271743, + "loss": 2.6441, + "step": 4928 + }, + { + "epoch": 0.3977887176176257, + "grad_norm": 0.7852105498313904, + "learning_rate": 0.00017232155338788146, + "loss": 2.5948, + "step": 4929 + }, + { + "epoch": 0.3978694213542087, + "grad_norm": 0.788789689540863, + "learning_rate": 0.0001723106497015778, + "loss": 2.6797, + "step": 4930 + }, + { + "epoch": 0.3979501250907917, + "grad_norm": 0.7082793116569519, + "learning_rate": 0.00017229974421310377, + "loss": 2.6787, + "step": 4931 + }, + { + "epoch": 0.3980308288273747, + "grad_norm": 0.8157992362976074, + "learning_rate": 0.00017228883692273106, + "loss": 2.6367, + "step": 4932 + }, + { + "epoch": 0.3981115325639577, + "grad_norm": 0.7576673030853271, + "learning_rate": 0.00017227792783073157, 
+ "loss": 2.6826, + "step": 4933 + }, + { + "epoch": 0.39819223630054074, + "grad_norm": 0.7225388884544373, + "learning_rate": 0.00017226701693737718, + "loss": 2.668, + "step": 4934 + }, + { + "epoch": 0.3982729400371237, + "grad_norm": 0.7029562592506409, + "learning_rate": 0.00017225610424293985, + "loss": 2.6613, + "step": 4935 + }, + { + "epoch": 0.39835364377370674, + "grad_norm": 0.73081374168396, + "learning_rate": 0.0001722451897476915, + "loss": 2.6378, + "step": 4936 + }, + { + "epoch": 0.3984343475102897, + "grad_norm": 0.744008481502533, + "learning_rate": 0.0001722342734519042, + "loss": 2.6501, + "step": 4937 + }, + { + "epoch": 0.39851505124687275, + "grad_norm": 0.7482618093490601, + "learning_rate": 0.00017222335535584996, + "loss": 2.7287, + "step": 4938 + }, + { + "epoch": 0.39859575498345573, + "grad_norm": 0.6487892866134644, + "learning_rate": 0.00017221243545980093, + "loss": 2.6417, + "step": 4939 + }, + { + "epoch": 0.39867645872003876, + "grad_norm": 0.7894789576530457, + "learning_rate": 0.00017220151376402923, + "loss": 2.7431, + "step": 4940 + }, + { + "epoch": 0.39875716245662174, + "grad_norm": 0.8232294321060181, + "learning_rate": 0.00017219059026880708, + "loss": 2.6824, + "step": 4941 + }, + { + "epoch": 0.39883786619320477, + "grad_norm": 0.6844691634178162, + "learning_rate": 0.00017217966497440668, + "loss": 2.6294, + "step": 4942 + }, + { + "epoch": 0.39891856992978775, + "grad_norm": 0.7245259881019592, + "learning_rate": 0.00017216873788110037, + "loss": 2.6815, + "step": 4943 + }, + { + "epoch": 0.3989992736663708, + "grad_norm": 0.7197226881980896, + "learning_rate": 0.00017215780898916045, + "loss": 2.725, + "step": 4944 + }, + { + "epoch": 0.39907997740295376, + "grad_norm": 0.8391285538673401, + "learning_rate": 0.00017214687829885934, + "loss": 2.6724, + "step": 4945 + }, + { + "epoch": 0.3991606811395368, + "grad_norm": 0.7357564568519592, + "learning_rate": 0.00017213594581046938, + "loss": 2.7052, + "step": 4946 + 
}, + { + "epoch": 0.39924138487611976, + "grad_norm": 0.7611483931541443, + "learning_rate": 0.00017212501152426312, + "loss": 2.7214, + "step": 4947 + }, + { + "epoch": 0.39932208861270274, + "grad_norm": 0.7314950227737427, + "learning_rate": 0.00017211407544051306, + "loss": 2.6594, + "step": 4948 + }, + { + "epoch": 0.3994027923492858, + "grad_norm": 0.774131178855896, + "learning_rate": 0.00017210313755949169, + "loss": 2.6812, + "step": 4949 + }, + { + "epoch": 0.39948349608586875, + "grad_norm": 0.707003116607666, + "learning_rate": 0.00017209219788147167, + "loss": 2.7334, + "step": 4950 + }, + { + "epoch": 0.3995641998224518, + "grad_norm": 0.8179643154144287, + "learning_rate": 0.0001720812564067256, + "loss": 2.6554, + "step": 4951 + }, + { + "epoch": 0.39964490355903476, + "grad_norm": 0.6572005152702332, + "learning_rate": 0.00017207031313552621, + "loss": 2.6423, + "step": 4952 + }, + { + "epoch": 0.3997256072956178, + "grad_norm": 0.7663072943687439, + "learning_rate": 0.00017205936806814623, + "loss": 2.689, + "step": 4953 + }, + { + "epoch": 0.39980631103220077, + "grad_norm": 0.7351107001304626, + "learning_rate": 0.00017204842120485846, + "loss": 2.631, + "step": 4954 + }, + { + "epoch": 0.3998870147687838, + "grad_norm": 0.7754253149032593, + "learning_rate": 0.00017203747254593564, + "loss": 2.6371, + "step": 4955 + }, + { + "epoch": 0.3999677185053668, + "grad_norm": 0.7471042275428772, + "learning_rate": 0.00017202652209165074, + "loss": 2.6542, + "step": 4956 + }, + { + "epoch": 0.4000484222419498, + "grad_norm": 0.7357343435287476, + "learning_rate": 0.00017201556984227664, + "loss": 2.6226, + "step": 4957 + }, + { + "epoch": 0.4001291259785328, + "grad_norm": 0.8096252679824829, + "learning_rate": 0.00017200461579808626, + "loss": 2.6458, + "step": 4958 + }, + { + "epoch": 0.4002098297151158, + "grad_norm": 0.7622970938682556, + "learning_rate": 0.0001719936599593526, + "loss": 2.7129, + "step": 4959 + }, + { + "epoch": 0.4002905334516988, 
+ "grad_norm": 0.7374953627586365, + "learning_rate": 0.00017198270232634882, + "loss": 2.696, + "step": 4960 + }, + { + "epoch": 0.4003712371882818, + "grad_norm": 0.7897924184799194, + "learning_rate": 0.00017197174289934787, + "loss": 2.7508, + "step": 4961 + }, + { + "epoch": 0.4004519409248648, + "grad_norm": 0.7047984004020691, + "learning_rate": 0.00017196078167862298, + "loss": 2.6733, + "step": 4962 + }, + { + "epoch": 0.40053264466144783, + "grad_norm": 0.7866294980049133, + "learning_rate": 0.0001719498186644473, + "loss": 2.694, + "step": 4963 + }, + { + "epoch": 0.4006133483980308, + "grad_norm": 0.739923894405365, + "learning_rate": 0.00017193885385709409, + "loss": 2.7125, + "step": 4964 + }, + { + "epoch": 0.40069405213461384, + "grad_norm": 0.7506374716758728, + "learning_rate": 0.00017192788725683652, + "loss": 2.627, + "step": 4965 + }, + { + "epoch": 0.4007747558711968, + "grad_norm": 0.6591607928276062, + "learning_rate": 0.00017191691886394802, + "loss": 2.6723, + "step": 4966 + }, + { + "epoch": 0.40085545960777985, + "grad_norm": 0.7748788595199585, + "learning_rate": 0.00017190594867870192, + "loss": 2.6486, + "step": 4967 + }, + { + "epoch": 0.40093616334436283, + "grad_norm": 0.7518232464790344, + "learning_rate": 0.0001718949767013716, + "loss": 2.6879, + "step": 4968 + }, + { + "epoch": 0.40101686708094586, + "grad_norm": 0.7360039949417114, + "learning_rate": 0.00017188400293223052, + "loss": 2.6506, + "step": 4969 + }, + { + "epoch": 0.40109757081752884, + "grad_norm": 0.7217130064964294, + "learning_rate": 0.0001718730273715522, + "loss": 2.6263, + "step": 4970 + }, + { + "epoch": 0.40117827455411187, + "grad_norm": 0.7246078252792358, + "learning_rate": 0.00017186205001961015, + "loss": 2.6222, + "step": 4971 + }, + { + "epoch": 0.40125897829069485, + "grad_norm": 0.7566879391670227, + "learning_rate": 0.00017185107087667794, + "loss": 2.7003, + "step": 4972 + }, + { + "epoch": 0.4013396820272779, + "grad_norm": 0.7881271243095398, 
+ "learning_rate": 0.00017184008994302924, + "loss": 2.6463, + "step": 4973 + }, + { + "epoch": 0.40142038576386085, + "grad_norm": 0.7307420372962952, + "learning_rate": 0.00017182910721893775, + "loss": 2.667, + "step": 4974 + }, + { + "epoch": 0.4015010895004439, + "grad_norm": 0.7088132500648499, + "learning_rate": 0.00017181812270467708, + "loss": 2.6073, + "step": 4975 + }, + { + "epoch": 0.40158179323702686, + "grad_norm": 0.7839647531509399, + "learning_rate": 0.0001718071364005211, + "loss": 2.6594, + "step": 4976 + }, + { + "epoch": 0.4016624969736099, + "grad_norm": 0.7472013235092163, + "learning_rate": 0.00017179614830674353, + "loss": 2.737, + "step": 4977 + }, + { + "epoch": 0.40174320071019287, + "grad_norm": 0.7241616249084473, + "learning_rate": 0.0001717851584236183, + "loss": 2.6615, + "step": 4978 + }, + { + "epoch": 0.4018239044467759, + "grad_norm": 0.7918941378593445, + "learning_rate": 0.00017177416675141929, + "loss": 2.6774, + "step": 4979 + }, + { + "epoch": 0.4019046081833589, + "grad_norm": 0.801003098487854, + "learning_rate": 0.00017176317329042039, + "loss": 2.6749, + "step": 4980 + }, + { + "epoch": 0.4019853119199419, + "grad_norm": 0.7556802034378052, + "learning_rate": 0.00017175217804089564, + "loss": 2.6197, + "step": 4981 + }, + { + "epoch": 0.4020660156565249, + "grad_norm": 0.7539604902267456, + "learning_rate": 0.00017174118100311904, + "loss": 2.6222, + "step": 4982 + }, + { + "epoch": 0.4021467193931079, + "grad_norm": 0.741436243057251, + "learning_rate": 0.0001717301821773647, + "loss": 2.6471, + "step": 4983 + }, + { + "epoch": 0.4022274231296909, + "grad_norm": 0.7449339628219604, + "learning_rate": 0.0001717191815639067, + "loss": 2.6448, + "step": 4984 + }, + { + "epoch": 0.40230812686627393, + "grad_norm": 0.7771497964859009, + "learning_rate": 0.0001717081791630192, + "loss": 2.673, + "step": 4985 + }, + { + "epoch": 0.4023888306028569, + "grad_norm": 0.6916669607162476, + "learning_rate": 0.00017169717497497646, 
+ "loss": 2.6025, + "step": 4986 + }, + { + "epoch": 0.40246953433943994, + "grad_norm": 0.7373276948928833, + "learning_rate": 0.0001716861690000527, + "loss": 2.6783, + "step": 4987 + }, + { + "epoch": 0.4025502380760229, + "grad_norm": 0.7756158709526062, + "learning_rate": 0.0001716751612385222, + "loss": 2.7296, + "step": 4988 + }, + { + "epoch": 0.40263094181260595, + "grad_norm": 0.7725681066513062, + "learning_rate": 0.00017166415169065933, + "loss": 2.7169, + "step": 4989 + }, + { + "epoch": 0.4027116455491889, + "grad_norm": 0.7165024280548096, + "learning_rate": 0.00017165314035673846, + "loss": 2.677, + "step": 4990 + }, + { + "epoch": 0.40279234928577196, + "grad_norm": 0.8888981938362122, + "learning_rate": 0.00017164212723703404, + "loss": 2.7694, + "step": 4991 + }, + { + "epoch": 0.40287305302235493, + "grad_norm": 0.7439224720001221, + "learning_rate": 0.00017163111233182052, + "loss": 2.674, + "step": 4992 + }, + { + "epoch": 0.40295375675893796, + "grad_norm": 0.6948431730270386, + "learning_rate": 0.00017162009564137244, + "loss": 2.6595, + "step": 4993 + }, + { + "epoch": 0.40303446049552094, + "grad_norm": 0.7274380922317505, + "learning_rate": 0.00017160907716596438, + "loss": 2.649, + "step": 4994 + }, + { + "epoch": 0.403115164232104, + "grad_norm": 0.7127148509025574, + "learning_rate": 0.0001715980569058709, + "loss": 2.6883, + "step": 4995 + }, + { + "epoch": 0.40319586796868695, + "grad_norm": 0.7129155993461609, + "learning_rate": 0.00017158703486136668, + "loss": 2.6516, + "step": 4996 + }, + { + "epoch": 0.40327657170527, + "grad_norm": 0.7848126292228699, + "learning_rate": 0.00017157601103272646, + "loss": 2.6778, + "step": 4997 + }, + { + "epoch": 0.40335727544185296, + "grad_norm": 0.752268373966217, + "learning_rate": 0.0001715649854202249, + "loss": 2.7228, + "step": 4998 + }, + { + "epoch": 0.40343797917843593, + "grad_norm": 0.7750338912010193, + "learning_rate": 0.00017155395802413684, + "loss": 2.6338, + "step": 4999 + }, 
+ { + "epoch": 0.40351868291501897, + "grad_norm": 0.7165457010269165, + "learning_rate": 0.00017154292884473713, + "loss": 2.6195, + "step": 5000 + }, + { + "epoch": 0.40351868291501897, + "eval_loss": 2.585501194000244, + "eval_runtime": 901.8519, + "eval_samples_per_second": 2.905, + "eval_steps_per_second": 0.485, + "step": 5000 + }, + { + "epoch": 0.40359938665160194, + "grad_norm": 0.8118943572044373, + "learning_rate": 0.00017153189788230062, + "loss": 2.6649, + "step": 5001 + }, + { + "epoch": 0.403680090388185, + "grad_norm": 0.722984790802002, + "learning_rate": 0.00017152086513710221, + "loss": 2.6929, + "step": 5002 + }, + { + "epoch": 0.40376079412476795, + "grad_norm": 0.700690507888794, + "learning_rate": 0.00017150983060941686, + "loss": 2.6368, + "step": 5003 + }, + { + "epoch": 0.403841497861351, + "grad_norm": 0.7331504225730896, + "learning_rate": 0.00017149879429951965, + "loss": 2.6826, + "step": 5004 + }, + { + "epoch": 0.40392220159793396, + "grad_norm": 0.7312643527984619, + "learning_rate": 0.00017148775620768553, + "loss": 2.6279, + "step": 5005 + }, + { + "epoch": 0.404002905334517, + "grad_norm": 0.7488462924957275, + "learning_rate": 0.00017147671633418972, + "loss": 2.6711, + "step": 5006 + }, + { + "epoch": 0.40408360907109997, + "grad_norm": 0.8620340824127197, + "learning_rate": 0.00017146567467930725, + "loss": 2.6637, + "step": 5007 + }, + { + "epoch": 0.404164312807683, + "grad_norm": 0.683907151222229, + "learning_rate": 0.00017145463124331335, + "loss": 2.6331, + "step": 5008 + }, + { + "epoch": 0.404245016544266, + "grad_norm": 0.7389389276504517, + "learning_rate": 0.0001714435860264833, + "loss": 2.7232, + "step": 5009 + }, + { + "epoch": 0.404325720280849, + "grad_norm": 0.7456515431404114, + "learning_rate": 0.00017143253902909228, + "loss": 2.6363, + "step": 5010 + }, + { + "epoch": 0.404406424017432, + "grad_norm": 0.7044962644577026, + "learning_rate": 0.0001714214902514157, + "loss": 2.6672, + "step": 5011 + }, + { + 
"epoch": 0.404487127754015, + "grad_norm": 0.7410328984260559, + "learning_rate": 0.00017141043969372887, + "loss": 2.6059, + "step": 5012 + }, + { + "epoch": 0.404567831490598, + "grad_norm": 0.6697140336036682, + "learning_rate": 0.00017139938735630722, + "loss": 2.7151, + "step": 5013 + }, + { + "epoch": 0.404648535227181, + "grad_norm": 0.746675431728363, + "learning_rate": 0.00017138833323942617, + "loss": 2.6792, + "step": 5014 + }, + { + "epoch": 0.404729238963764, + "grad_norm": 0.7724997401237488, + "learning_rate": 0.00017137727734336129, + "loss": 2.6234, + "step": 5015 + }, + { + "epoch": 0.40480994270034704, + "grad_norm": 0.8014429211616516, + "learning_rate": 0.00017136621966838805, + "loss": 2.6795, + "step": 5016 + }, + { + "epoch": 0.40489064643693, + "grad_norm": 0.6900430917739868, + "learning_rate": 0.00017135516021478205, + "loss": 2.7127, + "step": 5017 + }, + { + "epoch": 0.40497135017351304, + "grad_norm": 0.6648666858673096, + "learning_rate": 0.00017134409898281896, + "loss": 2.6564, + "step": 5018 + }, + { + "epoch": 0.405052053910096, + "grad_norm": 0.7054181098937988, + "learning_rate": 0.00017133303597277442, + "loss": 2.6652, + "step": 5019 + }, + { + "epoch": 0.40513275764667905, + "grad_norm": 0.6847733855247498, + "learning_rate": 0.00017132197118492414, + "loss": 2.6997, + "step": 5020 + }, + { + "epoch": 0.40521346138326203, + "grad_norm": 0.7047749757766724, + "learning_rate": 0.00017131090461954392, + "loss": 2.6752, + "step": 5021 + }, + { + "epoch": 0.40529416511984506, + "grad_norm": 0.7549976706504822, + "learning_rate": 0.00017129983627690957, + "loss": 2.6736, + "step": 5022 + }, + { + "epoch": 0.40537486885642804, + "grad_norm": 0.7436367273330688, + "learning_rate": 0.00017128876615729686, + "loss": 2.7189, + "step": 5023 + }, + { + "epoch": 0.40545557259301107, + "grad_norm": 0.6515071988105774, + "learning_rate": 0.00017127769426098177, + "loss": 2.6422, + "step": 5024 + }, + { + "epoch": 0.40553627632959405, + 
"grad_norm": 0.6960858702659607, + "learning_rate": 0.00017126662058824024, + "loss": 2.6619, + "step": 5025 + }, + { + "epoch": 0.4056169800661771, + "grad_norm": 0.8075968623161316, + "learning_rate": 0.0001712555451393482, + "loss": 2.6678, + "step": 5026 + }, + { + "epoch": 0.40569768380276006, + "grad_norm": 0.6864624619483948, + "learning_rate": 0.00017124446791458176, + "loss": 2.6331, + "step": 5027 + }, + { + "epoch": 0.4057783875393431, + "grad_norm": 0.7218763828277588, + "learning_rate": 0.0001712333889142169, + "loss": 2.6316, + "step": 5028 + }, + { + "epoch": 0.40585909127592606, + "grad_norm": 0.7024715542793274, + "learning_rate": 0.0001712223081385298, + "loss": 2.623, + "step": 5029 + }, + { + "epoch": 0.4059397950125091, + "grad_norm": 0.6681575775146484, + "learning_rate": 0.0001712112255877966, + "loss": 2.6786, + "step": 5030 + }, + { + "epoch": 0.4060204987490921, + "grad_norm": 0.7249817848205566, + "learning_rate": 0.0001712001412622935, + "loss": 2.6179, + "step": 5031 + }, + { + "epoch": 0.4061012024856751, + "grad_norm": 0.7178316116333008, + "learning_rate": 0.00017118905516229677, + "loss": 2.696, + "step": 5032 + }, + { + "epoch": 0.4061819062222581, + "grad_norm": 0.7838767766952515, + "learning_rate": 0.0001711779672880827, + "loss": 2.6881, + "step": 5033 + }, + { + "epoch": 0.4062626099588411, + "grad_norm": 0.799937903881073, + "learning_rate": 0.0001711668776399276, + "loss": 2.7587, + "step": 5034 + }, + { + "epoch": 0.4063433136954241, + "grad_norm": 0.7622246146202087, + "learning_rate": 0.0001711557862181079, + "loss": 2.6621, + "step": 5035 + }, + { + "epoch": 0.4064240174320071, + "grad_norm": 0.7158814072608948, + "learning_rate": 0.00017114469302290003, + "loss": 2.6421, + "step": 5036 + }, + { + "epoch": 0.4065047211685901, + "grad_norm": 0.7913404107093811, + "learning_rate": 0.0001711335980545804, + "loss": 2.6323, + "step": 5037 + }, + { + "epoch": 0.40658542490517313, + "grad_norm": 0.718325138092041, + 
"learning_rate": 0.00017112250131342556, + "loss": 2.6171, + "step": 5038 + }, + { + "epoch": 0.4066661286417561, + "grad_norm": 0.7793646454811096, + "learning_rate": 0.0001711114027997121, + "loss": 2.7494, + "step": 5039 + }, + { + "epoch": 0.40674683237833914, + "grad_norm": 0.7774816155433655, + "learning_rate": 0.00017110030251371656, + "loss": 2.5534, + "step": 5040 + }, + { + "epoch": 0.4068275361149221, + "grad_norm": 0.8547549247741699, + "learning_rate": 0.00017108920045571564, + "loss": 2.7155, + "step": 5041 + }, + { + "epoch": 0.40690823985150515, + "grad_norm": 0.7685851454734802, + "learning_rate": 0.000171078096625986, + "loss": 2.6109, + "step": 5042 + }, + { + "epoch": 0.4069889435880881, + "grad_norm": 0.7953611016273499, + "learning_rate": 0.00017106699102480445, + "loss": 2.7034, + "step": 5043 + }, + { + "epoch": 0.40706964732467116, + "grad_norm": 0.7550730109214783, + "learning_rate": 0.00017105588365244764, + "loss": 2.7026, + "step": 5044 + }, + { + "epoch": 0.40715035106125413, + "grad_norm": 0.7036548256874084, + "learning_rate": 0.0001710447745091925, + "loss": 2.6246, + "step": 5045 + }, + { + "epoch": 0.40723105479783717, + "grad_norm": 0.7154512405395508, + "learning_rate": 0.00017103366359531586, + "loss": 2.6592, + "step": 5046 + }, + { + "epoch": 0.40731175853442014, + "grad_norm": 0.7773932218551636, + "learning_rate": 0.00017102255091109463, + "loss": 2.6458, + "step": 5047 + }, + { + "epoch": 0.4073924622710032, + "grad_norm": 0.7458996176719666, + "learning_rate": 0.0001710114364568058, + "loss": 2.643, + "step": 5048 + }, + { + "epoch": 0.40747316600758615, + "grad_norm": 0.7465376257896423, + "learning_rate": 0.00017100032023272633, + "loss": 2.6677, + "step": 5049 + }, + { + "epoch": 0.40755386974416913, + "grad_norm": 0.7340850830078125, + "learning_rate": 0.0001709892022391333, + "loss": 2.6372, + "step": 5050 + }, + { + "epoch": 0.40763457348075216, + "grad_norm": 0.7189164757728577, + "learning_rate": 
0.00017097808247630377, + "loss": 2.6524, + "step": 5051 + }, + { + "epoch": 0.40771527721733514, + "grad_norm": 0.6954184174537659, + "learning_rate": 0.0001709669609445149, + "loss": 2.7383, + "step": 5052 + }, + { + "epoch": 0.40779598095391817, + "grad_norm": 0.736409604549408, + "learning_rate": 0.00017095583764404384, + "loss": 2.6424, + "step": 5053 + }, + { + "epoch": 0.40787668469050115, + "grad_norm": 0.6773545742034912, + "learning_rate": 0.0001709447125751678, + "loss": 2.6557, + "step": 5054 + }, + { + "epoch": 0.4079573884270842, + "grad_norm": 0.718748927116394, + "learning_rate": 0.00017093358573816412, + "loss": 2.6884, + "step": 5055 + }, + { + "epoch": 0.40803809216366715, + "grad_norm": 0.8276848793029785, + "learning_rate": 0.00017092245713331002, + "loss": 2.6642, + "step": 5056 + }, + { + "epoch": 0.4081187959002502, + "grad_norm": 0.7694761157035828, + "learning_rate": 0.00017091132676088294, + "loss": 2.644, + "step": 5057 + }, + { + "epoch": 0.40819949963683316, + "grad_norm": 0.766724705696106, + "learning_rate": 0.0001709001946211602, + "loss": 2.6918, + "step": 5058 + }, + { + "epoch": 0.4082802033734162, + "grad_norm": 0.7067074775695801, + "learning_rate": 0.00017088906071441927, + "loss": 2.7228, + "step": 5059 + }, + { + "epoch": 0.40836090710999917, + "grad_norm": 0.7216899991035461, + "learning_rate": 0.00017087792504093767, + "loss": 2.7068, + "step": 5060 + }, + { + "epoch": 0.4084416108465822, + "grad_norm": 0.6728984713554382, + "learning_rate": 0.00017086678760099287, + "loss": 2.686, + "step": 5061 + }, + { + "epoch": 0.4085223145831652, + "grad_norm": 0.7546882033348083, + "learning_rate": 0.0001708556483948625, + "loss": 2.6907, + "step": 5062 + }, + { + "epoch": 0.4086030183197482, + "grad_norm": 0.7471179962158203, + "learning_rate": 0.00017084450742282416, + "loss": 2.6857, + "step": 5063 + }, + { + "epoch": 0.4086837220563312, + "grad_norm": 0.7879743576049805, + "learning_rate": 0.00017083336468515548, + "loss": 
2.7224, + "step": 5064 + }, + { + "epoch": 0.4087644257929142, + "grad_norm": 0.691343367099762, + "learning_rate": 0.00017082222018213422, + "loss": 2.6561, + "step": 5065 + }, + { + "epoch": 0.4088451295294972, + "grad_norm": 0.7497386336326599, + "learning_rate": 0.00017081107391403805, + "loss": 2.6317, + "step": 5066 + }, + { + "epoch": 0.40892583326608023, + "grad_norm": 0.6846269965171814, + "learning_rate": 0.00017079992588114485, + "loss": 2.6522, + "step": 5067 + }, + { + "epoch": 0.4090065370026632, + "grad_norm": 0.7312905192375183, + "learning_rate": 0.0001707887760837324, + "loss": 2.588, + "step": 5068 + }, + { + "epoch": 0.40908724073924624, + "grad_norm": 0.6966867446899414, + "learning_rate": 0.00017077762452207866, + "loss": 2.6316, + "step": 5069 + }, + { + "epoch": 0.4091679444758292, + "grad_norm": 0.6882073283195496, + "learning_rate": 0.00017076647119646147, + "loss": 2.6977, + "step": 5070 + }, + { + "epoch": 0.40924864821241225, + "grad_norm": 0.7392483949661255, + "learning_rate": 0.00017075531610715884, + "loss": 2.6768, + "step": 5071 + }, + { + "epoch": 0.4093293519489952, + "grad_norm": 0.7311073541641235, + "learning_rate": 0.00017074415925444876, + "loss": 2.6628, + "step": 5072 + }, + { + "epoch": 0.40941005568557826, + "grad_norm": 0.6769934296607971, + "learning_rate": 0.00017073300063860934, + "loss": 2.6438, + "step": 5073 + }, + { + "epoch": 0.40949075942216123, + "grad_norm": 0.736456573009491, + "learning_rate": 0.00017072184025991862, + "loss": 2.6151, + "step": 5074 + }, + { + "epoch": 0.40957146315874426, + "grad_norm": 0.7026283740997314, + "learning_rate": 0.00017071067811865476, + "loss": 2.6726, + "step": 5075 + }, + { + "epoch": 0.40965216689532724, + "grad_norm": 0.6825234293937683, + "learning_rate": 0.00017069951421509597, + "loss": 2.6795, + "step": 5076 + }, + { + "epoch": 0.4097328706319103, + "grad_norm": 0.7243828773498535, + "learning_rate": 0.0001706883485495205, + "loss": 2.687, + "step": 5077 + }, + { + 
"epoch": 0.40981357436849325, + "grad_norm": 0.7300469875335693, + "learning_rate": 0.00017067718112220658, + "loss": 2.6268, + "step": 5078 + }, + { + "epoch": 0.4098942781050763, + "grad_norm": 0.698095440864563, + "learning_rate": 0.00017066601193343255, + "loss": 2.6461, + "step": 5079 + }, + { + "epoch": 0.40997498184165926, + "grad_norm": 0.7318777441978455, + "learning_rate": 0.00017065484098347677, + "loss": 2.6817, + "step": 5080 + }, + { + "epoch": 0.4100556855782423, + "grad_norm": 0.7681582570075989, + "learning_rate": 0.00017064366827261772, + "loss": 2.7309, + "step": 5081 + }, + { + "epoch": 0.41013638931482527, + "grad_norm": 0.7690179944038391, + "learning_rate": 0.0001706324938011337, + "loss": 2.6292, + "step": 5082 + }, + { + "epoch": 0.4102170930514083, + "grad_norm": 0.6745284199714661, + "learning_rate": 0.00017062131756930338, + "loss": 2.7133, + "step": 5083 + }, + { + "epoch": 0.4102977967879913, + "grad_norm": 0.7524279952049255, + "learning_rate": 0.00017061013957740518, + "loss": 2.6237, + "step": 5084 + }, + { + "epoch": 0.4103785005245743, + "grad_norm": 0.7813692092895508, + "learning_rate": 0.00017059895982571773, + "loss": 2.6953, + "step": 5085 + }, + { + "epoch": 0.4104592042611573, + "grad_norm": 0.7128829956054688, + "learning_rate": 0.00017058777831451967, + "loss": 2.6771, + "step": 5086 + }, + { + "epoch": 0.4105399079977403, + "grad_norm": 0.7249834537506104, + "learning_rate": 0.00017057659504408963, + "loss": 2.6376, + "step": 5087 + }, + { + "epoch": 0.4106206117343233, + "grad_norm": 0.7742593288421631, + "learning_rate": 0.00017056541001470637, + "loss": 2.6227, + "step": 5088 + }, + { + "epoch": 0.4107013154709063, + "grad_norm": 0.6994228959083557, + "learning_rate": 0.00017055422322664863, + "loss": 2.6573, + "step": 5089 + }, + { + "epoch": 0.4107820192074893, + "grad_norm": 0.7144249081611633, + "learning_rate": 0.00017054303468019518, + "loss": 2.6602, + "step": 5090 + }, + { + "epoch": 0.41086272294407233, + 
"grad_norm": 0.7695099711418152, + "learning_rate": 0.00017053184437562497, + "loss": 2.6516, + "step": 5091 + }, + { + "epoch": 0.4109434266806553, + "grad_norm": 0.7610031962394714, + "learning_rate": 0.00017052065231321678, + "loss": 2.6963, + "step": 5092 + }, + { + "epoch": 0.41102413041723834, + "grad_norm": 0.7117859721183777, + "learning_rate": 0.0001705094584932496, + "loss": 2.6954, + "step": 5093 + }, + { + "epoch": 0.4111048341538213, + "grad_norm": 0.7891486287117004, + "learning_rate": 0.00017049826291600244, + "loss": 2.7265, + "step": 5094 + }, + { + "epoch": 0.41118553789040435, + "grad_norm": 0.7347370386123657, + "learning_rate": 0.00017048706558175423, + "loss": 2.658, + "step": 5095 + }, + { + "epoch": 0.41126624162698733, + "grad_norm": 0.7541289925575256, + "learning_rate": 0.00017047586649078414, + "loss": 2.6596, + "step": 5096 + }, + { + "epoch": 0.41134694536357036, + "grad_norm": 0.7471255660057068, + "learning_rate": 0.00017046466564337118, + "loss": 2.7008, + "step": 5097 + }, + { + "epoch": 0.41142764910015334, + "grad_norm": 0.7566937208175659, + "learning_rate": 0.00017045346303979457, + "loss": 2.7006, + "step": 5098 + }, + { + "epoch": 0.41150835283673637, + "grad_norm": 0.6991304159164429, + "learning_rate": 0.00017044225868033353, + "loss": 2.6846, + "step": 5099 + }, + { + "epoch": 0.41158905657331935, + "grad_norm": 0.7286314368247986, + "learning_rate": 0.00017043105256526724, + "loss": 2.6219, + "step": 5100 + }, + { + "epoch": 0.4116697603099023, + "grad_norm": 0.6953727006912231, + "learning_rate": 0.000170419844694875, + "loss": 2.6093, + "step": 5101 + }, + { + "epoch": 0.41175046404648535, + "grad_norm": 0.6942756772041321, + "learning_rate": 0.00017040863506943615, + "loss": 2.6399, + "step": 5102 + }, + { + "epoch": 0.41183116778306833, + "grad_norm": 0.7513531446456909, + "learning_rate": 0.00017039742368923005, + "loss": 2.6187, + "step": 5103 + }, + { + "epoch": 0.41191187151965136, + "grad_norm": 
0.7530633211135864, + "learning_rate": 0.00017038621055453617, + "loss": 2.6124, + "step": 5104 + }, + { + "epoch": 0.41199257525623434, + "grad_norm": 0.7487555146217346, + "learning_rate": 0.00017037499566563392, + "loss": 2.6331, + "step": 5105 + }, + { + "epoch": 0.41207327899281737, + "grad_norm": 0.7641858458518982, + "learning_rate": 0.00017036377902280282, + "loss": 2.6875, + "step": 5106 + }, + { + "epoch": 0.41215398272940035, + "grad_norm": 0.6962767839431763, + "learning_rate": 0.0001703525606263224, + "loss": 2.6538, + "step": 5107 + }, + { + "epoch": 0.4122346864659834, + "grad_norm": 0.8183409571647644, + "learning_rate": 0.0001703413404764723, + "loss": 2.6204, + "step": 5108 + }, + { + "epoch": 0.41231539020256636, + "grad_norm": 0.7029808759689331, + "learning_rate": 0.00017033011857353207, + "loss": 2.6369, + "step": 5109 + }, + { + "epoch": 0.4123960939391494, + "grad_norm": 0.7171663045883179, + "learning_rate": 0.00017031889491778149, + "loss": 2.6211, + "step": 5110 + }, + { + "epoch": 0.41247679767573237, + "grad_norm": 0.7456090450286865, + "learning_rate": 0.0001703076695095002, + "loss": 2.6574, + "step": 5111 + }, + { + "epoch": 0.4125575014123154, + "grad_norm": 0.7468575239181519, + "learning_rate": 0.000170296442348968, + "loss": 2.598, + "step": 5112 + }, + { + "epoch": 0.4126382051488984, + "grad_norm": 0.7106603384017944, + "learning_rate": 0.0001702852134364647, + "loss": 2.6577, + "step": 5113 + }, + { + "epoch": 0.4127189088854814, + "grad_norm": 0.7788330912590027, + "learning_rate": 0.00017027398277227017, + "loss": 2.6797, + "step": 5114 + }, + { + "epoch": 0.4127996126220644, + "grad_norm": 0.7794120907783508, + "learning_rate": 0.00017026275035666427, + "loss": 2.5834, + "step": 5115 + }, + { + "epoch": 0.4128803163586474, + "grad_norm": 0.7270684838294983, + "learning_rate": 0.00017025151618992702, + "loss": 2.7153, + "step": 5116 + }, + { + "epoch": 0.4129610200952304, + "grad_norm": 0.8169006109237671, + "learning_rate": 
0.00017024028027233827, + "loss": 2.6786, + "step": 5117 + }, + { + "epoch": 0.4130417238318134, + "grad_norm": 0.8053112626075745, + "learning_rate": 0.00017022904260417815, + "loss": 2.6456, + "step": 5118 + }, + { + "epoch": 0.4131224275683964, + "grad_norm": 0.7646365165710449, + "learning_rate": 0.0001702178031857267, + "loss": 2.6784, + "step": 5119 + }, + { + "epoch": 0.41320313130497943, + "grad_norm": 0.7878902554512024, + "learning_rate": 0.00017020656201726406, + "loss": 2.66, + "step": 5120 + }, + { + "epoch": 0.4132838350415624, + "grad_norm": 0.8602383732795715, + "learning_rate": 0.00017019531909907037, + "loss": 2.7018, + "step": 5121 + }, + { + "epoch": 0.41336453877814544, + "grad_norm": 0.801092267036438, + "learning_rate": 0.00017018407443142585, + "loss": 2.7728, + "step": 5122 + }, + { + "epoch": 0.4134452425147284, + "grad_norm": 0.7372604012489319, + "learning_rate": 0.00017017282801461074, + "loss": 2.6588, + "step": 5123 + }, + { + "epoch": 0.41352594625131145, + "grad_norm": 0.7553830146789551, + "learning_rate": 0.0001701615798489053, + "loss": 2.6844, + "step": 5124 + }, + { + "epoch": 0.4136066499878944, + "grad_norm": 0.7699872255325317, + "learning_rate": 0.0001701503299345899, + "loss": 2.6523, + "step": 5125 + }, + { + "epoch": 0.41368735372447746, + "grad_norm": 0.7087047696113586, + "learning_rate": 0.0001701390782719449, + "loss": 2.6785, + "step": 5126 + }, + { + "epoch": 0.41376805746106043, + "grad_norm": 0.7835792303085327, + "learning_rate": 0.0001701278248612507, + "loss": 2.7064, + "step": 5127 + }, + { + "epoch": 0.41384876119764347, + "grad_norm": 0.7833154201507568, + "learning_rate": 0.0001701165697027878, + "loss": 2.6552, + "step": 5128 + }, + { + "epoch": 0.41392946493422644, + "grad_norm": 0.8240615725517273, + "learning_rate": 0.0001701053127968367, + "loss": 2.7074, + "step": 5129 + }, + { + "epoch": 0.4140101686708095, + "grad_norm": 0.7612149119377136, + "learning_rate": 0.0001700940541436779, + "loss": 
2.7484, + "step": 5130 + }, + { + "epoch": 0.41409087240739245, + "grad_norm": 0.7795391082763672, + "learning_rate": 0.00017008279374359212, + "loss": 2.6022, + "step": 5131 + }, + { + "epoch": 0.4141715761439755, + "grad_norm": 0.7714587450027466, + "learning_rate": 0.00017007153159685992, + "loss": 2.6529, + "step": 5132 + }, + { + "epoch": 0.41425227988055846, + "grad_norm": 0.7821317911148071, + "learning_rate": 0.00017006026770376194, + "loss": 2.6356, + "step": 5133 + }, + { + "epoch": 0.4143329836171415, + "grad_norm": 0.7300596833229065, + "learning_rate": 0.00017004900206457897, + "loss": 2.6552, + "step": 5134 + }, + { + "epoch": 0.41441368735372447, + "grad_norm": 0.780505359172821, + "learning_rate": 0.00017003773467959174, + "loss": 2.675, + "step": 5135 + }, + { + "epoch": 0.4144943910903075, + "grad_norm": 0.7107391357421875, + "learning_rate": 0.00017002646554908107, + "loss": 2.7096, + "step": 5136 + }, + { + "epoch": 0.4145750948268905, + "grad_norm": 0.7358834743499756, + "learning_rate": 0.0001700151946733279, + "loss": 2.6619, + "step": 5137 + }, + { + "epoch": 0.4146557985634735, + "grad_norm": 0.7573859095573425, + "learning_rate": 0.00017000392205261298, + "loss": 2.6234, + "step": 5138 + }, + { + "epoch": 0.4147365023000565, + "grad_norm": 0.7032024264335632, + "learning_rate": 0.00016999264768721738, + "loss": 2.6096, + "step": 5139 + }, + { + "epoch": 0.4148172060366395, + "grad_norm": 0.743813693523407, + "learning_rate": 0.00016998137157742203, + "loss": 2.6782, + "step": 5140 + }, + { + "epoch": 0.4148979097732225, + "grad_norm": 0.8861347436904907, + "learning_rate": 0.00016997009372350793, + "loss": 2.6645, + "step": 5141 + }, + { + "epoch": 0.4149786135098055, + "grad_norm": 0.7598684430122375, + "learning_rate": 0.00016995881412575623, + "loss": 2.649, + "step": 5142 + }, + { + "epoch": 0.4150593172463885, + "grad_norm": 0.7535565495491028, + "learning_rate": 0.00016994753278444798, + "loss": 2.6449, + "step": 5143 + }, + { + 
"epoch": 0.41514002098297154, + "grad_norm": 0.7073138356208801, + "learning_rate": 0.0001699362496998644, + "loss": 2.6253, + "step": 5144 + }, + { + "epoch": 0.4152207247195545, + "grad_norm": 0.7161526679992676, + "learning_rate": 0.00016992496487228662, + "loss": 2.6623, + "step": 5145 + }, + { + "epoch": 0.41530142845613754, + "grad_norm": 0.8284714818000793, + "learning_rate": 0.00016991367830199595, + "loss": 2.7363, + "step": 5146 + }, + { + "epoch": 0.4153821321927205, + "grad_norm": 0.7127673625946045, + "learning_rate": 0.0001699023899892737, + "loss": 2.6274, + "step": 5147 + }, + { + "epoch": 0.41546283592930355, + "grad_norm": 0.7496370673179626, + "learning_rate": 0.00016989109993440112, + "loss": 2.6364, + "step": 5148 + }, + { + "epoch": 0.41554353966588653, + "grad_norm": 0.7616143822669983, + "learning_rate": 0.00016987980813765963, + "loss": 2.7225, + "step": 5149 + }, + { + "epoch": 0.41562424340246956, + "grad_norm": 0.6935909986495972, + "learning_rate": 0.00016986851459933067, + "loss": 2.6109, + "step": 5150 + }, + { + "epoch": 0.41570494713905254, + "grad_norm": 0.721023678779602, + "learning_rate": 0.00016985721931969566, + "loss": 2.6993, + "step": 5151 + }, + { + "epoch": 0.4157856508756355, + "grad_norm": 0.8216699361801147, + "learning_rate": 0.00016984592229903617, + "loss": 2.6512, + "step": 5152 + }, + { + "epoch": 0.41586635461221855, + "grad_norm": 0.7425234913825989, + "learning_rate": 0.00016983462353763372, + "loss": 2.5903, + "step": 5153 + }, + { + "epoch": 0.4159470583488015, + "grad_norm": 0.7292542457580566, + "learning_rate": 0.00016982332303576986, + "loss": 2.692, + "step": 5154 + }, + { + "epoch": 0.41602776208538456, + "grad_norm": 0.7466831803321838, + "learning_rate": 0.0001698120207937263, + "loss": 2.7145, + "step": 5155 + }, + { + "epoch": 0.41610846582196753, + "grad_norm": 0.7271949648857117, + "learning_rate": 0.00016980071681178471, + "loss": 2.655, + "step": 5156 + }, + { + "epoch": 0.41618916955855056, + 
"grad_norm": 0.7505547404289246, + "learning_rate": 0.00016978941109022677, + "loss": 2.7167, + "step": 5157 + }, + { + "epoch": 0.41626987329513354, + "grad_norm": 0.7307172417640686, + "learning_rate": 0.00016977810362933427, + "loss": 2.6735, + "step": 5158 + }, + { + "epoch": 0.4163505770317166, + "grad_norm": 0.7839170098304749, + "learning_rate": 0.00016976679442938904, + "loss": 2.6818, + "step": 5159 + }, + { + "epoch": 0.41643128076829955, + "grad_norm": 0.7131803631782532, + "learning_rate": 0.00016975548349067293, + "loss": 2.6921, + "step": 5160 + }, + { + "epoch": 0.4165119845048826, + "grad_norm": 0.8129798173904419, + "learning_rate": 0.0001697441708134678, + "loss": 2.6682, + "step": 5161 + }, + { + "epoch": 0.41659268824146556, + "grad_norm": 0.7634746432304382, + "learning_rate": 0.00016973285639805563, + "loss": 2.6684, + "step": 5162 + }, + { + "epoch": 0.4166733919780486, + "grad_norm": 0.7367348074913025, + "learning_rate": 0.0001697215402447184, + "loss": 2.6424, + "step": 5163 + }, + { + "epoch": 0.41675409571463157, + "grad_norm": 0.7235338687896729, + "learning_rate": 0.00016971022235373815, + "loss": 2.6817, + "step": 5164 + }, + { + "epoch": 0.4168347994512146, + "grad_norm": 0.7764291763305664, + "learning_rate": 0.0001696989027253969, + "loss": 2.6477, + "step": 5165 + }, + { + "epoch": 0.4169155031877976, + "grad_norm": 0.8207562565803528, + "learning_rate": 0.00016968758135997683, + "loss": 2.6408, + "step": 5166 + }, + { + "epoch": 0.4169962069243806, + "grad_norm": 0.7291484475135803, + "learning_rate": 0.00016967625825776005, + "loss": 2.6233, + "step": 5167 + }, + { + "epoch": 0.4170769106609636, + "grad_norm": 0.7060603499412537, + "learning_rate": 0.0001696649334190288, + "loss": 2.6204, + "step": 5168 + }, + { + "epoch": 0.4171576143975466, + "grad_norm": 0.7058241963386536, + "learning_rate": 0.00016965360684406528, + "loss": 2.6212, + "step": 5169 + }, + { + "epoch": 0.4172383181341296, + "grad_norm": 0.8248410224914551, + 
"learning_rate": 0.00016964227853315177, + "loss": 2.6688, + "step": 5170 + }, + { + "epoch": 0.4173190218707126, + "grad_norm": 0.7287606596946716, + "learning_rate": 0.0001696309484865707, + "loss": 2.6201, + "step": 5171 + }, + { + "epoch": 0.4173997256072956, + "grad_norm": 0.7214288115501404, + "learning_rate": 0.00016961961670460433, + "loss": 2.682, + "step": 5172 + }, + { + "epoch": 0.41748042934387863, + "grad_norm": 0.7133594155311584, + "learning_rate": 0.00016960828318753516, + "loss": 2.7167, + "step": 5173 + }, + { + "epoch": 0.4175611330804616, + "grad_norm": 0.6935842633247375, + "learning_rate": 0.00016959694793564558, + "loss": 2.6134, + "step": 5174 + }, + { + "epoch": 0.41764183681704464, + "grad_norm": 0.6863382458686829, + "learning_rate": 0.00016958561094921815, + "loss": 2.6396, + "step": 5175 + }, + { + "epoch": 0.4177225405536276, + "grad_norm": 0.7659433484077454, + "learning_rate": 0.0001695742722285354, + "loss": 2.6926, + "step": 5176 + }, + { + "epoch": 0.41780324429021065, + "grad_norm": 0.6997129917144775, + "learning_rate": 0.00016956293177387992, + "loss": 2.6983, + "step": 5177 + }, + { + "epoch": 0.41788394802679363, + "grad_norm": 0.6784526705741882, + "learning_rate": 0.00016955158958553433, + "loss": 2.6961, + "step": 5178 + }, + { + "epoch": 0.41796465176337666, + "grad_norm": 0.8227884769439697, + "learning_rate": 0.00016954024566378132, + "loss": 2.7008, + "step": 5179 + }, + { + "epoch": 0.41804535549995964, + "grad_norm": 0.7733054757118225, + "learning_rate": 0.0001695289000089036, + "loss": 2.6615, + "step": 5180 + }, + { + "epoch": 0.41812605923654267, + "grad_norm": 0.7077545523643494, + "learning_rate": 0.00016951755262118394, + "loss": 2.6388, + "step": 5181 + }, + { + "epoch": 0.41820676297312565, + "grad_norm": 0.7962050437927246, + "learning_rate": 0.00016950620350090513, + "loss": 2.7063, + "step": 5182 + }, + { + "epoch": 0.4182874667097087, + "grad_norm": 0.6950554847717285, + "learning_rate": 
0.00016949485264835005, + "loss": 2.7076, + "step": 5183 + }, + { + "epoch": 0.41836817044629165, + "grad_norm": 0.8546960949897766, + "learning_rate": 0.00016948350006380162, + "loss": 2.6533, + "step": 5184 + }, + { + "epoch": 0.4184488741828747, + "grad_norm": 0.7469324469566345, + "learning_rate": 0.00016947214574754272, + "loss": 2.5884, + "step": 5185 + }, + { + "epoch": 0.41852957791945766, + "grad_norm": 0.7125554084777832, + "learning_rate": 0.0001694607896998563, + "loss": 2.6448, + "step": 5186 + }, + { + "epoch": 0.4186102816560407, + "grad_norm": 0.6998329758644104, + "learning_rate": 0.00016944943192102549, + "loss": 2.5569, + "step": 5187 + }, + { + "epoch": 0.41869098539262367, + "grad_norm": 0.9046749472618103, + "learning_rate": 0.00016943807241133328, + "loss": 2.7701, + "step": 5188 + }, + { + "epoch": 0.4187716891292067, + "grad_norm": 0.7842074036598206, + "learning_rate": 0.00016942671117106274, + "loss": 2.7124, + "step": 5189 + }, + { + "epoch": 0.4188523928657897, + "grad_norm": 0.7625874280929565, + "learning_rate": 0.00016941534820049713, + "loss": 2.6626, + "step": 5190 + }, + { + "epoch": 0.4189330966023727, + "grad_norm": 0.7006461024284363, + "learning_rate": 0.00016940398349991957, + "loss": 2.6283, + "step": 5191 + }, + { + "epoch": 0.4190138003389557, + "grad_norm": 0.7081875205039978, + "learning_rate": 0.00016939261706961332, + "loss": 2.69, + "step": 5192 + }, + { + "epoch": 0.4190945040755387, + "grad_norm": 0.7554503083229065, + "learning_rate": 0.00016938124890986166, + "loss": 2.641, + "step": 5193 + }, + { + "epoch": 0.4191752078121217, + "grad_norm": 0.7478535175323486, + "learning_rate": 0.0001693698790209479, + "loss": 2.7035, + "step": 5194 + }, + { + "epoch": 0.41925591154870473, + "grad_norm": 0.7323064208030701, + "learning_rate": 0.00016935850740315545, + "loss": 2.6713, + "step": 5195 + }, + { + "epoch": 0.4193366152852877, + "grad_norm": 0.8011505007743835, + "learning_rate": 0.00016934713405676764, + "loss": 
2.6413, + "step": 5196 + }, + { + "epoch": 0.41941731902187074, + "grad_norm": 0.768851637840271, + "learning_rate": 0.00016933575898206804, + "loss": 2.6147, + "step": 5197 + }, + { + "epoch": 0.4194980227584537, + "grad_norm": 0.7255160808563232, + "learning_rate": 0.00016932438217934006, + "loss": 2.6093, + "step": 5198 + }, + { + "epoch": 0.41957872649503675, + "grad_norm": 0.7431769967079163, + "learning_rate": 0.00016931300364886722, + "loss": 2.6658, + "step": 5199 + }, + { + "epoch": 0.4196594302316197, + "grad_norm": 0.7532122731208801, + "learning_rate": 0.00016930162339093318, + "loss": 2.6371, + "step": 5200 + }, + { + "epoch": 0.41974013396820276, + "grad_norm": 0.7253943681716919, + "learning_rate": 0.00016929024140582152, + "loss": 2.6365, + "step": 5201 + }, + { + "epoch": 0.41982083770478573, + "grad_norm": 0.7323265075683594, + "learning_rate": 0.00016927885769381593, + "loss": 2.7096, + "step": 5202 + }, + { + "epoch": 0.4199015414413687, + "grad_norm": 0.7340009808540344, + "learning_rate": 0.00016926747225520008, + "loss": 2.6983, + "step": 5203 + }, + { + "epoch": 0.41998224517795174, + "grad_norm": 0.838706374168396, + "learning_rate": 0.00016925608509025776, + "loss": 2.7098, + "step": 5204 + }, + { + "epoch": 0.4200629489145347, + "grad_norm": 0.7320838570594788, + "learning_rate": 0.0001692446961992728, + "loss": 2.6767, + "step": 5205 + }, + { + "epoch": 0.42014365265111775, + "grad_norm": 0.7275335192680359, + "learning_rate": 0.00016923330558252898, + "loss": 2.6754, + "step": 5206 + }, + { + "epoch": 0.4202243563877007, + "grad_norm": 0.7572353482246399, + "learning_rate": 0.00016922191324031017, + "loss": 2.7076, + "step": 5207 + }, + { + "epoch": 0.42030506012428376, + "grad_norm": 0.7991098165512085, + "learning_rate": 0.0001692105191729004, + "loss": 2.7281, + "step": 5208 + }, + { + "epoch": 0.42038576386086673, + "grad_norm": 0.70769202709198, + "learning_rate": 0.00016919912338058356, + "loss": 2.684, + "step": 5209 + }, + { + 
"epoch": 0.42046646759744977, + "grad_norm": 0.6895349621772766, + "learning_rate": 0.0001691877258636436, + "loss": 2.6723, + "step": 5210 + }, + { + "epoch": 0.42054717133403274, + "grad_norm": 0.7368944883346558, + "learning_rate": 0.00016917632662236476, + "loss": 2.601, + "step": 5211 + }, + { + "epoch": 0.4206278750706158, + "grad_norm": 0.7122060060501099, + "learning_rate": 0.00016916492565703097, + "loss": 2.703, + "step": 5212 + }, + { + "epoch": 0.42070857880719875, + "grad_norm": 0.735251784324646, + "learning_rate": 0.00016915352296792646, + "loss": 2.7715, + "step": 5213 + }, + { + "epoch": 0.4207892825437818, + "grad_norm": 0.7686039805412292, + "learning_rate": 0.00016914211855533536, + "loss": 2.6935, + "step": 5214 + }, + { + "epoch": 0.42086998628036476, + "grad_norm": 0.8457472920417786, + "learning_rate": 0.00016913071241954195, + "loss": 2.6535, + "step": 5215 + }, + { + "epoch": 0.4209506900169478, + "grad_norm": 0.6913465261459351, + "learning_rate": 0.00016911930456083046, + "loss": 2.6453, + "step": 5216 + }, + { + "epoch": 0.42103139375353077, + "grad_norm": 0.6939878463745117, + "learning_rate": 0.00016910789497948524, + "loss": 2.6483, + "step": 5217 + }, + { + "epoch": 0.4211120974901138, + "grad_norm": 0.7240888476371765, + "learning_rate": 0.00016909648367579062, + "loss": 2.6649, + "step": 5218 + }, + { + "epoch": 0.4211928012266968, + "grad_norm": 0.7570972442626953, + "learning_rate": 0.00016908507065003102, + "loss": 2.6633, + "step": 5219 + }, + { + "epoch": 0.4212735049632798, + "grad_norm": 0.72161465883255, + "learning_rate": 0.00016907365590249082, + "loss": 2.6999, + "step": 5220 + }, + { + "epoch": 0.4213542086998628, + "grad_norm": 0.7818038463592529, + "learning_rate": 0.00016906223943345458, + "loss": 2.6478, + "step": 5221 + }, + { + "epoch": 0.4214349124364458, + "grad_norm": 0.7292464971542358, + "learning_rate": 0.00016905082124320684, + "loss": 2.6725, + "step": 5222 + }, + { + "epoch": 0.4215156161730288, + 
"grad_norm": 0.7612937092781067, + "learning_rate": 0.0001690394013320321, + "loss": 2.6474, + "step": 5223 + }, + { + "epoch": 0.4215963199096118, + "grad_norm": 0.7325131297111511, + "learning_rate": 0.000169027979700215, + "loss": 2.6525, + "step": 5224 + }, + { + "epoch": 0.4216770236461948, + "grad_norm": 0.7736644148826599, + "learning_rate": 0.00016901655634804022, + "loss": 2.662, + "step": 5225 + }, + { + "epoch": 0.42175772738277784, + "grad_norm": 0.758522629737854, + "learning_rate": 0.00016900513127579244, + "loss": 2.6558, + "step": 5226 + }, + { + "epoch": 0.4218384311193608, + "grad_norm": 0.7559491991996765, + "learning_rate": 0.00016899370448375642, + "loss": 2.7361, + "step": 5227 + }, + { + "epoch": 0.42191913485594384, + "grad_norm": 0.7791146039962769, + "learning_rate": 0.00016898227597221692, + "loss": 2.6739, + "step": 5228 + }, + { + "epoch": 0.4219998385925268, + "grad_norm": 0.7280717492103577, + "learning_rate": 0.00016897084574145878, + "loss": 2.6316, + "step": 5229 + }, + { + "epoch": 0.42208054232910985, + "grad_norm": 0.7455596327781677, + "learning_rate": 0.0001689594137917669, + "loss": 2.7244, + "step": 5230 + }, + { + "epoch": 0.42216124606569283, + "grad_norm": 0.7965813875198364, + "learning_rate": 0.00016894798012342613, + "loss": 2.6757, + "step": 5231 + }, + { + "epoch": 0.42224194980227586, + "grad_norm": 0.6740596294403076, + "learning_rate": 0.00016893654473672148, + "loss": 2.631, + "step": 5232 + }, + { + "epoch": 0.42232265353885884, + "grad_norm": 0.695105254650116, + "learning_rate": 0.00016892510763193795, + "loss": 2.6563, + "step": 5233 + }, + { + "epoch": 0.42240335727544187, + "grad_norm": 0.7623865008354187, + "learning_rate": 0.00016891366880936051, + "loss": 2.6738, + "step": 5234 + }, + { + "epoch": 0.42248406101202485, + "grad_norm": 0.7545912265777588, + "learning_rate": 0.00016890222826927435, + "loss": 2.6949, + "step": 5235 + }, + { + "epoch": 0.4225647647486079, + "grad_norm": 0.7280749678611755, + 
"learning_rate": 0.00016889078601196452, + "loss": 2.6571, + "step": 5236 + }, + { + "epoch": 0.42264546848519086, + "grad_norm": 0.6624523401260376, + "learning_rate": 0.00016887934203771625, + "loss": 2.6854, + "step": 5237 + }, + { + "epoch": 0.4227261722217739, + "grad_norm": 0.7835487127304077, + "learning_rate": 0.0001688678963468147, + "loss": 2.6437, + "step": 5238 + }, + { + "epoch": 0.42280687595835686, + "grad_norm": 0.7384940981864929, + "learning_rate": 0.00016885644893954518, + "loss": 2.6584, + "step": 5239 + }, + { + "epoch": 0.4228875796949399, + "grad_norm": 0.8227531313896179, + "learning_rate": 0.00016884499981619292, + "loss": 2.673, + "step": 5240 + }, + { + "epoch": 0.4229682834315229, + "grad_norm": 0.7442220449447632, + "learning_rate": 0.00016883354897704334, + "loss": 2.6729, + "step": 5241 + }, + { + "epoch": 0.4230489871681059, + "grad_norm": 0.7182636857032776, + "learning_rate": 0.00016882209642238175, + "loss": 2.6833, + "step": 5242 + }, + { + "epoch": 0.4231296909046889, + "grad_norm": 0.7061870098114014, + "learning_rate": 0.00016881064215249362, + "loss": 2.6696, + "step": 5243 + }, + { + "epoch": 0.4232103946412719, + "grad_norm": 0.6792885065078735, + "learning_rate": 0.00016879918616766445, + "loss": 2.6805, + "step": 5244 + }, + { + "epoch": 0.4232910983778549, + "grad_norm": 0.7439807057380676, + "learning_rate": 0.00016878772846817968, + "loss": 2.6522, + "step": 5245 + }, + { + "epoch": 0.4233718021144379, + "grad_norm": 0.7078969478607178, + "learning_rate": 0.00016877626905432492, + "loss": 2.6549, + "step": 5246 + }, + { + "epoch": 0.4234525058510209, + "grad_norm": 0.7103868126869202, + "learning_rate": 0.00016876480792638577, + "loss": 2.6812, + "step": 5247 + }, + { + "epoch": 0.42353320958760393, + "grad_norm": 0.7224452495574951, + "learning_rate": 0.00016875334508464782, + "loss": 2.6657, + "step": 5248 + }, + { + "epoch": 0.4236139133241869, + "grad_norm": 0.6885106563568115, + "learning_rate": 
0.00016874188052939682, + "loss": 2.6421, + "step": 5249 + }, + { + "epoch": 0.42369461706076994, + "grad_norm": 0.6736720204353333, + "learning_rate": 0.00016873041426091845, + "loss": 2.6717, + "step": 5250 + }, + { + "epoch": 0.4237753207973529, + "grad_norm": 0.7597963809967041, + "learning_rate": 0.00016871894627949846, + "loss": 2.6231, + "step": 5251 + }, + { + "epoch": 0.42385602453393595, + "grad_norm": 0.8295687437057495, + "learning_rate": 0.00016870747658542275, + "loss": 2.6631, + "step": 5252 + }, + { + "epoch": 0.4239367282705189, + "grad_norm": 0.6750548481941223, + "learning_rate": 0.0001686960051789771, + "loss": 2.6997, + "step": 5253 + }, + { + "epoch": 0.4240174320071019, + "grad_norm": 0.7229160666465759, + "learning_rate": 0.0001686845320604474, + "loss": 2.6525, + "step": 5254 + }, + { + "epoch": 0.42409813574368493, + "grad_norm": 0.8318623900413513, + "learning_rate": 0.00016867305723011967, + "loss": 2.7774, + "step": 5255 + }, + { + "epoch": 0.4241788394802679, + "grad_norm": 0.8391026854515076, + "learning_rate": 0.00016866158068827979, + "loss": 2.6712, + "step": 5256 + }, + { + "epoch": 0.42425954321685094, + "grad_norm": 0.691146969795227, + "learning_rate": 0.00016865010243521388, + "loss": 2.6459, + "step": 5257 + }, + { + "epoch": 0.4243402469534339, + "grad_norm": 0.7223602533340454, + "learning_rate": 0.00016863862247120794, + "loss": 2.6675, + "step": 5258 + }, + { + "epoch": 0.42442095069001695, + "grad_norm": 0.8400631546974182, + "learning_rate": 0.0001686271407965481, + "loss": 2.6978, + "step": 5259 + }, + { + "epoch": 0.42450165442659993, + "grad_norm": 0.737684965133667, + "learning_rate": 0.0001686156574115205, + "loss": 2.6992, + "step": 5260 + }, + { + "epoch": 0.42458235816318296, + "grad_norm": 0.7511717677116394, + "learning_rate": 0.0001686041723164114, + "loss": 2.6947, + "step": 5261 + }, + { + "epoch": 0.42466306189976594, + "grad_norm": 0.7434492707252502, + "learning_rate": 0.00016859268551150698, + "loss": 
2.7353, + "step": 5262 + }, + { + "epoch": 0.42474376563634897, + "grad_norm": 0.746609628200531, + "learning_rate": 0.00016858119699709353, + "loss": 2.7519, + "step": 5263 + }, + { + "epoch": 0.42482446937293195, + "grad_norm": 0.7709949612617493, + "learning_rate": 0.0001685697067734574, + "loss": 2.7018, + "step": 5264 + }, + { + "epoch": 0.424905173109515, + "grad_norm": 0.7496309876441956, + "learning_rate": 0.00016855821484088488, + "loss": 2.6761, + "step": 5265 + }, + { + "epoch": 0.42498587684609795, + "grad_norm": 0.7071252465248108, + "learning_rate": 0.00016854672119966243, + "loss": 2.6762, + "step": 5266 + }, + { + "epoch": 0.425066580582681, + "grad_norm": 0.7991356253623962, + "learning_rate": 0.00016853522585007658, + "loss": 2.6134, + "step": 5267 + }, + { + "epoch": 0.42514728431926396, + "grad_norm": 0.8194605708122253, + "learning_rate": 0.0001685237287924137, + "loss": 2.6601, + "step": 5268 + }, + { + "epoch": 0.425227988055847, + "grad_norm": 0.7451688051223755, + "learning_rate": 0.00016851223002696037, + "loss": 2.6631, + "step": 5269 + }, + { + "epoch": 0.42530869179242997, + "grad_norm": 0.7220263481140137, + "learning_rate": 0.0001685007295540032, + "loss": 2.6631, + "step": 5270 + }, + { + "epoch": 0.425389395529013, + "grad_norm": 0.7268854975700378, + "learning_rate": 0.00016848922737382874, + "loss": 2.6752, + "step": 5271 + }, + { + "epoch": 0.425470099265596, + "grad_norm": 0.8841642141342163, + "learning_rate": 0.00016847772348672378, + "loss": 2.7153, + "step": 5272 + }, + { + "epoch": 0.425550803002179, + "grad_norm": 0.7725942134857178, + "learning_rate": 0.00016846621789297489, + "loss": 2.6726, + "step": 5273 + }, + { + "epoch": 0.425631506738762, + "grad_norm": 0.7179448008537292, + "learning_rate": 0.00016845471059286887, + "loss": 2.6659, + "step": 5274 + }, + { + "epoch": 0.425712210475345, + "grad_norm": 0.7630325555801392, + "learning_rate": 0.00016844320158669257, + "loss": 2.7133, + "step": 5275 + }, + { + "epoch": 
0.425792914211928, + "grad_norm": 0.7349739670753479, + "learning_rate": 0.00016843169087473272, + "loss": 2.6397, + "step": 5276 + }, + { + "epoch": 0.42587361794851103, + "grad_norm": 0.7670298218727112, + "learning_rate": 0.00016842017845727626, + "loss": 2.6485, + "step": 5277 + }, + { + "epoch": 0.425954321685094, + "grad_norm": 0.692095160484314, + "learning_rate": 0.00016840866433461013, + "loss": 2.6058, + "step": 5278 + }, + { + "epoch": 0.42603502542167704, + "grad_norm": 0.6888624429702759, + "learning_rate": 0.00016839714850702125, + "loss": 2.5757, + "step": 5279 + }, + { + "epoch": 0.42611572915826, + "grad_norm": 0.6816484332084656, + "learning_rate": 0.00016838563097479664, + "loss": 2.6656, + "step": 5280 + }, + { + "epoch": 0.42619643289484305, + "grad_norm": 0.7778486609458923, + "learning_rate": 0.00016837411173822333, + "loss": 2.6738, + "step": 5281 + }, + { + "epoch": 0.426277136631426, + "grad_norm": 0.73436439037323, + "learning_rate": 0.00016836259079758845, + "loss": 2.6346, + "step": 5282 + }, + { + "epoch": 0.42635784036800906, + "grad_norm": 0.673528254032135, + "learning_rate": 0.00016835106815317908, + "loss": 2.6636, + "step": 5283 + }, + { + "epoch": 0.42643854410459203, + "grad_norm": 0.6892737150192261, + "learning_rate": 0.00016833954380528242, + "loss": 2.6723, + "step": 5284 + }, + { + "epoch": 0.42651924784117506, + "grad_norm": 0.7404607534408569, + "learning_rate": 0.00016832801775418571, + "loss": 2.6751, + "step": 5285 + }, + { + "epoch": 0.42659995157775804, + "grad_norm": 0.7040587663650513, + "learning_rate": 0.00016831649000017618, + "loss": 2.6079, + "step": 5286 + }, + { + "epoch": 0.4266806553143411, + "grad_norm": 0.7295164465904236, + "learning_rate": 0.00016830496054354112, + "loss": 2.5928, + "step": 5287 + }, + { + "epoch": 0.42676135905092405, + "grad_norm": 0.7269962430000305, + "learning_rate": 0.00016829342938456788, + "loss": 2.6648, + "step": 5288 + }, + { + "epoch": 0.4268420627875071, + "grad_norm": 
0.7296550273895264, + "learning_rate": 0.0001682818965235439, + "loss": 2.6814, + "step": 5289 + }, + { + "epoch": 0.42692276652409006, + "grad_norm": 0.8376085758209229, + "learning_rate": 0.00016827036196075655, + "loss": 2.702, + "step": 5290 + }, + { + "epoch": 0.4270034702606731, + "grad_norm": 0.7461032271385193, + "learning_rate": 0.00016825882569649332, + "loss": 2.6959, + "step": 5291 + }, + { + "epoch": 0.42708417399725607, + "grad_norm": 0.7218661308288574, + "learning_rate": 0.00016824728773104171, + "loss": 2.7182, + "step": 5292 + }, + { + "epoch": 0.4271648777338391, + "grad_norm": 0.7012860774993896, + "learning_rate": 0.00016823574806468933, + "loss": 2.6989, + "step": 5293 + }, + { + "epoch": 0.4272455814704221, + "grad_norm": 0.7039482593536377, + "learning_rate": 0.0001682242066977237, + "loss": 2.6153, + "step": 5294 + }, + { + "epoch": 0.4273262852070051, + "grad_norm": 0.8783851861953735, + "learning_rate": 0.0001682126636304325, + "loss": 2.7174, + "step": 5295 + }, + { + "epoch": 0.4274069889435881, + "grad_norm": 0.7266566157341003, + "learning_rate": 0.00016820111886310343, + "loss": 2.6571, + "step": 5296 + }, + { + "epoch": 0.4274876926801711, + "grad_norm": 0.7512212991714478, + "learning_rate": 0.0001681895723960242, + "loss": 2.6802, + "step": 5297 + }, + { + "epoch": 0.4275683964167541, + "grad_norm": 0.7786974310874939, + "learning_rate": 0.00016817802422948254, + "loss": 2.6514, + "step": 5298 + }, + { + "epoch": 0.4276491001533371, + "grad_norm": 0.7454531788825989, + "learning_rate": 0.00016816647436376634, + "loss": 2.6508, + "step": 5299 + }, + { + "epoch": 0.4277298038899201, + "grad_norm": 0.7542992830276489, + "learning_rate": 0.0001681549227991634, + "loss": 2.6455, + "step": 5300 + }, + { + "epoch": 0.42781050762650313, + "grad_norm": 0.7405722141265869, + "learning_rate": 0.0001681433695359616, + "loss": 2.6505, + "step": 5301 + }, + { + "epoch": 0.4278912113630861, + "grad_norm": 0.7120002508163452, + "learning_rate": 
0.00016813181457444896, + "loss": 2.6652, + "step": 5302 + }, + { + "epoch": 0.42797191509966914, + "grad_norm": 0.7645997405052185, + "learning_rate": 0.00016812025791491334, + "loss": 2.6456, + "step": 5303 + }, + { + "epoch": 0.4280526188362521, + "grad_norm": 0.7214465141296387, + "learning_rate": 0.00016810869955764286, + "loss": 2.6261, + "step": 5304 + }, + { + "epoch": 0.4281333225728351, + "grad_norm": 0.7653367519378662, + "learning_rate": 0.00016809713950292551, + "loss": 2.7295, + "step": 5305 + }, + { + "epoch": 0.4282140263094181, + "grad_norm": 0.6798970103263855, + "learning_rate": 0.0001680855777510495, + "loss": 2.6549, + "step": 5306 + }, + { + "epoch": 0.4282947300460011, + "grad_norm": 0.7693684101104736, + "learning_rate": 0.00016807401430230288, + "loss": 2.7001, + "step": 5307 + }, + { + "epoch": 0.42837543378258414, + "grad_norm": 0.6962063312530518, + "learning_rate": 0.00016806244915697384, + "loss": 2.6582, + "step": 5308 + }, + { + "epoch": 0.4284561375191671, + "grad_norm": 0.7526959776878357, + "learning_rate": 0.00016805088231535068, + "loss": 2.7204, + "step": 5309 + }, + { + "epoch": 0.42853684125575014, + "grad_norm": 0.7403820753097534, + "learning_rate": 0.0001680393137777217, + "loss": 2.6505, + "step": 5310 + }, + { + "epoch": 0.4286175449923331, + "grad_norm": 0.7056909799575806, + "learning_rate": 0.00016802774354437506, + "loss": 2.5981, + "step": 5311 + }, + { + "epoch": 0.42869824872891615, + "grad_norm": 0.6756439805030823, + "learning_rate": 0.0001680161716155993, + "loss": 2.6845, + "step": 5312 + }, + { + "epoch": 0.42877895246549913, + "grad_norm": 0.7634297013282776, + "learning_rate": 0.0001680045979916827, + "loss": 2.6399, + "step": 5313 + }, + { + "epoch": 0.42885965620208216, + "grad_norm": 0.6793022751808167, + "learning_rate": 0.0001679930226729138, + "loss": 2.6808, + "step": 5314 + }, + { + "epoch": 0.42894035993866514, + "grad_norm": 0.7692369222640991, + "learning_rate": 0.00016798144565958103, + "loss": 
2.673, + "step": 5315 + }, + { + "epoch": 0.42902106367524817, + "grad_norm": 0.668798565864563, + "learning_rate": 0.00016796986695197293, + "loss": 2.6465, + "step": 5316 + }, + { + "epoch": 0.42910176741183115, + "grad_norm": 0.719160795211792, + "learning_rate": 0.00016795828655037805, + "loss": 2.5876, + "step": 5317 + }, + { + "epoch": 0.4291824711484142, + "grad_norm": 0.7352864742279053, + "learning_rate": 0.000167946704455085, + "loss": 2.625, + "step": 5318 + }, + { + "epoch": 0.42926317488499716, + "grad_norm": 0.7103392481803894, + "learning_rate": 0.00016793512066638254, + "loss": 2.602, + "step": 5319 + }, + { + "epoch": 0.4293438786215802, + "grad_norm": 0.7005727291107178, + "learning_rate": 0.0001679235351845592, + "loss": 2.6723, + "step": 5320 + }, + { + "epoch": 0.42942458235816316, + "grad_norm": 0.7686243653297424, + "learning_rate": 0.00016791194800990387, + "loss": 2.693, + "step": 5321 + }, + { + "epoch": 0.4295052860947462, + "grad_norm": 0.7026933431625366, + "learning_rate": 0.00016790035914270526, + "loss": 2.6334, + "step": 5322 + }, + { + "epoch": 0.4295859898313292, + "grad_norm": 0.748938262462616, + "learning_rate": 0.0001678887685832522, + "loss": 2.6757, + "step": 5323 + }, + { + "epoch": 0.4296666935679122, + "grad_norm": 0.7753568887710571, + "learning_rate": 0.00016787717633183355, + "loss": 2.6782, + "step": 5324 + }, + { + "epoch": 0.4297473973044952, + "grad_norm": 0.7605767846107483, + "learning_rate": 0.00016786558238873823, + "loss": 2.6822, + "step": 5325 + }, + { + "epoch": 0.4298281010410782, + "grad_norm": 0.7516531348228455, + "learning_rate": 0.00016785398675425524, + "loss": 2.6802, + "step": 5326 + }, + { + "epoch": 0.4299088047776612, + "grad_norm": 0.7551677227020264, + "learning_rate": 0.0001678423894286735, + "loss": 2.6509, + "step": 5327 + }, + { + "epoch": 0.4299895085142442, + "grad_norm": 0.765364944934845, + "learning_rate": 0.00016783079041228206, + "loss": 2.6552, + "step": 5328 + }, + { + "epoch": 
0.4300702122508272, + "grad_norm": 0.7016649842262268, + "learning_rate": 0.00016781918970537002, + "loss": 2.6861, + "step": 5329 + }, + { + "epoch": 0.43015091598741023, + "grad_norm": 0.7266311645507812, + "learning_rate": 0.0001678075873082265, + "loss": 2.7064, + "step": 5330 + }, + { + "epoch": 0.4302316197239932, + "grad_norm": 0.7414532899856567, + "learning_rate": 0.00016779598322114064, + "loss": 2.6273, + "step": 5331 + }, + { + "epoch": 0.43031232346057624, + "grad_norm": 0.7032443881034851, + "learning_rate": 0.00016778437744440167, + "loss": 2.6577, + "step": 5332 + }, + { + "epoch": 0.4303930271971592, + "grad_norm": 0.7150338888168335, + "learning_rate": 0.00016777276997829882, + "loss": 2.6586, + "step": 5333 + }, + { + "epoch": 0.43047373093374225, + "grad_norm": 0.6893971562385559, + "learning_rate": 0.0001677611608231214, + "loss": 2.6713, + "step": 5334 + }, + { + "epoch": 0.4305544346703252, + "grad_norm": 0.861935555934906, + "learning_rate": 0.00016774954997915867, + "loss": 2.7037, + "step": 5335 + }, + { + "epoch": 0.43063513840690826, + "grad_norm": 0.7140138745307922, + "learning_rate": 0.00016773793744670012, + "loss": 2.6684, + "step": 5336 + }, + { + "epoch": 0.43071584214349123, + "grad_norm": 0.7245929837226868, + "learning_rate": 0.00016772632322603506, + "loss": 2.6349, + "step": 5337 + }, + { + "epoch": 0.43079654588007427, + "grad_norm": 0.7216203808784485, + "learning_rate": 0.000167714707317453, + "loss": 2.6338, + "step": 5338 + }, + { + "epoch": 0.43087724961665724, + "grad_norm": 0.7076452374458313, + "learning_rate": 0.00016770308972124343, + "loss": 2.6614, + "step": 5339 + }, + { + "epoch": 0.4309579533532403, + "grad_norm": 0.7392035722732544, + "learning_rate": 0.00016769147043769586, + "loss": 2.6697, + "step": 5340 + }, + { + "epoch": 0.43103865708982325, + "grad_norm": 0.7235357761383057, + "learning_rate": 0.00016767984946709994, + "loss": 2.6664, + "step": 5341 + }, + { + "epoch": 0.4311193608264063, + 
"grad_norm": 0.6985526084899902, + "learning_rate": 0.00016766822680974524, + "loss": 2.6157, + "step": 5342 + }, + { + "epoch": 0.43120006456298926, + "grad_norm": 0.769963264465332, + "learning_rate": 0.0001676566024659214, + "loss": 2.6096, + "step": 5343 + }, + { + "epoch": 0.4312807682995723, + "grad_norm": 0.7504093050956726, + "learning_rate": 0.00016764497643591823, + "loss": 2.5795, + "step": 5344 + }, + { + "epoch": 0.43136147203615527, + "grad_norm": 0.7193379402160645, + "learning_rate": 0.0001676333487200254, + "loss": 2.6158, + "step": 5345 + }, + { + "epoch": 0.4314421757727383, + "grad_norm": 0.777357280254364, + "learning_rate": 0.00016762171931853273, + "loss": 2.6388, + "step": 5346 + }, + { + "epoch": 0.4315228795093213, + "grad_norm": 0.8590179085731506, + "learning_rate": 0.00016761008823173003, + "loss": 2.6597, + "step": 5347 + }, + { + "epoch": 0.4316035832459043, + "grad_norm": 0.7040170431137085, + "learning_rate": 0.0001675984554599072, + "loss": 2.6447, + "step": 5348 + }, + { + "epoch": 0.4316842869824873, + "grad_norm": 0.7682301998138428, + "learning_rate": 0.00016758682100335417, + "loss": 2.6738, + "step": 5349 + }, + { + "epoch": 0.4317649907190703, + "grad_norm": 0.8342414498329163, + "learning_rate": 0.00016757518486236087, + "loss": 2.7058, + "step": 5350 + }, + { + "epoch": 0.4318456944556533, + "grad_norm": 0.7410600781440735, + "learning_rate": 0.00016756354703721736, + "loss": 2.6597, + "step": 5351 + }, + { + "epoch": 0.4319263981922363, + "grad_norm": 0.7633174061775208, + "learning_rate": 0.00016755190752821363, + "loss": 2.6461, + "step": 5352 + }, + { + "epoch": 0.4320071019288193, + "grad_norm": 0.7855150103569031, + "learning_rate": 0.00016754026633563973, + "loss": 2.6556, + "step": 5353 + }, + { + "epoch": 0.43208780566540234, + "grad_norm": 0.7197602391242981, + "learning_rate": 0.00016752862345978587, + "loss": 2.6511, + "step": 5354 + }, + { + "epoch": 0.4321685094019853, + "grad_norm": 0.7748876810073853, + 
"learning_rate": 0.00016751697890094223, + "loss": 2.7, + "step": 5355 + }, + { + "epoch": 0.4322492131385683, + "grad_norm": 0.7457308173179626, + "learning_rate": 0.00016750533265939895, + "loss": 2.6934, + "step": 5356 + }, + { + "epoch": 0.4323299168751513, + "grad_norm": 0.8003394603729248, + "learning_rate": 0.00016749368473544633, + "loss": 2.6273, + "step": 5357 + }, + { + "epoch": 0.4324106206117343, + "grad_norm": 0.7163615822792053, + "learning_rate": 0.00016748203512937464, + "loss": 2.6605, + "step": 5358 + }, + { + "epoch": 0.43249132434831733, + "grad_norm": 0.6859120726585388, + "learning_rate": 0.00016747038384147422, + "loss": 2.6748, + "step": 5359 + }, + { + "epoch": 0.4325720280849003, + "grad_norm": 0.7169440984725952, + "learning_rate": 0.0001674587308720355, + "loss": 2.6674, + "step": 5360 + }, + { + "epoch": 0.43265273182148334, + "grad_norm": 0.7762351036071777, + "learning_rate": 0.00016744707622134888, + "loss": 2.6673, + "step": 5361 + }, + { + "epoch": 0.4327334355580663, + "grad_norm": 0.7169542908668518, + "learning_rate": 0.0001674354198897048, + "loss": 2.7341, + "step": 5362 + }, + { + "epoch": 0.43281413929464935, + "grad_norm": 0.7903403043746948, + "learning_rate": 0.00016742376187739376, + "loss": 2.6019, + "step": 5363 + }, + { + "epoch": 0.4328948430312323, + "grad_norm": 0.8395403027534485, + "learning_rate": 0.00016741210218470634, + "loss": 2.6519, + "step": 5364 + }, + { + "epoch": 0.43297554676781536, + "grad_norm": 0.7521546483039856, + "learning_rate": 0.0001674004408119331, + "loss": 2.6067, + "step": 5365 + }, + { + "epoch": 0.43305625050439833, + "grad_norm": 0.7186779975891113, + "learning_rate": 0.0001673887777593647, + "loss": 2.6435, + "step": 5366 + }, + { + "epoch": 0.43313695424098136, + "grad_norm": 0.7362968921661377, + "learning_rate": 0.0001673771130272918, + "loss": 2.6031, + "step": 5367 + }, + { + "epoch": 0.43321765797756434, + "grad_norm": 0.8033537864685059, + "learning_rate": 
0.0001673654466160051, + "loss": 2.7234, + "step": 5368 + }, + { + "epoch": 0.4332983617141474, + "grad_norm": 0.7109711766242981, + "learning_rate": 0.0001673537785257954, + "loss": 2.6621, + "step": 5369 + }, + { + "epoch": 0.43337906545073035, + "grad_norm": 0.7499226927757263, + "learning_rate": 0.0001673421087569535, + "loss": 2.706, + "step": 5370 + }, + { + "epoch": 0.4334597691873134, + "grad_norm": 0.7192875146865845, + "learning_rate": 0.00016733043730977017, + "loss": 2.6053, + "step": 5371 + }, + { + "epoch": 0.43354047292389636, + "grad_norm": 0.6939374208450317, + "learning_rate": 0.00016731876418453636, + "loss": 2.6621, + "step": 5372 + }, + { + "epoch": 0.4336211766604794, + "grad_norm": 0.720741331577301, + "learning_rate": 0.00016730708938154297, + "loss": 2.6358, + "step": 5373 + }, + { + "epoch": 0.43370188039706237, + "grad_norm": 0.6979780793190002, + "learning_rate": 0.00016729541290108095, + "loss": 2.6162, + "step": 5374 + }, + { + "epoch": 0.4337825841336454, + "grad_norm": 0.8014200925827026, + "learning_rate": 0.00016728373474344136, + "loss": 2.6255, + "step": 5375 + }, + { + "epoch": 0.4338632878702284, + "grad_norm": 0.7780057787895203, + "learning_rate": 0.0001672720549089152, + "loss": 2.6257, + "step": 5376 + }, + { + "epoch": 0.4339439916068114, + "grad_norm": 0.7111102938652039, + "learning_rate": 0.00016726037339779358, + "loss": 2.6384, + "step": 5377 + }, + { + "epoch": 0.4340246953433944, + "grad_norm": 0.7077106833457947, + "learning_rate": 0.00016724869021036764, + "loss": 2.6293, + "step": 5378 + }, + { + "epoch": 0.4341053990799774, + "grad_norm": 0.8328250646591187, + "learning_rate": 0.00016723700534692853, + "loss": 2.6186, + "step": 5379 + }, + { + "epoch": 0.4341861028165604, + "grad_norm": 0.6942149996757507, + "learning_rate": 0.00016722531880776752, + "loss": 2.6032, + "step": 5380 + }, + { + "epoch": 0.4342668065531434, + "grad_norm": 0.7180305123329163, + "learning_rate": 0.00016721363059317583, + "loss": 
2.6166, + "step": 5381 + }, + { + "epoch": 0.4343475102897264, + "grad_norm": 0.8093443512916565, + "learning_rate": 0.00016720194070344476, + "loss": 2.6596, + "step": 5382 + }, + { + "epoch": 0.43442821402630943, + "grad_norm": 0.7337743043899536, + "learning_rate": 0.00016719024913886568, + "loss": 2.6137, + "step": 5383 + }, + { + "epoch": 0.4345089177628924, + "grad_norm": 0.7590384483337402, + "learning_rate": 0.00016717855589972993, + "loss": 2.6541, + "step": 5384 + }, + { + "epoch": 0.43458962149947544, + "grad_norm": 0.6945257186889648, + "learning_rate": 0.00016716686098632898, + "loss": 2.686, + "step": 5385 + }, + { + "epoch": 0.4346703252360584, + "grad_norm": 0.7175764441490173, + "learning_rate": 0.00016715516439895424, + "loss": 2.6081, + "step": 5386 + }, + { + "epoch": 0.43475102897264145, + "grad_norm": 0.7287259697914124, + "learning_rate": 0.00016714346613789732, + "loss": 2.6462, + "step": 5387 + }, + { + "epoch": 0.43483173270922443, + "grad_norm": 0.6864096522331238, + "learning_rate": 0.00016713176620344964, + "loss": 2.7104, + "step": 5388 + }, + { + "epoch": 0.43491243644580746, + "grad_norm": 0.6554383039474487, + "learning_rate": 0.00016712006459590289, + "loss": 2.6153, + "step": 5389 + }, + { + "epoch": 0.43499314018239044, + "grad_norm": 0.6415165662765503, + "learning_rate": 0.00016710836131554867, + "loss": 2.6198, + "step": 5390 + }, + { + "epoch": 0.43507384391897347, + "grad_norm": 0.6998475193977356, + "learning_rate": 0.00016709665636267869, + "loss": 2.6774, + "step": 5391 + }, + { + "epoch": 0.43515454765555645, + "grad_norm": 0.7437679171562195, + "learning_rate": 0.00016708494973758465, + "loss": 2.6176, + "step": 5392 + }, + { + "epoch": 0.4352352513921395, + "grad_norm": 0.6898311376571655, + "learning_rate": 0.00016707324144055825, + "loss": 2.6194, + "step": 5393 + }, + { + "epoch": 0.43531595512872245, + "grad_norm": 0.7536425590515137, + "learning_rate": 0.00016706153147189138, + "loss": 2.672, + "step": 5394 + }, + 
{ + "epoch": 0.4353966588653055, + "grad_norm": 0.7576118111610413, + "learning_rate": 0.00016704981983187581, + "loss": 2.6473, + "step": 5395 + }, + { + "epoch": 0.43547736260188846, + "grad_norm": 0.7452495098114014, + "learning_rate": 0.00016703810652080349, + "loss": 2.6487, + "step": 5396 + }, + { + "epoch": 0.4355580663384715, + "grad_norm": 0.7817744612693787, + "learning_rate": 0.0001670263915389663, + "loss": 2.61, + "step": 5397 + }, + { + "epoch": 0.43563877007505447, + "grad_norm": 0.7195492386817932, + "learning_rate": 0.00016701467488665624, + "loss": 2.6745, + "step": 5398 + }, + { + "epoch": 0.4357194738116375, + "grad_norm": 0.7703930735588074, + "learning_rate": 0.0001670029565641653, + "loss": 2.7196, + "step": 5399 + }, + { + "epoch": 0.4358001775482205, + "grad_norm": 0.6859520673751831, + "learning_rate": 0.00016699123657178553, + "loss": 2.6317, + "step": 5400 + }, + { + "epoch": 0.4358808812848035, + "grad_norm": 0.7380268573760986, + "learning_rate": 0.00016697951490980903, + "loss": 2.6008, + "step": 5401 + }, + { + "epoch": 0.4359615850213865, + "grad_norm": 0.7903439402580261, + "learning_rate": 0.00016696779157852792, + "loss": 2.6411, + "step": 5402 + }, + { + "epoch": 0.4360422887579695, + "grad_norm": 0.7022606134414673, + "learning_rate": 0.0001669560665782344, + "loss": 2.6153, + "step": 5403 + }, + { + "epoch": 0.4361229924945525, + "grad_norm": 0.8196203112602234, + "learning_rate": 0.00016694433990922068, + "loss": 2.6128, + "step": 5404 + }, + { + "epoch": 0.43620369623113553, + "grad_norm": 0.7342696189880371, + "learning_rate": 0.000166932611571779, + "loss": 2.6802, + "step": 5405 + }, + { + "epoch": 0.4362843999677185, + "grad_norm": 0.7475131154060364, + "learning_rate": 0.0001669208815662017, + "loss": 2.6106, + "step": 5406 + }, + { + "epoch": 0.4363651037043015, + "grad_norm": 0.7067655324935913, + "learning_rate": 0.00016690914989278107, + "loss": 2.6362, + "step": 5407 + }, + { + "epoch": 0.4364458074408845, + 
"grad_norm": 0.7550163865089417, + "learning_rate": 0.00016689741655180956, + "loss": 2.6256, + "step": 5408 + }, + { + "epoch": 0.4365265111774675, + "grad_norm": 0.7341828346252441, + "learning_rate": 0.00016688568154357952, + "loss": 2.6912, + "step": 5409 + }, + { + "epoch": 0.4366072149140505, + "grad_norm": 0.7501869201660156, + "learning_rate": 0.00016687394486838349, + "loss": 2.7122, + "step": 5410 + }, + { + "epoch": 0.4366879186506335, + "grad_norm": 0.7041562795639038, + "learning_rate": 0.00016686220652651392, + "loss": 2.6755, + "step": 5411 + }, + { + "epoch": 0.43676862238721653, + "grad_norm": 0.7218217253684998, + "learning_rate": 0.00016685046651826338, + "loss": 2.693, + "step": 5412 + }, + { + "epoch": 0.4368493261237995, + "grad_norm": 0.6880577206611633, + "learning_rate": 0.00016683872484392448, + "loss": 2.638, + "step": 5413 + }, + { + "epoch": 0.43693002986038254, + "grad_norm": 0.6864475607872009, + "learning_rate": 0.0001668269815037898, + "loss": 2.6497, + "step": 5414 + }, + { + "epoch": 0.4370107335969655, + "grad_norm": 0.7326167821884155, + "learning_rate": 0.00016681523649815212, + "loss": 2.6858, + "step": 5415 + }, + { + "epoch": 0.43709143733354855, + "grad_norm": 0.6773428320884705, + "learning_rate": 0.00016680348982730405, + "loss": 2.6489, + "step": 5416 + }, + { + "epoch": 0.4371721410701315, + "grad_norm": 0.7117835283279419, + "learning_rate": 0.00016679174149153837, + "loss": 2.6607, + "step": 5417 + }, + { + "epoch": 0.43725284480671456, + "grad_norm": 0.7268334031105042, + "learning_rate": 0.00016677999149114793, + "loss": 2.703, + "step": 5418 + }, + { + "epoch": 0.43733354854329753, + "grad_norm": 0.7672972679138184, + "learning_rate": 0.00016676823982642554, + "loss": 2.5803, + "step": 5419 + }, + { + "epoch": 0.43741425227988057, + "grad_norm": 0.6966733932495117, + "learning_rate": 0.00016675648649766407, + "loss": 2.6149, + "step": 5420 + }, + { + "epoch": 0.43749495601646354, + "grad_norm": 0.752896249294281, + 
"learning_rate": 0.00016674473150515644, + "loss": 2.7108, + "step": 5421 + }, + { + "epoch": 0.4375756597530466, + "grad_norm": 0.7094796895980835, + "learning_rate": 0.00016673297484919565, + "loss": 2.6989, + "step": 5422 + }, + { + "epoch": 0.43765636348962955, + "grad_norm": 0.7631612420082092, + "learning_rate": 0.00016672121653007465, + "loss": 2.6673, + "step": 5423 + }, + { + "epoch": 0.4377370672262126, + "grad_norm": 0.7083843946456909, + "learning_rate": 0.00016670945654808655, + "loss": 2.6529, + "step": 5424 + }, + { + "epoch": 0.43781777096279556, + "grad_norm": 0.7291569709777832, + "learning_rate": 0.0001666976949035244, + "loss": 2.633, + "step": 5425 + }, + { + "epoch": 0.4378984746993786, + "grad_norm": 0.8351448774337769, + "learning_rate": 0.00016668593159668138, + "loss": 2.5993, + "step": 5426 + }, + { + "epoch": 0.43797917843596157, + "grad_norm": 0.7339642643928528, + "learning_rate": 0.00016667416662785058, + "loss": 2.6486, + "step": 5427 + }, + { + "epoch": 0.4380598821725446, + "grad_norm": 0.7257512211799622, + "learning_rate": 0.00016666239999732526, + "loss": 2.6453, + "step": 5428 + }, + { + "epoch": 0.4381405859091276, + "grad_norm": 0.7282476425170898, + "learning_rate": 0.00016665063170539872, + "loss": 2.6654, + "step": 5429 + }, + { + "epoch": 0.4382212896457106, + "grad_norm": 0.726685643196106, + "learning_rate": 0.00016663886175236417, + "loss": 2.65, + "step": 5430 + }, + { + "epoch": 0.4383019933822936, + "grad_norm": 0.7478880286216736, + "learning_rate": 0.000166627090138515, + "loss": 2.623, + "step": 5431 + }, + { + "epoch": 0.4383826971188766, + "grad_norm": 0.7624948024749756, + "learning_rate": 0.00016661531686414457, + "loss": 2.6438, + "step": 5432 + }, + { + "epoch": 0.4384634008554596, + "grad_norm": 0.8098936676979065, + "learning_rate": 0.00016660354192954633, + "loss": 2.6226, + "step": 5433 + }, + { + "epoch": 0.4385441045920426, + "grad_norm": 0.7305725812911987, + "learning_rate": 0.0001665917653350137, + 
"loss": 2.6425, + "step": 5434 + }, + { + "epoch": 0.4386248083286256, + "grad_norm": 0.7064421772956848, + "learning_rate": 0.00016657998708084027, + "loss": 2.6069, + "step": 5435 + }, + { + "epoch": 0.43870551206520864, + "grad_norm": 0.8279524445533752, + "learning_rate": 0.00016656820716731945, + "loss": 2.6609, + "step": 5436 + }, + { + "epoch": 0.4387862158017916, + "grad_norm": 0.742659866809845, + "learning_rate": 0.00016655642559474488, + "loss": 2.64, + "step": 5437 + }, + { + "epoch": 0.43886691953837464, + "grad_norm": 0.757780909538269, + "learning_rate": 0.00016654464236341026, + "loss": 2.6546, + "step": 5438 + }, + { + "epoch": 0.4389476232749576, + "grad_norm": 0.7439742684364319, + "learning_rate": 0.00016653285747360918, + "loss": 2.6717, + "step": 5439 + }, + { + "epoch": 0.43902832701154065, + "grad_norm": 0.7529581189155579, + "learning_rate": 0.0001665210709256354, + "loss": 2.6204, + "step": 5440 + }, + { + "epoch": 0.43910903074812363, + "grad_norm": 0.7224153876304626, + "learning_rate": 0.00016650928271978258, + "loss": 2.6417, + "step": 5441 + }, + { + "epoch": 0.43918973448470666, + "grad_norm": 0.6792185306549072, + "learning_rate": 0.00016649749285634462, + "loss": 2.6382, + "step": 5442 + }, + { + "epoch": 0.43927043822128964, + "grad_norm": 0.6887058019638062, + "learning_rate": 0.00016648570133561533, + "loss": 2.6302, + "step": 5443 + }, + { + "epoch": 0.43935114195787267, + "grad_norm": 0.7373671531677246, + "learning_rate": 0.00016647390815788853, + "loss": 2.625, + "step": 5444 + }, + { + "epoch": 0.43943184569445565, + "grad_norm": 0.7595719695091248, + "learning_rate": 0.0001664621133234582, + "loss": 2.6444, + "step": 5445 + }, + { + "epoch": 0.4395125494310387, + "grad_norm": 0.7331473231315613, + "learning_rate": 0.00016645031683261825, + "loss": 2.6308, + "step": 5446 + }, + { + "epoch": 0.43959325316762166, + "grad_norm": 0.7724922895431519, + "learning_rate": 0.0001664385186856627, + "loss": 2.6646, + "step": 5447 + }, 
+ { + "epoch": 0.4396739569042047, + "grad_norm": 0.6960163712501526, + "learning_rate": 0.00016642671888288563, + "loss": 2.6196, + "step": 5448 + }, + { + "epoch": 0.43975466064078766, + "grad_norm": 0.6769189834594727, + "learning_rate": 0.00016641491742458103, + "loss": 2.6558, + "step": 5449 + }, + { + "epoch": 0.4398353643773707, + "grad_norm": 0.7435783743858337, + "learning_rate": 0.0001664031143110431, + "loss": 2.6717, + "step": 5450 + }, + { + "epoch": 0.4399160681139537, + "grad_norm": 0.7234118580818176, + "learning_rate": 0.00016639130954256603, + "loss": 2.6549, + "step": 5451 + }, + { + "epoch": 0.4399967718505367, + "grad_norm": 0.720825731754303, + "learning_rate": 0.00016637950311944392, + "loss": 2.6098, + "step": 5452 + }, + { + "epoch": 0.4400774755871197, + "grad_norm": 0.6977505087852478, + "learning_rate": 0.0001663676950419711, + "loss": 2.6351, + "step": 5453 + }, + { + "epoch": 0.4401581793237027, + "grad_norm": 0.6959076523780823, + "learning_rate": 0.00016635588531044185, + "loss": 2.6918, + "step": 5454 + }, + { + "epoch": 0.4402388830602857, + "grad_norm": 0.7022189497947693, + "learning_rate": 0.00016634407392515044, + "loss": 2.6218, + "step": 5455 + }, + { + "epoch": 0.4403195867968687, + "grad_norm": 0.7147775292396545, + "learning_rate": 0.0001663322608863913, + "loss": 2.6966, + "step": 5456 + }, + { + "epoch": 0.4404002905334517, + "grad_norm": 0.7592755556106567, + "learning_rate": 0.00016632044619445882, + "loss": 2.6326, + "step": 5457 + }, + { + "epoch": 0.4404809942700347, + "grad_norm": 0.6914302110671997, + "learning_rate": 0.00016630862984964745, + "loss": 2.603, + "step": 5458 + }, + { + "epoch": 0.4405616980066177, + "grad_norm": 0.7735368609428406, + "learning_rate": 0.0001662968118522517, + "loss": 2.6666, + "step": 5459 + }, + { + "epoch": 0.4406424017432007, + "grad_norm": 0.7175899744033813, + "learning_rate": 0.00016628499220256612, + "loss": 2.666, + "step": 5460 + }, + { + "epoch": 0.4407231054797837, + 
"grad_norm": 0.6735796332359314, + "learning_rate": 0.00016627317090088523, + "loss": 2.6451, + "step": 5461 + }, + { + "epoch": 0.4408038092163667, + "grad_norm": 0.72022545337677, + "learning_rate": 0.0001662613479475037, + "loss": 2.6295, + "step": 5462 + }, + { + "epoch": 0.4408845129529497, + "grad_norm": 0.7084751725196838, + "learning_rate": 0.00016624952334271616, + "loss": 2.6633, + "step": 5463 + }, + { + "epoch": 0.4409652166895327, + "grad_norm": 0.7399250864982605, + "learning_rate": 0.00016623769708681735, + "loss": 2.6076, + "step": 5464 + }, + { + "epoch": 0.44104592042611573, + "grad_norm": 0.6904892325401306, + "learning_rate": 0.00016622586918010193, + "loss": 2.6799, + "step": 5465 + }, + { + "epoch": 0.4411266241626987, + "grad_norm": 0.7419006824493408, + "learning_rate": 0.00016621403962286478, + "loss": 2.65, + "step": 5466 + }, + { + "epoch": 0.44120732789928174, + "grad_norm": 0.7201282978057861, + "learning_rate": 0.00016620220841540064, + "loss": 2.6769, + "step": 5467 + }, + { + "epoch": 0.4412880316358647, + "grad_norm": 0.7223218679428101, + "learning_rate": 0.00016619037555800443, + "loss": 2.6342, + "step": 5468 + }, + { + "epoch": 0.44136873537244775, + "grad_norm": 0.7517585754394531, + "learning_rate": 0.00016617854105097104, + "loss": 2.6103, + "step": 5469 + }, + { + "epoch": 0.44144943910903073, + "grad_norm": 0.6765139698982239, + "learning_rate": 0.0001661667048945954, + "loss": 2.624, + "step": 5470 + }, + { + "epoch": 0.44153014284561376, + "grad_norm": 0.7197677493095398, + "learning_rate": 0.00016615486708917255, + "loss": 2.5786, + "step": 5471 + }, + { + "epoch": 0.44161084658219674, + "grad_norm": 0.7196774482727051, + "learning_rate": 0.00016614302763499742, + "loss": 2.6147, + "step": 5472 + }, + { + "epoch": 0.44169155031877977, + "grad_norm": 0.7210293412208557, + "learning_rate": 0.00016613118653236518, + "loss": 2.6526, + "step": 5473 + }, + { + "epoch": 0.44177225405536275, + "grad_norm": 0.6870129108428955, + 
"learning_rate": 0.00016611934378157092, + "loss": 2.665, + "step": 5474 + }, + { + "epoch": 0.4418529577919458, + "grad_norm": 0.6925365328788757, + "learning_rate": 0.00016610749938290975, + "loss": 2.5734, + "step": 5475 + }, + { + "epoch": 0.44193366152852875, + "grad_norm": 0.7399131655693054, + "learning_rate": 0.0001660956533366769, + "loss": 2.6935, + "step": 5476 + }, + { + "epoch": 0.4420143652651118, + "grad_norm": 0.7348966002464294, + "learning_rate": 0.00016608380564316758, + "loss": 2.6788, + "step": 5477 + }, + { + "epoch": 0.44209506900169476, + "grad_norm": 0.7597334980964661, + "learning_rate": 0.00016607195630267708, + "loss": 2.6732, + "step": 5478 + }, + { + "epoch": 0.4421757727382778, + "grad_norm": 0.6847043037414551, + "learning_rate": 0.00016606010531550072, + "loss": 2.6475, + "step": 5479 + }, + { + "epoch": 0.44225647647486077, + "grad_norm": 0.7065151929855347, + "learning_rate": 0.00016604825268193388, + "loss": 2.6674, + "step": 5480 + }, + { + "epoch": 0.4423371802114438, + "grad_norm": 0.7102208137512207, + "learning_rate": 0.0001660363984022719, + "loss": 2.6723, + "step": 5481 + }, + { + "epoch": 0.4424178839480268, + "grad_norm": 0.6912767887115479, + "learning_rate": 0.00016602454247681024, + "loss": 2.628, + "step": 5482 + }, + { + "epoch": 0.4424985876846098, + "grad_norm": 0.7265123128890991, + "learning_rate": 0.0001660126849058444, + "loss": 2.5935, + "step": 5483 + }, + { + "epoch": 0.4425792914211928, + "grad_norm": 0.8177923560142517, + "learning_rate": 0.0001660008256896699, + "loss": 2.6402, + "step": 5484 + }, + { + "epoch": 0.4426599951577758, + "grad_norm": 0.7196556925773621, + "learning_rate": 0.00016598896482858231, + "loss": 2.6939, + "step": 5485 + }, + { + "epoch": 0.4427406988943588, + "grad_norm": 0.7459850907325745, + "learning_rate": 0.0001659771023228772, + "loss": 2.6343, + "step": 5486 + }, + { + "epoch": 0.44282140263094183, + "grad_norm": 0.7399095892906189, + "learning_rate": 
0.00016596523817285024, + "loss": 2.6139, + "step": 5487 + }, + { + "epoch": 0.4429021063675248, + "grad_norm": 0.7517558336257935, + "learning_rate": 0.0001659533723787971, + "loss": 2.6609, + "step": 5488 + }, + { + "epoch": 0.44298281010410784, + "grad_norm": 0.7073537707328796, + "learning_rate": 0.00016594150494101355, + "loss": 2.6326, + "step": 5489 + }, + { + "epoch": 0.4430635138406908, + "grad_norm": 0.7414752244949341, + "learning_rate": 0.0001659296358597953, + "loss": 2.6759, + "step": 5490 + }, + { + "epoch": 0.44314421757727385, + "grad_norm": 0.7636380195617676, + "learning_rate": 0.0001659177651354382, + "loss": 2.5743, + "step": 5491 + }, + { + "epoch": 0.4432249213138568, + "grad_norm": 0.6839539408683777, + "learning_rate": 0.00016590589276823804, + "loss": 2.631, + "step": 5492 + }, + { + "epoch": 0.44330562505043986, + "grad_norm": 0.8057516813278198, + "learning_rate": 0.0001658940187584908, + "loss": 2.6916, + "step": 5493 + }, + { + "epoch": 0.44338632878702283, + "grad_norm": 0.7479767799377441, + "learning_rate": 0.00016588214310649232, + "loss": 2.6811, + "step": 5494 + }, + { + "epoch": 0.44346703252360586, + "grad_norm": 0.7854729294776917, + "learning_rate": 0.00016587026581253866, + "loss": 2.6746, + "step": 5495 + }, + { + "epoch": 0.44354773626018884, + "grad_norm": 0.7782836556434631, + "learning_rate": 0.00016585838687692577, + "loss": 2.61, + "step": 5496 + }, + { + "epoch": 0.4436284399967719, + "grad_norm": 0.7047034502029419, + "learning_rate": 0.00016584650629994968, + "loss": 2.6573, + "step": 5497 + }, + { + "epoch": 0.44370914373335485, + "grad_norm": 0.7398735880851746, + "learning_rate": 0.0001658346240819066, + "loss": 2.6338, + "step": 5498 + }, + { + "epoch": 0.4437898474699379, + "grad_norm": 0.7243468165397644, + "learning_rate": 0.00016582274022309258, + "loss": 2.5898, + "step": 5499 + }, + { + "epoch": 0.44387055120652086, + "grad_norm": 0.7415906190872192, + "learning_rate": 0.00016581085472380376, + "loss": 
2.5893, + "step": 5500 + }, + { + "epoch": 0.4439512549431039, + "grad_norm": 0.6935107707977295, + "learning_rate": 0.00016579896758433645, + "loss": 2.6704, + "step": 5501 + }, + { + "epoch": 0.44403195867968687, + "grad_norm": 0.7188034653663635, + "learning_rate": 0.00016578707880498685, + "loss": 2.643, + "step": 5502 + }, + { + "epoch": 0.4441126624162699, + "grad_norm": 0.6697022914886475, + "learning_rate": 0.0001657751883860513, + "loss": 2.6313, + "step": 5503 + }, + { + "epoch": 0.4441933661528529, + "grad_norm": 0.760154664516449, + "learning_rate": 0.00016576329632782613, + "loss": 2.6604, + "step": 5504 + }, + { + "epoch": 0.4442740698894359, + "grad_norm": 0.6883447170257568, + "learning_rate": 0.00016575140263060765, + "loss": 2.64, + "step": 5505 + }, + { + "epoch": 0.4443547736260189, + "grad_norm": 0.8628804683685303, + "learning_rate": 0.0001657395072946924, + "loss": 2.6651, + "step": 5506 + }, + { + "epoch": 0.4444354773626019, + "grad_norm": 0.7125170230865479, + "learning_rate": 0.0001657276103203768, + "loss": 2.7132, + "step": 5507 + }, + { + "epoch": 0.4445161810991849, + "grad_norm": 0.6965304613113403, + "learning_rate": 0.00016571571170795725, + "loss": 2.7109, + "step": 5508 + }, + { + "epoch": 0.44459688483576787, + "grad_norm": 0.720327615737915, + "learning_rate": 0.00016570381145773042, + "loss": 2.6323, + "step": 5509 + }, + { + "epoch": 0.4446775885723509, + "grad_norm": 0.7097898125648499, + "learning_rate": 0.00016569190956999287, + "loss": 2.6461, + "step": 5510 + }, + { + "epoch": 0.4447582923089339, + "grad_norm": 0.7142884731292725, + "learning_rate": 0.0001656800060450412, + "loss": 2.6894, + "step": 5511 + }, + { + "epoch": 0.4448389960455169, + "grad_norm": 0.6992002725601196, + "learning_rate": 0.0001656681008831721, + "loss": 2.6116, + "step": 5512 + }, + { + "epoch": 0.4449196997820999, + "grad_norm": 0.763841450214386, + "learning_rate": 0.00016565619408468227, + "loss": 2.6441, + "step": 5513 + }, + { + "epoch": 
0.4450004035186829, + "grad_norm": 0.6958404183387756, + "learning_rate": 0.00016564428564986848, + "loss": 2.5751, + "step": 5514 + }, + { + "epoch": 0.4450811072552659, + "grad_norm": 0.8804046511650085, + "learning_rate": 0.00016563237557902744, + "loss": 2.6353, + "step": 5515 + }, + { + "epoch": 0.4451618109918489, + "grad_norm": 0.744864821434021, + "learning_rate": 0.00016562046387245608, + "loss": 2.6887, + "step": 5516 + }, + { + "epoch": 0.4452425147284319, + "grad_norm": 0.7627978920936584, + "learning_rate": 0.0001656085505304512, + "loss": 2.6347, + "step": 5517 + }, + { + "epoch": 0.44532321846501494, + "grad_norm": 0.7728918194770813, + "learning_rate": 0.00016559663555330975, + "loss": 2.6344, + "step": 5518 + }, + { + "epoch": 0.4454039222015979, + "grad_norm": 0.7853842377662659, + "learning_rate": 0.00016558471894132865, + "loss": 2.7239, + "step": 5519 + }, + { + "epoch": 0.44548462593818094, + "grad_norm": 0.7981860041618347, + "learning_rate": 0.00016557280069480495, + "loss": 2.66, + "step": 5520 + }, + { + "epoch": 0.4455653296747639, + "grad_norm": 0.7555295825004578, + "learning_rate": 0.0001655608808140356, + "loss": 2.6636, + "step": 5521 + }, + { + "epoch": 0.44564603341134695, + "grad_norm": 0.6893854141235352, + "learning_rate": 0.00016554895929931778, + "loss": 2.5999, + "step": 5522 + }, + { + "epoch": 0.44572673714792993, + "grad_norm": 0.7740506529808044, + "learning_rate": 0.0001655370361509485, + "loss": 2.6308, + "step": 5523 + }, + { + "epoch": 0.44580744088451296, + "grad_norm": 0.6956021785736084, + "learning_rate": 0.00016552511136922498, + "loss": 2.6376, + "step": 5524 + }, + { + "epoch": 0.44588814462109594, + "grad_norm": 0.7408841252326965, + "learning_rate": 0.00016551318495444445, + "loss": 2.6644, + "step": 5525 + }, + { + "epoch": 0.44596884835767897, + "grad_norm": 0.7715663313865662, + "learning_rate": 0.000165501256906904, + "loss": 2.6791, + "step": 5526 + }, + { + "epoch": 0.44604955209426195, + "grad_norm": 
0.6880629062652588, + "learning_rate": 0.0001654893272269011, + "loss": 2.7209, + "step": 5527 + }, + { + "epoch": 0.446130255830845, + "grad_norm": 0.6765853762626648, + "learning_rate": 0.0001654773959147329, + "loss": 2.6548, + "step": 5528 + }, + { + "epoch": 0.44621095956742796, + "grad_norm": 0.739248514175415, + "learning_rate": 0.00016546546297069688, + "loss": 2.69, + "step": 5529 + }, + { + "epoch": 0.446291663304011, + "grad_norm": 0.7655714750289917, + "learning_rate": 0.00016545352839509038, + "loss": 2.6238, + "step": 5530 + }, + { + "epoch": 0.44637236704059396, + "grad_norm": 0.706068217754364, + "learning_rate": 0.00016544159218821088, + "loss": 2.6528, + "step": 5531 + }, + { + "epoch": 0.446453070777177, + "grad_norm": 0.7411316633224487, + "learning_rate": 0.00016542965435035578, + "loss": 2.7034, + "step": 5532 + }, + { + "epoch": 0.44653377451376, + "grad_norm": 0.6550690531730652, + "learning_rate": 0.0001654177148818227, + "loss": 2.6388, + "step": 5533 + }, + { + "epoch": 0.446614478250343, + "grad_norm": 0.7151147127151489, + "learning_rate": 0.00016540577378290915, + "loss": 2.7382, + "step": 5534 + }, + { + "epoch": 0.446695181986926, + "grad_norm": 0.7343939542770386, + "learning_rate": 0.00016539383105391276, + "loss": 2.6316, + "step": 5535 + }, + { + "epoch": 0.446775885723509, + "grad_norm": 0.702036440372467, + "learning_rate": 0.00016538188669513115, + "loss": 2.6465, + "step": 5536 + }, + { + "epoch": 0.446856589460092, + "grad_norm": 0.7212840914726257, + "learning_rate": 0.00016536994070686197, + "loss": 2.6471, + "step": 5537 + }, + { + "epoch": 0.446937293196675, + "grad_norm": 0.7345479130744934, + "learning_rate": 0.00016535799308940304, + "loss": 2.6746, + "step": 5538 + }, + { + "epoch": 0.447017996933258, + "grad_norm": 0.7447341084480286, + "learning_rate": 0.00016534604384305207, + "loss": 2.6487, + "step": 5539 + }, + { + "epoch": 0.44709870066984103, + "grad_norm": 0.6865687370300293, + "learning_rate": 
0.00016533409296810687, + "loss": 2.6202, + "step": 5540 + }, + { + "epoch": 0.447179404406424, + "grad_norm": 0.8210769891738892, + "learning_rate": 0.0001653221404648653, + "loss": 2.7155, + "step": 5541 + }, + { + "epoch": 0.44726010814300704, + "grad_norm": 0.7768925428390503, + "learning_rate": 0.0001653101863336252, + "loss": 2.6011, + "step": 5542 + }, + { + "epoch": 0.44734081187959, + "grad_norm": 0.7160049080848694, + "learning_rate": 0.00016529823057468456, + "loss": 2.6541, + "step": 5543 + }, + { + "epoch": 0.44742151561617305, + "grad_norm": 0.7386900782585144, + "learning_rate": 0.00016528627318834134, + "loss": 2.6586, + "step": 5544 + }, + { + "epoch": 0.447502219352756, + "grad_norm": 0.7415460348129272, + "learning_rate": 0.0001652743141748935, + "loss": 2.7032, + "step": 5545 + }, + { + "epoch": 0.44758292308933906, + "grad_norm": 0.8483054637908936, + "learning_rate": 0.00016526235353463912, + "loss": 2.6145, + "step": 5546 + }, + { + "epoch": 0.44766362682592203, + "grad_norm": 0.7428778409957886, + "learning_rate": 0.00016525039126787629, + "loss": 2.7005, + "step": 5547 + }, + { + "epoch": 0.44774433056250507, + "grad_norm": 0.7214285731315613, + "learning_rate": 0.00016523842737490316, + "loss": 2.6267, + "step": 5548 + }, + { + "epoch": 0.44782503429908804, + "grad_norm": 0.6753950715065002, + "learning_rate": 0.0001652264618560179, + "loss": 2.6732, + "step": 5549 + }, + { + "epoch": 0.4479057380356711, + "grad_norm": 0.6969403028488159, + "learning_rate": 0.00016521449471151867, + "loss": 2.6218, + "step": 5550 + }, + { + "epoch": 0.44798644177225405, + "grad_norm": 0.7562664151191711, + "learning_rate": 0.00016520252594170377, + "loss": 2.69, + "step": 5551 + }, + { + "epoch": 0.4480671455088371, + "grad_norm": 0.6831937432289124, + "learning_rate": 0.0001651905555468715, + "loss": 2.709, + "step": 5552 + }, + { + "epoch": 0.44814784924542006, + "grad_norm": 0.6753427386283875, + "learning_rate": 0.00016517858352732017, + "loss": 
2.5852, + "step": 5553 + }, + { + "epoch": 0.4482285529820031, + "grad_norm": 0.7573871612548828, + "learning_rate": 0.00016516660988334815, + "loss": 2.6187, + "step": 5554 + }, + { + "epoch": 0.44830925671858607, + "grad_norm": 0.6424254775047302, + "learning_rate": 0.00016515463461525383, + "loss": 2.6411, + "step": 5555 + }, + { + "epoch": 0.4483899604551691, + "grad_norm": 0.7460073232650757, + "learning_rate": 0.0001651426577233358, + "loss": 2.6239, + "step": 5556 + }, + { + "epoch": 0.4484706641917521, + "grad_norm": 0.6980866193771362, + "learning_rate": 0.0001651306792078924, + "loss": 2.605, + "step": 5557 + }, + { + "epoch": 0.4485513679283351, + "grad_norm": 0.7376009225845337, + "learning_rate": 0.00016511869906922217, + "loss": 2.7114, + "step": 5558 + }, + { + "epoch": 0.4486320716649181, + "grad_norm": 0.7227364778518677, + "learning_rate": 0.0001651067173076238, + "loss": 2.6212, + "step": 5559 + }, + { + "epoch": 0.44871277540150106, + "grad_norm": 0.8989635705947876, + "learning_rate": 0.00016509473392339584, + "loss": 2.671, + "step": 5560 + }, + { + "epoch": 0.4487934791380841, + "grad_norm": 0.7273553609848022, + "learning_rate": 0.0001650827489168369, + "loss": 2.6556, + "step": 5561 + }, + { + "epoch": 0.44887418287466707, + "grad_norm": 0.839439868927002, + "learning_rate": 0.00016507076228824578, + "loss": 2.6959, + "step": 5562 + }, + { + "epoch": 0.4489548866112501, + "grad_norm": 0.6912770867347717, + "learning_rate": 0.00016505877403792115, + "loss": 2.6709, + "step": 5563 + }, + { + "epoch": 0.4490355903478331, + "grad_norm": 0.7850949168205261, + "learning_rate": 0.00016504678416616182, + "loss": 2.7257, + "step": 5564 + }, + { + "epoch": 0.4491162940844161, + "grad_norm": 0.7768355011940002, + "learning_rate": 0.0001650347926732666, + "loss": 2.5939, + "step": 5565 + }, + { + "epoch": 0.4491969978209991, + "grad_norm": 0.6518398523330688, + "learning_rate": 0.0001650227995595343, + "loss": 2.6589, + "step": 5566 + }, + { + "epoch": 
0.4492777015575821, + "grad_norm": 0.6855975389480591, + "learning_rate": 0.0001650108048252639, + "loss": 2.6372, + "step": 5567 + }, + { + "epoch": 0.4493584052941651, + "grad_norm": 0.7176938056945801, + "learning_rate": 0.0001649988084707543, + "loss": 2.6506, + "step": 5568 + }, + { + "epoch": 0.44943910903074813, + "grad_norm": 0.735335648059845, + "learning_rate": 0.00016498681049630448, + "loss": 2.608, + "step": 5569 + }, + { + "epoch": 0.4495198127673311, + "grad_norm": 0.6862306594848633, + "learning_rate": 0.00016497481090221346, + "loss": 2.5982, + "step": 5570 + }, + { + "epoch": 0.44960051650391414, + "grad_norm": 0.7213380336761475, + "learning_rate": 0.0001649628096887803, + "loss": 2.6457, + "step": 5571 + }, + { + "epoch": 0.4496812202404971, + "grad_norm": 0.7118985652923584, + "learning_rate": 0.0001649508068563041, + "loss": 2.6321, + "step": 5572 + }, + { + "epoch": 0.44976192397708015, + "grad_norm": 0.7663396596908569, + "learning_rate": 0.00016493880240508405, + "loss": 2.5865, + "step": 5573 + }, + { + "epoch": 0.4498426277136631, + "grad_norm": 0.6854543089866638, + "learning_rate": 0.00016492679633541926, + "loss": 2.6536, + "step": 5574 + }, + { + "epoch": 0.44992333145024616, + "grad_norm": 0.7071701884269714, + "learning_rate": 0.000164914788647609, + "loss": 2.6149, + "step": 5575 + }, + { + "epoch": 0.45000403518682913, + "grad_norm": 0.7610478401184082, + "learning_rate": 0.00016490277934195252, + "loss": 2.6326, + "step": 5576 + }, + { + "epoch": 0.45008473892341216, + "grad_norm": 0.7117596864700317, + "learning_rate": 0.0001648907684187491, + "loss": 2.6938, + "step": 5577 + }, + { + "epoch": 0.45016544265999514, + "grad_norm": 0.6980494856834412, + "learning_rate": 0.00016487875587829813, + "loss": 2.6798, + "step": 5578 + }, + { + "epoch": 0.4502461463965782, + "grad_norm": 0.7957972288131714, + "learning_rate": 0.00016486674172089898, + "loss": 2.6029, + "step": 5579 + }, + { + "epoch": 0.45032685013316115, + "grad_norm": 
0.7258082032203674, + "learning_rate": 0.00016485472594685103, + "loss": 2.6785, + "step": 5580 + }, + { + "epoch": 0.4504075538697442, + "grad_norm": 0.7402041554450989, + "learning_rate": 0.0001648427085564538, + "loss": 2.6263, + "step": 5581 + }, + { + "epoch": 0.45048825760632716, + "grad_norm": 0.6943814158439636, + "learning_rate": 0.00016483068955000673, + "loss": 2.6761, + "step": 5582 + }, + { + "epoch": 0.4505689613429102, + "grad_norm": 0.8021644353866577, + "learning_rate": 0.00016481866892780947, + "loss": 2.6376, + "step": 5583 + }, + { + "epoch": 0.45064966507949317, + "grad_norm": 0.7748533487319946, + "learning_rate": 0.0001648066466901615, + "loss": 2.7465, + "step": 5584 + }, + { + "epoch": 0.4507303688160762, + "grad_norm": 0.7432222366333008, + "learning_rate": 0.00016479462283736248, + "loss": 2.6368, + "step": 5585 + }, + { + "epoch": 0.4508110725526592, + "grad_norm": 0.7835286259651184, + "learning_rate": 0.00016478259736971214, + "loss": 2.6449, + "step": 5586 + }, + { + "epoch": 0.4508917762892422, + "grad_norm": 0.7372995018959045, + "learning_rate": 0.00016477057028751007, + "loss": 2.6091, + "step": 5587 + }, + { + "epoch": 0.4509724800258252, + "grad_norm": 0.8230665326118469, + "learning_rate": 0.0001647585415910561, + "loss": 2.6345, + "step": 5588 + }, + { + "epoch": 0.4510531837624082, + "grad_norm": 0.7490825057029724, + "learning_rate": 0.00016474651128065002, + "loss": 2.5996, + "step": 5589 + }, + { + "epoch": 0.4511338874989912, + "grad_norm": 0.7950569987297058, + "learning_rate": 0.00016473447935659157, + "loss": 2.7109, + "step": 5590 + }, + { + "epoch": 0.4512145912355742, + "grad_norm": 0.7648342251777649, + "learning_rate": 0.00016472244581918074, + "loss": 2.6268, + "step": 5591 + }, + { + "epoch": 0.4512952949721572, + "grad_norm": 0.726828396320343, + "learning_rate": 0.00016471041066871733, + "loss": 2.5959, + "step": 5592 + }, + { + "epoch": 0.45137599870874023, + "grad_norm": 0.7855841517448425, + 
"learning_rate": 0.00016469837390550133, + "loss": 2.6671, + "step": 5593 + }, + { + "epoch": 0.4514567024453232, + "grad_norm": 0.6858882904052734, + "learning_rate": 0.00016468633552983275, + "loss": 2.6003, + "step": 5594 + }, + { + "epoch": 0.45153740618190624, + "grad_norm": 0.710926353931427, + "learning_rate": 0.0001646742955420116, + "loss": 2.6049, + "step": 5595 + }, + { + "epoch": 0.4516181099184892, + "grad_norm": 0.8359978199005127, + "learning_rate": 0.0001646622539423379, + "loss": 2.6636, + "step": 5596 + }, + { + "epoch": 0.45169881365507225, + "grad_norm": 0.7628041505813599, + "learning_rate": 0.00016465021073111186, + "loss": 2.6586, + "step": 5597 + }, + { + "epoch": 0.4517795173916552, + "grad_norm": 0.7723419666290283, + "learning_rate": 0.00016463816590863356, + "loss": 2.6213, + "step": 5598 + }, + { + "epoch": 0.45186022112823826, + "grad_norm": 0.7210986018180847, + "learning_rate": 0.0001646261194752032, + "loss": 2.6674, + "step": 5599 + }, + { + "epoch": 0.45194092486482124, + "grad_norm": 0.7665949463844299, + "learning_rate": 0.00016461407143112097, + "loss": 2.68, + "step": 5600 + }, + { + "epoch": 0.45202162860140427, + "grad_norm": 0.7225117087364197, + "learning_rate": 0.00016460202177668722, + "loss": 2.6473, + "step": 5601 + }, + { + "epoch": 0.45210233233798724, + "grad_norm": 0.6831738948822021, + "learning_rate": 0.0001645899705122022, + "loss": 2.6863, + "step": 5602 + }, + { + "epoch": 0.4521830360745703, + "grad_norm": 0.7006321549415588, + "learning_rate": 0.00016457791763796627, + "loss": 2.6242, + "step": 5603 + }, + { + "epoch": 0.45226373981115325, + "grad_norm": 0.7245663404464722, + "learning_rate": 0.00016456586315427983, + "loss": 2.6201, + "step": 5604 + }, + { + "epoch": 0.4523444435477363, + "grad_norm": 0.7444287538528442, + "learning_rate": 0.00016455380706144332, + "loss": 2.6684, + "step": 5605 + }, + { + "epoch": 0.45242514728431926, + "grad_norm": 0.6562673449516296, + "learning_rate": 
0.00016454174935975714, + "loss": 2.5912, + "step": 5606 + }, + { + "epoch": 0.4525058510209023, + "grad_norm": 0.6494336724281311, + "learning_rate": 0.0001645296900495219, + "loss": 2.6245, + "step": 5607 + }, + { + "epoch": 0.45258655475748527, + "grad_norm": 0.6968161463737488, + "learning_rate": 0.0001645176291310381, + "loss": 2.6494, + "step": 5608 + }, + { + "epoch": 0.4526672584940683, + "grad_norm": 0.7351142764091492, + "learning_rate": 0.00016450556660460632, + "loss": 2.574, + "step": 5609 + }, + { + "epoch": 0.4527479622306513, + "grad_norm": 0.7522323131561279, + "learning_rate": 0.0001644935024705272, + "loss": 2.6512, + "step": 5610 + }, + { + "epoch": 0.45282866596723426, + "grad_norm": 0.6744225025177002, + "learning_rate": 0.0001644814367291014, + "loss": 2.6288, + "step": 5611 + }, + { + "epoch": 0.4529093697038173, + "grad_norm": 0.6933234333992004, + "learning_rate": 0.00016446936938062967, + "loss": 2.6076, + "step": 5612 + }, + { + "epoch": 0.45299007344040026, + "grad_norm": 0.7101204991340637, + "learning_rate": 0.00016445730042541272, + "loss": 2.6322, + "step": 5613 + }, + { + "epoch": 0.4530707771769833, + "grad_norm": 0.7647581696510315, + "learning_rate": 0.00016444522986375134, + "loss": 2.7021, + "step": 5614 + }, + { + "epoch": 0.4531514809135663, + "grad_norm": 0.7028820514678955, + "learning_rate": 0.00016443315769594635, + "loss": 2.6171, + "step": 5615 + }, + { + "epoch": 0.4532321846501493, + "grad_norm": 0.6933851838111877, + "learning_rate": 0.00016442108392229868, + "loss": 2.6119, + "step": 5616 + }, + { + "epoch": 0.4533128883867323, + "grad_norm": 0.7218462824821472, + "learning_rate": 0.0001644090085431092, + "loss": 2.6661, + "step": 5617 + }, + { + "epoch": 0.4533935921233153, + "grad_norm": 0.7390525341033936, + "learning_rate": 0.00016439693155867883, + "loss": 2.7084, + "step": 5618 + }, + { + "epoch": 0.4534742958598983, + "grad_norm": 0.734136164188385, + "learning_rate": 0.0001643848529693086, + "loss": 2.6896, 
+ "step": 5619 + }, + { + "epoch": 0.4535549995964813, + "grad_norm": 0.8082060813903809, + "learning_rate": 0.00016437277277529954, + "loss": 2.5828, + "step": 5620 + }, + { + "epoch": 0.4536357033330643, + "grad_norm": 0.695988655090332, + "learning_rate": 0.0001643606909769527, + "loss": 2.6383, + "step": 5621 + }, + { + "epoch": 0.45371640706964733, + "grad_norm": 0.7415786385536194, + "learning_rate": 0.00016434860757456922, + "loss": 2.6388, + "step": 5622 + }, + { + "epoch": 0.4537971108062303, + "grad_norm": 0.7378649115562439, + "learning_rate": 0.0001643365225684502, + "loss": 2.6534, + "step": 5623 + }, + { + "epoch": 0.45387781454281334, + "grad_norm": 0.7686129808425903, + "learning_rate": 0.0001643244359588969, + "loss": 2.6637, + "step": 5624 + }, + { + "epoch": 0.4539585182793963, + "grad_norm": 0.7305558323860168, + "learning_rate": 0.00016431234774621047, + "loss": 2.6525, + "step": 5625 + }, + { + "epoch": 0.45403922201597935, + "grad_norm": 0.7994235157966614, + "learning_rate": 0.00016430025793069225, + "loss": 2.6316, + "step": 5626 + }, + { + "epoch": 0.4541199257525623, + "grad_norm": 0.6945801377296448, + "learning_rate": 0.0001642881665126435, + "loss": 2.6367, + "step": 5627 + }, + { + "epoch": 0.45420062948914536, + "grad_norm": 0.6855447292327881, + "learning_rate": 0.00016427607349236558, + "loss": 2.6317, + "step": 5628 + }, + { + "epoch": 0.45428133322572833, + "grad_norm": 0.6961888670921326, + "learning_rate": 0.00016426397887015992, + "loss": 2.6477, + "step": 5629 + }, + { + "epoch": 0.45436203696231137, + "grad_norm": 0.7531994581222534, + "learning_rate": 0.0001642518826463279, + "loss": 2.7219, + "step": 5630 + }, + { + "epoch": 0.45444274069889434, + "grad_norm": 0.7442335486412048, + "learning_rate": 0.00016423978482117102, + "loss": 2.706, + "step": 5631 + }, + { + "epoch": 0.4545234444354774, + "grad_norm": 0.7075700759887695, + "learning_rate": 0.00016422768539499076, + "loss": 2.6481, + "step": 5632 + }, + { + "epoch": 
0.45460414817206035, + "grad_norm": 0.7831876873970032, + "learning_rate": 0.0001642155843680887, + "loss": 2.616, + "step": 5633 + }, + { + "epoch": 0.4546848519086434, + "grad_norm": 0.7514604926109314, + "learning_rate": 0.00016420348174076642, + "loss": 2.6282, + "step": 5634 + }, + { + "epoch": 0.45476555564522636, + "grad_norm": 0.7136685252189636, + "learning_rate": 0.0001641913775133255, + "loss": 2.6764, + "step": 5635 + }, + { + "epoch": 0.4548462593818094, + "grad_norm": 0.7406740784645081, + "learning_rate": 0.00016417927168606771, + "loss": 2.6126, + "step": 5636 + }, + { + "epoch": 0.45492696311839237, + "grad_norm": 0.7257869839668274, + "learning_rate": 0.0001641671642592947, + "loss": 2.6035, + "step": 5637 + }, + { + "epoch": 0.4550076668549754, + "grad_norm": 0.8378798961639404, + "learning_rate": 0.00016415505523330822, + "loss": 2.6657, + "step": 5638 + }, + { + "epoch": 0.4550883705915584, + "grad_norm": 0.7218836545944214, + "learning_rate": 0.00016414294460841003, + "loss": 2.6209, + "step": 5639 + }, + { + "epoch": 0.4551690743281414, + "grad_norm": 0.7792766690254211, + "learning_rate": 0.00016413083238490204, + "loss": 2.7208, + "step": 5640 + }, + { + "epoch": 0.4552497780647244, + "grad_norm": 0.7800823450088501, + "learning_rate": 0.000164118718563086, + "loss": 2.6351, + "step": 5641 + }, + { + "epoch": 0.4553304818013074, + "grad_norm": 0.7593275904655457, + "learning_rate": 0.00016410660314326395, + "loss": 2.7025, + "step": 5642 + }, + { + "epoch": 0.4554111855378904, + "grad_norm": 0.7561587691307068, + "learning_rate": 0.00016409448612573772, + "loss": 2.6188, + "step": 5643 + }, + { + "epoch": 0.4554918892744734, + "grad_norm": 0.7674516439437866, + "learning_rate": 0.00016408236751080937, + "loss": 2.629, + "step": 5644 + }, + { + "epoch": 0.4555725930110564, + "grad_norm": 0.7112495303153992, + "learning_rate": 0.00016407024729878095, + "loss": 2.6261, + "step": 5645 + }, + { + "epoch": 0.45565329674763944, + "grad_norm": 
0.6861695647239685, + "learning_rate": 0.00016405812548995444, + "loss": 2.6984, + "step": 5646 + }, + { + "epoch": 0.4557340004842224, + "grad_norm": 0.7711648941040039, + "learning_rate": 0.000164046002084632, + "loss": 2.6839, + "step": 5647 + }, + { + "epoch": 0.45581470422080544, + "grad_norm": 0.6862967014312744, + "learning_rate": 0.00016403387708311578, + "loss": 2.5964, + "step": 5648 + }, + { + "epoch": 0.4558954079573884, + "grad_norm": 0.707374632358551, + "learning_rate": 0.00016402175048570793, + "loss": 2.6191, + "step": 5649 + }, + { + "epoch": 0.45597611169397145, + "grad_norm": 0.7980892658233643, + "learning_rate": 0.00016400962229271072, + "loss": 2.6288, + "step": 5650 + }, + { + "epoch": 0.45605681543055443, + "grad_norm": 0.686187744140625, + "learning_rate": 0.0001639974925044264, + "loss": 2.6277, + "step": 5651 + }, + { + "epoch": 0.45613751916713746, + "grad_norm": 0.6970425844192505, + "learning_rate": 0.0001639853611211573, + "loss": 2.5726, + "step": 5652 + }, + { + "epoch": 0.45621822290372044, + "grad_norm": 0.701500415802002, + "learning_rate": 0.00016397322814320573, + "loss": 2.6275, + "step": 5653 + }, + { + "epoch": 0.45629892664030347, + "grad_norm": 0.8432207107543945, + "learning_rate": 0.00016396109357087407, + "loss": 2.6185, + "step": 5654 + }, + { + "epoch": 0.45637963037688645, + "grad_norm": 0.7049770951271057, + "learning_rate": 0.00016394895740446476, + "loss": 2.674, + "step": 5655 + }, + { + "epoch": 0.4564603341134695, + "grad_norm": 0.7068646550178528, + "learning_rate": 0.00016393681964428026, + "loss": 2.6072, + "step": 5656 + }, + { + "epoch": 0.45654103785005246, + "grad_norm": 0.7698760032653809, + "learning_rate": 0.00016392468029062312, + "loss": 2.6547, + "step": 5657 + }, + { + "epoch": 0.4566217415866355, + "grad_norm": 0.7381031513214111, + "learning_rate": 0.00016391253934379583, + "loss": 2.6125, + "step": 5658 + }, + { + "epoch": 0.45670244532321846, + "grad_norm": 0.7367781400680542, + 
"learning_rate": 0.00016390039680410097, + "loss": 2.6763, + "step": 5659 + }, + { + "epoch": 0.4567831490598015, + "grad_norm": 0.7416272759437561, + "learning_rate": 0.00016388825267184121, + "loss": 2.7059, + "step": 5660 + }, + { + "epoch": 0.4568638527963845, + "grad_norm": 0.6933416724205017, + "learning_rate": 0.0001638761069473192, + "loss": 2.6028, + "step": 5661 + }, + { + "epoch": 0.45694455653296745, + "grad_norm": 0.7311314940452576, + "learning_rate": 0.00016386395963083756, + "loss": 2.6266, + "step": 5662 + }, + { + "epoch": 0.4570252602695505, + "grad_norm": 0.7172734141349792, + "learning_rate": 0.00016385181072269917, + "loss": 2.6754, + "step": 5663 + }, + { + "epoch": 0.45710596400613346, + "grad_norm": 0.7286428213119507, + "learning_rate": 0.00016383966022320671, + "loss": 2.6637, + "step": 5664 + }, + { + "epoch": 0.4571866677427165, + "grad_norm": 0.7296474575996399, + "learning_rate": 0.00016382750813266308, + "loss": 2.6655, + "step": 5665 + }, + { + "epoch": 0.45726737147929947, + "grad_norm": 0.6929224133491516, + "learning_rate": 0.00016381535445137105, + "loss": 2.6376, + "step": 5666 + }, + { + "epoch": 0.4573480752158825, + "grad_norm": 0.7012765407562256, + "learning_rate": 0.0001638031991796336, + "loss": 2.6222, + "step": 5667 + }, + { + "epoch": 0.4574287789524655, + "grad_norm": 0.7360745668411255, + "learning_rate": 0.00016379104231775368, + "loss": 2.6304, + "step": 5668 + }, + { + "epoch": 0.4575094826890485, + "grad_norm": 0.7276801466941833, + "learning_rate": 0.00016377888386603419, + "loss": 2.7046, + "step": 5669 + }, + { + "epoch": 0.4575901864256315, + "grad_norm": 0.688432514667511, + "learning_rate": 0.0001637667238247782, + "loss": 2.6598, + "step": 5670 + }, + { + "epoch": 0.4576708901622145, + "grad_norm": 0.6874414682388306, + "learning_rate": 0.00016375456219428877, + "loss": 2.7, + "step": 5671 + }, + { + "epoch": 0.4577515938987975, + "grad_norm": 0.711091160774231, + "learning_rate": 0.000163742398974869, + 
"loss": 2.6063, + "step": 5672 + }, + { + "epoch": 0.4578322976353805, + "grad_norm": 0.7131791710853577, + "learning_rate": 0.000163730234166822, + "loss": 2.5948, + "step": 5673 + }, + { + "epoch": 0.4579130013719635, + "grad_norm": 0.7166630625724792, + "learning_rate": 0.000163718067770451, + "loss": 2.6488, + "step": 5674 + }, + { + "epoch": 0.45799370510854653, + "grad_norm": 0.7285952568054199, + "learning_rate": 0.00016370589978605916, + "loss": 2.6445, + "step": 5675 + }, + { + "epoch": 0.4580744088451295, + "grad_norm": 0.728050172328949, + "learning_rate": 0.0001636937302139498, + "loss": 2.5425, + "step": 5676 + }, + { + "epoch": 0.45815511258171254, + "grad_norm": 0.7196047902107239, + "learning_rate": 0.00016368155905442615, + "loss": 2.7426, + "step": 5677 + }, + { + "epoch": 0.4582358163182955, + "grad_norm": 0.6844602823257446, + "learning_rate": 0.0001636693863077916, + "loss": 2.6157, + "step": 5678 + }, + { + "epoch": 0.45831652005487855, + "grad_norm": 0.7375781536102295, + "learning_rate": 0.0001636572119743495, + "loss": 2.7069, + "step": 5679 + }, + { + "epoch": 0.4583972237914615, + "grad_norm": 0.7667750120162964, + "learning_rate": 0.0001636450360544033, + "loss": 2.6589, + "step": 5680 + }, + { + "epoch": 0.45847792752804456, + "grad_norm": 0.6569861173629761, + "learning_rate": 0.00016363285854825642, + "loss": 2.6197, + "step": 5681 + }, + { + "epoch": 0.45855863126462754, + "grad_norm": 0.7177335023880005, + "learning_rate": 0.00016362067945621239, + "loss": 2.6104, + "step": 5682 + }, + { + "epoch": 0.45863933500121057, + "grad_norm": 0.7260481715202332, + "learning_rate": 0.00016360849877857469, + "loss": 2.6435, + "step": 5683 + }, + { + "epoch": 0.45872003873779355, + "grad_norm": 0.7083989381790161, + "learning_rate": 0.00016359631651564693, + "loss": 2.6366, + "step": 5684 + }, + { + "epoch": 0.4588007424743766, + "grad_norm": 0.6417020559310913, + "learning_rate": 0.00016358413266773271, + "loss": 2.6311, + "step": 5685 + }, + 
{ + "epoch": 0.45888144621095955, + "grad_norm": 0.737856924533844, + "learning_rate": 0.0001635719472351357, + "loss": 2.6647, + "step": 5686 + }, + { + "epoch": 0.4589621499475426, + "grad_norm": 0.6774190068244934, + "learning_rate": 0.0001635597602181596, + "loss": 2.6366, + "step": 5687 + }, + { + "epoch": 0.45904285368412556, + "grad_norm": 0.6480480432510376, + "learning_rate": 0.0001635475716171081, + "loss": 2.6501, + "step": 5688 + }, + { + "epoch": 0.4591235574207086, + "grad_norm": 0.7886860370635986, + "learning_rate": 0.0001635353814322851, + "loss": 2.7239, + "step": 5689 + }, + { + "epoch": 0.45920426115729157, + "grad_norm": 0.7579021453857422, + "learning_rate": 0.0001635231896639942, + "loss": 2.6155, + "step": 5690 + }, + { + "epoch": 0.4592849648938746, + "grad_norm": 0.6853809356689453, + "learning_rate": 0.0001635109963125394, + "loss": 2.5933, + "step": 5691 + }, + { + "epoch": 0.4593656686304576, + "grad_norm": 0.661342978477478, + "learning_rate": 0.00016349880137822456, + "loss": 2.6277, + "step": 5692 + }, + { + "epoch": 0.4594463723670406, + "grad_norm": 0.6795682311058044, + "learning_rate": 0.0001634866048613536, + "loss": 2.6221, + "step": 5693 + }, + { + "epoch": 0.4595270761036236, + "grad_norm": 0.7375383377075195, + "learning_rate": 0.00016347440676223047, + "loss": 2.6082, + "step": 5694 + }, + { + "epoch": 0.4596077798402066, + "grad_norm": 0.7565153241157532, + "learning_rate": 0.0001634622070811592, + "loss": 2.6615, + "step": 5695 + }, + { + "epoch": 0.4596884835767896, + "grad_norm": 0.6869745254516602, + "learning_rate": 0.00016345000581844386, + "loss": 2.6172, + "step": 5696 + }, + { + "epoch": 0.45976918731337263, + "grad_norm": 0.7192853689193726, + "learning_rate": 0.0001634378029743885, + "loss": 2.6324, + "step": 5697 + }, + { + "epoch": 0.4598498910499556, + "grad_norm": 0.6919218301773071, + "learning_rate": 0.00016342559854929726, + "loss": 2.5965, + "step": 5698 + }, + { + "epoch": 0.45993059478653864, + 
"grad_norm": 0.6715282797813416, + "learning_rate": 0.00016341339254347432, + "loss": 2.6225, + "step": 5699 + }, + { + "epoch": 0.4600112985231216, + "grad_norm": 0.6768380999565125, + "learning_rate": 0.00016340118495722388, + "loss": 2.6376, + "step": 5700 + }, + { + "epoch": 0.46009200225970465, + "grad_norm": 0.6898325681686401, + "learning_rate": 0.00016338897579085018, + "loss": 2.667, + "step": 5701 + }, + { + "epoch": 0.4601727059962876, + "grad_norm": 0.7171810865402222, + "learning_rate": 0.00016337676504465747, + "loss": 2.678, + "step": 5702 + }, + { + "epoch": 0.46025340973287066, + "grad_norm": 0.7050724029541016, + "learning_rate": 0.00016336455271895016, + "loss": 2.619, + "step": 5703 + }, + { + "epoch": 0.46033411346945363, + "grad_norm": 0.8287240862846375, + "learning_rate": 0.00016335233881403248, + "loss": 2.71, + "step": 5704 + }, + { + "epoch": 0.46041481720603666, + "grad_norm": 0.6880568861961365, + "learning_rate": 0.000163340123330209, + "loss": 2.6516, + "step": 5705 + }, + { + "epoch": 0.46049552094261964, + "grad_norm": 0.7222896218299866, + "learning_rate": 0.00016332790626778402, + "loss": 2.5899, + "step": 5706 + }, + { + "epoch": 0.4605762246792027, + "grad_norm": 0.7707448601722717, + "learning_rate": 0.00016331568762706207, + "loss": 2.6116, + "step": 5707 + }, + { + "epoch": 0.46065692841578565, + "grad_norm": 0.7780653834342957, + "learning_rate": 0.0001633034674083477, + "loss": 2.6072, + "step": 5708 + }, + { + "epoch": 0.4607376321523687, + "grad_norm": 0.7551524639129639, + "learning_rate": 0.00016329124561194545, + "loss": 2.548, + "step": 5709 + }, + { + "epoch": 0.46081833588895166, + "grad_norm": 0.9312284588813782, + "learning_rate": 0.0001632790222381599, + "loss": 2.6557, + "step": 5710 + }, + { + "epoch": 0.4608990396255347, + "grad_norm": 0.7404753565788269, + "learning_rate": 0.0001632667972872957, + "loss": 2.6889, + "step": 5711 + }, + { + "epoch": 0.46097974336211767, + "grad_norm": 0.7423726916313171, + 
"learning_rate": 0.00016325457075965752, + "loss": 2.6265, + "step": 5712 + }, + { + "epoch": 0.46106044709870064, + "grad_norm": 1.0683187246322632, + "learning_rate": 0.0001632423426555501, + "loss": 2.6827, + "step": 5713 + }, + { + "epoch": 0.4611411508352837, + "grad_norm": 0.7204160094261169, + "learning_rate": 0.0001632301129752782, + "loss": 2.702, + "step": 5714 + }, + { + "epoch": 0.46122185457186665, + "grad_norm": 0.7591153383255005, + "learning_rate": 0.0001632178817191466, + "loss": 2.6031, + "step": 5715 + }, + { + "epoch": 0.4613025583084497, + "grad_norm": 0.8147456645965576, + "learning_rate": 0.00016320564888746013, + "loss": 2.6117, + "step": 5716 + }, + { + "epoch": 0.46138326204503266, + "grad_norm": 0.7880246639251709, + "learning_rate": 0.00016319341448052364, + "loss": 2.5896, + "step": 5717 + }, + { + "epoch": 0.4614639657816157, + "grad_norm": 0.6875137686729431, + "learning_rate": 0.00016318117849864206, + "loss": 2.6258, + "step": 5718 + }, + { + "epoch": 0.46154466951819867, + "grad_norm": 0.7197960615158081, + "learning_rate": 0.00016316894094212044, + "loss": 2.6656, + "step": 5719 + }, + { + "epoch": 0.4616253732547817, + "grad_norm": 0.7049540281295776, + "learning_rate": 0.0001631567018112636, + "loss": 2.6698, + "step": 5720 + }, + { + "epoch": 0.4617060769913647, + "grad_norm": 0.7128825783729553, + "learning_rate": 0.00016314446110637668, + "loss": 2.6552, + "step": 5721 + }, + { + "epoch": 0.4617867807279477, + "grad_norm": 0.7956201434135437, + "learning_rate": 0.00016313221882776477, + "loss": 2.6747, + "step": 5722 + }, + { + "epoch": 0.4618674844645307, + "grad_norm": 0.7598347663879395, + "learning_rate": 0.0001631199749757329, + "loss": 2.6187, + "step": 5723 + }, + { + "epoch": 0.4619481882011137, + "grad_norm": 0.6587582230567932, + "learning_rate": 0.00016310772955058627, + "loss": 2.596, + "step": 5724 + }, + { + "epoch": 0.4620288919376967, + "grad_norm": 0.700136125087738, + "learning_rate": 0.00016309548255263003, 
+ "loss": 2.6527, + "step": 5725 + }, + { + "epoch": 0.4621095956742797, + "grad_norm": 0.7246582508087158, + "learning_rate": 0.00016308323398216945, + "loss": 2.6577, + "step": 5726 + }, + { + "epoch": 0.4621902994108627, + "grad_norm": 0.6951557993888855, + "learning_rate": 0.00016307098383950977, + "loss": 2.5816, + "step": 5727 + }, + { + "epoch": 0.46227100314744574, + "grad_norm": 0.7109191417694092, + "learning_rate": 0.0001630587321249563, + "loss": 2.6586, + "step": 5728 + }, + { + "epoch": 0.4623517068840287, + "grad_norm": 0.7357863783836365, + "learning_rate": 0.0001630464788388144, + "loss": 2.691, + "step": 5729 + }, + { + "epoch": 0.46243241062061174, + "grad_norm": 0.7916350960731506, + "learning_rate": 0.00016303422398138945, + "loss": 2.6584, + "step": 5730 + }, + { + "epoch": 0.4625131143571947, + "grad_norm": 0.6543231010437012, + "learning_rate": 0.00016302196755298685, + "loss": 2.6482, + "step": 5731 + }, + { + "epoch": 0.46259381809377775, + "grad_norm": 0.6978787183761597, + "learning_rate": 0.00016300970955391208, + "loss": 2.5956, + "step": 5732 + }, + { + "epoch": 0.46267452183036073, + "grad_norm": 0.7301886677742004, + "learning_rate": 0.00016299744998447065, + "loss": 2.6178, + "step": 5733 + }, + { + "epoch": 0.46275522556694376, + "grad_norm": 0.7381030321121216, + "learning_rate": 0.00016298518884496808, + "loss": 2.6712, + "step": 5734 + }, + { + "epoch": 0.46283592930352674, + "grad_norm": 0.7769027948379517, + "learning_rate": 0.00016297292613570995, + "loss": 2.6082, + "step": 5735 + }, + { + "epoch": 0.46291663304010977, + "grad_norm": 0.7698354721069336, + "learning_rate": 0.0001629606618570019, + "loss": 2.6543, + "step": 5736 + }, + { + "epoch": 0.46299733677669275, + "grad_norm": 0.7001554369926453, + "learning_rate": 0.00016294839600914957, + "loss": 2.6174, + "step": 5737 + }, + { + "epoch": 0.4630780405132758, + "grad_norm": 0.7589300274848938, + "learning_rate": 0.00016293612859245868, + "loss": 2.6338, + "step": 5738 
+ }, + { + "epoch": 0.46315874424985876, + "grad_norm": 0.7083945274353027, + "learning_rate": 0.00016292385960723493, + "loss": 2.6793, + "step": 5739 + }, + { + "epoch": 0.4632394479864418, + "grad_norm": 0.739439845085144, + "learning_rate": 0.00016291158905378412, + "loss": 2.7335, + "step": 5740 + }, + { + "epoch": 0.46332015172302476, + "grad_norm": 0.6868166923522949, + "learning_rate": 0.00016289931693241205, + "loss": 2.6139, + "step": 5741 + }, + { + "epoch": 0.4634008554596078, + "grad_norm": 0.7385871410369873, + "learning_rate": 0.0001628870432434246, + "loss": 2.6783, + "step": 5742 + }, + { + "epoch": 0.4634815591961908, + "grad_norm": 0.7227835655212402, + "learning_rate": 0.00016287476798712764, + "loss": 2.6732, + "step": 5743 + }, + { + "epoch": 0.4635622629327738, + "grad_norm": 0.6662411689758301, + "learning_rate": 0.00016286249116382709, + "loss": 2.6645, + "step": 5744 + }, + { + "epoch": 0.4636429666693568, + "grad_norm": 0.8110263347625732, + "learning_rate": 0.00016285021277382894, + "loss": 2.6448, + "step": 5745 + }, + { + "epoch": 0.4637236704059398, + "grad_norm": 0.7419269680976868, + "learning_rate": 0.0001628379328174392, + "loss": 2.7286, + "step": 5746 + }, + { + "epoch": 0.4638043741425228, + "grad_norm": 0.6518125534057617, + "learning_rate": 0.0001628256512949639, + "loss": 2.6545, + "step": 5747 + }, + { + "epoch": 0.4638850778791058, + "grad_norm": 0.6816060543060303, + "learning_rate": 0.00016281336820670917, + "loss": 2.6167, + "step": 5748 + }, + { + "epoch": 0.4639657816156888, + "grad_norm": 0.6537362337112427, + "learning_rate": 0.0001628010835529811, + "loss": 2.6522, + "step": 5749 + }, + { + "epoch": 0.46404648535227183, + "grad_norm": 0.6720992922782898, + "learning_rate": 0.00016278879733408585, + "loss": 2.6028, + "step": 5750 + }, + { + "epoch": 0.4641271890888548, + "grad_norm": 0.6778908371925354, + "learning_rate": 0.00016277650955032967, + "loss": 2.5591, + "step": 5751 + }, + { + "epoch": 
0.46420789282543784, + "grad_norm": 0.6908471584320068, + "learning_rate": 0.0001627642202020187, + "loss": 2.6574, + "step": 5752 + }, + { + "epoch": 0.4642885965620208, + "grad_norm": 0.7034298181533813, + "learning_rate": 0.00016275192928945936, + "loss": 2.657, + "step": 5753 + }, + { + "epoch": 0.46436930029860385, + "grad_norm": 0.7245952486991882, + "learning_rate": 0.0001627396368129579, + "loss": 2.6572, + "step": 5754 + }, + { + "epoch": 0.4644500040351868, + "grad_norm": 0.6764482855796814, + "learning_rate": 0.0001627273427728207, + "loss": 2.6576, + "step": 5755 + }, + { + "epoch": 0.46453070777176986, + "grad_norm": 0.7074379920959473, + "learning_rate": 0.0001627150471693541, + "loss": 2.614, + "step": 5756 + }, + { + "epoch": 0.46461141150835283, + "grad_norm": 0.7292052507400513, + "learning_rate": 0.0001627027500028646, + "loss": 2.673, + "step": 5757 + }, + { + "epoch": 0.46469211524493587, + "grad_norm": 0.7554025650024414, + "learning_rate": 0.0001626904512736587, + "loss": 2.5919, + "step": 5758 + }, + { + "epoch": 0.46477281898151884, + "grad_norm": 0.6829606890678406, + "learning_rate": 0.00016267815098204284, + "loss": 2.7206, + "step": 5759 + }, + { + "epoch": 0.4648535227181019, + "grad_norm": 0.7201548218727112, + "learning_rate": 0.00016266584912832363, + "loss": 2.6651, + "step": 5760 + }, + { + "epoch": 0.46493422645468485, + "grad_norm": 0.6889227628707886, + "learning_rate": 0.00016265354571280764, + "loss": 2.6776, + "step": 5761 + }, + { + "epoch": 0.4650149301912679, + "grad_norm": 0.7286190986633301, + "learning_rate": 0.00016264124073580156, + "loss": 2.591, + "step": 5762 + }, + { + "epoch": 0.46509563392785086, + "grad_norm": 0.7222036123275757, + "learning_rate": 0.00016262893419761196, + "loss": 2.6422, + "step": 5763 + }, + { + "epoch": 0.46517633766443384, + "grad_norm": 0.6822768449783325, + "learning_rate": 0.00016261662609854562, + "loss": 2.6126, + "step": 5764 + }, + { + "epoch": 0.46525704140101687, + "grad_norm": 
0.7263356447219849, + "learning_rate": 0.00016260431643890929, + "loss": 2.6304, + "step": 5765 + }, + { + "epoch": 0.46533774513759985, + "grad_norm": 0.7152180075645447, + "learning_rate": 0.00016259200521900972, + "loss": 2.6489, + "step": 5766 + }, + { + "epoch": 0.4654184488741829, + "grad_norm": 0.6988116502761841, + "learning_rate": 0.00016257969243915378, + "loss": 2.6151, + "step": 5767 + }, + { + "epoch": 0.46549915261076585, + "grad_norm": 0.7131790518760681, + "learning_rate": 0.00016256737809964831, + "loss": 2.6284, + "step": 5768 + }, + { + "epoch": 0.4655798563473489, + "grad_norm": 0.674196183681488, + "learning_rate": 0.00016255506220080025, + "loss": 2.5815, + "step": 5769 + }, + { + "epoch": 0.46566056008393186, + "grad_norm": 0.7166198492050171, + "learning_rate": 0.0001625427447429165, + "loss": 2.6594, + "step": 5770 + }, + { + "epoch": 0.4657412638205149, + "grad_norm": 0.6997127532958984, + "learning_rate": 0.00016253042572630407, + "loss": 2.6502, + "step": 5771 + }, + { + "epoch": 0.46582196755709787, + "grad_norm": 0.7761591076850891, + "learning_rate": 0.00016251810515126994, + "loss": 2.624, + "step": 5772 + }, + { + "epoch": 0.4659026712936809, + "grad_norm": 0.7038728594779968, + "learning_rate": 0.00016250578301812125, + "loss": 2.6096, + "step": 5773 + }, + { + "epoch": 0.4659833750302639, + "grad_norm": 0.7080080509185791, + "learning_rate": 0.00016249345932716505, + "loss": 2.6196, + "step": 5774 + }, + { + "epoch": 0.4660640787668469, + "grad_norm": 0.7461444735527039, + "learning_rate": 0.00016248113407870847, + "loss": 2.65, + "step": 5775 + }, + { + "epoch": 0.4661447825034299, + "grad_norm": 0.7914463877677917, + "learning_rate": 0.00016246880727305868, + "loss": 2.6539, + "step": 5776 + }, + { + "epoch": 0.4662254862400129, + "grad_norm": 0.7067776918411255, + "learning_rate": 0.00016245647891052295, + "loss": 2.72, + "step": 5777 + }, + { + "epoch": 0.4663061899765959, + "grad_norm": 0.7190818190574646, + "learning_rate": 
0.00016244414899140852, + "loss": 2.7029, + "step": 5778 + }, + { + "epoch": 0.46638689371317893, + "grad_norm": 0.6740003824234009, + "learning_rate": 0.00016243181751602261, + "loss": 2.6404, + "step": 5779 + }, + { + "epoch": 0.4664675974497619, + "grad_norm": 0.7942661643028259, + "learning_rate": 0.00016241948448467267, + "loss": 2.6333, + "step": 5780 + }, + { + "epoch": 0.46654830118634494, + "grad_norm": 0.6415690183639526, + "learning_rate": 0.00016240714989766597, + "loss": 2.6354, + "step": 5781 + }, + { + "epoch": 0.4666290049229279, + "grad_norm": 0.7287769913673401, + "learning_rate": 0.00016239481375530997, + "loss": 2.6721, + "step": 5782 + }, + { + "epoch": 0.46670970865951095, + "grad_norm": 0.8197699189186096, + "learning_rate": 0.00016238247605791212, + "loss": 2.7577, + "step": 5783 + }, + { + "epoch": 0.4667904123960939, + "grad_norm": 0.8182012438774109, + "learning_rate": 0.0001623701368057799, + "loss": 2.6475, + "step": 5784 + }, + { + "epoch": 0.46687111613267696, + "grad_norm": 0.6974665522575378, + "learning_rate": 0.00016235779599922082, + "loss": 2.5897, + "step": 5785 + }, + { + "epoch": 0.46695181986925993, + "grad_norm": 0.7156379222869873, + "learning_rate": 0.00016234545363854247, + "loss": 2.5981, + "step": 5786 + }, + { + "epoch": 0.46703252360584296, + "grad_norm": 0.6875364780426025, + "learning_rate": 0.0001623331097240524, + "loss": 2.6333, + "step": 5787 + }, + { + "epoch": 0.46711322734242594, + "grad_norm": 0.7222917675971985, + "learning_rate": 0.00016232076425605835, + "loss": 2.5865, + "step": 5788 + }, + { + "epoch": 0.467193931079009, + "grad_norm": 0.7224915027618408, + "learning_rate": 0.00016230841723486792, + "loss": 2.667, + "step": 5789 + }, + { + "epoch": 0.46727463481559195, + "grad_norm": 0.7125402688980103, + "learning_rate": 0.00016229606866078887, + "loss": 2.6548, + "step": 5790 + }, + { + "epoch": 0.467355338552175, + "grad_norm": 0.6866132616996765, + "learning_rate": 0.00016228371853412894, + "loss": 
2.6381, + "step": 5791 + }, + { + "epoch": 0.46743604228875796, + "grad_norm": 0.7573552131652832, + "learning_rate": 0.00016227136685519593, + "loss": 2.6766, + "step": 5792 + }, + { + "epoch": 0.467516746025341, + "grad_norm": 0.7565932273864746, + "learning_rate": 0.00016225901362429767, + "loss": 2.5965, + "step": 5793 + }, + { + "epoch": 0.46759744976192397, + "grad_norm": 0.7279250621795654, + "learning_rate": 0.00016224665884174207, + "loss": 2.6599, + "step": 5794 + }, + { + "epoch": 0.467678153498507, + "grad_norm": 0.7501276731491089, + "learning_rate": 0.000162234302507837, + "loss": 2.636, + "step": 5795 + }, + { + "epoch": 0.46775885723509, + "grad_norm": 0.7823930978775024, + "learning_rate": 0.00016222194462289042, + "loss": 2.6277, + "step": 5796 + }, + { + "epoch": 0.467839560971673, + "grad_norm": 0.7168415784835815, + "learning_rate": 0.00016220958518721034, + "loss": 2.6868, + "step": 5797 + }, + { + "epoch": 0.467920264708256, + "grad_norm": 0.7468454241752625, + "learning_rate": 0.00016219722420110478, + "loss": 2.7209, + "step": 5798 + }, + { + "epoch": 0.468000968444839, + "grad_norm": 0.6915228962898254, + "learning_rate": 0.0001621848616648818, + "loss": 2.6356, + "step": 5799 + }, + { + "epoch": 0.468081672181422, + "grad_norm": 0.7731573581695557, + "learning_rate": 0.00016217249757884955, + "loss": 2.6396, + "step": 5800 + }, + { + "epoch": 0.468162375918005, + "grad_norm": 0.6579388380050659, + "learning_rate": 0.0001621601319433161, + "loss": 2.6077, + "step": 5801 + }, + { + "epoch": 0.468243079654588, + "grad_norm": 0.7136246562004089, + "learning_rate": 0.00016214776475858967, + "loss": 2.6602, + "step": 5802 + }, + { + "epoch": 0.46832378339117103, + "grad_norm": 0.6929461359977722, + "learning_rate": 0.0001621353960249785, + "loss": 2.6851, + "step": 5803 + }, + { + "epoch": 0.468404487127754, + "grad_norm": 0.8001779913902283, + "learning_rate": 0.00016212302574279087, + "loss": 2.6577, + "step": 5804 + }, + { + "epoch": 
0.46848519086433704, + "grad_norm": 0.7637671828269958, + "learning_rate": 0.00016211065391233498, + "loss": 2.6923, + "step": 5805 + }, + { + "epoch": 0.46856589460092, + "grad_norm": 0.6879906058311462, + "learning_rate": 0.0001620982805339193, + "loss": 2.6555, + "step": 5806 + }, + { + "epoch": 0.46864659833750305, + "grad_norm": 0.7731223702430725, + "learning_rate": 0.0001620859056078521, + "loss": 2.6301, + "step": 5807 + }, + { + "epoch": 0.468727302074086, + "grad_norm": 0.7351491451263428, + "learning_rate": 0.00016207352913444185, + "loss": 2.6154, + "step": 5808 + }, + { + "epoch": 0.46880800581066906, + "grad_norm": 0.716314435005188, + "learning_rate": 0.000162061151113997, + "loss": 2.6294, + "step": 5809 + }, + { + "epoch": 0.46888870954725204, + "grad_norm": 0.6974702477455139, + "learning_rate": 0.00016204877154682605, + "loss": 2.6046, + "step": 5810 + }, + { + "epoch": 0.46896941328383507, + "grad_norm": 0.7456035614013672, + "learning_rate": 0.00016203639043323745, + "loss": 2.6308, + "step": 5811 + }, + { + "epoch": 0.46905011702041804, + "grad_norm": 0.7198047637939453, + "learning_rate": 0.0001620240077735399, + "loss": 2.6303, + "step": 5812 + }, + { + "epoch": 0.4691308207570011, + "grad_norm": 0.7098269462585449, + "learning_rate": 0.00016201162356804192, + "loss": 2.6352, + "step": 5813 + }, + { + "epoch": 0.46921152449358405, + "grad_norm": 0.7060410976409912, + "learning_rate": 0.0001619992378170522, + "loss": 2.6489, + "step": 5814 + }, + { + "epoch": 0.46929222823016703, + "grad_norm": 0.7126092314720154, + "learning_rate": 0.0001619868505208794, + "loss": 2.66, + "step": 5815 + }, + { + "epoch": 0.46937293196675006, + "grad_norm": 0.7391123175621033, + "learning_rate": 0.00016197446167983223, + "loss": 2.6066, + "step": 5816 + }, + { + "epoch": 0.46945363570333304, + "grad_norm": 0.7282211780548096, + "learning_rate": 0.0001619620712942195, + "loss": 2.6422, + "step": 5817 + }, + { + "epoch": 0.46953433943991607, + "grad_norm": 
0.7581801414489746, + "learning_rate": 0.00016194967936434998, + "loss": 2.702, + "step": 5818 + }, + { + "epoch": 0.46961504317649905, + "grad_norm": 0.6649011373519897, + "learning_rate": 0.00016193728589053248, + "loss": 2.6235, + "step": 5819 + }, + { + "epoch": 0.4696957469130821, + "grad_norm": 0.720312237739563, + "learning_rate": 0.00016192489087307592, + "loss": 2.5961, + "step": 5820 + }, + { + "epoch": 0.46977645064966506, + "grad_norm": 0.72076016664505, + "learning_rate": 0.0001619124943122892, + "loss": 2.6793, + "step": 5821 + }, + { + "epoch": 0.4698571543862481, + "grad_norm": 0.6695740818977356, + "learning_rate": 0.0001619000962084813, + "loss": 2.6325, + "step": 5822 + }, + { + "epoch": 0.46993785812283106, + "grad_norm": 0.7678804993629456, + "learning_rate": 0.0001618876965619612, + "loss": 2.7473, + "step": 5823 + }, + { + "epoch": 0.4700185618594141, + "grad_norm": 0.782349169254303, + "learning_rate": 0.00016187529537303792, + "loss": 2.6139, + "step": 5824 + }, + { + "epoch": 0.4700992655959971, + "grad_norm": 0.6906631588935852, + "learning_rate": 0.00016186289264202052, + "loss": 2.6529, + "step": 5825 + }, + { + "epoch": 0.4701799693325801, + "grad_norm": 0.732947051525116, + "learning_rate": 0.00016185048836921814, + "loss": 2.6416, + "step": 5826 + }, + { + "epoch": 0.4702606730691631, + "grad_norm": 0.8306718468666077, + "learning_rate": 0.0001618380825549399, + "loss": 2.6566, + "step": 5827 + }, + { + "epoch": 0.4703413768057461, + "grad_norm": 0.725764811038971, + "learning_rate": 0.00016182567519949502, + "loss": 2.6664, + "step": 5828 + }, + { + "epoch": 0.4704220805423291, + "grad_norm": 0.7301872372627258, + "learning_rate": 0.00016181326630319268, + "loss": 2.6666, + "step": 5829 + }, + { + "epoch": 0.4705027842789121, + "grad_norm": 0.7297122478485107, + "learning_rate": 0.00016180085586634216, + "loss": 2.6415, + "step": 5830 + }, + { + "epoch": 0.4705834880154951, + "grad_norm": 0.7445664405822754, + "learning_rate": 
0.00016178844388925278, + "loss": 2.6112, + "step": 5831 + }, + { + "epoch": 0.47066419175207813, + "grad_norm": 0.7787267565727234, + "learning_rate": 0.00016177603037223384, + "loss": 2.6452, + "step": 5832 + }, + { + "epoch": 0.4707448954886611, + "grad_norm": 0.7386903762817383, + "learning_rate": 0.00016176361531559474, + "loss": 2.6919, + "step": 5833 + }, + { + "epoch": 0.47082559922524414, + "grad_norm": 0.7991776466369629, + "learning_rate": 0.0001617511987196449, + "loss": 2.6728, + "step": 5834 + }, + { + "epoch": 0.4709063029618271, + "grad_norm": 0.7196263670921326, + "learning_rate": 0.00016173878058469375, + "loss": 2.6008, + "step": 5835 + }, + { + "epoch": 0.47098700669841015, + "grad_norm": 0.6773477792739868, + "learning_rate": 0.00016172636091105086, + "loss": 2.6184, + "step": 5836 + }, + { + "epoch": 0.4710677104349931, + "grad_norm": 0.7238345742225647, + "learning_rate": 0.00016171393969902567, + "loss": 2.6221, + "step": 5837 + }, + { + "epoch": 0.47114841417157616, + "grad_norm": 0.702104926109314, + "learning_rate": 0.00016170151694892777, + "loss": 2.5909, + "step": 5838 + }, + { + "epoch": 0.47122911790815913, + "grad_norm": 0.7571590542793274, + "learning_rate": 0.00016168909266106677, + "loss": 2.6044, + "step": 5839 + }, + { + "epoch": 0.47130982164474217, + "grad_norm": 0.7408227324485779, + "learning_rate": 0.00016167666683575234, + "loss": 2.5771, + "step": 5840 + }, + { + "epoch": 0.47139052538132514, + "grad_norm": 0.6760764122009277, + "learning_rate": 0.00016166423947329414, + "loss": 2.6202, + "step": 5841 + }, + { + "epoch": 0.4714712291179082, + "grad_norm": 0.7085632681846619, + "learning_rate": 0.00016165181057400192, + "loss": 2.5887, + "step": 5842 + }, + { + "epoch": 0.47155193285449115, + "grad_norm": 0.7298943400382996, + "learning_rate": 0.00016163938013818538, + "loss": 2.609, + "step": 5843 + }, + { + "epoch": 0.4716326365910742, + "grad_norm": 0.7591157555580139, + "learning_rate": 0.0001616269481661544, + 
"loss": 2.6582, + "step": 5844 + }, + { + "epoch": 0.47171334032765716, + "grad_norm": 0.6727088093757629, + "learning_rate": 0.00016161451465821877, + "loss": 2.6289, + "step": 5845 + }, + { + "epoch": 0.4717940440642402, + "grad_norm": 0.6782706379890442, + "learning_rate": 0.00016160207961468835, + "loss": 2.6875, + "step": 5846 + }, + { + "epoch": 0.47187474780082317, + "grad_norm": 0.6839444041252136, + "learning_rate": 0.00016158964303587313, + "loss": 2.5687, + "step": 5847 + }, + { + "epoch": 0.4719554515374062, + "grad_norm": 0.7565997838973999, + "learning_rate": 0.00016157720492208295, + "loss": 2.6855, + "step": 5848 + }, + { + "epoch": 0.4720361552739892, + "grad_norm": 0.7286611199378967, + "learning_rate": 0.0001615647652736279, + "loss": 2.5906, + "step": 5849 + }, + { + "epoch": 0.4721168590105722, + "grad_norm": 0.7503396272659302, + "learning_rate": 0.00016155232409081793, + "loss": 2.6419, + "step": 5850 + }, + { + "epoch": 0.4721975627471552, + "grad_norm": 0.6924198865890503, + "learning_rate": 0.00016153988137396317, + "loss": 2.661, + "step": 5851 + }, + { + "epoch": 0.4722782664837382, + "grad_norm": 0.7731672525405884, + "learning_rate": 0.0001615274371233737, + "loss": 2.6993, + "step": 5852 + }, + { + "epoch": 0.4723589702203212, + "grad_norm": 0.7422799468040466, + "learning_rate": 0.00016151499133935964, + "loss": 2.6134, + "step": 5853 + }, + { + "epoch": 0.4724396739569042, + "grad_norm": 0.6924546957015991, + "learning_rate": 0.0001615025440222312, + "loss": 2.672, + "step": 5854 + }, + { + "epoch": 0.4725203776934872, + "grad_norm": 0.7205976843833923, + "learning_rate": 0.00016149009517229862, + "loss": 2.6722, + "step": 5855 + }, + { + "epoch": 0.47260108143007024, + "grad_norm": 0.6898519992828369, + "learning_rate": 0.0001614776447898721, + "loss": 2.6474, + "step": 5856 + }, + { + "epoch": 0.4726817851666532, + "grad_norm": 0.7512481212615967, + "learning_rate": 0.00016146519287526197, + "loss": 2.7413, + "step": 5857 + }, + { 
+ "epoch": 0.47276248890323624, + "grad_norm": 0.6734220385551453, + "learning_rate": 0.0001614527394287786, + "loss": 2.6114, + "step": 5858 + }, + { + "epoch": 0.4728431926398192, + "grad_norm": 0.6745339632034302, + "learning_rate": 0.00016144028445073228, + "loss": 2.6039, + "step": 5859 + }, + { + "epoch": 0.47292389637640225, + "grad_norm": 0.7463086843490601, + "learning_rate": 0.0001614278279414335, + "loss": 2.6109, + "step": 5860 + }, + { + "epoch": 0.47300460011298523, + "grad_norm": 0.7203261256217957, + "learning_rate": 0.00016141536990119264, + "loss": 2.651, + "step": 5861 + }, + { + "epoch": 0.47308530384956826, + "grad_norm": 0.7718746066093445, + "learning_rate": 0.00016140291033032024, + "loss": 2.6953, + "step": 5862 + }, + { + "epoch": 0.47316600758615124, + "grad_norm": 0.7854858040809631, + "learning_rate": 0.0001613904492291268, + "loss": 2.5941, + "step": 5863 + }, + { + "epoch": 0.47324671132273427, + "grad_norm": 0.7218664288520813, + "learning_rate": 0.0001613779865979229, + "loss": 2.6447, + "step": 5864 + }, + { + "epoch": 0.47332741505931725, + "grad_norm": 0.7479045987129211, + "learning_rate": 0.0001613655224370191, + "loss": 2.6662, + "step": 5865 + }, + { + "epoch": 0.4734081187959002, + "grad_norm": 0.7335021495819092, + "learning_rate": 0.00016135305674672612, + "loss": 2.6283, + "step": 5866 + }, + { + "epoch": 0.47348882253248326, + "grad_norm": 0.7650331258773804, + "learning_rate": 0.00016134058952735453, + "loss": 2.7168, + "step": 5867 + }, + { + "epoch": 0.47356952626906623, + "grad_norm": 0.733383297920227, + "learning_rate": 0.00016132812077921513, + "loss": 2.6352, + "step": 5868 + }, + { + "epoch": 0.47365023000564926, + "grad_norm": 1.3944146633148193, + "learning_rate": 0.00016131565050261866, + "loss": 2.7518, + "step": 5869 + }, + { + "epoch": 0.47373093374223224, + "grad_norm": 0.746112585067749, + "learning_rate": 0.0001613031786978759, + "loss": 2.6253, + "step": 5870 + }, + { + "epoch": 0.4738116374788153, + 
"grad_norm": 0.9859737753868103, + "learning_rate": 0.00016129070536529766, + "loss": 2.6682, + "step": 5871 + }, + { + "epoch": 0.47389234121539825, + "grad_norm": 0.7358877062797546, + "learning_rate": 0.00016127823050519484, + "loss": 2.6712, + "step": 5872 + }, + { + "epoch": 0.4739730449519813, + "grad_norm": 0.7379923462867737, + "learning_rate": 0.0001612657541178783, + "loss": 2.6268, + "step": 5873 + }, + { + "epoch": 0.47405374868856426, + "grad_norm": 0.7671005725860596, + "learning_rate": 0.00016125327620365907, + "loss": 2.6127, + "step": 5874 + }, + { + "epoch": 0.4741344524251473, + "grad_norm": 0.8007156252861023, + "learning_rate": 0.00016124079676284805, + "loss": 2.6173, + "step": 5875 + }, + { + "epoch": 0.47421515616173027, + "grad_norm": 0.7930500507354736, + "learning_rate": 0.00016122831579575627, + "loss": 2.589, + "step": 5876 + }, + { + "epoch": 0.4742958598983133, + "grad_norm": 0.788006603717804, + "learning_rate": 0.00016121583330269484, + "loss": 2.6731, + "step": 5877 + }, + { + "epoch": 0.4743765636348963, + "grad_norm": 0.742148220539093, + "learning_rate": 0.00016120334928397483, + "loss": 2.674, + "step": 5878 + }, + { + "epoch": 0.4744572673714793, + "grad_norm": 0.6823038458824158, + "learning_rate": 0.00016119086373990736, + "loss": 2.6153, + "step": 5879 + }, + { + "epoch": 0.4745379711080623, + "grad_norm": 0.7542331218719482, + "learning_rate": 0.00016117837667080356, + "loss": 2.6739, + "step": 5880 + }, + { + "epoch": 0.4746186748446453, + "grad_norm": 0.8163543343544006, + "learning_rate": 0.00016116588807697476, + "loss": 2.6558, + "step": 5881 + }, + { + "epoch": 0.4746993785812283, + "grad_norm": 0.7528213858604431, + "learning_rate": 0.0001611533979587321, + "loss": 2.6243, + "step": 5882 + }, + { + "epoch": 0.4747800823178113, + "grad_norm": 0.7476626038551331, + "learning_rate": 0.00016114090631638695, + "loss": 2.5984, + "step": 5883 + }, + { + "epoch": 0.4748607860543943, + "grad_norm": 0.7436621785163879, + 
"learning_rate": 0.00016112841315025055, + "loss": 2.6118, + "step": 5884 + }, + { + "epoch": 0.47494148979097733, + "grad_norm": 0.8024004101753235, + "learning_rate": 0.0001611159184606343, + "loss": 2.6926, + "step": 5885 + }, + { + "epoch": 0.4750221935275603, + "grad_norm": 0.7475626468658447, + "learning_rate": 0.00016110342224784962, + "loss": 2.6175, + "step": 5886 + }, + { + "epoch": 0.47510289726414334, + "grad_norm": 0.7900637984275818, + "learning_rate": 0.00016109092451220796, + "loss": 2.6503, + "step": 5887 + }, + { + "epoch": 0.4751836010007263, + "grad_norm": 0.6988356113433838, + "learning_rate": 0.00016107842525402074, + "loss": 2.6494, + "step": 5888 + }, + { + "epoch": 0.47526430473730935, + "grad_norm": 1.0214186906814575, + "learning_rate": 0.00016106592447359948, + "loss": 2.6476, + "step": 5889 + }, + { + "epoch": 0.4753450084738923, + "grad_norm": 0.741527795791626, + "learning_rate": 0.00016105342217125578, + "loss": 2.6054, + "step": 5890 + }, + { + "epoch": 0.47542571221047536, + "grad_norm": 0.7196603417396545, + "learning_rate": 0.0001610409183473012, + "loss": 2.6146, + "step": 5891 + }, + { + "epoch": 0.47550641594705834, + "grad_norm": 0.8130923509597778, + "learning_rate": 0.00016102841300204737, + "loss": 2.6505, + "step": 5892 + }, + { + "epoch": 0.47558711968364137, + "grad_norm": 0.7929537892341614, + "learning_rate": 0.00016101590613580596, + "loss": 2.6725, + "step": 5893 + }, + { + "epoch": 0.47566782342022434, + "grad_norm": 0.7149303555488586, + "learning_rate": 0.00016100339774888865, + "loss": 2.6272, + "step": 5894 + }, + { + "epoch": 0.4757485271568074, + "grad_norm": 0.7242792248725891, + "learning_rate": 0.00016099088784160724, + "loss": 2.5948, + "step": 5895 + }, + { + "epoch": 0.47582923089339035, + "grad_norm": 0.7571540474891663, + "learning_rate": 0.00016097837641427346, + "loss": 2.689, + "step": 5896 + }, + { + "epoch": 0.4759099346299734, + "grad_norm": 0.7402021288871765, + "learning_rate": 
0.00016096586346719916, + "loss": 2.7035, + "step": 5897 + }, + { + "epoch": 0.47599063836655636, + "grad_norm": 0.7195574045181274, + "learning_rate": 0.00016095334900069613, + "loss": 2.5862, + "step": 5898 + }, + { + "epoch": 0.4760713421031394, + "grad_norm": 0.7677412033081055, + "learning_rate": 0.00016094083301507634, + "loss": 2.6715, + "step": 5899 + }, + { + "epoch": 0.47615204583972237, + "grad_norm": 0.7131708860397339, + "learning_rate": 0.0001609283155106517, + "loss": 2.6555, + "step": 5900 + }, + { + "epoch": 0.4762327495763054, + "grad_norm": 0.6774055361747742, + "learning_rate": 0.00016091579648773414, + "loss": 2.621, + "step": 5901 + }, + { + "epoch": 0.4763134533128884, + "grad_norm": 0.6873257160186768, + "learning_rate": 0.00016090327594663571, + "loss": 2.6719, + "step": 5902 + }, + { + "epoch": 0.4763941570494714, + "grad_norm": 0.8004229068756104, + "learning_rate": 0.00016089075388766845, + "loss": 2.6926, + "step": 5903 + }, + { + "epoch": 0.4764748607860544, + "grad_norm": 0.7196173667907715, + "learning_rate": 0.00016087823031114438, + "loss": 2.6032, + "step": 5904 + }, + { + "epoch": 0.4765555645226374, + "grad_norm": 0.7665518522262573, + "learning_rate": 0.00016086570521737573, + "loss": 2.6359, + "step": 5905 + }, + { + "epoch": 0.4766362682592204, + "grad_norm": 0.7240240573883057, + "learning_rate": 0.0001608531786066746, + "loss": 2.6489, + "step": 5906 + }, + { + "epoch": 0.47671697199580343, + "grad_norm": 0.7603839039802551, + "learning_rate": 0.00016084065047935317, + "loss": 2.6064, + "step": 5907 + }, + { + "epoch": 0.4767976757323864, + "grad_norm": 0.7394058704376221, + "learning_rate": 0.0001608281208357237, + "loss": 2.6643, + "step": 5908 + }, + { + "epoch": 0.47687837946896944, + "grad_norm": 0.7183148860931396, + "learning_rate": 0.00016081558967609845, + "loss": 2.56, + "step": 5909 + }, + { + "epoch": 0.4769590832055524, + "grad_norm": 0.7181926965713501, + "learning_rate": 0.00016080305700078972, + "loss": 
2.6665, + "step": 5910 + }, + { + "epoch": 0.47703978694213545, + "grad_norm": 0.7634081840515137, + "learning_rate": 0.00016079052281010988, + "loss": 2.7076, + "step": 5911 + }, + { + "epoch": 0.4771204906787184, + "grad_norm": 0.7928739190101624, + "learning_rate": 0.0001607779871043713, + "loss": 2.6512, + "step": 5912 + }, + { + "epoch": 0.47720119441530146, + "grad_norm": 0.7192893028259277, + "learning_rate": 0.00016076544988388643, + "loss": 2.6453, + "step": 5913 + }, + { + "epoch": 0.47728189815188443, + "grad_norm": 0.7171720862388611, + "learning_rate": 0.00016075291114896767, + "loss": 2.6501, + "step": 5914 + }, + { + "epoch": 0.47736260188846746, + "grad_norm": 0.6787160038948059, + "learning_rate": 0.00016074037089992756, + "loss": 2.6566, + "step": 5915 + }, + { + "epoch": 0.47744330562505044, + "grad_norm": 0.8118634819984436, + "learning_rate": 0.00016072782913707868, + "loss": 2.6635, + "step": 5916 + }, + { + "epoch": 0.4775240093616334, + "grad_norm": 0.7188509702682495, + "learning_rate": 0.0001607152858607335, + "loss": 2.6899, + "step": 5917 + }, + { + "epoch": 0.47760471309821645, + "grad_norm": 0.6742647290229797, + "learning_rate": 0.00016070274107120468, + "loss": 2.6221, + "step": 5918 + }, + { + "epoch": 0.4776854168347994, + "grad_norm": 0.7274083495140076, + "learning_rate": 0.00016069019476880488, + "loss": 2.6588, + "step": 5919 + }, + { + "epoch": 0.47776612057138246, + "grad_norm": 0.6984386444091797, + "learning_rate": 0.00016067764695384682, + "loss": 2.6376, + "step": 5920 + }, + { + "epoch": 0.47784682430796543, + "grad_norm": 0.7260883450508118, + "learning_rate": 0.00016066509762664315, + "loss": 2.6623, + "step": 5921 + }, + { + "epoch": 0.47792752804454847, + "grad_norm": 0.7540579438209534, + "learning_rate": 0.00016065254678750666, + "loss": 2.695, + "step": 5922 + }, + { + "epoch": 0.47800823178113144, + "grad_norm": 0.7032651305198669, + "learning_rate": 0.00016063999443675017, + "loss": 2.6791, + "step": 5923 + }, + 
{ + "epoch": 0.4780889355177145, + "grad_norm": 0.682842493057251, + "learning_rate": 0.0001606274405746865, + "loss": 2.6198, + "step": 5924 + }, + { + "epoch": 0.47816963925429745, + "grad_norm": 0.6843859553337097, + "learning_rate": 0.00016061488520162853, + "loss": 2.6432, + "step": 5925 + }, + { + "epoch": 0.4782503429908805, + "grad_norm": 0.652119517326355, + "learning_rate": 0.00016060232831788918, + "loss": 2.6461, + "step": 5926 + }, + { + "epoch": 0.47833104672746346, + "grad_norm": 0.6986887454986572, + "learning_rate": 0.0001605897699237814, + "loss": 2.5885, + "step": 5927 + }, + { + "epoch": 0.4784117504640465, + "grad_norm": 0.7156725525856018, + "learning_rate": 0.00016057721001961817, + "loss": 2.6526, + "step": 5928 + }, + { + "epoch": 0.47849245420062947, + "grad_norm": 0.7367579936981201, + "learning_rate": 0.0001605646486057125, + "loss": 2.5842, + "step": 5929 + }, + { + "epoch": 0.4785731579372125, + "grad_norm": 0.7059770822525024, + "learning_rate": 0.00016055208568237746, + "loss": 2.617, + "step": 5930 + }, + { + "epoch": 0.4786538616737955, + "grad_norm": 0.7225117087364197, + "learning_rate": 0.00016053952124992619, + "loss": 2.6499, + "step": 5931 + }, + { + "epoch": 0.4787345654103785, + "grad_norm": 0.7027475237846375, + "learning_rate": 0.00016052695530867177, + "loss": 2.5934, + "step": 5932 + }, + { + "epoch": 0.4788152691469615, + "grad_norm": 0.7031852602958679, + "learning_rate": 0.00016051438785892743, + "loss": 2.5947, + "step": 5933 + }, + { + "epoch": 0.4788959728835445, + "grad_norm": 0.6731768846511841, + "learning_rate": 0.00016050181890100635, + "loss": 2.6811, + "step": 5934 + }, + { + "epoch": 0.4789766766201275, + "grad_norm": 0.7120038866996765, + "learning_rate": 0.0001604892484352218, + "loss": 2.6625, + "step": 5935 + }, + { + "epoch": 0.4790573803567105, + "grad_norm": 0.6895150542259216, + "learning_rate": 0.00016047667646188702, + "loss": 2.6784, + "step": 5936 + }, + { + "epoch": 0.4791380840932935, + 
"grad_norm": 0.7080708742141724, + "learning_rate": 0.0001604641029813154, + "loss": 2.6491, + "step": 5937 + }, + { + "epoch": 0.47921878782987654, + "grad_norm": 0.6522819399833679, + "learning_rate": 0.00016045152799382025, + "loss": 2.6113, + "step": 5938 + }, + { + "epoch": 0.4792994915664595, + "grad_norm": 0.6988112926483154, + "learning_rate": 0.00016043895149971506, + "loss": 2.6892, + "step": 5939 + }, + { + "epoch": 0.47938019530304254, + "grad_norm": 0.7545368671417236, + "learning_rate": 0.00016042637349931318, + "loss": 2.6872, + "step": 5940 + }, + { + "epoch": 0.4794608990396255, + "grad_norm": 0.7083707451820374, + "learning_rate": 0.0001604137939929281, + "loss": 2.6726, + "step": 5941 + }, + { + "epoch": 0.47954160277620855, + "grad_norm": 0.8198027014732361, + "learning_rate": 0.00016040121298087337, + "loss": 2.647, + "step": 5942 + }, + { + "epoch": 0.47962230651279153, + "grad_norm": 0.7296201586723328, + "learning_rate": 0.00016038863046346252, + "loss": 2.7122, + "step": 5943 + }, + { + "epoch": 0.47970301024937456, + "grad_norm": 0.7262474298477173, + "learning_rate": 0.00016037604644100913, + "loss": 2.6903, + "step": 5944 + }, + { + "epoch": 0.47978371398595754, + "grad_norm": 0.8010182976722717, + "learning_rate": 0.00016036346091382686, + "loss": 2.6942, + "step": 5945 + }, + { + "epoch": 0.47986441772254057, + "grad_norm": 0.7227098345756531, + "learning_rate": 0.00016035087388222932, + "loss": 2.6661, + "step": 5946 + }, + { + "epoch": 0.47994512145912355, + "grad_norm": 0.7374662756919861, + "learning_rate": 0.00016033828534653028, + "loss": 2.6233, + "step": 5947 + }, + { + "epoch": 0.4800258251957066, + "grad_norm": 0.7139650583267212, + "learning_rate": 0.00016032569530704342, + "loss": 2.5859, + "step": 5948 + }, + { + "epoch": 0.48010652893228956, + "grad_norm": 0.7067660689353943, + "learning_rate": 0.00016031310376408254, + "loss": 2.6677, + "step": 5949 + }, + { + "epoch": 0.4801872326688726, + "grad_norm": 
0.694715142250061, + "learning_rate": 0.00016030051071796146, + "loss": 2.6415, + "step": 5950 + }, + { + "epoch": 0.48026793640545556, + "grad_norm": 0.728918194770813, + "learning_rate": 0.00016028791616899403, + "loss": 2.6274, + "step": 5951 + }, + { + "epoch": 0.4803486401420386, + "grad_norm": 0.699846088886261, + "learning_rate": 0.00016027532011749412, + "loss": 2.6613, + "step": 5952 + }, + { + "epoch": 0.4804293438786216, + "grad_norm": 0.7177432179450989, + "learning_rate": 0.0001602627225637757, + "loss": 2.6107, + "step": 5953 + }, + { + "epoch": 0.4805100476152046, + "grad_norm": 0.7502370476722717, + "learning_rate": 0.00016025012350815267, + "loss": 2.6534, + "step": 5954 + }, + { + "epoch": 0.4805907513517876, + "grad_norm": 0.7730218172073364, + "learning_rate": 0.0001602375229509391, + "loss": 2.7037, + "step": 5955 + }, + { + "epoch": 0.4806714550883706, + "grad_norm": 0.7046666145324707, + "learning_rate": 0.00016022492089244898, + "loss": 2.6336, + "step": 5956 + }, + { + "epoch": 0.4807521588249536, + "grad_norm": 0.7991104125976562, + "learning_rate": 0.0001602123173329964, + "loss": 2.7024, + "step": 5957 + }, + { + "epoch": 0.4808328625615366, + "grad_norm": 0.7056288123130798, + "learning_rate": 0.00016019971227289548, + "loss": 2.6088, + "step": 5958 + }, + { + "epoch": 0.4809135662981196, + "grad_norm": 0.7277925610542297, + "learning_rate": 0.00016018710571246038, + "loss": 2.6245, + "step": 5959 + }, + { + "epoch": 0.48099427003470263, + "grad_norm": 0.7545790672302246, + "learning_rate": 0.00016017449765200526, + "loss": 2.6076, + "step": 5960 + }, + { + "epoch": 0.4810749737712856, + "grad_norm": 0.7106321454048157, + "learning_rate": 0.00016016188809184434, + "loss": 2.5561, + "step": 5961 + }, + { + "epoch": 0.48115567750786864, + "grad_norm": 0.7464704513549805, + "learning_rate": 0.0001601492770322919, + "loss": 2.6336, + "step": 5962 + }, + { + "epoch": 0.4812363812444516, + "grad_norm": 0.7531768083572388, + "learning_rate": 
0.00016013666447366228, + "loss": 2.6236, + "step": 5963 + }, + { + "epoch": 0.48131708498103465, + "grad_norm": 0.7412876486778259, + "learning_rate": 0.00016012405041626978, + "loss": 2.6309, + "step": 5964 + }, + { + "epoch": 0.4813977887176176, + "grad_norm": 0.7030940055847168, + "learning_rate": 0.00016011143486042878, + "loss": 2.6252, + "step": 5965 + }, + { + "epoch": 0.48147849245420066, + "grad_norm": 0.7932302951812744, + "learning_rate": 0.00016009881780645367, + "loss": 2.6797, + "step": 5966 + }, + { + "epoch": 0.48155919619078363, + "grad_norm": 0.7366262078285217, + "learning_rate": 0.00016008619925465893, + "loss": 2.6616, + "step": 5967 + }, + { + "epoch": 0.4816398999273666, + "grad_norm": 0.6938421130180359, + "learning_rate": 0.00016007357920535902, + "loss": 2.6888, + "step": 5968 + }, + { + "epoch": 0.48172060366394964, + "grad_norm": 0.7560005784034729, + "learning_rate": 0.00016006095765886853, + "loss": 2.6044, + "step": 5969 + }, + { + "epoch": 0.4818013074005326, + "grad_norm": 0.7330430150032043, + "learning_rate": 0.0001600483346155019, + "loss": 2.7023, + "step": 5970 + }, + { + "epoch": 0.48188201113711565, + "grad_norm": 0.7257955074310303, + "learning_rate": 0.00016003571007557388, + "loss": 2.6763, + "step": 5971 + }, + { + "epoch": 0.4819627148736986, + "grad_norm": 0.704187273979187, + "learning_rate": 0.000160023084039399, + "loss": 2.6229, + "step": 5972 + }, + { + "epoch": 0.48204341861028166, + "grad_norm": 0.7014813423156738, + "learning_rate": 0.00016001045650729196, + "loss": 2.6207, + "step": 5973 + }, + { + "epoch": 0.48212412234686464, + "grad_norm": 0.8039405941963196, + "learning_rate": 0.00015999782747956747, + "loss": 2.6198, + "step": 5974 + }, + { + "epoch": 0.48220482608344767, + "grad_norm": 0.7114945650100708, + "learning_rate": 0.0001599851969565403, + "loss": 2.6154, + "step": 5975 + }, + { + "epoch": 0.48228552982003065, + "grad_norm": 0.7603329420089722, + "learning_rate": 0.00015997256493852517, + 
"loss": 2.6217, + "step": 5976 + }, + { + "epoch": 0.4823662335566137, + "grad_norm": 0.7773346900939941, + "learning_rate": 0.000159959931425837, + "loss": 2.7054, + "step": 5977 + }, + { + "epoch": 0.48244693729319665, + "grad_norm": 0.8022029399871826, + "learning_rate": 0.0001599472964187906, + "loss": 2.6844, + "step": 5978 + }, + { + "epoch": 0.4825276410297797, + "grad_norm": 0.7384541630744934, + "learning_rate": 0.00015993465991770087, + "loss": 2.6516, + "step": 5979 + }, + { + "epoch": 0.48260834476636266, + "grad_norm": 0.6993509531021118, + "learning_rate": 0.00015992202192288273, + "loss": 2.6837, + "step": 5980 + }, + { + "epoch": 0.4826890485029457, + "grad_norm": 0.7430509328842163, + "learning_rate": 0.00015990938243465116, + "loss": 2.6717, + "step": 5981 + }, + { + "epoch": 0.48276975223952867, + "grad_norm": 0.7544847726821899, + "learning_rate": 0.0001598967414533212, + "loss": 2.6573, + "step": 5982 + }, + { + "epoch": 0.4828504559761117, + "grad_norm": 0.736955463886261, + "learning_rate": 0.00015988409897920786, + "loss": 2.6865, + "step": 5983 + }, + { + "epoch": 0.4829311597126947, + "grad_norm": 0.7771684527397156, + "learning_rate": 0.00015987145501262622, + "loss": 2.6173, + "step": 5984 + }, + { + "epoch": 0.4830118634492777, + "grad_norm": 0.7504391670227051, + "learning_rate": 0.00015985880955389143, + "loss": 2.6218, + "step": 5985 + }, + { + "epoch": 0.4830925671858607, + "grad_norm": 0.7025442123413086, + "learning_rate": 0.00015984616260331861, + "loss": 2.6107, + "step": 5986 + }, + { + "epoch": 0.4831732709224437, + "grad_norm": 0.6906485557556152, + "learning_rate": 0.000159833514161223, + "loss": 2.633, + "step": 5987 + }, + { + "epoch": 0.4832539746590267, + "grad_norm": 0.7771004438400269, + "learning_rate": 0.00015982086422791983, + "loss": 2.5956, + "step": 5988 + }, + { + "epoch": 0.48333467839560973, + "grad_norm": 0.6927372813224792, + "learning_rate": 0.00015980821280372432, + "loss": 2.5984, + "step": 5989 + }, + { 
+ "epoch": 0.4834153821321927, + "grad_norm": 0.7196357846260071, + "learning_rate": 0.00015979555988895184, + "loss": 2.6386, + "step": 5990 + }, + { + "epoch": 0.48349608586877574, + "grad_norm": 0.7601087689399719, + "learning_rate": 0.0001597829054839177, + "loss": 2.6707, + "step": 5991 + }, + { + "epoch": 0.4835767896053587, + "grad_norm": 0.7783588767051697, + "learning_rate": 0.00015977024958893722, + "loss": 2.5815, + "step": 5992 + }, + { + "epoch": 0.48365749334194175, + "grad_norm": 0.7651833891868591, + "learning_rate": 0.00015975759220432592, + "loss": 2.6235, + "step": 5993 + }, + { + "epoch": 0.4837381970785247, + "grad_norm": 0.7158511877059937, + "learning_rate": 0.0001597449333303992, + "loss": 2.6813, + "step": 5994 + }, + { + "epoch": 0.48381890081510776, + "grad_norm": 0.7411341667175293, + "learning_rate": 0.0001597322729674726, + "loss": 2.7231, + "step": 5995 + }, + { + "epoch": 0.48389960455169073, + "grad_norm": 0.7168158292770386, + "learning_rate": 0.0001597196111158616, + "loss": 2.6408, + "step": 5996 + }, + { + "epoch": 0.48398030828827376, + "grad_norm": 0.7603393793106079, + "learning_rate": 0.00015970694777588175, + "loss": 2.7821, + "step": 5997 + }, + { + "epoch": 0.48406101202485674, + "grad_norm": 0.7298564910888672, + "learning_rate": 0.0001596942829478487, + "loss": 2.6828, + "step": 5998 + }, + { + "epoch": 0.4841417157614398, + "grad_norm": 0.7850572466850281, + "learning_rate": 0.0001596816166320781, + "loss": 2.6191, + "step": 5999 + }, + { + "epoch": 0.48422241949802275, + "grad_norm": 0.7697601914405823, + "learning_rate": 0.00015966894882888562, + "loss": 2.6768, + "step": 6000 + }, + { + "epoch": 0.48422241949802275, + "eval_loss": 2.5610127449035645, + "eval_runtime": 760.0481, + "eval_samples_per_second": 3.447, + "eval_steps_per_second": 0.575, + "step": 6000 + }, + { + "epoch": 0.4843031232346058, + "grad_norm": 0.7212432026863098, + "learning_rate": 0.00015965627953858693, + "loss": 2.5967, + "step": 6001 + }, + 
{ + "epoch": 0.48438382697118876, + "grad_norm": 0.7629631757736206, + "learning_rate": 0.0001596436087614978, + "loss": 2.7005, + "step": 6002 + }, + { + "epoch": 0.4844645307077718, + "grad_norm": 0.7154754400253296, + "learning_rate": 0.00015963093649793404, + "loss": 2.6909, + "step": 6003 + }, + { + "epoch": 0.48454523444435477, + "grad_norm": 0.7365279793739319, + "learning_rate": 0.00015961826274821147, + "loss": 2.6268, + "step": 6004 + }, + { + "epoch": 0.4846259381809378, + "grad_norm": 0.8114632964134216, + "learning_rate": 0.00015960558751264596, + "loss": 2.6647, + "step": 6005 + }, + { + "epoch": 0.4847066419175208, + "grad_norm": 0.7411556243896484, + "learning_rate": 0.00015959291079155338, + "loss": 2.6378, + "step": 6006 + }, + { + "epoch": 0.4847873456541038, + "grad_norm": 0.7137390375137329, + "learning_rate": 0.00015958023258524968, + "loss": 2.6454, + "step": 6007 + }, + { + "epoch": 0.4848680493906868, + "grad_norm": 0.7477054595947266, + "learning_rate": 0.00015956755289405088, + "loss": 2.6463, + "step": 6008 + }, + { + "epoch": 0.4849487531272698, + "grad_norm": 0.7198071479797363, + "learning_rate": 0.0001595548717182729, + "loss": 2.6537, + "step": 6009 + }, + { + "epoch": 0.4850294568638528, + "grad_norm": 0.6697781085968018, + "learning_rate": 0.00015954218905823186, + "loss": 2.7018, + "step": 6010 + }, + { + "epoch": 0.4851101606004358, + "grad_norm": 0.7577201724052429, + "learning_rate": 0.00015952950491424382, + "loss": 2.6531, + "step": 6011 + }, + { + "epoch": 0.4851908643370188, + "grad_norm": 0.6852774024009705, + "learning_rate": 0.0001595168192866249, + "loss": 2.5819, + "step": 6012 + }, + { + "epoch": 0.48527156807360183, + "grad_norm": 0.7116097807884216, + "learning_rate": 0.0001595041321756913, + "loss": 2.5691, + "step": 6013 + }, + { + "epoch": 0.4853522718101848, + "grad_norm": 0.7478477954864502, + "learning_rate": 0.00015949144358175916, + "loss": 2.6658, + "step": 6014 + }, + { + "epoch": 0.48543297554676784, + 
"grad_norm": 0.816969633102417, + "learning_rate": 0.0001594787535051447, + "loss": 2.6709, + "step": 6015 + }, + { + "epoch": 0.4855136792833508, + "grad_norm": 0.6953164339065552, + "learning_rate": 0.00015946606194616427, + "loss": 2.6139, + "step": 6016 + }, + { + "epoch": 0.48559438301993385, + "grad_norm": 0.6698834300041199, + "learning_rate": 0.0001594533689051341, + "loss": 2.574, + "step": 6017 + }, + { + "epoch": 0.4856750867565168, + "grad_norm": 0.7686784267425537, + "learning_rate": 0.0001594406743823706, + "loss": 2.6271, + "step": 6018 + }, + { + "epoch": 0.4857557904930998, + "grad_norm": 0.7713280916213989, + "learning_rate": 0.00015942797837819009, + "loss": 2.6682, + "step": 6019 + }, + { + "epoch": 0.48583649422968284, + "grad_norm": 0.8102596998214722, + "learning_rate": 0.00015941528089290902, + "loss": 2.6771, + "step": 6020 + }, + { + "epoch": 0.4859171979662658, + "grad_norm": 0.7140331864356995, + "learning_rate": 0.00015940258192684382, + "loss": 2.6267, + "step": 6021 + }, + { + "epoch": 0.48599790170284884, + "grad_norm": 0.7057615518569946, + "learning_rate": 0.000159389881480311, + "loss": 2.6011, + "step": 6022 + }, + { + "epoch": 0.4860786054394318, + "grad_norm": 0.7106850147247314, + "learning_rate": 0.0001593771795536271, + "loss": 2.6681, + "step": 6023 + }, + { + "epoch": 0.48615930917601485, + "grad_norm": 0.7618210315704346, + "learning_rate": 0.00015936447614710867, + "loss": 2.6545, + "step": 6024 + }, + { + "epoch": 0.48624001291259783, + "grad_norm": 0.7577608227729797, + "learning_rate": 0.00015935177126107233, + "loss": 2.6479, + "step": 6025 + }, + { + "epoch": 0.48632071664918086, + "grad_norm": 0.758745551109314, + "learning_rate": 0.00015933906489583468, + "loss": 2.7057, + "step": 6026 + }, + { + "epoch": 0.48640142038576384, + "grad_norm": 0.785906970500946, + "learning_rate": 0.00015932635705171241, + "loss": 2.7081, + "step": 6027 + }, + { + "epoch": 0.48648212412234687, + "grad_norm": 0.6744558215141296, + 
"learning_rate": 0.00015931364772902228, + "loss": 2.6438, + "step": 6028 + }, + { + "epoch": 0.48656282785892985, + "grad_norm": 0.7451377511024475, + "learning_rate": 0.00015930093692808099, + "loss": 2.6509, + "step": 6029 + }, + { + "epoch": 0.4866435315955129, + "grad_norm": 0.6590149402618408, + "learning_rate": 0.0001592882246492053, + "loss": 2.5683, + "step": 6030 + }, + { + "epoch": 0.48672423533209586, + "grad_norm": 0.7433840036392212, + "learning_rate": 0.0001592755108927121, + "loss": 2.6647, + "step": 6031 + }, + { + "epoch": 0.4868049390686789, + "grad_norm": 0.876806378364563, + "learning_rate": 0.00015926279565891822, + "loss": 2.6482, + "step": 6032 + }, + { + "epoch": 0.48688564280526186, + "grad_norm": 0.7495005130767822, + "learning_rate": 0.00015925007894814058, + "loss": 2.6346, + "step": 6033 + }, + { + "epoch": 0.4869663465418449, + "grad_norm": 0.7005730271339417, + "learning_rate": 0.00015923736076069604, + "loss": 2.6241, + "step": 6034 + }, + { + "epoch": 0.4870470502784279, + "grad_norm": 0.664098858833313, + "learning_rate": 0.00015922464109690166, + "loss": 2.6281, + "step": 6035 + }, + { + "epoch": 0.4871277540150109, + "grad_norm": 0.7482514977455139, + "learning_rate": 0.00015921191995707442, + "loss": 2.5764, + "step": 6036 + }, + { + "epoch": 0.4872084577515939, + "grad_norm": 0.7450351715087891, + "learning_rate": 0.0001591991973415313, + "loss": 2.6433, + "step": 6037 + }, + { + "epoch": 0.4872891614881769, + "grad_norm": 0.6738519072532654, + "learning_rate": 0.00015918647325058948, + "loss": 2.6688, + "step": 6038 + }, + { + "epoch": 0.4873698652247599, + "grad_norm": 0.7999960780143738, + "learning_rate": 0.000159173747684566, + "loss": 2.6309, + "step": 6039 + }, + { + "epoch": 0.4874505689613429, + "grad_norm": 0.7249687910079956, + "learning_rate": 0.00015916102064377806, + "loss": 2.5808, + "step": 6040 + }, + { + "epoch": 0.4875312726979259, + "grad_norm": 0.7014601826667786, + "learning_rate": 0.00015914829212854286, 
+ "loss": 2.6646, + "step": 6041 + }, + { + "epoch": 0.48761197643450893, + "grad_norm": 0.7091174721717834, + "learning_rate": 0.00015913556213917757, + "loss": 2.6576, + "step": 6042 + }, + { + "epoch": 0.4876926801710919, + "grad_norm": 0.6949019432067871, + "learning_rate": 0.00015912283067599952, + "loss": 2.5883, + "step": 6043 + }, + { + "epoch": 0.48777338390767494, + "grad_norm": 0.6990448236465454, + "learning_rate": 0.00015911009773932598, + "loss": 2.6413, + "step": 6044 + }, + { + "epoch": 0.4878540876442579, + "grad_norm": 0.7106831073760986, + "learning_rate": 0.00015909736332947425, + "loss": 2.6122, + "step": 6045 + }, + { + "epoch": 0.48793479138084095, + "grad_norm": 0.7052395343780518, + "learning_rate": 0.00015908462744676177, + "loss": 2.572, + "step": 6046 + }, + { + "epoch": 0.4880154951174239, + "grad_norm": 0.7250158190727234, + "learning_rate": 0.00015907189009150592, + "loss": 2.6582, + "step": 6047 + }, + { + "epoch": 0.48809619885400696, + "grad_norm": 0.7213590145111084, + "learning_rate": 0.00015905915126402414, + "loss": 2.7025, + "step": 6048 + }, + { + "epoch": 0.48817690259058993, + "grad_norm": 0.7136254906654358, + "learning_rate": 0.00015904641096463394, + "loss": 2.6823, + "step": 6049 + }, + { + "epoch": 0.48825760632717297, + "grad_norm": 0.7163361310958862, + "learning_rate": 0.00015903366919365282, + "loss": 2.6642, + "step": 6050 + }, + { + "epoch": 0.48833831006375594, + "grad_norm": 0.6842724680900574, + "learning_rate": 0.00015902092595139838, + "loss": 2.6599, + "step": 6051 + }, + { + "epoch": 0.488419013800339, + "grad_norm": 0.7426519393920898, + "learning_rate": 0.0001590081812381882, + "loss": 2.6271, + "step": 6052 + }, + { + "epoch": 0.48849971753692195, + "grad_norm": 0.7415586709976196, + "learning_rate": 0.00015899543505433985, + "loss": 2.6105, + "step": 6053 + }, + { + "epoch": 0.488580421273505, + "grad_norm": 0.7286739945411682, + "learning_rate": 0.00015898268740017105, + "loss": 2.6304, + "step": 6054 
+ }, + { + "epoch": 0.48866112501008796, + "grad_norm": 0.6898483633995056, + "learning_rate": 0.00015896993827599947, + "loss": 2.6237, + "step": 6055 + }, + { + "epoch": 0.488741828746671, + "grad_norm": 0.7020056247711182, + "learning_rate": 0.00015895718768214293, + "loss": 2.6166, + "step": 6056 + }, + { + "epoch": 0.48882253248325397, + "grad_norm": 0.7145286798477173, + "learning_rate": 0.00015894443561891914, + "loss": 2.6729, + "step": 6057 + }, + { + "epoch": 0.488903236219837, + "grad_norm": 0.6888289451599121, + "learning_rate": 0.00015893168208664594, + "loss": 2.6154, + "step": 6058 + }, + { + "epoch": 0.48898393995642, + "grad_norm": 0.6929970383644104, + "learning_rate": 0.00015891892708564116, + "loss": 2.6748, + "step": 6059 + }, + { + "epoch": 0.489064643693003, + "grad_norm": 0.679853618144989, + "learning_rate": 0.0001589061706162227, + "loss": 2.605, + "step": 6060 + }, + { + "epoch": 0.489145347429586, + "grad_norm": 0.71812504529953, + "learning_rate": 0.0001588934126787085, + "loss": 2.7249, + "step": 6061 + }, + { + "epoch": 0.489226051166169, + "grad_norm": 0.7083466053009033, + "learning_rate": 0.00015888065327341648, + "loss": 2.5986, + "step": 6062 + }, + { + "epoch": 0.489306754902752, + "grad_norm": 0.7476792931556702, + "learning_rate": 0.00015886789240066466, + "loss": 2.5942, + "step": 6063 + }, + { + "epoch": 0.489387458639335, + "grad_norm": 0.7197855114936829, + "learning_rate": 0.00015885513006077114, + "loss": 2.6198, + "step": 6064 + }, + { + "epoch": 0.489468162375918, + "grad_norm": 0.6678233742713928, + "learning_rate": 0.00015884236625405385, + "loss": 2.5793, + "step": 6065 + }, + { + "epoch": 0.48954886611250104, + "grad_norm": 0.7371037602424622, + "learning_rate": 0.00015882960098083105, + "loss": 2.6231, + "step": 6066 + }, + { + "epoch": 0.489629569849084, + "grad_norm": 0.7087417244911194, + "learning_rate": 0.00015881683424142078, + "loss": 2.6483, + "step": 6067 + }, + { + "epoch": 0.48971027358566704, + 
"grad_norm": 0.7300292253494263, + "learning_rate": 0.00015880406603614126, + "loss": 2.6778, + "step": 6068 + }, + { + "epoch": 0.48979097732225, + "grad_norm": 0.8347866535186768, + "learning_rate": 0.0001587912963653107, + "loss": 2.554, + "step": 6069 + }, + { + "epoch": 0.489871681058833, + "grad_norm": 0.7717794179916382, + "learning_rate": 0.00015877852522924732, + "loss": 2.6904, + "step": 6070 + }, + { + "epoch": 0.48995238479541603, + "grad_norm": 0.6960952281951904, + "learning_rate": 0.00015876575262826944, + "loss": 2.6059, + "step": 6071 + }, + { + "epoch": 0.490033088531999, + "grad_norm": 0.7316592931747437, + "learning_rate": 0.00015875297856269543, + "loss": 2.6685, + "step": 6072 + }, + { + "epoch": 0.49011379226858204, + "grad_norm": 0.6775457859039307, + "learning_rate": 0.00015874020303284362, + "loss": 2.6232, + "step": 6073 + }, + { + "epoch": 0.490194496005165, + "grad_norm": 0.7741925120353699, + "learning_rate": 0.00015872742603903237, + "loss": 2.6767, + "step": 6074 + }, + { + "epoch": 0.49027519974174805, + "grad_norm": 0.857490599155426, + "learning_rate": 0.00015871464758158017, + "loss": 2.6649, + "step": 6075 + }, + { + "epoch": 0.490355903478331, + "grad_norm": 0.7474274039268494, + "learning_rate": 0.00015870186766080545, + "loss": 2.6926, + "step": 6076 + }, + { + "epoch": 0.49043660721491406, + "grad_norm": 0.7266567945480347, + "learning_rate": 0.00015868908627702675, + "loss": 2.5919, + "step": 6077 + }, + { + "epoch": 0.49051731095149703, + "grad_norm": 0.7247830629348755, + "learning_rate": 0.0001586763034305626, + "loss": 2.6158, + "step": 6078 + }, + { + "epoch": 0.49059801468808006, + "grad_norm": 0.7654951214790344, + "learning_rate": 0.00015866351912173157, + "loss": 2.7236, + "step": 6079 + }, + { + "epoch": 0.49067871842466304, + "grad_norm": 0.732431948184967, + "learning_rate": 0.00015865073335085236, + "loss": 2.6349, + "step": 6080 + }, + { + "epoch": 0.4907594221612461, + "grad_norm": 0.7240673303604126, + 
"learning_rate": 0.0001586379461182435, + "loss": 2.6282, + "step": 6081 + }, + { + "epoch": 0.49084012589782905, + "grad_norm": 0.767473042011261, + "learning_rate": 0.00015862515742422374, + "loss": 2.6939, + "step": 6082 + }, + { + "epoch": 0.4909208296344121, + "grad_norm": 0.6977359056472778, + "learning_rate": 0.00015861236726911183, + "loss": 2.6591, + "step": 6083 + }, + { + "epoch": 0.49100153337099506, + "grad_norm": 0.7676639556884766, + "learning_rate": 0.00015859957565322655, + "loss": 2.6189, + "step": 6084 + }, + { + "epoch": 0.4910822371075781, + "grad_norm": 0.7157976031303406, + "learning_rate": 0.0001585867825768866, + "loss": 2.644, + "step": 6085 + }, + { + "epoch": 0.49116294084416107, + "grad_norm": 0.7080803513526917, + "learning_rate": 0.0001585739880404109, + "loss": 2.6099, + "step": 6086 + }, + { + "epoch": 0.4912436445807441, + "grad_norm": 0.7109760046005249, + "learning_rate": 0.0001585611920441183, + "loss": 2.7087, + "step": 6087 + }, + { + "epoch": 0.4913243483173271, + "grad_norm": 0.7274255156517029, + "learning_rate": 0.00015854839458832772, + "loss": 2.6394, + "step": 6088 + }, + { + "epoch": 0.4914050520539101, + "grad_norm": 0.7407883405685425, + "learning_rate": 0.00015853559567335812, + "loss": 2.6729, + "step": 6089 + }, + { + "epoch": 0.4914857557904931, + "grad_norm": 0.6879885196685791, + "learning_rate": 0.00015852279529952843, + "loss": 2.5971, + "step": 6090 + }, + { + "epoch": 0.4915664595270761, + "grad_norm": 0.7678415179252625, + "learning_rate": 0.00015850999346715772, + "loss": 2.6606, + "step": 6091 + }, + { + "epoch": 0.4916471632636591, + "grad_norm": 0.7108608484268188, + "learning_rate": 0.00015849719017656504, + "loss": 2.6494, + "step": 6092 + }, + { + "epoch": 0.4917278670002421, + "grad_norm": 0.7238833904266357, + "learning_rate": 0.00015848438542806945, + "loss": 2.6742, + "step": 6093 + }, + { + "epoch": 0.4918085707368251, + "grad_norm": 0.7316902279853821, + "learning_rate": 0.0001584715792219901, 
+ "loss": 2.6757, + "step": 6094 + }, + { + "epoch": 0.49188927447340813, + "grad_norm": 0.7339446544647217, + "learning_rate": 0.00015845877155864612, + "loss": 2.607, + "step": 6095 + }, + { + "epoch": 0.4919699782099911, + "grad_norm": 0.6931337714195251, + "learning_rate": 0.0001584459624383568, + "loss": 2.6203, + "step": 6096 + }, + { + "epoch": 0.49205068194657414, + "grad_norm": 0.734229326248169, + "learning_rate": 0.00015843315186144126, + "loss": 2.646, + "step": 6097 + }, + { + "epoch": 0.4921313856831571, + "grad_norm": 0.7764919400215149, + "learning_rate": 0.00015842033982821883, + "loss": 2.6698, + "step": 6098 + }, + { + "epoch": 0.49221208941974015, + "grad_norm": 0.7707986235618591, + "learning_rate": 0.00015840752633900887, + "loss": 2.6995, + "step": 6099 + }, + { + "epoch": 0.4922927931563231, + "grad_norm": 0.7321949601173401, + "learning_rate": 0.00015839471139413066, + "loss": 2.6517, + "step": 6100 + }, + { + "epoch": 0.49237349689290616, + "grad_norm": 0.7087488770484924, + "learning_rate": 0.00015838189499390353, + "loss": 2.6153, + "step": 6101 + }, + { + "epoch": 0.49245420062948914, + "grad_norm": 0.7300730347633362, + "learning_rate": 0.00015836907713864706, + "loss": 2.5868, + "step": 6102 + }, + { + "epoch": 0.49253490436607217, + "grad_norm": 0.8476536273956299, + "learning_rate": 0.00015835625782868054, + "loss": 2.7158, + "step": 6103 + }, + { + "epoch": 0.49261560810265514, + "grad_norm": 0.8062012791633606, + "learning_rate": 0.0001583434370643236, + "loss": 2.6896, + "step": 6104 + }, + { + "epoch": 0.4926963118392382, + "grad_norm": 0.7336686849594116, + "learning_rate": 0.00015833061484589562, + "loss": 2.6416, + "step": 6105 + }, + { + "epoch": 0.49277701557582115, + "grad_norm": 0.6976929306983948, + "learning_rate": 0.00015831779117371627, + "loss": 2.6279, + "step": 6106 + }, + { + "epoch": 0.4928577193124042, + "grad_norm": 0.7262609601020813, + "learning_rate": 0.00015830496604810513, + "loss": 2.6144, + "step": 6107 
+ }, + { + "epoch": 0.49293842304898716, + "grad_norm": 0.7274572253227234, + "learning_rate": 0.00015829213946938183, + "loss": 2.7409, + "step": 6108 + }, + { + "epoch": 0.4930191267855702, + "grad_norm": 0.7438454031944275, + "learning_rate": 0.000158279311437866, + "loss": 2.5928, + "step": 6109 + }, + { + "epoch": 0.49309983052215317, + "grad_norm": 0.6885421872138977, + "learning_rate": 0.00015826648195387742, + "loss": 2.6659, + "step": 6110 + }, + { + "epoch": 0.4931805342587362, + "grad_norm": 0.6781450510025024, + "learning_rate": 0.0001582536510177358, + "loss": 2.6068, + "step": 6111 + }, + { + "epoch": 0.4932612379953192, + "grad_norm": 0.7618128657341003, + "learning_rate": 0.0001582408186297609, + "loss": 2.6705, + "step": 6112 + }, + { + "epoch": 0.4933419417319022, + "grad_norm": 0.7011203765869141, + "learning_rate": 0.00015822798479027256, + "loss": 2.596, + "step": 6113 + }, + { + "epoch": 0.4934226454684852, + "grad_norm": 0.7727806568145752, + "learning_rate": 0.00015821514949959065, + "loss": 2.6458, + "step": 6114 + }, + { + "epoch": 0.4935033492050682, + "grad_norm": 0.7318129539489746, + "learning_rate": 0.00015820231275803502, + "loss": 2.6009, + "step": 6115 + }, + { + "epoch": 0.4935840529416512, + "grad_norm": 0.6836227178573608, + "learning_rate": 0.00015818947456592563, + "loss": 2.6311, + "step": 6116 + }, + { + "epoch": 0.49366475667823423, + "grad_norm": 0.7657275199890137, + "learning_rate": 0.0001581766349235824, + "loss": 2.6079, + "step": 6117 + }, + { + "epoch": 0.4937454604148172, + "grad_norm": 0.74736487865448, + "learning_rate": 0.0001581637938313254, + "loss": 2.6752, + "step": 6118 + }, + { + "epoch": 0.49382616415140024, + "grad_norm": 0.716708242893219, + "learning_rate": 0.00015815095128947454, + "loss": 2.5896, + "step": 6119 + }, + { + "epoch": 0.4939068678879832, + "grad_norm": 0.740727424621582, + "learning_rate": 0.00015813810729835002, + "loss": 2.6528, + "step": 6120 + }, + { + "epoch": 0.4939875716245662, + 
"grad_norm": 0.6746687293052673, + "learning_rate": 0.0001581252618582719, + "loss": 2.6438, + "step": 6121 + }, + { + "epoch": 0.4940682753611492, + "grad_norm": 0.7547900080680847, + "learning_rate": 0.00015811241496956028, + "loss": 2.631, + "step": 6122 + }, + { + "epoch": 0.4941489790977322, + "grad_norm": 0.7500903606414795, + "learning_rate": 0.0001580995666325354, + "loss": 2.7039, + "step": 6123 + }, + { + "epoch": 0.49422968283431523, + "grad_norm": 0.7692849636077881, + "learning_rate": 0.00015808671684751743, + "loss": 2.5922, + "step": 6124 + }, + { + "epoch": 0.4943103865708982, + "grad_norm": 0.6964236497879028, + "learning_rate": 0.00015807386561482662, + "loss": 2.6239, + "step": 6125 + }, + { + "epoch": 0.49439109030748124, + "grad_norm": 0.7094165086746216, + "learning_rate": 0.0001580610129347833, + "loss": 2.6239, + "step": 6126 + }, + { + "epoch": 0.4944717940440642, + "grad_norm": 0.7579131126403809, + "learning_rate": 0.00015804815880770775, + "loss": 2.6654, + "step": 6127 + }, + { + "epoch": 0.49455249778064725, + "grad_norm": 0.7687693238258362, + "learning_rate": 0.00015803530323392034, + "loss": 2.6557, + "step": 6128 + }, + { + "epoch": 0.4946332015172302, + "grad_norm": 0.6913540363311768, + "learning_rate": 0.0001580224462137415, + "loss": 2.6299, + "step": 6129 + }, + { + "epoch": 0.49471390525381326, + "grad_norm": 0.7574129700660706, + "learning_rate": 0.0001580095877474916, + "loss": 2.6327, + "step": 6130 + }, + { + "epoch": 0.49479460899039623, + "grad_norm": 0.6834598183631897, + "learning_rate": 0.0001579967278354911, + "loss": 2.6402, + "step": 6131 + }, + { + "epoch": 0.49487531272697927, + "grad_norm": 0.7872750163078308, + "learning_rate": 0.00015798386647806057, + "loss": 2.6647, + "step": 6132 + }, + { + "epoch": 0.49495601646356224, + "grad_norm": 0.705211341381073, + "learning_rate": 0.00015797100367552055, + "loss": 2.6288, + "step": 6133 + }, + { + "epoch": 0.4950367202001453, + "grad_norm": 0.7302640080451965, + 
"learning_rate": 0.00015795813942819155, + "loss": 2.6683, + "step": 6134 + }, + { + "epoch": 0.49511742393672825, + "grad_norm": 0.7522360682487488, + "learning_rate": 0.0001579452737363942, + "loss": 2.5885, + "step": 6135 + }, + { + "epoch": 0.4951981276733113, + "grad_norm": 0.657376229763031, + "learning_rate": 0.0001579324066004492, + "loss": 2.5775, + "step": 6136 + }, + { + "epoch": 0.49527883140989426, + "grad_norm": 0.7539556622505188, + "learning_rate": 0.00015791953802067715, + "loss": 2.6236, + "step": 6137 + }, + { + "epoch": 0.4953595351464773, + "grad_norm": 0.7090374827384949, + "learning_rate": 0.00015790666799739883, + "loss": 2.5845, + "step": 6138 + }, + { + "epoch": 0.49544023888306027, + "grad_norm": 0.6883948445320129, + "learning_rate": 0.00015789379653093497, + "loss": 2.6621, + "step": 6139 + }, + { + "epoch": 0.4955209426196433, + "grad_norm": 0.7466424107551575, + "learning_rate": 0.00015788092362160633, + "loss": 2.6289, + "step": 6140 + }, + { + "epoch": 0.4956016463562263, + "grad_norm": 0.7424437403678894, + "learning_rate": 0.00015786804926973383, + "loss": 2.6405, + "step": 6141 + }, + { + "epoch": 0.4956823500928093, + "grad_norm": 0.7227851748466492, + "learning_rate": 0.00015785517347563822, + "loss": 2.6537, + "step": 6142 + }, + { + "epoch": 0.4957630538293923, + "grad_norm": 0.7548653483390808, + "learning_rate": 0.00015784229623964048, + "loss": 2.7377, + "step": 6143 + }, + { + "epoch": 0.4958437575659753, + "grad_norm": 0.7086976170539856, + "learning_rate": 0.00015782941756206152, + "loss": 2.6194, + "step": 6144 + }, + { + "epoch": 0.4959244613025583, + "grad_norm": 0.6605533957481384, + "learning_rate": 0.0001578165374432223, + "loss": 2.6265, + "step": 6145 + }, + { + "epoch": 0.4960051650391413, + "grad_norm": 0.7187899947166443, + "learning_rate": 0.00015780365588344384, + "loss": 2.5639, + "step": 6146 + }, + { + "epoch": 0.4960858687757243, + "grad_norm": 0.7014074921607971, + "learning_rate": 
0.00015779077288304716, + "loss": 2.6011, + "step": 6147 + }, + { + "epoch": 0.49616657251230734, + "grad_norm": 0.7463840842247009, + "learning_rate": 0.00015777788844235335, + "loss": 2.6059, + "step": 6148 + }, + { + "epoch": 0.4962472762488903, + "grad_norm": 0.8022417426109314, + "learning_rate": 0.00015776500256168356, + "loss": 2.6011, + "step": 6149 + }, + { + "epoch": 0.49632797998547334, + "grad_norm": 0.7140083909034729, + "learning_rate": 0.0001577521152413589, + "loss": 2.6891, + "step": 6150 + }, + { + "epoch": 0.4964086837220563, + "grad_norm": 0.7266198992729187, + "learning_rate": 0.00015773922648170053, + "loss": 2.6561, + "step": 6151 + }, + { + "epoch": 0.49648938745863935, + "grad_norm": 0.7241406440734863, + "learning_rate": 0.0001577263362830297, + "loss": 2.6835, + "step": 6152 + }, + { + "epoch": 0.49657009119522233, + "grad_norm": 0.7422344088554382, + "learning_rate": 0.0001577134446456677, + "loss": 2.6039, + "step": 6153 + }, + { + "epoch": 0.49665079493180536, + "grad_norm": 0.8764764666557312, + "learning_rate": 0.0001577005515699358, + "loss": 2.68, + "step": 6154 + }, + { + "epoch": 0.49673149866838834, + "grad_norm": 0.7224323749542236, + "learning_rate": 0.0001576876570561553, + "loss": 2.5824, + "step": 6155 + }, + { + "epoch": 0.49681220240497137, + "grad_norm": 0.7601075172424316, + "learning_rate": 0.00015767476110464758, + "loss": 2.7124, + "step": 6156 + }, + { + "epoch": 0.49689290614155435, + "grad_norm": 0.7425428628921509, + "learning_rate": 0.0001576618637157341, + "loss": 2.5913, + "step": 6157 + }, + { + "epoch": 0.4969736098781374, + "grad_norm": 0.721969723701477, + "learning_rate": 0.0001576489648897362, + "loss": 2.6482, + "step": 6158 + }, + { + "epoch": 0.49705431361472036, + "grad_norm": 0.8142126798629761, + "learning_rate": 0.00015763606462697544, + "loss": 2.6231, + "step": 6159 + }, + { + "epoch": 0.4971350173513034, + "grad_norm": 0.6636359691619873, + "learning_rate": 0.00015762316292777326, + "loss": 
2.6388, + "step": 6160 + }, + { + "epoch": 0.49721572108788636, + "grad_norm": 0.7093132734298706, + "learning_rate": 0.00015761025979245123, + "loss": 2.6562, + "step": 6161 + }, + { + "epoch": 0.4972964248244694, + "grad_norm": 0.7130851745605469, + "learning_rate": 0.00015759735522133094, + "loss": 2.6856, + "step": 6162 + }, + { + "epoch": 0.4973771285610524, + "grad_norm": 0.7303292155265808, + "learning_rate": 0.000157584449214734, + "loss": 2.6077, + "step": 6163 + }, + { + "epoch": 0.4974578322976354, + "grad_norm": 0.6742258071899414, + "learning_rate": 0.00015757154177298204, + "loss": 2.6644, + "step": 6164 + }, + { + "epoch": 0.4975385360342184, + "grad_norm": 0.6882894039154053, + "learning_rate": 0.00015755863289639677, + "loss": 2.6462, + "step": 6165 + }, + { + "epoch": 0.4976192397708014, + "grad_norm": 0.7882276773452759, + "learning_rate": 0.00015754572258529993, + "loss": 2.6509, + "step": 6166 + }, + { + "epoch": 0.4976999435073844, + "grad_norm": 0.7163859009742737, + "learning_rate": 0.00015753281084001324, + "loss": 2.627, + "step": 6167 + }, + { + "epoch": 0.4977806472439674, + "grad_norm": 0.7194411158561707, + "learning_rate": 0.0001575198976608585, + "loss": 2.6798, + "step": 6168 + }, + { + "epoch": 0.4978613509805504, + "grad_norm": 0.7233198881149292, + "learning_rate": 0.0001575069830481576, + "loss": 2.6616, + "step": 6169 + }, + { + "epoch": 0.49794205471713343, + "grad_norm": 0.7246997952461243, + "learning_rate": 0.00015749406700223231, + "loss": 2.6262, + "step": 6170 + }, + { + "epoch": 0.4980227584537164, + "grad_norm": 0.7509368658065796, + "learning_rate": 0.00015748114952340457, + "loss": 2.6148, + "step": 6171 + }, + { + "epoch": 0.4981034621902994, + "grad_norm": 0.7079075574874878, + "learning_rate": 0.00015746823061199637, + "loss": 2.6712, + "step": 6172 + }, + { + "epoch": 0.4981841659268824, + "grad_norm": 0.6821560859680176, + "learning_rate": 0.0001574553102683296, + "loss": 2.6253, + "step": 6173 + }, + { + 
"epoch": 0.4982648696634654, + "grad_norm": 0.7623000741004944, + "learning_rate": 0.00015744238849272634, + "loss": 2.6252, + "step": 6174 + }, + { + "epoch": 0.4983455734000484, + "grad_norm": 0.709434449672699, + "learning_rate": 0.00015742946528550858, + "loss": 2.555, + "step": 6175 + }, + { + "epoch": 0.4984262771366314, + "grad_norm": 0.7277799844741821, + "learning_rate": 0.00015741654064699846, + "loss": 2.6551, + "step": 6176 + }, + { + "epoch": 0.49850698087321443, + "grad_norm": 0.7208690643310547, + "learning_rate": 0.00015740361457751802, + "loss": 2.6747, + "step": 6177 + }, + { + "epoch": 0.4985876846097974, + "grad_norm": 0.8458136916160583, + "learning_rate": 0.00015739068707738946, + "loss": 2.6551, + "step": 6178 + }, + { + "epoch": 0.49866838834638044, + "grad_norm": 0.7718539834022522, + "learning_rate": 0.00015737775814693498, + "loss": 2.6246, + "step": 6179 + }, + { + "epoch": 0.4987490920829634, + "grad_norm": 0.6982735395431519, + "learning_rate": 0.00015736482778647674, + "loss": 2.5726, + "step": 6180 + }, + { + "epoch": 0.49882979581954645, + "grad_norm": 0.6759411692619324, + "learning_rate": 0.00015735189599633707, + "loss": 2.6603, + "step": 6181 + }, + { + "epoch": 0.4989104995561294, + "grad_norm": 0.7016656994819641, + "learning_rate": 0.0001573389627768382, + "loss": 2.6045, + "step": 6182 + }, + { + "epoch": 0.49899120329271246, + "grad_norm": 0.7170618176460266, + "learning_rate": 0.00015732602812830253, + "loss": 2.6419, + "step": 6183 + }, + { + "epoch": 0.49907190702929544, + "grad_norm": 0.6963300704956055, + "learning_rate": 0.00015731309205105237, + "loss": 2.6377, + "step": 6184 + }, + { + "epoch": 0.49915261076587847, + "grad_norm": 0.7437995672225952, + "learning_rate": 0.00015730015454541014, + "loss": 2.7013, + "step": 6185 + }, + { + "epoch": 0.49923331450246144, + "grad_norm": 0.6846518516540527, + "learning_rate": 0.00015728721561169827, + "loss": 2.5526, + "step": 6186 + }, + { + "epoch": 0.4993140182390445, + 
"grad_norm": 0.7343618273735046, + "learning_rate": 0.00015727427525023924, + "loss": 2.6567, + "step": 6187 + }, + { + "epoch": 0.49939472197562745, + "grad_norm": 0.6947566270828247, + "learning_rate": 0.00015726133346135554, + "loss": 2.6642, + "step": 6188 + }, + { + "epoch": 0.4994754257122105, + "grad_norm": 0.7402610778808594, + "learning_rate": 0.00015724839024536976, + "loss": 2.6964, + "step": 6189 + }, + { + "epoch": 0.49955612944879346, + "grad_norm": 0.7318306565284729, + "learning_rate": 0.00015723544560260444, + "loss": 2.5864, + "step": 6190 + }, + { + "epoch": 0.4996368331853765, + "grad_norm": 0.752216100692749, + "learning_rate": 0.00015722249953338215, + "loss": 2.6357, + "step": 6191 + }, + { + "epoch": 0.49971753692195947, + "grad_norm": 0.70283442735672, + "learning_rate": 0.00015720955203802565, + "loss": 2.5892, + "step": 6192 + }, + { + "epoch": 0.4997982406585425, + "grad_norm": 0.7457823753356934, + "learning_rate": 0.00015719660311685755, + "loss": 2.6663, + "step": 6193 + }, + { + "epoch": 0.4998789443951255, + "grad_norm": 0.7296229600906372, + "learning_rate": 0.00015718365277020058, + "loss": 2.6238, + "step": 6194 + }, + { + "epoch": 0.4999596481317085, + "grad_norm": 0.6963346004486084, + "learning_rate": 0.0001571707009983775, + "loss": 2.6303, + "step": 6195 + }, + { + "epoch": 0.5000403518682915, + "grad_norm": 0.7074694633483887, + "learning_rate": 0.0001571577478017111, + "loss": 2.6077, + "step": 6196 + }, + { + "epoch": 0.5001210556048745, + "grad_norm": 0.7826260328292847, + "learning_rate": 0.00015714479318052423, + "loss": 2.6668, + "step": 6197 + }, + { + "epoch": 0.5002017593414575, + "grad_norm": 0.6908758282661438, + "learning_rate": 0.00015713183713513974, + "loss": 2.6195, + "step": 6198 + }, + { + "epoch": 0.5002824630780405, + "grad_norm": 0.7571602463722229, + "learning_rate": 0.0001571188796658805, + "loss": 2.6546, + "step": 6199 + }, + { + "epoch": 0.5003631668146236, + "grad_norm": 0.7359431385993958, + 
"learning_rate": 0.0001571059207730695, + "loss": 2.5792, + "step": 6200 + }, + { + "epoch": 0.5004438705512065, + "grad_norm": 0.6886340379714966, + "learning_rate": 0.00015709296045702967, + "loss": 2.6099, + "step": 6201 + }, + { + "epoch": 0.5005245742877895, + "grad_norm": 0.6900473833084106, + "learning_rate": 0.000157079998718084, + "loss": 2.6461, + "step": 6202 + }, + { + "epoch": 0.5006052780243725, + "grad_norm": 0.66212397813797, + "learning_rate": 0.00015706703555655555, + "loss": 2.6178, + "step": 6203 + }, + { + "epoch": 0.5006859817609556, + "grad_norm": 0.7666565179824829, + "learning_rate": 0.00015705407097276744, + "loss": 2.7097, + "step": 6204 + }, + { + "epoch": 0.5007666854975386, + "grad_norm": 0.7294591069221497, + "learning_rate": 0.0001570411049670427, + "loss": 2.5995, + "step": 6205 + }, + { + "epoch": 0.5008473892341215, + "grad_norm": 0.7279765009880066, + "learning_rate": 0.00015702813753970453, + "loss": 2.5554, + "step": 6206 + }, + { + "epoch": 0.5009280929707045, + "grad_norm": 0.7174742817878723, + "learning_rate": 0.0001570151686910761, + "loss": 2.6523, + "step": 6207 + }, + { + "epoch": 0.5010087967072876, + "grad_norm": 0.67017662525177, + "learning_rate": 0.00015700219842148063, + "loss": 2.5613, + "step": 6208 + }, + { + "epoch": 0.5010895004438706, + "grad_norm": 0.7000258564949036, + "learning_rate": 0.00015698922673124138, + "loss": 2.5658, + "step": 6209 + }, + { + "epoch": 0.5011702041804535, + "grad_norm": 0.6894544363021851, + "learning_rate": 0.00015697625362068164, + "loss": 2.6925, + "step": 6210 + }, + { + "epoch": 0.5012509079170365, + "grad_norm": 0.6742957234382629, + "learning_rate": 0.00015696327909012466, + "loss": 2.6429, + "step": 6211 + }, + { + "epoch": 0.5013316116536196, + "grad_norm": 0.7039656639099121, + "learning_rate": 0.0001569503031398939, + "loss": 2.6313, + "step": 6212 + }, + { + "epoch": 0.5014123153902026, + "grad_norm": 0.720003604888916, + "learning_rate": 0.00015693732577031272, + 
"loss": 2.6207, + "step": 6213 + }, + { + "epoch": 0.5014930191267856, + "grad_norm": 0.8611499071121216, + "learning_rate": 0.00015692434698170456, + "loss": 2.6855, + "step": 6214 + }, + { + "epoch": 0.5015737228633685, + "grad_norm": 0.6664702296257019, + "learning_rate": 0.00015691136677439284, + "loss": 2.6174, + "step": 6215 + }, + { + "epoch": 0.5016544265999516, + "grad_norm": 0.7258509993553162, + "learning_rate": 0.00015689838514870111, + "loss": 2.6558, + "step": 6216 + }, + { + "epoch": 0.5017351303365346, + "grad_norm": 0.6972211599349976, + "learning_rate": 0.0001568854021049529, + "loss": 2.5913, + "step": 6217 + }, + { + "epoch": 0.5018158340731176, + "grad_norm": 0.7927280068397522, + "learning_rate": 0.00015687241764347177, + "loss": 2.6466, + "step": 6218 + }, + { + "epoch": 0.5018965378097006, + "grad_norm": 0.7044646143913269, + "learning_rate": 0.00015685943176458128, + "loss": 2.6195, + "step": 6219 + }, + { + "epoch": 0.5019772415462836, + "grad_norm": 0.6935598254203796, + "learning_rate": 0.00015684644446860516, + "loss": 2.6486, + "step": 6220 + }, + { + "epoch": 0.5020579452828666, + "grad_norm": 0.7965792417526245, + "learning_rate": 0.00015683345575586704, + "loss": 2.6265, + "step": 6221 + }, + { + "epoch": 0.5021386490194496, + "grad_norm": 0.727053701877594, + "learning_rate": 0.00015682046562669064, + "loss": 2.6714, + "step": 6222 + }, + { + "epoch": 0.5022193527560326, + "grad_norm": 0.7919184565544128, + "learning_rate": 0.0001568074740813997, + "loss": 2.7115, + "step": 6223 + }, + { + "epoch": 0.5023000564926156, + "grad_norm": 0.7724714279174805, + "learning_rate": 0.00015679448112031801, + "loss": 2.6636, + "step": 6224 + }, + { + "epoch": 0.5023807602291986, + "grad_norm": 0.6893701553344727, + "learning_rate": 0.0001567814867437694, + "loss": 2.6562, + "step": 6225 + }, + { + "epoch": 0.5024614639657816, + "grad_norm": 0.7089633345603943, + "learning_rate": 0.00015676849095207769, + "loss": 2.6125, + "step": 6226 + }, + { 
+ "epoch": 0.5025421677023646, + "grad_norm": 0.7620012760162354, + "learning_rate": 0.00015675549374556682, + "loss": 2.6935, + "step": 6227 + }, + { + "epoch": 0.5026228714389476, + "grad_norm": 0.7293741703033447, + "learning_rate": 0.00015674249512456065, + "loss": 2.66, + "step": 6228 + }, + { + "epoch": 0.5027035751755307, + "grad_norm": 0.7366519570350647, + "learning_rate": 0.00015672949508938318, + "loss": 2.5968, + "step": 6229 + }, + { + "epoch": 0.5027842789121136, + "grad_norm": 0.6646310091018677, + "learning_rate": 0.00015671649364035846, + "loss": 2.5751, + "step": 6230 + }, + { + "epoch": 0.5028649826486966, + "grad_norm": 0.6682632565498352, + "learning_rate": 0.00015670349077781038, + "loss": 2.5902, + "step": 6231 + }, + { + "epoch": 0.5029456863852796, + "grad_norm": 0.7327528595924377, + "learning_rate": 0.00015669048650206313, + "loss": 2.6487, + "step": 6232 + }, + { + "epoch": 0.5030263901218627, + "grad_norm": 0.7114281058311462, + "learning_rate": 0.00015667748081344074, + "loss": 2.5779, + "step": 6233 + }, + { + "epoch": 0.5031070938584457, + "grad_norm": 0.7908105850219727, + "learning_rate": 0.00015666447371226737, + "loss": 2.6099, + "step": 6234 + }, + { + "epoch": 0.5031877975950286, + "grad_norm": 0.7823575139045715, + "learning_rate": 0.00015665146519886725, + "loss": 2.6339, + "step": 6235 + }, + { + "epoch": 0.5032685013316116, + "grad_norm": 0.7404836416244507, + "learning_rate": 0.00015663845527356447, + "loss": 2.6035, + "step": 6236 + }, + { + "epoch": 0.5033492050681947, + "grad_norm": 0.7448995113372803, + "learning_rate": 0.00015662544393668334, + "loss": 2.6566, + "step": 6237 + }, + { + "epoch": 0.5034299088047777, + "grad_norm": 0.7209747433662415, + "learning_rate": 0.00015661243118854815, + "loss": 2.682, + "step": 6238 + }, + { + "epoch": 0.5035106125413606, + "grad_norm": 0.691759467124939, + "learning_rate": 0.00015659941702948315, + "loss": 2.6435, + "step": 6239 + }, + { + "epoch": 0.5035913162779436, + 
"grad_norm": 0.7646063566207886, + "learning_rate": 0.00015658640145981275, + "loss": 2.591, + "step": 6240 + }, + { + "epoch": 0.5036720200145267, + "grad_norm": 0.8319387435913086, + "learning_rate": 0.00015657338447986133, + "loss": 2.5937, + "step": 6241 + }, + { + "epoch": 0.5037527237511097, + "grad_norm": 0.729193389415741, + "learning_rate": 0.00015656036608995323, + "loss": 2.651, + "step": 6242 + }, + { + "epoch": 0.5038334274876927, + "grad_norm": 0.720098614692688, + "learning_rate": 0.000156547346290413, + "loss": 2.681, + "step": 6243 + }, + { + "epoch": 0.5039141312242756, + "grad_norm": 0.7172541618347168, + "learning_rate": 0.00015653432508156508, + "loss": 2.5906, + "step": 6244 + }, + { + "epoch": 0.5039948349608587, + "grad_norm": 0.7352481484413147, + "learning_rate": 0.00015652130246373398, + "loss": 2.6376, + "step": 6245 + }, + { + "epoch": 0.5040755386974417, + "grad_norm": 0.6664925813674927, + "learning_rate": 0.0001565082784372443, + "loss": 2.706, + "step": 6246 + }, + { + "epoch": 0.5041562424340247, + "grad_norm": 0.7292987704277039, + "learning_rate": 0.0001564952530024206, + "loss": 2.6149, + "step": 6247 + }, + { + "epoch": 0.5042369461706077, + "grad_norm": 0.6904531121253967, + "learning_rate": 0.00015648222615958747, + "loss": 2.579, + "step": 6248 + }, + { + "epoch": 0.5043176499071907, + "grad_norm": 0.7385311722755432, + "learning_rate": 0.00015646919790906965, + "loss": 2.6137, + "step": 6249 + }, + { + "epoch": 0.5043983536437737, + "grad_norm": 0.7869507074356079, + "learning_rate": 0.0001564561682511918, + "loss": 2.6831, + "step": 6250 + }, + { + "epoch": 0.5044790573803567, + "grad_norm": 0.723680317401886, + "learning_rate": 0.00015644313718627867, + "loss": 2.6083, + "step": 6251 + }, + { + "epoch": 0.5045597611169397, + "grad_norm": 0.7029969692230225, + "learning_rate": 0.00015643010471465502, + "loss": 2.6462, + "step": 6252 + }, + { + "epoch": 0.5046404648535228, + "grad_norm": 0.818975031375885, + 
"learning_rate": 0.00015641707083664566, + "loss": 2.6393, + "step": 6253 + }, + { + "epoch": 0.5047211685901057, + "grad_norm": 0.7237667441368103, + "learning_rate": 0.0001564040355525754, + "loss": 2.5995, + "step": 6254 + }, + { + "epoch": 0.5048018723266887, + "grad_norm": 0.8613824248313904, + "learning_rate": 0.00015639099886276912, + "loss": 2.748, + "step": 6255 + }, + { + "epoch": 0.5048825760632717, + "grad_norm": 0.6802194118499756, + "learning_rate": 0.00015637796076755178, + "loss": 2.6393, + "step": 6256 + }, + { + "epoch": 0.5049632797998548, + "grad_norm": 0.7816255688667297, + "learning_rate": 0.00015636492126724823, + "loss": 2.6218, + "step": 6257 + }, + { + "epoch": 0.5050439835364378, + "grad_norm": 0.7443990707397461, + "learning_rate": 0.00015635188036218356, + "loss": 2.6181, + "step": 6258 + }, + { + "epoch": 0.5051246872730207, + "grad_norm": 0.7869458794593811, + "learning_rate": 0.0001563388380526827, + "loss": 2.6641, + "step": 6259 + }, + { + "epoch": 0.5052053910096037, + "grad_norm": 0.7423158288002014, + "learning_rate": 0.00015632579433907072, + "loss": 2.5849, + "step": 6260 + }, + { + "epoch": 0.5052860947461868, + "grad_norm": 0.7888280153274536, + "learning_rate": 0.00015631274922167272, + "loss": 2.7095, + "step": 6261 + }, + { + "epoch": 0.5053667984827698, + "grad_norm": 0.7053405046463013, + "learning_rate": 0.0001562997027008138, + "loss": 2.5747, + "step": 6262 + }, + { + "epoch": 0.5054475022193528, + "grad_norm": 0.7930825352668762, + "learning_rate": 0.0001562866547768191, + "loss": 2.6359, + "step": 6263 + }, + { + "epoch": 0.5055282059559357, + "grad_norm": 0.7431469559669495, + "learning_rate": 0.0001562736054500139, + "loss": 2.6167, + "step": 6264 + }, + { + "epoch": 0.5056089096925188, + "grad_norm": 0.8395694494247437, + "learning_rate": 0.00015626055472072324, + "loss": 2.7217, + "step": 6265 + }, + { + "epoch": 0.5056896134291018, + "grad_norm": 0.7318898439407349, + "learning_rate": 0.0001562475025892726, + 
"loss": 2.6866, + "step": 6266 + }, + { + "epoch": 0.5057703171656848, + "grad_norm": 0.7487025856971741, + "learning_rate": 0.0001562344490559871, + "loss": 2.7206, + "step": 6267 + }, + { + "epoch": 0.5058510209022677, + "grad_norm": 0.8187269568443298, + "learning_rate": 0.00015622139412119212, + "loss": 2.658, + "step": 6268 + }, + { + "epoch": 0.5059317246388508, + "grad_norm": 0.6714495420455933, + "learning_rate": 0.00015620833778521307, + "loss": 2.6182, + "step": 6269 + }, + { + "epoch": 0.5060124283754338, + "grad_norm": 0.7556246519088745, + "learning_rate": 0.00015619528004837528, + "loss": 2.6502, + "step": 6270 + }, + { + "epoch": 0.5060931321120168, + "grad_norm": 0.6989960074424744, + "learning_rate": 0.00015618222091100424, + "loss": 2.6031, + "step": 6271 + }, + { + "epoch": 0.5061738358485998, + "grad_norm": 0.7002139091491699, + "learning_rate": 0.0001561691603734254, + "loss": 2.6563, + "step": 6272 + }, + { + "epoch": 0.5062545395851827, + "grad_norm": 0.7064816355705261, + "learning_rate": 0.00015615609843596423, + "loss": 2.6482, + "step": 6273 + }, + { + "epoch": 0.5063352433217658, + "grad_norm": 0.6971433162689209, + "learning_rate": 0.00015614303509894634, + "loss": 2.6522, + "step": 6274 + }, + { + "epoch": 0.5064159470583488, + "grad_norm": 0.6982942223548889, + "learning_rate": 0.0001561299703626972, + "loss": 2.6477, + "step": 6275 + }, + { + "epoch": 0.5064966507949318, + "grad_norm": 0.7219811081886292, + "learning_rate": 0.0001561169042275425, + "loss": 2.6514, + "step": 6276 + }, + { + "epoch": 0.5065773545315148, + "grad_norm": 0.7391932010650635, + "learning_rate": 0.00015610383669380787, + "loss": 2.698, + "step": 6277 + }, + { + "epoch": 0.5066580582680978, + "grad_norm": 0.7852853536605835, + "learning_rate": 0.00015609076776181894, + "loss": 2.6281, + "step": 6278 + }, + { + "epoch": 0.5067387620046808, + "grad_norm": 0.7435647249221802, + "learning_rate": 0.00015607769743190147, + "loss": 2.6403, + "step": 6279 + }, + { + 
"epoch": 0.5068194657412638, + "grad_norm": 0.7300949096679688, + "learning_rate": 0.00015606462570438119, + "loss": 2.6125, + "step": 6280 + }, + { + "epoch": 0.5069001694778468, + "grad_norm": 0.7081549167633057, + "learning_rate": 0.00015605155257958388, + "loss": 2.6192, + "step": 6281 + }, + { + "epoch": 0.5069808732144299, + "grad_norm": 0.709020733833313, + "learning_rate": 0.00015603847805783537, + "loss": 2.6745, + "step": 6282 + }, + { + "epoch": 0.5070615769510128, + "grad_norm": 0.691684901714325, + "learning_rate": 0.0001560254021394615, + "loss": 2.5638, + "step": 6283 + }, + { + "epoch": 0.5071422806875958, + "grad_norm": 0.8338537812232971, + "learning_rate": 0.00015601232482478813, + "loss": 2.5835, + "step": 6284 + }, + { + "epoch": 0.5072229844241788, + "grad_norm": 0.659436047077179, + "learning_rate": 0.00015599924611414126, + "loss": 2.601, + "step": 6285 + }, + { + "epoch": 0.5073036881607619, + "grad_norm": 0.72590172290802, + "learning_rate": 0.00015598616600784676, + "loss": 2.602, + "step": 6286 + }, + { + "epoch": 0.5073843918973449, + "grad_norm": 0.6704443693161011, + "learning_rate": 0.00015597308450623066, + "loss": 2.5703, + "step": 6287 + }, + { + "epoch": 0.5074650956339278, + "grad_norm": 0.7298632264137268, + "learning_rate": 0.00015596000160961898, + "loss": 2.6859, + "step": 6288 + }, + { + "epoch": 0.5075457993705108, + "grad_norm": 0.6900345087051392, + "learning_rate": 0.00015594691731833776, + "loss": 2.6264, + "step": 6289 + }, + { + "epoch": 0.5076265031070939, + "grad_norm": 0.6705992221832275, + "learning_rate": 0.0001559338316327131, + "loss": 2.6135, + "step": 6290 + }, + { + "epoch": 0.5077072068436769, + "grad_norm": 0.691545307636261, + "learning_rate": 0.0001559207445530712, + "loss": 2.6538, + "step": 6291 + }, + { + "epoch": 0.5077879105802598, + "grad_norm": 0.6579985618591309, + "learning_rate": 0.00015590765607973811, + "loss": 2.6224, + "step": 6292 + }, + { + "epoch": 0.5078686143168428, + "grad_norm": 
0.6938790678977966, + "learning_rate": 0.00015589456621304014, + "loss": 2.5932, + "step": 6293 + }, + { + "epoch": 0.5079493180534259, + "grad_norm": 0.7421671748161316, + "learning_rate": 0.00015588147495330346, + "loss": 2.7098, + "step": 6294 + }, + { + "epoch": 0.5080300217900089, + "grad_norm": 0.7076674699783325, + "learning_rate": 0.0001558683823008543, + "loss": 2.664, + "step": 6295 + }, + { + "epoch": 0.5081107255265919, + "grad_norm": 0.6829726696014404, + "learning_rate": 0.00015585528825601906, + "loss": 2.6029, + "step": 6296 + }, + { + "epoch": 0.5081914292631748, + "grad_norm": 0.6968080401420593, + "learning_rate": 0.000155842192819124, + "loss": 2.6256, + "step": 6297 + }, + { + "epoch": 0.5082721329997579, + "grad_norm": 0.7453410625457764, + "learning_rate": 0.00015582909599049554, + "loss": 2.6577, + "step": 6298 + }, + { + "epoch": 0.5083528367363409, + "grad_norm": 0.6603519916534424, + "learning_rate": 0.00015581599777046007, + "loss": 2.6066, + "step": 6299 + }, + { + "epoch": 0.5084335404729239, + "grad_norm": 0.7096173763275146, + "learning_rate": 0.00015580289815934401, + "loss": 2.5488, + "step": 6300 + }, + { + "epoch": 0.5085142442095069, + "grad_norm": 0.799298107624054, + "learning_rate": 0.0001557897971574739, + "loss": 2.6021, + "step": 6301 + }, + { + "epoch": 0.50859494794609, + "grad_norm": 0.6820314526557922, + "learning_rate": 0.00015577669476517618, + "loss": 2.6276, + "step": 6302 + }, + { + "epoch": 0.5086756516826729, + "grad_norm": 0.7119347453117371, + "learning_rate": 0.00015576359098277742, + "loss": 2.6627, + "step": 6303 + }, + { + "epoch": 0.5087563554192559, + "grad_norm": 0.7638720273971558, + "learning_rate": 0.00015575048581060422, + "loss": 2.6824, + "step": 6304 + }, + { + "epoch": 0.5088370591558389, + "grad_norm": 0.7360339164733887, + "learning_rate": 0.00015573737924898316, + "loss": 2.5805, + "step": 6305 + }, + { + "epoch": 0.508917762892422, + "grad_norm": 0.7220984697341919, + "learning_rate": 
0.00015572427129824091, + "loss": 2.6374, + "step": 6306 + }, + { + "epoch": 0.5089984666290049, + "grad_norm": 0.670964777469635, + "learning_rate": 0.00015571116195870418, + "loss": 2.6371, + "step": 6307 + }, + { + "epoch": 0.5090791703655879, + "grad_norm": 0.7826075553894043, + "learning_rate": 0.00015569805123069968, + "loss": 2.7666, + "step": 6308 + }, + { + "epoch": 0.5091598741021709, + "grad_norm": 0.7691593766212463, + "learning_rate": 0.00015568493911455412, + "loss": 2.6242, + "step": 6309 + }, + { + "epoch": 0.509240577838754, + "grad_norm": 0.714500367641449, + "learning_rate": 0.0001556718256105943, + "loss": 2.6551, + "step": 6310 + }, + { + "epoch": 0.509321281575337, + "grad_norm": 0.7634009718894958, + "learning_rate": 0.00015565871071914706, + "loss": 2.7069, + "step": 6311 + }, + { + "epoch": 0.5094019853119199, + "grad_norm": 0.7134168148040771, + "learning_rate": 0.00015564559444053926, + "loss": 2.5816, + "step": 6312 + }, + { + "epoch": 0.5094826890485029, + "grad_norm": 0.6548121571540833, + "learning_rate": 0.0001556324767750978, + "loss": 2.6192, + "step": 6313 + }, + { + "epoch": 0.509563392785086, + "grad_norm": 0.7244428992271423, + "learning_rate": 0.0001556193577231496, + "loss": 2.6072, + "step": 6314 + }, + { + "epoch": 0.509644096521669, + "grad_norm": 0.6976662278175354, + "learning_rate": 0.0001556062372850216, + "loss": 2.6148, + "step": 6315 + }, + { + "epoch": 0.509724800258252, + "grad_norm": 0.772726833820343, + "learning_rate": 0.00015559311546104083, + "loss": 2.6458, + "step": 6316 + }, + { + "epoch": 0.5098055039948349, + "grad_norm": 0.7976188659667969, + "learning_rate": 0.00015557999225153428, + "loss": 2.6772, + "step": 6317 + }, + { + "epoch": 0.509886207731418, + "grad_norm": 0.6458039283752441, + "learning_rate": 0.00015556686765682903, + "loss": 2.6143, + "step": 6318 + }, + { + "epoch": 0.509966911468001, + "grad_norm": 0.7295405268669128, + "learning_rate": 0.0001555537416772522, + "loss": 2.5919, + "step": 
6319 + }, + { + "epoch": 0.510047615204584, + "grad_norm": 0.657978355884552, + "learning_rate": 0.00015554061431313093, + "loss": 2.6245, + "step": 6320 + }, + { + "epoch": 0.510128318941167, + "grad_norm": 0.6726922392845154, + "learning_rate": 0.00015552748556479232, + "loss": 2.6207, + "step": 6321 + }, + { + "epoch": 0.51020902267775, + "grad_norm": 0.7954673767089844, + "learning_rate": 0.00015551435543256363, + "loss": 2.7177, + "step": 6322 + }, + { + "epoch": 0.510289726414333, + "grad_norm": 0.7186735272407532, + "learning_rate": 0.00015550122391677211, + "loss": 2.5953, + "step": 6323 + }, + { + "epoch": 0.510370430150916, + "grad_norm": 0.7835420966148376, + "learning_rate": 0.00015548809101774498, + "loss": 2.7039, + "step": 6324 + }, + { + "epoch": 0.510451133887499, + "grad_norm": 0.6966592073440552, + "learning_rate": 0.00015547495673580962, + "loss": 2.6287, + "step": 6325 + }, + { + "epoch": 0.5105318376240819, + "grad_norm": 0.6676180362701416, + "learning_rate": 0.00015546182107129328, + "loss": 2.638, + "step": 6326 + }, + { + "epoch": 0.510612541360665, + "grad_norm": 0.7285657525062561, + "learning_rate": 0.0001554486840245234, + "loss": 2.6661, + "step": 6327 + }, + { + "epoch": 0.510693245097248, + "grad_norm": 0.6453657150268555, + "learning_rate": 0.00015543554559582735, + "loss": 2.715, + "step": 6328 + }, + { + "epoch": 0.510773948833831, + "grad_norm": 0.7364684343338013, + "learning_rate": 0.0001554224057855326, + "loss": 2.6475, + "step": 6329 + }, + { + "epoch": 0.510854652570414, + "grad_norm": 0.670894980430603, + "learning_rate": 0.00015540926459396665, + "loss": 2.6091, + "step": 6330 + }, + { + "epoch": 0.510935356306997, + "grad_norm": 0.6750168204307556, + "learning_rate": 0.00015539612202145696, + "loss": 2.6473, + "step": 6331 + }, + { + "epoch": 0.51101606004358, + "grad_norm": 0.6552454233169556, + "learning_rate": 0.0001553829780683311, + "loss": 2.6158, + "step": 6332 + }, + { + "epoch": 0.511096763780163, + 
"grad_norm": 0.7387828230857849, + "learning_rate": 0.00015536983273491668, + "loss": 2.6219, + "step": 6333 + }, + { + "epoch": 0.511177467516746, + "grad_norm": 0.6993975639343262, + "learning_rate": 0.00015535668602154127, + "loss": 2.6446, + "step": 6334 + }, + { + "epoch": 0.5112581712533291, + "grad_norm": 0.6491217613220215, + "learning_rate": 0.00015534353792853254, + "loss": 2.6404, + "step": 6335 + }, + { + "epoch": 0.511338874989912, + "grad_norm": 0.7165521383285522, + "learning_rate": 0.0001553303884562182, + "loss": 2.6339, + "step": 6336 + }, + { + "epoch": 0.511419578726495, + "grad_norm": 0.7363756895065308, + "learning_rate": 0.0001553172376049259, + "loss": 2.6411, + "step": 6337 + }, + { + "epoch": 0.511500282463078, + "grad_norm": 0.7148438096046448, + "learning_rate": 0.00015530408537498347, + "loss": 2.5617, + "step": 6338 + }, + { + "epoch": 0.5115809861996611, + "grad_norm": 0.7140451669692993, + "learning_rate": 0.00015529093176671864, + "loss": 2.5898, + "step": 6339 + }, + { + "epoch": 0.5116616899362441, + "grad_norm": 0.7799252271652222, + "learning_rate": 0.00015527777678045926, + "loss": 2.6176, + "step": 6340 + }, + { + "epoch": 0.511742393672827, + "grad_norm": 0.7292928099632263, + "learning_rate": 0.00015526462041653323, + "loss": 2.6722, + "step": 6341 + }, + { + "epoch": 0.51182309740941, + "grad_norm": 0.6986904740333557, + "learning_rate": 0.00015525146267526837, + "loss": 2.6154, + "step": 6342 + }, + { + "epoch": 0.5119038011459931, + "grad_norm": 0.7239612936973572, + "learning_rate": 0.00015523830355699262, + "loss": 2.5664, + "step": 6343 + }, + { + "epoch": 0.5119845048825761, + "grad_norm": 0.6805121898651123, + "learning_rate": 0.00015522514306203395, + "loss": 2.6204, + "step": 6344 + }, + { + "epoch": 0.512065208619159, + "grad_norm": 0.7036689519882202, + "learning_rate": 0.00015521198119072035, + "loss": 2.6211, + "step": 6345 + }, + { + "epoch": 0.512145912355742, + "grad_norm": 0.7155849933624268, + 
"learning_rate": 0.00015519881794337988, + "loss": 2.6074, + "step": 6346 + }, + { + "epoch": 0.5122266160923251, + "grad_norm": 0.7183938026428223, + "learning_rate": 0.00015518565332034057, + "loss": 2.6148, + "step": 6347 + }, + { + "epoch": 0.5123073198289081, + "grad_norm": 0.7053570747375488, + "learning_rate": 0.0001551724873219305, + "loss": 2.6476, + "step": 6348 + }, + { + "epoch": 0.5123880235654911, + "grad_norm": 0.714846670627594, + "learning_rate": 0.00015515931994847785, + "loss": 2.5728, + "step": 6349 + }, + { + "epoch": 0.512468727302074, + "grad_norm": 0.7504729628562927, + "learning_rate": 0.00015514615120031076, + "loss": 2.6415, + "step": 6350 + }, + { + "epoch": 0.5125494310386571, + "grad_norm": 0.6940335035324097, + "learning_rate": 0.0001551329810777574, + "loss": 2.6115, + "step": 6351 + }, + { + "epoch": 0.5126301347752401, + "grad_norm": 0.7166119813919067, + "learning_rate": 0.00015511980958114608, + "loss": 2.6284, + "step": 6352 + }, + { + "epoch": 0.5127108385118231, + "grad_norm": 0.7787839770317078, + "learning_rate": 0.00015510663671080497, + "loss": 2.6385, + "step": 6353 + }, + { + "epoch": 0.5127915422484061, + "grad_norm": 0.7298412322998047, + "learning_rate": 0.00015509346246706245, + "loss": 2.629, + "step": 6354 + }, + { + "epoch": 0.5128722459849892, + "grad_norm": 0.7918897271156311, + "learning_rate": 0.00015508028685024683, + "loss": 2.6777, + "step": 6355 + }, + { + "epoch": 0.5129529497215721, + "grad_norm": 0.6867843866348267, + "learning_rate": 0.00015506710986068646, + "loss": 2.6101, + "step": 6356 + }, + { + "epoch": 0.5130336534581551, + "grad_norm": 0.716468870639801, + "learning_rate": 0.00015505393149870978, + "loss": 2.6558, + "step": 6357 + }, + { + "epoch": 0.5131143571947381, + "grad_norm": 0.6704092621803284, + "learning_rate": 0.0001550407517646452, + "loss": 2.6128, + "step": 6358 + }, + { + "epoch": 0.5131950609313212, + "grad_norm": 0.820716381072998, + "learning_rate": 0.00015502757065882124, + 
"loss": 2.6052, + "step": 6359 + }, + { + "epoch": 0.5132757646679041, + "grad_norm": 0.7328094840049744, + "learning_rate": 0.00015501438818156635, + "loss": 2.6399, + "step": 6360 + }, + { + "epoch": 0.5133564684044871, + "grad_norm": 0.6602808833122253, + "learning_rate": 0.00015500120433320911, + "loss": 2.5509, + "step": 6361 + }, + { + "epoch": 0.5134371721410701, + "grad_norm": 0.7013166546821594, + "learning_rate": 0.00015498801911407805, + "loss": 2.6439, + "step": 6362 + }, + { + "epoch": 0.5135178758776532, + "grad_norm": 0.7415499091148376, + "learning_rate": 0.00015497483252450186, + "loss": 2.575, + "step": 6363 + }, + { + "epoch": 0.5135985796142362, + "grad_norm": 0.7262336015701294, + "learning_rate": 0.00015496164456480912, + "loss": 2.6815, + "step": 6364 + }, + { + "epoch": 0.5136792833508191, + "grad_norm": 0.7353699803352356, + "learning_rate": 0.0001549484552353285, + "loss": 2.6172, + "step": 6365 + }, + { + "epoch": 0.5137599870874021, + "grad_norm": 0.7005086541175842, + "learning_rate": 0.00015493526453638879, + "loss": 2.5945, + "step": 6366 + }, + { + "epoch": 0.5138406908239852, + "grad_norm": 0.7469770908355713, + "learning_rate": 0.00015492207246831864, + "loss": 2.6797, + "step": 6367 + }, + { + "epoch": 0.5139213945605682, + "grad_norm": 0.6768934726715088, + "learning_rate": 0.00015490887903144693, + "loss": 2.6369, + "step": 6368 + }, + { + "epoch": 0.5140020982971512, + "grad_norm": 0.7625820636749268, + "learning_rate": 0.00015489568422610237, + "loss": 2.6182, + "step": 6369 + }, + { + "epoch": 0.5140828020337341, + "grad_norm": 0.749351978302002, + "learning_rate": 0.00015488248805261388, + "loss": 2.6066, + "step": 6370 + }, + { + "epoch": 0.5141635057703172, + "grad_norm": 0.8369480967521667, + "learning_rate": 0.00015486929051131032, + "loss": 2.7627, + "step": 6371 + }, + { + "epoch": 0.5142442095069002, + "grad_norm": 0.6482037305831909, + "learning_rate": 0.0001548560916025206, + "loss": 2.609, + "step": 6372 + }, + { + 
"epoch": 0.5143249132434832, + "grad_norm": 0.6801851391792297, + "learning_rate": 0.0001548428913265737, + "loss": 2.5878, + "step": 6373 + }, + { + "epoch": 0.5144056169800661, + "grad_norm": 0.744926929473877, + "learning_rate": 0.0001548296896837986, + "loss": 2.6569, + "step": 6374 + }, + { + "epoch": 0.5144863207166491, + "grad_norm": 0.6862614750862122, + "learning_rate": 0.00015481648667452425, + "loss": 2.5626, + "step": 6375 + }, + { + "epoch": 0.5145670244532322, + "grad_norm": 0.7186449766159058, + "learning_rate": 0.0001548032822990798, + "loss": 2.6783, + "step": 6376 + }, + { + "epoch": 0.5146477281898152, + "grad_norm": 0.699715256690979, + "learning_rate": 0.0001547900765577943, + "loss": 2.6709, + "step": 6377 + }, + { + "epoch": 0.5147284319263982, + "grad_norm": 0.7272205352783203, + "learning_rate": 0.00015477686945099687, + "loss": 2.6076, + "step": 6378 + }, + { + "epoch": 0.5148091356629811, + "grad_norm": 0.7667459845542908, + "learning_rate": 0.00015476366097901667, + "loss": 2.6541, + "step": 6379 + }, + { + "epoch": 0.5148898393995642, + "grad_norm": 0.6538121700286865, + "learning_rate": 0.00015475045114218285, + "loss": 2.5806, + "step": 6380 + }, + { + "epoch": 0.5149705431361472, + "grad_norm": 0.7388994097709656, + "learning_rate": 0.00015473723994082473, + "loss": 2.6293, + "step": 6381 + }, + { + "epoch": 0.5150512468727302, + "grad_norm": 0.7044215202331543, + "learning_rate": 0.00015472402737527142, + "loss": 2.5755, + "step": 6382 + }, + { + "epoch": 0.5151319506093132, + "grad_norm": 0.6807994246482849, + "learning_rate": 0.00015471081344585236, + "loss": 2.6493, + "step": 6383 + }, + { + "epoch": 0.5152126543458962, + "grad_norm": 0.676278293132782, + "learning_rate": 0.00015469759815289681, + "loss": 2.6319, + "step": 6384 + }, + { + "epoch": 0.5152933580824792, + "grad_norm": 0.7515453696250916, + "learning_rate": 0.00015468438149673412, + "loss": 2.6415, + "step": 6385 + }, + { + "epoch": 0.5153740618190622, + "grad_norm": 
0.8694239854812622, + "learning_rate": 0.0001546711634776937, + "loss": 2.5818, + "step": 6386 + }, + { + "epoch": 0.5154547655556452, + "grad_norm": 0.717090368270874, + "learning_rate": 0.000154657944096105, + "loss": 2.7132, + "step": 6387 + }, + { + "epoch": 0.5155354692922283, + "grad_norm": 0.7098804116249084, + "learning_rate": 0.00015464472335229742, + "loss": 2.564, + "step": 6388 + }, + { + "epoch": 0.5156161730288112, + "grad_norm": 0.6879690289497375, + "learning_rate": 0.0001546315012466005, + "loss": 2.6094, + "step": 6389 + }, + { + "epoch": 0.5156968767653942, + "grad_norm": 0.7110763788223267, + "learning_rate": 0.00015461827777934377, + "loss": 2.5982, + "step": 6390 + }, + { + "epoch": 0.5157775805019772, + "grad_norm": 0.7168039679527283, + "learning_rate": 0.00015460505295085677, + "loss": 2.5451, + "step": 6391 + }, + { + "epoch": 0.5158582842385603, + "grad_norm": 0.7059877514839172, + "learning_rate": 0.00015459182676146914, + "loss": 2.6655, + "step": 6392 + }, + { + "epoch": 0.5159389879751433, + "grad_norm": 0.7278143763542175, + "learning_rate": 0.00015457859921151043, + "loss": 2.6587, + "step": 6393 + }, + { + "epoch": 0.5160196917117262, + "grad_norm": 0.7301023602485657, + "learning_rate": 0.0001545653703013104, + "loss": 2.7672, + "step": 6394 + }, + { + "epoch": 0.5161003954483092, + "grad_norm": 0.6933302283287048, + "learning_rate": 0.0001545521400311987, + "loss": 2.5924, + "step": 6395 + }, + { + "epoch": 0.5161810991848923, + "grad_norm": 0.7074775099754333, + "learning_rate": 0.00015453890840150508, + "loss": 2.6663, + "step": 6396 + }, + { + "epoch": 0.5162618029214753, + "grad_norm": 0.7069801092147827, + "learning_rate": 0.00015452567541255924, + "loss": 2.6791, + "step": 6397 + }, + { + "epoch": 0.5163425066580583, + "grad_norm": 0.6586462259292603, + "learning_rate": 0.00015451244106469108, + "loss": 2.6368, + "step": 6398 + }, + { + "epoch": 0.5164232103946412, + "grad_norm": 0.6862531900405884, + "learning_rate": 
0.00015449920535823042, + "loss": 2.7099, + "step": 6399 + }, + { + "epoch": 0.5165039141312243, + "grad_norm": 0.7177795767784119, + "learning_rate": 0.00015448596829350706, + "loss": 2.5921, + "step": 6400 + }, + { + "epoch": 0.5165846178678073, + "grad_norm": 0.6936569213867188, + "learning_rate": 0.00015447272987085094, + "loss": 2.5739, + "step": 6401 + }, + { + "epoch": 0.5166653216043903, + "grad_norm": 0.7394363284111023, + "learning_rate": 0.00015445949009059202, + "loss": 2.5941, + "step": 6402 + }, + { + "epoch": 0.5167460253409732, + "grad_norm": 0.6713366508483887, + "learning_rate": 0.00015444624895306027, + "loss": 2.574, + "step": 6403 + }, + { + "epoch": 0.5168267290775563, + "grad_norm": 0.679128885269165, + "learning_rate": 0.0001544330064585856, + "loss": 2.6422, + "step": 6404 + }, + { + "epoch": 0.5169074328141393, + "grad_norm": 0.6803367137908936, + "learning_rate": 0.0001544197626074982, + "loss": 2.6503, + "step": 6405 + }, + { + "epoch": 0.5169881365507223, + "grad_norm": 0.8009794354438782, + "learning_rate": 0.000154406517400128, + "loss": 2.6434, + "step": 6406 + }, + { + "epoch": 0.5170688402873053, + "grad_norm": 0.7292529344558716, + "learning_rate": 0.00015439327083680517, + "loss": 2.6333, + "step": 6407 + }, + { + "epoch": 0.5171495440238884, + "grad_norm": 0.67046719789505, + "learning_rate": 0.00015438002291785988, + "loss": 2.5791, + "step": 6408 + }, + { + "epoch": 0.5172302477604713, + "grad_norm": 0.755501925945282, + "learning_rate": 0.00015436677364362225, + "loss": 2.5558, + "step": 6409 + }, + { + "epoch": 0.5173109514970543, + "grad_norm": 0.6957115530967712, + "learning_rate": 0.0001543535230144225, + "loss": 2.5839, + "step": 6410 + }, + { + "epoch": 0.5173916552336373, + "grad_norm": 0.6629074215888977, + "learning_rate": 0.0001543402710305909, + "loss": 2.6529, + "step": 6411 + }, + { + "epoch": 0.5174723589702204, + "grad_norm": 0.6647019386291504, + "learning_rate": 0.00015432701769245766, + "loss": 2.589, + 
"step": 6412 + }, + { + "epoch": 0.5175530627068033, + "grad_norm": 0.6472512483596802, + "learning_rate": 0.00015431376300035316, + "loss": 2.6184, + "step": 6413 + }, + { + "epoch": 0.5176337664433863, + "grad_norm": 0.6900136470794678, + "learning_rate": 0.0001543005069546077, + "loss": 2.7029, + "step": 6414 + }, + { + "epoch": 0.5177144701799693, + "grad_norm": 0.7702177166938782, + "learning_rate": 0.00015428724955555165, + "loss": 2.6189, + "step": 6415 + }, + { + "epoch": 0.5177951739165524, + "grad_norm": 0.641655445098877, + "learning_rate": 0.00015427399080351545, + "loss": 2.6486, + "step": 6416 + }, + { + "epoch": 0.5178758776531354, + "grad_norm": 0.6826485991477966, + "learning_rate": 0.00015426073069882952, + "loss": 2.6105, + "step": 6417 + }, + { + "epoch": 0.5179565813897183, + "grad_norm": 0.749812662601471, + "learning_rate": 0.00015424746924182434, + "loss": 2.5644, + "step": 6418 + }, + { + "epoch": 0.5180372851263013, + "grad_norm": 0.6737890243530273, + "learning_rate": 0.0001542342064328304, + "loss": 2.686, + "step": 6419 + }, + { + "epoch": 0.5181179888628844, + "grad_norm": 0.7131822109222412, + "learning_rate": 0.0001542209422721783, + "loss": 2.697, + "step": 6420 + }, + { + "epoch": 0.5181986925994674, + "grad_norm": 0.7543746829032898, + "learning_rate": 0.0001542076767601986, + "loss": 2.6349, + "step": 6421 + }, + { + "epoch": 0.5182793963360504, + "grad_norm": 0.7589309215545654, + "learning_rate": 0.00015419440989722184, + "loss": 2.63, + "step": 6422 + }, + { + "epoch": 0.5183601000726333, + "grad_norm": 0.7036365866661072, + "learning_rate": 0.00015418114168357872, + "loss": 2.605, + "step": 6423 + }, + { + "epoch": 0.5184408038092164, + "grad_norm": 0.733161985874176, + "learning_rate": 0.00015416787211959998, + "loss": 2.6708, + "step": 6424 + }, + { + "epoch": 0.5185215075457994, + "grad_norm": 0.6928101181983948, + "learning_rate": 0.00015415460120561623, + "loss": 2.6549, + "step": 6425 + }, + { + "epoch": 
0.5186022112823824, + "grad_norm": 0.6557250022888184, + "learning_rate": 0.00015414132894195825, + "loss": 2.6185, + "step": 6426 + }, + { + "epoch": 0.5186829150189654, + "grad_norm": 0.7236297726631165, + "learning_rate": 0.00015412805532895684, + "loss": 2.6185, + "step": 6427 + }, + { + "epoch": 0.5187636187555483, + "grad_norm": 0.7194060683250427, + "learning_rate": 0.0001541147803669428, + "loss": 2.6123, + "step": 6428 + }, + { + "epoch": 0.5188443224921314, + "grad_norm": 0.7077342867851257, + "learning_rate": 0.00015410150405624696, + "loss": 2.6628, + "step": 6429 + }, + { + "epoch": 0.5189250262287144, + "grad_norm": 0.7036150693893433, + "learning_rate": 0.00015408822639720023, + "loss": 2.5966, + "step": 6430 + }, + { + "epoch": 0.5190057299652974, + "grad_norm": 0.7047349810600281, + "learning_rate": 0.00015407494739013352, + "loss": 2.6626, + "step": 6431 + }, + { + "epoch": 0.5190864337018803, + "grad_norm": 0.7537584900856018, + "learning_rate": 0.00015406166703537777, + "loss": 2.6452, + "step": 6432 + }, + { + "epoch": 0.5191671374384634, + "grad_norm": 0.7944707870483398, + "learning_rate": 0.00015404838533326394, + "loss": 2.6834, + "step": 6433 + }, + { + "epoch": 0.5192478411750464, + "grad_norm": 0.8602458238601685, + "learning_rate": 0.00015403510228412305, + "loss": 2.6238, + "step": 6434 + }, + { + "epoch": 0.5193285449116294, + "grad_norm": 0.7181896567344666, + "learning_rate": 0.0001540218178882862, + "loss": 2.652, + "step": 6435 + }, + { + "epoch": 0.5194092486482124, + "grad_norm": 0.7470960021018982, + "learning_rate": 0.0001540085321460844, + "loss": 2.6703, + "step": 6436 + }, + { + "epoch": 0.5194899523847955, + "grad_norm": 0.8249944448471069, + "learning_rate": 0.00015399524505784883, + "loss": 2.5945, + "step": 6437 + }, + { + "epoch": 0.5195706561213784, + "grad_norm": 0.7332444190979004, + "learning_rate": 0.00015398195662391057, + "loss": 2.6472, + "step": 6438 + }, + { + "epoch": 0.5196513598579614, + "grad_norm": 
0.7727739810943604, + "learning_rate": 0.0001539686668446009, + "loss": 2.6276, + "step": 6439 + }, + { + "epoch": 0.5197320635945444, + "grad_norm": 0.7161617279052734, + "learning_rate": 0.00015395537572025094, + "loss": 2.624, + "step": 6440 + }, + { + "epoch": 0.5198127673311275, + "grad_norm": 0.7657529711723328, + "learning_rate": 0.00015394208325119198, + "loss": 2.6604, + "step": 6441 + }, + { + "epoch": 0.5198934710677104, + "grad_norm": 0.732904314994812, + "learning_rate": 0.00015392878943775527, + "loss": 2.6334, + "step": 6442 + }, + { + "epoch": 0.5199741748042934, + "grad_norm": 0.7058991193771362, + "learning_rate": 0.0001539154942802722, + "loss": 2.5936, + "step": 6443 + }, + { + "epoch": 0.5200548785408764, + "grad_norm": 0.7328821420669556, + "learning_rate": 0.00015390219777907405, + "loss": 2.5969, + "step": 6444 + }, + { + "epoch": 0.5201355822774595, + "grad_norm": 0.7899969220161438, + "learning_rate": 0.00015388889993449224, + "loss": 2.5856, + "step": 6445 + }, + { + "epoch": 0.5202162860140425, + "grad_norm": 0.6963860392570496, + "learning_rate": 0.00015387560074685817, + "loss": 2.6139, + "step": 6446 + }, + { + "epoch": 0.5202969897506254, + "grad_norm": 0.812053918838501, + "learning_rate": 0.00015386230021650327, + "loss": 2.716, + "step": 6447 + }, + { + "epoch": 0.5203776934872084, + "grad_norm": 0.766781210899353, + "learning_rate": 0.0001538489983437591, + "loss": 2.6509, + "step": 6448 + }, + { + "epoch": 0.5204583972237915, + "grad_norm": 0.6877299547195435, + "learning_rate": 0.00015383569512895712, + "loss": 2.6076, + "step": 6449 + }, + { + "epoch": 0.5205391009603745, + "grad_norm": 0.7009176015853882, + "learning_rate": 0.00015382239057242888, + "loss": 2.608, + "step": 6450 + }, + { + "epoch": 0.5206198046969575, + "grad_norm": 0.7187578678131104, + "learning_rate": 0.000153809084674506, + "loss": 2.5946, + "step": 6451 + }, + { + "epoch": 0.5207005084335404, + "grad_norm": 0.7242687344551086, + "learning_rate": 
0.00015379577743552001, + "loss": 2.6752, + "step": 6452 + }, + { + "epoch": 0.5207812121701235, + "grad_norm": 0.7668174505233765, + "learning_rate": 0.00015378246885580266, + "loss": 2.6694, + "step": 6453 + }, + { + "epoch": 0.5208619159067065, + "grad_norm": 0.7676039338111877, + "learning_rate": 0.00015376915893568557, + "loss": 2.6379, + "step": 6454 + }, + { + "epoch": 0.5209426196432895, + "grad_norm": 0.7394412159919739, + "learning_rate": 0.00015375584767550053, + "loss": 2.6046, + "step": 6455 + }, + { + "epoch": 0.5210233233798724, + "grad_norm": 0.7246636748313904, + "learning_rate": 0.00015374253507557923, + "loss": 2.592, + "step": 6456 + }, + { + "epoch": 0.5211040271164555, + "grad_norm": 0.7121255993843079, + "learning_rate": 0.00015372922113625345, + "loss": 2.634, + "step": 6457 + }, + { + "epoch": 0.5211847308530385, + "grad_norm": 0.7378345131874084, + "learning_rate": 0.00015371590585785505, + "loss": 2.5753, + "step": 6458 + }, + { + "epoch": 0.5212654345896215, + "grad_norm": 0.6682030558586121, + "learning_rate": 0.00015370258924071587, + "loss": 2.6305, + "step": 6459 + }, + { + "epoch": 0.5213461383262045, + "grad_norm": 0.7164177894592285, + "learning_rate": 0.00015368927128516776, + "loss": 2.7188, + "step": 6460 + }, + { + "epoch": 0.5214268420627876, + "grad_norm": 0.7341115474700928, + "learning_rate": 0.00015367595199154273, + "loss": 2.6204, + "step": 6461 + }, + { + "epoch": 0.5215075457993705, + "grad_norm": 0.6781840920448303, + "learning_rate": 0.00015366263136017258, + "loss": 2.6104, + "step": 6462 + }, + { + "epoch": 0.5215882495359535, + "grad_norm": 0.7029077410697937, + "learning_rate": 0.0001536493093913894, + "loss": 2.6055, + "step": 6463 + }, + { + "epoch": 0.5216689532725365, + "grad_norm": 0.6958553194999695, + "learning_rate": 0.00015363598608552522, + "loss": 2.5991, + "step": 6464 + }, + { + "epoch": 0.5217496570091196, + "grad_norm": 0.6919750571250916, + "learning_rate": 0.00015362266144291207, + "loss": 
2.6022, + "step": 6465 + }, + { + "epoch": 0.5218303607457025, + "grad_norm": 0.6980622410774231, + "learning_rate": 0.000153609335463882, + "loss": 2.6289, + "step": 6466 + }, + { + "epoch": 0.5219110644822855, + "grad_norm": 0.7468248009681702, + "learning_rate": 0.00015359600814876715, + "loss": 2.6327, + "step": 6467 + }, + { + "epoch": 0.5219917682188685, + "grad_norm": 0.7183729410171509, + "learning_rate": 0.00015358267949789966, + "loss": 2.6389, + "step": 6468 + }, + { + "epoch": 0.5220724719554516, + "grad_norm": 0.6558868885040283, + "learning_rate": 0.00015356934951161178, + "loss": 2.6261, + "step": 6469 + }, + { + "epoch": 0.5221531756920346, + "grad_norm": 0.8000216484069824, + "learning_rate": 0.00015355601819023562, + "loss": 2.6908, + "step": 6470 + }, + { + "epoch": 0.5222338794286175, + "grad_norm": 0.775056004524231, + "learning_rate": 0.00015354268553410355, + "loss": 2.6763, + "step": 6471 + }, + { + "epoch": 0.5223145831652005, + "grad_norm": 0.7345123291015625, + "learning_rate": 0.00015352935154354776, + "loss": 2.582, + "step": 6472 + }, + { + "epoch": 0.5223952869017836, + "grad_norm": 0.731311023235321, + "learning_rate": 0.0001535160162189006, + "loss": 2.6519, + "step": 6473 + }, + { + "epoch": 0.5224759906383666, + "grad_norm": 0.6481007933616638, + "learning_rate": 0.00015350267956049443, + "loss": 2.5695, + "step": 6474 + }, + { + "epoch": 0.5225566943749496, + "grad_norm": 0.7698814868927002, + "learning_rate": 0.00015348934156866163, + "loss": 2.5732, + "step": 6475 + }, + { + "epoch": 0.5226373981115325, + "grad_norm": 0.7404680848121643, + "learning_rate": 0.00015347600224373462, + "loss": 2.5826, + "step": 6476 + }, + { + "epoch": 0.5227181018481155, + "grad_norm": 0.6965613961219788, + "learning_rate": 0.00015346266158604584, + "loss": 2.6069, + "step": 6477 + }, + { + "epoch": 0.5227988055846986, + "grad_norm": 0.6611152291297913, + "learning_rate": 0.00015344931959592777, + "loss": 2.4937, + "step": 6478 + }, + { + "epoch": 
0.5228795093212816, + "grad_norm": 0.7418150305747986, + "learning_rate": 0.00015343597627371296, + "loss": 2.5747, + "step": 6479 + }, + { + "epoch": 0.5229602130578646, + "grad_norm": 0.6847610473632812, + "learning_rate": 0.00015342263161973393, + "loss": 2.5906, + "step": 6480 + }, + { + "epoch": 0.5230409167944475, + "grad_norm": 0.7054881453514099, + "learning_rate": 0.00015340928563432326, + "loss": 2.5914, + "step": 6481 + }, + { + "epoch": 0.5231216205310306, + "grad_norm": 0.6918888092041016, + "learning_rate": 0.0001533959383178136, + "loss": 2.6412, + "step": 6482 + }, + { + "epoch": 0.5232023242676136, + "grad_norm": 0.7232856154441833, + "learning_rate": 0.00015338258967053755, + "loss": 2.6364, + "step": 6483 + }, + { + "epoch": 0.5232830280041966, + "grad_norm": 0.7345031499862671, + "learning_rate": 0.00015336923969282786, + "loss": 2.6649, + "step": 6484 + }, + { + "epoch": 0.5233637317407795, + "grad_norm": 0.7644383907318115, + "learning_rate": 0.0001533558883850172, + "loss": 2.6949, + "step": 6485 + }, + { + "epoch": 0.5234444354773626, + "grad_norm": 0.6532372832298279, + "learning_rate": 0.0001533425357474383, + "loss": 2.5915, + "step": 6486 + }, + { + "epoch": 0.5235251392139456, + "grad_norm": 0.7089118361473083, + "learning_rate": 0.000153329181780424, + "loss": 2.6446, + "step": 6487 + }, + { + "epoch": 0.5236058429505286, + "grad_norm": 0.6966068148612976, + "learning_rate": 0.00015331582648430705, + "loss": 2.6764, + "step": 6488 + }, + { + "epoch": 0.5236865466871116, + "grad_norm": 0.7130835056304932, + "learning_rate": 0.00015330246985942035, + "loss": 2.6279, + "step": 6489 + }, + { + "epoch": 0.5237672504236947, + "grad_norm": 0.729727029800415, + "learning_rate": 0.00015328911190609678, + "loss": 2.612, + "step": 6490 + }, + { + "epoch": 0.5238479541602776, + "grad_norm": 0.6804213523864746, + "learning_rate": 0.0001532757526246692, + "loss": 2.6113, + "step": 6491 + }, + { + "epoch": 0.5239286578968606, + "grad_norm": 
0.7324437499046326, + "learning_rate": 0.0001532623920154707, + "loss": 2.6054, + "step": 6492 + }, + { + "epoch": 0.5240093616334436, + "grad_norm": 0.6166699528694153, + "learning_rate": 0.00015324903007883406, + "loss": 2.5822, + "step": 6493 + }, + { + "epoch": 0.5240900653700267, + "grad_norm": 0.7339944839477539, + "learning_rate": 0.00015323566681509242, + "loss": 2.6204, + "step": 6494 + }, + { + "epoch": 0.5241707691066096, + "grad_norm": 0.7267727255821228, + "learning_rate": 0.00015322230222457886, + "loss": 2.6094, + "step": 6495 + }, + { + "epoch": 0.5242514728431926, + "grad_norm": 0.6417120695114136, + "learning_rate": 0.00015320893630762635, + "loss": 2.6044, + "step": 6496 + }, + { + "epoch": 0.5243321765797756, + "grad_norm": 0.7092922329902649, + "learning_rate": 0.00015319556906456808, + "loss": 2.6428, + "step": 6497 + }, + { + "epoch": 0.5244128803163587, + "grad_norm": 0.7482922673225403, + "learning_rate": 0.00015318220049573714, + "loss": 2.6025, + "step": 6498 + }, + { + "epoch": 0.5244935840529417, + "grad_norm": 0.691925048828125, + "learning_rate": 0.00015316883060146675, + "loss": 2.6308, + "step": 6499 + }, + { + "epoch": 0.5245742877895246, + "grad_norm": 0.7084488272666931, + "learning_rate": 0.00015315545938209015, + "loss": 2.6535, + "step": 6500 + }, + { + "epoch": 0.5246549915261076, + "grad_norm": 0.7182802557945251, + "learning_rate": 0.00015314208683794056, + "loss": 2.6045, + "step": 6501 + }, + { + "epoch": 0.5247356952626907, + "grad_norm": 0.7043096423149109, + "learning_rate": 0.00015312871296935122, + "loss": 2.6465, + "step": 6502 + }, + { + "epoch": 0.5248163989992737, + "grad_norm": 0.7679466009140015, + "learning_rate": 0.00015311533777665547, + "loss": 2.6624, + "step": 6503 + }, + { + "epoch": 0.5248971027358567, + "grad_norm": 0.6825870275497437, + "learning_rate": 0.00015310196126018668, + "loss": 2.5548, + "step": 6504 + }, + { + "epoch": 0.5249778064724396, + "grad_norm": 0.7364058494567871, + "learning_rate": 
0.00015308858342027816, + "loss": 2.6495, + "step": 6505 + }, + { + "epoch": 0.5250585102090227, + "grad_norm": 0.7333239316940308, + "learning_rate": 0.00015307520425726341, + "loss": 2.5835, + "step": 6506 + }, + { + "epoch": 0.5251392139456057, + "grad_norm": 0.7479620575904846, + "learning_rate": 0.00015306182377147583, + "loss": 2.6065, + "step": 6507 + }, + { + "epoch": 0.5252199176821887, + "grad_norm": 0.7347591519355774, + "learning_rate": 0.00015304844196324888, + "loss": 2.6624, + "step": 6508 + }, + { + "epoch": 0.5253006214187717, + "grad_norm": 0.6879193782806396, + "learning_rate": 0.0001530350588329161, + "loss": 2.6598, + "step": 6509 + }, + { + "epoch": 0.5253813251553547, + "grad_norm": 0.7841597199440002, + "learning_rate": 0.000153021674380811, + "loss": 2.53, + "step": 6510 + }, + { + "epoch": 0.5254620288919377, + "grad_norm": 0.7916845679283142, + "learning_rate": 0.0001530082886072672, + "loss": 2.6995, + "step": 6511 + }, + { + "epoch": 0.5255427326285207, + "grad_norm": 0.7066318988800049, + "learning_rate": 0.0001529949015126183, + "loss": 2.58, + "step": 6512 + }, + { + "epoch": 0.5256234363651037, + "grad_norm": 0.6871134638786316, + "learning_rate": 0.00015298151309719787, + "loss": 2.6095, + "step": 6513 + }, + { + "epoch": 0.5257041401016868, + "grad_norm": 0.7479702830314636, + "learning_rate": 0.00015296812336133963, + "loss": 2.608, + "step": 6514 + }, + { + "epoch": 0.5257848438382697, + "grad_norm": 0.6772119402885437, + "learning_rate": 0.00015295473230537735, + "loss": 2.5679, + "step": 6515 + }, + { + "epoch": 0.5258655475748527, + "grad_norm": 0.7365416884422302, + "learning_rate": 0.0001529413399296447, + "loss": 2.6722, + "step": 6516 + }, + { + "epoch": 0.5259462513114357, + "grad_norm": 0.7538040280342102, + "learning_rate": 0.00015292794623447545, + "loss": 2.5562, + "step": 6517 + }, + { + "epoch": 0.5260269550480188, + "grad_norm": 0.7471820712089539, + "learning_rate": 0.00015291455122020344, + "loss": 2.7079, + 
"step": 6518 + }, + { + "epoch": 0.5261076587846018, + "grad_norm": 0.7605932354927063, + "learning_rate": 0.00015290115488716247, + "loss": 2.6696, + "step": 6519 + }, + { + "epoch": 0.5261883625211847, + "grad_norm": 0.7081854939460754, + "learning_rate": 0.00015288775723568647, + "loss": 2.6502, + "step": 6520 + }, + { + "epoch": 0.5262690662577677, + "grad_norm": 0.7236372828483582, + "learning_rate": 0.0001528743582661093, + "loss": 2.662, + "step": 6521 + }, + { + "epoch": 0.5263497699943508, + "grad_norm": 0.6710047721862793, + "learning_rate": 0.0001528609579787649, + "loss": 2.5947, + "step": 6522 + }, + { + "epoch": 0.5264304737309338, + "grad_norm": 0.709381103515625, + "learning_rate": 0.00015284755637398726, + "loss": 2.5922, + "step": 6523 + }, + { + "epoch": 0.5265111774675167, + "grad_norm": 0.7029775381088257, + "learning_rate": 0.00015283415345211033, + "loss": 2.6777, + "step": 6524 + }, + { + "epoch": 0.5265918812040997, + "grad_norm": 0.7250857949256897, + "learning_rate": 0.00015282074921346825, + "loss": 2.6027, + "step": 6525 + }, + { + "epoch": 0.5266725849406828, + "grad_norm": 0.7192760705947876, + "learning_rate": 0.00015280734365839498, + "loss": 2.6544, + "step": 6526 + }, + { + "epoch": 0.5267532886772658, + "grad_norm": 0.693583071231842, + "learning_rate": 0.0001527939367872247, + "loss": 2.6302, + "step": 6527 + }, + { + "epoch": 0.5268339924138488, + "grad_norm": 0.7031428217887878, + "learning_rate": 0.00015278052860029145, + "loss": 2.6944, + "step": 6528 + }, + { + "epoch": 0.5269146961504317, + "grad_norm": 0.6986895799636841, + "learning_rate": 0.00015276711909792949, + "loss": 2.6595, + "step": 6529 + }, + { + "epoch": 0.5269953998870147, + "grad_norm": 0.7375979423522949, + "learning_rate": 0.000152753708280473, + "loss": 2.6839, + "step": 6530 + }, + { + "epoch": 0.5270761036235978, + "grad_norm": 0.7126755714416504, + "learning_rate": 0.0001527402961482562, + "loss": 2.5597, + "step": 6531 + }, + { + "epoch": 
0.5271568073601808, + "grad_norm": 0.6631070971488953, + "learning_rate": 0.00015272688270161338, + "loss": 2.5566, + "step": 6532 + }, + { + "epoch": 0.5272375110967638, + "grad_norm": 0.6896609663963318, + "learning_rate": 0.00015271346794087874, + "loss": 2.5801, + "step": 6533 + }, + { + "epoch": 0.5273182148333467, + "grad_norm": 0.7437502145767212, + "learning_rate": 0.00015270005186638673, + "loss": 2.6572, + "step": 6534 + }, + { + "epoch": 0.5273989185699298, + "grad_norm": 0.7013052701950073, + "learning_rate": 0.00015268663447847166, + "loss": 2.621, + "step": 6535 + }, + { + "epoch": 0.5274796223065128, + "grad_norm": 0.7161773443222046, + "learning_rate": 0.00015267321577746795, + "loss": 2.5989, + "step": 6536 + }, + { + "epoch": 0.5275603260430958, + "grad_norm": 0.7654534578323364, + "learning_rate": 0.00015265979576371, + "loss": 2.6338, + "step": 6537 + }, + { + "epoch": 0.5276410297796787, + "grad_norm": 0.694646954536438, + "learning_rate": 0.0001526463744375323, + "loss": 2.6036, + "step": 6538 + }, + { + "epoch": 0.5277217335162618, + "grad_norm": 0.6594679355621338, + "learning_rate": 0.0001526329517992693, + "loss": 2.6256, + "step": 6539 + }, + { + "epoch": 0.5278024372528448, + "grad_norm": 0.6424389481544495, + "learning_rate": 0.00015261952784925557, + "loss": 2.6389, + "step": 6540 + }, + { + "epoch": 0.5278831409894278, + "grad_norm": 0.7465235590934753, + "learning_rate": 0.0001526061025878257, + "loss": 2.5449, + "step": 6541 + }, + { + "epoch": 0.5279638447260108, + "grad_norm": 0.6900132298469543, + "learning_rate": 0.0001525926760153142, + "loss": 2.5597, + "step": 6542 + }, + { + "epoch": 0.5280445484625939, + "grad_norm": 0.7505282163619995, + "learning_rate": 0.00015257924813205572, + "loss": 2.6526, + "step": 6543 + }, + { + "epoch": 0.5281252521991768, + "grad_norm": 0.72642582654953, + "learning_rate": 0.00015256581893838495, + "loss": 2.6593, + "step": 6544 + }, + { + "epoch": 0.5282059559357598, + "grad_norm": 
0.6901132464408875, + "learning_rate": 0.00015255238843463656, + "loss": 2.6726, + "step": 6545 + }, + { + "epoch": 0.5282866596723428, + "grad_norm": 0.7741395831108093, + "learning_rate": 0.0001525389566211453, + "loss": 2.5929, + "step": 6546 + }, + { + "epoch": 0.5283673634089259, + "grad_norm": 0.7282403111457825, + "learning_rate": 0.00015252552349824585, + "loss": 2.5696, + "step": 6547 + }, + { + "epoch": 0.5284480671455088, + "grad_norm": 0.7421764731407166, + "learning_rate": 0.0001525120890662731, + "loss": 2.5593, + "step": 6548 + }, + { + "epoch": 0.5285287708820918, + "grad_norm": 0.6830468773841858, + "learning_rate": 0.00015249865332556182, + "loss": 2.6396, + "step": 6549 + }, + { + "epoch": 0.5286094746186748, + "grad_norm": 0.6758440732955933, + "learning_rate": 0.00015248521627644684, + "loss": 2.5375, + "step": 6550 + }, + { + "epoch": 0.5286901783552579, + "grad_norm": 0.6897253394126892, + "learning_rate": 0.00015247177791926308, + "loss": 2.6148, + "step": 6551 + }, + { + "epoch": 0.5287708820918409, + "grad_norm": 0.6391426920890808, + "learning_rate": 0.00015245833825434547, + "loss": 2.5563, + "step": 6552 + }, + { + "epoch": 0.5288515858284238, + "grad_norm": 0.7213610410690308, + "learning_rate": 0.00015244489728202893, + "loss": 2.6158, + "step": 6553 + }, + { + "epoch": 0.5289322895650068, + "grad_norm": 0.6678160429000854, + "learning_rate": 0.00015243145500264845, + "loss": 2.6177, + "step": 6554 + }, + { + "epoch": 0.5290129933015899, + "grad_norm": 0.7041724324226379, + "learning_rate": 0.00015241801141653905, + "loss": 2.6504, + "step": 6555 + }, + { + "epoch": 0.5290936970381729, + "grad_norm": 0.6551648378372192, + "learning_rate": 0.0001524045665240358, + "loss": 2.577, + "step": 6556 + }, + { + "epoch": 0.5291744007747559, + "grad_norm": 0.7190412878990173, + "learning_rate": 0.00015239112032547377, + "loss": 2.596, + "step": 6557 + }, + { + "epoch": 0.5292551045113388, + "grad_norm": 0.6936302781105042, + "learning_rate": 
0.00015237767282118807, + "loss": 2.6551, + "step": 6558 + }, + { + "epoch": 0.5293358082479219, + "grad_norm": 0.6901839971542358, + "learning_rate": 0.0001523642240115138, + "loss": 2.6263, + "step": 6559 + }, + { + "epoch": 0.5294165119845049, + "grad_norm": 0.6905068159103394, + "learning_rate": 0.00015235077389678624, + "loss": 2.6323, + "step": 6560 + }, + { + "epoch": 0.5294972157210879, + "grad_norm": 0.7495188117027283, + "learning_rate": 0.00015233732247734057, + "loss": 2.6243, + "step": 6561 + }, + { + "epoch": 0.5295779194576709, + "grad_norm": 0.6758708357810974, + "learning_rate": 0.00015232386975351197, + "loss": 2.6184, + "step": 6562 + }, + { + "epoch": 0.5296586231942539, + "grad_norm": 0.6443266868591309, + "learning_rate": 0.00015231041572563573, + "loss": 2.6543, + "step": 6563 + }, + { + "epoch": 0.5297393269308369, + "grad_norm": 0.7384275794029236, + "learning_rate": 0.00015229696039404723, + "loss": 2.6117, + "step": 6564 + }, + { + "epoch": 0.5298200306674199, + "grad_norm": 0.6873897314071655, + "learning_rate": 0.00015228350375908178, + "loss": 2.5689, + "step": 6565 + }, + { + "epoch": 0.5299007344040029, + "grad_norm": 0.6715645790100098, + "learning_rate": 0.00015227004582107472, + "loss": 2.5943, + "step": 6566 + }, + { + "epoch": 0.529981438140586, + "grad_norm": 0.6814208030700684, + "learning_rate": 0.00015225658658036151, + "loss": 2.5562, + "step": 6567 + }, + { + "epoch": 0.5300621418771689, + "grad_norm": 0.6942310929298401, + "learning_rate": 0.00015224312603727755, + "loss": 2.5902, + "step": 6568 + }, + { + "epoch": 0.5301428456137519, + "grad_norm": 0.6856299042701721, + "learning_rate": 0.0001522296641921583, + "loss": 2.6115, + "step": 6569 + }, + { + "epoch": 0.5302235493503349, + "grad_norm": 0.870833694934845, + "learning_rate": 0.0001522162010453393, + "loss": 2.7492, + "step": 6570 + }, + { + "epoch": 0.530304253086918, + "grad_norm": 0.6796989440917969, + "learning_rate": 0.0001522027365971561, + "loss": 2.6957, + 
"step": 6571 + }, + { + "epoch": 0.530384956823501, + "grad_norm": 0.7043026685714722, + "learning_rate": 0.00015218927084794423, + "loss": 2.604, + "step": 6572 + }, + { + "epoch": 0.5304656605600839, + "grad_norm": 0.7533933520317078, + "learning_rate": 0.00015217580379803933, + "loss": 2.6271, + "step": 6573 + }, + { + "epoch": 0.5305463642966669, + "grad_norm": 0.7526697516441345, + "learning_rate": 0.000152162335447777, + "loss": 2.553, + "step": 6574 + }, + { + "epoch": 0.53062706803325, + "grad_norm": 0.6942071318626404, + "learning_rate": 0.00015214886579749284, + "loss": 2.7206, + "step": 6575 + }, + { + "epoch": 0.530707771769833, + "grad_norm": 0.7133236527442932, + "learning_rate": 0.00015213539484752273, + "loss": 2.6545, + "step": 6576 + }, + { + "epoch": 0.530788475506416, + "grad_norm": 0.7229849696159363, + "learning_rate": 0.00015212192259820222, + "loss": 2.6647, + "step": 6577 + }, + { + "epoch": 0.5308691792429989, + "grad_norm": 0.7142449617385864, + "learning_rate": 0.0001521084490498672, + "loss": 2.5777, + "step": 6578 + }, + { + "epoch": 0.5309498829795819, + "grad_norm": 0.6950247287750244, + "learning_rate": 0.00015209497420285342, + "loss": 2.6159, + "step": 6579 + }, + { + "epoch": 0.531030586716165, + "grad_norm": 0.7492622137069702, + "learning_rate": 0.00015208149805749668, + "loss": 2.6927, + "step": 6580 + }, + { + "epoch": 0.531111290452748, + "grad_norm": 0.7618215084075928, + "learning_rate": 0.00015206802061413287, + "loss": 2.5831, + "step": 6581 + }, + { + "epoch": 0.5311919941893309, + "grad_norm": 0.7448660731315613, + "learning_rate": 0.0001520545418730979, + "loss": 2.6123, + "step": 6582 + }, + { + "epoch": 0.5312726979259139, + "grad_norm": 0.7450618147850037, + "learning_rate": 0.00015204106183472766, + "loss": 2.5768, + "step": 6583 + }, + { + "epoch": 0.531353401662497, + "grad_norm": 0.7426019310951233, + "learning_rate": 0.0001520275804993581, + "loss": 2.603, + "step": 6584 + }, + { + "epoch": 0.53143410539908, + 
"grad_norm": 0.7503333687782288, + "learning_rate": 0.00015201409786732526, + "loss": 2.6159, + "step": 6585 + }, + { + "epoch": 0.531514809135663, + "grad_norm": 0.6944373846054077, + "learning_rate": 0.00015200061393896513, + "loss": 2.5201, + "step": 6586 + }, + { + "epoch": 0.5315955128722459, + "grad_norm": 0.6958110332489014, + "learning_rate": 0.00015198712871461375, + "loss": 2.5592, + "step": 6587 + }, + { + "epoch": 0.531676216608829, + "grad_norm": 0.7838244438171387, + "learning_rate": 0.00015197364219460727, + "loss": 2.6663, + "step": 6588 + }, + { + "epoch": 0.531756920345412, + "grad_norm": 0.754338800907135, + "learning_rate": 0.00015196015437928174, + "loss": 2.6183, + "step": 6589 + }, + { + "epoch": 0.531837624081995, + "grad_norm": 0.7394337058067322, + "learning_rate": 0.00015194666526897332, + "loss": 2.5622, + "step": 6590 + }, + { + "epoch": 0.531918327818578, + "grad_norm": 0.7352069020271301, + "learning_rate": 0.00015193317486401824, + "loss": 2.6173, + "step": 6591 + }, + { + "epoch": 0.531999031555161, + "grad_norm": 0.6318944096565247, + "learning_rate": 0.00015191968316475267, + "loss": 2.6159, + "step": 6592 + }, + { + "epoch": 0.532079735291744, + "grad_norm": 0.7071281671524048, + "learning_rate": 0.00015190619017151291, + "loss": 2.633, + "step": 6593 + }, + { + "epoch": 0.532160439028327, + "grad_norm": 0.7762585282325745, + "learning_rate": 0.00015189269588463517, + "loss": 2.6445, + "step": 6594 + }, + { + "epoch": 0.53224114276491, + "grad_norm": 0.7979930639266968, + "learning_rate": 0.0001518792003044558, + "loss": 2.5825, + "step": 6595 + }, + { + "epoch": 0.5323218465014931, + "grad_norm": 0.7355580925941467, + "learning_rate": 0.00015186570343131114, + "loss": 2.6197, + "step": 6596 + }, + { + "epoch": 0.532402550238076, + "grad_norm": 0.7286938428878784, + "learning_rate": 0.0001518522052655376, + "loss": 2.6385, + "step": 6597 + }, + { + "epoch": 0.532483253974659, + "grad_norm": 0.689143180847168, + "learning_rate": 
0.00015183870580747156, + "loss": 2.6593, + "step": 6598 + }, + { + "epoch": 0.532563957711242, + "grad_norm": 0.714746356010437, + "learning_rate": 0.00015182520505744945, + "loss": 2.6059, + "step": 6599 + }, + { + "epoch": 0.5326446614478251, + "grad_norm": 0.8055040240287781, + "learning_rate": 0.00015181170301580777, + "loss": 2.6983, + "step": 6600 + }, + { + "epoch": 0.532725365184408, + "grad_norm": 0.7104170918464661, + "learning_rate": 0.00015179819968288297, + "loss": 2.6578, + "step": 6601 + }, + { + "epoch": 0.532806068920991, + "grad_norm": 0.7175524830818176, + "learning_rate": 0.0001517846950590117, + "loss": 2.6263, + "step": 6602 + }, + { + "epoch": 0.532886772657574, + "grad_norm": 0.6755492091178894, + "learning_rate": 0.00015177118914453042, + "loss": 2.5752, + "step": 6603 + }, + { + "epoch": 0.5329674763941571, + "grad_norm": 0.7020289897918701, + "learning_rate": 0.00015175768193977578, + "loss": 2.6186, + "step": 6604 + }, + { + "epoch": 0.5330481801307401, + "grad_norm": 0.7550958395004272, + "learning_rate": 0.0001517441734450844, + "loss": 2.628, + "step": 6605 + }, + { + "epoch": 0.533128883867323, + "grad_norm": 0.6697603464126587, + "learning_rate": 0.00015173066366079297, + "loss": 2.6433, + "step": 6606 + }, + { + "epoch": 0.533209587603906, + "grad_norm": 0.715372622013092, + "learning_rate": 0.0001517171525872382, + "loss": 2.6022, + "step": 6607 + }, + { + "epoch": 0.5332902913404891, + "grad_norm": 0.7081933617591858, + "learning_rate": 0.00015170364022475675, + "loss": 2.675, + "step": 6608 + }, + { + "epoch": 0.5333709950770721, + "grad_norm": 0.7074152231216431, + "learning_rate": 0.00015169012657368546, + "loss": 2.6637, + "step": 6609 + }, + { + "epoch": 0.5334516988136551, + "grad_norm": 0.6692848801612854, + "learning_rate": 0.00015167661163436108, + "loss": 2.5855, + "step": 6610 + }, + { + "epoch": 0.533532402550238, + "grad_norm": 0.7307556867599487, + "learning_rate": 0.00015166309540712048, + "loss": 2.6105, + 
"step": 6611 + }, + { + "epoch": 0.5336131062868211, + "grad_norm": 0.7026669383049011, + "learning_rate": 0.00015164957789230048, + "loss": 2.6656, + "step": 6612 + }, + { + "epoch": 0.5336938100234041, + "grad_norm": 0.6579706072807312, + "learning_rate": 0.000151636059090238, + "loss": 2.6456, + "step": 6613 + }, + { + "epoch": 0.5337745137599871, + "grad_norm": 0.6854498386383057, + "learning_rate": 0.00015162253900126993, + "loss": 2.5969, + "step": 6614 + }, + { + "epoch": 0.5338552174965701, + "grad_norm": 0.7542434334754944, + "learning_rate": 0.00015160901762573323, + "loss": 2.6333, + "step": 6615 + }, + { + "epoch": 0.5339359212331531, + "grad_norm": 0.6795105934143066, + "learning_rate": 0.0001515954949639649, + "loss": 2.6268, + "step": 6616 + }, + { + "epoch": 0.5340166249697361, + "grad_norm": 0.6395254135131836, + "learning_rate": 0.000151581971016302, + "loss": 2.5684, + "step": 6617 + }, + { + "epoch": 0.5340973287063191, + "grad_norm": 0.7069850564002991, + "learning_rate": 0.00015156844578308155, + "loss": 2.64, + "step": 6618 + }, + { + "epoch": 0.5341780324429021, + "grad_norm": 0.6779203414916992, + "learning_rate": 0.0001515549192646406, + "loss": 2.6255, + "step": 6619 + }, + { + "epoch": 0.5342587361794852, + "grad_norm": 0.6403560638427734, + "learning_rate": 0.00015154139146131632, + "loss": 2.611, + "step": 6620 + }, + { + "epoch": 0.5343394399160681, + "grad_norm": 0.7532669901847839, + "learning_rate": 0.00015152786237344583, + "loss": 2.5641, + "step": 6621 + }, + { + "epoch": 0.5344201436526511, + "grad_norm": 0.6827573776245117, + "learning_rate": 0.00015151433200136629, + "loss": 2.6096, + "step": 6622 + }, + { + "epoch": 0.5345008473892341, + "grad_norm": 0.6691904067993164, + "learning_rate": 0.000151500800345415, + "loss": 2.6602, + "step": 6623 + }, + { + "epoch": 0.5345815511258172, + "grad_norm": 0.7288634777069092, + "learning_rate": 0.00015148726740592906, + "loss": 2.6468, + "step": 6624 + }, + { + "epoch": 
0.5346622548624002, + "grad_norm": 0.7087839245796204, + "learning_rate": 0.00015147373318324586, + "loss": 2.5795, + "step": 6625 + }, + { + "epoch": 0.5347429585989831, + "grad_norm": 0.6618373394012451, + "learning_rate": 0.00015146019767770267, + "loss": 2.638, + "step": 6626 + }, + { + "epoch": 0.5348236623355661, + "grad_norm": 0.7384989857673645, + "learning_rate": 0.00015144666088963684, + "loss": 2.6104, + "step": 6627 + }, + { + "epoch": 0.5349043660721492, + "grad_norm": 0.6662275195121765, + "learning_rate": 0.00015143312281938576, + "loss": 2.6174, + "step": 6628 + }, + { + "epoch": 0.5349850698087322, + "grad_norm": 0.6617184281349182, + "learning_rate": 0.0001514195834672868, + "loss": 2.6154, + "step": 6629 + }, + { + "epoch": 0.5350657735453151, + "grad_norm": 0.7173622846603394, + "learning_rate": 0.0001514060428336774, + "loss": 2.5741, + "step": 6630 + }, + { + "epoch": 0.5351464772818981, + "grad_norm": 0.7773584127426147, + "learning_rate": 0.00015139250091889502, + "loss": 2.6333, + "step": 6631 + }, + { + "epoch": 0.5352271810184811, + "grad_norm": 0.7255204916000366, + "learning_rate": 0.0001513789577232772, + "loss": 2.5459, + "step": 6632 + }, + { + "epoch": 0.5353078847550642, + "grad_norm": 0.7308403849601746, + "learning_rate": 0.00015136541324716144, + "loss": 2.5934, + "step": 6633 + }, + { + "epoch": 0.5353885884916472, + "grad_norm": 0.699367880821228, + "learning_rate": 0.0001513518674908853, + "loss": 2.6797, + "step": 6634 + }, + { + "epoch": 0.5354692922282301, + "grad_norm": 0.7236449718475342, + "learning_rate": 0.0001513383204547864, + "loss": 2.6289, + "step": 6635 + }, + { + "epoch": 0.5355499959648131, + "grad_norm": 0.6860557794570923, + "learning_rate": 0.00015132477213920234, + "loss": 2.6736, + "step": 6636 + }, + { + "epoch": 0.5356306997013962, + "grad_norm": 0.6724153161048889, + "learning_rate": 0.00015131122254447084, + "loss": 2.5581, + "step": 6637 + }, + { + "epoch": 0.5357114034379792, + "grad_norm": 
0.6818630695343018, + "learning_rate": 0.00015129767167092949, + "loss": 2.5979, + "step": 6638 + }, + { + "epoch": 0.5357921071745622, + "grad_norm": 0.6956631541252136, + "learning_rate": 0.00015128411951891607, + "loss": 2.6116, + "step": 6639 + }, + { + "epoch": 0.5358728109111451, + "grad_norm": 0.6698076128959656, + "learning_rate": 0.00015127056608876837, + "loss": 2.65, + "step": 6640 + }, + { + "epoch": 0.5359535146477282, + "grad_norm": 0.7763264179229736, + "learning_rate": 0.00015125701138082415, + "loss": 2.6164, + "step": 6641 + }, + { + "epoch": 0.5360342183843112, + "grad_norm": 0.7148340940475464, + "learning_rate": 0.00015124345539542118, + "loss": 2.6467, + "step": 6642 + }, + { + "epoch": 0.5361149221208942, + "grad_norm": 0.7350041270256042, + "learning_rate": 0.00015122989813289733, + "loss": 2.6477, + "step": 6643 + }, + { + "epoch": 0.5361956258574772, + "grad_norm": 0.6993441581726074, + "learning_rate": 0.00015121633959359055, + "loss": 2.7526, + "step": 6644 + }, + { + "epoch": 0.5362763295940602, + "grad_norm": 0.6828470826148987, + "learning_rate": 0.00015120277977783873, + "loss": 2.6439, + "step": 6645 + }, + { + "epoch": 0.5363570333306432, + "grad_norm": 0.7076796889305115, + "learning_rate": 0.0001511892186859797, + "loss": 2.6375, + "step": 6646 + }, + { + "epoch": 0.5364377370672262, + "grad_norm": 0.6830769777297974, + "learning_rate": 0.0001511756563183516, + "loss": 2.6052, + "step": 6647 + }, + { + "epoch": 0.5365184408038092, + "grad_norm": 0.6482179760932922, + "learning_rate": 0.00015116209267529237, + "loss": 2.6251, + "step": 6648 + }, + { + "epoch": 0.5365991445403923, + "grad_norm": 0.6687620878219604, + "learning_rate": 0.00015114852775714, + "loss": 2.659, + "step": 6649 + }, + { + "epoch": 0.5366798482769752, + "grad_norm": 0.734108030796051, + "learning_rate": 0.0001511349615642327, + "loss": 2.6542, + "step": 6650 + }, + { + "epoch": 0.5367605520135582, + "grad_norm": 0.7092111706733704, + "learning_rate": 
0.00015112139409690842, + "loss": 2.6228, + "step": 6651 + }, + { + "epoch": 0.5368412557501412, + "grad_norm": 0.6544996500015259, + "learning_rate": 0.0001511078253555054, + "loss": 2.5661, + "step": 6652 + }, + { + "epoch": 0.5369219594867243, + "grad_norm": 0.7012531161308289, + "learning_rate": 0.00015109425534036176, + "loss": 2.6447, + "step": 6653 + }, + { + "epoch": 0.5370026632233073, + "grad_norm": 0.6813335418701172, + "learning_rate": 0.0001510806840518157, + "loss": 2.5723, + "step": 6654 + }, + { + "epoch": 0.5370833669598902, + "grad_norm": 0.6711288094520569, + "learning_rate": 0.0001510671114902055, + "loss": 2.6096, + "step": 6655 + }, + { + "epoch": 0.5371640706964732, + "grad_norm": 0.721866250038147, + "learning_rate": 0.00015105353765586935, + "loss": 2.6167, + "step": 6656 + }, + { + "epoch": 0.5372447744330563, + "grad_norm": 0.8140639066696167, + "learning_rate": 0.00015103996254914562, + "loss": 2.5768, + "step": 6657 + }, + { + "epoch": 0.5373254781696393, + "grad_norm": 0.6859177947044373, + "learning_rate": 0.0001510263861703726, + "loss": 2.5638, + "step": 6658 + }, + { + "epoch": 0.5374061819062222, + "grad_norm": 0.7254204154014587, + "learning_rate": 0.00015101280851988864, + "loss": 2.5855, + "step": 6659 + }, + { + "epoch": 0.5374868856428052, + "grad_norm": 0.7181829810142517, + "learning_rate": 0.00015099922959803218, + "loss": 2.5358, + "step": 6660 + }, + { + "epoch": 0.5375675893793883, + "grad_norm": 0.7092663645744324, + "learning_rate": 0.00015098564940514155, + "loss": 2.679, + "step": 6661 + }, + { + "epoch": 0.5376482931159713, + "grad_norm": 0.7126225233078003, + "learning_rate": 0.00015097206794155527, + "loss": 2.6167, + "step": 6662 + }, + { + "epoch": 0.5377289968525543, + "grad_norm": 0.7469925880432129, + "learning_rate": 0.00015095848520761186, + "loss": 2.5906, + "step": 6663 + }, + { + "epoch": 0.5378097005891372, + "grad_norm": 0.6911186575889587, + "learning_rate": 0.00015094490120364973, + "loss": 2.6488, 
+ "step": 6664 + }, + { + "epoch": 0.5378904043257203, + "grad_norm": 0.6579635143280029, + "learning_rate": 0.00015093131593000753, + "loss": 2.5894, + "step": 6665 + }, + { + "epoch": 0.5379711080623033, + "grad_norm": 0.7107242345809937, + "learning_rate": 0.00015091772938702377, + "loss": 2.6568, + "step": 6666 + }, + { + "epoch": 0.5380518117988863, + "grad_norm": 0.6845428943634033, + "learning_rate": 0.00015090414157503714, + "loss": 2.5697, + "step": 6667 + }, + { + "epoch": 0.5381325155354693, + "grad_norm": 0.6713212132453918, + "learning_rate": 0.00015089055249438622, + "loss": 2.5747, + "step": 6668 + }, + { + "epoch": 0.5382132192720523, + "grad_norm": 0.7091513276100159, + "learning_rate": 0.0001508769621454097, + "loss": 2.6765, + "step": 6669 + }, + { + "epoch": 0.5382939230086353, + "grad_norm": 0.7403436899185181, + "learning_rate": 0.00015086337052844627, + "loss": 2.6841, + "step": 6670 + }, + { + "epoch": 0.5383746267452183, + "grad_norm": 0.6745626330375671, + "learning_rate": 0.0001508497776438347, + "loss": 2.6436, + "step": 6671 + }, + { + "epoch": 0.5384553304818013, + "grad_norm": 0.7491294145584106, + "learning_rate": 0.00015083618349191372, + "loss": 2.6376, + "step": 6672 + }, + { + "epoch": 0.5385360342183844, + "grad_norm": 0.719761848449707, + "learning_rate": 0.00015082258807302222, + "loss": 2.5885, + "step": 6673 + }, + { + "epoch": 0.5386167379549673, + "grad_norm": 0.7302667498588562, + "learning_rate": 0.00015080899138749895, + "loss": 2.7019, + "step": 6674 + }, + { + "epoch": 0.5386974416915503, + "grad_norm": 0.7640584111213684, + "learning_rate": 0.0001507953934356828, + "loss": 2.6404, + "step": 6675 + }, + { + "epoch": 0.5387781454281333, + "grad_norm": 0.699515700340271, + "learning_rate": 0.0001507817942179127, + "loss": 2.6407, + "step": 6676 + }, + { + "epoch": 0.5388588491647164, + "grad_norm": 0.7305224537849426, + "learning_rate": 0.00015076819373452746, + "loss": 2.5994, + "step": 6677 + }, + { + "epoch": 
0.5389395529012994, + "grad_norm": 0.7125952243804932, + "learning_rate": 0.00015075459198586616, + "loss": 2.6472, + "step": 6678 + }, + { + "epoch": 0.5390202566378823, + "grad_norm": 0.7077293395996094, + "learning_rate": 0.00015074098897226778, + "loss": 2.6168, + "step": 6679 + }, + { + "epoch": 0.5391009603744653, + "grad_norm": 0.6713843941688538, + "learning_rate": 0.00015072738469407127, + "loss": 2.5736, + "step": 6680 + }, + { + "epoch": 0.5391816641110483, + "grad_norm": 0.7101294994354248, + "learning_rate": 0.00015071377915161578, + "loss": 2.6994, + "step": 6681 + }, + { + "epoch": 0.5392623678476314, + "grad_norm": 0.7132740020751953, + "learning_rate": 0.00015070017234524032, + "loss": 2.586, + "step": 6682 + }, + { + "epoch": 0.5393430715842144, + "grad_norm": 0.7043401598930359, + "learning_rate": 0.00015068656427528402, + "loss": 2.6025, + "step": 6683 + }, + { + "epoch": 0.5394237753207973, + "grad_norm": 0.6831551194190979, + "learning_rate": 0.00015067295494208607, + "loss": 2.6183, + "step": 6684 + }, + { + "epoch": 0.5395044790573803, + "grad_norm": 0.7066370844841003, + "learning_rate": 0.0001506593443459856, + "loss": 2.6467, + "step": 6685 + }, + { + "epoch": 0.5395851827939634, + "grad_norm": 0.7908033132553101, + "learning_rate": 0.0001506457324873219, + "loss": 2.6929, + "step": 6686 + }, + { + "epoch": 0.5396658865305464, + "grad_norm": 0.7186774611473083, + "learning_rate": 0.00015063211936643407, + "loss": 2.5841, + "step": 6687 + }, + { + "epoch": 0.5397465902671293, + "grad_norm": 0.6634512543678284, + "learning_rate": 0.0001506185049836615, + "loss": 2.5517, + "step": 6688 + }, + { + "epoch": 0.5398272940037123, + "grad_norm": 0.734406590461731, + "learning_rate": 0.00015060488933934353, + "loss": 2.6317, + "step": 6689 + }, + { + "epoch": 0.5399079977402954, + "grad_norm": 0.7754772305488586, + "learning_rate": 0.00015059127243381937, + "loss": 2.6885, + "step": 6690 + }, + { + "epoch": 0.5399887014768784, + "grad_norm": 
0.7636603713035583, + "learning_rate": 0.00015057765426742848, + "loss": 2.5767, + "step": 6691 + }, + { + "epoch": 0.5400694052134614, + "grad_norm": 0.6621577143669128, + "learning_rate": 0.00015056403484051017, + "loss": 2.5905, + "step": 6692 + }, + { + "epoch": 0.5401501089500443, + "grad_norm": 0.7605881094932556, + "learning_rate": 0.00015055041415340404, + "loss": 2.6166, + "step": 6693 + }, + { + "epoch": 0.5402308126866274, + "grad_norm": 0.7603485584259033, + "learning_rate": 0.0001505367922064494, + "loss": 2.6123, + "step": 6694 + }, + { + "epoch": 0.5403115164232104, + "grad_norm": 0.7021469473838806, + "learning_rate": 0.0001505231689999858, + "loss": 2.6754, + "step": 6695 + }, + { + "epoch": 0.5403922201597934, + "grad_norm": 0.7291955947875977, + "learning_rate": 0.00015050954453435273, + "loss": 2.6393, + "step": 6696 + }, + { + "epoch": 0.5404729238963764, + "grad_norm": 0.6658700704574585, + "learning_rate": 0.00015049591880988977, + "loss": 2.5888, + "step": 6697 + }, + { + "epoch": 0.5405536276329594, + "grad_norm": 0.7080146074295044, + "learning_rate": 0.00015048229182693657, + "loss": 2.6318, + "step": 6698 + }, + { + "epoch": 0.5406343313695424, + "grad_norm": 0.7440849542617798, + "learning_rate": 0.00015046866358583267, + "loss": 2.596, + "step": 6699 + }, + { + "epoch": 0.5407150351061254, + "grad_norm": 0.886578381061554, + "learning_rate": 0.00015045503408691775, + "loss": 2.6479, + "step": 6700 + }, + { + "epoch": 0.5407957388427084, + "grad_norm": 0.7221408486366272, + "learning_rate": 0.00015044140333053148, + "loss": 2.625, + "step": 6701 + }, + { + "epoch": 0.5408764425792915, + "grad_norm": 0.7193209528923035, + "learning_rate": 0.0001504277713170136, + "loss": 2.6044, + "step": 6702 + }, + { + "epoch": 0.5409571463158744, + "grad_norm": 0.7139819860458374, + "learning_rate": 0.00015041413804670384, + "loss": 2.5572, + "step": 6703 + }, + { + "epoch": 0.5410378500524574, + "grad_norm": 0.728875994682312, + "learning_rate": 
0.00015040050351994196, + "loss": 2.6373, + "step": 6704 + }, + { + "epoch": 0.5411185537890404, + "grad_norm": 0.6794858574867249, + "learning_rate": 0.0001503868677370678, + "loss": 2.6265, + "step": 6705 + }, + { + "epoch": 0.5411992575256235, + "grad_norm": 0.6874774098396301, + "learning_rate": 0.00015037323069842117, + "loss": 2.6146, + "step": 6706 + }, + { + "epoch": 0.5412799612622065, + "grad_norm": 0.7064409255981445, + "learning_rate": 0.00015035959240434197, + "loss": 2.6126, + "step": 6707 + }, + { + "epoch": 0.5413606649987894, + "grad_norm": 0.7212977409362793, + "learning_rate": 0.00015034595285517006, + "loss": 2.6836, + "step": 6708 + }, + { + "epoch": 0.5414413687353724, + "grad_norm": 0.7826492190361023, + "learning_rate": 0.0001503323120512454, + "loss": 2.6648, + "step": 6709 + }, + { + "epoch": 0.5415220724719555, + "grad_norm": 0.7228415608406067, + "learning_rate": 0.000150318669992908, + "loss": 2.5734, + "step": 6710 + }, + { + "epoch": 0.5416027762085385, + "grad_norm": 0.6929590702056885, + "learning_rate": 0.00015030502668049778, + "loss": 2.6023, + "step": 6711 + }, + { + "epoch": 0.5416834799451214, + "grad_norm": 0.679990291595459, + "learning_rate": 0.0001502913821143548, + "loss": 2.5867, + "step": 6712 + }, + { + "epoch": 0.5417641836817044, + "grad_norm": 0.7324180603027344, + "learning_rate": 0.00015027773629481907, + "loss": 2.5722, + "step": 6713 + }, + { + "epoch": 0.5418448874182875, + "grad_norm": 0.686826765537262, + "learning_rate": 0.00015026408922223078, + "loss": 2.6138, + "step": 6714 + }, + { + "epoch": 0.5419255911548705, + "grad_norm": 0.7045193314552307, + "learning_rate": 0.00015025044089693, + "loss": 2.619, + "step": 6715 + }, + { + "epoch": 0.5420062948914535, + "grad_norm": 0.6839936375617981, + "learning_rate": 0.00015023679131925683, + "loss": 2.5778, + "step": 6716 + }, + { + "epoch": 0.5420869986280364, + "grad_norm": 0.7613961696624756, + "learning_rate": 0.00015022314048955153, + "loss": 2.6262, + 
"step": 6717 + }, + { + "epoch": 0.5421677023646195, + "grad_norm": 0.7867478728294373, + "learning_rate": 0.00015020948840815428, + "loss": 2.6576, + "step": 6718 + }, + { + "epoch": 0.5422484061012025, + "grad_norm": 0.7371038794517517, + "learning_rate": 0.0001501958350754053, + "loss": 2.6495, + "step": 6719 + }, + { + "epoch": 0.5423291098377855, + "grad_norm": 0.7146512269973755, + "learning_rate": 0.00015018218049164494, + "loss": 2.6514, + "step": 6720 + }, + { + "epoch": 0.5424098135743685, + "grad_norm": 0.7507650256156921, + "learning_rate": 0.00015016852465721346, + "loss": 2.6509, + "step": 6721 + }, + { + "epoch": 0.5424905173109515, + "grad_norm": 0.6786547303199768, + "learning_rate": 0.0001501548675724512, + "loss": 2.5983, + "step": 6722 + }, + { + "epoch": 0.5425712210475345, + "grad_norm": 0.7077932357788086, + "learning_rate": 0.0001501412092376985, + "loss": 2.622, + "step": 6723 + }, + { + "epoch": 0.5426519247841175, + "grad_norm": 0.7191271781921387, + "learning_rate": 0.00015012754965329584, + "loss": 2.6632, + "step": 6724 + }, + { + "epoch": 0.5427326285207005, + "grad_norm": 0.6785906553268433, + "learning_rate": 0.00015011388881958356, + "loss": 2.6312, + "step": 6725 + }, + { + "epoch": 0.5428133322572836, + "grad_norm": 0.6880263090133667, + "learning_rate": 0.00015010022673690222, + "loss": 2.5951, + "step": 6726 + }, + { + "epoch": 0.5428940359938665, + "grad_norm": 0.7769095301628113, + "learning_rate": 0.0001500865634055923, + "loss": 2.5503, + "step": 6727 + }, + { + "epoch": 0.5429747397304495, + "grad_norm": 0.6847476959228516, + "learning_rate": 0.0001500728988259942, + "loss": 2.6824, + "step": 6728 + }, + { + "epoch": 0.5430554434670325, + "grad_norm": 0.6829310059547424, + "learning_rate": 0.00015005923299844863, + "loss": 2.5683, + "step": 6729 + }, + { + "epoch": 0.5431361472036156, + "grad_norm": 0.7436082363128662, + "learning_rate": 0.0001500455659232961, + "loss": 2.6165, + "step": 6730 + }, + { + "epoch": 
0.5432168509401986, + "grad_norm": 0.7876375913619995, + "learning_rate": 0.00015003189760087724, + "loss": 2.6203, + "step": 6731 + }, + { + "epoch": 0.5432975546767815, + "grad_norm": 0.6869253516197205, + "learning_rate": 0.0001500182280315327, + "loss": 2.6136, + "step": 6732 + }, + { + "epoch": 0.5433782584133645, + "grad_norm": 0.7179432511329651, + "learning_rate": 0.00015000455721560316, + "loss": 2.6049, + "step": 6733 + }, + { + "epoch": 0.5434589621499475, + "grad_norm": 0.7286917567253113, + "learning_rate": 0.00014999088515342939, + "loss": 2.5704, + "step": 6734 + }, + { + "epoch": 0.5435396658865306, + "grad_norm": 0.6841779351234436, + "learning_rate": 0.00014997721184535206, + "loss": 2.6095, + "step": 6735 + }, + { + "epoch": 0.5436203696231136, + "grad_norm": 0.7661791443824768, + "learning_rate": 0.00014996353729171196, + "loss": 2.6193, + "step": 6736 + }, + { + "epoch": 0.5437010733596965, + "grad_norm": 0.7365885376930237, + "learning_rate": 0.0001499498614928499, + "loss": 2.586, + "step": 6737 + }, + { + "epoch": 0.5437817770962795, + "grad_norm": 0.7423815131187439, + "learning_rate": 0.00014993618444910674, + "loss": 2.6199, + "step": 6738 + }, + { + "epoch": 0.5438624808328626, + "grad_norm": 0.7667781114578247, + "learning_rate": 0.0001499225061608233, + "loss": 2.6584, + "step": 6739 + }, + { + "epoch": 0.5439431845694456, + "grad_norm": 0.7148830890655518, + "learning_rate": 0.00014990882662834057, + "loss": 2.7172, + "step": 6740 + }, + { + "epoch": 0.5440238883060285, + "grad_norm": 0.7206205725669861, + "learning_rate": 0.00014989514585199936, + "loss": 2.5682, + "step": 6741 + }, + { + "epoch": 0.5441045920426115, + "grad_norm": 0.7306448221206665, + "learning_rate": 0.0001498814638321407, + "loss": 2.6724, + "step": 6742 + }, + { + "epoch": 0.5441852957791946, + "grad_norm": 0.7058824896812439, + "learning_rate": 0.00014986778056910556, + "loss": 2.6573, + "step": 6743 + }, + { + "epoch": 0.5442659995157776, + "grad_norm": 
0.770588755607605, + "learning_rate": 0.000149854096063235, + "loss": 2.658, + "step": 6744 + }, + { + "epoch": 0.5443467032523606, + "grad_norm": 0.8283931612968445, + "learning_rate": 0.00014984041031487001, + "loss": 2.6624, + "step": 6745 + }, + { + "epoch": 0.5444274069889435, + "grad_norm": 0.6814693808555603, + "learning_rate": 0.00014982672332435176, + "loss": 2.5835, + "step": 6746 + }, + { + "epoch": 0.5445081107255266, + "grad_norm": 0.7059363722801208, + "learning_rate": 0.00014981303509202127, + "loss": 2.5977, + "step": 6747 + }, + { + "epoch": 0.5445888144621096, + "grad_norm": 0.6678106188774109, + "learning_rate": 0.00014979934561821975, + "loss": 2.6479, + "step": 6748 + }, + { + "epoch": 0.5446695181986926, + "grad_norm": 0.8167592883110046, + "learning_rate": 0.00014978565490328835, + "loss": 2.6529, + "step": 6749 + }, + { + "epoch": 0.5447502219352756, + "grad_norm": 0.807209849357605, + "learning_rate": 0.00014977196294756832, + "loss": 2.6546, + "step": 6750 + }, + { + "epoch": 0.5448309256718586, + "grad_norm": 0.7099517583847046, + "learning_rate": 0.00014975826975140085, + "loss": 2.6178, + "step": 6751 + }, + { + "epoch": 0.5449116294084416, + "grad_norm": 0.7900758981704712, + "learning_rate": 0.0001497445753151272, + "loss": 2.586, + "step": 6752 + }, + { + "epoch": 0.5449923331450246, + "grad_norm": 0.6826134920120239, + "learning_rate": 0.00014973087963908875, + "loss": 2.5914, + "step": 6753 + }, + { + "epoch": 0.5450730368816076, + "grad_norm": 0.7383863925933838, + "learning_rate": 0.0001497171827236268, + "loss": 2.6357, + "step": 6754 + }, + { + "epoch": 0.5451537406181907, + "grad_norm": 0.7208051085472107, + "learning_rate": 0.0001497034845690826, + "loss": 2.5435, + "step": 6755 + }, + { + "epoch": 0.5452344443547736, + "grad_norm": 0.680794894695282, + "learning_rate": 0.00014968978517579772, + "loss": 2.5691, + "step": 6756 + }, + { + "epoch": 0.5453151480913566, + "grad_norm": 0.680759847164154, + "learning_rate": 
0.00014967608454411347, + "loss": 2.5761, + "step": 6757 + }, + { + "epoch": 0.5453958518279396, + "grad_norm": 0.719634473323822, + "learning_rate": 0.00014966238267437134, + "loss": 2.637, + "step": 6758 + }, + { + "epoch": 0.5454765555645227, + "grad_norm": 0.777302086353302, + "learning_rate": 0.0001496486795669128, + "loss": 2.6457, + "step": 6759 + }, + { + "epoch": 0.5455572593011057, + "grad_norm": 0.6875059604644775, + "learning_rate": 0.0001496349752220794, + "loss": 2.6116, + "step": 6760 + }, + { + "epoch": 0.5456379630376886, + "grad_norm": 0.6884258985519409, + "learning_rate": 0.0001496212696402127, + "loss": 2.5863, + "step": 6761 + }, + { + "epoch": 0.5457186667742716, + "grad_norm": 0.6667922139167786, + "learning_rate": 0.00014960756282165422, + "loss": 2.5892, + "step": 6762 + }, + { + "epoch": 0.5457993705108547, + "grad_norm": 0.6712725162506104, + "learning_rate": 0.00014959385476674559, + "loss": 2.5478, + "step": 6763 + }, + { + "epoch": 0.5458800742474377, + "grad_norm": 0.6803874969482422, + "learning_rate": 0.00014958014547582845, + "loss": 2.5785, + "step": 6764 + }, + { + "epoch": 0.5459607779840207, + "grad_norm": 0.6975811123847961, + "learning_rate": 0.0001495664349492445, + "loss": 2.5765, + "step": 6765 + }, + { + "epoch": 0.5460414817206036, + "grad_norm": 0.7676273584365845, + "learning_rate": 0.00014955272318733544, + "loss": 2.634, + "step": 6766 + }, + { + "epoch": 0.5461221854571867, + "grad_norm": 0.7044547200202942, + "learning_rate": 0.000149539010190443, + "loss": 2.646, + "step": 6767 + }, + { + "epoch": 0.5462028891937697, + "grad_norm": 0.7453166842460632, + "learning_rate": 0.00014952529595890887, + "loss": 2.6137, + "step": 6768 + }, + { + "epoch": 0.5462835929303527, + "grad_norm": 0.7281681299209595, + "learning_rate": 0.00014951158049307493, + "loss": 2.6558, + "step": 6769 + }, + { + "epoch": 0.5463642966669356, + "grad_norm": 0.7131047248840332, + "learning_rate": 0.00014949786379328298, + "loss": 2.6441, + 
"step": 6770 + }, + { + "epoch": 0.5464450004035187, + "grad_norm": 0.7072219848632812, + "learning_rate": 0.00014948414585987487, + "loss": 2.5861, + "step": 6771 + }, + { + "epoch": 0.5465257041401017, + "grad_norm": 0.7270335555076599, + "learning_rate": 0.00014947042669319252, + "loss": 2.6703, + "step": 6772 + }, + { + "epoch": 0.5466064078766847, + "grad_norm": 0.7314150929450989, + "learning_rate": 0.0001494567062935778, + "loss": 2.6101, + "step": 6773 + }, + { + "epoch": 0.5466871116132677, + "grad_norm": 0.8168460130691528, + "learning_rate": 0.00014944298466137266, + "loss": 2.662, + "step": 6774 + }, + { + "epoch": 0.5467678153498507, + "grad_norm": 0.7338390350341797, + "learning_rate": 0.00014942926179691913, + "loss": 2.6481, + "step": 6775 + }, + { + "epoch": 0.5468485190864337, + "grad_norm": 0.7065639495849609, + "learning_rate": 0.00014941553770055917, + "loss": 2.6192, + "step": 6776 + }, + { + "epoch": 0.5469292228230167, + "grad_norm": 0.7675396203994751, + "learning_rate": 0.00014940181237263483, + "loss": 2.5828, + "step": 6777 + }, + { + "epoch": 0.5470099265595997, + "grad_norm": 0.7085692286491394, + "learning_rate": 0.0001493880858134882, + "loss": 2.5815, + "step": 6778 + }, + { + "epoch": 0.5470906302961828, + "grad_norm": 0.757591187953949, + "learning_rate": 0.00014937435802346135, + "loss": 2.691, + "step": 6779 + }, + { + "epoch": 0.5471713340327657, + "grad_norm": 0.7299168705940247, + "learning_rate": 0.00014936062900289647, + "loss": 2.6246, + "step": 6780 + }, + { + "epoch": 0.5472520377693487, + "grad_norm": 0.693692684173584, + "learning_rate": 0.00014934689875213564, + "loss": 2.6149, + "step": 6781 + }, + { + "epoch": 0.5473327415059317, + "grad_norm": 0.733657956123352, + "learning_rate": 0.00014933316727152113, + "loss": 2.582, + "step": 6782 + }, + { + "epoch": 0.5474134452425147, + "grad_norm": 0.6881953477859497, + "learning_rate": 0.00014931943456139514, + "loss": 2.6023, + "step": 6783 + }, + { + "epoch": 
0.5474941489790978, + "grad_norm": 0.7102411985397339, + "learning_rate": 0.00014930570062209988, + "loss": 2.6296, + "step": 6784 + }, + { + "epoch": 0.5475748527156807, + "grad_norm": 0.7263364791870117, + "learning_rate": 0.00014929196545397771, + "loss": 2.6414, + "step": 6785 + }, + { + "epoch": 0.5476555564522637, + "grad_norm": 0.7239066958427429, + "learning_rate": 0.00014927822905737092, + "loss": 2.6174, + "step": 6786 + }, + { + "epoch": 0.5477362601888467, + "grad_norm": 0.6909911632537842, + "learning_rate": 0.0001492644914326218, + "loss": 2.6036, + "step": 6787 + }, + { + "epoch": 0.5478169639254298, + "grad_norm": 0.719693124294281, + "learning_rate": 0.00014925075258007283, + "loss": 2.6507, + "step": 6788 + }, + { + "epoch": 0.5478976676620128, + "grad_norm": 0.7722225785255432, + "learning_rate": 0.0001492370125000663, + "loss": 2.6268, + "step": 6789 + }, + { + "epoch": 0.5479783713985957, + "grad_norm": 0.7456568479537964, + "learning_rate": 0.00014922327119294476, + "loss": 2.6426, + "step": 6790 + }, + { + "epoch": 0.5480590751351787, + "grad_norm": 0.7430242300033569, + "learning_rate": 0.00014920952865905062, + "loss": 2.6632, + "step": 6791 + }, + { + "epoch": 0.5481397788717618, + "grad_norm": 0.7363260388374329, + "learning_rate": 0.0001491957848987264, + "loss": 2.6021, + "step": 6792 + }, + { + "epoch": 0.5482204826083448, + "grad_norm": 0.6903972029685974, + "learning_rate": 0.00014918203991231462, + "loss": 2.6086, + "step": 6793 + }, + { + "epoch": 0.5483011863449277, + "grad_norm": 0.6765161752700806, + "learning_rate": 0.00014916829370015781, + "loss": 2.5806, + "step": 6794 + }, + { + "epoch": 0.5483818900815107, + "grad_norm": 0.7533403635025024, + "learning_rate": 0.0001491545462625986, + "loss": 2.6351, + "step": 6795 + }, + { + "epoch": 0.5484625938180938, + "grad_norm": 0.6841829419136047, + "learning_rate": 0.00014914079759997963, + "loss": 2.606, + "step": 6796 + }, + { + "epoch": 0.5485432975546768, + "grad_norm": 
0.7671411037445068, + "learning_rate": 0.00014912704771264353, + "loss": 2.6645, + "step": 6797 + }, + { + "epoch": 0.5486240012912598, + "grad_norm": 0.7218797206878662, + "learning_rate": 0.00014911329660093295, + "loss": 2.6302, + "step": 6798 + }, + { + "epoch": 0.5487047050278427, + "grad_norm": 0.7269994020462036, + "learning_rate": 0.00014909954426519067, + "loss": 2.6261, + "step": 6799 + }, + { + "epoch": 0.5487854087644258, + "grad_norm": 0.765353262424469, + "learning_rate": 0.00014908579070575936, + "loss": 2.5787, + "step": 6800 + }, + { + "epoch": 0.5488661125010088, + "grad_norm": 0.6503065228462219, + "learning_rate": 0.00014907203592298189, + "loss": 2.6404, + "step": 6801 + }, + { + "epoch": 0.5489468162375918, + "grad_norm": 0.6869633197784424, + "learning_rate": 0.00014905827991720097, + "loss": 2.6463, + "step": 6802 + }, + { + "epoch": 0.5490275199741748, + "grad_norm": 0.7221426963806152, + "learning_rate": 0.00014904452268875947, + "loss": 2.6686, + "step": 6803 + }, + { + "epoch": 0.5491082237107578, + "grad_norm": 0.6781399250030518, + "learning_rate": 0.00014903076423800028, + "loss": 2.6274, + "step": 6804 + }, + { + "epoch": 0.5491889274473408, + "grad_norm": 0.7451084852218628, + "learning_rate": 0.00014901700456526626, + "loss": 2.6449, + "step": 6805 + }, + { + "epoch": 0.5492696311839238, + "grad_norm": 0.7159574627876282, + "learning_rate": 0.0001490032436709004, + "loss": 2.6664, + "step": 6806 + }, + { + "epoch": 0.5493503349205068, + "grad_norm": 0.724039614200592, + "learning_rate": 0.00014898948155524558, + "loss": 2.5816, + "step": 6807 + }, + { + "epoch": 0.5494310386570899, + "grad_norm": 0.7194633483886719, + "learning_rate": 0.0001489757182186448, + "loss": 2.5625, + "step": 6808 + }, + { + "epoch": 0.5495117423936728, + "grad_norm": 0.704133927822113, + "learning_rate": 0.0001489619536614411, + "loss": 2.6295, + "step": 6809 + }, + { + "epoch": 0.5495924461302558, + "grad_norm": 0.6717158555984497, + "learning_rate": 
0.00014894818788397757, + "loss": 2.6168, + "step": 6810 + }, + { + "epoch": 0.5496731498668388, + "grad_norm": 0.7096573710441589, + "learning_rate": 0.0001489344208865972, + "loss": 2.6316, + "step": 6811 + }, + { + "epoch": 0.5497538536034219, + "grad_norm": 0.6383458375930786, + "learning_rate": 0.00014892065266964316, + "loss": 2.5577, + "step": 6812 + }, + { + "epoch": 0.5498345573400049, + "grad_norm": 0.7606377601623535, + "learning_rate": 0.0001489068832334586, + "loss": 2.7078, + "step": 6813 + }, + { + "epoch": 0.5499152610765878, + "grad_norm": 0.649162232875824, + "learning_rate": 0.00014889311257838665, + "loss": 2.6023, + "step": 6814 + }, + { + "epoch": 0.5499959648131708, + "grad_norm": 0.6445025205612183, + "learning_rate": 0.00014887934070477053, + "loss": 2.6, + "step": 6815 + }, + { + "epoch": 0.5500766685497539, + "grad_norm": 0.6873729825019836, + "learning_rate": 0.00014886556761295342, + "loss": 2.6398, + "step": 6816 + }, + { + "epoch": 0.5501573722863369, + "grad_norm": 0.7814947366714478, + "learning_rate": 0.0001488517933032787, + "loss": 2.5803, + "step": 6817 + }, + { + "epoch": 0.5502380760229199, + "grad_norm": 0.7140909433364868, + "learning_rate": 0.00014883801777608953, + "loss": 2.6051, + "step": 6818 + }, + { + "epoch": 0.5503187797595028, + "grad_norm": 0.7326326370239258, + "learning_rate": 0.00014882424103172936, + "loss": 2.6123, + "step": 6819 + }, + { + "epoch": 0.5503994834960859, + "grad_norm": 0.7093667387962341, + "learning_rate": 0.00014881046307054142, + "loss": 2.6527, + "step": 6820 + }, + { + "epoch": 0.5504801872326689, + "grad_norm": 0.6877567768096924, + "learning_rate": 0.00014879668389286915, + "loss": 2.6086, + "step": 6821 + }, + { + "epoch": 0.5505608909692519, + "grad_norm": 0.7095615863800049, + "learning_rate": 0.000148782903499056, + "loss": 2.6469, + "step": 6822 + }, + { + "epoch": 0.5506415947058348, + "grad_norm": 0.6931191086769104, + "learning_rate": 0.00014876912188944535, + "loss": 2.6842, + 
"step": 6823 + }, + { + "epoch": 0.5507222984424179, + "grad_norm": 0.7016414403915405, + "learning_rate": 0.00014875533906438072, + "loss": 2.5753, + "step": 6824 + }, + { + "epoch": 0.5508030021790009, + "grad_norm": 0.6813814640045166, + "learning_rate": 0.00014874155502420558, + "loss": 2.5739, + "step": 6825 + }, + { + "epoch": 0.5508837059155839, + "grad_norm": 0.7068608403205872, + "learning_rate": 0.00014872776976926347, + "loss": 2.6325, + "step": 6826 + }, + { + "epoch": 0.5509644096521669, + "grad_norm": 0.6978127360343933, + "learning_rate": 0.00014871398329989796, + "loss": 2.5614, + "step": 6827 + }, + { + "epoch": 0.55104511338875, + "grad_norm": 0.6923051476478577, + "learning_rate": 0.00014870019561645265, + "loss": 2.6075, + "step": 6828 + }, + { + "epoch": 0.5511258171253329, + "grad_norm": 0.6708533763885498, + "learning_rate": 0.00014868640671927117, + "loss": 2.5883, + "step": 6829 + }, + { + "epoch": 0.5512065208619159, + "grad_norm": 0.7679650783538818, + "learning_rate": 0.00014867261660869713, + "loss": 2.6105, + "step": 6830 + }, + { + "epoch": 0.5512872245984989, + "grad_norm": 0.7080917358398438, + "learning_rate": 0.0001486588252850743, + "loss": 2.5855, + "step": 6831 + }, + { + "epoch": 0.551367928335082, + "grad_norm": 0.7218755483627319, + "learning_rate": 0.00014864503274874635, + "loss": 2.5872, + "step": 6832 + }, + { + "epoch": 0.551448632071665, + "grad_norm": 0.689038872718811, + "learning_rate": 0.000148631239000057, + "loss": 2.5902, + "step": 6833 + }, + { + "epoch": 0.5515293358082479, + "grad_norm": 0.6810954213142395, + "learning_rate": 0.00014861744403935005, + "loss": 2.5938, + "step": 6834 + }, + { + "epoch": 0.5516100395448309, + "grad_norm": 0.7509457468986511, + "learning_rate": 0.00014860364786696933, + "loss": 2.593, + "step": 6835 + }, + { + "epoch": 0.5516907432814139, + "grad_norm": 0.739536702632904, + "learning_rate": 0.00014858985048325863, + "loss": 2.6668, + "step": 6836 + }, + { + "epoch": 
0.551771447017997, + "grad_norm": 0.661829948425293, + "learning_rate": 0.00014857605188856184, + "loss": 2.6407, + "step": 6837 + }, + { + "epoch": 0.5518521507545799, + "grad_norm": 0.6869735717773438, + "learning_rate": 0.00014856225208322287, + "loss": 2.535, + "step": 6838 + }, + { + "epoch": 0.5519328544911629, + "grad_norm": 0.6724792122840881, + "learning_rate": 0.00014854845106758563, + "loss": 2.5629, + "step": 6839 + }, + { + "epoch": 0.5520135582277459, + "grad_norm": 0.7066503763198853, + "learning_rate": 0.00014853464884199407, + "loss": 2.6002, + "step": 6840 + }, + { + "epoch": 0.552094261964329, + "grad_norm": 0.7354215979576111, + "learning_rate": 0.0001485208454067922, + "loss": 2.6032, + "step": 6841 + }, + { + "epoch": 0.552174965700912, + "grad_norm": 0.8124571442604065, + "learning_rate": 0.00014850704076232405, + "loss": 2.5884, + "step": 6842 + }, + { + "epoch": 0.5522556694374949, + "grad_norm": 0.6941336393356323, + "learning_rate": 0.00014849323490893364, + "loss": 2.6461, + "step": 6843 + }, + { + "epoch": 0.5523363731740779, + "grad_norm": 0.6848790049552917, + "learning_rate": 0.00014847942784696505, + "loss": 2.6098, + "step": 6844 + }, + { + "epoch": 0.552417076910661, + "grad_norm": 0.6688000559806824, + "learning_rate": 0.00014846561957676237, + "loss": 2.6115, + "step": 6845 + }, + { + "epoch": 0.552497780647244, + "grad_norm": 0.6647306084632874, + "learning_rate": 0.00014845181009866975, + "loss": 2.597, + "step": 6846 + }, + { + "epoch": 0.552578484383827, + "grad_norm": 0.7277785539627075, + "learning_rate": 0.0001484379994130314, + "loss": 2.6223, + "step": 6847 + }, + { + "epoch": 0.5526591881204099, + "grad_norm": 0.6623761057853699, + "learning_rate": 0.00014842418752019146, + "loss": 2.5657, + "step": 6848 + }, + { + "epoch": 0.552739891856993, + "grad_norm": 0.7207754254341125, + "learning_rate": 0.00014841037442049423, + "loss": 2.5711, + "step": 6849 + }, + { + "epoch": 0.552820595593576, + "grad_norm": 
0.6963560581207275, + "learning_rate": 0.00014839656011428389, + "loss": 2.6078, + "step": 6850 + }, + { + "epoch": 0.552901299330159, + "grad_norm": 0.6875078678131104, + "learning_rate": 0.00014838274460190475, + "loss": 2.6109, + "step": 6851 + }, + { + "epoch": 0.552982003066742, + "grad_norm": 0.7049943804740906, + "learning_rate": 0.00014836892788370118, + "loss": 2.5755, + "step": 6852 + }, + { + "epoch": 0.553062706803325, + "grad_norm": 0.6941191554069519, + "learning_rate": 0.00014835510996001744, + "loss": 2.6694, + "step": 6853 + }, + { + "epoch": 0.553143410539908, + "grad_norm": 0.7589484453201294, + "learning_rate": 0.000148341290831198, + "loss": 2.5677, + "step": 6854 + }, + { + "epoch": 0.553224114276491, + "grad_norm": 0.6594784259796143, + "learning_rate": 0.00014832747049758723, + "loss": 2.6209, + "step": 6855 + }, + { + "epoch": 0.553304818013074, + "grad_norm": 0.726598858833313, + "learning_rate": 0.00014831364895952952, + "loss": 2.6492, + "step": 6856 + }, + { + "epoch": 0.553385521749657, + "grad_norm": 0.6668030023574829, + "learning_rate": 0.0001482998262173694, + "loss": 2.6057, + "step": 6857 + }, + { + "epoch": 0.55346622548624, + "grad_norm": 0.7698997855186462, + "learning_rate": 0.0001482860022714514, + "loss": 2.6215, + "step": 6858 + }, + { + "epoch": 0.553546929222823, + "grad_norm": 0.6805251836776733, + "learning_rate": 0.00014827217712211997, + "loss": 2.5855, + "step": 6859 + }, + { + "epoch": 0.553627632959406, + "grad_norm": 0.8481020331382751, + "learning_rate": 0.00014825835076971968, + "loss": 2.6218, + "step": 6860 + }, + { + "epoch": 0.5537083366959891, + "grad_norm": 0.6801722645759583, + "learning_rate": 0.00014824452321459517, + "loss": 2.5998, + "step": 6861 + }, + { + "epoch": 0.553789040432572, + "grad_norm": 0.7174597978591919, + "learning_rate": 0.00014823069445709104, + "loss": 2.5782, + "step": 6862 + }, + { + "epoch": 0.553869744169155, + "grad_norm": 0.7607117891311646, + "learning_rate": 
0.0001482168644975519, + "loss": 2.6492, + "step": 6863 + }, + { + "epoch": 0.553950447905738, + "grad_norm": 0.7554265856742859, + "learning_rate": 0.00014820303333632246, + "loss": 2.6511, + "step": 6864 + }, + { + "epoch": 0.5540311516423211, + "grad_norm": 0.7520260214805603, + "learning_rate": 0.00014818920097374745, + "loss": 2.6258, + "step": 6865 + }, + { + "epoch": 0.5541118553789041, + "grad_norm": 0.7897995114326477, + "learning_rate": 0.00014817536741017152, + "loss": 2.6153, + "step": 6866 + }, + { + "epoch": 0.554192559115487, + "grad_norm": 0.7444615960121155, + "learning_rate": 0.00014816153264593957, + "loss": 2.5892, + "step": 6867 + }, + { + "epoch": 0.55427326285207, + "grad_norm": 0.6593222618103027, + "learning_rate": 0.0001481476966813963, + "loss": 2.6048, + "step": 6868 + }, + { + "epoch": 0.5543539665886531, + "grad_norm": 0.7517102360725403, + "learning_rate": 0.0001481338595168866, + "loss": 2.6496, + "step": 6869 + }, + { + "epoch": 0.5544346703252361, + "grad_norm": 0.7314056754112244, + "learning_rate": 0.00014812002115275529, + "loss": 2.6009, + "step": 6870 + }, + { + "epoch": 0.554515374061819, + "grad_norm": 0.6718037724494934, + "learning_rate": 0.00014810618158934722, + "loss": 2.6279, + "step": 6871 + }, + { + "epoch": 0.554596077798402, + "grad_norm": 0.6853529810905457, + "learning_rate": 0.00014809234082700735, + "loss": 2.6562, + "step": 6872 + }, + { + "epoch": 0.5546767815349851, + "grad_norm": 0.713599443435669, + "learning_rate": 0.0001480784988660807, + "loss": 2.5783, + "step": 6873 + }, + { + "epoch": 0.5547574852715681, + "grad_norm": 0.6820243000984192, + "learning_rate": 0.00014806465570691213, + "loss": 2.5753, + "step": 6874 + }, + { + "epoch": 0.5548381890081511, + "grad_norm": 0.6999152302742004, + "learning_rate": 0.00014805081134984673, + "loss": 2.5839, + "step": 6875 + }, + { + "epoch": 0.554918892744734, + "grad_norm": 0.7145923376083374, + "learning_rate": 0.00014803696579522948, + "loss": 2.6153, + 
"step": 6876 + }, + { + "epoch": 0.5549995964813171, + "grad_norm": 0.7569223046302795, + "learning_rate": 0.00014802311904340548, + "loss": 2.5879, + "step": 6877 + }, + { + "epoch": 0.5550803002179001, + "grad_norm": 0.6977131962776184, + "learning_rate": 0.00014800927109471983, + "loss": 2.6587, + "step": 6878 + }, + { + "epoch": 0.5551610039544831, + "grad_norm": 0.6693562865257263, + "learning_rate": 0.00014799542194951764, + "loss": 2.6271, + "step": 6879 + }, + { + "epoch": 0.5552417076910661, + "grad_norm": 0.6937456130981445, + "learning_rate": 0.00014798157160814406, + "loss": 2.6213, + "step": 6880 + }, + { + "epoch": 0.5553224114276492, + "grad_norm": 0.761538565158844, + "learning_rate": 0.0001479677200709443, + "loss": 2.6053, + "step": 6881 + }, + { + "epoch": 0.5554031151642321, + "grad_norm": 0.707457959651947, + "learning_rate": 0.00014795386733826356, + "loss": 2.5763, + "step": 6882 + }, + { + "epoch": 0.5554838189008151, + "grad_norm": 0.7323198318481445, + "learning_rate": 0.0001479400134104471, + "loss": 2.6899, + "step": 6883 + }, + { + "epoch": 0.5555645226373981, + "grad_norm": 0.7181541323661804, + "learning_rate": 0.0001479261582878402, + "loss": 2.5743, + "step": 6884 + }, + { + "epoch": 0.5556452263739811, + "grad_norm": 0.7683241367340088, + "learning_rate": 0.00014791230197078813, + "loss": 2.5295, + "step": 6885 + }, + { + "epoch": 0.5557259301105641, + "grad_norm": 0.7248150706291199, + "learning_rate": 0.00014789844445963626, + "loss": 2.6131, + "step": 6886 + }, + { + "epoch": 0.5558066338471471, + "grad_norm": 0.6868402361869812, + "learning_rate": 0.00014788458575472997, + "loss": 2.6182, + "step": 6887 + }, + { + "epoch": 0.5558873375837301, + "grad_norm": 0.6995798945426941, + "learning_rate": 0.0001478707258564146, + "loss": 2.5969, + "step": 6888 + }, + { + "epoch": 0.5559680413203131, + "grad_norm": 0.6912558078765869, + "learning_rate": 0.00014785686476503565, + "loss": 2.6264, + "step": 6889 + }, + { + "epoch": 
0.5560487450568962, + "grad_norm": 0.7485123872756958, + "learning_rate": 0.00014784300248093848, + "loss": 2.6036, + "step": 6890 + }, + { + "epoch": 0.5561294487934791, + "grad_norm": 0.7150819897651672, + "learning_rate": 0.00014782913900446864, + "loss": 2.5807, + "step": 6891 + }, + { + "epoch": 0.5562101525300621, + "grad_norm": 0.6715224385261536, + "learning_rate": 0.00014781527433597167, + "loss": 2.6164, + "step": 6892 + }, + { + "epoch": 0.5562908562666451, + "grad_norm": 0.6951256394386292, + "learning_rate": 0.000147801408475793, + "loss": 2.6106, + "step": 6893 + }, + { + "epoch": 0.5563715600032282, + "grad_norm": 0.7296997904777527, + "learning_rate": 0.00014778754142427832, + "loss": 2.6182, + "step": 6894 + }, + { + "epoch": 0.5564522637398112, + "grad_norm": 0.7484713196754456, + "learning_rate": 0.0001477736731817732, + "loss": 2.6384, + "step": 6895 + }, + { + "epoch": 0.5565329674763941, + "grad_norm": 0.6967526078224182, + "learning_rate": 0.00014775980374862326, + "loss": 2.5889, + "step": 6896 + }, + { + "epoch": 0.5566136712129771, + "grad_norm": 0.7004885077476501, + "learning_rate": 0.00014774593312517415, + "loss": 2.6549, + "step": 6897 + }, + { + "epoch": 0.5566943749495602, + "grad_norm": 0.7069302201271057, + "learning_rate": 0.00014773206131177158, + "loss": 2.6408, + "step": 6898 + }, + { + "epoch": 0.5567750786861432, + "grad_norm": 0.7048566341400146, + "learning_rate": 0.00014771818830876127, + "loss": 2.5909, + "step": 6899 + }, + { + "epoch": 0.5568557824227262, + "grad_norm": 0.7386630773544312, + "learning_rate": 0.00014770431411648897, + "loss": 2.6402, + "step": 6900 + }, + { + "epoch": 0.5569364861593091, + "grad_norm": 0.7244876027107239, + "learning_rate": 0.00014769043873530047, + "loss": 2.5548, + "step": 6901 + }, + { + "epoch": 0.5570171898958922, + "grad_norm": 0.6820651888847351, + "learning_rate": 0.00014767656216554156, + "loss": 2.682, + "step": 6902 + }, + { + "epoch": 0.5570978936324752, + "grad_norm": 
0.7281784415245056, + "learning_rate": 0.00014766268440755812, + "loss": 2.622, + "step": 6903 + }, + { + "epoch": 0.5571785973690582, + "grad_norm": 0.6525030136108398, + "learning_rate": 0.00014764880546169594, + "loss": 2.5809, + "step": 6904 + }, + { + "epoch": 0.5572593011056411, + "grad_norm": 0.6735210418701172, + "learning_rate": 0.00014763492532830102, + "loss": 2.6645, + "step": 6905 + }, + { + "epoch": 0.5573400048422242, + "grad_norm": 0.674700140953064, + "learning_rate": 0.00014762104400771922, + "loss": 2.6466, + "step": 6906 + }, + { + "epoch": 0.5574207085788072, + "grad_norm": 0.7570134401321411, + "learning_rate": 0.00014760716150029652, + "loss": 2.57, + "step": 6907 + }, + { + "epoch": 0.5575014123153902, + "grad_norm": 0.6532449722290039, + "learning_rate": 0.00014759327780637893, + "loss": 2.6207, + "step": 6908 + }, + { + "epoch": 0.5575821160519732, + "grad_norm": 0.7697737812995911, + "learning_rate": 0.00014757939292631242, + "loss": 2.5846, + "step": 6909 + }, + { + "epoch": 0.5576628197885563, + "grad_norm": 0.6750194430351257, + "learning_rate": 0.00014756550686044308, + "loss": 2.6421, + "step": 6910 + }, + { + "epoch": 0.5577435235251392, + "grad_norm": 0.7357683777809143, + "learning_rate": 0.00014755161960911697, + "loss": 2.6173, + "step": 6911 + }, + { + "epoch": 0.5578242272617222, + "grad_norm": 0.6812090277671814, + "learning_rate": 0.0001475377311726802, + "loss": 2.5556, + "step": 6912 + }, + { + "epoch": 0.5579049309983052, + "grad_norm": 0.7633040547370911, + "learning_rate": 0.00014752384155147888, + "loss": 2.6505, + "step": 6913 + }, + { + "epoch": 0.5579856347348883, + "grad_norm": 0.7426417469978333, + "learning_rate": 0.00014750995074585922, + "loss": 2.5575, + "step": 6914 + }, + { + "epoch": 0.5580663384714712, + "grad_norm": 0.6926711201667786, + "learning_rate": 0.00014749605875616744, + "loss": 2.5751, + "step": 6915 + }, + { + "epoch": 0.5581470422080542, + "grad_norm": 0.70630943775177, + "learning_rate": 
0.00014748216558274966, + "loss": 2.6228, + "step": 6916 + }, + { + "epoch": 0.5582277459446372, + "grad_norm": 0.7183346748352051, + "learning_rate": 0.0001474682712259522, + "loss": 2.5704, + "step": 6917 + }, + { + "epoch": 0.5583084496812203, + "grad_norm": 0.7622792720794678, + "learning_rate": 0.00014745437568612136, + "loss": 2.6031, + "step": 6918 + }, + { + "epoch": 0.5583891534178033, + "grad_norm": 0.6967802047729492, + "learning_rate": 0.00014744047896360344, + "loss": 2.6031, + "step": 6919 + }, + { + "epoch": 0.5584698571543862, + "grad_norm": 0.7827191948890686, + "learning_rate": 0.00014742658105874475, + "loss": 2.5427, + "step": 6920 + }, + { + "epoch": 0.5585505608909692, + "grad_norm": 0.6865705847740173, + "learning_rate": 0.0001474126819718917, + "loss": 2.6514, + "step": 6921 + }, + { + "epoch": 0.5586312646275523, + "grad_norm": 0.7181665897369385, + "learning_rate": 0.0001473987817033906, + "loss": 2.613, + "step": 6922 + }, + { + "epoch": 0.5587119683641353, + "grad_norm": 0.7198463082313538, + "learning_rate": 0.00014738488025358806, + "loss": 2.6423, + "step": 6923 + }, + { + "epoch": 0.5587926721007183, + "grad_norm": 0.773078441619873, + "learning_rate": 0.00014737097762283042, + "loss": 2.5946, + "step": 6924 + }, + { + "epoch": 0.5588733758373012, + "grad_norm": 0.7732799649238586, + "learning_rate": 0.00014735707381146416, + "loss": 2.6778, + "step": 6925 + }, + { + "epoch": 0.5589540795738843, + "grad_norm": 0.7639997601509094, + "learning_rate": 0.00014734316881983585, + "loss": 2.6064, + "step": 6926 + }, + { + "epoch": 0.5590347833104673, + "grad_norm": 0.7912085652351379, + "learning_rate": 0.00014732926264829198, + "loss": 2.5765, + "step": 6927 + }, + { + "epoch": 0.5591154870470503, + "grad_norm": 0.7460121512413025, + "learning_rate": 0.0001473153552971792, + "loss": 2.6724, + "step": 6928 + }, + { + "epoch": 0.5591961907836333, + "grad_norm": 0.6853603720664978, + "learning_rate": 0.00014730144676684408, + "loss": 2.5846, 
+ "step": 6929 + }, + { + "epoch": 0.5592768945202163, + "grad_norm": 0.7368159294128418, + "learning_rate": 0.00014728753705763324, + "loss": 2.6626, + "step": 6930 + }, + { + "epoch": 0.5593575982567993, + "grad_norm": 0.6888907551765442, + "learning_rate": 0.0001472736261698934, + "loss": 2.6169, + "step": 6931 + }, + { + "epoch": 0.5594383019933823, + "grad_norm": 0.6978163719177246, + "learning_rate": 0.0001472597141039712, + "loss": 2.6367, + "step": 6932 + }, + { + "epoch": 0.5595190057299653, + "grad_norm": 0.7829774618148804, + "learning_rate": 0.00014724580086021335, + "loss": 2.5983, + "step": 6933 + }, + { + "epoch": 0.5595997094665484, + "grad_norm": 0.7872018218040466, + "learning_rate": 0.0001472318864389667, + "loss": 2.5418, + "step": 6934 + }, + { + "epoch": 0.5596804132031313, + "grad_norm": 0.6994973421096802, + "learning_rate": 0.00014721797084057793, + "loss": 2.6062, + "step": 6935 + }, + { + "epoch": 0.5597611169397143, + "grad_norm": 0.7281144857406616, + "learning_rate": 0.00014720405406539394, + "loss": 2.573, + "step": 6936 + }, + { + "epoch": 0.5598418206762973, + "grad_norm": 0.713513970375061, + "learning_rate": 0.0001471901361137615, + "loss": 2.6589, + "step": 6937 + }, + { + "epoch": 0.5599225244128803, + "grad_norm": 0.7752750515937805, + "learning_rate": 0.00014717621698602754, + "loss": 2.6478, + "step": 6938 + }, + { + "epoch": 0.5600032281494634, + "grad_norm": 0.6876000165939331, + "learning_rate": 0.00014716229668253889, + "loss": 2.6092, + "step": 6939 + }, + { + "epoch": 0.5600839318860463, + "grad_norm": 0.6371028423309326, + "learning_rate": 0.00014714837520364256, + "loss": 2.606, + "step": 6940 + }, + { + "epoch": 0.5601646356226293, + "grad_norm": 0.6488915085792542, + "learning_rate": 0.00014713445254968546, + "loss": 2.5769, + "step": 6941 + }, + { + "epoch": 0.5602453393592123, + "grad_norm": 0.7286413908004761, + "learning_rate": 0.00014712052872101458, + "loss": 2.6267, + "step": 6942 + }, + { + "epoch": 
0.5603260430957954, + "grad_norm": 0.6863759160041809, + "learning_rate": 0.00014710660371797696, + "loss": 2.641, + "step": 6943 + }, + { + "epoch": 0.5604067468323783, + "grad_norm": 0.706900417804718, + "learning_rate": 0.00014709267754091964, + "loss": 2.6344, + "step": 6944 + }, + { + "epoch": 0.5604874505689613, + "grad_norm": 0.6462892293930054, + "learning_rate": 0.0001470787501901897, + "loss": 2.5561, + "step": 6945 + }, + { + "epoch": 0.5605681543055443, + "grad_norm": 0.7342472076416016, + "learning_rate": 0.00014706482166613425, + "loss": 2.583, + "step": 6946 + }, + { + "epoch": 0.5606488580421274, + "grad_norm": 0.7132803797721863, + "learning_rate": 0.00014705089196910038, + "loss": 2.558, + "step": 6947 + }, + { + "epoch": 0.5607295617787104, + "grad_norm": 0.7709125876426697, + "learning_rate": 0.00014703696109943533, + "loss": 2.6165, + "step": 6948 + }, + { + "epoch": 0.5608102655152933, + "grad_norm": 0.7108885645866394, + "learning_rate": 0.00014702302905748619, + "loss": 2.5788, + "step": 6949 + }, + { + "epoch": 0.5608909692518763, + "grad_norm": 0.7295591235160828, + "learning_rate": 0.0001470090958436003, + "loss": 2.6526, + "step": 6950 + }, + { + "epoch": 0.5609716729884594, + "grad_norm": 0.7235364317893982, + "learning_rate": 0.00014699516145812486, + "loss": 2.604, + "step": 6951 + }, + { + "epoch": 0.5610523767250424, + "grad_norm": 0.6723269820213318, + "learning_rate": 0.00014698122590140714, + "loss": 2.5838, + "step": 6952 + }, + { + "epoch": 0.5611330804616254, + "grad_norm": 0.7022266983985901, + "learning_rate": 0.00014696728917379447, + "loss": 2.6086, + "step": 6953 + }, + { + "epoch": 0.5612137841982083, + "grad_norm": 0.6923824548721313, + "learning_rate": 0.00014695335127563414, + "loss": 2.6678, + "step": 6954 + }, + { + "epoch": 0.5612944879347914, + "grad_norm": 0.6909339427947998, + "learning_rate": 0.0001469394122072736, + "loss": 2.6397, + "step": 6955 + }, + { + "epoch": 0.5613751916713744, + "grad_norm": 
0.710299015045166, + "learning_rate": 0.00014692547196906022, + "loss": 2.5973, + "step": 6956 + }, + { + "epoch": 0.5614558954079574, + "grad_norm": 0.7141178250312805, + "learning_rate": 0.00014691153056134136, + "loss": 2.6111, + "step": 6957 + }, + { + "epoch": 0.5615365991445403, + "grad_norm": 0.6994750499725342, + "learning_rate": 0.00014689758798446456, + "loss": 2.6498, + "step": 6958 + }, + { + "epoch": 0.5616173028811234, + "grad_norm": 0.6951611638069153, + "learning_rate": 0.00014688364423877726, + "loss": 2.6208, + "step": 6959 + }, + { + "epoch": 0.5616980066177064, + "grad_norm": 0.6610642075538635, + "learning_rate": 0.000146869699324627, + "loss": 2.5725, + "step": 6960 + }, + { + "epoch": 0.5617787103542894, + "grad_norm": 0.6771267056465149, + "learning_rate": 0.00014685575324236135, + "loss": 2.6336, + "step": 6961 + }, + { + "epoch": 0.5618594140908724, + "grad_norm": 0.7431008815765381, + "learning_rate": 0.0001468418059923278, + "loss": 2.6782, + "step": 6962 + }, + { + "epoch": 0.5619401178274555, + "grad_norm": 0.7399705648422241, + "learning_rate": 0.000146827857574874, + "loss": 2.6212, + "step": 6963 + }, + { + "epoch": 0.5620208215640384, + "grad_norm": 0.7237067222595215, + "learning_rate": 0.00014681390799034763, + "loss": 2.6261, + "step": 6964 + }, + { + "epoch": 0.5621015253006214, + "grad_norm": 0.7033257484436035, + "learning_rate": 0.00014679995723909623, + "loss": 2.6912, + "step": 6965 + }, + { + "epoch": 0.5621822290372044, + "grad_norm": 0.6953759789466858, + "learning_rate": 0.00014678600532146762, + "loss": 2.6022, + "step": 6966 + }, + { + "epoch": 0.5622629327737875, + "grad_norm": 0.8338057994842529, + "learning_rate": 0.0001467720522378094, + "loss": 2.595, + "step": 6967 + }, + { + "epoch": 0.5623436365103704, + "grad_norm": 0.6506100296974182, + "learning_rate": 0.00014675809798846942, + "loss": 2.6033, + "step": 6968 + }, + { + "epoch": 0.5624243402469534, + "grad_norm": 0.7122468948364258, + "learning_rate": 
0.0001467441425737954, + "loss": 2.56, + "step": 6969 + }, + { + "epoch": 0.5625050439835364, + "grad_norm": 0.7012680172920227, + "learning_rate": 0.00014673018599413516, + "loss": 2.6052, + "step": 6970 + }, + { + "epoch": 0.5625857477201195, + "grad_norm": 0.668187141418457, + "learning_rate": 0.00014671622824983653, + "loss": 2.6675, + "step": 6971 + }, + { + "epoch": 0.5626664514567025, + "grad_norm": 0.7259203791618347, + "learning_rate": 0.00014670226934124738, + "loss": 2.5977, + "step": 6972 + }, + { + "epoch": 0.5627471551932854, + "grad_norm": 0.6705875396728516, + "learning_rate": 0.00014668830926871555, + "loss": 2.649, + "step": 6973 + }, + { + "epoch": 0.5628278589298684, + "grad_norm": 0.682731568813324, + "learning_rate": 0.00014667434803258906, + "loss": 2.6084, + "step": 6974 + }, + { + "epoch": 0.5629085626664515, + "grad_norm": 0.7061700224876404, + "learning_rate": 0.00014666038563321577, + "loss": 2.6256, + "step": 6975 + }, + { + "epoch": 0.5629892664030345, + "grad_norm": 0.6839977502822876, + "learning_rate": 0.00014664642207094374, + "loss": 2.6342, + "step": 6976 + }, + { + "epoch": 0.5630699701396175, + "grad_norm": 0.7376503348350525, + "learning_rate": 0.00014663245734612094, + "loss": 2.6001, + "step": 6977 + }, + { + "epoch": 0.5631506738762004, + "grad_norm": 0.6901546716690063, + "learning_rate": 0.0001466184914590954, + "loss": 2.6715, + "step": 6978 + }, + { + "epoch": 0.5632313776127835, + "grad_norm": 0.816223680973053, + "learning_rate": 0.00014660452441021512, + "loss": 2.6407, + "step": 6979 + }, + { + "epoch": 0.5633120813493665, + "grad_norm": 0.6904644966125488, + "learning_rate": 0.00014659055619982835, + "loss": 2.5543, + "step": 6980 + }, + { + "epoch": 0.5633927850859495, + "grad_norm": 0.6784235239028931, + "learning_rate": 0.0001465765868282831, + "loss": 2.6184, + "step": 6981 + }, + { + "epoch": 0.5634734888225325, + "grad_norm": 0.7689006328582764, + "learning_rate": 0.00014656261629592755, + "loss": 2.644, + 
"step": 6982 + }, + { + "epoch": 0.5635541925591155, + "grad_norm": 0.7608775496482849, + "learning_rate": 0.0001465486446031099, + "loss": 2.5952, + "step": 6983 + }, + { + "epoch": 0.5636348962956985, + "grad_norm": 0.7266525626182556, + "learning_rate": 0.00014653467175017833, + "loss": 2.6479, + "step": 6984 + }, + { + "epoch": 0.5637156000322815, + "grad_norm": 0.6907477974891663, + "learning_rate": 0.00014652069773748113, + "loss": 2.5825, + "step": 6985 + }, + { + "epoch": 0.5637963037688645, + "grad_norm": 0.7790403366088867, + "learning_rate": 0.00014650672256536648, + "loss": 2.5948, + "step": 6986 + }, + { + "epoch": 0.5638770075054474, + "grad_norm": 0.7072858214378357, + "learning_rate": 0.00014649274623418278, + "loss": 2.6017, + "step": 6987 + }, + { + "epoch": 0.5639577112420305, + "grad_norm": 0.7140414118766785, + "learning_rate": 0.0001464787687442783, + "loss": 2.5709, + "step": 6988 + }, + { + "epoch": 0.5640384149786135, + "grad_norm": 0.857783317565918, + "learning_rate": 0.00014646479009600139, + "loss": 2.7049, + "step": 6989 + }, + { + "epoch": 0.5641191187151965, + "grad_norm": 0.7599344253540039, + "learning_rate": 0.00014645081028970047, + "loss": 2.6369, + "step": 6990 + }, + { + "epoch": 0.5641998224517795, + "grad_norm": 0.7286150455474854, + "learning_rate": 0.00014643682932572393, + "loss": 2.6238, + "step": 6991 + }, + { + "epoch": 0.5642805261883626, + "grad_norm": 0.7095075249671936, + "learning_rate": 0.0001464228472044202, + "loss": 2.5924, + "step": 6992 + }, + { + "epoch": 0.5643612299249455, + "grad_norm": 0.7583668828010559, + "learning_rate": 0.0001464088639261378, + "loss": 2.6098, + "step": 6993 + }, + { + "epoch": 0.5644419336615285, + "grad_norm": 0.7393970489501953, + "learning_rate": 0.00014639487949122515, + "loss": 2.6036, + "step": 6994 + }, + { + "epoch": 0.5645226373981115, + "grad_norm": 0.6789388656616211, + "learning_rate": 0.00014638089390003086, + "loss": 2.642, + "step": 6995 + }, + { + "epoch": 
0.5646033411346946, + "grad_norm": 0.8021289706230164, + "learning_rate": 0.00014636690715290346, + "loss": 2.6851, + "step": 6996 + }, + { + "epoch": 0.5646840448712775, + "grad_norm": 0.6931039094924927, + "learning_rate": 0.00014635291925019152, + "loss": 2.6358, + "step": 6997 + }, + { + "epoch": 0.5647647486078605, + "grad_norm": 0.7356590032577515, + "learning_rate": 0.00014633893019224366, + "loss": 2.5661, + "step": 6998 + }, + { + "epoch": 0.5648454523444435, + "grad_norm": 0.6777941584587097, + "learning_rate": 0.0001463249399794085, + "loss": 2.5578, + "step": 6999 + }, + { + "epoch": 0.5649261560810266, + "grad_norm": 0.7163615822792053, + "learning_rate": 0.0001463109486120348, + "loss": 2.5582, + "step": 7000 + }, + { + "epoch": 0.5649261560810266, + "eval_loss": 2.5298855304718018, + "eval_runtime": 757.774, + "eval_samples_per_second": 3.457, + "eval_steps_per_second": 0.577, + "step": 7000 + }, + { + "epoch": 0.5650068598176096, + "grad_norm": 0.7175148129463196, + "learning_rate": 0.0001462969560904712, + "loss": 2.568, + "step": 7001 + }, + { + "epoch": 0.5650875635541925, + "grad_norm": 0.6998937129974365, + "learning_rate": 0.00014628296241506636, + "loss": 2.6347, + "step": 7002 + }, + { + "epoch": 0.5651682672907755, + "grad_norm": 0.8140312433242798, + "learning_rate": 0.00014626896758616916, + "loss": 2.6566, + "step": 7003 + }, + { + "epoch": 0.5652489710273586, + "grad_norm": 0.7218164205551147, + "learning_rate": 0.00014625497160412833, + "loss": 2.5693, + "step": 7004 + }, + { + "epoch": 0.5653296747639416, + "grad_norm": 0.6974074244499207, + "learning_rate": 0.0001462409744692927, + "loss": 2.6084, + "step": 7005 + }, + { + "epoch": 0.5654103785005246, + "grad_norm": 0.7475053071975708, + "learning_rate": 0.00014622697618201113, + "loss": 2.6534, + "step": 7006 + }, + { + "epoch": 0.5654910822371075, + "grad_norm": 0.6768492460250854, + "learning_rate": 0.00014621297674263247, + "loss": 2.585, + "step": 7007 + }, + { + "epoch": 
0.5655717859736906, + "grad_norm": 0.7023029923439026, + "learning_rate": 0.0001461989761515056, + "loss": 2.6219, + "step": 7008 + }, + { + "epoch": 0.5656524897102736, + "grad_norm": 0.7248445749282837, + "learning_rate": 0.0001461849744089795, + "loss": 2.6382, + "step": 7009 + }, + { + "epoch": 0.5657331934468566, + "grad_norm": 0.6961148381233215, + "learning_rate": 0.00014617097151540308, + "loss": 2.7184, + "step": 7010 + }, + { + "epoch": 0.5658138971834396, + "grad_norm": 0.6649057269096375, + "learning_rate": 0.0001461569674711254, + "loss": 2.6059, + "step": 7011 + }, + { + "epoch": 0.5658946009200226, + "grad_norm": 0.7451788783073425, + "learning_rate": 0.00014614296227649542, + "loss": 2.5697, + "step": 7012 + }, + { + "epoch": 0.5659753046566056, + "grad_norm": 0.6880216598510742, + "learning_rate": 0.0001461289559318622, + "loss": 2.5785, + "step": 7013 + }, + { + "epoch": 0.5660560083931886, + "grad_norm": 0.7505971789360046, + "learning_rate": 0.00014611494843757482, + "loss": 2.5479, + "step": 7014 + }, + { + "epoch": 0.5661367121297716, + "grad_norm": 0.745914876461029, + "learning_rate": 0.00014610093979398235, + "loss": 2.6367, + "step": 7015 + }, + { + "epoch": 0.5662174158663547, + "grad_norm": 0.6758660674095154, + "learning_rate": 0.000146086930001434, + "loss": 2.5673, + "step": 7016 + }, + { + "epoch": 0.5662981196029376, + "grad_norm": 0.7114273309707642, + "learning_rate": 0.00014607291906027886, + "loss": 2.6188, + "step": 7017 + }, + { + "epoch": 0.5663788233395206, + "grad_norm": 0.6791165471076965, + "learning_rate": 0.00014605890697086613, + "loss": 2.6197, + "step": 7018 + }, + { + "epoch": 0.5664595270761036, + "grad_norm": 0.6948217153549194, + "learning_rate": 0.00014604489373354503, + "loss": 2.5996, + "step": 7019 + }, + { + "epoch": 0.5665402308126867, + "grad_norm": 0.6993576884269714, + "learning_rate": 0.00014603087934866483, + "loss": 2.565, + "step": 7020 + }, + { + "epoch": 0.5666209345492697, + "grad_norm": 
0.6936905384063721, + "learning_rate": 0.0001460168638165748, + "loss": 2.6524, + "step": 7021 + }, + { + "epoch": 0.5667016382858526, + "grad_norm": 0.6810741424560547, + "learning_rate": 0.00014600284713762424, + "loss": 2.6519, + "step": 7022 + }, + { + "epoch": 0.5667823420224356, + "grad_norm": 0.7540227770805359, + "learning_rate": 0.00014598882931216245, + "loss": 2.659, + "step": 7023 + }, + { + "epoch": 0.5668630457590187, + "grad_norm": 0.6520613431930542, + "learning_rate": 0.0001459748103405388, + "loss": 2.5341, + "step": 7024 + }, + { + "epoch": 0.5669437494956017, + "grad_norm": 0.7159109711647034, + "learning_rate": 0.00014596079022310277, + "loss": 2.6548, + "step": 7025 + }, + { + "epoch": 0.5670244532321846, + "grad_norm": 0.803284227848053, + "learning_rate": 0.00014594676896020366, + "loss": 2.705, + "step": 7026 + }, + { + "epoch": 0.5671051569687676, + "grad_norm": 0.7069976925849915, + "learning_rate": 0.00014593274655219095, + "loss": 2.5733, + "step": 7027 + }, + { + "epoch": 0.5671858607053507, + "grad_norm": 0.7085167169570923, + "learning_rate": 0.00014591872299941417, + "loss": 2.6247, + "step": 7028 + }, + { + "epoch": 0.5672665644419337, + "grad_norm": 0.6748499274253845, + "learning_rate": 0.00014590469830222272, + "loss": 2.6446, + "step": 7029 + }, + { + "epoch": 0.5673472681785167, + "grad_norm": 0.6885821223258972, + "learning_rate": 0.00014589067246096623, + "loss": 2.5879, + "step": 7030 + }, + { + "epoch": 0.5674279719150996, + "grad_norm": 0.7220324277877808, + "learning_rate": 0.0001458766454759942, + "loss": 2.6249, + "step": 7031 + }, + { + "epoch": 0.5675086756516827, + "grad_norm": 0.6712783575057983, + "learning_rate": 0.00014586261734765628, + "loss": 2.5971, + "step": 7032 + }, + { + "epoch": 0.5675893793882657, + "grad_norm": 0.6582161784172058, + "learning_rate": 0.00014584858807630203, + "loss": 2.6224, + "step": 7033 + }, + { + "epoch": 0.5676700831248487, + "grad_norm": 0.6699219346046448, + "learning_rate": 
0.0001458345576622811, + "loss": 2.5926, + "step": 7034 + }, + { + "epoch": 0.5677507868614317, + "grad_norm": 0.6508033871650696, + "learning_rate": 0.0001458205261059432, + "loss": 2.6311, + "step": 7035 + }, + { + "epoch": 0.5678314905980147, + "grad_norm": 0.7551338076591492, + "learning_rate": 0.00014580649340763802, + "loss": 2.5729, + "step": 7036 + }, + { + "epoch": 0.5679121943345977, + "grad_norm": 0.6875829100608826, + "learning_rate": 0.00014579245956771527, + "loss": 2.6253, + "step": 7037 + }, + { + "epoch": 0.5679928980711807, + "grad_norm": 0.698204517364502, + "learning_rate": 0.00014577842458652474, + "loss": 2.6218, + "step": 7038 + }, + { + "epoch": 0.5680736018077637, + "grad_norm": 0.8258630037307739, + "learning_rate": 0.00014576438846441615, + "loss": 2.6307, + "step": 7039 + }, + { + "epoch": 0.5681543055443466, + "grad_norm": 0.753105878829956, + "learning_rate": 0.00014575035120173942, + "loss": 2.5664, + "step": 7040 + }, + { + "epoch": 0.5682350092809297, + "grad_norm": 0.6999726295471191, + "learning_rate": 0.00014573631279884435, + "loss": 2.6857, + "step": 7041 + }, + { + "epoch": 0.5683157130175127, + "grad_norm": 0.6484847068786621, + "learning_rate": 0.00014572227325608078, + "loss": 2.6068, + "step": 7042 + }, + { + "epoch": 0.5683964167540957, + "grad_norm": 0.7098011374473572, + "learning_rate": 0.00014570823257379866, + "loss": 2.6591, + "step": 7043 + }, + { + "epoch": 0.5684771204906787, + "grad_norm": 0.8304192423820496, + "learning_rate": 0.0001456941907523479, + "loss": 2.6582, + "step": 7044 + }, + { + "epoch": 0.5685578242272618, + "grad_norm": 0.763214111328125, + "learning_rate": 0.00014568014779207844, + "loss": 2.6605, + "step": 7045 + }, + { + "epoch": 0.5686385279638447, + "grad_norm": 0.6805880665779114, + "learning_rate": 0.00014566610369334032, + "loss": 2.6362, + "step": 7046 + }, + { + "epoch": 0.5687192317004277, + "grad_norm": 0.6753434538841248, + "learning_rate": 0.00014565205845648352, + "loss": 2.6352, 
+ "step": 7047 + }, + { + "epoch": 0.5687999354370107, + "grad_norm": 0.7065438032150269, + "learning_rate": 0.00014563801208185807, + "loss": 2.5975, + "step": 7048 + }, + { + "epoch": 0.5688806391735938, + "grad_norm": 0.6863527894020081, + "learning_rate": 0.00014562396456981407, + "loss": 2.576, + "step": 7049 + }, + { + "epoch": 0.5689613429101767, + "grad_norm": 0.7344440817832947, + "learning_rate": 0.00014560991592070158, + "loss": 2.5933, + "step": 7050 + }, + { + "epoch": 0.5690420466467597, + "grad_norm": 0.699992835521698, + "learning_rate": 0.00014559586613487082, + "loss": 2.6161, + "step": 7051 + }, + { + "epoch": 0.5691227503833427, + "grad_norm": 0.7287258505821228, + "learning_rate": 0.00014558181521267185, + "loss": 2.665, + "step": 7052 + }, + { + "epoch": 0.5692034541199258, + "grad_norm": 0.7304692268371582, + "learning_rate": 0.0001455677631544549, + "loss": 2.5696, + "step": 7053 + }, + { + "epoch": 0.5692841578565088, + "grad_norm": 0.6556086540222168, + "learning_rate": 0.00014555370996057016, + "loss": 2.6405, + "step": 7054 + }, + { + "epoch": 0.5693648615930917, + "grad_norm": 0.6796221137046814, + "learning_rate": 0.0001455396556313679, + "loss": 2.6475, + "step": 7055 + }, + { + "epoch": 0.5694455653296747, + "grad_norm": 0.7067505717277527, + "learning_rate": 0.00014552560016719838, + "loss": 2.6344, + "step": 7056 + }, + { + "epoch": 0.5695262690662578, + "grad_norm": 0.7108997106552124, + "learning_rate": 0.00014551154356841193, + "loss": 2.6543, + "step": 7057 + }, + { + "epoch": 0.5696069728028408, + "grad_norm": 0.7296212911605835, + "learning_rate": 0.0001454974858353588, + "loss": 2.6152, + "step": 7058 + }, + { + "epoch": 0.5696876765394238, + "grad_norm": 0.7329154014587402, + "learning_rate": 0.00014548342696838943, + "loss": 2.6338, + "step": 7059 + }, + { + "epoch": 0.5697683802760067, + "grad_norm": 0.6880258321762085, + "learning_rate": 0.00014546936696785412, + "loss": 2.5834, + "step": 7060 + }, + { + "epoch": 
0.5698490840125898, + "grad_norm": 0.7140741348266602, + "learning_rate": 0.00014545530583410336, + "loss": 2.6361, + "step": 7061 + }, + { + "epoch": 0.5699297877491728, + "grad_norm": 0.6419476866722107, + "learning_rate": 0.00014544124356748755, + "loss": 2.4982, + "step": 7062 + }, + { + "epoch": 0.5700104914857558, + "grad_norm": 0.6934036612510681, + "learning_rate": 0.00014542718016835718, + "loss": 2.5748, + "step": 7063 + }, + { + "epoch": 0.5700911952223388, + "grad_norm": 0.721663236618042, + "learning_rate": 0.0001454131156370627, + "loss": 2.5419, + "step": 7064 + }, + { + "epoch": 0.5701718989589218, + "grad_norm": 0.734062671661377, + "learning_rate": 0.00014539904997395468, + "loss": 2.6288, + "step": 7065 + }, + { + "epoch": 0.5702526026955048, + "grad_norm": 0.7927694320678711, + "learning_rate": 0.00014538498317938367, + "loss": 2.6331, + "step": 7066 + }, + { + "epoch": 0.5703333064320878, + "grad_norm": 0.715929388999939, + "learning_rate": 0.00014537091525370025, + "loss": 2.6333, + "step": 7067 + }, + { + "epoch": 0.5704140101686708, + "grad_norm": 0.772230327129364, + "learning_rate": 0.00014535684619725498, + "loss": 2.6019, + "step": 7068 + }, + { + "epoch": 0.5704947139052539, + "grad_norm": 0.7277318239212036, + "learning_rate": 0.0001453427760103986, + "loss": 2.6062, + "step": 7069 + }, + { + "epoch": 0.5705754176418368, + "grad_norm": 0.6708227396011353, + "learning_rate": 0.00014532870469348164, + "loss": 2.6613, + "step": 7070 + }, + { + "epoch": 0.5706561213784198, + "grad_norm": 0.7507323622703552, + "learning_rate": 0.0001453146322468549, + "loss": 2.6456, + "step": 7071 + }, + { + "epoch": 0.5707368251150028, + "grad_norm": 0.6864063739776611, + "learning_rate": 0.00014530055867086912, + "loss": 2.6361, + "step": 7072 + }, + { + "epoch": 0.5708175288515859, + "grad_norm": 0.6805310249328613, + "learning_rate": 0.00014528648396587498, + "loss": 2.6088, + "step": 7073 + }, + { + "epoch": 0.5708982325881689, + "grad_norm": 
0.7946523427963257, + "learning_rate": 0.00014527240813222325, + "loss": 2.6533, + "step": 7074 + }, + { + "epoch": 0.5709789363247518, + "grad_norm": 0.6814306974411011, + "learning_rate": 0.00014525833117026474, + "loss": 2.6478, + "step": 7075 + }, + { + "epoch": 0.5710596400613348, + "grad_norm": 0.749664843082428, + "learning_rate": 0.00014524425308035034, + "loss": 2.6296, + "step": 7076 + }, + { + "epoch": 0.5711403437979179, + "grad_norm": 0.6774656772613525, + "learning_rate": 0.00014523017386283091, + "loss": 2.5867, + "step": 7077 + }, + { + "epoch": 0.5712210475345009, + "grad_norm": 0.7331634163856506, + "learning_rate": 0.00014521609351805733, + "loss": 2.6484, + "step": 7078 + }, + { + "epoch": 0.5713017512710838, + "grad_norm": 0.7076910734176636, + "learning_rate": 0.00014520201204638045, + "loss": 2.6464, + "step": 7079 + }, + { + "epoch": 0.5713824550076668, + "grad_norm": 0.74099200963974, + "learning_rate": 0.00014518792944815127, + "loss": 2.6304, + "step": 7080 + }, + { + "epoch": 0.5714631587442499, + "grad_norm": 0.6673823595046997, + "learning_rate": 0.00014517384572372078, + "loss": 2.5903, + "step": 7081 + }, + { + "epoch": 0.5715438624808329, + "grad_norm": 0.6872609257698059, + "learning_rate": 0.00014515976087343997, + "loss": 2.6189, + "step": 7082 + }, + { + "epoch": 0.5716245662174159, + "grad_norm": 0.7363224625587463, + "learning_rate": 0.0001451456748976599, + "loss": 2.5845, + "step": 7083 + }, + { + "epoch": 0.5717052699539988, + "grad_norm": 0.7672157287597656, + "learning_rate": 0.00014513158779673157, + "loss": 2.6331, + "step": 7084 + }, + { + "epoch": 0.5717859736905819, + "grad_norm": 0.661195695400238, + "learning_rate": 0.00014511749957100612, + "loss": 2.5827, + "step": 7085 + }, + { + "epoch": 0.5718666774271649, + "grad_norm": 0.8034788370132446, + "learning_rate": 0.0001451034102208346, + "loss": 2.6209, + "step": 7086 + }, + { + "epoch": 0.5719473811637479, + "grad_norm": 0.7318302392959595, + "learning_rate": 
0.00014508931974656822, + "loss": 2.5898, + "step": 7087 + }, + { + "epoch": 0.5720280849003309, + "grad_norm": 0.7334744930267334, + "learning_rate": 0.00014507522814855814, + "loss": 2.5893, + "step": 7088 + }, + { + "epoch": 0.5721087886369138, + "grad_norm": 0.783051609992981, + "learning_rate": 0.00014506113542715553, + "loss": 2.6284, + "step": 7089 + }, + { + "epoch": 0.5721894923734969, + "grad_norm": 0.7319497466087341, + "learning_rate": 0.00014504704158271165, + "loss": 2.5705, + "step": 7090 + }, + { + "epoch": 0.5722701961100799, + "grad_norm": 0.7886925935745239, + "learning_rate": 0.00014503294661557772, + "loss": 2.641, + "step": 7091 + }, + { + "epoch": 0.5723508998466629, + "grad_norm": 0.6882795691490173, + "learning_rate": 0.00014501885052610502, + "loss": 2.5714, + "step": 7092 + }, + { + "epoch": 0.5724316035832459, + "grad_norm": 0.7089235186576843, + "learning_rate": 0.00014500475331464494, + "loss": 2.6073, + "step": 7093 + }, + { + "epoch": 0.5725123073198289, + "grad_norm": 0.7261029481887817, + "learning_rate": 0.00014499065498154874, + "loss": 2.5595, + "step": 7094 + }, + { + "epoch": 0.5725930110564119, + "grad_norm": 0.7625105977058411, + "learning_rate": 0.0001449765555271678, + "loss": 2.5978, + "step": 7095 + }, + { + "epoch": 0.5726737147929949, + "grad_norm": 0.7853986024856567, + "learning_rate": 0.00014496245495185353, + "loss": 2.6378, + "step": 7096 + }, + { + "epoch": 0.5727544185295779, + "grad_norm": 0.8070923686027527, + "learning_rate": 0.00014494835325595736, + "loss": 2.7062, + "step": 7097 + }, + { + "epoch": 0.572835122266161, + "grad_norm": 0.7074965834617615, + "learning_rate": 0.00014493425043983073, + "loss": 2.5177, + "step": 7098 + }, + { + "epoch": 0.5729158260027439, + "grad_norm": 0.6890520453453064, + "learning_rate": 0.00014492014650382512, + "loss": 2.6058, + "step": 7099 + }, + { + "epoch": 0.5729965297393269, + "grad_norm": 0.6979860067367554, + "learning_rate": 0.00014490604144829202, + "loss": 
2.5274, + "step": 7100 + }, + { + "epoch": 0.5730772334759099, + "grad_norm": 0.7972229719161987, + "learning_rate": 0.000144891935273583, + "loss": 2.6369, + "step": 7101 + }, + { + "epoch": 0.573157937212493, + "grad_norm": 0.6994345188140869, + "learning_rate": 0.0001448778279800496, + "loss": 2.5975, + "step": 7102 + }, + { + "epoch": 0.573238640949076, + "grad_norm": 0.7943929433822632, + "learning_rate": 0.0001448637195680434, + "loss": 2.6317, + "step": 7103 + }, + { + "epoch": 0.5733193446856589, + "grad_norm": 0.6975306272506714, + "learning_rate": 0.00014484961003791605, + "loss": 2.6264, + "step": 7104 + }, + { + "epoch": 0.5734000484222419, + "grad_norm": 0.6889060735702515, + "learning_rate": 0.00014483549939001917, + "loss": 2.5974, + "step": 7105 + }, + { + "epoch": 0.573480752158825, + "grad_norm": 0.7372777462005615, + "learning_rate": 0.00014482138762470444, + "loss": 2.5851, + "step": 7106 + }, + { + "epoch": 0.573561455895408, + "grad_norm": 0.7045157551765442, + "learning_rate": 0.00014480727474232362, + "loss": 2.6451, + "step": 7107 + }, + { + "epoch": 0.5736421596319909, + "grad_norm": 0.6974517107009888, + "learning_rate": 0.00014479316074322832, + "loss": 2.6796, + "step": 7108 + }, + { + "epoch": 0.5737228633685739, + "grad_norm": 0.7328097224235535, + "learning_rate": 0.00014477904562777038, + "loss": 2.5923, + "step": 7109 + }, + { + "epoch": 0.573803567105157, + "grad_norm": 0.7288877964019775, + "learning_rate": 0.0001447649293963016, + "loss": 2.6012, + "step": 7110 + }, + { + "epoch": 0.57388427084174, + "grad_norm": 0.7054389119148254, + "learning_rate": 0.00014475081204917372, + "loss": 2.6666, + "step": 7111 + }, + { + "epoch": 0.573964974578323, + "grad_norm": 0.7447949647903442, + "learning_rate": 0.00014473669358673865, + "loss": 2.6093, + "step": 7112 + }, + { + "epoch": 0.5740456783149059, + "grad_norm": 0.6431592106819153, + "learning_rate": 0.0001447225740093482, + "loss": 2.6242, + "step": 7113 + }, + { + "epoch": 
0.574126382051489, + "grad_norm": 0.7096747756004333, + "learning_rate": 0.00014470845331735434, + "loss": 2.6297, + "step": 7114 + }, + { + "epoch": 0.574207085788072, + "grad_norm": 0.6918880939483643, + "learning_rate": 0.00014469433151110894, + "loss": 2.5849, + "step": 7115 + }, + { + "epoch": 0.574287789524655, + "grad_norm": 0.6617783308029175, + "learning_rate": 0.00014468020859096395, + "loss": 2.5972, + "step": 7116 + }, + { + "epoch": 0.574368493261238, + "grad_norm": 0.6525121927261353, + "learning_rate": 0.0001446660845572714, + "loss": 2.5888, + "step": 7117 + }, + { + "epoch": 0.574449196997821, + "grad_norm": 0.7024720907211304, + "learning_rate": 0.00014465195941038326, + "loss": 2.6135, + "step": 7118 + }, + { + "epoch": 0.574529900734404, + "grad_norm": 0.7660520672798157, + "learning_rate": 0.00014463783315065153, + "loss": 2.5837, + "step": 7119 + }, + { + "epoch": 0.574610604470987, + "grad_norm": 0.8206443190574646, + "learning_rate": 0.00014462370577842838, + "loss": 2.6749, + "step": 7120 + }, + { + "epoch": 0.57469130820757, + "grad_norm": 0.7176216840744019, + "learning_rate": 0.00014460957729406577, + "loss": 2.5814, + "step": 7121 + }, + { + "epoch": 0.5747720119441531, + "grad_norm": 0.7867588400840759, + "learning_rate": 0.0001445954476979159, + "loss": 2.5697, + "step": 7122 + }, + { + "epoch": 0.574852715680736, + "grad_norm": 0.7150471806526184, + "learning_rate": 0.0001445813169903309, + "loss": 2.5689, + "step": 7123 + }, + { + "epoch": 0.574933419417319, + "grad_norm": 0.7082479596138, + "learning_rate": 0.00014456718517166296, + "loss": 2.6081, + "step": 7124 + }, + { + "epoch": 0.575014123153902, + "grad_norm": 0.7207253575325012, + "learning_rate": 0.00014455305224226426, + "loss": 2.6573, + "step": 7125 + }, + { + "epoch": 0.5750948268904851, + "grad_norm": 0.7451751232147217, + "learning_rate": 0.00014453891820248704, + "loss": 2.6057, + "step": 7126 + }, + { + "epoch": 0.575175530627068, + "grad_norm": 0.7030230164527893, 
+ "learning_rate": 0.0001445247830526835, + "loss": 2.6122, + "step": 7127 + }, + { + "epoch": 0.575256234363651, + "grad_norm": 0.7233754396438599, + "learning_rate": 0.00014451064679320605, + "loss": 2.5937, + "step": 7128 + }, + { + "epoch": 0.575336938100234, + "grad_norm": 0.6943942904472351, + "learning_rate": 0.0001444965094244069, + "loss": 2.6327, + "step": 7129 + }, + { + "epoch": 0.5754176418368171, + "grad_norm": 0.682056725025177, + "learning_rate": 0.00014448237094663843, + "loss": 2.6212, + "step": 7130 + }, + { + "epoch": 0.5754983455734001, + "grad_norm": 0.7424136400222778, + "learning_rate": 0.00014446823136025298, + "loss": 2.6031, + "step": 7131 + }, + { + "epoch": 0.575579049309983, + "grad_norm": 0.7464002370834351, + "learning_rate": 0.00014445409066560298, + "loss": 2.6363, + "step": 7132 + }, + { + "epoch": 0.575659753046566, + "grad_norm": 0.7137650847434998, + "learning_rate": 0.00014443994886304085, + "loss": 2.5343, + "step": 7133 + }, + { + "epoch": 0.5757404567831491, + "grad_norm": 0.6744158864021301, + "learning_rate": 0.00014442580595291901, + "loss": 2.6463, + "step": 7134 + }, + { + "epoch": 0.5758211605197321, + "grad_norm": 0.6947084069252014, + "learning_rate": 0.00014441166193558991, + "loss": 2.6074, + "step": 7135 + }, + { + "epoch": 0.5759018642563151, + "grad_norm": 0.6981585621833801, + "learning_rate": 0.00014439751681140616, + "loss": 2.6257, + "step": 7136 + }, + { + "epoch": 0.575982567992898, + "grad_norm": 0.6800102591514587, + "learning_rate": 0.00014438337058072023, + "loss": 2.6447, + "step": 7137 + }, + { + "epoch": 0.5760632717294811, + "grad_norm": 0.6952316164970398, + "learning_rate": 0.00014436922324388465, + "loss": 2.5739, + "step": 7138 + }, + { + "epoch": 0.5761439754660641, + "grad_norm": 0.709170937538147, + "learning_rate": 0.0001443550748012521, + "loss": 2.5918, + "step": 7139 + }, + { + "epoch": 0.5762246792026471, + "grad_norm": 0.7677363157272339, + "learning_rate": 0.00014434092525317512, + 
"loss": 2.6322, + "step": 7140 + }, + { + "epoch": 0.5763053829392301, + "grad_norm": 0.6730263233184814, + "learning_rate": 0.00014432677460000636, + "loss": 2.6764, + "step": 7141 + }, + { + "epoch": 0.576386086675813, + "grad_norm": 0.6782239675521851, + "learning_rate": 0.0001443126228420985, + "loss": 2.5208, + "step": 7142 + }, + { + "epoch": 0.5764667904123961, + "grad_norm": 0.7737600207328796, + "learning_rate": 0.00014429846997980424, + "loss": 2.6964, + "step": 7143 + }, + { + "epoch": 0.5765474941489791, + "grad_norm": 0.7456403374671936, + "learning_rate": 0.00014428431601347635, + "loss": 2.6163, + "step": 7144 + }, + { + "epoch": 0.5766281978855621, + "grad_norm": 0.7824606895446777, + "learning_rate": 0.00014427016094346754, + "loss": 2.6499, + "step": 7145 + }, + { + "epoch": 0.576708901622145, + "grad_norm": 0.7233635187149048, + "learning_rate": 0.00014425600477013055, + "loss": 2.6064, + "step": 7146 + }, + { + "epoch": 0.5767896053587281, + "grad_norm": 0.7008275389671326, + "learning_rate": 0.00014424184749381824, + "loss": 2.5585, + "step": 7147 + }, + { + "epoch": 0.5768703090953111, + "grad_norm": 0.6817710995674133, + "learning_rate": 0.00014422768911488346, + "loss": 2.6215, + "step": 7148 + }, + { + "epoch": 0.5769510128318941, + "grad_norm": 0.6860779523849487, + "learning_rate": 0.00014421352963367906, + "loss": 2.5877, + "step": 7149 + }, + { + "epoch": 0.5770317165684771, + "grad_norm": 0.732865035533905, + "learning_rate": 0.00014419936905055793, + "loss": 2.5704, + "step": 7150 + }, + { + "epoch": 0.5771124203050602, + "grad_norm": 0.6992458701133728, + "learning_rate": 0.00014418520736587297, + "loss": 2.6654, + "step": 7151 + }, + { + "epoch": 0.5771931240416431, + "grad_norm": 0.6865053176879883, + "learning_rate": 0.00014417104457997715, + "loss": 2.6389, + "step": 7152 + }, + { + "epoch": 0.5772738277782261, + "grad_norm": 0.7652727365493774, + "learning_rate": 0.00014415688069322345, + "loss": 2.6478, + "step": 7153 + }, + { 
+ "epoch": 0.5773545315148091, + "grad_norm": 0.708692193031311, + "learning_rate": 0.0001441427157059648, + "loss": 2.6065, + "step": 7154 + }, + { + "epoch": 0.5774352352513922, + "grad_norm": 0.7549232244491577, + "learning_rate": 0.00014412854961855435, + "loss": 2.6484, + "step": 7155 + }, + { + "epoch": 0.5775159389879752, + "grad_norm": 0.6410655975341797, + "learning_rate": 0.00014411438243134506, + "loss": 2.6061, + "step": 7156 + }, + { + "epoch": 0.5775966427245581, + "grad_norm": 0.7711724042892456, + "learning_rate": 0.00014410021414469005, + "loss": 2.628, + "step": 7157 + }, + { + "epoch": 0.5776773464611411, + "grad_norm": 0.6723695993423462, + "learning_rate": 0.0001440860447589424, + "loss": 2.6214, + "step": 7158 + }, + { + "epoch": 0.5777580501977242, + "grad_norm": 0.7359206676483154, + "learning_rate": 0.0001440718742744553, + "loss": 2.6157, + "step": 7159 + }, + { + "epoch": 0.5778387539343072, + "grad_norm": 0.7320525050163269, + "learning_rate": 0.0001440577026915819, + "loss": 2.6081, + "step": 7160 + }, + { + "epoch": 0.5779194576708901, + "grad_norm": 0.7728561162948608, + "learning_rate": 0.00014404353001067535, + "loss": 2.5989, + "step": 7161 + }, + { + "epoch": 0.5780001614074731, + "grad_norm": 0.7380329370498657, + "learning_rate": 0.0001440293562320889, + "loss": 2.6337, + "step": 7162 + }, + { + "epoch": 0.5780808651440562, + "grad_norm": 0.667789876461029, + "learning_rate": 0.00014401518135617581, + "loss": 2.6324, + "step": 7163 + }, + { + "epoch": 0.5781615688806392, + "grad_norm": 0.6907219886779785, + "learning_rate": 0.00014400100538328935, + "loss": 2.5897, + "step": 7164 + }, + { + "epoch": 0.5782422726172222, + "grad_norm": 0.9051530957221985, + "learning_rate": 0.00014398682831378283, + "loss": 2.6895, + "step": 7165 + }, + { + "epoch": 0.5783229763538051, + "grad_norm": 0.7189533114433289, + "learning_rate": 0.00014397265014800956, + "loss": 2.5948, + "step": 7166 + }, + { + "epoch": 0.5784036800903882, + 
"grad_norm": 0.7003059983253479, + "learning_rate": 0.00014395847088632285, + "loss": 2.5814, + "step": 7167 + }, + { + "epoch": 0.5784843838269712, + "grad_norm": 0.8083534240722656, + "learning_rate": 0.0001439442905290762, + "loss": 2.6131, + "step": 7168 + }, + { + "epoch": 0.5785650875635542, + "grad_norm": 0.7068585157394409, + "learning_rate": 0.0001439301090766229, + "loss": 2.6027, + "step": 7169 + }, + { + "epoch": 0.5786457913001372, + "grad_norm": 0.7010494470596313, + "learning_rate": 0.00014391592652931653, + "loss": 2.5296, + "step": 7170 + }, + { + "epoch": 0.5787264950367202, + "grad_norm": 0.7577467560768127, + "learning_rate": 0.00014390174288751045, + "loss": 2.6347, + "step": 7171 + }, + { + "epoch": 0.5788071987733032, + "grad_norm": 0.643799364566803, + "learning_rate": 0.00014388755815155813, + "loss": 2.6152, + "step": 7172 + }, + { + "epoch": 0.5788879025098862, + "grad_norm": 0.740352988243103, + "learning_rate": 0.00014387337232181315, + "loss": 2.6123, + "step": 7173 + }, + { + "epoch": 0.5789686062464692, + "grad_norm": 0.7309309840202332, + "learning_rate": 0.00014385918539862907, + "loss": 2.6072, + "step": 7174 + }, + { + "epoch": 0.5790493099830523, + "grad_norm": 0.7237016558647156, + "learning_rate": 0.00014384499738235941, + "loss": 2.6375, + "step": 7175 + }, + { + "epoch": 0.5791300137196352, + "grad_norm": 0.6600970029830933, + "learning_rate": 0.00014383080827335784, + "loss": 2.5285, + "step": 7176 + }, + { + "epoch": 0.5792107174562182, + "grad_norm": 0.6822233200073242, + "learning_rate": 0.00014381661807197794, + "loss": 2.5497, + "step": 7177 + }, + { + "epoch": 0.5792914211928012, + "grad_norm": 0.6990383863449097, + "learning_rate": 0.00014380242677857337, + "loss": 2.6283, + "step": 7178 + }, + { + "epoch": 0.5793721249293843, + "grad_norm": 0.64422208070755, + "learning_rate": 0.00014378823439349783, + "loss": 2.5762, + "step": 7179 + }, + { + "epoch": 0.5794528286659673, + "grad_norm": 0.63804692029953, + 
"learning_rate": 0.00014377404091710501, + "loss": 2.5523, + "step": 7180 + }, + { + "epoch": 0.5795335324025502, + "grad_norm": 0.6978863477706909, + "learning_rate": 0.0001437598463497487, + "loss": 2.5089, + "step": 7181 + }, + { + "epoch": 0.5796142361391332, + "grad_norm": 0.7091087698936462, + "learning_rate": 0.00014374565069178257, + "loss": 2.7005, + "step": 7182 + }, + { + "epoch": 0.5796949398757163, + "grad_norm": 0.683659553527832, + "learning_rate": 0.00014373145394356053, + "loss": 2.5988, + "step": 7183 + }, + { + "epoch": 0.5797756436122993, + "grad_norm": 0.7352960705757141, + "learning_rate": 0.00014371725610543633, + "loss": 2.5671, + "step": 7184 + }, + { + "epoch": 0.5798563473488823, + "grad_norm": 0.6951913237571716, + "learning_rate": 0.00014370305717776382, + "loss": 2.5917, + "step": 7185 + }, + { + "epoch": 0.5799370510854652, + "grad_norm": 0.6644465923309326, + "learning_rate": 0.0001436888571608969, + "loss": 2.5954, + "step": 7186 + }, + { + "epoch": 0.5800177548220483, + "grad_norm": 0.7406458258628845, + "learning_rate": 0.00014367465605518942, + "loss": 2.6369, + "step": 7187 + }, + { + "epoch": 0.5800984585586313, + "grad_norm": 0.6724697351455688, + "learning_rate": 0.00014366045386099535, + "loss": 2.6227, + "step": 7188 + }, + { + "epoch": 0.5801791622952143, + "grad_norm": 0.6804977059364319, + "learning_rate": 0.00014364625057866867, + "loss": 2.6445, + "step": 7189 + }, + { + "epoch": 0.5802598660317972, + "grad_norm": 0.7020019888877869, + "learning_rate": 0.00014363204620856335, + "loss": 2.6733, + "step": 7190 + }, + { + "epoch": 0.5803405697683802, + "grad_norm": 0.6458491086959839, + "learning_rate": 0.00014361784075103332, + "loss": 2.572, + "step": 7191 + }, + { + "epoch": 0.5804212735049633, + "grad_norm": 0.7078056335449219, + "learning_rate": 0.00014360363420643272, + "loss": 2.7032, + "step": 7192 + }, + { + "epoch": 0.5805019772415463, + "grad_norm": 0.6367471814155579, + "learning_rate": 0.00014358942657511557, 
+ "loss": 2.5369, + "step": 7193 + }, + { + "epoch": 0.5805826809781293, + "grad_norm": 0.7311955094337463, + "learning_rate": 0.00014357521785743596, + "loss": 2.6513, + "step": 7194 + }, + { + "epoch": 0.5806633847147122, + "grad_norm": 0.6957442164421082, + "learning_rate": 0.00014356100805374805, + "loss": 2.6512, + "step": 7195 + }, + { + "epoch": 0.5807440884512953, + "grad_norm": 0.7026693224906921, + "learning_rate": 0.0001435467971644059, + "loss": 2.6049, + "step": 7196 + }, + { + "epoch": 0.5808247921878783, + "grad_norm": 0.7337697744369507, + "learning_rate": 0.00014353258518976376, + "loss": 2.5516, + "step": 7197 + }, + { + "epoch": 0.5809054959244613, + "grad_norm": 0.6891856789588928, + "learning_rate": 0.00014351837213017577, + "loss": 2.5894, + "step": 7198 + }, + { + "epoch": 0.5809861996610443, + "grad_norm": 0.6710659265518188, + "learning_rate": 0.0001435041579859962, + "loss": 2.596, + "step": 7199 + }, + { + "epoch": 0.5810669033976273, + "grad_norm": 0.7637245059013367, + "learning_rate": 0.00014348994275757931, + "loss": 2.6278, + "step": 7200 + }, + { + "epoch": 0.5811476071342103, + "grad_norm": 0.7558664679527283, + "learning_rate": 0.00014347572644527934, + "loss": 2.6917, + "step": 7201 + }, + { + "epoch": 0.5812283108707933, + "grad_norm": 0.7254986763000488, + "learning_rate": 0.00014346150904945065, + "loss": 2.6161, + "step": 7202 + }, + { + "epoch": 0.5813090146073763, + "grad_norm": 0.7177211046218872, + "learning_rate": 0.00014344729057044753, + "loss": 2.555, + "step": 7203 + }, + { + "epoch": 0.5813897183439594, + "grad_norm": 0.6408729553222656, + "learning_rate": 0.00014343307100862432, + "loss": 2.6071, + "step": 7204 + }, + { + "epoch": 0.5814704220805423, + "grad_norm": 0.7399997711181641, + "learning_rate": 0.0001434188503643355, + "loss": 2.6013, + "step": 7205 + }, + { + "epoch": 0.5815511258171253, + "grad_norm": 0.7796236276626587, + "learning_rate": 0.00014340462863793543, + "loss": 2.603, + "step": 7206 + }, + { 
+ "epoch": 0.5816318295537083, + "grad_norm": 0.7420137524604797, + "learning_rate": 0.00014339040582977855, + "loss": 2.5858, + "step": 7207 + }, + { + "epoch": 0.5817125332902914, + "grad_norm": 0.738042414188385, + "learning_rate": 0.00014337618194021928, + "loss": 2.592, + "step": 7208 + }, + { + "epoch": 0.5817932370268744, + "grad_norm": 0.6910614371299744, + "learning_rate": 0.00014336195696961222, + "loss": 2.6448, + "step": 7209 + }, + { + "epoch": 0.5818739407634573, + "grad_norm": 0.7838915586471558, + "learning_rate": 0.00014334773091831185, + "loss": 2.6257, + "step": 7210 + }, + { + "epoch": 0.5819546445000403, + "grad_norm": 0.7362141013145447, + "learning_rate": 0.0001433335037866727, + "loss": 2.6505, + "step": 7211 + }, + { + "epoch": 0.5820353482366234, + "grad_norm": 0.6892269253730774, + "learning_rate": 0.00014331927557504934, + "loss": 2.6518, + "step": 7212 + }, + { + "epoch": 0.5821160519732064, + "grad_norm": 0.7444556951522827, + "learning_rate": 0.0001433050462837964, + "loss": 2.6785, + "step": 7213 + }, + { + "epoch": 0.5821967557097893, + "grad_norm": 0.6948450207710266, + "learning_rate": 0.00014329081591326853, + "loss": 2.5753, + "step": 7214 + }, + { + "epoch": 0.5822774594463723, + "grad_norm": 0.713741660118103, + "learning_rate": 0.00014327658446382032, + "loss": 2.6425, + "step": 7215 + }, + { + "epoch": 0.5823581631829554, + "grad_norm": 0.7352245450019836, + "learning_rate": 0.00014326235193580657, + "loss": 2.6859, + "step": 7216 + }, + { + "epoch": 0.5824388669195384, + "grad_norm": 0.7151867151260376, + "learning_rate": 0.00014324811832958187, + "loss": 2.6106, + "step": 7217 + }, + { + "epoch": 0.5825195706561214, + "grad_norm": 0.7003469467163086, + "learning_rate": 0.000143233883645501, + "loss": 2.618, + "step": 7218 + }, + { + "epoch": 0.5826002743927043, + "grad_norm": 0.7139034867286682, + "learning_rate": 0.00014321964788391878, + "loss": 2.5772, + "step": 7219 + }, + { + "epoch": 0.5826809781292874, + 
"grad_norm": 0.6368305683135986, + "learning_rate": 0.00014320541104518992, + "loss": 2.5259, + "step": 7220 + }, + { + "epoch": 0.5827616818658704, + "grad_norm": 0.6921548247337341, + "learning_rate": 0.0001431911731296693, + "loss": 2.6403, + "step": 7221 + }, + { + "epoch": 0.5828423856024534, + "grad_norm": 0.6995570659637451, + "learning_rate": 0.00014317693413771175, + "loss": 2.6172, + "step": 7222 + }, + { + "epoch": 0.5829230893390364, + "grad_norm": 0.7557246088981628, + "learning_rate": 0.0001431626940696721, + "loss": 2.6347, + "step": 7223 + }, + { + "epoch": 0.5830037930756194, + "grad_norm": 0.6912205219268799, + "learning_rate": 0.00014314845292590528, + "loss": 2.5958, + "step": 7224 + }, + { + "epoch": 0.5830844968122024, + "grad_norm": 0.6896184682846069, + "learning_rate": 0.00014313421070676625, + "loss": 2.569, + "step": 7225 + }, + { + "epoch": 0.5831652005487854, + "grad_norm": 0.6900814771652222, + "learning_rate": 0.00014311996741260994, + "loss": 2.5466, + "step": 7226 + }, + { + "epoch": 0.5832459042853684, + "grad_norm": 0.7319771647453308, + "learning_rate": 0.00014310572304379132, + "loss": 2.6181, + "step": 7227 + }, + { + "epoch": 0.5833266080219515, + "grad_norm": 0.728138267993927, + "learning_rate": 0.0001430914776006654, + "loss": 2.6644, + "step": 7228 + }, + { + "epoch": 0.5834073117585344, + "grad_norm": 0.7361802458763123, + "learning_rate": 0.0001430772310835872, + "loss": 2.6079, + "step": 7229 + }, + { + "epoch": 0.5834880154951174, + "grad_norm": 0.6893376708030701, + "learning_rate": 0.00014306298349291182, + "loss": 2.5615, + "step": 7230 + }, + { + "epoch": 0.5835687192317004, + "grad_norm": 0.6661401987075806, + "learning_rate": 0.00014304873482899431, + "loss": 2.6028, + "step": 7231 + }, + { + "epoch": 0.5836494229682835, + "grad_norm": 0.6571504473686218, + "learning_rate": 0.0001430344850921898, + "loss": 2.5553, + "step": 7232 + }, + { + "epoch": 0.5837301267048665, + "grad_norm": 0.6878423690795898, + 
"learning_rate": 0.00014302023428285342, + "loss": 2.5336, + "step": 7233 + }, + { + "epoch": 0.5838108304414494, + "grad_norm": 0.768117368221283, + "learning_rate": 0.00014300598240134035, + "loss": 2.6036, + "step": 7234 + }, + { + "epoch": 0.5838915341780324, + "grad_norm": 0.6876625418663025, + "learning_rate": 0.0001429917294480058, + "loss": 2.6314, + "step": 7235 + }, + { + "epoch": 0.5839722379146155, + "grad_norm": 0.7146790027618408, + "learning_rate": 0.00014297747542320495, + "loss": 2.6029, + "step": 7236 + }, + { + "epoch": 0.5840529416511985, + "grad_norm": 0.7032392024993896, + "learning_rate": 0.00014296322032729308, + "loss": 2.6163, + "step": 7237 + }, + { + "epoch": 0.5841336453877815, + "grad_norm": 0.7323551177978516, + "learning_rate": 0.00014294896416062544, + "loss": 2.6706, + "step": 7238 + }, + { + "epoch": 0.5842143491243644, + "grad_norm": 0.7647258639335632, + "learning_rate": 0.00014293470692355734, + "loss": 2.6744, + "step": 7239 + }, + { + "epoch": 0.5842950528609475, + "grad_norm": 0.6824506521224976, + "learning_rate": 0.00014292044861644414, + "loss": 2.579, + "step": 7240 + }, + { + "epoch": 0.5843757565975305, + "grad_norm": 0.7553619742393494, + "learning_rate": 0.00014290618923964115, + "loss": 2.6196, + "step": 7241 + }, + { + "epoch": 0.5844564603341135, + "grad_norm": 0.6872109770774841, + "learning_rate": 0.00014289192879350375, + "loss": 2.555, + "step": 7242 + }, + { + "epoch": 0.5845371640706964, + "grad_norm": 0.664658784866333, + "learning_rate": 0.00014287766727838735, + "loss": 2.5781, + "step": 7243 + }, + { + "epoch": 0.5846178678072794, + "grad_norm": 0.6709543466567993, + "learning_rate": 0.00014286340469464744, + "loss": 2.6022, + "step": 7244 + }, + { + "epoch": 0.5846985715438625, + "grad_norm": 0.7236210107803345, + "learning_rate": 0.00014284914104263941, + "loss": 2.5609, + "step": 7245 + }, + { + "epoch": 0.5847792752804455, + "grad_norm": 0.6751740574836731, + "learning_rate": 0.0001428348763227188, + 
"loss": 2.5792, + "step": 7246 + }, + { + "epoch": 0.5848599790170285, + "grad_norm": 0.6684607267379761, + "learning_rate": 0.0001428206105352411, + "loss": 2.5705, + "step": 7247 + }, + { + "epoch": 0.5849406827536114, + "grad_norm": 0.6876732707023621, + "learning_rate": 0.00014280634368056186, + "loss": 2.6576, + "step": 7248 + }, + { + "epoch": 0.5850213864901945, + "grad_norm": 0.758637547492981, + "learning_rate": 0.0001427920757590366, + "loss": 2.6215, + "step": 7249 + }, + { + "epoch": 0.5851020902267775, + "grad_norm": 0.6839025020599365, + "learning_rate": 0.00014277780677102097, + "loss": 2.5898, + "step": 7250 + }, + { + "epoch": 0.5851827939633605, + "grad_norm": 0.6912671327590942, + "learning_rate": 0.00014276353671687056, + "loss": 2.5879, + "step": 7251 + }, + { + "epoch": 0.5852634976999435, + "grad_norm": 0.6727048754692078, + "learning_rate": 0.00014274926559694107, + "loss": 2.5501, + "step": 7252 + }, + { + "epoch": 0.5853442014365265, + "grad_norm": 0.7031945586204529, + "learning_rate": 0.00014273499341158812, + "loss": 2.625, + "step": 7253 + }, + { + "epoch": 0.5854249051731095, + "grad_norm": 0.6886943578720093, + "learning_rate": 0.0001427207201611674, + "loss": 2.6141, + "step": 7254 + }, + { + "epoch": 0.5855056089096925, + "grad_norm": 0.7906915545463562, + "learning_rate": 0.00014270644584603466, + "loss": 2.7189, + "step": 7255 + }, + { + "epoch": 0.5855863126462755, + "grad_norm": 0.6873704195022583, + "learning_rate": 0.00014269217046654567, + "loss": 2.6031, + "step": 7256 + }, + { + "epoch": 0.5856670163828586, + "grad_norm": 0.6655381321907043, + "learning_rate": 0.00014267789402305618, + "loss": 2.5747, + "step": 7257 + }, + { + "epoch": 0.5857477201194415, + "grad_norm": 0.6655673384666443, + "learning_rate": 0.00014266361651592204, + "loss": 2.625, + "step": 7258 + }, + { + "epoch": 0.5858284238560245, + "grad_norm": 0.6752866506576538, + "learning_rate": 0.00014264933794549901, + "loss": 2.5914, + "step": 7259 + }, + { + 
"epoch": 0.5859091275926075, + "grad_norm": 0.6680975556373596, + "learning_rate": 0.00014263505831214302, + "loss": 2.5572, + "step": 7260 + }, + { + "epoch": 0.5859898313291906, + "grad_norm": 0.6873607039451599, + "learning_rate": 0.00014262077761620994, + "loss": 2.6696, + "step": 7261 + }, + { + "epoch": 0.5860705350657736, + "grad_norm": 0.6745384335517883, + "learning_rate": 0.00014260649585805566, + "loss": 2.5738, + "step": 7262 + }, + { + "epoch": 0.5861512388023565, + "grad_norm": 0.6524637937545776, + "learning_rate": 0.0001425922130380361, + "loss": 2.6209, + "step": 7263 + }, + { + "epoch": 0.5862319425389395, + "grad_norm": 0.6729850172996521, + "learning_rate": 0.00014257792915650728, + "loss": 2.652, + "step": 7264 + }, + { + "epoch": 0.5863126462755226, + "grad_norm": 0.6713503003120422, + "learning_rate": 0.00014256364421382514, + "loss": 2.5658, + "step": 7265 + }, + { + "epoch": 0.5863933500121056, + "grad_norm": 0.6835616827011108, + "learning_rate": 0.00014254935821034575, + "loss": 2.5535, + "step": 7266 + }, + { + "epoch": 0.5864740537486886, + "grad_norm": 0.7425376176834106, + "learning_rate": 0.00014253507114642515, + "loss": 2.6369, + "step": 7267 + }, + { + "epoch": 0.5865547574852715, + "grad_norm": 0.6788069605827332, + "learning_rate": 0.00014252078302241932, + "loss": 2.601, + "step": 7268 + }, + { + "epoch": 0.5866354612218546, + "grad_norm": 0.6828538179397583, + "learning_rate": 0.0001425064938386845, + "loss": 2.5861, + "step": 7269 + }, + { + "epoch": 0.5867161649584376, + "grad_norm": 0.6763372421264648, + "learning_rate": 0.0001424922035955767, + "loss": 2.6035, + "step": 7270 + }, + { + "epoch": 0.5867968686950206, + "grad_norm": 0.6517930626869202, + "learning_rate": 0.0001424779122934521, + "loss": 2.5564, + "step": 7271 + }, + { + "epoch": 0.5868775724316035, + "grad_norm": 0.6633113622665405, + "learning_rate": 0.00014246361993266692, + "loss": 2.6163, + "step": 7272 + }, + { + "epoch": 0.5869582761681866, + 
"grad_norm": 0.684822678565979, + "learning_rate": 0.00014244932651357733, + "loss": 2.6057, + "step": 7273 + }, + { + "epoch": 0.5870389799047696, + "grad_norm": 0.7679704427719116, + "learning_rate": 0.00014243503203653952, + "loss": 2.6522, + "step": 7274 + }, + { + "epoch": 0.5871196836413526, + "grad_norm": 0.6834188103675842, + "learning_rate": 0.00014242073650190984, + "loss": 2.652, + "step": 7275 + }, + { + "epoch": 0.5872003873779356, + "grad_norm": 0.6903846859931946, + "learning_rate": 0.00014240643991004449, + "loss": 2.5894, + "step": 7276 + }, + { + "epoch": 0.5872810911145186, + "grad_norm": 0.7060866951942444, + "learning_rate": 0.0001423921422612998, + "loss": 2.5994, + "step": 7277 + }, + { + "epoch": 0.5873617948511016, + "grad_norm": 0.6646741628646851, + "learning_rate": 0.0001423778435560321, + "loss": 2.6432, + "step": 7278 + }, + { + "epoch": 0.5874424985876846, + "grad_norm": 0.6930218935012817, + "learning_rate": 0.0001423635437945978, + "loss": 2.6233, + "step": 7279 + }, + { + "epoch": 0.5875232023242676, + "grad_norm": 0.6914143562316895, + "learning_rate": 0.00014234924297735322, + "loss": 2.6143, + "step": 7280 + }, + { + "epoch": 0.5876039060608507, + "grad_norm": 0.7351366281509399, + "learning_rate": 0.0001423349411046548, + "loss": 2.6323, + "step": 7281 + }, + { + "epoch": 0.5876846097974336, + "grad_norm": 0.6813770532608032, + "learning_rate": 0.000142320638176859, + "loss": 2.5964, + "step": 7282 + }, + { + "epoch": 0.5877653135340166, + "grad_norm": 0.7049702405929565, + "learning_rate": 0.00014230633419432226, + "loss": 2.6284, + "step": 7283 + }, + { + "epoch": 0.5878460172705996, + "grad_norm": 0.7140446901321411, + "learning_rate": 0.00014229202915740107, + "loss": 2.6113, + "step": 7284 + }, + { + "epoch": 0.5879267210071827, + "grad_norm": 0.696588933467865, + "learning_rate": 0.00014227772306645196, + "loss": 2.6384, + "step": 7285 + }, + { + "epoch": 0.5880074247437657, + "grad_norm": 0.6800615787506104, + 
"learning_rate": 0.0001422634159218315, + "loss": 2.5743, + "step": 7286 + }, + { + "epoch": 0.5880881284803486, + "grad_norm": 0.7586596608161926, + "learning_rate": 0.00014224910772389624, + "loss": 2.6504, + "step": 7287 + }, + { + "epoch": 0.5881688322169316, + "grad_norm": 0.73286372423172, + "learning_rate": 0.00014223479847300278, + "loss": 2.6026, + "step": 7288 + }, + { + "epoch": 0.5882495359535147, + "grad_norm": 0.6808766722679138, + "learning_rate": 0.00014222048816950772, + "loss": 2.5822, + "step": 7289 + }, + { + "epoch": 0.5883302396900977, + "grad_norm": 0.7424919009208679, + "learning_rate": 0.0001422061768137677, + "loss": 2.6474, + "step": 7290 + }, + { + "epoch": 0.5884109434266807, + "grad_norm": 0.658183753490448, + "learning_rate": 0.00014219186440613948, + "loss": 2.6051, + "step": 7291 + }, + { + "epoch": 0.5884916471632636, + "grad_norm": 0.6693006157875061, + "learning_rate": 0.0001421775509469797, + "loss": 2.5774, + "step": 7292 + }, + { + "epoch": 0.5885723508998466, + "grad_norm": 0.7298646569252014, + "learning_rate": 0.00014216323643664508, + "loss": 2.5688, + "step": 7293 + }, + { + "epoch": 0.5886530546364297, + "grad_norm": 0.6665881276130676, + "learning_rate": 0.00014214892087549238, + "loss": 2.608, + "step": 7294 + }, + { + "epoch": 0.5887337583730127, + "grad_norm": 0.7220060229301453, + "learning_rate": 0.00014213460426387841, + "loss": 2.6078, + "step": 7295 + }, + { + "epoch": 0.5888144621095956, + "grad_norm": 0.6693970561027527, + "learning_rate": 0.00014212028660215997, + "loss": 2.597, + "step": 7296 + }, + { + "epoch": 0.5888951658461786, + "grad_norm": 0.682331919670105, + "learning_rate": 0.00014210596789069387, + "loss": 2.5752, + "step": 7297 + }, + { + "epoch": 0.5889758695827617, + "grad_norm": 0.7586890459060669, + "learning_rate": 0.000142091648129837, + "loss": 2.6878, + "step": 7298 + }, + { + "epoch": 0.5890565733193447, + "grad_norm": 0.6740901470184326, + "learning_rate": 0.00014207732731994624, + 
"loss": 2.6083, + "step": 7299 + }, + { + "epoch": 0.5891372770559277, + "grad_norm": 0.6959021091461182, + "learning_rate": 0.00014206300546137842, + "loss": 2.5765, + "step": 7300 + }, + { + "epoch": 0.5892179807925106, + "grad_norm": 0.7446078658103943, + "learning_rate": 0.0001420486825544906, + "loss": 2.662, + "step": 7301 + }, + { + "epoch": 0.5892986845290937, + "grad_norm": 0.7418847680091858, + "learning_rate": 0.0001420343585996397, + "loss": 2.6606, + "step": 7302 + }, + { + "epoch": 0.5893793882656767, + "grad_norm": 0.7185709476470947, + "learning_rate": 0.00014202003359718273, + "loss": 2.563, + "step": 7303 + }, + { + "epoch": 0.5894600920022597, + "grad_norm": 0.6960515379905701, + "learning_rate": 0.00014200570754747664, + "loss": 2.6182, + "step": 7304 + }, + { + "epoch": 0.5895407957388427, + "grad_norm": 0.6589705348014832, + "learning_rate": 0.00014199138045087849, + "loss": 2.6714, + "step": 7305 + }, + { + "epoch": 0.5896214994754257, + "grad_norm": 0.7027507424354553, + "learning_rate": 0.00014197705230774543, + "loss": 2.6145, + "step": 7306 + }, + { + "epoch": 0.5897022032120087, + "grad_norm": 0.6761246919631958, + "learning_rate": 0.00014196272311843447, + "loss": 2.5688, + "step": 7307 + }, + { + "epoch": 0.5897829069485917, + "grad_norm": 0.6618059277534485, + "learning_rate": 0.00014194839288330277, + "loss": 2.6194, + "step": 7308 + }, + { + "epoch": 0.5898636106851747, + "grad_norm": 0.7182614803314209, + "learning_rate": 0.00014193406160270747, + "loss": 2.5452, + "step": 7309 + }, + { + "epoch": 0.5899443144217578, + "grad_norm": 0.6830565333366394, + "learning_rate": 0.0001419197292770057, + "loss": 2.5728, + "step": 7310 + }, + { + "epoch": 0.5900250181583407, + "grad_norm": 0.6744499802589417, + "learning_rate": 0.00014190539590655475, + "loss": 2.5736, + "step": 7311 + }, + { + "epoch": 0.5901057218949237, + "grad_norm": 0.7177874445915222, + "learning_rate": 0.00014189106149171176, + "loss": 2.6271, + "step": 7312 + }, + { + 
"epoch": 0.5901864256315067, + "grad_norm": 0.6770105361938477, + "learning_rate": 0.000141876726032834, + "loss": 2.5924, + "step": 7313 + }, + { + "epoch": 0.5902671293680898, + "grad_norm": 0.7295818328857422, + "learning_rate": 0.0001418623895302788, + "loss": 2.644, + "step": 7314 + }, + { + "epoch": 0.5903478331046728, + "grad_norm": 0.7244859933853149, + "learning_rate": 0.00014184805198440338, + "loss": 2.5892, + "step": 7315 + }, + { + "epoch": 0.5904285368412557, + "grad_norm": 0.7067728638648987, + "learning_rate": 0.00014183371339556512, + "loss": 2.5985, + "step": 7316 + }, + { + "epoch": 0.5905092405778387, + "grad_norm": 0.6732490062713623, + "learning_rate": 0.0001418193737641214, + "loss": 2.5771, + "step": 7317 + }, + { + "epoch": 0.5905899443144218, + "grad_norm": 0.7087544202804565, + "learning_rate": 0.00014180503309042957, + "loss": 2.6373, + "step": 7318 + }, + { + "epoch": 0.5906706480510048, + "grad_norm": 0.772174596786499, + "learning_rate": 0.00014179069137484703, + "loss": 2.6262, + "step": 7319 + }, + { + "epoch": 0.5907513517875878, + "grad_norm": 0.6855718493461609, + "learning_rate": 0.00014177634861773118, + "loss": 2.6268, + "step": 7320 + }, + { + "epoch": 0.5908320555241707, + "grad_norm": 0.7168720364570618, + "learning_rate": 0.00014176200481943953, + "loss": 2.5892, + "step": 7321 + }, + { + "epoch": 0.5909127592607538, + "grad_norm": 0.7126333713531494, + "learning_rate": 0.0001417476599803296, + "loss": 2.6079, + "step": 7322 + }, + { + "epoch": 0.5909934629973368, + "grad_norm": 0.7451913952827454, + "learning_rate": 0.0001417333141007588, + "loss": 2.635, + "step": 7323 + }, + { + "epoch": 0.5910741667339198, + "grad_norm": 0.7405436038970947, + "learning_rate": 0.00014171896718108475, + "loss": 2.6014, + "step": 7324 + }, + { + "epoch": 0.5911548704705027, + "grad_norm": 0.7583999037742615, + "learning_rate": 0.00014170461922166498, + "loss": 2.6815, + "step": 7325 + }, + { + "epoch": 0.5912355742070858, + "grad_norm": 
0.6653509140014648, + "learning_rate": 0.00014169027022285706, + "loss": 2.6153, + "step": 7326 + }, + { + "epoch": 0.5913162779436688, + "grad_norm": 0.7145548462867737, + "learning_rate": 0.00014167592018501864, + "loss": 2.6022, + "step": 7327 + }, + { + "epoch": 0.5913969816802518, + "grad_norm": 0.6996089816093445, + "learning_rate": 0.00014166156910850737, + "loss": 2.6586, + "step": 7328 + }, + { + "epoch": 0.5914776854168348, + "grad_norm": 0.735653281211853, + "learning_rate": 0.0001416472169936809, + "loss": 2.6084, + "step": 7329 + }, + { + "epoch": 0.5915583891534179, + "grad_norm": 0.695036768913269, + "learning_rate": 0.00014163286384089686, + "loss": 2.5058, + "step": 7330 + }, + { + "epoch": 0.5916390928900008, + "grad_norm": 0.9014756679534912, + "learning_rate": 0.00014161850965051307, + "loss": 2.5991, + "step": 7331 + }, + { + "epoch": 0.5917197966265838, + "grad_norm": 0.7079846858978271, + "learning_rate": 0.0001416041544228872, + "loss": 2.6067, + "step": 7332 + }, + { + "epoch": 0.5918005003631668, + "grad_norm": 0.7681204080581665, + "learning_rate": 0.00014158979815837705, + "loss": 2.5414, + "step": 7333 + }, + { + "epoch": 0.5918812040997499, + "grad_norm": 0.6501670479774475, + "learning_rate": 0.00014157544085734042, + "loss": 2.617, + "step": 7334 + }, + { + "epoch": 0.5919619078363328, + "grad_norm": 0.7573496103286743, + "learning_rate": 0.00014156108252013513, + "loss": 2.6341, + "step": 7335 + }, + { + "epoch": 0.5920426115729158, + "grad_norm": 0.6865558624267578, + "learning_rate": 0.00014154672314711903, + "loss": 2.6229, + "step": 7336 + }, + { + "epoch": 0.5921233153094988, + "grad_norm": 0.6859166622161865, + "learning_rate": 0.00014153236273864995, + "loss": 2.6149, + "step": 7337 + }, + { + "epoch": 0.5922040190460819, + "grad_norm": 0.7603647112846375, + "learning_rate": 0.00014151800129508585, + "loss": 2.5645, + "step": 7338 + }, + { + "epoch": 0.5922847227826649, + "grad_norm": 0.6740217208862305, + "learning_rate": 
0.00014150363881678464, + "loss": 2.5883, + "step": 7339 + }, + { + "epoch": 0.5923654265192478, + "grad_norm": 0.6412263512611389, + "learning_rate": 0.00014148927530410426, + "loss": 2.576, + "step": 7340 + }, + { + "epoch": 0.5924461302558308, + "grad_norm": 0.669834315776825, + "learning_rate": 0.00014147491075740265, + "loss": 2.542, + "step": 7341 + }, + { + "epoch": 0.5925268339924139, + "grad_norm": 0.720024049282074, + "learning_rate": 0.00014146054517703786, + "loss": 2.6491, + "step": 7342 + }, + { + "epoch": 0.5926075377289969, + "grad_norm": 0.7191612720489502, + "learning_rate": 0.00014144617856336794, + "loss": 2.5933, + "step": 7343 + }, + { + "epoch": 0.5926882414655799, + "grad_norm": 0.7012050747871399, + "learning_rate": 0.00014143181091675087, + "loss": 2.5253, + "step": 7344 + }, + { + "epoch": 0.5927689452021628, + "grad_norm": 0.7825081944465637, + "learning_rate": 0.00014141744223754478, + "loss": 2.6225, + "step": 7345 + }, + { + "epoch": 0.5928496489387458, + "grad_norm": 0.6699295043945312, + "learning_rate": 0.00014140307252610775, + "loss": 2.5893, + "step": 7346 + }, + { + "epoch": 0.5929303526753289, + "grad_norm": 0.6668846011161804, + "learning_rate": 0.00014138870178279794, + "loss": 2.5944, + "step": 7347 + }, + { + "epoch": 0.5930110564119119, + "grad_norm": 0.7681072950363159, + "learning_rate": 0.0001413743300079735, + "loss": 2.5715, + "step": 7348 + }, + { + "epoch": 0.5930917601484949, + "grad_norm": 0.653075635433197, + "learning_rate": 0.00014135995720199258, + "loss": 2.5924, + "step": 7349 + }, + { + "epoch": 0.5931724638850778, + "grad_norm": 0.6807504892349243, + "learning_rate": 0.00014134558336521342, + "loss": 2.5395, + "step": 7350 + }, + { + "epoch": 0.5932531676216609, + "grad_norm": 0.681175708770752, + "learning_rate": 0.00014133120849799423, + "loss": 2.5401, + "step": 7351 + }, + { + "epoch": 0.5933338713582439, + "grad_norm": 0.7159900665283203, + "learning_rate": 0.0001413168326006933, + "loss": 2.5684, + 
"step": 7352 + }, + { + "epoch": 0.5934145750948269, + "grad_norm": 0.6517181992530823, + "learning_rate": 0.00014130245567366888, + "loss": 2.5887, + "step": 7353 + }, + { + "epoch": 0.5934952788314098, + "grad_norm": 0.6982731223106384, + "learning_rate": 0.00014128807771727936, + "loss": 2.5707, + "step": 7354 + }, + { + "epoch": 0.5935759825679929, + "grad_norm": 0.7003650069236755, + "learning_rate": 0.00014127369873188296, + "loss": 2.6415, + "step": 7355 + }, + { + "epoch": 0.5936566863045759, + "grad_norm": 0.7408339977264404, + "learning_rate": 0.0001412593187178381, + "loss": 2.5655, + "step": 7356 + }, + { + "epoch": 0.5937373900411589, + "grad_norm": 0.717218279838562, + "learning_rate": 0.00014124493767550317, + "loss": 2.586, + "step": 7357 + }, + { + "epoch": 0.5938180937777419, + "grad_norm": 0.6723458766937256, + "learning_rate": 0.00014123055560523657, + "loss": 2.593, + "step": 7358 + }, + { + "epoch": 0.593898797514325, + "grad_norm": 0.6861262321472168, + "learning_rate": 0.00014121617250739677, + "loss": 2.612, + "step": 7359 + }, + { + "epoch": 0.5939795012509079, + "grad_norm": 0.6811453104019165, + "learning_rate": 0.00014120178838234222, + "loss": 2.5708, + "step": 7360 + }, + { + "epoch": 0.5940602049874909, + "grad_norm": 0.6249656677246094, + "learning_rate": 0.00014118740323043136, + "loss": 2.5604, + "step": 7361 + }, + { + "epoch": 0.5941409087240739, + "grad_norm": 0.7671588659286499, + "learning_rate": 0.00014117301705202274, + "loss": 2.547, + "step": 7362 + }, + { + "epoch": 0.594221612460657, + "grad_norm": 0.6856057643890381, + "learning_rate": 0.00014115862984747496, + "loss": 2.6108, + "step": 7363 + }, + { + "epoch": 0.5943023161972399, + "grad_norm": 0.692331850528717, + "learning_rate": 0.0001411442416171465, + "loss": 2.6347, + "step": 7364 + }, + { + "epoch": 0.5943830199338229, + "grad_norm": 0.7256516814231873, + "learning_rate": 0.000141129852361396, + "loss": 2.6098, + "step": 7365 + }, + { + "epoch": 
0.5944637236704059, + "grad_norm": 0.7522590160369873, + "learning_rate": 0.00014111546208058203, + "loss": 2.5688, + "step": 7366 + }, + { + "epoch": 0.594544427406989, + "grad_norm": 0.6915806531906128, + "learning_rate": 0.0001411010707750633, + "loss": 2.5899, + "step": 7367 + }, + { + "epoch": 0.594625131143572, + "grad_norm": 0.7355465292930603, + "learning_rate": 0.00014108667844519844, + "loss": 2.5212, + "step": 7368 + }, + { + "epoch": 0.5947058348801549, + "grad_norm": 0.731002926826477, + "learning_rate": 0.00014107228509134615, + "loss": 2.6369, + "step": 7369 + }, + { + "epoch": 0.5947865386167379, + "grad_norm": 0.6764423251152039, + "learning_rate": 0.0001410578907138652, + "loss": 2.6012, + "step": 7370 + }, + { + "epoch": 0.594867242353321, + "grad_norm": 0.7466071844100952, + "learning_rate": 0.0001410434953131142, + "loss": 2.5822, + "step": 7371 + }, + { + "epoch": 0.594947946089904, + "grad_norm": 0.7276137471199036, + "learning_rate": 0.00014102909888945205, + "loss": 2.6055, + "step": 7372 + }, + { + "epoch": 0.595028649826487, + "grad_norm": 0.7411746978759766, + "learning_rate": 0.00014101470144323752, + "loss": 2.6489, + "step": 7373 + }, + { + "epoch": 0.5951093535630699, + "grad_norm": 0.7511908411979675, + "learning_rate": 0.0001410003029748294, + "loss": 2.6268, + "step": 7374 + }, + { + "epoch": 0.595190057299653, + "grad_norm": 0.6623562574386597, + "learning_rate": 0.0001409859034845866, + "loss": 2.58, + "step": 7375 + }, + { + "epoch": 0.595270761036236, + "grad_norm": 0.6948572397232056, + "learning_rate": 0.00014097150297286785, + "loss": 2.5811, + "step": 7376 + }, + { + "epoch": 0.595351464772819, + "grad_norm": 0.6836786270141602, + "learning_rate": 0.0001409571014400322, + "loss": 2.5861, + "step": 7377 + }, + { + "epoch": 0.595432168509402, + "grad_norm": 0.6644341945648193, + "learning_rate": 0.00014094269888643854, + "loss": 2.6339, + "step": 7378 + }, + { + "epoch": 0.595512872245985, + "grad_norm": 0.6434289813041687, 
+ "learning_rate": 0.0001409282953124458, + "loss": 2.4897, + "step": 7379 + }, + { + "epoch": 0.595593575982568, + "grad_norm": 0.6745082139968872, + "learning_rate": 0.0001409138907184129, + "loss": 2.522, + "step": 7380 + }, + { + "epoch": 0.595674279719151, + "grad_norm": 0.725321352481842, + "learning_rate": 0.0001408994851046989, + "loss": 2.5711, + "step": 7381 + }, + { + "epoch": 0.595754983455734, + "grad_norm": 0.7485500574111938, + "learning_rate": 0.00014088507847166283, + "loss": 2.6095, + "step": 7382 + }, + { + "epoch": 0.595835687192317, + "grad_norm": 0.721125602722168, + "learning_rate": 0.00014087067081966376, + "loss": 2.6762, + "step": 7383 + }, + { + "epoch": 0.5959163909289, + "grad_norm": 0.7099901437759399, + "learning_rate": 0.00014085626214906073, + "loss": 2.5667, + "step": 7384 + }, + { + "epoch": 0.595997094665483, + "grad_norm": 0.6889060139656067, + "learning_rate": 0.00014084185246021283, + "loss": 2.6723, + "step": 7385 + }, + { + "epoch": 0.596077798402066, + "grad_norm": 0.735698938369751, + "learning_rate": 0.00014082744175347923, + "loss": 2.6434, + "step": 7386 + }, + { + "epoch": 0.5961585021386491, + "grad_norm": 0.7603070735931396, + "learning_rate": 0.00014081303002921902, + "loss": 2.665, + "step": 7387 + }, + { + "epoch": 0.596239205875232, + "grad_norm": 0.6786355376243591, + "learning_rate": 0.00014079861728779141, + "loss": 2.5842, + "step": 7388 + }, + { + "epoch": 0.596319909611815, + "grad_norm": 0.6693331003189087, + "learning_rate": 0.00014078420352955565, + "loss": 2.6211, + "step": 7389 + }, + { + "epoch": 0.596400613348398, + "grad_norm": 0.74013751745224, + "learning_rate": 0.0001407697887548709, + "loss": 2.5886, + "step": 7390 + }, + { + "epoch": 0.5964813170849811, + "grad_norm": 0.739507257938385, + "learning_rate": 0.00014075537296409646, + "loss": 2.607, + "step": 7391 + }, + { + "epoch": 0.5965620208215641, + "grad_norm": 0.7121848464012146, + "learning_rate": 0.00014074095615759156, + "loss": 2.6052, 
+ "step": 7392 + }, + { + "epoch": 0.596642724558147, + "grad_norm": 0.7526760697364807, + "learning_rate": 0.00014072653833571556, + "loss": 2.6051, + "step": 7393 + }, + { + "epoch": 0.59672342829473, + "grad_norm": 0.7867496609687805, + "learning_rate": 0.00014071211949882777, + "loss": 2.6228, + "step": 7394 + }, + { + "epoch": 0.596804132031313, + "grad_norm": 0.7527757883071899, + "learning_rate": 0.00014069769964728752, + "loss": 2.6793, + "step": 7395 + }, + { + "epoch": 0.5968848357678961, + "grad_norm": 0.7096899747848511, + "learning_rate": 0.00014068327878145423, + "loss": 2.5207, + "step": 7396 + }, + { + "epoch": 0.5969655395044791, + "grad_norm": 0.6863983869552612, + "learning_rate": 0.00014066885690168726, + "loss": 2.7059, + "step": 7397 + }, + { + "epoch": 0.597046243241062, + "grad_norm": 0.7782251834869385, + "learning_rate": 0.0001406544340083461, + "loss": 2.6232, + "step": 7398 + }, + { + "epoch": 0.597126946977645, + "grad_norm": 0.6944136619567871, + "learning_rate": 0.00014064001010179013, + "loss": 2.6134, + "step": 7399 + }, + { + "epoch": 0.5972076507142281, + "grad_norm": 0.7629704475402832, + "learning_rate": 0.00014062558518237892, + "loss": 2.5358, + "step": 7400 + }, + { + "epoch": 0.5972883544508111, + "grad_norm": 0.6922330260276794, + "learning_rate": 0.0001406111592504719, + "loss": 2.5457, + "step": 7401 + }, + { + "epoch": 0.597369058187394, + "grad_norm": 0.6992952227592468, + "learning_rate": 0.00014059673230642865, + "loss": 2.6241, + "step": 7402 + }, + { + "epoch": 0.597449761923977, + "grad_norm": 0.6587642431259155, + "learning_rate": 0.0001405823043506087, + "loss": 2.5867, + "step": 7403 + }, + { + "epoch": 0.5975304656605601, + "grad_norm": 0.6993013024330139, + "learning_rate": 0.00014056787538337164, + "loss": 2.6194, + "step": 7404 + }, + { + "epoch": 0.5976111693971431, + "grad_norm": 0.7605414986610413, + "learning_rate": 0.0001405534454050771, + "loss": 2.607, + "step": 7405 + }, + { + "epoch": 
0.5976918731337261, + "grad_norm": 0.6624562740325928, + "learning_rate": 0.00014053901441608466, + "loss": 2.5962, + "step": 7406 + }, + { + "epoch": 0.597772576870309, + "grad_norm": 0.7432621717453003, + "learning_rate": 0.000140524582416754, + "loss": 2.6434, + "step": 7407 + }, + { + "epoch": 0.5978532806068921, + "grad_norm": 0.7184053659439087, + "learning_rate": 0.00014051014940744488, + "loss": 2.6139, + "step": 7408 + }, + { + "epoch": 0.5979339843434751, + "grad_norm": 0.7567455768585205, + "learning_rate": 0.00014049571538851687, + "loss": 2.5788, + "step": 7409 + }, + { + "epoch": 0.5980146880800581, + "grad_norm": 0.6759883761405945, + "learning_rate": 0.00014048128036032984, + "loss": 2.5584, + "step": 7410 + }, + { + "epoch": 0.5980953918166411, + "grad_norm": 0.7607424855232239, + "learning_rate": 0.00014046684432324343, + "loss": 2.5675, + "step": 7411 + }, + { + "epoch": 0.5981760955532242, + "grad_norm": 0.7134036421775818, + "learning_rate": 0.00014045240727761748, + "loss": 2.6805, + "step": 7412 + }, + { + "epoch": 0.5982567992898071, + "grad_norm": 0.6996984481811523, + "learning_rate": 0.00014043796922381184, + "loss": 2.5874, + "step": 7413 + }, + { + "epoch": 0.5983375030263901, + "grad_norm": 0.7098252177238464, + "learning_rate": 0.00014042353016218627, + "loss": 2.5895, + "step": 7414 + }, + { + "epoch": 0.5984182067629731, + "grad_norm": 0.7160520553588867, + "learning_rate": 0.00014040909009310068, + "loss": 2.6042, + "step": 7415 + }, + { + "epoch": 0.5984989104995562, + "grad_norm": 0.6727281212806702, + "learning_rate": 0.00014039464901691493, + "loss": 2.5356, + "step": 7416 + }, + { + "epoch": 0.5985796142361391, + "grad_norm": 0.7052881717681885, + "learning_rate": 0.00014038020693398891, + "loss": 2.6093, + "step": 7417 + }, + { + "epoch": 0.5986603179727221, + "grad_norm": 0.7151781916618347, + "learning_rate": 0.00014036576384468262, + "loss": 2.5776, + "step": 7418 + }, + { + "epoch": 0.5987410217093051, + "grad_norm": 
0.7376574873924255, + "learning_rate": 0.0001403513197493559, + "loss": 2.6246, + "step": 7419 + }, + { + "epoch": 0.5988217254458882, + "grad_norm": 0.6882135272026062, + "learning_rate": 0.00014033687464836892, + "loss": 2.6028, + "step": 7420 + }, + { + "epoch": 0.5989024291824712, + "grad_norm": 0.6603999137878418, + "learning_rate": 0.00014032242854208153, + "loss": 2.5897, + "step": 7421 + }, + { + "epoch": 0.5989831329190541, + "grad_norm": 0.7001559734344482, + "learning_rate": 0.0001403079814308538, + "loss": 2.6033, + "step": 7422 + }, + { + "epoch": 0.5990638366556371, + "grad_norm": 0.7184363603591919, + "learning_rate": 0.00014029353331504582, + "loss": 2.7464, + "step": 7423 + }, + { + "epoch": 0.5991445403922202, + "grad_norm": 0.6794769167900085, + "learning_rate": 0.00014027908419501767, + "loss": 2.569, + "step": 7424 + }, + { + "epoch": 0.5992252441288032, + "grad_norm": 0.6846041083335876, + "learning_rate": 0.00014026463407112942, + "loss": 2.5995, + "step": 7425 + }, + { + "epoch": 0.5993059478653862, + "grad_norm": 0.6539658308029175, + "learning_rate": 0.00014025018294374129, + "loss": 2.5749, + "step": 7426 + }, + { + "epoch": 0.5993866516019691, + "grad_norm": 0.6572301983833313, + "learning_rate": 0.00014023573081321336, + "loss": 2.5312, + "step": 7427 + }, + { + "epoch": 0.5994673553385522, + "grad_norm": 0.7010765671730042, + "learning_rate": 0.00014022127767990581, + "loss": 2.5088, + "step": 7428 + }, + { + "epoch": 0.5995480590751352, + "grad_norm": 0.7193396091461182, + "learning_rate": 0.0001402068235441789, + "loss": 2.6193, + "step": 7429 + }, + { + "epoch": 0.5996287628117182, + "grad_norm": 0.6928533315658569, + "learning_rate": 0.00014019236840639288, + "loss": 2.6149, + "step": 7430 + }, + { + "epoch": 0.5997094665483012, + "grad_norm": 0.743658185005188, + "learning_rate": 0.00014017791226690794, + "loss": 2.5466, + "step": 7431 + }, + { + "epoch": 0.5997901702848842, + "grad_norm": 0.752082347869873, + "learning_rate": 
0.0001401634551260844, + "loss": 2.6605, + "step": 7432 + }, + { + "epoch": 0.5998708740214672, + "grad_norm": 0.7280415296554565, + "learning_rate": 0.00014014899698428255, + "loss": 2.6128, + "step": 7433 + }, + { + "epoch": 0.5999515777580502, + "grad_norm": 0.7037710547447205, + "learning_rate": 0.0001401345378418628, + "loss": 2.6157, + "step": 7434 + }, + { + "epoch": 0.6000322814946332, + "grad_norm": 0.6984395980834961, + "learning_rate": 0.00014012007769918542, + "loss": 2.5579, + "step": 7435 + }, + { + "epoch": 0.6001129852312163, + "grad_norm": 0.6853601336479187, + "learning_rate": 0.00014010561655661085, + "loss": 2.6316, + "step": 7436 + }, + { + "epoch": 0.6001936889677992, + "grad_norm": 0.7551750540733337, + "learning_rate": 0.00014009115441449948, + "loss": 2.6671, + "step": 7437 + }, + { + "epoch": 0.6002743927043822, + "grad_norm": 0.7680155038833618, + "learning_rate": 0.0001400766912732117, + "loss": 2.6301, + "step": 7438 + }, + { + "epoch": 0.6003550964409652, + "grad_norm": 0.6757175922393799, + "learning_rate": 0.00014006222713310807, + "loss": 2.5584, + "step": 7439 + }, + { + "epoch": 0.6004358001775483, + "grad_norm": 0.6636163592338562, + "learning_rate": 0.00014004776199454897, + "loss": 2.5437, + "step": 7440 + }, + { + "epoch": 0.6005165039141312, + "grad_norm": 0.7317774891853333, + "learning_rate": 0.00014003329585789498, + "loss": 2.594, + "step": 7441 + }, + { + "epoch": 0.6005972076507142, + "grad_norm": 0.6903451681137085, + "learning_rate": 0.0001400188287235066, + "loss": 2.6175, + "step": 7442 + }, + { + "epoch": 0.6006779113872972, + "grad_norm": 0.7137858867645264, + "learning_rate": 0.00014000436059174437, + "loss": 2.6411, + "step": 7443 + }, + { + "epoch": 0.6007586151238803, + "grad_norm": 0.7124149203300476, + "learning_rate": 0.00013998989146296893, + "loss": 2.6562, + "step": 7444 + }, + { + "epoch": 0.6008393188604633, + "grad_norm": 0.7518175840377808, + "learning_rate": 0.00013997542133754087, + "loss": 2.6213, 
+ "step": 7445 + }, + { + "epoch": 0.6009200225970462, + "grad_norm": 0.6843053698539734, + "learning_rate": 0.0001399609502158208, + "loss": 2.6099, + "step": 7446 + }, + { + "epoch": 0.6010007263336292, + "grad_norm": 0.6668025255203247, + "learning_rate": 0.0001399464780981694, + "loss": 2.609, + "step": 7447 + }, + { + "epoch": 0.6010814300702122, + "grad_norm": 0.6849119067192078, + "learning_rate": 0.00013993200498494735, + "loss": 2.6097, + "step": 7448 + }, + { + "epoch": 0.6011621338067953, + "grad_norm": 0.7767381072044373, + "learning_rate": 0.0001399175308765153, + "loss": 2.6351, + "step": 7449 + }, + { + "epoch": 0.6012428375433783, + "grad_norm": 0.6630256772041321, + "learning_rate": 0.0001399030557732341, + "loss": 2.5924, + "step": 7450 + }, + { + "epoch": 0.6013235412799612, + "grad_norm": 0.6918755769729614, + "learning_rate": 0.00013988857967546444, + "loss": 2.6205, + "step": 7451 + }, + { + "epoch": 0.6014042450165442, + "grad_norm": 0.7179181575775146, + "learning_rate": 0.00013987410258356708, + "loss": 2.5971, + "step": 7452 + }, + { + "epoch": 0.6014849487531273, + "grad_norm": 0.7233672738075256, + "learning_rate": 0.00013985962449790284, + "loss": 2.595, + "step": 7453 + }, + { + "epoch": 0.6015656524897103, + "grad_norm": 0.6861593127250671, + "learning_rate": 0.0001398451454188326, + "loss": 2.6127, + "step": 7454 + }, + { + "epoch": 0.6016463562262933, + "grad_norm": 0.6818981170654297, + "learning_rate": 0.00013983066534671714, + "loss": 2.5923, + "step": 7455 + }, + { + "epoch": 0.6017270599628762, + "grad_norm": 0.700036346912384, + "learning_rate": 0.0001398161842819174, + "loss": 2.5474, + "step": 7456 + }, + { + "epoch": 0.6018077636994593, + "grad_norm": 0.6884824633598328, + "learning_rate": 0.00013980170222479426, + "loss": 2.6041, + "step": 7457 + }, + { + "epoch": 0.6018884674360423, + "grad_norm": 0.6745120286941528, + "learning_rate": 0.00013978721917570866, + "loss": 2.6638, + "step": 7458 + }, + { + "epoch": 
0.6019691711726253, + "grad_norm": 0.6886256337165833, + "learning_rate": 0.00013977273513502157, + "loss": 2.5733, + "step": 7459 + }, + { + "epoch": 0.6020498749092082, + "grad_norm": 0.7220930457115173, + "learning_rate": 0.00013975825010309394, + "loss": 2.5739, + "step": 7460 + }, + { + "epoch": 0.6021305786457913, + "grad_norm": 0.7281780242919922, + "learning_rate": 0.0001397437640802868, + "loss": 2.5646, + "step": 7461 + }, + { + "epoch": 0.6022112823823743, + "grad_norm": 0.7316896915435791, + "learning_rate": 0.00013972927706696115, + "loss": 2.6532, + "step": 7462 + }, + { + "epoch": 0.6022919861189573, + "grad_norm": 0.6288646459579468, + "learning_rate": 0.00013971478906347806, + "loss": 2.5753, + "step": 7463 + }, + { + "epoch": 0.6023726898555403, + "grad_norm": 0.7110145688056946, + "learning_rate": 0.00013970030007019862, + "loss": 2.6421, + "step": 7464 + }, + { + "epoch": 0.6024533935921234, + "grad_norm": 0.7437754273414612, + "learning_rate": 0.00013968581008748393, + "loss": 2.585, + "step": 7465 + }, + { + "epoch": 0.6025340973287063, + "grad_norm": 0.6839718222618103, + "learning_rate": 0.00013967131911569514, + "loss": 2.6249, + "step": 7466 + }, + { + "epoch": 0.6026148010652893, + "grad_norm": 0.7358397841453552, + "learning_rate": 0.00013965682715519332, + "loss": 2.597, + "step": 7467 + }, + { + "epoch": 0.6026955048018723, + "grad_norm": 0.673651397228241, + "learning_rate": 0.00013964233420633973, + "loss": 2.6111, + "step": 7468 + }, + { + "epoch": 0.6027762085384554, + "grad_norm": 0.7390083074569702, + "learning_rate": 0.00013962784026949553, + "loss": 2.6131, + "step": 7469 + }, + { + "epoch": 0.6028569122750383, + "grad_norm": 0.6902220249176025, + "learning_rate": 0.00013961334534502197, + "loss": 2.6116, + "step": 7470 + }, + { + "epoch": 0.6029376160116213, + "grad_norm": 0.6946651935577393, + "learning_rate": 0.00013959884943328033, + "loss": 2.6307, + "step": 7471 + }, + { + "epoch": 0.6030183197482043, + "grad_norm": 
0.7277294993400574, + "learning_rate": 0.00013958435253463183, + "loss": 2.6065, + "step": 7472 + }, + { + "epoch": 0.6030990234847874, + "grad_norm": 0.743833601474762, + "learning_rate": 0.00013956985464943776, + "loss": 2.6644, + "step": 7473 + }, + { + "epoch": 0.6031797272213704, + "grad_norm": 0.6480288505554199, + "learning_rate": 0.0001395553557780595, + "loss": 2.5386, + "step": 7474 + }, + { + "epoch": 0.6032604309579533, + "grad_norm": 0.799443781375885, + "learning_rate": 0.00013954085592085834, + "loss": 2.5653, + "step": 7475 + }, + { + "epoch": 0.6033411346945363, + "grad_norm": 0.6790705323219299, + "learning_rate": 0.00013952635507819575, + "loss": 2.6229, + "step": 7476 + }, + { + "epoch": 0.6034218384311194, + "grad_norm": 0.6871588826179504, + "learning_rate": 0.00013951185325043302, + "loss": 2.6514, + "step": 7477 + }, + { + "epoch": 0.6035025421677024, + "grad_norm": 0.7236921787261963, + "learning_rate": 0.00013949735043793164, + "loss": 2.5931, + "step": 7478 + }, + { + "epoch": 0.6035832459042854, + "grad_norm": 0.6888518929481506, + "learning_rate": 0.00013948284664105305, + "loss": 2.6408, + "step": 7479 + }, + { + "epoch": 0.6036639496408683, + "grad_norm": 0.7292625904083252, + "learning_rate": 0.00013946834186015868, + "loss": 2.5829, + "step": 7480 + }, + { + "epoch": 0.6037446533774514, + "grad_norm": 0.6755293607711792, + "learning_rate": 0.00013945383609561009, + "loss": 2.5917, + "step": 7481 + }, + { + "epoch": 0.6038253571140344, + "grad_norm": 0.6808032989501953, + "learning_rate": 0.00013943932934776877, + "loss": 2.6103, + "step": 7482 + }, + { + "epoch": 0.6039060608506174, + "grad_norm": 0.747173547744751, + "learning_rate": 0.00013942482161699625, + "loss": 2.624, + "step": 7483 + }, + { + "epoch": 0.6039867645872004, + "grad_norm": 0.7265594005584717, + "learning_rate": 0.00013941031290365413, + "loss": 2.5672, + "step": 7484 + }, + { + "epoch": 0.6040674683237834, + "grad_norm": 0.6434060335159302, + "learning_rate": 
0.000139395803208104, + "loss": 2.5885, + "step": 7485 + }, + { + "epoch": 0.6041481720603664, + "grad_norm": 0.7148730754852295, + "learning_rate": 0.00013938129253070747, + "loss": 2.6466, + "step": 7486 + }, + { + "epoch": 0.6042288757969494, + "grad_norm": 0.7724708318710327, + "learning_rate": 0.00013936678087182616, + "loss": 2.6364, + "step": 7487 + }, + { + "epoch": 0.6043095795335324, + "grad_norm": 0.6886702179908752, + "learning_rate": 0.0001393522682318218, + "loss": 2.5844, + "step": 7488 + }, + { + "epoch": 0.6043902832701155, + "grad_norm": 0.6501082181930542, + "learning_rate": 0.00013933775461105603, + "loss": 2.5767, + "step": 7489 + }, + { + "epoch": 0.6044709870066984, + "grad_norm": 0.7333959341049194, + "learning_rate": 0.00013932324000989058, + "loss": 2.5735, + "step": 7490 + }, + { + "epoch": 0.6045516907432814, + "grad_norm": 0.7057361602783203, + "learning_rate": 0.00013930872442868722, + "loss": 2.627, + "step": 7491 + }, + { + "epoch": 0.6046323944798644, + "grad_norm": 0.705078661441803, + "learning_rate": 0.00013929420786780767, + "loss": 2.6012, + "step": 7492 + }, + { + "epoch": 0.6047130982164475, + "grad_norm": 0.7192156314849854, + "learning_rate": 0.00013927969032761378, + "loss": 2.5594, + "step": 7493 + }, + { + "epoch": 0.6047938019530305, + "grad_norm": 0.703116774559021, + "learning_rate": 0.00013926517180846726, + "loss": 2.6099, + "step": 7494 + }, + { + "epoch": 0.6048745056896134, + "grad_norm": 0.6970264315605164, + "learning_rate": 0.00013925065231073006, + "loss": 2.5832, + "step": 7495 + }, + { + "epoch": 0.6049552094261964, + "grad_norm": 0.7308031320571899, + "learning_rate": 0.00013923613183476402, + "loss": 2.586, + "step": 7496 + }, + { + "epoch": 0.6050359131627794, + "grad_norm": 0.7212777137756348, + "learning_rate": 0.00013922161038093097, + "loss": 2.6374, + "step": 7497 + }, + { + "epoch": 0.6051166168993625, + "grad_norm": 0.6644641757011414, + "learning_rate": 0.0001392070879495929, + "loss": 2.5226, + 
"step": 7498 + }, + { + "epoch": 0.6051973206359454, + "grad_norm": 0.6683016419410706, + "learning_rate": 0.0001391925645411117, + "loss": 2.5279, + "step": 7499 + }, + { + "epoch": 0.6052780243725284, + "grad_norm": 0.7341439127922058, + "learning_rate": 0.00013917804015584932, + "loss": 2.5995, + "step": 7500 + }, + { + "epoch": 0.6053587281091114, + "grad_norm": 0.753942608833313, + "learning_rate": 0.0001391635147941678, + "loss": 2.5706, + "step": 7501 + }, + { + "epoch": 0.6054394318456945, + "grad_norm": 0.7541958093643188, + "learning_rate": 0.00013914898845642908, + "loss": 2.6365, + "step": 7502 + }, + { + "epoch": 0.6055201355822775, + "grad_norm": 0.6583349108695984, + "learning_rate": 0.00013913446114299528, + "loss": 2.534, + "step": 7503 + }, + { + "epoch": 0.6056008393188604, + "grad_norm": 0.6545756459236145, + "learning_rate": 0.00013911993285422835, + "loss": 2.5443, + "step": 7504 + }, + { + "epoch": 0.6056815430554434, + "grad_norm": 0.8290210366249084, + "learning_rate": 0.00013910540359049045, + "loss": 2.6196, + "step": 7505 + }, + { + "epoch": 0.6057622467920265, + "grad_norm": 0.7032577395439148, + "learning_rate": 0.0001390908733521437, + "loss": 2.6575, + "step": 7506 + }, + { + "epoch": 0.6058429505286095, + "grad_norm": 0.7018071413040161, + "learning_rate": 0.0001390763421395502, + "loss": 2.6272, + "step": 7507 + }, + { + "epoch": 0.6059236542651925, + "grad_norm": 0.6288552284240723, + "learning_rate": 0.00013906180995307206, + "loss": 2.5295, + "step": 7508 + }, + { + "epoch": 0.6060043580017754, + "grad_norm": 0.7013774514198303, + "learning_rate": 0.00013904727679307153, + "loss": 2.5669, + "step": 7509 + }, + { + "epoch": 0.6060850617383585, + "grad_norm": 0.6811630129814148, + "learning_rate": 0.00013903274265991082, + "loss": 2.5827, + "step": 7510 + }, + { + "epoch": 0.6061657654749415, + "grad_norm": 0.6690269112586975, + "learning_rate": 0.0001390182075539521, + "loss": 2.5947, + "step": 7511 + }, + { + "epoch": 
0.6062464692115245, + "grad_norm": 0.6946289539337158, + "learning_rate": 0.00013900367147555768, + "loss": 2.59, + "step": 7512 + }, + { + "epoch": 0.6063271729481075, + "grad_norm": 0.7302843332290649, + "learning_rate": 0.0001389891344250898, + "loss": 2.5994, + "step": 7513 + }, + { + "epoch": 0.6064078766846905, + "grad_norm": 0.7462306022644043, + "learning_rate": 0.00013897459640291074, + "loss": 2.5983, + "step": 7514 + }, + { + "epoch": 0.6064885804212735, + "grad_norm": 0.6948123574256897, + "learning_rate": 0.0001389600574093829, + "loss": 2.5737, + "step": 7515 + }, + { + "epoch": 0.6065692841578565, + "grad_norm": 0.6897372007369995, + "learning_rate": 0.00013894551744486857, + "loss": 2.607, + "step": 7516 + }, + { + "epoch": 0.6066499878944395, + "grad_norm": 0.6808069348335266, + "learning_rate": 0.00013893097650973015, + "loss": 2.5712, + "step": 7517 + }, + { + "epoch": 0.6067306916310226, + "grad_norm": 0.7000731229782104, + "learning_rate": 0.00013891643460433, + "loss": 2.5654, + "step": 7518 + }, + { + "epoch": 0.6068113953676055, + "grad_norm": 0.7197545766830444, + "learning_rate": 0.0001389018917290306, + "loss": 2.5705, + "step": 7519 + }, + { + "epoch": 0.6068920991041885, + "grad_norm": 0.7001069188117981, + "learning_rate": 0.00013888734788419433, + "loss": 2.5934, + "step": 7520 + }, + { + "epoch": 0.6069728028407715, + "grad_norm": 0.7480459213256836, + "learning_rate": 0.00013887280307018377, + "loss": 2.5211, + "step": 7521 + }, + { + "epoch": 0.6070535065773546, + "grad_norm": 0.6913945078849792, + "learning_rate": 0.00013885825728736132, + "loss": 2.6013, + "step": 7522 + }, + { + "epoch": 0.6071342103139376, + "grad_norm": 0.6527336239814758, + "learning_rate": 0.00013884371053608948, + "loss": 2.5901, + "step": 7523 + }, + { + "epoch": 0.6072149140505205, + "grad_norm": 0.6897335052490234, + "learning_rate": 0.00013882916281673086, + "loss": 2.5389, + "step": 7524 + }, + { + "epoch": 0.6072956177871035, + "grad_norm": 
0.7159501910209656, + "learning_rate": 0.00013881461412964798, + "loss": 2.5399, + "step": 7525 + }, + { + "epoch": 0.6073763215236866, + "grad_norm": 0.6744364500045776, + "learning_rate": 0.00013880006447520346, + "loss": 2.5658, + "step": 7526 + }, + { + "epoch": 0.6074570252602696, + "grad_norm": 0.819950520992279, + "learning_rate": 0.00013878551385375994, + "loss": 2.6143, + "step": 7527 + }, + { + "epoch": 0.6075377289968525, + "grad_norm": 0.744293212890625, + "learning_rate": 0.00013877096226568, + "loss": 2.6565, + "step": 7528 + }, + { + "epoch": 0.6076184327334355, + "grad_norm": 0.7121254205703735, + "learning_rate": 0.00013875640971132636, + "loss": 2.6151, + "step": 7529 + }, + { + "epoch": 0.6076991364700186, + "grad_norm": 0.7616204023361206, + "learning_rate": 0.00013874185619106163, + "loss": 2.6395, + "step": 7530 + }, + { + "epoch": 0.6077798402066016, + "grad_norm": 0.7481076121330261, + "learning_rate": 0.0001387273017052486, + "loss": 2.597, + "step": 7531 + }, + { + "epoch": 0.6078605439431846, + "grad_norm": 0.6660816073417664, + "learning_rate": 0.00013871274625425, + "loss": 2.5696, + "step": 7532 + }, + { + "epoch": 0.6079412476797675, + "grad_norm": 0.7491411566734314, + "learning_rate": 0.00013869818983842854, + "loss": 2.552, + "step": 7533 + }, + { + "epoch": 0.6080219514163506, + "grad_norm": 0.7130792140960693, + "learning_rate": 0.00013868363245814704, + "loss": 2.5959, + "step": 7534 + }, + { + "epoch": 0.6081026551529336, + "grad_norm": 0.7157341241836548, + "learning_rate": 0.00013866907411376827, + "loss": 2.5598, + "step": 7535 + }, + { + "epoch": 0.6081833588895166, + "grad_norm": 0.7750656008720398, + "learning_rate": 0.00013865451480565513, + "loss": 2.6217, + "step": 7536 + }, + { + "epoch": 0.6082640626260996, + "grad_norm": 0.6915080547332764, + "learning_rate": 0.00013863995453417043, + "loss": 2.6211, + "step": 7537 + }, + { + "epoch": 0.6083447663626826, + "grad_norm": 0.7245940566062927, + "learning_rate": 
0.00013862539329967706, + "loss": 2.5619, + "step": 7538 + }, + { + "epoch": 0.6084254700992656, + "grad_norm": 0.8884119391441345, + "learning_rate": 0.0001386108311025379, + "loss": 2.6349, + "step": 7539 + }, + { + "epoch": 0.6085061738358486, + "grad_norm": 0.7889477610588074, + "learning_rate": 0.0001385962679431159, + "loss": 2.6169, + "step": 7540 + }, + { + "epoch": 0.6085868775724316, + "grad_norm": 0.7187505960464478, + "learning_rate": 0.00013858170382177403, + "loss": 2.5582, + "step": 7541 + }, + { + "epoch": 0.6086675813090147, + "grad_norm": 0.7502198219299316, + "learning_rate": 0.00013856713873887526, + "loss": 2.5418, + "step": 7542 + }, + { + "epoch": 0.6087482850455976, + "grad_norm": 0.797704815864563, + "learning_rate": 0.00013855257269478256, + "loss": 2.5764, + "step": 7543 + }, + { + "epoch": 0.6088289887821806, + "grad_norm": 0.7651431560516357, + "learning_rate": 0.00013853800568985896, + "loss": 2.5995, + "step": 7544 + }, + { + "epoch": 0.6089096925187636, + "grad_norm": 0.7048482298851013, + "learning_rate": 0.00013852343772446753, + "loss": 2.5656, + "step": 7545 + }, + { + "epoch": 0.6089903962553467, + "grad_norm": 0.7252251505851746, + "learning_rate": 0.00013850886879897135, + "loss": 2.6509, + "step": 7546 + }, + { + "epoch": 0.6090710999919297, + "grad_norm": 0.7220067381858826, + "learning_rate": 0.00013849429891373344, + "loss": 2.5558, + "step": 7547 + }, + { + "epoch": 0.6091518037285126, + "grad_norm": 0.7672600746154785, + "learning_rate": 0.000138479728069117, + "loss": 2.5682, + "step": 7548 + }, + { + "epoch": 0.6092325074650956, + "grad_norm": 0.7753601670265198, + "learning_rate": 0.0001384651562654852, + "loss": 2.6459, + "step": 7549 + }, + { + "epoch": 0.6093132112016786, + "grad_norm": 0.7346559166908264, + "learning_rate": 0.00013845058350320108, + "loss": 2.5988, + "step": 7550 + }, + { + "epoch": 0.6093939149382617, + "grad_norm": 0.7386072874069214, + "learning_rate": 0.00013843600978262797, + "loss": 2.6366, 
+ "step": 7551 + }, + { + "epoch": 0.6094746186748446, + "grad_norm": 0.7114188075065613, + "learning_rate": 0.00013842143510412898, + "loss": 2.5515, + "step": 7552 + }, + { + "epoch": 0.6095553224114276, + "grad_norm": 0.6836373209953308, + "learning_rate": 0.00013840685946806742, + "loss": 2.6301, + "step": 7553 + }, + { + "epoch": 0.6096360261480106, + "grad_norm": 0.7548927068710327, + "learning_rate": 0.00013839228287480652, + "loss": 2.6508, + "step": 7554 + }, + { + "epoch": 0.6097167298845937, + "grad_norm": 0.6931679248809814, + "learning_rate": 0.00013837770532470957, + "loss": 2.5535, + "step": 7555 + }, + { + "epoch": 0.6097974336211767, + "grad_norm": 0.7621145248413086, + "learning_rate": 0.00013836312681813988, + "loss": 2.6831, + "step": 7556 + }, + { + "epoch": 0.6098781373577596, + "grad_norm": 0.6735427975654602, + "learning_rate": 0.00013834854735546079, + "loss": 2.5338, + "step": 7557 + }, + { + "epoch": 0.6099588410943426, + "grad_norm": 0.7157600522041321, + "learning_rate": 0.00013833396693703565, + "loss": 2.5713, + "step": 7558 + }, + { + "epoch": 0.6100395448309257, + "grad_norm": 0.718032956123352, + "learning_rate": 0.00013831938556322789, + "loss": 2.5625, + "step": 7559 + }, + { + "epoch": 0.6101202485675087, + "grad_norm": 0.7290309071540833, + "learning_rate": 0.0001383048032344008, + "loss": 2.5956, + "step": 7560 + }, + { + "epoch": 0.6102009523040917, + "grad_norm": 0.675470769405365, + "learning_rate": 0.00013829021995091792, + "loss": 2.6053, + "step": 7561 + }, + { + "epoch": 0.6102816560406746, + "grad_norm": 0.7348767518997192, + "learning_rate": 0.00013827563571314268, + "loss": 2.6174, + "step": 7562 + }, + { + "epoch": 0.6103623597772577, + "grad_norm": 0.64495849609375, + "learning_rate": 0.00013826105052143852, + "loss": 2.5923, + "step": 7563 + }, + { + "epoch": 0.6104430635138407, + "grad_norm": 0.7379264235496521, + "learning_rate": 0.000138246464376169, + "loss": 2.6438, + "step": 7564 + }, + { + "epoch": 
0.6105237672504237, + "grad_norm": 0.7802134156227112, + "learning_rate": 0.00013823187727769756, + "loss": 2.5884, + "step": 7565 + }, + { + "epoch": 0.6106044709870067, + "grad_norm": 0.6907222867012024, + "learning_rate": 0.00013821728922638782, + "loss": 2.596, + "step": 7566 + }, + { + "epoch": 0.6106851747235897, + "grad_norm": 0.6924182176589966, + "learning_rate": 0.00013820270022260335, + "loss": 2.5631, + "step": 7567 + }, + { + "epoch": 0.6107658784601727, + "grad_norm": 0.729258120059967, + "learning_rate": 0.0001381881102667077, + "loss": 2.5761, + "step": 7568 + }, + { + "epoch": 0.6108465821967557, + "grad_norm": 0.7141425013542175, + "learning_rate": 0.00013817351935906455, + "loss": 2.6214, + "step": 7569 + }, + { + "epoch": 0.6109272859333387, + "grad_norm": 0.7564505338668823, + "learning_rate": 0.00013815892750003748, + "loss": 2.6338, + "step": 7570 + }, + { + "epoch": 0.6110079896699218, + "grad_norm": 0.674705982208252, + "learning_rate": 0.00013814433468999022, + "loss": 2.5604, + "step": 7571 + }, + { + "epoch": 0.6110886934065047, + "grad_norm": 0.6956657767295837, + "learning_rate": 0.00013812974092928642, + "loss": 2.5805, + "step": 7572 + }, + { + "epoch": 0.6111693971430877, + "grad_norm": 0.7393823862075806, + "learning_rate": 0.0001381151462182898, + "loss": 2.6312, + "step": 7573 + }, + { + "epoch": 0.6112501008796707, + "grad_norm": 0.7048184275627136, + "learning_rate": 0.00013810055055736407, + "loss": 2.5948, + "step": 7574 + }, + { + "epoch": 0.6113308046162538, + "grad_norm": 0.748798668384552, + "learning_rate": 0.0001380859539468731, + "loss": 2.5815, + "step": 7575 + }, + { + "epoch": 0.6114115083528368, + "grad_norm": 0.7146531343460083, + "learning_rate": 0.00013807135638718048, + "loss": 2.5803, + "step": 7576 + }, + { + "epoch": 0.6114922120894197, + "grad_norm": 0.6883770823478699, + "learning_rate": 0.00013805675787865025, + "loss": 2.6005, + "step": 7577 + }, + { + "epoch": 0.6115729158260027, + "grad_norm": 
0.7808375358581543, + "learning_rate": 0.0001380421584216461, + "loss": 2.6539, + "step": 7578 + }, + { + "epoch": 0.6116536195625858, + "grad_norm": 0.6919417977333069, + "learning_rate": 0.00013802755801653192, + "loss": 2.5812, + "step": 7579 + }, + { + "epoch": 0.6117343232991688, + "grad_norm": 0.6651085615158081, + "learning_rate": 0.0001380129566636716, + "loss": 2.5952, + "step": 7580 + }, + { + "epoch": 0.6118150270357517, + "grad_norm": 0.7806586623191833, + "learning_rate": 0.00013799835436342897, + "loss": 2.6509, + "step": 7581 + }, + { + "epoch": 0.6118957307723347, + "grad_norm": 0.6522969007492065, + "learning_rate": 0.0001379837511161681, + "loss": 2.606, + "step": 7582 + }, + { + "epoch": 0.6119764345089178, + "grad_norm": 0.7566540837287903, + "learning_rate": 0.0001379691469222528, + "loss": 2.6625, + "step": 7583 + }, + { + "epoch": 0.6120571382455008, + "grad_norm": 0.7126421928405762, + "learning_rate": 0.00013795454178204715, + "loss": 2.6396, + "step": 7584 + }, + { + "epoch": 0.6121378419820838, + "grad_norm": 0.6534276008605957, + "learning_rate": 0.0001379399356959151, + "loss": 2.5841, + "step": 7585 + }, + { + "epoch": 0.6122185457186667, + "grad_norm": 0.7663385272026062, + "learning_rate": 0.00013792532866422065, + "loss": 2.6685, + "step": 7586 + }, + { + "epoch": 0.6122992494552498, + "grad_norm": 0.6971656084060669, + "learning_rate": 0.0001379107206873279, + "loss": 2.6036, + "step": 7587 + }, + { + "epoch": 0.6123799531918328, + "grad_norm": 0.6807122230529785, + "learning_rate": 0.00013789611176560088, + "loss": 2.6499, + "step": 7588 + }, + { + "epoch": 0.6124606569284158, + "grad_norm": 0.6712431311607361, + "learning_rate": 0.0001378815018994037, + "loss": 2.6725, + "step": 7589 + }, + { + "epoch": 0.6125413606649988, + "grad_norm": 0.6986604928970337, + "learning_rate": 0.00013786689108910045, + "loss": 2.6159, + "step": 7590 + }, + { + "epoch": 0.6126220644015818, + "grad_norm": 0.7004108428955078, + "learning_rate": 
0.0001378522793350553, + "loss": 2.5743, + "step": 7591 + }, + { + "epoch": 0.6127027681381648, + "grad_norm": 0.6782098412513733, + "learning_rate": 0.00013783766663763239, + "loss": 2.5776, + "step": 7592 + }, + { + "epoch": 0.6127834718747478, + "grad_norm": 0.6697036027908325, + "learning_rate": 0.00013782305299719593, + "loss": 2.6195, + "step": 7593 + }, + { + "epoch": 0.6128641756113308, + "grad_norm": 0.6894395351409912, + "learning_rate": 0.00013780843841411014, + "loss": 2.662, + "step": 7594 + }, + { + "epoch": 0.6129448793479139, + "grad_norm": 0.6775636672973633, + "learning_rate": 0.00013779382288873918, + "loss": 2.6083, + "step": 7595 + }, + { + "epoch": 0.6130255830844968, + "grad_norm": 0.7143577337265015, + "learning_rate": 0.00013777920642144738, + "loss": 2.581, + "step": 7596 + }, + { + "epoch": 0.6131062868210798, + "grad_norm": 0.6143797636032104, + "learning_rate": 0.00013776458901259905, + "loss": 2.541, + "step": 7597 + }, + { + "epoch": 0.6131869905576628, + "grad_norm": 0.7003727555274963, + "learning_rate": 0.00013774997066255839, + "loss": 2.5748, + "step": 7598 + }, + { + "epoch": 0.6132676942942458, + "grad_norm": 0.6796504259109497, + "learning_rate": 0.0001377353513716898, + "loss": 2.596, + "step": 7599 + }, + { + "epoch": 0.6133483980308289, + "grad_norm": 0.7011274695396423, + "learning_rate": 0.00013772073114035762, + "loss": 2.5318, + "step": 7600 + }, + { + "epoch": 0.6134291017674118, + "grad_norm": 0.6584382057189941, + "learning_rate": 0.0001377061099689262, + "loss": 2.5793, + "step": 7601 + }, + { + "epoch": 0.6135098055039948, + "grad_norm": 0.6586211919784546, + "learning_rate": 0.00013769148785775995, + "loss": 2.5969, + "step": 7602 + }, + { + "epoch": 0.6135905092405778, + "grad_norm": 0.7187132835388184, + "learning_rate": 0.0001376768648072233, + "loss": 2.6407, + "step": 7603 + }, + { + "epoch": 0.6136712129771609, + "grad_norm": 0.7394679188728333, + "learning_rate": 0.00013766224081768072, + "loss": 2.5959, + 
"step": 7604 + }, + { + "epoch": 0.6137519167137439, + "grad_norm": 0.6802375912666321, + "learning_rate": 0.00013764761588949665, + "loss": 2.5956, + "step": 7605 + }, + { + "epoch": 0.6138326204503268, + "grad_norm": 0.6949049234390259, + "learning_rate": 0.00013763299002303553, + "loss": 2.556, + "step": 7606 + }, + { + "epoch": 0.6139133241869098, + "grad_norm": 0.7406589388847351, + "learning_rate": 0.00013761836321866196, + "loss": 2.5495, + "step": 7607 + }, + { + "epoch": 0.6139940279234929, + "grad_norm": 0.742499053478241, + "learning_rate": 0.0001376037354767404, + "loss": 2.589, + "step": 7608 + }, + { + "epoch": 0.6140747316600759, + "grad_norm": 0.7669157385826111, + "learning_rate": 0.00013758910679763551, + "loss": 2.576, + "step": 7609 + }, + { + "epoch": 0.6141554353966588, + "grad_norm": 0.6506752967834473, + "learning_rate": 0.00013757447718171182, + "loss": 2.5792, + "step": 7610 + }, + { + "epoch": 0.6142361391332418, + "grad_norm": 0.698514461517334, + "learning_rate": 0.00013755984662933393, + "loss": 2.5809, + "step": 7611 + }, + { + "epoch": 0.6143168428698249, + "grad_norm": 0.6541082262992859, + "learning_rate": 0.00013754521514086645, + "loss": 2.5755, + "step": 7612 + }, + { + "epoch": 0.6143975466064079, + "grad_norm": 0.6619362235069275, + "learning_rate": 0.0001375305827166741, + "loss": 2.5886, + "step": 7613 + }, + { + "epoch": 0.6144782503429909, + "grad_norm": 0.7205569744110107, + "learning_rate": 0.00013751594935712148, + "loss": 2.6293, + "step": 7614 + }, + { + "epoch": 0.6145589540795738, + "grad_norm": 0.7382494211196899, + "learning_rate": 0.00013750131506257339, + "loss": 2.6977, + "step": 7615 + }, + { + "epoch": 0.6146396578161569, + "grad_norm": 0.7492627501487732, + "learning_rate": 0.00013748667983339444, + "loss": 2.6492, + "step": 7616 + }, + { + "epoch": 0.6147203615527399, + "grad_norm": 0.6627328991889954, + "learning_rate": 0.00013747204366994947, + "loss": 2.5458, + "step": 7617 + }, + { + "epoch": 
0.6148010652893229, + "grad_norm": 0.7039626836776733, + "learning_rate": 0.00013745740657260323, + "loss": 2.6578, + "step": 7618 + }, + { + "epoch": 0.6148817690259059, + "grad_norm": 0.6999295353889465, + "learning_rate": 0.00013744276854172046, + "loss": 2.6189, + "step": 7619 + }, + { + "epoch": 0.6149624727624889, + "grad_norm": 0.7604365348815918, + "learning_rate": 0.00013742812957766607, + "loss": 2.5344, + "step": 7620 + }, + { + "epoch": 0.6150431764990719, + "grad_norm": 0.6860831379890442, + "learning_rate": 0.0001374134896808048, + "loss": 2.6309, + "step": 7621 + }, + { + "epoch": 0.6151238802356549, + "grad_norm": 0.6628854274749756, + "learning_rate": 0.0001373988488515016, + "loss": 2.6339, + "step": 7622 + }, + { + "epoch": 0.6152045839722379, + "grad_norm": 0.7112562656402588, + "learning_rate": 0.00013738420709012134, + "loss": 2.6064, + "step": 7623 + }, + { + "epoch": 0.615285287708821, + "grad_norm": 0.7068392634391785, + "learning_rate": 0.0001373695643970289, + "loss": 2.624, + "step": 7624 + }, + { + "epoch": 0.6153659914454039, + "grad_norm": 0.6534786224365234, + "learning_rate": 0.00013735492077258924, + "loss": 2.5582, + "step": 7625 + }, + { + "epoch": 0.6154466951819869, + "grad_norm": 0.7433418035507202, + "learning_rate": 0.00013734027621716729, + "loss": 2.5803, + "step": 7626 + }, + { + "epoch": 0.6155273989185699, + "grad_norm": 0.7172532081604004, + "learning_rate": 0.00013732563073112804, + "loss": 2.5906, + "step": 7627 + }, + { + "epoch": 0.615608102655153, + "grad_norm": 0.6712297201156616, + "learning_rate": 0.00013731098431483653, + "loss": 2.5597, + "step": 7628 + }, + { + "epoch": 0.615688806391736, + "grad_norm": 0.7079061269760132, + "learning_rate": 0.00013729633696865775, + "loss": 2.5538, + "step": 7629 + }, + { + "epoch": 0.6157695101283189, + "grad_norm": 0.6968971490859985, + "learning_rate": 0.00013728168869295678, + "loss": 2.6429, + "step": 7630 + }, + { + "epoch": 0.6158502138649019, + "grad_norm": 
0.7123236060142517, + "learning_rate": 0.00013726703948809864, + "loss": 2.5607, + "step": 7631 + }, + { + "epoch": 0.615930917601485, + "grad_norm": 0.6441208124160767, + "learning_rate": 0.00013725238935444843, + "loss": 2.6176, + "step": 7632 + }, + { + "epoch": 0.616011621338068, + "grad_norm": 0.7145917415618896, + "learning_rate": 0.00013723773829237137, + "loss": 2.5698, + "step": 7633 + }, + { + "epoch": 0.616092325074651, + "grad_norm": 0.6397334337234497, + "learning_rate": 0.00013722308630223252, + "loss": 2.596, + "step": 7634 + }, + { + "epoch": 0.6161730288112339, + "grad_norm": 0.6372843980789185, + "learning_rate": 0.00013720843338439702, + "loss": 2.5679, + "step": 7635 + }, + { + "epoch": 0.616253732547817, + "grad_norm": 0.707842230796814, + "learning_rate": 0.00013719377953923012, + "loss": 2.6296, + "step": 7636 + }, + { + "epoch": 0.6163344362844, + "grad_norm": 0.6629409193992615, + "learning_rate": 0.000137179124767097, + "loss": 2.542, + "step": 7637 + }, + { + "epoch": 0.616415140020983, + "grad_norm": 0.753646194934845, + "learning_rate": 0.00013716446906836288, + "loss": 2.5741, + "step": 7638 + }, + { + "epoch": 0.6164958437575659, + "grad_norm": 0.6409948468208313, + "learning_rate": 0.0001371498124433931, + "loss": 2.6723, + "step": 7639 + }, + { + "epoch": 0.616576547494149, + "grad_norm": 0.6489264965057373, + "learning_rate": 0.0001371351548925528, + "loss": 2.5806, + "step": 7640 + }, + { + "epoch": 0.616657251230732, + "grad_norm": 0.6857934594154358, + "learning_rate": 0.00013712049641620745, + "loss": 2.6406, + "step": 7641 + }, + { + "epoch": 0.616737954967315, + "grad_norm": 0.6754183769226074, + "learning_rate": 0.00013710583701472226, + "loss": 2.5576, + "step": 7642 + }, + { + "epoch": 0.616818658703898, + "grad_norm": 0.7083800435066223, + "learning_rate": 0.0001370911766884626, + "loss": 2.5747, + "step": 7643 + }, + { + "epoch": 0.616899362440481, + "grad_norm": 0.7281948924064636, + "learning_rate": 
0.0001370765154377939, + "loss": 2.5627, + "step": 7644 + }, + { + "epoch": 0.616980066177064, + "grad_norm": 0.655414342880249, + "learning_rate": 0.00013706185326308148, + "loss": 2.5897, + "step": 7645 + }, + { + "epoch": 0.617060769913647, + "grad_norm": 0.6771859526634216, + "learning_rate": 0.0001370471901646908, + "loss": 2.5761, + "step": 7646 + }, + { + "epoch": 0.61714147365023, + "grad_norm": 0.6813557147979736, + "learning_rate": 0.00013703252614298732, + "loss": 2.5807, + "step": 7647 + }, + { + "epoch": 0.6172221773868131, + "grad_norm": 0.6948046684265137, + "learning_rate": 0.00013701786119833646, + "loss": 2.586, + "step": 7648 + }, + { + "epoch": 0.617302881123396, + "grad_norm": 0.643455982208252, + "learning_rate": 0.00013700319533110377, + "loss": 2.592, + "step": 7649 + }, + { + "epoch": 0.617383584859979, + "grad_norm": 0.7292457818984985, + "learning_rate": 0.0001369885285416547, + "loss": 2.6396, + "step": 7650 + }, + { + "epoch": 0.617464288596562, + "grad_norm": 0.642902672290802, + "learning_rate": 0.00013697386083035478, + "loss": 2.6115, + "step": 7651 + }, + { + "epoch": 0.617544992333145, + "grad_norm": 0.6536445021629333, + "learning_rate": 0.00013695919219756966, + "loss": 2.5406, + "step": 7652 + }, + { + "epoch": 0.6176256960697281, + "grad_norm": 0.6643723249435425, + "learning_rate": 0.0001369445226436648, + "loss": 2.6188, + "step": 7653 + }, + { + "epoch": 0.617706399806311, + "grad_norm": 0.6481621265411377, + "learning_rate": 0.00013692985216900592, + "loss": 2.5489, + "step": 7654 + }, + { + "epoch": 0.617787103542894, + "grad_norm": 0.6828036904335022, + "learning_rate": 0.00013691518077395856, + "loss": 2.5114, + "step": 7655 + }, + { + "epoch": 0.617867807279477, + "grad_norm": 0.6802895665168762, + "learning_rate": 0.00013690050845888838, + "loss": 2.5973, + "step": 7656 + }, + { + "epoch": 0.6179485110160601, + "grad_norm": 0.6980829238891602, + "learning_rate": 0.00013688583522416107, + "loss": 2.6032, + "step": 7657 
+ }, + { + "epoch": 0.618029214752643, + "grad_norm": 0.7157626748085022, + "learning_rate": 0.00013687116107014236, + "loss": 2.5552, + "step": 7658 + }, + { + "epoch": 0.618109918489226, + "grad_norm": 0.69700688123703, + "learning_rate": 0.00013685648599719792, + "loss": 2.5988, + "step": 7659 + }, + { + "epoch": 0.618190622225809, + "grad_norm": 0.6859539151191711, + "learning_rate": 0.0001368418100056935, + "loss": 2.6268, + "step": 7660 + }, + { + "epoch": 0.6182713259623921, + "grad_norm": 0.6812828183174133, + "learning_rate": 0.00013682713309599487, + "loss": 2.6002, + "step": 7661 + }, + { + "epoch": 0.6183520296989751, + "grad_norm": 0.6461766362190247, + "learning_rate": 0.00013681245526846783, + "loss": 2.6064, + "step": 7662 + }, + { + "epoch": 0.618432733435558, + "grad_norm": 0.7198306322097778, + "learning_rate": 0.00013679777652347814, + "loss": 2.6012, + "step": 7663 + }, + { + "epoch": 0.618513437172141, + "grad_norm": 0.7367191910743713, + "learning_rate": 0.00013678309686139168, + "loss": 2.6661, + "step": 7664 + }, + { + "epoch": 0.6185941409087241, + "grad_norm": 0.6975768804550171, + "learning_rate": 0.0001367684162825743, + "loss": 2.6394, + "step": 7665 + }, + { + "epoch": 0.6186748446453071, + "grad_norm": 0.7545140385627747, + "learning_rate": 0.0001367537347873919, + "loss": 2.624, + "step": 7666 + }, + { + "epoch": 0.6187555483818901, + "grad_norm": 0.6683520674705505, + "learning_rate": 0.0001367390523762103, + "loss": 2.6345, + "step": 7667 + }, + { + "epoch": 0.618836252118473, + "grad_norm": 0.6964975595474243, + "learning_rate": 0.00013672436904939552, + "loss": 2.591, + "step": 7668 + }, + { + "epoch": 0.6189169558550561, + "grad_norm": 0.7033975124359131, + "learning_rate": 0.00013670968480731344, + "loss": 2.566, + "step": 7669 + }, + { + "epoch": 0.6189976595916391, + "grad_norm": 0.706136167049408, + "learning_rate": 0.00013669499965033007, + "loss": 2.6073, + "step": 7670 + }, + { + "epoch": 0.6190783633282221, + 
"grad_norm": 0.7146300673484802, + "learning_rate": 0.0001366803135788114, + "loss": 2.6602, + "step": 7671 + }, + { + "epoch": 0.6191590670648051, + "grad_norm": 0.7603063583374023, + "learning_rate": 0.00013666562659312342, + "loss": 2.5286, + "step": 7672 + }, + { + "epoch": 0.6192397708013881, + "grad_norm": 0.744955837726593, + "learning_rate": 0.00013665093869363217, + "loss": 2.5678, + "step": 7673 + }, + { + "epoch": 0.6193204745379711, + "grad_norm": 0.7548620104789734, + "learning_rate": 0.00013663624988070373, + "loss": 2.6081, + "step": 7674 + }, + { + "epoch": 0.6194011782745541, + "grad_norm": 0.7367276549339294, + "learning_rate": 0.0001366215601547042, + "loss": 2.5559, + "step": 7675 + }, + { + "epoch": 0.6194818820111371, + "grad_norm": 0.7243839502334595, + "learning_rate": 0.00013660686951599962, + "loss": 2.5545, + "step": 7676 + }, + { + "epoch": 0.6195625857477202, + "grad_norm": 0.7595756649971008, + "learning_rate": 0.00013659217796495616, + "loss": 2.6547, + "step": 7677 + }, + { + "epoch": 0.6196432894843031, + "grad_norm": 0.7566717863082886, + "learning_rate": 0.00013657748550193998, + "loss": 2.6521, + "step": 7678 + }, + { + "epoch": 0.6197239932208861, + "grad_norm": 0.8441942930221558, + "learning_rate": 0.00013656279212731728, + "loss": 2.6325, + "step": 7679 + }, + { + "epoch": 0.6198046969574691, + "grad_norm": 0.7481170296669006, + "learning_rate": 0.00013654809784145418, + "loss": 2.6037, + "step": 7680 + }, + { + "epoch": 0.6198854006940522, + "grad_norm": 0.6626241207122803, + "learning_rate": 0.00013653340264471695, + "loss": 2.6028, + "step": 7681 + }, + { + "epoch": 0.6199661044306352, + "grad_norm": 0.7658020853996277, + "learning_rate": 0.00013651870653747186, + "loss": 2.5553, + "step": 7682 + }, + { + "epoch": 0.6200468081672181, + "grad_norm": 0.8218126893043518, + "learning_rate": 0.0001365040095200851, + "loss": 2.5661, + "step": 7683 + }, + { + "epoch": 0.6201275119038011, + "grad_norm": 0.6481068134307861, + 
"learning_rate": 0.00013648931159292304, + "loss": 2.5675, + "step": 7684 + }, + { + "epoch": 0.6202082156403842, + "grad_norm": 0.7529950141906738, + "learning_rate": 0.0001364746127563519, + "loss": 2.6137, + "step": 7685 + }, + { + "epoch": 0.6202889193769672, + "grad_norm": 0.7133232355117798, + "learning_rate": 0.00013645991301073816, + "loss": 2.6004, + "step": 7686 + }, + { + "epoch": 0.6203696231135502, + "grad_norm": 0.7809340953826904, + "learning_rate": 0.000136445212356448, + "loss": 2.6317, + "step": 7687 + }, + { + "epoch": 0.6204503268501331, + "grad_norm": 0.7106895446777344, + "learning_rate": 0.00013643051079384789, + "loss": 2.6086, + "step": 7688 + }, + { + "epoch": 0.6205310305867162, + "grad_norm": 0.6960744261741638, + "learning_rate": 0.00013641580832330423, + "loss": 2.5554, + "step": 7689 + }, + { + "epoch": 0.6206117343232992, + "grad_norm": 0.7078820466995239, + "learning_rate": 0.00013640110494518343, + "loss": 2.5902, + "step": 7690 + }, + { + "epoch": 0.6206924380598822, + "grad_norm": 0.7150746583938599, + "learning_rate": 0.00013638640065985195, + "loss": 2.5947, + "step": 7691 + }, + { + "epoch": 0.6207731417964651, + "grad_norm": 0.7507869601249695, + "learning_rate": 0.00013637169546767625, + "loss": 2.559, + "step": 7692 + }, + { + "epoch": 0.6208538455330482, + "grad_norm": 0.7453179359436035, + "learning_rate": 0.00013635698936902282, + "loss": 2.5612, + "step": 7693 + }, + { + "epoch": 0.6209345492696312, + "grad_norm": 0.7174177765846252, + "learning_rate": 0.00013634228236425816, + "loss": 2.6221, + "step": 7694 + }, + { + "epoch": 0.6210152530062142, + "grad_norm": 0.7394092679023743, + "learning_rate": 0.00013632757445374884, + "loss": 2.6045, + "step": 7695 + }, + { + "epoch": 0.6210959567427972, + "grad_norm": 0.7346367239952087, + "learning_rate": 0.0001363128656378614, + "loss": 2.677, + "step": 7696 + }, + { + "epoch": 0.6211766604793802, + "grad_norm": 0.6697696447372437, + "learning_rate": 0.00013629815591696245, + 
"loss": 2.5741, + "step": 7697 + }, + { + "epoch": 0.6212573642159632, + "grad_norm": 0.6993793845176697, + "learning_rate": 0.00013628344529141852, + "loss": 2.5206, + "step": 7698 + }, + { + "epoch": 0.6213380679525462, + "grad_norm": 0.6946697235107422, + "learning_rate": 0.00013626873376159631, + "loss": 2.6046, + "step": 7699 + }, + { + "epoch": 0.6214187716891292, + "grad_norm": 0.7641928195953369, + "learning_rate": 0.00013625402132786248, + "loss": 2.5459, + "step": 7700 + }, + { + "epoch": 0.6214994754257122, + "grad_norm": 0.6513504981994629, + "learning_rate": 0.00013623930799058363, + "loss": 2.6137, + "step": 7701 + }, + { + "epoch": 0.6215801791622952, + "grad_norm": 0.6745209097862244, + "learning_rate": 0.00013622459375012651, + "loss": 2.5285, + "step": 7702 + }, + { + "epoch": 0.6216608828988782, + "grad_norm": 0.7162348628044128, + "learning_rate": 0.0001362098786068578, + "loss": 2.6224, + "step": 7703 + }, + { + "epoch": 0.6217415866354612, + "grad_norm": 0.7387436032295227, + "learning_rate": 0.00013619516256114427, + "loss": 2.6216, + "step": 7704 + }, + { + "epoch": 0.6218222903720442, + "grad_norm": 0.764955461025238, + "learning_rate": 0.00013618044561335268, + "loss": 2.612, + "step": 7705 + }, + { + "epoch": 0.6219029941086273, + "grad_norm": 0.6492719054222107, + "learning_rate": 0.00013616572776384983, + "loss": 2.5532, + "step": 7706 + }, + { + "epoch": 0.6219836978452102, + "grad_norm": 0.6870293617248535, + "learning_rate": 0.0001361510090130025, + "loss": 2.5705, + "step": 7707 + }, + { + "epoch": 0.6220644015817932, + "grad_norm": 0.6899540424346924, + "learning_rate": 0.0001361362893611775, + "loss": 2.5768, + "step": 7708 + }, + { + "epoch": 0.6221451053183762, + "grad_norm": 0.658941924571991, + "learning_rate": 0.0001361215688087417, + "loss": 2.5664, + "step": 7709 + }, + { + "epoch": 0.6222258090549593, + "grad_norm": 0.6875531673431396, + "learning_rate": 0.000136106847356062, + "loss": 2.6128, + "step": 7710 + }, + { + 
"epoch": 0.6223065127915423, + "grad_norm": 0.657073974609375, + "learning_rate": 0.0001360921250035053, + "loss": 2.6449, + "step": 7711 + }, + { + "epoch": 0.6223872165281252, + "grad_norm": 0.7051201462745667, + "learning_rate": 0.00013607740175143848, + "loss": 2.5925, + "step": 7712 + }, + { + "epoch": 0.6224679202647082, + "grad_norm": 0.702877938747406, + "learning_rate": 0.0001360626776002285, + "loss": 2.5338, + "step": 7713 + }, + { + "epoch": 0.6225486240012913, + "grad_norm": 0.650935709476471, + "learning_rate": 0.00013604795255024233, + "loss": 2.5799, + "step": 7714 + }, + { + "epoch": 0.6226293277378743, + "grad_norm": 0.7035139203071594, + "learning_rate": 0.00013603322660184694, + "loss": 2.5476, + "step": 7715 + }, + { + "epoch": 0.6227100314744572, + "grad_norm": 0.6549977660179138, + "learning_rate": 0.0001360184997554094, + "loss": 2.6117, + "step": 7716 + }, + { + "epoch": 0.6227907352110402, + "grad_norm": 0.6882792115211487, + "learning_rate": 0.00013600377201129662, + "loss": 2.53, + "step": 7717 + }, + { + "epoch": 0.6228714389476233, + "grad_norm": 0.7390840649604797, + "learning_rate": 0.0001359890433698758, + "loss": 2.6345, + "step": 7718 + }, + { + "epoch": 0.6229521426842063, + "grad_norm": 0.7577612400054932, + "learning_rate": 0.00013597431383151386, + "loss": 2.6386, + "step": 7719 + }, + { + "epoch": 0.6230328464207893, + "grad_norm": 0.6818724870681763, + "learning_rate": 0.00013595958339657804, + "loss": 2.5806, + "step": 7720 + }, + { + "epoch": 0.6231135501573722, + "grad_norm": 0.6954349279403687, + "learning_rate": 0.0001359448520654354, + "loss": 2.5913, + "step": 7721 + }, + { + "epoch": 0.6231942538939553, + "grad_norm": 0.7976544499397278, + "learning_rate": 0.00013593011983845308, + "loss": 2.5686, + "step": 7722 + }, + { + "epoch": 0.6232749576305383, + "grad_norm": 0.7362754940986633, + "learning_rate": 0.00013591538671599824, + "loss": 2.5596, + "step": 7723 + }, + { + "epoch": 0.6233556613671213, + "grad_norm": 
0.6842390298843384, + "learning_rate": 0.00013590065269843805, + "loss": 2.5793, + "step": 7724 + }, + { + "epoch": 0.6234363651037043, + "grad_norm": 0.6816275715827942, + "learning_rate": 0.0001358859177861398, + "loss": 2.5948, + "step": 7725 + }, + { + "epoch": 0.6235170688402873, + "grad_norm": 0.6892915964126587, + "learning_rate": 0.00013587118197947066, + "loss": 2.6287, + "step": 7726 + }, + { + "epoch": 0.6235977725768703, + "grad_norm": 0.6851752996444702, + "learning_rate": 0.00013585644527879792, + "loss": 2.5781, + "step": 7727 + }, + { + "epoch": 0.6236784763134533, + "grad_norm": 0.7022164463996887, + "learning_rate": 0.00013584170768448877, + "loss": 2.5856, + "step": 7728 + }, + { + "epoch": 0.6237591800500363, + "grad_norm": 0.6752299070358276, + "learning_rate": 0.0001358269691969106, + "loss": 2.6042, + "step": 7729 + }, + { + "epoch": 0.6238398837866194, + "grad_norm": 0.6861466765403748, + "learning_rate": 0.00013581222981643074, + "loss": 2.5887, + "step": 7730 + }, + { + "epoch": 0.6239205875232023, + "grad_norm": 0.7147940397262573, + "learning_rate": 0.00013579748954341647, + "loss": 2.5796, + "step": 7731 + }, + { + "epoch": 0.6240012912597853, + "grad_norm": 0.6704726219177246, + "learning_rate": 0.0001357827483782352, + "loss": 2.6027, + "step": 7732 + }, + { + "epoch": 0.6240819949963683, + "grad_norm": 0.6984317898750305, + "learning_rate": 0.0001357680063212543, + "loss": 2.635, + "step": 7733 + }, + { + "epoch": 0.6241626987329514, + "grad_norm": 0.6205787658691406, + "learning_rate": 0.00013575326337284115, + "loss": 2.5715, + "step": 7734 + }, + { + "epoch": 0.6242434024695344, + "grad_norm": 0.7214726805686951, + "learning_rate": 0.00013573851953336326, + "loss": 2.5605, + "step": 7735 + }, + { + "epoch": 0.6243241062061173, + "grad_norm": 0.6716169714927673, + "learning_rate": 0.000135723774803188, + "loss": 2.6766, + "step": 7736 + }, + { + "epoch": 0.6244048099427003, + "grad_norm": 0.6446832418441772, + "learning_rate": 
0.00013570902918268293, + "loss": 2.5629, + "step": 7737 + }, + { + "epoch": 0.6244855136792834, + "grad_norm": 0.6721374988555908, + "learning_rate": 0.0001356942826722155, + "loss": 2.6093, + "step": 7738 + }, + { + "epoch": 0.6245662174158664, + "grad_norm": 0.7430365681648254, + "learning_rate": 0.0001356795352721532, + "loss": 2.5966, + "step": 7739 + }, + { + "epoch": 0.6246469211524494, + "grad_norm": 0.6787518858909607, + "learning_rate": 0.00013566478698286366, + "loss": 2.5519, + "step": 7740 + }, + { + "epoch": 0.6247276248890323, + "grad_norm": 0.6340047121047974, + "learning_rate": 0.0001356500378047144, + "loss": 2.5181, + "step": 7741 + }, + { + "epoch": 0.6248083286256154, + "grad_norm": 0.7559040188789368, + "learning_rate": 0.000135635287738073, + "loss": 2.6068, + "step": 7742 + }, + { + "epoch": 0.6248890323621984, + "grad_norm": 0.6819902062416077, + "learning_rate": 0.00013562053678330707, + "loss": 2.5754, + "step": 7743 + }, + { + "epoch": 0.6249697360987814, + "grad_norm": 0.6463500261306763, + "learning_rate": 0.00013560578494078423, + "loss": 2.5915, + "step": 7744 + }, + { + "epoch": 0.6250504398353643, + "grad_norm": 0.7510617971420288, + "learning_rate": 0.0001355910322108722, + "loss": 2.5738, + "step": 7745 + }, + { + "epoch": 0.6251311435719474, + "grad_norm": 0.75312739610672, + "learning_rate": 0.00013557627859393855, + "loss": 2.5938, + "step": 7746 + }, + { + "epoch": 0.6252118473085304, + "grad_norm": 0.7784396409988403, + "learning_rate": 0.0001355615240903511, + "loss": 2.6634, + "step": 7747 + }, + { + "epoch": 0.6252925510451134, + "grad_norm": 0.7174746990203857, + "learning_rate": 0.00013554676870047752, + "loss": 2.5973, + "step": 7748 + }, + { + "epoch": 0.6253732547816964, + "grad_norm": 0.6854952573776245, + "learning_rate": 0.0001355320124246855, + "loss": 2.5397, + "step": 7749 + }, + { + "epoch": 0.6254539585182795, + "grad_norm": 0.6584961414337158, + "learning_rate": 0.00013551725526334284, + "loss": 2.5574, + 
"step": 7750 + }, + { + "epoch": 0.6255346622548624, + "grad_norm": 0.7067389488220215, + "learning_rate": 0.00013550249721681738, + "loss": 2.5524, + "step": 7751 + }, + { + "epoch": 0.6256153659914454, + "grad_norm": 0.6923872232437134, + "learning_rate": 0.00013548773828547686, + "loss": 2.5651, + "step": 7752 + }, + { + "epoch": 0.6256960697280284, + "grad_norm": 0.6612355709075928, + "learning_rate": 0.00013547297846968915, + "loss": 2.6075, + "step": 7753 + }, + { + "epoch": 0.6257767734646114, + "grad_norm": 0.6762828826904297, + "learning_rate": 0.00013545821776982206, + "loss": 2.6136, + "step": 7754 + }, + { + "epoch": 0.6258574772011944, + "grad_norm": 0.6940783858299255, + "learning_rate": 0.0001354434561862435, + "loss": 2.5566, + "step": 7755 + }, + { + "epoch": 0.6259381809377774, + "grad_norm": 0.7874250411987305, + "learning_rate": 0.0001354286937193214, + "loss": 2.6732, + "step": 7756 + }, + { + "epoch": 0.6260188846743604, + "grad_norm": 0.6974111795425415, + "learning_rate": 0.0001354139303694236, + "loss": 2.5455, + "step": 7757 + }, + { + "epoch": 0.6260995884109434, + "grad_norm": 0.6710802316665649, + "learning_rate": 0.0001353991661369181, + "loss": 2.5608, + "step": 7758 + }, + { + "epoch": 0.6261802921475265, + "grad_norm": 0.681635320186615, + "learning_rate": 0.00013538440102217286, + "loss": 2.6107, + "step": 7759 + }, + { + "epoch": 0.6262609958841094, + "grad_norm": 0.7229577898979187, + "learning_rate": 0.0001353696350255558, + "loss": 2.5936, + "step": 7760 + }, + { + "epoch": 0.6263416996206924, + "grad_norm": 0.6909681558609009, + "learning_rate": 0.00013535486814743504, + "loss": 2.5521, + "step": 7761 + }, + { + "epoch": 0.6264224033572754, + "grad_norm": 0.7003746032714844, + "learning_rate": 0.0001353401003881785, + "loss": 2.5606, + "step": 7762 + }, + { + "epoch": 0.6265031070938585, + "grad_norm": 0.6883233785629272, + "learning_rate": 0.0001353253317481543, + "loss": 2.5971, + "step": 7763 + }, + { + "epoch": 
0.6265838108304415, + "grad_norm": 0.7382355332374573, + "learning_rate": 0.0001353105622277305, + "loss": 2.5449, + "step": 7764 + }, + { + "epoch": 0.6266645145670244, + "grad_norm": 0.7090556621551514, + "learning_rate": 0.00013529579182727515, + "loss": 2.5988, + "step": 7765 + }, + { + "epoch": 0.6267452183036074, + "grad_norm": 0.6842581629753113, + "learning_rate": 0.00013528102054715643, + "loss": 2.6214, + "step": 7766 + }, + { + "epoch": 0.6268259220401905, + "grad_norm": 0.6969670653343201, + "learning_rate": 0.00013526624838774246, + "loss": 2.5443, + "step": 7767 + }, + { + "epoch": 0.6269066257767735, + "grad_norm": 0.7244827151298523, + "learning_rate": 0.00013525147534940138, + "loss": 2.5967, + "step": 7768 + }, + { + "epoch": 0.6269873295133565, + "grad_norm": 0.7022162675857544, + "learning_rate": 0.0001352367014325014, + "loss": 2.599, + "step": 7769 + }, + { + "epoch": 0.6270680332499394, + "grad_norm": 0.7065250873565674, + "learning_rate": 0.00013522192663741067, + "loss": 2.6105, + "step": 7770 + }, + { + "epoch": 0.6271487369865225, + "grad_norm": 0.6690711975097656, + "learning_rate": 0.0001352071509644975, + "loss": 2.55, + "step": 7771 + }, + { + "epoch": 0.6272294407231055, + "grad_norm": 0.6405982971191406, + "learning_rate": 0.00013519237441413011, + "loss": 2.6078, + "step": 7772 + }, + { + "epoch": 0.6273101444596885, + "grad_norm": 0.7340127229690552, + "learning_rate": 0.00013517759698667672, + "loss": 2.6244, + "step": 7773 + }, + { + "epoch": 0.6273908481962714, + "grad_norm": 0.6609435677528381, + "learning_rate": 0.00013516281868250566, + "loss": 2.5746, + "step": 7774 + }, + { + "epoch": 0.6274715519328545, + "grad_norm": 0.6681997179985046, + "learning_rate": 0.00013514803950198523, + "loss": 2.6181, + "step": 7775 + }, + { + "epoch": 0.6275522556694375, + "grad_norm": 0.7120032906532288, + "learning_rate": 0.0001351332594454838, + "loss": 2.6018, + "step": 7776 + }, + { + "epoch": 0.6276329594060205, + "grad_norm": 
0.6618601679801941, + "learning_rate": 0.0001351184785133697, + "loss": 2.5342, + "step": 7777 + }, + { + "epoch": 0.6277136631426035, + "grad_norm": 0.7250192165374756, + "learning_rate": 0.00013510369670601132, + "loss": 2.5795, + "step": 7778 + }, + { + "epoch": 0.6277943668791865, + "grad_norm": 0.7918543219566345, + "learning_rate": 0.00013508891402377708, + "loss": 2.6544, + "step": 7779 + }, + { + "epoch": 0.6278750706157695, + "grad_norm": 0.678895890712738, + "learning_rate": 0.00013507413046703534, + "loss": 2.5937, + "step": 7780 + }, + { + "epoch": 0.6279557743523525, + "grad_norm": 0.7336576581001282, + "learning_rate": 0.00013505934603615457, + "loss": 2.598, + "step": 7781 + }, + { + "epoch": 0.6280364780889355, + "grad_norm": 0.6891419291496277, + "learning_rate": 0.00013504456073150332, + "loss": 2.5063, + "step": 7782 + }, + { + "epoch": 0.6281171818255186, + "grad_norm": 0.7949386835098267, + "learning_rate": 0.00013502977455344997, + "loss": 2.5703, + "step": 7783 + }, + { + "epoch": 0.6281978855621015, + "grad_norm": 0.7917985320091248, + "learning_rate": 0.00013501498750236306, + "loss": 2.639, + "step": 7784 + }, + { + "epoch": 0.6282785892986845, + "grad_norm": 0.7387086749076843, + "learning_rate": 0.00013500019957861113, + "loss": 2.5864, + "step": 7785 + }, + { + "epoch": 0.6283592930352675, + "grad_norm": 0.7189435958862305, + "learning_rate": 0.00013498541078256273, + "loss": 2.5627, + "step": 7786 + }, + { + "epoch": 0.6284399967718506, + "grad_norm": 0.6709900498390198, + "learning_rate": 0.00013497062111458646, + "loss": 2.5973, + "step": 7787 + }, + { + "epoch": 0.6285207005084336, + "grad_norm": 0.6925386190414429, + "learning_rate": 0.0001349558305750509, + "loss": 2.615, + "step": 7788 + }, + { + "epoch": 0.6286014042450165, + "grad_norm": 0.7191932201385498, + "learning_rate": 0.00013494103916432466, + "loss": 2.576, + "step": 7789 + }, + { + "epoch": 0.6286821079815995, + "grad_norm": 0.6798804402351379, + "learning_rate": 
0.00013492624688277638, + "loss": 2.5661, + "step": 7790 + }, + { + "epoch": 0.6287628117181826, + "grad_norm": 0.6514562964439392, + "learning_rate": 0.00013491145373077475, + "loss": 2.6135, + "step": 7791 + }, + { + "epoch": 0.6288435154547656, + "grad_norm": 0.7345223426818848, + "learning_rate": 0.00013489665970868838, + "loss": 2.6015, + "step": 7792 + }, + { + "epoch": 0.6289242191913486, + "grad_norm": 0.7102675437927246, + "learning_rate": 0.0001348818648168861, + "loss": 2.5545, + "step": 7793 + }, + { + "epoch": 0.6290049229279315, + "grad_norm": 0.7151654362678528, + "learning_rate": 0.0001348670690557365, + "loss": 2.6464, + "step": 7794 + }, + { + "epoch": 0.6290856266645146, + "grad_norm": 0.7344057559967041, + "learning_rate": 0.00013485227242560844, + "loss": 2.6777, + "step": 7795 + }, + { + "epoch": 0.6291663304010976, + "grad_norm": 0.6622766852378845, + "learning_rate": 0.00013483747492687065, + "loss": 2.5713, + "step": 7796 + }, + { + "epoch": 0.6292470341376806, + "grad_norm": 0.6899346709251404, + "learning_rate": 0.0001348226765598919, + "loss": 2.5188, + "step": 7797 + }, + { + "epoch": 0.6293277378742635, + "grad_norm": 0.6711421012878418, + "learning_rate": 0.000134807877325041, + "loss": 2.5603, + "step": 7798 + }, + { + "epoch": 0.6294084416108466, + "grad_norm": 0.6973204016685486, + "learning_rate": 0.00013479307722268687, + "loss": 2.6621, + "step": 7799 + }, + { + "epoch": 0.6294891453474296, + "grad_norm": 0.7782350778579712, + "learning_rate": 0.00013477827625319824, + "loss": 2.5929, + "step": 7800 + }, + { + "epoch": 0.6295698490840126, + "grad_norm": 0.8703733682632446, + "learning_rate": 0.0001347634744169441, + "loss": 2.6884, + "step": 7801 + }, + { + "epoch": 0.6296505528205956, + "grad_norm": 0.7196036577224731, + "learning_rate": 0.00013474867171429326, + "loss": 2.6002, + "step": 7802 + }, + { + "epoch": 0.6297312565571785, + "grad_norm": 0.7224054932594299, + "learning_rate": 0.00013473386814561475, + "loss": 2.6007, 
+ "step": 7803 + }, + { + "epoch": 0.6298119602937616, + "grad_norm": 0.7615752816200256, + "learning_rate": 0.00013471906371127743, + "loss": 2.6459, + "step": 7804 + }, + { + "epoch": 0.6298926640303446, + "grad_norm": 0.7189914584159851, + "learning_rate": 0.00013470425841165024, + "loss": 2.5692, + "step": 7805 + }, + { + "epoch": 0.6299733677669276, + "grad_norm": 0.7101845741271973, + "learning_rate": 0.00013468945224710225, + "loss": 2.5776, + "step": 7806 + }, + { + "epoch": 0.6300540715035106, + "grad_norm": 0.6860305666923523, + "learning_rate": 0.00013467464521800244, + "loss": 2.5567, + "step": 7807 + }, + { + "epoch": 0.6301347752400936, + "grad_norm": 0.7003797292709351, + "learning_rate": 0.0001346598373247198, + "loss": 2.6444, + "step": 7808 + }, + { + "epoch": 0.6302154789766766, + "grad_norm": 0.6341832876205444, + "learning_rate": 0.00013464502856762344, + "loss": 2.5475, + "step": 7809 + }, + { + "epoch": 0.6302961827132596, + "grad_norm": 0.6255922317504883, + "learning_rate": 0.00013463021894708242, + "loss": 2.5875, + "step": 7810 + }, + { + "epoch": 0.6303768864498426, + "grad_norm": 0.7136420607566833, + "learning_rate": 0.00013461540846346575, + "loss": 2.5708, + "step": 7811 + }, + { + "epoch": 0.6304575901864257, + "grad_norm": 0.7164542078971863, + "learning_rate": 0.00013460059711714267, + "loss": 2.4975, + "step": 7812 + }, + { + "epoch": 0.6305382939230086, + "grad_norm": 0.7667872905731201, + "learning_rate": 0.00013458578490848226, + "loss": 2.6124, + "step": 7813 + }, + { + "epoch": 0.6306189976595916, + "grad_norm": 0.6631812453269958, + "learning_rate": 0.0001345709718378537, + "loss": 2.5318, + "step": 7814 + }, + { + "epoch": 0.6306997013961746, + "grad_norm": 0.696864664554596, + "learning_rate": 0.0001345561579056261, + "loss": 2.6171, + "step": 7815 + }, + { + "epoch": 0.6307804051327577, + "grad_norm": 0.7368598580360413, + "learning_rate": 0.00013454134311216873, + "loss": 2.5734, + "step": 7816 + }, + { + "epoch": 
0.6308611088693407, + "grad_norm": 0.7279712557792664, + "learning_rate": 0.00013452652745785083, + "loss": 2.6231, + "step": 7817 + }, + { + "epoch": 0.6309418126059236, + "grad_norm": 0.8070993423461914, + "learning_rate": 0.00013451171094304158, + "loss": 2.5486, + "step": 7818 + }, + { + "epoch": 0.6310225163425066, + "grad_norm": 0.7522621750831604, + "learning_rate": 0.0001344968935681103, + "loss": 2.5576, + "step": 7819 + }, + { + "epoch": 0.6311032200790897, + "grad_norm": 0.8185423612594604, + "learning_rate": 0.00013448207533342624, + "loss": 2.6068, + "step": 7820 + }, + { + "epoch": 0.6311839238156727, + "grad_norm": 0.7542584538459778, + "learning_rate": 0.0001344672562393587, + "loss": 2.643, + "step": 7821 + }, + { + "epoch": 0.6312646275522557, + "grad_norm": 0.7892276644706726, + "learning_rate": 0.00013445243628627712, + "loss": 2.6211, + "step": 7822 + }, + { + "epoch": 0.6313453312888386, + "grad_norm": 0.7216602563858032, + "learning_rate": 0.00013443761547455072, + "loss": 2.5725, + "step": 7823 + }, + { + "epoch": 0.6314260350254217, + "grad_norm": 0.6750743985176086, + "learning_rate": 0.0001344227938045489, + "loss": 2.5319, + "step": 7824 + }, + { + "epoch": 0.6315067387620047, + "grad_norm": 0.6711540222167969, + "learning_rate": 0.0001344079712766411, + "loss": 2.5957, + "step": 7825 + }, + { + "epoch": 0.6315874424985877, + "grad_norm": 0.6923524737358093, + "learning_rate": 0.00013439314789119667, + "loss": 2.6084, + "step": 7826 + }, + { + "epoch": 0.6316681462351706, + "grad_norm": 0.6859166026115417, + "learning_rate": 0.00013437832364858517, + "loss": 2.5608, + "step": 7827 + }, + { + "epoch": 0.6317488499717537, + "grad_norm": 0.7340966463088989, + "learning_rate": 0.0001343634985491759, + "loss": 2.531, + "step": 7828 + }, + { + "epoch": 0.6318295537083367, + "grad_norm": 0.7374520301818848, + "learning_rate": 0.00013434867259333848, + "loss": 2.5972, + "step": 7829 + }, + { + "epoch": 0.6319102574449197, + "grad_norm": 
0.7252814769744873, + "learning_rate": 0.00013433384578144232, + "loss": 2.5874, + "step": 7830 + }, + { + "epoch": 0.6319909611815027, + "grad_norm": 0.7000489830970764, + "learning_rate": 0.000134319018113857, + "loss": 2.6137, + "step": 7831 + }, + { + "epoch": 0.6320716649180858, + "grad_norm": 0.805981457233429, + "learning_rate": 0.00013430418959095198, + "loss": 2.5581, + "step": 7832 + }, + { + "epoch": 0.6321523686546687, + "grad_norm": 0.7459721565246582, + "learning_rate": 0.00013428936021309693, + "loss": 2.5284, + "step": 7833 + }, + { + "epoch": 0.6322330723912517, + "grad_norm": 0.749794065952301, + "learning_rate": 0.00013427452998066136, + "loss": 2.5927, + "step": 7834 + }, + { + "epoch": 0.6323137761278347, + "grad_norm": 0.6925346255302429, + "learning_rate": 0.00013425969889401494, + "loss": 2.5703, + "step": 7835 + }, + { + "epoch": 0.6323944798644178, + "grad_norm": 0.6647117137908936, + "learning_rate": 0.00013424486695352728, + "loss": 2.5649, + "step": 7836 + }, + { + "epoch": 0.6324751836010007, + "grad_norm": 0.7358147501945496, + "learning_rate": 0.00013423003415956796, + "loss": 2.6122, + "step": 7837 + }, + { + "epoch": 0.6325558873375837, + "grad_norm": 0.7798088788986206, + "learning_rate": 0.00013421520051250675, + "loss": 2.5805, + "step": 7838 + }, + { + "epoch": 0.6326365910741667, + "grad_norm": 0.7108271718025208, + "learning_rate": 0.00013420036601271334, + "loss": 2.5457, + "step": 7839 + }, + { + "epoch": 0.6327172948107498, + "grad_norm": 0.7108528017997742, + "learning_rate": 0.00013418553066055734, + "loss": 2.6313, + "step": 7840 + }, + { + "epoch": 0.6327979985473328, + "grad_norm": 0.7325249910354614, + "learning_rate": 0.00013417069445640858, + "loss": 2.5598, + "step": 7841 + }, + { + "epoch": 0.6328787022839157, + "grad_norm": 0.6861844062805176, + "learning_rate": 0.0001341558574006368, + "loss": 2.5899, + "step": 7842 + }, + { + "epoch": 0.6329594060204987, + "grad_norm": 0.7576130628585815, + "learning_rate": 
0.00013414101949361175, + "loss": 2.6077, + "step": 7843 + }, + { + "epoch": 0.6330401097570818, + "grad_norm": 0.7756128907203674, + "learning_rate": 0.0001341261807357033, + "loss": 2.6111, + "step": 7844 + }, + { + "epoch": 0.6331208134936648, + "grad_norm": 0.7131127715110779, + "learning_rate": 0.00013411134112728114, + "loss": 2.5227, + "step": 7845 + }, + { + "epoch": 0.6332015172302478, + "grad_norm": 0.6517898440361023, + "learning_rate": 0.00013409650066871525, + "loss": 2.5825, + "step": 7846 + }, + { + "epoch": 0.6332822209668307, + "grad_norm": 0.8452722430229187, + "learning_rate": 0.0001340816593603754, + "loss": 2.6037, + "step": 7847 + }, + { + "epoch": 0.6333629247034138, + "grad_norm": 0.7421110272407532, + "learning_rate": 0.00013406681720263153, + "loss": 2.5684, + "step": 7848 + }, + { + "epoch": 0.6334436284399968, + "grad_norm": 0.695139467716217, + "learning_rate": 0.0001340519741958535, + "loss": 2.5648, + "step": 7849 + }, + { + "epoch": 0.6335243321765798, + "grad_norm": 0.7780016660690308, + "learning_rate": 0.0001340371303404113, + "loss": 2.6849, + "step": 7850 + }, + { + "epoch": 0.6336050359131628, + "grad_norm": 0.7276864051818848, + "learning_rate": 0.00013402228563667482, + "loss": 2.6198, + "step": 7851 + }, + { + "epoch": 0.6336857396497458, + "grad_norm": 0.7566827535629272, + "learning_rate": 0.00013400744008501404, + "loss": 2.5803, + "step": 7852 + }, + { + "epoch": 0.6337664433863288, + "grad_norm": 0.7933458089828491, + "learning_rate": 0.00013399259368579894, + "loss": 2.6029, + "step": 7853 + }, + { + "epoch": 0.6338471471229118, + "grad_norm": 0.6849822402000427, + "learning_rate": 0.00013397774643939957, + "loss": 2.5454, + "step": 7854 + }, + { + "epoch": 0.6339278508594948, + "grad_norm": 0.7054651379585266, + "learning_rate": 0.00013396289834618594, + "loss": 2.5905, + "step": 7855 + }, + { + "epoch": 0.6340085545960777, + "grad_norm": 0.7036863565444946, + "learning_rate": 0.00013394804940652813, + "loss": 2.6342, 
+ "step": 7856 + }, + { + "epoch": 0.6340892583326608, + "grad_norm": 0.7101735472679138, + "learning_rate": 0.00013393319962079614, + "loss": 2.6402, + "step": 7857 + }, + { + "epoch": 0.6341699620692438, + "grad_norm": 0.7053956389427185, + "learning_rate": 0.0001339183489893601, + "loss": 2.5841, + "step": 7858 + }, + { + "epoch": 0.6342506658058268, + "grad_norm": 0.7734887003898621, + "learning_rate": 0.0001339034975125902, + "loss": 2.652, + "step": 7859 + }, + { + "epoch": 0.6343313695424098, + "grad_norm": 0.6714119911193848, + "learning_rate": 0.0001338886451908565, + "loss": 2.5927, + "step": 7860 + }, + { + "epoch": 0.6344120732789928, + "grad_norm": 0.6580910682678223, + "learning_rate": 0.00013387379202452917, + "loss": 2.6114, + "step": 7861 + }, + { + "epoch": 0.6344927770155758, + "grad_norm": 0.6810200214385986, + "learning_rate": 0.00013385893801397836, + "loss": 2.5616, + "step": 7862 + }, + { + "epoch": 0.6345734807521588, + "grad_norm": 0.6989572048187256, + "learning_rate": 0.00013384408315957432, + "loss": 2.5954, + "step": 7863 + }, + { + "epoch": 0.6346541844887418, + "grad_norm": 0.7033671736717224, + "learning_rate": 0.00013382922746168728, + "loss": 2.6015, + "step": 7864 + }, + { + "epoch": 0.6347348882253249, + "grad_norm": 0.6873033046722412, + "learning_rate": 0.0001338143709206875, + "loss": 2.562, + "step": 7865 + }, + { + "epoch": 0.6348155919619078, + "grad_norm": 0.7361463904380798, + "learning_rate": 0.00013379951353694513, + "loss": 2.6175, + "step": 7866 + }, + { + "epoch": 0.6348962956984908, + "grad_norm": 0.7623226046562195, + "learning_rate": 0.00013378465531083055, + "loss": 2.7342, + "step": 7867 + }, + { + "epoch": 0.6349769994350738, + "grad_norm": 0.7427035570144653, + "learning_rate": 0.0001337697962427141, + "loss": 2.5468, + "step": 7868 + }, + { + "epoch": 0.6350577031716569, + "grad_norm": 0.6865772008895874, + "learning_rate": 0.00013375493633296598, + "loss": 2.6112, + "step": 7869 + }, + { + "epoch": 
0.6351384069082399, + "grad_norm": 0.663567304611206, + "learning_rate": 0.00013374007558195666, + "loss": 2.5896, + "step": 7870 + }, + { + "epoch": 0.6352191106448228, + "grad_norm": 0.6804360151290894, + "learning_rate": 0.00013372521399005643, + "loss": 2.58, + "step": 7871 + }, + { + "epoch": 0.6352998143814058, + "grad_norm": 0.6755216121673584, + "learning_rate": 0.0001337103515576357, + "loss": 2.5593, + "step": 7872 + }, + { + "epoch": 0.6353805181179889, + "grad_norm": 0.8148807883262634, + "learning_rate": 0.00013369548828506491, + "loss": 2.6473, + "step": 7873 + }, + { + "epoch": 0.6354612218545719, + "grad_norm": 0.713009774684906, + "learning_rate": 0.00013368062417271447, + "loss": 2.6002, + "step": 7874 + }, + { + "epoch": 0.6355419255911549, + "grad_norm": 0.6390172839164734, + "learning_rate": 0.00013366575922095484, + "loss": 2.5794, + "step": 7875 + }, + { + "epoch": 0.6356226293277378, + "grad_norm": 0.7228195667266846, + "learning_rate": 0.00013365089343015649, + "loss": 2.6051, + "step": 7876 + }, + { + "epoch": 0.6357033330643209, + "grad_norm": 0.7563474178314209, + "learning_rate": 0.00013363602680068986, + "loss": 2.6308, + "step": 7877 + }, + { + "epoch": 0.6357840368009039, + "grad_norm": 0.7366798520088196, + "learning_rate": 0.00013362115933292557, + "loss": 2.5589, + "step": 7878 + }, + { + "epoch": 0.6358647405374869, + "grad_norm": 0.7137070894241333, + "learning_rate": 0.00013360629102723409, + "loss": 2.6428, + "step": 7879 + }, + { + "epoch": 0.6359454442740698, + "grad_norm": 0.6799132823944092, + "learning_rate": 0.000133591421883986, + "loss": 2.5549, + "step": 7880 + }, + { + "epoch": 0.6360261480106529, + "grad_norm": 0.7031344771385193, + "learning_rate": 0.00013357655190355188, + "loss": 2.6298, + "step": 7881 + }, + { + "epoch": 0.6361068517472359, + "grad_norm": 0.7441670298576355, + "learning_rate": 0.00013356168108630227, + "loss": 2.5844, + "step": 7882 + }, + { + "epoch": 0.6361875554838189, + "grad_norm": 
0.7281978726387024, + "learning_rate": 0.00013354680943260784, + "loss": 2.5773, + "step": 7883 + }, + { + "epoch": 0.6362682592204019, + "grad_norm": 0.6969650983810425, + "learning_rate": 0.00013353193694283928, + "loss": 2.6156, + "step": 7884 + }, + { + "epoch": 0.636348962956985, + "grad_norm": 0.6668435335159302, + "learning_rate": 0.00013351706361736714, + "loss": 2.6328, + "step": 7885 + }, + { + "epoch": 0.6364296666935679, + "grad_norm": 0.6909573078155518, + "learning_rate": 0.0001335021894565622, + "loss": 2.5772, + "step": 7886 + }, + { + "epoch": 0.6365103704301509, + "grad_norm": 0.6740022897720337, + "learning_rate": 0.0001334873144607951, + "loss": 2.6435, + "step": 7887 + }, + { + "epoch": 0.6365910741667339, + "grad_norm": 0.7203185558319092, + "learning_rate": 0.0001334724386304366, + "loss": 2.5401, + "step": 7888 + }, + { + "epoch": 0.636671777903317, + "grad_norm": 0.7343020439147949, + "learning_rate": 0.0001334575619658574, + "loss": 2.5811, + "step": 7889 + }, + { + "epoch": 0.6367524816399, + "grad_norm": 0.6941348314285278, + "learning_rate": 0.00013344268446742835, + "loss": 2.6267, + "step": 7890 + }, + { + "epoch": 0.6368331853764829, + "grad_norm": 0.6983792185783386, + "learning_rate": 0.00013342780613552016, + "loss": 2.533, + "step": 7891 + }, + { + "epoch": 0.6369138891130659, + "grad_norm": 0.7093533277511597, + "learning_rate": 0.00013341292697050365, + "loss": 2.6616, + "step": 7892 + }, + { + "epoch": 0.636994592849649, + "grad_norm": 0.7377648949623108, + "learning_rate": 0.00013339804697274965, + "loss": 2.6032, + "step": 7893 + }, + { + "epoch": 0.637075296586232, + "grad_norm": 0.6669821739196777, + "learning_rate": 0.00013338316614262903, + "loss": 2.6082, + "step": 7894 + }, + { + "epoch": 0.6371560003228149, + "grad_norm": 0.6665576100349426, + "learning_rate": 0.00013336828448051263, + "loss": 2.6114, + "step": 7895 + }, + { + "epoch": 0.6372367040593979, + "grad_norm": 0.6893584132194519, + "learning_rate": 
0.0001333534019867714, + "loss": 2.5886, + "step": 7896 + }, + { + "epoch": 0.637317407795981, + "grad_norm": 0.7651494741439819, + "learning_rate": 0.00013333851866177617, + "loss": 2.5622, + "step": 7897 + }, + { + "epoch": 0.637398111532564, + "grad_norm": 0.8124055862426758, + "learning_rate": 0.00013332363450589788, + "loss": 2.6036, + "step": 7898 + }, + { + "epoch": 0.637478815269147, + "grad_norm": 0.7394436597824097, + "learning_rate": 0.00013330874951950755, + "loss": 2.6214, + "step": 7899 + }, + { + "epoch": 0.6375595190057299, + "grad_norm": 0.6279659867286682, + "learning_rate": 0.00013329386370297615, + "loss": 2.5652, + "step": 7900 + }, + { + "epoch": 0.637640222742313, + "grad_norm": 0.7289649248123169, + "learning_rate": 0.00013327897705667455, + "loss": 2.5628, + "step": 7901 + }, + { + "epoch": 0.637720926478896, + "grad_norm": 0.7267701625823975, + "learning_rate": 0.0001332640895809739, + "loss": 2.5475, + "step": 7902 + }, + { + "epoch": 0.637801630215479, + "grad_norm": 0.7470490336418152, + "learning_rate": 0.00013324920127624515, + "loss": 2.5054, + "step": 7903 + }, + { + "epoch": 0.637882333952062, + "grad_norm": 0.6963294148445129, + "learning_rate": 0.00013323431214285944, + "loss": 2.5992, + "step": 7904 + }, + { + "epoch": 0.6379630376886449, + "grad_norm": 0.6993808746337891, + "learning_rate": 0.00013321942218118778, + "loss": 2.6044, + "step": 7905 + }, + { + "epoch": 0.638043741425228, + "grad_norm": 0.6620917916297913, + "learning_rate": 0.00013320453139160126, + "loss": 2.5278, + "step": 7906 + }, + { + "epoch": 0.638124445161811, + "grad_norm": 0.6535444855690002, + "learning_rate": 0.00013318963977447106, + "loss": 2.6069, + "step": 7907 + }, + { + "epoch": 0.638205148898394, + "grad_norm": 0.6913008689880371, + "learning_rate": 0.00013317474733016824, + "loss": 2.5271, + "step": 7908 + }, + { + "epoch": 0.638285852634977, + "grad_norm": 0.6760269403457642, + "learning_rate": 0.000133159854059064, + "loss": 2.7029, + "step": 
7909 + }, + { + "epoch": 0.63836655637156, + "grad_norm": 0.7026536464691162, + "learning_rate": 0.0001331449599615295, + "loss": 2.592, + "step": 7910 + }, + { + "epoch": 0.638447260108143, + "grad_norm": 0.7935923933982849, + "learning_rate": 0.000133130065037936, + "loss": 2.5674, + "step": 7911 + }, + { + "epoch": 0.638527963844726, + "grad_norm": 0.694675087928772, + "learning_rate": 0.00013311516928865466, + "loss": 2.6727, + "step": 7912 + }, + { + "epoch": 0.638608667581309, + "grad_norm": 0.7378186583518982, + "learning_rate": 0.00013310027271405672, + "loss": 2.5691, + "step": 7913 + }, + { + "epoch": 0.638689371317892, + "grad_norm": 0.7684193849563599, + "learning_rate": 0.00013308537531451345, + "loss": 2.5796, + "step": 7914 + }, + { + "epoch": 0.638770075054475, + "grad_norm": 0.6881510019302368, + "learning_rate": 0.00013307047709039619, + "loss": 2.6, + "step": 7915 + }, + { + "epoch": 0.638850778791058, + "grad_norm": 0.7341364026069641, + "learning_rate": 0.00013305557804207618, + "loss": 2.622, + "step": 7916 + }, + { + "epoch": 0.638931482527641, + "grad_norm": 0.7620663642883301, + "learning_rate": 0.00013304067816992474, + "loss": 2.5571, + "step": 7917 + }, + { + "epoch": 0.6390121862642241, + "grad_norm": 0.6929789781570435, + "learning_rate": 0.00013302577747431322, + "loss": 2.6204, + "step": 7918 + }, + { + "epoch": 0.639092890000807, + "grad_norm": 0.6942943334579468, + "learning_rate": 0.000133010875955613, + "loss": 2.6737, + "step": 7919 + }, + { + "epoch": 0.63917359373739, + "grad_norm": 0.69537752866745, + "learning_rate": 0.0001329959736141955, + "loss": 2.6105, + "step": 7920 + }, + { + "epoch": 0.639254297473973, + "grad_norm": 0.6690821051597595, + "learning_rate": 0.00013298107045043203, + "loss": 2.6279, + "step": 7921 + }, + { + "epoch": 0.6393350012105561, + "grad_norm": 0.7748103141784668, + "learning_rate": 0.00013296616646469412, + "loss": 2.6307, + "step": 7922 + }, + { + "epoch": 0.6394157049471391, + "grad_norm": 
0.7509558200836182, + "learning_rate": 0.00013295126165735311, + "loss": 2.6388, + "step": 7923 + }, + { + "epoch": 0.639496408683722, + "grad_norm": 0.7641764283180237, + "learning_rate": 0.0001329363560287806, + "loss": 2.5819, + "step": 7924 + }, + { + "epoch": 0.639577112420305, + "grad_norm": 0.6912327408790588, + "learning_rate": 0.00013292144957934794, + "loss": 2.5588, + "step": 7925 + }, + { + "epoch": 0.6396578161568881, + "grad_norm": 0.7568803429603577, + "learning_rate": 0.0001329065423094267, + "loss": 2.5627, + "step": 7926 + }, + { + "epoch": 0.6397385198934711, + "grad_norm": 0.7272306084632874, + "learning_rate": 0.00013289163421938843, + "loss": 2.6101, + "step": 7927 + }, + { + "epoch": 0.6398192236300541, + "grad_norm": 0.6965963840484619, + "learning_rate": 0.00013287672530960465, + "loss": 2.5967, + "step": 7928 + }, + { + "epoch": 0.639899927366637, + "grad_norm": 0.7729843854904175, + "learning_rate": 0.00013286181558044694, + "loss": 2.6222, + "step": 7929 + }, + { + "epoch": 0.6399806311032201, + "grad_norm": 0.6876606941223145, + "learning_rate": 0.00013284690503228687, + "loss": 2.6162, + "step": 7930 + }, + { + "epoch": 0.6400613348398031, + "grad_norm": 0.7555204629898071, + "learning_rate": 0.0001328319936654961, + "loss": 2.588, + "step": 7931 + }, + { + "epoch": 0.6401420385763861, + "grad_norm": 0.7324720621109009, + "learning_rate": 0.0001328170814804462, + "loss": 2.6111, + "step": 7932 + }, + { + "epoch": 0.640222742312969, + "grad_norm": 0.6802392601966858, + "learning_rate": 0.0001328021684775088, + "loss": 2.5955, + "step": 7933 + }, + { + "epoch": 0.6403034460495521, + "grad_norm": 0.7564330697059631, + "learning_rate": 0.00013278725465705568, + "loss": 2.5355, + "step": 7934 + }, + { + "epoch": 0.6403841497861351, + "grad_norm": 0.6916235089302063, + "learning_rate": 0.00013277234001945844, + "loss": 2.6037, + "step": 7935 + }, + { + "epoch": 0.6404648535227181, + "grad_norm": 0.688819169998169, + "learning_rate": 
0.00013275742456508885, + "loss": 2.5626, + "step": 7936 + }, + { + "epoch": 0.6405455572593011, + "grad_norm": 0.6647922992706299, + "learning_rate": 0.0001327425082943186, + "loss": 2.6166, + "step": 7937 + }, + { + "epoch": 0.6406262609958842, + "grad_norm": 0.6792626976966858, + "learning_rate": 0.00013272759120751943, + "loss": 2.6206, + "step": 7938 + }, + { + "epoch": 0.6407069647324671, + "grad_norm": 0.6482827663421631, + "learning_rate": 0.00013271267330506312, + "loss": 2.5558, + "step": 7939 + }, + { + "epoch": 0.6407876684690501, + "grad_norm": 0.6628372073173523, + "learning_rate": 0.0001326977545873215, + "loss": 2.5904, + "step": 7940 + }, + { + "epoch": 0.6408683722056331, + "grad_norm": 0.7168916463851929, + "learning_rate": 0.00013268283505466635, + "loss": 2.5189, + "step": 7941 + }, + { + "epoch": 0.6409490759422162, + "grad_norm": 0.6691678762435913, + "learning_rate": 0.00013266791470746957, + "loss": 2.608, + "step": 7942 + }, + { + "epoch": 0.6410297796787991, + "grad_norm": 0.6850359439849854, + "learning_rate": 0.00013265299354610292, + "loss": 2.5929, + "step": 7943 + }, + { + "epoch": 0.6411104834153821, + "grad_norm": 0.6807669401168823, + "learning_rate": 0.0001326380715709383, + "loss": 2.6016, + "step": 7944 + }, + { + "epoch": 0.6411911871519651, + "grad_norm": 0.6450446844100952, + "learning_rate": 0.00013262314878234767, + "loss": 2.6129, + "step": 7945 + }, + { + "epoch": 0.6412718908885482, + "grad_norm": 0.679115355014801, + "learning_rate": 0.00013260822518070285, + "loss": 2.6049, + "step": 7946 + }, + { + "epoch": 0.6413525946251312, + "grad_norm": 0.7082008123397827, + "learning_rate": 0.00013259330076637583, + "loss": 2.5673, + "step": 7947 + }, + { + "epoch": 0.6414332983617141, + "grad_norm": 0.7357851266860962, + "learning_rate": 0.00013257837553973855, + "loss": 2.6118, + "step": 7948 + }, + { + "epoch": 0.6415140020982971, + "grad_norm": 0.687035083770752, + "learning_rate": 0.000132563449501163, + "loss": 2.5359, + 
"step": 7949 + }, + { + "epoch": 0.6415947058348802, + "grad_norm": 0.6950698494911194, + "learning_rate": 0.00013254852265102117, + "loss": 2.5527, + "step": 7950 + }, + { + "epoch": 0.6416754095714632, + "grad_norm": 0.6878959536552429, + "learning_rate": 0.00013253359498968507, + "loss": 2.611, + "step": 7951 + }, + { + "epoch": 0.6417561133080462, + "grad_norm": 0.7224605083465576, + "learning_rate": 0.00013251866651752675, + "loss": 2.5459, + "step": 7952 + }, + { + "epoch": 0.6418368170446291, + "grad_norm": 0.7299731969833374, + "learning_rate": 0.00013250373723491826, + "loss": 2.5651, + "step": 7953 + }, + { + "epoch": 0.6419175207812122, + "grad_norm": 0.7663037776947021, + "learning_rate": 0.00013248880714223163, + "loss": 2.6073, + "step": 7954 + }, + { + "epoch": 0.6419982245177952, + "grad_norm": 0.6532007455825806, + "learning_rate": 0.00013247387623983902, + "loss": 2.6087, + "step": 7955 + }, + { + "epoch": 0.6420789282543782, + "grad_norm": 0.7520449757575989, + "learning_rate": 0.00013245894452811255, + "loss": 2.5998, + "step": 7956 + }, + { + "epoch": 0.6421596319909612, + "grad_norm": 0.7196050882339478, + "learning_rate": 0.0001324440120074243, + "loss": 2.6448, + "step": 7957 + }, + { + "epoch": 0.6422403357275441, + "grad_norm": 0.7093806862831116, + "learning_rate": 0.0001324290786781465, + "loss": 2.5935, + "step": 7958 + }, + { + "epoch": 0.6423210394641272, + "grad_norm": 0.695541501045227, + "learning_rate": 0.00013241414454065125, + "loss": 2.5872, + "step": 7959 + }, + { + "epoch": 0.6424017432007102, + "grad_norm": 0.6763006448745728, + "learning_rate": 0.0001323992095953108, + "loss": 2.572, + "step": 7960 + }, + { + "epoch": 0.6424824469372932, + "grad_norm": 0.6403522491455078, + "learning_rate": 0.00013238427384249738, + "loss": 2.6137, + "step": 7961 + }, + { + "epoch": 0.6425631506738761, + "grad_norm": 0.6647571325302124, + "learning_rate": 0.00013236933728258315, + "loss": 2.5904, + "step": 7962 + }, + { + "epoch": 
0.6426438544104592, + "grad_norm": 0.6931071877479553, + "learning_rate": 0.0001323543999159405, + "loss": 2.6085, + "step": 7963 + }, + { + "epoch": 0.6427245581470422, + "grad_norm": 0.6899439096450806, + "learning_rate": 0.00013233946174294155, + "loss": 2.5555, + "step": 7964 + }, + { + "epoch": 0.6428052618836252, + "grad_norm": 0.6564984321594238, + "learning_rate": 0.0001323245227639587, + "loss": 2.576, + "step": 7965 + }, + { + "epoch": 0.6428859656202082, + "grad_norm": 0.7427607774734497, + "learning_rate": 0.00013230958297936427, + "loss": 2.6178, + "step": 7966 + }, + { + "epoch": 0.6429666693567913, + "grad_norm": 0.6884508728981018, + "learning_rate": 0.00013229464238953054, + "loss": 2.6519, + "step": 7967 + }, + { + "epoch": 0.6430473730933742, + "grad_norm": 0.692442774772644, + "learning_rate": 0.00013227970099482993, + "loss": 2.5784, + "step": 7968 + }, + { + "epoch": 0.6431280768299572, + "grad_norm": 0.6637876629829407, + "learning_rate": 0.00013226475879563477, + "loss": 2.5785, + "step": 7969 + }, + { + "epoch": 0.6432087805665402, + "grad_norm": 0.6844972372055054, + "learning_rate": 0.0001322498157923175, + "loss": 2.5745, + "step": 7970 + }, + { + "epoch": 0.6432894843031233, + "grad_norm": 0.7259756922721863, + "learning_rate": 0.0001322348719852505, + "loss": 2.5696, + "step": 7971 + }, + { + "epoch": 0.6433701880397062, + "grad_norm": 0.6719023585319519, + "learning_rate": 0.00013221992737480625, + "loss": 2.6049, + "step": 7972 + }, + { + "epoch": 0.6434508917762892, + "grad_norm": 0.7160155773162842, + "learning_rate": 0.00013220498196135717, + "loss": 2.572, + "step": 7973 + }, + { + "epoch": 0.6435315955128722, + "grad_norm": 0.6920225620269775, + "learning_rate": 0.00013219003574527576, + "loss": 2.6576, + "step": 7974 + }, + { + "epoch": 0.6436122992494553, + "grad_norm": 0.698518693447113, + "learning_rate": 0.0001321750887269345, + "loss": 2.6074, + "step": 7975 + }, + { + "epoch": 0.6436930029860383, + "grad_norm": 
0.7607932090759277, + "learning_rate": 0.00013216014090670594, + "loss": 2.6173, + "step": 7976 + }, + { + "epoch": 0.6437737067226212, + "grad_norm": 0.8130847811698914, + "learning_rate": 0.0001321451922849626, + "loss": 2.6023, + "step": 7977 + }, + { + "epoch": 0.6438544104592042, + "grad_norm": 0.676675021648407, + "learning_rate": 0.00013213024286207702, + "loss": 2.6174, + "step": 7978 + }, + { + "epoch": 0.6439351141957873, + "grad_norm": 0.7018851041793823, + "learning_rate": 0.00013211529263842183, + "loss": 2.5713, + "step": 7979 + }, + { + "epoch": 0.6440158179323703, + "grad_norm": 0.796097457408905, + "learning_rate": 0.00013210034161436954, + "loss": 2.5937, + "step": 7980 + }, + { + "epoch": 0.6440965216689533, + "grad_norm": 0.7118527293205261, + "learning_rate": 0.0001320853897902929, + "loss": 2.5721, + "step": 7981 + }, + { + "epoch": 0.6441772254055362, + "grad_norm": 0.7282249331474304, + "learning_rate": 0.00013207043716656445, + "loss": 2.5975, + "step": 7982 + }, + { + "epoch": 0.6442579291421193, + "grad_norm": 0.6710900664329529, + "learning_rate": 0.00013205548374355686, + "loss": 2.5809, + "step": 7983 + }, + { + "epoch": 0.6443386328787023, + "grad_norm": 0.7045658230781555, + "learning_rate": 0.00013204052952164278, + "loss": 2.5715, + "step": 7984 + }, + { + "epoch": 0.6444193366152853, + "grad_norm": 0.719507098197937, + "learning_rate": 0.00013202557450119504, + "loss": 2.5948, + "step": 7985 + }, + { + "epoch": 0.6445000403518683, + "grad_norm": 0.7603922486305237, + "learning_rate": 0.0001320106186825862, + "loss": 2.6176, + "step": 7986 + }, + { + "epoch": 0.6445807440884513, + "grad_norm": 0.7057444453239441, + "learning_rate": 0.0001319956620661891, + "loss": 2.5905, + "step": 7987 + }, + { + "epoch": 0.6446614478250343, + "grad_norm": 0.7884874939918518, + "learning_rate": 0.00013198070465237645, + "loss": 2.5892, + "step": 7988 + }, + { + "epoch": 0.6447421515616173, + "grad_norm": 0.6932834386825562, + "learning_rate": 
0.00013196574644152103, + "loss": 2.6032, + "step": 7989 + }, + { + "epoch": 0.6448228552982003, + "grad_norm": 0.7361180186271667, + "learning_rate": 0.00013195078743399568, + "loss": 2.5877, + "step": 7990 + }, + { + "epoch": 0.6449035590347834, + "grad_norm": 0.6843615174293518, + "learning_rate": 0.00013193582763017315, + "loss": 2.5804, + "step": 7991 + }, + { + "epoch": 0.6449842627713663, + "grad_norm": 0.7592078447341919, + "learning_rate": 0.00013192086703042635, + "loss": 2.6464, + "step": 7992 + }, + { + "epoch": 0.6450649665079493, + "grad_norm": 0.7362154126167297, + "learning_rate": 0.0001319059056351281, + "loss": 2.6154, + "step": 7993 + }, + { + "epoch": 0.6451456702445323, + "grad_norm": 0.6721758246421814, + "learning_rate": 0.00013189094344465125, + "loss": 2.5735, + "step": 7994 + }, + { + "epoch": 0.6452263739811154, + "grad_norm": 0.6221550107002258, + "learning_rate": 0.00013187598045936874, + "loss": 2.5612, + "step": 7995 + }, + { + "epoch": 0.6453070777176984, + "grad_norm": 0.7225528359413147, + "learning_rate": 0.00013186101667965344, + "loss": 2.6263, + "step": 7996 + }, + { + "epoch": 0.6453877814542813, + "grad_norm": 0.7599418759346008, + "learning_rate": 0.00013184605210587837, + "loss": 2.5814, + "step": 7997 + }, + { + "epoch": 0.6454684851908643, + "grad_norm": 0.6778777837753296, + "learning_rate": 0.00013183108673841642, + "loss": 2.6158, + "step": 7998 + }, + { + "epoch": 0.6455491889274474, + "grad_norm": 0.6860963106155396, + "learning_rate": 0.00013181612057764058, + "loss": 2.6207, + "step": 7999 + }, + { + "epoch": 0.6456298926640304, + "grad_norm": 0.6615182757377625, + "learning_rate": 0.00013180115362392382, + "loss": 2.5571, + "step": 8000 + }, + { + "epoch": 0.6456298926640304, + "eval_loss": 2.5128066539764404, + "eval_runtime": 754.3655, + "eval_samples_per_second": 3.473, + "eval_steps_per_second": 0.579, + "step": 8000 + }, + { + "epoch": 0.6457105964006133, + "grad_norm": 0.688169538974762, + "learning_rate": 
0.0001317861858776392, + "loss": 2.6513, + "step": 8001 + }, + { + "epoch": 0.6457913001371963, + "grad_norm": 0.6726182103157043, + "learning_rate": 0.00013177121733915975, + "loss": 2.5909, + "step": 8002 + }, + { + "epoch": 0.6458720038737794, + "grad_norm": 0.7348085641860962, + "learning_rate": 0.00013175624800885853, + "loss": 2.577, + "step": 8003 + }, + { + "epoch": 0.6459527076103624, + "grad_norm": 0.677435040473938, + "learning_rate": 0.00013174127788710856, + "loss": 2.5056, + "step": 8004 + }, + { + "epoch": 0.6460334113469454, + "grad_norm": 0.6864951848983765, + "learning_rate": 0.000131726306974283, + "loss": 2.5733, + "step": 8005 + }, + { + "epoch": 0.6461141150835283, + "grad_norm": 0.7070075869560242, + "learning_rate": 0.0001317113352707549, + "loss": 2.5359, + "step": 8006 + }, + { + "epoch": 0.6461948188201113, + "grad_norm": 0.7065049409866333, + "learning_rate": 0.00013169636277689746, + "loss": 2.6261, + "step": 8007 + }, + { + "epoch": 0.6462755225566944, + "grad_norm": 0.6691577434539795, + "learning_rate": 0.0001316813894930838, + "loss": 2.6015, + "step": 8008 + }, + { + "epoch": 0.6463562262932774, + "grad_norm": 0.6754019260406494, + "learning_rate": 0.0001316664154196871, + "loss": 2.5954, + "step": 8009 + }, + { + "epoch": 0.6464369300298604, + "grad_norm": 0.6172776818275452, + "learning_rate": 0.00013165144055708055, + "loss": 2.5599, + "step": 8010 + }, + { + "epoch": 0.6465176337664433, + "grad_norm": 0.6778094172477722, + "learning_rate": 0.00013163646490563737, + "loss": 2.5407, + "step": 8011 + }, + { + "epoch": 0.6465983375030264, + "grad_norm": 0.7363924980163574, + "learning_rate": 0.00013162148846573076, + "loss": 2.6075, + "step": 8012 + }, + { + "epoch": 0.6466790412396094, + "grad_norm": 0.6662711501121521, + "learning_rate": 0.00013160651123773404, + "loss": 2.5611, + "step": 8013 + }, + { + "epoch": 0.6467597449761924, + "grad_norm": 0.699670135974884, + "learning_rate": 0.00013159153322202043, + "loss": 2.5612, + 
"step": 8014 + }, + { + "epoch": 0.6468404487127754, + "grad_norm": 0.7382899522781372, + "learning_rate": 0.0001315765544189632, + "loss": 2.6017, + "step": 8015 + }, + { + "epoch": 0.6469211524493584, + "grad_norm": 0.7624868154525757, + "learning_rate": 0.0001315615748289357, + "loss": 2.6174, + "step": 8016 + }, + { + "epoch": 0.6470018561859414, + "grad_norm": 0.704622745513916, + "learning_rate": 0.00013154659445231129, + "loss": 2.5367, + "step": 8017 + }, + { + "epoch": 0.6470825599225244, + "grad_norm": 0.7117413878440857, + "learning_rate": 0.00013153161328946324, + "loss": 2.5958, + "step": 8018 + }, + { + "epoch": 0.6471632636591074, + "grad_norm": 0.6825408339500427, + "learning_rate": 0.00013151663134076497, + "loss": 2.5118, + "step": 8019 + }, + { + "epoch": 0.6472439673956905, + "grad_norm": 0.6732384562492371, + "learning_rate": 0.00013150164860658986, + "loss": 2.6312, + "step": 8020 + }, + { + "epoch": 0.6473246711322734, + "grad_norm": 0.712812602519989, + "learning_rate": 0.00013148666508731134, + "loss": 2.576, + "step": 8021 + }, + { + "epoch": 0.6474053748688564, + "grad_norm": 0.8128857612609863, + "learning_rate": 0.0001314716807833028, + "loss": 2.5333, + "step": 8022 + }, + { + "epoch": 0.6474860786054394, + "grad_norm": 0.7817162275314331, + "learning_rate": 0.00013145669569493773, + "loss": 2.6835, + "step": 8023 + }, + { + "epoch": 0.6475667823420225, + "grad_norm": 0.7164301872253418, + "learning_rate": 0.00013144170982258956, + "loss": 2.5573, + "step": 8024 + }, + { + "epoch": 0.6476474860786054, + "grad_norm": 0.67625892162323, + "learning_rate": 0.00013142672316663177, + "loss": 2.5976, + "step": 8025 + }, + { + "epoch": 0.6477281898151884, + "grad_norm": 0.6919494867324829, + "learning_rate": 0.0001314117357274379, + "loss": 2.6179, + "step": 8026 + }, + { + "epoch": 0.6478088935517714, + "grad_norm": 0.6787464618682861, + "learning_rate": 0.0001313967475053815, + "loss": 2.5405, + "step": 8027 + }, + { + "epoch": 
0.6478895972883545, + "grad_norm": 0.6305621862411499, + "learning_rate": 0.00013138175850083605, + "loss": 2.6016, + "step": 8028 + }, + { + "epoch": 0.6479703010249375, + "grad_norm": 0.7456182837486267, + "learning_rate": 0.00013136676871417516, + "loss": 2.6091, + "step": 8029 + }, + { + "epoch": 0.6480510047615204, + "grad_norm": 0.7047890424728394, + "learning_rate": 0.00013135177814577238, + "loss": 2.6108, + "step": 8030 + }, + { + "epoch": 0.6481317084981034, + "grad_norm": 0.7509389519691467, + "learning_rate": 0.00013133678679600133, + "loss": 2.6396, + "step": 8031 + }, + { + "epoch": 0.6482124122346865, + "grad_norm": 0.63836270570755, + "learning_rate": 0.00013132179466523566, + "loss": 2.5759, + "step": 8032 + }, + { + "epoch": 0.6482931159712695, + "grad_norm": 0.6994885206222534, + "learning_rate": 0.000131306801753849, + "loss": 2.61, + "step": 8033 + }, + { + "epoch": 0.6483738197078525, + "grad_norm": 0.6762083768844604, + "learning_rate": 0.00013129180806221497, + "loss": 2.5431, + "step": 8034 + }, + { + "epoch": 0.6484545234444354, + "grad_norm": 0.6890944242477417, + "learning_rate": 0.0001312768135907073, + "loss": 2.5922, + "step": 8035 + }, + { + "epoch": 0.6485352271810185, + "grad_norm": 0.7409473061561584, + "learning_rate": 0.0001312618183396997, + "loss": 2.6132, + "step": 8036 + }, + { + "epoch": 0.6486159309176015, + "grad_norm": 0.6660643815994263, + "learning_rate": 0.00013124682230956585, + "loss": 2.5816, + "step": 8037 + }, + { + "epoch": 0.6486966346541845, + "grad_norm": 0.714235246181488, + "learning_rate": 0.0001312318255006795, + "loss": 2.5613, + "step": 8038 + }, + { + "epoch": 0.6487773383907675, + "grad_norm": 0.6568472385406494, + "learning_rate": 0.00013121682791341442, + "loss": 2.6382, + "step": 8039 + }, + { + "epoch": 0.6488580421273505, + "grad_norm": 0.6874251961708069, + "learning_rate": 0.00013120182954814438, + "loss": 2.593, + "step": 8040 + }, + { + "epoch": 0.6489387458639335, + "grad_norm": 
0.7620158791542053, + "learning_rate": 0.0001311868304052432, + "loss": 2.589, + "step": 8041 + }, + { + "epoch": 0.6490194496005165, + "grad_norm": 0.6755926609039307, + "learning_rate": 0.00013117183048508467, + "loss": 2.5876, + "step": 8042 + }, + { + "epoch": 0.6491001533370995, + "grad_norm": 0.6952808499336243, + "learning_rate": 0.00013115682978804264, + "loss": 2.5909, + "step": 8043 + }, + { + "epoch": 0.6491808570736826, + "grad_norm": 0.6599535346031189, + "learning_rate": 0.00013114182831449098, + "loss": 2.6031, + "step": 8044 + }, + { + "epoch": 0.6492615608102655, + "grad_norm": 0.7816598415374756, + "learning_rate": 0.00013112682606480355, + "loss": 2.5633, + "step": 8045 + }, + { + "epoch": 0.6493422645468485, + "grad_norm": 0.7188639640808105, + "learning_rate": 0.00013111182303935425, + "loss": 2.6292, + "step": 8046 + }, + { + "epoch": 0.6494229682834315, + "grad_norm": 0.7131505608558655, + "learning_rate": 0.00013109681923851698, + "loss": 2.5729, + "step": 8047 + }, + { + "epoch": 0.6495036720200146, + "grad_norm": 0.7466408014297485, + "learning_rate": 0.00013108181466266568, + "loss": 2.5742, + "step": 8048 + }, + { + "epoch": 0.6495843757565976, + "grad_norm": 0.6707943677902222, + "learning_rate": 0.00013106680931217437, + "loss": 2.5506, + "step": 8049 + }, + { + "epoch": 0.6496650794931805, + "grad_norm": 0.6913424730300903, + "learning_rate": 0.0001310518031874169, + "loss": 2.5639, + "step": 8050 + }, + { + "epoch": 0.6497457832297635, + "grad_norm": 0.8261755704879761, + "learning_rate": 0.00013103679628876733, + "loss": 2.601, + "step": 8051 + }, + { + "epoch": 0.6498264869663466, + "grad_norm": 0.7410566806793213, + "learning_rate": 0.0001310217886165997, + "loss": 2.5326, + "step": 8052 + }, + { + "epoch": 0.6499071907029296, + "grad_norm": 0.7032365202903748, + "learning_rate": 0.00013100678017128798, + "loss": 2.5907, + "step": 8053 + }, + { + "epoch": 0.6499878944395125, + "grad_norm": 0.7074568271636963, + "learning_rate": 
0.00013099177095320626, + "loss": 2.6193, + "step": 8054 + }, + { + "epoch": 0.6500685981760955, + "grad_norm": 0.7754546999931335, + "learning_rate": 0.00013097676096272855, + "loss": 2.5832, + "step": 8055 + }, + { + "epoch": 0.6501493019126786, + "grad_norm": 0.7475717663764954, + "learning_rate": 0.00013096175020022903, + "loss": 2.6233, + "step": 8056 + }, + { + "epoch": 0.6502300056492616, + "grad_norm": 0.7863949537277222, + "learning_rate": 0.00013094673866608173, + "loss": 2.5745, + "step": 8057 + }, + { + "epoch": 0.6503107093858446, + "grad_norm": 0.69294673204422, + "learning_rate": 0.0001309317263606608, + "loss": 2.5982, + "step": 8058 + }, + { + "epoch": 0.6503914131224275, + "grad_norm": 0.7096135020256042, + "learning_rate": 0.00013091671328434046, + "loss": 2.5944, + "step": 8059 + }, + { + "epoch": 0.6504721168590105, + "grad_norm": 0.7001097202301025, + "learning_rate": 0.00013090169943749476, + "loss": 2.5435, + "step": 8060 + }, + { + "epoch": 0.6505528205955936, + "grad_norm": 0.7522539496421814, + "learning_rate": 0.00013088668482049792, + "loss": 2.5843, + "step": 8061 + }, + { + "epoch": 0.6506335243321766, + "grad_norm": 0.6675420999526978, + "learning_rate": 0.00013087166943372418, + "loss": 2.5623, + "step": 8062 + }, + { + "epoch": 0.6507142280687596, + "grad_norm": 0.7779181599617004, + "learning_rate": 0.00013085665327754772, + "loss": 2.6087, + "step": 8063 + }, + { + "epoch": 0.6507949318053425, + "grad_norm": 0.7385239005088806, + "learning_rate": 0.00013084163635234284, + "loss": 2.5725, + "step": 8064 + }, + { + "epoch": 0.6508756355419256, + "grad_norm": 0.6966612339019775, + "learning_rate": 0.00013082661865848375, + "loss": 2.5745, + "step": 8065 + }, + { + "epoch": 0.6509563392785086, + "grad_norm": 0.7098337411880493, + "learning_rate": 0.00013081160019634468, + "loss": 2.5461, + "step": 8066 + }, + { + "epoch": 0.6510370430150916, + "grad_norm": 0.6514503359794617, + "learning_rate": 0.00013079658096630002, + "loss": 
2.5869, + "step": 8067 + }, + { + "epoch": 0.6511177467516746, + "grad_norm": 0.680422306060791, + "learning_rate": 0.0001307815609687241, + "loss": 2.6316, + "step": 8068 + }, + { + "epoch": 0.6511984504882576, + "grad_norm": 0.6892665028572083, + "learning_rate": 0.00013076654020399117, + "loss": 2.5862, + "step": 8069 + }, + { + "epoch": 0.6512791542248406, + "grad_norm": 0.7605568170547485, + "learning_rate": 0.00013075151867247568, + "loss": 2.5342, + "step": 8070 + }, + { + "epoch": 0.6513598579614236, + "grad_norm": 0.7571204900741577, + "learning_rate": 0.00013073649637455192, + "loss": 2.5762, + "step": 8071 + }, + { + "epoch": 0.6514405616980066, + "grad_norm": 0.6910812258720398, + "learning_rate": 0.00013072147331059431, + "loss": 2.6635, + "step": 8072 + }, + { + "epoch": 0.6515212654345897, + "grad_norm": 0.765559196472168, + "learning_rate": 0.00013070644948097733, + "loss": 2.5885, + "step": 8073 + }, + { + "epoch": 0.6516019691711726, + "grad_norm": 0.7533665299415588, + "learning_rate": 0.00013069142488607532, + "loss": 2.6545, + "step": 8074 + }, + { + "epoch": 0.6516826729077556, + "grad_norm": 0.685089647769928, + "learning_rate": 0.0001306763995262628, + "loss": 2.5955, + "step": 8075 + }, + { + "epoch": 0.6517633766443386, + "grad_norm": 0.7280653715133667, + "learning_rate": 0.00013066137340191422, + "loss": 2.5548, + "step": 8076 + }, + { + "epoch": 0.6518440803809217, + "grad_norm": 0.6881482601165771, + "learning_rate": 0.00013064634651340404, + "loss": 2.6143, + "step": 8077 + }, + { + "epoch": 0.6519247841175047, + "grad_norm": 0.6878265142440796, + "learning_rate": 0.0001306313188611068, + "loss": 2.5681, + "step": 8078 + }, + { + "epoch": 0.6520054878540876, + "grad_norm": 0.685238242149353, + "learning_rate": 0.00013061629044539702, + "loss": 2.5517, + "step": 8079 + }, + { + "epoch": 0.6520861915906706, + "grad_norm": 0.6689820885658264, + "learning_rate": 0.00013060126126664928, + "loss": 2.6201, + "step": 8080 + }, + { + "epoch": 
0.6521668953272537, + "grad_norm": 0.7128999829292297, + "learning_rate": 0.00013058623132523807, + "loss": 2.5829, + "step": 8081 + }, + { + "epoch": 0.6522475990638367, + "grad_norm": 0.6835216879844666, + "learning_rate": 0.00013057120062153805, + "loss": 2.6312, + "step": 8082 + }, + { + "epoch": 0.6523283028004196, + "grad_norm": 0.7140012383460999, + "learning_rate": 0.00013055616915592382, + "loss": 2.6148, + "step": 8083 + }, + { + "epoch": 0.6524090065370026, + "grad_norm": 0.7378252148628235, + "learning_rate": 0.00013054113692876994, + "loss": 2.5805, + "step": 8084 + }, + { + "epoch": 0.6524897102735857, + "grad_norm": 0.7569258213043213, + "learning_rate": 0.0001305261039404511, + "loss": 2.6088, + "step": 8085 + }, + { + "epoch": 0.6525704140101687, + "grad_norm": 0.6909007430076599, + "learning_rate": 0.00013051107019134195, + "loss": 2.5285, + "step": 8086 + }, + { + "epoch": 0.6526511177467517, + "grad_norm": 0.6785587072372437, + "learning_rate": 0.0001304960356818172, + "loss": 2.5527, + "step": 8087 + }, + { + "epoch": 0.6527318214833346, + "grad_norm": 0.7058801054954529, + "learning_rate": 0.0001304810004122515, + "loss": 2.6789, + "step": 8088 + }, + { + "epoch": 0.6528125252199177, + "grad_norm": 0.6920512318611145, + "learning_rate": 0.0001304659643830196, + "loss": 2.5748, + "step": 8089 + }, + { + "epoch": 0.6528932289565007, + "grad_norm": 0.6829244494438171, + "learning_rate": 0.00013045092759449625, + "loss": 2.5389, + "step": 8090 + }, + { + "epoch": 0.6529739326930837, + "grad_norm": 0.6942421793937683, + "learning_rate": 0.00013043589004705614, + "loss": 2.5851, + "step": 8091 + }, + { + "epoch": 0.6530546364296667, + "grad_norm": 0.6473072171211243, + "learning_rate": 0.0001304208517410741, + "loss": 2.56, + "step": 8092 + }, + { + "epoch": 0.6531353401662497, + "grad_norm": 0.6692056655883789, + "learning_rate": 0.00013040581267692494, + "loss": 2.5977, + "step": 8093 + }, + { + "epoch": 0.6532160439028327, + "grad_norm": 
0.6918915510177612, + "learning_rate": 0.00013039077285498344, + "loss": 2.551, + "step": 8094 + }, + { + "epoch": 0.6532967476394157, + "grad_norm": 0.7432852387428284, + "learning_rate": 0.00013037573227562443, + "loss": 2.5537, + "step": 8095 + }, + { + "epoch": 0.6533774513759987, + "grad_norm": 0.6737081408500671, + "learning_rate": 0.0001303606909392228, + "loss": 2.5947, + "step": 8096 + }, + { + "epoch": 0.6534581551125818, + "grad_norm": 0.6810599565505981, + "learning_rate": 0.0001303456488461533, + "loss": 2.5704, + "step": 8097 + }, + { + "epoch": 0.6535388588491647, + "grad_norm": 0.675240159034729, + "learning_rate": 0.00013033060599679098, + "loss": 2.591, + "step": 8098 + }, + { + "epoch": 0.6536195625857477, + "grad_norm": 0.6888695359230042, + "learning_rate": 0.00013031556239151066, + "loss": 2.5403, + "step": 8099 + }, + { + "epoch": 0.6537002663223307, + "grad_norm": 0.7154796719551086, + "learning_rate": 0.00013030051803068727, + "loss": 2.5654, + "step": 8100 + }, + { + "epoch": 0.6537809700589138, + "grad_norm": 0.6655243635177612, + "learning_rate": 0.0001302854729146958, + "loss": 2.5867, + "step": 8101 + }, + { + "epoch": 0.6538616737954968, + "grad_norm": 0.7070788145065308, + "learning_rate": 0.00013027042704391115, + "loss": 2.5593, + "step": 8102 + }, + { + "epoch": 0.6539423775320797, + "grad_norm": 0.7071834206581116, + "learning_rate": 0.0001302553804187083, + "loss": 2.536, + "step": 8103 + }, + { + "epoch": 0.6540230812686627, + "grad_norm": 0.7086542248725891, + "learning_rate": 0.00013024033303946233, + "loss": 2.5644, + "step": 8104 + }, + { + "epoch": 0.6541037850052458, + "grad_norm": 0.6714556813240051, + "learning_rate": 0.00013022528490654818, + "loss": 2.5167, + "step": 8105 + }, + { + "epoch": 0.6541844887418288, + "grad_norm": 0.6905114054679871, + "learning_rate": 0.00013021023602034095, + "loss": 2.5227, + "step": 8106 + }, + { + "epoch": 0.6542651924784118, + "grad_norm": 0.7050586342811584, + "learning_rate": 
0.00013019518638121563, + "loss": 2.5725, + "step": 8107 + }, + { + "epoch": 0.6543458962149947, + "grad_norm": 0.6940500736236572, + "learning_rate": 0.00013018013598954737, + "loss": 2.5912, + "step": 8108 + }, + { + "epoch": 0.6544265999515777, + "grad_norm": 0.7136965990066528, + "learning_rate": 0.00013016508484571122, + "loss": 2.6101, + "step": 8109 + }, + { + "epoch": 0.6545073036881608, + "grad_norm": 0.7205774188041687, + "learning_rate": 0.0001301500329500823, + "loss": 2.5869, + "step": 8110 + }, + { + "epoch": 0.6545880074247438, + "grad_norm": 0.6831154823303223, + "learning_rate": 0.00013013498030303575, + "loss": 2.5309, + "step": 8111 + }, + { + "epoch": 0.6546687111613267, + "grad_norm": 0.6778538823127747, + "learning_rate": 0.0001301199269049467, + "loss": 2.6297, + "step": 8112 + }, + { + "epoch": 0.6547494148979097, + "grad_norm": 0.705055832862854, + "learning_rate": 0.00013010487275619034, + "loss": 2.6188, + "step": 8113 + }, + { + "epoch": 0.6548301186344928, + "grad_norm": 0.6927980780601501, + "learning_rate": 0.00013008981785714188, + "loss": 2.5744, + "step": 8114 + }, + { + "epoch": 0.6549108223710758, + "grad_norm": 0.7070884108543396, + "learning_rate": 0.0001300747622081765, + "loss": 2.618, + "step": 8115 + }, + { + "epoch": 0.6549915261076588, + "grad_norm": 0.723479688167572, + "learning_rate": 0.0001300597058096694, + "loss": 2.5928, + "step": 8116 + }, + { + "epoch": 0.6550722298442417, + "grad_norm": 0.6689562201499939, + "learning_rate": 0.00013004464866199587, + "loss": 2.5592, + "step": 8117 + }, + { + "epoch": 0.6551529335808248, + "grad_norm": 0.6685079336166382, + "learning_rate": 0.00013002959076553115, + "loss": 2.558, + "step": 8118 + }, + { + "epoch": 0.6552336373174078, + "grad_norm": 0.678105890750885, + "learning_rate": 0.00013001453212065057, + "loss": 2.6176, + "step": 8119 + }, + { + "epoch": 0.6553143410539908, + "grad_norm": 0.7355597019195557, + "learning_rate": 0.00012999947272772933, + "loss": 2.6293, + 
"step": 8120 + }, + { + "epoch": 0.6553950447905738, + "grad_norm": 0.735862672328949, + "learning_rate": 0.00012998441258714284, + "loss": 2.635, + "step": 8121 + }, + { + "epoch": 0.6554757485271568, + "grad_norm": 0.6766025424003601, + "learning_rate": 0.0001299693516992664, + "loss": 2.5829, + "step": 8122 + }, + { + "epoch": 0.6555564522637398, + "grad_norm": 0.6701885461807251, + "learning_rate": 0.00012995429006447542, + "loss": 2.5996, + "step": 8123 + }, + { + "epoch": 0.6556371560003228, + "grad_norm": 0.6814082264900208, + "learning_rate": 0.00012993922768314518, + "loss": 2.5906, + "step": 8124 + }, + { + "epoch": 0.6557178597369058, + "grad_norm": 0.7104958295822144, + "learning_rate": 0.00012992416455565113, + "loss": 2.6708, + "step": 8125 + }, + { + "epoch": 0.6557985634734889, + "grad_norm": 0.6451221108436584, + "learning_rate": 0.0001299091006823687, + "loss": 2.5512, + "step": 8126 + }, + { + "epoch": 0.6558792672100718, + "grad_norm": 0.6736068725585938, + "learning_rate": 0.0001298940360636733, + "loss": 2.5839, + "step": 8127 + }, + { + "epoch": 0.6559599709466548, + "grad_norm": 0.6873149871826172, + "learning_rate": 0.00012987897069994031, + "loss": 2.5804, + "step": 8128 + }, + { + "epoch": 0.6560406746832378, + "grad_norm": 0.6937728524208069, + "learning_rate": 0.00012986390459154533, + "loss": 2.5648, + "step": 8129 + }, + { + "epoch": 0.6561213784198209, + "grad_norm": 0.7109464406967163, + "learning_rate": 0.00012984883773886377, + "loss": 2.6132, + "step": 8130 + }, + { + "epoch": 0.6562020821564039, + "grad_norm": 0.7134159803390503, + "learning_rate": 0.00012983377014227115, + "loss": 2.6029, + "step": 8131 + }, + { + "epoch": 0.6562827858929868, + "grad_norm": 0.6788110733032227, + "learning_rate": 0.000129818701802143, + "loss": 2.6344, + "step": 8132 + }, + { + "epoch": 0.6563634896295698, + "grad_norm": 0.6798231601715088, + "learning_rate": 0.00012980363271885483, + "loss": 2.5758, + "step": 8133 + }, + { + "epoch": 
0.6564441933661529, + "grad_norm": 0.6586930155754089, + "learning_rate": 0.00012978856289278226, + "loss": 2.5918, + "step": 8134 + }, + { + "epoch": 0.6565248971027359, + "grad_norm": 0.6614218950271606, + "learning_rate": 0.0001297734923243008, + "loss": 2.5777, + "step": 8135 + }, + { + "epoch": 0.6566056008393188, + "grad_norm": 0.6874340176582336, + "learning_rate": 0.0001297584210137861, + "loss": 2.5528, + "step": 8136 + }, + { + "epoch": 0.6566863045759018, + "grad_norm": 0.6972174048423767, + "learning_rate": 0.00012974334896161376, + "loss": 2.6551, + "step": 8137 + }, + { + "epoch": 0.6567670083124849, + "grad_norm": 0.7414106726646423, + "learning_rate": 0.0001297282761681594, + "loss": 2.5719, + "step": 8138 + }, + { + "epoch": 0.6568477120490679, + "grad_norm": 0.6678279042243958, + "learning_rate": 0.00012971320263379868, + "loss": 2.555, + "step": 8139 + }, + { + "epoch": 0.6569284157856509, + "grad_norm": 0.692149817943573, + "learning_rate": 0.0001296981283589073, + "loss": 2.5991, + "step": 8140 + }, + { + "epoch": 0.6570091195222338, + "grad_norm": 0.6937025189399719, + "learning_rate": 0.00012968305334386094, + "loss": 2.5635, + "step": 8141 + }, + { + "epoch": 0.6570898232588169, + "grad_norm": 0.6250358819961548, + "learning_rate": 0.00012966797758903528, + "loss": 2.55, + "step": 8142 + }, + { + "epoch": 0.6571705269953999, + "grad_norm": 0.7388221025466919, + "learning_rate": 0.00012965290109480607, + "loss": 2.5307, + "step": 8143 + }, + { + "epoch": 0.6572512307319829, + "grad_norm": 0.7165891528129578, + "learning_rate": 0.00012963782386154904, + "loss": 2.5482, + "step": 8144 + }, + { + "epoch": 0.6573319344685659, + "grad_norm": 0.7605282068252563, + "learning_rate": 0.00012962274588963996, + "loss": 2.5839, + "step": 8145 + }, + { + "epoch": 0.657412638205149, + "grad_norm": 0.7259613275527954, + "learning_rate": 0.00012960766717945465, + "loss": 2.5612, + "step": 8146 + }, + { + "epoch": 0.6574933419417319, + "grad_norm": 
0.7301480770111084, + "learning_rate": 0.00012959258773136885, + "loss": 2.5365, + "step": 8147 + }, + { + "epoch": 0.6575740456783149, + "grad_norm": 0.6800966262817383, + "learning_rate": 0.0001295775075457584, + "loss": 2.5663, + "step": 8148 + }, + { + "epoch": 0.6576547494148979, + "grad_norm": 0.6968960165977478, + "learning_rate": 0.0001295624266229992, + "loss": 2.5626, + "step": 8149 + }, + { + "epoch": 0.657735453151481, + "grad_norm": 0.9044952392578125, + "learning_rate": 0.00012954734496346704, + "loss": 2.6479, + "step": 8150 + }, + { + "epoch": 0.6578161568880639, + "grad_norm": 0.6955156922340393, + "learning_rate": 0.00012953226256753777, + "loss": 2.5879, + "step": 8151 + }, + { + "epoch": 0.6578968606246469, + "grad_norm": 0.6535033583641052, + "learning_rate": 0.00012951717943558735, + "loss": 2.5372, + "step": 8152 + }, + { + "epoch": 0.6579775643612299, + "grad_norm": 0.720730721950531, + "learning_rate": 0.0001295020955679916, + "loss": 2.5813, + "step": 8153 + }, + { + "epoch": 0.658058268097813, + "grad_norm": 0.7190384268760681, + "learning_rate": 0.00012948701096512655, + "loss": 2.5923, + "step": 8154 + }, + { + "epoch": 0.658138971834396, + "grad_norm": 0.6624464988708496, + "learning_rate": 0.0001294719256273681, + "loss": 2.5548, + "step": 8155 + }, + { + "epoch": 0.6582196755709789, + "grad_norm": 0.7839831709861755, + "learning_rate": 0.00012945683955509224, + "loss": 2.531, + "step": 8156 + }, + { + "epoch": 0.6583003793075619, + "grad_norm": 0.694970965385437, + "learning_rate": 0.00012944175274867497, + "loss": 2.4693, + "step": 8157 + }, + { + "epoch": 0.658381083044145, + "grad_norm": 0.7409366965293884, + "learning_rate": 0.0001294266652084922, + "loss": 2.5706, + "step": 8158 + }, + { + "epoch": 0.658461786780728, + "grad_norm": 0.7502163052558899, + "learning_rate": 0.00012941157693492002, + "loss": 2.6137, + "step": 8159 + }, + { + "epoch": 0.658542490517311, + "grad_norm": 0.6627129912376404, + "learning_rate": 
0.00012939648792833447, + "loss": 2.5781, + "step": 8160 + }, + { + "epoch": 0.6586231942538939, + "grad_norm": 0.6775660514831543, + "learning_rate": 0.00012938139818911157, + "loss": 2.5441, + "step": 8161 + }, + { + "epoch": 0.6587038979904769, + "grad_norm": 0.7150553464889526, + "learning_rate": 0.00012936630771762748, + "loss": 2.5763, + "step": 8162 + }, + { + "epoch": 0.65878460172706, + "grad_norm": 0.7461466193199158, + "learning_rate": 0.0001293512165142582, + "loss": 2.54, + "step": 8163 + }, + { + "epoch": 0.658865305463643, + "grad_norm": 0.7635199427604675, + "learning_rate": 0.00012933612457937988, + "loss": 2.5763, + "step": 8164 + }, + { + "epoch": 0.658946009200226, + "grad_norm": 0.7360543608665466, + "learning_rate": 0.00012932103191336865, + "loss": 2.5968, + "step": 8165 + }, + { + "epoch": 0.6590267129368089, + "grad_norm": 0.6482167840003967, + "learning_rate": 0.0001293059385166007, + "loss": 2.5704, + "step": 8166 + }, + { + "epoch": 0.659107416673392, + "grad_norm": 0.7024737596511841, + "learning_rate": 0.00012929084438945208, + "loss": 2.6221, + "step": 8167 + }, + { + "epoch": 0.659188120409975, + "grad_norm": 0.7192068696022034, + "learning_rate": 0.0001292757495322991, + "loss": 2.5574, + "step": 8168 + }, + { + "epoch": 0.659268824146558, + "grad_norm": 0.6900508403778076, + "learning_rate": 0.0001292606539455179, + "loss": 2.5969, + "step": 8169 + }, + { + "epoch": 0.6593495278831409, + "grad_norm": 0.7522475719451904, + "learning_rate": 0.00012924555762948474, + "loss": 2.592, + "step": 8170 + }, + { + "epoch": 0.659430231619724, + "grad_norm": 0.6610947251319885, + "learning_rate": 0.00012923046058457583, + "loss": 2.5404, + "step": 8171 + }, + { + "epoch": 0.659510935356307, + "grad_norm": 0.667628288269043, + "learning_rate": 0.00012921536281116738, + "loss": 2.5551, + "step": 8172 + }, + { + "epoch": 0.65959163909289, + "grad_norm": 0.7119980454444885, + "learning_rate": 0.00012920026430963578, + "loss": 2.6002, + "step": 
8173 + }, + { + "epoch": 0.659672342829473, + "grad_norm": 0.712166428565979, + "learning_rate": 0.00012918516508035724, + "loss": 2.626, + "step": 8174 + }, + { + "epoch": 0.659753046566056, + "grad_norm": 0.6993290185928345, + "learning_rate": 0.0001291700651237081, + "loss": 2.6311, + "step": 8175 + }, + { + "epoch": 0.659833750302639, + "grad_norm": 0.6889405250549316, + "learning_rate": 0.0001291549644400647, + "loss": 2.6483, + "step": 8176 + }, + { + "epoch": 0.659914454039222, + "grad_norm": 0.7120937705039978, + "learning_rate": 0.00012913986302980334, + "loss": 2.5489, + "step": 8177 + }, + { + "epoch": 0.659995157775805, + "grad_norm": 0.7112947106361389, + "learning_rate": 0.00012912476089330043, + "loss": 2.6393, + "step": 8178 + }, + { + "epoch": 0.6600758615123881, + "grad_norm": 0.710342526435852, + "learning_rate": 0.00012910965803093237, + "loss": 2.5897, + "step": 8179 + }, + { + "epoch": 0.660156565248971, + "grad_norm": 0.6506931185722351, + "learning_rate": 0.0001290945544430755, + "loss": 2.6429, + "step": 8180 + }, + { + "epoch": 0.660237268985554, + "grad_norm": 0.7147021293640137, + "learning_rate": 0.00012907945013010633, + "loss": 2.5521, + "step": 8181 + }, + { + "epoch": 0.660317972722137, + "grad_norm": 0.6802387833595276, + "learning_rate": 0.0001290643450924012, + "loss": 2.581, + "step": 8182 + }, + { + "epoch": 0.6603986764587201, + "grad_norm": 0.7599670886993408, + "learning_rate": 0.00012904923933033664, + "loss": 2.5532, + "step": 8183 + }, + { + "epoch": 0.6604793801953031, + "grad_norm": 0.7105657458305359, + "learning_rate": 0.0001290341328442891, + "loss": 2.5744, + "step": 8184 + }, + { + "epoch": 0.660560083931886, + "grad_norm": 0.6786425113677979, + "learning_rate": 0.00012901902563463506, + "loss": 2.5326, + "step": 8185 + }, + { + "epoch": 0.660640787668469, + "grad_norm": 0.7305583357810974, + "learning_rate": 0.00012900391770175106, + "loss": 2.6103, + "step": 8186 + }, + { + "epoch": 0.6607214914050521, + 
"grad_norm": 0.6578992605209351, + "learning_rate": 0.00012898880904601363, + "loss": 2.5833, + "step": 8187 + }, + { + "epoch": 0.6608021951416351, + "grad_norm": 0.6498856544494629, + "learning_rate": 0.00012897369966779926, + "loss": 2.6333, + "step": 8188 + }, + { + "epoch": 0.660882898878218, + "grad_norm": 0.7065569162368774, + "learning_rate": 0.00012895858956748458, + "loss": 2.5326, + "step": 8189 + }, + { + "epoch": 0.660963602614801, + "grad_norm": 0.7676446437835693, + "learning_rate": 0.00012894347874544613, + "loss": 2.6233, + "step": 8190 + }, + { + "epoch": 0.6610443063513841, + "grad_norm": 0.6794395446777344, + "learning_rate": 0.00012892836720206056, + "loss": 2.5426, + "step": 8191 + }, + { + "epoch": 0.6611250100879671, + "grad_norm": 0.7448986768722534, + "learning_rate": 0.00012891325493770444, + "loss": 2.5832, + "step": 8192 + }, + { + "epoch": 0.6612057138245501, + "grad_norm": 0.7789760231971741, + "learning_rate": 0.0001288981419527544, + "loss": 2.6393, + "step": 8193 + }, + { + "epoch": 0.661286417561133, + "grad_norm": 0.7425827980041504, + "learning_rate": 0.00012888302824758718, + "loss": 2.6159, + "step": 8194 + }, + { + "epoch": 0.6613671212977161, + "grad_norm": 0.6677481532096863, + "learning_rate": 0.00012886791382257936, + "loss": 2.5399, + "step": 8195 + }, + { + "epoch": 0.6614478250342991, + "grad_norm": 0.698397159576416, + "learning_rate": 0.0001288527986781077, + "loss": 2.5443, + "step": 8196 + }, + { + "epoch": 0.6615285287708821, + "grad_norm": 0.6862680315971375, + "learning_rate": 0.00012883768281454885, + "loss": 2.5843, + "step": 8197 + }, + { + "epoch": 0.6616092325074651, + "grad_norm": 0.7421948313713074, + "learning_rate": 0.00012882256623227955, + "loss": 2.5885, + "step": 8198 + }, + { + "epoch": 0.6616899362440481, + "grad_norm": 0.7453073859214783, + "learning_rate": 0.00012880744893167654, + "loss": 2.5821, + "step": 8199 + }, + { + "epoch": 0.6617706399806311, + "grad_norm": 0.668218195438385, + 
"learning_rate": 0.00012879233091311667, + "loss": 2.5941, + "step": 8200 + }, + { + "epoch": 0.6618513437172141, + "grad_norm": 0.6864587664604187, + "learning_rate": 0.00012877721217697657, + "loss": 2.5321, + "step": 8201 + }, + { + "epoch": 0.6619320474537971, + "grad_norm": 0.6521022319793701, + "learning_rate": 0.00012876209272363317, + "loss": 2.5945, + "step": 8202 + }, + { + "epoch": 0.6620127511903802, + "grad_norm": 0.7564631104469299, + "learning_rate": 0.00012874697255346325, + "loss": 2.5901, + "step": 8203 + }, + { + "epoch": 0.6620934549269631, + "grad_norm": 0.731991171836853, + "learning_rate": 0.00012873185166684356, + "loss": 2.649, + "step": 8204 + }, + { + "epoch": 0.6621741586635461, + "grad_norm": 0.6804815530776978, + "learning_rate": 0.00012871673006415108, + "loss": 2.5417, + "step": 8205 + }, + { + "epoch": 0.6622548624001291, + "grad_norm": 0.6862792372703552, + "learning_rate": 0.0001287016077457626, + "loss": 2.6118, + "step": 8206 + }, + { + "epoch": 0.6623355661367122, + "grad_norm": 0.7013735175132751, + "learning_rate": 0.00012868648471205503, + "loss": 2.6296, + "step": 8207 + }, + { + "epoch": 0.6624162698732952, + "grad_norm": 0.7284584045410156, + "learning_rate": 0.00012867136096340529, + "loss": 2.6547, + "step": 8208 + }, + { + "epoch": 0.6624969736098781, + "grad_norm": 0.714546799659729, + "learning_rate": 0.00012865623650019025, + "loss": 2.5955, + "step": 8209 + }, + { + "epoch": 0.6625776773464611, + "grad_norm": 0.7645453214645386, + "learning_rate": 0.0001286411113227869, + "loss": 2.6132, + "step": 8210 + }, + { + "epoch": 0.6626583810830441, + "grad_norm": 0.6615093946456909, + "learning_rate": 0.0001286259854315722, + "loss": 2.5701, + "step": 8211 + }, + { + "epoch": 0.6627390848196272, + "grad_norm": 0.6565523147583008, + "learning_rate": 0.0001286108588269231, + "loss": 2.57, + "step": 8212 + }, + { + "epoch": 0.6628197885562102, + "grad_norm": 0.7173478007316589, + "learning_rate": 0.00012859573150921666, + 
"loss": 2.589, + "step": 8213 + }, + { + "epoch": 0.6629004922927931, + "grad_norm": 0.7069580554962158, + "learning_rate": 0.00012858060347882975, + "loss": 2.6146, + "step": 8214 + }, + { + "epoch": 0.6629811960293761, + "grad_norm": 0.7004678249359131, + "learning_rate": 0.00012856547473613953, + "loss": 2.5735, + "step": 8215 + }, + { + "epoch": 0.6630618997659592, + "grad_norm": 0.6589130163192749, + "learning_rate": 0.00012855034528152305, + "loss": 2.5731, + "step": 8216 + }, + { + "epoch": 0.6631426035025422, + "grad_norm": 0.7223117351531982, + "learning_rate": 0.0001285352151153573, + "loss": 2.5262, + "step": 8217 + }, + { + "epoch": 0.6632233072391251, + "grad_norm": 0.7045131325721741, + "learning_rate": 0.0001285200842380194, + "loss": 2.5789, + "step": 8218 + }, + { + "epoch": 0.6633040109757081, + "grad_norm": 0.7002174854278564, + "learning_rate": 0.00012850495264988645, + "loss": 2.6386, + "step": 8219 + }, + { + "epoch": 0.6633847147122912, + "grad_norm": 0.6844584941864014, + "learning_rate": 0.00012848982035133555, + "loss": 2.5394, + "step": 8220 + }, + { + "epoch": 0.6634654184488742, + "grad_norm": 0.7154871821403503, + "learning_rate": 0.00012847468734274387, + "loss": 2.5927, + "step": 8221 + }, + { + "epoch": 0.6635461221854572, + "grad_norm": 0.6856776475906372, + "learning_rate": 0.00012845955362448855, + "loss": 2.5694, + "step": 8222 + }, + { + "epoch": 0.6636268259220401, + "grad_norm": 0.7069089412689209, + "learning_rate": 0.00012844441919694676, + "loss": 2.5856, + "step": 8223 + }, + { + "epoch": 0.6637075296586232, + "grad_norm": 0.7084143161773682, + "learning_rate": 0.00012842928406049567, + "loss": 2.6301, + "step": 8224 + }, + { + "epoch": 0.6637882333952062, + "grad_norm": 0.6790862679481506, + "learning_rate": 0.00012841414821551252, + "loss": 2.5586, + "step": 8225 + }, + { + "epoch": 0.6638689371317892, + "grad_norm": 0.6537249684333801, + "learning_rate": 0.00012839901166237453, + "loss": 2.5652, + "step": 8226 + }, + { 
+ "epoch": 0.6639496408683722, + "grad_norm": 0.6670125126838684, + "learning_rate": 0.00012838387440145893, + "loss": 2.5438, + "step": 8227 + }, + { + "epoch": 0.6640303446049552, + "grad_norm": 0.7202955484390259, + "learning_rate": 0.00012836873643314297, + "loss": 2.5632, + "step": 8228 + }, + { + "epoch": 0.6641110483415382, + "grad_norm": 0.6844765543937683, + "learning_rate": 0.00012835359775780394, + "loss": 2.5595, + "step": 8229 + }, + { + "epoch": 0.6641917520781212, + "grad_norm": 0.6557698249816895, + "learning_rate": 0.00012833845837581916, + "loss": 2.5998, + "step": 8230 + }, + { + "epoch": 0.6642724558147042, + "grad_norm": 0.6741784811019897, + "learning_rate": 0.0001283233182875659, + "loss": 2.5591, + "step": 8231 + }, + { + "epoch": 0.6643531595512873, + "grad_norm": 0.6926484704017639, + "learning_rate": 0.00012830817749342154, + "loss": 2.5557, + "step": 8232 + }, + { + "epoch": 0.6644338632878702, + "grad_norm": 0.6866984367370605, + "learning_rate": 0.00012829303599376336, + "loss": 2.5646, + "step": 8233 + }, + { + "epoch": 0.6645145670244532, + "grad_norm": 0.6772707104682922, + "learning_rate": 0.0001282778937889688, + "loss": 2.6028, + "step": 8234 + }, + { + "epoch": 0.6645952707610362, + "grad_norm": 0.693236768245697, + "learning_rate": 0.00012826275087941518, + "loss": 2.611, + "step": 8235 + }, + { + "epoch": 0.6646759744976193, + "grad_norm": 0.7181996703147888, + "learning_rate": 0.00012824760726547993, + "loss": 2.6081, + "step": 8236 + }, + { + "epoch": 0.6647566782342023, + "grad_norm": 0.6845484375953674, + "learning_rate": 0.00012823246294754048, + "loss": 2.5544, + "step": 8237 + }, + { + "epoch": 0.6648373819707852, + "grad_norm": 0.7106444239616394, + "learning_rate": 0.00012821731792597425, + "loss": 2.552, + "step": 8238 + }, + { + "epoch": 0.6649180857073682, + "grad_norm": 0.6930601000785828, + "learning_rate": 0.0001282021722011587, + "loss": 2.5401, + "step": 8239 + }, + { + "epoch": 0.6649987894439513, + 
"grad_norm": 0.6658228039741516, + "learning_rate": 0.00012818702577347129, + "loss": 2.6287, + "step": 8240 + }, + { + "epoch": 0.6650794931805343, + "grad_norm": 0.6919803619384766, + "learning_rate": 0.0001281718786432895, + "loss": 2.6142, + "step": 8241 + }, + { + "epoch": 0.6651601969171173, + "grad_norm": 0.6675698757171631, + "learning_rate": 0.00012815673081099086, + "loss": 2.5325, + "step": 8242 + }, + { + "epoch": 0.6652409006537002, + "grad_norm": 0.6669798493385315, + "learning_rate": 0.0001281415822769529, + "loss": 2.5355, + "step": 8243 + }, + { + "epoch": 0.6653216043902833, + "grad_norm": 0.6449857950210571, + "learning_rate": 0.00012812643304155316, + "loss": 2.5968, + "step": 8244 + }, + { + "epoch": 0.6654023081268663, + "grad_norm": 0.6972789168357849, + "learning_rate": 0.00012811128310516914, + "loss": 2.6133, + "step": 8245 + }, + { + "epoch": 0.6654830118634493, + "grad_norm": 0.7179878354072571, + "learning_rate": 0.0001280961324681785, + "loss": 2.5793, + "step": 8246 + }, + { + "epoch": 0.6655637156000322, + "grad_norm": 0.6736378073692322, + "learning_rate": 0.0001280809811309588, + "loss": 2.5543, + "step": 8247 + }, + { + "epoch": 0.6656444193366153, + "grad_norm": 0.7376420497894287, + "learning_rate": 0.00012806582909388763, + "loss": 2.5501, + "step": 8248 + }, + { + "epoch": 0.6657251230731983, + "grad_norm": 0.7163094878196716, + "learning_rate": 0.00012805067635734263, + "loss": 2.5538, + "step": 8249 + }, + { + "epoch": 0.6658058268097813, + "grad_norm": 0.7699353694915771, + "learning_rate": 0.00012803552292170144, + "loss": 2.5925, + "step": 8250 + }, + { + "epoch": 0.6658865305463643, + "grad_norm": 0.6504995822906494, + "learning_rate": 0.00012802036878734177, + "loss": 2.5944, + "step": 8251 + }, + { + "epoch": 0.6659672342829474, + "grad_norm": 0.7150379419326782, + "learning_rate": 0.0001280052139546412, + "loss": 2.5959, + "step": 8252 + }, + { + "epoch": 0.6660479380195303, + "grad_norm": 0.7562555074691772, + 
"learning_rate": 0.00012799005842397757, + "loss": 2.6041, + "step": 8253 + }, + { + "epoch": 0.6661286417561133, + "grad_norm": 0.7242838740348816, + "learning_rate": 0.00012797490219572846, + "loss": 2.6152, + "step": 8254 + }, + { + "epoch": 0.6662093454926963, + "grad_norm": 0.7062848210334778, + "learning_rate": 0.00012795974527027168, + "loss": 2.596, + "step": 8255 + }, + { + "epoch": 0.6662900492292794, + "grad_norm": 0.8179726004600525, + "learning_rate": 0.00012794458764798497, + "loss": 2.5792, + "step": 8256 + }, + { + "epoch": 0.6663707529658623, + "grad_norm": 0.692166268825531, + "learning_rate": 0.00012792942932924608, + "loss": 2.6025, + "step": 8257 + }, + { + "epoch": 0.6664514567024453, + "grad_norm": 0.6540334224700928, + "learning_rate": 0.0001279142703144328, + "loss": 2.5119, + "step": 8258 + }, + { + "epoch": 0.6665321604390283, + "grad_norm": 0.7087461352348328, + "learning_rate": 0.00012789911060392294, + "loss": 2.5808, + "step": 8259 + }, + { + "epoch": 0.6666128641756114, + "grad_norm": 0.6897622346878052, + "learning_rate": 0.0001278839501980943, + "loss": 2.5811, + "step": 8260 + }, + { + "epoch": 0.6666935679121944, + "grad_norm": 0.6653634905815125, + "learning_rate": 0.00012786878909732473, + "loss": 2.5498, + "step": 8261 + }, + { + "epoch": 0.6667742716487773, + "grad_norm": 0.6541483402252197, + "learning_rate": 0.0001278536273019921, + "loss": 2.605, + "step": 8262 + }, + { + "epoch": 0.6668549753853603, + "grad_norm": 0.6748146414756775, + "learning_rate": 0.00012783846481247428, + "loss": 2.5571, + "step": 8263 + }, + { + "epoch": 0.6669356791219433, + "grad_norm": 0.7258282899856567, + "learning_rate": 0.00012782330162914915, + "loss": 2.5562, + "step": 8264 + }, + { + "epoch": 0.6670163828585264, + "grad_norm": 0.6963080167770386, + "learning_rate": 0.00012780813775239457, + "loss": 2.6467, + "step": 8265 + }, + { + "epoch": 0.6670970865951094, + "grad_norm": 0.6627718806266785, + "learning_rate": 0.00012779297318258855, + 
"loss": 2.5369, + "step": 8266 + }, + { + "epoch": 0.6671777903316923, + "grad_norm": 0.7026168704032898, + "learning_rate": 0.00012777780792010897, + "loss": 2.5639, + "step": 8267 + }, + { + "epoch": 0.6672584940682753, + "grad_norm": 0.6969077587127686, + "learning_rate": 0.0001277626419653338, + "loss": 2.517, + "step": 8268 + }, + { + "epoch": 0.6673391978048584, + "grad_norm": 0.6918485760688782, + "learning_rate": 0.00012774747531864102, + "loss": 2.6388, + "step": 8269 + }, + { + "epoch": 0.6674199015414414, + "grad_norm": 0.6661256551742554, + "learning_rate": 0.00012773230798040862, + "loss": 2.5477, + "step": 8270 + }, + { + "epoch": 0.6675006052780244, + "grad_norm": 0.6778402328491211, + "learning_rate": 0.0001277171399510146, + "loss": 2.6032, + "step": 8271 + }, + { + "epoch": 0.6675813090146073, + "grad_norm": 0.6464864611625671, + "learning_rate": 0.00012770197123083702, + "loss": 2.5396, + "step": 8272 + }, + { + "epoch": 0.6676620127511904, + "grad_norm": 0.7154508233070374, + "learning_rate": 0.0001276868018202539, + "loss": 2.6163, + "step": 8273 + }, + { + "epoch": 0.6677427164877734, + "grad_norm": 0.6849631071090698, + "learning_rate": 0.0001276716317196433, + "loss": 2.549, + "step": 8274 + }, + { + "epoch": 0.6678234202243564, + "grad_norm": 0.6696017980575562, + "learning_rate": 0.00012765646092938334, + "loss": 2.5046, + "step": 8275 + }, + { + "epoch": 0.6679041239609393, + "grad_norm": 0.668153703212738, + "learning_rate": 0.00012764128944985203, + "loss": 2.5422, + "step": 8276 + }, + { + "epoch": 0.6679848276975224, + "grad_norm": 0.6600282192230225, + "learning_rate": 0.00012762611728142756, + "loss": 2.6117, + "step": 8277 + }, + { + "epoch": 0.6680655314341054, + "grad_norm": 0.6691608428955078, + "learning_rate": 0.000127610944424488, + "loss": 2.5761, + "step": 8278 + }, + { + "epoch": 0.6681462351706884, + "grad_norm": 0.695142924785614, + "learning_rate": 0.00012759577087941156, + "loss": 2.6123, + "step": 8279 + }, + { + 
"epoch": 0.6682269389072714, + "grad_norm": 0.6846559643745422, + "learning_rate": 0.00012758059664657635, + "loss": 2.5882, + "step": 8280 + }, + { + "epoch": 0.6683076426438544, + "grad_norm": 0.7616459131240845, + "learning_rate": 0.0001275654217263606, + "loss": 2.5559, + "step": 8281 + }, + { + "epoch": 0.6683883463804374, + "grad_norm": 0.6995570063591003, + "learning_rate": 0.00012755024611914246, + "loss": 2.5336, + "step": 8282 + }, + { + "epoch": 0.6684690501170204, + "grad_norm": 0.7199691534042358, + "learning_rate": 0.0001275350698253002, + "loss": 2.6618, + "step": 8283 + }, + { + "epoch": 0.6685497538536034, + "grad_norm": 0.6938748955726624, + "learning_rate": 0.000127519892845212, + "loss": 2.574, + "step": 8284 + }, + { + "epoch": 0.6686304575901865, + "grad_norm": 0.6827714443206787, + "learning_rate": 0.00012750471517925614, + "loss": 2.5647, + "step": 8285 + }, + { + "epoch": 0.6687111613267694, + "grad_norm": 0.6684606671333313, + "learning_rate": 0.00012748953682781083, + "loss": 2.528, + "step": 8286 + }, + { + "epoch": 0.6687918650633524, + "grad_norm": 0.6842156052589417, + "learning_rate": 0.00012747435779125448, + "loss": 2.5521, + "step": 8287 + }, + { + "epoch": 0.6688725687999354, + "grad_norm": 0.7440506219863892, + "learning_rate": 0.0001274591780699653, + "loss": 2.5646, + "step": 8288 + }, + { + "epoch": 0.6689532725365185, + "grad_norm": 0.769922137260437, + "learning_rate": 0.0001274439976643216, + "loss": 2.6104, + "step": 8289 + }, + { + "epoch": 0.6690339762731015, + "grad_norm": 0.7793089747428894, + "learning_rate": 0.00012742881657470175, + "loss": 2.6348, + "step": 8290 + }, + { + "epoch": 0.6691146800096844, + "grad_norm": 0.695060133934021, + "learning_rate": 0.0001274136348014841, + "loss": 2.5797, + "step": 8291 + }, + { + "epoch": 0.6691953837462674, + "grad_norm": 0.7089917659759521, + "learning_rate": 0.00012739845234504697, + "loss": 2.5431, + "step": 8292 + }, + { + "epoch": 0.6692760874828505, + "grad_norm": 
0.7542717456817627, + "learning_rate": 0.00012738326920576885, + "loss": 2.6172, + "step": 8293 + }, + { + "epoch": 0.6693567912194335, + "grad_norm": 0.6947969794273376, + "learning_rate": 0.00012736808538402802, + "loss": 2.6026, + "step": 8294 + }, + { + "epoch": 0.6694374949560165, + "grad_norm": 0.6696321368217468, + "learning_rate": 0.00012735290088020302, + "loss": 2.5592, + "step": 8295 + }, + { + "epoch": 0.6695181986925994, + "grad_norm": 0.7001518607139587, + "learning_rate": 0.0001273377156946722, + "loss": 2.5994, + "step": 8296 + }, + { + "epoch": 0.6695989024291825, + "grad_norm": 0.6708101630210876, + "learning_rate": 0.000127322529827814, + "loss": 2.6392, + "step": 8297 + }, + { + "epoch": 0.6696796061657655, + "grad_norm": 0.6282601952552795, + "learning_rate": 0.000127307343280007, + "loss": 2.5762, + "step": 8298 + }, + { + "epoch": 0.6697603099023485, + "grad_norm": 0.6879595518112183, + "learning_rate": 0.0001272921560516296, + "loss": 2.5507, + "step": 8299 + }, + { + "epoch": 0.6698410136389314, + "grad_norm": 0.6108266115188599, + "learning_rate": 0.00012727696814306033, + "loss": 2.5865, + "step": 8300 + }, + { + "epoch": 0.6699217173755145, + "grad_norm": 0.6763970851898193, + "learning_rate": 0.0001272617795546777, + "loss": 2.6439, + "step": 8301 + }, + { + "epoch": 0.6700024211120975, + "grad_norm": 0.6997560858726501, + "learning_rate": 0.00012724659028686027, + "loss": 2.5291, + "step": 8302 + }, + { + "epoch": 0.6700831248486805, + "grad_norm": 0.675714910030365, + "learning_rate": 0.0001272314003399866, + "loss": 2.5452, + "step": 8303 + }, + { + "epoch": 0.6701638285852635, + "grad_norm": 0.6847789883613586, + "learning_rate": 0.00012721620971443525, + "loss": 2.6111, + "step": 8304 + }, + { + "epoch": 0.6702445323218466, + "grad_norm": 0.7283920645713806, + "learning_rate": 0.0001272010184105848, + "loss": 2.6322, + "step": 8305 + }, + { + "epoch": 0.6703252360584295, + "grad_norm": 0.7551796436309814, + "learning_rate": 
0.00012718582642881382, + "loss": 2.5728, + "step": 8306 + }, + { + "epoch": 0.6704059397950125, + "grad_norm": 0.694526195526123, + "learning_rate": 0.00012717063376950104, + "loss": 2.6241, + "step": 8307 + }, + { + "epoch": 0.6704866435315955, + "grad_norm": 0.6956443190574646, + "learning_rate": 0.00012715544043302504, + "loss": 2.5531, + "step": 8308 + }, + { + "epoch": 0.6705673472681786, + "grad_norm": 0.7649452686309814, + "learning_rate": 0.00012714024641976446, + "loss": 2.5462, + "step": 8309 + }, + { + "epoch": 0.6706480510047615, + "grad_norm": 0.7711065411567688, + "learning_rate": 0.00012712505173009797, + "loss": 2.5878, + "step": 8310 + }, + { + "epoch": 0.6707287547413445, + "grad_norm": 0.68077552318573, + "learning_rate": 0.00012710985636440434, + "loss": 2.5668, + "step": 8311 + }, + { + "epoch": 0.6708094584779275, + "grad_norm": 0.7181024551391602, + "learning_rate": 0.0001270946603230622, + "loss": 2.6104, + "step": 8312 + }, + { + "epoch": 0.6708901622145105, + "grad_norm": 0.7136553525924683, + "learning_rate": 0.0001270794636064503, + "loss": 2.5282, + "step": 8313 + }, + { + "epoch": 0.6709708659510936, + "grad_norm": 0.880094587802887, + "learning_rate": 0.00012706426621494736, + "loss": 2.5837, + "step": 8314 + }, + { + "epoch": 0.6710515696876765, + "grad_norm": 0.7438541054725647, + "learning_rate": 0.00012704906814893217, + "loss": 2.5577, + "step": 8315 + }, + { + "epoch": 0.6711322734242595, + "grad_norm": 0.8197470903396606, + "learning_rate": 0.00012703386940878352, + "loss": 2.569, + "step": 8316 + }, + { + "epoch": 0.6712129771608425, + "grad_norm": 0.7728317975997925, + "learning_rate": 0.00012701866999488014, + "loss": 2.6407, + "step": 8317 + }, + { + "epoch": 0.6712936808974256, + "grad_norm": 0.7594823837280273, + "learning_rate": 0.0001270034699076009, + "loss": 2.5789, + "step": 8318 + }, + { + "epoch": 0.6713743846340086, + "grad_norm": 0.7502284646034241, + "learning_rate": 0.0001269882691473246, + "loss": 2.6068, + 
"step": 8319 + }, + { + "epoch": 0.6714550883705915, + "grad_norm": 0.7355664372444153, + "learning_rate": 0.0001269730677144301, + "loss": 2.6055, + "step": 8320 + }, + { + "epoch": 0.6715357921071745, + "grad_norm": 0.7218407392501831, + "learning_rate": 0.0001269578656092962, + "loss": 2.5953, + "step": 8321 + }, + { + "epoch": 0.6716164958437576, + "grad_norm": 0.6932538747787476, + "learning_rate": 0.00012694266283230185, + "loss": 2.5795, + "step": 8322 + }, + { + "epoch": 0.6716971995803406, + "grad_norm": 0.7337260246276855, + "learning_rate": 0.00012692745938382591, + "loss": 2.5606, + "step": 8323 + }, + { + "epoch": 0.6717779033169236, + "grad_norm": 0.6959026455879211, + "learning_rate": 0.00012691225526424731, + "loss": 2.5688, + "step": 8324 + }, + { + "epoch": 0.6718586070535065, + "grad_norm": 0.7352995872497559, + "learning_rate": 0.00012689705047394493, + "loss": 2.6308, + "step": 8325 + }, + { + "epoch": 0.6719393107900896, + "grad_norm": 0.7023616433143616, + "learning_rate": 0.00012688184501329777, + "loss": 2.6462, + "step": 8326 + }, + { + "epoch": 0.6720200145266726, + "grad_norm": 0.6581354737281799, + "learning_rate": 0.00012686663888268474, + "loss": 2.5997, + "step": 8327 + }, + { + "epoch": 0.6721007182632556, + "grad_norm": 0.6332606077194214, + "learning_rate": 0.00012685143208248484, + "loss": 2.6348, + "step": 8328 + }, + { + "epoch": 0.6721814219998385, + "grad_norm": 0.6826457977294922, + "learning_rate": 0.00012683622461307707, + "loss": 2.5092, + "step": 8329 + }, + { + "epoch": 0.6722621257364216, + "grad_norm": 0.7641614079475403, + "learning_rate": 0.00012682101647484042, + "loss": 2.7098, + "step": 8330 + }, + { + "epoch": 0.6723428294730046, + "grad_norm": 0.7153630256652832, + "learning_rate": 0.00012680580766815394, + "loss": 2.5647, + "step": 8331 + }, + { + "epoch": 0.6724235332095876, + "grad_norm": 0.6746379137039185, + "learning_rate": 0.00012679059819339664, + "loss": 2.6187, + "step": 8332 + }, + { + "epoch": 
0.6725042369461706, + "grad_norm": 0.6748883128166199, + "learning_rate": 0.00012677538805094764, + "loss": 2.6045, + "step": 8333 + }, + { + "epoch": 0.6725849406827537, + "grad_norm": 0.7366370558738708, + "learning_rate": 0.00012676017724118596, + "loss": 2.5789, + "step": 8334 + }, + { + "epoch": 0.6726656444193366, + "grad_norm": 0.7381749153137207, + "learning_rate": 0.00012674496576449074, + "loss": 2.5958, + "step": 8335 + }, + { + "epoch": 0.6727463481559196, + "grad_norm": 0.7109243869781494, + "learning_rate": 0.00012672975362124103, + "loss": 2.5874, + "step": 8336 + }, + { + "epoch": 0.6728270518925026, + "grad_norm": 0.6904270052909851, + "learning_rate": 0.00012671454081181595, + "loss": 2.5891, + "step": 8337 + }, + { + "epoch": 0.6729077556290857, + "grad_norm": 0.6809365749359131, + "learning_rate": 0.00012669932733659476, + "loss": 2.5904, + "step": 8338 + }, + { + "epoch": 0.6729884593656686, + "grad_norm": 0.7527552843093872, + "learning_rate": 0.00012668411319595647, + "loss": 2.5602, + "step": 8339 + }, + { + "epoch": 0.6730691631022516, + "grad_norm": 0.6746577620506287, + "learning_rate": 0.00012666889839028038, + "loss": 2.5468, + "step": 8340 + }, + { + "epoch": 0.6731498668388346, + "grad_norm": 0.6904895305633545, + "learning_rate": 0.00012665368291994562, + "loss": 2.623, + "step": 8341 + }, + { + "epoch": 0.6732305705754177, + "grad_norm": 0.6495908498764038, + "learning_rate": 0.00012663846678533135, + "loss": 2.5843, + "step": 8342 + }, + { + "epoch": 0.6733112743120007, + "grad_norm": 0.6782342195510864, + "learning_rate": 0.00012662324998681692, + "loss": 2.6141, + "step": 8343 + }, + { + "epoch": 0.6733919780485836, + "grad_norm": 0.7090504765510559, + "learning_rate": 0.0001266080325247815, + "loss": 2.6654, + "step": 8344 + }, + { + "epoch": 0.6734726817851666, + "grad_norm": 0.7085515856742859, + "learning_rate": 0.00012659281439960434, + "loss": 2.5394, + "step": 8345 + }, + { + "epoch": 0.6735533855217497, + "grad_norm": 
0.6813806295394897, + "learning_rate": 0.00012657759561166473, + "loss": 2.6522, + "step": 8346 + }, + { + "epoch": 0.6736340892583327, + "grad_norm": 0.726378858089447, + "learning_rate": 0.00012656237616134197, + "loss": 2.5922, + "step": 8347 + }, + { + "epoch": 0.6737147929949157, + "grad_norm": 0.6323714256286621, + "learning_rate": 0.00012654715604901534, + "loss": 2.4938, + "step": 8348 + }, + { + "epoch": 0.6737954967314986, + "grad_norm": 0.6925889253616333, + "learning_rate": 0.0001265319352750642, + "loss": 2.635, + "step": 8349 + }, + { + "epoch": 0.6738762004680817, + "grad_norm": 0.6676003932952881, + "learning_rate": 0.00012651671383986788, + "loss": 2.558, + "step": 8350 + }, + { + "epoch": 0.6739569042046647, + "grad_norm": 0.7464616298675537, + "learning_rate": 0.00012650149174380575, + "loss": 2.5777, + "step": 8351 + }, + { + "epoch": 0.6740376079412477, + "grad_norm": 0.6611667275428772, + "learning_rate": 0.00012648626898725715, + "loss": 2.5779, + "step": 8352 + }, + { + "epoch": 0.6741183116778307, + "grad_norm": 0.7391866445541382, + "learning_rate": 0.00012647104557060148, + "loss": 2.5624, + "step": 8353 + }, + { + "epoch": 0.6741990154144137, + "grad_norm": 0.7107826471328735, + "learning_rate": 0.00012645582149421817, + "loss": 2.5744, + "step": 8354 + }, + { + "epoch": 0.6742797191509967, + "grad_norm": 0.7385339736938477, + "learning_rate": 0.00012644059675848666, + "loss": 2.5752, + "step": 8355 + }, + { + "epoch": 0.6743604228875797, + "grad_norm": 0.6887345314025879, + "learning_rate": 0.00012642537136378634, + "loss": 2.5794, + "step": 8356 + }, + { + "epoch": 0.6744411266241627, + "grad_norm": 0.6934933662414551, + "learning_rate": 0.00012641014531049666, + "loss": 2.5361, + "step": 8357 + }, + { + "epoch": 0.6745218303607458, + "grad_norm": 0.7437291741371155, + "learning_rate": 0.00012639491859899716, + "loss": 2.5741, + "step": 8358 + }, + { + "epoch": 0.6746025340973287, + "grad_norm": 0.7088494896888733, + "learning_rate": 
0.00012637969122966729, + "loss": 2.6449, + "step": 8359 + }, + { + "epoch": 0.6746832378339117, + "grad_norm": 0.7496390342712402, + "learning_rate": 0.00012636446320288654, + "loss": 2.6109, + "step": 8360 + }, + { + "epoch": 0.6747639415704947, + "grad_norm": 0.6949843764305115, + "learning_rate": 0.00012634923451903447, + "loss": 2.5769, + "step": 8361 + }, + { + "epoch": 0.6748446453070778, + "grad_norm": 0.7192673087120056, + "learning_rate": 0.00012633400517849056, + "loss": 2.6053, + "step": 8362 + }, + { + "epoch": 0.6749253490436607, + "grad_norm": 0.7003379464149475, + "learning_rate": 0.00012631877518163442, + "loss": 2.5745, + "step": 8363 + }, + { + "epoch": 0.6750060527802437, + "grad_norm": 0.7499879002571106, + "learning_rate": 0.00012630354452884563, + "loss": 2.6077, + "step": 8364 + }, + { + "epoch": 0.6750867565168267, + "grad_norm": 0.7047405242919922, + "learning_rate": 0.00012628831322050377, + "loss": 2.5955, + "step": 8365 + }, + { + "epoch": 0.6751674602534097, + "grad_norm": 0.7463203072547913, + "learning_rate": 0.00012627308125698838, + "loss": 2.5421, + "step": 8366 + }, + { + "epoch": 0.6752481639899928, + "grad_norm": 0.7377086877822876, + "learning_rate": 0.00012625784863867914, + "loss": 2.5804, + "step": 8367 + }, + { + "epoch": 0.6753288677265757, + "grad_norm": 0.7136400938034058, + "learning_rate": 0.00012624261536595566, + "loss": 2.5673, + "step": 8368 + }, + { + "epoch": 0.6754095714631587, + "grad_norm": 0.6923615336418152, + "learning_rate": 0.0001262273814391976, + "loss": 2.5832, + "step": 8369 + }, + { + "epoch": 0.6754902751997417, + "grad_norm": 0.7495028972625732, + "learning_rate": 0.00012621214685878469, + "loss": 2.5943, + "step": 8370 + }, + { + "epoch": 0.6755709789363248, + "grad_norm": 0.6751434206962585, + "learning_rate": 0.0001261969116250965, + "loss": 2.5495, + "step": 8371 + }, + { + "epoch": 0.6756516826729078, + "grad_norm": 0.7055973410606384, + "learning_rate": 0.00012618167573851284, + "loss": 
2.5651, + "step": 8372 + }, + { + "epoch": 0.6757323864094907, + "grad_norm": 0.7479640245437622, + "learning_rate": 0.00012616643919941337, + "loss": 2.653, + "step": 8373 + }, + { + "epoch": 0.6758130901460737, + "grad_norm": 0.7075015902519226, + "learning_rate": 0.00012615120200817778, + "loss": 2.5787, + "step": 8374 + }, + { + "epoch": 0.6758937938826568, + "grad_norm": 0.7513934969902039, + "learning_rate": 0.00012613596416518593, + "loss": 2.6099, + "step": 8375 + }, + { + "epoch": 0.6759744976192398, + "grad_norm": 0.6742326021194458, + "learning_rate": 0.00012612072567081754, + "loss": 2.5335, + "step": 8376 + }, + { + "epoch": 0.6760552013558228, + "grad_norm": 0.7271459698677063, + "learning_rate": 0.00012610548652545239, + "loss": 2.6082, + "step": 8377 + }, + { + "epoch": 0.6761359050924057, + "grad_norm": 0.7481515407562256, + "learning_rate": 0.00012609024672947022, + "loss": 2.5805, + "step": 8378 + }, + { + "epoch": 0.6762166088289888, + "grad_norm": 0.7484803199768066, + "learning_rate": 0.00012607500628325093, + "loss": 2.6099, + "step": 8379 + }, + { + "epoch": 0.6762973125655718, + "grad_norm": 0.7462390661239624, + "learning_rate": 0.00012605976518717435, + "loss": 2.6054, + "step": 8380 + }, + { + "epoch": 0.6763780163021548, + "grad_norm": 0.7014410495758057, + "learning_rate": 0.00012604452344162028, + "loss": 2.5614, + "step": 8381 + }, + { + "epoch": 0.6764587200387377, + "grad_norm": 0.6902963519096375, + "learning_rate": 0.0001260292810469686, + "loss": 2.5813, + "step": 8382 + }, + { + "epoch": 0.6765394237753208, + "grad_norm": 0.6646186113357544, + "learning_rate": 0.00012601403800359919, + "loss": 2.545, + "step": 8383 + }, + { + "epoch": 0.6766201275119038, + "grad_norm": 0.7067462801933289, + "learning_rate": 0.00012599879431189197, + "loss": 2.6195, + "step": 8384 + }, + { + "epoch": 0.6767008312484868, + "grad_norm": 0.7263965010643005, + "learning_rate": 0.0001259835499722268, + "loss": 2.5929, + "step": 8385 + }, + { + 
"epoch": 0.6767815349850698, + "grad_norm": 0.6672000885009766, + "learning_rate": 0.0001259683049849837, + "loss": 2.5561, + "step": 8386 + }, + { + "epoch": 0.6768622387216529, + "grad_norm": 0.6543236374855042, + "learning_rate": 0.0001259530593505425, + "loss": 2.6256, + "step": 8387 + }, + { + "epoch": 0.6769429424582358, + "grad_norm": 0.6532339453697205, + "learning_rate": 0.00012593781306928324, + "loss": 2.5074, + "step": 8388 + }, + { + "epoch": 0.6770236461948188, + "grad_norm": 0.7442833185195923, + "learning_rate": 0.00012592256614158591, + "loss": 2.6124, + "step": 8389 + }, + { + "epoch": 0.6771043499314018, + "grad_norm": 0.786685585975647, + "learning_rate": 0.00012590731856783043, + "loss": 2.6077, + "step": 8390 + }, + { + "epoch": 0.6771850536679849, + "grad_norm": 0.7952337265014648, + "learning_rate": 0.00012589207034839687, + "loss": 2.5894, + "step": 8391 + }, + { + "epoch": 0.6772657574045678, + "grad_norm": 0.7847954034805298, + "learning_rate": 0.00012587682148366524, + "loss": 2.4934, + "step": 8392 + }, + { + "epoch": 0.6773464611411508, + "grad_norm": 0.6769007444381714, + "learning_rate": 0.00012586157197401552, + "loss": 2.5695, + "step": 8393 + }, + { + "epoch": 0.6774271648777338, + "grad_norm": 0.6583757996559143, + "learning_rate": 0.00012584632181982788, + "loss": 2.5866, + "step": 8394 + }, + { + "epoch": 0.6775078686143169, + "grad_norm": 0.7375823855400085, + "learning_rate": 0.0001258310710214823, + "loss": 2.5141, + "step": 8395 + }, + { + "epoch": 0.6775885723508999, + "grad_norm": 0.6901078224182129, + "learning_rate": 0.00012581581957935896, + "loss": 2.5732, + "step": 8396 + }, + { + "epoch": 0.6776692760874828, + "grad_norm": 0.687152624130249, + "learning_rate": 0.0001258005674938379, + "loss": 2.5916, + "step": 8397 + }, + { + "epoch": 0.6777499798240658, + "grad_norm": 0.7198586463928223, + "learning_rate": 0.00012578531476529917, + "loss": 2.5626, + "step": 8398 + }, + { + "epoch": 0.6778306835606489, + 
"grad_norm": 0.7417474985122681, + "learning_rate": 0.00012577006139412309, + "loss": 2.5486, + "step": 8399 + }, + { + "epoch": 0.6779113872972319, + "grad_norm": 0.6588087677955627, + "learning_rate": 0.0001257548073806897, + "loss": 2.6123, + "step": 8400 + }, + { + "epoch": 0.6779920910338149, + "grad_norm": 0.7211382389068604, + "learning_rate": 0.00012573955272537915, + "loss": 2.6402, + "step": 8401 + }, + { + "epoch": 0.6780727947703978, + "grad_norm": 0.7196084856987, + "learning_rate": 0.00012572429742857167, + "loss": 2.51, + "step": 8402 + }, + { + "epoch": 0.6781534985069809, + "grad_norm": 0.6399394273757935, + "learning_rate": 0.00012570904149064748, + "loss": 2.5309, + "step": 8403 + }, + { + "epoch": 0.6782342022435639, + "grad_norm": 0.6969572305679321, + "learning_rate": 0.00012569378491198674, + "loss": 2.5829, + "step": 8404 + }, + { + "epoch": 0.6783149059801469, + "grad_norm": 0.8005492091178894, + "learning_rate": 0.00012567852769296975, + "loss": 2.6277, + "step": 8405 + }, + { + "epoch": 0.6783956097167299, + "grad_norm": 0.6786207556724548, + "learning_rate": 0.0001256632698339767, + "loss": 2.5839, + "step": 8406 + }, + { + "epoch": 0.6784763134533129, + "grad_norm": 0.7047130465507507, + "learning_rate": 0.0001256480113353879, + "loss": 2.533, + "step": 8407 + }, + { + "epoch": 0.6785570171898959, + "grad_norm": 0.7640479803085327, + "learning_rate": 0.0001256327521975836, + "loss": 2.5855, + "step": 8408 + }, + { + "epoch": 0.6786377209264789, + "grad_norm": 0.728111207485199, + "learning_rate": 0.00012561749242094412, + "loss": 2.6184, + "step": 8409 + }, + { + "epoch": 0.6787184246630619, + "grad_norm": 0.7842772603034973, + "learning_rate": 0.00012560223200584975, + "loss": 2.5915, + "step": 8410 + }, + { + "epoch": 0.678799128399645, + "grad_norm": 0.7129092812538147, + "learning_rate": 0.00012558697095268085, + "loss": 2.6526, + "step": 8411 + }, + { + "epoch": 0.6788798321362279, + "grad_norm": 0.751103401184082, + 
"learning_rate": 0.00012557170926181773, + "loss": 2.605, + "step": 8412 + }, + { + "epoch": 0.6789605358728109, + "grad_norm": 0.6850594878196716, + "learning_rate": 0.0001255564469336408, + "loss": 2.6047, + "step": 8413 + }, + { + "epoch": 0.6790412396093939, + "grad_norm": 0.703037679195404, + "learning_rate": 0.00012554118396853036, + "loss": 2.653, + "step": 8414 + }, + { + "epoch": 0.6791219433459769, + "grad_norm": 0.8097915053367615, + "learning_rate": 0.0001255259203668669, + "loss": 2.5937, + "step": 8415 + }, + { + "epoch": 0.67920264708256, + "grad_norm": 0.700351357460022, + "learning_rate": 0.00012551065612903076, + "loss": 2.6089, + "step": 8416 + }, + { + "epoch": 0.6792833508191429, + "grad_norm": 0.6760888695716858, + "learning_rate": 0.00012549539125540236, + "loss": 2.547, + "step": 8417 + }, + { + "epoch": 0.6793640545557259, + "grad_norm": 0.6751723289489746, + "learning_rate": 0.0001254801257463622, + "loss": 2.625, + "step": 8418 + }, + { + "epoch": 0.6794447582923089, + "grad_norm": 0.6928921937942505, + "learning_rate": 0.00012546485960229065, + "loss": 2.5671, + "step": 8419 + }, + { + "epoch": 0.679525462028892, + "grad_norm": 0.6541565656661987, + "learning_rate": 0.0001254495928235683, + "loss": 2.5837, + "step": 8420 + }, + { + "epoch": 0.679606165765475, + "grad_norm": 0.6228676438331604, + "learning_rate": 0.00012543432541057555, + "loss": 2.5798, + "step": 8421 + }, + { + "epoch": 0.6796868695020579, + "grad_norm": 0.7620853185653687, + "learning_rate": 0.0001254190573636929, + "loss": 2.5885, + "step": 8422 + }, + { + "epoch": 0.6797675732386409, + "grad_norm": 0.7425604462623596, + "learning_rate": 0.0001254037886833009, + "loss": 2.6124, + "step": 8423 + }, + { + "epoch": 0.679848276975224, + "grad_norm": 0.7150974273681641, + "learning_rate": 0.0001253885193697801, + "loss": 2.5423, + "step": 8424 + }, + { + "epoch": 0.679928980711807, + "grad_norm": 0.672649621963501, + "learning_rate": 0.000125373249423511, + "loss": 2.5563, 
+ "step": 8425 + }, + { + "epoch": 0.6800096844483899, + "grad_norm": 0.6913620829582214, + "learning_rate": 0.00012535797884487425, + "loss": 2.5261, + "step": 8426 + }, + { + "epoch": 0.6800903881849729, + "grad_norm": 0.712123692035675, + "learning_rate": 0.00012534270763425034, + "loss": 2.5958, + "step": 8427 + }, + { + "epoch": 0.680171091921556, + "grad_norm": 0.7593061327934265, + "learning_rate": 0.00012532743579201993, + "loss": 2.6036, + "step": 8428 + }, + { + "epoch": 0.680251795658139, + "grad_norm": 0.7108714580535889, + "learning_rate": 0.0001253121633185636, + "loss": 2.6004, + "step": 8429 + }, + { + "epoch": 0.680332499394722, + "grad_norm": 0.7142449021339417, + "learning_rate": 0.00012529689021426198, + "loss": 2.588, + "step": 8430 + }, + { + "epoch": 0.6804132031313049, + "grad_norm": 0.7579841017723083, + "learning_rate": 0.00012528161647949574, + "loss": 2.5927, + "step": 8431 + }, + { + "epoch": 0.680493906867888, + "grad_norm": 0.6522083878517151, + "learning_rate": 0.00012526634211464555, + "loss": 2.5619, + "step": 8432 + }, + { + "epoch": 0.680574610604471, + "grad_norm": 0.7681782245635986, + "learning_rate": 0.00012525106712009203, + "loss": 2.6065, + "step": 8433 + }, + { + "epoch": 0.680655314341054, + "grad_norm": 0.6900169253349304, + "learning_rate": 0.00012523579149621594, + "loss": 2.5507, + "step": 8434 + }, + { + "epoch": 0.680736018077637, + "grad_norm": 0.6907666325569153, + "learning_rate": 0.00012522051524339794, + "loss": 2.5213, + "step": 8435 + }, + { + "epoch": 0.68081672181422, + "grad_norm": 0.7202023267745972, + "learning_rate": 0.0001252052383620188, + "loss": 2.6367, + "step": 8436 + }, + { + "epoch": 0.680897425550803, + "grad_norm": 0.7893621325492859, + "learning_rate": 0.00012518996085245925, + "loss": 2.6066, + "step": 8437 + }, + { + "epoch": 0.680978129287386, + "grad_norm": 0.7693532109260559, + "learning_rate": 0.00012517468271509998, + "loss": 2.5346, + "step": 8438 + }, + { + "epoch": 
0.681058833023969, + "grad_norm": 0.7976840734481812, + "learning_rate": 0.0001251594039503218, + "loss": 2.5991, + "step": 8439 + }, + { + "epoch": 0.6811395367605521, + "grad_norm": 0.7671225666999817, + "learning_rate": 0.00012514412455850554, + "loss": 2.5959, + "step": 8440 + }, + { + "epoch": 0.681220240497135, + "grad_norm": 0.7143450975418091, + "learning_rate": 0.00012512884454003194, + "loss": 2.5828, + "step": 8441 + }, + { + "epoch": 0.681300944233718, + "grad_norm": 0.6821861863136292, + "learning_rate": 0.00012511356389528192, + "loss": 2.5908, + "step": 8442 + }, + { + "epoch": 0.681381647970301, + "grad_norm": 0.7279960513114929, + "learning_rate": 0.00012509828262463615, + "loss": 2.578, + "step": 8443 + }, + { + "epoch": 0.6814623517068841, + "grad_norm": 0.6503065824508667, + "learning_rate": 0.0001250830007284756, + "loss": 2.525, + "step": 8444 + }, + { + "epoch": 0.681543055443467, + "grad_norm": 0.7276029586791992, + "learning_rate": 0.00012506771820718112, + "loss": 2.584, + "step": 8445 + }, + { + "epoch": 0.68162375918005, + "grad_norm": 0.7635578513145447, + "learning_rate": 0.00012505243506113356, + "loss": 2.627, + "step": 8446 + }, + { + "epoch": 0.681704462916633, + "grad_norm": 0.7086981534957886, + "learning_rate": 0.00012503715129071386, + "loss": 2.6164, + "step": 8447 + }, + { + "epoch": 0.6817851666532161, + "grad_norm": 0.7144165635108948, + "learning_rate": 0.00012502186689630285, + "loss": 2.5642, + "step": 8448 + }, + { + "epoch": 0.6818658703897991, + "grad_norm": 0.8135093450546265, + "learning_rate": 0.00012500658187828155, + "loss": 2.6161, + "step": 8449 + }, + { + "epoch": 0.681946574126382, + "grad_norm": 0.7223377227783203, + "learning_rate": 0.00012499129623703086, + "loss": 2.6192, + "step": 8450 + }, + { + "epoch": 0.682027277862965, + "grad_norm": 0.7189127206802368, + "learning_rate": 0.00012497600997293172, + "loss": 2.6086, + "step": 8451 + }, + { + "epoch": 0.6821079815995481, + "grad_norm": 
0.6742144823074341, + "learning_rate": 0.00012496072308636514, + "loss": 2.5747, + "step": 8452 + }, + { + "epoch": 0.6821886853361311, + "grad_norm": 0.7432419657707214, + "learning_rate": 0.0001249454355777121, + "loss": 2.5687, + "step": 8453 + }, + { + "epoch": 0.6822693890727141, + "grad_norm": 0.6140317320823669, + "learning_rate": 0.00012493014744735357, + "loss": 2.5371, + "step": 8454 + }, + { + "epoch": 0.682350092809297, + "grad_norm": 0.7215768098831177, + "learning_rate": 0.0001249148586956706, + "loss": 2.6806, + "step": 8455 + }, + { + "epoch": 0.6824307965458801, + "grad_norm": 0.7485790252685547, + "learning_rate": 0.0001248995693230442, + "loss": 2.575, + "step": 8456 + }, + { + "epoch": 0.6825115002824631, + "grad_norm": 0.744349479675293, + "learning_rate": 0.00012488427932985552, + "loss": 2.5961, + "step": 8457 + }, + { + "epoch": 0.6825922040190461, + "grad_norm": 0.6784959435462952, + "learning_rate": 0.0001248689887164855, + "loss": 2.5501, + "step": 8458 + }, + { + "epoch": 0.682672907755629, + "grad_norm": 0.6664010286331177, + "learning_rate": 0.0001248536974833153, + "loss": 2.5741, + "step": 8459 + }, + { + "epoch": 0.6827536114922121, + "grad_norm": 0.7185953259468079, + "learning_rate": 0.00012483840563072592, + "loss": 2.5875, + "step": 8460 + }, + { + "epoch": 0.6828343152287951, + "grad_norm": 0.6553035378456116, + "learning_rate": 0.00012482311315909864, + "loss": 2.5321, + "step": 8461 + }, + { + "epoch": 0.6829150189653781, + "grad_norm": 0.6713398694992065, + "learning_rate": 0.00012480782006881442, + "loss": 2.6207, + "step": 8462 + }, + { + "epoch": 0.6829957227019611, + "grad_norm": 0.6733734607696533, + "learning_rate": 0.00012479252636025452, + "loss": 2.5746, + "step": 8463 + }, + { + "epoch": 0.6830764264385442, + "grad_norm": 0.7257994413375854, + "learning_rate": 0.00012477723203380004, + "loss": 2.5837, + "step": 8464 + }, + { + "epoch": 0.6831571301751271, + "grad_norm": 0.716242253780365, + "learning_rate": 
0.00012476193708983214, + "loss": 2.5611, + "step": 8465 + }, + { + "epoch": 0.6832378339117101, + "grad_norm": 0.6797829866409302, + "learning_rate": 0.0001247466415287321, + "loss": 2.5763, + "step": 8466 + }, + { + "epoch": 0.6833185376482931, + "grad_norm": 0.679931640625, + "learning_rate": 0.000124731345350881, + "loss": 2.606, + "step": 8467 + }, + { + "epoch": 0.6833992413848761, + "grad_norm": 0.6767866611480713, + "learning_rate": 0.00012471604855666016, + "loss": 2.5682, + "step": 8468 + }, + { + "epoch": 0.6834799451214592, + "grad_norm": 0.7297048568725586, + "learning_rate": 0.00012470075114645078, + "loss": 2.5527, + "step": 8469 + }, + { + "epoch": 0.6835606488580421, + "grad_norm": 0.6882644295692444, + "learning_rate": 0.0001246854531206341, + "loss": 2.5712, + "step": 8470 + }, + { + "epoch": 0.6836413525946251, + "grad_norm": 0.7129159569740295, + "learning_rate": 0.00012467015447959143, + "loss": 2.5627, + "step": 8471 + }, + { + "epoch": 0.6837220563312081, + "grad_norm": 0.6671481728553772, + "learning_rate": 0.000124654855223704, + "loss": 2.6226, + "step": 8472 + }, + { + "epoch": 0.6838027600677912, + "grad_norm": 0.7096946835517883, + "learning_rate": 0.00012463955535335313, + "loss": 2.5373, + "step": 8473 + }, + { + "epoch": 0.6838834638043741, + "grad_norm": 0.6781395077705383, + "learning_rate": 0.00012462425486892012, + "loss": 2.5607, + "step": 8474 + }, + { + "epoch": 0.6839641675409571, + "grad_norm": 0.6777891516685486, + "learning_rate": 0.00012460895377078632, + "loss": 2.5991, + "step": 8475 + }, + { + "epoch": 0.6840448712775401, + "grad_norm": 0.7175275087356567, + "learning_rate": 0.00012459365205933306, + "loss": 2.6006, + "step": 8476 + }, + { + "epoch": 0.6841255750141232, + "grad_norm": 0.6832807660102844, + "learning_rate": 0.00012457834973494174, + "loss": 2.5757, + "step": 8477 + }, + { + "epoch": 0.6842062787507062, + "grad_norm": 0.7002938985824585, + "learning_rate": 0.00012456304679799366, + "loss": 2.554, + 
"step": 8478 + }, + { + "epoch": 0.6842869824872891, + "grad_norm": 0.7236241698265076, + "learning_rate": 0.00012454774324887027, + "loss": 2.6054, + "step": 8479 + }, + { + "epoch": 0.6843676862238721, + "grad_norm": 0.7327216267585754, + "learning_rate": 0.00012453243908795288, + "loss": 2.6101, + "step": 8480 + }, + { + "epoch": 0.6844483899604552, + "grad_norm": 0.7414156794548035, + "learning_rate": 0.00012451713431562306, + "loss": 2.5505, + "step": 8481 + }, + { + "epoch": 0.6845290936970382, + "grad_norm": 0.697795569896698, + "learning_rate": 0.00012450182893226214, + "loss": 2.539, + "step": 8482 + }, + { + "epoch": 0.6846097974336212, + "grad_norm": 0.7053593397140503, + "learning_rate": 0.00012448652293825158, + "loss": 2.6045, + "step": 8483 + }, + { + "epoch": 0.6846905011702041, + "grad_norm": 0.6710856556892395, + "learning_rate": 0.00012447121633397287, + "loss": 2.554, + "step": 8484 + }, + { + "epoch": 0.6847712049067872, + "grad_norm": 0.754454493522644, + "learning_rate": 0.0001244559091198075, + "loss": 2.5523, + "step": 8485 + }, + { + "epoch": 0.6848519086433702, + "grad_norm": 0.6468656659126282, + "learning_rate": 0.0001244406012961369, + "loss": 2.5931, + "step": 8486 + }, + { + "epoch": 0.6849326123799532, + "grad_norm": 0.7169063091278076, + "learning_rate": 0.00012442529286334266, + "loss": 2.5743, + "step": 8487 + }, + { + "epoch": 0.6850133161165362, + "grad_norm": 0.6737040877342224, + "learning_rate": 0.00012440998382180627, + "loss": 2.5734, + "step": 8488 + }, + { + "epoch": 0.6850940198531192, + "grad_norm": 0.7026428580284119, + "learning_rate": 0.0001243946741719093, + "loss": 2.4994, + "step": 8489 + }, + { + "epoch": 0.6851747235897022, + "grad_norm": 0.7378512024879456, + "learning_rate": 0.00012437936391403322, + "loss": 2.5611, + "step": 8490 + }, + { + "epoch": 0.6852554273262852, + "grad_norm": 0.7379863262176514, + "learning_rate": 0.0001243640530485597, + "loss": 2.538, + "step": 8491 + }, + { + "epoch": 
0.6853361310628682, + "grad_norm": 0.68398118019104, + "learning_rate": 0.00012434874157587027, + "loss": 2.5593, + "step": 8492 + }, + { + "epoch": 0.6854168347994513, + "grad_norm": 0.6780444383621216, + "learning_rate": 0.0001243334294963466, + "loss": 2.5068, + "step": 8493 + }, + { + "epoch": 0.6854975385360342, + "grad_norm": 0.7425427436828613, + "learning_rate": 0.0001243181168103702, + "loss": 2.6607, + "step": 8494 + }, + { + "epoch": 0.6855782422726172, + "grad_norm": 0.7563300132751465, + "learning_rate": 0.0001243028035183228, + "loss": 2.5915, + "step": 8495 + }, + { + "epoch": 0.6856589460092002, + "grad_norm": 0.6746618151664734, + "learning_rate": 0.000124287489620586, + "loss": 2.5399, + "step": 8496 + }, + { + "epoch": 0.6857396497457833, + "grad_norm": 0.7100487947463989, + "learning_rate": 0.00012427217511754146, + "loss": 2.5927, + "step": 8497 + }, + { + "epoch": 0.6858203534823663, + "grad_norm": 0.6487080454826355, + "learning_rate": 0.00012425686000957088, + "loss": 2.5582, + "step": 8498 + }, + { + "epoch": 0.6859010572189492, + "grad_norm": 0.6577199697494507, + "learning_rate": 0.00012424154429705592, + "loss": 2.5589, + "step": 8499 + }, + { + "epoch": 0.6859817609555322, + "grad_norm": 0.6748726963996887, + "learning_rate": 0.00012422622798037832, + "loss": 2.5651, + "step": 8500 + }, + { + "epoch": 0.6860624646921153, + "grad_norm": 0.7159377336502075, + "learning_rate": 0.0001242109110599198, + "loss": 2.569, + "step": 8501 + }, + { + "epoch": 0.6861431684286983, + "grad_norm": 0.6772934198379517, + "learning_rate": 0.00012419559353606208, + "loss": 2.5533, + "step": 8502 + }, + { + "epoch": 0.6862238721652812, + "grad_norm": 0.6776062846183777, + "learning_rate": 0.00012418027540918693, + "loss": 2.5704, + "step": 8503 + }, + { + "epoch": 0.6863045759018642, + "grad_norm": 0.7009913921356201, + "learning_rate": 0.00012416495667967608, + "loss": 2.5928, + "step": 8504 + }, + { + "epoch": 0.6863852796384473, + "grad_norm": 
0.607571005821228, + "learning_rate": 0.00012414963734791137, + "loss": 2.5459, + "step": 8505 + }, + { + "epoch": 0.6864659833750303, + "grad_norm": 0.6798292398452759, + "learning_rate": 0.00012413431741427458, + "loss": 2.6585, + "step": 8506 + }, + { + "epoch": 0.6865466871116133, + "grad_norm": 0.7892771363258362, + "learning_rate": 0.00012411899687914747, + "loss": 2.5781, + "step": 8507 + }, + { + "epoch": 0.6866273908481962, + "grad_norm": 0.6683816909790039, + "learning_rate": 0.00012410367574291199, + "loss": 2.5598, + "step": 8508 + }, + { + "epoch": 0.6867080945847793, + "grad_norm": 0.7591805458068848, + "learning_rate": 0.00012408835400594983, + "loss": 2.6478, + "step": 8509 + }, + { + "epoch": 0.6867887983213623, + "grad_norm": 0.6896353960037231, + "learning_rate": 0.00012407303166864293, + "loss": 2.5418, + "step": 8510 + }, + { + "epoch": 0.6868695020579453, + "grad_norm": 0.6657233834266663, + "learning_rate": 0.00012405770873137316, + "loss": 2.5753, + "step": 8511 + }, + { + "epoch": 0.6869502057945283, + "grad_norm": 0.6775455474853516, + "learning_rate": 0.00012404238519452237, + "loss": 2.4902, + "step": 8512 + }, + { + "epoch": 0.6870309095311113, + "grad_norm": 0.6572847962379456, + "learning_rate": 0.00012402706105847254, + "loss": 2.6189, + "step": 8513 + }, + { + "epoch": 0.6871116132676943, + "grad_norm": 0.7159940004348755, + "learning_rate": 0.00012401173632360557, + "loss": 2.5928, + "step": 8514 + }, + { + "epoch": 0.6871923170042773, + "grad_norm": 0.7178850173950195, + "learning_rate": 0.0001239964109903033, + "loss": 2.5342, + "step": 8515 + }, + { + "epoch": 0.6872730207408603, + "grad_norm": 0.6761649250984192, + "learning_rate": 0.00012398108505894774, + "loss": 2.5716, + "step": 8516 + }, + { + "epoch": 0.6873537244774433, + "grad_norm": 0.6831200122833252, + "learning_rate": 0.0001239657585299209, + "loss": 2.5506, + "step": 8517 + }, + { + "epoch": 0.6874344282140263, + "grad_norm": 0.7064316868782043, + "learning_rate": 
0.00012395043140360468, + "loss": 2.541, + "step": 8518 + }, + { + "epoch": 0.6875151319506093, + "grad_norm": 0.7269963026046753, + "learning_rate": 0.00012393510368038113, + "loss": 2.541, + "step": 8519 + }, + { + "epoch": 0.6875958356871923, + "grad_norm": 0.6651471257209778, + "learning_rate": 0.00012391977536063218, + "loss": 2.5476, + "step": 8520 + }, + { + "epoch": 0.6876765394237753, + "grad_norm": 0.7649257779121399, + "learning_rate": 0.00012390444644473994, + "loss": 2.601, + "step": 8521 + }, + { + "epoch": 0.6877572431603584, + "grad_norm": 0.6637376546859741, + "learning_rate": 0.0001238891169330864, + "loss": 2.5582, + "step": 8522 + }, + { + "epoch": 0.6878379468969413, + "grad_norm": 0.6609189510345459, + "learning_rate": 0.0001238737868260536, + "loss": 2.5795, + "step": 8523 + }, + { + "epoch": 0.6879186506335243, + "grad_norm": 0.657494843006134, + "learning_rate": 0.00012385845612402363, + "loss": 2.6005, + "step": 8524 + }, + { + "epoch": 0.6879993543701073, + "grad_norm": 0.6780641674995422, + "learning_rate": 0.00012384312482737858, + "loss": 2.514, + "step": 8525 + }, + { + "epoch": 0.6880800581066904, + "grad_norm": 0.7310795187950134, + "learning_rate": 0.00012382779293650052, + "loss": 2.5707, + "step": 8526 + }, + { + "epoch": 0.6881607618432733, + "grad_norm": 0.6722557544708252, + "learning_rate": 0.0001238124604517716, + "loss": 2.5897, + "step": 8527 + }, + { + "epoch": 0.6882414655798563, + "grad_norm": 0.6502346992492676, + "learning_rate": 0.0001237971273735739, + "loss": 2.5554, + "step": 8528 + }, + { + "epoch": 0.6883221693164393, + "grad_norm": 0.6993897557258606, + "learning_rate": 0.0001237817937022896, + "loss": 2.6328, + "step": 8529 + }, + { + "epoch": 0.6884028730530224, + "grad_norm": 0.7069644331932068, + "learning_rate": 0.00012376645943830083, + "loss": 2.5957, + "step": 8530 + }, + { + "epoch": 0.6884835767896054, + "grad_norm": 0.7193333506584167, + "learning_rate": 0.00012375112458198973, + "loss": 2.6505, + 
"step": 8531 + }, + { + "epoch": 0.6885642805261883, + "grad_norm": 0.6821088194847107, + "learning_rate": 0.00012373578913373853, + "loss": 2.6129, + "step": 8532 + }, + { + "epoch": 0.6886449842627713, + "grad_norm": 0.6499428749084473, + "learning_rate": 0.00012372045309392947, + "loss": 2.6053, + "step": 8533 + }, + { + "epoch": 0.6887256879993544, + "grad_norm": 0.7469449639320374, + "learning_rate": 0.00012370511646294464, + "loss": 2.6423, + "step": 8534 + }, + { + "epoch": 0.6888063917359374, + "grad_norm": 0.7326325178146362, + "learning_rate": 0.00012368977924116637, + "loss": 2.5708, + "step": 8535 + }, + { + "epoch": 0.6888870954725204, + "grad_norm": 0.7459580302238464, + "learning_rate": 0.00012367444142897686, + "loss": 2.544, + "step": 8536 + }, + { + "epoch": 0.6889677992091033, + "grad_norm": 0.7198929786682129, + "learning_rate": 0.00012365910302675843, + "loss": 2.6295, + "step": 8537 + }, + { + "epoch": 0.6890485029456864, + "grad_norm": 0.8139802813529968, + "learning_rate": 0.0001236437640348933, + "loss": 2.549, + "step": 8538 + }, + { + "epoch": 0.6891292066822694, + "grad_norm": 0.6497162580490112, + "learning_rate": 0.00012362842445376372, + "loss": 2.5849, + "step": 8539 + }, + { + "epoch": 0.6892099104188524, + "grad_norm": 0.7378165125846863, + "learning_rate": 0.00012361308428375208, + "loss": 2.606, + "step": 8540 + }, + { + "epoch": 0.6892906141554354, + "grad_norm": 0.6807567477226257, + "learning_rate": 0.00012359774352524062, + "loss": 2.5892, + "step": 8541 + }, + { + "epoch": 0.6893713178920184, + "grad_norm": 0.6639370918273926, + "learning_rate": 0.0001235824021786117, + "loss": 2.5249, + "step": 8542 + }, + { + "epoch": 0.6894520216286014, + "grad_norm": 0.7140880823135376, + "learning_rate": 0.00012356706024424773, + "loss": 2.5877, + "step": 8543 + }, + { + "epoch": 0.6895327253651844, + "grad_norm": 0.7079257965087891, + "learning_rate": 0.00012355171772253097, + "loss": 2.6011, + "step": 8544 + }, + { + "epoch": 
0.6896134291017674, + "grad_norm": 0.7150856852531433, + "learning_rate": 0.00012353637461384387, + "loss": 2.549, + "step": 8545 + }, + { + "epoch": 0.6896941328383505, + "grad_norm": 0.6896397471427917, + "learning_rate": 0.00012352103091856876, + "loss": 2.5452, + "step": 8546 + }, + { + "epoch": 0.6897748365749334, + "grad_norm": 0.696964681148529, + "learning_rate": 0.00012350568663708808, + "loss": 2.5075, + "step": 8547 + }, + { + "epoch": 0.6898555403115164, + "grad_norm": 0.6926069855690002, + "learning_rate": 0.00012349034176978427, + "loss": 2.5905, + "step": 8548 + }, + { + "epoch": 0.6899362440480994, + "grad_norm": 0.6949423551559448, + "learning_rate": 0.00012347499631703968, + "loss": 2.5284, + "step": 8549 + }, + { + "epoch": 0.6900169477846825, + "grad_norm": 0.6480536460876465, + "learning_rate": 0.0001234596502792369, + "loss": 2.5713, + "step": 8550 + }, + { + "epoch": 0.6900976515212655, + "grad_norm": 0.6990019679069519, + "learning_rate": 0.00012344430365675825, + "loss": 2.5826, + "step": 8551 + }, + { + "epoch": 0.6901783552578484, + "grad_norm": 0.7063903212547302, + "learning_rate": 0.00012342895644998627, + "loss": 2.5271, + "step": 8552 + }, + { + "epoch": 0.6902590589944314, + "grad_norm": 0.7037132978439331, + "learning_rate": 0.0001234136086593035, + "loss": 2.5855, + "step": 8553 + }, + { + "epoch": 0.6903397627310145, + "grad_norm": 0.679701030254364, + "learning_rate": 0.00012339826028509235, + "loss": 2.5577, + "step": 8554 + }, + { + "epoch": 0.6904204664675975, + "grad_norm": 0.7088965773582458, + "learning_rate": 0.0001233829113277354, + "loss": 2.5767, + "step": 8555 + }, + { + "epoch": 0.6905011702041804, + "grad_norm": 0.7115551829338074, + "learning_rate": 0.00012336756178761517, + "loss": 2.5651, + "step": 8556 + }, + { + "epoch": 0.6905818739407634, + "grad_norm": 0.6778836250305176, + "learning_rate": 0.00012335221166511425, + "loss": 2.6388, + "step": 8557 + }, + { + "epoch": 0.6906625776773465, + "grad_norm": 
0.6358879804611206, + "learning_rate": 0.00012333686096061515, + "loss": 2.5493, + "step": 8558 + }, + { + "epoch": 0.6907432814139295, + "grad_norm": 0.688197135925293, + "learning_rate": 0.00012332150967450046, + "loss": 2.5707, + "step": 8559 + }, + { + "epoch": 0.6908239851505125, + "grad_norm": 0.6931524872779846, + "learning_rate": 0.0001233061578071528, + "loss": 2.5561, + "step": 8560 + }, + { + "epoch": 0.6909046888870954, + "grad_norm": 0.6684975624084473, + "learning_rate": 0.00012329080535895478, + "loss": 2.6442, + "step": 8561 + }, + { + "epoch": 0.6909853926236785, + "grad_norm": 0.6865811347961426, + "learning_rate": 0.00012327545233028898, + "loss": 2.564, + "step": 8562 + }, + { + "epoch": 0.6910660963602615, + "grad_norm": 0.6999006867408752, + "learning_rate": 0.0001232600987215381, + "loss": 2.5607, + "step": 8563 + }, + { + "epoch": 0.6911468000968445, + "grad_norm": 0.6734526753425598, + "learning_rate": 0.0001232447445330847, + "loss": 2.5261, + "step": 8564 + }, + { + "epoch": 0.6912275038334275, + "grad_norm": 0.7447343468666077, + "learning_rate": 0.00012322938976531153, + "loss": 2.5359, + "step": 8565 + }, + { + "epoch": 0.6913082075700105, + "grad_norm": 0.6498517394065857, + "learning_rate": 0.00012321403441860126, + "loss": 2.5345, + "step": 8566 + }, + { + "epoch": 0.6913889113065935, + "grad_norm": 0.692933976650238, + "learning_rate": 0.00012319867849333658, + "loss": 2.6293, + "step": 8567 + }, + { + "epoch": 0.6914696150431765, + "grad_norm": 0.728430449962616, + "learning_rate": 0.00012318332198990015, + "loss": 2.618, + "step": 8568 + }, + { + "epoch": 0.6915503187797595, + "grad_norm": 0.7029061913490295, + "learning_rate": 0.00012316796490867478, + "loss": 2.6151, + "step": 8569 + }, + { + "epoch": 0.6916310225163425, + "grad_norm": 0.6692330241203308, + "learning_rate": 0.00012315260725004313, + "loss": 2.5511, + "step": 8570 + }, + { + "epoch": 0.6917117262529255, + "grad_norm": 0.6811983585357666, + "learning_rate": 
0.000123137249014388, + "loss": 2.6337, + "step": 8571 + }, + { + "epoch": 0.6917924299895085, + "grad_norm": 0.7387441992759705, + "learning_rate": 0.00012312189020209212, + "loss": 2.5679, + "step": 8572 + }, + { + "epoch": 0.6918731337260915, + "grad_norm": 0.7180185914039612, + "learning_rate": 0.0001231065308135383, + "loss": 2.639, + "step": 8573 + }, + { + "epoch": 0.6919538374626745, + "grad_norm": 0.6997829079627991, + "learning_rate": 0.00012309117084910936, + "loss": 2.5392, + "step": 8574 + }, + { + "epoch": 0.6920345411992576, + "grad_norm": 0.7004552483558655, + "learning_rate": 0.00012307581030918807, + "loss": 2.6033, + "step": 8575 + }, + { + "epoch": 0.6921152449358405, + "grad_norm": 0.7183418273925781, + "learning_rate": 0.00012306044919415724, + "loss": 2.6302, + "step": 8576 + }, + { + "epoch": 0.6921959486724235, + "grad_norm": 0.6645712852478027, + "learning_rate": 0.00012304508750439976, + "loss": 2.5401, + "step": 8577 + }, + { + "epoch": 0.6922766524090065, + "grad_norm": 0.6455898284912109, + "learning_rate": 0.00012302972524029848, + "loss": 2.5084, + "step": 8578 + }, + { + "epoch": 0.6923573561455896, + "grad_norm": 0.6933849453926086, + "learning_rate": 0.00012301436240223622, + "loss": 2.5734, + "step": 8579 + }, + { + "epoch": 0.6924380598821726, + "grad_norm": 0.7967655658721924, + "learning_rate": 0.00012299899899059587, + "loss": 2.5721, + "step": 8580 + }, + { + "epoch": 0.6925187636187555, + "grad_norm": 0.706730306148529, + "learning_rate": 0.0001229836350057604, + "loss": 2.6216, + "step": 8581 + }, + { + "epoch": 0.6925994673553385, + "grad_norm": 0.7021105885505676, + "learning_rate": 0.0001229682704481126, + "loss": 2.4877, + "step": 8582 + }, + { + "epoch": 0.6926801710919216, + "grad_norm": 0.7197253108024597, + "learning_rate": 0.00012295290531803553, + "loss": 2.6124, + "step": 8583 + }, + { + "epoch": 0.6927608748285046, + "grad_norm": 0.7559605836868286, + "learning_rate": 0.00012293753961591198, + "loss": 2.6391, + 
"step": 8584 + }, + { + "epoch": 0.6928415785650875, + "grad_norm": 0.7074676752090454, + "learning_rate": 0.00012292217334212505, + "loss": 2.5949, + "step": 8585 + }, + { + "epoch": 0.6929222823016705, + "grad_norm": 0.6843528747558594, + "learning_rate": 0.00012290680649705763, + "loss": 2.4981, + "step": 8586 + }, + { + "epoch": 0.6930029860382536, + "grad_norm": 0.6853117942810059, + "learning_rate": 0.00012289143908109266, + "loss": 2.6352, + "step": 8587 + }, + { + "epoch": 0.6930836897748366, + "grad_norm": 0.6545630097389221, + "learning_rate": 0.00012287607109461325, + "loss": 2.5344, + "step": 8588 + }, + { + "epoch": 0.6931643935114196, + "grad_norm": 0.7377945184707642, + "learning_rate": 0.00012286070253800233, + "loss": 2.5895, + "step": 8589 + }, + { + "epoch": 0.6932450972480025, + "grad_norm": 0.6919971108436584, + "learning_rate": 0.00012284533341164295, + "loss": 2.5825, + "step": 8590 + }, + { + "epoch": 0.6933258009845856, + "grad_norm": 0.6911910176277161, + "learning_rate": 0.00012282996371591816, + "loss": 2.6008, + "step": 8591 + }, + { + "epoch": 0.6934065047211686, + "grad_norm": 0.7486373782157898, + "learning_rate": 0.00012281459345121095, + "loss": 2.6056, + "step": 8592 + }, + { + "epoch": 0.6934872084577516, + "grad_norm": 0.6829040050506592, + "learning_rate": 0.00012279922261790443, + "loss": 2.5161, + "step": 8593 + }, + { + "epoch": 0.6935679121943346, + "grad_norm": 0.7410104870796204, + "learning_rate": 0.00012278385121638173, + "loss": 2.6114, + "step": 8594 + }, + { + "epoch": 0.6936486159309176, + "grad_norm": 0.7355940937995911, + "learning_rate": 0.00012276847924702587, + "loss": 2.6371, + "step": 8595 + }, + { + "epoch": 0.6937293196675006, + "grad_norm": 0.650641679763794, + "learning_rate": 0.00012275310671022003, + "loss": 2.5568, + "step": 8596 + }, + { + "epoch": 0.6938100234040836, + "grad_norm": 0.661573052406311, + "learning_rate": 0.00012273773360634726, + "loss": 2.5828, + "step": 8597 + }, + { + "epoch": 
0.6938907271406666, + "grad_norm": 0.6848435401916504, + "learning_rate": 0.00012272235993579072, + "loss": 2.5226, + "step": 8598 + }, + { + "epoch": 0.6939714308772497, + "grad_norm": 0.7015430927276611, + "learning_rate": 0.0001227069856989336, + "loss": 2.6156, + "step": 8599 + }, + { + "epoch": 0.6940521346138326, + "grad_norm": 0.7058628797531128, + "learning_rate": 0.000122691610896159, + "loss": 2.6007, + "step": 8600 + }, + { + "epoch": 0.6941328383504156, + "grad_norm": 0.6589432954788208, + "learning_rate": 0.0001226762355278502, + "loss": 2.5551, + "step": 8601 + }, + { + "epoch": 0.6942135420869986, + "grad_norm": 0.6875284910202026, + "learning_rate": 0.0001226608595943903, + "loss": 2.5537, + "step": 8602 + }, + { + "epoch": 0.6942942458235817, + "grad_norm": 0.7178356051445007, + "learning_rate": 0.00012264548309616252, + "loss": 2.655, + "step": 8603 + }, + { + "epoch": 0.6943749495601647, + "grad_norm": 0.7327077388763428, + "learning_rate": 0.00012263010603355017, + "loss": 2.5574, + "step": 8604 + }, + { + "epoch": 0.6944556532967476, + "grad_norm": 0.6318337917327881, + "learning_rate": 0.0001226147284069364, + "loss": 2.577, + "step": 8605 + }, + { + "epoch": 0.6945363570333306, + "grad_norm": 0.674872875213623, + "learning_rate": 0.00012259935021670444, + "loss": 2.6225, + "step": 8606 + }, + { + "epoch": 0.6946170607699137, + "grad_norm": 0.6554198861122131, + "learning_rate": 0.0001225839714632376, + "loss": 2.5951, + "step": 8607 + }, + { + "epoch": 0.6946977645064967, + "grad_norm": 0.7086453437805176, + "learning_rate": 0.00012256859214691918, + "loss": 2.622, + "step": 8608 + }, + { + "epoch": 0.6947784682430796, + "grad_norm": 0.6609488129615784, + "learning_rate": 0.00012255321226813245, + "loss": 2.5623, + "step": 8609 + }, + { + "epoch": 0.6948591719796626, + "grad_norm": 0.7504609823226929, + "learning_rate": 0.00012253783182726075, + "loss": 2.5264, + "step": 8610 + }, + { + "epoch": 0.6949398757162457, + "grad_norm": 
0.6702934503555298, + "learning_rate": 0.00012252245082468733, + "loss": 2.5877, + "step": 8611 + }, + { + "epoch": 0.6950205794528287, + "grad_norm": 0.7116326689720154, + "learning_rate": 0.00012250706926079553, + "loss": 2.5629, + "step": 8612 + }, + { + "epoch": 0.6951012831894117, + "grad_norm": 0.7495368719100952, + "learning_rate": 0.00012249168713596875, + "loss": 2.5731, + "step": 8613 + }, + { + "epoch": 0.6951819869259946, + "grad_norm": 0.7434844970703125, + "learning_rate": 0.0001224763044505904, + "loss": 2.6008, + "step": 8614 + }, + { + "epoch": 0.6952626906625777, + "grad_norm": 0.719667375087738, + "learning_rate": 0.00012246092120504371, + "loss": 2.6051, + "step": 8615 + }, + { + "epoch": 0.6953433943991607, + "grad_norm": 0.7189086079597473, + "learning_rate": 0.00012244553739971216, + "loss": 2.5662, + "step": 8616 + }, + { + "epoch": 0.6954240981357437, + "grad_norm": 0.7222673892974854, + "learning_rate": 0.00012243015303497917, + "loss": 2.609, + "step": 8617 + }, + { + "epoch": 0.6955048018723267, + "grad_norm": 0.7323142290115356, + "learning_rate": 0.00012241476811122813, + "loss": 2.5458, + "step": 8618 + }, + { + "epoch": 0.6955855056089096, + "grad_norm": 0.7374032735824585, + "learning_rate": 0.00012239938262884246, + "loss": 2.6147, + "step": 8619 + }, + { + "epoch": 0.6956662093454927, + "grad_norm": 0.6707843542098999, + "learning_rate": 0.00012238399658820562, + "loss": 2.6462, + "step": 8620 + }, + { + "epoch": 0.6957469130820757, + "grad_norm": 0.7603243589401245, + "learning_rate": 0.0001223686099897011, + "loss": 2.6295, + "step": 8621 + }, + { + "epoch": 0.6958276168186587, + "grad_norm": 0.6966906785964966, + "learning_rate": 0.00012235322283371232, + "loss": 2.545, + "step": 8622 + }, + { + "epoch": 0.6959083205552417, + "grad_norm": 0.6757891774177551, + "learning_rate": 0.0001223378351206228, + "loss": 2.5548, + "step": 8623 + }, + { + "epoch": 0.6959890242918247, + "grad_norm": 0.6901456713676453, + "learning_rate": 
0.00012232244685081605, + "loss": 2.5734, + "step": 8624 + }, + { + "epoch": 0.6960697280284077, + "grad_norm": 0.6942903995513916, + "learning_rate": 0.00012230705802467558, + "loss": 2.5495, + "step": 8625 + }, + { + "epoch": 0.6961504317649907, + "grad_norm": 0.6774815320968628, + "learning_rate": 0.0001222916686425849, + "loss": 2.5076, + "step": 8626 + }, + { + "epoch": 0.6962311355015737, + "grad_norm": 0.8037571310997009, + "learning_rate": 0.00012227627870492754, + "loss": 2.6737, + "step": 8627 + }, + { + "epoch": 0.6963118392381568, + "grad_norm": 0.7027560472488403, + "learning_rate": 0.0001222608882120871, + "loss": 2.5401, + "step": 8628 + }, + { + "epoch": 0.6963925429747397, + "grad_norm": 0.6651299595832825, + "learning_rate": 0.00012224549716444714, + "loss": 2.5835, + "step": 8629 + }, + { + "epoch": 0.6964732467113227, + "grad_norm": 0.7082433104515076, + "learning_rate": 0.00012223010556239124, + "loss": 2.5622, + "step": 8630 + }, + { + "epoch": 0.6965539504479057, + "grad_norm": 0.7993464469909668, + "learning_rate": 0.00012221471340630305, + "loss": 2.655, + "step": 8631 + }, + { + "epoch": 0.6966346541844888, + "grad_norm": 0.7375298142433167, + "learning_rate": 0.00012219932069656606, + "loss": 2.598, + "step": 8632 + }, + { + "epoch": 0.6967153579210718, + "grad_norm": 0.6915456652641296, + "learning_rate": 0.00012218392743356397, + "loss": 2.5649, + "step": 8633 + }, + { + "epoch": 0.6967960616576547, + "grad_norm": 0.679256021976471, + "learning_rate": 0.00012216853361768045, + "loss": 2.545, + "step": 8634 + }, + { + "epoch": 0.6968767653942377, + "grad_norm": 0.7234694361686707, + "learning_rate": 0.0001221531392492991, + "loss": 2.5863, + "step": 8635 + }, + { + "epoch": 0.6969574691308208, + "grad_norm": 0.7053319811820984, + "learning_rate": 0.00012213774432880364, + "loss": 2.5829, + "step": 8636 + }, + { + "epoch": 0.6970381728674038, + "grad_norm": 0.7584449648857117, + "learning_rate": 0.00012212234885657772, + "loss": 2.5855, + 
"step": 8637 + }, + { + "epoch": 0.6971188766039867, + "grad_norm": 0.7098579406738281, + "learning_rate": 0.00012210695283300501, + "loss": 2.6057, + "step": 8638 + }, + { + "epoch": 0.6971995803405697, + "grad_norm": 0.7350205779075623, + "learning_rate": 0.00012209155625846928, + "loss": 2.546, + "step": 8639 + }, + { + "epoch": 0.6972802840771528, + "grad_norm": 0.6842331290245056, + "learning_rate": 0.0001220761591333542, + "loss": 2.5602, + "step": 8640 + }, + { + "epoch": 0.6973609878137358, + "grad_norm": 0.6731252074241638, + "learning_rate": 0.00012206076145804354, + "loss": 2.4676, + "step": 8641 + }, + { + "epoch": 0.6974416915503188, + "grad_norm": 0.7271167635917664, + "learning_rate": 0.00012204536323292104, + "loss": 2.5605, + "step": 8642 + }, + { + "epoch": 0.6975223952869017, + "grad_norm": 0.6860780715942383, + "learning_rate": 0.00012202996445837043, + "loss": 2.5041, + "step": 8643 + }, + { + "epoch": 0.6976030990234848, + "grad_norm": 0.7134578824043274, + "learning_rate": 0.00012201456513477554, + "loss": 2.614, + "step": 8644 + }, + { + "epoch": 0.6976838027600678, + "grad_norm": 0.6995248198509216, + "learning_rate": 0.00012199916526252014, + "loss": 2.5087, + "step": 8645 + }, + { + "epoch": 0.6977645064966508, + "grad_norm": 0.7280197143554688, + "learning_rate": 0.00012198376484198803, + "loss": 2.5723, + "step": 8646 + }, + { + "epoch": 0.6978452102332338, + "grad_norm": 0.6898967623710632, + "learning_rate": 0.00012196836387356306, + "loss": 2.6073, + "step": 8647 + }, + { + "epoch": 0.6979259139698168, + "grad_norm": 0.6670758128166199, + "learning_rate": 0.00012195296235762901, + "loss": 2.5276, + "step": 8648 + }, + { + "epoch": 0.6980066177063998, + "grad_norm": 0.6862780451774597, + "learning_rate": 0.00012193756029456973, + "loss": 2.5363, + "step": 8649 + }, + { + "epoch": 0.6980873214429828, + "grad_norm": 0.6568876504898071, + "learning_rate": 0.00012192215768476916, + "loss": 2.5828, + "step": 8650 + }, + { + "epoch": 
0.6981680251795658, + "grad_norm": 0.7237746119499207, + "learning_rate": 0.00012190675452861107, + "loss": 2.6076, + "step": 8651 + }, + { + "epoch": 0.6982487289161489, + "grad_norm": 0.6831536293029785, + "learning_rate": 0.00012189135082647943, + "loss": 2.5199, + "step": 8652 + }, + { + "epoch": 0.6983294326527318, + "grad_norm": 0.6767029166221619, + "learning_rate": 0.00012187594657875805, + "loss": 2.5859, + "step": 8653 + }, + { + "epoch": 0.6984101363893148, + "grad_norm": 0.6977167129516602, + "learning_rate": 0.00012186054178583092, + "loss": 2.5831, + "step": 8654 + }, + { + "epoch": 0.6984908401258978, + "grad_norm": 0.6369525194168091, + "learning_rate": 0.00012184513644808197, + "loss": 2.5839, + "step": 8655 + }, + { + "epoch": 0.6985715438624809, + "grad_norm": 0.6814634203910828, + "learning_rate": 0.00012182973056589508, + "loss": 2.5493, + "step": 8656 + }, + { + "epoch": 0.6986522475990639, + "grad_norm": 0.6895000338554382, + "learning_rate": 0.00012181432413965428, + "loss": 2.5616, + "step": 8657 + }, + { + "epoch": 0.6987329513356468, + "grad_norm": 0.6689717769622803, + "learning_rate": 0.00012179891716974345, + "loss": 2.5481, + "step": 8658 + }, + { + "epoch": 0.6988136550722298, + "grad_norm": 0.6945160031318665, + "learning_rate": 0.00012178350965654666, + "loss": 2.5781, + "step": 8659 + }, + { + "epoch": 0.6988943588088129, + "grad_norm": 0.7226110696792603, + "learning_rate": 0.00012176810160044785, + "loss": 2.5767, + "step": 8660 + }, + { + "epoch": 0.6989750625453959, + "grad_norm": 0.6810569167137146, + "learning_rate": 0.00012175269300183105, + "loss": 2.5184, + "step": 8661 + }, + { + "epoch": 0.6990557662819789, + "grad_norm": 0.727281928062439, + "learning_rate": 0.0001217372838610803, + "loss": 2.5972, + "step": 8662 + }, + { + "epoch": 0.6991364700185618, + "grad_norm": 0.7111573219299316, + "learning_rate": 0.00012172187417857959, + "loss": 2.6445, + "step": 8663 + }, + { + "epoch": 0.6992171737551449, + "grad_norm": 
0.6808965802192688, + "learning_rate": 0.00012170646395471296, + "loss": 2.5191, + "step": 8664 + }, + { + "epoch": 0.6992978774917279, + "grad_norm": 0.7063688635826111, + "learning_rate": 0.00012169105318986455, + "loss": 2.6021, + "step": 8665 + }, + { + "epoch": 0.6993785812283109, + "grad_norm": 0.6522886753082275, + "learning_rate": 0.0001216756418844184, + "loss": 2.5697, + "step": 8666 + }, + { + "epoch": 0.6994592849648938, + "grad_norm": 0.6706095337867737, + "learning_rate": 0.00012166023003875859, + "loss": 2.5706, + "step": 8667 + }, + { + "epoch": 0.6995399887014769, + "grad_norm": 0.6744416356086731, + "learning_rate": 0.00012164481765326923, + "loss": 2.5713, + "step": 8668 + }, + { + "epoch": 0.6996206924380599, + "grad_norm": 0.7385411858558655, + "learning_rate": 0.0001216294047283344, + "loss": 2.5543, + "step": 8669 + }, + { + "epoch": 0.6997013961746429, + "grad_norm": 0.7286678552627563, + "learning_rate": 0.0001216139912643383, + "loss": 2.588, + "step": 8670 + }, + { + "epoch": 0.6997820999112259, + "grad_norm": 0.7065937519073486, + "learning_rate": 0.00012159857726166503, + "loss": 2.5475, + "step": 8671 + }, + { + "epoch": 0.6998628036478088, + "grad_norm": 0.6609788537025452, + "learning_rate": 0.00012158316272069874, + "loss": 2.5664, + "step": 8672 + }, + { + "epoch": 0.6999435073843919, + "grad_norm": 0.7360579371452332, + "learning_rate": 0.00012156774764182364, + "loss": 2.5822, + "step": 8673 + }, + { + "epoch": 0.7000242111209749, + "grad_norm": 0.6265058517456055, + "learning_rate": 0.00012155233202542384, + "loss": 2.5849, + "step": 8674 + }, + { + "epoch": 0.7001049148575579, + "grad_norm": 0.646976888179779, + "learning_rate": 0.00012153691587188363, + "loss": 2.5839, + "step": 8675 + }, + { + "epoch": 0.7001856185941409, + "grad_norm": 0.6634985208511353, + "learning_rate": 0.0001215214991815872, + "loss": 2.5434, + "step": 8676 + }, + { + "epoch": 0.700266322330724, + "grad_norm": 0.6757560968399048, + "learning_rate": 
0.00012150608195491871, + "loss": 2.6186, + "step": 8677 + }, + { + "epoch": 0.7003470260673069, + "grad_norm": 0.7077112197875977, + "learning_rate": 0.00012149066419226247, + "loss": 2.5757, + "step": 8678 + }, + { + "epoch": 0.7004277298038899, + "grad_norm": 0.698226273059845, + "learning_rate": 0.00012147524589400268, + "loss": 2.5307, + "step": 8679 + }, + { + "epoch": 0.7005084335404729, + "grad_norm": 0.6782405376434326, + "learning_rate": 0.00012145982706052361, + "loss": 2.5582, + "step": 8680 + }, + { + "epoch": 0.700589137277056, + "grad_norm": 0.6832882165908813, + "learning_rate": 0.0001214444076922096, + "loss": 2.574, + "step": 8681 + }, + { + "epoch": 0.7006698410136389, + "grad_norm": 0.7182612419128418, + "learning_rate": 0.00012142898778944485, + "loss": 2.6457, + "step": 8682 + }, + { + "epoch": 0.7007505447502219, + "grad_norm": 0.7043644785881042, + "learning_rate": 0.00012141356735261373, + "loss": 2.5244, + "step": 8683 + }, + { + "epoch": 0.7008312484868049, + "grad_norm": 0.6942669749259949, + "learning_rate": 0.00012139814638210054, + "loss": 2.5507, + "step": 8684 + }, + { + "epoch": 0.700911952223388, + "grad_norm": 0.8412066102027893, + "learning_rate": 0.00012138272487828959, + "loss": 2.6025, + "step": 8685 + }, + { + "epoch": 0.700992655959971, + "grad_norm": 0.6906788945198059, + "learning_rate": 0.00012136730284156525, + "loss": 2.5259, + "step": 8686 + }, + { + "epoch": 0.7010733596965539, + "grad_norm": 0.7258631587028503, + "learning_rate": 0.00012135188027231188, + "loss": 2.6311, + "step": 8687 + }, + { + "epoch": 0.7011540634331369, + "grad_norm": 0.6294744610786438, + "learning_rate": 0.00012133645717091382, + "loss": 2.5969, + "step": 8688 + }, + { + "epoch": 0.70123476716972, + "grad_norm": 0.6994131207466125, + "learning_rate": 0.00012132103353775548, + "loss": 2.5954, + "step": 8689 + }, + { + "epoch": 0.701315470906303, + "grad_norm": 0.671441912651062, + "learning_rate": 0.00012130560937322124, + "loss": 2.5628, + 
"step": 8690 + }, + { + "epoch": 0.701396174642886, + "grad_norm": 0.6915482878684998, + "learning_rate": 0.00012129018467769555, + "loss": 2.5173, + "step": 8691 + }, + { + "epoch": 0.7014768783794689, + "grad_norm": 0.6810318231582642, + "learning_rate": 0.00012127475945156279, + "loss": 2.6186, + "step": 8692 + }, + { + "epoch": 0.701557582116052, + "grad_norm": 0.7931910157203674, + "learning_rate": 0.00012125933369520741, + "loss": 2.6243, + "step": 8693 + }, + { + "epoch": 0.701638285852635, + "grad_norm": 0.6843162178993225, + "learning_rate": 0.00012124390740901386, + "loss": 2.6072, + "step": 8694 + }, + { + "epoch": 0.701718989589218, + "grad_norm": 0.672115683555603, + "learning_rate": 0.0001212284805933666, + "loss": 2.6027, + "step": 8695 + }, + { + "epoch": 0.7017996933258009, + "grad_norm": 0.65242600440979, + "learning_rate": 0.00012121305324865014, + "loss": 2.5128, + "step": 8696 + }, + { + "epoch": 0.701880397062384, + "grad_norm": 0.7253173589706421, + "learning_rate": 0.00012119762537524893, + "loss": 2.5776, + "step": 8697 + }, + { + "epoch": 0.701961100798967, + "grad_norm": 0.6536431312561035, + "learning_rate": 0.00012118219697354745, + "loss": 2.5656, + "step": 8698 + }, + { + "epoch": 0.70204180453555, + "grad_norm": 0.7121500372886658, + "learning_rate": 0.00012116676804393028, + "loss": 2.5878, + "step": 8699 + }, + { + "epoch": 0.702122508272133, + "grad_norm": 0.676449716091156, + "learning_rate": 0.00012115133858678191, + "loss": 2.6624, + "step": 8700 + }, + { + "epoch": 0.702203212008716, + "grad_norm": 0.7230382561683655, + "learning_rate": 0.0001211359086024869, + "loss": 2.5461, + "step": 8701 + }, + { + "epoch": 0.702283915745299, + "grad_norm": 0.6679937839508057, + "learning_rate": 0.00012112047809142979, + "loss": 2.5568, + "step": 8702 + }, + { + "epoch": 0.702364619481882, + "grad_norm": 0.6627704501152039, + "learning_rate": 0.0001211050470539952, + "loss": 2.4819, + "step": 8703 + }, + { + "epoch": 0.702445323218465, + 
"grad_norm": 0.6680646538734436, + "learning_rate": 0.0001210896154905676, + "loss": 2.5722, + "step": 8704 + }, + { + "epoch": 0.7025260269550481, + "grad_norm": 0.7406336665153503, + "learning_rate": 0.00012107418340153167, + "loss": 2.5722, + "step": 8705 + }, + { + "epoch": 0.702606730691631, + "grad_norm": 0.6634557247161865, + "learning_rate": 0.00012105875078727203, + "loss": 2.5747, + "step": 8706 + }, + { + "epoch": 0.702687434428214, + "grad_norm": 0.6521568894386292, + "learning_rate": 0.00012104331764817325, + "loss": 2.555, + "step": 8707 + }, + { + "epoch": 0.702768138164797, + "grad_norm": 0.677606463432312, + "learning_rate": 0.00012102788398461999, + "loss": 2.5544, + "step": 8708 + }, + { + "epoch": 0.7028488419013801, + "grad_norm": 0.6593700051307678, + "learning_rate": 0.0001210124497969969, + "loss": 2.5252, + "step": 8709 + }, + { + "epoch": 0.7029295456379631, + "grad_norm": 0.686903715133667, + "learning_rate": 0.00012099701508568863, + "loss": 2.6513, + "step": 8710 + }, + { + "epoch": 0.703010249374546, + "grad_norm": 0.6395620107650757, + "learning_rate": 0.00012098157985107987, + "loss": 2.5169, + "step": 8711 + }, + { + "epoch": 0.703090953111129, + "grad_norm": 0.7387555837631226, + "learning_rate": 0.00012096614409355526, + "loss": 2.5741, + "step": 8712 + }, + { + "epoch": 0.7031716568477121, + "grad_norm": 0.665900707244873, + "learning_rate": 0.00012095070781349957, + "loss": 2.5068, + "step": 8713 + }, + { + "epoch": 0.7032523605842951, + "grad_norm": 0.6983458399772644, + "learning_rate": 0.00012093527101129745, + "loss": 2.5028, + "step": 8714 + }, + { + "epoch": 0.703333064320878, + "grad_norm": 0.6250826120376587, + "learning_rate": 0.00012091983368733366, + "loss": 2.5765, + "step": 8715 + }, + { + "epoch": 0.703413768057461, + "grad_norm": 0.7031501531600952, + "learning_rate": 0.00012090439584199294, + "loss": 2.5885, + "step": 8716 + }, + { + "epoch": 0.7034944717940441, + "grad_norm": 0.7140926122665405, + 
"learning_rate": 0.00012088895747566002, + "loss": 2.6278, + "step": 8717 + }, + { + "epoch": 0.7035751755306271, + "grad_norm": 0.6753602027893066, + "learning_rate": 0.00012087351858871969, + "loss": 2.5664, + "step": 8718 + }, + { + "epoch": 0.7036558792672101, + "grad_norm": 0.7150039076805115, + "learning_rate": 0.0001208580791815567, + "loss": 2.6739, + "step": 8719 + }, + { + "epoch": 0.703736583003793, + "grad_norm": 0.7120389342308044, + "learning_rate": 0.00012084263925455583, + "loss": 2.565, + "step": 8720 + }, + { + "epoch": 0.703817286740376, + "grad_norm": 0.7775784134864807, + "learning_rate": 0.00012082719880810194, + "loss": 2.5861, + "step": 8721 + }, + { + "epoch": 0.7038979904769591, + "grad_norm": 0.6704322695732117, + "learning_rate": 0.0001208117578425798, + "loss": 2.5957, + "step": 8722 + }, + { + "epoch": 0.7039786942135421, + "grad_norm": 0.6761276721954346, + "learning_rate": 0.00012079631635837426, + "loss": 2.5472, + "step": 8723 + }, + { + "epoch": 0.7040593979501251, + "grad_norm": 0.7639868855476379, + "learning_rate": 0.00012078087435587016, + "loss": 2.6053, + "step": 8724 + }, + { + "epoch": 0.704140101686708, + "grad_norm": 0.7490074038505554, + "learning_rate": 0.0001207654318354523, + "loss": 2.5517, + "step": 8725 + }, + { + "epoch": 0.7042208054232911, + "grad_norm": 0.7068852782249451, + "learning_rate": 0.00012074998879750566, + "loss": 2.5357, + "step": 8726 + }, + { + "epoch": 0.7043015091598741, + "grad_norm": 0.7273775935173035, + "learning_rate": 0.00012073454524241503, + "loss": 2.6028, + "step": 8727 + }, + { + "epoch": 0.7043822128964571, + "grad_norm": 0.7146363258361816, + "learning_rate": 0.00012071910117056533, + "loss": 2.5982, + "step": 8728 + }, + { + "epoch": 0.7044629166330401, + "grad_norm": 0.7631390690803528, + "learning_rate": 0.00012070365658234149, + "loss": 2.6021, + "step": 8729 + }, + { + "epoch": 0.7045436203696231, + "grad_norm": 0.7065283060073853, + "learning_rate": 0.00012068821147812839, + 
"loss": 2.5538, + "step": 8730 + }, + { + "epoch": 0.7046243241062061, + "grad_norm": 0.7914319634437561, + "learning_rate": 0.00012067276585831097, + "loss": 2.5617, + "step": 8731 + }, + { + "epoch": 0.7047050278427891, + "grad_norm": 0.7036565542221069, + "learning_rate": 0.0001206573197232742, + "loss": 2.5354, + "step": 8732 + }, + { + "epoch": 0.7047857315793721, + "grad_norm": 0.657116711139679, + "learning_rate": 0.00012064187307340303, + "loss": 2.5084, + "step": 8733 + }, + { + "epoch": 0.7048664353159552, + "grad_norm": 0.7246817946434021, + "learning_rate": 0.00012062642590908242, + "loss": 2.5737, + "step": 8734 + }, + { + "epoch": 0.7049471390525381, + "grad_norm": 0.6895857453346252, + "learning_rate": 0.00012061097823069736, + "loss": 2.5792, + "step": 8735 + }, + { + "epoch": 0.7050278427891211, + "grad_norm": 0.7654988169670105, + "learning_rate": 0.00012059553003863282, + "loss": 2.5302, + "step": 8736 + }, + { + "epoch": 0.7051085465257041, + "grad_norm": 0.7611668109893799, + "learning_rate": 0.00012058008133327387, + "loss": 2.6073, + "step": 8737 + }, + { + "epoch": 0.7051892502622872, + "grad_norm": 0.728729784488678, + "learning_rate": 0.00012056463211500546, + "loss": 2.5714, + "step": 8738 + }, + { + "epoch": 0.7052699539988702, + "grad_norm": 0.7251634001731873, + "learning_rate": 0.00012054918238421271, + "loss": 2.627, + "step": 8739 + }, + { + "epoch": 0.7053506577354531, + "grad_norm": 0.827745795249939, + "learning_rate": 0.00012053373214128056, + "loss": 2.6303, + "step": 8740 + }, + { + "epoch": 0.7054313614720361, + "grad_norm": 0.6837510466575623, + "learning_rate": 0.00012051828138659416, + "loss": 2.5837, + "step": 8741 + }, + { + "epoch": 0.7055120652086192, + "grad_norm": 0.6763553619384766, + "learning_rate": 0.00012050283012053856, + "loss": 2.575, + "step": 8742 + }, + { + "epoch": 0.7055927689452022, + "grad_norm": 0.6779605150222778, + "learning_rate": 0.00012048737834349886, + "loss": 2.588, + "step": 8743 + }, + { + 
"epoch": 0.7056734726817852, + "grad_norm": 0.7207251191139221, + "learning_rate": 0.00012047192605586008, + "loss": 2.6182, + "step": 8744 + }, + { + "epoch": 0.7057541764183681, + "grad_norm": 0.6681165099143982, + "learning_rate": 0.00012045647325800742, + "loss": 2.5595, + "step": 8745 + }, + { + "epoch": 0.7058348801549512, + "grad_norm": 0.7520970702171326, + "learning_rate": 0.00012044101995032594, + "loss": 2.6306, + "step": 8746 + }, + { + "epoch": 0.7059155838915342, + "grad_norm": 0.7148429155349731, + "learning_rate": 0.00012042556613320087, + "loss": 2.5749, + "step": 8747 + }, + { + "epoch": 0.7059962876281172, + "grad_norm": 0.619369626045227, + "learning_rate": 0.00012041011180701729, + "loss": 2.5382, + "step": 8748 + }, + { + "epoch": 0.7060769913647001, + "grad_norm": 0.7450816035270691, + "learning_rate": 0.00012039465697216032, + "loss": 2.5547, + "step": 8749 + }, + { + "epoch": 0.7061576951012832, + "grad_norm": 0.7324537634849548, + "learning_rate": 0.00012037920162901521, + "loss": 2.5756, + "step": 8750 + }, + { + "epoch": 0.7062383988378662, + "grad_norm": 0.7881754636764526, + "learning_rate": 0.00012036374577796715, + "loss": 2.6376, + "step": 8751 + }, + { + "epoch": 0.7063191025744492, + "grad_norm": 0.7095965147018433, + "learning_rate": 0.00012034828941940128, + "loss": 2.5454, + "step": 8752 + }, + { + "epoch": 0.7063998063110322, + "grad_norm": 0.7142949104309082, + "learning_rate": 0.00012033283255370287, + "loss": 2.5738, + "step": 8753 + }, + { + "epoch": 0.7064805100476153, + "grad_norm": 0.6592378616333008, + "learning_rate": 0.0001203173751812571, + "loss": 2.5473, + "step": 8754 + }, + { + "epoch": 0.7065612137841982, + "grad_norm": 0.6964332461357117, + "learning_rate": 0.00012030191730244926, + "loss": 2.5829, + "step": 8755 + }, + { + "epoch": 0.7066419175207812, + "grad_norm": 0.707539975643158, + "learning_rate": 0.00012028645891766455, + "loss": 2.5652, + "step": 8756 + }, + { + "epoch": 0.7067226212573642, + 
"grad_norm": 0.6991387009620667, + "learning_rate": 0.00012027100002728824, + "loss": 2.5874, + "step": 8757 + }, + { + "epoch": 0.7068033249939473, + "grad_norm": 0.665746808052063, + "learning_rate": 0.00012025554063170566, + "loss": 2.5163, + "step": 8758 + }, + { + "epoch": 0.7068840287305302, + "grad_norm": 0.696130096912384, + "learning_rate": 0.00012024008073130204, + "loss": 2.5748, + "step": 8759 + }, + { + "epoch": 0.7069647324671132, + "grad_norm": 0.698885440826416, + "learning_rate": 0.00012022462032646269, + "loss": 2.5561, + "step": 8760 + }, + { + "epoch": 0.7070454362036962, + "grad_norm": 0.7052211761474609, + "learning_rate": 0.00012020915941757292, + "loss": 2.5979, + "step": 8761 + }, + { + "epoch": 0.7071261399402793, + "grad_norm": 0.7370811104774475, + "learning_rate": 0.00012019369800501808, + "loss": 2.5623, + "step": 8762 + }, + { + "epoch": 0.7072068436768623, + "grad_norm": 0.6699148416519165, + "learning_rate": 0.00012017823608918352, + "loss": 2.5816, + "step": 8763 + }, + { + "epoch": 0.7072875474134452, + "grad_norm": 0.6712930798530579, + "learning_rate": 0.00012016277367045457, + "loss": 2.5495, + "step": 8764 + }, + { + "epoch": 0.7073682511500282, + "grad_norm": 0.7238204479217529, + "learning_rate": 0.00012014731074921659, + "loss": 2.5936, + "step": 8765 + }, + { + "epoch": 0.7074489548866113, + "grad_norm": 0.7303668856620789, + "learning_rate": 0.00012013184732585494, + "loss": 2.6366, + "step": 8766 + }, + { + "epoch": 0.7075296586231943, + "grad_norm": 0.6883132457733154, + "learning_rate": 0.00012011638340075505, + "loss": 2.534, + "step": 8767 + }, + { + "epoch": 0.7076103623597773, + "grad_norm": 0.7057133316993713, + "learning_rate": 0.00012010091897430229, + "loss": 2.6035, + "step": 8768 + }, + { + "epoch": 0.7076910660963602, + "grad_norm": 0.7069352269172668, + "learning_rate": 0.0001200854540468821, + "loss": 2.5047, + "step": 8769 + }, + { + "epoch": 0.7077717698329433, + "grad_norm": 0.7192478775978088, + 
"learning_rate": 0.00012006998861887985, + "loss": 2.5698, + "step": 8770 + }, + { + "epoch": 0.7078524735695263, + "grad_norm": 0.6992887854576111, + "learning_rate": 0.00012005452269068107, + "loss": 2.5631, + "step": 8771 + }, + { + "epoch": 0.7079331773061093, + "grad_norm": 0.676154613494873, + "learning_rate": 0.00012003905626267114, + "loss": 2.5255, + "step": 8772 + }, + { + "epoch": 0.7080138810426923, + "grad_norm": 0.672269880771637, + "learning_rate": 0.00012002358933523555, + "loss": 2.5766, + "step": 8773 + }, + { + "epoch": 0.7080945847792752, + "grad_norm": 0.7334566712379456, + "learning_rate": 0.00012000812190875976, + "loss": 2.6068, + "step": 8774 + }, + { + "epoch": 0.7081752885158583, + "grad_norm": 0.6599388122558594, + "learning_rate": 0.00011999265398362931, + "loss": 2.6032, + "step": 8775 + }, + { + "epoch": 0.7082559922524413, + "grad_norm": 0.7158498167991638, + "learning_rate": 0.00011997718556022958, + "loss": 2.599, + "step": 8776 + }, + { + "epoch": 0.7083366959890243, + "grad_norm": 0.7470360994338989, + "learning_rate": 0.00011996171663894624, + "loss": 2.58, + "step": 8777 + }, + { + "epoch": 0.7084173997256072, + "grad_norm": 0.6251266002655029, + "learning_rate": 0.00011994624722016472, + "loss": 2.5996, + "step": 8778 + }, + { + "epoch": 0.7084981034621903, + "grad_norm": 0.6649689078330994, + "learning_rate": 0.00011993077730427058, + "loss": 2.6025, + "step": 8779 + }, + { + "epoch": 0.7085788071987733, + "grad_norm": 0.7554693818092346, + "learning_rate": 0.00011991530689164939, + "loss": 2.6207, + "step": 8780 + }, + { + "epoch": 0.7086595109353563, + "grad_norm": 0.7941430807113647, + "learning_rate": 0.00011989983598268661, + "loss": 2.584, + "step": 8781 + }, + { + "epoch": 0.7087402146719393, + "grad_norm": 0.7257998585700989, + "learning_rate": 0.00011988436457776799, + "loss": 2.6152, + "step": 8782 + }, + { + "epoch": 0.7088209184085223, + "grad_norm": 0.716354489326477, + "learning_rate": 0.00011986889267727899, + 
"loss": 2.585, + "step": 8783 + }, + { + "epoch": 0.7089016221451053, + "grad_norm": 0.7094400525093079, + "learning_rate": 0.00011985342028160525, + "loss": 2.5759, + "step": 8784 + }, + { + "epoch": 0.7089823258816883, + "grad_norm": 0.7211421728134155, + "learning_rate": 0.0001198379473911324, + "loss": 2.5645, + "step": 8785 + }, + { + "epoch": 0.7090630296182713, + "grad_norm": 0.7166693806648254, + "learning_rate": 0.000119822474006246, + "loss": 2.5357, + "step": 8786 + }, + { + "epoch": 0.7091437333548544, + "grad_norm": 0.6702254414558411, + "learning_rate": 0.00011980700012733175, + "loss": 2.5353, + "step": 8787 + }, + { + "epoch": 0.7092244370914373, + "grad_norm": 0.6784049868583679, + "learning_rate": 0.0001197915257547753, + "loss": 2.4942, + "step": 8788 + }, + { + "epoch": 0.7093051408280203, + "grad_norm": 0.6914299726486206, + "learning_rate": 0.00011977605088896226, + "loss": 2.5682, + "step": 8789 + }, + { + "epoch": 0.7093858445646033, + "grad_norm": 0.7324358820915222, + "learning_rate": 0.00011976057553027837, + "loss": 2.564, + "step": 8790 + }, + { + "epoch": 0.7094665483011864, + "grad_norm": 0.6927928924560547, + "learning_rate": 0.00011974509967910927, + "loss": 2.5728, + "step": 8791 + }, + { + "epoch": 0.7095472520377694, + "grad_norm": 0.6795603036880493, + "learning_rate": 0.00011972962333584066, + "loss": 2.588, + "step": 8792 + }, + { + "epoch": 0.7096279557743523, + "grad_norm": 0.7132226228713989, + "learning_rate": 0.00011971414650085828, + "loss": 2.5759, + "step": 8793 + }, + { + "epoch": 0.7097086595109353, + "grad_norm": 0.737195611000061, + "learning_rate": 0.00011969866917454782, + "loss": 2.5721, + "step": 8794 + }, + { + "epoch": 0.7097893632475184, + "grad_norm": 0.6776021718978882, + "learning_rate": 0.00011968319135729507, + "loss": 2.5794, + "step": 8795 + }, + { + "epoch": 0.7098700669841014, + "grad_norm": 0.7113735675811768, + "learning_rate": 0.0001196677130494857, + "loss": 2.5595, + "step": 8796 + }, + { + 
"epoch": 0.7099507707206844, + "grad_norm": 0.6277747750282288, + "learning_rate": 0.0001196522342515055, + "loss": 2.5003, + "step": 8797 + }, + { + "epoch": 0.7100314744572673, + "grad_norm": 0.6982879042625427, + "learning_rate": 0.00011963675496374028, + "loss": 2.542, + "step": 8798 + }, + { + "epoch": 0.7101121781938504, + "grad_norm": 0.7019705176353455, + "learning_rate": 0.00011962127518657578, + "loss": 2.5723, + "step": 8799 + }, + { + "epoch": 0.7101928819304334, + "grad_norm": 0.6831088662147522, + "learning_rate": 0.00011960579492039783, + "loss": 2.5676, + "step": 8800 + }, + { + "epoch": 0.7102735856670164, + "grad_norm": 0.6744031310081482, + "learning_rate": 0.0001195903141655922, + "loss": 2.58, + "step": 8801 + }, + { + "epoch": 0.7103542894035993, + "grad_norm": 0.6873177289962769, + "learning_rate": 0.00011957483292254473, + "loss": 2.6289, + "step": 8802 + }, + { + "epoch": 0.7104349931401824, + "grad_norm": 0.6340685486793518, + "learning_rate": 0.00011955935119164125, + "loss": 2.5688, + "step": 8803 + }, + { + "epoch": 0.7105156968767654, + "grad_norm": 0.7147708535194397, + "learning_rate": 0.00011954386897326764, + "loss": 2.5471, + "step": 8804 + }, + { + "epoch": 0.7105964006133484, + "grad_norm": 0.699605405330658, + "learning_rate": 0.00011952838626780971, + "loss": 2.6122, + "step": 8805 + }, + { + "epoch": 0.7106771043499314, + "grad_norm": 0.6685385704040527, + "learning_rate": 0.00011951290307565335, + "loss": 2.5423, + "step": 8806 + }, + { + "epoch": 0.7107578080865145, + "grad_norm": 0.6884726881980896, + "learning_rate": 0.00011949741939718439, + "loss": 2.5243, + "step": 8807 + }, + { + "epoch": 0.7108385118230974, + "grad_norm": 0.6991142630577087, + "learning_rate": 0.00011948193523278884, + "loss": 2.6271, + "step": 8808 + }, + { + "epoch": 0.7109192155596804, + "grad_norm": 0.6964353919029236, + "learning_rate": 0.00011946645058285253, + "loss": 2.6296, + "step": 8809 + }, + { + "epoch": 0.7109999192962634, + 
"grad_norm": 0.7592040300369263, + "learning_rate": 0.00011945096544776136, + "loss": 2.6601, + "step": 8810 + }, + { + "epoch": 0.7110806230328465, + "grad_norm": 0.7146934866905212, + "learning_rate": 0.00011943547982790131, + "loss": 2.54, + "step": 8811 + }, + { + "epoch": 0.7111613267694294, + "grad_norm": 0.6991123557090759, + "learning_rate": 0.00011941999372365827, + "loss": 2.5978, + "step": 8812 + }, + { + "epoch": 0.7112420305060124, + "grad_norm": 0.6835920810699463, + "learning_rate": 0.00011940450713541822, + "loss": 2.6096, + "step": 8813 + }, + { + "epoch": 0.7113227342425954, + "grad_norm": 0.6913917660713196, + "learning_rate": 0.00011938902006356716, + "loss": 2.5624, + "step": 8814 + }, + { + "epoch": 0.7114034379791785, + "grad_norm": 0.6620622873306274, + "learning_rate": 0.00011937353250849102, + "loss": 2.6211, + "step": 8815 + }, + { + "epoch": 0.7114841417157615, + "grad_norm": 0.6738792061805725, + "learning_rate": 0.00011935804447057581, + "loss": 2.5889, + "step": 8816 + }, + { + "epoch": 0.7115648454523444, + "grad_norm": 0.7101936936378479, + "learning_rate": 0.00011934255595020751, + "loss": 2.5846, + "step": 8817 + }, + { + "epoch": 0.7116455491889274, + "grad_norm": 0.6843911409378052, + "learning_rate": 0.00011932706694777216, + "loss": 2.5757, + "step": 8818 + }, + { + "epoch": 0.7117262529255105, + "grad_norm": 0.7217971086502075, + "learning_rate": 0.0001193115774636558, + "loss": 2.6174, + "step": 8819 + }, + { + "epoch": 0.7118069566620935, + "grad_norm": 0.6706245541572571, + "learning_rate": 0.00011929608749824445, + "loss": 2.5893, + "step": 8820 + }, + { + "epoch": 0.7118876603986765, + "grad_norm": 0.7057672739028931, + "learning_rate": 0.00011928059705192413, + "loss": 2.5426, + "step": 8821 + }, + { + "epoch": 0.7119683641352594, + "grad_norm": 0.7354697585105896, + "learning_rate": 0.00011926510612508095, + "loss": 2.5741, + "step": 8822 + }, + { + "epoch": 0.7120490678718424, + "grad_norm": 0.6618186235427856, + 
"learning_rate": 0.00011924961471810096, + "loss": 2.6007, + "step": 8823 + }, + { + "epoch": 0.7121297716084255, + "grad_norm": 0.6733995676040649, + "learning_rate": 0.00011923412283137028, + "loss": 2.5739, + "step": 8824 + }, + { + "epoch": 0.7122104753450085, + "grad_norm": 0.7324833869934082, + "learning_rate": 0.00011921863046527497, + "loss": 2.5461, + "step": 8825 + }, + { + "epoch": 0.7122911790815915, + "grad_norm": 0.6753048896789551, + "learning_rate": 0.00011920313762020113, + "loss": 2.5066, + "step": 8826 + }, + { + "epoch": 0.7123718828181744, + "grad_norm": 0.7861250638961792, + "learning_rate": 0.00011918764429653489, + "loss": 2.5229, + "step": 8827 + }, + { + "epoch": 0.7124525865547575, + "grad_norm": 0.7037342190742493, + "learning_rate": 0.00011917215049466244, + "loss": 2.5443, + "step": 8828 + }, + { + "epoch": 0.7125332902913405, + "grad_norm": 0.7112773060798645, + "learning_rate": 0.00011915665621496985, + "loss": 2.5656, + "step": 8829 + }, + { + "epoch": 0.7126139940279235, + "grad_norm": 0.6384316682815552, + "learning_rate": 0.00011914116145784333, + "loss": 2.5526, + "step": 8830 + }, + { + "epoch": 0.7126946977645064, + "grad_norm": 0.6673600077629089, + "learning_rate": 0.000119125666223669, + "loss": 2.5868, + "step": 8831 + }, + { + "epoch": 0.7127754015010895, + "grad_norm": 0.6927722692489624, + "learning_rate": 0.0001191101705128331, + "loss": 2.6237, + "step": 8832 + }, + { + "epoch": 0.7128561052376725, + "grad_norm": 0.7410106658935547, + "learning_rate": 0.00011909467432572182, + "loss": 2.5652, + "step": 8833 + }, + { + "epoch": 0.7129368089742555, + "grad_norm": 0.6780139803886414, + "learning_rate": 0.0001190791776627213, + "loss": 2.5343, + "step": 8834 + }, + { + "epoch": 0.7130175127108385, + "grad_norm": 0.7147949934005737, + "learning_rate": 0.00011906368052421781, + "loss": 2.5368, + "step": 8835 + }, + { + "epoch": 0.7130982164474216, + "grad_norm": 0.7092324495315552, + "learning_rate": 0.00011904818291059759, 
+ "loss": 2.538, + "step": 8836 + }, + { + "epoch": 0.7131789201840045, + "grad_norm": 0.761763870716095, + "learning_rate": 0.00011903268482224684, + "loss": 2.5984, + "step": 8837 + }, + { + "epoch": 0.7132596239205875, + "grad_norm": 0.7011365294456482, + "learning_rate": 0.00011901718625955182, + "loss": 2.5383, + "step": 8838 + }, + { + "epoch": 0.7133403276571705, + "grad_norm": 0.7982703447341919, + "learning_rate": 0.00011900168722289882, + "loss": 2.5714, + "step": 8839 + }, + { + "epoch": 0.7134210313937536, + "grad_norm": 0.6788253784179688, + "learning_rate": 0.00011898618771267412, + "loss": 2.5675, + "step": 8840 + }, + { + "epoch": 0.7135017351303365, + "grad_norm": 0.6245018243789673, + "learning_rate": 0.00011897068772926397, + "loss": 2.5497, + "step": 8841 + }, + { + "epoch": 0.7135824388669195, + "grad_norm": 0.732109785079956, + "learning_rate": 0.0001189551872730547, + "loss": 2.5043, + "step": 8842 + }, + { + "epoch": 0.7136631426035025, + "grad_norm": 0.7640885710716248, + "learning_rate": 0.0001189396863444326, + "loss": 2.5974, + "step": 8843 + }, + { + "epoch": 0.7137438463400856, + "grad_norm": 0.6806808710098267, + "learning_rate": 0.00011892418494378403, + "loss": 2.5911, + "step": 8844 + }, + { + "epoch": 0.7138245500766686, + "grad_norm": 0.6730000376701355, + "learning_rate": 0.00011890868307149528, + "loss": 2.5405, + "step": 8845 + }, + { + "epoch": 0.7139052538132515, + "grad_norm": 0.6881929636001587, + "learning_rate": 0.00011889318072795275, + "loss": 2.6083, + "step": 8846 + }, + { + "epoch": 0.7139859575498345, + "grad_norm": 0.7079598307609558, + "learning_rate": 0.00011887767791354275, + "loss": 2.5743, + "step": 8847 + }, + { + "epoch": 0.7140666612864176, + "grad_norm": 0.6760475635528564, + "learning_rate": 0.00011886217462865166, + "loss": 2.5925, + "step": 8848 + }, + { + "epoch": 0.7141473650230006, + "grad_norm": 0.6851043701171875, + "learning_rate": 0.00011884667087366587, + "loss": 2.5839, + "step": 8849 + }, + { 
+ "epoch": 0.7142280687595836, + "grad_norm": 0.6805267930030823, + "learning_rate": 0.00011883116664897178, + "loss": 2.562, + "step": 8850 + }, + { + "epoch": 0.7143087724961665, + "grad_norm": 0.6720704436302185, + "learning_rate": 0.00011881566195495581, + "loss": 2.5381, + "step": 8851 + }, + { + "epoch": 0.7143894762327496, + "grad_norm": 0.718166172504425, + "learning_rate": 0.00011880015679200436, + "loss": 2.5912, + "step": 8852 + }, + { + "epoch": 0.7144701799693326, + "grad_norm": 0.6643497943878174, + "learning_rate": 0.00011878465116050383, + "loss": 2.5122, + "step": 8853 + }, + { + "epoch": 0.7145508837059156, + "grad_norm": 0.705186665058136, + "learning_rate": 0.00011876914506084074, + "loss": 2.617, + "step": 8854 + }, + { + "epoch": 0.7146315874424986, + "grad_norm": 0.6417848467826843, + "learning_rate": 0.00011875363849340144, + "loss": 2.5552, + "step": 8855 + }, + { + "epoch": 0.7147122911790816, + "grad_norm": 0.6861358880996704, + "learning_rate": 0.00011873813145857249, + "loss": 2.6324, + "step": 8856 + }, + { + "epoch": 0.7147929949156646, + "grad_norm": 0.7134111523628235, + "learning_rate": 0.00011872262395674027, + "loss": 2.5892, + "step": 8857 + }, + { + "epoch": 0.7148736986522476, + "grad_norm": 0.7177506685256958, + "learning_rate": 0.00011870711598829135, + "loss": 2.5677, + "step": 8858 + }, + { + "epoch": 0.7149544023888306, + "grad_norm": 0.6435763835906982, + "learning_rate": 0.00011869160755361219, + "loss": 2.5452, + "step": 8859 + }, + { + "epoch": 0.7150351061254137, + "grad_norm": 0.6443132758140564, + "learning_rate": 0.00011867609865308935, + "loss": 2.5566, + "step": 8860 + }, + { + "epoch": 0.7151158098619966, + "grad_norm": 0.7132347822189331, + "learning_rate": 0.00011866058928710925, + "loss": 2.565, + "step": 8861 + }, + { + "epoch": 0.7151965135985796, + "grad_norm": 0.7803207039833069, + "learning_rate": 0.00011864507945605854, + "loss": 2.556, + "step": 8862 + }, + { + "epoch": 0.7152772173351626, + 
"grad_norm": 0.7277950644493103, + "learning_rate": 0.00011862956916032367, + "loss": 2.5623, + "step": 8863 + }, + { + "epoch": 0.7153579210717457, + "grad_norm": 0.6812277436256409, + "learning_rate": 0.00011861405840029125, + "loss": 2.6146, + "step": 8864 + }, + { + "epoch": 0.7154386248083286, + "grad_norm": 0.7170509099960327, + "learning_rate": 0.00011859854717634786, + "loss": 2.52, + "step": 8865 + }, + { + "epoch": 0.7155193285449116, + "grad_norm": 0.7282906174659729, + "learning_rate": 0.00011858303548888004, + "loss": 2.5605, + "step": 8866 + }, + { + "epoch": 0.7156000322814946, + "grad_norm": 0.7290246486663818, + "learning_rate": 0.00011856752333827439, + "loss": 2.6292, + "step": 8867 + }, + { + "epoch": 0.7156807360180777, + "grad_norm": 0.6870024800300598, + "learning_rate": 0.00011855201072491752, + "loss": 2.6396, + "step": 8868 + }, + { + "epoch": 0.7157614397546607, + "grad_norm": 0.7336156964302063, + "learning_rate": 0.00011853649764919605, + "loss": 2.6356, + "step": 8869 + }, + { + "epoch": 0.7158421434912436, + "grad_norm": 0.7181294560432434, + "learning_rate": 0.00011852098411149661, + "loss": 2.5163, + "step": 8870 + }, + { + "epoch": 0.7159228472278266, + "grad_norm": 0.7355513572692871, + "learning_rate": 0.00011850547011220583, + "loss": 2.5485, + "step": 8871 + }, + { + "epoch": 0.7160035509644097, + "grad_norm": 0.7005351185798645, + "learning_rate": 0.00011848995565171038, + "loss": 2.5187, + "step": 8872 + }, + { + "epoch": 0.7160842547009927, + "grad_norm": 0.6550194025039673, + "learning_rate": 0.00011847444073039686, + "loss": 2.5174, + "step": 8873 + }, + { + "epoch": 0.7161649584375757, + "grad_norm": 0.6568251252174377, + "learning_rate": 0.00011845892534865202, + "loss": 2.5128, + "step": 8874 + }, + { + "epoch": 0.7162456621741586, + "grad_norm": 0.6359419226646423, + "learning_rate": 0.0001184434095068625, + "loss": 2.5967, + "step": 8875 + }, + { + "epoch": 0.7163263659107416, + "grad_norm": 0.6730023622512817, + 
"learning_rate": 0.00011842789320541504, + "loss": 2.5243, + "step": 8876 + }, + { + "epoch": 0.7164070696473247, + "grad_norm": 0.6750187277793884, + "learning_rate": 0.00011841237644469625, + "loss": 2.602, + "step": 8877 + }, + { + "epoch": 0.7164877733839077, + "grad_norm": 0.7039143443107605, + "learning_rate": 0.00011839685922509291, + "loss": 2.5345, + "step": 8878 + }, + { + "epoch": 0.7165684771204907, + "grad_norm": 0.6602306962013245, + "learning_rate": 0.00011838134154699177, + "loss": 2.5995, + "step": 8879 + }, + { + "epoch": 0.7166491808570736, + "grad_norm": 0.6744598150253296, + "learning_rate": 0.00011836582341077955, + "loss": 2.6005, + "step": 8880 + }, + { + "epoch": 0.7167298845936567, + "grad_norm": 0.7136051058769226, + "learning_rate": 0.00011835030481684302, + "loss": 2.5424, + "step": 8881 + }, + { + "epoch": 0.7168105883302397, + "grad_norm": 0.7085986137390137, + "learning_rate": 0.00011833478576556889, + "loss": 2.5912, + "step": 8882 + }, + { + "epoch": 0.7168912920668227, + "grad_norm": 0.7635689377784729, + "learning_rate": 0.00011831926625734398, + "loss": 2.5836, + "step": 8883 + }, + { + "epoch": 0.7169719958034056, + "grad_norm": 0.6543256640434265, + "learning_rate": 0.00011830374629255508, + "loss": 2.5442, + "step": 8884 + }, + { + "epoch": 0.7170526995399887, + "grad_norm": 0.663840115070343, + "learning_rate": 0.00011828822587158896, + "loss": 2.5529, + "step": 8885 + }, + { + "epoch": 0.7171334032765717, + "grad_norm": 0.6868027448654175, + "learning_rate": 0.00011827270499483247, + "loss": 2.6678, + "step": 8886 + }, + { + "epoch": 0.7172141070131547, + "grad_norm": 0.649172842502594, + "learning_rate": 0.00011825718366267238, + "loss": 2.57, + "step": 8887 + }, + { + "epoch": 0.7172948107497377, + "grad_norm": 0.6818440556526184, + "learning_rate": 0.00011824166187549554, + "loss": 2.5602, + "step": 8888 + }, + { + "epoch": 0.7173755144863208, + "grad_norm": 0.7222314476966858, + "learning_rate": 0.00011822613963368885, 
+ "loss": 2.5526, + "step": 8889 + }, + { + "epoch": 0.7174562182229037, + "grad_norm": 0.7309598922729492, + "learning_rate": 0.00011821061693763909, + "loss": 2.5515, + "step": 8890 + }, + { + "epoch": 0.7175369219594867, + "grad_norm": 0.6935746669769287, + "learning_rate": 0.00011819509378773314, + "loss": 2.5506, + "step": 8891 + }, + { + "epoch": 0.7176176256960697, + "grad_norm": 0.6754423975944519, + "learning_rate": 0.00011817957018435792, + "loss": 2.5621, + "step": 8892 + }, + { + "epoch": 0.7176983294326528, + "grad_norm": 0.7087355852127075, + "learning_rate": 0.00011816404612790026, + "loss": 2.5708, + "step": 8893 + }, + { + "epoch": 0.7177790331692357, + "grad_norm": 0.726820707321167, + "learning_rate": 0.0001181485216187471, + "loss": 2.5741, + "step": 8894 + }, + { + "epoch": 0.7178597369058187, + "grad_norm": 0.6539922952651978, + "learning_rate": 0.00011813299665728532, + "loss": 2.613, + "step": 8895 + }, + { + "epoch": 0.7179404406424017, + "grad_norm": 0.7008066773414612, + "learning_rate": 0.00011811747124390189, + "loss": 2.6029, + "step": 8896 + }, + { + "epoch": 0.7180211443789848, + "grad_norm": 0.6900522708892822, + "learning_rate": 0.00011810194537898374, + "loss": 2.5716, + "step": 8897 + }, + { + "epoch": 0.7181018481155678, + "grad_norm": 0.675345242023468, + "learning_rate": 0.00011808641906291776, + "loss": 2.5742, + "step": 8898 + }, + { + "epoch": 0.7181825518521507, + "grad_norm": 0.6697559356689453, + "learning_rate": 0.00011807089229609092, + "loss": 2.5717, + "step": 8899 + }, + { + "epoch": 0.7182632555887337, + "grad_norm": 0.6874344944953918, + "learning_rate": 0.00011805536507889021, + "loss": 2.5394, + "step": 8900 + }, + { + "epoch": 0.7183439593253168, + "grad_norm": 0.6675494313240051, + "learning_rate": 0.00011803983741170263, + "loss": 2.5655, + "step": 8901 + }, + { + "epoch": 0.7184246630618998, + "grad_norm": 0.6937244534492493, + "learning_rate": 0.00011802430929491517, + "loss": 2.5676, + "step": 8902 + }, + 
{ + "epoch": 0.7185053667984828, + "grad_norm": 0.7591496109962463, + "learning_rate": 0.00011800878072891474, + "loss": 2.5849, + "step": 8903 + }, + { + "epoch": 0.7185860705350657, + "grad_norm": 0.6503129005432129, + "learning_rate": 0.00011799325171408846, + "loss": 2.5416, + "step": 8904 + }, + { + "epoch": 0.7186667742716488, + "grad_norm": 0.6450222134590149, + "learning_rate": 0.00011797772225082333, + "loss": 2.5395, + "step": 8905 + }, + { + "epoch": 0.7187474780082318, + "grad_norm": 0.7317619919776917, + "learning_rate": 0.00011796219233950632, + "loss": 2.609, + "step": 8906 + }, + { + "epoch": 0.7188281817448148, + "grad_norm": 0.7585787773132324, + "learning_rate": 0.00011794666198052455, + "loss": 2.5556, + "step": 8907 + }, + { + "epoch": 0.7189088854813978, + "grad_norm": 0.6718214750289917, + "learning_rate": 0.00011793113117426505, + "loss": 2.5914, + "step": 8908 + }, + { + "epoch": 0.7189895892179808, + "grad_norm": 0.6459314823150635, + "learning_rate": 0.00011791559992111487, + "loss": 2.5956, + "step": 8909 + }, + { + "epoch": 0.7190702929545638, + "grad_norm": 0.6592775583267212, + "learning_rate": 0.00011790006822146113, + "loss": 2.5568, + "step": 8910 + }, + { + "epoch": 0.7191509966911468, + "grad_norm": 0.7277452349662781, + "learning_rate": 0.0001178845360756909, + "loss": 2.5989, + "step": 8911 + }, + { + "epoch": 0.7192317004277298, + "grad_norm": 0.7020131945610046, + "learning_rate": 0.00011786900348419128, + "loss": 2.645, + "step": 8912 + }, + { + "epoch": 0.7193124041643129, + "grad_norm": 0.6746636629104614, + "learning_rate": 0.00011785347044734938, + "loss": 2.5173, + "step": 8913 + }, + { + "epoch": 0.7193931079008958, + "grad_norm": 0.6782798171043396, + "learning_rate": 0.0001178379369655523, + "loss": 2.6007, + "step": 8914 + }, + { + "epoch": 0.7194738116374788, + "grad_norm": 0.705498218536377, + "learning_rate": 0.00011782240303918724, + "loss": 2.5408, + "step": 8915 + }, + { + "epoch": 0.7195545153740618, + 
"grad_norm": 0.675532341003418, + "learning_rate": 0.00011780686866864128, + "loss": 2.5188, + "step": 8916 + }, + { + "epoch": 0.7196352191106449, + "grad_norm": 0.6552390456199646, + "learning_rate": 0.00011779133385430161, + "loss": 2.5409, + "step": 8917 + }, + { + "epoch": 0.7197159228472279, + "grad_norm": 0.6589654088020325, + "learning_rate": 0.00011777579859655544, + "loss": 2.5447, + "step": 8918 + }, + { + "epoch": 0.7197966265838108, + "grad_norm": 0.7548382878303528, + "learning_rate": 0.00011776026289578985, + "loss": 2.5239, + "step": 8919 + }, + { + "epoch": 0.7198773303203938, + "grad_norm": 0.697325587272644, + "learning_rate": 0.00011774472675239207, + "loss": 2.5887, + "step": 8920 + }, + { + "epoch": 0.7199580340569769, + "grad_norm": 0.734462320804596, + "learning_rate": 0.00011772919016674934, + "loss": 2.5847, + "step": 8921 + }, + { + "epoch": 0.7200387377935599, + "grad_norm": 0.6736955642700195, + "learning_rate": 0.00011771365313924886, + "loss": 2.558, + "step": 8922 + }, + { + "epoch": 0.7201194415301428, + "grad_norm": 0.7157856822013855, + "learning_rate": 0.00011769811567027784, + "loss": 2.6199, + "step": 8923 + }, + { + "epoch": 0.7202001452667258, + "grad_norm": 0.7045830488204956, + "learning_rate": 0.0001176825777602235, + "loss": 2.576, + "step": 8924 + }, + { + "epoch": 0.7202808490033088, + "grad_norm": 0.6875419020652771, + "learning_rate": 0.00011766703940947308, + "loss": 2.6045, + "step": 8925 + }, + { + "epoch": 0.7203615527398919, + "grad_norm": 0.7313494086265564, + "learning_rate": 0.00011765150061841387, + "loss": 2.5388, + "step": 8926 + }, + { + "epoch": 0.7204422564764749, + "grad_norm": 0.7223608493804932, + "learning_rate": 0.00011763596138743313, + "loss": 2.5466, + "step": 8927 + }, + { + "epoch": 0.7205229602130578, + "grad_norm": 0.7289614081382751, + "learning_rate": 0.00011762042171691816, + "loss": 2.5862, + "step": 8928 + }, + { + "epoch": 0.7206036639496408, + "grad_norm": 0.7098878026008606, + 
"learning_rate": 0.00011760488160725617, + "loss": 2.5497, + "step": 8929 + }, + { + "epoch": 0.7206843676862239, + "grad_norm": 0.7096838355064392, + "learning_rate": 0.00011758934105883452, + "loss": 2.558, + "step": 8930 + }, + { + "epoch": 0.7207650714228069, + "grad_norm": 0.7334743738174438, + "learning_rate": 0.00011757380007204055, + "loss": 2.5966, + "step": 8931 + }, + { + "epoch": 0.7208457751593899, + "grad_norm": 0.7192476391792297, + "learning_rate": 0.00011755825864726149, + "loss": 2.5307, + "step": 8932 + }, + { + "epoch": 0.7209264788959728, + "grad_norm": 0.7329632043838501, + "learning_rate": 0.00011754271678488478, + "loss": 2.6453, + "step": 8933 + }, + { + "epoch": 0.7210071826325559, + "grad_norm": 0.6827974915504456, + "learning_rate": 0.00011752717448529766, + "loss": 2.5507, + "step": 8934 + }, + { + "epoch": 0.7210878863691389, + "grad_norm": 0.8292449116706848, + "learning_rate": 0.00011751163174888756, + "loss": 2.6178, + "step": 8935 + }, + { + "epoch": 0.7211685901057219, + "grad_norm": 0.6504058837890625, + "learning_rate": 0.00011749608857604183, + "loss": 2.574, + "step": 8936 + }, + { + "epoch": 0.7212492938423049, + "grad_norm": 0.6567742824554443, + "learning_rate": 0.00011748054496714785, + "loss": 2.45, + "step": 8937 + }, + { + "epoch": 0.7213299975788879, + "grad_norm": 0.6699101328849792, + "learning_rate": 0.00011746500092259296, + "loss": 2.5827, + "step": 8938 + }, + { + "epoch": 0.7214107013154709, + "grad_norm": 0.7664934992790222, + "learning_rate": 0.0001174494564427646, + "loss": 2.5246, + "step": 8939 + }, + { + "epoch": 0.7214914050520539, + "grad_norm": 0.7276309132575989, + "learning_rate": 0.00011743391152805017, + "loss": 2.6096, + "step": 8940 + }, + { + "epoch": 0.7215721087886369, + "grad_norm": 0.7248005867004395, + "learning_rate": 0.0001174183661788371, + "loss": 2.6362, + "step": 8941 + }, + { + "epoch": 0.72165281252522, + "grad_norm": 0.7773801684379578, + "learning_rate": 0.00011740282039551282, + 
"loss": 2.547, + "step": 8942 + }, + { + "epoch": 0.7217335162618029, + "grad_norm": 0.7346466779708862, + "learning_rate": 0.00011738727417846476, + "loss": 2.5635, + "step": 8943 + }, + { + "epoch": 0.7218142199983859, + "grad_norm": 0.7042707800865173, + "learning_rate": 0.0001173717275280804, + "loss": 2.5593, + "step": 8944 + }, + { + "epoch": 0.7218949237349689, + "grad_norm": 0.6894899010658264, + "learning_rate": 0.00011735618044474712, + "loss": 2.5272, + "step": 8945 + }, + { + "epoch": 0.721975627471552, + "grad_norm": 0.6643744111061096, + "learning_rate": 0.00011734063292885249, + "loss": 2.6001, + "step": 8946 + }, + { + "epoch": 0.722056331208135, + "grad_norm": 0.7543076276779175, + "learning_rate": 0.00011732508498078396, + "loss": 2.558, + "step": 8947 + }, + { + "epoch": 0.7221370349447179, + "grad_norm": 0.7065596580505371, + "learning_rate": 0.00011730953660092903, + "loss": 2.6255, + "step": 8948 + }, + { + "epoch": 0.7222177386813009, + "grad_norm": 0.6968158483505249, + "learning_rate": 0.0001172939877896752, + "loss": 2.5277, + "step": 8949 + }, + { + "epoch": 0.722298442417884, + "grad_norm": 0.6918557286262512, + "learning_rate": 0.00011727843854740996, + "loss": 2.5456, + "step": 8950 + }, + { + "epoch": 0.722379146154467, + "grad_norm": 0.7262142300605774, + "learning_rate": 0.00011726288887452088, + "loss": 2.5345, + "step": 8951 + }, + { + "epoch": 0.7224598498910499, + "grad_norm": 0.7423329949378967, + "learning_rate": 0.00011724733877139548, + "loss": 2.6335, + "step": 8952 + }, + { + "epoch": 0.7225405536276329, + "grad_norm": 0.7734495997428894, + "learning_rate": 0.00011723178823842136, + "loss": 2.5951, + "step": 8953 + }, + { + "epoch": 0.722621257364216, + "grad_norm": 0.6792804598808289, + "learning_rate": 0.00011721623727598597, + "loss": 2.5927, + "step": 8954 + }, + { + "epoch": 0.722701961100799, + "grad_norm": 0.7971853017807007, + "learning_rate": 0.00011720068588447697, + "loss": 2.5451, + "step": 8955 + }, + { + 
"epoch": 0.722782664837382, + "grad_norm": 0.7264395356178284, + "learning_rate": 0.00011718513406428189, + "loss": 2.5769, + "step": 8956 + }, + { + "epoch": 0.7228633685739649, + "grad_norm": 0.6536725759506226, + "learning_rate": 0.0001171695818157884, + "loss": 2.6285, + "step": 8957 + }, + { + "epoch": 0.722944072310548, + "grad_norm": 0.6676235198974609, + "learning_rate": 0.000117154029139384, + "loss": 2.5896, + "step": 8958 + }, + { + "epoch": 0.723024776047131, + "grad_norm": 0.7104088664054871, + "learning_rate": 0.00011713847603545636, + "loss": 2.5606, + "step": 8959 + }, + { + "epoch": 0.723105479783714, + "grad_norm": 0.6646785140037537, + "learning_rate": 0.0001171229225043931, + "loss": 2.5617, + "step": 8960 + }, + { + "epoch": 0.723186183520297, + "grad_norm": 0.7148672342300415, + "learning_rate": 0.00011710736854658186, + "loss": 2.5855, + "step": 8961 + }, + { + "epoch": 0.72326688725688, + "grad_norm": 0.6864955425262451, + "learning_rate": 0.00011709181416241028, + "loss": 2.6098, + "step": 8962 + }, + { + "epoch": 0.723347590993463, + "grad_norm": 0.7049087285995483, + "learning_rate": 0.00011707625935226602, + "loss": 2.506, + "step": 8963 + }, + { + "epoch": 0.723428294730046, + "grad_norm": 0.6419759392738342, + "learning_rate": 0.00011706070411653672, + "loss": 2.5485, + "step": 8964 + }, + { + "epoch": 0.723508998466629, + "grad_norm": 0.6879174709320068, + "learning_rate": 0.00011704514845561007, + "loss": 2.5373, + "step": 8965 + }, + { + "epoch": 0.7235897022032121, + "grad_norm": 0.6473780274391174, + "learning_rate": 0.00011702959236987378, + "loss": 2.5479, + "step": 8966 + }, + { + "epoch": 0.723670405939795, + "grad_norm": 0.6924241185188293, + "learning_rate": 0.00011701403585971553, + "loss": 2.5679, + "step": 8967 + }, + { + "epoch": 0.723751109676378, + "grad_norm": 0.7452483773231506, + "learning_rate": 0.00011699847892552305, + "loss": 2.5043, + "step": 8968 + }, + { + "epoch": 0.723831813412961, + "grad_norm": 
0.7517218589782715, + "learning_rate": 0.00011698292156768402, + "loss": 2.5554, + "step": 8969 + }, + { + "epoch": 0.7239125171495441, + "grad_norm": 0.6492432355880737, + "learning_rate": 0.00011696736378658618, + "loss": 2.6091, + "step": 8970 + }, + { + "epoch": 0.723993220886127, + "grad_norm": 0.740093469619751, + "learning_rate": 0.0001169518055826173, + "loss": 2.5629, + "step": 8971 + }, + { + "epoch": 0.72407392462271, + "grad_norm": 0.7186923027038574, + "learning_rate": 0.00011693624695616509, + "loss": 2.5537, + "step": 8972 + }, + { + "epoch": 0.724154628359293, + "grad_norm": 0.7066059112548828, + "learning_rate": 0.00011692068790761737, + "loss": 2.5115, + "step": 8973 + }, + { + "epoch": 0.7242353320958761, + "grad_norm": 0.7031805515289307, + "learning_rate": 0.00011690512843736185, + "loss": 2.596, + "step": 8974 + }, + { + "epoch": 0.7243160358324591, + "grad_norm": 0.7308956384658813, + "learning_rate": 0.00011688956854578635, + "loss": 2.6311, + "step": 8975 + }, + { + "epoch": 0.724396739569042, + "grad_norm": 0.6926052570343018, + "learning_rate": 0.00011687400823327863, + "loss": 2.5659, + "step": 8976 + }, + { + "epoch": 0.724477443305625, + "grad_norm": 0.69638991355896, + "learning_rate": 0.00011685844750022654, + "loss": 2.4792, + "step": 8977 + }, + { + "epoch": 0.724558147042208, + "grad_norm": 0.6858355402946472, + "learning_rate": 0.00011684288634701785, + "loss": 2.5707, + "step": 8978 + }, + { + "epoch": 0.7246388507787911, + "grad_norm": 0.6673639416694641, + "learning_rate": 0.00011682732477404044, + "loss": 2.5627, + "step": 8979 + }, + { + "epoch": 0.7247195545153741, + "grad_norm": 0.7174322605133057, + "learning_rate": 0.00011681176278168206, + "loss": 2.5801, + "step": 8980 + }, + { + "epoch": 0.724800258251957, + "grad_norm": 0.6840930581092834, + "learning_rate": 0.00011679620037033064, + "loss": 2.4994, + "step": 8981 + }, + { + "epoch": 0.72488096198854, + "grad_norm": 0.7179884910583496, + "learning_rate": 
0.00011678063754037399, + "loss": 2.6408, + "step": 8982 + }, + { + "epoch": 0.7249616657251231, + "grad_norm": 0.6564825773239136, + "learning_rate": 0.00011676507429219998, + "loss": 2.5412, + "step": 8983 + }, + { + "epoch": 0.7250423694617061, + "grad_norm": 0.7020624876022339, + "learning_rate": 0.00011674951062619652, + "loss": 2.5778, + "step": 8984 + }, + { + "epoch": 0.7251230731982891, + "grad_norm": 0.8061255812644958, + "learning_rate": 0.00011673394654275145, + "loss": 2.5581, + "step": 8985 + }, + { + "epoch": 0.725203776934872, + "grad_norm": 0.7653982043266296, + "learning_rate": 0.00011671838204225267, + "loss": 2.5324, + "step": 8986 + }, + { + "epoch": 0.7252844806714551, + "grad_norm": 0.7168377041816711, + "learning_rate": 0.00011670281712508816, + "loss": 2.6357, + "step": 8987 + }, + { + "epoch": 0.7253651844080381, + "grad_norm": 0.6860470771789551, + "learning_rate": 0.00011668725179164575, + "loss": 2.5367, + "step": 8988 + }, + { + "epoch": 0.7254458881446211, + "grad_norm": 0.7175878286361694, + "learning_rate": 0.00011667168604231342, + "loss": 2.549, + "step": 8989 + }, + { + "epoch": 0.725526591881204, + "grad_norm": 0.7124783992767334, + "learning_rate": 0.00011665611987747907, + "loss": 2.5566, + "step": 8990 + }, + { + "epoch": 0.7256072956177871, + "grad_norm": 0.6575417518615723, + "learning_rate": 0.00011664055329753067, + "loss": 2.5455, + "step": 8991 + }, + { + "epoch": 0.7256879993543701, + "grad_norm": 0.6576877236366272, + "learning_rate": 0.00011662498630285623, + "loss": 2.5596, + "step": 8992 + }, + { + "epoch": 0.7257687030909531, + "grad_norm": 0.7235110402107239, + "learning_rate": 0.00011660941889384365, + "loss": 2.6199, + "step": 8993 + }, + { + "epoch": 0.7258494068275361, + "grad_norm": 0.6623982787132263, + "learning_rate": 0.00011659385107088092, + "loss": 2.5642, + "step": 8994 + }, + { + "epoch": 0.7259301105641192, + "grad_norm": 0.7113857865333557, + "learning_rate": 0.00011657828283435605, + "loss": 
2.5631, + "step": 8995 + }, + { + "epoch": 0.7260108143007021, + "grad_norm": 0.7076124548912048, + "learning_rate": 0.00011656271418465702, + "loss": 2.5141, + "step": 8996 + }, + { + "epoch": 0.7260915180372851, + "grad_norm": 0.7534562349319458, + "learning_rate": 0.00011654714512217188, + "loss": 2.5896, + "step": 8997 + }, + { + "epoch": 0.7261722217738681, + "grad_norm": 0.7393170595169067, + "learning_rate": 0.00011653157564728865, + "loss": 2.5848, + "step": 8998 + }, + { + "epoch": 0.7262529255104512, + "grad_norm": 0.6829591989517212, + "learning_rate": 0.0001165160057603953, + "loss": 2.5439, + "step": 8999 + }, + { + "epoch": 0.7263336292470342, + "grad_norm": 0.6527189016342163, + "learning_rate": 0.00011650043546187995, + "loss": 2.5655, + "step": 9000 + }, + { + "epoch": 0.7263336292470342, + "eval_loss": 2.487652063369751, + "eval_runtime": 845.9129, + "eval_samples_per_second": 3.097, + "eval_steps_per_second": 0.517, + "step": 9000 + }, + { + "epoch": 0.7264143329836171, + "grad_norm": 0.6545615196228027, + "learning_rate": 0.00011648486475213058, + "loss": 2.5366, + "step": 9001 + }, + { + "epoch": 0.7264950367202001, + "grad_norm": 0.6854971647262573, + "learning_rate": 0.00011646929363153529, + "loss": 2.5832, + "step": 9002 + }, + { + "epoch": 0.7265757404567832, + "grad_norm": 0.7745552062988281, + "learning_rate": 0.00011645372210048218, + "loss": 2.5854, + "step": 9003 + }, + { + "epoch": 0.7266564441933662, + "grad_norm": 0.7159156203269958, + "learning_rate": 0.00011643815015935928, + "loss": 2.614, + "step": 9004 + }, + { + "epoch": 0.7267371479299491, + "grad_norm": 0.700074315071106, + "learning_rate": 0.00011642257780855475, + "loss": 2.6124, + "step": 9005 + }, + { + "epoch": 0.7268178516665321, + "grad_norm": 0.7367869019508362, + "learning_rate": 0.0001164070050484566, + "loss": 2.5512, + "step": 9006 + }, + { + "epoch": 0.7268985554031152, + "grad_norm": 0.6623905897140503, + "learning_rate": 0.00011639143187945301, + "loss": 
2.5724, + "step": 9007 + }, + { + "epoch": 0.7269792591396982, + "grad_norm": 0.7111610770225525, + "learning_rate": 0.0001163758583019321, + "loss": 2.547, + "step": 9008 + }, + { + "epoch": 0.7270599628762812, + "grad_norm": 0.6860959529876709, + "learning_rate": 0.00011636028431628199, + "loss": 2.532, + "step": 9009 + }, + { + "epoch": 0.7271406666128641, + "grad_norm": 0.7606309056282043, + "learning_rate": 0.00011634470992289084, + "loss": 2.5214, + "step": 9010 + }, + { + "epoch": 0.7272213703494472, + "grad_norm": 0.6440508365631104, + "learning_rate": 0.00011632913512214677, + "loss": 2.5554, + "step": 9011 + }, + { + "epoch": 0.7273020740860302, + "grad_norm": 0.6770462393760681, + "learning_rate": 0.00011631355991443796, + "loss": 2.5877, + "step": 9012 + }, + { + "epoch": 0.7273827778226132, + "grad_norm": 0.6419155597686768, + "learning_rate": 0.00011629798430015262, + "loss": 2.5337, + "step": 9013 + }, + { + "epoch": 0.7274634815591962, + "grad_norm": 0.6782121658325195, + "learning_rate": 0.00011628240827967891, + "loss": 2.5152, + "step": 9014 + }, + { + "epoch": 0.7275441852957792, + "grad_norm": 0.6972285509109497, + "learning_rate": 0.00011626683185340501, + "loss": 2.5628, + "step": 9015 + }, + { + "epoch": 0.7276248890323622, + "grad_norm": 0.6823342442512512, + "learning_rate": 0.00011625125502171914, + "loss": 2.5977, + "step": 9016 + }, + { + "epoch": 0.7277055927689452, + "grad_norm": 0.723311722278595, + "learning_rate": 0.0001162356777850095, + "loss": 2.5772, + "step": 9017 + }, + { + "epoch": 0.7277862965055282, + "grad_norm": 0.7395427227020264, + "learning_rate": 0.00011622010014366435, + "loss": 2.6068, + "step": 9018 + }, + { + "epoch": 0.7278670002421113, + "grad_norm": 0.6970974206924438, + "learning_rate": 0.00011620452209807192, + "loss": 2.5577, + "step": 9019 + }, + { + "epoch": 0.7279477039786942, + "grad_norm": 0.6921418309211731, + "learning_rate": 0.0001161889436486204, + "loss": 2.5476, + "step": 9020 + }, + { + "epoch": 
0.7280284077152772, + "grad_norm": 0.7243841886520386, + "learning_rate": 0.0001161733647956981, + "loss": 2.579, + "step": 9021 + }, + { + "epoch": 0.7281091114518602, + "grad_norm": 0.7240262627601624, + "learning_rate": 0.0001161577855396933, + "loss": 2.5959, + "step": 9022 + }, + { + "epoch": 0.7281898151884433, + "grad_norm": 0.7215476632118225, + "learning_rate": 0.0001161422058809942, + "loss": 2.5979, + "step": 9023 + }, + { + "epoch": 0.7282705189250263, + "grad_norm": 0.7109708786010742, + "learning_rate": 0.00011612662581998917, + "loss": 2.5912, + "step": 9024 + }, + { + "epoch": 0.7283512226616092, + "grad_norm": 0.6814073920249939, + "learning_rate": 0.00011611104535706645, + "loss": 2.5742, + "step": 9025 + }, + { + "epoch": 0.7284319263981922, + "grad_norm": 0.6788144707679749, + "learning_rate": 0.0001160954644926144, + "loss": 2.5656, + "step": 9026 + }, + { + "epoch": 0.7285126301347752, + "grad_norm": 0.7312989830970764, + "learning_rate": 0.00011607988322702126, + "loss": 2.5877, + "step": 9027 + }, + { + "epoch": 0.7285933338713583, + "grad_norm": 0.6725338697433472, + "learning_rate": 0.0001160643015606754, + "loss": 2.5261, + "step": 9028 + }, + { + "epoch": 0.7286740376079412, + "grad_norm": 0.7439326047897339, + "learning_rate": 0.00011604871949396516, + "loss": 2.603, + "step": 9029 + }, + { + "epoch": 0.7287547413445242, + "grad_norm": 0.7091783285140991, + "learning_rate": 0.00011603313702727889, + "loss": 2.5227, + "step": 9030 + }, + { + "epoch": 0.7288354450811072, + "grad_norm": 0.7474398016929626, + "learning_rate": 0.00011601755416100492, + "loss": 2.616, + "step": 9031 + }, + { + "epoch": 0.7289161488176903, + "grad_norm": 0.6904098987579346, + "learning_rate": 0.00011600197089553162, + "loss": 2.556, + "step": 9032 + }, + { + "epoch": 0.7289968525542733, + "grad_norm": 0.7305783033370972, + "learning_rate": 0.00011598638723124739, + "loss": 2.5633, + "step": 9033 + }, + { + "epoch": 0.7290775562908562, + "grad_norm": 
0.6626651883125305, + "learning_rate": 0.00011597080316854062, + "loss": 2.5862, + "step": 9034 + }, + { + "epoch": 0.7291582600274392, + "grad_norm": 0.683102548122406, + "learning_rate": 0.00011595521870779968, + "loss": 2.5629, + "step": 9035 + }, + { + "epoch": 0.7292389637640223, + "grad_norm": 0.7486757636070251, + "learning_rate": 0.00011593963384941295, + "loss": 2.5831, + "step": 9036 + }, + { + "epoch": 0.7293196675006053, + "grad_norm": 0.8059591054916382, + "learning_rate": 0.00011592404859376888, + "loss": 2.6414, + "step": 9037 + }, + { + "epoch": 0.7294003712371883, + "grad_norm": 0.8371721506118774, + "learning_rate": 0.00011590846294125594, + "loss": 2.643, + "step": 9038 + }, + { + "epoch": 0.7294810749737712, + "grad_norm": 0.7216931581497192, + "learning_rate": 0.00011589287689226246, + "loss": 2.6, + "step": 9039 + }, + { + "epoch": 0.7295617787103543, + "grad_norm": 0.6940354704856873, + "learning_rate": 0.00011587729044717701, + "loss": 2.546, + "step": 9040 + }, + { + "epoch": 0.7296424824469373, + "grad_norm": 0.6888829469680786, + "learning_rate": 0.00011586170360638792, + "loss": 2.5878, + "step": 9041 + }, + { + "epoch": 0.7297231861835203, + "grad_norm": 0.6863886117935181, + "learning_rate": 0.00011584611637028373, + "loss": 2.5389, + "step": 9042 + }, + { + "epoch": 0.7298038899201033, + "grad_norm": 0.6670756936073303, + "learning_rate": 0.00011583052873925294, + "loss": 2.5465, + "step": 9043 + }, + { + "epoch": 0.7298845936566863, + "grad_norm": 0.7441220879554749, + "learning_rate": 0.00011581494071368392, + "loss": 2.5679, + "step": 9044 + }, + { + "epoch": 0.7299652973932693, + "grad_norm": 0.7135717272758484, + "learning_rate": 0.0001157993522939653, + "loss": 2.5341, + "step": 9045 + }, + { + "epoch": 0.7300460011298523, + "grad_norm": 0.6837992072105408, + "learning_rate": 0.00011578376348048547, + "loss": 2.5233, + "step": 9046 + }, + { + "epoch": 0.7301267048664353, + "grad_norm": 0.706666886806488, + "learning_rate": 
0.00011576817427363302, + "loss": 2.6109, + "step": 9047 + }, + { + "epoch": 0.7302074086030184, + "grad_norm": 0.6856269240379333, + "learning_rate": 0.00011575258467379646, + "loss": 2.5651, + "step": 9048 + }, + { + "epoch": 0.7302881123396013, + "grad_norm": 0.6931480169296265, + "learning_rate": 0.00011573699468136427, + "loss": 2.6031, + "step": 9049 + }, + { + "epoch": 0.7303688160761843, + "grad_norm": 0.6558480858802795, + "learning_rate": 0.00011572140429672508, + "loss": 2.5661, + "step": 9050 + }, + { + "epoch": 0.7304495198127673, + "grad_norm": 0.6468425393104553, + "learning_rate": 0.00011570581352026742, + "loss": 2.5171, + "step": 9051 + }, + { + "epoch": 0.7305302235493504, + "grad_norm": 0.7204702496528625, + "learning_rate": 0.00011569022235237974, + "loss": 2.5861, + "step": 9052 + }, + { + "epoch": 0.7306109272859334, + "grad_norm": 0.7536416053771973, + "learning_rate": 0.00011567463079345078, + "loss": 2.633, + "step": 9053 + }, + { + "epoch": 0.7306916310225163, + "grad_norm": 0.6597960591316223, + "learning_rate": 0.00011565903884386904, + "loss": 2.5327, + "step": 9054 + }, + { + "epoch": 0.7307723347590993, + "grad_norm": 0.689153254032135, + "learning_rate": 0.0001156434465040231, + "loss": 2.5397, + "step": 9055 + }, + { + "epoch": 0.7308530384956824, + "grad_norm": 0.7664844393730164, + "learning_rate": 0.00011562785377430159, + "loss": 2.4852, + "step": 9056 + }, + { + "epoch": 0.7309337422322654, + "grad_norm": 0.7122881412506104, + "learning_rate": 0.0001156122606550931, + "loss": 2.5401, + "step": 9057 + }, + { + "epoch": 0.7310144459688483, + "grad_norm": 0.6937551498413086, + "learning_rate": 0.00011559666714678627, + "loss": 2.5705, + "step": 9058 + }, + { + "epoch": 0.7310951497054313, + "grad_norm": 0.6504047513008118, + "learning_rate": 0.00011558107324976974, + "loss": 2.5638, + "step": 9059 + }, + { + "epoch": 0.7311758534420144, + "grad_norm": 0.7759538888931274, + "learning_rate": 0.0001155654789644321, + "loss": 2.5864, 
+ "step": 9060 + }, + { + "epoch": 0.7312565571785974, + "grad_norm": 0.719859778881073, + "learning_rate": 0.00011554988429116207, + "loss": 2.519, + "step": 9061 + }, + { + "epoch": 0.7313372609151804, + "grad_norm": 0.7159178853034973, + "learning_rate": 0.00011553428923034826, + "loss": 2.5301, + "step": 9062 + }, + { + "epoch": 0.7314179646517633, + "grad_norm": 0.6584001183509827, + "learning_rate": 0.00011551869378237934, + "loss": 2.4716, + "step": 9063 + }, + { + "epoch": 0.7314986683883464, + "grad_norm": 0.6548463702201843, + "learning_rate": 0.00011550309794764405, + "loss": 2.5637, + "step": 9064 + }, + { + "epoch": 0.7315793721249294, + "grad_norm": 0.73887699842453, + "learning_rate": 0.000115487501726531, + "loss": 2.5813, + "step": 9065 + }, + { + "epoch": 0.7316600758615124, + "grad_norm": 0.7856181859970093, + "learning_rate": 0.00011547190511942893, + "loss": 2.592, + "step": 9066 + }, + { + "epoch": 0.7317407795980954, + "grad_norm": 0.7040740847587585, + "learning_rate": 0.00011545630812672654, + "loss": 2.5324, + "step": 9067 + }, + { + "epoch": 0.7318214833346784, + "grad_norm": 0.7316064238548279, + "learning_rate": 0.00011544071074881253, + "loss": 2.5487, + "step": 9068 + }, + { + "epoch": 0.7319021870712614, + "grad_norm": 0.7020413279533386, + "learning_rate": 0.00011542511298607568, + "loss": 2.5179, + "step": 9069 + }, + { + "epoch": 0.7319828908078444, + "grad_norm": 0.672605574131012, + "learning_rate": 0.00011540951483890468, + "loss": 2.5367, + "step": 9070 + }, + { + "epoch": 0.7320635945444274, + "grad_norm": 0.7668856382369995, + "learning_rate": 0.00011539391630768828, + "loss": 2.6089, + "step": 9071 + }, + { + "epoch": 0.7321442982810105, + "grad_norm": 0.6641809940338135, + "learning_rate": 0.00011537831739281524, + "loss": 2.5411, + "step": 9072 + }, + { + "epoch": 0.7322250020175934, + "grad_norm": 0.7142000198364258, + "learning_rate": 0.00011536271809467434, + "loss": 2.5469, + "step": 9073 + }, + { + "epoch": 
0.7323057057541764, + "grad_norm": 0.7266140580177307, + "learning_rate": 0.00011534711841365435, + "loss": 2.5565, + "step": 9074 + }, + { + "epoch": 0.7323864094907594, + "grad_norm": 0.6763899326324463, + "learning_rate": 0.00011533151835014407, + "loss": 2.551, + "step": 9075 + }, + { + "epoch": 0.7324671132273425, + "grad_norm": 0.6517418026924133, + "learning_rate": 0.00011531591790453224, + "loss": 2.5415, + "step": 9076 + }, + { + "epoch": 0.7325478169639255, + "grad_norm": 0.6602214574813843, + "learning_rate": 0.00011530031707720772, + "loss": 2.593, + "step": 9077 + }, + { + "epoch": 0.7326285207005084, + "grad_norm": 0.7448844313621521, + "learning_rate": 0.00011528471586855931, + "loss": 2.5598, + "step": 9078 + }, + { + "epoch": 0.7327092244370914, + "grad_norm": 0.7197073698043823, + "learning_rate": 0.00011526911427897579, + "loss": 2.5128, + "step": 9079 + }, + { + "epoch": 0.7327899281736744, + "grad_norm": 0.7245968580245972, + "learning_rate": 0.00011525351230884606, + "loss": 2.5016, + "step": 9080 + }, + { + "epoch": 0.7328706319102575, + "grad_norm": 0.6715837717056274, + "learning_rate": 0.00011523790995855892, + "loss": 2.5469, + "step": 9081 + }, + { + "epoch": 0.7329513356468405, + "grad_norm": 0.7143638730049133, + "learning_rate": 0.00011522230722850325, + "loss": 2.5164, + "step": 9082 + }, + { + "epoch": 0.7330320393834234, + "grad_norm": 0.6809647083282471, + "learning_rate": 0.00011520670411906787, + "loss": 2.6071, + "step": 9083 + }, + { + "epoch": 0.7331127431200064, + "grad_norm": 0.7160956859588623, + "learning_rate": 0.00011519110063064167, + "loss": 2.5346, + "step": 9084 + }, + { + "epoch": 0.7331934468565895, + "grad_norm": 0.6814724802970886, + "learning_rate": 0.00011517549676361357, + "loss": 2.5499, + "step": 9085 + }, + { + "epoch": 0.7332741505931725, + "grad_norm": 0.6914821267127991, + "learning_rate": 0.00011515989251837239, + "loss": 2.5386, + "step": 9086 + }, + { + "epoch": 0.7333548543297554, + "grad_norm": 
0.7292554378509521, + "learning_rate": 0.00011514428789530705, + "loss": 2.5642, + "step": 9087 + }, + { + "epoch": 0.7334355580663384, + "grad_norm": 0.6894826292991638, + "learning_rate": 0.00011512868289480647, + "loss": 2.6131, + "step": 9088 + }, + { + "epoch": 0.7335162618029215, + "grad_norm": 0.658770740032196, + "learning_rate": 0.00011511307751725957, + "loss": 2.5594, + "step": 9089 + }, + { + "epoch": 0.7335969655395045, + "grad_norm": 0.7508681416511536, + "learning_rate": 0.0001150974717630553, + "loss": 2.595, + "step": 9090 + }, + { + "epoch": 0.7336776692760875, + "grad_norm": 0.69661545753479, + "learning_rate": 0.00011508186563258256, + "loss": 2.5803, + "step": 9091 + }, + { + "epoch": 0.7337583730126704, + "grad_norm": 0.7277412414550781, + "learning_rate": 0.00011506625912623028, + "loss": 2.5456, + "step": 9092 + }, + { + "epoch": 0.7338390767492535, + "grad_norm": 0.658329963684082, + "learning_rate": 0.00011505065224438745, + "loss": 2.5177, + "step": 9093 + }, + { + "epoch": 0.7339197804858365, + "grad_norm": 0.7277211546897888, + "learning_rate": 0.00011503504498744302, + "loss": 2.553, + "step": 9094 + }, + { + "epoch": 0.7340004842224195, + "grad_norm": 0.7240201830863953, + "learning_rate": 0.00011501943735578598, + "loss": 2.5851, + "step": 9095 + }, + { + "epoch": 0.7340811879590025, + "grad_norm": 0.6565662026405334, + "learning_rate": 0.00011500382934980529, + "loss": 2.5865, + "step": 9096 + }, + { + "epoch": 0.7341618916955855, + "grad_norm": 0.658268392086029, + "learning_rate": 0.00011498822096988995, + "loss": 2.5402, + "step": 9097 + }, + { + "epoch": 0.7342425954321685, + "grad_norm": 0.7305087447166443, + "learning_rate": 0.00011497261221642894, + "loss": 2.5483, + "step": 9098 + }, + { + "epoch": 0.7343232991687515, + "grad_norm": 0.7271504402160645, + "learning_rate": 0.00011495700308981134, + "loss": 2.5303, + "step": 9099 + }, + { + "epoch": 0.7344040029053345, + "grad_norm": 0.70429527759552, + "learning_rate": 
0.0001149413935904261, + "loss": 2.5878, + "step": 9100 + }, + { + "epoch": 0.7344847066419176, + "grad_norm": 0.7168769836425781, + "learning_rate": 0.00011492578371866229, + "loss": 2.6017, + "step": 9101 + }, + { + "epoch": 0.7345654103785005, + "grad_norm": 0.7131996154785156, + "learning_rate": 0.00011491017347490891, + "loss": 2.5439, + "step": 9102 + }, + { + "epoch": 0.7346461141150835, + "grad_norm": 0.660321056842804, + "learning_rate": 0.00011489456285955504, + "loss": 2.5236, + "step": 9103 + }, + { + "epoch": 0.7347268178516665, + "grad_norm": 0.6742995977401733, + "learning_rate": 0.00011487895187298977, + "loss": 2.5375, + "step": 9104 + }, + { + "epoch": 0.7348075215882496, + "grad_norm": 0.6380610466003418, + "learning_rate": 0.00011486334051560206, + "loss": 2.5173, + "step": 9105 + }, + { + "epoch": 0.7348882253248326, + "grad_norm": 0.6948198080062866, + "learning_rate": 0.0001148477287877811, + "loss": 2.5247, + "step": 9106 + }, + { + "epoch": 0.7349689290614155, + "grad_norm": 0.7088696360588074, + "learning_rate": 0.00011483211668991591, + "loss": 2.587, + "step": 9107 + }, + { + "epoch": 0.7350496327979985, + "grad_norm": 0.6278921961784363, + "learning_rate": 0.00011481650422239556, + "loss": 2.5652, + "step": 9108 + }, + { + "epoch": 0.7351303365345816, + "grad_norm": 0.6901956796646118, + "learning_rate": 0.00011480089138560926, + "loss": 2.5964, + "step": 9109 + }, + { + "epoch": 0.7352110402711646, + "grad_norm": 0.7264819145202637, + "learning_rate": 0.00011478527817994604, + "loss": 2.5437, + "step": 9110 + }, + { + "epoch": 0.7352917440077475, + "grad_norm": 0.6940708756446838, + "learning_rate": 0.00011476966460579501, + "loss": 2.5761, + "step": 9111 + }, + { + "epoch": 0.7353724477443305, + "grad_norm": 0.689588189125061, + "learning_rate": 0.00011475405066354536, + "loss": 2.5457, + "step": 9112 + }, + { + "epoch": 0.7354531514809136, + "grad_norm": 0.6938436031341553, + "learning_rate": 0.00011473843635358618, + "loss": 2.6026, 
+ "step": 9113 + }, + { + "epoch": 0.7355338552174966, + "grad_norm": 0.7122177481651306, + "learning_rate": 0.00011472282167630663, + "loss": 2.5701, + "step": 9114 + }, + { + "epoch": 0.7356145589540796, + "grad_norm": 0.6667213439941406, + "learning_rate": 0.00011470720663209591, + "loss": 2.5944, + "step": 9115 + }, + { + "epoch": 0.7356952626906625, + "grad_norm": 0.705910861492157, + "learning_rate": 0.00011469159122134314, + "loss": 2.6183, + "step": 9116 + }, + { + "epoch": 0.7357759664272456, + "grad_norm": 0.709937572479248, + "learning_rate": 0.00011467597544443751, + "loss": 2.5153, + "step": 9117 + }, + { + "epoch": 0.7358566701638286, + "grad_norm": 0.6870958805084229, + "learning_rate": 0.00011466035930176822, + "loss": 2.5334, + "step": 9118 + }, + { + "epoch": 0.7359373739004116, + "grad_norm": 0.7274392247200012, + "learning_rate": 0.00011464474279372443, + "loss": 2.5336, + "step": 9119 + }, + { + "epoch": 0.7360180776369946, + "grad_norm": 0.6360952258110046, + "learning_rate": 0.0001146291259206954, + "loss": 2.5604, + "step": 9120 + }, + { + "epoch": 0.7360987813735776, + "grad_norm": 0.7990559935569763, + "learning_rate": 0.00011461350868307028, + "loss": 2.624, + "step": 9121 + }, + { + "epoch": 0.7361794851101606, + "grad_norm": 0.6670079827308655, + "learning_rate": 0.00011459789108123835, + "loss": 2.5761, + "step": 9122 + }, + { + "epoch": 0.7362601888467436, + "grad_norm": 0.6994437575340271, + "learning_rate": 0.00011458227311558877, + "loss": 2.5679, + "step": 9123 + }, + { + "epoch": 0.7363408925833266, + "grad_norm": 0.7428358197212219, + "learning_rate": 0.00011456665478651087, + "loss": 2.5874, + "step": 9124 + }, + { + "epoch": 0.7364215963199097, + "grad_norm": 0.7079486846923828, + "learning_rate": 0.00011455103609439387, + "loss": 2.5999, + "step": 9125 + }, + { + "epoch": 0.7365023000564926, + "grad_norm": 0.646244466304779, + "learning_rate": 0.00011453541703962695, + "loss": 2.5053, + "step": 9126 + }, + { + "epoch": 
0.7365830037930756, + "grad_norm": 0.6671318411827087, + "learning_rate": 0.0001145197976225995, + "loss": 2.5277, + "step": 9127 + }, + { + "epoch": 0.7366637075296586, + "grad_norm": 0.7060399055480957, + "learning_rate": 0.00011450417784370072, + "loss": 2.6092, + "step": 9128 + }, + { + "epoch": 0.7367444112662416, + "grad_norm": 0.741547703742981, + "learning_rate": 0.00011448855770331989, + "loss": 2.6121, + "step": 9129 + }, + { + "epoch": 0.7368251150028247, + "grad_norm": 0.710267961025238, + "learning_rate": 0.00011447293720184636, + "loss": 2.5141, + "step": 9130 + }, + { + "epoch": 0.7369058187394076, + "grad_norm": 0.6914308071136475, + "learning_rate": 0.0001144573163396694, + "loss": 2.5489, + "step": 9131 + }, + { + "epoch": 0.7369865224759906, + "grad_norm": 0.7051414847373962, + "learning_rate": 0.0001144416951171783, + "loss": 2.5925, + "step": 9132 + }, + { + "epoch": 0.7370672262125736, + "grad_norm": 0.6765387058258057, + "learning_rate": 0.00011442607353476245, + "loss": 2.5864, + "step": 9133 + }, + { + "epoch": 0.7371479299491567, + "grad_norm": 0.706672191619873, + "learning_rate": 0.00011441045159281108, + "loss": 2.4823, + "step": 9134 + }, + { + "epoch": 0.7372286336857397, + "grad_norm": 0.7534066438674927, + "learning_rate": 0.00011439482929171362, + "loss": 2.5728, + "step": 9135 + }, + { + "epoch": 0.7373093374223226, + "grad_norm": 0.6628777384757996, + "learning_rate": 0.00011437920663185939, + "loss": 2.5538, + "step": 9136 + }, + { + "epoch": 0.7373900411589056, + "grad_norm": 0.6575733423233032, + "learning_rate": 0.00011436358361363773, + "loss": 2.4802, + "step": 9137 + }, + { + "epoch": 0.7374707448954887, + "grad_norm": 0.7629329562187195, + "learning_rate": 0.00011434796023743803, + "loss": 2.6169, + "step": 9138 + }, + { + "epoch": 0.7375514486320717, + "grad_norm": 0.7148225903511047, + "learning_rate": 0.00011433233650364965, + "loss": 2.6335, + "step": 9139 + }, + { + "epoch": 0.7376321523686546, + "grad_norm": 
0.705210268497467, + "learning_rate": 0.00011431671241266198, + "loss": 2.6261, + "step": 9140 + }, + { + "epoch": 0.7377128561052376, + "grad_norm": 0.7137441635131836, + "learning_rate": 0.00011430108796486441, + "loss": 2.5021, + "step": 9141 + }, + { + "epoch": 0.7377935598418207, + "grad_norm": 0.6979854702949524, + "learning_rate": 0.00011428546316064635, + "loss": 2.5436, + "step": 9142 + }, + { + "epoch": 0.7378742635784037, + "grad_norm": 0.6568784713745117, + "learning_rate": 0.00011426983800039721, + "loss": 2.5882, + "step": 9143 + }, + { + "epoch": 0.7379549673149867, + "grad_norm": 0.666606605052948, + "learning_rate": 0.00011425421248450638, + "loss": 2.5472, + "step": 9144 + }, + { + "epoch": 0.7380356710515696, + "grad_norm": 0.7240840792655945, + "learning_rate": 0.00011423858661336333, + "loss": 2.6057, + "step": 9145 + }, + { + "epoch": 0.7381163747881527, + "grad_norm": 0.7342149615287781, + "learning_rate": 0.0001142229603873575, + "loss": 2.508, + "step": 9146 + }, + { + "epoch": 0.7381970785247357, + "grad_norm": 0.7089941501617432, + "learning_rate": 0.0001142073338068783, + "loss": 2.6115, + "step": 9147 + }, + { + "epoch": 0.7382777822613187, + "grad_norm": 0.6883555054664612, + "learning_rate": 0.00011419170687231519, + "loss": 2.5254, + "step": 9148 + }, + { + "epoch": 0.7383584859979017, + "grad_norm": 0.6819528937339783, + "learning_rate": 0.00011417607958405765, + "loss": 2.5498, + "step": 9149 + }, + { + "epoch": 0.7384391897344847, + "grad_norm": 0.7348979711532593, + "learning_rate": 0.00011416045194249516, + "loss": 2.5547, + "step": 9150 + }, + { + "epoch": 0.7385198934710677, + "grad_norm": 0.6733320355415344, + "learning_rate": 0.00011414482394801719, + "loss": 2.5985, + "step": 9151 + }, + { + "epoch": 0.7386005972076507, + "grad_norm": 0.714771032333374, + "learning_rate": 0.00011412919560101327, + "loss": 2.571, + "step": 9152 + }, + { + "epoch": 0.7386813009442337, + "grad_norm": 0.7010024189949036, + "learning_rate": 
0.0001141135669018728, + "loss": 2.5755, + "step": 9153 + }, + { + "epoch": 0.7387620046808168, + "grad_norm": 0.7014826536178589, + "learning_rate": 0.00011409793785098536, + "loss": 2.6033, + "step": 9154 + }, + { + "epoch": 0.7388427084173997, + "grad_norm": 0.7286051511764526, + "learning_rate": 0.0001140823084487405, + "loss": 2.515, + "step": 9155 + }, + { + "epoch": 0.7389234121539827, + "grad_norm": 0.669365406036377, + "learning_rate": 0.00011406667869552768, + "loss": 2.506, + "step": 9156 + }, + { + "epoch": 0.7390041158905657, + "grad_norm": 0.6886852979660034, + "learning_rate": 0.00011405104859173645, + "loss": 2.6123, + "step": 9157 + }, + { + "epoch": 0.7390848196271488, + "grad_norm": 0.6344162225723267, + "learning_rate": 0.00011403541813775635, + "loss": 2.5483, + "step": 9158 + }, + { + "epoch": 0.7391655233637318, + "grad_norm": 0.7043579816818237, + "learning_rate": 0.00011401978733397694, + "loss": 2.5545, + "step": 9159 + }, + { + "epoch": 0.7392462271003147, + "grad_norm": 0.7960262298583984, + "learning_rate": 0.00011400415618078781, + "loss": 2.5666, + "step": 9160 + }, + { + "epoch": 0.7393269308368977, + "grad_norm": 0.6771546006202698, + "learning_rate": 0.00011398852467857848, + "loss": 2.6016, + "step": 9161 + }, + { + "epoch": 0.7394076345734808, + "grad_norm": 0.6522069573402405, + "learning_rate": 0.00011397289282773855, + "loss": 2.5493, + "step": 9162 + }, + { + "epoch": 0.7394883383100638, + "grad_norm": 0.6804657578468323, + "learning_rate": 0.00011395726062865762, + "loss": 2.5856, + "step": 9163 + }, + { + "epoch": 0.7395690420466468, + "grad_norm": 0.7562841176986694, + "learning_rate": 0.00011394162808172526, + "loss": 2.557, + "step": 9164 + }, + { + "epoch": 0.7396497457832297, + "grad_norm": 0.6464113593101501, + "learning_rate": 0.00011392599518733107, + "loss": 2.5292, + "step": 9165 + }, + { + "epoch": 0.7397304495198128, + "grad_norm": 0.7469549775123596, + "learning_rate": 0.00011391036194586466, + "loss": 2.6168, 
+ "step": 9166 + }, + { + "epoch": 0.7398111532563958, + "grad_norm": 0.7095946669578552, + "learning_rate": 0.00011389472835771572, + "loss": 2.5468, + "step": 9167 + }, + { + "epoch": 0.7398918569929788, + "grad_norm": 0.7376375794410706, + "learning_rate": 0.00011387909442327382, + "loss": 2.5576, + "step": 9168 + }, + { + "epoch": 0.7399725607295617, + "grad_norm": 0.736727774143219, + "learning_rate": 0.00011386346014292859, + "loss": 2.6034, + "step": 9169 + }, + { + "epoch": 0.7400532644661448, + "grad_norm": 0.7026904821395874, + "learning_rate": 0.00011384782551706967, + "loss": 2.5848, + "step": 9170 + }, + { + "epoch": 0.7401339682027278, + "grad_norm": 0.6894888877868652, + "learning_rate": 0.00011383219054608678, + "loss": 2.5475, + "step": 9171 + }, + { + "epoch": 0.7402146719393108, + "grad_norm": 0.6754137277603149, + "learning_rate": 0.00011381655523036954, + "loss": 2.5124, + "step": 9172 + }, + { + "epoch": 0.7402953756758938, + "grad_norm": 0.7935643196105957, + "learning_rate": 0.00011380091957030762, + "loss": 2.5898, + "step": 9173 + }, + { + "epoch": 0.7403760794124769, + "grad_norm": 0.7017118334770203, + "learning_rate": 0.0001137852835662907, + "loss": 2.6139, + "step": 9174 + }, + { + "epoch": 0.7404567831490598, + "grad_norm": 0.7246189117431641, + "learning_rate": 0.00011376964721870847, + "loss": 2.4627, + "step": 9175 + }, + { + "epoch": 0.7405374868856428, + "grad_norm": 0.6835598349571228, + "learning_rate": 0.00011375401052795064, + "loss": 2.5707, + "step": 9176 + }, + { + "epoch": 0.7406181906222258, + "grad_norm": 0.6439787745475769, + "learning_rate": 0.00011373837349440693, + "loss": 2.5161, + "step": 9177 + }, + { + "epoch": 0.7406988943588089, + "grad_norm": 0.7249091267585754, + "learning_rate": 0.00011372273611846704, + "loss": 2.5054, + "step": 9178 + }, + { + "epoch": 0.7407795980953918, + "grad_norm": 0.7653267979621887, + "learning_rate": 0.0001137070984005207, + "loss": 2.6016, + "step": 9179 + }, + { + "epoch": 
0.7408603018319748, + "grad_norm": 0.7195165157318115, + "learning_rate": 0.0001136914603409576, + "loss": 2.5931, + "step": 9180 + }, + { + "epoch": 0.7409410055685578, + "grad_norm": 0.7093746662139893, + "learning_rate": 0.00011367582194016756, + "loss": 2.5567, + "step": 9181 + }, + { + "epoch": 0.7410217093051408, + "grad_norm": 0.6868107318878174, + "learning_rate": 0.00011366018319854026, + "loss": 2.5769, + "step": 9182 + }, + { + "epoch": 0.7411024130417239, + "grad_norm": 0.6870261430740356, + "learning_rate": 0.00011364454411646552, + "loss": 2.5418, + "step": 9183 + }, + { + "epoch": 0.7411831167783068, + "grad_norm": 0.7034662365913391, + "learning_rate": 0.00011362890469433306, + "loss": 2.5798, + "step": 9184 + }, + { + "epoch": 0.7412638205148898, + "grad_norm": 0.7200794816017151, + "learning_rate": 0.00011361326493253264, + "loss": 2.5523, + "step": 9185 + }, + { + "epoch": 0.7413445242514728, + "grad_norm": 0.7034540772438049, + "learning_rate": 0.0001135976248314541, + "loss": 2.5107, + "step": 9186 + }, + { + "epoch": 0.7414252279880559, + "grad_norm": 0.7155053019523621, + "learning_rate": 0.00011358198439148721, + "loss": 2.5804, + "step": 9187 + }, + { + "epoch": 0.7415059317246389, + "grad_norm": 0.6965398788452148, + "learning_rate": 0.00011356634361302175, + "loss": 2.5532, + "step": 9188 + }, + { + "epoch": 0.7415866354612218, + "grad_norm": 0.65416419506073, + "learning_rate": 0.00011355070249644755, + "loss": 2.5411, + "step": 9189 + }, + { + "epoch": 0.7416673391978048, + "grad_norm": 0.6798486709594727, + "learning_rate": 0.0001135350610421544, + "loss": 2.4957, + "step": 9190 + }, + { + "epoch": 0.7417480429343879, + "grad_norm": 0.6839874386787415, + "learning_rate": 0.00011351941925053218, + "loss": 2.5745, + "step": 9191 + }, + { + "epoch": 0.7418287466709709, + "grad_norm": 0.7374398708343506, + "learning_rate": 0.00011350377712197068, + "loss": 2.4923, + "step": 9192 + }, + { + "epoch": 0.7419094504075538, + "grad_norm": 
0.7517396807670593, + "learning_rate": 0.00011348813465685974, + "loss": 2.538, + "step": 9193 + }, + { + "epoch": 0.7419901541441368, + "grad_norm": 0.6670863628387451, + "learning_rate": 0.00011347249185558926, + "loss": 2.5442, + "step": 9194 + }, + { + "epoch": 0.7420708578807199, + "grad_norm": 0.6508080363273621, + "learning_rate": 0.00011345684871854905, + "loss": 2.6665, + "step": 9195 + }, + { + "epoch": 0.7421515616173029, + "grad_norm": 0.6935258507728577, + "learning_rate": 0.00011344120524612898, + "loss": 2.5388, + "step": 9196 + }, + { + "epoch": 0.7422322653538859, + "grad_norm": 0.696067750453949, + "learning_rate": 0.00011342556143871897, + "loss": 2.574, + "step": 9197 + }, + { + "epoch": 0.7423129690904688, + "grad_norm": 0.7486966252326965, + "learning_rate": 0.00011340991729670882, + "loss": 2.5924, + "step": 9198 + }, + { + "epoch": 0.7423936728270519, + "grad_norm": 0.676407516002655, + "learning_rate": 0.00011339427282048854, + "loss": 2.5907, + "step": 9199 + }, + { + "epoch": 0.7424743765636349, + "grad_norm": 0.7241318225860596, + "learning_rate": 0.00011337862801044792, + "loss": 2.5685, + "step": 9200 + }, + { + "epoch": 0.7425550803002179, + "grad_norm": 0.7012883424758911, + "learning_rate": 0.00011336298286697692, + "loss": 2.56, + "step": 9201 + }, + { + "epoch": 0.7426357840368009, + "grad_norm": 0.7313060164451599, + "learning_rate": 0.0001133473373904655, + "loss": 2.632, + "step": 9202 + }, + { + "epoch": 0.742716487773384, + "grad_norm": 0.6829206943511963, + "learning_rate": 0.00011333169158130353, + "loss": 2.5006, + "step": 9203 + }, + { + "epoch": 0.7427971915099669, + "grad_norm": 0.7324578166007996, + "learning_rate": 0.00011331604543988093, + "loss": 2.5004, + "step": 9204 + }, + { + "epoch": 0.7428778952465499, + "grad_norm": 0.6761097311973572, + "learning_rate": 0.00011330039896658766, + "loss": 2.5516, + "step": 9205 + }, + { + "epoch": 0.7429585989831329, + "grad_norm": 0.6909754276275635, + "learning_rate": 
0.00011328475216181369, + "loss": 2.5273, + "step": 9206 + }, + { + "epoch": 0.743039302719716, + "grad_norm": 0.6420674324035645, + "learning_rate": 0.00011326910502594899, + "loss": 2.5507, + "step": 9207 + }, + { + "epoch": 0.7431200064562989, + "grad_norm": 0.6442455053329468, + "learning_rate": 0.0001132534575593835, + "loss": 2.542, + "step": 9208 + }, + { + "epoch": 0.7432007101928819, + "grad_norm": 0.7053101658821106, + "learning_rate": 0.0001132378097625072, + "loss": 2.5116, + "step": 9209 + }, + { + "epoch": 0.7432814139294649, + "grad_norm": 0.7570765614509583, + "learning_rate": 0.00011322216163571007, + "loss": 2.5576, + "step": 9210 + }, + { + "epoch": 0.743362117666048, + "grad_norm": 0.6937675476074219, + "learning_rate": 0.00011320651317938214, + "loss": 2.6212, + "step": 9211 + }, + { + "epoch": 0.743442821402631, + "grad_norm": 0.6741313934326172, + "learning_rate": 0.00011319086439391333, + "loss": 2.5723, + "step": 9212 + }, + { + "epoch": 0.7435235251392139, + "grad_norm": 0.711358904838562, + "learning_rate": 0.00011317521527969374, + "loss": 2.5713, + "step": 9213 + }, + { + "epoch": 0.7436042288757969, + "grad_norm": 0.7443268895149231, + "learning_rate": 0.00011315956583711331, + "loss": 2.5301, + "step": 9214 + }, + { + "epoch": 0.74368493261238, + "grad_norm": 0.7001742720603943, + "learning_rate": 0.00011314391606656212, + "loss": 2.5545, + "step": 9215 + }, + { + "epoch": 0.743765636348963, + "grad_norm": 0.7294990420341492, + "learning_rate": 0.00011312826596843019, + "loss": 2.5897, + "step": 9216 + }, + { + "epoch": 0.743846340085546, + "grad_norm": 0.706924319267273, + "learning_rate": 0.00011311261554310753, + "loss": 2.6477, + "step": 9217 + }, + { + "epoch": 0.7439270438221289, + "grad_norm": 0.7065039277076721, + "learning_rate": 0.00011309696479098423, + "loss": 2.5326, + "step": 9218 + }, + { + "epoch": 0.744007747558712, + "grad_norm": 0.6502599716186523, + "learning_rate": 0.00011308131371245037, + "loss": 2.5833, + 
"step": 9219 + }, + { + "epoch": 0.744088451295295, + "grad_norm": 0.7135158181190491, + "learning_rate": 0.00011306566230789592, + "loss": 2.5686, + "step": 9220 + }, + { + "epoch": 0.744169155031878, + "grad_norm": 0.7239195108413696, + "learning_rate": 0.00011305001057771101, + "loss": 2.6303, + "step": 9221 + }, + { + "epoch": 0.744249858768461, + "grad_norm": 0.6442604660987854, + "learning_rate": 0.00011303435852228574, + "loss": 2.5495, + "step": 9222 + }, + { + "epoch": 0.744330562505044, + "grad_norm": 0.6700316071510315, + "learning_rate": 0.0001130187061420102, + "loss": 2.5575, + "step": 9223 + }, + { + "epoch": 0.744411266241627, + "grad_norm": 0.7532816529273987, + "learning_rate": 0.00011300305343727446, + "loss": 2.5174, + "step": 9224 + }, + { + "epoch": 0.74449196997821, + "grad_norm": 0.7614738941192627, + "learning_rate": 0.00011298740040846862, + "loss": 2.5995, + "step": 9225 + }, + { + "epoch": 0.744572673714793, + "grad_norm": 0.6781208515167236, + "learning_rate": 0.00011297174705598283, + "loss": 2.5225, + "step": 9226 + }, + { + "epoch": 0.744653377451376, + "grad_norm": 0.680525541305542, + "learning_rate": 0.0001129560933802072, + "loss": 2.5844, + "step": 9227 + }, + { + "epoch": 0.744734081187959, + "grad_norm": 0.7196657657623291, + "learning_rate": 0.00011294043938153185, + "loss": 2.564, + "step": 9228 + }, + { + "epoch": 0.744814784924542, + "grad_norm": 0.6997412443161011, + "learning_rate": 0.00011292478506034694, + "loss": 2.6486, + "step": 9229 + }, + { + "epoch": 0.744895488661125, + "grad_norm": 0.7438939809799194, + "learning_rate": 0.00011290913041704256, + "loss": 2.5667, + "step": 9230 + }, + { + "epoch": 0.744976192397708, + "grad_norm": 0.7391374707221985, + "learning_rate": 0.00011289347545200892, + "loss": 2.5974, + "step": 9231 + }, + { + "epoch": 0.745056896134291, + "grad_norm": 0.7845481634140015, + "learning_rate": 0.0001128778201656362, + "loss": 2.5168, + "step": 9232 + }, + { + "epoch": 0.745137599870874, + 
"grad_norm": 0.728712797164917, + "learning_rate": 0.00011286216455831449, + "loss": 2.5241, + "step": 9233 + }, + { + "epoch": 0.745218303607457, + "grad_norm": 0.7310191988945007, + "learning_rate": 0.00011284650863043407, + "loss": 2.5777, + "step": 9234 + }, + { + "epoch": 0.74529900734404, + "grad_norm": 0.6661474704742432, + "learning_rate": 0.00011283085238238503, + "loss": 2.5471, + "step": 9235 + }, + { + "epoch": 0.7453797110806231, + "grad_norm": 0.7697983384132385, + "learning_rate": 0.00011281519581455761, + "loss": 2.587, + "step": 9236 + }, + { + "epoch": 0.745460414817206, + "grad_norm": 0.7336567640304565, + "learning_rate": 0.00011279953892734203, + "loss": 2.5756, + "step": 9237 + }, + { + "epoch": 0.745541118553789, + "grad_norm": 0.6192059516906738, + "learning_rate": 0.00011278388172112848, + "loss": 2.5038, + "step": 9238 + }, + { + "epoch": 0.745621822290372, + "grad_norm": 0.7180300354957581, + "learning_rate": 0.00011276822419630719, + "loss": 2.5469, + "step": 9239 + }, + { + "epoch": 0.7457025260269551, + "grad_norm": 0.7583367824554443, + "learning_rate": 0.00011275256635326837, + "loss": 2.6274, + "step": 9240 + }, + { + "epoch": 0.7457832297635381, + "grad_norm": 0.6848096251487732, + "learning_rate": 0.00011273690819240221, + "loss": 2.5117, + "step": 9241 + }, + { + "epoch": 0.745863933500121, + "grad_norm": 0.6830503344535828, + "learning_rate": 0.00011272124971409907, + "loss": 2.5114, + "step": 9242 + }, + { + "epoch": 0.745944637236704, + "grad_norm": 0.780240535736084, + "learning_rate": 0.0001127055909187491, + "loss": 2.6432, + "step": 9243 + }, + { + "epoch": 0.7460253409732871, + "grad_norm": 0.7421274185180664, + "learning_rate": 0.00011268993180674261, + "loss": 2.5723, + "step": 9244 + }, + { + "epoch": 0.7461060447098701, + "grad_norm": 0.6695685386657715, + "learning_rate": 0.00011267427237846986, + "loss": 2.5335, + "step": 9245 + }, + { + "epoch": 0.746186748446453, + "grad_norm": 0.8390316963195801, + 
"learning_rate": 0.00011265861263432104, + "loss": 2.5125, + "step": 9246 + }, + { + "epoch": 0.746267452183036, + "grad_norm": 0.7030535936355591, + "learning_rate": 0.00011264295257468658, + "loss": 2.5986, + "step": 9247 + }, + { + "epoch": 0.7463481559196191, + "grad_norm": 0.6754253506660461, + "learning_rate": 0.00011262729219995669, + "loss": 2.5067, + "step": 9248 + }, + { + "epoch": 0.7464288596562021, + "grad_norm": 0.6809592843055725, + "learning_rate": 0.00011261163151052163, + "loss": 2.5359, + "step": 9249 + }, + { + "epoch": 0.7465095633927851, + "grad_norm": 0.6546878218650818, + "learning_rate": 0.00011259597050677178, + "loss": 2.5357, + "step": 9250 + }, + { + "epoch": 0.746590267129368, + "grad_norm": 0.6514731645584106, + "learning_rate": 0.00011258030918909739, + "loss": 2.5591, + "step": 9251 + }, + { + "epoch": 0.7466709708659511, + "grad_norm": 0.6981258392333984, + "learning_rate": 0.0001125646475578888, + "loss": 2.6171, + "step": 9252 + }, + { + "epoch": 0.7467516746025341, + "grad_norm": 0.6763784885406494, + "learning_rate": 0.00011254898561353639, + "loss": 2.5455, + "step": 9253 + }, + { + "epoch": 0.7468323783391171, + "grad_norm": 0.6241726279258728, + "learning_rate": 0.00011253332335643043, + "loss": 2.6073, + "step": 9254 + }, + { + "epoch": 0.7469130820757001, + "grad_norm": 0.6810312271118164, + "learning_rate": 0.00011251766078696132, + "loss": 2.5285, + "step": 9255 + }, + { + "epoch": 0.7469937858122832, + "grad_norm": 0.6603971123695374, + "learning_rate": 0.00011250199790551934, + "loss": 2.5985, + "step": 9256 + }, + { + "epoch": 0.7470744895488661, + "grad_norm": 0.69618159532547, + "learning_rate": 0.0001124863347124949, + "loss": 2.5728, + "step": 9257 + }, + { + "epoch": 0.7471551932854491, + "grad_norm": 0.6878889203071594, + "learning_rate": 0.00011247067120827837, + "loss": 2.5459, + "step": 9258 + }, + { + "epoch": 0.7472358970220321, + "grad_norm": 0.6613149046897888, + "learning_rate": 0.00011245500739326011, + 
"loss": 2.6559, + "step": 9259 + }, + { + "epoch": 0.7473166007586152, + "grad_norm": 0.6397448778152466, + "learning_rate": 0.00011243934326783053, + "loss": 2.5712, + "step": 9260 + }, + { + "epoch": 0.7473973044951981, + "grad_norm": 0.6804259419441223, + "learning_rate": 0.00011242367883237996, + "loss": 2.6143, + "step": 9261 + }, + { + "epoch": 0.7474780082317811, + "grad_norm": 0.8029066324234009, + "learning_rate": 0.00011240801408729884, + "loss": 2.5702, + "step": 9262 + }, + { + "epoch": 0.7475587119683641, + "grad_norm": 0.7086285948753357, + "learning_rate": 0.00011239234903297761, + "loss": 2.6113, + "step": 9263 + }, + { + "epoch": 0.7476394157049472, + "grad_norm": 0.6980452537536621, + "learning_rate": 0.00011237668366980665, + "loss": 2.6355, + "step": 9264 + }, + { + "epoch": 0.7477201194415302, + "grad_norm": 0.6906906962394714, + "learning_rate": 0.00011236101799817636, + "loss": 2.5605, + "step": 9265 + }, + { + "epoch": 0.7478008231781131, + "grad_norm": 0.7412894368171692, + "learning_rate": 0.00011234535201847716, + "loss": 2.6073, + "step": 9266 + }, + { + "epoch": 0.7478815269146961, + "grad_norm": 0.6949330568313599, + "learning_rate": 0.00011232968573109955, + "loss": 2.5623, + "step": 9267 + }, + { + "epoch": 0.7479622306512792, + "grad_norm": 0.6916515827178955, + "learning_rate": 0.00011231401913643393, + "loss": 2.5348, + "step": 9268 + }, + { + "epoch": 0.7480429343878622, + "grad_norm": 0.7576180696487427, + "learning_rate": 0.0001122983522348708, + "loss": 2.5968, + "step": 9269 + }, + { + "epoch": 0.7481236381244452, + "grad_norm": 0.6734197735786438, + "learning_rate": 0.00011228268502680052, + "loss": 2.5185, + "step": 9270 + }, + { + "epoch": 0.7482043418610281, + "grad_norm": 0.6952544450759888, + "learning_rate": 0.00011226701751261367, + "loss": 2.57, + "step": 9271 + }, + { + "epoch": 0.7482850455976112, + "grad_norm": 0.6504654884338379, + "learning_rate": 0.00011225134969270068, + "loss": 2.5677, + "step": 9272 + }, + { 
+ "epoch": 0.7483657493341942, + "grad_norm": 0.6843643188476562, + "learning_rate": 0.00011223568156745198, + "loss": 2.5686, + "step": 9273 + }, + { + "epoch": 0.7484464530707772, + "grad_norm": 0.6786371469497681, + "learning_rate": 0.00011222001313725816, + "loss": 2.5024, + "step": 9274 + }, + { + "epoch": 0.7485271568073602, + "grad_norm": 0.6431117057800293, + "learning_rate": 0.00011220434440250967, + "loss": 2.5206, + "step": 9275 + }, + { + "epoch": 0.7486078605439432, + "grad_norm": 0.699547290802002, + "learning_rate": 0.000112188675363597, + "loss": 2.5974, + "step": 9276 + }, + { + "epoch": 0.7486885642805262, + "grad_norm": 0.6870436072349548, + "learning_rate": 0.00011217300602091067, + "loss": 2.5303, + "step": 9277 + }, + { + "epoch": 0.7487692680171092, + "grad_norm": 0.7032173871994019, + "learning_rate": 0.0001121573363748412, + "loss": 2.5045, + "step": 9278 + }, + { + "epoch": 0.7488499717536922, + "grad_norm": 0.6890417337417603, + "learning_rate": 0.00011214166642577917, + "loss": 2.5945, + "step": 9279 + }, + { + "epoch": 0.7489306754902753, + "grad_norm": 0.7257806062698364, + "learning_rate": 0.00011212599617411506, + "loss": 2.6013, + "step": 9280 + }, + { + "epoch": 0.7490113792268582, + "grad_norm": 0.722561240196228, + "learning_rate": 0.0001121103256202394, + "loss": 2.5809, + "step": 9281 + }, + { + "epoch": 0.7490920829634412, + "grad_norm": 0.7360994219779968, + "learning_rate": 0.00011209465476454277, + "loss": 2.5036, + "step": 9282 + }, + { + "epoch": 0.7491727867000242, + "grad_norm": 0.6561676263809204, + "learning_rate": 0.00011207898360741574, + "loss": 2.5302, + "step": 9283 + }, + { + "epoch": 0.7492534904366072, + "grad_norm": 0.7454147338867188, + "learning_rate": 0.00011206331214924887, + "loss": 2.5511, + "step": 9284 + }, + { + "epoch": 0.7493341941731902, + "grad_norm": 0.7085482478141785, + "learning_rate": 0.00011204764039043275, + "loss": 2.5743, + "step": 9285 + }, + { + "epoch": 0.7494148979097732, + 
"grad_norm": 0.691872775554657, + "learning_rate": 0.0001120319683313579, + "loss": 2.5414, + "step": 9286 + }, + { + "epoch": 0.7494956016463562, + "grad_norm": 0.6661050915718079, + "learning_rate": 0.00011201629597241496, + "loss": 2.5418, + "step": 9287 + }, + { + "epoch": 0.7495763053829392, + "grad_norm": 0.7440990805625916, + "learning_rate": 0.00011200062331399452, + "loss": 2.5543, + "step": 9288 + }, + { + "epoch": 0.7496570091195223, + "grad_norm": 0.6655303835868835, + "learning_rate": 0.00011198495035648715, + "loss": 2.5629, + "step": 9289 + }, + { + "epoch": 0.7497377128561052, + "grad_norm": 0.7550996541976929, + "learning_rate": 0.00011196927710028353, + "loss": 2.5376, + "step": 9290 + }, + { + "epoch": 0.7498184165926882, + "grad_norm": 0.692915678024292, + "learning_rate": 0.00011195360354577422, + "loss": 2.4661, + "step": 9291 + }, + { + "epoch": 0.7498991203292712, + "grad_norm": 0.7572253346443176, + "learning_rate": 0.00011193792969334985, + "loss": 2.5641, + "step": 9292 + }, + { + "epoch": 0.7499798240658543, + "grad_norm": 0.6550531387329102, + "learning_rate": 0.00011192225554340107, + "loss": 2.5591, + "step": 9293 + }, + { + "epoch": 0.7500605278024373, + "grad_norm": 0.677130401134491, + "learning_rate": 0.0001119065810963185, + "loss": 2.5859, + "step": 9294 + }, + { + "epoch": 0.7501412315390202, + "grad_norm": 0.680673360824585, + "learning_rate": 0.00011189090635249287, + "loss": 2.5343, + "step": 9295 + }, + { + "epoch": 0.7502219352756032, + "grad_norm": 0.7574957609176636, + "learning_rate": 0.00011187523131231472, + "loss": 2.5966, + "step": 9296 + }, + { + "epoch": 0.7503026390121863, + "grad_norm": 0.7099971175193787, + "learning_rate": 0.00011185955597617474, + "loss": 2.5547, + "step": 9297 + }, + { + "epoch": 0.7503833427487693, + "grad_norm": 0.7153162956237793, + "learning_rate": 0.00011184388034446367, + "loss": 2.5986, + "step": 9298 + }, + { + "epoch": 0.7504640464853523, + "grad_norm": 0.7154852747917175, + 
"learning_rate": 0.00011182820441757212, + "loss": 2.5214, + "step": 9299 + }, + { + "epoch": 0.7505447502219352, + "grad_norm": 0.6899208426475525, + "learning_rate": 0.00011181252819589081, + "loss": 2.5026, + "step": 9300 + }, + { + "epoch": 0.7506254539585183, + "grad_norm": 0.6719048023223877, + "learning_rate": 0.00011179685167981041, + "loss": 2.5915, + "step": 9301 + }, + { + "epoch": 0.7507061576951013, + "grad_norm": 0.6664413213729858, + "learning_rate": 0.00011178117486972164, + "loss": 2.5479, + "step": 9302 + }, + { + "epoch": 0.7507868614316843, + "grad_norm": 0.7433286905288696, + "learning_rate": 0.00011176549776601517, + "loss": 2.5941, + "step": 9303 + }, + { + "epoch": 0.7508675651682672, + "grad_norm": 0.7868518233299255, + "learning_rate": 0.00011174982036908177, + "loss": 2.5537, + "step": 9304 + }, + { + "epoch": 0.7509482689048503, + "grad_norm": 0.7037336826324463, + "learning_rate": 0.0001117341426793121, + "loss": 2.568, + "step": 9305 + }, + { + "epoch": 0.7510289726414333, + "grad_norm": 0.6630405783653259, + "learning_rate": 0.00011171846469709697, + "loss": 2.4906, + "step": 9306 + }, + { + "epoch": 0.7511096763780163, + "grad_norm": 0.7398669719696045, + "learning_rate": 0.00011170278642282701, + "loss": 2.574, + "step": 9307 + }, + { + "epoch": 0.7511903801145993, + "grad_norm": 0.7557641267776489, + "learning_rate": 0.00011168710785689304, + "loss": 2.5237, + "step": 9308 + }, + { + "epoch": 0.7512710838511824, + "grad_norm": 0.6883708238601685, + "learning_rate": 0.00011167142899968581, + "loss": 2.5643, + "step": 9309 + }, + { + "epoch": 0.7513517875877653, + "grad_norm": 0.6623669862747192, + "learning_rate": 0.00011165574985159606, + "loss": 2.5319, + "step": 9310 + }, + { + "epoch": 0.7514324913243483, + "grad_norm": 0.6938778758049011, + "learning_rate": 0.00011164007041301454, + "loss": 2.5083, + "step": 9311 + }, + { + "epoch": 0.7515131950609313, + "grad_norm": 0.718534529209137, + "learning_rate": 0.00011162439068433204, 
+ "loss": 2.4791, + "step": 9312 + }, + { + "epoch": 0.7515938987975144, + "grad_norm": 0.672113299369812, + "learning_rate": 0.00011160871066593934, + "loss": 2.5264, + "step": 9313 + }, + { + "epoch": 0.7516746025340973, + "grad_norm": 0.6854343414306641, + "learning_rate": 0.00011159303035822723, + "loss": 2.5734, + "step": 9314 + }, + { + "epoch": 0.7517553062706803, + "grad_norm": 0.6494589447975159, + "learning_rate": 0.0001115773497615865, + "loss": 2.5564, + "step": 9315 + }, + { + "epoch": 0.7518360100072633, + "grad_norm": 0.7219608426094055, + "learning_rate": 0.00011156166887640793, + "loss": 2.6049, + "step": 9316 + }, + { + "epoch": 0.7519167137438464, + "grad_norm": 0.6892502903938293, + "learning_rate": 0.00011154598770308236, + "loss": 2.5333, + "step": 9317 + }, + { + "epoch": 0.7519974174804294, + "grad_norm": 0.6670175790786743, + "learning_rate": 0.0001115303062420006, + "loss": 2.5882, + "step": 9318 + }, + { + "epoch": 0.7520781212170123, + "grad_norm": 0.7367776036262512, + "learning_rate": 0.00011151462449355347, + "loss": 2.5634, + "step": 9319 + }, + { + "epoch": 0.7521588249535953, + "grad_norm": 0.6971952319145203, + "learning_rate": 0.00011149894245813182, + "loss": 2.5323, + "step": 9320 + }, + { + "epoch": 0.7522395286901784, + "grad_norm": 0.6555755734443665, + "learning_rate": 0.00011148326013612642, + "loss": 2.5597, + "step": 9321 + }, + { + "epoch": 0.7523202324267614, + "grad_norm": 0.7004384994506836, + "learning_rate": 0.00011146757752792819, + "loss": 2.4761, + "step": 9322 + }, + { + "epoch": 0.7524009361633444, + "grad_norm": 0.7151978015899658, + "learning_rate": 0.00011145189463392791, + "loss": 2.5825, + "step": 9323 + }, + { + "epoch": 0.7524816398999273, + "grad_norm": 0.7176918387413025, + "learning_rate": 0.00011143621145451653, + "loss": 2.6112, + "step": 9324 + }, + { + "epoch": 0.7525623436365104, + "grad_norm": 0.7156146168708801, + "learning_rate": 0.00011142052799008487, + "loss": 2.5293, + "step": 9325 + }, + 
{ + "epoch": 0.7526430473730934, + "grad_norm": 0.7360113263130188, + "learning_rate": 0.00011140484424102375, + "loss": 2.5703, + "step": 9326 + }, + { + "epoch": 0.7527237511096764, + "grad_norm": 0.65630042552948, + "learning_rate": 0.00011138916020772414, + "loss": 2.5224, + "step": 9327 + }, + { + "epoch": 0.7528044548462594, + "grad_norm": 0.7088161110877991, + "learning_rate": 0.00011137347589057687, + "loss": 2.6673, + "step": 9328 + }, + { + "epoch": 0.7528851585828424, + "grad_norm": 0.7335243821144104, + "learning_rate": 0.00011135779128997283, + "loss": 2.5693, + "step": 9329 + }, + { + "epoch": 0.7529658623194254, + "grad_norm": 0.7166211605072021, + "learning_rate": 0.00011134210640630298, + "loss": 2.5612, + "step": 9330 + }, + { + "epoch": 0.7530465660560084, + "grad_norm": 0.7324960231781006, + "learning_rate": 0.00011132642123995816, + "loss": 2.5682, + "step": 9331 + }, + { + "epoch": 0.7531272697925914, + "grad_norm": 0.7133917808532715, + "learning_rate": 0.00011131073579132936, + "loss": 2.6131, + "step": 9332 + }, + { + "epoch": 0.7532079735291743, + "grad_norm": 0.678741455078125, + "learning_rate": 0.0001112950500608074, + "loss": 2.6109, + "step": 9333 + }, + { + "epoch": 0.7532886772657574, + "grad_norm": 0.7000784277915955, + "learning_rate": 0.0001112793640487833, + "loss": 2.5087, + "step": 9334 + }, + { + "epoch": 0.7533693810023404, + "grad_norm": 0.719976544380188, + "learning_rate": 0.00011126367775564795, + "loss": 2.4665, + "step": 9335 + }, + { + "epoch": 0.7534500847389234, + "grad_norm": 0.7127155065536499, + "learning_rate": 0.00011124799118179232, + "loss": 2.5254, + "step": 9336 + }, + { + "epoch": 0.7535307884755064, + "grad_norm": 0.6306474804878235, + "learning_rate": 0.00011123230432760734, + "loss": 2.5487, + "step": 9337 + }, + { + "epoch": 0.7536114922120895, + "grad_norm": 0.667019784450531, + "learning_rate": 0.00011121661719348397, + "loss": 2.5576, + "step": 9338 + }, + { + "epoch": 0.7536921959486724, + 
"grad_norm": 0.6869673132896423, + "learning_rate": 0.00011120092977981318, + "loss": 2.544, + "step": 9339 + }, + { + "epoch": 0.7537728996852554, + "grad_norm": 0.6688670516014099, + "learning_rate": 0.00011118524208698596, + "loss": 2.6017, + "step": 9340 + }, + { + "epoch": 0.7538536034218384, + "grad_norm": 0.6717860102653503, + "learning_rate": 0.00011116955411539325, + "loss": 2.5571, + "step": 9341 + }, + { + "epoch": 0.7539343071584215, + "grad_norm": 0.7113999724388123, + "learning_rate": 0.00011115386586542604, + "loss": 2.5684, + "step": 9342 + }, + { + "epoch": 0.7540150108950044, + "grad_norm": 0.6687907576560974, + "learning_rate": 0.00011113817733747536, + "loss": 2.548, + "step": 9343 + }, + { + "epoch": 0.7540957146315874, + "grad_norm": 0.6828920841217041, + "learning_rate": 0.00011112248853193219, + "loss": 2.5544, + "step": 9344 + }, + { + "epoch": 0.7541764183681704, + "grad_norm": 0.6793262362480164, + "learning_rate": 0.00011110679944918749, + "loss": 2.4655, + "step": 9345 + }, + { + "epoch": 0.7542571221047535, + "grad_norm": 0.6812230348587036, + "learning_rate": 0.00011109111008963235, + "loss": 2.5473, + "step": 9346 + }, + { + "epoch": 0.7543378258413365, + "grad_norm": 0.6838300824165344, + "learning_rate": 0.00011107542045365775, + "loss": 2.5248, + "step": 9347 + }, + { + "epoch": 0.7544185295779194, + "grad_norm": 0.7101932764053345, + "learning_rate": 0.0001110597305416547, + "loss": 2.5235, + "step": 9348 + }, + { + "epoch": 0.7544992333145024, + "grad_norm": 0.7136144042015076, + "learning_rate": 0.0001110440403540143, + "loss": 2.5592, + "step": 9349 + }, + { + "epoch": 0.7545799370510855, + "grad_norm": 0.6673154234886169, + "learning_rate": 0.00011102834989112751, + "loss": 2.4962, + "step": 9350 + }, + { + "epoch": 0.7546606407876685, + "grad_norm": 0.6849049925804138, + "learning_rate": 0.00011101265915338544, + "loss": 2.5793, + "step": 9351 + }, + { + "epoch": 0.7547413445242515, + "grad_norm": 0.7239733338356018, + 
"learning_rate": 0.0001109969681411791, + "loss": 2.5556, + "step": 9352 + }, + { + "epoch": 0.7548220482608344, + "grad_norm": 0.6738215684890747, + "learning_rate": 0.00011098127685489955, + "loss": 2.6181, + "step": 9353 + }, + { + "epoch": 0.7549027519974175, + "grad_norm": 0.6212114095687866, + "learning_rate": 0.00011096558529493787, + "loss": 2.5509, + "step": 9354 + }, + { + "epoch": 0.7549834557340005, + "grad_norm": 0.6801952123641968, + "learning_rate": 0.00011094989346168517, + "loss": 2.6454, + "step": 9355 + }, + { + "epoch": 0.7550641594705835, + "grad_norm": 0.6605944037437439, + "learning_rate": 0.0001109342013555325, + "loss": 2.5218, + "step": 9356 + }, + { + "epoch": 0.7551448632071665, + "grad_norm": 0.6486438512802124, + "learning_rate": 0.00011091850897687096, + "loss": 2.5431, + "step": 9357 + }, + { + "epoch": 0.7552255669437495, + "grad_norm": 0.6701794266700745, + "learning_rate": 0.0001109028163260916, + "loss": 2.563, + "step": 9358 + }, + { + "epoch": 0.7553062706803325, + "grad_norm": 0.6486446261405945, + "learning_rate": 0.00011088712340358555, + "loss": 2.5147, + "step": 9359 + }, + { + "epoch": 0.7553869744169155, + "grad_norm": 0.695197582244873, + "learning_rate": 0.00011087143020974396, + "loss": 2.5707, + "step": 9360 + }, + { + "epoch": 0.7554676781534985, + "grad_norm": 0.6910821199417114, + "learning_rate": 0.00011085573674495791, + "loss": 2.5797, + "step": 9361 + }, + { + "epoch": 0.7555483818900816, + "grad_norm": 0.7084208726882935, + "learning_rate": 0.00011084004300961852, + "loss": 2.5362, + "step": 9362 + }, + { + "epoch": 0.7556290856266645, + "grad_norm": 0.6750916242599487, + "learning_rate": 0.00011082434900411691, + "loss": 2.5554, + "step": 9363 + }, + { + "epoch": 0.7557097893632475, + "grad_norm": 0.6711466908454895, + "learning_rate": 0.0001108086547288442, + "loss": 2.5577, + "step": 9364 + }, + { + "epoch": 0.7557904930998305, + "grad_norm": 0.7267118096351624, + "learning_rate": 0.00011079296018419163, + 
"loss": 2.5422, + "step": 9365 + }, + { + "epoch": 0.7558711968364136, + "grad_norm": 0.692730188369751, + "learning_rate": 0.00011077726537055021, + "loss": 2.5281, + "step": 9366 + }, + { + "epoch": 0.7559519005729965, + "grad_norm": 0.7071926593780518, + "learning_rate": 0.00011076157028831122, + "loss": 2.5273, + "step": 9367 + }, + { + "epoch": 0.7560326043095795, + "grad_norm": 0.7662521600723267, + "learning_rate": 0.00011074587493786574, + "loss": 2.5433, + "step": 9368 + }, + { + "epoch": 0.7561133080461625, + "grad_norm": 0.7173436880111694, + "learning_rate": 0.00011073017931960496, + "loss": 2.579, + "step": 9369 + }, + { + "epoch": 0.7561940117827456, + "grad_norm": 0.6401154398918152, + "learning_rate": 0.00011071448343392008, + "loss": 2.5189, + "step": 9370 + }, + { + "epoch": 0.7562747155193286, + "grad_norm": 0.6510714292526245, + "learning_rate": 0.00011069878728120224, + "loss": 2.5682, + "step": 9371 + }, + { + "epoch": 0.7563554192559115, + "grad_norm": 0.7189988493919373, + "learning_rate": 0.00011068309086184269, + "loss": 2.5247, + "step": 9372 + }, + { + "epoch": 0.7564361229924945, + "grad_norm": 0.678753137588501, + "learning_rate": 0.00011066739417623258, + "loss": 2.5083, + "step": 9373 + }, + { + "epoch": 0.7565168267290776, + "grad_norm": 0.6903115510940552, + "learning_rate": 0.0001106516972247631, + "loss": 2.5658, + "step": 9374 + }, + { + "epoch": 0.7565975304656606, + "grad_norm": 0.6772382855415344, + "learning_rate": 0.0001106360000078255, + "loss": 2.5445, + "step": 9375 + }, + { + "epoch": 0.7566782342022436, + "grad_norm": 0.6655055284500122, + "learning_rate": 0.00011062030252581097, + "loss": 2.5186, + "step": 9376 + }, + { + "epoch": 0.7567589379388265, + "grad_norm": 0.7173851728439331, + "learning_rate": 0.00011060460477911074, + "loss": 2.5297, + "step": 9377 + }, + { + "epoch": 0.7568396416754096, + "grad_norm": 0.6891282200813293, + "learning_rate": 0.00011058890676811606, + "loss": 2.5706, + "step": 9378 + }, + { + 
"epoch": 0.7569203454119926, + "grad_norm": 0.7053082585334778, + "learning_rate": 0.0001105732084932181, + "loss": 2.5475, + "step": 9379 + }, + { + "epoch": 0.7570010491485756, + "grad_norm": 0.7503373622894287, + "learning_rate": 0.00011055750995480818, + "loss": 2.6438, + "step": 9380 + }, + { + "epoch": 0.7570817528851586, + "grad_norm": 0.6703453660011292, + "learning_rate": 0.0001105418111532775, + "loss": 2.5485, + "step": 9381 + }, + { + "epoch": 0.7571624566217416, + "grad_norm": 0.6651757955551147, + "learning_rate": 0.00011052611208901733, + "loss": 2.6079, + "step": 9382 + }, + { + "epoch": 0.7572431603583246, + "grad_norm": 0.6738902926445007, + "learning_rate": 0.00011051041276241895, + "loss": 2.5279, + "step": 9383 + }, + { + "epoch": 0.7573238640949076, + "grad_norm": 0.6803816556930542, + "learning_rate": 0.00011049471317387357, + "loss": 2.5972, + "step": 9384 + }, + { + "epoch": 0.7574045678314906, + "grad_norm": 0.7127584218978882, + "learning_rate": 0.00011047901332377253, + "loss": 2.5275, + "step": 9385 + }, + { + "epoch": 0.7574852715680735, + "grad_norm": 0.7655676007270813, + "learning_rate": 0.00011046331321250711, + "loss": 2.6491, + "step": 9386 + }, + { + "epoch": 0.7575659753046566, + "grad_norm": 0.7005762457847595, + "learning_rate": 0.00011044761284046854, + "loss": 2.5266, + "step": 9387 + }, + { + "epoch": 0.7576466790412396, + "grad_norm": 0.701931357383728, + "learning_rate": 0.00011043191220804817, + "loss": 2.5556, + "step": 9388 + }, + { + "epoch": 0.7577273827778226, + "grad_norm": 0.6888757944107056, + "learning_rate": 0.00011041621131563724, + "loss": 2.5654, + "step": 9389 + }, + { + "epoch": 0.7578080865144056, + "grad_norm": 0.7119149565696716, + "learning_rate": 0.00011040051016362711, + "loss": 2.5925, + "step": 9390 + }, + { + "epoch": 0.7578887902509887, + "grad_norm": 0.7378301024436951, + "learning_rate": 0.00011038480875240911, + "loss": 2.5604, + "step": 9391 + }, + { + "epoch": 0.7579694939875716, + 
"grad_norm": 0.7221272587776184, + "learning_rate": 0.00011036910708237449, + "loss": 2.5293, + "step": 9392 + }, + { + "epoch": 0.7580501977241546, + "grad_norm": 0.6895891427993774, + "learning_rate": 0.00011035340515391465, + "loss": 2.5177, + "step": 9393 + }, + { + "epoch": 0.7581309014607376, + "grad_norm": 0.6812298893928528, + "learning_rate": 0.00011033770296742086, + "loss": 2.6345, + "step": 9394 + }, + { + "epoch": 0.7582116051973207, + "grad_norm": 0.6733750700950623, + "learning_rate": 0.00011032200052328449, + "loss": 2.5548, + "step": 9395 + }, + { + "epoch": 0.7582923089339036, + "grad_norm": 0.7667728066444397, + "learning_rate": 0.00011030629782189692, + "loss": 2.5858, + "step": 9396 + }, + { + "epoch": 0.7583730126704866, + "grad_norm": 0.6809018850326538, + "learning_rate": 0.00011029059486364946, + "loss": 2.6028, + "step": 9397 + }, + { + "epoch": 0.7584537164070696, + "grad_norm": 0.6817305684089661, + "learning_rate": 0.00011027489164893345, + "loss": 2.5594, + "step": 9398 + }, + { + "epoch": 0.7585344201436527, + "grad_norm": 0.6936343908309937, + "learning_rate": 0.00011025918817814027, + "loss": 2.4997, + "step": 9399 + }, + { + "epoch": 0.7586151238802357, + "grad_norm": 0.7046801447868347, + "learning_rate": 0.00011024348445166133, + "loss": 2.5199, + "step": 9400 + }, + { + "epoch": 0.7586958276168186, + "grad_norm": 0.7247316241264343, + "learning_rate": 0.00011022778046988798, + "loss": 2.5233, + "step": 9401 + }, + { + "epoch": 0.7587765313534016, + "grad_norm": 0.675652265548706, + "learning_rate": 0.00011021207623321162, + "loss": 2.5213, + "step": 9402 + }, + { + "epoch": 0.7588572350899847, + "grad_norm": 0.6866120100021362, + "learning_rate": 0.0001101963717420236, + "loss": 2.6026, + "step": 9403 + }, + { + "epoch": 0.7589379388265677, + "grad_norm": 0.7168806791305542, + "learning_rate": 0.00011018066699671534, + "loss": 2.5707, + "step": 9404 + }, + { + "epoch": 0.7590186425631507, + "grad_norm": 0.6858265995979309, + 
"learning_rate": 0.00011016496199767825, + "loss": 2.5313, + "step": 9405 + }, + { + "epoch": 0.7590993462997336, + "grad_norm": 0.7064315676689148, + "learning_rate": 0.00011014925674530375, + "loss": 2.5362, + "step": 9406 + }, + { + "epoch": 0.7591800500363167, + "grad_norm": 0.658385694026947, + "learning_rate": 0.00011013355123998324, + "loss": 2.5773, + "step": 9407 + }, + { + "epoch": 0.7592607537728997, + "grad_norm": 0.7112493515014648, + "learning_rate": 0.00011011784548210813, + "loss": 2.589, + "step": 9408 + }, + { + "epoch": 0.7593414575094827, + "grad_norm": 0.6835871934890747, + "learning_rate": 0.00011010213947206986, + "loss": 2.5952, + "step": 9409 + }, + { + "epoch": 0.7594221612460657, + "grad_norm": 0.6920506358146667, + "learning_rate": 0.00011008643321025989, + "loss": 2.5433, + "step": 9410 + }, + { + "epoch": 0.7595028649826487, + "grad_norm": 0.7239150404930115, + "learning_rate": 0.00011007072669706962, + "loss": 2.5291, + "step": 9411 + }, + { + "epoch": 0.7595835687192317, + "grad_norm": 0.644568145275116, + "learning_rate": 0.00011005501993289052, + "loss": 2.5324, + "step": 9412 + }, + { + "epoch": 0.7596642724558147, + "grad_norm": 0.6604863405227661, + "learning_rate": 0.00011003931291811405, + "loss": 2.561, + "step": 9413 + }, + { + "epoch": 0.7597449761923977, + "grad_norm": 0.7056753635406494, + "learning_rate": 0.00011002360565313164, + "loss": 2.6537, + "step": 9414 + }, + { + "epoch": 0.7598256799289808, + "grad_norm": 0.6712720394134521, + "learning_rate": 0.00011000789813833476, + "loss": 2.5222, + "step": 9415 + }, + { + "epoch": 0.7599063836655637, + "grad_norm": 0.6829253435134888, + "learning_rate": 0.00010999219037411492, + "loss": 2.5156, + "step": 9416 + }, + { + "epoch": 0.7599870874021467, + "grad_norm": 0.7386518120765686, + "learning_rate": 0.00010997648236086359, + "loss": 2.5378, + "step": 9417 + }, + { + "epoch": 0.7600677911387297, + "grad_norm": 0.6711105108261108, + "learning_rate": 0.00010996077409897223, 
+ "loss": 2.4985, + "step": 9418 + }, + { + "epoch": 0.7601484948753128, + "grad_norm": 0.6936883926391602, + "learning_rate": 0.00010994506558883233, + "loss": 2.4912, + "step": 9419 + }, + { + "epoch": 0.7602291986118958, + "grad_norm": 0.6927978992462158, + "learning_rate": 0.00010992935683083541, + "loss": 2.5526, + "step": 9420 + }, + { + "epoch": 0.7603099023484787, + "grad_norm": 0.7661495804786682, + "learning_rate": 0.00010991364782537297, + "loss": 2.5778, + "step": 9421 + }, + { + "epoch": 0.7603906060850617, + "grad_norm": 0.7092108726501465, + "learning_rate": 0.0001098979385728365, + "loss": 2.6557, + "step": 9422 + }, + { + "epoch": 0.7604713098216448, + "grad_norm": 0.696666419506073, + "learning_rate": 0.00010988222907361754, + "loss": 2.4897, + "step": 9423 + }, + { + "epoch": 0.7605520135582278, + "grad_norm": 0.6836280822753906, + "learning_rate": 0.00010986651932810756, + "loss": 2.5146, + "step": 9424 + }, + { + "epoch": 0.7606327172948107, + "grad_norm": 0.7269579768180847, + "learning_rate": 0.00010985080933669815, + "loss": 2.5314, + "step": 9425 + }, + { + "epoch": 0.7607134210313937, + "grad_norm": 0.6862092018127441, + "learning_rate": 0.00010983509909978085, + "loss": 2.5415, + "step": 9426 + }, + { + "epoch": 0.7607941247679768, + "grad_norm": 0.7068747878074646, + "learning_rate": 0.00010981938861774713, + "loss": 2.5919, + "step": 9427 + }, + { + "epoch": 0.7608748285045598, + "grad_norm": 0.699999213218689, + "learning_rate": 0.0001098036778909886, + "loss": 2.5175, + "step": 9428 + }, + { + "epoch": 0.7609555322411428, + "grad_norm": 0.6642772555351257, + "learning_rate": 0.0001097879669198968, + "loss": 2.5721, + "step": 9429 + }, + { + "epoch": 0.7610362359777257, + "grad_norm": 0.7100533843040466, + "learning_rate": 0.00010977225570486323, + "loss": 2.5189, + "step": 9430 + }, + { + "epoch": 0.7611169397143088, + "grad_norm": 0.7289063930511475, + "learning_rate": 0.00010975654424627955, + "loss": 2.6139, + "step": 9431 + }, + { 
+ "epoch": 0.7611976434508918, + "grad_norm": 0.7289659380912781, + "learning_rate": 0.00010974083254453726, + "loss": 2.5201, + "step": 9432 + }, + { + "epoch": 0.7612783471874748, + "grad_norm": 0.7389557957649231, + "learning_rate": 0.000109725120600028, + "loss": 2.559, + "step": 9433 + }, + { + "epoch": 0.7613590509240578, + "grad_norm": 0.7021538615226746, + "learning_rate": 0.00010970940841314327, + "loss": 2.6353, + "step": 9434 + }, + { + "epoch": 0.7614397546606407, + "grad_norm": 0.6614113450050354, + "learning_rate": 0.0001096936959842747, + "loss": 2.54, + "step": 9435 + }, + { + "epoch": 0.7615204583972238, + "grad_norm": 0.6905426979064941, + "learning_rate": 0.00010967798331381392, + "loss": 2.5845, + "step": 9436 + }, + { + "epoch": 0.7616011621338068, + "grad_norm": 0.8183904886245728, + "learning_rate": 0.00010966227040215247, + "loss": 2.5255, + "step": 9437 + }, + { + "epoch": 0.7616818658703898, + "grad_norm": 0.7404630780220032, + "learning_rate": 0.00010964655724968199, + "loss": 2.5726, + "step": 9438 + }, + { + "epoch": 0.7617625696069728, + "grad_norm": 0.657127320766449, + "learning_rate": 0.0001096308438567941, + "loss": 2.6233, + "step": 9439 + }, + { + "epoch": 0.7618432733435558, + "grad_norm": 0.7417906522750854, + "learning_rate": 0.00010961513022388039, + "loss": 2.6361, + "step": 9440 + }, + { + "epoch": 0.7619239770801388, + "grad_norm": 0.6930029988288879, + "learning_rate": 0.00010959941635133249, + "loss": 2.5164, + "step": 9441 + }, + { + "epoch": 0.7620046808167218, + "grad_norm": 0.6897261738777161, + "learning_rate": 0.00010958370223954207, + "loss": 2.5626, + "step": 9442 + }, + { + "epoch": 0.7620853845533048, + "grad_norm": 0.6737398505210876, + "learning_rate": 0.00010956798788890072, + "loss": 2.5342, + "step": 9443 + }, + { + "epoch": 0.7621660882898879, + "grad_norm": 0.6550001502037048, + "learning_rate": 0.0001095522732998001, + "loss": 2.5604, + "step": 9444 + }, + { + "epoch": 0.7622467920264708, + "grad_norm": 
0.7184637784957886, + "learning_rate": 0.00010953655847263187, + "loss": 2.6006, + "step": 9445 + }, + { + "epoch": 0.7623274957630538, + "grad_norm": 0.6188609600067139, + "learning_rate": 0.00010952084340778766, + "loss": 2.4875, + "step": 9446 + }, + { + "epoch": 0.7624081994996368, + "grad_norm": 0.6550862789154053, + "learning_rate": 0.00010950512810565917, + "loss": 2.5794, + "step": 9447 + }, + { + "epoch": 0.7624889032362199, + "grad_norm": 0.6659231781959534, + "learning_rate": 0.000109489412566638, + "loss": 2.5137, + "step": 9448 + }, + { + "epoch": 0.7625696069728028, + "grad_norm": 0.749376118183136, + "learning_rate": 0.00010947369679111592, + "loss": 2.5923, + "step": 9449 + }, + { + "epoch": 0.7626503107093858, + "grad_norm": 0.6597894430160522, + "learning_rate": 0.0001094579807794845, + "loss": 2.5677, + "step": 9450 + }, + { + "epoch": 0.7627310144459688, + "grad_norm": 0.7194519639015198, + "learning_rate": 0.00010944226453213548, + "loss": 2.5754, + "step": 9451 + }, + { + "epoch": 0.7628117181825519, + "grad_norm": 0.6734583377838135, + "learning_rate": 0.00010942654804946057, + "loss": 2.535, + "step": 9452 + }, + { + "epoch": 0.7628924219191349, + "grad_norm": 0.7171904444694519, + "learning_rate": 0.00010941083133185146, + "loss": 2.5431, + "step": 9453 + }, + { + "epoch": 0.7629731256557178, + "grad_norm": 0.6760339736938477, + "learning_rate": 0.00010939511437969978, + "loss": 2.5163, + "step": 9454 + }, + { + "epoch": 0.7630538293923008, + "grad_norm": 0.6720966696739197, + "learning_rate": 0.00010937939719339731, + "loss": 2.5621, + "step": 9455 + }, + { + "epoch": 0.7631345331288839, + "grad_norm": 0.6374503970146179, + "learning_rate": 0.00010936367977333574, + "loss": 2.5007, + "step": 9456 + }, + { + "epoch": 0.7632152368654669, + "grad_norm": 0.6407146453857422, + "learning_rate": 0.00010934796211990684, + "loss": 2.5724, + "step": 9457 + }, + { + "epoch": 0.7632959406020499, + "grad_norm": 0.6685383319854736, + "learning_rate": 
0.00010933224423350225, + "loss": 2.501, + "step": 9458 + }, + { + "epoch": 0.7633766443386328, + "grad_norm": 0.664806604385376, + "learning_rate": 0.00010931652611451373, + "loss": 2.6174, + "step": 9459 + }, + { + "epoch": 0.7634573480752159, + "grad_norm": 0.6383369565010071, + "learning_rate": 0.00010930080776333303, + "loss": 2.557, + "step": 9460 + }, + { + "epoch": 0.7635380518117989, + "grad_norm": 0.6747864484786987, + "learning_rate": 0.0001092850891803519, + "loss": 2.5406, + "step": 9461 + }, + { + "epoch": 0.7636187555483819, + "grad_norm": 0.7312811613082886, + "learning_rate": 0.00010926937036596205, + "loss": 2.5903, + "step": 9462 + }, + { + "epoch": 0.7636994592849649, + "grad_norm": 0.645847737789154, + "learning_rate": 0.00010925365132055529, + "loss": 2.5254, + "step": 9463 + }, + { + "epoch": 0.7637801630215479, + "grad_norm": 0.6466063857078552, + "learning_rate": 0.00010923793204452335, + "loss": 2.5322, + "step": 9464 + }, + { + "epoch": 0.7638608667581309, + "grad_norm": 0.6450574994087219, + "learning_rate": 0.000109222212538258, + "loss": 2.522, + "step": 9465 + }, + { + "epoch": 0.7639415704947139, + "grad_norm": 0.6491848826408386, + "learning_rate": 0.00010920649280215096, + "loss": 2.5545, + "step": 9466 + }, + { + "epoch": 0.7640222742312969, + "grad_norm": 0.6888336539268494, + "learning_rate": 0.0001091907728365941, + "loss": 2.5217, + "step": 9467 + }, + { + "epoch": 0.76410297796788, + "grad_norm": 0.702557384967804, + "learning_rate": 0.00010917505264197914, + "loss": 2.5351, + "step": 9468 + }, + { + "epoch": 0.7641836817044629, + "grad_norm": 0.6552408933639526, + "learning_rate": 0.0001091593322186979, + "loss": 2.5115, + "step": 9469 + }, + { + "epoch": 0.7642643854410459, + "grad_norm": 0.7514002919197083, + "learning_rate": 0.00010914361156714212, + "loss": 2.5196, + "step": 9470 + }, + { + "epoch": 0.7643450891776289, + "grad_norm": 0.6692500710487366, + "learning_rate": 0.00010912789068770366, + "loss": 2.5639, + 
"step": 9471 + }, + { + "epoch": 0.764425792914212, + "grad_norm": 0.6567397117614746, + "learning_rate": 0.0001091121695807743, + "loss": 2.5027, + "step": 9472 + }, + { + "epoch": 0.764506496650795, + "grad_norm": 0.6876057982444763, + "learning_rate": 0.00010909644824674587, + "loss": 2.519, + "step": 9473 + }, + { + "epoch": 0.7645872003873779, + "grad_norm": 0.747949481010437, + "learning_rate": 0.00010908072668601017, + "loss": 2.5604, + "step": 9474 + }, + { + "epoch": 0.7646679041239609, + "grad_norm": 0.6371368169784546, + "learning_rate": 0.000109065004898959, + "loss": 2.5853, + "step": 9475 + }, + { + "epoch": 0.764748607860544, + "grad_norm": 0.6472185254096985, + "learning_rate": 0.00010904928288598422, + "loss": 2.5662, + "step": 9476 + }, + { + "epoch": 0.764829311597127, + "grad_norm": 0.7009313702583313, + "learning_rate": 0.00010903356064747765, + "loss": 2.5244, + "step": 9477 + }, + { + "epoch": 0.76491001533371, + "grad_norm": 0.7405661940574646, + "learning_rate": 0.00010901783818383116, + "loss": 2.4963, + "step": 9478 + }, + { + "epoch": 0.7649907190702929, + "grad_norm": 0.7693421840667725, + "learning_rate": 0.00010900211549543658, + "loss": 2.6018, + "step": 9479 + }, + { + "epoch": 0.765071422806876, + "grad_norm": 0.6965410709381104, + "learning_rate": 0.00010898639258268571, + "loss": 2.627, + "step": 9480 + }, + { + "epoch": 0.765152126543459, + "grad_norm": 0.7167130708694458, + "learning_rate": 0.00010897066944597046, + "loss": 2.5298, + "step": 9481 + }, + { + "epoch": 0.765232830280042, + "grad_norm": 0.7159689664840698, + "learning_rate": 0.00010895494608568268, + "loss": 2.5179, + "step": 9482 + }, + { + "epoch": 0.7653135340166249, + "grad_norm": 0.7329332232475281, + "learning_rate": 0.00010893922250221423, + "loss": 2.6498, + "step": 9483 + }, + { + "epoch": 0.765394237753208, + "grad_norm": 0.6912567019462585, + "learning_rate": 0.000108923498695957, + "loss": 2.5679, + "step": 9484 + }, + { + "epoch": 0.765474941489791, + 
"grad_norm": 0.7030324935913086, + "learning_rate": 0.00010890777466730285, + "loss": 2.5678, + "step": 9485 + }, + { + "epoch": 0.765555645226374, + "grad_norm": 0.7238864898681641, + "learning_rate": 0.00010889205041664365, + "loss": 2.5525, + "step": 9486 + }, + { + "epoch": 0.765636348962957, + "grad_norm": 0.6623672842979431, + "learning_rate": 0.00010887632594437134, + "loss": 2.4857, + "step": 9487 + }, + { + "epoch": 0.7657170526995399, + "grad_norm": 0.726645827293396, + "learning_rate": 0.00010886060125087776, + "loss": 2.5405, + "step": 9488 + }, + { + "epoch": 0.765797756436123, + "grad_norm": 0.6624459624290466, + "learning_rate": 0.00010884487633655487, + "loss": 2.5538, + "step": 9489 + }, + { + "epoch": 0.765878460172706, + "grad_norm": 0.7198002934455872, + "learning_rate": 0.00010882915120179453, + "loss": 2.5808, + "step": 9490 + }, + { + "epoch": 0.765959163909289, + "grad_norm": 0.7545582056045532, + "learning_rate": 0.00010881342584698862, + "loss": 2.6059, + "step": 9491 + }, + { + "epoch": 0.766039867645872, + "grad_norm": 0.6748257279396057, + "learning_rate": 0.00010879770027252915, + "loss": 2.5203, + "step": 9492 + }, + { + "epoch": 0.766120571382455, + "grad_norm": 0.7376208901405334, + "learning_rate": 0.00010878197447880796, + "loss": 2.5255, + "step": 9493 + }, + { + "epoch": 0.766201275119038, + "grad_norm": 0.7589401006698608, + "learning_rate": 0.00010876624846621704, + "loss": 2.6304, + "step": 9494 + }, + { + "epoch": 0.766281978855621, + "grad_norm": 0.6963146924972534, + "learning_rate": 0.00010875052223514827, + "loss": 2.5547, + "step": 9495 + }, + { + "epoch": 0.766362682592204, + "grad_norm": 0.6660788059234619, + "learning_rate": 0.00010873479578599361, + "loss": 2.5922, + "step": 9496 + }, + { + "epoch": 0.7664433863287871, + "grad_norm": 0.7506482005119324, + "learning_rate": 0.00010871906911914502, + "loss": 2.5383, + "step": 9497 + }, + { + "epoch": 0.76652409006537, + "grad_norm": 0.7514285445213318, + 
"learning_rate": 0.00010870334223499443, + "loss": 2.5551, + "step": 9498 + }, + { + "epoch": 0.766604793801953, + "grad_norm": 0.6461809873580933, + "learning_rate": 0.00010868761513393379, + "loss": 2.5367, + "step": 9499 + }, + { + "epoch": 0.766685497538536, + "grad_norm": 0.6328238844871521, + "learning_rate": 0.00010867188781635512, + "loss": 2.5505, + "step": 9500 + }, + { + "epoch": 0.7667662012751191, + "grad_norm": 0.7090224027633667, + "learning_rate": 0.00010865616028265027, + "loss": 2.5921, + "step": 9501 + }, + { + "epoch": 0.766846905011702, + "grad_norm": 0.6404605507850647, + "learning_rate": 0.0001086404325332113, + "loss": 2.5357, + "step": 9502 + }, + { + "epoch": 0.766927608748285, + "grad_norm": 0.652477502822876, + "learning_rate": 0.00010862470456843016, + "loss": 2.5277, + "step": 9503 + }, + { + "epoch": 0.767008312484868, + "grad_norm": 0.7045448422431946, + "learning_rate": 0.00010860897638869887, + "loss": 2.5712, + "step": 9504 + }, + { + "epoch": 0.7670890162214511, + "grad_norm": 0.7024295926094055, + "learning_rate": 0.00010859324799440936, + "loss": 2.5976, + "step": 9505 + }, + { + "epoch": 0.7671697199580341, + "grad_norm": 0.7165585160255432, + "learning_rate": 0.00010857751938595364, + "loss": 2.5378, + "step": 9506 + }, + { + "epoch": 0.767250423694617, + "grad_norm": 0.7037522196769714, + "learning_rate": 0.0001085617905637237, + "loss": 2.554, + "step": 9507 + }, + { + "epoch": 0.7673311274312, + "grad_norm": 0.738210916519165, + "learning_rate": 0.00010854606152811163, + "loss": 2.5102, + "step": 9508 + }, + { + "epoch": 0.7674118311677831, + "grad_norm": 0.7500020861625671, + "learning_rate": 0.0001085303322795093, + "loss": 2.5908, + "step": 9509 + }, + { + "epoch": 0.7674925349043661, + "grad_norm": 0.7669610977172852, + "learning_rate": 0.00010851460281830883, + "loss": 2.5119, + "step": 9510 + }, + { + "epoch": 0.7675732386409491, + "grad_norm": 0.6619212031364441, + "learning_rate": 0.00010849887314490217, + "loss": 
2.5622, + "step": 9511 + }, + { + "epoch": 0.767653942377532, + "grad_norm": 0.7142546772956848, + "learning_rate": 0.00010848314325968136, + "loss": 2.596, + "step": 9512 + }, + { + "epoch": 0.7677346461141151, + "grad_norm": 0.7365403175354004, + "learning_rate": 0.0001084674131630385, + "loss": 2.5695, + "step": 9513 + }, + { + "epoch": 0.7678153498506981, + "grad_norm": 0.7843711972236633, + "learning_rate": 0.00010845168285536555, + "loss": 2.5707, + "step": 9514 + }, + { + "epoch": 0.7678960535872811, + "grad_norm": 0.6391385197639465, + "learning_rate": 0.00010843595233705454, + "loss": 2.5523, + "step": 9515 + }, + { + "epoch": 0.7679767573238641, + "grad_norm": 0.6955631971359253, + "learning_rate": 0.00010842022160849758, + "loss": 2.5072, + "step": 9516 + }, + { + "epoch": 0.7680574610604471, + "grad_norm": 0.7291388511657715, + "learning_rate": 0.00010840449067008665, + "loss": 2.5786, + "step": 9517 + }, + { + "epoch": 0.7681381647970301, + "grad_norm": 0.7988889813423157, + "learning_rate": 0.00010838875952221387, + "loss": 2.5622, + "step": 9518 + }, + { + "epoch": 0.7682188685336131, + "grad_norm": 0.726271390914917, + "learning_rate": 0.00010837302816527129, + "loss": 2.5479, + "step": 9519 + }, + { + "epoch": 0.7682995722701961, + "grad_norm": 0.7305205464363098, + "learning_rate": 0.00010835729659965095, + "loss": 2.5946, + "step": 9520 + }, + { + "epoch": 0.7683802760067792, + "grad_norm": 0.7843366265296936, + "learning_rate": 0.00010834156482574493, + "loss": 2.5212, + "step": 9521 + }, + { + "epoch": 0.7684609797433621, + "grad_norm": 0.6988845467567444, + "learning_rate": 0.00010832583284394529, + "loss": 2.5174, + "step": 9522 + }, + { + "epoch": 0.7685416834799451, + "grad_norm": 0.7088077068328857, + "learning_rate": 0.00010831010065464414, + "loss": 2.5253, + "step": 9523 + }, + { + "epoch": 0.7686223872165281, + "grad_norm": 0.7447031140327454, + "learning_rate": 0.00010829436825823358, + "loss": 2.6045, + "step": 9524 + }, + { + 
"epoch": 0.7687030909531112, + "grad_norm": 0.6865237951278687, + "learning_rate": 0.00010827863565510566, + "loss": 2.558, + "step": 9525 + }, + { + "epoch": 0.7687837946896942, + "grad_norm": 0.7748900651931763, + "learning_rate": 0.0001082629028456525, + "loss": 2.5694, + "step": 9526 + }, + { + "epoch": 0.7688644984262771, + "grad_norm": 0.7031759023666382, + "learning_rate": 0.00010824716983026622, + "loss": 2.5171, + "step": 9527 + }, + { + "epoch": 0.7689452021628601, + "grad_norm": 0.7627702355384827, + "learning_rate": 0.00010823143660933888, + "loss": 2.5715, + "step": 9528 + }, + { + "epoch": 0.7690259058994432, + "grad_norm": 0.707815945148468, + "learning_rate": 0.00010821570318326264, + "loss": 2.5281, + "step": 9529 + }, + { + "epoch": 0.7691066096360262, + "grad_norm": 0.6833841800689697, + "learning_rate": 0.00010819996955242962, + "loss": 2.5702, + "step": 9530 + }, + { + "epoch": 0.7691873133726091, + "grad_norm": 0.7029415369033813, + "learning_rate": 0.00010818423571723189, + "loss": 2.5331, + "step": 9531 + }, + { + "epoch": 0.7692680171091921, + "grad_norm": 0.6442921161651611, + "learning_rate": 0.00010816850167806161, + "loss": 2.5423, + "step": 9532 + }, + { + "epoch": 0.7693487208457752, + "grad_norm": 0.7259004712104797, + "learning_rate": 0.00010815276743531093, + "loss": 2.6014, + "step": 9533 + }, + { + "epoch": 0.7694294245823582, + "grad_norm": 0.6483473777770996, + "learning_rate": 0.00010813703298937199, + "loss": 2.5268, + "step": 9534 + }, + { + "epoch": 0.7695101283189412, + "grad_norm": 0.6805520057678223, + "learning_rate": 0.00010812129834063691, + "loss": 2.5536, + "step": 9535 + }, + { + "epoch": 0.7695908320555241, + "grad_norm": 0.7120587825775146, + "learning_rate": 0.00010810556348949783, + "loss": 2.518, + "step": 9536 + }, + { + "epoch": 0.7696715357921071, + "grad_norm": 0.7280872464179993, + "learning_rate": 0.00010808982843634692, + "loss": 2.5525, + "step": 9537 + }, + { + "epoch": 0.7697522395286902, + 
"grad_norm": 0.68332439661026, + "learning_rate": 0.00010807409318157636, + "loss": 2.6318, + "step": 9538 + }, + { + "epoch": 0.7698329432652732, + "grad_norm": 0.655352771282196, + "learning_rate": 0.00010805835772557826, + "loss": 2.5781, + "step": 9539 + }, + { + "epoch": 0.7699136470018562, + "grad_norm": 0.7675400972366333, + "learning_rate": 0.00010804262206874484, + "loss": 2.5542, + "step": 9540 + }, + { + "epoch": 0.7699943507384391, + "grad_norm": 0.6676837205886841, + "learning_rate": 0.00010802688621146826, + "loss": 2.5411, + "step": 9541 + }, + { + "epoch": 0.7700750544750222, + "grad_norm": 0.7378436326980591, + "learning_rate": 0.00010801115015414067, + "loss": 2.5416, + "step": 9542 + }, + { + "epoch": 0.7701557582116052, + "grad_norm": 0.7330371141433716, + "learning_rate": 0.0001079954138971543, + "loss": 2.5154, + "step": 9543 + }, + { + "epoch": 0.7702364619481882, + "grad_norm": 0.6792974472045898, + "learning_rate": 0.00010797967744090131, + "loss": 2.5328, + "step": 9544 + }, + { + "epoch": 0.7703171656847712, + "grad_norm": 0.7129618525505066, + "learning_rate": 0.00010796394078577392, + "loss": 2.5688, + "step": 9545 + }, + { + "epoch": 0.7703978694213542, + "grad_norm": 0.6900608539581299, + "learning_rate": 0.00010794820393216429, + "loss": 2.5659, + "step": 9546 + }, + { + "epoch": 0.7704785731579372, + "grad_norm": 0.6798564195632935, + "learning_rate": 0.00010793246688046464, + "loss": 2.5746, + "step": 9547 + }, + { + "epoch": 0.7705592768945202, + "grad_norm": 0.7132395505905151, + "learning_rate": 0.00010791672963106715, + "loss": 2.6277, + "step": 9548 + }, + { + "epoch": 0.7706399806311032, + "grad_norm": 0.6762476563453674, + "learning_rate": 0.0001079009921843641, + "loss": 2.5265, + "step": 9549 + }, + { + "epoch": 0.7707206843676863, + "grad_norm": 0.7223351001739502, + "learning_rate": 0.00010788525454074765, + "loss": 2.6255, + "step": 9550 + }, + { + "epoch": 0.7708013881042692, + "grad_norm": 0.7383624315261841, + 
"learning_rate": 0.00010786951670061008, + "loss": 2.5744, + "step": 9551 + }, + { + "epoch": 0.7708820918408522, + "grad_norm": 0.6677328944206238, + "learning_rate": 0.00010785377866434355, + "loss": 2.5594, + "step": 9552 + }, + { + "epoch": 0.7709627955774352, + "grad_norm": 0.6572195887565613, + "learning_rate": 0.00010783804043234032, + "loss": 2.5582, + "step": 9553 + }, + { + "epoch": 0.7710434993140183, + "grad_norm": 0.6837800741195679, + "learning_rate": 0.00010782230200499265, + "loss": 2.5311, + "step": 9554 + }, + { + "epoch": 0.7711242030506013, + "grad_norm": 0.7232153415679932, + "learning_rate": 0.00010780656338269277, + "loss": 2.5074, + "step": 9555 + }, + { + "epoch": 0.7712049067871842, + "grad_norm": 0.6722296476364136, + "learning_rate": 0.00010779082456583291, + "loss": 2.551, + "step": 9556 + }, + { + "epoch": 0.7712856105237672, + "grad_norm": 0.6461100578308105, + "learning_rate": 0.00010777508555480535, + "loss": 2.5723, + "step": 9557 + }, + { + "epoch": 0.7713663142603503, + "grad_norm": 0.6573290824890137, + "learning_rate": 0.0001077593463500023, + "loss": 2.4967, + "step": 9558 + }, + { + "epoch": 0.7714470179969333, + "grad_norm": 0.7184738516807556, + "learning_rate": 0.0001077436069518161, + "loss": 2.6703, + "step": 9559 + }, + { + "epoch": 0.7715277217335162, + "grad_norm": 0.7226557731628418, + "learning_rate": 0.00010772786736063895, + "loss": 2.6118, + "step": 9560 + }, + { + "epoch": 0.7716084254700992, + "grad_norm": 0.6800956130027771, + "learning_rate": 0.00010771212757686318, + "loss": 2.578, + "step": 9561 + }, + { + "epoch": 0.7716891292066823, + "grad_norm": 0.6657535433769226, + "learning_rate": 0.00010769638760088099, + "loss": 2.5291, + "step": 9562 + }, + { + "epoch": 0.7717698329432653, + "grad_norm": 0.620527982711792, + "learning_rate": 0.00010768064743308471, + "loss": 2.5518, + "step": 9563 + }, + { + "epoch": 0.7718505366798483, + "grad_norm": 0.693760097026825, + "learning_rate": 0.00010766490707386663, + 
"loss": 2.52, + "step": 9564 + }, + { + "epoch": 0.7719312404164312, + "grad_norm": 0.6674148440361023, + "learning_rate": 0.000107649166523619, + "loss": 2.5197, + "step": 9565 + }, + { + "epoch": 0.7720119441530143, + "grad_norm": 0.6844033598899841, + "learning_rate": 0.00010763342578273419, + "loss": 2.5842, + "step": 9566 + }, + { + "epoch": 0.7720926478895973, + "grad_norm": 0.6891880035400391, + "learning_rate": 0.00010761768485160442, + "loss": 2.5349, + "step": 9567 + }, + { + "epoch": 0.7721733516261803, + "grad_norm": 0.7157394289970398, + "learning_rate": 0.00010760194373062204, + "loss": 2.5762, + "step": 9568 + }, + { + "epoch": 0.7722540553627633, + "grad_norm": 0.7522526383399963, + "learning_rate": 0.00010758620242017936, + "loss": 2.5348, + "step": 9569 + }, + { + "epoch": 0.7723347590993463, + "grad_norm": 0.6817746162414551, + "learning_rate": 0.00010757046092066869, + "loss": 2.5836, + "step": 9570 + }, + { + "epoch": 0.7724154628359293, + "grad_norm": 0.7274518013000488, + "learning_rate": 0.00010755471923248232, + "loss": 2.5276, + "step": 9571 + }, + { + "epoch": 0.7724961665725123, + "grad_norm": 0.6735557913780212, + "learning_rate": 0.00010753897735601264, + "loss": 2.6116, + "step": 9572 + }, + { + "epoch": 0.7725768703090953, + "grad_norm": 0.6626406908035278, + "learning_rate": 0.00010752323529165186, + "loss": 2.5778, + "step": 9573 + }, + { + "epoch": 0.7726575740456784, + "grad_norm": 0.6627367734909058, + "learning_rate": 0.00010750749303979246, + "loss": 2.5839, + "step": 9574 + }, + { + "epoch": 0.7727382777822613, + "grad_norm": 0.6658251881599426, + "learning_rate": 0.0001074917506008267, + "loss": 2.5233, + "step": 9575 + }, + { + "epoch": 0.7728189815188443, + "grad_norm": 0.6969848871231079, + "learning_rate": 0.00010747600797514692, + "loss": 2.5169, + "step": 9576 + }, + { + "epoch": 0.7728996852554273, + "grad_norm": 0.7313554883003235, + "learning_rate": 0.00010746026516314549, + "loss": 2.5528, + "step": 9577 + }, + { + 
"epoch": 0.7729803889920104, + "grad_norm": 0.6467077136039734, + "learning_rate": 0.00010744452216521472, + "loss": 2.5158, + "step": 9578 + }, + { + "epoch": 0.7730610927285934, + "grad_norm": 0.6808056235313416, + "learning_rate": 0.00010742877898174702, + "loss": 2.5346, + "step": 9579 + }, + { + "epoch": 0.7731417964651763, + "grad_norm": 0.7537400722503662, + "learning_rate": 0.00010741303561313474, + "loss": 2.5621, + "step": 9580 + }, + { + "epoch": 0.7732225002017593, + "grad_norm": 0.6715610027313232, + "learning_rate": 0.00010739729205977021, + "loss": 2.5384, + "step": 9581 + }, + { + "epoch": 0.7733032039383424, + "grad_norm": 0.7129234075546265, + "learning_rate": 0.00010738154832204586, + "loss": 2.5639, + "step": 9582 + }, + { + "epoch": 0.7733839076749254, + "grad_norm": 0.7156025171279907, + "learning_rate": 0.00010736580440035397, + "loss": 2.5427, + "step": 9583 + }, + { + "epoch": 0.7734646114115084, + "grad_norm": 0.7394191026687622, + "learning_rate": 0.00010735006029508703, + "loss": 2.5809, + "step": 9584 + }, + { + "epoch": 0.7735453151480913, + "grad_norm": 0.7117684483528137, + "learning_rate": 0.00010733431600663737, + "loss": 2.5807, + "step": 9585 + }, + { + "epoch": 0.7736260188846744, + "grad_norm": 0.6622862219810486, + "learning_rate": 0.00010731857153539737, + "loss": 2.5277, + "step": 9586 + }, + { + "epoch": 0.7737067226212574, + "grad_norm": 0.7744547128677368, + "learning_rate": 0.00010730282688175943, + "loss": 2.6119, + "step": 9587 + }, + { + "epoch": 0.7737874263578404, + "grad_norm": 0.6804926991462708, + "learning_rate": 0.00010728708204611597, + "loss": 2.534, + "step": 9588 + }, + { + "epoch": 0.7738681300944233, + "grad_norm": 0.7115367650985718, + "learning_rate": 0.00010727133702885937, + "loss": 2.542, + "step": 9589 + }, + { + "epoch": 0.7739488338310063, + "grad_norm": 0.7623847723007202, + "learning_rate": 0.00010725559183038205, + "loss": 2.587, + "step": 9590 + }, + { + "epoch": 0.7740295375675894, + 
"grad_norm": 0.6612982153892517, + "learning_rate": 0.00010723984645107641, + "loss": 2.5257, + "step": 9591 + }, + { + "epoch": 0.7741102413041724, + "grad_norm": 0.7553900480270386, + "learning_rate": 0.00010722410089133488, + "loss": 2.6311, + "step": 9592 + }, + { + "epoch": 0.7741909450407554, + "grad_norm": 0.7541414499282837, + "learning_rate": 0.00010720835515154983, + "loss": 2.5978, + "step": 9593 + }, + { + "epoch": 0.7742716487773383, + "grad_norm": 0.6690947413444519, + "learning_rate": 0.00010719260923211376, + "loss": 2.568, + "step": 9594 + }, + { + "epoch": 0.7743523525139214, + "grad_norm": 0.7282151579856873, + "learning_rate": 0.00010717686313341909, + "loss": 2.5375, + "step": 9595 + }, + { + "epoch": 0.7744330562505044, + "grad_norm": 0.6862902045249939, + "learning_rate": 0.00010716111685585821, + "loss": 2.5503, + "step": 9596 + }, + { + "epoch": 0.7745137599870874, + "grad_norm": 0.7076265811920166, + "learning_rate": 0.00010714537039982357, + "loss": 2.4766, + "step": 9597 + }, + { + "epoch": 0.7745944637236704, + "grad_norm": 0.7063891887664795, + "learning_rate": 0.00010712962376570761, + "loss": 2.5822, + "step": 9598 + }, + { + "epoch": 0.7746751674602534, + "grad_norm": 0.6975609064102173, + "learning_rate": 0.00010711387695390282, + "loss": 2.597, + "step": 9599 + }, + { + "epoch": 0.7747558711968364, + "grad_norm": 0.6790002584457397, + "learning_rate": 0.0001070981299648016, + "loss": 2.5705, + "step": 9600 + }, + { + "epoch": 0.7748365749334194, + "grad_norm": 0.6493679881095886, + "learning_rate": 0.00010708238279879643, + "loss": 2.49, + "step": 9601 + }, + { + "epoch": 0.7749172786700024, + "grad_norm": 0.6741142868995667, + "learning_rate": 0.00010706663545627977, + "loss": 2.6008, + "step": 9602 + }, + { + "epoch": 0.7749979824065855, + "grad_norm": 0.6753309965133667, + "learning_rate": 0.00010705088793764408, + "loss": 2.536, + "step": 9603 + }, + { + "epoch": 0.7750786861431684, + "grad_norm": 0.6879377365112305, + 
"learning_rate": 0.00010703514024328183, + "loss": 2.5884, + "step": 9604 + }, + { + "epoch": 0.7751593898797514, + "grad_norm": 0.6535949110984802, + "learning_rate": 0.00010701939237358549, + "loss": 2.5489, + "step": 9605 + }, + { + "epoch": 0.7752400936163344, + "grad_norm": 0.7308230400085449, + "learning_rate": 0.00010700364432894756, + "loss": 2.5679, + "step": 9606 + }, + { + "epoch": 0.7753207973529175, + "grad_norm": 0.7016584277153015, + "learning_rate": 0.00010698789610976052, + "loss": 2.5678, + "step": 9607 + }, + { + "epoch": 0.7754015010895005, + "grad_norm": 0.7181541323661804, + "learning_rate": 0.00010697214771641682, + "loss": 2.5004, + "step": 9608 + }, + { + "epoch": 0.7754822048260834, + "grad_norm": 0.6414844989776611, + "learning_rate": 0.00010695639914930895, + "loss": 2.4896, + "step": 9609 + }, + { + "epoch": 0.7755629085626664, + "grad_norm": 0.7288017868995667, + "learning_rate": 0.00010694065040882943, + "loss": 2.5945, + "step": 9610 + }, + { + "epoch": 0.7756436122992495, + "grad_norm": 0.6808066368103027, + "learning_rate": 0.00010692490149537079, + "loss": 2.5973, + "step": 9611 + }, + { + "epoch": 0.7757243160358325, + "grad_norm": 0.7924454212188721, + "learning_rate": 0.00010690915240932553, + "loss": 2.5448, + "step": 9612 + }, + { + "epoch": 0.7758050197724154, + "grad_norm": 0.6466094851493835, + "learning_rate": 0.00010689340315108606, + "loss": 2.5065, + "step": 9613 + }, + { + "epoch": 0.7758857235089984, + "grad_norm": 0.6775460243225098, + "learning_rate": 0.00010687765372104502, + "loss": 2.5238, + "step": 9614 + }, + { + "epoch": 0.7759664272455815, + "grad_norm": 0.6901230812072754, + "learning_rate": 0.00010686190411959484, + "loss": 2.5109, + "step": 9615 + }, + { + "epoch": 0.7760471309821645, + "grad_norm": 0.7032039165496826, + "learning_rate": 0.00010684615434712808, + "loss": 2.6094, + "step": 9616 + }, + { + "epoch": 0.7761278347187475, + "grad_norm": 0.7008969187736511, + "learning_rate": 
0.00010683040440403727, + "loss": 2.5758, + "step": 9617 + }, + { + "epoch": 0.7762085384553304, + "grad_norm": 0.6909677386283875, + "learning_rate": 0.00010681465429071491, + "loss": 2.5373, + "step": 9618 + }, + { + "epoch": 0.7762892421919135, + "grad_norm": 0.699030339717865, + "learning_rate": 0.00010679890400755355, + "loss": 2.577, + "step": 9619 + }, + { + "epoch": 0.7763699459284965, + "grad_norm": 0.7012344598770142, + "learning_rate": 0.00010678315355494575, + "loss": 2.5205, + "step": 9620 + }, + { + "epoch": 0.7764506496650795, + "grad_norm": 0.7693915367126465, + "learning_rate": 0.000106767402933284, + "loss": 2.5947, + "step": 9621 + }, + { + "epoch": 0.7765313534016625, + "grad_norm": 0.7635772228240967, + "learning_rate": 0.00010675165214296093, + "loss": 2.6221, + "step": 9622 + }, + { + "epoch": 0.7766120571382455, + "grad_norm": 0.701411783695221, + "learning_rate": 0.000106735901184369, + "loss": 2.5236, + "step": 9623 + }, + { + "epoch": 0.7766927608748285, + "grad_norm": 0.7283998727798462, + "learning_rate": 0.00010672015005790079, + "loss": 2.5581, + "step": 9624 + }, + { + "epoch": 0.7767734646114115, + "grad_norm": 0.7069897055625916, + "learning_rate": 0.0001067043987639489, + "loss": 2.5541, + "step": 9625 + }, + { + "epoch": 0.7768541683479945, + "grad_norm": 0.7419753074645996, + "learning_rate": 0.00010668864730290586, + "loss": 2.5992, + "step": 9626 + }, + { + "epoch": 0.7769348720845776, + "grad_norm": 0.6651501059532166, + "learning_rate": 0.00010667289567516426, + "loss": 2.546, + "step": 9627 + }, + { + "epoch": 0.7770155758211605, + "grad_norm": 0.7265670895576477, + "learning_rate": 0.00010665714388111665, + "loss": 2.611, + "step": 9628 + }, + { + "epoch": 0.7770962795577435, + "grad_norm": 0.6520028114318848, + "learning_rate": 0.00010664139192115559, + "loss": 2.5433, + "step": 9629 + }, + { + "epoch": 0.7771769832943265, + "grad_norm": 0.6990057826042175, + "learning_rate": 0.0001066256397956737, + "loss": 2.5325, + 
"step": 9630 + }, + { + "epoch": 0.7772576870309096, + "grad_norm": 0.7353312373161316, + "learning_rate": 0.00010660988750506355, + "loss": 2.4707, + "step": 9631 + }, + { + "epoch": 0.7773383907674926, + "grad_norm": 0.6810272932052612, + "learning_rate": 0.00010659413504971774, + "loss": 2.5618, + "step": 9632 + }, + { + "epoch": 0.7774190945040755, + "grad_norm": 0.6480081081390381, + "learning_rate": 0.00010657838243002883, + "loss": 2.4543, + "step": 9633 + }, + { + "epoch": 0.7774997982406585, + "grad_norm": 0.6617380976676941, + "learning_rate": 0.00010656262964638942, + "loss": 2.5628, + "step": 9634 + }, + { + "epoch": 0.7775805019772416, + "grad_norm": 0.6761382222175598, + "learning_rate": 0.00010654687669919212, + "loss": 2.5433, + "step": 9635 + }, + { + "epoch": 0.7776612057138246, + "grad_norm": 0.6733867526054382, + "learning_rate": 0.00010653112358882957, + "loss": 2.5282, + "step": 9636 + }, + { + "epoch": 0.7777419094504076, + "grad_norm": 0.6854631304740906, + "learning_rate": 0.00010651537031569433, + "loss": 2.5997, + "step": 9637 + }, + { + "epoch": 0.7778226131869905, + "grad_norm": 0.7451226115226746, + "learning_rate": 0.00010649961688017904, + "loss": 2.5058, + "step": 9638 + }, + { + "epoch": 0.7779033169235735, + "grad_norm": 0.6744229197502136, + "learning_rate": 0.0001064838632826763, + "loss": 2.5962, + "step": 9639 + }, + { + "epoch": 0.7779840206601566, + "grad_norm": 0.7568119764328003, + "learning_rate": 0.00010646810952357873, + "loss": 2.5896, + "step": 9640 + }, + { + "epoch": 0.7780647243967396, + "grad_norm": 0.6860085725784302, + "learning_rate": 0.00010645235560327899, + "loss": 2.5675, + "step": 9641 + }, + { + "epoch": 0.7781454281333225, + "grad_norm": 0.6491742134094238, + "learning_rate": 0.00010643660152216965, + "loss": 2.5374, + "step": 9642 + }, + { + "epoch": 0.7782261318699055, + "grad_norm": 0.6664023399353027, + "learning_rate": 0.0001064208472806434, + "loss": 2.4679, + "step": 9643 + }, + { + "epoch": 
0.7783068356064886, + "grad_norm": 0.6595140099525452, + "learning_rate": 0.00010640509287909284, + "loss": 2.5045, + "step": 9644 + }, + { + "epoch": 0.7783875393430716, + "grad_norm": 0.6788576245307922, + "learning_rate": 0.0001063893383179106, + "loss": 2.5706, + "step": 9645 + }, + { + "epoch": 0.7784682430796546, + "grad_norm": 0.6741334199905396, + "learning_rate": 0.00010637358359748939, + "loss": 2.5763, + "step": 9646 + }, + { + "epoch": 0.7785489468162375, + "grad_norm": 0.6837517023086548, + "learning_rate": 0.0001063578287182218, + "loss": 2.5484, + "step": 9647 + }, + { + "epoch": 0.7786296505528206, + "grad_norm": 0.6604229211807251, + "learning_rate": 0.00010634207368050048, + "loss": 2.5465, + "step": 9648 + }, + { + "epoch": 0.7787103542894036, + "grad_norm": 0.6528951525688171, + "learning_rate": 0.00010632631848471813, + "loss": 2.5409, + "step": 9649 + }, + { + "epoch": 0.7787910580259866, + "grad_norm": 0.6615377068519592, + "learning_rate": 0.00010631056313126734, + "loss": 2.5545, + "step": 9650 + }, + { + "epoch": 0.7788717617625696, + "grad_norm": 0.666033923625946, + "learning_rate": 0.00010629480762054089, + "loss": 2.5341, + "step": 9651 + }, + { + "epoch": 0.7789524654991526, + "grad_norm": 0.7022622227668762, + "learning_rate": 0.00010627905195293135, + "loss": 2.5206, + "step": 9652 + }, + { + "epoch": 0.7790331692357356, + "grad_norm": 0.7175850868225098, + "learning_rate": 0.00010626329612883141, + "loss": 2.5912, + "step": 9653 + }, + { + "epoch": 0.7791138729723186, + "grad_norm": 0.6592069268226624, + "learning_rate": 0.00010624754014863379, + "loss": 2.5076, + "step": 9654 + }, + { + "epoch": 0.7791945767089016, + "grad_norm": 0.645893931388855, + "learning_rate": 0.0001062317840127311, + "loss": 2.5124, + "step": 9655 + }, + { + "epoch": 0.7792752804454847, + "grad_norm": 0.6638232469558716, + "learning_rate": 0.00010621602772151607, + "loss": 2.5182, + "step": 9656 + }, + { + "epoch": 0.7793559841820676, + "grad_norm": 
0.6718387603759766, + "learning_rate": 0.0001062002712753814, + "loss": 2.4773, + "step": 9657 + }, + { + "epoch": 0.7794366879186506, + "grad_norm": 0.6402876377105713, + "learning_rate": 0.00010618451467471972, + "loss": 2.5557, + "step": 9658 + }, + { + "epoch": 0.7795173916552336, + "grad_norm": 0.6898398399353027, + "learning_rate": 0.00010616875791992382, + "loss": 2.5557, + "step": 9659 + }, + { + "epoch": 0.7795980953918167, + "grad_norm": 0.6718475222587585, + "learning_rate": 0.00010615300101138633, + "loss": 2.5335, + "step": 9660 + }, + { + "epoch": 0.7796787991283997, + "grad_norm": 0.6436911225318909, + "learning_rate": 0.00010613724394949995, + "loss": 2.5214, + "step": 9661 + }, + { + "epoch": 0.7797595028649826, + "grad_norm": 0.7554156184196472, + "learning_rate": 0.00010612148673465743, + "loss": 2.5526, + "step": 9662 + }, + { + "epoch": 0.7798402066015656, + "grad_norm": 0.6728504300117493, + "learning_rate": 0.00010610572936725147, + "loss": 2.5935, + "step": 9663 + }, + { + "epoch": 0.7799209103381487, + "grad_norm": 0.6793323159217834, + "learning_rate": 0.00010608997184767476, + "loss": 2.5515, + "step": 9664 + }, + { + "epoch": 0.7800016140747317, + "grad_norm": 0.7242898941040039, + "learning_rate": 0.00010607421417631999, + "loss": 2.5332, + "step": 9665 + }, + { + "epoch": 0.7800823178113147, + "grad_norm": 0.6719244718551636, + "learning_rate": 0.00010605845635357996, + "loss": 2.5191, + "step": 9666 + }, + { + "epoch": 0.7801630215478976, + "grad_norm": 0.6836631894111633, + "learning_rate": 0.00010604269837984737, + "loss": 2.6489, + "step": 9667 + }, + { + "epoch": 0.7802437252844807, + "grad_norm": 0.6833824515342712, + "learning_rate": 0.00010602694025551496, + "loss": 2.4906, + "step": 9668 + }, + { + "epoch": 0.7803244290210637, + "grad_norm": 0.7449159026145935, + "learning_rate": 0.0001060111819809754, + "loss": 2.5301, + "step": 9669 + }, + { + "epoch": 0.7804051327576467, + "grad_norm": 0.7149158120155334, + "learning_rate": 
0.00010599542355662149, + "loss": 2.5097, + "step": 9670 + }, + { + "epoch": 0.7804858364942296, + "grad_norm": 0.6616973876953125, + "learning_rate": 0.00010597966498284595, + "loss": 2.5928, + "step": 9671 + }, + { + "epoch": 0.7805665402308127, + "grad_norm": 0.6556531190872192, + "learning_rate": 0.00010596390626004154, + "loss": 2.5543, + "step": 9672 + }, + { + "epoch": 0.7806472439673957, + "grad_norm": 0.6585283875465393, + "learning_rate": 0.000105948147388601, + "loss": 2.5244, + "step": 9673 + }, + { + "epoch": 0.7807279477039787, + "grad_norm": 0.6484133005142212, + "learning_rate": 0.00010593238836891704, + "loss": 2.4996, + "step": 9674 + }, + { + "epoch": 0.7808086514405617, + "grad_norm": 0.6681119799613953, + "learning_rate": 0.00010591662920138248, + "loss": 2.5322, + "step": 9675 + }, + { + "epoch": 0.7808893551771448, + "grad_norm": 0.709403395652771, + "learning_rate": 0.00010590086988639005, + "loss": 2.5554, + "step": 9676 + }, + { + "epoch": 0.7809700589137277, + "grad_norm": 0.6734669804573059, + "learning_rate": 0.00010588511042433251, + "loss": 2.5452, + "step": 9677 + }, + { + "epoch": 0.7810507626503107, + "grad_norm": 0.6800141930580139, + "learning_rate": 0.00010586935081560268, + "loss": 2.5154, + "step": 9678 + }, + { + "epoch": 0.7811314663868937, + "grad_norm": 0.7757244110107422, + "learning_rate": 0.00010585359106059326, + "loss": 2.5935, + "step": 9679 + }, + { + "epoch": 0.7812121701234768, + "grad_norm": 0.7288491725921631, + "learning_rate": 0.00010583783115969699, + "loss": 2.5276, + "step": 9680 + }, + { + "epoch": 0.7812928738600597, + "grad_norm": 0.6785164475440979, + "learning_rate": 0.00010582207111330678, + "loss": 2.5907, + "step": 9681 + }, + { + "epoch": 0.7813735775966427, + "grad_norm": 0.6651367545127869, + "learning_rate": 0.0001058063109218153, + "loss": 2.545, + "step": 9682 + }, + { + "epoch": 0.7814542813332257, + "grad_norm": 0.6657043695449829, + "learning_rate": 0.0001057905505856154, + "loss": 2.5548, 
+ "step": 9683 + }, + { + "epoch": 0.7815349850698088, + "grad_norm": 0.6486692428588867, + "learning_rate": 0.00010577479010509986, + "loss": 2.5589, + "step": 9684 + }, + { + "epoch": 0.7816156888063918, + "grad_norm": 0.700749397277832, + "learning_rate": 0.0001057590294806614, + "loss": 2.6008, + "step": 9685 + }, + { + "epoch": 0.7816963925429747, + "grad_norm": 0.647051215171814, + "learning_rate": 0.00010574326871269289, + "loss": 2.4894, + "step": 9686 + }, + { + "epoch": 0.7817770962795577, + "grad_norm": 0.6932066679000854, + "learning_rate": 0.00010572750780158713, + "loss": 2.5256, + "step": 9687 + }, + { + "epoch": 0.7818578000161408, + "grad_norm": 0.6330733895301819, + "learning_rate": 0.00010571174674773689, + "loss": 2.5242, + "step": 9688 + }, + { + "epoch": 0.7819385037527238, + "grad_norm": 0.6476379036903381, + "learning_rate": 0.00010569598555153499, + "loss": 2.552, + "step": 9689 + }, + { + "epoch": 0.7820192074893068, + "grad_norm": 0.661204993724823, + "learning_rate": 0.00010568022421337424, + "loss": 2.4869, + "step": 9690 + }, + { + "epoch": 0.7820999112258897, + "grad_norm": 0.6663263440132141, + "learning_rate": 0.00010566446273364746, + "loss": 2.5134, + "step": 9691 + }, + { + "epoch": 0.7821806149624727, + "grad_norm": 0.6982834339141846, + "learning_rate": 0.00010564870111274748, + "loss": 2.5755, + "step": 9692 + }, + { + "epoch": 0.7822613186990558, + "grad_norm": 0.6266167759895325, + "learning_rate": 0.00010563293935106706, + "loss": 2.5413, + "step": 9693 + }, + { + "epoch": 0.7823420224356388, + "grad_norm": 0.6484279632568359, + "learning_rate": 0.0001056171774489991, + "loss": 2.5579, + "step": 9694 + }, + { + "epoch": 0.7824227261722217, + "grad_norm": 0.674933910369873, + "learning_rate": 0.00010560141540693638, + "loss": 2.5364, + "step": 9695 + }, + { + "epoch": 0.7825034299088047, + "grad_norm": 0.7961840033531189, + "learning_rate": 0.00010558565322527174, + "loss": 2.5143, + "step": 9696 + }, + { + "epoch": 
0.7825841336453878, + "grad_norm": 0.697158694267273, + "learning_rate": 0.00010556989090439804, + "loss": 2.5341, + "step": 9697 + }, + { + "epoch": 0.7826648373819708, + "grad_norm": 0.6912708282470703, + "learning_rate": 0.00010555412844470806, + "loss": 2.5331, + "step": 9698 + }, + { + "epoch": 0.7827455411185538, + "grad_norm": 0.7078350186347961, + "learning_rate": 0.00010553836584659474, + "loss": 2.5752, + "step": 9699 + }, + { + "epoch": 0.7828262448551367, + "grad_norm": 0.6421065926551819, + "learning_rate": 0.00010552260311045082, + "loss": 2.5393, + "step": 9700 + }, + { + "epoch": 0.7829069485917198, + "grad_norm": 0.644120454788208, + "learning_rate": 0.00010550684023666918, + "loss": 2.5062, + "step": 9701 + }, + { + "epoch": 0.7829876523283028, + "grad_norm": 0.7038589715957642, + "learning_rate": 0.00010549107722564275, + "loss": 2.6074, + "step": 9702 + }, + { + "epoch": 0.7830683560648858, + "grad_norm": 0.6692953109741211, + "learning_rate": 0.00010547531407776427, + "loss": 2.5801, + "step": 9703 + }, + { + "epoch": 0.7831490598014688, + "grad_norm": 0.7059200406074524, + "learning_rate": 0.00010545955079342669, + "loss": 2.5579, + "step": 9704 + }, + { + "epoch": 0.7832297635380518, + "grad_norm": 0.7126718759536743, + "learning_rate": 0.0001054437873730228, + "loss": 2.5764, + "step": 9705 + }, + { + "epoch": 0.7833104672746348, + "grad_norm": 0.696784257888794, + "learning_rate": 0.0001054280238169455, + "loss": 2.5256, + "step": 9706 + }, + { + "epoch": 0.7833911710112178, + "grad_norm": 0.7473082542419434, + "learning_rate": 0.00010541226012558767, + "loss": 2.5983, + "step": 9707 + }, + { + "epoch": 0.7834718747478008, + "grad_norm": 0.6598967909812927, + "learning_rate": 0.00010539649629934219, + "loss": 2.5267, + "step": 9708 + }, + { + "epoch": 0.7835525784843839, + "grad_norm": 0.7168934345245361, + "learning_rate": 0.00010538073233860188, + "loss": 2.5278, + "step": 9709 + }, + { + "epoch": 0.7836332822209668, + "grad_norm": 
0.6848951578140259, + "learning_rate": 0.00010536496824375968, + "loss": 2.5267, + "step": 9710 + }, + { + "epoch": 0.7837139859575498, + "grad_norm": 0.7276272773742676, + "learning_rate": 0.0001053492040152084, + "loss": 2.5706, + "step": 9711 + }, + { + "epoch": 0.7837946896941328, + "grad_norm": 0.6929399371147156, + "learning_rate": 0.00010533343965334101, + "loss": 2.5184, + "step": 9712 + }, + { + "epoch": 0.7838753934307159, + "grad_norm": 0.7497181296348572, + "learning_rate": 0.00010531767515855037, + "loss": 2.5626, + "step": 9713 + }, + { + "epoch": 0.7839560971672989, + "grad_norm": 0.6536200046539307, + "learning_rate": 0.00010530191053122935, + "loss": 2.5909, + "step": 9714 + }, + { + "epoch": 0.7840368009038818, + "grad_norm": 0.6750395894050598, + "learning_rate": 0.00010528614577177087, + "loss": 2.5119, + "step": 9715 + }, + { + "epoch": 0.7841175046404648, + "grad_norm": 0.6284878849983215, + "learning_rate": 0.00010527038088056782, + "loss": 2.5417, + "step": 9716 + }, + { + "epoch": 0.7841982083770479, + "grad_norm": 0.6529444456100464, + "learning_rate": 0.00010525461585801308, + "loss": 2.5865, + "step": 9717 + }, + { + "epoch": 0.7842789121136309, + "grad_norm": 0.7332968711853027, + "learning_rate": 0.00010523885070449959, + "loss": 2.561, + "step": 9718 + }, + { + "epoch": 0.7843596158502139, + "grad_norm": 0.7054178714752197, + "learning_rate": 0.00010522308542042025, + "loss": 2.623, + "step": 9719 + }, + { + "epoch": 0.7844403195867968, + "grad_norm": 0.6837820410728455, + "learning_rate": 0.00010520732000616798, + "loss": 2.5586, + "step": 9720 + }, + { + "epoch": 0.7845210233233799, + "grad_norm": 0.7339439392089844, + "learning_rate": 0.00010519155446213565, + "loss": 2.5374, + "step": 9721 + }, + { + "epoch": 0.7846017270599629, + "grad_norm": 0.7625028491020203, + "learning_rate": 0.00010517578878871624, + "loss": 2.5663, + "step": 9722 + }, + { + "epoch": 0.7846824307965459, + "grad_norm": 0.6749752759933472, + "learning_rate": 
0.00010516002298630263, + "loss": 2.5744, + "step": 9723 + }, + { + "epoch": 0.7847631345331288, + "grad_norm": 0.6702882647514343, + "learning_rate": 0.00010514425705528776, + "loss": 2.6247, + "step": 9724 + }, + { + "epoch": 0.7848438382697119, + "grad_norm": 0.6641737222671509, + "learning_rate": 0.00010512849099606457, + "loss": 2.5792, + "step": 9725 + }, + { + "epoch": 0.7849245420062949, + "grad_norm": 0.7522993683815002, + "learning_rate": 0.00010511272480902597, + "loss": 2.5941, + "step": 9726 + }, + { + "epoch": 0.7850052457428779, + "grad_norm": 0.7507709860801697, + "learning_rate": 0.00010509695849456487, + "loss": 2.5312, + "step": 9727 + }, + { + "epoch": 0.7850859494794609, + "grad_norm": 0.7101978063583374, + "learning_rate": 0.0001050811920530743, + "loss": 2.5833, + "step": 9728 + }, + { + "epoch": 0.785166653216044, + "grad_norm": 0.6814672946929932, + "learning_rate": 0.0001050654254849471, + "loss": 2.5466, + "step": 9729 + }, + { + "epoch": 0.7852473569526269, + "grad_norm": 0.7250106930732727, + "learning_rate": 0.0001050496587905763, + "loss": 2.5144, + "step": 9730 + }, + { + "epoch": 0.7853280606892099, + "grad_norm": 0.7125658392906189, + "learning_rate": 0.00010503389197035474, + "loss": 2.5384, + "step": 9731 + }, + { + "epoch": 0.7854087644257929, + "grad_norm": 0.7076827883720398, + "learning_rate": 0.00010501812502467547, + "loss": 2.4879, + "step": 9732 + }, + { + "epoch": 0.785489468162376, + "grad_norm": 0.632216215133667, + "learning_rate": 0.00010500235795393141, + "loss": 2.5678, + "step": 9733 + }, + { + "epoch": 0.785570171898959, + "grad_norm": 0.7376949191093445, + "learning_rate": 0.00010498659075851551, + "loss": 2.5024, + "step": 9734 + }, + { + "epoch": 0.7856508756355419, + "grad_norm": 0.6730546951293945, + "learning_rate": 0.00010497082343882072, + "loss": 2.5001, + "step": 9735 + }, + { + "epoch": 0.7857315793721249, + "grad_norm": 0.6958187818527222, + "learning_rate": 0.00010495505599524002, + "loss": 2.538, + 
"step": 9736 + }, + { + "epoch": 0.785812283108708, + "grad_norm": 0.6882508397102356, + "learning_rate": 0.00010493928842816638, + "loss": 2.5247, + "step": 9737 + }, + { + "epoch": 0.785892986845291, + "grad_norm": 0.711086630821228, + "learning_rate": 0.00010492352073799276, + "loss": 2.5721, + "step": 9738 + }, + { + "epoch": 0.7859736905818739, + "grad_norm": 0.7217094898223877, + "learning_rate": 0.00010490775292511214, + "loss": 2.5827, + "step": 9739 + }, + { + "epoch": 0.7860543943184569, + "grad_norm": 0.6812087893486023, + "learning_rate": 0.0001048919849899175, + "loss": 2.532, + "step": 9740 + }, + { + "epoch": 0.7861350980550399, + "grad_norm": 0.7449110150337219, + "learning_rate": 0.00010487621693280176, + "loss": 2.5611, + "step": 9741 + }, + { + "epoch": 0.786215801791623, + "grad_norm": 0.7297104001045227, + "learning_rate": 0.00010486044875415797, + "loss": 2.5173, + "step": 9742 + }, + { + "epoch": 0.786296505528206, + "grad_norm": 0.6741474270820618, + "learning_rate": 0.0001048446804543791, + "loss": 2.5451, + "step": 9743 + }, + { + "epoch": 0.7863772092647889, + "grad_norm": 0.6450859308242798, + "learning_rate": 0.00010482891203385812, + "loss": 2.551, + "step": 9744 + }, + { + "epoch": 0.7864579130013719, + "grad_norm": 0.6867123246192932, + "learning_rate": 0.00010481314349298805, + "loss": 2.4875, + "step": 9745 + }, + { + "epoch": 0.786538616737955, + "grad_norm": 0.6951552629470825, + "learning_rate": 0.00010479737483216183, + "loss": 2.6253, + "step": 9746 + }, + { + "epoch": 0.786619320474538, + "grad_norm": 0.6786869764328003, + "learning_rate": 0.0001047816060517725, + "loss": 2.5551, + "step": 9747 + }, + { + "epoch": 0.786700024211121, + "grad_norm": 0.698957622051239, + "learning_rate": 0.00010476583715221306, + "loss": 2.5554, + "step": 9748 + }, + { + "epoch": 0.7867807279477039, + "grad_norm": 0.6407502889633179, + "learning_rate": 0.00010475006813387648, + "loss": 2.5112, + "step": 9749 + }, + { + "epoch": 
0.786861431684287, + "grad_norm": 0.660418689250946, + "learning_rate": 0.00010473429899715581, + "loss": 2.5557, + "step": 9750 + }, + { + "epoch": 0.78694213542087, + "grad_norm": 0.71445631980896, + "learning_rate": 0.00010471852974244403, + "loss": 2.5169, + "step": 9751 + }, + { + "epoch": 0.787022839157453, + "grad_norm": 0.6620494723320007, + "learning_rate": 0.00010470276037013414, + "loss": 2.5517, + "step": 9752 + }, + { + "epoch": 0.787103542894036, + "grad_norm": 0.6921235918998718, + "learning_rate": 0.00010468699088061917, + "loss": 2.5246, + "step": 9753 + }, + { + "epoch": 0.787184246630619, + "grad_norm": 0.6617140769958496, + "learning_rate": 0.00010467122127429214, + "loss": 2.4941, + "step": 9754 + }, + { + "epoch": 0.787264950367202, + "grad_norm": 0.6549816727638245, + "learning_rate": 0.00010465545155154608, + "loss": 2.5189, + "step": 9755 + }, + { + "epoch": 0.787345654103785, + "grad_norm": 0.7030060887336731, + "learning_rate": 0.00010463968171277396, + "loss": 2.5058, + "step": 9756 + }, + { + "epoch": 0.787426357840368, + "grad_norm": 0.7294049859046936, + "learning_rate": 0.00010462391175836886, + "loss": 2.5166, + "step": 9757 + }, + { + "epoch": 0.787507061576951, + "grad_norm": 0.6407562494277954, + "learning_rate": 0.00010460814168872382, + "loss": 2.5391, + "step": 9758 + }, + { + "epoch": 0.787587765313534, + "grad_norm": 0.8024646639823914, + "learning_rate": 0.0001045923715042318, + "loss": 2.7034, + "step": 9759 + }, + { + "epoch": 0.787668469050117, + "grad_norm": 0.7160943150520325, + "learning_rate": 0.00010457660120528592, + "loss": 2.6016, + "step": 9760 + }, + { + "epoch": 0.7877491727867, + "grad_norm": 0.6987707018852234, + "learning_rate": 0.00010456083079227916, + "loss": 2.5428, + "step": 9761 + }, + { + "epoch": 0.7878298765232831, + "grad_norm": 0.7235369086265564, + "learning_rate": 0.00010454506026560453, + "loss": 2.517, + "step": 9762 + }, + { + "epoch": 0.787910580259866, + "grad_norm": 0.6827502846717834, + 
"learning_rate": 0.00010452928962565518, + "loss": 2.5777, + "step": 9763 + }, + { + "epoch": 0.787991283996449, + "grad_norm": 0.71755450963974, + "learning_rate": 0.00010451351887282408, + "loss": 2.6004, + "step": 9764 + }, + { + "epoch": 0.788071987733032, + "grad_norm": 0.6988046765327454, + "learning_rate": 0.00010449774800750427, + "loss": 2.6116, + "step": 9765 + }, + { + "epoch": 0.7881526914696151, + "grad_norm": 0.6959548592567444, + "learning_rate": 0.00010448197703008884, + "loss": 2.5856, + "step": 9766 + }, + { + "epoch": 0.7882333952061981, + "grad_norm": 0.687042772769928, + "learning_rate": 0.00010446620594097079, + "loss": 2.5167, + "step": 9767 + }, + { + "epoch": 0.788314098942781, + "grad_norm": 0.6950173377990723, + "learning_rate": 0.00010445043474054325, + "loss": 2.5157, + "step": 9768 + }, + { + "epoch": 0.788394802679364, + "grad_norm": 0.680768609046936, + "learning_rate": 0.00010443466342919926, + "loss": 2.6177, + "step": 9769 + }, + { + "epoch": 0.7884755064159471, + "grad_norm": 0.7790142893791199, + "learning_rate": 0.00010441889200733181, + "loss": 2.5761, + "step": 9770 + }, + { + "epoch": 0.7885562101525301, + "grad_norm": 0.6207798719406128, + "learning_rate": 0.00010440312047533406, + "loss": 2.5305, + "step": 9771 + }, + { + "epoch": 0.7886369138891131, + "grad_norm": 0.7143635749816895, + "learning_rate": 0.00010438734883359903, + "loss": 2.5922, + "step": 9772 + }, + { + "epoch": 0.788717617625696, + "grad_norm": 0.7234248518943787, + "learning_rate": 0.00010437157708251977, + "loss": 2.6051, + "step": 9773 + }, + { + "epoch": 0.7887983213622791, + "grad_norm": 0.6602753400802612, + "learning_rate": 0.00010435580522248942, + "loss": 2.6002, + "step": 9774 + }, + { + "epoch": 0.7888790250988621, + "grad_norm": 0.6929246783256531, + "learning_rate": 0.00010434003325390101, + "loss": 2.5798, + "step": 9775 + }, + { + "epoch": 0.7889597288354451, + "grad_norm": 0.7355811595916748, + "learning_rate": 0.00010432426117714762, + 
"loss": 2.5859, + "step": 9776 + }, + { + "epoch": 0.789040432572028, + "grad_norm": 0.7009611129760742, + "learning_rate": 0.00010430848899262233, + "loss": 2.5535, + "step": 9777 + }, + { + "epoch": 0.7891211363086111, + "grad_norm": 0.6699070930480957, + "learning_rate": 0.00010429271670071823, + "loss": 2.5687, + "step": 9778 + }, + { + "epoch": 0.7892018400451941, + "grad_norm": 0.6632630228996277, + "learning_rate": 0.00010427694430182844, + "loss": 2.5359, + "step": 9779 + }, + { + "epoch": 0.7892825437817771, + "grad_norm": 0.7256911993026733, + "learning_rate": 0.000104261171796346, + "loss": 2.5432, + "step": 9780 + }, + { + "epoch": 0.7893632475183601, + "grad_norm": 0.6654312610626221, + "learning_rate": 0.000104245399184664, + "loss": 2.5432, + "step": 9781 + }, + { + "epoch": 0.7894439512549432, + "grad_norm": 0.6808900237083435, + "learning_rate": 0.00010422962646717557, + "loss": 2.4951, + "step": 9782 + }, + { + "epoch": 0.7895246549915261, + "grad_norm": 0.6655945181846619, + "learning_rate": 0.00010421385364427378, + "loss": 2.5152, + "step": 9783 + }, + { + "epoch": 0.7896053587281091, + "grad_norm": 0.8399274349212646, + "learning_rate": 0.00010419808071635178, + "loss": 2.5688, + "step": 9784 + }, + { + "epoch": 0.7896860624646921, + "grad_norm": 0.6412226557731628, + "learning_rate": 0.00010418230768380262, + "loss": 2.5527, + "step": 9785 + }, + { + "epoch": 0.7897667662012752, + "grad_norm": 0.6505058407783508, + "learning_rate": 0.0001041665345470194, + "loss": 2.5768, + "step": 9786 + }, + { + "epoch": 0.7898474699378581, + "grad_norm": 0.6297653317451477, + "learning_rate": 0.00010415076130639526, + "loss": 2.5372, + "step": 9787 + }, + { + "epoch": 0.7899281736744411, + "grad_norm": 0.6524460315704346, + "learning_rate": 0.00010413498796232331, + "loss": 2.5047, + "step": 9788 + }, + { + "epoch": 0.7900088774110241, + "grad_norm": 0.6637924313545227, + "learning_rate": 0.00010411921451519662, + "loss": 2.508, + "step": 9789 + }, + { + 
"epoch": 0.7900895811476072, + "grad_norm": 0.6423435211181641, + "learning_rate": 0.00010410344096540836, + "loss": 2.4597, + "step": 9790 + }, + { + "epoch": 0.7901702848841902, + "grad_norm": 0.6361977458000183, + "learning_rate": 0.00010408766731335163, + "loss": 2.5921, + "step": 9791 + }, + { + "epoch": 0.7902509886207731, + "grad_norm": 0.6792182922363281, + "learning_rate": 0.00010407189355941953, + "loss": 2.5543, + "step": 9792 + }, + { + "epoch": 0.7903316923573561, + "grad_norm": 0.6998419761657715, + "learning_rate": 0.00010405611970400519, + "loss": 2.5333, + "step": 9793 + }, + { + "epoch": 0.7904123960939391, + "grad_norm": 0.6730015873908997, + "learning_rate": 0.00010404034574750174, + "loss": 2.596, + "step": 9794 + }, + { + "epoch": 0.7904930998305222, + "grad_norm": 0.7120258808135986, + "learning_rate": 0.00010402457169030235, + "loss": 2.5314, + "step": 9795 + }, + { + "epoch": 0.7905738035671052, + "grad_norm": 0.6553651690483093, + "learning_rate": 0.0001040087975328001, + "loss": 2.4973, + "step": 9796 + }, + { + "epoch": 0.7906545073036881, + "grad_norm": 0.6506681442260742, + "learning_rate": 0.00010399302327538812, + "loss": 2.588, + "step": 9797 + }, + { + "epoch": 0.7907352110402711, + "grad_norm": 0.6737257242202759, + "learning_rate": 0.00010397724891845957, + "loss": 2.5454, + "step": 9798 + }, + { + "epoch": 0.7908159147768542, + "grad_norm": 0.670120894908905, + "learning_rate": 0.00010396147446240756, + "loss": 2.4926, + "step": 9799 + }, + { + "epoch": 0.7908966185134372, + "grad_norm": 0.7028468251228333, + "learning_rate": 0.00010394569990762529, + "loss": 2.5727, + "step": 9800 + }, + { + "epoch": 0.7909773222500202, + "grad_norm": 0.7084455490112305, + "learning_rate": 0.00010392992525450584, + "loss": 2.547, + "step": 9801 + }, + { + "epoch": 0.7910580259866031, + "grad_norm": 0.732694685459137, + "learning_rate": 0.0001039141505034424, + "loss": 2.5871, + "step": 9802 + }, + { + "epoch": 0.7911387297231862, + "grad_norm": 
0.7214515209197998, + "learning_rate": 0.00010389837565482807, + "loss": 2.5672, + "step": 9803 + }, + { + "epoch": 0.7912194334597692, + "grad_norm": 0.6495330333709717, + "learning_rate": 0.00010388260070905604, + "loss": 2.5266, + "step": 9804 + }, + { + "epoch": 0.7913001371963522, + "grad_norm": 0.6930941343307495, + "learning_rate": 0.00010386682566651945, + "loss": 2.5734, + "step": 9805 + }, + { + "epoch": 0.7913808409329351, + "grad_norm": 0.714214563369751, + "learning_rate": 0.00010385105052761148, + "loss": 2.4987, + "step": 9806 + }, + { + "epoch": 0.7914615446695182, + "grad_norm": 0.7525388598442078, + "learning_rate": 0.00010383527529272523, + "loss": 2.5427, + "step": 9807 + }, + { + "epoch": 0.7915422484061012, + "grad_norm": 0.6088642477989197, + "learning_rate": 0.00010381949996225389, + "loss": 2.5018, + "step": 9808 + }, + { + "epoch": 0.7916229521426842, + "grad_norm": 0.6797540187835693, + "learning_rate": 0.00010380372453659066, + "loss": 2.5235, + "step": 9809 + }, + { + "epoch": 0.7917036558792672, + "grad_norm": 0.6754054427146912, + "learning_rate": 0.00010378794901612865, + "loss": 2.5343, + "step": 9810 + }, + { + "epoch": 0.7917843596158503, + "grad_norm": 0.7375015020370483, + "learning_rate": 0.00010377217340126106, + "loss": 2.6101, + "step": 9811 + }, + { + "epoch": 0.7918650633524332, + "grad_norm": 0.6487904191017151, + "learning_rate": 0.00010375639769238103, + "loss": 2.5408, + "step": 9812 + }, + { + "epoch": 0.7919457670890162, + "grad_norm": 0.7280275821685791, + "learning_rate": 0.00010374062188988176, + "loss": 2.5503, + "step": 9813 + }, + { + "epoch": 0.7920264708255992, + "grad_norm": 0.6944922208786011, + "learning_rate": 0.00010372484599415644, + "loss": 2.5815, + "step": 9814 + }, + { + "epoch": 0.7921071745621823, + "grad_norm": 0.6970139741897583, + "learning_rate": 0.00010370907000559818, + "loss": 2.546, + "step": 9815 + }, + { + "epoch": 0.7921878782987652, + "grad_norm": 0.7338151335716248, + "learning_rate": 
0.00010369329392460023, + "loss": 2.5449, + "step": 9816 + }, + { + "epoch": 0.7922685820353482, + "grad_norm": 0.7763465642929077, + "learning_rate": 0.00010367751775155574, + "loss": 2.5331, + "step": 9817 + }, + { + "epoch": 0.7923492857719312, + "grad_norm": 0.6892645955085754, + "learning_rate": 0.00010366174148685786, + "loss": 2.5617, + "step": 9818 + }, + { + "epoch": 0.7924299895085143, + "grad_norm": 0.7388250231742859, + "learning_rate": 0.00010364596513089984, + "loss": 2.5236, + "step": 9819 + }, + { + "epoch": 0.7925106932450973, + "grad_norm": 0.7035132646560669, + "learning_rate": 0.00010363018868407482, + "loss": 2.5711, + "step": 9820 + }, + { + "epoch": 0.7925913969816802, + "grad_norm": 0.7087043523788452, + "learning_rate": 0.00010361441214677603, + "loss": 2.5416, + "step": 9821 + }, + { + "epoch": 0.7926721007182632, + "grad_norm": 0.7173168063163757, + "learning_rate": 0.00010359863551939664, + "loss": 2.529, + "step": 9822 + }, + { + "epoch": 0.7927528044548463, + "grad_norm": 0.7007408738136292, + "learning_rate": 0.00010358285880232983, + "loss": 2.5287, + "step": 9823 + }, + { + "epoch": 0.7928335081914293, + "grad_norm": 0.7731965780258179, + "learning_rate": 0.0001035670819959688, + "loss": 2.5913, + "step": 9824 + }, + { + "epoch": 0.7929142119280123, + "grad_norm": 0.6625120639801025, + "learning_rate": 0.00010355130510070681, + "loss": 2.5815, + "step": 9825 + }, + { + "epoch": 0.7929949156645952, + "grad_norm": 0.6628395318984985, + "learning_rate": 0.00010353552811693699, + "loss": 2.512, + "step": 9826 + }, + { + "epoch": 0.7930756194011783, + "grad_norm": 0.6565915942192078, + "learning_rate": 0.00010351975104505256, + "loss": 2.54, + "step": 9827 + }, + { + "epoch": 0.7931563231377613, + "grad_norm": 0.6581636667251587, + "learning_rate": 0.00010350397388544672, + "loss": 2.5462, + "step": 9828 + }, + { + "epoch": 0.7932370268743443, + "grad_norm": 0.705668568611145, + "learning_rate": 0.0001034881966385127, + "loss": 2.5241, + 
"step": 9829 + }, + { + "epoch": 0.7933177306109273, + "grad_norm": 0.7047126293182373, + "learning_rate": 0.00010347241930464373, + "loss": 2.5275, + "step": 9830 + }, + { + "epoch": 0.7933984343475103, + "grad_norm": 0.6285849213600159, + "learning_rate": 0.00010345664188423296, + "loss": 2.518, + "step": 9831 + }, + { + "epoch": 0.7934791380840933, + "grad_norm": 0.697542130947113, + "learning_rate": 0.00010344086437767366, + "loss": 2.5219, + "step": 9832 + }, + { + "epoch": 0.7935598418206763, + "grad_norm": 0.6349283456802368, + "learning_rate": 0.00010342508678535903, + "loss": 2.5277, + "step": 9833 + }, + { + "epoch": 0.7936405455572593, + "grad_norm": 0.7084335088729858, + "learning_rate": 0.00010340930910768225, + "loss": 2.476, + "step": 9834 + }, + { + "epoch": 0.7937212492938424, + "grad_norm": 0.6714156866073608, + "learning_rate": 0.00010339353134503662, + "loss": 2.556, + "step": 9835 + }, + { + "epoch": 0.7938019530304253, + "grad_norm": 0.6687895059585571, + "learning_rate": 0.00010337775349781527, + "loss": 2.5756, + "step": 9836 + }, + { + "epoch": 0.7938826567670083, + "grad_norm": 0.669784665107727, + "learning_rate": 0.00010336197556641152, + "loss": 2.5545, + "step": 9837 + }, + { + "epoch": 0.7939633605035913, + "grad_norm": 0.6738600134849548, + "learning_rate": 0.0001033461975512185, + "loss": 2.5807, + "step": 9838 + }, + { + "epoch": 0.7940440642401744, + "grad_norm": 0.691443681716919, + "learning_rate": 0.00010333041945262953, + "loss": 2.5279, + "step": 9839 + }, + { + "epoch": 0.7941247679767574, + "grad_norm": 0.6283861398696899, + "learning_rate": 0.0001033146412710378, + "loss": 2.5355, + "step": 9840 + }, + { + "epoch": 0.7942054717133403, + "grad_norm": 0.6491204500198364, + "learning_rate": 0.00010329886300683655, + "loss": 2.5431, + "step": 9841 + }, + { + "epoch": 0.7942861754499233, + "grad_norm": 0.6673988103866577, + "learning_rate": 0.00010328308466041898, + "loss": 2.5845, + "step": 9842 + }, + { + "epoch": 
0.7943668791865063, + "grad_norm": 0.6669130325317383, + "learning_rate": 0.00010326730623217837, + "loss": 2.5348, + "step": 9843 + }, + { + "epoch": 0.7944475829230894, + "grad_norm": 0.7003189921379089, + "learning_rate": 0.00010325152772250795, + "loss": 2.5779, + "step": 9844 + }, + { + "epoch": 0.7945282866596723, + "grad_norm": 0.6602177619934082, + "learning_rate": 0.00010323574913180097, + "loss": 2.5527, + "step": 9845 + }, + { + "epoch": 0.7946089903962553, + "grad_norm": 0.7053726315498352, + "learning_rate": 0.00010321997046045066, + "loss": 2.566, + "step": 9846 + }, + { + "epoch": 0.7946896941328383, + "grad_norm": 0.7428076863288879, + "learning_rate": 0.00010320419170885025, + "loss": 2.5348, + "step": 9847 + }, + { + "epoch": 0.7947703978694214, + "grad_norm": 0.7029163837432861, + "learning_rate": 0.00010318841287739303, + "loss": 2.5387, + "step": 9848 + }, + { + "epoch": 0.7948511016060044, + "grad_norm": 0.6159133911132812, + "learning_rate": 0.00010317263396647221, + "loss": 2.5408, + "step": 9849 + }, + { + "epoch": 0.7949318053425873, + "grad_norm": 0.6748857498168945, + "learning_rate": 0.00010315685497648106, + "loss": 2.5299, + "step": 9850 + }, + { + "epoch": 0.7950125090791703, + "grad_norm": 0.6281898021697998, + "learning_rate": 0.00010314107590781284, + "loss": 2.5202, + "step": 9851 + }, + { + "epoch": 0.7950932128157534, + "grad_norm": 0.6602163910865784, + "learning_rate": 0.00010312529676086078, + "loss": 2.5119, + "step": 9852 + }, + { + "epoch": 0.7951739165523364, + "grad_norm": 0.6665403246879578, + "learning_rate": 0.00010310951753601818, + "loss": 2.5913, + "step": 9853 + }, + { + "epoch": 0.7952546202889194, + "grad_norm": 0.6705873012542725, + "learning_rate": 0.00010309373823367827, + "loss": 2.6039, + "step": 9854 + }, + { + "epoch": 0.7953353240255023, + "grad_norm": 0.6571313738822937, + "learning_rate": 0.0001030779588542343, + "loss": 2.5629, + "step": 9855 + }, + { + "epoch": 0.7954160277620854, + "grad_norm": 
0.6597230434417725, + "learning_rate": 0.00010306217939807956, + "loss": 2.5569, + "step": 9856 + }, + { + "epoch": 0.7954967314986684, + "grad_norm": 0.7098817229270935, + "learning_rate": 0.00010304639986560733, + "loss": 2.4736, + "step": 9857 + }, + { + "epoch": 0.7955774352352514, + "grad_norm": 0.628663957118988, + "learning_rate": 0.00010303062025721082, + "loss": 2.5241, + "step": 9858 + }, + { + "epoch": 0.7956581389718343, + "grad_norm": 0.630843460559845, + "learning_rate": 0.00010301484057328333, + "loss": 2.5604, + "step": 9859 + }, + { + "epoch": 0.7957388427084174, + "grad_norm": 0.7457596659660339, + "learning_rate": 0.00010299906081421813, + "loss": 2.5675, + "step": 9860 + }, + { + "epoch": 0.7958195464450004, + "grad_norm": 0.6566091775894165, + "learning_rate": 0.00010298328098040851, + "loss": 2.4918, + "step": 9861 + }, + { + "epoch": 0.7959002501815834, + "grad_norm": 0.657357931137085, + "learning_rate": 0.00010296750107224773, + "loss": 2.5268, + "step": 9862 + }, + { + "epoch": 0.7959809539181664, + "grad_norm": 0.7021927833557129, + "learning_rate": 0.00010295172109012905, + "loss": 2.528, + "step": 9863 + }, + { + "epoch": 0.7960616576547495, + "grad_norm": 0.662053108215332, + "learning_rate": 0.00010293594103444578, + "loss": 2.5483, + "step": 9864 + }, + { + "epoch": 0.7961423613913324, + "grad_norm": 0.776407778263092, + "learning_rate": 0.00010292016090559118, + "loss": 2.6089, + "step": 9865 + }, + { + "epoch": 0.7962230651279154, + "grad_norm": 0.6499512791633606, + "learning_rate": 0.00010290438070395854, + "loss": 2.5609, + "step": 9866 + }, + { + "epoch": 0.7963037688644984, + "grad_norm": 0.6802246570587158, + "learning_rate": 0.00010288860042994113, + "loss": 2.5217, + "step": 9867 + }, + { + "epoch": 0.7963844726010815, + "grad_norm": 0.6371235847473145, + "learning_rate": 0.00010287282008393224, + "loss": 2.4783, + "step": 9868 + }, + { + "epoch": 0.7964651763376644, + "grad_norm": 0.7070169448852539, + "learning_rate": 
0.00010285703966632518, + "loss": 2.5006, + "step": 9869 + }, + { + "epoch": 0.7965458800742474, + "grad_norm": 0.657738208770752, + "learning_rate": 0.00010284125917751323, + "loss": 2.551, + "step": 9870 + }, + { + "epoch": 0.7966265838108304, + "grad_norm": 0.7936853170394897, + "learning_rate": 0.00010282547861788964, + "loss": 2.574, + "step": 9871 + }, + { + "epoch": 0.7967072875474135, + "grad_norm": 0.675715982913971, + "learning_rate": 0.00010280969798784779, + "loss": 2.5288, + "step": 9872 + }, + { + "epoch": 0.7967879912839965, + "grad_norm": 0.6980394124984741, + "learning_rate": 0.00010279391728778092, + "loss": 2.5437, + "step": 9873 + }, + { + "epoch": 0.7968686950205794, + "grad_norm": 0.6580469608306885, + "learning_rate": 0.00010277813651808226, + "loss": 2.5574, + "step": 9874 + }, + { + "epoch": 0.7969493987571624, + "grad_norm": 0.6960238218307495, + "learning_rate": 0.00010276235567914522, + "loss": 2.5477, + "step": 9875 + }, + { + "epoch": 0.7970301024937455, + "grad_norm": 0.704140841960907, + "learning_rate": 0.00010274657477136304, + "loss": 2.5099, + "step": 9876 + }, + { + "epoch": 0.7971108062303285, + "grad_norm": 0.7238990068435669, + "learning_rate": 0.00010273079379512906, + "loss": 2.6182, + "step": 9877 + }, + { + "epoch": 0.7971915099669115, + "grad_norm": 0.6527700424194336, + "learning_rate": 0.00010271501275083657, + "loss": 2.5148, + "step": 9878 + }, + { + "epoch": 0.7972722137034944, + "grad_norm": 0.6665365695953369, + "learning_rate": 0.00010269923163887884, + "loss": 2.5624, + "step": 9879 + }, + { + "epoch": 0.7973529174400775, + "grad_norm": 0.7304019927978516, + "learning_rate": 0.0001026834504596492, + "loss": 2.5537, + "step": 9880 + }, + { + "epoch": 0.7974336211766605, + "grad_norm": 0.6645877957344055, + "learning_rate": 0.00010266766921354099, + "loss": 2.5381, + "step": 9881 + }, + { + "epoch": 0.7975143249132435, + "grad_norm": 0.6817314624786377, + "learning_rate": 0.00010265188790094744, + "loss": 2.5399, 
+ "step": 9882 + }, + { + "epoch": 0.7975950286498265, + "grad_norm": 0.7477232217788696, + "learning_rate": 0.00010263610652226194, + "loss": 2.6461, + "step": 9883 + }, + { + "epoch": 0.7976757323864095, + "grad_norm": 0.7087170481681824, + "learning_rate": 0.00010262032507787777, + "loss": 2.5469, + "step": 9884 + }, + { + "epoch": 0.7977564361229925, + "grad_norm": 0.7093435525894165, + "learning_rate": 0.00010260454356818825, + "loss": 2.5606, + "step": 9885 + }, + { + "epoch": 0.7978371398595755, + "grad_norm": 0.6662636399269104, + "learning_rate": 0.00010258876199358672, + "loss": 2.5415, + "step": 9886 + }, + { + "epoch": 0.7979178435961585, + "grad_norm": 0.6829736232757568, + "learning_rate": 0.00010257298035446644, + "loss": 2.5618, + "step": 9887 + }, + { + "epoch": 0.7979985473327416, + "grad_norm": 0.6872264742851257, + "learning_rate": 0.00010255719865122077, + "loss": 2.5629, + "step": 9888 + }, + { + "epoch": 0.7980792510693245, + "grad_norm": 0.6988633871078491, + "learning_rate": 0.00010254141688424303, + "loss": 2.5191, + "step": 9889 + }, + { + "epoch": 0.7981599548059075, + "grad_norm": 0.6787285804748535, + "learning_rate": 0.00010252563505392654, + "loss": 2.5003, + "step": 9890 + }, + { + "epoch": 0.7982406585424905, + "grad_norm": 0.6703466773033142, + "learning_rate": 0.00010250985316066461, + "loss": 2.5442, + "step": 9891 + }, + { + "epoch": 0.7983213622790736, + "grad_norm": 0.6463642120361328, + "learning_rate": 0.0001024940712048506, + "loss": 2.5236, + "step": 9892 + }, + { + "epoch": 0.7984020660156566, + "grad_norm": 0.6835207939147949, + "learning_rate": 0.0001024782891868778, + "loss": 2.5094, + "step": 9893 + }, + { + "epoch": 0.7984827697522395, + "grad_norm": 0.6621001958847046, + "learning_rate": 0.00010246250710713956, + "loss": 2.5456, + "step": 9894 + }, + { + "epoch": 0.7985634734888225, + "grad_norm": 0.6675469875335693, + "learning_rate": 0.0001024467249660292, + "loss": 2.5312, + "step": 9895 + }, + { + "epoch": 
0.7986441772254055, + "grad_norm": 0.7357796430587769, + "learning_rate": 0.00010243094276394007, + "loss": 2.5374, + "step": 9896 + }, + { + "epoch": 0.7987248809619886, + "grad_norm": 0.7005879878997803, + "learning_rate": 0.00010241516050126549, + "loss": 2.5667, + "step": 9897 + }, + { + "epoch": 0.7988055846985715, + "grad_norm": 0.669870913028717, + "learning_rate": 0.0001023993781783988, + "loss": 2.533, + "step": 9898 + }, + { + "epoch": 0.7988862884351545, + "grad_norm": 0.7584091424942017, + "learning_rate": 0.00010238359579573333, + "loss": 2.5995, + "step": 9899 + }, + { + "epoch": 0.7989669921717375, + "grad_norm": 0.6931570172309875, + "learning_rate": 0.00010236781335366239, + "loss": 2.5506, + "step": 9900 + }, + { + "epoch": 0.7990476959083206, + "grad_norm": 0.6810948848724365, + "learning_rate": 0.0001023520308525794, + "loss": 2.5048, + "step": 9901 + }, + { + "epoch": 0.7991283996449036, + "grad_norm": 0.6857194900512695, + "learning_rate": 0.00010233624829287765, + "loss": 2.5559, + "step": 9902 + }, + { + "epoch": 0.7992091033814865, + "grad_norm": 0.6685707569122314, + "learning_rate": 0.00010232046567495046, + "loss": 2.5661, + "step": 9903 + }, + { + "epoch": 0.7992898071180695, + "grad_norm": 0.6626694202423096, + "learning_rate": 0.00010230468299919121, + "loss": 2.6293, + "step": 9904 + }, + { + "epoch": 0.7993705108546526, + "grad_norm": 0.6407302021980286, + "learning_rate": 0.00010228890026599323, + "loss": 2.5552, + "step": 9905 + }, + { + "epoch": 0.7994512145912356, + "grad_norm": 0.762235701084137, + "learning_rate": 0.00010227311747574986, + "loss": 2.4904, + "step": 9906 + }, + { + "epoch": 0.7995319183278186, + "grad_norm": 0.703507661819458, + "learning_rate": 0.0001022573346288545, + "loss": 2.5684, + "step": 9907 + }, + { + "epoch": 0.7996126220644015, + "grad_norm": 0.82541823387146, + "learning_rate": 0.00010224155172570043, + "loss": 2.521, + "step": 9908 + }, + { + "epoch": 0.7996933258009846, + "grad_norm": 
0.6836804747581482, + "learning_rate": 0.00010222576876668104, + "loss": 2.5364, + "step": 9909 + }, + { + "epoch": 0.7997740295375676, + "grad_norm": 0.7388977408409119, + "learning_rate": 0.00010220998575218966, + "loss": 2.5724, + "step": 9910 + }, + { + "epoch": 0.7998547332741506, + "grad_norm": 0.7380896806716919, + "learning_rate": 0.00010219420268261966, + "loss": 2.5918, + "step": 9911 + }, + { + "epoch": 0.7999354370107336, + "grad_norm": 0.7303522825241089, + "learning_rate": 0.00010217841955836442, + "loss": 2.5432, + "step": 9912 + }, + { + "epoch": 0.8000161407473166, + "grad_norm": 0.6859301924705505, + "learning_rate": 0.00010216263637981727, + "loss": 2.5734, + "step": 9913 + }, + { + "epoch": 0.8000968444838996, + "grad_norm": 0.731910228729248, + "learning_rate": 0.00010214685314737154, + "loss": 2.5227, + "step": 9914 + }, + { + "epoch": 0.8001775482204826, + "grad_norm": 0.7105006575584412, + "learning_rate": 0.00010213106986142062, + "loss": 2.5335, + "step": 9915 + }, + { + "epoch": 0.8002582519570656, + "grad_norm": 0.7337056994438171, + "learning_rate": 0.00010211528652235786, + "loss": 2.6204, + "step": 9916 + }, + { + "epoch": 0.8003389556936487, + "grad_norm": 0.7350614666938782, + "learning_rate": 0.00010209950313057668, + "loss": 2.5264, + "step": 9917 + }, + { + "epoch": 0.8004196594302316, + "grad_norm": 0.6411921977996826, + "learning_rate": 0.00010208371968647036, + "loss": 2.4642, + "step": 9918 + }, + { + "epoch": 0.8005003631668146, + "grad_norm": 0.7601611018180847, + "learning_rate": 0.00010206793619043229, + "loss": 2.6249, + "step": 9919 + }, + { + "epoch": 0.8005810669033976, + "grad_norm": 0.7086012363433838, + "learning_rate": 0.00010205215264285585, + "loss": 2.5508, + "step": 9920 + }, + { + "epoch": 0.8006617706399807, + "grad_norm": 0.7267128825187683, + "learning_rate": 0.00010203636904413443, + "loss": 2.5109, + "step": 9921 + }, + { + "epoch": 0.8007424743765637, + "grad_norm": 0.7606067657470703, + 
"learning_rate": 0.00010202058539466132, + "loss": 2.5172, + "step": 9922 + }, + { + "epoch": 0.8008231781131466, + "grad_norm": 0.7610498666763306, + "learning_rate": 0.00010200480169483, + "loss": 2.5085, + "step": 9923 + }, + { + "epoch": 0.8009038818497296, + "grad_norm": 0.7604225873947144, + "learning_rate": 0.00010198901794503373, + "loss": 2.5615, + "step": 9924 + }, + { + "epoch": 0.8009845855863127, + "grad_norm": 0.739532470703125, + "learning_rate": 0.00010197323414566596, + "loss": 2.5574, + "step": 9925 + }, + { + "epoch": 0.8010652893228957, + "grad_norm": 0.6913303136825562, + "learning_rate": 0.00010195745029712003, + "loss": 2.5403, + "step": 9926 + }, + { + "epoch": 0.8011459930594786, + "grad_norm": 0.6963592767715454, + "learning_rate": 0.0001019416663997893, + "loss": 2.5615, + "step": 9927 + }, + { + "epoch": 0.8012266967960616, + "grad_norm": 0.681481122970581, + "learning_rate": 0.0001019258824540672, + "loss": 2.5125, + "step": 9928 + }, + { + "epoch": 0.8013074005326447, + "grad_norm": 0.7192744016647339, + "learning_rate": 0.00010191009846034709, + "loss": 2.5952, + "step": 9929 + }, + { + "epoch": 0.8013881042692277, + "grad_norm": 0.7030046582221985, + "learning_rate": 0.00010189431441902228, + "loss": 2.5445, + "step": 9930 + }, + { + "epoch": 0.8014688080058107, + "grad_norm": 0.6180598139762878, + "learning_rate": 0.00010187853033048622, + "loss": 2.4902, + "step": 9931 + }, + { + "epoch": 0.8015495117423936, + "grad_norm": 0.7479971051216125, + "learning_rate": 0.0001018627461951323, + "loss": 2.5703, + "step": 9932 + }, + { + "epoch": 0.8016302154789767, + "grad_norm": 0.7339857220649719, + "learning_rate": 0.00010184696201335387, + "loss": 2.5744, + "step": 9933 + }, + { + "epoch": 0.8017109192155597, + "grad_norm": 0.6741397380828857, + "learning_rate": 0.00010183117778554432, + "loss": 2.5777, + "step": 9934 + }, + { + "epoch": 0.8017916229521427, + "grad_norm": 0.6731706857681274, + "learning_rate": 0.00010181539351209699, + 
"loss": 2.5438, + "step": 9935 + }, + { + "epoch": 0.8018723266887257, + "grad_norm": 0.6929418444633484, + "learning_rate": 0.00010179960919340535, + "loss": 2.5308, + "step": 9936 + }, + { + "epoch": 0.8019530304253087, + "grad_norm": 0.7383175492286682, + "learning_rate": 0.00010178382482986271, + "loss": 2.5623, + "step": 9937 + }, + { + "epoch": 0.8020337341618917, + "grad_norm": 0.6872193217277527, + "learning_rate": 0.00010176804042186252, + "loss": 2.5271, + "step": 9938 + }, + { + "epoch": 0.8021144378984747, + "grad_norm": 0.7354295253753662, + "learning_rate": 0.00010175225596979816, + "loss": 2.5122, + "step": 9939 + }, + { + "epoch": 0.8021951416350577, + "grad_norm": 0.7589237689971924, + "learning_rate": 0.00010173647147406297, + "loss": 2.5529, + "step": 9940 + }, + { + "epoch": 0.8022758453716408, + "grad_norm": 0.6998353004455566, + "learning_rate": 0.00010172068693505037, + "loss": 2.4683, + "step": 9941 + }, + { + "epoch": 0.8023565491082237, + "grad_norm": 0.6816055178642273, + "learning_rate": 0.00010170490235315377, + "loss": 2.567, + "step": 9942 + }, + { + "epoch": 0.8024372528448067, + "grad_norm": 0.7188318371772766, + "learning_rate": 0.00010168911772876652, + "loss": 2.5631, + "step": 9943 + }, + { + "epoch": 0.8025179565813897, + "grad_norm": 0.6925922632217407, + "learning_rate": 0.00010167333306228209, + "loss": 2.4872, + "step": 9944 + }, + { + "epoch": 0.8025986603179727, + "grad_norm": 0.7081493735313416, + "learning_rate": 0.00010165754835409377, + "loss": 2.5482, + "step": 9945 + }, + { + "epoch": 0.8026793640545558, + "grad_norm": 0.6838935613632202, + "learning_rate": 0.00010164176360459505, + "loss": 2.541, + "step": 9946 + }, + { + "epoch": 0.8027600677911387, + "grad_norm": 0.6959214210510254, + "learning_rate": 0.00010162597881417928, + "loss": 2.4574, + "step": 9947 + }, + { + "epoch": 0.8028407715277217, + "grad_norm": 0.693004310131073, + "learning_rate": 0.00010161019398323986, + "loss": 2.5553, + "step": 9948 + }, + { 
+ "epoch": 0.8029214752643047, + "grad_norm": 0.6683690547943115, + "learning_rate": 0.00010159440911217022, + "loss": 2.5501, + "step": 9949 + }, + { + "epoch": 0.8030021790008878, + "grad_norm": 0.6797001361846924, + "learning_rate": 0.0001015786242013637, + "loss": 2.5731, + "step": 9950 + }, + { + "epoch": 0.8030828827374707, + "grad_norm": 0.6621012091636658, + "learning_rate": 0.00010156283925121375, + "loss": 2.5278, + "step": 9951 + }, + { + "epoch": 0.8031635864740537, + "grad_norm": 0.7024650573730469, + "learning_rate": 0.00010154705426211377, + "loss": 2.5939, + "step": 9952 + }, + { + "epoch": 0.8032442902106367, + "grad_norm": 0.6756548285484314, + "learning_rate": 0.00010153126923445714, + "loss": 2.5797, + "step": 9953 + }, + { + "epoch": 0.8033249939472198, + "grad_norm": 0.6560662984848022, + "learning_rate": 0.00010151548416863732, + "loss": 2.5358, + "step": 9954 + }, + { + "epoch": 0.8034056976838028, + "grad_norm": 0.7172456979751587, + "learning_rate": 0.00010149969906504766, + "loss": 2.5054, + "step": 9955 + }, + { + "epoch": 0.8034864014203857, + "grad_norm": 0.6379461288452148, + "learning_rate": 0.00010148391392408152, + "loss": 2.5341, + "step": 9956 + }, + { + "epoch": 0.8035671051569687, + "grad_norm": 0.6553892493247986, + "learning_rate": 0.00010146812874613243, + "loss": 2.5618, + "step": 9957 + }, + { + "epoch": 0.8036478088935518, + "grad_norm": 0.6940072178840637, + "learning_rate": 0.00010145234353159372, + "loss": 2.5686, + "step": 9958 + }, + { + "epoch": 0.8037285126301348, + "grad_norm": 0.6641896963119507, + "learning_rate": 0.00010143655828085878, + "loss": 2.5188, + "step": 9959 + }, + { + "epoch": 0.8038092163667178, + "grad_norm": 0.6622887253761292, + "learning_rate": 0.00010142077299432111, + "loss": 2.54, + "step": 9960 + }, + { + "epoch": 0.8038899201033007, + "grad_norm": 0.7216808795928955, + "learning_rate": 0.000101404987672374, + "loss": 2.5775, + "step": 9961 + }, + { + "epoch": 0.8039706238398838, + 
"grad_norm": 0.6544952988624573, + "learning_rate": 0.00010138920231541095, + "loss": 2.6066, + "step": 9962 + }, + { + "epoch": 0.8040513275764668, + "grad_norm": 0.6869354248046875, + "learning_rate": 0.00010137341692382539, + "loss": 2.5157, + "step": 9963 + }, + { + "epoch": 0.8041320313130498, + "grad_norm": 0.6731898784637451, + "learning_rate": 0.00010135763149801063, + "loss": 2.4369, + "step": 9964 + }, + { + "epoch": 0.8042127350496328, + "grad_norm": 0.6943373084068298, + "learning_rate": 0.00010134184603836017, + "loss": 2.5529, + "step": 9965 + }, + { + "epoch": 0.8042934387862158, + "grad_norm": 0.729928195476532, + "learning_rate": 0.00010132606054526739, + "loss": 2.5814, + "step": 9966 + }, + { + "epoch": 0.8043741425227988, + "grad_norm": 0.6491130590438843, + "learning_rate": 0.00010131027501912571, + "loss": 2.5246, + "step": 9967 + }, + { + "epoch": 0.8044548462593818, + "grad_norm": 0.747756838798523, + "learning_rate": 0.00010129448946032857, + "loss": 2.513, + "step": 9968 + }, + { + "epoch": 0.8045355499959648, + "grad_norm": 0.6449645757675171, + "learning_rate": 0.00010127870386926935, + "loss": 2.5232, + "step": 9969 + }, + { + "epoch": 0.8046162537325479, + "grad_norm": 0.6425037980079651, + "learning_rate": 0.0001012629182463415, + "loss": 2.5065, + "step": 9970 + }, + { + "epoch": 0.8046969574691308, + "grad_norm": 0.7340624332427979, + "learning_rate": 0.00010124713259193843, + "loss": 2.5325, + "step": 9971 + }, + { + "epoch": 0.8047776612057138, + "grad_norm": 0.7308940291404724, + "learning_rate": 0.00010123134690645352, + "loss": 2.5717, + "step": 9972 + }, + { + "epoch": 0.8048583649422968, + "grad_norm": 0.7128338813781738, + "learning_rate": 0.00010121556119028028, + "loss": 2.5548, + "step": 9973 + }, + { + "epoch": 0.8049390686788799, + "grad_norm": 0.7027677893638611, + "learning_rate": 0.00010119977544381207, + "loss": 2.5311, + "step": 9974 + }, + { + "epoch": 0.8050197724154629, + "grad_norm": 0.7022054195404053, + 
"learning_rate": 0.00010118398966744229, + "loss": 2.5177, + "step": 9975 + }, + { + "epoch": 0.8051004761520458, + "grad_norm": 0.7382696270942688, + "learning_rate": 0.00010116820386156441, + "loss": 2.532, + "step": 9976 + }, + { + "epoch": 0.8051811798886288, + "grad_norm": 0.6968613862991333, + "learning_rate": 0.00010115241802657181, + "loss": 2.536, + "step": 9977 + }, + { + "epoch": 0.8052618836252119, + "grad_norm": 0.8277899026870728, + "learning_rate": 0.00010113663216285798, + "loss": 2.5963, + "step": 9978 + }, + { + "epoch": 0.8053425873617949, + "grad_norm": 0.677707314491272, + "learning_rate": 0.00010112084627081629, + "loss": 2.5041, + "step": 9979 + }, + { + "epoch": 0.8054232910983778, + "grad_norm": 0.6943314075469971, + "learning_rate": 0.00010110506035084017, + "loss": 2.4776, + "step": 9980 + }, + { + "epoch": 0.8055039948349608, + "grad_norm": 0.6948177218437195, + "learning_rate": 0.00010108927440332306, + "loss": 2.5306, + "step": 9981 + }, + { + "epoch": 0.8055846985715439, + "grad_norm": 0.6873918771743774, + "learning_rate": 0.0001010734884286584, + "loss": 2.5783, + "step": 9982 + }, + { + "epoch": 0.8056654023081269, + "grad_norm": 0.6370649933815002, + "learning_rate": 0.00010105770242723958, + "loss": 2.5584, + "step": 9983 + }, + { + "epoch": 0.8057461060447099, + "grad_norm": 0.7594422698020935, + "learning_rate": 0.00010104191639946008, + "loss": 2.543, + "step": 9984 + }, + { + "epoch": 0.8058268097812928, + "grad_norm": 0.697380542755127, + "learning_rate": 0.00010102613034571327, + "loss": 2.5295, + "step": 9985 + }, + { + "epoch": 0.8059075135178759, + "grad_norm": 0.6597251892089844, + "learning_rate": 0.00010101034426639264, + "loss": 2.5917, + "step": 9986 + }, + { + "epoch": 0.8059882172544589, + "grad_norm": 0.6583479046821594, + "learning_rate": 0.00010099455816189156, + "loss": 2.6206, + "step": 9987 + }, + { + "epoch": 0.8060689209910419, + "grad_norm": 0.6603943705558777, + "learning_rate": 0.00010097877203260349, + 
"loss": 2.5223, + "step": 9988 + }, + { + "epoch": 0.8061496247276249, + "grad_norm": 0.716454267501831, + "learning_rate": 0.00010096298587892188, + "loss": 2.5572, + "step": 9989 + }, + { + "epoch": 0.806230328464208, + "grad_norm": 0.6511488556861877, + "learning_rate": 0.00010094719970124016, + "loss": 2.5815, + "step": 9990 + }, + { + "epoch": 0.8063110322007909, + "grad_norm": 0.6969261169433594, + "learning_rate": 0.00010093141349995173, + "loss": 2.5902, + "step": 9991 + }, + { + "epoch": 0.8063917359373739, + "grad_norm": 0.7012695074081421, + "learning_rate": 0.00010091562727545001, + "loss": 2.5134, + "step": 9992 + }, + { + "epoch": 0.8064724396739569, + "grad_norm": 0.6368406414985657, + "learning_rate": 0.00010089984102812848, + "loss": 2.568, + "step": 9993 + }, + { + "epoch": 0.80655314341054, + "grad_norm": 0.6552153825759888, + "learning_rate": 0.00010088405475838059, + "loss": 2.5101, + "step": 9994 + }, + { + "epoch": 0.8066338471471229, + "grad_norm": 0.6949633359909058, + "learning_rate": 0.00010086826846659974, + "loss": 2.5427, + "step": 9995 + }, + { + "epoch": 0.8067145508837059, + "grad_norm": 0.6593093872070312, + "learning_rate": 0.00010085248215317935, + "loss": 2.5551, + "step": 9996 + }, + { + "epoch": 0.8067952546202889, + "grad_norm": 0.6963745355606079, + "learning_rate": 0.00010083669581851287, + "loss": 2.4956, + "step": 9997 + }, + { + "epoch": 0.8068759583568719, + "grad_norm": 0.7093523144721985, + "learning_rate": 0.00010082090946299377, + "loss": 2.5876, + "step": 9998 + }, + { + "epoch": 0.806956662093455, + "grad_norm": 0.6796671152114868, + "learning_rate": 0.00010080512308701544, + "loss": 2.5302, + "step": 9999 + }, + { + "epoch": 0.8070373658300379, + "grad_norm": 0.7170542478561401, + "learning_rate": 0.00010078933669097135, + "loss": 2.5886, + "step": 10000 + }, + { + "epoch": 0.8070373658300379, + "eval_loss": 2.4734926223754883, + "eval_runtime": 788.2594, + "eval_samples_per_second": 3.324, + 
"eval_steps_per_second": 0.554, + "step": 10000 + }, + { + "epoch": 0.8071180695666209, + "grad_norm": 0.6566126346588135, + "learning_rate": 0.0001007735502752549, + "loss": 2.4441, + "step": 10001 + }, + { + "epoch": 0.8071987733032039, + "grad_norm": 0.6739515662193298, + "learning_rate": 0.00010075776384025957, + "loss": 2.5767, + "step": 10002 + }, + { + "epoch": 0.807279477039787, + "grad_norm": 0.6334208846092224, + "learning_rate": 0.00010074197738637881, + "loss": 2.5321, + "step": 10003 + }, + { + "epoch": 0.80736018077637, + "grad_norm": 0.6764520406723022, + "learning_rate": 0.000100726190914006, + "loss": 2.5144, + "step": 10004 + }, + { + "epoch": 0.8074408845129529, + "grad_norm": 0.7090082764625549, + "learning_rate": 0.00010071040442353464, + "loss": 2.5626, + "step": 10005 + }, + { + "epoch": 0.8075215882495359, + "grad_norm": 0.6915304064750671, + "learning_rate": 0.00010069461791535814, + "loss": 2.5261, + "step": 10006 + }, + { + "epoch": 0.807602291986119, + "grad_norm": 0.6685747504234314, + "learning_rate": 0.00010067883138986991, + "loss": 2.492, + "step": 10007 + }, + { + "epoch": 0.807682995722702, + "grad_norm": 0.7179074883460999, + "learning_rate": 0.00010066304484746347, + "loss": 2.4601, + "step": 10008 + }, + { + "epoch": 0.807763699459285, + "grad_norm": 0.7032761573791504, + "learning_rate": 0.00010064725828853219, + "loss": 2.578, + "step": 10009 + }, + { + "epoch": 0.8078444031958679, + "grad_norm": 0.710322916507721, + "learning_rate": 0.00010063147171346959, + "loss": 2.5514, + "step": 10010 + }, + { + "epoch": 0.807925106932451, + "grad_norm": 0.6552841067314148, + "learning_rate": 0.00010061568512266903, + "loss": 2.5474, + "step": 10011 + }, + { + "epoch": 0.808005810669034, + "grad_norm": 0.6862452626228333, + "learning_rate": 0.00010059989851652398, + "loss": 2.5772, + "step": 10012 + }, + { + "epoch": 0.808086514405617, + "grad_norm": 0.7123851180076599, + "learning_rate": 0.00010058411189542788, + "loss": 2.4936, + 
"step": 10013 + }, + { + "epoch": 0.8081672181421999, + "grad_norm": 0.6889944672584534, + "learning_rate": 0.00010056832525977422, + "loss": 2.5041, + "step": 10014 + }, + { + "epoch": 0.808247921878783, + "grad_norm": 0.6986924409866333, + "learning_rate": 0.0001005525386099564, + "loss": 2.5591, + "step": 10015 + }, + { + "epoch": 0.808328625615366, + "grad_norm": 0.6935306787490845, + "learning_rate": 0.00010053675194636787, + "loss": 2.5423, + "step": 10016 + }, + { + "epoch": 0.808409329351949, + "grad_norm": 0.6751969456672668, + "learning_rate": 0.00010052096526940207, + "loss": 2.5666, + "step": 10017 + }, + { + "epoch": 0.808490033088532, + "grad_norm": 0.676909327507019, + "learning_rate": 0.00010050517857945243, + "loss": 2.5394, + "step": 10018 + }, + { + "epoch": 0.808570736825115, + "grad_norm": 0.7439377307891846, + "learning_rate": 0.00010048939187691246, + "loss": 2.5011, + "step": 10019 + }, + { + "epoch": 0.808651440561698, + "grad_norm": 0.6594791412353516, + "learning_rate": 0.00010047360516217554, + "loss": 2.5159, + "step": 10020 + }, + { + "epoch": 0.808732144298281, + "grad_norm": 0.7013304233551025, + "learning_rate": 0.00010045781843563517, + "loss": 2.5439, + "step": 10021 + }, + { + "epoch": 0.808812848034864, + "grad_norm": 0.7537491917610168, + "learning_rate": 0.00010044203169768476, + "loss": 2.5837, + "step": 10022 + }, + { + "epoch": 0.8088935517714471, + "grad_norm": 0.7273866534233093, + "learning_rate": 0.00010042624494871773, + "loss": 2.5546, + "step": 10023 + }, + { + "epoch": 0.80897425550803, + "grad_norm": 0.6716369986534119, + "learning_rate": 0.0001004104581891276, + "loss": 2.5264, + "step": 10024 + }, + { + "epoch": 0.809054959244613, + "grad_norm": 0.7544769644737244, + "learning_rate": 0.00010039467141930777, + "loss": 2.5502, + "step": 10025 + }, + { + "epoch": 0.809135662981196, + "grad_norm": 0.8713179230690002, + "learning_rate": 0.0001003788846396517, + "loss": 2.5178, + "step": 10026 + }, + { + "epoch": 
0.8092163667177791, + "grad_norm": 0.6704887747764587, + "learning_rate": 0.00010036309785055283, + "loss": 2.5136, + "step": 10027 + }, + { + "epoch": 0.809297070454362, + "grad_norm": 0.7308552861213684, + "learning_rate": 0.00010034731105240458, + "loss": 2.4781, + "step": 10028 + }, + { + "epoch": 0.809377774190945, + "grad_norm": 0.7214144468307495, + "learning_rate": 0.00010033152424560049, + "loss": 2.5946, + "step": 10029 + }, + { + "epoch": 0.809458477927528, + "grad_norm": 0.6946821808815002, + "learning_rate": 0.00010031573743053393, + "loss": 2.4937, + "step": 10030 + }, + { + "epoch": 0.8095391816641111, + "grad_norm": 0.7348416447639465, + "learning_rate": 0.00010029995060759833, + "loss": 2.5959, + "step": 10031 + }, + { + "epoch": 0.8096198854006941, + "grad_norm": 0.7482579350471497, + "learning_rate": 0.00010028416377718721, + "loss": 2.6, + "step": 10032 + }, + { + "epoch": 0.809700589137277, + "grad_norm": 0.7114939093589783, + "learning_rate": 0.00010026837693969397, + "loss": 2.5376, + "step": 10033 + }, + { + "epoch": 0.80978129287386, + "grad_norm": 0.6559228897094727, + "learning_rate": 0.00010025259009551209, + "loss": 2.4961, + "step": 10034 + }, + { + "epoch": 0.8098619966104431, + "grad_norm": 0.7494906187057495, + "learning_rate": 0.00010023680324503501, + "loss": 2.5723, + "step": 10035 + }, + { + "epoch": 0.8099427003470261, + "grad_norm": 0.7207093834877014, + "learning_rate": 0.00010022101638865618, + "loss": 2.5523, + "step": 10036 + }, + { + "epoch": 0.8100234040836091, + "grad_norm": 0.6730504035949707, + "learning_rate": 0.00010020522952676903, + "loss": 2.5135, + "step": 10037 + }, + { + "epoch": 0.810104107820192, + "grad_norm": 0.6805168390274048, + "learning_rate": 0.000100189442659767, + "loss": 2.5598, + "step": 10038 + }, + { + "epoch": 0.8101848115567751, + "grad_norm": 0.6639137268066406, + "learning_rate": 0.00010017365578804358, + "loss": 2.5152, + "step": 10039 + }, + { + "epoch": 0.8102655152933581, + "grad_norm": 
0.6604194641113281, + "learning_rate": 0.00010015786891199221, + "loss": 2.5302, + "step": 10040 + }, + { + "epoch": 0.8103462190299411, + "grad_norm": 0.7664934992790222, + "learning_rate": 0.00010014208203200634, + "loss": 2.5437, + "step": 10041 + }, + { + "epoch": 0.8104269227665241, + "grad_norm": 0.7404079437255859, + "learning_rate": 0.00010012629514847942, + "loss": 2.6559, + "step": 10042 + }, + { + "epoch": 0.8105076265031071, + "grad_norm": 0.694006085395813, + "learning_rate": 0.00010011050826180488, + "loss": 2.5571, + "step": 10043 + }, + { + "epoch": 0.8105883302396901, + "grad_norm": 0.7007058262825012, + "learning_rate": 0.00010009472137237616, + "loss": 2.5639, + "step": 10044 + }, + { + "epoch": 0.8106690339762731, + "grad_norm": 0.7331913113594055, + "learning_rate": 0.00010007893448058678, + "loss": 2.5499, + "step": 10045 + }, + { + "epoch": 0.8107497377128561, + "grad_norm": 0.7636487483978271, + "learning_rate": 0.00010006314758683015, + "loss": 2.6068, + "step": 10046 + }, + { + "epoch": 0.810830441449439, + "grad_norm": 0.6505223512649536, + "learning_rate": 0.0001000473606914997, + "loss": 2.5313, + "step": 10047 + }, + { + "epoch": 0.8109111451860221, + "grad_norm": 0.6425966620445251, + "learning_rate": 0.00010003157379498886, + "loss": 2.5998, + "step": 10048 + }, + { + "epoch": 0.8109918489226051, + "grad_norm": 0.7163281440734863, + "learning_rate": 0.00010001578689769116, + "loss": 2.5493, + "step": 10049 + }, + { + "epoch": 0.8110725526591881, + "grad_norm": 0.7345306873321533, + "learning_rate": 0.0001, + "loss": 2.5609, + "step": 10050 + }, + { + "epoch": 0.8111532563957711, + "grad_norm": 0.6808427572250366, + "learning_rate": 9.998421310230884e-05, + "loss": 2.4823, + "step": 10051 + }, + { + "epoch": 0.8112339601323542, + "grad_norm": 0.7456082105636597, + "learning_rate": 9.996842620501115e-05, + "loss": 2.4782, + "step": 10052 + }, + { + "epoch": 0.8113146638689371, + "grad_norm": 0.7061728239059448, + "learning_rate": 
9.995263930850034e-05, + "loss": 2.4906, + "step": 10053 + }, + { + "epoch": 0.8113953676055201, + "grad_norm": 0.691663920879364, + "learning_rate": 9.993685241316986e-05, + "loss": 2.5842, + "step": 10054 + }, + { + "epoch": 0.8114760713421031, + "grad_norm": 0.6899400353431702, + "learning_rate": 9.992106551941325e-05, + "loss": 2.5628, + "step": 10055 + }, + { + "epoch": 0.8115567750786862, + "grad_norm": 0.6909289360046387, + "learning_rate": 9.990527862762385e-05, + "loss": 2.5173, + "step": 10056 + }, + { + "epoch": 0.8116374788152692, + "grad_norm": 0.6507968306541443, + "learning_rate": 9.988949173819514e-05, + "loss": 2.5763, + "step": 10057 + }, + { + "epoch": 0.8117181825518521, + "grad_norm": 0.6972371339797974, + "learning_rate": 9.98737048515206e-05, + "loss": 2.604, + "step": 10058 + }, + { + "epoch": 0.8117988862884351, + "grad_norm": 0.6500107049942017, + "learning_rate": 9.985791796799368e-05, + "loss": 2.509, + "step": 10059 + }, + { + "epoch": 0.8118795900250182, + "grad_norm": 0.704501211643219, + "learning_rate": 9.98421310880078e-05, + "loss": 2.5773, + "step": 10060 + }, + { + "epoch": 0.8119602937616012, + "grad_norm": 0.7037203311920166, + "learning_rate": 9.982634421195641e-05, + "loss": 2.5968, + "step": 10061 + }, + { + "epoch": 0.8120409974981841, + "grad_norm": 0.7161232829093933, + "learning_rate": 9.981055734023304e-05, + "loss": 2.5373, + "step": 10062 + }, + { + "epoch": 0.8121217012347671, + "grad_norm": 0.6602928638458252, + "learning_rate": 9.979477047323099e-05, + "loss": 2.5851, + "step": 10063 + }, + { + "epoch": 0.8122024049713502, + "grad_norm": 0.6685947775840759, + "learning_rate": 9.977898361134383e-05, + "loss": 2.5543, + "step": 10064 + }, + { + "epoch": 0.8122831087079332, + "grad_norm": 0.6772760152816772, + "learning_rate": 9.976319675496502e-05, + "loss": 2.5355, + "step": 10065 + }, + { + "epoch": 0.8123638124445162, + "grad_norm": 0.6140885949134827, + "learning_rate": 9.974740990448792e-05, + "loss": 2.489, + 
"step": 10066 + }, + { + "epoch": 0.8124445161810991, + "grad_norm": 0.6597142219543457, + "learning_rate": 9.973162306030604e-05, + "loss": 2.5619, + "step": 10067 + }, + { + "epoch": 0.8125252199176822, + "grad_norm": 0.6768592000007629, + "learning_rate": 9.971583622281281e-05, + "loss": 2.5107, + "step": 10068 + }, + { + "epoch": 0.8126059236542652, + "grad_norm": 0.682296633720398, + "learning_rate": 9.970004939240168e-05, + "loss": 2.5003, + "step": 10069 + }, + { + "epoch": 0.8126866273908482, + "grad_norm": 0.7356325387954712, + "learning_rate": 9.96842625694661e-05, + "loss": 2.5864, + "step": 10070 + }, + { + "epoch": 0.8127673311274312, + "grad_norm": 0.6818091869354248, + "learning_rate": 9.966847575439956e-05, + "loss": 2.5375, + "step": 10071 + }, + { + "epoch": 0.8128480348640142, + "grad_norm": 0.6954368352890015, + "learning_rate": 9.965268894759543e-05, + "loss": 2.5314, + "step": 10072 + }, + { + "epoch": 0.8129287386005972, + "grad_norm": 0.6759306192398071, + "learning_rate": 9.963690214944721e-05, + "loss": 2.5881, + "step": 10073 + }, + { + "epoch": 0.8130094423371802, + "grad_norm": 0.6546545624732971, + "learning_rate": 9.962111536034832e-05, + "loss": 2.5264, + "step": 10074 + }, + { + "epoch": 0.8130901460737632, + "grad_norm": 0.6709586977958679, + "learning_rate": 9.960532858069226e-05, + "loss": 2.5906, + "step": 10075 + }, + { + "epoch": 0.8131708498103463, + "grad_norm": 0.7310851812362671, + "learning_rate": 9.958954181087241e-05, + "loss": 2.5134, + "step": 10076 + }, + { + "epoch": 0.8132515535469292, + "grad_norm": 0.6793027520179749, + "learning_rate": 9.957375505128227e-05, + "loss": 2.5387, + "step": 10077 + }, + { + "epoch": 0.8133322572835122, + "grad_norm": 0.6965875029563904, + "learning_rate": 9.955796830231528e-05, + "loss": 2.5649, + "step": 10078 + }, + { + "epoch": 0.8134129610200952, + "grad_norm": 0.6597574353218079, + "learning_rate": 9.954218156436485e-05, + "loss": 2.5281, + "step": 10079 + }, + { + "epoch": 
0.8134936647566783, + "grad_norm": 0.7911555171012878, + "learning_rate": 9.952639483782445e-05, + "loss": 2.535, + "step": 10080 + }, + { + "epoch": 0.8135743684932613, + "grad_norm": 0.7405688762664795, + "learning_rate": 9.951060812308757e-05, + "loss": 2.5303, + "step": 10081 + }, + { + "epoch": 0.8136550722298442, + "grad_norm": 0.6961480379104614, + "learning_rate": 9.949482142054758e-05, + "loss": 2.4959, + "step": 10082 + }, + { + "epoch": 0.8137357759664272, + "grad_norm": 0.6761718392372131, + "learning_rate": 9.947903473059797e-05, + "loss": 2.5591, + "step": 10083 + }, + { + "epoch": 0.8138164797030103, + "grad_norm": 0.7383104562759399, + "learning_rate": 9.946324805363218e-05, + "loss": 2.5848, + "step": 10084 + }, + { + "epoch": 0.8138971834395933, + "grad_norm": 0.6495873928070068, + "learning_rate": 9.944746139004364e-05, + "loss": 2.4972, + "step": 10085 + }, + { + "epoch": 0.8139778871761763, + "grad_norm": 0.7247152328491211, + "learning_rate": 9.94316747402258e-05, + "loss": 2.5361, + "step": 10086 + }, + { + "epoch": 0.8140585909127592, + "grad_norm": 0.6965751051902771, + "learning_rate": 9.941588810457215e-05, + "loss": 2.4997, + "step": 10087 + }, + { + "epoch": 0.8141392946493423, + "grad_norm": 0.7138223648071289, + "learning_rate": 9.940010148347603e-05, + "loss": 2.5226, + "step": 10088 + }, + { + "epoch": 0.8142199983859253, + "grad_norm": 0.6571210622787476, + "learning_rate": 9.938431487733099e-05, + "loss": 2.5388, + "step": 10089 + }, + { + "epoch": 0.8143007021225083, + "grad_norm": 0.6721277832984924, + "learning_rate": 9.936852828653042e-05, + "loss": 2.5219, + "step": 10090 + }, + { + "epoch": 0.8143814058590912, + "grad_norm": 0.647520124912262, + "learning_rate": 9.935274171146782e-05, + "loss": 2.6199, + "step": 10091 + }, + { + "epoch": 0.8144621095956743, + "grad_norm": 0.6892204284667969, + "learning_rate": 9.933695515253654e-05, + "loss": 2.5132, + "step": 10092 + }, + { + "epoch": 0.8145428133322573, + "grad_norm": 
0.6979050636291504, + "learning_rate": 9.932116861013008e-05, + "loss": 2.5148, + "step": 10093 + }, + { + "epoch": 0.8146235170688403, + "grad_norm": 0.6682664752006531, + "learning_rate": 9.930538208464189e-05, + "loss": 2.5795, + "step": 10094 + }, + { + "epoch": 0.8147042208054233, + "grad_norm": 0.734121561050415, + "learning_rate": 9.928959557646537e-05, + "loss": 2.5469, + "step": 10095 + }, + { + "epoch": 0.8147849245420064, + "grad_norm": 0.6669620275497437, + "learning_rate": 9.9273809085994e-05, + "loss": 2.5277, + "step": 10096 + }, + { + "epoch": 0.8148656282785893, + "grad_norm": 0.6750600934028625, + "learning_rate": 9.925802261362124e-05, + "loss": 2.5869, + "step": 10097 + }, + { + "epoch": 0.8149463320151723, + "grad_norm": 0.6813061237335205, + "learning_rate": 9.924223615974044e-05, + "loss": 2.585, + "step": 10098 + }, + { + "epoch": 0.8150270357517553, + "grad_norm": 0.6775497794151306, + "learning_rate": 9.92264497247451e-05, + "loss": 2.5353, + "step": 10099 + }, + { + "epoch": 0.8151077394883383, + "grad_norm": 0.6877530813217163, + "learning_rate": 9.92106633090287e-05, + "loss": 2.5349, + "step": 10100 + }, + { + "epoch": 0.8151884432249213, + "grad_norm": 0.6984169483184814, + "learning_rate": 9.91948769129846e-05, + "loss": 2.5986, + "step": 10101 + }, + { + "epoch": 0.8152691469615043, + "grad_norm": 0.7144806981086731, + "learning_rate": 9.917909053700626e-05, + "loss": 2.5797, + "step": 10102 + }, + { + "epoch": 0.8153498506980873, + "grad_norm": 0.6494203209877014, + "learning_rate": 9.916330418148715e-05, + "loss": 2.5035, + "step": 10103 + }, + { + "epoch": 0.8154305544346703, + "grad_norm": 0.6669752597808838, + "learning_rate": 9.914751784682069e-05, + "loss": 2.5489, + "step": 10104 + }, + { + "epoch": 0.8155112581712534, + "grad_norm": 0.6557981371879578, + "learning_rate": 9.913173153340029e-05, + "loss": 2.5266, + "step": 10105 + }, + { + "epoch": 0.8155919619078363, + "grad_norm": 0.6633948087692261, + "learning_rate": 
9.911594524161941e-05, + "loss": 2.5263, + "step": 10106 + }, + { + "epoch": 0.8156726656444193, + "grad_norm": 0.7191522717475891, + "learning_rate": 9.910015897187154e-05, + "loss": 2.5625, + "step": 10107 + }, + { + "epoch": 0.8157533693810023, + "grad_norm": 0.7089062929153442, + "learning_rate": 9.908437272455001e-05, + "loss": 2.5644, + "step": 10108 + }, + { + "epoch": 0.8158340731175854, + "grad_norm": 0.7662761211395264, + "learning_rate": 9.906858650004831e-05, + "loss": 2.5875, + "step": 10109 + }, + { + "epoch": 0.8159147768541684, + "grad_norm": 0.6658861041069031, + "learning_rate": 9.905280029875988e-05, + "loss": 2.5818, + "step": 10110 + }, + { + "epoch": 0.8159954805907513, + "grad_norm": 0.7229514718055725, + "learning_rate": 9.903701412107815e-05, + "loss": 2.5421, + "step": 10111 + }, + { + "epoch": 0.8160761843273343, + "grad_norm": 0.7295149564743042, + "learning_rate": 9.902122796739652e-05, + "loss": 2.5298, + "step": 10112 + }, + { + "epoch": 0.8161568880639174, + "grad_norm": 0.6805420517921448, + "learning_rate": 9.900544183810849e-05, + "loss": 2.6693, + "step": 10113 + }, + { + "epoch": 0.8162375918005004, + "grad_norm": 0.6560602188110352, + "learning_rate": 9.898965573360738e-05, + "loss": 2.5445, + "step": 10114 + }, + { + "epoch": 0.8163182955370833, + "grad_norm": 0.690396785736084, + "learning_rate": 9.897386965428674e-05, + "loss": 2.5281, + "step": 10115 + }, + { + "epoch": 0.8163989992736663, + "grad_norm": 0.6905054450035095, + "learning_rate": 9.895808360053998e-05, + "loss": 2.5406, + "step": 10116 + }, + { + "epoch": 0.8164797030102494, + "grad_norm": 0.6905301213264465, + "learning_rate": 9.894229757276045e-05, + "loss": 2.5458, + "step": 10117 + }, + { + "epoch": 0.8165604067468324, + "grad_norm": 0.6827620267868042, + "learning_rate": 9.892651157134162e-05, + "loss": 2.4403, + "step": 10118 + }, + { + "epoch": 0.8166411104834154, + "grad_norm": 0.7614343166351318, + "learning_rate": 9.891072559667697e-05, + "loss": 
2.6369, + "step": 10119 + }, + { + "epoch": 0.8167218142199983, + "grad_norm": 0.6913704872131348, + "learning_rate": 9.889493964915985e-05, + "loss": 2.5914, + "step": 10120 + }, + { + "epoch": 0.8168025179565814, + "grad_norm": 0.7026088237762451, + "learning_rate": 9.887915372918372e-05, + "loss": 2.5139, + "step": 10121 + }, + { + "epoch": 0.8168832216931644, + "grad_norm": 0.7064465284347534, + "learning_rate": 9.886336783714203e-05, + "loss": 2.549, + "step": 10122 + }, + { + "epoch": 0.8169639254297474, + "grad_norm": 0.7345553040504456, + "learning_rate": 9.884758197342821e-05, + "loss": 2.5887, + "step": 10123 + }, + { + "epoch": 0.8170446291663304, + "grad_norm": 0.6916251182556152, + "learning_rate": 9.883179613843563e-05, + "loss": 2.5659, + "step": 10124 + }, + { + "epoch": 0.8171253329029134, + "grad_norm": 0.6428200602531433, + "learning_rate": 9.881601033255771e-05, + "loss": 2.5379, + "step": 10125 + }, + { + "epoch": 0.8172060366394964, + "grad_norm": 0.7433571815490723, + "learning_rate": 9.880022455618796e-05, + "loss": 2.5751, + "step": 10126 + }, + { + "epoch": 0.8172867403760794, + "grad_norm": 0.733256995677948, + "learning_rate": 9.878443880971974e-05, + "loss": 2.4971, + "step": 10127 + }, + { + "epoch": 0.8173674441126624, + "grad_norm": 0.708289384841919, + "learning_rate": 9.876865309354646e-05, + "loss": 2.635, + "step": 10128 + }, + { + "epoch": 0.8174481478492455, + "grad_norm": 0.6877188682556152, + "learning_rate": 9.87528674080616e-05, + "loss": 2.5827, + "step": 10129 + }, + { + "epoch": 0.8175288515858284, + "grad_norm": 0.7108712792396545, + "learning_rate": 9.873708175365852e-05, + "loss": 2.5643, + "step": 10130 + }, + { + "epoch": 0.8176095553224114, + "grad_norm": 0.7435629367828369, + "learning_rate": 9.872129613073065e-05, + "loss": 2.5267, + "step": 10131 + }, + { + "epoch": 0.8176902590589944, + "grad_norm": 0.669913113117218, + "learning_rate": 9.870551053967148e-05, + "loss": 2.5684, + "step": 10132 + }, + { + 
"epoch": 0.8177709627955775, + "grad_norm": 0.6981424689292908, + "learning_rate": 9.868972498087431e-05, + "loss": 2.592, + "step": 10133 + }, + { + "epoch": 0.8178516665321605, + "grad_norm": 0.6661834716796875, + "learning_rate": 9.867393945473263e-05, + "loss": 2.5082, + "step": 10134 + }, + { + "epoch": 0.8179323702687434, + "grad_norm": 0.6611261367797852, + "learning_rate": 9.865815396163987e-05, + "loss": 2.556, + "step": 10135 + }, + { + "epoch": 0.8180130740053264, + "grad_norm": 0.6732283234596252, + "learning_rate": 9.86423685019894e-05, + "loss": 2.5668, + "step": 10136 + }, + { + "epoch": 0.8180937777419095, + "grad_norm": 0.6768637299537659, + "learning_rate": 9.862658307617465e-05, + "loss": 2.5467, + "step": 10137 + }, + { + "epoch": 0.8181744814784925, + "grad_norm": 0.6943596601486206, + "learning_rate": 9.861079768458904e-05, + "loss": 2.5989, + "step": 10138 + }, + { + "epoch": 0.8182551852150755, + "grad_norm": 0.7369638681411743, + "learning_rate": 9.859501232762601e-05, + "loss": 2.5189, + "step": 10139 + }, + { + "epoch": 0.8183358889516584, + "grad_norm": 0.7443112730979919, + "learning_rate": 9.857922700567892e-05, + "loss": 2.5979, + "step": 10140 + }, + { + "epoch": 0.8184165926882415, + "grad_norm": 0.6726163029670715, + "learning_rate": 9.85634417191412e-05, + "loss": 2.5451, + "step": 10141 + }, + { + "epoch": 0.8184972964248245, + "grad_norm": 0.720492422580719, + "learning_rate": 9.854765646840632e-05, + "loss": 2.6116, + "step": 10142 + }, + { + "epoch": 0.8185780001614075, + "grad_norm": 0.6998233795166016, + "learning_rate": 9.85318712538676e-05, + "loss": 2.556, + "step": 10143 + }, + { + "epoch": 0.8186587038979904, + "grad_norm": 0.7580110430717468, + "learning_rate": 9.851608607591848e-05, + "loss": 2.5222, + "step": 10144 + }, + { + "epoch": 0.8187394076345735, + "grad_norm": 0.6893007755279541, + "learning_rate": 9.85003009349524e-05, + "loss": 2.4639, + "step": 10145 + }, + { + "epoch": 0.8188201113711565, + "grad_norm": 
0.6448441743850708, + "learning_rate": 9.84845158313627e-05, + "loss": 2.5249, + "step": 10146 + }, + { + "epoch": 0.8189008151077395, + "grad_norm": 0.7591872215270996, + "learning_rate": 9.846873076554285e-05, + "loss": 2.5173, + "step": 10147 + }, + { + "epoch": 0.8189815188443225, + "grad_norm": 0.6994685530662537, + "learning_rate": 9.845294573788626e-05, + "loss": 2.5181, + "step": 10148 + }, + { + "epoch": 0.8190622225809054, + "grad_norm": 0.6822378635406494, + "learning_rate": 9.843716074878628e-05, + "loss": 2.5109, + "step": 10149 + }, + { + "epoch": 0.8191429263174885, + "grad_norm": 0.6730359792709351, + "learning_rate": 9.842137579863632e-05, + "loss": 2.5402, + "step": 10150 + }, + { + "epoch": 0.8192236300540715, + "grad_norm": 0.6280627846717834, + "learning_rate": 9.840559088782984e-05, + "loss": 2.4806, + "step": 10151 + }, + { + "epoch": 0.8193043337906545, + "grad_norm": 0.6887876391410828, + "learning_rate": 9.838980601676017e-05, + "loss": 2.5498, + "step": 10152 + }, + { + "epoch": 0.8193850375272375, + "grad_norm": 0.7823790907859802, + "learning_rate": 9.837402118582075e-05, + "loss": 2.467, + "step": 10153 + }, + { + "epoch": 0.8194657412638205, + "grad_norm": 0.8109384179115295, + "learning_rate": 9.835823639540496e-05, + "loss": 2.5898, + "step": 10154 + }, + { + "epoch": 0.8195464450004035, + "grad_norm": 0.6883066892623901, + "learning_rate": 9.834245164590624e-05, + "loss": 2.5589, + "step": 10155 + }, + { + "epoch": 0.8196271487369865, + "grad_norm": 0.7291175723075867, + "learning_rate": 9.832666693771794e-05, + "loss": 2.5317, + "step": 10156 + }, + { + "epoch": 0.8197078524735695, + "grad_norm": 0.6819449663162231, + "learning_rate": 9.831088227123346e-05, + "loss": 2.5513, + "step": 10157 + }, + { + "epoch": 0.8197885562101526, + "grad_norm": 0.7038870453834534, + "learning_rate": 9.829509764684626e-05, + "loss": 2.5301, + "step": 10158 + }, + { + "epoch": 0.8198692599467355, + "grad_norm": 0.7483033537864685, + "learning_rate": 
9.827931306494965e-05, + "loss": 2.5273, + "step": 10159 + }, + { + "epoch": 0.8199499636833185, + "grad_norm": 0.6998303532600403, + "learning_rate": 9.826352852593705e-05, + "loss": 2.5083, + "step": 10160 + }, + { + "epoch": 0.8200306674199015, + "grad_norm": 0.6865512728691101, + "learning_rate": 9.824774403020188e-05, + "loss": 2.5693, + "step": 10161 + }, + { + "epoch": 0.8201113711564846, + "grad_norm": 0.8144257068634033, + "learning_rate": 9.823195957813749e-05, + "loss": 2.6052, + "step": 10162 + }, + { + "epoch": 0.8201920748930676, + "grad_norm": 0.6920810341835022, + "learning_rate": 9.821617517013729e-05, + "loss": 2.5467, + "step": 10163 + }, + { + "epoch": 0.8202727786296505, + "grad_norm": 0.7538061141967773, + "learning_rate": 9.820039080659469e-05, + "loss": 2.5933, + "step": 10164 + }, + { + "epoch": 0.8203534823662335, + "grad_norm": 0.6744310259819031, + "learning_rate": 9.818460648790302e-05, + "loss": 2.5633, + "step": 10165 + }, + { + "epoch": 0.8204341861028166, + "grad_norm": 0.6943854689598083, + "learning_rate": 9.816882221445571e-05, + "loss": 2.5868, + "step": 10166 + }, + { + "epoch": 0.8205148898393996, + "grad_norm": 0.6486902832984924, + "learning_rate": 9.815303798664614e-05, + "loss": 2.4983, + "step": 10167 + }, + { + "epoch": 0.8205955935759826, + "grad_norm": 0.6699065566062927, + "learning_rate": 9.813725380486773e-05, + "loss": 2.563, + "step": 10168 + }, + { + "epoch": 0.8206762973125655, + "grad_norm": 0.6547110080718994, + "learning_rate": 9.812146966951379e-05, + "loss": 2.5404, + "step": 10169 + }, + { + "epoch": 0.8207570010491486, + "grad_norm": 0.692592203617096, + "learning_rate": 9.810568558097774e-05, + "loss": 2.5625, + "step": 10170 + }, + { + "epoch": 0.8208377047857316, + "grad_norm": 0.6696702837944031, + "learning_rate": 9.808990153965296e-05, + "loss": 2.5866, + "step": 10171 + }, + { + "epoch": 0.8209184085223146, + "grad_norm": 0.6425998210906982, + "learning_rate": 9.807411754593282e-05, + "loss": 
2.5487, + "step": 10172 + }, + { + "epoch": 0.8209991122588975, + "grad_norm": 0.6849769949913025, + "learning_rate": 9.805833360021069e-05, + "loss": 2.5772, + "step": 10173 + }, + { + "epoch": 0.8210798159954806, + "grad_norm": 0.7451414465904236, + "learning_rate": 9.804254970288001e-05, + "loss": 2.5089, + "step": 10174 + }, + { + "epoch": 0.8211605197320636, + "grad_norm": 0.7134390473365784, + "learning_rate": 9.802676585433408e-05, + "loss": 2.541, + "step": 10175 + }, + { + "epoch": 0.8212412234686466, + "grad_norm": 0.7490564584732056, + "learning_rate": 9.801098205496627e-05, + "loss": 2.5299, + "step": 10176 + }, + { + "epoch": 0.8213219272052296, + "grad_norm": 0.6614408493041992, + "learning_rate": 9.799519830517005e-05, + "loss": 2.5252, + "step": 10177 + }, + { + "epoch": 0.8214026309418127, + "grad_norm": 0.761049211025238, + "learning_rate": 9.797941460533869e-05, + "loss": 2.5153, + "step": 10178 + }, + { + "epoch": 0.8214833346783956, + "grad_norm": 0.6352702379226685, + "learning_rate": 9.796363095586561e-05, + "loss": 2.5407, + "step": 10179 + }, + { + "epoch": 0.8215640384149786, + "grad_norm": 0.684212863445282, + "learning_rate": 9.794784735714417e-05, + "loss": 2.5425, + "step": 10180 + }, + { + "epoch": 0.8216447421515616, + "grad_norm": 0.652987539768219, + "learning_rate": 9.793206380956772e-05, + "loss": 2.5542, + "step": 10181 + }, + { + "epoch": 0.8217254458881447, + "grad_norm": 0.6912897229194641, + "learning_rate": 9.791628031352966e-05, + "loss": 2.5041, + "step": 10182 + }, + { + "epoch": 0.8218061496247276, + "grad_norm": 0.7025408744812012, + "learning_rate": 9.790049686942333e-05, + "loss": 2.5296, + "step": 10183 + }, + { + "epoch": 0.8218868533613106, + "grad_norm": 0.7580777406692505, + "learning_rate": 9.788471347764215e-05, + "loss": 2.578, + "step": 10184 + }, + { + "epoch": 0.8219675570978936, + "grad_norm": 0.7044378519058228, + "learning_rate": 9.78689301385794e-05, + "loss": 2.5093, + "step": 10185 + }, + { + 
"epoch": 0.8220482608344767, + "grad_norm": 0.7339754700660706, + "learning_rate": 9.785314685262849e-05, + "loss": 2.5202, + "step": 10186 + }, + { + "epoch": 0.8221289645710597, + "grad_norm": 0.6872244477272034, + "learning_rate": 9.783736362018277e-05, + "loss": 2.541, + "step": 10187 + }, + { + "epoch": 0.8222096683076426, + "grad_norm": 0.7052434682846069, + "learning_rate": 9.78215804416356e-05, + "loss": 2.4968, + "step": 10188 + }, + { + "epoch": 0.8222903720442256, + "grad_norm": 0.6739610433578491, + "learning_rate": 9.780579731738033e-05, + "loss": 2.5137, + "step": 10189 + }, + { + "epoch": 0.8223710757808087, + "grad_norm": 0.6842939853668213, + "learning_rate": 9.779001424781035e-05, + "loss": 2.5329, + "step": 10190 + }, + { + "epoch": 0.8224517795173917, + "grad_norm": 0.7057977914810181, + "learning_rate": 9.777423123331898e-05, + "loss": 2.5657, + "step": 10191 + }, + { + "epoch": 0.8225324832539747, + "grad_norm": 0.6748424172401428, + "learning_rate": 9.775844827429958e-05, + "loss": 2.6104, + "step": 10192 + }, + { + "epoch": 0.8226131869905576, + "grad_norm": 0.6492514610290527, + "learning_rate": 9.774266537114555e-05, + "loss": 2.58, + "step": 10193 + }, + { + "epoch": 0.8226938907271407, + "grad_norm": 0.6987641453742981, + "learning_rate": 9.772688252425016e-05, + "loss": 2.5301, + "step": 10194 + }, + { + "epoch": 0.8227745944637237, + "grad_norm": 0.710921585559845, + "learning_rate": 9.771109973400679e-05, + "loss": 2.6245, + "step": 10195 + }, + { + "epoch": 0.8228552982003067, + "grad_norm": 0.6673738360404968, + "learning_rate": 9.769531700080883e-05, + "loss": 2.5205, + "step": 10196 + }, + { + "epoch": 0.8229360019368896, + "grad_norm": 0.6705252528190613, + "learning_rate": 9.767953432504958e-05, + "loss": 2.4932, + "step": 10197 + }, + { + "epoch": 0.8230167056734727, + "grad_norm": 0.6587076783180237, + "learning_rate": 9.766375170712237e-05, + "loss": 2.5085, + "step": 10198 + }, + { + "epoch": 0.8230974094100557, + 
"grad_norm": 0.7285338640213013, + "learning_rate": 9.764796914742061e-05, + "loss": 2.5481, + "step": 10199 + }, + { + "epoch": 0.8231781131466387, + "grad_norm": 0.6971831321716309, + "learning_rate": 9.763218664633763e-05, + "loss": 2.6092, + "step": 10200 + }, + { + "epoch": 0.8232588168832217, + "grad_norm": 0.6940265893936157, + "learning_rate": 9.761640420426669e-05, + "loss": 2.5325, + "step": 10201 + }, + { + "epoch": 0.8233395206198046, + "grad_norm": 0.6612978577613831, + "learning_rate": 9.76006218216012e-05, + "loss": 2.5532, + "step": 10202 + }, + { + "epoch": 0.8234202243563877, + "grad_norm": 0.6707638502120972, + "learning_rate": 9.758483949873453e-05, + "loss": 2.512, + "step": 10203 + }, + { + "epoch": 0.8235009280929707, + "grad_norm": 0.6636764407157898, + "learning_rate": 9.756905723605994e-05, + "loss": 2.5446, + "step": 10204 + }, + { + "epoch": 0.8235816318295537, + "grad_norm": 0.6996643543243408, + "learning_rate": 9.755327503397081e-05, + "loss": 2.5504, + "step": 10205 + }, + { + "epoch": 0.8236623355661367, + "grad_norm": 0.604487955570221, + "learning_rate": 9.753749289286046e-05, + "loss": 2.4767, + "step": 10206 + }, + { + "epoch": 0.8237430393027197, + "grad_norm": 0.6484553217887878, + "learning_rate": 9.752171081312222e-05, + "loss": 2.5522, + "step": 10207 + }, + { + "epoch": 0.8238237430393027, + "grad_norm": 0.6890987753868103, + "learning_rate": 9.75059287951494e-05, + "loss": 2.5545, + "step": 10208 + }, + { + "epoch": 0.8239044467758857, + "grad_norm": 0.6786034107208252, + "learning_rate": 9.749014683933541e-05, + "loss": 2.591, + "step": 10209 + }, + { + "epoch": 0.8239851505124687, + "grad_norm": 0.751192033290863, + "learning_rate": 9.747436494607349e-05, + "loss": 2.5335, + "step": 10210 + }, + { + "epoch": 0.8240658542490518, + "grad_norm": 0.6611589789390564, + "learning_rate": 9.7458583115757e-05, + "loss": 2.5104, + "step": 10211 + }, + { + "epoch": 0.8241465579856347, + "grad_norm": 0.6602892875671387, + 
"learning_rate": 9.744280134877926e-05, + "loss": 2.5319, + "step": 10212 + }, + { + "epoch": 0.8242272617222177, + "grad_norm": 0.6856467127799988, + "learning_rate": 9.742701964553359e-05, + "loss": 2.5418, + "step": 10213 + }, + { + "epoch": 0.8243079654588007, + "grad_norm": 0.6810153126716614, + "learning_rate": 9.741123800641332e-05, + "loss": 2.5691, + "step": 10214 + }, + { + "epoch": 0.8243886691953838, + "grad_norm": 0.7044229507446289, + "learning_rate": 9.739545643181175e-05, + "loss": 2.5911, + "step": 10215 + }, + { + "epoch": 0.8244693729319668, + "grad_norm": 0.6689271330833435, + "learning_rate": 9.737967492212225e-05, + "loss": 2.5374, + "step": 10216 + }, + { + "epoch": 0.8245500766685497, + "grad_norm": 0.6558904051780701, + "learning_rate": 9.736389347773807e-05, + "loss": 2.5118, + "step": 10217 + }, + { + "epoch": 0.8246307804051327, + "grad_norm": 0.6900291442871094, + "learning_rate": 9.734811209905255e-05, + "loss": 2.515, + "step": 10218 + }, + { + "epoch": 0.8247114841417158, + "grad_norm": 0.7129492163658142, + "learning_rate": 9.733233078645907e-05, + "loss": 2.5191, + "step": 10219 + }, + { + "epoch": 0.8247921878782988, + "grad_norm": 0.7031866908073425, + "learning_rate": 9.731654954035082e-05, + "loss": 2.5616, + "step": 10220 + }, + { + "epoch": 0.8248728916148818, + "grad_norm": 0.6418820023536682, + "learning_rate": 9.730076836112118e-05, + "loss": 2.537, + "step": 10221 + }, + { + "epoch": 0.8249535953514647, + "grad_norm": 0.6731035113334656, + "learning_rate": 9.728498724916347e-05, + "loss": 2.5483, + "step": 10222 + }, + { + "epoch": 0.8250342990880478, + "grad_norm": 0.6941342353820801, + "learning_rate": 9.726920620487096e-05, + "loss": 2.5314, + "step": 10223 + }, + { + "epoch": 0.8251150028246308, + "grad_norm": 0.6808927059173584, + "learning_rate": 9.725342522863696e-05, + "loss": 2.5521, + "step": 10224 + }, + { + "epoch": 0.8251957065612138, + "grad_norm": 0.6873155832290649, + "learning_rate": 
9.723764432085481e-05, + "loss": 2.5205, + "step": 10225 + }, + { + "epoch": 0.8252764102977967, + "grad_norm": 0.8590287566184998, + "learning_rate": 9.722186348191776e-05, + "loss": 2.5378, + "step": 10226 + }, + { + "epoch": 0.8253571140343798, + "grad_norm": 0.691523015499115, + "learning_rate": 9.720608271221912e-05, + "loss": 2.5062, + "step": 10227 + }, + { + "epoch": 0.8254378177709628, + "grad_norm": 0.6695523262023926, + "learning_rate": 9.719030201215226e-05, + "loss": 2.5164, + "step": 10228 + }, + { + "epoch": 0.8255185215075458, + "grad_norm": 0.745516300201416, + "learning_rate": 9.717452138211037e-05, + "loss": 2.5207, + "step": 10229 + }, + { + "epoch": 0.8255992252441288, + "grad_norm": 0.6628115773200989, + "learning_rate": 9.715874082248679e-05, + "loss": 2.5293, + "step": 10230 + }, + { + "epoch": 0.8256799289807119, + "grad_norm": 0.6531884074211121, + "learning_rate": 9.714296033367482e-05, + "loss": 2.4812, + "step": 10231 + }, + { + "epoch": 0.8257606327172948, + "grad_norm": 0.7444833517074585, + "learning_rate": 9.712717991606777e-05, + "loss": 2.5422, + "step": 10232 + }, + { + "epoch": 0.8258413364538778, + "grad_norm": 0.7013139128684998, + "learning_rate": 9.711139957005888e-05, + "loss": 2.5117, + "step": 10233 + }, + { + "epoch": 0.8259220401904608, + "grad_norm": 0.6588132977485657, + "learning_rate": 9.709561929604147e-05, + "loss": 2.5257, + "step": 10234 + }, + { + "epoch": 0.8260027439270439, + "grad_norm": 0.7538537383079529, + "learning_rate": 9.707983909440886e-05, + "loss": 2.5225, + "step": 10235 + }, + { + "epoch": 0.8260834476636268, + "grad_norm": Infinity, + "learning_rate": 9.707983909440886e-05, + "loss": 2.5532, + "step": 10236 + }, + { + "epoch": 0.8261641514002098, + "grad_norm": 0.7414929270744324, + "learning_rate": 9.706405896555425e-05, + "loss": 2.5653, + "step": 10237 + }, + { + "epoch": 0.8262448551367928, + "grad_norm": 0.757057785987854, + "learning_rate": 9.704827890987097e-05, + "loss": 2.5732, + 
"step": 10238 + }, + { + "epoch": 0.8263255588733759, + "grad_norm": 0.730721652507782, + "learning_rate": 9.703249892775232e-05, + "loss": 2.5317, + "step": 10239 + }, + { + "epoch": 0.8264062626099589, + "grad_norm": 0.6943208575248718, + "learning_rate": 9.701671901959151e-05, + "loss": 2.5849, + "step": 10240 + }, + { + "epoch": 0.8264869663465418, + "grad_norm": 0.7111102938652039, + "learning_rate": 9.700093918578188e-05, + "loss": 2.5007, + "step": 10241 + }, + { + "epoch": 0.8265676700831248, + "grad_norm": 0.7240251302719116, + "learning_rate": 9.69851594267167e-05, + "loss": 2.5002, + "step": 10242 + }, + { + "epoch": 0.8266483738197079, + "grad_norm": 0.6624411344528198, + "learning_rate": 9.696937974278922e-05, + "loss": 2.5175, + "step": 10243 + }, + { + "epoch": 0.8267290775562909, + "grad_norm": 0.6972576975822449, + "learning_rate": 9.695360013439269e-05, + "loss": 2.5285, + "step": 10244 + }, + { + "epoch": 0.8268097812928739, + "grad_norm": 0.684446394443512, + "learning_rate": 9.693782060192046e-05, + "loss": 2.57, + "step": 10245 + }, + { + "epoch": 0.8268904850294568, + "grad_norm": 0.6920011639595032, + "learning_rate": 9.692204114576573e-05, + "loss": 2.5042, + "step": 10246 + }, + { + "epoch": 0.8269711887660399, + "grad_norm": 0.7526013851165771, + "learning_rate": 9.690626176632176e-05, + "loss": 2.5878, + "step": 10247 + }, + { + "epoch": 0.8270518925026229, + "grad_norm": 0.6936177611351013, + "learning_rate": 9.689048246398184e-05, + "loss": 2.5572, + "step": 10248 + }, + { + "epoch": 0.8271325962392059, + "grad_norm": 0.672168493270874, + "learning_rate": 9.687470323913922e-05, + "loss": 2.5127, + "step": 10249 + }, + { + "epoch": 0.8272132999757889, + "grad_norm": 0.6847899556159973, + "learning_rate": 9.685892409218717e-05, + "loss": 2.5443, + "step": 10250 + }, + { + "epoch": 0.8272940037123718, + "grad_norm": 0.6877103447914124, + "learning_rate": 9.684314502351894e-05, + "loss": 2.4924, + "step": 10251 + }, + { + "epoch": 
0.8273747074489549, + "grad_norm": 0.6894243359565735, + "learning_rate": 9.682736603352783e-05, + "loss": 2.5107, + "step": 10252 + }, + { + "epoch": 0.8274554111855379, + "grad_norm": 0.7318278551101685, + "learning_rate": 9.681158712260698e-05, + "loss": 2.5276, + "step": 10253 + }, + { + "epoch": 0.8275361149221209, + "grad_norm": 0.6949039101600647, + "learning_rate": 9.679580829114975e-05, + "loss": 2.5128, + "step": 10254 + }, + { + "epoch": 0.8276168186587038, + "grad_norm": 0.6523800492286682, + "learning_rate": 9.678002953954939e-05, + "loss": 2.5584, + "step": 10255 + }, + { + "epoch": 0.8276975223952869, + "grad_norm": 0.6914480328559875, + "learning_rate": 9.676425086819905e-05, + "loss": 2.5597, + "step": 10256 + }, + { + "epoch": 0.8277782261318699, + "grad_norm": 0.7107869982719421, + "learning_rate": 9.674847227749206e-05, + "loss": 2.5009, + "step": 10257 + }, + { + "epoch": 0.8278589298684529, + "grad_norm": 0.7066758275032043, + "learning_rate": 9.673269376782166e-05, + "loss": 2.4599, + "step": 10258 + }, + { + "epoch": 0.8279396336050359, + "grad_norm": 0.7147037982940674, + "learning_rate": 9.671691533958104e-05, + "loss": 2.4478, + "step": 10259 + }, + { + "epoch": 0.828020337341619, + "grad_norm": 0.666265606880188, + "learning_rate": 9.670113699316347e-05, + "loss": 2.5652, + "step": 10260 + }, + { + "epoch": 0.8281010410782019, + "grad_norm": 0.7026315927505493, + "learning_rate": 9.668535872896225e-05, + "loss": 2.5397, + "step": 10261 + }, + { + "epoch": 0.8281817448147849, + "grad_norm": 0.6611438393592834, + "learning_rate": 9.66695805473705e-05, + "loss": 2.5628, + "step": 10262 + }, + { + "epoch": 0.8282624485513679, + "grad_norm": 0.7211201190948486, + "learning_rate": 9.66538024487815e-05, + "loss": 2.5551, + "step": 10263 + }, + { + "epoch": 0.828343152287951, + "grad_norm": 0.7224553227424622, + "learning_rate": 9.663802443358849e-05, + "loss": 2.5329, + "step": 10264 + }, + { + "epoch": 0.8284238560245339, + "grad_norm": 
0.6805843710899353, + "learning_rate": 9.662224650218474e-05, + "loss": 2.5744, + "step": 10265 + }, + { + "epoch": 0.8285045597611169, + "grad_norm": 0.7101335525512695, + "learning_rate": 9.66064686549634e-05, + "loss": 2.5281, + "step": 10266 + }, + { + "epoch": 0.8285852634976999, + "grad_norm": 0.7208443284034729, + "learning_rate": 9.659069089231774e-05, + "loss": 2.5326, + "step": 10267 + }, + { + "epoch": 0.828665967234283, + "grad_norm": 0.747894287109375, + "learning_rate": 9.6574913214641e-05, + "loss": 2.4909, + "step": 10268 + }, + { + "epoch": 0.828746670970866, + "grad_norm": 0.6618027091026306, + "learning_rate": 9.655913562232635e-05, + "loss": 2.6091, + "step": 10269 + }, + { + "epoch": 0.8288273747074489, + "grad_norm": 0.7101535201072693, + "learning_rate": 9.654335811576704e-05, + "loss": 2.5194, + "step": 10270 + }, + { + "epoch": 0.8289080784440319, + "grad_norm": 0.727763831615448, + "learning_rate": 9.652758069535631e-05, + "loss": 2.5767, + "step": 10271 + }, + { + "epoch": 0.828988782180615, + "grad_norm": 0.6936737895011902, + "learning_rate": 9.65118033614873e-05, + "loss": 2.498, + "step": 10272 + }, + { + "epoch": 0.829069485917198, + "grad_norm": 0.699462354183197, + "learning_rate": 9.64960261145533e-05, + "loss": 2.5033, + "step": 10273 + }, + { + "epoch": 0.829150189653781, + "grad_norm": 0.7024868726730347, + "learning_rate": 9.648024895494749e-05, + "loss": 2.5937, + "step": 10274 + }, + { + "epoch": 0.8292308933903639, + "grad_norm": 0.7028421759605408, + "learning_rate": 9.646447188306305e-05, + "loss": 2.5528, + "step": 10275 + }, + { + "epoch": 0.829311597126947, + "grad_norm": 0.7216476202011108, + "learning_rate": 9.644869489929321e-05, + "loss": 2.5298, + "step": 10276 + }, + { + "epoch": 0.82939230086353, + "grad_norm": 0.6815251111984253, + "learning_rate": 9.643291800403123e-05, + "loss": 2.5138, + "step": 10277 + }, + { + "epoch": 0.829473004600113, + "grad_norm": 0.6961970925331116, + "learning_rate": 
9.64171411976702e-05, + "loss": 2.5441, + "step": 10278 + }, + { + "epoch": 0.829553708336696, + "grad_norm": 0.7317311763763428, + "learning_rate": 9.640136448060337e-05, + "loss": 2.5885, + "step": 10279 + }, + { + "epoch": 0.829634412073279, + "grad_norm": 0.729086697101593, + "learning_rate": 9.638558785322396e-05, + "loss": 2.475, + "step": 10280 + }, + { + "epoch": 0.829715115809862, + "grad_norm": 0.7790165543556213, + "learning_rate": 9.636981131592521e-05, + "loss": 2.5538, + "step": 10281 + }, + { + "epoch": 0.829795819546445, + "grad_norm": 0.7066864967346191, + "learning_rate": 9.635403486910018e-05, + "loss": 2.5916, + "step": 10282 + }, + { + "epoch": 0.829876523283028, + "grad_norm": 0.7070252299308777, + "learning_rate": 9.633825851314215e-05, + "loss": 2.5879, + "step": 10283 + }, + { + "epoch": 0.829957227019611, + "grad_norm": 0.7604004740715027, + "learning_rate": 9.63224822484443e-05, + "loss": 2.5298, + "step": 10284 + }, + { + "epoch": 0.830037930756194, + "grad_norm": 0.7548386454582214, + "learning_rate": 9.63067060753998e-05, + "loss": 2.5313, + "step": 10285 + }, + { + "epoch": 0.830118634492777, + "grad_norm": 0.7241540551185608, + "learning_rate": 9.629092999440183e-05, + "loss": 2.5498, + "step": 10286 + }, + { + "epoch": 0.83019933822936, + "grad_norm": 0.6748291850090027, + "learning_rate": 9.627515400584361e-05, + "loss": 2.523, + "step": 10287 + }, + { + "epoch": 0.8302800419659431, + "grad_norm": 0.6624683141708374, + "learning_rate": 9.625937811011826e-05, + "loss": 2.568, + "step": 10288 + }, + { + "epoch": 0.830360745702526, + "grad_norm": 0.6681114435195923, + "learning_rate": 9.624360230761899e-05, + "loss": 2.5255, + "step": 10289 + }, + { + "epoch": 0.830441449439109, + "grad_norm": 0.6895325183868408, + "learning_rate": 9.622782659873899e-05, + "loss": 2.5275, + "step": 10290 + }, + { + "epoch": 0.830522153175692, + "grad_norm": 0.7257826924324036, + "learning_rate": 9.621205098387137e-05, + "loss": 2.5102, + "step": 10291 
+ }, + { + "epoch": 0.8306028569122751, + "grad_norm": 0.6567066311836243, + "learning_rate": 9.619627546340935e-05, + "loss": 2.5721, + "step": 10292 + }, + { + "epoch": 0.8306835606488581, + "grad_norm": 0.6571428179740906, + "learning_rate": 9.61805000377461e-05, + "loss": 2.5014, + "step": 10293 + }, + { + "epoch": 0.830764264385441, + "grad_norm": 0.7807042598724365, + "learning_rate": 9.61647247072748e-05, + "loss": 2.632, + "step": 10294 + }, + { + "epoch": 0.830844968122024, + "grad_norm": 0.6688913702964783, + "learning_rate": 9.614894947238854e-05, + "loss": 2.5457, + "step": 10295 + }, + { + "epoch": 0.8309256718586071, + "grad_norm": 0.7769338488578796, + "learning_rate": 9.613317433348055e-05, + "loss": 2.4775, + "step": 10296 + }, + { + "epoch": 0.8310063755951901, + "grad_norm": 0.7089162468910217, + "learning_rate": 9.611739929094399e-05, + "loss": 2.4887, + "step": 10297 + }, + { + "epoch": 0.8310870793317731, + "grad_norm": 0.6901174783706665, + "learning_rate": 9.610162434517196e-05, + "loss": 2.6127, + "step": 10298 + }, + { + "epoch": 0.831167783068356, + "grad_norm": 0.6862173676490784, + "learning_rate": 9.608584949655764e-05, + "loss": 2.5432, + "step": 10299 + }, + { + "epoch": 0.8312484868049391, + "grad_norm": 0.6789367198944092, + "learning_rate": 9.607007474549418e-05, + "loss": 2.5135, + "step": 10300 + }, + { + "epoch": 0.8313291905415221, + "grad_norm": 0.6548805832862854, + "learning_rate": 9.605430009237474e-05, + "loss": 2.5466, + "step": 10301 + }, + { + "epoch": 0.8314098942781051, + "grad_norm": 0.6873800158500671, + "learning_rate": 9.603852553759244e-05, + "loss": 2.4954, + "step": 10302 + }, + { + "epoch": 0.831490598014688, + "grad_norm": 0.6816138029098511, + "learning_rate": 9.602275108154046e-05, + "loss": 2.5556, + "step": 10303 + }, + { + "epoch": 0.831571301751271, + "grad_norm": 0.6890314221382141, + "learning_rate": 9.600697672461189e-05, + "loss": 2.5253, + "step": 10304 + }, + { + "epoch": 0.8316520054878541, + 
"grad_norm": 0.6217427849769592, + "learning_rate": 9.599120246719992e-05, + "loss": 2.53, + "step": 10305 + }, + { + "epoch": 0.8317327092244371, + "grad_norm": 0.6638299226760864, + "learning_rate": 9.59754283096977e-05, + "loss": 2.5323, + "step": 10306 + }, + { + "epoch": 0.8318134129610201, + "grad_norm": 0.6834245920181274, + "learning_rate": 9.595965425249828e-05, + "loss": 2.5339, + "step": 10307 + }, + { + "epoch": 0.831894116697603, + "grad_norm": 0.8013476729393005, + "learning_rate": 9.594388029599484e-05, + "loss": 2.4925, + "step": 10308 + }, + { + "epoch": 0.8319748204341861, + "grad_norm": 0.7677187323570251, + "learning_rate": 9.592810644058049e-05, + "loss": 2.5717, + "step": 10309 + }, + { + "epoch": 0.8320555241707691, + "grad_norm": 0.6558046340942383, + "learning_rate": 9.591233268664841e-05, + "loss": 2.5631, + "step": 10310 + }, + { + "epoch": 0.8321362279073521, + "grad_norm": 0.6648481488227844, + "learning_rate": 9.589655903459165e-05, + "loss": 2.5232, + "step": 10311 + }, + { + "epoch": 0.8322169316439351, + "grad_norm": 0.6907756328582764, + "learning_rate": 9.588078548480338e-05, + "loss": 2.4804, + "step": 10312 + }, + { + "epoch": 0.8322976353805182, + "grad_norm": 0.6924928426742554, + "learning_rate": 9.586501203767675e-05, + "loss": 2.4648, + "step": 10313 + }, + { + "epoch": 0.8323783391171011, + "grad_norm": 0.7654799222946167, + "learning_rate": 9.584923869360477e-05, + "loss": 2.6184, + "step": 10314 + }, + { + "epoch": 0.8324590428536841, + "grad_norm": 0.7056179046630859, + "learning_rate": 9.58334654529806e-05, + "loss": 2.5862, + "step": 10315 + }, + { + "epoch": 0.8325397465902671, + "grad_norm": 0.7245064973831177, + "learning_rate": 9.581769231619743e-05, + "loss": 2.4866, + "step": 10316 + }, + { + "epoch": 0.8326204503268502, + "grad_norm": 0.6782355308532715, + "learning_rate": 9.580191928364824e-05, + "loss": 2.5519, + "step": 10317 + }, + { + "epoch": 0.8327011540634331, + "grad_norm": 0.6910805106163025, + 
"learning_rate": 9.578614635572621e-05, + "loss": 2.542, + "step": 10318 + }, + { + "epoch": 0.8327818578000161, + "grad_norm": 0.6858026385307312, + "learning_rate": 9.577037353282444e-05, + "loss": 2.5601, + "step": 10319 + }, + { + "epoch": 0.8328625615365991, + "grad_norm": 0.6886423230171204, + "learning_rate": 9.5754600815336e-05, + "loss": 2.5817, + "step": 10320 + }, + { + "epoch": 0.8329432652731822, + "grad_norm": 0.7585750818252563, + "learning_rate": 9.573882820365402e-05, + "loss": 2.5153, + "step": 10321 + }, + { + "epoch": 0.8330239690097652, + "grad_norm": 0.7004472613334656, + "learning_rate": 9.57230556981716e-05, + "loss": 2.5456, + "step": 10322 + }, + { + "epoch": 0.8331046727463481, + "grad_norm": 0.6530508399009705, + "learning_rate": 9.570728329928179e-05, + "loss": 2.5453, + "step": 10323 + }, + { + "epoch": 0.8331853764829311, + "grad_norm": 0.6767956614494324, + "learning_rate": 9.569151100737769e-05, + "loss": 2.5311, + "step": 10324 + }, + { + "epoch": 0.8332660802195142, + "grad_norm": 0.6835905909538269, + "learning_rate": 9.56757388228524e-05, + "loss": 2.5417, + "step": 10325 + }, + { + "epoch": 0.8333467839560972, + "grad_norm": 0.6582748889923096, + "learning_rate": 9.565996674609901e-05, + "loss": 2.5144, + "step": 10326 + }, + { + "epoch": 0.8334274876926802, + "grad_norm": 0.6815205216407776, + "learning_rate": 9.56441947775106e-05, + "loss": 2.5272, + "step": 10327 + }, + { + "epoch": 0.8335081914292631, + "grad_norm": 0.6810150146484375, + "learning_rate": 9.562842291748022e-05, + "loss": 2.5475, + "step": 10328 + }, + { + "epoch": 0.8335888951658462, + "grad_norm": 0.7220990657806396, + "learning_rate": 9.5612651166401e-05, + "loss": 2.54, + "step": 10329 + }, + { + "epoch": 0.8336695989024292, + "grad_norm": 0.6840164065361023, + "learning_rate": 9.559687952466596e-05, + "loss": 2.5987, + "step": 10330 + }, + { + "epoch": 0.8337503026390122, + "grad_norm": 0.7085031867027283, + "learning_rate": 9.558110799266819e-05, + 
"loss": 2.5674, + "step": 10331 + }, + { + "epoch": 0.8338310063755952, + "grad_norm": 0.6658117175102234, + "learning_rate": 9.55653365708008e-05, + "loss": 2.5793, + "step": 10332 + }, + { + "epoch": 0.8339117101121782, + "grad_norm": 0.782648503780365, + "learning_rate": 9.554956525945677e-05, + "loss": 2.5463, + "step": 10333 + }, + { + "epoch": 0.8339924138487612, + "grad_norm": 0.6999937891960144, + "learning_rate": 9.553379405902922e-05, + "loss": 2.5961, + "step": 10334 + }, + { + "epoch": 0.8340731175853442, + "grad_norm": 0.6681220531463623, + "learning_rate": 9.55180229699112e-05, + "loss": 2.6055, + "step": 10335 + }, + { + "epoch": 0.8341538213219272, + "grad_norm": 0.7127133011817932, + "learning_rate": 9.550225199249577e-05, + "loss": 2.5571, + "step": 10336 + }, + { + "epoch": 0.8342345250585103, + "grad_norm": 0.6939001679420471, + "learning_rate": 9.548648112717596e-05, + "loss": 2.5653, + "step": 10337 + }, + { + "epoch": 0.8343152287950932, + "grad_norm": 0.7483924031257629, + "learning_rate": 9.547071037434487e-05, + "loss": 2.5316, + "step": 10338 + }, + { + "epoch": 0.8343959325316762, + "grad_norm": 0.7975850105285645, + "learning_rate": 9.545493973439548e-05, + "loss": 2.6039, + "step": 10339 + }, + { + "epoch": 0.8344766362682592, + "grad_norm": 0.6893026232719421, + "learning_rate": 9.543916920772087e-05, + "loss": 2.5797, + "step": 10340 + }, + { + "epoch": 0.8345573400048423, + "grad_norm": 0.752869188785553, + "learning_rate": 9.542339879471409e-05, + "loss": 2.5677, + "step": 10341 + }, + { + "epoch": 0.8346380437414253, + "grad_norm": 0.7336339354515076, + "learning_rate": 9.540762849576822e-05, + "loss": 2.5212, + "step": 10342 + }, + { + "epoch": 0.8347187474780082, + "grad_norm": 0.7742713689804077, + "learning_rate": 9.539185831127621e-05, + "loss": 2.5599, + "step": 10343 + }, + { + "epoch": 0.8347994512145912, + "grad_norm": 0.7205352783203125, + "learning_rate": 9.537608824163114e-05, + "loss": 2.5591, + "step": 10344 + }, + { 
+ "epoch": 0.8348801549511743, + "grad_norm": 0.7794787287712097, + "learning_rate": 9.536031828722605e-05, + "loss": 2.5858, + "step": 10345 + }, + { + "epoch": 0.8349608586877573, + "grad_norm": 0.7129528522491455, + "learning_rate": 9.534454844845396e-05, + "loss": 2.5591, + "step": 10346 + }, + { + "epoch": 0.8350415624243402, + "grad_norm": 0.731038510799408, + "learning_rate": 9.532877872570787e-05, + "loss": 2.5774, + "step": 10347 + }, + { + "epoch": 0.8351222661609232, + "grad_norm": 0.7706510424613953, + "learning_rate": 9.531300911938087e-05, + "loss": 2.6102, + "step": 10348 + }, + { + "epoch": 0.8352029698975063, + "grad_norm": 0.6890363097190857, + "learning_rate": 9.52972396298659e-05, + "loss": 2.5393, + "step": 10349 + }, + { + "epoch": 0.8352836736340893, + "grad_norm": 0.6792402863502502, + "learning_rate": 9.528147025755601e-05, + "loss": 2.5607, + "step": 10350 + }, + { + "epoch": 0.8353643773706723, + "grad_norm": 0.7097377777099609, + "learning_rate": 9.526570100284422e-05, + "loss": 2.5681, + "step": 10351 + }, + { + "epoch": 0.8354450811072552, + "grad_norm": 0.7530940771102905, + "learning_rate": 9.524993186612353e-05, + "loss": 2.5405, + "step": 10352 + }, + { + "epoch": 0.8355257848438382, + "grad_norm": 0.714080810546875, + "learning_rate": 9.523416284778696e-05, + "loss": 2.5365, + "step": 10353 + }, + { + "epoch": 0.8356064885804213, + "grad_norm": 0.6745832562446594, + "learning_rate": 9.521839394822752e-05, + "loss": 2.5553, + "step": 10354 + }, + { + "epoch": 0.8356871923170043, + "grad_norm": 0.7163450121879578, + "learning_rate": 9.52026251678382e-05, + "loss": 2.5074, + "step": 10355 + }, + { + "epoch": 0.8357678960535873, + "grad_norm": 0.6876534223556519, + "learning_rate": 9.518685650701197e-05, + "loss": 2.5652, + "step": 10356 + }, + { + "epoch": 0.8358485997901702, + "grad_norm": 0.6424533128738403, + "learning_rate": 9.517108796614187e-05, + "loss": 2.4823, + "step": 10357 + }, + { + "epoch": 0.8359293035267533, + 
"grad_norm": 0.646802544593811, + "learning_rate": 9.515531954562094e-05, + "loss": 2.5602, + "step": 10358 + }, + { + "epoch": 0.8360100072633363, + "grad_norm": 0.7266993522644043, + "learning_rate": 9.513955124584205e-05, + "loss": 2.5384, + "step": 10359 + }, + { + "epoch": 0.8360907109999193, + "grad_norm": 0.7358742356300354, + "learning_rate": 9.512378306719826e-05, + "loss": 2.5798, + "step": 10360 + }, + { + "epoch": 0.8361714147365022, + "grad_norm": 0.7191498279571533, + "learning_rate": 9.510801501008256e-05, + "loss": 2.5229, + "step": 10361 + }, + { + "epoch": 0.8362521184730853, + "grad_norm": 0.7058876156806946, + "learning_rate": 9.509224707488788e-05, + "loss": 2.5146, + "step": 10362 + }, + { + "epoch": 0.8363328222096683, + "grad_norm": 0.7348346710205078, + "learning_rate": 9.507647926200725e-05, + "loss": 2.5878, + "step": 10363 + }, + { + "epoch": 0.8364135259462513, + "grad_norm": 0.7464115619659424, + "learning_rate": 9.506071157183366e-05, + "loss": 2.6056, + "step": 10364 + }, + { + "epoch": 0.8364942296828343, + "grad_norm": 0.7077332139015198, + "learning_rate": 9.504494400476e-05, + "loss": 2.5161, + "step": 10365 + }, + { + "epoch": 0.8365749334194174, + "grad_norm": 0.7381827235221863, + "learning_rate": 9.502917656117928e-05, + "loss": 2.519, + "step": 10366 + }, + { + "epoch": 0.8366556371560003, + "grad_norm": 0.743180513381958, + "learning_rate": 9.501340924148452e-05, + "loss": 2.6149, + "step": 10367 + }, + { + "epoch": 0.8367363408925833, + "grad_norm": 0.6496078372001648, + "learning_rate": 9.499764204606863e-05, + "loss": 2.4969, + "step": 10368 + }, + { + "epoch": 0.8368170446291663, + "grad_norm": 0.6796541810035706, + "learning_rate": 9.498187497532454e-05, + "loss": 2.5304, + "step": 10369 + }, + { + "epoch": 0.8368977483657494, + "grad_norm": 0.6555948853492737, + "learning_rate": 9.496610802964529e-05, + "loss": 2.6029, + "step": 10370 + }, + { + "epoch": 0.8369784521023323, + "grad_norm": 0.6990405321121216, + 
"learning_rate": 9.495034120942374e-05, + "loss": 2.5286, + "step": 10371 + }, + { + "epoch": 0.8370591558389153, + "grad_norm": 0.7417613863945007, + "learning_rate": 9.49345745150529e-05, + "loss": 2.5301, + "step": 10372 + }, + { + "epoch": 0.8371398595754983, + "grad_norm": 0.6809872388839722, + "learning_rate": 9.49188079469257e-05, + "loss": 2.5075, + "step": 10373 + }, + { + "epoch": 0.8372205633120814, + "grad_norm": 0.6537099480628967, + "learning_rate": 9.490304150543514e-05, + "loss": 2.5515, + "step": 10374 + }, + { + "epoch": 0.8373012670486644, + "grad_norm": 0.6660431027412415, + "learning_rate": 9.488727519097407e-05, + "loss": 2.549, + "step": 10375 + }, + { + "epoch": 0.8373819707852473, + "grad_norm": 0.7257838249206543, + "learning_rate": 9.487150900393546e-05, + "loss": 2.546, + "step": 10376 + }, + { + "epoch": 0.8374626745218303, + "grad_norm": 0.742085874080658, + "learning_rate": 9.485574294471226e-05, + "loss": 2.5302, + "step": 10377 + }, + { + "epoch": 0.8375433782584134, + "grad_norm": 0.659934401512146, + "learning_rate": 9.48399770136974e-05, + "loss": 2.5553, + "step": 10378 + }, + { + "epoch": 0.8376240819949964, + "grad_norm": 0.7219613790512085, + "learning_rate": 9.482421121128377e-05, + "loss": 2.6186, + "step": 10379 + }, + { + "epoch": 0.8377047857315794, + "grad_norm": 0.706444263458252, + "learning_rate": 9.480844553786436e-05, + "loss": 2.5082, + "step": 10380 + }, + { + "epoch": 0.8377854894681623, + "grad_norm": 0.7527014017105103, + "learning_rate": 9.479267999383204e-05, + "loss": 2.5625, + "step": 10381 + }, + { + "epoch": 0.8378661932047454, + "grad_norm": 0.7488746643066406, + "learning_rate": 9.477691457957976e-05, + "loss": 2.528, + "step": 10382 + }, + { + "epoch": 0.8379468969413284, + "grad_norm": 0.7394229173660278, + "learning_rate": 9.476114929550045e-05, + "loss": 2.5387, + "step": 10383 + }, + { + "epoch": 0.8380276006779114, + "grad_norm": 0.7490981817245483, + "learning_rate": 9.474538414198695e-05, + 
"loss": 2.548, + "step": 10384 + }, + { + "epoch": 0.8381083044144944, + "grad_norm": 0.7203173041343689, + "learning_rate": 9.472961911943222e-05, + "loss": 2.5547, + "step": 10385 + }, + { + "epoch": 0.8381890081510774, + "grad_norm": 0.6929850578308105, + "learning_rate": 9.471385422822917e-05, + "loss": 2.4831, + "step": 10386 + }, + { + "epoch": 0.8382697118876604, + "grad_norm": 0.6303263902664185, + "learning_rate": 9.469808946877067e-05, + "loss": 2.4569, + "step": 10387 + }, + { + "epoch": 0.8383504156242434, + "grad_norm": 0.6986981630325317, + "learning_rate": 9.468232484144964e-05, + "loss": 2.5278, + "step": 10388 + }, + { + "epoch": 0.8384311193608264, + "grad_norm": 0.6910964846611023, + "learning_rate": 9.466656034665898e-05, + "loss": 2.5657, + "step": 10389 + }, + { + "epoch": 0.8385118230974095, + "grad_norm": 0.6571134924888611, + "learning_rate": 9.465079598479163e-05, + "loss": 2.6017, + "step": 10390 + }, + { + "epoch": 0.8385925268339924, + "grad_norm": 0.7117733359336853, + "learning_rate": 9.463503175624034e-05, + "loss": 2.56, + "step": 10391 + }, + { + "epoch": 0.8386732305705754, + "grad_norm": 0.7052998542785645, + "learning_rate": 9.461926766139813e-05, + "loss": 2.4998, + "step": 10392 + }, + { + "epoch": 0.8387539343071584, + "grad_norm": 0.7306597232818604, + "learning_rate": 9.460350370065786e-05, + "loss": 2.5292, + "step": 10393 + }, + { + "epoch": 0.8388346380437415, + "grad_norm": 0.681069552898407, + "learning_rate": 9.458773987441235e-05, + "loss": 2.5469, + "step": 10394 + }, + { + "epoch": 0.8389153417803245, + "grad_norm": 0.6681767702102661, + "learning_rate": 9.45719761830545e-05, + "loss": 2.5476, + "step": 10395 + }, + { + "epoch": 0.8389960455169074, + "grad_norm": 0.6759339570999146, + "learning_rate": 9.455621262697723e-05, + "loss": 2.4806, + "step": 10396 + }, + { + "epoch": 0.8390767492534904, + "grad_norm": 0.695829451084137, + "learning_rate": 9.454044920657333e-05, + "loss": 2.5255, + "step": 10397 + }, + { + 
"epoch": 0.8391574529900735, + "grad_norm": 0.686568558216095, + "learning_rate": 9.452468592223572e-05, + "loss": 2.5655, + "step": 10398 + }, + { + "epoch": 0.8392381567266565, + "grad_norm": 0.6529035568237305, + "learning_rate": 9.45089227743573e-05, + "loss": 2.5026, + "step": 10399 + }, + { + "epoch": 0.8393188604632394, + "grad_norm": 0.6809061765670776, + "learning_rate": 9.449315976333082e-05, + "loss": 2.5549, + "step": 10400 + }, + { + "epoch": 0.8393995641998224, + "grad_norm": 0.6920269727706909, + "learning_rate": 9.447739688954919e-05, + "loss": 2.517, + "step": 10401 + }, + { + "epoch": 0.8394802679364055, + "grad_norm": 0.6626712083816528, + "learning_rate": 9.446163415340526e-05, + "loss": 2.605, + "step": 10402 + }, + { + "epoch": 0.8395609716729885, + "grad_norm": 0.6912916898727417, + "learning_rate": 9.444587155529195e-05, + "loss": 2.588, + "step": 10403 + }, + { + "epoch": 0.8396416754095715, + "grad_norm": 0.6771352291107178, + "learning_rate": 9.443010909560198e-05, + "loss": 2.5148, + "step": 10404 + }, + { + "epoch": 0.8397223791461544, + "grad_norm": 0.7015509009361267, + "learning_rate": 9.441434677472827e-05, + "loss": 2.5425, + "step": 10405 + }, + { + "epoch": 0.8398030828827374, + "grad_norm": 0.6789976358413696, + "learning_rate": 9.439858459306364e-05, + "loss": 2.598, + "step": 10406 + }, + { + "epoch": 0.8398837866193205, + "grad_norm": 0.674391508102417, + "learning_rate": 9.438282255100091e-05, + "loss": 2.5581, + "step": 10407 + }, + { + "epoch": 0.8399644903559035, + "grad_norm": 0.6944772005081177, + "learning_rate": 9.436706064893294e-05, + "loss": 2.5591, + "step": 10408 + }, + { + "epoch": 0.8400451940924865, + "grad_norm": 0.6750832200050354, + "learning_rate": 9.435129888725259e-05, + "loss": 2.533, + "step": 10409 + }, + { + "epoch": 0.8401258978290694, + "grad_norm": 0.6927465200424194, + "learning_rate": 9.433553726635257e-05, + "loss": 2.536, + "step": 10410 + }, + { + "epoch": 0.8402066015656525, + "grad_norm": 
0.6399651765823364, + "learning_rate": 9.431977578662578e-05, + "loss": 2.5123, + "step": 10411 + }, + { + "epoch": 0.8402873053022355, + "grad_norm": 0.7588143944740295, + "learning_rate": 9.430401444846505e-05, + "loss": 2.6133, + "step": 10412 + }, + { + "epoch": 0.8403680090388185, + "grad_norm": 0.8010972738265991, + "learning_rate": 9.428825325226313e-05, + "loss": 2.5407, + "step": 10413 + }, + { + "epoch": 0.8404487127754015, + "grad_norm": 0.6847307085990906, + "learning_rate": 9.427249219841288e-05, + "loss": 2.5912, + "step": 10414 + }, + { + "epoch": 0.8405294165119845, + "grad_norm": 0.7005963325500488, + "learning_rate": 9.425673128730716e-05, + "loss": 2.5059, + "step": 10415 + }, + { + "epoch": 0.8406101202485675, + "grad_norm": 0.7383962273597717, + "learning_rate": 9.424097051933862e-05, + "loss": 2.5157, + "step": 10416 + }, + { + "epoch": 0.8406908239851505, + "grad_norm": 0.7078843712806702, + "learning_rate": 9.422520989490018e-05, + "loss": 2.6093, + "step": 10417 + }, + { + "epoch": 0.8407715277217335, + "grad_norm": 0.7449501752853394, + "learning_rate": 9.42094494143846e-05, + "loss": 2.594, + "step": 10418 + }, + { + "epoch": 0.8408522314583166, + "grad_norm": 0.6823872923851013, + "learning_rate": 9.419368907818473e-05, + "loss": 2.5653, + "step": 10419 + }, + { + "epoch": 0.8409329351948995, + "grad_norm": 0.7403056025505066, + "learning_rate": 9.417792888669325e-05, + "loss": 2.5296, + "step": 10420 + }, + { + "epoch": 0.8410136389314825, + "grad_norm": 0.6858980655670166, + "learning_rate": 9.4162168840303e-05, + "loss": 2.5401, + "step": 10421 + }, + { + "epoch": 0.8410943426680655, + "grad_norm": 0.692348837852478, + "learning_rate": 9.41464089394068e-05, + "loss": 2.4797, + "step": 10422 + }, + { + "epoch": 0.8411750464046486, + "grad_norm": 0.6939836144447327, + "learning_rate": 9.413064918439736e-05, + "loss": 2.505, + "step": 10423 + }, + { + "epoch": 0.8412557501412316, + "grad_norm": 0.7334314584732056, + "learning_rate": 
9.411488957566748e-05, + "loss": 2.5792, + "step": 10424 + }, + { + "epoch": 0.8413364538778145, + "grad_norm": 0.6977920532226562, + "learning_rate": 9.409913011360999e-05, + "loss": 2.5204, + "step": 10425 + }, + { + "epoch": 0.8414171576143975, + "grad_norm": 0.7121822834014893, + "learning_rate": 9.408337079861756e-05, + "loss": 2.571, + "step": 10426 + }, + { + "epoch": 0.8414978613509806, + "grad_norm": 0.761476993560791, + "learning_rate": 9.406761163108297e-05, + "loss": 2.5845, + "step": 10427 + }, + { + "epoch": 0.8415785650875636, + "grad_norm": 0.7160221934318542, + "learning_rate": 9.405185261139906e-05, + "loss": 2.5331, + "step": 10428 + }, + { + "epoch": 0.8416592688241465, + "grad_norm": 0.6828827857971191, + "learning_rate": 9.40360937399585e-05, + "loss": 2.5596, + "step": 10429 + }, + { + "epoch": 0.8417399725607295, + "grad_norm": 0.756473183631897, + "learning_rate": 9.402033501715406e-05, + "loss": 2.6107, + "step": 10430 + }, + { + "epoch": 0.8418206762973126, + "grad_norm": 0.7486895322799683, + "learning_rate": 9.400457644337853e-05, + "loss": 2.5388, + "step": 10431 + }, + { + "epoch": 0.8419013800338956, + "grad_norm": 0.7759146690368652, + "learning_rate": 9.398881801902461e-05, + "loss": 2.5559, + "step": 10432 + }, + { + "epoch": 0.8419820837704786, + "grad_norm": 0.71756911277771, + "learning_rate": 9.397305974448506e-05, + "loss": 2.6109, + "step": 10433 + }, + { + "epoch": 0.8420627875070615, + "grad_norm": 0.7741644382476807, + "learning_rate": 9.395730162015261e-05, + "loss": 2.5664, + "step": 10434 + }, + { + "epoch": 0.8421434912436446, + "grad_norm": 0.7155938744544983, + "learning_rate": 9.394154364642006e-05, + "loss": 2.5693, + "step": 10435 + }, + { + "epoch": 0.8422241949802276, + "grad_norm": 0.6862725019454956, + "learning_rate": 9.392578582368002e-05, + "loss": 2.4942, + "step": 10436 + }, + { + "epoch": 0.8423048987168106, + "grad_norm": 0.6698417067527771, + "learning_rate": 9.391002815232528e-05, + "loss": 2.5258, + 
"step": 10437 + }, + { + "epoch": 0.8423856024533936, + "grad_norm": 0.7756468057632446, + "learning_rate": 9.389427063274858e-05, + "loss": 2.5008, + "step": 10438 + }, + { + "epoch": 0.8424663061899766, + "grad_norm": 0.6579857468605042, + "learning_rate": 9.387851326534259e-05, + "loss": 2.5335, + "step": 10439 + }, + { + "epoch": 0.8425470099265596, + "grad_norm": 0.7673436403274536, + "learning_rate": 9.386275605050006e-05, + "loss": 2.5646, + "step": 10440 + }, + { + "epoch": 0.8426277136631426, + "grad_norm": 0.7377188205718994, + "learning_rate": 9.384699898861372e-05, + "loss": 2.568, + "step": 10441 + }, + { + "epoch": 0.8427084173997256, + "grad_norm": 0.6502123475074768, + "learning_rate": 9.38312420800762e-05, + "loss": 2.6091, + "step": 10442 + }, + { + "epoch": 0.8427891211363087, + "grad_norm": 0.729852020740509, + "learning_rate": 9.381548532528026e-05, + "loss": 2.4873, + "step": 10443 + }, + { + "epoch": 0.8428698248728916, + "grad_norm": 0.7419102191925049, + "learning_rate": 9.379972872461865e-05, + "loss": 2.4966, + "step": 10444 + }, + { + "epoch": 0.8429505286094746, + "grad_norm": 0.6921093463897705, + "learning_rate": 9.378397227848395e-05, + "loss": 2.4895, + "step": 10445 + }, + { + "epoch": 0.8430312323460576, + "grad_norm": 0.7697325944900513, + "learning_rate": 9.376821598726892e-05, + "loss": 2.5779, + "step": 10446 + }, + { + "epoch": 0.8431119360826407, + "grad_norm": 0.6441029906272888, + "learning_rate": 9.375245985136626e-05, + "loss": 2.4909, + "step": 10447 + }, + { + "epoch": 0.8431926398192237, + "grad_norm": 0.6962057948112488, + "learning_rate": 9.373670387116861e-05, + "loss": 2.5602, + "step": 10448 + }, + { + "epoch": 0.8432733435558066, + "grad_norm": 0.7030641436576843, + "learning_rate": 9.372094804706867e-05, + "loss": 2.5641, + "step": 10449 + }, + { + "epoch": 0.8433540472923896, + "grad_norm": 0.6969063878059387, + "learning_rate": 9.370519237945912e-05, + "loss": 2.5555, + "step": 10450 + }, + { + "epoch": 
0.8434347510289727, + "grad_norm": 0.7169879674911499, + "learning_rate": 9.368943686873267e-05, + "loss": 2.5258, + "step": 10451 + }, + { + "epoch": 0.8435154547655557, + "grad_norm": 0.7198735475540161, + "learning_rate": 9.36736815152819e-05, + "loss": 2.5192, + "step": 10452 + }, + { + "epoch": 0.8435961585021386, + "grad_norm": 0.6613535284996033, + "learning_rate": 9.365792631949951e-05, + "loss": 2.5596, + "step": 10453 + }, + { + "epoch": 0.8436768622387216, + "grad_norm": 0.6377065777778625, + "learning_rate": 9.364217128177824e-05, + "loss": 2.5518, + "step": 10454 + }, + { + "epoch": 0.8437575659753046, + "grad_norm": 0.6670635938644409, + "learning_rate": 9.362641640251063e-05, + "loss": 2.4793, + "step": 10455 + }, + { + "epoch": 0.8438382697118877, + "grad_norm": 0.6556122899055481, + "learning_rate": 9.361066168208939e-05, + "loss": 2.5492, + "step": 10456 + }, + { + "epoch": 0.8439189734484707, + "grad_norm": 0.7262280583381653, + "learning_rate": 9.35949071209072e-05, + "loss": 2.6059, + "step": 10457 + }, + { + "epoch": 0.8439996771850536, + "grad_norm": 0.702953040599823, + "learning_rate": 9.357915271935662e-05, + "loss": 2.5445, + "step": 10458 + }, + { + "epoch": 0.8440803809216366, + "grad_norm": 0.6619930267333984, + "learning_rate": 9.356339847783036e-05, + "loss": 2.5688, + "step": 10459 + }, + { + "epoch": 0.8441610846582197, + "grad_norm": 0.7038032412528992, + "learning_rate": 9.354764439672106e-05, + "loss": 2.5195, + "step": 10460 + }, + { + "epoch": 0.8442417883948027, + "grad_norm": 0.6615132689476013, + "learning_rate": 9.353189047642129e-05, + "loss": 2.5176, + "step": 10461 + }, + { + "epoch": 0.8443224921313857, + "grad_norm": 0.6524826288223267, + "learning_rate": 9.351613671732372e-05, + "loss": 2.4294, + "step": 10462 + }, + { + "epoch": 0.8444031958679686, + "grad_norm": 0.6526279449462891, + "learning_rate": 9.350038311982099e-05, + "loss": 2.595, + "step": 10463 + }, + { + "epoch": 0.8444838996045517, + "grad_norm": 
0.6610859632492065, + "learning_rate": 9.348462968430569e-05, + "loss": 2.5311, + "step": 10464 + }, + { + "epoch": 0.8445646033411347, + "grad_norm": 0.6835470795631409, + "learning_rate": 9.346887641117045e-05, + "loss": 2.5694, + "step": 10465 + }, + { + "epoch": 0.8446453070777177, + "grad_norm": 0.6768551468849182, + "learning_rate": 9.345312330080787e-05, + "loss": 2.6082, + "step": 10466 + }, + { + "epoch": 0.8447260108143007, + "grad_norm": 0.6368672847747803, + "learning_rate": 9.343737035361059e-05, + "loss": 2.5221, + "step": 10467 + }, + { + "epoch": 0.8448067145508837, + "grad_norm": 0.6952844858169556, + "learning_rate": 9.34216175699712e-05, + "loss": 2.5003, + "step": 10468 + }, + { + "epoch": 0.8448874182874667, + "grad_norm": 0.6663931012153625, + "learning_rate": 9.340586495028227e-05, + "loss": 2.5469, + "step": 10469 + }, + { + "epoch": 0.8449681220240497, + "grad_norm": 0.6840688586235046, + "learning_rate": 9.339011249493647e-05, + "loss": 2.5499, + "step": 10470 + }, + { + "epoch": 0.8450488257606327, + "grad_norm": 0.6832869052886963, + "learning_rate": 9.337436020432632e-05, + "loss": 2.5492, + "step": 10471 + }, + { + "epoch": 0.8451295294972158, + "grad_norm": 0.7444044947624207, + "learning_rate": 9.335860807884442e-05, + "loss": 2.5791, + "step": 10472 + }, + { + "epoch": 0.8452102332337987, + "grad_norm": 0.6821839809417725, + "learning_rate": 9.334285611888339e-05, + "loss": 2.4772, + "step": 10473 + }, + { + "epoch": 0.8452909369703817, + "grad_norm": 0.6209141612052917, + "learning_rate": 9.332710432483577e-05, + "loss": 2.5656, + "step": 10474 + }, + { + "epoch": 0.8453716407069647, + "grad_norm": 0.6531212329864502, + "learning_rate": 9.331135269709415e-05, + "loss": 2.5285, + "step": 10475 + }, + { + "epoch": 0.8454523444435478, + "grad_norm": 0.6418079137802124, + "learning_rate": 9.329560123605115e-05, + "loss": 2.5503, + "step": 10476 + }, + { + "epoch": 0.8455330481801308, + "grad_norm": 0.6636360287666321, + 
"learning_rate": 9.327984994209924e-05, + "loss": 2.528, + "step": 10477 + }, + { + "epoch": 0.8456137519167137, + "grad_norm": 0.6196488738059998, + "learning_rate": 9.326409881563102e-05, + "loss": 2.4907, + "step": 10478 + }, + { + "epoch": 0.8456944556532967, + "grad_norm": 0.6339137554168701, + "learning_rate": 9.324834785703913e-05, + "loss": 2.4672, + "step": 10479 + }, + { + "epoch": 0.8457751593898798, + "grad_norm": 0.6803932189941406, + "learning_rate": 9.323259706671602e-05, + "loss": 2.5538, + "step": 10480 + }, + { + "epoch": 0.8458558631264628, + "grad_norm": 0.6815275549888611, + "learning_rate": 9.321684644505429e-05, + "loss": 2.5291, + "step": 10481 + }, + { + "epoch": 0.8459365668630457, + "grad_norm": 0.6497374773025513, + "learning_rate": 9.320109599244646e-05, + "loss": 2.5499, + "step": 10482 + }, + { + "epoch": 0.8460172705996287, + "grad_norm": 0.7966926097869873, + "learning_rate": 9.318534570928512e-05, + "loss": 2.523, + "step": 10483 + }, + { + "epoch": 0.8460979743362118, + "grad_norm": 0.6532156467437744, + "learning_rate": 9.316959559596276e-05, + "loss": 2.5138, + "step": 10484 + }, + { + "epoch": 0.8461786780727948, + "grad_norm": 0.7292522192001343, + "learning_rate": 9.315384565287193e-05, + "loss": 2.5413, + "step": 10485 + }, + { + "epoch": 0.8462593818093778, + "grad_norm": 0.7610795497894287, + "learning_rate": 9.313809588040519e-05, + "loss": 2.5071, + "step": 10486 + }, + { + "epoch": 0.8463400855459607, + "grad_norm": 0.7038258910179138, + "learning_rate": 9.312234627895502e-05, + "loss": 2.5568, + "step": 10487 + }, + { + "epoch": 0.8464207892825438, + "grad_norm": 0.7136046290397644, + "learning_rate": 9.310659684891395e-05, + "loss": 2.5372, + "step": 10488 + }, + { + "epoch": 0.8465014930191268, + "grad_norm": 0.7512896060943604, + "learning_rate": 9.309084759067452e-05, + "loss": 2.5821, + "step": 10489 + }, + { + "epoch": 0.8465821967557098, + "grad_norm": 0.7436400651931763, + "learning_rate": 
9.307509850462922e-05, + "loss": 2.5489, + "step": 10490 + }, + { + "epoch": 0.8466629004922928, + "grad_norm": 0.6858603954315186, + "learning_rate": 9.305934959117056e-05, + "loss": 2.5622, + "step": 10491 + }, + { + "epoch": 0.8467436042288758, + "grad_norm": 0.707185685634613, + "learning_rate": 9.304360085069107e-05, + "loss": 2.5275, + "step": 10492 + }, + { + "epoch": 0.8468243079654588, + "grad_norm": 0.7207933068275452, + "learning_rate": 9.302785228358322e-05, + "loss": 2.5877, + "step": 10493 + }, + { + "epoch": 0.8469050117020418, + "grad_norm": 0.6470080614089966, + "learning_rate": 9.30121038902395e-05, + "loss": 2.5117, + "step": 10494 + }, + { + "epoch": 0.8469857154386248, + "grad_norm": 0.75248783826828, + "learning_rate": 9.299635567105247e-05, + "loss": 2.5259, + "step": 10495 + }, + { + "epoch": 0.8470664191752079, + "grad_norm": 0.7150708436965942, + "learning_rate": 9.298060762641452e-05, + "loss": 2.551, + "step": 10496 + }, + { + "epoch": 0.8471471229117908, + "grad_norm": 0.6865069270133972, + "learning_rate": 9.296485975671818e-05, + "loss": 2.5184, + "step": 10497 + }, + { + "epoch": 0.8472278266483738, + "grad_norm": 0.7188237309455872, + "learning_rate": 9.294911206235593e-05, + "loss": 2.5207, + "step": 10498 + }, + { + "epoch": 0.8473085303849568, + "grad_norm": 0.6907880902290344, + "learning_rate": 9.293336454372026e-05, + "loss": 2.5544, + "step": 10499 + }, + { + "epoch": 0.8473892341215399, + "grad_norm": 0.7626079320907593, + "learning_rate": 9.291761720120358e-05, + "loss": 2.5741, + "step": 10500 + }, + { + "epoch": 0.8474699378581229, + "grad_norm": 0.6731963753700256, + "learning_rate": 9.29018700351984e-05, + "loss": 2.5433, + "step": 10501 + }, + { + "epoch": 0.8475506415947058, + "grad_norm": 0.7256288528442383, + "learning_rate": 9.288612304609723e-05, + "loss": 2.5131, + "step": 10502 + }, + { + "epoch": 0.8476313453312888, + "grad_norm": 0.7129119634628296, + "learning_rate": 9.287037623429242e-05, + "loss": 2.5054, + 
"step": 10503 + }, + { + "epoch": 0.8477120490678719, + "grad_norm": 0.6711156964302063, + "learning_rate": 9.285462960017644e-05, + "loss": 2.5671, + "step": 10504 + }, + { + "epoch": 0.8477927528044549, + "grad_norm": 0.7268081903457642, + "learning_rate": 9.283888314414184e-05, + "loss": 2.5627, + "step": 10505 + }, + { + "epoch": 0.8478734565410379, + "grad_norm": 0.8635050058364868, + "learning_rate": 9.282313686658094e-05, + "loss": 2.517, + "step": 10506 + }, + { + "epoch": 0.8479541602776208, + "grad_norm": 0.7077138423919678, + "learning_rate": 9.280739076788624e-05, + "loss": 2.5551, + "step": 10507 + }, + { + "epoch": 0.8480348640142038, + "grad_norm": 0.6312204599380493, + "learning_rate": 9.279164484845018e-05, + "loss": 2.5329, + "step": 10508 + }, + { + "epoch": 0.8481155677507869, + "grad_norm": 0.6749829649925232, + "learning_rate": 9.277589910866516e-05, + "loss": 2.5092, + "step": 10509 + }, + { + "epoch": 0.8481962714873699, + "grad_norm": 0.753391683101654, + "learning_rate": 9.27601535489236e-05, + "loss": 2.6244, + "step": 10510 + }, + { + "epoch": 0.8482769752239528, + "grad_norm": 0.7230119109153748, + "learning_rate": 9.2744408169618e-05, + "loss": 2.5021, + "step": 10511 + }, + { + "epoch": 0.8483576789605358, + "grad_norm": 0.6759157776832581, + "learning_rate": 9.272866297114067e-05, + "loss": 2.5399, + "step": 10512 + }, + { + "epoch": 0.8484383826971189, + "grad_norm": 0.7049473524093628, + "learning_rate": 9.271291795388406e-05, + "loss": 2.5024, + "step": 10513 + }, + { + "epoch": 0.8485190864337019, + "grad_norm": 0.6579850912094116, + "learning_rate": 9.269717311824058e-05, + "loss": 2.5019, + "step": 10514 + }, + { + "epoch": 0.8485997901702849, + "grad_norm": 0.7091391086578369, + "learning_rate": 9.268142846460265e-05, + "loss": 2.5785, + "step": 10515 + }, + { + "epoch": 0.8486804939068678, + "grad_norm": 0.6612898707389832, + "learning_rate": 9.266568399336266e-05, + "loss": 2.5046, + "step": 10516 + }, + { + "epoch": 
0.8487611976434509, + "grad_norm": 0.6348623633384705, + "learning_rate": 9.264993970491298e-05, + "loss": 2.543, + "step": 10517 + }, + { + "epoch": 0.8488419013800339, + "grad_norm": 0.688360869884491, + "learning_rate": 9.263419559964604e-05, + "loss": 2.5294, + "step": 10518 + }, + { + "epoch": 0.8489226051166169, + "grad_norm": 0.6483190059661865, + "learning_rate": 9.261845167795418e-05, + "loss": 2.5623, + "step": 10519 + }, + { + "epoch": 0.8490033088531999, + "grad_norm": 0.689379096031189, + "learning_rate": 9.26027079402298e-05, + "loss": 2.4871, + "step": 10520 + }, + { + "epoch": 0.8490840125897829, + "grad_norm": 0.6627655625343323, + "learning_rate": 9.25869643868653e-05, + "loss": 2.5353, + "step": 10521 + }, + { + "epoch": 0.8491647163263659, + "grad_norm": 0.6701192259788513, + "learning_rate": 9.2571221018253e-05, + "loss": 2.5003, + "step": 10522 + }, + { + "epoch": 0.8492454200629489, + "grad_norm": 0.7413944005966187, + "learning_rate": 9.255547783478529e-05, + "loss": 2.5473, + "step": 10523 + }, + { + "epoch": 0.8493261237995319, + "grad_norm": 0.6490365266799927, + "learning_rate": 9.253973483685455e-05, + "loss": 2.5168, + "step": 10524 + }, + { + "epoch": 0.849406827536115, + "grad_norm": 0.7303688526153564, + "learning_rate": 9.25239920248531e-05, + "loss": 2.5953, + "step": 10525 + }, + { + "epoch": 0.8494875312726979, + "grad_norm": 0.7132991552352905, + "learning_rate": 9.250824939917331e-05, + "loss": 2.475, + "step": 10526 + }, + { + "epoch": 0.8495682350092809, + "grad_norm": 0.6935676336288452, + "learning_rate": 9.249250696020753e-05, + "loss": 2.5212, + "step": 10527 + }, + { + "epoch": 0.8496489387458639, + "grad_norm": 0.732961118221283, + "learning_rate": 9.247676470834814e-05, + "loss": 2.5848, + "step": 10528 + }, + { + "epoch": 0.849729642482447, + "grad_norm": 0.6899160146713257, + "learning_rate": 9.246102264398739e-05, + "loss": 2.4551, + "step": 10529 + }, + { + "epoch": 0.84981034621903, + "grad_norm": 
0.6941123604774475, + "learning_rate": 9.244528076751766e-05, + "loss": 2.5441, + "step": 10530 + }, + { + "epoch": 0.8498910499556129, + "grad_norm": 0.7351016998291016, + "learning_rate": 9.242953907933134e-05, + "loss": 2.6519, + "step": 10531 + }, + { + "epoch": 0.8499717536921959, + "grad_norm": 0.7156691551208496, + "learning_rate": 9.241379757982065e-05, + "loss": 2.573, + "step": 10532 + }, + { + "epoch": 0.850052457428779, + "grad_norm": 0.7137688994407654, + "learning_rate": 9.239805626937797e-05, + "loss": 2.5688, + "step": 10533 + }, + { + "epoch": 0.850133161165362, + "grad_norm": 0.7018687129020691, + "learning_rate": 9.238231514839559e-05, + "loss": 2.5725, + "step": 10534 + }, + { + "epoch": 0.850213864901945, + "grad_norm": 0.6723659634590149, + "learning_rate": 9.236657421726583e-05, + "loss": 2.5661, + "step": 10535 + }, + { + "epoch": 0.8502945686385279, + "grad_norm": 0.7105850577354431, + "learning_rate": 9.235083347638098e-05, + "loss": 2.5676, + "step": 10536 + }, + { + "epoch": 0.850375272375111, + "grad_norm": 0.682601809501648, + "learning_rate": 9.233509292613341e-05, + "loss": 2.5489, + "step": 10537 + }, + { + "epoch": 0.850455976111694, + "grad_norm": 0.6703988313674927, + "learning_rate": 9.231935256691531e-05, + "loss": 2.5349, + "step": 10538 + }, + { + "epoch": 0.850536679848277, + "grad_norm": 0.6430882215499878, + "learning_rate": 9.230361239911903e-05, + "loss": 2.4959, + "step": 10539 + }, + { + "epoch": 0.8506173835848599, + "grad_norm": 0.7164519429206848, + "learning_rate": 9.228787242313687e-05, + "loss": 2.4999, + "step": 10540 + }, + { + "epoch": 0.850698087321443, + "grad_norm": 0.7463028430938721, + "learning_rate": 9.227213263936107e-05, + "loss": 2.545, + "step": 10541 + }, + { + "epoch": 0.850778791058026, + "grad_norm": 0.650577187538147, + "learning_rate": 9.22563930481839e-05, + "loss": 2.5707, + "step": 10542 + }, + { + "epoch": 0.850859494794609, + "grad_norm": 0.6808211207389832, + "learning_rate": 
9.224065364999768e-05, + "loss": 2.5236, + "step": 10543 + }, + { + "epoch": 0.850940198531192, + "grad_norm": 0.6947758793830872, + "learning_rate": 9.222491444519467e-05, + "loss": 2.555, + "step": 10544 + }, + { + "epoch": 0.851020902267775, + "grad_norm": 0.6805624961853027, + "learning_rate": 9.22091754341671e-05, + "loss": 2.517, + "step": 10545 + }, + { + "epoch": 0.851101606004358, + "grad_norm": 0.6645655035972595, + "learning_rate": 9.219343661730724e-05, + "loss": 2.5237, + "step": 10546 + }, + { + "epoch": 0.851182309740941, + "grad_norm": 0.6912586092948914, + "learning_rate": 9.217769799500738e-05, + "loss": 2.5345, + "step": 10547 + }, + { + "epoch": 0.851263013477524, + "grad_norm": 0.6713781356811523, + "learning_rate": 9.21619595676597e-05, + "loss": 2.56, + "step": 10548 + }, + { + "epoch": 0.8513437172141071, + "grad_norm": 0.7031502723693848, + "learning_rate": 9.214622133565648e-05, + "loss": 2.4885, + "step": 10549 + }, + { + "epoch": 0.85142442095069, + "grad_norm": 0.6616455316543579, + "learning_rate": 9.213048329938997e-05, + "loss": 2.5101, + "step": 10550 + }, + { + "epoch": 0.851505124687273, + "grad_norm": 0.711077094078064, + "learning_rate": 9.211474545925236e-05, + "loss": 2.6264, + "step": 10551 + }, + { + "epoch": 0.851585828423856, + "grad_norm": 0.7534502744674683, + "learning_rate": 9.209900781563592e-05, + "loss": 2.5417, + "step": 10552 + }, + { + "epoch": 0.8516665321604391, + "grad_norm": 0.7405222058296204, + "learning_rate": 9.208327036893288e-05, + "loss": 2.546, + "step": 10553 + }, + { + "epoch": 0.8517472358970221, + "grad_norm": 0.7014057040214539, + "learning_rate": 9.20675331195354e-05, + "loss": 2.5211, + "step": 10554 + }, + { + "epoch": 0.851827939633605, + "grad_norm": 0.6984074115753174, + "learning_rate": 9.205179606783573e-05, + "loss": 2.5181, + "step": 10555 + }, + { + "epoch": 0.851908643370188, + "grad_norm": 0.7312670350074768, + "learning_rate": 9.203605921422613e-05, + "loss": 2.5345, + "step": 10556 
+ }, + { + "epoch": 0.851989347106771, + "grad_norm": 0.6861104369163513, + "learning_rate": 9.202032255909871e-05, + "loss": 2.5426, + "step": 10557 + }, + { + "epoch": 0.8520700508433541, + "grad_norm": 0.6989030838012695, + "learning_rate": 9.200458610284571e-05, + "loss": 2.5221, + "step": 10558 + }, + { + "epoch": 0.852150754579937, + "grad_norm": 0.6645115613937378, + "learning_rate": 9.198884984585932e-05, + "loss": 2.4755, + "step": 10559 + }, + { + "epoch": 0.85223145831652, + "grad_norm": 0.6577785015106201, + "learning_rate": 9.197311378853176e-05, + "loss": 2.5491, + "step": 10560 + }, + { + "epoch": 0.852312162053103, + "grad_norm": 0.7311568856239319, + "learning_rate": 9.195737793125517e-05, + "loss": 2.5653, + "step": 10561 + }, + { + "epoch": 0.8523928657896861, + "grad_norm": 0.6469970345497131, + "learning_rate": 9.194164227442174e-05, + "loss": 2.5384, + "step": 10562 + }, + { + "epoch": 0.8524735695262691, + "grad_norm": 0.6562933325767517, + "learning_rate": 9.19259068184237e-05, + "loss": 2.5644, + "step": 10563 + }, + { + "epoch": 0.852554273262852, + "grad_norm": 0.7740273475646973, + "learning_rate": 9.19101715636531e-05, + "loss": 2.5868, + "step": 10564 + }, + { + "epoch": 0.852634976999435, + "grad_norm": 0.6461195349693298, + "learning_rate": 9.18944365105022e-05, + "loss": 2.4862, + "step": 10565 + }, + { + "epoch": 0.8527156807360181, + "grad_norm": 0.7230537533760071, + "learning_rate": 9.187870165936313e-05, + "loss": 2.5125, + "step": 10566 + }, + { + "epoch": 0.8527963844726011, + "grad_norm": 0.6858233213424683, + "learning_rate": 9.186296701062805e-05, + "loss": 2.5463, + "step": 10567 + }, + { + "epoch": 0.8528770882091841, + "grad_norm": 0.717407763004303, + "learning_rate": 9.184723256468908e-05, + "loss": 2.5399, + "step": 10568 + }, + { + "epoch": 0.852957791945767, + "grad_norm": 0.7537745237350464, + "learning_rate": 9.18314983219384e-05, + "loss": 2.5164, + "step": 10569 + }, + { + "epoch": 0.8530384956823501, + 
"grad_norm": 0.7068665027618408, + "learning_rate": 9.181576428276814e-05, + "loss": 2.5747, + "step": 10570 + }, + { + "epoch": 0.8531191994189331, + "grad_norm": 0.8013456463813782, + "learning_rate": 9.18000304475704e-05, + "loss": 2.5401, + "step": 10571 + }, + { + "epoch": 0.8531999031555161, + "grad_norm": 0.6458969712257385, + "learning_rate": 9.178429681673741e-05, + "loss": 2.4781, + "step": 10572 + }, + { + "epoch": 0.8532806068920991, + "grad_norm": 0.7235112190246582, + "learning_rate": 9.176856339066114e-05, + "loss": 2.5753, + "step": 10573 + }, + { + "epoch": 0.8533613106286821, + "grad_norm": 0.6815706491470337, + "learning_rate": 9.175283016973382e-05, + "loss": 2.5526, + "step": 10574 + }, + { + "epoch": 0.8534420143652651, + "grad_norm": 0.739747166633606, + "learning_rate": 9.173709715434751e-05, + "loss": 2.5631, + "step": 10575 + }, + { + "epoch": 0.8535227181018481, + "grad_norm": 0.7325060963630676, + "learning_rate": 9.172136434489437e-05, + "loss": 2.4925, + "step": 10576 + }, + { + "epoch": 0.8536034218384311, + "grad_norm": 0.6505454182624817, + "learning_rate": 9.170563174176645e-05, + "loss": 2.5423, + "step": 10577 + }, + { + "epoch": 0.8536841255750142, + "grad_norm": 0.7267098426818848, + "learning_rate": 9.168989934535586e-05, + "loss": 2.5687, + "step": 10578 + }, + { + "epoch": 0.8537648293115971, + "grad_norm": 0.7264497876167297, + "learning_rate": 9.167416715605476e-05, + "loss": 2.5165, + "step": 10579 + }, + { + "epoch": 0.8538455330481801, + "grad_norm": 0.7473852634429932, + "learning_rate": 9.165843517425509e-05, + "loss": 2.5837, + "step": 10580 + }, + { + "epoch": 0.8539262367847631, + "grad_norm": 0.7249133586883545, + "learning_rate": 9.164270340034906e-05, + "loss": 2.5805, + "step": 10581 + }, + { + "epoch": 0.8540069405213462, + "grad_norm": 0.7463760375976562, + "learning_rate": 9.162697183472875e-05, + "loss": 2.5067, + "step": 10582 + }, + { + "epoch": 0.8540876442579292, + "grad_norm": 0.7125511169433594, + 
"learning_rate": 9.161124047778614e-05, + "loss": 2.5093, + "step": 10583 + }, + { + "epoch": 0.8541683479945121, + "grad_norm": 0.7247455716133118, + "learning_rate": 9.159550932991335e-05, + "loss": 2.5356, + "step": 10584 + }, + { + "epoch": 0.8542490517310951, + "grad_norm": 0.7593860030174255, + "learning_rate": 9.157977839150246e-05, + "loss": 2.5477, + "step": 10585 + }, + { + "epoch": 0.8543297554676782, + "grad_norm": 0.6758295297622681, + "learning_rate": 9.156404766294547e-05, + "loss": 2.4748, + "step": 10586 + }, + { + "epoch": 0.8544104592042612, + "grad_norm": 0.7114073634147644, + "learning_rate": 9.154831714463447e-05, + "loss": 2.5479, + "step": 10587 + }, + { + "epoch": 0.8544911629408442, + "grad_norm": 0.6881263256072998, + "learning_rate": 9.153258683696156e-05, + "loss": 2.5471, + "step": 10588 + }, + { + "epoch": 0.8545718666774271, + "grad_norm": 0.6509317755699158, + "learning_rate": 9.151685674031866e-05, + "loss": 2.5239, + "step": 10589 + }, + { + "epoch": 0.8546525704140102, + "grad_norm": 0.7754644751548767, + "learning_rate": 9.150112685509787e-05, + "loss": 2.5572, + "step": 10590 + }, + { + "epoch": 0.8547332741505932, + "grad_norm": 0.707080602645874, + "learning_rate": 9.148539718169118e-05, + "loss": 2.5572, + "step": 10591 + }, + { + "epoch": 0.8548139778871762, + "grad_norm": 0.6996685266494751, + "learning_rate": 9.146966772049073e-05, + "loss": 2.4968, + "step": 10592 + }, + { + "epoch": 0.8548946816237591, + "grad_norm": 0.6830589771270752, + "learning_rate": 9.145393847188841e-05, + "loss": 2.5795, + "step": 10593 + }, + { + "epoch": 0.8549753853603422, + "grad_norm": 0.7507784366607666, + "learning_rate": 9.143820943627628e-05, + "loss": 2.6135, + "step": 10594 + }, + { + "epoch": 0.8550560890969252, + "grad_norm": 0.673218309879303, + "learning_rate": 9.142248061404638e-05, + "loss": 2.5875, + "step": 10595 + }, + { + "epoch": 0.8551367928335082, + "grad_norm": 0.6861804723739624, + "learning_rate": 
9.140675200559065e-05, + "loss": 2.5892, + "step": 10596 + }, + { + "epoch": 0.8552174965700912, + "grad_norm": 0.6928709149360657, + "learning_rate": 9.139102361130114e-05, + "loss": 2.5303, + "step": 10597 + }, + { + "epoch": 0.8552982003066743, + "grad_norm": 0.6958343386650085, + "learning_rate": 9.137529543156986e-05, + "loss": 2.5567, + "step": 10598 + }, + { + "epoch": 0.8553789040432572, + "grad_norm": 0.703845739364624, + "learning_rate": 9.135956746678873e-05, + "loss": 2.5215, + "step": 10599 + }, + { + "epoch": 0.8554596077798402, + "grad_norm": 0.7108649015426636, + "learning_rate": 9.134383971734975e-05, + "loss": 2.5687, + "step": 10600 + }, + { + "epoch": 0.8555403115164232, + "grad_norm": 0.7249850034713745, + "learning_rate": 9.132811218364495e-05, + "loss": 2.565, + "step": 10601 + }, + { + "epoch": 0.8556210152530063, + "grad_norm": 0.7060014009475708, + "learning_rate": 9.131238486606623e-05, + "loss": 2.5366, + "step": 10602 + }, + { + "epoch": 0.8557017189895892, + "grad_norm": 0.6915088891983032, + "learning_rate": 9.129665776500559e-05, + "loss": 2.527, + "step": 10603 + }, + { + "epoch": 0.8557824227261722, + "grad_norm": 0.7226938605308533, + "learning_rate": 9.128093088085503e-05, + "loss": 2.5999, + "step": 10604 + }, + { + "epoch": 0.8558631264627552, + "grad_norm": 0.6802428364753723, + "learning_rate": 9.126520421400641e-05, + "loss": 2.4788, + "step": 10605 + }, + { + "epoch": 0.8559438301993383, + "grad_norm": 0.7855350375175476, + "learning_rate": 9.124947776485175e-05, + "loss": 2.5349, + "step": 10606 + }, + { + "epoch": 0.8560245339359213, + "grad_norm": 0.6758337020874023, + "learning_rate": 9.123375153378296e-05, + "loss": 2.5874, + "step": 10607 + }, + { + "epoch": 0.8561052376725042, + "grad_norm": 0.675061821937561, + "learning_rate": 9.121802552119206e-05, + "loss": 2.5343, + "step": 10608 + }, + { + "epoch": 0.8561859414090872, + "grad_norm": 0.7044726014137268, + "learning_rate": 9.120229972747087e-05, + "loss": 2.5361, 
+ "step": 10609 + }, + { + "epoch": 0.8562666451456702, + "grad_norm": 0.6324402689933777, + "learning_rate": 9.118657415301137e-05, + "loss": 2.5039, + "step": 10610 + }, + { + "epoch": 0.8563473488822533, + "grad_norm": 0.6621509790420532, + "learning_rate": 9.11708487982055e-05, + "loss": 2.5346, + "step": 10611 + }, + { + "epoch": 0.8564280526188363, + "grad_norm": 0.6709887981414795, + "learning_rate": 9.115512366344516e-05, + "loss": 2.5409, + "step": 10612 + }, + { + "epoch": 0.8565087563554192, + "grad_norm": 0.7237712740898132, + "learning_rate": 9.113939874912223e-05, + "loss": 2.5051, + "step": 10613 + }, + { + "epoch": 0.8565894600920022, + "grad_norm": 0.6646109223365784, + "learning_rate": 9.11236740556287e-05, + "loss": 2.5866, + "step": 10614 + }, + { + "epoch": 0.8566701638285853, + "grad_norm": 0.7131930589675903, + "learning_rate": 9.110794958335637e-05, + "loss": 2.5472, + "step": 10615 + }, + { + "epoch": 0.8567508675651683, + "grad_norm": 0.6662428975105286, + "learning_rate": 9.109222533269715e-05, + "loss": 2.4863, + "step": 10616 + }, + { + "epoch": 0.8568315713017512, + "grad_norm": 0.6527226567268372, + "learning_rate": 9.107650130404304e-05, + "loss": 2.5594, + "step": 10617 + }, + { + "epoch": 0.8569122750383342, + "grad_norm": 0.6639060378074646, + "learning_rate": 9.106077749778578e-05, + "loss": 2.5519, + "step": 10618 + }, + { + "epoch": 0.8569929787749173, + "grad_norm": 0.7088096737861633, + "learning_rate": 9.104505391431734e-05, + "loss": 2.5404, + "step": 10619 + }, + { + "epoch": 0.8570736825115003, + "grad_norm": 0.7155873775482178, + "learning_rate": 9.102933055402957e-05, + "loss": 2.5636, + "step": 10620 + }, + { + "epoch": 0.8571543862480833, + "grad_norm": 0.6522316932678223, + "learning_rate": 9.101360741731431e-05, + "loss": 2.5216, + "step": 10621 + }, + { + "epoch": 0.8572350899846662, + "grad_norm": 0.6515649557113647, + "learning_rate": 9.099788450456345e-05, + "loss": 2.5804, + "step": 10622 + }, + { + "epoch": 
0.8573157937212493, + "grad_norm": 0.6791853904724121, + "learning_rate": 9.098216181616883e-05, + "loss": 2.5353, + "step": 10623 + }, + { + "epoch": 0.8573964974578323, + "grad_norm": 0.6946877241134644, + "learning_rate": 9.096643935252236e-05, + "loss": 2.5492, + "step": 10624 + }, + { + "epoch": 0.8574772011944153, + "grad_norm": 0.7235898375511169, + "learning_rate": 9.095071711401581e-05, + "loss": 2.5178, + "step": 10625 + }, + { + "epoch": 0.8575579049309983, + "grad_norm": 0.6740610003471375, + "learning_rate": 9.093499510104102e-05, + "loss": 2.5699, + "step": 10626 + }, + { + "epoch": 0.8576386086675813, + "grad_norm": 0.7441792488098145, + "learning_rate": 9.091927331398988e-05, + "loss": 2.579, + "step": 10627 + }, + { + "epoch": 0.8577193124041643, + "grad_norm": 0.6986937522888184, + "learning_rate": 9.090355175325416e-05, + "loss": 2.5556, + "step": 10628 + }, + { + "epoch": 0.8578000161407473, + "grad_norm": 0.6960151791572571, + "learning_rate": 9.08878304192257e-05, + "loss": 2.5448, + "step": 10629 + }, + { + "epoch": 0.8578807198773303, + "grad_norm": 0.6376819014549255, + "learning_rate": 9.087210931229636e-05, + "loss": 2.4636, + "step": 10630 + }, + { + "epoch": 0.8579614236139134, + "grad_norm": 0.752473771572113, + "learning_rate": 9.08563884328579e-05, + "loss": 2.5451, + "step": 10631 + }, + { + "epoch": 0.8580421273504963, + "grad_norm": 0.6879361867904663, + "learning_rate": 9.084066778130213e-05, + "loss": 2.5365, + "step": 10632 + }, + { + "epoch": 0.8581228310870793, + "grad_norm": 0.6630483865737915, + "learning_rate": 9.082494735802091e-05, + "loss": 2.5085, + "step": 10633 + }, + { + "epoch": 0.8582035348236623, + "grad_norm": 0.689602792263031, + "learning_rate": 9.080922716340594e-05, + "loss": 2.5087, + "step": 10634 + }, + { + "epoch": 0.8582842385602454, + "grad_norm": 0.7333599925041199, + "learning_rate": 9.079350719784905e-05, + "loss": 2.5476, + "step": 10635 + }, + { + "epoch": 0.8583649422968284, + "grad_norm": 
0.6895802021026611, + "learning_rate": 9.077778746174204e-05, + "loss": 2.5099, + "step": 10636 + }, + { + "epoch": 0.8584456460334113, + "grad_norm": 0.7202162146568298, + "learning_rate": 9.076206795547668e-05, + "loss": 2.5197, + "step": 10637 + }, + { + "epoch": 0.8585263497699943, + "grad_norm": 0.6454200148582458, + "learning_rate": 9.074634867944472e-05, + "loss": 2.5303, + "step": 10638 + }, + { + "epoch": 0.8586070535065774, + "grad_norm": 0.6842506527900696, + "learning_rate": 9.073062963403795e-05, + "loss": 2.5051, + "step": 10639 + }, + { + "epoch": 0.8586877572431604, + "grad_norm": 0.6979129314422607, + "learning_rate": 9.071491081964815e-05, + "loss": 2.5209, + "step": 10640 + }, + { + "epoch": 0.8587684609797434, + "grad_norm": 0.6851540803909302, + "learning_rate": 9.0699192236667e-05, + "loss": 2.5003, + "step": 10641 + }, + { + "epoch": 0.8588491647163263, + "grad_norm": 0.7528585195541382, + "learning_rate": 9.068347388548627e-05, + "loss": 2.5524, + "step": 10642 + }, + { + "epoch": 0.8589298684529094, + "grad_norm": 0.6297397613525391, + "learning_rate": 9.06677557664978e-05, + "loss": 2.5412, + "step": 10643 + }, + { + "epoch": 0.8590105721894924, + "grad_norm": 0.7034026980400085, + "learning_rate": 9.06520378800932e-05, + "loss": 2.4958, + "step": 10644 + }, + { + "epoch": 0.8590912759260754, + "grad_norm": 0.690258800983429, + "learning_rate": 9.063632022666425e-05, + "loss": 2.4894, + "step": 10645 + }, + { + "epoch": 0.8591719796626583, + "grad_norm": 0.6449949145317078, + "learning_rate": 9.06206028066027e-05, + "loss": 2.507, + "step": 10646 + }, + { + "epoch": 0.8592526833992414, + "grad_norm": 0.6328588724136353, + "learning_rate": 9.060488562030023e-05, + "loss": 2.5503, + "step": 10647 + }, + { + "epoch": 0.8593333871358244, + "grad_norm": 0.6570547819137573, + "learning_rate": 9.058916866814858e-05, + "loss": 2.4993, + "step": 10648 + }, + { + "epoch": 0.8594140908724074, + "grad_norm": 0.7689602375030518, + "learning_rate": 
9.057345195053945e-05, + "loss": 2.5498, + "step": 10649 + }, + { + "epoch": 0.8594947946089904, + "grad_norm": 0.6727081537246704, + "learning_rate": 9.055773546786454e-05, + "loss": 2.5172, + "step": 10650 + }, + { + "epoch": 0.8595754983455735, + "grad_norm": 0.694722056388855, + "learning_rate": 9.054201922051552e-05, + "loss": 2.5485, + "step": 10651 + }, + { + "epoch": 0.8596562020821564, + "grad_norm": 0.6638815999031067, + "learning_rate": 9.052630320888411e-05, + "loss": 2.5134, + "step": 10652 + }, + { + "epoch": 0.8597369058187394, + "grad_norm": 0.6600833535194397, + "learning_rate": 9.0510587433362e-05, + "loss": 2.5206, + "step": 10653 + }, + { + "epoch": 0.8598176095553224, + "grad_norm": 0.7193894386291504, + "learning_rate": 9.049487189434084e-05, + "loss": 2.5485, + "step": 10654 + }, + { + "epoch": 0.8598983132919055, + "grad_norm": 0.6651753187179565, + "learning_rate": 9.047915659221233e-05, + "loss": 2.5703, + "step": 10655 + }, + { + "epoch": 0.8599790170284884, + "grad_norm": 0.7346364855766296, + "learning_rate": 9.046344152736815e-05, + "loss": 2.5301, + "step": 10656 + }, + { + "epoch": 0.8600597207650714, + "grad_norm": 0.6681811809539795, + "learning_rate": 9.04477267001999e-05, + "loss": 2.5124, + "step": 10657 + }, + { + "epoch": 0.8601404245016544, + "grad_norm": 0.6928461790084839, + "learning_rate": 9.043201211109929e-05, + "loss": 2.5153, + "step": 10658 + }, + { + "epoch": 0.8602211282382374, + "grad_norm": 0.6957700252532959, + "learning_rate": 9.041629776045797e-05, + "loss": 2.4697, + "step": 10659 + }, + { + "epoch": 0.8603018319748205, + "grad_norm": 0.6361939311027527, + "learning_rate": 9.040058364866752e-05, + "loss": 2.5162, + "step": 10660 + }, + { + "epoch": 0.8603825357114034, + "grad_norm": 0.6827390193939209, + "learning_rate": 9.038486977611964e-05, + "loss": 2.4856, + "step": 10661 + }, + { + "epoch": 0.8604632394479864, + "grad_norm": 0.6638801097869873, + "learning_rate": 9.036915614320595e-05, + "loss": 2.5224, 
+ "step": 10662 + }, + { + "epoch": 0.8605439431845694, + "grad_norm": 0.7249652743339539, + "learning_rate": 9.035344275031802e-05, + "loss": 2.5461, + "step": 10663 + }, + { + "epoch": 0.8606246469211525, + "grad_norm": 0.6693316102027893, + "learning_rate": 9.033772959784754e-05, + "loss": 2.5676, + "step": 10664 + }, + { + "epoch": 0.8607053506577355, + "grad_norm": 0.6787340641021729, + "learning_rate": 9.032201668618614e-05, + "loss": 2.5374, + "step": 10665 + }, + { + "epoch": 0.8607860543943184, + "grad_norm": 0.6581670641899109, + "learning_rate": 9.030630401572533e-05, + "loss": 2.5052, + "step": 10666 + }, + { + "epoch": 0.8608667581309014, + "grad_norm": 0.6975873112678528, + "learning_rate": 9.029059158685675e-05, + "loss": 2.4823, + "step": 10667 + }, + { + "epoch": 0.8609474618674845, + "grad_norm": 0.6632521748542786, + "learning_rate": 9.027487939997201e-05, + "loss": 2.5992, + "step": 10668 + }, + { + "epoch": 0.8610281656040675, + "grad_norm": 0.6793977618217468, + "learning_rate": 9.025916745546276e-05, + "loss": 2.5308, + "step": 10669 + }, + { + "epoch": 0.8611088693406505, + "grad_norm": 0.6499481797218323, + "learning_rate": 9.024345575372046e-05, + "loss": 2.4964, + "step": 10670 + }, + { + "epoch": 0.8611895730772334, + "grad_norm": 0.6858868598937988, + "learning_rate": 9.022774429513677e-05, + "loss": 2.5388, + "step": 10671 + }, + { + "epoch": 0.8612702768138165, + "grad_norm": 0.7586160898208618, + "learning_rate": 9.021203308010324e-05, + "loss": 2.5166, + "step": 10672 + }, + { + "epoch": 0.8613509805503995, + "grad_norm": 0.7179701328277588, + "learning_rate": 9.019632210901141e-05, + "loss": 2.5501, + "step": 10673 + }, + { + "epoch": 0.8614316842869825, + "grad_norm": 0.6830369830131531, + "learning_rate": 9.018061138225287e-05, + "loss": 2.4956, + "step": 10674 + }, + { + "epoch": 0.8615123880235654, + "grad_norm": 0.6710512042045593, + "learning_rate": 9.01649009002192e-05, + "loss": 2.5722, + "step": 10675 + }, + { + "epoch": 
0.8615930917601485, + "grad_norm": 0.640011727809906, + "learning_rate": 9.014919066330186e-05, + "loss": 2.5197, + "step": 10676 + }, + { + "epoch": 0.8616737954967315, + "grad_norm": 0.6803860664367676, + "learning_rate": 9.013348067189245e-05, + "loss": 2.4794, + "step": 10677 + }, + { + "epoch": 0.8617544992333145, + "grad_norm": 0.6734865307807922, + "learning_rate": 9.011777092638251e-05, + "loss": 2.5831, + "step": 10678 + }, + { + "epoch": 0.8618352029698975, + "grad_norm": 0.6525718569755554, + "learning_rate": 9.010206142716353e-05, + "loss": 2.4925, + "step": 10679 + }, + { + "epoch": 0.8619159067064806, + "grad_norm": 0.6886672377586365, + "learning_rate": 9.008635217462706e-05, + "loss": 2.491, + "step": 10680 + }, + { + "epoch": 0.8619966104430635, + "grad_norm": 0.6397131085395813, + "learning_rate": 9.007064316916461e-05, + "loss": 2.4684, + "step": 10681 + }, + { + "epoch": 0.8620773141796465, + "grad_norm": 0.6308462023735046, + "learning_rate": 9.005493441116768e-05, + "loss": 2.504, + "step": 10682 + }, + { + "epoch": 0.8621580179162295, + "grad_norm": 0.7223808169364929, + "learning_rate": 9.003922590102778e-05, + "loss": 2.5342, + "step": 10683 + }, + { + "epoch": 0.8622387216528126, + "grad_norm": 0.687515914440155, + "learning_rate": 9.002351763913642e-05, + "loss": 2.4822, + "step": 10684 + }, + { + "epoch": 0.8623194253893955, + "grad_norm": 0.6888468265533447, + "learning_rate": 9.00078096258851e-05, + "loss": 2.5497, + "step": 10685 + }, + { + "epoch": 0.8624001291259785, + "grad_norm": 0.7429301738739014, + "learning_rate": 8.999210186166525e-05, + "loss": 2.624, + "step": 10686 + }, + { + "epoch": 0.8624808328625615, + "grad_norm": 0.6901945471763611, + "learning_rate": 8.997639434686839e-05, + "loss": 2.5268, + "step": 10687 + }, + { + "epoch": 0.8625615365991446, + "grad_norm": 0.7396681308746338, + "learning_rate": 8.9960687081886e-05, + "loss": 2.5427, + "step": 10688 + }, + { + "epoch": 0.8626422403357276, + "grad_norm": 
0.6825531125068665, + "learning_rate": 8.99449800671095e-05, + "loss": 2.5722, + "step": 10689 + }, + { + "epoch": 0.8627229440723105, + "grad_norm": 0.6719860434532166, + "learning_rate": 8.992927330293039e-05, + "loss": 2.4939, + "step": 10690 + }, + { + "epoch": 0.8628036478088935, + "grad_norm": 0.644567608833313, + "learning_rate": 8.991356678974017e-05, + "loss": 2.5495, + "step": 10691 + }, + { + "epoch": 0.8628843515454766, + "grad_norm": 0.7066643834114075, + "learning_rate": 8.989786052793015e-05, + "loss": 2.5508, + "step": 10692 + }, + { + "epoch": 0.8629650552820596, + "grad_norm": 0.6697196364402771, + "learning_rate": 8.988215451789187e-05, + "loss": 2.5231, + "step": 10693 + }, + { + "epoch": 0.8630457590186426, + "grad_norm": 0.7143658399581909, + "learning_rate": 8.986644876001681e-05, + "loss": 2.5368, + "step": 10694 + }, + { + "epoch": 0.8631264627552255, + "grad_norm": 0.7597684264183044, + "learning_rate": 8.985074325469628e-05, + "loss": 2.5983, + "step": 10695 + }, + { + "epoch": 0.8632071664918086, + "grad_norm": 0.7418014407157898, + "learning_rate": 8.983503800232176e-05, + "loss": 2.5736, + "step": 10696 + }, + { + "epoch": 0.8632878702283916, + "grad_norm": 0.654435932636261, + "learning_rate": 8.981933300328468e-05, + "loss": 2.5389, + "step": 10697 + }, + { + "epoch": 0.8633685739649746, + "grad_norm": 0.658203661441803, + "learning_rate": 8.980362825797643e-05, + "loss": 2.5204, + "step": 10698 + }, + { + "epoch": 0.8634492777015575, + "grad_norm": 0.7132784724235535, + "learning_rate": 8.97879237667884e-05, + "loss": 2.4982, + "step": 10699 + }, + { + "epoch": 0.8635299814381406, + "grad_norm": 0.6901868581771851, + "learning_rate": 8.9772219530112e-05, + "loss": 2.5599, + "step": 10700 + }, + { + "epoch": 0.8636106851747236, + "grad_norm": 0.6241179704666138, + "learning_rate": 8.975651554833869e-05, + "loss": 2.5185, + "step": 10701 + }, + { + "epoch": 0.8636913889113066, + "grad_norm": 0.693692147731781, + "learning_rate": 
8.974081182185974e-05, + "loss": 2.506, + "step": 10702 + }, + { + "epoch": 0.8637720926478896, + "grad_norm": 0.6699246168136597, + "learning_rate": 8.972510835106658e-05, + "loss": 2.557, + "step": 10703 + }, + { + "epoch": 0.8638527963844727, + "grad_norm": 0.7339062094688416, + "learning_rate": 8.970940513635059e-05, + "loss": 2.5614, + "step": 10704 + }, + { + "epoch": 0.8639335001210556, + "grad_norm": 0.7558815479278564, + "learning_rate": 8.969370217810311e-05, + "loss": 2.5949, + "step": 10705 + }, + { + "epoch": 0.8640142038576386, + "grad_norm": 0.6992602348327637, + "learning_rate": 8.96779994767155e-05, + "loss": 2.4755, + "step": 10706 + }, + { + "epoch": 0.8640949075942216, + "grad_norm": 0.6836397647857666, + "learning_rate": 8.966229703257915e-05, + "loss": 2.5172, + "step": 10707 + }, + { + "epoch": 0.8641756113308047, + "grad_norm": 0.7054563760757446, + "learning_rate": 8.964659484608537e-05, + "loss": 2.5186, + "step": 10708 + }, + { + "epoch": 0.8642563150673876, + "grad_norm": 0.7096611261367798, + "learning_rate": 8.963089291762551e-05, + "loss": 2.5157, + "step": 10709 + }, + { + "epoch": 0.8643370188039706, + "grad_norm": 0.657465934753418, + "learning_rate": 8.961519124759094e-05, + "loss": 2.5332, + "step": 10710 + }, + { + "epoch": 0.8644177225405536, + "grad_norm": 0.7490121126174927, + "learning_rate": 8.959948983637291e-05, + "loss": 2.512, + "step": 10711 + }, + { + "epoch": 0.8644984262771366, + "grad_norm": 0.7074166536331177, + "learning_rate": 8.958378868436279e-05, + "loss": 2.4745, + "step": 10712 + }, + { + "epoch": 0.8645791300137197, + "grad_norm": 0.7496227025985718, + "learning_rate": 8.956808779195188e-05, + "loss": 2.5533, + "step": 10713 + }, + { + "epoch": 0.8646598337503026, + "grad_norm": 0.6624657511711121, + "learning_rate": 8.95523871595315e-05, + "loss": 2.5346, + "step": 10714 + }, + { + "epoch": 0.8647405374868856, + "grad_norm": 0.6829125881195068, + "learning_rate": 8.953668678749292e-05, + "loss": 2.558, + 
"step": 10715 + }, + { + "epoch": 0.8648212412234686, + "grad_norm": 0.6954498887062073, + "learning_rate": 8.952098667622745e-05, + "loss": 2.5617, + "step": 10716 + }, + { + "epoch": 0.8649019449600517, + "grad_norm": 0.6722636818885803, + "learning_rate": 8.950528682612645e-05, + "loss": 2.5565, + "step": 10717 + }, + { + "epoch": 0.8649826486966347, + "grad_norm": 0.6793767213821411, + "learning_rate": 8.948958723758107e-05, + "loss": 2.5803, + "step": 10718 + }, + { + "epoch": 0.8650633524332176, + "grad_norm": 0.7159373760223389, + "learning_rate": 8.947388791098266e-05, + "loss": 2.5465, + "step": 10719 + }, + { + "epoch": 0.8651440561698006, + "grad_norm": 0.6823835372924805, + "learning_rate": 8.945818884672253e-05, + "loss": 2.5079, + "step": 10720 + }, + { + "epoch": 0.8652247599063837, + "grad_norm": 0.7521452903747559, + "learning_rate": 8.944249004519185e-05, + "loss": 2.5628, + "step": 10721 + }, + { + "epoch": 0.8653054636429667, + "grad_norm": 0.6774886846542358, + "learning_rate": 8.94267915067819e-05, + "loss": 2.6042, + "step": 10722 + }, + { + "epoch": 0.8653861673795497, + "grad_norm": 0.6915935277938843, + "learning_rate": 8.941109323188398e-05, + "loss": 2.5563, + "step": 10723 + }, + { + "epoch": 0.8654668711161326, + "grad_norm": 0.6609061360359192, + "learning_rate": 8.939539522088927e-05, + "loss": 2.5083, + "step": 10724 + }, + { + "epoch": 0.8655475748527157, + "grad_norm": 0.6457223892211914, + "learning_rate": 8.937969747418903e-05, + "loss": 2.573, + "step": 10725 + }, + { + "epoch": 0.8656282785892987, + "grad_norm": 0.6960360407829285, + "learning_rate": 8.936399999217455e-05, + "loss": 2.516, + "step": 10726 + }, + { + "epoch": 0.8657089823258817, + "grad_norm": 0.7269721627235413, + "learning_rate": 8.934830277523693e-05, + "loss": 2.5932, + "step": 10727 + }, + { + "epoch": 0.8657896860624646, + "grad_norm": 0.7057532668113708, + "learning_rate": 8.933260582376745e-05, + "loss": 2.5022, + "step": 10728 + }, + { + "epoch": 
0.8658703897990477, + "grad_norm": 0.6698749661445618, + "learning_rate": 8.931690913815735e-05, + "loss": 2.5357, + "step": 10729 + }, + { + "epoch": 0.8659510935356307, + "grad_norm": 0.6616599559783936, + "learning_rate": 8.930121271879777e-05, + "loss": 2.4776, + "step": 10730 + }, + { + "epoch": 0.8660317972722137, + "grad_norm": 0.7457093000411987, + "learning_rate": 8.928551656607993e-05, + "loss": 2.5799, + "step": 10731 + }, + { + "epoch": 0.8661125010087967, + "grad_norm": 0.7199469804763794, + "learning_rate": 8.926982068039505e-05, + "loss": 2.5278, + "step": 10732 + }, + { + "epoch": 0.8661932047453798, + "grad_norm": 0.7579182386398315, + "learning_rate": 8.925412506213428e-05, + "loss": 2.5227, + "step": 10733 + }, + { + "epoch": 0.8662739084819627, + "grad_norm": 0.687455952167511, + "learning_rate": 8.92384297116888e-05, + "loss": 2.5099, + "step": 10734 + }, + { + "epoch": 0.8663546122185457, + "grad_norm": 0.7616521120071411, + "learning_rate": 8.922273462944978e-05, + "loss": 2.598, + "step": 10735 + }, + { + "epoch": 0.8664353159551287, + "grad_norm": 0.6730697751045227, + "learning_rate": 8.920703981580842e-05, + "loss": 2.5517, + "step": 10736 + }, + { + "epoch": 0.8665160196917118, + "grad_norm": 0.6769895553588867, + "learning_rate": 8.91913452711558e-05, + "loss": 2.5535, + "step": 10737 + }, + { + "epoch": 0.8665967234282947, + "grad_norm": 0.6284549832344055, + "learning_rate": 8.917565099588312e-05, + "loss": 2.4597, + "step": 10738 + }, + { + "epoch": 0.8666774271648777, + "grad_norm": 0.6900805830955505, + "learning_rate": 8.915995699038152e-05, + "loss": 2.5236, + "step": 10739 + }, + { + "epoch": 0.8667581309014607, + "grad_norm": 0.6842896938323975, + "learning_rate": 8.914426325504211e-05, + "loss": 2.5199, + "step": 10740 + }, + { + "epoch": 0.8668388346380438, + "grad_norm": 0.6637243628501892, + "learning_rate": 8.912856979025604e-05, + "loss": 2.5368, + "step": 10741 + }, + { + "epoch": 0.8669195383746268, + "grad_norm": 
0.7474464178085327, + "learning_rate": 8.911287659641449e-05, + "loss": 2.4902, + "step": 10742 + }, + { + "epoch": 0.8670002421112097, + "grad_norm": 0.6977849006652832, + "learning_rate": 8.909718367390843e-05, + "loss": 2.5034, + "step": 10743 + }, + { + "epoch": 0.8670809458477927, + "grad_norm": 0.6968807578086853, + "learning_rate": 8.908149102312907e-05, + "loss": 2.5396, + "step": 10744 + }, + { + "epoch": 0.8671616495843758, + "grad_norm": 0.6656209230422974, + "learning_rate": 8.906579864446755e-05, + "loss": 2.5702, + "step": 10745 + }, + { + "epoch": 0.8672423533209588, + "grad_norm": 0.7079079151153564, + "learning_rate": 8.905010653831486e-05, + "loss": 2.5344, + "step": 10746 + }, + { + "epoch": 0.8673230570575418, + "grad_norm": 0.7423387765884399, + "learning_rate": 8.903441470506214e-05, + "loss": 2.5635, + "step": 10747 + }, + { + "epoch": 0.8674037607941247, + "grad_norm": 0.6607224941253662, + "learning_rate": 8.901872314510046e-05, + "loss": 2.54, + "step": 10748 + }, + { + "epoch": 0.8674844645307078, + "grad_norm": 0.6646947860717773, + "learning_rate": 8.900303185882095e-05, + "loss": 2.4661, + "step": 10749 + }, + { + "epoch": 0.8675651682672908, + "grad_norm": 0.6943496465682983, + "learning_rate": 8.89873408466146e-05, + "loss": 2.5213, + "step": 10750 + }, + { + "epoch": 0.8676458720038738, + "grad_norm": 0.7048123478889465, + "learning_rate": 8.89716501088725e-05, + "loss": 2.5529, + "step": 10751 + }, + { + "epoch": 0.8677265757404568, + "grad_norm": 0.654617428779602, + "learning_rate": 8.895595964598574e-05, + "loss": 2.5535, + "step": 10752 + }, + { + "epoch": 0.8678072794770398, + "grad_norm": 0.672063410282135, + "learning_rate": 8.894026945834531e-05, + "loss": 2.5279, + "step": 10753 + }, + { + "epoch": 0.8678879832136228, + "grad_norm": 0.7134148478507996, + "learning_rate": 8.892457954634225e-05, + "loss": 2.5403, + "step": 10754 + }, + { + "epoch": 0.8679686869502058, + "grad_norm": 0.6457598805427551, + "learning_rate": 
8.890888991036768e-05, + "loss": 2.515, + "step": 10755 + }, + { + "epoch": 0.8680493906867888, + "grad_norm": 0.6725220084190369, + "learning_rate": 8.889320055081252e-05, + "loss": 2.4829, + "step": 10756 + }, + { + "epoch": 0.8681300944233719, + "grad_norm": 0.6425862312316895, + "learning_rate": 8.887751146806785e-05, + "loss": 2.4965, + "step": 10757 + }, + { + "epoch": 0.8682107981599548, + "grad_norm": 0.6654682755470276, + "learning_rate": 8.886182266252468e-05, + "loss": 2.48, + "step": 10758 + }, + { + "epoch": 0.8682915018965378, + "grad_norm": 0.7102493643760681, + "learning_rate": 8.884613413457398e-05, + "loss": 2.5415, + "step": 10759 + }, + { + "epoch": 0.8683722056331208, + "grad_norm": 0.6996567249298096, + "learning_rate": 8.883044588460677e-05, + "loss": 2.542, + "step": 10760 + }, + { + "epoch": 0.8684529093697038, + "grad_norm": 0.7011905312538147, + "learning_rate": 8.881475791301405e-05, + "loss": 2.5391, + "step": 10761 + }, + { + "epoch": 0.8685336131062869, + "grad_norm": 0.6508356928825378, + "learning_rate": 8.879907022018686e-05, + "loss": 2.4892, + "step": 10762 + }, + { + "epoch": 0.8686143168428698, + "grad_norm": 0.7104009985923767, + "learning_rate": 8.878338280651605e-05, + "loss": 2.5152, + "step": 10763 + }, + { + "epoch": 0.8686950205794528, + "grad_norm": 0.6501138210296631, + "learning_rate": 8.876769567239268e-05, + "loss": 2.5767, + "step": 10764 + }, + { + "epoch": 0.8687757243160358, + "grad_norm": 0.6463173031806946, + "learning_rate": 8.875200881820771e-05, + "loss": 2.4758, + "step": 10765 + }, + { + "epoch": 0.8688564280526189, + "grad_norm": 0.6494991779327393, + "learning_rate": 8.873632224435206e-05, + "loss": 2.5364, + "step": 10766 + }, + { + "epoch": 0.8689371317892018, + "grad_norm": 0.6926043033599854, + "learning_rate": 8.872063595121671e-05, + "loss": 2.5288, + "step": 10767 + }, + { + "epoch": 0.8690178355257848, + "grad_norm": 0.7076035737991333, + "learning_rate": 8.870494993919261e-05, + "loss": 2.5118, 
+ "step": 10768 + }, + { + "epoch": 0.8690985392623678, + "grad_norm": 0.6456892490386963, + "learning_rate": 8.868926420867068e-05, + "loss": 2.4957, + "step": 10769 + }, + { + "epoch": 0.8691792429989509, + "grad_norm": 0.6585200428962708, + "learning_rate": 8.867357876004183e-05, + "loss": 2.5049, + "step": 10770 + }, + { + "epoch": 0.8692599467355339, + "grad_norm": 0.6893252730369568, + "learning_rate": 8.865789359369706e-05, + "loss": 2.4808, + "step": 10771 + }, + { + "epoch": 0.8693406504721168, + "grad_norm": 0.6700639724731445, + "learning_rate": 8.864220871002719e-05, + "loss": 2.5475, + "step": 10772 + }, + { + "epoch": 0.8694213542086998, + "grad_norm": 0.6551913619041443, + "learning_rate": 8.862652410942315e-05, + "loss": 2.5063, + "step": 10773 + }, + { + "epoch": 0.8695020579452829, + "grad_norm": 0.6870427131652832, + "learning_rate": 8.86108397922759e-05, + "loss": 2.5785, + "step": 10774 + }, + { + "epoch": 0.8695827616818659, + "grad_norm": 0.6489934325218201, + "learning_rate": 8.859515575897626e-05, + "loss": 2.5584, + "step": 10775 + }, + { + "epoch": 0.8696634654184489, + "grad_norm": 0.6726663112640381, + "learning_rate": 8.857947200991517e-05, + "loss": 2.5707, + "step": 10776 + }, + { + "epoch": 0.8697441691550318, + "grad_norm": 0.7696183323860168, + "learning_rate": 8.856378854548347e-05, + "loss": 2.501, + "step": 10777 + }, + { + "epoch": 0.8698248728916149, + "grad_norm": 0.7002642154693604, + "learning_rate": 8.854810536607212e-05, + "loss": 2.5792, + "step": 10778 + }, + { + "epoch": 0.8699055766281979, + "grad_norm": 0.6429435610771179, + "learning_rate": 8.853242247207185e-05, + "loss": 2.5463, + "step": 10779 + }, + { + "epoch": 0.8699862803647809, + "grad_norm": 0.7006216645240784, + "learning_rate": 8.851673986387358e-05, + "loss": 2.5698, + "step": 10780 + }, + { + "epoch": 0.8700669841013638, + "grad_norm": 0.7053292989730835, + "learning_rate": 8.850105754186824e-05, + "loss": 2.5468, + "step": 10781 + }, + { + "epoch": 
0.8701476878379469, + "grad_norm": 0.6592122912406921, + "learning_rate": 8.848537550644654e-05, + "loss": 2.5271, + "step": 10782 + }, + { + "epoch": 0.8702283915745299, + "grad_norm": 0.679132342338562, + "learning_rate": 8.846969375799941e-05, + "loss": 2.5281, + "step": 10783 + }, + { + "epoch": 0.8703090953111129, + "grad_norm": 0.6868568062782288, + "learning_rate": 8.845401229691765e-05, + "loss": 2.5415, + "step": 10784 + }, + { + "epoch": 0.8703897990476959, + "grad_norm": 0.7060674428939819, + "learning_rate": 8.843833112359208e-05, + "loss": 2.5649, + "step": 10785 + }, + { + "epoch": 0.870470502784279, + "grad_norm": 0.6663981676101685, + "learning_rate": 8.842265023841352e-05, + "loss": 2.5055, + "step": 10786 + }, + { + "epoch": 0.8705512065208619, + "grad_norm": 0.7095218896865845, + "learning_rate": 8.840696964177282e-05, + "loss": 2.5442, + "step": 10787 + }, + { + "epoch": 0.8706319102574449, + "grad_norm": 0.6884104013442993, + "learning_rate": 8.839128933406069e-05, + "loss": 2.5285, + "step": 10788 + }, + { + "epoch": 0.8707126139940279, + "grad_norm": 0.6427462697029114, + "learning_rate": 8.837560931566798e-05, + "loss": 2.5197, + "step": 10789 + }, + { + "epoch": 0.870793317730611, + "grad_norm": 0.6870493292808533, + "learning_rate": 8.835992958698548e-05, + "loss": 2.4937, + "step": 10790 + }, + { + "epoch": 0.870874021467194, + "grad_norm": 0.7006319761276245, + "learning_rate": 8.834425014840398e-05, + "loss": 2.5148, + "step": 10791 + }, + { + "epoch": 0.8709547252037769, + "grad_norm": 0.690601646900177, + "learning_rate": 8.83285710003142e-05, + "loss": 2.5454, + "step": 10792 + }, + { + "epoch": 0.8710354289403599, + "grad_norm": 0.7205955982208252, + "learning_rate": 8.831289214310695e-05, + "loss": 2.5221, + "step": 10793 + }, + { + "epoch": 0.871116132676943, + "grad_norm": 0.7134295105934143, + "learning_rate": 8.8297213577173e-05, + "loss": 2.5626, + "step": 10794 + }, + { + "epoch": 0.871196836413526, + "grad_norm": 
0.6560496091842651, + "learning_rate": 8.828153530290307e-05, + "loss": 2.5408, + "step": 10795 + }, + { + "epoch": 0.8712775401501089, + "grad_norm": 0.7055882215499878, + "learning_rate": 8.82658573206879e-05, + "loss": 2.5173, + "step": 10796 + }, + { + "epoch": 0.8713582438866919, + "grad_norm": 0.6751883029937744, + "learning_rate": 8.825017963091827e-05, + "loss": 2.5378, + "step": 10797 + }, + { + "epoch": 0.871438947623275, + "grad_norm": 0.6794824600219727, + "learning_rate": 8.823450223398485e-05, + "loss": 2.592, + "step": 10798 + }, + { + "epoch": 0.871519651359858, + "grad_norm": 0.675729513168335, + "learning_rate": 8.821882513027838e-05, + "loss": 2.5253, + "step": 10799 + }, + { + "epoch": 0.871600355096441, + "grad_norm": 0.7185894250869751, + "learning_rate": 8.820314832018962e-05, + "loss": 2.5073, + "step": 10800 + }, + { + "epoch": 0.8716810588330239, + "grad_norm": 0.6605187654495239, + "learning_rate": 8.818747180410921e-05, + "loss": 2.5141, + "step": 10801 + }, + { + "epoch": 0.871761762569607, + "grad_norm": 0.6955205798149109, + "learning_rate": 8.817179558242788e-05, + "loss": 2.5313, + "step": 10802 + }, + { + "epoch": 0.87184246630619, + "grad_norm": 0.6307928562164307, + "learning_rate": 8.815611965553638e-05, + "loss": 2.4975, + "step": 10803 + }, + { + "epoch": 0.871923170042773, + "grad_norm": 0.7283728122711182, + "learning_rate": 8.814044402382527e-05, + "loss": 2.4623, + "step": 10804 + }, + { + "epoch": 0.872003873779356, + "grad_norm": 0.7019702792167664, + "learning_rate": 8.81247686876853e-05, + "loss": 2.4755, + "step": 10805 + }, + { + "epoch": 0.872084577515939, + "grad_norm": 0.6769137382507324, + "learning_rate": 8.81090936475072e-05, + "loss": 2.59, + "step": 10806 + }, + { + "epoch": 0.872165281252522, + "grad_norm": 0.6185588836669922, + "learning_rate": 8.80934189036815e-05, + "loss": 2.5308, + "step": 10807 + }, + { + "epoch": 0.872245984989105, + "grad_norm": 0.7127000689506531, + "learning_rate": 
8.807774445659894e-05, + "loss": 2.5301, + "step": 10808 + }, + { + "epoch": 0.872326688725688, + "grad_norm": 0.7039114236831665, + "learning_rate": 8.806207030665016e-05, + "loss": 2.5176, + "step": 10809 + }, + { + "epoch": 0.8724073924622711, + "grad_norm": 0.6763370633125305, + "learning_rate": 8.804639645422582e-05, + "loss": 2.5324, + "step": 10810 + }, + { + "epoch": 0.872488096198854, + "grad_norm": 0.7546409368515015, + "learning_rate": 8.803072289971648e-05, + "loss": 2.5446, + "step": 10811 + }, + { + "epoch": 0.872568799935437, + "grad_norm": 0.6916004419326782, + "learning_rate": 8.801504964351284e-05, + "loss": 2.5056, + "step": 10812 + }, + { + "epoch": 0.87264950367202, + "grad_norm": 0.7108416557312012, + "learning_rate": 8.799937668600552e-05, + "loss": 2.5966, + "step": 10813 + }, + { + "epoch": 0.872730207408603, + "grad_norm": 0.7146576046943665, + "learning_rate": 8.798370402758506e-05, + "loss": 2.5152, + "step": 10814 + }, + { + "epoch": 0.872810911145186, + "grad_norm": 0.6708142757415771, + "learning_rate": 8.796803166864211e-05, + "loss": 2.5248, + "step": 10815 + }, + { + "epoch": 0.872891614881769, + "grad_norm": 0.6687600612640381, + "learning_rate": 8.795235960956729e-05, + "loss": 2.4451, + "step": 10816 + }, + { + "epoch": 0.872972318618352, + "grad_norm": 0.724012553691864, + "learning_rate": 8.793668785075114e-05, + "loss": 2.4816, + "step": 10817 + }, + { + "epoch": 0.873053022354935, + "grad_norm": 0.6938769221305847, + "learning_rate": 8.792101639258426e-05, + "loss": 2.5435, + "step": 10818 + }, + { + "epoch": 0.8731337260915181, + "grad_norm": 0.7066235542297363, + "learning_rate": 8.790534523545724e-05, + "loss": 2.5167, + "step": 10819 + }, + { + "epoch": 0.873214429828101, + "grad_norm": 0.7129037380218506, + "learning_rate": 8.788967437976062e-05, + "loss": 2.5079, + "step": 10820 + }, + { + "epoch": 0.873295133564684, + "grad_norm": 0.6949728727340698, + "learning_rate": 8.787400382588497e-05, + "loss": 2.5564, + 
"step": 10821 + }, + { + "epoch": 0.873375837301267, + "grad_norm": 0.7924233675003052, + "learning_rate": 8.785833357422088e-05, + "loss": 2.5748, + "step": 10822 + }, + { + "epoch": 0.8734565410378501, + "grad_norm": 0.7486331462860107, + "learning_rate": 8.784266362515882e-05, + "loss": 2.565, + "step": 10823 + }, + { + "epoch": 0.8735372447744331, + "grad_norm": 0.7036460638046265, + "learning_rate": 8.782699397908935e-05, + "loss": 2.5101, + "step": 10824 + }, + { + "epoch": 0.873617948511016, + "grad_norm": 0.6691471338272095, + "learning_rate": 8.781132463640302e-05, + "loss": 2.5262, + "step": 10825 + }, + { + "epoch": 0.873698652247599, + "grad_norm": 0.6836682558059692, + "learning_rate": 8.779565559749037e-05, + "loss": 2.5651, + "step": 10826 + }, + { + "epoch": 0.8737793559841821, + "grad_norm": 0.6634507775306702, + "learning_rate": 8.777998686274185e-05, + "loss": 2.5383, + "step": 10827 + }, + { + "epoch": 0.8738600597207651, + "grad_norm": 0.6903105974197388, + "learning_rate": 8.7764318432548e-05, + "loss": 2.5659, + "step": 10828 + }, + { + "epoch": 0.8739407634573481, + "grad_norm": 0.737859308719635, + "learning_rate": 8.774865030729937e-05, + "loss": 2.5859, + "step": 10829 + }, + { + "epoch": 0.874021467193931, + "grad_norm": 0.696843683719635, + "learning_rate": 8.773298248738633e-05, + "loss": 2.5244, + "step": 10830 + }, + { + "epoch": 0.8741021709305141, + "grad_norm": 0.7342235445976257, + "learning_rate": 8.771731497319946e-05, + "loss": 2.5073, + "step": 10831 + }, + { + "epoch": 0.8741828746670971, + "grad_norm": 0.6676939725875854, + "learning_rate": 8.770164776512926e-05, + "loss": 2.5408, + "step": 10832 + }, + { + "epoch": 0.8742635784036801, + "grad_norm": 0.6957886219024658, + "learning_rate": 8.768598086356608e-05, + "loss": 2.5566, + "step": 10833 + }, + { + "epoch": 0.874344282140263, + "grad_norm": 0.6938990950584412, + "learning_rate": 8.767031426890046e-05, + "loss": 2.517, + "step": 10834 + }, + { + "epoch": 
0.8744249858768461, + "grad_norm": 0.8387169241905212, + "learning_rate": 8.765464798152286e-05, + "loss": 2.5507, + "step": 10835 + }, + { + "epoch": 0.8745056896134291, + "grad_norm": 0.6396276354789734, + "learning_rate": 8.763898200182368e-05, + "loss": 2.5063, + "step": 10836 + }, + { + "epoch": 0.8745863933500121, + "grad_norm": 0.7122719883918762, + "learning_rate": 8.762331633019339e-05, + "loss": 2.5816, + "step": 10837 + }, + { + "epoch": 0.8746670970865951, + "grad_norm": 0.6807141304016113, + "learning_rate": 8.760765096702244e-05, + "loss": 2.6004, + "step": 10838 + }, + { + "epoch": 0.8747478008231782, + "grad_norm": 0.6764848232269287, + "learning_rate": 8.759198591270117e-05, + "loss": 2.5303, + "step": 10839 + }, + { + "epoch": 0.8748285045597611, + "grad_norm": 0.718515932559967, + "learning_rate": 8.757632116762006e-05, + "loss": 2.5088, + "step": 10840 + }, + { + "epoch": 0.8749092082963441, + "grad_norm": 0.7084362506866455, + "learning_rate": 8.75606567321695e-05, + "loss": 2.5496, + "step": 10841 + }, + { + "epoch": 0.8749899120329271, + "grad_norm": 0.7191734910011292, + "learning_rate": 8.754499260673991e-05, + "loss": 2.5525, + "step": 10842 + }, + { + "epoch": 0.8750706157695102, + "grad_norm": 0.7167977094650269, + "learning_rate": 8.752932879172164e-05, + "loss": 2.5479, + "step": 10843 + }, + { + "epoch": 0.8751513195060932, + "grad_norm": 0.6994979381561279, + "learning_rate": 8.751366528750511e-05, + "loss": 2.4942, + "step": 10844 + }, + { + "epoch": 0.8752320232426761, + "grad_norm": 0.7192725539207458, + "learning_rate": 8.749800209448068e-05, + "loss": 2.5233, + "step": 10845 + }, + { + "epoch": 0.8753127269792591, + "grad_norm": 0.7728807330131531, + "learning_rate": 8.748233921303871e-05, + "loss": 2.5698, + "step": 10846 + }, + { + "epoch": 0.8753934307158422, + "grad_norm": 0.7305434942245483, + "learning_rate": 8.746667664356956e-05, + "loss": 2.5096, + "step": 10847 + }, + { + "epoch": 0.8754741344524252, + "grad_norm": 
0.7117629051208496, + "learning_rate": 8.745101438646365e-05, + "loss": 2.5272, + "step": 10848 + }, + { + "epoch": 0.8755548381890081, + "grad_norm": 0.7180361151695251, + "learning_rate": 8.743535244211121e-05, + "loss": 2.4718, + "step": 10849 + }, + { + "epoch": 0.8756355419255911, + "grad_norm": 0.6419457793235779, + "learning_rate": 8.741969081090263e-05, + "loss": 2.5407, + "step": 10850 + }, + { + "epoch": 0.8757162456621742, + "grad_norm": 0.7928328514099121, + "learning_rate": 8.740402949322827e-05, + "loss": 2.488, + "step": 10851 + }, + { + "epoch": 0.8757969493987572, + "grad_norm": 0.7449139952659607, + "learning_rate": 8.738836848947839e-05, + "loss": 2.5943, + "step": 10852 + }, + { + "epoch": 0.8758776531353402, + "grad_norm": 0.7919576168060303, + "learning_rate": 8.737270780004334e-05, + "loss": 2.5556, + "step": 10853 + }, + { + "epoch": 0.8759583568719231, + "grad_norm": 0.6867526769638062, + "learning_rate": 8.735704742531346e-05, + "loss": 2.5395, + "step": 10854 + }, + { + "epoch": 0.8760390606085062, + "grad_norm": 0.7195394039154053, + "learning_rate": 8.734138736567896e-05, + "loss": 2.4404, + "step": 10855 + }, + { + "epoch": 0.8761197643450892, + "grad_norm": 0.68385910987854, + "learning_rate": 8.732572762153016e-05, + "loss": 2.502, + "step": 10856 + }, + { + "epoch": 0.8762004680816722, + "grad_norm": 0.6957393884658813, + "learning_rate": 8.731006819325739e-05, + "loss": 2.5788, + "step": 10857 + }, + { + "epoch": 0.8762811718182552, + "grad_norm": 0.6973037123680115, + "learning_rate": 8.729440908125092e-05, + "loss": 2.4927, + "step": 10858 + }, + { + "epoch": 0.8763618755548382, + "grad_norm": 0.6535985469818115, + "learning_rate": 8.727875028590095e-05, + "loss": 2.596, + "step": 10859 + }, + { + "epoch": 0.8764425792914212, + "grad_norm": 0.7447848320007324, + "learning_rate": 8.726309180759777e-05, + "loss": 2.5825, + "step": 10860 + }, + { + "epoch": 0.8765232830280042, + "grad_norm": 0.7155942320823669, + "learning_rate": 
8.724743364673168e-05, + "loss": 2.5105, + "step": 10861 + }, + { + "epoch": 0.8766039867645872, + "grad_norm": 0.6664694547653198, + "learning_rate": 8.723177580369285e-05, + "loss": 2.5244, + "step": 10862 + }, + { + "epoch": 0.8766846905011701, + "grad_norm": 0.7437852025032043, + "learning_rate": 8.721611827887153e-05, + "loss": 2.534, + "step": 10863 + }, + { + "epoch": 0.8767653942377532, + "grad_norm": 0.6752577424049377, + "learning_rate": 8.7200461072658e-05, + "loss": 2.5025, + "step": 10864 + }, + { + "epoch": 0.8768460979743362, + "grad_norm": 0.7420764565467834, + "learning_rate": 8.718480418544241e-05, + "loss": 2.5261, + "step": 10865 + }, + { + "epoch": 0.8769268017109192, + "grad_norm": 0.669384777545929, + "learning_rate": 8.7169147617615e-05, + "loss": 2.5258, + "step": 10866 + }, + { + "epoch": 0.8770075054475022, + "grad_norm": 0.6649587750434875, + "learning_rate": 8.715349136956599e-05, + "loss": 2.5308, + "step": 10867 + }, + { + "epoch": 0.8770882091840853, + "grad_norm": 0.728922426700592, + "learning_rate": 8.713783544168552e-05, + "loss": 2.5251, + "step": 10868 + }, + { + "epoch": 0.8771689129206682, + "grad_norm": 0.6957671642303467, + "learning_rate": 8.712217983436384e-05, + "loss": 2.5818, + "step": 10869 + }, + { + "epoch": 0.8772496166572512, + "grad_norm": 0.6796830892562866, + "learning_rate": 8.710652454799108e-05, + "loss": 2.5122, + "step": 10870 + }, + { + "epoch": 0.8773303203938342, + "grad_norm": 0.7230980396270752, + "learning_rate": 8.709086958295746e-05, + "loss": 2.5836, + "step": 10871 + }, + { + "epoch": 0.8774110241304173, + "grad_norm": 0.6992264986038208, + "learning_rate": 8.707521493965309e-05, + "loss": 2.5907, + "step": 10872 + }, + { + "epoch": 0.8774917278670002, + "grad_norm": 0.7066535353660583, + "learning_rate": 8.705956061846816e-05, + "loss": 2.5508, + "step": 10873 + }, + { + "epoch": 0.8775724316035832, + "grad_norm": 0.6559327244758606, + "learning_rate": 8.704390661979283e-05, + "loss": 2.611, + 
"step": 10874 + }, + { + "epoch": 0.8776531353401662, + "grad_norm": 0.6673287749290466, + "learning_rate": 8.70282529440172e-05, + "loss": 2.5778, + "step": 10875 + }, + { + "epoch": 0.8777338390767493, + "grad_norm": 0.6715971231460571, + "learning_rate": 8.701259959153139e-05, + "loss": 2.5342, + "step": 10876 + }, + { + "epoch": 0.8778145428133323, + "grad_norm": 0.7456488609313965, + "learning_rate": 8.699694656272557e-05, + "loss": 2.5365, + "step": 10877 + }, + { + "epoch": 0.8778952465499152, + "grad_norm": 0.6658159494400024, + "learning_rate": 8.698129385798983e-05, + "loss": 2.4387, + "step": 10878 + }, + { + "epoch": 0.8779759502864982, + "grad_norm": 0.6653816103935242, + "learning_rate": 8.696564147771427e-05, + "loss": 2.5791, + "step": 10879 + }, + { + "epoch": 0.8780566540230813, + "grad_norm": 0.6763200163841248, + "learning_rate": 8.694998942228902e-05, + "loss": 2.5356, + "step": 10880 + }, + { + "epoch": 0.8781373577596643, + "grad_norm": 0.6534504890441895, + "learning_rate": 8.69343376921041e-05, + "loss": 2.5358, + "step": 10881 + }, + { + "epoch": 0.8782180614962473, + "grad_norm": 0.6341667771339417, + "learning_rate": 8.691868628754967e-05, + "loss": 2.4927, + "step": 10882 + }, + { + "epoch": 0.8782987652328302, + "grad_norm": 0.6215559244155884, + "learning_rate": 8.690303520901579e-05, + "loss": 2.4312, + "step": 10883 + }, + { + "epoch": 0.8783794689694133, + "grad_norm": 0.6705841422080994, + "learning_rate": 8.688738445689248e-05, + "loss": 2.4778, + "step": 10884 + }, + { + "epoch": 0.8784601727059963, + "grad_norm": 0.680275559425354, + "learning_rate": 8.687173403156982e-05, + "loss": 2.5577, + "step": 10885 + }, + { + "epoch": 0.8785408764425793, + "grad_norm": 0.6918728351593018, + "learning_rate": 8.685608393343789e-05, + "loss": 2.5212, + "step": 10886 + }, + { + "epoch": 0.8786215801791623, + "grad_norm": 0.623636782169342, + "learning_rate": 8.68404341628867e-05, + "loss": 2.5131, + "step": 10887 + }, + { + "epoch": 
0.8787022839157453, + "grad_norm": 0.7200562357902527, + "learning_rate": 8.682478472030628e-05, + "loss": 2.5517, + "step": 10888 + }, + { + "epoch": 0.8787829876523283, + "grad_norm": 0.6902644634246826, + "learning_rate": 8.680913560608666e-05, + "loss": 2.511, + "step": 10889 + }, + { + "epoch": 0.8788636913889113, + "grad_norm": 0.6855802536010742, + "learning_rate": 8.679348682061792e-05, + "loss": 2.5169, + "step": 10890 + }, + { + "epoch": 0.8789443951254943, + "grad_norm": 0.7229284048080444, + "learning_rate": 8.677783836428995e-05, + "loss": 2.5634, + "step": 10891 + }, + { + "epoch": 0.8790250988620774, + "grad_norm": 0.6350376605987549, + "learning_rate": 8.676219023749281e-05, + "loss": 2.443, + "step": 10892 + }, + { + "epoch": 0.8791058025986603, + "grad_norm": 0.6884307265281677, + "learning_rate": 8.674654244061653e-05, + "loss": 2.524, + "step": 10893 + }, + { + "epoch": 0.8791865063352433, + "grad_norm": 0.6571067571640015, + "learning_rate": 8.673089497405102e-05, + "loss": 2.5322, + "step": 10894 + }, + { + "epoch": 0.8792672100718263, + "grad_norm": 0.7078021764755249, + "learning_rate": 8.67152478381863e-05, + "loss": 2.5317, + "step": 10895 + }, + { + "epoch": 0.8793479138084094, + "grad_norm": 0.6809059381484985, + "learning_rate": 8.669960103341236e-05, + "loss": 2.5767, + "step": 10896 + }, + { + "epoch": 0.8794286175449924, + "grad_norm": 0.7399441003799438, + "learning_rate": 8.66839545601191e-05, + "loss": 2.5194, + "step": 10897 + }, + { + "epoch": 0.8795093212815753, + "grad_norm": 0.6762270927429199, + "learning_rate": 8.66683084186965e-05, + "loss": 2.5306, + "step": 10898 + }, + { + "epoch": 0.8795900250181583, + "grad_norm": 0.7394620776176453, + "learning_rate": 8.665266260953455e-05, + "loss": 2.4516, + "step": 10899 + }, + { + "epoch": 0.8796707287547414, + "grad_norm": 0.6775416135787964, + "learning_rate": 8.663701713302309e-05, + "loss": 2.5574, + "step": 10900 + }, + { + "epoch": 0.8797514324913244, + "grad_norm": 
0.7630520462989807, + "learning_rate": 8.66213719895521e-05, + "loss": 2.5516, + "step": 10901 + }, + { + "epoch": 0.8798321362279073, + "grad_norm": 0.6555768847465515, + "learning_rate": 8.660572717951149e-05, + "loss": 2.5267, + "step": 10902 + }, + { + "epoch": 0.8799128399644903, + "grad_norm": 0.6899500489234924, + "learning_rate": 8.659008270329119e-05, + "loss": 2.4938, + "step": 10903 + }, + { + "epoch": 0.8799935437010734, + "grad_norm": 0.6939221024513245, + "learning_rate": 8.657443856128107e-05, + "loss": 2.5358, + "step": 10904 + }, + { + "epoch": 0.8800742474376564, + "grad_norm": 0.6454630494117737, + "learning_rate": 8.655879475387102e-05, + "loss": 2.5528, + "step": 10905 + }, + { + "epoch": 0.8801549511742394, + "grad_norm": 0.7142425775527954, + "learning_rate": 8.654315128145099e-05, + "loss": 2.5668, + "step": 10906 + }, + { + "epoch": 0.8802356549108223, + "grad_norm": 0.7512764930725098, + "learning_rate": 8.652750814441075e-05, + "loss": 2.5224, + "step": 10907 + }, + { + "epoch": 0.8803163586474054, + "grad_norm": 0.6599575877189636, + "learning_rate": 8.651186534314026e-05, + "loss": 2.5363, + "step": 10908 + }, + { + "epoch": 0.8803970623839884, + "grad_norm": 0.6787410974502563, + "learning_rate": 8.649622287802935e-05, + "loss": 2.4587, + "step": 10909 + }, + { + "epoch": 0.8804777661205714, + "grad_norm": 0.7124783396720886, + "learning_rate": 8.648058074946786e-05, + "loss": 2.5842, + "step": 10910 + }, + { + "epoch": 0.8805584698571544, + "grad_norm": 0.6698839664459229, + "learning_rate": 8.646493895784562e-05, + "loss": 2.513, + "step": 10911 + }, + { + "epoch": 0.8806391735937374, + "grad_norm": 0.6660044193267822, + "learning_rate": 8.644929750355249e-05, + "loss": 2.4996, + "step": 10912 + }, + { + "epoch": 0.8807198773303204, + "grad_norm": 0.7060455083847046, + "learning_rate": 8.643365638697828e-05, + "loss": 2.5497, + "step": 10913 + }, + { + "epoch": 0.8808005810669034, + "grad_norm": 0.6835277676582336, + "learning_rate": 
8.641801560851281e-05, + "loss": 2.5198, + "step": 10914 + }, + { + "epoch": 0.8808812848034864, + "grad_norm": 0.6994042992591858, + "learning_rate": 8.640237516854595e-05, + "loss": 2.5692, + "step": 10915 + }, + { + "epoch": 0.8809619885400694, + "grad_norm": 0.6583377718925476, + "learning_rate": 8.63867350674674e-05, + "loss": 2.5025, + "step": 10916 + }, + { + "epoch": 0.8810426922766524, + "grad_norm": 0.6882332563400269, + "learning_rate": 8.637109530566698e-05, + "loss": 2.5343, + "step": 10917 + }, + { + "epoch": 0.8811233960132354, + "grad_norm": 0.6329876184463501, + "learning_rate": 8.635545588353449e-05, + "loss": 2.5335, + "step": 10918 + }, + { + "epoch": 0.8812040997498184, + "grad_norm": 0.713196337223053, + "learning_rate": 8.633981680145975e-05, + "loss": 2.4814, + "step": 10919 + }, + { + "epoch": 0.8812848034864014, + "grad_norm": 0.7388820648193359, + "learning_rate": 8.632417805983246e-05, + "loss": 2.4927, + "step": 10920 + }, + { + "epoch": 0.8813655072229845, + "grad_norm": 0.7316160798072815, + "learning_rate": 8.63085396590424e-05, + "loss": 2.508, + "step": 10921 + }, + { + "epoch": 0.8814462109595674, + "grad_norm": 0.6690139174461365, + "learning_rate": 8.629290159947934e-05, + "loss": 2.5719, + "step": 10922 + }, + { + "epoch": 0.8815269146961504, + "grad_norm": 0.6369553208351135, + "learning_rate": 8.627726388153297e-05, + "loss": 2.5277, + "step": 10923 + }, + { + "epoch": 0.8816076184327334, + "grad_norm": 0.6870365738868713, + "learning_rate": 8.626162650559306e-05, + "loss": 2.4731, + "step": 10924 + }, + { + "epoch": 0.8816883221693165, + "grad_norm": 0.6890872716903687, + "learning_rate": 8.624598947204938e-05, + "loss": 2.5417, + "step": 10925 + }, + { + "epoch": 0.8817690259058995, + "grad_norm": 0.6548230051994324, + "learning_rate": 8.623035278129156e-05, + "loss": 2.4888, + "step": 10926 + }, + { + "epoch": 0.8818497296424824, + "grad_norm": 0.6835262775421143, + "learning_rate": 8.621471643370933e-05, + "loss": 2.531, 
+ "step": 10927 + }, + { + "epoch": 0.8819304333790654, + "grad_norm": 0.6910626292228699, + "learning_rate": 8.619908042969243e-05, + "loss": 2.4864, + "step": 10928 + }, + { + "epoch": 0.8820111371156485, + "grad_norm": 0.6727725267410278, + "learning_rate": 8.618344476963049e-05, + "loss": 2.5063, + "step": 10929 + }, + { + "epoch": 0.8820918408522315, + "grad_norm": 0.7285245656967163, + "learning_rate": 8.616780945391323e-05, + "loss": 2.5036, + "step": 10930 + }, + { + "epoch": 0.8821725445888144, + "grad_norm": 0.6561840176582336, + "learning_rate": 8.615217448293035e-05, + "loss": 2.5152, + "step": 10931 + }, + { + "epoch": 0.8822532483253974, + "grad_norm": 0.6524627208709717, + "learning_rate": 8.613653985707144e-05, + "loss": 2.4827, + "step": 10932 + }, + { + "epoch": 0.8823339520619805, + "grad_norm": 0.6815671920776367, + "learning_rate": 8.612090557672619e-05, + "loss": 2.5385, + "step": 10933 + }, + { + "epoch": 0.8824146557985635, + "grad_norm": 0.7479865550994873, + "learning_rate": 8.610527164228429e-05, + "loss": 2.5311, + "step": 10934 + }, + { + "epoch": 0.8824953595351465, + "grad_norm": 0.699504554271698, + "learning_rate": 8.608963805413535e-05, + "loss": 2.5332, + "step": 10935 + }, + { + "epoch": 0.8825760632717294, + "grad_norm": 0.7081198692321777, + "learning_rate": 8.607400481266896e-05, + "loss": 2.5636, + "step": 10936 + }, + { + "epoch": 0.8826567670083125, + "grad_norm": 0.7020730972290039, + "learning_rate": 8.605837191827478e-05, + "loss": 2.498, + "step": 10937 + }, + { + "epoch": 0.8827374707448955, + "grad_norm": 0.8004096150398254, + "learning_rate": 8.604273937134242e-05, + "loss": 2.5352, + "step": 10938 + }, + { + "epoch": 0.8828181744814785, + "grad_norm": 0.6399645209312439, + "learning_rate": 8.602710717226147e-05, + "loss": 2.5673, + "step": 10939 + }, + { + "epoch": 0.8828988782180615, + "grad_norm": 0.683195173740387, + "learning_rate": 8.601147532142153e-05, + "loss": 2.4812, + "step": 10940 + }, + { + "epoch": 
0.8829795819546445, + "grad_norm": 0.7783642411231995, + "learning_rate": 8.599584381921224e-05, + "loss": 2.4812, + "step": 10941 + }, + { + "epoch": 0.8830602856912275, + "grad_norm": 0.7107423543930054, + "learning_rate": 8.598021266602308e-05, + "loss": 2.5527, + "step": 10942 + }, + { + "epoch": 0.8831409894278105, + "grad_norm": 0.6419345140457153, + "learning_rate": 8.596458186224365e-05, + "loss": 2.5642, + "step": 10943 + }, + { + "epoch": 0.8832216931643935, + "grad_norm": 0.6897309422492981, + "learning_rate": 8.59489514082636e-05, + "loss": 2.5743, + "step": 10944 + }, + { + "epoch": 0.8833023969009766, + "grad_norm": 0.6901495456695557, + "learning_rate": 8.593332130447236e-05, + "loss": 2.5139, + "step": 10945 + }, + { + "epoch": 0.8833831006375595, + "grad_norm": 0.6865388751029968, + "learning_rate": 8.591769155125953e-05, + "loss": 2.5281, + "step": 10946 + }, + { + "epoch": 0.8834638043741425, + "grad_norm": 0.7070403099060059, + "learning_rate": 8.590206214901465e-05, + "loss": 2.4648, + "step": 10947 + }, + { + "epoch": 0.8835445081107255, + "grad_norm": 0.6846395134925842, + "learning_rate": 8.588643309812721e-05, + "loss": 2.4792, + "step": 10948 + }, + { + "epoch": 0.8836252118473086, + "grad_norm": 0.6875495314598083, + "learning_rate": 8.587080439898675e-05, + "loss": 2.5126, + "step": 10949 + }, + { + "epoch": 0.8837059155838916, + "grad_norm": 0.670098066329956, + "learning_rate": 8.58551760519828e-05, + "loss": 2.4922, + "step": 10950 + }, + { + "epoch": 0.8837866193204745, + "grad_norm": 0.6675527691841125, + "learning_rate": 8.583954805750487e-05, + "loss": 2.499, + "step": 10951 + }, + { + "epoch": 0.8838673230570575, + "grad_norm": 0.6694127321243286, + "learning_rate": 8.582392041594236e-05, + "loss": 2.5286, + "step": 10952 + }, + { + "epoch": 0.8839480267936406, + "grad_norm": 0.7291092872619629, + "learning_rate": 8.580829312768482e-05, + "loss": 2.5705, + "step": 10953 + }, + { + "epoch": 0.8840287305302236, + "grad_norm": 
0.709904670715332, + "learning_rate": 8.579266619312174e-05, + "loss": 2.5238, + "step": 10954 + }, + { + "epoch": 0.8841094342668065, + "grad_norm": 0.7037622332572937, + "learning_rate": 8.577703961264254e-05, + "loss": 2.5491, + "step": 10955 + }, + { + "epoch": 0.8841901380033895, + "grad_norm": 0.7553049325942993, + "learning_rate": 8.576141338663668e-05, + "loss": 2.5643, + "step": 10956 + }, + { + "epoch": 0.8842708417399726, + "grad_norm": 0.7177377343177795, + "learning_rate": 8.574578751549364e-05, + "loss": 2.49, + "step": 10957 + }, + { + "epoch": 0.8843515454765556, + "grad_norm": 0.682668149471283, + "learning_rate": 8.573016199960283e-05, + "loss": 2.5221, + "step": 10958 + }, + { + "epoch": 0.8844322492131386, + "grad_norm": 0.7508956789970398, + "learning_rate": 8.571453683935366e-05, + "loss": 2.5766, + "step": 10959 + }, + { + "epoch": 0.8845129529497215, + "grad_norm": 0.6495946645736694, + "learning_rate": 8.569891203513562e-05, + "loss": 2.534, + "step": 10960 + }, + { + "epoch": 0.8845936566863046, + "grad_norm": 0.7362824082374573, + "learning_rate": 8.568328758733806e-05, + "loss": 2.4614, + "step": 10961 + }, + { + "epoch": 0.8846743604228876, + "grad_norm": 0.6571496725082397, + "learning_rate": 8.566766349635037e-05, + "loss": 2.4393, + "step": 10962 + }, + { + "epoch": 0.8847550641594706, + "grad_norm": 0.7088329195976257, + "learning_rate": 8.5652039762562e-05, + "loss": 2.5476, + "step": 10963 + }, + { + "epoch": 0.8848357678960536, + "grad_norm": 0.6414440274238586, + "learning_rate": 8.56364163863623e-05, + "loss": 2.4668, + "step": 10964 + }, + { + "epoch": 0.8849164716326365, + "grad_norm": 0.7333478331565857, + "learning_rate": 8.562079336814063e-05, + "loss": 2.5151, + "step": 10965 + }, + { + "epoch": 0.8849971753692196, + "grad_norm": 0.638038694858551, + "learning_rate": 8.560517070828638e-05, + "loss": 2.5063, + "step": 10966 + }, + { + "epoch": 0.8850778791058026, + "grad_norm": 0.638921320438385, + "learning_rate": 
8.558954840718896e-05, + "loss": 2.4769, + "step": 10967 + }, + { + "epoch": 0.8851585828423856, + "grad_norm": 0.6923465728759766, + "learning_rate": 8.557392646523759e-05, + "loss": 2.5388, + "step": 10968 + }, + { + "epoch": 0.8852392865789686, + "grad_norm": 0.7095212936401367, + "learning_rate": 8.555830488282169e-05, + "loss": 2.4955, + "step": 10969 + }, + { + "epoch": 0.8853199903155516, + "grad_norm": 0.689908504486084, + "learning_rate": 8.554268366033065e-05, + "loss": 2.4998, + "step": 10970 + }, + { + "epoch": 0.8854006940521346, + "grad_norm": 0.6551975011825562, + "learning_rate": 8.552706279815366e-05, + "loss": 2.4965, + "step": 10971 + }, + { + "epoch": 0.8854813977887176, + "grad_norm": 0.7239118218421936, + "learning_rate": 8.551144229668012e-05, + "loss": 2.5785, + "step": 10972 + }, + { + "epoch": 0.8855621015253006, + "grad_norm": 0.6743230819702148, + "learning_rate": 8.549582215629932e-05, + "loss": 2.5146, + "step": 10973 + }, + { + "epoch": 0.8856428052618837, + "grad_norm": 0.6991584300994873, + "learning_rate": 8.548020237740052e-05, + "loss": 2.5524, + "step": 10974 + }, + { + "epoch": 0.8857235089984666, + "grad_norm": 0.6605305075645447, + "learning_rate": 8.546458296037304e-05, + "loss": 2.5505, + "step": 10975 + }, + { + "epoch": 0.8858042127350496, + "grad_norm": 0.7011568546295166, + "learning_rate": 8.54489639056062e-05, + "loss": 2.4381, + "step": 10976 + }, + { + "epoch": 0.8858849164716326, + "grad_norm": 0.7015339136123657, + "learning_rate": 8.543334521348916e-05, + "loss": 2.5432, + "step": 10977 + }, + { + "epoch": 0.8859656202082157, + "grad_norm": 0.6892278790473938, + "learning_rate": 8.541772688441124e-05, + "loss": 2.5286, + "step": 10978 + }, + { + "epoch": 0.8860463239447987, + "grad_norm": 0.6680187582969666, + "learning_rate": 8.540210891876168e-05, + "loss": 2.439, + "step": 10979 + }, + { + "epoch": 0.8861270276813816, + "grad_norm": 0.7043240666389465, + "learning_rate": 8.538649131692975e-05, + "loss": 
2.5558, + "step": 10980 + }, + { + "epoch": 0.8862077314179646, + "grad_norm": 0.6940229535102844, + "learning_rate": 8.537087407930463e-05, + "loss": 2.5219, + "step": 10981 + }, + { + "epoch": 0.8862884351545477, + "grad_norm": 0.6571553945541382, + "learning_rate": 8.535525720627558e-05, + "loss": 2.5054, + "step": 10982 + }, + { + "epoch": 0.8863691388911307, + "grad_norm": 0.6846656203269958, + "learning_rate": 8.533964069823182e-05, + "loss": 2.497, + "step": 10983 + }, + { + "epoch": 0.8864498426277136, + "grad_norm": 0.6838627457618713, + "learning_rate": 8.53240245555625e-05, + "loss": 2.5495, + "step": 10984 + }, + { + "epoch": 0.8865305463642966, + "grad_norm": 0.6825091242790222, + "learning_rate": 8.530840877865687e-05, + "loss": 2.5656, + "step": 10985 + }, + { + "epoch": 0.8866112501008797, + "grad_norm": 0.7368674278259277, + "learning_rate": 8.529279336790414e-05, + "loss": 2.5378, + "step": 10986 + }, + { + "epoch": 0.8866919538374627, + "grad_norm": 0.7333693504333496, + "learning_rate": 8.527717832369338e-05, + "loss": 2.506, + "step": 10987 + }, + { + "epoch": 0.8867726575740457, + "grad_norm": 0.6623306274414062, + "learning_rate": 8.526156364641384e-05, + "loss": 2.4824, + "step": 10988 + }, + { + "epoch": 0.8868533613106286, + "grad_norm": 0.6863973140716553, + "learning_rate": 8.524594933645468e-05, + "loss": 2.536, + "step": 10989 + }, + { + "epoch": 0.8869340650472117, + "grad_norm": 0.6805100440979004, + "learning_rate": 8.523033539420501e-05, + "loss": 2.4954, + "step": 10990 + }, + { + "epoch": 0.8870147687837947, + "grad_norm": 0.6672216653823853, + "learning_rate": 8.521472182005399e-05, + "loss": 2.4893, + "step": 10991 + }, + { + "epoch": 0.8870954725203777, + "grad_norm": 0.7310158610343933, + "learning_rate": 8.519910861439079e-05, + "loss": 2.5317, + "step": 10992 + }, + { + "epoch": 0.8871761762569607, + "grad_norm": 0.6820743083953857, + "learning_rate": 8.518349577760445e-05, + "loss": 2.4482, + "step": 10993 + }, + { + 
"epoch": 0.8872568799935437, + "grad_norm": 0.6660269498825073, + "learning_rate": 8.516788331008411e-05, + "loss": 2.5353, + "step": 10994 + }, + { + "epoch": 0.8873375837301267, + "grad_norm": 0.676243007183075, + "learning_rate": 8.51522712122189e-05, + "loss": 2.531, + "step": 10995 + }, + { + "epoch": 0.8874182874667097, + "grad_norm": 0.6677152514457703, + "learning_rate": 8.513665948439796e-05, + "loss": 2.4732, + "step": 10996 + }, + { + "epoch": 0.8874989912032927, + "grad_norm": 0.7341045141220093, + "learning_rate": 8.512104812701027e-05, + "loss": 2.5668, + "step": 10997 + }, + { + "epoch": 0.8875796949398758, + "grad_norm": 0.6475326418876648, + "learning_rate": 8.510543714044496e-05, + "loss": 2.5026, + "step": 10998 + }, + { + "epoch": 0.8876603986764587, + "grad_norm": 0.7335529923439026, + "learning_rate": 8.50898265250911e-05, + "loss": 2.4946, + "step": 10999 + }, + { + "epoch": 0.8877411024130417, + "grad_norm": 0.760108232498169, + "learning_rate": 8.507421628133772e-05, + "loss": 2.5697, + "step": 11000 + }, + { + "epoch": 0.8877411024130417, + "eval_loss": 2.450413465499878, + "eval_runtime": 975.281, + "eval_samples_per_second": 2.686, + "eval_steps_per_second": 0.448, + "step": 11000 + }, + { + "epoch": 0.8878218061496247, + "grad_norm": 0.6420160531997681, + "learning_rate": 8.505860640957391e-05, + "loss": 2.5842, + "step": 11001 + }, + { + "epoch": 0.8879025098862078, + "grad_norm": 0.6625204086303711, + "learning_rate": 8.50429969101887e-05, + "loss": 2.4771, + "step": 11002 + }, + { + "epoch": 0.8879832136227908, + "grad_norm": 0.7430149912834167, + "learning_rate": 8.502738778357107e-05, + "loss": 2.5509, + "step": 11003 + }, + { + "epoch": 0.8880639173593737, + "grad_norm": 0.663624107837677, + "learning_rate": 8.501177903011008e-05, + "loss": 2.504, + "step": 11004 + }, + { + "epoch": 0.8881446210959567, + "grad_norm": 0.6638087630271912, + "learning_rate": 8.499617065019476e-05, + "loss": 2.492, + "step": 11005 + }, + { + "epoch": 
0.8882253248325398, + "grad_norm": 0.7321780323982239, + "learning_rate": 8.498056264421406e-05, + "loss": 2.5808, + "step": 11006 + }, + { + "epoch": 0.8883060285691228, + "grad_norm": 0.7108619809150696, + "learning_rate": 8.4964955012557e-05, + "loss": 2.6185, + "step": 11007 + }, + { + "epoch": 0.8883867323057058, + "grad_norm": 0.6745856404304504, + "learning_rate": 8.494934775561258e-05, + "loss": 2.576, + "step": 11008 + }, + { + "epoch": 0.8884674360422887, + "grad_norm": 0.8002225756645203, + "learning_rate": 8.493374087376976e-05, + "loss": 2.5598, + "step": 11009 + }, + { + "epoch": 0.8885481397788718, + "grad_norm": 0.6848840713500977, + "learning_rate": 8.491813436741746e-05, + "loss": 2.5218, + "step": 11010 + }, + { + "epoch": 0.8886288435154548, + "grad_norm": 0.6464105248451233, + "learning_rate": 8.490252823694471e-05, + "loss": 2.5503, + "step": 11011 + }, + { + "epoch": 0.8887095472520378, + "grad_norm": 0.7165790796279907, + "learning_rate": 8.488692248274045e-05, + "loss": 2.5104, + "step": 11012 + }, + { + "epoch": 0.8887902509886207, + "grad_norm": 0.6832898259162903, + "learning_rate": 8.487131710519355e-05, + "loss": 2.5379, + "step": 11013 + }, + { + "epoch": 0.8888709547252038, + "grad_norm": 0.6992432475090027, + "learning_rate": 8.485571210469296e-05, + "loss": 2.5388, + "step": 11014 + }, + { + "epoch": 0.8889516584617868, + "grad_norm": 0.6410119533538818, + "learning_rate": 8.484010748162765e-05, + "loss": 2.5237, + "step": 11015 + }, + { + "epoch": 0.8890323621983698, + "grad_norm": 0.716248095035553, + "learning_rate": 8.482450323638647e-05, + "loss": 2.4977, + "step": 11016 + }, + { + "epoch": 0.8891130659349528, + "grad_norm": 0.6620567440986633, + "learning_rate": 8.480889936935833e-05, + "loss": 2.5088, + "step": 11017 + }, + { + "epoch": 0.8891937696715357, + "grad_norm": 0.7311015129089355, + "learning_rate": 8.479329588093217e-05, + "loss": 2.5547, + "step": 11018 + }, + { + "epoch": 0.8892744734081188, + "grad_norm": 
0.757203996181488, + "learning_rate": 8.477769277149676e-05, + "loss": 2.5681, + "step": 11019 + }, + { + "epoch": 0.8893551771447018, + "grad_norm": 0.6941282153129578, + "learning_rate": 8.476209004144107e-05, + "loss": 2.5078, + "step": 11020 + }, + { + "epoch": 0.8894358808812848, + "grad_norm": 0.6381667256355286, + "learning_rate": 8.474648769115396e-05, + "loss": 2.5371, + "step": 11021 + }, + { + "epoch": 0.8895165846178678, + "grad_norm": 0.7978621125221252, + "learning_rate": 8.473088572102422e-05, + "loss": 2.5384, + "step": 11022 + }, + { + "epoch": 0.8895972883544508, + "grad_norm": 0.7229189872741699, + "learning_rate": 8.471528413144072e-05, + "loss": 2.5469, + "step": 11023 + }, + { + "epoch": 0.8896779920910338, + "grad_norm": 0.705545961856842, + "learning_rate": 8.469968292279231e-05, + "loss": 2.5281, + "step": 11024 + }, + { + "epoch": 0.8897586958276168, + "grad_norm": 0.7259972095489502, + "learning_rate": 8.468408209546777e-05, + "loss": 2.5485, + "step": 11025 + }, + { + "epoch": 0.8898393995641998, + "grad_norm": 0.6859608888626099, + "learning_rate": 8.466848164985594e-05, + "loss": 2.5548, + "step": 11026 + }, + { + "epoch": 0.8899201033007829, + "grad_norm": 0.7036644816398621, + "learning_rate": 8.465288158634565e-05, + "loss": 2.5159, + "step": 11027 + }, + { + "epoch": 0.8900008070373658, + "grad_norm": 0.6899380087852478, + "learning_rate": 8.463728190532569e-05, + "loss": 2.5037, + "step": 11028 + }, + { + "epoch": 0.8900815107739488, + "grad_norm": 0.7428410649299622, + "learning_rate": 8.462168260718477e-05, + "loss": 2.5074, + "step": 11029 + }, + { + "epoch": 0.8901622145105318, + "grad_norm": 0.6724158525466919, + "learning_rate": 8.460608369231173e-05, + "loss": 2.5544, + "step": 11030 + }, + { + "epoch": 0.8902429182471149, + "grad_norm": 0.6516450643539429, + "learning_rate": 8.459048516109535e-05, + "loss": 2.5152, + "step": 11031 + }, + { + "epoch": 0.8903236219836979, + "grad_norm": 0.7013405561447144, + "learning_rate": 
8.457488701392434e-05, + "loss": 2.5116, + "step": 11032 + }, + { + "epoch": 0.8904043257202808, + "grad_norm": 0.7207479476928711, + "learning_rate": 8.455928925118747e-05, + "loss": 2.6041, + "step": 11033 + }, + { + "epoch": 0.8904850294568638, + "grad_norm": 0.69600510597229, + "learning_rate": 8.454369187327348e-05, + "loss": 2.5794, + "step": 11034 + }, + { + "epoch": 0.8905657331934469, + "grad_norm": 0.6831288933753967, + "learning_rate": 8.452809488057108e-05, + "loss": 2.4682, + "step": 11035 + }, + { + "epoch": 0.8906464369300299, + "grad_norm": 0.6978991627693176, + "learning_rate": 8.451249827346901e-05, + "loss": 2.4862, + "step": 11036 + }, + { + "epoch": 0.8907271406666128, + "grad_norm": 0.6772337555885315, + "learning_rate": 8.4496902052356e-05, + "loss": 2.5357, + "step": 11037 + }, + { + "epoch": 0.8908078444031958, + "grad_norm": 0.6735778450965881, + "learning_rate": 8.448130621762067e-05, + "loss": 2.5115, + "step": 11038 + }, + { + "epoch": 0.8908885481397789, + "grad_norm": 0.6695345044136047, + "learning_rate": 8.446571076965177e-05, + "loss": 2.5083, + "step": 11039 + }, + { + "epoch": 0.8909692518763619, + "grad_norm": 0.685343325138092, + "learning_rate": 8.445011570883796e-05, + "loss": 2.5221, + "step": 11040 + }, + { + "epoch": 0.8910499556129449, + "grad_norm": 0.7030319571495056, + "learning_rate": 8.443452103556792e-05, + "loss": 2.5708, + "step": 11041 + }, + { + "epoch": 0.8911306593495278, + "grad_norm": 0.6910343766212463, + "learning_rate": 8.441892675023029e-05, + "loss": 2.5373, + "step": 11042 + }, + { + "epoch": 0.8912113630861109, + "grad_norm": 0.7207868099212646, + "learning_rate": 8.440333285321374e-05, + "loss": 2.5862, + "step": 11043 + }, + { + "epoch": 0.8912920668226939, + "grad_norm": 0.6780788898468018, + "learning_rate": 8.438773934490692e-05, + "loss": 2.562, + "step": 11044 + }, + { + "epoch": 0.8913727705592769, + "grad_norm": 0.7010074257850647, + "learning_rate": 8.437214622569842e-05, + "loss": 2.4556, + 
"step": 11045 + }, + { + "epoch": 0.8914534742958599, + "grad_norm": 0.6763667464256287, + "learning_rate": 8.435655349597689e-05, + "loss": 2.5402, + "step": 11046 + }, + { + "epoch": 0.891534178032443, + "grad_norm": 0.6870944499969482, + "learning_rate": 8.4340961156131e-05, + "loss": 2.5307, + "step": 11047 + }, + { + "epoch": 0.8916148817690259, + "grad_norm": 0.7835623025894165, + "learning_rate": 8.432536920654923e-05, + "loss": 2.4974, + "step": 11048 + }, + { + "epoch": 0.8916955855056089, + "grad_norm": 0.7551318407058716, + "learning_rate": 8.430977764762024e-05, + "loss": 2.5206, + "step": 11049 + }, + { + "epoch": 0.8917762892421919, + "grad_norm": 0.6486842632293701, + "learning_rate": 8.429418647973265e-05, + "loss": 2.4909, + "step": 11050 + }, + { + "epoch": 0.891856992978775, + "grad_norm": 0.6894064545631409, + "learning_rate": 8.427859570327494e-05, + "loss": 2.5846, + "step": 11051 + }, + { + "epoch": 0.8919376967153579, + "grad_norm": 0.7597395181655884, + "learning_rate": 8.426300531863571e-05, + "loss": 2.5259, + "step": 11052 + }, + { + "epoch": 0.8920184004519409, + "grad_norm": 0.6784652471542358, + "learning_rate": 8.42474153262036e-05, + "loss": 2.5048, + "step": 11053 + }, + { + "epoch": 0.8920991041885239, + "grad_norm": 0.7703847885131836, + "learning_rate": 8.4231825726367e-05, + "loss": 2.4962, + "step": 11054 + }, + { + "epoch": 0.892179807925107, + "grad_norm": 0.6646561026573181, + "learning_rate": 8.421623651951454e-05, + "loss": 2.491, + "step": 11055 + }, + { + "epoch": 0.89226051166169, + "grad_norm": 0.6901054978370667, + "learning_rate": 8.420064770603475e-05, + "loss": 2.515, + "step": 11056 + }, + { + "epoch": 0.8923412153982729, + "grad_norm": 0.6789328455924988, + "learning_rate": 8.41850592863161e-05, + "loss": 2.5481, + "step": 11057 + }, + { + "epoch": 0.8924219191348559, + "grad_norm": 0.6211017370223999, + "learning_rate": 8.41694712607471e-05, + "loss": 2.51, + "step": 11058 + }, + { + "epoch": 0.892502622871439, 
+ "grad_norm": 0.6482260823249817, + "learning_rate": 8.415388362971626e-05, + "loss": 2.5418, + "step": 11059 + }, + { + "epoch": 0.892583326608022, + "grad_norm": 0.7627651691436768, + "learning_rate": 8.413829639361209e-05, + "loss": 2.5033, + "step": 11060 + }, + { + "epoch": 0.892664030344605, + "grad_norm": 0.6560852527618408, + "learning_rate": 8.412270955282302e-05, + "loss": 2.5442, + "step": 11061 + }, + { + "epoch": 0.8927447340811879, + "grad_norm": 0.7479087114334106, + "learning_rate": 8.410712310773752e-05, + "loss": 2.5189, + "step": 11062 + }, + { + "epoch": 0.892825437817771, + "grad_norm": 0.6970879435539246, + "learning_rate": 8.409153705874411e-05, + "loss": 2.5418, + "step": 11063 + }, + { + "epoch": 0.892906141554354, + "grad_norm": 0.6514548659324646, + "learning_rate": 8.407595140623113e-05, + "loss": 2.5277, + "step": 11064 + }, + { + "epoch": 0.892986845290937, + "grad_norm": 0.6745554804801941, + "learning_rate": 8.406036615058707e-05, + "loss": 2.5085, + "step": 11065 + }, + { + "epoch": 0.89306754902752, + "grad_norm": 0.7510363459587097, + "learning_rate": 8.404478129220037e-05, + "loss": 2.4941, + "step": 11066 + }, + { + "epoch": 0.8931482527641029, + "grad_norm": 0.6531470417976379, + "learning_rate": 8.402919683145941e-05, + "loss": 2.5363, + "step": 11067 + }, + { + "epoch": 0.893228956500686, + "grad_norm": 0.6861493587493896, + "learning_rate": 8.401361276875262e-05, + "loss": 2.6369, + "step": 11068 + }, + { + "epoch": 0.893309660237269, + "grad_norm": 0.6029497981071472, + "learning_rate": 8.39980291044684e-05, + "loss": 2.4953, + "step": 11069 + }, + { + "epoch": 0.893390363973852, + "grad_norm": 0.6831715106964111, + "learning_rate": 8.39824458389951e-05, + "loss": 2.5074, + "step": 11070 + }, + { + "epoch": 0.8934710677104349, + "grad_norm": 0.7076299786567688, + "learning_rate": 8.396686297272112e-05, + "loss": 2.5934, + "step": 11071 + }, + { + "epoch": 0.893551771447018, + "grad_norm": 0.6941438913345337, + 
"learning_rate": 8.395128050603487e-05, + "loss": 2.5338, + "step": 11072 + }, + { + "epoch": 0.893632475183601, + "grad_norm": 0.6867249011993408, + "learning_rate": 8.393569843932463e-05, + "loss": 2.5311, + "step": 11073 + }, + { + "epoch": 0.893713178920184, + "grad_norm": 0.623991847038269, + "learning_rate": 8.392011677297877e-05, + "loss": 2.5133, + "step": 11074 + }, + { + "epoch": 0.893793882656767, + "grad_norm": 0.6808422803878784, + "learning_rate": 8.390453550738564e-05, + "loss": 2.5398, + "step": 11075 + }, + { + "epoch": 0.89387458639335, + "grad_norm": 0.7136701345443726, + "learning_rate": 8.388895464293357e-05, + "loss": 2.5415, + "step": 11076 + }, + { + "epoch": 0.893955290129933, + "grad_norm": 0.6814287304878235, + "learning_rate": 8.387337418001084e-05, + "loss": 2.4782, + "step": 11077 + }, + { + "epoch": 0.894035993866516, + "grad_norm": 0.8101940155029297, + "learning_rate": 8.385779411900579e-05, + "loss": 2.5292, + "step": 11078 + }, + { + "epoch": 0.894116697603099, + "grad_norm": 0.7106796503067017, + "learning_rate": 8.384221446030676e-05, + "loss": 2.5819, + "step": 11079 + }, + { + "epoch": 0.8941974013396821, + "grad_norm": 0.7840015292167664, + "learning_rate": 8.382663520430191e-05, + "loss": 2.5243, + "step": 11080 + }, + { + "epoch": 0.894278105076265, + "grad_norm": 0.7037288546562195, + "learning_rate": 8.381105635137959e-05, + "loss": 2.5606, + "step": 11081 + }, + { + "epoch": 0.894358808812848, + "grad_norm": 0.671558678150177, + "learning_rate": 8.379547790192812e-05, + "loss": 2.4923, + "step": 11082 + }, + { + "epoch": 0.894439512549431, + "grad_norm": 0.6789675951004028, + "learning_rate": 8.377989985633567e-05, + "loss": 2.5281, + "step": 11083 + }, + { + "epoch": 0.8945202162860141, + "grad_norm": 0.6777840852737427, + "learning_rate": 8.37643222149905e-05, + "loss": 2.5159, + "step": 11084 + }, + { + "epoch": 0.8946009200225971, + "grad_norm": 0.6920693516731262, + "learning_rate": 8.374874497828089e-05, + "loss": 
2.4952, + "step": 11085 + }, + { + "epoch": 0.89468162375918, + "grad_norm": 0.7394022941589355, + "learning_rate": 8.373316814659502e-05, + "loss": 2.5035, + "step": 11086 + }, + { + "epoch": 0.894762327495763, + "grad_norm": 0.625960648059845, + "learning_rate": 8.37175917203211e-05, + "loss": 2.5324, + "step": 11087 + }, + { + "epoch": 0.8948430312323461, + "grad_norm": 0.6848758459091187, + "learning_rate": 8.370201569984742e-05, + "loss": 2.5312, + "step": 11088 + }, + { + "epoch": 0.8949237349689291, + "grad_norm": 0.7207037210464478, + "learning_rate": 8.368644008556205e-05, + "loss": 2.5807, + "step": 11089 + }, + { + "epoch": 0.895004438705512, + "grad_norm": 0.7582261562347412, + "learning_rate": 8.367086487785326e-05, + "loss": 2.532, + "step": 11090 + }, + { + "epoch": 0.895085142442095, + "grad_norm": 0.6916806101799011, + "learning_rate": 8.36552900771092e-05, + "loss": 2.4772, + "step": 11091 + }, + { + "epoch": 0.8951658461786781, + "grad_norm": 0.6457386016845703, + "learning_rate": 8.363971568371805e-05, + "loss": 2.4952, + "step": 11092 + }, + { + "epoch": 0.8952465499152611, + "grad_norm": 0.7006754279136658, + "learning_rate": 8.362414169806792e-05, + "loss": 2.5818, + "step": 11093 + }, + { + "epoch": 0.8953272536518441, + "grad_norm": 0.6939932703971863, + "learning_rate": 8.3608568120547e-05, + "loss": 2.5411, + "step": 11094 + }, + { + "epoch": 0.895407957388427, + "grad_norm": 0.6314546465873718, + "learning_rate": 8.359299495154343e-05, + "loss": 2.5408, + "step": 11095 + }, + { + "epoch": 0.8954886611250101, + "grad_norm": 0.7202826738357544, + "learning_rate": 8.357742219144529e-05, + "loss": 2.4925, + "step": 11096 + }, + { + "epoch": 0.8955693648615931, + "grad_norm": 0.6475295424461365, + "learning_rate": 8.356184984064071e-05, + "loss": 2.5023, + "step": 11097 + }, + { + "epoch": 0.8956500685981761, + "grad_norm": 0.6161238551139832, + "learning_rate": 8.354627789951785e-05, + "loss": 2.5053, + "step": 11098 + }, + { + "epoch": 
0.8957307723347591, + "grad_norm": 0.6919825077056885, + "learning_rate": 8.353070636846472e-05, + "loss": 2.5387, + "step": 11099 + }, + { + "epoch": 0.8958114760713421, + "grad_norm": 0.6374878883361816, + "learning_rate": 8.351513524786944e-05, + "loss": 2.5526, + "step": 11100 + }, + { + "epoch": 0.8958921798079251, + "grad_norm": 0.7041093707084656, + "learning_rate": 8.349956453812009e-05, + "loss": 2.5282, + "step": 11101 + }, + { + "epoch": 0.8959728835445081, + "grad_norm": 0.7252324819564819, + "learning_rate": 8.348399423960471e-05, + "loss": 2.5723, + "step": 11102 + }, + { + "epoch": 0.8960535872810911, + "grad_norm": 0.681682825088501, + "learning_rate": 8.346842435271137e-05, + "loss": 2.5284, + "step": 11103 + }, + { + "epoch": 0.8961342910176742, + "grad_norm": 0.7293850183486938, + "learning_rate": 8.34528548778281e-05, + "loss": 2.5014, + "step": 11104 + }, + { + "epoch": 0.8962149947542571, + "grad_norm": 0.7057846188545227, + "learning_rate": 8.343728581534299e-05, + "loss": 2.5502, + "step": 11105 + }, + { + "epoch": 0.8962956984908401, + "grad_norm": 0.6740830540657043, + "learning_rate": 8.342171716564398e-05, + "loss": 2.5205, + "step": 11106 + }, + { + "epoch": 0.8963764022274231, + "grad_norm": 0.6917470097541809, + "learning_rate": 8.340614892911907e-05, + "loss": 2.5216, + "step": 11107 + }, + { + "epoch": 0.8964571059640062, + "grad_norm": 0.7495635151863098, + "learning_rate": 8.339058110615638e-05, + "loss": 2.5509, + "step": 11108 + }, + { + "epoch": 0.8965378097005892, + "grad_norm": 0.6687765717506409, + "learning_rate": 8.33750136971438e-05, + "loss": 2.5286, + "step": 11109 + }, + { + "epoch": 0.8966185134371721, + "grad_norm": 0.6901381015777588, + "learning_rate": 8.335944670246931e-05, + "loss": 2.5545, + "step": 11110 + }, + { + "epoch": 0.8966992171737551, + "grad_norm": 0.6645506024360657, + "learning_rate": 8.334388012252094e-05, + "loss": 2.4883, + "step": 11111 + }, + { + "epoch": 0.8967799209103382, + "grad_norm": 
0.6427997350692749, + "learning_rate": 8.332831395768662e-05, + "loss": 2.5103, + "step": 11112 + }, + { + "epoch": 0.8968606246469212, + "grad_norm": 0.7224035263061523, + "learning_rate": 8.331274820835425e-05, + "loss": 2.5086, + "step": 11113 + }, + { + "epoch": 0.8969413283835042, + "grad_norm": 0.6918233036994934, + "learning_rate": 8.329718287491188e-05, + "loss": 2.5222, + "step": 11114 + }, + { + "epoch": 0.8970220321200871, + "grad_norm": 0.735583484172821, + "learning_rate": 8.328161795774734e-05, + "loss": 2.5277, + "step": 11115 + }, + { + "epoch": 0.8971027358566702, + "grad_norm": 0.6624864339828491, + "learning_rate": 8.326605345724857e-05, + "loss": 2.532, + "step": 11116 + }, + { + "epoch": 0.8971834395932532, + "grad_norm": 0.6227770447731018, + "learning_rate": 8.325048937380352e-05, + "loss": 2.5386, + "step": 11117 + }, + { + "epoch": 0.8972641433298362, + "grad_norm": 0.6483022570610046, + "learning_rate": 8.323492570780004e-05, + "loss": 2.4958, + "step": 11118 + }, + { + "epoch": 0.8973448470664191, + "grad_norm": 0.7072618007659912, + "learning_rate": 8.321936245962602e-05, + "loss": 2.4931, + "step": 11119 + }, + { + "epoch": 0.8974255508030021, + "grad_norm": 0.6848764419555664, + "learning_rate": 8.320379962966937e-05, + "loss": 2.4549, + "step": 11120 + }, + { + "epoch": 0.8975062545395852, + "grad_norm": 0.6819620132446289, + "learning_rate": 8.318823721831795e-05, + "loss": 2.5156, + "step": 11121 + }, + { + "epoch": 0.8975869582761682, + "grad_norm": 0.6834476590156555, + "learning_rate": 8.31726752259596e-05, + "loss": 2.507, + "step": 11122 + }, + { + "epoch": 0.8976676620127512, + "grad_norm": 0.6785772442817688, + "learning_rate": 8.315711365298214e-05, + "loss": 2.5086, + "step": 11123 + }, + { + "epoch": 0.8977483657493341, + "grad_norm": 0.6303566098213196, + "learning_rate": 8.314155249977351e-05, + "loss": 2.5087, + "step": 11124 + }, + { + "epoch": 0.8978290694859172, + "grad_norm": 0.6544361710548401, + "learning_rate": 
8.31259917667214e-05, + "loss": 2.505, + "step": 11125 + }, + { + "epoch": 0.8979097732225002, + "grad_norm": 0.8135818243026733, + "learning_rate": 8.311043145421369e-05, + "loss": 2.5139, + "step": 11126 + }, + { + "epoch": 0.8979904769590832, + "grad_norm": 0.6744341254234314, + "learning_rate": 8.309487156263818e-05, + "loss": 2.4797, + "step": 11127 + }, + { + "epoch": 0.8980711806956662, + "grad_norm": 0.6138790845870972, + "learning_rate": 8.307931209238267e-05, + "loss": 2.5334, + "step": 11128 + }, + { + "epoch": 0.8981518844322492, + "grad_norm": 0.702434241771698, + "learning_rate": 8.306375304383492e-05, + "loss": 2.5343, + "step": 11129 + }, + { + "epoch": 0.8982325881688322, + "grad_norm": 0.6787155270576477, + "learning_rate": 8.304819441738275e-05, + "loss": 2.507, + "step": 11130 + }, + { + "epoch": 0.8983132919054152, + "grad_norm": 0.6963719129562378, + "learning_rate": 8.303263621341386e-05, + "loss": 2.5238, + "step": 11131 + }, + { + "epoch": 0.8983939956419982, + "grad_norm": 0.6623271107673645, + "learning_rate": 8.3017078432316e-05, + "loss": 2.5206, + "step": 11132 + }, + { + "epoch": 0.8984746993785813, + "grad_norm": 0.777222752571106, + "learning_rate": 8.300152107447701e-05, + "loss": 2.5004, + "step": 11133 + }, + { + "epoch": 0.8985554031151642, + "grad_norm": 0.6788455247879028, + "learning_rate": 8.29859641402845e-05, + "loss": 2.5735, + "step": 11134 + }, + { + "epoch": 0.8986361068517472, + "grad_norm": 0.6595063209533691, + "learning_rate": 8.297040763012624e-05, + "loss": 2.4988, + "step": 11135 + }, + { + "epoch": 0.8987168105883302, + "grad_norm": 0.7105697989463806, + "learning_rate": 8.295485154438994e-05, + "loss": 2.5531, + "step": 11136 + }, + { + "epoch": 0.8987975143249133, + "grad_norm": 0.6884949803352356, + "learning_rate": 8.29392958834633e-05, + "loss": 2.5158, + "step": 11137 + }, + { + "epoch": 0.8988782180614963, + "grad_norm": 0.7178345322608948, + "learning_rate": 8.2923740647734e-05, + "loss": 2.5836, + 
"step": 11138 + }, + { + "epoch": 0.8989589217980792, + "grad_norm": 0.7000541687011719, + "learning_rate": 8.290818583758973e-05, + "loss": 2.5345, + "step": 11139 + }, + { + "epoch": 0.8990396255346622, + "grad_norm": 0.6808128952980042, + "learning_rate": 8.289263145341816e-05, + "loss": 2.5227, + "step": 11140 + }, + { + "epoch": 0.8991203292712453, + "grad_norm": 0.7047473788261414, + "learning_rate": 8.287707749560691e-05, + "loss": 2.477, + "step": 11141 + }, + { + "epoch": 0.8992010330078283, + "grad_norm": 0.6654812693595886, + "learning_rate": 8.286152396454365e-05, + "loss": 2.4575, + "step": 11142 + }, + { + "epoch": 0.8992817367444113, + "grad_norm": 0.6690360307693481, + "learning_rate": 8.284597086061603e-05, + "loss": 2.4755, + "step": 11143 + }, + { + "epoch": 0.8993624404809942, + "grad_norm": 0.7270147204399109, + "learning_rate": 8.283041818421164e-05, + "loss": 2.5893, + "step": 11144 + }, + { + "epoch": 0.8994431442175773, + "grad_norm": 0.5977498888969421, + "learning_rate": 8.28148659357181e-05, + "loss": 2.5108, + "step": 11145 + }, + { + "epoch": 0.8995238479541603, + "grad_norm": 0.694593071937561, + "learning_rate": 8.279931411552307e-05, + "loss": 2.5036, + "step": 11146 + }, + { + "epoch": 0.8996045516907433, + "grad_norm": 0.7395440936088562, + "learning_rate": 8.278376272401404e-05, + "loss": 2.5244, + "step": 11147 + }, + { + "epoch": 0.8996852554273262, + "grad_norm": 0.6483517289161682, + "learning_rate": 8.276821176157867e-05, + "loss": 2.5619, + "step": 11148 + }, + { + "epoch": 0.8997659591639093, + "grad_norm": 0.6996768116950989, + "learning_rate": 8.275266122860454e-05, + "loss": 2.5275, + "step": 11149 + }, + { + "epoch": 0.8998466629004923, + "grad_norm": 0.661122739315033, + "learning_rate": 8.273711112547914e-05, + "loss": 2.5053, + "step": 11150 + }, + { + "epoch": 0.8999273666370753, + "grad_norm": 0.6919111609458923, + "learning_rate": 8.272156145259006e-05, + "loss": 2.578, + "step": 11151 + }, + { + "epoch": 
0.9000080703736583, + "grad_norm": 0.6680958867073059, + "learning_rate": 8.270601221032482e-05, + "loss": 2.4942, + "step": 11152 + }, + { + "epoch": 0.9000887741102414, + "grad_norm": 0.6782989501953125, + "learning_rate": 8.269046339907101e-05, + "loss": 2.5461, + "step": 11153 + }, + { + "epoch": 0.9001694778468243, + "grad_norm": 0.743468165397644, + "learning_rate": 8.267491501921605e-05, + "loss": 2.629, + "step": 11154 + }, + { + "epoch": 0.9002501815834073, + "grad_norm": 0.709562361240387, + "learning_rate": 8.265936707114751e-05, + "loss": 2.566, + "step": 11155 + }, + { + "epoch": 0.9003308853199903, + "grad_norm": 0.7075676918029785, + "learning_rate": 8.264381955525291e-05, + "loss": 2.5409, + "step": 11156 + }, + { + "epoch": 0.9004115890565734, + "grad_norm": 0.7021335959434509, + "learning_rate": 8.262827247191963e-05, + "loss": 2.5606, + "step": 11157 + }, + { + "epoch": 0.9004922927931563, + "grad_norm": 0.6507331132888794, + "learning_rate": 8.261272582153524e-05, + "loss": 2.5557, + "step": 11158 + }, + { + "epoch": 0.9005729965297393, + "grad_norm": 0.7182760238647461, + "learning_rate": 8.25971796044872e-05, + "loss": 2.5567, + "step": 11159 + }, + { + "epoch": 0.9006537002663223, + "grad_norm": 0.6632338762283325, + "learning_rate": 8.258163382116291e-05, + "loss": 2.5081, + "step": 11160 + }, + { + "epoch": 0.9007344040029054, + "grad_norm": 0.6889928579330444, + "learning_rate": 8.256608847194983e-05, + "loss": 2.5034, + "step": 11161 + }, + { + "epoch": 0.9008151077394884, + "grad_norm": 0.6374824047088623, + "learning_rate": 8.255054355723542e-05, + "loss": 2.4826, + "step": 11162 + }, + { + "epoch": 0.9008958114760713, + "grad_norm": 0.7100771069526672, + "learning_rate": 8.253499907740706e-05, + "loss": 2.4666, + "step": 11163 + }, + { + "epoch": 0.9009765152126543, + "grad_norm": 0.8141123652458191, + "learning_rate": 8.251945503285218e-05, + "loss": 2.5339, + "step": 11164 + }, + { + "epoch": 0.9010572189492374, + "grad_norm": 
0.6621670722961426, + "learning_rate": 8.250391142395822e-05, + "loss": 2.4805, + "step": 11165 + }, + { + "epoch": 0.9011379226858204, + "grad_norm": 0.6624772548675537, + "learning_rate": 8.248836825111245e-05, + "loss": 2.5148, + "step": 11166 + }, + { + "epoch": 0.9012186264224034, + "grad_norm": 0.6783565282821655, + "learning_rate": 8.247282551470235e-05, + "loss": 2.4481, + "step": 11167 + }, + { + "epoch": 0.9012993301589863, + "grad_norm": 0.700089156627655, + "learning_rate": 8.245728321511525e-05, + "loss": 2.5649, + "step": 11168 + }, + { + "epoch": 0.9013800338955693, + "grad_norm": 0.6765339970588684, + "learning_rate": 8.244174135273852e-05, + "loss": 2.5221, + "step": 11169 + }, + { + "epoch": 0.9014607376321524, + "grad_norm": 0.6896056532859802, + "learning_rate": 8.242619992795948e-05, + "loss": 2.4742, + "step": 11170 + }, + { + "epoch": 0.9015414413687354, + "grad_norm": 0.7134374976158142, + "learning_rate": 8.241065894116547e-05, + "loss": 2.5231, + "step": 11171 + }, + { + "epoch": 0.9016221451053184, + "grad_norm": 0.6939442753791809, + "learning_rate": 8.239511839274385e-05, + "loss": 2.5159, + "step": 11172 + }, + { + "epoch": 0.9017028488419013, + "grad_norm": 0.6780345439910889, + "learning_rate": 8.237957828308187e-05, + "loss": 2.5474, + "step": 11173 + }, + { + "epoch": 0.9017835525784844, + "grad_norm": 0.6532382965087891, + "learning_rate": 8.236403861256687e-05, + "loss": 2.4982, + "step": 11174 + }, + { + "epoch": 0.9018642563150674, + "grad_norm": 0.6918137073516846, + "learning_rate": 8.234849938158615e-05, + "loss": 2.4657, + "step": 11175 + }, + { + "epoch": 0.9019449600516504, + "grad_norm": 0.6838762164115906, + "learning_rate": 8.233296059052695e-05, + "loss": 2.5405, + "step": 11176 + }, + { + "epoch": 0.9020256637882333, + "grad_norm": 0.7560290098190308, + "learning_rate": 8.231742223977653e-05, + "loss": 2.5379, + "step": 11177 + }, + { + "epoch": 0.9021063675248164, + "grad_norm": 0.6673319339752197, + 
"learning_rate": 8.230188432972221e-05, + "loss": 2.4669, + "step": 11178 + }, + { + "epoch": 0.9021870712613994, + "grad_norm": 0.7486294507980347, + "learning_rate": 8.228634686075116e-05, + "loss": 2.526, + "step": 11179 + }, + { + "epoch": 0.9022677749979824, + "grad_norm": 0.7012811303138733, + "learning_rate": 8.227080983325067e-05, + "loss": 2.5544, + "step": 11180 + }, + { + "epoch": 0.9023484787345654, + "grad_norm": 0.6807447075843811, + "learning_rate": 8.225527324760796e-05, + "loss": 2.5139, + "step": 11181 + }, + { + "epoch": 0.9024291824711484, + "grad_norm": 0.7594932317733765, + "learning_rate": 8.223973710421018e-05, + "loss": 2.539, + "step": 11182 + }, + { + "epoch": 0.9025098862077314, + "grad_norm": 0.6764204502105713, + "learning_rate": 8.22242014034446e-05, + "loss": 2.6128, + "step": 11183 + }, + { + "epoch": 0.9025905899443144, + "grad_norm": 0.6499967575073242, + "learning_rate": 8.220866614569837e-05, + "loss": 2.5459, + "step": 11184 + }, + { + "epoch": 0.9026712936808974, + "grad_norm": 0.673076331615448, + "learning_rate": 8.219313133135876e-05, + "loss": 2.5852, + "step": 11185 + }, + { + "epoch": 0.9027519974174805, + "grad_norm": 0.784854531288147, + "learning_rate": 8.21775969608128e-05, + "loss": 2.5586, + "step": 11186 + }, + { + "epoch": 0.9028327011540634, + "grad_norm": 0.658963680267334, + "learning_rate": 8.216206303444771e-05, + "loss": 2.4376, + "step": 11187 + }, + { + "epoch": 0.9029134048906464, + "grad_norm": 0.6456249356269836, + "learning_rate": 8.214652955265067e-05, + "loss": 2.5166, + "step": 11188 + }, + { + "epoch": 0.9029941086272294, + "grad_norm": 0.6940007209777832, + "learning_rate": 8.213099651580874e-05, + "loss": 2.4992, + "step": 11189 + }, + { + "epoch": 0.9030748123638125, + "grad_norm": 0.6661425828933716, + "learning_rate": 8.211546392430911e-05, + "loss": 2.5177, + "step": 11190 + }, + { + "epoch": 0.9031555161003955, + "grad_norm": 0.647834300994873, + "learning_rate": 8.20999317785389e-05, + 
"loss": 2.4666, + "step": 11191 + }, + { + "epoch": 0.9032362198369784, + "grad_norm": 0.7673383355140686, + "learning_rate": 8.208440007888515e-05, + "loss": 2.4852, + "step": 11192 + }, + { + "epoch": 0.9033169235735614, + "grad_norm": 0.7033390998840332, + "learning_rate": 8.206886882573498e-05, + "loss": 2.5549, + "step": 11193 + }, + { + "epoch": 0.9033976273101445, + "grad_norm": 0.6871141195297241, + "learning_rate": 8.205333801947548e-05, + "loss": 2.4585, + "step": 11194 + }, + { + "epoch": 0.9034783310467275, + "grad_norm": 0.7201984524726868, + "learning_rate": 8.20378076604937e-05, + "loss": 2.5271, + "step": 11195 + }, + { + "epoch": 0.9035590347833105, + "grad_norm": 0.704060971736908, + "learning_rate": 8.202227774917671e-05, + "loss": 2.4915, + "step": 11196 + }, + { + "epoch": 0.9036397385198934, + "grad_norm": 0.6833879947662354, + "learning_rate": 8.200674828591156e-05, + "loss": 2.4496, + "step": 11197 + }, + { + "epoch": 0.9037204422564765, + "grad_norm": 0.6564866304397583, + "learning_rate": 8.199121927108527e-05, + "loss": 2.4818, + "step": 11198 + }, + { + "epoch": 0.9038011459930595, + "grad_norm": 0.6970151662826538, + "learning_rate": 8.197569070508486e-05, + "loss": 2.5812, + "step": 11199 + }, + { + "epoch": 0.9038818497296425, + "grad_norm": 0.7147194743156433, + "learning_rate": 8.196016258829737e-05, + "loss": 2.5543, + "step": 11200 + }, + { + "epoch": 0.9039625534662254, + "grad_norm": 0.6357648968696594, + "learning_rate": 8.194463492110981e-05, + "loss": 2.5254, + "step": 11201 + }, + { + "epoch": 0.9040432572028085, + "grad_norm": 0.7113756537437439, + "learning_rate": 8.19291077039091e-05, + "loss": 2.5179, + "step": 11202 + }, + { + "epoch": 0.9041239609393915, + "grad_norm": 0.7252987623214722, + "learning_rate": 8.191358093708228e-05, + "loss": 2.5658, + "step": 11203 + }, + { + "epoch": 0.9042046646759745, + "grad_norm": 0.7095803618431091, + "learning_rate": 8.189805462101631e-05, + "loss": 2.583, + "step": 11204 + }, + { 
+ "epoch": 0.9042853684125575, + "grad_norm": 0.7447760105133057, + "learning_rate": 8.188252875609812e-05, + "loss": 2.5608, + "step": 11205 + }, + { + "epoch": 0.9043660721491406, + "grad_norm": 0.6578439474105835, + "learning_rate": 8.186700334271468e-05, + "loss": 2.508, + "step": 11206 + }, + { + "epoch": 0.9044467758857235, + "grad_norm": 0.6776832938194275, + "learning_rate": 8.185147838125296e-05, + "loss": 2.6188, + "step": 11207 + }, + { + "epoch": 0.9045274796223065, + "grad_norm": 0.6559253931045532, + "learning_rate": 8.183595387209976e-05, + "loss": 2.5307, + "step": 11208 + }, + { + "epoch": 0.9046081833588895, + "grad_norm": 0.7078405022621155, + "learning_rate": 8.18204298156421e-05, + "loss": 2.5545, + "step": 11209 + }, + { + "epoch": 0.9046888870954726, + "grad_norm": 0.6790273189544678, + "learning_rate": 8.18049062122669e-05, + "loss": 2.4963, + "step": 11210 + }, + { + "epoch": 0.9047695908320555, + "grad_norm": 0.6888250708580017, + "learning_rate": 8.178938306236095e-05, + "loss": 2.5108, + "step": 11211 + }, + { + "epoch": 0.9048502945686385, + "grad_norm": 0.6438474059104919, + "learning_rate": 8.177386036631119e-05, + "loss": 2.4976, + "step": 11212 + }, + { + "epoch": 0.9049309983052215, + "grad_norm": 0.6786646842956543, + "learning_rate": 8.175833812450445e-05, + "loss": 2.4584, + "step": 11213 + }, + { + "epoch": 0.9050117020418046, + "grad_norm": 0.6480324268341064, + "learning_rate": 8.174281633732764e-05, + "loss": 2.5021, + "step": 11214 + }, + { + "epoch": 0.9050924057783876, + "grad_norm": 0.7232171893119812, + "learning_rate": 8.172729500516756e-05, + "loss": 2.4742, + "step": 11215 + }, + { + "epoch": 0.9051731095149705, + "grad_norm": 0.7048845291137695, + "learning_rate": 8.171177412841105e-05, + "loss": 2.518, + "step": 11216 + }, + { + "epoch": 0.9052538132515535, + "grad_norm": 0.6363180875778198, + "learning_rate": 8.169625370744496e-05, + "loss": 2.5154, + "step": 11217 + }, + { + "epoch": 0.9053345169881366, + 
"grad_norm": 0.7176045179367065, + "learning_rate": 8.168073374265605e-05, + "loss": 2.5182, + "step": 11218 + }, + { + "epoch": 0.9054152207247196, + "grad_norm": 0.7011643052101135, + "learning_rate": 8.166521423443112e-05, + "loss": 2.5615, + "step": 11219 + }, + { + "epoch": 0.9054959244613026, + "grad_norm": 0.6853327751159668, + "learning_rate": 8.164969518315704e-05, + "loss": 2.5057, + "step": 11220 + }, + { + "epoch": 0.9055766281978855, + "grad_norm": 0.6972528696060181, + "learning_rate": 8.163417658922049e-05, + "loss": 2.4949, + "step": 11221 + }, + { + "epoch": 0.9056573319344685, + "grad_norm": 0.6780978441238403, + "learning_rate": 8.161865845300824e-05, + "loss": 2.5601, + "step": 11222 + }, + { + "epoch": 0.9057380356710516, + "grad_norm": 0.6454098224639893, + "learning_rate": 8.160314077490711e-05, + "loss": 2.4203, + "step": 11223 + }, + { + "epoch": 0.9058187394076346, + "grad_norm": 0.7300907969474792, + "learning_rate": 8.158762355530378e-05, + "loss": 2.4818, + "step": 11224 + }, + { + "epoch": 0.9058994431442176, + "grad_norm": 0.682475745677948, + "learning_rate": 8.1572106794585e-05, + "loss": 2.4852, + "step": 11225 + }, + { + "epoch": 0.9059801468808005, + "grad_norm": 0.6666192412376404, + "learning_rate": 8.155659049313754e-05, + "loss": 2.5642, + "step": 11226 + }, + { + "epoch": 0.9060608506173836, + "grad_norm": 0.6873177886009216, + "learning_rate": 8.154107465134801e-05, + "loss": 2.5163, + "step": 11227 + }, + { + "epoch": 0.9061415543539666, + "grad_norm": 0.6704845428466797, + "learning_rate": 8.152555926960315e-05, + "loss": 2.5481, + "step": 11228 + }, + { + "epoch": 0.9062222580905496, + "grad_norm": 0.6340618133544922, + "learning_rate": 8.151004434828963e-05, + "loss": 2.4701, + "step": 11229 + }, + { + "epoch": 0.9063029618271325, + "grad_norm": 0.7886226177215576, + "learning_rate": 8.14945298877942e-05, + "loss": 2.5322, + "step": 11230 + }, + { + "epoch": 0.9063836655637156, + "grad_norm": 0.7086018919944763, + 
"learning_rate": 8.14790158885034e-05, + "loss": 2.4909, + "step": 11231 + }, + { + "epoch": 0.9064643693002986, + "grad_norm": 0.6791329979896545, + "learning_rate": 8.146350235080396e-05, + "loss": 2.4438, + "step": 11232 + }, + { + "epoch": 0.9065450730368816, + "grad_norm": 0.7070720791816711, + "learning_rate": 8.14479892750825e-05, + "loss": 2.528, + "step": 11233 + }, + { + "epoch": 0.9066257767734646, + "grad_norm": 0.6551348567008972, + "learning_rate": 8.143247666172564e-05, + "loss": 2.4747, + "step": 11234 + }, + { + "epoch": 0.9067064805100477, + "grad_norm": 0.6691645979881287, + "learning_rate": 8.141696451111997e-05, + "loss": 2.5038, + "step": 11235 + }, + { + "epoch": 0.9067871842466306, + "grad_norm": 0.6814864277839661, + "learning_rate": 8.14014528236522e-05, + "loss": 2.5737, + "step": 11236 + }, + { + "epoch": 0.9068678879832136, + "grad_norm": 0.7442377209663391, + "learning_rate": 8.138594159970877e-05, + "loss": 2.5839, + "step": 11237 + }, + { + "epoch": 0.9069485917197966, + "grad_norm": 0.6861338019371033, + "learning_rate": 8.137043083967634e-05, + "loss": 2.567, + "step": 11238 + }, + { + "epoch": 0.9070292954563797, + "grad_norm": 0.7056479454040527, + "learning_rate": 8.135492054394151e-05, + "loss": 2.5297, + "step": 11239 + }, + { + "epoch": 0.9071099991929626, + "grad_norm": 0.7166962623596191, + "learning_rate": 8.133941071289076e-05, + "loss": 2.4834, + "step": 11240 + }, + { + "epoch": 0.9071907029295456, + "grad_norm": 0.6285616159439087, + "learning_rate": 8.132390134691068e-05, + "loss": 2.5066, + "step": 11241 + }, + { + "epoch": 0.9072714066661286, + "grad_norm": 0.681915283203125, + "learning_rate": 8.130839244638783e-05, + "loss": 2.5387, + "step": 11242 + }, + { + "epoch": 0.9073521104027117, + "grad_norm": 0.6876898407936096, + "learning_rate": 8.129288401170866e-05, + "loss": 2.4465, + "step": 11243 + }, + { + "epoch": 0.9074328141392947, + "grad_norm": 0.657132625579834, + "learning_rate": 8.127737604325975e-05, + 
"loss": 2.499, + "step": 11244 + }, + { + "epoch": 0.9075135178758776, + "grad_norm": 0.6678825616836548, + "learning_rate": 8.126186854142752e-05, + "loss": 2.4872, + "step": 11245 + }, + { + "epoch": 0.9075942216124606, + "grad_norm": 0.7296879291534424, + "learning_rate": 8.124636150659858e-05, + "loss": 2.4783, + "step": 11246 + }, + { + "epoch": 0.9076749253490437, + "grad_norm": 0.7087056040763855, + "learning_rate": 8.12308549391593e-05, + "loss": 2.507, + "step": 11247 + }, + { + "epoch": 0.9077556290856267, + "grad_norm": 0.7099738121032715, + "learning_rate": 8.121534883949616e-05, + "loss": 2.5317, + "step": 11248 + }, + { + "epoch": 0.9078363328222097, + "grad_norm": 0.6421170830726624, + "learning_rate": 8.119984320799566e-05, + "loss": 2.5291, + "step": 11249 + }, + { + "epoch": 0.9079170365587926, + "grad_norm": 0.6835018396377563, + "learning_rate": 8.11843380450442e-05, + "loss": 2.5523, + "step": 11250 + }, + { + "epoch": 0.9079977402953757, + "grad_norm": 0.6638229489326477, + "learning_rate": 8.11688333510282e-05, + "loss": 2.5128, + "step": 11251 + }, + { + "epoch": 0.9080784440319587, + "grad_norm": 0.6783459186553955, + "learning_rate": 8.115332912633415e-05, + "loss": 2.5485, + "step": 11252 + }, + { + "epoch": 0.9081591477685417, + "grad_norm": 0.65911865234375, + "learning_rate": 8.113782537134838e-05, + "loss": 2.5408, + "step": 11253 + }, + { + "epoch": 0.9082398515051247, + "grad_norm": 0.6844244003295898, + "learning_rate": 8.112232208645729e-05, + "loss": 2.6067, + "step": 11254 + }, + { + "epoch": 0.9083205552417077, + "grad_norm": 0.6896870136260986, + "learning_rate": 8.110681927204729e-05, + "loss": 2.5444, + "step": 11255 + }, + { + "epoch": 0.9084012589782907, + "grad_norm": 0.6693820953369141, + "learning_rate": 8.109131692850473e-05, + "loss": 2.5118, + "step": 11256 + }, + { + "epoch": 0.9084819627148737, + "grad_norm": 0.6401854753494263, + "learning_rate": 8.107581505621599e-05, + "loss": 2.4811, + "step": 11257 + }, + { + 
"epoch": 0.9085626664514567, + "grad_norm": 0.6861663460731506, + "learning_rate": 8.106031365556743e-05, + "loss": 2.4633, + "step": 11258 + }, + { + "epoch": 0.9086433701880398, + "grad_norm": 0.6631655097007751, + "learning_rate": 8.104481272694533e-05, + "loss": 2.5748, + "step": 11259 + }, + { + "epoch": 0.9087240739246227, + "grad_norm": 0.6499454975128174, + "learning_rate": 8.102931227073604e-05, + "loss": 2.5573, + "step": 11260 + }, + { + "epoch": 0.9088047776612057, + "grad_norm": 0.7214524149894714, + "learning_rate": 8.10138122873259e-05, + "loss": 2.4905, + "step": 11261 + }, + { + "epoch": 0.9088854813977887, + "grad_norm": 0.6481152176856995, + "learning_rate": 8.099831277710122e-05, + "loss": 2.5073, + "step": 11262 + }, + { + "epoch": 0.9089661851343718, + "grad_norm": 0.6666486859321594, + "learning_rate": 8.09828137404482e-05, + "loss": 2.5379, + "step": 11263 + }, + { + "epoch": 0.9090468888709548, + "grad_norm": 0.7186474800109863, + "learning_rate": 8.096731517775319e-05, + "loss": 2.5164, + "step": 11264 + }, + { + "epoch": 0.9091275926075377, + "grad_norm": 0.6838653087615967, + "learning_rate": 8.095181708940245e-05, + "loss": 2.49, + "step": 11265 + }, + { + "epoch": 0.9092082963441207, + "grad_norm": 0.7740866541862488, + "learning_rate": 8.093631947578221e-05, + "loss": 2.5487, + "step": 11266 + }, + { + "epoch": 0.9092890000807038, + "grad_norm": 0.7198607325553894, + "learning_rate": 8.092082233727871e-05, + "loss": 2.4477, + "step": 11267 + }, + { + "epoch": 0.9093697038172868, + "grad_norm": 0.6454673409461975, + "learning_rate": 8.090532567427825e-05, + "loss": 2.523, + "step": 11268 + }, + { + "epoch": 0.9094504075538697, + "grad_norm": 0.6169581413269043, + "learning_rate": 8.088982948716692e-05, + "loss": 2.4924, + "step": 11269 + }, + { + "epoch": 0.9095311112904527, + "grad_norm": 0.7034861445426941, + "learning_rate": 8.0874333776331e-05, + "loss": 2.4756, + "step": 11270 + }, + { + "epoch": 0.9096118150270357, + "grad_norm": 
0.7231355309486389, + "learning_rate": 8.085883854215671e-05, + "loss": 2.4963, + "step": 11271 + }, + { + "epoch": 0.9096925187636188, + "grad_norm": 0.6597892045974731, + "learning_rate": 8.084334378503017e-05, + "loss": 2.5617, + "step": 11272 + }, + { + "epoch": 0.9097732225002018, + "grad_norm": 0.7257365584373474, + "learning_rate": 8.082784950533759e-05, + "loss": 2.5293, + "step": 11273 + }, + { + "epoch": 0.9098539262367847, + "grad_norm": 0.7305313944816589, + "learning_rate": 8.081235570346512e-05, + "loss": 2.5355, + "step": 11274 + }, + { + "epoch": 0.9099346299733677, + "grad_norm": 0.6814435720443726, + "learning_rate": 8.07968623797989e-05, + "loss": 2.4842, + "step": 11275 + }, + { + "epoch": 0.9100153337099508, + "grad_norm": 0.7342902421951294, + "learning_rate": 8.078136953472506e-05, + "loss": 2.4817, + "step": 11276 + }, + { + "epoch": 0.9100960374465338, + "grad_norm": 0.6456516981124878, + "learning_rate": 8.076587716862973e-05, + "loss": 2.5119, + "step": 11277 + }, + { + "epoch": 0.9101767411831168, + "grad_norm": 0.7268881797790527, + "learning_rate": 8.075038528189906e-05, + "loss": 2.4614, + "step": 11278 + }, + { + "epoch": 0.9102574449196997, + "grad_norm": 0.6901549696922302, + "learning_rate": 8.073489387491906e-05, + "loss": 2.5411, + "step": 11279 + }, + { + "epoch": 0.9103381486562828, + "grad_norm": 0.6850160956382751, + "learning_rate": 8.071940294807588e-05, + "loss": 2.5078, + "step": 11280 + }, + { + "epoch": 0.9104188523928658, + "grad_norm": 0.6550731658935547, + "learning_rate": 8.070391250175558e-05, + "loss": 2.5502, + "step": 11281 + }, + { + "epoch": 0.9104995561294488, + "grad_norm": 0.7524412274360657, + "learning_rate": 8.068842253634421e-05, + "loss": 2.4699, + "step": 11282 + }, + { + "epoch": 0.9105802598660317, + "grad_norm": 0.6659243702888489, + "learning_rate": 8.067293305222784e-05, + "loss": 2.557, + "step": 11283 + }, + { + "epoch": 0.9106609636026148, + "grad_norm": 0.67015540599823, + "learning_rate": 
8.065744404979251e-05, + "loss": 2.5929, + "step": 11284 + }, + { + "epoch": 0.9107416673391978, + "grad_norm": 0.7139000296592712, + "learning_rate": 8.064195552942422e-05, + "loss": 2.5262, + "step": 11285 + }, + { + "epoch": 0.9108223710757808, + "grad_norm": 0.6918016672134399, + "learning_rate": 8.062646749150899e-05, + "loss": 2.5161, + "step": 11286 + }, + { + "epoch": 0.9109030748123638, + "grad_norm": 0.7395541667938232, + "learning_rate": 8.061097993643289e-05, + "loss": 2.5351, + "step": 11287 + }, + { + "epoch": 0.9109837785489469, + "grad_norm": 0.6794499158859253, + "learning_rate": 8.05954928645818e-05, + "loss": 2.4617, + "step": 11288 + }, + { + "epoch": 0.9110644822855298, + "grad_norm": 0.6906577348709106, + "learning_rate": 8.058000627634176e-05, + "loss": 2.5701, + "step": 11289 + }, + { + "epoch": 0.9111451860221128, + "grad_norm": 0.6954079866409302, + "learning_rate": 8.056452017209874e-05, + "loss": 2.5137, + "step": 11290 + }, + { + "epoch": 0.9112258897586958, + "grad_norm": 0.7381381988525391, + "learning_rate": 8.054903455223866e-05, + "loss": 2.6666, + "step": 11291 + }, + { + "epoch": 0.9113065934952789, + "grad_norm": 0.6731518507003784, + "learning_rate": 8.053354941714749e-05, + "loss": 2.5173, + "step": 11292 + }, + { + "epoch": 0.9113872972318618, + "grad_norm": 0.6976885795593262, + "learning_rate": 8.051806476721116e-05, + "loss": 2.5089, + "step": 11293 + }, + { + "epoch": 0.9114680009684448, + "grad_norm": 0.6401965618133545, + "learning_rate": 8.050258060281562e-05, + "loss": 2.5295, + "step": 11294 + }, + { + "epoch": 0.9115487047050278, + "grad_norm": 0.7409671545028687, + "learning_rate": 8.048709692434667e-05, + "loss": 2.5074, + "step": 11295 + }, + { + "epoch": 0.9116294084416109, + "grad_norm": 0.6028234958648682, + "learning_rate": 8.04716137321903e-05, + "loss": 2.5437, + "step": 11296 + }, + { + "epoch": 0.9117101121781939, + "grad_norm": 0.727643609046936, + "learning_rate": 8.04561310267324e-05, + "loss": 2.5272, 
+ "step": 11297 + }, + { + "epoch": 0.9117908159147768, + "grad_norm": 0.6912926435470581, + "learning_rate": 8.044064880835876e-05, + "loss": 2.5166, + "step": 11298 + }, + { + "epoch": 0.9118715196513598, + "grad_norm": 0.6971367001533508, + "learning_rate": 8.042516707745528e-05, + "loss": 2.5421, + "step": 11299 + }, + { + "epoch": 0.9119522233879429, + "grad_norm": 0.6722451448440552, + "learning_rate": 8.040968583440783e-05, + "loss": 2.5088, + "step": 11300 + }, + { + "epoch": 0.9120329271245259, + "grad_norm": 0.6469144225120544, + "learning_rate": 8.03942050796022e-05, + "loss": 2.4921, + "step": 11301 + }, + { + "epoch": 0.9121136308611089, + "grad_norm": 0.6709008812904358, + "learning_rate": 8.037872481342423e-05, + "loss": 2.4553, + "step": 11302 + }, + { + "epoch": 0.9121943345976918, + "grad_norm": 0.6540920734405518, + "learning_rate": 8.036324503625977e-05, + "loss": 2.489, + "step": 11303 + }, + { + "epoch": 0.9122750383342749, + "grad_norm": 0.6589755415916443, + "learning_rate": 8.034776574849453e-05, + "loss": 2.5195, + "step": 11304 + }, + { + "epoch": 0.9123557420708579, + "grad_norm": 0.676943838596344, + "learning_rate": 8.033228695051434e-05, + "loss": 2.4877, + "step": 11305 + }, + { + "epoch": 0.9124364458074409, + "grad_norm": 0.6509177088737488, + "learning_rate": 8.031680864270498e-05, + "loss": 2.5229, + "step": 11306 + }, + { + "epoch": 0.9125171495440239, + "grad_norm": 0.7480820417404175, + "learning_rate": 8.030133082545219e-05, + "loss": 2.5016, + "step": 11307 + }, + { + "epoch": 0.9125978532806069, + "grad_norm": 0.7130550742149353, + "learning_rate": 8.028585349914174e-05, + "loss": 2.5251, + "step": 11308 + }, + { + "epoch": 0.9126785570171899, + "grad_norm": 0.6959688067436218, + "learning_rate": 8.027037666415934e-05, + "loss": 2.4776, + "step": 11309 + }, + { + "epoch": 0.9127592607537729, + "grad_norm": 0.7540854215621948, + "learning_rate": 8.025490032089076e-05, + "loss": 2.5097, + "step": 11310 + }, + { + "epoch": 
0.9128399644903559, + "grad_norm": 0.6921199560165405, + "learning_rate": 8.023942446972165e-05, + "loss": 2.5354, + "step": 11311 + }, + { + "epoch": 0.912920668226939, + "grad_norm": 0.649824857711792, + "learning_rate": 8.022394911103774e-05, + "loss": 2.5398, + "step": 11312 + }, + { + "epoch": 0.9130013719635219, + "grad_norm": 0.6951068639755249, + "learning_rate": 8.020847424522474e-05, + "loss": 2.5302, + "step": 11313 + }, + { + "epoch": 0.9130820757001049, + "grad_norm": 0.6906851530075073, + "learning_rate": 8.019299987266827e-05, + "loss": 2.581, + "step": 11314 + }, + { + "epoch": 0.9131627794366879, + "grad_norm": 0.6758459210395813, + "learning_rate": 8.0177525993754e-05, + "loss": 2.5208, + "step": 11315 + }, + { + "epoch": 0.913243483173271, + "grad_norm": 0.6915175318717957, + "learning_rate": 8.016205260886766e-05, + "loss": 2.5386, + "step": 11316 + }, + { + "epoch": 0.913324186909854, + "grad_norm": 0.7083550691604614, + "learning_rate": 8.014657971839476e-05, + "loss": 2.4895, + "step": 11317 + }, + { + "epoch": 0.9134048906464369, + "grad_norm": 0.7052562832832336, + "learning_rate": 8.013110732272102e-05, + "loss": 2.4896, + "step": 11318 + }, + { + "epoch": 0.9134855943830199, + "grad_norm": 0.7811834216117859, + "learning_rate": 8.011563542223206e-05, + "loss": 2.5082, + "step": 11319 + }, + { + "epoch": 0.913566298119603, + "grad_norm": 0.6207153797149658, + "learning_rate": 8.01001640173134e-05, + "loss": 2.4967, + "step": 11320 + }, + { + "epoch": 0.913647001856186, + "grad_norm": 0.7637950778007507, + "learning_rate": 8.008469310835065e-05, + "loss": 2.4907, + "step": 11321 + }, + { + "epoch": 0.913727705592769, + "grad_norm": 0.7263950705528259, + "learning_rate": 8.006922269572947e-05, + "loss": 2.5259, + "step": 11322 + }, + { + "epoch": 0.9138084093293519, + "grad_norm": 0.6965721845626831, + "learning_rate": 8.005375277983531e-05, + "loss": 2.5648, + "step": 11323 + }, + { + "epoch": 0.9138891130659349, + "grad_norm": 
0.7146127223968506, + "learning_rate": 8.003828336105377e-05, + "loss": 2.53, + "step": 11324 + }, + { + "epoch": 0.913969816802518, + "grad_norm": 0.7083697319030762, + "learning_rate": 8.00228144397704e-05, + "loss": 2.4923, + "step": 11325 + }, + { + "epoch": 0.914050520539101, + "grad_norm": 0.7259312868118286, + "learning_rate": 8.000734601637074e-05, + "loss": 2.5303, + "step": 11326 + }, + { + "epoch": 0.9141312242756839, + "grad_norm": 0.7072086930274963, + "learning_rate": 7.999187809124025e-05, + "loss": 2.4662, + "step": 11327 + }, + { + "epoch": 0.9142119280122669, + "grad_norm": 0.7216035723686218, + "learning_rate": 7.997641066476445e-05, + "loss": 2.5069, + "step": 11328 + }, + { + "epoch": 0.91429263174885, + "grad_norm": 0.6925712823867798, + "learning_rate": 7.99609437373289e-05, + "loss": 2.5107, + "step": 11329 + }, + { + "epoch": 0.914373335485433, + "grad_norm": 0.6672701835632324, + "learning_rate": 7.994547730931896e-05, + "loss": 2.5248, + "step": 11330 + }, + { + "epoch": 0.914454039222016, + "grad_norm": 0.8058515787124634, + "learning_rate": 7.993001138112016e-05, + "loss": 2.4427, + "step": 11331 + }, + { + "epoch": 0.9145347429585989, + "grad_norm": 0.6942592859268188, + "learning_rate": 7.991454595311795e-05, + "loss": 2.6163, + "step": 11332 + }, + { + "epoch": 0.914615446695182, + "grad_norm": 0.7051894068717957, + "learning_rate": 7.989908102569774e-05, + "loss": 2.5327, + "step": 11333 + }, + { + "epoch": 0.914696150431765, + "grad_norm": 0.6824771761894226, + "learning_rate": 7.988361659924496e-05, + "loss": 2.4843, + "step": 11334 + }, + { + "epoch": 0.914776854168348, + "grad_norm": 0.6756488084793091, + "learning_rate": 7.98681526741451e-05, + "loss": 2.5215, + "step": 11335 + }, + { + "epoch": 0.914857557904931, + "grad_norm": 0.6988239288330078, + "learning_rate": 7.985268925078344e-05, + "loss": 2.5153, + "step": 11336 + }, + { + "epoch": 0.914938261641514, + "grad_norm": 0.6446006298065186, + "learning_rate": 
7.983722632954544e-05, + "loss": 2.5081, + "step": 11337 + }, + { + "epoch": 0.915018965378097, + "grad_norm": 0.6828100681304932, + "learning_rate": 7.982176391081649e-05, + "loss": 2.5607, + "step": 11338 + }, + { + "epoch": 0.91509966911468, + "grad_norm": 0.659721851348877, + "learning_rate": 7.980630199498193e-05, + "loss": 2.531, + "step": 11339 + }, + { + "epoch": 0.915180372851263, + "grad_norm": 0.6298564076423645, + "learning_rate": 7.979084058242709e-05, + "loss": 2.513, + "step": 11340 + }, + { + "epoch": 0.9152610765878461, + "grad_norm": 0.664299726486206, + "learning_rate": 7.977537967353735e-05, + "loss": 2.5533, + "step": 11341 + }, + { + "epoch": 0.915341780324429, + "grad_norm": 0.7035108804702759, + "learning_rate": 7.975991926869801e-05, + "loss": 2.4868, + "step": 11342 + }, + { + "epoch": 0.915422484061012, + "grad_norm": 0.7428407073020935, + "learning_rate": 7.974445936829438e-05, + "loss": 2.5694, + "step": 11343 + }, + { + "epoch": 0.915503187797595, + "grad_norm": 0.6845505237579346, + "learning_rate": 7.972899997271176e-05, + "loss": 2.5092, + "step": 11344 + }, + { + "epoch": 0.9155838915341781, + "grad_norm": 0.7135340571403503, + "learning_rate": 7.971354108233551e-05, + "loss": 2.5157, + "step": 11345 + }, + { + "epoch": 0.915664595270761, + "grad_norm": 0.7032433152198792, + "learning_rate": 7.969808269755077e-05, + "loss": 2.5292, + "step": 11346 + }, + { + "epoch": 0.915745299007344, + "grad_norm": 0.6874690651893616, + "learning_rate": 7.96826248187429e-05, + "loss": 2.5312, + "step": 11347 + }, + { + "epoch": 0.915826002743927, + "grad_norm": 0.6497030258178711, + "learning_rate": 7.966716744629718e-05, + "loss": 2.505, + "step": 11348 + }, + { + "epoch": 0.9159067064805101, + "grad_norm": 0.6618520021438599, + "learning_rate": 7.965171058059874e-05, + "loss": 2.5287, + "step": 11349 + }, + { + "epoch": 0.9159874102170931, + "grad_norm": 0.6737041473388672, + "learning_rate": 7.963625422203288e-05, + "loss": 2.5494, + "step": 
11350 + }, + { + "epoch": 0.916068113953676, + "grad_norm": 0.705646276473999, + "learning_rate": 7.96207983709848e-05, + "loss": 2.5402, + "step": 11351 + }, + { + "epoch": 0.916148817690259, + "grad_norm": 0.6852068901062012, + "learning_rate": 7.96053430278397e-05, + "loss": 2.51, + "step": 11352 + }, + { + "epoch": 0.9162295214268421, + "grad_norm": 0.7166822552680969, + "learning_rate": 7.958988819298274e-05, + "loss": 2.576, + "step": 11353 + }, + { + "epoch": 0.9163102251634251, + "grad_norm": 0.6349207162857056, + "learning_rate": 7.957443386679913e-05, + "loss": 2.5219, + "step": 11354 + }, + { + "epoch": 0.9163909289000081, + "grad_norm": 0.6504647135734558, + "learning_rate": 7.955898004967406e-05, + "loss": 2.4593, + "step": 11355 + }, + { + "epoch": 0.916471632636591, + "grad_norm": 0.7313871383666992, + "learning_rate": 7.95435267419926e-05, + "loss": 2.5616, + "step": 11356 + }, + { + "epoch": 0.9165523363731741, + "grad_norm": 0.6948587894439697, + "learning_rate": 7.95280739441399e-05, + "loss": 2.4608, + "step": 11357 + }, + { + "epoch": 0.9166330401097571, + "grad_norm": 0.6130328178405762, + "learning_rate": 7.95126216565012e-05, + "loss": 2.5563, + "step": 11358 + }, + { + "epoch": 0.9167137438463401, + "grad_norm": 0.7149228453636169, + "learning_rate": 7.949716987946145e-05, + "loss": 2.5664, + "step": 11359 + }, + { + "epoch": 0.916794447582923, + "grad_norm": 0.7452285289764404, + "learning_rate": 7.948171861340584e-05, + "loss": 2.525, + "step": 11360 + }, + { + "epoch": 0.9168751513195061, + "grad_norm": 0.6840611100196838, + "learning_rate": 7.946626785871945e-05, + "loss": 2.537, + "step": 11361 + }, + { + "epoch": 0.9169558550560891, + "grad_norm": 0.7269708514213562, + "learning_rate": 7.945081761578732e-05, + "loss": 2.5227, + "step": 11362 + }, + { + "epoch": 0.9170365587926721, + "grad_norm": 0.6521697044372559, + "learning_rate": 7.943536788499452e-05, + "loss": 2.54, + "step": 11363 + }, + { + "epoch": 0.9171172625292551, + 
"grad_norm": 0.6516863107681274, + "learning_rate": 7.941991866672618e-05, + "loss": 2.4788, + "step": 11364 + }, + { + "epoch": 0.9171979662658382, + "grad_norm": 0.7673580050468445, + "learning_rate": 7.94044699613672e-05, + "loss": 2.4678, + "step": 11365 + }, + { + "epoch": 0.9172786700024211, + "grad_norm": 0.6666994690895081, + "learning_rate": 7.938902176930268e-05, + "loss": 2.5251, + "step": 11366 + }, + { + "epoch": 0.9173593737390041, + "grad_norm": 0.7261863946914673, + "learning_rate": 7.937357409091761e-05, + "loss": 2.4977, + "step": 11367 + }, + { + "epoch": 0.9174400774755871, + "grad_norm": 0.6920679807662964, + "learning_rate": 7.9358126926597e-05, + "loss": 2.5367, + "step": 11368 + }, + { + "epoch": 0.9175207812121702, + "grad_norm": 0.6715712547302246, + "learning_rate": 7.93426802767258e-05, + "loss": 2.4898, + "step": 11369 + }, + { + "epoch": 0.9176014849487532, + "grad_norm": 0.7014333605766296, + "learning_rate": 7.932723414168904e-05, + "loss": 2.4507, + "step": 11370 + }, + { + "epoch": 0.9176821886853361, + "grad_norm": 0.6755761504173279, + "learning_rate": 7.931178852187163e-05, + "loss": 2.5895, + "step": 11371 + }, + { + "epoch": 0.9177628924219191, + "grad_norm": 0.6846731305122375, + "learning_rate": 7.929634341765852e-05, + "loss": 2.5002, + "step": 11372 + }, + { + "epoch": 0.9178435961585021, + "grad_norm": 0.6422831416130066, + "learning_rate": 7.928089882943466e-05, + "loss": 2.5326, + "step": 11373 + }, + { + "epoch": 0.9179242998950852, + "grad_norm": 0.7256442308425903, + "learning_rate": 7.9265454757585e-05, + "loss": 2.5706, + "step": 11374 + }, + { + "epoch": 0.9180050036316681, + "grad_norm": 0.6514387130737305, + "learning_rate": 7.925001120249436e-05, + "loss": 2.5349, + "step": 11375 + }, + { + "epoch": 0.9180857073682511, + "grad_norm": 0.7596457600593567, + "learning_rate": 7.923456816454768e-05, + "loss": 2.4767, + "step": 11376 + }, + { + "epoch": 0.9181664111048341, + "grad_norm": 0.673283040523529, + 
"learning_rate": 7.921912564412988e-05, + "loss": 2.5156, + "step": 11377 + }, + { + "epoch": 0.9182471148414172, + "grad_norm": 0.6964103579521179, + "learning_rate": 7.920368364162575e-05, + "loss": 2.5293, + "step": 11378 + }, + { + "epoch": 0.9183278185780002, + "grad_norm": 0.6765062212944031, + "learning_rate": 7.91882421574202e-05, + "loss": 2.5757, + "step": 11379 + }, + { + "epoch": 0.9184085223145831, + "grad_norm": 0.7039035558700562, + "learning_rate": 7.917280119189811e-05, + "loss": 2.513, + "step": 11380 + }, + { + "epoch": 0.9184892260511661, + "grad_norm": 0.6523976922035217, + "learning_rate": 7.915736074544419e-05, + "loss": 2.4712, + "step": 11381 + }, + { + "epoch": 0.9185699297877492, + "grad_norm": 0.7159552574157715, + "learning_rate": 7.914192081844334e-05, + "loss": 2.4713, + "step": 11382 + }, + { + "epoch": 0.9186506335243322, + "grad_norm": 0.7071694731712341, + "learning_rate": 7.912648141128036e-05, + "loss": 2.5367, + "step": 11383 + }, + { + "epoch": 0.9187313372609152, + "grad_norm": 0.6675183773040771, + "learning_rate": 7.911104252434e-05, + "loss": 2.5372, + "step": 11384 + }, + { + "epoch": 0.9188120409974981, + "grad_norm": 0.7293995022773743, + "learning_rate": 7.909560415800707e-05, + "loss": 2.5469, + "step": 11385 + }, + { + "epoch": 0.9188927447340812, + "grad_norm": 0.6774035096168518, + "learning_rate": 7.908016631266635e-05, + "loss": 2.5655, + "step": 11386 + }, + { + "epoch": 0.9189734484706642, + "grad_norm": 0.7068144083023071, + "learning_rate": 7.906472898870256e-05, + "loss": 2.5265, + "step": 11387 + }, + { + "epoch": 0.9190541522072472, + "grad_norm": 0.6756324172019958, + "learning_rate": 7.904929218650044e-05, + "loss": 2.4966, + "step": 11388 + }, + { + "epoch": 0.9191348559438302, + "grad_norm": 0.6964625120162964, + "learning_rate": 7.903385590644473e-05, + "loss": 2.5646, + "step": 11389 + }, + { + "epoch": 0.9192155596804132, + "grad_norm": 0.6760976314544678, + "learning_rate": 7.901842014892018e-05, + 
"loss": 2.5159, + "step": 11390 + }, + { + "epoch": 0.9192962634169962, + "grad_norm": 0.6648714542388916, + "learning_rate": 7.900298491431139e-05, + "loss": 2.5715, + "step": 11391 + }, + { + "epoch": 0.9193769671535792, + "grad_norm": 0.7492914199829102, + "learning_rate": 7.898755020300312e-05, + "loss": 2.5226, + "step": 11392 + }, + { + "epoch": 0.9194576708901622, + "grad_norm": 0.7041164040565491, + "learning_rate": 7.897211601538004e-05, + "loss": 2.5809, + "step": 11393 + }, + { + "epoch": 0.9195383746267453, + "grad_norm": 0.6746383309364319, + "learning_rate": 7.895668235182677e-05, + "loss": 2.5369, + "step": 11394 + }, + { + "epoch": 0.9196190783633282, + "grad_norm": 0.6486156582832336, + "learning_rate": 7.894124921272798e-05, + "loss": 2.5406, + "step": 11395 + }, + { + "epoch": 0.9196997820999112, + "grad_norm": 0.6828807592391968, + "learning_rate": 7.892581659846834e-05, + "loss": 2.5241, + "step": 11396 + }, + { + "epoch": 0.9197804858364942, + "grad_norm": 0.694970428943634, + "learning_rate": 7.891038450943242e-05, + "loss": 2.4402, + "step": 11397 + }, + { + "epoch": 0.9198611895730773, + "grad_norm": 0.7187039852142334, + "learning_rate": 7.889495294600484e-05, + "loss": 2.5052, + "step": 11398 + }, + { + "epoch": 0.9199418933096603, + "grad_norm": 0.6919832825660706, + "learning_rate": 7.887952190857024e-05, + "loss": 2.5078, + "step": 11399 + }, + { + "epoch": 0.9200225970462432, + "grad_norm": 0.7129504084587097, + "learning_rate": 7.886409139751313e-05, + "loss": 2.5047, + "step": 11400 + }, + { + "epoch": 0.9201033007828262, + "grad_norm": 0.6755272746086121, + "learning_rate": 7.88486614132181e-05, + "loss": 2.4821, + "step": 11401 + }, + { + "epoch": 0.9201840045194093, + "grad_norm": 0.7253937125205994, + "learning_rate": 7.883323195606973e-05, + "loss": 2.5062, + "step": 11402 + }, + { + "epoch": 0.9202647082559923, + "grad_norm": 0.7057155966758728, + "learning_rate": 7.881780302645257e-05, + "loss": 2.5475, + "step": 11403 + }, + 
{ + "epoch": 0.9203454119925752, + "grad_norm": 0.713869571685791, + "learning_rate": 7.880237462475111e-05, + "loss": 2.5335, + "step": 11404 + }, + { + "epoch": 0.9204261157291582, + "grad_norm": 0.769648551940918, + "learning_rate": 7.878694675134987e-05, + "loss": 2.4944, + "step": 11405 + }, + { + "epoch": 0.9205068194657413, + "grad_norm": 0.6444964408874512, + "learning_rate": 7.877151940663343e-05, + "loss": 2.5755, + "step": 11406 + }, + { + "epoch": 0.9205875232023243, + "grad_norm": 0.6811819672584534, + "learning_rate": 7.875609259098618e-05, + "loss": 2.5475, + "step": 11407 + }, + { + "epoch": 0.9206682269389073, + "grad_norm": 0.6959417462348938, + "learning_rate": 7.874066630479259e-05, + "loss": 2.5095, + "step": 11408 + }, + { + "epoch": 0.9207489306754902, + "grad_norm": 0.6721363067626953, + "learning_rate": 7.872524054843724e-05, + "loss": 2.5166, + "step": 11409 + }, + { + "epoch": 0.9208296344120733, + "grad_norm": 0.713122546672821, + "learning_rate": 7.870981532230447e-05, + "loss": 2.5084, + "step": 11410 + }, + { + "epoch": 0.9209103381486563, + "grad_norm": 0.7059469819068909, + "learning_rate": 7.869439062677876e-05, + "loss": 2.437, + "step": 11411 + }, + { + "epoch": 0.9209910418852393, + "grad_norm": 0.6808314323425293, + "learning_rate": 7.867896646224454e-05, + "loss": 2.5658, + "step": 11412 + }, + { + "epoch": 0.9210717456218223, + "grad_norm": 0.7060894966125488, + "learning_rate": 7.86635428290862e-05, + "loss": 2.515, + "step": 11413 + }, + { + "epoch": 0.9211524493584053, + "grad_norm": 0.7538465857505798, + "learning_rate": 7.864811972768813e-05, + "loss": 2.4448, + "step": 11414 + }, + { + "epoch": 0.9212331530949883, + "grad_norm": 0.6824522018432617, + "learning_rate": 7.863269715843478e-05, + "loss": 2.503, + "step": 11415 + }, + { + "epoch": 0.9213138568315713, + "grad_norm": 0.7068174481391907, + "learning_rate": 7.861727512171044e-05, + "loss": 2.5198, + "step": 11416 + }, + { + "epoch": 0.9213945605681543, + 
"grad_norm": 0.6742961406707764, + "learning_rate": 7.860185361789948e-05, + "loss": 2.5167, + "step": 11417 + }, + { + "epoch": 0.9214752643047374, + "grad_norm": 0.7643383741378784, + "learning_rate": 7.858643264738628e-05, + "loss": 2.5508, + "step": 11418 + }, + { + "epoch": 0.9215559680413203, + "grad_norm": 0.6737802028656006, + "learning_rate": 7.857101221055518e-05, + "loss": 2.589, + "step": 11419 + }, + { + "epoch": 0.9216366717779033, + "grad_norm": 0.668214738368988, + "learning_rate": 7.855559230779043e-05, + "loss": 2.4747, + "step": 11420 + }, + { + "epoch": 0.9217173755144863, + "grad_norm": 0.6933084726333618, + "learning_rate": 7.854017293947638e-05, + "loss": 2.5171, + "step": 11421 + }, + { + "epoch": 0.9217980792510694, + "grad_norm": 0.6320228576660156, + "learning_rate": 7.852475410599736e-05, + "loss": 2.5213, + "step": 11422 + }, + { + "epoch": 0.9218787829876524, + "grad_norm": 0.6578245759010315, + "learning_rate": 7.850933580773756e-05, + "loss": 2.5085, + "step": 11423 + }, + { + "epoch": 0.9219594867242353, + "grad_norm": 0.6741796135902405, + "learning_rate": 7.849391804508129e-05, + "loss": 2.5294, + "step": 11424 + }, + { + "epoch": 0.9220401904608183, + "grad_norm": 0.6875781416893005, + "learning_rate": 7.847850081841285e-05, + "loss": 2.5034, + "step": 11425 + }, + { + "epoch": 0.9221208941974013, + "grad_norm": 0.6515244245529175, + "learning_rate": 7.846308412811638e-05, + "loss": 2.4707, + "step": 11426 + }, + { + "epoch": 0.9222015979339844, + "grad_norm": 0.7326812148094177, + "learning_rate": 7.844766797457615e-05, + "loss": 2.5049, + "step": 11427 + }, + { + "epoch": 0.9222823016705674, + "grad_norm": 0.7539918422698975, + "learning_rate": 7.84322523581764e-05, + "loss": 2.4726, + "step": 11428 + }, + { + "epoch": 0.9223630054071503, + "grad_norm": 0.745468020439148, + "learning_rate": 7.841683727930129e-05, + "loss": 2.5003, + "step": 11429 + }, + { + "epoch": 0.9224437091437333, + "grad_norm": 0.726362943649292, + 
"learning_rate": 7.840142273833499e-05, + "loss": 2.5056, + "step": 11430 + }, + { + "epoch": 0.9225244128803164, + "grad_norm": 0.7275403738021851, + "learning_rate": 7.838600873566175e-05, + "loss": 2.5188, + "step": 11431 + }, + { + "epoch": 0.9226051166168994, + "grad_norm": 0.6908789873123169, + "learning_rate": 7.837059527166563e-05, + "loss": 2.5349, + "step": 11432 + }, + { + "epoch": 0.9226858203534823, + "grad_norm": 0.7220396399497986, + "learning_rate": 7.835518234673079e-05, + "loss": 2.4863, + "step": 11433 + }, + { + "epoch": 0.9227665240900653, + "grad_norm": 0.6516178846359253, + "learning_rate": 7.833976996124142e-05, + "loss": 2.556, + "step": 11434 + }, + { + "epoch": 0.9228472278266484, + "grad_norm": 0.6958726644515991, + "learning_rate": 7.832435811558163e-05, + "loss": 2.5286, + "step": 11435 + }, + { + "epoch": 0.9229279315632314, + "grad_norm": 0.7734121680259705, + "learning_rate": 7.830894681013546e-05, + "loss": 2.5087, + "step": 11436 + }, + { + "epoch": 0.9230086352998144, + "grad_norm": 0.709064245223999, + "learning_rate": 7.829353604528703e-05, + "loss": 2.4817, + "step": 11437 + }, + { + "epoch": 0.9230893390363973, + "grad_norm": 0.7224971652030945, + "learning_rate": 7.827812582142045e-05, + "loss": 2.5179, + "step": 11438 + }, + { + "epoch": 0.9231700427729804, + "grad_norm": 0.7139936685562134, + "learning_rate": 7.826271613891973e-05, + "loss": 2.537, + "step": 11439 + }, + { + "epoch": 0.9232507465095634, + "grad_norm": 0.671138346195221, + "learning_rate": 7.824730699816896e-05, + "loss": 2.4865, + "step": 11440 + }, + { + "epoch": 0.9233314502461464, + "grad_norm": 0.6547425389289856, + "learning_rate": 7.823189839955218e-05, + "loss": 2.509, + "step": 11441 + }, + { + "epoch": 0.9234121539827294, + "grad_norm": 0.719765305519104, + "learning_rate": 7.821649034345338e-05, + "loss": 2.591, + "step": 11442 + }, + { + "epoch": 0.9234928577193124, + "grad_norm": 0.7128504514694214, + "learning_rate": 7.820108283025656e-05, + 
"loss": 2.541, + "step": 11443 + }, + { + "epoch": 0.9235735614558954, + "grad_norm": 0.7711538672447205, + "learning_rate": 7.818567586034577e-05, + "loss": 2.5388, + "step": 11444 + }, + { + "epoch": 0.9236542651924784, + "grad_norm": 0.7151121497154236, + "learning_rate": 7.817026943410494e-05, + "loss": 2.5539, + "step": 11445 + }, + { + "epoch": 0.9237349689290614, + "grad_norm": 0.7009569406509399, + "learning_rate": 7.815486355191805e-05, + "loss": 2.4793, + "step": 11446 + }, + { + "epoch": 0.9238156726656445, + "grad_norm": 0.7251109480857849, + "learning_rate": 7.813945821416909e-05, + "loss": 2.5406, + "step": 11447 + }, + { + "epoch": 0.9238963764022274, + "grad_norm": 0.6907934546470642, + "learning_rate": 7.812405342124196e-05, + "loss": 2.5069, + "step": 11448 + }, + { + "epoch": 0.9239770801388104, + "grad_norm": 0.699207067489624, + "learning_rate": 7.810864917352061e-05, + "loss": 2.4844, + "step": 11449 + }, + { + "epoch": 0.9240577838753934, + "grad_norm": 0.718386173248291, + "learning_rate": 7.809324547138893e-05, + "loss": 2.5666, + "step": 11450 + }, + { + "epoch": 0.9241384876119765, + "grad_norm": 0.6420444846153259, + "learning_rate": 7.807784231523089e-05, + "loss": 2.506, + "step": 11451 + }, + { + "epoch": 0.9242191913485595, + "grad_norm": 0.6777252554893494, + "learning_rate": 7.806243970543028e-05, + "loss": 2.487, + "step": 11452 + }, + { + "epoch": 0.9242998950851424, + "grad_norm": 0.6907702684402466, + "learning_rate": 7.804703764237102e-05, + "loss": 2.5284, + "step": 11453 + }, + { + "epoch": 0.9243805988217254, + "grad_norm": 0.6383422613143921, + "learning_rate": 7.803163612643698e-05, + "loss": 2.4704, + "step": 11454 + }, + { + "epoch": 0.9244613025583085, + "grad_norm": 0.6879577040672302, + "learning_rate": 7.801623515801198e-05, + "loss": 2.5103, + "step": 11455 + }, + { + "epoch": 0.9245420062948915, + "grad_norm": 0.6856719851493835, + "learning_rate": 7.800083473747986e-05, + "loss": 2.5086, + "step": 11456 + }, + { 
+ "epoch": 0.9246227100314744, + "grad_norm": 0.7463707327842712, + "learning_rate": 7.79854348652245e-05, + "loss": 2.5456, + "step": 11457 + }, + { + "epoch": 0.9247034137680574, + "grad_norm": 0.7352643013000488, + "learning_rate": 7.79700355416296e-05, + "loss": 2.5335, + "step": 11458 + }, + { + "epoch": 0.9247841175046405, + "grad_norm": 0.7525908350944519, + "learning_rate": 7.795463676707897e-05, + "loss": 2.5855, + "step": 11459 + }, + { + "epoch": 0.9248648212412235, + "grad_norm": 0.7323870658874512, + "learning_rate": 7.79392385419565e-05, + "loss": 2.5471, + "step": 11460 + }, + { + "epoch": 0.9249455249778065, + "grad_norm": 0.7443860769271851, + "learning_rate": 7.792384086664582e-05, + "loss": 2.5449, + "step": 11461 + }, + { + "epoch": 0.9250262287143894, + "grad_norm": 0.6928641200065613, + "learning_rate": 7.790844374153073e-05, + "loss": 2.505, + "step": 11462 + }, + { + "epoch": 0.9251069324509725, + "grad_norm": 0.6491222381591797, + "learning_rate": 7.789304716699498e-05, + "loss": 2.5447, + "step": 11463 + }, + { + "epoch": 0.9251876361875555, + "grad_norm": 0.7351166009902954, + "learning_rate": 7.78776511434223e-05, + "loss": 2.524, + "step": 11464 + }, + { + "epoch": 0.9252683399241385, + "grad_norm": 0.6680036783218384, + "learning_rate": 7.786225567119637e-05, + "loss": 2.5019, + "step": 11465 + }, + { + "epoch": 0.9253490436607215, + "grad_norm": 0.7070801258087158, + "learning_rate": 7.784686075070089e-05, + "loss": 2.5052, + "step": 11466 + }, + { + "epoch": 0.9254297473973045, + "grad_norm": 0.7095211148262024, + "learning_rate": 7.783146638231957e-05, + "loss": 2.4998, + "step": 11467 + }, + { + "epoch": 0.9255104511338875, + "grad_norm": 0.6725812554359436, + "learning_rate": 7.781607256643604e-05, + "loss": 2.4909, + "step": 11468 + }, + { + "epoch": 0.9255911548704705, + "grad_norm": 0.684177577495575, + "learning_rate": 7.780067930343396e-05, + "loss": 2.5636, + "step": 11469 + }, + { + "epoch": 0.9256718586070535, + 
"grad_norm": 0.703419029712677, + "learning_rate": 7.778528659369702e-05, + "loss": 2.4295, + "step": 11470 + }, + { + "epoch": 0.9257525623436366, + "grad_norm": 0.6850195527076721, + "learning_rate": 7.776989443760877e-05, + "loss": 2.5143, + "step": 11471 + }, + { + "epoch": 0.9258332660802195, + "grad_norm": 0.7322348952293396, + "learning_rate": 7.775450283555286e-05, + "loss": 2.5616, + "step": 11472 + }, + { + "epoch": 0.9259139698168025, + "grad_norm": 0.6924510598182678, + "learning_rate": 7.77391117879129e-05, + "loss": 2.4796, + "step": 11473 + }, + { + "epoch": 0.9259946735533855, + "grad_norm": 0.7006441354751587, + "learning_rate": 7.772372129507249e-05, + "loss": 2.5142, + "step": 11474 + }, + { + "epoch": 0.9260753772899685, + "grad_norm": 0.6379218697547913, + "learning_rate": 7.770833135741513e-05, + "loss": 2.5366, + "step": 11475 + }, + { + "epoch": 0.9261560810265516, + "grad_norm": 0.676163375377655, + "learning_rate": 7.769294197532448e-05, + "loss": 2.4936, + "step": 11476 + }, + { + "epoch": 0.9262367847631345, + "grad_norm": 0.6964210271835327, + "learning_rate": 7.767755314918399e-05, + "loss": 2.429, + "step": 11477 + }, + { + "epoch": 0.9263174884997175, + "grad_norm": 0.7017048597335815, + "learning_rate": 7.766216487937722e-05, + "loss": 2.5488, + "step": 11478 + }, + { + "epoch": 0.9263981922363005, + "grad_norm": 0.6742509603500366, + "learning_rate": 7.76467771662877e-05, + "loss": 2.5121, + "step": 11479 + }, + { + "epoch": 0.9264788959728836, + "grad_norm": 0.6751403212547302, + "learning_rate": 7.763139001029893e-05, + "loss": 2.5897, + "step": 11480 + }, + { + "epoch": 0.9265595997094666, + "grad_norm": 0.6639657616615295, + "learning_rate": 7.761600341179439e-05, + "loss": 2.5015, + "step": 11481 + }, + { + "epoch": 0.9266403034460495, + "grad_norm": 0.6332827210426331, + "learning_rate": 7.760061737115756e-05, + "loss": 2.5518, + "step": 11482 + }, + { + "epoch": 0.9267210071826325, + "grad_norm": 0.6751062870025635, + 
"learning_rate": 7.758523188877192e-05, + "loss": 2.4252, + "step": 11483 + }, + { + "epoch": 0.9268017109192156, + "grad_norm": 0.6763231754302979, + "learning_rate": 7.756984696502084e-05, + "loss": 2.5683, + "step": 11484 + }, + { + "epoch": 0.9268824146557986, + "grad_norm": 0.6480380296707153, + "learning_rate": 7.755446260028784e-05, + "loss": 2.558, + "step": 11485 + }, + { + "epoch": 0.9269631183923815, + "grad_norm": 0.6925072073936462, + "learning_rate": 7.753907879495634e-05, + "loss": 2.5374, + "step": 11486 + }, + { + "epoch": 0.9270438221289645, + "grad_norm": 0.6771834492683411, + "learning_rate": 7.752369554940966e-05, + "loss": 2.5652, + "step": 11487 + }, + { + "epoch": 0.9271245258655476, + "grad_norm": 0.6747026443481445, + "learning_rate": 7.750831286403124e-05, + "loss": 2.5076, + "step": 11488 + }, + { + "epoch": 0.9272052296021306, + "grad_norm": 0.6727211475372314, + "learning_rate": 7.749293073920448e-05, + "loss": 2.4774, + "step": 11489 + }, + { + "epoch": 0.9272859333387136, + "grad_norm": 0.6334055066108704, + "learning_rate": 7.747754917531272e-05, + "loss": 2.5245, + "step": 11490 + }, + { + "epoch": 0.9273666370752965, + "grad_norm": 0.740700900554657, + "learning_rate": 7.746216817273928e-05, + "loss": 2.5485, + "step": 11491 + }, + { + "epoch": 0.9274473408118796, + "grad_norm": 0.6500691771507263, + "learning_rate": 7.744678773186757e-05, + "loss": 2.5277, + "step": 11492 + }, + { + "epoch": 0.9275280445484626, + "grad_norm": 0.6592985987663269, + "learning_rate": 7.743140785308084e-05, + "loss": 2.5304, + "step": 11493 + }, + { + "epoch": 0.9276087482850456, + "grad_norm": 0.6980452537536621, + "learning_rate": 7.741602853676241e-05, + "loss": 2.544, + "step": 11494 + }, + { + "epoch": 0.9276894520216286, + "grad_norm": 0.643190860748291, + "learning_rate": 7.740064978329555e-05, + "loss": 2.5167, + "step": 11495 + }, + { + "epoch": 0.9277701557582116, + "grad_norm": 0.6789804100990295, + "learning_rate": 7.738527159306366e-05, 
+ "loss": 2.5117, + "step": 11496 + }, + { + "epoch": 0.9278508594947946, + "grad_norm": 0.7109663486480713, + "learning_rate": 7.736989396644987e-05, + "loss": 2.5294, + "step": 11497 + }, + { + "epoch": 0.9279315632313776, + "grad_norm": 0.6752706170082092, + "learning_rate": 7.735451690383746e-05, + "loss": 2.4851, + "step": 11498 + }, + { + "epoch": 0.9280122669679606, + "grad_norm": 0.6947829723358154, + "learning_rate": 7.733914040560972e-05, + "loss": 2.5792, + "step": 11499 + }, + { + "epoch": 0.9280929707045437, + "grad_norm": 0.6701157689094543, + "learning_rate": 7.732376447214981e-05, + "loss": 2.4884, + "step": 11500 + }, + { + "epoch": 0.9281736744411266, + "grad_norm": 0.64533531665802, + "learning_rate": 7.730838910384097e-05, + "loss": 2.4644, + "step": 11501 + }, + { + "epoch": 0.9282543781777096, + "grad_norm": 0.6664395332336426, + "learning_rate": 7.729301430106644e-05, + "loss": 2.5286, + "step": 11502 + }, + { + "epoch": 0.9283350819142926, + "grad_norm": 0.6982395648956299, + "learning_rate": 7.72776400642093e-05, + "loss": 2.5092, + "step": 11503 + }, + { + "epoch": 0.9284157856508757, + "grad_norm": 0.6656171679496765, + "learning_rate": 7.726226639365278e-05, + "loss": 2.4945, + "step": 11504 + }, + { + "epoch": 0.9284964893874587, + "grad_norm": 0.6213308572769165, + "learning_rate": 7.724689328978001e-05, + "loss": 2.5042, + "step": 11505 + }, + { + "epoch": 0.9285771931240416, + "grad_norm": 0.6855599880218506, + "learning_rate": 7.723152075297414e-05, + "loss": 2.5207, + "step": 11506 + }, + { + "epoch": 0.9286578968606246, + "grad_norm": 0.7724171280860901, + "learning_rate": 7.721614878361828e-05, + "loss": 2.4842, + "step": 11507 + }, + { + "epoch": 0.9287386005972077, + "grad_norm": 0.708634614944458, + "learning_rate": 7.720077738209559e-05, + "loss": 2.58, + "step": 11508 + }, + { + "epoch": 0.9288193043337907, + "grad_norm": 0.6766082644462585, + "learning_rate": 7.718540654878907e-05, + "loss": 2.492, + "step": 11509 + }, + { 
+ "epoch": 0.9289000080703737, + "grad_norm": 0.6856982707977295, + "learning_rate": 7.717003628408187e-05, + "loss": 2.5186, + "step": 11510 + }, + { + "epoch": 0.9289807118069566, + "grad_norm": 0.680647611618042, + "learning_rate": 7.715466658835705e-05, + "loss": 2.5305, + "step": 11511 + }, + { + "epoch": 0.9290614155435397, + "grad_norm": 0.7174721360206604, + "learning_rate": 7.713929746199771e-05, + "loss": 2.4498, + "step": 11512 + }, + { + "epoch": 0.9291421192801227, + "grad_norm": 0.6507031321525574, + "learning_rate": 7.712392890538676e-05, + "loss": 2.5334, + "step": 11513 + }, + { + "epoch": 0.9292228230167057, + "grad_norm": 0.7545748353004456, + "learning_rate": 7.710856091890732e-05, + "loss": 2.505, + "step": 11514 + }, + { + "epoch": 0.9293035267532886, + "grad_norm": 0.6978560090065002, + "learning_rate": 7.709319350294242e-05, + "loss": 2.5243, + "step": 11515 + }, + { + "epoch": 0.9293842304898717, + "grad_norm": 0.6620199084281921, + "learning_rate": 7.707782665787497e-05, + "loss": 2.5114, + "step": 11516 + }, + { + "epoch": 0.9294649342264547, + "grad_norm": 0.7160476446151733, + "learning_rate": 7.7062460384088e-05, + "loss": 2.5322, + "step": 11517 + }, + { + "epoch": 0.9295456379630377, + "grad_norm": 0.6637005805969238, + "learning_rate": 7.704709468196454e-05, + "loss": 2.456, + "step": 11518 + }, + { + "epoch": 0.9296263416996207, + "grad_norm": 0.6668851375579834, + "learning_rate": 7.703172955188742e-05, + "loss": 2.5251, + "step": 11519 + }, + { + "epoch": 0.9297070454362037, + "grad_norm": 0.6840329170227051, + "learning_rate": 7.701636499423965e-05, + "loss": 2.5068, + "step": 11520 + }, + { + "epoch": 0.9297877491727867, + "grad_norm": 0.695122241973877, + "learning_rate": 7.700100100940415e-05, + "loss": 2.4822, + "step": 11521 + }, + { + "epoch": 0.9298684529093697, + "grad_norm": 0.6784923672676086, + "learning_rate": 7.698563759776382e-05, + "loss": 2.4978, + "step": 11522 + }, + { + "epoch": 0.9299491566459527, + 
"grad_norm": 0.6949357986450195, + "learning_rate": 7.697027475970154e-05, + "loss": 2.5392, + "step": 11523 + }, + { + "epoch": 0.9300298603825358, + "grad_norm": 0.7128093242645264, + "learning_rate": 7.695491249560025e-05, + "loss": 2.455, + "step": 11524 + }, + { + "epoch": 0.9301105641191187, + "grad_norm": 0.6534962058067322, + "learning_rate": 7.693955080584277e-05, + "loss": 2.5272, + "step": 11525 + }, + { + "epoch": 0.9301912678557017, + "grad_norm": 0.6893511414527893, + "learning_rate": 7.692418969081194e-05, + "loss": 2.5366, + "step": 11526 + }, + { + "epoch": 0.9302719715922847, + "grad_norm": 0.6335335373878479, + "learning_rate": 7.690882915089064e-05, + "loss": 2.5781, + "step": 11527 + }, + { + "epoch": 0.9303526753288677, + "grad_norm": 0.7264769077301025, + "learning_rate": 7.689346918646172e-05, + "loss": 2.5322, + "step": 11528 + }, + { + "epoch": 0.9304333790654508, + "grad_norm": 0.7156329154968262, + "learning_rate": 7.68781097979079e-05, + "loss": 2.5558, + "step": 11529 + }, + { + "epoch": 0.9305140828020337, + "grad_norm": 0.6914563775062561, + "learning_rate": 7.686275098561203e-05, + "loss": 2.5058, + "step": 11530 + }, + { + "epoch": 0.9305947865386167, + "grad_norm": 0.6939939260482788, + "learning_rate": 7.684739274995691e-05, + "loss": 2.4764, + "step": 11531 + }, + { + "epoch": 0.9306754902751997, + "grad_norm": 0.7103014588356018, + "learning_rate": 7.683203509132526e-05, + "loss": 2.5062, + "step": 11532 + }, + { + "epoch": 0.9307561940117828, + "grad_norm": 0.6558870077133179, + "learning_rate": 7.681667801009985e-05, + "loss": 2.4869, + "step": 11533 + }, + { + "epoch": 0.9308368977483658, + "grad_norm": 0.7280104160308838, + "learning_rate": 7.680132150666348e-05, + "loss": 2.566, + "step": 11534 + }, + { + "epoch": 0.9309176014849487, + "grad_norm": 0.6814180612564087, + "learning_rate": 7.678596558139875e-05, + "loss": 2.4926, + "step": 11535 + }, + { + "epoch": 0.9309983052215317, + "grad_norm": 0.6916589736938477, + 
"learning_rate": 7.677061023468846e-05, + "loss": 2.5189, + "step": 11536 + }, + { + "epoch": 0.9310790089581148, + "grad_norm": 0.6527554988861084, + "learning_rate": 7.675525546691533e-05, + "loss": 2.4969, + "step": 11537 + }, + { + "epoch": 0.9311597126946978, + "grad_norm": 0.6458954811096191, + "learning_rate": 7.673990127846196e-05, + "loss": 2.5159, + "step": 11538 + }, + { + "epoch": 0.9312404164312807, + "grad_norm": 0.6704902052879333, + "learning_rate": 7.672454766971105e-05, + "loss": 2.49, + "step": 11539 + }, + { + "epoch": 0.9313211201678637, + "grad_norm": 0.6599698066711426, + "learning_rate": 7.670919464104527e-05, + "loss": 2.4872, + "step": 11540 + }, + { + "epoch": 0.9314018239044468, + "grad_norm": 0.7638888955116272, + "learning_rate": 7.669384219284722e-05, + "loss": 2.5228, + "step": 11541 + }, + { + "epoch": 0.9314825276410298, + "grad_norm": 0.6911981105804443, + "learning_rate": 7.667849032549954e-05, + "loss": 2.4675, + "step": 11542 + }, + { + "epoch": 0.9315632313776128, + "grad_norm": 0.6414669156074524, + "learning_rate": 7.666313903938486e-05, + "loss": 2.5137, + "step": 11543 + }, + { + "epoch": 0.9316439351141957, + "grad_norm": 0.7552139759063721, + "learning_rate": 7.66477883348858e-05, + "loss": 2.5778, + "step": 11544 + }, + { + "epoch": 0.9317246388507788, + "grad_norm": 0.6738760471343994, + "learning_rate": 7.663243821238484e-05, + "loss": 2.5326, + "step": 11545 + }, + { + "epoch": 0.9318053425873618, + "grad_norm": 0.7406899333000183, + "learning_rate": 7.661708867226459e-05, + "loss": 2.4608, + "step": 11546 + }, + { + "epoch": 0.9318860463239448, + "grad_norm": 0.7261415719985962, + "learning_rate": 7.660173971490769e-05, + "loss": 2.5684, + "step": 11547 + }, + { + "epoch": 0.9319667500605278, + "grad_norm": 0.636542797088623, + "learning_rate": 7.658639134069654e-05, + "loss": 2.5159, + "step": 11548 + }, + { + "epoch": 0.9320474537971108, + "grad_norm": 0.7730209231376648, + "learning_rate": 7.657104355001373e-05, 
+ "loss": 2.487, + "step": 11549 + }, + { + "epoch": 0.9321281575336938, + "grad_norm": 0.6553641557693481, + "learning_rate": 7.655569634324178e-05, + "loss": 2.5105, + "step": 11550 + }, + { + "epoch": 0.9322088612702768, + "grad_norm": 0.7008326649665833, + "learning_rate": 7.654034972076314e-05, + "loss": 2.492, + "step": 11551 + }, + { + "epoch": 0.9322895650068598, + "grad_norm": 0.7074279189109802, + "learning_rate": 7.65250036829603e-05, + "loss": 2.5221, + "step": 11552 + }, + { + "epoch": 0.9323702687434429, + "grad_norm": 0.7235530018806458, + "learning_rate": 7.650965823021578e-05, + "loss": 2.5285, + "step": 11553 + }, + { + "epoch": 0.9324509724800258, + "grad_norm": 0.7601436376571655, + "learning_rate": 7.649431336291194e-05, + "loss": 2.5071, + "step": 11554 + }, + { + "epoch": 0.9325316762166088, + "grad_norm": 0.6446424126625061, + "learning_rate": 7.647896908143127e-05, + "loss": 2.5032, + "step": 11555 + }, + { + "epoch": 0.9326123799531918, + "grad_norm": 0.7032139897346497, + "learning_rate": 7.646362538615614e-05, + "loss": 2.6096, + "step": 11556 + }, + { + "epoch": 0.9326930836897749, + "grad_norm": 0.6727899312973022, + "learning_rate": 7.644828227746904e-05, + "loss": 2.5041, + "step": 11557 + }, + { + "epoch": 0.9327737874263579, + "grad_norm": 0.6817529201507568, + "learning_rate": 7.643293975575229e-05, + "loss": 2.4474, + "step": 11558 + }, + { + "epoch": 0.9328544911629408, + "grad_norm": 0.6374444365501404, + "learning_rate": 7.641759782138827e-05, + "loss": 2.5204, + "step": 11559 + }, + { + "epoch": 0.9329351948995238, + "grad_norm": 0.6889457702636719, + "learning_rate": 7.640225647475939e-05, + "loss": 2.6344, + "step": 11560 + }, + { + "epoch": 0.9330158986361069, + "grad_norm": 0.6657958626747131, + "learning_rate": 7.638691571624794e-05, + "loss": 2.4672, + "step": 11561 + }, + { + "epoch": 0.9330966023726899, + "grad_norm": 0.6425464749336243, + "learning_rate": 7.637157554623627e-05, + "loss": 2.4756, + "step": 11562 + }, 
+ { + "epoch": 0.9331773061092729, + "grad_norm": 0.7193450927734375, + "learning_rate": 7.635623596510675e-05, + "loss": 2.4969, + "step": 11563 + }, + { + "epoch": 0.9332580098458558, + "grad_norm": 0.6595252156257629, + "learning_rate": 7.634089697324159e-05, + "loss": 2.4647, + "step": 11564 + }, + { + "epoch": 0.9333387135824389, + "grad_norm": 0.6505268812179565, + "learning_rate": 7.632555857102312e-05, + "loss": 2.5059, + "step": 11565 + }, + { + "epoch": 0.9334194173190219, + "grad_norm": 0.6877838969230652, + "learning_rate": 7.631022075883365e-05, + "loss": 2.4855, + "step": 11566 + }, + { + "epoch": 0.9335001210556049, + "grad_norm": 0.6376198530197144, + "learning_rate": 7.629488353705538e-05, + "loss": 2.5024, + "step": 11567 + }, + { + "epoch": 0.9335808247921878, + "grad_norm": 0.6807642579078674, + "learning_rate": 7.627954690607058e-05, + "loss": 2.4954, + "step": 11568 + }, + { + "epoch": 0.9336615285287709, + "grad_norm": 0.6785219311714172, + "learning_rate": 7.62642108662615e-05, + "loss": 2.4854, + "step": 11569 + }, + { + "epoch": 0.9337422322653539, + "grad_norm": 0.8159591555595398, + "learning_rate": 7.624887541801032e-05, + "loss": 2.524, + "step": 11570 + }, + { + "epoch": 0.9338229360019369, + "grad_norm": 0.6912592053413391, + "learning_rate": 7.62335405616992e-05, + "loss": 2.5111, + "step": 11571 + }, + { + "epoch": 0.9339036397385199, + "grad_norm": 0.6772454977035522, + "learning_rate": 7.621820629771041e-05, + "loss": 2.5603, + "step": 11572 + }, + { + "epoch": 0.933984343475103, + "grad_norm": 0.6720221638679504, + "learning_rate": 7.620287262642613e-05, + "loss": 2.5016, + "step": 11573 + }, + { + "epoch": 0.9340650472116859, + "grad_norm": 0.651935338973999, + "learning_rate": 7.618753954822841e-05, + "loss": 2.445, + "step": 11574 + }, + { + "epoch": 0.9341457509482689, + "grad_norm": 0.6731166839599609, + "learning_rate": 7.617220706349947e-05, + "loss": 2.4703, + "step": 11575 + }, + { + "epoch": 0.9342264546848519, + 
"grad_norm": 0.6283879280090332, + "learning_rate": 7.615687517262143e-05, + "loss": 2.5232, + "step": 11576 + }, + { + "epoch": 0.9343071584214349, + "grad_norm": 0.7193455696105957, + "learning_rate": 7.614154387597638e-05, + "loss": 2.5268, + "step": 11577 + }, + { + "epoch": 0.934387862158018, + "grad_norm": 0.6992828845977783, + "learning_rate": 7.61262131739464e-05, + "loss": 2.5834, + "step": 11578 + }, + { + "epoch": 0.9344685658946009, + "grad_norm": 0.6501220464706421, + "learning_rate": 7.611088306691365e-05, + "loss": 2.5146, + "step": 11579 + }, + { + "epoch": 0.9345492696311839, + "grad_norm": 0.7246220111846924, + "learning_rate": 7.60955535552601e-05, + "loss": 2.5665, + "step": 11580 + }, + { + "epoch": 0.9346299733677669, + "grad_norm": 0.7190428376197815, + "learning_rate": 7.608022463936783e-05, + "loss": 2.5061, + "step": 11581 + }, + { + "epoch": 0.93471067710435, + "grad_norm": 0.7144324779510498, + "learning_rate": 7.606489631961893e-05, + "loss": 2.4982, + "step": 11582 + }, + { + "epoch": 0.9347913808409329, + "grad_norm": 0.7144657373428345, + "learning_rate": 7.604956859639535e-05, + "loss": 2.5506, + "step": 11583 + }, + { + "epoch": 0.9348720845775159, + "grad_norm": 0.6596626043319702, + "learning_rate": 7.603424147007913e-05, + "loss": 2.4911, + "step": 11584 + }, + { + "epoch": 0.9349527883140989, + "grad_norm": 0.7090883851051331, + "learning_rate": 7.601891494105227e-05, + "loss": 2.5087, + "step": 11585 + }, + { + "epoch": 0.935033492050682, + "grad_norm": 0.6679760217666626, + "learning_rate": 7.600358900969671e-05, + "loss": 2.497, + "step": 11586 + }, + { + "epoch": 0.935114195787265, + "grad_norm": 0.6795344948768616, + "learning_rate": 7.598826367639447e-05, + "loss": 2.4839, + "step": 11587 + }, + { + "epoch": 0.9351948995238479, + "grad_norm": 0.6378790736198425, + "learning_rate": 7.597293894152744e-05, + "loss": 2.4656, + "step": 11588 + }, + { + "epoch": 0.9352756032604309, + "grad_norm": 0.6646658182144165, + 
"learning_rate": 7.595761480547762e-05, + "loss": 2.4739, + "step": 11589 + }, + { + "epoch": 0.935356306997014, + "grad_norm": 0.6662073731422424, + "learning_rate": 7.594229126862687e-05, + "loss": 2.4872, + "step": 11590 + }, + { + "epoch": 0.935437010733597, + "grad_norm": 0.6698113679885864, + "learning_rate": 7.592696833135708e-05, + "loss": 2.4964, + "step": 11591 + }, + { + "epoch": 0.93551771447018, + "grad_norm": 0.6520004272460938, + "learning_rate": 7.59116459940502e-05, + "loss": 2.5616, + "step": 11592 + }, + { + "epoch": 0.9355984182067629, + "grad_norm": 0.6675869226455688, + "learning_rate": 7.589632425708806e-05, + "loss": 2.4854, + "step": 11593 + }, + { + "epoch": 0.935679121943346, + "grad_norm": 0.6914103031158447, + "learning_rate": 7.588100312085251e-05, + "loss": 2.5252, + "step": 11594 + }, + { + "epoch": 0.935759825679929, + "grad_norm": 0.7283286452293396, + "learning_rate": 7.586568258572546e-05, + "loss": 2.543, + "step": 11595 + }, + { + "epoch": 0.935840529416512, + "grad_norm": 0.6881958246231079, + "learning_rate": 7.585036265208864e-05, + "loss": 2.4499, + "step": 11596 + }, + { + "epoch": 0.935921233153095, + "grad_norm": 0.7733677625656128, + "learning_rate": 7.58350433203239e-05, + "loss": 2.5595, + "step": 11597 + }, + { + "epoch": 0.936001936889678, + "grad_norm": 0.672711968421936, + "learning_rate": 7.58197245908131e-05, + "loss": 2.4757, + "step": 11598 + }, + { + "epoch": 0.936082640626261, + "grad_norm": 0.691780686378479, + "learning_rate": 7.580440646393794e-05, + "loss": 2.5134, + "step": 11599 + }, + { + "epoch": 0.936163344362844, + "grad_norm": 0.6935102343559265, + "learning_rate": 7.578908894008021e-05, + "loss": 2.5128, + "step": 11600 + }, + { + "epoch": 0.936244048099427, + "grad_norm": 0.7005696892738342, + "learning_rate": 7.57737720196217e-05, + "loss": 2.5338, + "step": 11601 + }, + { + "epoch": 0.93632475183601, + "grad_norm": 0.6729815602302551, + "learning_rate": 7.575845570294409e-05, + "loss": 2.5373, 
+ "step": 11602 + }, + { + "epoch": 0.936405455572593, + "grad_norm": 0.6694760918617249, + "learning_rate": 7.574313999042913e-05, + "loss": 2.5165, + "step": 11603 + }, + { + "epoch": 0.936486159309176, + "grad_norm": 0.6425337791442871, + "learning_rate": 7.572782488245854e-05, + "loss": 2.5102, + "step": 11604 + }, + { + "epoch": 0.936566863045759, + "grad_norm": 0.6613046526908875, + "learning_rate": 7.571251037941405e-05, + "loss": 2.5108, + "step": 11605 + }, + { + "epoch": 0.9366475667823421, + "grad_norm": 0.7396309971809387, + "learning_rate": 7.569719648167723e-05, + "loss": 2.5261, + "step": 11606 + }, + { + "epoch": 0.936728270518925, + "grad_norm": 0.6783239245414734, + "learning_rate": 7.568188318962981e-05, + "loss": 2.5725, + "step": 11607 + }, + { + "epoch": 0.936808974255508, + "grad_norm": 0.7591684460639954, + "learning_rate": 7.566657050365345e-05, + "loss": 2.5085, + "step": 11608 + }, + { + "epoch": 0.936889677992091, + "grad_norm": 0.6805615425109863, + "learning_rate": 7.565125842412974e-05, + "loss": 2.5598, + "step": 11609 + }, + { + "epoch": 0.9369703817286741, + "grad_norm": 0.680203378200531, + "learning_rate": 7.563594695144032e-05, + "loss": 2.5072, + "step": 11610 + }, + { + "epoch": 0.9370510854652571, + "grad_norm": 0.7035777568817139, + "learning_rate": 7.56206360859668e-05, + "loss": 2.4882, + "step": 11611 + }, + { + "epoch": 0.93713178920184, + "grad_norm": 0.7457048892974854, + "learning_rate": 7.560532582809075e-05, + "loss": 2.4975, + "step": 11612 + }, + { + "epoch": 0.937212492938423, + "grad_norm": 0.702055037021637, + "learning_rate": 7.559001617819374e-05, + "loss": 2.5522, + "step": 11613 + }, + { + "epoch": 0.9372931966750061, + "grad_norm": 0.7618527412414551, + "learning_rate": 7.557470713665738e-05, + "loss": 2.5503, + "step": 11614 + }, + { + "epoch": 0.9373739004115891, + "grad_norm": 0.8611559867858887, + "learning_rate": 7.555939870386312e-05, + "loss": 2.4866, + "step": 11615 + }, + { + "epoch": 
0.937454604148172, + "grad_norm": 0.7285227179527283, + "learning_rate": 7.554409088019254e-05, + "loss": 2.4855, + "step": 11616 + }, + { + "epoch": 0.937535307884755, + "grad_norm": 0.7512121796607971, + "learning_rate": 7.552878366602716e-05, + "loss": 2.5496, + "step": 11617 + }, + { + "epoch": 0.9376160116213381, + "grad_norm": 0.7353625297546387, + "learning_rate": 7.551347706174844e-05, + "loss": 2.5754, + "step": 11618 + }, + { + "epoch": 0.9376967153579211, + "grad_norm": 0.7131205797195435, + "learning_rate": 7.549817106773788e-05, + "loss": 2.4927, + "step": 11619 + }, + { + "epoch": 0.9377774190945041, + "grad_norm": 0.6562477946281433, + "learning_rate": 7.548286568437695e-05, + "loss": 2.5247, + "step": 11620 + }, + { + "epoch": 0.937858122831087, + "grad_norm": 0.7094948887825012, + "learning_rate": 7.546756091204713e-05, + "loss": 2.5084, + "step": 11621 + }, + { + "epoch": 0.9379388265676701, + "grad_norm": 0.6890475153923035, + "learning_rate": 7.545225675112977e-05, + "loss": 2.5178, + "step": 11622 + }, + { + "epoch": 0.9380195303042531, + "grad_norm": 0.6801474094390869, + "learning_rate": 7.543695320200634e-05, + "loss": 2.5457, + "step": 11623 + }, + { + "epoch": 0.9381002340408361, + "grad_norm": 0.7093712687492371, + "learning_rate": 7.54216502650583e-05, + "loss": 2.6122, + "step": 11624 + }, + { + "epoch": 0.9381809377774191, + "grad_norm": 0.7246927618980408, + "learning_rate": 7.540634794066695e-05, + "loss": 2.5251, + "step": 11625 + }, + { + "epoch": 0.9382616415140022, + "grad_norm": 0.7358111143112183, + "learning_rate": 7.539104622921368e-05, + "loss": 2.5444, + "step": 11626 + }, + { + "epoch": 0.9383423452505851, + "grad_norm": 0.6915993690490723, + "learning_rate": 7.53757451310799e-05, + "loss": 2.448, + "step": 11627 + }, + { + "epoch": 0.9384230489871681, + "grad_norm": 0.6864039301872253, + "learning_rate": 7.536044464664689e-05, + "loss": 2.5267, + "step": 11628 + }, + { + "epoch": 0.9385037527237511, + "grad_norm": 
0.664799690246582, + "learning_rate": 7.534514477629602e-05, + "loss": 2.5602, + "step": 11629 + }, + { + "epoch": 0.9385844564603341, + "grad_norm": 0.6770062446594238, + "learning_rate": 7.532984552040862e-05, + "loss": 2.5034, + "step": 11630 + }, + { + "epoch": 0.9386651601969171, + "grad_norm": 0.6961095929145813, + "learning_rate": 7.531454687936592e-05, + "loss": 2.4523, + "step": 11631 + }, + { + "epoch": 0.9387458639335001, + "grad_norm": 0.6776804327964783, + "learning_rate": 7.529924885354924e-05, + "loss": 2.5526, + "step": 11632 + }, + { + "epoch": 0.9388265676700831, + "grad_norm": 0.785796582698822, + "learning_rate": 7.528395144333988e-05, + "loss": 2.5256, + "step": 11633 + }, + { + "epoch": 0.9389072714066661, + "grad_norm": 0.7016655206680298, + "learning_rate": 7.526865464911902e-05, + "loss": 2.4781, + "step": 11634 + }, + { + "epoch": 0.9389879751432492, + "grad_norm": 0.7027767300605774, + "learning_rate": 7.525335847126795e-05, + "loss": 2.5287, + "step": 11635 + }, + { + "epoch": 0.9390686788798321, + "grad_norm": 0.710624098777771, + "learning_rate": 7.523806291016787e-05, + "loss": 2.5486, + "step": 11636 + }, + { + "epoch": 0.9391493826164151, + "grad_norm": 0.7029656767845154, + "learning_rate": 7.52227679662e-05, + "loss": 2.5244, + "step": 11637 + }, + { + "epoch": 0.9392300863529981, + "grad_norm": 0.7417333722114563, + "learning_rate": 7.520747363974551e-05, + "loss": 2.5561, + "step": 11638 + }, + { + "epoch": 0.9393107900895812, + "grad_norm": 0.6595067381858826, + "learning_rate": 7.519217993118559e-05, + "loss": 2.617, + "step": 11639 + }, + { + "epoch": 0.9393914938261642, + "grad_norm": 0.6808187365531921, + "learning_rate": 7.517688684090141e-05, + "loss": 2.5279, + "step": 11640 + }, + { + "epoch": 0.9394721975627471, + "grad_norm": 0.6618706583976746, + "learning_rate": 7.516159436927408e-05, + "loss": 2.4976, + "step": 11641 + }, + { + "epoch": 0.9395529012993301, + "grad_norm": 0.6979385018348694, + "learning_rate": 
7.514630251668475e-05, + "loss": 2.4542, + "step": 11642 + }, + { + "epoch": 0.9396336050359132, + "grad_norm": 0.6380844116210938, + "learning_rate": 7.513101128351454e-05, + "loss": 2.48, + "step": 11643 + }, + { + "epoch": 0.9397143087724962, + "grad_norm": 0.6390014290809631, + "learning_rate": 7.511572067014452e-05, + "loss": 2.5111, + "step": 11644 + }, + { + "epoch": 0.9397950125090792, + "grad_norm": 0.7592498064041138, + "learning_rate": 7.510043067695578e-05, + "loss": 2.5161, + "step": 11645 + }, + { + "epoch": 0.9398757162456621, + "grad_norm": 0.6269322037696838, + "learning_rate": 7.508514130432945e-05, + "loss": 2.491, + "step": 11646 + }, + { + "epoch": 0.9399564199822452, + "grad_norm": 0.6372053623199463, + "learning_rate": 7.506985255264646e-05, + "loss": 2.4826, + "step": 11647 + }, + { + "epoch": 0.9400371237188282, + "grad_norm": 0.6962460875511169, + "learning_rate": 7.505456442228794e-05, + "loss": 2.5605, + "step": 11648 + }, + { + "epoch": 0.9401178274554112, + "grad_norm": 0.7931656241416931, + "learning_rate": 7.503927691363491e-05, + "loss": 2.4909, + "step": 11649 + }, + { + "epoch": 0.9401985311919941, + "grad_norm": 0.688792884349823, + "learning_rate": 7.502399002706832e-05, + "loss": 2.4888, + "step": 11650 + }, + { + "epoch": 0.9402792349285772, + "grad_norm": 0.6683691143989563, + "learning_rate": 7.500870376296918e-05, + "loss": 2.5233, + "step": 11651 + }, + { + "epoch": 0.9403599386651602, + "grad_norm": 0.6537527441978455, + "learning_rate": 7.499341812171846e-05, + "loss": 2.5061, + "step": 11652 + }, + { + "epoch": 0.9404406424017432, + "grad_norm": 0.6657658219337463, + "learning_rate": 7.497813310369717e-05, + "loss": 2.4844, + "step": 11653 + }, + { + "epoch": 0.9405213461383262, + "grad_norm": 0.6865110993385315, + "learning_rate": 7.496284870928618e-05, + "loss": 2.4986, + "step": 11654 + }, + { + "epoch": 0.9406020498749093, + "grad_norm": 0.6724923849105835, + "learning_rate": 7.494756493886644e-05, + "loss": 2.4818, 
+ "step": 11655 + }, + { + "epoch": 0.9406827536114922, + "grad_norm": 0.6478626728057861, + "learning_rate": 7.493228179281892e-05, + "loss": 2.5321, + "step": 11656 + }, + { + "epoch": 0.9407634573480752, + "grad_norm": 0.6474425792694092, + "learning_rate": 7.491699927152443e-05, + "loss": 2.5276, + "step": 11657 + }, + { + "epoch": 0.9408441610846582, + "grad_norm": 0.6736220717430115, + "learning_rate": 7.490171737536387e-05, + "loss": 2.4734, + "step": 11658 + }, + { + "epoch": 0.9409248648212413, + "grad_norm": 0.6714746952056885, + "learning_rate": 7.488643610471815e-05, + "loss": 2.5754, + "step": 11659 + }, + { + "epoch": 0.9410055685578242, + "grad_norm": 0.6714532375335693, + "learning_rate": 7.487115545996805e-05, + "loss": 2.4855, + "step": 11660 + }, + { + "epoch": 0.9410862722944072, + "grad_norm": 0.7601683139801025, + "learning_rate": 7.485587544149447e-05, + "loss": 2.4887, + "step": 11661 + }, + { + "epoch": 0.9411669760309902, + "grad_norm": 0.7655646204948425, + "learning_rate": 7.484059604967821e-05, + "loss": 2.4904, + "step": 11662 + }, + { + "epoch": 0.9412476797675733, + "grad_norm": 0.6841822862625122, + "learning_rate": 7.482531728490006e-05, + "loss": 2.5272, + "step": 11663 + }, + { + "epoch": 0.9413283835041563, + "grad_norm": 0.7683621048927307, + "learning_rate": 7.481003914754078e-05, + "loss": 2.5218, + "step": 11664 + }, + { + "epoch": 0.9414090872407392, + "grad_norm": 0.6597647070884705, + "learning_rate": 7.479476163798124e-05, + "loss": 2.4925, + "step": 11665 + }, + { + "epoch": 0.9414897909773222, + "grad_norm": 0.6573941111564636, + "learning_rate": 7.477948475660208e-05, + "loss": 2.4854, + "step": 11666 + }, + { + "epoch": 0.9415704947139053, + "grad_norm": 0.6639125943183899, + "learning_rate": 7.476420850378407e-05, + "loss": 2.5207, + "step": 11667 + }, + { + "epoch": 0.9416511984504883, + "grad_norm": 0.6770366430282593, + "learning_rate": 7.474893287990796e-05, + "loss": 2.5167, + "step": 11668 + }, + { + "epoch": 
0.9417319021870713, + "grad_norm": 0.6908389925956726, + "learning_rate": 7.473365788535447e-05, + "loss": 2.4606, + "step": 11669 + }, + { + "epoch": 0.9418126059236542, + "grad_norm": 0.6625069975852966, + "learning_rate": 7.471838352050427e-05, + "loss": 2.5344, + "step": 11670 + }, + { + "epoch": 0.9418933096602373, + "grad_norm": 0.6690869331359863, + "learning_rate": 7.470310978573803e-05, + "loss": 2.4507, + "step": 11671 + }, + { + "epoch": 0.9419740133968203, + "grad_norm": 0.6741886734962463, + "learning_rate": 7.468783668143645e-05, + "loss": 2.5755, + "step": 11672 + }, + { + "epoch": 0.9420547171334033, + "grad_norm": 0.6876424551010132, + "learning_rate": 7.467256420798009e-05, + "loss": 2.483, + "step": 11673 + }, + { + "epoch": 0.9421354208699863, + "grad_norm": 0.7044318318367004, + "learning_rate": 7.465729236574965e-05, + "loss": 2.5025, + "step": 11674 + }, + { + "epoch": 0.9422161246065693, + "grad_norm": 0.6608660817146301, + "learning_rate": 7.46420211551258e-05, + "loss": 2.5253, + "step": 11675 + }, + { + "epoch": 0.9422968283431523, + "grad_norm": 0.6944260001182556, + "learning_rate": 7.4626750576489e-05, + "loss": 2.5002, + "step": 11676 + }, + { + "epoch": 0.9423775320797353, + "grad_norm": 0.7304964065551758, + "learning_rate": 7.46114806302199e-05, + "loss": 2.5501, + "step": 11677 + }, + { + "epoch": 0.9424582358163183, + "grad_norm": 0.688525378704071, + "learning_rate": 7.459621131669911e-05, + "loss": 2.5291, + "step": 11678 + }, + { + "epoch": 0.9425389395529012, + "grad_norm": 0.7388432025909424, + "learning_rate": 7.45809426363071e-05, + "loss": 2.5391, + "step": 11679 + }, + { + "epoch": 0.9426196432894843, + "grad_norm": 0.6777819991111755, + "learning_rate": 7.456567458942447e-05, + "loss": 2.5425, + "step": 11680 + }, + { + "epoch": 0.9427003470260673, + "grad_norm": 0.7208845615386963, + "learning_rate": 7.455040717643169e-05, + "loss": 2.5306, + "step": 11681 + }, + { + "epoch": 0.9427810507626503, + "grad_norm": 
0.745384693145752, + "learning_rate": 7.453514039770934e-05, + "loss": 2.4695, + "step": 11682 + }, + { + "epoch": 0.9428617544992333, + "grad_norm": 0.7088115215301514, + "learning_rate": 7.451987425363782e-05, + "loss": 2.5413, + "step": 11683 + }, + { + "epoch": 0.9429424582358163, + "grad_norm": 0.7287998795509338, + "learning_rate": 7.450460874459762e-05, + "loss": 2.5773, + "step": 11684 + }, + { + "epoch": 0.9430231619723993, + "grad_norm": 0.6897092461585999, + "learning_rate": 7.448934387096928e-05, + "loss": 2.5255, + "step": 11685 + }, + { + "epoch": 0.9431038657089823, + "grad_norm": 0.6227227449417114, + "learning_rate": 7.447407963313313e-05, + "loss": 2.5027, + "step": 11686 + }, + { + "epoch": 0.9431845694455653, + "grad_norm": 0.6954305768013, + "learning_rate": 7.445881603146964e-05, + "loss": 2.5477, + "step": 11687 + }, + { + "epoch": 0.9432652731821484, + "grad_norm": 0.7860052585601807, + "learning_rate": 7.444355306635924e-05, + "loss": 2.469, + "step": 11688 + }, + { + "epoch": 0.9433459769187313, + "grad_norm": 0.6851965188980103, + "learning_rate": 7.442829073818227e-05, + "loss": 2.4997, + "step": 11689 + }, + { + "epoch": 0.9434266806553143, + "grad_norm": 0.7011744379997253, + "learning_rate": 7.441302904731916e-05, + "loss": 2.5399, + "step": 11690 + }, + { + "epoch": 0.9435073843918973, + "grad_norm": 0.703167200088501, + "learning_rate": 7.439776799415028e-05, + "loss": 2.5323, + "step": 11691 + }, + { + "epoch": 0.9435880881284804, + "grad_norm": 0.6747310161590576, + "learning_rate": 7.438250757905591e-05, + "loss": 2.5406, + "step": 11692 + }, + { + "epoch": 0.9436687918650634, + "grad_norm": 0.8631153106689453, + "learning_rate": 7.436724780241642e-05, + "loss": 2.5215, + "step": 11693 + }, + { + "epoch": 0.9437494956016463, + "grad_norm": 0.6919798254966736, + "learning_rate": 7.435198866461214e-05, + "loss": 2.4654, + "step": 11694 + }, + { + "epoch": 0.9438301993382293, + "grad_norm": 0.6747070550918579, + "learning_rate": 
7.433673016602332e-05, + "loss": 2.5186, + "step": 11695 + }, + { + "epoch": 0.9439109030748124, + "grad_norm": 0.7368776798248291, + "learning_rate": 7.432147230703026e-05, + "loss": 2.5365, + "step": 11696 + }, + { + "epoch": 0.9439916068113954, + "grad_norm": 0.7443639636039734, + "learning_rate": 7.430621508801325e-05, + "loss": 2.4966, + "step": 11697 + }, + { + "epoch": 0.9440723105479784, + "grad_norm": 0.7371395230293274, + "learning_rate": 7.429095850935255e-05, + "loss": 2.4638, + "step": 11698 + }, + { + "epoch": 0.9441530142845613, + "grad_norm": 0.6917321681976318, + "learning_rate": 7.427570257142832e-05, + "loss": 2.5341, + "step": 11699 + }, + { + "epoch": 0.9442337180211444, + "grad_norm": 0.7704101800918579, + "learning_rate": 7.426044727462085e-05, + "loss": 2.5144, + "step": 11700 + }, + { + "epoch": 0.9443144217577274, + "grad_norm": 0.692197859287262, + "learning_rate": 7.424519261931036e-05, + "loss": 2.5293, + "step": 11701 + }, + { + "epoch": 0.9443951254943104, + "grad_norm": 0.7305885553359985, + "learning_rate": 7.422993860587695e-05, + "loss": 2.5236, + "step": 11702 + }, + { + "epoch": 0.9444758292308933, + "grad_norm": 0.6955052018165588, + "learning_rate": 7.421468523470081e-05, + "loss": 2.4765, + "step": 11703 + }, + { + "epoch": 0.9445565329674764, + "grad_norm": 0.7394432425498962, + "learning_rate": 7.419943250616216e-05, + "loss": 2.5053, + "step": 11704 + }, + { + "epoch": 0.9446372367040594, + "grad_norm": 0.679044246673584, + "learning_rate": 7.418418042064108e-05, + "loss": 2.5413, + "step": 11705 + }, + { + "epoch": 0.9447179404406424, + "grad_norm": 0.7153440117835999, + "learning_rate": 7.41689289785177e-05, + "loss": 2.4938, + "step": 11706 + }, + { + "epoch": 0.9447986441772254, + "grad_norm": 0.697068452835083, + "learning_rate": 7.415367818017217e-05, + "loss": 2.5157, + "step": 11707 + }, + { + "epoch": 0.9448793479138085, + "grad_norm": 0.664616048336029, + "learning_rate": 7.41384280259845e-05, + "loss": 2.4859, + 
"step": 11708 + }, + { + "epoch": 0.9449600516503914, + "grad_norm": 0.7275365591049194, + "learning_rate": 7.412317851633479e-05, + "loss": 2.523, + "step": 11709 + }, + { + "epoch": 0.9450407553869744, + "grad_norm": 0.7408944368362427, + "learning_rate": 7.410792965160318e-05, + "loss": 2.4994, + "step": 11710 + }, + { + "epoch": 0.9451214591235574, + "grad_norm": 0.7222678065299988, + "learning_rate": 7.40926814321696e-05, + "loss": 2.5084, + "step": 11711 + }, + { + "epoch": 0.9452021628601405, + "grad_norm": 0.7242292761802673, + "learning_rate": 7.407743385841412e-05, + "loss": 2.5165, + "step": 11712 + }, + { + "epoch": 0.9452828665967234, + "grad_norm": 0.6634014844894409, + "learning_rate": 7.406218693071677e-05, + "loss": 2.4947, + "step": 11713 + }, + { + "epoch": 0.9453635703333064, + "grad_norm": 0.8126605153083801, + "learning_rate": 7.404694064945751e-05, + "loss": 2.5553, + "step": 11714 + }, + { + "epoch": 0.9454442740698894, + "grad_norm": 0.679344654083252, + "learning_rate": 7.403169501501632e-05, + "loss": 2.5475, + "step": 11715 + }, + { + "epoch": 0.9455249778064725, + "grad_norm": 0.7584314346313477, + "learning_rate": 7.401645002777318e-05, + "loss": 2.5498, + "step": 11716 + }, + { + "epoch": 0.9456056815430555, + "grad_norm": 0.7191590666770935, + "learning_rate": 7.400120568810806e-05, + "loss": 2.5161, + "step": 11717 + }, + { + "epoch": 0.9456863852796384, + "grad_norm": 0.6738762855529785, + "learning_rate": 7.398596199640084e-05, + "loss": 2.4819, + "step": 11718 + }, + { + "epoch": 0.9457670890162214, + "grad_norm": 0.7305885553359985, + "learning_rate": 7.397071895303143e-05, + "loss": 2.4842, + "step": 11719 + }, + { + "epoch": 0.9458477927528045, + "grad_norm": 0.6885291337966919, + "learning_rate": 7.395547655837976e-05, + "loss": 2.5016, + "step": 11720 + }, + { + "epoch": 0.9459284964893875, + "grad_norm": 0.6807307600975037, + "learning_rate": 7.394023481282568e-05, + "loss": 2.4949, + "step": 11721 + }, + { + "epoch": 
0.9460092002259705, + "grad_norm": 0.6683849096298218, + "learning_rate": 7.392499371674907e-05, + "loss": 2.4974, + "step": 11722 + }, + { + "epoch": 0.9460899039625534, + "grad_norm": 0.6615697741508484, + "learning_rate": 7.39097532705298e-05, + "loss": 2.4744, + "step": 11723 + }, + { + "epoch": 0.9461706076991365, + "grad_norm": 0.6463690996170044, + "learning_rate": 7.389451347454765e-05, + "loss": 2.478, + "step": 11724 + }, + { + "epoch": 0.9462513114357195, + "grad_norm": 0.6848269701004028, + "learning_rate": 7.387927432918247e-05, + "loss": 2.5491, + "step": 11725 + }, + { + "epoch": 0.9463320151723025, + "grad_norm": 0.7251551747322083, + "learning_rate": 7.386403583481409e-05, + "loss": 2.4936, + "step": 11726 + }, + { + "epoch": 0.9464127189088855, + "grad_norm": 0.6562095284461975, + "learning_rate": 7.384879799182223e-05, + "loss": 2.4895, + "step": 11727 + }, + { + "epoch": 0.9464934226454685, + "grad_norm": 0.6891352534294128, + "learning_rate": 7.383356080058668e-05, + "loss": 2.508, + "step": 11728 + }, + { + "epoch": 0.9465741263820515, + "grad_norm": 0.7220255136489868, + "learning_rate": 7.381832426148719e-05, + "loss": 2.5181, + "step": 11729 + }, + { + "epoch": 0.9466548301186345, + "grad_norm": 0.7213689088821411, + "learning_rate": 7.38030883749035e-05, + "loss": 2.5136, + "step": 11730 + }, + { + "epoch": 0.9467355338552175, + "grad_norm": 0.6711129546165466, + "learning_rate": 7.378785314121535e-05, + "loss": 2.5463, + "step": 11731 + }, + { + "epoch": 0.9468162375918004, + "grad_norm": 0.6380139589309692, + "learning_rate": 7.377261856080239e-05, + "loss": 2.5092, + "step": 11732 + }, + { + "epoch": 0.9468969413283835, + "grad_norm": 0.66046142578125, + "learning_rate": 7.375738463404437e-05, + "loss": 2.5561, + "step": 11733 + }, + { + "epoch": 0.9469776450649665, + "grad_norm": 0.6857354044914246, + "learning_rate": 7.37421513613209e-05, + "loss": 2.5774, + "step": 11734 + }, + { + "epoch": 0.9470583488015495, + "grad_norm": 
0.6811589598655701, + "learning_rate": 7.372691874301163e-05, + "loss": 2.4918, + "step": 11735 + }, + { + "epoch": 0.9471390525381325, + "grad_norm": 0.6401017308235168, + "learning_rate": 7.37116867794963e-05, + "loss": 2.4994, + "step": 11736 + }, + { + "epoch": 0.9472197562747156, + "grad_norm": 0.6967078447341919, + "learning_rate": 7.369645547115438e-05, + "loss": 2.5809, + "step": 11737 + }, + { + "epoch": 0.9473004600112985, + "grad_norm": 0.6695219278335571, + "learning_rate": 7.368122481836557e-05, + "loss": 2.4735, + "step": 11738 + }, + { + "epoch": 0.9473811637478815, + "grad_norm": 0.6540528535842896, + "learning_rate": 7.366599482150944e-05, + "loss": 2.4998, + "step": 11739 + }, + { + "epoch": 0.9474618674844645, + "grad_norm": 0.700683057308197, + "learning_rate": 7.365076548096556e-05, + "loss": 2.5258, + "step": 11740 + }, + { + "epoch": 0.9475425712210476, + "grad_norm": 0.7125419974327087, + "learning_rate": 7.363553679711347e-05, + "loss": 2.4653, + "step": 11741 + }, + { + "epoch": 0.9476232749576305, + "grad_norm": 0.7285346984863281, + "learning_rate": 7.362030877033275e-05, + "loss": 2.5523, + "step": 11742 + }, + { + "epoch": 0.9477039786942135, + "grad_norm": 0.7310814261436462, + "learning_rate": 7.360508140100288e-05, + "loss": 2.5027, + "step": 11743 + }, + { + "epoch": 0.9477846824307965, + "grad_norm": 0.746961772441864, + "learning_rate": 7.358985468950335e-05, + "loss": 2.5485, + "step": 11744 + }, + { + "epoch": 0.9478653861673796, + "grad_norm": 0.6880186796188354, + "learning_rate": 7.357462863621369e-05, + "loss": 2.5243, + "step": 11745 + }, + { + "epoch": 0.9479460899039626, + "grad_norm": 0.6406471133232117, + "learning_rate": 7.355940324151339e-05, + "loss": 2.512, + "step": 11746 + }, + { + "epoch": 0.9480267936405455, + "grad_norm": 0.6503005027770996, + "learning_rate": 7.354417850578184e-05, + "loss": 2.5318, + "step": 11747 + }, + { + "epoch": 0.9481074973771285, + "grad_norm": 0.6458879113197327, + "learning_rate": 
7.352895442939852e-05, + "loss": 2.5451, + "step": 11748 + }, + { + "epoch": 0.9481882011137116, + "grad_norm": 0.7382936477661133, + "learning_rate": 7.351373101274288e-05, + "loss": 2.5393, + "step": 11749 + }, + { + "epoch": 0.9482689048502946, + "grad_norm": 0.7366087436676025, + "learning_rate": 7.349850825619429e-05, + "loss": 2.5591, + "step": 11750 + }, + { + "epoch": 0.9483496085868776, + "grad_norm": 0.6652588248252869, + "learning_rate": 7.348328616013213e-05, + "loss": 2.5348, + "step": 11751 + }, + { + "epoch": 0.9484303123234605, + "grad_norm": 0.7515435814857483, + "learning_rate": 7.346806472493584e-05, + "loss": 2.5208, + "step": 11752 + }, + { + "epoch": 0.9485110160600436, + "grad_norm": 0.7161263227462769, + "learning_rate": 7.345284395098469e-05, + "loss": 2.5518, + "step": 11753 + }, + { + "epoch": 0.9485917197966266, + "grad_norm": 0.7433953285217285, + "learning_rate": 7.343762383865807e-05, + "loss": 2.5914, + "step": 11754 + }, + { + "epoch": 0.9486724235332096, + "grad_norm": 0.674991250038147, + "learning_rate": 7.342240438833532e-05, + "loss": 2.5566, + "step": 11755 + }, + { + "epoch": 0.9487531272697926, + "grad_norm": 0.7511670589447021, + "learning_rate": 7.34071856003957e-05, + "loss": 2.5253, + "step": 11756 + }, + { + "epoch": 0.9488338310063756, + "grad_norm": 0.6672492623329163, + "learning_rate": 7.339196747521853e-05, + "loss": 2.4887, + "step": 11757 + }, + { + "epoch": 0.9489145347429586, + "grad_norm": 0.6826158761978149, + "learning_rate": 7.337675001318312e-05, + "loss": 2.5072, + "step": 11758 + }, + { + "epoch": 0.9489952384795416, + "grad_norm": 0.7189450860023499, + "learning_rate": 7.336153321466867e-05, + "loss": 2.5583, + "step": 11759 + }, + { + "epoch": 0.9490759422161246, + "grad_norm": 0.6923015117645264, + "learning_rate": 7.33463170800544e-05, + "loss": 2.5416, + "step": 11760 + }, + { + "epoch": 0.9491566459527077, + "grad_norm": 0.690060555934906, + "learning_rate": 7.333110160971963e-05, + "loss": 2.4931, 
+ "step": 11761 + }, + { + "epoch": 0.9492373496892906, + "grad_norm": 0.6887977719306946, + "learning_rate": 7.331588680404354e-05, + "loss": 2.4676, + "step": 11762 + }, + { + "epoch": 0.9493180534258736, + "grad_norm": 0.8573753237724304, + "learning_rate": 7.330067266340528e-05, + "loss": 2.5074, + "step": 11763 + }, + { + "epoch": 0.9493987571624566, + "grad_norm": 0.6760974526405334, + "learning_rate": 7.328545918818403e-05, + "loss": 2.5395, + "step": 11764 + }, + { + "epoch": 0.9494794608990397, + "grad_norm": 0.6946160197257996, + "learning_rate": 7.327024637875901e-05, + "loss": 2.535, + "step": 11765 + }, + { + "epoch": 0.9495601646356226, + "grad_norm": 0.6851378679275513, + "learning_rate": 7.32550342355093e-05, + "loss": 2.487, + "step": 11766 + }, + { + "epoch": 0.9496408683722056, + "grad_norm": 0.6480168104171753, + "learning_rate": 7.323982275881404e-05, + "loss": 2.513, + "step": 11767 + }, + { + "epoch": 0.9497215721087886, + "grad_norm": 0.6492218971252441, + "learning_rate": 7.322461194905239e-05, + "loss": 2.4532, + "step": 11768 + }, + { + "epoch": 0.9498022758453717, + "grad_norm": 0.6670051217079163, + "learning_rate": 7.320940180660337e-05, + "loss": 2.5258, + "step": 11769 + }, + { + "epoch": 0.9498829795819547, + "grad_norm": 0.6678066253662109, + "learning_rate": 7.319419233184608e-05, + "loss": 2.5388, + "step": 11770 + }, + { + "epoch": 0.9499636833185376, + "grad_norm": 0.693545937538147, + "learning_rate": 7.31789835251596e-05, + "loss": 2.5304, + "step": 11771 + }, + { + "epoch": 0.9500443870551206, + "grad_norm": 0.680486798286438, + "learning_rate": 7.316377538692297e-05, + "loss": 2.5024, + "step": 11772 + }, + { + "epoch": 0.9501250907917037, + "grad_norm": 0.7271847128868103, + "learning_rate": 7.314856791751518e-05, + "loss": 2.5947, + "step": 11773 + }, + { + "epoch": 0.9502057945282867, + "grad_norm": 0.6889839172363281, + "learning_rate": 7.31333611173153e-05, + "loss": 2.5135, + "step": 11774 + }, + { + "epoch": 
0.9502864982648697, + "grad_norm": 0.7431777119636536, + "learning_rate": 7.311815498670226e-05, + "loss": 2.5856, + "step": 11775 + }, + { + "epoch": 0.9503672020014526, + "grad_norm": 0.7168101072311401, + "learning_rate": 7.310294952605508e-05, + "loss": 2.4383, + "step": 11776 + }, + { + "epoch": 0.9504479057380357, + "grad_norm": 0.654803454875946, + "learning_rate": 7.308774473575271e-05, + "loss": 2.4908, + "step": 11777 + }, + { + "epoch": 0.9505286094746187, + "grad_norm": 0.6810718774795532, + "learning_rate": 7.307254061617412e-05, + "loss": 2.5073, + "step": 11778 + }, + { + "epoch": 0.9506093132112017, + "grad_norm": 0.637980043888092, + "learning_rate": 7.305733716769817e-05, + "loss": 2.5686, + "step": 11779 + }, + { + "epoch": 0.9506900169477847, + "grad_norm": 0.6549471020698547, + "learning_rate": 7.30421343907038e-05, + "loss": 2.5502, + "step": 11780 + }, + { + "epoch": 0.9507707206843676, + "grad_norm": 0.7087163329124451, + "learning_rate": 7.302693228556994e-05, + "loss": 2.4773, + "step": 11781 + }, + { + "epoch": 0.9508514244209507, + "grad_norm": 0.6230717897415161, + "learning_rate": 7.301173085267541e-05, + "loss": 2.4806, + "step": 11782 + }, + { + "epoch": 0.9509321281575337, + "grad_norm": 0.7145688533782959, + "learning_rate": 7.299653009239911e-05, + "loss": 2.5259, + "step": 11783 + }, + { + "epoch": 0.9510128318941167, + "grad_norm": 0.679100513458252, + "learning_rate": 7.298133000511988e-05, + "loss": 2.5012, + "step": 11784 + }, + { + "epoch": 0.9510935356306996, + "grad_norm": 0.7057691216468811, + "learning_rate": 7.29661305912165e-05, + "loss": 2.4826, + "step": 11785 + }, + { + "epoch": 0.9511742393672827, + "grad_norm": 0.65343177318573, + "learning_rate": 7.295093185106782e-05, + "loss": 2.4553, + "step": 11786 + }, + { + "epoch": 0.9512549431038657, + "grad_norm": 0.7948461174964905, + "learning_rate": 7.293573378505268e-05, + "loss": 2.478, + "step": 11787 + }, + { + "epoch": 0.9513356468404487, + "grad_norm": 
0.6511468887329102, + "learning_rate": 7.292053639354975e-05, + "loss": 2.4862, + "step": 11788 + }, + { + "epoch": 0.9514163505770317, + "grad_norm": 0.7293919324874878, + "learning_rate": 7.290533967693782e-05, + "loss": 2.5956, + "step": 11789 + }, + { + "epoch": 0.9514970543136148, + "grad_norm": 0.6691277623176575, + "learning_rate": 7.289014363559567e-05, + "loss": 2.5659, + "step": 11790 + }, + { + "epoch": 0.9515777580501977, + "grad_norm": 0.7054625749588013, + "learning_rate": 7.287494826990203e-05, + "loss": 2.5875, + "step": 11791 + }, + { + "epoch": 0.9516584617867807, + "grad_norm": 0.6597220301628113, + "learning_rate": 7.285975358023555e-05, + "loss": 2.5215, + "step": 11792 + }, + { + "epoch": 0.9517391655233637, + "grad_norm": 0.6719489097595215, + "learning_rate": 7.284455956697497e-05, + "loss": 2.4752, + "step": 11793 + }, + { + "epoch": 0.9518198692599468, + "grad_norm": 0.7325637340545654, + "learning_rate": 7.2829366230499e-05, + "loss": 2.5504, + "step": 11794 + }, + { + "epoch": 0.9519005729965297, + "grad_norm": 0.637668788433075, + "learning_rate": 7.281417357118619e-05, + "loss": 2.5105, + "step": 11795 + }, + { + "epoch": 0.9519812767331127, + "grad_norm": 0.7815340161323547, + "learning_rate": 7.279898158941525e-05, + "loss": 2.4998, + "step": 11796 + }, + { + "epoch": 0.9520619804696957, + "grad_norm": 0.6555821299552917, + "learning_rate": 7.278379028556481e-05, + "loss": 2.4326, + "step": 11797 + }, + { + "epoch": 0.9521426842062788, + "grad_norm": 0.7298933863639832, + "learning_rate": 7.276859966001344e-05, + "loss": 2.4779, + "step": 11798 + }, + { + "epoch": 0.9522233879428618, + "grad_norm": 0.683455765247345, + "learning_rate": 7.275340971313974e-05, + "loss": 2.4416, + "step": 11799 + }, + { + "epoch": 0.9523040916794447, + "grad_norm": 0.6353151798248291, + "learning_rate": 7.273822044532232e-05, + "loss": 2.4777, + "step": 11800 + }, + { + "epoch": 0.9523847954160277, + "grad_norm": 0.6898894309997559, + "learning_rate": 
7.27230318569397e-05, + "loss": 2.5351, + "step": 11801 + }, + { + "epoch": 0.9524654991526108, + "grad_norm": 0.6528690457344055, + "learning_rate": 7.270784394837041e-05, + "loss": 2.5145, + "step": 11802 + }, + { + "epoch": 0.9525462028891938, + "grad_norm": 0.6432619094848633, + "learning_rate": 7.269265671999304e-05, + "loss": 2.5002, + "step": 11803 + }, + { + "epoch": 0.9526269066257768, + "grad_norm": 0.7317861318588257, + "learning_rate": 7.267747017218601e-05, + "loss": 2.5318, + "step": 11804 + }, + { + "epoch": 0.9527076103623597, + "grad_norm": 0.7581185698509216, + "learning_rate": 7.266228430532785e-05, + "loss": 2.5313, + "step": 11805 + }, + { + "epoch": 0.9527883140989428, + "grad_norm": 0.7316486239433289, + "learning_rate": 7.264709911979702e-05, + "loss": 2.5147, + "step": 11806 + }, + { + "epoch": 0.9528690178355258, + "grad_norm": 0.7378978729248047, + "learning_rate": 7.263191461597199e-05, + "loss": 2.5149, + "step": 11807 + }, + { + "epoch": 0.9529497215721088, + "grad_norm": 0.6603738069534302, + "learning_rate": 7.26167307942312e-05, + "loss": 2.4684, + "step": 11808 + }, + { + "epoch": 0.9530304253086918, + "grad_norm": 0.7566502690315247, + "learning_rate": 7.260154765495302e-05, + "loss": 2.5535, + "step": 11809 + }, + { + "epoch": 0.9531111290452748, + "grad_norm": 0.693067729473114, + "learning_rate": 7.258636519851596e-05, + "loss": 2.5103, + "step": 11810 + }, + { + "epoch": 0.9531918327818578, + "grad_norm": 0.7049208283424377, + "learning_rate": 7.257118342529826e-05, + "loss": 2.5482, + "step": 11811 + }, + { + "epoch": 0.9532725365184408, + "grad_norm": 0.6986998319625854, + "learning_rate": 7.25560023356784e-05, + "loss": 2.4921, + "step": 11812 + }, + { + "epoch": 0.9533532402550238, + "grad_norm": 0.7079482674598694, + "learning_rate": 7.254082193003476e-05, + "loss": 2.5339, + "step": 11813 + }, + { + "epoch": 0.9534339439916069, + "grad_norm": 0.7283922433853149, + "learning_rate": 7.252564220874553e-05, + "loss": 2.5056, 
+ "step": 11814 + }, + { + "epoch": 0.9535146477281898, + "grad_norm": 0.6965533494949341, + "learning_rate": 7.251046317218914e-05, + "loss": 2.5512, + "step": 11815 + }, + { + "epoch": 0.9535953514647728, + "grad_norm": 0.7367159128189087, + "learning_rate": 7.24952848207439e-05, + "loss": 2.5015, + "step": 11816 + }, + { + "epoch": 0.9536760552013558, + "grad_norm": 0.6959818601608276, + "learning_rate": 7.248010715478802e-05, + "loss": 2.4969, + "step": 11817 + }, + { + "epoch": 0.9537567589379389, + "grad_norm": 0.69304358959198, + "learning_rate": 7.246493017469981e-05, + "loss": 2.5098, + "step": 11818 + }, + { + "epoch": 0.9538374626745219, + "grad_norm": 0.6830596327781677, + "learning_rate": 7.244975388085757e-05, + "loss": 2.5206, + "step": 11819 + }, + { + "epoch": 0.9539181664111048, + "grad_norm": 0.7354303598403931, + "learning_rate": 7.243457827363944e-05, + "loss": 2.5223, + "step": 11820 + }, + { + "epoch": 0.9539988701476878, + "grad_norm": 0.7046182751655579, + "learning_rate": 7.241940335342366e-05, + "loss": 2.4931, + "step": 11821 + }, + { + "epoch": 0.9540795738842709, + "grad_norm": 0.6990540623664856, + "learning_rate": 7.240422912058843e-05, + "loss": 2.4302, + "step": 11822 + }, + { + "epoch": 0.9541602776208539, + "grad_norm": 0.7562115788459778, + "learning_rate": 7.238905557551202e-05, + "loss": 2.5118, + "step": 11823 + }, + { + "epoch": 0.9542409813574368, + "grad_norm": 0.8212862014770508, + "learning_rate": 7.237388271857248e-05, + "loss": 2.5476, + "step": 11824 + }, + { + "epoch": 0.9543216850940198, + "grad_norm": 0.7095397710800171, + "learning_rate": 7.235871055014798e-05, + "loss": 2.5073, + "step": 11825 + }, + { + "epoch": 0.9544023888306029, + "grad_norm": 0.7174660563468933, + "learning_rate": 7.23435390706167e-05, + "loss": 2.4553, + "step": 11826 + }, + { + "epoch": 0.9544830925671859, + "grad_norm": 0.7121314406394958, + "learning_rate": 7.23283682803567e-05, + "loss": 2.5164, + "step": 11827 + }, + { + "epoch": 
0.9545637963037689, + "grad_norm": 0.7354126572608948, + "learning_rate": 7.231319817974609e-05, + "loss": 2.5413, + "step": 11828 + }, + { + "epoch": 0.9546445000403518, + "grad_norm": 0.7770543694496155, + "learning_rate": 7.2298028769163e-05, + "loss": 2.5244, + "step": 11829 + }, + { + "epoch": 0.9547252037769349, + "grad_norm": 0.6770393252372742, + "learning_rate": 7.228286004898541e-05, + "loss": 2.4707, + "step": 11830 + }, + { + "epoch": 0.9548059075135179, + "grad_norm": 0.6916880011558533, + "learning_rate": 7.22676920195914e-05, + "loss": 2.506, + "step": 11831 + }, + { + "epoch": 0.9548866112501009, + "grad_norm": 0.6299161314964294, + "learning_rate": 7.225252468135901e-05, + "loss": 2.5042, + "step": 11832 + }, + { + "epoch": 0.9549673149866839, + "grad_norm": 0.7081227898597717, + "learning_rate": 7.223735803466623e-05, + "loss": 2.5537, + "step": 11833 + }, + { + "epoch": 0.9550480187232668, + "grad_norm": 0.6600900888442993, + "learning_rate": 7.222219207989104e-05, + "loss": 2.5329, + "step": 11834 + }, + { + "epoch": 0.9551287224598499, + "grad_norm": 0.6715366244316101, + "learning_rate": 7.22070268174115e-05, + "loss": 2.5273, + "step": 11835 + }, + { + "epoch": 0.9552094261964329, + "grad_norm": 0.6655930280685425, + "learning_rate": 7.219186224760543e-05, + "loss": 2.4254, + "step": 11836 + }, + { + "epoch": 0.9552901299330159, + "grad_norm": 0.6925715208053589, + "learning_rate": 7.217669837085088e-05, + "loss": 2.5104, + "step": 11837 + }, + { + "epoch": 0.9553708336695989, + "grad_norm": 0.7132978439331055, + "learning_rate": 7.216153518752571e-05, + "loss": 2.5238, + "step": 11838 + }, + { + "epoch": 0.9554515374061819, + "grad_norm": 0.661651611328125, + "learning_rate": 7.214637269800791e-05, + "loss": 2.445, + "step": 11839 + }, + { + "epoch": 0.9555322411427649, + "grad_norm": 0.6635430455207825, + "learning_rate": 7.213121090267528e-05, + "loss": 2.4707, + "step": 11840 + }, + { + "epoch": 0.9556129448793479, + "grad_norm": 
0.6303616166114807, + "learning_rate": 7.211604980190571e-05, + "loss": 2.4923, + "step": 11841 + }, + { + "epoch": 0.9556936486159309, + "grad_norm": 0.7027459144592285, + "learning_rate": 7.210088939607708e-05, + "loss": 2.5592, + "step": 11842 + }, + { + "epoch": 0.955774352352514, + "grad_norm": 0.6539996862411499, + "learning_rate": 7.208572968556722e-05, + "loss": 2.5256, + "step": 11843 + }, + { + "epoch": 0.9558550560890969, + "grad_norm": 0.7019872069358826, + "learning_rate": 7.207057067075393e-05, + "loss": 2.488, + "step": 11844 + }, + { + "epoch": 0.9559357598256799, + "grad_norm": 0.6848211288452148, + "learning_rate": 7.205541235201507e-05, + "loss": 2.4883, + "step": 11845 + }, + { + "epoch": 0.9560164635622629, + "grad_norm": 0.7806351184844971, + "learning_rate": 7.204025472972834e-05, + "loss": 2.5563, + "step": 11846 + }, + { + "epoch": 0.956097167298846, + "grad_norm": 0.7327724695205688, + "learning_rate": 7.202509780427156e-05, + "loss": 2.5275, + "step": 11847 + }, + { + "epoch": 0.956177871035429, + "grad_norm": 0.6805681586265564, + "learning_rate": 7.200994157602248e-05, + "loss": 2.4723, + "step": 11848 + }, + { + "epoch": 0.9562585747720119, + "grad_norm": 0.7053409814834595, + "learning_rate": 7.19947860453588e-05, + "loss": 2.4471, + "step": 11849 + }, + { + "epoch": 0.9563392785085949, + "grad_norm": 0.6783127188682556, + "learning_rate": 7.197963121265826e-05, + "loss": 2.4586, + "step": 11850 + }, + { + "epoch": 0.956419982245178, + "grad_norm": 0.6639916300773621, + "learning_rate": 7.196447707829857e-05, + "loss": 2.4966, + "step": 11851 + }, + { + "epoch": 0.956500685981761, + "grad_norm": 0.684066891670227, + "learning_rate": 7.194932364265739e-05, + "loss": 2.5676, + "step": 11852 + }, + { + "epoch": 0.9565813897183439, + "grad_norm": 0.7872990965843201, + "learning_rate": 7.193417090611239e-05, + "loss": 2.5101, + "step": 11853 + }, + { + "epoch": 0.9566620934549269, + "grad_norm": 0.7543401122093201, + "learning_rate": 
7.19190188690412e-05, + "loss": 2.5503, + "step": 11854 + }, + { + "epoch": 0.95674279719151, + "grad_norm": 0.6514382362365723, + "learning_rate": 7.190386753182152e-05, + "loss": 2.4902, + "step": 11855 + }, + { + "epoch": 0.956823500928093, + "grad_norm": 0.6867108345031738, + "learning_rate": 7.188871689483087e-05, + "loss": 2.5054, + "step": 11856 + }, + { + "epoch": 0.956904204664676, + "grad_norm": 0.6536040306091309, + "learning_rate": 7.187356695844687e-05, + "loss": 2.5462, + "step": 11857 + }, + { + "epoch": 0.9569849084012589, + "grad_norm": 0.690237820148468, + "learning_rate": 7.185841772304711e-05, + "loss": 2.5673, + "step": 11858 + }, + { + "epoch": 0.957065612137842, + "grad_norm": 0.6699091196060181, + "learning_rate": 7.184326918900915e-05, + "loss": 2.4733, + "step": 11859 + }, + { + "epoch": 0.957146315874425, + "grad_norm": 0.6482241153717041, + "learning_rate": 7.18281213567105e-05, + "loss": 2.4897, + "step": 11860 + }, + { + "epoch": 0.957227019611008, + "grad_norm": 0.686130166053772, + "learning_rate": 7.181297422652874e-05, + "loss": 2.4596, + "step": 11861 + }, + { + "epoch": 0.957307723347591, + "grad_norm": 0.6507205367088318, + "learning_rate": 7.179782779884132e-05, + "loss": 2.5527, + "step": 11862 + }, + { + "epoch": 0.957388427084174, + "grad_norm": 0.6578813195228577, + "learning_rate": 7.178268207402577e-05, + "loss": 2.4975, + "step": 11863 + }, + { + "epoch": 0.957469130820757, + "grad_norm": 0.6931977272033691, + "learning_rate": 7.176753705245956e-05, + "loss": 2.5533, + "step": 11864 + }, + { + "epoch": 0.95754983455734, + "grad_norm": 0.7306256890296936, + "learning_rate": 7.17523927345201e-05, + "loss": 2.534, + "step": 11865 + }, + { + "epoch": 0.957630538293923, + "grad_norm": 0.6337448358535767, + "learning_rate": 7.173724912058483e-05, + "loss": 2.5015, + "step": 11866 + }, + { + "epoch": 0.9577112420305061, + "grad_norm": 0.6561456322669983, + "learning_rate": 7.172210621103124e-05, + "loss": 2.4946, + "step": 
11867 + }, + { + "epoch": 0.957791945767089, + "grad_norm": 0.6341130137443542, + "learning_rate": 7.170696400623666e-05, + "loss": 2.5611, + "step": 11868 + }, + { + "epoch": 0.957872649503672, + "grad_norm": 0.7202804088592529, + "learning_rate": 7.169182250657849e-05, + "loss": 2.5209, + "step": 11869 + }, + { + "epoch": 0.957953353240255, + "grad_norm": 0.6620556712150574, + "learning_rate": 7.167668171243408e-05, + "loss": 2.4895, + "step": 11870 + }, + { + "epoch": 0.9580340569768381, + "grad_norm": 0.6842508912086487, + "learning_rate": 7.166154162418087e-05, + "loss": 2.4417, + "step": 11871 + }, + { + "epoch": 0.958114760713421, + "grad_norm": 0.7539907693862915, + "learning_rate": 7.164640224219608e-05, + "loss": 2.5153, + "step": 11872 + }, + { + "epoch": 0.958195464450004, + "grad_norm": 0.6524286270141602, + "learning_rate": 7.163126356685703e-05, + "loss": 2.509, + "step": 11873 + }, + { + "epoch": 0.958276168186587, + "grad_norm": 0.7022691965103149, + "learning_rate": 7.16161255985411e-05, + "loss": 2.5223, + "step": 11874 + }, + { + "epoch": 0.9583568719231701, + "grad_norm": 0.6659076809883118, + "learning_rate": 7.160098833762549e-05, + "loss": 2.5231, + "step": 11875 + }, + { + "epoch": 0.9584375756597531, + "grad_norm": 0.6756494641304016, + "learning_rate": 7.15858517844875e-05, + "loss": 2.5017, + "step": 11876 + }, + { + "epoch": 0.958518279396336, + "grad_norm": 0.729850709438324, + "learning_rate": 7.157071593950436e-05, + "loss": 2.4583, + "step": 11877 + }, + { + "epoch": 0.958598983132919, + "grad_norm": 0.7155230641365051, + "learning_rate": 7.155558080305326e-05, + "loss": 2.4753, + "step": 11878 + }, + { + "epoch": 0.9586796868695021, + "grad_norm": 0.6553284525871277, + "learning_rate": 7.154044637551147e-05, + "loss": 2.5093, + "step": 11879 + }, + { + "epoch": 0.9587603906060851, + "grad_norm": 0.6516379117965698, + "learning_rate": 7.152531265725617e-05, + "loss": 2.4996, + "step": 11880 + }, + { + "epoch": 0.9588410943426681, + 
"grad_norm": 0.6871184706687927, + "learning_rate": 7.151017964866449e-05, + "loss": 2.5322, + "step": 11881 + }, + { + "epoch": 0.958921798079251, + "grad_norm": 0.6998933553695679, + "learning_rate": 7.149504735011358e-05, + "loss": 2.5328, + "step": 11882 + }, + { + "epoch": 0.959002501815834, + "grad_norm": 0.7065120935440063, + "learning_rate": 7.147991576198065e-05, + "loss": 2.5251, + "step": 11883 + }, + { + "epoch": 0.9590832055524171, + "grad_norm": 0.6718337535858154, + "learning_rate": 7.146478488464275e-05, + "loss": 2.5596, + "step": 11884 + }, + { + "epoch": 0.9591639092890001, + "grad_norm": 0.6394883990287781, + "learning_rate": 7.144965471847698e-05, + "loss": 2.5022, + "step": 11885 + }, + { + "epoch": 0.9592446130255831, + "grad_norm": 0.6867207288742065, + "learning_rate": 7.143452526386045e-05, + "loss": 2.4927, + "step": 11886 + }, + { + "epoch": 0.959325316762166, + "grad_norm": 0.6710157990455627, + "learning_rate": 7.141939652117026e-05, + "loss": 2.5127, + "step": 11887 + }, + { + "epoch": 0.9594060204987491, + "grad_norm": 0.6286540627479553, + "learning_rate": 7.14042684907834e-05, + "loss": 2.4966, + "step": 11888 + }, + { + "epoch": 0.9594867242353321, + "grad_norm": 0.7295787334442139, + "learning_rate": 7.13891411730769e-05, + "loss": 2.5127, + "step": 11889 + }, + { + "epoch": 0.9595674279719151, + "grad_norm": 0.646084189414978, + "learning_rate": 7.137401456842784e-05, + "loss": 2.5575, + "step": 11890 + }, + { + "epoch": 0.959648131708498, + "grad_norm": 0.7884495258331299, + "learning_rate": 7.135888867721312e-05, + "loss": 2.4807, + "step": 11891 + }, + { + "epoch": 0.9597288354450811, + "grad_norm": 0.638469934463501, + "learning_rate": 7.134376349980977e-05, + "loss": 2.4989, + "step": 11892 + }, + { + "epoch": 0.9598095391816641, + "grad_norm": 0.6802849769592285, + "learning_rate": 7.132863903659476e-05, + "loss": 2.5139, + "step": 11893 + }, + { + "epoch": 0.9598902429182471, + "grad_norm": 0.6657521724700928, + 
"learning_rate": 7.131351528794499e-05, + "loss": 2.4488, + "step": 11894 + }, + { + "epoch": 0.9599709466548301, + "grad_norm": 0.6537562012672424, + "learning_rate": 7.129839225423741e-05, + "loss": 2.4664, + "step": 11895 + }, + { + "epoch": 0.9600516503914132, + "grad_norm": 0.689637303352356, + "learning_rate": 7.128326993584897e-05, + "loss": 2.582, + "step": 11896 + }, + { + "epoch": 0.9601323541279961, + "grad_norm": 0.6701640486717224, + "learning_rate": 7.126814833315646e-05, + "loss": 2.4963, + "step": 11897 + }, + { + "epoch": 0.9602130578645791, + "grad_norm": 0.7466658353805542, + "learning_rate": 7.125302744653677e-05, + "loss": 2.5015, + "step": 11898 + }, + { + "epoch": 0.9602937616011621, + "grad_norm": 0.6487225294113159, + "learning_rate": 7.123790727636685e-05, + "loss": 2.5393, + "step": 11899 + }, + { + "epoch": 0.9603744653377452, + "grad_norm": 0.7204654216766357, + "learning_rate": 7.122278782302343e-05, + "loss": 2.4668, + "step": 11900 + }, + { + "epoch": 0.9604551690743282, + "grad_norm": 0.6852861046791077, + "learning_rate": 7.120766908688336e-05, + "loss": 2.5893, + "step": 11901 + }, + { + "epoch": 0.9605358728109111, + "grad_norm": 0.6483901739120483, + "learning_rate": 7.119255106832344e-05, + "loss": 2.48, + "step": 11902 + }, + { + "epoch": 0.9606165765474941, + "grad_norm": 0.6670375466346741, + "learning_rate": 7.117743376772049e-05, + "loss": 2.5225, + "step": 11903 + }, + { + "epoch": 0.9606972802840772, + "grad_norm": 0.6805974841117859, + "learning_rate": 7.116231718545118e-05, + "loss": 2.4652, + "step": 11904 + }, + { + "epoch": 0.9607779840206602, + "grad_norm": 0.6700397729873657, + "learning_rate": 7.114720132189232e-05, + "loss": 2.5115, + "step": 11905 + }, + { + "epoch": 0.9608586877572431, + "grad_norm": 0.7167409062385559, + "learning_rate": 7.113208617742066e-05, + "loss": 2.5062, + "step": 11906 + }, + { + "epoch": 0.9609393914938261, + "grad_norm": 0.7337077856063843, + "learning_rate": 7.111697175241286e-05, 
+ "loss": 2.5768, + "step": 11907 + }, + { + "epoch": 0.9610200952304092, + "grad_norm": 0.6681819558143616, + "learning_rate": 7.110185804724558e-05, + "loss": 2.5058, + "step": 11908 + }, + { + "epoch": 0.9611007989669922, + "grad_norm": 0.7235603332519531, + "learning_rate": 7.10867450622956e-05, + "loss": 2.4606, + "step": 11909 + }, + { + "epoch": 0.9611815027035752, + "grad_norm": 0.6931360363960266, + "learning_rate": 7.107163279793947e-05, + "loss": 2.5129, + "step": 11910 + }, + { + "epoch": 0.9612622064401581, + "grad_norm": 0.7331648468971252, + "learning_rate": 7.105652125455388e-05, + "loss": 2.4916, + "step": 11911 + }, + { + "epoch": 0.9613429101767412, + "grad_norm": 0.6538143754005432, + "learning_rate": 7.104141043251545e-05, + "loss": 2.5184, + "step": 11912 + }, + { + "epoch": 0.9614236139133242, + "grad_norm": 0.7018921375274658, + "learning_rate": 7.102630033220077e-05, + "loss": 2.5446, + "step": 11913 + }, + { + "epoch": 0.9615043176499072, + "grad_norm": 0.7528507709503174, + "learning_rate": 7.10111909539864e-05, + "loss": 2.4404, + "step": 11914 + }, + { + "epoch": 0.9615850213864902, + "grad_norm": 0.7258831858634949, + "learning_rate": 7.099608229824894e-05, + "loss": 2.4758, + "step": 11915 + }, + { + "epoch": 0.9616657251230732, + "grad_norm": 0.6954349875450134, + "learning_rate": 7.098097436536498e-05, + "loss": 2.4894, + "step": 11916 + }, + { + "epoch": 0.9617464288596562, + "grad_norm": 0.691584050655365, + "learning_rate": 7.096586715571092e-05, + "loss": 2.544, + "step": 11917 + }, + { + "epoch": 0.9618271325962392, + "grad_norm": 0.7107009291648865, + "learning_rate": 7.095076066966337e-05, + "loss": 2.4994, + "step": 11918 + }, + { + "epoch": 0.9619078363328222, + "grad_norm": 0.6492058634757996, + "learning_rate": 7.093565490759881e-05, + "loss": 2.5751, + "step": 11919 + }, + { + "epoch": 0.9619885400694053, + "grad_norm": 0.6817753314971924, + "learning_rate": 7.092054986989371e-05, + "loss": 2.5129, + "step": 11920 + }, + 
{ + "epoch": 0.9620692438059882, + "grad_norm": 0.6991822123527527, + "learning_rate": 7.090544555692448e-05, + "loss": 2.5728, + "step": 11921 + }, + { + "epoch": 0.9621499475425712, + "grad_norm": 0.6627625226974487, + "learning_rate": 7.089034196906768e-05, + "loss": 2.4479, + "step": 11922 + }, + { + "epoch": 0.9622306512791542, + "grad_norm": 0.6889652013778687, + "learning_rate": 7.087523910669957e-05, + "loss": 2.5323, + "step": 11923 + }, + { + "epoch": 0.9623113550157373, + "grad_norm": 0.7863786816596985, + "learning_rate": 7.086013697019667e-05, + "loss": 2.5146, + "step": 11924 + }, + { + "epoch": 0.9623920587523203, + "grad_norm": 0.6885324716567993, + "learning_rate": 7.084503555993536e-05, + "loss": 2.5072, + "step": 11925 + }, + { + "epoch": 0.9624727624889032, + "grad_norm": 0.619239091873169, + "learning_rate": 7.082993487629192e-05, + "loss": 2.4622, + "step": 11926 + }, + { + "epoch": 0.9625534662254862, + "grad_norm": 0.6762447953224182, + "learning_rate": 7.081483491964278e-05, + "loss": 2.5155, + "step": 11927 + }, + { + "epoch": 0.9626341699620693, + "grad_norm": 0.6559715867042542, + "learning_rate": 7.079973569036424e-05, + "loss": 2.4729, + "step": 11928 + }, + { + "epoch": 0.9627148736986523, + "grad_norm": 0.633280873298645, + "learning_rate": 7.078463718883261e-05, + "loss": 2.4715, + "step": 11929 + }, + { + "epoch": 0.9627955774352353, + "grad_norm": 0.7740094065666199, + "learning_rate": 7.07695394154242e-05, + "loss": 2.4871, + "step": 11930 + }, + { + "epoch": 0.9628762811718182, + "grad_norm": 0.7103284597396851, + "learning_rate": 7.075444237051527e-05, + "loss": 2.5299, + "step": 11931 + }, + { + "epoch": 0.9629569849084013, + "grad_norm": 0.6800934076309204, + "learning_rate": 7.073934605448212e-05, + "loss": 2.5919, + "step": 11932 + }, + { + "epoch": 0.9630376886449843, + "grad_norm": 0.6680917143821716, + "learning_rate": 7.072425046770092e-05, + "loss": 2.4942, + "step": 11933 + }, + { + "epoch": 0.9631183923815673, + 
"grad_norm": 0.7248062491416931, + "learning_rate": 7.070915561054792e-05, + "loss": 2.4956, + "step": 11934 + }, + { + "epoch": 0.9631990961181502, + "grad_norm": 0.6635782122612, + "learning_rate": 7.069406148339936e-05, + "loss": 2.4658, + "step": 11935 + }, + { + "epoch": 0.9632797998547332, + "grad_norm": 0.6751061081886292, + "learning_rate": 7.067896808663137e-05, + "loss": 2.4912, + "step": 11936 + }, + { + "epoch": 0.9633605035913163, + "grad_norm": 0.7476027607917786, + "learning_rate": 7.066387542062013e-05, + "loss": 2.4858, + "step": 11937 + }, + { + "epoch": 0.9634412073278993, + "grad_norm": 0.6770931482315063, + "learning_rate": 7.064878348574183e-05, + "loss": 2.4574, + "step": 11938 + }, + { + "epoch": 0.9635219110644823, + "grad_norm": 0.7105392813682556, + "learning_rate": 7.063369228237255e-05, + "loss": 2.5523, + "step": 11939 + }, + { + "epoch": 0.9636026148010652, + "grad_norm": 0.6806207299232483, + "learning_rate": 7.061860181088842e-05, + "loss": 2.4992, + "step": 11940 + }, + { + "epoch": 0.9636833185376483, + "grad_norm": 0.7059600353240967, + "learning_rate": 7.060351207166558e-05, + "loss": 2.5778, + "step": 11941 + }, + { + "epoch": 0.9637640222742313, + "grad_norm": 0.6306884288787842, + "learning_rate": 7.058842306508002e-05, + "loss": 2.5389, + "step": 11942 + }, + { + "epoch": 0.9638447260108143, + "grad_norm": 0.6997150778770447, + "learning_rate": 7.057333479150783e-05, + "loss": 2.5077, + "step": 11943 + }, + { + "epoch": 0.9639254297473973, + "grad_norm": 0.7073743343353271, + "learning_rate": 7.05582472513251e-05, + "loss": 2.5274, + "step": 11944 + }, + { + "epoch": 0.9640061334839803, + "grad_norm": 0.6768803596496582, + "learning_rate": 7.054316044490777e-05, + "loss": 2.5155, + "step": 11945 + }, + { + "epoch": 0.9640868372205633, + "grad_norm": 0.6792057752609253, + "learning_rate": 7.052807437263189e-05, + "loss": 2.5509, + "step": 11946 + }, + { + "epoch": 0.9641675409571463, + "grad_norm": 0.6883981823921204, + 
"learning_rate": 7.051298903487344e-05, + "loss": 2.5176, + "step": 11947 + }, + { + "epoch": 0.9642482446937293, + "grad_norm": 0.6934401392936707, + "learning_rate": 7.049790443200844e-05, + "loss": 2.502, + "step": 11948 + }, + { + "epoch": 0.9643289484303124, + "grad_norm": 0.6882597804069519, + "learning_rate": 7.048282056441269e-05, + "loss": 2.487, + "step": 11949 + }, + { + "epoch": 0.9644096521668953, + "grad_norm": 0.6972896456718445, + "learning_rate": 7.046773743246225e-05, + "loss": 2.5304, + "step": 11950 + }, + { + "epoch": 0.9644903559034783, + "grad_norm": 0.6591988205909729, + "learning_rate": 7.045265503653303e-05, + "loss": 2.4734, + "step": 11951 + }, + { + "epoch": 0.9645710596400613, + "grad_norm": 0.6890063285827637, + "learning_rate": 7.043757337700082e-05, + "loss": 2.5289, + "step": 11952 + }, + { + "epoch": 0.9646517633766444, + "grad_norm": 0.6931065917015076, + "learning_rate": 7.042249245424157e-05, + "loss": 2.484, + "step": 11953 + }, + { + "epoch": 0.9647324671132274, + "grad_norm": 0.6943762898445129, + "learning_rate": 7.040741226863117e-05, + "loss": 2.501, + "step": 11954 + }, + { + "epoch": 0.9648131708498103, + "grad_norm": 0.677154004573822, + "learning_rate": 7.039233282054536e-05, + "loss": 2.4976, + "step": 11955 + }, + { + "epoch": 0.9648938745863933, + "grad_norm": 0.6662883758544922, + "learning_rate": 7.037725411036003e-05, + "loss": 2.4928, + "step": 11956 + }, + { + "epoch": 0.9649745783229764, + "grad_norm": 0.6854663491249084, + "learning_rate": 7.0362176138451e-05, + "loss": 2.4657, + "step": 11957 + }, + { + "epoch": 0.9650552820595594, + "grad_norm": 0.6703238487243652, + "learning_rate": 7.034709890519397e-05, + "loss": 2.4879, + "step": 11958 + }, + { + "epoch": 0.9651359857961423, + "grad_norm": 0.7023652791976929, + "learning_rate": 7.033202241096474e-05, + "loss": 2.4619, + "step": 11959 + }, + { + "epoch": 0.9652166895327253, + "grad_norm": 0.6950454711914062, + "learning_rate": 7.031694665613911e-05, + 
"loss": 2.5125, + "step": 11960 + }, + { + "epoch": 0.9652973932693084, + "grad_norm": 0.6740411520004272, + "learning_rate": 7.030187164109272e-05, + "loss": 2.436, + "step": 11961 + }, + { + "epoch": 0.9653780970058914, + "grad_norm": 0.6697152256965637, + "learning_rate": 7.028679736620132e-05, + "loss": 2.5513, + "step": 11962 + }, + { + "epoch": 0.9654588007424744, + "grad_norm": 0.6920599937438965, + "learning_rate": 7.027172383184061e-05, + "loss": 2.5264, + "step": 11963 + }, + { + "epoch": 0.9655395044790573, + "grad_norm": 0.6493465304374695, + "learning_rate": 7.025665103838627e-05, + "loss": 2.4834, + "step": 11964 + }, + { + "epoch": 0.9656202082156404, + "grad_norm": 0.684092104434967, + "learning_rate": 7.02415789862139e-05, + "loss": 2.4662, + "step": 11965 + }, + { + "epoch": 0.9657009119522234, + "grad_norm": 0.7161515355110168, + "learning_rate": 7.022650767569921e-05, + "loss": 2.4648, + "step": 11966 + }, + { + "epoch": 0.9657816156888064, + "grad_norm": 0.6994524002075195, + "learning_rate": 7.021143710721778e-05, + "loss": 2.5186, + "step": 11967 + }, + { + "epoch": 0.9658623194253894, + "grad_norm": 0.7105295062065125, + "learning_rate": 7.019636728114518e-05, + "loss": 2.5132, + "step": 11968 + }, + { + "epoch": 0.9659430231619724, + "grad_norm": 0.7182292938232422, + "learning_rate": 7.018129819785702e-05, + "loss": 2.5469, + "step": 11969 + }, + { + "epoch": 0.9660237268985554, + "grad_norm": 0.7021759152412415, + "learning_rate": 7.016622985772887e-05, + "loss": 2.5477, + "step": 11970 + }, + { + "epoch": 0.9661044306351384, + "grad_norm": 0.6751413941383362, + "learning_rate": 7.015116226113624e-05, + "loss": 2.5174, + "step": 11971 + }, + { + "epoch": 0.9661851343717214, + "grad_norm": 0.6341918110847473, + "learning_rate": 7.013609540845468e-05, + "loss": 2.4778, + "step": 11972 + }, + { + "epoch": 0.9662658381083045, + "grad_norm": 0.7080956697463989, + "learning_rate": 7.012102930005971e-05, + "loss": 2.5304, + "step": 11973 + }, + 
{ + "epoch": 0.9663465418448874, + "grad_norm": 0.6367003321647644, + "learning_rate": 7.010596393632674e-05, + "loss": 2.4857, + "step": 11974 + }, + { + "epoch": 0.9664272455814704, + "grad_norm": 0.6841328740119934, + "learning_rate": 7.009089931763131e-05, + "loss": 2.5365, + "step": 11975 + }, + { + "epoch": 0.9665079493180534, + "grad_norm": 0.6568236351013184, + "learning_rate": 7.00758354443489e-05, + "loss": 2.5286, + "step": 11976 + }, + { + "epoch": 0.9665886530546365, + "grad_norm": 0.7071812152862549, + "learning_rate": 7.006077231685485e-05, + "loss": 2.458, + "step": 11977 + }, + { + "epoch": 0.9666693567912195, + "grad_norm": 0.6997712850570679, + "learning_rate": 7.004570993552462e-05, + "loss": 2.4571, + "step": 11978 + }, + { + "epoch": 0.9667500605278024, + "grad_norm": 0.6920793056488037, + "learning_rate": 7.003064830073359e-05, + "loss": 2.4172, + "step": 11979 + }, + { + "epoch": 0.9668307642643854, + "grad_norm": 0.6823387742042542, + "learning_rate": 7.001558741285718e-05, + "loss": 2.4895, + "step": 11980 + }, + { + "epoch": 0.9669114680009685, + "grad_norm": 0.7309569716453552, + "learning_rate": 7.000052727227068e-05, + "loss": 2.502, + "step": 11981 + }, + { + "epoch": 0.9669921717375515, + "grad_norm": 0.734708845615387, + "learning_rate": 6.998546787934946e-05, + "loss": 2.4918, + "step": 11982 + }, + { + "epoch": 0.9670728754741345, + "grad_norm": 0.690406084060669, + "learning_rate": 6.997040923446889e-05, + "loss": 2.4994, + "step": 11983 + }, + { + "epoch": 0.9671535792107174, + "grad_norm": 0.7126687169075012, + "learning_rate": 6.995535133800416e-05, + "loss": 2.4824, + "step": 11984 + }, + { + "epoch": 0.9672342829473004, + "grad_norm": 0.7020599246025085, + "learning_rate": 6.994029419033062e-05, + "loss": 2.4889, + "step": 11985 + }, + { + "epoch": 0.9673149866838835, + "grad_norm": 0.7690796852111816, + "learning_rate": 6.992523779182356e-05, + "loss": 2.4997, + "step": 11986 + }, + { + "epoch": 0.9673956904204665, + 
"grad_norm": 0.6635778546333313, + "learning_rate": 6.991018214285816e-05, + "loss": 2.4989, + "step": 11987 + }, + { + "epoch": 0.9674763941570494, + "grad_norm": 0.7088577747344971, + "learning_rate": 6.989512724380967e-05, + "loss": 2.549, + "step": 11988 + }, + { + "epoch": 0.9675570978936324, + "grad_norm": 0.6420924663543701, + "learning_rate": 6.988007309505333e-05, + "loss": 2.4585, + "step": 11989 + }, + { + "epoch": 0.9676378016302155, + "grad_norm": 0.7902400493621826, + "learning_rate": 6.986501969696428e-05, + "loss": 2.5009, + "step": 11990 + }, + { + "epoch": 0.9677185053667985, + "grad_norm": 0.700907289981842, + "learning_rate": 6.984996704991773e-05, + "loss": 2.4778, + "step": 11991 + }, + { + "epoch": 0.9677992091033815, + "grad_norm": 0.664378821849823, + "learning_rate": 6.983491515428883e-05, + "loss": 2.5116, + "step": 11992 + }, + { + "epoch": 0.9678799128399644, + "grad_norm": 0.6314663887023926, + "learning_rate": 6.981986401045266e-05, + "loss": 2.4588, + "step": 11993 + }, + { + "epoch": 0.9679606165765475, + "grad_norm": 0.6521078944206238, + "learning_rate": 6.980481361878438e-05, + "loss": 2.5224, + "step": 11994 + }, + { + "epoch": 0.9680413203131305, + "grad_norm": 0.6336014270782471, + "learning_rate": 6.978976397965907e-05, + "loss": 2.4297, + "step": 11995 + }, + { + "epoch": 0.9681220240497135, + "grad_norm": 0.7321500778198242, + "learning_rate": 6.977471509345183e-05, + "loss": 2.5252, + "step": 11996 + }, + { + "epoch": 0.9682027277862965, + "grad_norm": 0.686950147151947, + "learning_rate": 6.97596669605377e-05, + "loss": 2.5188, + "step": 11997 + }, + { + "epoch": 0.9682834315228795, + "grad_norm": 0.729343056678772, + "learning_rate": 6.97446195812917e-05, + "loss": 2.5157, + "step": 11998 + }, + { + "epoch": 0.9683641352594625, + "grad_norm": 0.6447068452835083, + "learning_rate": 6.972957295608889e-05, + "loss": 2.5041, + "step": 11999 + }, + { + "epoch": 0.9684448389960455, + "grad_norm": 0.6847280859947205, + 
"learning_rate": 6.971452708530423e-05, + "loss": 2.443, + "step": 12000 + }, + { + "epoch": 0.9684448389960455, + "eval_loss": 2.431878089904785, + "eval_runtime": 758.167, + "eval_samples_per_second": 3.456, + "eval_steps_per_second": 0.576, + "step": 12000 + }, + { + "epoch": 0.9685255427326285, + "grad_norm": 0.6440466046333313, + "learning_rate": 6.969948196931272e-05, + "loss": 2.5091, + "step": 12001 + }, + { + "epoch": 0.9686062464692116, + "grad_norm": 0.6570029258728027, + "learning_rate": 6.968443760848937e-05, + "loss": 2.491, + "step": 12002 + }, + { + "epoch": 0.9686869502057945, + "grad_norm": 0.7610877752304077, + "learning_rate": 6.966939400320905e-05, + "loss": 2.4713, + "step": 12003 + }, + { + "epoch": 0.9687676539423775, + "grad_norm": 0.7187781929969788, + "learning_rate": 6.965435115384669e-05, + "loss": 2.4303, + "step": 12004 + }, + { + "epoch": 0.9688483576789605, + "grad_norm": 0.7668420672416687, + "learning_rate": 6.963930906077727e-05, + "loss": 2.5513, + "step": 12005 + }, + { + "epoch": 0.9689290614155436, + "grad_norm": 0.7025619745254517, + "learning_rate": 6.96242677243756e-05, + "loss": 2.4349, + "step": 12006 + }, + { + "epoch": 0.9690097651521266, + "grad_norm": 0.7066935896873474, + "learning_rate": 6.960922714501657e-05, + "loss": 2.5465, + "step": 12007 + }, + { + "epoch": 0.9690904688887095, + "grad_norm": 0.6758970618247986, + "learning_rate": 6.95941873230751e-05, + "loss": 2.4827, + "step": 12008 + }, + { + "epoch": 0.9691711726252925, + "grad_norm": 0.7108862996101379, + "learning_rate": 6.957914825892591e-05, + "loss": 2.5412, + "step": 12009 + }, + { + "epoch": 0.9692518763618756, + "grad_norm": 0.660784125328064, + "learning_rate": 6.956410995294389e-05, + "loss": 2.5173, + "step": 12010 + }, + { + "epoch": 0.9693325800984586, + "grad_norm": 0.6966561079025269, + "learning_rate": 6.954907240550377e-05, + "loss": 2.5196, + "step": 12011 + }, + { + "epoch": 0.9694132838350416, + "grad_norm": 0.6889416575431824, + 
"learning_rate": 6.953403561698042e-05, + "loss": 2.5351, + "step": 12012 + }, + { + "epoch": 0.9694939875716245, + "grad_norm": 0.7578341960906982, + "learning_rate": 6.951899958774852e-05, + "loss": 2.5184, + "step": 12013 + }, + { + "epoch": 0.9695746913082076, + "grad_norm": 0.6735317707061768, + "learning_rate": 6.950396431818282e-05, + "loss": 2.4592, + "step": 12014 + }, + { + "epoch": 0.9696553950447906, + "grad_norm": 0.6903232932090759, + "learning_rate": 6.948892980865806e-05, + "loss": 2.5212, + "step": 12015 + }, + { + "epoch": 0.9697360987813736, + "grad_norm": 0.6477165818214417, + "learning_rate": 6.94738960595489e-05, + "loss": 2.4423, + "step": 12016 + }, + { + "epoch": 0.9698168025179565, + "grad_norm": 0.6778751015663147, + "learning_rate": 6.945886307123007e-05, + "loss": 2.547, + "step": 12017 + }, + { + "epoch": 0.9698975062545396, + "grad_norm": 0.690558135509491, + "learning_rate": 6.944383084407623e-05, + "loss": 2.5081, + "step": 12018 + }, + { + "epoch": 0.9699782099911226, + "grad_norm": 0.7210639119148254, + "learning_rate": 6.942879937846196e-05, + "loss": 2.496, + "step": 12019 + }, + { + "epoch": 0.9700589137277056, + "grad_norm": 0.7182444930076599, + "learning_rate": 6.941376867476194e-05, + "loss": 2.6138, + "step": 12020 + }, + { + "epoch": 0.9701396174642886, + "grad_norm": 0.6929295063018799, + "learning_rate": 6.939873873335077e-05, + "loss": 2.4828, + "step": 12021 + }, + { + "epoch": 0.9702203212008716, + "grad_norm": 0.6919693350791931, + "learning_rate": 6.938370955460298e-05, + "loss": 2.5123, + "step": 12022 + }, + { + "epoch": 0.9703010249374546, + "grad_norm": 0.6475244164466858, + "learning_rate": 6.93686811388932e-05, + "loss": 2.4992, + "step": 12023 + }, + { + "epoch": 0.9703817286740376, + "grad_norm": 0.6728265881538391, + "learning_rate": 6.935365348659597e-05, + "loss": 2.4486, + "step": 12024 + }, + { + "epoch": 0.9704624324106206, + "grad_norm": 0.6791470646858215, + "learning_rate": 6.933862659808582e-05, + 
"loss": 2.4657, + "step": 12025 + }, + { + "epoch": 0.9705431361472037, + "grad_norm": 0.7611662745475769, + "learning_rate": 6.932360047373721e-05, + "loss": 2.5243, + "step": 12026 + }, + { + "epoch": 0.9706238398837866, + "grad_norm": 0.6642355918884277, + "learning_rate": 6.930857511392467e-05, + "loss": 2.5308, + "step": 12027 + }, + { + "epoch": 0.9707045436203696, + "grad_norm": 0.7270805239677429, + "learning_rate": 6.92935505190227e-05, + "loss": 2.4708, + "step": 12028 + }, + { + "epoch": 0.9707852473569526, + "grad_norm": 0.6706295013427734, + "learning_rate": 6.927852668940568e-05, + "loss": 2.5136, + "step": 12029 + }, + { + "epoch": 0.9708659510935357, + "grad_norm": 0.6923376321792603, + "learning_rate": 6.92635036254481e-05, + "loss": 2.5238, + "step": 12030 + }, + { + "epoch": 0.9709466548301187, + "grad_norm": 0.7154483199119568, + "learning_rate": 6.924848132752436e-05, + "loss": 2.488, + "step": 12031 + }, + { + "epoch": 0.9710273585667016, + "grad_norm": 0.6675701141357422, + "learning_rate": 6.923345979600884e-05, + "loss": 2.5066, + "step": 12032 + }, + { + "epoch": 0.9711080623032846, + "grad_norm": 0.7282043695449829, + "learning_rate": 6.921843903127592e-05, + "loss": 2.5096, + "step": 12033 + }, + { + "epoch": 0.9711887660398677, + "grad_norm": 0.663526177406311, + "learning_rate": 6.92034190337e-05, + "loss": 2.5276, + "step": 12034 + }, + { + "epoch": 0.9712694697764507, + "grad_norm": 0.7491087913513184, + "learning_rate": 6.918839980365534e-05, + "loss": 2.5044, + "step": 12035 + }, + { + "epoch": 0.9713501735130337, + "grad_norm": 0.6977766156196594, + "learning_rate": 6.917338134151629e-05, + "loss": 2.6102, + "step": 12036 + }, + { + "epoch": 0.9714308772496166, + "grad_norm": 0.6447446346282959, + "learning_rate": 6.915836364765722e-05, + "loss": 2.5137, + "step": 12037 + }, + { + "epoch": 0.9715115809861996, + "grad_norm": 0.6801442503929138, + "learning_rate": 6.91433467224523e-05, + "loss": 2.5145, + "step": 12038 + }, + { + 
"epoch": 0.9715922847227827, + "grad_norm": 0.6843627691268921, + "learning_rate": 6.912833056627583e-05, + "loss": 2.6099, + "step": 12039 + }, + { + "epoch": 0.9716729884593657, + "grad_norm": 0.6862856149673462, + "learning_rate": 6.911331517950209e-05, + "loss": 2.5358, + "step": 12040 + }, + { + "epoch": 0.9717536921959486, + "grad_norm": 0.6835047602653503, + "learning_rate": 6.909830056250527e-05, + "loss": 2.5257, + "step": 12041 + }, + { + "epoch": 0.9718343959325316, + "grad_norm": 0.6958080530166626, + "learning_rate": 6.908328671565956e-05, + "loss": 2.5008, + "step": 12042 + }, + { + "epoch": 0.9719150996691147, + "grad_norm": 0.7556219100952148, + "learning_rate": 6.906827363933917e-05, + "loss": 2.5283, + "step": 12043 + }, + { + "epoch": 0.9719958034056977, + "grad_norm": 0.7074917554855347, + "learning_rate": 6.90532613339183e-05, + "loss": 2.4898, + "step": 12044 + }, + { + "epoch": 0.9720765071422807, + "grad_norm": 0.6456350684165955, + "learning_rate": 6.903824979977101e-05, + "loss": 2.4989, + "step": 12045 + }, + { + "epoch": 0.9721572108788636, + "grad_norm": 0.6609941720962524, + "learning_rate": 6.902323903727146e-05, + "loss": 2.4883, + "step": 12046 + }, + { + "epoch": 0.9722379146154467, + "grad_norm": 0.7132936716079712, + "learning_rate": 6.90082290467938e-05, + "loss": 2.4983, + "step": 12047 + }, + { + "epoch": 0.9723186183520297, + "grad_norm": 0.6686434745788574, + "learning_rate": 6.899321982871206e-05, + "loss": 2.4862, + "step": 12048 + }, + { + "epoch": 0.9723993220886127, + "grad_norm": 0.6792194247245789, + "learning_rate": 6.897821138340033e-05, + "loss": 2.5368, + "step": 12049 + }, + { + "epoch": 0.9724800258251957, + "grad_norm": 0.6829379796981812, + "learning_rate": 6.896320371123268e-05, + "loss": 2.4842, + "step": 12050 + }, + { + "epoch": 0.9725607295617787, + "grad_norm": 0.7459573745727539, + "learning_rate": 6.894819681258312e-05, + "loss": 2.5023, + "step": 12051 + }, + { + "epoch": 0.9726414332983617, + 
"grad_norm": 0.6700068712234497, + "learning_rate": 6.893319068782566e-05, + "loss": 2.552, + "step": 12052 + }, + { + "epoch": 0.9727221370349447, + "grad_norm": 0.7093638777732849, + "learning_rate": 6.891818533733434e-05, + "loss": 2.445, + "step": 12053 + }, + { + "epoch": 0.9728028407715277, + "grad_norm": 0.703599214553833, + "learning_rate": 6.890318076148304e-05, + "loss": 2.5536, + "step": 12054 + }, + { + "epoch": 0.9728835445081108, + "grad_norm": 0.6214482188224792, + "learning_rate": 6.888817696064578e-05, + "loss": 2.5188, + "step": 12055 + }, + { + "epoch": 0.9729642482446937, + "grad_norm": 0.6893547773361206, + "learning_rate": 6.887317393519645e-05, + "loss": 2.5596, + "step": 12056 + }, + { + "epoch": 0.9730449519812767, + "grad_norm": 0.6282656788825989, + "learning_rate": 6.885817168550903e-05, + "loss": 2.4873, + "step": 12057 + }, + { + "epoch": 0.9731256557178597, + "grad_norm": 0.6979188323020935, + "learning_rate": 6.884317021195737e-05, + "loss": 2.5358, + "step": 12058 + }, + { + "epoch": 0.9732063594544428, + "grad_norm": 0.7925785183906555, + "learning_rate": 6.882816951491533e-05, + "loss": 2.5358, + "step": 12059 + }, + { + "epoch": 0.9732870631910258, + "grad_norm": 0.6449821591377258, + "learning_rate": 6.881316959475684e-05, + "loss": 2.4784, + "step": 12060 + }, + { + "epoch": 0.9733677669276087, + "grad_norm": 0.7013393044471741, + "learning_rate": 6.879817045185565e-05, + "loss": 2.4804, + "step": 12061 + }, + { + "epoch": 0.9734484706641917, + "grad_norm": 0.8338057398796082, + "learning_rate": 6.878317208658559e-05, + "loss": 2.512, + "step": 12062 + }, + { + "epoch": 0.9735291744007748, + "grad_norm": 0.6815133094787598, + "learning_rate": 6.876817449932054e-05, + "loss": 2.467, + "step": 12063 + }, + { + "epoch": 0.9736098781373578, + "grad_norm": 0.659156858921051, + "learning_rate": 6.87531776904342e-05, + "loss": 2.503, + "step": 12064 + }, + { + "epoch": 0.9736905818739408, + "grad_norm": 0.7149603962898254, + 
"learning_rate": 6.873818166030033e-05, + "loss": 2.5135, + "step": 12065 + }, + { + "epoch": 0.9737712856105237, + "grad_norm": 0.7010510563850403, + "learning_rate": 6.872318640929272e-05, + "loss": 2.5133, + "step": 12066 + }, + { + "epoch": 0.9738519893471068, + "grad_norm": 0.6247616410255432, + "learning_rate": 6.870819193778504e-05, + "loss": 2.5189, + "step": 12067 + }, + { + "epoch": 0.9739326930836898, + "grad_norm": 0.6938940286636353, + "learning_rate": 6.869319824615101e-05, + "loss": 2.5053, + "step": 12068 + }, + { + "epoch": 0.9740133968202728, + "grad_norm": 0.7636895179748535, + "learning_rate": 6.867820533476436e-05, + "loss": 2.4989, + "step": 12069 + }, + { + "epoch": 0.9740941005568557, + "grad_norm": 0.6489234566688538, + "learning_rate": 6.866321320399869e-05, + "loss": 2.4935, + "step": 12070 + }, + { + "epoch": 0.9741748042934388, + "grad_norm": 0.6752095818519592, + "learning_rate": 6.864822185422764e-05, + "loss": 2.4835, + "step": 12071 + }, + { + "epoch": 0.9742555080300218, + "grad_norm": 0.6947118639945984, + "learning_rate": 6.863323128582486e-05, + "loss": 2.504, + "step": 12072 + }, + { + "epoch": 0.9743362117666048, + "grad_norm": 0.6815536618232727, + "learning_rate": 6.861824149916398e-05, + "loss": 2.5369, + "step": 12073 + }, + { + "epoch": 0.9744169155031878, + "grad_norm": 0.6550236344337463, + "learning_rate": 6.860325249461852e-05, + "loss": 2.4753, + "step": 12074 + }, + { + "epoch": 0.9744976192397709, + "grad_norm": 0.6833250522613525, + "learning_rate": 6.858826427256209e-05, + "loss": 2.4687, + "step": 12075 + }, + { + "epoch": 0.9745783229763538, + "grad_norm": 0.6925075650215149, + "learning_rate": 6.857327683336824e-05, + "loss": 2.5363, + "step": 12076 + }, + { + "epoch": 0.9746590267129368, + "grad_norm": 0.6754821538925171, + "learning_rate": 6.855829017741046e-05, + "loss": 2.4696, + "step": 12077 + }, + { + "epoch": 0.9747397304495198, + "grad_norm": 0.7360671162605286, + "learning_rate": 
6.854330430506228e-05, + "loss": 2.5144, + "step": 12078 + }, + { + "epoch": 0.9748204341861029, + "grad_norm": 0.6814733743667603, + "learning_rate": 6.852831921669723e-05, + "loss": 2.5059, + "step": 12079 + }, + { + "epoch": 0.9749011379226858, + "grad_norm": 0.7106744647026062, + "learning_rate": 6.851333491268869e-05, + "loss": 2.453, + "step": 12080 + }, + { + "epoch": 0.9749818416592688, + "grad_norm": 0.6623831987380981, + "learning_rate": 6.849835139341015e-05, + "loss": 2.5244, + "step": 12081 + }, + { + "epoch": 0.9750625453958518, + "grad_norm": 0.6723372936248779, + "learning_rate": 6.848336865923506e-05, + "loss": 2.5159, + "step": 12082 + }, + { + "epoch": 0.9751432491324349, + "grad_norm": 0.7256618142127991, + "learning_rate": 6.84683867105368e-05, + "loss": 2.494, + "step": 12083 + }, + { + "epoch": 0.9752239528690179, + "grad_norm": 0.6881731152534485, + "learning_rate": 6.845340554768874e-05, + "loss": 2.4374, + "step": 12084 + }, + { + "epoch": 0.9753046566056008, + "grad_norm": 0.6759666204452515, + "learning_rate": 6.843842517106434e-05, + "loss": 2.5082, + "step": 12085 + }, + { + "epoch": 0.9753853603421838, + "grad_norm": 0.6983315348625183, + "learning_rate": 6.842344558103684e-05, + "loss": 2.5191, + "step": 12086 + }, + { + "epoch": 0.9754660640787668, + "grad_norm": 0.6805596351623535, + "learning_rate": 6.840846677797959e-05, + "loss": 2.5289, + "step": 12087 + }, + { + "epoch": 0.9755467678153499, + "grad_norm": 0.712942361831665, + "learning_rate": 6.839348876226595e-05, + "loss": 2.5544, + "step": 12088 + }, + { + "epoch": 0.9756274715519329, + "grad_norm": 0.6931124329566956, + "learning_rate": 6.837851153426924e-05, + "loss": 2.5407, + "step": 12089 + }, + { + "epoch": 0.9757081752885158, + "grad_norm": 0.6939486265182495, + "learning_rate": 6.836353509436264e-05, + "loss": 2.5236, + "step": 12090 + }, + { + "epoch": 0.9757888790250988, + "grad_norm": 0.7434083223342896, + "learning_rate": 6.834855944291944e-05, + "loss": 2.4903, 
+ "step": 12091 + }, + { + "epoch": 0.9758695827616819, + "grad_norm": 0.672177255153656, + "learning_rate": 6.833358458031292e-05, + "loss": 2.4995, + "step": 12092 + }, + { + "epoch": 0.9759502864982649, + "grad_norm": 0.6631280779838562, + "learning_rate": 6.831861050691619e-05, + "loss": 2.4689, + "step": 12093 + }, + { + "epoch": 0.9760309902348479, + "grad_norm": 0.7485793232917786, + "learning_rate": 6.830363722310253e-05, + "loss": 2.5526, + "step": 12094 + }, + { + "epoch": 0.9761116939714308, + "grad_norm": 0.6592193245887756, + "learning_rate": 6.828866472924511e-05, + "loss": 2.4425, + "step": 12095 + }, + { + "epoch": 0.9761923977080139, + "grad_norm": 0.6479860544204712, + "learning_rate": 6.827369302571703e-05, + "loss": 2.4637, + "step": 12096 + }, + { + "epoch": 0.9762731014445969, + "grad_norm": 0.6694966554641724, + "learning_rate": 6.825872211289146e-05, + "loss": 2.5256, + "step": 12097 + }, + { + "epoch": 0.9763538051811799, + "grad_norm": 0.675751805305481, + "learning_rate": 6.82437519911415e-05, + "loss": 2.5021, + "step": 12098 + }, + { + "epoch": 0.9764345089177628, + "grad_norm": 0.7255450487136841, + "learning_rate": 6.822878266084026e-05, + "loss": 2.5275, + "step": 12099 + }, + { + "epoch": 0.9765152126543459, + "grad_norm": 0.7034213542938232, + "learning_rate": 6.821381412236079e-05, + "loss": 2.5432, + "step": 12100 + }, + { + "epoch": 0.9765959163909289, + "grad_norm": 0.6808038949966431, + "learning_rate": 6.819884637607619e-05, + "loss": 2.5044, + "step": 12101 + }, + { + "epoch": 0.9766766201275119, + "grad_norm": 0.6601580381393433, + "learning_rate": 6.818387942235945e-05, + "loss": 2.4602, + "step": 12102 + }, + { + "epoch": 0.9767573238640949, + "grad_norm": 0.7163928151130676, + "learning_rate": 6.816891326158359e-05, + "loss": 2.4785, + "step": 12103 + }, + { + "epoch": 0.976838027600678, + "grad_norm": 0.6616904735565186, + "learning_rate": 6.815394789412164e-05, + "loss": 2.5081, + "step": 12104 + }, + { + "epoch": 
0.9769187313372609, + "grad_norm": 0.6476422548294067, + "learning_rate": 6.813898332034657e-05, + "loss": 2.4624, + "step": 12105 + }, + { + "epoch": 0.9769994350738439, + "grad_norm": 0.6468440890312195, + "learning_rate": 6.812401954063131e-05, + "loss": 2.4948, + "step": 12106 + }, + { + "epoch": 0.9770801388104269, + "grad_norm": 0.6988391876220703, + "learning_rate": 6.810905655534878e-05, + "loss": 2.4958, + "step": 12107 + }, + { + "epoch": 0.97716084254701, + "grad_norm": 0.6777953505516052, + "learning_rate": 6.809409436487196e-05, + "loss": 2.5304, + "step": 12108 + }, + { + "epoch": 0.9772415462835929, + "grad_norm": 0.7115550637245178, + "learning_rate": 6.807913296957368e-05, + "loss": 2.5321, + "step": 12109 + }, + { + "epoch": 0.9773222500201759, + "grad_norm": 0.737823486328125, + "learning_rate": 6.806417236982684e-05, + "loss": 2.5121, + "step": 12110 + }, + { + "epoch": 0.9774029537567589, + "grad_norm": 0.6797437071800232, + "learning_rate": 6.804921256600439e-05, + "loss": 2.4783, + "step": 12111 + }, + { + "epoch": 0.977483657493342, + "grad_norm": 0.7240802645683289, + "learning_rate": 6.803425355847897e-05, + "loss": 2.4949, + "step": 12112 + }, + { + "epoch": 0.977564361229925, + "grad_norm": 0.6433781981468201, + "learning_rate": 6.801929534762357e-05, + "loss": 2.4937, + "step": 12113 + }, + { + "epoch": 0.9776450649665079, + "grad_norm": 0.6935293078422546, + "learning_rate": 6.800433793381095e-05, + "loss": 2.5025, + "step": 12114 + }, + { + "epoch": 0.9777257687030909, + "grad_norm": 0.699780285358429, + "learning_rate": 6.798938131741383e-05, + "loss": 2.5231, + "step": 12115 + }, + { + "epoch": 0.977806472439674, + "grad_norm": 0.6414729952812195, + "learning_rate": 6.7974425498805e-05, + "loss": 2.4422, + "step": 12116 + }, + { + "epoch": 0.977887176176257, + "grad_norm": 0.6733608841896057, + "learning_rate": 6.795947047835722e-05, + "loss": 2.4873, + "step": 12117 + }, + { + "epoch": 0.97796787991284, + "grad_norm": 
0.6985765099525452, + "learning_rate": 6.794451625644318e-05, + "loss": 2.4994, + "step": 12118 + }, + { + "epoch": 0.9780485836494229, + "grad_norm": 0.6429893374443054, + "learning_rate": 6.792956283343559e-05, + "loss": 2.4968, + "step": 12119 + }, + { + "epoch": 0.978129287386006, + "grad_norm": 0.7129024267196655, + "learning_rate": 6.79146102097071e-05, + "loss": 2.5457, + "step": 12120 + }, + { + "epoch": 0.978209991122589, + "grad_norm": 0.6811943650245667, + "learning_rate": 6.789965838563047e-05, + "loss": 2.5012, + "step": 12121 + }, + { + "epoch": 0.978290694859172, + "grad_norm": 0.7269948720932007, + "learning_rate": 6.788470736157821e-05, + "loss": 2.5124, + "step": 12122 + }, + { + "epoch": 0.978371398595755, + "grad_norm": 0.7396084666252136, + "learning_rate": 6.786975713792299e-05, + "loss": 2.5631, + "step": 12123 + }, + { + "epoch": 0.978452102332338, + "grad_norm": 0.6880094408988953, + "learning_rate": 6.785480771503745e-05, + "loss": 2.5103, + "step": 12124 + }, + { + "epoch": 0.978532806068921, + "grad_norm": 0.737095057964325, + "learning_rate": 6.783985909329409e-05, + "loss": 2.5062, + "step": 12125 + }, + { + "epoch": 0.978613509805504, + "grad_norm": 0.6540948152542114, + "learning_rate": 6.782491127306552e-05, + "loss": 2.5568, + "step": 12126 + }, + { + "epoch": 0.978694213542087, + "grad_norm": 0.669706404209137, + "learning_rate": 6.780996425472427e-05, + "loss": 2.5156, + "step": 12127 + }, + { + "epoch": 0.97877491727867, + "grad_norm": 0.6722843647003174, + "learning_rate": 6.779501803864286e-05, + "loss": 2.4784, + "step": 12128 + }, + { + "epoch": 0.978855621015253, + "grad_norm": 0.6545475125312805, + "learning_rate": 6.778007262519377e-05, + "loss": 2.5159, + "step": 12129 + }, + { + "epoch": 0.978936324751836, + "grad_norm": 0.7010136246681213, + "learning_rate": 6.776512801474953e-05, + "loss": 2.5244, + "step": 12130 + }, + { + "epoch": 0.979017028488419, + "grad_norm": 0.6912714242935181, + "learning_rate": 
6.775018420768253e-05, + "loss": 2.5223, + "step": 12131 + }, + { + "epoch": 0.9790977322250021, + "grad_norm": 0.6864827275276184, + "learning_rate": 6.773524120436525e-05, + "loss": 2.5027, + "step": 12132 + }, + { + "epoch": 0.979178435961585, + "grad_norm": 0.7586981058120728, + "learning_rate": 6.77202990051701e-05, + "loss": 2.4554, + "step": 12133 + }, + { + "epoch": 0.979259139698168, + "grad_norm": 0.6487839818000793, + "learning_rate": 6.770535761046948e-05, + "loss": 2.5035, + "step": 12134 + }, + { + "epoch": 0.979339843434751, + "grad_norm": 0.7193071246147156, + "learning_rate": 6.769041702063575e-05, + "loss": 2.4669, + "step": 12135 + }, + { + "epoch": 0.9794205471713341, + "grad_norm": 0.7118960618972778, + "learning_rate": 6.76754772360413e-05, + "loss": 2.493, + "step": 12136 + }, + { + "epoch": 0.9795012509079171, + "grad_norm": 0.6617394685745239, + "learning_rate": 6.766053825705847e-05, + "loss": 2.4771, + "step": 12137 + }, + { + "epoch": 0.9795819546445, + "grad_norm": 0.7664859294891357, + "learning_rate": 6.764560008405953e-05, + "loss": 2.5191, + "step": 12138 + }, + { + "epoch": 0.979662658381083, + "grad_norm": 0.708063542842865, + "learning_rate": 6.763066271741682e-05, + "loss": 2.5521, + "step": 12139 + }, + { + "epoch": 0.979743362117666, + "grad_norm": 0.6951049566268921, + "learning_rate": 6.761572615750267e-05, + "loss": 2.4708, + "step": 12140 + }, + { + "epoch": 0.9798240658542491, + "grad_norm": 0.6914932727813721, + "learning_rate": 6.760079040468921e-05, + "loss": 2.5101, + "step": 12141 + }, + { + "epoch": 0.9799047695908321, + "grad_norm": 0.6843075752258301, + "learning_rate": 6.758585545934876e-05, + "loss": 2.4932, + "step": 12142 + }, + { + "epoch": 0.979985473327415, + "grad_norm": 0.6567733883857727, + "learning_rate": 6.757092132185354e-05, + "loss": 2.4577, + "step": 12143 + }, + { + "epoch": 0.980066177063998, + "grad_norm": 0.6874415874481201, + "learning_rate": 6.75559879925757e-05, + "loss": 2.4818, + "step": 
12144 + }, + { + "epoch": 0.9801468808005811, + "grad_norm": 0.7274627685546875, + "learning_rate": 6.754105547188746e-05, + "loss": 2.523, + "step": 12145 + }, + { + "epoch": 0.9802275845371641, + "grad_norm": 0.6991173028945923, + "learning_rate": 6.7526123760161e-05, + "loss": 2.4864, + "step": 12146 + }, + { + "epoch": 0.980308288273747, + "grad_norm": 0.670078456401825, + "learning_rate": 6.75111928577684e-05, + "loss": 2.4889, + "step": 12147 + }, + { + "epoch": 0.98038899201033, + "grad_norm": 0.6653482913970947, + "learning_rate": 6.749626276508178e-05, + "loss": 2.4652, + "step": 12148 + }, + { + "epoch": 0.9804696957469131, + "grad_norm": 0.7329251766204834, + "learning_rate": 6.748133348247326e-05, + "loss": 2.518, + "step": 12149 + }, + { + "epoch": 0.9805503994834961, + "grad_norm": 0.7792871594429016, + "learning_rate": 6.746640501031495e-05, + "loss": 2.5018, + "step": 12150 + }, + { + "epoch": 0.9806311032200791, + "grad_norm": 0.6962797045707703, + "learning_rate": 6.745147734897883e-05, + "loss": 2.4388, + "step": 12151 + }, + { + "epoch": 0.980711806956662, + "grad_norm": 0.6981272101402283, + "learning_rate": 6.7436550498837e-05, + "loss": 2.4886, + "step": 12152 + }, + { + "epoch": 0.9807925106932451, + "grad_norm": 0.6696565747261047, + "learning_rate": 6.742162446026146e-05, + "loss": 2.5258, + "step": 12153 + }, + { + "epoch": 0.9808732144298281, + "grad_norm": 0.6922139525413513, + "learning_rate": 6.740669923362417e-05, + "loss": 2.493, + "step": 12154 + }, + { + "epoch": 0.9809539181664111, + "grad_norm": 0.6745694875717163, + "learning_rate": 6.739177481929715e-05, + "loss": 2.5209, + "step": 12155 + }, + { + "epoch": 0.9810346219029941, + "grad_norm": 0.7023215889930725, + "learning_rate": 6.737685121765238e-05, + "loss": 2.4987, + "step": 12156 + }, + { + "epoch": 0.9811153256395772, + "grad_norm": 0.6337805390357971, + "learning_rate": 6.73619284290617e-05, + "loss": 2.4838, + "step": 12157 + }, + { + "epoch": 0.9811960293761601, + 
"grad_norm": 0.6747817397117615, + "learning_rate": 6.73470064538971e-05, + "loss": 2.4834, + "step": 12158 + }, + { + "epoch": 0.9812767331127431, + "grad_norm": 0.6714580655097961, + "learning_rate": 6.733208529253047e-05, + "loss": 2.4724, + "step": 12159 + }, + { + "epoch": 0.9813574368493261, + "grad_norm": 0.6927861571311951, + "learning_rate": 6.731716494533364e-05, + "loss": 2.495, + "step": 12160 + }, + { + "epoch": 0.9814381405859092, + "grad_norm": 0.6576036214828491, + "learning_rate": 6.73022454126785e-05, + "loss": 2.5415, + "step": 12161 + }, + { + "epoch": 0.9815188443224921, + "grad_norm": 0.6495294570922852, + "learning_rate": 6.728732669493691e-05, + "loss": 2.4889, + "step": 12162 + }, + { + "epoch": 0.9815995480590751, + "grad_norm": 0.6680364012718201, + "learning_rate": 6.72724087924806e-05, + "loss": 2.4733, + "step": 12163 + }, + { + "epoch": 0.9816802517956581, + "grad_norm": 0.6816582083702087, + "learning_rate": 6.725749170568143e-05, + "loss": 2.4688, + "step": 12164 + }, + { + "epoch": 0.9817609555322412, + "grad_norm": 0.6995956897735596, + "learning_rate": 6.724257543491116e-05, + "loss": 2.4962, + "step": 12165 + }, + { + "epoch": 0.9818416592688242, + "grad_norm": 0.6728340983390808, + "learning_rate": 6.722765998054157e-05, + "loss": 2.5218, + "step": 12166 + }, + { + "epoch": 0.9819223630054071, + "grad_norm": 0.6835319995880127, + "learning_rate": 6.721274534294433e-05, + "loss": 2.4845, + "step": 12167 + }, + { + "epoch": 0.9820030667419901, + "grad_norm": 0.6969910264015198, + "learning_rate": 6.719783152249119e-05, + "loss": 2.4983, + "step": 12168 + }, + { + "epoch": 0.9820837704785732, + "grad_norm": 0.7327036261558533, + "learning_rate": 6.718291851955383e-05, + "loss": 2.5893, + "step": 12169 + }, + { + "epoch": 0.9821644742151562, + "grad_norm": 0.7092839479446411, + "learning_rate": 6.716800633450393e-05, + "loss": 2.5104, + "step": 12170 + }, + { + "epoch": 0.9822451779517392, + "grad_norm": 0.7384308576583862, + 
"learning_rate": 6.715309496771311e-05, + "loss": 2.5066, + "step": 12171 + }, + { + "epoch": 0.9823258816883221, + "grad_norm": 0.6744845509529114, + "learning_rate": 6.713818441955308e-05, + "loss": 2.469, + "step": 12172 + }, + { + "epoch": 0.9824065854249052, + "grad_norm": 0.6497980952262878, + "learning_rate": 6.712327469039536e-05, + "loss": 2.4943, + "step": 12173 + }, + { + "epoch": 0.9824872891614882, + "grad_norm": 0.6550357937812805, + "learning_rate": 6.710836578061156e-05, + "loss": 2.5019, + "step": 12174 + }, + { + "epoch": 0.9825679928980712, + "grad_norm": 0.6813549995422363, + "learning_rate": 6.709345769057331e-05, + "loss": 2.4314, + "step": 12175 + }, + { + "epoch": 0.9826486966346542, + "grad_norm": 0.6636531352996826, + "learning_rate": 6.707855042065209e-05, + "loss": 2.5202, + "step": 12176 + }, + { + "epoch": 0.9827294003712372, + "grad_norm": 0.6684894561767578, + "learning_rate": 6.706364397121944e-05, + "loss": 2.4353, + "step": 12177 + }, + { + "epoch": 0.9828101041078202, + "grad_norm": 0.6813677549362183, + "learning_rate": 6.704873834264688e-05, + "loss": 2.4254, + "step": 12178 + }, + { + "epoch": 0.9828908078444032, + "grad_norm": 0.6584975719451904, + "learning_rate": 6.70338335353059e-05, + "loss": 2.5647, + "step": 12179 + }, + { + "epoch": 0.9829715115809862, + "grad_norm": 0.6959114074707031, + "learning_rate": 6.701892954956796e-05, + "loss": 2.5203, + "step": 12180 + }, + { + "epoch": 0.9830522153175693, + "grad_norm": 0.6399044990539551, + "learning_rate": 6.700402638580452e-05, + "loss": 2.4697, + "step": 12181 + }, + { + "epoch": 0.9831329190541522, + "grad_norm": 0.6838750839233398, + "learning_rate": 6.698912404438702e-05, + "loss": 2.5261, + "step": 12182 + }, + { + "epoch": 0.9832136227907352, + "grad_norm": 0.6286367177963257, + "learning_rate": 6.697422252568679e-05, + "loss": 2.4264, + "step": 12183 + }, + { + "epoch": 0.9832943265273182, + "grad_norm": 0.901637852191925, + "learning_rate": 6.695932183007528e-05, 
+ "loss": 2.4908, + "step": 12184 + }, + { + "epoch": 0.9833750302639013, + "grad_norm": 0.8361458778381348, + "learning_rate": 6.694442195792386e-05, + "loss": 2.5183, + "step": 12185 + }, + { + "epoch": 0.9834557340004842, + "grad_norm": 0.7033401727676392, + "learning_rate": 6.692952290960384e-05, + "loss": 2.5702, + "step": 12186 + }, + { + "epoch": 0.9835364377370672, + "grad_norm": 0.669486939907074, + "learning_rate": 6.691462468548653e-05, + "loss": 2.5143, + "step": 12187 + }, + { + "epoch": 0.9836171414736502, + "grad_norm": 0.7043797969818115, + "learning_rate": 6.689972728594329e-05, + "loss": 2.5638, + "step": 12188 + }, + { + "epoch": 0.9836978452102332, + "grad_norm": 0.6532511115074158, + "learning_rate": 6.688483071134537e-05, + "loss": 2.5227, + "step": 12189 + }, + { + "epoch": 0.9837785489468163, + "grad_norm": 0.7363922595977783, + "learning_rate": 6.6869934962064e-05, + "loss": 2.4953, + "step": 12190 + }, + { + "epoch": 0.9838592526833992, + "grad_norm": 0.6746651530265808, + "learning_rate": 6.685504003847051e-05, + "loss": 2.5021, + "step": 12191 + }, + { + "epoch": 0.9839399564199822, + "grad_norm": 0.665459930896759, + "learning_rate": 6.684014594093604e-05, + "loss": 2.5126, + "step": 12192 + }, + { + "epoch": 0.9840206601565652, + "grad_norm": 0.6618975400924683, + "learning_rate": 6.682525266983179e-05, + "loss": 2.5046, + "step": 12193 + }, + { + "epoch": 0.9841013638931483, + "grad_norm": 0.6536173224449158, + "learning_rate": 6.6810360225529e-05, + "loss": 2.4222, + "step": 12194 + }, + { + "epoch": 0.9841820676297313, + "grad_norm": 0.6882187724113464, + "learning_rate": 6.679546860839876e-05, + "loss": 2.475, + "step": 12195 + }, + { + "epoch": 0.9842627713663142, + "grad_norm": 0.6941187977790833, + "learning_rate": 6.678057781881224e-05, + "loss": 2.5642, + "step": 12196 + }, + { + "epoch": 0.9843434751028972, + "grad_norm": 0.7057064175605774, + "learning_rate": 6.676568785714057e-05, + "loss": 2.4817, + "step": 12197 + }, + { 
+ "epoch": 0.9844241788394803, + "grad_norm": 0.6455948352813721, + "learning_rate": 6.675079872375487e-05, + "loss": 2.5206, + "step": 12198 + }, + { + "epoch": 0.9845048825760633, + "grad_norm": 0.6559014320373535, + "learning_rate": 6.673591041902613e-05, + "loss": 2.4082, + "step": 12199 + }, + { + "epoch": 0.9845855863126463, + "grad_norm": 0.6732046008110046, + "learning_rate": 6.672102294332542e-05, + "loss": 2.5472, + "step": 12200 + }, + { + "epoch": 0.9846662900492292, + "grad_norm": 0.7074914574623108, + "learning_rate": 6.670613629702391e-05, + "loss": 2.5243, + "step": 12201 + }, + { + "epoch": 0.9847469937858123, + "grad_norm": 0.6780694127082825, + "learning_rate": 6.669125048049246e-05, + "loss": 2.494, + "step": 12202 + }, + { + "epoch": 0.9848276975223953, + "grad_norm": 0.6361132264137268, + "learning_rate": 6.66763654941021e-05, + "loss": 2.4764, + "step": 12203 + }, + { + "epoch": 0.9849084012589783, + "grad_norm": 0.752727210521698, + "learning_rate": 6.666148133822387e-05, + "loss": 2.4942, + "step": 12204 + }, + { + "epoch": 0.9849891049955612, + "grad_norm": 0.7282724976539612, + "learning_rate": 6.664659801322863e-05, + "loss": 2.471, + "step": 12205 + }, + { + "epoch": 0.9850698087321443, + "grad_norm": 0.6977601051330566, + "learning_rate": 6.663171551948736e-05, + "loss": 2.4695, + "step": 12206 + }, + { + "epoch": 0.9851505124687273, + "grad_norm": 0.6957824230194092, + "learning_rate": 6.661683385737101e-05, + "loss": 2.5096, + "step": 12207 + }, + { + "epoch": 0.9852312162053103, + "grad_norm": 0.6197221279144287, + "learning_rate": 6.660195302725037e-05, + "loss": 2.4199, + "step": 12208 + }, + { + "epoch": 0.9853119199418933, + "grad_norm": 0.747558057308197, + "learning_rate": 6.658707302949638e-05, + "loss": 2.5988, + "step": 12209 + }, + { + "epoch": 0.9853926236784764, + "grad_norm": 0.6593184471130371, + "learning_rate": 6.657219386447989e-05, + "loss": 2.4837, + "step": 12210 + }, + { + "epoch": 0.9854733274150593, + 
"grad_norm": 0.6795992255210876, + "learning_rate": 6.655731553257169e-05, + "loss": 2.498, + "step": 12211 + }, + { + "epoch": 0.9855540311516423, + "grad_norm": 0.7588422298431396, + "learning_rate": 6.65424380341426e-05, + "loss": 2.444, + "step": 12212 + }, + { + "epoch": 0.9856347348882253, + "grad_norm": 0.7791433930397034, + "learning_rate": 6.652756136956342e-05, + "loss": 2.4893, + "step": 12213 + }, + { + "epoch": 0.9857154386248084, + "grad_norm": 0.6320767998695374, + "learning_rate": 6.651268553920493e-05, + "loss": 2.4831, + "step": 12214 + }, + { + "epoch": 0.9857961423613913, + "grad_norm": 0.6818140745162964, + "learning_rate": 6.649781054343783e-05, + "loss": 2.4316, + "step": 12215 + }, + { + "epoch": 0.9858768460979743, + "grad_norm": 0.7460113763809204, + "learning_rate": 6.648293638263285e-05, + "loss": 2.5335, + "step": 12216 + }, + { + "epoch": 0.9859575498345573, + "grad_norm": 0.714074432849884, + "learning_rate": 6.646806305716079e-05, + "loss": 2.4573, + "step": 12217 + }, + { + "epoch": 0.9860382535711404, + "grad_norm": 0.6815951466560364, + "learning_rate": 6.645319056739217e-05, + "loss": 2.4758, + "step": 12218 + }, + { + "epoch": 0.9861189573077234, + "grad_norm": 0.6842799782752991, + "learning_rate": 6.643831891369775e-05, + "loss": 2.4998, + "step": 12219 + }, + { + "epoch": 0.9861996610443063, + "grad_norm": 0.6725212335586548, + "learning_rate": 6.642344809644818e-05, + "loss": 2.5179, + "step": 12220 + }, + { + "epoch": 0.9862803647808893, + "grad_norm": 0.7859417796134949, + "learning_rate": 6.640857811601402e-05, + "loss": 2.5801, + "step": 12221 + }, + { + "epoch": 0.9863610685174724, + "grad_norm": 0.6438577771186829, + "learning_rate": 6.639370897276591e-05, + "loss": 2.4659, + "step": 12222 + }, + { + "epoch": 0.9864417722540554, + "grad_norm": 0.7036609053611755, + "learning_rate": 6.637884066707447e-05, + "loss": 2.5637, + "step": 12223 + }, + { + "epoch": 0.9865224759906384, + "grad_norm": 0.6756969094276428, + 
"learning_rate": 6.636397319931016e-05, + "loss": 2.5381, + "step": 12224 + }, + { + "epoch": 0.9866031797272213, + "grad_norm": 0.6907589435577393, + "learning_rate": 6.634910656984354e-05, + "loss": 2.4927, + "step": 12225 + }, + { + "epoch": 0.9866838834638044, + "grad_norm": 0.7347010374069214, + "learning_rate": 6.63342407790452e-05, + "loss": 2.5131, + "step": 12226 + }, + { + "epoch": 0.9867645872003874, + "grad_norm": 0.6835876107215881, + "learning_rate": 6.631937582728555e-05, + "loss": 2.4611, + "step": 12227 + }, + { + "epoch": 0.9868452909369704, + "grad_norm": 0.8199172616004944, + "learning_rate": 6.630451171493511e-05, + "loss": 2.5341, + "step": 12228 + }, + { + "epoch": 0.9869259946735534, + "grad_norm": 0.7537188529968262, + "learning_rate": 6.62896484423643e-05, + "loss": 2.5218, + "step": 12229 + }, + { + "epoch": 0.9870066984101364, + "grad_norm": 0.7254310250282288, + "learning_rate": 6.62747860099436e-05, + "loss": 2.4766, + "step": 12230 + }, + { + "epoch": 0.9870874021467194, + "grad_norm": 0.6852995157241821, + "learning_rate": 6.625992441804338e-05, + "loss": 2.548, + "step": 12231 + }, + { + "epoch": 0.9871681058833024, + "grad_norm": 0.7089388966560364, + "learning_rate": 6.624506366703402e-05, + "loss": 2.5125, + "step": 12232 + }, + { + "epoch": 0.9872488096198854, + "grad_norm": 0.7114216685295105, + "learning_rate": 6.623020375728597e-05, + "loss": 2.5408, + "step": 12233 + }, + { + "epoch": 0.9873295133564685, + "grad_norm": 0.7891978025436401, + "learning_rate": 6.621534468916946e-05, + "loss": 2.5946, + "step": 12234 + }, + { + "epoch": 0.9874102170930514, + "grad_norm": 0.671399712562561, + "learning_rate": 6.620048646305488e-05, + "loss": 2.4732, + "step": 12235 + }, + { + "epoch": 0.9874909208296344, + "grad_norm": 0.6712855696678162, + "learning_rate": 6.618562907931256e-05, + "loss": 2.4376, + "step": 12236 + }, + { + "epoch": 0.9875716245662174, + "grad_norm": 0.7183727025985718, + "learning_rate": 6.617077253831272e-05, + 
"loss": 2.5406, + "step": 12237 + }, + { + "epoch": 0.9876523283028005, + "grad_norm": 0.6857761144638062, + "learning_rate": 6.615591684042568e-05, + "loss": 2.5279, + "step": 12238 + }, + { + "epoch": 0.9877330320393835, + "grad_norm": 0.7268103957176208, + "learning_rate": 6.614106198602165e-05, + "loss": 2.5283, + "step": 12239 + }, + { + "epoch": 0.9878137357759664, + "grad_norm": 0.6703717708587646, + "learning_rate": 6.612620797547087e-05, + "loss": 2.4254, + "step": 12240 + }, + { + "epoch": 0.9878944395125494, + "grad_norm": 0.7110719680786133, + "learning_rate": 6.611135480914352e-05, + "loss": 2.496, + "step": 12241 + }, + { + "epoch": 0.9879751432491324, + "grad_norm": 0.7268263697624207, + "learning_rate": 6.609650248740983e-05, + "loss": 2.5489, + "step": 12242 + }, + { + "epoch": 0.9880558469857155, + "grad_norm": 0.7413432598114014, + "learning_rate": 6.60816510106399e-05, + "loss": 2.4998, + "step": 12243 + }, + { + "epoch": 0.9881365507222984, + "grad_norm": 0.7443360090255737, + "learning_rate": 6.606680037920389e-05, + "loss": 2.5282, + "step": 12244 + }, + { + "epoch": 0.9882172544588814, + "grad_norm": 0.7787832021713257, + "learning_rate": 6.605195059347191e-05, + "loss": 2.5221, + "step": 12245 + }, + { + "epoch": 0.9882979581954644, + "grad_norm": 0.6921473741531372, + "learning_rate": 6.603710165381409e-05, + "loss": 2.5434, + "step": 12246 + }, + { + "epoch": 0.9883786619320475, + "grad_norm": 0.737328827381134, + "learning_rate": 6.602225356060044e-05, + "loss": 2.5222, + "step": 12247 + }, + { + "epoch": 0.9884593656686305, + "grad_norm": 0.698823094367981, + "learning_rate": 6.600740631420106e-05, + "loss": 2.528, + "step": 12248 + }, + { + "epoch": 0.9885400694052134, + "grad_norm": 0.6735067963600159, + "learning_rate": 6.599255991498601e-05, + "loss": 2.4942, + "step": 12249 + }, + { + "epoch": 0.9886207731417964, + "grad_norm": 0.659622311592102, + "learning_rate": 6.59777143633252e-05, + "loss": 2.4822, + "step": 12250 + }, + { + 
"epoch": 0.9887014768783795, + "grad_norm": 0.6973726153373718, + "learning_rate": 6.596286965958872e-05, + "loss": 2.5499, + "step": 12251 + }, + { + "epoch": 0.9887821806149625, + "grad_norm": 0.6771909594535828, + "learning_rate": 6.594802580414651e-05, + "loss": 2.4968, + "step": 12252 + }, + { + "epoch": 0.9888628843515455, + "grad_norm": 0.68080073595047, + "learning_rate": 6.593318279736849e-05, + "loss": 2.5142, + "step": 12253 + }, + { + "epoch": 0.9889435880881284, + "grad_norm": NaN, + "learning_rate": 6.593318279736849e-05, + "loss": 2.466, + "step": 12254 + }, + { + "epoch": 0.9890242918247115, + "grad_norm": 0.6865221858024597, + "learning_rate": 6.591834063962461e-05, + "loss": 2.4894, + "step": 12255 + }, + { + "epoch": 0.9891049955612945, + "grad_norm": 0.7050445079803467, + "learning_rate": 6.590349933128478e-05, + "loss": 2.5733, + "step": 12256 + }, + { + "epoch": 0.9891856992978775, + "grad_norm": 0.6971526741981506, + "learning_rate": 6.588865887271887e-05, + "loss": 2.4997, + "step": 12257 + }, + { + "epoch": 0.9892664030344605, + "grad_norm": 0.6465088725090027, + "learning_rate": 6.587381926429674e-05, + "loss": 2.5155, + "step": 12258 + }, + { + "epoch": 0.9893471067710435, + "grad_norm": 0.6521422266960144, + "learning_rate": 6.585898050638823e-05, + "loss": 2.4803, + "step": 12259 + }, + { + "epoch": 0.9894278105076265, + "grad_norm": 0.6798849105834961, + "learning_rate": 6.584414259936324e-05, + "loss": 2.5301, + "step": 12260 + }, + { + "epoch": 0.9895085142442095, + "grad_norm": 0.6903446912765503, + "learning_rate": 6.582930554359144e-05, + "loss": 2.4662, + "step": 12261 + }, + { + "epoch": 0.9895892179807925, + "grad_norm": 0.7183516621589661, + "learning_rate": 6.581446933944267e-05, + "loss": 2.4711, + "step": 12262 + }, + { + "epoch": 0.9896699217173756, + "grad_norm": 0.702738344669342, + "learning_rate": 6.579963398728671e-05, + "loss": 2.531, + "step": 12263 + }, + { + "epoch": 0.9897506254539585, + "grad_norm": 
0.7187048196792603, + "learning_rate": 6.578479948749325e-05, + "loss": 2.4933, + "step": 12264 + }, + { + "epoch": 0.9898313291905415, + "grad_norm": 0.6988784670829773, + "learning_rate": 6.576996584043202e-05, + "loss": 2.5179, + "step": 12265 + }, + { + "epoch": 0.9899120329271245, + "grad_norm": 0.7434641122817993, + "learning_rate": 6.575513304647276e-05, + "loss": 2.5157, + "step": 12266 + }, + { + "epoch": 0.9899927366637076, + "grad_norm": 0.667881429195404, + "learning_rate": 6.574030110598505e-05, + "loss": 2.5152, + "step": 12267 + }, + { + "epoch": 0.9900734404002905, + "grad_norm": 0.6766676902770996, + "learning_rate": 6.572547001933862e-05, + "loss": 2.5041, + "step": 12268 + }, + { + "epoch": 0.9901541441368735, + "grad_norm": 0.6531797051429749, + "learning_rate": 6.571063978690311e-05, + "loss": 2.5457, + "step": 12269 + }, + { + "epoch": 0.9902348478734565, + "grad_norm": 0.6557255983352661, + "learning_rate": 6.569581040904804e-05, + "loss": 2.5253, + "step": 12270 + }, + { + "epoch": 0.9903155516100396, + "grad_norm": 0.6818893551826477, + "learning_rate": 6.568098188614304e-05, + "loss": 2.5031, + "step": 12271 + }, + { + "epoch": 0.9903962553466226, + "grad_norm": 0.6644853949546814, + "learning_rate": 6.56661542185577e-05, + "loss": 2.5285, + "step": 12272 + }, + { + "epoch": 0.9904769590832055, + "grad_norm": 0.6035603284835815, + "learning_rate": 6.565132740666155e-05, + "loss": 2.46, + "step": 12273 + }, + { + "epoch": 0.9905576628197885, + "grad_norm": 0.7061343193054199, + "learning_rate": 6.56365014508241e-05, + "loss": 2.4731, + "step": 12274 + }, + { + "epoch": 0.9906383665563716, + "grad_norm": 0.6981248259544373, + "learning_rate": 6.562167635141486e-05, + "loss": 2.4518, + "step": 12275 + }, + { + "epoch": 0.9907190702929546, + "grad_norm": 0.6718073487281799, + "learning_rate": 6.560685210880334e-05, + "loss": 2.4919, + "step": 12276 + }, + { + "epoch": 0.9907997740295376, + "grad_norm": 0.7095392942428589, + "learning_rate": 
6.559202872335893e-05, + "loss": 2.5284, + "step": 12277 + }, + { + "epoch": 0.9908804777661205, + "grad_norm": 0.7052092552185059, + "learning_rate": 6.557720619545111e-05, + "loss": 2.4781, + "step": 12278 + }, + { + "epoch": 0.9909611815027036, + "grad_norm": 0.653570830821991, + "learning_rate": 6.556238452544934e-05, + "loss": 2.5293, + "step": 12279 + }, + { + "epoch": 0.9910418852392866, + "grad_norm": 0.6705330610275269, + "learning_rate": 6.554756371372293e-05, + "loss": 2.4437, + "step": 12280 + }, + { + "epoch": 0.9911225889758696, + "grad_norm": 0.6494189500808716, + "learning_rate": 6.553274376064127e-05, + "loss": 2.4833, + "step": 12281 + }, + { + "epoch": 0.9912032927124526, + "grad_norm": 0.6497724652290344, + "learning_rate": 6.551792466657378e-05, + "loss": 2.4803, + "step": 12282 + }, + { + "epoch": 0.9912839964490356, + "grad_norm": 0.7740494608879089, + "learning_rate": 6.550310643188972e-05, + "loss": 2.4907, + "step": 12283 + }, + { + "epoch": 0.9913647001856186, + "grad_norm": 0.699562668800354, + "learning_rate": 6.548828905695843e-05, + "loss": 2.4576, + "step": 12284 + }, + { + "epoch": 0.9914454039222016, + "grad_norm": 0.8123162984848022, + "learning_rate": 6.547347254214921e-05, + "loss": 2.5118, + "step": 12285 + }, + { + "epoch": 0.9915261076587846, + "grad_norm": 0.7227715253829956, + "learning_rate": 6.545865688783129e-05, + "loss": 2.4688, + "step": 12286 + }, + { + "epoch": 0.9916068113953677, + "grad_norm": 0.6498493552207947, + "learning_rate": 6.544384209437392e-05, + "loss": 2.477, + "step": 12287 + }, + { + "epoch": 0.9916875151319506, + "grad_norm": 0.6427823901176453, + "learning_rate": 6.542902816214636e-05, + "loss": 2.4388, + "step": 12288 + }, + { + "epoch": 0.9917682188685336, + "grad_norm": 0.6803679466247559, + "learning_rate": 6.541421509151778e-05, + "loss": 2.5095, + "step": 12289 + }, + { + "epoch": 0.9918489226051166, + "grad_norm": 0.7025790810585022, + "learning_rate": 6.539940288285734e-05, + "loss": 
2.4881, + "step": 12290 + }, + { + "epoch": 0.9919296263416996, + "grad_norm": 0.6899270415306091, + "learning_rate": 6.538459153653424e-05, + "loss": 2.486, + "step": 12291 + }, + { + "epoch": 0.9920103300782827, + "grad_norm": 0.7379609942436218, + "learning_rate": 6.536978105291762e-05, + "loss": 2.5368, + "step": 12292 + }, + { + "epoch": 0.9920910338148656, + "grad_norm": 0.7279202342033386, + "learning_rate": 6.535497143237657e-05, + "loss": 2.5275, + "step": 12293 + }, + { + "epoch": 0.9921717375514486, + "grad_norm": 0.6810527443885803, + "learning_rate": 6.53401626752802e-05, + "loss": 2.5053, + "step": 12294 + }, + { + "epoch": 0.9922524412880316, + "grad_norm": 0.6578424572944641, + "learning_rate": 6.532535478199759e-05, + "loss": 2.5334, + "step": 12295 + }, + { + "epoch": 0.9923331450246147, + "grad_norm": 0.6819284558296204, + "learning_rate": 6.531054775289778e-05, + "loss": 2.4879, + "step": 12296 + }, + { + "epoch": 0.9924138487611976, + "grad_norm": 0.6524500846862793, + "learning_rate": 6.529574158834977e-05, + "loss": 2.5349, + "step": 12297 + }, + { + "epoch": 0.9924945524977806, + "grad_norm": 0.6853352785110474, + "learning_rate": 6.528093628872263e-05, + "loss": 2.4217, + "step": 12298 + }, + { + "epoch": 0.9925752562343636, + "grad_norm": 0.6731893420219421, + "learning_rate": 6.526613185438529e-05, + "loss": 2.4739, + "step": 12299 + }, + { + "epoch": 0.9926559599709467, + "grad_norm": 0.6515606641769409, + "learning_rate": 6.525132828570673e-05, + "loss": 2.5348, + "step": 12300 + }, + { + "epoch": 0.9927366637075297, + "grad_norm": 0.6819963455200195, + "learning_rate": 6.523652558305596e-05, + "loss": 2.5052, + "step": 12301 + }, + { + "epoch": 0.9928173674441126, + "grad_norm": 0.6521475911140442, + "learning_rate": 6.522172374680177e-05, + "loss": 2.5283, + "step": 12302 + }, + { + "epoch": 0.9928980711806956, + "grad_norm": 0.6488186717033386, + "learning_rate": 6.520692277731315e-05, + "loss": 2.4779, + "step": 12303 + }, + { + 
"epoch": 0.9929787749172787, + "grad_norm": 0.6509760022163391, + "learning_rate": 6.519212267495903e-05, + "loss": 2.5426, + "step": 12304 + }, + { + "epoch": 0.9930594786538617, + "grad_norm": 0.621366560459137, + "learning_rate": 6.517732344010814e-05, + "loss": 2.4804, + "step": 12305 + }, + { + "epoch": 0.9931401823904447, + "grad_norm": 0.6907268166542053, + "learning_rate": 6.516252507312938e-05, + "loss": 2.4883, + "step": 12306 + }, + { + "epoch": 0.9932208861270276, + "grad_norm": 0.7739343643188477, + "learning_rate": 6.514772757439157e-05, + "loss": 2.481, + "step": 12307 + }, + { + "epoch": 0.9933015898636107, + "grad_norm": 0.6794601082801819, + "learning_rate": 6.513293094426352e-05, + "loss": 2.5244, + "step": 12308 + }, + { + "epoch": 0.9933822936001937, + "grad_norm": 0.7189902663230896, + "learning_rate": 6.511813518311394e-05, + "loss": 2.5221, + "step": 12309 + }, + { + "epoch": 0.9934629973367767, + "grad_norm": 0.733318030834198, + "learning_rate": 6.510334029131163e-05, + "loss": 2.521, + "step": 12310 + }, + { + "epoch": 0.9935437010733597, + "grad_norm": 0.7584299445152283, + "learning_rate": 6.508854626922531e-05, + "loss": 2.4962, + "step": 12311 + }, + { + "epoch": 0.9936244048099427, + "grad_norm": 0.6442410349845886, + "learning_rate": 6.507375311722366e-05, + "loss": 2.4775, + "step": 12312 + }, + { + "epoch": 0.9937051085465257, + "grad_norm": 0.6609243154525757, + "learning_rate": 6.505896083567536e-05, + "loss": 2.4706, + "step": 12313 + }, + { + "epoch": 0.9937858122831087, + "grad_norm": 0.6527631878852844, + "learning_rate": 6.504416942494914e-05, + "loss": 2.4612, + "step": 12314 + }, + { + "epoch": 0.9938665160196917, + "grad_norm": 0.6798218488693237, + "learning_rate": 6.502937888541357e-05, + "loss": 2.5502, + "step": 12315 + }, + { + "epoch": 0.9939472197562748, + "grad_norm": 0.6573790907859802, + "learning_rate": 6.501458921743728e-05, + "loss": 2.5598, + "step": 12316 + }, + { + "epoch": 0.9940279234928577, + 
"grad_norm": 0.6945913434028625, + "learning_rate": 6.49998004213889e-05, + "loss": 2.5323, + "step": 12317 + }, + { + "epoch": 0.9941086272294407, + "grad_norm": 0.7609078288078308, + "learning_rate": 6.498501249763697e-05, + "loss": 2.5211, + "step": 12318 + }, + { + "epoch": 0.9941893309660237, + "grad_norm": 0.6878666281700134, + "learning_rate": 6.497022544655006e-05, + "loss": 2.5366, + "step": 12319 + }, + { + "epoch": 0.9942700347026068, + "grad_norm": 0.6675810813903809, + "learning_rate": 6.495543926849674e-05, + "loss": 2.512, + "step": 12320 + }, + { + "epoch": 0.9943507384391898, + "grad_norm": 0.7285950779914856, + "learning_rate": 6.494065396384544e-05, + "loss": 2.4741, + "step": 12321 + }, + { + "epoch": 0.9944314421757727, + "grad_norm": 0.6287158131599426, + "learning_rate": 6.49258695329647e-05, + "loss": 2.4824, + "step": 12322 + }, + { + "epoch": 0.9945121459123557, + "grad_norm": 0.6506727337837219, + "learning_rate": 6.491108597622296e-05, + "loss": 2.5126, + "step": 12323 + }, + { + "epoch": 0.9945928496489388, + "grad_norm": 0.7679052352905273, + "learning_rate": 6.489630329398869e-05, + "loss": 2.5503, + "step": 12324 + }, + { + "epoch": 0.9946735533855218, + "grad_norm": 0.637184202671051, + "learning_rate": 6.488152148663029e-05, + "loss": 2.5098, + "step": 12325 + }, + { + "epoch": 0.9947542571221047, + "grad_norm": 0.6747186779975891, + "learning_rate": 6.486674055451619e-05, + "loss": 2.5154, + "step": 12326 + }, + { + "epoch": 0.9948349608586877, + "grad_norm": 0.7288245558738708, + "learning_rate": 6.485196049801476e-05, + "loss": 2.5077, + "step": 12327 + }, + { + "epoch": 0.9949156645952708, + "grad_norm": 0.6914251446723938, + "learning_rate": 6.483718131749435e-05, + "loss": 2.4877, + "step": 12328 + }, + { + "epoch": 0.9949963683318538, + "grad_norm": 0.7224392294883728, + "learning_rate": 6.48224030133233e-05, + "loss": 2.4862, + "step": 12329 + }, + { + "epoch": 0.9950770720684368, + "grad_norm": 0.7365561723709106, + 
"learning_rate": 6.480762558586995e-05, + "loss": 2.477, + "step": 12330 + }, + { + "epoch": 0.9951577758050197, + "grad_norm": 0.7673236131668091, + "learning_rate": 6.47928490355025e-05, + "loss": 2.5423, + "step": 12331 + }, + { + "epoch": 0.9952384795416028, + "grad_norm": 0.6638002395629883, + "learning_rate": 6.477807336258931e-05, + "loss": 2.5007, + "step": 12332 + }, + { + "epoch": 0.9953191832781858, + "grad_norm": 0.6415974497795105, + "learning_rate": 6.476329856749864e-05, + "loss": 2.4924, + "step": 12333 + }, + { + "epoch": 0.9953998870147688, + "grad_norm": 0.7129398584365845, + "learning_rate": 6.474852465059864e-05, + "loss": 2.5313, + "step": 12334 + }, + { + "epoch": 0.9954805907513518, + "grad_norm": 0.6896344423294067, + "learning_rate": 6.473375161225756e-05, + "loss": 2.5073, + "step": 12335 + }, + { + "epoch": 0.9955612944879348, + "grad_norm": 0.7009317874908447, + "learning_rate": 6.47189794528436e-05, + "loss": 2.574, + "step": 12336 + }, + { + "epoch": 0.9956419982245178, + "grad_norm": 0.6555172801017761, + "learning_rate": 6.470420817272488e-05, + "loss": 2.4769, + "step": 12337 + }, + { + "epoch": 0.9957227019611008, + "grad_norm": 0.7569532990455627, + "learning_rate": 6.468943777226954e-05, + "loss": 2.4691, + "step": 12338 + }, + { + "epoch": 0.9958034056976838, + "grad_norm": 0.68092280626297, + "learning_rate": 6.467466825184569e-05, + "loss": 2.4793, + "step": 12339 + }, + { + "epoch": 0.9958841094342669, + "grad_norm": 0.6977378726005554, + "learning_rate": 6.465989961182152e-05, + "loss": 2.4678, + "step": 12340 + }, + { + "epoch": 0.9959648131708498, + "grad_norm": 0.6702281832695007, + "learning_rate": 6.4645131852565e-05, + "loss": 2.5398, + "step": 12341 + }, + { + "epoch": 0.9960455169074328, + "grad_norm": 0.7584038972854614, + "learning_rate": 6.46303649744442e-05, + "loss": 2.5355, + "step": 12342 + }, + { + "epoch": 0.9961262206440158, + "grad_norm": 0.6779505610466003, + "learning_rate": 6.461559897782718e-05, + 
"loss": 2.4828, + "step": 12343 + }, + { + "epoch": 0.9962069243805988, + "grad_norm": 0.6968233585357666, + "learning_rate": 6.460083386308192e-05, + "loss": 2.5108, + "step": 12344 + }, + { + "epoch": 0.9962876281171819, + "grad_norm": 0.7114594578742981, + "learning_rate": 6.45860696305764e-05, + "loss": 2.5236, + "step": 12345 + }, + { + "epoch": 0.9963683318537648, + "grad_norm": 0.6850530505180359, + "learning_rate": 6.457130628067865e-05, + "loss": 2.458, + "step": 12346 + }, + { + "epoch": 0.9964490355903478, + "grad_norm": 0.7135400772094727, + "learning_rate": 6.455654381375651e-05, + "loss": 2.539, + "step": 12347 + }, + { + "epoch": 0.9965297393269308, + "grad_norm": 0.6736366748809814, + "learning_rate": 6.454178223017797e-05, + "loss": 2.4721, + "step": 12348 + }, + { + "epoch": 0.9966104430635139, + "grad_norm": 0.6806206107139587, + "learning_rate": 6.45270215303109e-05, + "loss": 2.5035, + "step": 12349 + }, + { + "epoch": 0.9966911468000968, + "grad_norm": 0.7120711803436279, + "learning_rate": 6.451226171452318e-05, + "loss": 2.5344, + "step": 12350 + }, + { + "epoch": 0.9967718505366798, + "grad_norm": 0.6865986585617065, + "learning_rate": 6.449750278318264e-05, + "loss": 2.4807, + "step": 12351 + }, + { + "epoch": 0.9968525542732628, + "grad_norm": 0.6461294889450073, + "learning_rate": 6.448274473665717e-05, + "loss": 2.4878, + "step": 12352 + }, + { + "epoch": 0.9969332580098459, + "grad_norm": 0.7090638279914856, + "learning_rate": 6.446798757531454e-05, + "loss": 2.4599, + "step": 12353 + }, + { + "epoch": 0.9970139617464289, + "grad_norm": 0.6933324337005615, + "learning_rate": 6.445323129952252e-05, + "loss": 2.5398, + "step": 12354 + }, + { + "epoch": 0.9970946654830118, + "grad_norm": 0.7018197774887085, + "learning_rate": 6.443847590964888e-05, + "loss": 2.5159, + "step": 12355 + }, + { + "epoch": 0.9971753692195948, + "grad_norm": 0.7292604446411133, + "learning_rate": 6.442372140606145e-05, + "loss": 2.4934, + "step": 12356 + }, + { 
+ "epoch": 0.9972560729561779, + "grad_norm": 0.6686378121376038, + "learning_rate": 6.440896778912783e-05, + "loss": 2.5076, + "step": 12357 + }, + { + "epoch": 0.9973367766927609, + "grad_norm": 0.7194764018058777, + "learning_rate": 6.439421505921576e-05, + "loss": 2.4958, + "step": 12358 + }, + { + "epoch": 0.9974174804293439, + "grad_norm": 0.662467360496521, + "learning_rate": 6.437946321669296e-05, + "loss": 2.5202, + "step": 12359 + }, + { + "epoch": 0.9974981841659268, + "grad_norm": 0.7222515940666199, + "learning_rate": 6.436471226192703e-05, + "loss": 2.5058, + "step": 12360 + }, + { + "epoch": 0.9975788879025099, + "grad_norm": 0.6354855895042419, + "learning_rate": 6.434996219528562e-05, + "loss": 2.4849, + "step": 12361 + }, + { + "epoch": 0.9976595916390929, + "grad_norm": 0.7689539790153503, + "learning_rate": 6.433521301713636e-05, + "loss": 2.4959, + "step": 12362 + }, + { + "epoch": 0.9977402953756759, + "grad_norm": 0.6894338130950928, + "learning_rate": 6.43204647278468e-05, + "loss": 2.5098, + "step": 12363 + }, + { + "epoch": 0.9978209991122589, + "grad_norm": 0.7694165110588074, + "learning_rate": 6.430571732778451e-05, + "loss": 2.513, + "step": 12364 + }, + { + "epoch": 0.9979017028488419, + "grad_norm": 0.6512044668197632, + "learning_rate": 6.42909708173171e-05, + "loss": 2.4785, + "step": 12365 + }, + { + "epoch": 0.9979824065854249, + "grad_norm": 0.6605672836303711, + "learning_rate": 6.427622519681201e-05, + "loss": 2.4804, + "step": 12366 + }, + { + "epoch": 0.9980631103220079, + "grad_norm": 0.7123624086380005, + "learning_rate": 6.426148046663677e-05, + "loss": 2.4854, + "step": 12367 + }, + { + "epoch": 0.9981438140585909, + "grad_norm": 0.662645697593689, + "learning_rate": 6.424673662715886e-05, + "loss": 2.5314, + "step": 12368 + }, + { + "epoch": 0.998224517795174, + "grad_norm": 0.6482149362564087, + "learning_rate": 6.423199367874573e-05, + "loss": 2.4492, + "step": 12369 + }, + { + "epoch": 0.9983052215317569, + 
"grad_norm": 0.6545752286911011, + "learning_rate": 6.421725162176482e-05, + "loss": 2.5042, + "step": 12370 + }, + { + "epoch": 0.9983859252683399, + "grad_norm": 0.6698874235153198, + "learning_rate": 6.420251045658353e-05, + "loss": 2.4523, + "step": 12371 + }, + { + "epoch": 0.9984666290049229, + "grad_norm": 0.6961477398872375, + "learning_rate": 6.418777018356929e-05, + "loss": 2.556, + "step": 12372 + }, + { + "epoch": 0.998547332741506, + "grad_norm": 0.67090904712677, + "learning_rate": 6.41730308030894e-05, + "loss": 2.5237, + "step": 12373 + }, + { + "epoch": 0.998628036478089, + "grad_norm": 0.6828685402870178, + "learning_rate": 6.415829231551124e-05, + "loss": 2.453, + "step": 12374 + }, + { + "epoch": 0.9987087402146719, + "grad_norm": 0.6699565649032593, + "learning_rate": 6.414355472120213e-05, + "loss": 2.4632, + "step": 12375 + }, + { + "epoch": 0.9987894439512549, + "grad_norm": 0.6918730735778809, + "learning_rate": 6.412881802052936e-05, + "loss": 2.4532, + "step": 12376 + }, + { + "epoch": 0.998870147687838, + "grad_norm": 0.7222442030906677, + "learning_rate": 6.411408221386021e-05, + "loss": 2.5113, + "step": 12377 + }, + { + "epoch": 0.998950851424421, + "grad_norm": 0.7479627132415771, + "learning_rate": 6.409934730156195e-05, + "loss": 2.4857, + "step": 12378 + }, + { + "epoch": 0.999031555161004, + "grad_norm": 0.6552882194519043, + "learning_rate": 6.40846132840018e-05, + "loss": 2.4816, + "step": 12379 + }, + { + "epoch": 0.9991122588975869, + "grad_norm": 0.5990073084831238, + "learning_rate": 6.406988016154694e-05, + "loss": 2.4753, + "step": 12380 + }, + { + "epoch": 0.99919296263417, + "grad_norm": 0.6671901941299438, + "learning_rate": 6.405514793456465e-05, + "loss": 2.5298, + "step": 12381 + }, + { + "epoch": 0.999273666370753, + "grad_norm": 0.6630427241325378, + "learning_rate": 6.4040416603422e-05, + "loss": 2.485, + "step": 12382 + }, + { + "epoch": 0.999354370107336, + "grad_norm": 0.6873636841773987, + "learning_rate": 
6.402568616848614e-05, + "loss": 2.4902, + "step": 12383 + }, + { + "epoch": 0.9994350738439189, + "grad_norm": 0.6912413239479065, + "learning_rate": 6.401095663012424e-05, + "loss": 2.5339, + "step": 12384 + }, + { + "epoch": 0.999515777580502, + "grad_norm": 0.6491912603378296, + "learning_rate": 6.39962279887034e-05, + "loss": 2.5367, + "step": 12385 + }, + { + "epoch": 0.999596481317085, + "grad_norm": 0.6668288111686707, + "learning_rate": 6.398150024459065e-05, + "loss": 2.5294, + "step": 12386 + }, + { + "epoch": 0.999677185053668, + "grad_norm": 0.6603856086730957, + "learning_rate": 6.396677339815306e-05, + "loss": 2.4378, + "step": 12387 + }, + { + "epoch": 0.999757888790251, + "grad_norm": 0.6461218595504761, + "learning_rate": 6.395204744975772e-05, + "loss": 2.4835, + "step": 12388 + }, + { + "epoch": 0.999838592526834, + "grad_norm": 0.6621688604354858, + "learning_rate": 6.39373223997715e-05, + "loss": 2.4834, + "step": 12389 + }, + { + "epoch": 0.999919296263417, + "grad_norm": 0.6758724451065063, + "learning_rate": 6.392259824856153e-05, + "loss": 2.4549, + "step": 12390 + }, + { + "epoch": 1.0, + "grad_norm": 1.1304112672805786, + "learning_rate": 6.390787499649473e-05, + "loss": 2.5547, + "step": 12391 + }, + { + "epoch": 1.000080703736583, + "grad_norm": 0.6919478178024292, + "learning_rate": 6.389315264393801e-05, + "loss": 2.47, + "step": 12392 + }, + { + "epoch": 1.000161407473166, + "grad_norm": 0.6916815638542175, + "learning_rate": 6.38784311912583e-05, + "loss": 2.4636, + "step": 12393 + }, + { + "epoch": 1.000242111209749, + "grad_norm": 0.6627040505409241, + "learning_rate": 6.386371063882252e-05, + "loss": 2.5094, + "step": 12394 + }, + { + "epoch": 1.000322814946332, + "grad_norm": 0.6408648490905762, + "learning_rate": 6.384899098699754e-05, + "loss": 2.426, + "step": 12395 + }, + { + "epoch": 1.000403518682915, + "grad_norm": 0.70432448387146, + "learning_rate": 6.38342722361502e-05, + "loss": 2.4861, + "step": 12396 + }, + { + 
"epoch": 1.000484222419498, + "grad_norm": 0.7115964889526367, + "learning_rate": 6.381955438664735e-05, + "loss": 2.4824, + "step": 12397 + }, + { + "epoch": 1.000564926156081, + "grad_norm": 0.6547040939331055, + "learning_rate": 6.380483743885574e-05, + "loss": 2.488, + "step": 12398 + }, + { + "epoch": 1.000645629892664, + "grad_norm": 0.6916625499725342, + "learning_rate": 6.379012139314223e-05, + "loss": 2.4864, + "step": 12399 + }, + { + "epoch": 1.0007263336292471, + "grad_norm": 0.6311133503913879, + "learning_rate": 6.377540624987352e-05, + "loss": 2.4672, + "step": 12400 + }, + { + "epoch": 1.00080703736583, + "grad_norm": 0.7115580439567566, + "learning_rate": 6.376069200941642e-05, + "loss": 2.4359, + "step": 12401 + }, + { + "epoch": 1.000887741102413, + "grad_norm": 0.6734051704406738, + "learning_rate": 6.374597867213756e-05, + "loss": 2.4896, + "step": 12402 + }, + { + "epoch": 1.000968444838996, + "grad_norm": 0.6910715699195862, + "learning_rate": 6.373126623840368e-05, + "loss": 2.4502, + "step": 12403 + }, + { + "epoch": 1.001049148575579, + "grad_norm": 0.6807514429092407, + "learning_rate": 6.37165547085815e-05, + "loss": 2.4791, + "step": 12404 + }, + { + "epoch": 1.0011298523121621, + "grad_norm": 0.679350733757019, + "learning_rate": 6.370184408303759e-05, + "loss": 2.4758, + "step": 12405 + }, + { + "epoch": 1.001210556048745, + "grad_norm": 0.6516300439834595, + "learning_rate": 6.36871343621386e-05, + "loss": 2.4338, + "step": 12406 + }, + { + "epoch": 1.001291259785328, + "grad_norm": 0.7033620476722717, + "learning_rate": 6.367242554625119e-05, + "loss": 2.429, + "step": 12407 + }, + { + "epoch": 1.0013719635219112, + "grad_norm": 0.6750274896621704, + "learning_rate": 6.365771763574186e-05, + "loss": 2.4283, + "step": 12408 + }, + { + "epoch": 1.001452667258494, + "grad_norm": 0.7188721895217896, + "learning_rate": 6.364301063097722e-05, + "loss": 2.4509, + "step": 12409 + }, + { + "epoch": 1.001533370995077, + "grad_norm": 
0.6936308741569519, + "learning_rate": 6.362830453232379e-05, + "loss": 2.4469, + "step": 12410 + }, + { + "epoch": 1.00161407473166, + "grad_norm": 0.673060953617096, + "learning_rate": 6.361359934014808e-05, + "loss": 2.4444, + "step": 12411 + }, + { + "epoch": 1.001694778468243, + "grad_norm": 0.7465113997459412, + "learning_rate": 6.359889505481658e-05, + "loss": 2.4376, + "step": 12412 + }, + { + "epoch": 1.0017754822048262, + "grad_norm": 0.7180366516113281, + "learning_rate": 6.358419167669582e-05, + "loss": 2.4223, + "step": 12413 + }, + { + "epoch": 1.001856185941409, + "grad_norm": 0.6582302451133728, + "learning_rate": 6.356948920615214e-05, + "loss": 2.4723, + "step": 12414 + }, + { + "epoch": 1.001936889677992, + "grad_norm": 0.6452654600143433, + "learning_rate": 6.3554787643552e-05, + "loss": 2.4609, + "step": 12415 + }, + { + "epoch": 1.0020175934145752, + "grad_norm": 0.7170321345329285, + "learning_rate": 6.354008698926185e-05, + "loss": 2.5377, + "step": 12416 + }, + { + "epoch": 1.002098297151158, + "grad_norm": 0.6483680605888367, + "learning_rate": 6.352538724364809e-05, + "loss": 2.4349, + "step": 12417 + }, + { + "epoch": 1.0021790008877411, + "grad_norm": 0.6567494869232178, + "learning_rate": 6.351068840707697e-05, + "loss": 2.4421, + "step": 12418 + }, + { + "epoch": 1.002259704624324, + "grad_norm": 0.7498565912246704, + "learning_rate": 6.349599047991488e-05, + "loss": 2.4212, + "step": 12419 + }, + { + "epoch": 1.002340408360907, + "grad_norm": 0.6894906759262085, + "learning_rate": 6.348129346252816e-05, + "loss": 2.4356, + "step": 12420 + }, + { + "epoch": 1.0024211120974902, + "grad_norm": 0.657361626625061, + "learning_rate": 6.346659735528304e-05, + "loss": 2.4164, + "step": 12421 + }, + { + "epoch": 1.002501815834073, + "grad_norm": 0.6369211673736572, + "learning_rate": 6.345190215854581e-05, + "loss": 2.4229, + "step": 12422 + }, + { + "epoch": 1.0025825195706561, + "grad_norm": 0.7033721208572388, + "learning_rate": 
6.343720787268277e-05, + "loss": 2.5052, + "step": 12423 + }, + { + "epoch": 1.0026632233072392, + "grad_norm": 0.7125518918037415, + "learning_rate": 6.342251449806003e-05, + "loss": 2.514, + "step": 12424 + }, + { + "epoch": 1.002743927043822, + "grad_norm": 0.7355595827102661, + "learning_rate": 6.340782203504385e-05, + "loss": 2.4459, + "step": 12425 + }, + { + "epoch": 1.0028246307804052, + "grad_norm": 0.7244594693183899, + "learning_rate": 6.339313048400042e-05, + "loss": 2.452, + "step": 12426 + }, + { + "epoch": 1.002905334516988, + "grad_norm": 0.7112728357315063, + "learning_rate": 6.337843984529585e-05, + "loss": 2.4951, + "step": 12427 + }, + { + "epoch": 1.0029860382535711, + "grad_norm": 0.7235615849494934, + "learning_rate": 6.336375011929628e-05, + "loss": 2.4697, + "step": 12428 + }, + { + "epoch": 1.0030667419901542, + "grad_norm": 0.653865396976471, + "learning_rate": 6.334906130636784e-05, + "loss": 2.4804, + "step": 12429 + }, + { + "epoch": 1.003147445726737, + "grad_norm": 0.7845149636268616, + "learning_rate": 6.33343734068766e-05, + "loss": 2.5415, + "step": 12430 + }, + { + "epoch": 1.0032281494633202, + "grad_norm": 0.7356342077255249, + "learning_rate": 6.33196864211886e-05, + "loss": 2.5321, + "step": 12431 + }, + { + "epoch": 1.0033088531999033, + "grad_norm": 0.6828265190124512, + "learning_rate": 6.330500034966991e-05, + "loss": 2.3849, + "step": 12432 + }, + { + "epoch": 1.0033895569364861, + "grad_norm": 0.7226579189300537, + "learning_rate": 6.329031519268658e-05, + "loss": 2.512, + "step": 12433 + }, + { + "epoch": 1.0034702606730692, + "grad_norm": 0.6490235924720764, + "learning_rate": 6.327563095060449e-05, + "loss": 2.487, + "step": 12434 + }, + { + "epoch": 1.003550964409652, + "grad_norm": 0.6889309883117676, + "learning_rate": 6.326094762378969e-05, + "loss": 2.4677, + "step": 12435 + }, + { + "epoch": 1.0036316681462352, + "grad_norm": 0.695854127407074, + "learning_rate": 6.324626521260815e-05, + "loss": 2.4362, + 
"step": 12436 + }, + { + "epoch": 1.0037123718828183, + "grad_norm": 0.7045256495475769, + "learning_rate": 6.32315837174257e-05, + "loss": 2.4307, + "step": 12437 + }, + { + "epoch": 1.0037930756194011, + "grad_norm": 0.662604570388794, + "learning_rate": 6.321690313860833e-05, + "loss": 2.4271, + "step": 12438 + }, + { + "epoch": 1.0038737793559842, + "grad_norm": 0.7682240009307861, + "learning_rate": 6.320222347652191e-05, + "loss": 2.4617, + "step": 12439 + }, + { + "epoch": 1.0039544830925673, + "grad_norm": 0.6599584817886353, + "learning_rate": 6.318754473153221e-05, + "loss": 2.405, + "step": 12440 + }, + { + "epoch": 1.0040351868291502, + "grad_norm": 0.7423116564750671, + "learning_rate": 6.317286690400515e-05, + "loss": 2.5496, + "step": 12441 + }, + { + "epoch": 1.0041158905657332, + "grad_norm": 0.6928953528404236, + "learning_rate": 6.315818999430654e-05, + "loss": 2.4265, + "step": 12442 + }, + { + "epoch": 1.0041965943023161, + "grad_norm": 0.699990451335907, + "learning_rate": 6.314351400280211e-05, + "loss": 2.4747, + "step": 12443 + }, + { + "epoch": 1.0042772980388992, + "grad_norm": 0.673384964466095, + "learning_rate": 6.312883892985765e-05, + "loss": 2.4891, + "step": 12444 + }, + { + "epoch": 1.0043580017754823, + "grad_norm": 0.6668596863746643, + "learning_rate": 6.311416477583893e-05, + "loss": 2.4312, + "step": 12445 + }, + { + "epoch": 1.0044387055120652, + "grad_norm": 0.6931218504905701, + "learning_rate": 6.309949154111163e-05, + "loss": 2.4907, + "step": 12446 + }, + { + "epoch": 1.0045194092486482, + "grad_norm": 0.687683641910553, + "learning_rate": 6.308481922604146e-05, + "loss": 2.4302, + "step": 12447 + }, + { + "epoch": 1.004600112985231, + "grad_norm": 0.6887302398681641, + "learning_rate": 6.30701478309941e-05, + "loss": 2.4749, + "step": 12448 + }, + { + "epoch": 1.0046808167218142, + "grad_norm": 0.6713404655456543, + "learning_rate": 6.305547735633522e-05, + "loss": 2.5046, + "step": 12449 + }, + { + "epoch": 
1.0047615204583973, + "grad_norm": 0.7147336006164551, + "learning_rate": 6.304080780243038e-05, + "loss": 2.4578, + "step": 12450 + }, + { + "epoch": 1.0048422241949801, + "grad_norm": 0.87425297498703, + "learning_rate": 6.30261391696452e-05, + "loss": 2.4487, + "step": 12451 + }, + { + "epoch": 1.0049229279315632, + "grad_norm": 0.6641440987586975, + "learning_rate": 6.301147145834534e-05, + "loss": 2.4657, + "step": 12452 + }, + { + "epoch": 1.0050036316681463, + "grad_norm": 0.7311998009681702, + "learning_rate": 6.299680466889626e-05, + "loss": 2.4784, + "step": 12453 + }, + { + "epoch": 1.0050843354047292, + "grad_norm": 0.6722697615623474, + "learning_rate": 6.298213880166354e-05, + "loss": 2.4653, + "step": 12454 + }, + { + "epoch": 1.0051650391413123, + "grad_norm": 0.6886328458786011, + "learning_rate": 6.29674738570127e-05, + "loss": 2.3949, + "step": 12455 + }, + { + "epoch": 1.0052457428778951, + "grad_norm": 0.684688925743103, + "learning_rate": 6.295280983530921e-05, + "loss": 2.4334, + "step": 12456 + }, + { + "epoch": 1.0053264466144782, + "grad_norm": 0.7436798214912415, + "learning_rate": 6.293814673691853e-05, + "loss": 2.5316, + "step": 12457 + }, + { + "epoch": 1.0054071503510613, + "grad_norm": 0.7401304244995117, + "learning_rate": 6.292348456220615e-05, + "loss": 2.4556, + "step": 12458 + }, + { + "epoch": 1.0054878540876442, + "grad_norm": 0.7330329418182373, + "learning_rate": 6.290882331153742e-05, + "loss": 2.4321, + "step": 12459 + }, + { + "epoch": 1.0055685578242273, + "grad_norm": 0.8005052208900452, + "learning_rate": 6.289416298527776e-05, + "loss": 2.415, + "step": 12460 + }, + { + "epoch": 1.0056492615608104, + "grad_norm": 0.8047310709953308, + "learning_rate": 6.28795035837926e-05, + "loss": 2.4144, + "step": 12461 + }, + { + "epoch": 1.0057299652973932, + "grad_norm": 0.7384032011032104, + "learning_rate": 6.28648451074472e-05, + "loss": 2.5237, + "step": 12462 + }, + { + "epoch": 1.0058106690339763, + "grad_norm": 
0.7240314483642578, + "learning_rate": 6.285018755660695e-05, + "loss": 2.4894, + "step": 12463 + }, + { + "epoch": 1.0058913727705592, + "grad_norm": 0.6901080012321472, + "learning_rate": 6.283553093163712e-05, + "loss": 2.4244, + "step": 12464 + }, + { + "epoch": 1.0059720765071423, + "grad_norm": 0.6572268605232239, + "learning_rate": 6.282087523290304e-05, + "loss": 2.456, + "step": 12465 + }, + { + "epoch": 1.0060527802437254, + "grad_norm": 0.7207481861114502, + "learning_rate": 6.28062204607699e-05, + "loss": 2.4153, + "step": 12466 + }, + { + "epoch": 1.0061334839803082, + "grad_norm": 0.6901980042457581, + "learning_rate": 6.279156661560299e-05, + "loss": 2.4776, + "step": 12467 + }, + { + "epoch": 1.0062141877168913, + "grad_norm": 0.7003545761108398, + "learning_rate": 6.277691369776752e-05, + "loss": 2.4206, + "step": 12468 + }, + { + "epoch": 1.0062948914534744, + "grad_norm": 0.6978366374969482, + "learning_rate": 6.276226170762865e-05, + "loss": 2.3866, + "step": 12469 + }, + { + "epoch": 1.0063755951900573, + "grad_norm": 0.6763097643852234, + "learning_rate": 6.274761064555154e-05, + "loss": 2.5439, + "step": 12470 + }, + { + "epoch": 1.0064562989266403, + "grad_norm": 0.7146836519241333, + "learning_rate": 6.273296051190139e-05, + "loss": 2.5486, + "step": 12471 + }, + { + "epoch": 1.0065370026632232, + "grad_norm": 0.7448136806488037, + "learning_rate": 6.271831130704326e-05, + "loss": 2.4539, + "step": 12472 + }, + { + "epoch": 1.0066177063998063, + "grad_norm": 0.6918472051620483, + "learning_rate": 6.270366303134226e-05, + "loss": 2.4756, + "step": 12473 + }, + { + "epoch": 1.0066984101363894, + "grad_norm": 0.7067514657974243, + "learning_rate": 6.26890156851635e-05, + "loss": 2.4925, + "step": 12474 + }, + { + "epoch": 1.0067791138729723, + "grad_norm": 0.6517517566680908, + "learning_rate": 6.267436926887197e-05, + "loss": 2.4339, + "step": 12475 + }, + { + "epoch": 1.0068598176095553, + "grad_norm": 0.673367977142334, + "learning_rate": 
6.265972378283274e-05, + "loss": 2.416, + "step": 12476 + }, + { + "epoch": 1.0069405213461384, + "grad_norm": 0.7190212607383728, + "learning_rate": 6.26450792274108e-05, + "loss": 2.4822, + "step": 12477 + }, + { + "epoch": 1.0070212250827213, + "grad_norm": 0.7568029165267944, + "learning_rate": 6.263043560297112e-05, + "loss": 2.4607, + "step": 12478 + }, + { + "epoch": 1.0071019288193044, + "grad_norm": 0.6860609650611877, + "learning_rate": 6.261579290987866e-05, + "loss": 2.4429, + "step": 12479 + }, + { + "epoch": 1.0071826325558872, + "grad_norm": 0.7066059112548828, + "learning_rate": 6.260115114849839e-05, + "loss": 2.5504, + "step": 12480 + }, + { + "epoch": 1.0072633362924703, + "grad_norm": 0.6857946515083313, + "learning_rate": 6.25865103191952e-05, + "loss": 2.4776, + "step": 12481 + }, + { + "epoch": 1.0073440400290534, + "grad_norm": 0.6879859566688538, + "learning_rate": 6.257187042233396e-05, + "loss": 2.3651, + "step": 12482 + }, + { + "epoch": 1.0074247437656363, + "grad_norm": 0.6900867223739624, + "learning_rate": 6.255723145827954e-05, + "loss": 2.4644, + "step": 12483 + }, + { + "epoch": 1.0075054475022194, + "grad_norm": 0.7144716382026672, + "learning_rate": 6.254259342739683e-05, + "loss": 2.4219, + "step": 12484 + }, + { + "epoch": 1.0075861512388025, + "grad_norm": 0.674619197845459, + "learning_rate": 6.252795633005056e-05, + "loss": 2.5038, + "step": 12485 + }, + { + "epoch": 1.0076668549753853, + "grad_norm": 0.7036965489387512, + "learning_rate": 6.251332016660558e-05, + "loss": 2.4784, + "step": 12486 + }, + { + "epoch": 1.0077475587119684, + "grad_norm": 0.7046369910240173, + "learning_rate": 6.249868493742668e-05, + "loss": 2.514, + "step": 12487 + }, + { + "epoch": 1.0078282624485513, + "grad_norm": 0.6933087110519409, + "learning_rate": 6.248405064287854e-05, + "loss": 2.4855, + "step": 12488 + }, + { + "epoch": 1.0079089661851344, + "grad_norm": 0.7210546731948853, + "learning_rate": 6.246941728332594e-05, + "loss": 2.5101, 
+ "step": 12489 + }, + { + "epoch": 1.0079896699217175, + "grad_norm": 0.6738288402557373, + "learning_rate": 6.245478485913361e-05, + "loss": 2.4891, + "step": 12490 + }, + { + "epoch": 1.0080703736583003, + "grad_norm": 0.7023273706436157, + "learning_rate": 6.244015337066611e-05, + "loss": 2.4977, + "step": 12491 + }, + { + "epoch": 1.0081510773948834, + "grad_norm": 0.6761355996131897, + "learning_rate": 6.24255228182882e-05, + "loss": 2.4948, + "step": 12492 + }, + { + "epoch": 1.0082317811314665, + "grad_norm": 0.6427976489067078, + "learning_rate": 6.241089320236448e-05, + "loss": 2.466, + "step": 12493 + }, + { + "epoch": 1.0083124848680494, + "grad_norm": 0.6907719969749451, + "learning_rate": 6.23962645232596e-05, + "loss": 2.437, + "step": 12494 + }, + { + "epoch": 1.0083931886046325, + "grad_norm": 0.709032416343689, + "learning_rate": 6.238163678133807e-05, + "loss": 2.4298, + "step": 12495 + }, + { + "epoch": 1.0084738923412153, + "grad_norm": 0.7395734786987305, + "learning_rate": 6.236700997696448e-05, + "loss": 2.4502, + "step": 12496 + }, + { + "epoch": 1.0085545960777984, + "grad_norm": 0.6535435914993286, + "learning_rate": 6.23523841105034e-05, + "loss": 2.4494, + "step": 12497 + }, + { + "epoch": 1.0086352998143815, + "grad_norm": 0.6597761511802673, + "learning_rate": 6.23377591823193e-05, + "loss": 2.4377, + "step": 12498 + }, + { + "epoch": 1.0087160035509644, + "grad_norm": 0.6610515713691711, + "learning_rate": 6.232313519277668e-05, + "loss": 2.4328, + "step": 12499 + }, + { + "epoch": 1.0087967072875474, + "grad_norm": 0.6785424947738647, + "learning_rate": 6.230851214224009e-05, + "loss": 2.457, + "step": 12500 + }, + { + "epoch": 1.0088774110241303, + "grad_norm": 0.6939748525619507, + "learning_rate": 6.229389003107383e-05, + "loss": 2.383, + "step": 12501 + }, + { + "epoch": 1.0089581147607134, + "grad_norm": 0.7592256665229797, + "learning_rate": 6.22792688596424e-05, + "loss": 2.4665, + "step": 12502 + }, + { + "epoch": 
1.0090388184972965, + "grad_norm": 0.6751298308372498, + "learning_rate": 6.226464862831023e-05, + "loss": 2.491, + "step": 12503 + }, + { + "epoch": 1.0091195222338794, + "grad_norm": 0.682771623134613, + "learning_rate": 6.225002933744164e-05, + "loss": 2.4275, + "step": 12504 + }, + { + "epoch": 1.0092002259704624, + "grad_norm": 0.7314651608467102, + "learning_rate": 6.223541098740098e-05, + "loss": 2.4489, + "step": 12505 + }, + { + "epoch": 1.0092809297070455, + "grad_norm": 0.7132120132446289, + "learning_rate": 6.222079357855261e-05, + "loss": 2.4819, + "step": 12506 + }, + { + "epoch": 1.0093616334436284, + "grad_norm": 0.6571424007415771, + "learning_rate": 6.220617711126082e-05, + "loss": 2.455, + "step": 12507 + }, + { + "epoch": 1.0094423371802115, + "grad_norm": 0.7675301432609558, + "learning_rate": 6.21915615858899e-05, + "loss": 2.5282, + "step": 12508 + }, + { + "epoch": 1.0095230409167943, + "grad_norm": 0.6907868385314941, + "learning_rate": 6.217694700280408e-05, + "loss": 2.4639, + "step": 12509 + }, + { + "epoch": 1.0096037446533774, + "grad_norm": 0.7223815321922302, + "learning_rate": 6.216233336236764e-05, + "loss": 2.4682, + "step": 12510 + }, + { + "epoch": 1.0096844483899605, + "grad_norm": 0.7325109839439392, + "learning_rate": 6.214772066494474e-05, + "loss": 2.4591, + "step": 12511 + }, + { + "epoch": 1.0097651521265434, + "grad_norm": 0.6589400768280029, + "learning_rate": 6.213310891089957e-05, + "loss": 2.4883, + "step": 12512 + }, + { + "epoch": 1.0098458558631265, + "grad_norm": 0.6692262291908264, + "learning_rate": 6.211849810059635e-05, + "loss": 2.4635, + "step": 12513 + }, + { + "epoch": 1.0099265595997096, + "grad_norm": 0.7352520823478699, + "learning_rate": 6.210388823439914e-05, + "loss": 2.4743, + "step": 12514 + }, + { + "epoch": 1.0100072633362924, + "grad_norm": 0.6631996035575867, + "learning_rate": 6.208927931267212e-05, + "loss": 2.4848, + "step": 12515 + }, + { + "epoch": 1.0100879670728755, + "grad_norm": 
0.6985767483711243, + "learning_rate": 6.207467133577937e-05, + "loss": 2.5044, + "step": 12516 + }, + { + "epoch": 1.0101686708094584, + "grad_norm": 0.665635347366333, + "learning_rate": 6.206006430408494e-05, + "loss": 2.4718, + "step": 12517 + }, + { + "epoch": 1.0102493745460415, + "grad_norm": 0.6859133243560791, + "learning_rate": 6.204545821795286e-05, + "loss": 2.4702, + "step": 12518 + }, + { + "epoch": 1.0103300782826246, + "grad_norm": 0.6578841805458069, + "learning_rate": 6.203085307774722e-05, + "loss": 2.4614, + "step": 12519 + }, + { + "epoch": 1.0104107820192074, + "grad_norm": 0.717523455619812, + "learning_rate": 6.201624888383194e-05, + "loss": 2.4412, + "step": 12520 + }, + { + "epoch": 1.0104914857557905, + "grad_norm": 0.7333831787109375, + "learning_rate": 6.200164563657103e-05, + "loss": 2.4157, + "step": 12521 + }, + { + "epoch": 1.0105721894923736, + "grad_norm": 0.6968720555305481, + "learning_rate": 6.198704333632845e-05, + "loss": 2.4556, + "step": 12522 + }, + { + "epoch": 1.0106528932289565, + "grad_norm": 0.6533070802688599, + "learning_rate": 6.19724419834681e-05, + "loss": 2.43, + "step": 12523 + }, + { + "epoch": 1.0107335969655395, + "grad_norm": 0.7341824769973755, + "learning_rate": 6.195784157835391e-05, + "loss": 2.5326, + "step": 12524 + }, + { + "epoch": 1.0108143007021224, + "grad_norm": 0.752912163734436, + "learning_rate": 6.194324212134974e-05, + "loss": 2.4282, + "step": 12525 + }, + { + "epoch": 1.0108950044387055, + "grad_norm": 0.6538611650466919, + "learning_rate": 6.192864361281951e-05, + "loss": 2.4135, + "step": 12526 + }, + { + "epoch": 1.0109757081752886, + "grad_norm": 0.6931454539299011, + "learning_rate": 6.191404605312695e-05, + "loss": 2.5097, + "step": 12527 + }, + { + "epoch": 1.0110564119118715, + "grad_norm": 0.6317688822746277, + "learning_rate": 6.18994494426359e-05, + "loss": 2.4977, + "step": 12528 + }, + { + "epoch": 1.0111371156484545, + "grad_norm": 0.6793715953826904, + "learning_rate": 
6.188485378171024e-05, + "loss": 2.4619, + "step": 12529 + }, + { + "epoch": 1.0112178193850376, + "grad_norm": 0.6696654558181763, + "learning_rate": 6.187025907071361e-05, + "loss": 2.4658, + "step": 12530 + }, + { + "epoch": 1.0112985231216205, + "grad_norm": 0.6788807511329651, + "learning_rate": 6.185566531000979e-05, + "loss": 2.4793, + "step": 12531 + }, + { + "epoch": 1.0113792268582036, + "grad_norm": 0.6933971643447876, + "learning_rate": 6.184107249996253e-05, + "loss": 2.4772, + "step": 12532 + }, + { + "epoch": 1.0114599305947864, + "grad_norm": 0.6866000294685364, + "learning_rate": 6.182648064093546e-05, + "loss": 2.428, + "step": 12533 + }, + { + "epoch": 1.0115406343313695, + "grad_norm": 0.7013841271400452, + "learning_rate": 6.181188973329229e-05, + "loss": 2.5273, + "step": 12534 + }, + { + "epoch": 1.0116213380679526, + "grad_norm": 0.6569108963012695, + "learning_rate": 6.179729977739669e-05, + "loss": 2.4125, + "step": 12535 + }, + { + "epoch": 1.0117020418045355, + "grad_norm": 0.7503486275672913, + "learning_rate": 6.17827107736122e-05, + "loss": 2.4385, + "step": 12536 + }, + { + "epoch": 1.0117827455411186, + "grad_norm": 0.6757314205169678, + "learning_rate": 6.176812272230246e-05, + "loss": 2.4364, + "step": 12537 + }, + { + "epoch": 1.0118634492777017, + "grad_norm": 0.6567254662513733, + "learning_rate": 6.175353562383106e-05, + "loss": 2.4992, + "step": 12538 + }, + { + "epoch": 1.0119441530142845, + "grad_norm": 0.7564988732337952, + "learning_rate": 6.17389494785615e-05, + "loss": 2.4777, + "step": 12539 + }, + { + "epoch": 1.0120248567508676, + "grad_norm": 0.6972391605377197, + "learning_rate": 6.172436428685735e-05, + "loss": 2.5041, + "step": 12540 + }, + { + "epoch": 1.0121055604874505, + "grad_norm": 0.6861580610275269, + "learning_rate": 6.170978004908209e-05, + "loss": 2.4684, + "step": 12541 + }, + { + "epoch": 1.0121862642240336, + "grad_norm": 0.6621903777122498, + "learning_rate": 6.169519676559921e-05, + "loss": 
2.4614, + "step": 12542 + }, + { + "epoch": 1.0122669679606167, + "grad_norm": 0.6879795789718628, + "learning_rate": 6.168061443677215e-05, + "loss": 2.4765, + "step": 12543 + }, + { + "epoch": 1.0123476716971995, + "grad_norm": 0.6361081004142761, + "learning_rate": 6.166603306296434e-05, + "loss": 2.4792, + "step": 12544 + }, + { + "epoch": 1.0124283754337826, + "grad_norm": 0.6660729050636292, + "learning_rate": 6.165145264453924e-05, + "loss": 2.489, + "step": 12545 + }, + { + "epoch": 1.0125090791703655, + "grad_norm": 0.6900594234466553, + "learning_rate": 6.163687318186015e-05, + "loss": 2.4543, + "step": 12546 + }, + { + "epoch": 1.0125897829069486, + "grad_norm": 0.7195869088172913, + "learning_rate": 6.162229467529046e-05, + "loss": 2.4137, + "step": 12547 + }, + { + "epoch": 1.0126704866435317, + "grad_norm": 0.7030326128005981, + "learning_rate": 6.16077171251935e-05, + "loss": 2.4657, + "step": 12548 + }, + { + "epoch": 1.0127511903801145, + "grad_norm": 0.6712052822113037, + "learning_rate": 6.15931405319326e-05, + "loss": 2.4718, + "step": 12549 + }, + { + "epoch": 1.0128318941166976, + "grad_norm": 0.7471029162406921, + "learning_rate": 6.157856489587102e-05, + "loss": 2.4705, + "step": 12550 + }, + { + "epoch": 1.0129125978532807, + "grad_norm": 0.6813762187957764, + "learning_rate": 6.15639902173721e-05, + "loss": 2.4479, + "step": 12551 + }, + { + "epoch": 1.0129933015898636, + "grad_norm": 0.6657249927520752, + "learning_rate": 6.154941649679894e-05, + "loss": 2.4911, + "step": 12552 + }, + { + "epoch": 1.0130740053264466, + "grad_norm": 0.6700132489204407, + "learning_rate": 6.153484373451483e-05, + "loss": 2.4962, + "step": 12553 + }, + { + "epoch": 1.0131547090630295, + "grad_norm": 0.7058695554733276, + "learning_rate": 6.152027193088302e-05, + "loss": 2.3935, + "step": 12554 + }, + { + "epoch": 1.0132354127996126, + "grad_norm": 0.7390396595001221, + "learning_rate": 6.150570108626658e-05, + "loss": 2.4454, + "step": 12555 + }, + { + 
"epoch": 1.0133161165361957, + "grad_norm": 0.7251414060592651, + "learning_rate": 6.149113120102869e-05, + "loss": 2.4146, + "step": 12556 + }, + { + "epoch": 1.0133968202727786, + "grad_norm": 0.8262537717819214, + "learning_rate": 6.14765622755325e-05, + "loss": 2.4638, + "step": 12557 + }, + { + "epoch": 1.0134775240093616, + "grad_norm": 0.7184064984321594, + "learning_rate": 6.146199431014106e-05, + "loss": 2.3958, + "step": 12558 + }, + { + "epoch": 1.0135582277459447, + "grad_norm": 0.7544865012168884, + "learning_rate": 6.144742730521746e-05, + "loss": 2.4662, + "step": 12559 + }, + { + "epoch": 1.0136389314825276, + "grad_norm": 0.6866207718849182, + "learning_rate": 6.143286126112475e-05, + "loss": 2.4951, + "step": 12560 + }, + { + "epoch": 1.0137196352191107, + "grad_norm": 0.6566087603569031, + "learning_rate": 6.1418296178226e-05, + "loss": 2.4002, + "step": 12561 + }, + { + "epoch": 1.0138003389556935, + "grad_norm": 0.6999008059501648, + "learning_rate": 6.140373205688411e-05, + "loss": 2.5306, + "step": 12562 + }, + { + "epoch": 1.0138810426922766, + "grad_norm": 0.6682353615760803, + "learning_rate": 6.138916889746212e-05, + "loss": 2.5565, + "step": 12563 + }, + { + "epoch": 1.0139617464288597, + "grad_norm": 0.7443362474441528, + "learning_rate": 6.137460670032298e-05, + "loss": 2.3958, + "step": 12564 + }, + { + "epoch": 1.0140424501654426, + "grad_norm": 0.6542403697967529, + "learning_rate": 6.136004546582958e-05, + "loss": 2.4394, + "step": 12565 + }, + { + "epoch": 1.0141231539020257, + "grad_norm": 0.6524317264556885, + "learning_rate": 6.134548519434488e-05, + "loss": 2.4979, + "step": 12566 + }, + { + "epoch": 1.0142038576386088, + "grad_norm": 0.6605600118637085, + "learning_rate": 6.133092588623174e-05, + "loss": 2.4827, + "step": 12567 + }, + { + "epoch": 1.0142845613751916, + "grad_norm": 0.7114397883415222, + "learning_rate": 6.1316367541853e-05, + "loss": 2.4799, + "step": 12568 + }, + { + "epoch": 1.0143652651117747, + 
"grad_norm": 0.6607296466827393, + "learning_rate": 6.130181016157148e-05, + "loss": 2.4991, + "step": 12569 + }, + { + "epoch": 1.0144459688483576, + "grad_norm": 0.6750844717025757, + "learning_rate": 6.128725374575005e-05, + "loss": 2.4451, + "step": 12570 + }, + { + "epoch": 1.0145266725849407, + "grad_norm": 0.6978901624679565, + "learning_rate": 6.127269829475141e-05, + "loss": 2.4608, + "step": 12571 + }, + { + "epoch": 1.0146073763215238, + "grad_norm": 0.676343560218811, + "learning_rate": 6.125814380893838e-05, + "loss": 2.4536, + "step": 12572 + }, + { + "epoch": 1.0146880800581066, + "grad_norm": 0.7082604765892029, + "learning_rate": 6.124359028867368e-05, + "loss": 2.45, + "step": 12573 + }, + { + "epoch": 1.0147687837946897, + "grad_norm": 0.7049853205680847, + "learning_rate": 6.122903773432003e-05, + "loss": 2.4378, + "step": 12574 + }, + { + "epoch": 1.0148494875312728, + "grad_norm": 0.6329593062400818, + "learning_rate": 6.121448614624009e-05, + "loss": 2.4386, + "step": 12575 + }, + { + "epoch": 1.0149301912678557, + "grad_norm": 0.7249468564987183, + "learning_rate": 6.119993552479655e-05, + "loss": 2.5191, + "step": 12576 + }, + { + "epoch": 1.0150108950044388, + "grad_norm": 0.7028193473815918, + "learning_rate": 6.118538587035206e-05, + "loss": 2.4376, + "step": 12577 + }, + { + "epoch": 1.0150915987410216, + "grad_norm": 0.697382926940918, + "learning_rate": 6.117083718326917e-05, + "loss": 2.4797, + "step": 12578 + }, + { + "epoch": 1.0151723024776047, + "grad_norm": 0.7386965155601501, + "learning_rate": 6.115628946391055e-05, + "loss": 2.4512, + "step": 12579 + }, + { + "epoch": 1.0152530062141878, + "grad_norm": 0.6614577174186707, + "learning_rate": 6.114174271263875e-05, + "loss": 2.4404, + "step": 12580 + }, + { + "epoch": 1.0153337099507707, + "grad_norm": 0.6927464604377747, + "learning_rate": 6.112719692981627e-05, + "loss": 2.47, + "step": 12581 + }, + { + "epoch": 1.0154144136873537, + "grad_norm": 0.7004262208938599, + 
"learning_rate": 6.111265211580566e-05, + "loss": 2.4212, + "step": 12582 + }, + { + "epoch": 1.0154951174239368, + "grad_norm": 0.71146559715271, + "learning_rate": 6.109810827096942e-05, + "loss": 2.4431, + "step": 12583 + }, + { + "epoch": 1.0155758211605197, + "grad_norm": 0.6857032775878906, + "learning_rate": 6.108356539567e-05, + "loss": 2.453, + "step": 12584 + }, + { + "epoch": 1.0156565248971028, + "grad_norm": 0.6976168155670166, + "learning_rate": 6.106902349026986e-05, + "loss": 2.4718, + "step": 12585 + }, + { + "epoch": 1.0157372286336857, + "grad_norm": 0.7158414125442505, + "learning_rate": 6.105448255513146e-05, + "loss": 2.425, + "step": 12586 + }, + { + "epoch": 1.0158179323702687, + "grad_norm": 0.6611737608909607, + "learning_rate": 6.103994259061714e-05, + "loss": 2.4563, + "step": 12587 + }, + { + "epoch": 1.0158986361068518, + "grad_norm": 0.7262980937957764, + "learning_rate": 6.102540359708926e-05, + "loss": 2.4538, + "step": 12588 + }, + { + "epoch": 1.0159793398434347, + "grad_norm": 0.7123451828956604, + "learning_rate": 6.10108655749102e-05, + "loss": 2.4677, + "step": 12589 + }, + { + "epoch": 1.0160600435800178, + "grad_norm": 0.7135589122772217, + "learning_rate": 6.099632852444235e-05, + "loss": 2.4312, + "step": 12590 + }, + { + "epoch": 1.0161407473166009, + "grad_norm": 0.6509461998939514, + "learning_rate": 6.09817924460479e-05, + "loss": 2.4716, + "step": 12591 + }, + { + "epoch": 1.0162214510531837, + "grad_norm": 0.8835915923118591, + "learning_rate": 6.096725734008919e-05, + "loss": 2.4817, + "step": 12592 + }, + { + "epoch": 1.0163021547897668, + "grad_norm": 0.7084136605262756, + "learning_rate": 6.095272320692846e-05, + "loss": 2.483, + "step": 12593 + }, + { + "epoch": 1.0163828585263497, + "grad_norm": 0.6866818070411682, + "learning_rate": 6.0938190046927934e-05, + "loss": 2.4838, + "step": 12594 + }, + { + "epoch": 1.0164635622629328, + "grad_norm": 0.7297510504722595, + "learning_rate": 6.0923657860449824e-05, + 
"loss": 2.4675, + "step": 12595 + }, + { + "epoch": 1.0165442659995159, + "grad_norm": 0.6735619306564331, + "learning_rate": 6.090912664785633e-05, + "loss": 2.444, + "step": 12596 + }, + { + "epoch": 1.0166249697360987, + "grad_norm": 0.7046451568603516, + "learning_rate": 6.0894596409509565e-05, + "loss": 2.4757, + "step": 12597 + }, + { + "epoch": 1.0167056734726818, + "grad_norm": 0.6646085977554321, + "learning_rate": 6.0880067145771656e-05, + "loss": 2.4772, + "step": 12598 + }, + { + "epoch": 1.0167863772092647, + "grad_norm": 0.7217094302177429, + "learning_rate": 6.086553885700478e-05, + "loss": 2.4589, + "step": 12599 + }, + { + "epoch": 1.0168670809458478, + "grad_norm": 0.647378146648407, + "learning_rate": 6.085101154357093e-05, + "loss": 2.4327, + "step": 12600 + }, + { + "epoch": 1.0169477846824309, + "grad_norm": 0.6907125115394592, + "learning_rate": 6.083648520583223e-05, + "loss": 2.467, + "step": 12601 + }, + { + "epoch": 1.0170284884190137, + "grad_norm": 0.690433919429779, + "learning_rate": 6.0821959844150687e-05, + "loss": 2.488, + "step": 12602 + }, + { + "epoch": 1.0171091921555968, + "grad_norm": 0.6528738737106323, + "learning_rate": 6.080743545888833e-05, + "loss": 2.5028, + "step": 12603 + }, + { + "epoch": 1.01718989589218, + "grad_norm": 0.6962323784828186, + "learning_rate": 6.079291205040711e-05, + "loss": 2.5381, + "step": 12604 + }, + { + "epoch": 1.0172705996287628, + "grad_norm": 0.7386075854301453, + "learning_rate": 6.077838961906902e-05, + "loss": 2.4445, + "step": 12605 + }, + { + "epoch": 1.0173513033653458, + "grad_norm": 0.7382189631462097, + "learning_rate": 6.0763868165236025e-05, + "loss": 2.4926, + "step": 12606 + }, + { + "epoch": 1.0174320071019287, + "grad_norm": 0.7291865944862366, + "learning_rate": 6.074934768926995e-05, + "loss": 2.4624, + "step": 12607 + }, + { + "epoch": 1.0175127108385118, + "grad_norm": 0.754843533039093, + "learning_rate": 6.073482819153275e-05, + "loss": 2.4291, + "step": 12608 + }, + { 
+ "epoch": 1.017593414575095, + "grad_norm": 0.6827771663665771, + "learning_rate": 6.072030967238628e-05, + "loss": 2.453, + "step": 12609 + }, + { + "epoch": 1.0176741183116778, + "grad_norm": 0.7138541340827942, + "learning_rate": 6.0705792132192355e-05, + "loss": 2.5172, + "step": 12610 + }, + { + "epoch": 1.0177548220482608, + "grad_norm": 0.6539924740791321, + "learning_rate": 6.06912755713128e-05, + "loss": 2.4393, + "step": 12611 + }, + { + "epoch": 1.017835525784844, + "grad_norm": 0.7021273970603943, + "learning_rate": 6.067675999010945e-05, + "loss": 2.4519, + "step": 12612 + }, + { + "epoch": 1.0179162295214268, + "grad_norm": 0.7124225497245789, + "learning_rate": 6.0662245388944004e-05, + "loss": 2.4417, + "step": 12613 + }, + { + "epoch": 1.0179969332580099, + "grad_norm": 0.7214948534965515, + "learning_rate": 6.064773176817823e-05, + "loss": 2.4708, + "step": 12614 + }, + { + "epoch": 1.0180776369945927, + "grad_norm": 0.6738584041595459, + "learning_rate": 6.063321912817386e-05, + "loss": 2.4574, + "step": 12615 + }, + { + "epoch": 1.0181583407311758, + "grad_norm": 0.7215890884399414, + "learning_rate": 6.061870746929257e-05, + "loss": 2.4903, + "step": 12616 + }, + { + "epoch": 1.018239044467759, + "grad_norm": 0.6720155477523804, + "learning_rate": 6.0604196791896016e-05, + "loss": 2.4251, + "step": 12617 + }, + { + "epoch": 1.0183197482043418, + "grad_norm": 0.7046420574188232, + "learning_rate": 6.058968709634587e-05, + "loss": 2.446, + "step": 12618 + }, + { + "epoch": 1.0184004519409249, + "grad_norm": 0.6419540047645569, + "learning_rate": 6.0575178383003764e-05, + "loss": 2.4052, + "step": 12619 + }, + { + "epoch": 1.018481155677508, + "grad_norm": 0.6948695182800293, + "learning_rate": 6.0560670652231235e-05, + "loss": 2.5068, + "step": 12620 + }, + { + "epoch": 1.0185618594140908, + "grad_norm": 0.7274870276451111, + "learning_rate": 6.05461639043899e-05, + "loss": 2.4705, + "step": 12621 + }, + { + "epoch": 1.018642563150674, + 
"grad_norm": 0.6809766292572021, + "learning_rate": 6.053165813984134e-05, + "loss": 2.3767, + "step": 12622 + }, + { + "epoch": 1.0187232668872568, + "grad_norm": 0.6197625994682312, + "learning_rate": 6.0517153358946985e-05, + "loss": 2.4639, + "step": 12623 + }, + { + "epoch": 1.0188039706238399, + "grad_norm": 0.6613010764122009, + "learning_rate": 6.050264956206837e-05, + "loss": 2.5155, + "step": 12624 + }, + { + "epoch": 1.018884674360423, + "grad_norm": 0.7335553765296936, + "learning_rate": 6.0488146749567e-05, + "loss": 2.5344, + "step": 12625 + }, + { + "epoch": 1.0189653780970058, + "grad_norm": 0.7175146341323853, + "learning_rate": 6.047364492180428e-05, + "loss": 2.4972, + "step": 12626 + }, + { + "epoch": 1.019046081833589, + "grad_norm": 0.6825357675552368, + "learning_rate": 6.045914407914166e-05, + "loss": 2.4356, + "step": 12627 + }, + { + "epoch": 1.019126785570172, + "grad_norm": 0.6369633078575134, + "learning_rate": 6.044464422194056e-05, + "loss": 2.4692, + "step": 12628 + }, + { + "epoch": 1.0192074893067549, + "grad_norm": 0.7407073378562927, + "learning_rate": 6.0430145350562264e-05, + "loss": 2.4565, + "step": 12629 + }, + { + "epoch": 1.019288193043338, + "grad_norm": 0.6836552619934082, + "learning_rate": 6.041564746536821e-05, + "loss": 2.4357, + "step": 12630 + }, + { + "epoch": 1.0193688967799208, + "grad_norm": 0.6778741478919983, + "learning_rate": 6.040115056671972e-05, + "loss": 2.424, + "step": 12631 + }, + { + "epoch": 1.019449600516504, + "grad_norm": 0.6440724730491638, + "learning_rate": 6.0386654654978035e-05, + "loss": 2.4455, + "step": 12632 + }, + { + "epoch": 1.019530304253087, + "grad_norm": 0.681376039981842, + "learning_rate": 6.0372159730504476e-05, + "loss": 2.4562, + "step": 12633 + }, + { + "epoch": 1.0196110079896699, + "grad_norm": 0.657462477684021, + "learning_rate": 6.035766579366029e-05, + "loss": 2.4315, + "step": 12634 + }, + { + "epoch": 1.019691711726253, + "grad_norm": 0.6540380716323853, + 
"learning_rate": 6.0343172844806706e-05, + "loss": 2.4789, + "step": 12635 + }, + { + "epoch": 1.019772415462836, + "grad_norm": 0.711883008480072, + "learning_rate": 6.03286808843049e-05, + "loss": 2.4178, + "step": 12636 + }, + { + "epoch": 1.019853119199419, + "grad_norm": 0.6746736168861389, + "learning_rate": 6.031418991251607e-05, + "loss": 2.4351, + "step": 12637 + }, + { + "epoch": 1.019933822936002, + "grad_norm": 0.677237331867218, + "learning_rate": 6.02996999298014e-05, + "loss": 2.4335, + "step": 12638 + }, + { + "epoch": 1.0200145266725849, + "grad_norm": 0.6950497627258301, + "learning_rate": 6.0285210936521955e-05, + "loss": 2.5178, + "step": 12639 + }, + { + "epoch": 1.020095230409168, + "grad_norm": 0.6349243521690369, + "learning_rate": 6.027072293303885e-05, + "loss": 2.4405, + "step": 12640 + }, + { + "epoch": 1.020175934145751, + "grad_norm": 0.744276762008667, + "learning_rate": 6.0256235919713236e-05, + "loss": 2.5156, + "step": 12641 + }, + { + "epoch": 1.020256637882334, + "grad_norm": 0.7697997689247131, + "learning_rate": 6.0241749896906075e-05, + "loss": 2.4393, + "step": 12642 + }, + { + "epoch": 1.020337341618917, + "grad_norm": 0.7784204483032227, + "learning_rate": 6.022726486497844e-05, + "loss": 2.4565, + "step": 12643 + }, + { + "epoch": 1.0204180453555, + "grad_norm": 0.7434312701225281, + "learning_rate": 6.021278082429136e-05, + "loss": 2.4637, + "step": 12644 + }, + { + "epoch": 1.020498749092083, + "grad_norm": 0.7770118117332458, + "learning_rate": 6.019829777520575e-05, + "loss": 2.4998, + "step": 12645 + }, + { + "epoch": 1.020579452828666, + "grad_norm": 0.7021752595901489, + "learning_rate": 6.01838157180826e-05, + "loss": 2.4661, + "step": 12646 + }, + { + "epoch": 1.0206601565652489, + "grad_norm": 0.6812437176704407, + "learning_rate": 6.0169334653282895e-05, + "loss": 2.4611, + "step": 12647 + }, + { + "epoch": 1.020740860301832, + "grad_norm": 0.757724940776825, + "learning_rate": 6.0154854581167455e-05, + "loss": 
2.4427, + "step": 12648 + }, + { + "epoch": 1.020821564038415, + "grad_norm": 0.7386252880096436, + "learning_rate": 6.014037550209718e-05, + "loss": 2.424, + "step": 12649 + }, + { + "epoch": 1.020902267774998, + "grad_norm": 0.7138059735298157, + "learning_rate": 6.012589741643295e-05, + "loss": 2.4951, + "step": 12650 + }, + { + "epoch": 1.020982971511581, + "grad_norm": 0.714022159576416, + "learning_rate": 6.011142032453561e-05, + "loss": 2.4398, + "step": 12651 + }, + { + "epoch": 1.0210636752481639, + "grad_norm": 0.6961550712585449, + "learning_rate": 6.00969442267659e-05, + "loss": 2.4495, + "step": 12652 + }, + { + "epoch": 1.021144378984747, + "grad_norm": 0.7196643948554993, + "learning_rate": 6.008246912348467e-05, + "loss": 2.4449, + "step": 12653 + }, + { + "epoch": 1.02122508272133, + "grad_norm": 0.6163341999053955, + "learning_rate": 6.006799501505268e-05, + "loss": 2.4108, + "step": 12654 + }, + { + "epoch": 1.021305786457913, + "grad_norm": 0.6657030582427979, + "learning_rate": 6.005352190183061e-05, + "loss": 2.4328, + "step": 12655 + }, + { + "epoch": 1.021386490194496, + "grad_norm": 0.7183353900909424, + "learning_rate": 6.00390497841792e-05, + "loss": 2.4912, + "step": 12656 + }, + { + "epoch": 1.021467193931079, + "grad_norm": 0.6912575364112854, + "learning_rate": 6.002457866245916e-05, + "loss": 2.4597, + "step": 12657 + }, + { + "epoch": 1.021547897667662, + "grad_norm": 0.7395210266113281, + "learning_rate": 6.0010108537031084e-05, + "loss": 2.4823, + "step": 12658 + }, + { + "epoch": 1.021628601404245, + "grad_norm": 0.722618043422699, + "learning_rate": 5.9995639408255636e-05, + "loss": 2.4924, + "step": 12659 + }, + { + "epoch": 1.021709305140828, + "grad_norm": 0.739009439945221, + "learning_rate": 5.998117127649344e-05, + "loss": 2.4454, + "step": 12660 + }, + { + "epoch": 1.021790008877411, + "grad_norm": 0.7017633318901062, + "learning_rate": 5.996670414210506e-05, + "loss": 2.5058, + "step": 12661 + }, + { + "epoch": 
1.021870712613994, + "grad_norm": 0.742664635181427, + "learning_rate": 5.9952238005451046e-05, + "loss": 2.436, + "step": 12662 + }, + { + "epoch": 1.021951416350577, + "grad_norm": 0.6865660548210144, + "learning_rate": 5.9937772866892e-05, + "loss": 2.4364, + "step": 12663 + }, + { + "epoch": 1.02203212008716, + "grad_norm": 0.7376219034194946, + "learning_rate": 5.992330872678833e-05, + "loss": 2.4975, + "step": 12664 + }, + { + "epoch": 1.0221128238237431, + "grad_norm": 0.6496078372001648, + "learning_rate": 5.990884558550054e-05, + "loss": 2.4651, + "step": 12665 + }, + { + "epoch": 1.022193527560326, + "grad_norm": 0.7178322076797485, + "learning_rate": 5.989438344338915e-05, + "loss": 2.5015, + "step": 12666 + }, + { + "epoch": 1.022274231296909, + "grad_norm": 0.7084102034568787, + "learning_rate": 5.987992230081459e-05, + "loss": 2.4741, + "step": 12667 + }, + { + "epoch": 1.022354935033492, + "grad_norm": 0.6634935736656189, + "learning_rate": 5.986546215813722e-05, + "loss": 2.4255, + "step": 12668 + }, + { + "epoch": 1.022435638770075, + "grad_norm": 0.6897543668746948, + "learning_rate": 5.985100301571742e-05, + "loss": 2.4682, + "step": 12669 + }, + { + "epoch": 1.0225163425066581, + "grad_norm": 0.6643948554992676, + "learning_rate": 5.9836544873915614e-05, + "loss": 2.4009, + "step": 12670 + }, + { + "epoch": 1.022597046243241, + "grad_norm": 0.681252658367157, + "learning_rate": 5.982208773309208e-05, + "loss": 2.4542, + "step": 12671 + }, + { + "epoch": 1.022677749979824, + "grad_norm": 0.7608681917190552, + "learning_rate": 5.980763159360714e-05, + "loss": 2.5614, + "step": 12672 + }, + { + "epoch": 1.0227584537164072, + "grad_norm": 0.6855095028877258, + "learning_rate": 5.979317645582112e-05, + "loss": 2.4505, + "step": 12673 + }, + { + "epoch": 1.02283915745299, + "grad_norm": 0.6846089363098145, + "learning_rate": 5.97787223200942e-05, + "loss": 2.4438, + "step": 12674 + }, + { + "epoch": 1.0229198611895731, + "grad_norm": 
0.7198090553283691, + "learning_rate": 5.9764269186786684e-05, + "loss": 2.4469, + "step": 12675 + }, + { + "epoch": 1.023000564926156, + "grad_norm": 0.7120245099067688, + "learning_rate": 5.9749817056258764e-05, + "loss": 2.4626, + "step": 12676 + }, + { + "epoch": 1.023081268662739, + "grad_norm": 0.6839897036552429, + "learning_rate": 5.973536592887059e-05, + "loss": 2.4384, + "step": 12677 + }, + { + "epoch": 1.0231619723993222, + "grad_norm": 0.7053773999214172, + "learning_rate": 5.9720915804982356e-05, + "loss": 2.4554, + "step": 12678 + }, + { + "epoch": 1.023242676135905, + "grad_norm": 0.7114294767379761, + "learning_rate": 5.970646668495421e-05, + "loss": 2.3964, + "step": 12679 + }, + { + "epoch": 1.0233233798724881, + "grad_norm": 0.7001516819000244, + "learning_rate": 5.9692018569146224e-05, + "loss": 2.5216, + "step": 12680 + }, + { + "epoch": 1.0234040836090712, + "grad_norm": 0.6715773940086365, + "learning_rate": 5.96775714579185e-05, + "loss": 2.4595, + "step": 12681 + }, + { + "epoch": 1.023484787345654, + "grad_norm": 0.6856278777122498, + "learning_rate": 5.96631253516311e-05, + "loss": 2.4637, + "step": 12682 + }, + { + "epoch": 1.0235654910822372, + "grad_norm": 0.6785625219345093, + "learning_rate": 5.96486802506441e-05, + "loss": 2.4615, + "step": 12683 + }, + { + "epoch": 1.02364619481882, + "grad_norm": 0.6834213137626648, + "learning_rate": 5.963423615531743e-05, + "loss": 2.4729, + "step": 12684 + }, + { + "epoch": 1.023726898555403, + "grad_norm": 0.6729516386985779, + "learning_rate": 5.961979306601109e-05, + "loss": 2.4013, + "step": 12685 + }, + { + "epoch": 1.0238076022919862, + "grad_norm": 0.6785775423049927, + "learning_rate": 5.960535098308511e-05, + "loss": 2.4825, + "step": 12686 + }, + { + "epoch": 1.023888306028569, + "grad_norm": 0.67277991771698, + "learning_rate": 5.959090990689934e-05, + "loss": 2.4606, + "step": 12687 + }, + { + "epoch": 1.0239690097651521, + "grad_norm": 0.7679588198661804, + "learning_rate": 
5.957646983781373e-05, + "loss": 2.5234, + "step": 12688 + }, + { + "epoch": 1.0240497135017352, + "grad_norm": 0.6597407460212708, + "learning_rate": 5.956203077618821e-05, + "loss": 2.4699, + "step": 12689 + }, + { + "epoch": 1.024130417238318, + "grad_norm": 0.6743008494377136, + "learning_rate": 5.9547592722382525e-05, + "loss": 2.4266, + "step": 12690 + }, + { + "epoch": 1.0242111209749012, + "grad_norm": 0.7223396897315979, + "learning_rate": 5.953315567675657e-05, + "loss": 2.5117, + "step": 12691 + }, + { + "epoch": 1.024291824711484, + "grad_norm": 0.6729528307914734, + "learning_rate": 5.951871963967022e-05, + "loss": 2.4586, + "step": 12692 + }, + { + "epoch": 1.0243725284480671, + "grad_norm": 0.6523739695549011, + "learning_rate": 5.950428461148314e-05, + "loss": 2.4408, + "step": 12693 + }, + { + "epoch": 1.0244532321846502, + "grad_norm": 0.6830984950065613, + "learning_rate": 5.9489850592555164e-05, + "loss": 2.4094, + "step": 12694 + }, + { + "epoch": 1.024533935921233, + "grad_norm": 0.6223493814468384, + "learning_rate": 5.9475417583246006e-05, + "loss": 2.4105, + "step": 12695 + }, + { + "epoch": 1.0246146396578162, + "grad_norm": 0.6506635546684265, + "learning_rate": 5.9460985583915374e-05, + "loss": 2.4451, + "step": 12696 + }, + { + "epoch": 1.024695343394399, + "grad_norm": 0.7626760005950928, + "learning_rate": 5.944655459492293e-05, + "loss": 2.4643, + "step": 12697 + }, + { + "epoch": 1.0247760471309821, + "grad_norm": 0.7074631452560425, + "learning_rate": 5.943212461662837e-05, + "loss": 2.4662, + "step": 12698 + }, + { + "epoch": 1.0248567508675652, + "grad_norm": 0.718083918094635, + "learning_rate": 5.9417695649391346e-05, + "loss": 2.4686, + "step": 12699 + }, + { + "epoch": 1.024937454604148, + "grad_norm": 0.6850628852844238, + "learning_rate": 5.9403267693571384e-05, + "loss": 2.4542, + "step": 12700 + }, + { + "epoch": 1.0250181583407312, + "grad_norm": 0.6662585735321045, + "learning_rate": 5.938884074952812e-05, + "loss": 
2.4676, + "step": 12701 + }, + { + "epoch": 1.0250988620773143, + "grad_norm": 0.6806240677833557, + "learning_rate": 5.9374414817621114e-05, + "loss": 2.4243, + "step": 12702 + }, + { + "epoch": 1.0251795658138971, + "grad_norm": 0.6763548851013184, + "learning_rate": 5.9359989898209876e-05, + "loss": 2.4389, + "step": 12703 + }, + { + "epoch": 1.0252602695504802, + "grad_norm": 0.7390143275260925, + "learning_rate": 5.934556599165393e-05, + "loss": 2.4667, + "step": 12704 + }, + { + "epoch": 1.025340973287063, + "grad_norm": 0.6159299612045288, + "learning_rate": 5.933114309831276e-05, + "loss": 2.3832, + "step": 12705 + }, + { + "epoch": 1.0254216770236462, + "grad_norm": 0.6779586672782898, + "learning_rate": 5.931672121854579e-05, + "loss": 2.4615, + "step": 12706 + }, + { + "epoch": 1.0255023807602293, + "grad_norm": 0.643800675868988, + "learning_rate": 5.930230035271247e-05, + "loss": 2.4725, + "step": 12707 + }, + { + "epoch": 1.0255830844968121, + "grad_norm": 0.6605903506278992, + "learning_rate": 5.928788050117227e-05, + "loss": 2.4332, + "step": 12708 + }, + { + "epoch": 1.0256637882333952, + "grad_norm": 0.7046334743499756, + "learning_rate": 5.927346166428446e-05, + "loss": 2.4445, + "step": 12709 + }, + { + "epoch": 1.0257444919699783, + "grad_norm": 0.6536325216293335, + "learning_rate": 5.925904384240843e-05, + "loss": 2.4168, + "step": 12710 + }, + { + "epoch": 1.0258251957065612, + "grad_norm": 0.6861097812652588, + "learning_rate": 5.9244627035903564e-05, + "loss": 2.512, + "step": 12711 + }, + { + "epoch": 1.0259058994431443, + "grad_norm": 0.6782278418540955, + "learning_rate": 5.923021124512911e-05, + "loss": 2.4667, + "step": 12712 + }, + { + "epoch": 1.0259866031797271, + "grad_norm": 0.724435031414032, + "learning_rate": 5.921579647044436e-05, + "loss": 2.4828, + "step": 12713 + }, + { + "epoch": 1.0260673069163102, + "grad_norm": 0.6690630316734314, + "learning_rate": 5.9201382712208575e-05, + "loss": 2.4832, + "step": 12714 + }, + { + 
"epoch": 1.0261480106528933, + "grad_norm": 0.7045348286628723, + "learning_rate": 5.9186969970781015e-05, + "loss": 2.4576, + "step": 12715 + }, + { + "epoch": 1.0262287143894762, + "grad_norm": 0.673321008682251, + "learning_rate": 5.9172558246520796e-05, + "loss": 2.3986, + "step": 12716 + }, + { + "epoch": 1.0263094181260592, + "grad_norm": 0.7184785008430481, + "learning_rate": 5.915814753978717e-05, + "loss": 2.4008, + "step": 12717 + }, + { + "epoch": 1.0263901218626423, + "grad_norm": 0.6971293091773987, + "learning_rate": 5.914373785093931e-05, + "loss": 2.4559, + "step": 12718 + }, + { + "epoch": 1.0264708255992252, + "grad_norm": 0.6941563487052917, + "learning_rate": 5.912932918033626e-05, + "loss": 2.4787, + "step": 12719 + }, + { + "epoch": 1.0265515293358083, + "grad_norm": 0.6276142001152039, + "learning_rate": 5.911492152833715e-05, + "loss": 2.4275, + "step": 12720 + }, + { + "epoch": 1.0266322330723912, + "grad_norm": 0.715928316116333, + "learning_rate": 5.9100514895301106e-05, + "loss": 2.4127, + "step": 12721 + }, + { + "epoch": 1.0267129368089742, + "grad_norm": 0.7004076838493347, + "learning_rate": 5.908610928158713e-05, + "loss": 2.4651, + "step": 12722 + }, + { + "epoch": 1.0267936405455573, + "grad_norm": 0.6761921048164368, + "learning_rate": 5.907170468755425e-05, + "loss": 2.4245, + "step": 12723 + }, + { + "epoch": 1.0268743442821402, + "grad_norm": 0.7246574759483337, + "learning_rate": 5.9057301113561515e-05, + "loss": 2.4489, + "step": 12724 + }, + { + "epoch": 1.0269550480187233, + "grad_norm": 0.7196606397628784, + "learning_rate": 5.904289855996783e-05, + "loss": 2.4357, + "step": 12725 + }, + { + "epoch": 1.0270357517553064, + "grad_norm": 0.7142692804336548, + "learning_rate": 5.902849702713216e-05, + "loss": 2.4821, + "step": 12726 + }, + { + "epoch": 1.0271164554918892, + "grad_norm": 0.7207832336425781, + "learning_rate": 5.9014096515413454e-05, + "loss": 2.4337, + "step": 12727 + }, + { + "epoch": 1.0271971592284723, + 
"grad_norm": 0.6865695714950562, + "learning_rate": 5.899969702517063e-05, + "loss": 2.4549, + "step": 12728 + }, + { + "epoch": 1.0272778629650552, + "grad_norm": 0.7136662006378174, + "learning_rate": 5.898529855676249e-05, + "loss": 2.4606, + "step": 12729 + }, + { + "epoch": 1.0273585667016383, + "grad_norm": 0.701885998249054, + "learning_rate": 5.897090111054795e-05, + "loss": 2.4913, + "step": 12730 + }, + { + "epoch": 1.0274392704382214, + "grad_norm": 0.6671354174613953, + "learning_rate": 5.8956504686885805e-05, + "loss": 2.4064, + "step": 12731 + }, + { + "epoch": 1.0275199741748042, + "grad_norm": 0.6720621585845947, + "learning_rate": 5.894210928613484e-05, + "loss": 2.4908, + "step": 12732 + }, + { + "epoch": 1.0276006779113873, + "grad_norm": 0.7530980706214905, + "learning_rate": 5.892771490865383e-05, + "loss": 2.4486, + "step": 12733 + }, + { + "epoch": 1.0276813816479704, + "grad_norm": 0.6771122813224792, + "learning_rate": 5.891332155480158e-05, + "loss": 2.3954, + "step": 12734 + }, + { + "epoch": 1.0277620853845533, + "grad_norm": 0.6779236793518066, + "learning_rate": 5.889892922493671e-05, + "loss": 2.4404, + "step": 12735 + }, + { + "epoch": 1.0278427891211364, + "grad_norm": 0.7593358755111694, + "learning_rate": 5.8884537919417974e-05, + "loss": 2.4997, + "step": 12736 + }, + { + "epoch": 1.0279234928577192, + "grad_norm": 0.672686755657196, + "learning_rate": 5.8870147638604044e-05, + "loss": 2.5394, + "step": 12737 + }, + { + "epoch": 1.0280041965943023, + "grad_norm": 0.6727546453475952, + "learning_rate": 5.885575838285353e-05, + "loss": 2.4554, + "step": 12738 + }, + { + "epoch": 1.0280849003308854, + "grad_norm": 0.7092764377593994, + "learning_rate": 5.884137015252507e-05, + "loss": 2.4568, + "step": 12739 + }, + { + "epoch": 1.0281656040674683, + "grad_norm": 0.6988070011138916, + "learning_rate": 5.882698294797728e-05, + "loss": 2.4453, + "step": 12740 + }, + { + "epoch": 1.0282463078040514, + "grad_norm": 0.7578697204589844, + 
"learning_rate": 5.8812596769568676e-05, + "loss": 2.5648, + "step": 12741 + }, + { + "epoch": 1.0283270115406344, + "grad_norm": 0.6523683667182922, + "learning_rate": 5.879821161765782e-05, + "loss": 2.4088, + "step": 12742 + }, + { + "epoch": 1.0284077152772173, + "grad_norm": 0.6797270178794861, + "learning_rate": 5.878382749260323e-05, + "loss": 2.4465, + "step": 12743 + }, + { + "epoch": 1.0284884190138004, + "grad_norm": 0.6823786497116089, + "learning_rate": 5.876944439476345e-05, + "loss": 2.5053, + "step": 12744 + }, + { + "epoch": 1.0285691227503833, + "grad_norm": 0.6840088367462158, + "learning_rate": 5.875506232449686e-05, + "loss": 2.3771, + "step": 12745 + }, + { + "epoch": 1.0286498264869663, + "grad_norm": 0.6985318064689636, + "learning_rate": 5.8740681282161914e-05, + "loss": 2.4456, + "step": 12746 + }, + { + "epoch": 1.0287305302235494, + "grad_norm": 0.7102388739585876, + "learning_rate": 5.872630126811707e-05, + "loss": 2.4802, + "step": 12747 + }, + { + "epoch": 1.0288112339601323, + "grad_norm": 0.7917937636375427, + "learning_rate": 5.871192228272067e-05, + "loss": 2.4606, + "step": 12748 + }, + { + "epoch": 1.0288919376967154, + "grad_norm": 0.683397114276886, + "learning_rate": 5.86975443263311e-05, + "loss": 2.5011, + "step": 12749 + }, + { + "epoch": 1.0289726414332985, + "grad_norm": 0.7543408870697021, + "learning_rate": 5.8683167399306724e-05, + "loss": 2.4705, + "step": 12750 + }, + { + "epoch": 1.0290533451698813, + "grad_norm": 0.6946283578872681, + "learning_rate": 5.866879150200579e-05, + "loss": 2.4986, + "step": 12751 + }, + { + "epoch": 1.0291340489064644, + "grad_norm": 0.6535125374794006, + "learning_rate": 5.8654416634786605e-05, + "loss": 2.4203, + "step": 12752 + }, + { + "epoch": 1.0292147526430473, + "grad_norm": 0.7470195889472961, + "learning_rate": 5.8640042798007455e-05, + "loss": 2.5103, + "step": 12753 + }, + { + "epoch": 1.0292954563796304, + "grad_norm": 0.6782363653182983, + "learning_rate": 
5.8625669992026535e-05, + "loss": 2.4087, + "step": 12754 + }, + { + "epoch": 1.0293761601162135, + "grad_norm": 0.7601497173309326, + "learning_rate": 5.861129821720207e-05, + "loss": 2.4752, + "step": 12755 + }, + { + "epoch": 1.0294568638527963, + "grad_norm": 0.6875388026237488, + "learning_rate": 5.859692747389227e-05, + "loss": 2.448, + "step": 12756 + }, + { + "epoch": 1.0295375675893794, + "grad_norm": 0.7153629064559937, + "learning_rate": 5.858255776245525e-05, + "loss": 2.4641, + "step": 12757 + }, + { + "epoch": 1.0296182713259623, + "grad_norm": 0.682954728603363, + "learning_rate": 5.8568189083249145e-05, + "loss": 2.441, + "step": 12758 + }, + { + "epoch": 1.0296989750625454, + "grad_norm": 0.6959100961685181, + "learning_rate": 5.855382143663209e-05, + "loss": 2.4316, + "step": 12759 + }, + { + "epoch": 1.0297796787991285, + "grad_norm": 0.7062023878097534, + "learning_rate": 5.8539454822962167e-05, + "loss": 2.4287, + "step": 12760 + }, + { + "epoch": 1.0298603825357113, + "grad_norm": 0.706523597240448, + "learning_rate": 5.852508924259736e-05, + "loss": 2.4596, + "step": 12761 + }, + { + "epoch": 1.0299410862722944, + "grad_norm": 0.6908385753631592, + "learning_rate": 5.851072469589578e-05, + "loss": 2.4428, + "step": 12762 + }, + { + "epoch": 1.0300217900088775, + "grad_norm": 0.6810726523399353, + "learning_rate": 5.8496361183215386e-05, + "loss": 2.4902, + "step": 12763 + }, + { + "epoch": 1.0301024937454604, + "grad_norm": 0.661613941192627, + "learning_rate": 5.8481998704914156e-05, + "loss": 2.4256, + "step": 12764 + }, + { + "epoch": 1.0301831974820435, + "grad_norm": 0.6633132100105286, + "learning_rate": 5.846763726135005e-05, + "loss": 2.4512, + "step": 12765 + }, + { + "epoch": 1.0302639012186263, + "grad_norm": 0.6991820335388184, + "learning_rate": 5.8453276852881025e-05, + "loss": 2.3747, + "step": 12766 + }, + { + "epoch": 1.0303446049552094, + "grad_norm": 0.7392076253890991, + "learning_rate": 5.843891747986487e-05, + "loss": 
2.438, + "step": 12767 + }, + { + "epoch": 1.0304253086917925, + "grad_norm": 0.6371724605560303, + "learning_rate": 5.842455914265958e-05, + "loss": 2.4627, + "step": 12768 + }, + { + "epoch": 1.0305060124283754, + "grad_norm": 0.6475048661231995, + "learning_rate": 5.841020184162298e-05, + "loss": 2.4883, + "step": 12769 + }, + { + "epoch": 1.0305867161649584, + "grad_norm": 0.6848995685577393, + "learning_rate": 5.839584557711283e-05, + "loss": 2.4452, + "step": 12770 + }, + { + "epoch": 1.0306674199015415, + "grad_norm": 0.7345505952835083, + "learning_rate": 5.838149034948697e-05, + "loss": 2.5121, + "step": 12771 + }, + { + "epoch": 1.0307481236381244, + "grad_norm": 0.715373158454895, + "learning_rate": 5.836713615910318e-05, + "loss": 2.4549, + "step": 12772 + }, + { + "epoch": 1.0308288273747075, + "grad_norm": 0.7371035814285278, + "learning_rate": 5.8352783006319166e-05, + "loss": 2.4633, + "step": 12773 + }, + { + "epoch": 1.0309095311112904, + "grad_norm": 0.6843077540397644, + "learning_rate": 5.833843089149267e-05, + "loss": 2.4067, + "step": 12774 + }, + { + "epoch": 1.0309902348478734, + "grad_norm": 0.7398965954780579, + "learning_rate": 5.832407981498136e-05, + "loss": 2.5199, + "step": 12775 + }, + { + "epoch": 1.0310709385844565, + "grad_norm": 0.6860283017158508, + "learning_rate": 5.830972977714294e-05, + "loss": 2.4564, + "step": 12776 + }, + { + "epoch": 1.0311516423210394, + "grad_norm": 0.683893084526062, + "learning_rate": 5.829538077833503e-05, + "loss": 2.4635, + "step": 12777 + }, + { + "epoch": 1.0312323460576225, + "grad_norm": 0.6412089467048645, + "learning_rate": 5.828103281891525e-05, + "loss": 2.4806, + "step": 12778 + }, + { + "epoch": 1.0313130497942056, + "grad_norm": 0.646393895149231, + "learning_rate": 5.826668589924123e-05, + "loss": 2.4674, + "step": 12779 + }, + { + "epoch": 1.0313937535307884, + "grad_norm": 0.6805605292320251, + "learning_rate": 5.825234001967044e-05, + "loss": 2.5145, + "step": 12780 + }, + { + 
"epoch": 1.0314744572673715, + "grad_norm": 0.681532084941864, + "learning_rate": 5.8237995180560455e-05, + "loss": 2.5041, + "step": 12781 + }, + { + "epoch": 1.0315551610039544, + "grad_norm": 0.6971312165260315, + "learning_rate": 5.8223651382268865e-05, + "loss": 2.5324, + "step": 12782 + }, + { + "epoch": 1.0316358647405375, + "grad_norm": 0.6634463667869568, + "learning_rate": 5.8209308625153026e-05, + "loss": 2.5086, + "step": 12783 + }, + { + "epoch": 1.0317165684771206, + "grad_norm": 0.6752117276191711, + "learning_rate": 5.819496690957047e-05, + "loss": 2.4805, + "step": 12784 + }, + { + "epoch": 1.0317972722137034, + "grad_norm": 0.7242109775543213, + "learning_rate": 5.818062623587861e-05, + "loss": 2.4205, + "step": 12785 + }, + { + "epoch": 1.0318779759502865, + "grad_norm": 0.7338563203811646, + "learning_rate": 5.816628660443486e-05, + "loss": 2.4277, + "step": 12786 + }, + { + "epoch": 1.0319586796868696, + "grad_norm": 0.6764293313026428, + "learning_rate": 5.81519480155966e-05, + "loss": 2.5096, + "step": 12787 + }, + { + "epoch": 1.0320393834234525, + "grad_norm": 0.6757099032402039, + "learning_rate": 5.813761046972124e-05, + "loss": 2.468, + "step": 12788 + }, + { + "epoch": 1.0321200871600356, + "grad_norm": 0.7072502374649048, + "learning_rate": 5.8123273967166017e-05, + "loss": 2.4642, + "step": 12789 + }, + { + "epoch": 1.0322007908966184, + "grad_norm": 0.6470256447792053, + "learning_rate": 5.810893850828827e-05, + "loss": 2.4146, + "step": 12790 + }, + { + "epoch": 1.0322814946332015, + "grad_norm": 0.7403351068496704, + "learning_rate": 5.809460409344527e-05, + "loss": 2.512, + "step": 12791 + }, + { + "epoch": 1.0323621983697846, + "grad_norm": 0.6711490154266357, + "learning_rate": 5.808027072299432e-05, + "loss": 2.4602, + "step": 12792 + }, + { + "epoch": 1.0324429021063675, + "grad_norm": 0.7920248508453369, + "learning_rate": 5.806593839729258e-05, + "loss": 2.4512, + "step": 12793 + }, + { + "epoch": 1.0325236058429506, + 
"grad_norm": 0.6442045569419861, + "learning_rate": 5.805160711669725e-05, + "loss": 2.4165, + "step": 12794 + }, + { + "epoch": 1.0326043095795336, + "grad_norm": 0.6681340932846069, + "learning_rate": 5.803727688156553e-05, + "loss": 2.4296, + "step": 12795 + }, + { + "epoch": 1.0326850133161165, + "grad_norm": 0.6653337478637695, + "learning_rate": 5.802294769225457e-05, + "loss": 2.5165, + "step": 12796 + }, + { + "epoch": 1.0327657170526996, + "grad_norm": 0.6444782018661499, + "learning_rate": 5.8008619549121476e-05, + "loss": 2.4266, + "step": 12797 + }, + { + "epoch": 1.0328464207892825, + "grad_norm": 0.6741451621055603, + "learning_rate": 5.7994292452523394e-05, + "loss": 2.4837, + "step": 12798 + }, + { + "epoch": 1.0329271245258655, + "grad_norm": 0.6629341840744019, + "learning_rate": 5.797996640281731e-05, + "loss": 2.4368, + "step": 12799 + }, + { + "epoch": 1.0330078282624486, + "grad_norm": 0.6755850315093994, + "learning_rate": 5.796564140036029e-05, + "loss": 2.4834, + "step": 12800 + }, + { + "epoch": 1.0330885319990315, + "grad_norm": 0.7271782755851746, + "learning_rate": 5.795131744550942e-05, + "loss": 2.5025, + "step": 12801 + }, + { + "epoch": 1.0331692357356146, + "grad_norm": 0.6870545744895935, + "learning_rate": 5.7936994538621605e-05, + "loss": 2.4443, + "step": 12802 + }, + { + "epoch": 1.0332499394721975, + "grad_norm": 0.7231935858726501, + "learning_rate": 5.792267268005382e-05, + "loss": 2.4917, + "step": 12803 + }, + { + "epoch": 1.0333306432087805, + "grad_norm": 0.6905832290649414, + "learning_rate": 5.790835187016307e-05, + "loss": 2.4902, + "step": 12804 + }, + { + "epoch": 1.0334113469453636, + "grad_norm": 0.711814284324646, + "learning_rate": 5.789403210930613e-05, + "loss": 2.4579, + "step": 12805 + }, + { + "epoch": 1.0334920506819465, + "grad_norm": 0.6982280015945435, + "learning_rate": 5.787971339784004e-05, + "loss": 2.5275, + "step": 12806 + }, + { + "epoch": 1.0335727544185296, + "grad_norm": 0.6871493458747864, + 
"learning_rate": 5.7865395736121575e-05, + "loss": 2.4401, + "step": 12807 + }, + { + "epoch": 1.0336534581551127, + "grad_norm": 0.6898353099822998, + "learning_rate": 5.785107912450763e-05, + "loss": 2.4005, + "step": 12808 + }, + { + "epoch": 1.0337341618916955, + "grad_norm": 0.6264411807060242, + "learning_rate": 5.7836763563354946e-05, + "loss": 2.4497, + "step": 12809 + }, + { + "epoch": 1.0338148656282786, + "grad_norm": 0.6997092962265015, + "learning_rate": 5.782244905302032e-05, + "loss": 2.4388, + "step": 12810 + }, + { + "epoch": 1.0338955693648615, + "grad_norm": 0.6834601759910583, + "learning_rate": 5.7808135593860555e-05, + "loss": 2.4298, + "step": 12811 + }, + { + "epoch": 1.0339762731014446, + "grad_norm": 0.664315402507782, + "learning_rate": 5.77938231862323e-05, + "loss": 2.4289, + "step": 12812 + }, + { + "epoch": 1.0340569768380277, + "grad_norm": 0.6660603284835815, + "learning_rate": 5.7779511830492306e-05, + "loss": 2.4772, + "step": 12813 + }, + { + "epoch": 1.0341376805746105, + "grad_norm": 0.6457028388977051, + "learning_rate": 5.776520152699728e-05, + "loss": 2.4408, + "step": 12814 + }, + { + "epoch": 1.0342183843111936, + "grad_norm": 0.7132207155227661, + "learning_rate": 5.7750892276103794e-05, + "loss": 2.4953, + "step": 12815 + }, + { + "epoch": 1.0342990880477767, + "grad_norm": 0.7397382259368896, + "learning_rate": 5.773658407816848e-05, + "loss": 2.4396, + "step": 12816 + }, + { + "epoch": 1.0343797917843596, + "grad_norm": 0.6951746344566345, + "learning_rate": 5.7722276933548034e-05, + "loss": 2.5021, + "step": 12817 + }, + { + "epoch": 1.0344604955209427, + "grad_norm": 0.6789736151695251, + "learning_rate": 5.7707970842598935e-05, + "loss": 2.4883, + "step": 12818 + }, + { + "epoch": 1.0345411992575255, + "grad_norm": 0.7231541872024536, + "learning_rate": 5.7693665805677747e-05, + "loss": 2.4761, + "step": 12819 + }, + { + "epoch": 1.0346219029941086, + "grad_norm": 0.685943603515625, + "learning_rate": 
5.767936182314104e-05, + "loss": 2.4489, + "step": 12820 + }, + { + "epoch": 1.0347026067306917, + "grad_norm": 0.7081817984580994, + "learning_rate": 5.7665058895345236e-05, + "loss": 2.4329, + "step": 12821 + }, + { + "epoch": 1.0347833104672746, + "grad_norm": 0.6700818538665771, + "learning_rate": 5.7650757022646804e-05, + "loss": 2.4252, + "step": 12822 + }, + { + "epoch": 1.0348640142038577, + "grad_norm": 0.6712214946746826, + "learning_rate": 5.763645620540223e-05, + "loss": 2.419, + "step": 12823 + }, + { + "epoch": 1.0349447179404407, + "grad_norm": 0.6732817888259888, + "learning_rate": 5.762215644396793e-05, + "loss": 2.3928, + "step": 12824 + }, + { + "epoch": 1.0350254216770236, + "grad_norm": 0.6689301133155823, + "learning_rate": 5.760785773870024e-05, + "loss": 2.3981, + "step": 12825 + }, + { + "epoch": 1.0351061254136067, + "grad_norm": 0.6822957992553711, + "learning_rate": 5.759356008995556e-05, + "loss": 2.5265, + "step": 12826 + }, + { + "epoch": 1.0351868291501896, + "grad_norm": 0.7316287755966187, + "learning_rate": 5.7579263498090194e-05, + "loss": 2.4132, + "step": 12827 + }, + { + "epoch": 1.0352675328867726, + "grad_norm": 0.6688703894615173, + "learning_rate": 5.756496796346047e-05, + "loss": 2.4195, + "step": 12828 + }, + { + "epoch": 1.0353482366233557, + "grad_norm": 0.6894570589065552, + "learning_rate": 5.755067348642268e-05, + "loss": 2.4897, + "step": 12829 + }, + { + "epoch": 1.0354289403599386, + "grad_norm": 0.7635753750801086, + "learning_rate": 5.753638006733311e-05, + "loss": 2.4643, + "step": 12830 + }, + { + "epoch": 1.0355096440965217, + "grad_norm": 0.6353672742843628, + "learning_rate": 5.75220877065479e-05, + "loss": 2.4533, + "step": 12831 + }, + { + "epoch": 1.0355903478331048, + "grad_norm": 0.6725208759307861, + "learning_rate": 5.750779640442332e-05, + "loss": 2.4958, + "step": 12832 + }, + { + "epoch": 1.0356710515696876, + "grad_norm": 0.7350767254829407, + "learning_rate": 5.749350616131556e-05, + "loss": 
2.4192, + "step": 12833 + }, + { + "epoch": 1.0357517553062707, + "grad_norm": 0.7322222590446472, + "learning_rate": 5.7479216977580695e-05, + "loss": 2.4719, + "step": 12834 + }, + { + "epoch": 1.0358324590428536, + "grad_norm": 0.7233425974845886, + "learning_rate": 5.7464928853574904e-05, + "loss": 2.4707, + "step": 12835 + }, + { + "epoch": 1.0359131627794367, + "grad_norm": 0.7117420434951782, + "learning_rate": 5.745064178965427e-05, + "loss": 2.4463, + "step": 12836 + }, + { + "epoch": 1.0359938665160198, + "grad_norm": 0.7615050077438354, + "learning_rate": 5.743635578617486e-05, + "loss": 2.4256, + "step": 12837 + }, + { + "epoch": 1.0360745702526026, + "grad_norm": 0.7056093215942383, + "learning_rate": 5.7422070843492734e-05, + "loss": 2.4628, + "step": 12838 + }, + { + "epoch": 1.0361552739891857, + "grad_norm": 0.685989499092102, + "learning_rate": 5.740778696196389e-05, + "loss": 2.4271, + "step": 12839 + }, + { + "epoch": 1.0362359777257688, + "grad_norm": 0.7286686301231384, + "learning_rate": 5.739350414194439e-05, + "loss": 2.4984, + "step": 12840 + }, + { + "epoch": 1.0363166814623517, + "grad_norm": 0.6939802765846252, + "learning_rate": 5.737922238379009e-05, + "loss": 2.4601, + "step": 12841 + }, + { + "epoch": 1.0363973851989348, + "grad_norm": 0.7077060341835022, + "learning_rate": 5.736494168785698e-05, + "loss": 2.4264, + "step": 12842 + }, + { + "epoch": 1.0364780889355176, + "grad_norm": 0.667086124420166, + "learning_rate": 5.7350662054501016e-05, + "loss": 2.4733, + "step": 12843 + }, + { + "epoch": 1.0365587926721007, + "grad_norm": 0.6531338691711426, + "learning_rate": 5.7336383484078004e-05, + "loss": 2.4709, + "step": 12844 + }, + { + "epoch": 1.0366394964086838, + "grad_norm": 0.7141630053520203, + "learning_rate": 5.732210597694383e-05, + "loss": 2.4747, + "step": 12845 + }, + { + "epoch": 1.0367202001452667, + "grad_norm": 0.7186396718025208, + "learning_rate": 5.730782953345435e-05, + "loss": 2.4401, + "step": 12846 + }, + { 
+ "epoch": 1.0368009038818498, + "grad_norm": 0.6709686517715454, + "learning_rate": 5.7293554153965345e-05, + "loss": 2.456, + "step": 12847 + }, + { + "epoch": 1.0368816076184326, + "grad_norm": 0.6867267489433289, + "learning_rate": 5.727927983883261e-05, + "loss": 2.4522, + "step": 12848 + }, + { + "epoch": 1.0369623113550157, + "grad_norm": 0.7016724348068237, + "learning_rate": 5.7265006588411926e-05, + "loss": 2.4348, + "step": 12849 + }, + { + "epoch": 1.0370430150915988, + "grad_norm": 0.6764764785766602, + "learning_rate": 5.725073440305896e-05, + "loss": 2.4241, + "step": 12850 + }, + { + "epoch": 1.0371237188281817, + "grad_norm": 0.6965062618255615, + "learning_rate": 5.7236463283129435e-05, + "loss": 2.4559, + "step": 12851 + }, + { + "epoch": 1.0372044225647647, + "grad_norm": 0.6878135800361633, + "learning_rate": 5.7222193228979037e-05, + "loss": 2.4874, + "step": 12852 + }, + { + "epoch": 1.0372851263013478, + "grad_norm": 0.6576557755470276, + "learning_rate": 5.720792424096344e-05, + "loss": 2.4273, + "step": 12853 + }, + { + "epoch": 1.0373658300379307, + "grad_norm": 0.7463123798370361, + "learning_rate": 5.719365631943818e-05, + "loss": 2.4933, + "step": 12854 + }, + { + "epoch": 1.0374465337745138, + "grad_norm": 0.6920896768569946, + "learning_rate": 5.7179389464758914e-05, + "loss": 2.4799, + "step": 12855 + }, + { + "epoch": 1.0375272375110969, + "grad_norm": 0.7330591082572937, + "learning_rate": 5.71651236772812e-05, + "loss": 2.469, + "step": 12856 + }, + { + "epoch": 1.0376079412476797, + "grad_norm": 0.6766076683998108, + "learning_rate": 5.715085895736057e-05, + "loss": 2.4787, + "step": 12857 + }, + { + "epoch": 1.0376886449842628, + "grad_norm": 0.724278450012207, + "learning_rate": 5.713659530535255e-05, + "loss": 2.4524, + "step": 12858 + }, + { + "epoch": 1.0377693487208457, + "grad_norm": 0.6816281676292419, + "learning_rate": 5.712233272161265e-05, + "loss": 2.4993, + "step": 12859 + }, + { + "epoch": 1.0378500524574288, + 
"grad_norm": 0.7186439633369446, + "learning_rate": 5.710807120649626e-05, + "loss": 2.4108, + "step": 12860 + }, + { + "epoch": 1.0379307561940119, + "grad_norm": 0.6616777181625366, + "learning_rate": 5.709381076035887e-05, + "loss": 2.4797, + "step": 12861 + }, + { + "epoch": 1.0380114599305947, + "grad_norm": 0.6956895589828491, + "learning_rate": 5.7079551383555906e-05, + "loss": 2.4017, + "step": 12862 + }, + { + "epoch": 1.0380921636671778, + "grad_norm": 0.6650584936141968, + "learning_rate": 5.706529307644268e-05, + "loss": 2.4808, + "step": 12863 + }, + { + "epoch": 1.0381728674037607, + "grad_norm": 0.6362698674201965, + "learning_rate": 5.705103583937458e-05, + "loss": 2.4077, + "step": 12864 + }, + { + "epoch": 1.0382535711403438, + "grad_norm": 0.6962565183639526, + "learning_rate": 5.703677967270697e-05, + "loss": 2.4715, + "step": 12865 + }, + { + "epoch": 1.0383342748769269, + "grad_norm": 0.6927294135093689, + "learning_rate": 5.702252457679509e-05, + "loss": 2.4983, + "step": 12866 + }, + { + "epoch": 1.0384149786135097, + "grad_norm": 0.7107497453689575, + "learning_rate": 5.70082705519942e-05, + "loss": 2.4198, + "step": 12867 + }, + { + "epoch": 1.0384956823500928, + "grad_norm": 0.6459221243858337, + "learning_rate": 5.6994017598659634e-05, + "loss": 2.4423, + "step": 12868 + }, + { + "epoch": 1.038576386086676, + "grad_norm": 0.705563485622406, + "learning_rate": 5.697976571714658e-05, + "loss": 2.5346, + "step": 12869 + }, + { + "epoch": 1.0386570898232588, + "grad_norm": 0.7424784898757935, + "learning_rate": 5.696551490781021e-05, + "loss": 2.4824, + "step": 12870 + }, + { + "epoch": 1.0387377935598419, + "grad_norm": 0.6820988059043884, + "learning_rate": 5.695126517100569e-05, + "loss": 2.4965, + "step": 12871 + }, + { + "epoch": 1.0388184972964247, + "grad_norm": 0.8209595680236816, + "learning_rate": 5.6937016507088225e-05, + "loss": 2.475, + "step": 12872 + }, + { + "epoch": 1.0388992010330078, + "grad_norm": 0.7407695055007935, + 
"learning_rate": 5.6922768916412815e-05, + "loss": 2.4683, + "step": 12873 + }, + { + "epoch": 1.038979904769591, + "grad_norm": 0.7335677742958069, + "learning_rate": 5.690852239933462e-05, + "loss": 2.4621, + "step": 12874 + }, + { + "epoch": 1.0390606085061738, + "grad_norm": 0.6731325387954712, + "learning_rate": 5.689427695620873e-05, + "loss": 2.4882, + "step": 12875 + }, + { + "epoch": 1.0391413122427569, + "grad_norm": 0.7256175875663757, + "learning_rate": 5.68800325873901e-05, + "loss": 2.4827, + "step": 12876 + }, + { + "epoch": 1.03922201597934, + "grad_norm": 0.711928129196167, + "learning_rate": 5.686578929323377e-05, + "loss": 2.4447, + "step": 12877 + }, + { + "epoch": 1.0393027197159228, + "grad_norm": 0.6445996165275574, + "learning_rate": 5.685154707409473e-05, + "loss": 2.453, + "step": 12878 + }, + { + "epoch": 1.039383423452506, + "grad_norm": 0.6656066179275513, + "learning_rate": 5.6837305930327923e-05, + "loss": 2.4863, + "step": 12879 + }, + { + "epoch": 1.0394641271890888, + "grad_norm": 0.6844663619995117, + "learning_rate": 5.682306586228828e-05, + "loss": 2.4524, + "step": 12880 + }, + { + "epoch": 1.0395448309256718, + "grad_norm": 0.6436383724212646, + "learning_rate": 5.6808826870330746e-05, + "loss": 2.4137, + "step": 12881 + }, + { + "epoch": 1.039625534662255, + "grad_norm": 0.6731196641921997, + "learning_rate": 5.6794588954810104e-05, + "loss": 2.4176, + "step": 12882 + }, + { + "epoch": 1.0397062383988378, + "grad_norm": 0.6994587779045105, + "learning_rate": 5.678035211608125e-05, + "loss": 2.4651, + "step": 12883 + }, + { + "epoch": 1.0397869421354209, + "grad_norm": 0.6912599205970764, + "learning_rate": 5.6766116354499e-05, + "loss": 2.3918, + "step": 12884 + }, + { + "epoch": 1.039867645872004, + "grad_norm": 0.7627033591270447, + "learning_rate": 5.6751881670418185e-05, + "loss": 2.4278, + "step": 12885 + }, + { + "epoch": 1.0399483496085868, + "grad_norm": 0.7107213139533997, + "learning_rate": 5.6737648064193485e-05, + 
"loss": 2.5249, + "step": 12886 + }, + { + "epoch": 1.04002905334517, + "grad_norm": 0.7254211902618408, + "learning_rate": 5.672341553617968e-05, + "loss": 2.4454, + "step": 12887 + }, + { + "epoch": 1.0401097570817528, + "grad_norm": 0.6776205897331238, + "learning_rate": 5.670918408673149e-05, + "loss": 2.4333, + "step": 12888 + }, + { + "epoch": 1.0401904608183359, + "grad_norm": 0.6824465394020081, + "learning_rate": 5.669495371620359e-05, + "loss": 2.427, + "step": 12889 + }, + { + "epoch": 1.040271164554919, + "grad_norm": 0.6633001565933228, + "learning_rate": 5.668072442495066e-05, + "loss": 2.4874, + "step": 12890 + }, + { + "epoch": 1.0403518682915018, + "grad_norm": 0.6655289530754089, + "learning_rate": 5.666649621332735e-05, + "loss": 2.5023, + "step": 12891 + }, + { + "epoch": 1.040432572028085, + "grad_norm": 0.6892853379249573, + "learning_rate": 5.665226908168818e-05, + "loss": 2.4505, + "step": 12892 + }, + { + "epoch": 1.040513275764668, + "grad_norm": 0.7154649496078491, + "learning_rate": 5.6638043030387774e-05, + "loss": 2.4916, + "step": 12893 + }, + { + "epoch": 1.0405939795012509, + "grad_norm": 0.6780592799186707, + "learning_rate": 5.662381805978074e-05, + "loss": 2.4116, + "step": 12894 + }, + { + "epoch": 1.040674683237834, + "grad_norm": 0.6737352013587952, + "learning_rate": 5.66095941702215e-05, + "loss": 2.3903, + "step": 12895 + }, + { + "epoch": 1.0407553869744168, + "grad_norm": 0.7623820304870605, + "learning_rate": 5.659537136206461e-05, + "loss": 2.4334, + "step": 12896 + }, + { + "epoch": 1.040836090711, + "grad_norm": 0.7043081521987915, + "learning_rate": 5.65811496356645e-05, + "loss": 2.4403, + "step": 12897 + }, + { + "epoch": 1.040916794447583, + "grad_norm": 0.6704873442649841, + "learning_rate": 5.6566928991375654e-05, + "loss": 2.4416, + "step": 12898 + }, + { + "epoch": 1.0409974981841659, + "grad_norm": 0.6556837558746338, + "learning_rate": 5.6552709429552474e-05, + "loss": 2.4904, + "step": 12899 + }, + { + 
"epoch": 1.041078201920749, + "grad_norm": 0.6926451325416565, + "learning_rate": 5.653849095054935e-05, + "loss": 2.4889, + "step": 12900 + }, + { + "epoch": 1.041158905657332, + "grad_norm": 0.6407613158226013, + "learning_rate": 5.6524273554720674e-05, + "loss": 2.3951, + "step": 12901 + }, + { + "epoch": 1.041239609393915, + "grad_norm": 0.7812615633010864, + "learning_rate": 5.651005724242071e-05, + "loss": 2.4535, + "step": 12902 + }, + { + "epoch": 1.041320313130498, + "grad_norm": 0.6868990659713745, + "learning_rate": 5.6495842014003796e-05, + "loss": 2.4373, + "step": 12903 + }, + { + "epoch": 1.0414010168670809, + "grad_norm": 0.6467776894569397, + "learning_rate": 5.648162786982427e-05, + "loss": 2.4929, + "step": 12904 + }, + { + "epoch": 1.041481720603664, + "grad_norm": 0.6588063836097717, + "learning_rate": 5.64674148102363e-05, + "loss": 2.4445, + "step": 12905 + }, + { + "epoch": 1.041562424340247, + "grad_norm": 0.6880654096603394, + "learning_rate": 5.6453202835594136e-05, + "loss": 2.4298, + "step": 12906 + }, + { + "epoch": 1.04164312807683, + "grad_norm": 0.7471407055854797, + "learning_rate": 5.6438991946251996e-05, + "loss": 2.4669, + "step": 12907 + }, + { + "epoch": 1.041723831813413, + "grad_norm": 0.7069533467292786, + "learning_rate": 5.6424782142564034e-05, + "loss": 2.4498, + "step": 12908 + }, + { + "epoch": 1.0418045355499959, + "grad_norm": 0.7013602256774902, + "learning_rate": 5.641057342488443e-05, + "loss": 2.4993, + "step": 12909 + }, + { + "epoch": 1.041885239286579, + "grad_norm": 0.6870697736740112, + "learning_rate": 5.6396365793567305e-05, + "loss": 2.5338, + "step": 12910 + }, + { + "epoch": 1.041965943023162, + "grad_norm": 0.6569130420684814, + "learning_rate": 5.638215924896669e-05, + "loss": 2.4538, + "step": 12911 + }, + { + "epoch": 1.042046646759745, + "grad_norm": 0.6900331377983093, + "learning_rate": 5.636795379143669e-05, + "loss": 2.4013, + "step": 12912 + }, + { + "epoch": 1.042127350496328, + "grad_norm": 
0.6800071001052856, + "learning_rate": 5.635374942133136e-05, + "loss": 2.4733, + "step": 12913 + }, + { + "epoch": 1.042208054232911, + "grad_norm": 0.703601598739624, + "learning_rate": 5.6339546139004663e-05, + "loss": 2.432, + "step": 12914 + }, + { + "epoch": 1.042288757969494, + "grad_norm": 0.6781988739967346, + "learning_rate": 5.6325343944810594e-05, + "loss": 2.4418, + "step": 12915 + }, + { + "epoch": 1.042369461706077, + "grad_norm": 0.7247167825698853, + "learning_rate": 5.6311142839103125e-05, + "loss": 2.5133, + "step": 12916 + }, + { + "epoch": 1.04245016544266, + "grad_norm": 0.7738155126571655, + "learning_rate": 5.629694282223619e-05, + "loss": 2.5137, + "step": 12917 + }, + { + "epoch": 1.042530869179243, + "grad_norm": 0.74723219871521, + "learning_rate": 5.628274389456367e-05, + "loss": 2.3996, + "step": 12918 + }, + { + "epoch": 1.042611572915826, + "grad_norm": 0.7245466709136963, + "learning_rate": 5.6268546056439456e-05, + "loss": 2.4213, + "step": 12919 + }, + { + "epoch": 1.042692276652409, + "grad_norm": 0.6307608485221863, + "learning_rate": 5.625434930821742e-05, + "loss": 2.4195, + "step": 12920 + }, + { + "epoch": 1.042772980388992, + "grad_norm": 0.7138007879257202, + "learning_rate": 5.6240153650251326e-05, + "loss": 2.463, + "step": 12921 + }, + { + "epoch": 1.042853684125575, + "grad_norm": 0.779659628868103, + "learning_rate": 5.622595908289498e-05, + "loss": 2.4898, + "step": 12922 + }, + { + "epoch": 1.042934387862158, + "grad_norm": 0.7144278287887573, + "learning_rate": 5.621176560650221e-05, + "loss": 2.4083, + "step": 12923 + }, + { + "epoch": 1.043015091598741, + "grad_norm": 0.7724754214286804, + "learning_rate": 5.619757322142667e-05, + "loss": 2.3917, + "step": 12924 + }, + { + "epoch": 1.043095795335324, + "grad_norm": 0.7667245268821716, + "learning_rate": 5.618338192802208e-05, + "loss": 2.4943, + "step": 12925 + }, + { + "epoch": 1.043176499071907, + "grad_norm": 0.6528030037879944, + "learning_rate": 
5.616919172664221e-05, + "loss": 2.4323, + "step": 12926 + }, + { + "epoch": 1.04325720280849, + "grad_norm": 0.6790263652801514, + "learning_rate": 5.6155002617640615e-05, + "loss": 2.4304, + "step": 12927 + }, + { + "epoch": 1.043337906545073, + "grad_norm": 0.7554369568824768, + "learning_rate": 5.614081460137097e-05, + "loss": 2.4637, + "step": 12928 + }, + { + "epoch": 1.043418610281656, + "grad_norm": 0.7126293182373047, + "learning_rate": 5.612662767818686e-05, + "loss": 2.4765, + "step": 12929 + }, + { + "epoch": 1.0434993140182391, + "grad_norm": 0.6705749034881592, + "learning_rate": 5.611244184844189e-05, + "loss": 2.4746, + "step": 12930 + }, + { + "epoch": 1.043580017754822, + "grad_norm": 0.6595145463943481, + "learning_rate": 5.609825711248958e-05, + "loss": 2.463, + "step": 12931 + }, + { + "epoch": 1.043660721491405, + "grad_norm": 0.6942049860954285, + "learning_rate": 5.6084073470683476e-05, + "loss": 2.5101, + "step": 12932 + }, + { + "epoch": 1.043741425227988, + "grad_norm": 0.7285810708999634, + "learning_rate": 5.6069890923377087e-05, + "loss": 2.467, + "step": 12933 + }, + { + "epoch": 1.043822128964571, + "grad_norm": 0.7702928185462952, + "learning_rate": 5.605570947092382e-05, + "loss": 2.4998, + "step": 12934 + }, + { + "epoch": 1.0439028327011541, + "grad_norm": 0.6631895899772644, + "learning_rate": 5.604152911367713e-05, + "loss": 2.4277, + "step": 12935 + }, + { + "epoch": 1.043983536437737, + "grad_norm": 0.6447882652282715, + "learning_rate": 5.6027349851990494e-05, + "loss": 2.4868, + "step": 12936 + }, + { + "epoch": 1.04406424017432, + "grad_norm": 0.695160448551178, + "learning_rate": 5.6013171686217205e-05, + "loss": 2.3917, + "step": 12937 + }, + { + "epoch": 1.0441449439109032, + "grad_norm": 0.6579271554946899, + "learning_rate": 5.5998994616710656e-05, + "loss": 2.4245, + "step": 12938 + }, + { + "epoch": 1.044225647647486, + "grad_norm": 0.7053574323654175, + "learning_rate": 5.598481864382419e-05, + "loss": 2.4809, + 
"step": 12939 + }, + { + "epoch": 1.0443063513840691, + "grad_norm": 0.7008736729621887, + "learning_rate": 5.5970643767911105e-05, + "loss": 2.4481, + "step": 12940 + }, + { + "epoch": 1.044387055120652, + "grad_norm": 0.6577918529510498, + "learning_rate": 5.5956469989324644e-05, + "loss": 2.4211, + "step": 12941 + }, + { + "epoch": 1.044467758857235, + "grad_norm": 0.6662739515304565, + "learning_rate": 5.594229730841815e-05, + "loss": 2.4607, + "step": 12942 + }, + { + "epoch": 1.0445484625938182, + "grad_norm": 0.6637060046195984, + "learning_rate": 5.592812572554471e-05, + "loss": 2.4388, + "step": 12943 + }, + { + "epoch": 1.044629166330401, + "grad_norm": 0.7282097935676575, + "learning_rate": 5.5913955241057605e-05, + "loss": 2.4536, + "step": 12944 + }, + { + "epoch": 1.0447098700669841, + "grad_norm": 0.6470810174942017, + "learning_rate": 5.589978585530997e-05, + "loss": 2.4032, + "step": 12945 + }, + { + "epoch": 1.0447905738035672, + "grad_norm": 0.6958881616592407, + "learning_rate": 5.588561756865498e-05, + "loss": 2.4577, + "step": 12946 + }, + { + "epoch": 1.04487127754015, + "grad_norm": 0.6999812722206116, + "learning_rate": 5.587145038144569e-05, + "loss": 2.454, + "step": 12947 + }, + { + "epoch": 1.0449519812767332, + "grad_norm": 0.6919988989830017, + "learning_rate": 5.58572842940352e-05, + "loss": 2.4505, + "step": 12948 + }, + { + "epoch": 1.045032685013316, + "grad_norm": 0.6813084483146667, + "learning_rate": 5.584311930677659e-05, + "loss": 2.4873, + "step": 12949 + }, + { + "epoch": 1.0451133887498991, + "grad_norm": 0.6587427854537964, + "learning_rate": 5.582895542002286e-05, + "loss": 2.4658, + "step": 12950 + }, + { + "epoch": 1.0451940924864822, + "grad_norm": 0.6942041516304016, + "learning_rate": 5.581479263412703e-05, + "loss": 2.47, + "step": 12951 + }, + { + "epoch": 1.045274796223065, + "grad_norm": 0.7330117225646973, + "learning_rate": 5.58006309494421e-05, + "loss": 2.4826, + "step": 12952 + }, + { + "epoch": 
1.0453554999596482, + "grad_norm": 0.7197144031524658, + "learning_rate": 5.578647036632096e-05, + "loss": 2.4425, + "step": 12953 + }, + { + "epoch": 1.045436203696231, + "grad_norm": 0.7442573308944702, + "learning_rate": 5.577231088511654e-05, + "loss": 2.4946, + "step": 12954 + }, + { + "epoch": 1.0455169074328141, + "grad_norm": 0.7039753198623657, + "learning_rate": 5.575815250618179e-05, + "loss": 2.4188, + "step": 12955 + }, + { + "epoch": 1.0455976111693972, + "grad_norm": 0.7374606728553772, + "learning_rate": 5.574399522986951e-05, + "loss": 2.3916, + "step": 12956 + }, + { + "epoch": 1.04567831490598, + "grad_norm": 0.6358140707015991, + "learning_rate": 5.572983905653253e-05, + "loss": 2.4502, + "step": 12957 + }, + { + "epoch": 1.0457590186425632, + "grad_norm": 0.712858259677887, + "learning_rate": 5.5715683986523694e-05, + "loss": 2.4746, + "step": 12958 + }, + { + "epoch": 1.0458397223791462, + "grad_norm": 0.6757933497428894, + "learning_rate": 5.5701530020195756e-05, + "loss": 2.4836, + "step": 12959 + }, + { + "epoch": 1.045920426115729, + "grad_norm": 0.7509831786155701, + "learning_rate": 5.568737715790151e-05, + "loss": 2.4061, + "step": 12960 + }, + { + "epoch": 1.0460011298523122, + "grad_norm": 0.7120335102081299, + "learning_rate": 5.5673225399993646e-05, + "loss": 2.4772, + "step": 12961 + }, + { + "epoch": 1.046081833588895, + "grad_norm": 0.7213751673698425, + "learning_rate": 5.5659074746824924e-05, + "loss": 2.4637, + "step": 12962 + }, + { + "epoch": 1.0461625373254781, + "grad_norm": 0.7161290645599365, + "learning_rate": 5.5644925198747934e-05, + "loss": 2.4552, + "step": 12963 + }, + { + "epoch": 1.0462432410620612, + "grad_norm": 0.7303922772407532, + "learning_rate": 5.563077675611534e-05, + "loss": 2.5091, + "step": 12964 + }, + { + "epoch": 1.046323944798644, + "grad_norm": 0.7051636576652527, + "learning_rate": 5.561662941927981e-05, + "loss": 2.3717, + "step": 12965 + }, + { + "epoch": 1.0464046485352272, + "grad_norm": 
0.6880733370780945, + "learning_rate": 5.5602483188593866e-05, + "loss": 2.4205, + "step": 12966 + }, + { + "epoch": 1.0464853522718103, + "grad_norm": 0.6942360401153564, + "learning_rate": 5.558833806441008e-05, + "loss": 2.4601, + "step": 12967 + }, + { + "epoch": 1.0465660560083931, + "grad_norm": 0.7264992594718933, + "learning_rate": 5.5574194047081016e-05, + "loss": 2.4612, + "step": 12968 + }, + { + "epoch": 1.0466467597449762, + "grad_norm": 0.7502472996711731, + "learning_rate": 5.5560051136959166e-05, + "loss": 2.4099, + "step": 12969 + }, + { + "epoch": 1.046727463481559, + "grad_norm": 0.691694438457489, + "learning_rate": 5.5545909334397004e-05, + "loss": 2.5071, + "step": 12970 + }, + { + "epoch": 1.0468081672181422, + "grad_norm": 0.7120653986930847, + "learning_rate": 5.5531768639747026e-05, + "loss": 2.4066, + "step": 12971 + }, + { + "epoch": 1.0468888709547253, + "grad_norm": 0.6501363515853882, + "learning_rate": 5.551762905336159e-05, + "loss": 2.4186, + "step": 12972 + }, + { + "epoch": 1.0469695746913081, + "grad_norm": 0.6924965977668762, + "learning_rate": 5.5503490575593095e-05, + "loss": 2.4864, + "step": 12973 + }, + { + "epoch": 1.0470502784278912, + "grad_norm": 0.6772900819778442, + "learning_rate": 5.548935320679398e-05, + "loss": 2.4101, + "step": 12974 + }, + { + "epoch": 1.0471309821644743, + "grad_norm": 0.6950967311859131, + "learning_rate": 5.54752169473165e-05, + "loss": 2.4893, + "step": 12975 + }, + { + "epoch": 1.0472116859010572, + "grad_norm": 0.6663516163825989, + "learning_rate": 5.5461081797512994e-05, + "loss": 2.4136, + "step": 12976 + }, + { + "epoch": 1.0472923896376403, + "grad_norm": 0.7337449789047241, + "learning_rate": 5.5446947757735754e-05, + "loss": 2.473, + "step": 12977 + }, + { + "epoch": 1.0473730933742231, + "grad_norm": 0.6808840036392212, + "learning_rate": 5.543281482833709e-05, + "loss": 2.4473, + "step": 12978 + }, + { + "epoch": 1.0474537971108062, + "grad_norm": 0.6472508907318115, + 
"learning_rate": 5.5418683009669124e-05, + "loss": 2.4077, + "step": 12979 + }, + { + "epoch": 1.0475345008473893, + "grad_norm": 0.6904192566871643, + "learning_rate": 5.540455230208409e-05, + "loss": 2.482, + "step": 12980 + }, + { + "epoch": 1.0476152045839722, + "grad_norm": 0.6781610250473022, + "learning_rate": 5.5390422705934264e-05, + "loss": 2.4458, + "step": 12981 + }, + { + "epoch": 1.0476959083205553, + "grad_norm": 0.7130050659179688, + "learning_rate": 5.5376294221571666e-05, + "loss": 2.5136, + "step": 12982 + }, + { + "epoch": 1.0477766120571383, + "grad_norm": 0.7727184891700745, + "learning_rate": 5.536216684934846e-05, + "loss": 2.5346, + "step": 12983 + }, + { + "epoch": 1.0478573157937212, + "grad_norm": 0.7177208662033081, + "learning_rate": 5.534804058961679e-05, + "loss": 2.4153, + "step": 12984 + }, + { + "epoch": 1.0479380195303043, + "grad_norm": 0.7333023548126221, + "learning_rate": 5.5333915442728634e-05, + "loss": 2.4171, + "step": 12985 + }, + { + "epoch": 1.0480187232668872, + "grad_norm": 0.658423125743866, + "learning_rate": 5.5319791409036046e-05, + "loss": 2.446, + "step": 12986 + }, + { + "epoch": 1.0480994270034703, + "grad_norm": 0.8305184841156006, + "learning_rate": 5.5305668488891114e-05, + "loss": 2.5026, + "step": 12987 + }, + { + "epoch": 1.0481801307400533, + "grad_norm": 0.7083305716514587, + "learning_rate": 5.52915466826457e-05, + "loss": 2.5366, + "step": 12988 + }, + { + "epoch": 1.0482608344766362, + "grad_norm": 0.7924454212188721, + "learning_rate": 5.5277425990651824e-05, + "loss": 2.528, + "step": 12989 + }, + { + "epoch": 1.0483415382132193, + "grad_norm": 0.633376955986023, + "learning_rate": 5.5263306413261384e-05, + "loss": 2.4442, + "step": 12990 + }, + { + "epoch": 1.0484222419498024, + "grad_norm": 0.7387240529060364, + "learning_rate": 5.5249187950826295e-05, + "loss": 2.4761, + "step": 12991 + }, + { + "epoch": 1.0485029456863852, + "grad_norm": 0.6796224117279053, + "learning_rate": 
5.523507060369843e-05, + "loss": 2.4828, + "step": 12992 + }, + { + "epoch": 1.0485836494229683, + "grad_norm": 0.6925581097602844, + "learning_rate": 5.5220954372229604e-05, + "loss": 2.4861, + "step": 12993 + }, + { + "epoch": 1.0486643531595512, + "grad_norm": 0.6854318380355835, + "learning_rate": 5.5206839256771704e-05, + "loss": 2.473, + "step": 12994 + }, + { + "epoch": 1.0487450568961343, + "grad_norm": 0.706375241279602, + "learning_rate": 5.519272525767643e-05, + "loss": 2.4284, + "step": 12995 + }, + { + "epoch": 1.0488257606327174, + "grad_norm": 0.6917428374290466, + "learning_rate": 5.517861237529556e-05, + "loss": 2.4702, + "step": 12996 + }, + { + "epoch": 1.0489064643693002, + "grad_norm": 0.6903818845748901, + "learning_rate": 5.516450060998086e-05, + "loss": 2.4679, + "step": 12997 + }, + { + "epoch": 1.0489871681058833, + "grad_norm": 0.6403356194496155, + "learning_rate": 5.515038996208398e-05, + "loss": 2.396, + "step": 12998 + }, + { + "epoch": 1.0490678718424662, + "grad_norm": 0.6491792798042297, + "learning_rate": 5.513628043195662e-05, + "loss": 2.4543, + "step": 12999 + }, + { + "epoch": 1.0491485755790493, + "grad_norm": 0.687303900718689, + "learning_rate": 5.512217201995043e-05, + "loss": 2.4716, + "step": 13000 + }, + { + "epoch": 1.0491485755790493, + "eval_loss": 2.4177169799804688, + "eval_runtime": 763.9215, + "eval_samples_per_second": 3.43, + "eval_steps_per_second": 0.572, + "step": 13000 + }, + { + "epoch": 1.0492292793156324, + "grad_norm": 0.7020761370658875, + "learning_rate": 5.510806472641701e-05, + "loss": 2.3591, + "step": 13001 + }, + { + "epoch": 1.0493099830522152, + "grad_norm": 0.6978075504302979, + "learning_rate": 5.509395855170798e-05, + "loss": 2.4585, + "step": 13002 + }, + { + "epoch": 1.0493906867887983, + "grad_norm": 0.7327752113342285, + "learning_rate": 5.5079853496174925e-05, + "loss": 2.5265, + "step": 13003 + }, + { + "epoch": 1.0494713905253814, + "grad_norm": 0.7552505135536194, + "learning_rate": 
5.50657495601693e-05, + "loss": 2.4821, + "step": 13004 + }, + { + "epoch": 1.0495520942619643, + "grad_norm": 0.7100770473480225, + "learning_rate": 5.5051646744042664e-05, + "loss": 2.4566, + "step": 13005 + }, + { + "epoch": 1.0496327979985474, + "grad_norm": 0.7008209824562073, + "learning_rate": 5.503754504814651e-05, + "loss": 2.4476, + "step": 13006 + }, + { + "epoch": 1.0497135017351304, + "grad_norm": 0.640724241733551, + "learning_rate": 5.502344447283223e-05, + "loss": 2.437, + "step": 13007 + }, + { + "epoch": 1.0497942054717133, + "grad_norm": 0.7064981460571289, + "learning_rate": 5.5009345018451297e-05, + "loss": 2.5129, + "step": 13008 + }, + { + "epoch": 1.0498749092082964, + "grad_norm": 0.6729782223701477, + "learning_rate": 5.49952466853551e-05, + "loss": 2.4867, + "step": 13009 + }, + { + "epoch": 1.0499556129448793, + "grad_norm": 0.7245302200317383, + "learning_rate": 5.4981149473894966e-05, + "loss": 2.4485, + "step": 13010 + }, + { + "epoch": 1.0500363166814624, + "grad_norm": 0.6686248779296875, + "learning_rate": 5.4967053384422294e-05, + "loss": 2.4314, + "step": 13011 + }, + { + "epoch": 1.0501170204180454, + "grad_norm": 0.6790863871574402, + "learning_rate": 5.495295841728836e-05, + "loss": 2.4847, + "step": 13012 + }, + { + "epoch": 1.0501977241546283, + "grad_norm": 0.6516931653022766, + "learning_rate": 5.49388645728445e-05, + "loss": 2.4306, + "step": 13013 + }, + { + "epoch": 1.0502784278912114, + "grad_norm": 0.6967600584030151, + "learning_rate": 5.492477185144189e-05, + "loss": 2.4942, + "step": 13014 + }, + { + "epoch": 1.0503591316277943, + "grad_norm": 0.696246325969696, + "learning_rate": 5.491068025343178e-05, + "loss": 2.4647, + "step": 13015 + }, + { + "epoch": 1.0504398353643774, + "grad_norm": 0.6962751150131226, + "learning_rate": 5.489658977916543e-05, + "loss": 2.5095, + "step": 13016 + }, + { + "epoch": 1.0505205391009604, + "grad_norm": 0.6982631087303162, + "learning_rate": 5.488250042899392e-05, + "loss": 
2.4327, + "step": 13017 + }, + { + "epoch": 1.0506012428375433, + "grad_norm": 0.6932644844055176, + "learning_rate": 5.486841220326845e-05, + "loss": 2.4777, + "step": 13018 + }, + { + "epoch": 1.0506819465741264, + "grad_norm": 0.6923339366912842, + "learning_rate": 5.485432510234012e-05, + "loss": 2.4321, + "step": 13019 + }, + { + "epoch": 1.0507626503107095, + "grad_norm": 0.7445859313011169, + "learning_rate": 5.4840239126560015e-05, + "loss": 2.4425, + "step": 13020 + }, + { + "epoch": 1.0508433540472923, + "grad_norm": 0.7122324705123901, + "learning_rate": 5.48261542762792e-05, + "loss": 2.4545, + "step": 13021 + }, + { + "epoch": 1.0509240577838754, + "grad_norm": 0.734779417514801, + "learning_rate": 5.4812070551848736e-05, + "loss": 2.4764, + "step": 13022 + }, + { + "epoch": 1.0510047615204583, + "grad_norm": 0.6544109582901001, + "learning_rate": 5.4797987953619566e-05, + "loss": 2.4492, + "step": 13023 + }, + { + "epoch": 1.0510854652570414, + "grad_norm": 0.6366097331047058, + "learning_rate": 5.4783906481942704e-05, + "loss": 2.4695, + "step": 13024 + }, + { + "epoch": 1.0511661689936245, + "grad_norm": 0.6966270804405212, + "learning_rate": 5.476982613716908e-05, + "loss": 2.4505, + "step": 13025 + }, + { + "epoch": 1.0512468727302073, + "grad_norm": 0.7010120153427124, + "learning_rate": 5.4755746919649665e-05, + "loss": 2.4545, + "step": 13026 + }, + { + "epoch": 1.0513275764667904, + "grad_norm": 0.6704719662666321, + "learning_rate": 5.474166882973526e-05, + "loss": 2.3899, + "step": 13027 + }, + { + "epoch": 1.0514082802033735, + "grad_norm": 0.757152259349823, + "learning_rate": 5.472759186777679e-05, + "loss": 2.5112, + "step": 13028 + }, + { + "epoch": 1.0514889839399564, + "grad_norm": 0.6668868660926819, + "learning_rate": 5.471351603412509e-05, + "loss": 2.4797, + "step": 13029 + }, + { + "epoch": 1.0515696876765395, + "grad_norm": 0.7919496893882751, + "learning_rate": 5.4699441329130887e-05, + "loss": 2.4874, + "step": 13030 + }, + { 
+ "epoch": 1.0516503914131223, + "grad_norm": 0.7595484852790833, + "learning_rate": 5.468536775314506e-05, + "loss": 2.4621, + "step": 13031 + }, + { + "epoch": 1.0517310951497054, + "grad_norm": 0.6575995683670044, + "learning_rate": 5.467129530651835e-05, + "loss": 2.4474, + "step": 13032 + }, + { + "epoch": 1.0518117988862885, + "grad_norm": 0.6817733645439148, + "learning_rate": 5.4657223989601425e-05, + "loss": 2.4329, + "step": 13033 + }, + { + "epoch": 1.0518925026228714, + "grad_norm": 0.722882091999054, + "learning_rate": 5.464315380274501e-05, + "loss": 2.4544, + "step": 13034 + }, + { + "epoch": 1.0519732063594545, + "grad_norm": 0.6957377791404724, + "learning_rate": 5.4629084746299796e-05, + "loss": 2.5669, + "step": 13035 + }, + { + "epoch": 1.0520539100960375, + "grad_norm": 0.6749420166015625, + "learning_rate": 5.461501682061636e-05, + "loss": 2.5053, + "step": 13036 + }, + { + "epoch": 1.0521346138326204, + "grad_norm": 0.8158369064331055, + "learning_rate": 5.4600950026045326e-05, + "loss": 2.429, + "step": 13037 + }, + { + "epoch": 1.0522153175692035, + "grad_norm": 0.6960736513137817, + "learning_rate": 5.458688436293735e-05, + "loss": 2.4731, + "step": 13038 + }, + { + "epoch": 1.0522960213057864, + "grad_norm": 0.6686301231384277, + "learning_rate": 5.457281983164287e-05, + "loss": 2.4495, + "step": 13039 + }, + { + "epoch": 1.0523767250423695, + "grad_norm": 0.6691476106643677, + "learning_rate": 5.455875643251248e-05, + "loss": 2.4329, + "step": 13040 + }, + { + "epoch": 1.0524574287789525, + "grad_norm": 0.7737297415733337, + "learning_rate": 5.454469416589666e-05, + "loss": 2.4664, + "step": 13041 + }, + { + "epoch": 1.0525381325155354, + "grad_norm": 0.7848188281059265, + "learning_rate": 5.453063303214588e-05, + "loss": 2.4799, + "step": 13042 + }, + { + "epoch": 1.0526188362521185, + "grad_norm": 0.7831119894981384, + "learning_rate": 5.45165730316106e-05, + "loss": 2.5076, + "step": 13043 + }, + { + "epoch": 1.0526995399887016, + 
"grad_norm": 0.691635012626648, + "learning_rate": 5.4502514164641196e-05, + "loss": 2.4866, + "step": 13044 + }, + { + "epoch": 1.0527802437252844, + "grad_norm": 0.6667110919952393, + "learning_rate": 5.4488456431588106e-05, + "loss": 2.4162, + "step": 13045 + }, + { + "epoch": 1.0528609474618675, + "grad_norm": 0.7201905846595764, + "learning_rate": 5.447439983280163e-05, + "loss": 2.498, + "step": 13046 + }, + { + "epoch": 1.0529416511984504, + "grad_norm": 0.8538106083869934, + "learning_rate": 5.44603443686321e-05, + "loss": 2.4477, + "step": 13047 + }, + { + "epoch": 1.0530223549350335, + "grad_norm": 0.6661962270736694, + "learning_rate": 5.444629003942987e-05, + "loss": 2.5253, + "step": 13048 + }, + { + "epoch": 1.0531030586716166, + "grad_norm": 0.7239834666252136, + "learning_rate": 5.4432236845545146e-05, + "loss": 2.4786, + "step": 13049 + }, + { + "epoch": 1.0531837624081994, + "grad_norm": 0.7328412532806396, + "learning_rate": 5.4418184787328186e-05, + "loss": 2.4841, + "step": 13050 + }, + { + "epoch": 1.0532644661447825, + "grad_norm": 0.6395559310913086, + "learning_rate": 5.440413386512922e-05, + "loss": 2.3544, + "step": 13051 + }, + { + "epoch": 1.0533451698813656, + "grad_norm": 0.6632471084594727, + "learning_rate": 5.43900840792984e-05, + "loss": 2.4753, + "step": 13052 + }, + { + "epoch": 1.0534258736179485, + "grad_norm": 0.7262828350067139, + "learning_rate": 5.4376035430185935e-05, + "loss": 2.4162, + "step": 13053 + }, + { + "epoch": 1.0535065773545316, + "grad_norm": 0.7897952198982239, + "learning_rate": 5.436198791814196e-05, + "loss": 2.4571, + "step": 13054 + }, + { + "epoch": 1.0535872810911144, + "grad_norm": 0.7281489372253418, + "learning_rate": 5.434794154351651e-05, + "loss": 2.4531, + "step": 13055 + }, + { + "epoch": 1.0536679848276975, + "grad_norm": 0.7322356700897217, + "learning_rate": 5.4333896306659694e-05, + "loss": 2.4102, + "step": 13056 + }, + { + "epoch": 1.0537486885642806, + "grad_norm": 0.7657945156097412, + 
"learning_rate": 5.4319852207921554e-05, + "loss": 2.4526, + "step": 13057 + }, + { + "epoch": 1.0538293923008635, + "grad_norm": 0.6732973456382751, + "learning_rate": 5.430580924765214e-05, + "loss": 2.4516, + "step": 13058 + }, + { + "epoch": 1.0539100960374466, + "grad_norm": 0.663398027420044, + "learning_rate": 5.429176742620137e-05, + "loss": 2.4437, + "step": 13059 + }, + { + "epoch": 1.0539907997740294, + "grad_norm": 0.6363258957862854, + "learning_rate": 5.4277726743919244e-05, + "loss": 2.414, + "step": 13060 + }, + { + "epoch": 1.0540715035106125, + "grad_norm": 0.6600647568702698, + "learning_rate": 5.426368720115568e-05, + "loss": 2.4319, + "step": 13061 + }, + { + "epoch": 1.0541522072471956, + "grad_norm": 0.6941983699798584, + "learning_rate": 5.4249648798260574e-05, + "loss": 2.5247, + "step": 13062 + }, + { + "epoch": 1.0542329109837785, + "grad_norm": 0.7419719099998474, + "learning_rate": 5.423561153558383e-05, + "loss": 2.5088, + "step": 13063 + }, + { + "epoch": 1.0543136147203616, + "grad_norm": 0.708073079586029, + "learning_rate": 5.4221575413475326e-05, + "loss": 2.4037, + "step": 13064 + }, + { + "epoch": 1.0543943184569446, + "grad_norm": 0.7081628441810608, + "learning_rate": 5.4207540432284764e-05, + "loss": 2.4556, + "step": 13065 + }, + { + "epoch": 1.0544750221935275, + "grad_norm": 0.7058689594268799, + "learning_rate": 5.419350659236201e-05, + "loss": 2.4244, + "step": 13066 + }, + { + "epoch": 1.0545557259301106, + "grad_norm": 0.6858707070350647, + "learning_rate": 5.417947389405684e-05, + "loss": 2.4431, + "step": 13067 + }, + { + "epoch": 1.0546364296666935, + "grad_norm": 0.6769983768463135, + "learning_rate": 5.416544233771893e-05, + "loss": 2.4257, + "step": 13068 + }, + { + "epoch": 1.0547171334032766, + "grad_norm": 0.7128089070320129, + "learning_rate": 5.4151411923698e-05, + "loss": 2.4558, + "step": 13069 + }, + { + "epoch": 1.0547978371398596, + "grad_norm": 0.6419198513031006, + "learning_rate": 
5.413738265234374e-05, + "loss": 2.4421, + "step": 13070 + }, + { + "epoch": 1.0548785408764425, + "grad_norm": 0.760848879814148, + "learning_rate": 5.4123354524005784e-05, + "loss": 2.4427, + "step": 13071 + }, + { + "epoch": 1.0549592446130256, + "grad_norm": 0.6749173998832703, + "learning_rate": 5.410932753903377e-05, + "loss": 2.4902, + "step": 13072 + }, + { + "epoch": 1.0550399483496087, + "grad_norm": 0.6908800601959229, + "learning_rate": 5.4095301697777265e-05, + "loss": 2.4219, + "step": 13073 + }, + { + "epoch": 1.0551206520861915, + "grad_norm": 0.6779965758323669, + "learning_rate": 5.408127700058587e-05, + "loss": 2.4533, + "step": 13074 + }, + { + "epoch": 1.0552013558227746, + "grad_norm": 0.6832355260848999, + "learning_rate": 5.406725344780906e-05, + "loss": 2.418, + "step": 13075 + }, + { + "epoch": 1.0552820595593575, + "grad_norm": 0.6766698956489563, + "learning_rate": 5.4053231039796357e-05, + "loss": 2.4493, + "step": 13076 + }, + { + "epoch": 1.0553627632959406, + "grad_norm": 0.7256276607513428, + "learning_rate": 5.4039209776897285e-05, + "loss": 2.4126, + "step": 13077 + }, + { + "epoch": 1.0554434670325237, + "grad_norm": 0.6687275171279907, + "learning_rate": 5.4025189659461196e-05, + "loss": 2.435, + "step": 13078 + }, + { + "epoch": 1.0555241707691065, + "grad_norm": 0.6800444722175598, + "learning_rate": 5.401117068783758e-05, + "loss": 2.4608, + "step": 13079 + }, + { + "epoch": 1.0556048745056896, + "grad_norm": 0.6947116851806641, + "learning_rate": 5.399715286237583e-05, + "loss": 2.4908, + "step": 13080 + }, + { + "epoch": 1.0556855782422727, + "grad_norm": 0.6907915472984314, + "learning_rate": 5.398313618342521e-05, + "loss": 2.4805, + "step": 13081 + }, + { + "epoch": 1.0557662819788556, + "grad_norm": 0.7429100275039673, + "learning_rate": 5.396912065133516e-05, + "loss": 2.458, + "step": 13082 + }, + { + "epoch": 1.0558469857154387, + "grad_norm": 0.7186924815177917, + "learning_rate": 5.3955106266454994e-05, + "loss": 
2.4924, + "step": 13083 + }, + { + "epoch": 1.0559276894520215, + "grad_norm": 0.7017999887466431, + "learning_rate": 5.394109302913391e-05, + "loss": 2.4103, + "step": 13084 + }, + { + "epoch": 1.0560083931886046, + "grad_norm": 0.7318955659866333, + "learning_rate": 5.392708093972117e-05, + "loss": 2.4424, + "step": 13085 + }, + { + "epoch": 1.0560890969251877, + "grad_norm": 0.6278600692749023, + "learning_rate": 5.391306999856602e-05, + "loss": 2.4433, + "step": 13086 + }, + { + "epoch": 1.0561698006617706, + "grad_norm": 0.6895800232887268, + "learning_rate": 5.389906020601767e-05, + "loss": 2.4275, + "step": 13087 + }, + { + "epoch": 1.0562505043983537, + "grad_norm": 0.7197345495223999, + "learning_rate": 5.388505156242522e-05, + "loss": 2.4309, + "step": 13088 + }, + { + "epoch": 1.0563312081349367, + "grad_norm": 0.636433482170105, + "learning_rate": 5.3871044068137824e-05, + "loss": 2.4258, + "step": 13089 + }, + { + "epoch": 1.0564119118715196, + "grad_norm": 0.6884748339653015, + "learning_rate": 5.3857037723504634e-05, + "loss": 2.4543, + "step": 13090 + }, + { + "epoch": 1.0564926156081027, + "grad_norm": 0.7277036309242249, + "learning_rate": 5.384303252887464e-05, + "loss": 2.4911, + "step": 13091 + }, + { + "epoch": 1.0565733193446856, + "grad_norm": 0.6940809488296509, + "learning_rate": 5.38290284845969e-05, + "loss": 2.4112, + "step": 13092 + }, + { + "epoch": 1.0566540230812687, + "grad_norm": 0.6729177236557007, + "learning_rate": 5.3815025591020526e-05, + "loss": 2.4394, + "step": 13093 + }, + { + "epoch": 1.0567347268178517, + "grad_norm": 0.6941854357719421, + "learning_rate": 5.3801023848494416e-05, + "loss": 2.4263, + "step": 13094 + }, + { + "epoch": 1.0568154305544346, + "grad_norm": 0.7046812772750854, + "learning_rate": 5.3787023257367554e-05, + "loss": 2.5196, + "step": 13095 + }, + { + "epoch": 1.0568961342910177, + "grad_norm": 0.6896177530288696, + "learning_rate": 5.377302381798891e-05, + "loss": 2.4178, + "step": 13096 + }, + { 
+ "epoch": 1.0569768380276008, + "grad_norm": 0.6693699955940247, + "learning_rate": 5.375902553070731e-05, + "loss": 2.4908, + "step": 13097 + }, + { + "epoch": 1.0570575417641837, + "grad_norm": 0.6751677989959717, + "learning_rate": 5.3745028395871674e-05, + "loss": 2.4222, + "step": 13098 + }, + { + "epoch": 1.0571382455007667, + "grad_norm": 0.7666265368461609, + "learning_rate": 5.373103241383088e-05, + "loss": 2.4965, + "step": 13099 + }, + { + "epoch": 1.0572189492373496, + "grad_norm": 0.8069329857826233, + "learning_rate": 5.3717037584933674e-05, + "loss": 2.4988, + "step": 13100 + }, + { + "epoch": 1.0572996529739327, + "grad_norm": 0.7160749435424805, + "learning_rate": 5.370304390952887e-05, + "loss": 2.4311, + "step": 13101 + }, + { + "epoch": 1.0573803567105158, + "grad_norm": 0.6936448812484741, + "learning_rate": 5.368905138796523e-05, + "loss": 2.4877, + "step": 13102 + }, + { + "epoch": 1.0574610604470986, + "grad_norm": 0.7202793955802917, + "learning_rate": 5.3675060020591494e-05, + "loss": 2.4841, + "step": 13103 + }, + { + "epoch": 1.0575417641836817, + "grad_norm": 0.7750168442726135, + "learning_rate": 5.366106980775636e-05, + "loss": 2.4828, + "step": 13104 + }, + { + "epoch": 1.0576224679202646, + "grad_norm": 0.7079972624778748, + "learning_rate": 5.364708074980849e-05, + "loss": 2.4912, + "step": 13105 + }, + { + "epoch": 1.0577031716568477, + "grad_norm": 0.704066276550293, + "learning_rate": 5.363309284709657e-05, + "loss": 2.4731, + "step": 13106 + }, + { + "epoch": 1.0577838753934308, + "grad_norm": 0.7040490508079529, + "learning_rate": 5.361910609996915e-05, + "loss": 2.3811, + "step": 13107 + }, + { + "epoch": 1.0578645791300136, + "grad_norm": 0.6669453978538513, + "learning_rate": 5.360512050877484e-05, + "loss": 2.5372, + "step": 13108 + }, + { + "epoch": 1.0579452828665967, + "grad_norm": 0.7197996973991394, + "learning_rate": 5.359113607386226e-05, + "loss": 2.4612, + "step": 13109 + }, + { + "epoch": 1.0580259866031798, + 
"grad_norm": 0.7192320823669434, + "learning_rate": 5.3577152795579824e-05, + "loss": 2.4636, + "step": 13110 + }, + { + "epoch": 1.0581066903397627, + "grad_norm": 0.6907937526702881, + "learning_rate": 5.35631706742761e-05, + "loss": 2.4791, + "step": 13111 + }, + { + "epoch": 1.0581873940763458, + "grad_norm": 0.687035083770752, + "learning_rate": 5.354918971029954e-05, + "loss": 2.4706, + "step": 13112 + }, + { + "epoch": 1.0582680978129286, + "grad_norm": 0.6666533350944519, + "learning_rate": 5.353520990399861e-05, + "loss": 2.4789, + "step": 13113 + }, + { + "epoch": 1.0583488015495117, + "grad_norm": 0.6261809468269348, + "learning_rate": 5.35212312557217e-05, + "loss": 2.4485, + "step": 13114 + }, + { + "epoch": 1.0584295052860948, + "grad_norm": 0.6740814447402954, + "learning_rate": 5.350725376581725e-05, + "loss": 2.47, + "step": 13115 + }, + { + "epoch": 1.0585102090226777, + "grad_norm": 0.7634154558181763, + "learning_rate": 5.3493277434633526e-05, + "loss": 2.4685, + "step": 13116 + }, + { + "epoch": 1.0585909127592608, + "grad_norm": 0.6674611568450928, + "learning_rate": 5.34793022625189e-05, + "loss": 2.4362, + "step": 13117 + }, + { + "epoch": 1.0586716164958438, + "grad_norm": 0.7584757804870605, + "learning_rate": 5.346532824982167e-05, + "loss": 2.499, + "step": 13118 + }, + { + "epoch": 1.0587523202324267, + "grad_norm": 0.6453456282615662, + "learning_rate": 5.345135539689015e-05, + "loss": 2.4341, + "step": 13119 + }, + { + "epoch": 1.0588330239690098, + "grad_norm": 0.70013427734375, + "learning_rate": 5.343738370407247e-05, + "loss": 2.3448, + "step": 13120 + }, + { + "epoch": 1.0589137277055927, + "grad_norm": 0.6763362884521484, + "learning_rate": 5.342341317171693e-05, + "loss": 2.4234, + "step": 13121 + }, + { + "epoch": 1.0589944314421758, + "grad_norm": 0.6896576881408691, + "learning_rate": 5.3409443800171664e-05, + "loss": 2.4753, + "step": 13122 + }, + { + "epoch": 1.0590751351787588, + "grad_norm": 0.6984997987747192, + 
"learning_rate": 5.339547558978486e-05, + "loss": 2.4581, + "step": 13123 + }, + { + "epoch": 1.0591558389153417, + "grad_norm": 0.7276118993759155, + "learning_rate": 5.338150854090462e-05, + "loss": 2.4765, + "step": 13124 + }, + { + "epoch": 1.0592365426519248, + "grad_norm": 0.6943252086639404, + "learning_rate": 5.336754265387911e-05, + "loss": 2.4514, + "step": 13125 + }, + { + "epoch": 1.0593172463885079, + "grad_norm": 0.7070014476776123, + "learning_rate": 5.335357792905628e-05, + "loss": 2.4365, + "step": 13126 + }, + { + "epoch": 1.0593979501250907, + "grad_norm": 0.6887189149856567, + "learning_rate": 5.333961436678422e-05, + "loss": 2.4834, + "step": 13127 + }, + { + "epoch": 1.0594786538616738, + "grad_norm": 0.8150162696838379, + "learning_rate": 5.332565196741098e-05, + "loss": 2.4474, + "step": 13128 + }, + { + "epoch": 1.0595593575982567, + "grad_norm": 0.6681316494941711, + "learning_rate": 5.331169073128447e-05, + "loss": 2.4888, + "step": 13129 + }, + { + "epoch": 1.0596400613348398, + "grad_norm": 0.6696690320968628, + "learning_rate": 5.329773065875267e-05, + "loss": 2.3874, + "step": 13130 + }, + { + "epoch": 1.0597207650714229, + "grad_norm": 0.729807436466217, + "learning_rate": 5.32837717501635e-05, + "loss": 2.4442, + "step": 13131 + }, + { + "epoch": 1.0598014688080057, + "grad_norm": 0.6959047913551331, + "learning_rate": 5.326981400586486e-05, + "loss": 2.4697, + "step": 13132 + }, + { + "epoch": 1.0598821725445888, + "grad_norm": 0.667294442653656, + "learning_rate": 5.3255857426204606e-05, + "loss": 2.3986, + "step": 13133 + }, + { + "epoch": 1.059962876281172, + "grad_norm": 0.6953842639923096, + "learning_rate": 5.3241902011530566e-05, + "loss": 2.396, + "step": 13134 + }, + { + "epoch": 1.0600435800177548, + "grad_norm": 0.6544597148895264, + "learning_rate": 5.32279477621906e-05, + "loss": 2.426, + "step": 13135 + }, + { + "epoch": 1.0601242837543379, + "grad_norm": 0.708017885684967, + "learning_rate": 5.321399467853241e-05, + 
"loss": 2.4931, + "step": 13136 + }, + { + "epoch": 1.0602049874909207, + "grad_norm": 0.6669809818267822, + "learning_rate": 5.3200042760903764e-05, + "loss": 2.4354, + "step": 13137 + }, + { + "epoch": 1.0602856912275038, + "grad_norm": 1.0144098997116089, + "learning_rate": 5.3186092009652435e-05, + "loss": 2.4803, + "step": 13138 + }, + { + "epoch": 1.060366394964087, + "grad_norm": 0.7213768362998962, + "learning_rate": 5.317214242512601e-05, + "loss": 2.4318, + "step": 13139 + }, + { + "epoch": 1.0604470987006698, + "grad_norm": 0.6429069638252258, + "learning_rate": 5.315819400767223e-05, + "loss": 2.458, + "step": 13140 + }, + { + "epoch": 1.0605278024372529, + "grad_norm": 0.6480485796928406, + "learning_rate": 5.3144246757638714e-05, + "loss": 2.4586, + "step": 13141 + }, + { + "epoch": 1.060608506173836, + "grad_norm": 0.7037697434425354, + "learning_rate": 5.3130300675373035e-05, + "loss": 2.4698, + "step": 13142 + }, + { + "epoch": 1.0606892099104188, + "grad_norm": 0.7307559251785278, + "learning_rate": 5.3116355761222725e-05, + "loss": 2.4027, + "step": 13143 + }, + { + "epoch": 1.060769913647002, + "grad_norm": 0.6684615612030029, + "learning_rate": 5.310241201553547e-05, + "loss": 2.478, + "step": 13144 + }, + { + "epoch": 1.0608506173835848, + "grad_norm": 0.7018016576766968, + "learning_rate": 5.308846943865866e-05, + "loss": 2.4229, + "step": 13145 + }, + { + "epoch": 1.0609313211201679, + "grad_norm": 0.7538621425628662, + "learning_rate": 5.307452803093982e-05, + "loss": 2.5201, + "step": 13146 + }, + { + "epoch": 1.061012024856751, + "grad_norm": 0.6957963109016418, + "learning_rate": 5.306058779272645e-05, + "loss": 2.4233, + "step": 13147 + }, + { + "epoch": 1.0610927285933338, + "grad_norm": 0.6280590295791626, + "learning_rate": 5.304664872436588e-05, + "loss": 2.5117, + "step": 13148 + }, + { + "epoch": 1.061173432329917, + "grad_norm": 0.6937280297279358, + "learning_rate": 5.3032710826205564e-05, + "loss": 2.4889, + "step": 13149 + }, 
+ { + "epoch": 1.0612541360664998, + "grad_norm": 0.6750391125679016, + "learning_rate": 5.3018774098592884e-05, + "loss": 2.4472, + "step": 13150 + }, + { + "epoch": 1.0613348398030829, + "grad_norm": 0.6931902766227722, + "learning_rate": 5.300483854187519e-05, + "loss": 2.3883, + "step": 13151 + }, + { + "epoch": 1.061415543539666, + "grad_norm": 0.6982774138450623, + "learning_rate": 5.2990904156399726e-05, + "loss": 2.4688, + "step": 13152 + }, + { + "epoch": 1.0614962472762488, + "grad_norm": 0.6873522996902466, + "learning_rate": 5.297697094251382e-05, + "loss": 2.4818, + "step": 13153 + }, + { + "epoch": 1.061576951012832, + "grad_norm": 0.635377049446106, + "learning_rate": 5.296303890056471e-05, + "loss": 2.3906, + "step": 13154 + }, + { + "epoch": 1.061657654749415, + "grad_norm": 0.6368159651756287, + "learning_rate": 5.294910803089963e-05, + "loss": 2.4714, + "step": 13155 + }, + { + "epoch": 1.0617383584859978, + "grad_norm": 0.7147238254547119, + "learning_rate": 5.293517833386576e-05, + "loss": 2.4746, + "step": 13156 + }, + { + "epoch": 1.061819062222581, + "grad_norm": 0.742189884185791, + "learning_rate": 5.2921249809810326e-05, + "loss": 2.3913, + "step": 13157 + }, + { + "epoch": 1.061899765959164, + "grad_norm": 0.6665734648704529, + "learning_rate": 5.290732245908038e-05, + "loss": 2.4263, + "step": 13158 + }, + { + "epoch": 1.0619804696957469, + "grad_norm": 0.6894757747650146, + "learning_rate": 5.2893396282023055e-05, + "loss": 2.4204, + "step": 13159 + }, + { + "epoch": 1.06206117343233, + "grad_norm": 0.6394561529159546, + "learning_rate": 5.287947127898546e-05, + "loss": 2.4183, + "step": 13160 + }, + { + "epoch": 1.0621418771689128, + "grad_norm": 0.7422548532485962, + "learning_rate": 5.2865547450314576e-05, + "loss": 2.4454, + "step": 13161 + }, + { + "epoch": 1.062222580905496, + "grad_norm": 0.7486133575439453, + "learning_rate": 5.285162479635748e-05, + "loss": 2.4856, + "step": 13162 + }, + { + "epoch": 1.062303284642079, + 
"grad_norm": 0.6743031144142151, + "learning_rate": 5.283770331746112e-05, + "loss": 2.4318, + "step": 13163 + }, + { + "epoch": 1.0623839883786619, + "grad_norm": 0.6461686491966248, + "learning_rate": 5.282378301397248e-05, + "loss": 2.4133, + "step": 13164 + }, + { + "epoch": 1.062464692115245, + "grad_norm": 0.6745431423187256, + "learning_rate": 5.28098638862385e-05, + "loss": 2.4463, + "step": 13165 + }, + { + "epoch": 1.0625453958518278, + "grad_norm": 0.6646310687065125, + "learning_rate": 5.279594593460606e-05, + "loss": 2.4211, + "step": 13166 + }, + { + "epoch": 1.062626099588411, + "grad_norm": 0.6789249777793884, + "learning_rate": 5.278202915942207e-05, + "loss": 2.4832, + "step": 13167 + }, + { + "epoch": 1.062706803324994, + "grad_norm": 0.7082679867744446, + "learning_rate": 5.2768113561033326e-05, + "loss": 2.4303, + "step": 13168 + }, + { + "epoch": 1.0627875070615769, + "grad_norm": 0.6875587701797485, + "learning_rate": 5.275419913978664e-05, + "loss": 2.4601, + "step": 13169 + }, + { + "epoch": 1.06286821079816, + "grad_norm": 0.6556203961372375, + "learning_rate": 5.274028589602886e-05, + "loss": 2.4359, + "step": 13170 + }, + { + "epoch": 1.062948914534743, + "grad_norm": 0.7280015349388123, + "learning_rate": 5.272637383010666e-05, + "loss": 2.4999, + "step": 13171 + }, + { + "epoch": 1.063029618271326, + "grad_norm": 0.664654016494751, + "learning_rate": 5.271246294236678e-05, + "loss": 2.3951, + "step": 13172 + }, + { + "epoch": 1.063110322007909, + "grad_norm": 0.6941719055175781, + "learning_rate": 5.2698553233155945e-05, + "loss": 2.45, + "step": 13173 + }, + { + "epoch": 1.0631910257444919, + "grad_norm": 0.7212931513786316, + "learning_rate": 5.268464470282082e-05, + "loss": 2.4615, + "step": 13174 + }, + { + "epoch": 1.063271729481075, + "grad_norm": 0.6877106428146362, + "learning_rate": 5.2670737351708014e-05, + "loss": 2.4495, + "step": 13175 + }, + { + "epoch": 1.063352433217658, + "grad_norm": 0.737718939781189, + 
"learning_rate": 5.26568311801642e-05, + "loss": 2.4971, + "step": 13176 + }, + { + "epoch": 1.063433136954241, + "grad_norm": 0.6909129619598389, + "learning_rate": 5.264292618853587e-05, + "loss": 2.4889, + "step": 13177 + }, + { + "epoch": 1.063513840690824, + "grad_norm": 0.6750304102897644, + "learning_rate": 5.262902237716961e-05, + "loss": 2.4779, + "step": 13178 + }, + { + "epoch": 1.063594544427407, + "grad_norm": 0.7256019115447998, + "learning_rate": 5.2615119746411954e-05, + "loss": 2.4904, + "step": 13179 + }, + { + "epoch": 1.06367524816399, + "grad_norm": 0.7335983514785767, + "learning_rate": 5.26012182966094e-05, + "loss": 2.4357, + "step": 13180 + }, + { + "epoch": 1.063755951900573, + "grad_norm": 0.6534200310707092, + "learning_rate": 5.258731802810837e-05, + "loss": 2.4213, + "step": 13181 + }, + { + "epoch": 1.063836655637156, + "grad_norm": 0.6899768114089966, + "learning_rate": 5.257341894125529e-05, + "loss": 2.4963, + "step": 13182 + }, + { + "epoch": 1.063917359373739, + "grad_norm": 0.7016159892082214, + "learning_rate": 5.25595210363966e-05, + "loss": 2.4583, + "step": 13183 + }, + { + "epoch": 1.063998063110322, + "grad_norm": 0.6868152022361755, + "learning_rate": 5.2545624313878636e-05, + "loss": 2.4523, + "step": 13184 + }, + { + "epoch": 1.064078766846905, + "grad_norm": 0.7442622184753418, + "learning_rate": 5.2531728774047785e-05, + "loss": 2.425, + "step": 13185 + }, + { + "epoch": 1.064159470583488, + "grad_norm": 0.6900869011878967, + "learning_rate": 5.251783441725037e-05, + "loss": 2.459, + "step": 13186 + }, + { + "epoch": 1.0642401743200711, + "grad_norm": 0.6910288333892822, + "learning_rate": 5.25039412438326e-05, + "loss": 2.4882, + "step": 13187 + }, + { + "epoch": 1.064320878056654, + "grad_norm": 0.7644359469413757, + "learning_rate": 5.249004925414076e-05, + "loss": 2.4663, + "step": 13188 + }, + { + "epoch": 1.064401581793237, + "grad_norm": 0.6703082919120789, + "learning_rate": 5.247615844852114e-05, + "loss": 
2.4309, + "step": 13189 + }, + { + "epoch": 1.06448228552982, + "grad_norm": 0.6449835896492004, + "learning_rate": 5.246226882731983e-05, + "loss": 2.4307, + "step": 13190 + }, + { + "epoch": 1.064562989266403, + "grad_norm": 0.7332713603973389, + "learning_rate": 5.244838039088305e-05, + "loss": 2.3763, + "step": 13191 + }, + { + "epoch": 1.0646436930029861, + "grad_norm": 0.7626641988754272, + "learning_rate": 5.2434493139556974e-05, + "loss": 2.4167, + "step": 13192 + }, + { + "epoch": 1.064724396739569, + "grad_norm": 0.6924002170562744, + "learning_rate": 5.2420607073687614e-05, + "loss": 2.4751, + "step": 13193 + }, + { + "epoch": 1.064805100476152, + "grad_norm": 0.6815003156661987, + "learning_rate": 5.2406722193621074e-05, + "loss": 2.4731, + "step": 13194 + }, + { + "epoch": 1.064885804212735, + "grad_norm": 0.7632609009742737, + "learning_rate": 5.239283849970347e-05, + "loss": 2.4562, + "step": 13195 + }, + { + "epoch": 1.064966507949318, + "grad_norm": 0.7157592177391052, + "learning_rate": 5.23789559922808e-05, + "loss": 2.4507, + "step": 13196 + }, + { + "epoch": 1.065047211685901, + "grad_norm": 0.7035543918609619, + "learning_rate": 5.2365074671699e-05, + "loss": 2.4616, + "step": 13197 + }, + { + "epoch": 1.065127915422484, + "grad_norm": 0.7566644549369812, + "learning_rate": 5.235119453830406e-05, + "loss": 2.4751, + "step": 13198 + }, + { + "epoch": 1.065208619159067, + "grad_norm": 0.7030916213989258, + "learning_rate": 5.233731559244194e-05, + "loss": 2.381, + "step": 13199 + }, + { + "epoch": 1.0652893228956501, + "grad_norm": 0.7663755416870117, + "learning_rate": 5.232343783445847e-05, + "loss": 2.4822, + "step": 13200 + }, + { + "epoch": 1.065370026632233, + "grad_norm": 0.717767596244812, + "learning_rate": 5.230956126469955e-05, + "loss": 2.4807, + "step": 13201 + }, + { + "epoch": 1.065450730368816, + "grad_norm": 0.6920818090438843, + "learning_rate": 5.229568588351108e-05, + "loss": 2.4643, + "step": 13202 + }, + { + "epoch": 
1.0655314341053992, + "grad_norm": 0.6812553405761719, + "learning_rate": 5.228181169123877e-05, + "loss": 2.4443, + "step": 13203 + }, + { + "epoch": 1.065612137841982, + "grad_norm": 0.7241889834403992, + "learning_rate": 5.226793868822846e-05, + "loss": 2.4581, + "step": 13204 + }, + { + "epoch": 1.0656928415785651, + "grad_norm": 0.7254642248153687, + "learning_rate": 5.225406687482588e-05, + "loss": 2.4999, + "step": 13205 + }, + { + "epoch": 1.065773545315148, + "grad_norm": 0.7316950559616089, + "learning_rate": 5.2240196251376764e-05, + "loss": 2.4493, + "step": 13206 + }, + { + "epoch": 1.065854249051731, + "grad_norm": 0.7208307385444641, + "learning_rate": 5.22263268182268e-05, + "loss": 2.5083, + "step": 13207 + }, + { + "epoch": 1.0659349527883142, + "grad_norm": 0.6552214622497559, + "learning_rate": 5.22124585757217e-05, + "loss": 2.4662, + "step": 13208 + }, + { + "epoch": 1.066015656524897, + "grad_norm": 0.7949681878089905, + "learning_rate": 5.219859152420701e-05, + "loss": 2.4584, + "step": 13209 + }, + { + "epoch": 1.0660963602614801, + "grad_norm": 0.7012154459953308, + "learning_rate": 5.2184725664028366e-05, + "loss": 2.4702, + "step": 13210 + }, + { + "epoch": 1.066177063998063, + "grad_norm": 0.7431927919387817, + "learning_rate": 5.217086099553136e-05, + "loss": 2.4422, + "step": 13211 + }, + { + "epoch": 1.066257767734646, + "grad_norm": 0.7235366702079773, + "learning_rate": 5.2156997519061554e-05, + "loss": 2.4173, + "step": 13212 + }, + { + "epoch": 1.0663384714712292, + "grad_norm": 0.7475029826164246, + "learning_rate": 5.214313523496439e-05, + "loss": 2.4924, + "step": 13213 + }, + { + "epoch": 1.066419175207812, + "grad_norm": 0.6326786875724792, + "learning_rate": 5.212927414358542e-05, + "loss": 2.4154, + "step": 13214 + }, + { + "epoch": 1.0664998789443951, + "grad_norm": 0.6755837798118591, + "learning_rate": 5.211541424527004e-05, + "loss": 2.4248, + "step": 13215 + }, + { + "epoch": 1.0665805826809782, + "grad_norm": 
0.645395040512085, + "learning_rate": 5.210155554036373e-05, + "loss": 2.4078, + "step": 13216 + }, + { + "epoch": 1.066661286417561, + "grad_norm": 0.799913763999939, + "learning_rate": 5.208769802921185e-05, + "loss": 2.5067, + "step": 13217 + }, + { + "epoch": 1.0667419901541442, + "grad_norm": 0.7056344747543335, + "learning_rate": 5.207384171215983e-05, + "loss": 2.4817, + "step": 13218 + }, + { + "epoch": 1.0668226938907273, + "grad_norm": 0.7082187533378601, + "learning_rate": 5.205998658955291e-05, + "loss": 2.4495, + "step": 13219 + }, + { + "epoch": 1.0669033976273101, + "grad_norm": 0.6948464512825012, + "learning_rate": 5.204613266173646e-05, + "loss": 2.4584, + "step": 13220 + }, + { + "epoch": 1.0669841013638932, + "grad_norm": 0.7812542915344238, + "learning_rate": 5.203227992905575e-05, + "loss": 2.4803, + "step": 13221 + }, + { + "epoch": 1.067064805100476, + "grad_norm": 0.6892200708389282, + "learning_rate": 5.201842839185598e-05, + "loss": 2.4424, + "step": 13222 + }, + { + "epoch": 1.0671455088370592, + "grad_norm": 0.6982070803642273, + "learning_rate": 5.20045780504824e-05, + "loss": 2.4654, + "step": 13223 + }, + { + "epoch": 1.0672262125736423, + "grad_norm": 0.6799101233482361, + "learning_rate": 5.1990728905280205e-05, + "loss": 2.4748, + "step": 13224 + }, + { + "epoch": 1.0673069163102251, + "grad_norm": 0.6703687906265259, + "learning_rate": 5.1976880956594544e-05, + "loss": 2.4459, + "step": 13225 + }, + { + "epoch": 1.0673876200468082, + "grad_norm": 0.6821435689926147, + "learning_rate": 5.196303420477053e-05, + "loss": 2.4517, + "step": 13226 + }, + { + "epoch": 1.067468323783391, + "grad_norm": 0.6369695067405701, + "learning_rate": 5.194918865015328e-05, + "loss": 2.4388, + "step": 13227 + }, + { + "epoch": 1.0675490275199742, + "grad_norm": 0.6465736627578735, + "learning_rate": 5.1935344293087885e-05, + "loss": 2.3839, + "step": 13228 + }, + { + "epoch": 1.0676297312565572, + "grad_norm": 0.6745415329933167, + "learning_rate": 
5.192150113391933e-05, + "loss": 2.4676, + "step": 13229 + }, + { + "epoch": 1.0677104349931401, + "grad_norm": 0.7605211138725281, + "learning_rate": 5.190765917299263e-05, + "loss": 2.4764, + "step": 13230 + }, + { + "epoch": 1.0677911387297232, + "grad_norm": 0.7040959596633911, + "learning_rate": 5.1893818410652825e-05, + "loss": 2.4727, + "step": 13231 + }, + { + "epoch": 1.0678718424663063, + "grad_norm": 0.6718928813934326, + "learning_rate": 5.1879978847244785e-05, + "loss": 2.4308, + "step": 13232 + }, + { + "epoch": 1.0679525462028892, + "grad_norm": 0.6788188219070435, + "learning_rate": 5.1866140483113445e-05, + "loss": 2.4278, + "step": 13233 + }, + { + "epoch": 1.0680332499394722, + "grad_norm": 0.7310218811035156, + "learning_rate": 5.185230331860371e-05, + "loss": 2.4585, + "step": 13234 + }, + { + "epoch": 1.068113953676055, + "grad_norm": 0.8092277646064758, + "learning_rate": 5.183846735406044e-05, + "loss": 2.4128, + "step": 13235 + }, + { + "epoch": 1.0681946574126382, + "grad_norm": 0.6469862461090088, + "learning_rate": 5.182463258982846e-05, + "loss": 2.4315, + "step": 13236 + }, + { + "epoch": 1.0682753611492213, + "grad_norm": 0.7948115468025208, + "learning_rate": 5.181079902625261e-05, + "loss": 2.5127, + "step": 13237 + }, + { + "epoch": 1.0683560648858041, + "grad_norm": 0.6988852620124817, + "learning_rate": 5.179696666367757e-05, + "loss": 2.432, + "step": 13238 + }, + { + "epoch": 1.0684367686223872, + "grad_norm": 0.6914555430412292, + "learning_rate": 5.1783135502448124e-05, + "loss": 2.4748, + "step": 13239 + }, + { + "epoch": 1.0685174723589703, + "grad_norm": 0.7586313486099243, + "learning_rate": 5.176930554290902e-05, + "loss": 2.4522, + "step": 13240 + }, + { + "epoch": 1.0685981760955532, + "grad_norm": 0.6763948798179626, + "learning_rate": 5.175547678540487e-05, + "loss": 2.4477, + "step": 13241 + }, + { + "epoch": 1.0686788798321363, + "grad_norm": 0.7625983357429504, + "learning_rate": 5.1741649230280334e-05, + "loss": 
2.4725, + "step": 13242 + }, + { + "epoch": 1.0687595835687191, + "grad_norm": 0.6574710011482239, + "learning_rate": 5.172782287788005e-05, + "loss": 2.4212, + "step": 13243 + }, + { + "epoch": 1.0688402873053022, + "grad_norm": 0.770062267780304, + "learning_rate": 5.1713997728548615e-05, + "loss": 2.5065, + "step": 13244 + }, + { + "epoch": 1.0689209910418853, + "grad_norm": 0.7719037532806396, + "learning_rate": 5.170017378263057e-05, + "loss": 2.5082, + "step": 13245 + }, + { + "epoch": 1.0690016947784682, + "grad_norm": 0.7106119394302368, + "learning_rate": 5.168635104047046e-05, + "loss": 2.4922, + "step": 13246 + }, + { + "epoch": 1.0690823985150513, + "grad_norm": 0.711815595626831, + "learning_rate": 5.167252950241281e-05, + "loss": 2.498, + "step": 13247 + }, + { + "epoch": 1.0691631022516344, + "grad_norm": 0.6926038265228271, + "learning_rate": 5.165870916880201e-05, + "loss": 2.4464, + "step": 13248 + }, + { + "epoch": 1.0692438059882172, + "grad_norm": 0.6959360241889954, + "learning_rate": 5.164489003998254e-05, + "loss": 2.4668, + "step": 13249 + }, + { + "epoch": 1.0693245097248003, + "grad_norm": 0.7165184617042542, + "learning_rate": 5.1631072116298875e-05, + "loss": 2.4198, + "step": 13250 + }, + { + "epoch": 1.0694052134613832, + "grad_norm": 0.7133236527442932, + "learning_rate": 5.161725539809527e-05, + "loss": 2.4691, + "step": 13251 + }, + { + "epoch": 1.0694859171979663, + "grad_norm": 0.7057758569717407, + "learning_rate": 5.160343988571613e-05, + "loss": 2.466, + "step": 13252 + }, + { + "epoch": 1.0695666209345494, + "grad_norm": 0.6808326244354248, + "learning_rate": 5.158962557950583e-05, + "loss": 2.4248, + "step": 13253 + }, + { + "epoch": 1.0696473246711322, + "grad_norm": 0.7166025638580322, + "learning_rate": 5.1575812479808563e-05, + "loss": 2.4753, + "step": 13254 + }, + { + "epoch": 1.0697280284077153, + "grad_norm": 0.7395358085632324, + "learning_rate": 5.156200058696863e-05, + "loss": 2.485, + "step": 13255 + }, + { + 
"epoch": 1.0698087321442982, + "grad_norm": 0.681106686592102, + "learning_rate": 5.154818990133026e-05, + "loss": 2.5077, + "step": 13256 + }, + { + "epoch": 1.0698894358808813, + "grad_norm": 0.7517002820968628, + "learning_rate": 5.153438042323766e-05, + "loss": 2.5093, + "step": 13257 + }, + { + "epoch": 1.0699701396174643, + "grad_norm": 0.6516926288604736, + "learning_rate": 5.152057215303499e-05, + "loss": 2.4416, + "step": 13258 + }, + { + "epoch": 1.0700508433540472, + "grad_norm": 0.6930893063545227, + "learning_rate": 5.150676509106638e-05, + "loss": 2.506, + "step": 13259 + }, + { + "epoch": 1.0701315470906303, + "grad_norm": 0.7737041115760803, + "learning_rate": 5.1492959237675986e-05, + "loss": 2.4355, + "step": 13260 + }, + { + "epoch": 1.0702122508272134, + "grad_norm": 0.7274872660636902, + "learning_rate": 5.14791545932078e-05, + "loss": 2.5552, + "step": 13261 + }, + { + "epoch": 1.0702929545637963, + "grad_norm": 0.7112408876419067, + "learning_rate": 5.146535115800593e-05, + "loss": 2.4041, + "step": 13262 + }, + { + "epoch": 1.0703736583003793, + "grad_norm": 0.6822024583816528, + "learning_rate": 5.1451548932414415e-05, + "loss": 2.4346, + "step": 13263 + }, + { + "epoch": 1.0704543620369624, + "grad_norm": 0.6590598225593567, + "learning_rate": 5.1437747916777165e-05, + "loss": 2.3946, + "step": 13264 + }, + { + "epoch": 1.0705350657735453, + "grad_norm": 0.643014132976532, + "learning_rate": 5.142394811143818e-05, + "loss": 2.4455, + "step": 13265 + }, + { + "epoch": 1.0706157695101284, + "grad_norm": 0.6480194926261902, + "learning_rate": 5.141014951674139e-05, + "loss": 2.4304, + "step": 13266 + }, + { + "epoch": 1.0706964732467112, + "grad_norm": 0.6933526992797852, + "learning_rate": 5.139635213303069e-05, + "loss": 2.4627, + "step": 13267 + }, + { + "epoch": 1.0707771769832943, + "grad_norm": 0.6832638382911682, + "learning_rate": 5.138255596064995e-05, + "loss": 2.4645, + "step": 13268 + }, + { + "epoch": 1.0708578807198774, + 
"grad_norm": 0.6579757928848267, + "learning_rate": 5.1368760999943034e-05, + "loss": 2.3928, + "step": 13269 + }, + { + "epoch": 1.0709385844564603, + "grad_norm": 0.6658132672309875, + "learning_rate": 5.1354967251253684e-05, + "loss": 2.4732, + "step": 13270 + }, + { + "epoch": 1.0710192881930434, + "grad_norm": 0.7610828876495361, + "learning_rate": 5.13411747149257e-05, + "loss": 2.4781, + "step": 13271 + }, + { + "epoch": 1.0710999919296262, + "grad_norm": 0.682858943939209, + "learning_rate": 5.1327383391302895e-05, + "loss": 2.4545, + "step": 13272 + }, + { + "epoch": 1.0711806956662093, + "grad_norm": 0.7461360692977905, + "learning_rate": 5.131359328072887e-05, + "loss": 2.4647, + "step": 13273 + }, + { + "epoch": 1.0712613994027924, + "grad_norm": 0.6767961382865906, + "learning_rate": 5.129980438354738e-05, + "loss": 2.4562, + "step": 13274 + }, + { + "epoch": 1.0713421031393753, + "grad_norm": 0.6768184304237366, + "learning_rate": 5.1286016700102066e-05, + "loss": 2.4662, + "step": 13275 + }, + { + "epoch": 1.0714228068759584, + "grad_norm": 0.7022743225097656, + "learning_rate": 5.1272230230736554e-05, + "loss": 2.4321, + "step": 13276 + }, + { + "epoch": 1.0715035106125415, + "grad_norm": 0.725488007068634, + "learning_rate": 5.125844497579444e-05, + "loss": 2.457, + "step": 13277 + }, + { + "epoch": 1.0715842143491243, + "grad_norm": 0.7542931437492371, + "learning_rate": 5.124466093561928e-05, + "loss": 2.4302, + "step": 13278 + }, + { + "epoch": 1.0716649180857074, + "grad_norm": 0.6598316431045532, + "learning_rate": 5.123087811055467e-05, + "loss": 2.4552, + "step": 13279 + }, + { + "epoch": 1.0717456218222903, + "grad_norm": 0.7533490061759949, + "learning_rate": 5.1217096500944017e-05, + "loss": 2.4778, + "step": 13280 + }, + { + "epoch": 1.0718263255588734, + "grad_norm": 0.6890795826911926, + "learning_rate": 5.1203316107130825e-05, + "loss": 2.4349, + "step": 13281 + }, + { + "epoch": 1.0719070292954564, + "grad_norm": 0.7004082202911377, 
+ "learning_rate": 5.118953692945862e-05, + "loss": 2.4645, + "step": 13282 + }, + { + "epoch": 1.0719877330320393, + "grad_norm": 0.7409259676933289, + "learning_rate": 5.117575896827068e-05, + "loss": 2.4734, + "step": 13283 + }, + { + "epoch": 1.0720684367686224, + "grad_norm": 0.7035481929779053, + "learning_rate": 5.116198222391046e-05, + "loss": 2.5027, + "step": 13284 + }, + { + "epoch": 1.0721491405052055, + "grad_norm": 0.7146698236465454, + "learning_rate": 5.114820669672132e-05, + "loss": 2.4623, + "step": 13285 + }, + { + "epoch": 1.0722298442417884, + "grad_norm": 0.7813882231712341, + "learning_rate": 5.113443238704656e-05, + "loss": 2.4644, + "step": 13286 + }, + { + "epoch": 1.0723105479783714, + "grad_norm": 0.6592430472373962, + "learning_rate": 5.1120659295229486e-05, + "loss": 2.4682, + "step": 13287 + }, + { + "epoch": 1.0723912517149543, + "grad_norm": 0.7047967910766602, + "learning_rate": 5.1106887421613395e-05, + "loss": 2.4368, + "step": 13288 + }, + { + "epoch": 1.0724719554515374, + "grad_norm": 0.700977087020874, + "learning_rate": 5.109311676654143e-05, + "loss": 2.4471, + "step": 13289 + }, + { + "epoch": 1.0725526591881205, + "grad_norm": 0.6821093559265137, + "learning_rate": 5.107934733035684e-05, + "loss": 2.433, + "step": 13290 + }, + { + "epoch": 1.0726333629247033, + "grad_norm": 0.6579930186271667, + "learning_rate": 5.1065579113402794e-05, + "loss": 2.4527, + "step": 13291 + }, + { + "epoch": 1.0727140666612864, + "grad_norm": 0.658514678478241, + "learning_rate": 5.105181211602248e-05, + "loss": 2.4443, + "step": 13292 + }, + { + "epoch": 1.0727947703978695, + "grad_norm": 0.6963977217674255, + "learning_rate": 5.103804633855891e-05, + "loss": 2.4699, + "step": 13293 + }, + { + "epoch": 1.0728754741344524, + "grad_norm": 0.6670787334442139, + "learning_rate": 5.102428178135522e-05, + "loss": 2.4672, + "step": 13294 + }, + { + "epoch": 1.0729561778710355, + "grad_norm": 0.6959822773933411, + "learning_rate": 
5.1010518444754454e-05, + "loss": 2.4338, + "step": 13295 + }, + { + "epoch": 1.0730368816076183, + "grad_norm": 0.6534817218780518, + "learning_rate": 5.0996756329099614e-05, + "loss": 2.4491, + "step": 13296 + }, + { + "epoch": 1.0731175853442014, + "grad_norm": 0.7265146970748901, + "learning_rate": 5.098299543473371e-05, + "loss": 2.4718, + "step": 13297 + }, + { + "epoch": 1.0731982890807845, + "grad_norm": 0.6554745435714722, + "learning_rate": 5.0969235761999746e-05, + "loss": 2.4286, + "step": 13298 + }, + { + "epoch": 1.0732789928173674, + "grad_norm": 0.7003172039985657, + "learning_rate": 5.095547731124053e-05, + "loss": 2.4182, + "step": 13299 + }, + { + "epoch": 1.0733596965539505, + "grad_norm": 0.6700341105461121, + "learning_rate": 5.094172008279904e-05, + "loss": 2.428, + "step": 13300 + }, + { + "epoch": 1.0734404002905333, + "grad_norm": 0.7290289402008057, + "learning_rate": 5.0927964077018164e-05, + "loss": 2.4324, + "step": 13301 + }, + { + "epoch": 1.0735211040271164, + "grad_norm": 0.6999204158782959, + "learning_rate": 5.0914209294240644e-05, + "loss": 2.5386, + "step": 13302 + }, + { + "epoch": 1.0736018077636995, + "grad_norm": 0.7008000612258911, + "learning_rate": 5.090045573480935e-05, + "loss": 2.5295, + "step": 13303 + }, + { + "epoch": 1.0736825115002824, + "grad_norm": 0.7023071646690369, + "learning_rate": 5.088670339906705e-05, + "loss": 2.4418, + "step": 13304 + }, + { + "epoch": 1.0737632152368655, + "grad_norm": 0.627174437046051, + "learning_rate": 5.0872952287356525e-05, + "loss": 2.3782, + "step": 13305 + }, + { + "epoch": 1.0738439189734486, + "grad_norm": 0.6992766857147217, + "learning_rate": 5.0859202400020364e-05, + "loss": 2.4698, + "step": 13306 + }, + { + "epoch": 1.0739246227100314, + "grad_norm": 0.7189817428588867, + "learning_rate": 5.084545373740138e-05, + "loss": 2.5248, + "step": 13307 + }, + { + "epoch": 1.0740053264466145, + "grad_norm": 0.6849164962768555, + "learning_rate": 5.0831706299842216e-05, + 
"loss": 2.4084, + "step": 13308 + }, + { + "epoch": 1.0740860301831976, + "grad_norm": 0.6985825300216675, + "learning_rate": 5.0817960087685424e-05, + "loss": 2.4893, + "step": 13309 + }, + { + "epoch": 1.0741667339197805, + "grad_norm": 0.6519783139228821, + "learning_rate": 5.080421510127362e-05, + "loss": 2.5144, + "step": 13310 + }, + { + "epoch": 1.0742474376563635, + "grad_norm": 0.6605731248855591, + "learning_rate": 5.079047134094941e-05, + "loss": 2.4487, + "step": 13311 + }, + { + "epoch": 1.0743281413929464, + "grad_norm": 0.7236705422401428, + "learning_rate": 5.077672880705526e-05, + "loss": 2.4578, + "step": 13312 + }, + { + "epoch": 1.0744088451295295, + "grad_norm": 0.7126381397247314, + "learning_rate": 5.07629874999337e-05, + "loss": 2.4528, + "step": 13313 + }, + { + "epoch": 1.0744895488661126, + "grad_norm": 0.7247878313064575, + "learning_rate": 5.0749247419927236e-05, + "loss": 2.563, + "step": 13314 + }, + { + "epoch": 1.0745702526026955, + "grad_norm": 0.728349506855011, + "learning_rate": 5.0735508567378234e-05, + "loss": 2.4229, + "step": 13315 + }, + { + "epoch": 1.0746509563392785, + "grad_norm": 0.6593719124794006, + "learning_rate": 5.072177094262913e-05, + "loss": 2.4853, + "step": 13316 + }, + { + "epoch": 1.0747316600758614, + "grad_norm": 0.6519735455513, + "learning_rate": 5.070803454602231e-05, + "loss": 2.4507, + "step": 13317 + }, + { + "epoch": 1.0748123638124445, + "grad_norm": 0.6660017371177673, + "learning_rate": 5.0694299377900115e-05, + "loss": 2.4286, + "step": 13318 + }, + { + "epoch": 1.0748930675490276, + "grad_norm": 0.7506695985794067, + "learning_rate": 5.0680565438604876e-05, + "loss": 2.4841, + "step": 13319 + }, + { + "epoch": 1.0749737712856104, + "grad_norm": 0.6855955719947815, + "learning_rate": 5.0666832728478863e-05, + "loss": 2.3817, + "step": 13320 + }, + { + "epoch": 1.0750544750221935, + "grad_norm": 0.7151634693145752, + "learning_rate": 5.065310124786438e-05, + "loss": 2.3984, + "step": 13321 + }, 
+ { + "epoch": 1.0751351787587766, + "grad_norm": 0.6551649570465088, + "learning_rate": 5.063937099710356e-05, + "loss": 2.4574, + "step": 13322 + }, + { + "epoch": 1.0752158824953595, + "grad_norm": 0.7443479895591736, + "learning_rate": 5.062564197653865e-05, + "loss": 2.52, + "step": 13323 + }, + { + "epoch": 1.0752965862319426, + "grad_norm": 0.7554972767829895, + "learning_rate": 5.061191418651186e-05, + "loss": 2.483, + "step": 13324 + }, + { + "epoch": 1.0753772899685254, + "grad_norm": 0.7661007642745972, + "learning_rate": 5.059818762736521e-05, + "loss": 2.566, + "step": 13325 + }, + { + "epoch": 1.0754579937051085, + "grad_norm": 0.7416480183601379, + "learning_rate": 5.058446229944087e-05, + "loss": 2.465, + "step": 13326 + }, + { + "epoch": 1.0755386974416916, + "grad_norm": 0.6997848749160767, + "learning_rate": 5.057073820308089e-05, + "loss": 2.4936, + "step": 13327 + }, + { + "epoch": 1.0756194011782745, + "grad_norm": 0.7570235133171082, + "learning_rate": 5.0557015338627345e-05, + "loss": 2.519, + "step": 13328 + }, + { + "epoch": 1.0757001049148576, + "grad_norm": 0.7910803556442261, + "learning_rate": 5.0543293706422214e-05, + "loss": 2.4932, + "step": 13329 + }, + { + "epoch": 1.0757808086514407, + "grad_norm": 0.7068312168121338, + "learning_rate": 5.052957330680752e-05, + "loss": 2.4489, + "step": 13330 + }, + { + "epoch": 1.0758615123880235, + "grad_norm": 0.7818215489387512, + "learning_rate": 5.051585414012514e-05, + "loss": 2.4467, + "step": 13331 + }, + { + "epoch": 1.0759422161246066, + "grad_norm": 0.7359446287155151, + "learning_rate": 5.0502136206717046e-05, + "loss": 2.4348, + "step": 13332 + }, + { + "epoch": 1.0760229198611895, + "grad_norm": 0.694726824760437, + "learning_rate": 5.0488419506925124e-05, + "loss": 2.4554, + "step": 13333 + }, + { + "epoch": 1.0761036235977726, + "grad_norm": 0.6776530742645264, + "learning_rate": 5.047470404109118e-05, + "loss": 2.4206, + "step": 13334 + }, + { + "epoch": 1.0761843273343557, + 
"grad_norm": 0.6977556943893433, + "learning_rate": 5.0460989809557066e-05, + "loss": 2.4748, + "step": 13335 + }, + { + "epoch": 1.0762650310709385, + "grad_norm": 0.6888061761856079, + "learning_rate": 5.044727681266459e-05, + "loss": 2.4129, + "step": 13336 + }, + { + "epoch": 1.0763457348075216, + "grad_norm": 0.744110643863678, + "learning_rate": 5.043356505075549e-05, + "loss": 2.4815, + "step": 13337 + }, + { + "epoch": 1.0764264385441047, + "grad_norm": 0.6726455688476562, + "learning_rate": 5.041985452417154e-05, + "loss": 2.4299, + "step": 13338 + }, + { + "epoch": 1.0765071422806876, + "grad_norm": 0.6755545735359192, + "learning_rate": 5.040614523325441e-05, + "loss": 2.4188, + "step": 13339 + }, + { + "epoch": 1.0765878460172706, + "grad_norm": 0.7152739763259888, + "learning_rate": 5.039243717834582e-05, + "loss": 2.4366, + "step": 13340 + }, + { + "epoch": 1.0766685497538535, + "grad_norm": 0.7253085374832153, + "learning_rate": 5.037873035978733e-05, + "loss": 2.4681, + "step": 13341 + }, + { + "epoch": 1.0767492534904366, + "grad_norm": 0.6780266165733337, + "learning_rate": 5.03650247779206e-05, + "loss": 2.5163, + "step": 13342 + }, + { + "epoch": 1.0768299572270197, + "grad_norm": 0.7440996170043945, + "learning_rate": 5.035132043308722e-05, + "loss": 2.4831, + "step": 13343 + }, + { + "epoch": 1.0769106609636026, + "grad_norm": 0.6619833111763, + "learning_rate": 5.0337617325628695e-05, + "loss": 2.433, + "step": 13344 + }, + { + "epoch": 1.0769913647001856, + "grad_norm": 0.7518059015274048, + "learning_rate": 5.032391545588656e-05, + "loss": 2.4241, + "step": 13345 + }, + { + "epoch": 1.0770720684367687, + "grad_norm": 0.6592784523963928, + "learning_rate": 5.031021482420231e-05, + "loss": 2.4902, + "step": 13346 + }, + { + "epoch": 1.0771527721733516, + "grad_norm": 0.7192299365997314, + "learning_rate": 5.029651543091739e-05, + "loss": 2.4445, + "step": 13347 + }, + { + "epoch": 1.0772334759099347, + "grad_norm": 0.7376793622970581, + 
"learning_rate": 5.028281727637323e-05, + "loss": 2.4532, + "step": 13348 + }, + { + "epoch": 1.0773141796465175, + "grad_norm": 0.7344524264335632, + "learning_rate": 5.026912036091127e-05, + "loss": 2.4193, + "step": 13349 + }, + { + "epoch": 1.0773948833831006, + "grad_norm": 0.7343986630439758, + "learning_rate": 5.0255424684872785e-05, + "loss": 2.4912, + "step": 13350 + }, + { + "epoch": 1.0774755871196837, + "grad_norm": 0.7103631496429443, + "learning_rate": 5.024173024859916e-05, + "loss": 2.4611, + "step": 13351 + }, + { + "epoch": 1.0775562908562666, + "grad_norm": 0.7554094791412354, + "learning_rate": 5.022803705243169e-05, + "loss": 2.4875, + "step": 13352 + }, + { + "epoch": 1.0776369945928497, + "grad_norm": 0.6754978895187378, + "learning_rate": 5.0214345096711655e-05, + "loss": 2.4585, + "step": 13353 + }, + { + "epoch": 1.0777176983294328, + "grad_norm": 0.690747857093811, + "learning_rate": 5.020065438178026e-05, + "loss": 2.4751, + "step": 13354 + }, + { + "epoch": 1.0777984020660156, + "grad_norm": 0.7012028694152832, + "learning_rate": 5.018696490797874e-05, + "loss": 2.4443, + "step": 13355 + }, + { + "epoch": 1.0778791058025987, + "grad_norm": 0.6788459420204163, + "learning_rate": 5.017327667564831e-05, + "loss": 2.4135, + "step": 13356 + }, + { + "epoch": 1.0779598095391816, + "grad_norm": 0.6662794351577759, + "learning_rate": 5.015958968512997e-05, + "loss": 2.3801, + "step": 13357 + }, + { + "epoch": 1.0780405132757647, + "grad_norm": 0.7873939275741577, + "learning_rate": 5.0145903936764994e-05, + "loss": 2.4629, + "step": 13358 + }, + { + "epoch": 1.0781212170123478, + "grad_norm": 0.7484980225563049, + "learning_rate": 5.0132219430894455e-05, + "loss": 2.4307, + "step": 13359 + }, + { + "epoch": 1.0782019207489306, + "grad_norm": 0.7559076547622681, + "learning_rate": 5.011853616785932e-05, + "loss": 2.4846, + "step": 13360 + }, + { + "epoch": 1.0782826244855137, + "grad_norm": 0.6822710633277893, + "learning_rate": 
5.010485414800066e-05, + "loss": 2.4448, + "step": 13361 + }, + { + "epoch": 1.0783633282220966, + "grad_norm": 0.6665955185890198, + "learning_rate": 5.0091173371659496e-05, + "loss": 2.4562, + "step": 13362 + }, + { + "epoch": 1.0784440319586797, + "grad_norm": 0.6645659804344177, + "learning_rate": 5.0077493839176714e-05, + "loss": 2.4545, + "step": 13363 + }, + { + "epoch": 1.0785247356952627, + "grad_norm": 0.6648181080818176, + "learning_rate": 5.0063815550893276e-05, + "loss": 2.4565, + "step": 13364 + }, + { + "epoch": 1.0786054394318456, + "grad_norm": 0.6679299473762512, + "learning_rate": 5.005013850715014e-05, + "loss": 2.4301, + "step": 13365 + }, + { + "epoch": 1.0786861431684287, + "grad_norm": 0.7116484642028809, + "learning_rate": 5.003646270828808e-05, + "loss": 2.4174, + "step": 13366 + }, + { + "epoch": 1.0787668469050118, + "grad_norm": 0.6850735545158386, + "learning_rate": 5.002278815464798e-05, + "loss": 2.4386, + "step": 13367 + }, + { + "epoch": 1.0788475506415947, + "grad_norm": 0.6613513827323914, + "learning_rate": 5.00091148465706e-05, + "loss": 2.4038, + "step": 13368 + }, + { + "epoch": 1.0789282543781777, + "grad_norm": 0.659635603427887, + "learning_rate": 4.9995442784396827e-05, + "loss": 2.4346, + "step": 13369 + }, + { + "epoch": 1.0790089581147608, + "grad_norm": 0.6775132417678833, + "learning_rate": 4.998177196846731e-05, + "loss": 2.4853, + "step": 13370 + }, + { + "epoch": 1.0790896618513437, + "grad_norm": 0.719860851764679, + "learning_rate": 4.996810239912277e-05, + "loss": 2.4018, + "step": 13371 + }, + { + "epoch": 1.0791703655879268, + "grad_norm": 0.7316389083862305, + "learning_rate": 4.9954434076703946e-05, + "loss": 2.424, + "step": 13372 + }, + { + "epoch": 1.0792510693245096, + "grad_norm": 0.6779622435569763, + "learning_rate": 4.99407670015514e-05, + "loss": 2.4743, + "step": 13373 + }, + { + "epoch": 1.0793317730610927, + "grad_norm": 0.7357139587402344, + "learning_rate": 4.992710117400581e-05, + "loss": 
2.4385, + "step": 13374 + }, + { + "epoch": 1.0794124767976758, + "grad_norm": 0.671441912651062, + "learning_rate": 4.9913436594407784e-05, + "loss": 2.3988, + "step": 13375 + }, + { + "epoch": 1.0794931805342587, + "grad_norm": 0.7205149531364441, + "learning_rate": 4.9899773263097804e-05, + "loss": 2.4594, + "step": 13376 + }, + { + "epoch": 1.0795738842708418, + "grad_norm": 0.702910840511322, + "learning_rate": 4.988611118041644e-05, + "loss": 2.4831, + "step": 13377 + }, + { + "epoch": 1.0796545880074246, + "grad_norm": 0.6977962255477905, + "learning_rate": 4.987245034670418e-05, + "loss": 2.422, + "step": 13378 + }, + { + "epoch": 1.0797352917440077, + "grad_norm": 0.7106757760047913, + "learning_rate": 4.985879076230149e-05, + "loss": 2.4073, + "step": 13379 + }, + { + "epoch": 1.0798159954805908, + "grad_norm": 0.7046806812286377, + "learning_rate": 4.9845132427548814e-05, + "loss": 2.4065, + "step": 13380 + }, + { + "epoch": 1.0798966992171737, + "grad_norm": 0.7476605772972107, + "learning_rate": 4.9831475342786574e-05, + "loss": 2.4886, + "step": 13381 + }, + { + "epoch": 1.0799774029537568, + "grad_norm": 0.696977972984314, + "learning_rate": 4.981781950835508e-05, + "loss": 2.4732, + "step": 13382 + }, + { + "epoch": 1.0800581066903399, + "grad_norm": 0.6596804857254028, + "learning_rate": 4.98041649245947e-05, + "loss": 2.4497, + "step": 13383 + }, + { + "epoch": 1.0801388104269227, + "grad_norm": 0.7216050028800964, + "learning_rate": 4.979051159184573e-05, + "loss": 2.4745, + "step": 13384 + }, + { + "epoch": 1.0802195141635058, + "grad_norm": 0.6636630296707153, + "learning_rate": 4.977685951044852e-05, + "loss": 2.4904, + "step": 13385 + }, + { + "epoch": 1.0803002179000887, + "grad_norm": 0.7030208110809326, + "learning_rate": 4.97632086807432e-05, + "loss": 2.4302, + "step": 13386 + }, + { + "epoch": 1.0803809216366718, + "grad_norm": 0.7158327102661133, + "learning_rate": 4.974955910307004e-05, + "loss": 2.4735, + "step": 13387 + }, + { + 
"epoch": 1.0804616253732549, + "grad_norm": 0.6736464500427246, + "learning_rate": 4.9735910777769234e-05, + "loss": 2.4334, + "step": 13388 + }, + { + "epoch": 1.0805423291098377, + "grad_norm": 0.6913403272628784, + "learning_rate": 4.972226370518092e-05, + "loss": 2.468, + "step": 13389 + }, + { + "epoch": 1.0806230328464208, + "grad_norm": 0.7006524205207825, + "learning_rate": 4.970861788564522e-05, + "loss": 2.4598, + "step": 13390 + }, + { + "epoch": 1.080703736583004, + "grad_norm": 0.6892947554588318, + "learning_rate": 4.969497331950227e-05, + "loss": 2.4297, + "step": 13391 + }, + { + "epoch": 1.0807844403195868, + "grad_norm": 0.7270283699035645, + "learning_rate": 4.968133000709203e-05, + "loss": 2.5344, + "step": 13392 + }, + { + "epoch": 1.0808651440561698, + "grad_norm": 0.735342264175415, + "learning_rate": 4.9667687948754594e-05, + "loss": 2.4431, + "step": 13393 + }, + { + "epoch": 1.0809458477927527, + "grad_norm": 0.6869279146194458, + "learning_rate": 4.9654047144829974e-05, + "loss": 2.5581, + "step": 13394 + }, + { + "epoch": 1.0810265515293358, + "grad_norm": 0.6975715160369873, + "learning_rate": 4.964040759565808e-05, + "loss": 2.4328, + "step": 13395 + }, + { + "epoch": 1.0811072552659189, + "grad_norm": 0.7312532067298889, + "learning_rate": 4.9626769301578856e-05, + "loss": 2.4686, + "step": 13396 + }, + { + "epoch": 1.0811879590025018, + "grad_norm": 0.7824496626853943, + "learning_rate": 4.9613132262932215e-05, + "loss": 2.4564, + "step": 13397 + }, + { + "epoch": 1.0812686627390848, + "grad_norm": 0.7337941527366638, + "learning_rate": 4.959949648005805e-05, + "loss": 2.4752, + "step": 13398 + }, + { + "epoch": 1.081349366475668, + "grad_norm": 0.7450836300849915, + "learning_rate": 4.958586195329617e-05, + "loss": 2.4457, + "step": 13399 + }, + { + "epoch": 1.0814300702122508, + "grad_norm": 0.6990504860877991, + "learning_rate": 4.9572228682986385e-05, + "loss": 2.4172, + "step": 13400 + }, + { + "epoch": 1.0815107739488339, + 
"grad_norm": 0.7293999791145325, + "learning_rate": 4.955859666946853e-05, + "loss": 2.5295, + "step": 13401 + }, + { + "epoch": 1.0815914776854167, + "grad_norm": 0.6872537136077881, + "learning_rate": 4.9544965913082264e-05, + "loss": 2.5029, + "step": 13402 + }, + { + "epoch": 1.0816721814219998, + "grad_norm": 0.6821706891059875, + "learning_rate": 4.953133641416733e-05, + "loss": 2.4738, + "step": 13403 + }, + { + "epoch": 1.081752885158583, + "grad_norm": 0.6811527609825134, + "learning_rate": 4.951770817306346e-05, + "loss": 2.4323, + "step": 13404 + }, + { + "epoch": 1.0818335888951658, + "grad_norm": 0.7138943076133728, + "learning_rate": 4.950408119011023e-05, + "loss": 2.5155, + "step": 13405 + }, + { + "epoch": 1.0819142926317489, + "grad_norm": 0.6777952909469604, + "learning_rate": 4.949045546564729e-05, + "loss": 2.4414, + "step": 13406 + }, + { + "epoch": 1.0819949963683317, + "grad_norm": 0.7065548896789551, + "learning_rate": 4.9476831000014276e-05, + "loss": 2.4913, + "step": 13407 + }, + { + "epoch": 1.0820757001049148, + "grad_norm": 0.7286355495452881, + "learning_rate": 4.9463207793550626e-05, + "loss": 2.4171, + "step": 13408 + }, + { + "epoch": 1.082156403841498, + "grad_norm": 0.6703049540519714, + "learning_rate": 4.944958584659597e-05, + "loss": 2.4387, + "step": 13409 + }, + { + "epoch": 1.0822371075780808, + "grad_norm": 0.6572019457817078, + "learning_rate": 4.943596515948983e-05, + "loss": 2.4324, + "step": 13410 + }, + { + "epoch": 1.0823178113146639, + "grad_norm": 0.6722360849380493, + "learning_rate": 4.942234573257156e-05, + "loss": 2.4802, + "step": 13411 + }, + { + "epoch": 1.082398515051247, + "grad_norm": 0.7122535109519958, + "learning_rate": 4.9408727566180655e-05, + "loss": 2.4531, + "step": 13412 + }, + { + "epoch": 1.0824792187878298, + "grad_norm": 0.6769903898239136, + "learning_rate": 4.9395110660656505e-05, + "loss": 2.4549, + "step": 13413 + }, + { + "epoch": 1.082559922524413, + "grad_norm": 0.766251266002655, + 
"learning_rate": 4.938149501633852e-05, + "loss": 2.4416, + "step": 13414 + }, + { + "epoch": 1.082640626260996, + "grad_norm": 0.6677987575531006, + "learning_rate": 4.936788063356596e-05, + "loss": 2.4578, + "step": 13415 + }, + { + "epoch": 1.0827213299975789, + "grad_norm": 0.7461380362510681, + "learning_rate": 4.9354267512678156e-05, + "loss": 2.4776, + "step": 13416 + }, + { + "epoch": 1.082802033734162, + "grad_norm": 0.6681976914405823, + "learning_rate": 4.934065565401443e-05, + "loss": 2.5044, + "step": 13417 + }, + { + "epoch": 1.0828827374707448, + "grad_norm": 0.6809324622154236, + "learning_rate": 4.932704505791397e-05, + "loss": 2.4651, + "step": 13418 + }, + { + "epoch": 1.082963441207328, + "grad_norm": 0.6926563382148743, + "learning_rate": 4.931343572471596e-05, + "loss": 2.4633, + "step": 13419 + }, + { + "epoch": 1.083044144943911, + "grad_norm": 0.6451820135116577, + "learning_rate": 4.929982765475971e-05, + "loss": 2.474, + "step": 13420 + }, + { + "epoch": 1.0831248486804939, + "grad_norm": 0.7088493704795837, + "learning_rate": 4.9286220848384247e-05, + "loss": 2.462, + "step": 13421 + }, + { + "epoch": 1.083205552417077, + "grad_norm": 0.7819172739982605, + "learning_rate": 4.9272615305928725e-05, + "loss": 2.4534, + "step": 13422 + }, + { + "epoch": 1.0832862561536598, + "grad_norm": 0.6579666137695312, + "learning_rate": 4.925901102773227e-05, + "loss": 2.4101, + "step": 13423 + }, + { + "epoch": 1.083366959890243, + "grad_norm": 0.6999555230140686, + "learning_rate": 4.924540801413385e-05, + "loss": 2.4534, + "step": 13424 + }, + { + "epoch": 1.083447663626826, + "grad_norm": 0.7034400105476379, + "learning_rate": 4.9231806265472555e-05, + "loss": 2.4741, + "step": 13425 + }, + { + "epoch": 1.0835283673634089, + "grad_norm": 0.6595034599304199, + "learning_rate": 4.921820578208739e-05, + "loss": 2.4011, + "step": 13426 + }, + { + "epoch": 1.083609071099992, + "grad_norm": 0.666419267654419, + "learning_rate": 4.920460656431723e-05, + 
"loss": 2.4399, + "step": 13427 + }, + { + "epoch": 1.083689774836575, + "grad_norm": 0.7058294415473938, + "learning_rate": 4.919100861250108e-05, + "loss": 2.434, + "step": 13428 + }, + { + "epoch": 1.083770478573158, + "grad_norm": 0.7045806050300598, + "learning_rate": 4.917741192697779e-05, + "loss": 2.4616, + "step": 13429 + }, + { + "epoch": 1.083851182309741, + "grad_norm": 0.6565639972686768, + "learning_rate": 4.916381650808626e-05, + "loss": 2.3864, + "step": 13430 + }, + { + "epoch": 1.0839318860463238, + "grad_norm": 0.6939674615859985, + "learning_rate": 4.9150222356165295e-05, + "loss": 2.4217, + "step": 13431 + }, + { + "epoch": 1.084012589782907, + "grad_norm": 0.7240599989891052, + "learning_rate": 4.913662947155373e-05, + "loss": 2.447, + "step": 13432 + }, + { + "epoch": 1.08409329351949, + "grad_norm": 0.7369012832641602, + "learning_rate": 4.9123037854590336e-05, + "loss": 2.4588, + "step": 13433 + }, + { + "epoch": 1.0841739972560729, + "grad_norm": 0.714269757270813, + "learning_rate": 4.9109447505613803e-05, + "loss": 2.4921, + "step": 13434 + }, + { + "epoch": 1.084254700992656, + "grad_norm": 0.7541659474372864, + "learning_rate": 4.909585842496287e-05, + "loss": 2.4191, + "step": 13435 + }, + { + "epoch": 1.084335404729239, + "grad_norm": 0.7245596051216125, + "learning_rate": 4.9082270612976243e-05, + "loss": 2.4904, + "step": 13436 + }, + { + "epoch": 1.084416108465822, + "grad_norm": 0.7301090359687805, + "learning_rate": 4.90686840699925e-05, + "loss": 2.4461, + "step": 13437 + }, + { + "epoch": 1.084496812202405, + "grad_norm": 0.7404102683067322, + "learning_rate": 4.905509879635028e-05, + "loss": 2.4826, + "step": 13438 + }, + { + "epoch": 1.0845775159389879, + "grad_norm": 0.7053710222244263, + "learning_rate": 4.9041514792388175e-05, + "loss": 2.4231, + "step": 13439 + }, + { + "epoch": 1.084658219675571, + "grad_norm": 0.6171362400054932, + "learning_rate": 4.9027932058444724e-05, + "loss": 2.4472, + "step": 13440 + }, + { + 
"epoch": 1.084738923412154, + "grad_norm": 0.7367038130760193, + "learning_rate": 4.901435059485845e-05, + "loss": 2.4847, + "step": 13441 + }, + { + "epoch": 1.084819627148737, + "grad_norm": 0.754828691482544, + "learning_rate": 4.900077040196788e-05, + "loss": 2.4731, + "step": 13442 + }, + { + "epoch": 1.08490033088532, + "grad_norm": 0.7380684018135071, + "learning_rate": 4.8987191480111386e-05, + "loss": 2.4227, + "step": 13443 + }, + { + "epoch": 1.084981034621903, + "grad_norm": 0.6711444854736328, + "learning_rate": 4.897361382962742e-05, + "loss": 2.4744, + "step": 13444 + }, + { + "epoch": 1.085061738358486, + "grad_norm": 0.7709227204322815, + "learning_rate": 4.896003745085438e-05, + "loss": 2.5422, + "step": 13445 + }, + { + "epoch": 1.085142442095069, + "grad_norm": 0.6778519153594971, + "learning_rate": 4.8946462344130675e-05, + "loss": 2.4757, + "step": 13446 + }, + { + "epoch": 1.085223145831652, + "grad_norm": 0.7390698194503784, + "learning_rate": 4.893288850979454e-05, + "loss": 2.4214, + "step": 13447 + }, + { + "epoch": 1.085303849568235, + "grad_norm": 0.6632684469223022, + "learning_rate": 4.891931594818432e-05, + "loss": 2.4689, + "step": 13448 + }, + { + "epoch": 1.085384553304818, + "grad_norm": 0.68693608045578, + "learning_rate": 4.890574465963827e-05, + "loss": 2.4788, + "step": 13449 + }, + { + "epoch": 1.085465257041401, + "grad_norm": 0.6910344362258911, + "learning_rate": 4.8892174644494625e-05, + "loss": 2.4611, + "step": 13450 + }, + { + "epoch": 1.085545960777984, + "grad_norm": 0.6935380101203918, + "learning_rate": 4.887860590309158e-05, + "loss": 2.4481, + "step": 13451 + }, + { + "epoch": 1.085626664514567, + "grad_norm": 0.7086954712867737, + "learning_rate": 4.886503843576735e-05, + "loss": 2.4583, + "step": 13452 + }, + { + "epoch": 1.08570736825115, + "grad_norm": 0.7447777986526489, + "learning_rate": 4.8851472242859994e-05, + "loss": 2.5035, + "step": 13453 + }, + { + "epoch": 1.085788071987733, + "grad_norm": 
0.6896036267280579, + "learning_rate": 4.8837907324707656e-05, + "loss": 2.4622, + "step": 13454 + }, + { + "epoch": 1.085868775724316, + "grad_norm": 0.7261155247688293, + "learning_rate": 4.882434368164843e-05, + "loss": 2.4958, + "step": 13455 + }, + { + "epoch": 1.085949479460899, + "grad_norm": 0.6868197321891785, + "learning_rate": 4.881078131402031e-05, + "loss": 2.4952, + "step": 13456 + }, + { + "epoch": 1.0860301831974821, + "grad_norm": 0.6338867545127869, + "learning_rate": 4.879722022216132e-05, + "loss": 2.4553, + "step": 13457 + }, + { + "epoch": 1.086110886934065, + "grad_norm": 0.7214454412460327, + "learning_rate": 4.878366040640946e-05, + "loss": 2.4433, + "step": 13458 + }, + { + "epoch": 1.086191590670648, + "grad_norm": 0.6871301531791687, + "learning_rate": 4.877010186710266e-05, + "loss": 2.4118, + "step": 13459 + }, + { + "epoch": 1.0862722944072312, + "grad_norm": 0.6845650672912598, + "learning_rate": 4.875654460457883e-05, + "loss": 2.4684, + "step": 13460 + }, + { + "epoch": 1.086352998143814, + "grad_norm": 0.7027513980865479, + "learning_rate": 4.8742988619175865e-05, + "loss": 2.4569, + "step": 13461 + }, + { + "epoch": 1.0864337018803971, + "grad_norm": 0.6428621411323547, + "learning_rate": 4.8729433911231646e-05, + "loss": 2.4211, + "step": 13462 + }, + { + "epoch": 1.08651440561698, + "grad_norm": 0.6921488046646118, + "learning_rate": 4.8715880481083934e-05, + "loss": 2.4668, + "step": 13463 + }, + { + "epoch": 1.086595109353563, + "grad_norm": 0.7001025676727295, + "learning_rate": 4.870232832907051e-05, + "loss": 2.4685, + "step": 13464 + }, + { + "epoch": 1.0866758130901462, + "grad_norm": 0.7460644245147705, + "learning_rate": 4.868877745552922e-05, + "loss": 2.3922, + "step": 13465 + }, + { + "epoch": 1.086756516826729, + "grad_norm": 0.7418891191482544, + "learning_rate": 4.867522786079768e-05, + "loss": 2.3777, + "step": 13466 + }, + { + "epoch": 1.0868372205633121, + "grad_norm": 0.6430083513259888, + "learning_rate": 
4.8661679545213625e-05, + "loss": 2.4385, + "step": 13467 + }, + { + "epoch": 1.086917924299895, + "grad_norm": 0.6963593363761902, + "learning_rate": 4.864813250911475e-05, + "loss": 2.4083, + "step": 13468 + }, + { + "epoch": 1.086998628036478, + "grad_norm": 0.6796097159385681, + "learning_rate": 4.8634586752838606e-05, + "loss": 2.4984, + "step": 13469 + }, + { + "epoch": 1.0870793317730612, + "grad_norm": 0.6845307946205139, + "learning_rate": 4.862104227672281e-05, + "loss": 2.4168, + "step": 13470 + }, + { + "epoch": 1.087160035509644, + "grad_norm": 0.705348014831543, + "learning_rate": 4.8607499081105e-05, + "loss": 2.4216, + "step": 13471 + }, + { + "epoch": 1.087240739246227, + "grad_norm": 0.6906474828720093, + "learning_rate": 4.8593957166322636e-05, + "loss": 2.4955, + "step": 13472 + }, + { + "epoch": 1.0873214429828102, + "grad_norm": 0.696489691734314, + "learning_rate": 4.858041653271323e-05, + "loss": 2.4186, + "step": 13473 + }, + { + "epoch": 1.087402146719393, + "grad_norm": 0.6997761726379395, + "learning_rate": 4.856687718061429e-05, + "loss": 2.441, + "step": 13474 + }, + { + "epoch": 1.0874828504559761, + "grad_norm": 0.6515649557113647, + "learning_rate": 4.8553339110363184e-05, + "loss": 2.3997, + "step": 13475 + }, + { + "epoch": 1.087563554192559, + "grad_norm": 0.6902725696563721, + "learning_rate": 4.853980232229734e-05, + "loss": 2.4765, + "step": 13476 + }, + { + "epoch": 1.087644257929142, + "grad_norm": 0.6832055449485779, + "learning_rate": 4.852626681675415e-05, + "loss": 2.411, + "step": 13477 + }, + { + "epoch": 1.0877249616657252, + "grad_norm": 0.668520987033844, + "learning_rate": 4.8512732594070984e-05, + "loss": 2.4742, + "step": 13478 + }, + { + "epoch": 1.087805665402308, + "grad_norm": 0.7019832134246826, + "learning_rate": 4.849919965458507e-05, + "loss": 2.4638, + "step": 13479 + }, + { + "epoch": 1.0878863691388911, + "grad_norm": 0.6986027359962463, + "learning_rate": 4.8485667998633724e-05, + "loss": 2.4866, + 
"step": 13480 + }, + { + "epoch": 1.0879670728754742, + "grad_norm": 0.659037709236145, + "learning_rate": 4.8472137626554195e-05, + "loss": 2.4821, + "step": 13481 + }, + { + "epoch": 1.088047776612057, + "grad_norm": 0.6506801247596741, + "learning_rate": 4.8458608538683694e-05, + "loss": 2.4686, + "step": 13482 + }, + { + "epoch": 1.0881284803486402, + "grad_norm": 0.7136878967285156, + "learning_rate": 4.844508073535939e-05, + "loss": 2.4523, + "step": 13483 + }, + { + "epoch": 1.088209184085223, + "grad_norm": 0.6663414239883423, + "learning_rate": 4.843155421691848e-05, + "loss": 2.4287, + "step": 13484 + }, + { + "epoch": 1.0882898878218061, + "grad_norm": 0.7192783355712891, + "learning_rate": 4.8418028983698006e-05, + "loss": 2.4433, + "step": 13485 + }, + { + "epoch": 1.0883705915583892, + "grad_norm": 0.6620980501174927, + "learning_rate": 4.8404505036035086e-05, + "loss": 2.4823, + "step": 13486 + }, + { + "epoch": 1.088451295294972, + "grad_norm": 0.6282123327255249, + "learning_rate": 4.83909823742668e-05, + "loss": 2.4641, + "step": 13487 + }, + { + "epoch": 1.0885319990315552, + "grad_norm": 0.6384354829788208, + "learning_rate": 4.837746099873012e-05, + "loss": 2.4234, + "step": 13488 + }, + { + "epoch": 1.0886127027681383, + "grad_norm": 0.6550076603889465, + "learning_rate": 4.836394090976204e-05, + "loss": 2.4743, + "step": 13489 + }, + { + "epoch": 1.0886934065047211, + "grad_norm": 0.6987888216972351, + "learning_rate": 4.8350422107699545e-05, + "loss": 2.4263, + "step": 13490 + }, + { + "epoch": 1.0887741102413042, + "grad_norm": 0.7012613415718079, + "learning_rate": 4.833690459287953e-05, + "loss": 2.4801, + "step": 13491 + }, + { + "epoch": 1.088854813977887, + "grad_norm": 0.6986923217773438, + "learning_rate": 4.832338836563891e-05, + "loss": 2.426, + "step": 13492 + }, + { + "epoch": 1.0889355177144702, + "grad_norm": 0.6936241984367371, + "learning_rate": 4.830987342631453e-05, + "loss": 2.4361, + "step": 13493 + }, + { + "epoch": 
1.0890162214510533, + "grad_norm": 0.6612359881401062, + "learning_rate": 4.8296359775243275e-05, + "loss": 2.4385, + "step": 13494 + }, + { + "epoch": 1.0890969251876361, + "grad_norm": 0.6927692294120789, + "learning_rate": 4.828284741276183e-05, + "loss": 2.4692, + "step": 13495 + }, + { + "epoch": 1.0891776289242192, + "grad_norm": 0.6710225343704224, + "learning_rate": 4.8269336339207036e-05, + "loss": 2.4078, + "step": 13496 + }, + { + "epoch": 1.0892583326608023, + "grad_norm": 0.639076828956604, + "learning_rate": 4.825582655491564e-05, + "loss": 2.4368, + "step": 13497 + }, + { + "epoch": 1.0893390363973852, + "grad_norm": 0.7050483226776123, + "learning_rate": 4.824231806022426e-05, + "loss": 2.4308, + "step": 13498 + }, + { + "epoch": 1.0894197401339683, + "grad_norm": 0.7097769975662231, + "learning_rate": 4.822881085546962e-05, + "loss": 2.4378, + "step": 13499 + }, + { + "epoch": 1.0895004438705511, + "grad_norm": 0.6939458847045898, + "learning_rate": 4.821530494098834e-05, + "loss": 2.4678, + "step": 13500 + }, + { + "epoch": 1.0895811476071342, + "grad_norm": 0.6797441840171814, + "learning_rate": 4.8201800317117016e-05, + "loss": 2.4837, + "step": 13501 + }, + { + "epoch": 1.0896618513437173, + "grad_norm": 0.7451521158218384, + "learning_rate": 4.818829698419225e-05, + "loss": 2.4651, + "step": 13502 + }, + { + "epoch": 1.0897425550803002, + "grad_norm": 0.6749109625816345, + "learning_rate": 4.8174794942550585e-05, + "loss": 2.4569, + "step": 13503 + }, + { + "epoch": 1.0898232588168832, + "grad_norm": 0.6321636438369751, + "learning_rate": 4.8161294192528474e-05, + "loss": 2.4049, + "step": 13504 + }, + { + "epoch": 1.0899039625534663, + "grad_norm": 0.7002367377281189, + "learning_rate": 4.8147794734462415e-05, + "loss": 2.4489, + "step": 13505 + }, + { + "epoch": 1.0899846662900492, + "grad_norm": 0.758057713508606, + "learning_rate": 4.813429656868889e-05, + "loss": 2.436, + "step": 13506 + }, + { + "epoch": 1.0900653700266323, + 
"grad_norm": 0.6665529012680054, + "learning_rate": 4.812079969554424e-05, + "loss": 2.3805, + "step": 13507 + }, + { + "epoch": 1.0901460737632152, + "grad_norm": 0.6962547898292542, + "learning_rate": 4.810730411536487e-05, + "loss": 2.4203, + "step": 13508 + }, + { + "epoch": 1.0902267774997982, + "grad_norm": 0.6860647201538086, + "learning_rate": 4.809380982848712e-05, + "loss": 2.4482, + "step": 13509 + }, + { + "epoch": 1.0903074812363813, + "grad_norm": 0.7045090198516846, + "learning_rate": 4.808031683524733e-05, + "loss": 2.4155, + "step": 13510 + }, + { + "epoch": 1.0903881849729642, + "grad_norm": 0.6609304547309875, + "learning_rate": 4.806682513598176e-05, + "loss": 2.4295, + "step": 13511 + }, + { + "epoch": 1.0904688887095473, + "grad_norm": 0.7647323608398438, + "learning_rate": 4.8053334731026665e-05, + "loss": 2.4704, + "step": 13512 + }, + { + "epoch": 1.0905495924461301, + "grad_norm": 0.677449643611908, + "learning_rate": 4.803984562071829e-05, + "loss": 2.4501, + "step": 13513 + }, + { + "epoch": 1.0906302961827132, + "grad_norm": 0.645866334438324, + "learning_rate": 4.8026357805392754e-05, + "loss": 2.427, + "step": 13514 + }, + { + "epoch": 1.0907109999192963, + "grad_norm": 0.6968488097190857, + "learning_rate": 4.801287128538624e-05, + "loss": 2.3933, + "step": 13515 + }, + { + "epoch": 1.0907917036558792, + "grad_norm": 0.7137444615364075, + "learning_rate": 4.799938606103491e-05, + "loss": 2.4611, + "step": 13516 + }, + { + "epoch": 1.0908724073924623, + "grad_norm": 0.6860007047653198, + "learning_rate": 4.7985902132674765e-05, + "loss": 2.4252, + "step": 13517 + }, + { + "epoch": 1.0909531111290454, + "grad_norm": 0.726290762424469, + "learning_rate": 4.797241950064192e-05, + "loss": 2.44, + "step": 13518 + }, + { + "epoch": 1.0910338148656282, + "grad_norm": 0.6833362579345703, + "learning_rate": 4.795893816527241e-05, + "loss": 2.4199, + "step": 13519 + }, + { + "epoch": 1.0911145186022113, + "grad_norm": 0.7412242293357849, + 
"learning_rate": 4.794545812690212e-05, + "loss": 2.5412, + "step": 13520 + }, + { + "epoch": 1.0911952223387944, + "grad_norm": 0.6882274150848389, + "learning_rate": 4.793197938586712e-05, + "loss": 2.473, + "step": 13521 + }, + { + "epoch": 1.0912759260753773, + "grad_norm": 0.7334007024765015, + "learning_rate": 4.791850194250335e-05, + "loss": 2.4357, + "step": 13522 + }, + { + "epoch": 1.0913566298119604, + "grad_norm": 0.6564081311225891, + "learning_rate": 4.790502579714661e-05, + "loss": 2.4425, + "step": 13523 + }, + { + "epoch": 1.0914373335485432, + "grad_norm": 0.7045762538909912, + "learning_rate": 4.78915509501328e-05, + "loss": 2.4929, + "step": 13524 + }, + { + "epoch": 1.0915180372851263, + "grad_norm": 0.7512505650520325, + "learning_rate": 4.787807740179776e-05, + "loss": 2.4187, + "step": 13525 + }, + { + "epoch": 1.0915987410217094, + "grad_norm": 0.6592997908592224, + "learning_rate": 4.786460515247732e-05, + "loss": 2.4344, + "step": 13526 + }, + { + "epoch": 1.0916794447582923, + "grad_norm": 0.6721770763397217, + "learning_rate": 4.785113420250715e-05, + "loss": 2.4415, + "step": 13527 + }, + { + "epoch": 1.0917601484948753, + "grad_norm": 0.7544431686401367, + "learning_rate": 4.783766455222305e-05, + "loss": 2.4831, + "step": 13528 + }, + { + "epoch": 1.0918408522314582, + "grad_norm": 0.7226355671882629, + "learning_rate": 4.782419620196073e-05, + "loss": 2.4807, + "step": 13529 + }, + { + "epoch": 1.0919215559680413, + "grad_norm": 0.6386340260505676, + "learning_rate": 4.78107291520558e-05, + "loss": 2.4062, + "step": 13530 + }, + { + "epoch": 1.0920022597046244, + "grad_norm": 0.6670595407485962, + "learning_rate": 4.7797263402843926e-05, + "loss": 2.4009, + "step": 13531 + }, + { + "epoch": 1.0920829634412073, + "grad_norm": 0.6600756049156189, + "learning_rate": 4.778379895466071e-05, + "loss": 2.4321, + "step": 13532 + }, + { + "epoch": 1.0921636671777903, + "grad_norm": 0.7190701961517334, + "learning_rate": 4.77703358078417e-05, 
+ "loss": 2.4229, + "step": 13533 + }, + { + "epoch": 1.0922443709143734, + "grad_norm": 0.6554828882217407, + "learning_rate": 4.775687396272247e-05, + "loss": 2.442, + "step": 13534 + }, + { + "epoch": 1.0923250746509563, + "grad_norm": 0.6720205545425415, + "learning_rate": 4.774341341963853e-05, + "loss": 2.4994, + "step": 13535 + }, + { + "epoch": 1.0924057783875394, + "grad_norm": 0.7161003947257996, + "learning_rate": 4.7729954178925295e-05, + "loss": 2.4666, + "step": 13536 + }, + { + "epoch": 1.0924864821241222, + "grad_norm": 0.6817156672477722, + "learning_rate": 4.771649624091824e-05, + "loss": 2.4203, + "step": 13537 + }, + { + "epoch": 1.0925671858607053, + "grad_norm": 0.7167035937309265, + "learning_rate": 4.770303960595277e-05, + "loss": 2.4214, + "step": 13538 + }, + { + "epoch": 1.0926478895972884, + "grad_norm": 0.6373945474624634, + "learning_rate": 4.768958427436429e-05, + "loss": 2.485, + "step": 13539 + }, + { + "epoch": 1.0927285933338713, + "grad_norm": 0.7361387014389038, + "learning_rate": 4.767613024648808e-05, + "loss": 2.5192, + "step": 13540 + }, + { + "epoch": 1.0928092970704544, + "grad_norm": 0.7034375667572021, + "learning_rate": 4.766267752265947e-05, + "loss": 2.4324, + "step": 13541 + }, + { + "epoch": 1.0928900008070375, + "grad_norm": 0.7355689406394958, + "learning_rate": 4.7649226103213765e-05, + "loss": 2.5048, + "step": 13542 + }, + { + "epoch": 1.0929707045436203, + "grad_norm": 0.7120445966720581, + "learning_rate": 4.7635775988486176e-05, + "loss": 2.449, + "step": 13543 + }, + { + "epoch": 1.0930514082802034, + "grad_norm": 0.695888876914978, + "learning_rate": 4.7622327178811935e-05, + "loss": 2.4974, + "step": 13544 + }, + { + "epoch": 1.0931321120167863, + "grad_norm": 0.6953639984130859, + "learning_rate": 4.760887967452625e-05, + "loss": 2.3927, + "step": 13545 + }, + { + "epoch": 1.0932128157533694, + "grad_norm": 0.6457183957099915, + "learning_rate": 4.759543347596421e-05, + "loss": 2.4501, + "step": 13546 + 
}, + { + "epoch": 1.0932935194899525, + "grad_norm": 0.7259296774864197, + "learning_rate": 4.7581988583460946e-05, + "loss": 2.4896, + "step": 13547 + }, + { + "epoch": 1.0933742232265353, + "grad_norm": 0.6897724270820618, + "learning_rate": 4.7568544997351586e-05, + "loss": 2.4181, + "step": 13548 + }, + { + "epoch": 1.0934549269631184, + "grad_norm": 0.6723688840866089, + "learning_rate": 4.755510271797111e-05, + "loss": 2.5097, + "step": 13549 + }, + { + "epoch": 1.0935356306997015, + "grad_norm": 0.7353307604789734, + "learning_rate": 4.754166174565456e-05, + "loss": 2.4548, + "step": 13550 + }, + { + "epoch": 1.0936163344362844, + "grad_norm": 0.7334069013595581, + "learning_rate": 4.752822208073693e-05, + "loss": 2.5113, + "step": 13551 + }, + { + "epoch": 1.0936970381728675, + "grad_norm": 0.6581420302391052, + "learning_rate": 4.751478372355317e-05, + "loss": 2.4546, + "step": 13552 + }, + { + "epoch": 1.0937777419094503, + "grad_norm": 0.7890802621841431, + "learning_rate": 4.75013466744382e-05, + "loss": 2.4092, + "step": 13553 + }, + { + "epoch": 1.0938584456460334, + "grad_norm": 0.7226595282554626, + "learning_rate": 4.7487910933726895e-05, + "loss": 2.457, + "step": 13554 + }, + { + "epoch": 1.0939391493826165, + "grad_norm": 0.7108014225959778, + "learning_rate": 4.7474476501754165e-05, + "loss": 2.471, + "step": 13555 + }, + { + "epoch": 1.0940198531191994, + "grad_norm": 0.6864863038063049, + "learning_rate": 4.746104337885473e-05, + "loss": 2.4778, + "step": 13556 + }, + { + "epoch": 1.0941005568557824, + "grad_norm": 0.6890624165534973, + "learning_rate": 4.744761156536345e-05, + "loss": 2.456, + "step": 13557 + }, + { + "epoch": 1.0941812605923653, + "grad_norm": 0.7052781581878662, + "learning_rate": 4.743418106161509e-05, + "loss": 2.4796, + "step": 13558 + }, + { + "epoch": 1.0942619643289484, + "grad_norm": 0.6569164991378784, + "learning_rate": 4.742075186794431e-05, + "loss": 2.469, + "step": 13559 + }, + { + "epoch": 1.0943426680655315, 
+ "grad_norm": 0.7302874326705933, + "learning_rate": 4.7407323984685836e-05, + "loss": 2.4543, + "step": 13560 + }, + { + "epoch": 1.0944233718021144, + "grad_norm": 0.6499345898628235, + "learning_rate": 4.7393897412174335e-05, + "loss": 2.4037, + "step": 13561 + }, + { + "epoch": 1.0945040755386974, + "grad_norm": 0.6643944382667542, + "learning_rate": 4.7380472150744416e-05, + "loss": 2.4067, + "step": 13562 + }, + { + "epoch": 1.0945847792752805, + "grad_norm": 0.7491872906684875, + "learning_rate": 4.736704820073069e-05, + "loss": 2.4277, + "step": 13563 + }, + { + "epoch": 1.0946654830118634, + "grad_norm": 0.7319512367248535, + "learning_rate": 4.735362556246773e-05, + "loss": 2.4588, + "step": 13564 + }, + { + "epoch": 1.0947461867484465, + "grad_norm": 0.7404350638389587, + "learning_rate": 4.734020423629001e-05, + "loss": 2.432, + "step": 13565 + }, + { + "epoch": 1.0948268904850296, + "grad_norm": 0.6462193727493286, + "learning_rate": 4.732678422253206e-05, + "loss": 2.4417, + "step": 13566 + }, + { + "epoch": 1.0949075942216124, + "grad_norm": 0.6711323857307434, + "learning_rate": 4.731336552152836e-05, + "loss": 2.4023, + "step": 13567 + }, + { + "epoch": 1.0949882979581955, + "grad_norm": 0.658261239528656, + "learning_rate": 4.729994813361329e-05, + "loss": 2.4132, + "step": 13568 + }, + { + "epoch": 1.0950690016947784, + "grad_norm": 0.8081904053688049, + "learning_rate": 4.728653205912127e-05, + "loss": 2.4412, + "step": 13569 + }, + { + "epoch": 1.0951497054313615, + "grad_norm": 0.6620786786079407, + "learning_rate": 4.727311729838666e-05, + "loss": 2.4357, + "step": 13570 + }, + { + "epoch": 1.0952304091679446, + "grad_norm": 0.7026848793029785, + "learning_rate": 4.725970385174381e-05, + "loss": 2.4159, + "step": 13571 + }, + { + "epoch": 1.0953111129045274, + "grad_norm": 0.7017392516136169, + "learning_rate": 4.7246291719526995e-05, + "loss": 2.4253, + "step": 13572 + }, + { + "epoch": 1.0953918166411105, + "grad_norm": 0.710172712802887, 
+ "learning_rate": 4.7232880902070483e-05, + "loss": 2.4057, + "step": 13573 + }, + { + "epoch": 1.0954725203776934, + "grad_norm": 0.7208876013755798, + "learning_rate": 4.721947139970856e-05, + "loss": 2.4803, + "step": 13574 + }, + { + "epoch": 1.0955532241142765, + "grad_norm": 0.693219006061554, + "learning_rate": 4.720606321277534e-05, + "loss": 2.3611, + "step": 13575 + }, + { + "epoch": 1.0956339278508596, + "grad_norm": 0.737206757068634, + "learning_rate": 4.7192656341605026e-05, + "loss": 2.3873, + "step": 13576 + }, + { + "epoch": 1.0957146315874424, + "grad_norm": 0.6605268120765686, + "learning_rate": 4.717925078653179e-05, + "loss": 2.4155, + "step": 13577 + }, + { + "epoch": 1.0957953353240255, + "grad_norm": 0.7143047451972961, + "learning_rate": 4.716584654788967e-05, + "loss": 2.4526, + "step": 13578 + }, + { + "epoch": 1.0958760390606086, + "grad_norm": 0.6980953216552734, + "learning_rate": 4.715244362601277e-05, + "loss": 2.4422, + "step": 13579 + }, + { + "epoch": 1.0959567427971915, + "grad_norm": 0.6852009892463684, + "learning_rate": 4.713904202123515e-05, + "loss": 2.4599, + "step": 13580 + }, + { + "epoch": 1.0960374465337746, + "grad_norm": 0.7436656355857849, + "learning_rate": 4.712564173389074e-05, + "loss": 2.4441, + "step": 13581 + }, + { + "epoch": 1.0961181502703574, + "grad_norm": 0.7090624570846558, + "learning_rate": 4.711224276431352e-05, + "loss": 2.4741, + "step": 13582 + }, + { + "epoch": 1.0961988540069405, + "grad_norm": 0.6611043810844421, + "learning_rate": 4.709884511283753e-05, + "loss": 2.4589, + "step": 13583 + }, + { + "epoch": 1.0962795577435236, + "grad_norm": 0.6932426691055298, + "learning_rate": 4.708544877979658e-05, + "loss": 2.4199, + "step": 13584 + }, + { + "epoch": 1.0963602614801065, + "grad_norm": 0.7629422545433044, + "learning_rate": 4.707205376552456e-05, + "loss": 2.4588, + "step": 13585 + }, + { + "epoch": 1.0964409652166895, + "grad_norm": 0.8116739392280579, + "learning_rate": 
4.705866007035531e-05, + "loss": 2.472, + "step": 13586 + }, + { + "epoch": 1.0965216689532726, + "grad_norm": 0.6711297631263733, + "learning_rate": 4.704526769462269e-05, + "loss": 2.4086, + "step": 13587 + }, + { + "epoch": 1.0966023726898555, + "grad_norm": 0.716015636920929, + "learning_rate": 4.703187663866037e-05, + "loss": 2.4411, + "step": 13588 + }, + { + "epoch": 1.0966830764264386, + "grad_norm": 0.6982430219650269, + "learning_rate": 4.701848690280215e-05, + "loss": 2.4438, + "step": 13589 + }, + { + "epoch": 1.0967637801630215, + "grad_norm": 0.7183159589767456, + "learning_rate": 4.7005098487381785e-05, + "loss": 2.4464, + "step": 13590 + }, + { + "epoch": 1.0968444838996045, + "grad_norm": 0.6983399391174316, + "learning_rate": 4.699171139273284e-05, + "loss": 2.4354, + "step": 13591 + }, + { + "epoch": 1.0969251876361876, + "grad_norm": 0.7157938480377197, + "learning_rate": 4.697832561918901e-05, + "loss": 2.4393, + "step": 13592 + }, + { + "epoch": 1.0970058913727705, + "grad_norm": 0.6991363763809204, + "learning_rate": 4.696494116708392e-05, + "loss": 2.4723, + "step": 13593 + }, + { + "epoch": 1.0970865951093536, + "grad_norm": 0.6722309589385986, + "learning_rate": 4.695155803675112e-05, + "loss": 2.447, + "step": 13594 + }, + { + "epoch": 1.0971672988459367, + "grad_norm": 0.6492688655853271, + "learning_rate": 4.6938176228524175e-05, + "loss": 2.4213, + "step": 13595 + }, + { + "epoch": 1.0972480025825195, + "grad_norm": 0.6941642165184021, + "learning_rate": 4.6924795742736616e-05, + "loss": 2.4714, + "step": 13596 + }, + { + "epoch": 1.0973287063191026, + "grad_norm": 0.7506042122840881, + "learning_rate": 4.691141657972185e-05, + "loss": 2.4563, + "step": 13597 + }, + { + "epoch": 1.0974094100556855, + "grad_norm": 0.7032836675643921, + "learning_rate": 4.6898038739813356e-05, + "loss": 2.4824, + "step": 13598 + }, + { + "epoch": 1.0974901137922686, + "grad_norm": 0.6908734440803528, + "learning_rate": 4.6884662223344575e-05, + "loss": 
2.4486, + "step": 13599 + }, + { + "epoch": 1.0975708175288517, + "grad_norm": 0.714971661567688, + "learning_rate": 4.687128703064883e-05, + "loss": 2.4372, + "step": 13600 + }, + { + "epoch": 1.0976515212654345, + "grad_norm": 0.6989198327064514, + "learning_rate": 4.6857913162059486e-05, + "loss": 2.395, + "step": 13601 + }, + { + "epoch": 1.0977322250020176, + "grad_norm": 0.7163406014442444, + "learning_rate": 4.684454061790987e-05, + "loss": 2.4868, + "step": 13602 + }, + { + "epoch": 1.0978129287386005, + "grad_norm": 0.6600626707077026, + "learning_rate": 4.6831169398533245e-05, + "loss": 2.5134, + "step": 13603 + }, + { + "epoch": 1.0978936324751836, + "grad_norm": 0.6657080054283142, + "learning_rate": 4.681779950426286e-05, + "loss": 2.4701, + "step": 13604 + }, + { + "epoch": 1.0979743362117667, + "grad_norm": 0.665860116481781, + "learning_rate": 4.680443093543194e-05, + "loss": 2.4593, + "step": 13605 + }, + { + "epoch": 1.0980550399483495, + "grad_norm": 0.7000327110290527, + "learning_rate": 4.679106369237368e-05, + "loss": 2.4523, + "step": 13606 + }, + { + "epoch": 1.0981357436849326, + "grad_norm": 0.6969157457351685, + "learning_rate": 4.677769777542118e-05, + "loss": 2.4935, + "step": 13607 + }, + { + "epoch": 1.0982164474215157, + "grad_norm": 0.6864836812019348, + "learning_rate": 4.676433318490757e-05, + "loss": 2.457, + "step": 13608 + }, + { + "epoch": 1.0982971511580986, + "grad_norm": 0.7331364750862122, + "learning_rate": 4.675096992116598e-05, + "loss": 2.4253, + "step": 13609 + }, + { + "epoch": 1.0983778548946816, + "grad_norm": 0.75, + "learning_rate": 4.673760798452936e-05, + "loss": 2.4147, + "step": 13610 + }, + { + "epoch": 1.0984585586312647, + "grad_norm": 0.6589440703392029, + "learning_rate": 4.6724247375330786e-05, + "loss": 2.4718, + "step": 13611 + }, + { + "epoch": 1.0985392623678476, + "grad_norm": 0.7032667994499207, + "learning_rate": 4.671088809390324e-05, + "loss": 2.4724, + "step": 13612 + }, + { + "epoch": 
1.0986199661044307, + "grad_norm": 0.7544135451316833, + "learning_rate": 4.6697530140579646e-05, + "loss": 2.4804, + "step": 13613 + }, + { + "epoch": 1.0987006698410136, + "grad_norm": 0.6503081917762756, + "learning_rate": 4.668417351569295e-05, + "loss": 2.3829, + "step": 13614 + }, + { + "epoch": 1.0987813735775966, + "grad_norm": 0.6928786039352417, + "learning_rate": 4.667081821957605e-05, + "loss": 2.5678, + "step": 13615 + }, + { + "epoch": 1.0988620773141797, + "grad_norm": 0.6652864217758179, + "learning_rate": 4.665746425256173e-05, + "loss": 2.4585, + "step": 13616 + }, + { + "epoch": 1.0989427810507626, + "grad_norm": 0.700265109539032, + "learning_rate": 4.664411161498283e-05, + "loss": 2.4785, + "step": 13617 + }, + { + "epoch": 1.0990234847873457, + "grad_norm": 0.7443608045578003, + "learning_rate": 4.663076030717216e-05, + "loss": 2.4869, + "step": 13618 + }, + { + "epoch": 1.0991041885239285, + "grad_norm": 0.7037705779075623, + "learning_rate": 4.6617410329462477e-05, + "loss": 2.4518, + "step": 13619 + }, + { + "epoch": 1.0991848922605116, + "grad_norm": 0.7528365850448608, + "learning_rate": 4.660406168218643e-05, + "loss": 2.4616, + "step": 13620 + }, + { + "epoch": 1.0992655959970947, + "grad_norm": 0.7149221301078796, + "learning_rate": 4.659071436567676e-05, + "loss": 2.4661, + "step": 13621 + }, + { + "epoch": 1.0993462997336776, + "grad_norm": 0.7212862968444824, + "learning_rate": 4.657736838026608e-05, + "loss": 2.4424, + "step": 13622 + }, + { + "epoch": 1.0994270034702607, + "grad_norm": 0.6934216022491455, + "learning_rate": 4.6564023726287045e-05, + "loss": 2.4633, + "step": 13623 + }, + { + "epoch": 1.0995077072068438, + "grad_norm": 0.7244036793708801, + "learning_rate": 4.655068040407221e-05, + "loss": 2.409, + "step": 13624 + }, + { + "epoch": 1.0995884109434266, + "grad_norm": 0.6911318898200989, + "learning_rate": 4.653733841395419e-05, + "loss": 2.5117, + "step": 13625 + }, + { + "epoch": 1.0996691146800097, + "grad_norm": 
0.7579816579818726, + "learning_rate": 4.65239977562654e-05, + "loss": 2.4927, + "step": 13626 + }, + { + "epoch": 1.0997498184165928, + "grad_norm": 0.7699651122093201, + "learning_rate": 4.651065843133837e-05, + "loss": 2.4083, + "step": 13627 + }, + { + "epoch": 1.0998305221531757, + "grad_norm": 0.6669431328773499, + "learning_rate": 4.649732043950561e-05, + "loss": 2.4402, + "step": 13628 + }, + { + "epoch": 1.0999112258897588, + "grad_norm": 0.7134940028190613, + "learning_rate": 4.6483983781099426e-05, + "loss": 2.4275, + "step": 13629 + }, + { + "epoch": 1.0999919296263416, + "grad_norm": 0.7107651233673096, + "learning_rate": 4.647064845645227e-05, + "loss": 2.4654, + "step": 13630 + }, + { + "epoch": 1.1000726333629247, + "grad_norm": 0.7101391553878784, + "learning_rate": 4.645731446589652e-05, + "loss": 2.4357, + "step": 13631 + }, + { + "epoch": 1.1001533370995078, + "grad_norm": 0.7511606216430664, + "learning_rate": 4.6443981809764405e-05, + "loss": 2.5016, + "step": 13632 + }, + { + "epoch": 1.1002340408360907, + "grad_norm": 0.7315953373908997, + "learning_rate": 4.6430650488388226e-05, + "loss": 2.4541, + "step": 13633 + }, + { + "epoch": 1.1003147445726738, + "grad_norm": 0.6701769232749939, + "learning_rate": 4.6417320502100316e-05, + "loss": 2.4071, + "step": 13634 + }, + { + "epoch": 1.1003954483092566, + "grad_norm": 0.7164294123649597, + "learning_rate": 4.6403991851232876e-05, + "loss": 2.478, + "step": 13635 + }, + { + "epoch": 1.1004761520458397, + "grad_norm": 0.7003894448280334, + "learning_rate": 4.639066453611802e-05, + "loss": 2.4686, + "step": 13636 + }, + { + "epoch": 1.1005568557824228, + "grad_norm": 0.6855250000953674, + "learning_rate": 4.6377338557087957e-05, + "loss": 2.4531, + "step": 13637 + }, + { + "epoch": 1.1006375595190057, + "grad_norm": 0.6581299901008606, + "learning_rate": 4.6364013914474816e-05, + "loss": 2.4511, + "step": 13638 + }, + { + "epoch": 1.1007182632555887, + "grad_norm": 0.7599080204963684, + 
"learning_rate": 4.6350690608610604e-05, + "loss": 2.5143, + "step": 13639 + }, + { + "epoch": 1.1007989669921718, + "grad_norm": 0.7029981017112732, + "learning_rate": 4.633736863982744e-05, + "loss": 2.4541, + "step": 13640 + }, + { + "epoch": 1.1008796707287547, + "grad_norm": 0.7378708720207214, + "learning_rate": 4.6324048008457357e-05, + "loss": 2.4319, + "step": 13641 + }, + { + "epoch": 1.1009603744653378, + "grad_norm": 0.7087826728820801, + "learning_rate": 4.631072871483226e-05, + "loss": 2.4148, + "step": 13642 + }, + { + "epoch": 1.1010410782019207, + "grad_norm": 0.7000819444656372, + "learning_rate": 4.629741075928415e-05, + "loss": 2.4692, + "step": 13643 + }, + { + "epoch": 1.1011217819385037, + "grad_norm": 0.7363965511322021, + "learning_rate": 4.628409414214496e-05, + "loss": 2.4584, + "step": 13644 + }, + { + "epoch": 1.1012024856750868, + "grad_norm": 0.6691753268241882, + "learning_rate": 4.627077886374656e-05, + "loss": 2.4356, + "step": 13645 + }, + { + "epoch": 1.1012831894116697, + "grad_norm": 0.6864185929298401, + "learning_rate": 4.625746492442078e-05, + "loss": 2.4713, + "step": 13646 + }, + { + "epoch": 1.1013638931482528, + "grad_norm": 0.714318573474884, + "learning_rate": 4.624415232449947e-05, + "loss": 2.4482, + "step": 13647 + }, + { + "epoch": 1.1014445968848359, + "grad_norm": 0.6383495330810547, + "learning_rate": 4.623084106431444e-05, + "loss": 2.4248, + "step": 13648 + }, + { + "epoch": 1.1015253006214187, + "grad_norm": 0.7014495730400085, + "learning_rate": 4.6217531144197365e-05, + "loss": 2.4393, + "step": 13649 + }, + { + "epoch": 1.1016060043580018, + "grad_norm": 0.8128634095191956, + "learning_rate": 4.620422256448e-05, + "loss": 2.4741, + "step": 13650 + }, + { + "epoch": 1.1016867080945847, + "grad_norm": 0.7333208322525024, + "learning_rate": 4.619091532549408e-05, + "loss": 2.4288, + "step": 13651 + }, + { + "epoch": 1.1017674118311678, + "grad_norm": 0.7023218274116516, + "learning_rate": 
4.617760942757117e-05, + "loss": 2.5025, + "step": 13652 + }, + { + "epoch": 1.1018481155677509, + "grad_norm": 0.6420873403549194, + "learning_rate": 4.616430487104292e-05, + "loss": 2.4165, + "step": 13653 + }, + { + "epoch": 1.1019288193043337, + "grad_norm": 0.6767684817314148, + "learning_rate": 4.615100165624092e-05, + "loss": 2.4642, + "step": 13654 + }, + { + "epoch": 1.1020095230409168, + "grad_norm": 0.7361159920692444, + "learning_rate": 4.613769978349672e-05, + "loss": 2.5343, + "step": 13655 + }, + { + "epoch": 1.1020902267775, + "grad_norm": 0.6642624735832214, + "learning_rate": 4.6124399253141846e-05, + "loss": 2.3769, + "step": 13656 + }, + { + "epoch": 1.1021709305140828, + "grad_norm": 0.6912256479263306, + "learning_rate": 4.611110006550781e-05, + "loss": 2.455, + "step": 13657 + }, + { + "epoch": 1.1022516342506659, + "grad_norm": 0.7419310212135315, + "learning_rate": 4.609780222092599e-05, + "loss": 2.4171, + "step": 13658 + }, + { + "epoch": 1.1023323379872487, + "grad_norm": 0.718953549861908, + "learning_rate": 4.6084505719727835e-05, + "loss": 2.4791, + "step": 13659 + }, + { + "epoch": 1.1024130417238318, + "grad_norm": 0.7904248237609863, + "learning_rate": 4.607121056224477e-05, + "loss": 2.4429, + "step": 13660 + }, + { + "epoch": 1.102493745460415, + "grad_norm": 0.6743534803390503, + "learning_rate": 4.605791674880808e-05, + "loss": 2.4481, + "step": 13661 + }, + { + "epoch": 1.1025744491969978, + "grad_norm": 0.6829143166542053, + "learning_rate": 4.6044624279749106e-05, + "loss": 2.4078, + "step": 13662 + }, + { + "epoch": 1.1026551529335809, + "grad_norm": 0.6803167462348938, + "learning_rate": 4.6031333155399136e-05, + "loss": 2.4509, + "step": 13663 + }, + { + "epoch": 1.1027358566701637, + "grad_norm": 0.7474592328071594, + "learning_rate": 4.601804337608943e-05, + "loss": 2.4563, + "step": 13664 + }, + { + "epoch": 1.1028165604067468, + "grad_norm": 0.6753630042076111, + "learning_rate": 4.6004754942151174e-05, + "loss": 
2.4285, + "step": 13665 + }, + { + "epoch": 1.10289726414333, + "grad_norm": 0.7990161180496216, + "learning_rate": 4.599146785391558e-05, + "loss": 2.4907, + "step": 13666 + }, + { + "epoch": 1.1029779678799128, + "grad_norm": 0.8161290287971497, + "learning_rate": 4.597818211171383e-05, + "loss": 2.4599, + "step": 13667 + }, + { + "epoch": 1.1030586716164958, + "grad_norm": 0.6813610792160034, + "learning_rate": 4.596489771587695e-05, + "loss": 2.4484, + "step": 13668 + }, + { + "epoch": 1.103139375353079, + "grad_norm": 0.6598966121673584, + "learning_rate": 4.5951614666736076e-05, + "loss": 2.4326, + "step": 13669 + }, + { + "epoch": 1.1032200790896618, + "grad_norm": 0.7084827423095703, + "learning_rate": 4.593833296462228e-05, + "loss": 2.4188, + "step": 13670 + }, + { + "epoch": 1.1033007828262449, + "grad_norm": 0.6876685619354248, + "learning_rate": 4.59250526098665e-05, + "loss": 2.4482, + "step": 13671 + }, + { + "epoch": 1.103381486562828, + "grad_norm": 0.7292699813842773, + "learning_rate": 4.591177360279978e-05, + "loss": 2.4452, + "step": 13672 + }, + { + "epoch": 1.1034621902994108, + "grad_norm": 0.7057675123214722, + "learning_rate": 4.589849594375304e-05, + "loss": 2.4336, + "step": 13673 + }, + { + "epoch": 1.103542894035994, + "grad_norm": 0.7684180736541748, + "learning_rate": 4.5885219633057196e-05, + "loss": 2.4453, + "step": 13674 + }, + { + "epoch": 1.1036235977725768, + "grad_norm": 0.7107112407684326, + "learning_rate": 4.5871944671043154e-05, + "loss": 2.4116, + "step": 13675 + }, + { + "epoch": 1.1037043015091599, + "grad_norm": 0.659501314163208, + "learning_rate": 4.585867105804177e-05, + "loss": 2.4907, + "step": 13676 + }, + { + "epoch": 1.103785005245743, + "grad_norm": 0.7553967833518982, + "learning_rate": 4.5845398794383786e-05, + "loss": 2.3982, + "step": 13677 + }, + { + "epoch": 1.1038657089823258, + "grad_norm": 0.6861104965209961, + "learning_rate": 4.583212788040003e-05, + "loss": 2.416, + "step": 13678 + }, + { + 
"epoch": 1.103946412718909, + "grad_norm": 0.6546811461448669, + "learning_rate": 4.5818858316421254e-05, + "loss": 2.4506, + "step": 13679 + }, + { + "epoch": 1.1040271164554918, + "grad_norm": 0.7012909650802612, + "learning_rate": 4.58055901027782e-05, + "loss": 2.439, + "step": 13680 + }, + { + "epoch": 1.1041078201920749, + "grad_norm": 0.7594780325889587, + "learning_rate": 4.5792323239801446e-05, + "loss": 2.4437, + "step": 13681 + }, + { + "epoch": 1.104188523928658, + "grad_norm": 0.6576492190361023, + "learning_rate": 4.577905772782172e-05, + "loss": 2.443, + "step": 13682 + }, + { + "epoch": 1.1042692276652408, + "grad_norm": 0.6751925349235535, + "learning_rate": 4.576579356716963e-05, + "loss": 2.507, + "step": 13683 + }, + { + "epoch": 1.104349931401824, + "grad_norm": 0.7206710577011108, + "learning_rate": 4.575253075817567e-05, + "loss": 2.4236, + "step": 13684 + }, + { + "epoch": 1.104430635138407, + "grad_norm": 0.7736170291900635, + "learning_rate": 4.5739269301170485e-05, + "loss": 2.4095, + "step": 13685 + }, + { + "epoch": 1.1045113388749899, + "grad_norm": 0.6901736855506897, + "learning_rate": 4.572600919648457e-05, + "loss": 2.4519, + "step": 13686 + }, + { + "epoch": 1.104592042611573, + "grad_norm": 0.7762539982795715, + "learning_rate": 4.571275044444836e-05, + "loss": 2.5018, + "step": 13687 + }, + { + "epoch": 1.1046727463481558, + "grad_norm": 0.7231423854827881, + "learning_rate": 4.569949304539232e-05, + "loss": 2.4553, + "step": 13688 + }, + { + "epoch": 1.104753450084739, + "grad_norm": 0.7713531255722046, + "learning_rate": 4.568623699964688e-05, + "loss": 2.49, + "step": 13689 + }, + { + "epoch": 1.104834153821322, + "grad_norm": 0.7355079650878906, + "learning_rate": 4.5672982307542354e-05, + "loss": 2.5191, + "step": 13690 + }, + { + "epoch": 1.1049148575579049, + "grad_norm": 0.6916452050209045, + "learning_rate": 4.565972896940913e-05, + "loss": 2.3867, + "step": 13691 + }, + { + "epoch": 1.104995561294488, + "grad_norm": 
0.6622549295425415, + "learning_rate": 4.5646476985577544e-05, + "loss": 2.4364, + "step": 13692 + }, + { + "epoch": 1.105076265031071, + "grad_norm": 0.6683297157287598, + "learning_rate": 4.563322635637779e-05, + "loss": 2.43, + "step": 13693 + }, + { + "epoch": 1.105156968767654, + "grad_norm": 0.6857880353927612, + "learning_rate": 4.561997708214015e-05, + "loss": 2.4515, + "step": 13694 + }, + { + "epoch": 1.105237672504237, + "grad_norm": 0.7473817467689514, + "learning_rate": 4.5606729163194807e-05, + "loss": 2.442, + "step": 13695 + }, + { + "epoch": 1.1053183762408199, + "grad_norm": 0.6988846063613892, + "learning_rate": 4.559348259987203e-05, + "loss": 2.3886, + "step": 13696 + }, + { + "epoch": 1.105399079977403, + "grad_norm": 0.6450650691986084, + "learning_rate": 4.5580237392501836e-05, + "loss": 2.4647, + "step": 13697 + }, + { + "epoch": 1.105479783713986, + "grad_norm": 0.7669623494148254, + "learning_rate": 4.556699354141439e-05, + "loss": 2.4362, + "step": 13698 + }, + { + "epoch": 1.105560487450569, + "grad_norm": 0.7019730806350708, + "learning_rate": 4.55537510469398e-05, + "loss": 2.49, + "step": 13699 + }, + { + "epoch": 1.105641191187152, + "grad_norm": 0.6736636757850647, + "learning_rate": 4.5540509909408e-05, + "loss": 2.43, + "step": 13700 + }, + { + "epoch": 1.105721894923735, + "grad_norm": 0.6872034668922424, + "learning_rate": 4.552727012914907e-05, + "loss": 2.4507, + "step": 13701 + }, + { + "epoch": 1.105802598660318, + "grad_norm": 0.6726621985435486, + "learning_rate": 4.5514031706492986e-05, + "loss": 2.4193, + "step": 13702 + }, + { + "epoch": 1.105883302396901, + "grad_norm": 0.7345453500747681, + "learning_rate": 4.550079464176963e-05, + "loss": 2.4257, + "step": 13703 + }, + { + "epoch": 1.105964006133484, + "grad_norm": 0.6764804124832153, + "learning_rate": 4.548755893530894e-05, + "loss": 2.4656, + "step": 13704 + }, + { + "epoch": 1.106044709870067, + "grad_norm": 0.6915058493614197, + "learning_rate": 
4.5474324587440766e-05, + "loss": 2.4148, + "step": 13705 + }, + { + "epoch": 1.10612541360665, + "grad_norm": 0.7960236668586731, + "learning_rate": 4.5461091598494954e-05, + "loss": 2.4148, + "step": 13706 + }, + { + "epoch": 1.106206117343233, + "grad_norm": 0.7058970928192139, + "learning_rate": 4.544785996880131e-05, + "loss": 2.4795, + "step": 13707 + }, + { + "epoch": 1.106286821079816, + "grad_norm": 0.6979549527168274, + "learning_rate": 4.5434629698689634e-05, + "loss": 2.4329, + "step": 13708 + }, + { + "epoch": 1.1063675248163989, + "grad_norm": 0.6805241107940674, + "learning_rate": 4.5421400788489586e-05, + "loss": 2.4303, + "step": 13709 + }, + { + "epoch": 1.106448228552982, + "grad_norm": 0.7566354274749756, + "learning_rate": 4.5408173238530905e-05, + "loss": 2.4769, + "step": 13710 + }, + { + "epoch": 1.106528932289565, + "grad_norm": 0.647773802280426, + "learning_rate": 4.539494704914324e-05, + "loss": 2.4037, + "step": 13711 + }, + { + "epoch": 1.106609636026148, + "grad_norm": 0.7248135209083557, + "learning_rate": 4.538172222065628e-05, + "loss": 2.4366, + "step": 13712 + }, + { + "epoch": 1.106690339762731, + "grad_norm": 0.6861057281494141, + "learning_rate": 4.536849875339953e-05, + "loss": 2.456, + "step": 13713 + }, + { + "epoch": 1.106771043499314, + "grad_norm": 0.7386166453361511, + "learning_rate": 4.5355276647702605e-05, + "loss": 2.4806, + "step": 13714 + }, + { + "epoch": 1.106851747235897, + "grad_norm": 0.664402961730957, + "learning_rate": 4.534205590389503e-05, + "loss": 2.4846, + "step": 13715 + }, + { + "epoch": 1.10693245097248, + "grad_norm": 0.8123969435691833, + "learning_rate": 4.5328836522306296e-05, + "loss": 2.4945, + "step": 13716 + }, + { + "epoch": 1.1070131547090631, + "grad_norm": 0.7375624775886536, + "learning_rate": 4.5315618503265865e-05, + "loss": 2.4533, + "step": 13717 + }, + { + "epoch": 1.107093858445646, + "grad_norm": 0.70960932970047, + "learning_rate": 4.53024018471032e-05, + "loss": 2.4351, + 
"step": 13718 + }, + { + "epoch": 1.107174562182229, + "grad_norm": 0.7170885801315308, + "learning_rate": 4.5289186554147645e-05, + "loss": 2.4654, + "step": 13719 + }, + { + "epoch": 1.107255265918812, + "grad_norm": 0.6986895203590393, + "learning_rate": 4.5275972624728556e-05, + "loss": 2.4079, + "step": 13720 + }, + { + "epoch": 1.107335969655395, + "grad_norm": 0.6948813796043396, + "learning_rate": 4.526276005917532e-05, + "loss": 2.4981, + "step": 13721 + }, + { + "epoch": 1.1074166733919781, + "grad_norm": 0.7719457149505615, + "learning_rate": 4.524954885781717e-05, + "loss": 2.4853, + "step": 13722 + }, + { + "epoch": 1.107497377128561, + "grad_norm": 0.652686357498169, + "learning_rate": 4.5236339020983363e-05, + "loss": 2.3672, + "step": 13723 + }, + { + "epoch": 1.107578080865144, + "grad_norm": 0.7517427802085876, + "learning_rate": 4.5223130549003144e-05, + "loss": 2.3947, + "step": 13724 + }, + { + "epoch": 1.107658784601727, + "grad_norm": 0.6755498647689819, + "learning_rate": 4.5209923442205705e-05, + "loss": 2.4173, + "step": 13725 + }, + { + "epoch": 1.10773948833831, + "grad_norm": 0.6801806688308716, + "learning_rate": 4.519671770092019e-05, + "loss": 2.4366, + "step": 13726 + }, + { + "epoch": 1.1078201920748931, + "grad_norm": 0.6665045619010925, + "learning_rate": 4.5183513325475724e-05, + "loss": 2.4797, + "step": 13727 + }, + { + "epoch": 1.107900895811476, + "grad_norm": 0.7303451299667358, + "learning_rate": 4.517031031620145e-05, + "loss": 2.4487, + "step": 13728 + }, + { + "epoch": 1.107981599548059, + "grad_norm": 0.7241206765174866, + "learning_rate": 4.515710867342632e-05, + "loss": 2.4632, + "step": 13729 + }, + { + "epoch": 1.1080623032846422, + "grad_norm": 0.738835334777832, + "learning_rate": 4.514390839747941e-05, + "loss": 2.3937, + "step": 13730 + }, + { + "epoch": 1.108143007021225, + "grad_norm": 0.7062843441963196, + "learning_rate": 4.5130709488689726e-05, + "loss": 2.4576, + "step": 13731 + }, + { + "epoch": 
1.1082237107578081, + "grad_norm": 0.7074100971221924, + "learning_rate": 4.511751194738616e-05, + "loss": 2.4843, + "step": 13732 + }, + { + "epoch": 1.108304414494391, + "grad_norm": 0.751742959022522, + "learning_rate": 4.510431577389765e-05, + "loss": 2.4607, + "step": 13733 + }, + { + "epoch": 1.108385118230974, + "grad_norm": 0.7370054125785828, + "learning_rate": 4.50911209685531e-05, + "loss": 2.4877, + "step": 13734 + }, + { + "epoch": 1.1084658219675572, + "grad_norm": 0.6410251259803772, + "learning_rate": 4.507792753168135e-05, + "loss": 2.4254, + "step": 13735 + }, + { + "epoch": 1.10854652570414, + "grad_norm": 0.7141317129135132, + "learning_rate": 4.506473546361121e-05, + "loss": 2.4962, + "step": 13736 + }, + { + "epoch": 1.1086272294407231, + "grad_norm": 0.6903412342071533, + "learning_rate": 4.50515447646715e-05, + "loss": 2.4315, + "step": 13737 + }, + { + "epoch": 1.1087079331773062, + "grad_norm": 0.7068564891815186, + "learning_rate": 4.50383554351909e-05, + "loss": 2.5795, + "step": 13738 + }, + { + "epoch": 1.108788636913889, + "grad_norm": 0.6880627274513245, + "learning_rate": 4.5025167475498154e-05, + "loss": 2.4399, + "step": 13739 + }, + { + "epoch": 1.1088693406504722, + "grad_norm": 0.6721192598342896, + "learning_rate": 4.5011980885921965e-05, + "loss": 2.4651, + "step": 13740 + }, + { + "epoch": 1.108950044387055, + "grad_norm": 0.7084259986877441, + "learning_rate": 4.499879566679093e-05, + "loss": 2.4121, + "step": 13741 + }, + { + "epoch": 1.109030748123638, + "grad_norm": 0.6809335947036743, + "learning_rate": 4.498561181843368e-05, + "loss": 2.4714, + "step": 13742 + }, + { + "epoch": 1.1091114518602212, + "grad_norm": 0.690416693687439, + "learning_rate": 4.497242934117879e-05, + "loss": 2.4744, + "step": 13743 + }, + { + "epoch": 1.109192155596804, + "grad_norm": 0.728522002696991, + "learning_rate": 4.495924823535483e-05, + "loss": 2.4374, + "step": 13744 + }, + { + "epoch": 1.1092728593333872, + "grad_norm": 
0.7000796794891357, + "learning_rate": 4.494606850129026e-05, + "loss": 2.4635, + "step": 13745 + }, + { + "epoch": 1.1093535630699702, + "grad_norm": 0.824645459651947, + "learning_rate": 4.493289013931353e-05, + "loss": 2.3724, + "step": 13746 + }, + { + "epoch": 1.109434266806553, + "grad_norm": 0.6561198830604553, + "learning_rate": 4.491971314975321e-05, + "loss": 2.3726, + "step": 13747 + }, + { + "epoch": 1.1095149705431362, + "grad_norm": 0.7067599892616272, + "learning_rate": 4.490653753293757e-05, + "loss": 2.4285, + "step": 13748 + }, + { + "epoch": 1.109595674279719, + "grad_norm": 0.6954898834228516, + "learning_rate": 4.489336328919503e-05, + "loss": 2.4252, + "step": 13749 + }, + { + "epoch": 1.1096763780163021, + "grad_norm": 0.6683667302131653, + "learning_rate": 4.4880190418853974e-05, + "loss": 2.4815, + "step": 13750 + }, + { + "epoch": 1.1097570817528852, + "grad_norm": 0.7554971575737, + "learning_rate": 4.486701892224261e-05, + "loss": 2.5036, + "step": 13751 + }, + { + "epoch": 1.109837785489468, + "grad_norm": 0.7043242454528809, + "learning_rate": 4.485384879968926e-05, + "loss": 2.3757, + "step": 13752 + }, + { + "epoch": 1.1099184892260512, + "grad_norm": 0.8016893863677979, + "learning_rate": 4.4840680051522186e-05, + "loss": 2.4655, + "step": 13753 + }, + { + "epoch": 1.1099991929626343, + "grad_norm": 0.7022131085395813, + "learning_rate": 4.4827512678069515e-05, + "loss": 2.475, + "step": 13754 + }, + { + "epoch": 1.1100798966992171, + "grad_norm": 0.6963247656822205, + "learning_rate": 4.4814346679659455e-05, + "loss": 2.4866, + "step": 13755 + }, + { + "epoch": 1.1101606004358002, + "grad_norm": 0.6980907917022705, + "learning_rate": 4.4801182056620125e-05, + "loss": 2.4322, + "step": 13756 + }, + { + "epoch": 1.110241304172383, + "grad_norm": 0.68063884973526, + "learning_rate": 4.478801880927964e-05, + "loss": 2.426, + "step": 13757 + }, + { + "epoch": 1.1103220079089662, + "grad_norm": 0.7454195618629456, + "learning_rate": 
4.477485693796605e-05, + "loss": 2.5042, + "step": 13758 + }, + { + "epoch": 1.1104027116455493, + "grad_norm": 0.685975193977356, + "learning_rate": 4.476169644300737e-05, + "loss": 2.4874, + "step": 13759 + }, + { + "epoch": 1.1104834153821321, + "grad_norm": 0.7060961723327637, + "learning_rate": 4.4748537324731664e-05, + "loss": 2.4126, + "step": 13760 + }, + { + "epoch": 1.1105641191187152, + "grad_norm": 0.6794416904449463, + "learning_rate": 4.4735379583466795e-05, + "loss": 2.4112, + "step": 13761 + }, + { + "epoch": 1.1106448228552983, + "grad_norm": 0.6854961514472961, + "learning_rate": 4.472222321954073e-05, + "loss": 2.4909, + "step": 13762 + }, + { + "epoch": 1.1107255265918812, + "grad_norm": 0.7660776972770691, + "learning_rate": 4.470906823328139e-05, + "loss": 2.5021, + "step": 13763 + }, + { + "epoch": 1.1108062303284643, + "grad_norm": 0.7027743458747864, + "learning_rate": 4.4695914625016564e-05, + "loss": 2.4375, + "step": 13764 + }, + { + "epoch": 1.1108869340650471, + "grad_norm": 0.6896719336509705, + "learning_rate": 4.468276239507413e-05, + "loss": 2.4574, + "step": 13765 + }, + { + "epoch": 1.1109676378016302, + "grad_norm": 0.685141384601593, + "learning_rate": 4.4669611543781844e-05, + "loss": 2.4311, + "step": 13766 + }, + { + "epoch": 1.1110483415382133, + "grad_norm": 0.7108263373374939, + "learning_rate": 4.465646207146746e-05, + "loss": 2.4565, + "step": 13767 + }, + { + "epoch": 1.1111290452747962, + "grad_norm": 0.63578861951828, + "learning_rate": 4.464331397845873e-05, + "loss": 2.449, + "step": 13768 + }, + { + "epoch": 1.1112097490113793, + "grad_norm": 0.6917306780815125, + "learning_rate": 4.463016726508335e-05, + "loss": 2.4681, + "step": 13769 + }, + { + "epoch": 1.1112904527479621, + "grad_norm": 0.7328054308891296, + "learning_rate": 4.4617021931668914e-05, + "loss": 2.404, + "step": 13770 + }, + { + "epoch": 1.1113711564845452, + "grad_norm": 0.6501660943031311, + "learning_rate": 4.460387797854305e-05, + "loss": 
2.4228, + "step": 13771 + }, + { + "epoch": 1.1114518602211283, + "grad_norm": 0.6656771302223206, + "learning_rate": 4.459073540603336e-05, + "loss": 2.4814, + "step": 13772 + }, + { + "epoch": 1.1115325639577112, + "grad_norm": 0.671017587184906, + "learning_rate": 4.457759421446742e-05, + "loss": 2.4605, + "step": 13773 + }, + { + "epoch": 1.1116132676942942, + "grad_norm": 0.6715343594551086, + "learning_rate": 4.456445440417267e-05, + "loss": 2.424, + "step": 13774 + }, + { + "epoch": 1.1116939714308773, + "grad_norm": 0.7051515579223633, + "learning_rate": 4.4551315975476626e-05, + "loss": 2.4358, + "step": 13775 + }, + { + "epoch": 1.1117746751674602, + "grad_norm": 0.7810437679290771, + "learning_rate": 4.453817892870673e-05, + "loss": 2.4718, + "step": 13776 + }, + { + "epoch": 1.1118553789040433, + "grad_norm": 0.7072561383247375, + "learning_rate": 4.4525043264190405e-05, + "loss": 2.4429, + "step": 13777 + }, + { + "epoch": 1.1119360826406264, + "grad_norm": 0.7949702143669128, + "learning_rate": 4.4511908982255e-05, + "loss": 2.4413, + "step": 13778 + }, + { + "epoch": 1.1120167863772092, + "grad_norm": 0.6716235876083374, + "learning_rate": 4.449877608322792e-05, + "loss": 2.427, + "step": 13779 + }, + { + "epoch": 1.1120974901137923, + "grad_norm": 0.7332563996315002, + "learning_rate": 4.448564456743638e-05, + "loss": 2.4567, + "step": 13780 + }, + { + "epoch": 1.1121781938503752, + "grad_norm": 0.7264607548713684, + "learning_rate": 4.447251443520769e-05, + "loss": 2.4844, + "step": 13781 + }, + { + "epoch": 1.1122588975869583, + "grad_norm": 0.7819967865943909, + "learning_rate": 4.4459385686869136e-05, + "loss": 2.5129, + "step": 13782 + }, + { + "epoch": 1.1123396013235414, + "grad_norm": 0.7587651610374451, + "learning_rate": 4.4446258322747824e-05, + "loss": 2.4714, + "step": 13783 + }, + { + "epoch": 1.1124203050601242, + "grad_norm": 0.6392871141433716, + "learning_rate": 4.443313234317099e-05, + "loss": 2.462, + "step": 13784 + }, + { + 
"epoch": 1.1125010087967073, + "grad_norm": 0.6609585881233215, + "learning_rate": 4.442000774846574e-05, + "loss": 2.4566, + "step": 13785 + }, + { + "epoch": 1.1125817125332902, + "grad_norm": 0.762924075126648, + "learning_rate": 4.440688453895919e-05, + "loss": 2.4613, + "step": 13786 + }, + { + "epoch": 1.1126624162698733, + "grad_norm": 0.7096089124679565, + "learning_rate": 4.4393762714978394e-05, + "loss": 2.4195, + "step": 13787 + }, + { + "epoch": 1.1127431200064564, + "grad_norm": 0.6663284301757812, + "learning_rate": 4.438064227685039e-05, + "loss": 2.422, + "step": 13788 + }, + { + "epoch": 1.1128238237430392, + "grad_norm": 0.6653628945350647, + "learning_rate": 4.436752322490221e-05, + "loss": 2.4477, + "step": 13789 + }, + { + "epoch": 1.1129045274796223, + "grad_norm": 0.6527605056762695, + "learning_rate": 4.435440555946073e-05, + "loss": 2.3874, + "step": 13790 + }, + { + "epoch": 1.1129852312162054, + "grad_norm": 0.6801275014877319, + "learning_rate": 4.4341289280852935e-05, + "loss": 2.4474, + "step": 13791 + }, + { + "epoch": 1.1130659349527883, + "grad_norm": 0.729905366897583, + "learning_rate": 4.432817438940574e-05, + "loss": 2.4711, + "step": 13792 + }, + { + "epoch": 1.1131466386893714, + "grad_norm": 0.7074751853942871, + "learning_rate": 4.431506088544593e-05, + "loss": 2.451, + "step": 13793 + }, + { + "epoch": 1.1132273424259542, + "grad_norm": 0.7241154313087463, + "learning_rate": 4.430194876930035e-05, + "loss": 2.4883, + "step": 13794 + }, + { + "epoch": 1.1133080461625373, + "grad_norm": 0.6549142003059387, + "learning_rate": 4.428883804129586e-05, + "loss": 2.4243, + "step": 13795 + }, + { + "epoch": 1.1133887498991204, + "grad_norm": 0.7046780586242676, + "learning_rate": 4.427572870175907e-05, + "loss": 2.4143, + "step": 13796 + }, + { + "epoch": 1.1134694536357033, + "grad_norm": 0.6563952565193176, + "learning_rate": 4.426262075101682e-05, + "loss": 2.416, + "step": 13797 + }, + { + "epoch": 1.1135501573722864, + 
"grad_norm": 0.7002081871032715, + "learning_rate": 4.4249514189395803e-05, + "loss": 2.3673, + "step": 13798 + }, + { + "epoch": 1.1136308611088694, + "grad_norm": 0.6766571998596191, + "learning_rate": 4.423640901722259e-05, + "loss": 2.4941, + "step": 13799 + }, + { + "epoch": 1.1137115648454523, + "grad_norm": 0.7404381632804871, + "learning_rate": 4.422330523482383e-05, + "loss": 2.4794, + "step": 13800 + }, + { + "epoch": 1.1137922685820354, + "grad_norm": 0.6670998930931091, + "learning_rate": 4.421020284252614e-05, + "loss": 2.5131, + "step": 13801 + }, + { + "epoch": 1.1138729723186183, + "grad_norm": 0.803720235824585, + "learning_rate": 4.4197101840655995e-05, + "loss": 2.4751, + "step": 13802 + }, + { + "epoch": 1.1139536760552013, + "grad_norm": 0.6532074809074402, + "learning_rate": 4.4184002229539947e-05, + "loss": 2.4147, + "step": 13803 + }, + { + "epoch": 1.1140343797917844, + "grad_norm": 0.6548035144805908, + "learning_rate": 4.417090400950447e-05, + "loss": 2.4601, + "step": 13804 + }, + { + "epoch": 1.1141150835283673, + "grad_norm": 0.6971763968467712, + "learning_rate": 4.415780718087603e-05, + "loss": 2.4752, + "step": 13805 + }, + { + "epoch": 1.1141957872649504, + "grad_norm": 0.6624024510383606, + "learning_rate": 4.414471174398098e-05, + "loss": 2.4183, + "step": 13806 + }, + { + "epoch": 1.1142764910015335, + "grad_norm": 0.6571507453918457, + "learning_rate": 4.4131617699145714e-05, + "loss": 2.4747, + "step": 13807 + }, + { + "epoch": 1.1143571947381163, + "grad_norm": 0.7165808081626892, + "learning_rate": 4.411852504669658e-05, + "loss": 2.453, + "step": 13808 + }, + { + "epoch": 1.1144378984746994, + "grad_norm": 0.6708057522773743, + "learning_rate": 4.410543378695988e-05, + "loss": 2.4858, + "step": 13809 + }, + { + "epoch": 1.1145186022112823, + "grad_norm": 0.889302134513855, + "learning_rate": 4.409234392026187e-05, + "loss": 2.4333, + "step": 13810 + }, + { + "epoch": 1.1145993059478654, + "grad_norm": 0.7440677881240845, + 
"learning_rate": 4.407925544692884e-05, + "loss": 2.49, + "step": 13811 + }, + { + "epoch": 1.1146800096844485, + "grad_norm": 0.6688372492790222, + "learning_rate": 4.406616836728691e-05, + "loss": 2.4663, + "step": 13812 + }, + { + "epoch": 1.1147607134210313, + "grad_norm": 0.7108204364776611, + "learning_rate": 4.4053082681662264e-05, + "loss": 2.4843, + "step": 13813 + }, + { + "epoch": 1.1148414171576144, + "grad_norm": 0.7270475029945374, + "learning_rate": 4.4039998390381087e-05, + "loss": 2.4158, + "step": 13814 + }, + { + "epoch": 1.1149221208941973, + "grad_norm": 0.7243396639823914, + "learning_rate": 4.402691549376939e-05, + "loss": 2.3969, + "step": 13815 + }, + { + "epoch": 1.1150028246307804, + "grad_norm": 0.6687803268432617, + "learning_rate": 4.4013833992153285e-05, + "loss": 2.42, + "step": 13816 + }, + { + "epoch": 1.1150835283673635, + "grad_norm": 0.6892626285552979, + "learning_rate": 4.400075388585877e-05, + "loss": 2.4086, + "step": 13817 + }, + { + "epoch": 1.1151642321039463, + "grad_norm": 0.7556231021881104, + "learning_rate": 4.398767517521186e-05, + "loss": 2.4201, + "step": 13818 + }, + { + "epoch": 1.1152449358405294, + "grad_norm": 0.6872838735580444, + "learning_rate": 4.397459786053851e-05, + "loss": 2.4143, + "step": 13819 + }, + { + "epoch": 1.1153256395771125, + "grad_norm": 0.6681817770004272, + "learning_rate": 4.396152194216463e-05, + "loss": 2.4404, + "step": 13820 + }, + { + "epoch": 1.1154063433136954, + "grad_norm": 0.7107201218605042, + "learning_rate": 4.394844742041614e-05, + "loss": 2.4503, + "step": 13821 + }, + { + "epoch": 1.1154870470502785, + "grad_norm": 0.706541121006012, + "learning_rate": 4.3935374295618824e-05, + "loss": 2.5106, + "step": 13822 + }, + { + "epoch": 1.1155677507868615, + "grad_norm": 0.6659905910491943, + "learning_rate": 4.392230256809854e-05, + "loss": 2.3839, + "step": 13823 + }, + { + "epoch": 1.1156484545234444, + "grad_norm": 0.7125810980796814, + "learning_rate": 
4.3909232238181095e-05, + "loss": 2.4463, + "step": 13824 + }, + { + "epoch": 1.1157291582600275, + "grad_norm": 0.6581901907920837, + "learning_rate": 4.389616330619217e-05, + "loss": 2.4004, + "step": 13825 + }, + { + "epoch": 1.1158098619966104, + "grad_norm": 0.7660872340202332, + "learning_rate": 4.388309577245752e-05, + "loss": 2.4685, + "step": 13826 + }, + { + "epoch": 1.1158905657331935, + "grad_norm": 0.699526846408844, + "learning_rate": 4.387002963730281e-05, + "loss": 2.4131, + "step": 13827 + }, + { + "epoch": 1.1159712694697765, + "grad_norm": 0.7031015753746033, + "learning_rate": 4.3856964901053685e-05, + "loss": 2.4476, + "step": 13828 + }, + { + "epoch": 1.1160519732063594, + "grad_norm": 0.6876828074455261, + "learning_rate": 4.384390156403575e-05, + "loss": 2.4402, + "step": 13829 + }, + { + "epoch": 1.1161326769429425, + "grad_norm": 0.7188935279846191, + "learning_rate": 4.3830839626574626e-05, + "loss": 2.4473, + "step": 13830 + }, + { + "epoch": 1.1162133806795254, + "grad_norm": 0.6825287938117981, + "learning_rate": 4.381777908899577e-05, + "loss": 2.4757, + "step": 13831 + }, + { + "epoch": 1.1162940844161084, + "grad_norm": 0.718267560005188, + "learning_rate": 4.380471995162472e-05, + "loss": 2.483, + "step": 13832 + }, + { + "epoch": 1.1163747881526915, + "grad_norm": 0.6526767611503601, + "learning_rate": 4.379166221478697e-05, + "loss": 2.4161, + "step": 13833 + }, + { + "epoch": 1.1164554918892744, + "grad_norm": 0.7541480660438538, + "learning_rate": 4.37786058788079e-05, + "loss": 2.4876, + "step": 13834 + }, + { + "epoch": 1.1165361956258575, + "grad_norm": 0.7144232988357544, + "learning_rate": 4.376555094401294e-05, + "loss": 2.4153, + "step": 13835 + }, + { + "epoch": 1.1166168993624406, + "grad_norm": 0.7544882297515869, + "learning_rate": 4.3752497410727445e-05, + "loss": 2.4634, + "step": 13836 + }, + { + "epoch": 1.1166976030990234, + "grad_norm": 0.7263267040252686, + "learning_rate": 4.373944527927674e-05, + "loss": 
2.5189, + "step": 13837 + }, + { + "epoch": 1.1167783068356065, + "grad_norm": 0.7709252834320068, + "learning_rate": 4.3726394549986135e-05, + "loss": 2.5036, + "step": 13838 + }, + { + "epoch": 1.1168590105721894, + "grad_norm": 0.6849128007888794, + "learning_rate": 4.3713345223180866e-05, + "loss": 2.414, + "step": 13839 + }, + { + "epoch": 1.1169397143087725, + "grad_norm": 0.6807512044906616, + "learning_rate": 4.3700297299186224e-05, + "loss": 2.4924, + "step": 13840 + }, + { + "epoch": 1.1170204180453556, + "grad_norm": 0.6894977688789368, + "learning_rate": 4.3687250778327294e-05, + "loss": 2.4183, + "step": 13841 + }, + { + "epoch": 1.1171011217819384, + "grad_norm": 0.6657617092132568, + "learning_rate": 4.367420566092928e-05, + "loss": 2.448, + "step": 13842 + }, + { + "epoch": 1.1171818255185215, + "grad_norm": 0.7104446291923523, + "learning_rate": 4.366116194731733e-05, + "loss": 2.4862, + "step": 13843 + }, + { + "epoch": 1.1172625292551046, + "grad_norm": 0.7485257387161255, + "learning_rate": 4.3648119637816465e-05, + "loss": 2.4253, + "step": 13844 + }, + { + "epoch": 1.1173432329916875, + "grad_norm": 0.7079899907112122, + "learning_rate": 4.363507873275177e-05, + "loss": 2.4235, + "step": 13845 + }, + { + "epoch": 1.1174239367282706, + "grad_norm": 0.6891573667526245, + "learning_rate": 4.3622039232448274e-05, + "loss": 2.4382, + "step": 13846 + }, + { + "epoch": 1.1175046404648534, + "grad_norm": 0.6886103749275208, + "learning_rate": 4.360900113723086e-05, + "loss": 2.5115, + "step": 13847 + }, + { + "epoch": 1.1175853442014365, + "grad_norm": 0.7511457800865173, + "learning_rate": 4.35959644474246e-05, + "loss": 2.4071, + "step": 13848 + }, + { + "epoch": 1.1176660479380196, + "grad_norm": 0.6526182293891907, + "learning_rate": 4.358292916335437e-05, + "loss": 2.4242, + "step": 13849 + }, + { + "epoch": 1.1177467516746025, + "grad_norm": 0.7385138273239136, + "learning_rate": 4.356989528534499e-05, + "loss": 2.4459, + "step": 13850 + }, + { 
+ "epoch": 1.1178274554111856, + "grad_norm": 0.6668610572814941, + "learning_rate": 4.355686281372132e-05, + "loss": 2.4188, + "step": 13851 + }, + { + "epoch": 1.1179081591477686, + "grad_norm": 0.6950691342353821, + "learning_rate": 4.354383174880818e-05, + "loss": 2.4339, + "step": 13852 + }, + { + "epoch": 1.1179888628843515, + "grad_norm": 0.7017496824264526, + "learning_rate": 4.3530802090930375e-05, + "loss": 2.4733, + "step": 13853 + }, + { + "epoch": 1.1180695666209346, + "grad_norm": 0.8118221759796143, + "learning_rate": 4.351777384041254e-05, + "loss": 2.4826, + "step": 13854 + }, + { + "epoch": 1.1181502703575175, + "grad_norm": 0.7233164310455322, + "learning_rate": 4.350474699757945e-05, + "loss": 2.4637, + "step": 13855 + }, + { + "epoch": 1.1182309740941005, + "grad_norm": 0.6354575157165527, + "learning_rate": 4.349172156275576e-05, + "loss": 2.4487, + "step": 13856 + }, + { + "epoch": 1.1183116778306836, + "grad_norm": 0.6776937246322632, + "learning_rate": 4.347869753626606e-05, + "loss": 2.4292, + "step": 13857 + }, + { + "epoch": 1.1183923815672665, + "grad_norm": 0.6656864881515503, + "learning_rate": 4.3465674918434953e-05, + "loss": 2.484, + "step": 13858 + }, + { + "epoch": 1.1184730853038496, + "grad_norm": 0.7659650444984436, + "learning_rate": 4.345265370958702e-05, + "loss": 2.4181, + "step": 13859 + }, + { + "epoch": 1.1185537890404325, + "grad_norm": 0.6546063423156738, + "learning_rate": 4.3439633910046764e-05, + "loss": 2.4657, + "step": 13860 + }, + { + "epoch": 1.1186344927770155, + "grad_norm": 0.6869762539863586, + "learning_rate": 4.342661552013869e-05, + "loss": 2.513, + "step": 13861 + }, + { + "epoch": 1.1187151965135986, + "grad_norm": 0.6633490324020386, + "learning_rate": 4.3413598540187275e-05, + "loss": 2.4716, + "step": 13862 + }, + { + "epoch": 1.1187959002501815, + "grad_norm": 0.7238267660140991, + "learning_rate": 4.340058297051687e-05, + "loss": 2.4353, + "step": 13863 + }, + { + "epoch": 1.1188766039867646, + 
"grad_norm": 0.67429119348526, + "learning_rate": 4.3387568811451875e-05, + "loss": 2.4808, + "step": 13864 + }, + { + "epoch": 1.1189573077233477, + "grad_norm": 0.6901153326034546, + "learning_rate": 4.33745560633167e-05, + "loss": 2.4785, + "step": 13865 + }, + { + "epoch": 1.1190380114599305, + "grad_norm": 0.7227689027786255, + "learning_rate": 4.336154472643556e-05, + "loss": 2.4414, + "step": 13866 + }, + { + "epoch": 1.1191187151965136, + "grad_norm": 0.713793933391571, + "learning_rate": 4.33485348011328e-05, + "loss": 2.5136, + "step": 13867 + }, + { + "epoch": 1.1191994189330967, + "grad_norm": 0.6495655179023743, + "learning_rate": 4.333552628773263e-05, + "loss": 2.4267, + "step": 13868 + }, + { + "epoch": 1.1192801226696796, + "grad_norm": 0.7265790104866028, + "learning_rate": 4.3322519186559274e-05, + "loss": 2.4406, + "step": 13869 + }, + { + "epoch": 1.1193608264062627, + "grad_norm": 0.6700571179389954, + "learning_rate": 4.330951349793688e-05, + "loss": 2.4457, + "step": 13870 + }, + { + "epoch": 1.1194415301428455, + "grad_norm": 0.7112334966659546, + "learning_rate": 4.3296509222189616e-05, + "loss": 2.4788, + "step": 13871 + }, + { + "epoch": 1.1195222338794286, + "grad_norm": 0.7056662440299988, + "learning_rate": 4.32835063596416e-05, + "loss": 2.5195, + "step": 13872 + }, + { + "epoch": 1.1196029376160117, + "grad_norm": 0.7198836207389832, + "learning_rate": 4.327050491061683e-05, + "loss": 2.4827, + "step": 13873 + }, + { + "epoch": 1.1196836413525946, + "grad_norm": 0.7384079694747925, + "learning_rate": 4.325750487543936e-05, + "loss": 2.4556, + "step": 13874 + }, + { + "epoch": 1.1197643450891777, + "grad_norm": 0.7315430641174316, + "learning_rate": 4.324450625443324e-05, + "loss": 2.4302, + "step": 13875 + }, + { + "epoch": 1.1198450488257605, + "grad_norm": 0.6692587733268738, + "learning_rate": 4.323150904792234e-05, + "loss": 2.5283, + "step": 13876 + }, + { + "epoch": 1.1199257525623436, + "grad_norm": 0.7407168745994568, + 
"learning_rate": 4.321851325623063e-05, + "loss": 2.4757, + "step": 13877 + }, + { + "epoch": 1.1200064562989267, + "grad_norm": 0.7387246489524841, + "learning_rate": 4.3205518879682e-05, + "loss": 2.5025, + "step": 13878 + }, + { + "epoch": 1.1200871600355096, + "grad_norm": 0.8058405518531799, + "learning_rate": 4.319252591860031e-05, + "loss": 2.4951, + "step": 13879 + }, + { + "epoch": 1.1201678637720927, + "grad_norm": 0.6964818835258484, + "learning_rate": 4.317953437330936e-05, + "loss": 2.4462, + "step": 13880 + }, + { + "epoch": 1.1202485675086757, + "grad_norm": 0.6904557347297668, + "learning_rate": 4.316654424413294e-05, + "loss": 2.3981, + "step": 13881 + }, + { + "epoch": 1.1203292712452586, + "grad_norm": 0.6555196046829224, + "learning_rate": 4.315355553139485e-05, + "loss": 2.418, + "step": 13882 + }, + { + "epoch": 1.1204099749818417, + "grad_norm": 0.7745094299316406, + "learning_rate": 4.3140568235418724e-05, + "loss": 2.4635, + "step": 13883 + }, + { + "epoch": 1.1204906787184246, + "grad_norm": 0.686676025390625, + "learning_rate": 4.312758235652825e-05, + "loss": 2.4847, + "step": 13884 + }, + { + "epoch": 1.1205713824550076, + "grad_norm": 0.6937002539634705, + "learning_rate": 4.311459789504714e-05, + "loss": 2.4632, + "step": 13885 + }, + { + "epoch": 1.1206520861915907, + "grad_norm": 0.7024590373039246, + "learning_rate": 4.310161485129891e-05, + "loss": 2.4268, + "step": 13886 + }, + { + "epoch": 1.1207327899281736, + "grad_norm": 0.6848484873771667, + "learning_rate": 4.308863322560717e-05, + "loss": 2.4895, + "step": 13887 + }, + { + "epoch": 1.1208134936647567, + "grad_norm": 0.7071602940559387, + "learning_rate": 4.307565301829546e-05, + "loss": 2.4348, + "step": 13888 + }, + { + "epoch": 1.1208941974013398, + "grad_norm": 0.6868199706077576, + "learning_rate": 4.3062674229687274e-05, + "loss": 2.4613, + "step": 13889 + }, + { + "epoch": 1.1209749011379226, + "grad_norm": 0.7283496260643005, + "learning_rate": 
4.304969686010608e-05, + "loss": 2.478, + "step": 13890 + }, + { + "epoch": 1.1210556048745057, + "grad_norm": 0.6907255053520203, + "learning_rate": 4.303672090987535e-05, + "loss": 2.4431, + "step": 13891 + }, + { + "epoch": 1.1211363086110886, + "grad_norm": 0.675089418888092, + "learning_rate": 4.302374637931841e-05, + "loss": 2.4398, + "step": 13892 + }, + { + "epoch": 1.1212170123476717, + "grad_norm": 0.6929863095283508, + "learning_rate": 4.301077326875863e-05, + "loss": 2.3909, + "step": 13893 + }, + { + "epoch": 1.1212977160842548, + "grad_norm": 0.6746132969856262, + "learning_rate": 4.29978015785194e-05, + "loss": 2.4726, + "step": 13894 + }, + { + "epoch": 1.1213784198208376, + "grad_norm": 0.720781147480011, + "learning_rate": 4.298483130892392e-05, + "loss": 2.4445, + "step": 13895 + }, + { + "epoch": 1.1214591235574207, + "grad_norm": 0.6624416708946228, + "learning_rate": 4.297186246029549e-05, + "loss": 2.3868, + "step": 13896 + }, + { + "epoch": 1.1215398272940038, + "grad_norm": 0.7849127054214478, + "learning_rate": 4.295889503295731e-05, + "loss": 2.4479, + "step": 13897 + }, + { + "epoch": 1.1216205310305867, + "grad_norm": 0.6655337810516357, + "learning_rate": 4.294592902723259e-05, + "loss": 2.5093, + "step": 13898 + }, + { + "epoch": 1.1217012347671698, + "grad_norm": 0.7055402398109436, + "learning_rate": 4.293296444344445e-05, + "loss": 2.4385, + "step": 13899 + }, + { + "epoch": 1.1217819385037526, + "grad_norm": 0.7388767600059509, + "learning_rate": 4.2920001281916e-05, + "loss": 2.4863, + "step": 13900 + }, + { + "epoch": 1.1218626422403357, + "grad_norm": 0.6915223002433777, + "learning_rate": 4.2907039542970373e-05, + "loss": 2.4218, + "step": 13901 + }, + { + "epoch": 1.1219433459769188, + "grad_norm": 0.7124893665313721, + "learning_rate": 4.289407922693053e-05, + "loss": 2.4514, + "step": 13902 + }, + { + "epoch": 1.1220240497135017, + "grad_norm": 0.6552406549453735, + "learning_rate": 4.28811203341195e-05, + "loss": 2.4558, + 
"step": 13903 + }, + { + "epoch": 1.1221047534500848, + "grad_norm": 0.6641791462898254, + "learning_rate": 4.286816286486031e-05, + "loss": 2.4277, + "step": 13904 + }, + { + "epoch": 1.1221854571866678, + "grad_norm": 0.677733838558197, + "learning_rate": 4.285520681947579e-05, + "loss": 2.4861, + "step": 13905 + }, + { + "epoch": 1.1222661609232507, + "grad_norm": 0.6572888493537903, + "learning_rate": 4.284225219828891e-05, + "loss": 2.4657, + "step": 13906 + }, + { + "epoch": 1.1223468646598338, + "grad_norm": 0.6923860907554626, + "learning_rate": 4.2829299001622546e-05, + "loss": 2.4857, + "step": 13907 + }, + { + "epoch": 1.1224275683964167, + "grad_norm": 0.6971977949142456, + "learning_rate": 4.281634722979947e-05, + "loss": 2.4434, + "step": 13908 + }, + { + "epoch": 1.1225082721329998, + "grad_norm": 0.6828060746192932, + "learning_rate": 4.2803396883142456e-05, + "loss": 2.4342, + "step": 13909 + }, + { + "epoch": 1.1225889758695828, + "grad_norm": 0.7001270651817322, + "learning_rate": 4.279044796197438e-05, + "loss": 2.5222, + "step": 13910 + }, + { + "epoch": 1.1226696796061657, + "grad_norm": 0.6425578594207764, + "learning_rate": 4.277750046661785e-05, + "loss": 2.42, + "step": 13911 + }, + { + "epoch": 1.1227503833427488, + "grad_norm": 0.6498209834098816, + "learning_rate": 4.2764554397395585e-05, + "loss": 2.4448, + "step": 13912 + }, + { + "epoch": 1.1228310870793319, + "grad_norm": 0.6894031763076782, + "learning_rate": 4.275160975463025e-05, + "loss": 2.4508, + "step": 13913 + }, + { + "epoch": 1.1229117908159147, + "grad_norm": 0.7286608219146729, + "learning_rate": 4.273866653864448e-05, + "loss": 2.4557, + "step": 13914 + }, + { + "epoch": 1.1229924945524978, + "grad_norm": 0.753826379776001, + "learning_rate": 4.272572474976079e-05, + "loss": 2.4635, + "step": 13915 + }, + { + "epoch": 1.1230731982890807, + "grad_norm": 0.6715937256813049, + "learning_rate": 4.271278438830174e-05, + "loss": 2.5107, + "step": 13916 + }, + { + "epoch": 
1.1231539020256638, + "grad_norm": 0.6833200454711914, + "learning_rate": 4.26998454545899e-05, + "loss": 2.4883, + "step": 13917 + }, + { + "epoch": 1.1232346057622469, + "grad_norm": 0.6763597130775452, + "learning_rate": 4.2686907948947666e-05, + "loss": 2.4178, + "step": 13918 + }, + { + "epoch": 1.1233153094988297, + "grad_norm": 0.7336227297782898, + "learning_rate": 4.26739718716975e-05, + "loss": 2.4542, + "step": 13919 + }, + { + "epoch": 1.1233960132354128, + "grad_norm": 0.6583260297775269, + "learning_rate": 4.2661037223161806e-05, + "loss": 2.3998, + "step": 13920 + }, + { + "epoch": 1.1234767169719957, + "grad_norm": 0.6444356441497803, + "learning_rate": 4.264810400366295e-05, + "loss": 2.4354, + "step": 13921 + }, + { + "epoch": 1.1235574207085788, + "grad_norm": 0.6786002516746521, + "learning_rate": 4.2635172213523255e-05, + "loss": 2.3989, + "step": 13922 + }, + { + "epoch": 1.1236381244451619, + "grad_norm": 0.6838372349739075, + "learning_rate": 4.262224185306507e-05, + "loss": 2.4431, + "step": 13923 + }, + { + "epoch": 1.1237188281817447, + "grad_norm": 0.7516793012619019, + "learning_rate": 4.260931292261056e-05, + "loss": 2.4373, + "step": 13924 + }, + { + "epoch": 1.1237995319183278, + "grad_norm": 0.6860260367393494, + "learning_rate": 4.2596385422481985e-05, + "loss": 2.4457, + "step": 13925 + }, + { + "epoch": 1.123880235654911, + "grad_norm": 0.6556448936462402, + "learning_rate": 4.2583459353001595e-05, + "loss": 2.4165, + "step": 13926 + }, + { + "epoch": 1.1239609393914938, + "grad_norm": 0.729131281375885, + "learning_rate": 4.257053471449144e-05, + "loss": 2.4124, + "step": 13927 + }, + { + "epoch": 1.1240416431280769, + "grad_norm": 0.6941910982131958, + "learning_rate": 4.2557611507273684e-05, + "loss": 2.4095, + "step": 13928 + }, + { + "epoch": 1.12412234686466, + "grad_norm": 0.6390536427497864, + "learning_rate": 4.25446897316704e-05, + "loss": 2.4221, + "step": 13929 + }, + { + "epoch": 1.1242030506012428, + "grad_norm": 
0.7034881114959717, + "learning_rate": 4.253176938800365e-05, + "loss": 2.4685, + "step": 13930 + }, + { + "epoch": 1.124283754337826, + "grad_norm": 0.6975526809692383, + "learning_rate": 4.251885047659542e-05, + "loss": 2.4771, + "step": 13931 + }, + { + "epoch": 1.1243644580744088, + "grad_norm": 0.7020023465156555, + "learning_rate": 4.2505932997767695e-05, + "loss": 2.4746, + "step": 13932 + }, + { + "epoch": 1.1244451618109919, + "grad_norm": 0.7207093238830566, + "learning_rate": 4.2493016951842444e-05, + "loss": 2.4707, + "step": 13933 + }, + { + "epoch": 1.124525865547575, + "grad_norm": 0.7711251974105835, + "learning_rate": 4.24801023391415e-05, + "loss": 2.5104, + "step": 13934 + }, + { + "epoch": 1.1246065692841578, + "grad_norm": 0.7324040532112122, + "learning_rate": 4.246718915998677e-05, + "loss": 2.4257, + "step": 13935 + }, + { + "epoch": 1.124687273020741, + "grad_norm": 0.6532757878303528, + "learning_rate": 4.2454277414700116e-05, + "loss": 2.3708, + "step": 13936 + }, + { + "epoch": 1.1247679767573238, + "grad_norm": 0.6933012008666992, + "learning_rate": 4.244136710360325e-05, + "loss": 2.4985, + "step": 13937 + }, + { + "epoch": 1.1248486804939068, + "grad_norm": 0.6787589192390442, + "learning_rate": 4.242845822701798e-05, + "loss": 2.402, + "step": 13938 + }, + { + "epoch": 1.12492938423049, + "grad_norm": 0.6567786931991577, + "learning_rate": 4.241555078526602e-05, + "loss": 2.4295, + "step": 13939 + }, + { + "epoch": 1.1250100879670728, + "grad_norm": 0.6962547302246094, + "learning_rate": 4.2402644778669074e-05, + "loss": 2.4006, + "step": 13940 + }, + { + "epoch": 1.125090791703656, + "grad_norm": 0.7152721285820007, + "learning_rate": 4.238974020754877e-05, + "loss": 2.4757, + "step": 13941 + }, + { + "epoch": 1.125171495440239, + "grad_norm": 0.6869861483573914, + "learning_rate": 4.237683707222677e-05, + "loss": 2.3877, + "step": 13942 + }, + { + "epoch": 1.1252521991768218, + "grad_norm": 0.6951470971107483, + "learning_rate": 
4.236393537302459e-05, + "loss": 2.3755, + "step": 13943 + }, + { + "epoch": 1.125332902913405, + "grad_norm": 0.6997567415237427, + "learning_rate": 4.2351035110263805e-05, + "loss": 2.4731, + "step": 13944 + }, + { + "epoch": 1.125413606649988, + "grad_norm": 0.6765854358673096, + "learning_rate": 4.23381362842659e-05, + "loss": 2.4004, + "step": 13945 + }, + { + "epoch": 1.1254943103865709, + "grad_norm": 0.7046722173690796, + "learning_rate": 4.2325238895352426e-05, + "loss": 2.4379, + "step": 13946 + }, + { + "epoch": 1.125575014123154, + "grad_norm": 0.6862985491752625, + "learning_rate": 4.231234294384472e-05, + "loss": 2.4614, + "step": 13947 + }, + { + "epoch": 1.1256557178597368, + "grad_norm": 0.6637778282165527, + "learning_rate": 4.229944843006422e-05, + "loss": 2.4412, + "step": 13948 + }, + { + "epoch": 1.12573642159632, + "grad_norm": 0.7042228579521179, + "learning_rate": 4.228655535433231e-05, + "loss": 2.4296, + "step": 13949 + }, + { + "epoch": 1.1258171253329028, + "grad_norm": 0.6767764687538147, + "learning_rate": 4.227366371697029e-05, + "loss": 2.409, + "step": 13950 + }, + { + "epoch": 1.1258978290694859, + "grad_norm": 0.6886798143386841, + "learning_rate": 4.226077351829948e-05, + "loss": 2.4786, + "step": 13951 + }, + { + "epoch": 1.125978532806069, + "grad_norm": 0.7723653316497803, + "learning_rate": 4.224788475864115e-05, + "loss": 2.4111, + "step": 13952 + }, + { + "epoch": 1.1260592365426518, + "grad_norm": 0.7614055275917053, + "learning_rate": 4.2234997438316473e-05, + "loss": 2.5055, + "step": 13953 + }, + { + "epoch": 1.126139940279235, + "grad_norm": 0.7195241451263428, + "learning_rate": 4.222211155764665e-05, + "loss": 2.411, + "step": 13954 + }, + { + "epoch": 1.126220644015818, + "grad_norm": 0.7130021452903748, + "learning_rate": 4.220922711695288e-05, + "loss": 2.4819, + "step": 13955 + }, + { + "epoch": 1.1263013477524009, + "grad_norm": 0.6972241401672363, + "learning_rate": 4.2196344116556194e-05, + "loss": 2.4611, + 
"step": 13956 + }, + { + "epoch": 1.126382051488984, + "grad_norm": 0.7023231387138367, + "learning_rate": 4.218346255677772e-05, + "loss": 2.4509, + "step": 13957 + }, + { + "epoch": 1.126462755225567, + "grad_norm": 0.6959301829338074, + "learning_rate": 4.2170582437938534e-05, + "loss": 2.4441, + "step": 13958 + }, + { + "epoch": 1.12654345896215, + "grad_norm": 0.7423149347305298, + "learning_rate": 4.2157703760359555e-05, + "loss": 2.4452, + "step": 13959 + }, + { + "epoch": 1.126624162698733, + "grad_norm": 0.6587820053100586, + "learning_rate": 4.214482652436177e-05, + "loss": 2.3936, + "step": 13960 + }, + { + "epoch": 1.1267048664353159, + "grad_norm": 0.6601768136024475, + "learning_rate": 4.213195073026618e-05, + "loss": 2.453, + "step": 13961 + }, + { + "epoch": 1.126785570171899, + "grad_norm": 0.6986891031265259, + "learning_rate": 4.2119076378393676e-05, + "loss": 2.452, + "step": 13962 + }, + { + "epoch": 1.126866273908482, + "grad_norm": 0.7207025289535522, + "learning_rate": 4.2106203469065055e-05, + "loss": 2.4048, + "step": 13963 + }, + { + "epoch": 1.126946977645065, + "grad_norm": 0.6731177568435669, + "learning_rate": 4.2093332002601184e-05, + "loss": 2.4573, + "step": 13964 + }, + { + "epoch": 1.127027681381648, + "grad_norm": 0.7330070734024048, + "learning_rate": 4.208046197932288e-05, + "loss": 2.4274, + "step": 13965 + }, + { + "epoch": 1.1271083851182309, + "grad_norm": 0.7008770704269409, + "learning_rate": 4.206759339955084e-05, + "loss": 2.4933, + "step": 13966 + }, + { + "epoch": 1.127189088854814, + "grad_norm": 0.8309584259986877, + "learning_rate": 4.20547262636058e-05, + "loss": 2.3857, + "step": 13967 + }, + { + "epoch": 1.127269792591397, + "grad_norm": 0.6705843210220337, + "learning_rate": 4.204186057180849e-05, + "loss": 2.4303, + "step": 13968 + }, + { + "epoch": 1.12735049632798, + "grad_norm": 0.7526851296424866, + "learning_rate": 4.202899632447949e-05, + "loss": 2.455, + "step": 13969 + }, + { + "epoch": 
1.127431200064563, + "grad_norm": 0.6690995097160339, + "learning_rate": 4.201613352193943e-05, + "loss": 2.4398, + "step": 13970 + }, + { + "epoch": 1.127511903801146, + "grad_norm": 0.6946840286254883, + "learning_rate": 4.20032721645089e-05, + "loss": 2.4032, + "step": 13971 + }, + { + "epoch": 1.127592607537729, + "grad_norm": 0.7438863515853882, + "learning_rate": 4.1990412252508426e-05, + "loss": 2.4644, + "step": 13972 + }, + { + "epoch": 1.127673311274312, + "grad_norm": 0.6975359916687012, + "learning_rate": 4.197755378625852e-05, + "loss": 2.3991, + "step": 13973 + }, + { + "epoch": 1.1277540150108951, + "grad_norm": 0.6799279451370239, + "learning_rate": 4.196469676607968e-05, + "loss": 2.4328, + "step": 13974 + }, + { + "epoch": 1.127834718747478, + "grad_norm": 0.7014481425285339, + "learning_rate": 4.1951841192292274e-05, + "loss": 2.5045, + "step": 13975 + }, + { + "epoch": 1.127915422484061, + "grad_norm": 0.7074011564254761, + "learning_rate": 4.1938987065216716e-05, + "loss": 2.4583, + "step": 13976 + }, + { + "epoch": 1.127996126220644, + "grad_norm": 0.7246339917182922, + "learning_rate": 4.192613438517338e-05, + "loss": 2.447, + "step": 13977 + }, + { + "epoch": 1.128076829957227, + "grad_norm": 0.6757462620735168, + "learning_rate": 4.191328315248262e-05, + "loss": 2.4181, + "step": 13978 + }, + { + "epoch": 1.12815753369381, + "grad_norm": 0.6758493185043335, + "learning_rate": 4.1900433367464644e-05, + "loss": 2.4837, + "step": 13979 + }, + { + "epoch": 1.128238237430393, + "grad_norm": 0.6782165765762329, + "learning_rate": 4.1887585030439736e-05, + "loss": 2.3946, + "step": 13980 + }, + { + "epoch": 1.128318941166976, + "grad_norm": 0.7176415324211121, + "learning_rate": 4.187473814172812e-05, + "loss": 2.4538, + "step": 13981 + }, + { + "epoch": 1.128399644903559, + "grad_norm": 0.6636224985122681, + "learning_rate": 4.186189270164997e-05, + "loss": 2.4493, + "step": 13982 + }, + { + "epoch": 1.128480348640142, + "grad_norm": 
0.6613143086433411, + "learning_rate": 4.184904871052544e-05, + "loss": 2.4994, + "step": 13983 + }, + { + "epoch": 1.128561052376725, + "grad_norm": 0.7148364186286926, + "learning_rate": 4.183620616867465e-05, + "loss": 2.4673, + "step": 13984 + }, + { + "epoch": 1.128641756113308, + "grad_norm": 0.6657952070236206, + "learning_rate": 4.1823365076417606e-05, + "loss": 2.3915, + "step": 13985 + }, + { + "epoch": 1.128722459849891, + "grad_norm": 0.7135687470436096, + "learning_rate": 4.181052543407439e-05, + "loss": 2.4961, + "step": 13986 + }, + { + "epoch": 1.1288031635864741, + "grad_norm": 0.7245377898216248, + "learning_rate": 4.179768724196501e-05, + "loss": 2.4519, + "step": 13987 + }, + { + "epoch": 1.128883867323057, + "grad_norm": 0.6832938194274902, + "learning_rate": 4.1784850500409376e-05, + "loss": 2.4471, + "step": 13988 + }, + { + "epoch": 1.12896457105964, + "grad_norm": 0.7303032279014587, + "learning_rate": 4.177201520972746e-05, + "loss": 2.3906, + "step": 13989 + }, + { + "epoch": 1.1290452747962232, + "grad_norm": 0.698581874370575, + "learning_rate": 4.175918137023911e-05, + "loss": 2.4667, + "step": 13990 + }, + { + "epoch": 1.129125978532806, + "grad_norm": 0.69133061170578, + "learning_rate": 4.174634898226422e-05, + "loss": 2.4285, + "step": 13991 + }, + { + "epoch": 1.1292066822693891, + "grad_norm": 0.7029501795768738, + "learning_rate": 4.1733518046122576e-05, + "loss": 2.4839, + "step": 13992 + }, + { + "epoch": 1.129287386005972, + "grad_norm": 0.7566521167755127, + "learning_rate": 4.172068856213398e-05, + "loss": 2.5019, + "step": 13993 + }, + { + "epoch": 1.129368089742555, + "grad_norm": 0.697998046875, + "learning_rate": 4.1707860530618204e-05, + "loss": 2.4305, + "step": 13994 + }, + { + "epoch": 1.1294487934791382, + "grad_norm": 0.674194872379303, + "learning_rate": 4.169503395189489e-05, + "loss": 2.4361, + "step": 13995 + }, + { + "epoch": 1.129529497215721, + "grad_norm": 0.6936436891555786, + "learning_rate": 
4.168220882628373e-05, + "loss": 2.518, + "step": 13996 + }, + { + "epoch": 1.1296102009523041, + "grad_norm": 0.6831670999526978, + "learning_rate": 4.166938515410442e-05, + "loss": 2.4197, + "step": 13997 + }, + { + "epoch": 1.129690904688887, + "grad_norm": 0.7323662638664246, + "learning_rate": 4.165656293567647e-05, + "loss": 2.4555, + "step": 13998 + }, + { + "epoch": 1.12977160842547, + "grad_norm": 0.7699782848358154, + "learning_rate": 4.164374217131948e-05, + "loss": 2.4456, + "step": 13999 + }, + { + "epoch": 1.1298523121620532, + "grad_norm": 0.7009051442146301, + "learning_rate": 4.163092286135297e-05, + "loss": 2.4429, + "step": 14000 + }, + { + "epoch": 1.1298523121620532, + "eval_loss": 2.4034411907196045, + "eval_runtime": 771.1158, + "eval_samples_per_second": 3.398, + "eval_steps_per_second": 0.567, + "step": 14000 + }, + { + "epoch": 1.129933015898636, + "grad_norm": 0.674665093421936, + "learning_rate": 4.1618105006096456e-05, + "loss": 2.4127, + "step": 14001 + }, + { + "epoch": 1.1300137196352191, + "grad_norm": 0.7332403659820557, + "learning_rate": 4.1605288605869365e-05, + "loss": 2.4854, + "step": 14002 + }, + { + "epoch": 1.1300944233718022, + "grad_norm": 0.70233553647995, + "learning_rate": 4.159247366099117e-05, + "loss": 2.4433, + "step": 14003 + }, + { + "epoch": 1.130175127108385, + "grad_norm": 0.6259445548057556, + "learning_rate": 4.157966017178118e-05, + "loss": 2.3605, + "step": 14004 + }, + { + "epoch": 1.1302558308449682, + "grad_norm": 0.717408299446106, + "learning_rate": 4.1566848138558755e-05, + "loss": 2.4378, + "step": 14005 + }, + { + "epoch": 1.130336534581551, + "grad_norm": 0.6973297595977783, + "learning_rate": 4.155403756164323e-05, + "loss": 2.4363, + "step": 14006 + }, + { + "epoch": 1.1304172383181341, + "grad_norm": 0.7204940915107727, + "learning_rate": 4.154122844135391e-05, + "loss": 2.4814, + "step": 14007 + }, + { + "epoch": 1.1304979420547172, + "grad_norm": 0.8976696133613586, + "learning_rate": 
4.1528420778009935e-05, + "loss": 2.4654, + "step": 14008 + }, + { + "epoch": 1.1305786457913, + "grad_norm": 0.7270354628562927, + "learning_rate": 4.151561457193057e-05, + "loss": 2.4088, + "step": 14009 + }, + { + "epoch": 1.1306593495278832, + "grad_norm": 0.7200367450714111, + "learning_rate": 4.1502809823434985e-05, + "loss": 2.4412, + "step": 14010 + }, + { + "epoch": 1.130740053264466, + "grad_norm": 0.7593986392021179, + "learning_rate": 4.149000653284227e-05, + "loss": 2.5058, + "step": 14011 + }, + { + "epoch": 1.1308207570010491, + "grad_norm": 0.7322795987129211, + "learning_rate": 4.147720470047155e-05, + "loss": 2.4899, + "step": 14012 + }, + { + "epoch": 1.1309014607376322, + "grad_norm": 0.6649030447006226, + "learning_rate": 4.1464404326641905e-05, + "loss": 2.4358, + "step": 14013 + }, + { + "epoch": 1.130982164474215, + "grad_norm": 0.7258814573287964, + "learning_rate": 4.145160541167228e-05, + "loss": 2.4732, + "step": 14014 + }, + { + "epoch": 1.1310628682107982, + "grad_norm": 0.7414976358413696, + "learning_rate": 4.1438807955881695e-05, + "loss": 2.4157, + "step": 14015 + }, + { + "epoch": 1.1311435719473812, + "grad_norm": 0.6813236474990845, + "learning_rate": 4.142601195958914e-05, + "loss": 2.3966, + "step": 14016 + }, + { + "epoch": 1.131224275683964, + "grad_norm": 0.6715923547744751, + "learning_rate": 4.141321742311344e-05, + "loss": 2.4358, + "step": 14017 + }, + { + "epoch": 1.1313049794205472, + "grad_norm": 0.7174912691116333, + "learning_rate": 4.14004243467735e-05, + "loss": 2.4838, + "step": 14018 + }, + { + "epoch": 1.1313856831571303, + "grad_norm": 0.6945109963417053, + "learning_rate": 4.138763273088821e-05, + "loss": 2.4674, + "step": 14019 + }, + { + "epoch": 1.1314663868937131, + "grad_norm": 0.6759494543075562, + "learning_rate": 4.137484257577629e-05, + "loss": 2.4659, + "step": 14020 + }, + { + "epoch": 1.1315470906302962, + "grad_norm": 0.7077876925468445, + "learning_rate": 4.1362053881756534e-05, + "loss": 
2.4731, + "step": 14021 + }, + { + "epoch": 1.131627794366879, + "grad_norm": 0.6769500970840454, + "learning_rate": 4.1349266649147654e-05, + "loss": 2.3606, + "step": 14022 + }, + { + "epoch": 1.1317084981034622, + "grad_norm": 0.7104208469390869, + "learning_rate": 4.1336480878268424e-05, + "loss": 2.4626, + "step": 14023 + }, + { + "epoch": 1.1317892018400453, + "grad_norm": 0.7102686762809753, + "learning_rate": 4.132369656943741e-05, + "loss": 2.4545, + "step": 14024 + }, + { + "epoch": 1.1318699055766281, + "grad_norm": 0.7773897647857666, + "learning_rate": 4.1310913722973256e-05, + "loss": 2.5107, + "step": 14025 + }, + { + "epoch": 1.1319506093132112, + "grad_norm": 0.6427130103111267, + "learning_rate": 4.1298132339194585e-05, + "loss": 2.4349, + "step": 14026 + }, + { + "epoch": 1.132031313049794, + "grad_norm": 0.6725162863731384, + "learning_rate": 4.128535241841987e-05, + "loss": 2.4566, + "step": 14027 + }, + { + "epoch": 1.1321120167863772, + "grad_norm": 0.7182251214981079, + "learning_rate": 4.127257396096764e-05, + "loss": 2.4472, + "step": 14028 + }, + { + "epoch": 1.1321927205229603, + "grad_norm": 0.6712302565574646, + "learning_rate": 4.1259796967156426e-05, + "loss": 2.4326, + "step": 14029 + }, + { + "epoch": 1.1322734242595431, + "grad_norm": 0.7726041078567505, + "learning_rate": 4.124702143730459e-05, + "loss": 2.4994, + "step": 14030 + }, + { + "epoch": 1.1323541279961262, + "grad_norm": 0.651899516582489, + "learning_rate": 4.123424737173056e-05, + "loss": 2.4244, + "step": 14031 + }, + { + "epoch": 1.1324348317327093, + "grad_norm": 0.6646261215209961, + "learning_rate": 4.12214747707527e-05, + "loss": 2.5027, + "step": 14032 + }, + { + "epoch": 1.1325155354692922, + "grad_norm": 0.729098916053772, + "learning_rate": 4.120870363468933e-05, + "loss": 2.5117, + "step": 14033 + }, + { + "epoch": 1.1325962392058753, + "grad_norm": 0.7056638598442078, + "learning_rate": 4.119593396385876e-05, + "loss": 2.4279, + "step": 14034 + }, + { + 
"epoch": 1.1326769429424584, + "grad_norm": 0.7051844000816345, + "learning_rate": 4.1183165758579255e-05, + "loss": 2.3844, + "step": 14035 + }, + { + "epoch": 1.1327576466790412, + "grad_norm": 0.6954311728477478, + "learning_rate": 4.1170399019168984e-05, + "loss": 2.4041, + "step": 14036 + }, + { + "epoch": 1.1328383504156243, + "grad_norm": 0.650044858455658, + "learning_rate": 4.1157633745946135e-05, + "loss": 2.4397, + "step": 14037 + }, + { + "epoch": 1.1329190541522072, + "grad_norm": 0.6974380016326904, + "learning_rate": 4.114486993922888e-05, + "loss": 2.4391, + "step": 14038 + }, + { + "epoch": 1.1329997578887903, + "grad_norm": 0.7252807021141052, + "learning_rate": 4.113210759933536e-05, + "loss": 2.4471, + "step": 14039 + }, + { + "epoch": 1.1330804616253733, + "grad_norm": 0.7001414895057678, + "learning_rate": 4.111934672658354e-05, + "loss": 2.402, + "step": 14040 + }, + { + "epoch": 1.1331611653619562, + "grad_norm": 0.7420533895492554, + "learning_rate": 4.110658732129153e-05, + "loss": 2.4987, + "step": 14041 + }, + { + "epoch": 1.1332418690985393, + "grad_norm": 0.6850644946098328, + "learning_rate": 4.1093829383777315e-05, + "loss": 2.4355, + "step": 14042 + }, + { + "epoch": 1.1333225728351222, + "grad_norm": 0.6905977725982666, + "learning_rate": 4.108107291435885e-05, + "loss": 2.4818, + "step": 14043 + }, + { + "epoch": 1.1334032765717053, + "grad_norm": 0.6555112600326538, + "learning_rate": 4.106831791335407e-05, + "loss": 2.425, + "step": 14044 + }, + { + "epoch": 1.1334839803082883, + "grad_norm": 0.6570355892181396, + "learning_rate": 4.105556438108089e-05, + "loss": 2.4232, + "step": 14045 + }, + { + "epoch": 1.1335646840448712, + "grad_norm": 0.7910747528076172, + "learning_rate": 4.104281231785708e-05, + "loss": 2.484, + "step": 14046 + }, + { + "epoch": 1.1336453877814543, + "grad_norm": 0.6581952571868896, + "learning_rate": 4.103006172400052e-05, + "loss": 2.4102, + "step": 14047 + }, + { + "epoch": 1.1337260915180374, + 
"grad_norm": 0.6834773421287537, + "learning_rate": 4.1017312599828994e-05, + "loss": 2.4602, + "step": 14048 + }, + { + "epoch": 1.1338067952546202, + "grad_norm": 0.7588350772857666, + "learning_rate": 4.1004564945660195e-05, + "loss": 2.5059, + "step": 14049 + }, + { + "epoch": 1.1338874989912033, + "grad_norm": 0.6604699492454529, + "learning_rate": 4.099181876181185e-05, + "loss": 2.4403, + "step": 14050 + }, + { + "epoch": 1.1339682027277862, + "grad_norm": 0.6957669258117676, + "learning_rate": 4.097907404860163e-05, + "loss": 2.4218, + "step": 14051 + }, + { + "epoch": 1.1340489064643693, + "grad_norm": 0.7091849446296692, + "learning_rate": 4.0966330806347166e-05, + "loss": 2.4396, + "step": 14052 + }, + { + "epoch": 1.1341296102009524, + "grad_norm": 0.6637482047080994, + "learning_rate": 4.095358903536605e-05, + "loss": 2.4514, + "step": 14053 + }, + { + "epoch": 1.1342103139375352, + "grad_norm": 0.7485960125923157, + "learning_rate": 4.0940848735975846e-05, + "loss": 2.4401, + "step": 14054 + }, + { + "epoch": 1.1342910176741183, + "grad_norm": 0.6509774327278137, + "learning_rate": 4.092810990849411e-05, + "loss": 2.4575, + "step": 14055 + }, + { + "epoch": 1.1343717214107012, + "grad_norm": 0.7151626348495483, + "learning_rate": 4.091537255323825e-05, + "loss": 2.45, + "step": 14056 + }, + { + "epoch": 1.1344524251472843, + "grad_norm": 0.7536267042160034, + "learning_rate": 4.0902636670525764e-05, + "loss": 2.497, + "step": 14057 + }, + { + "epoch": 1.1345331288838674, + "grad_norm": 0.7779545783996582, + "learning_rate": 4.0889902260674086e-05, + "loss": 2.412, + "step": 14058 + }, + { + "epoch": 1.1346138326204502, + "grad_norm": 0.7211748957633972, + "learning_rate": 4.087716932400052e-05, + "loss": 2.4727, + "step": 14059 + }, + { + "epoch": 1.1346945363570333, + "grad_norm": 0.6710701584815979, + "learning_rate": 4.086443786082245e-05, + "loss": 2.4318, + "step": 14060 + }, + { + "epoch": 1.1347752400936164, + "grad_norm": 0.7072857022285461, + 
"learning_rate": 4.085170787145717e-05, + "loss": 2.4672, + "step": 14061 + }, + { + "epoch": 1.1348559438301993, + "grad_norm": 0.6475152969360352, + "learning_rate": 4.083897935622194e-05, + "loss": 2.4104, + "step": 14062 + }, + { + "epoch": 1.1349366475667824, + "grad_norm": 0.7408067584037781, + "learning_rate": 4.0826252315433986e-05, + "loss": 2.4129, + "step": 14063 + }, + { + "epoch": 1.1350173513033655, + "grad_norm": 0.732540488243103, + "learning_rate": 4.081352674941056e-05, + "loss": 2.4209, + "step": 14064 + }, + { + "epoch": 1.1350980550399483, + "grad_norm": 0.6933332681655884, + "learning_rate": 4.080080265846872e-05, + "loss": 2.3797, + "step": 14065 + }, + { + "epoch": 1.1351787587765314, + "grad_norm": 0.6507896780967712, + "learning_rate": 4.078808004292561e-05, + "loss": 2.4372, + "step": 14066 + }, + { + "epoch": 1.1352594625131143, + "grad_norm": 0.729292094707489, + "learning_rate": 4.0775358903098384e-05, + "loss": 2.5513, + "step": 14067 + }, + { + "epoch": 1.1353401662496974, + "grad_norm": 0.692757248878479, + "learning_rate": 4.076263923930398e-05, + "loss": 2.4228, + "step": 14068 + }, + { + "epoch": 1.1354208699862804, + "grad_norm": 0.7028260231018066, + "learning_rate": 4.074992105185946e-05, + "loss": 2.4478, + "step": 14069 + }, + { + "epoch": 1.1355015737228633, + "grad_norm": 0.65067058801651, + "learning_rate": 4.073720434108179e-05, + "loss": 2.3729, + "step": 14070 + }, + { + "epoch": 1.1355822774594464, + "grad_norm": 0.6884061098098755, + "learning_rate": 4.0724489107287933e-05, + "loss": 2.3693, + "step": 14071 + }, + { + "epoch": 1.1356629811960293, + "grad_norm": 0.70686936378479, + "learning_rate": 4.071177535079472e-05, + "loss": 2.4989, + "step": 14072 + }, + { + "epoch": 1.1357436849326124, + "grad_norm": 0.6792482733726501, + "learning_rate": 4.0699063071919016e-05, + "loss": 2.393, + "step": 14073 + }, + { + "epoch": 1.1358243886691954, + "grad_norm": 0.7231085896492004, + "learning_rate": 4.0686352270977745e-05, 
+ "loss": 2.4597, + "step": 14074 + }, + { + "epoch": 1.1359050924057783, + "grad_norm": 0.8024532198905945, + "learning_rate": 4.067364294828758e-05, + "loss": 2.4409, + "step": 14075 + }, + { + "epoch": 1.1359857961423614, + "grad_norm": 0.6761424541473389, + "learning_rate": 4.066093510416532e-05, + "loss": 2.4598, + "step": 14076 + }, + { + "epoch": 1.1360664998789445, + "grad_norm": 0.7075559496879578, + "learning_rate": 4.064822873892771e-05, + "loss": 2.4649, + "step": 14077 + }, + { + "epoch": 1.1361472036155273, + "grad_norm": 0.6292272806167603, + "learning_rate": 4.063552385289134e-05, + "loss": 2.445, + "step": 14078 + }, + { + "epoch": 1.1362279073521104, + "grad_norm": 0.6435273885726929, + "learning_rate": 4.06228204463729e-05, + "loss": 2.4105, + "step": 14079 + }, + { + "epoch": 1.1363086110886935, + "grad_norm": 0.7135637402534485, + "learning_rate": 4.061011851968903e-05, + "loss": 2.3907, + "step": 14080 + }, + { + "epoch": 1.1363893148252764, + "grad_norm": 0.7424013614654541, + "learning_rate": 4.059741807315621e-05, + "loss": 2.4405, + "step": 14081 + }, + { + "epoch": 1.1364700185618595, + "grad_norm": 0.6649916768074036, + "learning_rate": 4.0584719107091016e-05, + "loss": 2.4314, + "step": 14082 + }, + { + "epoch": 1.1365507222984423, + "grad_norm": 0.6700563430786133, + "learning_rate": 4.0572021621809944e-05, + "loss": 2.4093, + "step": 14083 + }, + { + "epoch": 1.1366314260350254, + "grad_norm": 0.6740709543228149, + "learning_rate": 4.055932561762942e-05, + "loss": 2.4301, + "step": 14084 + }, + { + "epoch": 1.1367121297716085, + "grad_norm": 0.7039555907249451, + "learning_rate": 4.0546631094865895e-05, + "loss": 2.4427, + "step": 14085 + }, + { + "epoch": 1.1367928335081914, + "grad_norm": 0.7461164593696594, + "learning_rate": 4.053393805383573e-05, + "loss": 2.3865, + "step": 14086 + }, + { + "epoch": 1.1368735372447745, + "grad_norm": 0.6808290481567383, + "learning_rate": 4.0521246494855316e-05, + "loss": 2.3738, + "step": 14087 
+ }, + { + "epoch": 1.1369542409813573, + "grad_norm": 0.6942760944366455, + "learning_rate": 4.0508556418240875e-05, + "loss": 2.4351, + "step": 14088 + }, + { + "epoch": 1.1370349447179404, + "grad_norm": 0.7615510821342468, + "learning_rate": 4.049586782430872e-05, + "loss": 2.3968, + "step": 14089 + }, + { + "epoch": 1.1371156484545235, + "grad_norm": 0.7240662574768066, + "learning_rate": 4.048318071337512e-05, + "loss": 2.4046, + "step": 14090 + }, + { + "epoch": 1.1371963521911064, + "grad_norm": 0.7286471128463745, + "learning_rate": 4.047049508575621e-05, + "loss": 2.4039, + "step": 14091 + }, + { + "epoch": 1.1372770559276895, + "grad_norm": 0.7031459212303162, + "learning_rate": 4.045781094176816e-05, + "loss": 2.4494, + "step": 14092 + }, + { + "epoch": 1.1373577596642725, + "grad_norm": 0.7116301655769348, + "learning_rate": 4.0445128281727116e-05, + "loss": 2.3991, + "step": 14093 + }, + { + "epoch": 1.1374384634008554, + "grad_norm": 0.6719788312911987, + "learning_rate": 4.043244710594914e-05, + "loss": 2.4823, + "step": 14094 + }, + { + "epoch": 1.1375191671374385, + "grad_norm": 0.6770508885383606, + "learning_rate": 4.041976741475031e-05, + "loss": 2.4362, + "step": 14095 + }, + { + "epoch": 1.1375998708740216, + "grad_norm": 0.6808609962463379, + "learning_rate": 4.040708920844666e-05, + "loss": 2.435, + "step": 14096 + }, + { + "epoch": 1.1376805746106045, + "grad_norm": 0.7445514798164368, + "learning_rate": 4.0394412487354074e-05, + "loss": 2.4749, + "step": 14097 + }, + { + "epoch": 1.1377612783471875, + "grad_norm": 0.7024775743484497, + "learning_rate": 4.038173725178854e-05, + "loss": 2.4354, + "step": 14098 + }, + { + "epoch": 1.1378419820837704, + "grad_norm": 0.6925685405731201, + "learning_rate": 4.0369063502066e-05, + "loss": 2.4462, + "step": 14099 + }, + { + "epoch": 1.1379226858203535, + "grad_norm": 0.6970539689064026, + "learning_rate": 4.035639123850223e-05, + "loss": 2.3842, + "step": 14100 + }, + { + "epoch": 
1.1380033895569364, + "grad_norm": 0.6571836471557617, + "learning_rate": 4.0343720461413107e-05, + "loss": 2.4213, + "step": 14101 + }, + { + "epoch": 1.1380840932935194, + "grad_norm": 0.7264918684959412, + "learning_rate": 4.033105117111441e-05, + "loss": 2.4697, + "step": 14102 + }, + { + "epoch": 1.1381647970301025, + "grad_norm": 0.6929560899734497, + "learning_rate": 4.03183833679219e-05, + "loss": 2.461, + "step": 14103 + }, + { + "epoch": 1.1382455007666854, + "grad_norm": 0.6533559560775757, + "learning_rate": 4.030571705215128e-05, + "loss": 2.4336, + "step": 14104 + }, + { + "epoch": 1.1383262045032685, + "grad_norm": 0.7372364401817322, + "learning_rate": 4.0293052224118234e-05, + "loss": 2.4396, + "step": 14105 + }, + { + "epoch": 1.1384069082398516, + "grad_norm": 0.6736310720443726, + "learning_rate": 4.028038888413844e-05, + "loss": 2.4123, + "step": 14106 + }, + { + "epoch": 1.1384876119764344, + "grad_norm": 0.6898338794708252, + "learning_rate": 4.026772703252742e-05, + "loss": 2.431, + "step": 14107 + }, + { + "epoch": 1.1385683157130175, + "grad_norm": 0.7933369278907776, + "learning_rate": 4.02550666696008e-05, + "loss": 2.4669, + "step": 14108 + }, + { + "epoch": 1.1386490194496006, + "grad_norm": 0.7218122482299805, + "learning_rate": 4.024240779567412e-05, + "loss": 2.3761, + "step": 14109 + }, + { + "epoch": 1.1387297231861835, + "grad_norm": 0.7018248438835144, + "learning_rate": 4.022975041106281e-05, + "loss": 2.4011, + "step": 14110 + }, + { + "epoch": 1.1388104269227666, + "grad_norm": 0.6709668040275574, + "learning_rate": 4.0217094516082364e-05, + "loss": 2.426, + "step": 14111 + }, + { + "epoch": 1.1388911306593494, + "grad_norm": 0.7241504192352295, + "learning_rate": 4.0204440111048195e-05, + "loss": 2.4085, + "step": 14112 + }, + { + "epoch": 1.1389718343959325, + "grad_norm": 0.731347382068634, + "learning_rate": 4.0191787196275675e-05, + "loss": 2.502, + "step": 14113 + }, + { + "epoch": 1.1390525381325156, + "grad_norm": 
0.6630167365074158, + "learning_rate": 4.0179135772080166e-05, + "loss": 2.3999, + "step": 14114 + }, + { + "epoch": 1.1391332418690985, + "grad_norm": 0.7094748616218567, + "learning_rate": 4.016648583877698e-05, + "loss": 2.4666, + "step": 14115 + }, + { + "epoch": 1.1392139456056816, + "grad_norm": 0.7262436151504517, + "learning_rate": 4.0153837396681395e-05, + "loss": 2.4369, + "step": 14116 + }, + { + "epoch": 1.1392946493422644, + "grad_norm": 0.6796039938926697, + "learning_rate": 4.014119044610859e-05, + "loss": 2.4607, + "step": 14117 + }, + { + "epoch": 1.1393753530788475, + "grad_norm": 0.6690036058425903, + "learning_rate": 4.0128544987373785e-05, + "loss": 2.4145, + "step": 14118 + }, + { + "epoch": 1.1394560568154306, + "grad_norm": 0.6987181305885315, + "learning_rate": 4.011590102079219e-05, + "loss": 2.4294, + "step": 14119 + }, + { + "epoch": 1.1395367605520135, + "grad_norm": 0.6756789684295654, + "learning_rate": 4.0103258546678836e-05, + "loss": 2.396, + "step": 14120 + }, + { + "epoch": 1.1396174642885966, + "grad_norm": 0.7027772068977356, + "learning_rate": 4.009061756534885e-05, + "loss": 2.3971, + "step": 14121 + }, + { + "epoch": 1.1396981680251796, + "grad_norm": 0.6872174143791199, + "learning_rate": 4.007797807711732e-05, + "loss": 2.4297, + "step": 14122 + }, + { + "epoch": 1.1397788717617625, + "grad_norm": 0.7213007211685181, + "learning_rate": 4.006534008229914e-05, + "loss": 2.4792, + "step": 14123 + }, + { + "epoch": 1.1398595754983456, + "grad_norm": 0.6771649122238159, + "learning_rate": 4.0052703581209395e-05, + "loss": 2.4397, + "step": 14124 + }, + { + "epoch": 1.1399402792349287, + "grad_norm": 0.6577184796333313, + "learning_rate": 4.0040068574163013e-05, + "loss": 2.4113, + "step": 14125 + }, + { + "epoch": 1.1400209829715116, + "grad_norm": 0.7493160367012024, + "learning_rate": 4.002743506147483e-05, + "loss": 2.4454, + "step": 14126 + }, + { + "epoch": 1.1401016867080946, + "grad_norm": 0.6820357441902161, + 
"learning_rate": 4.0014803043459726e-05, + "loss": 2.4126, + "step": 14127 + }, + { + "epoch": 1.1401823904446775, + "grad_norm": 0.7177188992500305, + "learning_rate": 4.000217252043258e-05, + "loss": 2.4355, + "step": 14128 + }, + { + "epoch": 1.1402630941812606, + "grad_norm": 0.654371440410614, + "learning_rate": 3.998954349270808e-05, + "loss": 2.4932, + "step": 14129 + }, + { + "epoch": 1.1403437979178437, + "grad_norm": 0.7029837965965271, + "learning_rate": 3.997691596060104e-05, + "loss": 2.4341, + "step": 14130 + }, + { + "epoch": 1.1404245016544265, + "grad_norm": 0.7971171140670776, + "learning_rate": 3.996428992442615e-05, + "loss": 2.4466, + "step": 14131 + }, + { + "epoch": 1.1405052053910096, + "grad_norm": 0.6941849589347839, + "learning_rate": 3.9951665384498114e-05, + "loss": 2.4861, + "step": 14132 + }, + { + "epoch": 1.1405859091275925, + "grad_norm": 0.6657733917236328, + "learning_rate": 3.993904234113153e-05, + "loss": 2.4266, + "step": 14133 + }, + { + "epoch": 1.1406666128641756, + "grad_norm": 0.6780329346656799, + "learning_rate": 3.9926420794641e-05, + "loss": 2.458, + "step": 14134 + }, + { + "epoch": 1.1407473166007587, + "grad_norm": 0.7070702910423279, + "learning_rate": 3.991380074534109e-05, + "loss": 2.368, + "step": 14135 + }, + { + "epoch": 1.1408280203373415, + "grad_norm": 0.7186575531959534, + "learning_rate": 3.990118219354635e-05, + "loss": 2.4611, + "step": 14136 + }, + { + "epoch": 1.1409087240739246, + "grad_norm": 0.7171763777732849, + "learning_rate": 3.988856513957123e-05, + "loss": 2.4315, + "step": 14137 + }, + { + "epoch": 1.1409894278105077, + "grad_norm": 0.7090228796005249, + "learning_rate": 3.987594958373025e-05, + "loss": 2.4668, + "step": 14138 + }, + { + "epoch": 1.1410701315470906, + "grad_norm": 0.6523951888084412, + "learning_rate": 3.986333552633773e-05, + "loss": 2.4392, + "step": 14139 + }, + { + "epoch": 1.1411508352836737, + "grad_norm": 0.706000804901123, + "learning_rate": 3.98507229677081e-05, + 
"loss": 2.4382, + "step": 14140 + }, + { + "epoch": 1.1412315390202568, + "grad_norm": 0.6537537574768066, + "learning_rate": 3.983811190815571e-05, + "loss": 2.456, + "step": 14141 + }, + { + "epoch": 1.1413122427568396, + "grad_norm": 0.7509549856185913, + "learning_rate": 3.982550234799479e-05, + "loss": 2.4744, + "step": 14142 + }, + { + "epoch": 1.1413929464934227, + "grad_norm": 0.7188650965690613, + "learning_rate": 3.981289428753967e-05, + "loss": 2.4632, + "step": 14143 + }, + { + "epoch": 1.1414736502300056, + "grad_norm": 0.7563674449920654, + "learning_rate": 3.9800287727104544e-05, + "loss": 2.5063, + "step": 14144 + }, + { + "epoch": 1.1415543539665887, + "grad_norm": 0.8374128341674805, + "learning_rate": 3.978768266700361e-05, + "loss": 2.4942, + "step": 14145 + }, + { + "epoch": 1.1416350577031718, + "grad_norm": 0.7020177841186523, + "learning_rate": 3.9775079107551027e-05, + "loss": 2.4404, + "step": 14146 + }, + { + "epoch": 1.1417157614397546, + "grad_norm": 0.7326170802116394, + "learning_rate": 3.9762477049060895e-05, + "loss": 2.4127, + "step": 14147 + }, + { + "epoch": 1.1417964651763377, + "grad_norm": 0.6661173105239868, + "learning_rate": 3.974987649184734e-05, + "loss": 2.4649, + "step": 14148 + }, + { + "epoch": 1.1418771689129206, + "grad_norm": 0.7186033129692078, + "learning_rate": 3.973727743622432e-05, + "loss": 2.4275, + "step": 14149 + }, + { + "epoch": 1.1419578726495037, + "grad_norm": 0.7193881869316101, + "learning_rate": 3.972467988250588e-05, + "loss": 2.4997, + "step": 14150 + }, + { + "epoch": 1.1420385763860867, + "grad_norm": 0.7139542102813721, + "learning_rate": 3.971208383100601e-05, + "loss": 2.4211, + "step": 14151 + }, + { + "epoch": 1.1421192801226696, + "grad_norm": 0.6840166449546814, + "learning_rate": 3.969948928203856e-05, + "loss": 2.4504, + "step": 14152 + }, + { + "epoch": 1.1421999838592527, + "grad_norm": 0.8261072039604187, + "learning_rate": 3.968689623591747e-05, + "loss": 2.4901, + "step": 14153 + 
}, + { + "epoch": 1.1422806875958358, + "grad_norm": 0.7636086940765381, + "learning_rate": 3.96743046929566e-05, + "loss": 2.4202, + "step": 14154 + }, + { + "epoch": 1.1423613913324187, + "grad_norm": 0.7477976679801941, + "learning_rate": 3.966171465346973e-05, + "loss": 2.492, + "step": 14155 + }, + { + "epoch": 1.1424420950690017, + "grad_norm": 0.7516389489173889, + "learning_rate": 3.9649126117770665e-05, + "loss": 2.4512, + "step": 14156 + }, + { + "epoch": 1.1425227988055846, + "grad_norm": 0.6987521648406982, + "learning_rate": 3.9636539086173174e-05, + "loss": 2.4005, + "step": 14157 + }, + { + "epoch": 1.1426035025421677, + "grad_norm": 0.7242532968521118, + "learning_rate": 3.962395355899088e-05, + "loss": 2.4414, + "step": 14158 + }, + { + "epoch": 1.1426842062787508, + "grad_norm": 0.6616180539131165, + "learning_rate": 3.961136953653749e-05, + "loss": 2.4442, + "step": 14159 + }, + { + "epoch": 1.1427649100153336, + "grad_norm": 0.7165415287017822, + "learning_rate": 3.959878701912667e-05, + "loss": 2.4658, + "step": 14160 + }, + { + "epoch": 1.1428456137519167, + "grad_norm": 0.6619318127632141, + "learning_rate": 3.9586206007071926e-05, + "loss": 2.3803, + "step": 14161 + }, + { + "epoch": 1.1429263174884996, + "grad_norm": 0.6654838919639587, + "learning_rate": 3.957362650068684e-05, + "loss": 2.4584, + "step": 14162 + }, + { + "epoch": 1.1430070212250827, + "grad_norm": 0.6947140097618103, + "learning_rate": 3.956104850028496e-05, + "loss": 2.4236, + "step": 14163 + }, + { + "epoch": 1.1430877249616658, + "grad_norm": 0.6510412096977234, + "learning_rate": 3.954847200617973e-05, + "loss": 2.3589, + "step": 14164 + }, + { + "epoch": 1.1431684286982486, + "grad_norm": 0.7550667524337769, + "learning_rate": 3.95358970186846e-05, + "loss": 2.419, + "step": 14165 + }, + { + "epoch": 1.1432491324348317, + "grad_norm": 0.7898361682891846, + "learning_rate": 3.9523323538112975e-05, + "loss": 2.4549, + "step": 14166 + }, + { + "epoch": 
1.1433298361714148, + "grad_norm": 0.7162390947341919, + "learning_rate": 3.9510751564778246e-05, + "loss": 2.4493, + "step": 14167 + }, + { + "epoch": 1.1434105399079977, + "grad_norm": 0.8251990079879761, + "learning_rate": 3.949818109899367e-05, + "loss": 2.4474, + "step": 14168 + }, + { + "epoch": 1.1434912436445808, + "grad_norm": 0.6739209890365601, + "learning_rate": 3.948561214107258e-05, + "loss": 2.4564, + "step": 14169 + }, + { + "epoch": 1.1435719473811639, + "grad_norm": 0.6606340408325195, + "learning_rate": 3.9473044691328254e-05, + "loss": 2.3838, + "step": 14170 + }, + { + "epoch": 1.1436526511177467, + "grad_norm": 0.7297452092170715, + "learning_rate": 3.946047875007384e-05, + "loss": 2.4673, + "step": 14171 + }, + { + "epoch": 1.1437333548543298, + "grad_norm": 0.7382420301437378, + "learning_rate": 3.9447914317622546e-05, + "loss": 2.4279, + "step": 14172 + }, + { + "epoch": 1.1438140585909127, + "grad_norm": 0.6947354674339294, + "learning_rate": 3.9435351394287546e-05, + "loss": 2.4553, + "step": 14173 + }, + { + "epoch": 1.1438947623274958, + "grad_norm": 0.670369565486908, + "learning_rate": 3.942278998038183e-05, + "loss": 2.4285, + "step": 14174 + }, + { + "epoch": 1.1439754660640788, + "grad_norm": 0.7097954154014587, + "learning_rate": 3.941023007621859e-05, + "loss": 2.477, + "step": 14175 + }, + { + "epoch": 1.1440561698006617, + "grad_norm": 0.6490213871002197, + "learning_rate": 3.9397671682110826e-05, + "loss": 2.3943, + "step": 14176 + }, + { + "epoch": 1.1441368735372448, + "grad_norm": 0.6505936980247498, + "learning_rate": 3.938511479837147e-05, + "loss": 2.4188, + "step": 14177 + }, + { + "epoch": 1.1442175772738277, + "grad_norm": 0.6696773767471313, + "learning_rate": 3.9372559425313496e-05, + "loss": 2.4377, + "step": 14178 + }, + { + "epoch": 1.1442982810104108, + "grad_norm": 0.6747034192085266, + "learning_rate": 3.936000556324982e-05, + "loss": 2.4111, + "step": 14179 + }, + { + "epoch": 1.1443789847469938, + 
"grad_norm": 0.7766546607017517, + "learning_rate": 3.934745321249336e-05, + "loss": 2.3873, + "step": 14180 + }, + { + "epoch": 1.1444596884835767, + "grad_norm": 0.7608100175857544, + "learning_rate": 3.933490237335688e-05, + "loss": 2.4567, + "step": 14181 + }, + { + "epoch": 1.1445403922201598, + "grad_norm": 0.7724356055259705, + "learning_rate": 3.9322353046153205e-05, + "loss": 2.4729, + "step": 14182 + }, + { + "epoch": 1.1446210959567429, + "grad_norm": 0.6908414363861084, + "learning_rate": 3.930980523119515e-05, + "loss": 2.41, + "step": 14183 + }, + { + "epoch": 1.1447017996933257, + "grad_norm": 0.7209733128547668, + "learning_rate": 3.9297258928795356e-05, + "loss": 2.4629, + "step": 14184 + }, + { + "epoch": 1.1447825034299088, + "grad_norm": 0.7116519212722778, + "learning_rate": 3.928471413926651e-05, + "loss": 2.5081, + "step": 14185 + }, + { + "epoch": 1.144863207166492, + "grad_norm": 0.6704578995704651, + "learning_rate": 3.9272170862921365e-05, + "loss": 2.494, + "step": 14186 + }, + { + "epoch": 1.1449439109030748, + "grad_norm": 0.6914607882499695, + "learning_rate": 3.9259629100072435e-05, + "loss": 2.3979, + "step": 14187 + }, + { + "epoch": 1.1450246146396579, + "grad_norm": 0.7413245439529419, + "learning_rate": 3.924708885103233e-05, + "loss": 2.4534, + "step": 14188 + }, + { + "epoch": 1.1451053183762407, + "grad_norm": 0.7411661744117737, + "learning_rate": 3.923455011611362e-05, + "loss": 2.4191, + "step": 14189 + }, + { + "epoch": 1.1451860221128238, + "grad_norm": 0.6581972241401672, + "learning_rate": 3.9222012895628716e-05, + "loss": 2.4494, + "step": 14190 + }, + { + "epoch": 1.145266725849407, + "grad_norm": 0.6628647446632385, + "learning_rate": 3.920947718989013e-05, + "loss": 2.4483, + "step": 14191 + }, + { + "epoch": 1.1453474295859898, + "grad_norm": 0.7068151831626892, + "learning_rate": 3.9196942999210316e-05, + "loss": 2.4549, + "step": 14192 + }, + { + "epoch": 1.1454281333225729, + "grad_norm": 0.6727713942527771, + 
"learning_rate": 3.918441032390159e-05, + "loss": 2.4261, + "step": 14193 + }, + { + "epoch": 1.1455088370591557, + "grad_norm": 0.6680718660354614, + "learning_rate": 3.9171879164276334e-05, + "loss": 2.4705, + "step": 14194 + }, + { + "epoch": 1.1455895407957388, + "grad_norm": 0.710096538066864, + "learning_rate": 3.915934952064685e-05, + "loss": 2.474, + "step": 14195 + }, + { + "epoch": 1.145670244532322, + "grad_norm": 0.6927496790885925, + "learning_rate": 3.9146821393325414e-05, + "loss": 2.3979, + "step": 14196 + }, + { + "epoch": 1.1457509482689048, + "grad_norm": 0.6887550354003906, + "learning_rate": 3.913429478262427e-05, + "loss": 2.4588, + "step": 14197 + }, + { + "epoch": 1.1458316520054879, + "grad_norm": 0.6847062706947327, + "learning_rate": 3.912176968885559e-05, + "loss": 2.4602, + "step": 14198 + }, + { + "epoch": 1.145912355742071, + "grad_norm": 0.6832349300384521, + "learning_rate": 3.91092461123316e-05, + "loss": 2.4672, + "step": 14199 + }, + { + "epoch": 1.1459930594786538, + "grad_norm": 0.6789066791534424, + "learning_rate": 3.909672405336432e-05, + "loss": 2.5029, + "step": 14200 + }, + { + "epoch": 1.146073763215237, + "grad_norm": 0.6953951120376587, + "learning_rate": 3.9084203512265885e-05, + "loss": 2.4223, + "step": 14201 + }, + { + "epoch": 1.1461544669518198, + "grad_norm": 0.6629688739776611, + "learning_rate": 3.907168448934836e-05, + "loss": 2.4028, + "step": 14202 + }, + { + "epoch": 1.1462351706884029, + "grad_norm": 0.6661216020584106, + "learning_rate": 3.90591669849237e-05, + "loss": 2.4668, + "step": 14203 + }, + { + "epoch": 1.146315874424986, + "grad_norm": 0.6814442276954651, + "learning_rate": 3.9046650999303894e-05, + "loss": 2.4273, + "step": 14204 + }, + { + "epoch": 1.1463965781615688, + "grad_norm": 0.6678626537322998, + "learning_rate": 3.903413653280088e-05, + "loss": 2.444, + "step": 14205 + }, + { + "epoch": 1.146477281898152, + "grad_norm": 0.6703703999519348, + "learning_rate": 3.902162358572655e-05, + 
"loss": 2.4273, + "step": 14206 + }, + { + "epoch": 1.1465579856347348, + "grad_norm": 0.7052578926086426, + "learning_rate": 3.900911215839276e-05, + "loss": 2.4397, + "step": 14207 + }, + { + "epoch": 1.1466386893713179, + "grad_norm": 0.6792036294937134, + "learning_rate": 3.899660225111136e-05, + "loss": 2.439, + "step": 14208 + }, + { + "epoch": 1.146719393107901, + "grad_norm": 0.6995401978492737, + "learning_rate": 3.898409386419407e-05, + "loss": 2.5002, + "step": 14209 + }, + { + "epoch": 1.1468000968444838, + "grad_norm": 0.6527338027954102, + "learning_rate": 3.897158699795265e-05, + "loss": 2.4523, + "step": 14210 + }, + { + "epoch": 1.146880800581067, + "grad_norm": 0.7509400248527527, + "learning_rate": 3.8959081652698814e-05, + "loss": 2.4193, + "step": 14211 + }, + { + "epoch": 1.14696150431765, + "grad_norm": 0.6985350251197815, + "learning_rate": 3.894657782874426e-05, + "loss": 2.4251, + "step": 14212 + }, + { + "epoch": 1.1470422080542328, + "grad_norm": 0.6831483840942383, + "learning_rate": 3.893407552640055e-05, + "loss": 2.4172, + "step": 14213 + }, + { + "epoch": 1.147122911790816, + "grad_norm": 0.7281469702720642, + "learning_rate": 3.892157474597929e-05, + "loss": 2.4451, + "step": 14214 + }, + { + "epoch": 1.147203615527399, + "grad_norm": 0.7326027750968933, + "learning_rate": 3.8909075487792066e-05, + "loss": 2.3926, + "step": 14215 + }, + { + "epoch": 1.1472843192639819, + "grad_norm": 0.7030496597290039, + "learning_rate": 3.889657775215036e-05, + "loss": 2.435, + "step": 14216 + }, + { + "epoch": 1.147365023000565, + "grad_norm": 0.6915596127510071, + "learning_rate": 3.888408153936568e-05, + "loss": 2.4622, + "step": 14217 + }, + { + "epoch": 1.1474457267371478, + "grad_norm": 0.678600013256073, + "learning_rate": 3.8871586849749474e-05, + "loss": 2.4264, + "step": 14218 + }, + { + "epoch": 1.147526430473731, + "grad_norm": 0.7487786412239075, + "learning_rate": 3.885909368361308e-05, + "loss": 2.4038, + "step": 14219 + }, + { + 
"epoch": 1.147607134210314, + "grad_norm": 0.6658064723014832, + "learning_rate": 3.8846602041267886e-05, + "loss": 2.4079, + "step": 14220 + }, + { + "epoch": 1.1476878379468969, + "grad_norm": 0.6985111832618713, + "learning_rate": 3.883411192302527e-05, + "loss": 2.481, + "step": 14221 + }, + { + "epoch": 1.14776854168348, + "grad_norm": 0.7056208848953247, + "learning_rate": 3.8821623329196445e-05, + "loss": 2.4409, + "step": 14222 + }, + { + "epoch": 1.1478492454200628, + "grad_norm": 0.7107830047607422, + "learning_rate": 3.880913626009268e-05, + "loss": 2.4578, + "step": 14223 + }, + { + "epoch": 1.147929949156646, + "grad_norm": 0.6678555607795715, + "learning_rate": 3.87966507160252e-05, + "loss": 2.4548, + "step": 14224 + }, + { + "epoch": 1.148010652893229, + "grad_norm": 0.6699830293655396, + "learning_rate": 3.8784166697305157e-05, + "loss": 2.3763, + "step": 14225 + }, + { + "epoch": 1.1480913566298119, + "grad_norm": 0.7695464491844177, + "learning_rate": 3.8771684204243716e-05, + "loss": 2.4774, + "step": 14226 + }, + { + "epoch": 1.148172060366395, + "grad_norm": 0.7801330089569092, + "learning_rate": 3.8759203237151954e-05, + "loss": 2.4598, + "step": 14227 + }, + { + "epoch": 1.148252764102978, + "grad_norm": 0.7029622793197632, + "learning_rate": 3.8746723796340955e-05, + "loss": 2.3901, + "step": 14228 + }, + { + "epoch": 1.148333467839561, + "grad_norm": 0.7472359538078308, + "learning_rate": 3.873424588212169e-05, + "loss": 2.4724, + "step": 14229 + }, + { + "epoch": 1.148414171576144, + "grad_norm": 0.6621725559234619, + "learning_rate": 3.872176949480517e-05, + "loss": 2.4523, + "step": 14230 + }, + { + "epoch": 1.148494875312727, + "grad_norm": 0.722658634185791, + "learning_rate": 3.8709294634702376e-05, + "loss": 2.4032, + "step": 14231 + }, + { + "epoch": 1.14857557904931, + "grad_norm": 0.7743202447891235, + "learning_rate": 3.869682130212413e-05, + "loss": 2.4373, + "step": 14232 + }, + { + "epoch": 1.148656282785893, + "grad_norm": 
0.6906178593635559, + "learning_rate": 3.868434949738136e-05, + "loss": 2.4765, + "step": 14233 + }, + { + "epoch": 1.148736986522476, + "grad_norm": 0.6708275675773621, + "learning_rate": 3.86718792207849e-05, + "loss": 2.4263, + "step": 14234 + }, + { + "epoch": 1.148817690259059, + "grad_norm": 0.6992776989936829, + "learning_rate": 3.8659410472645494e-05, + "loss": 2.378, + "step": 14235 + }, + { + "epoch": 1.148898393995642, + "grad_norm": 0.7229011058807373, + "learning_rate": 3.864694325327389e-05, + "loss": 2.4075, + "step": 14236 + }, + { + "epoch": 1.148979097732225, + "grad_norm": 0.6622509956359863, + "learning_rate": 3.863447756298091e-05, + "loss": 2.3954, + "step": 14237 + }, + { + "epoch": 1.149059801468808, + "grad_norm": 0.7233534455299377, + "learning_rate": 3.862201340207712e-05, + "loss": 2.4506, + "step": 14238 + }, + { + "epoch": 1.149140505205391, + "grad_norm": 0.716869056224823, + "learning_rate": 3.860955077087321e-05, + "loss": 2.4304, + "step": 14239 + }, + { + "epoch": 1.149221208941974, + "grad_norm": 0.6550257205963135, + "learning_rate": 3.8597089669679766e-05, + "loss": 2.4261, + "step": 14240 + }, + { + "epoch": 1.149301912678557, + "grad_norm": 0.6981741786003113, + "learning_rate": 3.858463009880738e-05, + "loss": 2.4115, + "step": 14241 + }, + { + "epoch": 1.14938261641514, + "grad_norm": 0.6792196035385132, + "learning_rate": 3.8572172058566534e-05, + "loss": 2.4195, + "step": 14242 + }, + { + "epoch": 1.149463320151723, + "grad_norm": 0.7278807163238525, + "learning_rate": 3.855971554926773e-05, + "loss": 2.418, + "step": 14243 + }, + { + "epoch": 1.1495440238883061, + "grad_norm": 0.6451076865196228, + "learning_rate": 3.8547260571221456e-05, + "loss": 2.4591, + "step": 14244 + }, + { + "epoch": 1.149624727624889, + "grad_norm": 0.7052451968193054, + "learning_rate": 3.853480712473805e-05, + "loss": 2.4023, + "step": 14245 + }, + { + "epoch": 1.149705431361472, + "grad_norm": 0.7016182541847229, + "learning_rate": 
3.852235521012793e-05, + "loss": 2.4959, + "step": 14246 + }, + { + "epoch": 1.1497861350980552, + "grad_norm": 0.7287492156028748, + "learning_rate": 3.850990482770141e-05, + "loss": 2.3884, + "step": 14247 + }, + { + "epoch": 1.149866838834638, + "grad_norm": 0.6648508310317993, + "learning_rate": 3.84974559777688e-05, + "loss": 2.4632, + "step": 14248 + }, + { + "epoch": 1.1499475425712211, + "grad_norm": 0.7387828230857849, + "learning_rate": 3.848500866064036e-05, + "loss": 2.4053, + "step": 14249 + }, + { + "epoch": 1.150028246307804, + "grad_norm": 0.7230356931686401, + "learning_rate": 3.847256287662635e-05, + "loss": 2.5128, + "step": 14250 + }, + { + "epoch": 1.150108950044387, + "grad_norm": 0.7209547162055969, + "learning_rate": 3.846011862603686e-05, + "loss": 2.4626, + "step": 14251 + }, + { + "epoch": 1.1501896537809702, + "grad_norm": 0.7177916765213013, + "learning_rate": 3.844767590918209e-05, + "loss": 2.4469, + "step": 14252 + }, + { + "epoch": 1.150270357517553, + "grad_norm": 0.7850151658058167, + "learning_rate": 3.843523472637216e-05, + "loss": 2.4731, + "step": 14253 + }, + { + "epoch": 1.150351061254136, + "grad_norm": 0.7051519155502319, + "learning_rate": 3.8422795077917084e-05, + "loss": 2.3696, + "step": 14254 + }, + { + "epoch": 1.150431764990719, + "grad_norm": 0.7434025406837463, + "learning_rate": 3.841035696412692e-05, + "loss": 2.444, + "step": 14255 + }, + { + "epoch": 1.150512468727302, + "grad_norm": 0.7404719591140747, + "learning_rate": 3.839792038531166e-05, + "loss": 2.4415, + "step": 14256 + }, + { + "epoch": 1.1505931724638851, + "grad_norm": 0.6883764266967773, + "learning_rate": 3.838548534178125e-05, + "loss": 2.4887, + "step": 14257 + }, + { + "epoch": 1.150673876200468, + "grad_norm": 0.6697155237197876, + "learning_rate": 3.83730518338456e-05, + "loss": 2.3721, + "step": 14258 + }, + { + "epoch": 1.150754579937051, + "grad_norm": 0.68825763463974, + "learning_rate": 3.836061986181459e-05, + "loss": 2.4712, + 
"step": 14259 + }, + { + "epoch": 1.1508352836736342, + "grad_norm": 0.6810611486434937, + "learning_rate": 3.8348189425998114e-05, + "loss": 2.3995, + "step": 14260 + }, + { + "epoch": 1.150915987410217, + "grad_norm": 0.6718329787254333, + "learning_rate": 3.8335760526705866e-05, + "loss": 2.4068, + "step": 14261 + }, + { + "epoch": 1.1509966911468001, + "grad_norm": 0.694618284702301, + "learning_rate": 3.832333316424767e-05, + "loss": 2.458, + "step": 14262 + }, + { + "epoch": 1.151077394883383, + "grad_norm": 0.6824250817298889, + "learning_rate": 3.8310907338933266e-05, + "loss": 2.4623, + "step": 14263 + }, + { + "epoch": 1.151158098619966, + "grad_norm": 0.6875178217887878, + "learning_rate": 3.8298483051072264e-05, + "loss": 2.4827, + "step": 14264 + }, + { + "epoch": 1.1512388023565492, + "grad_norm": 0.7868281602859497, + "learning_rate": 3.828606030097437e-05, + "loss": 2.4638, + "step": 14265 + }, + { + "epoch": 1.151319506093132, + "grad_norm": 0.7003639936447144, + "learning_rate": 3.8273639088949165e-05, + "loss": 2.4885, + "step": 14266 + }, + { + "epoch": 1.1514002098297151, + "grad_norm": 0.6965197920799255, + "learning_rate": 3.826121941530623e-05, + "loss": 2.3983, + "step": 14267 + }, + { + "epoch": 1.151480913566298, + "grad_norm": 0.7241101264953613, + "learning_rate": 3.824880128035509e-05, + "loss": 2.4598, + "step": 14268 + }, + { + "epoch": 1.151561617302881, + "grad_norm": 0.700764536857605, + "learning_rate": 3.823638468440528e-05, + "loss": 2.3627, + "step": 14269 + }, + { + "epoch": 1.1516423210394642, + "grad_norm": 0.6889846324920654, + "learning_rate": 3.822396962776619e-05, + "loss": 2.4442, + "step": 14270 + }, + { + "epoch": 1.151723024776047, + "grad_norm": 0.6660009026527405, + "learning_rate": 3.8211556110747245e-05, + "loss": 2.403, + "step": 14271 + }, + { + "epoch": 1.1518037285126301, + "grad_norm": 0.6537240743637085, + "learning_rate": 3.819914413365785e-05, + "loss": 2.4358, + "step": 14272 + }, + { + "epoch": 
1.1518844322492132, + "grad_norm": 0.6852741837501526, + "learning_rate": 3.818673369680735e-05, + "loss": 2.4272, + "step": 14273 + }, + { + "epoch": 1.151965135985796, + "grad_norm": 0.701874852180481, + "learning_rate": 3.817432480050501e-05, + "loss": 2.4419, + "step": 14274 + }, + { + "epoch": 1.1520458397223792, + "grad_norm": 0.7089500427246094, + "learning_rate": 3.816191744506011e-05, + "loss": 2.4537, + "step": 14275 + }, + { + "epoch": 1.1521265434589623, + "grad_norm": 0.698564887046814, + "learning_rate": 3.8149511630781866e-05, + "loss": 2.3991, + "step": 14276 + }, + { + "epoch": 1.1522072471955451, + "grad_norm": 0.6940335035324097, + "learning_rate": 3.813710735797947e-05, + "loss": 2.5022, + "step": 14277 + }, + { + "epoch": 1.1522879509321282, + "grad_norm": 0.6916826367378235, + "learning_rate": 3.812470462696208e-05, + "loss": 2.4449, + "step": 14278 + }, + { + "epoch": 1.152368654668711, + "grad_norm": 0.7115256190299988, + "learning_rate": 3.811230343803882e-05, + "loss": 2.4371, + "step": 14279 + }, + { + "epoch": 1.1524493584052942, + "grad_norm": 0.6857369542121887, + "learning_rate": 3.80999037915187e-05, + "loss": 2.4426, + "step": 14280 + }, + { + "epoch": 1.1525300621418773, + "grad_norm": 0.7605363130569458, + "learning_rate": 3.808750568771079e-05, + "loss": 2.4999, + "step": 14281 + }, + { + "epoch": 1.1526107658784601, + "grad_norm": 0.6604358553886414, + "learning_rate": 3.8075109126924115e-05, + "loss": 2.419, + "step": 14282 + }, + { + "epoch": 1.1526914696150432, + "grad_norm": 0.6945412755012512, + "learning_rate": 3.806271410946756e-05, + "loss": 2.4555, + "step": 14283 + }, + { + "epoch": 1.152772173351626, + "grad_norm": 0.7205908894538879, + "learning_rate": 3.805032063565007e-05, + "loss": 2.4745, + "step": 14284 + }, + { + "epoch": 1.1528528770882092, + "grad_norm": 0.7198025584220886, + "learning_rate": 3.8037928705780554e-05, + "loss": 2.4358, + "step": 14285 + }, + { + "epoch": 1.1529335808247922, + "grad_norm": 
0.7231044769287109, + "learning_rate": 3.802553832016781e-05, + "loss": 2.4713, + "step": 14286 + }, + { + "epoch": 1.1530142845613751, + "grad_norm": 0.6878815293312073, + "learning_rate": 3.80131494791206e-05, + "loss": 2.4479, + "step": 14287 + }, + { + "epoch": 1.1530949882979582, + "grad_norm": 0.6930533647537231, + "learning_rate": 3.800076218294779e-05, + "loss": 2.3912, + "step": 14288 + }, + { + "epoch": 1.1531756920345413, + "grad_norm": 0.703521192073822, + "learning_rate": 3.798837643195808e-05, + "loss": 2.451, + "step": 14289 + }, + { + "epoch": 1.1532563957711242, + "grad_norm": 0.7099746465682983, + "learning_rate": 3.79759922264601e-05, + "loss": 2.4957, + "step": 14290 + }, + { + "epoch": 1.1533370995077072, + "grad_norm": 0.7268218398094177, + "learning_rate": 3.7963609566762527e-05, + "loss": 2.4242, + "step": 14291 + }, + { + "epoch": 1.1534178032442903, + "grad_norm": 0.7465239763259888, + "learning_rate": 3.7951228453174004e-05, + "loss": 2.3867, + "step": 14292 + }, + { + "epoch": 1.1534985069808732, + "grad_norm": 0.704584002494812, + "learning_rate": 3.793884888600302e-05, + "loss": 2.5009, + "step": 14293 + }, + { + "epoch": 1.1535792107174563, + "grad_norm": 0.7057262063026428, + "learning_rate": 3.792647086555816e-05, + "loss": 2.4381, + "step": 14294 + }, + { + "epoch": 1.1536599144540391, + "grad_norm": 0.7045955061912537, + "learning_rate": 3.791409439214794e-05, + "loss": 2.4456, + "step": 14295 + }, + { + "epoch": 1.1537406181906222, + "grad_norm": 0.705476701259613, + "learning_rate": 3.790171946608074e-05, + "loss": 2.466, + "step": 14296 + }, + { + "epoch": 1.1538213219272053, + "grad_norm": 0.7128286957740784, + "learning_rate": 3.788934608766503e-05, + "loss": 2.4891, + "step": 14297 + }, + { + "epoch": 1.1539020256637882, + "grad_norm": 0.678144633769989, + "learning_rate": 3.787697425720918e-05, + "loss": 2.4453, + "step": 14298 + }, + { + "epoch": 1.1539827294003713, + "grad_norm": 0.754216730594635, + "learning_rate": 
3.786460397502151e-05, + "loss": 2.4331, + "step": 14299 + }, + { + "epoch": 1.1540634331369541, + "grad_norm": 0.6881092190742493, + "learning_rate": 3.7852235241410325e-05, + "loss": 2.3692, + "step": 14300 + }, + { + "epoch": 1.1541441368735372, + "grad_norm": 0.7498507499694824, + "learning_rate": 3.783986805668395e-05, + "loss": 2.4556, + "step": 14301 + }, + { + "epoch": 1.1542248406101203, + "grad_norm": 0.6312216520309448, + "learning_rate": 3.7827502421150496e-05, + "loss": 2.4727, + "step": 14302 + }, + { + "epoch": 1.1543055443467032, + "grad_norm": 0.7156404256820679, + "learning_rate": 3.781513833511822e-05, + "loss": 2.4003, + "step": 14303 + }, + { + "epoch": 1.1543862480832863, + "grad_norm": 0.6589376926422119, + "learning_rate": 3.7802775798895226e-05, + "loss": 2.4461, + "step": 14304 + }, + { + "epoch": 1.1544669518198694, + "grad_norm": 0.7259865999221802, + "learning_rate": 3.77904148127897e-05, + "loss": 2.4021, + "step": 14305 + }, + { + "epoch": 1.1545476555564522, + "grad_norm": 0.7248456478118896, + "learning_rate": 3.777805537710961e-05, + "loss": 2.4784, + "step": 14306 + }, + { + "epoch": 1.1546283592930353, + "grad_norm": 0.7085593342781067, + "learning_rate": 3.7765697492163034e-05, + "loss": 2.4394, + "step": 14307 + }, + { + "epoch": 1.1547090630296182, + "grad_norm": 0.7394313216209412, + "learning_rate": 3.775334115825796e-05, + "loss": 2.5055, + "step": 14308 + }, + { + "epoch": 1.1547897667662013, + "grad_norm": 0.7231999039649963, + "learning_rate": 3.7740986375702336e-05, + "loss": 2.4551, + "step": 14309 + }, + { + "epoch": 1.1548704705027844, + "grad_norm": 0.6875953078269958, + "learning_rate": 3.7728633144804084e-05, + "loss": 2.4641, + "step": 14310 + }, + { + "epoch": 1.1549511742393672, + "grad_norm": 0.7477203607559204, + "learning_rate": 3.7716281465871094e-05, + "loss": 2.4929, + "step": 14311 + }, + { + "epoch": 1.1550318779759503, + "grad_norm": 0.6653971076011658, + "learning_rate": 3.770393133921115e-05, + 
"loss": 2.4819, + "step": 14312 + }, + { + "epoch": 1.1551125817125332, + "grad_norm": 0.7267318964004517, + "learning_rate": 3.769158276513209e-05, + "loss": 2.4568, + "step": 14313 + }, + { + "epoch": 1.1551932854491163, + "grad_norm": 0.6675654053688049, + "learning_rate": 3.76792357439417e-05, + "loss": 2.4789, + "step": 14314 + }, + { + "epoch": 1.1552739891856993, + "grad_norm": 0.6847487688064575, + "learning_rate": 3.7666890275947616e-05, + "loss": 2.4034, + "step": 14315 + }, + { + "epoch": 1.1553546929222822, + "grad_norm": 0.811553418636322, + "learning_rate": 3.765454636145758e-05, + "loss": 2.5051, + "step": 14316 + }, + { + "epoch": 1.1554353966588653, + "grad_norm": 0.690026581287384, + "learning_rate": 3.7642204000779204e-05, + "loss": 2.4477, + "step": 14317 + }, + { + "epoch": 1.1555161003954484, + "grad_norm": 0.695810079574585, + "learning_rate": 3.762986319422013e-05, + "loss": 2.4516, + "step": 14318 + }, + { + "epoch": 1.1555968041320313, + "grad_norm": 0.6869217753410339, + "learning_rate": 3.7617523942087886e-05, + "loss": 2.3802, + "step": 14319 + }, + { + "epoch": 1.1556775078686143, + "grad_norm": 0.7109078764915466, + "learning_rate": 3.7605186244690016e-05, + "loss": 2.4306, + "step": 14320 + }, + { + "epoch": 1.1557582116051974, + "grad_norm": 0.7385044693946838, + "learning_rate": 3.759285010233404e-05, + "loss": 2.4288, + "step": 14321 + }, + { + "epoch": 1.1558389153417803, + "grad_norm": 0.6775605082511902, + "learning_rate": 3.7580515515327355e-05, + "loss": 2.4155, + "step": 14322 + }, + { + "epoch": 1.1559196190783634, + "grad_norm": 0.7325694561004639, + "learning_rate": 3.7568182483977375e-05, + "loss": 2.5035, + "step": 14323 + }, + { + "epoch": 1.1560003228149462, + "grad_norm": 0.6896799206733704, + "learning_rate": 3.7555851008591526e-05, + "loss": 2.4739, + "step": 14324 + }, + { + "epoch": 1.1560810265515293, + "grad_norm": 0.7086506485939026, + "learning_rate": 3.7543521089477065e-05, + "loss": 2.4815, + "step": 14325 
+ }, + { + "epoch": 1.1561617302881124, + "grad_norm": 0.6886687874794006, + "learning_rate": 3.753119272694132e-05, + "loss": 2.4261, + "step": 14326 + }, + { + "epoch": 1.1562424340246953, + "grad_norm": 0.675136148929596, + "learning_rate": 3.751886592129155e-05, + "loss": 2.3946, + "step": 14327 + }, + { + "epoch": 1.1563231377612784, + "grad_norm": 0.706729531288147, + "learning_rate": 3.7506540672834964e-05, + "loss": 2.4199, + "step": 14328 + }, + { + "epoch": 1.1564038414978612, + "grad_norm": 0.6790904998779297, + "learning_rate": 3.749421698187875e-05, + "loss": 2.4419, + "step": 14329 + }, + { + "epoch": 1.1564845452344443, + "grad_norm": 0.6688171029090881, + "learning_rate": 3.748189484873007e-05, + "loss": 2.4516, + "step": 14330 + }, + { + "epoch": 1.1565652489710274, + "grad_norm": 0.6782420873641968, + "learning_rate": 3.746957427369596e-05, + "loss": 2.4586, + "step": 14331 + }, + { + "epoch": 1.1566459527076103, + "grad_norm": 0.7633399367332458, + "learning_rate": 3.7457255257083514e-05, + "loss": 2.3776, + "step": 14332 + }, + { + "epoch": 1.1567266564441934, + "grad_norm": 0.680000364780426, + "learning_rate": 3.744493779919976e-05, + "loss": 2.4978, + "step": 14333 + }, + { + "epoch": 1.1568073601807765, + "grad_norm": 0.6993350386619568, + "learning_rate": 3.743262190035171e-05, + "loss": 2.3974, + "step": 14334 + }, + { + "epoch": 1.1568880639173593, + "grad_norm": 0.7316375374794006, + "learning_rate": 3.7420307560846234e-05, + "loss": 2.4423, + "step": 14335 + }, + { + "epoch": 1.1569687676539424, + "grad_norm": 0.7384842038154602, + "learning_rate": 3.7407994780990285e-05, + "loss": 2.4604, + "step": 14336 + }, + { + "epoch": 1.1570494713905255, + "grad_norm": 0.6980708837509155, + "learning_rate": 3.739568356109072e-05, + "loss": 2.4408, + "step": 14337 + }, + { + "epoch": 1.1571301751271084, + "grad_norm": 0.6510182619094849, + "learning_rate": 3.738337390145438e-05, + "loss": 2.4076, + "step": 14338 + }, + { + "epoch": 
1.1572108788636915, + "grad_norm": 0.7458614706993103, + "learning_rate": 3.737106580238804e-05, + "loss": 2.4976, + "step": 14339 + }, + { + "epoch": 1.1572915826002743, + "grad_norm": 0.6663469672203064, + "learning_rate": 3.735875926419849e-05, + "loss": 2.4414, + "step": 14340 + }, + { + "epoch": 1.1573722863368574, + "grad_norm": 0.6611858606338501, + "learning_rate": 3.7346454287192355e-05, + "loss": 2.3783, + "step": 14341 + }, + { + "epoch": 1.1574529900734405, + "grad_norm": 0.6605291366577148, + "learning_rate": 3.7334150871676364e-05, + "loss": 2.4291, + "step": 14342 + }, + { + "epoch": 1.1575336938100234, + "grad_norm": 0.6879985928535461, + "learning_rate": 3.7321849017957186e-05, + "loss": 2.4229, + "step": 14343 + }, + { + "epoch": 1.1576143975466064, + "grad_norm": 0.7466493844985962, + "learning_rate": 3.7309548726341334e-05, + "loss": 2.4278, + "step": 14344 + }, + { + "epoch": 1.1576951012831893, + "grad_norm": 0.7476457357406616, + "learning_rate": 3.72972499971354e-05, + "loss": 2.4944, + "step": 14345 + }, + { + "epoch": 1.1577758050197724, + "grad_norm": 0.6339364647865295, + "learning_rate": 3.728495283064594e-05, + "loss": 2.3753, + "step": 14346 + }, + { + "epoch": 1.1578565087563555, + "grad_norm": 0.6885230541229248, + "learning_rate": 3.7272657227179355e-05, + "loss": 2.4519, + "step": 14347 + }, + { + "epoch": 1.1579372124929384, + "grad_norm": 0.7561741471290588, + "learning_rate": 3.7260363187042126e-05, + "loss": 2.4808, + "step": 14348 + }, + { + "epoch": 1.1580179162295214, + "grad_norm": 0.8007705211639404, + "learning_rate": 3.724807071054062e-05, + "loss": 2.4649, + "step": 14349 + }, + { + "epoch": 1.1580986199661045, + "grad_norm": 0.6920937895774841, + "learning_rate": 3.72357797979813e-05, + "loss": 2.4145, + "step": 14350 + }, + { + "epoch": 1.1581793237026874, + "grad_norm": 0.7310675978660583, + "learning_rate": 3.7223490449670364e-05, + "loss": 2.4475, + "step": 14351 + }, + { + "epoch": 1.1582600274392705, + 
"grad_norm": 0.6600463390350342, + "learning_rate": 3.7211202665914155e-05, + "loss": 2.3938, + "step": 14352 + }, + { + "epoch": 1.1583407311758536, + "grad_norm": 0.690258800983429, + "learning_rate": 3.719891644701894e-05, + "loss": 2.3944, + "step": 14353 + }, + { + "epoch": 1.1584214349124364, + "grad_norm": 0.7075135111808777, + "learning_rate": 3.718663179329085e-05, + "loss": 2.3931, + "step": 14354 + }, + { + "epoch": 1.1585021386490195, + "grad_norm": 0.7416332960128784, + "learning_rate": 3.71743487050361e-05, + "loss": 2.4566, + "step": 14355 + }, + { + "epoch": 1.1585828423856024, + "grad_norm": 0.7459710836410522, + "learning_rate": 3.7162067182560846e-05, + "loss": 2.4232, + "step": 14356 + }, + { + "epoch": 1.1586635461221855, + "grad_norm": 0.7265400886535645, + "learning_rate": 3.71497872261711e-05, + "loss": 2.4798, + "step": 14357 + }, + { + "epoch": 1.1587442498587683, + "grad_norm": 0.7142636775970459, + "learning_rate": 3.713750883617294e-05, + "loss": 2.4576, + "step": 14358 + }, + { + "epoch": 1.1588249535953514, + "grad_norm": 0.7279871702194214, + "learning_rate": 3.712523201287239e-05, + "loss": 2.439, + "step": 14359 + }, + { + "epoch": 1.1589056573319345, + "grad_norm": 0.7151274681091309, + "learning_rate": 3.7112956756575414e-05, + "loss": 2.4684, + "step": 14360 + }, + { + "epoch": 1.1589863610685174, + "grad_norm": 0.7142657041549683, + "learning_rate": 3.7100683067587946e-05, + "loss": 2.4582, + "step": 14361 + }, + { + "epoch": 1.1590670648051005, + "grad_norm": 0.7716035842895508, + "learning_rate": 3.7088410946215914e-05, + "loss": 2.5038, + "step": 14362 + }, + { + "epoch": 1.1591477685416836, + "grad_norm": 0.7232338190078735, + "learning_rate": 3.707614039276509e-05, + "loss": 2.4558, + "step": 14363 + }, + { + "epoch": 1.1592284722782664, + "grad_norm": 0.7388719916343689, + "learning_rate": 3.706387140754134e-05, + "loss": 2.4535, + "step": 14364 + }, + { + "epoch": 1.1593091760148495, + "grad_norm": 0.7022652626037598, + 
"learning_rate": 3.7051603990850425e-05, + "loss": 2.4479, + "step": 14365 + }, + { + "epoch": 1.1593898797514326, + "grad_norm": 0.7861798405647278, + "learning_rate": 3.703933814299813e-05, + "loss": 2.4219, + "step": 14366 + }, + { + "epoch": 1.1594705834880155, + "grad_norm": 0.6928723454475403, + "learning_rate": 3.7027073864290074e-05, + "loss": 2.4401, + "step": 14367 + }, + { + "epoch": 1.1595512872245985, + "grad_norm": 0.6312821507453918, + "learning_rate": 3.701481115503194e-05, + "loss": 2.3975, + "step": 14368 + }, + { + "epoch": 1.1596319909611814, + "grad_norm": 0.7008257508277893, + "learning_rate": 3.700255001552937e-05, + "loss": 2.4988, + "step": 14369 + }, + { + "epoch": 1.1597126946977645, + "grad_norm": 0.6664693355560303, + "learning_rate": 3.699029044608792e-05, + "loss": 2.4123, + "step": 14370 + }, + { + "epoch": 1.1597933984343476, + "grad_norm": 0.6613842844963074, + "learning_rate": 3.6978032447013145e-05, + "loss": 2.4802, + "step": 14371 + }, + { + "epoch": 1.1598741021709305, + "grad_norm": 0.707788348197937, + "learning_rate": 3.696577601861057e-05, + "loss": 2.4432, + "step": 14372 + }, + { + "epoch": 1.1599548059075135, + "grad_norm": 0.6547604203224182, + "learning_rate": 3.695352116118561e-05, + "loss": 2.412, + "step": 14373 + }, + { + "epoch": 1.1600355096440964, + "grad_norm": 0.7238109707832336, + "learning_rate": 3.69412678750437e-05, + "loss": 2.4858, + "step": 14374 + }, + { + "epoch": 1.1601162133806795, + "grad_norm": 0.8156580328941345, + "learning_rate": 3.692901616049026e-05, + "loss": 2.4063, + "step": 14375 + }, + { + "epoch": 1.1601969171172626, + "grad_norm": 0.7035481333732605, + "learning_rate": 3.6916766017830585e-05, + "loss": 2.4586, + "step": 14376 + }, + { + "epoch": 1.1602776208538454, + "grad_norm": 0.7523401379585266, + "learning_rate": 3.690451744736999e-05, + "loss": 2.4262, + "step": 14377 + }, + { + "epoch": 1.1603583245904285, + "grad_norm": 0.6740732192993164, + "learning_rate": 
3.689227044941376e-05, + "loss": 2.5215, + "step": 14378 + }, + { + "epoch": 1.1604390283270116, + "grad_norm": 0.6502695083618164, + "learning_rate": 3.6880025024267115e-05, + "loss": 2.4292, + "step": 14379 + }, + { + "epoch": 1.1605197320635945, + "grad_norm": 0.7000409364700317, + "learning_rate": 3.686778117223524e-05, + "loss": 2.4323, + "step": 14380 + }, + { + "epoch": 1.1606004358001776, + "grad_norm": 0.7415478229522705, + "learning_rate": 3.68555388936233e-05, + "loss": 2.4515, + "step": 14381 + }, + { + "epoch": 1.1606811395367607, + "grad_norm": 0.6890547871589661, + "learning_rate": 3.684329818873641e-05, + "loss": 2.4115, + "step": 14382 + }, + { + "epoch": 1.1607618432733435, + "grad_norm": 0.8238685727119446, + "learning_rate": 3.68310590578796e-05, + "loss": 2.4666, + "step": 14383 + }, + { + "epoch": 1.1608425470099266, + "grad_norm": 0.8098889589309692, + "learning_rate": 3.681882150135791e-05, + "loss": 2.4667, + "step": 14384 + }, + { + "epoch": 1.1609232507465095, + "grad_norm": 0.6932713985443115, + "learning_rate": 3.680658551947639e-05, + "loss": 2.4574, + "step": 14385 + }, + { + "epoch": 1.1610039544830926, + "grad_norm": 0.7062943577766418, + "learning_rate": 3.6794351112539915e-05, + "loss": 2.4408, + "step": 14386 + }, + { + "epoch": 1.1610846582196757, + "grad_norm": 0.7859255075454712, + "learning_rate": 3.678211828085343e-05, + "loss": 2.3946, + "step": 14387 + }, + { + "epoch": 1.1611653619562585, + "grad_norm": 0.674609899520874, + "learning_rate": 3.676988702472181e-05, + "loss": 2.4456, + "step": 14388 + }, + { + "epoch": 1.1612460656928416, + "grad_norm": 0.7068402171134949, + "learning_rate": 3.675765734444989e-05, + "loss": 2.4393, + "step": 14389 + }, + { + "epoch": 1.1613267694294245, + "grad_norm": 0.7276526689529419, + "learning_rate": 3.674542924034246e-05, + "loss": 2.456, + "step": 14390 + }, + { + "epoch": 1.1614074731660076, + "grad_norm": 0.7670585513114929, + "learning_rate": 3.673320271270433e-05, + "loss": 
2.3774, + "step": 14391 + }, + { + "epoch": 1.1614881769025907, + "grad_norm": 0.702173113822937, + "learning_rate": 3.672097776184013e-05, + "loss": 2.3974, + "step": 14392 + }, + { + "epoch": 1.1615688806391735, + "grad_norm": 0.6922066807746887, + "learning_rate": 3.670875438805457e-05, + "loss": 2.4035, + "step": 14393 + }, + { + "epoch": 1.1616495843757566, + "grad_norm": 0.6675707697868347, + "learning_rate": 3.6696532591652335e-05, + "loss": 2.4369, + "step": 14394 + }, + { + "epoch": 1.1617302881123397, + "grad_norm": 0.6939712762832642, + "learning_rate": 3.668431237293796e-05, + "loss": 2.4265, + "step": 14395 + }, + { + "epoch": 1.1618109918489226, + "grad_norm": 0.719510018825531, + "learning_rate": 3.667209373221602e-05, + "loss": 2.4686, + "step": 14396 + }, + { + "epoch": 1.1618916955855056, + "grad_norm": 0.7167489528656006, + "learning_rate": 3.665987666979104e-05, + "loss": 2.5077, + "step": 14397 + }, + { + "epoch": 1.1619723993220887, + "grad_norm": 0.6539514064788818, + "learning_rate": 3.664766118596754e-05, + "loss": 2.4476, + "step": 14398 + }, + { + "epoch": 1.1620531030586716, + "grad_norm": 0.6926440596580505, + "learning_rate": 3.6635447281049876e-05, + "loss": 2.4336, + "step": 14399 + }, + { + "epoch": 1.1621338067952547, + "grad_norm": 0.7124993205070496, + "learning_rate": 3.662323495534252e-05, + "loss": 2.3938, + "step": 14400 + }, + { + "epoch": 1.1622145105318376, + "grad_norm": 0.7073954939842224, + "learning_rate": 3.661102420914986e-05, + "loss": 2.4232, + "step": 14401 + }, + { + "epoch": 1.1622952142684206, + "grad_norm": 0.7491076588630676, + "learning_rate": 3.659881504277613e-05, + "loss": 2.5047, + "step": 14402 + }, + { + "epoch": 1.1623759180050037, + "grad_norm": 0.6698675155639648, + "learning_rate": 3.658660745652568e-05, + "loss": 2.4164, + "step": 14403 + }, + { + "epoch": 1.1624566217415866, + "grad_norm": 0.6576815843582153, + "learning_rate": 3.657440145070276e-05, + "loss": 2.4368, + "step": 14404 + }, + { + 
"epoch": 1.1625373254781697, + "grad_norm": 0.8236953020095825, + "learning_rate": 3.6562197025611524e-05, + "loss": 2.5041, + "step": 14405 + }, + { + "epoch": 1.1626180292147525, + "grad_norm": 0.7391532063484192, + "learning_rate": 3.6549994181556157e-05, + "loss": 2.4556, + "step": 14406 + }, + { + "epoch": 1.1626987329513356, + "grad_norm": 0.6529936790466309, + "learning_rate": 3.653779291884084e-05, + "loss": 2.4559, + "step": 14407 + }, + { + "epoch": 1.1627794366879187, + "grad_norm": 0.7101796269416809, + "learning_rate": 3.652559323776957e-05, + "loss": 2.3937, + "step": 14408 + }, + { + "epoch": 1.1628601404245016, + "grad_norm": 0.6890308260917664, + "learning_rate": 3.651339513864645e-05, + "loss": 2.4694, + "step": 14409 + }, + { + "epoch": 1.1629408441610847, + "grad_norm": 0.6919918060302734, + "learning_rate": 3.650119862177548e-05, + "loss": 2.4793, + "step": 14410 + }, + { + "epoch": 1.1630215478976678, + "grad_norm": 0.6553575992584229, + "learning_rate": 3.6489003687460624e-05, + "loss": 2.454, + "step": 14411 + }, + { + "epoch": 1.1631022516342506, + "grad_norm": 0.7095460891723633, + "learning_rate": 3.6476810336005804e-05, + "loss": 2.4672, + "step": 14412 + }, + { + "epoch": 1.1631829553708337, + "grad_norm": 0.738301694393158, + "learning_rate": 3.6464618567714935e-05, + "loss": 2.4369, + "step": 14413 + }, + { + "epoch": 1.1632636591074166, + "grad_norm": 0.7574542760848999, + "learning_rate": 3.645242838289189e-05, + "loss": 2.4981, + "step": 14414 + }, + { + "epoch": 1.1633443628439997, + "grad_norm": 0.6780585646629333, + "learning_rate": 3.64402397818404e-05, + "loss": 2.4811, + "step": 14415 + }, + { + "epoch": 1.1634250665805828, + "grad_norm": 0.7050060629844666, + "learning_rate": 3.6428052764864287e-05, + "loss": 2.4607, + "step": 14416 + }, + { + "epoch": 1.1635057703171656, + "grad_norm": 0.6946923136711121, + "learning_rate": 3.6415867332267316e-05, + "loss": 2.4482, + "step": 14417 + }, + { + "epoch": 1.1635864740537487, + 
"grad_norm": 0.7202015519142151, + "learning_rate": 3.64036834843531e-05, + "loss": 2.4764, + "step": 14418 + }, + { + "epoch": 1.1636671777903316, + "grad_norm": 0.7845996618270874, + "learning_rate": 3.639150122142534e-05, + "loss": 2.4926, + "step": 14419 + }, + { + "epoch": 1.1637478815269147, + "grad_norm": 0.6924630403518677, + "learning_rate": 3.6379320543787645e-05, + "loss": 2.4664, + "step": 14420 + }, + { + "epoch": 1.1638285852634978, + "grad_norm": 0.7225920557975769, + "learning_rate": 3.636714145174358e-05, + "loss": 2.4638, + "step": 14421 + }, + { + "epoch": 1.1639092890000806, + "grad_norm": 0.6587103605270386, + "learning_rate": 3.63549639455967e-05, + "loss": 2.3629, + "step": 14422 + }, + { + "epoch": 1.1639899927366637, + "grad_norm": 0.7537658214569092, + "learning_rate": 3.634278802565051e-05, + "loss": 2.4971, + "step": 14423 + }, + { + "epoch": 1.1640706964732468, + "grad_norm": 0.6881381273269653, + "learning_rate": 3.633061369220841e-05, + "loss": 2.3737, + "step": 14424 + }, + { + "epoch": 1.1641514002098297, + "grad_norm": 0.693779468536377, + "learning_rate": 3.6318440945573864e-05, + "loss": 2.4346, + "step": 14425 + }, + { + "epoch": 1.1642321039464127, + "grad_norm": 0.777563750743866, + "learning_rate": 3.6306269786050265e-05, + "loss": 2.4288, + "step": 14426 + }, + { + "epoch": 1.1643128076829958, + "grad_norm": 0.6786738634109497, + "learning_rate": 3.629410021394087e-05, + "loss": 2.4094, + "step": 14427 + }, + { + "epoch": 1.1643935114195787, + "grad_norm": 0.7478442788124084, + "learning_rate": 3.628193222954904e-05, + "loss": 2.4163, + "step": 14428 + }, + { + "epoch": 1.1644742151561618, + "grad_norm": 0.6530766487121582, + "learning_rate": 3.626976583317803e-05, + "loss": 2.4328, + "step": 14429 + }, + { + "epoch": 1.1645549188927447, + "grad_norm": 0.6665371060371399, + "learning_rate": 3.6257601025131026e-05, + "loss": 2.4006, + "step": 14430 + }, + { + "epoch": 1.1646356226293277, + "grad_norm": 0.7184741497039795, + 
"learning_rate": 3.624543780571125e-05, + "loss": 2.462, + "step": 14431 + }, + { + "epoch": 1.1647163263659108, + "grad_norm": 0.7039462327957153, + "learning_rate": 3.6233276175221794e-05, + "loss": 2.4321, + "step": 14432 + }, + { + "epoch": 1.1647970301024937, + "grad_norm": 0.7039144039154053, + "learning_rate": 3.622111613396584e-05, + "loss": 2.4399, + "step": 14433 + }, + { + "epoch": 1.1648777338390768, + "grad_norm": 0.6690253615379333, + "learning_rate": 3.620895768224635e-05, + "loss": 2.3976, + "step": 14434 + }, + { + "epoch": 1.1649584375756596, + "grad_norm": 0.7048032879829407, + "learning_rate": 3.6196800820366384e-05, + "loss": 2.4848, + "step": 14435 + }, + { + "epoch": 1.1650391413122427, + "grad_norm": 0.668971836566925, + "learning_rate": 3.618464554862896e-05, + "loss": 2.4614, + "step": 14436 + }, + { + "epoch": 1.1651198450488258, + "grad_norm": 0.704858660697937, + "learning_rate": 3.617249186733695e-05, + "loss": 2.3962, + "step": 14437 + }, + { + "epoch": 1.1652005487854087, + "grad_norm": 0.692435085773468, + "learning_rate": 3.6160339776793296e-05, + "loss": 2.4059, + "step": 14438 + }, + { + "epoch": 1.1652812525219918, + "grad_norm": 0.6774182319641113, + "learning_rate": 3.614818927730085e-05, + "loss": 2.4975, + "step": 14439 + }, + { + "epoch": 1.1653619562585749, + "grad_norm": 0.6507411003112793, + "learning_rate": 3.613604036916243e-05, + "loss": 2.5029, + "step": 14440 + }, + { + "epoch": 1.1654426599951577, + "grad_norm": 0.7223206162452698, + "learning_rate": 3.612389305268084e-05, + "loss": 2.4599, + "step": 14441 + }, + { + "epoch": 1.1655233637317408, + "grad_norm": 0.6523364186286926, + "learning_rate": 3.611174732815883e-05, + "loss": 2.4521, + "step": 14442 + }, + { + "epoch": 1.165604067468324, + "grad_norm": 0.6668452024459839, + "learning_rate": 3.6099603195899046e-05, + "loss": 2.4082, + "step": 14443 + }, + { + "epoch": 1.1656847712049068, + "grad_norm": 0.6878299117088318, + "learning_rate": 
3.60874606562042e-05, + "loss": 2.4144, + "step": 14444 + }, + { + "epoch": 1.1657654749414899, + "grad_norm": 0.6662277579307556, + "learning_rate": 3.6075319709376895e-05, + "loss": 2.438, + "step": 14445 + }, + { + "epoch": 1.1658461786780727, + "grad_norm": 0.721422553062439, + "learning_rate": 3.606318035571976e-05, + "loss": 2.4414, + "step": 14446 + }, + { + "epoch": 1.1659268824146558, + "grad_norm": 0.6739782691001892, + "learning_rate": 3.6051042595535264e-05, + "loss": 2.4093, + "step": 14447 + }, + { + "epoch": 1.166007586151239, + "grad_norm": 0.6890884637832642, + "learning_rate": 3.603890642912596e-05, + "loss": 2.4385, + "step": 14448 + }, + { + "epoch": 1.1660882898878218, + "grad_norm": 0.6503998637199402, + "learning_rate": 3.602677185679433e-05, + "loss": 2.4498, + "step": 14449 + }, + { + "epoch": 1.1661689936244048, + "grad_norm": 0.6748046875, + "learning_rate": 3.601463887884271e-05, + "loss": 2.3739, + "step": 14450 + }, + { + "epoch": 1.1662496973609877, + "grad_norm": 0.6843422651290894, + "learning_rate": 3.600250749557358e-05, + "loss": 2.4323, + "step": 14451 + }, + { + "epoch": 1.1663304010975708, + "grad_norm": 0.7061208486557007, + "learning_rate": 3.599037770728929e-05, + "loss": 2.4611, + "step": 14452 + }, + { + "epoch": 1.166411104834154, + "grad_norm": 0.6614537239074707, + "learning_rate": 3.597824951429208e-05, + "loss": 2.4656, + "step": 14453 + }, + { + "epoch": 1.1664918085707368, + "grad_norm": 0.6620328426361084, + "learning_rate": 3.596612291688424e-05, + "loss": 2.415, + "step": 14454 + }, + { + "epoch": 1.1665725123073198, + "grad_norm": 0.6936565041542053, + "learning_rate": 3.595399791536804e-05, + "loss": 2.4655, + "step": 14455 + }, + { + "epoch": 1.166653216043903, + "grad_norm": 0.6766063570976257, + "learning_rate": 3.594187451004559e-05, + "loss": 2.4628, + "step": 14456 + }, + { + "epoch": 1.1667339197804858, + "grad_norm": 0.6588734984397888, + "learning_rate": 3.592975270121909e-05, + "loss": 2.4503, + 
"step": 14457 + }, + { + "epoch": 1.1668146235170689, + "grad_norm": 0.7290894985198975, + "learning_rate": 3.591763248919062e-05, + "loss": 2.5075, + "step": 14458 + }, + { + "epoch": 1.1668953272536517, + "grad_norm": 0.6952784657478333, + "learning_rate": 3.590551387426231e-05, + "loss": 2.4258, + "step": 14459 + }, + { + "epoch": 1.1669760309902348, + "grad_norm": 0.6737042665481567, + "learning_rate": 3.5893396856736096e-05, + "loss": 2.4459, + "step": 14460 + }, + { + "epoch": 1.167056734726818, + "grad_norm": 0.6616976857185364, + "learning_rate": 3.588128143691397e-05, + "loss": 2.4726, + "step": 14461 + }, + { + "epoch": 1.1671374384634008, + "grad_norm": 0.7017171382904053, + "learning_rate": 3.5869167615098e-05, + "loss": 2.375, + "step": 14462 + }, + { + "epoch": 1.1672181421999839, + "grad_norm": 0.7153809666633606, + "learning_rate": 3.585705539158997e-05, + "loss": 2.4271, + "step": 14463 + }, + { + "epoch": 1.1672988459365667, + "grad_norm": 0.749196469783783, + "learning_rate": 3.584494476669179e-05, + "loss": 2.4713, + "step": 14464 + }, + { + "epoch": 1.1673795496731498, + "grad_norm": 0.6593676209449768, + "learning_rate": 3.583283574070533e-05, + "loss": 2.4276, + "step": 14465 + }, + { + "epoch": 1.167460253409733, + "grad_norm": 0.6949084401130676, + "learning_rate": 3.5820728313932295e-05, + "loss": 2.4128, + "step": 14466 + }, + { + "epoch": 1.1675409571463158, + "grad_norm": 0.6795482039451599, + "learning_rate": 3.5808622486674484e-05, + "loss": 2.485, + "step": 14467 + }, + { + "epoch": 1.1676216608828989, + "grad_norm": 0.6763483881950378, + "learning_rate": 3.5796518259233625e-05, + "loss": 2.4063, + "step": 14468 + }, + { + "epoch": 1.167702364619482, + "grad_norm": 0.665687620639801, + "learning_rate": 3.578441563191133e-05, + "loss": 2.437, + "step": 14469 + }, + { + "epoch": 1.1677830683560648, + "grad_norm": 0.6338435411453247, + "learning_rate": 3.577231460500926e-05, + "loss": 2.3747, + "step": 14470 + }, + { + "epoch": 
1.167863772092648, + "grad_norm": 0.7031865119934082, + "learning_rate": 3.5760215178829e-05, + "loss": 2.3952, + "step": 14471 + }, + { + "epoch": 1.167944475829231, + "grad_norm": 0.7544599771499634, + "learning_rate": 3.5748117353672106e-05, + "loss": 2.3941, + "step": 14472 + }, + { + "epoch": 1.1680251795658139, + "grad_norm": 0.7271532416343689, + "learning_rate": 3.5736021129840083e-05, + "loss": 2.4371, + "step": 14473 + }, + { + "epoch": 1.168105883302397, + "grad_norm": 0.709048867225647, + "learning_rate": 3.572392650763441e-05, + "loss": 2.482, + "step": 14474 + }, + { + "epoch": 1.1681865870389798, + "grad_norm": 0.6894589066505432, + "learning_rate": 3.571183348735653e-05, + "loss": 2.4347, + "step": 14475 + }, + { + "epoch": 1.168267290775563, + "grad_norm": 0.6680620908737183, + "learning_rate": 3.5699742069307774e-05, + "loss": 2.3995, + "step": 14476 + }, + { + "epoch": 1.168347994512146, + "grad_norm": 0.701669454574585, + "learning_rate": 3.568765225378954e-05, + "loss": 2.4045, + "step": 14477 + }, + { + "epoch": 1.1684286982487289, + "grad_norm": 0.7102392911911011, + "learning_rate": 3.567556404110315e-05, + "loss": 2.4695, + "step": 14478 + }, + { + "epoch": 1.168509401985312, + "grad_norm": 0.6820430755615234, + "learning_rate": 3.566347743154982e-05, + "loss": 2.4155, + "step": 14479 + }, + { + "epoch": 1.1685901057218948, + "grad_norm": 0.6611022353172302, + "learning_rate": 3.565139242543081e-05, + "loss": 2.3992, + "step": 14480 + }, + { + "epoch": 1.168670809458478, + "grad_norm": 0.6844382882118225, + "learning_rate": 3.5639309023047306e-05, + "loss": 2.4345, + "step": 14481 + }, + { + "epoch": 1.168751513195061, + "grad_norm": 0.7557988166809082, + "learning_rate": 3.5627227224700464e-05, + "loss": 2.4454, + "step": 14482 + }, + { + "epoch": 1.1688322169316439, + "grad_norm": 0.6652555465698242, + "learning_rate": 3.5615147030691384e-05, + "loss": 2.3749, + "step": 14483 + }, + { + "epoch": 1.168912920668227, + "grad_norm": 
0.6912989020347595, + "learning_rate": 3.56030684413212e-05, + "loss": 2.4737, + "step": 14484 + }, + { + "epoch": 1.16899362440481, + "grad_norm": 0.735103964805603, + "learning_rate": 3.559099145689083e-05, + "loss": 2.4098, + "step": 14485 + }, + { + "epoch": 1.169074328141393, + "grad_norm": 0.6873028874397278, + "learning_rate": 3.557891607770133e-05, + "loss": 2.4247, + "step": 14486 + }, + { + "epoch": 1.169155031877976, + "grad_norm": 0.7364680171012878, + "learning_rate": 3.556684230405367e-05, + "loss": 2.4314, + "step": 14487 + }, + { + "epoch": 1.169235735614559, + "grad_norm": 0.679122269153595, + "learning_rate": 3.55547701362487e-05, + "loss": 2.4196, + "step": 14488 + }, + { + "epoch": 1.169316439351142, + "grad_norm": 0.6783872246742249, + "learning_rate": 3.554269957458731e-05, + "loss": 2.4212, + "step": 14489 + }, + { + "epoch": 1.169397143087725, + "grad_norm": 0.7434942126274109, + "learning_rate": 3.553063061937034e-05, + "loss": 2.4139, + "step": 14490 + }, + { + "epoch": 1.1694778468243079, + "grad_norm": 0.6799852252006531, + "learning_rate": 3.55185632708986e-05, + "loss": 2.4252, + "step": 14491 + }, + { + "epoch": 1.169558550560891, + "grad_norm": 0.7040107250213623, + "learning_rate": 3.5506497529472795e-05, + "loss": 2.3937, + "step": 14492 + }, + { + "epoch": 1.169639254297474, + "grad_norm": 0.7350315451622009, + "learning_rate": 3.549443339539368e-05, + "loss": 2.4063, + "step": 14493 + }, + { + "epoch": 1.169719958034057, + "grad_norm": 0.694521963596344, + "learning_rate": 3.548237086896192e-05, + "loss": 2.4715, + "step": 14494 + }, + { + "epoch": 1.16980066177064, + "grad_norm": 0.6648221015930176, + "learning_rate": 3.5470309950478096e-05, + "loss": 2.4365, + "step": 14495 + }, + { + "epoch": 1.1698813655072229, + "grad_norm": 0.688024640083313, + "learning_rate": 3.545825064024284e-05, + "loss": 2.449, + "step": 14496 + }, + { + "epoch": 1.169962069243806, + "grad_norm": 0.6743311882019043, + "learning_rate": 
3.544619293855672e-05, + "loss": 2.4283, + "step": 14497 + }, + { + "epoch": 1.170042772980389, + "grad_norm": 0.669119119644165, + "learning_rate": 3.543413684572019e-05, + "loss": 2.4363, + "step": 14498 + }, + { + "epoch": 1.170123476716972, + "grad_norm": 0.6998667120933533, + "learning_rate": 3.5422082362033745e-05, + "loss": 2.425, + "step": 14499 + }, + { + "epoch": 1.170204180453555, + "grad_norm": 0.7681630253791809, + "learning_rate": 3.5410029487797845e-05, + "loss": 2.4382, + "step": 14500 + }, + { + "epoch": 1.170284884190138, + "grad_norm": 0.6925049424171448, + "learning_rate": 3.539797822331279e-05, + "loss": 2.4261, + "step": 14501 + }, + { + "epoch": 1.170365587926721, + "grad_norm": 0.7145542502403259, + "learning_rate": 3.538592856887901e-05, + "loss": 2.4681, + "step": 14502 + }, + { + "epoch": 1.170446291663304, + "grad_norm": 0.6441611647605896, + "learning_rate": 3.537388052479684e-05, + "loss": 2.4187, + "step": 14503 + }, + { + "epoch": 1.1705269953998871, + "grad_norm": 0.6622560620307922, + "learning_rate": 3.5361834091366466e-05, + "loss": 2.4615, + "step": 14504 + }, + { + "epoch": 1.17060769913647, + "grad_norm": 0.6987677812576294, + "learning_rate": 3.5349789268888144e-05, + "loss": 2.413, + "step": 14505 + }, + { + "epoch": 1.170688402873053, + "grad_norm": 0.668358325958252, + "learning_rate": 3.533774605766207e-05, + "loss": 2.5146, + "step": 14506 + }, + { + "epoch": 1.170769106609636, + "grad_norm": 0.7514958381652832, + "learning_rate": 3.532570445798844e-05, + "loss": 2.4474, + "step": 14507 + }, + { + "epoch": 1.170849810346219, + "grad_norm": 0.6454465389251709, + "learning_rate": 3.5313664470167276e-05, + "loss": 2.3911, + "step": 14508 + }, + { + "epoch": 1.170930514082802, + "grad_norm": 0.6653602719306946, + "learning_rate": 3.5301626094498674e-05, + "loss": 2.4223, + "step": 14509 + }, + { + "epoch": 1.171011217819385, + "grad_norm": 0.6782815456390381, + "learning_rate": 3.5289589331282715e-05, + "loss": 2.457, + 
"step": 14510 + }, + { + "epoch": 1.171091921555968, + "grad_norm": 0.720973014831543, + "learning_rate": 3.527755418081932e-05, + "loss": 2.4541, + "step": 14511 + }, + { + "epoch": 1.171172625292551, + "grad_norm": 0.6300156712532043, + "learning_rate": 3.526552064340841e-05, + "loss": 2.4451, + "step": 14512 + }, + { + "epoch": 1.171253329029134, + "grad_norm": 0.7660964727401733, + "learning_rate": 3.5253488719350026e-05, + "loss": 2.5031, + "step": 14513 + }, + { + "epoch": 1.1713340327657171, + "grad_norm": 0.6931602358818054, + "learning_rate": 3.5241458408943905e-05, + "loss": 2.4249, + "step": 14514 + }, + { + "epoch": 1.1714147365023, + "grad_norm": 0.6863045692443848, + "learning_rate": 3.522942971248993e-05, + "loss": 2.4429, + "step": 14515 + }, + { + "epoch": 1.171495440238883, + "grad_norm": 0.6993531584739685, + "learning_rate": 3.521740263028791e-05, + "loss": 2.3864, + "step": 14516 + }, + { + "epoch": 1.1715761439754662, + "grad_norm": 0.807991087436676, + "learning_rate": 3.520537716263753e-05, + "loss": 2.459, + "step": 14517 + }, + { + "epoch": 1.171656847712049, + "grad_norm": 0.6722908020019531, + "learning_rate": 3.519335330983852e-05, + "loss": 2.4426, + "step": 14518 + }, + { + "epoch": 1.1717375514486321, + "grad_norm": 0.6934377551078796, + "learning_rate": 3.5181331072190585e-05, + "loss": 2.4326, + "step": 14519 + }, + { + "epoch": 1.171818255185215, + "grad_norm": 0.6532938480377197, + "learning_rate": 3.516931044999329e-05, + "loss": 2.3778, + "step": 14520 + }, + { + "epoch": 1.171898958921798, + "grad_norm": 0.6779183745384216, + "learning_rate": 3.5157291443546247e-05, + "loss": 2.4089, + "step": 14521 + }, + { + "epoch": 1.1719796626583812, + "grad_norm": 0.687005877494812, + "learning_rate": 3.514527405314899e-05, + "loss": 2.4669, + "step": 14522 + }, + { + "epoch": 1.172060366394964, + "grad_norm": 0.6804830431938171, + "learning_rate": 3.5133258279101045e-05, + "loss": 2.4789, + "step": 14523 + }, + { + "epoch": 
1.1721410701315471, + "grad_norm": 0.8345538973808289, + "learning_rate": 3.512124412170187e-05, + "loss": 2.4506, + "step": 14524 + }, + { + "epoch": 1.17222177386813, + "grad_norm": 0.6571901440620422, + "learning_rate": 3.510923158125088e-05, + "loss": 2.4911, + "step": 14525 + }, + { + "epoch": 1.172302477604713, + "grad_norm": 0.6607047915458679, + "learning_rate": 3.5097220658047504e-05, + "loss": 2.4882, + "step": 14526 + }, + { + "epoch": 1.1723831813412962, + "grad_norm": 0.6883669495582581, + "learning_rate": 3.508521135239101e-05, + "loss": 2.4083, + "step": 14527 + }, + { + "epoch": 1.172463885077879, + "grad_norm": 0.6792941689491272, + "learning_rate": 3.5073203664580746e-05, + "loss": 2.368, + "step": 14528 + }, + { + "epoch": 1.172544588814462, + "grad_norm": 0.6675198674201965, + "learning_rate": 3.506119759491598e-05, + "loss": 2.4193, + "step": 14529 + }, + { + "epoch": 1.1726252925510452, + "grad_norm": 0.7267464399337769, + "learning_rate": 3.504919314369591e-05, + "loss": 2.3906, + "step": 14530 + }, + { + "epoch": 1.172705996287628, + "grad_norm": 0.6927710175514221, + "learning_rate": 3.503719031121973e-05, + "loss": 2.4082, + "step": 14531 + }, + { + "epoch": 1.1727867000242111, + "grad_norm": 0.7231000065803528, + "learning_rate": 3.502518909778656e-05, + "loss": 2.4845, + "step": 14532 + }, + { + "epoch": 1.1728674037607942, + "grad_norm": 0.7087520360946655, + "learning_rate": 3.5013189503695544e-05, + "loss": 2.4622, + "step": 14533 + }, + { + "epoch": 1.172948107497377, + "grad_norm": 0.6669846177101135, + "learning_rate": 3.5001191529245716e-05, + "loss": 2.4151, + "step": 14534 + }, + { + "epoch": 1.1730288112339602, + "grad_norm": 0.7338447570800781, + "learning_rate": 3.4989195174736134e-05, + "loss": 2.4274, + "step": 14535 + }, + { + "epoch": 1.173109514970543, + "grad_norm": 0.7032054662704468, + "learning_rate": 3.497720044046572e-05, + "loss": 2.4066, + "step": 14536 + }, + { + "epoch": 1.1731902187071261, + "grad_norm": 
0.6571083068847656, + "learning_rate": 3.496520732673344e-05, + "loss": 2.4581, + "step": 14537 + }, + { + "epoch": 1.1732709224437092, + "grad_norm": 0.6618444919586182, + "learning_rate": 3.495321583383819e-05, + "loss": 2.3675, + "step": 14538 + }, + { + "epoch": 1.173351626180292, + "grad_norm": 0.6597652435302734, + "learning_rate": 3.4941225962078885e-05, + "loss": 2.416, + "step": 14539 + }, + { + "epoch": 1.1734323299168752, + "grad_norm": 0.682634711265564, + "learning_rate": 3.492923771175425e-05, + "loss": 2.5081, + "step": 14540 + }, + { + "epoch": 1.173513033653458, + "grad_norm": 0.7046132683753967, + "learning_rate": 3.49172510831631e-05, + "loss": 2.4439, + "step": 14541 + }, + { + "epoch": 1.1735937373900411, + "grad_norm": 0.6734833717346191, + "learning_rate": 3.4905266076604196e-05, + "loss": 2.4348, + "step": 14542 + }, + { + "epoch": 1.1736744411266242, + "grad_norm": 0.6624744534492493, + "learning_rate": 3.4893282692376214e-05, + "loss": 2.4364, + "step": 14543 + }, + { + "epoch": 1.173755144863207, + "grad_norm": 0.8425754308700562, + "learning_rate": 3.4881300930777815e-05, + "loss": 2.4803, + "step": 14544 + }, + { + "epoch": 1.1738358485997902, + "grad_norm": 0.6438888311386108, + "learning_rate": 3.486932079210766e-05, + "loss": 2.3973, + "step": 14545 + }, + { + "epoch": 1.1739165523363733, + "grad_norm": 0.650399923324585, + "learning_rate": 3.485734227666424e-05, + "loss": 2.4183, + "step": 14546 + }, + { + "epoch": 1.1739972560729561, + "grad_norm": 0.6857002973556519, + "learning_rate": 3.4845365384746144e-05, + "loss": 2.4061, + "step": 14547 + }, + { + "epoch": 1.1740779598095392, + "grad_norm": 0.6680994629859924, + "learning_rate": 3.483339011665189e-05, + "loss": 2.421, + "step": 14548 + }, + { + "epoch": 1.1741586635461223, + "grad_norm": 0.6440950632095337, + "learning_rate": 3.482141647267987e-05, + "loss": 2.3914, + "step": 14549 + }, + { + "epoch": 1.1742393672827052, + "grad_norm": 0.7329740524291992, + "learning_rate": 
3.480944445312853e-05, + "loss": 2.4805, + "step": 14550 + }, + { + "epoch": 1.1743200710192883, + "grad_norm": 0.6848189234733582, + "learning_rate": 3.4797474058296245e-05, + "loss": 2.3611, + "step": 14551 + }, + { + "epoch": 1.1744007747558711, + "grad_norm": 0.6994072794914246, + "learning_rate": 3.478550528848134e-05, + "loss": 2.5106, + "step": 14552 + }, + { + "epoch": 1.1744814784924542, + "grad_norm": 0.6826444268226624, + "learning_rate": 3.477353814398212e-05, + "loss": 2.467, + "step": 14553 + }, + { + "epoch": 1.1745621822290373, + "grad_norm": 0.6658408045768738, + "learning_rate": 3.476157262509683e-05, + "loss": 2.423, + "step": 14554 + }, + { + "epoch": 1.1746428859656202, + "grad_norm": 0.6963697075843811, + "learning_rate": 3.474960873212372e-05, + "loss": 2.457, + "step": 14555 + }, + { + "epoch": 1.1747235897022033, + "grad_norm": 0.7574479579925537, + "learning_rate": 3.4737646465360894e-05, + "loss": 2.4292, + "step": 14556 + }, + { + "epoch": 1.1748042934387861, + "grad_norm": 0.7494931817054749, + "learning_rate": 3.472568582510652e-05, + "loss": 2.4395, + "step": 14557 + }, + { + "epoch": 1.1748849971753692, + "grad_norm": 0.7062687873840332, + "learning_rate": 3.471372681165872e-05, + "loss": 2.4561, + "step": 14558 + }, + { + "epoch": 1.1749657009119523, + "grad_norm": 0.6875349879264832, + "learning_rate": 3.4701769425315465e-05, + "loss": 2.4728, + "step": 14559 + }, + { + "epoch": 1.1750464046485352, + "grad_norm": 0.7009960412979126, + "learning_rate": 3.46898136663748e-05, + "loss": 2.5364, + "step": 14560 + }, + { + "epoch": 1.1751271083851182, + "grad_norm": 0.673791766166687, + "learning_rate": 3.467785953513475e-05, + "loss": 2.4611, + "step": 14561 + }, + { + "epoch": 1.1752078121217013, + "grad_norm": 0.7166882753372192, + "learning_rate": 3.4665907031893164e-05, + "loss": 2.4451, + "step": 14562 + }, + { + "epoch": 1.1752885158582842, + "grad_norm": 0.6868429780006409, + "learning_rate": 3.465395615694791e-05, + "loss": 
2.4282, + "step": 14563 + }, + { + "epoch": 1.1753692195948673, + "grad_norm": 0.7212893962860107, + "learning_rate": 3.464200691059697e-05, + "loss": 2.4239, + "step": 14564 + }, + { + "epoch": 1.1754499233314502, + "grad_norm": 0.7213432192802429, + "learning_rate": 3.463005929313802e-05, + "loss": 2.4872, + "step": 14565 + }, + { + "epoch": 1.1755306270680332, + "grad_norm": 0.6805179119110107, + "learning_rate": 3.461811330486887e-05, + "loss": 2.4192, + "step": 14566 + }, + { + "epoch": 1.1756113308046163, + "grad_norm": 0.6746333241462708, + "learning_rate": 3.460616894608725e-05, + "loss": 2.3911, + "step": 14567 + }, + { + "epoch": 1.1756920345411992, + "grad_norm": 0.7388630509376526, + "learning_rate": 3.459422621709088e-05, + "loss": 2.4758, + "step": 14568 + }, + { + "epoch": 1.1757727382777823, + "grad_norm": 0.7730274200439453, + "learning_rate": 3.458228511817731e-05, + "loss": 2.4159, + "step": 14569 + }, + { + "epoch": 1.1758534420143651, + "grad_norm": 0.721075177192688, + "learning_rate": 3.457034564964422e-05, + "loss": 2.4673, + "step": 14570 + }, + { + "epoch": 1.1759341457509482, + "grad_norm": 0.6647645235061646, + "learning_rate": 3.4558407811789184e-05, + "loss": 2.395, + "step": 14571 + }, + { + "epoch": 1.1760148494875313, + "grad_norm": 0.7155466675758362, + "learning_rate": 3.454647160490965e-05, + "loss": 2.503, + "step": 14572 + }, + { + "epoch": 1.1760955532241142, + "grad_norm": 0.6789268851280212, + "learning_rate": 3.453453702930314e-05, + "loss": 2.401, + "step": 14573 + }, + { + "epoch": 1.1761762569606973, + "grad_norm": 0.7488093376159668, + "learning_rate": 3.4522604085267105e-05, + "loss": 2.4434, + "step": 14574 + }, + { + "epoch": 1.1762569606972804, + "grad_norm": 0.7954889535903931, + "learning_rate": 3.451067277309893e-05, + "loss": 2.5302, + "step": 14575 + }, + { + "epoch": 1.1763376644338632, + "grad_norm": 0.7008484601974487, + "learning_rate": 3.4498743093095975e-05, + "loss": 2.3935, + "step": 14576 + }, + { + 
"epoch": 1.1764183681704463, + "grad_norm": 0.6725437641143799, + "learning_rate": 3.448681504555561e-05, + "loss": 2.399, + "step": 14577 + }, + { + "epoch": 1.1764990719070294, + "grad_norm": 0.6778931617736816, + "learning_rate": 3.4474888630775026e-05, + "loss": 2.4178, + "step": 14578 + }, + { + "epoch": 1.1765797756436123, + "grad_norm": 0.7043762803077698, + "learning_rate": 3.44629638490515e-05, + "loss": 2.5581, + "step": 14579 + }, + { + "epoch": 1.1766604793801954, + "grad_norm": 0.6848085522651672, + "learning_rate": 3.445104070068227e-05, + "loss": 2.436, + "step": 14580 + }, + { + "epoch": 1.1767411831167782, + "grad_norm": 0.7504082322120667, + "learning_rate": 3.443911918596441e-05, + "loss": 2.4138, + "step": 14581 + }, + { + "epoch": 1.1768218868533613, + "grad_norm": 0.7441161870956421, + "learning_rate": 3.442719930519508e-05, + "loss": 2.4333, + "step": 14582 + }, + { + "epoch": 1.1769025905899444, + "grad_norm": 0.663894772529602, + "learning_rate": 3.4415281058671354e-05, + "loss": 2.4672, + "step": 14583 + }, + { + "epoch": 1.1769832943265273, + "grad_norm": 0.6814345121383667, + "learning_rate": 3.440336444669027e-05, + "loss": 2.4196, + "step": 14584 + }, + { + "epoch": 1.1770639980631104, + "grad_norm": 0.7566598057746887, + "learning_rate": 3.439144946954881e-05, + "loss": 2.4586, + "step": 14585 + }, + { + "epoch": 1.1771447017996932, + "grad_norm": 0.7324996590614319, + "learning_rate": 3.4379536127543934e-05, + "loss": 2.4286, + "step": 14586 + }, + { + "epoch": 1.1772254055362763, + "grad_norm": 0.6632608771324158, + "learning_rate": 3.436762442097259e-05, + "loss": 2.4713, + "step": 14587 + }, + { + "epoch": 1.1773061092728594, + "grad_norm": 0.7246156930923462, + "learning_rate": 3.4355714350131564e-05, + "loss": 2.4374, + "step": 14588 + }, + { + "epoch": 1.1773868130094423, + "grad_norm": 0.7096351981163025, + "learning_rate": 3.4343805915317737e-05, + "loss": 2.4649, + "step": 14589 + }, + { + "epoch": 1.1774675167460253, + 
"grad_norm": 0.7090620398521423, + "learning_rate": 3.433189911682793e-05, + "loss": 2.396, + "step": 14590 + }, + { + "epoch": 1.1775482204826084, + "grad_norm": 0.7782440185546875, + "learning_rate": 3.431999395495882e-05, + "loss": 2.4506, + "step": 14591 + }, + { + "epoch": 1.1776289242191913, + "grad_norm": 0.6933457851409912, + "learning_rate": 3.4308090430007155e-05, + "loss": 2.3985, + "step": 14592 + }, + { + "epoch": 1.1777096279557744, + "grad_norm": 0.6935414671897888, + "learning_rate": 3.429618854226959e-05, + "loss": 2.4372, + "step": 14593 + }, + { + "epoch": 1.1777903316923575, + "grad_norm": 0.6971156597137451, + "learning_rate": 3.428428829204276e-05, + "loss": 2.4837, + "step": 14594 + }, + { + "epoch": 1.1778710354289403, + "grad_norm": 0.6460022926330566, + "learning_rate": 3.427238967962325e-05, + "loss": 2.3742, + "step": 14595 + }, + { + "epoch": 1.1779517391655234, + "grad_norm": 0.6941941976547241, + "learning_rate": 3.426049270530763e-05, + "loss": 2.4706, + "step": 14596 + }, + { + "epoch": 1.1780324429021063, + "grad_norm": 0.7062166333198547, + "learning_rate": 3.424859736939236e-05, + "loss": 2.3893, + "step": 14597 + }, + { + "epoch": 1.1781131466386894, + "grad_norm": 0.6586433053016663, + "learning_rate": 3.42367036721739e-05, + "loss": 2.4385, + "step": 14598 + }, + { + "epoch": 1.1781938503752725, + "grad_norm": 0.6781242489814758, + "learning_rate": 3.422481161394869e-05, + "loss": 2.3876, + "step": 14599 + }, + { + "epoch": 1.1782745541118553, + "grad_norm": 0.710127592086792, + "learning_rate": 3.421292119501316e-05, + "loss": 2.4067, + "step": 14600 + }, + { + "epoch": 1.1783552578484384, + "grad_norm": 0.6856096982955933, + "learning_rate": 3.420103241566357e-05, + "loss": 2.4855, + "step": 14601 + }, + { + "epoch": 1.1784359615850213, + "grad_norm": 0.7173380851745605, + "learning_rate": 3.4189145276196245e-05, + "loss": 2.4871, + "step": 14602 + }, + { + "epoch": 1.1785166653216044, + "grad_norm": 0.6895382404327393, + 
"learning_rate": 3.417725977690745e-05, + "loss": 2.4066, + "step": 14603 + }, + { + "epoch": 1.1785973690581875, + "grad_norm": 0.7417690753936768, + "learning_rate": 3.416537591809341e-05, + "loss": 2.3779, + "step": 14604 + }, + { + "epoch": 1.1786780727947703, + "grad_norm": 0.7258411049842834, + "learning_rate": 3.4153493700050286e-05, + "loss": 2.4334, + "step": 14605 + }, + { + "epoch": 1.1787587765313534, + "grad_norm": 0.65704345703125, + "learning_rate": 3.414161312307427e-05, + "loss": 2.4531, + "step": 14606 + }, + { + "epoch": 1.1788394802679365, + "grad_norm": 0.6937118172645569, + "learning_rate": 3.4129734187461374e-05, + "loss": 2.4562, + "step": 14607 + }, + { + "epoch": 1.1789201840045194, + "grad_norm": 0.7331998348236084, + "learning_rate": 3.411785689350768e-05, + "loss": 2.4418, + "step": 14608 + }, + { + "epoch": 1.1790008877411025, + "grad_norm": 0.666582465171814, + "learning_rate": 3.410598124150924e-05, + "loss": 2.4154, + "step": 14609 + }, + { + "epoch": 1.1790815914776853, + "grad_norm": 0.6684321165084839, + "learning_rate": 3.409410723176197e-05, + "loss": 2.4155, + "step": 14610 + }, + { + "epoch": 1.1791622952142684, + "grad_norm": 0.6413382291793823, + "learning_rate": 3.408223486456184e-05, + "loss": 2.3924, + "step": 14611 + }, + { + "epoch": 1.1792429989508515, + "grad_norm": 0.7081305384635925, + "learning_rate": 3.407036414020475e-05, + "loss": 2.3811, + "step": 14612 + }, + { + "epoch": 1.1793237026874344, + "grad_norm": 0.7550063133239746, + "learning_rate": 3.405849505898645e-05, + "loss": 2.4425, + "step": 14613 + }, + { + "epoch": 1.1794044064240174, + "grad_norm": 0.677200198173523, + "learning_rate": 3.404662762120288e-05, + "loss": 2.5182, + "step": 14614 + }, + { + "epoch": 1.1794851101606003, + "grad_norm": 0.6829770803451538, + "learning_rate": 3.4034761827149745e-05, + "loss": 2.5068, + "step": 14615 + }, + { + "epoch": 1.1795658138971834, + "grad_norm": 0.7069409489631653, + "learning_rate": 
3.4022897677122815e-05, + "loss": 2.4449, + "step": 14616 + }, + { + "epoch": 1.1796465176337665, + "grad_norm": 0.6604448556900024, + "learning_rate": 3.4011035171417696e-05, + "loss": 2.3996, + "step": 14617 + }, + { + "epoch": 1.1797272213703494, + "grad_norm": 0.6577324271202087, + "learning_rate": 3.3999174310330084e-05, + "loss": 2.4723, + "step": 14618 + }, + { + "epoch": 1.1798079251069324, + "grad_norm": 0.8159187436103821, + "learning_rate": 3.398731509415561e-05, + "loss": 2.4655, + "step": 14619 + }, + { + "epoch": 1.1798886288435155, + "grad_norm": 0.7170652747154236, + "learning_rate": 3.397545752318977e-05, + "loss": 2.5095, + "step": 14620 + }, + { + "epoch": 1.1799693325800984, + "grad_norm": 0.6865009665489197, + "learning_rate": 3.396360159772812e-05, + "loss": 2.4358, + "step": 14621 + }, + { + "epoch": 1.1800500363166815, + "grad_norm": 0.6485020518302917, + "learning_rate": 3.3951747318066175e-05, + "loss": 2.4576, + "step": 14622 + }, + { + "epoch": 1.1801307400532646, + "grad_norm": 0.6626582145690918, + "learning_rate": 3.39398946844993e-05, + "loss": 2.4824, + "step": 14623 + }, + { + "epoch": 1.1802114437898474, + "grad_norm": 0.718588650226593, + "learning_rate": 3.392804369732293e-05, + "loss": 2.4211, + "step": 14624 + }, + { + "epoch": 1.1802921475264305, + "grad_norm": 0.7449582815170288, + "learning_rate": 3.391619435683243e-05, + "loss": 2.444, + "step": 14625 + }, + { + "epoch": 1.1803728512630134, + "grad_norm": 0.6988492012023926, + "learning_rate": 3.3904346663323115e-05, + "loss": 2.4262, + "step": 14626 + }, + { + "epoch": 1.1804535549995965, + "grad_norm": 0.6779490113258362, + "learning_rate": 3.389250061709025e-05, + "loss": 2.4751, + "step": 14627 + }, + { + "epoch": 1.1805342587361796, + "grad_norm": 0.6883673667907715, + "learning_rate": 3.388065621842912e-05, + "loss": 2.4995, + "step": 14628 + }, + { + "epoch": 1.1806149624727624, + "grad_norm": 0.7112017273902893, + "learning_rate": 3.386881346763483e-05, + "loss": 
2.4181, + "step": 14629 + }, + { + "epoch": 1.1806956662093455, + "grad_norm": 0.6960459351539612, + "learning_rate": 3.385697236500258e-05, + "loss": 2.4888, + "step": 14630 + }, + { + "epoch": 1.1807763699459284, + "grad_norm": 0.6874156594276428, + "learning_rate": 3.3845132910827484e-05, + "loss": 2.4175, + "step": 14631 + }, + { + "epoch": 1.1808570736825115, + "grad_norm": 0.7075642347335815, + "learning_rate": 3.383329510540463e-05, + "loss": 2.4315, + "step": 14632 + }, + { + "epoch": 1.1809377774190946, + "grad_norm": 0.674907386302948, + "learning_rate": 3.3821458949028995e-05, + "loss": 2.4216, + "step": 14633 + }, + { + "epoch": 1.1810184811556774, + "grad_norm": 0.7008463740348816, + "learning_rate": 3.380962444199559e-05, + "loss": 2.4114, + "step": 14634 + }, + { + "epoch": 1.1810991848922605, + "grad_norm": 0.6784217953681946, + "learning_rate": 3.379779158459937e-05, + "loss": 2.3663, + "step": 14635 + }, + { + "epoch": 1.1811798886288436, + "grad_norm": 0.7174829244613647, + "learning_rate": 3.378596037713525e-05, + "loss": 2.4582, + "step": 14636 + }, + { + "epoch": 1.1812605923654265, + "grad_norm": 0.7106035947799683, + "learning_rate": 3.3774130819898065e-05, + "loss": 2.5095, + "step": 14637 + }, + { + "epoch": 1.1813412961020096, + "grad_norm": 0.809107780456543, + "learning_rate": 3.3762302913182696e-05, + "loss": 2.4942, + "step": 14638 + }, + { + "epoch": 1.1814219998385926, + "grad_norm": 0.7150272727012634, + "learning_rate": 3.375047665728386e-05, + "loss": 2.378, + "step": 14639 + }, + { + "epoch": 1.1815027035751755, + "grad_norm": 0.7016271352767944, + "learning_rate": 3.373865205249632e-05, + "loss": 2.4393, + "step": 14640 + }, + { + "epoch": 1.1815834073117586, + "grad_norm": 0.6387282013893127, + "learning_rate": 3.372682909911481e-05, + "loss": 2.4399, + "step": 14641 + }, + { + "epoch": 1.1816641110483415, + "grad_norm": 0.834181010723114, + "learning_rate": 3.371500779743393e-05, + "loss": 2.4312, + "step": 14642 + }, + { + 
"epoch": 1.1817448147849245, + "grad_norm": 0.6690472960472107, + "learning_rate": 3.370318814774832e-05, + "loss": 2.407, + "step": 14643 + }, + { + "epoch": 1.1818255185215076, + "grad_norm": 0.6594302654266357, + "learning_rate": 3.369137015035256e-05, + "loss": 2.4275, + "step": 14644 + }, + { + "epoch": 1.1819062222580905, + "grad_norm": 0.7284699082374573, + "learning_rate": 3.3679553805541194e-05, + "loss": 2.3981, + "step": 14645 + }, + { + "epoch": 1.1819869259946736, + "grad_norm": 0.7109572291374207, + "learning_rate": 3.366773911360871e-05, + "loss": 2.4345, + "step": 14646 + }, + { + "epoch": 1.1820676297312565, + "grad_norm": 0.6874241828918457, + "learning_rate": 3.3655926074849566e-05, + "loss": 2.4488, + "step": 14647 + }, + { + "epoch": 1.1821483334678395, + "grad_norm": 0.6698973178863525, + "learning_rate": 3.364411468955819e-05, + "loss": 2.42, + "step": 14648 + }, + { + "epoch": 1.1822290372044226, + "grad_norm": 0.7816089391708374, + "learning_rate": 3.3632304958028915e-05, + "loss": 2.4638, + "step": 14649 + }, + { + "epoch": 1.1823097409410055, + "grad_norm": 0.6718220710754395, + "learning_rate": 3.3620496880556075e-05, + "loss": 2.413, + "step": 14650 + }, + { + "epoch": 1.1823904446775886, + "grad_norm": 0.753463089466095, + "learning_rate": 3.360869045743401e-05, + "loss": 2.3772, + "step": 14651 + }, + { + "epoch": 1.1824711484141717, + "grad_norm": 0.7031456828117371, + "learning_rate": 3.359688568895689e-05, + "loss": 2.4198, + "step": 14652 + }, + { + "epoch": 1.1825518521507545, + "grad_norm": 0.7857323288917542, + "learning_rate": 3.358508257541897e-05, + "loss": 2.4223, + "step": 14653 + }, + { + "epoch": 1.1826325558873376, + "grad_norm": 0.7779297828674316, + "learning_rate": 3.357328111711439e-05, + "loss": 2.5266, + "step": 14654 + }, + { + "epoch": 1.1827132596239207, + "grad_norm": 0.7382386326789856, + "learning_rate": 3.356148131433728e-05, + "loss": 2.4673, + "step": 14655 + }, + { + "epoch": 1.1827939633605036, + 
"grad_norm": 0.7868054509162903, + "learning_rate": 3.354968316738174e-05, + "loss": 2.4285, + "step": 14656 + }, + { + "epoch": 1.1828746670970867, + "grad_norm": 0.7007591724395752, + "learning_rate": 3.353788667654183e-05, + "loss": 2.4054, + "step": 14657 + }, + { + "epoch": 1.1829553708336695, + "grad_norm": 0.6627741456031799, + "learning_rate": 3.352609184211148e-05, + "loss": 2.4224, + "step": 14658 + }, + { + "epoch": 1.1830360745702526, + "grad_norm": 0.6865360736846924, + "learning_rate": 3.351429866438469e-05, + "loss": 2.4084, + "step": 14659 + }, + { + "epoch": 1.1831167783068357, + "grad_norm": 0.7572095990180969, + "learning_rate": 3.3502507143655404e-05, + "loss": 2.4339, + "step": 14660 + }, + { + "epoch": 1.1831974820434186, + "grad_norm": 0.6907969117164612, + "learning_rate": 3.349071728021743e-05, + "loss": 2.4578, + "step": 14661 + }, + { + "epoch": 1.1832781857800017, + "grad_norm": 0.6618743538856506, + "learning_rate": 3.347892907436465e-05, + "loss": 2.4131, + "step": 14662 + }, + { + "epoch": 1.1833588895165845, + "grad_norm": 0.777159571647644, + "learning_rate": 3.346714252639084e-05, + "loss": 2.419, + "step": 14663 + }, + { + "epoch": 1.1834395932531676, + "grad_norm": 0.666344165802002, + "learning_rate": 3.345535763658975e-05, + "loss": 2.4155, + "step": 14664 + }, + { + "epoch": 1.1835202969897507, + "grad_norm": 0.708848774433136, + "learning_rate": 3.3443574405255095e-05, + "loss": 2.4794, + "step": 14665 + }, + { + "epoch": 1.1836010007263336, + "grad_norm": 0.7247438430786133, + "learning_rate": 3.3431792832680555e-05, + "loss": 2.4445, + "step": 14666 + }, + { + "epoch": 1.1836817044629167, + "grad_norm": 0.6870034337043762, + "learning_rate": 3.342001291915978e-05, + "loss": 2.4309, + "step": 14667 + }, + { + "epoch": 1.1837624081994997, + "grad_norm": 0.7088049650192261, + "learning_rate": 3.340823466498629e-05, + "loss": 2.4456, + "step": 14668 + }, + { + "epoch": 1.1838431119360826, + "grad_norm": 0.695148229598999, + 
"learning_rate": 3.3396458070453676e-05, + "loss": 2.4018, + "step": 14669 + }, + { + "epoch": 1.1839238156726657, + "grad_norm": 0.7947117686271667, + "learning_rate": 3.3384683135855444e-05, + "loss": 2.4099, + "step": 14670 + }, + { + "epoch": 1.1840045194092486, + "grad_norm": 0.7268195748329163, + "learning_rate": 3.337290986148502e-05, + "loss": 2.3955, + "step": 14671 + }, + { + "epoch": 1.1840852231458316, + "grad_norm": 0.6932024955749512, + "learning_rate": 3.336113824763585e-05, + "loss": 2.4046, + "step": 14672 + }, + { + "epoch": 1.1841659268824147, + "grad_norm": 0.7408114671707153, + "learning_rate": 3.3349368294601334e-05, + "loss": 2.4186, + "step": 14673 + }, + { + "epoch": 1.1842466306189976, + "grad_norm": 0.6678428053855896, + "learning_rate": 3.3337600002674765e-05, + "loss": 2.4324, + "step": 14674 + }, + { + "epoch": 1.1843273343555807, + "grad_norm": 0.7221381664276123, + "learning_rate": 3.3325833372149416e-05, + "loss": 2.4474, + "step": 14675 + }, + { + "epoch": 1.1844080380921636, + "grad_norm": 0.6971224546432495, + "learning_rate": 3.3314068403318654e-05, + "loss": 2.4197, + "step": 14676 + }, + { + "epoch": 1.1844887418287466, + "grad_norm": 0.65053391456604, + "learning_rate": 3.3302305096475604e-05, + "loss": 2.4169, + "step": 14677 + }, + { + "epoch": 1.1845694455653297, + "grad_norm": 0.7231155633926392, + "learning_rate": 3.3290543451913457e-05, + "loss": 2.4222, + "step": 14678 + }, + { + "epoch": 1.1846501493019126, + "grad_norm": 0.6458824872970581, + "learning_rate": 3.3278783469925345e-05, + "loss": 2.422, + "step": 14679 + }, + { + "epoch": 1.1847308530384957, + "grad_norm": 0.6783488392829895, + "learning_rate": 3.32670251508044e-05, + "loss": 2.4231, + "step": 14680 + }, + { + "epoch": 1.1848115567750788, + "grad_norm": 0.6742293238639832, + "learning_rate": 3.3255268494843586e-05, + "loss": 2.409, + "step": 14681 + }, + { + "epoch": 1.1848922605116616, + "grad_norm": 0.7455186247825623, + "learning_rate": 
3.3243513502335956e-05, + "loss": 2.4121, + "step": 14682 + }, + { + "epoch": 1.1849729642482447, + "grad_norm": 0.7042234539985657, + "learning_rate": 3.323176017357451e-05, + "loss": 2.4574, + "step": 14683 + }, + { + "epoch": 1.1850536679848278, + "grad_norm": 0.7897992134094238, + "learning_rate": 3.3220008508852094e-05, + "loss": 2.4796, + "step": 14684 + }, + { + "epoch": 1.1851343717214107, + "grad_norm": 0.6894058585166931, + "learning_rate": 3.3208258508461644e-05, + "loss": 2.4125, + "step": 14685 + }, + { + "epoch": 1.1852150754579938, + "grad_norm": 0.7574072480201721, + "learning_rate": 3.319651017269597e-05, + "loss": 2.4714, + "step": 14686 + }, + { + "epoch": 1.1852957791945766, + "grad_norm": 0.7457531094551086, + "learning_rate": 3.3184763501847905e-05, + "loss": 2.4793, + "step": 14687 + }, + { + "epoch": 1.1853764829311597, + "grad_norm": 0.6819709539413452, + "learning_rate": 3.317301849621018e-05, + "loss": 2.4563, + "step": 14688 + }, + { + "epoch": 1.1854571866677428, + "grad_norm": 0.6998026371002197, + "learning_rate": 3.316127515607555e-05, + "loss": 2.4548, + "step": 14689 + }, + { + "epoch": 1.1855378904043257, + "grad_norm": 0.7148768305778503, + "learning_rate": 3.314953348173664e-05, + "loss": 2.4897, + "step": 14690 + }, + { + "epoch": 1.1856185941409088, + "grad_norm": 0.6581987738609314, + "learning_rate": 3.31377934734861e-05, + "loss": 2.4683, + "step": 14691 + }, + { + "epoch": 1.1856992978774916, + "grad_norm": 0.7493093609809875, + "learning_rate": 3.312605513161653e-05, + "loss": 2.4564, + "step": 14692 + }, + { + "epoch": 1.1857800016140747, + "grad_norm": 0.7095562219619751, + "learning_rate": 3.311431845642051e-05, + "loss": 2.4595, + "step": 14693 + }, + { + "epoch": 1.1858607053506578, + "grad_norm": 0.8045323491096497, + "learning_rate": 3.310258344819047e-05, + "loss": 2.5044, + "step": 14694 + }, + { + "epoch": 1.1859414090872407, + "grad_norm": 0.7381219267845154, + "learning_rate": 3.3090850107218943e-05, + "loss": 
2.415, + "step": 14695 + }, + { + "epoch": 1.1860221128238237, + "grad_norm": 0.6859883069992065, + "learning_rate": 3.307911843379832e-05, + "loss": 2.4314, + "step": 14696 + }, + { + "epoch": 1.1861028165604068, + "grad_norm": 0.7084196209907532, + "learning_rate": 3.306738842822099e-05, + "loss": 2.4404, + "step": 14697 + }, + { + "epoch": 1.1861835202969897, + "grad_norm": 0.6964806318283081, + "learning_rate": 3.305566009077932e-05, + "loss": 2.4391, + "step": 14698 + }, + { + "epoch": 1.1862642240335728, + "grad_norm": 0.7272049188613892, + "learning_rate": 3.304393342176562e-05, + "loss": 2.4395, + "step": 14699 + }, + { + "epoch": 1.1863449277701559, + "grad_norm": 0.6651458144187927, + "learning_rate": 3.303220842147209e-05, + "loss": 2.4059, + "step": 14700 + }, + { + "epoch": 1.1864256315067387, + "grad_norm": 0.7599130868911743, + "learning_rate": 3.302048509019099e-05, + "loss": 2.5044, + "step": 14701 + }, + { + "epoch": 1.1865063352433218, + "grad_norm": 0.6694391965866089, + "learning_rate": 3.3008763428214505e-05, + "loss": 2.4817, + "step": 14702 + }, + { + "epoch": 1.1865870389799047, + "grad_norm": 0.7176856398582458, + "learning_rate": 3.299704343583473e-05, + "loss": 2.4702, + "step": 14703 + }, + { + "epoch": 1.1866677427164878, + "grad_norm": 0.7133145332336426, + "learning_rate": 3.298532511334378e-05, + "loss": 2.4685, + "step": 14704 + }, + { + "epoch": 1.1867484464530709, + "grad_norm": 0.7170277833938599, + "learning_rate": 3.297360846103371e-05, + "loss": 2.4203, + "step": 14705 + }, + { + "epoch": 1.1868291501896537, + "grad_norm": 0.6853376626968384, + "learning_rate": 3.296189347919652e-05, + "loss": 2.4067, + "step": 14706 + }, + { + "epoch": 1.1869098539262368, + "grad_norm": 0.7269156575202942, + "learning_rate": 3.2950180168124175e-05, + "loss": 2.4211, + "step": 14707 + }, + { + "epoch": 1.1869905576628197, + "grad_norm": 0.8649005889892578, + "learning_rate": 3.2938468528108626e-05, + "loss": 2.4611, + "step": 14708 + }, + { + 
"epoch": 1.1870712613994028, + "grad_norm": 0.7256221771240234, + "learning_rate": 3.292675855944177e-05, + "loss": 2.4618, + "step": 14709 + }, + { + "epoch": 1.1871519651359859, + "grad_norm": 0.6854279637336731, + "learning_rate": 3.291505026241539e-05, + "loss": 2.4466, + "step": 14710 + }, + { + "epoch": 1.1872326688725687, + "grad_norm": 0.7182712554931641, + "learning_rate": 3.2903343637321316e-05, + "loss": 2.4847, + "step": 14711 + }, + { + "epoch": 1.1873133726091518, + "grad_norm": 0.6795300841331482, + "learning_rate": 3.289163868445134e-05, + "loss": 2.4407, + "step": 14712 + }, + { + "epoch": 1.187394076345735, + "grad_norm": 0.685146689414978, + "learning_rate": 3.287993540409713e-05, + "loss": 2.4537, + "step": 14713 + }, + { + "epoch": 1.1874747800823178, + "grad_norm": 0.7891005873680115, + "learning_rate": 3.2868233796550375e-05, + "loss": 2.4085, + "step": 14714 + }, + { + "epoch": 1.1875554838189009, + "grad_norm": 0.6521769762039185, + "learning_rate": 3.2856533862102724e-05, + "loss": 2.4174, + "step": 14715 + }, + { + "epoch": 1.1876361875554837, + "grad_norm": 0.7486612200737, + "learning_rate": 3.284483560104575e-05, + "loss": 2.4072, + "step": 14716 + }, + { + "epoch": 1.1877168912920668, + "grad_norm": 0.6895913481712341, + "learning_rate": 3.283313901367103e-05, + "loss": 2.4398, + "step": 14717 + }, + { + "epoch": 1.18779759502865, + "grad_norm": 0.6595678329467773, + "learning_rate": 3.282144410027009e-05, + "loss": 2.4407, + "step": 14718 + }, + { + "epoch": 1.1878782987652328, + "grad_norm": 0.7724249958992004, + "learning_rate": 3.280975086113435e-05, + "loss": 2.464, + "step": 14719 + }, + { + "epoch": 1.1879590025018159, + "grad_norm": 0.659472644329071, + "learning_rate": 3.279805929655524e-05, + "loss": 2.4774, + "step": 14720 + }, + { + "epoch": 1.1880397062383987, + "grad_norm": 0.7187919020652771, + "learning_rate": 3.27863694068242e-05, + "loss": 2.4767, + "step": 14721 + }, + { + "epoch": 1.1881204099749818, + "grad_norm": 
0.7740198373794556, + "learning_rate": 3.2774681192232506e-05, + "loss": 2.4762, + "step": 14722 + }, + { + "epoch": 1.188201113711565, + "grad_norm": 0.700591504573822, + "learning_rate": 3.2762994653071464e-05, + "loss": 2.448, + "step": 14723 + }, + { + "epoch": 1.1882818174481478, + "grad_norm": 0.7168558239936829, + "learning_rate": 3.275130978963237e-05, + "loss": 2.4084, + "step": 14724 + }, + { + "epoch": 1.1883625211847308, + "grad_norm": 0.8039551973342896, + "learning_rate": 3.273962660220646e-05, + "loss": 2.3849, + "step": 14725 + }, + { + "epoch": 1.188443224921314, + "grad_norm": 0.6453016400337219, + "learning_rate": 3.27279450910848e-05, + "loss": 2.3856, + "step": 14726 + }, + { + "epoch": 1.1885239286578968, + "grad_norm": 0.7194651365280151, + "learning_rate": 3.2716265256558644e-05, + "loss": 2.4337, + "step": 14727 + }, + { + "epoch": 1.1886046323944799, + "grad_norm": 0.7298597097396851, + "learning_rate": 3.270458709891906e-05, + "loss": 2.4491, + "step": 14728 + }, + { + "epoch": 1.188685336131063, + "grad_norm": 0.7127524614334106, + "learning_rate": 3.269291061845705e-05, + "loss": 2.4319, + "step": 14729 + }, + { + "epoch": 1.1887660398676458, + "grad_norm": 0.6782705783843994, + "learning_rate": 3.2681235815463654e-05, + "loss": 2.4375, + "step": 14730 + }, + { + "epoch": 1.188846743604229, + "grad_norm": 0.7418326735496521, + "learning_rate": 3.266956269022987e-05, + "loss": 2.4149, + "step": 14731 + }, + { + "epoch": 1.1889274473408118, + "grad_norm": 0.7442455291748047, + "learning_rate": 3.265789124304654e-05, + "loss": 2.3935, + "step": 14732 + }, + { + "epoch": 1.1890081510773949, + "grad_norm": 0.7238253951072693, + "learning_rate": 3.264622147420461e-05, + "loss": 2.4592, + "step": 14733 + }, + { + "epoch": 1.189088854813978, + "grad_norm": 0.6488127708435059, + "learning_rate": 3.2634553383994925e-05, + "loss": 2.3468, + "step": 14734 + }, + { + "epoch": 1.1891695585505608, + "grad_norm": 0.7182446718215942, + "learning_rate": 
3.2622886972708246e-05, + "loss": 2.4457, + "step": 14735 + }, + { + "epoch": 1.189250262287144, + "grad_norm": 0.6885523796081543, + "learning_rate": 3.261122224063534e-05, + "loss": 2.3943, + "step": 14736 + }, + { + "epoch": 1.1893309660237268, + "grad_norm": 0.653367817401886, + "learning_rate": 3.259955918806693e-05, + "loss": 2.4188, + "step": 14737 + }, + { + "epoch": 1.1894116697603099, + "grad_norm": 0.6968675851821899, + "learning_rate": 3.2587897815293686e-05, + "loss": 2.4276, + "step": 14738 + }, + { + "epoch": 1.189492373496893, + "grad_norm": 0.6827409267425537, + "learning_rate": 3.257623812260626e-05, + "loss": 2.4417, + "step": 14739 + }, + { + "epoch": 1.1895730772334758, + "grad_norm": 0.6807438731193542, + "learning_rate": 3.256458011029523e-05, + "loss": 2.4495, + "step": 14740 + }, + { + "epoch": 1.189653780970059, + "grad_norm": 0.6692882180213928, + "learning_rate": 3.255292377865116e-05, + "loss": 2.3789, + "step": 14741 + }, + { + "epoch": 1.189734484706642, + "grad_norm": 0.6581685543060303, + "learning_rate": 3.2541269127964515e-05, + "loss": 2.4073, + "step": 14742 + }, + { + "epoch": 1.1898151884432249, + "grad_norm": 0.6458544731140137, + "learning_rate": 3.252961615852578e-05, + "loss": 2.4657, + "step": 14743 + }, + { + "epoch": 1.189895892179808, + "grad_norm": 0.6971322298049927, + "learning_rate": 3.251796487062541e-05, + "loss": 2.4404, + "step": 14744 + }, + { + "epoch": 1.189976595916391, + "grad_norm": 0.6770374178886414, + "learning_rate": 3.2506315264553724e-05, + "loss": 2.4329, + "step": 14745 + }, + { + "epoch": 1.190057299652974, + "grad_norm": 0.7634715437889099, + "learning_rate": 3.2494667340601085e-05, + "loss": 2.4234, + "step": 14746 + }, + { + "epoch": 1.190138003389557, + "grad_norm": 0.7717967629432678, + "learning_rate": 3.24830210990578e-05, + "loss": 2.5009, + "step": 14747 + }, + { + "epoch": 1.1902187071261399, + "grad_norm": 0.7133559584617615, + "learning_rate": 3.2471376540214124e-05, + "loss": 2.4272, 
+ "step": 14748 + }, + { + "epoch": 1.190299410862723, + "grad_norm": 0.7273291349411011, + "learning_rate": 3.245973366436027e-05, + "loss": 2.4174, + "step": 14749 + }, + { + "epoch": 1.190380114599306, + "grad_norm": 0.6955052614212036, + "learning_rate": 3.244809247178643e-05, + "loss": 2.3605, + "step": 14750 + }, + { + "epoch": 1.190460818335889, + "grad_norm": 0.7072615027427673, + "learning_rate": 3.2436452962782685e-05, + "loss": 2.4897, + "step": 14751 + }, + { + "epoch": 1.190541522072472, + "grad_norm": 0.7095344662666321, + "learning_rate": 3.242481513763913e-05, + "loss": 2.4172, + "step": 14752 + }, + { + "epoch": 1.1906222258090549, + "grad_norm": 0.7260944247245789, + "learning_rate": 3.2413178996645864e-05, + "loss": 2.4272, + "step": 14753 + }, + { + "epoch": 1.190702929545638, + "grad_norm": 0.6601141691207886, + "learning_rate": 3.2401544540092824e-05, + "loss": 2.4072, + "step": 14754 + }, + { + "epoch": 1.190783633282221, + "grad_norm": 0.6684936881065369, + "learning_rate": 3.238991176827e-05, + "loss": 2.3968, + "step": 14755 + }, + { + "epoch": 1.190864337018804, + "grad_norm": 0.7264483571052551, + "learning_rate": 3.23782806814673e-05, + "loss": 2.4263, + "step": 14756 + }, + { + "epoch": 1.190945040755387, + "grad_norm": 0.6927621960639954, + "learning_rate": 3.2366651279974614e-05, + "loss": 2.4495, + "step": 14757 + }, + { + "epoch": 1.19102574449197, + "grad_norm": 0.7007272243499756, + "learning_rate": 3.2355023564081775e-05, + "loss": 2.4373, + "step": 14758 + }, + { + "epoch": 1.191106448228553, + "grad_norm": 0.6756663918495178, + "learning_rate": 3.234339753407857e-05, + "loss": 2.4148, + "step": 14759 + }, + { + "epoch": 1.191187151965136, + "grad_norm": 0.6741094589233398, + "learning_rate": 3.233177319025479e-05, + "loss": 2.3976, + "step": 14760 + }, + { + "epoch": 1.1912678557017191, + "grad_norm": 0.7098578810691833, + "learning_rate": 3.2320150532900085e-05, + "loss": 2.4326, + "step": 14761 + }, + { + "epoch": 
1.191348559438302, + "grad_norm": 0.750271737575531, + "learning_rate": 3.230852956230413e-05, + "loss": 2.4766, + "step": 14762 + }, + { + "epoch": 1.191429263174885, + "grad_norm": 0.68764728307724, + "learning_rate": 3.229691027875661e-05, + "loss": 2.4128, + "step": 14763 + }, + { + "epoch": 1.191509966911468, + "grad_norm": 0.656295657157898, + "learning_rate": 3.228529268254702e-05, + "loss": 2.3928, + "step": 14764 + }, + { + "epoch": 1.191590670648051, + "grad_norm": 0.6690353155136108, + "learning_rate": 3.2273676773964955e-05, + "loss": 2.408, + "step": 14765 + }, + { + "epoch": 1.1916713743846339, + "grad_norm": 0.8111640214920044, + "learning_rate": 3.22620625532999e-05, + "loss": 2.4644, + "step": 14766 + }, + { + "epoch": 1.191752078121217, + "grad_norm": 0.7329768538475037, + "learning_rate": 3.2250450020841316e-05, + "loss": 2.4235, + "step": 14767 + }, + { + "epoch": 1.1918327818578, + "grad_norm": 0.6902688145637512, + "learning_rate": 3.223883917687861e-05, + "loss": 2.3883, + "step": 14768 + }, + { + "epoch": 1.191913485594383, + "grad_norm": 0.797249972820282, + "learning_rate": 3.2227230021701205e-05, + "loss": 2.523, + "step": 14769 + }, + { + "epoch": 1.191994189330966, + "grad_norm": 0.6294408440589905, + "learning_rate": 3.221562255559834e-05, + "loss": 2.4156, + "step": 14770 + }, + { + "epoch": 1.192074893067549, + "grad_norm": 0.7326164245605469, + "learning_rate": 3.220401677885936e-05, + "loss": 2.3828, + "step": 14771 + }, + { + "epoch": 1.192155596804132, + "grad_norm": 0.783747673034668, + "learning_rate": 3.219241269177351e-05, + "loss": 2.4321, + "step": 14772 + }, + { + "epoch": 1.192236300540715, + "grad_norm": 0.7415335178375244, + "learning_rate": 3.2180810294630005e-05, + "loss": 2.4446, + "step": 14773 + }, + { + "epoch": 1.1923170042772981, + "grad_norm": 0.7125591039657593, + "learning_rate": 3.2169209587717966e-05, + "loss": 2.3914, + "step": 14774 + }, + { + "epoch": 1.192397708013881, + "grad_norm": 0.6714075207710266, 
+ "learning_rate": 3.215761057132652e-05, + "loss": 2.3918, + "step": 14775 + }, + { + "epoch": 1.192478411750464, + "grad_norm": 0.7147830724716187, + "learning_rate": 3.214601324574481e-05, + "loss": 2.4389, + "step": 14776 + }, + { + "epoch": 1.192559115487047, + "grad_norm": 0.6780480146408081, + "learning_rate": 3.2134417611261755e-05, + "loss": 2.4119, + "step": 14777 + }, + { + "epoch": 1.19263981922363, + "grad_norm": 0.7473881840705872, + "learning_rate": 3.212282366816645e-05, + "loss": 2.4547, + "step": 14778 + }, + { + "epoch": 1.1927205229602131, + "grad_norm": 0.7418377995491028, + "learning_rate": 3.211123141674784e-05, + "loss": 2.4156, + "step": 14779 + }, + { + "epoch": 1.192801226696796, + "grad_norm": 0.687524139881134, + "learning_rate": 3.209964085729477e-05, + "loss": 2.4309, + "step": 14780 + }, + { + "epoch": 1.192881930433379, + "grad_norm": 0.6965883374214172, + "learning_rate": 3.208805199009615e-05, + "loss": 2.4028, + "step": 14781 + }, + { + "epoch": 1.192962634169962, + "grad_norm": 0.7024682760238647, + "learning_rate": 3.207646481544082e-05, + "loss": 2.4482, + "step": 14782 + }, + { + "epoch": 1.193043337906545, + "grad_norm": 0.6835834383964539, + "learning_rate": 3.2064879333617514e-05, + "loss": 2.3898, + "step": 14783 + }, + { + "epoch": 1.1931240416431281, + "grad_norm": 0.7002003788948059, + "learning_rate": 3.2053295544915e-05, + "loss": 2.487, + "step": 14784 + }, + { + "epoch": 1.193204745379711, + "grad_norm": 0.7128168940544128, + "learning_rate": 3.2041713449622e-05, + "loss": 2.4591, + "step": 14785 + }, + { + "epoch": 1.193285449116294, + "grad_norm": 0.6897242665290833, + "learning_rate": 3.203013304802712e-05, + "loss": 2.4458, + "step": 14786 + }, + { + "epoch": 1.1933661528528772, + "grad_norm": 0.7281817197799683, + "learning_rate": 3.2018554340419004e-05, + "loss": 2.3772, + "step": 14787 + }, + { + "epoch": 1.19344685658946, + "grad_norm": 0.6956086754798889, + "learning_rate": 3.200697732708619e-05, + "loss": 
2.4316, + "step": 14788 + }, + { + "epoch": 1.1935275603260431, + "grad_norm": 0.7679805159568787, + "learning_rate": 3.199540200831729e-05, + "loss": 2.4464, + "step": 14789 + }, + { + "epoch": 1.1936082640626262, + "grad_norm": 0.6993041634559631, + "learning_rate": 3.19838283844007e-05, + "loss": 2.3881, + "step": 14790 + }, + { + "epoch": 1.193688967799209, + "grad_norm": 0.689618706703186, + "learning_rate": 3.197225645562493e-05, + "loss": 2.4184, + "step": 14791 + }, + { + "epoch": 1.1937696715357922, + "grad_norm": 0.6896520853042603, + "learning_rate": 3.1960686222278354e-05, + "loss": 2.4484, + "step": 14792 + }, + { + "epoch": 1.193850375272375, + "grad_norm": 0.6743811368942261, + "learning_rate": 3.1949117684649334e-05, + "loss": 2.4636, + "step": 14793 + }, + { + "epoch": 1.1939310790089581, + "grad_norm": 0.7028046250343323, + "learning_rate": 3.1937550843026163e-05, + "loss": 2.4576, + "step": 14794 + }, + { + "epoch": 1.1940117827455412, + "grad_norm": 0.7219679951667786, + "learning_rate": 3.192598569769718e-05, + "loss": 2.4495, + "step": 14795 + }, + { + "epoch": 1.194092486482124, + "grad_norm": 0.731438159942627, + "learning_rate": 3.191442224895056e-05, + "loss": 2.4699, + "step": 14796 + }, + { + "epoch": 1.1941731902187072, + "grad_norm": 0.6731431484222412, + "learning_rate": 3.19028604970745e-05, + "loss": 2.4292, + "step": 14797 + }, + { + "epoch": 1.19425389395529, + "grad_norm": 0.6720147728919983, + "learning_rate": 3.1891300442357174e-05, + "loss": 2.4482, + "step": 14798 + }, + { + "epoch": 1.1943345976918731, + "grad_norm": 0.7504273653030396, + "learning_rate": 3.187974208508667e-05, + "loss": 2.4233, + "step": 14799 + }, + { + "epoch": 1.1944153014284562, + "grad_norm": 0.6882641315460205, + "learning_rate": 3.186818542555108e-05, + "loss": 2.4633, + "step": 14800 + }, + { + "epoch": 1.194496005165039, + "grad_norm": 0.7337899208068848, + "learning_rate": 3.1856630464038385e-05, + "loss": 2.4257, + "step": 14801 + }, + { + 
"epoch": 1.1945767089016222, + "grad_norm": 0.7026493549346924, + "learning_rate": 3.1845077200836636e-05, + "loss": 2.482, + "step": 14802 + }, + { + "epoch": 1.1946574126382052, + "grad_norm": 0.763351321220398, + "learning_rate": 3.1833525636233675e-05, + "loss": 2.4428, + "step": 14803 + }, + { + "epoch": 1.194738116374788, + "grad_norm": 0.6568076610565186, + "learning_rate": 3.182197577051745e-05, + "loss": 2.4373, + "step": 14804 + }, + { + "epoch": 1.1948188201113712, + "grad_norm": 0.6954717040061951, + "learning_rate": 3.1810427603975844e-05, + "loss": 2.4582, + "step": 14805 + }, + { + "epoch": 1.1948995238479543, + "grad_norm": 0.7130215167999268, + "learning_rate": 3.179888113689661e-05, + "loss": 2.443, + "step": 14806 + }, + { + "epoch": 1.1949802275845371, + "grad_norm": 0.6789865493774414, + "learning_rate": 3.178733636956752e-05, + "loss": 2.4138, + "step": 14807 + }, + { + "epoch": 1.1950609313211202, + "grad_norm": 0.7725361585617065, + "learning_rate": 3.177579330227633e-05, + "loss": 2.4783, + "step": 14808 + }, + { + "epoch": 1.195141635057703, + "grad_norm": 0.6952371001243591, + "learning_rate": 3.17642519353107e-05, + "loss": 2.4571, + "step": 14809 + }, + { + "epoch": 1.1952223387942862, + "grad_norm": 0.7541885375976562, + "learning_rate": 3.1752712268958275e-05, + "loss": 2.4075, + "step": 14810 + }, + { + "epoch": 1.1953030425308693, + "grad_norm": 0.6974624395370483, + "learning_rate": 3.174117430350671e-05, + "loss": 2.4525, + "step": 14811 + }, + { + "epoch": 1.1953837462674521, + "grad_norm": 0.7293709516525269, + "learning_rate": 3.172963803924347e-05, + "loss": 2.4646, + "step": 14812 + }, + { + "epoch": 1.1954644500040352, + "grad_norm": 0.6944144368171692, + "learning_rate": 3.1718103476456106e-05, + "loss": 2.462, + "step": 14813 + }, + { + "epoch": 1.195545153740618, + "grad_norm": 0.6415363550186157, + "learning_rate": 3.170657061543214e-05, + "loss": 2.4086, + "step": 14814 + }, + { + "epoch": 1.1956258574772012, + 
"grad_norm": 0.6511349081993103, + "learning_rate": 3.169503945645892e-05, + "loss": 2.4376, + "step": 14815 + }, + { + "epoch": 1.1957065612137843, + "grad_norm": 0.7420210242271423, + "learning_rate": 3.1683509999823854e-05, + "loss": 2.4317, + "step": 14816 + }, + { + "epoch": 1.1957872649503671, + "grad_norm": 0.7291967272758484, + "learning_rate": 3.1671982245814316e-05, + "loss": 2.4369, + "step": 14817 + }, + { + "epoch": 1.1958679686869502, + "grad_norm": 0.685743510723114, + "learning_rate": 3.166045619471758e-05, + "loss": 2.465, + "step": 14818 + }, + { + "epoch": 1.1959486724235333, + "grad_norm": 0.7130060195922852, + "learning_rate": 3.164893184682093e-05, + "loss": 2.4305, + "step": 14819 + }, + { + "epoch": 1.1960293761601162, + "grad_norm": 0.694508969783783, + "learning_rate": 3.163740920241156e-05, + "loss": 2.4278, + "step": 14820 + }, + { + "epoch": 1.1961100798966993, + "grad_norm": 0.6478514075279236, + "learning_rate": 3.162588826177669e-05, + "loss": 2.4721, + "step": 14821 + }, + { + "epoch": 1.1961907836332821, + "grad_norm": 0.6586465835571289, + "learning_rate": 3.1614369025203386e-05, + "loss": 2.4716, + "step": 14822 + }, + { + "epoch": 1.1962714873698652, + "grad_norm": 0.7558106184005737, + "learning_rate": 3.160285149297876e-05, + "loss": 2.4656, + "step": 14823 + }, + { + "epoch": 1.1963521911064483, + "grad_norm": 0.7208340764045715, + "learning_rate": 3.1591335665389896e-05, + "loss": 2.4374, + "step": 14824 + }, + { + "epoch": 1.1964328948430312, + "grad_norm": 0.70301353931427, + "learning_rate": 3.157982154272375e-05, + "loss": 2.397, + "step": 14825 + }, + { + "epoch": 1.1965135985796143, + "grad_norm": 0.6857609152793884, + "learning_rate": 3.15683091252673e-05, + "loss": 2.4258, + "step": 14826 + }, + { + "epoch": 1.1965943023161971, + "grad_norm": 0.6954602003097534, + "learning_rate": 3.155679841330747e-05, + "loss": 2.4566, + "step": 14827 + }, + { + "epoch": 1.1966750060527802, + "grad_norm": 0.6923913955688477, + 
"learning_rate": 3.154528940713113e-05, + "loss": 2.4, + "step": 14828 + }, + { + "epoch": 1.1967557097893633, + "grad_norm": 0.6641134023666382, + "learning_rate": 3.1533782107025124e-05, + "loss": 2.4721, + "step": 14829 + }, + { + "epoch": 1.1968364135259462, + "grad_norm": 0.7470134496688843, + "learning_rate": 3.152227651327627e-05, + "loss": 2.4253, + "step": 14830 + }, + { + "epoch": 1.1969171172625293, + "grad_norm": 0.7234545350074768, + "learning_rate": 3.151077262617126e-05, + "loss": 2.4109, + "step": 14831 + }, + { + "epoch": 1.1969978209991123, + "grad_norm": 0.7814013957977295, + "learning_rate": 3.149927044599682e-05, + "loss": 2.4522, + "step": 14832 + }, + { + "epoch": 1.1970785247356952, + "grad_norm": 0.6825435161590576, + "learning_rate": 3.1487769973039624e-05, + "loss": 2.4728, + "step": 14833 + }, + { + "epoch": 1.1971592284722783, + "grad_norm": 0.7091361880302429, + "learning_rate": 3.147627120758634e-05, + "loss": 2.4615, + "step": 14834 + }, + { + "epoch": 1.1972399322088614, + "grad_norm": 0.7271433472633362, + "learning_rate": 3.146477414992346e-05, + "loss": 2.4154, + "step": 14835 + }, + { + "epoch": 1.1973206359454442, + "grad_norm": 0.6557306051254272, + "learning_rate": 3.145327880033756e-05, + "loss": 2.4348, + "step": 14836 + }, + { + "epoch": 1.1974013396820273, + "grad_norm": 0.6667891144752502, + "learning_rate": 3.1441785159115166e-05, + "loss": 2.4123, + "step": 14837 + }, + { + "epoch": 1.1974820434186102, + "grad_norm": 0.6755266189575195, + "learning_rate": 3.143029322654266e-05, + "loss": 2.4287, + "step": 14838 + }, + { + "epoch": 1.1975627471551933, + "grad_norm": 0.7647396922111511, + "learning_rate": 3.1418803002906475e-05, + "loss": 2.4343, + "step": 14839 + }, + { + "epoch": 1.1976434508917764, + "grad_norm": 0.7288243174552917, + "learning_rate": 3.140731448849305e-05, + "loss": 2.4536, + "step": 14840 + }, + { + "epoch": 1.1977241546283592, + "grad_norm": 0.6126244068145752, + "learning_rate": 
3.1395827683588605e-05, + "loss": 2.4187, + "step": 14841 + }, + { + "epoch": 1.1978048583649423, + "grad_norm": 0.6773896217346191, + "learning_rate": 3.138434258847948e-05, + "loss": 2.3916, + "step": 14842 + }, + { + "epoch": 1.1978855621015252, + "grad_norm": 0.724413275718689, + "learning_rate": 3.1372859203451934e-05, + "loss": 2.4614, + "step": 14843 + }, + { + "epoch": 1.1979662658381083, + "grad_norm": 0.7043039798736572, + "learning_rate": 3.136137752879209e-05, + "loss": 2.4343, + "step": 14844 + }, + { + "epoch": 1.1980469695746914, + "grad_norm": 0.7543383240699768, + "learning_rate": 3.134989756478615e-05, + "loss": 2.4345, + "step": 14845 + }, + { + "epoch": 1.1981276733112742, + "grad_norm": 0.7193408608436584, + "learning_rate": 3.1338419311720244e-05, + "loss": 2.4728, + "step": 14846 + }, + { + "epoch": 1.1982083770478573, + "grad_norm": 0.8090186715126038, + "learning_rate": 3.132694276988038e-05, + "loss": 2.4246, + "step": 14847 + }, + { + "epoch": 1.1982890807844404, + "grad_norm": 0.7154600620269775, + "learning_rate": 3.131546793955261e-05, + "loss": 2.4061, + "step": 14848 + }, + { + "epoch": 1.1983697845210233, + "grad_norm": 0.6987032890319824, + "learning_rate": 3.130399482102293e-05, + "loss": 2.4525, + "step": 14849 + }, + { + "epoch": 1.1984504882576064, + "grad_norm": 0.7123507261276245, + "learning_rate": 3.129252341457727e-05, + "loss": 2.4017, + "step": 14850 + }, + { + "epoch": 1.1985311919941894, + "grad_norm": 0.6475987434387207, + "learning_rate": 3.128105372050153e-05, + "loss": 2.4617, + "step": 14851 + }, + { + "epoch": 1.1986118957307723, + "grad_norm": 0.6799046993255615, + "learning_rate": 3.126958573908156e-05, + "loss": 2.4337, + "step": 14852 + }, + { + "epoch": 1.1986925994673554, + "grad_norm": 0.6910607218742371, + "learning_rate": 3.125811947060322e-05, + "loss": 2.415, + "step": 14853 + }, + { + "epoch": 1.1987733032039383, + "grad_norm": 0.6879963278770447, + "learning_rate": 3.124665491535219e-05, + "loss": 
2.4912, + "step": 14854 + }, + { + "epoch": 1.1988540069405214, + "grad_norm": 0.7038810849189758, + "learning_rate": 3.123519207361425e-05, + "loss": 2.4528, + "step": 14855 + }, + { + "epoch": 1.1989347106771044, + "grad_norm": 0.6771957278251648, + "learning_rate": 3.1223730945675104e-05, + "loss": 2.4524, + "step": 14856 + }, + { + "epoch": 1.1990154144136873, + "grad_norm": 0.7529320120811462, + "learning_rate": 3.1212271531820336e-05, + "loss": 2.4667, + "step": 14857 + }, + { + "epoch": 1.1990961181502704, + "grad_norm": 0.6498474478721619, + "learning_rate": 3.1200813832335574e-05, + "loss": 2.3863, + "step": 14858 + }, + { + "epoch": 1.1991768218868533, + "grad_norm": 0.7587705850601196, + "learning_rate": 3.1189357847506383e-05, + "loss": 2.4962, + "step": 14859 + }, + { + "epoch": 1.1992575256234363, + "grad_norm": 0.674013078212738, + "learning_rate": 3.117790357761825e-05, + "loss": 2.3939, + "step": 14860 + }, + { + "epoch": 1.1993382293600194, + "grad_norm": 0.6546844840049744, + "learning_rate": 3.116645102295668e-05, + "loss": 2.4775, + "step": 14861 + }, + { + "epoch": 1.1994189330966023, + "grad_norm": 0.7558320760726929, + "learning_rate": 3.11550001838071e-05, + "loss": 2.3918, + "step": 14862 + }, + { + "epoch": 1.1994996368331854, + "grad_norm": 0.7074883580207825, + "learning_rate": 3.114355106045486e-05, + "loss": 2.3969, + "step": 14863 + }, + { + "epoch": 1.1995803405697685, + "grad_norm": 0.706078290939331, + "learning_rate": 3.1132103653185305e-05, + "loss": 2.5028, + "step": 14864 + }, + { + "epoch": 1.1996610443063513, + "grad_norm": 0.6883544921875, + "learning_rate": 3.1120657962283764e-05, + "loss": 2.4407, + "step": 14865 + }, + { + "epoch": 1.1997417480429344, + "grad_norm": 0.6905466914176941, + "learning_rate": 3.110921398803551e-05, + "loss": 2.3893, + "step": 14866 + }, + { + "epoch": 1.1998224517795173, + "grad_norm": 0.6584910154342651, + "learning_rate": 3.109777173072569e-05, + "loss": 2.4515, + "step": 14867 + }, + { + 
"epoch": 1.1999031555161004, + "grad_norm": 0.6957471370697021, + "learning_rate": 3.108633119063951e-05, + "loss": 2.4483, + "step": 14868 + }, + { + "epoch": 1.1999838592526835, + "grad_norm": 0.6716276407241821, + "learning_rate": 3.1074892368062095e-05, + "loss": 2.4298, + "step": 14869 + }, + { + "epoch": 1.2000645629892663, + "grad_norm": 0.7350820302963257, + "learning_rate": 3.1063455263278543e-05, + "loss": 2.4088, + "step": 14870 + }, + { + "epoch": 1.2001452667258494, + "grad_norm": 0.7409771680831909, + "learning_rate": 3.105201987657388e-05, + "loss": 2.4089, + "step": 14871 + }, + { + "epoch": 1.2002259704624323, + "grad_norm": 0.7273266911506653, + "learning_rate": 3.104058620823315e-05, + "loss": 2.5149, + "step": 14872 + }, + { + "epoch": 1.2003066741990154, + "grad_norm": 0.6793962717056274, + "learning_rate": 3.102915425854124e-05, + "loss": 2.4422, + "step": 14873 + }, + { + "epoch": 1.2003873779355985, + "grad_norm": 0.72386234998703, + "learning_rate": 3.101772402778309e-05, + "loss": 2.4756, + "step": 14874 + }, + { + "epoch": 1.2004680816721813, + "grad_norm": 0.6530055999755859, + "learning_rate": 3.1006295516243625e-05, + "loss": 2.4145, + "step": 14875 + }, + { + "epoch": 1.2005487854087644, + "grad_norm": 0.7288365960121155, + "learning_rate": 3.099486872420758e-05, + "loss": 2.4565, + "step": 14876 + }, + { + "epoch": 1.2006294891453475, + "grad_norm": 0.6982102394104004, + "learning_rate": 3.09834436519598e-05, + "loss": 2.4788, + "step": 14877 + }, + { + "epoch": 1.2007101928819304, + "grad_norm": 0.7208256125450134, + "learning_rate": 3.0972020299785007e-05, + "loss": 2.4186, + "step": 14878 + }, + { + "epoch": 1.2007908966185135, + "grad_norm": 0.6928278803825378, + "learning_rate": 3.096059866796791e-05, + "loss": 2.4177, + "step": 14879 + }, + { + "epoch": 1.2008716003550965, + "grad_norm": 0.7145438194274902, + "learning_rate": 3.094917875679317e-05, + "loss": 2.4796, + "step": 14880 + }, + { + "epoch": 1.2009523040916794, + 
"grad_norm": 0.7126322388648987, + "learning_rate": 3.093776056654539e-05, + "loss": 2.4926, + "step": 14881 + }, + { + "epoch": 1.2010330078282625, + "grad_norm": 0.7775046825408936, + "learning_rate": 3.092634409750919e-05, + "loss": 2.4386, + "step": 14882 + }, + { + "epoch": 1.2011137115648454, + "grad_norm": 0.6387330889701843, + "learning_rate": 3.091492934996901e-05, + "loss": 2.4302, + "step": 14883 + }, + { + "epoch": 1.2011944153014285, + "grad_norm": 0.6883525252342224, + "learning_rate": 3.090351632420939e-05, + "loss": 2.4644, + "step": 14884 + }, + { + "epoch": 1.2012751190380115, + "grad_norm": 0.6698900461196899, + "learning_rate": 3.0892105020514795e-05, + "loss": 2.414, + "step": 14885 + }, + { + "epoch": 1.2013558227745944, + "grad_norm": 0.7124409079551697, + "learning_rate": 3.088069543916956e-05, + "loss": 2.4275, + "step": 14886 + }, + { + "epoch": 1.2014365265111775, + "grad_norm": 0.6996601223945618, + "learning_rate": 3.0869287580458076e-05, + "loss": 2.4725, + "step": 14887 + }, + { + "epoch": 1.2015172302477604, + "grad_norm": 0.653087317943573, + "learning_rate": 3.085788144466468e-05, + "loss": 2.383, + "step": 14888 + }, + { + "epoch": 1.2015979339843434, + "grad_norm": 0.7426899671554565, + "learning_rate": 3.0846477032073554e-05, + "loss": 2.4064, + "step": 14889 + }, + { + "epoch": 1.2016786377209265, + "grad_norm": 0.6417646408081055, + "learning_rate": 3.083507434296903e-05, + "loss": 2.3964, + "step": 14890 + }, + { + "epoch": 1.2017593414575094, + "grad_norm": 0.6301923394203186, + "learning_rate": 3.0823673377635274e-05, + "loss": 2.4285, + "step": 14891 + }, + { + "epoch": 1.2018400451940925, + "grad_norm": 0.7621259093284607, + "learning_rate": 3.081227413635638e-05, + "loss": 2.4731, + "step": 14892 + }, + { + "epoch": 1.2019207489306756, + "grad_norm": 0.6637598872184753, + "learning_rate": 3.080087661941648e-05, + "loss": 2.4126, + "step": 14893 + }, + { + "epoch": 1.2020014526672584, + "grad_norm": 0.6820287108421326, + 
"learning_rate": 3.078948082709964e-05, + "loss": 2.4108, + "step": 14894 + }, + { + "epoch": 1.2020821564038415, + "grad_norm": 0.7090989351272583, + "learning_rate": 3.077808675968983e-05, + "loss": 2.4678, + "step": 14895 + }, + { + "epoch": 1.2021628601404246, + "grad_norm": 0.7242181897163391, + "learning_rate": 3.076669441747105e-05, + "loss": 2.5346, + "step": 14896 + }, + { + "epoch": 1.2022435638770075, + "grad_norm": 0.7790088653564453, + "learning_rate": 3.075530380072722e-05, + "loss": 2.4436, + "step": 14897 + }, + { + "epoch": 1.2023242676135906, + "grad_norm": 0.6828821301460266, + "learning_rate": 3.074391490974225e-05, + "loss": 2.3767, + "step": 14898 + }, + { + "epoch": 1.2024049713501734, + "grad_norm": 0.709815502166748, + "learning_rate": 3.0732527744799945e-05, + "loss": 2.4139, + "step": 14899 + }, + { + "epoch": 1.2024856750867565, + "grad_norm": 0.6561180353164673, + "learning_rate": 3.07211423061841e-05, + "loss": 2.399, + "step": 14900 + }, + { + "epoch": 1.2025663788233396, + "grad_norm": 0.7122004628181458, + "learning_rate": 3.0709758594178495e-05, + "loss": 2.4314, + "step": 14901 + }, + { + "epoch": 1.2026470825599225, + "grad_norm": 0.6817516684532166, + "learning_rate": 3.0698376609066825e-05, + "loss": 2.4241, + "step": 14902 + }, + { + "epoch": 1.2027277862965056, + "grad_norm": 0.6848475337028503, + "learning_rate": 3.068699635113277e-05, + "loss": 2.4583, + "step": 14903 + }, + { + "epoch": 1.2028084900330884, + "grad_norm": 0.6567823886871338, + "learning_rate": 3.067561782065999e-05, + "loss": 2.3818, + "step": 14904 + }, + { + "epoch": 1.2028891937696715, + "grad_norm": 0.7373961806297302, + "learning_rate": 3.066424101793198e-05, + "loss": 2.4075, + "step": 14905 + }, + { + "epoch": 1.2029698975062546, + "grad_norm": 0.6968079209327698, + "learning_rate": 3.0652865943232346e-05, + "loss": 2.4701, + "step": 14906 + }, + { + "epoch": 1.2030506012428375, + "grad_norm": 0.7356292009353638, + "learning_rate": 
3.064149259684459e-05, + "loss": 2.4188, + "step": 14907 + }, + { + "epoch": 1.2031313049794206, + "grad_norm": 0.7144857048988342, + "learning_rate": 3.063012097905211e-05, + "loss": 2.4411, + "step": 14908 + }, + { + "epoch": 1.2032120087160036, + "grad_norm": 0.734531044960022, + "learning_rate": 3.0618751090138365e-05, + "loss": 2.4595, + "step": 14909 + }, + { + "epoch": 1.2032927124525865, + "grad_norm": 0.6658234000205994, + "learning_rate": 3.060738293038669e-05, + "loss": 2.4206, + "step": 14910 + }, + { + "epoch": 1.2033734161891696, + "grad_norm": 0.678424596786499, + "learning_rate": 3.059601650008044e-05, + "loss": 2.4704, + "step": 14911 + }, + { + "epoch": 1.2034541199257527, + "grad_norm": 0.6852440237998962, + "learning_rate": 3.058465179950287e-05, + "loss": 2.46, + "step": 14912 + }, + { + "epoch": 1.2035348236623356, + "grad_norm": 0.702881395816803, + "learning_rate": 3.057328882893724e-05, + "loss": 2.4372, + "step": 14913 + }, + { + "epoch": 1.2036155273989186, + "grad_norm": 0.6978999972343445, + "learning_rate": 3.056192758866676e-05, + "loss": 2.401, + "step": 14914 + }, + { + "epoch": 1.2036962311355015, + "grad_norm": 0.7070993185043335, + "learning_rate": 3.055056807897454e-05, + "loss": 2.3967, + "step": 14915 + }, + { + "epoch": 1.2037769348720846, + "grad_norm": 0.7159305810928345, + "learning_rate": 3.0539210300143693e-05, + "loss": 2.4388, + "step": 14916 + }, + { + "epoch": 1.2038576386086675, + "grad_norm": 0.6920869946479797, + "learning_rate": 3.0527854252457333e-05, + "loss": 2.441, + "step": 14917 + }, + { + "epoch": 1.2039383423452505, + "grad_norm": 0.7014884352684021, + "learning_rate": 3.0516499936198417e-05, + "loss": 2.4115, + "step": 14918 + }, + { + "epoch": 1.2040190460818336, + "grad_norm": 0.6754150986671448, + "learning_rate": 3.0505147351649955e-05, + "loss": 2.3722, + "step": 14919 + }, + { + "epoch": 1.2040997498184165, + "grad_norm": 0.7681791186332703, + "learning_rate": 3.0493796499094874e-05, + "loss": 
2.4331, + "step": 14920 + }, + { + "epoch": 1.2041804535549996, + "grad_norm": 0.7265221476554871, + "learning_rate": 3.0482447378816082e-05, + "loss": 2.4806, + "step": 14921 + }, + { + "epoch": 1.2042611572915827, + "grad_norm": 0.6841520667076111, + "learning_rate": 3.047109999109642e-05, + "loss": 2.3896, + "step": 14922 + }, + { + "epoch": 1.2043418610281655, + "grad_norm": 0.746347963809967, + "learning_rate": 3.0459754336218737e-05, + "loss": 2.4081, + "step": 14923 + }, + { + "epoch": 1.2044225647647486, + "grad_norm": 0.6679818034172058, + "learning_rate": 3.0448410414465712e-05, + "loss": 2.4206, + "step": 14924 + }, + { + "epoch": 1.2045032685013317, + "grad_norm": 0.7122265100479126, + "learning_rate": 3.0437068226120114e-05, + "loss": 2.4217, + "step": 14925 + }, + { + "epoch": 1.2045839722379146, + "grad_norm": 0.7023499011993408, + "learning_rate": 3.0425727771464618e-05, + "loss": 2.4597, + "step": 14926 + }, + { + "epoch": 1.2046646759744977, + "grad_norm": 0.7304259538650513, + "learning_rate": 3.0414389050781876e-05, + "loss": 2.4915, + "step": 14927 + }, + { + "epoch": 1.2047453797110805, + "grad_norm": 0.7209908962249756, + "learning_rate": 3.0403052064354442e-05, + "loss": 2.4163, + "step": 14928 + }, + { + "epoch": 1.2048260834476636, + "grad_norm": 0.7367275953292847, + "learning_rate": 3.0391716812464865e-05, + "loss": 2.4192, + "step": 14929 + }, + { + "epoch": 1.2049067871842467, + "grad_norm": 0.6576591730117798, + "learning_rate": 3.0380383295395674e-05, + "loss": 2.4606, + "step": 14930 + }, + { + "epoch": 1.2049874909208296, + "grad_norm": 0.7082500457763672, + "learning_rate": 3.0369051513429315e-05, + "loss": 2.4079, + "step": 14931 + }, + { + "epoch": 1.2050681946574127, + "grad_norm": 0.6770346760749817, + "learning_rate": 3.03577214668482e-05, + "loss": 2.45, + "step": 14932 + }, + { + "epoch": 1.2051488983939955, + "grad_norm": 0.6979790925979614, + "learning_rate": 3.034639315593476e-05, + "loss": 2.3966, + "step": 14933 + }, + 
{ + "epoch": 1.2052296021305786, + "grad_norm": 0.6863394975662231, + "learning_rate": 3.033506658097124e-05, + "loss": 2.4637, + "step": 14934 + }, + { + "epoch": 1.2053103058671617, + "grad_norm": 0.7522799372673035, + "learning_rate": 3.0323741742239963e-05, + "loss": 2.4585, + "step": 14935 + }, + { + "epoch": 1.2053910096037446, + "grad_norm": 0.7119878530502319, + "learning_rate": 3.031241864002321e-05, + "loss": 2.4473, + "step": 14936 + }, + { + "epoch": 1.2054717133403277, + "grad_norm": 0.690861701965332, + "learning_rate": 3.030109727460312e-05, + "loss": 2.4564, + "step": 14937 + }, + { + "epoch": 1.2055524170769107, + "grad_norm": 0.6825447082519531, + "learning_rate": 3.0289777646261886e-05, + "loss": 2.4511, + "step": 14938 + }, + { + "epoch": 1.2056331208134936, + "grad_norm": 0.7404600977897644, + "learning_rate": 3.027845975528164e-05, + "loss": 2.4461, + "step": 14939 + }, + { + "epoch": 1.2057138245500767, + "grad_norm": 0.6871766448020935, + "learning_rate": 3.026714360194437e-05, + "loss": 2.4486, + "step": 14940 + }, + { + "epoch": 1.2057945282866598, + "grad_norm": 0.6646476984024048, + "learning_rate": 3.02558291865322e-05, + "loss": 2.378, + "step": 14941 + }, + { + "epoch": 1.2058752320232426, + "grad_norm": 0.6998385787010193, + "learning_rate": 3.024451650932707e-05, + "loss": 2.4646, + "step": 14942 + }, + { + "epoch": 1.2059559357598257, + "grad_norm": 0.6763097047805786, + "learning_rate": 3.023320557061098e-05, + "loss": 2.3971, + "step": 14943 + }, + { + "epoch": 1.2060366394964086, + "grad_norm": 0.7409633994102478, + "learning_rate": 3.0221896370665736e-05, + "loss": 2.4405, + "step": 14944 + }, + { + "epoch": 1.2061173432329917, + "grad_norm": 0.6972076892852783, + "learning_rate": 3.0210588909773242e-05, + "loss": 2.3935, + "step": 14945 + }, + { + "epoch": 1.2061980469695748, + "grad_norm": 0.6898512840270996, + "learning_rate": 3.0199283188215333e-05, + "loss": 2.4173, + "step": 14946 + }, + { + "epoch": 1.2062787507061576, + 
"grad_norm": 0.6878097057342529, + "learning_rate": 3.0187979206273707e-05, + "loss": 2.44, + "step": 14947 + }, + { + "epoch": 1.2063594544427407, + "grad_norm": 0.6629695296287537, + "learning_rate": 3.0176676964230143e-05, + "loss": 2.3836, + "step": 14948 + }, + { + "epoch": 1.2064401581793236, + "grad_norm": 0.717654824256897, + "learning_rate": 3.0165376462366336e-05, + "loss": 2.415, + "step": 14949 + }, + { + "epoch": 1.2065208619159067, + "grad_norm": 0.7526129484176636, + "learning_rate": 3.0154077700963867e-05, + "loss": 2.4985, + "step": 14950 + }, + { + "epoch": 1.2066015656524898, + "grad_norm": 0.6867300271987915, + "learning_rate": 3.014278068030435e-05, + "loss": 2.395, + "step": 14951 + }, + { + "epoch": 1.2066822693890726, + "grad_norm": 0.7321466207504272, + "learning_rate": 3.0131485400669356e-05, + "loss": 2.4503, + "step": 14952 + }, + { + "epoch": 1.2067629731256557, + "grad_norm": 0.6915534734725952, + "learning_rate": 3.0120191862340387e-05, + "loss": 2.398, + "step": 14953 + }, + { + "epoch": 1.2068436768622388, + "grad_norm": 0.7017377018928528, + "learning_rate": 3.01089000655989e-05, + "loss": 2.4367, + "step": 14954 + }, + { + "epoch": 1.2069243805988217, + "grad_norm": 0.7032245397567749, + "learning_rate": 3.0097610010726353e-05, + "loss": 2.4078, + "step": 14955 + }, + { + "epoch": 1.2070050843354048, + "grad_norm": 0.6795478463172913, + "learning_rate": 3.008632169800406e-05, + "loss": 2.3508, + "step": 14956 + }, + { + "epoch": 1.2070857880719879, + "grad_norm": 0.7149559855461121, + "learning_rate": 3.007503512771339e-05, + "loss": 2.4023, + "step": 14957 + }, + { + "epoch": 1.2071664918085707, + "grad_norm": 0.724756121635437, + "learning_rate": 3.006375030013563e-05, + "loss": 2.4439, + "step": 14958 + }, + { + "epoch": 1.2072471955451538, + "grad_norm": 0.7233348488807678, + "learning_rate": 3.005246721555205e-05, + "loss": 2.3819, + "step": 14959 + }, + { + "epoch": 1.2073278992817367, + "grad_norm": 0.700322151184082, + 
"learning_rate": 3.0041185874243815e-05, + "loss": 2.4222, + "step": 14960 + }, + { + "epoch": 1.2074086030183198, + "grad_norm": 0.7268145680427551, + "learning_rate": 3.002990627649209e-05, + "loss": 2.4698, + "step": 14961 + }, + { + "epoch": 1.2074893067549028, + "grad_norm": 0.6885111331939697, + "learning_rate": 3.001862842257801e-05, + "loss": 2.4505, + "step": 14962 + }, + { + "epoch": 1.2075700104914857, + "grad_norm": 0.7237974405288696, + "learning_rate": 3.0007352312782632e-05, + "loss": 2.422, + "step": 14963 + }, + { + "epoch": 1.2076507142280688, + "grad_norm": 0.7214741110801697, + "learning_rate": 2.9996077947387015e-05, + "loss": 2.4428, + "step": 14964 + }, + { + "epoch": 1.2077314179646517, + "grad_norm": 0.7264460921287537, + "learning_rate": 2.998480532667215e-05, + "loss": 2.4669, + "step": 14965 + }, + { + "epoch": 1.2078121217012348, + "grad_norm": 0.7055517435073853, + "learning_rate": 2.9973534450918928e-05, + "loss": 2.5082, + "step": 14966 + }, + { + "epoch": 1.2078928254378178, + "grad_norm": 0.6886781454086304, + "learning_rate": 2.9962265320408268e-05, + "loss": 2.4697, + "step": 14967 + }, + { + "epoch": 1.2079735291744007, + "grad_norm": 0.6875878572463989, + "learning_rate": 2.9950997935421076e-05, + "loss": 2.4384, + "step": 14968 + }, + { + "epoch": 1.2080542329109838, + "grad_norm": 0.7586886882781982, + "learning_rate": 2.99397322962381e-05, + "loss": 2.4088, + "step": 14969 + }, + { + "epoch": 1.2081349366475669, + "grad_norm": 0.6744365096092224, + "learning_rate": 2.992846840314013e-05, + "loss": 2.4109, + "step": 14970 + }, + { + "epoch": 1.2082156403841497, + "grad_norm": 0.6589661240577698, + "learning_rate": 2.9917206256407893e-05, + "loss": 2.4386, + "step": 14971 + }, + { + "epoch": 1.2082963441207328, + "grad_norm": 0.6787264943122864, + "learning_rate": 2.990594585632208e-05, + "loss": 2.401, + "step": 14972 + }, + { + "epoch": 1.2083770478573157, + "grad_norm": 0.710517406463623, + "learning_rate": 
2.9894687203163317e-05, + "loss": 2.4813, + "step": 14973 + }, + { + "epoch": 1.2084577515938988, + "grad_norm": 0.676110029220581, + "learning_rate": 2.988343029721221e-05, + "loss": 2.4654, + "step": 14974 + }, + { + "epoch": 1.2085384553304819, + "grad_norm": 0.6940518617630005, + "learning_rate": 2.9872175138749336e-05, + "loss": 2.4188, + "step": 14975 + }, + { + "epoch": 1.2086191590670647, + "grad_norm": 0.6849910020828247, + "learning_rate": 2.9860921728055147e-05, + "loss": 2.384, + "step": 14976 + }, + { + "epoch": 1.2086998628036478, + "grad_norm": 0.6902467608451843, + "learning_rate": 2.9849670065410128e-05, + "loss": 2.4364, + "step": 14977 + }, + { + "epoch": 1.2087805665402307, + "grad_norm": 0.6742224097251892, + "learning_rate": 2.9838420151094747e-05, + "loss": 2.5085, + "step": 14978 + }, + { + "epoch": 1.2088612702768138, + "grad_norm": 0.6635094285011292, + "learning_rate": 2.9827171985389303e-05, + "loss": 2.3635, + "step": 14979 + }, + { + "epoch": 1.2089419740133969, + "grad_norm": 0.7189158201217651, + "learning_rate": 2.9815925568574165e-05, + "loss": 2.458, + "step": 14980 + }, + { + "epoch": 1.2090226777499797, + "grad_norm": 0.7370143532752991, + "learning_rate": 2.9804680900929628e-05, + "loss": 2.4543, + "step": 14981 + }, + { + "epoch": 1.2091033814865628, + "grad_norm": 0.7410217523574829, + "learning_rate": 2.979343798273593e-05, + "loss": 2.4537, + "step": 14982 + }, + { + "epoch": 1.209184085223146, + "grad_norm": 0.7525770664215088, + "learning_rate": 2.9782196814273277e-05, + "loss": 2.5147, + "step": 14983 + }, + { + "epoch": 1.2092647889597288, + "grad_norm": 0.7302291393280029, + "learning_rate": 2.9770957395821863e-05, + "loss": 2.4711, + "step": 14984 + }, + { + "epoch": 1.2093454926963119, + "grad_norm": 0.7154920101165771, + "learning_rate": 2.975971972766175e-05, + "loss": 2.5224, + "step": 14985 + }, + { + "epoch": 1.209426196432895, + "grad_norm": 0.6827684640884399, + "learning_rate": 2.9748483810073025e-05, + 
"loss": 2.4477, + "step": 14986 + }, + { + "epoch": 1.2095069001694778, + "grad_norm": 0.7753484845161438, + "learning_rate": 2.973724964333575e-05, + "loss": 2.4257, + "step": 14987 + }, + { + "epoch": 1.209587603906061, + "grad_norm": 0.7146809101104736, + "learning_rate": 2.9726017227729862e-05, + "loss": 2.3953, + "step": 14988 + }, + { + "epoch": 1.2096683076426438, + "grad_norm": 0.7360730767250061, + "learning_rate": 2.9714786563535313e-05, + "loss": 2.3774, + "step": 14989 + }, + { + "epoch": 1.2097490113792269, + "grad_norm": 0.7159923911094666, + "learning_rate": 2.970355765103201e-05, + "loss": 2.4068, + "step": 14990 + }, + { + "epoch": 1.20982971511581, + "grad_norm": 0.6732171773910522, + "learning_rate": 2.969233049049982e-05, + "loss": 2.4215, + "step": 14991 + }, + { + "epoch": 1.2099104188523928, + "grad_norm": 0.749812126159668, + "learning_rate": 2.968110508221853e-05, + "loss": 2.4415, + "step": 14992 + }, + { + "epoch": 1.209991122588976, + "grad_norm": 0.7185530662536621, + "learning_rate": 2.9669881426467916e-05, + "loss": 2.4536, + "step": 14993 + }, + { + "epoch": 1.2100718263255588, + "grad_norm": 0.6757143139839172, + "learning_rate": 2.9658659523527733e-05, + "loss": 2.3892, + "step": 14994 + }, + { + "epoch": 1.2101525300621419, + "grad_norm": 0.7187495231628418, + "learning_rate": 2.96474393736776e-05, + "loss": 2.434, + "step": 14995 + }, + { + "epoch": 1.210233233798725, + "grad_norm": 0.7016372680664062, + "learning_rate": 2.9636220977197182e-05, + "loss": 2.4903, + "step": 14996 + }, + { + "epoch": 1.2103139375353078, + "grad_norm": 0.7528983950614929, + "learning_rate": 2.9625004334366103e-05, + "loss": 2.3829, + "step": 14997 + }, + { + "epoch": 1.210394641271891, + "grad_norm": 0.6735692024230957, + "learning_rate": 2.9613789445463837e-05, + "loss": 2.3844, + "step": 14998 + }, + { + "epoch": 1.210475345008474, + "grad_norm": 0.6825322508811951, + "learning_rate": 2.9602576310769935e-05, + "loss": 2.4691, + "step": 14999 + }, + 
{ + "epoch": 1.2105560487450568, + "grad_norm": 0.7507675290107727, + "learning_rate": 2.959136493056389e-05, + "loss": 2.4605, + "step": 15000 + }, + { + "epoch": 1.2105560487450568, + "eval_loss": 2.3882925510406494, + "eval_runtime": 1014.0781, + "eval_samples_per_second": 2.584, + "eval_steps_per_second": 0.431, + "step": 15000 + }, + { + "epoch": 1.21063675248164, + "grad_norm": 0.6937146782875061, + "learning_rate": 2.9580155305125044e-05, + "loss": 2.4444, + "step": 15001 + }, + { + "epoch": 1.210717456218223, + "grad_norm": 0.6572179794311523, + "learning_rate": 2.9568947434732775e-05, + "loss": 2.4373, + "step": 15002 + }, + { + "epoch": 1.2107981599548059, + "grad_norm": 0.7420738935470581, + "learning_rate": 2.955774131966651e-05, + "loss": 2.4046, + "step": 15003 + }, + { + "epoch": 1.210878863691389, + "grad_norm": 0.7952237129211426, + "learning_rate": 2.954653696020543e-05, + "loss": 2.4082, + "step": 15004 + }, + { + "epoch": 1.2109595674279718, + "grad_norm": 0.6640750765800476, + "learning_rate": 2.9535334356628817e-05, + "loss": 2.4109, + "step": 15005 + }, + { + "epoch": 1.211040271164555, + "grad_norm": 0.6968019008636475, + "learning_rate": 2.952413350921588e-05, + "loss": 2.3991, + "step": 15006 + }, + { + "epoch": 1.211120974901138, + "grad_norm": 0.7174221277236938, + "learning_rate": 2.9512934418245787e-05, + "loss": 2.3909, + "step": 15007 + }, + { + "epoch": 1.2112016786377209, + "grad_norm": 0.6854268908500671, + "learning_rate": 2.9501737083997595e-05, + "loss": 2.4321, + "step": 15008 + }, + { + "epoch": 1.211282382374304, + "grad_norm": 0.6705672740936279, + "learning_rate": 2.949054150675039e-05, + "loss": 2.4749, + "step": 15009 + }, + { + "epoch": 1.2113630861108868, + "grad_norm": 0.7871068716049194, + "learning_rate": 2.9479347686783244e-05, + "loss": 2.424, + "step": 15010 + }, + { + "epoch": 1.21144378984747, + "grad_norm": 0.8194620609283447, + "learning_rate": 2.946815562437506e-05, + "loss": 2.461, + "step": 15011 + }, + { 
+ "epoch": 1.211524493584053, + "grad_norm": 0.673367977142334, + "learning_rate": 2.9456965319804818e-05, + "loss": 2.4212, + "step": 15012 + }, + { + "epoch": 1.2116051973206359, + "grad_norm": 0.6630001068115234, + "learning_rate": 2.9445776773351397e-05, + "loss": 2.4393, + "step": 15013 + }, + { + "epoch": 1.211685901057219, + "grad_norm": 0.676170825958252, + "learning_rate": 2.943458998529365e-05, + "loss": 2.3889, + "step": 15014 + }, + { + "epoch": 1.211766604793802, + "grad_norm": 0.6951417326927185, + "learning_rate": 2.942340495591037e-05, + "loss": 2.4088, + "step": 15015 + }, + { + "epoch": 1.211847308530385, + "grad_norm": 0.6909857988357544, + "learning_rate": 2.941222168548037e-05, + "loss": 2.4282, + "step": 15016 + }, + { + "epoch": 1.211928012266968, + "grad_norm": 0.653264045715332, + "learning_rate": 2.9401040174282292e-05, + "loss": 2.4369, + "step": 15017 + }, + { + "epoch": 1.2120087160035509, + "grad_norm": 0.6994543075561523, + "learning_rate": 2.938986042259484e-05, + "loss": 2.419, + "step": 15018 + }, + { + "epoch": 1.212089419740134, + "grad_norm": 0.709015965461731, + "learning_rate": 2.9378682430696668e-05, + "loss": 2.4747, + "step": 15019 + }, + { + "epoch": 1.212170123476717, + "grad_norm": 0.6899579167366028, + "learning_rate": 2.9367506198866313e-05, + "loss": 2.4134, + "step": 15020 + }, + { + "epoch": 1.2122508272133, + "grad_norm": 0.6811912059783936, + "learning_rate": 2.9356331727382337e-05, + "loss": 2.449, + "step": 15021 + }, + { + "epoch": 1.212331530949883, + "grad_norm": 0.8119748830795288, + "learning_rate": 2.9345159016523237e-05, + "loss": 2.4463, + "step": 15022 + }, + { + "epoch": 1.2124122346864659, + "grad_norm": 0.7323578000068665, + "learning_rate": 2.9333988066567463e-05, + "loss": 2.4305, + "step": 15023 + }, + { + "epoch": 1.212492938423049, + "grad_norm": 0.6639837622642517, + "learning_rate": 2.9322818877793436e-05, + "loss": 2.4237, + "step": 15024 + }, + { + "epoch": 1.212573642159632, + "grad_norm": 
0.669623076915741, + "learning_rate": 2.9311651450479516e-05, + "loss": 2.4436, + "step": 15025 + }, + { + "epoch": 1.212654345896215, + "grad_norm": 0.7200437784194946, + "learning_rate": 2.9300485784904054e-05, + "loss": 2.4399, + "step": 15026 + }, + { + "epoch": 1.212735049632798, + "grad_norm": 0.7015525102615356, + "learning_rate": 2.9289321881345254e-05, + "loss": 2.4696, + "step": 15027 + }, + { + "epoch": 1.212815753369381, + "grad_norm": 0.74539715051651, + "learning_rate": 2.9278159740081402e-05, + "loss": 2.4204, + "step": 15028 + }, + { + "epoch": 1.212896457105964, + "grad_norm": 0.6373662352561951, + "learning_rate": 2.9266999361390713e-05, + "loss": 2.4273, + "step": 15029 + }, + { + "epoch": 1.212977160842547, + "grad_norm": 0.8213370442390442, + "learning_rate": 2.9255840745551256e-05, + "loss": 2.4166, + "step": 15030 + }, + { + "epoch": 1.2130578645791301, + "grad_norm": 0.7386181354522705, + "learning_rate": 2.9244683892841185e-05, + "loss": 2.3973, + "step": 15031 + }, + { + "epoch": 1.213138568315713, + "grad_norm": 0.7939273118972778, + "learning_rate": 2.9233528803538534e-05, + "loss": 2.5593, + "step": 15032 + }, + { + "epoch": 1.213219272052296, + "grad_norm": 0.7580689191818237, + "learning_rate": 2.9222375477921347e-05, + "loss": 2.4255, + "step": 15033 + }, + { + "epoch": 1.213299975788879, + "grad_norm": 0.7680409550666809, + "learning_rate": 2.9211223916267573e-05, + "loss": 2.4447, + "step": 15034 + }, + { + "epoch": 1.213380679525462, + "grad_norm": 0.6998565196990967, + "learning_rate": 2.9200074118855135e-05, + "loss": 2.4061, + "step": 15035 + }, + { + "epoch": 1.2134613832620451, + "grad_norm": 0.6673001050949097, + "learning_rate": 2.9188926085961954e-05, + "loss": 2.3989, + "step": 15036 + }, + { + "epoch": 1.213542086998628, + "grad_norm": 0.683215320110321, + "learning_rate": 2.9177779817865815e-05, + "loss": 2.4078, + "step": 15037 + }, + { + "epoch": 1.213622790735211, + "grad_norm": 0.696967363357544, + "learning_rate": 
2.9166635314844527e-05, + "loss": 2.4224, + "step": 15038 + }, + { + "epoch": 1.213703494471794, + "grad_norm": 0.6930364370346069, + "learning_rate": 2.915549257717588e-05, + "loss": 2.4112, + "step": 15039 + }, + { + "epoch": 1.213784198208377, + "grad_norm": 0.7387405633926392, + "learning_rate": 2.914435160513752e-05, + "loss": 2.4458, + "step": 15040 + }, + { + "epoch": 1.21386490194496, + "grad_norm": 0.6615941524505615, + "learning_rate": 2.913321239900714e-05, + "loss": 2.4406, + "step": 15041 + }, + { + "epoch": 1.213945605681543, + "grad_norm": 0.7520569562911987, + "learning_rate": 2.912207495906235e-05, + "loss": 2.3991, + "step": 15042 + }, + { + "epoch": 1.214026309418126, + "grad_norm": 0.6952454447746277, + "learning_rate": 2.911093928558072e-05, + "loss": 2.4404, + "step": 15043 + }, + { + "epoch": 1.2141070131547091, + "grad_norm": 0.7595344185829163, + "learning_rate": 2.9099805378839794e-05, + "loss": 2.551, + "step": 15044 + }, + { + "epoch": 1.214187716891292, + "grad_norm": 0.6645220518112183, + "learning_rate": 2.9088673239117094e-05, + "loss": 2.4167, + "step": 15045 + }, + { + "epoch": 1.214268420627875, + "grad_norm": 0.6433377861976624, + "learning_rate": 2.907754286668998e-05, + "loss": 2.3873, + "step": 15046 + }, + { + "epoch": 1.2143491243644582, + "grad_norm": 0.6806936860084534, + "learning_rate": 2.9066414261835894e-05, + "loss": 2.3868, + "step": 15047 + }, + { + "epoch": 1.214429828101041, + "grad_norm": 0.7261343598365784, + "learning_rate": 2.905528742483222e-05, + "loss": 2.4785, + "step": 15048 + }, + { + "epoch": 1.2145105318376241, + "grad_norm": 0.6495440602302551, + "learning_rate": 2.9044162355956196e-05, + "loss": 2.4167, + "step": 15049 + }, + { + "epoch": 1.214591235574207, + "grad_norm": 0.6816607117652893, + "learning_rate": 2.9033039055485135e-05, + "loss": 2.459, + "step": 15050 + }, + { + "epoch": 1.21467193931079, + "grad_norm": 0.6624214053153992, + "learning_rate": 2.902191752369624e-05, + "loss": 2.4498, + 
"step": 15051 + }, + { + "epoch": 1.2147526430473732, + "grad_norm": 0.6800024509429932, + "learning_rate": 2.9010797760866737e-05, + "loss": 2.4442, + "step": 15052 + }, + { + "epoch": 1.214833346783956, + "grad_norm": 0.711705207824707, + "learning_rate": 2.8999679767273667e-05, + "loss": 2.422, + "step": 15053 + }, + { + "epoch": 1.2149140505205391, + "grad_norm": 0.6854784488677979, + "learning_rate": 2.898856354319419e-05, + "loss": 2.4567, + "step": 15054 + }, + { + "epoch": 1.214994754257122, + "grad_norm": 0.6676114797592163, + "learning_rate": 2.8977449088905373e-05, + "loss": 2.3913, + "step": 15055 + }, + { + "epoch": 1.215075457993705, + "grad_norm": 0.6893348693847656, + "learning_rate": 2.8966336404684145e-05, + "loss": 2.4407, + "step": 15056 + }, + { + "epoch": 1.2151561617302882, + "grad_norm": 0.6749289035797119, + "learning_rate": 2.8955225490807514e-05, + "loss": 2.409, + "step": 15057 + }, + { + "epoch": 1.215236865466871, + "grad_norm": 0.6998956203460693, + "learning_rate": 2.8944116347552387e-05, + "loss": 2.4297, + "step": 15058 + }, + { + "epoch": 1.2153175692034541, + "grad_norm": 0.7040024399757385, + "learning_rate": 2.8933008975195596e-05, + "loss": 2.4262, + "step": 15059 + }, + { + "epoch": 1.2153982729400372, + "grad_norm": 0.6638362407684326, + "learning_rate": 2.8921903374014005e-05, + "loss": 2.4355, + "step": 15060 + }, + { + "epoch": 1.21547897667662, + "grad_norm": 0.6864547729492188, + "learning_rate": 2.8910799544284407e-05, + "loss": 2.4493, + "step": 15061 + }, + { + "epoch": 1.2155596804132032, + "grad_norm": 0.707383394241333, + "learning_rate": 2.8899697486283474e-05, + "loss": 2.4604, + "step": 15062 + }, + { + "epoch": 1.2156403841497863, + "grad_norm": 0.7121397852897644, + "learning_rate": 2.888859720028795e-05, + "loss": 2.4272, + "step": 15063 + }, + { + "epoch": 1.2157210878863691, + "grad_norm": 0.7600439786911011, + "learning_rate": 2.8877498686574455e-05, + "loss": 2.4499, + "step": 15064 + }, + { + "epoch": 
1.2158017916229522, + "grad_norm": 0.6654962301254272, + "learning_rate": 2.886640194541962e-05, + "loss": 2.4632, + "step": 15065 + }, + { + "epoch": 1.215882495359535, + "grad_norm": 0.7138063311576843, + "learning_rate": 2.8855306977099994e-05, + "loss": 2.4321, + "step": 15066 + }, + { + "epoch": 1.2159631990961182, + "grad_norm": 0.672604501247406, + "learning_rate": 2.884421378189208e-05, + "loss": 2.4026, + "step": 15067 + }, + { + "epoch": 1.2160439028327013, + "grad_norm": 0.6894693970680237, + "learning_rate": 2.8833122360072405e-05, + "loss": 2.4213, + "step": 15068 + }, + { + "epoch": 1.2161246065692841, + "grad_norm": 0.6784985065460205, + "learning_rate": 2.8822032711917325e-05, + "loss": 2.4207, + "step": 15069 + }, + { + "epoch": 1.2162053103058672, + "grad_norm": 0.6569294929504395, + "learning_rate": 2.8810944837703248e-05, + "loss": 2.4142, + "step": 15070 + }, + { + "epoch": 1.21628601404245, + "grad_norm": 0.7240702509880066, + "learning_rate": 2.879985873770654e-05, + "loss": 2.4173, + "step": 15071 + }, + { + "epoch": 1.2163667177790332, + "grad_norm": 0.6935575604438782, + "learning_rate": 2.8788774412203444e-05, + "loss": 2.4487, + "step": 15072 + }, + { + "epoch": 1.2164474215156162, + "grad_norm": 0.6903246641159058, + "learning_rate": 2.8777691861470234e-05, + "loss": 2.4193, + "step": 15073 + }, + { + "epoch": 1.216528125252199, + "grad_norm": 0.7982182502746582, + "learning_rate": 2.8766611085783123e-05, + "loss": 2.492, + "step": 15074 + }, + { + "epoch": 1.2166088289887822, + "grad_norm": 0.6958058476448059, + "learning_rate": 2.875553208541827e-05, + "loss": 2.4198, + "step": 15075 + }, + { + "epoch": 1.2166895327253653, + "grad_norm": 0.6869969964027405, + "learning_rate": 2.8744454860651794e-05, + "loss": 2.3768, + "step": 15076 + }, + { + "epoch": 1.2167702364619482, + "grad_norm": 0.7263007760047913, + "learning_rate": 2.8733379411759796e-05, + "loss": 2.386, + "step": 15077 + }, + { + "epoch": 1.2168509401985312, + "grad_norm": 
0.7010302543640137, + "learning_rate": 2.872230573901825e-05, + "loss": 2.4417, + "step": 15078 + }, + { + "epoch": 1.216931643935114, + "grad_norm": 0.818980872631073, + "learning_rate": 2.8711233842703156e-05, + "loss": 2.433, + "step": 15079 + }, + { + "epoch": 1.2170123476716972, + "grad_norm": 0.6937929391860962, + "learning_rate": 2.87001637230905e-05, + "loss": 2.379, + "step": 15080 + }, + { + "epoch": 1.2170930514082803, + "grad_norm": 0.6954175233840942, + "learning_rate": 2.868909538045612e-05, + "loss": 2.4296, + "step": 15081 + }, + { + "epoch": 1.2171737551448631, + "grad_norm": 0.7177354097366333, + "learning_rate": 2.8678028815075887e-05, + "loss": 2.3978, + "step": 15082 + }, + { + "epoch": 1.2172544588814462, + "grad_norm": 0.7100846171379089, + "learning_rate": 2.8666964027225607e-05, + "loss": 2.4566, + "step": 15083 + }, + { + "epoch": 1.217335162618029, + "grad_norm": 0.6909635066986084, + "learning_rate": 2.8655901017181064e-05, + "loss": 2.4772, + "step": 15084 + }, + { + "epoch": 1.2174158663546122, + "grad_norm": 0.7319501638412476, + "learning_rate": 2.8644839785217947e-05, + "loss": 2.4402, + "step": 15085 + }, + { + "epoch": 1.2174965700911953, + "grad_norm": 0.6691421270370483, + "learning_rate": 2.8633780331611958e-05, + "loss": 2.4465, + "step": 15086 + }, + { + "epoch": 1.2175772738277781, + "grad_norm": 0.7028824687004089, + "learning_rate": 2.8622722656638745e-05, + "loss": 2.4765, + "step": 15087 + }, + { + "epoch": 1.2176579775643612, + "grad_norm": 0.7428398728370667, + "learning_rate": 2.861166676057383e-05, + "loss": 2.441, + "step": 15088 + }, + { + "epoch": 1.2177386813009443, + "grad_norm": 0.6715269684791565, + "learning_rate": 2.8600612643692803e-05, + "loss": 2.4621, + "step": 15089 + }, + { + "epoch": 1.2178193850375272, + "grad_norm": 0.6768512725830078, + "learning_rate": 2.8589560306271168e-05, + "loss": 2.4257, + "step": 15090 + }, + { + "epoch": 1.2179000887741103, + "grad_norm": 0.7442535758018494, + 
"learning_rate": 2.8578509748584326e-05, + "loss": 2.424, + "step": 15091 + }, + { + "epoch": 1.2179807925106934, + "grad_norm": 0.7275974154472351, + "learning_rate": 2.8567460970907722e-05, + "loss": 2.4698, + "step": 15092 + }, + { + "epoch": 1.2180614962472762, + "grad_norm": 0.7050346732139587, + "learning_rate": 2.8556413973516727e-05, + "loss": 2.4734, + "step": 15093 + }, + { + "epoch": 1.2181421999838593, + "grad_norm": 0.7325939536094666, + "learning_rate": 2.854536875668664e-05, + "loss": 2.4166, + "step": 15094 + }, + { + "epoch": 1.2182229037204422, + "grad_norm": 0.6764184236526489, + "learning_rate": 2.8534325320692746e-05, + "loss": 2.4742, + "step": 15095 + }, + { + "epoch": 1.2183036074570253, + "grad_norm": 0.7405500411987305, + "learning_rate": 2.8523283665810318e-05, + "loss": 2.3959, + "step": 15096 + }, + { + "epoch": 1.2183843111936083, + "grad_norm": 0.6714199185371399, + "learning_rate": 2.8512243792314465e-05, + "loss": 2.4571, + "step": 15097 + }, + { + "epoch": 1.2184650149301912, + "grad_norm": 0.6779391169548035, + "learning_rate": 2.8501205700480372e-05, + "loss": 2.3745, + "step": 15098 + }, + { + "epoch": 1.2185457186667743, + "grad_norm": 0.6876079440116882, + "learning_rate": 2.8490169390583134e-05, + "loss": 2.4432, + "step": 15099 + }, + { + "epoch": 1.2186264224033572, + "grad_norm": 0.7092362642288208, + "learning_rate": 2.8479134862897826e-05, + "loss": 2.4716, + "step": 15100 + }, + { + "epoch": 1.2187071261399403, + "grad_norm": 0.6901989579200745, + "learning_rate": 2.8468102117699414e-05, + "loss": 2.417, + "step": 15101 + }, + { + "epoch": 1.2187878298765233, + "grad_norm": 0.7011592984199524, + "learning_rate": 2.8457071155262884e-05, + "loss": 2.4439, + "step": 15102 + }, + { + "epoch": 1.2188685336131062, + "grad_norm": 0.6923472285270691, + "learning_rate": 2.8446041975863146e-05, + "loss": 2.4247, + "step": 15103 + }, + { + "epoch": 1.2189492373496893, + "grad_norm": 0.6948748230934143, + "learning_rate": 
2.843501457977509e-05, + "loss": 2.3902, + "step": 15104 + }, + { + "epoch": 1.2190299410862724, + "grad_norm": 0.7034386396408081, + "learning_rate": 2.842398896727354e-05, + "loss": 2.4277, + "step": 15105 + }, + { + "epoch": 1.2191106448228552, + "grad_norm": 0.7965617775917053, + "learning_rate": 2.8412965138633318e-05, + "loss": 2.435, + "step": 15106 + }, + { + "epoch": 1.2191913485594383, + "grad_norm": 0.7371121644973755, + "learning_rate": 2.8401943094129112e-05, + "loss": 2.3928, + "step": 15107 + }, + { + "epoch": 1.2192720522960214, + "grad_norm": 0.7079561352729797, + "learning_rate": 2.839092283403564e-05, + "loss": 2.4706, + "step": 15108 + }, + { + "epoch": 1.2193527560326043, + "grad_norm": 0.6711337566375732, + "learning_rate": 2.8379904358627584e-05, + "loss": 2.4272, + "step": 15109 + }, + { + "epoch": 1.2194334597691874, + "grad_norm": 0.6840410828590393, + "learning_rate": 2.836888766817951e-05, + "loss": 2.4174, + "step": 15110 + }, + { + "epoch": 1.2195141635057702, + "grad_norm": 0.700366199016571, + "learning_rate": 2.8357872762965986e-05, + "loss": 2.4667, + "step": 15111 + }, + { + "epoch": 1.2195948672423533, + "grad_norm": 0.7090682983398438, + "learning_rate": 2.8346859643261593e-05, + "loss": 2.3748, + "step": 15112 + }, + { + "epoch": 1.2196755709789364, + "grad_norm": 0.7965148687362671, + "learning_rate": 2.8335848309340717e-05, + "loss": 2.5138, + "step": 15113 + }, + { + "epoch": 1.2197562747155193, + "grad_norm": 0.7845773696899414, + "learning_rate": 2.8324838761477833e-05, + "loss": 2.4274, + "step": 15114 + }, + { + "epoch": 1.2198369784521024, + "grad_norm": 0.6545087099075317, + "learning_rate": 2.831383099994731e-05, + "loss": 2.4311, + "step": 15115 + }, + { + "epoch": 1.2199176821886852, + "grad_norm": 0.6846331357955933, + "learning_rate": 2.830282502502356e-05, + "loss": 2.4239, + "step": 15116 + }, + { + "epoch": 1.2199983859252683, + "grad_norm": 0.7062236070632935, + "learning_rate": 2.8291820836980798e-05, + 
"loss": 2.4429, + "step": 15117 + }, + { + "epoch": 1.2200790896618514, + "grad_norm": 0.7526285648345947, + "learning_rate": 2.8280818436093315e-05, + "loss": 2.4882, + "step": 15118 + }, + { + "epoch": 1.2201597933984343, + "grad_norm": 0.6853364109992981, + "learning_rate": 2.8269817822635337e-05, + "loss": 2.3803, + "step": 15119 + }, + { + "epoch": 1.2202404971350174, + "grad_norm": 0.7796143293380737, + "learning_rate": 2.8258818996880964e-05, + "loss": 2.4157, + "step": 15120 + }, + { + "epoch": 1.2203212008716005, + "grad_norm": 0.7202157378196716, + "learning_rate": 2.824782195910437e-05, + "loss": 2.5101, + "step": 15121 + }, + { + "epoch": 1.2204019046081833, + "grad_norm": 0.6730707287788391, + "learning_rate": 2.8236826709579644e-05, + "loss": 2.4397, + "step": 15122 + }, + { + "epoch": 1.2204826083447664, + "grad_norm": 0.7840865850448608, + "learning_rate": 2.8225833248580745e-05, + "loss": 2.4452, + "step": 15123 + }, + { + "epoch": 1.2205633120813493, + "grad_norm": 0.8323497772216797, + "learning_rate": 2.821484157638171e-05, + "loss": 2.4775, + "step": 15124 + }, + { + "epoch": 1.2206440158179324, + "grad_norm": 0.6699438691139221, + "learning_rate": 2.8203851693256466e-05, + "loss": 2.3958, + "step": 15125 + }, + { + "epoch": 1.2207247195545154, + "grad_norm": 0.6711557507514954, + "learning_rate": 2.8192863599478923e-05, + "loss": 2.477, + "step": 15126 + }, + { + "epoch": 1.2208054232910983, + "grad_norm": 0.6255797743797302, + "learning_rate": 2.8181877295322922e-05, + "loss": 2.4222, + "step": 15127 + }, + { + "epoch": 1.2208861270276814, + "grad_norm": 0.7313731908798218, + "learning_rate": 2.8170892781062297e-05, + "loss": 2.4343, + "step": 15128 + }, + { + "epoch": 1.2209668307642643, + "grad_norm": 0.6611476540565491, + "learning_rate": 2.815991005697076e-05, + "loss": 2.3844, + "step": 15129 + }, + { + "epoch": 1.2210475345008474, + "grad_norm": 0.7293661236763, + "learning_rate": 2.8148929123322065e-05, + "loss": 2.3912, + "step": 
15130 + }, + { + "epoch": 1.2211282382374304, + "grad_norm": 0.7150777578353882, + "learning_rate": 2.8137949980389866e-05, + "loss": 2.4227, + "step": 15131 + }, + { + "epoch": 1.2212089419740133, + "grad_norm": 0.7001000642776489, + "learning_rate": 2.8126972628447845e-05, + "loss": 2.4751, + "step": 15132 + }, + { + "epoch": 1.2212896457105964, + "grad_norm": 0.7106043100357056, + "learning_rate": 2.8115997067769505e-05, + "loss": 2.4127, + "step": 15133 + }, + { + "epoch": 1.2213703494471795, + "grad_norm": 0.6969115138053894, + "learning_rate": 2.810502329862842e-05, + "loss": 2.4073, + "step": 15134 + }, + { + "epoch": 1.2214510531837623, + "grad_norm": 0.7493317127227783, + "learning_rate": 2.8094051321298098e-05, + "loss": 2.4541, + "step": 15135 + }, + { + "epoch": 1.2215317569203454, + "grad_norm": 0.6499322652816772, + "learning_rate": 2.808308113605198e-05, + "loss": 2.4057, + "step": 15136 + }, + { + "epoch": 1.2216124606569285, + "grad_norm": 0.6716788411140442, + "learning_rate": 2.807211274316347e-05, + "loss": 2.3856, + "step": 15137 + }, + { + "epoch": 1.2216931643935114, + "grad_norm": 0.7724741101264954, + "learning_rate": 2.8061146142905958e-05, + "loss": 2.4652, + "step": 15138 + }, + { + "epoch": 1.2217738681300945, + "grad_norm": 0.7014325261116028, + "learning_rate": 2.8050181335552718e-05, + "loss": 2.4506, + "step": 15139 + }, + { + "epoch": 1.2218545718666773, + "grad_norm": 0.6705317497253418, + "learning_rate": 2.8039218321377026e-05, + "loss": 2.4581, + "step": 15140 + }, + { + "epoch": 1.2219352756032604, + "grad_norm": 0.709973931312561, + "learning_rate": 2.8028257100652156e-05, + "loss": 2.427, + "step": 15141 + }, + { + "epoch": 1.2220159793398435, + "grad_norm": 0.7021297812461853, + "learning_rate": 2.801729767365122e-05, + "loss": 2.3784, + "step": 15142 + }, + { + "epoch": 1.2220966830764264, + "grad_norm": 0.7431899905204773, + "learning_rate": 2.8006340040647393e-05, + "loss": 2.4135, + "step": 15143 + }, + { + "epoch": 
1.2221773868130095, + "grad_norm": 0.6724472045898438, + "learning_rate": 2.7995384201913765e-05, + "loss": 2.3966, + "step": 15144 + }, + { + "epoch": 1.2222580905495923, + "grad_norm": 0.7381375432014465, + "learning_rate": 2.7984430157723384e-05, + "loss": 2.4853, + "step": 15145 + }, + { + "epoch": 1.2223387942861754, + "grad_norm": 0.6809988617897034, + "learning_rate": 2.7973477908349255e-05, + "loss": 2.408, + "step": 15146 + }, + { + "epoch": 1.2224194980227585, + "grad_norm": 0.7042898535728455, + "learning_rate": 2.7962527454064337e-05, + "loss": 2.3981, + "step": 15147 + }, + { + "epoch": 1.2225002017593414, + "grad_norm": 0.7096118330955505, + "learning_rate": 2.7951578795141576e-05, + "loss": 2.4175, + "step": 15148 + }, + { + "epoch": 1.2225809054959245, + "grad_norm": 0.7271720767021179, + "learning_rate": 2.794063193185378e-05, + "loss": 2.4193, + "step": 15149 + }, + { + "epoch": 1.2226616092325076, + "grad_norm": 0.7000352740287781, + "learning_rate": 2.7929686864473792e-05, + "loss": 2.422, + "step": 15150 + }, + { + "epoch": 1.2227423129690904, + "grad_norm": 0.6983076333999634, + "learning_rate": 2.791874359327443e-05, + "loss": 2.4613, + "step": 15151 + }, + { + "epoch": 1.2228230167056735, + "grad_norm": 0.7520100474357605, + "learning_rate": 2.7907802118528383e-05, + "loss": 2.4147, + "step": 15152 + }, + { + "epoch": 1.2229037204422566, + "grad_norm": 0.7056650519371033, + "learning_rate": 2.789686244050834e-05, + "loss": 2.4568, + "step": 15153 + }, + { + "epoch": 1.2229844241788395, + "grad_norm": 0.7092614769935608, + "learning_rate": 2.7885924559486975e-05, + "loss": 2.4758, + "step": 15154 + }, + { + "epoch": 1.2230651279154225, + "grad_norm": 0.702521562576294, + "learning_rate": 2.7874988475736885e-05, + "loss": 2.4893, + "step": 15155 + }, + { + "epoch": 1.2231458316520054, + "grad_norm": 0.7454921007156372, + "learning_rate": 2.786405418953061e-05, + "loss": 2.4277, + "step": 15156 + }, + { + "epoch": 1.2232265353885885, + 
"grad_norm": 0.659503161907196, + "learning_rate": 2.7853121701140694e-05, + "loss": 2.4664, + "step": 15157 + }, + { + "epoch": 1.2233072391251716, + "grad_norm": 0.6368914842605591, + "learning_rate": 2.7842191010839556e-05, + "loss": 2.3728, + "step": 15158 + }, + { + "epoch": 1.2233879428617545, + "grad_norm": 0.7076737880706787, + "learning_rate": 2.783126211889965e-05, + "loss": 2.4204, + "step": 15159 + }, + { + "epoch": 1.2234686465983375, + "grad_norm": 0.718100905418396, + "learning_rate": 2.7820335025593325e-05, + "loss": 2.478, + "step": 15160 + }, + { + "epoch": 1.2235493503349204, + "grad_norm": 0.6804678440093994, + "learning_rate": 2.7809409731192972e-05, + "loss": 2.3755, + "step": 15161 + }, + { + "epoch": 1.2236300540715035, + "grad_norm": 0.7068643569946289, + "learning_rate": 2.77984862359708e-05, + "loss": 2.3713, + "step": 15162 + }, + { + "epoch": 1.2237107578080866, + "grad_norm": 0.7047072052955627, + "learning_rate": 2.7787564540199097e-05, + "loss": 2.4264, + "step": 15163 + }, + { + "epoch": 1.2237914615446694, + "grad_norm": 0.6985021829605103, + "learning_rate": 2.7776644644150076e-05, + "loss": 2.4101, + "step": 15164 + }, + { + "epoch": 1.2238721652812525, + "grad_norm": 0.7543687224388123, + "learning_rate": 2.776572654809583e-05, + "loss": 2.3722, + "step": 15165 + }, + { + "epoch": 1.2239528690178356, + "grad_norm": 0.7199926972389221, + "learning_rate": 2.7754810252308473e-05, + "loss": 2.3819, + "step": 15166 + }, + { + "epoch": 1.2240335727544185, + "grad_norm": 0.696756899356842, + "learning_rate": 2.7743895757060156e-05, + "loss": 2.4245, + "step": 15167 + }, + { + "epoch": 1.2241142764910016, + "grad_norm": 0.7848933339118958, + "learning_rate": 2.773298306262281e-05, + "loss": 2.4725, + "step": 15168 + }, + { + "epoch": 1.2241949802275847, + "grad_norm": 0.6819389462471008, + "learning_rate": 2.7722072169268432e-05, + "loss": 2.4338, + "step": 15169 + }, + { + "epoch": 1.2242756839641675, + "grad_norm": 0.7185801267623901, 
+ "learning_rate": 2.7711163077268977e-05, + "loss": 2.4745, + "step": 15170 + }, + { + "epoch": 1.2243563877007506, + "grad_norm": 0.7645030617713928, + "learning_rate": 2.7700255786896278e-05, + "loss": 2.4677, + "step": 15171 + }, + { + "epoch": 1.2244370914373335, + "grad_norm": 0.6559275388717651, + "learning_rate": 2.7689350298422202e-05, + "loss": 2.386, + "step": 15172 + }, + { + "epoch": 1.2245177951739166, + "grad_norm": 0.6965066194534302, + "learning_rate": 2.767844661211856e-05, + "loss": 2.4022, + "step": 15173 + }, + { + "epoch": 1.2245984989104994, + "grad_norm": 0.6618858575820923, + "learning_rate": 2.7667544728257057e-05, + "loss": 2.3541, + "step": 15174 + }, + { + "epoch": 1.2246792026470825, + "grad_norm": 0.6635501980781555, + "learning_rate": 2.765664464710941e-05, + "loss": 2.3984, + "step": 15175 + }, + { + "epoch": 1.2247599063836656, + "grad_norm": 0.6987191438674927, + "learning_rate": 2.764574636894729e-05, + "loss": 2.4637, + "step": 15176 + }, + { + "epoch": 1.2248406101202485, + "grad_norm": 0.7289232611656189, + "learning_rate": 2.7634849894042303e-05, + "loss": 2.4033, + "step": 15177 + }, + { + "epoch": 1.2249213138568316, + "grad_norm": 0.7245565056800842, + "learning_rate": 2.762395522266602e-05, + "loss": 2.4281, + "step": 15178 + }, + { + "epoch": 1.2250020175934146, + "grad_norm": 0.6946065425872803, + "learning_rate": 2.761306235508997e-05, + "loss": 2.3869, + "step": 15179 + }, + { + "epoch": 1.2250827213299975, + "grad_norm": 0.6381784677505493, + "learning_rate": 2.7602171291585666e-05, + "loss": 2.404, + "step": 15180 + }, + { + "epoch": 1.2251634250665806, + "grad_norm": 0.6893685460090637, + "learning_rate": 2.759128203242446e-05, + "loss": 2.4807, + "step": 15181 + }, + { + "epoch": 1.2252441288031637, + "grad_norm": 0.6640260815620422, + "learning_rate": 2.7580394577877787e-05, + "loss": 2.4036, + "step": 15182 + }, + { + "epoch": 1.2253248325397466, + "grad_norm": 0.7125177979469299, + "learning_rate": 
2.7569508928217026e-05, + "loss": 2.3869, + "step": 15183 + }, + { + "epoch": 1.2254055362763296, + "grad_norm": 0.657865583896637, + "learning_rate": 2.7558625083713397e-05, + "loss": 2.3869, + "step": 15184 + }, + { + "epoch": 1.2254862400129125, + "grad_norm": 0.6776065230369568, + "learning_rate": 2.7547743044638197e-05, + "loss": 2.4128, + "step": 15185 + }, + { + "epoch": 1.2255669437494956, + "grad_norm": 0.7126299738883972, + "learning_rate": 2.753686281126263e-05, + "loss": 2.4465, + "step": 15186 + }, + { + "epoch": 1.2256476474860787, + "grad_norm": 0.6918273568153381, + "learning_rate": 2.7525984383857873e-05, + "loss": 2.428, + "step": 15187 + }, + { + "epoch": 1.2257283512226615, + "grad_norm": 0.7742759585380554, + "learning_rate": 2.7515107762695025e-05, + "loss": 2.4299, + "step": 15188 + }, + { + "epoch": 1.2258090549592446, + "grad_norm": 0.7194607853889465, + "learning_rate": 2.7504232948045205e-05, + "loss": 2.4315, + "step": 15189 + }, + { + "epoch": 1.2258897586958275, + "grad_norm": 0.6962646245956421, + "learning_rate": 2.7493359940179363e-05, + "loss": 2.4494, + "step": 15190 + }, + { + "epoch": 1.2259704624324106, + "grad_norm": 0.6681686639785767, + "learning_rate": 2.7482488739368538e-05, + "loss": 2.427, + "step": 15191 + }, + { + "epoch": 1.2260511661689937, + "grad_norm": 0.6589877009391785, + "learning_rate": 2.747161934588366e-05, + "loss": 2.4333, + "step": 15192 + }, + { + "epoch": 1.2261318699055765, + "grad_norm": 0.7415218949317932, + "learning_rate": 2.746075175999564e-05, + "loss": 2.4203, + "step": 15193 + }, + { + "epoch": 1.2262125736421596, + "grad_norm": 0.7371910214424133, + "learning_rate": 2.7449885981975276e-05, + "loss": 2.4684, + "step": 15194 + }, + { + "epoch": 1.2262932773787427, + "grad_norm": 0.7010802626609802, + "learning_rate": 2.7439022012093407e-05, + "loss": 2.4625, + "step": 15195 + }, + { + "epoch": 1.2263739811153256, + "grad_norm": 0.7125125527381897, + "learning_rate": 2.7428159850620773e-05, + 
"loss": 2.4075, + "step": 15196 + }, + { + "epoch": 1.2264546848519087, + "grad_norm": 0.701133668422699, + "learning_rate": 2.7417299497828107e-05, + "loss": 2.4525, + "step": 15197 + }, + { + "epoch": 1.2265353885884918, + "grad_norm": 0.7543410658836365, + "learning_rate": 2.7406440953986078e-05, + "loss": 2.474, + "step": 15198 + }, + { + "epoch": 1.2266160923250746, + "grad_norm": 0.69012051820755, + "learning_rate": 2.7395584219365323e-05, + "loss": 2.4853, + "step": 15199 + }, + { + "epoch": 1.2266967960616577, + "grad_norm": 0.6559048295021057, + "learning_rate": 2.7384729294236378e-05, + "loss": 2.4252, + "step": 15200 + }, + { + "epoch": 1.2267774997982406, + "grad_norm": 0.6603518128395081, + "learning_rate": 2.7373876178869794e-05, + "loss": 2.4047, + "step": 15201 + }, + { + "epoch": 1.2268582035348237, + "grad_norm": 0.7159265279769897, + "learning_rate": 2.736302487353609e-05, + "loss": 2.4352, + "step": 15202 + }, + { + "epoch": 1.2269389072714068, + "grad_norm": 0.6784560084342957, + "learning_rate": 2.735217537850565e-05, + "loss": 2.3933, + "step": 15203 + }, + { + "epoch": 1.2270196110079896, + "grad_norm": 0.7341950535774231, + "learning_rate": 2.7341327694048903e-05, + "loss": 2.4514, + "step": 15204 + }, + { + "epoch": 1.2271003147445727, + "grad_norm": 0.726046621799469, + "learning_rate": 2.7330481820436204e-05, + "loss": 2.4427, + "step": 15205 + }, + { + "epoch": 1.2271810184811556, + "grad_norm": 0.6897192001342773, + "learning_rate": 2.7319637757937854e-05, + "loss": 2.4587, + "step": 15206 + }, + { + "epoch": 1.2272617222177387, + "grad_norm": 0.6981058716773987, + "learning_rate": 2.7308795506824124e-05, + "loss": 2.4297, + "step": 15207 + }, + { + "epoch": 1.2273424259543217, + "grad_norm": 0.694583535194397, + "learning_rate": 2.729795506736522e-05, + "loss": 2.3608, + "step": 15208 + }, + { + "epoch": 1.2274231296909046, + "grad_norm": 0.710192084312439, + "learning_rate": 2.728711643983136e-05, + "loss": 2.3733, + "step": 15209 + 
}, + { + "epoch": 1.2275038334274877, + "grad_norm": 0.7203633785247803, + "learning_rate": 2.7276279624492595e-05, + "loss": 2.389, + "step": 15210 + }, + { + "epoch": 1.2275845371640708, + "grad_norm": 0.7298668622970581, + "learning_rate": 2.726544462161905e-05, + "loss": 2.3981, + "step": 15211 + }, + { + "epoch": 1.2276652409006537, + "grad_norm": 0.6640039682388306, + "learning_rate": 2.725461143148078e-05, + "loss": 2.4073, + "step": 15212 + }, + { + "epoch": 1.2277459446372367, + "grad_norm": 0.7203015685081482, + "learning_rate": 2.724378005434772e-05, + "loss": 2.4901, + "step": 15213 + }, + { + "epoch": 1.2278266483738198, + "grad_norm": 0.6668895483016968, + "learning_rate": 2.723295049048985e-05, + "loss": 2.4482, + "step": 15214 + }, + { + "epoch": 1.2279073521104027, + "grad_norm": 0.7551584839820862, + "learning_rate": 2.7222122740177103e-05, + "loss": 2.4877, + "step": 15215 + }, + { + "epoch": 1.2279880558469858, + "grad_norm": 0.707202672958374, + "learning_rate": 2.721129680367923e-05, + "loss": 2.4577, + "step": 15216 + }, + { + "epoch": 1.2280687595835686, + "grad_norm": 0.685153603553772, + "learning_rate": 2.7200472681266155e-05, + "loss": 2.476, + "step": 15217 + }, + { + "epoch": 1.2281494633201517, + "grad_norm": 0.6843041181564331, + "learning_rate": 2.718965037320762e-05, + "loss": 2.4164, + "step": 15218 + }, + { + "epoch": 1.2282301670567348, + "grad_norm": 0.6548978686332703, + "learning_rate": 2.7178829879773306e-05, + "loss": 2.4187, + "step": 15219 + }, + { + "epoch": 1.2283108707933177, + "grad_norm": 0.7037245035171509, + "learning_rate": 2.7168011201232902e-05, + "loss": 2.3621, + "step": 15220 + }, + { + "epoch": 1.2283915745299008, + "grad_norm": 0.6540676951408386, + "learning_rate": 2.7157194337856074e-05, + "loss": 2.4542, + "step": 15221 + }, + { + "epoch": 1.2284722782664836, + "grad_norm": 0.7699899673461914, + "learning_rate": 2.7146379289912338e-05, + "loss": 2.4639, + "step": 15222 + }, + { + "epoch": 
1.2285529820030667, + "grad_norm": 0.7178743481636047, + "learning_rate": 2.713556605767128e-05, + "loss": 2.4222, + "step": 15223 + }, + { + "epoch": 1.2286336857396498, + "grad_norm": 0.6749793887138367, + "learning_rate": 2.7124754641402383e-05, + "loss": 2.4323, + "step": 15224 + }, + { + "epoch": 1.2287143894762327, + "grad_norm": 0.7035594582557678, + "learning_rate": 2.711394504137513e-05, + "loss": 2.4466, + "step": 15225 + }, + { + "epoch": 1.2287950932128158, + "grad_norm": 0.6518487930297852, + "learning_rate": 2.7103137257858868e-05, + "loss": 2.4969, + "step": 15226 + }, + { + "epoch": 1.2288757969493989, + "grad_norm": 0.6739057898521423, + "learning_rate": 2.7092331291122974e-05, + "loss": 2.406, + "step": 15227 + }, + { + "epoch": 1.2289565006859817, + "grad_norm": 0.6584770083427429, + "learning_rate": 2.7081527141436767e-05, + "loss": 2.4304, + "step": 15228 + }, + { + "epoch": 1.2290372044225648, + "grad_norm": 0.6846301555633545, + "learning_rate": 2.7070724809069514e-05, + "loss": 2.3995, + "step": 15229 + }, + { + "epoch": 1.2291179081591477, + "grad_norm": 0.6778364777565002, + "learning_rate": 2.705992429429044e-05, + "loss": 2.38, + "step": 15230 + }, + { + "epoch": 1.2291986118957308, + "grad_norm": 0.6957302689552307, + "learning_rate": 2.7049125597368753e-05, + "loss": 2.3973, + "step": 15231 + }, + { + "epoch": 1.2292793156323139, + "grad_norm": 0.730269193649292, + "learning_rate": 2.7038328718573514e-05, + "loss": 2.4829, + "step": 15232 + }, + { + "epoch": 1.2293600193688967, + "grad_norm": 0.7114049196243286, + "learning_rate": 2.702753365817384e-05, + "loss": 2.3902, + "step": 15233 + }, + { + "epoch": 1.2294407231054798, + "grad_norm": 0.7137531638145447, + "learning_rate": 2.7016740416438823e-05, + "loss": 2.3957, + "step": 15234 + }, + { + "epoch": 1.2295214268420627, + "grad_norm": 0.7178330421447754, + "learning_rate": 2.7005948993637386e-05, + "loss": 2.4429, + "step": 15235 + }, + { + "epoch": 1.2296021305786458, + 
"grad_norm": 0.6767767071723938, + "learning_rate": 2.6995159390038506e-05, + "loss": 2.4009, + "step": 15236 + }, + { + "epoch": 1.2296828343152288, + "grad_norm": 0.7713541984558105, + "learning_rate": 2.6984371605911086e-05, + "loss": 2.4326, + "step": 15237 + }, + { + "epoch": 1.2297635380518117, + "grad_norm": 0.7218228578567505, + "learning_rate": 2.6973585641523992e-05, + "loss": 2.4358, + "step": 15238 + }, + { + "epoch": 1.2298442417883948, + "grad_norm": 0.6782575249671936, + "learning_rate": 2.696280149714604e-05, + "loss": 2.3844, + "step": 15239 + }, + { + "epoch": 1.2299249455249779, + "grad_norm": 0.6825734972953796, + "learning_rate": 2.6952019173045982e-05, + "loss": 2.4621, + "step": 15240 + }, + { + "epoch": 1.2300056492615608, + "grad_norm": 0.6587522625923157, + "learning_rate": 2.6941238669492608e-05, + "loss": 2.4465, + "step": 15241 + }, + { + "epoch": 1.2300863529981438, + "grad_norm": 0.6898796558380127, + "learning_rate": 2.6930459986754498e-05, + "loss": 2.4469, + "step": 15242 + }, + { + "epoch": 1.230167056734727, + "grad_norm": 0.6764062643051147, + "learning_rate": 2.6919683125100338e-05, + "loss": 2.4476, + "step": 15243 + }, + { + "epoch": 1.2302477604713098, + "grad_norm": 0.6647047400474548, + "learning_rate": 2.6908908084798733e-05, + "loss": 2.3677, + "step": 15244 + }, + { + "epoch": 1.2303284642078929, + "grad_norm": 0.7091608047485352, + "learning_rate": 2.6898134866118174e-05, + "loss": 2.4605, + "step": 15245 + }, + { + "epoch": 1.2304091679444757, + "grad_norm": 0.691007137298584, + "learning_rate": 2.6887363469327188e-05, + "loss": 2.4397, + "step": 15246 + }, + { + "epoch": 1.2304898716810588, + "grad_norm": 0.6685532927513123, + "learning_rate": 2.6876593894694214e-05, + "loss": 2.4279, + "step": 15247 + }, + { + "epoch": 1.230570575417642, + "grad_norm": 0.684474766254425, + "learning_rate": 2.686582614248767e-05, + "loss": 2.4162, + "step": 15248 + }, + { + "epoch": 1.2306512791542248, + "grad_norm": 
0.657293975353241, + "learning_rate": 2.6855060212975915e-05, + "loss": 2.4337, + "step": 15249 + }, + { + "epoch": 1.2307319828908079, + "grad_norm": 0.7136504650115967, + "learning_rate": 2.684429610642729e-05, + "loss": 2.4156, + "step": 15250 + }, + { + "epoch": 1.2308126866273907, + "grad_norm": 0.6564410924911499, + "learning_rate": 2.6833533823110013e-05, + "loss": 2.5101, + "step": 15251 + }, + { + "epoch": 1.2308933903639738, + "grad_norm": 0.6628747582435608, + "learning_rate": 2.682277336329233e-05, + "loss": 2.3933, + "step": 15252 + }, + { + "epoch": 1.230974094100557, + "grad_norm": 0.7362595796585083, + "learning_rate": 2.681201472724244e-05, + "loss": 2.4541, + "step": 15253 + }, + { + "epoch": 1.2310547978371398, + "grad_norm": 0.7604697346687317, + "learning_rate": 2.680125791522844e-05, + "loss": 2.4383, + "step": 15254 + }, + { + "epoch": 1.2311355015737229, + "grad_norm": 0.7128429412841797, + "learning_rate": 2.6790502927518434e-05, + "loss": 2.4492, + "step": 15255 + }, + { + "epoch": 1.231216205310306, + "grad_norm": 0.6761955618858337, + "learning_rate": 2.677974976438047e-05, + "loss": 2.4355, + "step": 15256 + }, + { + "epoch": 1.2312969090468888, + "grad_norm": 0.6687077879905701, + "learning_rate": 2.6768998426082538e-05, + "loss": 2.4317, + "step": 15257 + }, + { + "epoch": 1.231377612783472, + "grad_norm": 0.7423825860023499, + "learning_rate": 2.675824891289259e-05, + "loss": 2.4216, + "step": 15258 + }, + { + "epoch": 1.231458316520055, + "grad_norm": 0.671130359172821, + "learning_rate": 2.6747501225078542e-05, + "loss": 2.4775, + "step": 15259 + }, + { + "epoch": 1.2315390202566379, + "grad_norm": 0.7421461939811707, + "learning_rate": 2.6736755362908273e-05, + "loss": 2.4042, + "step": 15260 + }, + { + "epoch": 1.231619723993221, + "grad_norm": 0.7084131240844727, + "learning_rate": 2.6726011326649547e-05, + "loss": 2.4506, + "step": 15261 + }, + { + "epoch": 1.2317004277298038, + "grad_norm": 0.641852855682373, + 
"learning_rate": 2.671526911657015e-05, + "loss": 2.4261, + "step": 15262 + }, + { + "epoch": 1.231781131466387, + "grad_norm": 0.7627724409103394, + "learning_rate": 2.670452873293785e-05, + "loss": 2.4647, + "step": 15263 + }, + { + "epoch": 1.23186183520297, + "grad_norm": 0.6638163924217224, + "learning_rate": 2.669379017602026e-05, + "loss": 2.4208, + "step": 15264 + }, + { + "epoch": 1.2319425389395529, + "grad_norm": 0.6815361380577087, + "learning_rate": 2.668305344608505e-05, + "loss": 2.4404, + "step": 15265 + }, + { + "epoch": 1.232023242676136, + "grad_norm": 0.6466485857963562, + "learning_rate": 2.6672318543399823e-05, + "loss": 2.4327, + "step": 15266 + }, + { + "epoch": 1.2321039464127188, + "grad_norm": 0.7119305729866028, + "learning_rate": 2.6661585468232042e-05, + "loss": 2.4266, + "step": 15267 + }, + { + "epoch": 1.232184650149302, + "grad_norm": 0.7245718836784363, + "learning_rate": 2.6650854220849286e-05, + "loss": 2.4484, + "step": 15268 + }, + { + "epoch": 1.232265353885885, + "grad_norm": 0.7050287127494812, + "learning_rate": 2.6640124801518972e-05, + "loss": 2.4441, + "step": 15269 + }, + { + "epoch": 1.2323460576224678, + "grad_norm": 0.6906494498252869, + "learning_rate": 2.6629397210508556e-05, + "loss": 2.4297, + "step": 15270 + }, + { + "epoch": 1.232426761359051, + "grad_norm": 0.7224171757698059, + "learning_rate": 2.661867144808532e-05, + "loss": 2.4279, + "step": 15271 + }, + { + "epoch": 1.232507465095634, + "grad_norm": 0.688804030418396, + "learning_rate": 2.6607947514516606e-05, + "loss": 2.4741, + "step": 15272 + }, + { + "epoch": 1.232588168832217, + "grad_norm": 0.6462350487709045, + "learning_rate": 2.6597225410069726e-05, + "loss": 2.4499, + "step": 15273 + }, + { + "epoch": 1.2326688725688, + "grad_norm": 0.6860110759735107, + "learning_rate": 2.658650513501184e-05, + "loss": 2.4488, + "step": 15274 + }, + { + "epoch": 1.2327495763053828, + "grad_norm": 0.7158305644989014, + "learning_rate": 2.6575786689610138e-05, + 
"loss": 2.4318, + "step": 15275 + }, + { + "epoch": 1.232830280041966, + "grad_norm": 0.7740959525108337, + "learning_rate": 2.6565070074131804e-05, + "loss": 2.4824, + "step": 15276 + }, + { + "epoch": 1.232910983778549, + "grad_norm": 0.7573856711387634, + "learning_rate": 2.6554355288843847e-05, + "loss": 2.4034, + "step": 15277 + }, + { + "epoch": 1.2329916875151319, + "grad_norm": 0.6809369921684265, + "learning_rate": 2.654364233401332e-05, + "loss": 2.5085, + "step": 15278 + }, + { + "epoch": 1.233072391251715, + "grad_norm": 0.6695643067359924, + "learning_rate": 2.6532931209907307e-05, + "loss": 2.4697, + "step": 15279 + }, + { + "epoch": 1.2331530949882978, + "grad_norm": 0.7218750715255737, + "learning_rate": 2.6522221916792655e-05, + "loss": 2.4753, + "step": 15280 + }, + { + "epoch": 1.233233798724881, + "grad_norm": 0.8171822428703308, + "learning_rate": 2.6511514454936314e-05, + "loss": 2.45, + "step": 15281 + }, + { + "epoch": 1.233314502461464, + "grad_norm": 0.7234573364257812, + "learning_rate": 2.6500808824605162e-05, + "loss": 2.3963, + "step": 15282 + }, + { + "epoch": 1.2333952061980469, + "grad_norm": 0.6993409395217896, + "learning_rate": 2.6490105026065948e-05, + "loss": 2.4449, + "step": 15283 + }, + { + "epoch": 1.23347590993463, + "grad_norm": 0.7984449863433838, + "learning_rate": 2.6479403059585472e-05, + "loss": 2.4322, + "step": 15284 + }, + { + "epoch": 1.233556613671213, + "grad_norm": 0.683971107006073, + "learning_rate": 2.6468702925430466e-05, + "loss": 2.4125, + "step": 15285 + }, + { + "epoch": 1.233637317407796, + "grad_norm": 0.6739822626113892, + "learning_rate": 2.6458004623867617e-05, + "loss": 2.4487, + "step": 15286 + }, + { + "epoch": 1.233718021144379, + "grad_norm": 0.7003912925720215, + "learning_rate": 2.644730815516351e-05, + "loss": 2.4437, + "step": 15287 + }, + { + "epoch": 1.233798724880962, + "grad_norm": 0.7011744379997253, + "learning_rate": 2.643661351958474e-05, + "loss": 2.4798, + "step": 15288 + }, + { 
+ "epoch": 1.233879428617545, + "grad_norm": 0.7003397941589355, + "learning_rate": 2.6425920717397867e-05, + "loss": 2.4554, + "step": 15289 + }, + { + "epoch": 1.233960132354128, + "grad_norm": 0.6682165265083313, + "learning_rate": 2.6415229748869374e-05, + "loss": 2.4252, + "step": 15290 + }, + { + "epoch": 1.234040836090711, + "grad_norm": 0.6712457537651062, + "learning_rate": 2.6404540614265715e-05, + "loss": 2.4225, + "step": 15291 + }, + { + "epoch": 1.234121539827294, + "grad_norm": 0.654464602470398, + "learning_rate": 2.63938533138533e-05, + "loss": 2.4462, + "step": 15292 + }, + { + "epoch": 1.234202243563877, + "grad_norm": 0.7311797738075256, + "learning_rate": 2.638316784789845e-05, + "loss": 2.502, + "step": 15293 + }, + { + "epoch": 1.23428294730046, + "grad_norm": 0.6836559176445007, + "learning_rate": 2.6372484216667492e-05, + "loss": 2.5134, + "step": 15294 + }, + { + "epoch": 1.234363651037043, + "grad_norm": 0.6961826086044312, + "learning_rate": 2.636180242042672e-05, + "loss": 2.4479, + "step": 15295 + }, + { + "epoch": 1.234444354773626, + "grad_norm": 0.6824259161949158, + "learning_rate": 2.635112245944229e-05, + "loss": 2.4299, + "step": 15296 + }, + { + "epoch": 1.234525058510209, + "grad_norm": 0.7594609260559082, + "learning_rate": 2.634044433398042e-05, + "loss": 2.4469, + "step": 15297 + }, + { + "epoch": 1.234605762246792, + "grad_norm": 0.7044653296470642, + "learning_rate": 2.632976804430721e-05, + "loss": 2.447, + "step": 15298 + }, + { + "epoch": 1.234686465983375, + "grad_norm": 0.6986916065216064, + "learning_rate": 2.631909359068876e-05, + "loss": 2.4705, + "step": 15299 + }, + { + "epoch": 1.234767169719958, + "grad_norm": 0.7025431990623474, + "learning_rate": 2.630842097339111e-05, + "loss": 2.3951, + "step": 15300 + }, + { + "epoch": 1.2348478734565411, + "grad_norm": 0.6533786058425903, + "learning_rate": 2.6297750192680237e-05, + "loss": 2.3769, + "step": 15301 + }, + { + "epoch": 1.234928577193124, + "grad_norm": 
0.6575472354888916, + "learning_rate": 2.628708124882212e-05, + "loss": 2.4293, + "step": 15302 + }, + { + "epoch": 1.235009280929707, + "grad_norm": 0.6712046265602112, + "learning_rate": 2.6276414142082584e-05, + "loss": 2.4819, + "step": 15303 + }, + { + "epoch": 1.2350899846662902, + "grad_norm": 0.6947652101516724, + "learning_rate": 2.6265748872727535e-05, + "loss": 2.449, + "step": 15304 + }, + { + "epoch": 1.235170688402873, + "grad_norm": 0.6881443858146667, + "learning_rate": 2.62550854410228e-05, + "loss": 2.3991, + "step": 15305 + }, + { + "epoch": 1.2352513921394561, + "grad_norm": 0.6681519746780396, + "learning_rate": 2.624442384723407e-05, + "loss": 2.4005, + "step": 15306 + }, + { + "epoch": 1.235332095876039, + "grad_norm": 0.6728120446205139, + "learning_rate": 2.62337640916271e-05, + "loss": 2.4242, + "step": 15307 + }, + { + "epoch": 1.235412799612622, + "grad_norm": 0.707360029220581, + "learning_rate": 2.622310617446755e-05, + "loss": 2.4385, + "step": 15308 + }, + { + "epoch": 1.2354935033492052, + "grad_norm": 0.6890079975128174, + "learning_rate": 2.6212450096021058e-05, + "loss": 2.443, + "step": 15309 + }, + { + "epoch": 1.235574207085788, + "grad_norm": 0.7022379636764526, + "learning_rate": 2.620179585655318e-05, + "loss": 2.3982, + "step": 15310 + }, + { + "epoch": 1.235654910822371, + "grad_norm": 0.7283182740211487, + "learning_rate": 2.61911434563295e-05, + "loss": 2.4197, + "step": 15311 + }, + { + "epoch": 1.235735614558954, + "grad_norm": 0.6721852421760559, + "learning_rate": 2.6180492895615426e-05, + "loss": 2.4356, + "step": 15312 + }, + { + "epoch": 1.235816318295537, + "grad_norm": 0.6817916631698608, + "learning_rate": 2.616984417467645e-05, + "loss": 2.4325, + "step": 15313 + }, + { + "epoch": 1.2358970220321202, + "grad_norm": 0.6826596260070801, + "learning_rate": 2.6159197293777972e-05, + "loss": 2.4043, + "step": 15314 + }, + { + "epoch": 1.235977725768703, + "grad_norm": 0.7135530114173889, + "learning_rate": 
2.6148552253185288e-05, + "loss": 2.4269, + "step": 15315 + }, + { + "epoch": 1.236058429505286, + "grad_norm": 0.7027753591537476, + "learning_rate": 2.6137909053163722e-05, + "loss": 2.4266, + "step": 15316 + }, + { + "epoch": 1.2361391332418692, + "grad_norm": 0.6597041487693787, + "learning_rate": 2.6127267693978552e-05, + "loss": 2.4073, + "step": 15317 + }, + { + "epoch": 1.236219836978452, + "grad_norm": 0.6450026631355286, + "learning_rate": 2.6116628175894974e-05, + "loss": 2.4299, + "step": 15318 + }, + { + "epoch": 1.2363005407150351, + "grad_norm": 0.7740476727485657, + "learning_rate": 2.6105990499178156e-05, + "loss": 2.4088, + "step": 15319 + }, + { + "epoch": 1.2363812444516182, + "grad_norm": 0.6460183262825012, + "learning_rate": 2.609535466409322e-05, + "loss": 2.4311, + "step": 15320 + }, + { + "epoch": 1.236461948188201, + "grad_norm": 0.6514838337898254, + "learning_rate": 2.608472067090525e-05, + "loss": 2.4069, + "step": 15321 + }, + { + "epoch": 1.2365426519247842, + "grad_norm": 0.7281234860420227, + "learning_rate": 2.6074088519879237e-05, + "loss": 2.4245, + "step": 15322 + }, + { + "epoch": 1.236623355661367, + "grad_norm": 0.752983570098877, + "learning_rate": 2.606345821128018e-05, + "loss": 2.4149, + "step": 15323 + }, + { + "epoch": 1.2367040593979501, + "grad_norm": 0.6912856101989746, + "learning_rate": 2.6052829745373054e-05, + "loss": 2.4489, + "step": 15324 + }, + { + "epoch": 1.236784763134533, + "grad_norm": 0.6719293594360352, + "learning_rate": 2.604220312242267e-05, + "loss": 2.457, + "step": 15325 + }, + { + "epoch": 1.236865466871116, + "grad_norm": 0.7440586090087891, + "learning_rate": 2.6031578342693918e-05, + "loss": 2.4657, + "step": 15326 + }, + { + "epoch": 1.2369461706076992, + "grad_norm": 0.694442629814148, + "learning_rate": 2.602095540645162e-05, + "loss": 2.4422, + "step": 15327 + }, + { + "epoch": 1.237026874344282, + "grad_norm": 0.7186843752861023, + "learning_rate": 2.601033431396046e-05, + "loss": 
2.4229, + "step": 15328 + }, + { + "epoch": 1.2371075780808651, + "grad_norm": 0.7401825785636902, + "learning_rate": 2.5999715065485153e-05, + "loss": 2.45, + "step": 15329 + }, + { + "epoch": 1.2371882818174482, + "grad_norm": 0.6710138916969299, + "learning_rate": 2.598909766129045e-05, + "loss": 2.4074, + "step": 15330 + }, + { + "epoch": 1.237268985554031, + "grad_norm": 0.7867769598960876, + "learning_rate": 2.5978482101640867e-05, + "loss": 2.4709, + "step": 15331 + }, + { + "epoch": 1.2373496892906142, + "grad_norm": 0.7076219916343689, + "learning_rate": 2.5967868386801e-05, + "loss": 2.4887, + "step": 15332 + }, + { + "epoch": 1.2374303930271973, + "grad_norm": 0.7277626991271973, + "learning_rate": 2.5957256517035378e-05, + "loss": 2.4295, + "step": 15333 + }, + { + "epoch": 1.2375110967637801, + "grad_norm": 0.7339804768562317, + "learning_rate": 2.5946646492608506e-05, + "loss": 2.4624, + "step": 15334 + }, + { + "epoch": 1.2375918005003632, + "grad_norm": 0.6707656383514404, + "learning_rate": 2.593603831378475e-05, + "loss": 2.4159, + "step": 15335 + }, + { + "epoch": 1.237672504236946, + "grad_norm": 0.7118813991546631, + "learning_rate": 2.592543198082852e-05, + "loss": 2.4496, + "step": 15336 + }, + { + "epoch": 1.2377532079735292, + "grad_norm": 0.675167977809906, + "learning_rate": 2.591482749400419e-05, + "loss": 2.4519, + "step": 15337 + }, + { + "epoch": 1.2378339117101123, + "grad_norm": 0.8245306611061096, + "learning_rate": 2.5904224853575986e-05, + "loss": 2.4732, + "step": 15338 + }, + { + "epoch": 1.2379146154466951, + "grad_norm": 0.7411863207817078, + "learning_rate": 2.5893624059808184e-05, + "loss": 2.4458, + "step": 15339 + }, + { + "epoch": 1.2379953191832782, + "grad_norm": 0.6864522695541382, + "learning_rate": 2.5883025112964997e-05, + "loss": 2.4264, + "step": 15340 + }, + { + "epoch": 1.238076022919861, + "grad_norm": 0.6585919260978699, + "learning_rate": 2.5872428013310567e-05, + "loss": 2.3904, + "step": 15341 + }, + { + 
"epoch": 1.2381567266564442, + "grad_norm": 0.6605508327484131, + "learning_rate": 2.5861832761108995e-05, + "loss": 2.4828, + "step": 15342 + }, + { + "epoch": 1.2382374303930272, + "grad_norm": 0.7353223562240601, + "learning_rate": 2.5851239356624392e-05, + "loss": 2.4335, + "step": 15343 + }, + { + "epoch": 1.2383181341296101, + "grad_norm": 0.6907783150672913, + "learning_rate": 2.5840647800120688e-05, + "loss": 2.4394, + "step": 15344 + }, + { + "epoch": 1.2383988378661932, + "grad_norm": 0.7239590287208557, + "learning_rate": 2.5830058091861896e-05, + "loss": 2.4221, + "step": 15345 + }, + { + "epoch": 1.2384795416027763, + "grad_norm": 0.7001412510871887, + "learning_rate": 2.5819470232111975e-05, + "loss": 2.4521, + "step": 15346 + }, + { + "epoch": 1.2385602453393592, + "grad_norm": 0.6983658671379089, + "learning_rate": 2.580888422113473e-05, + "loss": 2.4839, + "step": 15347 + }, + { + "epoch": 1.2386409490759422, + "grad_norm": 0.7829005718231201, + "learning_rate": 2.5798300059194037e-05, + "loss": 2.4546, + "step": 15348 + }, + { + "epoch": 1.2387216528125253, + "grad_norm": 0.7248061299324036, + "learning_rate": 2.5787717746553664e-05, + "loss": 2.4341, + "step": 15349 + }, + { + "epoch": 1.2388023565491082, + "grad_norm": 0.7921163439750671, + "learning_rate": 2.577713728347736e-05, + "loss": 2.475, + "step": 15350 + }, + { + "epoch": 1.2388830602856913, + "grad_norm": 0.6571238040924072, + "learning_rate": 2.5766558670228813e-05, + "loss": 2.4636, + "step": 15351 + }, + { + "epoch": 1.2389637640222741, + "grad_norm": 0.7436683177947998, + "learning_rate": 2.575598190707168e-05, + "loss": 2.4868, + "step": 15352 + }, + { + "epoch": 1.2390444677588572, + "grad_norm": 0.6471900939941406, + "learning_rate": 2.5745406994269573e-05, + "loss": 2.4349, + "step": 15353 + }, + { + "epoch": 1.2391251714954403, + "grad_norm": 0.6612011194229126, + "learning_rate": 2.5734833932086012e-05, + "loss": 2.4088, + "step": 15354 + }, + { + "epoch": 
1.2392058752320232, + "grad_norm": 0.6882977485656738, + "learning_rate": 2.572426272078451e-05, + "loss": 2.4344, + "step": 15355 + }, + { + "epoch": 1.2392865789686063, + "grad_norm": 0.6836830973625183, + "learning_rate": 2.5713693360628565e-05, + "loss": 2.4325, + "step": 15356 + }, + { + "epoch": 1.2393672827051891, + "grad_norm": 0.712127149105072, + "learning_rate": 2.5703125851881536e-05, + "loss": 2.4505, + "step": 15357 + }, + { + "epoch": 1.2394479864417722, + "grad_norm": 0.7162468433380127, + "learning_rate": 2.5692560194806837e-05, + "loss": 2.4167, + "step": 15358 + }, + { + "epoch": 1.2395286901783553, + "grad_norm": 0.7770177125930786, + "learning_rate": 2.568199638966777e-05, + "loss": 2.4072, + "step": 15359 + }, + { + "epoch": 1.2396093939149382, + "grad_norm": 0.7049651741981506, + "learning_rate": 2.5671434436727636e-05, + "loss": 2.434, + "step": 15360 + }, + { + "epoch": 1.2396900976515213, + "grad_norm": 0.7793349027633667, + "learning_rate": 2.566087433624964e-05, + "loss": 2.4762, + "step": 15361 + }, + { + "epoch": 1.2397708013881044, + "grad_norm": 0.6776690483093262, + "learning_rate": 2.5650316088497018e-05, + "loss": 2.402, + "step": 15362 + }, + { + "epoch": 1.2398515051246872, + "grad_norm": 0.7207701802253723, + "learning_rate": 2.5639759693732834e-05, + "loss": 2.4398, + "step": 15363 + }, + { + "epoch": 1.2399322088612703, + "grad_norm": 0.759787917137146, + "learning_rate": 2.5629205152220215e-05, + "loss": 2.4268, + "step": 15364 + }, + { + "epoch": 1.2400129125978534, + "grad_norm": 0.6906142830848694, + "learning_rate": 2.5618652464222215e-05, + "loss": 2.4075, + "step": 15365 + }, + { + "epoch": 1.2400936163344363, + "grad_norm": 0.7002954483032227, + "learning_rate": 2.560810163000187e-05, + "loss": 2.4516, + "step": 15366 + }, + { + "epoch": 1.2401743200710194, + "grad_norm": 0.7287559509277344, + "learning_rate": 2.5597552649822053e-05, + "loss": 2.4975, + "step": 15367 + }, + { + "epoch": 1.2402550238076022, + 
"grad_norm": 0.6523926854133606, + "learning_rate": 2.558700552394572e-05, + "loss": 2.4085, + "step": 15368 + }, + { + "epoch": 1.2403357275441853, + "grad_norm": 0.7289387583732605, + "learning_rate": 2.5576460252635727e-05, + "loss": 2.4789, + "step": 15369 + }, + { + "epoch": 1.2404164312807684, + "grad_norm": 0.6613432765007019, + "learning_rate": 2.5565916836154878e-05, + "loss": 2.4263, + "step": 15370 + }, + { + "epoch": 1.2404971350173513, + "grad_norm": 0.7275245785713196, + "learning_rate": 2.555537527476597e-05, + "loss": 2.4652, + "step": 15371 + }, + { + "epoch": 1.2405778387539343, + "grad_norm": 0.6726976037025452, + "learning_rate": 2.554483556873173e-05, + "loss": 2.4092, + "step": 15372 + }, + { + "epoch": 1.2406585424905172, + "grad_norm": 0.6908233761787415, + "learning_rate": 2.5534297718314794e-05, + "loss": 2.3678, + "step": 15373 + }, + { + "epoch": 1.2407392462271003, + "grad_norm": 0.6893147826194763, + "learning_rate": 2.5523761723777806e-05, + "loss": 2.4625, + "step": 15374 + }, + { + "epoch": 1.2408199499636834, + "grad_norm": 0.7640267014503479, + "learning_rate": 2.551322758538339e-05, + "loss": 2.446, + "step": 15375 + }, + { + "epoch": 1.2409006537002663, + "grad_norm": 0.7187458276748657, + "learning_rate": 2.550269530339402e-05, + "loss": 2.4215, + "step": 15376 + }, + { + "epoch": 1.2409813574368493, + "grad_norm": 0.8041789531707764, + "learning_rate": 2.5492164878072234e-05, + "loss": 2.5085, + "step": 15377 + }, + { + "epoch": 1.2410620611734324, + "grad_norm": 0.6582188010215759, + "learning_rate": 2.5481636309680445e-05, + "loss": 2.467, + "step": 15378 + }, + { + "epoch": 1.2411427649100153, + "grad_norm": 0.705731213092804, + "learning_rate": 2.5471109598481112e-05, + "loss": 2.3764, + "step": 15379 + }, + { + "epoch": 1.2412234686465984, + "grad_norm": 0.6918940544128418, + "learning_rate": 2.5460584744736495e-05, + "loss": 2.4513, + "step": 15380 + }, + { + "epoch": 1.2413041723831812, + "grad_norm": 
0.7402673959732056, + "learning_rate": 2.5450061748708975e-05, + "loss": 2.5133, + "step": 15381 + }, + { + "epoch": 1.2413848761197643, + "grad_norm": 0.6740667223930359, + "learning_rate": 2.543954061066083e-05, + "loss": 2.4649, + "step": 15382 + }, + { + "epoch": 1.2414655798563474, + "grad_norm": 0.6665407419204712, + "learning_rate": 2.5429021330854197e-05, + "loss": 2.4321, + "step": 15383 + }, + { + "epoch": 1.2415462835929303, + "grad_norm": 0.7324530482292175, + "learning_rate": 2.5418503909551296e-05, + "loss": 2.3574, + "step": 15384 + }, + { + "epoch": 1.2416269873295134, + "grad_norm": 0.7117868661880493, + "learning_rate": 2.5407988347014255e-05, + "loss": 2.4552, + "step": 15385 + }, + { + "epoch": 1.2417076910660962, + "grad_norm": 0.7162930965423584, + "learning_rate": 2.5397474643505103e-05, + "loss": 2.4135, + "step": 15386 + }, + { + "epoch": 1.2417883948026793, + "grad_norm": 0.7301257848739624, + "learning_rate": 2.5386962799285895e-05, + "loss": 2.4277, + "step": 15387 + }, + { + "epoch": 1.2418690985392624, + "grad_norm": 0.7404977679252625, + "learning_rate": 2.5376452814618645e-05, + "loss": 2.478, + "step": 15388 + }, + { + "epoch": 1.2419498022758453, + "grad_norm": 0.6546272039413452, + "learning_rate": 2.536594468976522e-05, + "loss": 2.4879, + "step": 15389 + }, + { + "epoch": 1.2420305060124284, + "grad_norm": 0.6501599550247192, + "learning_rate": 2.5355438424987565e-05, + "loss": 2.3964, + "step": 15390 + }, + { + "epoch": 1.2421112097490115, + "grad_norm": 0.6711748242378235, + "learning_rate": 2.5344934020547496e-05, + "loss": 2.4123, + "step": 15391 + }, + { + "epoch": 1.2421919134855943, + "grad_norm": 0.6803534030914307, + "learning_rate": 2.5334431476706823e-05, + "loss": 2.4271, + "step": 15392 + }, + { + "epoch": 1.2422726172221774, + "grad_norm": 0.7407296299934387, + "learning_rate": 2.5323930793727302e-05, + "loss": 2.49, + "step": 15393 + }, + { + "epoch": 1.2423533209587605, + "grad_norm": 0.701870858669281, + 
"learning_rate": 2.5313431971870617e-05, + "loss": 2.4534, + "step": 15394 + }, + { + "epoch": 1.2424340246953434, + "grad_norm": 0.6658090353012085, + "learning_rate": 2.5302935011398475e-05, + "loss": 2.4581, + "step": 15395 + }, + { + "epoch": 1.2425147284319265, + "grad_norm": 0.6616473197937012, + "learning_rate": 2.529243991257243e-05, + "loss": 2.4169, + "step": 15396 + }, + { + "epoch": 1.2425954321685093, + "grad_norm": 0.6714773178100586, + "learning_rate": 2.5281946675654067e-05, + "loss": 2.4159, + "step": 15397 + }, + { + "epoch": 1.2426761359050924, + "grad_norm": 0.6789337396621704, + "learning_rate": 2.5271455300904935e-05, + "loss": 2.4211, + "step": 15398 + }, + { + "epoch": 1.2427568396416755, + "grad_norm": 0.6793739795684814, + "learning_rate": 2.5260965788586456e-05, + "loss": 2.4337, + "step": 15399 + }, + { + "epoch": 1.2428375433782584, + "grad_norm": 0.6432294249534607, + "learning_rate": 2.5250478138960076e-05, + "loss": 2.4268, + "step": 15400 + }, + { + "epoch": 1.2429182471148414, + "grad_norm": 0.6960669159889221, + "learning_rate": 2.523999235228718e-05, + "loss": 2.3535, + "step": 15401 + }, + { + "epoch": 1.2429989508514243, + "grad_norm": 0.6724488735198975, + "learning_rate": 2.5229508428829096e-05, + "loss": 2.4294, + "step": 15402 + }, + { + "epoch": 1.2430796545880074, + "grad_norm": 0.636105477809906, + "learning_rate": 2.521902636884711e-05, + "loss": 2.4438, + "step": 15403 + }, + { + "epoch": 1.2431603583245905, + "grad_norm": 0.6865580677986145, + "learning_rate": 2.52085461726025e-05, + "loss": 2.4473, + "step": 15404 + }, + { + "epoch": 1.2432410620611734, + "grad_norm": 0.6740261316299438, + "learning_rate": 2.5198067840356398e-05, + "loss": 2.4642, + "step": 15405 + }, + { + "epoch": 1.2433217657977564, + "grad_norm": 0.7241789698600769, + "learning_rate": 2.518759137236998e-05, + "loss": 2.4294, + "step": 15406 + }, + { + "epoch": 1.2434024695343395, + "grad_norm": 0.6839794516563416, + "learning_rate": 
2.5177116768904373e-05, + "loss": 2.4697, + "step": 15407 + }, + { + "epoch": 1.2434831732709224, + "grad_norm": 0.677390992641449, + "learning_rate": 2.5166644030220578e-05, + "loss": 2.4411, + "step": 15408 + }, + { + "epoch": 1.2435638770075055, + "grad_norm": 0.709065854549408, + "learning_rate": 2.515617315657962e-05, + "loss": 2.4392, + "step": 15409 + }, + { + "epoch": 1.2436445807440886, + "grad_norm": 0.6735498905181885, + "learning_rate": 2.514570414824249e-05, + "loss": 2.3924, + "step": 15410 + }, + { + "epoch": 1.2437252844806714, + "grad_norm": 0.6729374527931213, + "learning_rate": 2.513523700547007e-05, + "loss": 2.4464, + "step": 15411 + }, + { + "epoch": 1.2438059882172545, + "grad_norm": 0.7232720851898193, + "learning_rate": 2.5124771728523244e-05, + "loss": 2.3975, + "step": 15412 + }, + { + "epoch": 1.2438866919538374, + "grad_norm": 0.7467584609985352, + "learning_rate": 2.5114308317662837e-05, + "loss": 2.4191, + "step": 15413 + }, + { + "epoch": 1.2439673956904205, + "grad_norm": 0.6951141953468323, + "learning_rate": 2.5103846773149642e-05, + "loss": 2.4207, + "step": 15414 + }, + { + "epoch": 1.2440480994270036, + "grad_norm": 0.6427489519119263, + "learning_rate": 2.5093387095244336e-05, + "loss": 2.3539, + "step": 15415 + }, + { + "epoch": 1.2441288031635864, + "grad_norm": 0.729580283164978, + "learning_rate": 2.5082929284207644e-05, + "loss": 2.4464, + "step": 15416 + }, + { + "epoch": 1.2442095069001695, + "grad_norm": 0.7247009873390198, + "learning_rate": 2.5072473340300207e-05, + "loss": 2.4294, + "step": 15417 + }, + { + "epoch": 1.2442902106367524, + "grad_norm": 0.7037674784660339, + "learning_rate": 2.5062019263782577e-05, + "loss": 2.4294, + "step": 15418 + }, + { + "epoch": 1.2443709143733355, + "grad_norm": 0.6997841596603394, + "learning_rate": 2.5051567054915303e-05, + "loss": 2.4976, + "step": 15419 + }, + { + "epoch": 1.2444516181099186, + "grad_norm": 0.7001172304153442, + "learning_rate": 2.504111671395891e-05, + 
"loss": 2.371, + "step": 15420 + }, + { + "epoch": 1.2445323218465014, + "grad_norm": 0.6781473159790039, + "learning_rate": 2.5030668241173827e-05, + "loss": 2.4124, + "step": 15421 + }, + { + "epoch": 1.2446130255830845, + "grad_norm": 0.7053182125091553, + "learning_rate": 2.5020221636820463e-05, + "loss": 2.4109, + "step": 15422 + }, + { + "epoch": 1.2446937293196676, + "grad_norm": 0.68635493516922, + "learning_rate": 2.50097769011592e-05, + "loss": 2.4548, + "step": 15423 + }, + { + "epoch": 1.2447744330562505, + "grad_norm": 0.7015564441680908, + "learning_rate": 2.4999334034450293e-05, + "loss": 2.4537, + "step": 15424 + }, + { + "epoch": 1.2448551367928335, + "grad_norm": 0.694054901599884, + "learning_rate": 2.4988893036954043e-05, + "loss": 2.4396, + "step": 15425 + }, + { + "epoch": 1.2449358405294164, + "grad_norm": 0.702518880367279, + "learning_rate": 2.4978453908930665e-05, + "loss": 2.4015, + "step": 15426 + }, + { + "epoch": 1.2450165442659995, + "grad_norm": 0.7237387895584106, + "learning_rate": 2.4968016650640348e-05, + "loss": 2.4257, + "step": 15427 + }, + { + "epoch": 1.2450972480025826, + "grad_norm": 0.7133163809776306, + "learning_rate": 2.4957581262343154e-05, + "loss": 2.4532, + "step": 15428 + }, + { + "epoch": 1.2451779517391655, + "grad_norm": 0.8339287042617798, + "learning_rate": 2.4947147744299203e-05, + "loss": 2.4621, + "step": 15429 + }, + { + "epoch": 1.2452586554757485, + "grad_norm": 0.7620034217834473, + "learning_rate": 2.493671609676852e-05, + "loss": 2.365, + "step": 15430 + }, + { + "epoch": 1.2453393592123314, + "grad_norm": 0.7445465922355652, + "learning_rate": 2.4926286320011094e-05, + "loss": 2.4764, + "step": 15431 + }, + { + "epoch": 1.2454200629489145, + "grad_norm": 0.7366160154342651, + "learning_rate": 2.4915858414286852e-05, + "loss": 2.4597, + "step": 15432 + }, + { + "epoch": 1.2455007666854976, + "grad_norm": 0.7098437547683716, + "learning_rate": 2.490543237985572e-05, + "loss": 2.4202, + "step": 15433 + 
}, + { + "epoch": 1.2455814704220805, + "grad_norm": 0.6483333706855774, + "learning_rate": 2.4895008216977478e-05, + "loss": 2.4108, + "step": 15434 + }, + { + "epoch": 1.2456621741586635, + "grad_norm": 0.6797904968261719, + "learning_rate": 2.4884585925911963e-05, + "loss": 2.4414, + "step": 15435 + }, + { + "epoch": 1.2457428778952466, + "grad_norm": 0.6853424310684204, + "learning_rate": 2.4874165506918957e-05, + "loss": 2.4226, + "step": 15436 + }, + { + "epoch": 1.2458235816318295, + "grad_norm": 0.6861590147018433, + "learning_rate": 2.4863746960258094e-05, + "loss": 2.3748, + "step": 15437 + }, + { + "epoch": 1.2459042853684126, + "grad_norm": 0.7360263466835022, + "learning_rate": 2.4853330286189058e-05, + "loss": 2.4441, + "step": 15438 + }, + { + "epoch": 1.2459849891049957, + "grad_norm": 0.6894183158874512, + "learning_rate": 2.4842915484971496e-05, + "loss": 2.3495, + "step": 15439 + }, + { + "epoch": 1.2460656928415785, + "grad_norm": 0.7570669651031494, + "learning_rate": 2.4832502556864923e-05, + "loss": 2.4622, + "step": 15440 + }, + { + "epoch": 1.2461463965781616, + "grad_norm": 0.6986069083213806, + "learning_rate": 2.4822091502128876e-05, + "loss": 2.3647, + "step": 15441 + }, + { + "epoch": 1.2462271003147445, + "grad_norm": 0.681450366973877, + "learning_rate": 2.481168232102279e-05, + "loss": 2.3872, + "step": 15442 + }, + { + "epoch": 1.2463078040513276, + "grad_norm": 0.7241837978363037, + "learning_rate": 2.480127501380618e-05, + "loss": 2.4692, + "step": 15443 + }, + { + "epoch": 1.2463885077879107, + "grad_norm": 0.6575295329093933, + "learning_rate": 2.479086958073834e-05, + "loss": 2.5057, + "step": 15444 + }, + { + "epoch": 1.2464692115244935, + "grad_norm": 0.7289770841598511, + "learning_rate": 2.478046602207864e-05, + "loss": 2.4164, + "step": 15445 + }, + { + "epoch": 1.2465499152610766, + "grad_norm": 0.6682024598121643, + "learning_rate": 2.4770064338086374e-05, + "loss": 2.4466, + "step": 15446 + }, + { + "epoch": 
1.2466306189976595, + "grad_norm": 0.7238918542861938, + "learning_rate": 2.475966452902072e-05, + "loss": 2.4367, + "step": 15447 + }, + { + "epoch": 1.2467113227342426, + "grad_norm": 0.6825705170631409, + "learning_rate": 2.4749266595140918e-05, + "loss": 2.4337, + "step": 15448 + }, + { + "epoch": 1.2467920264708257, + "grad_norm": 0.7352269887924194, + "learning_rate": 2.4738870536706126e-05, + "loss": 2.4103, + "step": 15449 + }, + { + "epoch": 1.2468727302074085, + "grad_norm": 0.658930778503418, + "learning_rate": 2.4728476353975394e-05, + "loss": 2.4281, + "step": 15450 + }, + { + "epoch": 1.2469534339439916, + "grad_norm": 0.6933601498603821, + "learning_rate": 2.4718084047207778e-05, + "loss": 2.4502, + "step": 15451 + }, + { + "epoch": 1.2470341376805747, + "grad_norm": 0.6901879906654358, + "learning_rate": 2.4707693616662308e-05, + "loss": 2.4057, + "step": 15452 + }, + { + "epoch": 1.2471148414171576, + "grad_norm": 0.7648913860321045, + "learning_rate": 2.469730506259792e-05, + "loss": 2.4163, + "step": 15453 + }, + { + "epoch": 1.2471955451537406, + "grad_norm": 0.6496175527572632, + "learning_rate": 2.4686918385273537e-05, + "loss": 2.4373, + "step": 15454 + }, + { + "epoch": 1.2472762488903237, + "grad_norm": 0.6949105858802795, + "learning_rate": 2.4676533584948048e-05, + "loss": 2.4108, + "step": 15455 + }, + { + "epoch": 1.2473569526269066, + "grad_norm": 0.7018688321113586, + "learning_rate": 2.4666150661880206e-05, + "loss": 2.4589, + "step": 15456 + }, + { + "epoch": 1.2474376563634897, + "grad_norm": 0.7141219973564148, + "learning_rate": 2.4655769616328827e-05, + "loss": 2.4022, + "step": 15457 + }, + { + "epoch": 1.2475183601000726, + "grad_norm": 0.7276743054389954, + "learning_rate": 2.4645390448552608e-05, + "loss": 2.4443, + "step": 15458 + }, + { + "epoch": 1.2475990638366556, + "grad_norm": 0.6861153244972229, + "learning_rate": 2.463501315881027e-05, + "loss": 2.4478, + "step": 15459 + }, + { + "epoch": 1.2476797675732387, + 
"grad_norm": 0.7252256274223328, + "learning_rate": 2.462463774736038e-05, + "loss": 2.446, + "step": 15460 + }, + { + "epoch": 1.2477604713098216, + "grad_norm": 0.6914857625961304, + "learning_rate": 2.4614264214461557e-05, + "loss": 2.4294, + "step": 15461 + }, + { + "epoch": 1.2478411750464047, + "grad_norm": 0.6815036535263062, + "learning_rate": 2.460389256037232e-05, + "loss": 2.4389, + "step": 15462 + }, + { + "epoch": 1.2479218787829875, + "grad_norm": 0.7420194745063782, + "learning_rate": 2.4593522785351176e-05, + "loss": 2.4932, + "step": 15463 + }, + { + "epoch": 1.2480025825195706, + "grad_norm": 0.6622182130813599, + "learning_rate": 2.4583154889656556e-05, + "loss": 2.4327, + "step": 15464 + }, + { + "epoch": 1.2480832862561537, + "grad_norm": 0.6527934074401855, + "learning_rate": 2.457278887354689e-05, + "loss": 2.3857, + "step": 15465 + }, + { + "epoch": 1.2481639899927366, + "grad_norm": 0.6942344903945923, + "learning_rate": 2.4562424737280465e-05, + "loss": 2.4181, + "step": 15466 + }, + { + "epoch": 1.2482446937293197, + "grad_norm": 0.7449823021888733, + "learning_rate": 2.45520624811156e-05, + "loss": 2.4575, + "step": 15467 + }, + { + "epoch": 1.2483253974659028, + "grad_norm": 0.6905208826065063, + "learning_rate": 2.4541702105310605e-05, + "loss": 2.3858, + "step": 15468 + }, + { + "epoch": 1.2484061012024856, + "grad_norm": 0.6928502917289734, + "learning_rate": 2.4531343610123603e-05, + "loss": 2.4212, + "step": 15469 + }, + { + "epoch": 1.2484868049390687, + "grad_norm": 0.7182145118713379, + "learning_rate": 2.45209869958128e-05, + "loss": 2.4063, + "step": 15470 + }, + { + "epoch": 1.2485675086756518, + "grad_norm": 0.7379452586174011, + "learning_rate": 2.4510632262636314e-05, + "loss": 2.4612, + "step": 15471 + }, + { + "epoch": 1.2486482124122347, + "grad_norm": 0.6663349270820618, + "learning_rate": 2.450027941085219e-05, + "loss": 2.4583, + "step": 15472 + }, + { + "epoch": 1.2487289161488178, + "grad_norm": 0.7266560792922974, 
+ "learning_rate": 2.4489928440718467e-05, + "loss": 2.4483, + "step": 15473 + }, + { + "epoch": 1.2488096198854006, + "grad_norm": 0.7046550512313843, + "learning_rate": 2.447957935249311e-05, + "loss": 2.4087, + "step": 15474 + }, + { + "epoch": 1.2488903236219837, + "grad_norm": 0.684248685836792, + "learning_rate": 2.4469232146434084e-05, + "loss": 2.4352, + "step": 15475 + }, + { + "epoch": 1.2489710273585668, + "grad_norm": 0.6864973902702332, + "learning_rate": 2.4458886822799198e-05, + "loss": 2.3872, + "step": 15476 + }, + { + "epoch": 1.2490517310951497, + "grad_norm": 0.6964752674102783, + "learning_rate": 2.444854338184631e-05, + "loss": 2.437, + "step": 15477 + }, + { + "epoch": 1.2491324348317328, + "grad_norm": 0.6755973100662231, + "learning_rate": 2.4438201823833252e-05, + "loss": 2.4302, + "step": 15478 + }, + { + "epoch": 1.2492131385683156, + "grad_norm": 0.6434857249259949, + "learning_rate": 2.44278621490177e-05, + "loss": 2.406, + "step": 15479 + }, + { + "epoch": 1.2492938423048987, + "grad_norm": 0.7342328429222107, + "learning_rate": 2.441752435765736e-05, + "loss": 2.451, + "step": 15480 + }, + { + "epoch": 1.2493745460414818, + "grad_norm": 0.7486860752105713, + "learning_rate": 2.44071884500099e-05, + "loss": 2.4536, + "step": 15481 + }, + { + "epoch": 1.2494552497780647, + "grad_norm": 0.7274537086486816, + "learning_rate": 2.4396854426332903e-05, + "loss": 2.4599, + "step": 15482 + }, + { + "epoch": 1.2495359535146477, + "grad_norm": 0.7580124735832214, + "learning_rate": 2.4386522286883918e-05, + "loss": 2.4038, + "step": 15483 + }, + { + "epoch": 1.2496166572512308, + "grad_norm": 0.6776975393295288, + "learning_rate": 2.4376192031920488e-05, + "loss": 2.4246, + "step": 15484 + }, + { + "epoch": 1.2496973609878137, + "grad_norm": 0.6899511814117432, + "learning_rate": 2.4365863661699996e-05, + "loss": 2.3922, + "step": 15485 + }, + { + "epoch": 1.2497780647243968, + "grad_norm": 0.7487930059432983, + "learning_rate": 
2.4355537176479903e-05, + "loss": 2.4573, + "step": 15486 + }, + { + "epoch": 1.2498587684609797, + "grad_norm": 0.7306599617004395, + "learning_rate": 2.4345212576517575e-05, + "loss": 2.4745, + "step": 15487 + }, + { + "epoch": 1.2499394721975627, + "grad_norm": 0.7152543067932129, + "learning_rate": 2.43348898620703e-05, + "loss": 2.4768, + "step": 15488 + }, + { + "epoch": 1.2500201759341458, + "grad_norm": 0.6576277017593384, + "learning_rate": 2.432456903339535e-05, + "loss": 2.4289, + "step": 15489 + }, + { + "epoch": 1.2501008796707287, + "grad_norm": 0.6974572539329529, + "learning_rate": 2.4314250090749956e-05, + "loss": 2.4218, + "step": 15490 + }, + { + "epoch": 1.2501815834073118, + "grad_norm": 0.7869577407836914, + "learning_rate": 2.4303933034391323e-05, + "loss": 2.3899, + "step": 15491 + }, + { + "epoch": 1.2502622871438946, + "grad_norm": 0.6723129749298096, + "learning_rate": 2.42936178645765e-05, + "loss": 2.4238, + "step": 15492 + }, + { + "epoch": 1.2503429908804777, + "grad_norm": 0.6839526891708374, + "learning_rate": 2.428330458156265e-05, + "loss": 2.4037, + "step": 15493 + }, + { + "epoch": 1.2504236946170608, + "grad_norm": 0.6866093277931213, + "learning_rate": 2.4272993185606796e-05, + "loss": 2.4228, + "step": 15494 + }, + { + "epoch": 1.2505043983536437, + "grad_norm": 0.6992947459220886, + "learning_rate": 2.426268367696588e-05, + "loss": 2.4248, + "step": 15495 + }, + { + "epoch": 1.2505851020902268, + "grad_norm": 0.6836698651313782, + "learning_rate": 2.4252376055896862e-05, + "loss": 2.5387, + "step": 15496 + }, + { + "epoch": 1.2506658058268099, + "grad_norm": 0.6990752816200256, + "learning_rate": 2.4242070322656663e-05, + "loss": 2.4438, + "step": 15497 + }, + { + "epoch": 1.2507465095633927, + "grad_norm": 0.7143029570579529, + "learning_rate": 2.4231766477502082e-05, + "loss": 2.4, + "step": 15498 + }, + { + "epoch": 1.2508272132999758, + "grad_norm": 0.6585043668746948, + "learning_rate": 2.422146452068994e-05, + "loss": 
2.4256, + "step": 15499 + }, + { + "epoch": 1.250907917036559, + "grad_norm": 0.739107072353363, + "learning_rate": 2.421116445247702e-05, + "loss": 2.428, + "step": 15500 + }, + { + "epoch": 1.2509886207731418, + "grad_norm": 0.6675287485122681, + "learning_rate": 2.420086627311997e-05, + "loss": 2.5095, + "step": 15501 + }, + { + "epoch": 1.2510693245097249, + "grad_norm": 0.7133405804634094, + "learning_rate": 2.4190569982875467e-05, + "loss": 2.4719, + "step": 15502 + }, + { + "epoch": 1.2511500282463077, + "grad_norm": 0.710904061794281, + "learning_rate": 2.4180275582000134e-05, + "loss": 2.4449, + "step": 15503 + }, + { + "epoch": 1.2512307319828908, + "grad_norm": 0.7088729739189148, + "learning_rate": 2.4169983070750525e-05, + "loss": 2.4059, + "step": 15504 + }, + { + "epoch": 1.2513114357194737, + "grad_norm": 0.7187358736991882, + "learning_rate": 2.4159692449383152e-05, + "loss": 2.4577, + "step": 15505 + }, + { + "epoch": 1.2513921394560568, + "grad_norm": 0.7531955242156982, + "learning_rate": 2.4149403718154497e-05, + "loss": 2.4101, + "step": 15506 + }, + { + "epoch": 1.2514728431926398, + "grad_norm": 0.7565199136734009, + "learning_rate": 2.413911687732101e-05, + "loss": 2.4805, + "step": 15507 + }, + { + "epoch": 1.2515535469292227, + "grad_norm": 0.706471860408783, + "learning_rate": 2.4128831927139008e-05, + "loss": 2.4494, + "step": 15508 + }, + { + "epoch": 1.2516342506658058, + "grad_norm": 0.7022314667701721, + "learning_rate": 2.4118548867864832e-05, + "loss": 2.4442, + "step": 15509 + }, + { + "epoch": 1.251714954402389, + "grad_norm": 0.6885591745376587, + "learning_rate": 2.4108267699754806e-05, + "loss": 2.4186, + "step": 15510 + }, + { + "epoch": 1.2517956581389718, + "grad_norm": 0.6963610649108887, + "learning_rate": 2.409798842306511e-05, + "loss": 2.4209, + "step": 15511 + }, + { + "epoch": 1.2518763618755548, + "grad_norm": 0.7117185592651367, + "learning_rate": 2.4087711038051942e-05, + "loss": 2.4106, + "step": 15512 + }, + { 
+ "epoch": 1.251957065612138, + "grad_norm": 0.6944519281387329, + "learning_rate": 2.407743554497146e-05, + "loss": 2.4493, + "step": 15513 + }, + { + "epoch": 1.2520377693487208, + "grad_norm": 0.689818263053894, + "learning_rate": 2.406716194407974e-05, + "loss": 2.4358, + "step": 15514 + }, + { + "epoch": 1.2521184730853039, + "grad_norm": 0.8132768273353577, + "learning_rate": 2.4056890235632846e-05, + "loss": 2.4574, + "step": 15515 + }, + { + "epoch": 1.252199176821887, + "grad_norm": 0.6855002045631409, + "learning_rate": 2.4046620419886777e-05, + "loss": 2.4118, + "step": 15516 + }, + { + "epoch": 1.2522798805584698, + "grad_norm": 0.6616373658180237, + "learning_rate": 2.4036352497097458e-05, + "loss": 2.4332, + "step": 15517 + }, + { + "epoch": 1.252360584295053, + "grad_norm": 0.6657225489616394, + "learning_rate": 2.4026086467520803e-05, + "loss": 2.3989, + "step": 15518 + }, + { + "epoch": 1.2524412880316358, + "grad_norm": 0.6796447038650513, + "learning_rate": 2.4015822331412664e-05, + "loss": 2.4269, + "step": 15519 + }, + { + "epoch": 1.2525219917682189, + "grad_norm": 0.7168079614639282, + "learning_rate": 2.400556008902889e-05, + "loss": 2.4263, + "step": 15520 + }, + { + "epoch": 1.2526026955048017, + "grad_norm": 0.6985058188438416, + "learning_rate": 2.3995299740625186e-05, + "loss": 2.437, + "step": 15521 + }, + { + "epoch": 1.2526833992413848, + "grad_norm": 0.7078086137771606, + "learning_rate": 2.3985041286457287e-05, + "loss": 2.3996, + "step": 15522 + }, + { + "epoch": 1.252764102977968, + "grad_norm": 0.6989054083824158, + "learning_rate": 2.3974784726780865e-05, + "loss": 2.4717, + "step": 15523 + }, + { + "epoch": 1.2528448067145508, + "grad_norm": 0.747606098651886, + "learning_rate": 2.396453006185153e-05, + "loss": 2.4228, + "step": 15524 + }, + { + "epoch": 1.2529255104511339, + "grad_norm": 0.7500887513160706, + "learning_rate": 2.3954277291924876e-05, + "loss": 2.4636, + "step": 15525 + }, + { + "epoch": 1.253006214187717, + 
"grad_norm": 0.7710712552070618, + "learning_rate": 2.3944026417256437e-05, + "loss": 2.4405, + "step": 15526 + }, + { + "epoch": 1.2530869179242998, + "grad_norm": 0.7278285622596741, + "learning_rate": 2.3933777438101657e-05, + "loss": 2.4279, + "step": 15527 + }, + { + "epoch": 1.253167621660883, + "grad_norm": 0.6979010701179504, + "learning_rate": 2.3923530354715973e-05, + "loss": 2.4272, + "step": 15528 + }, + { + "epoch": 1.253248325397466, + "grad_norm": 0.7330336570739746, + "learning_rate": 2.3913285167354804e-05, + "loss": 2.3861, + "step": 15529 + }, + { + "epoch": 1.2533290291340489, + "grad_norm": 0.675499677658081, + "learning_rate": 2.3903041876273436e-05, + "loss": 2.3987, + "step": 15530 + }, + { + "epoch": 1.253409732870632, + "grad_norm": 0.6854682564735413, + "learning_rate": 2.3892800481727186e-05, + "loss": 2.4085, + "step": 15531 + }, + { + "epoch": 1.253490436607215, + "grad_norm": 0.713810384273529, + "learning_rate": 2.388256098397129e-05, + "loss": 2.3897, + "step": 15532 + }, + { + "epoch": 1.253571140343798, + "grad_norm": 0.683214545249939, + "learning_rate": 2.3872323383260953e-05, + "loss": 2.4526, + "step": 15533 + }, + { + "epoch": 1.253651844080381, + "grad_norm": 0.6718357801437378, + "learning_rate": 2.3862087679851318e-05, + "loss": 2.4612, + "step": 15534 + }, + { + "epoch": 1.2537325478169639, + "grad_norm": 0.722283124923706, + "learning_rate": 2.3851853873997488e-05, + "loss": 2.4163, + "step": 15535 + }, + { + "epoch": 1.253813251553547, + "grad_norm": 0.689393162727356, + "learning_rate": 2.384162196595453e-05, + "loss": 2.3984, + "step": 15536 + }, + { + "epoch": 1.2538939552901298, + "grad_norm": 0.7146410346031189, + "learning_rate": 2.3831391955977412e-05, + "loss": 2.4442, + "step": 15537 + }, + { + "epoch": 1.253974659026713, + "grad_norm": 0.6651021838188171, + "learning_rate": 2.3821163844321104e-05, + "loss": 2.4064, + "step": 15538 + }, + { + "epoch": 1.254055362763296, + "grad_norm": 0.7088985443115234, + 
"learning_rate": 2.381093763124056e-05, + "loss": 2.4831, + "step": 15539 + }, + { + "epoch": 1.2541360664998789, + "grad_norm": 0.661375105381012, + "learning_rate": 2.3800713316990588e-05, + "loss": 2.3657, + "step": 15540 + }, + { + "epoch": 1.254216770236462, + "grad_norm": 0.6870979070663452, + "learning_rate": 2.3790490901826012e-05, + "loss": 2.4208, + "step": 15541 + }, + { + "epoch": 1.254297473973045, + "grad_norm": 0.6256219148635864, + "learning_rate": 2.3780270386001657e-05, + "loss": 2.4182, + "step": 15542 + }, + { + "epoch": 1.254378177709628, + "grad_norm": 0.7070638537406921, + "learning_rate": 2.377005176977215e-05, + "loss": 2.3758, + "step": 15543 + }, + { + "epoch": 1.254458881446211, + "grad_norm": 0.6571370363235474, + "learning_rate": 2.3759835053392242e-05, + "loss": 2.3927, + "step": 15544 + }, + { + "epoch": 1.254539585182794, + "grad_norm": 0.644263744354248, + "learning_rate": 2.3749620237116565e-05, + "loss": 2.3992, + "step": 15545 + }, + { + "epoch": 1.254620288919377, + "grad_norm": 0.7127394676208496, + "learning_rate": 2.3739407321199648e-05, + "loss": 2.3942, + "step": 15546 + }, + { + "epoch": 1.25470099265596, + "grad_norm": 0.7274866104125977, + "learning_rate": 2.372919630589605e-05, + "loss": 2.5232, + "step": 15547 + }, + { + "epoch": 1.2547816963925431, + "grad_norm": 0.690138041973114, + "learning_rate": 2.3718987191460274e-05, + "loss": 2.4371, + "step": 15548 + }, + { + "epoch": 1.254862400129126, + "grad_norm": 0.6990681886672974, + "learning_rate": 2.3708779978146724e-05, + "loss": 2.4568, + "step": 15549 + }, + { + "epoch": 1.254943103865709, + "grad_norm": 0.7430790662765503, + "learning_rate": 2.3698574666209793e-05, + "loss": 2.423, + "step": 15550 + }, + { + "epoch": 1.255023807602292, + "grad_norm": 0.6991416215896606, + "learning_rate": 2.3688371255903828e-05, + "loss": 2.4529, + "step": 15551 + }, + { + "epoch": 1.255104511338875, + "grad_norm": 0.6733322739601135, + "learning_rate": 2.367816974748317e-05, + 
"loss": 2.4531, + "step": 15552 + }, + { + "epoch": 1.2551852150754579, + "grad_norm": 0.7460463047027588, + "learning_rate": 2.3667970141202e-05, + "loss": 2.4267, + "step": 15553 + }, + { + "epoch": 1.255265918812041, + "grad_norm": 0.6784021854400635, + "learning_rate": 2.3657772437314517e-05, + "loss": 2.4996, + "step": 15554 + }, + { + "epoch": 1.255346622548624, + "grad_norm": 0.7499529719352722, + "learning_rate": 2.3647576636074975e-05, + "loss": 2.4749, + "step": 15555 + }, + { + "epoch": 1.255427326285207, + "grad_norm": 0.6698335409164429, + "learning_rate": 2.3637382737737368e-05, + "loss": 2.4499, + "step": 15556 + }, + { + "epoch": 1.25550803002179, + "grad_norm": 0.6644846200942993, + "learning_rate": 2.3627190742555806e-05, + "loss": 2.397, + "step": 15557 + }, + { + "epoch": 1.255588733758373, + "grad_norm": 0.7041488289833069, + "learning_rate": 2.3617000650784315e-05, + "loss": 2.4012, + "step": 15558 + }, + { + "epoch": 1.255669437494956, + "grad_norm": 0.72523033618927, + "learning_rate": 2.3606812462676798e-05, + "loss": 2.4151, + "step": 15559 + }, + { + "epoch": 1.255750141231539, + "grad_norm": 0.77669757604599, + "learning_rate": 2.3596626178487225e-05, + "loss": 2.4478, + "step": 15560 + }, + { + "epoch": 1.2558308449681221, + "grad_norm": 0.6919559836387634, + "learning_rate": 2.3586441798469462e-05, + "loss": 2.4548, + "step": 15561 + }, + { + "epoch": 1.255911548704705, + "grad_norm": 0.7613349556922913, + "learning_rate": 2.3576259322877292e-05, + "loss": 2.4475, + "step": 15562 + }, + { + "epoch": 1.255992252441288, + "grad_norm": 0.6738333106040955, + "learning_rate": 2.3566078751964515e-05, + "loss": 2.4242, + "step": 15563 + }, + { + "epoch": 1.256072956177871, + "grad_norm": 0.7242118716239929, + "learning_rate": 2.355590008598486e-05, + "loss": 2.4047, + "step": 15564 + }, + { + "epoch": 1.256153659914454, + "grad_norm": 0.7117685675621033, + "learning_rate": 2.354572332519199e-05, + "loss": 2.4473, + "step": 15565 + }, + { + 
"epoch": 1.256234363651037, + "grad_norm": 0.7466531991958618, + "learning_rate": 2.3535548469839564e-05, + "loss": 2.453, + "step": 15566 + }, + { + "epoch": 1.25631506738762, + "grad_norm": 0.6750668883323669, + "learning_rate": 2.3525375520181136e-05, + "loss": 2.4367, + "step": 15567 + }, + { + "epoch": 1.256395771124203, + "grad_norm": 0.7640851736068726, + "learning_rate": 2.35152044764703e-05, + "loss": 2.5014, + "step": 15568 + }, + { + "epoch": 1.256476474860786, + "grad_norm": 0.7198928594589233, + "learning_rate": 2.3505035338960456e-05, + "loss": 2.5138, + "step": 15569 + }, + { + "epoch": 1.256557178597369, + "grad_norm": 0.7079946398735046, + "learning_rate": 2.349486810790511e-05, + "loss": 2.4172, + "step": 15570 + }, + { + "epoch": 1.2566378823339521, + "grad_norm": 0.7477186918258667, + "learning_rate": 2.3484702783557655e-05, + "loss": 2.4224, + "step": 15571 + }, + { + "epoch": 1.256718586070535, + "grad_norm": 0.6875394582748413, + "learning_rate": 2.3474539366171388e-05, + "loss": 2.4621, + "step": 15572 + }, + { + "epoch": 1.256799289807118, + "grad_norm": 0.7164824604988098, + "learning_rate": 2.346437785599964e-05, + "loss": 2.4416, + "step": 15573 + }, + { + "epoch": 1.2568799935437012, + "grad_norm": 0.7031935453414917, + "learning_rate": 2.3454218253295668e-05, + "loss": 2.3943, + "step": 15574 + }, + { + "epoch": 1.256960697280284, + "grad_norm": 0.6739614009857178, + "learning_rate": 2.3444060558312665e-05, + "loss": 2.4114, + "step": 15575 + }, + { + "epoch": 1.2570414010168671, + "grad_norm": 0.6710866689682007, + "learning_rate": 2.3433904771303794e-05, + "loss": 2.4077, + "step": 15576 + }, + { + "epoch": 1.2571221047534502, + "grad_norm": 0.6589750051498413, + "learning_rate": 2.342375089252219e-05, + "loss": 2.3494, + "step": 15577 + }, + { + "epoch": 1.257202808490033, + "grad_norm": 0.7018333077430725, + "learning_rate": 2.3413598922220857e-05, + "loss": 2.459, + "step": 15578 + }, + { + "epoch": 1.2572835122266162, + 
"grad_norm": 0.7735301852226257, + "learning_rate": 2.3403448860652842e-05, + "loss": 2.4524, + "step": 15579 + }, + { + "epoch": 1.257364215963199, + "grad_norm": 0.7009726762771606, + "learning_rate": 2.339330070807113e-05, + "loss": 2.4244, + "step": 15580 + }, + { + "epoch": 1.2574449196997821, + "grad_norm": 0.671521008014679, + "learning_rate": 2.3383154464728595e-05, + "loss": 2.3808, + "step": 15581 + }, + { + "epoch": 1.257525623436365, + "grad_norm": 0.7736711502075195, + "learning_rate": 2.3373010130878126e-05, + "loss": 2.4936, + "step": 15582 + }, + { + "epoch": 1.257606327172948, + "grad_norm": 0.6987056136131287, + "learning_rate": 2.336286770677255e-05, + "loss": 2.4484, + "step": 15583 + }, + { + "epoch": 1.2576870309095312, + "grad_norm": 0.6337067484855652, + "learning_rate": 2.3352727192664635e-05, + "loss": 2.4196, + "step": 15584 + }, + { + "epoch": 1.257767734646114, + "grad_norm": 0.6832795143127441, + "learning_rate": 2.3342588588807123e-05, + "loss": 2.3681, + "step": 15585 + }, + { + "epoch": 1.257848438382697, + "grad_norm": 0.7208079695701599, + "learning_rate": 2.3332451895452688e-05, + "loss": 2.4436, + "step": 15586 + }, + { + "epoch": 1.2579291421192802, + "grad_norm": 0.6607621312141418, + "learning_rate": 2.3322317112853986e-05, + "loss": 2.4088, + "step": 15587 + }, + { + "epoch": 1.258009845855863, + "grad_norm": 0.7261247038841248, + "learning_rate": 2.331218424126356e-05, + "loss": 2.4389, + "step": 15588 + }, + { + "epoch": 1.2580905495924462, + "grad_norm": 0.6187729239463806, + "learning_rate": 2.3302053280933954e-05, + "loss": 2.3568, + "step": 15589 + }, + { + "epoch": 1.2581712533290292, + "grad_norm": 0.6196430921554565, + "learning_rate": 2.3291924232117713e-05, + "loss": 2.4285, + "step": 15590 + }, + { + "epoch": 1.258251957065612, + "grad_norm": 0.7271853685379028, + "learning_rate": 2.3281797095067193e-05, + "loss": 2.4058, + "step": 15591 + }, + { + "epoch": 1.2583326608021952, + "grad_norm": 0.7141130566596985, + 
"learning_rate": 2.327167187003484e-05, + "loss": 2.3971, + "step": 15592 + }, + { + "epoch": 1.2584133645387783, + "grad_norm": 0.680743932723999, + "learning_rate": 2.3261548557273027e-05, + "loss": 2.4387, + "step": 15593 + }, + { + "epoch": 1.2584940682753611, + "grad_norm": 0.718173086643219, + "learning_rate": 2.3251427157033955e-05, + "loss": 2.43, + "step": 15594 + }, + { + "epoch": 1.2585747720119442, + "grad_norm": 0.7600045800209045, + "learning_rate": 2.324130766956998e-05, + "loss": 2.4584, + "step": 15595 + }, + { + "epoch": 1.258655475748527, + "grad_norm": 0.7432500123977661, + "learning_rate": 2.3231190095133294e-05, + "loss": 2.4717, + "step": 15596 + }, + { + "epoch": 1.2587361794851102, + "grad_norm": 0.6603000164031982, + "learning_rate": 2.3221074433975988e-05, + "loss": 2.3952, + "step": 15597 + }, + { + "epoch": 1.258816883221693, + "grad_norm": 0.7020140290260315, + "learning_rate": 2.3210960686350213e-05, + "loss": 2.4064, + "step": 15598 + }, + { + "epoch": 1.2588975869582761, + "grad_norm": 0.7434887290000916, + "learning_rate": 2.320084885250804e-05, + "loss": 2.4708, + "step": 15599 + }, + { + "epoch": 1.2589782906948592, + "grad_norm": 0.6626797318458557, + "learning_rate": 2.3190738932701482e-05, + "loss": 2.4503, + "step": 15600 + }, + { + "epoch": 1.259058994431442, + "grad_norm": 0.7880598902702332, + "learning_rate": 2.3180630927182466e-05, + "loss": 2.384, + "step": 15601 + }, + { + "epoch": 1.2591396981680252, + "grad_norm": 0.7766147255897522, + "learning_rate": 2.3170524836202933e-05, + "loss": 2.4019, + "step": 15602 + }, + { + "epoch": 1.2592204019046083, + "grad_norm": 0.7817980051040649, + "learning_rate": 2.3160420660014792e-05, + "loss": 2.4729, + "step": 15603 + }, + { + "epoch": 1.2593011056411911, + "grad_norm": 0.6915614604949951, + "learning_rate": 2.3150318398869787e-05, + "loss": 2.4028, + "step": 15604 + }, + { + "epoch": 1.2593818093777742, + "grad_norm": 0.690882682800293, + "learning_rate": 
2.3140218053019714e-05, + "loss": 2.4386, + "step": 15605 + }, + { + "epoch": 1.2594625131143573, + "grad_norm": 0.6670350432395935, + "learning_rate": 2.3130119622716382e-05, + "loss": 2.4224, + "step": 15606 + }, + { + "epoch": 1.2595432168509402, + "grad_norm": 0.6680006980895996, + "learning_rate": 2.3120023108211375e-05, + "loss": 2.3475, + "step": 15607 + }, + { + "epoch": 1.2596239205875233, + "grad_norm": 0.7003577947616577, + "learning_rate": 2.310992850975636e-05, + "loss": 2.4198, + "step": 15608 + }, + { + "epoch": 1.2597046243241061, + "grad_norm": 0.7444167733192444, + "learning_rate": 2.3099835827602944e-05, + "loss": 2.3756, + "step": 15609 + }, + { + "epoch": 1.2597853280606892, + "grad_norm": 0.6757989525794983, + "learning_rate": 2.3089745062002612e-05, + "loss": 2.3955, + "step": 15610 + }, + { + "epoch": 1.259866031797272, + "grad_norm": 0.6955820322036743, + "learning_rate": 2.3079656213206878e-05, + "loss": 2.4031, + "step": 15611 + }, + { + "epoch": 1.2599467355338552, + "grad_norm": 0.6646408438682556, + "learning_rate": 2.3069569281467184e-05, + "loss": 2.4246, + "step": 15612 + }, + { + "epoch": 1.2600274392704383, + "grad_norm": 0.6922882199287415, + "learning_rate": 2.3059484267034958e-05, + "loss": 2.4157, + "step": 15613 + }, + { + "epoch": 1.2601081430070211, + "grad_norm": 0.8092310428619385, + "learning_rate": 2.3049401170161468e-05, + "loss": 2.4137, + "step": 15614 + }, + { + "epoch": 1.2601888467436042, + "grad_norm": 0.7024559378623962, + "learning_rate": 2.3039319991098063e-05, + "loss": 2.4497, + "step": 15615 + }, + { + "epoch": 1.2602695504801873, + "grad_norm": 0.7096099853515625, + "learning_rate": 2.302924073009597e-05, + "loss": 2.4045, + "step": 15616 + }, + { + "epoch": 1.2603502542167702, + "grad_norm": 0.6777564287185669, + "learning_rate": 2.3019163387406406e-05, + "loss": 2.4607, + "step": 15617 + }, + { + "epoch": 1.2604309579533532, + "grad_norm": 0.7564159035682678, + "learning_rate": 2.300908796328052e-05, + 
"loss": 2.4985, + "step": 15618 + }, + { + "epoch": 1.2605116616899363, + "grad_norm": 0.7432986497879028, + "learning_rate": 2.2999014457969447e-05, + "loss": 2.4326, + "step": 15619 + }, + { + "epoch": 1.2605923654265192, + "grad_norm": 0.7178141474723816, + "learning_rate": 2.2988942871724182e-05, + "loss": 2.4118, + "step": 15620 + }, + { + "epoch": 1.2606730691631023, + "grad_norm": 0.7074497938156128, + "learning_rate": 2.2978873204795782e-05, + "loss": 2.4163, + "step": 15621 + }, + { + "epoch": 1.2607537728996854, + "grad_norm": 0.670200765132904, + "learning_rate": 2.2968805457435217e-05, + "loss": 2.4081, + "step": 15622 + }, + { + "epoch": 1.2608344766362682, + "grad_norm": 0.7258187532424927, + "learning_rate": 2.2958739629893355e-05, + "loss": 2.4889, + "step": 15623 + }, + { + "epoch": 1.2609151803728513, + "grad_norm": 0.6999781727790833, + "learning_rate": 2.2948675722421086e-05, + "loss": 2.3945, + "step": 15624 + }, + { + "epoch": 1.2609958841094342, + "grad_norm": 0.7030084133148193, + "learning_rate": 2.2938613735269243e-05, + "loss": 2.4509, + "step": 15625 + }, + { + "epoch": 1.2610765878460173, + "grad_norm": 0.6875420212745667, + "learning_rate": 2.292855366868858e-05, + "loss": 2.3658, + "step": 15626 + }, + { + "epoch": 1.2611572915826001, + "grad_norm": 0.7375235557556152, + "learning_rate": 2.2918495522929817e-05, + "loss": 2.4308, + "step": 15627 + }, + { + "epoch": 1.2612379953191832, + "grad_norm": 0.7021106481552124, + "learning_rate": 2.2908439298243644e-05, + "loss": 2.4046, + "step": 15628 + }, + { + "epoch": 1.2613186990557663, + "grad_norm": 0.76661616563797, + "learning_rate": 2.2898384994880716e-05, + "loss": 2.5156, + "step": 15629 + }, + { + "epoch": 1.2613994027923492, + "grad_norm": 0.6684869527816772, + "learning_rate": 2.2888332613091558e-05, + "loss": 2.4342, + "step": 15630 + }, + { + "epoch": 1.2614801065289323, + "grad_norm": 0.6878669261932373, + "learning_rate": 2.2878282153126706e-05, + "loss": 2.4544, + "step": 
15631 + }, + { + "epoch": 1.2615608102655154, + "grad_norm": 0.6659132838249207, + "learning_rate": 2.2868233615236702e-05, + "loss": 2.4341, + "step": 15632 + }, + { + "epoch": 1.2616415140020982, + "grad_norm": 0.657474160194397, + "learning_rate": 2.2858186999671905e-05, + "loss": 2.3515, + "step": 15633 + }, + { + "epoch": 1.2617222177386813, + "grad_norm": 0.7245650291442871, + "learning_rate": 2.284814230668274e-05, + "loss": 2.3983, + "step": 15634 + }, + { + "epoch": 1.2618029214752644, + "grad_norm": 0.6400195360183716, + "learning_rate": 2.2838099536519554e-05, + "loss": 2.3535, + "step": 15635 + }, + { + "epoch": 1.2618836252118473, + "grad_norm": 0.6719450950622559, + "learning_rate": 2.282805868943262e-05, + "loss": 2.3906, + "step": 15636 + }, + { + "epoch": 1.2619643289484304, + "grad_norm": 0.682746946811676, + "learning_rate": 2.2818019765672207e-05, + "loss": 2.4045, + "step": 15637 + }, + { + "epoch": 1.2620450326850134, + "grad_norm": 0.6631760597229004, + "learning_rate": 2.2807982765488513e-05, + "loss": 2.4896, + "step": 15638 + }, + { + "epoch": 1.2621257364215963, + "grad_norm": 0.782202422618866, + "learning_rate": 2.279794768913164e-05, + "loss": 2.4628, + "step": 15639 + }, + { + "epoch": 1.2622064401581794, + "grad_norm": 0.7579823732376099, + "learning_rate": 2.278791453685173e-05, + "loss": 2.4635, + "step": 15640 + }, + { + "epoch": 1.2622871438947623, + "grad_norm": 0.665096640586853, + "learning_rate": 2.277788330889884e-05, + "loss": 2.4899, + "step": 15641 + }, + { + "epoch": 1.2623678476313454, + "grad_norm": 0.7635685205459595, + "learning_rate": 2.2767854005522936e-05, + "loss": 2.4146, + "step": 15642 + }, + { + "epoch": 1.2624485513679282, + "grad_norm": 0.7579118609428406, + "learning_rate": 2.2757826626974e-05, + "loss": 2.3692, + "step": 15643 + }, + { + "epoch": 1.2625292551045113, + "grad_norm": 0.6772074699401855, + "learning_rate": 2.2747801173501938e-05, + "loss": 2.3954, + "step": 15644 + }, + { + "epoch": 
1.2626099588410944, + "grad_norm": 0.7028382420539856, + "learning_rate": 2.2737777645356606e-05, + "loss": 2.4799, + "step": 15645 + }, + { + "epoch": 1.2626906625776773, + "grad_norm": 0.7152617573738098, + "learning_rate": 2.2727756042787818e-05, + "loss": 2.4095, + "step": 15646 + }, + { + "epoch": 1.2627713663142603, + "grad_norm": 0.7286608219146729, + "learning_rate": 2.271773636604535e-05, + "loss": 2.4496, + "step": 15647 + }, + { + "epoch": 1.2628520700508434, + "grad_norm": 0.7006896734237671, + "learning_rate": 2.2707718615378935e-05, + "loss": 2.4128, + "step": 15648 + }, + { + "epoch": 1.2629327737874263, + "grad_norm": 0.6856697797775269, + "learning_rate": 2.2697702791038177e-05, + "loss": 2.4169, + "step": 15649 + }, + { + "epoch": 1.2630134775240094, + "grad_norm": 0.7582918405532837, + "learning_rate": 2.268768889327275e-05, + "loss": 2.4007, + "step": 15650 + }, + { + "epoch": 1.2630941812605925, + "grad_norm": 0.664633572101593, + "learning_rate": 2.2677676922332237e-05, + "loss": 2.3876, + "step": 15651 + }, + { + "epoch": 1.2631748849971753, + "grad_norm": 0.7283070087432861, + "learning_rate": 2.266766687846611e-05, + "loss": 2.4175, + "step": 15652 + }, + { + "epoch": 1.2632555887337584, + "grad_norm": 0.7309537529945374, + "learning_rate": 2.2657658761923863e-05, + "loss": 2.3998, + "step": 15653 + }, + { + "epoch": 1.2633362924703415, + "grad_norm": 0.6386510133743286, + "learning_rate": 2.2647652572954968e-05, + "loss": 2.3723, + "step": 15654 + }, + { + "epoch": 1.2634169962069244, + "grad_norm": 0.6805689930915833, + "learning_rate": 2.263764831180876e-05, + "loss": 2.3989, + "step": 15655 + }, + { + "epoch": 1.2634976999435072, + "grad_norm": 0.7147208452224731, + "learning_rate": 2.2627645978734536e-05, + "loss": 2.4748, + "step": 15656 + }, + { + "epoch": 1.2635784036800903, + "grad_norm": 0.6835155487060547, + "learning_rate": 2.2617645573981683e-05, + "loss": 2.4266, + "step": 15657 + }, + { + "epoch": 1.2636591074166734, + 
"grad_norm": 0.7631552219390869, + "learning_rate": 2.2607647097799368e-05, + "loss": 2.4152, + "step": 15658 + }, + { + "epoch": 1.2637398111532563, + "grad_norm": 0.6793624758720398, + "learning_rate": 2.2597650550436777e-05, + "loss": 2.3491, + "step": 15659 + }, + { + "epoch": 1.2638205148898394, + "grad_norm": 0.6465637683868408, + "learning_rate": 2.2587655932143083e-05, + "loss": 2.3774, + "step": 15660 + }, + { + "epoch": 1.2639012186264225, + "grad_norm": 0.6920284628868103, + "learning_rate": 2.2577663243167368e-05, + "loss": 2.4321, + "step": 15661 + }, + { + "epoch": 1.2639819223630053, + "grad_norm": 0.6922522783279419, + "learning_rate": 2.256767248375866e-05, + "loss": 2.4242, + "step": 15662 + }, + { + "epoch": 1.2640626260995884, + "grad_norm": 0.6811214089393616, + "learning_rate": 2.255768365416595e-05, + "loss": 2.4101, + "step": 15663 + }, + { + "epoch": 1.2641433298361715, + "grad_norm": 0.6704947352409363, + "learning_rate": 2.2547696754638238e-05, + "loss": 2.4792, + "step": 15664 + }, + { + "epoch": 1.2642240335727544, + "grad_norm": 0.6814701557159424, + "learning_rate": 2.2537711785424354e-05, + "loss": 2.4429, + "step": 15665 + }, + { + "epoch": 1.2643047373093375, + "grad_norm": 0.6778244972229004, + "learning_rate": 2.252772874677318e-05, + "loss": 2.3882, + "step": 15666 + }, + { + "epoch": 1.2643854410459205, + "grad_norm": 0.6570093035697937, + "learning_rate": 2.2517747638933518e-05, + "loss": 2.4162, + "step": 15667 + }, + { + "epoch": 1.2644661447825034, + "grad_norm": 0.6973466873168945, + "learning_rate": 2.2507768462154133e-05, + "loss": 2.3646, + "step": 15668 + }, + { + "epoch": 1.2645468485190865, + "grad_norm": 0.7258623242378235, + "learning_rate": 2.2497791216683715e-05, + "loss": 2.404, + "step": 15669 + }, + { + "epoch": 1.2646275522556694, + "grad_norm": 0.7462170124053955, + "learning_rate": 2.248781590277097e-05, + "loss": 2.5076, + "step": 15670 + }, + { + "epoch": 1.2647082559922525, + "grad_norm": 
0.7070441246032715, + "learning_rate": 2.247784252066444e-05, + "loss": 2.3817, + "step": 15671 + }, + { + "epoch": 1.2647889597288353, + "grad_norm": 0.7150183916091919, + "learning_rate": 2.246787107061272e-05, + "loss": 2.461, + "step": 15672 + }, + { + "epoch": 1.2648696634654184, + "grad_norm": 0.668436586856842, + "learning_rate": 2.2457901552864347e-05, + "loss": 2.466, + "step": 15673 + }, + { + "epoch": 1.2649503672020015, + "grad_norm": 0.7011097073554993, + "learning_rate": 2.2447933967667745e-05, + "loss": 2.4582, + "step": 15674 + }, + { + "epoch": 1.2650310709385844, + "grad_norm": 0.7149096727371216, + "learning_rate": 2.243796831527134e-05, + "loss": 2.4461, + "step": 15675 + }, + { + "epoch": 1.2651117746751674, + "grad_norm": 0.6810914278030396, + "learning_rate": 2.2428004595923525e-05, + "loss": 2.4043, + "step": 15676 + }, + { + "epoch": 1.2651924784117505, + "grad_norm": 0.7700765132904053, + "learning_rate": 2.241804280987261e-05, + "loss": 2.4197, + "step": 15677 + }, + { + "epoch": 1.2652731821483334, + "grad_norm": 0.6897448897361755, + "learning_rate": 2.240808295736686e-05, + "loss": 2.4052, + "step": 15678 + }, + { + "epoch": 1.2653538858849165, + "grad_norm": 0.7092932462692261, + "learning_rate": 2.2398125038654515e-05, + "loss": 2.4088, + "step": 15679 + }, + { + "epoch": 1.2654345896214996, + "grad_norm": 0.6930294632911682, + "learning_rate": 2.2388169053983777e-05, + "loss": 2.4504, + "step": 15680 + }, + { + "epoch": 1.2655152933580824, + "grad_norm": 0.7056782245635986, + "learning_rate": 2.237821500360271e-05, + "loss": 2.3975, + "step": 15681 + }, + { + "epoch": 1.2655959970946655, + "grad_norm": 0.651772141456604, + "learning_rate": 2.236826288775944e-05, + "loss": 2.3941, + "step": 15682 + }, + { + "epoch": 1.2656767008312486, + "grad_norm": 0.7254980206489563, + "learning_rate": 2.2358312706702012e-05, + "loss": 2.4149, + "step": 15683 + }, + { + "epoch": 1.2657574045678315, + "grad_norm": 0.6553635597229004, + 
"learning_rate": 2.2348364460678373e-05, + "loss": 2.4099, + "step": 15684 + }, + { + "epoch": 1.2658381083044146, + "grad_norm": 0.6952616572380066, + "learning_rate": 2.233841814993646e-05, + "loss": 2.384, + "step": 15685 + }, + { + "epoch": 1.2659188120409974, + "grad_norm": 0.72947096824646, + "learning_rate": 2.2328473774724178e-05, + "loss": 2.5033, + "step": 15686 + }, + { + "epoch": 1.2659995157775805, + "grad_norm": 0.7419683933258057, + "learning_rate": 2.231853133528937e-05, + "loss": 2.4881, + "step": 15687 + }, + { + "epoch": 1.2660802195141634, + "grad_norm": 0.7125211358070374, + "learning_rate": 2.2308590831879827e-05, + "loss": 2.4334, + "step": 15688 + }, + { + "epoch": 1.2661609232507465, + "grad_norm": 0.6668617129325867, + "learning_rate": 2.2298652264743315e-05, + "loss": 2.4144, + "step": 15689 + }, + { + "epoch": 1.2662416269873296, + "grad_norm": 0.8075512051582336, + "learning_rate": 2.2288715634127465e-05, + "loss": 2.421, + "step": 15690 + }, + { + "epoch": 1.2663223307239124, + "grad_norm": 0.6894629001617432, + "learning_rate": 2.2278780940279965e-05, + "loss": 2.4142, + "step": 15691 + }, + { + "epoch": 1.2664030344604955, + "grad_norm": 0.7418074011802673, + "learning_rate": 2.226884818344841e-05, + "loss": 2.4214, + "step": 15692 + }, + { + "epoch": 1.2664837381970786, + "grad_norm": 0.6724219918251038, + "learning_rate": 2.225891736388037e-05, + "loss": 2.4455, + "step": 15693 + }, + { + "epoch": 1.2665644419336615, + "grad_norm": 0.7202882766723633, + "learning_rate": 2.224898848182331e-05, + "loss": 2.4017, + "step": 15694 + }, + { + "epoch": 1.2666451456702446, + "grad_norm": 0.7671259641647339, + "learning_rate": 2.2239061537524698e-05, + "loss": 2.4386, + "step": 15695 + }, + { + "epoch": 1.2667258494068276, + "grad_norm": 0.7154317498207092, + "learning_rate": 2.222913653123194e-05, + "loss": 2.3754, + "step": 15696 + }, + { + "epoch": 1.2668065531434105, + "grad_norm": 0.7203264236450195, + "learning_rate": 
2.221921346319239e-05, + "loss": 2.3926, + "step": 15697 + }, + { + "epoch": 1.2668872568799936, + "grad_norm": 0.7104187607765198, + "learning_rate": 2.2209292333653365e-05, + "loss": 2.4528, + "step": 15698 + }, + { + "epoch": 1.2669679606165767, + "grad_norm": 0.7650138139724731, + "learning_rate": 2.2199373142862158e-05, + "loss": 2.4372, + "step": 15699 + }, + { + "epoch": 1.2670486643531595, + "grad_norm": 0.6796044111251831, + "learning_rate": 2.2189455891065903e-05, + "loss": 2.415, + "step": 15700 + }, + { + "epoch": 1.2671293680897426, + "grad_norm": 0.6749297380447388, + "learning_rate": 2.2179540578511813e-05, + "loss": 2.4337, + "step": 15701 + }, + { + "epoch": 1.2672100718263255, + "grad_norm": 0.7330272793769836, + "learning_rate": 2.216962720544703e-05, + "loss": 2.4322, + "step": 15702 + }, + { + "epoch": 1.2672907755629086, + "grad_norm": 0.6793510913848877, + "learning_rate": 2.215971577211855e-05, + "loss": 2.4473, + "step": 15703 + }, + { + "epoch": 1.2673714792994915, + "grad_norm": 0.7477267384529114, + "learning_rate": 2.2149806278773433e-05, + "loss": 2.4699, + "step": 15704 + }, + { + "epoch": 1.2674521830360745, + "grad_norm": 0.7048643827438354, + "learning_rate": 2.213989872565867e-05, + "loss": 2.4341, + "step": 15705 + }, + { + "epoch": 1.2675328867726576, + "grad_norm": 0.647433340549469, + "learning_rate": 2.2129993113021108e-05, + "loss": 2.423, + "step": 15706 + }, + { + "epoch": 1.2676135905092405, + "grad_norm": 0.6886507272720337, + "learning_rate": 2.2120089441107706e-05, + "loss": 2.4185, + "step": 15707 + }, + { + "epoch": 1.2676942942458236, + "grad_norm": 0.6720516085624695, + "learning_rate": 2.2110187710165242e-05, + "loss": 2.4587, + "step": 15708 + }, + { + "epoch": 1.2677749979824067, + "grad_norm": 0.676665723323822, + "learning_rate": 2.2100287920440543e-05, + "loss": 2.4241, + "step": 15709 + }, + { + "epoch": 1.2678557017189895, + "grad_norm": 0.6939559578895569, + "learning_rate": 2.209039007218028e-05, + 
"loss": 2.3974, + "step": 15710 + }, + { + "epoch": 1.2679364054555726, + "grad_norm": 0.6485786437988281, + "learning_rate": 2.2080494165631137e-05, + "loss": 2.4041, + "step": 15711 + }, + { + "epoch": 1.2680171091921557, + "grad_norm": 0.668319582939148, + "learning_rate": 2.2070600201039802e-05, + "loss": 2.4705, + "step": 15712 + }, + { + "epoch": 1.2680978129287386, + "grad_norm": 0.6837478280067444, + "learning_rate": 2.206070817865279e-05, + "loss": 2.4474, + "step": 15713 + }, + { + "epoch": 1.2681785166653217, + "grad_norm": 0.7000131011009216, + "learning_rate": 2.2050818098716664e-05, + "loss": 2.4463, + "step": 15714 + }, + { + "epoch": 1.2682592204019045, + "grad_norm": 0.7063068151473999, + "learning_rate": 2.204092996147794e-05, + "loss": 2.4226, + "step": 15715 + }, + { + "epoch": 1.2683399241384876, + "grad_norm": 0.6497172117233276, + "learning_rate": 2.2031043767183003e-05, + "loss": 2.3678, + "step": 15716 + }, + { + "epoch": 1.2684206278750705, + "grad_norm": 0.6558645963668823, + "learning_rate": 2.2021159516078262e-05, + "loss": 2.4021, + "step": 15717 + }, + { + "epoch": 1.2685013316116536, + "grad_norm": 0.7411713600158691, + "learning_rate": 2.2011277208410062e-05, + "loss": 2.4346, + "step": 15718 + }, + { + "epoch": 1.2685820353482367, + "grad_norm": 0.7275578379631042, + "learning_rate": 2.2001396844424714e-05, + "loss": 2.4262, + "step": 15719 + }, + { + "epoch": 1.2686627390848195, + "grad_norm": 0.7010936141014099, + "learning_rate": 2.199151842436844e-05, + "loss": 2.4774, + "step": 15720 + }, + { + "epoch": 1.2687434428214026, + "grad_norm": 0.7551137208938599, + "learning_rate": 2.1981641948487462e-05, + "loss": 2.5286, + "step": 15721 + }, + { + "epoch": 1.2688241465579857, + "grad_norm": 0.6510799527168274, + "learning_rate": 2.1971767417027888e-05, + "loss": 2.3813, + "step": 15722 + }, + { + "epoch": 1.2689048502945686, + "grad_norm": 0.636050283908844, + "learning_rate": 2.196189483023584e-05, + "loss": 2.4226, + "step": 
15723 + }, + { + "epoch": 1.2689855540311517, + "grad_norm": 0.6939265131950378, + "learning_rate": 2.1952024188357368e-05, + "loss": 2.4516, + "step": 15724 + }, + { + "epoch": 1.2690662577677347, + "grad_norm": 0.6715239882469177, + "learning_rate": 2.1942155491638494e-05, + "loss": 2.4358, + "step": 15725 + }, + { + "epoch": 1.2691469615043176, + "grad_norm": 0.740680456161499, + "learning_rate": 2.1932288740325123e-05, + "loss": 2.4135, + "step": 15726 + }, + { + "epoch": 1.2692276652409007, + "grad_norm": 0.6969335079193115, + "learning_rate": 2.1922423934663193e-05, + "loss": 2.43, + "step": 15727 + }, + { + "epoch": 1.2693083689774838, + "grad_norm": 0.6390758156776428, + "learning_rate": 2.1912561074898554e-05, + "loss": 2.4492, + "step": 15728 + }, + { + "epoch": 1.2693890727140666, + "grad_norm": 0.7129701375961304, + "learning_rate": 2.190270016127701e-05, + "loss": 2.3799, + "step": 15729 + }, + { + "epoch": 1.2694697764506497, + "grad_norm": 0.7309553027153015, + "learning_rate": 2.1892841194044332e-05, + "loss": 2.4955, + "step": 15730 + }, + { + "epoch": 1.2695504801872326, + "grad_norm": 0.7257225513458252, + "learning_rate": 2.1882984173446252e-05, + "loss": 2.4184, + "step": 15731 + }, + { + "epoch": 1.2696311839238157, + "grad_norm": 0.7434510588645935, + "learning_rate": 2.1873129099728384e-05, + "loss": 2.453, + "step": 15732 + }, + { + "epoch": 1.2697118876603986, + "grad_norm": 0.6643160581588745, + "learning_rate": 2.1863275973136356e-05, + "loss": 2.3619, + "step": 15733 + }, + { + "epoch": 1.2697925913969816, + "grad_norm": 0.6677344441413879, + "learning_rate": 2.1853424793915778e-05, + "loss": 2.406, + "step": 15734 + }, + { + "epoch": 1.2698732951335647, + "grad_norm": 0.760028064250946, + "learning_rate": 2.1843575562312092e-05, + "loss": 2.5479, + "step": 15735 + }, + { + "epoch": 1.2699539988701476, + "grad_norm": 0.6668389439582825, + "learning_rate": 2.183372827857082e-05, + "loss": 2.4104, + "step": 15736 + }, + { + "epoch": 
1.2700347026067307, + "grad_norm": 0.651155412197113, + "learning_rate": 2.182388294293736e-05, + "loss": 2.3738, + "step": 15737 + }, + { + "epoch": 1.2701154063433138, + "grad_norm": 0.736907958984375, + "learning_rate": 2.1814039555657084e-05, + "loss": 2.4179, + "step": 15738 + }, + { + "epoch": 1.2701961100798966, + "grad_norm": 0.7068225741386414, + "learning_rate": 2.180419811697534e-05, + "loss": 2.3911, + "step": 15739 + }, + { + "epoch": 1.2702768138164797, + "grad_norm": 0.6959261894226074, + "learning_rate": 2.1794358627137368e-05, + "loss": 2.452, + "step": 15740 + }, + { + "epoch": 1.2703575175530628, + "grad_norm": 0.6886181235313416, + "learning_rate": 2.1784521086388442e-05, + "loss": 2.4166, + "step": 15741 + }, + { + "epoch": 1.2704382212896457, + "grad_norm": 0.6494541168212891, + "learning_rate": 2.177468549497369e-05, + "loss": 2.3589, + "step": 15742 + }, + { + "epoch": 1.2705189250262288, + "grad_norm": 0.7008326649665833, + "learning_rate": 2.1764851853138247e-05, + "loss": 2.3697, + "step": 15743 + }, + { + "epoch": 1.2705996287628119, + "grad_norm": 0.6800456643104553, + "learning_rate": 2.1755020161127238e-05, + "loss": 2.4162, + "step": 15744 + }, + { + "epoch": 1.2706803324993947, + "grad_norm": 0.6836018562316895, + "learning_rate": 2.1745190419185634e-05, + "loss": 2.3977, + "step": 15745 + }, + { + "epoch": 1.2707610362359778, + "grad_norm": 0.6489691138267517, + "learning_rate": 2.173536262755844e-05, + "loss": 2.464, + "step": 15746 + }, + { + "epoch": 1.2708417399725607, + "grad_norm": 0.7309786677360535, + "learning_rate": 2.172553678649061e-05, + "loss": 2.4065, + "step": 15747 + }, + { + "epoch": 1.2709224437091438, + "grad_norm": 0.6752686500549316, + "learning_rate": 2.1715712896227004e-05, + "loss": 2.3935, + "step": 15748 + }, + { + "epoch": 1.2710031474457266, + "grad_norm": 0.7039850354194641, + "learning_rate": 2.1705890957012465e-05, + "loss": 2.4605, + "step": 15749 + }, + { + "epoch": 1.2710838511823097, + 
"grad_norm": 0.6904652714729309, + "learning_rate": 2.169607096909182e-05, + "loss": 2.4264, + "step": 15750 + }, + { + "epoch": 1.2711645549188928, + "grad_norm": 0.7104331254959106, + "learning_rate": 2.168625293270974e-05, + "loss": 2.378, + "step": 15751 + }, + { + "epoch": 1.2712452586554757, + "grad_norm": 0.6732800602912903, + "learning_rate": 2.167643684811096e-05, + "loss": 2.4216, + "step": 15752 + }, + { + "epoch": 1.2713259623920588, + "grad_norm": 0.7207335829734802, + "learning_rate": 2.166662271554011e-05, + "loss": 2.3861, + "step": 15753 + }, + { + "epoch": 1.2714066661286418, + "grad_norm": 0.7561055421829224, + "learning_rate": 2.1656810535241813e-05, + "loss": 2.4753, + "step": 15754 + }, + { + "epoch": 1.2714873698652247, + "grad_norm": 0.7018210887908936, + "learning_rate": 2.1647000307460564e-05, + "loss": 2.401, + "step": 15755 + }, + { + "epoch": 1.2715680736018078, + "grad_norm": 0.6908013224601746, + "learning_rate": 2.163719203244089e-05, + "loss": 2.4451, + "step": 15756 + }, + { + "epoch": 1.2716487773383909, + "grad_norm": 0.734909176826477, + "learning_rate": 2.162738571042723e-05, + "loss": 2.4221, + "step": 15757 + }, + { + "epoch": 1.2717294810749737, + "grad_norm": 0.7047279477119446, + "learning_rate": 2.1617581341663973e-05, + "loss": 2.4149, + "step": 15758 + }, + { + "epoch": 1.2718101848115568, + "grad_norm": 0.6875640749931335, + "learning_rate": 2.1607778926395496e-05, + "loss": 2.3874, + "step": 15759 + }, + { + "epoch": 1.2718908885481397, + "grad_norm": 0.7300851345062256, + "learning_rate": 2.159797846486611e-05, + "loss": 2.4706, + "step": 15760 + }, + { + "epoch": 1.2719715922847228, + "grad_norm": 0.733775794506073, + "learning_rate": 2.1588179957320022e-05, + "loss": 2.4208, + "step": 15761 + }, + { + "epoch": 1.2720522960213057, + "grad_norm": 0.8375213742256165, + "learning_rate": 2.1578383404001458e-05, + "loss": 2.4672, + "step": 15762 + }, + { + "epoch": 1.2721329997578887, + "grad_norm": 0.7276780009269714, + 
"learning_rate": 2.15685888051546e-05, + "loss": 2.4536, + "step": 15763 + }, + { + "epoch": 1.2722137034944718, + "grad_norm": 0.7765224575996399, + "learning_rate": 2.1558796161023508e-05, + "loss": 2.3671, + "step": 15764 + }, + { + "epoch": 1.2722944072310547, + "grad_norm": 0.7225642204284668, + "learning_rate": 2.1549005471852256e-05, + "loss": 2.4316, + "step": 15765 + }, + { + "epoch": 1.2723751109676378, + "grad_norm": 0.6959484219551086, + "learning_rate": 2.1539216737884904e-05, + "loss": 2.4581, + "step": 15766 + }, + { + "epoch": 1.2724558147042209, + "grad_norm": 0.6943621039390564, + "learning_rate": 2.1529429959365332e-05, + "loss": 2.4372, + "step": 15767 + }, + { + "epoch": 1.2725365184408037, + "grad_norm": 0.7067148089408875, + "learning_rate": 2.151964513653746e-05, + "loss": 2.431, + "step": 15768 + }, + { + "epoch": 1.2726172221773868, + "grad_norm": 0.8317076563835144, + "learning_rate": 2.150986226964521e-05, + "loss": 2.4177, + "step": 15769 + }, + { + "epoch": 1.27269792591397, + "grad_norm": 0.7390087246894836, + "learning_rate": 2.150008135893239e-05, + "loss": 2.4711, + "step": 15770 + }, + { + "epoch": 1.2727786296505528, + "grad_norm": 0.6829150915145874, + "learning_rate": 2.1490302404642725e-05, + "loss": 2.4477, + "step": 15771 + }, + { + "epoch": 1.2728593333871359, + "grad_norm": 0.7355613708496094, + "learning_rate": 2.148052540701995e-05, + "loss": 2.493, + "step": 15772 + }, + { + "epoch": 1.272940037123719, + "grad_norm": 0.6872289776802063, + "learning_rate": 2.1470750366307747e-05, + "loss": 2.4363, + "step": 15773 + }, + { + "epoch": 1.2730207408603018, + "grad_norm": 0.7753220796585083, + "learning_rate": 2.1460977282749705e-05, + "loss": 2.4376, + "step": 15774 + }, + { + "epoch": 1.273101444596885, + "grad_norm": 0.6717056632041931, + "learning_rate": 2.145120615658942e-05, + "loss": 2.4383, + "step": 15775 + }, + { + "epoch": 1.2731821483334678, + "grad_norm": 0.7441569566726685, + "learning_rate": 
2.1441436988070428e-05, + "loss": 2.462, + "step": 15776 + }, + { + "epoch": 1.2732628520700509, + "grad_norm": 0.6824371814727783, + "learning_rate": 2.143166977743615e-05, + "loss": 2.4173, + "step": 15777 + }, + { + "epoch": 1.2733435558066337, + "grad_norm": 0.7310225963592529, + "learning_rate": 2.1421904524930038e-05, + "loss": 2.4222, + "step": 15778 + }, + { + "epoch": 1.2734242595432168, + "grad_norm": 0.7198066115379333, + "learning_rate": 2.141214123079548e-05, + "loss": 2.4262, + "step": 15779 + }, + { + "epoch": 1.2735049632798, + "grad_norm": 0.7081776857376099, + "learning_rate": 2.1402379895275783e-05, + "loss": 2.4473, + "step": 15780 + }, + { + "epoch": 1.2735856670163828, + "grad_norm": 0.6909368634223938, + "learning_rate": 2.1392620518614235e-05, + "loss": 2.4528, + "step": 15781 + }, + { + "epoch": 1.2736663707529658, + "grad_norm": 0.7170675992965698, + "learning_rate": 2.1382863101054107e-05, + "loss": 2.4214, + "step": 15782 + }, + { + "epoch": 1.273747074489549, + "grad_norm": 0.6992846727371216, + "learning_rate": 2.1373107642838497e-05, + "loss": 2.4397, + "step": 15783 + }, + { + "epoch": 1.2738277782261318, + "grad_norm": 0.7245237231254578, + "learning_rate": 2.1363354144210578e-05, + "loss": 2.373, + "step": 15784 + }, + { + "epoch": 1.273908481962715, + "grad_norm": 0.6929232478141785, + "learning_rate": 2.1353602605413435e-05, + "loss": 2.4297, + "step": 15785 + }, + { + "epoch": 1.273989185699298, + "grad_norm": 0.7243950366973877, + "learning_rate": 2.134385302669013e-05, + "loss": 2.3856, + "step": 15786 + }, + { + "epoch": 1.2740698894358808, + "grad_norm": 0.6712679266929626, + "learning_rate": 2.133410540828359e-05, + "loss": 2.3818, + "step": 15787 + }, + { + "epoch": 1.274150593172464, + "grad_norm": 0.7433474063873291, + "learning_rate": 2.1324359750436774e-05, + "loss": 2.4148, + "step": 15788 + }, + { + "epoch": 1.274231296909047, + "grad_norm": 0.7225894927978516, + "learning_rate": 2.1314616053392577e-05, + "loss": 
2.395, + "step": 15789 + }, + { + "epoch": 1.2743120006456299, + "grad_norm": 0.7026889324188232, + "learning_rate": 2.130487431739383e-05, + "loss": 2.4693, + "step": 15790 + }, + { + "epoch": 1.274392704382213, + "grad_norm": 0.6898565292358398, + "learning_rate": 2.1295134542683325e-05, + "loss": 2.3643, + "step": 15791 + }, + { + "epoch": 1.2744734081187958, + "grad_norm": 0.7212820649147034, + "learning_rate": 2.1285396729503826e-05, + "loss": 2.4178, + "step": 15792 + }, + { + "epoch": 1.274554111855379, + "grad_norm": 0.7149149179458618, + "learning_rate": 2.127566087809798e-05, + "loss": 2.4023, + "step": 15793 + }, + { + "epoch": 1.2746348155919618, + "grad_norm": 0.7039671540260315, + "learning_rate": 2.126592698870846e-05, + "loss": 2.4667, + "step": 15794 + }, + { + "epoch": 1.2747155193285449, + "grad_norm": 0.806849479675293, + "learning_rate": 2.1256195061577877e-05, + "loss": 2.4741, + "step": 15795 + }, + { + "epoch": 1.274796223065128, + "grad_norm": 0.7544776797294617, + "learning_rate": 2.124646509694872e-05, + "loss": 2.4258, + "step": 15796 + }, + { + "epoch": 1.2748769268017108, + "grad_norm": 0.6946810483932495, + "learning_rate": 2.1236737095063518e-05, + "loss": 2.4088, + "step": 15797 + }, + { + "epoch": 1.274957630538294, + "grad_norm": 0.7714219093322754, + "learning_rate": 2.1227011056164714e-05, + "loss": 2.4705, + "step": 15798 + }, + { + "epoch": 1.275038334274877, + "grad_norm": 0.6789658665657043, + "learning_rate": 2.121728698049471e-05, + "loss": 2.4692, + "step": 15799 + }, + { + "epoch": 1.2751190380114599, + "grad_norm": 0.7003477215766907, + "learning_rate": 2.120756486829586e-05, + "loss": 2.4437, + "step": 15800 + }, + { + "epoch": 1.275199741748043, + "grad_norm": 0.6802948117256165, + "learning_rate": 2.1197844719810455e-05, + "loss": 2.4002, + "step": 15801 + }, + { + "epoch": 1.275280445484626, + "grad_norm": 0.67823326587677, + "learning_rate": 2.1188126535280773e-05, + "loss": 2.5119, + "step": 15802 + }, + { + 
"epoch": 1.275361149221209, + "grad_norm": 0.6580843925476074, + "learning_rate": 2.1178410314948972e-05, + "loss": 2.3814, + "step": 15803 + }, + { + "epoch": 1.275441852957792, + "grad_norm": 0.681642472743988, + "learning_rate": 2.1168696059057226e-05, + "loss": 2.4206, + "step": 15804 + }, + { + "epoch": 1.275522556694375, + "grad_norm": 0.7483543753623962, + "learning_rate": 2.1158983767847674e-05, + "loss": 2.4633, + "step": 15805 + }, + { + "epoch": 1.275603260430958, + "grad_norm": 0.6565235257148743, + "learning_rate": 2.11492734415623e-05, + "loss": 2.4145, + "step": 15806 + }, + { + "epoch": 1.275683964167541, + "grad_norm": 0.6606764793395996, + "learning_rate": 2.1139565080443157e-05, + "loss": 2.3935, + "step": 15807 + }, + { + "epoch": 1.275764667904124, + "grad_norm": 0.7915800213813782, + "learning_rate": 2.1129858684732206e-05, + "loss": 2.4288, + "step": 15808 + }, + { + "epoch": 1.275845371640707, + "grad_norm": 0.6763594746589661, + "learning_rate": 2.112015425467133e-05, + "loss": 2.4147, + "step": 15809 + }, + { + "epoch": 1.2759260753772899, + "grad_norm": 0.6886053085327148, + "learning_rate": 2.1110451790502405e-05, + "loss": 2.3798, + "step": 15810 + }, + { + "epoch": 1.276006779113873, + "grad_norm": 0.686122715473175, + "learning_rate": 2.110075129246728e-05, + "loss": 2.3896, + "step": 15811 + }, + { + "epoch": 1.276087482850456, + "grad_norm": 0.6989614367485046, + "learning_rate": 2.109105276080764e-05, + "loss": 2.4533, + "step": 15812 + }, + { + "epoch": 1.276168186587039, + "grad_norm": 0.6818450689315796, + "learning_rate": 2.1081356195765232e-05, + "loss": 2.4012, + "step": 15813 + }, + { + "epoch": 1.276248890323622, + "grad_norm": 0.7492663860321045, + "learning_rate": 2.107166159758176e-05, + "loss": 2.4269, + "step": 15814 + }, + { + "epoch": 1.276329594060205, + "grad_norm": 0.6752359867095947, + "learning_rate": 2.1061968966498767e-05, + "loss": 2.4478, + "step": 15815 + }, + { + "epoch": 1.276410297796788, + "grad_norm": 
0.6784162521362305, + "learning_rate": 2.1052278302757854e-05, + "loss": 2.4853, + "step": 15816 + }, + { + "epoch": 1.276491001533371, + "grad_norm": 0.7273215651512146, + "learning_rate": 2.104258960660055e-05, + "loss": 2.4365, + "step": 15817 + }, + { + "epoch": 1.2765717052699541, + "grad_norm": 0.7021621465682983, + "learning_rate": 2.1032902878268323e-05, + "loss": 2.4665, + "step": 15818 + }, + { + "epoch": 1.276652409006537, + "grad_norm": 0.666828989982605, + "learning_rate": 2.102321811800253e-05, + "loss": 2.3922, + "step": 15819 + }, + { + "epoch": 1.27673311274312, + "grad_norm": 0.6780487298965454, + "learning_rate": 2.1013535326044608e-05, + "loss": 2.4072, + "step": 15820 + }, + { + "epoch": 1.276813816479703, + "grad_norm": 0.6474688053131104, + "learning_rate": 2.1003854502635888e-05, + "loss": 2.4145, + "step": 15821 + }, + { + "epoch": 1.276894520216286, + "grad_norm": 0.6712753772735596, + "learning_rate": 2.0994175648017587e-05, + "loss": 2.4349, + "step": 15822 + }, + { + "epoch": 1.2769752239528689, + "grad_norm": 0.6705189943313599, + "learning_rate": 2.098449876243096e-05, + "loss": 2.4376, + "step": 15823 + }, + { + "epoch": 1.277055927689452, + "grad_norm": 0.6794685125350952, + "learning_rate": 2.0974823846117197e-05, + "loss": 2.3717, + "step": 15824 + }, + { + "epoch": 1.277136631426035, + "grad_norm": 0.7145677804946899, + "learning_rate": 2.0965150899317364e-05, + "loss": 2.3829, + "step": 15825 + }, + { + "epoch": 1.277217335162618, + "grad_norm": 0.7043245434761047, + "learning_rate": 2.095547992227257e-05, + "loss": 2.405, + "step": 15826 + }, + { + "epoch": 1.277298038899201, + "grad_norm": 0.7969205379486084, + "learning_rate": 2.0945810915223873e-05, + "loss": 2.4115, + "step": 15827 + }, + { + "epoch": 1.277378742635784, + "grad_norm": 0.657482385635376, + "learning_rate": 2.0936143878412186e-05, + "loss": 2.372, + "step": 15828 + }, + { + "epoch": 1.277459446372367, + "grad_norm": 0.7315167784690857, + "learning_rate": 
2.0926478812078466e-05, + "loss": 2.4372, + "step": 15829 + }, + { + "epoch": 1.27754015010895, + "grad_norm": 0.6985061764717102, + "learning_rate": 2.09168157164636e-05, + "loss": 2.3901, + "step": 15830 + }, + { + "epoch": 1.2776208538455331, + "grad_norm": 0.6906184554100037, + "learning_rate": 2.0907154591808408e-05, + "loss": 2.4562, + "step": 15831 + }, + { + "epoch": 1.277701557582116, + "grad_norm": 0.655094563961029, + "learning_rate": 2.0897495438353676e-05, + "loss": 2.451, + "step": 15832 + }, + { + "epoch": 1.277782261318699, + "grad_norm": 0.7663134932518005, + "learning_rate": 2.0887838256340143e-05, + "loss": 2.4634, + "step": 15833 + }, + { + "epoch": 1.2778629650552822, + "grad_norm": 0.7164491415023804, + "learning_rate": 2.087818304600849e-05, + "loss": 2.4624, + "step": 15834 + }, + { + "epoch": 1.277943668791865, + "grad_norm": 0.6962822079658508, + "learning_rate": 2.0868529807599336e-05, + "loss": 2.4325, + "step": 15835 + }, + { + "epoch": 1.2780243725284481, + "grad_norm": 0.702985405921936, + "learning_rate": 2.0858878541353255e-05, + "loss": 2.4219, + "step": 15836 + }, + { + "epoch": 1.278105076265031, + "grad_norm": 0.7605595588684082, + "learning_rate": 2.0849229247510826e-05, + "loss": 2.4201, + "step": 15837 + }, + { + "epoch": 1.278185780001614, + "grad_norm": 0.8479344248771667, + "learning_rate": 2.083958192631249e-05, + "loss": 2.4689, + "step": 15838 + }, + { + "epoch": 1.278266483738197, + "grad_norm": 0.7241235375404358, + "learning_rate": 2.082993657799869e-05, + "loss": 2.4861, + "step": 15839 + }, + { + "epoch": 1.27834718747478, + "grad_norm": 0.7069835066795349, + "learning_rate": 2.0820293202809827e-05, + "loss": 2.3759, + "step": 15840 + }, + { + "epoch": 1.2784278912113631, + "grad_norm": 0.6606370210647583, + "learning_rate": 2.0810651800986237e-05, + "loss": 2.4444, + "step": 15841 + }, + { + "epoch": 1.278508594947946, + "grad_norm": 0.6608174443244934, + "learning_rate": 2.08010123727682e-05, + "loss": 2.4339, + 
"step": 15842 + }, + { + "epoch": 1.278589298684529, + "grad_norm": 0.751000702381134, + "learning_rate": 2.0791374918396e-05, + "loss": 2.4327, + "step": 15843 + }, + { + "epoch": 1.2786700024211122, + "grad_norm": 0.7223808765411377, + "learning_rate": 2.0781739438109748e-05, + "loss": 2.3573, + "step": 15844 + }, + { + "epoch": 1.278750706157695, + "grad_norm": 0.6872109770774841, + "learning_rate": 2.0772105932149642e-05, + "loss": 2.3973, + "step": 15845 + }, + { + "epoch": 1.2788314098942781, + "grad_norm": 0.6967385411262512, + "learning_rate": 2.0762474400755762e-05, + "loss": 2.4622, + "step": 15846 + }, + { + "epoch": 1.2789121136308612, + "grad_norm": 0.7289159893989563, + "learning_rate": 2.0752844844168163e-05, + "loss": 2.4507, + "step": 15847 + }, + { + "epoch": 1.278992817367444, + "grad_norm": 0.7735978364944458, + "learning_rate": 2.0743217262626802e-05, + "loss": 2.4341, + "step": 15848 + }, + { + "epoch": 1.2790735211040272, + "grad_norm": 0.7209177017211914, + "learning_rate": 2.0733591656371655e-05, + "loss": 2.4024, + "step": 15849 + }, + { + "epoch": 1.2791542248406103, + "grad_norm": 0.6789259314537048, + "learning_rate": 2.0723968025642604e-05, + "loss": 2.3809, + "step": 15850 + }, + { + "epoch": 1.2792349285771931, + "grad_norm": 0.6972812414169312, + "learning_rate": 2.0714346370679495e-05, + "loss": 2.3986, + "step": 15851 + }, + { + "epoch": 1.2793156323137762, + "grad_norm": 0.7144166827201843, + "learning_rate": 2.070472669172213e-05, + "loss": 2.4241, + "step": 15852 + }, + { + "epoch": 1.279396336050359, + "grad_norm": 0.7325223088264465, + "learning_rate": 2.0695108989010282e-05, + "loss": 2.452, + "step": 15853 + }, + { + "epoch": 1.2794770397869422, + "grad_norm": 0.6900116205215454, + "learning_rate": 2.0685493262783608e-05, + "loss": 2.4091, + "step": 15854 + }, + { + "epoch": 1.279557743523525, + "grad_norm": 0.6846197843551636, + "learning_rate": 2.0675879513281758e-05, + "loss": 2.4337, + "step": 15855 + }, + { + "epoch": 
1.2796384472601081, + "grad_norm": 0.6901541352272034, + "learning_rate": 2.0666267740744372e-05, + "loss": 2.4586, + "step": 15856 + }, + { + "epoch": 1.2797191509966912, + "grad_norm": 0.6842665672302246, + "learning_rate": 2.0656657945410953e-05, + "loss": 2.4383, + "step": 15857 + }, + { + "epoch": 1.279799854733274, + "grad_norm": 0.7450493574142456, + "learning_rate": 2.0647050127521028e-05, + "loss": 2.4308, + "step": 15858 + }, + { + "epoch": 1.2798805584698572, + "grad_norm": 0.6928436160087585, + "learning_rate": 2.0637444287314033e-05, + "loss": 2.4726, + "step": 15859 + }, + { + "epoch": 1.2799612622064402, + "grad_norm": 0.6539968252182007, + "learning_rate": 2.06278404250294e-05, + "loss": 2.3983, + "step": 15860 + }, + { + "epoch": 1.280041965943023, + "grad_norm": 0.7183163166046143, + "learning_rate": 2.0618238540906444e-05, + "loss": 2.4172, + "step": 15861 + }, + { + "epoch": 1.2801226696796062, + "grad_norm": 0.7070814371109009, + "learning_rate": 2.0608638635184507e-05, + "loss": 2.4018, + "step": 15862 + }, + { + "epoch": 1.2802033734161893, + "grad_norm": 0.7589142918586731, + "learning_rate": 2.0599040708102847e-05, + "loss": 2.4175, + "step": 15863 + }, + { + "epoch": 1.2802840771527721, + "grad_norm": 0.6945414543151855, + "learning_rate": 2.0589444759900613e-05, + "loss": 2.4093, + "step": 15864 + }, + { + "epoch": 1.2803647808893552, + "grad_norm": 0.685482919216156, + "learning_rate": 2.0579850790817003e-05, + "loss": 2.4388, + "step": 15865 + }, + { + "epoch": 1.280445484625938, + "grad_norm": 0.7089706063270569, + "learning_rate": 2.0570258801091148e-05, + "loss": 2.3779, + "step": 15866 + }, + { + "epoch": 1.2805261883625212, + "grad_norm": 0.6994217038154602, + "learning_rate": 2.0560668790962046e-05, + "loss": 2.3757, + "step": 15867 + }, + { + "epoch": 1.280606892099104, + "grad_norm": 0.7170232534408569, + "learning_rate": 2.055108076066874e-05, + "loss": 2.4087, + "step": 15868 + }, + { + "epoch": 1.2806875958356871, + 
"grad_norm": 0.7008751034736633, + "learning_rate": 2.0541494710450206e-05, + "loss": 2.4384, + "step": 15869 + }, + { + "epoch": 1.2807682995722702, + "grad_norm": 0.6795800924301147, + "learning_rate": 2.053191064054527e-05, + "loss": 2.415, + "step": 15870 + }, + { + "epoch": 1.280849003308853, + "grad_norm": 0.6650210022926331, + "learning_rate": 2.0522328551192882e-05, + "loss": 2.4421, + "step": 15871 + }, + { + "epoch": 1.2809297070454362, + "grad_norm": 0.7045374512672424, + "learning_rate": 2.0512748442631858e-05, + "loss": 2.4285, + "step": 15872 + }, + { + "epoch": 1.2810104107820193, + "grad_norm": 0.6585350632667542, + "learning_rate": 2.0503170315100883e-05, + "loss": 2.3806, + "step": 15873 + }, + { + "epoch": 1.2810911145186021, + "grad_norm": 0.7833496332168579, + "learning_rate": 2.0493594168838725e-05, + "loss": 2.4557, + "step": 15874 + }, + { + "epoch": 1.2811718182551852, + "grad_norm": 0.7237457036972046, + "learning_rate": 2.0484020004084048e-05, + "loss": 2.3966, + "step": 15875 + }, + { + "epoch": 1.2812525219917683, + "grad_norm": 0.7416609525680542, + "learning_rate": 2.0474447821075426e-05, + "loss": 2.3729, + "step": 15876 + }, + { + "epoch": 1.2813332257283512, + "grad_norm": 0.7148095369338989, + "learning_rate": 2.046487762005146e-05, + "loss": 2.4163, + "step": 15877 + }, + { + "epoch": 1.2814139294649343, + "grad_norm": 0.670281171798706, + "learning_rate": 2.0455309401250632e-05, + "loss": 2.383, + "step": 15878 + }, + { + "epoch": 1.2814946332015174, + "grad_norm": 0.6968950629234314, + "learning_rate": 2.0445743164911457e-05, + "loss": 2.3967, + "step": 15879 + }, + { + "epoch": 1.2815753369381002, + "grad_norm": 0.783441960811615, + "learning_rate": 2.0436178911272298e-05, + "loss": 2.455, + "step": 15880 + }, + { + "epoch": 1.2816560406746833, + "grad_norm": 0.709032416343689, + "learning_rate": 2.0426616640571518e-05, + "loss": 2.4207, + "step": 15881 + }, + { + "epoch": 1.2817367444112662, + "grad_norm": 0.6727990508079529, 
+ "learning_rate": 2.0417056353047504e-05, + "loss": 2.4115, + "step": 15882 + }, + { + "epoch": 1.2818174481478493, + "grad_norm": 0.7336034774780273, + "learning_rate": 2.0407498048938445e-05, + "loss": 2.43, + "step": 15883 + }, + { + "epoch": 1.2818981518844321, + "grad_norm": 0.7649042010307312, + "learning_rate": 2.0397941728482604e-05, + "loss": 2.4655, + "step": 15884 + }, + { + "epoch": 1.2819788556210152, + "grad_norm": 0.7218052744865417, + "learning_rate": 2.038838739191816e-05, + "loss": 2.4872, + "step": 15885 + }, + { + "epoch": 1.2820595593575983, + "grad_norm": 0.7192350625991821, + "learning_rate": 2.0378835039483178e-05, + "loss": 2.4751, + "step": 15886 + }, + { + "epoch": 1.2821402630941812, + "grad_norm": 0.7059212923049927, + "learning_rate": 2.0369284671415768e-05, + "loss": 2.43, + "step": 15887 + }, + { + "epoch": 1.2822209668307643, + "grad_norm": 0.7387098073959351, + "learning_rate": 2.0359736287953956e-05, + "loss": 2.4281, + "step": 15888 + }, + { + "epoch": 1.2823016705673473, + "grad_norm": 0.7454321980476379, + "learning_rate": 2.035018988933568e-05, + "loss": 2.4372, + "step": 15889 + }, + { + "epoch": 1.2823823743039302, + "grad_norm": 0.6822765469551086, + "learning_rate": 2.034064547579888e-05, + "loss": 2.3728, + "step": 15890 + }, + { + "epoch": 1.2824630780405133, + "grad_norm": 0.6917527914047241, + "learning_rate": 2.0331103047581412e-05, + "loss": 2.3997, + "step": 15891 + }, + { + "epoch": 1.2825437817770964, + "grad_norm": 0.6734376549720764, + "learning_rate": 2.032156260492113e-05, + "loss": 2.4495, + "step": 15892 + }, + { + "epoch": 1.2826244855136792, + "grad_norm": 0.7222443222999573, + "learning_rate": 2.0312024148055776e-05, + "loss": 2.3466, + "step": 15893 + }, + { + "epoch": 1.2827051892502623, + "grad_norm": 0.703714907169342, + "learning_rate": 2.030248767722309e-05, + "loss": 2.4599, + "step": 15894 + }, + { + "epoch": 1.2827858929868454, + "grad_norm": 0.655161440372467, + "learning_rate": 
2.029295319266078e-05, + "loss": 2.3896, + "step": 15895 + }, + { + "epoch": 1.2828665967234283, + "grad_norm": 0.6449242234230042, + "learning_rate": 2.028342069460639e-05, + "loss": 2.3511, + "step": 15896 + }, + { + "epoch": 1.2829473004600114, + "grad_norm": 0.6578382849693298, + "learning_rate": 2.027389018329755e-05, + "loss": 2.3678, + "step": 15897 + }, + { + "epoch": 1.2830280041965942, + "grad_norm": 0.7047572731971741, + "learning_rate": 2.0264361658971797e-05, + "loss": 2.4522, + "step": 15898 + }, + { + "epoch": 1.2831087079331773, + "grad_norm": 0.7310267090797424, + "learning_rate": 2.0254835121866554e-05, + "loss": 2.4117, + "step": 15899 + }, + { + "epoch": 1.2831894116697602, + "grad_norm": 0.7020776867866516, + "learning_rate": 2.024531057221927e-05, + "loss": 2.4033, + "step": 15900 + }, + { + "epoch": 1.2832701154063433, + "grad_norm": 0.6967746615409851, + "learning_rate": 2.023578801026733e-05, + "loss": 2.3491, + "step": 15901 + }, + { + "epoch": 1.2833508191429264, + "grad_norm": 0.7062339782714844, + "learning_rate": 2.022626743624807e-05, + "loss": 2.4598, + "step": 15902 + }, + { + "epoch": 1.2834315228795092, + "grad_norm": 0.730625331401825, + "learning_rate": 2.0216748850398748e-05, + "loss": 2.4995, + "step": 15903 + }, + { + "epoch": 1.2835122266160923, + "grad_norm": 0.6634403467178345, + "learning_rate": 2.020723225295662e-05, + "loss": 2.3843, + "step": 15904 + }, + { + "epoch": 1.2835929303526754, + "grad_norm": 0.6924816966056824, + "learning_rate": 2.019771764415883e-05, + "loss": 2.4258, + "step": 15905 + }, + { + "epoch": 1.2836736340892583, + "grad_norm": 0.7127227187156677, + "learning_rate": 2.018820502424251e-05, + "loss": 2.4038, + "step": 15906 + }, + { + "epoch": 1.2837543378258414, + "grad_norm": 0.7108431458473206, + "learning_rate": 2.0178694393444785e-05, + "loss": 2.4571, + "step": 15907 + }, + { + "epoch": 1.2838350415624245, + "grad_norm": 0.7478229999542236, + "learning_rate": 2.016918575200262e-05, + "loss": 
2.4526, + "step": 15908 + }, + { + "epoch": 1.2839157452990073, + "grad_norm": 0.65651935338974, + "learning_rate": 2.015967910015303e-05, + "loss": 2.434, + "step": 15909 + }, + { + "epoch": 1.2839964490355904, + "grad_norm": 0.7285312414169312, + "learning_rate": 2.015017443813294e-05, + "loss": 2.3857, + "step": 15910 + }, + { + "epoch": 1.2840771527721733, + "grad_norm": 0.6947231292724609, + "learning_rate": 2.014067176617923e-05, + "loss": 2.4294, + "step": 15911 + }, + { + "epoch": 1.2841578565087564, + "grad_norm": 0.6965867877006531, + "learning_rate": 2.0131171084528744e-05, + "loss": 2.4514, + "step": 15912 + }, + { + "epoch": 1.2842385602453392, + "grad_norm": 0.6962311863899231, + "learning_rate": 2.0121672393418246e-05, + "loss": 2.4391, + "step": 15913 + }, + { + "epoch": 1.2843192639819223, + "grad_norm": 0.6687992215156555, + "learning_rate": 2.01121756930845e-05, + "loss": 2.4266, + "step": 15914 + }, + { + "epoch": 1.2843999677185054, + "grad_norm": 0.7118954658508301, + "learning_rate": 2.0102680983764145e-05, + "loss": 2.3436, + "step": 15915 + }, + { + "epoch": 1.2844806714550883, + "grad_norm": 0.6866199970245361, + "learning_rate": 2.009318826569382e-05, + "loss": 2.3719, + "step": 15916 + }, + { + "epoch": 1.2845613751916714, + "grad_norm": 0.6701404452323914, + "learning_rate": 2.008369753911016e-05, + "loss": 2.4875, + "step": 15917 + }, + { + "epoch": 1.2846420789282544, + "grad_norm": 0.7020917534828186, + "learning_rate": 2.007420880424963e-05, + "loss": 2.3871, + "step": 15918 + }, + { + "epoch": 1.2847227826648373, + "grad_norm": 0.6865704655647278, + "learning_rate": 2.006472206134875e-05, + "loss": 2.3815, + "step": 15919 + }, + { + "epoch": 1.2848034864014204, + "grad_norm": 0.7106871008872986, + "learning_rate": 2.0055237310643948e-05, + "loss": 2.4276, + "step": 15920 + }, + { + "epoch": 1.2848841901380035, + "grad_norm": 0.6891976594924927, + "learning_rate": 2.004575455237161e-05, + "loss": 2.3641, + "step": 15921 + }, + { + 
"epoch": 1.2849648938745863, + "grad_norm": 0.6385056972503662, + "learning_rate": 2.0036273786768067e-05, + "loss": 2.3898, + "step": 15922 + }, + { + "epoch": 1.2850455976111694, + "grad_norm": 0.7038321495056152, + "learning_rate": 2.0026795014069633e-05, + "loss": 2.4688, + "step": 15923 + }, + { + "epoch": 1.2851263013477525, + "grad_norm": 0.6310208439826965, + "learning_rate": 2.0017318234512494e-05, + "loss": 2.3821, + "step": 15924 + }, + { + "epoch": 1.2852070050843354, + "grad_norm": 0.6989426016807556, + "learning_rate": 2.0007843448332865e-05, + "loss": 2.434, + "step": 15925 + }, + { + "epoch": 1.2852877088209185, + "grad_norm": 0.6666426658630371, + "learning_rate": 1.9998370655766886e-05, + "loss": 2.4687, + "step": 15926 + }, + { + "epoch": 1.2853684125575013, + "grad_norm": 0.6421633958816528, + "learning_rate": 1.9988899857050648e-05, + "loss": 2.4269, + "step": 15927 + }, + { + "epoch": 1.2854491162940844, + "grad_norm": 0.7229343056678772, + "learning_rate": 1.997943105242016e-05, + "loss": 2.4139, + "step": 15928 + }, + { + "epoch": 1.2855298200306673, + "grad_norm": 0.7168964743614197, + "learning_rate": 1.9969964242111427e-05, + "loss": 2.405, + "step": 15929 + }, + { + "epoch": 1.2856105237672504, + "grad_norm": 0.6824480891227722, + "learning_rate": 1.99604994263604e-05, + "loss": 2.3955, + "step": 15930 + }, + { + "epoch": 1.2856912275038335, + "grad_norm": 0.670956552028656, + "learning_rate": 1.995103660540294e-05, + "loss": 2.3743, + "step": 15931 + }, + { + "epoch": 1.2857719312404163, + "grad_norm": 0.7057971954345703, + "learning_rate": 1.9941575779474864e-05, + "loss": 2.4496, + "step": 15932 + }, + { + "epoch": 1.2858526349769994, + "grad_norm": 0.7802264094352722, + "learning_rate": 1.9932116948812052e-05, + "loss": 2.4231, + "step": 15933 + }, + { + "epoch": 1.2859333387135825, + "grad_norm": 0.7151160836219788, + "learning_rate": 1.992266011365016e-05, + "loss": 2.4319, + "step": 15934 + }, + { + "epoch": 1.2860140424501654, + 
"grad_norm": 0.7078769207000732, + "learning_rate": 1.991320527422489e-05, + "loss": 2.4037, + "step": 15935 + }, + { + "epoch": 1.2860947461867485, + "grad_norm": 0.7483938336372375, + "learning_rate": 1.9903752430771927e-05, + "loss": 2.4946, + "step": 15936 + }, + { + "epoch": 1.2861754499233315, + "grad_norm": 0.7774620056152344, + "learning_rate": 1.9894301583526808e-05, + "loss": 2.4536, + "step": 15937 + }, + { + "epoch": 1.2862561536599144, + "grad_norm": 0.7311348915100098, + "learning_rate": 1.988485273272509e-05, + "loss": 2.4178, + "step": 15938 + }, + { + "epoch": 1.2863368573964975, + "grad_norm": 0.6821309328079224, + "learning_rate": 1.9875405878602282e-05, + "loss": 2.4851, + "step": 15939 + }, + { + "epoch": 1.2864175611330806, + "grad_norm": 0.7081651091575623, + "learning_rate": 1.9865961021393785e-05, + "loss": 2.4377, + "step": 15940 + }, + { + "epoch": 1.2864982648696635, + "grad_norm": 0.8093439340591431, + "learning_rate": 1.9856518161335014e-05, + "loss": 2.4681, + "step": 15941 + }, + { + "epoch": 1.2865789686062465, + "grad_norm": 0.6769521832466125, + "learning_rate": 1.984707729866131e-05, + "loss": 2.4231, + "step": 15942 + }, + { + "epoch": 1.2866596723428294, + "grad_norm": 0.6973356604576111, + "learning_rate": 1.983763843360795e-05, + "loss": 2.4144, + "step": 15943 + }, + { + "epoch": 1.2867403760794125, + "grad_norm": 0.7814682722091675, + "learning_rate": 1.9828201566410197e-05, + "loss": 2.3935, + "step": 15944 + }, + { + "epoch": 1.2868210798159954, + "grad_norm": 0.7545498609542847, + "learning_rate": 1.9818766697303236e-05, + "loss": 2.4136, + "step": 15945 + }, + { + "epoch": 1.2869017835525784, + "grad_norm": 0.7165581583976746, + "learning_rate": 1.9809333826522225e-05, + "loss": 2.3757, + "step": 15946 + }, + { + "epoch": 1.2869824872891615, + "grad_norm": 0.6812456846237183, + "learning_rate": 1.9799902954302208e-05, + "loss": 2.4143, + "step": 15947 + }, + { + "epoch": 1.2870631910257444, + "grad_norm": 
0.7231366634368896, + "learning_rate": 1.9790474080878262e-05, + "loss": 2.4837, + "step": 15948 + }, + { + "epoch": 1.2871438947623275, + "grad_norm": 0.690916121006012, + "learning_rate": 1.9781047206485393e-05, + "loss": 2.4513, + "step": 15949 + }, + { + "epoch": 1.2872245984989106, + "grad_norm": 0.6608129143714905, + "learning_rate": 1.9771622331358485e-05, + "loss": 2.3908, + "step": 15950 + }, + { + "epoch": 1.2873053022354934, + "grad_norm": 0.7194501161575317, + "learning_rate": 1.976219945573249e-05, + "loss": 2.38, + "step": 15951 + }, + { + "epoch": 1.2873860059720765, + "grad_norm": 0.7315083146095276, + "learning_rate": 1.9752778579842213e-05, + "loss": 2.4351, + "step": 15952 + }, + { + "epoch": 1.2874667097086596, + "grad_norm": 0.7313492298126221, + "learning_rate": 1.974335970392246e-05, + "loss": 2.3531, + "step": 15953 + }, + { + "epoch": 1.2875474134452425, + "grad_norm": 0.6982418894767761, + "learning_rate": 1.9733942828207985e-05, + "loss": 2.4319, + "step": 15954 + }, + { + "epoch": 1.2876281171818256, + "grad_norm": 0.6664792895317078, + "learning_rate": 1.972452795293347e-05, + "loss": 2.3981, + "step": 15955 + }, + { + "epoch": 1.2877088209184087, + "grad_norm": 0.6849696040153503, + "learning_rate": 1.9715115078333578e-05, + "loss": 2.3952, + "step": 15956 + }, + { + "epoch": 1.2877895246549915, + "grad_norm": 0.7355225086212158, + "learning_rate": 1.9705704204642873e-05, + "loss": 2.4556, + "step": 15957 + }, + { + "epoch": 1.2878702283915746, + "grad_norm": 0.6850876808166504, + "learning_rate": 1.9696295332095906e-05, + "loss": 2.3873, + "step": 15958 + }, + { + "epoch": 1.2879509321281575, + "grad_norm": 0.6449069976806641, + "learning_rate": 1.9686888460927198e-05, + "loss": 2.4226, + "step": 15959 + }, + { + "epoch": 1.2880316358647406, + "grad_norm": 0.7517794966697693, + "learning_rate": 1.967748359137114e-05, + "loss": 2.377, + "step": 15960 + }, + { + "epoch": 1.2881123396013234, + "grad_norm": 0.6861303448677063, + 
"learning_rate": 1.9668080723662162e-05, + "loss": 2.4451, + "step": 15961 + }, + { + "epoch": 1.2881930433379065, + "grad_norm": 0.7025154829025269, + "learning_rate": 1.9658679858034602e-05, + "loss": 2.3856, + "step": 15962 + }, + { + "epoch": 1.2882737470744896, + "grad_norm": 0.6775577068328857, + "learning_rate": 1.964928099472275e-05, + "loss": 2.4383, + "step": 15963 + }, + { + "epoch": 1.2883544508110725, + "grad_norm": 0.6889605522155762, + "learning_rate": 1.963988413396086e-05, + "loss": 2.3766, + "step": 15964 + }, + { + "epoch": 1.2884351545476556, + "grad_norm": 0.6697166562080383, + "learning_rate": 1.9630489275983156e-05, + "loss": 2.44, + "step": 15965 + }, + { + "epoch": 1.2885158582842386, + "grad_norm": 0.6895437836647034, + "learning_rate": 1.96210964210237e-05, + "loss": 2.4242, + "step": 15966 + }, + { + "epoch": 1.2885965620208215, + "grad_norm": 0.6955164670944214, + "learning_rate": 1.9611705569316652e-05, + "loss": 2.3915, + "step": 15967 + }, + { + "epoch": 1.2886772657574046, + "grad_norm": 0.7133461236953735, + "learning_rate": 1.960231672109605e-05, + "loss": 2.4307, + "step": 15968 + }, + { + "epoch": 1.2887579694939877, + "grad_norm": 0.6874761581420898, + "learning_rate": 1.9592929876595857e-05, + "loss": 2.4371, + "step": 15969 + }, + { + "epoch": 1.2888386732305706, + "grad_norm": 0.7168406248092651, + "learning_rate": 1.9583545036050044e-05, + "loss": 2.4681, + "step": 15970 + }, + { + "epoch": 1.2889193769671536, + "grad_norm": 0.701874852180481, + "learning_rate": 1.9574162199692492e-05, + "loss": 2.4746, + "step": 15971 + }, + { + "epoch": 1.2890000807037365, + "grad_norm": 0.7118390202522278, + "learning_rate": 1.9564781367757058e-05, + "loss": 2.4139, + "step": 15972 + }, + { + "epoch": 1.2890807844403196, + "grad_norm": 0.6597239971160889, + "learning_rate": 1.955540254047753e-05, + "loss": 2.4346, + "step": 15973 + }, + { + "epoch": 1.2891614881769025, + "grad_norm": 0.7461068630218506, + "learning_rate": 
1.9546025718087645e-05, + "loss": 2.4331, + "step": 15974 + }, + { + "epoch": 1.2892421919134855, + "grad_norm": 0.6992977857589722, + "learning_rate": 1.953665090082115e-05, + "loss": 2.424, + "step": 15975 + }, + { + "epoch": 1.2893228956500686, + "grad_norm": 0.6674031615257263, + "learning_rate": 1.9527278088911617e-05, + "loss": 2.4545, + "step": 15976 + }, + { + "epoch": 1.2894035993866515, + "grad_norm": 0.7377402782440186, + "learning_rate": 1.9517907282592662e-05, + "loss": 2.4625, + "step": 15977 + }, + { + "epoch": 1.2894843031232346, + "grad_norm": 0.720579206943512, + "learning_rate": 1.950853848209788e-05, + "loss": 2.4073, + "step": 15978 + }, + { + "epoch": 1.2895650068598177, + "grad_norm": 0.7221893668174744, + "learning_rate": 1.9499171687660688e-05, + "loss": 2.4056, + "step": 15979 + }, + { + "epoch": 1.2896457105964005, + "grad_norm": 0.7409725189208984, + "learning_rate": 1.9489806899514574e-05, + "loss": 2.3899, + "step": 15980 + }, + { + "epoch": 1.2897264143329836, + "grad_norm": 0.6946583986282349, + "learning_rate": 1.948044411789296e-05, + "loss": 2.4832, + "step": 15981 + }, + { + "epoch": 1.2898071180695667, + "grad_norm": 0.7031306028366089, + "learning_rate": 1.9471083343029096e-05, + "loss": 2.4265, + "step": 15982 + }, + { + "epoch": 1.2898878218061496, + "grad_norm": 0.660093367099762, + "learning_rate": 1.946172457515637e-05, + "loss": 2.4883, + "step": 15983 + }, + { + "epoch": 1.2899685255427327, + "grad_norm": 0.700641930103302, + "learning_rate": 1.945236781450802e-05, + "loss": 2.4096, + "step": 15984 + }, + { + "epoch": 1.2900492292793158, + "grad_norm": 0.7350760698318481, + "learning_rate": 1.9443013061317205e-05, + "loss": 2.4161, + "step": 15985 + }, + { + "epoch": 1.2901299330158986, + "grad_norm": 0.7567386031150818, + "learning_rate": 1.9433660315817072e-05, + "loss": 2.3978, + "step": 15986 + }, + { + "epoch": 1.2902106367524817, + "grad_norm": 0.7471369504928589, + "learning_rate": 1.9424309578240717e-05, + 
"loss": 2.4079, + "step": 15987 + }, + { + "epoch": 1.2902913404890646, + "grad_norm": 0.6630815267562866, + "learning_rate": 1.941496084882124e-05, + "loss": 2.4223, + "step": 15988 + }, + { + "epoch": 1.2903720442256477, + "grad_norm": 0.687224268913269, + "learning_rate": 1.940561412779155e-05, + "loss": 2.4413, + "step": 15989 + }, + { + "epoch": 1.2904527479622305, + "grad_norm": 0.6989685297012329, + "learning_rate": 1.9396269415384637e-05, + "loss": 2.3651, + "step": 15990 + }, + { + "epoch": 1.2905334516988136, + "grad_norm": 0.7256720066070557, + "learning_rate": 1.938692671183342e-05, + "loss": 2.4526, + "step": 15991 + }, + { + "epoch": 1.2906141554353967, + "grad_norm": 0.692032516002655, + "learning_rate": 1.9377586017370685e-05, + "loss": 2.3936, + "step": 15992 + }, + { + "epoch": 1.2906948591719796, + "grad_norm": 0.6733511686325073, + "learning_rate": 1.936824733222925e-05, + "loss": 2.4691, + "step": 15993 + }, + { + "epoch": 1.2907755629085627, + "grad_norm": 0.6698563098907471, + "learning_rate": 1.935891065664187e-05, + "loss": 2.3904, + "step": 15994 + }, + { + "epoch": 1.2908562666451457, + "grad_norm": 0.660521388053894, + "learning_rate": 1.934957599084123e-05, + "loss": 2.4647, + "step": 15995 + }, + { + "epoch": 1.2909369703817286, + "grad_norm": 0.6714615821838379, + "learning_rate": 1.9340243335059982e-05, + "loss": 2.403, + "step": 15996 + }, + { + "epoch": 1.2910176741183117, + "grad_norm": 0.726099967956543, + "learning_rate": 1.9330912689530746e-05, + "loss": 2.4101, + "step": 15997 + }, + { + "epoch": 1.2910983778548948, + "grad_norm": 0.6585896015167236, + "learning_rate": 1.932158405448601e-05, + "loss": 2.3813, + "step": 15998 + }, + { + "epoch": 1.2911790815914777, + "grad_norm": 0.7967908382415771, + "learning_rate": 1.9312257430158286e-05, + "loss": 2.4188, + "step": 15999 + }, + { + "epoch": 1.2912597853280607, + "grad_norm": 0.7340367436408997, + "learning_rate": 1.9302932816780063e-05, + "loss": 2.4642, + "step": 16000 + 
}, + { + "epoch": 1.2912597853280607, + "eval_loss": 2.3791537284851074, + "eval_runtime": 780.6124, + "eval_samples_per_second": 3.356, + "eval_steps_per_second": 0.56, + "step": 16000 + } + ], + "logging_steps": 1, + "max_steps": 20000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.83913097421312e+17, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/out/checkpoint-16000/training_args.bin b/out/checkpoint-16000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ae4a8b118e2a671c30e37a5d24a42d8090b49055 --- /dev/null +++ b/out/checkpoint-16000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c2928f4418c9a306cbe65ca0c1b156ae660c125ec9122008a9f527a50891704 +size 5112 diff --git a/out/checkpoint-17000/config.json b/out/checkpoint-17000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..16f06bb1cdbf882eb90d57ea1906b3790e298a3f --- /dev/null +++ b/out/checkpoint-17000/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "./models/checkpoint-10000", + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1877, + "pad_token_id": 1026, + "reorder_and_upcast_attn": false, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": false, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + 
"summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 50 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.41.2", + "use_cache": true, + "vocab_size": 6027 +} diff --git a/out/checkpoint-17000/generation_config.json b/out/checkpoint-17000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..51f4dbe1c89cfa9da69401685604ff16254d9d20 --- /dev/null +++ b/out/checkpoint-17000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 50256, + "eos_token_id": 50256, + "pad_token_id": 1026, + "transformers_version": "4.41.2" +} diff --git a/out/checkpoint-17000/model.safetensors b/out/checkpoint-17000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a5cc83fe7fb93e1eb36d93fb2cd805c9a9938d45 --- /dev/null +++ b/out/checkpoint-17000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5908bb23ce4d95c4debe9c87c955d1579afdd895efad2abeb078daa82d143cee +size 364520064 diff --git a/out/checkpoint-17000/optimizer.pt b/out/checkpoint-17000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c683a5354b78c90a3de54f1d5dee92124c438f3d --- /dev/null +++ b/out/checkpoint-17000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8d2bca9516f1245ba29b1c950023e320c69fe514fac1ec3de7ccb5d512c40a9 +size 729134010 diff --git a/out/checkpoint-17000/rng_state.pth b/out/checkpoint-17000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3c51ac30239bc582d9164676ef031df3402b8cd9 --- /dev/null +++ b/out/checkpoint-17000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a86bcf45382dcd681026439a76f0ba22431207f858c90bfa5b701cb621ac2f37 +size 14244 diff --git a/out/checkpoint-17000/scheduler.pt 
b/out/checkpoint-17000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f5899f26665543ebdb126e05829b9ed0f668ba5 --- /dev/null +++ b/out/checkpoint-17000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c90565d609d17b685b7ce614ba9856a01495c88aaa563cceed5b159cc71e24fc +size 1064 diff --git a/out/checkpoint-17000/special_tokens_map.json b/out/checkpoint-17000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1b9fa6207c25267215ce16bfacdcb9089df3e897 --- /dev/null +++ b/out/checkpoint-17000/special_tokens_map.json @@ -0,0 +1,9 @@ +{ + "pad_token": { + "content": "<|padding|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/out/checkpoint-17000/tokenizer.json b/out/checkpoint-17000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..2bf66a33fda75b69f9b1a9597987f418f5acfb49 --- /dev/null +++ b/out/checkpoint-17000/tokenizer.json @@ -0,0 +1,20279 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|audio:0|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|audio:1|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|audio:2|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 3, + "content": "<|audio:3|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 4, + "content": "<|audio:4|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 5, + "content": "<|audio:5|>", + "single_word": false, + "lstrip": false, + "rstrip": false, 
+ "normalized": false, + "special": true + }, + { + "id": 6, + "content": "<|audio:6|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 7, + "content": "<|audio:7|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 8, + "content": "<|audio:8|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 9, + "content": "<|audio:9|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 10, + "content": "<|audio:10|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 11, + "content": "<|audio:11|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 12, + "content": "<|audio:12|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 13, + "content": "<|audio:13|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 14, + "content": "<|audio:14|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 15, + "content": "<|audio:15|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 16, + "content": "<|audio:16|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 17, + "content": "<|audio:17|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 18, + "content": "<|audio:18|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": 
false, + "special": true + }, + { + "id": 19, + "content": "<|audio:19|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 20, + "content": "<|audio:20|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 21, + "content": "<|audio:21|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 22, + "content": "<|audio:22|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 23, + "content": "<|audio:23|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 24, + "content": "<|audio:24|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 25, + "content": "<|audio:25|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 26, + "content": "<|audio:26|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 27, + "content": "<|audio:27|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 28, + "content": "<|audio:28|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 29, + "content": "<|audio:29|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 30, + "content": "<|audio:30|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 31, + "content": "<|audio:31|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 32, + "content": "<|audio:32|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 33, + "content": "<|audio:33|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 34, + "content": "<|audio:34|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 35, + "content": "<|audio:35|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 36, + "content": "<|audio:36|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 37, + "content": "<|audio:37|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 38, + "content": "<|audio:38|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 39, + "content": "<|audio:39|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 40, + "content": "<|audio:40|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 41, + "content": "<|audio:41|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 42, + "content": "<|audio:42|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 43, + "content": "<|audio:43|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 44, + "content": "<|audio:44|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": 
true + }, + { + "id": 45, + "content": "<|audio:45|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 46, + "content": "<|audio:46|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 47, + "content": "<|audio:47|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 48, + "content": "<|audio:48|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 49, + "content": "<|audio:49|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 50, + "content": "<|audio:50|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 51, + "content": "<|audio:51|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 52, + "content": "<|audio:52|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 53, + "content": "<|audio:53|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 54, + "content": "<|audio:54|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 55, + "content": "<|audio:55|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 56, + "content": "<|audio:56|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 57, + "content": "<|audio:57|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, 
+ { + "id": 58, + "content": "<|audio:58|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 59, + "content": "<|audio:59|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 60, + "content": "<|audio:60|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 61, + "content": "<|audio:61|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 62, + "content": "<|audio:62|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 63, + "content": "<|audio:63|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 64, + "content": "<|audio:64|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 65, + "content": "<|audio:65|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 66, + "content": "<|audio:66|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 67, + "content": "<|audio:67|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 68, + "content": "<|audio:68|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 69, + "content": "<|audio:69|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 70, + "content": "<|audio:70|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + 
"id": 71, + "content": "<|audio:71|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 72, + "content": "<|audio:72|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 73, + "content": "<|audio:73|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 74, + "content": "<|audio:74|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 75, + "content": "<|audio:75|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 76, + "content": "<|audio:76|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 77, + "content": "<|audio:77|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 78, + "content": "<|audio:78|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 79, + "content": "<|audio:79|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 80, + "content": "<|audio:80|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 81, + "content": "<|audio:81|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 82, + "content": "<|audio:82|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 83, + "content": "<|audio:83|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 84, 
+ "content": "<|audio:84|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 85, + "content": "<|audio:85|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 86, + "content": "<|audio:86|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 87, + "content": "<|audio:87|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 88, + "content": "<|audio:88|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 89, + "content": "<|audio:89|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 90, + "content": "<|audio:90|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 91, + "content": "<|audio:91|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 92, + "content": "<|audio:92|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 93, + "content": "<|audio:93|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 94, + "content": "<|audio:94|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 95, + "content": "<|audio:95|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 96, + "content": "<|audio:96|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 97, + 
"content": "<|audio:97|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 98, + "content": "<|audio:98|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 99, + "content": "<|audio:99|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 100, + "content": "<|audio:100|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 101, + "content": "<|audio:101|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 102, + "content": "<|audio:102|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 103, + "content": "<|audio:103|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 104, + "content": "<|audio:104|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 105, + "content": "<|audio:105|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 106, + "content": "<|audio:106|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 107, + "content": "<|audio:107|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 108, + "content": "<|audio:108|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 109, + "content": "<|audio:109|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + 
"id": 110, + "content": "<|audio:110|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 111, + "content": "<|audio:111|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 112, + "content": "<|audio:112|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 113, + "content": "<|audio:113|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 114, + "content": "<|audio:114|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 115, + "content": "<|audio:115|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 116, + "content": "<|audio:116|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 117, + "content": "<|audio:117|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 118, + "content": "<|audio:118|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 119, + "content": "<|audio:119|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 120, + "content": "<|audio:120|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 121, + "content": "<|audio:121|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 122, + "content": "<|audio:122|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": 
true + }, + { + "id": 123, + "content": "<|audio:123|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 124, + "content": "<|audio:124|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 125, + "content": "<|audio:125|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 126, + "content": "<|audio:126|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127, + "content": "<|audio:127|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 128, + "content": "<|audio:128|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 129, + "content": "<|audio:129|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 130, + "content": "<|audio:130|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 131, + "content": "<|audio:131|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 132, + "content": "<|audio:132|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 133, + "content": "<|audio:133|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 134, + "content": "<|audio:134|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 135, + "content": "<|audio:135|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": 
false, + "special": true + }, + { + "id": 136, + "content": "<|audio:136|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 137, + "content": "<|audio:137|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 138, + "content": "<|audio:138|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 139, + "content": "<|audio:139|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 140, + "content": "<|audio:140|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 141, + "content": "<|audio:141|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 142, + "content": "<|audio:142|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 143, + "content": "<|audio:143|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 144, + "content": "<|audio:144|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 145, + "content": "<|audio:145|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 146, + "content": "<|audio:146|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 147, + "content": "<|audio:147|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 148, + "content": "<|audio:148|>", + "single_word": false, + "lstrip": false, + "rstrip": false, 
+ "normalized": false, + "special": true + }, + { + "id": 149, + "content": "<|audio:149|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 150, + "content": "<|audio:150|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 151, + "content": "<|audio:151|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 152, + "content": "<|audio:152|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 153, + "content": "<|audio:153|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 154, + "content": "<|audio:154|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 155, + "content": "<|audio:155|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 156, + "content": "<|audio:156|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 157, + "content": "<|audio:157|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 158, + "content": "<|audio:158|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 159, + "content": "<|audio:159|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 160, + "content": "<|audio:160|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 161, + "content": "<|audio:161|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 162, + "content": "<|audio:162|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 163, + "content": "<|audio:163|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 164, + "content": "<|audio:164|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 165, + "content": "<|audio:165|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 166, + "content": "<|audio:166|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 167, + "content": "<|audio:167|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 168, + "content": "<|audio:168|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 169, + "content": "<|audio:169|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 170, + "content": "<|audio:170|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 171, + "content": "<|audio:171|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 172, + "content": "<|audio:172|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 173, + "content": "<|audio:173|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 174, + "content": "<|audio:174|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 175, + "content": "<|audio:175|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 176, + "content": "<|audio:176|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 177, + "content": "<|audio:177|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 178, + "content": "<|audio:178|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 179, + "content": "<|audio:179|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 180, + "content": "<|audio:180|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 181, + "content": "<|audio:181|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 182, + "content": "<|audio:182|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 183, + "content": "<|audio:183|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 184, + "content": "<|audio:184|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 185, + "content": "<|audio:185|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 186, + "content": "<|audio:186|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 187, + "content": "<|audio:187|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 188, + "content": "<|audio:188|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 189, + "content": "<|audio:189|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 190, + "content": "<|audio:190|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 191, + "content": "<|audio:191|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 192, + "content": "<|audio:192|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 193, + "content": "<|audio:193|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 194, + "content": "<|audio:194|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 195, + "content": "<|audio:195|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 196, + "content": "<|audio:196|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 197, + "content": "<|audio:197|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 198, + "content": "<|audio:198|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 199, + "content": "<|audio:199|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 200, + "content": 
"<|audio:200|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 201, + "content": "<|audio:201|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 202, + "content": "<|audio:202|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 203, + "content": "<|audio:203|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 204, + "content": "<|audio:204|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 205, + "content": "<|audio:205|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 206, + "content": "<|audio:206|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 207, + "content": "<|audio:207|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 208, + "content": "<|audio:208|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 209, + "content": "<|audio:209|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 210, + "content": "<|audio:210|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 211, + "content": "<|audio:211|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 212, + "content": "<|audio:212|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
213, + "content": "<|audio:213|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 214, + "content": "<|audio:214|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 215, + "content": "<|audio:215|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 216, + "content": "<|audio:216|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 217, + "content": "<|audio:217|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 218, + "content": "<|audio:218|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 219, + "content": "<|audio:219|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 220, + "content": "<|audio:220|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 221, + "content": "<|audio:221|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 222, + "content": "<|audio:222|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 223, + "content": "<|audio:223|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 224, + "content": "<|audio:224|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 225, + "content": "<|audio:225|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 226, + "content": "<|audio:226|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 227, + "content": "<|audio:227|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 228, + "content": "<|audio:228|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 229, + "content": "<|audio:229|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 230, + "content": "<|audio:230|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 231, + "content": "<|audio:231|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 232, + "content": "<|audio:232|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 233, + "content": "<|audio:233|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 234, + "content": "<|audio:234|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 235, + "content": "<|audio:235|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 236, + "content": "<|audio:236|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 237, + "content": "<|audio:237|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 238, + "content": "<|audio:238|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 239, + "content": "<|audio:239|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 240, + "content": "<|audio:240|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 241, + "content": "<|audio:241|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 242, + "content": "<|audio:242|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 243, + "content": "<|audio:243|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 244, + "content": "<|audio:244|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 245, + "content": "<|audio:245|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 246, + "content": "<|audio:246|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 247, + "content": "<|audio:247|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 248, + "content": "<|audio:248|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 249, + "content": "<|audio:249|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 250, + "content": "<|audio:250|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 251, + "content": "<|audio:251|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 252, + "content": "<|audio:252|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 253, + "content": "<|audio:253|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 254, + "content": "<|audio:254|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 255, + "content": "<|audio:255|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 256, + "content": "<|audio:256|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 257, + "content": "<|audio:257|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 258, + "content": "<|audio:258|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 259, + "content": "<|audio:259|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 260, + "content": "<|audio:260|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 261, + "content": "<|audio:261|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 262, + "content": "<|audio:262|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 263, + "content": "<|audio:263|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 264, + "content": "<|audio:264|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 265, + "content": "<|audio:265|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 266, + "content": "<|audio:266|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 267, + "content": "<|audio:267|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 268, + "content": "<|audio:268|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 269, + "content": "<|audio:269|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 270, + "content": "<|audio:270|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 271, + "content": "<|audio:271|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 272, + "content": "<|audio:272|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 273, + "content": "<|audio:273|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 274, + "content": "<|audio:274|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 275, + "content": "<|audio:275|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 276, + "content": "<|audio:276|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 277, + "content": "<|audio:277|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 278, + "content": "<|audio:278|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 279, + "content": "<|audio:279|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 280, + "content": "<|audio:280|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 281, + "content": "<|audio:281|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 282, + "content": "<|audio:282|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 283, + "content": "<|audio:283|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 284, + "content": "<|audio:284|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 285, + "content": "<|audio:285|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 286, + "content": "<|audio:286|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 287, + "content": "<|audio:287|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 288, + "content": "<|audio:288|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 289, + "content": "<|audio:289|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 290, + "content": "<|audio:290|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 291, + "content": "<|audio:291|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 292, + "content": "<|audio:292|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 293, + "content": "<|audio:293|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 294, + "content": "<|audio:294|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 295, + "content": "<|audio:295|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 296, + "content": "<|audio:296|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 297, + "content": "<|audio:297|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 298, + "content": "<|audio:298|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 299, + "content": "<|audio:299|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 300, + "content": "<|audio:300|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 301, + "content": "<|audio:301|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 302, + "content": "<|audio:302|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 303, + "content": 
"<|audio:303|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 304, + "content": "<|audio:304|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 305, + "content": "<|audio:305|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 306, + "content": "<|audio:306|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 307, + "content": "<|audio:307|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 308, + "content": "<|audio:308|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 309, + "content": "<|audio:309|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 310, + "content": "<|audio:310|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 311, + "content": "<|audio:311|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 312, + "content": "<|audio:312|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 313, + "content": "<|audio:313|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 314, + "content": "<|audio:314|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 315, + "content": "<|audio:315|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
316, + "content": "<|audio:316|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 317, + "content": "<|audio:317|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 318, + "content": "<|audio:318|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 319, + "content": "<|audio:319|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 320, + "content": "<|audio:320|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 321, + "content": "<|audio:321|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 322, + "content": "<|audio:322|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 323, + "content": "<|audio:323|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 324, + "content": "<|audio:324|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 325, + "content": "<|audio:325|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 326, + "content": "<|audio:326|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 327, + "content": "<|audio:327|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 328, + "content": "<|audio:328|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 329, + "content": "<|audio:329|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 330, + "content": "<|audio:330|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 331, + "content": "<|audio:331|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 332, + "content": "<|audio:332|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 333, + "content": "<|audio:333|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 334, + "content": "<|audio:334|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 335, + "content": "<|audio:335|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 336, + "content": "<|audio:336|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 337, + "content": "<|audio:337|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 338, + "content": "<|audio:338|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 339, + "content": "<|audio:339|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 340, + "content": "<|audio:340|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 341, + "content": "<|audio:341|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 342, + "content": "<|audio:342|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 343, + "content": "<|audio:343|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 344, + "content": "<|audio:344|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 345, + "content": "<|audio:345|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 346, + "content": "<|audio:346|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 347, + "content": "<|audio:347|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 348, + "content": "<|audio:348|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 349, + "content": "<|audio:349|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 350, + "content": "<|audio:350|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 351, + "content": "<|audio:351|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 352, + "content": "<|audio:352|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 353, + "content": "<|audio:353|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 354, + "content": "<|audio:354|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 355, + "content": "<|audio:355|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 356, + "content": "<|audio:356|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 357, + "content": "<|audio:357|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 358, + "content": "<|audio:358|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 359, + "content": "<|audio:359|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 360, + "content": "<|audio:360|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 361, + "content": "<|audio:361|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 362, + "content": "<|audio:362|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 363, + "content": "<|audio:363|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 364, + "content": "<|audio:364|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 365, + "content": "<|audio:365|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 366, + "content": "<|audio:366|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 367, + "content": "<|audio:367|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 368, + "content": "<|audio:368|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 369, + "content": "<|audio:369|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 370, + "content": "<|audio:370|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 371, + "content": "<|audio:371|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 372, + "content": "<|audio:372|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 373, + "content": "<|audio:373|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 374, + "content": "<|audio:374|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 375, + "content": "<|audio:375|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 376, + "content": "<|audio:376|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 377, + "content": "<|audio:377|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 378, + "content": "<|audio:378|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 379, + "content": "<|audio:379|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 380, + "content": "<|audio:380|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 381, + "content": "<|audio:381|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 382, + "content": "<|audio:382|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 383, + "content": "<|audio:383|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 384, + "content": "<|audio:384|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 385, + "content": "<|audio:385|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 386, + "content": "<|audio:386|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 387, + "content": "<|audio:387|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 388, + "content": "<|audio:388|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 389, + "content": "<|audio:389|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 390, + "content": "<|audio:390|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 391, + "content": "<|audio:391|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 392, + "content": "<|audio:392|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 393, + "content": "<|audio:393|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 394, + "content": "<|audio:394|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 395, + "content": "<|audio:395|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 396, + "content": "<|audio:396|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 397, + "content": "<|audio:397|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 398, + "content": "<|audio:398|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 399, + "content": "<|audio:399|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 400, + "content": "<|audio:400|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 401, + "content": "<|audio:401|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 402, + "content": "<|audio:402|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 403, + "content": "<|audio:403|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 404, + "content": "<|audio:404|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 405, + "content": "<|audio:405|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 406, + "content": 
"<|audio:406|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 407, + "content": "<|audio:407|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 408, + "content": "<|audio:408|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 409, + "content": "<|audio:409|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 410, + "content": "<|audio:410|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 411, + "content": "<|audio:411|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 412, + "content": "<|audio:412|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 413, + "content": "<|audio:413|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 414, + "content": "<|audio:414|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 415, + "content": "<|audio:415|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 416, + "content": "<|audio:416|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 417, + "content": "<|audio:417|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 418, + "content": "<|audio:418|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
419, + "content": "<|audio:419|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 420, + "content": "<|audio:420|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 421, + "content": "<|audio:421|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 422, + "content": "<|audio:422|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 423, + "content": "<|audio:423|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 424, + "content": "<|audio:424|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 425, + "content": "<|audio:425|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 426, + "content": "<|audio:426|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 427, + "content": "<|audio:427|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 428, + "content": "<|audio:428|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 429, + "content": "<|audio:429|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 430, + "content": "<|audio:430|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 431, + "content": "<|audio:431|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 432, + "content": "<|audio:432|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 433, + "content": "<|audio:433|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 434, + "content": "<|audio:434|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 435, + "content": "<|audio:435|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 436, + "content": "<|audio:436|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 437, + "content": "<|audio:437|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 438, + "content": "<|audio:438|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 439, + "content": "<|audio:439|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 440, + "content": "<|audio:440|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 441, + "content": "<|audio:441|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 442, + "content": "<|audio:442|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 443, + "content": "<|audio:443|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 444, + "content": "<|audio:444|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 445, + "content": "<|audio:445|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 446, + "content": "<|audio:446|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 447, + "content": "<|audio:447|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 448, + "content": "<|audio:448|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 449, + "content": "<|audio:449|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 450, + "content": "<|audio:450|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 451, + "content": "<|audio:451|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 452, + "content": "<|audio:452|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 453, + "content": "<|audio:453|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 454, + "content": "<|audio:454|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 455, + "content": "<|audio:455|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 456, + "content": "<|audio:456|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 457, + "content": "<|audio:457|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 458, + "content": "<|audio:458|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 459, + "content": "<|audio:459|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 460, + "content": "<|audio:460|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 461, + "content": "<|audio:461|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 462, + "content": "<|audio:462|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 463, + "content": "<|audio:463|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 464, + "content": "<|audio:464|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 465, + "content": "<|audio:465|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 466, + "content": "<|audio:466|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 467, + "content": "<|audio:467|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 468, + "content": "<|audio:468|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 469, + "content": "<|audio:469|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 470, + "content": "<|audio:470|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 471, + "content": "<|audio:471|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 472, + "content": "<|audio:472|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 473, + "content": "<|audio:473|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 474, + "content": "<|audio:474|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 475, + "content": "<|audio:475|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 476, + "content": "<|audio:476|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 477, + "content": "<|audio:477|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 478, + "content": "<|audio:478|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 479, + "content": "<|audio:479|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 480, + "content": "<|audio:480|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 481, + "content": "<|audio:481|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 482, + "content": "<|audio:482|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 483, + "content": "<|audio:483|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 484, + "content": "<|audio:484|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 485, + "content": "<|audio:485|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 486, + "content": "<|audio:486|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 487, + "content": "<|audio:487|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 488, + "content": "<|audio:488|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 489, + "content": "<|audio:489|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 490, + "content": "<|audio:490|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 491, + "content": "<|audio:491|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 492, + "content": "<|audio:492|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 493, + "content": "<|audio:493|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 494, + "content": "<|audio:494|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 495, + "content": "<|audio:495|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 496, + "content": "<|audio:496|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 497, + "content": "<|audio:497|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 498, + "content": "<|audio:498|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 499, + "content": "<|audio:499|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 500, + "content": "<|audio:500|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 501, + "content": "<|audio:501|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 502, + "content": "<|audio:502|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 503, + "content": "<|audio:503|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 504, + "content": "<|audio:504|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 505, + "content": "<|audio:505|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 506, + "content": "<|audio:506|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 507, + "content": "<|audio:507|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 508, + "content": "<|audio:508|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 509, + "content": 
"<|audio:509|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 510, + "content": "<|audio:510|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 511, + "content": "<|audio:511|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 512, + "content": "<|audio:512|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 513, + "content": "<|audio:513|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 514, + "content": "<|audio:514|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 515, + "content": "<|audio:515|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 516, + "content": "<|audio:516|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 517, + "content": "<|audio:517|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 518, + "content": "<|audio:518|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 519, + "content": "<|audio:519|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 520, + "content": "<|audio:520|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 521, + "content": "<|audio:521|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
522, + "content": "<|audio:522|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 523, + "content": "<|audio:523|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 524, + "content": "<|audio:524|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 525, + "content": "<|audio:525|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 526, + "content": "<|audio:526|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 527, + "content": "<|audio:527|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 528, + "content": "<|audio:528|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 529, + "content": "<|audio:529|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 530, + "content": "<|audio:530|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 531, + "content": "<|audio:531|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 532, + "content": "<|audio:532|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 533, + "content": "<|audio:533|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 534, + "content": "<|audio:534|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 535, + "content": "<|audio:535|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 536, + "content": "<|audio:536|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 537, + "content": "<|audio:537|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 538, + "content": "<|audio:538|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 539, + "content": "<|audio:539|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 540, + "content": "<|audio:540|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 541, + "content": "<|audio:541|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 542, + "content": "<|audio:542|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 543, + "content": "<|audio:543|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 544, + "content": "<|audio:544|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 545, + "content": "<|audio:545|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 546, + "content": "<|audio:546|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 547, + "content": "<|audio:547|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 548, + "content": "<|audio:548|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 549, + "content": "<|audio:549|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 550, + "content": "<|audio:550|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 551, + "content": "<|audio:551|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 552, + "content": "<|audio:552|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 553, + "content": "<|audio:553|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 554, + "content": "<|audio:554|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 555, + "content": "<|audio:555|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 556, + "content": "<|audio:556|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 557, + "content": "<|audio:557|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 558, + "content": "<|audio:558|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 559, + "content": "<|audio:559|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 560, + "content": "<|audio:560|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 561, + "content": "<|audio:561|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 562, + "content": "<|audio:562|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 563, + "content": "<|audio:563|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 564, + "content": "<|audio:564|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 565, + "content": "<|audio:565|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 566, + "content": "<|audio:566|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 567, + "content": "<|audio:567|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 568, + "content": "<|audio:568|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 569, + "content": "<|audio:569|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 570, + "content": "<|audio:570|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 571, + "content": "<|audio:571|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 572, + "content": "<|audio:572|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 573, + "content": "<|audio:573|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 574, + "content": "<|audio:574|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 575, + "content": "<|audio:575|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 576, + "content": "<|audio:576|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 577, + "content": "<|audio:577|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 578, + "content": "<|audio:578|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 579, + "content": "<|audio:579|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 580, + "content": "<|audio:580|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 581, + "content": "<|audio:581|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 582, + "content": "<|audio:582|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 583, + "content": "<|audio:583|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 584, + "content": "<|audio:584|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 585, + "content": "<|audio:585|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 586, + "content": "<|audio:586|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 587, + "content": "<|audio:587|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 588, + "content": "<|audio:588|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 589, + "content": "<|audio:589|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 590, + "content": "<|audio:590|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 591, + "content": "<|audio:591|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 592, + "content": "<|audio:592|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 593, + "content": "<|audio:593|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 594, + "content": "<|audio:594|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 595, + "content": "<|audio:595|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 596, + "content": "<|audio:596|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 597, + "content": "<|audio:597|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 598, + "content": "<|audio:598|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 599, + "content": "<|audio:599|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 600, + "content": "<|audio:600|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 601, + "content": "<|audio:601|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 602, + "content": "<|audio:602|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 603, + "content": "<|audio:603|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 604, + "content": "<|audio:604|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 605, + "content": "<|audio:605|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 606, + "content": "<|audio:606|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 607, + "content": "<|audio:607|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 608, + "content": "<|audio:608|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 609, + "content": "<|audio:609|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 610, + "content": "<|audio:610|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 611, + "content": "<|audio:611|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 612, + "content": 
"<|audio:612|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 613, + "content": "<|audio:613|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 614, + "content": "<|audio:614|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 615, + "content": "<|audio:615|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 616, + "content": "<|audio:616|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 617, + "content": "<|audio:617|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 618, + "content": "<|audio:618|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 619, + "content": "<|audio:619|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 620, + "content": "<|audio:620|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 621, + "content": "<|audio:621|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 622, + "content": "<|audio:622|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 623, + "content": "<|audio:623|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 624, + "content": "<|audio:624|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
625, + "content": "<|audio:625|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 626, + "content": "<|audio:626|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 627, + "content": "<|audio:627|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 628, + "content": "<|audio:628|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 629, + "content": "<|audio:629|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 630, + "content": "<|audio:630|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 631, + "content": "<|audio:631|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 632, + "content": "<|audio:632|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 633, + "content": "<|audio:633|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 634, + "content": "<|audio:634|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 635, + "content": "<|audio:635|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 636, + "content": "<|audio:636|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 637, + "content": "<|audio:637|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 638, + "content": "<|audio:638|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 639, + "content": "<|audio:639|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 640, + "content": "<|audio:640|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 641, + "content": "<|audio:641|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 642, + "content": "<|audio:642|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 643, + "content": "<|audio:643|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 644, + "content": "<|audio:644|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 645, + "content": "<|audio:645|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 646, + "content": "<|audio:646|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 647, + "content": "<|audio:647|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 648, + "content": "<|audio:648|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 649, + "content": "<|audio:649|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 650, + "content": "<|audio:650|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 651, + "content": "<|audio:651|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 652, + "content": "<|audio:652|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 653, + "content": "<|audio:653|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 654, + "content": "<|audio:654|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 655, + "content": "<|audio:655|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 656, + "content": "<|audio:656|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 657, + "content": "<|audio:657|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 658, + "content": "<|audio:658|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 659, + "content": "<|audio:659|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 660, + "content": "<|audio:660|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 661, + "content": "<|audio:661|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 662, + "content": "<|audio:662|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 663, + "content": "<|audio:663|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 664, + "content": "<|audio:664|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 665, + "content": "<|audio:665|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 666, + "content": "<|audio:666|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 667, + "content": "<|audio:667|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 668, + "content": "<|audio:668|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 669, + "content": "<|audio:669|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 670, + "content": "<|audio:670|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 671, + "content": "<|audio:671|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 672, + "content": "<|audio:672|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 673, + "content": "<|audio:673|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 674, + "content": "<|audio:674|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 675, + "content": "<|audio:675|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 676, + "content": "<|audio:676|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 677, + "content": "<|audio:677|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 678, + "content": "<|audio:678|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 679, + "content": "<|audio:679|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 680, + "content": "<|audio:680|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 681, + "content": "<|audio:681|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 682, + "content": "<|audio:682|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 683, + "content": "<|audio:683|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 684, + "content": "<|audio:684|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 685, + "content": "<|audio:685|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 686, + "content": "<|audio:686|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 687, + "content": "<|audio:687|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 688, + "content": "<|audio:688|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 689, + "content": "<|audio:689|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 690, + "content": "<|audio:690|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 691, + "content": "<|audio:691|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 692, + "content": "<|audio:692|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 693, + "content": "<|audio:693|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 694, + "content": "<|audio:694|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 695, + "content": "<|audio:695|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 696, + "content": "<|audio:696|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 697, + "content": "<|audio:697|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 698, + "content": "<|audio:698|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 699, + "content": "<|audio:699|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 700, + "content": "<|audio:700|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 701, + "content": "<|audio:701|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 702, + "content": "<|audio:702|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 703, + "content": "<|audio:703|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 704, + "content": "<|audio:704|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 705, + "content": "<|audio:705|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 706, + "content": "<|audio:706|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 707, + "content": "<|audio:707|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 708, + "content": "<|audio:708|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 709, + "content": "<|audio:709|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 710, + "content": "<|audio:710|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 711, + "content": "<|audio:711|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 712, + "content": "<|audio:712|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 713, + "content": "<|audio:713|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 714, + "content": "<|audio:714|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 715, + "content": 
"<|audio:715|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 716, + "content": "<|audio:716|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 717, + "content": "<|audio:717|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 718, + "content": "<|audio:718|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 719, + "content": "<|audio:719|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 720, + "content": "<|audio:720|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 721, + "content": "<|audio:721|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 722, + "content": "<|audio:722|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 723, + "content": "<|audio:723|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 724, + "content": "<|audio:724|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 725, + "content": "<|audio:725|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 726, + "content": "<|audio:726|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 727, + "content": "<|audio:727|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
728, + "content": "<|audio:728|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 729, + "content": "<|audio:729|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 730, + "content": "<|audio:730|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 731, + "content": "<|audio:731|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 732, + "content": "<|audio:732|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 733, + "content": "<|audio:733|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 734, + "content": "<|audio:734|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 735, + "content": "<|audio:735|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 736, + "content": "<|audio:736|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 737, + "content": "<|audio:737|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 738, + "content": "<|audio:738|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 739, + "content": "<|audio:739|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 740, + "content": "<|audio:740|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 741, + "content": "<|audio:741|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 742, + "content": "<|audio:742|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 743, + "content": "<|audio:743|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 744, + "content": "<|audio:744|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 745, + "content": "<|audio:745|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 746, + "content": "<|audio:746|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 747, + "content": "<|audio:747|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 748, + "content": "<|audio:748|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 749, + "content": "<|audio:749|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 750, + "content": "<|audio:750|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 751, + "content": "<|audio:751|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 752, + "content": "<|audio:752|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 753, + "content": "<|audio:753|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 754, + "content": "<|audio:754|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 755, + "content": "<|audio:755|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 756, + "content": "<|audio:756|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 757, + "content": "<|audio:757|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 758, + "content": "<|audio:758|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 759, + "content": "<|audio:759|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 760, + "content": "<|audio:760|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 761, + "content": "<|audio:761|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 762, + "content": "<|audio:762|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 763, + "content": "<|audio:763|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 764, + "content": "<|audio:764|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 765, + "content": "<|audio:765|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 766, + "content": "<|audio:766|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 767, + "content": "<|audio:767|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 768, + "content": "<|audio:768|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 769, + "content": "<|audio:769|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 770, + "content": "<|audio:770|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 771, + "content": "<|audio:771|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 772, + "content": "<|audio:772|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 773, + "content": "<|audio:773|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 774, + "content": "<|audio:774|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 775, + "content": "<|audio:775|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 776, + "content": "<|audio:776|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 777, + "content": "<|audio:777|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 778, + "content": "<|audio:778|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 779, + "content": "<|audio:779|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 780, + "content": "<|audio:780|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 781, + "content": "<|audio:781|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 782, + "content": "<|audio:782|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 783, + "content": "<|audio:783|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 784, + "content": "<|audio:784|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 785, + "content": "<|audio:785|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 786, + "content": "<|audio:786|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 787, + "content": "<|audio:787|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 788, + "content": "<|audio:788|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 789, + "content": "<|audio:789|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 790, + "content": "<|audio:790|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 791, + "content": "<|audio:791|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 792, + "content": "<|audio:792|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 793, + "content": "<|audio:793|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 794, + "content": "<|audio:794|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 795, + "content": "<|audio:795|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 796, + "content": "<|audio:796|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 797, + "content": "<|audio:797|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 798, + "content": "<|audio:798|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 799, + "content": "<|audio:799|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 800, + "content": "<|audio:800|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 801, + "content": "<|audio:801|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 802, + "content": "<|audio:802|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 803, + "content": "<|audio:803|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 804, + "content": "<|audio:804|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 805, + "content": "<|audio:805|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 806, + "content": "<|audio:806|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 807, + "content": "<|audio:807|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 808, + "content": "<|audio:808|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 809, + "content": "<|audio:809|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 810, + "content": "<|audio:810|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 811, + "content": "<|audio:811|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 812, + "content": "<|audio:812|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 813, + "content": "<|audio:813|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 814, + "content": "<|audio:814|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 815, + "content": "<|audio:815|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 816, + "content": "<|audio:816|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 817, + "content": "<|audio:817|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 818, + "content": 
"<|audio:818|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 819, + "content": "<|audio:819|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 820, + "content": "<|audio:820|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 821, + "content": "<|audio:821|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 822, + "content": "<|audio:822|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 823, + "content": "<|audio:823|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 824, + "content": "<|audio:824|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 825, + "content": "<|audio:825|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 826, + "content": "<|audio:826|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 827, + "content": "<|audio:827|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 828, + "content": "<|audio:828|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 829, + "content": "<|audio:829|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 830, + "content": "<|audio:830|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
831, + "content": "<|audio:831|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 832, + "content": "<|audio:832|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 833, + "content": "<|audio:833|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 834, + "content": "<|audio:834|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 835, + "content": "<|audio:835|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 836, + "content": "<|audio:836|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 837, + "content": "<|audio:837|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 838, + "content": "<|audio:838|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 839, + "content": "<|audio:839|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 840, + "content": "<|audio:840|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 841, + "content": "<|audio:841|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 842, + "content": "<|audio:842|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 843, + "content": "<|audio:843|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 844, + "content": "<|audio:844|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 845, + "content": "<|audio:845|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 846, + "content": "<|audio:846|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 847, + "content": "<|audio:847|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 848, + "content": "<|audio:848|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 849, + "content": "<|audio:849|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 850, + "content": "<|audio:850|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 851, + "content": "<|audio:851|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 852, + "content": "<|audio:852|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 853, + "content": "<|audio:853|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 854, + "content": "<|audio:854|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 855, + "content": "<|audio:855|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 856, + "content": "<|audio:856|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 857, + "content": "<|audio:857|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 858, + "content": "<|audio:858|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 859, + "content": "<|audio:859|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 860, + "content": "<|audio:860|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 861, + "content": "<|audio:861|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 862, + "content": "<|audio:862|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 863, + "content": "<|audio:863|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 864, + "content": "<|audio:864|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 865, + "content": "<|audio:865|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 866, + "content": "<|audio:866|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 867, + "content": "<|audio:867|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 868, + "content": "<|audio:868|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 869, + "content": "<|audio:869|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 870, + "content": "<|audio:870|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 871, + "content": "<|audio:871|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 872, + "content": "<|audio:872|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 873, + "content": "<|audio:873|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 874, + "content": "<|audio:874|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 875, + "content": "<|audio:875|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 876, + "content": "<|audio:876|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 877, + "content": "<|audio:877|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 878, + "content": "<|audio:878|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 879, + "content": "<|audio:879|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 880, + "content": "<|audio:880|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 881, + "content": "<|audio:881|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 882, + "content": "<|audio:882|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 883, + "content": "<|audio:883|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 884, + "content": "<|audio:884|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 885, + "content": "<|audio:885|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 886, + "content": "<|audio:886|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 887, + "content": "<|audio:887|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 888, + "content": "<|audio:888|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 889, + "content": "<|audio:889|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 890, + "content": "<|audio:890|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 891, + "content": "<|audio:891|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 892, + "content": "<|audio:892|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 893, + "content": "<|audio:893|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 894, + "content": "<|audio:894|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 895, + "content": "<|audio:895|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 896, + "content": "<|audio:896|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 897, + "content": "<|audio:897|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 898, + "content": "<|audio:898|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 899, + "content": "<|audio:899|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 900, + "content": "<|audio:900|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 901, + "content": "<|audio:901|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 902, + "content": "<|audio:902|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 903, + "content": "<|audio:903|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 904, + "content": "<|audio:904|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 905, + "content": "<|audio:905|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 906, + "content": "<|audio:906|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 907, + "content": "<|audio:907|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 908, + "content": "<|audio:908|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 909, + "content": "<|audio:909|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 910, + "content": "<|audio:910|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 911, + "content": "<|audio:911|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 912, + "content": "<|audio:912|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 913, + "content": "<|audio:913|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 914, + "content": "<|audio:914|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 915, + "content": "<|audio:915|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 916, + "content": "<|audio:916|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 917, + "content": "<|audio:917|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 918, + "content": "<|audio:918|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 919, + "content": "<|audio:919|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 920, + "content": "<|audio:920|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 921, + "content": 
"<|audio:921|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 922, + "content": "<|audio:922|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 923, + "content": "<|audio:923|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 924, + "content": "<|audio:924|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 925, + "content": "<|audio:925|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 926, + "content": "<|audio:926|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 927, + "content": "<|audio:927|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 928, + "content": "<|audio:928|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 929, + "content": "<|audio:929|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 930, + "content": "<|audio:930|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 931, + "content": "<|audio:931|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 932, + "content": "<|audio:932|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 933, + "content": "<|audio:933|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
934, + "content": "<|audio:934|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 935, + "content": "<|audio:935|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 936, + "content": "<|audio:936|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 937, + "content": "<|audio:937|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 938, + "content": "<|audio:938|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 939, + "content": "<|audio:939|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 940, + "content": "<|audio:940|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 941, + "content": "<|audio:941|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 942, + "content": "<|audio:942|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 943, + "content": "<|audio:943|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 944, + "content": "<|audio:944|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 945, + "content": "<|audio:945|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 946, + "content": "<|audio:946|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 947, + "content": "<|audio:947|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 948, + "content": "<|audio:948|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 949, + "content": "<|audio:949|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 950, + "content": "<|audio:950|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 951, + "content": "<|audio:951|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 952, + "content": "<|audio:952|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 953, + "content": "<|audio:953|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 954, + "content": "<|audio:954|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 955, + "content": "<|audio:955|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 956, + "content": "<|audio:956|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 957, + "content": "<|audio:957|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 958, + "content": "<|audio:958|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 959, + "content": "<|audio:959|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 960, + "content": "<|audio:960|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 961, + "content": "<|audio:961|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 962, + "content": "<|audio:962|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 963, + "content": "<|audio:963|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 964, + "content": "<|audio:964|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 965, + "content": "<|audio:965|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 966, + "content": "<|audio:966|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 967, + "content": "<|audio:967|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 968, + "content": "<|audio:968|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 969, + "content": "<|audio:969|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 970, + "content": "<|audio:970|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 971, + "content": "<|audio:971|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 972, + "content": "<|audio:972|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 973, + "content": "<|audio:973|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 974, + "content": "<|audio:974|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 975, + "content": "<|audio:975|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 976, + "content": "<|audio:976|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 977, + "content": "<|audio:977|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 978, + "content": "<|audio:978|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 979, + "content": "<|audio:979|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 980, + "content": "<|audio:980|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 981, + "content": "<|audio:981|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 982, + "content": "<|audio:982|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 983, + "content": "<|audio:983|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 984, + "content": "<|audio:984|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 985, + "content": "<|audio:985|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 986, + "content": "<|audio:986|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 987, + "content": "<|audio:987|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 988, + "content": "<|audio:988|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 989, + "content": "<|audio:989|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 990, + "content": "<|audio:990|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 991, + "content": "<|audio:991|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 992, + "content": "<|audio:992|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 993, + "content": "<|audio:993|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 994, + "content": "<|audio:994|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 995, + "content": "<|audio:995|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 996, + "content": "<|audio:996|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 997, + "content": "<|audio:997|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 998, + "content": "<|audio:998|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 999, + "content": "<|audio:999|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1000, + "content": "<|audio:1000|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1001, + "content": "<|audio:1001|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1002, + "content": "<|audio:1002|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1003, + "content": "<|audio:1003|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1004, + "content": "<|audio:1004|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1005, + "content": "<|audio:1005|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1006, + "content": "<|audio:1006|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1007, + "content": "<|audio:1007|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1008, + "content": "<|audio:1008|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1009, + "content": "<|audio:1009|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1010, + "content": "<|audio:1010|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1011, + "content": 
"<|audio:1011|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1012, + "content": "<|audio:1012|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1013, + "content": "<|audio:1013|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1014, + "content": "<|audio:1014|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1015, + "content": "<|audio:1015|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1016, + "content": "<|audio:1016|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1017, + "content": "<|audio:1017|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1018, + "content": "<|audio:1018|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1019, + "content": "<|audio:1019|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1020, + "content": "<|audio:1020|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1021, + "content": "<|audio:1021|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1022, + "content": "<|audio:1022|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1023, + "content": "<|audio:1023|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": 
true + }, + { + "id": 1024, + "content": "<|startoftranscript|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1025, + "content": "<|endoftranscript|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1026, + "content": "<|padding|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFKC" + }, + "pre_tokenizer": { + "type": "Metaspace", + "replacement": "▁", + "prepend_scheme": "always", + "split": true + }, + "post_processor": null, + "decoder": { + "type": "Metaspace", + "replacement": "▁", + "prepend_scheme": "always", + "split": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|audio:0|>": 0, + "<|audio:1|>": 1, + "<|audio:2|>": 2, + "<|audio:3|>": 3, + "<|audio:4|>": 4, + "<|audio:5|>": 5, + "<|audio:6|>": 6, + "<|audio:7|>": 7, + "<|audio:8|>": 8, + "<|audio:9|>": 9, + "<|audio:10|>": 10, + "<|audio:11|>": 11, + "<|audio:12|>": 12, + "<|audio:13|>": 13, + "<|audio:14|>": 14, + "<|audio:15|>": 15, + "<|audio:16|>": 16, + "<|audio:17|>": 17, + "<|audio:18|>": 18, + "<|audio:19|>": 19, + "<|audio:20|>": 20, + "<|audio:21|>": 21, + "<|audio:22|>": 22, + "<|audio:23|>": 23, + "<|audio:24|>": 24, + "<|audio:25|>": 25, + "<|audio:26|>": 26, + "<|audio:27|>": 27, + "<|audio:28|>": 28, + "<|audio:29|>": 29, + "<|audio:30|>": 30, + "<|audio:31|>": 31, + "<|audio:32|>": 32, + "<|audio:33|>": 33, + "<|audio:34|>": 34, + "<|audio:35|>": 35, + "<|audio:36|>": 36, + "<|audio:37|>": 37, + "<|audio:38|>": 38, + "<|audio:39|>": 39, + "<|audio:40|>": 40, + "<|audio:41|>": 41, + "<|audio:42|>": 42, + "<|audio:43|>": 43, + "<|audio:44|>": 44, + "<|audio:45|>": 
45, + "<|audio:46|>": 46, + "<|audio:47|>": 47, + "<|audio:48|>": 48, + "<|audio:49|>": 49, + "<|audio:50|>": 50, + "<|audio:51|>": 51, + "<|audio:52|>": 52, + "<|audio:53|>": 53, + "<|audio:54|>": 54, + "<|audio:55|>": 55, + "<|audio:56|>": 56, + "<|audio:57|>": 57, + "<|audio:58|>": 58, + "<|audio:59|>": 59, + "<|audio:60|>": 60, + "<|audio:61|>": 61, + "<|audio:62|>": 62, + "<|audio:63|>": 63, + "<|audio:64|>": 64, + "<|audio:65|>": 65, + "<|audio:66|>": 66, + "<|audio:67|>": 67, + "<|audio:68|>": 68, + "<|audio:69|>": 69, + "<|audio:70|>": 70, + "<|audio:71|>": 71, + "<|audio:72|>": 72, + "<|audio:73|>": 73, + "<|audio:74|>": 74, + "<|audio:75|>": 75, + "<|audio:76|>": 76, + "<|audio:77|>": 77, + "<|audio:78|>": 78, + "<|audio:79|>": 79, + "<|audio:80|>": 80, + "<|audio:81|>": 81, + "<|audio:82|>": 82, + "<|audio:83|>": 83, + "<|audio:84|>": 84, + "<|audio:85|>": 85, + "<|audio:86|>": 86, + "<|audio:87|>": 87, + "<|audio:88|>": 88, + "<|audio:89|>": 89, + "<|audio:90|>": 90, + "<|audio:91|>": 91, + "<|audio:92|>": 92, + "<|audio:93|>": 93, + "<|audio:94|>": 94, + "<|audio:95|>": 95, + "<|audio:96|>": 96, + "<|audio:97|>": 97, + "<|audio:98|>": 98, + "<|audio:99|>": 99, + "<|audio:100|>": 100, + "<|audio:101|>": 101, + "<|audio:102|>": 102, + "<|audio:103|>": 103, + "<|audio:104|>": 104, + "<|audio:105|>": 105, + "<|audio:106|>": 106, + "<|audio:107|>": 107, + "<|audio:108|>": 108, + "<|audio:109|>": 109, + "<|audio:110|>": 110, + "<|audio:111|>": 111, + "<|audio:112|>": 112, + "<|audio:113|>": 113, + "<|audio:114|>": 114, + "<|audio:115|>": 115, + "<|audio:116|>": 116, + "<|audio:117|>": 117, + "<|audio:118|>": 118, + "<|audio:119|>": 119, + "<|audio:120|>": 120, + "<|audio:121|>": 121, + "<|audio:122|>": 122, + "<|audio:123|>": 123, + "<|audio:124|>": 124, + "<|audio:125|>": 125, + "<|audio:126|>": 126, + "<|audio:127|>": 127, + "<|audio:128|>": 128, + "<|audio:129|>": 129, + "<|audio:130|>": 130, + "<|audio:131|>": 131, + "<|audio:132|>": 132, + 
"<|audio:133|>": 133, + "<|audio:134|>": 134, + "<|audio:135|>": 135, + "<|audio:136|>": 136, + "<|audio:137|>": 137, + "<|audio:138|>": 138, + "<|audio:139|>": 139, + "<|audio:140|>": 140, + "<|audio:141|>": 141, + "<|audio:142|>": 142, + "<|audio:143|>": 143, + "<|audio:144|>": 144, + "<|audio:145|>": 145, + "<|audio:146|>": 146, + "<|audio:147|>": 147, + "<|audio:148|>": 148, + "<|audio:149|>": 149, + "<|audio:150|>": 150, + "<|audio:151|>": 151, + "<|audio:152|>": 152, + "<|audio:153|>": 153, + "<|audio:154|>": 154, + "<|audio:155|>": 155, + "<|audio:156|>": 156, + "<|audio:157|>": 157, + "<|audio:158|>": 158, + "<|audio:159|>": 159, + "<|audio:160|>": 160, + "<|audio:161|>": 161, + "<|audio:162|>": 162, + "<|audio:163|>": 163, + "<|audio:164|>": 164, + "<|audio:165|>": 165, + "<|audio:166|>": 166, + "<|audio:167|>": 167, + "<|audio:168|>": 168, + "<|audio:169|>": 169, + "<|audio:170|>": 170, + "<|audio:171|>": 171, + "<|audio:172|>": 172, + "<|audio:173|>": 173, + "<|audio:174|>": 174, + "<|audio:175|>": 175, + "<|audio:176|>": 176, + "<|audio:177|>": 177, + "<|audio:178|>": 178, + "<|audio:179|>": 179, + "<|audio:180|>": 180, + "<|audio:181|>": 181, + "<|audio:182|>": 182, + "<|audio:183|>": 183, + "<|audio:184|>": 184, + "<|audio:185|>": 185, + "<|audio:186|>": 186, + "<|audio:187|>": 187, + "<|audio:188|>": 188, + "<|audio:189|>": 189, + "<|audio:190|>": 190, + "<|audio:191|>": 191, + "<|audio:192|>": 192, + "<|audio:193|>": 193, + "<|audio:194|>": 194, + "<|audio:195|>": 195, + "<|audio:196|>": 196, + "<|audio:197|>": 197, + "<|audio:198|>": 198, + "<|audio:199|>": 199, + "<|audio:200|>": 200, + "<|audio:201|>": 201, + "<|audio:202|>": 202, + "<|audio:203|>": 203, + "<|audio:204|>": 204, + "<|audio:205|>": 205, + "<|audio:206|>": 206, + "<|audio:207|>": 207, + "<|audio:208|>": 208, + "<|audio:209|>": 209, + "<|audio:210|>": 210, + "<|audio:211|>": 211, + "<|audio:212|>": 212, + "<|audio:213|>": 213, + "<|audio:214|>": 214, + "<|audio:215|>": 215, + 
"<|audio:216|>": 216, + "<|audio:217|>": 217, + "<|audio:218|>": 218, + "<|audio:219|>": 219, + "<|audio:220|>": 220, + "<|audio:221|>": 221, + "<|audio:222|>": 222, + "<|audio:223|>": 223, + "<|audio:224|>": 224, + "<|audio:225|>": 225, + "<|audio:226|>": 226, + "<|audio:227|>": 227, + "<|audio:228|>": 228, + "<|audio:229|>": 229, + "<|audio:230|>": 230, + "<|audio:231|>": 231, + "<|audio:232|>": 232, + "<|audio:233|>": 233, + "<|audio:234|>": 234, + "<|audio:235|>": 235, + "<|audio:236|>": 236, + "<|audio:237|>": 237, + "<|audio:238|>": 238, + "<|audio:239|>": 239, + "<|audio:240|>": 240, + "<|audio:241|>": 241, + "<|audio:242|>": 242, + "<|audio:243|>": 243, + "<|audio:244|>": 244, + "<|audio:245|>": 245, + "<|audio:246|>": 246, + "<|audio:247|>": 247, + "<|audio:248|>": 248, + "<|audio:249|>": 249, + "<|audio:250|>": 250, + "<|audio:251|>": 251, + "<|audio:252|>": 252, + "<|audio:253|>": 253, + "<|audio:254|>": 254, + "<|audio:255|>": 255, + "<|audio:256|>": 256, + "<|audio:257|>": 257, + "<|audio:258|>": 258, + "<|audio:259|>": 259, + "<|audio:260|>": 260, + "<|audio:261|>": 261, + "<|audio:262|>": 262, + "<|audio:263|>": 263, + "<|audio:264|>": 264, + "<|audio:265|>": 265, + "<|audio:266|>": 266, + "<|audio:267|>": 267, + "<|audio:268|>": 268, + "<|audio:269|>": 269, + "<|audio:270|>": 270, + "<|audio:271|>": 271, + "<|audio:272|>": 272, + "<|audio:273|>": 273, + "<|audio:274|>": 274, + "<|audio:275|>": 275, + "<|audio:276|>": 276, + "<|audio:277|>": 277, + "<|audio:278|>": 278, + "<|audio:279|>": 279, + "<|audio:280|>": 280, + "<|audio:281|>": 281, + "<|audio:282|>": 282, + "<|audio:283|>": 283, + "<|audio:284|>": 284, + "<|audio:285|>": 285, + "<|audio:286|>": 286, + "<|audio:287|>": 287, + "<|audio:288|>": 288, + "<|audio:289|>": 289, + "<|audio:290|>": 290, + "<|audio:291|>": 291, + "<|audio:292|>": 292, + "<|audio:293|>": 293, + "<|audio:294|>": 294, + "<|audio:295|>": 295, + "<|audio:296|>": 296, + "<|audio:297|>": 297, + "<|audio:298|>": 298, + 
"<|audio:299|>": 299, + "<|audio:300|>": 300, + "<|audio:301|>": 301, + "<|audio:302|>": 302, + "<|audio:303|>": 303, + "<|audio:304|>": 304, + "<|audio:305|>": 305, + "<|audio:306|>": 306, + "<|audio:307|>": 307, + "<|audio:308|>": 308, + "<|audio:309|>": 309, + "<|audio:310|>": 310, + "<|audio:311|>": 311, + "<|audio:312|>": 312, + "<|audio:313|>": 313, + "<|audio:314|>": 314, + "<|audio:315|>": 315, + "<|audio:316|>": 316, + "<|audio:317|>": 317, + "<|audio:318|>": 318, + "<|audio:319|>": 319, + "<|audio:320|>": 320, + "<|audio:321|>": 321, + "<|audio:322|>": 322, + "<|audio:323|>": 323, + "<|audio:324|>": 324, + "<|audio:325|>": 325, + "<|audio:326|>": 326, + "<|audio:327|>": 327, + "<|audio:328|>": 328, + "<|audio:329|>": 329, + "<|audio:330|>": 330, + "<|audio:331|>": 331, + "<|audio:332|>": 332, + "<|audio:333|>": 333, + "<|audio:334|>": 334, + "<|audio:335|>": 335, + "<|audio:336|>": 336, + "<|audio:337|>": 337, + "<|audio:338|>": 338, + "<|audio:339|>": 339, + "<|audio:340|>": 340, + "<|audio:341|>": 341, + "<|audio:342|>": 342, + "<|audio:343|>": 343, + "<|audio:344|>": 344, + "<|audio:345|>": 345, + "<|audio:346|>": 346, + "<|audio:347|>": 347, + "<|audio:348|>": 348, + "<|audio:349|>": 349, + "<|audio:350|>": 350, + "<|audio:351|>": 351, + "<|audio:352|>": 352, + "<|audio:353|>": 353, + "<|audio:354|>": 354, + "<|audio:355|>": 355, + "<|audio:356|>": 356, + "<|audio:357|>": 357, + "<|audio:358|>": 358, + "<|audio:359|>": 359, + "<|audio:360|>": 360, + "<|audio:361|>": 361, + "<|audio:362|>": 362, + "<|audio:363|>": 363, + "<|audio:364|>": 364, + "<|audio:365|>": 365, + "<|audio:366|>": 366, + "<|audio:367|>": 367, + "<|audio:368|>": 368, + "<|audio:369|>": 369, + "<|audio:370|>": 370, + "<|audio:371|>": 371, + "<|audio:372|>": 372, + "<|audio:373|>": 373, + "<|audio:374|>": 374, + "<|audio:375|>": 375, + "<|audio:376|>": 376, + "<|audio:377|>": 377, + "<|audio:378|>": 378, + "<|audio:379|>": 379, + "<|audio:380|>": 380, + "<|audio:381|>": 381, + 
"<|audio:382|>": 382, + "<|audio:383|>": 383, + "<|audio:384|>": 384, + "<|audio:385|>": 385, + "<|audio:386|>": 386, + "<|audio:387|>": 387, + "<|audio:388|>": 388, + "<|audio:389|>": 389, + "<|audio:390|>": 390, + "<|audio:391|>": 391, + "<|audio:392|>": 392, + "<|audio:393|>": 393, + "<|audio:394|>": 394, + "<|audio:395|>": 395, + "<|audio:396|>": 396, + "<|audio:397|>": 397, + "<|audio:398|>": 398, + "<|audio:399|>": 399, + "<|audio:400|>": 400, + "<|audio:401|>": 401, + "<|audio:402|>": 402, + "<|audio:403|>": 403, + "<|audio:404|>": 404, + "<|audio:405|>": 405, + "<|audio:406|>": 406, + "<|audio:407|>": 407, + "<|audio:408|>": 408, + "<|audio:409|>": 409, + "<|audio:410|>": 410, + "<|audio:411|>": 411, + "<|audio:412|>": 412, + "<|audio:413|>": 413, + "<|audio:414|>": 414, + "<|audio:415|>": 415, + "<|audio:416|>": 416, + "<|audio:417|>": 417, + "<|audio:418|>": 418, + "<|audio:419|>": 419, + "<|audio:420|>": 420, + "<|audio:421|>": 421, + "<|audio:422|>": 422, + "<|audio:423|>": 423, + "<|audio:424|>": 424, + "<|audio:425|>": 425, + "<|audio:426|>": 426, + "<|audio:427|>": 427, + "<|audio:428|>": 428, + "<|audio:429|>": 429, + "<|audio:430|>": 430, + "<|audio:431|>": 431, + "<|audio:432|>": 432, + "<|audio:433|>": 433, + "<|audio:434|>": 434, + "<|audio:435|>": 435, + "<|audio:436|>": 436, + "<|audio:437|>": 437, + "<|audio:438|>": 438, + "<|audio:439|>": 439, + "<|audio:440|>": 440, + "<|audio:441|>": 441, + "<|audio:442|>": 442, + "<|audio:443|>": 443, + "<|audio:444|>": 444, + "<|audio:445|>": 445, + "<|audio:446|>": 446, + "<|audio:447|>": 447, + "<|audio:448|>": 448, + "<|audio:449|>": 449, + "<|audio:450|>": 450, + "<|audio:451|>": 451, + "<|audio:452|>": 452, + "<|audio:453|>": 453, + "<|audio:454|>": 454, + "<|audio:455|>": 455, + "<|audio:456|>": 456, + "<|audio:457|>": 457, + "<|audio:458|>": 458, + "<|audio:459|>": 459, + "<|audio:460|>": 460, + "<|audio:461|>": 461, + "<|audio:462|>": 462, + "<|audio:463|>": 463, + "<|audio:464|>": 464, + 
"<|audio:465|>": 465, + "<|audio:466|>": 466, + "<|audio:467|>": 467, + "<|audio:468|>": 468, + "<|audio:469|>": 469, + "<|audio:470|>": 470, + "<|audio:471|>": 471, + "<|audio:472|>": 472, + "<|audio:473|>": 473, + "<|audio:474|>": 474, + "<|audio:475|>": 475, + "<|audio:476|>": 476, + "<|audio:477|>": 477, + "<|audio:478|>": 478, + "<|audio:479|>": 479, + "<|audio:480|>": 480, + "<|audio:481|>": 481, + "<|audio:482|>": 482, + "<|audio:483|>": 483, + "<|audio:484|>": 484, + "<|audio:485|>": 485, + "<|audio:486|>": 486, + "<|audio:487|>": 487, + "<|audio:488|>": 488, + "<|audio:489|>": 489, + "<|audio:490|>": 490, + "<|audio:491|>": 491, + "<|audio:492|>": 492, + "<|audio:493|>": 493, + "<|audio:494|>": 494, + "<|audio:495|>": 495, + "<|audio:496|>": 496, + "<|audio:497|>": 497, + "<|audio:498|>": 498, + "<|audio:499|>": 499, + "<|audio:500|>": 500, + "<|audio:501|>": 501, + "<|audio:502|>": 502, + "<|audio:503|>": 503, + "<|audio:504|>": 504, + "<|audio:505|>": 505, + "<|audio:506|>": 506, + "<|audio:507|>": 507, + "<|audio:508|>": 508, + "<|audio:509|>": 509, + "<|audio:510|>": 510, + "<|audio:511|>": 511, + "<|audio:512|>": 512, + "<|audio:513|>": 513, + "<|audio:514|>": 514, + "<|audio:515|>": 515, + "<|audio:516|>": 516, + "<|audio:517|>": 517, + "<|audio:518|>": 518, + "<|audio:519|>": 519, + "<|audio:520|>": 520, + "<|audio:521|>": 521, + "<|audio:522|>": 522, + "<|audio:523|>": 523, + "<|audio:524|>": 524, + "<|audio:525|>": 525, + "<|audio:526|>": 526, + "<|audio:527|>": 527, + "<|audio:528|>": 528, + "<|audio:529|>": 529, + "<|audio:530|>": 530, + "<|audio:531|>": 531, + "<|audio:532|>": 532, + "<|audio:533|>": 533, + "<|audio:534|>": 534, + "<|audio:535|>": 535, + "<|audio:536|>": 536, + "<|audio:537|>": 537, + "<|audio:538|>": 538, + "<|audio:539|>": 539, + "<|audio:540|>": 540, + "<|audio:541|>": 541, + "<|audio:542|>": 542, + "<|audio:543|>": 543, + "<|audio:544|>": 544, + "<|audio:545|>": 545, + "<|audio:546|>": 546, + "<|audio:547|>": 547, + 
"<|audio:548|>": 548, + "<|audio:549|>": 549, + "<|audio:550|>": 550, + "<|audio:551|>": 551, + "<|audio:552|>": 552, + "<|audio:553|>": 553, + "<|audio:554|>": 554, + "<|audio:555|>": 555, + "<|audio:556|>": 556, + "<|audio:557|>": 557, + "<|audio:558|>": 558, + "<|audio:559|>": 559, + "<|audio:560|>": 560, + "<|audio:561|>": 561, + "<|audio:562|>": 562, + "<|audio:563|>": 563, + "<|audio:564|>": 564, + "<|audio:565|>": 565, + "<|audio:566|>": 566, + "<|audio:567|>": 567, + "<|audio:568|>": 568, + "<|audio:569|>": 569, + "<|audio:570|>": 570, + "<|audio:571|>": 571, + "<|audio:572|>": 572, + "<|audio:573|>": 573, + "<|audio:574|>": 574, + "<|audio:575|>": 575, + "<|audio:576|>": 576, + "<|audio:577|>": 577, + "<|audio:578|>": 578, + "<|audio:579|>": 579, + "<|audio:580|>": 580, + "<|audio:581|>": 581, + "<|audio:582|>": 582, + "<|audio:583|>": 583, + "<|audio:584|>": 584, + "<|audio:585|>": 585, + "<|audio:586|>": 586, + "<|audio:587|>": 587, + "<|audio:588|>": 588, + "<|audio:589|>": 589, + "<|audio:590|>": 590, + "<|audio:591|>": 591, + "<|audio:592|>": 592, + "<|audio:593|>": 593, + "<|audio:594|>": 594, + "<|audio:595|>": 595, + "<|audio:596|>": 596, + "<|audio:597|>": 597, + "<|audio:598|>": 598, + "<|audio:599|>": 599, + "<|audio:600|>": 600, + "<|audio:601|>": 601, + "<|audio:602|>": 602, + "<|audio:603|>": 603, + "<|audio:604|>": 604, + "<|audio:605|>": 605, + "<|audio:606|>": 606, + "<|audio:607|>": 607, + "<|audio:608|>": 608, + "<|audio:609|>": 609, + "<|audio:610|>": 610, + "<|audio:611|>": 611, + "<|audio:612|>": 612, + "<|audio:613|>": 613, + "<|audio:614|>": 614, + "<|audio:615|>": 615, + "<|audio:616|>": 616, + "<|audio:617|>": 617, + "<|audio:618|>": 618, + "<|audio:619|>": 619, + "<|audio:620|>": 620, + "<|audio:621|>": 621, + "<|audio:622|>": 622, + "<|audio:623|>": 623, + "<|audio:624|>": 624, + "<|audio:625|>": 625, + "<|audio:626|>": 626, + "<|audio:627|>": 627, + "<|audio:628|>": 628, + "<|audio:629|>": 629, + "<|audio:630|>": 630, + 
"<|audio:631|>": 631, + "<|audio:632|>": 632, + "<|audio:633|>": 633, + "<|audio:634|>": 634, + "<|audio:635|>": 635, + "<|audio:636|>": 636, + "<|audio:637|>": 637, + "<|audio:638|>": 638, + "<|audio:639|>": 639, + "<|audio:640|>": 640, + "<|audio:641|>": 641, + "<|audio:642|>": 642, + "<|audio:643|>": 643, + "<|audio:644|>": 644, + "<|audio:645|>": 645, + "<|audio:646|>": 646, + "<|audio:647|>": 647, + "<|audio:648|>": 648, + "<|audio:649|>": 649, + "<|audio:650|>": 650, + "<|audio:651|>": 651, + "<|audio:652|>": 652, + "<|audio:653|>": 653, + "<|audio:654|>": 654, + "<|audio:655|>": 655, + "<|audio:656|>": 656, + "<|audio:657|>": 657, + "<|audio:658|>": 658, + "<|audio:659|>": 659, + "<|audio:660|>": 660, + "<|audio:661|>": 661, + "<|audio:662|>": 662, + "<|audio:663|>": 663, + "<|audio:664|>": 664, + "<|audio:665|>": 665, + "<|audio:666|>": 666, + "<|audio:667|>": 667, + "<|audio:668|>": 668, + "<|audio:669|>": 669, + "<|audio:670|>": 670, + "<|audio:671|>": 671, + "<|audio:672|>": 672, + "<|audio:673|>": 673, + "<|audio:674|>": 674, + "<|audio:675|>": 675, + "<|audio:676|>": 676, + "<|audio:677|>": 677, + "<|audio:678|>": 678, + "<|audio:679|>": 679, + "<|audio:680|>": 680, + "<|audio:681|>": 681, + "<|audio:682|>": 682, + "<|audio:683|>": 683, + "<|audio:684|>": 684, + "<|audio:685|>": 685, + "<|audio:686|>": 686, + "<|audio:687|>": 687, + "<|audio:688|>": 688, + "<|audio:689|>": 689, + "<|audio:690|>": 690, + "<|audio:691|>": 691, + "<|audio:692|>": 692, + "<|audio:693|>": 693, + "<|audio:694|>": 694, + "<|audio:695|>": 695, + "<|audio:696|>": 696, + "<|audio:697|>": 697, + "<|audio:698|>": 698, + "<|audio:699|>": 699, + "<|audio:700|>": 700, + "<|audio:701|>": 701, + "<|audio:702|>": 702, + "<|audio:703|>": 703, + "<|audio:704|>": 704, + "<|audio:705|>": 705, + "<|audio:706|>": 706, + "<|audio:707|>": 707, + "<|audio:708|>": 708, + "<|audio:709|>": 709, + "<|audio:710|>": 710, + "<|audio:711|>": 711, + "<|audio:712|>": 712, + "<|audio:713|>": 713, + 
"<|audio:714|>": 714, + "<|audio:715|>": 715, + "<|audio:716|>": 716, + "<|audio:717|>": 717, + "<|audio:718|>": 718, + "<|audio:719|>": 719, + "<|audio:720|>": 720, + "<|audio:721|>": 721, + "<|audio:722|>": 722, + "<|audio:723|>": 723, + "<|audio:724|>": 724, + "<|audio:725|>": 725, + "<|audio:726|>": 726, + "<|audio:727|>": 727, + "<|audio:728|>": 728, + "<|audio:729|>": 729, + "<|audio:730|>": 730, + "<|audio:731|>": 731, + "<|audio:732|>": 732, + "<|audio:733|>": 733, + "<|audio:734|>": 734, + "<|audio:735|>": 735, + "<|audio:736|>": 736, + "<|audio:737|>": 737, + "<|audio:738|>": 738, + "<|audio:739|>": 739, + "<|audio:740|>": 740, + "<|audio:741|>": 741, + "<|audio:742|>": 742, + "<|audio:743|>": 743, + "<|audio:744|>": 744, + "<|audio:745|>": 745, + "<|audio:746|>": 746, + "<|audio:747|>": 747, + "<|audio:748|>": 748, + "<|audio:749|>": 749, + "<|audio:750|>": 750, + "<|audio:751|>": 751, + "<|audio:752|>": 752, + "<|audio:753|>": 753, + "<|audio:754|>": 754, + "<|audio:755|>": 755, + "<|audio:756|>": 756, + "<|audio:757|>": 757, + "<|audio:758|>": 758, + "<|audio:759|>": 759, + "<|audio:760|>": 760, + "<|audio:761|>": 761, + "<|audio:762|>": 762, + "<|audio:763|>": 763, + "<|audio:764|>": 764, + "<|audio:765|>": 765, + "<|audio:766|>": 766, + "<|audio:767|>": 767, + "<|audio:768|>": 768, + "<|audio:769|>": 769, + "<|audio:770|>": 770, + "<|audio:771|>": 771, + "<|audio:772|>": 772, + "<|audio:773|>": 773, + "<|audio:774|>": 774, + "<|audio:775|>": 775, + "<|audio:776|>": 776, + "<|audio:777|>": 777, + "<|audio:778|>": 778, + "<|audio:779|>": 779, + "<|audio:780|>": 780, + "<|audio:781|>": 781, + "<|audio:782|>": 782, + "<|audio:783|>": 783, + "<|audio:784|>": 784, + "<|audio:785|>": 785, + "<|audio:786|>": 786, + "<|audio:787|>": 787, + "<|audio:788|>": 788, + "<|audio:789|>": 789, + "<|audio:790|>": 790, + "<|audio:791|>": 791, + "<|audio:792|>": 792, + "<|audio:793|>": 793, + "<|audio:794|>": 794, + "<|audio:795|>": 795, + "<|audio:796|>": 796, + 
"<|audio:797|>": 797, + "<|audio:798|>": 798, + "<|audio:799|>": 799, + "<|audio:800|>": 800, + "<|audio:801|>": 801, + "<|audio:802|>": 802, + "<|audio:803|>": 803, + "<|audio:804|>": 804, + "<|audio:805|>": 805, + "<|audio:806|>": 806, + "<|audio:807|>": 807, + "<|audio:808|>": 808, + "<|audio:809|>": 809, + "<|audio:810|>": 810, + "<|audio:811|>": 811, + "<|audio:812|>": 812, + "<|audio:813|>": 813, + "<|audio:814|>": 814, + "<|audio:815|>": 815, + "<|audio:816|>": 816, + "<|audio:817|>": 817, + "<|audio:818|>": 818, + "<|audio:819|>": 819, + "<|audio:820|>": 820, + "<|audio:821|>": 821, + "<|audio:822|>": 822, + "<|audio:823|>": 823, + "<|audio:824|>": 824, + "<|audio:825|>": 825, + "<|audio:826|>": 826, + "<|audio:827|>": 827, + "<|audio:828|>": 828, + "<|audio:829|>": 829, + "<|audio:830|>": 830, + "<|audio:831|>": 831, + "<|audio:832|>": 832, + "<|audio:833|>": 833, + "<|audio:834|>": 834, + "<|audio:835|>": 835, + "<|audio:836|>": 836, + "<|audio:837|>": 837, + "<|audio:838|>": 838, + "<|audio:839|>": 839, + "<|audio:840|>": 840, + "<|audio:841|>": 841, + "<|audio:842|>": 842, + "<|audio:843|>": 843, + "<|audio:844|>": 844, + "<|audio:845|>": 845, + "<|audio:846|>": 846, + "<|audio:847|>": 847, + "<|audio:848|>": 848, + "<|audio:849|>": 849, + "<|audio:850|>": 850, + "<|audio:851|>": 851, + "<|audio:852|>": 852, + "<|audio:853|>": 853, + "<|audio:854|>": 854, + "<|audio:855|>": 855, + "<|audio:856|>": 856, + "<|audio:857|>": 857, + "<|audio:858|>": 858, + "<|audio:859|>": 859, + "<|audio:860|>": 860, + "<|audio:861|>": 861, + "<|audio:862|>": 862, + "<|audio:863|>": 863, + "<|audio:864|>": 864, + "<|audio:865|>": 865, + "<|audio:866|>": 866, + "<|audio:867|>": 867, + "<|audio:868|>": 868, + "<|audio:869|>": 869, + "<|audio:870|>": 870, + "<|audio:871|>": 871, + "<|audio:872|>": 872, + "<|audio:873|>": 873, + "<|audio:874|>": 874, + "<|audio:875|>": 875, + "<|audio:876|>": 876, + "<|audio:877|>": 877, + "<|audio:878|>": 878, + "<|audio:879|>": 879, + 
"<|audio:880|>": 880, + "<|audio:881|>": 881, + "<|audio:882|>": 882, + "<|audio:883|>": 883, + "<|audio:884|>": 884, + "<|audio:885|>": 885, + "<|audio:886|>": 886, + "<|audio:887|>": 887, + "<|audio:888|>": 888, + "<|audio:889|>": 889, + "<|audio:890|>": 890, + "<|audio:891|>": 891, + "<|audio:892|>": 892, + "<|audio:893|>": 893, + "<|audio:894|>": 894, + "<|audio:895|>": 895, + "<|audio:896|>": 896, + "<|audio:897|>": 897, + "<|audio:898|>": 898, + "<|audio:899|>": 899, + "<|audio:900|>": 900, + "<|audio:901|>": 901, + "<|audio:902|>": 902, + "<|audio:903|>": 903, + "<|audio:904|>": 904, + "<|audio:905|>": 905, + "<|audio:906|>": 906, + "<|audio:907|>": 907, + "<|audio:908|>": 908, + "<|audio:909|>": 909, + "<|audio:910|>": 910, + "<|audio:911|>": 911, + "<|audio:912|>": 912, + "<|audio:913|>": 913, + "<|audio:914|>": 914, + "<|audio:915|>": 915, + "<|audio:916|>": 916, + "<|audio:917|>": 917, + "<|audio:918|>": 918, + "<|audio:919|>": 919, + "<|audio:920|>": 920, + "<|audio:921|>": 921, + "<|audio:922|>": 922, + "<|audio:923|>": 923, + "<|audio:924|>": 924, + "<|audio:925|>": 925, + "<|audio:926|>": 926, + "<|audio:927|>": 927, + "<|audio:928|>": 928, + "<|audio:929|>": 929, + "<|audio:930|>": 930, + "<|audio:931|>": 931, + "<|audio:932|>": 932, + "<|audio:933|>": 933, + "<|audio:934|>": 934, + "<|audio:935|>": 935, + "<|audio:936|>": 936, + "<|audio:937|>": 937, + "<|audio:938|>": 938, + "<|audio:939|>": 939, + "<|audio:940|>": 940, + "<|audio:941|>": 941, + "<|audio:942|>": 942, + "<|audio:943|>": 943, + "<|audio:944|>": 944, + "<|audio:945|>": 945, + "<|audio:946|>": 946, + "<|audio:947|>": 947, + "<|audio:948|>": 948, + "<|audio:949|>": 949, + "<|audio:950|>": 950, + "<|audio:951|>": 951, + "<|audio:952|>": 952, + "<|audio:953|>": 953, + "<|audio:954|>": 954, + "<|audio:955|>": 955, + "<|audio:956|>": 956, + "<|audio:957|>": 957, + "<|audio:958|>": 958, + "<|audio:959|>": 959, + "<|audio:960|>": 960, + "<|audio:961|>": 961, + "<|audio:962|>": 962, + 
"<|audio:963|>": 963, + "<|audio:964|>": 964, + "<|audio:965|>": 965, + "<|audio:966|>": 966, + "<|audio:967|>": 967, + "<|audio:968|>": 968, + "<|audio:969|>": 969, + "<|audio:970|>": 970, + "<|audio:971|>": 971, + "<|audio:972|>": 972, + "<|audio:973|>": 973, + "<|audio:974|>": 974, + "<|audio:975|>": 975, + "<|audio:976|>": 976, + "<|audio:977|>": 977, + "<|audio:978|>": 978, + "<|audio:979|>": 979, + "<|audio:980|>": 980, + "<|audio:981|>": 981, + "<|audio:982|>": 982, + "<|audio:983|>": 983, + "<|audio:984|>": 984, + "<|audio:985|>": 985, + "<|audio:986|>": 986, + "<|audio:987|>": 987, + "<|audio:988|>": 988, + "<|audio:989|>": 989, + "<|audio:990|>": 990, + "<|audio:991|>": 991, + "<|audio:992|>": 992, + "<|audio:993|>": 993, + "<|audio:994|>": 994, + "<|audio:995|>": 995, + "<|audio:996|>": 996, + "<|audio:997|>": 997, + "<|audio:998|>": 998, + "<|audio:999|>": 999, + "<|audio:1000|>": 1000, + "<|audio:1001|>": 1001, + "<|audio:1002|>": 1002, + "<|audio:1003|>": 1003, + "<|audio:1004|>": 1004, + "<|audio:1005|>": 1005, + "<|audio:1006|>": 1006, + "<|audio:1007|>": 1007, + "<|audio:1008|>": 1008, + "<|audio:1009|>": 1009, + "<|audio:1010|>": 1010, + "<|audio:1011|>": 1011, + "<|audio:1012|>": 1012, + "<|audio:1013|>": 1013, + "<|audio:1014|>": 1014, + "<|audio:1015|>": 1015, + "<|audio:1016|>": 1016, + "<|audio:1017|>": 1017, + "<|audio:1018|>": 1018, + "<|audio:1019|>": 1019, + "<|audio:1020|>": 1020, + "<|audio:1021|>": 1021, + "<|audio:1022|>": 1022, + "<|audio:1023|>": 1023, + "<|startoftranscript|>": 1024, + "<|endoftranscript|>": 1025, + "<|padding|>": 1026, + "'": 1027, + "a": 1028, + "b": 1029, + "c": 1030, + "d": 1031, + "e": 1032, + "f": 1033, + "g": 1034, + "h": 1035, + "i": 1036, + "j": 1037, + "k": 1038, + "l": 1039, + "m": 1040, + "n": 1041, + "o": 1042, + "p": 1043, + "q": 1044, + "r": 1045, + "s": 1046, + "t": 1047, + "u": 1048, + "v": 1049, + "w": 1050, + "x": 1051, + "y": 1052, + "z": 1053, + "▁": 1054, + "▁t": 1055, + "he": 1056, + "▁a": 
1057, + "▁the": 1058, + "in": 1059, + "▁s": 1060, + "▁w": 1061, + "▁o": 1062, + "re": 1063, + "nd": 1064, + "▁b": 1065, + "▁h": 1066, + "er": 1067, + "▁m": 1068, + "▁i": 1069, + "ou": 1070, + "▁c": 1071, + "▁f": 1072, + "at": 1073, + "ed": 1074, + "▁and": 1075, + "en": 1076, + "▁to": 1077, + "▁of": 1078, + "on": 1079, + "is": 1080, + "▁d": 1081, + "ing": 1082, + "▁th": 1083, + "▁p": 1084, + "▁he": 1085, + "or": 1086, + "▁l": 1087, + "es": 1088, + "▁in": 1089, + "ll": 1090, + "it": 1091, + "ar": 1092, + "as": 1093, + "an": 1094, + "▁n": 1095, + "▁g": 1096, + "om": 1097, + "▁be": 1098, + "▁ha": 1099, + "▁e": 1100, + "le": 1101, + "ot": 1102, + "▁y": 1103, + "ut": 1104, + "ow": 1105, + "ic": 1106, + "▁wh": 1107, + "▁it": 1108, + "ld": 1109, + "ve": 1110, + "▁that": 1111, + "ly": 1112, + "▁was": 1113, + "id": 1114, + "se": 1115, + "st": 1116, + "▁on": 1117, + "gh": 1118, + "ent": 1119, + "▁re": 1120, + "▁you": 1121, + "im": 1122, + "ce": 1123, + "▁u": 1124, + "ver": 1125, + "ion": 1126, + "▁as": 1127, + "et": 1128, + "▁for": 1129, + "ay": 1130, + "▁his": 1131, + "▁we": 1132, + "ith": 1133, + "al": 1134, + "ir": 1135, + "▁r": 1136, + "▁with": 1137, + "▁st": 1138, + "ad": 1139, + "ur": 1140, + "ght": 1141, + "▁an": 1142, + "▁her": 1143, + "▁not": 1144, + "▁is": 1145, + "▁had": 1146, + "ter": 1147, + "her": 1148, + "ac": 1149, + "am": 1150, + "▁at": 1151, + "oo": 1152, + "▁but": 1153, + "ould": 1154, + "▁she": 1155, + "▁k": 1156, + "▁se": 1157, + "▁sa": 1158, + "▁sh": 1159, + "▁fr": 1160, + "▁him": 1161, + "▁so": 1162, + "▁me": 1163, + "ill": 1164, + "ain": 1165, + "▁su": 1166, + "ight": 1167, + "ch": 1168, + "red": 1169, + "ct": 1170, + "all": 1171, + "ro": 1172, + "ke": 1173, + "ess": 1174, + "il": 1175, + "'s": 1176, + "ore": 1177, + "▁de": 1178, + "▁my": 1179, + "▁they": 1180, + "▁whe": 1181, + "▁all": 1182, + "ich": 1183, + "▁ne": 1184, + "ri": 1185, + "▁by": 1186, + "▁have": 1187, + "ome": 1188, + "pp": 1189, + "▁this": 1190, + "▁li": 1191, + "▁do": 1192, + "▁con": 
1193, + "us": 1194, + "▁which": 1195, + "▁ch": 1196, + "ul": 1197, + "qu": 1198, + "▁j": 1199, + "▁up": 1200, + "▁said": 1201, + "▁from": 1202, + "ard": 1203, + "ge": 1204, + "▁or": 1205, + "▁v": 1206, + "▁one": 1207, + "▁no": 1208, + "th": 1209, + "▁ex": 1210, + "▁were": 1211, + "▁there": 1212, + "pe": 1213, + "and": 1214, + "est": 1215, + "▁man": 1216, + "▁who": 1217, + "ble": 1218, + "ie": 1219, + "▁al": 1220, + "ant": 1221, + "res": 1222, + "ous": 1223, + "ust": 1224, + "very": 1225, + "ation": 1226, + "▁fe": 1227, + "▁them": 1228, + "lf": 1229, + "▁when": 1230, + "nt": 1231, + "ame": 1232, + "ind": 1233, + "ra": 1234, + "▁go": 1235, + "ers": 1236, + "ast": 1237, + "fe": 1238, + "ood": 1239, + "▁kn": 1240, + "▁int": 1241, + "ist": 1242, + "▁are": 1243, + "art": 1244, + "out": 1245, + "▁would": 1246, + "▁le": 1247, + "▁what": 1248, + "os": 1249, + "▁their": 1250, + "ong": 1251, + "our": 1252, + "▁if": 1253, + "▁com": 1254, + "ound": 1255, + "▁ab": 1256, + "▁out": 1257, + "▁wor": 1258, + "em": 1259, + "▁will": 1260, + "ak": 1261, + "▁mis": 1262, + "ate": 1263, + "ol": 1264, + "um": 1265, + "un": 1266, + "itt": 1267, + "ough": 1268, + "ked": 1269, + "ig": 1270, + "ap": 1271, + "one": 1272, + "▁been": 1273, + "own": 1274, + "ive": 1275, + "▁then": 1276, + "▁br": 1277, + "ven": 1278, + "if": 1279, + "▁ar": 1280, + "'t": 1281, + "self": 1282, + "▁tr": 1283, + "▁pl": 1284, + "▁ro": 1285, + "▁pr": 1286, + "ther": 1287, + "reat": 1288, + "▁un": 1289, + "▁af": 1290, + "▁sp": 1291, + "▁qu": 1292, + "▁pro": 1293, + "ity": 1294, + "hed": 1295, + "▁tw": 1296, + "▁ag": 1297, + "▁could": 1298, + "ost": 1299, + "ace": 1300, + "ort": 1301, + "ure": 1302, + "ake": 1303, + "▁am": 1304, + "ack": 1305, + "▁any": 1306, + "▁some": 1307, + "▁your": 1308, + "▁more": 1309, + "▁can": 1310, + "au": 1311, + "▁tim": 1312, + "ep": 1313, + "ag": 1314, + "▁en": 1315, + "ck": 1316, + "▁into": 1317, + "▁cl": 1318, + "ry": 1319, + "▁now": 1320, + "hing": 1321, + "nder": 1322, + "are": 1323, + 
"▁very": 1324, + "▁gr": 1325, + "el": 1326, + "ose": 1327, + "▁loo": 1328, + "▁bo": 1329, + "ved": 1330, + "op": 1331, + "▁other": 1332, + "▁did": 1333, + "ance": 1334, + "▁than": 1335, + "ittle": 1336, + "▁little": 1337, + "ine": 1338, + "ies": 1339, + "way": 1340, + "ite": 1341, + "▁like": 1342, + "ide": 1343, + "▁lo": 1344, + "ass": 1345, + "▁bl": 1346, + "able": 1347, + "urn": 1348, + "ought": 1349, + "▁know": 1350, + "other": 1351, + "▁time": 1352, + "▁im": 1353, + "▁dis": 1354, + "▁us": 1355, + "▁co": 1356, + "fore": 1357, + "▁how": 1358, + "▁te": 1359, + "ence": 1360, + "▁day": 1361, + "▁ad": 1362, + "ade": 1363, + "ice": 1364, + "▁about": 1365, + "▁see": 1366, + "▁over": 1367, + "pt": 1368, + "cc": 1369, + "▁too": 1370, + "ink": 1371, + "▁fl": 1372, + "wn": 1373, + "▁great": 1374, + "▁after": 1375, + "pl": 1376, + "de": 1377, + "▁per": 1378, + "ment": 1379, + "▁again": 1380, + "▁upon": 1381, + "▁hand": 1382, + "ab": 1383, + "▁has": 1384, + "ree": 1385, + "ish": 1386, + "ci": 1387, + "▁only": 1388, + "ally": 1389, + "▁well": 1390, + "▁should": 1391, + "▁po": 1392, + "▁mar": 1393, + "ress": 1394, + "▁say": 1395, + "▁good": 1396, + "ather": 1397, + "▁two": 1398, + "ings": 1399, + "▁pe": 1400, + "ount": 1401, + "▁our": 1402, + "ire": 1403, + "ving": 1404, + "▁down": 1405, + "ars": 1406, + "ert": 1407, + "we": 1408, + "▁before": 1409, + "ile": 1410, + "ves": 1411, + "▁app": 1412, + "▁every": 1413, + "▁its": 1414, + "▁old": 1415, + "▁thr": 1416, + "▁mu": 1417, + "▁made": 1418, + "ied": 1419, + "ick": 1420, + "▁long": 1421, + "age": 1422, + "te": 1423, + "ft": 1424, + "▁where": 1425, + "ang": 1426, + "▁never": 1427, + "▁must": 1428, + "▁pre": 1429, + "▁sm": 1430, + "ful": 1431, + "▁such": 1432, + "ull": 1433, + "▁str": 1434, + "ions": 1435, + "▁off": 1436, + "▁sc": 1437, + "▁came": 1438, + "ious": 1439, + "ue": 1440, + "▁miss": 1441, + "ward": 1442, + "ild": 1443, + "▁fir": 1444, + "▁even": 1445, + "▁under": 1446, + "act": 1447, + "▁these": 1448, + "▁come": 1449, 
+ "▁part": 1450, + "▁fo": 1451, + "ated": 1452, + "ness": 1453, + "▁rem": 1454, + "ord": 1455, + "▁bec": 1456, + "ty": 1457, + "▁may": 1458, + "▁much": 1459, + "▁think": 1460, + "per": 1461, + "▁way": 1462, + "▁mister": 1463, + "led": 1464, + "▁let": 1465, + "orn": 1466, + "▁ey": 1467, + "▁gl": 1468, + "▁cont": 1469, + "▁thought": 1470, + "▁look": 1471, + "ect": 1472, + "▁spe": 1473, + "ise": 1474, + "▁back": 1475, + "▁bet": 1476, + "ady": 1477, + "▁ye": 1478, + "ans": 1479, + "ach": 1480, + "▁here": 1481, + "▁just": 1482, + "ren": 1483, + "▁first": 1484, + "▁ho": 1485, + "▁own": 1486, + "▁des": 1487, + "▁ob": 1488, + "ried": 1489, + "ud": 1490, + "ary": 1491, + "▁went": 1492, + "▁mo": 1493, + "▁himself": 1494, + "▁men": 1495, + "air": 1496, + "cl": 1497, + "ave": 1498, + "ath": 1499, + "ff": 1500, + "▁sl": 1501, + "co": 1502, + "on't": 1503, + "llow": 1504, + "▁cr": 1505, + "▁res": 1506, + "▁i'": 1507, + "▁might": 1508, + "ily": 1509, + "▁seem": 1510, + "int": 1511, + "ip": 1512, + "▁beg": 1513, + "ouse": 1514, + "anc": 1515, + "n't": 1516, + "▁wat": 1517, + "▁through": 1518, + "▁comp": 1519, + "ber": 1520, + "▁away": 1521, + "▁car": 1522, + "▁em": 1523, + "▁get": 1524, + "▁imp": 1525, + "▁head": 1526, + "oss": 1527, + "▁life": 1528, + "▁bel": 1529, + "▁without": 1530, + "▁most": 1531, + "▁pass": 1532, + "▁make": 1533, + "▁cons": 1534, + "ened": 1535, + "▁som": 1536, + "▁turn": 1537, + "av": 1538, + "ng": 1539, + "▁shall": 1540, + "▁acc": 1541, + "▁those": 1542, + "▁pres": 1543, + "▁eyes": 1544, + "▁house": 1545, + "iz": 1546, + "▁somet": 1547, + "▁jo": 1548, + "▁still": 1549, + "▁call": 1550, + "▁night": 1551, + "hes": 1552, + "▁op": 1553, + "ause": 1554, + "▁wom": 1555, + "▁last": 1556, + "ks": 1557, + "less": 1558, + "ared": 1559, + "▁comm": 1560, + "▁don't": 1561, + "▁tell": 1562, + "▁ent": 1563, + "▁nothing": 1564, + "▁new": 1565, + "ign": 1566, + "▁take": 1567, + "▁being": 1568, + "▁many": 1569, + "▁word": 1570, + "ons": 1571, + "▁found": 1572, + "▁ret": 
1573, + "ase": 1574, + "▁ear": 1575, + "▁while": 1576, + "▁att": 1577, + "ory": 1578, + "ix": 1579, + "▁ser": 1580, + "▁saw": 1581, + "▁put": 1582, + "ne": 1583, + "oth": 1584, + "iend": 1585, + "▁peop": 1586, + "▁wr": 1587, + "▁young": 1588, + "ark": 1589, + "dy": 1590, + "aking": 1591, + "les": 1592, + "▁count": 1593, + "▁once": 1594, + "▁friend": 1595, + "▁la": 1596, + "ens": 1597, + "▁people": 1598, + "pect": 1599, + "ors": 1600, + "fect": 1601, + "▁mat": 1602, + "ince": 1603, + "ible": 1604, + "ered": 1605, + "▁room": 1606, + "▁three": 1607, + "▁yet": 1608, + "ail": 1609, + "▁same": 1610, + "▁father": 1611, + "▁right": 1612, + "▁child": 1613, + "▁cour": 1614, + "igh": 1615, + "▁place": 1616, + "▁another": 1617, + "ult": 1618, + "iv": 1619, + "ition": 1620, + "▁ind": 1621, + "▁want": 1622, + "▁though": 1623, + "▁nor": 1624, + "▁far": 1625, + "▁king": 1626, + "▁happ": 1627, + "▁heart": 1628, + "▁face": 1629, + "▁end": 1630, + "▁ever": 1631, + "▁nat": 1632, + "thing": 1633, + "▁love": 1634, + "get": 1635, + "▁took": 1636, + "▁dist": 1637, + "ever": 1638, + "ian": 1639, + "▁hu": 1640, + "ew": 1641, + "▁arm": 1642, + "▁inst": 1643, + "man": 1644, + "▁work": 1645, + "▁light": 1646, + "▁char": 1647, + "▁ple": 1648, + "ict": 1649, + "▁set": 1650, + "▁ac": 1651, + "▁looked": 1652, + "▁missus": 1653, + "▁asked": 1654, + "▁mind": 1655, + "▁yes": 1656, + "▁supp": 1657, + "▁inte": 1658, + "▁rep": 1659, + "cess": 1660, + "ently": 1661, + "▁left": 1662, + "gg": 1663, + "ertain": 1664, + "▁ke": 1665, + "ished": 1666, + "ub": 1667, + "▁pers": 1668, + "ways": 1669, + "▁things": 1670, + "alk": 1671, + "irl": 1672, + "▁mom": 1673, + "▁sir": 1674, + "▁wa": 1675, + "▁moment": 1676, + "ations": 1677, + "▁sat": 1678, + "sel": 1679, + "▁find": 1680, + "ower": 1681, + "ia": 1682, + "vent": 1683, + "rew": 1684, + "▁world": 1685, + "ject": 1686, + "▁give": 1687, + "▁cap": 1688, + "▁why": 1689, + "so": 1690, + "▁gu": 1691, + "▁mother": 1692, + "▁gen": 1693, + "▁sw": 1694, + "▁always": 
1695, + "der": 1696, + "lt": 1697, + "ling": 1698, + "▁ans": 1699, + "pped": 1700, + "▁soon": 1701, + "▁act": 1702, + "▁form": 1703, + "▁el": 1704, + "dd": 1705, + "▁heard": 1706, + "ret": 1707, + "▁thing": 1708, + "▁something": 1709, + "▁seemed": 1710, + "▁sub": 1711, + "▁door": 1712, + "ange": 1713, + "▁girl": 1714, + "ced": 1715, + "▁appe": 1716, + "ither": 1717, + "▁wind": 1718, + "▁because": 1719, + "▁dif": 1720, + "▁mon": 1721, + "ss": 1722, + "▁going": 1723, + "▁told": 1724, + "orm": 1725, + "▁home": 1726, + "ained": 1727, + "▁got": 1728, + "▁war": 1729, + "▁god": 1730, + "aught": 1731, + "▁gi": 1732, + "▁eng": 1733, + "▁sur": 1734, + "ning": 1735, + "▁hands": 1736, + "▁woman": 1737, + "▁follow": 1738, + "land": 1739, + "aut": 1740, + "▁vo": 1741, + "▁feel": 1742, + "▁rel": 1743, + "▁poss": 1744, + "ched": 1745, + "ical": 1746, + "ple": 1747, + "ph": 1748, + "▁boy": 1749, + "▁return": 1750, + "▁reg": 1751, + "▁rest": 1752, + "ook": 1753, + "▁knew": 1754, + "ner": 1755, + "▁each": 1756, + "▁oh": 1757, + "▁sil": 1758, + "▁kind": 1759, + "▁exp": 1760, + "▁ma": 1761, + "▁cle": 1762, + "▁hel": 1763, + "iver": 1764, + "ting": 1765, + "▁del": 1766, + "ual": 1767, + "▁inf": 1768, + "▁ass": 1769, + "▁water": 1770, + "▁conf": 1771, + "▁bre": 1772, + "▁wo": 1773, + "cept": 1774, + "▁belie": 1775, + "▁certain": 1776, + "▁against": 1777, + "▁hard": 1778, + "▁ph": 1779, + "row": 1780, + "▁unt": 1781, + "▁years": 1782, + "▁quite": 1783, + "▁side": 1784, + "iness": 1785, + "ined": 1786, + "▁near": 1787, + "▁hor": 1788, + "ters": 1789, + "ired": 1790, + "ool": 1791, + "▁four": 1792, + "▁few": 1793, + "▁done": 1794, + "ier": 1795, + "▁che": 1796, + "rest": 1797, + "ited": 1798, + "most": 1799, + "▁better": 1800, + "▁half": 1801, + "▁min": 1802, + "▁tre": 1803, + "ps": 1804, + "▁also": 1805, + "▁care": 1806, + "ock": 1807, + "uck": 1808, + "oub": 1809, + "▁began": 1810, + "ully": 1811, + "▁enough": 1812, + "ised": 1813, + "ru": 1814, + "▁having": 1815, + "▁seen": 1816, + 
"▁gener": 1817, + "▁lady": 1818, + "▁dra": 1819, + "▁hum": 1820, + "aps": 1821, + "ott": 1822, + "▁pur": 1823, + "aken": 1824, + "ross": 1825, + "ying": 1826, + "▁ter": 1827, + "▁hour": 1828, + "▁inde": 1829, + "ank": 1830, + "▁called": 1831, + "ial": 1832, + "ason": 1833, + "▁beh": 1834, + "▁does": 1835, + "▁whole": 1836, + "▁morn": 1837, + "▁turned": 1838, + "▁pleas": 1839, + "▁ste": 1840, + "▁ref": 1841, + "▁gave": 1842, + "ense": 1843, + "▁occ": 1844, + "ib": 1845, + "▁course": 1846, + "▁ins": 1847, + "ream": 1848, + "gether": 1849, + "uth": 1850, + "▁both": 1851, + "▁sou": 1852, + "▁cur": 1853, + "▁add": 1854, + "een": 1855, + "▁col": 1856, + "▁read": 1857, + "ween": 1858, + "selves": 1859, + "▁among": 1860, + "▁between": 1861, + "▁inc": 1862, + "▁keep": 1863, + "▁beaut": 1864, + "ular": 1865, + "▁poor": 1866, + "▁it's": 1867, + "▁sure": 1868, + "▁morning": 1869, + "▁white": 1870, + "ged": 1871, + "▁name": 1872, + "▁dear": 1873, + "▁toward": 1874, + "ute": 1875, + "▁small": 1876, + "▁whom": 1877, + "▁repl": 1878, + "▁sk": 1879, + "▁lar": 1880, + "▁felt": 1881, + "bo": 1882, + "osed": 1883, + "ating": 1884, + "▁myself": 1885, + "▁open": 1886, + "▁six": 1887, + "▁herself": 1888, + "▁however": 1889, + "▁bu": 1890, + "ond": 1891, + "aint": 1892, + "xt": 1893, + "▁fore": 1894, + "▁inter": 1895, + "▁ev": 1896, + "▁high": 1897, + "ction": 1898, + "▁hund": 1899, + "▁stood": 1900, + "▁hundred": 1901, + "aster": 1902, + "▁tra": 1903, + "▁show": 1904, + "▁sent": 1905, + "ife": 1906, + "▁round": 1907, + "▁sim": 1908, + "▁dr": 1909, + "▁gra": 1910, + "▁words": 1911, + "▁days": 1912, + "▁almost": 1913, + "ale": 1914, + "vel": 1915, + "▁point": 1916, + "ents": 1917, + "▁gre": 1918, + "▁eight": 1919, + "ces": 1920, + "ates": 1921, + "dden": 1922, + "▁fam": 1923, + "▁stand": 1924, + "▁bus": 1925, + "▁land": 1926, + "▁ed": 1927, + "▁mean": 1928, + "ung": 1929, + "haps": 1930, + "▁sun": 1931, + "ures": 1932, + "▁since": 1933, + "iet": 1934, + "ird": 1935, + "▁perhaps": 1936, + 
"ned": 1937, + "▁sle": 1938, + "iss": 1939, + "▁best": 1940, + "▁sudden": 1941, + "▁dark": 1942, + "▁replied": 1943, + "▁voice": 1944, + "▁met": 1945, + "▁anything": 1946, + "▁till": 1947, + "▁underst": 1948, + "▁bar": 1949, + "its": 1950, + "▁until": 1951, + "ins": 1952, + "oud": 1953, + "▁black": 1954, + "▁bro": 1955, + "▁hear": 1956, + "▁looking": 1957, + "▁cried": 1958, + "▁you'": 1959, + "▁fact": 1960, + "amp": 1961, + "▁prin": 1962, + "▁less": 1963, + "▁lay": 1964, + "▁next": 1965, + "▁law": 1966, + "up": 1967, + "▁power": 1968, + "▁prop": 1969, + "not": 1970, + "rent": 1971, + "▁brought": 1972, + "ately": 1973, + "enty": 1974, + "▁country": 1975, + "▁help": 1976, + "als": 1977, + "▁quest": 1978, + "med": 1979, + "▁use": 1980, + "▁vis": 1981, + "▁sn": 1982, + "▁i'm": 1983, + "fully": 1984, + "▁spo": 1985, + "▁together": 1986, + "▁need": 1987, + "▁air": 1988, + "▁adv": 1989, + "▁person": 1990, + "▁indeed": 1991, + "▁contin": 1992, + "▁unc": 1993, + "oney": 1994, + "▁gent": 1995, + "▁present": 1996, + "▁aw": 1997, + "▁par": 1998, + "ows": 1999, + "ured": 2000, + "▁full": 2001, + "tain": 2002, + "▁run": 2003, + "▁rather": 2004, + "▁ide": 2005, + "▁cond": 2006, + "nded": 2007, + "▁lat": 2008, + "▁sy": 2009, + "be": 2010, + "du": 2011, + "▁har": 2012, + "▁feet": 2013, + "▁fin": 2014, + "eter": 2015, + "▁fall": 2016, + "cei": 2017, + "▁five": 2018, + "▁mil": 2019, + "▁bed": 2020, + "oc": 2021, + "▁doct": 2022, + "▁interest": 2023, + "ressed": 2024, + "▁matter": 2025, + "▁lord": 2026, + "▁gone": 2027, + "▁es": 2028, + "fort": 2029, + "▁death": 2030, + "▁wife": 2031, + "▁serv": 2032, + "▁pat": 2033, + "ering": 2034, + "oubt": 2035, + "▁adm": 2036, + "▁talk": 2037, + "▁taken": 2038, + "▁art": 2039, + "▁tri": 2040, + "▁others": 2041, + "▁hope": 2042, + "ash": 2043, + "az": 2044, + "▁ext": 2045, + "▁cannot": 2046, + "ief": 2047, + "▁speak": 2048, + "▁lau": 2049, + "▁themselves": 2050, + "▁along": 2051, + "▁dire": 2052, + "ove": 2053, + "mb": 2054, + "pr": 2055, + 
"▁bes": 2056, + "▁cou": 2057, + "▁mor": 2058, + "ten": 2059, + "▁gentle": 2060, + "uring": 2061, + "▁fire": 2062, + "▁large": 2063, + "▁pol": 2064, + "▁cat": 2065, + "▁swe": 2066, + "ention": 2067, + "vers": 2068, + "▁thus": 2069, + "app": 2070, + "▁sec": 2071, + "▁play": 2072, + "▁real": 2073, + "▁prom": 2074, + "ments": 2075, + "wered": 2076, + "ield": 2077, + "ains": 2078, + "ison": 2079, + "ached": 2080, + "▁thou": 2081, + "▁reason": 2082, + "▁thous": 2083, + "iting": 2084, + "▁brother": 2085, + "akes": 2086, + "▁thousand": 2087, + "ont": 2088, + "▁money": 2089, + "▁remem": 2090, + "▁dep": 2091, + "▁answered": 2092, + "▁true": 2093, + "▁children": 2094, + "▁behind": 2095, + "oy": 2096, + "▁sound": 2097, + "ants": 2098, + "ably": 2099, + "▁wood": 2100, + "used": 2101, + "▁dec": 2102, + "▁whose": 2103, + "od": 2104, + "▁ele": 2105, + "▁twenty": 2106, + "▁ra": 2107, + "itu": 2108, + "▁believe": 2109, + "▁wonder": 2110, + "ene": 2111, + "▁inv": 2112, + "▁hon": 2113, + "aring": 2114, + "sh": 2115, + "ued": 2116, + "▁suff": 2117, + "▁opp": 2118, + "▁doubt": 2119, + "▁rec": 2120, + "ton": 2121, + "▁hold": 2122, + "▁diffe": 2123, + "▁passed": 2124, + "▁cor": 2125, + "me": 2126, + "ided": 2127, + "ities": 2128, + "▁mer": 2129, + "▁sing": 2130, + "▁nature": 2131, + "▁alone": 2132, + "▁dead": 2133, + "▁pri": 2134, + "ken": 2135, + "lic": 2136, + "▁red": 2137, + "▁bur": 2138, + "aces": 2139, + "▁close": 2140, + "▁gold": 2141, + "▁start": 2142, + "▁hur": 2143, + "▁fur": 2144, + "og": 2145, + "ances": 2146, + "▁ask": 2147, + "▁doctor": 2148, + "▁son": 2149, + "▁ground": 2150, + "wer": 2151, + "ets": 2152, + "▁sea": 2153, + "▁strong": 2154, + "▁leave": 2155, + "▁compan": 2156, + "▁i'll": 2157, + "ery": 2158, + "cy": 2159, + "illed": 2160, + "ept": 2161, + "ides": 2162, + "tle": 2163, + "▁ce": 2164, + "▁obs": 2165, + "body": 2166, + "▁fell": 2167, + "▁sign": 2168, + "cond": 2169, + "▁mount": 2170, + "▁fair": 2171, + "▁given": 2172, + "▁therefore": 2173, + "ane": 2174, + "▁ir": 
2175, + "▁deep": 2176, + "iful": 2177, + "fic": 2178, + "ys": 2179, + "▁often": 2180, + "▁body": 2181, + "unt": 2182, + "▁short": 2183, + "▁tem": 2184, + "▁fa": 2185, + "▁master": 2186, + "▁earth": 2187, + "▁pap": 2188, + "ceed": 2189, + "▁stre": 2190, + "▁second": 2191, + "▁fort": 2192, + "bed": 2193, + "gth": 2194, + "owed": 2195, + "▁horse": 2196, + "idd": 2197, + "▁mad": 2198, + "ually": 2199, + "▁pa": 2200, + "▁chr": 2201, + "▁order": 2202, + "▁ten": 2203, + "vered": 2204, + "▁const": 2205, + "▁wish": 2206, + "▁fif": 2207, + "▁eas": 2208, + "▁cir": 2209, + "▁dro": 2210, + "aim": 2211, + "hen": 2212, + "▁ca": 2213, + "▁really": 2214, + "read": 2215, + "ceived": 2216, + "▁ill": 2217, + "▁fear": 2218, + "osition": 2219, + "▁understand": 2220, + "▁spir": 2221, + "▁list": 2222, + "▁abs": 2223, + "▁spr": 2224, + "aced": 2225, + "▁question": 2226, + "anger": 2227, + "▁everything": 2228, + "aughter": 2229, + "▁aff": 2230, + "▁wall": 2231, + "▁coming": 2232, + "ching": 2233, + "ready": 2234, + "ider": 2235, + "▁above": 2236, + "▁prince": 2237, + "▁already": 2238, + "▁least": 2239, + "▁reco": 2240, + "▁expl": 2241, + "▁step": 2242, + "▁used": 2243, + "▁ru": 2244, + "▁itself": 2245, + "ister": 2246, + "▁necess": 2247, + "▁case": 2248, + "▁around": 2249, + "hn": 2250, + "▁soul": 2251, + "▁suddenly": 2252, + "ger": 2253, + "▁lad": 2254, + "▁evening": 2255, + "▁mag": 2256, + "▁general": 2257, + "▁num": 2258, + "imes": 2259, + "▁known": 2260, + "▁wal": 2261, + "▁quick": 2262, + "ized": 2263, + "▁mus": 2264, + "▁sch": 2265, + "▁captain": 2266, + "▁that's": 2267, + "ific": 2268, + "▁whether": 2269, + "▁lear": 2270, + "gn": 2271, + "▁within": 2272, + "men": 2273, + "▁live": 2274, + "vern": 2275, + "▁times": 2276, + "▁expect": 2277, + "▁state": 2278, + "▁friends": 2279, + "▁bring": 2280, + "▁sort": 2281, + "▁women": 2282, + "▁table": 2283, + "▁meet": 2284, + "▁john": 2285, + "▁circ": 2286, + "▁sum": 2287, + "▁returned": 2288, + "iled": 2289, + "▁dri": 2290, + "▁held": 2291, + 
"▁exc": 2292, + "▁big": 2293, + "▁says": 2294, + "▁perfect": 2295, + "▁lea": 2296, + "▁obser": 2297, + "▁else": 2298, + "▁during": 2299, + "ident": 2300, + "▁hus": 2301, + "ted": 2302, + "▁beautiful": 2303, + "▁clear": 2304, + "▁either": 2305, + "▁town": 2306, + "▁sight": 2307, + "▁lost": 2308, + "▁sleep": 2309, + "▁means": 2310, + "▁foot": 2311, + "▁cut": 2312, + "▁cal": 2313, + "▁kept": 2314, + "▁ran": 2315, + "ience": 2316, + "▁prof": 2317, + "tered": 2318, + "here": 2319, + "ety": 2320, + "▁fellow": 2321, + "▁can't": 2322, + "▁mist": 2323, + "▁past": 2324, + "▁dream": 2325, + "ages": 2326, + "▁became": 2327, + "▁pret": 2328, + "▁disc": 2329, + "▁bad": 2330, + "▁making": 2331, + "ution": 2332, + "▁object": 2333, + "▁towards": 2334, + "▁low": 2335, + "ught": 2336, + "▁dev": 2337, + "▁human": 2338, + "▁manner": 2339, + "▁strange": 2340, + "▁year": 2341, + "old": 2342, + "ient": 2343, + "ines": 2344, + "▁sever": 2345, + "mon": 2346, + "▁ann": 2347, + "airs": 2348, + "ches": 2349, + "▁city": 2350, + "▁sometimes": 2351, + "'d": 2352, + "▁rose": 2353, + "▁est": 2354, + "ility": 2355, + "▁walk": 2356, + "▁ready": 2357, + "▁pal": 2358, + "▁leg": 2359, + "▁road": 2360, + "ians": 2361, + "cious": 2362, + "▁corn": 2363, + "▁thy": 2364, + "▁cold": 2365, + "lly": 2366, + "iously": 2367, + "lish": 2368, + "▁stra": 2369, + "mer": 2370, + "▁bat": 2371, + "owing": 2372, + "iew": 2373, + "▁christ": 2374, + "▁squ": 2375, + "▁truth": 2376, + "cri": 2377, + "lled": 2378, + "▁thir": 2379, + "▁didn't": 2380, + "bert": 2381, + "▁soci": 2382, + "br": 2383, + "▁bit": 2384, + "▁subject": 2385, + "▁ship": 2386, + "▁mur": 2387, + "▁appro": 2388, + "▁pie": 2389, + "▁answer": 2390, + "▁free": 2391, + "▁business": 2392, + "▁ut": 2393, + "ape": 2394, + "▁appear": 2395, + "▁river": 2396, + "▁sto": 2397, + "▁cast": 2398, + "▁family": 2399, + "▁jud": 2400, + "▁excl": 2401, + "▁letter": 2402, + "ingly": 2403, + "rie": 2404, + "▁hair": 2405, + "ote": 2406, + "▁arms": 2407, + "▁become": 2408, + 
"ern": 2409, + "ouble": 2410, + "▁different": 2411, + "▁val": 2412, + "ffect": 2413, + "▁natur": 2414, + "▁possible": 2415, + "▁several": 2416, + "▁fine": 2417, + "ah": 2418, + "▁lead": 2419, + "▁forg": 2420, + "▁express": 2421, + "li": 2422, + "▁sus": 2423, + "▁glad": 2424, + "oon": 2425, + "▁arri": 2426, + "▁blood": 2427, + "itting": 2428, + "▁quiet": 2429, + "rence": 2430, + "▁idea": 2431, + "▁able": 2432, + "itted": 2433, + "ster": 2434, + "▁charac": 2435, + "▁begin": 2436, + "▁chur": 2437, + "▁tou": 2438, + "▁story": 2439, + "▁eye": 2440, + "band": 2441, + "ative": 2442, + "▁grand": 2443, + "▁consider": 2444, + "▁across": 2445, + "▁pen": 2446, + "▁except": 2447, + "▁fre": 2448, + "▁win": 2449, + "▁equ": 2450, + "eth": 2451, + "▁cent": 2452, + "isf": 2453, + "▁partic": 2454, + "▁diffic": 2455, + "▁window": 2456, + "▁surpr": 2457, + "llect": 2458, + "▁prov": 2459, + "▁direct": 2460, + "▁conc": 2461, + "ey": 2462, + "aw": 2463, + "▁govern": 2464, + "▁disco": 2465, + "▁wild": 2466, + "▁dog": 2467, + "▁flo": 2468, + "▁soft": 2469, + "teen": 2470, + "▁cross": 2471, + "ased": 2472, + "▁effect": 2473, + "▁sor": 2474, + "▁longer": 2475, + "▁hen": 2476, + "▁followed": 2477, + "▁sold": 2478, + "▁thee": 2479, + "▁pub": 2480, + "▁husband": 2481, + "ards": 2482, + "antly": 2483, + "by": 2484, + "▁ap": 2485, + "▁suppose": 2486, + "▁respect": 2487, + "ts": 2488, + "▁hast": 2489, + "▁sal": 2490, + "▁comple": 2491, + "▁heav": 2492, + "▁happy": 2493, + "▁rich": 2494, + "▁creat": 2495, + "une": 2496, + "▁taking": 2497, + "▁requ": 2498, + "▁stay": 2499, + "▁spoke": 2500, + "▁daughter": 2501, + "▁wee": 2502, + "▁ve": 2503, + "▁du": 2504, + "▁green": 2505, + "▁anim": 2506, + "▁din": 2507, + "'ll": 2508, + "▁bird": 2509, + "alth": 2510, + "▁mere": 2511, + "▁gard": 2512, + "ny": 2513, + "ley": 2514, + "▁possess": 2515, + "empt": 2516, + "▁reached": 2517, + "▁appeared": 2518, + "ov": 2519, + "▁exist": 2520, + "ination": 2521, + "▁pretty": 2522, + "▁remember": 2523, + "▁hea": 2524, + 
"▁opened": 2525, + "▁tom": 2526, + "anged": 2527, + "▁slow": 2528, + "▁imag": 2529, + "▁i've": 2530, + "ract": 2531, + "▁saying": 2532, + "king": 2533, + "utes": 2534, + "▁common": 2535, + "▁occas": 2536, + "▁book": 2537, + "▁rus": 2538, + "ames": 2539, + "ices": 2540, + "▁bright": 2541, + "ms": 2542, + "▁satisf": 2543, + "▁sense": 2544, + "▁fav": 2545, + "▁succ": 2546, + "ump": 2547, + "ising": 2548, + "▁lu": 2549, + "▁accord": 2550, + "tern": 2551, + "▁break": 2552, + "▁exper": 2553, + "▁month": 2554, + "use": 2555, + "▁dem": 2556, + "▁scar": 2557, + "▁continued": 2558, + "▁secret": 2559, + "▁church": 2560, + "▁tree": 2561, + "▁stri": 2562, + "▁carried": 2563, + "▁cry": 2564, + "nding": 2565, + "▁spirit": 2566, + "▁wanted": 2567, + "eric": 2568, + "▁certainly": 2569, + "▁command": 2570, + "▁dest": 2571, + "▁move": 2572, + "oun": 2573, + "▁sweet": 2574, + "▁street": 2575, + "▁ought": 2576, + "▁account": 2577, + "▁def": 2578, + "ham": 2579, + "▁prep": 2580, + "▁sens": 2581, + "▁esc": 2582, + "▁rock": 2583, + "ots": 2584, + "▁decl": 2585, + "▁purp": 2586, + "riage": 2587, + "outh": 2588, + "owers": 2589, + "▁draw": 2590, + "▁eat": 2591, + "▁breat": 2592, + "▁character": 2593, + "ime": 2594, + "cul": 2595, + "medi": 2596, + "▁stud": 2597, + "▁school": 2598, + "itude": 2599, + "▁heaven": 2600, + "▁feeling": 2601, + "▁sad": 2602, + "▁regard": 2603, + "ement": 2604, + "▁pain": 2605, + "▁worth": 2606, + "▁bra": 2607, + "ney": 2608, + "▁dut": 2609, + "▁smo": 2610, + "aimed": 2611, + "▁trans": 2612, + "▁delight": 2613, + "▁quar": 2614, + "▁hung": 2615, + "▁mot": 2616, + "▁blue": 2617, + "▁hot": 2618, + "▁hill": 2619, + "▁div": 2620, + "umb": 2621, + "▁disapp": 2622, + "▁marg": 2623, + "▁laugh": 2624, + "idence": 2625, + "▁produ": 2626, + "▁success": 2627, + "ury": 2628, + "son": 2629, + "▁fast": 2630, + "▁english": 2631, + "▁dress": 2632, + "▁hat": 2633, + "▁terri": 2634, + "▁port": 2635, + "▁neither": 2636, + "▁court": 2637, + "▁seven": 2638, + "▁fight": 2639, + 
"▁princess": 2640, + "▁lived": 2641, + "▁view": 2642, + "▁immedi": 2643, + "▁self": 2644, + "▁var": 2645, + "▁hours": 2646, + "▁mill": 2647, + "▁sol": 2648, + "▁exam": 2649, + "▁tried": 2650, + "▁won't": 2651, + "▁entered": 2652, + "▁disp": 2653, + "to": 2654, + "ric": 2655, + "▁carry": 2656, + "▁import": 2657, + "▁ang": 2658, + "ze": 2659, + "ony": 2660, + "▁danger": 2661, + "ledge": 2662, + "▁offic": 2663, + "▁cause": 2664, + "▁none": 2665, + "▁forward": 2666, + "▁uncle": 2667, + "▁tor": 2668, + "▁det": 2669, + "ask": 2670, + "▁len": 2671, + "▁further": 2672, + "▁pay": 2673, + "▁added": 2674, + "▁front": 2675, + "ror": 2676, + "▁ge": 2677, + "▁particular": 2678, + "▁deal": 2679, + "▁prot": 2680, + "▁led": 2681, + "▁acqu": 2682, + "▁pray": 2683, + "▁eff": 2684, + "▁happened": 2685, + "▁chief": 2686, + "lect": 2687, + "▁walked": 2688, + "▁later": 2689, + "▁joy": 2690, + "iar": 2691, + "day": 2692, + "▁ord": 2693, + "▁alth": 2694, + "▁comfort": 2695, + "▁prob": 2696, + "▁maj": 2697, + "▁affect": 2698, + "▁public": 2699, + "▁bene": 2700, + "ening": 2701, + "▁although": 2702, + "gr": 2703, + "▁sho": 2704, + "▁fig": 2705, + "resh": 2706, + "▁fail": 2707, + "uct": 2708, + "ug": 2709, + "ality": 2710, + "▁mem": 2711, + "▁seems": 2712, + "▁yourself": 2713, + "ship": 2714, + "ead": 2715, + "iam": 2716, + "▁number": 2717, + "side": 2718, + "▁ah": 2719, + "▁doing": 2720, + "▁living": 2721, + "arent": 2722, + "▁desp": 2723, + "ize": 2724, + "oof": 2725, + "▁field": 2726, + "▁received": 2727, + "▁shad": 2728, + "▁bey": 2729, + "▁beyond": 2730, + "▁phil": 2731, + "▁line": 2732, + "▁visit": 2733, + "inct": 2734, + "rig": 2735, + "▁party": 2736, + "▁garden": 2737, + "▁je": 2738, + "▁mouth": 2739, + "▁hall": 2740, + "▁queen": 2741, + "▁boat": 2742, + "▁bear": 2743, + "▁americ": 2744, + "ism": 2745, + "▁gentleman": 2746, + "▁vi": 2747, + "irt": 2748, + "uff": 2749, + "▁laid": 2750, + "raid": 2751, + "▁occasion": 2752, + "▁entire": 2753, + "▁age": 2754, + "▁sister": 2755, + "▁clot": 
2756, + "▁repe": 2757, + "ously": 2758, + "▁prison": 2759, + "▁accom": 2760, + "▁whis": 2761, + "▁nearly": 2762, + "▁trees": 2763, + "iling": 2764, + "iff": 2765, + "▁eighteen": 2766, + "bit": 2767, + "wards": 2768, + "▁early": 2769, + "▁tal": 2770, + "▁lab": 2771, + "▁forth": 2772, + "ming": 2773, + "ones": 2774, + "▁med": 2775, + "▁try": 2776, + "▁da": 2777, + "ilt": 2778, + "anced": 2779, + "▁princi": 2780, + "▁enem": 2781, + "▁thinking": 2782, + "▁chance": 2783, + "where": 2784, + "▁cre": 2785, + "▁minutes": 2786, + "▁anx": 2787, + "▁mary": 2788, + "▁pict": 2789, + "▁wait": 2790, + "▁vill": 2791, + "▁stren": 2792, + "▁afraid": 2793, + "▁crow": 2794, + "▁smile": 2795, + "▁late": 2796, + "▁england": 2797, + "▁pleasure": 2798, + "▁aunt": 2799, + "▁news": 2800, + "▁wis": 2801, + "▁fle": 2802, + "▁seeing": 2803, + "▁super": 2804, + "▁faith": 2805, + "▁rob": 2806, + "iment": 2807, + "oint": 2808, + "▁bill": 2809, + "lling": 2810, + "▁neigh": 2811, + "▁trouble": 2812, + "▁silence": 2813, + "▁plain": 2814, + "▁there's": 2815, + "aret": 2816, + "pend": 2817, + "▁exclaimed": 2818, + "rench": 2819, + "gy": 2820, + "▁miles": 2821, + "ply": 2822, + "▁glass": 2823, + "▁drew": 2824, + "▁neighb": 2825, + "els": 2826, + "▁mine": 2827, + "▁pract": 2828, + "▁heavy": 2829, + "▁standing": 2830, + "▁sevent": 2831, + "▁shar": 2832, + "▁change": 2833, + "▁necessary": 2834, + "▁chap": 2835, + "▁purpose": 2836, + "▁inqu": 2837, + "▁natural": 2838, + "▁deter": 2839, + "icked": 2840, + "▁bott": 2841, + "▁hardly": 2842, + "▁bell": 2843, + "▁top": 2844, + "▁caught": 2845, + "fered": 2846, + "wh": 2847, + "ives": 2848, + "ounded": 2849, + "▁auth": 2850, + "▁circum": 2851, + "▁fing": 2852, + "▁stopped": 2853, + "uc": 2854, + "▁wit": 2855, + "ament": 2856, + "▁opin": 2857, + "▁av": 2858, + "▁priv": 2859, + "aining": 2860, + "▁instead": 2861, + "rupt": 2862, + "▁grew": 2863, + "▁loved": 2864, + "▁island": 2865, + "▁knight": 2866, + "▁ago": 2867, + "▁length": 2868, + "▁inn": 2869, + "▁peace": 
2870, + "ls": 2871, + "inary": 2872, + "ior": 2873, + "ues": 2874, + "▁third": 2875, + "ush": 2876, + "▁beauty": 2877, + "▁hig": 2878, + "▁he's": 2879, + "the": 2880, + "form": 2881, + "head": 2882, + "ically": 2883, + "asp": 2884, + "ancy": 2885, + "▁determ": 2886, + "▁straight": 2887, + "▁cra": 2888, + "ining": 2889, + "pper": 2890, + "ler": 2891, + "▁infl": 2892, + "▁thor": 2893, + "▁convers": 2894, + "▁besides": 2895, + "▁position": 2896, + "▁thirty": 2897, + "▁den": 2898, + "rage": 2899, + "▁attention": 2900, + "ma": 2901, + "▁conv": 2902, + "ager": 2903, + "▁hist": 2904, + "ored": 2905, + "▁comes": 2906, + "aged": 2907, + "▁force": 2908, + "▁sitting": 2909, + "▁please": 2910, + "tend": 2911, + "iter": 2912, + "▁whatever": 2913, + "▁inform": 2914, + "▁hop": 2915, + "▁chair": 2916, + "▁build": 2917, + "▁bab": 2918, + "ustom": 2919, + "▁girls": 2920, + "▁rom": 2921, + "▁french": 2922, + "▁struck": 2923, + "▁pull": 2924, + "▁ast": 2925, + "▁lie": 2926, + "▁wrong": 2927, + "▁knowledge": 2928, + "▁grace": 2929, + "▁scarce": 2930, + "ghed": 2931, + "▁resol": 2932, + "▁watch": 2933, + "▁thoughts": 2934, + "▁rid": 2935, + "▁attempt": 2936, + "▁fifty": 2937, + "▁rap": 2938, + "▁box": 2939, + "hood": 2940, + "▁getting": 2941, + "▁ver": 2942, + "▁fat": 2943, + "▁company": 2944, + "▁arr": 2945, + "▁crowd": 2946, + "▁burn": 2947, + "▁slight": 2948, + "▁class": 2949, + "▁south": 2950, + "▁die": 2951, + "▁exact": 2952, + "▁drink": 2953, + "▁enj": 2954, + "▁thick": 2955, + "▁dinner": 2956, + "▁save": 2957, + "▁maid": 2958, + "▁plan": 2959, + "▁saint": 2960, + "▁immediately": 2961, + "iers": 2962, + "▁born": 2963, + "ius": 2964, + "▁rev": 2965, + "▁tears": 2966, + "ists": 2967, + "▁treat": 2968, + "usion": 2969, + "▁meant": 2970, + "▁boys": 2971, + "pping": 2972, + "▁slowly": 2973, + "▁incl": 2974, + "▁lim": 2975, + "▁died": 2976, + "iced": 2977, + "▁compl": 2978, + "▁fool": 2979, + "▁forest": 2980, + "▁sugg": 2981, + "▁post": 2982, + "▁accept": 2983, + "▁result": 2984, + 
"▁author": 2985, + "ndon": 2986, + "ceive": 2987, + "▁suggest": 2988, + "cient": 2989, + "▁stone": 2990, + "▁fright": 2991, + "▁paper": 2992, + "▁conse": 2993, + "▁jour": 2994, + "▁ty": 2995, + "▁enc": 2996, + "▁quickly": 2997, + "▁contr": 2998, + "▁youth": 2999, + "▁send": 3000, + "▁vict": 3001, + "ified": 3002, + "▁belong": 3003, + "▁warm": 3004, + "▁fix": 3005, + "▁imposs": 3006, + "▁beside": 3007, + "▁er": 3008, + "▁tone": 3009, + "▁camp": 3010, + "▁desire": 3011, + "▁bound": 3012, + "▁makes": 3013, + "▁margaret": 3014, + "▁north": 3015, + "▁brown": 3016, + "▁moon": 3017, + "▁lips": 3018, + "▁placed": 3019, + "val": 3020, + "▁circumst": 3021, + "▁food": 3022, + "▁filled": 3023, + "ics": 3024, + "ift": 3025, + "ann": 3026, + "▁london": 3027, + "▁distance": 3028, + "ging": 3029, + "▁strength": 3030, + "▁id": 3031, + "▁floor": 3032, + "▁forget": 3033, + "▁obl": 3034, + "▁mid": 3035, + "ries": 3036, + "itions": 3037, + "bs": 3038, + "▁spring": 3039, + "▁you're": 3040, + "▁viol": 3041, + "▁jack": 3042, + "▁pock": 3043, + "ooks": 3044, + "▁following": 3045, + "▁sac": 3046, + "▁remained": 3047, + "arch": 3048, + "▁grow": 3049, + "▁snow": 3050, + "▁government": 3051, + "▁ball": 3052, + "▁hors": 3053, + "▁nar": 3054, + "aded": 3055, + "▁broken": 3056, + "▁laughed": 3057, + "▁descri": 3058, + "▁safe": 3059, + "itten": 3060, + "ively": 3061, + "▁profess": 3062, + "▁o'": 3063, + "amed": 3064, + "▁depart": 3065, + "▁easy": 3066, + "oured": 3067, + "▁und": 3068, + "▁coun": 3069, + "▁thank": 3070, + "▁knows": 3071, + "▁waiting": 3072, + "dom": 3073, + "ats": 3074, + "▁ger": 3075, + "▁van": 3076, + "▁anne": 3077, + "▁horses": 3078, + "ugg": 3079, + "▁dread": 3080, + "▁une": 3081, + "ges": 3082, + "acy": 3083, + "▁proceed": 3084, + "▁gaz": 3085, + "▁shout": 3086, + "▁started": 3087, + "ented": 3088, + "▁complete": 3089, + "ope": 3090, + "▁gall": 3091, + "dered": 3092, + "▁wide": 3093, + "ires": 3094, + "▁neck": 3095, + "asure": 3096, + "isted": 3097, + "▁service": 3098, + 
"▁piece": 3099, + "cially": 3100, + "ences": 3101, + "▁sail": 3102, + "▁palace": 3103, + "erv": 3104, + "▁guard": 3105, + "▁doll": 3106, + "▁talking": 3107, + "▁man's": 3108, + "▁lift": 3109, + "▁grave": 3110, + "▁week": 3111, + "let": 3112, + "▁impossible": 3113, + "▁effort": 3114, + "▁imm": 3115, + "▁army": 3116, + "well": 3117, + "▁difficult": 3118, + "und": 3119, + "▁fresh": 3120, + "▁fun": 3121, + "reme": 3122, + "▁stop": 3123, + "▁mess": 3124, + "▁gar": 3125, + "▁deg": 3126, + "▁incre": 3127, + "▁corner": 3128, + "▁society": 3129, + "▁weak": 3130, + "▁shut": 3131, + "▁hy": 3132, + "▁proper": 3133, + "aching": 3134, + "▁cloud": 3135, + "iddle": 3136, + "ivid": 3137, + "▁demand": 3138, + "▁nine": 3139, + "▁sit": 3140, + "▁recogn": 3141, + "▁beat": 3142, + "uss": 3143, + "▁turning": 3144, + "▁sky": 3145, + "▁opinion": 3146, + "▁single": 3147, + "pic": 3148, + "▁fly": 3149, + "▁lang": 3150, + "▁mass": 3151, + "cell": 3152, + "▁outside": 3153, + "▁kiss": 3154, + "▁trust": 3155, + "▁occup": 3156, + "▁evil": 3157, + "▁below": 3158, + "▁appearance": 3159, + "uit": 3160, + "▁aftern": 3161, + "▁glo": 3162, + "▁gun": 3163, + "▁west": 3164, + "ency": 3165, + "par": 3166, + "▁showed": 3167, + "▁conversation": 3168, + "ises": 3169, + "▁conn": 3170, + "▁couldn't": 3171, + "▁running": 3172, + "▁mention": 3173, + "▁greater": 3174, + "▁music": 3175, + "▁breath": 3176, + "ases": 3177, + "▁nin": 3178, + "▁ant": 3179, + "arer": 3180, + "▁morrow": 3181, + "▁bank": 3182, + "▁espe": 3183, + "▁peter": 3184, + "ork": 3185, + "cial": 3186, + "▁presence": 3187, + "▁battle": 3188, + "▁winter": 3189, + "hered": 3190, + "▁probably": 3191, + "▁clothes": 3192, + "▁fash": 3193, + "▁mark": 3194, + "▁wished": 3195, + "vere": 3196, + "▁coll": 3197, + "▁emb": 3198, + "▁kne": 3199, + "▁married": 3200, + "▁arrived": 3201, + "▁pun": 3202, + "▁event": 3203, + "ushed": 3204, + "▁suffic": 3205, + "▁eager": 3206, + "▁former": 3207, + "▁giving": 3208, + "▁pop": 3209, + "▁sand": 3210, + "▁neg": 3211, + 
"▁usual": 3212, + "▁relig": 3213, + "▁simple": 3214, + "▁sym": 3215, + "itation": 3216, + "▁gro": 3217, + "ories": 3218, + "▁moved": 3219, + "▁months": 3220, + "▁speaking": 3221, + "▁pet": 3222, + "▁silent": 3223, + "▁cab": 3224, + "▁mountain": 3225, + "▁expression": 3226, + "gar": 3227, + "▁covered": 3228, + "▁hunt": 3229, + "▁afternoon": 3230, + "aped": 3231, + "▁occur": 3232, + "rief": 3233, + "▁states": 3234, + "▁z": 3235, + "str": 3236, + "▁loc": 3237, + "light": 3238, + "▁shore": 3239, + "che": 3240, + "▁easily": 3241, + "▁pale": 3242, + "unity": 3243, + "▁remark": 3244, + "▁phys": 3245, + "▁beginning": 3246, + "▁duty": 3247, + "▁chapter": 3248, + "▁influ": 3249, + "cho": 3250, + "▁concl": 3251, + "amb": 3252, + "▁instant": 3253, + "▁polit": 3254, + "zz": 3255, + "▁enjoy": 3256, + "▁sick": 3257, + "▁remain": 3258, + "uel": 3259, + "▁stream": 3260, + "▁figure": 3261, + "ald": 3262, + "▁tur": 3263, + "▁path": 3264, + "▁vol": 3265, + "▁minute": 3266, + "▁pleasant": 3267, + "▁scarcely": 3268, + "▁conscious": 3269, + "▁terrible": 3270, + "▁kill": 3271, + "▁raised": 3272, + "▁fashion": 3273, + "▁twel": 3274, + "yal": 3275, + "▁leaving": 3276, + "▁twelve": 3277, + "ature": 3278, + "▁fut": 3279, + "▁threw": 3280, + "▁star": 3281, + "▁flowers": 3282, + "olog": 3283, + "▁trying": 3284, + "rib": 3285, + "▁sword": 3286, + "▁tall": 3287, + "▁marry": 3288, + "▁ben": 3289, + "▁expected": 3290, + "▁according": 3291, + "▁forty": 3292, + "▁stick": 3293, + "inal": 3294, + "▁guess": 3295, + "▁silver": 3296, + "▁iron": 3297, + "▁oblig": 3298, + "▁office": 3299, + "▁rapid": 3300, + "▁ladies": 3301, + "▁especially": 3302, + "ipped": 3303, + "orted": 3304, + "▁bread": 3305, + "ech": 3306, + "▁tender": 3307, + "orth": 3308, + "▁learned": 3309, + "▁books": 3310, + "▁isn't": 3311, + "▁surprise": 3312, + "▁write": 3313, + "▁purs": 3314, + "pered": 3315, + "▁written": 3316, + "▁killed": 3317, + "▁consequ": 3318, + "▁exh": 3319, + "▁places": 3320, + "▁condition": 3321, + "▁direction": 
3322, + "▁cho": 3323, + "ulty": 3324, + "jo": 3325, + "mit": 3326, + "▁entirely": 3327, + "tering": 3328, + "▁enter": 3329, + "▁action": 3330, + "wise": 3331, + "▁suc": 3332, + "ibly": 3333, + "▁happiness": 3334, + "▁decided": 3335, + "▁golden": 3336, + "▁langu": 3337, + "eness": 3338, + "▁note": 3339, + "▁unless": 3340, + "uous": 3341, + "▁fal": 3342, + "aled": 3343, + "▁you'll": 3344, + "▁wonderful": 3345, + "ounds": 3346, + "ume": 3347, + "'re": 3348, + "▁shook": 3349, + "er's": 3350, + "oop": 3351, + "onel": 3352, + "▁perfectly": 3353, + "▁geor": 3354, + "ndered": 3355, + "▁broad": 3356, + "atic": 3357, + "▁closed": 3358, + "a's": 3359, + "▁spot": 3360, + "tended": 3361, + "▁latter": 3362, + "▁steps": 3363, + "▁merely": 3364, + "▁history": 3365, + "fer": 3366, + "▁wise": 3367, + "ishing": 3368, + "osing": 3369, + "▁middle": 3370, + "idered": 3371, + "▁understood": 3372, + "▁enemy": 3373, + "▁sole": 3374, + "llig": 3375, + "▁jew": 3376, + "▁simply": 3377, + "gan": 3378, + "▁conduct": 3379, + "▁tast": 3380, + "▁board": 3381, + "▁sav": 3382, + "▁wouldn't": 3383, + "▁shot": 3384, + "▁reply": 3385, + "▁changed": 3386, + "mn": 3387, + "▁grass": 3388, + "▁finally": 3389, + "▁admir": 3390, + "ital": 3391, + "▁sharp": 3392, + "itch": 3393, + "▁fortune": 3394, + "▁summer": 3395, + "▁experience": 3396, + "▁succeed": 3397, + "gress": 3398, + "uted": 3399, + "▁orig": 3400, + "retched": 3401, + "▁journey": 3402, + "▁excell": 3403, + "▁observed": 3404, + "ax": 3405, + "▁afterwards": 3406, + "fast": 3407, + "sy": 3408, + "▁bow": 3409, + "▁flat": 3410, + "▁persons": 3411, + "▁lean": 3412, + "▁earn": 3413, + "▁broke": 3414, + "▁mir": 3415, + "▁fit": 3416, + "osp": 3417, + "▁marriage": 3418, + "▁repres": 3419, + "io": 3420, + "▁lying": 3421, + "unk": 3422, + "▁trave": 3423, + "▁situ": 3424, + "▁listen": 3425, + "▁acquaint": 3426, + "▁ring": 3427, + "cience": 3428, + "▁faint": 3429, + "olute": 3430, + "▁calm": 3431, + "bered": 3432, + "▁lives": 3433, + "▁escape": 3434, + 
"▁beneath": 3435, + "ouses": 3436, + "▁clim": 3437, + "▁bless": 3438, + "▁repeated": 3439, + "▁pocket": 3440, + "ests": 3441, + "▁tail": 3442, + "▁passion": 3443, + "▁dick": 3444, + "▁ven": 3445, + "oses": 3446, + "clock": 3447, + "▁mut": 3448, + "▁becom": 3449, + "▁oper": 3450, + "▁o'clock": 3451, + "▁fish": 3452, + "▁lou": 3453, + "semb": 3454, + "▁prev": 3455, + "▁allowed": 3456, + "▁famil": 3457, + "hel": 3458, + "▁gate": 3459, + "▁spite": 3460, + "ivers": 3461, + "▁health": 3462, + "ission": 3463, + "▁ign": 3464, + "▁reach": 3465, + "▁cand": 3466, + "▁rain": 3467, + "▁empl": 3468, + "▁ban": 3469, + "▁strugg": 3470, + "▁firm": 3471, + "▁bitter": 3472, + "▁sorry": 3473, + "bing": 3474, + "▁father's": 3475, + "▁temper": 3476, + "▁madame": 3477, + "ples": 3478, + "▁furn": 3479, + "▁future": 3480, + "umed": 3481, + "▁nice": 3482, + "▁separ": 3483, + "▁presently": 3484, + "▁circumstances": 3485, + "▁connect": 3486, + "iding": 3487, + "▁sett": 3488, + "kes": 3489, + "▁loud": 3490, + "▁worse": 3491, + "▁wand": 3492, + "▁spread": 3493, + "▁i'd": 3494, + "▁letters": 3495, + "▁yellow": 3496, + "▁magn": 3497, + "▁passing": 3498, + "▁kit": 3499, + "▁pleased": 3500, + "▁darkness": 3501, + "▁remar": 3502, + "idden": 3503, + "come": 3504, + "▁tea": 3505, + "▁civ": 3506, + "▁apart": 3507, + "▁disappe": 3508, + "▁important": 3509, + "▁legs": 3510, + "▁nation": 3511, + "▁delic": 3512, + "▁dressed": 3513, + "▁game": 3514, + "▁walls": 3515, + "ec": 3516, + "▁dry": 3517, + "▁virt": 3518, + "▁dim": 3519, + "idently": 3520, + "rel": 3521, + "▁rub": 3522, + "▁absolute": 3523, + "▁blind": 3524, + "▁discovered": 3525, + "▁exactly": 3526, + "▁dam": 3527, + "otten": 3528, + "▁sorrow": 3529, + "my": 3530, + "▁cost": 3531, + "ference": 3532, + "▁employ": 3533, + "velop": 3534, + "▁cous": 3535, + "▁beast": 3536, + "▁spec": 3537, + "▁opport": 3538, + "▁ears": 3539, + "▁dropped": 3540, + "▁subst": 3541, + "▁chee": 3542, + "▁protect": 3543, + "ils": 3544, + "▁smiled": 3545, + "ina": 3546, + 
"▁resp": 3547, + "▁promise": 3548, + "▁bag": 3549, + "▁host": 3550, + "urs": 3551, + "▁creature": 3552, + "▁notice": 3553, + "▁knowing": 3554, + "▁heads": 3555, + "▁concer": 3556, + "▁seat": 3557, + "ishment": 3558, + "▁individ": 3559, + "▁existence": 3560, + "▁determined": 3561, + "lend": 3562, + "▁storm": 3563, + "roy": 3564, + "ours": 3565, + "▁conce": 3566, + "anging": 3567, + "▁fixed": 3568, + "▁press": 3569, + "▁major": 3570, + "oved": 3571, + "▁ves": 3572, + "iod": 3573, + "▁learn": 3574, + "▁motion": 3575, + "▁empt": 3576, + "▁leaves": 3577, + "▁bottom": 3578, + "▁arg": 3579, + "iety": 3580, + "▁nobody": 3581, + "▁pros": 3582, + "que": 3583, + "▁utter": 3584, + "▁pick": 3585, + "acked": 3586, + "▁intellig": 3587, + "▁hes": 3588, + "▁stir": 3589, + "▁prevent": 3590, + "▁assist": 3591, + "▁dom": 3592, + "▁disg": 3593, + "▁advant": 3594, + "erable": 3595, + "▁vent": 3596, + "ument": 3597, + "▁tired": 3598, + "rect": 3599, + "ashed": 3600, + "action": 3601, + "▁considered": 3602, + "▁wrote": 3603, + "▁houses": 3604, + "▁suit": 3605, + "▁cheer": 3606, + "▁castle": 3607, + "▁pra": 3608, + "▁perform": 3609, + "ancing": 3610, + "▁clean": 3611, + "ruct": 3612, + "▁stro": 3613, + "▁frequ": 3614, + "▁drawing": 3615, + "▁luck": 3616, + "▁habit": 3617, + "idge": 3618, + "ell": 3619, + "▁ones": 3620, + "▁noble": 3621, + "▁splend": 3622, + "▁honor": 3623, + "zen": 3624, + "▁paid": 3625, + "▁speech": 3626, + "▁estab": 3627, + "▁ur": 3628, + "istr": 3629, + "▁individual": 3630, + "inite": 3631, + "▁vall": 3632, + "▁birds": 3633, + "rodu": 3634, + "▁dar": 3635, + "▁allow": 3636, + "▁confess": 3637, + "▁impress": 3638, + "▁propert": 3639, + "▁jane": 3640, + "▁song": 3641, + "▁various": 3642, + "▁narrow": 3643, + "▁moder": 3644, + "▁believed": 3645, + "ays": 3646, + "▁extra": 3647, + "▁pure": 3648, + "arily": 3649, + "▁period": 3650, + "▁shadow": 3651, + "▁somewh": 3652, + "▁mal": 3653, + "▁cott": 3654, + "▁extreme": 3655, + "▁judge": 3656, + "▁village": 3657, + "▁royal": 
3658, + "▁somewhat": 3659, + "▁lower": 3660, + "▁ham": 3661, + "▁agree": 3662, + "▁remembered": 3663, + "▁aston": 3664, + "enth": 3665, + "▁declared": 3666, + "pan": 3667, + "▁train": 3668, + "▁parts": 3669, + "▁colonel": 3670, + "amber": 3671, + "▁breakfast": 3672, + "▁surely": 3673, + "▁sin": 3674, + "ayed": 3675, + "▁scene": 3676, + "go": 3677, + "▁greatest": 3678, + "▁influence": 3679, + "▁custom": 3680, + "itary": 3681, + "▁animal": 3682, + "▁sake": 3683, + "▁mod": 3684, + "▁soldiers": 3685, + "iny": 3686, + "▁ancient": 3687, + "▁drawn": 3688, + "▁evidently": 3689, + "▁ways": 3690, + "▁looks": 3691, + "▁revol": 3692, + "ator": 3693, + "anted": 3694, + "▁reflect": 3695, + "▁picture": 3696, + "▁likely": 3697, + "▁shr": 3698, + "▁laws": 3699, + "▁holding": 3700, + "▁difficulty": 3701, + "▁inj": 3702, + "▁mel": 3703, + "▁courage": 3704, + "nes": 3705, + "▁mort": 3706, + "▁troub": 3707, + "▁burst": 3708, + "▁angry": 3709, + "▁proud": 3710, + "gged": 3711, + "▁spoken": 3712, + "ision": 3713, + "▁desert": 3714, + "ption": 3715, + "▁comb": 3716, + "▁apparent": 3717, + "ring": 3718, + "▁watched": 3719, + "na": 3720, + "▁east": 3721, + "▁shop": 3722, + "▁agre": 3723, + "▁private": 3724, + "esty": 3725, + "▁jul": 3726, + "▁finished": 3727, + "▁anxious": 3728, + "otion": 3729, + "▁fifteen": 3730, + "▁social": 3731, + "under": 3732, + "▁dism": 3733, + "▁touch": 3734, + "▁wine": 3735, + "▁attack": 3736, + "▁ideas": 3737, + "▁george": 3738, + "af": 3739, + "rer": 3740, + "oose": 3741, + "▁space": 3742, + "▁scr": 3743, + "▁inside": 3744, + "▁gentlemen": 3745, + "▁civil": 3746, + "iently": 3747, + "▁formed": 3748, + "▁fol": 3749, + "▁goes": 3750, + "▁you've": 3751, + "▁thin": 3752, + "▁surf": 3753, + "▁servant": 3754, + "▁bal": 3755, + "▁cover": 3756, + "▁ourselves": 3757, + "▁fallen": 3758, + "▁henry": 3759, + "▁lot": 3760, + "ium": 3761, + "▁advent": 3762, + "▁carriage": 3763, + "▁baby": 3764, + "▁elect": 3765, + "▁tong": 3766, + "▁appre": 3767, + "▁everybody": 3768, + 
"uded": 3769, + "▁commun": 3770, + "▁ine": 3771, + "itive": 3772, + "▁waited": 3773, + "cise": 3774, + "▁grou": 3775, + "het": 3776, + "▁vain": 3777, + "▁impro": 3778, + "▁favor": 3779, + "erial": 3780, + "▁speed": 3781, + "▁windows": 3782, + "▁carefully": 3783, + "▁ice": 3784, + "▁noise": 3785, + "▁hero": 3786, + "▁jim": 3787, + "▁william": 3788, + "▁pecul": 3789, + "▁promised": 3790, + "▁walking": 3791, + "▁forgotten": 3792, + "▁obliged": 3793, + "▁earnest": 3794, + "▁main": 3795, + "▁lose": 3796, + "▁glance": 3797, + "▁vessel": 3798, + "▁grad": 3799, + "▁thro": 3800, + "▁bod": 3801, + "▁shoulder": 3802, + "▁meth": 3803, + "▁animals": 3804, + "▁noticed": 3805, + "ables": 3806, + "▁peculiar": 3807, + "▁fier": 3808, + "▁pot": 3809, + "▁quietly": 3810, + "▁cup": 3811, + "▁serious": 3812, + "▁tremb": 3813, + "▁generally": 3814, + "▁american": 3815, + "▁symp": 3816, + "ral": 3817, + "▁don": 3818, + "▁france": 3819, + "iction": 3820, + "▁property": 3821, + "▁shoulders": 3822, + "▁stranger": 3823, + "▁san": 3824, + "▁cow": 3825, + "▁what's": 3826, + "▁dust": 3827, + "▁affection": 3828, + "▁handsome": 3829, + "▁higher": 3830, + "iant": 3831, + "nday": 3832, + "▁wel": 3833, + "▁poet": 3834, + "▁sla": 3835, + "▁distinct": 3836, + "▁mam": 3837, + "▁pier": 3838, + "acing": 3839, + "ague": 3840, + "▁grown": 3841, + "uly": 3842, + "▁d'": 3843, + "▁chamber": 3844, + "▁desce": 3845, + "▁murm": 3846, + "stem": 3847, + "▁personal": 3848, + "▁fancy": 3849, + "▁offered": 3850, + "osite": 3851, + "onsie": 3852, + "▁built": 3853, + "▁edge": 3854, + "▁whispered": 3855, + "▁skin": 3856, + "▁pieces": 3857, + "itated": 3858, + "cher": 3859, + "osity": 3860, + "▁pit": 3861, + "▁contro": 3862, + "▁faces": 3863, + "▁spent": 3864, + "▁interrupt": 3865, + "how": 3866, + "isters": 3867, + "▁butter": 3868, + "▁develop": 3869, + "▁unk": 3870, + "hip": 3871, + "▁heat": 3872, + "▁fond": 3873, + "▁coat": 3874, + "▁touched": 3875, + "▁hol": 3876, + "ingu": 3877, + "▁pi": 3878, + "▁race": 3879, + 
"▁jump": 3880, + "▁surprised": 3881, + "oted": 3882, + "▁defe": 3883, + "enced": 3884, + "▁wasn't": 3885, + "▁wear": 3886, + "andon": 3887, + "▁fan": 3888, + "acher": 3889, + "▁arch": 3890, + "▁educ": 3891, + "▁brave": 3892, + "athered": 3893, + "▁eld": 3894, + "▁wealth": 3895, + "▁system": 3896, + "▁german": 3897, + "▁false": 3898, + "wood": 3899, + "▁dare": 3900, + "aked": 3901, + "▁cousin": 3902, + "▁fer": 3903, + "key": 3904, + "▁lin": 3905, + "▁intellect": 3906, + "▁prepared": 3907, + "▁fingers": 3908, + "▁surr": 3909, + "▁mountains": 3910, + "ipp": 3911, + "▁opportunity": 3912, + "aff": 3913, + "▁bare": 3914, + "▁dor": 3915, + "▁introdu": 3916, + "▁collect": 3917, + "▁lovely": 3918, + "▁rag": 3919, + "▁crown": 3920, + "▁matters": 3921, + "▁companion": 3922, + "▁weather": 3923, + "▁alar": 3924, + "▁innoc": 3925, + "▁ris": 3926, + "▁mix": 3927, + "▁lake": 3928, + "▁store": 3929, + "▁unh": 3930, + "▁meaning": 3931, + "▁memory": 3932, + "over": 3933, + "▁band": 3934, + "leep": 3935, + "▁finding": 3936, + "ee": 3937, + "▁charge": 3938, + "▁grat": 3939, + "▁attract": 3940, + "▁gray": 3941, + "▁quarter": 3942, + "▁avo": 3943, + "▁greatly": 3944, + "▁mach": 3945, + "▁inh": 3946, + "▁asleep": 3947, + "▁paris": 3948, + "▁dav": 3949, + "▁alto": 3950, + "▁offer": 3951, + "▁opposite": 3952, + "ounced": 3953, + "erve": 3954, + "▁breast": 3955, + "nown": 3956, + "▁reading": 3957, + "▁altogether": 3958, + "▁writing": 3959, + "pected": 3960, + "▁degree": 3961, + "cing": 3962, + "night": 3963, + "▁exec": 3964, + "fortun": 3965, + "▁stat": 3966, + "▁feelings": 3967, + "▁hath": 3968, + "▁cook": 3969, + "▁rail": 3970, + "▁honour": 3971, + "ding": 3972, + "▁fate": 3973, + "▁por": 3974, + "▁frank": 3975, + "▁meeting": 3976, + "▁rough": 3977, + "▁alive": 3978, + "▁hide": 3979, + "ites": 3980, + "ilar": 3981, + "▁blow": 3982, + "▁cruel": 3983, + "raph": 3984, + "▁hurt": 3985, + "▁loss": 3986, + "▁thrown": 3987, + "▁caused": 3988, + "▁we'll": 3989, + "▁serve": 3990, + "▁duke": 3991, + 
"▁bent": 3992, + "▁united": 3993, + "▁seek": 3994, + "▁kingdom": 3995, + "▁situation": 3996, + "▁empty": 3997, + "ners": 3998, + "▁due": 3999, + "▁liked": 4000, + "▁swift": 4001, + "▁opening": 4002, + "▁servants": 4003, + "chen": 4004, + "oura": 4005, + "▁gh": 4006, + "▁suspic": 4007, + "▁freed": 4008, + "ointed": 4009, + "▁surface": 4010, + "cil": 4011, + "▁questions": 4012, + "▁ess": 4013, + "▁curious": 4014, + "▁constit": 4015, + "▁accompan": 4016, + "▁christian": 4017, + "▁fill": 4018, + "arest": 4019, + "▁satisfied": 4020, + "ron": 4021, + "▁sides": 4022, + "▁pity": 4023, + "▁reve": 4024, + "▁equal": 4025, + "▁height": 4026, + "▁ordered": 4027, + "osop": 4028, + "▁grey": 4029, + "▁listened": 4030, + "pet": 4031, + "▁rejo": 4032, + "▁capt": 4033, + "ibility": 4034, + "ob": 4035, + "▁mart": 4036, + "▁happen": 4037, + "▁hurried": 4038, + "▁dollars": 4039, + "▁language": 4040, + "▁ange": 4041, + "▁yours": 4042, + "▁supposed": 4043, + "▁laughing": 4044, + "▁settled": 4045, + "▁rode": 4046, + "▁perm": 4047, + "▁distingu": 4048, + "▁hurry": 4049, + "▁destroy": 4050, + "▁talked": 4051, + "▁lifted": 4052, + "ocr": 4053, + "▁square": 4054, + "▁value": 4055, + "▁taste": 4056, + "▁vast": 4057, + "▁king's": 4058, + "▁rul": 4059, + "▁roof": 4060, + "▁telling": 4061, + "▁study": 4062, + "▁ow": 4063, + "▁pan": 4064, + "▁bas": 4065, + "▁rising": 4066, + "▁sufficient": 4067, + "▁forced": 4068, + "▁rise": 4069, + "▁attend": 4070, + "▁philosop": 4071, + "▁nose": 4072, + "▁sixty": 4073, + "hest": 4074, + "▁pin": 4075, + "▁egg": 4076, + "▁amb": 4077, + "▁fault": 4078, + "bur": 4079, + "▁station": 4080, + "▁distur": 4081, + "▁regular": 4082, + "ille": 4083, + "▁pack": 4084, + "▁special": 4085, + "▁honest": 4086, + "▁building": 4087, + "▁season": 4088, + "▁shape": 4089, + "▁pride": 4090, + "▁smiling": 4091, + "like": 4092, + "▁orders": 4093, + "yn": 4094, + "▁woods": 4095, + "▁accompl": 4096, + "con": 4097, + "▁sam": 4098, + "▁usually": 4099, + "▁watching": 4100, + "▁sacri": 4101, + 
"erved": 4102, + "▁passage": 4103, + "▁material": 4104, + "▁valley": 4105, + "yr": 4106, + "▁stairs": 4107, + "▁libert": 4108, + "▁frightened": 4109, + "▁remarked": 4110, + "▁tit": 4111, + "▁wed": 4112, + "▁mistress": 4113, + "▁directly": 4114, + "▁suffer": 4115, + "▁gloom": 4116, + "▁lines": 4117, + "▁stock": 4118, + "▁justice": 4119, + "▁diam": 4120, + "ested": 4121, + "▁growing": 4122, + "▁doesn't": 4123, + "▁gathered": 4124, + "▁ordinary": 4125, + "uce": 4126, + "▁eur": 4127, + "▁unf": 4128, + "▁kitchen": 4129, + "▁threat": 4130, + "▁depend": 4131, + "▁weeks": 4132, + "▁despair": 4133, + "▁method": 4134, + "▁seized": 4135, + "▁discuss": 4136, + "▁exer": 4137, + "ify": 4138, + "▁flower": 4139, + "▁ignor": 4140, + "eer": 4141, + "ades": 4142, + "▁deb": 4143, + "eping": 4144, + "▁ale": 4145, + "▁yo": 4146, + "chief": 4147, + "▁supper": 4148, + "ik": 4149, + "▁bold": 4150, + "▁putting": 4151, + "▁nearer": 4152, + "uses": 4153, + "▁one's": 4154, + "▁ble": 4155, + "▁york": 4156, + "▁ende": 4157, + "▁affairs": 4158, + "▁soldier": 4159, + "▁contrary": 4160, + "▁moving": 4161, + "▁streets": 4162, + "▁bir": 4163, + "rance": 4164, + "hens": 4165, + "▁cit": 4166, + "icated": 4167, + "▁catch": 4168, + "▁imagine": 4169, + "eds": 4170, + "▁march": 4171, + "▁search": 4172, + "ara": 4173, + "▁receive": 4174, + "imate": 4175, + "▁monsie": 4176, + "▁twice": 4177, + "▁papa": 4178, + "▁monsieur": 4179, + "▁reck": 4180, + "min": 4181, + "ude": 4182, + "▁process": 4183, + "▁hole": 4184, + "aly": 4185, + "lin": 4186, + "▁cro": 4187, + "▁favour": 4188, + "▁dign": 4189, + "▁working": 4190, + "▁harm": 4191, + "▁europe": 4192, + "antic": 4193, + "▁proved": 4194, + "ocked": 4195, + "▁prove": 4196, + "▁cler": 4197, + "▁lod": 4198, + "ception": 4199, + "▁pulled": 4200, + "▁arth": 4201, + "▁authority": 4202, + "▁haven": 4203, + "▁jer": 4204, + "▁uns": 4205, + "▁movement": 4206, + "usted": 4207, + "▁engaged": 4208, + "▁brothers": 4209, + "▁advantage": 4210, + "lished": 4211, + "ole": 4212, + 
"▁arthur": 4213, + "▁aut": 4214, + "▁stones": 4215, + "▁farm": 4216, + "▁difference": 4217, + "▁fart": 4218, + "▁aside": 4219, + "▁mas": 4220, + "▁observ": 4221, + "▁hence": 4222, + "▁possession": 4223, + "▁hills": 4224, + "▁fortun": 4225, + "uls": 4226, + "ails": 4227, + "▁instance": 4228, + "▁she's": 4229, + "▁ol": 4230, + "▁holy": 4231, + "▁flew": 4232, + "ky": 4233, + "▁color": 4234, + "▁rate": 4235, + "▁doors": 4236, + "▁busy": 4237, + "set": 4238, + "▁address": 4239, + "▁familiar": 4240, + "▁weight": 4241, + "▁aware": 4242, + "▁played": 4243, + "▁sympath": 4244, + "lls": 4245, + "▁solemn": 4246, + "▁liter": 4247, + "▁test": 4248, + "▁emper": 4249, + "▁indian": 4250, + "▁distant": 4251, + "▁interesting": 4252, + "▁bull": 4253, + "▁thorough": 4254, + "▁wore": 4255, + "▁worked": 4256, + "▁explained": 4257, + "▁excellent": 4258, + "▁splendid": 4259, + "▁tongue": 4260, + "▁di": 4261, + "▁pard": 4262, + "▁named": 4263, + "▁shame": 4264, + "▁franc": 4265, + "▁spect": 4266, + "▁moments": 4267, + "bers": 4268, + "▁wil": 4269, + "▁myster": 4270, + "▁seated": 4271, + "▁instantly": 4272, + "▁similar": 4273, + "▁endeav": 4274, + "▁measure": 4275, + "▁naturally": 4276, + "nds": 4277, + "▁suf": 4278, + "▁amount": 4279, + "▁imper": 4280, + "▁dogs": 4281, + "itable": 4282, + "▁brit": 4283, + "▁necessity": 4284, + "rid": 4285, + "ulous": 4286, + "▁confidence": 4287, + "den": 4288, + "▁parent": 4289, + "▁wid": 4290, + "▁vir": 4291, + "▁neverthe": 4292, + "▁agreed": 4293, + "▁nevertheless": 4294, + "unch": 4295, + "▁hearing": 4296, + "▁takes": 4297, + "▁aug": 4298, + "▁univers": 4299, + "enance": 4300, + "▁unw": 4301, + "▁earl": 4302, + "▁keeping": 4303, + "▁drive": 4304, + "▁produced": 4305, + "▁aud": 4306, + "on's": 4307, + "▁names": 4308, + "agn": 4309, + "▁disappeared": 4310, + "▁throw": 4311, + "▁president": 4312, + "▁gods": 4313, + "▁magic": 4314, + "▁represent": 4315, + "▁unknown": 4316, + "por": 4317, + "▁terror": 4318, + "▁haven't": 4319, + "asc": 4320, + "▁support": 
4321, + "▁smoke": 4322, + "▁wicked": 4323, + "ker": 4324, + "▁works": 4325, + "▁artic": 4326, + "▁dull": 4327, + "▁yester": 4328, + "▁falling": 4329, + "▁worthy": 4330, + "▁liberty": 4331, + "ulation": 4332, + "▁design": 4333, + "▁wants": 4334, + "▁evidence": 4335, + "▁companions": 4336, + "▁spirits": 4337, + "▁coast": 4338, + "▁mighty": 4339, + "▁particularly": 4340, + "▁witness": 4341, + "▁discover": 4342, + "▁sought": 4343, + "▁span": 4344, + "'ve": 4345, + "▁rare": 4346, + "▁officers": 4347, + "lv": 4348, + "zy": 4349, + "▁yesterday": 4350, + "vey": 4351, + "cent": 4352, + "▁powers": 4353, + "▁yield": 4354, + "▁cool": 4355, + "▁organ": 4356, + "▁amaz": 4357, + "▁pointed": 4358, + "ford": 4359, + "▁claim": 4360, + "▁content": 4361, + "▁possibly": 4362, + "▁terms": 4363, + "▁trium": 4364, + "▁officer": 4365, + "▁persu": 4366, + "▁ceased": 4367, + "▁drove": 4368, + "▁occurred": 4369, + "▁gree": 4370, + "▁lies": 4371, + "▁otherwise": 4372, + "▁emperor": 4373, + "▁hom": 4374, + "▁stars": 4375, + "▁knees": 4376, + "▁triumph": 4377, + "ruction": 4378, + "▁paused": 4379, + "oms": 4380, + "▁required": 4381, + "▁failed": 4382, + "▁unhapp": 4383, + "▁diamond": 4384, + "▁rat": 4385, + "▁ali": 4386, + "▁double": 4387, + "▁forms": 4388, + "▁gives": 4389, + "▁finger": 4390, + "race": 4391, + "▁pair": 4392, + "alous": 4393, + "illa": 4394, + "▁bob": 4395, + "▁eliz": 4396, + "▁travel": 4397, + "▁carrying": 4398, + "▁gle": 4399, + "iles": 4400, + "▁teeth": 4401, + "esh": 4402, + "▁shown": 4403, + "▁fruit": 4404, + "▁waters": 4405, + "▁entertain": 4406, + "▁hearts": 4407, + "umn": 4408, + "▁labor": 4409, + "in't": 4410, + "▁pill": 4411, + "▁ener": 4412, + "soci": 4413, + "▁example": 4414, + "▁upper": 4415, + "▁foreign": 4416, + "▁moral": 4417, + "▁softly": 4418, + "rose": 4419, + "▁huge": 4420, + "▁charles": 4421, + "▁priest": 4422, + "▁excit": 4423, + "▁fet": 4424, + "▁mother's": 4425, + "▁possessed": 4426, + "▁cases": 4427, + "▁report": 4428, + "▁milk": 4429, + "▁affair": 4430, 
+ "▁principle": 4431, + "▁inhab": 4432, + "▁freedom": 4433, + "▁proof": 4434, + "▁intended": 4435, + "▁satisfaction": 4436, + "▁shouted": 4437, + "isc": 4438, + "▁plat": 4439, + "▁bask": 4440, + "ental": 4441, + "▁group": 4442, + "▁farther": 4443, + "asm": 4444, + "▁unfortun": 4445, + "▁unto": 4446, + "▁singing": 4447, + "▁arrange": 4448, + "▁religion": 4449, + "▁ber": 4450, + "▁rocks": 4451, + "▁seventeen": 4452, + "▁der": 4453, + "▁james": 4454, + "▁buy": 4455, + "▁succeeded": 4456, + "▁rooms": 4457, + "▁leading": 4458, + "▁majesty": 4459, + "▁events": 4460, + "▁dance": 4461, + "▁paint": 4462, + "▁gently": 4463, + "acle": 4464, + "▁tele": 4465, + "▁pardon": 4466, + "using": 4467, + "▁drop": 4468, + "father": 4469, + "▁invent": 4470, + "▁key": 4471, + "▁mentioned": 4472, + "▁seventy": 4473, + "▁ros": 4474, + "▁suffering": 4475, + "▁record": 4476, + "▁cabin": 4477, + "road": 4478, + "▁diss": 4479, + "ival": 4480, + "▁demanded": 4481, + "▁excitement": 4482, + "▁associ": 4483, + "▁progress": 4484, + "angers": 4485, + "▁curi": 4486, + "▁america": 4487, + "▁rule": 4488, + "▁bor": 4489, + "▁vig": 4490, + "lessly": 4491, + "▁clearly": 4492, + "▁bore": 4493, + "▁sheep": 4494, + "▁regret": 4495, + "▁neighbour": 4496, + "bly": 4497, + "iance": 4498, + "▁instinct": 4499, + "▁advice": 4500, + "▁awful": 4501, + "▁sen": 4502, + "▁fully": 4503, + "▁gather": 4504, + "▁papers": 4505, + "▁hidden": 4506, + "▁chest": 4507, + "▁birth": 4508, + "hy": 4509, + "pap": 4510, + "▁hither": 4511, + "▁stuff": 4512, + "▁impat": 4513, + "▁calling": 4514, + "▁fourth": 4515, + "▁dreadful": 4516, + "▁pos": 4517, + "▁grief": 4518, + "▁brill": 4519, + "▁powerful": 4520, + "▁presented": 4521, + "▁fairy": 4522, + "▁explain": 4523, + "▁shoot": 4524, + "▁prisoner": 4525, + "▁joined": 4526, + "▁afford": 4527, + "mond": 4528, + "attered": 4529, + "▁ing": 4530, + "iments": 4531, + "▁shel": 4532, + "▁prefer": 4533, + "▁considerable": 4534, + "▁obey": 4535, + "▁voices": 4536, + "▁interv": 4537, + 
"▁interested": 4538, + "▁virg": 4539, + "▁cred": 4540, + "▁card": 4541, + "▁ep": 4542, + "▁needed": 4543, + "▁pounds": 4544, + "▁conqu": 4545, + "▁clever": 4546, + "▁advanced": 4547, + "▁cord": 4548, + "ighed": 4549, + "▁undert": 4550, + "▁resolved": 4551, + "▁wag": 4552, + "istic": 4553, + "▁paul": 4554, + "▁excited": 4555, + "▁conditions": 4556, + "▁pictures": 4557, + "acious": 4558, + "▁shining": 4559, + "▁sunday": 4560, + "▁served": 4561, + "▁steam": 4562, + "▁police": 4563, + "▁sprang": 4564, + "sie": 4565, + "ora": 4566, + "ese": 4567, + "▁jes": 4568, + "▁nodd": 4569, + "▁salt": 4570, + "▁fields": 4571, + "▁cart": 4572, + "▁indians": 4573, + "▁fierce": 4574, + "dle": 4575, + "▁ride": 4576, + "▁desired": 4577, + "▁edward": 4578, + "▁importance": 4579, + "▁information": 4580, + "ture": 4581, + "▁hosp": 4582, + "▁memb": 4583, + "▁perceived": 4584, + "▁yard": 4585, + "▁crit": 4586, + "ternal": 4587, + "▁task": 4588, + "▁fold": 4589, + "rant": 4590, + "▁sooner": 4591, + "▁merch": 4592, + "▁absolutely": 4593, + "▁citiz": 4594, + "▁suffered": 4595, + "▁tight": 4596, + "▁dur": 4597, + "▁iss": 4598, + "illy": 4599, + "▁log": 4600, + "▁completely": 4601, + "hold": 4602, + "▁rad": 4603, + "▁share": 4604, + "▁willing": 4605, + "▁devil": 4606, + "▁ships": 4607, + "▁imagination": 4608, + "▁superior": 4609, + "com": 4610, + "ams": 4611, + "▁anybody": 4612, + "▁env": 4613, + "▁appl": 4614, + "▁drag": 4615, + "▁dawn": 4616, + "asped": 4617, + "▁occupied": 4618, + "▁curiosity": 4619, + "iest": 4620, + "▁sigh": 4621, + "▁fox": 4622, + "asant": 4623, + "▁myst": 4624, + "▁stead": 4625, + "ett": 4626, + "▁couple": 4627, + "▁type": 4628, + "▁extraord": 4629, + "▁apparently": 4630, + "▁welcome": 4631, + "▁daily": 4632, + "▁modern": 4633, + "iot": 4634, + "▁ain't": 4635, + "▁dying": 4636, + "llen": 4637, + "▁feat": 4638, + "▁accident": 4639, + "▁countenance": 4640, + "▁abandon": 4641, + "ortion": 4642, + "▁lock": 4643, + "▁crime": 4644, + "pir": 4645, + "▁mult": 4646, + "▁alas": 
4647, + "▁refused": 4648, + "▁hate": 4649, + "▁dw": 4650, + "▁whenever": 4651, + "▁thanks": 4652, + "▁slave": 4653, + "▁regarded": 4654, + "▁suggested": 4655, + "ulf": 4656, + "▁actually": 4657, + "gment": 4658, + "▁size": 4659, + "reg": 4660, + "▁cult": 4661, + "▁kat": 4662, + "▁bodies": 4663, + "hus": 4664, + "▁bay": 4665, + "▁truly": 4666, + "▁flesh": 4667, + "ishop": 4668, + "▁smith": 4669, + "▁betr": 4670, + "with": 4671, + "▁wet": 4672, + "▁rapidly": 4673, + "gers": 4674, + "▁odd": 4675, + "asons": 4676, + "ette": 4677, + "▁club": 4678, + "abel": 4679, + "▁horror": 4680, + "▁mile": 4681, + "▁flight": 4682, + "▁crossed": 4683, + "▁professor": 4684, + "▁oce": 4685, + "▁worst": 4686, + "ization": 4687, + "▁rushed": 4688, + "▁science": 4689, + "▁brief": 4690, + "▁stepped": 4691, + "▁midst": 4692, + "ha": 4693, + "▁sour": 4694, + "▁maint": 4695, + "▁brain": 4696, + "▁cottage": 4697, + "▁expressed": 4698, + "▁equally": 4699, + "▁education": 4700, + "▁august": 4701, + "▁buck": 4702, + "▁nay": 4703, + "ids": 4704, + "▁tempt": 4705, + "▁inquir": 4706, + "▁foolish": 4707, + "▁taught": 4708, + "▁cop": 4709, + "▁dun": 4710, + "▁picked": 4711, + "▁elsie": 4712, + "▁lands": 4713, + "▁driven": 4714, + "▁political": 4715, + "mas": 4716, + "▁deck": 4717, + "▁resist": 4718, + "▁instr": 4719, + "▁bon": 4720, + "▁ken": 4721, + "ips": 4722, + "▁hotel": 4723, + "▁dangerous": 4724, + "ially": 4725, + "now": 4726, + "▁dozen": 4727, + "▁trade": 4728, + "▁points": 4729, + "▁ninet": 4730, + "ability": 4731, + "▁crim": 4732, + "▁relations": 4733, + "▁interp": 4734, + "▁barb": 4735, + "▁delighted": 4736, + "▁members": 4737, + "▁sisters": 4738, + "▁sty": 4739, + "▁anger": 4740, + "▁belief": 4741, + "▁asking": 4742, + "▁meat": 4743, + "▁displ": 4744, + "▁relief": 4745, + "ification": 4746, + "▁hunting": 4747, + "▁alex": 4748, + "aries": 4749, + "▁obst": 4750, + "▁behold": 4751, + "▁mistake": 4752, + "▁inquired": 4753, + "▁remarkable": 4754, + "▁origin": 4755, + "cked": 4756, + "▁nerv": 
4757, + "acks": 4758, + "vert": 4759, + "rop": 4760, + "▁careful": 4761, + "▁wounded": 4762, + "ading": 4763, + "▁cere": 4764, + "▁enemies": 4765, + "▁gradually": 4766, + "▁interrupted": 4767, + "▁fis": 4768, + "▁stup": 4769, + "▁severe": 4770, + "▁keen": 4771, + "▁sixteen": 4772, + "kins": 4773, + "resp": 4774, + "▁worn": 4775, + "▁flour": 4776, + "▁sylv": 4777, + "▁control": 4778, + "kin": 4779, + "▁lone": 4780, + "asing": 4781, + "▁nap": 4782, + "▁assert": 4783, + "▁depth": 4784, + "▁kindly": 4785, + "▁murder": 4786, + "acity": 4787, + "▁eleven": 4788, + "▁invol": 4789, + "▁d'art": 4790, + "▁wings": 4791, + "▁oak": 4792, + "▁et": 4793, + "▁begun": 4794, + "▁dreams": 4795, + "while": 4796, + "▁moreover": 4797, + "▁exped": 4798, + "▁independ": 4799, + "▁buried": 4800, + "▁approached": 4801, + "agnan": 4802, + "▁d'artagnan": 4803, + "▁sex": 4804, + "▁saved": 4805, + "▁harry": 4806, + "▁physical": 4807, + "▁species": 4808, + "cer": 4809, + "oe": 4810, + "▁glory": 4811, + "▁creatures": 4812, + "▁newspap": 4813, + "▁sang": 4814, + "▁plenty": 4815, + "▁useful": 4816, + "▁shoes": 4817, + "▁hoped": 4818, + "▁frequently": 4819, + "▁saf": 4820, + "▁distr": 4821, + "▁princip": 4822, + "▁pu": 4823, + "y's": 4824, + "aunt": 4825, + "▁lover": 4826, + "▁famous": 4827, + "▁recollect": 4828, + "▁nur": 4829, + "▁grim": 4830, + "▁indif": 4831, + "▁charming": 4832, + "▁aim": 4833, + "▁loose": 4834, + "▁consciousness": 4835, + "▁mamma": 4836, + "▁enthus": 4837, + "▁slept": 4838, + "▁smooth": 4839, + "▁fighting": 4840, + "▁hyp": 4841, + "▁enthusi": 4842, + "▁dig": 4843, + "aling": 4844, + "▁stage": 4845, + "▁anyone": 4846, + "▁thrust": 4847, + "▁desper": 4848, + "▁tar": 4849, + "▁lamp": 4850, + "stone": 4851, + "▁stern": 4852, + "▁evident": 4853, + "▁meanwhile": 4854, + "▁forgive": 4855, + "▁accepted": 4856, + "▁ocean": 4857, + "▁tot": 4858, + "▁they're": 4859, + "▁wondered": 4860, + "▁playing": 4861, + "▁detect": 4862, + "▁hale": 4863, + "▁knife": 4864, + "ailed": 4865, + "▁closely": 
4866, + "▁meas": 4867, + "▁proceeded": 4868, + "▁message": 4869, + "▁mour": 4870, + "▁fac": 4871, + "▁union": 4872, + "ustomed": 4873, + "hem": 4874, + "aming": 4875, + "▁exceed": 4876, + "▁feather": 4877, + "▁precious": 4878, + "▁century": 4879, + "▁unex": 4880, + "▁park": 4881, + "ication": 4882, + "▁everywhere": 4883, + "▁minds": 4884, + "▁extraordinary": 4885, + "▁arose": 4886, + "▁entrance": 4887, + "▁capital": 4888, + "▁recall": 4889, + "▁burning": 4890, + "▁magnific": 4891, + "oes": 4892, + "orious": 4893, + "stand": 4894, + "▁assemb": 4895, + "▁plant": 4896, + "▁neighbor": 4897, + "▁lest": 4898, + "uments": 4899, + "▁colle": 4900, + "▁virtue": 4901, + "▁bew": 4902, + "▁forb": 4903, + "▁retreat": 4904, + "▁capable": 4905, + "▁assured": 4906, + "▁constant": 4907, + "▁governor": 4908, + "▁increased": 4909, + "▁horn": 4910, + "▁removed": 4911, + "▁facts": 4912, + "▁absence": 4913, + "▁explan": 4914, + "▁ack": 4915, + "▁somebody": 4916, + "▁awa": 4917, + "▁admit": 4918, + "▁correct": 4919, + "▁forgot": 4920, + "▁jealous": 4921, + "▁kissed": 4922, + "▁popular": 4923, + "▁hut": 4924, + "▁ug": 4925, + "pelled": 4926, + "▁grant": 4927, + "▁friendship": 4928, + "▁indign": 4929, + "▁sympathy": 4930, + "iable": 4931, + "erous": 4932, + "▁thom": 4933, + "▁alice": 4934, + "▁level": 4935, + "▁objects": 4936, + "▁pressed": 4937, + "▁sha": 4938, + "room": 4939, + "▁qual": 4940, + "▁begged": 4941, + "▁emp": 4942, + "▁hind": 4943, + "▁highest": 4944, + "▁clouds": 4945, + "▁ghost": 4946, + "▁acknow": 4947, + "oused": 4948, + "▁strike": 4949, + "▁wishes": 4950, + "▁becomes": 4951, + "▁trembling": 4952, + "▁nob": 4953, + "▁kindness": 4954, + "▁accordingly": 4955, + "▁throat": 4956, + "ration": 4957, + "▁fare": 4958, + "▁we're": 4959, + "▁stretched": 4960, + "▁frag": 4961, + "▁wheel": 4962, + "▁queer": 4963, + "▁grandfather": 4964, + "for": 4965, + "▁choose": 4966, + "▁helen": 4967, + "▁eighty": 4968, + "▁ly": 4969, + "▁miserable": 4970, + "▁contempt": 4971, + "igned": 4972, + 
"▁military": 4973, + "▁russ": 4974, + "▁basket": 4975, + "▁ahead": 4976, + "oops": 4977, + "ivered": 4978, + "▁listening": 4979, + "▁fro": 4980, + "▁larger": 4981, + "▁divine": 4982, + "iber": 4983, + "▁stories": 4984, + "anches": 4985, + "ushing": 4986, + "izing": 4987, + "▁treasure": 4988, + "▁excuse": 4989, + "▁innocent": 4990, + "▁aid": 4991, + "▁remind": 4992, + "▁slaves": 4993, + "rit": 4994, + "stairs": 4995, + "▁reward": 4996, + "ograph": 4997, + "▁manage": 4998, + "▁dish": 4999, + "▁throughout": 5000, + "▁waves": 5001, + "▁judgment": 5002, + "▁arrival": 5003, + "▁choice": 5004, + "▁unhappy": 5005, + "astic": 5006, + "▁blank": 5007, + "▁advance": 5008, + "▁informed": 5009, + "▁acquaintance": 5010, + "▁impression": 5011, + "▁mysterious": 5012, + "bb": 5013, + "▁ara": 5014, + "▁notes": 5015, + "▁hadn't": 5016, + "▁sell": 5017, + "▁comr": 5018, + "▁impl": 5019, + "▁indust": 5020, + "▁ended": 5021, + "▁lights": 5022, + "▁nurse": 5023, + "▁sout": 5024, + "▁bought": 5025, + "▁fred": 5026, + "▁marked": 5027, + "▁scream": 5028, + "mend": 5029, + "▁uneas": 5030, + "▁delicate": 5031, + "▁weary": 5032, + "estic": 5033, + "▁prompt": 5034, + "▁experi": 5035, + "▁hungry": 5036, + "▁flying": 5037, + "▁pow": 5038, + "▁bridge": 5039, + "▁join": 5040, + "▁visible": 5041, + "▁understanding": 5042, + "▁crying": 5043, + "▁avoid": 5044, + "▁tis": 5045, + "▁stiff": 5046, + "aches": 5047, + "▁restr": 5048, + "▁sounds": 5049, + "▁bowed": 5050, + "▁caut": 5051, + "▁goods": 5052, + "▁david": 5053, + "▁unable": 5054, + "▁you'd": 5055, + "hamed": 5056, + "▁bos": 5057, + "eral": 5058, + "▁ashamed": 5059, + "▁somewhere": 5060, + "▁infinite": 5061, + "ocks": 5062, + "▁dignity": 5063, + "▁gay": 5064, + "▁vic": 5065, + "▁amid": 5066, + "▁hollow": 5067, + "▁emotion": 5068, + "▁admitted": 5069, + "▁parents": 5070, + "▁wra": 5071, + "▁hint": 5072, + "▁temple": 5073, + "▁comfortable": 5074, + "▁intelligence": 5075, + "orous": 5076, + "▁bearing": 5077, + "▁hers": 5078, + "abeth": 5079, + 
"▁remains": 5080, + "▁contem": 5081, + "▁settle": 5082, + "▁immense": 5083, + "ffe": 5084, + "pher": 5085, + "▁cher": 5086, + "ldom": 5087, + "▁weap": 5088, + "ulated": 5089, + "▁lighted": 5090, + "gypt": 5091, + "▁adventure": 5092, + "▁thoroughly": 5093, + "▁egypt": 5094, + "ilst": 5095, + "anges": 5096, + "▁obt": 5097, + "▁friendly": 5098, + "▁reckon": 5099, + "▁stupid": 5100, + "▁fed": 5101, + "▁rome": 5102, + "▁meal": 5103, + "▁intention": 5104, + "▁returning": 5105, + "▁convin": 5106, + "▁coo": 5107, + "lection": 5108, + "▁ash": 5109, + "achel": 5110, + "▁rope": 5111, + "▁price": 5112, + "▁project": 5113, + "elt": 5114, + "rows": 5115, + "▁secure": 5116, + "▁escaped": 5117, + "▁hopes": 5118, + "▁elizabeth": 5119, + "▁safety": 5120, + "▁wound": 5121, + "▁sup": 5122, + "▁unus": 5123, + "onscious": 5124, + "▁horri": 5125, + "▁minister": 5126, + "▁ox": 5127, + "lla": 5128, + "ensive": 5129, + "▁helped": 5130, + "▁plainly": 5131, + "▁seldom": 5132, + "▁thinks": 5133, + "▁fellows": 5134, + "▁mood": 5135, + "▁pushed": 5136, + "▁exhib": 5137, + "inging": 5138, + "▁thunder": 5139, + "aud": 5140, + "iana": 5141, + "▁fairly": 5142, + "▁elder": 5143, + "▁eggs": 5144, + "irm": 5145, + "▁maiden": 5146, + "mother": 5147, + "▁appears": 5148, + "▁cheeks": 5149, + "▁won": 5150, + "▁ease": 5151, + "▁redu": 5152, + "▁skill": 5153, + "▁extent": 5154, + "▁practice": 5155, + "▁religious": 5156, + "▁becoming": 5157, + "▁virgin": 5158, + "▁features": 5159, + "▁tied": 5160, + "▁whence": 5161, + "▁somehow": 5162, + "▁greet": 5163, + "▁faithful": 5164, + "▁concerned": 5165, + "▁theat": 5166, + "▁bishop": 5167, + "▁pink": 5168, + "▁eagerly": 5169, + "rees": 5170, + "▁eating": 5171, + "▁waste": 5172, + "▁rank": 5173, + "▁fem": 5174, + "▁bride": 5175, + "▁unl": 5176, + "otted": 5177, + "ceiving": 5178, + "▁trib": 5179, + "▁original": 5180, + "▁concerning": 5181, + "▁hab": 5182, + "▁accustomed": 5183, + "▁patient": 5184, + "▁recom": 5185, + "▁cell": 5186, + "ointment": 5187, + "▁arranged": 
5188, + "ville": 5189, + "iture": 5190, + "▁wholly": 5191, + "▁older": 5192, + "▁colour": 5193, + "▁provided": 5194, + "▁ate": 5195, + "▁partly": 5196, + "▁mont": 5197, + "ology": 5198, + "▁prospect": 5199, + "▁ceremon": 5200, + "▁ze": 5201, + "▁laughter": 5202, + "▁fee": 5203, + "▁branches": 5204, + "▁fled": 5205, + "right": 5206, + "▁whilst": 5207, + "▁slipped": 5208, + "▁violent": 5209, + "▁inhabit": 5210, + "▁sons": 5211, + "▁engage": 5212, + "▁uncom": 5213, + "▁deeply": 5214, + "▁substance": 5215, + "▁tale": 5216, + "▁tiny": 5217, + "▁dan": 5218, + "▁ga": 5219, + "▁bee": 5220, + "▁yards": 5221, + "icks": 5222, + "▁hastily": 5223, + "held": 5224, + "▁wes": 5225, + "▁vague": 5226, + "▁amuse": 5227, + "▁mud": 5228, + "▁wolf": 5229, + "▁hans": 5230, + "illing": 5231, + "▁supply": 5232, + "▁silk": 5233, + "▁constantly": 5234, + "▁christmas": 5235, + "▁million": 5236, + "▁whisper": 5237, + "▁mental": 5238, + "▁washing": 5239, + "verse": 5240, + "▁cloth": 5241, + "▁baron": 5242, + "▁corresp": 5243, + "▁nodded": 5244, + "▁correspond": 5245, + "ka": 5246, + "▁hell": 5247, + "▁gain": 5248, + "▁rust": 5249, + "▁obtain": 5250, + "▁unconscious": 5251, + "▁struggle": 5252, + "▁established": 5253, + "▁lawy": 5254, + "ols": 5255, + "▁signs": 5256, + "▁uttered": 5257, + "▁roman": 5258, + "▁constitution": 5259, + "pes": 5260, + "▁cave": 5261, + "▁spare": 5262, + "▁quant": 5263, + "▁image": 5264, + "▁merry": 5265, + "▁treated": 5266, + "▁efforts": 5267, + "▁lonely": 5268, + "rated": 5269, + "▁nut": 5270, + "▁glanced": 5271, + "▁portion": 5272, + "itor": 5273, + "▁resemb": 5274, + "▁withd": 5275, + "▁mead": 5276, + "▁feast": 5277, + "▁prim": 5278, + "▁cliff": 5279, + "▁emer": 5280, + "▁proportion": 5281, + "▁consideration": 5282, + "▁haste": 5283, + "▁gaze": 5284, + "▁savage": 5285, + "▁crew": 5286, + "▁tower": 5287, + "▁lack": 5288, + "▁conscience": 5289, + "▁mercy": 5290, + "▁exha": 5291, + "▁consent": 5292, + "ators": 5293, + "urd": 5294, + "▁outl": 5295, + "▁clo": 5296, + 
"▁adop": 5297, + "▁amongst": 5298, + "▁hanging": 5299, + "▁circle": 5300, + "▁prepar": 5301, + "▁brilliant": 5302, + "fl": 5303, + "▁gained": 5304, + "▁row": 5305, + "▁troops": 5306, + "▁repro": 5307, + "▁ming": 5308, + "oul": 5309, + "▁dared": 5310, + "▁lion": 5311, + "▁joe": 5312, + "▁winds": 5313, + "▁bringing": 5314, + "▁anxiety": 5315, + "▁billy": 5316, + "▁consequence": 5317, + "fice": 5318, + "pse": 5319, + "▁fought": 5320, + "▁pred": 5321, + "▁scra": 5322, + "▁glim": 5323, + "▁victory": 5324, + "ped": 5325, + "▁rab": 5326, + "▁scot": 5327, + "▁obv": 5328, + "▁shock": 5329, + "chan": 5330, + "▁knock": 5331, + "ourse": 5332, + "▁handed": 5333, + "▁indul": 5334, + "▁patience": 5335, + "▁souther": 5336, + "▁jose": 5337, + "▁fever": 5338, + "▁rolled": 5339, + "icted": 5340, + "▁setting": 5341, + "▁profession": 5342, + "▁sylvia": 5343, + "▁hun": 5344, + "utions": 5345, + "▁feared": 5346, + "▁brand": 5347, + "▁boots": 5348, + "▁forehead": 5349, + "▁principles": 5350, + "▁sink": 5351, + "▁rig": 5352, + "aval": 5353, + "▁purch": 5354, + "▁gazed": 5355, + "▁employed": 5356, + "▁murmured": 5357, + "more": 5358, + "▁sar": 5359, + "ashing": 5360, + "ural": 5361, + "acles": 5362, + "▁trad": 5363, + "▁active": 5364, + "▁benef": 5365, + "▁bottle": 5366, + "▁rage": 5367, + "▁invest": 5368, + "▁lux": 5369, + "▁sank": 5370, + "▁hang": 5371, + "▁beard": 5372, + "ential": 5373, + "▁loving": 5374, + "▁native": 5375, + "▁instruct": 5376, + "▁waist": 5377, + "▁relation": 5378, + "▁discovery": 5379, + "▁melan": 5380, + "▁nervous": 5381, + "▁obtained": 5382, + "▁pig": 5383, + "▁sear": 5384, + "▁flag": 5385, + "▁trail": 5386, + "▁distinguished": 5387, + "▁stared": 5388, + "▁misery": 5389, + "▁print": 5390, + "▁guil": 5391, + "▁jumped": 5392, + "▁swim": 5393, + "▁approaching": 5394, + "▁suspicion": 5395, + "▁iv": 5396, + "▁managed": 5397, + "aker": 5398, + "▁teach": 5399, + "▁match": 5400, + "▁guilty": 5401, + "▁wretched": 5402, + "▁rum": 5403, + "▁compar": 5404, + "▁theory": 5405, + 
"▁sher": 5406, + "▁bree": 5407, + "▁kings": 5408, + "▁shone": 5409, + "atherine": 5410, + "▁throne": 5411, + "▁showing": 5412, + "aws": 5413, + "▁robin": 5414, + "▁embar": 5415, + "utation": 5416, + "▁woman's": 5417, + "▁addressed": 5418, + "▁protest": 5419, + "▁admiration": 5420, + "▁troubled": 5421, + "▁ugly": 5422, + "oom": 5423, + "erves": 5424, + "▁flung": 5425, + "▁subs": 5426, + "▁relie": 5427, + "▁thousands": 5428, + "nce": 5429, + "▁od": 5430, + "▁current": 5431, + "▁wooden": 5432, + "▁sacrifice": 5433, + "urity": 5434, + "cip": 5435, + "▁pear": 5436, + "▁farmer": 5437, + "▁needs": 5438, + "▁condem": 5439, + "▁member": 5440, + "▁bade": 5441, + "▁dancing": 5442, + "▁reasons": 5443, + "▁consult": 5444, + "▁swall": 5445, + "▁shadows": 5446, + "▁angel": 5447, + "▁nineteen": 5448, + "▁style": 5449, + "field": 5450, + "▁lan": 5451, + "▁manif": 5452, + "▁robert": 5453, + "▁grate": 5454, + "▁engine": 5455, + "▁wisdom": 5456, + "▁jesus": 5457, + "▁convent": 5458, + "▁preced": 5459, + "▁interests": 5460, + "▁trial": 5461, + "bor": 5462, + "iven": 5463, + "▁nest": 5464, + "▁exch": 5465, + "▁voy": 5466, + "▁illust": 5467, + "▁worship": 5468, + "▁adam": 5469, + "▁phr": 5470, + "▁principal": 5471, + "▁hit": 5472, + "▁spend": 5473, + "▁stands": 5474, + "▁respons": 5475, + "▁ay": 5476, + "▁haw": 5477, + "▁whist": 5478, + "▁arrest": 5479, + "▁kinds": 5480, + "▁require": 5481, + "▁described": 5482, + "▁lit": 5483, + "▁precise": 5484, + "▁proposed": 5485, + "▁produce": 5486, + "▁utterly": 5487, + "ulse": 5488, + "▁novel": 5489, + "▁blame": 5490, + "▁credit": 5491, + "▁pause": 5492, + "osen": 5493, + "▁household": 5494, + "▁armed": 5495, + "▁follows": 5496, + "upon": 5497, + "▁approach": 5498, + "▁ninety": 5499, + "▁pir": 5500, + "▁flore": 5501, + "ivity": 5502, + "▁refuse": 5503, + "▁sensible": 5504, + "choly": 5505, + "▁national": 5506, + "▁grie": 5507, + "▁reven": 5508, + "▁let's": 5509, + "▁delightful": 5510, + "▁extremely": 5511, + "▁melancholy": 5512, + "uing": 5513, + 
"▁enorm": 5514, + "cles": 5515, + "▁slightly": 5516, + "▁sacred": 5517, + "▁recognized": 5518, + "▁mystery": 5519, + "▁gri": 5520, + "▁compre": 5521, + "▁distress": 5522, + "▁warri": 5523, + "▁useless": 5524, + "▁trif": 5525, + "▁mounted": 5526, + "▁philip": 5527, + "▁energy": 5528, + "▁explanation": 5529, + "▁cas": 5530, + "atory": 5531, + "▁pour": 5532, + "▁ric": 5533, + "▁chosen": 5534, + "▁everyone": 5535, + "umbled": 5536, + "▁apr": 5537, + "▁cam": 5538, + "▁proc": 5539, + "▁resumed": 5540, + "▁appreci": 5541, + "▁alexand": 5542, + "▁aven": 5543, + "▁wing": 5544, + "▁intense": 5545, + "▁highly": 5546, + "▁lucy": 5547, + "▁solid": 5548, + "▁departure": 5549, + "▁agreeable": 5550, + "▁exercise": 5551, + "apped": 5552, + "▁ward": 5553, + "▁bud": 5554, + "▁dwell": 5555, + "icate": 5556, + "▁dece": 5557, + "▁teacher": 5558, + "tending": 5559, + "▁max": 5560, + "▁request": 5561, + "▁unexpected": 5562, + "▁joseph": 5563, + "col": 5564, + "▁leap": 5565, + "▁victim": 5566, + "▁sighed": 5567, + "▁forces": 5568, + "chie": 5569, + "▁feed": 5570, + "▁sport": 5571, + "▁drift": 5572, + "▁wedding": 5573, + "▁british": 5574, + "sec": 5575, + "▁attitude": 5576, + "▁vision": 5577, + "▁pipe": 5578, + "▁tow": 5579, + "▁halt": 5580, + "▁manners": 5581, + "▁tend": 5582, + "▁flood": 5583, + "▁commission": 5584, + "▁guide": 5585, + "▁observe": 5586, + "▁concern": 5587, + "▁rush": 5588, + "▁affected": 5589, + "fall": 5590, + "▁stret": 5591, + "▁coach": 5592, + "▁poison": 5593, + "▁directed": 5594, + "▁medic": 5595, + "▁gest": 5596, + "▁echo": 5597, + "▁younger": 5598, + "▁confusion": 5599, + "▁continue": 5600, + "▁parli": 5601, + "▁absor": 5602, + "▁centre": 5603, + "conom": 5604, + "▁horrible": 5605, + "rison": 5606, + "▁bol": 5607, + "▁bath": 5608, + "▁gown": 5609, + "▁bye": 5610, + "▁aloud": 5611, + "▁suppl": 5612, + "▁profound": 5613, + "▁err": 5614, + "▁cheerful": 5615, + "worth": 5616, + "▁sentence": 5617, + "▁mistaken": 5618, + "▁torn": 5619, + "▁figures": 5620, + 
"▁accompanied": 5621, + "▁catherine": 5622, + "▁econom": 5623, + "▁atm": 5624, + "▁shaking": 5625, + "umber": 5626, + "▁council": 5627, + "lot": 5628, + "▁asce": 5629, + "ilities": 5630, + "▁spar": 5631, + "▁ends": 5632, + "▁straw": 5633, + "▁knights": 5634, + "▁atmosp": 5635, + "▁shade": 5636, + "▁brow": 5637, + "▁spark": 5638, + "▁rested": 5639, + "▁sentiment": 5640, + "▁recovered": 5641, + "▁subjects": 5642, + "▁duties": 5643, + "▁composed": 5644, + "▁swept": 5645, + "▁reality": 5646, + "▁singular": 5647, + "▁transp": 5648, + "▁locked": 5649, + "▁louis": 5650, + "▁assistance": 5651, + "▁wake": 5652, + "rem": 5653, + "▁sovere": 5654, + "▁unp": 5655, + "▁loves": 5656, + "▁absurd": 5657, + "▁souls": 5658, + "▁immediate": 5659, + "▁riding": 5660, + "▁connection": 5661, + "▁cheek": 5662, + "▁magnificent": 5663, + "▁ere": 5664, + "▁sugar": 5665, + "▁plans": 5666, + "▁prud": 5667, + "▁dise": 5668, + "▁adj": 5669, + "▁leaning": 5670, + "▁surrounded": 5671, + "▁we've": 5672, + "▁orn": 5673, + "▁roll": 5674, + "▁proble": 5675, + "▁strict": 5676, + "▁awake": 5677, + "▁praise": 5678, + "▁convinced": 5679, + "▁rele": 5680, + "▁frame": 5681, + "▁breaking": 5682, + "▁curtain": 5683, + "▁stayed": 5684, + "▁divided": 5685, + "▁craw": 5686, + "▁inclined": 5687, + "▁previous": 5688, + "ault": 5689, + "omen": 5690, + "▁stair": 5691, + "▁sees": 5692, + "▁pron": 5693, + "board": 5694, + "▁complex": 5695, + "▁prayer": 5696, + "▁pierre": 5697, + "▁unfortunate": 5698, + "gs": 5699, + "▁genius": 5700, + "▁increase": 5701, + "▁sufficiently": 5702, + "▁banks": 5703, + "▁revolution": 5704, + "▁southern": 5705, + "ki": 5706, + "oke": 5707, + "▁aust": 5708, + "edy": 5709, + "▁ling": 5710, + "▁countess": 5711, + "▁sleeping": 5712, + "▁devoted": 5713, + "▁utmost": 5714, + "▁market": 5715, + "▁bosom": 5716, + "▁bark": 5717, + "▁cath": 5718, + "alt": 5719, + "char": 5720, + "▁clock": 5721, + "▁handker": 5722, + "▁admin": 5723, + "▁senses": 5724, + "▁ident": 5725, + "▁midnight": 5726, + 
"▁connected": 5727, + "▁permitted": 5728, + "▁hid": 5729, + "▁fil": 5730, + "▁faced": 5731, + "▁gift": 5732, + "▁chat": 5733, + "▁brid": 5734, + "▁norther": 5735, + "▁horiz": 5736, + "▁college": 5737, + "▁handkerchief": 5738, + "isions": 5739, + "▁rebe": 5740, + "▁polic": 5741, + "▁announced": 5742, + "ounce": 5743, + "▁nons": 5744, + "▁nurs": 5745, + "ales": 5746, + "▁fleet": 5747, + "▁ragged": 5748, + "▁coffe": 5749, + "▁parties": 5750, + "▁delay": 5751, + "▁sounded": 5752, + "▁cities": 5753, + "▁wash": 5754, + "▁appointed": 5755, + "▁nights": 5756, + "▁instit": 5757, + "▁god's": 5758, + "▁striking": 5759, + "▁guns": 5760, + "▁astonishment": 5761, + "▁merchant": 5762, + "▁parliament": 5763, + "nal": 5764, + "▁ax": 5765, + "atched": 5766, + "▁pil": 5767, + "▁page": 5768, + "iform": 5769, + "▁plate": 5770, + "▁thirst": 5771, + "▁negro": 5772, + "▁ruin": 5773, + "▁inhabitants": 5774, + "win": 5775, + "arf": 5776, + "▁rib": 5777, + "▁addition": 5778, + "▁argument": 5779, + "bour": 5780, + "▁tad": 5781, + "▁scen": 5782, + "▁guests": 5783, + "▁wondering": 5784, + "▁acquainted": 5785, + "▁intent": 5786, + "pless": 5787, + "▁destroyed": 5788, + "▁coffee": 5789, + "inent": 5790, + "lebr": 5791, + "▁render": 5792, + "▁sob": 5793, + "▁demon": 5794, + "▁desir": 5795, + "uding": 5796, + "▁gets": 5797, + "▁assure": 5798, + "▁raise": 5799, + "▁sharply": 5800, + "▁privile": 5801, + "▁alarm": 5802, + "▁machine": 5803, + "fied": 5804, + "▁contract": 5805, + "▁deliber": 5806, + "▁drown": 5807, + "▁afterward": 5808, + "▁guest": 5809, + "▁conclusion": 5810, + "▁risk": 5811, + "▁ignorant": 5812, + "bury": 5813, + "kind": 5814, + "▁pian": 5815, + "an's": 5816, + "uries": 5817, + "▁soil": 5818, + "▁refer": 5819, + "▁commanded": 5820, + "▁practical": 5821, + "▁toss": 5822, + "▁offe": 5823, + "▁beheld": 5824, + "▁arist": 5825, + "▁quarters": 5826, + "▁degrees": 5827, + "▁fisher": 5828, + "▁nonsense": 5829, + "▁mc": 5830, + "isp": 5831, + "▁mechan": 5832, + "keep": 5833, + "▁doubtless": 
5834, + "▁violence": 5835, + "▁neglect": 5836, + "▁folk": 5837, + "liness": 5838, + "▁bul": 5839, + "▁easter": 5840, + "▁loft": 5841, + "▁contained": 5842, + "▁reflection": 5843, + "▁celebr": 5844, + "▁leaf": 5845, + "▁concluded": 5846, + "▁district": 5847, + "iation": 5848, + "rs": 5849, + "▁scient": 5850, + "▁he'd": 5851, + "▁scorn": 5852, + "▁crack": 5853, + "▁steep": 5854, + "▁muttered": 5855, + "▁establish": 5856, + "▁darling": 5857, + "▁andrew": 5858, + "▁chim": 5859, + "quis": 5860, + "▁quality": 5861, + "▁polly": 5862, + "▁check": 5863, + "▁craft": 5864, + "▁travell": 5865, + "▁universal": 5866, + "inate": 5867, + "▁cig": 5868, + "atives": 5869, + "omp": 5870, + "uten": 5871, + "▁jac": 5872, + "▁job": 5873, + "▁subm": 5874, + "▁reader": 5875, + "▁leis": 5876, + "▁emph": 5877, + "▁surround": 5878, + "ox": 5879, + "pent": 5880, + "itate": 5881, + "▁extended": 5882, + "▁lev": 5883, + "▁overt": 5884, + "▁retired": 5885, + "▁puzz": 5886, + "uable": 5887, + "▁libr": 5888, + "▁chin": 5889, + "▁spl": 5890, + "▁realized": 5891, + "▁causes": 5892, + "▁punishment": 5893, + "▁physic": 5894, + "▁leisure": 5895, + "can": 5896, + "▁wave": 5897, + "▁shake": 5898, + "▁charm": 5899, + "▁belonged": 5900, + "mber": 5901, + "▁bones": 5902, + "▁gas": 5903, + "▁range": 5904, + "▁prec": 5905, + "▁smell": 5906, + "▁maybe": 5907, + "▁invited": 5908, + "▁troubles": 5909, + "▁tables": 5910, + "anch": 5911, + "icip": 5912, + "▁june": 5913, + "▁abo": 5914, + "▁ages": 5915, + "▁anywhere": 5916, + "ffin": 5917, + "▁drunk": 5918, + "▁properly": 5919, + "▁local": 5920, + "▁improve": 5921, + "▁atmosphere": 5922, + "▁dir": 5923, + "▁he'll": 5924, + "▁reb": 5925, + "▁rang": 5926, + "▁compass": 5927, + "▁lieuten": 5928, + "▁leaned": 5929, + "▁firmly": 5930, + "▁nations": 5931, + "▁hay": 5932, + "▁wept": 5933, + "▁ral": 5934, + "▁conven": 5935, + "▁uniform": 5936, + "▁julia": 5937, + "eem": 5938, + "rass": 5939, + "▁track": 5940, + "▁commer": 5941, + "▁bushes": 5942, + "▁obsc": 5943, + "▁sorts": 
5944, + "▁difficulties": 5945, + "▁intellectual": 5946, + "▁introduced": 5947, + "mith": 5948, + "▁tro": 5949, + "iday": 5950, + "▁rendered": 5951, + "▁rout": 5952, + "add": 5953, + "▁plun": 5954, + "▁throwing": 5955, + "▁humble": 5956, + "▁polite": 5957, + "▁numerous": 5958, + "▁movements": 5959, + "▁successful": 5960, + "▁candle": 5961, + "▁separate": 5962, + "▁protection": 5963, + "▁thomas": 5964, + "▁enormous": 5965, + "▁unb": 5966, + "▁repub": 5967, + "▁sunsh": 5968, + "▁descended": 5969, + "▁unusual": 5970, + "ived": 5971, + "▁blaz": 5972, + "▁shows": 5973, + "▁simpl": 5974, + "▁cattle": 5975, + "▁crept": 5976, + "▁astonished": 5977, + "▁deserted": 5978, + "▁lap": 5979, + "arse": 5980, + "▁nearest": 5981, + "udes": 5982, + "▁entering": 5983, + "▁ideal": 5984, + "standing": 5985, + "nders": 5986, + "▁sore": 5987, + "aine": 5988, + "▁clos": 5989, + "▁ours": 5990, + "▁wherever": 5991, + "▁term": 5992, + "▁visited": 5993, + "▁calcul": 5994, + "ds": 5995, + "▁base": 5996, + "▁gates": 5997, + "▁stamp": 5998, + "▁liber": 5999, + "▁official": 6000, + "▁erect": 6001, + "▁alt": 6002, + "elia": 6003, + "▁harmon": 6004, + "▁painful": 6005, + "▁burned": 6006, + "▁republic": 6007, + "uer": 6008, + "▁lately": 6009, + "▁ital": 6010, + "amm": 6011, + "▁tear": 6012, + "▁actions": 6013, + "▁final": 6014, + "▁startled": 6015, + "▁sensation": 6016, + "▁fatal": 6017, + "olic": 6018, + "▁flash": 6019, + "▁appet": 6020, + "▁stronger": 6021, + "▁numbers": 6022, + "▁gratitude": 6023, + "▁female": 6024, + "▁western": 6025, + "lest": 6026 + }, + "merges": [ + "▁ t", + "h e", + "▁ a", + "▁t he", + "i n", + "▁ s", + "▁ w", + "▁ o", + "r e", + "n d", + "▁ b", + "▁ h", + "e r", + "▁ m", + "▁ i", + "o u", + "▁ c", + "▁ f", + "a t", + "e d", + "▁a nd", + "e n", + "▁t o", + "▁o f", + "o n", + "i s", + "▁ d", + "in g", + "▁t h", + "▁ p", + "▁ he", + "o r", + "▁ l", + "e s", + "▁ in", + "l l", + "i t", + "a r", + "a s", + "a n", + "▁ n", + "▁ g", + "o m", + "▁b e", + "▁h a", + "▁ e", + "l e", + 
"o t", + "▁ y", + "u t", + "o w", + "i c", + "▁w h", + "▁i t", + "l d", + "v e", + "▁th at", + "l y", + "▁w as", + "i d", + "s e", + "s t", + "▁o n", + "g h", + "en t", + "▁ re", + "▁y ou", + "i m", + "c e", + "▁ u", + "v er", + "i on", + "▁a s", + "e t", + "▁f or", + "a y", + "▁h is", + "▁w e", + "it h", + "a l", + "i r", + "▁ r", + "▁w ith", + "▁s t", + "a d", + "u r", + "gh t", + "▁a n", + "▁he r", + "▁n ot", + "▁i s", + "▁ha d", + "t er", + "he r", + "a c", + "a m", + "▁a t", + "o o", + "▁b ut", + "ou ld", + "▁s he", + "▁ k", + "▁s e", + "▁s a", + "▁s h", + "▁f r", + "▁h im", + "▁s o", + "▁m e", + "i ll", + "a in", + "▁s u", + "i ght", + "c h", + "re d", + "c t", + "a ll", + "r o", + "k e", + "es s", + "i l", + "' s", + "o re", + "▁d e", + "▁m y", + "▁the y", + "▁w he", + "▁a ll", + "ic h", + "▁n e", + "r i", + "▁b y", + "▁ha ve", + "om e", + "p p", + "▁th is", + "▁l i", + "▁d o", + "▁c on", + "u s", + "▁wh ich", + "▁c h", + "u l", + "q u", + "▁ j", + "▁u p", + "▁sa id", + "▁fr om", + "ar d", + "g e", + "▁o r", + "▁ v", + "▁on e", + "▁n o", + "t h", + "▁e x", + "▁we re", + "▁the re", + "p e", + "a nd", + "es t", + "▁m an", + "▁wh o", + "b le", + "i e", + "▁a l", + "an t", + "re s", + "ou s", + "u st", + "ver y", + "at ion", + "▁f e", + "▁the m", + "l f", + "▁whe n", + "n t", + "am e", + "in d", + "r a", + "▁g o", + "er s", + "as t", + "f e", + "oo d", + "▁k n", + "▁in t", + "is t", + "▁a re", + "ar t", + "ou t", + "▁w ould", + "▁l e", + "▁wh at", + "o s", + "▁the ir", + "on g", + "ou r", + "▁i f", + "▁c om", + "ou nd", + "▁a b", + "▁o ut", + "▁w or", + "e m", + "▁w ill", + "a k", + "▁m is", + "at e", + "o l", + "u m", + "u n", + "it t", + "ou gh", + "k ed", + "i g", + "a p", + "on e", + "▁be en", + "ow n", + "i ve", + "▁the n", + "▁b r", + "v en", + "i f", + "▁a r", + "' t", + "se lf", + "▁t r", + "▁p l", + "▁r o", + "▁p r", + "t her", + "re at", + "▁u n", + "▁a f", + "▁s p", + "▁ qu", + "▁p ro", + "it y", + "he d", + "▁t w", + "▁a g", + "▁c ould", + "o st", + 
"a ce", + "or t", + "u re", + "a ke", + "▁a m", + "ac k", + "▁an y", + "▁s ome", + "▁you r", + "▁m ore", + "▁c an", + "a u", + "▁t im", + "e p", + "a g", + "▁ en", + "c k", + "▁int o", + "▁c l", + "r y", + "▁n ow", + "h ing", + "nd er", + "a re", + "▁ very", + "▁g r", + "e l", + "o se", + "▁l oo", + "▁b o", + "v ed", + "o p", + "▁o ther", + "▁d id", + "an ce", + "▁th an", + "itt le", + "▁l ittle", + "in e", + "i es", + "w ay", + "it e", + "▁li ke", + "id e", + "▁l o", + "as s", + "▁b l", + "a ble", + "ur n", + "ou ght", + "▁kn ow", + "ot her", + "▁tim e", + "▁i m", + "▁d is", + "▁u s", + "▁c o", + "f ore", + "▁h ow", + "▁t e", + "en ce", + "▁d ay", + "▁a d", + "ad e", + "ic e", + "▁ab out", + "▁se e", + "▁o ver", + "p t", + "c c", + "▁to o", + "in k", + "▁f l", + "w n", + "▁g reat", + "▁af ter", + "p l", + "d e", + "▁p er", + "m ent", + "▁ag ain", + "▁up on", + "▁ha nd", + "a b", + "▁h as", + "re e", + "is h", + "c i", + "▁on ly", + "all y", + "▁we ll", + "▁sh ould", + "▁p o", + "▁m ar", + "res s", + "▁s ay", + "▁g ood", + "at her", + "▁tw o", + "ing s", + "▁p e", + "ou nt", + "▁o ur", + "i re", + "v ing", + "▁d own", + "ar s", + "er t", + "w e", + "▁be fore", + "i le", + "v es", + "▁a pp", + "▁e very", + "▁it s", + "▁o ld", + "▁th r", + "▁m u", + "▁m ade", + "i ed", + "ic k", + "▁l ong", + "a ge", + "t e", + "f t", + "▁whe re", + "an g", + "▁ne ver", + "▁m ust", + "▁p re", + "▁s m", + "f ul", + "▁su ch", + "u ll", + "▁st r", + "ion s", + "▁of f", + "▁s c", + "▁c ame", + "i ous", + "u e", + "▁mis s", + "w ard", + "i ld", + "▁f ir", + "▁e ven", + "▁u nder", + "ac t", + "▁the se", + "▁c ome", + "▁p art", + "▁f o", + "at ed", + "n ess", + "▁re m", + "or d", + "▁be c", + "t y", + "▁m ay", + "▁mu ch", + "▁th ink", + "p er", + "▁w ay", + "▁mis ter", + "l ed", + "▁l et", + "or n", + "▁e y", + "▁g l", + "▁con t", + "▁th ought", + "▁loo k", + "e ct", + "▁s pe", + "is e", + "▁b ack", + "▁be t", + "ad y", + "▁y e", + "an s", + "ac h", + "▁he re", + "▁j ust", + "re n", + "▁fir 
st", + "▁h o", + "▁o wn", + "▁d es", + "▁o b", + "ri ed", + "u d", + "ar y", + "▁w ent", + "▁m o", + "▁him self", + "▁m en", + "a ir", + "c l", + "a ve", + "at h", + "f f", + "▁s l", + "c o", + "on 't", + "ll ow", + "▁c r", + "▁re s", + "▁i '", + "▁m ight", + "i ly", + "▁se em", + "in t", + "i p", + "▁be g", + "ou se", + "an c", + "n 't", + "▁w at", + "▁thr ough", + "▁com p", + "b er", + "▁a way", + "▁c ar", + "▁e m", + "▁g et", + "▁im p", + "▁he ad", + "os s", + "▁li fe", + "▁be l", + "▁with out", + "▁m ost", + "▁p ass", + "▁m ake", + "▁con s", + "en ed", + "▁s om", + "▁t urn", + "a v", + "n g", + "▁sh all", + "▁a cc", + "▁th ose", + "▁p res", + "▁ey es", + "▁h ouse", + "i z", + "▁som et", + "▁j o", + "▁st ill", + "▁c all", + "▁n ight", + "he s", + "▁o p", + "au se", + "▁w om", + "▁l ast", + "k s", + "l ess", + "a red", + "▁com m", + "▁d on't", + "▁te ll", + "▁ ent", + "▁not hing", + "▁ne w", + "ig n", + "▁t ake", + "▁be ing", + "▁man y", + "▁wor d", + "on s", + "▁f ound", + "▁re t", + "as e", + "▁e ar", + "▁wh ile", + "▁at t", + "or y", + "i x", + "▁s er", + "▁sa w", + "▁p ut", + "n e", + "ot h", + "ie nd", + "▁pe op", + "▁w r", + "▁you ng", + "ar k", + "d y", + "ak ing", + "l es", + "▁c ount", + "▁on ce", + "▁fr iend", + "▁l a", + "en s", + "▁peop le", + "pe ct", + "or s", + "fe ct", + "▁m at", + "in ce", + "i ble", + "e red", + "▁ro om", + "▁th ree", + "▁y et", + "a il", + "▁s ame", + "▁f ather", + "▁r ight", + "▁ch ild", + "▁c our", + "i gh", + "▁pl ace", + "▁an other", + "ul t", + "i v", + "it ion", + "▁in d", + "▁w ant", + "▁th ough", + "▁n or", + "▁f ar", + "▁k ing", + "▁ha pp", + "▁he art", + "▁f ace", + "▁e nd", + "▁e ver", + "▁n at", + "th ing", + "▁lo ve", + "g et", + "▁too k", + "▁d ist", + "e ver", + "i an", + "▁h u", + "e w", + "▁ar m", + "▁in st", + "m an", + "▁wor k", + "▁l ight", + "▁ch ar", + "▁p le", + "ic t", + "▁s et", + "▁a c", + "▁loo ked", + "▁miss us", + "▁as ked", + "▁m ind", + "▁y es", + "▁su pp", + "▁int e", + "▁re p", + "c ess", + "ent 
ly", + "▁le ft", + "g g", + "ert ain", + "▁k e", + "is hed", + "u b", + "▁p ers", + "way s", + "▁th ings", + "al k", + "ir l", + "▁m om", + "▁s ir", + "▁w a", + "▁mom ent", + "ation s", + "▁s at", + "se l", + "▁f ind", + "ow er", + "i a", + "v ent", + "re w", + "▁wor ld", + "j ect", + "▁g ive", + "▁c ap", + "▁wh y", + "s o", + "▁g u", + "▁m other", + "▁g en", + "▁s w", + "▁al ways", + "d er", + "l t", + "l ing", + "▁an s", + "pp ed", + "▁so on", + "▁a ct", + "▁for m", + "▁e l", + "d d", + "▁he ard", + "re t", + "▁th ing", + "▁somet hing", + "▁seem ed", + "▁su b", + "▁do or", + "an ge", + "▁g irl", + "c ed", + "▁app e", + "it her", + "▁w ind", + "▁bec ause", + "▁d if", + "▁m on", + "s s", + "▁go ing", + "▁to ld", + "or m", + "▁h ome", + "ain ed", + "▁g ot", + "▁w ar", + "▁go d", + "au ght", + "▁g i", + "▁en g", + "▁s ur", + "n ing", + "▁hand s", + "▁wom an", + "▁fo llow", + "l and", + "a ut", + "▁v o", + "▁fe el", + "▁re l", + "▁p oss", + "c hed", + "ic al", + "p le", + "p h", + "▁bo y", + "▁ret urn", + "▁re g", + "▁re st", + "oo k", + "▁kn ew", + "n er", + "▁e ach", + "▁o h", + "▁s il", + "▁k ind", + "▁ex p", + "▁m a", + "▁c le", + "▁he l", + "i ver", + "t ing", + "▁de l", + "u al", + "▁in f", + "▁as s", + "▁wat er", + "▁con f", + "▁b re", + "▁w o", + "ce pt", + "▁bel ie", + "▁c ertain", + "▁again st", + "▁h ard", + "▁p h", + "r ow", + "▁u nt", + "▁ye ars", + "▁qu ite", + "▁s ide", + "in ess", + "in ed", + "▁ne ar", + "▁h or", + "ter s", + "i red", + "oo l", + "▁f our", + "▁fe w", + "▁d one", + "i er", + "▁c he", + "re st", + "it ed", + "m ost", + "▁bet ter", + "▁ha lf", + "▁m in", + "▁t re", + "p s", + "▁al so", + "▁c are", + "o ck", + "u ck", + "ou b", + "▁beg an", + "ull y", + "▁en ough", + "is ed", + "r u", + "▁ha ving", + "▁se en", + "▁gen er", + "▁l ady", + "▁d ra", + "▁h um", + "ap s", + "ot t", + "▁p ur", + "ak en", + "ro ss", + "y ing", + "▁t er", + "▁h our", + "▁in de", + "an k", + "▁call ed", + "i al", + "as on", + "▁be h", + "▁do es", + "▁who le", + "▁m 
orn", + "▁turn ed", + "▁ple as", + "▁st e", + "▁re f", + "▁g ave", + "en se", + "▁o cc", + "i b", + "▁cour se", + "▁in s", + "re am", + "get her", + "ut h", + "▁b oth", + "▁s ou", + "▁c ur", + "▁ad d", + "e en", + "▁c ol", + "▁re ad", + "we en", + "sel ves", + "▁am ong", + "▁bet ween", + "▁in c", + "▁ke ep", + "▁be aut", + "ul ar", + "▁po or", + "▁it 's", + "▁su re", + "▁morn ing", + "▁wh ite", + "g ed", + "▁n ame", + "▁de ar", + "▁to ward", + "ut e", + "▁sm all", + "▁wh om", + "▁re pl", + "▁s k", + "▁l ar", + "▁fe lt", + "b o", + "os ed", + "at ing", + "▁my self", + "▁op en", + "▁s ix", + "▁her self", + "▁how ever", + "▁b u", + "o nd", + "ain t", + "x t", + "▁f ore", + "▁in ter", + "▁e v", + "▁h igh", + "ct ion", + "▁hu nd", + "▁st ood", + "▁hund red", + "as ter", + "▁t ra", + "▁sh ow", + "▁s ent", + "i fe", + "▁r ound", + "▁s im", + "▁d r", + "▁g ra", + "▁word s", + "▁day s", + "▁al most", + "a le", + "ve l", + "▁po int", + "ent s", + "▁g re", + "▁e ight", + "c es", + "at es", + "dd en", + "▁f am", + "▁st and", + "▁b us", + "▁l and", + "▁ ed", + "▁me an", + "un g", + "h aps", + "▁su n", + "u res", + "▁s ince", + "i et", + "ir d", + "▁per haps", + "n ed", + "▁s le", + "is s", + "▁b est", + "▁su dden", + "▁d ark", + "▁repl ied", + "▁vo ice", + "▁m et", + "▁any thing", + "▁t ill", + "▁under st", + "▁b ar", + "it s", + "▁unt il", + "in s", + "ou d", + "▁bl ack", + "▁b ro", + "▁he ar", + "▁look ing", + "▁c ried", + "▁you '", + "▁f act", + "am p", + "▁pr in", + "▁l ess", + "▁l ay", + "▁ne xt", + "▁la w", + "u p", + "▁p ower", + "▁pro p", + "n ot", + "re nt", + "▁br ought", + "ate ly", + "ent y", + "▁count ry", + "▁hel p", + "al s", + "▁qu est", + "m ed", + "▁u se", + "▁v is", + "▁s n", + "▁i' m", + "f ully", + "▁sp o", + "▁to gether", + "▁ne ed", + "▁a ir", + "▁ad v", + "▁pers on", + "▁inde ed", + "▁cont in", + "▁un c", + "one y", + "▁g ent", + "▁pres ent", + "▁a w", + "▁p ar", + "ow s", + "u red", + "▁f ull", + "t ain", + "▁r un", + "▁r ather", + "▁i de", + "▁co nd", 
+ "nd ed", + "▁l at", + "▁s y", + "b e", + "d u", + "▁h ar", + "▁fe et", + "▁f in", + "et er", + "▁f all", + "ce i", + "▁f ive", + "▁m il", + "▁b ed", + "o c", + "▁do ct", + "▁inte rest", + "ress ed", + "▁mat ter", + "▁l ord", + "▁g one", + "▁ es", + "f ort", + "▁de ath", + "▁w ife", + "▁ser v", + "▁p at", + "er ing", + "oub t", + "▁ad m", + "▁t alk", + "▁t aken", + "▁ar t", + "▁t ri", + "▁other s", + "▁ho pe", + "as h", + "a z", + "▁ex t", + "▁can not", + "ie f", + "▁spe ak", + "▁l au", + "▁them selves", + "▁al ong", + "▁d ire", + "o ve", + "m b", + "p r", + "▁b es", + "▁c ou", + "▁m or", + "t en", + "▁gent le", + "ur ing", + "▁f ire", + "▁lar ge", + "▁p ol", + "▁c at", + "▁s we", + "ent ion", + "ver s", + "▁th us", + "a pp", + "▁se c", + "▁pl ay", + "▁re al", + "▁pr om", + "ment s", + "we red", + "ie ld", + "ain s", + "is on", + "ac hed", + "▁th ou", + "▁re ason", + "▁th ous", + "it ing", + "▁br other", + "ak es", + "▁thous and", + "on t", + "▁m oney", + "▁rem em", + "▁de p", + "▁ans wered", + "▁tr ue", + "▁child ren", + "▁beh ind", + "o y", + "▁s ound", + "ant s", + "ab ly", + "▁w ood", + "us ed", + "▁de c", + "▁who se", + "o d", + "▁e le", + "▁tw enty", + "▁r a", + "it u", + "▁belie ve", + "▁wo nder", + "en e", + "▁in v", + "▁h on", + "ar ing", + "s h", + "u ed", + "▁su ff", + "▁o pp", + "▁d oubt", + "▁re c", + "t on", + "▁ho ld", + "▁dif fe", + "▁pass ed", + "▁c or", + "m e", + "id ed", + "it ies", + "▁m er", + "▁s ing", + "▁nat ure", + "▁al one", + "▁de ad", + "▁p ri", + "k en", + "l ic", + "▁re d", + "▁b ur", + "ac es", + "▁cl ose", + "▁go ld", + "▁st art", + "▁h ur", + "▁f ur", + "o g", + "anc es", + "▁as k", + "▁doct or", + "▁s on", + "▁gr ound", + "w er", + "et s", + "▁se a", + "▁str ong", + "▁le ave", + "▁comp an", + "▁i' ll", + "er y", + "c y", + "ill ed", + "ep t", + "id es", + "t le", + "▁c e", + "▁ob s", + "bo dy", + "▁fe ll", + "▁s ign", + "co nd", + "▁m ount", + "▁f air", + "▁gi ven", + "▁there fore", + "an e", + "▁i r", + "▁de ep", + "if ul", + "f 
ic", + "y s", + "▁of ten", + "▁bo dy", + "u nt", + "▁sh ort", + "▁t em", + "▁f a", + "▁m aster", + "▁ear th", + "▁p ap", + "ce ed", + "▁st re", + "▁se cond", + "▁for t", + "b ed", + "g th", + "ow ed", + "▁hor se", + "id d", + "▁m ad", + "u ally", + "▁p a", + "▁ch r", + "▁or der", + "▁t en", + "ve red", + "▁con st", + "▁w ish", + "▁f if", + "▁e as", + "▁c ir", + "▁d ro", + "a im", + "he n", + "▁c a", + "▁re ally", + "re ad", + "cei ved", + "▁i ll", + "▁fe ar", + "os ition", + "▁underst and", + "▁sp ir", + "▁l ist", + "▁ab s", + "▁sp r", + "ac ed", + "▁quest ion", + "ang er", + "▁every thing", + "aught er", + "▁af f", + "▁w all", + "▁com ing", + "ch ing", + "re ady", + "id er", + "▁ab ove", + "▁pr ince", + "▁al ready", + "▁le ast", + "▁re co", + "▁ex pl", + "▁st ep", + "▁us ed", + "▁r u", + "▁it self", + "is ter", + "▁ne cess", + "▁c ase", + "▁ar ound", + "h n", + "▁sou l", + "▁sudden ly", + "g er", + "▁l ad", + "▁even ing", + "▁m ag", + "▁gener al", + "▁n um", + "im es", + "▁kn own", + "▁w al", + "▁qu ick", + "iz ed", + "▁m us", + "▁s ch", + "▁cap tain", + "▁that 's", + "if ic", + "▁whe ther", + "▁le ar", + "g n", + "▁with in", + "m en", + "▁li ve", + "ver n", + "▁tim es", + "▁ex pect", + "▁st ate", + "▁friend s", + "▁br ing", + "▁s ort", + "▁wom en", + "▁t able", + "▁me et", + "▁jo hn", + "▁cir c", + "▁su m", + "▁return ed", + "il ed", + "▁d ri", + "▁he ld", + "▁ex c", + "▁b ig", + "▁say s", + "▁per fect", + "▁le a", + "▁obs er", + "▁el se", + "▁d uring", + "id ent", + "▁h us", + "t ed", + "▁beaut iful", + "▁cle ar", + "▁e ither", + "▁to wn", + "▁s ight", + "▁l ost", + "▁sle ep", + "▁me ans", + "▁fo ot", + "▁c ut", + "▁c al", + "▁k ept", + "▁r an", + "i ence", + "▁pro f", + "te red", + "he re", + "et y", + "▁fe llow", + "▁can 't", + "▁m ist", + "▁p ast", + "▁d ream", + "ag es", + "▁bec ame", + "▁pre t", + "▁dis c", + "▁b ad", + "▁m aking", + "ut ion", + "▁ob ject", + "▁toward s", + "▁l ow", + "u ght", + "▁de v", + "▁hum an", + "▁man ner", + "▁str ange", + "▁ye ar", 
+ "o ld", + "i ent", + "in es", + "▁se ver", + "m on", + "▁an n", + "air s", + "c hes", + "▁c ity", + "▁somet imes", + "' d", + "▁ro se", + "▁ est", + "il ity", + "▁w alk", + "▁re ady", + "▁p al", + "▁le g", + "▁ro ad", + "i ans", + "ci ous", + "▁c orn", + "▁th y", + "▁co ld", + "ll y", + "ious ly", + "l ish", + "▁st ra", + "m er", + "▁b at", + "ow ing", + "ie w", + "▁chr ist", + "▁s qu", + "▁tr uth", + "c ri", + "ll ed", + "▁th ir", + "▁did n't", + "b ert", + "▁so ci", + "b r", + "▁b it", + "▁sub ject", + "▁sh ip", + "▁m ur", + "▁app ro", + "▁p ie", + "▁ans wer", + "▁f ree", + "▁bus iness", + "▁ ut", + "a pe", + "▁appe ar", + "▁r iver", + "▁st o", + "▁c ast", + "▁fam ily", + "▁j ud", + "▁ex cl", + "▁let ter", + "ing ly", + "ri e", + "▁ha ir", + "ot e", + "▁arm s", + "▁bec ome", + "er n", + "ou ble", + "▁diffe rent", + "▁v al", + "f fect", + "▁nat ur", + "▁poss ible", + "▁sever al", + "▁f ine", + "a h", + "▁le ad", + "▁for g", + "▁exp ress", + "l i", + "▁su s", + "▁gl ad", + "o on", + "▁ar ri", + "▁bl ood", + "itt ing", + "▁qu iet", + "ren ce", + "▁ide a", + "▁a ble", + "itt ed", + "st er", + "▁char ac", + "▁beg in", + "▁ch ur", + "▁t ou", + "▁st ory", + "▁ey e", + "b and", + "at ive", + "▁gr and", + "▁cons ider", + "▁ac ross", + "▁p en", + "▁ex cept", + "▁f re", + "▁w in", + "▁e qu", + "et h", + "▁c ent", + "is f", + "▁part ic", + "▁dif fic", + "▁wind ow", + "▁sur pr", + "ll ect", + "▁pro v", + "▁dire ct", + "▁con c", + "e y", + "a w", + "▁go vern", + "▁dis co", + "▁w ild", + "▁do g", + "▁fl o", + "▁so ft", + "te en", + "▁c ross", + "as ed", + "▁e ffect", + "▁s or", + "▁long er", + "▁he n", + "▁follow ed", + "▁so ld", + "▁the e", + "▁p ub", + "▁hus band", + "ard s", + "ant ly", + "b y", + "▁a p", + "▁supp ose", + "▁res pect", + "t s", + "▁h ast", + "▁s al", + "▁comp le", + "▁he av", + "▁happ y", + "▁r ich", + "▁c reat", + "un e", + "▁t aking", + "▁re qu", + "▁st ay", + "▁spo ke", + "▁d aughter", + "▁we e", + "▁ ve", + "▁d u", + "▁gre en", + "▁an im", + "▁d in", + 
"' ll", + "▁b ird", + "al th", + "▁me re", + "▁g ard", + "n y", + "le y", + "▁poss ess", + "em pt", + "▁re ached", + "▁appe ared", + "o v", + "▁ex ist", + "in ation", + "▁pret ty", + "▁remem ber", + "▁he a", + "▁op ened", + "▁to m", + "ang ed", + "▁sl ow", + "▁im ag", + "▁i' ve", + "r act", + "▁say ing", + "k ing", + "ut es", + "▁comm on", + "▁occ as", + "▁b ook", + "▁r us", + "am es", + "ic es", + "▁br ight", + "m s", + "▁sat isf", + "▁s ense", + "▁f av", + "▁su cc", + "um p", + "is ing", + "▁l u", + "▁acc ord", + "ter n", + "▁bre ak", + "▁ex per", + "▁mon th", + "u se", + "▁de m", + "▁sc ar", + "▁contin ued", + "▁sec ret", + "▁chur ch", + "▁t ree", + "▁st ri", + "▁car ried", + "▁c ry", + "nd ing", + "▁spir it", + "▁want ed", + "er ic", + "▁certain ly", + "▁comm and", + "▁d est", + "▁mo ve", + "ou n", + "▁swe et", + "▁stre et", + "▁o ught", + "▁acc ount", + "▁de f", + "h am", + "▁pre p", + "▁s ens", + "▁es c", + "▁ro ck", + "ot s", + "▁de cl", + "▁pur p", + "ri age", + "ou th", + "ow ers", + "▁dra w", + "▁e at", + "▁b reat", + "▁charac ter", + "im e", + "c ul", + "med i", + "▁st ud", + "▁sch ool", + "itu de", + "▁hea ven", + "▁feel ing", + "▁s ad", + "▁reg ard", + "em ent", + "▁p ain", + "▁wor th", + "▁b ra", + "ne y", + "▁d ut", + "▁sm o", + "aim ed", + "▁tr ans", + "▁del ight", + "▁qu ar", + "▁h ung", + "▁m ot", + "▁bl ue", + "▁h ot", + "▁h ill", + "▁d iv", + "um b", + "▁dis app", + "▁mar g", + "▁lau gh", + "id ence", + "▁pro du", + "▁succ ess", + "ur y", + "s on", + "▁f ast", + "▁eng lish", + "▁d ress", + "▁h at", + "▁ter ri", + "▁p ort", + "▁ne ither", + "▁cour t", + "▁se ven", + "▁f ight", + "▁prin cess", + "▁li ved", + "▁v iew", + "▁im medi", + "▁se lf", + "▁v ar", + "▁hour s", + "▁m ill", + "▁so l", + "▁ex am", + "▁t ried", + "▁w on't", + "▁ent ered", + "▁dis p", + "t o", + "r ic", + "▁car ry", + "▁imp ort", + "▁an g", + "z e", + "on y", + "▁d anger", + "led ge", + "▁off ic", + "▁c ause", + "▁n one", + "▁for ward", + "▁unc le", + "▁to r", + "▁d et", + "as 
k", + "▁l en", + "▁fur ther", + "▁p ay", + "▁add ed", + "▁fr ont", + "r or", + "▁g e", + "▁partic ular", + "▁de al", + "▁pr ot", + "▁l ed", + "▁ac qu", + "▁pr ay", + "▁e ff", + "▁happ ened", + "▁ch ief", + "le ct", + "▁wal ked", + "▁lat er", + "▁jo y", + "i ar", + "d ay", + "▁or d", + "▁al th", + "▁com fort", + "▁pro b", + "▁ma j", + "▁af fect", + "▁pub lic", + "▁b ene", + "en ing", + "▁alth ough", + "g r", + "▁sh o", + "▁f ig", + "res h", + "▁f ail", + "u ct", + "u g", + "al ity", + "▁me m", + "▁seem s", + "▁your self", + "sh ip", + "e ad", + "i am", + "▁num ber", + "s ide", + "▁a h", + "▁do ing", + "▁li ving", + "are nt", + "▁des p", + "iz e", + "oo f", + "▁f ield", + "▁re ceived", + "▁sh ad", + "▁be y", + "▁bey ond", + "▁ph il", + "▁l ine", + "▁vis it", + "in ct", + "ri g", + "▁part y", + "▁gard en", + "▁j e", + "▁m outh", + "▁ha ll", + "▁qu een", + "▁bo at", + "▁be ar", + "▁am eric", + "is m", + "▁gentle man", + "▁v i", + "ir t", + "u ff", + "▁la id", + "ra id", + "▁occas ion", + "▁ent ire", + "▁a ge", + "▁s ister", + "▁cl ot", + "▁re pe", + "ous ly", + "▁pr ison", + "▁acc om", + "▁wh is", + "▁near ly", + "▁tre es", + "il ing", + "if f", + "▁eight een", + "b it", + "ward s", + "▁ear ly", + "▁t al", + "▁l ab", + "▁for th", + "m ing", + "on es", + "▁m ed", + "▁tr y", + "▁d a", + "il t", + "anc ed", + "▁prin ci", + "▁en em", + "▁think ing", + "▁ch ance", + "w here", + "▁c re", + "▁min utes", + "▁an x", + "▁mar y", + "▁p ict", + "▁wa it", + "▁v ill", + "▁st ren", + "▁af raid", + "▁cr ow", + "▁sm ile", + "▁l ate", + "▁eng land", + "▁pleas ure", + "▁a unt", + "▁new s", + "▁w is", + "▁f le", + "▁see ing", + "▁su per", + "▁fa ith", + "▁ro b", + "im ent", + "o int", + "▁b ill", + "ll ing", + "▁ne igh", + "▁tr ouble", + "▁sil ence", + "▁pl ain", + "▁there 's", + "are t", + "pe nd", + "▁excl aimed", + "ren ch", + "g y", + "▁mil es", + "p ly", + "▁gl ass", + "▁d rew", + "▁neigh b", + "el s", + "▁m ine", + "▁pr act", + "▁heav y", + "▁stand ing", + "▁se vent", + "▁sh ar", + 
"▁ch ange", + "▁necess ary", + "▁ch ap", + "▁purp ose", + "▁in qu", + "▁natur al", + "▁d eter", + "ic ked", + "▁b ott", + "▁hard ly", + "▁be ll", + "▁to p", + "▁c aught", + "fe red", + "w h", + "i ves", + "ound ed", + "▁a uth", + "▁circ um", + "▁f ing", + "▁sto pped", + "u c", + "▁w it", + "am ent", + "▁op in", + "▁a v", + "▁pri v", + "ain ing", + "▁inst ead", + "ru pt", + "▁g rew", + "▁lo ved", + "▁is land", + "▁kn ight", + "▁ag o", + "▁len gth", + "▁in n", + "▁pe ace", + "l s", + "in ary", + "i or", + "u es", + "▁th ird", + "us h", + "▁beaut y", + "▁h ig", + "▁he 's", + "t he", + "f orm", + "he ad", + "ic ally", + "as p", + "anc y", + "▁deter m", + "▁stra ight", + "▁c ra", + "in ing", + "pp er", + "l er", + "▁inf l", + "▁th or", + "▁con vers", + "▁bes ides", + "▁p osition", + "▁thir ty", + "▁d en", + "ra ge", + "▁att ention", + "m a", + "▁con v", + "ag er", + "▁his t", + "o red", + "▁com es", + "ag ed", + "▁for ce", + "▁s itting", + "▁ple ase", + "te nd", + "it er", + "▁what ever", + "▁inf orm", + "▁h op", + "▁ch air", + "▁bu ild", + "▁b ab", + "ust om", + "▁girl s", + "▁r om", + "▁f rench", + "▁str uck", + "▁p ull", + "▁a st", + "▁li e", + "▁wr ong", + "▁know ledge", + "▁gra ce", + "▁scar ce", + "g hed", + "▁res ol", + "▁wat ch", + "▁thought s", + "▁r id", + "▁att empt", + "▁fif ty", + "▁r ap", + "▁bo x", + "h ood", + "▁get ting", + "▁ ver", + "▁f at", + "▁compan y", + "▁ar r", + "▁crow d", + "▁b urn", + "▁sl ight", + "▁cl ass", + "▁sou th", + "▁d ie", + "▁ex act", + "▁dr ink", + "▁en j", + "▁th ick", + "▁din ner", + "▁sa ve", + "▁ma id", + "▁pl an", + "▁sa int", + "▁immedi ately", + "i ers", + "▁b orn", + "i us", + "▁re v", + "▁te ars", + "ist s", + "▁t reat", + "us ion", + "▁me ant", + "▁boy s", + "pp ing", + "▁slow ly", + "▁in cl", + "▁l im", + "▁d ied", + "ic ed", + "▁com pl", + "▁f ool", + "▁fore st", + "▁su gg", + "▁p ost", + "▁ac cept", + "▁res ult", + "▁auth or", + "nd on", + "ce ive", + "▁sugg est", + "ci ent", + "▁st one", + "▁fr ight", + "▁pap er", + 
"▁con se", + "▁j our", + "▁t y", + "▁en c", + "▁quick ly", + "▁cont r", + "▁you th", + "▁se nd", + "▁v ict", + "if ied", + "▁bel ong", + "▁war m", + "▁f ix", + "▁imp oss", + "▁bes ide", + "▁ er", + "▁to ne", + "▁c amp", + "▁des ire", + "▁b ound", + "▁m akes", + "▁marg aret", + "▁nor th", + "▁br own", + "▁mo on", + "▁li ps", + "▁pl aced", + "v al", + "▁circum st", + "▁f ood", + "▁f illed", + "ic s", + "if t", + "an n", + "▁lo ndon", + "▁dist ance", + "g ing", + "▁stren gth", + "▁i d", + "▁flo or", + "▁for get", + "▁ob l", + "▁m id", + "ri es", + "it ions", + "b s", + "▁spr ing", + "▁you' re", + "▁vi ol", + "▁j ack", + "▁po ck", + "oo ks", + "▁follow ing", + "▁s ac", + "▁rem ained", + "ar ch", + "▁gr ow", + "▁sn ow", + "▁govern ment", + "▁b all", + "▁h ors", + "▁n ar", + "ad ed", + "▁bro ken", + "▁lau ghed", + "▁des cri", + "▁sa fe", + "itt en", + "ive ly", + "▁prof ess", + "▁o '", + "am ed", + "▁dep art", + "▁eas y", + "ou red", + "▁u nd", + "▁cou n", + "▁than k", + "▁know s", + "▁wa iting", + "d om", + "at s", + "▁g er", + "▁v an", + "▁an ne", + "▁hors es", + "u gg", + "▁d read", + "▁un e", + "g es", + "ac y", + "▁pro ceed", + "▁g az", + "▁sh out", + "▁start ed", + "ent ed", + "▁comple te", + "o pe", + "▁g all", + "de red", + "▁w ide", + "i res", + "▁ne ck", + "as ure", + "ist ed", + "▁serv ice", + "▁pie ce", + "ci ally", + "en ces", + "▁sa il", + "▁pal ace", + "er v", + "▁gu ard", + "▁do ll", + "▁talk ing", + "▁man 's", + "▁li ft", + "▁gra ve", + "▁wee k", + "le t", + "▁imposs ible", + "▁eff ort", + "▁im m", + "▁arm y", + "we ll", + "▁diffic ult", + "u nd", + "▁f resh", + "▁f un", + "re me", + "▁st op", + "▁m ess", + "▁g ar", + "▁de g", + "▁inc re", + "▁corn er", + "▁soci ety", + "▁we ak", + "▁sh ut", + "▁h y", + "▁pro per", + "ac hing", + "▁cl oud", + "idd le", + "iv id", + "▁dem and", + "▁n ine", + "▁s it", + "▁reco gn", + "▁be at", + "us s", + "▁turn ing", + "▁sk y", + "▁opin ion", + "▁sing le", + "p ic", + "▁f ly", + "▁l ang", + "▁m ass", + "ce ll", + "▁out 
side", + "▁k iss", + "▁tr ust", + "▁occ up", + "▁ev il", + "▁bel ow", + "▁appear ance", + "u it", + "▁after n", + "▁gl o", + "▁g un", + "▁w est", + "en cy", + "p ar", + "▁show ed", + "▁convers ation", + "is es", + "▁con n", + "▁could n't", + "▁run ning", + "▁m ention", + "▁great er", + "▁mus ic", + "▁breat h", + "as es", + "▁n in", + "▁an t", + "are r", + "▁mor row", + "▁b ank", + "▁es pe", + "▁p eter", + "or k", + "ci al", + "▁pres ence", + "▁bat tle", + "▁win ter", + "he red", + "▁prob ably", + "▁clot hes", + "▁f ash", + "▁mar k", + "▁w ished", + "ve re", + "▁co ll", + "▁em b", + "▁kn e", + "▁mar ried", + "▁arri ved", + "▁p un", + "▁e vent", + "us hed", + "▁suff ic", + "▁e ager", + "▁form er", + "▁gi ving", + "▁p op", + "▁sa nd", + "▁ne g", + "▁us ual", + "▁rel ig", + "▁sim ple", + "▁sy m", + "it ation", + "▁g ro", + "or ies", + "▁mo ved", + "▁month s", + "▁spe aking", + "▁p et", + "▁sil ent", + "▁c ab", + "▁mount ain", + "▁express ion", + "g ar", + "▁co vered", + "▁hu nt", + "▁aftern oon", + "ap ed", + "▁occ ur", + "rie f", + "▁st ates", + "▁ z", + "st r", + "▁lo c", + "l ight", + "▁sh ore", + "c he", + "▁eas ily", + "▁p ale", + "un ity", + "▁rem ark", + "▁ph ys", + "▁begin ning", + "▁dut y", + "▁chap ter", + "▁infl u", + "ch o", + "▁con cl", + "am b", + "▁inst ant", + "▁pol it", + "z z", + "▁enj oy", + "▁s ick", + "▁rem ain", + "u el", + "▁st ream", + "▁fig ure", + "a ld", + "▁t ur", + "▁p ath", + "▁v ol", + "▁min ute", + "▁pleas ant", + "▁scarce ly", + "▁cons cious", + "▁terri ble", + "▁k ill", + "▁ra ised", + "▁fash ion", + "▁tw el", + "y al", + "▁lea ving", + "▁twel ve", + "at ure", + "▁f ut", + "▁th rew", + "▁st ar", + "▁fl owers", + "ol og", + "▁tr ying", + "ri b", + "▁sw ord", + "▁t all", + "▁mar ry", + "▁b en", + "▁expect ed", + "▁accord ing", + "▁for ty", + "▁st ick", + "in al", + "▁gu ess", + "▁sil ver", + "▁ir on", + "▁obl ig", + "▁off ice", + "▁rap id", + "▁lad ies", + "▁espe cially", + "i pped", + "ort ed", + "▁bre ad", + "e ch", + "▁te nder", + "or 
th", + "▁lear ned", + "▁b ooks", + "▁is n't", + "▁surpr ise", + "▁wr ite", + "▁pur s", + "pe red", + "▁wr itten", + "▁k illed", + "▁conse qu", + "▁ex h", + "▁pl aces", + "▁cond ition", + "▁dire ction", + "▁ch o", + "ul ty", + "j o", + "m it", + "▁entire ly", + "ter ing", + "▁ent er", + "▁act ion", + "w ise", + "▁su c", + "ib ly", + "▁happ iness", + "▁dec ided", + "▁gold en", + "▁lang u", + "en ess", + "▁not e", + "▁un less", + "u ous", + "▁f al", + "al ed", + "▁you' ll", + "▁wonder ful", + "ound s", + "um e", + "' re", + "▁sh ook", + "er 's", + "oo p", + "one l", + "▁perfect ly", + "▁ge or", + "nd ered", + "▁bro ad", + "at ic", + "▁cl osed", + "a 's", + "▁sp ot", + "te nded", + "▁lat ter", + "▁step s", + "▁mere ly", + "▁hist ory", + "f er", + "▁w ise", + "is hing", + "os ing", + "▁m iddle", + "ide red", + "▁underst ood", + "▁enem y", + "▁so le", + "ll ig", + "▁j ew", + "▁sim ply", + "g an", + "▁cond uct", + "▁t ast", + "▁bo ard", + "▁sa v", + "▁would n't", + "▁sh ot", + "▁rep ly", + "▁ch anged", + "m n", + "▁gr ass", + "▁fin ally", + "▁adm ir", + "it al", + "▁shar p", + "it ch", + "▁fort une", + "▁sum mer", + "▁exper ience", + "▁suc ceed", + "g ress", + "ut ed", + "▁o rig", + "ret ched", + "▁jour ney", + "▁ex cell", + "▁obser ved", + "a x", + "▁after wards", + "f ast", + "s y", + "▁b ow", + "▁fl at", + "▁pers ons", + "▁le an", + "▁ear n", + "▁bro ke", + "▁m ir", + "▁f it", + "os p", + "▁mar riage", + "▁rep res", + "i o", + "▁l ying", + "un k", + "▁tra ve", + "▁s itu", + "▁list en", + "▁acqu aint", + "▁r ing", + "ci ence", + "▁f aint", + "ol ute", + "▁cal m", + "b ered", + "▁li ves", + "▁esc ape", + "▁bene ath", + "ous es", + "▁cl im", + "▁bl ess", + "▁repe ated", + "▁pock et", + "est s", + "▁t ail", + "▁pass ion", + "▁d ick", + "▁v en", + "os es", + "cl ock", + "▁m ut", + "▁bec om", + "▁o per", + "▁o' clock", + "▁f ish", + "▁l ou", + "se mb", + "▁pre v", + "▁all owed", + "▁fam il", + "he l", + "▁g ate", + "▁sp ite", + "iver s", + "▁he alth", + "iss ion", + "▁i gn", 
+ "▁re ach", + "▁c and", + "▁r ain", + "▁em pl", + "▁b an", + "▁str ugg", + "▁fir m", + "▁bit ter", + "▁sor ry", + "b ing", + "▁father 's", + "▁tem per", + "▁mad ame", + "pl es", + "▁f urn", + "▁fut ure", + "um ed", + "▁n ice", + "▁se par", + "▁pres ently", + "▁circumst ances", + "▁conn ect", + "id ing", + "▁set t", + "k es", + "▁l oud", + "▁wor se", + "▁w and", + "▁sp read", + "▁i' d", + "▁let ters", + "▁ye llow", + "▁mag n", + "▁pass ing", + "▁k it", + "▁pleas ed", + "▁dark ness", + "▁rem ar", + "idd en", + "c ome", + "▁te a", + "▁c iv", + "▁ap art", + "▁disapp e", + "▁import ant", + "▁leg s", + "▁n ation", + "▁del ic", + "▁d ressed", + "▁g ame", + "▁wall s", + "e c", + "▁d ry", + "▁v irt", + "▁d im", + "id ently", + "re l", + "▁r ub", + "▁abs olute", + "▁bl ind", + "▁disco vered", + "▁exact ly", + "▁d am", + "ott en", + "▁sor row", + "m y", + "▁c ost", + "fe rence", + "▁empl oy", + "vel op", + "▁c ous", + "▁be ast", + "▁spe c", + "▁opp ort", + "▁e ars", + "▁dro pped", + "▁sub st", + "▁che e", + "▁prot ect", + "il s", + "▁sm iled", + "in a", + "▁res p", + "▁prom ise", + "▁b ag", + "▁h ost", + "ur s", + "▁creat ure", + "▁not ice", + "▁know ing", + "▁head s", + "▁conc er", + "▁se at", + "ish ment", + "▁ind ivid", + "▁exist ence", + "▁determ ined", + "le nd", + "▁st orm", + "ro y", + "our s", + "▁con ce", + "ang ing", + "▁fix ed", + "▁p ress", + "▁maj or", + "o ved", + "▁v es", + "i od", + "▁lear n", + "▁mot ion", + "▁em pt", + "▁lea ves", + "▁bott om", + "▁ar g", + "iet y", + "▁no body", + "▁pro s", + "qu e", + "▁ut ter", + "▁p ick", + "ac ked", + "▁inte llig", + "▁he s", + "▁st ir", + "▁pre vent", + "▁ass ist", + "▁d om", + "▁dis g", + "▁adv ant", + "er able", + "▁v ent", + "um ent", + "▁t ired", + "re ct", + "as hed", + "act ion", + "▁cons idered", + "▁wr ote", + "▁h ouses", + "▁su it", + "▁che er", + "▁cast le", + "▁p ra", + "▁per form", + "anc ing", + "▁cle an", + "ru ct", + "▁st ro", + "▁fre qu", + "▁draw ing", + "▁l uck", + "▁ha bit", + "id ge", + "e ll", + 
"▁on es", + "▁no ble", + "▁sp lend", + "▁hon or", + "z en", + "▁pa id", + "▁spe ech", + "▁est ab", + "▁u r", + "ist r", + "▁individ ual", + "in ite", + "▁v all", + "▁bird s", + "ro du", + "▁d ar", + "▁all ow", + "▁conf ess", + "▁imp ress", + "▁prop ert", + "▁j ane", + "▁s ong", + "▁var ious", + "▁nar row", + "▁mo der", + "▁belie ved", + "ay s", + "▁ext ra", + "▁p ure", + "ar ily", + "▁per iod", + "▁shad ow", + "▁some wh", + "▁m al", + "▁c ott", + "▁ext reme", + "▁jud ge", + "▁vill age", + "▁ro yal", + "▁somewh at", + "▁l ower", + "▁ha m", + "▁ag ree", + "▁remem bered", + "▁ast on", + "ent h", + "▁decl ared", + "p an", + "▁tr ain", + "▁part s", + "▁col onel", + "am ber", + "▁break fast", + "▁sure ly", + "▁s in", + "ay ed", + "▁sc ene", + "g o", + "▁great est", + "▁influ ence", + "▁c ustom", + "it ary", + "▁anim al", + "▁sa ke", + "▁mo d", + "▁sold iers", + "in y", + "▁an cient", + "▁dra wn", + "▁ev idently", + "▁way s", + "▁look s", + "▁rev ol", + "at or", + "ant ed", + "▁ref lect", + "▁pict ure", + "▁like ly", + "▁sh r", + "▁law s", + "▁hold ing", + "▁diffic ulty", + "▁in j", + "▁me l", + "▁cou rage", + "n es", + "▁m ort", + "▁tr oub", + "▁bur st", + "▁ang ry", + "▁pr oud", + "gg ed", + "▁spo ken", + "is ion", + "▁des ert", + "pt ion", + "▁com b", + "▁app arent", + "r ing", + "▁wat ched", + "n a", + "▁e ast", + "▁sh op", + "▁ag re", + "▁priv ate", + "est y", + "▁j ul", + "▁fin ished", + "▁anx ious", + "ot ion", + "▁fif teen", + "▁soci al", + "u nder", + "▁dis m", + "▁tou ch", + "▁w ine", + "▁att ack", + "▁ide as", + "▁geor ge", + "a f", + "re r", + "oo se", + "▁sp ace", + "▁sc r", + "▁ins ide", + "▁gentle men", + "▁civ il", + "i ently", + "▁form ed", + "▁f ol", + "▁go es", + "▁you' ve", + "▁th in", + "▁sur f", + "▁serv ant", + "▁b al", + "▁co ver", + "▁our selves", + "▁fall en", + "▁hen ry", + "▁l ot", + "i um", + "▁ad vent", + "▁car riage", + "▁bab y", + "▁ele ct", + "▁to ng", + "▁app re", + "▁every body", + "ud ed", + "▁comm un", + "▁in e", + "it ive", + "▁wa 
ited", + "c ise", + "▁gr ou", + "he t", + "▁v ain", + "▁imp ro", + "▁fav or", + "er ial", + "▁spe ed", + "▁wind ows", + "▁care fully", + "▁i ce", + "▁no ise", + "▁her o", + "▁j im", + "▁will iam", + "▁pe cul", + "▁prom ised", + "▁walk ing", + "▁forg otten", + "▁oblig ed", + "▁earn est", + "▁m ain", + "▁l ose", + "▁gl ance", + "▁ves sel", + "▁gr ad", + "▁th ro", + "▁bo d", + "▁should er", + "▁met h", + "▁anim als", + "▁not iced", + "ab les", + "▁pecul iar", + "▁f ier", + "▁p ot", + "▁quiet ly", + "▁c up", + "▁ser ious", + "▁tre mb", + "▁gener ally", + "▁americ an", + "▁sym p", + "r al", + "▁d on", + "▁fr ance", + "ict ion", + "▁propert y", + "▁should ers", + "▁str anger", + "▁s an", + "▁c ow", + "▁what 's", + "▁d ust", + "▁affect ion", + "▁hands ome", + "▁hig her", + "i ant", + "nd ay", + "▁we l", + "▁po et", + "▁sl a", + "▁dist inct", + "▁m am", + "▁p ier", + "ac ing", + "ag ue", + "▁gr own", + "u ly", + "▁d '", + "▁ch amber", + "▁des ce", + "▁mur m", + "st em", + "▁person al", + "▁f ancy", + "▁of fered", + "os ite", + "ons ie", + "▁bu ilt", + "▁ed ge", + "▁whis pered", + "▁sk in", + "▁pie ces", + "it ated", + "c her", + "os ity", + "▁p it", + "▁cont ro", + "▁f aces", + "▁sp ent", + "▁inter rupt", + "h ow", + "is ters", + "▁but ter", + "▁de velop", + "▁un k", + "h ip", + "▁he at", + "▁fo nd", + "▁co at", + "▁tou ched", + "▁h ol", + "ing u", + "▁p i", + "▁r ace", + "▁j ump", + "▁surpr ised", + "ot ed", + "▁de fe", + "en ced", + "▁was n't", + "▁we ar", + "and on", + "▁f an", + "ac her", + "▁ar ch", + "▁ed uc", + "▁bra ve", + "at hered", + "▁e ld", + "▁we alth", + "▁sy stem", + "▁ger man", + "▁fal se", + "w ood", + "▁d are", + "ak ed", + "▁cous in", + "▁f er", + "ke y", + "▁l in", + "▁inte llect", + "▁prep ared", + "▁fing ers", + "▁sur r", + "▁mount ains", + "i pp", + "▁opport unity", + "a ff", + "▁b are", + "▁d or", + "▁int rodu", + "▁co llect", + "▁love ly", + "▁r ag", + "▁cr own", + "▁mat ters", + "▁compan ion", + "▁we ather", + "▁al ar", + "▁inn oc", + "▁r is", + 
"▁m ix", + "▁l ake", + "▁st ore", + "▁un h", + "▁mean ing", + "▁mem ory", + "o ver", + "▁b and", + "le ep", + "▁find ing", + "e e", + "▁char ge", + "▁gr at", + "▁att ract", + "▁gr ay", + "▁quar ter", + "▁av o", + "▁great ly", + "▁m ach", + "▁in h", + "▁as leep", + "▁par is", + "▁d av", + "▁al to", + "▁off er", + "▁opp osite", + "oun ced", + "er ve", + "▁bre ast", + "n own", + "▁read ing", + "▁alto gether", + "▁wr iting", + "pect ed", + "▁deg ree", + "c ing", + "n ight", + "▁ex ec", + "fort un", + "▁st at", + "▁feel ings", + "▁h ath", + "▁c ook", + "▁r ail", + "▁hon our", + "d ing", + "▁f ate", + "▁p or", + "▁fr ank", + "▁meet ing", + "▁r ough", + "▁al ive", + "▁h ide", + "it es", + "il ar", + "▁bl ow", + "▁cr uel", + "ra ph", + "▁hur t", + "▁l oss", + "▁thr own", + "▁ca used", + "▁we 'll", + "▁ser ve", + "▁du ke", + "▁b ent", + "▁un ited", + "▁see k", + "▁king dom", + "▁situ ation", + "▁empt y", + "n ers", + "▁d ue", + "▁li ked", + "▁sw ift", + "▁open ing", + "▁serv ants", + "c hen", + "ou ra", + "▁g h", + "▁sus pic", + "▁fre ed", + "oint ed", + "▁surf ace", + "c il", + "▁quest ions", + "▁ ess", + "▁cur ious", + "▁const it", + "▁accom pan", + "▁christ ian", + "▁f ill", + "are st", + "▁satisf ied", + "r on", + "▁s ides", + "▁p ity", + "▁re ve", + "▁equ al", + "▁he ight", + "▁or dered", + "os op", + "▁gre y", + "▁list ened", + "p et", + "▁re jo", + "▁cap t", + "ib ility", + "o b", + "▁m art", + "▁happ en", + "▁hur ried", + "▁doll ars", + "▁langu age", + "▁an ge", + "▁your s", + "▁supp osed", + "▁laugh ing", + "▁sett led", + "▁ro de", + "▁per m", + "▁dist ingu", + "▁hur ry", + "▁dest roy", + "▁tal ked", + "▁lift ed", + "oc r", + "▁squ are", + "▁val ue", + "▁tast e", + "▁v ast", + "▁king 's", + "▁r ul", + "▁r oof", + "▁tell ing", + "▁stud y", + "▁o w", + "▁p an", + "▁b as", + "▁r ising", + "▁suffic ient", + "▁for ced", + "▁r ise", + "▁at tend", + "▁phil osop", + "▁no se", + "▁six ty", + "he st", + "▁p in", + "▁e gg", + "▁am b", + "▁fa ult", + "b ur", + "▁st ation", + 
"▁dist ur", + "▁reg ular", + "ill e", + "▁p ack", + "▁spe cial", + "▁hon est", + "▁build ing", + "▁se ason", + "▁sh ape", + "▁pr ide", + "▁sm iling", + "li ke", + "▁ord ers", + "y n", + "▁wood s", + "▁accom pl", + "c on", + "▁s am", + "▁us ually", + "▁wat ching", + "▁sac ri", + "er ved", + "▁pass age", + "▁mat erial", + "▁vall ey", + "y r", + "▁st airs", + "▁li bert", + "▁fright ened", + "▁remar ked", + "▁t it", + "▁w ed", + "▁mist ress", + "▁direct ly", + "▁suff er", + "▁glo om", + "▁l ines", + "▁st ock", + "▁just ice", + "▁d iam", + "est ed", + "▁gr owing", + "▁does n't", + "▁g athered", + "▁ord inary", + "u ce", + "▁e ur", + "▁un f", + "▁kit chen", + "▁th reat", + "▁de pend", + "▁wee ks", + "▁desp air", + "▁meth od", + "▁se ized", + "▁disc uss", + "▁ex er", + "if y", + "▁fl ower", + "▁ign or", + "e er", + "ad es", + "▁de b", + "ep ing", + "▁a le", + "▁y o", + "ch ief", + "▁supp er", + "i k", + "▁bo ld", + "▁put ting", + "▁ne arer", + "us es", + "▁one 's", + "▁b le", + "▁y ork", + "▁end e", + "▁aff airs", + "▁sold ier", + "▁contr ary", + "▁mo ving", + "▁stre ets", + "▁b ir", + "r ance", + "hen s", + "▁c it", + "ic ated", + "▁cat ch", + "▁imag ine", + "ed s", + "▁mar ch", + "▁se arch", + "ar a", + "▁re ceive", + "im ate", + "▁m onsie", + "▁tw ice", + "▁pap a", + "▁monsie ur", + "▁re ck", + "m in", + "u de", + "▁pro cess", + "▁ho le", + "a ly", + "l in", + "▁c ro", + "▁fav our", + "▁d ign", + "▁work ing", + "▁har m", + "▁eur ope", + "ant ic", + "▁pro ved", + "oc ked", + "▁pro ve", + "▁cl er", + "▁lo d", + "cept ion", + "▁pull ed", + "▁ar th", + "▁author ity", + "▁ha ven", + "▁j er", + "▁un s", + "▁move ment", + "ust ed", + "▁eng aged", + "▁brother s", + "▁advant age", + "l ished", + "o le", + "▁arth ur", + "▁a ut", + "▁st ones", + "▁far m", + "▁diffe rence", + "▁f art", + "▁as ide", + "▁m as", + "▁obser v", + "▁hen ce", + "▁possess ion", + "▁hill s", + "▁fort un", + "ul s", + "ail s", + "▁inst ance", + "▁she 's", + "▁o l", + "▁ho ly", + "▁fle w", + "k y", + "▁col 
or", + "▁r ate", + "▁do ors", + "▁bus y", + "se t", + "▁add ress", + "▁famil iar", + "▁we ight", + "▁aw are", + "▁play ed", + "▁symp ath", + "ll s", + "▁sole mn", + "▁l iter", + "▁t est", + "▁em per", + "▁ind ian", + "▁dist ant", + "▁interest ing", + "▁b ull", + "▁thor ough", + "▁w ore", + "▁wor ked", + "▁expl ained", + "▁excell ent", + "▁splend id", + "▁tong ue", + "▁d i", + "▁p ard", + "▁n amed", + "▁sh ame", + "▁fr anc", + "▁spe ct", + "▁moment s", + "b ers", + "▁w il", + "▁my ster", + "▁se ated", + "▁inst antly", + "▁sim ilar", + "▁ende av", + "▁me asure", + "▁natur ally", + "nd s", + "▁su f", + "▁am ount", + "▁im per", + "▁dog s", + "it able", + "▁br it", + "▁necess ity", + "r id", + "ul ous", + "▁conf idence", + "d en", + "▁p arent", + "▁w id", + "▁v ir", + "▁never the", + "▁agre ed", + "▁neverthe less", + "un ch", + "▁hear ing", + "▁t akes", + "▁a ug", + "▁un ivers", + "en ance", + "▁un w", + "▁ear l", + "▁keep ing", + "▁dri ve", + "▁produ ced", + "▁a ud", + "on 's", + "▁n ames", + "ag n", + "▁disappe ared", + "▁thr ow", + "▁pres ident", + "▁god s", + "▁mag ic", + "▁repres ent", + "▁unk nown", + "p or", + "▁ter ror", + "▁haven 't", + "as c", + "▁supp ort", + "▁smo ke", + "▁w icked", + "k er", + "▁wor ks", + "▁art ic", + "▁d ull", + "▁yes ter", + "▁fall ing", + "▁worth y", + "▁libert y", + "ul ation", + "▁des ign", + "▁want s", + "▁ev idence", + "▁compan ions", + "▁spir its", + "▁co ast", + "▁might y", + "▁particular ly", + "▁wit ness", + "▁disco ver", + "▁s ought", + "▁sp an", + "' ve", + "▁r are", + "▁offic ers", + "l v", + "z y", + "▁yester day", + "ve y", + "c ent", + "▁p owers", + "▁y ield", + "▁c ool", + "▁or gan", + "▁am az", + "▁point ed", + "f ord", + "▁cl aim", + "▁cont ent", + "▁poss ibly", + "▁ter ms", + "▁tri um", + "▁offic er", + "▁pers u", + "▁ce ased", + "▁dro ve", + "▁occur red", + "▁g ree", + "▁li es", + "▁other wise", + "▁emper or", + "▁h om", + "▁st ars", + "▁kne es", + "▁trium ph", + "ru ction", + "▁pa used", + "om s", + "▁requ ired", + 
"▁fail ed", + "▁unh app", + "▁diam ond", + "▁r at", + "▁al i", + "▁d ouble", + "▁form s", + "▁gi ves", + "▁fing er", + "ra ce", + "▁p air", + "al ous", + "ill a", + "▁bo b", + "▁el iz", + "▁tra vel", + "▁carry ing", + "▁g le", + "il es", + "▁te eth", + "es h", + "▁sh own", + "▁fr uit", + "▁wat ers", + "▁ent ertain", + "▁heart s", + "um n", + "▁lab or", + "in 't", + "▁p ill", + "▁en er", + "so ci", + "▁exam ple", + "▁u pper", + "▁fore ign", + "▁mor al", + "▁soft ly", + "ro se", + "▁hu ge", + "▁char les", + "▁pri est", + "▁exc it", + "▁f et", + "▁mother 's", + "▁possess ed", + "▁c ases", + "▁rep ort", + "▁mil k", + "▁aff air", + "▁princi ple", + "▁inh ab", + "▁freed om", + "▁pr oof", + "▁inte nded", + "▁satisf action", + "▁shout ed", + "is c", + "▁pl at", + "▁b ask", + "ent al", + "▁grou p", + "▁fart her", + "as m", + "▁un fortun", + "▁unt o", + "▁sing ing", + "▁arr ange", + "▁relig ion", + "▁b er", + "▁rock s", + "▁sevent een", + "▁d er", + "▁j ames", + "▁bu y", + "▁succeed ed", + "▁room s", + "▁lead ing", + "▁maj esty", + "▁event s", + "▁d ance", + "▁p aint", + "▁g ently", + "ac le", + "▁te le", + "▁pard on", + "us ing", + "▁dro p", + "f ather", + "▁in vent", + "▁ke y", + "▁mention ed", + "▁sevent y", + "▁r os", + "▁suff ering", + "▁rec ord", + "▁cab in", + "ro ad", + "▁dis s", + "iv al", + "▁demand ed", + "▁excit ement", + "▁as soci", + "▁pro gress", + "ang ers", + "▁cur i", + "▁americ a", + "▁ru le", + "▁b or", + "▁v ig", + "less ly", + "▁clear ly", + "▁b ore", + "▁she ep", + "▁reg ret", + "▁neighb our", + "b ly", + "i ance", + "▁inst inct", + "▁adv ice", + "▁aw ful", + "▁s en", + "▁f ully", + "▁g ather", + "▁pap ers", + "▁h idden", + "▁che st", + "▁bir th", + "h y", + "p ap", + "▁h ither", + "▁st uff", + "▁imp at", + "▁call ing", + "▁four th", + "▁dread ful", + "▁p os", + "▁g rief", + "▁br ill", + "▁power ful", + "▁present ed", + "▁fair y", + "▁expl ain", + "▁sho ot", + "▁prison er", + "▁jo ined", + "▁aff ord", + "m ond", + "at tered", + "▁ ing", + "im ents", + 
"▁she l", + "▁pre fer", + "▁consider able", + "▁ob ey", + "▁vo ices", + "▁inter v", + "▁interest ed", + "▁vir g", + "▁c red", + "▁c ard", + "▁e p", + "▁need ed", + "▁p ounds", + "▁con qu", + "▁cle ver", + "▁adv anced", + "▁c ord", + "ig hed", + "▁under t", + "▁resol ved", + "▁w ag", + "ist ic", + "▁pa ul", + "▁exc ited", + "▁cond itions", + "▁pict ures", + "ac ious", + "▁sh ining", + "▁su nday", + "▁ser ved", + "▁ste am", + "▁pol ice", + "▁spr ang", + "s ie", + "or a", + "es e", + "▁j es", + "▁no dd", + "▁sal t", + "▁field s", + "▁c art", + "▁ind ians", + "▁fier ce", + "d le", + "▁r ide", + "▁des ired", + "▁ed ward", + "▁import ance", + "▁inform ation", + "t ure", + "▁h osp", + "▁me mb", + "▁per ceived", + "▁y ard", + "▁cr it", + "tern al", + "▁t ask", + "▁fo ld", + "r ant", + "▁soon er", + "▁mer ch", + "▁absolute ly", + "▁cit iz", + "▁suf fered", + "▁t ight", + "▁d ur", + "▁is s", + "ill y", + "▁lo g", + "▁complete ly", + "h old", + "▁r ad", + "▁sh are", + "▁will ing", + "▁dev il", + "▁ship s", + "▁imag ination", + "▁super ior", + "c om", + "am s", + "▁any body", + "▁en v", + "▁app l", + "▁dra g", + "▁da wn", + "asp ed", + "▁occup ied", + "▁curi osity", + "i est", + "▁s igh", + "▁fo x", + "as ant", + "▁my st", + "▁ste ad", + "et t", + "▁cou ple", + "▁ty pe", + "▁extra ord", + "▁apparent ly", + "▁wel come", + "▁da ily", + "▁moder n", + "i ot", + "▁a in't", + "▁d ying", + "ll en", + "▁fe at", + "▁acc ident", + "▁count enance", + "▁ab andon", + "ort ion", + "▁lo ck", + "▁cr ime", + "p ir", + "▁m ult", + "▁al as", + "▁ref used", + "▁h ate", + "▁d w", + "▁when ever", + "▁than ks", + "▁sl ave", + "▁regard ed", + "▁suggest ed", + "ul f", + "▁act ually", + "g ment", + "▁s ize", + "re g", + "▁c ult", + "▁k at", + "▁bod ies", + "h us", + "▁b ay", + "▁tr uly", + "▁fl esh", + "ish op", + "▁sm ith", + "▁bet r", + "w ith", + "▁w et", + "▁rapid ly", + "g ers", + "▁o dd", + "as ons", + "et te", + "▁cl ub", + "ab el", + "▁hor ror", + "▁m ile", + "▁fl ight", + "▁cross ed", + 
"▁profess or", + "▁o ce", + "▁wor st", + "iz ation", + "▁rus hed", + "▁s cience", + "▁b rief", + "▁ste pped", + "▁mid st", + "h a", + "▁s our", + "▁m aint", + "▁br ain", + "▁cott age", + "▁exp ressed", + "▁equ ally", + "▁educ ation", + "▁aug ust", + "▁b uck", + "▁n ay", + "id s", + "▁tem pt", + "▁inqu ir", + "▁fool ish", + "▁t aught", + "▁c op", + "▁d un", + "▁p icked", + "▁el sie", + "▁land s", + "▁dri ven", + "▁polit ical", + "m as", + "▁de ck", + "▁res ist", + "▁inst r", + "▁b on", + "▁k en", + "ip s", + "▁hot el", + "▁danger ous", + "i ally", + "n ow", + "▁do zen", + "▁tr ade", + "▁point s", + "▁nin et", + "ab ility", + "▁cr im", + "▁rel ations", + "▁inter p", + "▁bar b", + "▁delight ed", + "▁memb ers", + "▁s isters", + "▁st y", + "▁an ger", + "▁belie f", + "▁ask ing", + "▁me at", + "▁dis pl", + "▁rel ief", + "ific ation", + "▁hunt ing", + "▁ale x", + "ar ies", + "▁ob st", + "▁beh old", + "▁mist ake", + "▁inqu ired", + "▁remark able", + "▁orig in", + "c ked", + "▁n erv", + "ack s", + "ver t", + "ro p", + "▁care ful", + "▁w ounded", + "ad ing", + "▁ce re", + "▁enem ies", + "▁grad ually", + "▁interrupt ed", + "▁f is", + "▁st up", + "▁se vere", + "▁ke en", + "▁six teen", + "k ins", + "res p", + "▁wor n", + "▁fl our", + "▁sy lv", + "▁contro l", + "k in", + "▁l one", + "as ing", + "▁n ap", + "▁ass ert", + "▁dep th", + "▁kind ly", + "▁mur der", + "ac ity", + "▁ele ven", + "▁inv ol", + "▁d' art", + "▁w ings", + "▁o ak", + "▁e t", + "▁beg un", + "▁dream s", + "wh ile", + "▁more over", + "▁exp ed", + "▁inde pend", + "▁bur ied", + "▁appro ached", + "agn an", + "▁d'art agnan", + "▁se x", + "▁sa ved", + "▁har ry", + "▁phys ical", + "▁spec ies", + "c er", + "o e", + "▁gl ory", + "▁creat ures", + "▁news pap", + "▁s ang", + "▁pl enty", + "▁use ful", + "▁sho es", + "▁hop ed", + "▁frequ ently", + "▁sa f", + "▁dist r", + "▁princi p", + "▁p u", + "y 's", + "au nt", + "▁lo ver", + "▁fam ous", + "▁reco llect", + "▁n ur", + "▁gr im", + "▁ind if", + "▁char ming", + "▁a im", + "▁loo 
se", + "▁conscious ness", + "▁mam ma", + "▁ent hus", + "▁sle pt", + "▁smo oth", + "▁fight ing", + "▁hy p", + "▁enthus i", + "▁d ig", + "al ing", + "▁st age", + "▁any one", + "▁thr ust", + "▁des per", + "▁t ar", + "▁l amp", + "st one", + "▁st ern", + "▁ev ident", + "▁mean while", + "▁forg ive", + "▁accept ed", + "▁oce an", + "▁to t", + "▁they 're", + "▁wo ndered", + "▁play ing", + "▁det ect", + "▁ha le", + "▁kn ife", + "ail ed", + "▁close ly", + "▁me as", + "▁proceed ed", + "▁mess age", + "▁m our", + "▁f ac", + "▁un ion", + "ustom ed", + "he m", + "am ing", + "▁ex ceed", + "▁fe ather", + "▁pre cious", + "▁cent ury", + "▁une x", + "▁p ark", + "ic ation", + "▁every where", + "▁mind s", + "▁extraord inary", + "▁a rose", + "▁ent rance", + "▁cap ital", + "▁rec all", + "▁burn ing", + "▁magn ific", + "o es", + "or ious", + "st and", + "▁as semb", + "▁pl ant", + "▁neighb or", + "▁l est", + "um ents", + "▁coll e", + "▁virt ue", + "▁be w", + "▁for b", + "▁ret reat", + "▁cap able", + "▁ass ured", + "▁const ant", + "▁govern or", + "▁incre ased", + "▁h orn", + "▁rem oved", + "▁fact s", + "▁abs ence", + "▁expl an", + "▁a ck", + "▁some body", + "▁aw a", + "▁adm it", + "▁cor rect", + "▁forg ot", + "▁je alous", + "▁kiss ed", + "▁pop ular", + "▁h ut", + "▁u g", + "pe lled", + "▁gr ant", + "▁friend ship", + "▁ind ign", + "▁sympath y", + "i able", + "er ous", + "▁th om", + "▁al ice", + "▁le vel", + "▁object s", + "▁p ressed", + "▁sh a", + "ro om", + "▁qu al", + "▁beg ged", + "▁em p", + "▁h ind", + "▁hig hest", + "▁cloud s", + "▁gh ost", + "▁ack now", + "ous ed", + "▁stri ke", + "▁wis hes", + "▁becom es", + "▁tremb ling", + "▁no b", + "▁kind ness", + "▁accord ingly", + "▁thro at", + "r ation", + "▁f are", + "▁we 're", + "▁st retched", + "▁fr ag", + "▁whe el", + "▁qu eer", + "▁grand father", + "f or", + "▁ch oose", + "▁hel en", + "▁eight y", + "▁l y", + "▁mis erable", + "▁cont empt", + "ign ed", + "▁mil itary", + "▁rus s", + "▁bask et", + "▁a head", + "oo ps", + "ive red", + "▁list 
ening", + "▁fr o", + "▁lar ger", + "▁div ine", + "i ber", + "▁st ories", + "anc hes", + "us hing", + "iz ing", + "▁tre asure", + "▁exc use", + "▁innoc ent", + "▁a id", + "▁rem ind", + "▁sla ves", + "r it", + "st airs", + "▁re ward", + "og raph", + "▁man age", + "▁dis h", + "▁through out", + "▁wa ves", + "▁jud gment", + "▁arri val", + "▁cho ice", + "▁unhapp y", + "ast ic", + "▁bl ank", + "▁adv ance", + "▁inform ed", + "▁acquaint ance", + "▁impress ion", + "▁myster ious", + "b b", + "▁a ra", + "▁not es", + "▁had n't", + "▁se ll", + "▁com r", + "▁im pl", + "▁ind ust", + "▁end ed", + "▁light s", + "▁nur se", + "▁s out", + "▁b ought", + "▁f red", + "▁mar ked", + "▁sc ream", + "me nd", + "▁une as", + "▁delic ate", + "▁we ary", + "est ic", + "▁prom pt", + "▁exper i", + "▁hung ry", + "▁fly ing", + "▁p ow", + "▁br idge", + "▁jo in", + "▁vis ible", + "▁understand ing", + "▁cry ing", + "▁avo id", + "▁t is", + "▁st iff", + "ac hes", + "▁rest r", + "▁sound s", + "▁b owed", + "▁c aut", + "▁good s", + "▁dav id", + "▁un able", + "▁you' d", + "ham ed", + "▁b os", + "er al", + "▁as hamed", + "▁some where", + "▁inf inite", + "ock s", + "▁dign ity", + "▁g ay", + "▁v ic", + "▁am id", + "▁ho llow", + "▁em otion", + "▁adm itted", + "▁parent s", + "▁w ra", + "▁h int", + "▁tem ple", + "▁comfort able", + "▁intellig ence", + "or ous", + "▁be aring", + "▁her s", + "ab eth", + "▁rem ains", + "▁cont em", + "▁set tle", + "▁imm ense", + "f fe", + "p her", + "▁c her", + "ld om", + "▁we ap", + "ul ated", + "▁light ed", + "gy pt", + "▁advent ure", + "▁thorough ly", + "▁e gypt", + "il st", + "ang es", + "▁ob t", + "▁friend ly", + "▁reck on", + "▁stup id", + "▁f ed", + "▁r ome", + "▁me al", + "▁int ention", + "▁return ing", + "▁conv in", + "▁c oo", + "le ction", + "▁as h", + "ac hel", + "▁ro pe", + "▁pr ice", + "▁pro ject", + "el t", + "row s", + "▁sec ure", + "▁esc aped", + "▁hop es", + "▁eliz abeth", + "▁saf ety", + "▁w ound", + "▁su p", + "▁un us", + "ons cious", + "▁hor ri", + "▁min ister", + "▁o 
x", + "ll a", + "ens ive", + "▁help ed", + "▁plain ly", + "▁se ldom", + "▁think s", + "▁fellow s", + "▁m ood", + "▁p ushed", + "▁exh ib", + "ing ing", + "▁th under", + "au d", + "ian a", + "▁fair ly", + "▁eld er", + "▁egg s", + "ir m", + "▁maid en", + "m other", + "▁appe ars", + "▁chee ks", + "▁w on", + "▁e ase", + "▁re du", + "▁sk ill", + "▁ext ent", + "▁pract ice", + "▁relig ious", + "▁becom ing", + "▁virg in", + "▁feat ures", + "▁t ied", + "▁when ce", + "▁some how", + "▁gre et", + "▁faith ful", + "▁concer ned", + "▁the at", + "▁b ishop", + "▁p ink", + "▁eager ly", + "re es", + "▁e ating", + "▁was te", + "▁r ank", + "▁fe m", + "▁br ide", + "▁un l", + "ott ed", + "cei ving", + "▁tri b", + "▁orig inal", + "▁concer ning", + "▁ha b", + "▁acc ustomed", + "▁pat ient", + "▁rec om", + "▁ce ll", + "oint ment", + "▁arr anged", + "v ille", + "it ure", + "▁who lly", + "▁old er", + "▁col our", + "▁prov ided", + "▁at e", + "▁part ly", + "▁mon t", + "olog y", + "▁pros pect", + "▁cere mon", + "▁ ze", + "▁l aughter", + "▁fe e", + "▁br anches", + "▁fl ed", + "r ight", + "▁wh ilst", + "▁sl ipped", + "▁viol ent", + "▁inhab it", + "▁s ons", + "▁eng age", + "▁unc om", + "▁deep ly", + "▁subst ance", + "▁t ale", + "▁t iny", + "▁d an", + "▁g a", + "▁be e", + "▁y ards", + "ick s", + "▁hast ily", + "he ld", + "▁w es", + "▁v ague", + "▁am use", + "▁mu d", + "▁wo lf", + "▁h ans", + "ill ing", + "▁supp ly", + "▁sil k", + "▁const antly", + "▁christ mas", + "▁mill ion", + "▁whis per", + "▁m ental", + "▁was hing", + "ver se", + "▁cl oth", + "▁bar on", + "▁cor resp", + "▁nodd ed", + "▁corresp ond", + "k a", + "▁he ll", + "▁g ain", + "▁r ust", + "▁ob tain", + "▁unc onscious", + "▁strugg le", + "▁estab lished", + "▁law y", + "ol s", + "▁sign s", + "▁ut tered", + "▁rom an", + "▁constit ution", + "p es", + "▁c ave", + "▁sp are", + "▁qu ant", + "▁im age", + "▁mer ry", + "▁treat ed", + "▁effort s", + "▁lone ly", + "r ated", + "▁n ut", + "▁gl anced", + "▁port ion", + "it or", + "▁re semb", + "▁with d", 
+ "▁me ad", + "▁fe ast", + "▁pr im", + "▁cl iff", + "▁em er", + "▁prop ortion", + "▁consider ation", + "▁hast e", + "▁gaz e", + "▁sav age", + "▁c rew", + "▁to wer", + "▁l ack", + "▁cons cience", + "▁mer cy", + "▁exh a", + "▁cons ent", + "at ors", + "ur d", + "▁out l", + "▁cl o", + "▁ad op", + "▁among st", + "▁h anging", + "▁circ le", + "▁prep ar", + "▁brill iant", + "f l", + "▁g ained", + "▁r ow", + "▁tr oops", + "▁rep ro", + "▁m ing", + "ou l", + "▁d ared", + "▁l ion", + "▁jo e", + "▁wind s", + "▁bring ing", + "▁anx iety", + "▁bill y", + "▁consequ ence", + "f ice", + "p se", + "▁f ought", + "▁p red", + "▁sc ra", + "▁gl im", + "▁vict ory", + "p ed", + "▁r ab", + "▁sc ot", + "▁ob v", + "▁sh ock", + "ch an", + "▁kn ock", + "our se", + "▁hand ed", + "▁ind ul", + "▁pat ience", + "▁sout her", + "▁j ose", + "▁fe ver", + "▁ro lled", + "ict ed", + "▁set ting", + "▁profess ion", + "▁sylv ia", + "▁h un", + "ut ions", + "▁fe ared", + "▁br and", + "▁bo ots", + "▁fore head", + "▁princi ples", + "▁s ink", + "▁r ig", + "av al", + "▁pur ch", + "▁gaz ed", + "▁employ ed", + "▁murm ured", + "m ore", + "▁s ar", + "as hing", + "ur al", + "ac les", + "▁tr ad", + "▁act ive", + "▁bene f", + "▁bott le", + "▁r age", + "▁inv est", + "▁lu x", + "▁s ank", + "▁h ang", + "▁be ard", + "ent ial", + "▁lo ving", + "▁nat ive", + "▁inst ruct", + "▁wa ist", + "▁rel ation", + "▁disco very", + "▁mel an", + "▁nerv ous", + "▁obt ained", + "▁p ig", + "▁se ar", + "▁fl ag", + "▁tra il", + "▁distingu ished", + "▁st ared", + "▁mis ery", + "▁pr int", + "▁gu il", + "▁jump ed", + "▁sw im", + "▁appro aching", + "▁suspic ion", + "▁i v", + "▁man aged", + "ak er", + "▁te ach", + "▁mat ch", + "▁guil ty", + "▁w retched", + "▁r um", + "▁comp ar", + "▁the ory", + "▁s her", + "▁b ree", + "▁k ings", + "▁sh one", + "ather ine", + "▁thr one", + "▁show ing", + "aw s", + "▁rob in", + "▁emb ar", + "ut ation", + "▁woman 's", + "▁add ressed", + "▁prot est", + "▁admir ation", + "▁troub led", + "▁ug ly", + "o om", + "er ves", + "▁fl 
ung", + "▁sub s", + "▁rel ie", + "▁thousand s", + "n ce", + "▁o d", + "▁cur rent", + "▁wood en", + "▁sacri fice", + "ur ity", + "ci p", + "▁pe ar", + "▁far mer", + "▁need s", + "▁cond em", + "▁mem ber", + "▁b ade", + "▁d ancing", + "▁re asons", + "▁cons ult", + "▁sw all", + "▁shad ows", + "▁ange l", + "▁ninet een", + "▁sty le", + "f ield", + "▁l an", + "▁man if", + "▁ro bert", + "▁gr ate", + "▁eng ine", + "▁wis dom", + "▁jes us", + "▁con vent", + "▁pre ced", + "▁interest s", + "▁tri al", + "b or", + "i ven", + "▁n est", + "▁ex ch", + "▁vo y", + "▁ill ust", + "▁wor ship", + "▁ad am", + "▁ph r", + "▁princip al", + "▁h it", + "▁spe nd", + "▁stand s", + "▁resp ons", + "▁a y", + "▁ha w", + "▁wh ist", + "▁ar rest", + "▁kind s", + "▁requ ire", + "▁descri bed", + "▁l it", + "▁pre cise", + "▁prop osed", + "▁produ ce", + "▁utter ly", + "ul se", + "▁no vel", + "▁bl ame", + "▁cred it", + "▁p ause", + "os en", + "▁house hold", + "▁arm ed", + "▁follow s", + "up on", + "▁appro ach", + "▁nin ety", + "▁p ir", + "▁fl ore", + "iv ity", + "▁ref use", + "▁sens ible", + "cho ly", + "▁nation al", + "▁g rie", + "▁re ven", + "▁let 's", + "▁delight ful", + "▁extreme ly", + "▁melan choly", + "u ing", + "▁en orm", + "cl es", + "▁slight ly", + "▁sac red", + "▁recogn ized", + "▁myst ery", + "▁g ri", + "▁comp re", + "▁dist ress", + "▁war ri", + "▁use less", + "▁tri f", + "▁mount ed", + "▁phil ip", + "▁ener gy", + "▁explan ation", + "▁c as", + "at ory", + "▁p our", + "▁r ic", + "▁ch osen", + "▁every one", + "umb led", + "▁a pr", + "▁c am", + "▁pro c", + "▁res umed", + "▁appre ci", + "▁alex and", + "▁a ven", + "▁w ing", + "▁int ense", + "▁high ly", + "▁lu cy", + "▁sol id", + "▁depart ure", + "▁agree able", + "▁exer cise", + "a pped", + "▁w ard", + "▁b ud", + "▁d well", + "ic ate", + "▁de ce", + "▁te acher", + "te nding", + "▁ma x", + "▁requ est", + "▁unex pected", + "▁jose ph", + "c ol", + "▁le ap", + "▁vict im", + "▁s ighed", + "▁for ces", + "ch ie", + "▁fe ed", + "▁sp ort", + "▁dri ft", + "▁wed 
ding", + "▁brit ish", + "se c", + "▁att itude", + "▁vis ion", + "▁pi pe", + "▁to w", + "▁ha lt", + "▁man ners", + "▁te nd", + "▁fl ood", + "▁comm ission", + "▁gu ide", + "▁obser ve", + "▁conc ern", + "▁rus h", + "▁affect ed", + "f all", + "▁st ret", + "▁co ach", + "▁po ison", + "▁direct ed", + "▁med ic", + "▁g est", + "▁e cho", + "▁young er", + "▁conf usion", + "▁contin ue", + "▁par li", + "▁abs or", + "▁cent re", + "con om", + "▁horri ble", + "r ison", + "▁b ol", + "▁b ath", + "▁g own", + "▁by e", + "▁al oud", + "▁supp l", + "▁prof ound", + "▁er r", + "▁cheer ful", + "w orth", + "▁sent ence", + "▁mist aken", + "▁tor n", + "▁fig ures", + "▁accompan ied", + "▁c atherine", + "▁e conom", + "▁at m", + "▁sh aking", + "um ber", + "▁coun cil", + "l ot", + "▁as ce", + "il ities", + "▁sp ar", + "▁end s", + "▁stra w", + "▁knight s", + "▁atm osp", + "▁sh ade", + "▁br ow", + "▁sp ark", + "▁rest ed", + "▁sent iment", + "▁reco vered", + "▁subject s", + "▁dut ies", + "▁comp osed", + "▁sw ept", + "▁real ity", + "▁sing ular", + "▁trans p", + "▁loc ked", + "▁lou is", + "▁assist ance", + "▁w ake", + "re m", + "▁so vere", + "▁un p", + "▁lo ves", + "▁abs urd", + "▁soul s", + "▁immedi ate", + "▁rid ing", + "▁connect ion", + "▁chee k", + "▁magnific ent", + "▁e re", + "▁su gar", + "▁pl ans", + "▁pr ud", + "▁dis e", + "▁ad j", + "▁lean ing", + "▁surr ounded", + "▁we 've", + "▁or n", + "▁ro ll", + "▁pro ble", + "▁str ict", + "▁aw ake", + "▁pra ise", + "▁convin ced", + "▁re le", + "▁fr ame", + "▁bre aking", + "▁cur tain", + "▁stay ed", + "▁div ided", + "▁cra w", + "▁incl ined", + "▁prev ious", + "a ult", + "om en", + "▁st air", + "▁se es", + "▁pr on", + "bo ard", + "▁comple x", + "▁pray er", + "▁pier re", + "▁unfortun ate", + "g s", + "▁gen ius", + "▁incre ase", + "▁suffic iently", + "▁ban ks", + "▁revol ution", + "▁souther n", + "k i", + "o ke", + "▁a ust", + "ed y", + "▁l ing", + "▁count ess", + "▁sleep ing", + "▁dev oted", + "▁ut most", + "▁mark et", + "▁bos om", + "▁b ark", + "▁c ath", + 
"al t", + "ch ar", + "▁cl ock", + "▁hand ker", + "▁adm in", + "▁sens es", + "▁id ent", + "▁mid night", + "▁connect ed", + "▁perm itted", + "▁h id", + "▁f il", + "▁f aced", + "▁g ift", + "▁ch at", + "▁br id", + "▁nor ther", + "▁hor iz", + "▁colle ge", + "▁handker chief", + "is ions", + "▁re be", + "▁pol ic", + "▁ann ounced", + "oun ce", + "▁n ons", + "▁n urs", + "al es", + "▁fle et", + "▁rag ged", + "▁co ffe", + "▁part ies", + "▁del ay", + "▁sound ed", + "▁c ities", + "▁was h", + "▁app ointed", + "▁night s", + "▁inst it", + "▁god 's", + "▁stri king", + "▁gun s", + "▁aston ishment", + "▁merch ant", + "▁parli ament", + "n al", + "▁a x", + "at ched", + "▁p il", + "▁p age", + "if orm", + "▁pl ate", + "▁thir st", + "▁neg ro", + "▁ru in", + "▁inhabit ants", + "w in", + "ar f", + "▁r ib", + "▁add ition", + "▁arg ument", + "b our", + "▁t ad", + "▁sc en", + "▁gu ests", + "▁wonder ing", + "▁acquaint ed", + "▁int ent", + "pl ess", + "▁destroy ed", + "▁coffe e", + "in ent", + "le br", + "▁re nder", + "▁so b", + "▁de mon", + "▁des ir", + "ud ing", + "▁get s", + "▁ass ure", + "▁ra ise", + "▁shar ply", + "▁priv ile", + "▁alar m", + "▁mach ine", + "f ied", + "▁cont ract", + "▁del iber", + "▁dr own", + "▁after ward", + "▁gu est", + "▁concl usion", + "▁ris k", + "▁ignor ant", + "b ury", + "k ind", + "▁p ian", + "an 's", + "ur ies", + "▁so il", + "▁ref er", + "▁command ed", + "▁pract ical", + "▁to ss", + "▁of fe", + "▁be held", + "▁ar ist", + "▁quar ters", + "▁deg rees", + "▁fis her", + "▁nons ense", + "▁m c", + "is p", + "▁me chan", + "ke ep", + "▁doubt less", + "▁viol ence", + "▁neg lect", + "▁fol k", + "l iness", + "▁b ul", + "▁e aster", + "▁lo ft", + "▁cont ained", + "▁ref lection", + "▁ce lebr", + "▁lea f", + "▁concl uded", + "▁distr ict", + "i ation", + "r s", + "▁s cient", + "▁he 'd", + "▁sc orn", + "▁cr ack", + "▁ste ep", + "▁mut tered", + "▁estab lish", + "▁dar ling", + "▁and rew", + "▁ch im", + "qu is", + "▁qu ality", + "▁po lly", + "▁che ck", + "▁cra ft", + "▁trave ll", + 
"▁univers al", + "in ate", + "▁c ig", + "at ives", + "om p", + "ut en", + "▁j ac", + "▁jo b", + "▁sub m", + "▁read er", + "▁le is", + "▁em ph", + "▁surr ound", + "o x", + "p ent", + "it ate", + "▁ex tended", + "▁le v", + "▁over t", + "▁ret ired", + "▁pu zz", + "u able", + "▁li br", + "▁ch in", + "▁sp l", + "▁real ized", + "▁ca uses", + "▁pun ishment", + "▁phys ic", + "▁leis ure", + "c an", + "▁w ave", + "▁sh ake", + "▁char m", + "▁belong ed", + "m ber", + "▁b ones", + "▁g as", + "▁r ange", + "▁pre c", + "▁sm ell", + "▁may be", + "▁inv ited", + "▁troub les", + "▁t ables", + "an ch", + "ic ip", + "▁j une", + "▁ab o", + "▁ag es", + "▁any where", + "ff in", + "▁dr unk", + "▁proper ly", + "▁loc al", + "▁impro ve", + "▁atmosp here", + "▁d ir", + "▁he 'll", + "▁re b", + "▁r ang", + "▁comp ass", + "▁lie uten", + "▁lean ed", + "▁firm ly", + "▁n ations", + "▁ha y", + "▁we pt", + "▁r al", + "▁con ven", + "▁un iform", + "▁jul ia", + "e em", + "r ass", + "▁tr ack", + "▁comm er", + "▁bus hes", + "▁obs c", + "▁sort s", + "▁difficult ies", + "▁intellect ual", + "▁introdu ced", + "m ith", + "▁t ro", + "id ay", + "▁re ndered", + "▁r out", + "ad d", + "▁pl un", + "▁thr owing", + "▁hum ble", + "▁pol ite", + "▁num erous", + "▁move ments", + "▁success ful", + "▁cand le", + "▁separ ate", + "▁protect ion", + "▁thom as", + "▁enorm ous", + "▁un b", + "▁rep ub", + "▁sun sh", + "▁desce nded", + "▁unus ual", + "i ved", + "▁bl az", + "▁show s", + "▁sim pl", + "▁cat tle", + "▁cre pt", + "▁aston ished", + "▁desert ed", + "▁l ap", + "ar se", + "▁ne arest", + "ud es", + "▁ent ering", + "▁ide al", + "stand ing", + "nd ers", + "▁so re", + "ain e", + "▁cl os", + "▁our s", + "▁where ver", + "▁ter m", + "▁vis ited", + "▁cal cul", + "d s", + "▁b ase", + "▁g ates", + "▁st amp", + "▁li ber", + "▁offic ial", + "▁e rect", + "▁al t", + "el ia", + "▁har mon", + "▁pain ful", + "▁burn ed", + "▁repub lic", + "u er", + "▁l ately", + "▁it al", + "am m", + "▁te ar", + "▁act ions", + "▁fin al", + "▁start led", + 
"▁sens ation", + "▁fat al", + "ol ic", + "▁fl ash", + "▁app et", + "▁strong er", + "▁num bers", + "▁grat itude", + "▁fem ale", + "▁wes tern", + "l est" + ] + } +} \ No newline at end of file diff --git a/out/checkpoint-17000/tokenizer_config.json b/out/checkpoint-17000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0073e6415da746fc5c44a52e02785cb94510efa4 --- /dev/null +++ b/out/checkpoint-17000/tokenizer_config.json @@ -0,0 +1,9253 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|audio:0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|audio:1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|audio:2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "<|audio:3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "<|audio:4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "5": { + "content": "<|audio:5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "6": { + "content": "<|audio:6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "7": { + "content": "<|audio:7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "8": { + "content": "<|audio:8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "9": { + "content": "<|audio:9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "10": { + "content": 
"<|audio:10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "11": { + "content": "<|audio:11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "12": { + "content": "<|audio:12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "13": { + "content": "<|audio:13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "14": { + "content": "<|audio:14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "15": { + "content": "<|audio:15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "16": { + "content": "<|audio:16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "17": { + "content": "<|audio:17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "18": { + "content": "<|audio:18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "19": { + "content": "<|audio:19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "20": { + "content": "<|audio:20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21": { + "content": "<|audio:21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "22": { + "content": "<|audio:22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "23": { + "content": "<|audio:23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "24": { + "content": "<|audio:24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "25": { + "content": "<|audio:25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "26": { + "content": "<|audio:26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "27": { + "content": "<|audio:27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "28": { + "content": "<|audio:28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "29": { + "content": "<|audio:29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "30": { + "content": "<|audio:30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "31": { + "content": "<|audio:31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32": { + "content": "<|audio:32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "33": { + "content": "<|audio:33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "34": { + "content": "<|audio:34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "35": { + "content": "<|audio:35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "36": { + "content": "<|audio:36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "37": { + "content": "<|audio:37|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "38": { + "content": "<|audio:38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "39": { + "content": "<|audio:39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "40": { + "content": "<|audio:40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "41": { + "content": "<|audio:41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "42": { + "content": "<|audio:42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "43": { + "content": "<|audio:43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "44": { + "content": "<|audio:44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "45": { + "content": "<|audio:45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "46": { + "content": "<|audio:46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "47": { + "content": "<|audio:47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "48": { + "content": "<|audio:48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "49": { + "content": "<|audio:49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "50": { + "content": "<|audio:50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "51": { + "content": "<|audio:51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "52": { + "content": "<|audio:52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "53": { + "content": "<|audio:53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "54": { + "content": "<|audio:54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "55": { + "content": "<|audio:55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "56": { + "content": "<|audio:56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "57": { + "content": "<|audio:57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "58": { + "content": "<|audio:58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "59": { + "content": "<|audio:59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "60": { + "content": "<|audio:60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "61": { + "content": "<|audio:61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "62": { + "content": "<|audio:62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "63": { + "content": "<|audio:63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "64": { + "content": "<|audio:64|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "65": { + "content": "<|audio:65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "66": { + "content": "<|audio:66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "67": { + "content": "<|audio:67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "68": { + "content": "<|audio:68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "69": { + "content": "<|audio:69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "70": { + "content": "<|audio:70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "71": { + "content": "<|audio:71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "72": { + "content": "<|audio:72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "73": { + "content": "<|audio:73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "74": { + "content": "<|audio:74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "75": { + "content": "<|audio:75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "76": { + "content": "<|audio:76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "77": { + "content": "<|audio:77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"78": { + "content": "<|audio:78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "79": { + "content": "<|audio:79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "80": { + "content": "<|audio:80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "81": { + "content": "<|audio:81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "82": { + "content": "<|audio:82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "83": { + "content": "<|audio:83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "84": { + "content": "<|audio:84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "85": { + "content": "<|audio:85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "86": { + "content": "<|audio:86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "87": { + "content": "<|audio:87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "88": { + "content": "<|audio:88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "89": { + "content": "<|audio:89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "90": { + "content": "<|audio:90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "91": { + "content": "<|audio:91|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "92": { + "content": "<|audio:92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "93": { + "content": "<|audio:93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "94": { + "content": "<|audio:94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "95": { + "content": "<|audio:95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "96": { + "content": "<|audio:96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "97": { + "content": "<|audio:97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "98": { + "content": "<|audio:98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "99": { + "content": "<|audio:99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100": { + "content": "<|audio:100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101": { + "content": "<|audio:101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "102": { + "content": "<|audio:102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "103": { + "content": "<|audio:103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "104": { + "content": "<|audio:104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "105": { + 
"content": "<|audio:105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "106": { + "content": "<|audio:106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "<|audio:107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "<|audio:108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "109": { + "content": "<|audio:109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "110": { + "content": "<|audio:110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "111": { + "content": "<|audio:111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "112": { + "content": "<|audio:112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "113": { + "content": "<|audio:113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "114": { + "content": "<|audio:114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "115": { + "content": "<|audio:115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "116": { + "content": "<|audio:116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "117": { + "content": "<|audio:117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "118": { + "content": "<|audio:118|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "119": { + "content": "<|audio:119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "120": { + "content": "<|audio:120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "121": { + "content": "<|audio:121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "122": { + "content": "<|audio:122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "123": { + "content": "<|audio:123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "124": { + "content": "<|audio:124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "125": { + "content": "<|audio:125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "126": { + "content": "<|audio:126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "127": { + "content": "<|audio:127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128": { + "content": "<|audio:128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "129": { + "content": "<|audio:129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "130": { + "content": "<|audio:130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "131": { + "content": "<|audio:131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "132": { + "content": "<|audio:132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "133": { + "content": "<|audio:133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "134": { + "content": "<|audio:134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "135": { + "content": "<|audio:135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "136": { + "content": "<|audio:136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "137": { + "content": "<|audio:137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "138": { + "content": "<|audio:138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "139": { + "content": "<|audio:139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "140": { + "content": "<|audio:140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "141": { + "content": "<|audio:141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "142": { + "content": "<|audio:142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "143": { + "content": "<|audio:143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "144": { + "content": "<|audio:144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "145": { + "content": "<|audio:145|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "146": { + "content": "<|audio:146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "147": { + "content": "<|audio:147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "148": { + "content": "<|audio:148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "149": { + "content": "<|audio:149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "150": { + "content": "<|audio:150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151": { + "content": "<|audio:151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "152": { + "content": "<|audio:152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "153": { + "content": "<|audio:153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "154": { + "content": "<|audio:154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "155": { + "content": "<|audio:155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "156": { + "content": "<|audio:156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "157": { + "content": "<|audio:157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "158": { + "content": "<|audio:158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "159": { + "content": "<|audio:159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "160": { + "content": "<|audio:160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "161": { + "content": "<|audio:161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "162": { + "content": "<|audio:162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "163": { + "content": "<|audio:163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "164": { + "content": "<|audio:164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "165": { + "content": "<|audio:165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "166": { + "content": "<|audio:166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "167": { + "content": "<|audio:167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "168": { + "content": "<|audio:168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "169": { + "content": "<|audio:169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "170": { + "content": "<|audio:170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "171": { + "content": "<|audio:171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "172": { + 
"content": "<|audio:172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "173": { + "content": "<|audio:173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "174": { + "content": "<|audio:174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "175": { + "content": "<|audio:175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "176": { + "content": "<|audio:176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "177": { + "content": "<|audio:177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "178": { + "content": "<|audio:178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "179": { + "content": "<|audio:179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "180": { + "content": "<|audio:180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "181": { + "content": "<|audio:181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "182": { + "content": "<|audio:182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "183": { + "content": "<|audio:183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "184": { + "content": "<|audio:184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "185": { + "content": "<|audio:185|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "186": { + "content": "<|audio:186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "187": { + "content": "<|audio:187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "188": { + "content": "<|audio:188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "189": { + "content": "<|audio:189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "190": { + "content": "<|audio:190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "191": { + "content": "<|audio:191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "192": { + "content": "<|audio:192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "193": { + "content": "<|audio:193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "194": { + "content": "<|audio:194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "195": { + "content": "<|audio:195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "196": { + "content": "<|audio:196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "197": { + "content": "<|audio:197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "198": { + "content": "<|audio:198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "199": { + "content": "<|audio:199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200": { + "content": "<|audio:200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "201": { + "content": "<|audio:201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "202": { + "content": "<|audio:202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "203": { + "content": "<|audio:203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "204": { + "content": "<|audio:204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "205": { + "content": "<|audio:205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "206": { + "content": "<|audio:206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "207": { + "content": "<|audio:207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "208": { + "content": "<|audio:208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "209": { + "content": "<|audio:209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "210": { + "content": "<|audio:210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "211": { + "content": "<|audio:211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "212": { + "content": "<|audio:212|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "213": { + "content": "<|audio:213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "214": { + "content": "<|audio:214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "215": { + "content": "<|audio:215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "216": { + "content": "<|audio:216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "217": { + "content": "<|audio:217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "218": { + "content": "<|audio:218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "219": { + "content": "<|audio:219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "220": { + "content": "<|audio:220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "221": { + "content": "<|audio:221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "222": { + "content": "<|audio:222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "223": { + "content": "<|audio:223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "224": { + "content": "<|audio:224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "225": { + "content": "<|audio:225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "226": { + "content": "<|audio:226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "227": { + "content": "<|audio:227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "228": { + "content": "<|audio:228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "229": { + "content": "<|audio:229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "230": { + "content": "<|audio:230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "231": { + "content": "<|audio:231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "232": { + "content": "<|audio:232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "233": { + "content": "<|audio:233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "234": { + "content": "<|audio:234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "235": { + "content": "<|audio:235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "236": { + "content": "<|audio:236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "237": { + "content": "<|audio:237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "238": { + "content": "<|audio:238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "239": { + 
"content": "<|audio:239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "240": { + "content": "<|audio:240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "241": { + "content": "<|audio:241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "242": { + "content": "<|audio:242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "243": { + "content": "<|audio:243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "244": { + "content": "<|audio:244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "245": { + "content": "<|audio:245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "246": { + "content": "<|audio:246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "247": { + "content": "<|audio:247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "248": { + "content": "<|audio:248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "249": { + "content": "<|audio:249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250": { + "content": "<|audio:250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "251": { + "content": "<|audio:251|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "252": { + "content": "<|audio:252|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "253": { + "content": "<|audio:253|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "254": { + "content": "<|audio:254|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255": { + "content": "<|audio:255|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256": { + "content": "<|audio:256|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "257": { + "content": "<|audio:257|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "258": { + "content": "<|audio:258|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "259": { + "content": "<|audio:259|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "260": { + "content": "<|audio:260|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "261": { + "content": "<|audio:261|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "262": { + "content": "<|audio:262|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "263": { + "content": "<|audio:263|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "264": { + "content": "<|audio:264|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "265": { + "content": "<|audio:265|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "266": { + "content": "<|audio:266|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "267": { + "content": "<|audio:267|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "268": { + "content": "<|audio:268|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "269": { + "content": "<|audio:269|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "270": { + "content": "<|audio:270|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "271": { + "content": "<|audio:271|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "272": { + "content": "<|audio:272|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "273": { + "content": "<|audio:273|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "274": { + "content": "<|audio:274|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "275": { + "content": "<|audio:275|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "276": { + "content": "<|audio:276|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "277": { + "content": "<|audio:277|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "278": { + "content": "<|audio:278|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "279": { + "content": "<|audio:279|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "280": { + "content": "<|audio:280|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "281": { + "content": "<|audio:281|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "282": { + "content": "<|audio:282|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "283": { + "content": "<|audio:283|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "284": { + "content": "<|audio:284|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "285": { + "content": "<|audio:285|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "286": { + "content": "<|audio:286|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "287": { + "content": "<|audio:287|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "288": { + "content": "<|audio:288|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "289": { + "content": "<|audio:289|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "290": { + "content": "<|audio:290|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "291": { + "content": "<|audio:291|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "292": { + "content": "<|audio:292|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "293": { + "content": "<|audio:293|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "294": { + "content": "<|audio:294|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "295": { + "content": "<|audio:295|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "296": { + "content": "<|audio:296|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "297": { + "content": "<|audio:297|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "298": { + "content": "<|audio:298|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "299": { + "content": "<|audio:299|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "300": { + "content": "<|audio:300|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "301": { + "content": "<|audio:301|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "302": { + "content": "<|audio:302|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "303": { + "content": "<|audio:303|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "304": { + "content": "<|audio:304|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "305": { + "content": "<|audio:305|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "306": { + 
"content": "<|audio:306|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "307": { + "content": "<|audio:307|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "308": { + "content": "<|audio:308|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "309": { + "content": "<|audio:309|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "310": { + "content": "<|audio:310|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "311": { + "content": "<|audio:311|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "312": { + "content": "<|audio:312|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "313": { + "content": "<|audio:313|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "314": { + "content": "<|audio:314|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "315": { + "content": "<|audio:315|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "316": { + "content": "<|audio:316|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "317": { + "content": "<|audio:317|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "318": { + "content": "<|audio:318|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "319": { + "content": "<|audio:319|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "320": { + "content": "<|audio:320|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "321": { + "content": "<|audio:321|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "322": { + "content": "<|audio:322|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "323": { + "content": "<|audio:323|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "324": { + "content": "<|audio:324|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "325": { + "content": "<|audio:325|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "326": { + "content": "<|audio:326|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "327": { + "content": "<|audio:327|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "328": { + "content": "<|audio:328|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "329": { + "content": "<|audio:329|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "330": { + "content": "<|audio:330|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "331": { + "content": "<|audio:331|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "332": { + "content": "<|audio:332|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "333": { + "content": "<|audio:333|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "334": { + "content": "<|audio:334|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "335": { + "content": "<|audio:335|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "336": { + "content": "<|audio:336|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "337": { + "content": "<|audio:337|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "338": { + "content": "<|audio:338|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "339": { + "content": "<|audio:339|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "340": { + "content": "<|audio:340|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "341": { + "content": "<|audio:341|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "342": { + "content": "<|audio:342|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "343": { + "content": "<|audio:343|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "344": { + "content": "<|audio:344|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "345": { + "content": "<|audio:345|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "346": { + "content": "<|audio:346|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "347": { + "content": "<|audio:347|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "348": { + "content": "<|audio:348|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "349": { + "content": "<|audio:349|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "350": { + "content": "<|audio:350|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "351": { + "content": "<|audio:351|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "352": { + "content": "<|audio:352|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "353": { + "content": "<|audio:353|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "354": { + "content": "<|audio:354|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "355": { + "content": "<|audio:355|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "356": { + "content": "<|audio:356|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "357": { + "content": "<|audio:357|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "358": { + "content": "<|audio:358|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "359": { + "content": "<|audio:359|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "360": { + "content": "<|audio:360|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "361": { + "content": "<|audio:361|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "362": { + "content": "<|audio:362|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "363": { + "content": "<|audio:363|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "364": { + "content": "<|audio:364|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "365": { + "content": "<|audio:365|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "366": { + "content": "<|audio:366|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "367": { + "content": "<|audio:367|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "368": { + "content": "<|audio:368|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "369": { + "content": "<|audio:369|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "370": { + "content": "<|audio:370|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "371": { + "content": "<|audio:371|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "372": { + "content": "<|audio:372|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "373": { + 
"content": "<|audio:373|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "374": { + "content": "<|audio:374|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "375": { + "content": "<|audio:375|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "376": { + "content": "<|audio:376|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "377": { + "content": "<|audio:377|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "378": { + "content": "<|audio:378|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "379": { + "content": "<|audio:379|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "380": { + "content": "<|audio:380|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "381": { + "content": "<|audio:381|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "382": { + "content": "<|audio:382|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "383": { + "content": "<|audio:383|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "384": { + "content": "<|audio:384|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "385": { + "content": "<|audio:385|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "386": { + "content": "<|audio:386|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "387": { + "content": "<|audio:387|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "388": { + "content": "<|audio:388|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "389": { + "content": "<|audio:389|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "390": { + "content": "<|audio:390|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "391": { + "content": "<|audio:391|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "392": { + "content": "<|audio:392|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "393": { + "content": "<|audio:393|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "394": { + "content": "<|audio:394|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "395": { + "content": "<|audio:395|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "396": { + "content": "<|audio:396|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "397": { + "content": "<|audio:397|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "398": { + "content": "<|audio:398|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "399": { + "content": "<|audio:399|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "400": { + "content": "<|audio:400|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "401": { + "content": "<|audio:401|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "402": { + "content": "<|audio:402|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "403": { + "content": "<|audio:403|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "404": { + "content": "<|audio:404|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "405": { + "content": "<|audio:405|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "406": { + "content": "<|audio:406|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "407": { + "content": "<|audio:407|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "408": { + "content": "<|audio:408|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "409": { + "content": "<|audio:409|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "410": { + "content": "<|audio:410|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "411": { + "content": "<|audio:411|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "412": { + "content": "<|audio:412|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "413": { + "content": "<|audio:413|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "414": { + "content": "<|audio:414|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "415": { + "content": "<|audio:415|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "416": { + "content": "<|audio:416|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "417": { + "content": "<|audio:417|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "418": { + "content": "<|audio:418|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "419": { + "content": "<|audio:419|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "420": { + "content": "<|audio:420|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "421": { + "content": "<|audio:421|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "422": { + "content": "<|audio:422|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "423": { + "content": "<|audio:423|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "424": { + "content": "<|audio:424|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "425": { + "content": "<|audio:425|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "426": { + "content": "<|audio:426|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "427": { + "content": "<|audio:427|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "428": { + "content": "<|audio:428|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "429": { + "content": "<|audio:429|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "430": { + "content": "<|audio:430|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "431": { + "content": "<|audio:431|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "432": { + "content": "<|audio:432|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "433": { + "content": "<|audio:433|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "434": { + "content": "<|audio:434|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "435": { + "content": "<|audio:435|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "436": { + "content": "<|audio:436|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "437": { + "content": "<|audio:437|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "438": { + "content": "<|audio:438|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "439": { + "content": "<|audio:439|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "440": { + 
"content": "<|audio:440|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "441": { + "content": "<|audio:441|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "442": { + "content": "<|audio:442|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "443": { + "content": "<|audio:443|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "444": { + "content": "<|audio:444|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "445": { + "content": "<|audio:445|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "446": { + "content": "<|audio:446|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "447": { + "content": "<|audio:447|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "448": { + "content": "<|audio:448|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "449": { + "content": "<|audio:449|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "450": { + "content": "<|audio:450|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "451": { + "content": "<|audio:451|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "452": { + "content": "<|audio:452|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "453": { + "content": "<|audio:453|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "454": { + "content": "<|audio:454|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "455": { + "content": "<|audio:455|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "456": { + "content": "<|audio:456|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "457": { + "content": "<|audio:457|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "458": { + "content": "<|audio:458|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "459": { + "content": "<|audio:459|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "460": { + "content": "<|audio:460|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "461": { + "content": "<|audio:461|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "462": { + "content": "<|audio:462|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "463": { + "content": "<|audio:463|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "464": { + "content": "<|audio:464|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "465": { + "content": "<|audio:465|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "466": { + "content": "<|audio:466|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "467": { + "content": "<|audio:467|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "468": { + "content": "<|audio:468|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "469": { + "content": "<|audio:469|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "470": { + "content": "<|audio:470|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "471": { + "content": "<|audio:471|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "472": { + "content": "<|audio:472|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "473": { + "content": "<|audio:473|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "474": { + "content": "<|audio:474|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "475": { + "content": "<|audio:475|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "476": { + "content": "<|audio:476|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "477": { + "content": "<|audio:477|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "478": { + "content": "<|audio:478|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "479": { + "content": "<|audio:479|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "480": { + "content": "<|audio:480|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "481": { + "content": "<|audio:481|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "482": { + "content": "<|audio:482|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "483": { + "content": "<|audio:483|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "484": { + "content": "<|audio:484|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "485": { + "content": "<|audio:485|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "486": { + "content": "<|audio:486|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "487": { + "content": "<|audio:487|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "488": { + "content": "<|audio:488|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "489": { + "content": "<|audio:489|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "490": { + "content": "<|audio:490|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "491": { + "content": "<|audio:491|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "492": { + "content": "<|audio:492|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "493": { + "content": "<|audio:493|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "494": { + "content": "<|audio:494|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "495": { + "content": "<|audio:495|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "496": { + "content": "<|audio:496|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "497": { + "content": "<|audio:497|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "498": { + "content": "<|audio:498|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "499": { + "content": "<|audio:499|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "500": { + "content": "<|audio:500|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "501": { + "content": "<|audio:501|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "502": { + "content": "<|audio:502|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "503": { + "content": "<|audio:503|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "504": { + "content": "<|audio:504|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "505": { + "content": "<|audio:505|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "506": { + "content": "<|audio:506|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "507": { + 
"content": "<|audio:507|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "508": { + "content": "<|audio:508|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "509": { + "content": "<|audio:509|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "510": { + "content": "<|audio:510|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "511": { + "content": "<|audio:511|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "512": { + "content": "<|audio:512|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "513": { + "content": "<|audio:513|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "514": { + "content": "<|audio:514|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "515": { + "content": "<|audio:515|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "516": { + "content": "<|audio:516|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "517": { + "content": "<|audio:517|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "518": { + "content": "<|audio:518|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "519": { + "content": "<|audio:519|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "520": { + "content": "<|audio:520|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "521": { + "content": "<|audio:521|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "522": { + "content": "<|audio:522|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "523": { + "content": "<|audio:523|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "524": { + "content": "<|audio:524|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "525": { + "content": "<|audio:525|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "526": { + "content": "<|audio:526|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "527": { + "content": "<|audio:527|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "528": { + "content": "<|audio:528|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "529": { + "content": "<|audio:529|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "530": { + "content": "<|audio:530|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "531": { + "content": "<|audio:531|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "532": { + "content": "<|audio:532|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "533": { + "content": "<|audio:533|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "534": { + "content": "<|audio:534|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "535": { + "content": "<|audio:535|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "536": { + "content": "<|audio:536|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "537": { + "content": "<|audio:537|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "538": { + "content": "<|audio:538|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "539": { + "content": "<|audio:539|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "540": { + "content": "<|audio:540|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "541": { + "content": "<|audio:541|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "542": { + "content": "<|audio:542|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "543": { + "content": "<|audio:543|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "544": { + "content": "<|audio:544|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "545": { + "content": "<|audio:545|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "546": { + "content": "<|audio:546|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "547": { + "content": "<|audio:547|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "548": { + "content": "<|audio:548|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "549": { + "content": "<|audio:549|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "550": { + "content": "<|audio:550|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "551": { + "content": "<|audio:551|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "552": { + "content": "<|audio:552|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "553": { + "content": "<|audio:553|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "554": { + "content": "<|audio:554|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "555": { + "content": "<|audio:555|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "556": { + "content": "<|audio:556|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "557": { + "content": "<|audio:557|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "558": { + "content": "<|audio:558|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "559": { + "content": "<|audio:559|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "560": { + "content": "<|audio:560|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "561": { + "content": "<|audio:561|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "562": { + "content": "<|audio:562|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "563": { + "content": "<|audio:563|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "564": { + "content": "<|audio:564|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "565": { + "content": "<|audio:565|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "566": { + "content": "<|audio:566|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "567": { + "content": "<|audio:567|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "568": { + "content": "<|audio:568|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "569": { + "content": "<|audio:569|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "570": { + "content": "<|audio:570|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "571": { + "content": "<|audio:571|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "572": { + "content": "<|audio:572|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "573": { + "content": "<|audio:573|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "574": { + 
"content": "<|audio:574|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "575": { + "content": "<|audio:575|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "576": { + "content": "<|audio:576|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "577": { + "content": "<|audio:577|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "578": { + "content": "<|audio:578|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "579": { + "content": "<|audio:579|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "580": { + "content": "<|audio:580|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "581": { + "content": "<|audio:581|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "582": { + "content": "<|audio:582|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "583": { + "content": "<|audio:583|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "584": { + "content": "<|audio:584|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "585": { + "content": "<|audio:585|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "586": { + "content": "<|audio:586|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "587": { + "content": "<|audio:587|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "588": { + "content": "<|audio:588|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "589": { + "content": "<|audio:589|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "590": { + "content": "<|audio:590|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "591": { + "content": "<|audio:591|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "592": { + "content": "<|audio:592|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "593": { + "content": "<|audio:593|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "594": { + "content": "<|audio:594|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "595": { + "content": "<|audio:595|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "596": { + "content": "<|audio:596|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "597": { + "content": "<|audio:597|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "598": { + "content": "<|audio:598|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "599": { + "content": "<|audio:599|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "600": { + "content": "<|audio:600|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "601": { + "content": "<|audio:601|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "602": { + "content": "<|audio:602|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "603": { + "content": "<|audio:603|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "604": { + "content": "<|audio:604|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "605": { + "content": "<|audio:605|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "606": { + "content": "<|audio:606|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "607": { + "content": "<|audio:607|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "608": { + "content": "<|audio:608|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "609": { + "content": "<|audio:609|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "610": { + "content": "<|audio:610|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "611": { + "content": "<|audio:611|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "612": { + "content": "<|audio:612|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "613": { + "content": "<|audio:613|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "614": { + "content": "<|audio:614|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "615": { + "content": "<|audio:615|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "616": { + "content": "<|audio:616|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "617": { + "content": "<|audio:617|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "618": { + "content": "<|audio:618|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "619": { + "content": "<|audio:619|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "620": { + "content": "<|audio:620|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "621": { + "content": "<|audio:621|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "622": { + "content": "<|audio:622|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "623": { + "content": "<|audio:623|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "624": { + "content": "<|audio:624|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "625": { + "content": "<|audio:625|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "626": { + "content": "<|audio:626|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "627": { + "content": "<|audio:627|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "628": { + "content": "<|audio:628|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "629": { + "content": "<|audio:629|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "630": { + "content": "<|audio:630|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "631": { + "content": "<|audio:631|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "632": { + "content": "<|audio:632|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "633": { + "content": "<|audio:633|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "634": { + "content": "<|audio:634|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "635": { + "content": "<|audio:635|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "636": { + "content": "<|audio:636|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "637": { + "content": "<|audio:637|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "638": { + "content": "<|audio:638|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "639": { + "content": "<|audio:639|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "640": { + "content": "<|audio:640|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "641": { + 
"content": "<|audio:641|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "642": { + "content": "<|audio:642|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "643": { + "content": "<|audio:643|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "644": { + "content": "<|audio:644|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "645": { + "content": "<|audio:645|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "646": { + "content": "<|audio:646|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "647": { + "content": "<|audio:647|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "648": { + "content": "<|audio:648|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "649": { + "content": "<|audio:649|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "650": { + "content": "<|audio:650|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "651": { + "content": "<|audio:651|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "652": { + "content": "<|audio:652|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "653": { + "content": "<|audio:653|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "654": { + "content": "<|audio:654|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "655": { + "content": "<|audio:655|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "656": { + "content": "<|audio:656|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "657": { + "content": "<|audio:657|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "658": { + "content": "<|audio:658|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "659": { + "content": "<|audio:659|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "660": { + "content": "<|audio:660|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "661": { + "content": "<|audio:661|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "662": { + "content": "<|audio:662|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "663": { + "content": "<|audio:663|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "664": { + "content": "<|audio:664|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "665": { + "content": "<|audio:665|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "666": { + "content": "<|audio:666|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "667": { + "content": "<|audio:667|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "668": { + "content": "<|audio:668|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "669": { + "content": "<|audio:669|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "670": { + "content": "<|audio:670|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "671": { + "content": "<|audio:671|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "672": { + "content": "<|audio:672|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "673": { + "content": "<|audio:673|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "674": { + "content": "<|audio:674|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "675": { + "content": "<|audio:675|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "676": { + "content": "<|audio:676|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "677": { + "content": "<|audio:677|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "678": { + "content": "<|audio:678|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "679": { + "content": "<|audio:679|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "680": { + "content": "<|audio:680|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "681": { + "content": "<|audio:681|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "682": { + "content": "<|audio:682|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "683": { + "content": "<|audio:683|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "684": { + "content": "<|audio:684|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "685": { + "content": "<|audio:685|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "686": { + "content": "<|audio:686|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "687": { + "content": "<|audio:687|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "688": { + "content": "<|audio:688|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "689": { + "content": "<|audio:689|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "690": { + "content": "<|audio:690|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "691": { + "content": "<|audio:691|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "692": { + "content": "<|audio:692|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "693": { + "content": "<|audio:693|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "694": { + "content": "<|audio:694|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "695": { + "content": "<|audio:695|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "696": { + "content": "<|audio:696|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "697": { + "content": "<|audio:697|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "698": { + "content": "<|audio:698|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "699": { + "content": "<|audio:699|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "700": { + "content": "<|audio:700|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "701": { + "content": "<|audio:701|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "702": { + "content": "<|audio:702|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "703": { + "content": "<|audio:703|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "704": { + "content": "<|audio:704|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "705": { + "content": "<|audio:705|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "706": { + "content": "<|audio:706|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "707": { + "content": "<|audio:707|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "708": { + 
"content": "<|audio:708|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "709": { + "content": "<|audio:709|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "710": { + "content": "<|audio:710|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "711": { + "content": "<|audio:711|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "712": { + "content": "<|audio:712|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "713": { + "content": "<|audio:713|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "714": { + "content": "<|audio:714|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "715": { + "content": "<|audio:715|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "716": { + "content": "<|audio:716|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "717": { + "content": "<|audio:717|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "718": { + "content": "<|audio:718|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "719": { + "content": "<|audio:719|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "720": { + "content": "<|audio:720|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "721": { + "content": "<|audio:721|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "722": { + "content": "<|audio:722|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "723": { + "content": "<|audio:723|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "724": { + "content": "<|audio:724|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "725": { + "content": "<|audio:725|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "726": { + "content": "<|audio:726|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "727": { + "content": "<|audio:727|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "728": { + "content": "<|audio:728|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "729": { + "content": "<|audio:729|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "730": { + "content": "<|audio:730|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "731": { + "content": "<|audio:731|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "732": { + "content": "<|audio:732|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "733": { + "content": "<|audio:733|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "734": { + "content": "<|audio:734|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "735": { + "content": "<|audio:735|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "736": { + "content": "<|audio:736|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "737": { + "content": "<|audio:737|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "738": { + "content": "<|audio:738|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "739": { + "content": "<|audio:739|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "740": { + "content": "<|audio:740|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "741": { + "content": "<|audio:741|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "742": { + "content": "<|audio:742|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "743": { + "content": "<|audio:743|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "744": { + "content": "<|audio:744|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "745": { + "content": "<|audio:745|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "746": { + "content": "<|audio:746|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "747": { + "content": "<|audio:747|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "748": { + "content": "<|audio:748|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "749": { + "content": "<|audio:749|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "750": { + "content": "<|audio:750|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "751": { + "content": "<|audio:751|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "752": { + "content": "<|audio:752|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "753": { + "content": "<|audio:753|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "754": { + "content": "<|audio:754|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "755": { + "content": "<|audio:755|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "756": { + "content": "<|audio:756|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "757": { + "content": "<|audio:757|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "758": { + "content": "<|audio:758|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "759": { + "content": "<|audio:759|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "760": { + "content": "<|audio:760|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "761": { + "content": "<|audio:761|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "762": { + "content": "<|audio:762|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "763": { + "content": "<|audio:763|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "764": { + "content": "<|audio:764|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "765": { + "content": "<|audio:765|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "766": { + "content": "<|audio:766|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "767": { + "content": "<|audio:767|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "768": { + "content": "<|audio:768|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "769": { + "content": "<|audio:769|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "770": { + "content": "<|audio:770|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "771": { + "content": "<|audio:771|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "772": { + "content": "<|audio:772|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "773": { + "content": "<|audio:773|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "774": { + "content": "<|audio:774|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "775": { + 
"content": "<|audio:775|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "776": { + "content": "<|audio:776|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "777": { + "content": "<|audio:777|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "778": { + "content": "<|audio:778|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "779": { + "content": "<|audio:779|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "780": { + "content": "<|audio:780|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "781": { + "content": "<|audio:781|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "782": { + "content": "<|audio:782|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "783": { + "content": "<|audio:783|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "784": { + "content": "<|audio:784|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "785": { + "content": "<|audio:785|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "786": { + "content": "<|audio:786|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "787": { + "content": "<|audio:787|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "788": { + "content": "<|audio:788|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "789": { + "content": "<|audio:789|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "790": { + "content": "<|audio:790|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "791": { + "content": "<|audio:791|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "792": { + "content": "<|audio:792|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "793": { + "content": "<|audio:793|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "794": { + "content": "<|audio:794|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "795": { + "content": "<|audio:795|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "796": { + "content": "<|audio:796|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "797": { + "content": "<|audio:797|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "798": { + "content": "<|audio:798|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "799": { + "content": "<|audio:799|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "800": { + "content": "<|audio:800|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "801": { + "content": "<|audio:801|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "802": { + "content": "<|audio:802|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "803": { + "content": "<|audio:803|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "804": { + "content": "<|audio:804|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "805": { + "content": "<|audio:805|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "806": { + "content": "<|audio:806|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "807": { + "content": "<|audio:807|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "808": { + "content": "<|audio:808|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "809": { + "content": "<|audio:809|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "810": { + "content": "<|audio:810|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "811": { + "content": "<|audio:811|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "812": { + "content": "<|audio:812|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "813": { + "content": "<|audio:813|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "814": { + "content": "<|audio:814|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "815": { + "content": "<|audio:815|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "816": { + "content": "<|audio:816|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "817": { + "content": "<|audio:817|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "818": { + "content": "<|audio:818|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "819": { + "content": "<|audio:819|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "820": { + "content": "<|audio:820|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "821": { + "content": "<|audio:821|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "822": { + "content": "<|audio:822|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "823": { + "content": "<|audio:823|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "824": { + "content": "<|audio:824|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "825": { + "content": "<|audio:825|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "826": { + "content": "<|audio:826|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "827": { + "content": "<|audio:827|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "828": { + "content": "<|audio:828|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "829": { + "content": "<|audio:829|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "830": { + "content": "<|audio:830|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "831": { + "content": "<|audio:831|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "832": { + "content": "<|audio:832|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "833": { + "content": "<|audio:833|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "834": { + "content": "<|audio:834|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "835": { + "content": "<|audio:835|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "836": { + "content": "<|audio:836|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "837": { + "content": "<|audio:837|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "838": { + "content": "<|audio:838|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "839": { + "content": "<|audio:839|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "840": { + "content": "<|audio:840|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "841": { + "content": "<|audio:841|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "842": { + 
"content": "<|audio:842|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "843": { + "content": "<|audio:843|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "844": { + "content": "<|audio:844|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "845": { + "content": "<|audio:845|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "846": { + "content": "<|audio:846|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "847": { + "content": "<|audio:847|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "848": { + "content": "<|audio:848|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "849": { + "content": "<|audio:849|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "850": { + "content": "<|audio:850|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "851": { + "content": "<|audio:851|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "852": { + "content": "<|audio:852|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "853": { + "content": "<|audio:853|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "854": { + "content": "<|audio:854|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "855": { + "content": "<|audio:855|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "856": { + "content": "<|audio:856|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "857": { + "content": "<|audio:857|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "858": { + "content": "<|audio:858|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "859": { + "content": "<|audio:859|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "860": { + "content": "<|audio:860|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "861": { + "content": "<|audio:861|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "862": { + "content": "<|audio:862|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "863": { + "content": "<|audio:863|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "864": { + "content": "<|audio:864|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "865": { + "content": "<|audio:865|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "866": { + "content": "<|audio:866|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "867": { + "content": "<|audio:867|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "868": { + "content": "<|audio:868|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "869": { + "content": "<|audio:869|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "870": { + "content": "<|audio:870|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "871": { + "content": "<|audio:871|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "872": { + "content": "<|audio:872|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "873": { + "content": "<|audio:873|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "874": { + "content": "<|audio:874|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "875": { + "content": "<|audio:875|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "876": { + "content": "<|audio:876|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "877": { + "content": "<|audio:877|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "878": { + "content": "<|audio:878|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "879": { + "content": "<|audio:879|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "880": { + "content": "<|audio:880|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "881": { + "content": "<|audio:881|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "882": { + "content": "<|audio:882|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "883": { + "content": "<|audio:883|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "884": { + "content": "<|audio:884|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "885": { + "content": "<|audio:885|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "886": { + "content": "<|audio:886|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "887": { + "content": "<|audio:887|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "888": { + "content": "<|audio:888|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "889": { + "content": "<|audio:889|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "890": { + "content": "<|audio:890|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "891": { + "content": "<|audio:891|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "892": { + "content": "<|audio:892|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "893": { + "content": "<|audio:893|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "894": { + "content": "<|audio:894|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "895": { + "content": "<|audio:895|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "896": { + "content": "<|audio:896|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "897": { + "content": "<|audio:897|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "898": { + "content": "<|audio:898|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "899": { + "content": "<|audio:899|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "900": { + "content": "<|audio:900|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "901": { + "content": "<|audio:901|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "902": { + "content": "<|audio:902|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "903": { + "content": "<|audio:903|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "904": { + "content": "<|audio:904|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "905": { + "content": "<|audio:905|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "906": { + "content": "<|audio:906|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "907": { + "content": "<|audio:907|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "908": { + "content": "<|audio:908|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "909": { + 
"content": "<|audio:909|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "910": { + "content": "<|audio:910|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "911": { + "content": "<|audio:911|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "912": { + "content": "<|audio:912|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "913": { + "content": "<|audio:913|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "914": { + "content": "<|audio:914|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "915": { + "content": "<|audio:915|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "916": { + "content": "<|audio:916|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "917": { + "content": "<|audio:917|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "918": { + "content": "<|audio:918|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "919": { + "content": "<|audio:919|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "920": { + "content": "<|audio:920|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "921": { + "content": "<|audio:921|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "922": { + "content": "<|audio:922|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "923": { + "content": "<|audio:923|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "924": { + "content": "<|audio:924|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "925": { + "content": "<|audio:925|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "926": { + "content": "<|audio:926|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "927": { + "content": "<|audio:927|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "928": { + "content": "<|audio:928|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "929": { + "content": "<|audio:929|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "930": { + "content": "<|audio:930|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "931": { + "content": "<|audio:931|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "932": { + "content": "<|audio:932|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "933": { + "content": "<|audio:933|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "934": { + "content": "<|audio:934|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "935": { + "content": "<|audio:935|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "936": { + "content": "<|audio:936|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "937": { + "content": "<|audio:937|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "938": { + "content": "<|audio:938|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "939": { + "content": "<|audio:939|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "940": { + "content": "<|audio:940|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "941": { + "content": "<|audio:941|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "942": { + "content": "<|audio:942|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "943": { + "content": "<|audio:943|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "944": { + "content": "<|audio:944|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "945": { + "content": "<|audio:945|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "946": { + "content": "<|audio:946|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "947": { + "content": "<|audio:947|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "948": { + "content": "<|audio:948|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "949": { + "content": "<|audio:949|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "950": { + "content": "<|audio:950|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "951": { + "content": "<|audio:951|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "952": { + "content": "<|audio:952|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "953": { + "content": "<|audio:953|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "954": { + "content": "<|audio:954|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "955": { + "content": "<|audio:955|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "956": { + "content": "<|audio:956|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "957": { + "content": "<|audio:957|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "958": { + "content": "<|audio:958|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "959": { + "content": "<|audio:959|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "960": { + "content": "<|audio:960|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "961": { + "content": "<|audio:961|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "962": { + "content": "<|audio:962|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "963": { + "content": "<|audio:963|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "964": { + "content": "<|audio:964|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "965": { + "content": "<|audio:965|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "966": { + "content": "<|audio:966|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "967": { + "content": "<|audio:967|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "968": { + "content": "<|audio:968|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "969": { + "content": "<|audio:969|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "970": { + "content": "<|audio:970|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "971": { + "content": "<|audio:971|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "972": { + "content": "<|audio:972|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "973": { + "content": "<|audio:973|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "974": { + "content": "<|audio:974|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "975": { + "content": "<|audio:975|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "976": { + 
"content": "<|audio:976|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "977": { + "content": "<|audio:977|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "978": { + "content": "<|audio:978|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "979": { + "content": "<|audio:979|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "980": { + "content": "<|audio:980|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "981": { + "content": "<|audio:981|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "982": { + "content": "<|audio:982|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "983": { + "content": "<|audio:983|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "984": { + "content": "<|audio:984|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "985": { + "content": "<|audio:985|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "986": { + "content": "<|audio:986|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "987": { + "content": "<|audio:987|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "988": { + "content": "<|audio:988|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "989": { + "content": "<|audio:989|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "990": { + "content": "<|audio:990|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "991": { + "content": "<|audio:991|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "992": { + "content": "<|audio:992|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "993": { + "content": "<|audio:993|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "994": { + "content": "<|audio:994|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "995": { + "content": "<|audio:995|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "996": { + "content": "<|audio:996|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "997": { + "content": "<|audio:997|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "998": { + "content": "<|audio:998|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "999": { + "content": "<|audio:999|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1000": { + "content": "<|audio:1000|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1001": { + "content": "<|audio:1001|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1002": { + "content": "<|audio:1002|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "1003": { + "content": "<|audio:1003|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1004": { + "content": "<|audio:1004|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1005": { + "content": "<|audio:1005|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1006": { + "content": "<|audio:1006|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1007": { + "content": "<|audio:1007|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1008": { + "content": "<|audio:1008|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1009": { + "content": "<|audio:1009|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1010": { + "content": "<|audio:1010|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1011": { + "content": "<|audio:1011|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1012": { + "content": "<|audio:1012|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1013": { + "content": "<|audio:1013|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1014": { + "content": "<|audio:1014|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1015": { + "content": "<|audio:1015|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1016": { + 
"content": "<|audio:1016|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1017": { + "content": "<|audio:1017|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1018": { + "content": "<|audio:1018|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1019": { + "content": "<|audio:1019|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1020": { + "content": "<|audio:1020|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1021": { + "content": "<|audio:1021|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1022": { + "content": "<|audio:1022|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1023": { + "content": "<|audio:1023|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1024": { + "content": "<|startoftranscript|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1025": { + "content": "<|endoftranscript|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1026": { + "content": "<|padding|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "clean_up_tokenization_spaces": true, + "model_max_length": 1877, + "pad_token": "<|padding|>", + "special_tokens": [ + "<|audio:0|>", + "<|audio:1|>", + "<|audio:2|>", + "<|audio:3|>", + "<|audio:4|>", + "<|audio:5|>", + "<|audio:6|>", + "<|audio:7|>", + "<|audio:8|>", + "<|audio:9|>", + "<|audio:10|>", + "<|audio:11|>", + 
"<|audio:12|>", + "<|audio:13|>", + "<|audio:14|>", + "<|audio:15|>", + "<|audio:16|>", + "<|audio:17|>", + "<|audio:18|>", + "<|audio:19|>", + "<|audio:20|>", + "<|audio:21|>", + "<|audio:22|>", + "<|audio:23|>", + "<|audio:24|>", + "<|audio:25|>", + "<|audio:26|>", + "<|audio:27|>", + "<|audio:28|>", + "<|audio:29|>", + "<|audio:30|>", + "<|audio:31|>", + "<|audio:32|>", + "<|audio:33|>", + "<|audio:34|>", + "<|audio:35|>", + "<|audio:36|>", + "<|audio:37|>", + "<|audio:38|>", + "<|audio:39|>", + "<|audio:40|>", + "<|audio:41|>", + "<|audio:42|>", + "<|audio:43|>", + "<|audio:44|>", + "<|audio:45|>", + "<|audio:46|>", + "<|audio:47|>", + "<|audio:48|>", + "<|audio:49|>", + "<|audio:50|>", + "<|audio:51|>", + "<|audio:52|>", + "<|audio:53|>", + "<|audio:54|>", + "<|audio:55|>", + "<|audio:56|>", + "<|audio:57|>", + "<|audio:58|>", + "<|audio:59|>", + "<|audio:60|>", + "<|audio:61|>", + "<|audio:62|>", + "<|audio:63|>", + "<|audio:64|>", + "<|audio:65|>", + "<|audio:66|>", + "<|audio:67|>", + "<|audio:68|>", + "<|audio:69|>", + "<|audio:70|>", + "<|audio:71|>", + "<|audio:72|>", + "<|audio:73|>", + "<|audio:74|>", + "<|audio:75|>", + "<|audio:76|>", + "<|audio:77|>", + "<|audio:78|>", + "<|audio:79|>", + "<|audio:80|>", + "<|audio:81|>", + "<|audio:82|>", + "<|audio:83|>", + "<|audio:84|>", + "<|audio:85|>", + "<|audio:86|>", + "<|audio:87|>", + "<|audio:88|>", + "<|audio:89|>", + "<|audio:90|>", + "<|audio:91|>", + "<|audio:92|>", + "<|audio:93|>", + "<|audio:94|>", + "<|audio:95|>", + "<|audio:96|>", + "<|audio:97|>", + "<|audio:98|>", + "<|audio:99|>", + "<|audio:100|>", + "<|audio:101|>", + "<|audio:102|>", + "<|audio:103|>", + "<|audio:104|>", + "<|audio:105|>", + "<|audio:106|>", + "<|audio:107|>", + "<|audio:108|>", + "<|audio:109|>", + "<|audio:110|>", + "<|audio:111|>", + "<|audio:112|>", + "<|audio:113|>", + "<|audio:114|>", + "<|audio:115|>", + "<|audio:116|>", + "<|audio:117|>", + "<|audio:118|>", + "<|audio:119|>", + "<|audio:120|>", + "<|audio:121|>", 
+ "<|audio:122|>", + "<|audio:123|>", + "<|audio:124|>", + "<|audio:125|>", + "<|audio:126|>", + "<|audio:127|>", + "<|audio:128|>", + "<|audio:129|>", + "<|audio:130|>", + "<|audio:131|>", + "<|audio:132|>", + "<|audio:133|>", + "<|audio:134|>", + "<|audio:135|>", + "<|audio:136|>", + "<|audio:137|>", + "<|audio:138|>", + "<|audio:139|>", + "<|audio:140|>", + "<|audio:141|>", + "<|audio:142|>", + "<|audio:143|>", + "<|audio:144|>", + "<|audio:145|>", + "<|audio:146|>", + "<|audio:147|>", + "<|audio:148|>", + "<|audio:149|>", + "<|audio:150|>", + "<|audio:151|>", + "<|audio:152|>", + "<|audio:153|>", + "<|audio:154|>", + "<|audio:155|>", + "<|audio:156|>", + "<|audio:157|>", + "<|audio:158|>", + "<|audio:159|>", + "<|audio:160|>", + "<|audio:161|>", + "<|audio:162|>", + "<|audio:163|>", + "<|audio:164|>", + "<|audio:165|>", + "<|audio:166|>", + "<|audio:167|>", + "<|audio:168|>", + "<|audio:169|>", + "<|audio:170|>", + "<|audio:171|>", + "<|audio:172|>", + "<|audio:173|>", + "<|audio:174|>", + "<|audio:175|>", + "<|audio:176|>", + "<|audio:177|>", + "<|audio:178|>", + "<|audio:179|>", + "<|audio:180|>", + "<|audio:181|>", + "<|audio:182|>", + "<|audio:183|>", + "<|audio:184|>", + "<|audio:185|>", + "<|audio:186|>", + "<|audio:187|>", + "<|audio:188|>", + "<|audio:189|>", + "<|audio:190|>", + "<|audio:191|>", + "<|audio:192|>", + "<|audio:193|>", + "<|audio:194|>", + "<|audio:195|>", + "<|audio:196|>", + "<|audio:197|>", + "<|audio:198|>", + "<|audio:199|>", + "<|audio:200|>", + "<|audio:201|>", + "<|audio:202|>", + "<|audio:203|>", + "<|audio:204|>", + "<|audio:205|>", + "<|audio:206|>", + "<|audio:207|>", + "<|audio:208|>", + "<|audio:209|>", + "<|audio:210|>", + "<|audio:211|>", + "<|audio:212|>", + "<|audio:213|>", + "<|audio:214|>", + "<|audio:215|>", + "<|audio:216|>", + "<|audio:217|>", + "<|audio:218|>", + "<|audio:219|>", + "<|audio:220|>", + "<|audio:221|>", + "<|audio:222|>", + "<|audio:223|>", + "<|audio:224|>", + "<|audio:225|>", + "<|audio:226|>", + 
"<|audio:227|>", + "<|audio:228|>", + "<|audio:229|>", + "<|audio:230|>", + "<|audio:231|>", + "<|audio:232|>", + "<|audio:233|>", + "<|audio:234|>", + "<|audio:235|>", + "<|audio:236|>", + "<|audio:237|>", + "<|audio:238|>", + "<|audio:239|>", + "<|audio:240|>", + "<|audio:241|>", + "<|audio:242|>", + "<|audio:243|>", + "<|audio:244|>", + "<|audio:245|>", + "<|audio:246|>", + "<|audio:247|>", + "<|audio:248|>", + "<|audio:249|>", + "<|audio:250|>", + "<|audio:251|>", + "<|audio:252|>", + "<|audio:253|>", + "<|audio:254|>", + "<|audio:255|>", + "<|audio:256|>", + "<|audio:257|>", + "<|audio:258|>", + "<|audio:259|>", + "<|audio:260|>", + "<|audio:261|>", + "<|audio:262|>", + "<|audio:263|>", + "<|audio:264|>", + "<|audio:265|>", + "<|audio:266|>", + "<|audio:267|>", + "<|audio:268|>", + "<|audio:269|>", + "<|audio:270|>", + "<|audio:271|>", + "<|audio:272|>", + "<|audio:273|>", + "<|audio:274|>", + "<|audio:275|>", + "<|audio:276|>", + "<|audio:277|>", + "<|audio:278|>", + "<|audio:279|>", + "<|audio:280|>", + "<|audio:281|>", + "<|audio:282|>", + "<|audio:283|>", + "<|audio:284|>", + "<|audio:285|>", + "<|audio:286|>", + "<|audio:287|>", + "<|audio:288|>", + "<|audio:289|>", + "<|audio:290|>", + "<|audio:291|>", + "<|audio:292|>", + "<|audio:293|>", + "<|audio:294|>", + "<|audio:295|>", + "<|audio:296|>", + "<|audio:297|>", + "<|audio:298|>", + "<|audio:299|>", + "<|audio:300|>", + "<|audio:301|>", + "<|audio:302|>", + "<|audio:303|>", + "<|audio:304|>", + "<|audio:305|>", + "<|audio:306|>", + "<|audio:307|>", + "<|audio:308|>", + "<|audio:309|>", + "<|audio:310|>", + "<|audio:311|>", + "<|audio:312|>", + "<|audio:313|>", + "<|audio:314|>", + "<|audio:315|>", + "<|audio:316|>", + "<|audio:317|>", + "<|audio:318|>", + "<|audio:319|>", + "<|audio:320|>", + "<|audio:321|>", + "<|audio:322|>", + "<|audio:323|>", + "<|audio:324|>", + "<|audio:325|>", + "<|audio:326|>", + "<|audio:327|>", + "<|audio:328|>", + "<|audio:329|>", + "<|audio:330|>", + "<|audio:331|>", + 
"<|audio:332|>", + "<|audio:333|>", + "<|audio:334|>", + "<|audio:335|>", + "<|audio:336|>", + "<|audio:337|>", + "<|audio:338|>", + "<|audio:339|>", + "<|audio:340|>", + "<|audio:341|>", + "<|audio:342|>", + "<|audio:343|>", + "<|audio:344|>", + "<|audio:345|>", + "<|audio:346|>", + "<|audio:347|>", + "<|audio:348|>", + "<|audio:349|>", + "<|audio:350|>", + "<|audio:351|>", + "<|audio:352|>", + "<|audio:353|>", + "<|audio:354|>", + "<|audio:355|>", + "<|audio:356|>", + "<|audio:357|>", + "<|audio:358|>", + "<|audio:359|>", + "<|audio:360|>", + "<|audio:361|>", + "<|audio:362|>", + "<|audio:363|>", + "<|audio:364|>", + "<|audio:365|>", + "<|audio:366|>", + "<|audio:367|>", + "<|audio:368|>", + "<|audio:369|>", + "<|audio:370|>", + "<|audio:371|>", + "<|audio:372|>", + "<|audio:373|>", + "<|audio:374|>", + "<|audio:375|>", + "<|audio:376|>", + "<|audio:377|>", + "<|audio:378|>", + "<|audio:379|>", + "<|audio:380|>", + "<|audio:381|>", + "<|audio:382|>", + "<|audio:383|>", + "<|audio:384|>", + "<|audio:385|>", + "<|audio:386|>", + "<|audio:387|>", + "<|audio:388|>", + "<|audio:389|>", + "<|audio:390|>", + "<|audio:391|>", + "<|audio:392|>", + "<|audio:393|>", + "<|audio:394|>", + "<|audio:395|>", + "<|audio:396|>", + "<|audio:397|>", + "<|audio:398|>", + "<|audio:399|>", + "<|audio:400|>", + "<|audio:401|>", + "<|audio:402|>", + "<|audio:403|>", + "<|audio:404|>", + "<|audio:405|>", + "<|audio:406|>", + "<|audio:407|>", + "<|audio:408|>", + "<|audio:409|>", + "<|audio:410|>", + "<|audio:411|>", + "<|audio:412|>", + "<|audio:413|>", + "<|audio:414|>", + "<|audio:415|>", + "<|audio:416|>", + "<|audio:417|>", + "<|audio:418|>", + "<|audio:419|>", + "<|audio:420|>", + "<|audio:421|>", + "<|audio:422|>", + "<|audio:423|>", + "<|audio:424|>", + "<|audio:425|>", + "<|audio:426|>", + "<|audio:427|>", + "<|audio:428|>", + "<|audio:429|>", + "<|audio:430|>", + "<|audio:431|>", + "<|audio:432|>", + "<|audio:433|>", + "<|audio:434|>", + "<|audio:435|>", + "<|audio:436|>", + 
"<|audio:437|>", + "<|audio:438|>", + "<|audio:439|>", + "<|audio:440|>", + "<|audio:441|>", + "<|audio:442|>", + "<|audio:443|>", + "<|audio:444|>", + "<|audio:445|>", + "<|audio:446|>", + "<|audio:447|>", + "<|audio:448|>", + "<|audio:449|>", + "<|audio:450|>", + "<|audio:451|>", + "<|audio:452|>", + "<|audio:453|>", + "<|audio:454|>", + "<|audio:455|>", + "<|audio:456|>", + "<|audio:457|>", + "<|audio:458|>", + "<|audio:459|>", + "<|audio:460|>", + "<|audio:461|>", + "<|audio:462|>", + "<|audio:463|>", + "<|audio:464|>", + "<|audio:465|>", + "<|audio:466|>", + "<|audio:467|>", + "<|audio:468|>", + "<|audio:469|>", + "<|audio:470|>", + "<|audio:471|>", + "<|audio:472|>", + "<|audio:473|>", + "<|audio:474|>", + "<|audio:475|>", + "<|audio:476|>", + "<|audio:477|>", + "<|audio:478|>", + "<|audio:479|>", + "<|audio:480|>", + "<|audio:481|>", + "<|audio:482|>", + "<|audio:483|>", + "<|audio:484|>", + "<|audio:485|>", + "<|audio:486|>", + "<|audio:487|>", + "<|audio:488|>", + "<|audio:489|>", + "<|audio:490|>", + "<|audio:491|>", + "<|audio:492|>", + "<|audio:493|>", + "<|audio:494|>", + "<|audio:495|>", + "<|audio:496|>", + "<|audio:497|>", + "<|audio:498|>", + "<|audio:499|>", + "<|audio:500|>", + "<|audio:501|>", + "<|audio:502|>", + "<|audio:503|>", + "<|audio:504|>", + "<|audio:505|>", + "<|audio:506|>", + "<|audio:507|>", + "<|audio:508|>", + "<|audio:509|>", + "<|audio:510|>", + "<|audio:511|>", + "<|audio:512|>", + "<|audio:513|>", + "<|audio:514|>", + "<|audio:515|>", + "<|audio:516|>", + "<|audio:517|>", + "<|audio:518|>", + "<|audio:519|>", + "<|audio:520|>", + "<|audio:521|>", + "<|audio:522|>", + "<|audio:523|>", + "<|audio:524|>", + "<|audio:525|>", + "<|audio:526|>", + "<|audio:527|>", + "<|audio:528|>", + "<|audio:529|>", + "<|audio:530|>", + "<|audio:531|>", + "<|audio:532|>", + "<|audio:533|>", + "<|audio:534|>", + "<|audio:535|>", + "<|audio:536|>", + "<|audio:537|>", + "<|audio:538|>", + "<|audio:539|>", + "<|audio:540|>", + "<|audio:541|>", + 
"<|audio:542|>", + "<|audio:543|>", + "<|audio:544|>", + "<|audio:545|>", + "<|audio:546|>", + "<|audio:547|>", + "<|audio:548|>", + "<|audio:549|>", + "<|audio:550|>", + "<|audio:551|>", + "<|audio:552|>", + "<|audio:553|>", + "<|audio:554|>", + "<|audio:555|>", + "<|audio:556|>", + "<|audio:557|>", + "<|audio:558|>", + "<|audio:559|>", + "<|audio:560|>", + "<|audio:561|>", + "<|audio:562|>", + "<|audio:563|>", + "<|audio:564|>", + "<|audio:565|>", + "<|audio:566|>", + "<|audio:567|>", + "<|audio:568|>", + "<|audio:569|>", + "<|audio:570|>", + "<|audio:571|>", + "<|audio:572|>", + "<|audio:573|>", + "<|audio:574|>", + "<|audio:575|>", + "<|audio:576|>", + "<|audio:577|>", + "<|audio:578|>", + "<|audio:579|>", + "<|audio:580|>", + "<|audio:581|>", + "<|audio:582|>", + "<|audio:583|>", + "<|audio:584|>", + "<|audio:585|>", + "<|audio:586|>", + "<|audio:587|>", + "<|audio:588|>", + "<|audio:589|>", + "<|audio:590|>", + "<|audio:591|>", + "<|audio:592|>", + "<|audio:593|>", + "<|audio:594|>", + "<|audio:595|>", + "<|audio:596|>", + "<|audio:597|>", + "<|audio:598|>", + "<|audio:599|>", + "<|audio:600|>", + "<|audio:601|>", + "<|audio:602|>", + "<|audio:603|>", + "<|audio:604|>", + "<|audio:605|>", + "<|audio:606|>", + "<|audio:607|>", + "<|audio:608|>", + "<|audio:609|>", + "<|audio:610|>", + "<|audio:611|>", + "<|audio:612|>", + "<|audio:613|>", + "<|audio:614|>", + "<|audio:615|>", + "<|audio:616|>", + "<|audio:617|>", + "<|audio:618|>", + "<|audio:619|>", + "<|audio:620|>", + "<|audio:621|>", + "<|audio:622|>", + "<|audio:623|>", + "<|audio:624|>", + "<|audio:625|>", + "<|audio:626|>", + "<|audio:627|>", + "<|audio:628|>", + "<|audio:629|>", + "<|audio:630|>", + "<|audio:631|>", + "<|audio:632|>", + "<|audio:633|>", + "<|audio:634|>", + "<|audio:635|>", + "<|audio:636|>", + "<|audio:637|>", + "<|audio:638|>", + "<|audio:639|>", + "<|audio:640|>", + "<|audio:641|>", + "<|audio:642|>", + "<|audio:643|>", + "<|audio:644|>", + "<|audio:645|>", + "<|audio:646|>", + 
"<|audio:647|>", + "<|audio:648|>", + "<|audio:649|>", + "<|audio:650|>", + "<|audio:651|>", + "<|audio:652|>", + "<|audio:653|>", + "<|audio:654|>", + "<|audio:655|>", + "<|audio:656|>", + "<|audio:657|>", + "<|audio:658|>", + "<|audio:659|>", + "<|audio:660|>", + "<|audio:661|>", + "<|audio:662|>", + "<|audio:663|>", + "<|audio:664|>", + "<|audio:665|>", + "<|audio:666|>", + "<|audio:667|>", + "<|audio:668|>", + "<|audio:669|>", + "<|audio:670|>", + "<|audio:671|>", + "<|audio:672|>", + "<|audio:673|>", + "<|audio:674|>", + "<|audio:675|>", + "<|audio:676|>", + "<|audio:677|>", + "<|audio:678|>", + "<|audio:679|>", + "<|audio:680|>", + "<|audio:681|>", + "<|audio:682|>", + "<|audio:683|>", + "<|audio:684|>", + "<|audio:685|>", + "<|audio:686|>", + "<|audio:687|>", + "<|audio:688|>", + "<|audio:689|>", + "<|audio:690|>", + "<|audio:691|>", + "<|audio:692|>", + "<|audio:693|>", + "<|audio:694|>", + "<|audio:695|>", + "<|audio:696|>", + "<|audio:697|>", + "<|audio:698|>", + "<|audio:699|>", + "<|audio:700|>", + "<|audio:701|>", + "<|audio:702|>", + "<|audio:703|>", + "<|audio:704|>", + "<|audio:705|>", + "<|audio:706|>", + "<|audio:707|>", + "<|audio:708|>", + "<|audio:709|>", + "<|audio:710|>", + "<|audio:711|>", + "<|audio:712|>", + "<|audio:713|>", + "<|audio:714|>", + "<|audio:715|>", + "<|audio:716|>", + "<|audio:717|>", + "<|audio:718|>", + "<|audio:719|>", + "<|audio:720|>", + "<|audio:721|>", + "<|audio:722|>", + "<|audio:723|>", + "<|audio:724|>", + "<|audio:725|>", + "<|audio:726|>", + "<|audio:727|>", + "<|audio:728|>", + "<|audio:729|>", + "<|audio:730|>", + "<|audio:731|>", + "<|audio:732|>", + "<|audio:733|>", + "<|audio:734|>", + "<|audio:735|>", + "<|audio:736|>", + "<|audio:737|>", + "<|audio:738|>", + "<|audio:739|>", + "<|audio:740|>", + "<|audio:741|>", + "<|audio:742|>", + "<|audio:743|>", + "<|audio:744|>", + "<|audio:745|>", + "<|audio:746|>", + "<|audio:747|>", + "<|audio:748|>", + "<|audio:749|>", + "<|audio:750|>", + "<|audio:751|>", + 
"<|audio:752|>", + "<|audio:753|>", + "<|audio:754|>", + "<|audio:755|>", + "<|audio:756|>", + "<|audio:757|>", + "<|audio:758|>", + "<|audio:759|>", + "<|audio:760|>", + "<|audio:761|>", + "<|audio:762|>", + "<|audio:763|>", + "<|audio:764|>", + "<|audio:765|>", + "<|audio:766|>", + "<|audio:767|>", + "<|audio:768|>", + "<|audio:769|>", + "<|audio:770|>", + "<|audio:771|>", + "<|audio:772|>", + "<|audio:773|>", + "<|audio:774|>", + "<|audio:775|>", + "<|audio:776|>", + "<|audio:777|>", + "<|audio:778|>", + "<|audio:779|>", + "<|audio:780|>", + "<|audio:781|>", + "<|audio:782|>", + "<|audio:783|>", + "<|audio:784|>", + "<|audio:785|>", + "<|audio:786|>", + "<|audio:787|>", + "<|audio:788|>", + "<|audio:789|>", + "<|audio:790|>", + "<|audio:791|>", + "<|audio:792|>", + "<|audio:793|>", + "<|audio:794|>", + "<|audio:795|>", + "<|audio:796|>", + "<|audio:797|>", + "<|audio:798|>", + "<|audio:799|>", + "<|audio:800|>", + "<|audio:801|>", + "<|audio:802|>", + "<|audio:803|>", + "<|audio:804|>", + "<|audio:805|>", + "<|audio:806|>", + "<|audio:807|>", + "<|audio:808|>", + "<|audio:809|>", + "<|audio:810|>", + "<|audio:811|>", + "<|audio:812|>", + "<|audio:813|>", + "<|audio:814|>", + "<|audio:815|>", + "<|audio:816|>", + "<|audio:817|>", + "<|audio:818|>", + "<|audio:819|>", + "<|audio:820|>", + "<|audio:821|>", + "<|audio:822|>", + "<|audio:823|>", + "<|audio:824|>", + "<|audio:825|>", + "<|audio:826|>", + "<|audio:827|>", + "<|audio:828|>", + "<|audio:829|>", + "<|audio:830|>", + "<|audio:831|>", + "<|audio:832|>", + "<|audio:833|>", + "<|audio:834|>", + "<|audio:835|>", + "<|audio:836|>", + "<|audio:837|>", + "<|audio:838|>", + "<|audio:839|>", + "<|audio:840|>", + "<|audio:841|>", + "<|audio:842|>", + "<|audio:843|>", + "<|audio:844|>", + "<|audio:845|>", + "<|audio:846|>", + "<|audio:847|>", + "<|audio:848|>", + "<|audio:849|>", + "<|audio:850|>", + "<|audio:851|>", + "<|audio:852|>", + "<|audio:853|>", + "<|audio:854|>", + "<|audio:855|>", + "<|audio:856|>", + 
"<|audio:857|>", + "<|audio:858|>", + "<|audio:859|>", + "<|audio:860|>", + "<|audio:861|>", + "<|audio:862|>", + "<|audio:863|>", + "<|audio:864|>", + "<|audio:865|>", + "<|audio:866|>", + "<|audio:867|>", + "<|audio:868|>", + "<|audio:869|>", + "<|audio:870|>", + "<|audio:871|>", + "<|audio:872|>", + "<|audio:873|>", + "<|audio:874|>", + "<|audio:875|>", + "<|audio:876|>", + "<|audio:877|>", + "<|audio:878|>", + "<|audio:879|>", + "<|audio:880|>", + "<|audio:881|>", + "<|audio:882|>", + "<|audio:883|>", + "<|audio:884|>", + "<|audio:885|>", + "<|audio:886|>", + "<|audio:887|>", + "<|audio:888|>", + "<|audio:889|>", + "<|audio:890|>", + "<|audio:891|>", + "<|audio:892|>", + "<|audio:893|>", + "<|audio:894|>", + "<|audio:895|>", + "<|audio:896|>", + "<|audio:897|>", + "<|audio:898|>", + "<|audio:899|>", + "<|audio:900|>", + "<|audio:901|>", + "<|audio:902|>", + "<|audio:903|>", + "<|audio:904|>", + "<|audio:905|>", + "<|audio:906|>", + "<|audio:907|>", + "<|audio:908|>", + "<|audio:909|>", + "<|audio:910|>", + "<|audio:911|>", + "<|audio:912|>", + "<|audio:913|>", + "<|audio:914|>", + "<|audio:915|>", + "<|audio:916|>", + "<|audio:917|>", + "<|audio:918|>", + "<|audio:919|>", + "<|audio:920|>", + "<|audio:921|>", + "<|audio:922|>", + "<|audio:923|>", + "<|audio:924|>", + "<|audio:925|>", + "<|audio:926|>", + "<|audio:927|>", + "<|audio:928|>", + "<|audio:929|>", + "<|audio:930|>", + "<|audio:931|>", + "<|audio:932|>", + "<|audio:933|>", + "<|audio:934|>", + "<|audio:935|>", + "<|audio:936|>", + "<|audio:937|>", + "<|audio:938|>", + "<|audio:939|>", + "<|audio:940|>", + "<|audio:941|>", + "<|audio:942|>", + "<|audio:943|>", + "<|audio:944|>", + "<|audio:945|>", + "<|audio:946|>", + "<|audio:947|>", + "<|audio:948|>", + "<|audio:949|>", + "<|audio:950|>", + "<|audio:951|>", + "<|audio:952|>", + "<|audio:953|>", + "<|audio:954|>", + "<|audio:955|>", + "<|audio:956|>", + "<|audio:957|>", + "<|audio:958|>", + "<|audio:959|>", + "<|audio:960|>", + "<|audio:961|>", + 
"<|audio:962|>", + "<|audio:963|>", + "<|audio:964|>", + "<|audio:965|>", + "<|audio:966|>", + "<|audio:967|>", + "<|audio:968|>", + "<|audio:969|>", + "<|audio:970|>", + "<|audio:971|>", + "<|audio:972|>", + "<|audio:973|>", + "<|audio:974|>", + "<|audio:975|>", + "<|audio:976|>", + "<|audio:977|>", + "<|audio:978|>", + "<|audio:979|>", + "<|audio:980|>", + "<|audio:981|>", + "<|audio:982|>", + "<|audio:983|>", + "<|audio:984|>", + "<|audio:985|>", + "<|audio:986|>", + "<|audio:987|>", + "<|audio:988|>", + "<|audio:989|>", + "<|audio:990|>", + "<|audio:991|>", + "<|audio:992|>", + "<|audio:993|>", + "<|audio:994|>", + "<|audio:995|>", + "<|audio:996|>", + "<|audio:997|>", + "<|audio:998|>", + "<|audio:999|>", + "<|audio:1000|>", + "<|audio:1001|>", + "<|audio:1002|>", + "<|audio:1003|>", + "<|audio:1004|>", + "<|audio:1005|>", + "<|audio:1006|>", + "<|audio:1007|>", + "<|audio:1008|>", + "<|audio:1009|>", + "<|audio:1010|>", + "<|audio:1011|>", + "<|audio:1012|>", + "<|audio:1013|>", + "<|audio:1014|>", + "<|audio:1015|>", + "<|audio:1016|>", + "<|audio:1017|>", + "<|audio:1018|>", + "<|audio:1019|>", + "<|audio:1020|>", + "<|audio:1021|>", + "<|audio:1022|>", + "<|audio:1023|>", + "<|startoftranscript|>", + "<|endoftranscript|>", + "<|padding|>" + ], + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/out/checkpoint-17000/trainer_state.json b/out/checkpoint-17000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4fe9ed7663f5b200798eb175e5d08d0c0bffae55 --- /dev/null +++ b/out/checkpoint-17000/trainer_state.json @@ -0,0 +1,119169 @@ +{ + "best_metric": 2.3730249404907227, + "best_model_checkpoint": "./out/checkpoint-17000", + "epoch": 1.3719635219110644, + "eval_steps": 1000, + "global_step": 17000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 8.07037365830038e-05, + "grad_norm": 0.8911969065666199, + "learning_rate": 
2.0000000000000003e-06, + "loss": 2.6759, + "step": 1 + }, + { + "epoch": 0.0001614074731660076, + "grad_norm": 0.8724873661994934, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7001, + "step": 2 + }, + { + "epoch": 0.00024211120974901139, + "grad_norm": 0.9050428867340088, + "learning_rate": 6e-06, + "loss": 2.6291, + "step": 3 + }, + { + "epoch": 0.0003228149463320152, + "grad_norm": 0.9249712824821472, + "learning_rate": 8.000000000000001e-06, + "loss": 2.7174, + "step": 4 + }, + { + "epoch": 0.000403518682915019, + "grad_norm": 0.9102846384048462, + "learning_rate": 1e-05, + "loss": 2.6831, + "step": 5 + }, + { + "epoch": 0.00048422241949802277, + "grad_norm": 0.9129141569137573, + "learning_rate": 1.2e-05, + "loss": 2.684, + "step": 6 + }, + { + "epoch": 0.0005649261560810266, + "grad_norm": 0.8648065328598022, + "learning_rate": 1.4000000000000001e-05, + "loss": 2.6488, + "step": 7 + }, + { + "epoch": 0.0006456298926640304, + "grad_norm": 0.8677545785903931, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.7143, + "step": 8 + }, + { + "epoch": 0.0007263336292470342, + "grad_norm": 0.919029712677002, + "learning_rate": 1.8e-05, + "loss": 2.631, + "step": 9 + }, + { + "epoch": 0.000807037365830038, + "grad_norm": 0.9289683103561401, + "learning_rate": 2e-05, + "loss": 2.6564, + "step": 10 + }, + { + "epoch": 0.0008877411024130417, + "grad_norm": 0.8810267448425293, + "learning_rate": 2.2000000000000003e-05, + "loss": 2.6395, + "step": 11 + }, + { + "epoch": 0.0009684448389960455, + "grad_norm": 0.8185754418373108, + "learning_rate": 2.4e-05, + "loss": 2.6871, + "step": 12 + }, + { + "epoch": 0.0010491485755790492, + "grad_norm": 0.9476913213729858, + "learning_rate": 2.6000000000000002e-05, + "loss": 2.7011, + "step": 13 + }, + { + "epoch": 0.0011298523121620531, + "grad_norm": 0.9616057872772217, + "learning_rate": 2.8000000000000003e-05, + "loss": 2.7373, + "step": 14 + }, + { + "epoch": 0.0012105560487450568, + "grad_norm": 0.9429686665534973, 
+ "learning_rate": 3e-05, + "loss": 2.7556, + "step": 15 + }, + { + "epoch": 0.0012912597853280607, + "grad_norm": 1.0331422090530396, + "learning_rate": 3.2000000000000005e-05, + "loss": 2.7756, + "step": 16 + }, + { + "epoch": 0.0013719635219110644, + "grad_norm": 0.906057596206665, + "learning_rate": 3.4000000000000007e-05, + "loss": 2.7053, + "step": 17 + }, + { + "epoch": 0.0014526672584940683, + "grad_norm": 0.8677626252174377, + "learning_rate": 3.6e-05, + "loss": 2.7012, + "step": 18 + }, + { + "epoch": 0.001533370995077072, + "grad_norm": 0.9378079175949097, + "learning_rate": 3.8e-05, + "loss": 2.6786, + "step": 19 + }, + { + "epoch": 0.001614074731660076, + "grad_norm": 1.0333882570266724, + "learning_rate": 4e-05, + "loss": 2.689, + "step": 20 + }, + { + "epoch": 0.0016947784682430796, + "grad_norm": 0.9435378909111023, + "learning_rate": 4.2e-05, + "loss": 2.7084, + "step": 21 + }, + { + "epoch": 0.0017754822048260835, + "grad_norm": 0.9530225396156311, + "learning_rate": 4.4000000000000006e-05, + "loss": 2.7039, + "step": 22 + }, + { + "epoch": 0.0018561859414090872, + "grad_norm": 1.0154749155044556, + "learning_rate": 4.600000000000001e-05, + "loss": 2.6623, + "step": 23 + }, + { + "epoch": 0.001936889677992091, + "grad_norm": 1.0341671705245972, + "learning_rate": 4.8e-05, + "loss": 2.7072, + "step": 24 + }, + { + "epoch": 0.002017593414575095, + "grad_norm": 0.9185739159584045, + "learning_rate": 5e-05, + "loss": 2.6595, + "step": 25 + }, + { + "epoch": 0.0020982971511580985, + "grad_norm": 1.060390591621399, + "learning_rate": 5.2000000000000004e-05, + "loss": 2.7045, + "step": 26 + }, + { + "epoch": 0.0021790008877411024, + "grad_norm": 0.9720118641853333, + "learning_rate": 5.4000000000000005e-05, + "loss": 2.6513, + "step": 27 + }, + { + "epoch": 0.0022597046243241063, + "grad_norm": 0.9426784515380859, + "learning_rate": 5.6000000000000006e-05, + "loss": 2.6541, + "step": 28 + }, + { + "epoch": 0.00234040836090711, + "grad_norm": 
0.9736170768737793, + "learning_rate": 5.8e-05, + "loss": 2.7324, + "step": 29 + }, + { + "epoch": 0.0024211120974901136, + "grad_norm": 0.9831354022026062, + "learning_rate": 6e-05, + "loss": 2.6651, + "step": 30 + }, + { + "epoch": 0.0025018158340731175, + "grad_norm": 1.0222605466842651, + "learning_rate": 6.2e-05, + "loss": 2.7375, + "step": 31 + }, + { + "epoch": 0.0025825195706561214, + "grad_norm": 0.9182235598564148, + "learning_rate": 6.400000000000001e-05, + "loss": 2.7142, + "step": 32 + }, + { + "epoch": 0.0026632233072391254, + "grad_norm": 1.0200958251953125, + "learning_rate": 6.6e-05, + "loss": 2.6785, + "step": 33 + }, + { + "epoch": 0.002743927043822129, + "grad_norm": 1.0153381824493408, + "learning_rate": 6.800000000000001e-05, + "loss": 2.6737, + "step": 34 + }, + { + "epoch": 0.0028246307804051327, + "grad_norm": 0.8998087644577026, + "learning_rate": 7e-05, + "loss": 2.7594, + "step": 35 + }, + { + "epoch": 0.0029053345169881366, + "grad_norm": 0.9005621671676636, + "learning_rate": 7.2e-05, + "loss": 2.713, + "step": 36 + }, + { + "epoch": 0.0029860382535711405, + "grad_norm": 1.0165663957595825, + "learning_rate": 7.4e-05, + "loss": 2.7197, + "step": 37 + }, + { + "epoch": 0.003066741990154144, + "grad_norm": 1.0011894702911377, + "learning_rate": 7.6e-05, + "loss": 2.6315, + "step": 38 + }, + { + "epoch": 0.003147445726737148, + "grad_norm": 1.141209602355957, + "learning_rate": 7.800000000000001e-05, + "loss": 2.7249, + "step": 39 + }, + { + "epoch": 0.003228149463320152, + "grad_norm": 0.9114719033241272, + "learning_rate": 8e-05, + "loss": 2.7039, + "step": 40 + }, + { + "epoch": 0.0033088531999031557, + "grad_norm": 1.0193392038345337, + "learning_rate": 8.2e-05, + "loss": 2.6501, + "step": 41 + }, + { + "epoch": 0.003389556936486159, + "grad_norm": 0.9458270072937012, + "learning_rate": 8.4e-05, + "loss": 2.725, + "step": 42 + }, + { + "epoch": 0.003470260673069163, + "grad_norm": 0.9667492508888245, + "learning_rate": 8.6e-05, + 
"loss": 2.7232, + "step": 43 + }, + { + "epoch": 0.003550964409652167, + "grad_norm": 0.9987972378730774, + "learning_rate": 8.800000000000001e-05, + "loss": 2.6554, + "step": 44 + }, + { + "epoch": 0.003631668146235171, + "grad_norm": 1.0166393518447876, + "learning_rate": 9e-05, + "loss": 2.7291, + "step": 45 + }, + { + "epoch": 0.0037123718828181744, + "grad_norm": 0.9557009935379028, + "learning_rate": 9.200000000000001e-05, + "loss": 2.7194, + "step": 46 + }, + { + "epoch": 0.0037930756194011783, + "grad_norm": 0.9575492143630981, + "learning_rate": 9.4e-05, + "loss": 2.6671, + "step": 47 + }, + { + "epoch": 0.003873779355984182, + "grad_norm": 0.9614555239677429, + "learning_rate": 9.6e-05, + "loss": 2.6865, + "step": 48 + }, + { + "epoch": 0.003954483092567186, + "grad_norm": 0.9245515465736389, + "learning_rate": 9.8e-05, + "loss": 2.7821, + "step": 49 + }, + { + "epoch": 0.00403518682915019, + "grad_norm": 0.9756044745445251, + "learning_rate": 0.0001, + "loss": 2.7608, + "step": 50 + }, + { + "epoch": 0.0041158905657331935, + "grad_norm": 0.95787513256073, + "learning_rate": 0.00010200000000000001, + "loss": 2.6458, + "step": 51 + }, + { + "epoch": 0.004196594302316197, + "grad_norm": 1.0102490186691284, + "learning_rate": 0.00010400000000000001, + "loss": 2.7835, + "step": 52 + }, + { + "epoch": 0.004277298038899201, + "grad_norm": 0.9676176309585571, + "learning_rate": 0.00010600000000000002, + "loss": 2.702, + "step": 53 + }, + { + "epoch": 0.004358001775482205, + "grad_norm": 0.9724096655845642, + "learning_rate": 0.00010800000000000001, + "loss": 2.714, + "step": 54 + }, + { + "epoch": 0.004438705512065208, + "grad_norm": 0.9482994675636292, + "learning_rate": 0.00011000000000000002, + "loss": 2.8069, + "step": 55 + }, + { + "epoch": 0.0045194092486482125, + "grad_norm": 0.9886480569839478, + "learning_rate": 0.00011200000000000001, + "loss": 2.7468, + "step": 56 + }, + { + "epoch": 0.004600112985231216, + "grad_norm": 0.9696247577667236, + 
"learning_rate": 0.00011399999999999999, + "loss": 2.7486, + "step": 57 + }, + { + "epoch": 0.00468081672181422, + "grad_norm": 1.0638912916183472, + "learning_rate": 0.000116, + "loss": 2.7747, + "step": 58 + }, + { + "epoch": 0.004761520458397224, + "grad_norm": 1.016483187675476, + "learning_rate": 0.000118, + "loss": 2.6925, + "step": 59 + }, + { + "epoch": 0.004842224194980227, + "grad_norm": 1.0298779010772705, + "learning_rate": 0.00012, + "loss": 2.7487, + "step": 60 + }, + { + "epoch": 0.004922927931563232, + "grad_norm": 1.1082268953323364, + "learning_rate": 0.000122, + "loss": 2.7697, + "step": 61 + }, + { + "epoch": 0.005003631668146235, + "grad_norm": 0.9202101826667786, + "learning_rate": 0.000124, + "loss": 2.7429, + "step": 62 + }, + { + "epoch": 0.0050843354047292386, + "grad_norm": 1.0140503644943237, + "learning_rate": 0.000126, + "loss": 2.7492, + "step": 63 + }, + { + "epoch": 0.005165039141312243, + "grad_norm": 1.0689163208007812, + "learning_rate": 0.00012800000000000002, + "loss": 2.7353, + "step": 64 + }, + { + "epoch": 0.005245742877895246, + "grad_norm": 0.9947141408920288, + "learning_rate": 0.00013000000000000002, + "loss": 2.7385, + "step": 65 + }, + { + "epoch": 0.005326446614478251, + "grad_norm": 1.2034410238265991, + "learning_rate": 0.000132, + "loss": 2.7632, + "step": 66 + }, + { + "epoch": 0.005407150351061254, + "grad_norm": 0.9450412392616272, + "learning_rate": 0.000134, + "loss": 2.7547, + "step": 67 + }, + { + "epoch": 0.005487854087644258, + "grad_norm": 1.1818269491195679, + "learning_rate": 0.00013600000000000003, + "loss": 2.7663, + "step": 68 + }, + { + "epoch": 0.005568557824227262, + "grad_norm": 1.003347396850586, + "learning_rate": 0.000138, + "loss": 2.7299, + "step": 69 + }, + { + "epoch": 0.0056492615608102655, + "grad_norm": 1.0105760097503662, + "learning_rate": 0.00014, + "loss": 2.7261, + "step": 70 + }, + { + "epoch": 0.005729965297393269, + "grad_norm": 0.9459090232849121, + "learning_rate": 0.000142, + 
"loss": 2.7237, + "step": 71 + }, + { + "epoch": 0.005810669033976273, + "grad_norm": 0.9716219305992126, + "learning_rate": 0.000144, + "loss": 2.8175, + "step": 72 + }, + { + "epoch": 0.005891372770559277, + "grad_norm": 0.9968419075012207, + "learning_rate": 0.000146, + "loss": 2.7828, + "step": 73 + }, + { + "epoch": 0.005972076507142281, + "grad_norm": 1.099680781364441, + "learning_rate": 0.000148, + "loss": 2.7111, + "step": 74 + }, + { + "epoch": 0.0060527802437252845, + "grad_norm": 1.004846453666687, + "learning_rate": 0.00015000000000000001, + "loss": 2.7508, + "step": 75 + }, + { + "epoch": 0.006133483980308288, + "grad_norm": 1.0568128824234009, + "learning_rate": 0.000152, + "loss": 2.7341, + "step": 76 + }, + { + "epoch": 0.006214187716891292, + "grad_norm": 0.9871000051498413, + "learning_rate": 0.000154, + "loss": 2.7831, + "step": 77 + }, + { + "epoch": 0.006294891453474296, + "grad_norm": 1.005947232246399, + "learning_rate": 0.00015600000000000002, + "loss": 2.6798, + "step": 78 + }, + { + "epoch": 0.006375595190057299, + "grad_norm": 0.9984713792800903, + "learning_rate": 0.00015800000000000002, + "loss": 2.8126, + "step": 79 + }, + { + "epoch": 0.006456298926640304, + "grad_norm": 0.9805751442909241, + "learning_rate": 0.00016, + "loss": 2.7826, + "step": 80 + }, + { + "epoch": 0.006537002663223307, + "grad_norm": 1.02998685836792, + "learning_rate": 0.000162, + "loss": 2.7636, + "step": 81 + }, + { + "epoch": 0.006617706399806311, + "grad_norm": 1.0790135860443115, + "learning_rate": 0.000164, + "loss": 2.7809, + "step": 82 + }, + { + "epoch": 0.006698410136389315, + "grad_norm": 1.1058307886123657, + "learning_rate": 0.000166, + "loss": 2.787, + "step": 83 + }, + { + "epoch": 0.006779113872972318, + "grad_norm": 1.0199624300003052, + "learning_rate": 0.000168, + "loss": 2.7171, + "step": 84 + }, + { + "epoch": 0.006859817609555323, + "grad_norm": 1.006494402885437, + "learning_rate": 0.00017, + "loss": 2.7791, + "step": 85 + }, + { + 
"epoch": 0.006940521346138326, + "grad_norm": 0.9672449827194214, + "learning_rate": 0.000172, + "loss": 2.6929, + "step": 86 + }, + { + "epoch": 0.00702122508272133, + "grad_norm": 0.9747781157493591, + "learning_rate": 0.000174, + "loss": 2.7676, + "step": 87 + }, + { + "epoch": 0.007101928819304334, + "grad_norm": 0.9193839430809021, + "learning_rate": 0.00017600000000000002, + "loss": 2.7124, + "step": 88 + }, + { + "epoch": 0.0071826325558873375, + "grad_norm": 1.078499436378479, + "learning_rate": 0.00017800000000000002, + "loss": 2.8018, + "step": 89 + }, + { + "epoch": 0.007263336292470342, + "grad_norm": 1.070957899093628, + "learning_rate": 0.00018, + "loss": 2.7889, + "step": 90 + }, + { + "epoch": 0.007344040029053345, + "grad_norm": 1.160942554473877, + "learning_rate": 0.000182, + "loss": 2.8026, + "step": 91 + }, + { + "epoch": 0.007424743765636349, + "grad_norm": 0.9988501071929932, + "learning_rate": 0.00018400000000000003, + "loss": 2.7746, + "step": 92 + }, + { + "epoch": 0.007505447502219353, + "grad_norm": 1.0882319211959839, + "learning_rate": 0.00018600000000000002, + "loss": 2.8105, + "step": 93 + }, + { + "epoch": 0.0075861512388023565, + "grad_norm": 1.1882357597351074, + "learning_rate": 0.000188, + "loss": 2.8294, + "step": 94 + }, + { + "epoch": 0.00766685497538536, + "grad_norm": 1.0761829614639282, + "learning_rate": 0.00019, + "loss": 2.7846, + "step": 95 + }, + { + "epoch": 0.007747558711968364, + "grad_norm": 1.0665982961654663, + "learning_rate": 0.000192, + "loss": 2.8542, + "step": 96 + }, + { + "epoch": 0.007828262448551369, + "grad_norm": 1.206127405166626, + "learning_rate": 0.000194, + "loss": 2.7711, + "step": 97 + }, + { + "epoch": 0.007908966185134371, + "grad_norm": 1.095150113105774, + "learning_rate": 0.000196, + "loss": 2.732, + "step": 98 + }, + { + "epoch": 0.007989669921717376, + "grad_norm": 1.118348240852356, + "learning_rate": 0.00019800000000000002, + "loss": 2.7736, + "step": 99 + }, + { + "epoch": 
0.00807037365830038, + "grad_norm": 1.0646461248397827, + "learning_rate": 0.0002, + "loss": 2.8584, + "step": 100 + }, + { + "epoch": 0.008151077394883383, + "grad_norm": 1.0387661457061768, + "learning_rate": 0.0001999999987538693, + "loss": 2.7961, + "step": 101 + }, + { + "epoch": 0.008231781131466387, + "grad_norm": 1.1905474662780762, + "learning_rate": 0.00019999999501547723, + "loss": 2.8615, + "step": 102 + }, + { + "epoch": 0.008312484868049391, + "grad_norm": 0.9630722999572754, + "learning_rate": 0.0001999999887848239, + "loss": 2.8076, + "step": 103 + }, + { + "epoch": 0.008393188604632394, + "grad_norm": 1.1034537553787231, + "learning_rate": 0.00019999998006190942, + "loss": 2.8402, + "step": 104 + }, + { + "epoch": 0.008473892341215398, + "grad_norm": 1.0679295063018799, + "learning_rate": 0.00019999996884673403, + "loss": 2.7948, + "step": 105 + }, + { + "epoch": 0.008554596077798403, + "grad_norm": 1.0108860731124878, + "learning_rate": 0.00019999995513929802, + "loss": 2.7996, + "step": 106 + }, + { + "epoch": 0.008635299814381405, + "grad_norm": 1.3762084245681763, + "learning_rate": 0.0001999999389396017, + "loss": 2.8023, + "step": 107 + }, + { + "epoch": 0.00871600355096441, + "grad_norm": 1.1320533752441406, + "learning_rate": 0.00019999992024764555, + "loss": 2.793, + "step": 108 + }, + { + "epoch": 0.008796707287547414, + "grad_norm": 1.1752389669418335, + "learning_rate": 0.00019999989906342998, + "loss": 2.8274, + "step": 109 + }, + { + "epoch": 0.008877411024130416, + "grad_norm": 1.2734956741333008, + "learning_rate": 0.00019999987538695552, + "loss": 2.8017, + "step": 110 + }, + { + "epoch": 0.00895811476071342, + "grad_norm": 1.3703055381774902, + "learning_rate": 0.00019999984921822273, + "loss": 2.8699, + "step": 111 + }, + { + "epoch": 0.009038818497296425, + "grad_norm": 1.0079127550125122, + "learning_rate": 0.0001999998205572323, + "loss": 2.8845, + "step": 112 + }, + { + "epoch": 0.00911952223387943, + "grad_norm": 
1.28025484085083, + "learning_rate": 0.000199999789403985, + "loss": 2.8636, + "step": 113 + }, + { + "epoch": 0.009200225970462432, + "grad_norm": 1.1057093143463135, + "learning_rate": 0.00019999975575848148, + "loss": 2.8484, + "step": 114 + }, + { + "epoch": 0.009280929707045436, + "grad_norm": 1.0874677896499634, + "learning_rate": 0.00019999971962072265, + "loss": 2.7314, + "step": 115 + }, + { + "epoch": 0.00936163344362844, + "grad_norm": 1.0909658670425415, + "learning_rate": 0.00019999968099070943, + "loss": 2.7827, + "step": 116 + }, + { + "epoch": 0.009442337180211443, + "grad_norm": 1.0881624221801758, + "learning_rate": 0.00019999963986844273, + "loss": 2.827, + "step": 117 + }, + { + "epoch": 0.009523040916794448, + "grad_norm": 1.2498180866241455, + "learning_rate": 0.00019999959625392362, + "loss": 2.8695, + "step": 118 + }, + { + "epoch": 0.009603744653377452, + "grad_norm": 1.1344549655914307, + "learning_rate": 0.00019999955014715317, + "loss": 2.8079, + "step": 119 + }, + { + "epoch": 0.009684448389960455, + "grad_norm": 1.032563328742981, + "learning_rate": 0.00019999950154813253, + "loss": 2.7787, + "step": 120 + }, + { + "epoch": 0.009765152126543459, + "grad_norm": 0.9630110263824463, + "learning_rate": 0.0001999994504568629, + "loss": 2.8103, + "step": 121 + }, + { + "epoch": 0.009845855863126463, + "grad_norm": 1.0418641567230225, + "learning_rate": 0.0001999993968733456, + "loss": 2.8679, + "step": 122 + }, + { + "epoch": 0.009926559599709466, + "grad_norm": 0.9797310829162598, + "learning_rate": 0.00019999934079758188, + "loss": 2.7792, + "step": 123 + }, + { + "epoch": 0.01000726333629247, + "grad_norm": 1.0494028329849243, + "learning_rate": 0.00019999928222957323, + "loss": 2.8007, + "step": 124 + }, + { + "epoch": 0.010087967072875475, + "grad_norm": 1.1570640802383423, + "learning_rate": 0.00019999922116932105, + "loss": 2.8331, + "step": 125 + }, + { + "epoch": 0.010168670809458477, + "grad_norm": 1.2753098011016846, + 
"learning_rate": 0.00019999915761682684, + "loss": 2.8533, + "step": 126 + }, + { + "epoch": 0.010249374546041481, + "grad_norm": 0.9804013967514038, + "learning_rate": 0.00019999909157209227, + "loss": 2.841, + "step": 127 + }, + { + "epoch": 0.010330078282624486, + "grad_norm": 1.320839285850525, + "learning_rate": 0.00019999902303511892, + "loss": 2.8738, + "step": 128 + }, + { + "epoch": 0.01041078201920749, + "grad_norm": 1.1105059385299683, + "learning_rate": 0.0001999989520059085, + "loss": 2.8458, + "step": 129 + }, + { + "epoch": 0.010491485755790493, + "grad_norm": 1.2869762182235718, + "learning_rate": 0.0001999988784844628, + "loss": 2.7951, + "step": 130 + }, + { + "epoch": 0.010572189492373497, + "grad_norm": 1.1609153747558594, + "learning_rate": 0.00019999880247078368, + "loss": 2.8147, + "step": 131 + }, + { + "epoch": 0.010652893228956501, + "grad_norm": 1.066728115081787, + "learning_rate": 0.00019999872396487297, + "loss": 2.863, + "step": 132 + }, + { + "epoch": 0.010733596965539504, + "grad_norm": 1.2868720293045044, + "learning_rate": 0.0001999986429667327, + "loss": 2.7765, + "step": 133 + }, + { + "epoch": 0.010814300702122508, + "grad_norm": 1.0064955949783325, + "learning_rate": 0.00019999855947636485, + "loss": 2.7834, + "step": 134 + }, + { + "epoch": 0.010895004438705513, + "grad_norm": 1.146589756011963, + "learning_rate": 0.00019999847349377143, + "loss": 2.7966, + "step": 135 + }, + { + "epoch": 0.010975708175288515, + "grad_norm": 0.9831073880195618, + "learning_rate": 0.0001999983850189547, + "loss": 2.8877, + "step": 136 + }, + { + "epoch": 0.01105641191187152, + "grad_norm": 1.1690322160720825, + "learning_rate": 0.0001999982940519168, + "loss": 2.8514, + "step": 137 + }, + { + "epoch": 0.011137115648454524, + "grad_norm": 1.0014944076538086, + "learning_rate": 0.00019999820059266003, + "loss": 2.7846, + "step": 138 + }, + { + "epoch": 0.011217819385037527, + "grad_norm": 0.9581566452980042, + "learning_rate": 
0.0001999981046411867, + "loss": 2.7907, + "step": 139 + }, + { + "epoch": 0.011298523121620531, + "grad_norm": 1.1300675868988037, + "learning_rate": 0.00019999800619749922, + "loss": 2.8099, + "step": 140 + }, + { + "epoch": 0.011379226858203535, + "grad_norm": 0.9845526814460754, + "learning_rate": 0.0001999979052616, + "loss": 2.8607, + "step": 141 + }, + { + "epoch": 0.011459930594786538, + "grad_norm": 1.0781387090682983, + "learning_rate": 0.0001999978018334916, + "loss": 2.831, + "step": 142 + }, + { + "epoch": 0.011540634331369542, + "grad_norm": 1.1142648458480835, + "learning_rate": 0.00019999769591317658, + "loss": 2.9194, + "step": 143 + }, + { + "epoch": 0.011621338067952547, + "grad_norm": 0.9972650408744812, + "learning_rate": 0.00019999758750065757, + "loss": 2.8253, + "step": 144 + }, + { + "epoch": 0.01170204180453555, + "grad_norm": 1.040738582611084, + "learning_rate": 0.0001999974765959373, + "loss": 2.7378, + "step": 145 + }, + { + "epoch": 0.011782745541118553, + "grad_norm": 0.9824327826499939, + "learning_rate": 0.00019999736319901848, + "loss": 2.8263, + "step": 146 + }, + { + "epoch": 0.011863449277701558, + "grad_norm": 1.0531679391860962, + "learning_rate": 0.00019999724730990402, + "loss": 2.7975, + "step": 147 + }, + { + "epoch": 0.011944153014284562, + "grad_norm": 1.0699561834335327, + "learning_rate": 0.0001999971289285967, + "loss": 2.8199, + "step": 148 + }, + { + "epoch": 0.012024856750867565, + "grad_norm": 1.0203633308410645, + "learning_rate": 0.0001999970080550996, + "loss": 2.8479, + "step": 149 + }, + { + "epoch": 0.012105560487450569, + "grad_norm": 1.035589575767517, + "learning_rate": 0.00019999688468941564, + "loss": 2.8263, + "step": 150 + }, + { + "epoch": 0.012186264224033573, + "grad_norm": 0.9706670641899109, + "learning_rate": 0.00019999675883154792, + "loss": 2.8324, + "step": 151 + }, + { + "epoch": 0.012266967960616576, + "grad_norm": 1.1565446853637695, + "learning_rate": 0.00019999663048149958, + "loss": 
2.8098, + "step": 152 + }, + { + "epoch": 0.01234767169719958, + "grad_norm": 1.025796890258789, + "learning_rate": 0.0001999964996392738, + "loss": 2.7906, + "step": 153 + }, + { + "epoch": 0.012428375433782585, + "grad_norm": 1.117438554763794, + "learning_rate": 0.00019999636630487386, + "loss": 2.8276, + "step": 154 + }, + { + "epoch": 0.012509079170365587, + "grad_norm": 1.025159478187561, + "learning_rate": 0.00019999623047830308, + "loss": 2.8089, + "step": 155 + }, + { + "epoch": 0.012589782906948592, + "grad_norm": 1.007582664489746, + "learning_rate": 0.00019999609215956487, + "loss": 2.8147, + "step": 156 + }, + { + "epoch": 0.012670486643531596, + "grad_norm": 1.0504885911941528, + "learning_rate": 0.0001999959513486626, + "loss": 2.8329, + "step": 157 + }, + { + "epoch": 0.012751190380114599, + "grad_norm": 0.918382465839386, + "learning_rate": 0.00019999580804559987, + "loss": 2.878, + "step": 158 + }, + { + "epoch": 0.012831894116697603, + "grad_norm": 0.9397236704826355, + "learning_rate": 0.0001999956622503802, + "loss": 2.8254, + "step": 159 + }, + { + "epoch": 0.012912597853280607, + "grad_norm": 0.9985697269439697, + "learning_rate": 0.00019999551396300723, + "loss": 2.8417, + "step": 160 + }, + { + "epoch": 0.01299330158986361, + "grad_norm": 0.9866878390312195, + "learning_rate": 0.00019999536318348465, + "loss": 2.7524, + "step": 161 + }, + { + "epoch": 0.013074005326446614, + "grad_norm": 1.0707440376281738, + "learning_rate": 0.00019999520991181627, + "loss": 2.8171, + "step": 162 + }, + { + "epoch": 0.013154709063029619, + "grad_norm": 0.9359755516052246, + "learning_rate": 0.00019999505414800583, + "loss": 2.8463, + "step": 163 + }, + { + "epoch": 0.013235412799612623, + "grad_norm": 1.056647777557373, + "learning_rate": 0.00019999489589205726, + "loss": 2.8602, + "step": 164 + }, + { + "epoch": 0.013316116536195625, + "grad_norm": 0.975370466709137, + "learning_rate": 0.0001999947351439745, + "loss": 2.8292, + "step": 165 + }, + { + 
"epoch": 0.01339682027277863, + "grad_norm": 0.9241237044334412, + "learning_rate": 0.00019999457190376157, + "loss": 2.7827, + "step": 166 + }, + { + "epoch": 0.013477524009361634, + "grad_norm": 0.9478302001953125, + "learning_rate": 0.00019999440617142247, + "loss": 2.7708, + "step": 167 + }, + { + "epoch": 0.013558227745944637, + "grad_norm": 0.9804863333702087, + "learning_rate": 0.00019999423794696142, + "loss": 2.7696, + "step": 168 + }, + { + "epoch": 0.013638931482527641, + "grad_norm": 0.9764013886451721, + "learning_rate": 0.00019999406723038255, + "loss": 2.8521, + "step": 169 + }, + { + "epoch": 0.013719635219110645, + "grad_norm": 1.026532769203186, + "learning_rate": 0.00019999389402169016, + "loss": 2.8507, + "step": 170 + }, + { + "epoch": 0.013800338955693648, + "grad_norm": 0.9983204007148743, + "learning_rate": 0.00019999371832088854, + "loss": 2.8761, + "step": 171 + }, + { + "epoch": 0.013881042692276652, + "grad_norm": 0.9914593696594238, + "learning_rate": 0.00019999354012798206, + "loss": 2.8723, + "step": 172 + }, + { + "epoch": 0.013961746428859657, + "grad_norm": 1.066962718963623, + "learning_rate": 0.00019999335944297517, + "loss": 2.8635, + "step": 173 + }, + { + "epoch": 0.01404245016544266, + "grad_norm": 1.0848973989486694, + "learning_rate": 0.0001999931762658724, + "loss": 2.8645, + "step": 174 + }, + { + "epoch": 0.014123153902025664, + "grad_norm": 1.0245702266693115, + "learning_rate": 0.0001999929905966783, + "loss": 2.8463, + "step": 175 + }, + { + "epoch": 0.014203857638608668, + "grad_norm": 1.2363669872283936, + "learning_rate": 0.00019999280243539747, + "loss": 2.8345, + "step": 176 + }, + { + "epoch": 0.01428456137519167, + "grad_norm": 1.0224756002426147, + "learning_rate": 0.0001999926117820346, + "loss": 2.8309, + "step": 177 + }, + { + "epoch": 0.014365265111774675, + "grad_norm": 1.0882402658462524, + "learning_rate": 0.0001999924186365945, + "loss": 2.8619, + "step": 178 + }, + { + "epoch": 0.01444596884835768, + 
"grad_norm": 1.0384254455566406, + "learning_rate": 0.00019999222299908192, + "loss": 2.8477, + "step": 179 + }, + { + "epoch": 0.014526672584940684, + "grad_norm": 0.9662587642669678, + "learning_rate": 0.00019999202486950177, + "loss": 2.8087, + "step": 180 + }, + { + "epoch": 0.014607376321523686, + "grad_norm": 0.9086892604827881, + "learning_rate": 0.000199991824247859, + "loss": 2.7688, + "step": 181 + }, + { + "epoch": 0.01468808005810669, + "grad_norm": 1.004185676574707, + "learning_rate": 0.00019999162113415854, + "loss": 2.8237, + "step": 182 + }, + { + "epoch": 0.014768783794689695, + "grad_norm": 0.997965395450592, + "learning_rate": 0.00019999141552840552, + "loss": 2.8228, + "step": 183 + }, + { + "epoch": 0.014849487531272697, + "grad_norm": 0.9844975471496582, + "learning_rate": 0.00019999120743060503, + "loss": 2.8582, + "step": 184 + }, + { + "epoch": 0.014930191267855702, + "grad_norm": 1.0531272888183594, + "learning_rate": 0.00019999099684076232, + "loss": 2.8571, + "step": 185 + }, + { + "epoch": 0.015010895004438706, + "grad_norm": 1.1178920269012451, + "learning_rate": 0.00019999078375888257, + "loss": 2.85, + "step": 186 + }, + { + "epoch": 0.015091598741021709, + "grad_norm": 1.0773903131484985, + "learning_rate": 0.0001999905681849711, + "loss": 2.826, + "step": 187 + }, + { + "epoch": 0.015172302477604713, + "grad_norm": 1.1573486328125, + "learning_rate": 0.00019999035011903325, + "loss": 2.8866, + "step": 188 + }, + { + "epoch": 0.015253006214187717, + "grad_norm": 1.0401980876922607, + "learning_rate": 0.00019999012956107456, + "loss": 2.788, + "step": 189 + }, + { + "epoch": 0.01533370995077072, + "grad_norm": 1.0150686502456665, + "learning_rate": 0.00019998990651110045, + "loss": 2.8542, + "step": 190 + }, + { + "epoch": 0.015414413687353724, + "grad_norm": 1.1902797222137451, + "learning_rate": 0.0001999896809691165, + "loss": 2.9209, + "step": 191 + }, + { + "epoch": 0.015495117423936729, + "grad_norm": 1.0177555084228516, + 
"learning_rate": 0.0001999894529351283, + "loss": 2.7852, + "step": 192 + }, + { + "epoch": 0.015575821160519731, + "grad_norm": 1.062322974205017, + "learning_rate": 0.00019998922240914159, + "loss": 2.8328, + "step": 193 + }, + { + "epoch": 0.015656524897102737, + "grad_norm": 1.0937334299087524, + "learning_rate": 0.00019998898939116205, + "loss": 2.8069, + "step": 194 + }, + { + "epoch": 0.015737228633685738, + "grad_norm": 0.9553198218345642, + "learning_rate": 0.00019998875388119554, + "loss": 2.8402, + "step": 195 + }, + { + "epoch": 0.015817932370268743, + "grad_norm": 1.1802356243133545, + "learning_rate": 0.0001999885158792479, + "loss": 2.945, + "step": 196 + }, + { + "epoch": 0.015898636106851747, + "grad_norm": 1.160346269607544, + "learning_rate": 0.0001999882753853251, + "loss": 2.8341, + "step": 197 + }, + { + "epoch": 0.01597933984343475, + "grad_norm": 1.0379278659820557, + "learning_rate": 0.00019998803239943305, + "loss": 2.898, + "step": 198 + }, + { + "epoch": 0.016060043580017756, + "grad_norm": 1.2022395133972168, + "learning_rate": 0.00019998778692157792, + "loss": 2.8302, + "step": 199 + }, + { + "epoch": 0.01614074731660076, + "grad_norm": 1.057017207145691, + "learning_rate": 0.00019998753895176575, + "loss": 2.8474, + "step": 200 + }, + { + "epoch": 0.01622145105318376, + "grad_norm": 0.9299072027206421, + "learning_rate": 0.00019998728849000271, + "loss": 2.8266, + "step": 201 + }, + { + "epoch": 0.016302154789766765, + "grad_norm": 1.0296592712402344, + "learning_rate": 0.00019998703553629512, + "loss": 2.8106, + "step": 202 + }, + { + "epoch": 0.01638285852634977, + "grad_norm": 0.9641671180725098, + "learning_rate": 0.0001999867800906492, + "loss": 2.8089, + "step": 203 + }, + { + "epoch": 0.016463562262932774, + "grad_norm": 0.9951125383377075, + "learning_rate": 0.00019998652215307136, + "loss": 2.813, + "step": 204 + }, + { + "epoch": 0.016544265999515778, + "grad_norm": 1.0089969635009766, + "learning_rate": 
0.00019998626172356804, + "loss": 2.8021, + "step": 205 + }, + { + "epoch": 0.016624969736098782, + "grad_norm": 0.9916231632232666, + "learning_rate": 0.00019998599880214566, + "loss": 2.8455, + "step": 206 + }, + { + "epoch": 0.016705673472681787, + "grad_norm": 0.9612492322921753, + "learning_rate": 0.00019998573338881088, + "loss": 2.8653, + "step": 207 + }, + { + "epoch": 0.016786377209264788, + "grad_norm": 0.984578013420105, + "learning_rate": 0.00019998546548357022, + "loss": 2.8359, + "step": 208 + }, + { + "epoch": 0.016867080945847792, + "grad_norm": 0.9457565546035767, + "learning_rate": 0.0001999851950864304, + "loss": 2.8507, + "step": 209 + }, + { + "epoch": 0.016947784682430796, + "grad_norm": 1.0219026803970337, + "learning_rate": 0.00019998492219739817, + "loss": 2.8326, + "step": 210 + }, + { + "epoch": 0.0170284884190138, + "grad_norm": 0.971570611000061, + "learning_rate": 0.00019998464681648032, + "loss": 2.8079, + "step": 211 + }, + { + "epoch": 0.017109192155596805, + "grad_norm": 0.9731320738792419, + "learning_rate": 0.00019998436894368368, + "loss": 2.8536, + "step": 212 + }, + { + "epoch": 0.01718989589217981, + "grad_norm": 1.0519105195999146, + "learning_rate": 0.00019998408857901525, + "loss": 2.8589, + "step": 213 + }, + { + "epoch": 0.01727059962876281, + "grad_norm": 0.9725883603096008, + "learning_rate": 0.00019998380572248194, + "loss": 2.7937, + "step": 214 + }, + { + "epoch": 0.017351303365345815, + "grad_norm": 1.0397064685821533, + "learning_rate": 0.00019998352037409084, + "loss": 2.9145, + "step": 215 + }, + { + "epoch": 0.01743200710192882, + "grad_norm": 0.9094852209091187, + "learning_rate": 0.00019998323253384904, + "loss": 2.7692, + "step": 216 + }, + { + "epoch": 0.017512710838511823, + "grad_norm": 0.941646158695221, + "learning_rate": 0.00019998294220176374, + "loss": 2.7975, + "step": 217 + }, + { + "epoch": 0.017593414575094828, + "grad_norm": 0.9939892888069153, + "learning_rate": 0.00019998264937784216, + 
"loss": 2.8421, + "step": 218 + }, + { + "epoch": 0.017674118311677832, + "grad_norm": 0.8985795378684998, + "learning_rate": 0.0001999823540620916, + "loss": 2.8146, + "step": 219 + }, + { + "epoch": 0.017754822048260833, + "grad_norm": 1.0436078310012817, + "learning_rate": 0.00019998205625451943, + "loss": 2.8416, + "step": 220 + }, + { + "epoch": 0.017835525784843837, + "grad_norm": 0.9941675066947937, + "learning_rate": 0.00019998175595513305, + "loss": 2.8723, + "step": 221 + }, + { + "epoch": 0.01791622952142684, + "grad_norm": 0.9203903675079346, + "learning_rate": 0.00019998145316393995, + "loss": 2.7791, + "step": 222 + }, + { + "epoch": 0.017996933258009846, + "grad_norm": 0.9325969815254211, + "learning_rate": 0.00019998114788094768, + "loss": 2.8664, + "step": 223 + }, + { + "epoch": 0.01807763699459285, + "grad_norm": 0.9483599662780762, + "learning_rate": 0.00019998084010616388, + "loss": 2.7782, + "step": 224 + }, + { + "epoch": 0.018158340731175854, + "grad_norm": 0.9555078744888306, + "learning_rate": 0.00019998052983959615, + "loss": 2.7771, + "step": 225 + }, + { + "epoch": 0.01823904446775886, + "grad_norm": 0.9452421069145203, + "learning_rate": 0.00019998021708125233, + "loss": 2.8878, + "step": 226 + }, + { + "epoch": 0.01831974820434186, + "grad_norm": 0.9784894585609436, + "learning_rate": 0.00019997990183114007, + "loss": 2.8382, + "step": 227 + }, + { + "epoch": 0.018400451940924864, + "grad_norm": 1.0844931602478027, + "learning_rate": 0.00019997958408926735, + "loss": 2.8015, + "step": 228 + }, + { + "epoch": 0.01848115567750787, + "grad_norm": 1.0416710376739502, + "learning_rate": 0.00019997926385564207, + "loss": 2.8364, + "step": 229 + }, + { + "epoch": 0.018561859414090873, + "grad_norm": 0.9213813543319702, + "learning_rate": 0.00019997894113027215, + "loss": 2.8489, + "step": 230 + }, + { + "epoch": 0.018642563150673877, + "grad_norm": 1.0186388492584229, + "learning_rate": 0.00019997861591316567, + "loss": 2.914, + "step": 231 
+ }, + { + "epoch": 0.01872326688725688, + "grad_norm": 1.0032236576080322, + "learning_rate": 0.00019997828820433072, + "loss": 2.8733, + "step": 232 + }, + { + "epoch": 0.018803970623839882, + "grad_norm": 0.9783569574356079, + "learning_rate": 0.0001999779580037755, + "loss": 2.851, + "step": 233 + }, + { + "epoch": 0.018884674360422887, + "grad_norm": 0.8471441268920898, + "learning_rate": 0.00019997762531150825, + "loss": 2.7923, + "step": 234 + }, + { + "epoch": 0.01896537809700589, + "grad_norm": 0.8912937641143799, + "learning_rate": 0.00019997729012753717, + "loss": 2.8725, + "step": 235 + }, + { + "epoch": 0.019046081833588895, + "grad_norm": 1.2453325986862183, + "learning_rate": 0.00019997695245187075, + "loss": 2.9292, + "step": 236 + }, + { + "epoch": 0.0191267855701719, + "grad_norm": 0.8870908617973328, + "learning_rate": 0.0001999766122845173, + "loss": 2.8008, + "step": 237 + }, + { + "epoch": 0.019207489306754904, + "grad_norm": 1.0679768323898315, + "learning_rate": 0.0001999762696254853, + "loss": 2.8919, + "step": 238 + }, + { + "epoch": 0.01928819304333791, + "grad_norm": 0.9769917130470276, + "learning_rate": 0.00019997592447478337, + "loss": 2.7937, + "step": 239 + }, + { + "epoch": 0.01936889677992091, + "grad_norm": 1.066183090209961, + "learning_rate": 0.00019997557683242004, + "loss": 2.8375, + "step": 240 + }, + { + "epoch": 0.019449600516503913, + "grad_norm": 0.9834103584289551, + "learning_rate": 0.000199975226698404, + "loss": 2.8577, + "step": 241 + }, + { + "epoch": 0.019530304253086918, + "grad_norm": 1.102211833000183, + "learning_rate": 0.00019997487407274396, + "loss": 2.8466, + "step": 242 + }, + { + "epoch": 0.019611007989669922, + "grad_norm": 0.9936226606369019, + "learning_rate": 0.00019997451895544872, + "loss": 2.7729, + "step": 243 + }, + { + "epoch": 0.019691711726252926, + "grad_norm": 1.0995992422103882, + "learning_rate": 0.00019997416134652713, + "loss": 2.8425, + "step": 244 + }, + { + "epoch": 
0.01977241546283593, + "grad_norm": 0.94181889295578, + "learning_rate": 0.00019997380124598814, + "loss": 2.8495, + "step": 245 + }, + { + "epoch": 0.01985311919941893, + "grad_norm": 0.9791487455368042, + "learning_rate": 0.00019997343865384067, + "loss": 2.8919, + "step": 246 + }, + { + "epoch": 0.019933822936001936, + "grad_norm": 0.9173399209976196, + "learning_rate": 0.00019997307357009375, + "loss": 2.8593, + "step": 247 + }, + { + "epoch": 0.02001452667258494, + "grad_norm": 0.9675281047821045, + "learning_rate": 0.00019997270599475653, + "loss": 2.8226, + "step": 248 + }, + { + "epoch": 0.020095230409167945, + "grad_norm": 0.8928244113922119, + "learning_rate": 0.00019997233592783812, + "loss": 2.8296, + "step": 249 + }, + { + "epoch": 0.02017593414575095, + "grad_norm": 0.928601861000061, + "learning_rate": 0.0001999719633693478, + "loss": 2.8399, + "step": 250 + }, + { + "epoch": 0.020256637882333953, + "grad_norm": 0.9378123879432678, + "learning_rate": 0.00019997158831929482, + "loss": 2.8711, + "step": 251 + }, + { + "epoch": 0.020337341618916954, + "grad_norm": 0.9041047692298889, + "learning_rate": 0.00019997121077768853, + "loss": 2.8338, + "step": 252 + }, + { + "epoch": 0.02041804535549996, + "grad_norm": 0.9673274755477905, + "learning_rate": 0.00019997083074453832, + "loss": 2.8556, + "step": 253 + }, + { + "epoch": 0.020498749092082963, + "grad_norm": 0.9204083681106567, + "learning_rate": 0.0001999704482198537, + "loss": 2.7954, + "step": 254 + }, + { + "epoch": 0.020579452828665967, + "grad_norm": 0.9267606735229492, + "learning_rate": 0.00019997006320364417, + "loss": 2.8656, + "step": 255 + }, + { + "epoch": 0.02066015656524897, + "grad_norm": 0.9562919735908508, + "learning_rate": 0.00019996967569591936, + "loss": 2.8406, + "step": 256 + }, + { + "epoch": 0.020740860301831976, + "grad_norm": 0.9065950512886047, + "learning_rate": 0.0001999692856966889, + "loss": 2.7856, + "step": 257 + }, + { + "epoch": 0.02082156403841498, + "grad_norm": 
0.9136463403701782, + "learning_rate": 0.0001999688932059625, + "loss": 2.8083, + "step": 258 + }, + { + "epoch": 0.02090226777499798, + "grad_norm": 0.9785570502281189, + "learning_rate": 0.00019996849822374998, + "loss": 2.7984, + "step": 259 + }, + { + "epoch": 0.020982971511580985, + "grad_norm": 0.9549168348312378, + "learning_rate": 0.00019996810075006117, + "loss": 2.8048, + "step": 260 + }, + { + "epoch": 0.02106367524816399, + "grad_norm": 0.8923975825309753, + "learning_rate": 0.00019996770078490594, + "loss": 2.8559, + "step": 261 + }, + { + "epoch": 0.021144378984746994, + "grad_norm": 0.9516206383705139, + "learning_rate": 0.0001999672983282943, + "loss": 2.9171, + "step": 262 + }, + { + "epoch": 0.02122508272133, + "grad_norm": 0.9101666808128357, + "learning_rate": 0.0001999668933802363, + "loss": 2.8746, + "step": 263 + }, + { + "epoch": 0.021305786457913003, + "grad_norm": 0.9081267714500427, + "learning_rate": 0.00019996648594074195, + "loss": 2.8637, + "step": 264 + }, + { + "epoch": 0.021386490194496004, + "grad_norm": 1.0048178434371948, + "learning_rate": 0.0001999660760098215, + "loss": 2.8783, + "step": 265 + }, + { + "epoch": 0.021467193931079008, + "grad_norm": 0.9625924229621887, + "learning_rate": 0.0001999656635874851, + "loss": 2.8226, + "step": 266 + }, + { + "epoch": 0.021547897667662012, + "grad_norm": 0.9911805391311646, + "learning_rate": 0.00019996524867374306, + "loss": 2.8135, + "step": 267 + }, + { + "epoch": 0.021628601404245017, + "grad_norm": 0.8920134902000427, + "learning_rate": 0.00019996483126860572, + "loss": 2.7934, + "step": 268 + }, + { + "epoch": 0.02170930514082802, + "grad_norm": 1.0806514024734497, + "learning_rate": 0.00019996441137208346, + "loss": 2.8435, + "step": 269 + }, + { + "epoch": 0.021790008877411025, + "grad_norm": 0.9426547884941101, + "learning_rate": 0.00019996398898418675, + "loss": 2.7919, + "step": 270 + }, + { + "epoch": 0.021870712613994026, + "grad_norm": 0.9893020987510681, + 
"learning_rate": 0.00019996356410492615, + "loss": 2.8616, + "step": 271 + }, + { + "epoch": 0.02195141635057703, + "grad_norm": 1.0196046829223633, + "learning_rate": 0.00019996313673431218, + "loss": 2.8101, + "step": 272 + }, + { + "epoch": 0.022032120087160035, + "grad_norm": 0.9556699991226196, + "learning_rate": 0.00019996270687235558, + "loss": 2.8669, + "step": 273 + }, + { + "epoch": 0.02211282382374304, + "grad_norm": 0.8985902667045593, + "learning_rate": 0.00019996227451906702, + "loss": 2.8078, + "step": 274 + }, + { + "epoch": 0.022193527560326044, + "grad_norm": 1.0198246240615845, + "learning_rate": 0.00019996183967445726, + "loss": 2.8314, + "step": 275 + }, + { + "epoch": 0.022274231296909048, + "grad_norm": 0.9360179901123047, + "learning_rate": 0.00019996140233853715, + "loss": 2.7969, + "step": 276 + }, + { + "epoch": 0.022354935033492052, + "grad_norm": 1.0250160694122314, + "learning_rate": 0.00019996096251131759, + "loss": 2.7897, + "step": 277 + }, + { + "epoch": 0.022435638770075053, + "grad_norm": 0.934582531452179, + "learning_rate": 0.00019996052019280954, + "loss": 2.8667, + "step": 278 + }, + { + "epoch": 0.022516342506658057, + "grad_norm": 0.9394461512565613, + "learning_rate": 0.00019996007538302407, + "loss": 2.7681, + "step": 279 + }, + { + "epoch": 0.022597046243241062, + "grad_norm": 0.9468861222267151, + "learning_rate": 0.00019995962808197216, + "loss": 2.7709, + "step": 280 + }, + { + "epoch": 0.022677749979824066, + "grad_norm": 0.9798515439033508, + "learning_rate": 0.00019995917828966506, + "loss": 2.8274, + "step": 281 + }, + { + "epoch": 0.02275845371640707, + "grad_norm": 1.0403941869735718, + "learning_rate": 0.00019995872600611395, + "loss": 2.8897, + "step": 282 + }, + { + "epoch": 0.022839157452990075, + "grad_norm": 0.9795030951499939, + "learning_rate": 0.00019995827123133006, + "loss": 2.8792, + "step": 283 + }, + { + "epoch": 0.022919861189573076, + "grad_norm": 0.9162538647651672, + "learning_rate": 
0.00019995781396532479, + "loss": 2.8339, + "step": 284 + }, + { + "epoch": 0.02300056492615608, + "grad_norm": 1.0864707231521606, + "learning_rate": 0.00019995735420810947, + "loss": 2.8599, + "step": 285 + }, + { + "epoch": 0.023081268662739084, + "grad_norm": 0.9181776642799377, + "learning_rate": 0.0001999568919596956, + "loss": 2.8736, + "step": 286 + }, + { + "epoch": 0.02316197239932209, + "grad_norm": 0.8880531191825867, + "learning_rate": 0.00019995642722009472, + "loss": 2.8215, + "step": 287 + }, + { + "epoch": 0.023242676135905093, + "grad_norm": 0.9287240505218506, + "learning_rate": 0.00019995595998931835, + "loss": 2.844, + "step": 288 + }, + { + "epoch": 0.023323379872488097, + "grad_norm": 0.886894941329956, + "learning_rate": 0.0001999554902673782, + "loss": 2.8319, + "step": 289 + }, + { + "epoch": 0.0234040836090711, + "grad_norm": 0.9564458131790161, + "learning_rate": 0.0001999550180542859, + "loss": 2.8126, + "step": 290 + }, + { + "epoch": 0.023484787345654103, + "grad_norm": 0.8745970726013184, + "learning_rate": 0.00019995454335005334, + "loss": 2.8344, + "step": 291 + }, + { + "epoch": 0.023565491082237107, + "grad_norm": 1.0343137979507446, + "learning_rate": 0.00019995406615469217, + "loss": 2.8498, + "step": 292 + }, + { + "epoch": 0.02364619481882011, + "grad_norm": 0.9951575994491577, + "learning_rate": 0.0001999535864682145, + "loss": 2.8655, + "step": 293 + }, + { + "epoch": 0.023726898555403116, + "grad_norm": 0.8457592725753784, + "learning_rate": 0.0001999531042906321, + "loss": 2.8189, + "step": 294 + }, + { + "epoch": 0.02380760229198612, + "grad_norm": 0.9126954674720764, + "learning_rate": 0.00019995261962195708, + "loss": 2.8272, + "step": 295 + }, + { + "epoch": 0.023888306028569124, + "grad_norm": 1.0171937942504883, + "learning_rate": 0.0001999521324622015, + "loss": 2.869, + "step": 296 + }, + { + "epoch": 0.023969009765152125, + "grad_norm": 0.9887226223945618, + "learning_rate": 0.00019995164281137753, + "loss": 
2.7643, + "step": 297 + }, + { + "epoch": 0.02404971350173513, + "grad_norm": 1.4240798950195312, + "learning_rate": 0.00019995115066949733, + "loss": 2.8332, + "step": 298 + }, + { + "epoch": 0.024130417238318134, + "grad_norm": 0.9856921434402466, + "learning_rate": 0.00019995065603657316, + "loss": 2.8283, + "step": 299 + }, + { + "epoch": 0.024211120974901138, + "grad_norm": 0.997164785861969, + "learning_rate": 0.0001999501589126174, + "loss": 2.9164, + "step": 300 + }, + { + "epoch": 0.024291824711484142, + "grad_norm": 1.6480412483215332, + "learning_rate": 0.00019994965929764238, + "loss": 2.8941, + "step": 301 + }, + { + "epoch": 0.024372528448067147, + "grad_norm": 1.1590758562088013, + "learning_rate": 0.0001999491571916606, + "loss": 2.8127, + "step": 302 + }, + { + "epoch": 0.024453232184650148, + "grad_norm": 1.1228376626968384, + "learning_rate": 0.00019994865259468454, + "loss": 2.8439, + "step": 303 + }, + { + "epoch": 0.024533935921233152, + "grad_norm": 1.0426349639892578, + "learning_rate": 0.0001999481455067268, + "loss": 2.8671, + "step": 304 + }, + { + "epoch": 0.024614639657816156, + "grad_norm": 1.0911917686462402, + "learning_rate": 0.00019994763592779996, + "loss": 2.8297, + "step": 305 + }, + { + "epoch": 0.02469534339439916, + "grad_norm": 1.0493195056915283, + "learning_rate": 0.00019994712385791683, + "loss": 2.7996, + "step": 306 + }, + { + "epoch": 0.024776047130982165, + "grad_norm": 0.9275023341178894, + "learning_rate": 0.00019994660929709008, + "loss": 2.7949, + "step": 307 + }, + { + "epoch": 0.02485675086756517, + "grad_norm": 1.1074799299240112, + "learning_rate": 0.00019994609224533255, + "loss": 2.8364, + "step": 308 + }, + { + "epoch": 0.024937454604148174, + "grad_norm": 0.9189429879188538, + "learning_rate": 0.00019994557270265717, + "loss": 2.8293, + "step": 309 + }, + { + "epoch": 0.025018158340731175, + "grad_norm": 0.9577780961990356, + "learning_rate": 0.00019994505066907683, + "loss": 2.8295, + "step": 310 + }, + { 
+ "epoch": 0.02509886207731418, + "grad_norm": 1.0707277059555054, + "learning_rate": 0.0001999445261446046, + "loss": 2.795, + "step": 311 + }, + { + "epoch": 0.025179565813897183, + "grad_norm": 0.9211257696151733, + "learning_rate": 0.0001999439991292535, + "loss": 2.8355, + "step": 312 + }, + { + "epoch": 0.025260269550480188, + "grad_norm": 0.987779438495636, + "learning_rate": 0.00019994346962303667, + "loss": 2.8175, + "step": 313 + }, + { + "epoch": 0.025340973287063192, + "grad_norm": 0.9317128658294678, + "learning_rate": 0.00019994293762596734, + "loss": 2.8205, + "step": 314 + }, + { + "epoch": 0.025421677023646196, + "grad_norm": 0.8989154100418091, + "learning_rate": 0.00019994240313805873, + "loss": 2.8257, + "step": 315 + }, + { + "epoch": 0.025502380760229197, + "grad_norm": 0.8391042351722717, + "learning_rate": 0.00019994186615932423, + "loss": 2.8105, + "step": 316 + }, + { + "epoch": 0.0255830844968122, + "grad_norm": 0.8908089995384216, + "learning_rate": 0.00019994132668977715, + "loss": 2.7894, + "step": 317 + }, + { + "epoch": 0.025663788233395206, + "grad_norm": 0.8666881322860718, + "learning_rate": 0.00019994078472943097, + "loss": 2.7934, + "step": 318 + }, + { + "epoch": 0.02574449196997821, + "grad_norm": 0.8834616541862488, + "learning_rate": 0.00019994024027829914, + "loss": 2.8166, + "step": 319 + }, + { + "epoch": 0.025825195706561214, + "grad_norm": 0.9831370115280151, + "learning_rate": 0.00019993969333639532, + "loss": 2.889, + "step": 320 + }, + { + "epoch": 0.02590589944314422, + "grad_norm": 0.9171644449234009, + "learning_rate": 0.00019993914390373308, + "loss": 2.8582, + "step": 321 + }, + { + "epoch": 0.02598660317972722, + "grad_norm": 0.9624861478805542, + "learning_rate": 0.00019993859198032615, + "loss": 2.8574, + "step": 322 + }, + { + "epoch": 0.026067306916310224, + "grad_norm": 0.8826586008071899, + "learning_rate": 0.00019993803756618826, + "loss": 2.8544, + "step": 323 + }, + { + "epoch": 0.02614801065289323, + 
"grad_norm": 0.9286447763442993, + "learning_rate": 0.0001999374806613332, + "loss": 2.7937, + "step": 324 + }, + { + "epoch": 0.026228714389476233, + "grad_norm": 0.9901685118675232, + "learning_rate": 0.00019993692126577493, + "loss": 2.7654, + "step": 325 + }, + { + "epoch": 0.026309418126059237, + "grad_norm": 0.9624341130256653, + "learning_rate": 0.00019993635937952734, + "loss": 2.8804, + "step": 326 + }, + { + "epoch": 0.02639012186264224, + "grad_norm": 0.8867596387863159, + "learning_rate": 0.0001999357950026044, + "loss": 2.8254, + "step": 327 + }, + { + "epoch": 0.026470825599225246, + "grad_norm": 0.9243817925453186, + "learning_rate": 0.00019993522813502022, + "loss": 2.8177, + "step": 328 + }, + { + "epoch": 0.026551529335808247, + "grad_norm": 0.9322247505187988, + "learning_rate": 0.00019993465877678895, + "loss": 2.9023, + "step": 329 + }, + { + "epoch": 0.02663223307239125, + "grad_norm": 0.8768174648284912, + "learning_rate": 0.00019993408692792474, + "loss": 2.8184, + "step": 330 + }, + { + "epoch": 0.026712936808974255, + "grad_norm": 0.9436870813369751, + "learning_rate": 0.00019993351258844184, + "loss": 2.8319, + "step": 331 + }, + { + "epoch": 0.02679364054555726, + "grad_norm": 0.9970327019691467, + "learning_rate": 0.0001999329357583546, + "loss": 2.7946, + "step": 332 + }, + { + "epoch": 0.026874344282140264, + "grad_norm": 0.9100088477134705, + "learning_rate": 0.00019993235643767736, + "loss": 2.782, + "step": 333 + }, + { + "epoch": 0.02695504801872327, + "grad_norm": 0.9693402051925659, + "learning_rate": 0.00019993177462642456, + "loss": 2.8182, + "step": 334 + }, + { + "epoch": 0.02703575175530627, + "grad_norm": 0.8761965036392212, + "learning_rate": 0.00019993119032461073, + "loss": 2.8058, + "step": 335 + }, + { + "epoch": 0.027116455491889273, + "grad_norm": 1.0699270963668823, + "learning_rate": 0.00019993060353225043, + "loss": 2.9211, + "step": 336 + }, + { + "epoch": 0.027197159228472278, + "grad_norm": 1.0094172954559326, 
+ "learning_rate": 0.00019993001424935822, + "loss": 2.8837, + "step": 337 + }, + { + "epoch": 0.027277862965055282, + "grad_norm": 0.9683573842048645, + "learning_rate": 0.00019992942247594887, + "loss": 2.8523, + "step": 338 + }, + { + "epoch": 0.027358566701638286, + "grad_norm": 1.3243813514709473, + "learning_rate": 0.00019992882821203708, + "loss": 2.7891, + "step": 339 + }, + { + "epoch": 0.02743927043822129, + "grad_norm": 1.0227056741714478, + "learning_rate": 0.0001999282314576377, + "loss": 2.8396, + "step": 340 + }, + { + "epoch": 0.027519974174804295, + "grad_norm": 1.03257417678833, + "learning_rate": 0.00019992763221276556, + "loss": 2.824, + "step": 341 + }, + { + "epoch": 0.027600677911387296, + "grad_norm": 0.86456698179245, + "learning_rate": 0.00019992703047743562, + "loss": 2.8006, + "step": 342 + }, + { + "epoch": 0.0276813816479703, + "grad_norm": 0.965339720249176, + "learning_rate": 0.00019992642625166286, + "loss": 2.8658, + "step": 343 + }, + { + "epoch": 0.027762085384553305, + "grad_norm": 1.0028942823410034, + "learning_rate": 0.00019992581953546236, + "loss": 2.8311, + "step": 344 + }, + { + "epoch": 0.02784278912113631, + "grad_norm": 0.984307050704956, + "learning_rate": 0.0001999252103288492, + "loss": 2.8748, + "step": 345 + }, + { + "epoch": 0.027923492857719313, + "grad_norm": 0.9405032396316528, + "learning_rate": 0.00019992459863183858, + "loss": 2.8371, + "step": 346 + }, + { + "epoch": 0.028004196594302318, + "grad_norm": 0.9867002367973328, + "learning_rate": 0.0001999239844444458, + "loss": 2.7914, + "step": 347 + }, + { + "epoch": 0.02808490033088532, + "grad_norm": 0.9224951267242432, + "learning_rate": 0.00019992336776668613, + "loss": 2.7986, + "step": 348 + }, + { + "epoch": 0.028165604067468323, + "grad_norm": 1.002838134765625, + "learning_rate": 0.0001999227485985749, + "loss": 2.8207, + "step": 349 + }, + { + "epoch": 0.028246307804051327, + "grad_norm": 0.8922045826911926, + "learning_rate": 
0.00019992212694012757, + "loss": 2.8264, + "step": 350 + }, + { + "epoch": 0.02832701154063433, + "grad_norm": 1.0860323905944824, + "learning_rate": 0.00019992150279135964, + "loss": 2.8778, + "step": 351 + }, + { + "epoch": 0.028407715277217336, + "grad_norm": 1.0995604991912842, + "learning_rate": 0.0001999208761522867, + "loss": 2.8599, + "step": 352 + }, + { + "epoch": 0.02848841901380034, + "grad_norm": 0.8741658926010132, + "learning_rate": 0.0001999202470229243, + "loss": 2.7757, + "step": 353 + }, + { + "epoch": 0.02856912275038334, + "grad_norm": 0.9142587184906006, + "learning_rate": 0.00019991961540328815, + "loss": 2.8235, + "step": 354 + }, + { + "epoch": 0.028649826486966345, + "grad_norm": 1.0000953674316406, + "learning_rate": 0.000199918981293394, + "loss": 2.8, + "step": 355 + }, + { + "epoch": 0.02873053022354935, + "grad_norm": 0.9416046738624573, + "learning_rate": 0.00019991834469325763, + "loss": 2.7941, + "step": 356 + }, + { + "epoch": 0.028811233960132354, + "grad_norm": 0.9135935306549072, + "learning_rate": 0.00019991770560289496, + "loss": 2.8315, + "step": 357 + }, + { + "epoch": 0.02889193769671536, + "grad_norm": 0.8867244124412537, + "learning_rate": 0.00019991706402232184, + "loss": 2.8649, + "step": 358 + }, + { + "epoch": 0.028972641433298363, + "grad_norm": 0.9360243678092957, + "learning_rate": 0.00019991641995155431, + "loss": 2.7556, + "step": 359 + }, + { + "epoch": 0.029053345169881367, + "grad_norm": 0.8903766870498657, + "learning_rate": 0.00019991577339060842, + "loss": 2.8379, + "step": 360 + }, + { + "epoch": 0.029134048906464368, + "grad_norm": 1.0178784132003784, + "learning_rate": 0.00019991512433950023, + "loss": 2.8045, + "step": 361 + }, + { + "epoch": 0.029214752643047372, + "grad_norm": 0.9318631887435913, + "learning_rate": 0.000199914472798246, + "loss": 2.823, + "step": 362 + }, + { + "epoch": 0.029295456379630377, + "grad_norm": 0.9384647011756897, + "learning_rate": 0.00019991381876686195, + "loss": 
2.9379, + "step": 363 + }, + { + "epoch": 0.02937616011621338, + "grad_norm": 0.9318633675575256, + "learning_rate": 0.00019991316224536433, + "loss": 2.8222, + "step": 364 + }, + { + "epoch": 0.029456863852796385, + "grad_norm": 0.8653938174247742, + "learning_rate": 0.00019991250323376952, + "loss": 2.8447, + "step": 365 + }, + { + "epoch": 0.02953756758937939, + "grad_norm": 0.8997991681098938, + "learning_rate": 0.00019991184173209398, + "loss": 2.8523, + "step": 366 + }, + { + "epoch": 0.02961827132596239, + "grad_norm": 0.8587092161178589, + "learning_rate": 0.00019991117774035416, + "loss": 2.8141, + "step": 367 + }, + { + "epoch": 0.029698975062545395, + "grad_norm": 0.8740741014480591, + "learning_rate": 0.00019991051125856663, + "loss": 2.7487, + "step": 368 + }, + { + "epoch": 0.0297796787991284, + "grad_norm": 0.9099416732788086, + "learning_rate": 0.00019990984228674798, + "loss": 2.834, + "step": 369 + }, + { + "epoch": 0.029860382535711404, + "grad_norm": 0.8675365447998047, + "learning_rate": 0.0001999091708249149, + "loss": 2.8259, + "step": 370 + }, + { + "epoch": 0.029941086272294408, + "grad_norm": 1.0141092538833618, + "learning_rate": 0.00019990849687308412, + "loss": 2.8369, + "step": 371 + }, + { + "epoch": 0.030021790008877412, + "grad_norm": 0.849155604839325, + "learning_rate": 0.00019990782043127243, + "loss": 2.7505, + "step": 372 + }, + { + "epoch": 0.030102493745460413, + "grad_norm": 1.073754072189331, + "learning_rate": 0.0001999071414994967, + "loss": 2.8939, + "step": 373 + }, + { + "epoch": 0.030183197482043417, + "grad_norm": 0.8615279197692871, + "learning_rate": 0.00019990646007777383, + "loss": 2.7662, + "step": 374 + }, + { + "epoch": 0.030263901218626422, + "grad_norm": 0.8803398609161377, + "learning_rate": 0.0001999057761661208, + "loss": 2.7992, + "step": 375 + }, + { + "epoch": 0.030344604955209426, + "grad_norm": 0.8901834487915039, + "learning_rate": 0.00019990508976455473, + "loss": 2.8222, + "step": 376 + }, + { + 
"epoch": 0.03042530869179243, + "grad_norm": 0.9443284869194031, + "learning_rate": 0.00019990440087309263, + "loss": 2.8326, + "step": 377 + }, + { + "epoch": 0.030506012428375435, + "grad_norm": 0.9122868180274963, + "learning_rate": 0.0001999037094917517, + "loss": 2.7653, + "step": 378 + }, + { + "epoch": 0.03058671616495844, + "grad_norm": 0.8764635920524597, + "learning_rate": 0.0001999030156205492, + "loss": 2.7813, + "step": 379 + }, + { + "epoch": 0.03066741990154144, + "grad_norm": 0.8466865420341492, + "learning_rate": 0.0001999023192595024, + "loss": 2.8338, + "step": 380 + }, + { + "epoch": 0.030748123638124444, + "grad_norm": 0.8833961486816406, + "learning_rate": 0.00019990162040862863, + "loss": 2.78, + "step": 381 + }, + { + "epoch": 0.03082882737470745, + "grad_norm": 1.0298357009887695, + "learning_rate": 0.00019990091906794537, + "loss": 2.8059, + "step": 382 + }, + { + "epoch": 0.030909531111290453, + "grad_norm": 0.8651318550109863, + "learning_rate": 0.00019990021523747005, + "loss": 2.8608, + "step": 383 + }, + { + "epoch": 0.030990234847873457, + "grad_norm": 1.0262864828109741, + "learning_rate": 0.0001998995089172202, + "loss": 2.8226, + "step": 384 + }, + { + "epoch": 0.03107093858445646, + "grad_norm": 0.9266276955604553, + "learning_rate": 0.00019989880010721348, + "loss": 2.9414, + "step": 385 + }, + { + "epoch": 0.031151642321039463, + "grad_norm": 0.8762117028236389, + "learning_rate": 0.00019989808880746749, + "loss": 2.8023, + "step": 386 + }, + { + "epoch": 0.031232346057622467, + "grad_norm": 0.8531816601753235, + "learning_rate": 0.00019989737501800004, + "loss": 2.777, + "step": 387 + }, + { + "epoch": 0.031313049794205475, + "grad_norm": 0.8999545574188232, + "learning_rate": 0.0001998966587388288, + "loss": 2.8656, + "step": 388 + }, + { + "epoch": 0.03139375353078847, + "grad_norm": 0.932248055934906, + "learning_rate": 0.00019989593996997177, + "loss": 2.8212, + "step": 389 + }, + { + "epoch": 0.031474457267371476, + 
"grad_norm": 0.9059134125709534, + "learning_rate": 0.00019989521871144672, + "loss": 2.7945, + "step": 390 + }, + { + "epoch": 0.03155516100395448, + "grad_norm": 0.9323028922080994, + "learning_rate": 0.00019989449496327172, + "loss": 2.8338, + "step": 391 + }, + { + "epoch": 0.031635864740537485, + "grad_norm": 0.9141251444816589, + "learning_rate": 0.0001998937687254648, + "loss": 2.7935, + "step": 392 + }, + { + "epoch": 0.03171656847712049, + "grad_norm": 1.0026880502700806, + "learning_rate": 0.000199893039998044, + "loss": 2.8811, + "step": 393 + }, + { + "epoch": 0.031797272213703494, + "grad_norm": 1.0178622007369995, + "learning_rate": 0.00019989230878102756, + "loss": 2.9003, + "step": 394 + }, + { + "epoch": 0.0318779759502865, + "grad_norm": 0.9111912846565247, + "learning_rate": 0.00019989157507443363, + "loss": 2.8399, + "step": 395 + }, + { + "epoch": 0.0319586796868695, + "grad_norm": 1.054563283920288, + "learning_rate": 0.00019989083887828052, + "loss": 2.9088, + "step": 396 + }, + { + "epoch": 0.03203938342345251, + "grad_norm": 0.9459816217422485, + "learning_rate": 0.00019989010019258663, + "loss": 2.805, + "step": 397 + }, + { + "epoch": 0.03212008716003551, + "grad_norm": 1.0139873027801514, + "learning_rate": 0.00019988935901737033, + "loss": 2.8452, + "step": 398 + }, + { + "epoch": 0.032200790896618516, + "grad_norm": 0.986325204372406, + "learning_rate": 0.00019988861535265006, + "loss": 2.8311, + "step": 399 + }, + { + "epoch": 0.03228149463320152, + "grad_norm": 0.9565223455429077, + "learning_rate": 0.00019988786919844436, + "loss": 2.7766, + "step": 400 + }, + { + "epoch": 0.032362198369784524, + "grad_norm": 0.8901559710502625, + "learning_rate": 0.0001998871205547719, + "loss": 2.7966, + "step": 401 + }, + { + "epoch": 0.03244290210636752, + "grad_norm": 1.0959528684616089, + "learning_rate": 0.00019988636942165123, + "loss": 2.8377, + "step": 402 + }, + { + "epoch": 0.032523605842950526, + "grad_norm": 1.0768988132476807, + 
"learning_rate": 0.00019988561579910118, + "loss": 2.8267, + "step": 403 + }, + { + "epoch": 0.03260430957953353, + "grad_norm": 0.9563855528831482, + "learning_rate": 0.00019988485968714048, + "loss": 2.8459, + "step": 404 + }, + { + "epoch": 0.032685013316116535, + "grad_norm": 0.930927038192749, + "learning_rate": 0.00019988410108578796, + "loss": 2.8053, + "step": 405 + }, + { + "epoch": 0.03276571705269954, + "grad_norm": 1.0658363103866577, + "learning_rate": 0.00019988333999506255, + "loss": 2.8512, + "step": 406 + }, + { + "epoch": 0.03284642078928254, + "grad_norm": 0.9258090257644653, + "learning_rate": 0.0001998825764149832, + "loss": 2.8541, + "step": 407 + }, + { + "epoch": 0.03292712452586555, + "grad_norm": 1.18158757686615, + "learning_rate": 0.00019988181034556895, + "loss": 2.8838, + "step": 408 + }, + { + "epoch": 0.03300782826244855, + "grad_norm": 0.9506754875183105, + "learning_rate": 0.00019988104178683891, + "loss": 2.7733, + "step": 409 + }, + { + "epoch": 0.033088531999031556, + "grad_norm": 0.9559460282325745, + "learning_rate": 0.0001998802707388122, + "loss": 2.9259, + "step": 410 + }, + { + "epoch": 0.03316923573561456, + "grad_norm": 0.9322298765182495, + "learning_rate": 0.00019987949720150808, + "loss": 2.8318, + "step": 411 + }, + { + "epoch": 0.033249939472197565, + "grad_norm": 0.9226691722869873, + "learning_rate": 0.00019987872117494576, + "loss": 2.9063, + "step": 412 + }, + { + "epoch": 0.03333064320878057, + "grad_norm": 1.0543674230575562, + "learning_rate": 0.00019987794265914464, + "loss": 2.7877, + "step": 413 + }, + { + "epoch": 0.033411346945363574, + "grad_norm": 0.989986002445221, + "learning_rate": 0.00019987716165412408, + "loss": 2.8354, + "step": 414 + }, + { + "epoch": 0.03349205068194657, + "grad_norm": 0.8703451752662659, + "learning_rate": 0.0001998763781599036, + "loss": 2.8127, + "step": 415 + }, + { + "epoch": 0.033572754418529575, + "grad_norm": 0.974943220615387, + "learning_rate": 0.0001998755921765027, 
+ "loss": 2.9272, + "step": 416 + }, + { + "epoch": 0.03365345815511258, + "grad_norm": 0.8714169859886169, + "learning_rate": 0.000199874803703941, + "loss": 2.8027, + "step": 417 + }, + { + "epoch": 0.033734161891695584, + "grad_norm": 0.9251161217689514, + "learning_rate": 0.00019987401274223804, + "loss": 2.8186, + "step": 418 + }, + { + "epoch": 0.03381486562827859, + "grad_norm": 0.9657236933708191, + "learning_rate": 0.00019987321929141366, + "loss": 2.8297, + "step": 419 + }, + { + "epoch": 0.03389556936486159, + "grad_norm": 0.9022002816200256, + "learning_rate": 0.00019987242335148757, + "loss": 2.881, + "step": 420 + }, + { + "epoch": 0.0339762731014446, + "grad_norm": 0.9479621052742004, + "learning_rate": 0.0001998716249224796, + "loss": 2.8288, + "step": 421 + }, + { + "epoch": 0.0340569768380276, + "grad_norm": 0.9458955526351929, + "learning_rate": 0.00019987082400440968, + "loss": 2.8861, + "step": 422 + }, + { + "epoch": 0.034137680574610606, + "grad_norm": 0.9444572329521179, + "learning_rate": 0.0001998700205972978, + "loss": 2.8877, + "step": 423 + }, + { + "epoch": 0.03421838431119361, + "grad_norm": 0.9263925552368164, + "learning_rate": 0.00019986921470116392, + "loss": 2.8028, + "step": 424 + }, + { + "epoch": 0.034299088047776614, + "grad_norm": 1.0690566301345825, + "learning_rate": 0.00019986840631602812, + "loss": 2.882, + "step": 425 + }, + { + "epoch": 0.03437979178435962, + "grad_norm": 0.8999007940292358, + "learning_rate": 0.0001998675954419106, + "loss": 2.8179, + "step": 426 + }, + { + "epoch": 0.03446049552094262, + "grad_norm": 0.894395112991333, + "learning_rate": 0.00019986678207883153, + "loss": 2.814, + "step": 427 + }, + { + "epoch": 0.03454119925752562, + "grad_norm": 0.8621550798416138, + "learning_rate": 0.00019986596622681123, + "loss": 2.7584, + "step": 428 + }, + { + "epoch": 0.034621902994108625, + "grad_norm": 0.9452527165412903, + "learning_rate": 0.00019986514788587, + "loss": 2.8949, + "step": 429 + }, + { + 
"epoch": 0.03470260673069163, + "grad_norm": 0.8973272442817688, + "learning_rate": 0.0001998643270560282, + "loss": 2.868, + "step": 430 + }, + { + "epoch": 0.034783310467274633, + "grad_norm": 0.9887418150901794, + "learning_rate": 0.00019986350373730634, + "loss": 2.8009, + "step": 431 + }, + { + "epoch": 0.03486401420385764, + "grad_norm": 0.9449994564056396, + "learning_rate": 0.0001998626779297249, + "loss": 2.8305, + "step": 432 + }, + { + "epoch": 0.03494471794044064, + "grad_norm": 1.052871823310852, + "learning_rate": 0.0001998618496333045, + "loss": 2.8136, + "step": 433 + }, + { + "epoch": 0.035025421677023647, + "grad_norm": 0.9600724577903748, + "learning_rate": 0.00019986101884806576, + "loss": 2.7857, + "step": 434 + }, + { + "epoch": 0.03510612541360665, + "grad_norm": 0.874043345451355, + "learning_rate": 0.00019986018557402942, + "loss": 2.8524, + "step": 435 + }, + { + "epoch": 0.035186829150189655, + "grad_norm": 0.9810616374015808, + "learning_rate": 0.0001998593498112162, + "loss": 2.7506, + "step": 436 + }, + { + "epoch": 0.03526753288677266, + "grad_norm": 0.9163016080856323, + "learning_rate": 0.00019985851155964693, + "loss": 2.798, + "step": 437 + }, + { + "epoch": 0.035348236623355664, + "grad_norm": 1.0688380002975464, + "learning_rate": 0.00019985767081934252, + "loss": 2.8916, + "step": 438 + }, + { + "epoch": 0.03542894035993867, + "grad_norm": 0.925020158290863, + "learning_rate": 0.00019985682759032393, + "loss": 2.8017, + "step": 439 + }, + { + "epoch": 0.035509644096521666, + "grad_norm": 0.9429430961608887, + "learning_rate": 0.0001998559818726122, + "loss": 2.837, + "step": 440 + }, + { + "epoch": 0.03559034783310467, + "grad_norm": 0.9135627150535583, + "learning_rate": 0.00019985513366622832, + "loss": 2.8423, + "step": 441 + }, + { + "epoch": 0.035671051569687674, + "grad_norm": 0.9218924045562744, + "learning_rate": 0.00019985428297119353, + "loss": 2.854, + "step": 442 + }, + { + "epoch": 0.03575175530627068, + 
"grad_norm": 0.9307878613471985, + "learning_rate": 0.00019985342978752897, + "loss": 2.8591, + "step": 443 + }, + { + "epoch": 0.03583245904285368, + "grad_norm": 0.935394287109375, + "learning_rate": 0.00019985257411525592, + "loss": 2.8388, + "step": 444 + }, + { + "epoch": 0.03591316277943669, + "grad_norm": 0.890959620475769, + "learning_rate": 0.0001998517159543957, + "loss": 2.78, + "step": 445 + }, + { + "epoch": 0.03599386651601969, + "grad_norm": 1.110924482345581, + "learning_rate": 0.0001998508553049697, + "loss": 2.8117, + "step": 446 + }, + { + "epoch": 0.036074570252602696, + "grad_norm": 0.8774176239967346, + "learning_rate": 0.0001998499921669994, + "loss": 2.8368, + "step": 447 + }, + { + "epoch": 0.0361552739891857, + "grad_norm": 0.9766948819160461, + "learning_rate": 0.00019984912654050625, + "loss": 2.764, + "step": 448 + }, + { + "epoch": 0.036235977725768705, + "grad_norm": 1.1439398527145386, + "learning_rate": 0.00019984825842551187, + "loss": 2.84, + "step": 449 + }, + { + "epoch": 0.03631668146235171, + "grad_norm": 0.8995118737220764, + "learning_rate": 0.0001998473878220379, + "loss": 2.834, + "step": 450 + }, + { + "epoch": 0.03639738519893471, + "grad_norm": 0.9810060858726501, + "learning_rate": 0.000199846514730106, + "loss": 2.9338, + "step": 451 + }, + { + "epoch": 0.03647808893551772, + "grad_norm": 1.0862053632736206, + "learning_rate": 0.00019984563914973795, + "loss": 2.837, + "step": 452 + }, + { + "epoch": 0.036558792672100715, + "grad_norm": 0.9456702470779419, + "learning_rate": 0.0001998447610809556, + "loss": 2.7664, + "step": 453 + }, + { + "epoch": 0.03663949640868372, + "grad_norm": 1.0714432001113892, + "learning_rate": 0.0001998438805237808, + "loss": 2.8339, + "step": 454 + }, + { + "epoch": 0.036720200145266724, + "grad_norm": 0.89134281873703, + "learning_rate": 0.00019984299747823547, + "loss": 2.7818, + "step": 455 + }, + { + "epoch": 0.03680090388184973, + "grad_norm": 0.869742214679718, + "learning_rate": 
0.0001998421119443417, + "loss": 2.7916, + "step": 456 + }, + { + "epoch": 0.03688160761843273, + "grad_norm": 0.9307265281677246, + "learning_rate": 0.00019984122392212149, + "loss": 2.8485, + "step": 457 + }, + { + "epoch": 0.03696231135501574, + "grad_norm": 0.900215744972229, + "learning_rate": 0.00019984033341159698, + "loss": 2.8536, + "step": 458 + }, + { + "epoch": 0.03704301509159874, + "grad_norm": 0.8679699897766113, + "learning_rate": 0.00019983944041279038, + "loss": 2.8344, + "step": 459 + }, + { + "epoch": 0.037123718828181745, + "grad_norm": 0.9540488719940186, + "learning_rate": 0.00019983854492572394, + "loss": 2.873, + "step": 460 + }, + { + "epoch": 0.03720442256476475, + "grad_norm": 0.8697962760925293, + "learning_rate": 0.00019983764695042, + "loss": 2.8122, + "step": 461 + }, + { + "epoch": 0.037285126301347754, + "grad_norm": 0.9534483551979065, + "learning_rate": 0.0001998367464869009, + "loss": 2.8842, + "step": 462 + }, + { + "epoch": 0.03736583003793076, + "grad_norm": 0.8402275443077087, + "learning_rate": 0.00019983584353518911, + "loss": 2.8135, + "step": 463 + }, + { + "epoch": 0.03744653377451376, + "grad_norm": 0.8226146697998047, + "learning_rate": 0.0001998349380953071, + "loss": 2.8036, + "step": 464 + }, + { + "epoch": 0.03752723751109677, + "grad_norm": 0.9292199611663818, + "learning_rate": 0.0001998340301672775, + "loss": 2.7887, + "step": 465 + }, + { + "epoch": 0.037607941247679764, + "grad_norm": 0.9035555124282837, + "learning_rate": 0.0001998331197511229, + "loss": 2.7851, + "step": 466 + }, + { + "epoch": 0.03768864498426277, + "grad_norm": 0.9411706328392029, + "learning_rate": 0.00019983220684686596, + "loss": 2.7782, + "step": 467 + }, + { + "epoch": 0.03776934872084577, + "grad_norm": 0.9867696166038513, + "learning_rate": 0.0001998312914545295, + "loss": 2.8125, + "step": 468 + }, + { + "epoch": 0.03785005245742878, + "grad_norm": 0.9683675169944763, + "learning_rate": 0.00019983037357413624, + "loss": 2.8325, + 
"step": 469 + }, + { + "epoch": 0.03793075619401178, + "grad_norm": 0.963941752910614, + "learning_rate": 0.00019982945320570913, + "loss": 2.8281, + "step": 470 + }, + { + "epoch": 0.038011459930594786, + "grad_norm": 0.9812459349632263, + "learning_rate": 0.0001998285303492711, + "loss": 2.765, + "step": 471 + }, + { + "epoch": 0.03809216366717779, + "grad_norm": 0.9681405425071716, + "learning_rate": 0.00019982760500484516, + "loss": 2.8882, + "step": 472 + }, + { + "epoch": 0.038172867403760795, + "grad_norm": 0.8983948826789856, + "learning_rate": 0.00019982667717245432, + "loss": 2.8182, + "step": 473 + }, + { + "epoch": 0.0382535711403438, + "grad_norm": 0.9875261783599854, + "learning_rate": 0.00019982574685212178, + "loss": 2.8072, + "step": 474 + }, + { + "epoch": 0.038334274876926804, + "grad_norm": 0.8889442086219788, + "learning_rate": 0.00019982481404387064, + "loss": 2.8635, + "step": 475 + }, + { + "epoch": 0.03841497861350981, + "grad_norm": 0.8904242515563965, + "learning_rate": 0.00019982387874772418, + "loss": 2.829, + "step": 476 + }, + { + "epoch": 0.03849568235009281, + "grad_norm": 1.0182000398635864, + "learning_rate": 0.00019982294096370574, + "loss": 2.8552, + "step": 477 + }, + { + "epoch": 0.03857638608667582, + "grad_norm": 0.9867151975631714, + "learning_rate": 0.00019982200069183867, + "loss": 2.8201, + "step": 478 + }, + { + "epoch": 0.038657089823258814, + "grad_norm": 0.9785345196723938, + "learning_rate": 0.0001998210579321464, + "loss": 2.8652, + "step": 479 + }, + { + "epoch": 0.03873779355984182, + "grad_norm": 0.9696915149688721, + "learning_rate": 0.00019982011268465243, + "loss": 2.8276, + "step": 480 + }, + { + "epoch": 0.03881849729642482, + "grad_norm": 0.9257470965385437, + "learning_rate": 0.00019981916494938033, + "loss": 2.8321, + "step": 481 + }, + { + "epoch": 0.03889920103300783, + "grad_norm": 0.9394895434379578, + "learning_rate": 0.00019981821472635369, + "loss": 2.8747, + "step": 482 + }, + { + "epoch": 
0.03897990476959083, + "grad_norm": 0.9888504147529602, + "learning_rate": 0.00019981726201559626, + "loss": 2.8201, + "step": 483 + }, + { + "epoch": 0.039060608506173836, + "grad_norm": 0.8957003951072693, + "learning_rate": 0.0001998163068171317, + "loss": 2.8255, + "step": 484 + }, + { + "epoch": 0.03914131224275684, + "grad_norm": 0.9792008996009827, + "learning_rate": 0.00019981534913098383, + "loss": 2.7985, + "step": 485 + }, + { + "epoch": 0.039222015979339844, + "grad_norm": 0.8689060211181641, + "learning_rate": 0.00019981438895717656, + "loss": 2.7945, + "step": 486 + }, + { + "epoch": 0.03930271971592285, + "grad_norm": 0.9932593703269958, + "learning_rate": 0.0001998134262957338, + "loss": 2.9041, + "step": 487 + }, + { + "epoch": 0.03938342345250585, + "grad_norm": 0.8496069312095642, + "learning_rate": 0.00019981246114667955, + "loss": 2.8433, + "step": 488 + }, + { + "epoch": 0.03946412718908886, + "grad_norm": 0.8484126925468445, + "learning_rate": 0.00019981149351003786, + "loss": 2.7872, + "step": 489 + }, + { + "epoch": 0.03954483092567186, + "grad_norm": 0.9208858013153076, + "learning_rate": 0.00019981052338583283, + "loss": 2.7776, + "step": 490 + }, + { + "epoch": 0.03962553466225486, + "grad_norm": 0.9305418729782104, + "learning_rate": 0.00019980955077408865, + "loss": 2.7851, + "step": 491 + }, + { + "epoch": 0.03970623839883786, + "grad_norm": 0.9803212881088257, + "learning_rate": 0.00019980857567482955, + "loss": 2.8469, + "step": 492 + }, + { + "epoch": 0.03978694213542087, + "grad_norm": 0.9165790677070618, + "learning_rate": 0.00019980759808807985, + "loss": 2.8513, + "step": 493 + }, + { + "epoch": 0.03986764587200387, + "grad_norm": 0.9153794050216675, + "learning_rate": 0.00019980661801386393, + "loss": 2.8322, + "step": 494 + }, + { + "epoch": 0.039948349608586876, + "grad_norm": 0.89347904920578, + "learning_rate": 0.00019980563545220616, + "loss": 2.8316, + "step": 495 + }, + { + "epoch": 0.04002905334516988, + "grad_norm": 
0.9882236123085022, + "learning_rate": 0.00019980465040313105, + "loss": 2.7471, + "step": 496 + }, + { + "epoch": 0.040109757081752885, + "grad_norm": 0.9391099810600281, + "learning_rate": 0.00019980366286666322, + "loss": 2.8182, + "step": 497 + }, + { + "epoch": 0.04019046081833589, + "grad_norm": 1.0155293941497803, + "learning_rate": 0.00019980267284282717, + "loss": 2.8721, + "step": 498 + }, + { + "epoch": 0.040271164554918894, + "grad_norm": 0.9952930212020874, + "learning_rate": 0.00019980168033164765, + "loss": 2.8538, + "step": 499 + }, + { + "epoch": 0.0403518682915019, + "grad_norm": 0.8385666608810425, + "learning_rate": 0.00019980068533314934, + "loss": 2.8242, + "step": 500 + }, + { + "epoch": 0.0404325720280849, + "grad_norm": 0.8747559785842896, + "learning_rate": 0.0001997996878473571, + "loss": 2.7908, + "step": 501 + }, + { + "epoch": 0.04051327576466791, + "grad_norm": 0.9267926216125488, + "learning_rate": 0.00019979868787429575, + "loss": 2.8359, + "step": 502 + }, + { + "epoch": 0.04059397950125091, + "grad_norm": 0.8194155693054199, + "learning_rate": 0.00019979768541399022, + "loss": 2.8161, + "step": 503 + }, + { + "epoch": 0.04067468323783391, + "grad_norm": 0.8923258185386658, + "learning_rate": 0.00019979668046646548, + "loss": 2.7547, + "step": 504 + }, + { + "epoch": 0.04075538697441691, + "grad_norm": 0.8965646028518677, + "learning_rate": 0.00019979567303174663, + "loss": 2.8432, + "step": 505 + }, + { + "epoch": 0.04083609071099992, + "grad_norm": 0.814481794834137, + "learning_rate": 0.0001997946631098587, + "loss": 2.8327, + "step": 506 + }, + { + "epoch": 0.04091679444758292, + "grad_norm": 0.8806928396224976, + "learning_rate": 0.00019979365070082694, + "loss": 2.8573, + "step": 507 + }, + { + "epoch": 0.040997498184165926, + "grad_norm": 0.8546919822692871, + "learning_rate": 0.00019979263580467653, + "loss": 2.8618, + "step": 508 + }, + { + "epoch": 0.04107820192074893, + "grad_norm": 0.8557277321815491, + "learning_rate": 
0.00019979161842143274, + "loss": 2.8454, + "step": 509 + }, + { + "epoch": 0.041158905657331935, + "grad_norm": 0.9153180122375488, + "learning_rate": 0.00019979059855112098, + "loss": 2.8027, + "step": 510 + }, + { + "epoch": 0.04123960939391494, + "grad_norm": 0.8616741895675659, + "learning_rate": 0.00019978957619376666, + "loss": 2.7628, + "step": 511 + }, + { + "epoch": 0.04132031313049794, + "grad_norm": 0.8777137398719788, + "learning_rate": 0.00019978855134939524, + "loss": 2.8443, + "step": 512 + }, + { + "epoch": 0.04140101686708095, + "grad_norm": 0.852100133895874, + "learning_rate": 0.0001997875240180323, + "loss": 2.8125, + "step": 513 + }, + { + "epoch": 0.04148172060366395, + "grad_norm": 0.8470742702484131, + "learning_rate": 0.00019978649419970338, + "loss": 2.8139, + "step": 514 + }, + { + "epoch": 0.041562424340246956, + "grad_norm": 0.8890305161476135, + "learning_rate": 0.0001997854618944342, + "loss": 2.8633, + "step": 515 + }, + { + "epoch": 0.04164312807682996, + "grad_norm": 0.8893599510192871, + "learning_rate": 0.00019978442710225043, + "loss": 2.8066, + "step": 516 + }, + { + "epoch": 0.04172383181341296, + "grad_norm": 0.9093891382217407, + "learning_rate": 0.00019978338982317792, + "loss": 2.8026, + "step": 517 + }, + { + "epoch": 0.04180453554999596, + "grad_norm": 0.9775434136390686, + "learning_rate": 0.00019978235005724252, + "loss": 2.849, + "step": 518 + }, + { + "epoch": 0.04188523928657897, + "grad_norm": 1.0014091730117798, + "learning_rate": 0.00019978130780447012, + "loss": 2.8572, + "step": 519 + }, + { + "epoch": 0.04196594302316197, + "grad_norm": 0.8487632870674133, + "learning_rate": 0.00019978026306488668, + "loss": 2.7611, + "step": 520 + }, + { + "epoch": 0.042046646759744975, + "grad_norm": 0.86592698097229, + "learning_rate": 0.00019977921583851825, + "loss": 2.7616, + "step": 521 + }, + { + "epoch": 0.04212735049632798, + "grad_norm": 1.0285916328430176, + "learning_rate": 0.00019977816612539093, + "loss": 
2.8049, + "step": 522 + }, + { + "epoch": 0.042208054232910984, + "grad_norm": 0.9716495871543884, + "learning_rate": 0.00019977711392553092, + "loss": 2.8459, + "step": 523 + }, + { + "epoch": 0.04228875796949399, + "grad_norm": 0.8842264413833618, + "learning_rate": 0.0001997760592389644, + "loss": 2.7934, + "step": 524 + }, + { + "epoch": 0.04236946170607699, + "grad_norm": 0.8839964866638184, + "learning_rate": 0.00019977500206571765, + "loss": 2.8135, + "step": 525 + }, + { + "epoch": 0.04245016544266, + "grad_norm": 0.870331346988678, + "learning_rate": 0.00019977394240581705, + "loss": 2.8684, + "step": 526 + }, + { + "epoch": 0.042530869179243, + "grad_norm": 0.8844720125198364, + "learning_rate": 0.000199772880259289, + "loss": 2.7867, + "step": 527 + }, + { + "epoch": 0.042611572915826006, + "grad_norm": 0.9353455901145935, + "learning_rate": 0.00019977181562615994, + "loss": 2.8051, + "step": 528 + }, + { + "epoch": 0.04269227665240901, + "grad_norm": 0.9530816078186035, + "learning_rate": 0.00019977074850645646, + "loss": 2.7915, + "step": 529 + }, + { + "epoch": 0.04277298038899201, + "grad_norm": 0.8984190821647644, + "learning_rate": 0.00019976967890020507, + "loss": 2.7957, + "step": 530 + }, + { + "epoch": 0.04285368412557501, + "grad_norm": 0.9146613478660583, + "learning_rate": 0.00019976860680743252, + "loss": 2.9053, + "step": 531 + }, + { + "epoch": 0.042934387862158016, + "grad_norm": 0.9228026866912842, + "learning_rate": 0.0001997675322281655, + "loss": 2.8578, + "step": 532 + }, + { + "epoch": 0.04301509159874102, + "grad_norm": 0.8266343474388123, + "learning_rate": 0.0001997664551624308, + "loss": 2.7393, + "step": 533 + }, + { + "epoch": 0.043095795335324025, + "grad_norm": 0.9197628498077393, + "learning_rate": 0.0001997653756102552, + "loss": 2.8828, + "step": 534 + }, + { + "epoch": 0.04317649907190703, + "grad_norm": 0.9145991802215576, + "learning_rate": 0.00019976429357166566, + "loss": 2.7767, + "step": 535 + }, + { + "epoch": 
0.04325720280849003, + "grad_norm": 0.9123281240463257, + "learning_rate": 0.00019976320904668913, + "loss": 2.7993, + "step": 536 + }, + { + "epoch": 0.04333790654507304, + "grad_norm": 0.8597636818885803, + "learning_rate": 0.00019976212203535266, + "loss": 2.8148, + "step": 537 + }, + { + "epoch": 0.04341861028165604, + "grad_norm": 0.8963296413421631, + "learning_rate": 0.00019976103253768334, + "loss": 2.7722, + "step": 538 + }, + { + "epoch": 0.043499314018239046, + "grad_norm": 0.9480688571929932, + "learning_rate": 0.0001997599405537083, + "loss": 2.8038, + "step": 539 + }, + { + "epoch": 0.04358001775482205, + "grad_norm": 0.8115736842155457, + "learning_rate": 0.00019975884608345476, + "loss": 2.8069, + "step": 540 + }, + { + "epoch": 0.043660721491405055, + "grad_norm": 0.9642506837844849, + "learning_rate": 0.00019975774912695, + "loss": 2.8703, + "step": 541 + }, + { + "epoch": 0.04374142522798805, + "grad_norm": 0.9638697504997253, + "learning_rate": 0.0001997566496842214, + "loss": 2.8223, + "step": 542 + }, + { + "epoch": 0.04382212896457106, + "grad_norm": 0.9478490352630615, + "learning_rate": 0.00019975554775529628, + "loss": 2.8164, + "step": 543 + }, + { + "epoch": 0.04390283270115406, + "grad_norm": 1.1771583557128906, + "learning_rate": 0.00019975444334020215, + "loss": 2.7969, + "step": 544 + }, + { + "epoch": 0.043983536437737066, + "grad_norm": 0.9597339034080505, + "learning_rate": 0.00019975333643896655, + "loss": 2.8025, + "step": 545 + }, + { + "epoch": 0.04406424017432007, + "grad_norm": 0.981595516204834, + "learning_rate": 0.00019975222705161704, + "loss": 2.7994, + "step": 546 + }, + { + "epoch": 0.044144943910903074, + "grad_norm": 0.9581133723258972, + "learning_rate": 0.00019975111517818127, + "loss": 2.802, + "step": 547 + }, + { + "epoch": 0.04422564764748608, + "grad_norm": 0.8643878698348999, + "learning_rate": 0.00019975000081868697, + "loss": 2.7958, + "step": 548 + }, + { + "epoch": 0.04430635138406908, + "grad_norm": 
1.2188652753829956, + "learning_rate": 0.0001997488839731619, + "loss": 2.8786, + "step": 549 + }, + { + "epoch": 0.04438705512065209, + "grad_norm": 0.9138071537017822, + "learning_rate": 0.00019974776464163387, + "loss": 2.809, + "step": 550 + }, + { + "epoch": 0.04446775885723509, + "grad_norm": 0.9604587554931641, + "learning_rate": 0.00019974664282413083, + "loss": 2.8009, + "step": 551 + }, + { + "epoch": 0.044548462593818096, + "grad_norm": 1.0271116495132446, + "learning_rate": 0.00019974551852068072, + "loss": 2.8689, + "step": 552 + }, + { + "epoch": 0.0446291663304011, + "grad_norm": 0.9330877065658569, + "learning_rate": 0.00019974439173131155, + "loss": 2.7613, + "step": 553 + }, + { + "epoch": 0.044709870066984105, + "grad_norm": 0.9549325108528137, + "learning_rate": 0.00019974326245605136, + "loss": 2.8314, + "step": 554 + }, + { + "epoch": 0.0447905738035671, + "grad_norm": 0.8928439021110535, + "learning_rate": 0.00019974213069492836, + "loss": 2.8097, + "step": 555 + }, + { + "epoch": 0.044871277540150106, + "grad_norm": 0.8705076575279236, + "learning_rate": 0.00019974099644797075, + "loss": 2.8112, + "step": 556 + }, + { + "epoch": 0.04495198127673311, + "grad_norm": 0.988345742225647, + "learning_rate": 0.00019973985971520676, + "loss": 2.7648, + "step": 557 + }, + { + "epoch": 0.045032685013316115, + "grad_norm": 0.9161957502365112, + "learning_rate": 0.00019973872049666475, + "loss": 2.8691, + "step": 558 + }, + { + "epoch": 0.04511338874989912, + "grad_norm": 0.8404076099395752, + "learning_rate": 0.00019973757879237312, + "loss": 2.7708, + "step": 559 + }, + { + "epoch": 0.045194092486482124, + "grad_norm": 1.05247962474823, + "learning_rate": 0.0001997364346023603, + "loss": 2.8638, + "step": 560 + }, + { + "epoch": 0.04527479622306513, + "grad_norm": 0.9235066175460815, + "learning_rate": 0.00019973528792665483, + "loss": 2.7876, + "step": 561 + }, + { + "epoch": 0.04535549995964813, + "grad_norm": 1.220075249671936, + "learning_rate": 
0.00019973413876528526, + "loss": 2.8563, + "step": 562 + }, + { + "epoch": 0.04543620369623114, + "grad_norm": 0.9098384976387024, + "learning_rate": 0.00019973298711828025, + "loss": 2.8427, + "step": 563 + }, + { + "epoch": 0.04551690743281414, + "grad_norm": 0.8792217969894409, + "learning_rate": 0.00019973183298566848, + "loss": 2.8673, + "step": 564 + }, + { + "epoch": 0.045597611169397145, + "grad_norm": 0.9895235896110535, + "learning_rate": 0.00019973067636747875, + "loss": 2.8262, + "step": 565 + }, + { + "epoch": 0.04567831490598015, + "grad_norm": 0.9191479086875916, + "learning_rate": 0.00019972951726373984, + "loss": 2.8005, + "step": 566 + }, + { + "epoch": 0.045759018642563154, + "grad_norm": 0.9631491899490356, + "learning_rate": 0.0001997283556744807, + "loss": 2.8438, + "step": 567 + }, + { + "epoch": 0.04583972237914615, + "grad_norm": 0.8302746415138245, + "learning_rate": 0.00019972719159973024, + "loss": 2.8221, + "step": 568 + }, + { + "epoch": 0.045920426115729156, + "grad_norm": 0.8238534927368164, + "learning_rate": 0.00019972602503951748, + "loss": 2.7674, + "step": 569 + }, + { + "epoch": 0.04600112985231216, + "grad_norm": 0.9675811529159546, + "learning_rate": 0.00019972485599387146, + "loss": 2.8457, + "step": 570 + }, + { + "epoch": 0.046081833588895164, + "grad_norm": 0.8663914203643799, + "learning_rate": 0.00019972368446282134, + "loss": 2.7851, + "step": 571 + }, + { + "epoch": 0.04616253732547817, + "grad_norm": 0.9904592633247375, + "learning_rate": 0.00019972251044639636, + "loss": 2.8792, + "step": 572 + }, + { + "epoch": 0.04624324106206117, + "grad_norm": 0.907600462436676, + "learning_rate": 0.0001997213339446257, + "loss": 2.7991, + "step": 573 + }, + { + "epoch": 0.04632394479864418, + "grad_norm": 0.871362566947937, + "learning_rate": 0.00019972015495753876, + "loss": 2.7959, + "step": 574 + }, + { + "epoch": 0.04640464853522718, + "grad_norm": 0.9664937853813171, + "learning_rate": 0.00019971897348516486, + "loss": 
2.7847, + "step": 575 + }, + { + "epoch": 0.046485352271810186, + "grad_norm": 1.0670619010925293, + "learning_rate": 0.0001997177895275335, + "loss": 2.8864, + "step": 576 + }, + { + "epoch": 0.04656605600839319, + "grad_norm": 0.9281025528907776, + "learning_rate": 0.00019971660308467414, + "loss": 2.8568, + "step": 577 + }, + { + "epoch": 0.046646759744976195, + "grad_norm": 0.8964822888374329, + "learning_rate": 0.00019971541415661639, + "loss": 2.7246, + "step": 578 + }, + { + "epoch": 0.0467274634815592, + "grad_norm": 0.8921917676925659, + "learning_rate": 0.00019971422274338985, + "loss": 2.8513, + "step": 579 + }, + { + "epoch": 0.0468081672181422, + "grad_norm": 0.9550159573554993, + "learning_rate": 0.0001997130288450242, + "loss": 2.7615, + "step": 580 + }, + { + "epoch": 0.0468888709547252, + "grad_norm": 0.9330170154571533, + "learning_rate": 0.00019971183246154925, + "loss": 2.9017, + "step": 581 + }, + { + "epoch": 0.046969574691308205, + "grad_norm": 0.9125271439552307, + "learning_rate": 0.00019971063359299477, + "loss": 2.8263, + "step": 582 + }, + { + "epoch": 0.04705027842789121, + "grad_norm": 1.0005927085876465, + "learning_rate": 0.00019970943223939066, + "loss": 2.8371, + "step": 583 + }, + { + "epoch": 0.047130982164474214, + "grad_norm": 1.0333613157272339, + "learning_rate": 0.00019970822840076685, + "loss": 2.8275, + "step": 584 + }, + { + "epoch": 0.04721168590105722, + "grad_norm": 0.8684708476066589, + "learning_rate": 0.00019970702207715334, + "loss": 2.8343, + "step": 585 + }, + { + "epoch": 0.04729238963764022, + "grad_norm": 1.1112761497497559, + "learning_rate": 0.00019970581326858025, + "loss": 2.9012, + "step": 586 + }, + { + "epoch": 0.04737309337422323, + "grad_norm": 1.0187962055206299, + "learning_rate": 0.00019970460197507763, + "loss": 2.8423, + "step": 587 + }, + { + "epoch": 0.04745379711080623, + "grad_norm": 0.9802024960517883, + "learning_rate": 0.00019970338819667567, + "loss": 2.867, + "step": 588 + }, + { + 
"epoch": 0.047534500847389236, + "grad_norm": 0.9825551509857178, + "learning_rate": 0.00019970217193340467, + "loss": 2.8359, + "step": 589 + }, + { + "epoch": 0.04761520458397224, + "grad_norm": 1.1399210691452026, + "learning_rate": 0.00019970095318529494, + "loss": 2.8356, + "step": 590 + }, + { + "epoch": 0.047695908320555244, + "grad_norm": 1.0373995304107666, + "learning_rate": 0.00019969973195237684, + "loss": 2.8005, + "step": 591 + }, + { + "epoch": 0.04777661205713825, + "grad_norm": 1.133596420288086, + "learning_rate": 0.00019969850823468077, + "loss": 2.8778, + "step": 592 + }, + { + "epoch": 0.047857315793721246, + "grad_norm": 1.0187327861785889, + "learning_rate": 0.00019969728203223728, + "loss": 2.8291, + "step": 593 + }, + { + "epoch": 0.04793801953030425, + "grad_norm": 1.0588128566741943, + "learning_rate": 0.00019969605334507688, + "loss": 2.9396, + "step": 594 + }, + { + "epoch": 0.048018723266887255, + "grad_norm": 0.8783230781555176, + "learning_rate": 0.00019969482217323026, + "loss": 2.8076, + "step": 595 + }, + { + "epoch": 0.04809942700347026, + "grad_norm": 1.0500195026397705, + "learning_rate": 0.00019969358851672805, + "loss": 2.9099, + "step": 596 + }, + { + "epoch": 0.04818013074005326, + "grad_norm": 0.9523593187332153, + "learning_rate": 0.000199692352375601, + "loss": 2.7448, + "step": 597 + }, + { + "epoch": 0.04826083447663627, + "grad_norm": 1.0008500814437866, + "learning_rate": 0.00019969111374987995, + "loss": 2.8212, + "step": 598 + }, + { + "epoch": 0.04834153821321927, + "grad_norm": 0.8992626070976257, + "learning_rate": 0.00019968987263959575, + "loss": 2.8698, + "step": 599 + }, + { + "epoch": 0.048422241949802276, + "grad_norm": 0.9914852380752563, + "learning_rate": 0.00019968862904477935, + "loss": 2.8221, + "step": 600 + }, + { + "epoch": 0.04850294568638528, + "grad_norm": 0.9633241295814514, + "learning_rate": 0.00019968738296546168, + "loss": 2.8835, + "step": 601 + }, + { + "epoch": 0.048583649422968285, + 
"grad_norm": 1.055831789970398, + "learning_rate": 0.00019968613440167387, + "loss": 2.8781, + "step": 602 + }, + { + "epoch": 0.04866435315955129, + "grad_norm": 0.913856029510498, + "learning_rate": 0.000199684883353447, + "loss": 2.7863, + "step": 603 + }, + { + "epoch": 0.048745056896134294, + "grad_norm": 0.8429243564605713, + "learning_rate": 0.00019968362982081226, + "loss": 2.7753, + "step": 604 + }, + { + "epoch": 0.0488257606327173, + "grad_norm": 0.9324761629104614, + "learning_rate": 0.0001996823738038009, + "loss": 2.8058, + "step": 605 + }, + { + "epoch": 0.048906464369300295, + "grad_norm": 1.0004981756210327, + "learning_rate": 0.0001996811153024442, + "loss": 2.8537, + "step": 606 + }, + { + "epoch": 0.0489871681058833, + "grad_norm": 0.9438043236732483, + "learning_rate": 0.00019967985431677354, + "loss": 2.8828, + "step": 607 + }, + { + "epoch": 0.049067871842466304, + "grad_norm": 0.9359340071678162, + "learning_rate": 0.00019967859084682034, + "loss": 2.8149, + "step": 608 + }, + { + "epoch": 0.04914857557904931, + "grad_norm": 1.0400227308273315, + "learning_rate": 0.00019967732489261609, + "loss": 2.8489, + "step": 609 + }, + { + "epoch": 0.04922927931563231, + "grad_norm": 0.8978031277656555, + "learning_rate": 0.00019967605645419237, + "loss": 2.8599, + "step": 610 + }, + { + "epoch": 0.04930998305221532, + "grad_norm": 0.9982689619064331, + "learning_rate": 0.00019967478553158073, + "loss": 2.9024, + "step": 611 + }, + { + "epoch": 0.04939068678879832, + "grad_norm": 1.0695222616195679, + "learning_rate": 0.00019967351212481292, + "loss": 2.8483, + "step": 612 + }, + { + "epoch": 0.049471390525381326, + "grad_norm": 1.0615525245666504, + "learning_rate": 0.0001996722362339206, + "loss": 2.806, + "step": 613 + }, + { + "epoch": 0.04955209426196433, + "grad_norm": 0.9624890089035034, + "learning_rate": 0.0001996709578589356, + "loss": 2.8641, + "step": 614 + }, + { + "epoch": 0.049632797998547334, + "grad_norm": 0.9156595468521118, + 
"learning_rate": 0.00019966967699988985, + "loss": 2.7991, + "step": 615 + }, + { + "epoch": 0.04971350173513034, + "grad_norm": 0.8687645196914673, + "learning_rate": 0.00019966839365681517, + "loss": 2.774, + "step": 616 + }, + { + "epoch": 0.04979420547171334, + "grad_norm": 0.9175437688827515, + "learning_rate": 0.00019966710782974359, + "loss": 2.8064, + "step": 617 + }, + { + "epoch": 0.04987490920829635, + "grad_norm": 0.8897463083267212, + "learning_rate": 0.00019966581951870715, + "loss": 2.8487, + "step": 618 + }, + { + "epoch": 0.049955612944879345, + "grad_norm": 0.8908397555351257, + "learning_rate": 0.00019966452872373795, + "loss": 2.8523, + "step": 619 + }, + { + "epoch": 0.05003631668146235, + "grad_norm": 0.95484858751297, + "learning_rate": 0.00019966323544486818, + "loss": 2.8471, + "step": 620 + }, + { + "epoch": 0.050117020418045354, + "grad_norm": 0.9995831251144409, + "learning_rate": 0.00019966193968213008, + "loss": 2.8341, + "step": 621 + }, + { + "epoch": 0.05019772415462836, + "grad_norm": 0.8731706142425537, + "learning_rate": 0.00019966064143555587, + "loss": 2.8491, + "step": 622 + }, + { + "epoch": 0.05027842789121136, + "grad_norm": 0.9213298559188843, + "learning_rate": 0.000199659340705178, + "loss": 2.8256, + "step": 623 + }, + { + "epoch": 0.050359131627794367, + "grad_norm": 0.9565179347991943, + "learning_rate": 0.00019965803749102885, + "loss": 2.8177, + "step": 624 + }, + { + "epoch": 0.05043983536437737, + "grad_norm": 1.0076881647109985, + "learning_rate": 0.00019965673179314086, + "loss": 2.7812, + "step": 625 + }, + { + "epoch": 0.050520539100960375, + "grad_norm": 0.989647388458252, + "learning_rate": 0.00019965542361154666, + "loss": 2.9226, + "step": 626 + }, + { + "epoch": 0.05060124283754338, + "grad_norm": 0.9671580791473389, + "learning_rate": 0.00019965411294627878, + "loss": 2.8204, + "step": 627 + }, + { + "epoch": 0.050681946574126384, + "grad_norm": 0.9275986552238464, + "learning_rate": 
0.00019965279979736989, + "loss": 2.8481, + "step": 628 + }, + { + "epoch": 0.05076265031070939, + "grad_norm": 0.9949543476104736, + "learning_rate": 0.00019965148416485273, + "loss": 2.8606, + "step": 629 + }, + { + "epoch": 0.05084335404729239, + "grad_norm": 0.9506482481956482, + "learning_rate": 0.0001996501660487601, + "loss": 2.8088, + "step": 630 + }, + { + "epoch": 0.0509240577838754, + "grad_norm": 0.9147887229919434, + "learning_rate": 0.00019964884544912488, + "loss": 2.7997, + "step": 631 + }, + { + "epoch": 0.051004761520458394, + "grad_norm": 0.8964840769767761, + "learning_rate": 0.00019964752236597993, + "loss": 2.8342, + "step": 632 + }, + { + "epoch": 0.0510854652570414, + "grad_norm": 0.931811511516571, + "learning_rate": 0.00019964619679935824, + "loss": 2.8229, + "step": 633 + }, + { + "epoch": 0.0511661689936244, + "grad_norm": 0.8634423017501831, + "learning_rate": 0.00019964486874929282, + "loss": 2.803, + "step": 634 + }, + { + "epoch": 0.05124687273020741, + "grad_norm": 0.892223596572876, + "learning_rate": 0.00019964353821581683, + "loss": 2.802, + "step": 635 + }, + { + "epoch": 0.05132757646679041, + "grad_norm": 0.8373630046844482, + "learning_rate": 0.00019964220519896338, + "loss": 2.7693, + "step": 636 + }, + { + "epoch": 0.051408280203373416, + "grad_norm": 0.8729730248451233, + "learning_rate": 0.0001996408696987657, + "loss": 2.8467, + "step": 637 + }, + { + "epoch": 0.05148898393995642, + "grad_norm": 0.8994413614273071, + "learning_rate": 0.0001996395317152571, + "loss": 2.8837, + "step": 638 + }, + { + "epoch": 0.051569687676539425, + "grad_norm": 0.9146113395690918, + "learning_rate": 0.0001996381912484709, + "loss": 2.8189, + "step": 639 + }, + { + "epoch": 0.05165039141312243, + "grad_norm": 0.9330562353134155, + "learning_rate": 0.00019963684829844052, + "loss": 2.7873, + "step": 640 + }, + { + "epoch": 0.05173109514970543, + "grad_norm": 0.9076224565505981, + "learning_rate": 0.00019963550286519944, + "loss": 2.802, + 
"step": 641 + }, + { + "epoch": 0.05181179888628844, + "grad_norm": 0.9580704569816589, + "learning_rate": 0.00019963415494878115, + "loss": 2.8173, + "step": 642 + }, + { + "epoch": 0.05189250262287144, + "grad_norm": 0.9291248917579651, + "learning_rate": 0.00019963280454921928, + "loss": 2.7866, + "step": 643 + }, + { + "epoch": 0.05197320635945444, + "grad_norm": 0.9815296530723572, + "learning_rate": 0.0001996314516665475, + "loss": 2.7903, + "step": 644 + }, + { + "epoch": 0.052053910096037444, + "grad_norm": 0.9461820721626282, + "learning_rate": 0.00019963009630079949, + "loss": 2.7854, + "step": 645 + }, + { + "epoch": 0.05213461383262045, + "grad_norm": 0.9660771489143372, + "learning_rate": 0.00019962873845200908, + "loss": 2.9187, + "step": 646 + }, + { + "epoch": 0.05221531756920345, + "grad_norm": 0.8987802863121033, + "learning_rate": 0.00019962737812021002, + "loss": 2.8854, + "step": 647 + }, + { + "epoch": 0.05229602130578646, + "grad_norm": 0.9810429215431213, + "learning_rate": 0.0001996260153054363, + "loss": 2.8974, + "step": 648 + }, + { + "epoch": 0.05237672504236946, + "grad_norm": 0.8185738325119019, + "learning_rate": 0.00019962465000772183, + "loss": 2.797, + "step": 649 + }, + { + "epoch": 0.052457428778952465, + "grad_norm": 0.8976237773895264, + "learning_rate": 0.0001996232822271007, + "loss": 2.8557, + "step": 650 + }, + { + "epoch": 0.05253813251553547, + "grad_norm": 0.8591496348381042, + "learning_rate": 0.0001996219119636069, + "loss": 2.8521, + "step": 651 + }, + { + "epoch": 0.052618836252118474, + "grad_norm": 0.8907031416893005, + "learning_rate": 0.00019962053921727472, + "loss": 2.8117, + "step": 652 + }, + { + "epoch": 0.05269953998870148, + "grad_norm": 0.9034241437911987, + "learning_rate": 0.00019961916398813823, + "loss": 2.741, + "step": 653 + }, + { + "epoch": 0.05278024372528448, + "grad_norm": 0.8284802436828613, + "learning_rate": 0.00019961778627623176, + "loss": 2.776, + "step": 654 + }, + { + "epoch": 
0.05286094746186749, + "grad_norm": 0.8459529876708984, + "learning_rate": 0.00019961640608158967, + "loss": 2.8027, + "step": 655 + }, + { + "epoch": 0.05294165119845049, + "grad_norm": 0.9720042943954468, + "learning_rate": 0.00019961502340424636, + "loss": 2.9086, + "step": 656 + }, + { + "epoch": 0.05302235493503349, + "grad_norm": 0.8581427335739136, + "learning_rate": 0.00019961363824423626, + "loss": 2.8347, + "step": 657 + }, + { + "epoch": 0.05310305867161649, + "grad_norm": 0.9545331597328186, + "learning_rate": 0.00019961225060159386, + "loss": 2.828, + "step": 658 + }, + { + "epoch": 0.0531837624081995, + "grad_norm": 1.0303562879562378, + "learning_rate": 0.00019961086047635385, + "loss": 2.8461, + "step": 659 + }, + { + "epoch": 0.0532644661447825, + "grad_norm": 0.86605304479599, + "learning_rate": 0.0001996094678685508, + "loss": 2.8355, + "step": 660 + }, + { + "epoch": 0.053345169881365506, + "grad_norm": 0.8146334886550903, + "learning_rate": 0.0001996080727782194, + "loss": 2.8638, + "step": 661 + }, + { + "epoch": 0.05342587361794851, + "grad_norm": 0.9434560537338257, + "learning_rate": 0.00019960667520539446, + "loss": 2.8196, + "step": 662 + }, + { + "epoch": 0.053506577354531515, + "grad_norm": 0.9362602829933167, + "learning_rate": 0.00019960527515011084, + "loss": 2.8452, + "step": 663 + }, + { + "epoch": 0.05358728109111452, + "grad_norm": 0.828713059425354, + "learning_rate": 0.00019960387261240334, + "loss": 2.8079, + "step": 664 + }, + { + "epoch": 0.053667984827697524, + "grad_norm": 0.8610214591026306, + "learning_rate": 0.00019960246759230697, + "loss": 2.8197, + "step": 665 + }, + { + "epoch": 0.05374868856428053, + "grad_norm": 0.8913124799728394, + "learning_rate": 0.00019960106008985674, + "loss": 2.8392, + "step": 666 + }, + { + "epoch": 0.05382939230086353, + "grad_norm": 0.8109759092330933, + "learning_rate": 0.00019959965010508778, + "loss": 2.7961, + "step": 667 + }, + { + "epoch": 0.05391009603744654, + "grad_norm": 
0.8714832663536072, + "learning_rate": 0.00019959823763803514, + "loss": 2.7984, + "step": 668 + }, + { + "epoch": 0.05399079977402954, + "grad_norm": 0.9008125066757202, + "learning_rate": 0.00019959682268873408, + "loss": 2.8319, + "step": 669 + }, + { + "epoch": 0.05407150351061254, + "grad_norm": 0.8718584775924683, + "learning_rate": 0.00019959540525721985, + "loss": 2.7973, + "step": 670 + }, + { + "epoch": 0.05415220724719554, + "grad_norm": 0.8666327595710754, + "learning_rate": 0.00019959398534352774, + "loss": 2.8296, + "step": 671 + }, + { + "epoch": 0.05423291098377855, + "grad_norm": 0.9755229949951172, + "learning_rate": 0.00019959256294769322, + "loss": 2.8358, + "step": 672 + }, + { + "epoch": 0.05431361472036155, + "grad_norm": 1.193708062171936, + "learning_rate": 0.0001995911380697517, + "loss": 2.7672, + "step": 673 + }, + { + "epoch": 0.054394318456944556, + "grad_norm": 0.9104088544845581, + "learning_rate": 0.00019958971070973866, + "loss": 2.8389, + "step": 674 + }, + { + "epoch": 0.05447502219352756, + "grad_norm": 0.9266251921653748, + "learning_rate": 0.0001995882808676897, + "loss": 2.8226, + "step": 675 + }, + { + "epoch": 0.054555725930110564, + "grad_norm": 1.1161282062530518, + "learning_rate": 0.00019958684854364046, + "loss": 2.8236, + "step": 676 + }, + { + "epoch": 0.05463642966669357, + "grad_norm": 0.9200586080551147, + "learning_rate": 0.00019958541373762666, + "loss": 2.8074, + "step": 677 + }, + { + "epoch": 0.05471713340327657, + "grad_norm": 1.0372560024261475, + "learning_rate": 0.000199583976449684, + "loss": 2.815, + "step": 678 + }, + { + "epoch": 0.05479783713985958, + "grad_norm": 0.8822301030158997, + "learning_rate": 0.0001995825366798483, + "loss": 2.7985, + "step": 679 + }, + { + "epoch": 0.05487854087644258, + "grad_norm": 0.9226076006889343, + "learning_rate": 0.00019958109442815553, + "loss": 2.7649, + "step": 680 + }, + { + "epoch": 0.054959244613025586, + "grad_norm": 0.8769479990005493, + "learning_rate": 
0.00019957964969464156, + "loss": 2.8483, + "step": 681 + }, + { + "epoch": 0.05503994834960859, + "grad_norm": 0.8601027727127075, + "learning_rate": 0.0001995782024793424, + "loss": 2.8072, + "step": 682 + }, + { + "epoch": 0.05512065208619159, + "grad_norm": 0.9684911370277405, + "learning_rate": 0.00019957675278229416, + "loss": 2.8693, + "step": 683 + }, + { + "epoch": 0.05520135582277459, + "grad_norm": 0.9119890928268433, + "learning_rate": 0.00019957530060353294, + "loss": 2.853, + "step": 684 + }, + { + "epoch": 0.055282059559357596, + "grad_norm": 0.9588247537612915, + "learning_rate": 0.0001995738459430949, + "loss": 2.8435, + "step": 685 + }, + { + "epoch": 0.0553627632959406, + "grad_norm": 0.8317441940307617, + "learning_rate": 0.00019957238880101636, + "loss": 2.8208, + "step": 686 + }, + { + "epoch": 0.055443467032523605, + "grad_norm": 0.92695152759552, + "learning_rate": 0.00019957092917733361, + "loss": 2.8378, + "step": 687 + }, + { + "epoch": 0.05552417076910661, + "grad_norm": 0.8908315300941467, + "learning_rate": 0.00019956946707208305, + "loss": 2.8041, + "step": 688 + }, + { + "epoch": 0.055604874505689614, + "grad_norm": 0.9787055253982544, + "learning_rate": 0.00019956800248530107, + "loss": 2.8604, + "step": 689 + }, + { + "epoch": 0.05568557824227262, + "grad_norm": 0.8707631826400757, + "learning_rate": 0.00019956653541702415, + "loss": 2.7763, + "step": 690 + }, + { + "epoch": 0.05576628197885562, + "grad_norm": 1.0059715509414673, + "learning_rate": 0.00019956506586728896, + "loss": 2.8267, + "step": 691 + }, + { + "epoch": 0.05584698571543863, + "grad_norm": 0.88490891456604, + "learning_rate": 0.00019956359383613203, + "loss": 2.8278, + "step": 692 + }, + { + "epoch": 0.05592768945202163, + "grad_norm": 0.9527923464775085, + "learning_rate": 0.00019956211932359007, + "loss": 2.8251, + "step": 693 + }, + { + "epoch": 0.056008393188604635, + "grad_norm": 0.9612617492675781, + "learning_rate": 0.00019956064232969987, + "loss": 
2.8148, + "step": 694 + }, + { + "epoch": 0.05608909692518763, + "grad_norm": 0.9261285066604614, + "learning_rate": 0.0001995591628544982, + "loss": 2.8176, + "step": 695 + }, + { + "epoch": 0.05616980066177064, + "grad_norm": 0.9766250252723694, + "learning_rate": 0.0001995576808980219, + "loss": 2.7968, + "step": 696 + }, + { + "epoch": 0.05625050439835364, + "grad_norm": 0.9287495017051697, + "learning_rate": 0.00019955619646030802, + "loss": 2.7679, + "step": 697 + }, + { + "epoch": 0.056331208134936646, + "grad_norm": 0.9182924032211304, + "learning_rate": 0.00019955470954139345, + "loss": 2.8295, + "step": 698 + }, + { + "epoch": 0.05641191187151965, + "grad_norm": 0.8650663495063782, + "learning_rate": 0.00019955322014131524, + "loss": 2.7928, + "step": 699 + }, + { + "epoch": 0.056492615608102655, + "grad_norm": 0.9543934464454651, + "learning_rate": 0.00019955172826011062, + "loss": 2.8049, + "step": 700 + }, + { + "epoch": 0.05657331934468566, + "grad_norm": 0.9060636162757874, + "learning_rate": 0.00019955023389781664, + "loss": 2.871, + "step": 701 + }, + { + "epoch": 0.05665402308126866, + "grad_norm": 0.9824137091636658, + "learning_rate": 0.00019954873705447065, + "loss": 2.816, + "step": 702 + }, + { + "epoch": 0.05673472681785167, + "grad_norm": 0.8831053972244263, + "learning_rate": 0.00019954723773010988, + "loss": 2.8207, + "step": 703 + }, + { + "epoch": 0.05681543055443467, + "grad_norm": 0.9603390693664551, + "learning_rate": 0.00019954573592477173, + "loss": 2.831, + "step": 704 + }, + { + "epoch": 0.056896134291017676, + "grad_norm": 0.911556601524353, + "learning_rate": 0.00019954423163849364, + "loss": 2.7679, + "step": 705 + }, + { + "epoch": 0.05697683802760068, + "grad_norm": 0.8558745384216309, + "learning_rate": 0.00019954272487131305, + "loss": 2.7934, + "step": 706 + }, + { + "epoch": 0.057057541764183685, + "grad_norm": 1.0175282955169678, + "learning_rate": 0.00019954121562326758, + "loss": 2.905, + "step": 707 + }, + { + 
"epoch": 0.05713824550076668, + "grad_norm": 0.9480875730514526, + "learning_rate": 0.00019953970389439483, + "loss": 2.85, + "step": 708 + }, + { + "epoch": 0.05721894923734969, + "grad_norm": 0.9271003603935242, + "learning_rate": 0.0001995381896847324, + "loss": 2.8237, + "step": 709 + }, + { + "epoch": 0.05729965297393269, + "grad_norm": 0.8439653515815735, + "learning_rate": 0.00019953667299431815, + "loss": 2.821, + "step": 710 + }, + { + "epoch": 0.057380356710515695, + "grad_norm": 0.9750552177429199, + "learning_rate": 0.0001995351538231898, + "loss": 2.8613, + "step": 711 + }, + { + "epoch": 0.0574610604470987, + "grad_norm": 0.9409266710281372, + "learning_rate": 0.0001995336321713852, + "loss": 2.7876, + "step": 712 + }, + { + "epoch": 0.057541764183681704, + "grad_norm": 0.811138927936554, + "learning_rate": 0.00019953210803894233, + "loss": 2.7957, + "step": 713 + }, + { + "epoch": 0.05762246792026471, + "grad_norm": 0.9504825472831726, + "learning_rate": 0.00019953058142589916, + "loss": 2.8536, + "step": 714 + }, + { + "epoch": 0.05770317165684771, + "grad_norm": 0.8183554410934448, + "learning_rate": 0.00019952905233229368, + "loss": 2.7697, + "step": 715 + }, + { + "epoch": 0.05778387539343072, + "grad_norm": 1.1146113872528076, + "learning_rate": 0.0001995275207581641, + "loss": 2.8629, + "step": 716 + }, + { + "epoch": 0.05786457913001372, + "grad_norm": 0.8797986507415771, + "learning_rate": 0.00019952598670354852, + "loss": 2.7962, + "step": 717 + }, + { + "epoch": 0.057945282866596726, + "grad_norm": 0.8771101832389832, + "learning_rate": 0.00019952445016848517, + "loss": 2.8323, + "step": 718 + }, + { + "epoch": 0.05802598660317973, + "grad_norm": 0.9003355503082275, + "learning_rate": 0.00019952291115301235, + "loss": 2.777, + "step": 719 + }, + { + "epoch": 0.058106690339762734, + "grad_norm": 0.846125602722168, + "learning_rate": 0.00019952136965716846, + "loss": 2.7875, + "step": 720 + }, + { + "epoch": 0.05818739407634573, + 
"grad_norm": 0.908833920955658, + "learning_rate": 0.00019951982568099187, + "loss": 2.7975, + "step": 721 + }, + { + "epoch": 0.058268097812928736, + "grad_norm": 0.8616230487823486, + "learning_rate": 0.00019951827922452106, + "loss": 2.7486, + "step": 722 + }, + { + "epoch": 0.05834880154951174, + "grad_norm": 0.8791850805282593, + "learning_rate": 0.00019951673028779462, + "loss": 2.8301, + "step": 723 + }, + { + "epoch": 0.058429505286094745, + "grad_norm": 0.9437321424484253, + "learning_rate": 0.00019951517887085112, + "loss": 2.7956, + "step": 724 + }, + { + "epoch": 0.05851020902267775, + "grad_norm": 0.9263394474983215, + "learning_rate": 0.00019951362497372922, + "loss": 2.867, + "step": 725 + }, + { + "epoch": 0.05859091275926075, + "grad_norm": 0.9442462921142578, + "learning_rate": 0.00019951206859646764, + "loss": 2.8447, + "step": 726 + }, + { + "epoch": 0.05867161649584376, + "grad_norm": 0.9286711812019348, + "learning_rate": 0.0001995105097391052, + "loss": 2.7588, + "step": 727 + }, + { + "epoch": 0.05875232023242676, + "grad_norm": 0.9338774085044861, + "learning_rate": 0.00019950894840168072, + "loss": 2.7394, + "step": 728 + }, + { + "epoch": 0.058833023969009766, + "grad_norm": 0.8880760073661804, + "learning_rate": 0.00019950738458423314, + "loss": 2.7949, + "step": 729 + }, + { + "epoch": 0.05891372770559277, + "grad_norm": 1.0091183185577393, + "learning_rate": 0.00019950581828680143, + "loss": 2.8633, + "step": 730 + }, + { + "epoch": 0.058994431442175775, + "grad_norm": 0.8657729625701904, + "learning_rate": 0.0001995042495094246, + "loss": 2.8649, + "step": 731 + }, + { + "epoch": 0.05907513517875878, + "grad_norm": 1.0084047317504883, + "learning_rate": 0.00019950267825214176, + "loss": 2.8422, + "step": 732 + }, + { + "epoch": 0.059155838915341784, + "grad_norm": 0.9096506237983704, + "learning_rate": 0.00019950110451499208, + "loss": 2.7908, + "step": 733 + }, + { + "epoch": 0.05923654265192478, + "grad_norm": 1.1338937282562256, + 
"learning_rate": 0.0001994995282980148, + "loss": 2.8093, + "step": 734 + }, + { + "epoch": 0.059317246388507786, + "grad_norm": 0.8813811540603638, + "learning_rate": 0.00019949794960124915, + "loss": 2.8866, + "step": 735 + }, + { + "epoch": 0.05939795012509079, + "grad_norm": 0.8457592129707336, + "learning_rate": 0.00019949636842473453, + "loss": 2.7744, + "step": 736 + }, + { + "epoch": 0.059478653861673794, + "grad_norm": 0.8731856346130371, + "learning_rate": 0.0001994947847685103, + "loss": 2.7822, + "step": 737 + }, + { + "epoch": 0.0595593575982568, + "grad_norm": 0.8915185332298279, + "learning_rate": 0.00019949319863261597, + "loss": 2.773, + "step": 738 + }, + { + "epoch": 0.0596400613348398, + "grad_norm": 0.9478987455368042, + "learning_rate": 0.00019949161001709106, + "loss": 2.8462, + "step": 739 + }, + { + "epoch": 0.05972076507142281, + "grad_norm": 0.8903716206550598, + "learning_rate": 0.00019949001892197515, + "loss": 2.7741, + "step": 740 + }, + { + "epoch": 0.05980146880800581, + "grad_norm": 0.8870117664337158, + "learning_rate": 0.00019948842534730786, + "loss": 2.8255, + "step": 741 + }, + { + "epoch": 0.059882172544588816, + "grad_norm": 1.0766080617904663, + "learning_rate": 0.00019948682929312898, + "loss": 2.8865, + "step": 742 + }, + { + "epoch": 0.05996287628117182, + "grad_norm": 0.846447229385376, + "learning_rate": 0.00019948523075947824, + "loss": 2.8441, + "step": 743 + }, + { + "epoch": 0.060043580017754825, + "grad_norm": 0.9847991466522217, + "learning_rate": 0.00019948362974639552, + "loss": 2.8099, + "step": 744 + }, + { + "epoch": 0.06012428375433783, + "grad_norm": 0.9170514941215515, + "learning_rate": 0.00019948202625392068, + "loss": 2.8797, + "step": 745 + }, + { + "epoch": 0.060204987490920826, + "grad_norm": 0.8564898371696472, + "learning_rate": 0.0001994804202820937, + "loss": 2.7993, + "step": 746 + }, + { + "epoch": 0.06028569122750383, + "grad_norm": 0.8527392148971558, + "learning_rate": 
0.00019947881183095457, + "loss": 2.7816, + "step": 747 + }, + { + "epoch": 0.060366394964086835, + "grad_norm": 0.9170876145362854, + "learning_rate": 0.00019947720090054342, + "loss": 2.8031, + "step": 748 + }, + { + "epoch": 0.06044709870066984, + "grad_norm": 0.8891414403915405, + "learning_rate": 0.0001994755874909004, + "loss": 2.8072, + "step": 749 + }, + { + "epoch": 0.060527802437252844, + "grad_norm": 0.8853670358657837, + "learning_rate": 0.0001994739716020657, + "loss": 2.8857, + "step": 750 + }, + { + "epoch": 0.06060850617383585, + "grad_norm": 0.9011211395263672, + "learning_rate": 0.0001994723532340796, + "loss": 2.8519, + "step": 751 + }, + { + "epoch": 0.06068920991041885, + "grad_norm": 0.8843330144882202, + "learning_rate": 0.00019947073238698243, + "loss": 2.7882, + "step": 752 + }, + { + "epoch": 0.06076991364700186, + "grad_norm": 0.8712944984436035, + "learning_rate": 0.00019946910906081463, + "loss": 2.791, + "step": 753 + }, + { + "epoch": 0.06085061738358486, + "grad_norm": 0.8296090364456177, + "learning_rate": 0.00019946748325561656, + "loss": 2.8073, + "step": 754 + }, + { + "epoch": 0.060931321120167865, + "grad_norm": 0.9239117503166199, + "learning_rate": 0.00019946585497142885, + "loss": 2.8209, + "step": 755 + }, + { + "epoch": 0.06101202485675087, + "grad_norm": 0.8885170221328735, + "learning_rate": 0.000199464224208292, + "loss": 2.8391, + "step": 756 + }, + { + "epoch": 0.061092728593333874, + "grad_norm": 0.933720588684082, + "learning_rate": 0.0001994625909662467, + "loss": 2.7635, + "step": 757 + }, + { + "epoch": 0.06117343232991688, + "grad_norm": 0.9751253724098206, + "learning_rate": 0.00019946095524533362, + "loss": 2.7933, + "step": 758 + }, + { + "epoch": 0.061254136066499876, + "grad_norm": 0.9469670057296753, + "learning_rate": 0.00019945931704559353, + "loss": 2.7652, + "step": 759 + }, + { + "epoch": 0.06133483980308288, + "grad_norm": 0.8559684157371521, + "learning_rate": 0.00019945767636706728, + "loss": 
2.8258, + "step": 760 + }, + { + "epoch": 0.061415543539665884, + "grad_norm": 1.021478295326233, + "learning_rate": 0.00019945603320979574, + "loss": 2.8047, + "step": 761 + }, + { + "epoch": 0.06149624727624889, + "grad_norm": 0.8421681523323059, + "learning_rate": 0.00019945438757381986, + "loss": 2.8233, + "step": 762 + }, + { + "epoch": 0.06157695101283189, + "grad_norm": 0.900654137134552, + "learning_rate": 0.0001994527394591807, + "loss": 2.7591, + "step": 763 + }, + { + "epoch": 0.0616576547494149, + "grad_norm": 0.878300666809082, + "learning_rate": 0.0001994510888659193, + "loss": 2.715, + "step": 764 + }, + { + "epoch": 0.0617383584859979, + "grad_norm": 0.9170855283737183, + "learning_rate": 0.00019944943579407678, + "loss": 2.8604, + "step": 765 + }, + { + "epoch": 0.061819062222580906, + "grad_norm": 0.8532859683036804, + "learning_rate": 0.00019944778024369434, + "loss": 2.8124, + "step": 766 + }, + { + "epoch": 0.06189976595916391, + "grad_norm": 0.8549049496650696, + "learning_rate": 0.00019944612221481332, + "loss": 2.8066, + "step": 767 + }, + { + "epoch": 0.061980469695746915, + "grad_norm": 0.9602857828140259, + "learning_rate": 0.00019944446170747492, + "loss": 2.8424, + "step": 768 + }, + { + "epoch": 0.06206117343232992, + "grad_norm": 0.910953164100647, + "learning_rate": 0.0001994427987217206, + "loss": 2.8093, + "step": 769 + }, + { + "epoch": 0.06214187716891292, + "grad_norm": 0.8536386489868164, + "learning_rate": 0.0001994411332575918, + "loss": 2.802, + "step": 770 + }, + { + "epoch": 0.06222258090549593, + "grad_norm": 0.9166232347488403, + "learning_rate": 0.00019943946531513, + "loss": 2.783, + "step": 771 + }, + { + "epoch": 0.062303284642078925, + "grad_norm": 0.9954056739807129, + "learning_rate": 0.00019943779489437678, + "loss": 2.8198, + "step": 772 + }, + { + "epoch": 0.06238398837866193, + "grad_norm": 0.8527171015739441, + "learning_rate": 0.0001994361219953738, + "loss": 2.8159, + "step": 773 + }, + { + "epoch": 
0.062464692115244934, + "grad_norm": 0.8951592445373535, + "learning_rate": 0.00019943444661816274, + "loss": 2.7969, + "step": 774 + }, + { + "epoch": 0.06254539585182795, + "grad_norm": 0.9348207116127014, + "learning_rate": 0.00019943276876278532, + "loss": 2.8403, + "step": 775 + }, + { + "epoch": 0.06262609958841095, + "grad_norm": 0.866318941116333, + "learning_rate": 0.00019943108842928342, + "loss": 2.7886, + "step": 776 + }, + { + "epoch": 0.06270680332499395, + "grad_norm": 0.8571285605430603, + "learning_rate": 0.00019942940561769884, + "loss": 2.771, + "step": 777 + }, + { + "epoch": 0.06278750706157694, + "grad_norm": 0.8384295105934143, + "learning_rate": 0.00019942772032807357, + "loss": 2.7885, + "step": 778 + }, + { + "epoch": 0.06286821079815995, + "grad_norm": 0.9934808611869812, + "learning_rate": 0.00019942603256044961, + "loss": 2.8399, + "step": 779 + }, + { + "epoch": 0.06294891453474295, + "grad_norm": 0.8275915384292603, + "learning_rate": 0.00019942434231486902, + "loss": 2.8983, + "step": 780 + }, + { + "epoch": 0.06302961827132596, + "grad_norm": 0.9073596000671387, + "learning_rate": 0.0001994226495913739, + "loss": 2.7886, + "step": 781 + }, + { + "epoch": 0.06311032200790896, + "grad_norm": 0.9091461300849915, + "learning_rate": 0.00019942095439000646, + "loss": 2.814, + "step": 782 + }, + { + "epoch": 0.06319102574449197, + "grad_norm": 0.9356934428215027, + "learning_rate": 0.000199419256710809, + "loss": 2.8238, + "step": 783 + }, + { + "epoch": 0.06327172948107497, + "grad_norm": 0.883514940738678, + "learning_rate": 0.00019941755655382374, + "loss": 2.7912, + "step": 784 + }, + { + "epoch": 0.06335243321765797, + "grad_norm": 0.8770506381988525, + "learning_rate": 0.00019941585391909308, + "loss": 2.7774, + "step": 785 + }, + { + "epoch": 0.06343313695424098, + "grad_norm": 0.8891726136207581, + "learning_rate": 0.00019941414880665948, + "loss": 2.7975, + "step": 786 + }, + { + "epoch": 0.06351384069082398, + "grad_norm": 
0.9280585050582886, + "learning_rate": 0.00019941244121656545, + "loss": 2.9468, + "step": 787 + }, + { + "epoch": 0.06359454442740699, + "grad_norm": 0.8545510768890381, + "learning_rate": 0.00019941073114885347, + "loss": 2.8165, + "step": 788 + }, + { + "epoch": 0.06367524816398999, + "grad_norm": 0.8631312847137451, + "learning_rate": 0.0001994090186035662, + "loss": 2.7955, + "step": 789 + }, + { + "epoch": 0.063755951900573, + "grad_norm": 0.8883851170539856, + "learning_rate": 0.00019940730358074634, + "loss": 2.7828, + "step": 790 + }, + { + "epoch": 0.063836655637156, + "grad_norm": 0.8421074748039246, + "learning_rate": 0.00019940558608043664, + "loss": 2.7999, + "step": 791 + }, + { + "epoch": 0.063917359373739, + "grad_norm": 0.918134868144989, + "learning_rate": 0.0001994038661026799, + "loss": 2.7888, + "step": 792 + }, + { + "epoch": 0.06399806311032201, + "grad_norm": 0.8513637781143188, + "learning_rate": 0.00019940214364751896, + "loss": 2.7719, + "step": 793 + }, + { + "epoch": 0.06407876684690501, + "grad_norm": 0.9181898236274719, + "learning_rate": 0.00019940041871499675, + "loss": 2.8345, + "step": 794 + }, + { + "epoch": 0.06415947058348802, + "grad_norm": 0.8129134774208069, + "learning_rate": 0.00019939869130515626, + "loss": 2.7316, + "step": 795 + }, + { + "epoch": 0.06424017432007102, + "grad_norm": 0.8782191872596741, + "learning_rate": 0.00019939696141804057, + "loss": 2.7852, + "step": 796 + }, + { + "epoch": 0.06432087805665403, + "grad_norm": 0.9064851403236389, + "learning_rate": 0.00019939522905369276, + "loss": 2.8105, + "step": 797 + }, + { + "epoch": 0.06440158179323703, + "grad_norm": 0.9888454675674438, + "learning_rate": 0.00019939349421215603, + "loss": 2.8496, + "step": 798 + }, + { + "epoch": 0.06448228552982004, + "grad_norm": 0.8717427253723145, + "learning_rate": 0.0001993917568934736, + "loss": 2.8227, + "step": 799 + }, + { + "epoch": 0.06456298926640304, + "grad_norm": 0.922980010509491, + "learning_rate": 
0.0001993900170976888, + "loss": 2.8571, + "step": 800 + }, + { + "epoch": 0.06464369300298604, + "grad_norm": 0.8311850428581238, + "learning_rate": 0.00019938827482484492, + "loss": 2.7905, + "step": 801 + }, + { + "epoch": 0.06472439673956905, + "grad_norm": 0.9274900555610657, + "learning_rate": 0.0001993865300749855, + "loss": 2.8526, + "step": 802 + }, + { + "epoch": 0.06480510047615205, + "grad_norm": 0.9072165489196777, + "learning_rate": 0.00019938478284815388, + "loss": 2.8384, + "step": 803 + }, + { + "epoch": 0.06488580421273504, + "grad_norm": 0.854099452495575, + "learning_rate": 0.0001993830331443937, + "loss": 2.8459, + "step": 804 + }, + { + "epoch": 0.06496650794931805, + "grad_norm": 0.824126660823822, + "learning_rate": 0.00019938128096374854, + "loss": 2.7845, + "step": 805 + }, + { + "epoch": 0.06504721168590105, + "grad_norm": 0.8570442795753479, + "learning_rate": 0.0001993795263062621, + "loss": 2.8446, + "step": 806 + }, + { + "epoch": 0.06512791542248406, + "grad_norm": 0.8998628854751587, + "learning_rate": 0.00019937776917197805, + "loss": 2.8604, + "step": 807 + }, + { + "epoch": 0.06520861915906706, + "grad_norm": 0.9189189076423645, + "learning_rate": 0.00019937600956094023, + "loss": 2.7866, + "step": 808 + }, + { + "epoch": 0.06528932289565006, + "grad_norm": 0.9471604824066162, + "learning_rate": 0.00019937424747319248, + "loss": 2.7619, + "step": 809 + }, + { + "epoch": 0.06537002663223307, + "grad_norm": 0.8507755994796753, + "learning_rate": 0.00019937248290877874, + "loss": 2.8259, + "step": 810 + }, + { + "epoch": 0.06545073036881607, + "grad_norm": 0.8800963759422302, + "learning_rate": 0.00019937071586774292, + "loss": 2.827, + "step": 811 + }, + { + "epoch": 0.06553143410539908, + "grad_norm": 0.8851124048233032, + "learning_rate": 0.00019936894635012915, + "loss": 2.793, + "step": 812 + }, + { + "epoch": 0.06561213784198208, + "grad_norm": 0.88127601146698, + "learning_rate": 0.00019936717435598144, + "loss": 2.8885, + 
"step": 813 + }, + { + "epoch": 0.06569284157856509, + "grad_norm": 0.9115073084831238, + "learning_rate": 0.000199365399885344, + "loss": 2.8278, + "step": 814 + }, + { + "epoch": 0.06577354531514809, + "grad_norm": 0.8722662925720215, + "learning_rate": 0.00019936362293826107, + "loss": 2.8125, + "step": 815 + }, + { + "epoch": 0.0658542490517311, + "grad_norm": 0.8332365155220032, + "learning_rate": 0.0001993618435147769, + "loss": 2.7682, + "step": 816 + }, + { + "epoch": 0.0659349527883141, + "grad_norm": 0.9524003863334656, + "learning_rate": 0.0001993600616149359, + "loss": 2.8166, + "step": 817 + }, + { + "epoch": 0.0660156565248971, + "grad_norm": 0.8402767181396484, + "learning_rate": 0.0001993582772387824, + "loss": 2.8192, + "step": 818 + }, + { + "epoch": 0.06609636026148011, + "grad_norm": 0.8589913249015808, + "learning_rate": 0.0001993564903863609, + "loss": 2.7785, + "step": 819 + }, + { + "epoch": 0.06617706399806311, + "grad_norm": 1.034550428390503, + "learning_rate": 0.00019935470105771598, + "loss": 2.8407, + "step": 820 + }, + { + "epoch": 0.06625776773464612, + "grad_norm": 0.856490969657898, + "learning_rate": 0.0001993529092528921, + "loss": 2.794, + "step": 821 + }, + { + "epoch": 0.06633847147122912, + "grad_norm": 0.897498369216919, + "learning_rate": 0.0001993511149719341, + "loss": 2.7959, + "step": 822 + }, + { + "epoch": 0.06641917520781213, + "grad_norm": 0.8495277166366577, + "learning_rate": 0.00019934931821488658, + "loss": 2.783, + "step": 823 + }, + { + "epoch": 0.06649987894439513, + "grad_norm": 0.8362239599227905, + "learning_rate": 0.00019934751898179436, + "loss": 2.8628, + "step": 824 + }, + { + "epoch": 0.06658058268097813, + "grad_norm": 0.8702061176300049, + "learning_rate": 0.00019934571727270225, + "loss": 2.7878, + "step": 825 + }, + { + "epoch": 0.06666128641756114, + "grad_norm": 0.8341560363769531, + "learning_rate": 0.0001993439130876552, + "loss": 2.7345, + "step": 826 + }, + { + "epoch": 0.06674199015414414, 
+ "grad_norm": 0.880181074142456, + "learning_rate": 0.00019934210642669813, + "loss": 2.7789, + "step": 827 + }, + { + "epoch": 0.06682269389072715, + "grad_norm": 0.9088126420974731, + "learning_rate": 0.00019934029728987607, + "loss": 2.7893, + "step": 828 + }, + { + "epoch": 0.06690339762731014, + "grad_norm": 0.8087106347084045, + "learning_rate": 0.00019933848567723416, + "loss": 2.7967, + "step": 829 + }, + { + "epoch": 0.06698410136389314, + "grad_norm": 0.8970876336097717, + "learning_rate": 0.00019933667158881745, + "loss": 2.8837, + "step": 830 + }, + { + "epoch": 0.06706480510047615, + "grad_norm": 0.9344804883003235, + "learning_rate": 0.00019933485502467128, + "loss": 2.7754, + "step": 831 + }, + { + "epoch": 0.06714550883705915, + "grad_norm": 0.8119301795959473, + "learning_rate": 0.00019933303598484084, + "loss": 2.7919, + "step": 832 + }, + { + "epoch": 0.06722621257364216, + "grad_norm": 0.9370681047439575, + "learning_rate": 0.00019933121446937148, + "loss": 2.8011, + "step": 833 + }, + { + "epoch": 0.06730691631022516, + "grad_norm": 0.8358973264694214, + "learning_rate": 0.00019932939047830858, + "loss": 2.8339, + "step": 834 + }, + { + "epoch": 0.06738762004680816, + "grad_norm": 0.8565972447395325, + "learning_rate": 0.00019932756401169765, + "loss": 2.8269, + "step": 835 + }, + { + "epoch": 0.06746832378339117, + "grad_norm": 0.8405514359474182, + "learning_rate": 0.00019932573506958417, + "loss": 2.7621, + "step": 836 + }, + { + "epoch": 0.06754902751997417, + "grad_norm": 0.8217617869377136, + "learning_rate": 0.00019932390365201373, + "loss": 2.8363, + "step": 837 + }, + { + "epoch": 0.06762973125655718, + "grad_norm": 0.9121438264846802, + "learning_rate": 0.00019932206975903198, + "loss": 2.8033, + "step": 838 + }, + { + "epoch": 0.06771043499314018, + "grad_norm": 0.9113054871559143, + "learning_rate": 0.00019932023339068464, + "loss": 2.8696, + "step": 839 + }, + { + "epoch": 0.06779113872972319, + "grad_norm": 0.8638293743133545, + 
"learning_rate": 0.00019931839454701743, + "loss": 2.8008, + "step": 840 + }, + { + "epoch": 0.06787184246630619, + "grad_norm": 0.862932562828064, + "learning_rate": 0.0001993165532280762, + "loss": 2.8092, + "step": 841 + }, + { + "epoch": 0.0679525462028892, + "grad_norm": 0.9089607000350952, + "learning_rate": 0.00019931470943390685, + "loss": 2.8921, + "step": 842 + }, + { + "epoch": 0.0680332499394722, + "grad_norm": 0.9233555793762207, + "learning_rate": 0.00019931286316455537, + "loss": 2.9025, + "step": 843 + }, + { + "epoch": 0.0681139536760552, + "grad_norm": 0.9403017163276672, + "learning_rate": 0.0001993110144200677, + "loss": 2.7875, + "step": 844 + }, + { + "epoch": 0.06819465741263821, + "grad_norm": 0.9194290637969971, + "learning_rate": 0.00019930916320048996, + "loss": 2.8254, + "step": 845 + }, + { + "epoch": 0.06827536114922121, + "grad_norm": 0.8238688111305237, + "learning_rate": 0.00019930730950586828, + "loss": 2.82, + "step": 846 + }, + { + "epoch": 0.06835606488580422, + "grad_norm": 0.8560660481452942, + "learning_rate": 0.00019930545333624885, + "loss": 2.8516, + "step": 847 + }, + { + "epoch": 0.06843676862238722, + "grad_norm": 0.9127222895622253, + "learning_rate": 0.0001993035946916779, + "loss": 2.7674, + "step": 848 + }, + { + "epoch": 0.06851747235897022, + "grad_norm": 0.8679420948028564, + "learning_rate": 0.00019930173357220182, + "loss": 2.777, + "step": 849 + }, + { + "epoch": 0.06859817609555323, + "grad_norm": 0.9686945676803589, + "learning_rate": 0.00019929986997786699, + "loss": 2.7841, + "step": 850 + }, + { + "epoch": 0.06867887983213623, + "grad_norm": 0.8366333246231079, + "learning_rate": 0.00019929800390871977, + "loss": 2.7993, + "step": 851 + }, + { + "epoch": 0.06875958356871924, + "grad_norm": 0.8374585509300232, + "learning_rate": 0.00019929613536480675, + "loss": 2.7545, + "step": 852 + }, + { + "epoch": 0.06884028730530224, + "grad_norm": 0.9843763709068298, + "learning_rate": 0.00019929426434617451, + 
"loss": 2.8118, + "step": 853 + }, + { + "epoch": 0.06892099104188525, + "grad_norm": 0.8093454241752625, + "learning_rate": 0.0001992923908528696, + "loss": 2.7301, + "step": 854 + }, + { + "epoch": 0.06900169477846824, + "grad_norm": 0.8374418020248413, + "learning_rate": 0.00019929051488493877, + "loss": 2.7745, + "step": 855 + }, + { + "epoch": 0.06908239851505124, + "grad_norm": 0.869965136051178, + "learning_rate": 0.00019928863644242875, + "loss": 2.7637, + "step": 856 + }, + { + "epoch": 0.06916310225163425, + "grad_norm": 0.9280590415000916, + "learning_rate": 0.00019928675552538638, + "loss": 2.7792, + "step": 857 + }, + { + "epoch": 0.06924380598821725, + "grad_norm": 0.8624193668365479, + "learning_rate": 0.00019928487213385852, + "loss": 2.7755, + "step": 858 + }, + { + "epoch": 0.06932450972480025, + "grad_norm": 0.8379972577095032, + "learning_rate": 0.00019928298626789212, + "loss": 2.8563, + "step": 859 + }, + { + "epoch": 0.06940521346138326, + "grad_norm": 0.9272914528846741, + "learning_rate": 0.00019928109792753418, + "loss": 2.836, + "step": 860 + }, + { + "epoch": 0.06948591719796626, + "grad_norm": 0.9239040613174438, + "learning_rate": 0.00019927920711283175, + "loss": 2.7999, + "step": 861 + }, + { + "epoch": 0.06956662093454927, + "grad_norm": 0.9125113487243652, + "learning_rate": 0.00019927731382383195, + "loss": 2.8494, + "step": 862 + }, + { + "epoch": 0.06964732467113227, + "grad_norm": 0.8782855868339539, + "learning_rate": 0.00019927541806058198, + "loss": 2.767, + "step": 863 + }, + { + "epoch": 0.06972802840771528, + "grad_norm": 0.8815447092056274, + "learning_rate": 0.00019927351982312907, + "loss": 2.7877, + "step": 864 + }, + { + "epoch": 0.06980873214429828, + "grad_norm": 0.8555476069450378, + "learning_rate": 0.00019927161911152056, + "loss": 2.8057, + "step": 865 + }, + { + "epoch": 0.06988943588088128, + "grad_norm": 0.8562924265861511, + "learning_rate": 0.00019926971592580382, + "loss": 2.8049, + "step": 866 + }, + { + 
"epoch": 0.06997013961746429, + "grad_norm": 0.846503734588623, + "learning_rate": 0.00019926781026602625, + "loss": 2.8545, + "step": 867 + }, + { + "epoch": 0.07005084335404729, + "grad_norm": 0.8439623713493347, + "learning_rate": 0.00019926590213223535, + "loss": 2.7451, + "step": 868 + }, + { + "epoch": 0.0701315470906303, + "grad_norm": 0.8471730351448059, + "learning_rate": 0.00019926399152447868, + "loss": 2.7879, + "step": 869 + }, + { + "epoch": 0.0702122508272133, + "grad_norm": 0.8721400499343872, + "learning_rate": 0.00019926207844280387, + "loss": 2.8594, + "step": 870 + }, + { + "epoch": 0.0702929545637963, + "grad_norm": 0.8110925555229187, + "learning_rate": 0.0001992601628872586, + "loss": 2.7789, + "step": 871 + }, + { + "epoch": 0.07037365830037931, + "grad_norm": 0.9593119025230408, + "learning_rate": 0.0001992582448578906, + "loss": 2.8792, + "step": 872 + }, + { + "epoch": 0.07045436203696231, + "grad_norm": 0.8553354144096375, + "learning_rate": 0.00019925632435474765, + "loss": 2.8056, + "step": 873 + }, + { + "epoch": 0.07053506577354532, + "grad_norm": 0.8062612414360046, + "learning_rate": 0.00019925440137787768, + "loss": 2.7762, + "step": 874 + }, + { + "epoch": 0.07061576951012832, + "grad_norm": 0.8264921307563782, + "learning_rate": 0.00019925247592732858, + "loss": 2.8435, + "step": 875 + }, + { + "epoch": 0.07069647324671133, + "grad_norm": 0.7770401835441589, + "learning_rate": 0.00019925054800314828, + "loss": 2.7846, + "step": 876 + }, + { + "epoch": 0.07077717698329433, + "grad_norm": 0.8426765203475952, + "learning_rate": 0.0001992486176053849, + "loss": 2.782, + "step": 877 + }, + { + "epoch": 0.07085788071987734, + "grad_norm": 0.855330228805542, + "learning_rate": 0.00019924668473408655, + "loss": 2.8051, + "step": 878 + }, + { + "epoch": 0.07093858445646034, + "grad_norm": 0.8762049674987793, + "learning_rate": 0.00019924474938930135, + "loss": 2.7634, + "step": 879 + }, + { + "epoch": 0.07101928819304333, + "grad_norm": 
0.9226812124252319, + "learning_rate": 0.0001992428115710776, + "loss": 2.8342, + "step": 880 + }, + { + "epoch": 0.07109999192962634, + "grad_norm": 0.9031660556793213, + "learning_rate": 0.00019924087127946353, + "loss": 2.7953, + "step": 881 + }, + { + "epoch": 0.07118069566620934, + "grad_norm": 1.0151792764663696, + "learning_rate": 0.00019923892851450757, + "loss": 2.8225, + "step": 882 + }, + { + "epoch": 0.07126139940279234, + "grad_norm": 0.9805678725242615, + "learning_rate": 0.00019923698327625806, + "loss": 2.7727, + "step": 883 + }, + { + "epoch": 0.07134210313937535, + "grad_norm": 0.8831729888916016, + "learning_rate": 0.00019923503556476356, + "loss": 2.7682, + "step": 884 + }, + { + "epoch": 0.07142280687595835, + "grad_norm": 1.0311404466629028, + "learning_rate": 0.00019923308538007253, + "loss": 2.8422, + "step": 885 + }, + { + "epoch": 0.07150351061254136, + "grad_norm": 0.8143388628959656, + "learning_rate": 0.0001992311327222336, + "loss": 2.7876, + "step": 886 + }, + { + "epoch": 0.07158421434912436, + "grad_norm": 0.877017617225647, + "learning_rate": 0.00019922917759129552, + "loss": 2.7486, + "step": 887 + }, + { + "epoch": 0.07166491808570737, + "grad_norm": 0.930646538734436, + "learning_rate": 0.0001992272199873069, + "loss": 2.8022, + "step": 888 + }, + { + "epoch": 0.07174562182229037, + "grad_norm": 0.934753954410553, + "learning_rate": 0.00019922525991031655, + "loss": 2.8485, + "step": 889 + }, + { + "epoch": 0.07182632555887337, + "grad_norm": 0.9564220905303955, + "learning_rate": 0.00019922329736037339, + "loss": 2.761, + "step": 890 + }, + { + "epoch": 0.07190702929545638, + "grad_norm": 0.9457311630249023, + "learning_rate": 0.00019922133233752626, + "loss": 2.8279, + "step": 891 + }, + { + "epoch": 0.07198773303203938, + "grad_norm": 0.9385658502578735, + "learning_rate": 0.0001992193648418242, + "loss": 2.8222, + "step": 892 + }, + { + "epoch": 0.07206843676862239, + "grad_norm": 1.0157524347305298, + "learning_rate": 
0.00019921739487331616, + "loss": 2.9166, + "step": 893 + }, + { + "epoch": 0.07214914050520539, + "grad_norm": 0.9143860340118408, + "learning_rate": 0.00019921542243205132, + "loss": 2.8139, + "step": 894 + }, + { + "epoch": 0.0722298442417884, + "grad_norm": 0.8769320249557495, + "learning_rate": 0.00019921344751807878, + "loss": 2.8023, + "step": 895 + }, + { + "epoch": 0.0723105479783714, + "grad_norm": 0.9647517204284668, + "learning_rate": 0.0001992114701314478, + "loss": 2.8872, + "step": 896 + }, + { + "epoch": 0.0723912517149544, + "grad_norm": 1.025978446006775, + "learning_rate": 0.00019920949027220762, + "loss": 2.837, + "step": 897 + }, + { + "epoch": 0.07247195545153741, + "grad_norm": 0.8848521113395691, + "learning_rate": 0.0001992075079404076, + "loss": 2.7498, + "step": 898 + }, + { + "epoch": 0.07255265918812041, + "grad_norm": 0.9395595788955688, + "learning_rate": 0.0001992055231360972, + "loss": 2.8752, + "step": 899 + }, + { + "epoch": 0.07263336292470342, + "grad_norm": 0.8711572885513306, + "learning_rate": 0.00019920353585932578, + "loss": 2.8608, + "step": 900 + }, + { + "epoch": 0.07271406666128642, + "grad_norm": 0.8606846332550049, + "learning_rate": 0.00019920154611014295, + "loss": 2.829, + "step": 901 + }, + { + "epoch": 0.07279477039786943, + "grad_norm": 0.859354555606842, + "learning_rate": 0.0001991995538885983, + "loss": 2.8102, + "step": 902 + }, + { + "epoch": 0.07287547413445243, + "grad_norm": 0.9063243865966797, + "learning_rate": 0.00019919755919474143, + "loss": 2.8509, + "step": 903 + }, + { + "epoch": 0.07295617787103544, + "grad_norm": 0.8321940898895264, + "learning_rate": 0.00019919556202862207, + "loss": 2.796, + "step": 904 + }, + { + "epoch": 0.07303688160761844, + "grad_norm": 0.8875191807746887, + "learning_rate": 0.00019919356239029003, + "loss": 2.8672, + "step": 905 + }, + { + "epoch": 0.07311758534420143, + "grad_norm": 0.9028071165084839, + "learning_rate": 0.0001991915602797951, + "loss": 2.8926, + 
"step": 906 + }, + { + "epoch": 0.07319828908078443, + "grad_norm": 0.9449291825294495, + "learning_rate": 0.0001991895556971872, + "loss": 2.8159, + "step": 907 + }, + { + "epoch": 0.07327899281736744, + "grad_norm": 0.871576189994812, + "learning_rate": 0.0001991875486425163, + "loss": 2.8162, + "step": 908 + }, + { + "epoch": 0.07335969655395044, + "grad_norm": 0.818423330783844, + "learning_rate": 0.0001991855391158324, + "loss": 2.8882, + "step": 909 + }, + { + "epoch": 0.07344040029053345, + "grad_norm": 0.8802343606948853, + "learning_rate": 0.0001991835271171856, + "loss": 2.8245, + "step": 910 + }, + { + "epoch": 0.07352110402711645, + "grad_norm": 0.916023313999176, + "learning_rate": 0.000199181512646626, + "loss": 2.8966, + "step": 911 + }, + { + "epoch": 0.07360180776369946, + "grad_norm": 1.0663317441940308, + "learning_rate": 0.0001991794957042039, + "loss": 2.7736, + "step": 912 + }, + { + "epoch": 0.07368251150028246, + "grad_norm": 0.9212445616722107, + "learning_rate": 0.00019917747628996947, + "loss": 2.7924, + "step": 913 + }, + { + "epoch": 0.07376321523686546, + "grad_norm": 0.9785256385803223, + "learning_rate": 0.00019917545440397308, + "loss": 2.8021, + "step": 914 + }, + { + "epoch": 0.07384391897344847, + "grad_norm": 0.8510444760322571, + "learning_rate": 0.00019917343004626514, + "loss": 2.7991, + "step": 915 + }, + { + "epoch": 0.07392462271003147, + "grad_norm": 0.8967106342315674, + "learning_rate": 0.0001991714032168961, + "loss": 2.8838, + "step": 916 + }, + { + "epoch": 0.07400532644661448, + "grad_norm": 0.8940563797950745, + "learning_rate": 0.0001991693739159164, + "loss": 2.8124, + "step": 917 + }, + { + "epoch": 0.07408603018319748, + "grad_norm": 0.9270479679107666, + "learning_rate": 0.0001991673421433767, + "loss": 2.7627, + "step": 918 + }, + { + "epoch": 0.07416673391978049, + "grad_norm": 0.905805230140686, + "learning_rate": 0.0001991653078993276, + "loss": 2.781, + "step": 919 + }, + { + "epoch": 0.07424743765636349, 
+ "grad_norm": 0.9295129179954529, + "learning_rate": 0.00019916327118381982, + "loss": 2.8332, + "step": 920 + }, + { + "epoch": 0.0743281413929465, + "grad_norm": 0.863331139087677, + "learning_rate": 0.00019916123199690408, + "loss": 2.8489, + "step": 921 + }, + { + "epoch": 0.0744088451295295, + "grad_norm": 0.9966896772384644, + "learning_rate": 0.00019915919033863127, + "loss": 2.9107, + "step": 922 + }, + { + "epoch": 0.0744895488661125, + "grad_norm": 0.8921390771865845, + "learning_rate": 0.00019915714620905218, + "loss": 2.7668, + "step": 923 + }, + { + "epoch": 0.07457025260269551, + "grad_norm": 0.9378434419631958, + "learning_rate": 0.00019915509960821782, + "loss": 2.8305, + "step": 924 + }, + { + "epoch": 0.07465095633927851, + "grad_norm": 1.0351817607879639, + "learning_rate": 0.0001991530505361792, + "loss": 2.9412, + "step": 925 + }, + { + "epoch": 0.07473166007586152, + "grad_norm": 0.7995476722717285, + "learning_rate": 0.0001991509989929874, + "loss": 2.7872, + "step": 926 + }, + { + "epoch": 0.07481236381244452, + "grad_norm": 0.858830988407135, + "learning_rate": 0.0001991489449786935, + "loss": 2.7775, + "step": 927 + }, + { + "epoch": 0.07489306754902753, + "grad_norm": 1.1254682540893555, + "learning_rate": 0.00019914688849334867, + "loss": 2.7913, + "step": 928 + }, + { + "epoch": 0.07497377128561053, + "grad_norm": 0.9475330710411072, + "learning_rate": 0.00019914482953700428, + "loss": 2.7945, + "step": 929 + }, + { + "epoch": 0.07505447502219353, + "grad_norm": 0.8427290916442871, + "learning_rate": 0.00019914276810971152, + "loss": 2.8297, + "step": 930 + }, + { + "epoch": 0.07513517875877652, + "grad_norm": 0.9308956265449524, + "learning_rate": 0.00019914070421152183, + "loss": 2.8534, + "step": 931 + }, + { + "epoch": 0.07521588249535953, + "grad_norm": 0.9264787435531616, + "learning_rate": 0.00019913863784248664, + "loss": 2.7959, + "step": 932 + }, + { + "epoch": 0.07529658623194253, + "grad_norm": 0.8432087302207947, + 
"learning_rate": 0.00019913656900265742, + "loss": 2.8479, + "step": 933 + }, + { + "epoch": 0.07537728996852554, + "grad_norm": 0.8237274885177612, + "learning_rate": 0.0001991344976920858, + "loss": 2.782, + "step": 934 + }, + { + "epoch": 0.07545799370510854, + "grad_norm": 0.8143243789672852, + "learning_rate": 0.0001991324239108233, + "loss": 2.7567, + "step": 935 + }, + { + "epoch": 0.07553869744169155, + "grad_norm": 0.8824434280395508, + "learning_rate": 0.0001991303476589217, + "loss": 2.7971, + "step": 936 + }, + { + "epoch": 0.07561940117827455, + "grad_norm": 0.8202407360076904, + "learning_rate": 0.00019912826893643272, + "loss": 2.7825, + "step": 937 + }, + { + "epoch": 0.07570010491485755, + "grad_norm": 0.8001337647438049, + "learning_rate": 0.00019912618774340813, + "loss": 2.8294, + "step": 938 + }, + { + "epoch": 0.07578080865144056, + "grad_norm": 0.8875572085380554, + "learning_rate": 0.00019912410407989982, + "loss": 2.8013, + "step": 939 + }, + { + "epoch": 0.07586151238802356, + "grad_norm": 0.8676280379295349, + "learning_rate": 0.0001991220179459597, + "loss": 2.767, + "step": 940 + }, + { + "epoch": 0.07594221612460657, + "grad_norm": 0.9767136573791504, + "learning_rate": 0.00019911992934163982, + "loss": 2.8315, + "step": 941 + }, + { + "epoch": 0.07602291986118957, + "grad_norm": 0.8690733909606934, + "learning_rate": 0.0001991178382669922, + "loss": 2.8042, + "step": 942 + }, + { + "epoch": 0.07610362359777258, + "grad_norm": 0.862978458404541, + "learning_rate": 0.00019911574472206893, + "loss": 2.8243, + "step": 943 + }, + { + "epoch": 0.07618432733435558, + "grad_norm": 0.9116127490997314, + "learning_rate": 0.00019911364870692225, + "loss": 2.7377, + "step": 944 + }, + { + "epoch": 0.07626503107093859, + "grad_norm": 0.8765420317649841, + "learning_rate": 0.00019911155022160433, + "loss": 2.7673, + "step": 945 + }, + { + "epoch": 0.07634573480752159, + "grad_norm": 0.8229342699050903, + "learning_rate": 0.0001991094492661675, + 
"loss": 2.7749, + "step": 946 + }, + { + "epoch": 0.0764264385441046, + "grad_norm": 0.8340098261833191, + "learning_rate": 0.00019910734584066412, + "loss": 2.7871, + "step": 947 + }, + { + "epoch": 0.0765071422806876, + "grad_norm": 0.8116940259933472, + "learning_rate": 0.0001991052399451466, + "loss": 2.8202, + "step": 948 + }, + { + "epoch": 0.0765878460172706, + "grad_norm": 0.8730412721633911, + "learning_rate": 0.00019910313157966747, + "loss": 2.8661, + "step": 949 + }, + { + "epoch": 0.07666854975385361, + "grad_norm": 0.8272213339805603, + "learning_rate": 0.0001991010207442792, + "loss": 2.8352, + "step": 950 + }, + { + "epoch": 0.07674925349043661, + "grad_norm": 0.8586944937705994, + "learning_rate": 0.0001990989074390345, + "loss": 2.8018, + "step": 951 + }, + { + "epoch": 0.07682995722701962, + "grad_norm": 0.81830894947052, + "learning_rate": 0.00019909679166398592, + "loss": 2.8154, + "step": 952 + }, + { + "epoch": 0.07691066096360262, + "grad_norm": 0.8158484101295471, + "learning_rate": 0.00019909467341918627, + "loss": 2.7618, + "step": 953 + }, + { + "epoch": 0.07699136470018562, + "grad_norm": 0.816834032535553, + "learning_rate": 0.00019909255270468833, + "loss": 2.8125, + "step": 954 + }, + { + "epoch": 0.07707206843676863, + "grad_norm": 0.944790780544281, + "learning_rate": 0.00019909042952054496, + "loss": 2.8054, + "step": 955 + }, + { + "epoch": 0.07715277217335163, + "grad_norm": 0.9281302690505981, + "learning_rate": 0.00019908830386680904, + "loss": 2.8724, + "step": 956 + }, + { + "epoch": 0.07723347590993462, + "grad_norm": 0.8850300908088684, + "learning_rate": 0.00019908617574353356, + "loss": 2.7906, + "step": 957 + }, + { + "epoch": 0.07731417964651763, + "grad_norm": 0.8997938632965088, + "learning_rate": 0.00019908404515077158, + "loss": 2.7814, + "step": 958 + }, + { + "epoch": 0.07739488338310063, + "grad_norm": 0.8814194798469543, + "learning_rate": 0.0001990819120885762, + "loss": 2.7423, + "step": 959 + }, + { + 
"epoch": 0.07747558711968364, + "grad_norm": 0.8759928345680237, + "learning_rate": 0.00019907977655700054, + "loss": 2.7803, + "step": 960 + }, + { + "epoch": 0.07755629085626664, + "grad_norm": 0.8439476490020752, + "learning_rate": 0.00019907763855609787, + "loss": 2.8277, + "step": 961 + }, + { + "epoch": 0.07763699459284965, + "grad_norm": 0.8745121955871582, + "learning_rate": 0.00019907549808592144, + "loss": 2.8152, + "step": 962 + }, + { + "epoch": 0.07771769832943265, + "grad_norm": 1.0439598560333252, + "learning_rate": 0.00019907335514652465, + "loss": 2.7882, + "step": 963 + }, + { + "epoch": 0.07779840206601565, + "grad_norm": 0.9516503810882568, + "learning_rate": 0.00019907120973796082, + "loss": 2.8555, + "step": 964 + }, + { + "epoch": 0.07787910580259866, + "grad_norm": 0.928717315196991, + "learning_rate": 0.0001990690618602835, + "loss": 2.8214, + "step": 965 + }, + { + "epoch": 0.07795980953918166, + "grad_norm": 0.7923071384429932, + "learning_rate": 0.00019906691151354617, + "loss": 2.8153, + "step": 966 + }, + { + "epoch": 0.07804051327576467, + "grad_norm": 0.8783324956893921, + "learning_rate": 0.00019906475869780246, + "loss": 2.7691, + "step": 967 + }, + { + "epoch": 0.07812121701234767, + "grad_norm": 0.8974801301956177, + "learning_rate": 0.000199062603413106, + "loss": 2.8156, + "step": 968 + }, + { + "epoch": 0.07820192074893068, + "grad_norm": 0.9304391741752625, + "learning_rate": 0.00019906044565951052, + "loss": 2.8489, + "step": 969 + }, + { + "epoch": 0.07828262448551368, + "grad_norm": 0.8351098895072937, + "learning_rate": 0.00019905828543706976, + "loss": 2.7744, + "step": 970 + }, + { + "epoch": 0.07836332822209668, + "grad_norm": 0.8634265065193176, + "learning_rate": 0.0001990561227458376, + "loss": 2.8193, + "step": 971 + }, + { + "epoch": 0.07844403195867969, + "grad_norm": 0.8969653248786926, + "learning_rate": 0.00019905395758586792, + "loss": 2.7548, + "step": 972 + }, + { + "epoch": 0.07852473569526269, + 
"grad_norm": 0.8964852094650269, + "learning_rate": 0.0001990517899572147, + "loss": 2.8037, + "step": 973 + }, + { + "epoch": 0.0786054394318457, + "grad_norm": 0.8567596077919006, + "learning_rate": 0.00019904961985993196, + "loss": 2.7942, + "step": 974 + }, + { + "epoch": 0.0786861431684287, + "grad_norm": 0.8275273442268372, + "learning_rate": 0.00019904744729407374, + "loss": 2.8359, + "step": 975 + }, + { + "epoch": 0.0787668469050117, + "grad_norm": 0.9458810091018677, + "learning_rate": 0.00019904527225969424, + "loss": 2.8354, + "step": 976 + }, + { + "epoch": 0.07884755064159471, + "grad_norm": 0.8690593838691711, + "learning_rate": 0.00019904309475684767, + "loss": 2.7894, + "step": 977 + }, + { + "epoch": 0.07892825437817771, + "grad_norm": 0.810279130935669, + "learning_rate": 0.00019904091478558823, + "loss": 2.7939, + "step": 978 + }, + { + "epoch": 0.07900895811476072, + "grad_norm": 0.8779012560844421, + "learning_rate": 0.0001990387323459703, + "loss": 2.7551, + "step": 979 + }, + { + "epoch": 0.07908966185134372, + "grad_norm": 0.7936381101608276, + "learning_rate": 0.00019903654743804833, + "loss": 2.814, + "step": 980 + }, + { + "epoch": 0.07917036558792673, + "grad_norm": 0.9567989110946655, + "learning_rate": 0.00019903436006187667, + "loss": 2.7715, + "step": 981 + }, + { + "epoch": 0.07925106932450972, + "grad_norm": 0.9250255823135376, + "learning_rate": 0.00019903217021750987, + "loss": 2.8967, + "step": 982 + }, + { + "epoch": 0.07933177306109272, + "grad_norm": 0.8342804312705994, + "learning_rate": 0.00019902997790500256, + "loss": 2.7728, + "step": 983 + }, + { + "epoch": 0.07941247679767573, + "grad_norm": 0.8321473598480225, + "learning_rate": 0.00019902778312440932, + "loss": 2.8479, + "step": 984 + }, + { + "epoch": 0.07949318053425873, + "grad_norm": 0.894727885723114, + "learning_rate": 0.00019902558587578484, + "loss": 2.8211, + "step": 985 + }, + { + "epoch": 0.07957388427084174, + "grad_norm": 0.8093457221984863, + 
"learning_rate": 0.0001990233861591839, + "loss": 2.7481, + "step": 986 + }, + { + "epoch": 0.07965458800742474, + "grad_norm": 0.8626284599304199, + "learning_rate": 0.00019902118397466132, + "loss": 2.8368, + "step": 987 + }, + { + "epoch": 0.07973529174400774, + "grad_norm": 0.799648642539978, + "learning_rate": 0.00019901897932227204, + "loss": 2.8713, + "step": 988 + }, + { + "epoch": 0.07981599548059075, + "grad_norm": 0.9658265709877014, + "learning_rate": 0.00019901677220207092, + "loss": 2.7284, + "step": 989 + }, + { + "epoch": 0.07989669921717375, + "grad_norm": 0.877299427986145, + "learning_rate": 0.00019901456261411303, + "loss": 2.7916, + "step": 990 + }, + { + "epoch": 0.07997740295375676, + "grad_norm": 0.926450252532959, + "learning_rate": 0.00019901235055845337, + "loss": 2.8207, + "step": 991 + }, + { + "epoch": 0.08005810669033976, + "grad_norm": 0.8858455419540405, + "learning_rate": 0.00019901013603514716, + "loss": 2.795, + "step": 992 + }, + { + "epoch": 0.08013881042692277, + "grad_norm": 0.8619922995567322, + "learning_rate": 0.0001990079190442495, + "loss": 2.8163, + "step": 993 + }, + { + "epoch": 0.08021951416350577, + "grad_norm": 0.859200656414032, + "learning_rate": 0.00019900569958581572, + "loss": 2.7715, + "step": 994 + }, + { + "epoch": 0.08030021790008877, + "grad_norm": 0.8346282839775085, + "learning_rate": 0.0001990034776599011, + "loss": 2.8312, + "step": 995 + }, + { + "epoch": 0.08038092163667178, + "grad_norm": 0.9188725352287292, + "learning_rate": 0.00019900125326656102, + "loss": 2.799, + "step": 996 + }, + { + "epoch": 0.08046162537325478, + "grad_norm": 0.8548648953437805, + "learning_rate": 0.00019899902640585092, + "loss": 2.7778, + "step": 997 + }, + { + "epoch": 0.08054232910983779, + "grad_norm": 0.8883183002471924, + "learning_rate": 0.00019899679707782624, + "loss": 2.809, + "step": 998 + }, + { + "epoch": 0.08062303284642079, + "grad_norm": 0.8915852308273315, + "learning_rate": 0.00019899456528254267, + 
"loss": 2.8309, + "step": 999 + }, + { + "epoch": 0.0807037365830038, + "grad_norm": 0.8092094659805298, + "learning_rate": 0.00019899233102005573, + "loss": 2.7753, + "step": 1000 + }, + { + "epoch": 0.0807037365830038, + "eval_loss": 2.7104671001434326, + "eval_runtime": 773.7354, + "eval_samples_per_second": 3.386, + "eval_steps_per_second": 0.565, + "step": 1000 + }, + { + "epoch": 0.0807844403195868, + "grad_norm": 0.8744900226593018, + "learning_rate": 0.00019899009429042114, + "loss": 2.7948, + "step": 1001 + }, + { + "epoch": 0.0808651440561698, + "grad_norm": 0.8749974370002747, + "learning_rate": 0.0001989878550936946, + "loss": 2.7609, + "step": 1002 + }, + { + "epoch": 0.08094584779275281, + "grad_norm": 0.8622820377349854, + "learning_rate": 0.000198985613429932, + "loss": 2.8023, + "step": 1003 + }, + { + "epoch": 0.08102655152933581, + "grad_norm": 0.9404367208480835, + "learning_rate": 0.00019898336929918915, + "loss": 2.7992, + "step": 1004 + }, + { + "epoch": 0.08110725526591882, + "grad_norm": 0.8846708536148071, + "learning_rate": 0.000198981122701522, + "loss": 2.8084, + "step": 1005 + }, + { + "epoch": 0.08118795900250182, + "grad_norm": 0.8105908036231995, + "learning_rate": 0.0001989788736369865, + "loss": 2.8504, + "step": 1006 + }, + { + "epoch": 0.08126866273908483, + "grad_norm": 1.0107187032699585, + "learning_rate": 0.0001989766221056388, + "loss": 2.7935, + "step": 1007 + }, + { + "epoch": 0.08134936647566782, + "grad_norm": 0.7825451493263245, + "learning_rate": 0.0001989743681075349, + "loss": 2.8024, + "step": 1008 + }, + { + "epoch": 0.08143007021225082, + "grad_norm": 0.8478613495826721, + "learning_rate": 0.000198972111642731, + "loss": 2.8645, + "step": 1009 + }, + { + "epoch": 0.08151077394883383, + "grad_norm": 0.8432144522666931, + "learning_rate": 0.0001989698527112834, + "loss": 2.8469, + "step": 1010 + }, + { + "epoch": 0.08159147768541683, + "grad_norm": 0.8147936463356018, + "learning_rate": 0.00019896759131324835, + 
"loss": 2.7799, + "step": 1011 + }, + { + "epoch": 0.08167218142199983, + "grad_norm": 0.8446993827819824, + "learning_rate": 0.00019896532744868224, + "loss": 2.7685, + "step": 1012 + }, + { + "epoch": 0.08175288515858284, + "grad_norm": 0.7635807394981384, + "learning_rate": 0.00019896306111764146, + "loss": 2.7823, + "step": 1013 + }, + { + "epoch": 0.08183358889516584, + "grad_norm": 0.8272855877876282, + "learning_rate": 0.00019896079232018253, + "loss": 2.7877, + "step": 1014 + }, + { + "epoch": 0.08191429263174885, + "grad_norm": 0.8079700469970703, + "learning_rate": 0.00019895852105636193, + "loss": 2.7849, + "step": 1015 + }, + { + "epoch": 0.08199499636833185, + "grad_norm": 0.8518063426017761, + "learning_rate": 0.0001989562473262363, + "loss": 2.8622, + "step": 1016 + }, + { + "epoch": 0.08207570010491486, + "grad_norm": 0.8646622896194458, + "learning_rate": 0.00019895397112986235, + "loss": 2.8224, + "step": 1017 + }, + { + "epoch": 0.08215640384149786, + "grad_norm": 0.8764398097991943, + "learning_rate": 0.00019895169246729672, + "loss": 2.938, + "step": 1018 + }, + { + "epoch": 0.08223710757808086, + "grad_norm": 0.8304057717323303, + "learning_rate": 0.0001989494113385963, + "loss": 2.7586, + "step": 1019 + }, + { + "epoch": 0.08231781131466387, + "grad_norm": 0.8569272756576538, + "learning_rate": 0.00019894712774381787, + "loss": 2.7803, + "step": 1020 + }, + { + "epoch": 0.08239851505124687, + "grad_norm": 0.8788578510284424, + "learning_rate": 0.00019894484168301836, + "loss": 2.8138, + "step": 1021 + }, + { + "epoch": 0.08247921878782988, + "grad_norm": 0.9113569855690002, + "learning_rate": 0.0001989425531562548, + "loss": 2.8023, + "step": 1022 + }, + { + "epoch": 0.08255992252441288, + "grad_norm": 0.8630590438842773, + "learning_rate": 0.00019894026216358413, + "loss": 2.791, + "step": 1023 + }, + { + "epoch": 0.08264062626099589, + "grad_norm": 0.8691157698631287, + "learning_rate": 0.00019893796870506348, + "loss": 2.811, + "step": 
1024 + }, + { + "epoch": 0.08272132999757889, + "grad_norm": 0.9078284502029419, + "learning_rate": 0.00019893567278075007, + "loss": 2.8282, + "step": 1025 + }, + { + "epoch": 0.0828020337341619, + "grad_norm": 0.867511510848999, + "learning_rate": 0.00019893337439070105, + "loss": 2.7862, + "step": 1026 + }, + { + "epoch": 0.0828827374707449, + "grad_norm": 0.8016698360443115, + "learning_rate": 0.00019893107353497372, + "loss": 2.8083, + "step": 1027 + }, + { + "epoch": 0.0829634412073279, + "grad_norm": 0.8583545684814453, + "learning_rate": 0.00019892877021362543, + "loss": 2.8041, + "step": 1028 + }, + { + "epoch": 0.08304414494391091, + "grad_norm": 0.8302493691444397, + "learning_rate": 0.0001989264644267136, + "loss": 2.7866, + "step": 1029 + }, + { + "epoch": 0.08312484868049391, + "grad_norm": 0.9628411531448364, + "learning_rate": 0.00019892415617429567, + "loss": 2.8187, + "step": 1030 + }, + { + "epoch": 0.08320555241707692, + "grad_norm": 0.874840259552002, + "learning_rate": 0.0001989218454564292, + "loss": 2.7475, + "step": 1031 + }, + { + "epoch": 0.08328625615365992, + "grad_norm": 0.8641294836997986, + "learning_rate": 0.0001989195322731717, + "loss": 2.7795, + "step": 1032 + }, + { + "epoch": 0.08336695989024291, + "grad_norm": 0.8219757080078125, + "learning_rate": 0.0001989172166245809, + "loss": 2.7683, + "step": 1033 + }, + { + "epoch": 0.08344766362682592, + "grad_norm": 0.7905694246292114, + "learning_rate": 0.00019891489851071455, + "loss": 2.7668, + "step": 1034 + }, + { + "epoch": 0.08352836736340892, + "grad_norm": 0.8180816173553467, + "learning_rate": 0.0001989125779316303, + "loss": 2.7661, + "step": 1035 + }, + { + "epoch": 0.08360907109999192, + "grad_norm": 0.8337293267250061, + "learning_rate": 0.00019891025488738605, + "loss": 2.7823, + "step": 1036 + }, + { + "epoch": 0.08368977483657493, + "grad_norm": 0.9673140048980713, + "learning_rate": 0.00019890792937803973, + "loss": 2.8164, + "step": 1037 + }, + { + "epoch": 
0.08377047857315793, + "grad_norm": 0.8810501098632812, + "learning_rate": 0.00019890560140364922, + "loss": 2.7904, + "step": 1038 + }, + { + "epoch": 0.08385118230974094, + "grad_norm": 0.9507614374160767, + "learning_rate": 0.0001989032709642726, + "loss": 2.7928, + "step": 1039 + }, + { + "epoch": 0.08393188604632394, + "grad_norm": 0.953738808631897, + "learning_rate": 0.00019890093805996793, + "loss": 2.7922, + "step": 1040 + }, + { + "epoch": 0.08401258978290695, + "grad_norm": 0.8079931139945984, + "learning_rate": 0.00019889860269079336, + "loss": 2.7909, + "step": 1041 + }, + { + "epoch": 0.08409329351948995, + "grad_norm": 1.0330647230148315, + "learning_rate": 0.0001988962648568071, + "loss": 2.7526, + "step": 1042 + }, + { + "epoch": 0.08417399725607295, + "grad_norm": 0.8988988399505615, + "learning_rate": 0.00019889392455806738, + "loss": 2.7471, + "step": 1043 + }, + { + "epoch": 0.08425470099265596, + "grad_norm": 0.7986348271369934, + "learning_rate": 0.00019889158179463255, + "loss": 2.7208, + "step": 1044 + }, + { + "epoch": 0.08433540472923896, + "grad_norm": 0.9231631755828857, + "learning_rate": 0.000198889236566561, + "loss": 2.7953, + "step": 1045 + }, + { + "epoch": 0.08441610846582197, + "grad_norm": 0.8438155055046082, + "learning_rate": 0.00019888688887391117, + "loss": 2.8006, + "step": 1046 + }, + { + "epoch": 0.08449681220240497, + "grad_norm": 0.8915219306945801, + "learning_rate": 0.0001988845387167416, + "loss": 2.8184, + "step": 1047 + }, + { + "epoch": 0.08457751593898798, + "grad_norm": 0.924401581287384, + "learning_rate": 0.0001988821860951108, + "loss": 2.8411, + "step": 1048 + }, + { + "epoch": 0.08465821967557098, + "grad_norm": 0.8144630193710327, + "learning_rate": 0.00019887983100907745, + "loss": 2.8258, + "step": 1049 + }, + { + "epoch": 0.08473892341215399, + "grad_norm": 0.9974459409713745, + "learning_rate": 0.00019887747345870028, + "loss": 2.7567, + "step": 1050 + }, + { + "epoch": 0.08481962714873699, + 
"grad_norm": 0.944526195526123, + "learning_rate": 0.00019887511344403796, + "loss": 2.8657, + "step": 1051 + }, + { + "epoch": 0.08490033088532, + "grad_norm": 0.8204831480979919, + "learning_rate": 0.00019887275096514936, + "loss": 2.8054, + "step": 1052 + }, + { + "epoch": 0.084981034621903, + "grad_norm": 0.8855900168418884, + "learning_rate": 0.00019887038602209336, + "loss": 2.8019, + "step": 1053 + }, + { + "epoch": 0.085061738358486, + "grad_norm": 0.9025108814239502, + "learning_rate": 0.0001988680186149289, + "loss": 2.7934, + "step": 1054 + }, + { + "epoch": 0.08514244209506901, + "grad_norm": 0.8486441373825073, + "learning_rate": 0.00019886564874371494, + "loss": 2.809, + "step": 1055 + }, + { + "epoch": 0.08522314583165201, + "grad_norm": 0.778364896774292, + "learning_rate": 0.00019886327640851058, + "loss": 2.7783, + "step": 1056 + }, + { + "epoch": 0.08530384956823502, + "grad_norm": 0.8515299558639526, + "learning_rate": 0.00019886090160937497, + "loss": 2.8122, + "step": 1057 + }, + { + "epoch": 0.08538455330481802, + "grad_norm": 0.8466131091117859, + "learning_rate": 0.00019885852434636724, + "loss": 2.7798, + "step": 1058 + }, + { + "epoch": 0.08546525704140101, + "grad_norm": 0.8856541514396667, + "learning_rate": 0.00019885614461954667, + "loss": 2.8033, + "step": 1059 + }, + { + "epoch": 0.08554596077798401, + "grad_norm": 0.8853924870491028, + "learning_rate": 0.00019885376242897258, + "loss": 2.8368, + "step": 1060 + }, + { + "epoch": 0.08562666451456702, + "grad_norm": 0.7858660221099854, + "learning_rate": 0.0001988513777747043, + "loss": 2.7806, + "step": 1061 + }, + { + "epoch": 0.08570736825115002, + "grad_norm": 0.8601513504981995, + "learning_rate": 0.0001988489906568013, + "loss": 2.8434, + "step": 1062 + }, + { + "epoch": 0.08578807198773303, + "grad_norm": 0.9126001596450806, + "learning_rate": 0.00019884660107532306, + "loss": 2.8469, + "step": 1063 + }, + { + "epoch": 0.08586877572431603, + "grad_norm": 0.9016061425209045, + 
"learning_rate": 0.00019884420903032912, + "loss": 2.7907, + "step": 1064 + }, + { + "epoch": 0.08594947946089904, + "grad_norm": 0.9134494066238403, + "learning_rate": 0.00019884181452187915, + "loss": 2.8426, + "step": 1065 + }, + { + "epoch": 0.08603018319748204, + "grad_norm": 0.8891138434410095, + "learning_rate": 0.00019883941755003272, + "loss": 2.8092, + "step": 1066 + }, + { + "epoch": 0.08611088693406505, + "grad_norm": 0.822884202003479, + "learning_rate": 0.0001988370181148497, + "loss": 2.8454, + "step": 1067 + }, + { + "epoch": 0.08619159067064805, + "grad_norm": 0.8341901898384094, + "learning_rate": 0.0001988346162163898, + "loss": 2.8027, + "step": 1068 + }, + { + "epoch": 0.08627229440723105, + "grad_norm": 0.8653229475021362, + "learning_rate": 0.00019883221185471291, + "loss": 2.7487, + "step": 1069 + }, + { + "epoch": 0.08635299814381406, + "grad_norm": 0.8065966367721558, + "learning_rate": 0.00019882980502987894, + "loss": 2.7847, + "step": 1070 + }, + { + "epoch": 0.08643370188039706, + "grad_norm": 0.9106903076171875, + "learning_rate": 0.0001988273957419479, + "loss": 2.7962, + "step": 1071 + }, + { + "epoch": 0.08651440561698007, + "grad_norm": 0.953815221786499, + "learning_rate": 0.0001988249839909798, + "loss": 2.8168, + "step": 1072 + }, + { + "epoch": 0.08659510935356307, + "grad_norm": 0.8642842173576355, + "learning_rate": 0.00019882256977703477, + "loss": 2.8205, + "step": 1073 + }, + { + "epoch": 0.08667581309014608, + "grad_norm": 0.8500350117683411, + "learning_rate": 0.000198820153100173, + "loss": 2.8798, + "step": 1074 + }, + { + "epoch": 0.08675651682672908, + "grad_norm": 0.9212989807128906, + "learning_rate": 0.00019881773396045467, + "loss": 2.8088, + "step": 1075 + }, + { + "epoch": 0.08683722056331208, + "grad_norm": 0.8897970914840698, + "learning_rate": 0.0001988153123579401, + "loss": 2.7983, + "step": 1076 + }, + { + "epoch": 0.08691792429989509, + "grad_norm": 0.7942636609077454, + "learning_rate": 
0.00019881288829268968, + "loss": 2.7711, + "step": 1077 + }, + { + "epoch": 0.08699862803647809, + "grad_norm": 0.8286700248718262, + "learning_rate": 0.00019881046176476374, + "loss": 2.7995, + "step": 1078 + }, + { + "epoch": 0.0870793317730611, + "grad_norm": 0.9436343908309937, + "learning_rate": 0.00019880803277422281, + "loss": 2.8399, + "step": 1079 + }, + { + "epoch": 0.0871600355096441, + "grad_norm": 0.9592518210411072, + "learning_rate": 0.00019880560132112742, + "loss": 2.7888, + "step": 1080 + }, + { + "epoch": 0.0872407392462271, + "grad_norm": 0.8956589698791504, + "learning_rate": 0.00019880316740553816, + "loss": 2.7635, + "step": 1081 + }, + { + "epoch": 0.08732144298281011, + "grad_norm": 1.055312156677246, + "learning_rate": 0.00019880073102751574, + "loss": 2.7778, + "step": 1082 + }, + { + "epoch": 0.08740214671939311, + "grad_norm": 0.783273458480835, + "learning_rate": 0.00019879829218712075, + "loss": 2.735, + "step": 1083 + }, + { + "epoch": 0.0874828504559761, + "grad_norm": 0.8315421938896179, + "learning_rate": 0.00019879585088441413, + "loss": 2.7973, + "step": 1084 + }, + { + "epoch": 0.08756355419255911, + "grad_norm": 0.9550945162773132, + "learning_rate": 0.00019879340711945662, + "loss": 2.8083, + "step": 1085 + }, + { + "epoch": 0.08764425792914211, + "grad_norm": 0.9579277634620667, + "learning_rate": 0.00019879096089230915, + "loss": 2.7411, + "step": 1086 + }, + { + "epoch": 0.08772496166572512, + "grad_norm": 0.8602219223976135, + "learning_rate": 0.0001987885122030327, + "loss": 2.7461, + "step": 1087 + }, + { + "epoch": 0.08780566540230812, + "grad_norm": 0.9749068021774292, + "learning_rate": 0.00019878606105168829, + "loss": 2.7701, + "step": 1088 + }, + { + "epoch": 0.08788636913889113, + "grad_norm": 0.8128982186317444, + "learning_rate": 0.00019878360743833703, + "loss": 2.7949, + "step": 1089 + }, + { + "epoch": 0.08796707287547413, + "grad_norm": 0.9177080988883972, + "learning_rate": 0.00019878115136304003, + 
"loss": 2.7471, + "step": 1090 + }, + { + "epoch": 0.08804777661205714, + "grad_norm": 0.9052132368087769, + "learning_rate": 0.0001987786928258585, + "loss": 2.8356, + "step": 1091 + }, + { + "epoch": 0.08812848034864014, + "grad_norm": 0.8972994089126587, + "learning_rate": 0.00019877623182685378, + "loss": 2.8304, + "step": 1092 + }, + { + "epoch": 0.08820918408522314, + "grad_norm": 0.861251950263977, + "learning_rate": 0.0001987737683660871, + "loss": 2.8436, + "step": 1093 + }, + { + "epoch": 0.08828988782180615, + "grad_norm": 0.9139869809150696, + "learning_rate": 0.00019877130244361996, + "loss": 2.7583, + "step": 1094 + }, + { + "epoch": 0.08837059155838915, + "grad_norm": 0.8441170454025269, + "learning_rate": 0.00019876883405951377, + "loss": 2.7508, + "step": 1095 + }, + { + "epoch": 0.08845129529497216, + "grad_norm": 0.8624769449234009, + "learning_rate": 0.00019876636321383004, + "loss": 2.8003, + "step": 1096 + }, + { + "epoch": 0.08853199903155516, + "grad_norm": 0.9033877849578857, + "learning_rate": 0.00019876388990663037, + "loss": 2.7934, + "step": 1097 + }, + { + "epoch": 0.08861270276813817, + "grad_norm": 0.9492632746696472, + "learning_rate": 0.0001987614141379764, + "loss": 2.7852, + "step": 1098 + }, + { + "epoch": 0.08869340650472117, + "grad_norm": 0.9004682302474976, + "learning_rate": 0.00019875893590792982, + "loss": 2.7518, + "step": 1099 + }, + { + "epoch": 0.08877411024130417, + "grad_norm": 0.8352272510528564, + "learning_rate": 0.0001987564552165524, + "loss": 2.8035, + "step": 1100 + }, + { + "epoch": 0.08885481397788718, + "grad_norm": 0.8488562107086182, + "learning_rate": 0.00019875397206390593, + "loss": 2.7672, + "step": 1101 + }, + { + "epoch": 0.08893551771447018, + "grad_norm": 0.9450985193252563, + "learning_rate": 0.00019875148645005238, + "loss": 2.7558, + "step": 1102 + }, + { + "epoch": 0.08901622145105319, + "grad_norm": 0.9203561544418335, + "learning_rate": 0.0001987489983750536, + "loss": 2.7983, + "step": 
1103 + }, + { + "epoch": 0.08909692518763619, + "grad_norm": 0.8761897087097168, + "learning_rate": 0.0001987465078389717, + "loss": 2.7536, + "step": 1104 + }, + { + "epoch": 0.0891776289242192, + "grad_norm": 0.9064637422561646, + "learning_rate": 0.00019874401484186867, + "loss": 2.8104, + "step": 1105 + }, + { + "epoch": 0.0892583326608022, + "grad_norm": 0.8394999504089355, + "learning_rate": 0.00019874151938380666, + "loss": 2.7459, + "step": 1106 + }, + { + "epoch": 0.0893390363973852, + "grad_norm": 0.8782099485397339, + "learning_rate": 0.00019873902146484785, + "loss": 2.8675, + "step": 1107 + }, + { + "epoch": 0.08941974013396821, + "grad_norm": 0.8564850091934204, + "learning_rate": 0.00019873652108505458, + "loss": 2.8561, + "step": 1108 + }, + { + "epoch": 0.08950044387055121, + "grad_norm": 0.8343809843063354, + "learning_rate": 0.0001987340182444891, + "loss": 2.8406, + "step": 1109 + }, + { + "epoch": 0.0895811476071342, + "grad_norm": 1.096273422241211, + "learning_rate": 0.00019873151294321376, + "loss": 2.8264, + "step": 1110 + }, + { + "epoch": 0.08966185134371721, + "grad_norm": 0.8654618263244629, + "learning_rate": 0.00019872900518129103, + "loss": 2.7956, + "step": 1111 + }, + { + "epoch": 0.08974255508030021, + "grad_norm": 0.8868138194084167, + "learning_rate": 0.00019872649495878344, + "loss": 2.8028, + "step": 1112 + }, + { + "epoch": 0.08982325881688322, + "grad_norm": 0.8139104843139648, + "learning_rate": 0.00019872398227575348, + "loss": 2.7502, + "step": 1113 + }, + { + "epoch": 0.08990396255346622, + "grad_norm": 0.8277762532234192, + "learning_rate": 0.00019872146713226384, + "loss": 2.7913, + "step": 1114 + }, + { + "epoch": 0.08998466629004923, + "grad_norm": 0.8470397591590881, + "learning_rate": 0.00019871894952837717, + "loss": 2.7982, + "step": 1115 + }, + { + "epoch": 0.09006537002663223, + "grad_norm": 0.8424760103225708, + "learning_rate": 0.00019871642946415625, + "loss": 2.8067, + "step": 1116 + }, + { + "epoch": 
0.09014607376321523, + "grad_norm": 0.8253894448280334, + "learning_rate": 0.00019871390693966382, + "loss": 2.8339, + "step": 1117 + }, + { + "epoch": 0.09022677749979824, + "grad_norm": 0.8120691776275635, + "learning_rate": 0.00019871138195496282, + "loss": 2.7938, + "step": 1118 + }, + { + "epoch": 0.09030748123638124, + "grad_norm": 0.920189619064331, + "learning_rate": 0.00019870885451011617, + "loss": 2.8083, + "step": 1119 + }, + { + "epoch": 0.09038818497296425, + "grad_norm": 0.8990969657897949, + "learning_rate": 0.0001987063246051868, + "loss": 2.7481, + "step": 1120 + }, + { + "epoch": 0.09046888870954725, + "grad_norm": 0.8280801773071289, + "learning_rate": 0.0001987037922402378, + "loss": 2.8536, + "step": 1121 + }, + { + "epoch": 0.09054959244613026, + "grad_norm": 0.8510503768920898, + "learning_rate": 0.0001987012574153323, + "loss": 2.758, + "step": 1122 + }, + { + "epoch": 0.09063029618271326, + "grad_norm": 0.9103946685791016, + "learning_rate": 0.00019869872013053344, + "loss": 2.7594, + "step": 1123 + }, + { + "epoch": 0.09071099991929626, + "grad_norm": 0.804916262626648, + "learning_rate": 0.00019869618038590448, + "loss": 2.7489, + "step": 1124 + }, + { + "epoch": 0.09079170365587927, + "grad_norm": 0.7542802095413208, + "learning_rate": 0.00019869363818150867, + "loss": 2.76, + "step": 1125 + }, + { + "epoch": 0.09087240739246227, + "grad_norm": 0.7725108861923218, + "learning_rate": 0.00019869109351740947, + "loss": 2.8124, + "step": 1126 + }, + { + "epoch": 0.09095311112904528, + "grad_norm": 0.8533692955970764, + "learning_rate": 0.0001986885463936702, + "loss": 2.8499, + "step": 1127 + }, + { + "epoch": 0.09103381486562828, + "grad_norm": 0.8351541757583618, + "learning_rate": 0.0001986859968103544, + "loss": 2.8075, + "step": 1128 + }, + { + "epoch": 0.09111451860221129, + "grad_norm": 0.8780044913291931, + "learning_rate": 0.0001986834447675256, + "loss": 2.7587, + "step": 1129 + }, + { + "epoch": 0.09119522233879429, + 
"grad_norm": 0.9587519764900208, + "learning_rate": 0.00019868089026524736, + "loss": 2.8069, + "step": 1130 + }, + { + "epoch": 0.0912759260753773, + "grad_norm": 0.8285651206970215, + "learning_rate": 0.00019867833330358342, + "loss": 2.8209, + "step": 1131 + }, + { + "epoch": 0.0913566298119603, + "grad_norm": 0.8589211106300354, + "learning_rate": 0.00019867577388259745, + "loss": 2.8144, + "step": 1132 + }, + { + "epoch": 0.0914373335485433, + "grad_norm": 0.8740364909172058, + "learning_rate": 0.00019867321200235324, + "loss": 2.858, + "step": 1133 + }, + { + "epoch": 0.09151803728512631, + "grad_norm": 0.8368108868598938, + "learning_rate": 0.00019867064766291467, + "loss": 2.7997, + "step": 1134 + }, + { + "epoch": 0.0915987410217093, + "grad_norm": 0.8243690133094788, + "learning_rate": 0.00019866808086434564, + "loss": 2.7925, + "step": 1135 + }, + { + "epoch": 0.0916794447582923, + "grad_norm": 0.8296996355056763, + "learning_rate": 0.0001986655116067101, + "loss": 2.7953, + "step": 1136 + }, + { + "epoch": 0.09176014849487531, + "grad_norm": 0.9255942702293396, + "learning_rate": 0.0001986629398900721, + "loss": 2.844, + "step": 1137 + }, + { + "epoch": 0.09184085223145831, + "grad_norm": 0.7498174905776978, + "learning_rate": 0.00019866036571449574, + "loss": 2.7372, + "step": 1138 + }, + { + "epoch": 0.09192155596804132, + "grad_norm": 0.8170139193534851, + "learning_rate": 0.00019865778908004513, + "loss": 2.7656, + "step": 1139 + }, + { + "epoch": 0.09200225970462432, + "grad_norm": 0.8858106732368469, + "learning_rate": 0.00019865520998678458, + "loss": 2.7657, + "step": 1140 + }, + { + "epoch": 0.09208296344120732, + "grad_norm": 0.8789847493171692, + "learning_rate": 0.00019865262843477826, + "loss": 2.8419, + "step": 1141 + }, + { + "epoch": 0.09216366717779033, + "grad_norm": 0.8433314561843872, + "learning_rate": 0.00019865004442409058, + "loss": 2.7981, + "step": 1142 + }, + { + "epoch": 0.09224437091437333, + "grad_norm": 0.8822595477104187, 
+ "learning_rate": 0.0001986474579547859, + "loss": 2.8368, + "step": 1143 + }, + { + "epoch": 0.09232507465095634, + "grad_norm": 0.9067013263702393, + "learning_rate": 0.00019864486902692872, + "loss": 2.7807, + "step": 1144 + }, + { + "epoch": 0.09240577838753934, + "grad_norm": 0.9551558494567871, + "learning_rate": 0.00019864227764058355, + "loss": 2.7617, + "step": 1145 + }, + { + "epoch": 0.09248648212412235, + "grad_norm": 0.8337206244468689, + "learning_rate": 0.00019863968379581494, + "loss": 2.8289, + "step": 1146 + }, + { + "epoch": 0.09256718586070535, + "grad_norm": 0.952702522277832, + "learning_rate": 0.0001986370874926876, + "loss": 2.8508, + "step": 1147 + }, + { + "epoch": 0.09264788959728835, + "grad_norm": 0.8586699366569519, + "learning_rate": 0.00019863448873126615, + "loss": 2.8784, + "step": 1148 + }, + { + "epoch": 0.09272859333387136, + "grad_norm": 0.7625309228897095, + "learning_rate": 0.00019863188751161544, + "loss": 2.7936, + "step": 1149 + }, + { + "epoch": 0.09280929707045436, + "grad_norm": 0.8912700414657593, + "learning_rate": 0.0001986292838338003, + "loss": 2.8745, + "step": 1150 + }, + { + "epoch": 0.09289000080703737, + "grad_norm": 0.8618904948234558, + "learning_rate": 0.00019862667769788553, + "loss": 2.8086, + "step": 1151 + }, + { + "epoch": 0.09297070454362037, + "grad_norm": 1.0013352632522583, + "learning_rate": 0.00019862406910393617, + "loss": 2.8211, + "step": 1152 + }, + { + "epoch": 0.09305140828020338, + "grad_norm": 0.7922475337982178, + "learning_rate": 0.0001986214580520172, + "loss": 2.7668, + "step": 1153 + }, + { + "epoch": 0.09313211201678638, + "grad_norm": 0.9490330815315247, + "learning_rate": 0.00019861884454219365, + "loss": 2.7571, + "step": 1154 + }, + { + "epoch": 0.09321281575336939, + "grad_norm": 0.8780270218849182, + "learning_rate": 0.00019861622857453076, + "loss": 2.7598, + "step": 1155 + }, + { + "epoch": 0.09329351948995239, + "grad_norm": 0.9220066070556641, + "learning_rate": 
0.00019861361014909365, + "loss": 2.7609, + "step": 1156 + }, + { + "epoch": 0.0933742232265354, + "grad_norm": 0.8299020528793335, + "learning_rate": 0.0001986109892659476, + "loss": 2.8655, + "step": 1157 + }, + { + "epoch": 0.0934549269631184, + "grad_norm": 0.9700348377227783, + "learning_rate": 0.0001986083659251579, + "loss": 2.8597, + "step": 1158 + }, + { + "epoch": 0.0935356306997014, + "grad_norm": 0.8820784687995911, + "learning_rate": 0.00019860574012679001, + "loss": 2.8776, + "step": 1159 + }, + { + "epoch": 0.0936163344362844, + "grad_norm": 0.8134172558784485, + "learning_rate": 0.0001986031118709093, + "loss": 2.8163, + "step": 1160 + }, + { + "epoch": 0.0936970381728674, + "grad_norm": 0.885974109172821, + "learning_rate": 0.00019860048115758123, + "loss": 2.752, + "step": 1161 + }, + { + "epoch": 0.0937777419094504, + "grad_norm": 0.9650186896324158, + "learning_rate": 0.0001985978479868715, + "loss": 2.7587, + "step": 1162 + }, + { + "epoch": 0.0938584456460334, + "grad_norm": 0.8550445437431335, + "learning_rate": 0.00019859521235884563, + "loss": 2.7887, + "step": 1163 + }, + { + "epoch": 0.09393914938261641, + "grad_norm": 0.9686560034751892, + "learning_rate": 0.00019859257427356933, + "loss": 2.7974, + "step": 1164 + }, + { + "epoch": 0.09401985311919941, + "grad_norm": 0.9185387492179871, + "learning_rate": 0.00019858993373110837, + "loss": 2.7933, + "step": 1165 + }, + { + "epoch": 0.09410055685578242, + "grad_norm": 0.9549610018730164, + "learning_rate": 0.00019858729073152852, + "loss": 2.7698, + "step": 1166 + }, + { + "epoch": 0.09418126059236542, + "grad_norm": 1.0523492097854614, + "learning_rate": 0.0001985846452748957, + "loss": 2.7215, + "step": 1167 + }, + { + "epoch": 0.09426196432894843, + "grad_norm": 0.8551118969917297, + "learning_rate": 0.00019858199736127582, + "loss": 2.805, + "step": 1168 + }, + { + "epoch": 0.09434266806553143, + "grad_norm": 1.021374225616455, + "learning_rate": 0.0001985793469907349, + "loss": 2.794, 
+ "step": 1169 + }, + { + "epoch": 0.09442337180211444, + "grad_norm": 0.8745501041412354, + "learning_rate": 0.0001985766941633389, + "loss": 2.7793, + "step": 1170 + }, + { + "epoch": 0.09450407553869744, + "grad_norm": 0.7426434755325317, + "learning_rate": 0.00019857403887915402, + "loss": 2.7808, + "step": 1171 + }, + { + "epoch": 0.09458477927528045, + "grad_norm": 0.9183726906776428, + "learning_rate": 0.0001985713811382464, + "loss": 2.8001, + "step": 1172 + }, + { + "epoch": 0.09466548301186345, + "grad_norm": 0.8136709928512573, + "learning_rate": 0.00019856872094068233, + "loss": 2.7394, + "step": 1173 + }, + { + "epoch": 0.09474618674844645, + "grad_norm": 0.9399348497390747, + "learning_rate": 0.00019856605828652807, + "loss": 2.7733, + "step": 1174 + }, + { + "epoch": 0.09482689048502946, + "grad_norm": 0.8233176469802856, + "learning_rate": 0.00019856339317584997, + "loss": 2.7672, + "step": 1175 + }, + { + "epoch": 0.09490759422161246, + "grad_norm": 0.9157048463821411, + "learning_rate": 0.00019856072560871447, + "loss": 2.7992, + "step": 1176 + }, + { + "epoch": 0.09498829795819547, + "grad_norm": 0.8729545474052429, + "learning_rate": 0.00019855805558518803, + "loss": 2.749, + "step": 1177 + }, + { + "epoch": 0.09506900169477847, + "grad_norm": 0.8592300415039062, + "learning_rate": 0.00019855538310533722, + "loss": 2.7257, + "step": 1178 + }, + { + "epoch": 0.09514970543136148, + "grad_norm": 0.8470803499221802, + "learning_rate": 0.00019855270816922867, + "loss": 2.7479, + "step": 1179 + }, + { + "epoch": 0.09523040916794448, + "grad_norm": 0.8538667559623718, + "learning_rate": 0.00019855003077692897, + "loss": 2.7576, + "step": 1180 + }, + { + "epoch": 0.09531111290452748, + "grad_norm": 0.8890984654426575, + "learning_rate": 0.0001985473509285049, + "loss": 2.7961, + "step": 1181 + }, + { + "epoch": 0.09539181664111049, + "grad_norm": 0.7769411206245422, + "learning_rate": 0.00019854466862402324, + "loss": 2.8087, + "step": 1182 + }, + { + 
"epoch": 0.09547252037769349, + "grad_norm": 0.8892520666122437, + "learning_rate": 0.00019854198386355085, + "loss": 2.7935, + "step": 1183 + }, + { + "epoch": 0.0955532241142765, + "grad_norm": 0.8675585389137268, + "learning_rate": 0.00019853929664715464, + "loss": 2.833, + "step": 1184 + }, + { + "epoch": 0.0956339278508595, + "grad_norm": 0.8053853511810303, + "learning_rate": 0.00019853660697490154, + "loss": 2.8002, + "step": 1185 + }, + { + "epoch": 0.09571463158744249, + "grad_norm": 0.9237198829650879, + "learning_rate": 0.00019853391484685865, + "loss": 2.8281, + "step": 1186 + }, + { + "epoch": 0.0957953353240255, + "grad_norm": 0.8432926535606384, + "learning_rate": 0.000198531220263093, + "loss": 2.8131, + "step": 1187 + }, + { + "epoch": 0.0958760390606085, + "grad_norm": 0.796380341053009, + "learning_rate": 0.0001985285232236718, + "loss": 2.753, + "step": 1188 + }, + { + "epoch": 0.0959567427971915, + "grad_norm": 0.9183037281036377, + "learning_rate": 0.00019852582372866225, + "loss": 2.7625, + "step": 1189 + }, + { + "epoch": 0.09603744653377451, + "grad_norm": 0.8194435238838196, + "learning_rate": 0.0001985231217781316, + "loss": 2.7906, + "step": 1190 + }, + { + "epoch": 0.09611815027035751, + "grad_norm": 0.8430871367454529, + "learning_rate": 0.00019852041737214725, + "loss": 2.8457, + "step": 1191 + }, + { + "epoch": 0.09619885400694052, + "grad_norm": 1.0237345695495605, + "learning_rate": 0.0001985177105107765, + "loss": 2.789, + "step": 1192 + }, + { + "epoch": 0.09627955774352352, + "grad_norm": 0.8721581101417542, + "learning_rate": 0.00019851500119408692, + "loss": 2.7187, + "step": 1193 + }, + { + "epoch": 0.09636026148010653, + "grad_norm": 0.8089142441749573, + "learning_rate": 0.00019851228942214603, + "loss": 2.7544, + "step": 1194 + }, + { + "epoch": 0.09644096521668953, + "grad_norm": 1.1076842546463013, + "learning_rate": 0.0001985095751950213, + "loss": 2.7859, + "step": 1195 + }, + { + "epoch": 0.09652166895327254, + 
"grad_norm": 0.84585040807724, + "learning_rate": 0.0001985068585127805, + "loss": 2.8005, + "step": 1196 + }, + { + "epoch": 0.09660237268985554, + "grad_norm": 0.8231167197227478, + "learning_rate": 0.00019850413937549127, + "loss": 2.8561, + "step": 1197 + }, + { + "epoch": 0.09668307642643854, + "grad_norm": 1.0028103590011597, + "learning_rate": 0.00019850141778322136, + "loss": 2.8049, + "step": 1198 + }, + { + "epoch": 0.09676378016302155, + "grad_norm": 0.8575148582458496, + "learning_rate": 0.0001984986937360387, + "loss": 2.7723, + "step": 1199 + }, + { + "epoch": 0.09684448389960455, + "grad_norm": 0.8567116260528564, + "learning_rate": 0.00019849596723401107, + "loss": 2.7418, + "step": 1200 + }, + { + "epoch": 0.09692518763618756, + "grad_norm": 1.1159218549728394, + "learning_rate": 0.00019849323827720645, + "loss": 2.8352, + "step": 1201 + }, + { + "epoch": 0.09700589137277056, + "grad_norm": 0.849656879901886, + "learning_rate": 0.0001984905068656929, + "loss": 2.7875, + "step": 1202 + }, + { + "epoch": 0.09708659510935357, + "grad_norm": 0.8479150533676147, + "learning_rate": 0.00019848777299953847, + "loss": 2.7828, + "step": 1203 + }, + { + "epoch": 0.09716729884593657, + "grad_norm": 0.9143954515457153, + "learning_rate": 0.00019848503667881125, + "loss": 2.7978, + "step": 1204 + }, + { + "epoch": 0.09724800258251957, + "grad_norm": 0.8162297010421753, + "learning_rate": 0.0001984822979035795, + "loss": 2.7621, + "step": 1205 + }, + { + "epoch": 0.09732870631910258, + "grad_norm": 0.8625509142875671, + "learning_rate": 0.00019847955667391144, + "loss": 2.7484, + "step": 1206 + }, + { + "epoch": 0.09740941005568558, + "grad_norm": 0.8485168218612671, + "learning_rate": 0.00019847681298987543, + "loss": 2.7599, + "step": 1207 + }, + { + "epoch": 0.09749011379226859, + "grad_norm": 0.8962678909301758, + "learning_rate": 0.00019847406685153976, + "loss": 2.7753, + "step": 1208 + }, + { + "epoch": 0.09757081752885159, + "grad_norm": 
0.8890791535377502, + "learning_rate": 0.00019847131825897297, + "loss": 2.7635, + "step": 1209 + }, + { + "epoch": 0.0976515212654346, + "grad_norm": 0.8461710810661316, + "learning_rate": 0.00019846856721224355, + "loss": 2.796, + "step": 1210 + }, + { + "epoch": 0.0977322250020176, + "grad_norm": 0.912738025188446, + "learning_rate": 0.00019846581371141996, + "loss": 2.7889, + "step": 1211 + }, + { + "epoch": 0.09781292873860059, + "grad_norm": 0.8530749082565308, + "learning_rate": 0.00019846305775657097, + "loss": 2.8298, + "step": 1212 + }, + { + "epoch": 0.0978936324751836, + "grad_norm": 0.8890148401260376, + "learning_rate": 0.00019846029934776516, + "loss": 2.7491, + "step": 1213 + }, + { + "epoch": 0.0979743362117666, + "grad_norm": 0.8936887979507446, + "learning_rate": 0.0001984575384850713, + "loss": 2.7759, + "step": 1214 + }, + { + "epoch": 0.0980550399483496, + "grad_norm": 0.7811321020126343, + "learning_rate": 0.00019845477516855823, + "loss": 2.8126, + "step": 1215 + }, + { + "epoch": 0.09813574368493261, + "grad_norm": 0.8751768469810486, + "learning_rate": 0.00019845200939829484, + "loss": 2.792, + "step": 1216 + }, + { + "epoch": 0.09821644742151561, + "grad_norm": 0.8749501705169678, + "learning_rate": 0.00019844924117434998, + "loss": 2.7818, + "step": 1217 + }, + { + "epoch": 0.09829715115809862, + "grad_norm": 0.8130955100059509, + "learning_rate": 0.0001984464704967927, + "loss": 2.8581, + "step": 1218 + }, + { + "epoch": 0.09837785489468162, + "grad_norm": 0.8158220648765564, + "learning_rate": 0.00019844369736569196, + "loss": 2.7704, + "step": 1219 + }, + { + "epoch": 0.09845855863126463, + "grad_norm": 0.9351849555969238, + "learning_rate": 0.00019844092178111702, + "loss": 2.7857, + "step": 1220 + }, + { + "epoch": 0.09853926236784763, + "grad_norm": 0.8373914957046509, + "learning_rate": 0.00019843814374313697, + "loss": 2.8217, + "step": 1221 + }, + { + "epoch": 0.09861996610443063, + "grad_norm": 0.8919960856437683, + 
"learning_rate": 0.00019843536325182104, + "loss": 2.7914, + "step": 1222 + }, + { + "epoch": 0.09870066984101364, + "grad_norm": 0.9994316697120667, + "learning_rate": 0.00019843258030723858, + "loss": 2.7981, + "step": 1223 + }, + { + "epoch": 0.09878137357759664, + "grad_norm": 0.8144915699958801, + "learning_rate": 0.0001984297949094589, + "loss": 2.811, + "step": 1224 + }, + { + "epoch": 0.09886207731417965, + "grad_norm": 0.8957876563072205, + "learning_rate": 0.0001984270070585514, + "loss": 2.7752, + "step": 1225 + }, + { + "epoch": 0.09894278105076265, + "grad_norm": 0.9426520466804504, + "learning_rate": 0.0001984242167545856, + "loss": 2.8139, + "step": 1226 + }, + { + "epoch": 0.09902348478734566, + "grad_norm": 0.888769268989563, + "learning_rate": 0.00019842142399763106, + "loss": 2.8305, + "step": 1227 + }, + { + "epoch": 0.09910418852392866, + "grad_norm": 0.9497748613357544, + "learning_rate": 0.00019841862878775736, + "loss": 2.748, + "step": 1228 + }, + { + "epoch": 0.09918489226051166, + "grad_norm": 0.8715065717697144, + "learning_rate": 0.00019841583112503416, + "loss": 2.7794, + "step": 1229 + }, + { + "epoch": 0.09926559599709467, + "grad_norm": 0.875599205493927, + "learning_rate": 0.00019841303100953116, + "loss": 2.8016, + "step": 1230 + }, + { + "epoch": 0.09934629973367767, + "grad_norm": 0.8631919622421265, + "learning_rate": 0.0001984102284413182, + "loss": 2.8239, + "step": 1231 + }, + { + "epoch": 0.09942700347026068, + "grad_norm": 0.9028074741363525, + "learning_rate": 0.0001984074234204651, + "loss": 2.8372, + "step": 1232 + }, + { + "epoch": 0.09950770720684368, + "grad_norm": 0.890933096408844, + "learning_rate": 0.00019840461594704175, + "loss": 2.799, + "step": 1233 + }, + { + "epoch": 0.09958841094342669, + "grad_norm": 0.9626480340957642, + "learning_rate": 0.00019840180602111816, + "loss": 2.8207, + "step": 1234 + }, + { + "epoch": 0.09966911468000969, + "grad_norm": 0.798394501209259, + "learning_rate": 
0.00019839899364276433, + "loss": 2.7784, + "step": 1235 + }, + { + "epoch": 0.0997498184165927, + "grad_norm": 0.8246447443962097, + "learning_rate": 0.00019839617881205036, + "loss": 2.8193, + "step": 1236 + }, + { + "epoch": 0.09983052215317569, + "grad_norm": 0.8315989375114441, + "learning_rate": 0.0001983933615290464, + "loss": 2.8036, + "step": 1237 + }, + { + "epoch": 0.09991122588975869, + "grad_norm": 0.8889075517654419, + "learning_rate": 0.00019839054179382267, + "loss": 2.7606, + "step": 1238 + }, + { + "epoch": 0.0999919296263417, + "grad_norm": 0.7558645009994507, + "learning_rate": 0.00019838771960644942, + "loss": 2.7666, + "step": 1239 + }, + { + "epoch": 0.1000726333629247, + "grad_norm": 0.8876601457595825, + "learning_rate": 0.00019838489496699704, + "loss": 2.8778, + "step": 1240 + }, + { + "epoch": 0.1001533370995077, + "grad_norm": 0.8609516620635986, + "learning_rate": 0.00019838206787553588, + "loss": 2.8189, + "step": 1241 + }, + { + "epoch": 0.10023404083609071, + "grad_norm": 0.8521148562431335, + "learning_rate": 0.00019837923833213644, + "loss": 2.8159, + "step": 1242 + }, + { + "epoch": 0.10031474457267371, + "grad_norm": 0.9155359268188477, + "learning_rate": 0.0001983764063368692, + "loss": 2.8351, + "step": 1243 + }, + { + "epoch": 0.10039544830925672, + "grad_norm": 0.8595378398895264, + "learning_rate": 0.00019837357188980475, + "loss": 2.8447, + "step": 1244 + }, + { + "epoch": 0.10047615204583972, + "grad_norm": 0.900244951248169, + "learning_rate": 0.00019837073499101373, + "loss": 2.8646, + "step": 1245 + }, + { + "epoch": 0.10055685578242272, + "grad_norm": 0.8404260277748108, + "learning_rate": 0.00019836789564056689, + "loss": 2.7824, + "step": 1246 + }, + { + "epoch": 0.10063755951900573, + "grad_norm": 0.8776196241378784, + "learning_rate": 0.0001983650538385349, + "loss": 2.8045, + "step": 1247 + }, + { + "epoch": 0.10071826325558873, + "grad_norm": 0.8889327049255371, + "learning_rate": 0.00019836220958498868, + 
"loss": 2.7967, + "step": 1248 + }, + { + "epoch": 0.10079896699217174, + "grad_norm": 0.8905191421508789, + "learning_rate": 0.00019835936287999906, + "loss": 2.8167, + "step": 1249 + }, + { + "epoch": 0.10087967072875474, + "grad_norm": 0.839970052242279, + "learning_rate": 0.000198356513723637, + "loss": 2.8643, + "step": 1250 + }, + { + "epoch": 0.10096037446533775, + "grad_norm": 0.7989531755447388, + "learning_rate": 0.00019835366211597353, + "loss": 2.8493, + "step": 1251 + }, + { + "epoch": 0.10104107820192075, + "grad_norm": 0.7960095405578613, + "learning_rate": 0.0001983508080570797, + "loss": 2.7377, + "step": 1252 + }, + { + "epoch": 0.10112178193850375, + "grad_norm": 0.7989903092384338, + "learning_rate": 0.00019834795154702661, + "loss": 2.7409, + "step": 1253 + }, + { + "epoch": 0.10120248567508676, + "grad_norm": 0.8557813167572021, + "learning_rate": 0.0001983450925858855, + "loss": 2.7945, + "step": 1254 + }, + { + "epoch": 0.10128318941166976, + "grad_norm": 0.948357880115509, + "learning_rate": 0.0001983422311737276, + "loss": 2.826, + "step": 1255 + }, + { + "epoch": 0.10136389314825277, + "grad_norm": 0.8356020450592041, + "learning_rate": 0.00019833936731062423, + "loss": 2.8157, + "step": 1256 + }, + { + "epoch": 0.10144459688483577, + "grad_norm": 0.8199872970581055, + "learning_rate": 0.00019833650099664678, + "loss": 2.7273, + "step": 1257 + }, + { + "epoch": 0.10152530062141878, + "grad_norm": 0.8178466558456421, + "learning_rate": 0.00019833363223186669, + "loss": 2.7513, + "step": 1258 + }, + { + "epoch": 0.10160600435800178, + "grad_norm": 0.8165889978408813, + "learning_rate": 0.00019833076101635538, + "loss": 2.7689, + "step": 1259 + }, + { + "epoch": 0.10168670809458479, + "grad_norm": 0.8240275979042053, + "learning_rate": 0.0001983278873501845, + "loss": 2.7477, + "step": 1260 + }, + { + "epoch": 0.10176741183116779, + "grad_norm": 0.8470584750175476, + "learning_rate": 0.00019832501123342563, + "loss": 2.7414, + "step": 1261 + 
}, + { + "epoch": 0.1018481155677508, + "grad_norm": 0.819063663482666, + "learning_rate": 0.00019832213266615046, + "loss": 2.7335, + "step": 1262 + }, + { + "epoch": 0.10192881930433378, + "grad_norm": 0.8045673370361328, + "learning_rate": 0.00019831925164843071, + "loss": 2.8141, + "step": 1263 + }, + { + "epoch": 0.10200952304091679, + "grad_norm": 0.7827214598655701, + "learning_rate": 0.00019831636818033824, + "loss": 2.7549, + "step": 1264 + }, + { + "epoch": 0.10209022677749979, + "grad_norm": 0.9596436619758606, + "learning_rate": 0.00019831348226194485, + "loss": 2.7327, + "step": 1265 + }, + { + "epoch": 0.1021709305140828, + "grad_norm": 0.826909601688385, + "learning_rate": 0.0001983105938933225, + "loss": 2.7166, + "step": 1266 + }, + { + "epoch": 0.1022516342506658, + "grad_norm": 0.8060985207557678, + "learning_rate": 0.00019830770307454313, + "loss": 2.7514, + "step": 1267 + }, + { + "epoch": 0.1023323379872488, + "grad_norm": 0.8257390856742859, + "learning_rate": 0.00019830480980567887, + "loss": 2.77, + "step": 1268 + }, + { + "epoch": 0.10241304172383181, + "grad_norm": 0.844406008720398, + "learning_rate": 0.00019830191408680173, + "loss": 2.8548, + "step": 1269 + }, + { + "epoch": 0.10249374546041481, + "grad_norm": 0.84171462059021, + "learning_rate": 0.00019829901591798398, + "loss": 2.7404, + "step": 1270 + }, + { + "epoch": 0.10257444919699782, + "grad_norm": 0.8084118962287903, + "learning_rate": 0.00019829611529929774, + "loss": 2.8078, + "step": 1271 + }, + { + "epoch": 0.10265515293358082, + "grad_norm": 0.8273561000823975, + "learning_rate": 0.00019829321223081538, + "loss": 2.787, + "step": 1272 + }, + { + "epoch": 0.10273585667016383, + "grad_norm": 0.799098551273346, + "learning_rate": 0.00019829030671260925, + "loss": 2.7563, + "step": 1273 + }, + { + "epoch": 0.10281656040674683, + "grad_norm": 0.885866105556488, + "learning_rate": 0.00019828739874475172, + "loss": 2.7313, + "step": 1274 + }, + { + "epoch": 0.10289726414332984, 
+ "grad_norm": 0.7702760696411133, + "learning_rate": 0.00019828448832731529, + "loss": 2.7919, + "step": 1275 + }, + { + "epoch": 0.10297796787991284, + "grad_norm": 0.7577444911003113, + "learning_rate": 0.0001982815754603725, + "loss": 2.7149, + "step": 1276 + }, + { + "epoch": 0.10305867161649584, + "grad_norm": 0.8439713716506958, + "learning_rate": 0.00019827866014399592, + "loss": 2.7881, + "step": 1277 + }, + { + "epoch": 0.10313937535307885, + "grad_norm": 0.8504937291145325, + "learning_rate": 0.00019827574237825827, + "loss": 2.7611, + "step": 1278 + }, + { + "epoch": 0.10322007908966185, + "grad_norm": 0.7775665521621704, + "learning_rate": 0.00019827282216323218, + "loss": 2.7312, + "step": 1279 + }, + { + "epoch": 0.10330078282624486, + "grad_norm": 0.8671591281890869, + "learning_rate": 0.00019826989949899048, + "loss": 2.836, + "step": 1280 + }, + { + "epoch": 0.10338148656282786, + "grad_norm": 0.9308713674545288, + "learning_rate": 0.00019826697438560603, + "loss": 2.7494, + "step": 1281 + }, + { + "epoch": 0.10346219029941087, + "grad_norm": 0.9145268797874451, + "learning_rate": 0.0001982640468231517, + "loss": 2.8054, + "step": 1282 + }, + { + "epoch": 0.10354289403599387, + "grad_norm": 0.8150805234909058, + "learning_rate": 0.00019826111681170043, + "loss": 2.7879, + "step": 1283 + }, + { + "epoch": 0.10362359777257688, + "grad_norm": 0.8576685786247253, + "learning_rate": 0.00019825818435132531, + "loss": 2.8184, + "step": 1284 + }, + { + "epoch": 0.10370430150915988, + "grad_norm": 0.8838599920272827, + "learning_rate": 0.00019825524944209937, + "loss": 2.7838, + "step": 1285 + }, + { + "epoch": 0.10378500524574288, + "grad_norm": 0.9119304418563843, + "learning_rate": 0.00019825231208409576, + "loss": 2.8392, + "step": 1286 + }, + { + "epoch": 0.10386570898232589, + "grad_norm": 0.8112398982048035, + "learning_rate": 0.00019824937227738771, + "loss": 2.7844, + "step": 1287 + }, + { + "epoch": 0.10394641271890888, + "grad_norm": 
0.8714308738708496, + "learning_rate": 0.00019824643002204847, + "loss": 2.7765, + "step": 1288 + }, + { + "epoch": 0.10402711645549188, + "grad_norm": 0.8733358979225159, + "learning_rate": 0.00019824348531815138, + "loss": 2.771, + "step": 1289 + }, + { + "epoch": 0.10410782019207489, + "grad_norm": 0.8218281269073486, + "learning_rate": 0.00019824053816576981, + "loss": 2.8099, + "step": 1290 + }, + { + "epoch": 0.10418852392865789, + "grad_norm": 0.8647308945655823, + "learning_rate": 0.00019823758856497725, + "loss": 2.7738, + "step": 1291 + }, + { + "epoch": 0.1042692276652409, + "grad_norm": 0.8358582854270935, + "learning_rate": 0.00019823463651584718, + "loss": 2.8021, + "step": 1292 + }, + { + "epoch": 0.1043499314018239, + "grad_norm": 0.7943673133850098, + "learning_rate": 0.00019823168201845318, + "loss": 2.8293, + "step": 1293 + }, + { + "epoch": 0.1044306351384069, + "grad_norm": 0.8501425981521606, + "learning_rate": 0.0001982287250728689, + "loss": 2.7701, + "step": 1294 + }, + { + "epoch": 0.10451133887498991, + "grad_norm": 0.8503665328025818, + "learning_rate": 0.00019822576567916797, + "loss": 2.7881, + "step": 1295 + }, + { + "epoch": 0.10459204261157291, + "grad_norm": 0.9687628149986267, + "learning_rate": 0.0001982228038374242, + "loss": 2.7623, + "step": 1296 + }, + { + "epoch": 0.10467274634815592, + "grad_norm": 0.8034376502037048, + "learning_rate": 0.00019821983954771146, + "loss": 2.8072, + "step": 1297 + }, + { + "epoch": 0.10475345008473892, + "grad_norm": 0.817135214805603, + "learning_rate": 0.00019821687281010352, + "loss": 2.7572, + "step": 1298 + }, + { + "epoch": 0.10483415382132193, + "grad_norm": 0.7961457371711731, + "learning_rate": 0.0001982139036246744, + "loss": 2.8405, + "step": 1299 + }, + { + "epoch": 0.10491485755790493, + "grad_norm": 0.7572407722473145, + "learning_rate": 0.00019821093199149804, + "loss": 2.7495, + "step": 1300 + }, + { + "epoch": 0.10499556129448794, + "grad_norm": 0.7990664839744568, + 
"learning_rate": 0.00019820795791064856, + "loss": 2.7567, + "step": 1301 + }, + { + "epoch": 0.10507626503107094, + "grad_norm": 0.8197236061096191, + "learning_rate": 0.0001982049813822, + "loss": 2.7807, + "step": 1302 + }, + { + "epoch": 0.10515696876765394, + "grad_norm": 0.9491304159164429, + "learning_rate": 0.00019820200240622664, + "loss": 2.8531, + "step": 1303 + }, + { + "epoch": 0.10523767250423695, + "grad_norm": 0.8143845200538635, + "learning_rate": 0.00019819902098280268, + "loss": 2.7542, + "step": 1304 + }, + { + "epoch": 0.10531837624081995, + "grad_norm": 0.9055941104888916, + "learning_rate": 0.0001981960371120024, + "loss": 2.863, + "step": 1305 + }, + { + "epoch": 0.10539907997740296, + "grad_norm": 0.7804721593856812, + "learning_rate": 0.0001981930507939002, + "loss": 2.8213, + "step": 1306 + }, + { + "epoch": 0.10547978371398596, + "grad_norm": 0.8375318050384521, + "learning_rate": 0.00019819006202857046, + "loss": 2.8222, + "step": 1307 + }, + { + "epoch": 0.10556048745056897, + "grad_norm": 0.9145569801330566, + "learning_rate": 0.00019818707081608773, + "loss": 2.805, + "step": 1308 + }, + { + "epoch": 0.10564119118715197, + "grad_norm": 0.7899324893951416, + "learning_rate": 0.00019818407715652654, + "loss": 2.8246, + "step": 1309 + }, + { + "epoch": 0.10572189492373497, + "grad_norm": 0.7843480110168457, + "learning_rate": 0.0001981810810499615, + "loss": 2.7909, + "step": 1310 + }, + { + "epoch": 0.10580259866031798, + "grad_norm": 0.8071008920669556, + "learning_rate": 0.00019817808249646723, + "loss": 2.7434, + "step": 1311 + }, + { + "epoch": 0.10588330239690098, + "grad_norm": 0.8682011961936951, + "learning_rate": 0.0001981750814961185, + "loss": 2.8387, + "step": 1312 + }, + { + "epoch": 0.10596400613348399, + "grad_norm": 0.7501091361045837, + "learning_rate": 0.0001981720780489902, + "loss": 2.7633, + "step": 1313 + }, + { + "epoch": 0.10604470987006698, + "grad_norm": 0.9259567856788635, + "learning_rate": 
0.000198169072155157, + "loss": 2.8309, + "step": 1314 + }, + { + "epoch": 0.10612541360664998, + "grad_norm": 0.8018674254417419, + "learning_rate": 0.00019816606381469393, + "loss": 2.8647, + "step": 1315 + }, + { + "epoch": 0.10620611734323299, + "grad_norm": 0.8218088746070862, + "learning_rate": 0.00019816305302767595, + "loss": 2.823, + "step": 1316 + }, + { + "epoch": 0.10628682107981599, + "grad_norm": 0.812125027179718, + "learning_rate": 0.00019816003979417808, + "loss": 2.7216, + "step": 1317 + }, + { + "epoch": 0.106367524816399, + "grad_norm": 0.787407636642456, + "learning_rate": 0.0001981570241142754, + "loss": 2.7639, + "step": 1318 + }, + { + "epoch": 0.106448228552982, + "grad_norm": 0.7982528805732727, + "learning_rate": 0.00019815400598804312, + "loss": 2.8597, + "step": 1319 + }, + { + "epoch": 0.106528932289565, + "grad_norm": 0.8490404486656189, + "learning_rate": 0.00019815098541555646, + "loss": 2.7947, + "step": 1320 + }, + { + "epoch": 0.10660963602614801, + "grad_norm": 0.8743172883987427, + "learning_rate": 0.00019814796239689064, + "loss": 2.8674, + "step": 1321 + }, + { + "epoch": 0.10669033976273101, + "grad_norm": 0.8338125348091125, + "learning_rate": 0.00019814493693212106, + "loss": 2.781, + "step": 1322 + }, + { + "epoch": 0.10677104349931402, + "grad_norm": 0.871516764163971, + "learning_rate": 0.00019814190902132307, + "loss": 2.8742, + "step": 1323 + }, + { + "epoch": 0.10685174723589702, + "grad_norm": 0.8935555815696716, + "learning_rate": 0.00019813887866457216, + "loss": 2.7991, + "step": 1324 + }, + { + "epoch": 0.10693245097248003, + "grad_norm": 0.840067446231842, + "learning_rate": 0.00019813584586194388, + "loss": 2.7922, + "step": 1325 + }, + { + "epoch": 0.10701315470906303, + "grad_norm": 0.7919262647628784, + "learning_rate": 0.0001981328106135138, + "loss": 2.7912, + "step": 1326 + }, + { + "epoch": 0.10709385844564603, + "grad_norm": 0.7974550127983093, + "learning_rate": 0.00019812977291935752, + "loss": 
2.8497, + "step": 1327 + }, + { + "epoch": 0.10717456218222904, + "grad_norm": 0.9126157164573669, + "learning_rate": 0.00019812673277955082, + "loss": 2.7698, + "step": 1328 + }, + { + "epoch": 0.10725526591881204, + "grad_norm": 0.8329752683639526, + "learning_rate": 0.0001981236901941694, + "loss": 2.8366, + "step": 1329 + }, + { + "epoch": 0.10733596965539505, + "grad_norm": 0.8313524127006531, + "learning_rate": 0.00019812064516328915, + "loss": 2.6863, + "step": 1330 + }, + { + "epoch": 0.10741667339197805, + "grad_norm": 0.8917783498764038, + "learning_rate": 0.0001981175976869859, + "loss": 2.7817, + "step": 1331 + }, + { + "epoch": 0.10749737712856106, + "grad_norm": 0.8370450735092163, + "learning_rate": 0.00019811454776533566, + "loss": 2.837, + "step": 1332 + }, + { + "epoch": 0.10757808086514406, + "grad_norm": 0.8415676355361938, + "learning_rate": 0.00019811149539841443, + "loss": 2.7399, + "step": 1333 + }, + { + "epoch": 0.10765878460172706, + "grad_norm": 0.8576632142066956, + "learning_rate": 0.00019810844058629825, + "loss": 2.7747, + "step": 1334 + }, + { + "epoch": 0.10773948833831007, + "grad_norm": 0.8943549394607544, + "learning_rate": 0.00019810538332906328, + "loss": 2.7368, + "step": 1335 + }, + { + "epoch": 0.10782019207489307, + "grad_norm": 0.8878718018531799, + "learning_rate": 0.00019810232362678568, + "loss": 2.7907, + "step": 1336 + }, + { + "epoch": 0.10790089581147608, + "grad_norm": 0.8131409287452698, + "learning_rate": 0.00019809926147954174, + "loss": 2.7782, + "step": 1337 + }, + { + "epoch": 0.10798159954805908, + "grad_norm": 0.8733747005462646, + "learning_rate": 0.0001980961968874078, + "loss": 2.8552, + "step": 1338 + }, + { + "epoch": 0.10806230328464207, + "grad_norm": 0.8997320532798767, + "learning_rate": 0.0001980931298504602, + "loss": 2.8452, + "step": 1339 + }, + { + "epoch": 0.10814300702122508, + "grad_norm": 0.8400282263755798, + "learning_rate": 0.00019809006036877538, + "loss": 2.786, + "step": 1340 + }, + 
{ + "epoch": 0.10822371075780808, + "grad_norm": 0.8173925280570984, + "learning_rate": 0.00019808698844242983, + "loss": 2.8363, + "step": 1341 + }, + { + "epoch": 0.10830441449439109, + "grad_norm": 0.872278094291687, + "learning_rate": 0.00019808391407150015, + "loss": 2.7789, + "step": 1342 + }, + { + "epoch": 0.10838511823097409, + "grad_norm": 0.8939952254295349, + "learning_rate": 0.00019808083725606293, + "loss": 2.7453, + "step": 1343 + }, + { + "epoch": 0.1084658219675571, + "grad_norm": 0.8351218104362488, + "learning_rate": 0.00019807775799619484, + "loss": 2.8004, + "step": 1344 + }, + { + "epoch": 0.1085465257041401, + "grad_norm": 0.8381102681159973, + "learning_rate": 0.00019807467629197266, + "loss": 2.8155, + "step": 1345 + }, + { + "epoch": 0.1086272294407231, + "grad_norm": 0.869458019733429, + "learning_rate": 0.00019807159214347317, + "loss": 2.8219, + "step": 1346 + }, + { + "epoch": 0.10870793317730611, + "grad_norm": 0.8251017928123474, + "learning_rate": 0.00019806850555077326, + "loss": 2.7978, + "step": 1347 + }, + { + "epoch": 0.10878863691388911, + "grad_norm": 0.8056492209434509, + "learning_rate": 0.0001980654165139498, + "loss": 2.7994, + "step": 1348 + }, + { + "epoch": 0.10886934065047212, + "grad_norm": 0.9566174745559692, + "learning_rate": 0.00019806232503307984, + "loss": 2.794, + "step": 1349 + }, + { + "epoch": 0.10895004438705512, + "grad_norm": 0.7891408801078796, + "learning_rate": 0.0001980592311082404, + "loss": 2.7134, + "step": 1350 + }, + { + "epoch": 0.10903074812363812, + "grad_norm": 0.8894741535186768, + "learning_rate": 0.00019805613473950862, + "loss": 2.7829, + "step": 1351 + }, + { + "epoch": 0.10911145186022113, + "grad_norm": 0.893086850643158, + "learning_rate": 0.0001980530359269616, + "loss": 2.7475, + "step": 1352 + }, + { + "epoch": 0.10919215559680413, + "grad_norm": 0.8758537173271179, + "learning_rate": 0.00019804993467067666, + "loss": 2.8715, + "step": 1353 + }, + { + "epoch": 0.10927285933338714, 
+ "grad_norm": 0.9304648041725159, + "learning_rate": 0.00019804683097073098, + "loss": 2.8051, + "step": 1354 + }, + { + "epoch": 0.10935356306997014, + "grad_norm": 0.8465876579284668, + "learning_rate": 0.00019804372482720202, + "loss": 2.7879, + "step": 1355 + }, + { + "epoch": 0.10943426680655315, + "grad_norm": 0.8485612273216248, + "learning_rate": 0.00019804061624016713, + "loss": 2.7783, + "step": 1356 + }, + { + "epoch": 0.10951497054313615, + "grad_norm": 0.835630476474762, + "learning_rate": 0.0001980375052097038, + "loss": 2.8116, + "step": 1357 + }, + { + "epoch": 0.10959567427971915, + "grad_norm": 0.8404836058616638, + "learning_rate": 0.00019803439173588956, + "loss": 2.8257, + "step": 1358 + }, + { + "epoch": 0.10967637801630216, + "grad_norm": 0.8048505783081055, + "learning_rate": 0.00019803127581880206, + "loss": 2.7762, + "step": 1359 + }, + { + "epoch": 0.10975708175288516, + "grad_norm": 0.8481776118278503, + "learning_rate": 0.00019802815745851885, + "loss": 2.8243, + "step": 1360 + }, + { + "epoch": 0.10983778548946817, + "grad_norm": 0.8565996885299683, + "learning_rate": 0.00019802503665511775, + "loss": 2.7958, + "step": 1361 + }, + { + "epoch": 0.10991848922605117, + "grad_norm": 0.8867515921592712, + "learning_rate": 0.0001980219134086765, + "loss": 2.7973, + "step": 1362 + }, + { + "epoch": 0.10999919296263418, + "grad_norm": 0.8459765911102295, + "learning_rate": 0.0001980187877192729, + "loss": 2.848, + "step": 1363 + }, + { + "epoch": 0.11007989669921718, + "grad_norm": 0.7929832339286804, + "learning_rate": 0.0001980156595869849, + "loss": 2.8583, + "step": 1364 + }, + { + "epoch": 0.11016060043580017, + "grad_norm": 0.8475651741027832, + "learning_rate": 0.00019801252901189043, + "loss": 2.8436, + "step": 1365 + }, + { + "epoch": 0.11024130417238318, + "grad_norm": 0.8545576333999634, + "learning_rate": 0.00019800939599406755, + "loss": 2.7457, + "step": 1366 + }, + { + "epoch": 0.11032200790896618, + "grad_norm": 
1.0093715190887451, + "learning_rate": 0.00019800626053359435, + "loss": 2.8198, + "step": 1367 + }, + { + "epoch": 0.11040271164554918, + "grad_norm": 0.8728145956993103, + "learning_rate": 0.0001980031226305489, + "loss": 2.7794, + "step": 1368 + }, + { + "epoch": 0.11048341538213219, + "grad_norm": 0.8538581728935242, + "learning_rate": 0.00019799998228500946, + "loss": 2.8018, + "step": 1369 + }, + { + "epoch": 0.11056411911871519, + "grad_norm": 0.9452785849571228, + "learning_rate": 0.00019799683949705432, + "loss": 2.8173, + "step": 1370 + }, + { + "epoch": 0.1106448228552982, + "grad_norm": 0.806508481502533, + "learning_rate": 0.00019799369426676174, + "loss": 2.8192, + "step": 1371 + }, + { + "epoch": 0.1107255265918812, + "grad_norm": 0.8952856063842773, + "learning_rate": 0.00019799054659421018, + "loss": 2.8072, + "step": 1372 + }, + { + "epoch": 0.1108062303284642, + "grad_norm": 0.8863561749458313, + "learning_rate": 0.00019798739647947802, + "loss": 2.7836, + "step": 1373 + }, + { + "epoch": 0.11088693406504721, + "grad_norm": 0.8544357419013977, + "learning_rate": 0.00019798424392264378, + "loss": 2.7714, + "step": 1374 + }, + { + "epoch": 0.11096763780163021, + "grad_norm": 0.807546854019165, + "learning_rate": 0.00019798108892378607, + "loss": 2.7635, + "step": 1375 + }, + { + "epoch": 0.11104834153821322, + "grad_norm": 0.8198233246803284, + "learning_rate": 0.0001979779314829835, + "loss": 2.8253, + "step": 1376 + }, + { + "epoch": 0.11112904527479622, + "grad_norm": 0.9268671870231628, + "learning_rate": 0.00019797477160031477, + "loss": 2.8007, + "step": 1377 + }, + { + "epoch": 0.11120974901137923, + "grad_norm": 0.8547680974006653, + "learning_rate": 0.0001979716092758586, + "loss": 2.7749, + "step": 1378 + }, + { + "epoch": 0.11129045274796223, + "grad_norm": 0.8052394390106201, + "learning_rate": 0.00019796844450969384, + "loss": 2.763, + "step": 1379 + }, + { + "epoch": 0.11137115648454524, + "grad_norm": 0.8291144371032715, + 
"learning_rate": 0.00019796527730189936, + "loss": 2.8053, + "step": 1380 + }, + { + "epoch": 0.11145186022112824, + "grad_norm": 0.8114006519317627, + "learning_rate": 0.00019796210765255404, + "loss": 2.8047, + "step": 1381 + }, + { + "epoch": 0.11153256395771124, + "grad_norm": 0.9326293468475342, + "learning_rate": 0.00019795893556173697, + "loss": 2.8199, + "step": 1382 + }, + { + "epoch": 0.11161326769429425, + "grad_norm": 0.7702555656433105, + "learning_rate": 0.00019795576102952714, + "loss": 2.7909, + "step": 1383 + }, + { + "epoch": 0.11169397143087725, + "grad_norm": 0.8115492463111877, + "learning_rate": 0.0001979525840560037, + "loss": 2.748, + "step": 1384 + }, + { + "epoch": 0.11177467516746026, + "grad_norm": 0.8926187753677368, + "learning_rate": 0.0001979494046412458, + "loss": 2.7791, + "step": 1385 + }, + { + "epoch": 0.11185537890404326, + "grad_norm": 0.8549754023551941, + "learning_rate": 0.0001979462227853327, + "loss": 2.7989, + "step": 1386 + }, + { + "epoch": 0.11193608264062627, + "grad_norm": 0.8625262975692749, + "learning_rate": 0.0001979430384883437, + "loss": 2.7202, + "step": 1387 + }, + { + "epoch": 0.11201678637720927, + "grad_norm": 0.8134698867797852, + "learning_rate": 0.00019793985175035813, + "loss": 2.8008, + "step": 1388 + }, + { + "epoch": 0.11209749011379228, + "grad_norm": 0.8546617031097412, + "learning_rate": 0.00019793666257145547, + "loss": 2.8076, + "step": 1389 + }, + { + "epoch": 0.11217819385037527, + "grad_norm": 0.8003748059272766, + "learning_rate": 0.00019793347095171514, + "loss": 2.826, + "step": 1390 + }, + { + "epoch": 0.11225889758695827, + "grad_norm": 0.8116614818572998, + "learning_rate": 0.00019793027689121674, + "loss": 2.7096, + "step": 1391 + }, + { + "epoch": 0.11233960132354127, + "grad_norm": 0.7785829901695251, + "learning_rate": 0.00019792708039003984, + "loss": 2.748, + "step": 1392 + }, + { + "epoch": 0.11242030506012428, + "grad_norm": 0.7999277710914612, + "learning_rate": 
0.0001979238814482641, + "loss": 2.7671, + "step": 1393 + }, + { + "epoch": 0.11250100879670728, + "grad_norm": 0.8862190842628479, + "learning_rate": 0.00019792068006596925, + "loss": 2.8484, + "step": 1394 + }, + { + "epoch": 0.11258171253329029, + "grad_norm": 0.8747627139091492, + "learning_rate": 0.00019791747624323512, + "loss": 2.7477, + "step": 1395 + }, + { + "epoch": 0.11266241626987329, + "grad_norm": 0.8280831575393677, + "learning_rate": 0.0001979142699801415, + "loss": 2.87, + "step": 1396 + }, + { + "epoch": 0.1127431200064563, + "grad_norm": 0.8069074153900146, + "learning_rate": 0.00019791106127676832, + "loss": 2.7724, + "step": 1397 + }, + { + "epoch": 0.1128238237430393, + "grad_norm": 0.8253301382064819, + "learning_rate": 0.00019790785013319557, + "loss": 2.7351, + "step": 1398 + }, + { + "epoch": 0.1129045274796223, + "grad_norm": 0.8298853635787964, + "learning_rate": 0.00019790463654950323, + "loss": 2.7709, + "step": 1399 + }, + { + "epoch": 0.11298523121620531, + "grad_norm": 0.7796407341957092, + "learning_rate": 0.0001979014205257715, + "loss": 2.7766, + "step": 1400 + }, + { + "epoch": 0.11306593495278831, + "grad_norm": 0.8922166228294373, + "learning_rate": 0.00019789820206208037, + "loss": 2.8473, + "step": 1401 + }, + { + "epoch": 0.11314663868937132, + "grad_norm": 0.7763219475746155, + "learning_rate": 0.00019789498115851015, + "loss": 2.8629, + "step": 1402 + }, + { + "epoch": 0.11322734242595432, + "grad_norm": 0.8679928779602051, + "learning_rate": 0.0001978917578151411, + "loss": 2.8017, + "step": 1403 + }, + { + "epoch": 0.11330804616253733, + "grad_norm": 0.8491933941841125, + "learning_rate": 0.00019788853203205357, + "loss": 2.7156, + "step": 1404 + }, + { + "epoch": 0.11338874989912033, + "grad_norm": 0.8271194696426392, + "learning_rate": 0.00019788530380932792, + "loss": 2.7892, + "step": 1405 + }, + { + "epoch": 0.11346945363570334, + "grad_norm": 0.9224163293838501, + "learning_rate": 0.00019788207314704463, + 
"loss": 2.7824, + "step": 1406 + }, + { + "epoch": 0.11355015737228634, + "grad_norm": 0.7662777900695801, + "learning_rate": 0.00019787884004528422, + "loss": 2.7364, + "step": 1407 + }, + { + "epoch": 0.11363086110886934, + "grad_norm": 0.8750362396240234, + "learning_rate": 0.00019787560450412728, + "loss": 2.7546, + "step": 1408 + }, + { + "epoch": 0.11371156484545235, + "grad_norm": 0.9158821105957031, + "learning_rate": 0.0001978723665236544, + "loss": 2.8304, + "step": 1409 + }, + { + "epoch": 0.11379226858203535, + "grad_norm": 0.8291050791740417, + "learning_rate": 0.0001978691261039463, + "loss": 2.758, + "step": 1410 + }, + { + "epoch": 0.11387297231861836, + "grad_norm": 0.801886796951294, + "learning_rate": 0.00019786588324508374, + "loss": 2.7805, + "step": 1411 + }, + { + "epoch": 0.11395367605520136, + "grad_norm": 0.8140222430229187, + "learning_rate": 0.00019786263794714757, + "loss": 2.8155, + "step": 1412 + }, + { + "epoch": 0.11403437979178437, + "grad_norm": 0.7747580409049988, + "learning_rate": 0.00019785939021021865, + "loss": 2.778, + "step": 1413 + }, + { + "epoch": 0.11411508352836737, + "grad_norm": 0.8954138159751892, + "learning_rate": 0.0001978561400343779, + "loss": 2.7756, + "step": 1414 + }, + { + "epoch": 0.11419578726495037, + "grad_norm": 0.9038921594619751, + "learning_rate": 0.00019785288741970634, + "loss": 2.7181, + "step": 1415 + }, + { + "epoch": 0.11427649100153336, + "grad_norm": 0.8284393548965454, + "learning_rate": 0.000197849632366285, + "loss": 2.7467, + "step": 1416 + }, + { + "epoch": 0.11435719473811637, + "grad_norm": 0.8996441960334778, + "learning_rate": 0.00019784637487419514, + "loss": 2.7918, + "step": 1417 + }, + { + "epoch": 0.11443789847469937, + "grad_norm": 0.9868448376655579, + "learning_rate": 0.00019784311494351777, + "loss": 2.7687, + "step": 1418 + }, + { + "epoch": 0.11451860221128238, + "grad_norm": 0.8491402864456177, + "learning_rate": 0.0001978398525743342, + "loss": 2.8492, + "step": 1419 + 
}, + { + "epoch": 0.11459930594786538, + "grad_norm": 1.06125807762146, + "learning_rate": 0.0001978365877667258, + "loss": 2.8041, + "step": 1420 + }, + { + "epoch": 0.11468000968444839, + "grad_norm": 0.8194011449813843, + "learning_rate": 0.00019783332052077386, + "loss": 2.7109, + "step": 1421 + }, + { + "epoch": 0.11476071342103139, + "grad_norm": 0.972620964050293, + "learning_rate": 0.00019783005083655984, + "loss": 2.8107, + "step": 1422 + }, + { + "epoch": 0.1148414171576144, + "grad_norm": 0.925410270690918, + "learning_rate": 0.0001978267787141652, + "loss": 2.7603, + "step": 1423 + }, + { + "epoch": 0.1149221208941974, + "grad_norm": 0.920156717300415, + "learning_rate": 0.00019782350415367152, + "loss": 2.7644, + "step": 1424 + }, + { + "epoch": 0.1150028246307804, + "grad_norm": 0.8617576360702515, + "learning_rate": 0.00019782022715516043, + "loss": 2.769, + "step": 1425 + }, + { + "epoch": 0.11508352836736341, + "grad_norm": 1.0987342596054077, + "learning_rate": 0.00019781694771871356, + "loss": 2.8224, + "step": 1426 + }, + { + "epoch": 0.11516423210394641, + "grad_norm": 0.8418076634407043, + "learning_rate": 0.00019781366584441264, + "loss": 2.7947, + "step": 1427 + }, + { + "epoch": 0.11524493584052942, + "grad_norm": 0.8010901808738708, + "learning_rate": 0.0001978103815323395, + "loss": 2.733, + "step": 1428 + }, + { + "epoch": 0.11532563957711242, + "grad_norm": 0.8649042844772339, + "learning_rate": 0.00019780709478257598, + "loss": 2.7681, + "step": 1429 + }, + { + "epoch": 0.11540634331369543, + "grad_norm": 0.7728127837181091, + "learning_rate": 0.00019780380559520397, + "loss": 2.7795, + "step": 1430 + }, + { + "epoch": 0.11548704705027843, + "grad_norm": 0.7770940065383911, + "learning_rate": 0.00019780051397030545, + "loss": 2.743, + "step": 1431 + }, + { + "epoch": 0.11556775078686143, + "grad_norm": 0.8341890573501587, + "learning_rate": 0.0001977972199079625, + "loss": 2.8047, + "step": 1432 + }, + { + "epoch": 0.11564845452344444, 
+ "grad_norm": 0.7894187569618225, + "learning_rate": 0.00019779392340825717, + "loss": 2.7757, + "step": 1433 + }, + { + "epoch": 0.11572915826002744, + "grad_norm": 0.8002873063087463, + "learning_rate": 0.00019779062447127164, + "loss": 2.7816, + "step": 1434 + }, + { + "epoch": 0.11580986199661045, + "grad_norm": 0.8256075978279114, + "learning_rate": 0.0001977873230970881, + "loss": 2.7839, + "step": 1435 + }, + { + "epoch": 0.11589056573319345, + "grad_norm": 0.8695322871208191, + "learning_rate": 0.0001977840192857889, + "loss": 2.746, + "step": 1436 + }, + { + "epoch": 0.11597126946977646, + "grad_norm": 0.767425537109375, + "learning_rate": 0.00019778071303745628, + "loss": 2.797, + "step": 1437 + }, + { + "epoch": 0.11605197320635946, + "grad_norm": 0.8263241052627563, + "learning_rate": 0.0001977774043521727, + "loss": 2.7702, + "step": 1438 + }, + { + "epoch": 0.11613267694294246, + "grad_norm": 0.8108638525009155, + "learning_rate": 0.0001977740932300206, + "loss": 2.6981, + "step": 1439 + }, + { + "epoch": 0.11621338067952547, + "grad_norm": 0.7945007681846619, + "learning_rate": 0.00019777077967108255, + "loss": 2.7357, + "step": 1440 + }, + { + "epoch": 0.11629408441610846, + "grad_norm": 0.8480326533317566, + "learning_rate": 0.00019776746367544107, + "loss": 2.8563, + "step": 1441 + }, + { + "epoch": 0.11637478815269146, + "grad_norm": 0.8202071785926819, + "learning_rate": 0.00019776414524317882, + "loss": 2.7955, + "step": 1442 + }, + { + "epoch": 0.11645549188927447, + "grad_norm": 0.8202874660491943, + "learning_rate": 0.00019776082437437852, + "loss": 2.765, + "step": 1443 + }, + { + "epoch": 0.11653619562585747, + "grad_norm": 0.8053051829338074, + "learning_rate": 0.00019775750106912294, + "loss": 2.6866, + "step": 1444 + }, + { + "epoch": 0.11661689936244048, + "grad_norm": 0.831968367099762, + "learning_rate": 0.00019775417532749486, + "loss": 2.7022, + "step": 1445 + }, + { + "epoch": 0.11669760309902348, + "grad_norm": 
0.8903129696846008, + "learning_rate": 0.00019775084714957725, + "loss": 2.7308, + "step": 1446 + }, + { + "epoch": 0.11677830683560649, + "grad_norm": 0.8178622722625732, + "learning_rate": 0.000197747516535453, + "loss": 2.7446, + "step": 1447 + }, + { + "epoch": 0.11685901057218949, + "grad_norm": 0.8270576596260071, + "learning_rate": 0.00019774418348520508, + "loss": 2.7716, + "step": 1448 + }, + { + "epoch": 0.1169397143087725, + "grad_norm": 0.7965807914733887, + "learning_rate": 0.00019774084799891662, + "loss": 2.7305, + "step": 1449 + }, + { + "epoch": 0.1170204180453555, + "grad_norm": 0.8499472737312317, + "learning_rate": 0.00019773751007667073, + "loss": 2.7584, + "step": 1450 + }, + { + "epoch": 0.1171011217819385, + "grad_norm": 0.8961663842201233, + "learning_rate": 0.0001977341697185506, + "loss": 2.7729, + "step": 1451 + }, + { + "epoch": 0.1171818255185215, + "grad_norm": 1.0203527212142944, + "learning_rate": 0.0001977308269246395, + "loss": 2.727, + "step": 1452 + }, + { + "epoch": 0.11726252925510451, + "grad_norm": 0.953289806842804, + "learning_rate": 0.0001977274816950207, + "loss": 2.8158, + "step": 1453 + }, + { + "epoch": 0.11734323299168752, + "grad_norm": 1.0064597129821777, + "learning_rate": 0.0001977241340297776, + "loss": 2.8743, + "step": 1454 + }, + { + "epoch": 0.11742393672827052, + "grad_norm": 0.8541988730430603, + "learning_rate": 0.00019772078392899363, + "loss": 2.8532, + "step": 1455 + }, + { + "epoch": 0.11750464046485352, + "grad_norm": 0.8351433873176575, + "learning_rate": 0.00019771743139275228, + "loss": 2.7749, + "step": 1456 + }, + { + "epoch": 0.11758534420143653, + "grad_norm": 0.9555812478065491, + "learning_rate": 0.00019771407642113712, + "loss": 2.7408, + "step": 1457 + }, + { + "epoch": 0.11766604793801953, + "grad_norm": 0.7943894267082214, + "learning_rate": 0.0001977107190142317, + "loss": 2.7265, + "step": 1458 + }, + { + "epoch": 0.11774675167460254, + "grad_norm": 0.8636460900306702, + 
"learning_rate": 0.0001977073591721198, + "loss": 2.8178, + "step": 1459 + }, + { + "epoch": 0.11782745541118554, + "grad_norm": 0.8673834800720215, + "learning_rate": 0.00019770399689488506, + "loss": 2.7928, + "step": 1460 + }, + { + "epoch": 0.11790815914776855, + "grad_norm": 0.9463722705841064, + "learning_rate": 0.00019770063218261133, + "loss": 2.7448, + "step": 1461 + }, + { + "epoch": 0.11798886288435155, + "grad_norm": 0.8429726362228394, + "learning_rate": 0.00019769726503538246, + "loss": 2.7564, + "step": 1462 + }, + { + "epoch": 0.11806956662093455, + "grad_norm": 0.9412201642990112, + "learning_rate": 0.00019769389545328236, + "loss": 2.793, + "step": 1463 + }, + { + "epoch": 0.11815027035751756, + "grad_norm": 0.9112111926078796, + "learning_rate": 0.000197690523436395, + "loss": 2.7787, + "step": 1464 + }, + { + "epoch": 0.11823097409410056, + "grad_norm": 0.8417023420333862, + "learning_rate": 0.00019768714898480444, + "loss": 2.7654, + "step": 1465 + }, + { + "epoch": 0.11831167783068357, + "grad_norm": 0.8275290727615356, + "learning_rate": 0.00019768377209859476, + "loss": 2.7914, + "step": 1466 + }, + { + "epoch": 0.11839238156726656, + "grad_norm": 0.8113142848014832, + "learning_rate": 0.00019768039277785017, + "loss": 2.7516, + "step": 1467 + }, + { + "epoch": 0.11847308530384956, + "grad_norm": 0.8655288219451904, + "learning_rate": 0.0001976770110226548, + "loss": 2.8158, + "step": 1468 + }, + { + "epoch": 0.11855378904043257, + "grad_norm": 0.8063547611236572, + "learning_rate": 0.000197673626833093, + "loss": 2.7624, + "step": 1469 + }, + { + "epoch": 0.11863449277701557, + "grad_norm": 0.843772292137146, + "learning_rate": 0.00019767024020924908, + "loss": 2.86, + "step": 1470 + }, + { + "epoch": 0.11871519651359858, + "grad_norm": 0.7942481637001038, + "learning_rate": 0.0001976668511512075, + "loss": 2.758, + "step": 1471 + }, + { + "epoch": 0.11879590025018158, + "grad_norm": 0.841275155544281, + "learning_rate": 
0.00019766345965905268, + "loss": 2.8014, + "step": 1472 + }, + { + "epoch": 0.11887660398676458, + "grad_norm": 0.8003600835800171, + "learning_rate": 0.00019766006573286915, + "loss": 2.7829, + "step": 1473 + }, + { + "epoch": 0.11895730772334759, + "grad_norm": 0.8437239527702332, + "learning_rate": 0.00019765666937274147, + "loss": 2.7706, + "step": 1474 + }, + { + "epoch": 0.11903801145993059, + "grad_norm": 0.8118240833282471, + "learning_rate": 0.00019765327057875433, + "loss": 2.8185, + "step": 1475 + }, + { + "epoch": 0.1191187151965136, + "grad_norm": 0.8051649928092957, + "learning_rate": 0.00019764986935099244, + "loss": 2.7676, + "step": 1476 + }, + { + "epoch": 0.1191994189330966, + "grad_norm": 0.7786862850189209, + "learning_rate": 0.00019764646568954053, + "loss": 2.8069, + "step": 1477 + }, + { + "epoch": 0.1192801226696796, + "grad_norm": 0.8199592232704163, + "learning_rate": 0.0001976430595944834, + "loss": 2.7718, + "step": 1478 + }, + { + "epoch": 0.11936082640626261, + "grad_norm": 0.8696652054786682, + "learning_rate": 0.00019763965106590604, + "loss": 2.7682, + "step": 1479 + }, + { + "epoch": 0.11944153014284561, + "grad_norm": 0.7993931174278259, + "learning_rate": 0.00019763624010389334, + "loss": 2.7607, + "step": 1480 + }, + { + "epoch": 0.11952223387942862, + "grad_norm": 0.8107055425643921, + "learning_rate": 0.0001976328267085303, + "loss": 2.7885, + "step": 1481 + }, + { + "epoch": 0.11960293761601162, + "grad_norm": 0.8189423084259033, + "learning_rate": 0.000197629410879902, + "loss": 2.7332, + "step": 1482 + }, + { + "epoch": 0.11968364135259463, + "grad_norm": 0.9134814143180847, + "learning_rate": 0.0001976259926180936, + "loss": 2.7691, + "step": 1483 + }, + { + "epoch": 0.11976434508917763, + "grad_norm": 0.8642883896827698, + "learning_rate": 0.00019762257192319023, + "loss": 2.7876, + "step": 1484 + }, + { + "epoch": 0.11984504882576064, + "grad_norm": 0.7411352396011353, + "learning_rate": 0.0001976191487952772, + 
"loss": 2.7577, + "step": 1485 + }, + { + "epoch": 0.11992575256234364, + "grad_norm": 0.7741669416427612, + "learning_rate": 0.00019761572323443978, + "loss": 2.8005, + "step": 1486 + }, + { + "epoch": 0.12000645629892664, + "grad_norm": 0.8195405602455139, + "learning_rate": 0.0001976122952407634, + "loss": 2.7421, + "step": 1487 + }, + { + "epoch": 0.12008716003550965, + "grad_norm": 0.8355886936187744, + "learning_rate": 0.00019760886481433345, + "loss": 2.8156, + "step": 1488 + }, + { + "epoch": 0.12016786377209265, + "grad_norm": 0.8321093916893005, + "learning_rate": 0.00019760543195523542, + "loss": 2.7261, + "step": 1489 + }, + { + "epoch": 0.12024856750867566, + "grad_norm": 0.7792446613311768, + "learning_rate": 0.0001976019966635549, + "loss": 2.7319, + "step": 1490 + }, + { + "epoch": 0.12032927124525866, + "grad_norm": 0.770535409450531, + "learning_rate": 0.00019759855893937748, + "loss": 2.7727, + "step": 1491 + }, + { + "epoch": 0.12040997498184165, + "grad_norm": 0.8168532252311707, + "learning_rate": 0.00019759511878278887, + "loss": 2.7763, + "step": 1492 + }, + { + "epoch": 0.12049067871842466, + "grad_norm": 0.8395755290985107, + "learning_rate": 0.00019759167619387476, + "loss": 2.8382, + "step": 1493 + }, + { + "epoch": 0.12057138245500766, + "grad_norm": 0.8682762384414673, + "learning_rate": 0.00019758823117272097, + "loss": 2.8056, + "step": 1494 + }, + { + "epoch": 0.12065208619159067, + "grad_norm": 0.815192699432373, + "learning_rate": 0.00019758478371941337, + "loss": 2.7602, + "step": 1495 + }, + { + "epoch": 0.12073278992817367, + "grad_norm": 0.7919273376464844, + "learning_rate": 0.00019758133383403786, + "loss": 2.7989, + "step": 1496 + }, + { + "epoch": 0.12081349366475667, + "grad_norm": 1.004387378692627, + "learning_rate": 0.00019757788151668045, + "loss": 2.7765, + "step": 1497 + }, + { + "epoch": 0.12089419740133968, + "grad_norm": 1.0032062530517578, + "learning_rate": 0.00019757442676742715, + "loss": 2.7751, + "step": 
1498 + }, + { + "epoch": 0.12097490113792268, + "grad_norm": 0.8797723054885864, + "learning_rate": 0.00019757096958636407, + "loss": 2.7798, + "step": 1499 + }, + { + "epoch": 0.12105560487450569, + "grad_norm": 0.9239820241928101, + "learning_rate": 0.0001975675099735774, + "loss": 2.7976, + "step": 1500 + }, + { + "epoch": 0.12113630861108869, + "grad_norm": 0.9903601408004761, + "learning_rate": 0.00019756404792915328, + "loss": 2.7891, + "step": 1501 + }, + { + "epoch": 0.1212170123476717, + "grad_norm": 0.8402895331382751, + "learning_rate": 0.0001975605834531781, + "loss": 2.8037, + "step": 1502 + }, + { + "epoch": 0.1212977160842547, + "grad_norm": 0.8986102342605591, + "learning_rate": 0.00019755711654573813, + "loss": 2.8375, + "step": 1503 + }, + { + "epoch": 0.1213784198208377, + "grad_norm": 0.8795471787452698, + "learning_rate": 0.0001975536472069198, + "loss": 2.7916, + "step": 1504 + }, + { + "epoch": 0.12145912355742071, + "grad_norm": 0.866278350353241, + "learning_rate": 0.00019755017543680962, + "loss": 2.7884, + "step": 1505 + }, + { + "epoch": 0.12153982729400371, + "grad_norm": 0.7877952456474304, + "learning_rate": 0.00019754670123549398, + "loss": 2.7659, + "step": 1506 + }, + { + "epoch": 0.12162053103058672, + "grad_norm": 0.857155978679657, + "learning_rate": 0.00019754322460305962, + "loss": 2.8029, + "step": 1507 + }, + { + "epoch": 0.12170123476716972, + "grad_norm": 0.8323284387588501, + "learning_rate": 0.00019753974553959314, + "loss": 2.7764, + "step": 1508 + }, + { + "epoch": 0.12178193850375273, + "grad_norm": 0.8557485938072205, + "learning_rate": 0.00019753626404518117, + "loss": 2.7448, + "step": 1509 + }, + { + "epoch": 0.12186264224033573, + "grad_norm": 0.8026818037033081, + "learning_rate": 0.00019753278011991058, + "loss": 2.7323, + "step": 1510 + }, + { + "epoch": 0.12194334597691874, + "grad_norm": 0.8578904271125793, + "learning_rate": 0.00019752929376386816, + "loss": 2.759, + "step": 1511 + }, + { + "epoch": 
0.12202404971350174, + "grad_norm": 0.8617175221443176, + "learning_rate": 0.00019752580497714076, + "loss": 2.7641, + "step": 1512 + }, + { + "epoch": 0.12210475345008474, + "grad_norm": 0.8261943459510803, + "learning_rate": 0.00019752231375981538, + "loss": 2.7554, + "step": 1513 + }, + { + "epoch": 0.12218545718666775, + "grad_norm": 0.9984099268913269, + "learning_rate": 0.00019751882011197902, + "loss": 2.763, + "step": 1514 + }, + { + "epoch": 0.12226616092325075, + "grad_norm": 0.8014064431190491, + "learning_rate": 0.00019751532403371874, + "loss": 2.8083, + "step": 1515 + }, + { + "epoch": 0.12234686465983376, + "grad_norm": 0.9276653528213501, + "learning_rate": 0.0001975118255251217, + "loss": 2.8055, + "step": 1516 + }, + { + "epoch": 0.12242756839641676, + "grad_norm": 0.9365193843841553, + "learning_rate": 0.00019750832458627503, + "loss": 2.7397, + "step": 1517 + }, + { + "epoch": 0.12250827213299975, + "grad_norm": 0.8952646851539612, + "learning_rate": 0.00019750482121726605, + "loss": 2.8305, + "step": 1518 + }, + { + "epoch": 0.12258897586958276, + "grad_norm": 0.8395531177520752, + "learning_rate": 0.00019750131541818204, + "loss": 2.7852, + "step": 1519 + }, + { + "epoch": 0.12266967960616576, + "grad_norm": 0.8123572468757629, + "learning_rate": 0.0001974978071891104, + "loss": 2.831, + "step": 1520 + }, + { + "epoch": 0.12275038334274876, + "grad_norm": 0.8716141581535339, + "learning_rate": 0.00019749429653013851, + "loss": 2.8012, + "step": 1521 + }, + { + "epoch": 0.12283108707933177, + "grad_norm": 0.7848379611968994, + "learning_rate": 0.0001974907834413539, + "loss": 2.7812, + "step": 1522 + }, + { + "epoch": 0.12291179081591477, + "grad_norm": 0.834072470664978, + "learning_rate": 0.00019748726792284414, + "loss": 2.7442, + "step": 1523 + }, + { + "epoch": 0.12299249455249778, + "grad_norm": 0.8377225399017334, + "learning_rate": 0.0001974837499746968, + "loss": 2.7967, + "step": 1524 + }, + { + "epoch": 0.12307319828908078, + 
"grad_norm": 0.8809494376182556, + "learning_rate": 0.0001974802295969996, + "loss": 2.8042, + "step": 1525 + }, + { + "epoch": 0.12315390202566379, + "grad_norm": 0.8504741787910461, + "learning_rate": 0.00019747670678984028, + "loss": 2.7909, + "step": 1526 + }, + { + "epoch": 0.12323460576224679, + "grad_norm": 0.9444355368614197, + "learning_rate": 0.00019747318155330663, + "loss": 2.8567, + "step": 1527 + }, + { + "epoch": 0.1233153094988298, + "grad_norm": 0.859166145324707, + "learning_rate": 0.00019746965388748645, + "loss": 2.8305, + "step": 1528 + }, + { + "epoch": 0.1233960132354128, + "grad_norm": 0.8431086540222168, + "learning_rate": 0.00019746612379246777, + "loss": 2.7799, + "step": 1529 + }, + { + "epoch": 0.1234767169719958, + "grad_norm": 0.8872438669204712, + "learning_rate": 0.00019746259126833846, + "loss": 2.8413, + "step": 1530 + }, + { + "epoch": 0.12355742070857881, + "grad_norm": 0.8698925375938416, + "learning_rate": 0.0001974590563151866, + "loss": 2.8446, + "step": 1531 + }, + { + "epoch": 0.12363812444516181, + "grad_norm": 0.8926429152488708, + "learning_rate": 0.0001974555189331003, + "loss": 2.7859, + "step": 1532 + }, + { + "epoch": 0.12371882818174482, + "grad_norm": 0.8089048862457275, + "learning_rate": 0.00019745197912216775, + "loss": 2.7985, + "step": 1533 + }, + { + "epoch": 0.12379953191832782, + "grad_norm": 0.8180400729179382, + "learning_rate": 0.0001974484368824771, + "loss": 2.7587, + "step": 1534 + }, + { + "epoch": 0.12388023565491083, + "grad_norm": 0.9584212303161621, + "learning_rate": 0.00019744489221411668, + "loss": 2.766, + "step": 1535 + }, + { + "epoch": 0.12396093939149383, + "grad_norm": 0.8425920009613037, + "learning_rate": 0.00019744134511717485, + "loss": 2.8125, + "step": 1536 + }, + { + "epoch": 0.12404164312807683, + "grad_norm": 0.9109299182891846, + "learning_rate": 0.00019743779559173996, + "loss": 2.8613, + "step": 1537 + }, + { + "epoch": 0.12412234686465984, + "grad_norm": 0.8840214610099792, 
+ "learning_rate": 0.0001974342436379005, + "loss": 2.7603, + "step": 1538 + }, + { + "epoch": 0.12420305060124284, + "grad_norm": 0.8128962516784668, + "learning_rate": 0.00019743068925574502, + "loss": 2.7593, + "step": 1539 + }, + { + "epoch": 0.12428375433782585, + "grad_norm": 0.8150052428245544, + "learning_rate": 0.00019742713244536204, + "loss": 2.8099, + "step": 1540 + }, + { + "epoch": 0.12436445807440885, + "grad_norm": 0.8442968130111694, + "learning_rate": 0.00019742357320684027, + "loss": 2.7746, + "step": 1541 + }, + { + "epoch": 0.12444516181099186, + "grad_norm": 0.9347402453422546, + "learning_rate": 0.00019742001154026838, + "loss": 2.8247, + "step": 1542 + }, + { + "epoch": 0.12452586554757485, + "grad_norm": 0.8305966854095459, + "learning_rate": 0.00019741644744573512, + "loss": 2.7398, + "step": 1543 + }, + { + "epoch": 0.12460656928415785, + "grad_norm": 0.8811129927635193, + "learning_rate": 0.00019741288092332935, + "loss": 2.8014, + "step": 1544 + }, + { + "epoch": 0.12468727302074085, + "grad_norm": 1.0287303924560547, + "learning_rate": 0.00019740931197313996, + "loss": 2.8449, + "step": 1545 + }, + { + "epoch": 0.12476797675732386, + "grad_norm": 0.8499771356582642, + "learning_rate": 0.00019740574059525588, + "loss": 2.7845, + "step": 1546 + }, + { + "epoch": 0.12484868049390686, + "grad_norm": 0.8110969066619873, + "learning_rate": 0.00019740216678976614, + "loss": 2.7565, + "step": 1547 + }, + { + "epoch": 0.12492938423048987, + "grad_norm": 0.8530771136283875, + "learning_rate": 0.00019739859055675977, + "loss": 2.8098, + "step": 1548 + }, + { + "epoch": 0.12501008796707289, + "grad_norm": 0.8483901619911194, + "learning_rate": 0.00019739501189632591, + "loss": 2.812, + "step": 1549 + }, + { + "epoch": 0.1250907917036559, + "grad_norm": 0.7894467711448669, + "learning_rate": 0.00019739143080855378, + "loss": 2.8576, + "step": 1550 + }, + { + "epoch": 0.1251714954402389, + "grad_norm": 0.8270247578620911, + "learning_rate": 
0.0001973878472935326, + "loss": 2.7613, + "step": 1551 + }, + { + "epoch": 0.1252521991768219, + "grad_norm": 0.8496212959289551, + "learning_rate": 0.00019738426135135174, + "loss": 2.8375, + "step": 1552 + }, + { + "epoch": 0.1253329029134049, + "grad_norm": 0.8465524911880493, + "learning_rate": 0.00019738067298210045, + "loss": 2.8023, + "step": 1553 + }, + { + "epoch": 0.1254136066499879, + "grad_norm": 0.7843824028968811, + "learning_rate": 0.00019737708218586826, + "loss": 2.7424, + "step": 1554 + }, + { + "epoch": 0.1254943103865709, + "grad_norm": 0.8310040235519409, + "learning_rate": 0.00019737348896274462, + "loss": 2.7608, + "step": 1555 + }, + { + "epoch": 0.1255750141231539, + "grad_norm": 0.7895017266273499, + "learning_rate": 0.00019736989331281914, + "loss": 2.7549, + "step": 1556 + }, + { + "epoch": 0.1256557178597369, + "grad_norm": 0.8140431642532349, + "learning_rate": 0.00019736629523618138, + "loss": 2.802, + "step": 1557 + }, + { + "epoch": 0.1257364215963199, + "grad_norm": 0.8026889562606812, + "learning_rate": 0.000197362694732921, + "loss": 2.7758, + "step": 1558 + }, + { + "epoch": 0.1258171253329029, + "grad_norm": 0.8018048405647278, + "learning_rate": 0.0001973590918031278, + "loss": 2.7729, + "step": 1559 + }, + { + "epoch": 0.1258978290694859, + "grad_norm": 0.8394612073898315, + "learning_rate": 0.00019735548644689147, + "loss": 2.7692, + "step": 1560 + }, + { + "epoch": 0.1259785328060689, + "grad_norm": 0.819804310798645, + "learning_rate": 0.00019735187866430198, + "loss": 2.6933, + "step": 1561 + }, + { + "epoch": 0.12605923654265191, + "grad_norm": 0.8094257116317749, + "learning_rate": 0.0001973482684554492, + "loss": 2.7722, + "step": 1562 + }, + { + "epoch": 0.12613994027923492, + "grad_norm": 0.8647315502166748, + "learning_rate": 0.00019734465582042305, + "loss": 2.787, + "step": 1563 + }, + { + "epoch": 0.12622064401581792, + "grad_norm": 0.8439335823059082, + "learning_rate": 0.00019734104075931367, + "loss": 2.8, + 
"step": 1564 + }, + { + "epoch": 0.12630134775240093, + "grad_norm": 0.852480947971344, + "learning_rate": 0.00019733742327221105, + "loss": 2.8656, + "step": 1565 + }, + { + "epoch": 0.12638205148898393, + "grad_norm": 0.813846230506897, + "learning_rate": 0.00019733380335920542, + "loss": 2.7733, + "step": 1566 + }, + { + "epoch": 0.12646275522556694, + "grad_norm": 0.7860896587371826, + "learning_rate": 0.00019733018102038698, + "loss": 2.8201, + "step": 1567 + }, + { + "epoch": 0.12654345896214994, + "grad_norm": 0.7857748866081238, + "learning_rate": 0.00019732655625584602, + "loss": 2.8726, + "step": 1568 + }, + { + "epoch": 0.12662416269873294, + "grad_norm": 0.8152899146080017, + "learning_rate": 0.00019732292906567286, + "loss": 2.7738, + "step": 1569 + }, + { + "epoch": 0.12670486643531595, + "grad_norm": 0.8281696438789368, + "learning_rate": 0.00019731929944995788, + "loss": 2.7966, + "step": 1570 + }, + { + "epoch": 0.12678557017189895, + "grad_norm": 0.8070773482322693, + "learning_rate": 0.00019731566740879158, + "loss": 2.6988, + "step": 1571 + }, + { + "epoch": 0.12686627390848196, + "grad_norm": 0.7859680652618408, + "learning_rate": 0.00019731203294226445, + "loss": 2.7241, + "step": 1572 + }, + { + "epoch": 0.12694697764506496, + "grad_norm": 0.7753982543945312, + "learning_rate": 0.0001973083960504671, + "loss": 2.7621, + "step": 1573 + }, + { + "epoch": 0.12702768138164797, + "grad_norm": 0.8063471913337708, + "learning_rate": 0.00019730475673349014, + "loss": 2.7298, + "step": 1574 + }, + { + "epoch": 0.12710838511823097, + "grad_norm": 0.7943962812423706, + "learning_rate": 0.0001973011149914243, + "loss": 2.7714, + "step": 1575 + }, + { + "epoch": 0.12718908885481398, + "grad_norm": 0.8297483325004578, + "learning_rate": 0.00019729747082436033, + "loss": 2.7743, + "step": 1576 + }, + { + "epoch": 0.12726979259139698, + "grad_norm": 0.8728111386299133, + "learning_rate": 0.000197293824232389, + "loss": 2.8251, + "step": 1577 + }, + { + 
"epoch": 0.12735049632797998, + "grad_norm": 0.8762480020523071, + "learning_rate": 0.00019729017521560128, + "loss": 2.8036, + "step": 1578 + }, + { + "epoch": 0.127431200064563, + "grad_norm": 0.9266185164451599, + "learning_rate": 0.00019728652377408806, + "loss": 2.7335, + "step": 1579 + }, + { + "epoch": 0.127511903801146, + "grad_norm": 0.9289839267730713, + "learning_rate": 0.00019728286990794037, + "loss": 2.7715, + "step": 1580 + }, + { + "epoch": 0.127592607537729, + "grad_norm": 0.8811823725700378, + "learning_rate": 0.0001972792136172493, + "loss": 2.7389, + "step": 1581 + }, + { + "epoch": 0.127673311274312, + "grad_norm": 0.8174294233322144, + "learning_rate": 0.00019727555490210588, + "loss": 2.7483, + "step": 1582 + }, + { + "epoch": 0.127754015010895, + "grad_norm": 0.8254107236862183, + "learning_rate": 0.00019727189376260137, + "loss": 2.7897, + "step": 1583 + }, + { + "epoch": 0.127834718747478, + "grad_norm": 0.8478763699531555, + "learning_rate": 0.000197268230198827, + "loss": 2.7394, + "step": 1584 + }, + { + "epoch": 0.12791542248406101, + "grad_norm": 0.8356192111968994, + "learning_rate": 0.00019726456421087404, + "loss": 2.7518, + "step": 1585 + }, + { + "epoch": 0.12799612622064402, + "grad_norm": 0.8523107767105103, + "learning_rate": 0.00019726089579883392, + "loss": 2.7893, + "step": 1586 + }, + { + "epoch": 0.12807682995722702, + "grad_norm": 0.9048579931259155, + "learning_rate": 0.00019725722496279804, + "loss": 2.7488, + "step": 1587 + }, + { + "epoch": 0.12815753369381003, + "grad_norm": 0.8242251873016357, + "learning_rate": 0.00019725355170285787, + "loss": 2.7544, + "step": 1588 + }, + { + "epoch": 0.12823823743039303, + "grad_norm": 0.8343983888626099, + "learning_rate": 0.00019724987601910497, + "loss": 2.7317, + "step": 1589 + }, + { + "epoch": 0.12831894116697604, + "grad_norm": 0.8084509372711182, + "learning_rate": 0.00019724619791163095, + "loss": 2.7822, + "step": 1590 + }, + { + "epoch": 0.12839964490355904, + 
"grad_norm": 0.8397380113601685, + "learning_rate": 0.00019724251738052745, + "loss": 2.8188, + "step": 1591 + }, + { + "epoch": 0.12848034864014204, + "grad_norm": 0.8558558821678162, + "learning_rate": 0.00019723883442588624, + "loss": 2.7623, + "step": 1592 + }, + { + "epoch": 0.12856105237672505, + "grad_norm": 0.7602639198303223, + "learning_rate": 0.0001972351490477991, + "loss": 2.7932, + "step": 1593 + }, + { + "epoch": 0.12864175611330805, + "grad_norm": 0.8379851579666138, + "learning_rate": 0.00019723146124635786, + "loss": 2.8296, + "step": 1594 + }, + { + "epoch": 0.12872245984989106, + "grad_norm": 0.8454548716545105, + "learning_rate": 0.00019722777102165444, + "loss": 2.8192, + "step": 1595 + }, + { + "epoch": 0.12880316358647406, + "grad_norm": 0.8344082832336426, + "learning_rate": 0.0001972240783737808, + "loss": 2.7628, + "step": 1596 + }, + { + "epoch": 0.12888386732305707, + "grad_norm": 0.809093713760376, + "learning_rate": 0.000197220383302829, + "loss": 2.8055, + "step": 1597 + }, + { + "epoch": 0.12896457105964007, + "grad_norm": 0.7909694910049438, + "learning_rate": 0.0001972166858088911, + "loss": 2.7292, + "step": 1598 + }, + { + "epoch": 0.12904527479622308, + "grad_norm": 0.8350280523300171, + "learning_rate": 0.00019721298589205928, + "loss": 2.7671, + "step": 1599 + }, + { + "epoch": 0.12912597853280608, + "grad_norm": 0.7857616543769836, + "learning_rate": 0.00019720928355242568, + "loss": 2.729, + "step": 1600 + }, + { + "epoch": 0.12920668226938908, + "grad_norm": 0.7899746298789978, + "learning_rate": 0.0001972055787900827, + "loss": 2.8023, + "step": 1601 + }, + { + "epoch": 0.1292873860059721, + "grad_norm": 0.8604246377944946, + "learning_rate": 0.00019720187160512256, + "loss": 2.749, + "step": 1602 + }, + { + "epoch": 0.1293680897425551, + "grad_norm": 0.8517864942550659, + "learning_rate": 0.0001971981619976377, + "loss": 2.7203, + "step": 1603 + }, + { + "epoch": 0.1294487934791381, + "grad_norm": 0.8860471248626709, + 
"learning_rate": 0.00019719444996772056, + "loss": 2.7372, + "step": 1604 + }, + { + "epoch": 0.1295294972157211, + "grad_norm": 0.8355888724327087, + "learning_rate": 0.00019719073551546367, + "loss": 2.7284, + "step": 1605 + }, + { + "epoch": 0.1296102009523041, + "grad_norm": 0.7998479604721069, + "learning_rate": 0.00019718701864095955, + "loss": 2.7726, + "step": 1606 + }, + { + "epoch": 0.12969090468888708, + "grad_norm": 0.8564549088478088, + "learning_rate": 0.00019718329934430092, + "loss": 2.7334, + "step": 1607 + }, + { + "epoch": 0.1297716084254701, + "grad_norm": 0.8594443798065186, + "learning_rate": 0.00019717957762558044, + "loss": 2.7865, + "step": 1608 + }, + { + "epoch": 0.1298523121620531, + "grad_norm": 0.804553210735321, + "learning_rate": 0.00019717585348489082, + "loss": 2.8094, + "step": 1609 + }, + { + "epoch": 0.1299330158986361, + "grad_norm": 0.7892553806304932, + "learning_rate": 0.0001971721269223249, + "loss": 2.7969, + "step": 1610 + }, + { + "epoch": 0.1300137196352191, + "grad_norm": 0.8703331351280212, + "learning_rate": 0.0001971683979379756, + "loss": 2.8192, + "step": 1611 + }, + { + "epoch": 0.1300944233718021, + "grad_norm": 0.8176589012145996, + "learning_rate": 0.00019716466653193582, + "loss": 2.7902, + "step": 1612 + }, + { + "epoch": 0.1301751271083851, + "grad_norm": 0.8305137157440186, + "learning_rate": 0.00019716093270429855, + "loss": 2.8202, + "step": 1613 + }, + { + "epoch": 0.1302558308449681, + "grad_norm": 0.8261505365371704, + "learning_rate": 0.00019715719645515688, + "loss": 2.7905, + "step": 1614 + }, + { + "epoch": 0.13033653458155112, + "grad_norm": 0.9465535879135132, + "learning_rate": 0.00019715345778460389, + "loss": 2.7965, + "step": 1615 + }, + { + "epoch": 0.13041723831813412, + "grad_norm": 0.8847100138664246, + "learning_rate": 0.00019714971669273275, + "loss": 2.8177, + "step": 1616 + }, + { + "epoch": 0.13049794205471713, + "grad_norm": 0.9768328666687012, + "learning_rate": 
0.0001971459731796367, + "loss": 2.7668, + "step": 1617 + }, + { + "epoch": 0.13057864579130013, + "grad_norm": 0.7498586177825928, + "learning_rate": 0.0001971422272454091, + "loss": 2.761, + "step": 1618 + }, + { + "epoch": 0.13065934952788313, + "grad_norm": 1.0455373525619507, + "learning_rate": 0.00019713847889014325, + "loss": 2.7652, + "step": 1619 + }, + { + "epoch": 0.13074005326446614, + "grad_norm": 0.8484631180763245, + "learning_rate": 0.00019713472811393258, + "loss": 2.7858, + "step": 1620 + }, + { + "epoch": 0.13082075700104914, + "grad_norm": 0.8190686702728271, + "learning_rate": 0.00019713097491687057, + "loss": 2.7217, + "step": 1621 + }, + { + "epoch": 0.13090146073763215, + "grad_norm": 0.8866000175476074, + "learning_rate": 0.00019712721929905077, + "loss": 2.7868, + "step": 1622 + }, + { + "epoch": 0.13098216447421515, + "grad_norm": 0.8026713132858276, + "learning_rate": 0.00019712346126056677, + "loss": 2.7276, + "step": 1623 + }, + { + "epoch": 0.13106286821079816, + "grad_norm": 0.8306462168693542, + "learning_rate": 0.00019711970080151225, + "loss": 2.7747, + "step": 1624 + }, + { + "epoch": 0.13114357194738116, + "grad_norm": 0.8276618123054504, + "learning_rate": 0.0001971159379219809, + "loss": 2.7146, + "step": 1625 + }, + { + "epoch": 0.13122427568396416, + "grad_norm": 0.9749011993408203, + "learning_rate": 0.00019711217262206648, + "loss": 2.8731, + "step": 1626 + }, + { + "epoch": 0.13130497942054717, + "grad_norm": 0.828484058380127, + "learning_rate": 0.00019710840490186292, + "loss": 2.803, + "step": 1627 + }, + { + "epoch": 0.13138568315713017, + "grad_norm": 0.8095957636833191, + "learning_rate": 0.00019710463476146402, + "loss": 2.7751, + "step": 1628 + }, + { + "epoch": 0.13146638689371318, + "grad_norm": 0.8731853365898132, + "learning_rate": 0.0001971008622009638, + "loss": 2.8274, + "step": 1629 + }, + { + "epoch": 0.13154709063029618, + "grad_norm": 0.8180200457572937, + "learning_rate": 0.00019709708722045628, + 
"loss": 2.813, + "step": 1630 + }, + { + "epoch": 0.13162779436687919, + "grad_norm": 0.7740067839622498, + "learning_rate": 0.00019709330982003553, + "loss": 2.7319, + "step": 1631 + }, + { + "epoch": 0.1317084981034622, + "grad_norm": 0.8439326882362366, + "learning_rate": 0.0001970895299997957, + "loss": 2.8182, + "step": 1632 + }, + { + "epoch": 0.1317892018400452, + "grad_norm": 0.8254802823066711, + "learning_rate": 0.000197085747759831, + "loss": 2.7874, + "step": 1633 + }, + { + "epoch": 0.1318699055766282, + "grad_norm": 0.8128175139427185, + "learning_rate": 0.00019708196310023562, + "loss": 2.8125, + "step": 1634 + }, + { + "epoch": 0.1319506093132112, + "grad_norm": 0.8664820790290833, + "learning_rate": 0.00019707817602110402, + "loss": 2.8446, + "step": 1635 + }, + { + "epoch": 0.1320313130497942, + "grad_norm": 0.8101332783699036, + "learning_rate": 0.00019707438652253044, + "loss": 2.8027, + "step": 1636 + }, + { + "epoch": 0.1321120167863772, + "grad_norm": 0.8296725153923035, + "learning_rate": 0.00019707059460460945, + "loss": 2.7677, + "step": 1637 + }, + { + "epoch": 0.13219272052296022, + "grad_norm": 0.7321150898933411, + "learning_rate": 0.0001970668002674355, + "loss": 2.6991, + "step": 1638 + }, + { + "epoch": 0.13227342425954322, + "grad_norm": 0.8321375250816345, + "learning_rate": 0.0001970630035111031, + "loss": 2.6948, + "step": 1639 + }, + { + "epoch": 0.13235412799612623, + "grad_norm": 0.7622714042663574, + "learning_rate": 0.00019705920433570694, + "loss": 2.6957, + "step": 1640 + }, + { + "epoch": 0.13243483173270923, + "grad_norm": 0.8413416147232056, + "learning_rate": 0.00019705540274134173, + "loss": 2.7277, + "step": 1641 + }, + { + "epoch": 0.13251553546929223, + "grad_norm": 0.8798941373825073, + "learning_rate": 0.00019705159872810218, + "loss": 2.7699, + "step": 1642 + }, + { + "epoch": 0.13259623920587524, + "grad_norm": 0.788287341594696, + "learning_rate": 0.00019704779229608304, + "loss": 2.7933, + "step": 1643 + }, 
+ { + "epoch": 0.13267694294245824, + "grad_norm": 0.8547430634498596, + "learning_rate": 0.00019704398344537927, + "loss": 2.7706, + "step": 1644 + }, + { + "epoch": 0.13275764667904125, + "grad_norm": 0.8474008440971375, + "learning_rate": 0.00019704017217608575, + "loss": 2.8005, + "step": 1645 + }, + { + "epoch": 0.13283835041562425, + "grad_norm": 0.8636945486068726, + "learning_rate": 0.00019703635848829747, + "loss": 2.8241, + "step": 1646 + }, + { + "epoch": 0.13291905415220726, + "grad_norm": 0.8158168792724609, + "learning_rate": 0.00019703254238210947, + "loss": 2.7576, + "step": 1647 + }, + { + "epoch": 0.13299975788879026, + "grad_norm": 0.8420887589454651, + "learning_rate": 0.0001970287238576169, + "loss": 2.7677, + "step": 1648 + }, + { + "epoch": 0.13308046162537326, + "grad_norm": 0.7910059690475464, + "learning_rate": 0.00019702490291491486, + "loss": 2.7807, + "step": 1649 + }, + { + "epoch": 0.13316116536195627, + "grad_norm": 0.8308143615722656, + "learning_rate": 0.00019702107955409863, + "loss": 2.7698, + "step": 1650 + }, + { + "epoch": 0.13324186909853927, + "grad_norm": 0.8215764760971069, + "learning_rate": 0.00019701725377526349, + "loss": 2.8263, + "step": 1651 + }, + { + "epoch": 0.13332257283512228, + "grad_norm": 0.8780504465103149, + "learning_rate": 0.00019701342557850476, + "loss": 2.8032, + "step": 1652 + }, + { + "epoch": 0.13340327657170528, + "grad_norm": 0.8125136494636536, + "learning_rate": 0.0001970095949639179, + "loss": 2.8317, + "step": 1653 + }, + { + "epoch": 0.13348398030828829, + "grad_norm": 0.8170902132987976, + "learning_rate": 0.00019700576193159831, + "loss": 2.7528, + "step": 1654 + }, + { + "epoch": 0.1335646840448713, + "grad_norm": 0.8318637013435364, + "learning_rate": 0.00019700192648164157, + "loss": 2.7963, + "step": 1655 + }, + { + "epoch": 0.1336453877814543, + "grad_norm": 0.8445270657539368, + "learning_rate": 0.00019699808861414327, + "loss": 2.772, + "step": 1656 + }, + { + "epoch": 
0.1337260915180373, + "grad_norm": 0.7908959984779358, + "learning_rate": 0.00019699424832919906, + "loss": 2.7528, + "step": 1657 + }, + { + "epoch": 0.13380679525462028, + "grad_norm": 0.8153900504112244, + "learning_rate": 0.00019699040562690462, + "loss": 2.7643, + "step": 1658 + }, + { + "epoch": 0.13388749899120328, + "grad_norm": 0.86302250623703, + "learning_rate": 0.0001969865605073557, + "loss": 2.8037, + "step": 1659 + }, + { + "epoch": 0.13396820272778628, + "grad_norm": 0.8373419046401978, + "learning_rate": 0.0001969827129706482, + "loss": 2.7647, + "step": 1660 + }, + { + "epoch": 0.1340489064643693, + "grad_norm": 0.8166481852531433, + "learning_rate": 0.00019697886301687798, + "loss": 2.8333, + "step": 1661 + }, + { + "epoch": 0.1341296102009523, + "grad_norm": 0.7807812094688416, + "learning_rate": 0.00019697501064614098, + "loss": 2.7495, + "step": 1662 + }, + { + "epoch": 0.1342103139375353, + "grad_norm": 0.8375338315963745, + "learning_rate": 0.00019697115585853324, + "loss": 2.7518, + "step": 1663 + }, + { + "epoch": 0.1342910176741183, + "grad_norm": 0.7392182350158691, + "learning_rate": 0.00019696729865415077, + "loss": 2.758, + "step": 1664 + }, + { + "epoch": 0.1343717214107013, + "grad_norm": 0.8041971921920776, + "learning_rate": 0.00019696343903308978, + "loss": 2.7485, + "step": 1665 + }, + { + "epoch": 0.1344524251472843, + "grad_norm": 0.789310097694397, + "learning_rate": 0.00019695957699544643, + "loss": 2.8179, + "step": 1666 + }, + { + "epoch": 0.13453312888386731, + "grad_norm": 0.7643609642982483, + "learning_rate": 0.00019695571254131693, + "loss": 2.7791, + "step": 1667 + }, + { + "epoch": 0.13461383262045032, + "grad_norm": 0.8284661769866943, + "learning_rate": 0.00019695184567079766, + "loss": 2.717, + "step": 1668 + }, + { + "epoch": 0.13469453635703332, + "grad_norm": 0.7620903253555298, + "learning_rate": 0.00019694797638398494, + "loss": 2.7808, + "step": 1669 + }, + { + "epoch": 0.13477524009361633, + "grad_norm": 
0.9123913645744324, + "learning_rate": 0.00019694410468097524, + "loss": 2.7648, + "step": 1670 + }, + { + "epoch": 0.13485594383019933, + "grad_norm": 0.735518217086792, + "learning_rate": 0.000196940230561865, + "loss": 2.7653, + "step": 1671 + }, + { + "epoch": 0.13493664756678234, + "grad_norm": 0.8363413214683533, + "learning_rate": 0.00019693635402675085, + "loss": 2.766, + "step": 1672 + }, + { + "epoch": 0.13501735130336534, + "grad_norm": 0.8206491470336914, + "learning_rate": 0.00019693247507572936, + "loss": 2.7829, + "step": 1673 + }, + { + "epoch": 0.13509805503994834, + "grad_norm": 0.7726099491119385, + "learning_rate": 0.0001969285937088972, + "loss": 2.7381, + "step": 1674 + }, + { + "epoch": 0.13517875877653135, + "grad_norm": 0.8970316052436829, + "learning_rate": 0.0001969247099263511, + "loss": 2.7836, + "step": 1675 + }, + { + "epoch": 0.13525946251311435, + "grad_norm": 0.7966172099113464, + "learning_rate": 0.00019692082372818788, + "loss": 2.7135, + "step": 1676 + }, + { + "epoch": 0.13534016624969736, + "grad_norm": 0.8583024740219116, + "learning_rate": 0.00019691693511450438, + "loss": 2.7908, + "step": 1677 + }, + { + "epoch": 0.13542086998628036, + "grad_norm": 0.9430457353591919, + "learning_rate": 0.0001969130440853975, + "loss": 2.7311, + "step": 1678 + }, + { + "epoch": 0.13550157372286337, + "grad_norm": 0.8066009879112244, + "learning_rate": 0.00019690915064096424, + "loss": 2.7039, + "step": 1679 + }, + { + "epoch": 0.13558227745944637, + "grad_norm": 1.0169655084609985, + "learning_rate": 0.0001969052547813016, + "loss": 2.7832, + "step": 1680 + }, + { + "epoch": 0.13566298119602938, + "grad_norm": 0.8606080412864685, + "learning_rate": 0.00019690135650650672, + "loss": 2.751, + "step": 1681 + }, + { + "epoch": 0.13574368493261238, + "grad_norm": 0.8625333905220032, + "learning_rate": 0.00019689745581667674, + "loss": 2.761, + "step": 1682 + }, + { + "epoch": 0.13582438866919538, + "grad_norm": 0.9304285645484924, + 
"learning_rate": 0.00019689355271190886, + "loss": 2.7566, + "step": 1683 + }, + { + "epoch": 0.1359050924057784, + "grad_norm": 0.793397068977356, + "learning_rate": 0.00019688964719230035, + "loss": 2.7648, + "step": 1684 + }, + { + "epoch": 0.1359857961423614, + "grad_norm": 0.8496749401092529, + "learning_rate": 0.00019688573925794858, + "loss": 2.7461, + "step": 1685 + }, + { + "epoch": 0.1360664998789444, + "grad_norm": 0.7807914018630981, + "learning_rate": 0.0001968818289089509, + "loss": 2.8266, + "step": 1686 + }, + { + "epoch": 0.1361472036155274, + "grad_norm": 0.8186607956886292, + "learning_rate": 0.0001968779161454048, + "loss": 2.8447, + "step": 1687 + }, + { + "epoch": 0.1362279073521104, + "grad_norm": 0.8007118701934814, + "learning_rate": 0.0001968740009674078, + "loss": 2.7888, + "step": 1688 + }, + { + "epoch": 0.1363086110886934, + "grad_norm": 0.8735570311546326, + "learning_rate": 0.00019687008337505749, + "loss": 2.7152, + "step": 1689 + }, + { + "epoch": 0.13638931482527641, + "grad_norm": 0.8546476364135742, + "learning_rate": 0.00019686616336845144, + "loss": 2.8113, + "step": 1690 + }, + { + "epoch": 0.13647001856185942, + "grad_norm": 0.9156736135482788, + "learning_rate": 0.0001968622409476874, + "loss": 2.7561, + "step": 1691 + }, + { + "epoch": 0.13655072229844242, + "grad_norm": 0.8091925382614136, + "learning_rate": 0.0001968583161128631, + "loss": 2.7384, + "step": 1692 + }, + { + "epoch": 0.13663142603502543, + "grad_norm": 0.7871039509773254, + "learning_rate": 0.0001968543888640764, + "loss": 2.7138, + "step": 1693 + }, + { + "epoch": 0.13671212977160843, + "grad_norm": 0.9537062048912048, + "learning_rate": 0.00019685045920142516, + "loss": 2.7726, + "step": 1694 + }, + { + "epoch": 0.13679283350819144, + "grad_norm": 0.8663280010223389, + "learning_rate": 0.00019684652712500728, + "loss": 2.7509, + "step": 1695 + }, + { + "epoch": 0.13687353724477444, + "grad_norm": 0.8717214465141296, + "learning_rate": 
0.0001968425926349208, + "loss": 2.791, + "step": 1696 + }, + { + "epoch": 0.13695424098135744, + "grad_norm": 0.8942584991455078, + "learning_rate": 0.00019683865573126374, + "loss": 2.77, + "step": 1697 + }, + { + "epoch": 0.13703494471794045, + "grad_norm": 0.8243421316146851, + "learning_rate": 0.00019683471641413424, + "loss": 2.8063, + "step": 1698 + }, + { + "epoch": 0.13711564845452345, + "grad_norm": 0.8618699908256531, + "learning_rate": 0.0001968307746836305, + "loss": 2.6872, + "step": 1699 + }, + { + "epoch": 0.13719635219110646, + "grad_norm": 0.7931695580482483, + "learning_rate": 0.00019682683053985072, + "loss": 2.7495, + "step": 1700 + }, + { + "epoch": 0.13727705592768946, + "grad_norm": 0.7549482583999634, + "learning_rate": 0.00019682288398289324, + "loss": 2.7543, + "step": 1701 + }, + { + "epoch": 0.13735775966427247, + "grad_norm": 0.7953789234161377, + "learning_rate": 0.00019681893501285636, + "loss": 2.6895, + "step": 1702 + }, + { + "epoch": 0.13743846340085547, + "grad_norm": 0.7916574478149414, + "learning_rate": 0.00019681498362983857, + "loss": 2.819, + "step": 1703 + }, + { + "epoch": 0.13751916713743847, + "grad_norm": 0.7986735105514526, + "learning_rate": 0.0001968110298339383, + "loss": 2.8062, + "step": 1704 + }, + { + "epoch": 0.13759987087402148, + "grad_norm": 0.8601658940315247, + "learning_rate": 0.00019680707362525407, + "loss": 2.7625, + "step": 1705 + }, + { + "epoch": 0.13768057461060448, + "grad_norm": 0.8888362050056458, + "learning_rate": 0.00019680311500388454, + "loss": 2.7747, + "step": 1706 + }, + { + "epoch": 0.1377612783471875, + "grad_norm": 0.7762896418571472, + "learning_rate": 0.00019679915396992833, + "loss": 2.7959, + "step": 1707 + }, + { + "epoch": 0.1378419820837705, + "grad_norm": 0.8942253589630127, + "learning_rate": 0.00019679519052348416, + "loss": 2.7717, + "step": 1708 + }, + { + "epoch": 0.13792268582035347, + "grad_norm": 0.8388909697532654, + "learning_rate": 0.00019679122466465082, + 
"loss": 2.7448, + "step": 1709 + }, + { + "epoch": 0.13800338955693647, + "grad_norm": 0.8826024532318115, + "learning_rate": 0.00019678725639352712, + "loss": 2.7307, + "step": 1710 + }, + { + "epoch": 0.13808409329351948, + "grad_norm": 0.8972313404083252, + "learning_rate": 0.00019678328571021204, + "loss": 2.7619, + "step": 1711 + }, + { + "epoch": 0.13816479703010248, + "grad_norm": 0.9373044371604919, + "learning_rate": 0.00019677931261480444, + "loss": 2.7664, + "step": 1712 + }, + { + "epoch": 0.1382455007666855, + "grad_norm": 0.8060994148254395, + "learning_rate": 0.00019677533710740343, + "loss": 2.7707, + "step": 1713 + }, + { + "epoch": 0.1383262045032685, + "grad_norm": 0.8324100971221924, + "learning_rate": 0.000196771359188108, + "loss": 2.8249, + "step": 1714 + }, + { + "epoch": 0.1384069082398515, + "grad_norm": 0.879176676273346, + "learning_rate": 0.00019676737885701738, + "loss": 2.7767, + "step": 1715 + }, + { + "epoch": 0.1384876119764345, + "grad_norm": 0.8823966979980469, + "learning_rate": 0.0001967633961142307, + "loss": 2.791, + "step": 1716 + }, + { + "epoch": 0.1385683157130175, + "grad_norm": 0.8176039457321167, + "learning_rate": 0.00019675941095984728, + "loss": 2.8225, + "step": 1717 + }, + { + "epoch": 0.1386490194496005, + "grad_norm": 0.8005076050758362, + "learning_rate": 0.00019675542339396635, + "loss": 2.8175, + "step": 1718 + }, + { + "epoch": 0.1387297231861835, + "grad_norm": 0.800854504108429, + "learning_rate": 0.0001967514334166874, + "loss": 2.8226, + "step": 1719 + }, + { + "epoch": 0.13881042692276652, + "grad_norm": 0.7941261529922485, + "learning_rate": 0.00019674744102810978, + "loss": 2.7488, + "step": 1720 + }, + { + "epoch": 0.13889113065934952, + "grad_norm": 0.7955947518348694, + "learning_rate": 0.00019674344622833302, + "loss": 2.7749, + "step": 1721 + }, + { + "epoch": 0.13897183439593253, + "grad_norm": 0.8353856205940247, + "learning_rate": 0.00019673944901745674, + "loss": 2.7982, + "step": 1722 + }, + 
{ + "epoch": 0.13905253813251553, + "grad_norm": 0.8711503744125366, + "learning_rate": 0.00019673544939558047, + "loss": 2.8007, + "step": 1723 + }, + { + "epoch": 0.13913324186909853, + "grad_norm": 0.8525274991989136, + "learning_rate": 0.00019673144736280396, + "loss": 2.7423, + "step": 1724 + }, + { + "epoch": 0.13921394560568154, + "grad_norm": 0.8143991231918335, + "learning_rate": 0.0001967274429192269, + "loss": 2.7752, + "step": 1725 + }, + { + "epoch": 0.13929464934226454, + "grad_norm": 0.8508228063583374, + "learning_rate": 0.00019672343606494912, + "loss": 2.7422, + "step": 1726 + }, + { + "epoch": 0.13937535307884755, + "grad_norm": 0.8320932984352112, + "learning_rate": 0.0001967194268000705, + "loss": 2.7598, + "step": 1727 + }, + { + "epoch": 0.13945605681543055, + "grad_norm": 0.8233908414840698, + "learning_rate": 0.00019671541512469092, + "loss": 2.7834, + "step": 1728 + }, + { + "epoch": 0.13953676055201356, + "grad_norm": 0.8097162246704102, + "learning_rate": 0.00019671140103891038, + "loss": 2.7856, + "step": 1729 + }, + { + "epoch": 0.13961746428859656, + "grad_norm": 0.9043141007423401, + "learning_rate": 0.0001967073845428289, + "loss": 2.8047, + "step": 1730 + }, + { + "epoch": 0.13969816802517956, + "grad_norm": 0.9118517637252808, + "learning_rate": 0.00019670336563654662, + "loss": 2.789, + "step": 1731 + }, + { + "epoch": 0.13977887176176257, + "grad_norm": 0.8016074895858765, + "learning_rate": 0.00019669934432016368, + "loss": 2.7506, + "step": 1732 + }, + { + "epoch": 0.13985957549834557, + "grad_norm": 0.8376848697662354, + "learning_rate": 0.0001966953205937803, + "loss": 2.7832, + "step": 1733 + }, + { + "epoch": 0.13994027923492858, + "grad_norm": 0.8511834144592285, + "learning_rate": 0.0001966912944574968, + "loss": 2.7564, + "step": 1734 + }, + { + "epoch": 0.14002098297151158, + "grad_norm": 0.7796351909637451, + "learning_rate": 0.00019668726591141344, + "loss": 2.7489, + "step": 1735 + }, + { + "epoch": 
0.14010168670809459, + "grad_norm": 0.8204767107963562, + "learning_rate": 0.00019668323495563068, + "loss": 2.7634, + "step": 1736 + }, + { + "epoch": 0.1401823904446776, + "grad_norm": 0.9049975872039795, + "learning_rate": 0.000196679201590249, + "loss": 2.7863, + "step": 1737 + }, + { + "epoch": 0.1402630941812606, + "grad_norm": 0.7473673224449158, + "learning_rate": 0.0001966751658153689, + "loss": 2.7557, + "step": 1738 + }, + { + "epoch": 0.1403437979178436, + "grad_norm": 0.7765525579452515, + "learning_rate": 0.0001966711276310909, + "loss": 2.7865, + "step": 1739 + }, + { + "epoch": 0.1404245016544266, + "grad_norm": 0.8766517043113708, + "learning_rate": 0.00019666708703751576, + "loss": 2.7873, + "step": 1740 + }, + { + "epoch": 0.1405052053910096, + "grad_norm": 0.8351505994796753, + "learning_rate": 0.00019666304403474408, + "loss": 2.7355, + "step": 1741 + }, + { + "epoch": 0.1405859091275926, + "grad_norm": 0.7612324953079224, + "learning_rate": 0.00019665899862287667, + "loss": 2.7608, + "step": 1742 + }, + { + "epoch": 0.14066661286417562, + "grad_norm": 0.894249439239502, + "learning_rate": 0.00019665495080201434, + "loss": 2.7469, + "step": 1743 + }, + { + "epoch": 0.14074731660075862, + "grad_norm": 0.8528907895088196, + "learning_rate": 0.00019665090057225803, + "loss": 2.773, + "step": 1744 + }, + { + "epoch": 0.14082802033734163, + "grad_norm": 0.7718498706817627, + "learning_rate": 0.00019664684793370855, + "loss": 2.8045, + "step": 1745 + }, + { + "epoch": 0.14090872407392463, + "grad_norm": 0.8013718128204346, + "learning_rate": 0.00019664279288646706, + "loss": 2.7665, + "step": 1746 + }, + { + "epoch": 0.14098942781050763, + "grad_norm": 0.828803539276123, + "learning_rate": 0.00019663873543063448, + "loss": 2.7846, + "step": 1747 + }, + { + "epoch": 0.14107013154709064, + "grad_norm": 0.8349393606185913, + "learning_rate": 0.00019663467556631204, + "loss": 2.7405, + "step": 1748 + }, + { + "epoch": 0.14115083528367364, + "grad_norm": 
0.8273345232009888, + "learning_rate": 0.00019663061329360085, + "loss": 2.7578, + "step": 1749 + }, + { + "epoch": 0.14123153902025665, + "grad_norm": 0.7989444136619568, + "learning_rate": 0.0001966265486126022, + "loss": 2.739, + "step": 1750 + }, + { + "epoch": 0.14131224275683965, + "grad_norm": 0.8690519332885742, + "learning_rate": 0.00019662248152341736, + "loss": 2.7566, + "step": 1751 + }, + { + "epoch": 0.14139294649342266, + "grad_norm": 0.8453623056411743, + "learning_rate": 0.0001966184120261477, + "loss": 2.8572, + "step": 1752 + }, + { + "epoch": 0.14147365023000566, + "grad_norm": 0.8396254777908325, + "learning_rate": 0.00019661434012089468, + "loss": 2.786, + "step": 1753 + }, + { + "epoch": 0.14155435396658866, + "grad_norm": 0.7643738389015198, + "learning_rate": 0.00019661026580775973, + "loss": 2.8193, + "step": 1754 + }, + { + "epoch": 0.14163505770317167, + "grad_norm": 0.8124154806137085, + "learning_rate": 0.00019660618908684443, + "loss": 2.7754, + "step": 1755 + }, + { + "epoch": 0.14171576143975467, + "grad_norm": 0.8620683550834656, + "learning_rate": 0.00019660210995825036, + "loss": 2.7827, + "step": 1756 + }, + { + "epoch": 0.14179646517633768, + "grad_norm": 0.8241196274757385, + "learning_rate": 0.0001965980284220792, + "loss": 2.7573, + "step": 1757 + }, + { + "epoch": 0.14187716891292068, + "grad_norm": 0.8264089822769165, + "learning_rate": 0.00019659394447843262, + "loss": 2.8214, + "step": 1758 + }, + { + "epoch": 0.14195787264950369, + "grad_norm": 0.9129722118377686, + "learning_rate": 0.00019658985812741247, + "loss": 2.7962, + "step": 1759 + }, + { + "epoch": 0.14203857638608666, + "grad_norm": 0.7976365089416504, + "learning_rate": 0.00019658576936912057, + "loss": 2.7534, + "step": 1760 + }, + { + "epoch": 0.14211928012266967, + "grad_norm": 0.7587228417396545, + "learning_rate": 0.00019658167820365882, + "loss": 2.7083, + "step": 1761 + }, + { + "epoch": 0.14219998385925267, + "grad_norm": 0.757882833480835, + 
"learning_rate": 0.00019657758463112918, + "loss": 2.7135, + "step": 1762 + }, + { + "epoch": 0.14228068759583568, + "grad_norm": 0.8541501760482788, + "learning_rate": 0.00019657348865163369, + "loss": 2.7833, + "step": 1763 + }, + { + "epoch": 0.14236139133241868, + "grad_norm": 0.7708966135978699, + "learning_rate": 0.00019656939026527442, + "loss": 2.7128, + "step": 1764 + }, + { + "epoch": 0.14244209506900168, + "grad_norm": 0.8733000159263611, + "learning_rate": 0.00019656528947215347, + "loss": 2.7597, + "step": 1765 + }, + { + "epoch": 0.1425227988055847, + "grad_norm": 0.7913360595703125, + "learning_rate": 0.0001965611862723731, + "loss": 2.7681, + "step": 1766 + }, + { + "epoch": 0.1426035025421677, + "grad_norm": 0.8692380785942078, + "learning_rate": 0.00019655708066603555, + "loss": 2.7587, + "step": 1767 + }, + { + "epoch": 0.1426842062787507, + "grad_norm": 0.8231006860733032, + "learning_rate": 0.00019655297265324317, + "loss": 2.772, + "step": 1768 + }, + { + "epoch": 0.1427649100153337, + "grad_norm": 0.7373722791671753, + "learning_rate": 0.0001965488622340983, + "loss": 2.7875, + "step": 1769 + }, + { + "epoch": 0.1428456137519167, + "grad_norm": 0.8614751696586609, + "learning_rate": 0.0001965447494087034, + "loss": 2.7962, + "step": 1770 + }, + { + "epoch": 0.1429263174884997, + "grad_norm": 0.8336494565010071, + "learning_rate": 0.000196540634177161, + "loss": 2.7072, + "step": 1771 + }, + { + "epoch": 0.14300702122508271, + "grad_norm": 0.844292163848877, + "learning_rate": 0.00019653651653957362, + "loss": 2.8043, + "step": 1772 + }, + { + "epoch": 0.14308772496166572, + "grad_norm": 0.7366824150085449, + "learning_rate": 0.0001965323964960439, + "loss": 2.7296, + "step": 1773 + }, + { + "epoch": 0.14316842869824872, + "grad_norm": 0.75767982006073, + "learning_rate": 0.0001965282740466745, + "loss": 2.7946, + "step": 1774 + }, + { + "epoch": 0.14324913243483173, + "grad_norm": 0.8361382484436035, + "learning_rate": 0.00019652414919156823, 
+ "loss": 2.7232, + "step": 1775 + }, + { + "epoch": 0.14332983617141473, + "grad_norm": 0.8473719358444214, + "learning_rate": 0.0001965200219308278, + "loss": 2.774, + "step": 1776 + }, + { + "epoch": 0.14341053990799774, + "grad_norm": 0.7446423172950745, + "learning_rate": 0.00019651589226455613, + "loss": 2.7439, + "step": 1777 + }, + { + "epoch": 0.14349124364458074, + "grad_norm": 0.8332851529121399, + "learning_rate": 0.00019651176019285616, + "loss": 2.7891, + "step": 1778 + }, + { + "epoch": 0.14357194738116374, + "grad_norm": 0.885313868522644, + "learning_rate": 0.0001965076257158308, + "loss": 2.7677, + "step": 1779 + }, + { + "epoch": 0.14365265111774675, + "grad_norm": 0.8506965637207031, + "learning_rate": 0.00019650348883358315, + "loss": 2.8112, + "step": 1780 + }, + { + "epoch": 0.14373335485432975, + "grad_norm": 0.8415799736976624, + "learning_rate": 0.0001964993495462163, + "loss": 2.8242, + "step": 1781 + }, + { + "epoch": 0.14381405859091276, + "grad_norm": 0.8501513004302979, + "learning_rate": 0.00019649520785383338, + "loss": 2.8352, + "step": 1782 + }, + { + "epoch": 0.14389476232749576, + "grad_norm": 0.7839778065681458, + "learning_rate": 0.00019649106375653767, + "loss": 2.7194, + "step": 1783 + }, + { + "epoch": 0.14397546606407877, + "grad_norm": 0.8013346195220947, + "learning_rate": 0.00019648691725443243, + "loss": 2.7665, + "step": 1784 + }, + { + "epoch": 0.14405616980066177, + "grad_norm": 1.0338317155838013, + "learning_rate": 0.00019648276834762095, + "loss": 2.8599, + "step": 1785 + }, + { + "epoch": 0.14413687353724478, + "grad_norm": 0.898417592048645, + "learning_rate": 0.0001964786170362067, + "loss": 2.7192, + "step": 1786 + }, + { + "epoch": 0.14421757727382778, + "grad_norm": 0.8876320123672485, + "learning_rate": 0.00019647446332029313, + "loss": 2.7722, + "step": 1787 + }, + { + "epoch": 0.14429828101041078, + "grad_norm": 0.819461464881897, + "learning_rate": 0.00019647030719998373, + "loss": 2.7698, + "step": 
1788 + }, + { + "epoch": 0.1443789847469938, + "grad_norm": 0.848380446434021, + "learning_rate": 0.0001964661486753821, + "loss": 2.7894, + "step": 1789 + }, + { + "epoch": 0.1444596884835768, + "grad_norm": 0.8343753814697266, + "learning_rate": 0.0001964619877465919, + "loss": 2.699, + "step": 1790 + }, + { + "epoch": 0.1445403922201598, + "grad_norm": 0.8718340396881104, + "learning_rate": 0.0001964578244137168, + "loss": 2.7313, + "step": 1791 + }, + { + "epoch": 0.1446210959567428, + "grad_norm": 0.866122841835022, + "learning_rate": 0.00019645365867686056, + "loss": 2.7112, + "step": 1792 + }, + { + "epoch": 0.1447017996933258, + "grad_norm": 0.8351789712905884, + "learning_rate": 0.000196449490536127, + "loss": 2.7765, + "step": 1793 + }, + { + "epoch": 0.1447825034299088, + "grad_norm": 0.8628408312797546, + "learning_rate": 0.00019644531999162004, + "loss": 2.7375, + "step": 1794 + }, + { + "epoch": 0.14486320716649181, + "grad_norm": 0.8414484858512878, + "learning_rate": 0.00019644114704344358, + "loss": 2.7502, + "step": 1795 + }, + { + "epoch": 0.14494391090307482, + "grad_norm": 0.9092586636543274, + "learning_rate": 0.00019643697169170166, + "loss": 2.7714, + "step": 1796 + }, + { + "epoch": 0.14502461463965782, + "grad_norm": 0.8458060622215271, + "learning_rate": 0.0001964327939364983, + "loss": 2.8376, + "step": 1797 + }, + { + "epoch": 0.14510531837624083, + "grad_norm": 0.8150759935379028, + "learning_rate": 0.00019642861377793764, + "loss": 2.7147, + "step": 1798 + }, + { + "epoch": 0.14518602211282383, + "grad_norm": 0.9008790850639343, + "learning_rate": 0.00019642443121612387, + "loss": 2.7786, + "step": 1799 + }, + { + "epoch": 0.14526672584940684, + "grad_norm": 0.848671555519104, + "learning_rate": 0.00019642024625116117, + "loss": 2.7813, + "step": 1800 + }, + { + "epoch": 0.14534742958598984, + "grad_norm": 0.8035007119178772, + "learning_rate": 0.00019641605888315393, + "loss": 2.7988, + "step": 1801 + }, + { + "epoch": 
0.14542813332257284, + "grad_norm": 0.8210242390632629, + "learning_rate": 0.00019641186911220645, + "loss": 2.8451, + "step": 1802 + }, + { + "epoch": 0.14550883705915585, + "grad_norm": 0.8852066397666931, + "learning_rate": 0.00019640767693842318, + "loss": 2.7492, + "step": 1803 + }, + { + "epoch": 0.14558954079573885, + "grad_norm": 0.8421196937561035, + "learning_rate": 0.0001964034823619086, + "loss": 2.759, + "step": 1804 + }, + { + "epoch": 0.14567024453232186, + "grad_norm": 0.8166298866271973, + "learning_rate": 0.00019639928538276724, + "loss": 2.7942, + "step": 1805 + }, + { + "epoch": 0.14575094826890486, + "grad_norm": 0.8502809405326843, + "learning_rate": 0.00019639508600110368, + "loss": 2.7829, + "step": 1806 + }, + { + "epoch": 0.14583165200548787, + "grad_norm": 0.8371078372001648, + "learning_rate": 0.0001963908842170226, + "loss": 2.7168, + "step": 1807 + }, + { + "epoch": 0.14591235574207087, + "grad_norm": 0.8148230910301208, + "learning_rate": 0.0001963866800306287, + "loss": 2.7706, + "step": 1808 + }, + { + "epoch": 0.14599305947865387, + "grad_norm": 0.8984564542770386, + "learning_rate": 0.0001963824734420268, + "loss": 2.7761, + "step": 1809 + }, + { + "epoch": 0.14607376321523688, + "grad_norm": 0.9357183575630188, + "learning_rate": 0.00019637826445132172, + "loss": 2.7738, + "step": 1810 + }, + { + "epoch": 0.14615446695181986, + "grad_norm": 0.8545449376106262, + "learning_rate": 0.00019637405305861834, + "loss": 2.772, + "step": 1811 + }, + { + "epoch": 0.14623517068840286, + "grad_norm": 1.1674948930740356, + "learning_rate": 0.00019636983926402165, + "loss": 2.8988, + "step": 1812 + }, + { + "epoch": 0.14631587442498586, + "grad_norm": 0.7875451445579529, + "learning_rate": 0.00019636562306763665, + "loss": 2.7053, + "step": 1813 + }, + { + "epoch": 0.14639657816156887, + "grad_norm": 0.8980962038040161, + "learning_rate": 0.0001963614044695684, + "loss": 2.7731, + "step": 1814 + }, + { + "epoch": 0.14647728189815187, + 
"grad_norm": 0.8403381705284119, + "learning_rate": 0.00019635718346992207, + "loss": 2.8555, + "step": 1815 + }, + { + "epoch": 0.14655798563473488, + "grad_norm": 0.8736433982849121, + "learning_rate": 0.00019635296006880284, + "loss": 2.7918, + "step": 1816 + }, + { + "epoch": 0.14663868937131788, + "grad_norm": 0.8604151606559753, + "learning_rate": 0.000196348734266316, + "loss": 2.7493, + "step": 1817 + }, + { + "epoch": 0.1467193931079009, + "grad_norm": 0.8329424262046814, + "learning_rate": 0.00019634450606256681, + "loss": 2.7348, + "step": 1818 + }, + { + "epoch": 0.1468000968444839, + "grad_norm": 0.9835913181304932, + "learning_rate": 0.0001963402754576607, + "loss": 2.7651, + "step": 1819 + }, + { + "epoch": 0.1468808005810669, + "grad_norm": 0.7968378067016602, + "learning_rate": 0.0001963360424517031, + "loss": 2.7672, + "step": 1820 + }, + { + "epoch": 0.1469615043176499, + "grad_norm": 0.8012512922286987, + "learning_rate": 0.00019633180704479948, + "loss": 2.8022, + "step": 1821 + }, + { + "epoch": 0.1470422080542329, + "grad_norm": 0.7656376957893372, + "learning_rate": 0.0001963275692370554, + "loss": 2.7561, + "step": 1822 + }, + { + "epoch": 0.1471229117908159, + "grad_norm": 0.8030453324317932, + "learning_rate": 0.00019632332902857656, + "loss": 2.8048, + "step": 1823 + }, + { + "epoch": 0.1472036155273989, + "grad_norm": 0.8050903677940369, + "learning_rate": 0.0001963190864194685, + "loss": 2.7846, + "step": 1824 + }, + { + "epoch": 0.14728431926398192, + "grad_norm": 0.8001886606216431, + "learning_rate": 0.00019631484140983705, + "loss": 2.7382, + "step": 1825 + }, + { + "epoch": 0.14736502300056492, + "grad_norm": 0.8589862585067749, + "learning_rate": 0.00019631059399978796, + "loss": 2.8376, + "step": 1826 + }, + { + "epoch": 0.14744572673714793, + "grad_norm": 0.86325603723526, + "learning_rate": 0.00019630634418942714, + "loss": 2.7643, + "step": 1827 + }, + { + "epoch": 0.14752643047373093, + "grad_norm": 0.7893280386924744, + 
"learning_rate": 0.00019630209197886046, + "loss": 2.713, + "step": 1828 + }, + { + "epoch": 0.14760713421031393, + "grad_norm": 0.8890528082847595, + "learning_rate": 0.00019629783736819394, + "loss": 2.7435, + "step": 1829 + }, + { + "epoch": 0.14768783794689694, + "grad_norm": 0.794924795627594, + "learning_rate": 0.00019629358035753357, + "loss": 2.7703, + "step": 1830 + }, + { + "epoch": 0.14776854168347994, + "grad_norm": 0.7712973952293396, + "learning_rate": 0.00019628932094698545, + "loss": 2.7487, + "step": 1831 + }, + { + "epoch": 0.14784924542006295, + "grad_norm": 0.7810670137405396, + "learning_rate": 0.00019628505913665576, + "loss": 2.7687, + "step": 1832 + }, + { + "epoch": 0.14792994915664595, + "grad_norm": 0.8331059813499451, + "learning_rate": 0.0001962807949266507, + "loss": 2.7166, + "step": 1833 + }, + { + "epoch": 0.14801065289322896, + "grad_norm": 0.8983452916145325, + "learning_rate": 0.00019627652831707656, + "loss": 2.8096, + "step": 1834 + }, + { + "epoch": 0.14809135662981196, + "grad_norm": 0.8387179374694824, + "learning_rate": 0.00019627225930803963, + "loss": 2.8252, + "step": 1835 + }, + { + "epoch": 0.14817206036639496, + "grad_norm": 0.8619294762611389, + "learning_rate": 0.0001962679878996464, + "loss": 2.7623, + "step": 1836 + }, + { + "epoch": 0.14825276410297797, + "grad_norm": 0.8195026516914368, + "learning_rate": 0.0001962637140920032, + "loss": 2.7295, + "step": 1837 + }, + { + "epoch": 0.14833346783956097, + "grad_norm": 0.806216835975647, + "learning_rate": 0.00019625943788521664, + "loss": 2.7184, + "step": 1838 + }, + { + "epoch": 0.14841417157614398, + "grad_norm": 0.7758379578590393, + "learning_rate": 0.00019625515927939327, + "loss": 2.7675, + "step": 1839 + }, + { + "epoch": 0.14849487531272698, + "grad_norm": 0.7617168426513672, + "learning_rate": 0.0001962508782746397, + "loss": 2.8041, + "step": 1840 + }, + { + "epoch": 0.14857557904930999, + "grad_norm": 0.9630066156387329, + "learning_rate": 
0.00019624659487106264, + "loss": 2.814, + "step": 1841 + }, + { + "epoch": 0.148656282785893, + "grad_norm": 0.7656112313270569, + "learning_rate": 0.00019624230906876888, + "loss": 2.7564, + "step": 1842 + }, + { + "epoch": 0.148736986522476, + "grad_norm": 0.9394779801368713, + "learning_rate": 0.0001962380208678652, + "loss": 2.7958, + "step": 1843 + }, + { + "epoch": 0.148817690259059, + "grad_norm": 0.7647004127502441, + "learning_rate": 0.00019623373026845842, + "loss": 2.72, + "step": 1844 + }, + { + "epoch": 0.148898393995642, + "grad_norm": 0.809079647064209, + "learning_rate": 0.00019622943727065555, + "loss": 2.7732, + "step": 1845 + }, + { + "epoch": 0.148979097732225, + "grad_norm": 0.8241337537765503, + "learning_rate": 0.00019622514187456357, + "loss": 2.759, + "step": 1846 + }, + { + "epoch": 0.149059801468808, + "grad_norm": 0.8979619145393372, + "learning_rate": 0.00019622084408028948, + "loss": 2.8307, + "step": 1847 + }, + { + "epoch": 0.14914050520539102, + "grad_norm": 0.8058865666389465, + "learning_rate": 0.00019621654388794047, + "loss": 2.807, + "step": 1848 + }, + { + "epoch": 0.14922120894197402, + "grad_norm": 0.81967693567276, + "learning_rate": 0.00019621224129762364, + "loss": 2.7762, + "step": 1849 + }, + { + "epoch": 0.14930191267855702, + "grad_norm": 0.7385755777359009, + "learning_rate": 0.0001962079363094463, + "loss": 2.7854, + "step": 1850 + }, + { + "epoch": 0.14938261641514003, + "grad_norm": 0.8585657477378845, + "learning_rate": 0.00019620362892351566, + "loss": 2.7781, + "step": 1851 + }, + { + "epoch": 0.14946332015172303, + "grad_norm": 0.8328986763954163, + "learning_rate": 0.00019619931913993912, + "loss": 2.8245, + "step": 1852 + }, + { + "epoch": 0.14954402388830604, + "grad_norm": 0.749727189540863, + "learning_rate": 0.0001961950069588241, + "loss": 2.8049, + "step": 1853 + }, + { + "epoch": 0.14962472762488904, + "grad_norm": 0.7886502742767334, + "learning_rate": 0.00019619069238027803, + "loss": 2.7521, + 
"step": 1854 + }, + { + "epoch": 0.14970543136147205, + "grad_norm": 0.816137433052063, + "learning_rate": 0.00019618637540440848, + "loss": 2.8383, + "step": 1855 + }, + { + "epoch": 0.14978613509805505, + "grad_norm": 0.80442214012146, + "learning_rate": 0.000196182056031323, + "loss": 2.7227, + "step": 1856 + }, + { + "epoch": 0.14986683883463806, + "grad_norm": 0.7605221271514893, + "learning_rate": 0.00019617773426112924, + "loss": 2.7494, + "step": 1857 + }, + { + "epoch": 0.14994754257122106, + "grad_norm": 0.8745137453079224, + "learning_rate": 0.00019617341009393497, + "loss": 2.6978, + "step": 1858 + }, + { + "epoch": 0.15002824630780406, + "grad_norm": 0.8151741623878479, + "learning_rate": 0.00019616908352984789, + "loss": 2.7817, + "step": 1859 + }, + { + "epoch": 0.15010895004438707, + "grad_norm": 0.773876428604126, + "learning_rate": 0.0001961647545689759, + "loss": 2.812, + "step": 1860 + }, + { + "epoch": 0.15018965378097007, + "grad_norm": 0.8216966390609741, + "learning_rate": 0.00019616042321142683, + "loss": 2.8181, + "step": 1861 + }, + { + "epoch": 0.15027035751755305, + "grad_norm": 0.8097409605979919, + "learning_rate": 0.00019615608945730862, + "loss": 2.8336, + "step": 1862 + }, + { + "epoch": 0.15035106125413605, + "grad_norm": 0.8085697293281555, + "learning_rate": 0.00019615175330672932, + "loss": 2.8176, + "step": 1863 + }, + { + "epoch": 0.15043176499071906, + "grad_norm": 0.7658133506774902, + "learning_rate": 0.00019614741475979701, + "loss": 2.7543, + "step": 1864 + }, + { + "epoch": 0.15051246872730206, + "grad_norm": 0.7193909883499146, + "learning_rate": 0.00019614307381661978, + "loss": 2.7475, + "step": 1865 + }, + { + "epoch": 0.15059317246388507, + "grad_norm": 0.835608959197998, + "learning_rate": 0.0001961387304773058, + "loss": 2.8017, + "step": 1866 + }, + { + "epoch": 0.15067387620046807, + "grad_norm": 0.7898489832878113, + "learning_rate": 0.0001961343847419634, + "loss": 2.7613, + "step": 1867 + }, + { + "epoch": 
0.15075457993705108, + "grad_norm": 0.8031982183456421, + "learning_rate": 0.0001961300366107008, + "loss": 2.7442, + "step": 1868 + }, + { + "epoch": 0.15083528367363408, + "grad_norm": 0.8427363634109497, + "learning_rate": 0.00019612568608362642, + "loss": 2.8095, + "step": 1869 + }, + { + "epoch": 0.15091598741021708, + "grad_norm": 0.8282802700996399, + "learning_rate": 0.00019612133316084863, + "loss": 2.7216, + "step": 1870 + }, + { + "epoch": 0.1509966911468001, + "grad_norm": 0.7799758911132812, + "learning_rate": 0.000196116977842476, + "loss": 2.793, + "step": 1871 + }, + { + "epoch": 0.1510773948833831, + "grad_norm": 0.8151525259017944, + "learning_rate": 0.00019611262012861702, + "loss": 2.7641, + "step": 1872 + }, + { + "epoch": 0.1511580986199661, + "grad_norm": 0.7926812767982483, + "learning_rate": 0.0001961082600193803, + "loss": 2.7523, + "step": 1873 + }, + { + "epoch": 0.1512388023565491, + "grad_norm": 0.8737135529518127, + "learning_rate": 0.0001961038975148745, + "loss": 2.7965, + "step": 1874 + }, + { + "epoch": 0.1513195060931321, + "grad_norm": 0.7948090434074402, + "learning_rate": 0.00019609953261520837, + "loss": 2.7737, + "step": 1875 + }, + { + "epoch": 0.1514002098297151, + "grad_norm": 0.8161277770996094, + "learning_rate": 0.0001960951653204907, + "loss": 2.7423, + "step": 1876 + }, + { + "epoch": 0.15148091356629811, + "grad_norm": 0.8904973864555359, + "learning_rate": 0.00019609079563083026, + "loss": 2.7066, + "step": 1877 + }, + { + "epoch": 0.15156161730288112, + "grad_norm": 0.8107061982154846, + "learning_rate": 0.00019608642354633604, + "loss": 2.7939, + "step": 1878 + }, + { + "epoch": 0.15164232103946412, + "grad_norm": 0.8410987854003906, + "learning_rate": 0.00019608204906711694, + "loss": 2.7521, + "step": 1879 + }, + { + "epoch": 0.15172302477604713, + "grad_norm": 0.8336483836174011, + "learning_rate": 0.0001960776721932821, + "loss": 2.7613, + "step": 1880 + }, + { + "epoch": 0.15180372851263013, + "grad_norm": 
0.730549156665802, + "learning_rate": 0.00019607329292494044, + "loss": 2.8019, + "step": 1881 + }, + { + "epoch": 0.15188443224921314, + "grad_norm": 0.7543070912361145, + "learning_rate": 0.0001960689112622012, + "loss": 2.6907, + "step": 1882 + }, + { + "epoch": 0.15196513598579614, + "grad_norm": 0.848414421081543, + "learning_rate": 0.00019606452720517359, + "loss": 2.7278, + "step": 1883 + }, + { + "epoch": 0.15204583972237914, + "grad_norm": 0.8331718444824219, + "learning_rate": 0.00019606014075396682, + "loss": 2.6994, + "step": 1884 + }, + { + "epoch": 0.15212654345896215, + "grad_norm": 0.9192764759063721, + "learning_rate": 0.00019605575190869025, + "loss": 2.7095, + "step": 1885 + }, + { + "epoch": 0.15220724719554515, + "grad_norm": 0.8377116322517395, + "learning_rate": 0.00019605136066945324, + "loss": 2.7925, + "step": 1886 + }, + { + "epoch": 0.15228795093212816, + "grad_norm": 0.7302869558334351, + "learning_rate": 0.00019604696703636525, + "loss": 2.7286, + "step": 1887 + }, + { + "epoch": 0.15236865466871116, + "grad_norm": 0.7972438335418701, + "learning_rate": 0.00019604257100953577, + "loss": 2.7732, + "step": 1888 + }, + { + "epoch": 0.15244935840529417, + "grad_norm": 1.0350826978683472, + "learning_rate": 0.00019603817258907435, + "loss": 2.8211, + "step": 1889 + }, + { + "epoch": 0.15253006214187717, + "grad_norm": 0.782755970954895, + "learning_rate": 0.00019603377177509067, + "loss": 2.8489, + "step": 1890 + }, + { + "epoch": 0.15261076587846018, + "grad_norm": 0.9072603583335876, + "learning_rate": 0.0001960293685676943, + "loss": 2.7764, + "step": 1891 + }, + { + "epoch": 0.15269146961504318, + "grad_norm": 0.7878704071044922, + "learning_rate": 0.0001960249629669951, + "loss": 2.7494, + "step": 1892 + }, + { + "epoch": 0.15277217335162618, + "grad_norm": 0.8770418167114258, + "learning_rate": 0.00019602055497310278, + "loss": 2.7318, + "step": 1893 + }, + { + "epoch": 0.1528528770882092, + "grad_norm": 0.8004975914955139, + 
"learning_rate": 0.00019601614458612723, + "loss": 2.7272, + "step": 1894 + }, + { + "epoch": 0.1529335808247922, + "grad_norm": 0.8511070013046265, + "learning_rate": 0.00019601173180617835, + "loss": 2.7876, + "step": 1895 + }, + { + "epoch": 0.1530142845613752, + "grad_norm": 0.7946128845214844, + "learning_rate": 0.00019600731663336617, + "loss": 2.7435, + "step": 1896 + }, + { + "epoch": 0.1530949882979582, + "grad_norm": 0.8155317902565002, + "learning_rate": 0.00019600289906780067, + "loss": 2.7642, + "step": 1897 + }, + { + "epoch": 0.1531756920345412, + "grad_norm": 0.8086098432540894, + "learning_rate": 0.000195998479109592, + "loss": 2.7358, + "step": 1898 + }, + { + "epoch": 0.1532563957711242, + "grad_norm": 0.8698278665542603, + "learning_rate": 0.00019599405675885026, + "loss": 2.725, + "step": 1899 + }, + { + "epoch": 0.15333709950770721, + "grad_norm": 0.8756006360054016, + "learning_rate": 0.00019598963201568573, + "loss": 2.7209, + "step": 1900 + }, + { + "epoch": 0.15341780324429022, + "grad_norm": 0.7984628081321716, + "learning_rate": 0.0001959852048802086, + "loss": 2.7685, + "step": 1901 + }, + { + "epoch": 0.15349850698087322, + "grad_norm": 0.8244056105613708, + "learning_rate": 0.0001959807753525293, + "loss": 2.7692, + "step": 1902 + }, + { + "epoch": 0.15357921071745623, + "grad_norm": 0.8577731251716614, + "learning_rate": 0.00019597634343275814, + "loss": 2.7571, + "step": 1903 + }, + { + "epoch": 0.15365991445403923, + "grad_norm": 0.8410975933074951, + "learning_rate": 0.00019597190912100566, + "loss": 2.7862, + "step": 1904 + }, + { + "epoch": 0.15374061819062224, + "grad_norm": 0.9094158411026001, + "learning_rate": 0.0001959674724173823, + "loss": 2.7655, + "step": 1905 + }, + { + "epoch": 0.15382132192720524, + "grad_norm": 0.8375208973884583, + "learning_rate": 0.00019596303332199868, + "loss": 2.8129, + "step": 1906 + }, + { + "epoch": 0.15390202566378824, + "grad_norm": 0.8335977792739868, + "learning_rate": 
0.00019595859183496543, + "loss": 2.7835, + "step": 1907 + }, + { + "epoch": 0.15398272940037125, + "grad_norm": 0.7973531484603882, + "learning_rate": 0.0001959541479563932, + "loss": 2.7785, + "step": 1908 + }, + { + "epoch": 0.15406343313695425, + "grad_norm": 0.7808824181556702, + "learning_rate": 0.0001959497016863928, + "loss": 2.7862, + "step": 1909 + }, + { + "epoch": 0.15414413687353726, + "grad_norm": 0.853824257850647, + "learning_rate": 0.00019594525302507504, + "loss": 2.6721, + "step": 1910 + }, + { + "epoch": 0.15422484061012026, + "grad_norm": 0.8589324355125427, + "learning_rate": 0.00019594080197255073, + "loss": 2.7948, + "step": 1911 + }, + { + "epoch": 0.15430554434670327, + "grad_norm": 0.7951898574829102, + "learning_rate": 0.00019593634852893086, + "loss": 2.7903, + "step": 1912 + }, + { + "epoch": 0.15438624808328624, + "grad_norm": 0.8333349227905273, + "learning_rate": 0.0001959318926943264, + "loss": 2.8073, + "step": 1913 + }, + { + "epoch": 0.15446695181986925, + "grad_norm": 0.8552380800247192, + "learning_rate": 0.0001959274344688484, + "loss": 2.8199, + "step": 1914 + }, + { + "epoch": 0.15454765555645225, + "grad_norm": 0.8356214165687561, + "learning_rate": 0.000195922973852608, + "loss": 2.7985, + "step": 1915 + }, + { + "epoch": 0.15462835929303526, + "grad_norm": 0.7167248725891113, + "learning_rate": 0.00019591851084571634, + "loss": 2.6802, + "step": 1916 + }, + { + "epoch": 0.15470906302961826, + "grad_norm": 0.7980726361274719, + "learning_rate": 0.00019591404544828464, + "loss": 2.692, + "step": 1917 + }, + { + "epoch": 0.15478976676620126, + "grad_norm": 0.7766004800796509, + "learning_rate": 0.00019590957766042424, + "loss": 2.7219, + "step": 1918 + }, + { + "epoch": 0.15487047050278427, + "grad_norm": 0.828852653503418, + "learning_rate": 0.0001959051074822464, + "loss": 2.7369, + "step": 1919 + }, + { + "epoch": 0.15495117423936727, + "grad_norm": 0.7818129062652588, + "learning_rate": 0.0001959006349138626, + "loss": 
2.7778, + "step": 1920 + }, + { + "epoch": 0.15503187797595028, + "grad_norm": 0.8428593873977661, + "learning_rate": 0.00019589615995538432, + "loss": 2.8257, + "step": 1921 + }, + { + "epoch": 0.15511258171253328, + "grad_norm": 0.8756616115570068, + "learning_rate": 0.00019589168260692307, + "loss": 2.7692, + "step": 1922 + }, + { + "epoch": 0.15519328544911629, + "grad_norm": 0.7802519202232361, + "learning_rate": 0.0001958872028685904, + "loss": 2.7811, + "step": 1923 + }, + { + "epoch": 0.1552739891856993, + "grad_norm": 0.7787032723426819, + "learning_rate": 0.00019588272074049797, + "loss": 2.7546, + "step": 1924 + }, + { + "epoch": 0.1553546929222823, + "grad_norm": 0.848479151725769, + "learning_rate": 0.0001958782362227575, + "loss": 2.7759, + "step": 1925 + }, + { + "epoch": 0.1554353966588653, + "grad_norm": 0.8331353664398193, + "learning_rate": 0.00019587374931548076, + "loss": 2.7881, + "step": 1926 + }, + { + "epoch": 0.1555161003954483, + "grad_norm": 0.8646424412727356, + "learning_rate": 0.00019586926001877958, + "loss": 2.8059, + "step": 1927 + }, + { + "epoch": 0.1555968041320313, + "grad_norm": 0.912253737449646, + "learning_rate": 0.00019586476833276584, + "loss": 2.7446, + "step": 1928 + }, + { + "epoch": 0.1556775078686143, + "grad_norm": 0.9256471395492554, + "learning_rate": 0.00019586027425755147, + "loss": 2.8, + "step": 1929 + }, + { + "epoch": 0.15575821160519732, + "grad_norm": 1.0984607934951782, + "learning_rate": 0.0001958557777932485, + "loss": 2.7759, + "step": 1930 + }, + { + "epoch": 0.15583891534178032, + "grad_norm": 0.8736081123352051, + "learning_rate": 0.00019585127893996895, + "loss": 2.7464, + "step": 1931 + }, + { + "epoch": 0.15591961907836333, + "grad_norm": 0.932538628578186, + "learning_rate": 0.00019584677769782498, + "loss": 2.7874, + "step": 1932 + }, + { + "epoch": 0.15600032281494633, + "grad_norm": 0.9742087125778198, + "learning_rate": 0.0001958422740669288, + "loss": 2.7727, + "step": 1933 + }, + { + 
"epoch": 0.15608102655152933, + "grad_norm": 0.8975874781608582, + "learning_rate": 0.00019583776804739256, + "loss": 2.7812, + "step": 1934 + }, + { + "epoch": 0.15616173028811234, + "grad_norm": 0.9380232691764832, + "learning_rate": 0.00019583325963932864, + "loss": 2.7284, + "step": 1935 + }, + { + "epoch": 0.15624243402469534, + "grad_norm": 0.8332872986793518, + "learning_rate": 0.00019582874884284938, + "loss": 2.7792, + "step": 1936 + }, + { + "epoch": 0.15632313776127835, + "grad_norm": 1.0017194747924805, + "learning_rate": 0.0001958242356580672, + "loss": 2.7187, + "step": 1937 + }, + { + "epoch": 0.15640384149786135, + "grad_norm": 0.9433515667915344, + "learning_rate": 0.0001958197200850946, + "loss": 2.8394, + "step": 1938 + }, + { + "epoch": 0.15648454523444436, + "grad_norm": 0.8781030178070068, + "learning_rate": 0.00019581520212404407, + "loss": 2.7667, + "step": 1939 + }, + { + "epoch": 0.15656524897102736, + "grad_norm": 0.895656168460846, + "learning_rate": 0.00019581068177502826, + "loss": 2.799, + "step": 1940 + }, + { + "epoch": 0.15664595270761036, + "grad_norm": 0.8336960673332214, + "learning_rate": 0.0001958061590381598, + "loss": 2.8152, + "step": 1941 + }, + { + "epoch": 0.15672665644419337, + "grad_norm": 0.9184536337852478, + "learning_rate": 0.00019580163391355143, + "loss": 2.7746, + "step": 1942 + }, + { + "epoch": 0.15680736018077637, + "grad_norm": 0.8564908504486084, + "learning_rate": 0.00019579710640131587, + "loss": 2.7674, + "step": 1943 + }, + { + "epoch": 0.15688806391735938, + "grad_norm": 0.7491608262062073, + "learning_rate": 0.00019579257650156605, + "loss": 2.7665, + "step": 1944 + }, + { + "epoch": 0.15696876765394238, + "grad_norm": 0.9165031313896179, + "learning_rate": 0.00019578804421441478, + "loss": 2.7343, + "step": 1945 + }, + { + "epoch": 0.15704947139052539, + "grad_norm": 0.8413978815078735, + "learning_rate": 0.00019578350953997512, + "loss": 2.7503, + "step": 1946 + }, + { + "epoch": 0.1571301751271084, 
+ "grad_norm": 0.7820419073104858, + "learning_rate": 0.00019577897247835993, + "loss": 2.7535, + "step": 1947 + }, + { + "epoch": 0.1572108788636914, + "grad_norm": 0.8134996294975281, + "learning_rate": 0.00019577443302968246, + "loss": 2.7504, + "step": 1948 + }, + { + "epoch": 0.1572915826002744, + "grad_norm": 0.8201301097869873, + "learning_rate": 0.00019576989119405574, + "loss": 2.6927, + "step": 1949 + }, + { + "epoch": 0.1573722863368574, + "grad_norm": 0.8343217372894287, + "learning_rate": 0.00019576534697159296, + "loss": 2.7742, + "step": 1950 + }, + { + "epoch": 0.1574529900734404, + "grad_norm": 0.8161751627922058, + "learning_rate": 0.0001957608003624074, + "loss": 2.8236, + "step": 1951 + }, + { + "epoch": 0.1575336938100234, + "grad_norm": 0.8626808524131775, + "learning_rate": 0.00019575625136661242, + "loss": 2.7305, + "step": 1952 + }, + { + "epoch": 0.15761439754660642, + "grad_norm": 0.8238986730575562, + "learning_rate": 0.0001957516999843213, + "loss": 2.7641, + "step": 1953 + }, + { + "epoch": 0.15769510128318942, + "grad_norm": 0.7806095480918884, + "learning_rate": 0.00019574714621564755, + "loss": 2.7155, + "step": 1954 + }, + { + "epoch": 0.15777580501977242, + "grad_norm": 0.8137761950492859, + "learning_rate": 0.0001957425900607046, + "loss": 2.7529, + "step": 1955 + }, + { + "epoch": 0.15785650875635543, + "grad_norm": 0.8383988738059998, + "learning_rate": 0.00019573803151960606, + "loss": 2.7726, + "step": 1956 + }, + { + "epoch": 0.15793721249293843, + "grad_norm": 0.8734413385391235, + "learning_rate": 0.00019573347059246549, + "loss": 2.8563, + "step": 1957 + }, + { + "epoch": 0.15801791622952144, + "grad_norm": 0.8018438816070557, + "learning_rate": 0.0001957289072793966, + "loss": 2.8031, + "step": 1958 + }, + { + "epoch": 0.15809861996610444, + "grad_norm": 0.8175764083862305, + "learning_rate": 0.0001957243415805131, + "loss": 2.7824, + "step": 1959 + }, + { + "epoch": 0.15817932370268745, + "grad_norm": 
0.7642164826393127, + "learning_rate": 0.00019571977349592878, + "loss": 2.7666, + "step": 1960 + }, + { + "epoch": 0.15826002743927045, + "grad_norm": 0.7584841847419739, + "learning_rate": 0.0001957152030257575, + "loss": 2.7211, + "step": 1961 + }, + { + "epoch": 0.15834073117585346, + "grad_norm": 0.822610080242157, + "learning_rate": 0.00019571063017011312, + "loss": 2.7025, + "step": 1962 + }, + { + "epoch": 0.15842143491243646, + "grad_norm": 0.7553817629814148, + "learning_rate": 0.00019570605492910968, + "loss": 2.8122, + "step": 1963 + }, + { + "epoch": 0.15850213864901944, + "grad_norm": 0.7224497199058533, + "learning_rate": 0.0001957014773028612, + "loss": 2.7613, + "step": 1964 + }, + { + "epoch": 0.15858284238560244, + "grad_norm": 0.8563623428344727, + "learning_rate": 0.00019569689729148168, + "loss": 2.8005, + "step": 1965 + }, + { + "epoch": 0.15866354612218544, + "grad_norm": 0.7665508389472961, + "learning_rate": 0.00019569231489508537, + "loss": 2.7387, + "step": 1966 + }, + { + "epoch": 0.15874424985876845, + "grad_norm": 0.7788479328155518, + "learning_rate": 0.0001956877301137864, + "loss": 2.7229, + "step": 1967 + }, + { + "epoch": 0.15882495359535145, + "grad_norm": 0.7326748371124268, + "learning_rate": 0.00019568314294769908, + "loss": 2.7728, + "step": 1968 + }, + { + "epoch": 0.15890565733193446, + "grad_norm": 0.790492594242096, + "learning_rate": 0.00019567855339693772, + "loss": 2.7809, + "step": 1969 + }, + { + "epoch": 0.15898636106851746, + "grad_norm": 0.8026898503303528, + "learning_rate": 0.0001956739614616167, + "loss": 2.7267, + "step": 1970 + }, + { + "epoch": 0.15906706480510047, + "grad_norm": 0.7963770627975464, + "learning_rate": 0.00019566936714185046, + "loss": 2.7161, + "step": 1971 + }, + { + "epoch": 0.15914776854168347, + "grad_norm": 0.7708200216293335, + "learning_rate": 0.00019566477043775354, + "loss": 2.7223, + "step": 1972 + }, + { + "epoch": 0.15922847227826648, + "grad_norm": 0.8036624789237976, + 
"learning_rate": 0.00019566017134944042, + "loss": 2.7644, + "step": 1973 + }, + { + "epoch": 0.15930917601484948, + "grad_norm": 0.8221341967582703, + "learning_rate": 0.00019565556987702581, + "loss": 2.7629, + "step": 1974 + }, + { + "epoch": 0.15938987975143248, + "grad_norm": 0.7685462832450867, + "learning_rate": 0.00019565096602062435, + "loss": 2.8016, + "step": 1975 + }, + { + "epoch": 0.1594705834880155, + "grad_norm": 0.8173574209213257, + "learning_rate": 0.00019564635978035075, + "loss": 2.761, + "step": 1976 + }, + { + "epoch": 0.1595512872245985, + "grad_norm": 0.7567519545555115, + "learning_rate": 0.00019564175115631988, + "loss": 2.7794, + "step": 1977 + }, + { + "epoch": 0.1596319909611815, + "grad_norm": 0.8754587173461914, + "learning_rate": 0.00019563714014864654, + "loss": 2.7769, + "step": 1978 + }, + { + "epoch": 0.1597126946977645, + "grad_norm": 0.753871738910675, + "learning_rate": 0.00019563252675744569, + "loss": 2.7489, + "step": 1979 + }, + { + "epoch": 0.1597933984343475, + "grad_norm": 0.777103841304779, + "learning_rate": 0.00019562791098283225, + "loss": 2.7667, + "step": 1980 + }, + { + "epoch": 0.1598741021709305, + "grad_norm": 0.8227293491363525, + "learning_rate": 0.00019562329282492131, + "loss": 2.7904, + "step": 1981 + }, + { + "epoch": 0.15995480590751351, + "grad_norm": 0.7595541477203369, + "learning_rate": 0.00019561867228382797, + "loss": 2.7654, + "step": 1982 + }, + { + "epoch": 0.16003550964409652, + "grad_norm": 0.8330550789833069, + "learning_rate": 0.00019561404935966733, + "loss": 2.7533, + "step": 1983 + }, + { + "epoch": 0.16011621338067952, + "grad_norm": 0.8213297128677368, + "learning_rate": 0.0001956094240525547, + "loss": 2.8103, + "step": 1984 + }, + { + "epoch": 0.16019691711726253, + "grad_norm": 0.8046056628227234, + "learning_rate": 0.00019560479636260527, + "loss": 2.7666, + "step": 1985 + }, + { + "epoch": 0.16027762085384553, + "grad_norm": 0.7886037230491638, + "learning_rate": 
0.0001956001662899344, + "loss": 2.7066, + "step": 1986 + }, + { + "epoch": 0.16035832459042854, + "grad_norm": 0.8300043940544128, + "learning_rate": 0.00019559553383465748, + "loss": 2.7617, + "step": 1987 + }, + { + "epoch": 0.16043902832701154, + "grad_norm": 0.7963815331459045, + "learning_rate": 0.00019559089899688994, + "loss": 2.6891, + "step": 1988 + }, + { + "epoch": 0.16051973206359454, + "grad_norm": 0.7794002294540405, + "learning_rate": 0.00019558626177674734, + "loss": 2.8012, + "step": 1989 + }, + { + "epoch": 0.16060043580017755, + "grad_norm": 0.8345863819122314, + "learning_rate": 0.00019558162217434526, + "loss": 2.7715, + "step": 1990 + }, + { + "epoch": 0.16068113953676055, + "grad_norm": 0.8883393406867981, + "learning_rate": 0.00019557698018979927, + "loss": 2.7863, + "step": 1991 + }, + { + "epoch": 0.16076184327334356, + "grad_norm": 0.8069450259208679, + "learning_rate": 0.0001955723358232251, + "loss": 2.759, + "step": 1992 + }, + { + "epoch": 0.16084254700992656, + "grad_norm": 0.9014191031455994, + "learning_rate": 0.00019556768907473852, + "loss": 2.711, + "step": 1993 + }, + { + "epoch": 0.16092325074650957, + "grad_norm": 0.8429470658302307, + "learning_rate": 0.0001955630399444553, + "loss": 2.6936, + "step": 1994 + }, + { + "epoch": 0.16100395448309257, + "grad_norm": 0.7859500050544739, + "learning_rate": 0.00019555838843249128, + "loss": 2.7343, + "step": 1995 + }, + { + "epoch": 0.16108465821967557, + "grad_norm": 0.8068249821662903, + "learning_rate": 0.00019555373453896245, + "loss": 2.7492, + "step": 1996 + }, + { + "epoch": 0.16116536195625858, + "grad_norm": 0.8194023370742798, + "learning_rate": 0.00019554907826398478, + "loss": 2.7265, + "step": 1997 + }, + { + "epoch": 0.16124606569284158, + "grad_norm": 0.8139404654502869, + "learning_rate": 0.00019554441960767434, + "loss": 2.7311, + "step": 1998 + }, + { + "epoch": 0.1613267694294246, + "grad_norm": 0.8210673928260803, + "learning_rate": 0.00019553975857014718, + 
"loss": 2.7095, + "step": 1999 + }, + { + "epoch": 0.1614074731660076, + "grad_norm": 0.8615561723709106, + "learning_rate": 0.0001955350951515195, + "loss": 2.7458, + "step": 2000 + }, + { + "epoch": 0.1614074731660076, + "eval_loss": 2.6739437580108643, + "eval_runtime": 813.8274, + "eval_samples_per_second": 3.219, + "eval_steps_per_second": 0.537, + "step": 2000 + }, + { + "epoch": 0.1614881769025906, + "grad_norm": 0.8945594429969788, + "learning_rate": 0.0001955304293519075, + "loss": 2.776, + "step": 2001 + }, + { + "epoch": 0.1615688806391736, + "grad_norm": 0.7943438291549683, + "learning_rate": 0.00019552576117142748, + "loss": 2.7484, + "step": 2002 + }, + { + "epoch": 0.1616495843757566, + "grad_norm": 0.8264374136924744, + "learning_rate": 0.00019552109061019582, + "loss": 2.7725, + "step": 2003 + }, + { + "epoch": 0.1617302881123396, + "grad_norm": 0.7591681480407715, + "learning_rate": 0.00019551641766832887, + "loss": 2.7217, + "step": 2004 + }, + { + "epoch": 0.16181099184892261, + "grad_norm": 0.8275293707847595, + "learning_rate": 0.0001955117423459431, + "loss": 2.7279, + "step": 2005 + }, + { + "epoch": 0.16189169558550562, + "grad_norm": 0.8109650611877441, + "learning_rate": 0.00019550706464315504, + "loss": 2.8111, + "step": 2006 + }, + { + "epoch": 0.16197239932208862, + "grad_norm": 0.8710397481918335, + "learning_rate": 0.00019550238456008127, + "loss": 2.7166, + "step": 2007 + }, + { + "epoch": 0.16205310305867163, + "grad_norm": 0.8569270968437195, + "learning_rate": 0.00019549770209683845, + "loss": 2.7739, + "step": 2008 + }, + { + "epoch": 0.16213380679525463, + "grad_norm": 0.7927817702293396, + "learning_rate": 0.00019549301725354325, + "loss": 2.7154, + "step": 2009 + }, + { + "epoch": 0.16221451053183764, + "grad_norm": 0.7576590776443481, + "learning_rate": 0.00019548833003031244, + "loss": 2.7276, + "step": 2010 + }, + { + "epoch": 0.16229521426842064, + "grad_norm": 0.8092780709266663, + "learning_rate": 
0.00019548364042726283, + "loss": 2.7494, + "step": 2011 + }, + { + "epoch": 0.16237591800500364, + "grad_norm": 0.7643424868583679, + "learning_rate": 0.0001954789484445113, + "loss": 2.7877, + "step": 2012 + }, + { + "epoch": 0.16245662174158665, + "grad_norm": 0.8235166072845459, + "learning_rate": 0.0001954742540821748, + "loss": 2.7884, + "step": 2013 + }, + { + "epoch": 0.16253732547816965, + "grad_norm": 0.9297853708267212, + "learning_rate": 0.00019546955734037034, + "loss": 2.765, + "step": 2014 + }, + { + "epoch": 0.16261802921475263, + "grad_norm": 0.7778275609016418, + "learning_rate": 0.0001954648582192149, + "loss": 2.7178, + "step": 2015 + }, + { + "epoch": 0.16269873295133563, + "grad_norm": 0.8767017126083374, + "learning_rate": 0.00019546015671882566, + "loss": 2.8254, + "step": 2016 + }, + { + "epoch": 0.16277943668791864, + "grad_norm": 0.7870603203773499, + "learning_rate": 0.0001954554528393198, + "loss": 2.797, + "step": 2017 + }, + { + "epoch": 0.16286014042450164, + "grad_norm": 0.8112391233444214, + "learning_rate": 0.00019545074658081454, + "loss": 2.8562, + "step": 2018 + }, + { + "epoch": 0.16294084416108465, + "grad_norm": 0.8216677308082581, + "learning_rate": 0.00019544603794342713, + "loss": 2.7894, + "step": 2019 + }, + { + "epoch": 0.16302154789766765, + "grad_norm": 0.8445515632629395, + "learning_rate": 0.00019544132692727497, + "loss": 2.8618, + "step": 2020 + }, + { + "epoch": 0.16310225163425066, + "grad_norm": 0.8275444507598877, + "learning_rate": 0.00019543661353247548, + "loss": 2.8087, + "step": 2021 + }, + { + "epoch": 0.16318295537083366, + "grad_norm": 0.8142833709716797, + "learning_rate": 0.00019543189775914608, + "loss": 2.8075, + "step": 2022 + }, + { + "epoch": 0.16326365910741666, + "grad_norm": 0.8182976245880127, + "learning_rate": 0.0001954271796074043, + "loss": 2.8312, + "step": 2023 + }, + { + "epoch": 0.16334436284399967, + "grad_norm": 0.7629228234291077, + "learning_rate": 0.0001954224590773678, + 
"loss": 2.7191, + "step": 2024 + }, + { + "epoch": 0.16342506658058267, + "grad_norm": 0.8630000948905945, + "learning_rate": 0.00019541773616915418, + "loss": 2.8013, + "step": 2025 + }, + { + "epoch": 0.16350577031716568, + "grad_norm": 0.8917906880378723, + "learning_rate": 0.00019541301088288115, + "loss": 2.7573, + "step": 2026 + }, + { + "epoch": 0.16358647405374868, + "grad_norm": 0.8641694188117981, + "learning_rate": 0.00019540828321866648, + "loss": 2.7509, + "step": 2027 + }, + { + "epoch": 0.16366717779033169, + "grad_norm": 0.7687639594078064, + "learning_rate": 0.00019540355317662798, + "loss": 2.7266, + "step": 2028 + }, + { + "epoch": 0.1637478815269147, + "grad_norm": 0.7870400547981262, + "learning_rate": 0.00019539882075688355, + "loss": 2.8217, + "step": 2029 + }, + { + "epoch": 0.1638285852634977, + "grad_norm": 0.9373054504394531, + "learning_rate": 0.0001953940859595511, + "loss": 2.7562, + "step": 2030 + }, + { + "epoch": 0.1639092890000807, + "grad_norm": 0.7941255569458008, + "learning_rate": 0.00019538934878474872, + "loss": 2.7553, + "step": 2031 + }, + { + "epoch": 0.1639899927366637, + "grad_norm": 0.735977053642273, + "learning_rate": 0.00019538460923259438, + "loss": 2.7058, + "step": 2032 + }, + { + "epoch": 0.1640706964732467, + "grad_norm": 0.7812782526016235, + "learning_rate": 0.00019537986730320625, + "loss": 2.7885, + "step": 2033 + }, + { + "epoch": 0.1641514002098297, + "grad_norm": 1.1534128189086914, + "learning_rate": 0.0001953751229967025, + "loss": 2.7139, + "step": 2034 + }, + { + "epoch": 0.16423210394641272, + "grad_norm": 0.9139814972877502, + "learning_rate": 0.00019537037631320135, + "loss": 2.7869, + "step": 2035 + }, + { + "epoch": 0.16431280768299572, + "grad_norm": 0.8330421447753906, + "learning_rate": 0.00019536562725282116, + "loss": 2.7491, + "step": 2036 + }, + { + "epoch": 0.16439351141957873, + "grad_norm": 0.9040594696998596, + "learning_rate": 0.00019536087581568026, + "loss": 2.7637, + "step": 2037 + 
}, + { + "epoch": 0.16447421515616173, + "grad_norm": 0.9158666729927063, + "learning_rate": 0.00019535612200189705, + "loss": 2.7709, + "step": 2038 + }, + { + "epoch": 0.16455491889274473, + "grad_norm": 0.8668088912963867, + "learning_rate": 0.00019535136581158997, + "loss": 2.7994, + "step": 2039 + }, + { + "epoch": 0.16463562262932774, + "grad_norm": 0.9179345369338989, + "learning_rate": 0.00019534660724487764, + "loss": 2.747, + "step": 2040 + }, + { + "epoch": 0.16471632636591074, + "grad_norm": 0.9690881967544556, + "learning_rate": 0.00019534184630187862, + "loss": 2.742, + "step": 2041 + }, + { + "epoch": 0.16479703010249375, + "grad_norm": 0.8478729724884033, + "learning_rate": 0.00019533708298271157, + "loss": 2.7824, + "step": 2042 + }, + { + "epoch": 0.16487773383907675, + "grad_norm": 0.8286584615707397, + "learning_rate": 0.00019533231728749518, + "loss": 2.7263, + "step": 2043 + }, + { + "epoch": 0.16495843757565976, + "grad_norm": 0.8095324039459229, + "learning_rate": 0.00019532754921634826, + "loss": 2.7845, + "step": 2044 + }, + { + "epoch": 0.16503914131224276, + "grad_norm": 0.9552872776985168, + "learning_rate": 0.0001953227787693896, + "loss": 2.7676, + "step": 2045 + }, + { + "epoch": 0.16511984504882576, + "grad_norm": 1.021515130996704, + "learning_rate": 0.00019531800594673815, + "loss": 2.784, + "step": 2046 + }, + { + "epoch": 0.16520054878540877, + "grad_norm": 0.7847293019294739, + "learning_rate": 0.00019531323074851276, + "loss": 2.7319, + "step": 2047 + }, + { + "epoch": 0.16528125252199177, + "grad_norm": 0.7803899049758911, + "learning_rate": 0.0001953084531748326, + "loss": 2.8321, + "step": 2048 + }, + { + "epoch": 0.16536195625857478, + "grad_norm": 0.8687692880630493, + "learning_rate": 0.0001953036732258166, + "loss": 2.763, + "step": 2049 + }, + { + "epoch": 0.16544265999515778, + "grad_norm": 0.8212031126022339, + "learning_rate": 0.00019529889090158392, + "loss": 2.7262, + "step": 2050 + }, + { + "epoch": 
0.16552336373174079, + "grad_norm": 0.8460689187049866, + "learning_rate": 0.0001952941062022538, + "loss": 2.8018, + "step": 2051 + }, + { + "epoch": 0.1656040674683238, + "grad_norm": 0.9189361929893494, + "learning_rate": 0.00019528931912794547, + "loss": 2.8079, + "step": 2052 + }, + { + "epoch": 0.1656847712049068, + "grad_norm": 0.9529987573623657, + "learning_rate": 0.00019528452967877816, + "loss": 2.8015, + "step": 2053 + }, + { + "epoch": 0.1657654749414898, + "grad_norm": 0.8468493223190308, + "learning_rate": 0.00019527973785487133, + "loss": 2.8013, + "step": 2054 + }, + { + "epoch": 0.1658461786780728, + "grad_norm": 0.8150945901870728, + "learning_rate": 0.00019527494365634436, + "loss": 2.7975, + "step": 2055 + }, + { + "epoch": 0.1659268824146558, + "grad_norm": 0.814942479133606, + "learning_rate": 0.00019527014708331674, + "loss": 2.7503, + "step": 2056 + }, + { + "epoch": 0.1660075861512388, + "grad_norm": 0.7841517329216003, + "learning_rate": 0.000195265348135908, + "loss": 2.7921, + "step": 2057 + }, + { + "epoch": 0.16608828988782182, + "grad_norm": 0.7603738903999329, + "learning_rate": 0.0001952605468142378, + "loss": 2.7658, + "step": 2058 + }, + { + "epoch": 0.16616899362440482, + "grad_norm": 0.8460882902145386, + "learning_rate": 0.00019525574311842574, + "loss": 2.7644, + "step": 2059 + }, + { + "epoch": 0.16624969736098782, + "grad_norm": 0.8633555173873901, + "learning_rate": 0.00019525093704859156, + "loss": 2.7956, + "step": 2060 + }, + { + "epoch": 0.16633040109757083, + "grad_norm": 0.7700977325439453, + "learning_rate": 0.00019524612860485503, + "loss": 2.7103, + "step": 2061 + }, + { + "epoch": 0.16641110483415383, + "grad_norm": 0.888770580291748, + "learning_rate": 0.00019524131778733602, + "loss": 2.7325, + "step": 2062 + }, + { + "epoch": 0.16649180857073684, + "grad_norm": 0.8338149189949036, + "learning_rate": 0.00019523650459615438, + "loss": 2.7533, + "step": 2063 + }, + { + "epoch": 0.16657251230731984, + "grad_norm": 
0.7723987698554993, + "learning_rate": 0.0001952316890314301, + "loss": 2.7316, + "step": 2064 + }, + { + "epoch": 0.16665321604390285, + "grad_norm": 0.8952934145927429, + "learning_rate": 0.0001952268710932832, + "loss": 2.7825, + "step": 2065 + }, + { + "epoch": 0.16673391978048582, + "grad_norm": 0.8201496601104736, + "learning_rate": 0.00019522205078183378, + "loss": 2.7162, + "step": 2066 + }, + { + "epoch": 0.16681462351706883, + "grad_norm": 0.7733781337738037, + "learning_rate": 0.00019521722809720188, + "loss": 2.7834, + "step": 2067 + }, + { + "epoch": 0.16689532725365183, + "grad_norm": 0.8285118937492371, + "learning_rate": 0.0001952124030395078, + "loss": 2.8475, + "step": 2068 + }, + { + "epoch": 0.16697603099023484, + "grad_norm": 0.84097820520401, + "learning_rate": 0.00019520757560887174, + "loss": 2.784, + "step": 2069 + }, + { + "epoch": 0.16705673472681784, + "grad_norm": 0.7336563467979431, + "learning_rate": 0.000195202745805414, + "loss": 2.7663, + "step": 2070 + }, + { + "epoch": 0.16713743846340084, + "grad_norm": 0.8359388113021851, + "learning_rate": 0.000195197913629255, + "loss": 2.7931, + "step": 2071 + }, + { + "epoch": 0.16721814219998385, + "grad_norm": 0.8272559642791748, + "learning_rate": 0.0001951930790805151, + "loss": 2.8578, + "step": 2072 + }, + { + "epoch": 0.16729884593656685, + "grad_norm": 0.7970743179321289, + "learning_rate": 0.00019518824215931487, + "loss": 2.8148, + "step": 2073 + }, + { + "epoch": 0.16737954967314986, + "grad_norm": 0.856200098991394, + "learning_rate": 0.00019518340286577482, + "loss": 2.8067, + "step": 2074 + }, + { + "epoch": 0.16746025340973286, + "grad_norm": 0.7581893801689148, + "learning_rate": 0.00019517856120001556, + "loss": 2.7339, + "step": 2075 + }, + { + "epoch": 0.16754095714631587, + "grad_norm": 0.8488386869430542, + "learning_rate": 0.00019517371716215774, + "loss": 2.7332, + "step": 2076 + }, + { + "epoch": 0.16762166088289887, + "grad_norm": 0.7488275170326233, + 
"learning_rate": 0.00019516887075232212, + "loss": 2.7734, + "step": 2077 + }, + { + "epoch": 0.16770236461948188, + "grad_norm": 0.9173932075500488, + "learning_rate": 0.00019516402197062945, + "loss": 2.7792, + "step": 2078 + }, + { + "epoch": 0.16778306835606488, + "grad_norm": 0.8200702667236328, + "learning_rate": 0.0001951591708172006, + "loss": 2.8046, + "step": 2079 + }, + { + "epoch": 0.16786377209264788, + "grad_norm": 0.8270781636238098, + "learning_rate": 0.00019515431729215642, + "loss": 2.7467, + "step": 2080 + }, + { + "epoch": 0.1679444758292309, + "grad_norm": 0.8660609722137451, + "learning_rate": 0.00019514946139561799, + "loss": 2.8169, + "step": 2081 + }, + { + "epoch": 0.1680251795658139, + "grad_norm": 0.78753262758255, + "learning_rate": 0.0001951446031277062, + "loss": 2.7388, + "step": 2082 + }, + { + "epoch": 0.1681058833023969, + "grad_norm": 0.791593074798584, + "learning_rate": 0.00019513974248854224, + "loss": 2.8776, + "step": 2083 + }, + { + "epoch": 0.1681865870389799, + "grad_norm": 0.7883535623550415, + "learning_rate": 0.0001951348794782472, + "loss": 2.78, + "step": 2084 + }, + { + "epoch": 0.1682672907755629, + "grad_norm": 0.7877013087272644, + "learning_rate": 0.00019513001409694224, + "loss": 2.7559, + "step": 2085 + }, + { + "epoch": 0.1683479945121459, + "grad_norm": 0.8838450908660889, + "learning_rate": 0.00019512514634474864, + "loss": 2.7489, + "step": 2086 + }, + { + "epoch": 0.16842869824872891, + "grad_norm": 0.7751588821411133, + "learning_rate": 0.00019512027622178775, + "loss": 2.6832, + "step": 2087 + }, + { + "epoch": 0.16850940198531192, + "grad_norm": 0.90345299243927, + "learning_rate": 0.00019511540372818095, + "loss": 2.8189, + "step": 2088 + }, + { + "epoch": 0.16859010572189492, + "grad_norm": 0.7820938229560852, + "learning_rate": 0.00019511052886404966, + "loss": 2.7655, + "step": 2089 + }, + { + "epoch": 0.16867080945847793, + "grad_norm": 0.8250375986099243, + "learning_rate": 
0.00019510565162951537, + "loss": 2.7866, + "step": 2090 + }, + { + "epoch": 0.16875151319506093, + "grad_norm": 0.8063845634460449, + "learning_rate": 0.00019510077202469962, + "loss": 2.7774, + "step": 2091 + }, + { + "epoch": 0.16883221693164394, + "grad_norm": 0.7627965807914734, + "learning_rate": 0.00019509589004972403, + "loss": 2.7201, + "step": 2092 + }, + { + "epoch": 0.16891292066822694, + "grad_norm": 0.8392470479011536, + "learning_rate": 0.00019509100570471027, + "loss": 2.7613, + "step": 2093 + }, + { + "epoch": 0.16899362440480994, + "grad_norm": 0.7807552814483643, + "learning_rate": 0.0001950861189897801, + "loss": 2.7451, + "step": 2094 + }, + { + "epoch": 0.16907432814139295, + "grad_norm": 0.7829259634017944, + "learning_rate": 0.00019508122990505528, + "loss": 2.7128, + "step": 2095 + }, + { + "epoch": 0.16915503187797595, + "grad_norm": 0.7793046832084656, + "learning_rate": 0.00019507633845065766, + "loss": 2.7849, + "step": 2096 + }, + { + "epoch": 0.16923573561455896, + "grad_norm": 0.869752824306488, + "learning_rate": 0.00019507144462670915, + "loss": 2.7882, + "step": 2097 + }, + { + "epoch": 0.16931643935114196, + "grad_norm": 0.7550783753395081, + "learning_rate": 0.00019506654843333174, + "loss": 2.7211, + "step": 2098 + }, + { + "epoch": 0.16939714308772497, + "grad_norm": 0.8364891409873962, + "learning_rate": 0.0001950616498706474, + "loss": 2.7171, + "step": 2099 + }, + { + "epoch": 0.16947784682430797, + "grad_norm": 0.8026537299156189, + "learning_rate": 0.0001950567489387783, + "loss": 2.8362, + "step": 2100 + }, + { + "epoch": 0.16955855056089097, + "grad_norm": 0.8073398470878601, + "learning_rate": 0.00019505184563784652, + "loss": 2.7635, + "step": 2101 + }, + { + "epoch": 0.16963925429747398, + "grad_norm": 0.8168368935585022, + "learning_rate": 0.00019504693996797424, + "loss": 2.7553, + "step": 2102 + }, + { + "epoch": 0.16971995803405698, + "grad_norm": 0.7933681011199951, + "learning_rate": 0.0001950420319292838, + 
"loss": 2.7887, + "step": 2103 + }, + { + "epoch": 0.16980066177064, + "grad_norm": 0.8326540589332581, + "learning_rate": 0.00019503712152189748, + "loss": 2.7844, + "step": 2104 + }, + { + "epoch": 0.169881365507223, + "grad_norm": 0.8357202410697937, + "learning_rate": 0.00019503220874593765, + "loss": 2.7744, + "step": 2105 + }, + { + "epoch": 0.169962069243806, + "grad_norm": 0.8541022539138794, + "learning_rate": 0.00019502729360152676, + "loss": 2.7867, + "step": 2106 + }, + { + "epoch": 0.170042772980389, + "grad_norm": 0.8338841795921326, + "learning_rate": 0.0001950223760887873, + "loss": 2.7208, + "step": 2107 + }, + { + "epoch": 0.170123476716972, + "grad_norm": 0.8824255466461182, + "learning_rate": 0.00019501745620784187, + "loss": 2.7658, + "step": 2108 + }, + { + "epoch": 0.170204180453555, + "grad_norm": 0.7710463404655457, + "learning_rate": 0.00019501253395881306, + "loss": 2.7167, + "step": 2109 + }, + { + "epoch": 0.17028488419013801, + "grad_norm": 0.7740076184272766, + "learning_rate": 0.0001950076093418235, + "loss": 2.7251, + "step": 2110 + }, + { + "epoch": 0.17036558792672102, + "grad_norm": 0.8258434534072876, + "learning_rate": 0.00019500268235699597, + "loss": 2.7533, + "step": 2111 + }, + { + "epoch": 0.17044629166330402, + "grad_norm": 0.8347997069358826, + "learning_rate": 0.00019499775300445326, + "loss": 2.7372, + "step": 2112 + }, + { + "epoch": 0.17052699539988703, + "grad_norm": 0.8246529698371887, + "learning_rate": 0.00019499282128431823, + "loss": 2.7458, + "step": 2113 + }, + { + "epoch": 0.17060769913647003, + "grad_norm": 0.8510704040527344, + "learning_rate": 0.00019498788719671378, + "loss": 2.8144, + "step": 2114 + }, + { + "epoch": 0.17068840287305304, + "grad_norm": 0.7793454527854919, + "learning_rate": 0.00019498295074176286, + "loss": 2.7927, + "step": 2115 + }, + { + "epoch": 0.17076910660963604, + "grad_norm": 0.7888665199279785, + "learning_rate": 0.00019497801191958853, + "loss": 2.7156, + "step": 2116 + }, + 
{ + "epoch": 0.17084981034621902, + "grad_norm": 0.8502812385559082, + "learning_rate": 0.00019497307073031386, + "loss": 2.7906, + "step": 2117 + }, + { + "epoch": 0.17093051408280202, + "grad_norm": 0.8376502990722656, + "learning_rate": 0.00019496812717406203, + "loss": 2.7354, + "step": 2118 + }, + { + "epoch": 0.17101121781938503, + "grad_norm": 0.7974401116371155, + "learning_rate": 0.0001949631812509562, + "loss": 2.7755, + "step": 2119 + }, + { + "epoch": 0.17109192155596803, + "grad_norm": 0.7760190963745117, + "learning_rate": 0.00019495823296111965, + "loss": 2.7694, + "step": 2120 + }, + { + "epoch": 0.17117262529255103, + "grad_norm": 0.7721701860427856, + "learning_rate": 0.00019495328230467575, + "loss": 2.7474, + "step": 2121 + }, + { + "epoch": 0.17125332902913404, + "grad_norm": 0.7360577583312988, + "learning_rate": 0.0001949483292817478, + "loss": 2.8044, + "step": 2122 + }, + { + "epoch": 0.17133403276571704, + "grad_norm": 0.7536107301712036, + "learning_rate": 0.0001949433738924593, + "loss": 2.8165, + "step": 2123 + }, + { + "epoch": 0.17141473650230005, + "grad_norm": 0.7668276429176331, + "learning_rate": 0.00019493841613693375, + "loss": 2.7964, + "step": 2124 + }, + { + "epoch": 0.17149544023888305, + "grad_norm": 0.8323161602020264, + "learning_rate": 0.0001949334560152947, + "loss": 2.7395, + "step": 2125 + }, + { + "epoch": 0.17157614397546606, + "grad_norm": 0.8132179975509644, + "learning_rate": 0.00019492849352766576, + "loss": 2.7511, + "step": 2126 + }, + { + "epoch": 0.17165684771204906, + "grad_norm": 0.7806998491287231, + "learning_rate": 0.0001949235286741706, + "loss": 2.7649, + "step": 2127 + }, + { + "epoch": 0.17173755144863206, + "grad_norm": 0.8315939903259277, + "learning_rate": 0.00019491856145493298, + "loss": 2.7742, + "step": 2128 + }, + { + "epoch": 0.17181825518521507, + "grad_norm": 0.8368063569068909, + "learning_rate": 0.00019491359187007672, + "loss": 2.7667, + "step": 2129 + }, + { + "epoch": 
0.17189895892179807, + "grad_norm": 0.9183431267738342, + "learning_rate": 0.0001949086199197256, + "loss": 2.7444, + "step": 2130 + }, + { + "epoch": 0.17197966265838108, + "grad_norm": 0.7824065089225769, + "learning_rate": 0.0001949036456040036, + "loss": 2.7455, + "step": 2131 + }, + { + "epoch": 0.17206036639496408, + "grad_norm": 0.777974009513855, + "learning_rate": 0.00019489866892303468, + "loss": 2.7466, + "step": 2132 + }, + { + "epoch": 0.17214107013154709, + "grad_norm": 0.8068816065788269, + "learning_rate": 0.00019489368987694286, + "loss": 2.7081, + "step": 2133 + }, + { + "epoch": 0.1722217738681301, + "grad_norm": 0.8757622838020325, + "learning_rate": 0.00019488870846585222, + "loss": 2.8005, + "step": 2134 + }, + { + "epoch": 0.1723024776047131, + "grad_norm": 0.7967162728309631, + "learning_rate": 0.00019488372468988693, + "loss": 2.7737, + "step": 2135 + }, + { + "epoch": 0.1723831813412961, + "grad_norm": 0.7700283527374268, + "learning_rate": 0.00019487873854917117, + "loss": 2.7431, + "step": 2136 + }, + { + "epoch": 0.1724638850778791, + "grad_norm": 0.8259130716323853, + "learning_rate": 0.00019487375004382927, + "loss": 2.7635, + "step": 2137 + }, + { + "epoch": 0.1725445888144621, + "grad_norm": 0.8253815770149231, + "learning_rate": 0.0001948687591739855, + "loss": 2.7046, + "step": 2138 + }, + { + "epoch": 0.1726252925510451, + "grad_norm": 0.8087987303733826, + "learning_rate": 0.00019486376593976426, + "loss": 2.7728, + "step": 2139 + }, + { + "epoch": 0.17270599628762812, + "grad_norm": 0.8437588214874268, + "learning_rate": 0.00019485877034128998, + "loss": 2.7606, + "step": 2140 + }, + { + "epoch": 0.17278670002421112, + "grad_norm": 0.8416075110435486, + "learning_rate": 0.00019485377237868723, + "loss": 2.7396, + "step": 2141 + }, + { + "epoch": 0.17286740376079412, + "grad_norm": 0.784275472164154, + "learning_rate": 0.00019484877205208046, + "loss": 2.766, + "step": 2142 + }, + { + "epoch": 0.17294810749737713, + "grad_norm": 
0.8082472681999207, + "learning_rate": 0.0001948437693615944, + "loss": 2.8, + "step": 2143 + }, + { + "epoch": 0.17302881123396013, + "grad_norm": 0.8904329538345337, + "learning_rate": 0.00019483876430735365, + "loss": 2.6579, + "step": 2144 + }, + { + "epoch": 0.17310951497054314, + "grad_norm": 0.7864851355552673, + "learning_rate": 0.000194833756889483, + "loss": 2.8231, + "step": 2145 + }, + { + "epoch": 0.17319021870712614, + "grad_norm": 0.7445049285888672, + "learning_rate": 0.00019482874710810723, + "loss": 2.7498, + "step": 2146 + }, + { + "epoch": 0.17327092244370915, + "grad_norm": 0.8266116380691528, + "learning_rate": 0.00019482373496335117, + "loss": 2.7152, + "step": 2147 + }, + { + "epoch": 0.17335162618029215, + "grad_norm": 0.7712300419807434, + "learning_rate": 0.0001948187204553398, + "loss": 2.7751, + "step": 2148 + }, + { + "epoch": 0.17343232991687516, + "grad_norm": 0.7472708225250244, + "learning_rate": 0.00019481370358419807, + "loss": 2.7397, + "step": 2149 + }, + { + "epoch": 0.17351303365345816, + "grad_norm": 0.763454020023346, + "learning_rate": 0.00019480868435005095, + "loss": 2.7174, + "step": 2150 + }, + { + "epoch": 0.17359373739004116, + "grad_norm": 0.8187674283981323, + "learning_rate": 0.00019480366275302362, + "loss": 2.8424, + "step": 2151 + }, + { + "epoch": 0.17367444112662417, + "grad_norm": 0.8183228373527527, + "learning_rate": 0.0001947986387932412, + "loss": 2.7351, + "step": 2152 + }, + { + "epoch": 0.17375514486320717, + "grad_norm": 0.807231605052948, + "learning_rate": 0.00019479361247082884, + "loss": 2.8054, + "step": 2153 + }, + { + "epoch": 0.17383584859979018, + "grad_norm": 0.8383626341819763, + "learning_rate": 0.00019478858378591194, + "loss": 2.7181, + "step": 2154 + }, + { + "epoch": 0.17391655233637318, + "grad_norm": 0.8330298662185669, + "learning_rate": 0.0001947835527386157, + "loss": 2.748, + "step": 2155 + }, + { + "epoch": 0.17399725607295619, + "grad_norm": 0.8433073163032532, + 
"learning_rate": 0.0001947785193290656, + "loss": 2.8115, + "step": 2156 + }, + { + "epoch": 0.1740779598095392, + "grad_norm": 0.8873384594917297, + "learning_rate": 0.000194773483557387, + "loss": 2.8288, + "step": 2157 + }, + { + "epoch": 0.1741586635461222, + "grad_norm": 0.8399423360824585, + "learning_rate": 0.00019476844542370546, + "loss": 2.7514, + "step": 2158 + }, + { + "epoch": 0.1742393672827052, + "grad_norm": 0.7808830738067627, + "learning_rate": 0.00019476340492814655, + "loss": 2.7003, + "step": 2159 + }, + { + "epoch": 0.1743200710192882, + "grad_norm": 0.8268750905990601, + "learning_rate": 0.00019475836207083589, + "loss": 2.7961, + "step": 2160 + }, + { + "epoch": 0.1744007747558712, + "grad_norm": 0.9144260883331299, + "learning_rate": 0.0001947533168518991, + "loss": 2.769, + "step": 2161 + }, + { + "epoch": 0.1744814784924542, + "grad_norm": 0.8409113883972168, + "learning_rate": 0.000194748269271462, + "loss": 2.8004, + "step": 2162 + }, + { + "epoch": 0.17456218222903722, + "grad_norm": 0.8747037649154663, + "learning_rate": 0.00019474321932965035, + "loss": 2.7602, + "step": 2163 + }, + { + "epoch": 0.17464288596562022, + "grad_norm": 0.8582575917243958, + "learning_rate": 0.00019473816702659, + "loss": 2.7292, + "step": 2164 + }, + { + "epoch": 0.17472358970220322, + "grad_norm": 0.7402843832969666, + "learning_rate": 0.0001947331123624069, + "loss": 2.7287, + "step": 2165 + }, + { + "epoch": 0.17480429343878623, + "grad_norm": 0.8019410967826843, + "learning_rate": 0.000194728055337227, + "loss": 2.7451, + "step": 2166 + }, + { + "epoch": 0.17488499717536923, + "grad_norm": 0.9137046337127686, + "learning_rate": 0.0001947229959511763, + "loss": 2.808, + "step": 2167 + }, + { + "epoch": 0.1749657009119522, + "grad_norm": 0.7539177536964417, + "learning_rate": 0.000194717934204381, + "loss": 2.7031, + "step": 2168 + }, + { + "epoch": 0.17504640464853521, + "grad_norm": 0.8611089587211609, + "learning_rate": 0.00019471287009696715, + 
"loss": 2.8751, + "step": 2169 + }, + { + "epoch": 0.17512710838511822, + "grad_norm": 0.906134843826294, + "learning_rate": 0.000194707803629061, + "loss": 2.9163, + "step": 2170 + }, + { + "epoch": 0.17520781212170122, + "grad_norm": 0.8066667318344116, + "learning_rate": 0.00019470273480078879, + "loss": 2.7549, + "step": 2171 + }, + { + "epoch": 0.17528851585828423, + "grad_norm": 0.7962325215339661, + "learning_rate": 0.00019469766361227692, + "loss": 2.7964, + "step": 2172 + }, + { + "epoch": 0.17536921959486723, + "grad_norm": 0.7802287340164185, + "learning_rate": 0.0001946925900636517, + "loss": 2.7022, + "step": 2173 + }, + { + "epoch": 0.17544992333145024, + "grad_norm": 0.783478319644928, + "learning_rate": 0.0001946875141550396, + "loss": 2.7798, + "step": 2174 + }, + { + "epoch": 0.17553062706803324, + "grad_norm": 0.8006815314292908, + "learning_rate": 0.00019468243588656713, + "loss": 2.7345, + "step": 2175 + }, + { + "epoch": 0.17561133080461624, + "grad_norm": 0.7566428184509277, + "learning_rate": 0.00019467735525836085, + "loss": 2.7822, + "step": 2176 + }, + { + "epoch": 0.17569203454119925, + "grad_norm": 0.772282600402832, + "learning_rate": 0.0001946722722705474, + "loss": 2.7346, + "step": 2177 + }, + { + "epoch": 0.17577273827778225, + "grad_norm": 0.7808345556259155, + "learning_rate": 0.00019466718692325347, + "loss": 2.755, + "step": 2178 + }, + { + "epoch": 0.17585344201436526, + "grad_norm": 0.8150362372398376, + "learning_rate": 0.00019466209921660576, + "loss": 2.7691, + "step": 2179 + }, + { + "epoch": 0.17593414575094826, + "grad_norm": 0.7952939867973328, + "learning_rate": 0.0001946570091507311, + "loss": 2.8175, + "step": 2180 + }, + { + "epoch": 0.17601484948753127, + "grad_norm": 0.8211334347724915, + "learning_rate": 0.00019465191672575634, + "loss": 2.7561, + "step": 2181 + }, + { + "epoch": 0.17609555322411427, + "grad_norm": 0.7726178765296936, + "learning_rate": 0.00019464682194180838, + "loss": 2.7435, + "step": 2182 + 
}, + { + "epoch": 0.17617625696069728, + "grad_norm": 0.7614372372627258, + "learning_rate": 0.00019464172479901422, + "loss": 2.7301, + "step": 2183 + }, + { + "epoch": 0.17625696069728028, + "grad_norm": 0.7818898558616638, + "learning_rate": 0.00019463662529750083, + "loss": 2.6964, + "step": 2184 + }, + { + "epoch": 0.17633766443386328, + "grad_norm": 0.7849796414375305, + "learning_rate": 0.0001946315234373954, + "loss": 2.7431, + "step": 2185 + }, + { + "epoch": 0.1764183681704463, + "grad_norm": 0.7939459085464478, + "learning_rate": 0.00019462641921882506, + "loss": 2.7126, + "step": 2186 + }, + { + "epoch": 0.1764990719070293, + "grad_norm": 0.8391629457473755, + "learning_rate": 0.00019462131264191696, + "loss": 2.8394, + "step": 2187 + }, + { + "epoch": 0.1765797756436123, + "grad_norm": 0.7548067569732666, + "learning_rate": 0.0001946162037067984, + "loss": 2.7315, + "step": 2188 + }, + { + "epoch": 0.1766604793801953, + "grad_norm": 0.8278634548187256, + "learning_rate": 0.00019461109241359674, + "loss": 2.8298, + "step": 2189 + }, + { + "epoch": 0.1767411831167783, + "grad_norm": 0.8275949954986572, + "learning_rate": 0.00019460597876243933, + "loss": 2.8072, + "step": 2190 + }, + { + "epoch": 0.1768218868533613, + "grad_norm": 0.7720363140106201, + "learning_rate": 0.00019460086275345363, + "loss": 2.7478, + "step": 2191 + }, + { + "epoch": 0.17690259058994431, + "grad_norm": 0.7795925140380859, + "learning_rate": 0.00019459574438676714, + "loss": 2.7633, + "step": 2192 + }, + { + "epoch": 0.17698329432652732, + "grad_norm": 0.7722043991088867, + "learning_rate": 0.00019459062366250743, + "loss": 2.8001, + "step": 2193 + }, + { + "epoch": 0.17706399806311032, + "grad_norm": 0.8560587763786316, + "learning_rate": 0.00019458550058080212, + "loss": 2.7494, + "step": 2194 + }, + { + "epoch": 0.17714470179969333, + "grad_norm": 0.7473754286766052, + "learning_rate": 0.00019458037514177886, + "loss": 2.7112, + "step": 2195 + }, + { + "epoch": 
0.17722540553627633, + "grad_norm": 0.7625827789306641, + "learning_rate": 0.00019457524734556542, + "loss": 2.7496, + "step": 2196 + }, + { + "epoch": 0.17730610927285934, + "grad_norm": 0.7809351682662964, + "learning_rate": 0.00019457011719228962, + "loss": 2.7764, + "step": 2197 + }, + { + "epoch": 0.17738681300944234, + "grad_norm": 0.7846190333366394, + "learning_rate": 0.00019456498468207927, + "loss": 2.7189, + "step": 2198 + }, + { + "epoch": 0.17746751674602534, + "grad_norm": 0.7919551134109497, + "learning_rate": 0.0001945598498150623, + "loss": 2.7798, + "step": 2199 + }, + { + "epoch": 0.17754822048260835, + "grad_norm": 0.796183705329895, + "learning_rate": 0.0001945547125913667, + "loss": 2.7498, + "step": 2200 + }, + { + "epoch": 0.17762892421919135, + "grad_norm": 0.791668176651001, + "learning_rate": 0.0001945495730111205, + "loss": 2.7638, + "step": 2201 + }, + { + "epoch": 0.17770962795577436, + "grad_norm": 0.8303191661834717, + "learning_rate": 0.0001945444310744518, + "loss": 2.8079, + "step": 2202 + }, + { + "epoch": 0.17779033169235736, + "grad_norm": 0.8245917558670044, + "learning_rate": 0.00019453928678148872, + "loss": 2.7222, + "step": 2203 + }, + { + "epoch": 0.17787103542894037, + "grad_norm": 0.793456494808197, + "learning_rate": 0.0001945341401323595, + "loss": 2.8532, + "step": 2204 + }, + { + "epoch": 0.17795173916552337, + "grad_norm": 0.7574856877326965, + "learning_rate": 0.00019452899112719235, + "loss": 2.7361, + "step": 2205 + }, + { + "epoch": 0.17803244290210637, + "grad_norm": 0.7748556733131409, + "learning_rate": 0.0001945238397661157, + "loss": 2.7423, + "step": 2206 + }, + { + "epoch": 0.17811314663868938, + "grad_norm": 0.8973588347434998, + "learning_rate": 0.00019451868604925782, + "loss": 2.7604, + "step": 2207 + }, + { + "epoch": 0.17819385037527238, + "grad_norm": 0.7613589763641357, + "learning_rate": 0.00019451352997674722, + "loss": 2.7168, + "step": 2208 + }, + { + "epoch": 0.1782745541118554, + 
"grad_norm": 0.8152763247489929, + "learning_rate": 0.00019450837154871243, + "loss": 2.7904, + "step": 2209 + }, + { + "epoch": 0.1783552578484384, + "grad_norm": 0.8115083575248718, + "learning_rate": 0.00019450321076528194, + "loss": 2.7595, + "step": 2210 + }, + { + "epoch": 0.1784359615850214, + "grad_norm": 0.772665798664093, + "learning_rate": 0.00019449804762658438, + "loss": 2.7125, + "step": 2211 + }, + { + "epoch": 0.1785166653216044, + "grad_norm": 0.8002723455429077, + "learning_rate": 0.0001944928821327485, + "loss": 2.8121, + "step": 2212 + }, + { + "epoch": 0.1785973690581874, + "grad_norm": 0.8354858160018921, + "learning_rate": 0.00019448771428390296, + "loss": 2.8662, + "step": 2213 + }, + { + "epoch": 0.1786780727947704, + "grad_norm": 0.7799130082130432, + "learning_rate": 0.0001944825440801766, + "loss": 2.7247, + "step": 2214 + }, + { + "epoch": 0.1787587765313534, + "grad_norm": 0.810265302658081, + "learning_rate": 0.00019447737152169828, + "loss": 2.7095, + "step": 2215 + }, + { + "epoch": 0.17883948026793642, + "grad_norm": 0.8305599093437195, + "learning_rate": 0.00019447219660859687, + "loss": 2.7448, + "step": 2216 + }, + { + "epoch": 0.17892018400451942, + "grad_norm": 0.7899554371833801, + "learning_rate": 0.00019446701934100138, + "loss": 2.7295, + "step": 2217 + }, + { + "epoch": 0.17900088774110243, + "grad_norm": 0.7675672173500061, + "learning_rate": 0.00019446183971904082, + "loss": 2.7236, + "step": 2218 + }, + { + "epoch": 0.1790815914776854, + "grad_norm": 0.8717279434204102, + "learning_rate": 0.0001944566577428443, + "loss": 2.8044, + "step": 2219 + }, + { + "epoch": 0.1791622952142684, + "grad_norm": 0.8151431679725647, + "learning_rate": 0.00019445147341254094, + "loss": 2.7753, + "step": 2220 + }, + { + "epoch": 0.1792429989508514, + "grad_norm": 0.8481619358062744, + "learning_rate": 0.00019444628672825998, + "loss": 2.7954, + "step": 2221 + }, + { + "epoch": 0.17932370268743442, + "grad_norm": 0.8133199214935303, + 
"learning_rate": 0.00019444109769013065, + "loss": 2.7235, + "step": 2222 + }, + { + "epoch": 0.17940440642401742, + "grad_norm": 0.8250097036361694, + "learning_rate": 0.00019443590629828232, + "loss": 2.8352, + "step": 2223 + }, + { + "epoch": 0.17948511016060043, + "grad_norm": 0.8279787302017212, + "learning_rate": 0.00019443071255284433, + "loss": 2.7513, + "step": 2224 + }, + { + "epoch": 0.17956581389718343, + "grad_norm": 0.7781538963317871, + "learning_rate": 0.00019442551645394612, + "loss": 2.7239, + "step": 2225 + }, + { + "epoch": 0.17964651763376643, + "grad_norm": 0.7718615531921387, + "learning_rate": 0.00019442031800171727, + "loss": 2.7387, + "step": 2226 + }, + { + "epoch": 0.17972722137034944, + "grad_norm": 0.7704512476921082, + "learning_rate": 0.00019441511719628724, + "loss": 2.792, + "step": 2227 + }, + { + "epoch": 0.17980792510693244, + "grad_norm": 0.8290835618972778, + "learning_rate": 0.00019440991403778566, + "loss": 2.7745, + "step": 2228 + }, + { + "epoch": 0.17988862884351545, + "grad_norm": 0.8408392667770386, + "learning_rate": 0.00019440470852634227, + "loss": 2.7688, + "step": 2229 + }, + { + "epoch": 0.17996933258009845, + "grad_norm": 0.8503465056419373, + "learning_rate": 0.00019439950066208676, + "loss": 2.6747, + "step": 2230 + }, + { + "epoch": 0.18005003631668146, + "grad_norm": 0.8213364481925964, + "learning_rate": 0.0001943942904451489, + "loss": 2.7212, + "step": 2231 + }, + { + "epoch": 0.18013074005326446, + "grad_norm": 0.8511209487915039, + "learning_rate": 0.0001943890778756586, + "loss": 2.701, + "step": 2232 + }, + { + "epoch": 0.18021144378984746, + "grad_norm": 0.8034417033195496, + "learning_rate": 0.00019438386295374577, + "loss": 2.7029, + "step": 2233 + }, + { + "epoch": 0.18029214752643047, + "grad_norm": 0.7603715658187866, + "learning_rate": 0.0001943786456795403, + "loss": 2.7201, + "step": 2234 + }, + { + "epoch": 0.18037285126301347, + "grad_norm": 0.9210647940635681, + "learning_rate": 
0.0001943734260531723, + "loss": 2.7847, + "step": 2235 + }, + { + "epoch": 0.18045355499959648, + "grad_norm": 0.7429665923118591, + "learning_rate": 0.00019436820407477186, + "loss": 2.7493, + "step": 2236 + }, + { + "epoch": 0.18053425873617948, + "grad_norm": 0.8290510773658752, + "learning_rate": 0.00019436297974446905, + "loss": 2.7711, + "step": 2237 + }, + { + "epoch": 0.18061496247276249, + "grad_norm": 0.7593570947647095, + "learning_rate": 0.0001943577530623941, + "loss": 2.7539, + "step": 2238 + }, + { + "epoch": 0.1806956662093455, + "grad_norm": 0.8222225308418274, + "learning_rate": 0.00019435252402867734, + "loss": 2.7703, + "step": 2239 + }, + { + "epoch": 0.1807763699459285, + "grad_norm": 0.8280842900276184, + "learning_rate": 0.00019434729264344898, + "loss": 2.7966, + "step": 2240 + }, + { + "epoch": 0.1808570736825115, + "grad_norm": 0.8258495926856995, + "learning_rate": 0.00019434205890683952, + "loss": 2.759, + "step": 2241 + }, + { + "epoch": 0.1809377774190945, + "grad_norm": 0.8294420838356018, + "learning_rate": 0.00019433682281897932, + "loss": 2.6996, + "step": 2242 + }, + { + "epoch": 0.1810184811556775, + "grad_norm": 0.8258811235427856, + "learning_rate": 0.0001943315843799989, + "loss": 2.774, + "step": 2243 + }, + { + "epoch": 0.1810991848922605, + "grad_norm": 0.8035838007926941, + "learning_rate": 0.0001943263435900288, + "loss": 2.7806, + "step": 2244 + }, + { + "epoch": 0.18117988862884352, + "grad_norm": 0.7900332808494568, + "learning_rate": 0.00019432110044919964, + "loss": 2.7462, + "step": 2245 + }, + { + "epoch": 0.18126059236542652, + "grad_norm": 0.8126730918884277, + "learning_rate": 0.00019431585495764212, + "loss": 2.6913, + "step": 2246 + }, + { + "epoch": 0.18134129610200952, + "grad_norm": 0.8411321043968201, + "learning_rate": 0.00019431060711548695, + "loss": 2.7503, + "step": 2247 + }, + { + "epoch": 0.18142199983859253, + "grad_norm": 0.7712867856025696, + "learning_rate": 0.0001943053569228649, + "loss": 
2.7703, + "step": 2248 + }, + { + "epoch": 0.18150270357517553, + "grad_norm": 0.9093566536903381, + "learning_rate": 0.00019430010437990688, + "loss": 2.7838, + "step": 2249 + }, + { + "epoch": 0.18158340731175854, + "grad_norm": 0.8184913396835327, + "learning_rate": 0.00019429484948674372, + "loss": 2.8167, + "step": 2250 + }, + { + "epoch": 0.18166411104834154, + "grad_norm": 0.7215915322303772, + "learning_rate": 0.00019428959224350643, + "loss": 2.739, + "step": 2251 + }, + { + "epoch": 0.18174481478492455, + "grad_norm": 0.7842726111412048, + "learning_rate": 0.000194284332650326, + "loss": 2.8547, + "step": 2252 + }, + { + "epoch": 0.18182551852150755, + "grad_norm": 0.7758263349533081, + "learning_rate": 0.00019427907070733357, + "loss": 2.7746, + "step": 2253 + }, + { + "epoch": 0.18190622225809056, + "grad_norm": 0.7710500359535217, + "learning_rate": 0.00019427380641466027, + "loss": 2.7415, + "step": 2254 + }, + { + "epoch": 0.18198692599467356, + "grad_norm": 0.8233851194381714, + "learning_rate": 0.00019426853977243724, + "loss": 2.7471, + "step": 2255 + }, + { + "epoch": 0.18206762973125656, + "grad_norm": 0.7856284379959106, + "learning_rate": 0.00019426327078079578, + "loss": 2.6892, + "step": 2256 + }, + { + "epoch": 0.18214833346783957, + "grad_norm": 0.7978290915489197, + "learning_rate": 0.00019425799943986722, + "loss": 2.7346, + "step": 2257 + }, + { + "epoch": 0.18222903720442257, + "grad_norm": 0.8339362740516663, + "learning_rate": 0.00019425272574978293, + "loss": 2.7403, + "step": 2258 + }, + { + "epoch": 0.18230974094100558, + "grad_norm": 0.8035171031951904, + "learning_rate": 0.0001942474497106743, + "loss": 2.7444, + "step": 2259 + }, + { + "epoch": 0.18239044467758858, + "grad_norm": 0.7950475811958313, + "learning_rate": 0.0001942421713226729, + "loss": 2.7218, + "step": 2260 + }, + { + "epoch": 0.18247114841417159, + "grad_norm": 0.8439741730690002, + "learning_rate": 0.00019423689058591022, + "loss": 2.7498, + "step": 2261 + }, 
+ { + "epoch": 0.1825518521507546, + "grad_norm": 0.8585919737815857, + "learning_rate": 0.00019423160750051789, + "loss": 2.7459, + "step": 2262 + }, + { + "epoch": 0.1826325558873376, + "grad_norm": 0.857276201248169, + "learning_rate": 0.00019422632206662755, + "loss": 2.8404, + "step": 2263 + }, + { + "epoch": 0.1827132596239206, + "grad_norm": 0.7692707777023315, + "learning_rate": 0.000194221034284371, + "loss": 2.8069, + "step": 2264 + }, + { + "epoch": 0.1827939633605036, + "grad_norm": 0.9107782244682312, + "learning_rate": 0.00019421574415387998, + "loss": 2.7554, + "step": 2265 + }, + { + "epoch": 0.1828746670970866, + "grad_norm": 0.763300895690918, + "learning_rate": 0.00019421045167528628, + "loss": 2.8031, + "step": 2266 + }, + { + "epoch": 0.1829553708336696, + "grad_norm": 0.8625530004501343, + "learning_rate": 0.0001942051568487219, + "loss": 2.7622, + "step": 2267 + }, + { + "epoch": 0.18303607457025262, + "grad_norm": 0.8483080863952637, + "learning_rate": 0.00019419985967431875, + "loss": 2.7726, + "step": 2268 + }, + { + "epoch": 0.18311677830683562, + "grad_norm": 0.8295309543609619, + "learning_rate": 0.00019419456015220884, + "loss": 2.7676, + "step": 2269 + }, + { + "epoch": 0.1831974820434186, + "grad_norm": 0.812976062297821, + "learning_rate": 0.0001941892582825243, + "loss": 2.745, + "step": 2270 + }, + { + "epoch": 0.1832781857800016, + "grad_norm": 0.799846351146698, + "learning_rate": 0.00019418395406539717, + "loss": 2.7474, + "step": 2271 + }, + { + "epoch": 0.1833588895165846, + "grad_norm": 0.7825174331665039, + "learning_rate": 0.00019417864750095976, + "loss": 2.7982, + "step": 2272 + }, + { + "epoch": 0.1834395932531676, + "grad_norm": 0.8331060409545898, + "learning_rate": 0.00019417333858934424, + "loss": 2.7279, + "step": 2273 + }, + { + "epoch": 0.18352029698975061, + "grad_norm": 0.8579809665679932, + "learning_rate": 0.00019416802733068295, + "loss": 2.7425, + "step": 2274 + }, + { + "epoch": 0.18360100072633362, + 
"grad_norm": 0.8643589019775391, + "learning_rate": 0.0001941627137251083, + "loss": 2.7369, + "step": 2275 + }, + { + "epoch": 0.18368170446291662, + "grad_norm": 0.9086846113204956, + "learning_rate": 0.00019415739777275265, + "loss": 2.7681, + "step": 2276 + }, + { + "epoch": 0.18376240819949963, + "grad_norm": 0.8442896604537964, + "learning_rate": 0.00019415207947374853, + "loss": 2.7733, + "step": 2277 + }, + { + "epoch": 0.18384311193608263, + "grad_norm": 0.7858592867851257, + "learning_rate": 0.00019414675882822846, + "loss": 2.7726, + "step": 2278 + }, + { + "epoch": 0.18392381567266564, + "grad_norm": 0.8191118240356445, + "learning_rate": 0.00019414143583632503, + "loss": 2.8142, + "step": 2279 + }, + { + "epoch": 0.18400451940924864, + "grad_norm": 0.8093815445899963, + "learning_rate": 0.00019413611049817097, + "loss": 2.7068, + "step": 2280 + }, + { + "epoch": 0.18408522314583164, + "grad_norm": 0.80247563123703, + "learning_rate": 0.00019413078281389895, + "loss": 2.7459, + "step": 2281 + }, + { + "epoch": 0.18416592688241465, + "grad_norm": 0.8200877904891968, + "learning_rate": 0.00019412545278364176, + "loss": 2.6963, + "step": 2282 + }, + { + "epoch": 0.18424663061899765, + "grad_norm": 0.870662271976471, + "learning_rate": 0.00019412012040753224, + "loss": 2.8636, + "step": 2283 + }, + { + "epoch": 0.18432733435558066, + "grad_norm": 0.7626601457595825, + "learning_rate": 0.00019411478568570332, + "loss": 2.8082, + "step": 2284 + }, + { + "epoch": 0.18440803809216366, + "grad_norm": 0.7492787837982178, + "learning_rate": 0.00019410944861828787, + "loss": 2.7231, + "step": 2285 + }, + { + "epoch": 0.18448874182874667, + "grad_norm": 0.8172419667243958, + "learning_rate": 0.000194104109205419, + "loss": 2.7054, + "step": 2286 + }, + { + "epoch": 0.18456944556532967, + "grad_norm": 0.7749670147895813, + "learning_rate": 0.0001940987674472297, + "loss": 2.6907, + "step": 2287 + }, + { + "epoch": 0.18465014930191267, + "grad_norm": 
0.8855465054512024, + "learning_rate": 0.00019409342334385316, + "loss": 2.7439, + "step": 2288 + }, + { + "epoch": 0.18473085303849568, + "grad_norm": 0.8066419363021851, + "learning_rate": 0.00019408807689542257, + "loss": 2.7126, + "step": 2289 + }, + { + "epoch": 0.18481155677507868, + "grad_norm": 0.7759004235267639, + "learning_rate": 0.00019408272810207114, + "loss": 2.7207, + "step": 2290 + }, + { + "epoch": 0.1848922605116617, + "grad_norm": 0.8593513369560242, + "learning_rate": 0.00019407737696393215, + "loss": 2.7375, + "step": 2291 + }, + { + "epoch": 0.1849729642482447, + "grad_norm": 0.8154759407043457, + "learning_rate": 0.00019407202348113904, + "loss": 2.7608, + "step": 2292 + }, + { + "epoch": 0.1850536679848277, + "grad_norm": 0.7912892699241638, + "learning_rate": 0.0001940666676538252, + "loss": 2.7886, + "step": 2293 + }, + { + "epoch": 0.1851343717214107, + "grad_norm": 0.9184576272964478, + "learning_rate": 0.0001940613094821241, + "loss": 2.7867, + "step": 2294 + }, + { + "epoch": 0.1852150754579937, + "grad_norm": 0.8114588856697083, + "learning_rate": 0.0001940559489661693, + "loss": 2.8105, + "step": 2295 + }, + { + "epoch": 0.1852957791945767, + "grad_norm": 0.7681595683097839, + "learning_rate": 0.00019405058610609438, + "loss": 2.7707, + "step": 2296 + }, + { + "epoch": 0.18537648293115971, + "grad_norm": 0.7719643712043762, + "learning_rate": 0.000194045220902033, + "loss": 2.6767, + "step": 2297 + }, + { + "epoch": 0.18545718666774272, + "grad_norm": 0.7602487206459045, + "learning_rate": 0.00019403985335411888, + "loss": 2.7698, + "step": 2298 + }, + { + "epoch": 0.18553789040432572, + "grad_norm": 0.8044554591178894, + "learning_rate": 0.00019403448346248578, + "loss": 2.7578, + "step": 2299 + }, + { + "epoch": 0.18561859414090873, + "grad_norm": 0.7830328345298767, + "learning_rate": 0.00019402911122726757, + "loss": 2.7113, + "step": 2300 + }, + { + "epoch": 0.18569929787749173, + "grad_norm": 0.7793100476264954, + 
"learning_rate": 0.0001940237366485981, + "loss": 2.7388, + "step": 2301 + }, + { + "epoch": 0.18578000161407474, + "grad_norm": 0.9127374887466431, + "learning_rate": 0.00019401835972661133, + "loss": 2.7459, + "step": 2302 + }, + { + "epoch": 0.18586070535065774, + "grad_norm": 0.8007177114486694, + "learning_rate": 0.00019401298046144128, + "loss": 2.776, + "step": 2303 + }, + { + "epoch": 0.18594140908724074, + "grad_norm": 0.7384614944458008, + "learning_rate": 0.000194007598853222, + "loss": 2.6819, + "step": 2304 + }, + { + "epoch": 0.18602211282382375, + "grad_norm": 0.798909068107605, + "learning_rate": 0.0001940022149020876, + "loss": 2.7218, + "step": 2305 + }, + { + "epoch": 0.18610281656040675, + "grad_norm": 0.8388963341712952, + "learning_rate": 0.0001939968286081723, + "loss": 2.8248, + "step": 2306 + }, + { + "epoch": 0.18618352029698976, + "grad_norm": 0.8411754369735718, + "learning_rate": 0.0001939914399716103, + "loss": 2.7575, + "step": 2307 + }, + { + "epoch": 0.18626422403357276, + "grad_norm": 0.7936103343963623, + "learning_rate": 0.00019398604899253594, + "loss": 2.7488, + "step": 2308 + }, + { + "epoch": 0.18634492777015577, + "grad_norm": 0.7913734912872314, + "learning_rate": 0.00019398065567108357, + "loss": 2.7963, + "step": 2309 + }, + { + "epoch": 0.18642563150673877, + "grad_norm": 0.8341575860977173, + "learning_rate": 0.00019397526000738754, + "loss": 2.7698, + "step": 2310 + }, + { + "epoch": 0.18650633524332177, + "grad_norm": 0.8323128819465637, + "learning_rate": 0.00019396986200158244, + "loss": 2.7218, + "step": 2311 + }, + { + "epoch": 0.18658703897990478, + "grad_norm": 0.748073160648346, + "learning_rate": 0.0001939644616538027, + "loss": 2.7798, + "step": 2312 + }, + { + "epoch": 0.18666774271648778, + "grad_norm": 0.8166958689689636, + "learning_rate": 0.00019395905896418296, + "loss": 2.661, + "step": 2313 + }, + { + "epoch": 0.1867484464530708, + "grad_norm": 0.796791672706604, + "learning_rate": 
0.00019395365393285786, + "loss": 2.7297, + "step": 2314 + }, + { + "epoch": 0.1868291501896538, + "grad_norm": 0.7851170897483826, + "learning_rate": 0.0001939482465599621, + "loss": 2.7798, + "step": 2315 + }, + { + "epoch": 0.1869098539262368, + "grad_norm": 0.7545836567878723, + "learning_rate": 0.00019394283684563045, + "loss": 2.7327, + "step": 2316 + }, + { + "epoch": 0.1869905576628198, + "grad_norm": 0.8100360631942749, + "learning_rate": 0.00019393742478999776, + "loss": 2.7901, + "step": 2317 + }, + { + "epoch": 0.1870712613994028, + "grad_norm": 0.7874314785003662, + "learning_rate": 0.00019393201039319887, + "loss": 2.7597, + "step": 2318 + }, + { + "epoch": 0.1871519651359858, + "grad_norm": 0.7698730826377869, + "learning_rate": 0.00019392659365536876, + "loss": 2.7327, + "step": 2319 + }, + { + "epoch": 0.1872326688725688, + "grad_norm": 0.7417994141578674, + "learning_rate": 0.0001939211745766424, + "loss": 2.7413, + "step": 2320 + }, + { + "epoch": 0.1873133726091518, + "grad_norm": 0.7823258638381958, + "learning_rate": 0.00019391575315715485, + "loss": 2.7577, + "step": 2321 + }, + { + "epoch": 0.1873940763457348, + "grad_norm": 0.82382732629776, + "learning_rate": 0.00019391032939704124, + "loss": 2.7769, + "step": 2322 + }, + { + "epoch": 0.1874747800823178, + "grad_norm": 0.8405026197433472, + "learning_rate": 0.0001939049032964367, + "loss": 2.8402, + "step": 2323 + }, + { + "epoch": 0.1875554838189008, + "grad_norm": 0.8307906985282898, + "learning_rate": 0.00019389947485547654, + "loss": 2.7642, + "step": 2324 + }, + { + "epoch": 0.1876361875554838, + "grad_norm": 0.8618248701095581, + "learning_rate": 0.000193894044074296, + "loss": 2.7853, + "step": 2325 + }, + { + "epoch": 0.1877168912920668, + "grad_norm": 0.8040831685066223, + "learning_rate": 0.00019388861095303046, + "loss": 2.7467, + "step": 2326 + }, + { + "epoch": 0.18779759502864982, + "grad_norm": 0.7723637223243713, + "learning_rate": 0.0001938831754918153, + "loss": 2.7222, + 
"step": 2327 + }, + { + "epoch": 0.18787829876523282, + "grad_norm": 0.8189084529876709, + "learning_rate": 0.000193877737690786, + "loss": 2.7857, + "step": 2328 + }, + { + "epoch": 0.18795900250181583, + "grad_norm": 0.8335791826248169, + "learning_rate": 0.00019387229755007805, + "loss": 2.6997, + "step": 2329 + }, + { + "epoch": 0.18803970623839883, + "grad_norm": 0.7732782959938049, + "learning_rate": 0.00019386685506982707, + "loss": 2.7155, + "step": 2330 + }, + { + "epoch": 0.18812040997498183, + "grad_norm": 0.8262906670570374, + "learning_rate": 0.0001938614102501687, + "loss": 2.7638, + "step": 2331 + }, + { + "epoch": 0.18820111371156484, + "grad_norm": 0.7969058156013489, + "learning_rate": 0.00019385596309123862, + "loss": 2.7363, + "step": 2332 + }, + { + "epoch": 0.18828181744814784, + "grad_norm": 0.7834853529930115, + "learning_rate": 0.0001938505135931726, + "loss": 2.7205, + "step": 2333 + }, + { + "epoch": 0.18836252118473085, + "grad_norm": 0.748481810092926, + "learning_rate": 0.00019384506175610647, + "loss": 2.7759, + "step": 2334 + }, + { + "epoch": 0.18844322492131385, + "grad_norm": 0.8137786984443665, + "learning_rate": 0.00019383960758017604, + "loss": 2.828, + "step": 2335 + }, + { + "epoch": 0.18852392865789686, + "grad_norm": 0.8065745234489441, + "learning_rate": 0.00019383415106551734, + "loss": 2.7408, + "step": 2336 + }, + { + "epoch": 0.18860463239447986, + "grad_norm": 0.768643856048584, + "learning_rate": 0.0001938286922122663, + "loss": 2.6503, + "step": 2337 + }, + { + "epoch": 0.18868533613106286, + "grad_norm": 0.7677921652793884, + "learning_rate": 0.00019382323102055897, + "loss": 2.7088, + "step": 2338 + }, + { + "epoch": 0.18876603986764587, + "grad_norm": 0.7648717164993286, + "learning_rate": 0.0001938177674905315, + "loss": 2.7015, + "step": 2339 + }, + { + "epoch": 0.18884674360422887, + "grad_norm": 0.7517116665840149, + "learning_rate": 0.00019381230162231997, + "loss": 2.7095, + "step": 2340 + }, + { + "epoch": 
0.18892744734081188, + "grad_norm": 0.8147841691970825, + "learning_rate": 0.00019380683341606067, + "loss": 2.8563, + "step": 2341 + }, + { + "epoch": 0.18900815107739488, + "grad_norm": 0.7849822640419006, + "learning_rate": 0.00019380136287188988, + "loss": 2.7432, + "step": 2342 + }, + { + "epoch": 0.18908885481397789, + "grad_norm": 0.813811719417572, + "learning_rate": 0.0001937958899899439, + "loss": 2.7419, + "step": 2343 + }, + { + "epoch": 0.1891695585505609, + "grad_norm": 0.8142707943916321, + "learning_rate": 0.00019379041477035923, + "loss": 2.7658, + "step": 2344 + }, + { + "epoch": 0.1892502622871439, + "grad_norm": 0.7594506740570068, + "learning_rate": 0.00019378493721327217, + "loss": 2.7298, + "step": 2345 + }, + { + "epoch": 0.1893309660237269, + "grad_norm": 0.8374232053756714, + "learning_rate": 0.00019377945731881936, + "loss": 2.8112, + "step": 2346 + }, + { + "epoch": 0.1894116697603099, + "grad_norm": 0.783608615398407, + "learning_rate": 0.00019377397508713734, + "loss": 2.8168, + "step": 2347 + }, + { + "epoch": 0.1894923734968929, + "grad_norm": 0.720214307308197, + "learning_rate": 0.0001937684905183627, + "loss": 2.7516, + "step": 2348 + }, + { + "epoch": 0.1895730772334759, + "grad_norm": 0.7939600944519043, + "learning_rate": 0.0001937630036126322, + "loss": 2.7609, + "step": 2349 + }, + { + "epoch": 0.18965378097005892, + "grad_norm": 0.787315309047699, + "learning_rate": 0.00019375751437008252, + "loss": 2.758, + "step": 2350 + }, + { + "epoch": 0.18973448470664192, + "grad_norm": 0.7862411141395569, + "learning_rate": 0.00019375202279085053, + "loss": 2.6866, + "step": 2351 + }, + { + "epoch": 0.18981518844322492, + "grad_norm": 0.8651136159896851, + "learning_rate": 0.000193746528875073, + "loss": 2.7488, + "step": 2352 + }, + { + "epoch": 0.18989589217980793, + "grad_norm": 0.8150602579116821, + "learning_rate": 0.00019374103262288696, + "loss": 2.7417, + "step": 2353 + }, + { + "epoch": 0.18997659591639093, + "grad_norm": 
0.9053540229797363, + "learning_rate": 0.00019373553403442934, + "loss": 2.7587, + "step": 2354 + }, + { + "epoch": 0.19005729965297394, + "grad_norm": 0.8775703310966492, + "learning_rate": 0.0001937300331098372, + "loss": 2.733, + "step": 2355 + }, + { + "epoch": 0.19013800338955694, + "grad_norm": 0.7714357972145081, + "learning_rate": 0.0001937245298492476, + "loss": 2.7595, + "step": 2356 + }, + { + "epoch": 0.19021870712613995, + "grad_norm": 0.8648017048835754, + "learning_rate": 0.0001937190242527977, + "loss": 2.7944, + "step": 2357 + }, + { + "epoch": 0.19029941086272295, + "grad_norm": 0.9367388486862183, + "learning_rate": 0.00019371351632062477, + "loss": 2.7902, + "step": 2358 + }, + { + "epoch": 0.19038011459930596, + "grad_norm": 0.8116368651390076, + "learning_rate": 0.00019370800605286604, + "loss": 2.7291, + "step": 2359 + }, + { + "epoch": 0.19046081833588896, + "grad_norm": 0.7892753481864929, + "learning_rate": 0.00019370249344965882, + "loss": 2.8192, + "step": 2360 + }, + { + "epoch": 0.19054152207247196, + "grad_norm": 0.8109372854232788, + "learning_rate": 0.00019369697851114056, + "loss": 2.6982, + "step": 2361 + }, + { + "epoch": 0.19062222580905497, + "grad_norm": 0.8756314516067505, + "learning_rate": 0.00019369146123744864, + "loss": 2.744, + "step": 2362 + }, + { + "epoch": 0.19070292954563797, + "grad_norm": 0.7400399446487427, + "learning_rate": 0.00019368594162872058, + "loss": 2.7328, + "step": 2363 + }, + { + "epoch": 0.19078363328222098, + "grad_norm": 0.8223158717155457, + "learning_rate": 0.000193680419685094, + "loss": 2.7614, + "step": 2364 + }, + { + "epoch": 0.19086433701880398, + "grad_norm": 0.7350139617919922, + "learning_rate": 0.00019367489540670645, + "loss": 2.7074, + "step": 2365 + }, + { + "epoch": 0.19094504075538699, + "grad_norm": 0.7915631532669067, + "learning_rate": 0.00019366936879369563, + "loss": 2.7835, + "step": 2366 + }, + { + "epoch": 0.19102574449197, + "grad_norm": 0.7765628099441528, + 
"learning_rate": 0.00019366383984619932, + "loss": 2.765, + "step": 2367 + }, + { + "epoch": 0.191106448228553, + "grad_norm": 0.8127059936523438, + "learning_rate": 0.00019365830856435525, + "loss": 2.7753, + "step": 2368 + }, + { + "epoch": 0.191187151965136, + "grad_norm": 0.8652897477149963, + "learning_rate": 0.0001936527749483013, + "loss": 2.7137, + "step": 2369 + }, + { + "epoch": 0.191267855701719, + "grad_norm": 0.8086774945259094, + "learning_rate": 0.00019364723899817541, + "loss": 2.7209, + "step": 2370 + }, + { + "epoch": 0.191348559438302, + "grad_norm": 0.7965098023414612, + "learning_rate": 0.00019364170071411554, + "loss": 2.786, + "step": 2371 + }, + { + "epoch": 0.19142926317488498, + "grad_norm": 0.7954064607620239, + "learning_rate": 0.00019363616009625967, + "loss": 2.7508, + "step": 2372 + }, + { + "epoch": 0.191509966911468, + "grad_norm": 0.7835928201675415, + "learning_rate": 0.00019363061714474595, + "loss": 2.7423, + "step": 2373 + }, + { + "epoch": 0.191590670648051, + "grad_norm": 0.8720580339431763, + "learning_rate": 0.0001936250718597125, + "loss": 2.7877, + "step": 2374 + }, + { + "epoch": 0.191671374384634, + "grad_norm": 0.836066484451294, + "learning_rate": 0.00019361952424129747, + "loss": 2.8456, + "step": 2375 + }, + { + "epoch": 0.191752078121217, + "grad_norm": 0.793666660785675, + "learning_rate": 0.00019361397428963923, + "loss": 2.786, + "step": 2376 + }, + { + "epoch": 0.1918327818578, + "grad_norm": 0.8573217391967773, + "learning_rate": 0.000193608422004876, + "loss": 2.7569, + "step": 2377 + }, + { + "epoch": 0.191913485594383, + "grad_norm": 0.81243896484375, + "learning_rate": 0.00019360286738714623, + "loss": 2.771, + "step": 2378 + }, + { + "epoch": 0.19199418933096601, + "grad_norm": 0.7449626326560974, + "learning_rate": 0.00019359731043658832, + "loss": 2.7479, + "step": 2379 + }, + { + "epoch": 0.19207489306754902, + "grad_norm": 0.8124165534973145, + "learning_rate": 0.00019359175115334076, + "loss": 
2.7602, + "step": 2380 + }, + { + "epoch": 0.19215559680413202, + "grad_norm": 0.7786986827850342, + "learning_rate": 0.00019358618953754211, + "loss": 2.6926, + "step": 2381 + }, + { + "epoch": 0.19223630054071503, + "grad_norm": 0.7987258434295654, + "learning_rate": 0.000193580625589331, + "loss": 2.7573, + "step": 2382 + }, + { + "epoch": 0.19231700427729803, + "grad_norm": 0.8236463665962219, + "learning_rate": 0.00019357505930884606, + "loss": 2.6755, + "step": 2383 + }, + { + "epoch": 0.19239770801388104, + "grad_norm": 0.8285779356956482, + "learning_rate": 0.00019356949069622602, + "loss": 2.7658, + "step": 2384 + }, + { + "epoch": 0.19247841175046404, + "grad_norm": 0.7823960781097412, + "learning_rate": 0.0001935639197516097, + "loss": 2.7404, + "step": 2385 + }, + { + "epoch": 0.19255911548704704, + "grad_norm": 0.968638002872467, + "learning_rate": 0.00019355834647513591, + "loss": 2.7836, + "step": 2386 + }, + { + "epoch": 0.19263981922363005, + "grad_norm": 0.8170328736305237, + "learning_rate": 0.00019355277086694357, + "loss": 2.7816, + "step": 2387 + }, + { + "epoch": 0.19272052296021305, + "grad_norm": 0.8342583179473877, + "learning_rate": 0.00019354719292717163, + "loss": 2.8204, + "step": 2388 + }, + { + "epoch": 0.19280122669679606, + "grad_norm": 0.8160435557365417, + "learning_rate": 0.0001935416126559591, + "loss": 2.6938, + "step": 2389 + }, + { + "epoch": 0.19288193043337906, + "grad_norm": 0.7888174653053284, + "learning_rate": 0.00019353603005344504, + "loss": 2.6804, + "step": 2390 + }, + { + "epoch": 0.19296263416996207, + "grad_norm": 0.8389205932617188, + "learning_rate": 0.00019353044511976865, + "loss": 2.7571, + "step": 2391 + }, + { + "epoch": 0.19304333790654507, + "grad_norm": 0.7920562028884888, + "learning_rate": 0.00019352485785506906, + "loss": 2.7174, + "step": 2392 + }, + { + "epoch": 0.19312404164312807, + "grad_norm": 0.7853459715843201, + "learning_rate": 0.00019351926825948555, + "loss": 2.7626, + "step": 2393 + }, 
+ { + "epoch": 0.19320474537971108, + "grad_norm": 0.9109459519386292, + "learning_rate": 0.0001935136763331574, + "loss": 2.7568, + "step": 2394 + }, + { + "epoch": 0.19328544911629408, + "grad_norm": 0.7983853816986084, + "learning_rate": 0.00019350808207622397, + "loss": 2.7412, + "step": 2395 + }, + { + "epoch": 0.1933661528528771, + "grad_norm": 0.7416854500770569, + "learning_rate": 0.00019350248548882472, + "loss": 2.7335, + "step": 2396 + }, + { + "epoch": 0.1934468565894601, + "grad_norm": 0.7305171489715576, + "learning_rate": 0.0001934968865710991, + "loss": 2.7295, + "step": 2397 + }, + { + "epoch": 0.1935275603260431, + "grad_norm": 0.7717033624649048, + "learning_rate": 0.0001934912853231867, + "loss": 2.7568, + "step": 2398 + }, + { + "epoch": 0.1936082640626261, + "grad_norm": 0.7833831906318665, + "learning_rate": 0.00019348568174522705, + "loss": 2.736, + "step": 2399 + }, + { + "epoch": 0.1936889677992091, + "grad_norm": 0.872831404209137, + "learning_rate": 0.00019348007583735983, + "loss": 2.7719, + "step": 2400 + }, + { + "epoch": 0.1937696715357921, + "grad_norm": 0.8389193415641785, + "learning_rate": 0.0001934744675997248, + "loss": 2.7572, + "step": 2401 + }, + { + "epoch": 0.19385037527237511, + "grad_norm": 0.8442249298095703, + "learning_rate": 0.00019346885703246165, + "loss": 2.8117, + "step": 2402 + }, + { + "epoch": 0.19393107900895812, + "grad_norm": 0.8451170325279236, + "learning_rate": 0.00019346324413571027, + "loss": 2.7216, + "step": 2403 + }, + { + "epoch": 0.19401178274554112, + "grad_norm": 0.898529052734375, + "learning_rate": 0.00019345762890961052, + "loss": 2.8119, + "step": 2404 + }, + { + "epoch": 0.19409248648212413, + "grad_norm": 0.8302313685417175, + "learning_rate": 0.00019345201135430236, + "loss": 2.76, + "step": 2405 + }, + { + "epoch": 0.19417319021870713, + "grad_norm": 0.8975207209587097, + "learning_rate": 0.00019344639146992582, + "loss": 2.8043, + "step": 2406 + }, + { + "epoch": 0.19425389395529014, + 
"grad_norm": 0.8972581028938293, + "learning_rate": 0.0001934407692566209, + "loss": 2.7487, + "step": 2407 + }, + { + "epoch": 0.19433459769187314, + "grad_norm": 0.8311447501182556, + "learning_rate": 0.00019343514471452776, + "loss": 2.7653, + "step": 2408 + }, + { + "epoch": 0.19441530142845614, + "grad_norm": 0.8336243033409119, + "learning_rate": 0.0001934295178437866, + "loss": 2.753, + "step": 2409 + }, + { + "epoch": 0.19449600516503915, + "grad_norm": 0.8339207172393799, + "learning_rate": 0.0001934238886445376, + "loss": 2.7643, + "step": 2410 + }, + { + "epoch": 0.19457670890162215, + "grad_norm": 0.906074583530426, + "learning_rate": 0.0001934182571169211, + "loss": 2.7777, + "step": 2411 + }, + { + "epoch": 0.19465741263820516, + "grad_norm": 0.8759943246841431, + "learning_rate": 0.00019341262326107742, + "loss": 2.77, + "step": 2412 + }, + { + "epoch": 0.19473811637478816, + "grad_norm": 0.8399369716644287, + "learning_rate": 0.00019340698707714699, + "loss": 2.752, + "step": 2413 + }, + { + "epoch": 0.19481882011137117, + "grad_norm": 0.8551808595657349, + "learning_rate": 0.00019340134856527026, + "loss": 2.6727, + "step": 2414 + }, + { + "epoch": 0.19489952384795417, + "grad_norm": 0.7660732865333557, + "learning_rate": 0.00019339570772558778, + "loss": 2.7491, + "step": 2415 + }, + { + "epoch": 0.19498022758453717, + "grad_norm": 0.8257685303688049, + "learning_rate": 0.00019339006455824015, + "loss": 2.7584, + "step": 2416 + }, + { + "epoch": 0.19506093132112018, + "grad_norm": 0.797275960445404, + "learning_rate": 0.00019338441906336794, + "loss": 2.7051, + "step": 2417 + }, + { + "epoch": 0.19514163505770318, + "grad_norm": 0.8311913013458252, + "learning_rate": 0.00019337877124111193, + "loss": 2.8084, + "step": 2418 + }, + { + "epoch": 0.1952223387942862, + "grad_norm": 0.7995893359184265, + "learning_rate": 0.0001933731210916128, + "loss": 2.7556, + "step": 2419 + }, + { + "epoch": 0.1953030425308692, + "grad_norm": 0.792850136756897, + 
"learning_rate": 0.00019336746861501147, + "loss": 2.7289, + "step": 2420 + }, + { + "epoch": 0.1953837462674522, + "grad_norm": 0.8058848977088928, + "learning_rate": 0.00019336181381144873, + "loss": 2.7394, + "step": 2421 + }, + { + "epoch": 0.1954644500040352, + "grad_norm": 0.8267124891281128, + "learning_rate": 0.00019335615668106555, + "loss": 2.771, + "step": 2422 + }, + { + "epoch": 0.19554515374061818, + "grad_norm": 0.7641060948371887, + "learning_rate": 0.00019335049722400292, + "loss": 2.7311, + "step": 2423 + }, + { + "epoch": 0.19562585747720118, + "grad_norm": 0.8023245930671692, + "learning_rate": 0.00019334483544040186, + "loss": 2.7658, + "step": 2424 + }, + { + "epoch": 0.19570656121378419, + "grad_norm": 0.8341927528381348, + "learning_rate": 0.00019333917133040348, + "loss": 2.7476, + "step": 2425 + }, + { + "epoch": 0.1957872649503672, + "grad_norm": 0.7985726594924927, + "learning_rate": 0.000193333504894149, + "loss": 2.7362, + "step": 2426 + }, + { + "epoch": 0.1958679686869502, + "grad_norm": 0.7267594933509827, + "learning_rate": 0.0001933278361317796, + "loss": 2.6875, + "step": 2427 + }, + { + "epoch": 0.1959486724235332, + "grad_norm": 0.8292990326881409, + "learning_rate": 0.00019332216504343652, + "loss": 2.7619, + "step": 2428 + }, + { + "epoch": 0.1960293761601162, + "grad_norm": 0.7549588680267334, + "learning_rate": 0.00019331649162926116, + "loss": 2.7385, + "step": 2429 + }, + { + "epoch": 0.1961100798966992, + "grad_norm": 0.7688446640968323, + "learning_rate": 0.0001933108158893949, + "loss": 2.7544, + "step": 2430 + }, + { + "epoch": 0.1961907836332822, + "grad_norm": 0.8168436884880066, + "learning_rate": 0.00019330513782397918, + "loss": 2.8013, + "step": 2431 + }, + { + "epoch": 0.19627148736986522, + "grad_norm": 0.8405759334564209, + "learning_rate": 0.00019329945743315556, + "loss": 2.7299, + "step": 2432 + }, + { + "epoch": 0.19635219110644822, + "grad_norm": 0.79430091381073, + "learning_rate": 
0.00019329377471706554, + "loss": 2.7293, + "step": 2433 + }, + { + "epoch": 0.19643289484303122, + "grad_norm": 0.8428656458854675, + "learning_rate": 0.0001932880896758508, + "loss": 2.8211, + "step": 2434 + }, + { + "epoch": 0.19651359857961423, + "grad_norm": 0.7883139252662659, + "learning_rate": 0.00019328240230965298, + "loss": 2.6943, + "step": 2435 + }, + { + "epoch": 0.19659430231619723, + "grad_norm": 0.7539335489273071, + "learning_rate": 0.00019327671261861387, + "loss": 2.6926, + "step": 2436 + }, + { + "epoch": 0.19667500605278024, + "grad_norm": 0.9986057281494141, + "learning_rate": 0.00019327102060287524, + "loss": 2.7851, + "step": 2437 + }, + { + "epoch": 0.19675570978936324, + "grad_norm": 0.7716113924980164, + "learning_rate": 0.000193265326262579, + "loss": 2.752, + "step": 2438 + }, + { + "epoch": 0.19683641352594625, + "grad_norm": 0.9134296774864197, + "learning_rate": 0.000193259629597867, + "loss": 2.7698, + "step": 2439 + }, + { + "epoch": 0.19691711726252925, + "grad_norm": 0.7966345548629761, + "learning_rate": 0.00019325393060888124, + "loss": 2.7839, + "step": 2440 + }, + { + "epoch": 0.19699782099911226, + "grad_norm": 0.8051251173019409, + "learning_rate": 0.0001932482292957638, + "loss": 2.7322, + "step": 2441 + }, + { + "epoch": 0.19707852473569526, + "grad_norm": 0.843169629573822, + "learning_rate": 0.0001932425256586567, + "loss": 2.8263, + "step": 2442 + }, + { + "epoch": 0.19715922847227826, + "grad_norm": 0.7552370429039001, + "learning_rate": 0.00019323681969770213, + "loss": 2.7342, + "step": 2443 + }, + { + "epoch": 0.19723993220886127, + "grad_norm": 0.844473123550415, + "learning_rate": 0.0001932311114130423, + "loss": 2.776, + "step": 2444 + }, + { + "epoch": 0.19732063594544427, + "grad_norm": 0.8002473711967468, + "learning_rate": 0.00019322540080481945, + "loss": 2.7382, + "step": 2445 + }, + { + "epoch": 0.19740133968202728, + "grad_norm": 0.8564329147338867, + "learning_rate": 0.00019321968787317594, + "loss": 
2.7592, + "step": 2446 + }, + { + "epoch": 0.19748204341861028, + "grad_norm": 0.7853825688362122, + "learning_rate": 0.00019321397261825408, + "loss": 2.7101, + "step": 2447 + }, + { + "epoch": 0.19756274715519329, + "grad_norm": 0.8482939004898071, + "learning_rate": 0.0001932082550401964, + "loss": 2.7891, + "step": 2448 + }, + { + "epoch": 0.1976434508917763, + "grad_norm": 0.8361770510673523, + "learning_rate": 0.00019320253513914536, + "loss": 2.7341, + "step": 2449 + }, + { + "epoch": 0.1977241546283593, + "grad_norm": 0.7814618945121765, + "learning_rate": 0.0001931968129152435, + "loss": 2.771, + "step": 2450 + }, + { + "epoch": 0.1978048583649423, + "grad_norm": 0.7588146924972534, + "learning_rate": 0.00019319108836863343, + "loss": 2.7577, + "step": 2451 + }, + { + "epoch": 0.1978855621015253, + "grad_norm": 0.9184895157814026, + "learning_rate": 0.00019318536149945785, + "loss": 2.7711, + "step": 2452 + }, + { + "epoch": 0.1979662658381083, + "grad_norm": 0.8454298973083496, + "learning_rate": 0.00019317963230785947, + "loss": 2.7748, + "step": 2453 + }, + { + "epoch": 0.1980469695746913, + "grad_norm": 0.7662420868873596, + "learning_rate": 0.0001931739007939811, + "loss": 2.7704, + "step": 2454 + }, + { + "epoch": 0.19812767331127432, + "grad_norm": 0.837888777256012, + "learning_rate": 0.0001931681669579655, + "loss": 2.7613, + "step": 2455 + }, + { + "epoch": 0.19820837704785732, + "grad_norm": 0.7835226058959961, + "learning_rate": 0.0001931624307999557, + "loss": 2.6888, + "step": 2456 + }, + { + "epoch": 0.19828908078444032, + "grad_norm": 0.8491464257240295, + "learning_rate": 0.00019315669232009456, + "loss": 2.7521, + "step": 2457 + }, + { + "epoch": 0.19836978452102333, + "grad_norm": 0.7590088248252869, + "learning_rate": 0.00019315095151852516, + "loss": 2.7441, + "step": 2458 + }, + { + "epoch": 0.19845048825760633, + "grad_norm": 0.9316127300262451, + "learning_rate": 0.00019314520839539052, + "loss": 2.786, + "step": 2459 + }, + { + 
"epoch": 0.19853119199418934, + "grad_norm": 0.7819615006446838, + "learning_rate": 0.0001931394629508338, + "loss": 2.7003, + "step": 2460 + }, + { + "epoch": 0.19861189573077234, + "grad_norm": 0.7675932049751282, + "learning_rate": 0.0001931337151849982, + "loss": 2.7065, + "step": 2461 + }, + { + "epoch": 0.19869259946735535, + "grad_norm": 0.7797678112983704, + "learning_rate": 0.000193127965098027, + "loss": 2.7605, + "step": 2462 + }, + { + "epoch": 0.19877330320393835, + "grad_norm": 0.789544403553009, + "learning_rate": 0.00019312221269006345, + "loss": 2.7913, + "step": 2463 + }, + { + "epoch": 0.19885400694052136, + "grad_norm": 0.9594957232475281, + "learning_rate": 0.00019311645796125094, + "loss": 2.785, + "step": 2464 + }, + { + "epoch": 0.19893471067710436, + "grad_norm": 0.8154739141464233, + "learning_rate": 0.00019311070091173287, + "loss": 2.6716, + "step": 2465 + }, + { + "epoch": 0.19901541441368736, + "grad_norm": 0.9042142629623413, + "learning_rate": 0.00019310494154165274, + "loss": 2.734, + "step": 2466 + }, + { + "epoch": 0.19909611815027037, + "grad_norm": 0.7803483605384827, + "learning_rate": 0.0001930991798511541, + "loss": 2.7052, + "step": 2467 + }, + { + "epoch": 0.19917682188685337, + "grad_norm": 0.7917614579200745, + "learning_rate": 0.00019309341584038055, + "loss": 2.728, + "step": 2468 + }, + { + "epoch": 0.19925752562343638, + "grad_norm": 0.8295063376426697, + "learning_rate": 0.00019308764950947568, + "loss": 2.7496, + "step": 2469 + }, + { + "epoch": 0.19933822936001938, + "grad_norm": 0.790831983089447, + "learning_rate": 0.0001930818808585833, + "loss": 2.7356, + "step": 2470 + }, + { + "epoch": 0.19941893309660239, + "grad_norm": 0.8527843952178955, + "learning_rate": 0.0001930761098878471, + "loss": 2.718, + "step": 2471 + }, + { + "epoch": 0.1994996368331854, + "grad_norm": 0.8518494367599487, + "learning_rate": 0.00019307033659741096, + "loss": 2.7189, + "step": 2472 + }, + { + "epoch": 0.1995803405697684, + 
"grad_norm": 0.8027220368385315, + "learning_rate": 0.00019306456098741872, + "loss": 2.7272, + "step": 2473 + }, + { + "epoch": 0.19966104430635137, + "grad_norm": 0.7516468167304993, + "learning_rate": 0.00019305878305801434, + "loss": 2.798, + "step": 2474 + }, + { + "epoch": 0.19974174804293438, + "grad_norm": 0.7676397562026978, + "learning_rate": 0.00019305300280934187, + "loss": 2.8076, + "step": 2475 + }, + { + "epoch": 0.19982245177951738, + "grad_norm": 0.8237762451171875, + "learning_rate": 0.00019304722024154528, + "loss": 2.6998, + "step": 2476 + }, + { + "epoch": 0.19990315551610038, + "grad_norm": 0.8397759199142456, + "learning_rate": 0.0001930414353547688, + "loss": 2.806, + "step": 2477 + }, + { + "epoch": 0.1999838592526834, + "grad_norm": 0.8911117911338806, + "learning_rate": 0.00019303564814915645, + "loss": 2.7566, + "step": 2478 + }, + { + "epoch": 0.2000645629892664, + "grad_norm": 0.765404999256134, + "learning_rate": 0.00019302985862485264, + "loss": 2.7363, + "step": 2479 + }, + { + "epoch": 0.2001452667258494, + "grad_norm": 0.7898589372634888, + "learning_rate": 0.0001930240667820015, + "loss": 2.7007, + "step": 2480 + }, + { + "epoch": 0.2002259704624324, + "grad_norm": 0.7581521272659302, + "learning_rate": 0.0001930182726207475, + "loss": 2.7508, + "step": 2481 + }, + { + "epoch": 0.2003066741990154, + "grad_norm": 0.8179795742034912, + "learning_rate": 0.00019301247614123495, + "loss": 2.7327, + "step": 2482 + }, + { + "epoch": 0.2003873779355984, + "grad_norm": 0.8103611469268799, + "learning_rate": 0.00019300667734360838, + "loss": 2.7869, + "step": 2483 + }, + { + "epoch": 0.20046808167218141, + "grad_norm": 0.7368054389953613, + "learning_rate": 0.0001930008762280123, + "loss": 2.73, + "step": 2484 + }, + { + "epoch": 0.20054878540876442, + "grad_norm": 0.7679662108421326, + "learning_rate": 0.00019299507279459127, + "loss": 2.7905, + "step": 2485 + }, + { + "epoch": 0.20062948914534742, + "grad_norm": 0.7783839702606201, + 
"learning_rate": 0.0001929892670434899, + "loss": 2.6816, + "step": 2486 + }, + { + "epoch": 0.20071019288193043, + "grad_norm": 0.7575809359550476, + "learning_rate": 0.00019298345897485298, + "loss": 2.7351, + "step": 2487 + }, + { + "epoch": 0.20079089661851343, + "grad_norm": 0.7674959301948547, + "learning_rate": 0.00019297764858882514, + "loss": 2.7682, + "step": 2488 + }, + { + "epoch": 0.20087160035509644, + "grad_norm": 0.7972592115402222, + "learning_rate": 0.00019297183588555127, + "loss": 2.782, + "step": 2489 + }, + { + "epoch": 0.20095230409167944, + "grad_norm": 0.8417105674743652, + "learning_rate": 0.00019296602086517624, + "loss": 2.8173, + "step": 2490 + }, + { + "epoch": 0.20103300782826244, + "grad_norm": 0.7194239497184753, + "learning_rate": 0.00019296020352784496, + "loss": 2.7735, + "step": 2491 + }, + { + "epoch": 0.20111371156484545, + "grad_norm": 0.801895022392273, + "learning_rate": 0.00019295438387370237, + "loss": 2.7018, + "step": 2492 + }, + { + "epoch": 0.20119441530142845, + "grad_norm": 0.900943398475647, + "learning_rate": 0.0001929485619028936, + "loss": 2.77, + "step": 2493 + }, + { + "epoch": 0.20127511903801146, + "grad_norm": 0.7882106304168701, + "learning_rate": 0.00019294273761556366, + "loss": 2.7195, + "step": 2494 + }, + { + "epoch": 0.20135582277459446, + "grad_norm": 0.7471950054168701, + "learning_rate": 0.00019293691101185775, + "loss": 2.7346, + "step": 2495 + }, + { + "epoch": 0.20143652651117747, + "grad_norm": 0.7498352527618408, + "learning_rate": 0.00019293108209192104, + "loss": 2.7255, + "step": 2496 + }, + { + "epoch": 0.20151723024776047, + "grad_norm": 0.8233164548873901, + "learning_rate": 0.0001929252508558989, + "loss": 2.8253, + "step": 2497 + }, + { + "epoch": 0.20159793398434347, + "grad_norm": 0.7533289790153503, + "learning_rate": 0.00019291941730393658, + "loss": 2.7487, + "step": 2498 + }, + { + "epoch": 0.20167863772092648, + "grad_norm": 0.7372691035270691, + "learning_rate": 
0.0001929135814361795, + "loss": 2.6799, + "step": 2499 + }, + { + "epoch": 0.20175934145750948, + "grad_norm": 0.7760890126228333, + "learning_rate": 0.00019290774325277305, + "loss": 2.8366, + "step": 2500 + }, + { + "epoch": 0.2018400451940925, + "grad_norm": 0.7653746008872986, + "learning_rate": 0.0001929019027538628, + "loss": 2.7413, + "step": 2501 + }, + { + "epoch": 0.2019207489306755, + "grad_norm": 0.7364951372146606, + "learning_rate": 0.0001928960599395943, + "loss": 2.7405, + "step": 2502 + }, + { + "epoch": 0.2020014526672585, + "grad_norm": 0.8317872285842896, + "learning_rate": 0.00019289021481011314, + "loss": 2.7186, + "step": 2503 + }, + { + "epoch": 0.2020821564038415, + "grad_norm": 0.8325691223144531, + "learning_rate": 0.00019288436736556502, + "loss": 2.7305, + "step": 2504 + }, + { + "epoch": 0.2021628601404245, + "grad_norm": 0.7674683332443237, + "learning_rate": 0.00019287851760609566, + "loss": 2.7171, + "step": 2505 + }, + { + "epoch": 0.2022435638770075, + "grad_norm": 0.8043155074119568, + "learning_rate": 0.00019287266553185084, + "loss": 2.7425, + "step": 2506 + }, + { + "epoch": 0.2023242676135905, + "grad_norm": 0.8522058725357056, + "learning_rate": 0.00019286681114297642, + "loss": 2.7764, + "step": 2507 + }, + { + "epoch": 0.20240497135017352, + "grad_norm": 0.7700086236000061, + "learning_rate": 0.00019286095443961832, + "loss": 2.7499, + "step": 2508 + }, + { + "epoch": 0.20248567508675652, + "grad_norm": 0.8078013062477112, + "learning_rate": 0.0001928550954219225, + "loss": 2.7863, + "step": 2509 + }, + { + "epoch": 0.20256637882333953, + "grad_norm": 0.7431712746620178, + "learning_rate": 0.00019284923409003496, + "loss": 2.8296, + "step": 2510 + }, + { + "epoch": 0.20264708255992253, + "grad_norm": 0.753754734992981, + "learning_rate": 0.00019284337044410182, + "loss": 2.722, + "step": 2511 + }, + { + "epoch": 0.20272778629650554, + "grad_norm": 0.8117631077766418, + "learning_rate": 0.00019283750448426918, + "loss": 
2.7718, + "step": 2512 + }, + { + "epoch": 0.20280849003308854, + "grad_norm": 0.9149020910263062, + "learning_rate": 0.00019283163621068325, + "loss": 2.7416, + "step": 2513 + }, + { + "epoch": 0.20288919376967154, + "grad_norm": 0.8240262866020203, + "learning_rate": 0.0001928257656234903, + "loss": 2.811, + "step": 2514 + }, + { + "epoch": 0.20296989750625455, + "grad_norm": 0.7394035458564758, + "learning_rate": 0.00019281989272283657, + "loss": 2.7345, + "step": 2515 + }, + { + "epoch": 0.20305060124283755, + "grad_norm": 0.7827345132827759, + "learning_rate": 0.00019281401750886854, + "loss": 2.7955, + "step": 2516 + }, + { + "epoch": 0.20313130497942056, + "grad_norm": 0.7482333183288574, + "learning_rate": 0.00019280813998173252, + "loss": 2.6963, + "step": 2517 + }, + { + "epoch": 0.20321200871600356, + "grad_norm": 0.8187180757522583, + "learning_rate": 0.00019280226014157509, + "loss": 2.7413, + "step": 2518 + }, + { + "epoch": 0.20329271245258657, + "grad_norm": 0.7708666920661926, + "learning_rate": 0.00019279637798854274, + "loss": 2.7636, + "step": 2519 + }, + { + "epoch": 0.20337341618916957, + "grad_norm": 0.7414180040359497, + "learning_rate": 0.00019279049352278208, + "loss": 2.7321, + "step": 2520 + }, + { + "epoch": 0.20345411992575257, + "grad_norm": 0.8172248601913452, + "learning_rate": 0.00019278460674443975, + "loss": 2.8026, + "step": 2521 + }, + { + "epoch": 0.20353482366233558, + "grad_norm": 0.7463089227676392, + "learning_rate": 0.0001927787176536625, + "loss": 2.74, + "step": 2522 + }, + { + "epoch": 0.20361552739891858, + "grad_norm": 0.7684210538864136, + "learning_rate": 0.00019277282625059704, + "loss": 2.782, + "step": 2523 + }, + { + "epoch": 0.2036962311355016, + "grad_norm": 0.9246797561645508, + "learning_rate": 0.00019276693253539027, + "loss": 2.8546, + "step": 2524 + }, + { + "epoch": 0.20377693487208456, + "grad_norm": 0.753753125667572, + "learning_rate": 0.00019276103650818906, + "loss": 2.7422, + "step": 2525 + }, + { 
+ "epoch": 0.20385763860866757, + "grad_norm": 0.7461897134780884, + "learning_rate": 0.00019275513816914032, + "loss": 2.7575, + "step": 2526 + }, + { + "epoch": 0.20393834234525057, + "grad_norm": 0.7555257081985474, + "learning_rate": 0.00019274923751839106, + "loss": 2.7423, + "step": 2527 + }, + { + "epoch": 0.20401904608183358, + "grad_norm": 0.7628511786460876, + "learning_rate": 0.00019274333455608837, + "loss": 2.7386, + "step": 2528 + }, + { + "epoch": 0.20409974981841658, + "grad_norm": 0.7529371976852417, + "learning_rate": 0.00019273742928237937, + "loss": 2.6852, + "step": 2529 + }, + { + "epoch": 0.20418045355499959, + "grad_norm": 0.7466779351234436, + "learning_rate": 0.00019273152169741118, + "loss": 2.6996, + "step": 2530 + }, + { + "epoch": 0.2042611572915826, + "grad_norm": 0.7916153073310852, + "learning_rate": 0.0001927256118013311, + "loss": 2.7644, + "step": 2531 + }, + { + "epoch": 0.2043418610281656, + "grad_norm": 0.7662972211837769, + "learning_rate": 0.00019271969959428636, + "loss": 2.7497, + "step": 2532 + }, + { + "epoch": 0.2044225647647486, + "grad_norm": 0.8244680166244507, + "learning_rate": 0.00019271378507642432, + "loss": 2.7598, + "step": 2533 + }, + { + "epoch": 0.2045032685013316, + "grad_norm": 0.7721532583236694, + "learning_rate": 0.00019270786824789244, + "loss": 2.7303, + "step": 2534 + }, + { + "epoch": 0.2045839722379146, + "grad_norm": 0.7598209381103516, + "learning_rate": 0.0001927019491088381, + "loss": 2.734, + "step": 2535 + }, + { + "epoch": 0.2046646759744976, + "grad_norm": 0.7778685092926025, + "learning_rate": 0.00019269602765940887, + "loss": 2.7113, + "step": 2536 + }, + { + "epoch": 0.20474537971108062, + "grad_norm": 0.7447141408920288, + "learning_rate": 0.00019269010389975235, + "loss": 2.7205, + "step": 2537 + }, + { + "epoch": 0.20482608344766362, + "grad_norm": 0.8066664338111877, + "learning_rate": 0.00019268417783001613, + "loss": 2.7637, + "step": 2538 + }, + { + "epoch": 0.20490678718424662, 
+ "grad_norm": 0.7055318355560303, + "learning_rate": 0.00019267824945034794, + "loss": 2.6936, + "step": 2539 + }, + { + "epoch": 0.20498749092082963, + "grad_norm": 0.832647979259491, + "learning_rate": 0.0001926723187608955, + "loss": 2.7423, + "step": 2540 + }, + { + "epoch": 0.20506819465741263, + "grad_norm": 0.7316983938217163, + "learning_rate": 0.0001926663857618066, + "loss": 2.7136, + "step": 2541 + }, + { + "epoch": 0.20514889839399564, + "grad_norm": 0.8115554451942444, + "learning_rate": 0.00019266045045322915, + "loss": 2.6964, + "step": 2542 + }, + { + "epoch": 0.20522960213057864, + "grad_norm": 0.802573025226593, + "learning_rate": 0.00019265451283531108, + "loss": 2.7989, + "step": 2543 + }, + { + "epoch": 0.20531030586716165, + "grad_norm": 0.7073348164558411, + "learning_rate": 0.00019264857290820033, + "loss": 2.7399, + "step": 2544 + }, + { + "epoch": 0.20539100960374465, + "grad_norm": 0.7749258279800415, + "learning_rate": 0.00019264263067204495, + "loss": 2.7321, + "step": 2545 + }, + { + "epoch": 0.20547171334032766, + "grad_norm": 0.7473557591438293, + "learning_rate": 0.00019263668612699305, + "loss": 2.7774, + "step": 2546 + }, + { + "epoch": 0.20555241707691066, + "grad_norm": 0.8073423504829407, + "learning_rate": 0.0001926307392731928, + "loss": 2.7429, + "step": 2547 + }, + { + "epoch": 0.20563312081349366, + "grad_norm": 0.9106586575508118, + "learning_rate": 0.00019262479011079235, + "loss": 2.7972, + "step": 2548 + }, + { + "epoch": 0.20571382455007667, + "grad_norm": 0.7975970506668091, + "learning_rate": 0.00019261883863994002, + "loss": 2.7561, + "step": 2549 + }, + { + "epoch": 0.20579452828665967, + "grad_norm": 0.8967030048370361, + "learning_rate": 0.00019261288486078414, + "loss": 2.7368, + "step": 2550 + }, + { + "epoch": 0.20587523202324268, + "grad_norm": 0.7157345414161682, + "learning_rate": 0.00019260692877347304, + "loss": 2.7329, + "step": 2551 + }, + { + "epoch": 0.20595593575982568, + "grad_norm": 
0.8758620619773865, + "learning_rate": 0.00019260097037815524, + "loss": 2.7522, + "step": 2552 + }, + { + "epoch": 0.20603663949640869, + "grad_norm": 0.7948124408721924, + "learning_rate": 0.00019259500967497916, + "loss": 2.7675, + "step": 2553 + }, + { + "epoch": 0.2061173432329917, + "grad_norm": 0.8233941197395325, + "learning_rate": 0.00019258904666409344, + "loss": 2.7728, + "step": 2554 + }, + { + "epoch": 0.2061980469695747, + "grad_norm": 0.8084299564361572, + "learning_rate": 0.0001925830813456466, + "loss": 2.7728, + "step": 2555 + }, + { + "epoch": 0.2062787507061577, + "grad_norm": 0.8004557490348816, + "learning_rate": 0.00019257711371978737, + "loss": 2.7783, + "step": 2556 + }, + { + "epoch": 0.2063594544427407, + "grad_norm": 0.7999755144119263, + "learning_rate": 0.0001925711437866645, + "loss": 2.7632, + "step": 2557 + }, + { + "epoch": 0.2064401581793237, + "grad_norm": 0.7317264080047607, + "learning_rate": 0.0001925651715464267, + "loss": 2.7101, + "step": 2558 + }, + { + "epoch": 0.2065208619159067, + "grad_norm": 0.7906385660171509, + "learning_rate": 0.00019255919699922287, + "loss": 2.7258, + "step": 2559 + }, + { + "epoch": 0.20660156565248972, + "grad_norm": 0.7932917475700378, + "learning_rate": 0.0001925532201452019, + "loss": 2.7714, + "step": 2560 + }, + { + "epoch": 0.20668226938907272, + "grad_norm": 0.8039286732673645, + "learning_rate": 0.00019254724098451275, + "loss": 2.7469, + "step": 2561 + }, + { + "epoch": 0.20676297312565572, + "grad_norm": 0.79400634765625, + "learning_rate": 0.00019254125951730444, + "loss": 2.7499, + "step": 2562 + }, + { + "epoch": 0.20684367686223873, + "grad_norm": 0.8072263598442078, + "learning_rate": 0.00019253527574372603, + "loss": 2.7805, + "step": 2563 + }, + { + "epoch": 0.20692438059882173, + "grad_norm": 0.7117579579353333, + "learning_rate": 0.00019252928966392667, + "loss": 2.7321, + "step": 2564 + }, + { + "epoch": 0.20700508433540474, + "grad_norm": 0.7080324292182922, + 
"learning_rate": 0.00019252330127805554, + "loss": 2.7225, + "step": 2565 + }, + { + "epoch": 0.20708578807198774, + "grad_norm": 0.7276670336723328, + "learning_rate": 0.00019251731058626186, + "loss": 2.7592, + "step": 2566 + }, + { + "epoch": 0.20716649180857075, + "grad_norm": 0.8030811548233032, + "learning_rate": 0.00019251131758869495, + "loss": 2.7184, + "step": 2567 + }, + { + "epoch": 0.20724719554515375, + "grad_norm": 0.7808283567428589, + "learning_rate": 0.0001925053222855042, + "loss": 2.7504, + "step": 2568 + }, + { + "epoch": 0.20732789928173675, + "grad_norm": 0.783225953578949, + "learning_rate": 0.00019249932467683902, + "loss": 2.7125, + "step": 2569 + }, + { + "epoch": 0.20740860301831976, + "grad_norm": 0.7440134286880493, + "learning_rate": 0.00019249332476284887, + "loss": 2.7938, + "step": 2570 + }, + { + "epoch": 0.20748930675490276, + "grad_norm": 0.8729553818702698, + "learning_rate": 0.00019248732254368328, + "loss": 2.8338, + "step": 2571 + }, + { + "epoch": 0.20757001049148577, + "grad_norm": 0.8170497417449951, + "learning_rate": 0.0001924813180194918, + "loss": 2.7254, + "step": 2572 + }, + { + "epoch": 0.20765071422806877, + "grad_norm": 0.733220100402832, + "learning_rate": 0.00019247531119042418, + "loss": 2.6401, + "step": 2573 + }, + { + "epoch": 0.20773141796465178, + "grad_norm": 0.7247937917709351, + "learning_rate": 0.00019246930205663008, + "loss": 2.736, + "step": 2574 + }, + { + "epoch": 0.20781212170123478, + "grad_norm": 0.7880212068557739, + "learning_rate": 0.00019246329061825925, + "loss": 2.7173, + "step": 2575 + }, + { + "epoch": 0.20789282543781776, + "grad_norm": 0.820808470249176, + "learning_rate": 0.00019245727687546149, + "loss": 2.7331, + "step": 2576 + }, + { + "epoch": 0.20797352917440076, + "grad_norm": 0.8605412840843201, + "learning_rate": 0.00019245126082838673, + "loss": 2.761, + "step": 2577 + }, + { + "epoch": 0.20805423291098377, + "grad_norm": 0.763506293296814, + "learning_rate": 
0.00019244524247718486, + "loss": 2.7053, + "step": 2578 + }, + { + "epoch": 0.20813493664756677, + "grad_norm": 0.8428114652633667, + "learning_rate": 0.00019243922182200592, + "loss": 2.724, + "step": 2579 + }, + { + "epoch": 0.20821564038414977, + "grad_norm": 0.821986734867096, + "learning_rate": 0.0001924331988629999, + "loss": 2.7615, + "step": 2580 + }, + { + "epoch": 0.20829634412073278, + "grad_norm": 0.8177430629730225, + "learning_rate": 0.00019242717360031693, + "loss": 2.7012, + "step": 2581 + }, + { + "epoch": 0.20837704785731578, + "grad_norm": 0.7584180235862732, + "learning_rate": 0.00019242114603410724, + "loss": 2.7372, + "step": 2582 + }, + { + "epoch": 0.2084577515938988, + "grad_norm": 0.9384645223617554, + "learning_rate": 0.00019241511616452096, + "loss": 2.695, + "step": 2583 + }, + { + "epoch": 0.2085384553304818, + "grad_norm": 0.8518964648246765, + "learning_rate": 0.00019240908399170844, + "loss": 2.8216, + "step": 2584 + }, + { + "epoch": 0.2086191590670648, + "grad_norm": 0.9082949161529541, + "learning_rate": 0.00019240304951581995, + "loss": 2.777, + "step": 2585 + }, + { + "epoch": 0.2086998628036478, + "grad_norm": 0.7906371355056763, + "learning_rate": 0.00019239701273700597, + "loss": 2.7083, + "step": 2586 + }, + { + "epoch": 0.2087805665402308, + "grad_norm": 0.7711954712867737, + "learning_rate": 0.00019239097365541686, + "loss": 2.6907, + "step": 2587 + }, + { + "epoch": 0.2088612702768138, + "grad_norm": 0.8155506253242493, + "learning_rate": 0.0001923849322712032, + "loss": 2.7602, + "step": 2588 + }, + { + "epoch": 0.20894197401339681, + "grad_norm": 0.8843441009521484, + "learning_rate": 0.0001923788885845155, + "loss": 2.7525, + "step": 2589 + }, + { + "epoch": 0.20902267774997982, + "grad_norm": 0.7336379289627075, + "learning_rate": 0.00019237284259550444, + "loss": 2.731, + "step": 2590 + }, + { + "epoch": 0.20910338148656282, + "grad_norm": 0.8261263370513916, + "learning_rate": 0.00019236679430432066, + "loss": 
2.6493, + "step": 2591 + }, + { + "epoch": 0.20918408522314583, + "grad_norm": 0.7716216444969177, + "learning_rate": 0.00019236074371111497, + "loss": 2.7775, + "step": 2592 + }, + { + "epoch": 0.20926478895972883, + "grad_norm": 0.8390100598335266, + "learning_rate": 0.00019235469081603808, + "loss": 2.7532, + "step": 2593 + }, + { + "epoch": 0.20934549269631184, + "grad_norm": 0.8388446569442749, + "learning_rate": 0.00019234863561924087, + "loss": 2.8171, + "step": 2594 + }, + { + "epoch": 0.20942619643289484, + "grad_norm": 0.8003209829330444, + "learning_rate": 0.00019234257812087425, + "loss": 2.7385, + "step": 2595 + }, + { + "epoch": 0.20950690016947784, + "grad_norm": 0.8008458018302917, + "learning_rate": 0.00019233651832108918, + "loss": 2.7366, + "step": 2596 + }, + { + "epoch": 0.20958760390606085, + "grad_norm": 0.7701897025108337, + "learning_rate": 0.00019233045622003676, + "loss": 2.69, + "step": 2597 + }, + { + "epoch": 0.20966830764264385, + "grad_norm": 0.8106730580329895, + "learning_rate": 0.00019232439181786796, + "loss": 2.6911, + "step": 2598 + }, + { + "epoch": 0.20974901137922686, + "grad_norm": 0.9580766558647156, + "learning_rate": 0.00019231832511473401, + "loss": 2.7663, + "step": 2599 + }, + { + "epoch": 0.20982971511580986, + "grad_norm": 0.7851876616477966, + "learning_rate": 0.0001923122561107861, + "loss": 2.7632, + "step": 2600 + }, + { + "epoch": 0.20991041885239287, + "grad_norm": 0.8160942196846008, + "learning_rate": 0.0001923061848061754, + "loss": 2.8533, + "step": 2601 + }, + { + "epoch": 0.20999112258897587, + "grad_norm": 0.8540663719177246, + "learning_rate": 0.00019230011120105334, + "loss": 2.7083, + "step": 2602 + }, + { + "epoch": 0.21007182632555887, + "grad_norm": 0.8273833394050598, + "learning_rate": 0.0001922940352955712, + "loss": 2.7916, + "step": 2603 + }, + { + "epoch": 0.21015253006214188, + "grad_norm": 0.8394255638122559, + "learning_rate": 0.00019228795708988046, + "loss": 2.8561, + "step": 2604 + }, 
+ { + "epoch": 0.21023323379872488, + "grad_norm": 0.8291410803794861, + "learning_rate": 0.00019228187658413258, + "loss": 2.7462, + "step": 2605 + }, + { + "epoch": 0.2103139375353079, + "grad_norm": 0.7984235286712646, + "learning_rate": 0.00019227579377847912, + "loss": 2.7459, + "step": 2606 + }, + { + "epoch": 0.2103946412718909, + "grad_norm": 0.8343340158462524, + "learning_rate": 0.00019226970867307163, + "loss": 2.6963, + "step": 2607 + }, + { + "epoch": 0.2104753450084739, + "grad_norm": 0.6982808709144592, + "learning_rate": 0.00019226362126806184, + "loss": 2.7333, + "step": 2608 + }, + { + "epoch": 0.2105560487450569, + "grad_norm": 0.8039572834968567, + "learning_rate": 0.0001922575315636014, + "loss": 2.7253, + "step": 2609 + }, + { + "epoch": 0.2106367524816399, + "grad_norm": 0.8708705902099609, + "learning_rate": 0.00019225143955984214, + "loss": 2.7555, + "step": 2610 + }, + { + "epoch": 0.2107174562182229, + "grad_norm": 0.8773347735404968, + "learning_rate": 0.00019224534525693585, + "loss": 2.7598, + "step": 2611 + }, + { + "epoch": 0.2107981599548059, + "grad_norm": 0.8151054978370667, + "learning_rate": 0.0001922392486550344, + "loss": 2.7398, + "step": 2612 + }, + { + "epoch": 0.21087886369138892, + "grad_norm": 0.7922329306602478, + "learning_rate": 0.0001922331497542898, + "loss": 2.7296, + "step": 2613 + }, + { + "epoch": 0.21095956742797192, + "grad_norm": 0.7536506652832031, + "learning_rate": 0.00019222704855485396, + "loss": 2.7897, + "step": 2614 + }, + { + "epoch": 0.21104027116455493, + "grad_norm": 0.7539274096488953, + "learning_rate": 0.000192220945056879, + "loss": 2.7809, + "step": 2615 + }, + { + "epoch": 0.21112097490113793, + "grad_norm": 0.7737646698951721, + "learning_rate": 0.00019221483926051705, + "loss": 2.7195, + "step": 2616 + }, + { + "epoch": 0.21120167863772094, + "grad_norm": 0.7421913743019104, + "learning_rate": 0.00019220873116592024, + "loss": 2.6817, + "step": 2617 + }, + { + "epoch": 0.21128238237430394, 
+ "grad_norm": 0.7872927784919739, + "learning_rate": 0.0001922026207732408, + "loss": 2.7379, + "step": 2618 + }, + { + "epoch": 0.21136308611088694, + "grad_norm": 0.7950671315193176, + "learning_rate": 0.00019219650808263104, + "loss": 2.7135, + "step": 2619 + }, + { + "epoch": 0.21144378984746995, + "grad_norm": 0.7711792588233948, + "learning_rate": 0.0001921903930942433, + "loss": 2.7021, + "step": 2620 + }, + { + "epoch": 0.21152449358405295, + "grad_norm": 0.9030743837356567, + "learning_rate": 0.00019218427580822996, + "loss": 2.8083, + "step": 2621 + }, + { + "epoch": 0.21160519732063596, + "grad_norm": 0.8191907405853271, + "learning_rate": 0.0001921781562247435, + "loss": 2.6998, + "step": 2622 + }, + { + "epoch": 0.21168590105721896, + "grad_norm": 0.7883538603782654, + "learning_rate": 0.00019217203434393644, + "loss": 2.7573, + "step": 2623 + }, + { + "epoch": 0.21176660479380197, + "grad_norm": 0.7565868496894836, + "learning_rate": 0.00019216591016596134, + "loss": 2.7725, + "step": 2624 + }, + { + "epoch": 0.21184730853038497, + "grad_norm": 0.8579828143119812, + "learning_rate": 0.00019215978369097086, + "loss": 2.7529, + "step": 2625 + }, + { + "epoch": 0.21192801226696797, + "grad_norm": 0.7835422158241272, + "learning_rate": 0.0001921536549191176, + "loss": 2.6926, + "step": 2626 + }, + { + "epoch": 0.21200871600355095, + "grad_norm": 0.8041907548904419, + "learning_rate": 0.00019214752385055442, + "loss": 2.7541, + "step": 2627 + }, + { + "epoch": 0.21208941974013396, + "grad_norm": 0.7754014730453491, + "learning_rate": 0.00019214139048543406, + "loss": 2.6807, + "step": 2628 + }, + { + "epoch": 0.21217012347671696, + "grad_norm": 0.8222344517707825, + "learning_rate": 0.00019213525482390936, + "loss": 2.7339, + "step": 2629 + }, + { + "epoch": 0.21225082721329996, + "grad_norm": 0.8083673715591431, + "learning_rate": 0.0001921291168661333, + "loss": 2.739, + "step": 2630 + }, + { + "epoch": 0.21233153094988297, + "grad_norm": 
0.8039100766181946, + "learning_rate": 0.0001921229766122588, + "loss": 2.7372, + "step": 2631 + }, + { + "epoch": 0.21241223468646597, + "grad_norm": 0.7513072490692139, + "learning_rate": 0.00019211683406243892, + "loss": 2.7284, + "step": 2632 + }, + { + "epoch": 0.21249293842304898, + "grad_norm": 0.7653890252113342, + "learning_rate": 0.00019211068921682673, + "loss": 2.6911, + "step": 2633 + }, + { + "epoch": 0.21257364215963198, + "grad_norm": 0.7210217714309692, + "learning_rate": 0.00019210454207557542, + "loss": 2.6989, + "step": 2634 + }, + { + "epoch": 0.21265434589621499, + "grad_norm": 0.7389202117919922, + "learning_rate": 0.00019209839263883814, + "loss": 2.7016, + "step": 2635 + }, + { + "epoch": 0.212735049632798, + "grad_norm": 0.8069031238555908, + "learning_rate": 0.00019209224090676813, + "loss": 2.8213, + "step": 2636 + }, + { + "epoch": 0.212815753369381, + "grad_norm": 0.8019161224365234, + "learning_rate": 0.00019208608687951877, + "loss": 2.7413, + "step": 2637 + }, + { + "epoch": 0.212896457105964, + "grad_norm": 0.775572657585144, + "learning_rate": 0.00019207993055724343, + "loss": 2.7016, + "step": 2638 + }, + { + "epoch": 0.212977160842547, + "grad_norm": 0.7482941746711731, + "learning_rate": 0.0001920737719400955, + "loss": 2.7991, + "step": 2639 + }, + { + "epoch": 0.21305786457913, + "grad_norm": 0.8467636704444885, + "learning_rate": 0.0001920676110282285, + "loss": 2.7401, + "step": 2640 + }, + { + "epoch": 0.213138568315713, + "grad_norm": 0.8726305365562439, + "learning_rate": 0.00019206144782179597, + "loss": 2.7599, + "step": 2641 + }, + { + "epoch": 0.21321927205229602, + "grad_norm": 0.740527868270874, + "learning_rate": 0.00019205528232095148, + "loss": 2.7326, + "step": 2642 + }, + { + "epoch": 0.21329997578887902, + "grad_norm": 0.7932354211807251, + "learning_rate": 0.00019204911452584873, + "loss": 2.7873, + "step": 2643 + }, + { + "epoch": 0.21338067952546202, + "grad_norm": 0.7994125485420227, + "learning_rate": 
0.00019204294443664143, + "loss": 2.7305, + "step": 2644 + }, + { + "epoch": 0.21346138326204503, + "grad_norm": 0.880557656288147, + "learning_rate": 0.00019203677205348338, + "loss": 2.7295, + "step": 2645 + }, + { + "epoch": 0.21354208699862803, + "grad_norm": 0.8269557952880859, + "learning_rate": 0.00019203059737652836, + "loss": 2.765, + "step": 2646 + }, + { + "epoch": 0.21362279073521104, + "grad_norm": 0.8732784986495972, + "learning_rate": 0.00019202442040593026, + "loss": 2.6742, + "step": 2647 + }, + { + "epoch": 0.21370349447179404, + "grad_norm": 0.7921704649925232, + "learning_rate": 0.0001920182411418431, + "loss": 2.7144, + "step": 2648 + }, + { + "epoch": 0.21378419820837705, + "grad_norm": 0.8097628355026245, + "learning_rate": 0.00019201205958442082, + "loss": 2.7513, + "step": 2649 + }, + { + "epoch": 0.21386490194496005, + "grad_norm": 0.8230542540550232, + "learning_rate": 0.00019200587573381744, + "loss": 2.7648, + "step": 2650 + }, + { + "epoch": 0.21394560568154306, + "grad_norm": 0.7719153761863708, + "learning_rate": 0.0001919996895901872, + "loss": 2.7637, + "step": 2651 + }, + { + "epoch": 0.21402630941812606, + "grad_norm": 0.9022669792175293, + "learning_rate": 0.00019199350115368415, + "loss": 2.7707, + "step": 2652 + }, + { + "epoch": 0.21410701315470906, + "grad_norm": 0.8111257553100586, + "learning_rate": 0.00019198731042446263, + "loss": 2.7423, + "step": 2653 + }, + { + "epoch": 0.21418771689129207, + "grad_norm": 0.7534981966018677, + "learning_rate": 0.00019198111740267683, + "loss": 2.7474, + "step": 2654 + }, + { + "epoch": 0.21426842062787507, + "grad_norm": 0.761411190032959, + "learning_rate": 0.00019197492208848117, + "loss": 2.7541, + "step": 2655 + }, + { + "epoch": 0.21434912436445808, + "grad_norm": 0.8076324462890625, + "learning_rate": 0.00019196872448203002, + "loss": 2.7198, + "step": 2656 + }, + { + "epoch": 0.21442982810104108, + "grad_norm": 0.7987746000289917, + "learning_rate": 0.00019196252458347784, + 
"loss": 2.7164, + "step": 2657 + }, + { + "epoch": 0.21451053183762409, + "grad_norm": 0.7581545114517212, + "learning_rate": 0.0001919563223929792, + "loss": 2.6837, + "step": 2658 + }, + { + "epoch": 0.2145912355742071, + "grad_norm": 0.8773601055145264, + "learning_rate": 0.00019195011791068857, + "loss": 2.8248, + "step": 2659 + }, + { + "epoch": 0.2146719393107901, + "grad_norm": 0.7027503252029419, + "learning_rate": 0.00019194391113676066, + "loss": 2.6726, + "step": 2660 + }, + { + "epoch": 0.2147526430473731, + "grad_norm": 0.8650866746902466, + "learning_rate": 0.00019193770207135015, + "loss": 2.7348, + "step": 2661 + }, + { + "epoch": 0.2148333467839561, + "grad_norm": 0.8521862030029297, + "learning_rate": 0.0001919314907146118, + "loss": 2.7409, + "step": 2662 + }, + { + "epoch": 0.2149140505205391, + "grad_norm": 0.8098535537719727, + "learning_rate": 0.00019192527706670033, + "loss": 2.7615, + "step": 2663 + }, + { + "epoch": 0.2149947542571221, + "grad_norm": 0.7396193146705627, + "learning_rate": 0.0001919190611277707, + "loss": 2.7191, + "step": 2664 + }, + { + "epoch": 0.21507545799370512, + "grad_norm": 0.8245799541473389, + "learning_rate": 0.00019191284289797776, + "loss": 2.7429, + "step": 2665 + }, + { + "epoch": 0.21515616173028812, + "grad_norm": 0.791646420955658, + "learning_rate": 0.00019190662237747656, + "loss": 2.7197, + "step": 2666 + }, + { + "epoch": 0.21523686546687112, + "grad_norm": 0.7850802540779114, + "learning_rate": 0.00019190039956642205, + "loss": 2.7353, + "step": 2667 + }, + { + "epoch": 0.21531756920345413, + "grad_norm": 0.7657971978187561, + "learning_rate": 0.00019189417446496937, + "loss": 2.7083, + "step": 2668 + }, + { + "epoch": 0.21539827294003713, + "grad_norm": 0.7704403400421143, + "learning_rate": 0.00019188794707327363, + "loss": 2.7813, + "step": 2669 + }, + { + "epoch": 0.21547897667662014, + "grad_norm": 0.7345917224884033, + "learning_rate": 0.00019188171739149005, + "loss": 2.7098, + "step": 2670 + 
}, + { + "epoch": 0.21555968041320314, + "grad_norm": 0.728831946849823, + "learning_rate": 0.00019187548541977392, + "loss": 2.6745, + "step": 2671 + }, + { + "epoch": 0.21564038414978615, + "grad_norm": 0.8079627156257629, + "learning_rate": 0.0001918692511582805, + "loss": 2.6427, + "step": 2672 + }, + { + "epoch": 0.21572108788636915, + "grad_norm": 0.766808032989502, + "learning_rate": 0.0001918630146071652, + "loss": 2.6956, + "step": 2673 + }, + { + "epoch": 0.21580179162295215, + "grad_norm": 0.7555391192436218, + "learning_rate": 0.00019185677576658345, + "loss": 2.6499, + "step": 2674 + }, + { + "epoch": 0.21588249535953516, + "grad_norm": 0.7740229964256287, + "learning_rate": 0.00019185053463669074, + "loss": 2.7685, + "step": 2675 + }, + { + "epoch": 0.21596319909611816, + "grad_norm": 0.8272803425788879, + "learning_rate": 0.00019184429121764257, + "loss": 2.7272, + "step": 2676 + }, + { + "epoch": 0.21604390283270117, + "grad_norm": 0.870625376701355, + "learning_rate": 0.00019183804550959463, + "loss": 2.7509, + "step": 2677 + }, + { + "epoch": 0.21612460656928414, + "grad_norm": 0.8021238446235657, + "learning_rate": 0.0001918317975127025, + "loss": 2.7058, + "step": 2678 + }, + { + "epoch": 0.21620531030586715, + "grad_norm": 0.729918897151947, + "learning_rate": 0.00019182554722712192, + "loss": 2.6145, + "step": 2679 + }, + { + "epoch": 0.21628601404245015, + "grad_norm": 0.7658380270004272, + "learning_rate": 0.00019181929465300867, + "loss": 2.712, + "step": 2680 + }, + { + "epoch": 0.21636671777903316, + "grad_norm": 0.7702174186706543, + "learning_rate": 0.00019181303979051858, + "loss": 2.8257, + "step": 2681 + }, + { + "epoch": 0.21644742151561616, + "grad_norm": 0.7782231569290161, + "learning_rate": 0.00019180678263980755, + "loss": 2.8226, + "step": 2682 + }, + { + "epoch": 0.21652812525219917, + "grad_norm": 0.7448495626449585, + "learning_rate": 0.0001918005232010315, + "loss": 2.7877, + "step": 2683 + }, + { + "epoch": 
0.21660882898878217, + "grad_norm": 0.7273527979850769, + "learning_rate": 0.00019179426147434647, + "loss": 2.7169, + "step": 2684 + }, + { + "epoch": 0.21668953272536517, + "grad_norm": 0.7730992436408997, + "learning_rate": 0.00019178799745990846, + "loss": 2.717, + "step": 2685 + }, + { + "epoch": 0.21677023646194818, + "grad_norm": 0.7709231376647949, + "learning_rate": 0.0001917817311578736, + "loss": 2.7676, + "step": 2686 + }, + { + "epoch": 0.21685094019853118, + "grad_norm": 0.7825181484222412, + "learning_rate": 0.00019177546256839812, + "loss": 2.7473, + "step": 2687 + }, + { + "epoch": 0.2169316439351142, + "grad_norm": 0.8133581280708313, + "learning_rate": 0.0001917691916916382, + "loss": 2.7242, + "step": 2688 + }, + { + "epoch": 0.2170123476716972, + "grad_norm": 0.7833015322685242, + "learning_rate": 0.00019176291852775011, + "loss": 2.8128, + "step": 2689 + }, + { + "epoch": 0.2170930514082802, + "grad_norm": 0.7423487305641174, + "learning_rate": 0.00019175664307689028, + "loss": 2.6999, + "step": 2690 + }, + { + "epoch": 0.2171737551448632, + "grad_norm": 0.7881289124488831, + "learning_rate": 0.000191750365339215, + "loss": 2.7349, + "step": 2691 + }, + { + "epoch": 0.2172544588814462, + "grad_norm": 0.8316197395324707, + "learning_rate": 0.00019174408531488077, + "loss": 2.7654, + "step": 2692 + }, + { + "epoch": 0.2173351626180292, + "grad_norm": 0.7589917778968811, + "learning_rate": 0.00019173780300404413, + "loss": 2.6815, + "step": 2693 + }, + { + "epoch": 0.21741586635461221, + "grad_norm": 0.7752439975738525, + "learning_rate": 0.00019173151840686163, + "loss": 2.7804, + "step": 2694 + }, + { + "epoch": 0.21749657009119522, + "grad_norm": 0.8156552910804749, + "learning_rate": 0.0001917252315234899, + "loss": 2.7325, + "step": 2695 + }, + { + "epoch": 0.21757727382777822, + "grad_norm": 0.8886982798576355, + "learning_rate": 0.00019171894235408564, + "loss": 2.7257, + "step": 2696 + }, + { + "epoch": 0.21765797756436123, + "grad_norm": 
0.8270704746246338, + "learning_rate": 0.00019171265089880558, + "loss": 2.7357, + "step": 2697 + }, + { + "epoch": 0.21773868130094423, + "grad_norm": 0.807700514793396, + "learning_rate": 0.00019170635715780651, + "loss": 2.7488, + "step": 2698 + }, + { + "epoch": 0.21781938503752724, + "grad_norm": 0.8195288181304932, + "learning_rate": 0.00019170006113124533, + "loss": 2.7048, + "step": 2699 + }, + { + "epoch": 0.21790008877411024, + "grad_norm": 0.817097008228302, + "learning_rate": 0.00019169376281927888, + "loss": 2.7148, + "step": 2700 + }, + { + "epoch": 0.21798079251069324, + "grad_norm": 0.8415588140487671, + "learning_rate": 0.0001916874622220642, + "loss": 2.7376, + "step": 2701 + }, + { + "epoch": 0.21806149624727625, + "grad_norm": 0.8004198670387268, + "learning_rate": 0.00019168115933975826, + "loss": 2.7145, + "step": 2702 + }, + { + "epoch": 0.21814219998385925, + "grad_norm": 0.8167368769645691, + "learning_rate": 0.0001916748541725182, + "loss": 2.6923, + "step": 2703 + }, + { + "epoch": 0.21822290372044226, + "grad_norm": 0.8877980709075928, + "learning_rate": 0.0001916685467205011, + "loss": 2.8232, + "step": 2704 + }, + { + "epoch": 0.21830360745702526, + "grad_norm": 0.7835622429847717, + "learning_rate": 0.00019166223698386422, + "loss": 2.7797, + "step": 2705 + }, + { + "epoch": 0.21838431119360827, + "grad_norm": 0.8023552894592285, + "learning_rate": 0.00019165592496276477, + "loss": 2.6697, + "step": 2706 + }, + { + "epoch": 0.21846501493019127, + "grad_norm": 0.8549069166183472, + "learning_rate": 0.00019164961065736008, + "loss": 2.729, + "step": 2707 + }, + { + "epoch": 0.21854571866677427, + "grad_norm": 0.8561950325965881, + "learning_rate": 0.00019164329406780753, + "loss": 2.772, + "step": 2708 + }, + { + "epoch": 0.21862642240335728, + "grad_norm": 0.6979276537895203, + "learning_rate": 0.00019163697519426453, + "loss": 2.7195, + "step": 2709 + }, + { + "epoch": 0.21870712613994028, + "grad_norm": 0.7659175395965576, + 
"learning_rate": 0.00019163065403688856, + "loss": 2.7742, + "step": 2710 + }, + { + "epoch": 0.2187878298765233, + "grad_norm": 0.8621466755867004, + "learning_rate": 0.00019162433059583718, + "loss": 2.721, + "step": 2711 + }, + { + "epoch": 0.2188685336131063, + "grad_norm": 0.8086833357810974, + "learning_rate": 0.00019161800487126795, + "loss": 2.7356, + "step": 2712 + }, + { + "epoch": 0.2189492373496893, + "grad_norm": 0.816215455532074, + "learning_rate": 0.00019161167686333855, + "loss": 2.7159, + "step": 2713 + }, + { + "epoch": 0.2190299410862723, + "grad_norm": 0.9180822968482971, + "learning_rate": 0.0001916053465722067, + "loss": 2.7162, + "step": 2714 + }, + { + "epoch": 0.2191106448228553, + "grad_norm": 0.7547199130058289, + "learning_rate": 0.00019159901399803014, + "loss": 2.7338, + "step": 2715 + }, + { + "epoch": 0.2191913485594383, + "grad_norm": 0.7380769848823547, + "learning_rate": 0.00019159267914096675, + "loss": 2.7149, + "step": 2716 + }, + { + "epoch": 0.2192720522960213, + "grad_norm": 0.7242285013198853, + "learning_rate": 0.00019158634200117433, + "loss": 2.724, + "step": 2717 + }, + { + "epoch": 0.21935275603260432, + "grad_norm": 0.8400316834449768, + "learning_rate": 0.00019158000257881087, + "loss": 2.7528, + "step": 2718 + }, + { + "epoch": 0.21943345976918732, + "grad_norm": 0.8437172770500183, + "learning_rate": 0.00019157366087403435, + "loss": 2.7872, + "step": 2719 + }, + { + "epoch": 0.21951416350577033, + "grad_norm": 0.7428301572799683, + "learning_rate": 0.00019156731688700282, + "loss": 2.6831, + "step": 2720 + }, + { + "epoch": 0.21959486724235333, + "grad_norm": 0.7589641213417053, + "learning_rate": 0.00019156097061787445, + "loss": 2.7105, + "step": 2721 + }, + { + "epoch": 0.21967557097893634, + "grad_norm": 0.7607305645942688, + "learning_rate": 0.00019155462206680727, + "loss": 2.7913, + "step": 2722 + }, + { + "epoch": 0.21975627471551934, + "grad_norm": 0.7455689311027527, + "learning_rate": 
0.00019154827123395963, + "loss": 2.6321, + "step": 2723 + }, + { + "epoch": 0.21983697845210234, + "grad_norm": 0.7860318422317505, + "learning_rate": 0.00019154191811948974, + "loss": 2.7907, + "step": 2724 + }, + { + "epoch": 0.21991768218868535, + "grad_norm": 0.8101385235786438, + "learning_rate": 0.00019153556272355596, + "loss": 2.7682, + "step": 2725 + }, + { + "epoch": 0.21999838592526835, + "grad_norm": 0.7437283396720886, + "learning_rate": 0.00019152920504631667, + "loss": 2.7271, + "step": 2726 + }, + { + "epoch": 0.22007908966185136, + "grad_norm": 0.7390851974487305, + "learning_rate": 0.00019152284508793034, + "loss": 2.7492, + "step": 2727 + }, + { + "epoch": 0.22015979339843436, + "grad_norm": 0.9074966311454773, + "learning_rate": 0.0001915164828485555, + "loss": 2.8076, + "step": 2728 + }, + { + "epoch": 0.22024049713501734, + "grad_norm": 0.7644218802452087, + "learning_rate": 0.00019151011832835063, + "loss": 2.7238, + "step": 2729 + }, + { + "epoch": 0.22032120087160034, + "grad_norm": 0.823567807674408, + "learning_rate": 0.0001915037515274744, + "loss": 2.7701, + "step": 2730 + }, + { + "epoch": 0.22040190460818335, + "grad_norm": 0.7601858377456665, + "learning_rate": 0.00019149738244608552, + "loss": 2.6981, + "step": 2731 + }, + { + "epoch": 0.22048260834476635, + "grad_norm": 0.8242961764335632, + "learning_rate": 0.00019149101108434269, + "loss": 2.6916, + "step": 2732 + }, + { + "epoch": 0.22056331208134936, + "grad_norm": 0.7970656156539917, + "learning_rate": 0.0001914846374424047, + "loss": 2.7858, + "step": 2733 + }, + { + "epoch": 0.22064401581793236, + "grad_norm": 0.7844050526618958, + "learning_rate": 0.0001914782615204304, + "loss": 2.6782, + "step": 2734 + }, + { + "epoch": 0.22072471955451536, + "grad_norm": 0.7965044975280762, + "learning_rate": 0.00019147188331857868, + "loss": 2.7563, + "step": 2735 + }, + { + "epoch": 0.22080542329109837, + "grad_norm": 0.8189071416854858, + "learning_rate": 0.00019146550283700856, + 
"loss": 2.7587, + "step": 2736 + }, + { + "epoch": 0.22088612702768137, + "grad_norm": 0.7610960602760315, + "learning_rate": 0.00019145912007587898, + "loss": 2.663, + "step": 2737 + }, + { + "epoch": 0.22096683076426438, + "grad_norm": 0.7642313838005066, + "learning_rate": 0.00019145273503534907, + "loss": 2.78, + "step": 2738 + }, + { + "epoch": 0.22104753450084738, + "grad_norm": 0.7699539065361023, + "learning_rate": 0.0001914463477155779, + "loss": 2.7429, + "step": 2739 + }, + { + "epoch": 0.22112823823743039, + "grad_norm": 0.7674413919448853, + "learning_rate": 0.00019143995811672477, + "loss": 2.7048, + "step": 2740 + }, + { + "epoch": 0.2212089419740134, + "grad_norm": 0.7871866226196289, + "learning_rate": 0.00019143356623894882, + "loss": 2.7769, + "step": 2741 + }, + { + "epoch": 0.2212896457105964, + "grad_norm": 0.8453468680381775, + "learning_rate": 0.00019142717208240937, + "loss": 2.7677, + "step": 2742 + }, + { + "epoch": 0.2213703494471794, + "grad_norm": 0.8050780892372131, + "learning_rate": 0.00019142077564726582, + "loss": 2.7809, + "step": 2743 + }, + { + "epoch": 0.2214510531837624, + "grad_norm": 0.811287522315979, + "learning_rate": 0.0001914143769336776, + "loss": 2.7201, + "step": 2744 + }, + { + "epoch": 0.2215317569203454, + "grad_norm": 0.823106050491333, + "learning_rate": 0.00019140797594180412, + "loss": 2.7371, + "step": 2745 + }, + { + "epoch": 0.2216124606569284, + "grad_norm": 0.778126060962677, + "learning_rate": 0.0001914015726718049, + "loss": 2.6925, + "step": 2746 + }, + { + "epoch": 0.22169316439351142, + "grad_norm": 0.8240278959274292, + "learning_rate": 0.0001913951671238396, + "loss": 2.7227, + "step": 2747 + }, + { + "epoch": 0.22177386813009442, + "grad_norm": 0.8061805963516235, + "learning_rate": 0.0001913887592980678, + "loss": 2.7092, + "step": 2748 + }, + { + "epoch": 0.22185457186667742, + "grad_norm": 0.9111800789833069, + "learning_rate": 0.00019138234919464925, + "loss": 2.7364, + "step": 2749 + }, + { 
+ "epoch": 0.22193527560326043, + "grad_norm": 0.8154863715171814, + "learning_rate": 0.0001913759368137437, + "loss": 2.6983, + "step": 2750 + }, + { + "epoch": 0.22201597933984343, + "grad_norm": 0.8547734022140503, + "learning_rate": 0.0001913695221555109, + "loss": 2.7016, + "step": 2751 + }, + { + "epoch": 0.22209668307642644, + "grad_norm": 0.7488531470298767, + "learning_rate": 0.00019136310522011079, + "loss": 2.6641, + "step": 2752 + }, + { + "epoch": 0.22217738681300944, + "grad_norm": 0.9118027091026306, + "learning_rate": 0.00019135668600770326, + "loss": 2.6965, + "step": 2753 + }, + { + "epoch": 0.22225809054959245, + "grad_norm": 0.7629117369651794, + "learning_rate": 0.00019135026451844834, + "loss": 2.7836, + "step": 2754 + }, + { + "epoch": 0.22233879428617545, + "grad_norm": 0.8081222176551819, + "learning_rate": 0.000191343840752506, + "loss": 2.7339, + "step": 2755 + }, + { + "epoch": 0.22241949802275846, + "grad_norm": 0.9143899083137512, + "learning_rate": 0.00019133741471003636, + "loss": 2.7051, + "step": 2756 + }, + { + "epoch": 0.22250020175934146, + "grad_norm": 0.8096790909767151, + "learning_rate": 0.00019133098639119962, + "loss": 2.6884, + "step": 2757 + }, + { + "epoch": 0.22258090549592446, + "grad_norm": 0.7959297895431519, + "learning_rate": 0.00019132455579615597, + "loss": 2.7127, + "step": 2758 + }, + { + "epoch": 0.22266160923250747, + "grad_norm": 0.7111356854438782, + "learning_rate": 0.00019131812292506563, + "loss": 2.7418, + "step": 2759 + }, + { + "epoch": 0.22274231296909047, + "grad_norm": 0.7584012150764465, + "learning_rate": 0.00019131168777808898, + "loss": 2.6705, + "step": 2760 + }, + { + "epoch": 0.22282301670567348, + "grad_norm": 0.7646663784980774, + "learning_rate": 0.0001913052503553864, + "loss": 2.7166, + "step": 2761 + }, + { + "epoch": 0.22290372044225648, + "grad_norm": 0.7643954157829285, + "learning_rate": 0.00019129881065711827, + "loss": 2.7967, + "step": 2762 + }, + { + "epoch": 
0.22298442417883949, + "grad_norm": 0.7591429948806763, + "learning_rate": 0.0001912923686834451, + "loss": 2.6611, + "step": 2763 + }, + { + "epoch": 0.2230651279154225, + "grad_norm": 0.7182386517524719, + "learning_rate": 0.00019128592443452749, + "loss": 2.6808, + "step": 2764 + }, + { + "epoch": 0.2231458316520055, + "grad_norm": 0.7689648270606995, + "learning_rate": 0.00019127947791052602, + "loss": 2.7288, + "step": 2765 + }, + { + "epoch": 0.2232265353885885, + "grad_norm": 0.7851321697235107, + "learning_rate": 0.00019127302911160136, + "loss": 2.7227, + "step": 2766 + }, + { + "epoch": 0.2233072391251715, + "grad_norm": 0.8419411182403564, + "learning_rate": 0.00019126657803791424, + "loss": 2.7397, + "step": 2767 + }, + { + "epoch": 0.2233879428617545, + "grad_norm": 0.7657596468925476, + "learning_rate": 0.0001912601246896254, + "loss": 2.7223, + "step": 2768 + }, + { + "epoch": 0.2234686465983375, + "grad_norm": 0.8033619523048401, + "learning_rate": 0.00019125366906689567, + "loss": 2.7256, + "step": 2769 + }, + { + "epoch": 0.22354935033492052, + "grad_norm": 0.7784682512283325, + "learning_rate": 0.00019124721116988601, + "loss": 2.7692, + "step": 2770 + }, + { + "epoch": 0.22363005407150352, + "grad_norm": 0.7842707633972168, + "learning_rate": 0.00019124075099875731, + "loss": 2.7707, + "step": 2771 + }, + { + "epoch": 0.22371075780808652, + "grad_norm": 0.7864845395088196, + "learning_rate": 0.0001912342885536706, + "loss": 2.6912, + "step": 2772 + }, + { + "epoch": 0.22379146154466953, + "grad_norm": 0.8544312715530396, + "learning_rate": 0.0001912278238347869, + "loss": 2.8345, + "step": 2773 + }, + { + "epoch": 0.22387216528125253, + "grad_norm": 0.7210882306098938, + "learning_rate": 0.0001912213568422674, + "loss": 2.6933, + "step": 2774 + }, + { + "epoch": 0.22395286901783554, + "grad_norm": 0.8877022862434387, + "learning_rate": 0.00019121488757627318, + "loss": 2.7583, + "step": 2775 + }, + { + "epoch": 0.22403357275441854, + 
"grad_norm": 0.902886688709259, + "learning_rate": 0.00019120841603696554, + "loss": 2.8, + "step": 2776 + }, + { + "epoch": 0.22411427649100155, + "grad_norm": 0.771294355392456, + "learning_rate": 0.0001912019422245058, + "loss": 2.7712, + "step": 2777 + }, + { + "epoch": 0.22419498022758455, + "grad_norm": 0.7973463535308838, + "learning_rate": 0.0001911954661390552, + "loss": 2.7368, + "step": 2778 + }, + { + "epoch": 0.22427568396416755, + "grad_norm": 0.776836633682251, + "learning_rate": 0.00019118898778077524, + "loss": 2.7126, + "step": 2779 + }, + { + "epoch": 0.22435638770075053, + "grad_norm": 0.8286641240119934, + "learning_rate": 0.00019118250714982731, + "loss": 2.7148, + "step": 2780 + }, + { + "epoch": 0.22443709143733354, + "grad_norm": 0.7848700284957886, + "learning_rate": 0.00019117602424637294, + "loss": 2.7284, + "step": 2781 + }, + { + "epoch": 0.22451779517391654, + "grad_norm": 0.7658216953277588, + "learning_rate": 0.0001911695390705737, + "loss": 2.7186, + "step": 2782 + }, + { + "epoch": 0.22459849891049954, + "grad_norm": 0.7596792578697205, + "learning_rate": 0.00019116305162259124, + "loss": 2.6854, + "step": 2783 + }, + { + "epoch": 0.22467920264708255, + "grad_norm": 0.7901157140731812, + "learning_rate": 0.00019115656190258726, + "loss": 2.7347, + "step": 2784 + }, + { + "epoch": 0.22475990638366555, + "grad_norm": 0.7499287128448486, + "learning_rate": 0.00019115006991072346, + "loss": 2.7219, + "step": 2785 + }, + { + "epoch": 0.22484061012024856, + "grad_norm": 0.7427374124526978, + "learning_rate": 0.00019114357564716162, + "loss": 2.7147, + "step": 2786 + }, + { + "epoch": 0.22492131385683156, + "grad_norm": 0.8305855393409729, + "learning_rate": 0.00019113707911206363, + "loss": 2.7587, + "step": 2787 + }, + { + "epoch": 0.22500201759341457, + "grad_norm": 0.8266459703445435, + "learning_rate": 0.00019113058030559142, + "loss": 2.7275, + "step": 2788 + }, + { + "epoch": 0.22508272132999757, + "grad_norm": 0.7338323593139648, 
+ "learning_rate": 0.0001911240792279069, + "loss": 2.762, + "step": 2789 + }, + { + "epoch": 0.22516342506658057, + "grad_norm": 0.7653434872627258, + "learning_rate": 0.00019111757587917216, + "loss": 2.6715, + "step": 2790 + }, + { + "epoch": 0.22524412880316358, + "grad_norm": 0.76301509141922, + "learning_rate": 0.00019111107025954923, + "loss": 2.698, + "step": 2791 + }, + { + "epoch": 0.22532483253974658, + "grad_norm": 0.7810547947883606, + "learning_rate": 0.00019110456236920024, + "loss": 2.7295, + "step": 2792 + }, + { + "epoch": 0.2254055362763296, + "grad_norm": 0.7885214686393738, + "learning_rate": 0.00019109805220828742, + "loss": 2.7724, + "step": 2793 + }, + { + "epoch": 0.2254862400129126, + "grad_norm": 0.8087031841278076, + "learning_rate": 0.00019109153977697301, + "loss": 2.7888, + "step": 2794 + }, + { + "epoch": 0.2255669437494956, + "grad_norm": 0.795101523399353, + "learning_rate": 0.00019108502507541933, + "loss": 2.6815, + "step": 2795 + }, + { + "epoch": 0.2256476474860786, + "grad_norm": 0.8337482213973999, + "learning_rate": 0.0001910785081037887, + "loss": 2.8192, + "step": 2796 + }, + { + "epoch": 0.2257283512226616, + "grad_norm": 0.8357288241386414, + "learning_rate": 0.00019107198886224357, + "loss": 2.7867, + "step": 2797 + }, + { + "epoch": 0.2258090549592446, + "grad_norm": 0.80678391456604, + "learning_rate": 0.00019106546735094644, + "loss": 2.7313, + "step": 2798 + }, + { + "epoch": 0.2258897586958276, + "grad_norm": 0.7481401562690735, + "learning_rate": 0.00019105894357005979, + "loss": 2.7073, + "step": 2799 + }, + { + "epoch": 0.22597046243241062, + "grad_norm": 0.8025074005126953, + "learning_rate": 0.00019105241751974622, + "loss": 2.6922, + "step": 2800 + }, + { + "epoch": 0.22605116616899362, + "grad_norm": 0.7308986186981201, + "learning_rate": 0.00019104588920016842, + "loss": 2.7511, + "step": 2801 + }, + { + "epoch": 0.22613186990557663, + "grad_norm": 0.7727689146995544, + "learning_rate": 
0.00019103935861148905, + "loss": 2.707, + "step": 2802 + }, + { + "epoch": 0.22621257364215963, + "grad_norm": 0.8611076474189758, + "learning_rate": 0.0001910328257538709, + "loss": 2.8494, + "step": 2803 + }, + { + "epoch": 0.22629327737874264, + "grad_norm": 0.8487605452537537, + "learning_rate": 0.00019102629062747677, + "loss": 2.7698, + "step": 2804 + }, + { + "epoch": 0.22637398111532564, + "grad_norm": 0.7495502233505249, + "learning_rate": 0.00019101975323246952, + "loss": 2.7091, + "step": 2805 + }, + { + "epoch": 0.22645468485190864, + "grad_norm": 0.7334234118461609, + "learning_rate": 0.0001910132135690121, + "loss": 2.7375, + "step": 2806 + }, + { + "epoch": 0.22653538858849165, + "grad_norm": 0.879912257194519, + "learning_rate": 0.00019100667163726747, + "loss": 2.7278, + "step": 2807 + }, + { + "epoch": 0.22661609232507465, + "grad_norm": 0.8087306618690491, + "learning_rate": 0.0001910001274373987, + "loss": 2.8065, + "step": 2808 + }, + { + "epoch": 0.22669679606165766, + "grad_norm": 0.7548169493675232, + "learning_rate": 0.00019099358096956887, + "loss": 2.7235, + "step": 2809 + }, + { + "epoch": 0.22677749979824066, + "grad_norm": 0.7505785822868347, + "learning_rate": 0.00019098703223394118, + "loss": 2.6633, + "step": 2810 + }, + { + "epoch": 0.22685820353482367, + "grad_norm": 0.829075813293457, + "learning_rate": 0.00019098048123067875, + "loss": 2.7389, + "step": 2811 + }, + { + "epoch": 0.22693890727140667, + "grad_norm": 0.7731673121452332, + "learning_rate": 0.00019097392795994493, + "loss": 2.7639, + "step": 2812 + }, + { + "epoch": 0.22701961100798967, + "grad_norm": 0.7389004826545715, + "learning_rate": 0.00019096737242190303, + "loss": 2.717, + "step": 2813 + }, + { + "epoch": 0.22710031474457268, + "grad_norm": 0.7520460486412048, + "learning_rate": 0.0001909608146167164, + "loss": 2.7203, + "step": 2814 + }, + { + "epoch": 0.22718101848115568, + "grad_norm": 0.7272354364395142, + "learning_rate": 0.00019095425454454849, + 
"loss": 2.7306, + "step": 2815 + }, + { + "epoch": 0.2272617222177387, + "grad_norm": 0.7593528032302856, + "learning_rate": 0.00019094769220556282, + "loss": 2.7565, + "step": 2816 + }, + { + "epoch": 0.2273424259543217, + "grad_norm": 0.7312695384025574, + "learning_rate": 0.0001909411275999229, + "loss": 2.744, + "step": 2817 + }, + { + "epoch": 0.2274231296909047, + "grad_norm": 0.7483308911323547, + "learning_rate": 0.00019093456072779238, + "loss": 2.7938, + "step": 2818 + }, + { + "epoch": 0.2275038334274877, + "grad_norm": 0.8515620231628418, + "learning_rate": 0.00019092799158933486, + "loss": 2.7392, + "step": 2819 + }, + { + "epoch": 0.2275845371640707, + "grad_norm": 0.7119776606559753, + "learning_rate": 0.00019092142018471415, + "loss": 2.6985, + "step": 2820 + }, + { + "epoch": 0.2276652409006537, + "grad_norm": 0.7549445033073425, + "learning_rate": 0.00019091484651409394, + "loss": 2.7621, + "step": 2821 + }, + { + "epoch": 0.2277459446372367, + "grad_norm": 0.8728097081184387, + "learning_rate": 0.00019090827057763814, + "loss": 2.8321, + "step": 2822 + }, + { + "epoch": 0.22782664837381972, + "grad_norm": 0.755043089389801, + "learning_rate": 0.00019090169237551057, + "loss": 2.7341, + "step": 2823 + }, + { + "epoch": 0.22790735211040272, + "grad_norm": 0.7949401140213013, + "learning_rate": 0.00019089511190787523, + "loss": 2.7646, + "step": 2824 + }, + { + "epoch": 0.22798805584698573, + "grad_norm": 0.8027622103691101, + "learning_rate": 0.00019088852917489607, + "loss": 2.7606, + "step": 2825 + }, + { + "epoch": 0.22806875958356873, + "grad_norm": 0.8609418869018555, + "learning_rate": 0.0001908819441767372, + "loss": 2.7433, + "step": 2826 + }, + { + "epoch": 0.22814946332015174, + "grad_norm": 0.8021805882453918, + "learning_rate": 0.00019087535691356271, + "loss": 2.7723, + "step": 2827 + }, + { + "epoch": 0.22823016705673474, + "grad_norm": 0.8104252219200134, + "learning_rate": 0.00019086876738553675, + "loss": 2.7229, + "step": 2828 + 
}, + { + "epoch": 0.22831087079331774, + "grad_norm": 0.8714433908462524, + "learning_rate": 0.00019086217559282362, + "loss": 2.75, + "step": 2829 + }, + { + "epoch": 0.22839157452990075, + "grad_norm": 0.7598714828491211, + "learning_rate": 0.0001908555815355875, + "loss": 2.6979, + "step": 2830 + }, + { + "epoch": 0.22847227826648372, + "grad_norm": 0.859708309173584, + "learning_rate": 0.00019084898521399283, + "loss": 2.7863, + "step": 2831 + }, + { + "epoch": 0.22855298200306673, + "grad_norm": 0.7798011302947998, + "learning_rate": 0.00019084238662820397, + "loss": 2.7623, + "step": 2832 + }, + { + "epoch": 0.22863368573964973, + "grad_norm": 0.7869576811790466, + "learning_rate": 0.00019083578577838535, + "loss": 2.7341, + "step": 2833 + }, + { + "epoch": 0.22871438947623274, + "grad_norm": 0.7486738562583923, + "learning_rate": 0.0001908291826647015, + "loss": 2.7615, + "step": 2834 + }, + { + "epoch": 0.22879509321281574, + "grad_norm": 0.8270190954208374, + "learning_rate": 0.00019082257728731704, + "loss": 2.7515, + "step": 2835 + }, + { + "epoch": 0.22887579694939875, + "grad_norm": 0.9060254693031311, + "learning_rate": 0.00019081596964639648, + "loss": 2.874, + "step": 2836 + }, + { + "epoch": 0.22895650068598175, + "grad_norm": 0.7802320122718811, + "learning_rate": 0.00019080935974210458, + "loss": 2.7224, + "step": 2837 + }, + { + "epoch": 0.22903720442256476, + "grad_norm": 0.9513018131256104, + "learning_rate": 0.00019080274757460607, + "loss": 2.7168, + "step": 2838 + }, + { + "epoch": 0.22911790815914776, + "grad_norm": 0.7139711976051331, + "learning_rate": 0.0001907961331440657, + "loss": 2.676, + "step": 2839 + }, + { + "epoch": 0.22919861189573076, + "grad_norm": 0.8635632395744324, + "learning_rate": 0.00019078951645064838, + "loss": 2.6979, + "step": 2840 + }, + { + "epoch": 0.22927931563231377, + "grad_norm": 0.8823218941688538, + "learning_rate": 0.000190782897494519, + "loss": 2.7345, + "step": 2841 + }, + { + "epoch": 
0.22936001936889677, + "grad_norm": 0.8139359354972839, + "learning_rate": 0.00019077627627584246, + "loss": 2.6988, + "step": 2842 + }, + { + "epoch": 0.22944072310547978, + "grad_norm": 0.8935994505882263, + "learning_rate": 0.00019076965279478383, + "loss": 2.7706, + "step": 2843 + }, + { + "epoch": 0.22952142684206278, + "grad_norm": 0.8362705111503601, + "learning_rate": 0.00019076302705150816, + "loss": 2.7593, + "step": 2844 + }, + { + "epoch": 0.22960213057864579, + "grad_norm": 0.7534157633781433, + "learning_rate": 0.00019075639904618066, + "loss": 2.7501, + "step": 2845 + }, + { + "epoch": 0.2296828343152288, + "grad_norm": 0.8826640248298645, + "learning_rate": 0.00019074976877896642, + "loss": 2.7758, + "step": 2846 + }, + { + "epoch": 0.2297635380518118, + "grad_norm": 0.8395571112632751, + "learning_rate": 0.0001907431362500307, + "loss": 2.7625, + "step": 2847 + }, + { + "epoch": 0.2298442417883948, + "grad_norm": 0.7927684783935547, + "learning_rate": 0.00019073650145953885, + "loss": 2.7392, + "step": 2848 + }, + { + "epoch": 0.2299249455249778, + "grad_norm": 0.823208749294281, + "learning_rate": 0.00019072986440765618, + "loss": 2.7259, + "step": 2849 + }, + { + "epoch": 0.2300056492615608, + "grad_norm": 0.889416515827179, + "learning_rate": 0.00019072322509454815, + "loss": 2.7539, + "step": 2850 + }, + { + "epoch": 0.2300863529981438, + "grad_norm": 0.7957748770713806, + "learning_rate": 0.0001907165835203802, + "loss": 2.7756, + "step": 2851 + }, + { + "epoch": 0.23016705673472682, + "grad_norm": 0.7924029231071472, + "learning_rate": 0.00019070993968531782, + "loss": 2.7439, + "step": 2852 + }, + { + "epoch": 0.23024776047130982, + "grad_norm": 0.7811052799224854, + "learning_rate": 0.0001907032935895266, + "loss": 2.7479, + "step": 2853 + }, + { + "epoch": 0.23032846420789282, + "grad_norm": 0.7973877191543579, + "learning_rate": 0.00019069664523317225, + "loss": 2.7502, + "step": 2854 + }, + { + "epoch": 0.23040916794447583, + 
"grad_norm": 0.7524267435073853, + "learning_rate": 0.0001906899946164204, + "loss": 2.75, + "step": 2855 + }, + { + "epoch": 0.23048987168105883, + "grad_norm": 0.7594791054725647, + "learning_rate": 0.00019068334173943683, + "loss": 2.6534, + "step": 2856 + }, + { + "epoch": 0.23057057541764184, + "grad_norm": 0.7253785729408264, + "learning_rate": 0.00019067668660238733, + "loss": 2.7246, + "step": 2857 + }, + { + "epoch": 0.23065127915422484, + "grad_norm": 0.788737416267395, + "learning_rate": 0.00019067002920543775, + "loss": 2.757, + "step": 2858 + }, + { + "epoch": 0.23073198289080785, + "grad_norm": 0.7577618956565857, + "learning_rate": 0.00019066336954875403, + "loss": 2.674, + "step": 2859 + }, + { + "epoch": 0.23081268662739085, + "grad_norm": 0.7682929635047913, + "learning_rate": 0.0001906567076325022, + "loss": 2.8193, + "step": 2860 + }, + { + "epoch": 0.23089339036397385, + "grad_norm": 0.7742112874984741, + "learning_rate": 0.00019065004345684817, + "loss": 2.6969, + "step": 2861 + }, + { + "epoch": 0.23097409410055686, + "grad_norm": 0.7981678247451782, + "learning_rate": 0.00019064337702195814, + "loss": 2.7681, + "step": 2862 + }, + { + "epoch": 0.23105479783713986, + "grad_norm": 0.7608500123023987, + "learning_rate": 0.00019063670832799817, + "loss": 2.7459, + "step": 2863 + }, + { + "epoch": 0.23113550157372287, + "grad_norm": 0.7563463449478149, + "learning_rate": 0.00019063003737513455, + "loss": 2.7678, + "step": 2864 + }, + { + "epoch": 0.23121620531030587, + "grad_norm": 0.7915034890174866, + "learning_rate": 0.00019062336416353343, + "loss": 2.7577, + "step": 2865 + }, + { + "epoch": 0.23129690904688888, + "grad_norm": 0.7229592204093933, + "learning_rate": 0.00019061668869336122, + "loss": 2.7308, + "step": 2866 + }, + { + "epoch": 0.23137761278347188, + "grad_norm": 0.7910905480384827, + "learning_rate": 0.00019061001096478425, + "loss": 2.7571, + "step": 2867 + }, + { + "epoch": 0.23145831652005489, + "grad_norm": 
0.8474656939506531, + "learning_rate": 0.00019060333097796895, + "loss": 2.7011, + "step": 2868 + }, + { + "epoch": 0.2315390202566379, + "grad_norm": 0.8005419373512268, + "learning_rate": 0.00019059664873308178, + "loss": 2.7441, + "step": 2869 + }, + { + "epoch": 0.2316197239932209, + "grad_norm": 0.7728021740913391, + "learning_rate": 0.00019058996423028935, + "loss": 2.7753, + "step": 2870 + }, + { + "epoch": 0.2317004277298039, + "grad_norm": 0.7338094115257263, + "learning_rate": 0.00019058327746975816, + "loss": 2.7009, + "step": 2871 + }, + { + "epoch": 0.2317811314663869, + "grad_norm": 0.7746245265007019, + "learning_rate": 0.00019057658845165494, + "loss": 2.6938, + "step": 2872 + }, + { + "epoch": 0.2318618352029699, + "grad_norm": 0.7474356293678284, + "learning_rate": 0.00019056989717614636, + "loss": 2.7161, + "step": 2873 + }, + { + "epoch": 0.2319425389395529, + "grad_norm": 0.9540585279464722, + "learning_rate": 0.00019056320364339917, + "loss": 2.7753, + "step": 2874 + }, + { + "epoch": 0.23202324267613592, + "grad_norm": 0.799726665019989, + "learning_rate": 0.00019055650785358024, + "loss": 2.7301, + "step": 2875 + }, + { + "epoch": 0.23210394641271892, + "grad_norm": 0.8087828159332275, + "learning_rate": 0.0001905498098068564, + "loss": 2.7305, + "step": 2876 + }, + { + "epoch": 0.23218465014930192, + "grad_norm": 0.8177600502967834, + "learning_rate": 0.00019054310950339457, + "loss": 2.7462, + "step": 2877 + }, + { + "epoch": 0.23226535388588493, + "grad_norm": 0.7106238603591919, + "learning_rate": 0.00019053640694336181, + "loss": 2.7183, + "step": 2878 + }, + { + "epoch": 0.23234605762246793, + "grad_norm": 0.884185791015625, + "learning_rate": 0.00019052970212692514, + "loss": 2.7549, + "step": 2879 + }, + { + "epoch": 0.23242676135905094, + "grad_norm": 0.7532132267951965, + "learning_rate": 0.00019052299505425163, + "loss": 2.7524, + "step": 2880 + }, + { + "epoch": 0.23250746509563394, + "grad_norm": 0.7295021414756775, + 
"learning_rate": 0.00019051628572550842, + "loss": 2.6928, + "step": 2881 + }, + { + "epoch": 0.23258816883221692, + "grad_norm": 0.8475896716117859, + "learning_rate": 0.00019050957414086278, + "loss": 2.7138, + "step": 2882 + }, + { + "epoch": 0.23266887256879992, + "grad_norm": 0.7219378352165222, + "learning_rate": 0.00019050286030048198, + "loss": 2.7034, + "step": 2883 + }, + { + "epoch": 0.23274957630538293, + "grad_norm": 0.8410176634788513, + "learning_rate": 0.0001904961442045333, + "loss": 2.7413, + "step": 2884 + }, + { + "epoch": 0.23283028004196593, + "grad_norm": 0.7792301177978516, + "learning_rate": 0.00019048942585318414, + "loss": 2.6771, + "step": 2885 + }, + { + "epoch": 0.23291098377854894, + "grad_norm": 0.7457073926925659, + "learning_rate": 0.00019048270524660196, + "loss": 2.7325, + "step": 2886 + }, + { + "epoch": 0.23299168751513194, + "grad_norm": 0.8258858323097229, + "learning_rate": 0.00019047598238495424, + "loss": 2.7434, + "step": 2887 + }, + { + "epoch": 0.23307239125171494, + "grad_norm": 0.8188657164573669, + "learning_rate": 0.00019046925726840853, + "loss": 2.732, + "step": 2888 + }, + { + "epoch": 0.23315309498829795, + "grad_norm": 0.8084142208099365, + "learning_rate": 0.00019046252989713246, + "loss": 2.7537, + "step": 2889 + }, + { + "epoch": 0.23323379872488095, + "grad_norm": 0.75553297996521, + "learning_rate": 0.00019045580027129364, + "loss": 2.6685, + "step": 2890 + }, + { + "epoch": 0.23331450246146396, + "grad_norm": 0.8145995736122131, + "learning_rate": 0.00019044906839105986, + "loss": 2.7654, + "step": 2891 + }, + { + "epoch": 0.23339520619804696, + "grad_norm": 0.8433949947357178, + "learning_rate": 0.0001904423342565988, + "loss": 2.7713, + "step": 2892 + }, + { + "epoch": 0.23347590993462997, + "grad_norm": 0.7826054096221924, + "learning_rate": 0.0001904355978680784, + "loss": 2.7108, + "step": 2893 + }, + { + "epoch": 0.23355661367121297, + "grad_norm": 0.7281686663627625, + "learning_rate": 
0.0001904288592256665, + "loss": 2.7606, + "step": 2894 + }, + { + "epoch": 0.23363731740779597, + "grad_norm": 0.8282813429832458, + "learning_rate": 0.00019042211832953103, + "loss": 2.6662, + "step": 2895 + }, + { + "epoch": 0.23371802114437898, + "grad_norm": 0.8227263689041138, + "learning_rate": 0.00019041537517984, + "loss": 2.7493, + "step": 2896 + }, + { + "epoch": 0.23379872488096198, + "grad_norm": 0.839350700378418, + "learning_rate": 0.0001904086297767615, + "loss": 2.7258, + "step": 2897 + }, + { + "epoch": 0.233879428617545, + "grad_norm": 0.713231086730957, + "learning_rate": 0.00019040188212046357, + "loss": 2.6722, + "step": 2898 + }, + { + "epoch": 0.233960132354128, + "grad_norm": 0.8314552903175354, + "learning_rate": 0.00019039513221111447, + "loss": 2.8509, + "step": 2899 + }, + { + "epoch": 0.234040836090711, + "grad_norm": 0.8885688781738281, + "learning_rate": 0.0001903883800488824, + "loss": 2.7608, + "step": 2900 + }, + { + "epoch": 0.234121539827294, + "grad_norm": 0.755308210849762, + "learning_rate": 0.00019038162563393555, + "loss": 2.7065, + "step": 2901 + }, + { + "epoch": 0.234202243563877, + "grad_norm": 0.7436641454696655, + "learning_rate": 0.00019037486896644236, + "loss": 2.6865, + "step": 2902 + }, + { + "epoch": 0.23428294730046, + "grad_norm": 0.7861987948417664, + "learning_rate": 0.0001903681100465712, + "loss": 2.7238, + "step": 2903 + }, + { + "epoch": 0.234363651037043, + "grad_norm": 0.7481045126914978, + "learning_rate": 0.0001903613488744905, + "loss": 2.7038, + "step": 2904 + }, + { + "epoch": 0.23444435477362602, + "grad_norm": 0.790765106678009, + "learning_rate": 0.0001903545854503688, + "loss": 2.6865, + "step": 2905 + }, + { + "epoch": 0.23452505851020902, + "grad_norm": 0.8594793677330017, + "learning_rate": 0.0001903478197743746, + "loss": 2.7324, + "step": 2906 + }, + { + "epoch": 0.23460576224679203, + "grad_norm": 0.7504310011863708, + "learning_rate": 0.00019034105184667662, + "loss": 2.6535, + "step": 
2907 + }, + { + "epoch": 0.23468646598337503, + "grad_norm": 0.7824578881263733, + "learning_rate": 0.00019033428166744342, + "loss": 2.7113, + "step": 2908 + }, + { + "epoch": 0.23476716971995804, + "grad_norm": 0.7766899466514587, + "learning_rate": 0.0001903275092368438, + "loss": 2.6907, + "step": 2909 + }, + { + "epoch": 0.23484787345654104, + "grad_norm": 0.8082600235939026, + "learning_rate": 0.00019032073455504657, + "loss": 2.6781, + "step": 2910 + }, + { + "epoch": 0.23492857719312404, + "grad_norm": 0.7790517210960388, + "learning_rate": 0.0001903139576222205, + "loss": 2.7277, + "step": 2911 + }, + { + "epoch": 0.23500928092970705, + "grad_norm": 0.7449578046798706, + "learning_rate": 0.00019030717843853453, + "loss": 2.7078, + "step": 2912 + }, + { + "epoch": 0.23508998466629005, + "grad_norm": 0.7931632399559021, + "learning_rate": 0.0001903003970041576, + "loss": 2.7165, + "step": 2913 + }, + { + "epoch": 0.23517068840287306, + "grad_norm": 0.7970653176307678, + "learning_rate": 0.00019029361331925873, + "loss": 2.7993, + "step": 2914 + }, + { + "epoch": 0.23525139213945606, + "grad_norm": 0.8497335314750671, + "learning_rate": 0.00019028682738400697, + "loss": 2.7564, + "step": 2915 + }, + { + "epoch": 0.23533209587603907, + "grad_norm": 0.7840128540992737, + "learning_rate": 0.0001902800391985715, + "loss": 2.7546, + "step": 2916 + }, + { + "epoch": 0.23541279961262207, + "grad_norm": 0.8237372636795044, + "learning_rate": 0.00019027324876312146, + "loss": 2.7507, + "step": 2917 + }, + { + "epoch": 0.23549350334920507, + "grad_norm": 0.8445321917533875, + "learning_rate": 0.00019026645607782603, + "loss": 2.7287, + "step": 2918 + }, + { + "epoch": 0.23557420708578808, + "grad_norm": 0.8380417227745056, + "learning_rate": 0.0001902596611428546, + "loss": 2.7778, + "step": 2919 + }, + { + "epoch": 0.23565491082237108, + "grad_norm": 0.7989064455032349, + "learning_rate": 0.00019025286395837646, + "loss": 2.7254, + "step": 2920 + }, + { + "epoch": 
0.2357356145589541, + "grad_norm": 0.8223496079444885, + "learning_rate": 0.00019024606452456102, + "loss": 2.7028, + "step": 2921 + }, + { + "epoch": 0.2358163182955371, + "grad_norm": 0.8090229630470276, + "learning_rate": 0.00019023926284157775, + "loss": 2.6911, + "step": 2922 + }, + { + "epoch": 0.2358970220321201, + "grad_norm": 0.7556560635566711, + "learning_rate": 0.00019023245890959615, + "loss": 2.7183, + "step": 2923 + }, + { + "epoch": 0.2359777257687031, + "grad_norm": 0.7907983660697937, + "learning_rate": 0.00019022565272878582, + "loss": 2.6805, + "step": 2924 + }, + { + "epoch": 0.2360584295052861, + "grad_norm": 0.9404142498970032, + "learning_rate": 0.0001902188442993164, + "loss": 2.8081, + "step": 2925 + }, + { + "epoch": 0.2361391332418691, + "grad_norm": 0.8349069952964783, + "learning_rate": 0.0001902120336213575, + "loss": 2.8329, + "step": 2926 + }, + { + "epoch": 0.2362198369784521, + "grad_norm": 0.8557522892951965, + "learning_rate": 0.00019020522069507892, + "loss": 2.704, + "step": 2927 + }, + { + "epoch": 0.23630054071503512, + "grad_norm": 0.7557278275489807, + "learning_rate": 0.00019019840552065044, + "loss": 2.7071, + "step": 2928 + }, + { + "epoch": 0.23638124445161812, + "grad_norm": 0.8810723423957825, + "learning_rate": 0.00019019158809824193, + "loss": 2.7535, + "step": 2929 + }, + { + "epoch": 0.23646194818820113, + "grad_norm": 0.7845562100410461, + "learning_rate": 0.00019018476842802326, + "loss": 2.7254, + "step": 2930 + }, + { + "epoch": 0.23654265192478413, + "grad_norm": 0.7566044926643372, + "learning_rate": 0.00019017794651016444, + "loss": 2.7295, + "step": 2931 + }, + { + "epoch": 0.23662335566136714, + "grad_norm": 0.8083382248878479, + "learning_rate": 0.00019017112234483545, + "loss": 2.7305, + "step": 2932 + }, + { + "epoch": 0.2367040593979501, + "grad_norm": 0.7924187183380127, + "learning_rate": 0.00019016429593220638, + "loss": 2.7659, + "step": 2933 + }, + { + "epoch": 0.23678476313453312, + 
"grad_norm": 0.8400307297706604, + "learning_rate": 0.00019015746727244737, + "loss": 2.7293, + "step": 2934 + }, + { + "epoch": 0.23686546687111612, + "grad_norm": 0.6931199431419373, + "learning_rate": 0.0001901506363657286, + "loss": 2.7189, + "step": 2935 + }, + { + "epoch": 0.23694617060769912, + "grad_norm": 0.8263585567474365, + "learning_rate": 0.0001901438032122203, + "loss": 2.7368, + "step": 2936 + }, + { + "epoch": 0.23702687434428213, + "grad_norm": 0.8001893162727356, + "learning_rate": 0.0001901369678120928, + "loss": 2.7793, + "step": 2937 + }, + { + "epoch": 0.23710757808086513, + "grad_norm": 0.7724235653877258, + "learning_rate": 0.00019013013016551644, + "loss": 2.717, + "step": 2938 + }, + { + "epoch": 0.23718828181744814, + "grad_norm": 0.7617147564888, + "learning_rate": 0.00019012329027266164, + "loss": 2.7275, + "step": 2939 + }, + { + "epoch": 0.23726898555403114, + "grad_norm": 0.80738765001297, + "learning_rate": 0.00019011644813369884, + "loss": 2.7444, + "step": 2940 + }, + { + "epoch": 0.23734968929061415, + "grad_norm": 0.7885528802871704, + "learning_rate": 0.00019010960374879861, + "loss": 2.7377, + "step": 2941 + }, + { + "epoch": 0.23743039302719715, + "grad_norm": 0.720268964767456, + "learning_rate": 0.00019010275711813147, + "loss": 2.6897, + "step": 2942 + }, + { + "epoch": 0.23751109676378016, + "grad_norm": 0.7532111406326294, + "learning_rate": 0.00019009590824186815, + "loss": 2.8117, + "step": 2943 + }, + { + "epoch": 0.23759180050036316, + "grad_norm": 0.780777633190155, + "learning_rate": 0.00019008905712017925, + "loss": 2.7565, + "step": 2944 + }, + { + "epoch": 0.23767250423694616, + "grad_norm": 0.8721919059753418, + "learning_rate": 0.00019008220375323553, + "loss": 2.801, + "step": 2945 + }, + { + "epoch": 0.23775320797352917, + "grad_norm": 0.8258914947509766, + "learning_rate": 0.00019007534814120786, + "loss": 2.7696, + "step": 2946 + }, + { + "epoch": 0.23783391171011217, + "grad_norm": 0.7292730808258057, + 
"learning_rate": 0.00019006849028426704, + "loss": 2.7512, + "step": 2947 + }, + { + "epoch": 0.23791461544669518, + "grad_norm": 0.7789164185523987, + "learning_rate": 0.00019006163018258398, + "loss": 2.7489, + "step": 2948 + }, + { + "epoch": 0.23799531918327818, + "grad_norm": 0.8049725294113159, + "learning_rate": 0.00019005476783632967, + "loss": 2.672, + "step": 2949 + }, + { + "epoch": 0.23807602291986119, + "grad_norm": 0.7440119981765747, + "learning_rate": 0.00019004790324567519, + "loss": 2.7208, + "step": 2950 + }, + { + "epoch": 0.2381567266564442, + "grad_norm": 0.7695925235748291, + "learning_rate": 0.00019004103641079154, + "loss": 2.7816, + "step": 2951 + }, + { + "epoch": 0.2382374303930272, + "grad_norm": 0.7623234391212463, + "learning_rate": 0.00019003416733184988, + "loss": 2.7034, + "step": 2952 + }, + { + "epoch": 0.2383181341296102, + "grad_norm": 0.8136502504348755, + "learning_rate": 0.00019002729600902141, + "loss": 2.7638, + "step": 2953 + }, + { + "epoch": 0.2383988378661932, + "grad_norm": 0.7813066840171814, + "learning_rate": 0.00019002042244247743, + "loss": 2.7606, + "step": 2954 + }, + { + "epoch": 0.2384795416027762, + "grad_norm": 0.7863059043884277, + "learning_rate": 0.0001900135466323892, + "loss": 2.7219, + "step": 2955 + }, + { + "epoch": 0.2385602453393592, + "grad_norm": 0.8712359070777893, + "learning_rate": 0.00019000666857892806, + "loss": 2.7485, + "step": 2956 + }, + { + "epoch": 0.23864094907594222, + "grad_norm": 0.8130611777305603, + "learning_rate": 0.00018999978828226547, + "loss": 2.7195, + "step": 2957 + }, + { + "epoch": 0.23872165281252522, + "grad_norm": 0.759503960609436, + "learning_rate": 0.00018999290574257292, + "loss": 2.6856, + "step": 2958 + }, + { + "epoch": 0.23880235654910822, + "grad_norm": 0.7490882277488708, + "learning_rate": 0.0001899860209600219, + "loss": 2.7587, + "step": 2959 + }, + { + "epoch": 0.23888306028569123, + "grad_norm": 0.8111297488212585, + "learning_rate": 
0.000189979133934784, + "loss": 2.7688, + "step": 2960 + }, + { + "epoch": 0.23896376402227423, + "grad_norm": 0.844894289970398, + "learning_rate": 0.0001899722446670309, + "loss": 2.7706, + "step": 2961 + }, + { + "epoch": 0.23904446775885724, + "grad_norm": 0.7875459790229797, + "learning_rate": 0.00018996535315693423, + "loss": 2.7535, + "step": 2962 + }, + { + "epoch": 0.23912517149544024, + "grad_norm": 0.7768518328666687, + "learning_rate": 0.0001899584594046658, + "loss": 2.7268, + "step": 2963 + }, + { + "epoch": 0.23920587523202325, + "grad_norm": 0.8645716309547424, + "learning_rate": 0.00018995156341039744, + "loss": 2.7856, + "step": 2964 + }, + { + "epoch": 0.23928657896860625, + "grad_norm": 0.7816600799560547, + "learning_rate": 0.00018994466517430097, + "loss": 2.757, + "step": 2965 + }, + { + "epoch": 0.23936728270518925, + "grad_norm": 0.7967644333839417, + "learning_rate": 0.00018993776469654832, + "loss": 2.7021, + "step": 2966 + }, + { + "epoch": 0.23944798644177226, + "grad_norm": 0.800589919090271, + "learning_rate": 0.00018993086197731146, + "loss": 2.6838, + "step": 2967 + }, + { + "epoch": 0.23952869017835526, + "grad_norm": 0.7658529281616211, + "learning_rate": 0.00018992395701676246, + "loss": 2.6992, + "step": 2968 + }, + { + "epoch": 0.23960939391493827, + "grad_norm": 0.848456621170044, + "learning_rate": 0.00018991704981507338, + "loss": 2.7249, + "step": 2969 + }, + { + "epoch": 0.23969009765152127, + "grad_norm": 0.7365427017211914, + "learning_rate": 0.00018991014037241638, + "loss": 2.7044, + "step": 2970 + }, + { + "epoch": 0.23977080138810428, + "grad_norm": 0.8026351928710938, + "learning_rate": 0.00018990322868896365, + "loss": 2.7409, + "step": 2971 + }, + { + "epoch": 0.23985150512468728, + "grad_norm": 0.788646936416626, + "learning_rate": 0.00018989631476488744, + "loss": 2.7331, + "step": 2972 + }, + { + "epoch": 0.23993220886127029, + "grad_norm": 0.8388644456863403, + "learning_rate": 0.00018988939860036007, + 
"loss": 2.7478, + "step": 2973 + }, + { + "epoch": 0.2400129125978533, + "grad_norm": 0.7479026913642883, + "learning_rate": 0.00018988248019555394, + "loss": 2.7248, + "step": 2974 + }, + { + "epoch": 0.2400936163344363, + "grad_norm": 0.7313364744186401, + "learning_rate": 0.00018987555955064144, + "loss": 2.7323, + "step": 2975 + }, + { + "epoch": 0.2401743200710193, + "grad_norm": 0.7858260273933411, + "learning_rate": 0.00018986863666579505, + "loss": 2.6845, + "step": 2976 + }, + { + "epoch": 0.2402550238076023, + "grad_norm": 0.8090949654579163, + "learning_rate": 0.00018986171154118732, + "loss": 2.8094, + "step": 2977 + }, + { + "epoch": 0.2403357275441853, + "grad_norm": 0.7917135953903198, + "learning_rate": 0.00018985478417699085, + "loss": 2.7106, + "step": 2978 + }, + { + "epoch": 0.2404164312807683, + "grad_norm": 0.8192126154899597, + "learning_rate": 0.00018984785457337825, + "loss": 2.7729, + "step": 2979 + }, + { + "epoch": 0.24049713501735132, + "grad_norm": 0.797922670841217, + "learning_rate": 0.00018984092273052226, + "loss": 2.7747, + "step": 2980 + }, + { + "epoch": 0.24057783875393432, + "grad_norm": 0.9050948023796082, + "learning_rate": 0.00018983398864859564, + "loss": 2.7453, + "step": 2981 + }, + { + "epoch": 0.24065854249051732, + "grad_norm": 0.7827617526054382, + "learning_rate": 0.0001898270523277712, + "loss": 2.7371, + "step": 2982 + }, + { + "epoch": 0.24073924622710033, + "grad_norm": 0.7530156373977661, + "learning_rate": 0.0001898201137682218, + "loss": 2.7397, + "step": 2983 + }, + { + "epoch": 0.2408199499636833, + "grad_norm": 0.7989545464515686, + "learning_rate": 0.00018981317297012034, + "loss": 2.7532, + "step": 2984 + }, + { + "epoch": 0.2409006537002663, + "grad_norm": 0.7501168847084045, + "learning_rate": 0.00018980622993363988, + "loss": 2.7395, + "step": 2985 + }, + { + "epoch": 0.2409813574368493, + "grad_norm": 0.8073468208312988, + "learning_rate": 0.0001897992846589534, + "loss": 2.7673, + "step": 2986 + }, 
+ { + "epoch": 0.24106206117343232, + "grad_norm": 0.9155512452125549, + "learning_rate": 0.00018979233714623401, + "loss": 2.6608, + "step": 2987 + }, + { + "epoch": 0.24114276491001532, + "grad_norm": 0.7461311221122742, + "learning_rate": 0.00018978538739565485, + "loss": 2.7657, + "step": 2988 + }, + { + "epoch": 0.24122346864659833, + "grad_norm": 0.8011443018913269, + "learning_rate": 0.00018977843540738914, + "loss": 2.7363, + "step": 2989 + }, + { + "epoch": 0.24130417238318133, + "grad_norm": 0.7602998614311218, + "learning_rate": 0.0001897714811816101, + "loss": 2.7285, + "step": 2990 + }, + { + "epoch": 0.24138487611976434, + "grad_norm": 0.8283531069755554, + "learning_rate": 0.00018976452471849116, + "loss": 2.7614, + "step": 2991 + }, + { + "epoch": 0.24146557985634734, + "grad_norm": 0.7358889579772949, + "learning_rate": 0.00018975756601820556, + "loss": 2.7429, + "step": 2992 + }, + { + "epoch": 0.24154628359293034, + "grad_norm": 0.7749240398406982, + "learning_rate": 0.0001897506050809268, + "loss": 2.6884, + "step": 2993 + }, + { + "epoch": 0.24162698732951335, + "grad_norm": 0.7529963254928589, + "learning_rate": 0.00018974364190682837, + "loss": 2.7619, + "step": 2994 + }, + { + "epoch": 0.24170769106609635, + "grad_norm": 0.7946054935455322, + "learning_rate": 0.00018973667649608376, + "loss": 2.7403, + "step": 2995 + }, + { + "epoch": 0.24178839480267936, + "grad_norm": 0.735870897769928, + "learning_rate": 0.0001897297088488666, + "loss": 2.7158, + "step": 2996 + }, + { + "epoch": 0.24186909853926236, + "grad_norm": 0.8409188985824585, + "learning_rate": 0.00018972273896535055, + "loss": 2.768, + "step": 2997 + }, + { + "epoch": 0.24194980227584537, + "grad_norm": 0.8351938724517822, + "learning_rate": 0.0001897157668457093, + "loss": 2.7548, + "step": 2998 + }, + { + "epoch": 0.24203050601242837, + "grad_norm": 0.8339046239852905, + "learning_rate": 0.00018970879249011663, + "loss": 2.7842, + "step": 2999 + }, + { + "epoch": 
0.24211120974901137, + "grad_norm": 0.8092730641365051, + "learning_rate": 0.00018970181589874637, + "loss": 2.7141, + "step": 3000 + }, + { + "epoch": 0.24211120974901137, + "eval_loss": 2.643277406692505, + "eval_runtime": 784.7512, + "eval_samples_per_second": 3.339, + "eval_steps_per_second": 0.557, + "step": 3000 + }, + { + "epoch": 0.24219191348559438, + "grad_norm": 0.8014447093009949, + "learning_rate": 0.00018969483707177235, + "loss": 2.7341, + "step": 3001 + }, + { + "epoch": 0.24227261722217738, + "grad_norm": 0.744153618812561, + "learning_rate": 0.00018968785600936855, + "loss": 2.678, + "step": 3002 + }, + { + "epoch": 0.2423533209587604, + "grad_norm": 0.7264240384101868, + "learning_rate": 0.0001896808727117089, + "loss": 2.7321, + "step": 3003 + }, + { + "epoch": 0.2424340246953434, + "grad_norm": 0.8214067220687866, + "learning_rate": 0.00018967388717896748, + "loss": 2.7311, + "step": 3004 + }, + { + "epoch": 0.2425147284319264, + "grad_norm": 0.7871330976486206, + "learning_rate": 0.00018966689941131838, + "loss": 2.7184, + "step": 3005 + }, + { + "epoch": 0.2425954321685094, + "grad_norm": 0.7301360964775085, + "learning_rate": 0.00018965990940893575, + "loss": 2.7039, + "step": 3006 + }, + { + "epoch": 0.2426761359050924, + "grad_norm": 0.8290385603904724, + "learning_rate": 0.00018965291717199382, + "loss": 2.7848, + "step": 3007 + }, + { + "epoch": 0.2427568396416754, + "grad_norm": 0.7465909123420715, + "learning_rate": 0.00018964592270066683, + "loss": 2.7271, + "step": 3008 + }, + { + "epoch": 0.2428375433782584, + "grad_norm": 0.7992933988571167, + "learning_rate": 0.00018963892599512913, + "loss": 2.7749, + "step": 3009 + }, + { + "epoch": 0.24291824711484142, + "grad_norm": 0.7879100441932678, + "learning_rate": 0.00018963192705555507, + "loss": 2.6844, + "step": 3010 + }, + { + "epoch": 0.24299895085142442, + "grad_norm": 0.7895401120185852, + "learning_rate": 0.00018962492588211905, + "loss": 2.725, + "step": 3011 + }, + { + 
"epoch": 0.24307965458800743, + "grad_norm": 0.7699374556541443, + "learning_rate": 0.00018961792247499564, + "loss": 2.7408, + "step": 3012 + }, + { + "epoch": 0.24316035832459043, + "grad_norm": 0.828372597694397, + "learning_rate": 0.0001896109168343593, + "loss": 2.7527, + "step": 3013 + }, + { + "epoch": 0.24324106206117344, + "grad_norm": 0.7611951231956482, + "learning_rate": 0.0001896039089603847, + "loss": 2.7294, + "step": 3014 + }, + { + "epoch": 0.24332176579775644, + "grad_norm": 0.8214892148971558, + "learning_rate": 0.00018959689885324646, + "loss": 2.6931, + "step": 3015 + }, + { + "epoch": 0.24340246953433944, + "grad_norm": 0.7472538352012634, + "learning_rate": 0.00018958988651311928, + "loss": 2.7316, + "step": 3016 + }, + { + "epoch": 0.24348317327092245, + "grad_norm": 0.7574933171272278, + "learning_rate": 0.00018958287194017795, + "loss": 2.7764, + "step": 3017 + }, + { + "epoch": 0.24356387700750545, + "grad_norm": 0.739152729511261, + "learning_rate": 0.00018957585513459723, + "loss": 2.7949, + "step": 3018 + }, + { + "epoch": 0.24364458074408846, + "grad_norm": 0.824097752571106, + "learning_rate": 0.00018956883609655208, + "loss": 2.6612, + "step": 3019 + }, + { + "epoch": 0.24372528448067146, + "grad_norm": 0.7891144156455994, + "learning_rate": 0.00018956181482621744, + "loss": 2.7139, + "step": 3020 + }, + { + "epoch": 0.24380598821725447, + "grad_norm": 0.7364415526390076, + "learning_rate": 0.0001895547913237682, + "loss": 2.6984, + "step": 3021 + }, + { + "epoch": 0.24388669195383747, + "grad_norm": 0.7631362080574036, + "learning_rate": 0.0001895477655893795, + "loss": 2.7015, + "step": 3022 + }, + { + "epoch": 0.24396739569042047, + "grad_norm": 0.780541181564331, + "learning_rate": 0.00018954073762322637, + "loss": 2.7716, + "step": 3023 + }, + { + "epoch": 0.24404809942700348, + "grad_norm": 0.7877349853515625, + "learning_rate": 0.00018953370742548403, + "loss": 2.6654, + "step": 3024 + }, + { + "epoch": 0.24412880316358648, + 
"grad_norm": 0.7786216139793396, + "learning_rate": 0.00018952667499632763, + "loss": 2.7491, + "step": 3025 + }, + { + "epoch": 0.2442095069001695, + "grad_norm": 0.8207663893699646, + "learning_rate": 0.00018951964033593247, + "loss": 2.7212, + "step": 3026 + }, + { + "epoch": 0.2442902106367525, + "grad_norm": 0.8271831274032593, + "learning_rate": 0.00018951260344447386, + "loss": 2.7456, + "step": 3027 + }, + { + "epoch": 0.2443709143733355, + "grad_norm": 0.7610505819320679, + "learning_rate": 0.00018950556432212722, + "loss": 2.7472, + "step": 3028 + }, + { + "epoch": 0.2444516181099185, + "grad_norm": 0.7521701455116272, + "learning_rate": 0.00018949852296906792, + "loss": 2.7263, + "step": 3029 + }, + { + "epoch": 0.2445323218465015, + "grad_norm": 0.7518337965011597, + "learning_rate": 0.00018949147938547144, + "loss": 2.7069, + "step": 3030 + }, + { + "epoch": 0.2446130255830845, + "grad_norm": 0.7823107838630676, + "learning_rate": 0.00018948443357151343, + "loss": 2.7858, + "step": 3031 + }, + { + "epoch": 0.2446937293196675, + "grad_norm": 0.733132004737854, + "learning_rate": 0.00018947738552736938, + "loss": 2.7194, + "step": 3032 + }, + { + "epoch": 0.24477443305625052, + "grad_norm": 0.7756488919258118, + "learning_rate": 0.00018947033525321501, + "loss": 2.7299, + "step": 3033 + }, + { + "epoch": 0.24485513679283352, + "grad_norm": 0.7971112728118896, + "learning_rate": 0.00018946328274922598, + "loss": 2.7474, + "step": 3034 + }, + { + "epoch": 0.2449358405294165, + "grad_norm": 0.7871260643005371, + "learning_rate": 0.0001894562280155781, + "loss": 2.6994, + "step": 3035 + }, + { + "epoch": 0.2450165442659995, + "grad_norm": 0.7431116104125977, + "learning_rate": 0.00018944917105244717, + "loss": 2.6834, + "step": 3036 + }, + { + "epoch": 0.2450972480025825, + "grad_norm": 0.7372273206710815, + "learning_rate": 0.00018944211186000906, + "loss": 2.6988, + "step": 3037 + }, + { + "epoch": 0.2451779517391655, + "grad_norm": 0.8161508440971375, + 
"learning_rate": 0.00018943505043843975, + "loss": 2.7595, + "step": 3038 + }, + { + "epoch": 0.24525865547574852, + "grad_norm": 0.8062586784362793, + "learning_rate": 0.00018942798678791518, + "loss": 2.6893, + "step": 3039 + }, + { + "epoch": 0.24533935921233152, + "grad_norm": 0.824023425579071, + "learning_rate": 0.0001894209209086114, + "loss": 2.7188, + "step": 3040 + }, + { + "epoch": 0.24542006294891452, + "grad_norm": 0.740466833114624, + "learning_rate": 0.00018941385280070455, + "loss": 2.674, + "step": 3041 + }, + { + "epoch": 0.24550076668549753, + "grad_norm": 0.8543577194213867, + "learning_rate": 0.00018940678246437073, + "loss": 2.7423, + "step": 3042 + }, + { + "epoch": 0.24558147042208053, + "grad_norm": 0.7059324979782104, + "learning_rate": 0.0001893997098997862, + "loss": 2.6669, + "step": 3043 + }, + { + "epoch": 0.24566217415866354, + "grad_norm": 0.7739956974983215, + "learning_rate": 0.00018939263510712721, + "loss": 2.7118, + "step": 3044 + }, + { + "epoch": 0.24574287789524654, + "grad_norm": 0.7701205611228943, + "learning_rate": 0.00018938555808657007, + "loss": 2.7653, + "step": 3045 + }, + { + "epoch": 0.24582358163182955, + "grad_norm": 0.7243000864982605, + "learning_rate": 0.00018937847883829115, + "loss": 2.6789, + "step": 3046 + }, + { + "epoch": 0.24590428536841255, + "grad_norm": 0.7645598649978638, + "learning_rate": 0.00018937139736246693, + "loss": 2.7108, + "step": 3047 + }, + { + "epoch": 0.24598498910499556, + "grad_norm": 0.7544745802879333, + "learning_rate": 0.00018936431365927385, + "loss": 2.6958, + "step": 3048 + }, + { + "epoch": 0.24606569284157856, + "grad_norm": 0.709282398223877, + "learning_rate": 0.00018935722772888848, + "loss": 2.6728, + "step": 3049 + }, + { + "epoch": 0.24614639657816156, + "grad_norm": 0.7524243593215942, + "learning_rate": 0.00018935013957148742, + "loss": 2.7283, + "step": 3050 + }, + { + "epoch": 0.24622710031474457, + "grad_norm": 0.7959655523300171, + "learning_rate": 
0.0001893430491872473, + "loss": 2.7384, + "step": 3051 + }, + { + "epoch": 0.24630780405132757, + "grad_norm": 0.7252553105354309, + "learning_rate": 0.00018933595657634486, + "loss": 2.7226, + "step": 3052 + }, + { + "epoch": 0.24638850778791058, + "grad_norm": 0.7387316226959229, + "learning_rate": 0.00018932886173895686, + "loss": 2.7546, + "step": 3053 + }, + { + "epoch": 0.24646921152449358, + "grad_norm": 0.804856538772583, + "learning_rate": 0.0001893217646752601, + "loss": 2.7321, + "step": 3054 + }, + { + "epoch": 0.24654991526107659, + "grad_norm": 0.6929069757461548, + "learning_rate": 0.0001893146653854315, + "loss": 2.6735, + "step": 3055 + }, + { + "epoch": 0.2466306189976596, + "grad_norm": 0.7076159715652466, + "learning_rate": 0.00018930756386964794, + "loss": 2.7368, + "step": 3056 + }, + { + "epoch": 0.2467113227342426, + "grad_norm": 0.7522851228713989, + "learning_rate": 0.00018930046012808648, + "loss": 2.7448, + "step": 3057 + }, + { + "epoch": 0.2467920264708256, + "grad_norm": 0.8347200155258179, + "learning_rate": 0.00018929335416092408, + "loss": 2.6837, + "step": 3058 + }, + { + "epoch": 0.2468727302074086, + "grad_norm": 0.737503707408905, + "learning_rate": 0.00018928624596833786, + "loss": 2.693, + "step": 3059 + }, + { + "epoch": 0.2469534339439916, + "grad_norm": 0.7836787104606628, + "learning_rate": 0.00018927913555050503, + "loss": 2.7335, + "step": 3060 + }, + { + "epoch": 0.2470341376805746, + "grad_norm": 0.7823840975761414, + "learning_rate": 0.00018927202290760278, + "loss": 2.6736, + "step": 3061 + }, + { + "epoch": 0.24711484141715762, + "grad_norm": 0.7894529700279236, + "learning_rate": 0.00018926490803980833, + "loss": 2.7112, + "step": 3062 + }, + { + "epoch": 0.24719554515374062, + "grad_norm": 0.8289024829864502, + "learning_rate": 0.000189257790947299, + "loss": 2.7667, + "step": 3063 + }, + { + "epoch": 0.24727624889032362, + "grad_norm": 0.70560222864151, + "learning_rate": 0.00018925067163025227, + "loss": 
2.6946, + "step": 3064 + }, + { + "epoch": 0.24735695262690663, + "grad_norm": 0.6954196095466614, + "learning_rate": 0.00018924355008884548, + "loss": 2.7237, + "step": 3065 + }, + { + "epoch": 0.24743765636348963, + "grad_norm": 0.7975121736526489, + "learning_rate": 0.0001892364263232561, + "loss": 2.6392, + "step": 3066 + }, + { + "epoch": 0.24751836010007264, + "grad_norm": 0.777350902557373, + "learning_rate": 0.00018922930033366174, + "loss": 2.7284, + "step": 3067 + }, + { + "epoch": 0.24759906383665564, + "grad_norm": 0.738240659236908, + "learning_rate": 0.00018922217212023995, + "loss": 2.6884, + "step": 3068 + }, + { + "epoch": 0.24767976757323865, + "grad_norm": 0.8077268600463867, + "learning_rate": 0.0001892150416831684, + "loss": 2.7205, + "step": 3069 + }, + { + "epoch": 0.24776047130982165, + "grad_norm": 0.8108188509941101, + "learning_rate": 0.00018920790902262483, + "loss": 2.7592, + "step": 3070 + }, + { + "epoch": 0.24784117504640465, + "grad_norm": 0.7842642664909363, + "learning_rate": 0.00018920077413878695, + "loss": 2.7474, + "step": 3071 + }, + { + "epoch": 0.24792187878298766, + "grad_norm": 0.7644543051719666, + "learning_rate": 0.0001891936370318326, + "loss": 2.7179, + "step": 3072 + }, + { + "epoch": 0.24800258251957066, + "grad_norm": 0.7761854529380798, + "learning_rate": 0.00018918649770193965, + "loss": 2.71, + "step": 3073 + }, + { + "epoch": 0.24808328625615367, + "grad_norm": 0.7724074125289917, + "learning_rate": 0.00018917935614928607, + "loss": 2.7359, + "step": 3074 + }, + { + "epoch": 0.24816398999273667, + "grad_norm": 0.7360609173774719, + "learning_rate": 0.0001891722123740498, + "loss": 2.7342, + "step": 3075 + }, + { + "epoch": 0.24824469372931968, + "grad_norm": 0.757561206817627, + "learning_rate": 0.00018916506637640894, + "loss": 2.7647, + "step": 3076 + }, + { + "epoch": 0.24832539746590268, + "grad_norm": 0.7180947065353394, + "learning_rate": 0.00018915791815654148, + "loss": 2.6771, + "step": 3077 + }, + { 
+ "epoch": 0.24840610120248569, + "grad_norm": 0.7219653129577637, + "learning_rate": 0.0001891507677146257, + "loss": 2.7772, + "step": 3078 + }, + { + "epoch": 0.2484868049390687, + "grad_norm": 0.749113917350769, + "learning_rate": 0.0001891436150508397, + "loss": 2.6996, + "step": 3079 + }, + { + "epoch": 0.2485675086756517, + "grad_norm": 0.766180157661438, + "learning_rate": 0.00018913646016536183, + "loss": 2.7896, + "step": 3080 + }, + { + "epoch": 0.2486482124122347, + "grad_norm": 0.7672411799430847, + "learning_rate": 0.00018912930305837032, + "loss": 2.7307, + "step": 3081 + }, + { + "epoch": 0.2487289161488177, + "grad_norm": 0.7639018297195435, + "learning_rate": 0.00018912214373004364, + "loss": 2.6569, + "step": 3082 + }, + { + "epoch": 0.2488096198854007, + "grad_norm": 0.8935483694076538, + "learning_rate": 0.00018911498218056013, + "loss": 2.6897, + "step": 3083 + }, + { + "epoch": 0.2488903236219837, + "grad_norm": 0.8506368398666382, + "learning_rate": 0.00018910781841009836, + "loss": 2.778, + "step": 3084 + }, + { + "epoch": 0.24897102735856672, + "grad_norm": 0.8026999235153198, + "learning_rate": 0.0001891006524188368, + "loss": 2.7799, + "step": 3085 + }, + { + "epoch": 0.2490517310951497, + "grad_norm": 0.784637987613678, + "learning_rate": 0.00018909348420695406, + "loss": 2.673, + "step": 3086 + }, + { + "epoch": 0.2491324348317327, + "grad_norm": 0.8949337601661682, + "learning_rate": 0.00018908631377462882, + "loss": 2.7726, + "step": 3087 + }, + { + "epoch": 0.2492131385683157, + "grad_norm": 0.73841792345047, + "learning_rate": 0.00018907914112203974, + "loss": 2.7403, + "step": 3088 + }, + { + "epoch": 0.2492938423048987, + "grad_norm": 0.7305924296379089, + "learning_rate": 0.00018907196624936564, + "loss": 2.6713, + "step": 3089 + }, + { + "epoch": 0.2493745460414817, + "grad_norm": 0.7707394361495972, + "learning_rate": 0.0001890647891567853, + "loss": 2.7306, + "step": 3090 + }, + { + "epoch": 0.2494552497780647, + "grad_norm": 
0.8691473603248596, + "learning_rate": 0.00018905760984447759, + "loss": 2.6775, + "step": 3091 + }, + { + "epoch": 0.24953595351464772, + "grad_norm": 0.7466028332710266, + "learning_rate": 0.00018905042831262144, + "loss": 2.7196, + "step": 3092 + }, + { + "epoch": 0.24961665725123072, + "grad_norm": 0.7785150408744812, + "learning_rate": 0.0001890432445613958, + "loss": 2.7099, + "step": 3093 + }, + { + "epoch": 0.24969736098781373, + "grad_norm": 0.7775028347969055, + "learning_rate": 0.0001890360585909798, + "loss": 2.698, + "step": 3094 + }, + { + "epoch": 0.24977806472439673, + "grad_norm": 0.829257071018219, + "learning_rate": 0.00018902887040155245, + "loss": 2.711, + "step": 3095 + }, + { + "epoch": 0.24985876846097974, + "grad_norm": 0.8492234945297241, + "learning_rate": 0.00018902167999329295, + "loss": 2.7164, + "step": 3096 + }, + { + "epoch": 0.24993947219756274, + "grad_norm": 0.7332174777984619, + "learning_rate": 0.00018901448736638045, + "loss": 2.6925, + "step": 3097 + }, + { + "epoch": 0.25002017593414577, + "grad_norm": 0.7494251728057861, + "learning_rate": 0.00018900729252099426, + "loss": 2.6899, + "step": 3098 + }, + { + "epoch": 0.25010087967072875, + "grad_norm": 0.7760747075080872, + "learning_rate": 0.00018900009545731367, + "loss": 2.6626, + "step": 3099 + }, + { + "epoch": 0.2501815834073118, + "grad_norm": 0.7270001173019409, + "learning_rate": 0.00018899289617551804, + "loss": 2.7338, + "step": 3100 + }, + { + "epoch": 0.25026228714389476, + "grad_norm": 0.7832693457603455, + "learning_rate": 0.0001889856946757868, + "loss": 2.6668, + "step": 3101 + }, + { + "epoch": 0.2503429908804778, + "grad_norm": 0.8833239674568176, + "learning_rate": 0.00018897849095829945, + "loss": 2.7219, + "step": 3102 + }, + { + "epoch": 0.25042369461706077, + "grad_norm": 0.8144814372062683, + "learning_rate": 0.0001889712850232355, + "loss": 2.724, + "step": 3103 + }, + { + "epoch": 0.2505043983536438, + "grad_norm": 0.9466180801391602, + 
"learning_rate": 0.0001889640768707746, + "loss": 2.7499, + "step": 3104 + }, + { + "epoch": 0.2505851020902268, + "grad_norm": 0.926292359828949, + "learning_rate": 0.00018895686650109632, + "loss": 2.7391, + "step": 3105 + }, + { + "epoch": 0.2506658058268098, + "grad_norm": 0.8214002251625061, + "learning_rate": 0.00018894965391438038, + "loss": 2.7546, + "step": 3106 + }, + { + "epoch": 0.2507465095633928, + "grad_norm": 0.9021030068397522, + "learning_rate": 0.00018894243911080655, + "loss": 2.7188, + "step": 3107 + }, + { + "epoch": 0.2508272132999758, + "grad_norm": 0.778366208076477, + "learning_rate": 0.00018893522209055465, + "loss": 2.7852, + "step": 3108 + }, + { + "epoch": 0.2509079170365588, + "grad_norm": 0.8780209422111511, + "learning_rate": 0.00018892800285380456, + "loss": 2.7344, + "step": 3109 + }, + { + "epoch": 0.2509886207731418, + "grad_norm": 0.7581839561462402, + "learning_rate": 0.00018892078140073614, + "loss": 2.6697, + "step": 3110 + }, + { + "epoch": 0.2510693245097248, + "grad_norm": 0.7818635702133179, + "learning_rate": 0.00018891355773152944, + "loss": 2.6969, + "step": 3111 + }, + { + "epoch": 0.2511500282463078, + "grad_norm": 0.7528424859046936, + "learning_rate": 0.0001889063318463644, + "loss": 2.7359, + "step": 3112 + }, + { + "epoch": 0.2512307319828908, + "grad_norm": 0.8274288773536682, + "learning_rate": 0.0001888991037454212, + "loss": 2.7124, + "step": 3113 + }, + { + "epoch": 0.2513114357194738, + "grad_norm": 0.7186813354492188, + "learning_rate": 0.00018889187342888, + "loss": 2.7037, + "step": 3114 + }, + { + "epoch": 0.2513921394560568, + "grad_norm": 0.7458071112632751, + "learning_rate": 0.00018888464089692088, + "loss": 2.7178, + "step": 3115 + }, + { + "epoch": 0.2514728431926398, + "grad_norm": 0.7814257740974426, + "learning_rate": 0.00018887740614972418, + "loss": 2.7554, + "step": 3116 + }, + { + "epoch": 0.2515535469292228, + "grad_norm": 0.7706831097602844, + "learning_rate": 0.0001888701691874702, + 
"loss": 2.7441, + "step": 3117 + }, + { + "epoch": 0.2516342506658058, + "grad_norm": 0.8177775740623474, + "learning_rate": 0.0001888629300103393, + "loss": 2.7257, + "step": 3118 + }, + { + "epoch": 0.25171495440238884, + "grad_norm": 0.791097104549408, + "learning_rate": 0.00018885568861851188, + "loss": 2.6937, + "step": 3119 + }, + { + "epoch": 0.2517956581389718, + "grad_norm": 0.7521430850028992, + "learning_rate": 0.00018884844501216845, + "loss": 2.7723, + "step": 3120 + }, + { + "epoch": 0.25187636187555484, + "grad_norm": 0.8119359016418457, + "learning_rate": 0.00018884119919148948, + "loss": 2.7573, + "step": 3121 + }, + { + "epoch": 0.2519570656121378, + "grad_norm": 0.7579830288887024, + "learning_rate": 0.00018883395115665562, + "loss": 2.6943, + "step": 3122 + }, + { + "epoch": 0.25203776934872085, + "grad_norm": 0.7718791365623474, + "learning_rate": 0.00018882670090784748, + "loss": 2.6911, + "step": 3123 + }, + { + "epoch": 0.25211847308530383, + "grad_norm": 0.7718087434768677, + "learning_rate": 0.00018881944844524576, + "loss": 2.7505, + "step": 3124 + }, + { + "epoch": 0.25219917682188686, + "grad_norm": 0.7696875333786011, + "learning_rate": 0.0001888121937690312, + "loss": 2.7272, + "step": 3125 + }, + { + "epoch": 0.25227988055846984, + "grad_norm": 0.8082131743431091, + "learning_rate": 0.00018880493687938464, + "loss": 2.6677, + "step": 3126 + }, + { + "epoch": 0.25236058429505287, + "grad_norm": 0.857224702835083, + "learning_rate": 0.00018879767777648686, + "loss": 2.7237, + "step": 3127 + }, + { + "epoch": 0.25244128803163585, + "grad_norm": 0.8135749697685242, + "learning_rate": 0.00018879041646051886, + "loss": 2.7298, + "step": 3128 + }, + { + "epoch": 0.2525219917682189, + "grad_norm": 0.7772457003593445, + "learning_rate": 0.0001887831529316616, + "loss": 2.7723, + "step": 3129 + }, + { + "epoch": 0.25260269550480186, + "grad_norm": 0.795555055141449, + "learning_rate": 0.00018877588719009607, + "loss": 2.7207, + "step": 3130 + 
}, + { + "epoch": 0.2526833992413849, + "grad_norm": 0.7677939534187317, + "learning_rate": 0.00018876861923600337, + "loss": 2.6649, + "step": 3131 + }, + { + "epoch": 0.25276410297796786, + "grad_norm": 0.7706151008605957, + "learning_rate": 0.00018876134906956464, + "loss": 2.7154, + "step": 3132 + }, + { + "epoch": 0.2528448067145509, + "grad_norm": 0.8230584859848022, + "learning_rate": 0.00018875407669096105, + "loss": 2.7871, + "step": 3133 + }, + { + "epoch": 0.2529255104511339, + "grad_norm": 0.7037158608436584, + "learning_rate": 0.0001887468021003739, + "loss": 2.669, + "step": 3134 + }, + { + "epoch": 0.2530062141877169, + "grad_norm": 0.8485400080680847, + "learning_rate": 0.00018873952529798441, + "loss": 2.7517, + "step": 3135 + }, + { + "epoch": 0.2530869179242999, + "grad_norm": 0.7803399562835693, + "learning_rate": 0.000188732246283974, + "loss": 2.6987, + "step": 3136 + }, + { + "epoch": 0.2531676216608829, + "grad_norm": 0.7884016633033752, + "learning_rate": 0.0001887249650585241, + "loss": 2.7348, + "step": 3137 + }, + { + "epoch": 0.2532483253974659, + "grad_norm": 0.7794530987739563, + "learning_rate": 0.0001887176816218161, + "loss": 2.6934, + "step": 3138 + }, + { + "epoch": 0.2533290291340489, + "grad_norm": 0.7905173301696777, + "learning_rate": 0.00018871039597403156, + "loss": 2.714, + "step": 3139 + }, + { + "epoch": 0.2534097328706319, + "grad_norm": 0.7857949137687683, + "learning_rate": 0.0001887031081153521, + "loss": 2.7591, + "step": 3140 + }, + { + "epoch": 0.25349043660721493, + "grad_norm": 0.8602419495582581, + "learning_rate": 0.00018869581804595927, + "loss": 2.7819, + "step": 3141 + }, + { + "epoch": 0.2535711403437979, + "grad_norm": 0.7845202088356018, + "learning_rate": 0.00018868852576603483, + "loss": 2.6796, + "step": 3142 + }, + { + "epoch": 0.25365184408038094, + "grad_norm": 0.7600612640380859, + "learning_rate": 0.00018868123127576048, + "loss": 2.6785, + "step": 3143 + }, + { + "epoch": 0.2537325478169639, + 
"grad_norm": 0.7731521725654602, + "learning_rate": 0.000188673934575318, + "loss": 2.7435, + "step": 3144 + }, + { + "epoch": 0.25381325155354695, + "grad_norm": 0.8214225172996521, + "learning_rate": 0.0001886666356648893, + "loss": 2.7264, + "step": 3145 + }, + { + "epoch": 0.2538939552901299, + "grad_norm": 0.7623010277748108, + "learning_rate": 0.00018865933454465628, + "loss": 2.73, + "step": 3146 + }, + { + "epoch": 0.25397465902671296, + "grad_norm": 0.7864633798599243, + "learning_rate": 0.00018865203121480088, + "loss": 2.7654, + "step": 3147 + }, + { + "epoch": 0.25405536276329593, + "grad_norm": 0.7654051780700684, + "learning_rate": 0.0001886447256755051, + "loss": 2.7171, + "step": 3148 + }, + { + "epoch": 0.25413606649987897, + "grad_norm": 0.8045486211776733, + "learning_rate": 0.0001886374179269511, + "loss": 2.7385, + "step": 3149 + }, + { + "epoch": 0.25421677023646194, + "grad_norm": 0.8504971861839294, + "learning_rate": 0.0001886301079693209, + "loss": 2.6719, + "step": 3150 + }, + { + "epoch": 0.254297473973045, + "grad_norm": 0.771538496017456, + "learning_rate": 0.0001886227958027967, + "loss": 2.6707, + "step": 3151 + }, + { + "epoch": 0.25437817770962795, + "grad_norm": 0.8472220301628113, + "learning_rate": 0.0001886154814275608, + "loss": 2.7201, + "step": 3152 + }, + { + "epoch": 0.254458881446211, + "grad_norm": 0.7639158368110657, + "learning_rate": 0.00018860816484379545, + "loss": 2.76, + "step": 3153 + }, + { + "epoch": 0.25453958518279396, + "grad_norm": 0.8042064905166626, + "learning_rate": 0.000188600846051683, + "loss": 2.6862, + "step": 3154 + }, + { + "epoch": 0.254620288919377, + "grad_norm": 0.7481087446212769, + "learning_rate": 0.0001885935250514059, + "loss": 2.7394, + "step": 3155 + }, + { + "epoch": 0.25470099265595997, + "grad_norm": 0.7826097011566162, + "learning_rate": 0.00018858620184314653, + "loss": 2.596, + "step": 3156 + }, + { + "epoch": 0.254781696392543, + "grad_norm": 0.7477610111236572, + 
"learning_rate": 0.00018857887642708743, + "loss": 2.7385, + "step": 3157 + }, + { + "epoch": 0.254862400129126, + "grad_norm": 0.7347466945648193, + "learning_rate": 0.00018857154880341122, + "loss": 2.722, + "step": 3158 + }, + { + "epoch": 0.254943103865709, + "grad_norm": 0.7853806018829346, + "learning_rate": 0.00018856421897230048, + "loss": 2.7675, + "step": 3159 + }, + { + "epoch": 0.255023807602292, + "grad_norm": 0.7497034072875977, + "learning_rate": 0.0001885568869339379, + "loss": 2.6882, + "step": 3160 + }, + { + "epoch": 0.255104511338875, + "grad_norm": 0.7932263612747192, + "learning_rate": 0.0001885495526885062, + "loss": 2.7938, + "step": 3161 + }, + { + "epoch": 0.255185215075458, + "grad_norm": 0.7776823043823242, + "learning_rate": 0.00018854221623618815, + "loss": 2.6955, + "step": 3162 + }, + { + "epoch": 0.25526591881204097, + "grad_norm": 0.7564878463745117, + "learning_rate": 0.00018853487757716666, + "loss": 2.7644, + "step": 3163 + }, + { + "epoch": 0.255346622548624, + "grad_norm": 0.836270809173584, + "learning_rate": 0.00018852753671162454, + "loss": 2.7119, + "step": 3164 + }, + { + "epoch": 0.255427326285207, + "grad_norm": 0.7540388703346252, + "learning_rate": 0.00018852019363974485, + "loss": 2.797, + "step": 3165 + }, + { + "epoch": 0.25550803002179, + "grad_norm": 0.7943860292434692, + "learning_rate": 0.0001885128483617105, + "loss": 2.7973, + "step": 3166 + }, + { + "epoch": 0.255588733758373, + "grad_norm": 0.7743831276893616, + "learning_rate": 0.00018850550087770463, + "loss": 2.7403, + "step": 3167 + }, + { + "epoch": 0.255669437494956, + "grad_norm": 0.7593801021575928, + "learning_rate": 0.00018849815118791028, + "loss": 2.7203, + "step": 3168 + }, + { + "epoch": 0.255750141231539, + "grad_norm": 0.7663586139678955, + "learning_rate": 0.00018849079929251068, + "loss": 2.7481, + "step": 3169 + }, + { + "epoch": 0.25583084496812203, + "grad_norm": 0.7218170166015625, + "learning_rate": 0.00018848344519168905, + "loss": 
2.6698, + "step": 3170 + }, + { + "epoch": 0.255911548704705, + "grad_norm": 0.8374441266059875, + "learning_rate": 0.00018847608888562868, + "loss": 2.8121, + "step": 3171 + }, + { + "epoch": 0.25599225244128804, + "grad_norm": 0.7488373517990112, + "learning_rate": 0.00018846873037451286, + "loss": 2.6871, + "step": 3172 + }, + { + "epoch": 0.256072956177871, + "grad_norm": 0.7513325810432434, + "learning_rate": 0.00018846136965852505, + "loss": 2.6924, + "step": 3173 + }, + { + "epoch": 0.25615365991445405, + "grad_norm": 0.7467690706253052, + "learning_rate": 0.00018845400673784865, + "loss": 2.714, + "step": 3174 + }, + { + "epoch": 0.256234363651037, + "grad_norm": 0.7717954516410828, + "learning_rate": 0.0001884466416126672, + "loss": 2.6679, + "step": 3175 + }, + { + "epoch": 0.25631506738762005, + "grad_norm": 0.7086547613143921, + "learning_rate": 0.0001884392742831642, + "loss": 2.7046, + "step": 3176 + }, + { + "epoch": 0.25639577112420303, + "grad_norm": 0.7024885416030884, + "learning_rate": 0.00018843190474952337, + "loss": 2.6724, + "step": 3177 + }, + { + "epoch": 0.25647647486078606, + "grad_norm": 0.8376390933990479, + "learning_rate": 0.00018842453301192827, + "loss": 2.7818, + "step": 3178 + }, + { + "epoch": 0.25655717859736904, + "grad_norm": 0.8190221190452576, + "learning_rate": 0.00018841715907056265, + "loss": 2.7455, + "step": 3179 + }, + { + "epoch": 0.25663788233395207, + "grad_norm": 0.8029047846794128, + "learning_rate": 0.0001884097829256103, + "loss": 2.7102, + "step": 3180 + }, + { + "epoch": 0.25671858607053505, + "grad_norm": 0.7467923760414124, + "learning_rate": 0.00018840240457725508, + "loss": 2.7051, + "step": 3181 + }, + { + "epoch": 0.2567992898071181, + "grad_norm": 0.7850394248962402, + "learning_rate": 0.00018839502402568086, + "loss": 2.6826, + "step": 3182 + }, + { + "epoch": 0.25687999354370106, + "grad_norm": 0.7144927978515625, + "learning_rate": 0.00018838764127107155, + "loss": 2.6694, + "step": 3183 + }, + { + 
"epoch": 0.2569606972802841, + "grad_norm": 0.7580311894416809, + "learning_rate": 0.0001883802563136112, + "loss": 2.7191, + "step": 3184 + }, + { + "epoch": 0.25704140101686707, + "grad_norm": 0.7366482615470886, + "learning_rate": 0.0001883728691534838, + "loss": 2.7175, + "step": 3185 + }, + { + "epoch": 0.2571221047534501, + "grad_norm": 0.6961715817451477, + "learning_rate": 0.0001883654797908735, + "loss": 2.7705, + "step": 3186 + }, + { + "epoch": 0.2572028084900331, + "grad_norm": 0.7473716735839844, + "learning_rate": 0.00018835808822596445, + "loss": 2.707, + "step": 3187 + }, + { + "epoch": 0.2572835122266161, + "grad_norm": 0.8376151919364929, + "learning_rate": 0.00018835069445894087, + "loss": 2.7424, + "step": 3188 + }, + { + "epoch": 0.2573642159631991, + "grad_norm": 0.7950237393379211, + "learning_rate": 0.00018834329848998706, + "loss": 2.7593, + "step": 3189 + }, + { + "epoch": 0.2574449196997821, + "grad_norm": 0.7637122869491577, + "learning_rate": 0.0001883359003192873, + "loss": 2.6708, + "step": 3190 + }, + { + "epoch": 0.2575256234363651, + "grad_norm": 0.709516704082489, + "learning_rate": 0.00018832849994702597, + "loss": 2.6988, + "step": 3191 + }, + { + "epoch": 0.2576063271729481, + "grad_norm": 0.7465435266494751, + "learning_rate": 0.00018832109737338757, + "loss": 2.7183, + "step": 3192 + }, + { + "epoch": 0.2576870309095311, + "grad_norm": 0.7619186043739319, + "learning_rate": 0.00018831369259855653, + "loss": 2.6833, + "step": 3193 + }, + { + "epoch": 0.25776773464611413, + "grad_norm": 0.7501961588859558, + "learning_rate": 0.0001883062856227174, + "loss": 2.725, + "step": 3194 + }, + { + "epoch": 0.2578484383826971, + "grad_norm": 0.7720133066177368, + "learning_rate": 0.00018829887644605483, + "loss": 2.7988, + "step": 3195 + }, + { + "epoch": 0.25792914211928014, + "grad_norm": 0.7253942489624023, + "learning_rate": 0.00018829146506875344, + "loss": 2.6999, + "step": 3196 + }, + { + "epoch": 0.2580098458558631, + 
"grad_norm": 0.7759599685668945, + "learning_rate": 0.00018828405149099792, + "loss": 2.6831, + "step": 3197 + }, + { + "epoch": 0.25809054959244615, + "grad_norm": 0.7250547409057617, + "learning_rate": 0.0001882766357129731, + "loss": 2.6742, + "step": 3198 + }, + { + "epoch": 0.2581712533290291, + "grad_norm": 0.7565183043479919, + "learning_rate": 0.00018826921773486372, + "loss": 2.6777, + "step": 3199 + }, + { + "epoch": 0.25825195706561216, + "grad_norm": 0.7183675169944763, + "learning_rate": 0.0001882617975568547, + "loss": 2.6743, + "step": 3200 + }, + { + "epoch": 0.25833266080219514, + "grad_norm": 0.7021663784980774, + "learning_rate": 0.00018825437517913098, + "loss": 2.727, + "step": 3201 + }, + { + "epoch": 0.25841336453877817, + "grad_norm": 0.7406932711601257, + "learning_rate": 0.00018824695060187753, + "loss": 2.7448, + "step": 3202 + }, + { + "epoch": 0.25849406827536114, + "grad_norm": 0.7766773104667664, + "learning_rate": 0.0001882395238252794, + "loss": 2.69, + "step": 3203 + }, + { + "epoch": 0.2585747720119442, + "grad_norm": 0.7483372688293457, + "learning_rate": 0.00018823209484952164, + "loss": 2.6611, + "step": 3204 + }, + { + "epoch": 0.25865547574852715, + "grad_norm": 0.781831681728363, + "learning_rate": 0.0001882246636747895, + "loss": 2.7292, + "step": 3205 + }, + { + "epoch": 0.2587361794851102, + "grad_norm": 0.7188203930854797, + "learning_rate": 0.00018821723030126806, + "loss": 2.718, + "step": 3206 + }, + { + "epoch": 0.25881688322169316, + "grad_norm": 0.7332054972648621, + "learning_rate": 0.00018820979472914263, + "loss": 2.6492, + "step": 3207 + }, + { + "epoch": 0.2588975869582762, + "grad_norm": 0.7044041156768799, + "learning_rate": 0.00018820235695859858, + "loss": 2.7047, + "step": 3208 + }, + { + "epoch": 0.25897829069485917, + "grad_norm": 0.8651862740516663, + "learning_rate": 0.00018819491698982121, + "loss": 2.6301, + "step": 3209 + }, + { + "epoch": 0.2590589944314422, + "grad_norm": 0.8118106126785278, + 
"learning_rate": 0.00018818747482299598, + "loss": 2.6522, + "step": 3210 + }, + { + "epoch": 0.2591396981680252, + "grad_norm": 0.7239218354225159, + "learning_rate": 0.00018818003045830832, + "loss": 2.7058, + "step": 3211 + }, + { + "epoch": 0.2592204019046082, + "grad_norm": 0.8557687997817993, + "learning_rate": 0.00018817258389594382, + "loss": 2.7125, + "step": 3212 + }, + { + "epoch": 0.2593011056411912, + "grad_norm": 0.7685148119926453, + "learning_rate": 0.00018816513513608801, + "loss": 2.7516, + "step": 3213 + }, + { + "epoch": 0.25938180937777416, + "grad_norm": 0.7497698664665222, + "learning_rate": 0.00018815768417892664, + "loss": 2.6536, + "step": 3214 + }, + { + "epoch": 0.2594625131143572, + "grad_norm": 0.7041923403739929, + "learning_rate": 0.0001881502310246453, + "loss": 2.7031, + "step": 3215 + }, + { + "epoch": 0.2595432168509402, + "grad_norm": 0.7815428376197815, + "learning_rate": 0.00018814277567342976, + "loss": 2.7291, + "step": 3216 + }, + { + "epoch": 0.2596239205875232, + "grad_norm": 0.7285065650939941, + "learning_rate": 0.00018813531812546583, + "loss": 2.7712, + "step": 3217 + }, + { + "epoch": 0.2597046243241062, + "grad_norm": 0.7606547474861145, + "learning_rate": 0.0001881278583809394, + "loss": 2.6714, + "step": 3218 + }, + { + "epoch": 0.2597853280606892, + "grad_norm": 0.7166680097579956, + "learning_rate": 0.00018812039644003638, + "loss": 2.7147, + "step": 3219 + }, + { + "epoch": 0.2598660317972722, + "grad_norm": 0.8977978229522705, + "learning_rate": 0.0001881129323029427, + "loss": 2.7743, + "step": 3220 + }, + { + "epoch": 0.2599467355338552, + "grad_norm": 0.7447277307510376, + "learning_rate": 0.00018810546596984446, + "loss": 2.7049, + "step": 3221 + }, + { + "epoch": 0.2600274392704382, + "grad_norm": 0.7343515157699585, + "learning_rate": 0.00018809799744092768, + "loss": 2.6999, + "step": 3222 + }, + { + "epoch": 0.26010814300702123, + "grad_norm": 0.7303341627120972, + "learning_rate": 
0.00018809052671637852, + "loss": 2.7222, + "step": 3223 + }, + { + "epoch": 0.2601888467436042, + "grad_norm": 0.7412950396537781, + "learning_rate": 0.00018808305379638314, + "loss": 2.6957, + "step": 3224 + }, + { + "epoch": 0.26026955048018724, + "grad_norm": 0.7495343089103699, + "learning_rate": 0.00018807557868112781, + "loss": 2.7123, + "step": 3225 + }, + { + "epoch": 0.2603502542167702, + "grad_norm": 0.8137524724006653, + "learning_rate": 0.00018806810137079886, + "loss": 2.7191, + "step": 3226 + }, + { + "epoch": 0.26043095795335325, + "grad_norm": 0.786374568939209, + "learning_rate": 0.0001880606218655826, + "loss": 2.7237, + "step": 3227 + }, + { + "epoch": 0.2605116616899362, + "grad_norm": 0.9969484806060791, + "learning_rate": 0.00018805314016566543, + "loss": 2.7603, + "step": 3228 + }, + { + "epoch": 0.26059236542651926, + "grad_norm": 0.8132432103157043, + "learning_rate": 0.00018804565627123386, + "loss": 2.6807, + "step": 3229 + }, + { + "epoch": 0.26067306916310223, + "grad_norm": 0.7604904174804688, + "learning_rate": 0.00018803817018247436, + "loss": 2.7105, + "step": 3230 + }, + { + "epoch": 0.26075377289968527, + "grad_norm": 0.743505597114563, + "learning_rate": 0.00018803068189957354, + "loss": 2.7152, + "step": 3231 + }, + { + "epoch": 0.26083447663626824, + "grad_norm": 0.7780006527900696, + "learning_rate": 0.000188023191422718, + "loss": 2.7043, + "step": 3232 + }, + { + "epoch": 0.2609151803728513, + "grad_norm": 0.7683089375495911, + "learning_rate": 0.00018801569875209447, + "loss": 2.7033, + "step": 3233 + }, + { + "epoch": 0.26099588410943425, + "grad_norm": 0.7540118098258972, + "learning_rate": 0.0001880082038878896, + "loss": 2.7121, + "step": 3234 + }, + { + "epoch": 0.2610765878460173, + "grad_norm": 0.7509592771530151, + "learning_rate": 0.00018800070683029025, + "loss": 2.6575, + "step": 3235 + }, + { + "epoch": 0.26115729158260026, + "grad_norm": 0.8015461564064026, + "learning_rate": 0.00018799320757948327, + "loss": 
2.6956, + "step": 3236 + }, + { + "epoch": 0.2612379953191833, + "grad_norm": 0.7586383819580078, + "learning_rate": 0.00018798570613565553, + "loss": 2.6719, + "step": 3237 + }, + { + "epoch": 0.26131869905576627, + "grad_norm": 0.7833155989646912, + "learning_rate": 0.000187978202498994, + "loss": 2.7317, + "step": 3238 + }, + { + "epoch": 0.2613994027923493, + "grad_norm": 0.7976018786430359, + "learning_rate": 0.00018797069666968565, + "loss": 2.7514, + "step": 3239 + }, + { + "epoch": 0.2614801065289323, + "grad_norm": 0.8388968706130981, + "learning_rate": 0.00018796318864791763, + "loss": 2.6845, + "step": 3240 + }, + { + "epoch": 0.2615608102655153, + "grad_norm": 0.8082842230796814, + "learning_rate": 0.00018795567843387701, + "loss": 2.7204, + "step": 3241 + }, + { + "epoch": 0.2616415140020983, + "grad_norm": 0.7514800429344177, + "learning_rate": 0.00018794816602775094, + "loss": 2.7117, + "step": 3242 + }, + { + "epoch": 0.2617222177386813, + "grad_norm": 0.8676564693450928, + "learning_rate": 0.00018794065142972664, + "loss": 2.6596, + "step": 3243 + }, + { + "epoch": 0.2618029214752643, + "grad_norm": 0.7449865341186523, + "learning_rate": 0.0001879331346399915, + "loss": 2.7089, + "step": 3244 + }, + { + "epoch": 0.2618836252118473, + "grad_norm": 0.8020811676979065, + "learning_rate": 0.00018792561565873274, + "loss": 2.7293, + "step": 3245 + }, + { + "epoch": 0.2619643289484303, + "grad_norm": 0.7961642146110535, + "learning_rate": 0.00018791809448613783, + "loss": 2.7269, + "step": 3246 + }, + { + "epoch": 0.26204503268501333, + "grad_norm": 0.7842351198196411, + "learning_rate": 0.00018791057112239415, + "loss": 2.6773, + "step": 3247 + }, + { + "epoch": 0.2621257364215963, + "grad_norm": 0.7494246959686279, + "learning_rate": 0.00018790304556768925, + "loss": 2.7317, + "step": 3248 + }, + { + "epoch": 0.26220644015817934, + "grad_norm": 0.7822836637496948, + "learning_rate": 0.0001878955178222107, + "loss": 2.6834, + "step": 3249 + }, + { + 
"epoch": 0.2622871438947623, + "grad_norm": 0.8432494401931763, + "learning_rate": 0.00018788798788614607, + "loss": 2.7048, + "step": 3250 + }, + { + "epoch": 0.26236784763134535, + "grad_norm": 0.9599446058273315, + "learning_rate": 0.000187880455759683, + "loss": 2.7793, + "step": 3251 + }, + { + "epoch": 0.26244855136792833, + "grad_norm": 0.8097226023674011, + "learning_rate": 0.00018787292144300928, + "loss": 2.7177, + "step": 3252 + }, + { + "epoch": 0.26252925510451136, + "grad_norm": 0.8423499464988708, + "learning_rate": 0.00018786538493631265, + "loss": 2.7265, + "step": 3253 + }, + { + "epoch": 0.26260995884109434, + "grad_norm": 0.7388847470283508, + "learning_rate": 0.00018785784623978095, + "loss": 2.6778, + "step": 3254 + }, + { + "epoch": 0.26269066257767737, + "grad_norm": 0.766368567943573, + "learning_rate": 0.0001878503053536021, + "loss": 2.654, + "step": 3255 + }, + { + "epoch": 0.26277136631426035, + "grad_norm": 0.8181266188621521, + "learning_rate": 0.00018784276227796394, + "loss": 2.7568, + "step": 3256 + }, + { + "epoch": 0.2628520700508434, + "grad_norm": 0.8235312104225159, + "learning_rate": 0.00018783521701305452, + "loss": 2.7317, + "step": 3257 + }, + { + "epoch": 0.26293277378742635, + "grad_norm": 0.7103183269500732, + "learning_rate": 0.00018782766955906195, + "loss": 2.6919, + "step": 3258 + }, + { + "epoch": 0.2630134775240094, + "grad_norm": 0.7202538251876831, + "learning_rate": 0.0001878201199161742, + "loss": 2.7179, + "step": 3259 + }, + { + "epoch": 0.26309418126059236, + "grad_norm": 0.8402286171913147, + "learning_rate": 0.00018781256808457952, + "loss": 2.7789, + "step": 3260 + }, + { + "epoch": 0.2631748849971754, + "grad_norm": 0.8136829137802124, + "learning_rate": 0.00018780501406446613, + "loss": 2.6872, + "step": 3261 + }, + { + "epoch": 0.26325558873375837, + "grad_norm": 0.8017000555992126, + "learning_rate": 0.00018779745785602224, + "loss": 2.7527, + "step": 3262 + }, + { + "epoch": 0.2633362924703414, + 
"grad_norm": 0.7880774140357971, + "learning_rate": 0.00018778989945943619, + "loss": 2.7348, + "step": 3263 + }, + { + "epoch": 0.2634169962069244, + "grad_norm": 0.7402438521385193, + "learning_rate": 0.00018778233887489635, + "loss": 2.6946, + "step": 3264 + }, + { + "epoch": 0.26349769994350736, + "grad_norm": 0.7450907230377197, + "learning_rate": 0.0001877747761025912, + "loss": 2.7502, + "step": 3265 + }, + { + "epoch": 0.2635784036800904, + "grad_norm": 0.7504056692123413, + "learning_rate": 0.00018776721114270917, + "loss": 2.832, + "step": 3266 + }, + { + "epoch": 0.26365910741667337, + "grad_norm": 0.7710226774215698, + "learning_rate": 0.00018775964399543878, + "loss": 2.6895, + "step": 3267 + }, + { + "epoch": 0.2637398111532564, + "grad_norm": 0.769927978515625, + "learning_rate": 0.00018775207466096867, + "loss": 2.6801, + "step": 3268 + }, + { + "epoch": 0.2638205148898394, + "grad_norm": 0.7210869193077087, + "learning_rate": 0.0001877445031394875, + "loss": 2.6966, + "step": 3269 + }, + { + "epoch": 0.2639012186264224, + "grad_norm": 0.7731119990348816, + "learning_rate": 0.00018773692943118393, + "loss": 2.6965, + "step": 3270 + }, + { + "epoch": 0.2639819223630054, + "grad_norm": 0.7539728283882141, + "learning_rate": 0.00018772935353624672, + "loss": 2.753, + "step": 3271 + }, + { + "epoch": 0.2640626260995884, + "grad_norm": 0.7993821501731873, + "learning_rate": 0.00018772177545486472, + "loss": 2.7177, + "step": 3272 + }, + { + "epoch": 0.2641433298361714, + "grad_norm": 0.7880005240440369, + "learning_rate": 0.00018771419518722672, + "loss": 2.6854, + "step": 3273 + }, + { + "epoch": 0.2642240335727544, + "grad_norm": 0.8079188466072083, + "learning_rate": 0.0001877066127335217, + "loss": 2.734, + "step": 3274 + }, + { + "epoch": 0.2643047373093374, + "grad_norm": 0.8241428732872009, + "learning_rate": 0.00018769902809393865, + "loss": 2.7156, + "step": 3275 + }, + { + "epoch": 0.26438544104592043, + "grad_norm": 0.8007158041000366, + 
"learning_rate": 0.00018769144126866657, + "loss": 2.693, + "step": 3276 + }, + { + "epoch": 0.2644661447825034, + "grad_norm": 0.8360451459884644, + "learning_rate": 0.00018768385225789456, + "loss": 2.6919, + "step": 3277 + }, + { + "epoch": 0.26454684851908644, + "grad_norm": 0.7596627473831177, + "learning_rate": 0.00018767626106181172, + "loss": 2.7861, + "step": 3278 + }, + { + "epoch": 0.2646275522556694, + "grad_norm": 0.7469248175621033, + "learning_rate": 0.00018766866768060727, + "loss": 2.7305, + "step": 3279 + }, + { + "epoch": 0.26470825599225245, + "grad_norm": 0.7103936076164246, + "learning_rate": 0.00018766107211447045, + "loss": 2.6456, + "step": 3280 + }, + { + "epoch": 0.2647889597288354, + "grad_norm": 0.7595266103744507, + "learning_rate": 0.00018765347436359056, + "loss": 2.7235, + "step": 3281 + }, + { + "epoch": 0.26486966346541846, + "grad_norm": 0.786648154258728, + "learning_rate": 0.00018764587442815698, + "loss": 2.7182, + "step": 3282 + }, + { + "epoch": 0.26495036720200144, + "grad_norm": 0.7152618169784546, + "learning_rate": 0.00018763827230835908, + "loss": 2.6842, + "step": 3283 + }, + { + "epoch": 0.26503107093858447, + "grad_norm": 0.89169842004776, + "learning_rate": 0.00018763066800438636, + "loss": 2.7661, + "step": 3284 + }, + { + "epoch": 0.26511177467516744, + "grad_norm": 0.8148171305656433, + "learning_rate": 0.00018762306151642833, + "loss": 2.7264, + "step": 3285 + }, + { + "epoch": 0.2651924784117505, + "grad_norm": 0.8070533871650696, + "learning_rate": 0.00018761545284467454, + "loss": 2.7425, + "step": 3286 + }, + { + "epoch": 0.26527318214833345, + "grad_norm": 0.8536118268966675, + "learning_rate": 0.00018760784198931465, + "loss": 2.702, + "step": 3287 + }, + { + "epoch": 0.2653538858849165, + "grad_norm": 0.7422329783439636, + "learning_rate": 0.00018760022895053833, + "loss": 2.6913, + "step": 3288 + }, + { + "epoch": 0.26543458962149946, + "grad_norm": 0.7415527105331421, + "learning_rate": 
0.0001875926137285353, + "loss": 2.6472, + "step": 3289 + }, + { + "epoch": 0.2655152933580825, + "grad_norm": 0.8432031273841858, + "learning_rate": 0.00018758499632349538, + "loss": 2.7506, + "step": 3290 + }, + { + "epoch": 0.26559599709466547, + "grad_norm": 0.8113259077072144, + "learning_rate": 0.0001875773767356084, + "loss": 2.6866, + "step": 3291 + }, + { + "epoch": 0.2656767008312485, + "grad_norm": 0.7898122668266296, + "learning_rate": 0.00018756975496506424, + "loss": 2.6516, + "step": 3292 + }, + { + "epoch": 0.2657574045678315, + "grad_norm": 0.7627275586128235, + "learning_rate": 0.0001875621310120529, + "loss": 2.7065, + "step": 3293 + }, + { + "epoch": 0.2658381083044145, + "grad_norm": 0.8227291107177734, + "learning_rate": 0.00018755450487676435, + "loss": 2.7614, + "step": 3294 + }, + { + "epoch": 0.2659188120409975, + "grad_norm": 0.8162109851837158, + "learning_rate": 0.00018754687655938868, + "loss": 2.7924, + "step": 3295 + }, + { + "epoch": 0.2659995157775805, + "grad_norm": 0.7231846451759338, + "learning_rate": 0.00018753924606011602, + "loss": 2.7505, + "step": 3296 + }, + { + "epoch": 0.2660802195141635, + "grad_norm": 0.8635944724082947, + "learning_rate": 0.00018753161337913647, + "loss": 2.7505, + "step": 3297 + }, + { + "epoch": 0.26616092325074653, + "grad_norm": 0.8131890892982483, + "learning_rate": 0.00018752397851664031, + "loss": 2.7872, + "step": 3298 + }, + { + "epoch": 0.2662416269873295, + "grad_norm": 0.7336695790290833, + "learning_rate": 0.00018751634147281786, + "loss": 2.7517, + "step": 3299 + }, + { + "epoch": 0.26632233072391254, + "grad_norm": 0.7541754841804504, + "learning_rate": 0.00018750870224785939, + "loss": 2.7807, + "step": 3300 + }, + { + "epoch": 0.2664030344604955, + "grad_norm": 0.9347110390663147, + "learning_rate": 0.0001875010608419553, + "loss": 2.6954, + "step": 3301 + }, + { + "epoch": 0.26648373819707855, + "grad_norm": 0.7591213583946228, + "learning_rate": 0.00018749341725529604, + "loss": 
2.7019, + "step": 3302 + }, + { + "epoch": 0.2665644419336615, + "grad_norm": 0.811527669429779, + "learning_rate": 0.00018748577148807211, + "loss": 2.7123, + "step": 3303 + }, + { + "epoch": 0.26664514567024455, + "grad_norm": 0.7419980764389038, + "learning_rate": 0.00018747812354047408, + "loss": 2.7383, + "step": 3304 + }, + { + "epoch": 0.26672584940682753, + "grad_norm": 0.7801192402839661, + "learning_rate": 0.00018747047341269256, + "loss": 2.7245, + "step": 3305 + }, + { + "epoch": 0.26680655314341056, + "grad_norm": 0.7392756938934326, + "learning_rate": 0.00018746282110491816, + "loss": 2.6992, + "step": 3306 + }, + { + "epoch": 0.26688725687999354, + "grad_norm": 0.7085927724838257, + "learning_rate": 0.00018745516661734161, + "loss": 2.739, + "step": 3307 + }, + { + "epoch": 0.26696796061657657, + "grad_norm": 0.7218676209449768, + "learning_rate": 0.00018744750995015373, + "loss": 2.7091, + "step": 3308 + }, + { + "epoch": 0.26704866435315955, + "grad_norm": 0.847872257232666, + "learning_rate": 0.0001874398511035453, + "loss": 2.699, + "step": 3309 + }, + { + "epoch": 0.2671293680897426, + "grad_norm": 0.8280770778656006, + "learning_rate": 0.00018743219007770723, + "loss": 2.763, + "step": 3310 + }, + { + "epoch": 0.26721007182632556, + "grad_norm": 0.7271165251731873, + "learning_rate": 0.0001874245268728304, + "loss": 2.7219, + "step": 3311 + }, + { + "epoch": 0.2672907755629086, + "grad_norm": 0.7342363595962524, + "learning_rate": 0.00018741686148910586, + "loss": 2.6765, + "step": 3312 + }, + { + "epoch": 0.26737147929949157, + "grad_norm": 0.7260174751281738, + "learning_rate": 0.0001874091939267246, + "loss": 2.7003, + "step": 3313 + }, + { + "epoch": 0.2674521830360746, + "grad_norm": 0.742494523525238, + "learning_rate": 0.00018740152418587775, + "loss": 2.7371, + "step": 3314 + }, + { + "epoch": 0.2675328867726576, + "grad_norm": 0.7238131165504456, + "learning_rate": 0.00018739385226675646, + "loss": 2.7486, + "step": 3315 + }, + { + 
"epoch": 0.26761359050924055, + "grad_norm": 0.7329363226890564, + "learning_rate": 0.0001873861781695519, + "loss": 2.6414, + "step": 3316 + }, + { + "epoch": 0.2676942942458236, + "grad_norm": 0.7078117728233337, + "learning_rate": 0.00018737850189445534, + "loss": 2.7271, + "step": 3317 + }, + { + "epoch": 0.26777499798240656, + "grad_norm": 0.7945309281349182, + "learning_rate": 0.00018737082344165814, + "loss": 2.7323, + "step": 3318 + }, + { + "epoch": 0.2678557017189896, + "grad_norm": 0.7510890364646912, + "learning_rate": 0.0001873631428113516, + "loss": 2.6563, + "step": 3319 + }, + { + "epoch": 0.26793640545557257, + "grad_norm": 0.7790820002555847, + "learning_rate": 0.0001873554600037272, + "loss": 2.7445, + "step": 3320 + }, + { + "epoch": 0.2680171091921556, + "grad_norm": 0.7689393162727356, + "learning_rate": 0.00018734777501897636, + "loss": 2.669, + "step": 3321 + }, + { + "epoch": 0.2680978129287386, + "grad_norm": 0.8227118253707886, + "learning_rate": 0.00018734008785729065, + "loss": 2.7279, + "step": 3322 + }, + { + "epoch": 0.2681785166653216, + "grad_norm": 0.7551290392875671, + "learning_rate": 0.00018733239851886162, + "loss": 2.6864, + "step": 3323 + }, + { + "epoch": 0.2682592204019046, + "grad_norm": 0.8572004437446594, + "learning_rate": 0.00018732470700388097, + "loss": 2.8159, + "step": 3324 + }, + { + "epoch": 0.2683399241384876, + "grad_norm": 0.7509044408798218, + "learning_rate": 0.00018731701331254033, + "loss": 2.7698, + "step": 3325 + }, + { + "epoch": 0.2684206278750706, + "grad_norm": 0.8474129438400269, + "learning_rate": 0.00018730931744503148, + "loss": 2.6745, + "step": 3326 + }, + { + "epoch": 0.2685013316116536, + "grad_norm": 0.8310953378677368, + "learning_rate": 0.00018730161940154618, + "loss": 2.712, + "step": 3327 + }, + { + "epoch": 0.2685820353482366, + "grad_norm": 0.8820717334747314, + "learning_rate": 0.00018729391918227632, + "loss": 2.7776, + "step": 3328 + }, + { + "epoch": 0.26866273908481964, + 
"grad_norm": 0.8827663064002991, + "learning_rate": 0.00018728621678741384, + "loss": 2.7115, + "step": 3329 + }, + { + "epoch": 0.2687434428214026, + "grad_norm": 0.7896323800086975, + "learning_rate": 0.00018727851221715064, + "loss": 2.6799, + "step": 3330 + }, + { + "epoch": 0.26882414655798564, + "grad_norm": 0.7775614261627197, + "learning_rate": 0.0001872708054716788, + "loss": 2.7021, + "step": 3331 + }, + { + "epoch": 0.2689048502945686, + "grad_norm": 0.8150187134742737, + "learning_rate": 0.0001872630965511903, + "loss": 2.679, + "step": 3332 + }, + { + "epoch": 0.26898555403115165, + "grad_norm": 0.7821844220161438, + "learning_rate": 0.00018725538545587736, + "loss": 2.7067, + "step": 3333 + }, + { + "epoch": 0.26906625776773463, + "grad_norm": 0.8390234112739563, + "learning_rate": 0.00018724767218593216, + "loss": 2.7133, + "step": 3334 + }, + { + "epoch": 0.26914696150431766, + "grad_norm": 0.8150694370269775, + "learning_rate": 0.00018723995674154687, + "loss": 2.7022, + "step": 3335 + }, + { + "epoch": 0.26922766524090064, + "grad_norm": 0.7473872900009155, + "learning_rate": 0.0001872322391229138, + "loss": 2.7268, + "step": 3336 + }, + { + "epoch": 0.26930836897748367, + "grad_norm": 0.7591951489448547, + "learning_rate": 0.0001872245193302253, + "loss": 2.7516, + "step": 3337 + }, + { + "epoch": 0.26938907271406665, + "grad_norm": 0.7914662957191467, + "learning_rate": 0.00018721679736367382, + "loss": 2.6613, + "step": 3338 + }, + { + "epoch": 0.2694697764506497, + "grad_norm": 0.7823428511619568, + "learning_rate": 0.00018720907322345172, + "loss": 2.6661, + "step": 3339 + }, + { + "epoch": 0.26955048018723266, + "grad_norm": 0.8428264260292053, + "learning_rate": 0.00018720134690975156, + "loss": 2.672, + "step": 3340 + }, + { + "epoch": 0.2696311839238157, + "grad_norm": 0.71320641040802, + "learning_rate": 0.00018719361842276587, + "loss": 2.7326, + "step": 3341 + }, + { + "epoch": 0.26971188766039866, + "grad_norm": 0.7972821593284607, + 
"learning_rate": 0.00018718588776268731, + "loss": 2.7182, + "step": 3342 + }, + { + "epoch": 0.2697925913969817, + "grad_norm": 0.7924500107765198, + "learning_rate": 0.0001871781549297085, + "loss": 2.7308, + "step": 3343 + }, + { + "epoch": 0.2698732951335647, + "grad_norm": 0.7668356895446777, + "learning_rate": 0.0001871704199240222, + "loss": 2.678, + "step": 3344 + }, + { + "epoch": 0.2699539988701477, + "grad_norm": 0.866973876953125, + "learning_rate": 0.00018716268274582114, + "loss": 2.7802, + "step": 3345 + }, + { + "epoch": 0.2700347026067307, + "grad_norm": 0.7709557414054871, + "learning_rate": 0.0001871549433952982, + "loss": 2.7418, + "step": 3346 + }, + { + "epoch": 0.2701154063433137, + "grad_norm": 0.7707573771476746, + "learning_rate": 0.00018714720187264626, + "loss": 2.7486, + "step": 3347 + }, + { + "epoch": 0.2701961100798967, + "grad_norm": 0.8007768392562866, + "learning_rate": 0.00018713945817805822, + "loss": 2.7106, + "step": 3348 + }, + { + "epoch": 0.2702768138164797, + "grad_norm": 0.7239583134651184, + "learning_rate": 0.0001871317123117271, + "loss": 2.7209, + "step": 3349 + }, + { + "epoch": 0.2703575175530627, + "grad_norm": 0.775104820728302, + "learning_rate": 0.00018712396427384594, + "loss": 2.6503, + "step": 3350 + }, + { + "epoch": 0.27043822128964573, + "grad_norm": 0.7492741346359253, + "learning_rate": 0.0001871162140646079, + "loss": 2.699, + "step": 3351 + }, + { + "epoch": 0.2705189250262287, + "grad_norm": 0.7550846338272095, + "learning_rate": 0.00018710846168420604, + "loss": 2.7458, + "step": 3352 + }, + { + "epoch": 0.27059962876281174, + "grad_norm": 0.807996928691864, + "learning_rate": 0.0001871007071328336, + "loss": 2.7604, + "step": 3353 + }, + { + "epoch": 0.2706803324993947, + "grad_norm": 0.7381845116615295, + "learning_rate": 0.00018709295041068386, + "loss": 2.6833, + "step": 3354 + }, + { + "epoch": 0.27076103623597775, + "grad_norm": 0.7542420625686646, + "learning_rate": 0.00018708519151795016, + 
"loss": 2.6462, + "step": 3355 + }, + { + "epoch": 0.2708417399725607, + "grad_norm": 0.7675846219062805, + "learning_rate": 0.00018707743045482582, + "loss": 2.7068, + "step": 3356 + }, + { + "epoch": 0.27092244370914376, + "grad_norm": 0.7437357902526855, + "learning_rate": 0.0001870696672215043, + "loss": 2.73, + "step": 3357 + }, + { + "epoch": 0.27100314744572673, + "grad_norm": 0.7880852222442627, + "learning_rate": 0.00018706190181817903, + "loss": 2.759, + "step": 3358 + }, + { + "epoch": 0.27108385118230977, + "grad_norm": 0.7403178811073303, + "learning_rate": 0.00018705413424504363, + "loss": 2.7538, + "step": 3359 + }, + { + "epoch": 0.27116455491889274, + "grad_norm": 0.7601225972175598, + "learning_rate": 0.00018704636450229164, + "loss": 2.7331, + "step": 3360 + }, + { + "epoch": 0.2712452586554758, + "grad_norm": 0.7810701727867126, + "learning_rate": 0.0001870385925901167, + "loss": 2.7736, + "step": 3361 + }, + { + "epoch": 0.27132596239205875, + "grad_norm": 0.8934530019760132, + "learning_rate": 0.0001870308185087125, + "loss": 2.7214, + "step": 3362 + }, + { + "epoch": 0.2714066661286418, + "grad_norm": 0.7468441128730774, + "learning_rate": 0.0001870230422582728, + "loss": 2.6957, + "step": 3363 + }, + { + "epoch": 0.27148736986522476, + "grad_norm": 0.7643293142318726, + "learning_rate": 0.00018701526383899144, + "loss": 2.6773, + "step": 3364 + }, + { + "epoch": 0.2715680736018078, + "grad_norm": 0.7602033615112305, + "learning_rate": 0.0001870074832510622, + "loss": 2.7095, + "step": 3365 + }, + { + "epoch": 0.27164877733839077, + "grad_norm": 0.772065281867981, + "learning_rate": 0.00018699970049467908, + "loss": 2.6753, + "step": 3366 + }, + { + "epoch": 0.27172948107497374, + "grad_norm": 0.7718359231948853, + "learning_rate": 0.00018699191557003598, + "loss": 2.6857, + "step": 3367 + }, + { + "epoch": 0.2718101848115568, + "grad_norm": 0.8207093477249146, + "learning_rate": 0.00018698412847732693, + "loss": 2.7549, + "step": 3368 + }, + 
{ + "epoch": 0.27189088854813975, + "grad_norm": 0.7393590807914734, + "learning_rate": 0.00018697633921674605, + "loss": 2.6884, + "step": 3369 + }, + { + "epoch": 0.2719715922847228, + "grad_norm": 0.7955869436264038, + "learning_rate": 0.0001869685477884874, + "loss": 2.708, + "step": 3370 + }, + { + "epoch": 0.27205229602130576, + "grad_norm": 0.7392188906669617, + "learning_rate": 0.00018696075419274527, + "loss": 2.717, + "step": 3371 + }, + { + "epoch": 0.2721329997578888, + "grad_norm": 0.800204873085022, + "learning_rate": 0.00018695295842971376, + "loss": 2.7184, + "step": 3372 + }, + { + "epoch": 0.27221370349447177, + "grad_norm": 0.8195740580558777, + "learning_rate": 0.00018694516049958725, + "loss": 2.6865, + "step": 3373 + }, + { + "epoch": 0.2722944072310548, + "grad_norm": 0.8617578148841858, + "learning_rate": 0.00018693736040256007, + "loss": 2.7098, + "step": 3374 + }, + { + "epoch": 0.2723751109676378, + "grad_norm": 0.8184413909912109, + "learning_rate": 0.00018692955813882662, + "loss": 2.7449, + "step": 3375 + }, + { + "epoch": 0.2724558147042208, + "grad_norm": 0.990275502204895, + "learning_rate": 0.00018692175370858133, + "loss": 2.7891, + "step": 3376 + }, + { + "epoch": 0.2725365184408038, + "grad_norm": 0.7857810854911804, + "learning_rate": 0.0001869139471120187, + "loss": 2.6884, + "step": 3377 + }, + { + "epoch": 0.2726172221773868, + "grad_norm": 0.8040915131568909, + "learning_rate": 0.00018690613834933335, + "loss": 2.7047, + "step": 3378 + }, + { + "epoch": 0.2726979259139698, + "grad_norm": 0.7512348294258118, + "learning_rate": 0.00018689832742071983, + "loss": 2.6898, + "step": 3379 + }, + { + "epoch": 0.27277862965055283, + "grad_norm": 0.6781859397888184, + "learning_rate": 0.00018689051432637288, + "loss": 2.6396, + "step": 3380 + }, + { + "epoch": 0.2728593333871358, + "grad_norm": 0.7858247756958008, + "learning_rate": 0.00018688269906648716, + "loss": 2.6785, + "step": 3381 + }, + { + "epoch": 0.27294003712371884, + 
"grad_norm": 0.7342140674591064, + "learning_rate": 0.00018687488164125744, + "loss": 2.6778, + "step": 3382 + }, + { + "epoch": 0.2730207408603018, + "grad_norm": 0.8113372921943665, + "learning_rate": 0.00018686706205087858, + "loss": 2.6982, + "step": 3383 + }, + { + "epoch": 0.27310144459688485, + "grad_norm": 0.7904205918312073, + "learning_rate": 0.0001868592402955455, + "loss": 2.7891, + "step": 3384 + }, + { + "epoch": 0.2731821483334678, + "grad_norm": 0.7274135947227478, + "learning_rate": 0.00018685141637545308, + "loss": 2.6908, + "step": 3385 + }, + { + "epoch": 0.27326285207005085, + "grad_norm": 0.7675744295120239, + "learning_rate": 0.0001868435902907963, + "loss": 2.6987, + "step": 3386 + }, + { + "epoch": 0.27334355580663383, + "grad_norm": 0.8085030913352966, + "learning_rate": 0.00018683576204177026, + "loss": 2.7798, + "step": 3387 + }, + { + "epoch": 0.27342425954321686, + "grad_norm": 0.7498135566711426, + "learning_rate": 0.00018682793162857006, + "loss": 2.7216, + "step": 3388 + }, + { + "epoch": 0.27350496327979984, + "grad_norm": 0.900741696357727, + "learning_rate": 0.0001868200990513908, + "loss": 2.6871, + "step": 3389 + }, + { + "epoch": 0.27358566701638287, + "grad_norm": 0.7948571443557739, + "learning_rate": 0.00018681226431042772, + "loss": 2.6985, + "step": 3390 + }, + { + "epoch": 0.27366637075296585, + "grad_norm": 0.8739100098609924, + "learning_rate": 0.00018680442740587612, + "loss": 2.6922, + "step": 3391 + }, + { + "epoch": 0.2737470744895489, + "grad_norm": 0.730084240436554, + "learning_rate": 0.00018679658833793125, + "loss": 2.7029, + "step": 3392 + }, + { + "epoch": 0.27382777822613186, + "grad_norm": 0.7560603022575378, + "learning_rate": 0.00018678874710678853, + "loss": 2.7429, + "step": 3393 + }, + { + "epoch": 0.2739084819627149, + "grad_norm": 0.8331460356712341, + "learning_rate": 0.00018678090371264334, + "loss": 2.7157, + "step": 3394 + }, + { + "epoch": 0.27398918569929787, + "grad_norm": 0.8070168495178223, 
+ "learning_rate": 0.00018677305815569122, + "loss": 2.7629, + "step": 3395 + }, + { + "epoch": 0.2740698894358809, + "grad_norm": 0.7922534346580505, + "learning_rate": 0.00018676521043612762, + "loss": 2.7159, + "step": 3396 + }, + { + "epoch": 0.2741505931724639, + "grad_norm": 0.7838901281356812, + "learning_rate": 0.0001867573605541482, + "loss": 2.6721, + "step": 3397 + }, + { + "epoch": 0.2742312969090469, + "grad_norm": 0.8912512063980103, + "learning_rate": 0.00018674950850994856, + "loss": 2.7243, + "step": 3398 + }, + { + "epoch": 0.2743120006456299, + "grad_norm": 0.7205448150634766, + "learning_rate": 0.0001867416543037244, + "loss": 2.7152, + "step": 3399 + }, + { + "epoch": 0.2743927043822129, + "grad_norm": 0.6992877721786499, + "learning_rate": 0.00018673379793567146, + "loss": 2.7183, + "step": 3400 + }, + { + "epoch": 0.2744734081187959, + "grad_norm": 0.8009448051452637, + "learning_rate": 0.00018672593940598556, + "loss": 2.715, + "step": 3401 + }, + { + "epoch": 0.2745541118553789, + "grad_norm": 0.7812647819519043, + "learning_rate": 0.0001867180787148626, + "loss": 2.7579, + "step": 3402 + }, + { + "epoch": 0.2746348155919619, + "grad_norm": 0.7300555109977722, + "learning_rate": 0.00018671021586249835, + "loss": 2.694, + "step": 3403 + }, + { + "epoch": 0.27471551932854493, + "grad_norm": 0.8082736134529114, + "learning_rate": 0.00018670235084908887, + "loss": 2.768, + "step": 3404 + }, + { + "epoch": 0.2747962230651279, + "grad_norm": 0.7729581594467163, + "learning_rate": 0.0001866944836748302, + "loss": 2.7256, + "step": 3405 + }, + { + "epoch": 0.27487692680171094, + "grad_norm": 0.8113458752632141, + "learning_rate": 0.00018668661433991835, + "loss": 2.6692, + "step": 3406 + }, + { + "epoch": 0.2749576305382939, + "grad_norm": 0.7757337689399719, + "learning_rate": 0.00018667874284454948, + "loss": 2.6769, + "step": 3407 + }, + { + "epoch": 0.27503833427487695, + "grad_norm": 0.7896093726158142, + "learning_rate": 
0.00018667086918891976, + "loss": 2.7118, + "step": 3408 + }, + { + "epoch": 0.2751190380114599, + "grad_norm": 0.7764071822166443, + "learning_rate": 0.00018666299337322543, + "loss": 2.7284, + "step": 3409 + }, + { + "epoch": 0.27519974174804296, + "grad_norm": 0.794815182685852, + "learning_rate": 0.00018665511539766273, + "loss": 2.7232, + "step": 3410 + }, + { + "epoch": 0.27528044548462594, + "grad_norm": 0.8134122490882874, + "learning_rate": 0.0001866472352624281, + "loss": 2.7023, + "step": 3411 + }, + { + "epoch": 0.27536114922120897, + "grad_norm": 0.7654025554656982, + "learning_rate": 0.00018663935296771782, + "loss": 2.7002, + "step": 3412 + }, + { + "epoch": 0.27544185295779194, + "grad_norm": 0.6930806636810303, + "learning_rate": 0.0001866314685137284, + "loss": 2.6764, + "step": 3413 + }, + { + "epoch": 0.275522556694375, + "grad_norm": 0.7535184621810913, + "learning_rate": 0.00018662358190065631, + "loss": 2.6657, + "step": 3414 + }, + { + "epoch": 0.27560326043095795, + "grad_norm": 0.7775620818138123, + "learning_rate": 0.00018661569312869816, + "loss": 2.6931, + "step": 3415 + }, + { + "epoch": 0.275683964167541, + "grad_norm": 0.7209072113037109, + "learning_rate": 0.00018660780219805048, + "loss": 2.7293, + "step": 3416 + }, + { + "epoch": 0.27576466790412396, + "grad_norm": 0.7182055711746216, + "learning_rate": 0.00018659990910891, + "loss": 2.6561, + "step": 3417 + }, + { + "epoch": 0.27584537164070694, + "grad_norm": 0.7130969166755676, + "learning_rate": 0.00018659201386147338, + "loss": 2.7156, + "step": 3418 + }, + { + "epoch": 0.27592607537728997, + "grad_norm": 0.7296265959739685, + "learning_rate": 0.00018658411645593745, + "loss": 2.6894, + "step": 3419 + }, + { + "epoch": 0.27600677911387295, + "grad_norm": 0.7707972526550293, + "learning_rate": 0.000186576216892499, + "loss": 2.7528, + "step": 3420 + }, + { + "epoch": 0.276087482850456, + "grad_norm": 0.6945170164108276, + "learning_rate": 0.0001865683151713549, + "loss": 
2.6762, + "step": 3421 + }, + { + "epoch": 0.27616818658703896, + "grad_norm": 0.7664114236831665, + "learning_rate": 0.0001865604112927021, + "loss": 2.7212, + "step": 3422 + }, + { + "epoch": 0.276248890323622, + "grad_norm": 0.6950399875640869, + "learning_rate": 0.0001865525052567376, + "loss": 2.7035, + "step": 3423 + }, + { + "epoch": 0.27632959406020496, + "grad_norm": 0.7307506799697876, + "learning_rate": 0.00018654459706365838, + "loss": 2.7296, + "step": 3424 + }, + { + "epoch": 0.276410297796788, + "grad_norm": 0.720912516117096, + "learning_rate": 0.0001865366867136616, + "loss": 2.6884, + "step": 3425 + }, + { + "epoch": 0.276491001533371, + "grad_norm": 0.7581072449684143, + "learning_rate": 0.00018652877420694436, + "loss": 2.705, + "step": 3426 + }, + { + "epoch": 0.276571705269954, + "grad_norm": 0.7473136186599731, + "learning_rate": 0.0001865208595437039, + "loss": 2.7316, + "step": 3427 + }, + { + "epoch": 0.276652409006537, + "grad_norm": 0.7272855639457703, + "learning_rate": 0.00018651294272413745, + "loss": 2.6834, + "step": 3428 + }, + { + "epoch": 0.27673311274312, + "grad_norm": 0.7046366930007935, + "learning_rate": 0.0001865050237484423, + "loss": 2.6491, + "step": 3429 + }, + { + "epoch": 0.276813816479703, + "grad_norm": 0.7521376609802246, + "learning_rate": 0.00018649710261681586, + "loss": 2.708, + "step": 3430 + }, + { + "epoch": 0.276894520216286, + "grad_norm": 0.7372453808784485, + "learning_rate": 0.0001864891793294555, + "loss": 2.682, + "step": 3431 + }, + { + "epoch": 0.276975223952869, + "grad_norm": 0.7381749749183655, + "learning_rate": 0.0001864812538865587, + "loss": 2.7526, + "step": 3432 + }, + { + "epoch": 0.27705592768945203, + "grad_norm": 0.7891514301300049, + "learning_rate": 0.00018647332628832298, + "loss": 2.6904, + "step": 3433 + }, + { + "epoch": 0.277136631426035, + "grad_norm": 0.7942724823951721, + "learning_rate": 0.00018646539653494596, + "loss": 2.7873, + "step": 3434 + }, + { + "epoch": 
0.27721733516261804, + "grad_norm": 0.7365398406982422, + "learning_rate": 0.0001864574646266252, + "loss": 2.6684, + "step": 3435 + }, + { + "epoch": 0.277298038899201, + "grad_norm": 0.7802249193191528, + "learning_rate": 0.00018644953056355846, + "loss": 2.7152, + "step": 3436 + }, + { + "epoch": 0.27737874263578405, + "grad_norm": 0.7801448106765747, + "learning_rate": 0.0001864415943459434, + "loss": 2.7034, + "step": 3437 + }, + { + "epoch": 0.277459446372367, + "grad_norm": 0.7722738981246948, + "learning_rate": 0.00018643365597397786, + "loss": 2.7135, + "step": 3438 + }, + { + "epoch": 0.27754015010895006, + "grad_norm": 0.7847445011138916, + "learning_rate": 0.00018642571544785967, + "loss": 2.6999, + "step": 3439 + }, + { + "epoch": 0.27762085384553303, + "grad_norm": 0.7226125597953796, + "learning_rate": 0.00018641777276778675, + "loss": 2.7613, + "step": 3440 + }, + { + "epoch": 0.27770155758211607, + "grad_norm": 0.713188111782074, + "learning_rate": 0.000186409827933957, + "loss": 2.6953, + "step": 3441 + }, + { + "epoch": 0.27778226131869904, + "grad_norm": 0.7308298349380493, + "learning_rate": 0.0001864018809465685, + "loss": 2.7045, + "step": 3442 + }, + { + "epoch": 0.2778629650552821, + "grad_norm": 0.7606719732284546, + "learning_rate": 0.00018639393180581925, + "loss": 2.7883, + "step": 3443 + }, + { + "epoch": 0.27794366879186505, + "grad_norm": 0.7583296895027161, + "learning_rate": 0.00018638598051190738, + "loss": 2.6734, + "step": 3444 + }, + { + "epoch": 0.2780243725284481, + "grad_norm": 0.7147012948989868, + "learning_rate": 0.00018637802706503108, + "loss": 2.7223, + "step": 3445 + }, + { + "epoch": 0.27810507626503106, + "grad_norm": 0.7812997102737427, + "learning_rate": 0.00018637007146538853, + "loss": 2.7277, + "step": 3446 + }, + { + "epoch": 0.2781857800016141, + "grad_norm": 0.7460772395133972, + "learning_rate": 0.000186362113713178, + "loss": 2.6875, + "step": 3447 + }, + { + "epoch": 0.27826648373819707, + "grad_norm": 
0.7359143495559692, + "learning_rate": 0.0001863541538085979, + "loss": 2.7122, + "step": 3448 + }, + { + "epoch": 0.2783471874747801, + "grad_norm": 0.7122978568077087, + "learning_rate": 0.00018634619175184655, + "loss": 2.6381, + "step": 3449 + }, + { + "epoch": 0.2784278912113631, + "grad_norm": 0.6965885758399963, + "learning_rate": 0.00018633822754312234, + "loss": 2.6957, + "step": 3450 + }, + { + "epoch": 0.2785085949479461, + "grad_norm": 0.7737082242965698, + "learning_rate": 0.00018633026118262385, + "loss": 2.7579, + "step": 3451 + }, + { + "epoch": 0.2785892986845291, + "grad_norm": 0.6925420165061951, + "learning_rate": 0.00018632229267054958, + "loss": 2.6226, + "step": 3452 + }, + { + "epoch": 0.2786700024211121, + "grad_norm": 0.7496356964111328, + "learning_rate": 0.0001863143220070981, + "loss": 2.7059, + "step": 3453 + }, + { + "epoch": 0.2787507061576951, + "grad_norm": 0.7066817283630371, + "learning_rate": 0.0001863063491924681, + "loss": 2.681, + "step": 3454 + }, + { + "epoch": 0.2788314098942781, + "grad_norm": 0.8143237829208374, + "learning_rate": 0.0001862983742268583, + "loss": 2.6698, + "step": 3455 + }, + { + "epoch": 0.2789121136308611, + "grad_norm": 0.7518483996391296, + "learning_rate": 0.00018629039711046737, + "loss": 2.7041, + "step": 3456 + }, + { + "epoch": 0.27899281736744413, + "grad_norm": 0.8756366968154907, + "learning_rate": 0.00018628241784349422, + "loss": 2.7547, + "step": 3457 + }, + { + "epoch": 0.2790735211040271, + "grad_norm": 0.8709446787834167, + "learning_rate": 0.0001862744364261377, + "loss": 2.7068, + "step": 3458 + }, + { + "epoch": 0.27915422484061014, + "grad_norm": 0.8121913075447083, + "learning_rate": 0.00018626645285859666, + "loss": 2.673, + "step": 3459 + }, + { + "epoch": 0.2792349285771931, + "grad_norm": 0.7685909271240234, + "learning_rate": 0.00018625846714107012, + "loss": 2.7389, + "step": 3460 + }, + { + "epoch": 0.27931563231377615, + "grad_norm": 0.7098073363304138, + "learning_rate": 
0.0001862504792737571, + "loss": 2.6942, + "step": 3461 + }, + { + "epoch": 0.27939633605035913, + "grad_norm": 0.7718049883842468, + "learning_rate": 0.00018624248925685666, + "loss": 2.7359, + "step": 3462 + }, + { + "epoch": 0.27947703978694216, + "grad_norm": 0.7912909984588623, + "learning_rate": 0.00018623449709056797, + "loss": 2.6658, + "step": 3463 + }, + { + "epoch": 0.27955774352352514, + "grad_norm": 0.7255454659461975, + "learning_rate": 0.0001862265027750902, + "loss": 2.771, + "step": 3464 + }, + { + "epoch": 0.27963844726010817, + "grad_norm": 0.7542218565940857, + "learning_rate": 0.00018621850631062254, + "loss": 2.6741, + "step": 3465 + }, + { + "epoch": 0.27971915099669115, + "grad_norm": 0.8386052846908569, + "learning_rate": 0.00018621050769736437, + "loss": 2.67, + "step": 3466 + }, + { + "epoch": 0.2797998547332742, + "grad_norm": 0.8563781976699829, + "learning_rate": 0.00018620250693551495, + "loss": 2.7461, + "step": 3467 + }, + { + "epoch": 0.27988055846985715, + "grad_norm": 0.7490699291229248, + "learning_rate": 0.00018619450402527376, + "loss": 2.6863, + "step": 3468 + }, + { + "epoch": 0.27996126220644013, + "grad_norm": 0.8008999824523926, + "learning_rate": 0.00018618649896684017, + "loss": 2.7769, + "step": 3469 + }, + { + "epoch": 0.28004196594302316, + "grad_norm": 0.7678235769271851, + "learning_rate": 0.00018617849176041378, + "loss": 2.7237, + "step": 3470 + }, + { + "epoch": 0.28012266967960614, + "grad_norm": 0.8774877786636353, + "learning_rate": 0.00018617048240619408, + "loss": 2.7502, + "step": 3471 + }, + { + "epoch": 0.28020337341618917, + "grad_norm": 0.8150283098220825, + "learning_rate": 0.00018616247090438073, + "loss": 2.6941, + "step": 3472 + }, + { + "epoch": 0.28028407715277215, + "grad_norm": 0.7330089807510376, + "learning_rate": 0.00018615445725517332, + "loss": 2.7002, + "step": 3473 + }, + { + "epoch": 0.2803647808893552, + "grad_norm": 0.748275101184845, + "learning_rate": 0.00018614644145877168, + 
"loss": 2.6996, + "step": 3474 + }, + { + "epoch": 0.28044548462593816, + "grad_norm": 0.7718296647071838, + "learning_rate": 0.0001861384235153755, + "loss": 2.7333, + "step": 3475 + }, + { + "epoch": 0.2805261883625212, + "grad_norm": 0.7751123309135437, + "learning_rate": 0.00018613040342518465, + "loss": 2.7362, + "step": 3476 + }, + { + "epoch": 0.28060689209910417, + "grad_norm": 0.70979243516922, + "learning_rate": 0.000186122381188399, + "loss": 2.6651, + "step": 3477 + }, + { + "epoch": 0.2806875958356872, + "grad_norm": 0.9607138633728027, + "learning_rate": 0.00018611435680521848, + "loss": 2.7779, + "step": 3478 + }, + { + "epoch": 0.2807682995722702, + "grad_norm": 0.709671676158905, + "learning_rate": 0.0001861063302758431, + "loss": 2.6994, + "step": 3479 + }, + { + "epoch": 0.2808490033088532, + "grad_norm": 0.8765757083892822, + "learning_rate": 0.00018609830160047283, + "loss": 2.7107, + "step": 3480 + }, + { + "epoch": 0.2809297070454362, + "grad_norm": 0.7996764183044434, + "learning_rate": 0.0001860902707793079, + "loss": 2.7921, + "step": 3481 + }, + { + "epoch": 0.2810104107820192, + "grad_norm": 0.7094513177871704, + "learning_rate": 0.0001860822378125483, + "loss": 2.7211, + "step": 3482 + }, + { + "epoch": 0.2810911145186022, + "grad_norm": 0.8068607449531555, + "learning_rate": 0.0001860742027003944, + "loss": 2.675, + "step": 3483 + }, + { + "epoch": 0.2811718182551852, + "grad_norm": 0.7737938165664673, + "learning_rate": 0.00018606616544304628, + "loss": 2.7538, + "step": 3484 + }, + { + "epoch": 0.2812525219917682, + "grad_norm": 0.7979975342750549, + "learning_rate": 0.0001860581260407044, + "loss": 2.7894, + "step": 3485 + }, + { + "epoch": 0.28133322572835123, + "grad_norm": 0.7671655416488647, + "learning_rate": 0.00018605008449356904, + "loss": 2.7097, + "step": 3486 + }, + { + "epoch": 0.2814139294649342, + "grad_norm": 0.7284159064292908, + "learning_rate": 0.00018604204080184062, + "loss": 2.7447, + "step": 3487 + }, + { + 
"epoch": 0.28149463320151724, + "grad_norm": 0.7425351142883301, + "learning_rate": 0.00018603399496571968, + "loss": 2.7302, + "step": 3488 + }, + { + "epoch": 0.2815753369381002, + "grad_norm": 0.7709810733795166, + "learning_rate": 0.00018602594698540663, + "loss": 2.6979, + "step": 3489 + }, + { + "epoch": 0.28165604067468325, + "grad_norm": 0.744628369808197, + "learning_rate": 0.00018601789686110214, + "loss": 2.7279, + "step": 3490 + }, + { + "epoch": 0.2817367444112662, + "grad_norm": 0.7679976224899292, + "learning_rate": 0.00018600984459300678, + "loss": 2.6862, + "step": 3491 + }, + { + "epoch": 0.28181744814784926, + "grad_norm": 0.7923497557640076, + "learning_rate": 0.0001860017901813213, + "loss": 2.6975, + "step": 3492 + }, + { + "epoch": 0.28189815188443224, + "grad_norm": 0.7896692156791687, + "learning_rate": 0.00018599373362624636, + "loss": 2.7052, + "step": 3493 + }, + { + "epoch": 0.28197885562101527, + "grad_norm": 0.7913276553153992, + "learning_rate": 0.00018598567492798284, + "loss": 2.7233, + "step": 3494 + }, + { + "epoch": 0.28205955935759824, + "grad_norm": 0.7385257482528687, + "learning_rate": 0.00018597761408673146, + "loss": 2.7616, + "step": 3495 + }, + { + "epoch": 0.2821402630941813, + "grad_norm": 0.7181909084320068, + "learning_rate": 0.00018596955110269323, + "loss": 2.718, + "step": 3496 + }, + { + "epoch": 0.28222096683076425, + "grad_norm": 0.8313151597976685, + "learning_rate": 0.00018596148597606907, + "loss": 2.6775, + "step": 3497 + }, + { + "epoch": 0.2823016705673473, + "grad_norm": 0.7235481142997742, + "learning_rate": 0.00018595341870705995, + "loss": 2.7085, + "step": 3498 + }, + { + "epoch": 0.28238237430393026, + "grad_norm": 0.7092145085334778, + "learning_rate": 0.00018594534929586697, + "loss": 2.7167, + "step": 3499 + }, + { + "epoch": 0.2824630780405133, + "grad_norm": 0.7929207682609558, + "learning_rate": 0.0001859372777426912, + "loss": 2.663, + "step": 3500 + }, + { + "epoch": 0.28254378177709627, + 
"grad_norm": 0.7488871216773987, + "learning_rate": 0.00018592920404773383, + "loss": 2.7911, + "step": 3501 + }, + { + "epoch": 0.2826244855136793, + "grad_norm": 0.8230419158935547, + "learning_rate": 0.0001859211282111961, + "loss": 2.754, + "step": 3502 + }, + { + "epoch": 0.2827051892502623, + "grad_norm": 0.731971025466919, + "learning_rate": 0.00018591305023327924, + "loss": 2.7142, + "step": 3503 + }, + { + "epoch": 0.2827858929868453, + "grad_norm": 0.8159881234169006, + "learning_rate": 0.00018590497011418457, + "loss": 2.7046, + "step": 3504 + }, + { + "epoch": 0.2828665967234283, + "grad_norm": 0.750266432762146, + "learning_rate": 0.0001858968878541135, + "loss": 2.6951, + "step": 3505 + }, + { + "epoch": 0.2829473004600113, + "grad_norm": 0.7750049233436584, + "learning_rate": 0.00018588880345326748, + "loss": 2.6958, + "step": 3506 + }, + { + "epoch": 0.2830280041965943, + "grad_norm": 0.8559218049049377, + "learning_rate": 0.00018588071691184795, + "loss": 2.7205, + "step": 3507 + }, + { + "epoch": 0.28310870793317733, + "grad_norm": 0.7334830164909363, + "learning_rate": 0.00018587262823005642, + "loss": 2.7134, + "step": 3508 + }, + { + "epoch": 0.2831894116697603, + "grad_norm": 0.8749497532844543, + "learning_rate": 0.00018586453740809456, + "loss": 2.6811, + "step": 3509 + }, + { + "epoch": 0.28327011540634334, + "grad_norm": 0.8800753355026245, + "learning_rate": 0.00018585644444616396, + "loss": 2.7427, + "step": 3510 + }, + { + "epoch": 0.2833508191429263, + "grad_norm": 0.8666185736656189, + "learning_rate": 0.00018584834934446632, + "loss": 2.6828, + "step": 3511 + }, + { + "epoch": 0.28343152287950935, + "grad_norm": 0.7451635003089905, + "learning_rate": 0.00018584025210320343, + "loss": 2.6784, + "step": 3512 + }, + { + "epoch": 0.2835122266160923, + "grad_norm": 0.8512656688690186, + "learning_rate": 0.00018583215272257708, + "loss": 2.7762, + "step": 3513 + }, + { + "epoch": 0.28359293035267535, + "grad_norm": 0.9298297166824341, + 
"learning_rate": 0.00018582405120278907, + "loss": 2.7714, + "step": 3514 + }, + { + "epoch": 0.28367363408925833, + "grad_norm": 0.7968065738677979, + "learning_rate": 0.0001858159475440414, + "loss": 2.7286, + "step": 3515 + }, + { + "epoch": 0.28375433782584136, + "grad_norm": 0.7381564378738403, + "learning_rate": 0.00018580784174653596, + "loss": 2.6697, + "step": 3516 + }, + { + "epoch": 0.28383504156242434, + "grad_norm": 0.8199222683906555, + "learning_rate": 0.00018579973381047481, + "loss": 2.7463, + "step": 3517 + }, + { + "epoch": 0.28391574529900737, + "grad_norm": 0.8022071123123169, + "learning_rate": 0.00018579162373606002, + "loss": 2.6898, + "step": 3518 + }, + { + "epoch": 0.28399644903559035, + "grad_norm": 0.7899700999259949, + "learning_rate": 0.0001857835115234937, + "loss": 2.7074, + "step": 3519 + }, + { + "epoch": 0.2840771527721733, + "grad_norm": 0.7237183451652527, + "learning_rate": 0.00018577539717297805, + "loss": 2.6699, + "step": 3520 + }, + { + "epoch": 0.28415785650875636, + "grad_norm": 0.7627314329147339, + "learning_rate": 0.00018576728068471526, + "loss": 2.7745, + "step": 3521 + }, + { + "epoch": 0.28423856024533933, + "grad_norm": 0.7301654815673828, + "learning_rate": 0.00018575916205890766, + "loss": 2.7191, + "step": 3522 + }, + { + "epoch": 0.28431926398192237, + "grad_norm": 0.7441647052764893, + "learning_rate": 0.00018575104129575753, + "loss": 2.7529, + "step": 3523 + }, + { + "epoch": 0.28439996771850534, + "grad_norm": 0.7715914249420166, + "learning_rate": 0.0001857429183954673, + "loss": 2.6893, + "step": 3524 + }, + { + "epoch": 0.2844806714550884, + "grad_norm": 0.7464057207107544, + "learning_rate": 0.00018573479335823944, + "loss": 2.7169, + "step": 3525 + }, + { + "epoch": 0.28456137519167135, + "grad_norm": 0.753198504447937, + "learning_rate": 0.00018572666618427638, + "loss": 2.7144, + "step": 3526 + }, + { + "epoch": 0.2846420789282544, + "grad_norm": 0.7681953310966492, + "learning_rate": 
0.00018571853687378073, + "loss": 2.709, + "step": 3527 + }, + { + "epoch": 0.28472278266483736, + "grad_norm": 0.7591876983642578, + "learning_rate": 0.0001857104054269551, + "loss": 2.7519, + "step": 3528 + }, + { + "epoch": 0.2848034864014204, + "grad_norm": 0.7417709827423096, + "learning_rate": 0.00018570227184400205, + "loss": 2.6756, + "step": 3529 + }, + { + "epoch": 0.28488419013800337, + "grad_norm": 0.7641329169273376, + "learning_rate": 0.0001856941361251244, + "loss": 2.6614, + "step": 3530 + }, + { + "epoch": 0.2849648938745864, + "grad_norm": 0.7813490033149719, + "learning_rate": 0.0001856859982705249, + "loss": 2.7145, + "step": 3531 + }, + { + "epoch": 0.2850455976111694, + "grad_norm": 0.7777202129364014, + "learning_rate": 0.00018567785828040628, + "loss": 2.7015, + "step": 3532 + }, + { + "epoch": 0.2851263013477524, + "grad_norm": 0.7647144794464111, + "learning_rate": 0.0001856697161549715, + "loss": 2.7311, + "step": 3533 + }, + { + "epoch": 0.2852070050843354, + "grad_norm": 0.7477256655693054, + "learning_rate": 0.00018566157189442342, + "loss": 2.6832, + "step": 3534 + }, + { + "epoch": 0.2852877088209184, + "grad_norm": 0.7037049531936646, + "learning_rate": 0.00018565342549896506, + "loss": 2.6942, + "step": 3535 + }, + { + "epoch": 0.2853684125575014, + "grad_norm": 0.7309197783470154, + "learning_rate": 0.00018564527696879945, + "loss": 2.6797, + "step": 3536 + }, + { + "epoch": 0.2854491162940844, + "grad_norm": 0.798075795173645, + "learning_rate": 0.00018563712630412967, + "loss": 2.6926, + "step": 3537 + }, + { + "epoch": 0.2855298200306674, + "grad_norm": 0.7831682562828064, + "learning_rate": 0.0001856289735051588, + "loss": 2.7537, + "step": 3538 + }, + { + "epoch": 0.28561052376725043, + "grad_norm": 0.7983096241950989, + "learning_rate": 0.0001856208185720901, + "loss": 2.7037, + "step": 3539 + }, + { + "epoch": 0.2856912275038334, + "grad_norm": 0.7250573635101318, + "learning_rate": 0.00018561266150512678, + "loss": 2.7282, 
+ "step": 3540 + }, + { + "epoch": 0.28577193124041644, + "grad_norm": 0.7800211906433105, + "learning_rate": 0.00018560450230447218, + "loss": 2.6541, + "step": 3541 + }, + { + "epoch": 0.2858526349769994, + "grad_norm": 0.7624209523200989, + "learning_rate": 0.00018559634097032953, + "loss": 2.7041, + "step": 3542 + }, + { + "epoch": 0.28593333871358245, + "grad_norm": 0.7212036848068237, + "learning_rate": 0.0001855881775029024, + "loss": 2.7287, + "step": 3543 + }, + { + "epoch": 0.28601404245016543, + "grad_norm": 0.7774164080619812, + "learning_rate": 0.00018558001190239408, + "loss": 2.6515, + "step": 3544 + }, + { + "epoch": 0.28609474618674846, + "grad_norm": 0.7169588208198547, + "learning_rate": 0.0001855718441690082, + "loss": 2.7111, + "step": 3545 + }, + { + "epoch": 0.28617544992333144, + "grad_norm": 0.7473909258842468, + "learning_rate": 0.00018556367430294827, + "loss": 2.7405, + "step": 3546 + }, + { + "epoch": 0.28625615365991447, + "grad_norm": 0.7213929295539856, + "learning_rate": 0.0001855555023044179, + "loss": 2.7336, + "step": 3547 + }, + { + "epoch": 0.28633685739649745, + "grad_norm": 0.701816201210022, + "learning_rate": 0.00018554732817362078, + "loss": 2.721, + "step": 3548 + }, + { + "epoch": 0.2864175611330805, + "grad_norm": 0.8158134818077087, + "learning_rate": 0.00018553915191076064, + "loss": 2.6979, + "step": 3549 + }, + { + "epoch": 0.28649826486966345, + "grad_norm": 0.7303084135055542, + "learning_rate": 0.00018553097351604118, + "loss": 2.6734, + "step": 3550 + }, + { + "epoch": 0.2865789686062465, + "grad_norm": 0.8140435814857483, + "learning_rate": 0.00018552279298966634, + "loss": 2.6832, + "step": 3551 + }, + { + "epoch": 0.28665967234282946, + "grad_norm": 0.7024678587913513, + "learning_rate": 0.00018551461033183988, + "loss": 2.7118, + "step": 3552 + }, + { + "epoch": 0.2867403760794125, + "grad_norm": 0.7277806401252747, + "learning_rate": 0.00018550642554276582, + "loss": 2.6362, + "step": 3553 + }, + { + 
"epoch": 0.28682107981599547, + "grad_norm": 0.8376575112342834, + "learning_rate": 0.00018549823862264812, + "loss": 2.744, + "step": 3554 + }, + { + "epoch": 0.2869017835525785, + "grad_norm": 0.712195098400116, + "learning_rate": 0.00018549004957169082, + "loss": 2.6715, + "step": 3555 + }, + { + "epoch": 0.2869824872891615, + "grad_norm": 0.7511523962020874, + "learning_rate": 0.00018548185839009805, + "loss": 2.7655, + "step": 3556 + }, + { + "epoch": 0.2870631910257445, + "grad_norm": 0.7397211790084839, + "learning_rate": 0.00018547366507807388, + "loss": 2.6813, + "step": 3557 + }, + { + "epoch": 0.2871438947623275, + "grad_norm": 0.6926341652870178, + "learning_rate": 0.00018546546963582253, + "loss": 2.6477, + "step": 3558 + }, + { + "epoch": 0.2872245984989105, + "grad_norm": 0.7776244878768921, + "learning_rate": 0.00018545727206354827, + "loss": 2.6979, + "step": 3559 + }, + { + "epoch": 0.2873053022354935, + "grad_norm": 0.7639400959014893, + "learning_rate": 0.00018544907236145542, + "loss": 2.6913, + "step": 3560 + }, + { + "epoch": 0.28738600597207653, + "grad_norm": 0.7738329768180847, + "learning_rate": 0.0001854408705297483, + "loss": 2.7231, + "step": 3561 + }, + { + "epoch": 0.2874667097086595, + "grad_norm": 0.7182422876358032, + "learning_rate": 0.00018543266656863137, + "loss": 2.718, + "step": 3562 + }, + { + "epoch": 0.28754741344524254, + "grad_norm": 0.7257261276245117, + "learning_rate": 0.00018542446047830903, + "loss": 2.7354, + "step": 3563 + }, + { + "epoch": 0.2876281171818255, + "grad_norm": 0.7761391997337341, + "learning_rate": 0.00018541625225898588, + "loss": 2.705, + "step": 3564 + }, + { + "epoch": 0.28770882091840855, + "grad_norm": 0.9272314310073853, + "learning_rate": 0.0001854080419108664, + "loss": 2.7278, + "step": 3565 + }, + { + "epoch": 0.2877895246549915, + "grad_norm": 0.7622589468955994, + "learning_rate": 0.00018539982943415527, + "loss": 2.7224, + "step": 3566 + }, + { + "epoch": 0.28787022839157456, + 
"grad_norm": 0.725349485874176, + "learning_rate": 0.0001853916148290572, + "loss": 2.6782, + "step": 3567 + }, + { + "epoch": 0.28795093212815753, + "grad_norm": 0.776242733001709, + "learning_rate": 0.0001853833980957768, + "loss": 2.6467, + "step": 3568 + }, + { + "epoch": 0.28803163586474057, + "grad_norm": 0.8461112976074219, + "learning_rate": 0.00018537517923451896, + "loss": 2.6763, + "step": 3569 + }, + { + "epoch": 0.28811233960132354, + "grad_norm": 0.8161221742630005, + "learning_rate": 0.00018536695824548848, + "loss": 2.7057, + "step": 3570 + }, + { + "epoch": 0.2881930433379065, + "grad_norm": 0.7404211759567261, + "learning_rate": 0.00018535873512889024, + "loss": 2.7083, + "step": 3571 + }, + { + "epoch": 0.28827374707448955, + "grad_norm": 0.831042468547821, + "learning_rate": 0.00018535050988492918, + "loss": 2.6121, + "step": 3572 + }, + { + "epoch": 0.2883544508110725, + "grad_norm": 0.7286352515220642, + "learning_rate": 0.00018534228251381035, + "loss": 2.7165, + "step": 3573 + }, + { + "epoch": 0.28843515454765556, + "grad_norm": 0.7951883673667908, + "learning_rate": 0.00018533405301573872, + "loss": 2.6794, + "step": 3574 + }, + { + "epoch": 0.28851585828423854, + "grad_norm": 0.7431079149246216, + "learning_rate": 0.00018532582139091944, + "loss": 2.6758, + "step": 3575 + }, + { + "epoch": 0.28859656202082157, + "grad_norm": 0.7408809065818787, + "learning_rate": 0.0001853175876395576, + "loss": 2.6901, + "step": 3576 + }, + { + "epoch": 0.28867726575740454, + "grad_norm": 0.7428708672523499, + "learning_rate": 0.00018530935176185848, + "loss": 2.6679, + "step": 3577 + }, + { + "epoch": 0.2887579694939876, + "grad_norm": 0.7670302987098694, + "learning_rate": 0.00018530111375802735, + "loss": 2.7306, + "step": 3578 + }, + { + "epoch": 0.28883867323057055, + "grad_norm": 0.7582474946975708, + "learning_rate": 0.00018529287362826943, + "loss": 2.7715, + "step": 3579 + }, + { + "epoch": 0.2889193769671536, + "grad_norm": 0.750973105430603, + 
"learning_rate": 0.0001852846313727902, + "loss": 2.7147, + "step": 3580 + }, + { + "epoch": 0.28900008070373656, + "grad_norm": 0.771854043006897, + "learning_rate": 0.00018527638699179498, + "loss": 2.6874, + "step": 3581 + }, + { + "epoch": 0.2890807844403196, + "grad_norm": 0.785469651222229, + "learning_rate": 0.00018526814048548928, + "loss": 2.6858, + "step": 3582 + }, + { + "epoch": 0.28916148817690257, + "grad_norm": 0.7601101398468018, + "learning_rate": 0.00018525989185407864, + "loss": 2.6927, + "step": 3583 + }, + { + "epoch": 0.2892421919134856, + "grad_norm": 0.7313411831855774, + "learning_rate": 0.00018525164109776861, + "loss": 2.6813, + "step": 3584 + }, + { + "epoch": 0.2893228956500686, + "grad_norm": 0.7471718192100525, + "learning_rate": 0.00018524338821676483, + "loss": 2.6791, + "step": 3585 + }, + { + "epoch": 0.2894035993866516, + "grad_norm": 0.7615204453468323, + "learning_rate": 0.00018523513321127302, + "loss": 2.7767, + "step": 3586 + }, + { + "epoch": 0.2894843031232346, + "grad_norm": 0.766793966293335, + "learning_rate": 0.00018522687608149886, + "loss": 2.664, + "step": 3587 + }, + { + "epoch": 0.2895650068598176, + "grad_norm": 0.7897932529449463, + "learning_rate": 0.00018521861682764816, + "loss": 2.7148, + "step": 3588 + }, + { + "epoch": 0.2896457105964006, + "grad_norm": 0.7366818785667419, + "learning_rate": 0.00018521035544992679, + "loss": 2.69, + "step": 3589 + }, + { + "epoch": 0.28972641433298363, + "grad_norm": 0.7503829598426819, + "learning_rate": 0.00018520209194854058, + "loss": 2.7141, + "step": 3590 + }, + { + "epoch": 0.2898071180695666, + "grad_norm": 0.8064351081848145, + "learning_rate": 0.00018519382632369556, + "loss": 2.6738, + "step": 3591 + }, + { + "epoch": 0.28988782180614964, + "grad_norm": 0.7364048361778259, + "learning_rate": 0.00018518555857559768, + "loss": 2.6731, + "step": 3592 + }, + { + "epoch": 0.2899685255427326, + "grad_norm": 0.7065430283546448, + "learning_rate": 
0.00018517728870445297, + "loss": 2.7314, + "step": 3593 + }, + { + "epoch": 0.29004922927931565, + "grad_norm": 0.8233428001403809, + "learning_rate": 0.0001851690167104676, + "loss": 2.727, + "step": 3594 + }, + { + "epoch": 0.2901299330158986, + "grad_norm": 0.7563758492469788, + "learning_rate": 0.00018516074259384768, + "loss": 2.665, + "step": 3595 + }, + { + "epoch": 0.29021063675248165, + "grad_norm": 0.7451249361038208, + "learning_rate": 0.00018515246635479943, + "loss": 2.7686, + "step": 3596 + }, + { + "epoch": 0.29029134048906463, + "grad_norm": 0.7374305725097656, + "learning_rate": 0.00018514418799352918, + "loss": 2.6466, + "step": 3597 + }, + { + "epoch": 0.29037204422564766, + "grad_norm": 0.7596983909606934, + "learning_rate": 0.00018513590751024315, + "loss": 2.6763, + "step": 3598 + }, + { + "epoch": 0.29045274796223064, + "grad_norm": 0.7808190584182739, + "learning_rate": 0.0001851276249051478, + "loss": 2.7362, + "step": 3599 + }, + { + "epoch": 0.29053345169881367, + "grad_norm": 0.765785276889801, + "learning_rate": 0.00018511934017844948, + "loss": 2.7049, + "step": 3600 + }, + { + "epoch": 0.29061415543539665, + "grad_norm": 0.7503563165664673, + "learning_rate": 0.0001851110533303547, + "loss": 2.6262, + "step": 3601 + }, + { + "epoch": 0.2906948591719797, + "grad_norm": 0.7287782430648804, + "learning_rate": 0.00018510276436107, + "loss": 2.7076, + "step": 3602 + }, + { + "epoch": 0.29077556290856266, + "grad_norm": 0.7748721837997437, + "learning_rate": 0.00018509447327080193, + "loss": 2.6945, + "step": 3603 + }, + { + "epoch": 0.2908562666451457, + "grad_norm": 0.7482423186302185, + "learning_rate": 0.00018508618005975714, + "loss": 2.7326, + "step": 3604 + }, + { + "epoch": 0.29093697038172867, + "grad_norm": 0.7708765864372253, + "learning_rate": 0.00018507788472814238, + "loss": 2.7602, + "step": 3605 + }, + { + "epoch": 0.2910176741183117, + "grad_norm": 0.7308060526847839, + "learning_rate": 0.0001850695872761643, + "loss": 
2.6735, + "step": 3606 + }, + { + "epoch": 0.2910983778548947, + "grad_norm": 0.7512951493263245, + "learning_rate": 0.00018506128770402972, + "loss": 2.6877, + "step": 3607 + }, + { + "epoch": 0.2911790815914777, + "grad_norm": 0.6806616187095642, + "learning_rate": 0.00018505298601194552, + "loss": 2.6689, + "step": 3608 + }, + { + "epoch": 0.2912597853280607, + "grad_norm": 0.7825661301612854, + "learning_rate": 0.00018504468220011857, + "loss": 2.7108, + "step": 3609 + }, + { + "epoch": 0.2913404890646437, + "grad_norm": 0.8243381977081299, + "learning_rate": 0.00018503637626875584, + "loss": 2.6789, + "step": 3610 + }, + { + "epoch": 0.2914211928012267, + "grad_norm": 0.745012640953064, + "learning_rate": 0.00018502806821806429, + "loss": 2.7658, + "step": 3611 + }, + { + "epoch": 0.2915018965378097, + "grad_norm": 0.7091341018676758, + "learning_rate": 0.00018501975804825104, + "loss": 2.7046, + "step": 3612 + }, + { + "epoch": 0.2915826002743927, + "grad_norm": 0.729026734828949, + "learning_rate": 0.0001850114457595232, + "loss": 2.6692, + "step": 3613 + }, + { + "epoch": 0.29166330401097573, + "grad_norm": 0.8098071813583374, + "learning_rate": 0.00018500313135208786, + "loss": 2.712, + "step": 3614 + }, + { + "epoch": 0.2917440077475587, + "grad_norm": 0.7387483716011047, + "learning_rate": 0.0001849948148261523, + "loss": 2.6705, + "step": 3615 + }, + { + "epoch": 0.29182471148414174, + "grad_norm": 0.7904576659202576, + "learning_rate": 0.0001849864961819238, + "loss": 2.5969, + "step": 3616 + }, + { + "epoch": 0.2919054152207247, + "grad_norm": 0.7560681700706482, + "learning_rate": 0.00018497817541960964, + "loss": 2.6971, + "step": 3617 + }, + { + "epoch": 0.29198611895730775, + "grad_norm": 0.8488430976867676, + "learning_rate": 0.00018496985253941723, + "loss": 2.7367, + "step": 3618 + }, + { + "epoch": 0.2920668226938907, + "grad_norm": 0.7641268372535706, + "learning_rate": 0.00018496152754155399, + "loss": 2.6948, + "step": 3619 + }, + { + 
"epoch": 0.29214752643047376, + "grad_norm": 0.7219721674919128, + "learning_rate": 0.00018495320042622736, + "loss": 2.7225, + "step": 3620 + }, + { + "epoch": 0.29222823016705674, + "grad_norm": 0.7583872675895691, + "learning_rate": 0.00018494487119364493, + "loss": 2.7335, + "step": 3621 + }, + { + "epoch": 0.2923089339036397, + "grad_norm": 0.7771418690681458, + "learning_rate": 0.00018493653984401424, + "loss": 2.6712, + "step": 3622 + }, + { + "epoch": 0.29238963764022274, + "grad_norm": 0.7537891268730164, + "learning_rate": 0.00018492820637754296, + "loss": 2.7282, + "step": 3623 + }, + { + "epoch": 0.2924703413768057, + "grad_norm": 0.7334226965904236, + "learning_rate": 0.00018491987079443875, + "loss": 2.7072, + "step": 3624 + }, + { + "epoch": 0.29255104511338875, + "grad_norm": 0.7768076658248901, + "learning_rate": 0.00018491153309490942, + "loss": 2.7176, + "step": 3625 + }, + { + "epoch": 0.29263174884997173, + "grad_norm": 0.6831281185150146, + "learning_rate": 0.0001849031932791627, + "loss": 2.6982, + "step": 3626 + }, + { + "epoch": 0.29271245258655476, + "grad_norm": 0.7150557637214661, + "learning_rate": 0.00018489485134740648, + "loss": 2.7325, + "step": 3627 + }, + { + "epoch": 0.29279315632313774, + "grad_norm": 0.782667338848114, + "learning_rate": 0.00018488650729984863, + "loss": 2.7146, + "step": 3628 + }, + { + "epoch": 0.29287386005972077, + "grad_norm": 0.7718524932861328, + "learning_rate": 0.0001848781611366971, + "loss": 2.746, + "step": 3629 + }, + { + "epoch": 0.29295456379630375, + "grad_norm": 0.7066439390182495, + "learning_rate": 0.00018486981285815998, + "loss": 2.7497, + "step": 3630 + }, + { + "epoch": 0.2930352675328868, + "grad_norm": 0.7705665826797485, + "learning_rate": 0.00018486146246444522, + "loss": 2.6448, + "step": 3631 + }, + { + "epoch": 0.29311597126946976, + "grad_norm": 0.7334863543510437, + "learning_rate": 0.000184853109955761, + "loss": 2.6931, + "step": 3632 + }, + { + "epoch": 0.2931966750060528, + 
"grad_norm": 0.7903133630752563, + "learning_rate": 0.0001848447553323155, + "loss": 2.6954, + "step": 3633 + }, + { + "epoch": 0.29327737874263576, + "grad_norm": 0.6821191310882568, + "learning_rate": 0.00018483639859431689, + "loss": 2.6165, + "step": 3634 + }, + { + "epoch": 0.2933580824792188, + "grad_norm": 0.7187811136245728, + "learning_rate": 0.00018482803974197344, + "loss": 2.6387, + "step": 3635 + }, + { + "epoch": 0.2934387862158018, + "grad_norm": 0.7429843544960022, + "learning_rate": 0.00018481967877549354, + "loss": 2.6848, + "step": 3636 + }, + { + "epoch": 0.2935194899523848, + "grad_norm": 0.7431524395942688, + "learning_rate": 0.0001848113156950855, + "loss": 2.7044, + "step": 3637 + }, + { + "epoch": 0.2936001936889678, + "grad_norm": 0.7008687853813171, + "learning_rate": 0.00018480295050095778, + "loss": 2.6922, + "step": 3638 + }, + { + "epoch": 0.2936808974255508, + "grad_norm": 0.7106652855873108, + "learning_rate": 0.00018479458319331884, + "loss": 2.6845, + "step": 3639 + }, + { + "epoch": 0.2937616011621338, + "grad_norm": 0.7288951873779297, + "learning_rate": 0.00018478621377237723, + "loss": 2.7017, + "step": 3640 + }, + { + "epoch": 0.2938423048987168, + "grad_norm": 0.7228607535362244, + "learning_rate": 0.00018477784223834155, + "loss": 2.7449, + "step": 3641 + }, + { + "epoch": 0.2939230086352998, + "grad_norm": 0.7180825471878052, + "learning_rate": 0.00018476946859142043, + "loss": 2.7291, + "step": 3642 + }, + { + "epoch": 0.29400371237188283, + "grad_norm": 0.7854947447776794, + "learning_rate": 0.00018476109283182258, + "loss": 2.7619, + "step": 3643 + }, + { + "epoch": 0.2940844161084658, + "grad_norm": 0.7871318459510803, + "learning_rate": 0.00018475271495975673, + "loss": 2.6695, + "step": 3644 + }, + { + "epoch": 0.29416511984504884, + "grad_norm": 0.7813127636909485, + "learning_rate": 0.00018474433497543165, + "loss": 2.735, + "step": 3645 + }, + { + "epoch": 0.2942458235816318, + "grad_norm": 0.7835291028022766, + 
"learning_rate": 0.00018473595287905623, + "loss": 2.7336, + "step": 3646 + }, + { + "epoch": 0.29432652731821485, + "grad_norm": 0.6970148682594299, + "learning_rate": 0.00018472756867083935, + "loss": 2.6912, + "step": 3647 + }, + { + "epoch": 0.2944072310547978, + "grad_norm": 0.7968462109565735, + "learning_rate": 0.00018471918235098998, + "loss": 2.6889, + "step": 3648 + }, + { + "epoch": 0.29448793479138086, + "grad_norm": 0.7011313438415527, + "learning_rate": 0.00018471079391971714, + "loss": 2.6989, + "step": 3649 + }, + { + "epoch": 0.29456863852796383, + "grad_norm": 0.8047335743904114, + "learning_rate": 0.00018470240337722991, + "loss": 2.6827, + "step": 3650 + }, + { + "epoch": 0.29464934226454687, + "grad_norm": 0.7446332573890686, + "learning_rate": 0.00018469401072373733, + "loss": 2.7089, + "step": 3651 + }, + { + "epoch": 0.29473004600112984, + "grad_norm": 0.7610359191894531, + "learning_rate": 0.00018468561595944862, + "loss": 2.6766, + "step": 3652 + }, + { + "epoch": 0.2948107497377129, + "grad_norm": 0.7705755233764648, + "learning_rate": 0.000184677219084573, + "loss": 2.7445, + "step": 3653 + }, + { + "epoch": 0.29489145347429585, + "grad_norm": 0.7466446757316589, + "learning_rate": 0.00018466882009931973, + "loss": 2.726, + "step": 3654 + }, + { + "epoch": 0.2949721572108789, + "grad_norm": 0.7912059426307678, + "learning_rate": 0.00018466041900389813, + "loss": 2.6865, + "step": 3655 + }, + { + "epoch": 0.29505286094746186, + "grad_norm": 0.722588837146759, + "learning_rate": 0.00018465201579851757, + "loss": 2.7039, + "step": 3656 + }, + { + "epoch": 0.2951335646840449, + "grad_norm": 0.739311933517456, + "learning_rate": 0.00018464361048338752, + "loss": 2.6991, + "step": 3657 + }, + { + "epoch": 0.29521426842062787, + "grad_norm": 0.7784128785133362, + "learning_rate": 0.00018463520305871743, + "loss": 2.753, + "step": 3658 + }, + { + "epoch": 0.2952949721572109, + "grad_norm": 0.8261777758598328, + "learning_rate": 
0.00018462679352471682, + "loss": 2.7257, + "step": 3659 + }, + { + "epoch": 0.2953756758937939, + "grad_norm": 0.7510927319526672, + "learning_rate": 0.0001846183818815953, + "loss": 2.6981, + "step": 3660 + }, + { + "epoch": 0.2954563796303769, + "grad_norm": 0.7403035163879395, + "learning_rate": 0.00018460996812956254, + "loss": 2.744, + "step": 3661 + }, + { + "epoch": 0.2955370833669599, + "grad_norm": 0.7927733063697815, + "learning_rate": 0.00018460155226882817, + "loss": 2.6304, + "step": 3662 + }, + { + "epoch": 0.2956177871035429, + "grad_norm": 0.7923495769500732, + "learning_rate": 0.000184593134299602, + "loss": 2.7882, + "step": 3663 + }, + { + "epoch": 0.2956984908401259, + "grad_norm": 0.7639210224151611, + "learning_rate": 0.00018458471422209377, + "loss": 2.7171, + "step": 3664 + }, + { + "epoch": 0.2957791945767089, + "grad_norm": 0.736652672290802, + "learning_rate": 0.00018457629203651337, + "loss": 2.7479, + "step": 3665 + }, + { + "epoch": 0.2958598983132919, + "grad_norm": 0.7718610763549805, + "learning_rate": 0.00018456786774307066, + "loss": 2.7135, + "step": 3666 + }, + { + "epoch": 0.29594060204987493, + "grad_norm": 0.7711780071258545, + "learning_rate": 0.00018455944134197565, + "loss": 2.6867, + "step": 3667 + }, + { + "epoch": 0.2960213057864579, + "grad_norm": 0.7202491760253906, + "learning_rate": 0.0001845510128334383, + "loss": 2.6657, + "step": 3668 + }, + { + "epoch": 0.29610200952304094, + "grad_norm": 0.8155657649040222, + "learning_rate": 0.00018454258221766869, + "loss": 2.7342, + "step": 3669 + }, + { + "epoch": 0.2961827132596239, + "grad_norm": 0.7972069382667542, + "learning_rate": 0.00018453414949487696, + "loss": 2.7351, + "step": 3670 + }, + { + "epoch": 0.29626341699620695, + "grad_norm": 0.8645625710487366, + "learning_rate": 0.00018452571466527325, + "loss": 2.6778, + "step": 3671 + }, + { + "epoch": 0.29634412073278993, + "grad_norm": 0.7410334944725037, + "learning_rate": 0.00018451727772906775, + "loss": 
2.7228, + "step": 3672 + }, + { + "epoch": 0.2964248244693729, + "grad_norm": 0.7845733165740967, + "learning_rate": 0.0001845088386864708, + "loss": 2.7068, + "step": 3673 + }, + { + "epoch": 0.29650552820595594, + "grad_norm": 0.7709881067276001, + "learning_rate": 0.00018450039753769266, + "loss": 2.676, + "step": 3674 + }, + { + "epoch": 0.2965862319425389, + "grad_norm": 0.7214749455451965, + "learning_rate": 0.00018449195428294371, + "loss": 2.6488, + "step": 3675 + }, + { + "epoch": 0.29666693567912195, + "grad_norm": 0.7467561960220337, + "learning_rate": 0.00018448350892243443, + "loss": 2.7262, + "step": 3676 + }, + { + "epoch": 0.2967476394157049, + "grad_norm": 0.8412678241729736, + "learning_rate": 0.00018447506145637522, + "loss": 2.7898, + "step": 3677 + }, + { + "epoch": 0.29682834315228795, + "grad_norm": 0.7130109071731567, + "learning_rate": 0.00018446661188497668, + "loss": 2.7344, + "step": 3678 + }, + { + "epoch": 0.29690904688887093, + "grad_norm": 0.7807374000549316, + "learning_rate": 0.00018445816020844937, + "loss": 2.7198, + "step": 3679 + }, + { + "epoch": 0.29698975062545396, + "grad_norm": 0.8497760891914368, + "learning_rate": 0.00018444970642700394, + "loss": 2.7479, + "step": 3680 + }, + { + "epoch": 0.29707045436203694, + "grad_norm": 0.6827178001403809, + "learning_rate": 0.0001844412505408511, + "loss": 2.727, + "step": 3681 + }, + { + "epoch": 0.29715115809861997, + "grad_norm": 0.8063304424285889, + "learning_rate": 0.00018443279255020152, + "loss": 2.7896, + "step": 3682 + }, + { + "epoch": 0.29723186183520295, + "grad_norm": 0.7759353518486023, + "learning_rate": 0.00018442433245526604, + "loss": 2.7014, + "step": 3683 + }, + { + "epoch": 0.297312565571786, + "grad_norm": 0.7380958199501038, + "learning_rate": 0.00018441587025625554, + "loss": 2.6665, + "step": 3684 + }, + { + "epoch": 0.29739326930836896, + "grad_norm": 0.7623556852340698, + "learning_rate": 0.00018440740595338087, + "loss": 2.6955, + "step": 3685 + }, + { 
+ "epoch": 0.297473973044952, + "grad_norm": 0.8204537630081177, + "learning_rate": 0.000184398939546853, + "loss": 2.6854, + "step": 3686 + }, + { + "epoch": 0.29755467678153497, + "grad_norm": 0.7346726655960083, + "learning_rate": 0.00018439047103688293, + "loss": 2.6664, + "step": 3687 + }, + { + "epoch": 0.297635380518118, + "grad_norm": 0.777860701084137, + "learning_rate": 0.00018438200042368173, + "loss": 2.6423, + "step": 3688 + }, + { + "epoch": 0.297716084254701, + "grad_norm": 0.7331553101539612, + "learning_rate": 0.00018437352770746054, + "loss": 2.6137, + "step": 3689 + }, + { + "epoch": 0.297796787991284, + "grad_norm": 0.7634466290473938, + "learning_rate": 0.00018436505288843043, + "loss": 2.7266, + "step": 3690 + }, + { + "epoch": 0.297877491727867, + "grad_norm": 0.8151016235351562, + "learning_rate": 0.00018435657596680268, + "loss": 2.7373, + "step": 3691 + }, + { + "epoch": 0.29795819546445, + "grad_norm": 0.7806773781776428, + "learning_rate": 0.00018434809694278857, + "loss": 2.7011, + "step": 3692 + }, + { + "epoch": 0.298038899201033, + "grad_norm": 0.7575243711471558, + "learning_rate": 0.00018433961581659935, + "loss": 2.6601, + "step": 3693 + }, + { + "epoch": 0.298119602937616, + "grad_norm": 0.7527276873588562, + "learning_rate": 0.00018433113258844647, + "loss": 2.6864, + "step": 3694 + }, + { + "epoch": 0.298200306674199, + "grad_norm": 0.8024318218231201, + "learning_rate": 0.0001843226472585413, + "loss": 2.728, + "step": 3695 + }, + { + "epoch": 0.29828101041078203, + "grad_norm": 0.7549982666969299, + "learning_rate": 0.0001843141598270954, + "loss": 2.6834, + "step": 3696 + }, + { + "epoch": 0.298361714147365, + "grad_norm": 0.7699971199035645, + "learning_rate": 0.0001843056702943202, + "loss": 2.7209, + "step": 3697 + }, + { + "epoch": 0.29844241788394804, + "grad_norm": 0.823842465877533, + "learning_rate": 0.0001842971786604273, + "loss": 2.6924, + "step": 3698 + }, + { + "epoch": 0.298523121620531, + "grad_norm": 
0.7645791172981262, + "learning_rate": 0.00018428868492562837, + "loss": 2.6821, + "step": 3699 + }, + { + "epoch": 0.29860382535711405, + "grad_norm": 0.7530989050865173, + "learning_rate": 0.00018428018909013506, + "loss": 2.7592, + "step": 3700 + }, + { + "epoch": 0.298684529093697, + "grad_norm": 0.7958168387413025, + "learning_rate": 0.00018427169115415914, + "loss": 2.6925, + "step": 3701 + }, + { + "epoch": 0.29876523283028006, + "grad_norm": 0.7777522802352905, + "learning_rate": 0.00018426319111791242, + "loss": 2.6757, + "step": 3702 + }, + { + "epoch": 0.29884593656686304, + "grad_norm": 0.7418079972267151, + "learning_rate": 0.00018425468898160667, + "loss": 2.6445, + "step": 3703 + }, + { + "epoch": 0.29892664030344607, + "grad_norm": 0.7591132521629333, + "learning_rate": 0.00018424618474545382, + "loss": 2.7157, + "step": 3704 + }, + { + "epoch": 0.29900734404002904, + "grad_norm": 0.7591627836227417, + "learning_rate": 0.00018423767840966586, + "loss": 2.6691, + "step": 3705 + }, + { + "epoch": 0.2990880477766121, + "grad_norm": 0.7934779524803162, + "learning_rate": 0.00018422916997445476, + "loss": 2.7262, + "step": 3706 + }, + { + "epoch": 0.29916875151319505, + "grad_norm": 0.7964254021644592, + "learning_rate": 0.00018422065944003252, + "loss": 2.6196, + "step": 3707 + }, + { + "epoch": 0.2992494552497781, + "grad_norm": 0.7448374032974243, + "learning_rate": 0.0001842121468066113, + "loss": 2.6732, + "step": 3708 + }, + { + "epoch": 0.29933015898636106, + "grad_norm": 0.7813000679016113, + "learning_rate": 0.00018420363207440329, + "loss": 2.6978, + "step": 3709 + }, + { + "epoch": 0.2994108627229441, + "grad_norm": 0.7760851979255676, + "learning_rate": 0.00018419511524362064, + "loss": 2.7466, + "step": 3710 + }, + { + "epoch": 0.29949156645952707, + "grad_norm": 0.7786797881126404, + "learning_rate": 0.00018418659631447564, + "loss": 2.7044, + "step": 3711 + }, + { + "epoch": 0.2995722701961101, + "grad_norm": 0.7860158085823059, + 
"learning_rate": 0.00018417807528718055, + "loss": 2.6587, + "step": 3712 + }, + { + "epoch": 0.2996529739326931, + "grad_norm": 0.8327339291572571, + "learning_rate": 0.0001841695521619478, + "loss": 2.7112, + "step": 3713 + }, + { + "epoch": 0.2997336776692761, + "grad_norm": 0.7535735368728638, + "learning_rate": 0.00018416102693898982, + "loss": 2.726, + "step": 3714 + }, + { + "epoch": 0.2998143814058591, + "grad_norm": 0.7781090140342712, + "learning_rate": 0.000184152499618519, + "loss": 2.7238, + "step": 3715 + }, + { + "epoch": 0.2998950851424421, + "grad_norm": 0.7700545191764832, + "learning_rate": 0.00018414397020074795, + "loss": 2.7081, + "step": 3716 + }, + { + "epoch": 0.2999757888790251, + "grad_norm": 0.7578303217887878, + "learning_rate": 0.0001841354386858892, + "loss": 2.6591, + "step": 3717 + }, + { + "epoch": 0.30005649261560813, + "grad_norm": 0.7506501078605652, + "learning_rate": 0.00018412690507415538, + "loss": 2.6551, + "step": 3718 + }, + { + "epoch": 0.3001371963521911, + "grad_norm": 0.7869547009468079, + "learning_rate": 0.00018411836936575918, + "loss": 2.7169, + "step": 3719 + }, + { + "epoch": 0.30021790008877414, + "grad_norm": 0.7547428607940674, + "learning_rate": 0.00018410983156091332, + "loss": 2.7498, + "step": 3720 + }, + { + "epoch": 0.3002986038253571, + "grad_norm": 0.7829383015632629, + "learning_rate": 0.0001841012916598306, + "loss": 2.6885, + "step": 3721 + }, + { + "epoch": 0.30037930756194015, + "grad_norm": 0.8469082117080688, + "learning_rate": 0.00018409274966272386, + "loss": 2.7594, + "step": 3722 + }, + { + "epoch": 0.3004600112985231, + "grad_norm": 0.7690171599388123, + "learning_rate": 0.00018408420556980596, + "loss": 2.7892, + "step": 3723 + }, + { + "epoch": 0.3005407150351061, + "grad_norm": 0.7295899987220764, + "learning_rate": 0.00018407565938128987, + "loss": 2.7023, + "step": 3724 + }, + { + "epoch": 0.30062141877168913, + "grad_norm": 0.7249528169631958, + "learning_rate": 
0.00018406711109738856, + "loss": 2.7135, + "step": 3725 + }, + { + "epoch": 0.3007021225082721, + "grad_norm": 0.7237234711647034, + "learning_rate": 0.0001840585607183151, + "loss": 2.6117, + "step": 3726 + }, + { + "epoch": 0.30078282624485514, + "grad_norm": 0.7426557540893555, + "learning_rate": 0.00018405000824428256, + "loss": 2.7202, + "step": 3727 + }, + { + "epoch": 0.3008635299814381, + "grad_norm": 0.7572938799858093, + "learning_rate": 0.00018404145367550414, + "loss": 2.7373, + "step": 3728 + }, + { + "epoch": 0.30094423371802115, + "grad_norm": 0.7198675274848938, + "learning_rate": 0.00018403289701219295, + "loss": 2.6675, + "step": 3729 + }, + { + "epoch": 0.3010249374546041, + "grad_norm": 0.722532331943512, + "learning_rate": 0.00018402433825456235, + "loss": 2.6933, + "step": 3730 + }, + { + "epoch": 0.30110564119118716, + "grad_norm": 0.7621530890464783, + "learning_rate": 0.0001840157774028256, + "loss": 2.6951, + "step": 3731 + }, + { + "epoch": 0.30118634492777013, + "grad_norm": 0.7435615062713623, + "learning_rate": 0.00018400721445719604, + "loss": 2.7323, + "step": 3732 + }, + { + "epoch": 0.30126704866435317, + "grad_norm": 0.7233619689941406, + "learning_rate": 0.00018399864941788708, + "loss": 2.6789, + "step": 3733 + }, + { + "epoch": 0.30134775240093614, + "grad_norm": 0.7421496510505676, + "learning_rate": 0.00018399008228511224, + "loss": 2.72, + "step": 3734 + }, + { + "epoch": 0.3014284561375192, + "grad_norm": 0.7250909805297852, + "learning_rate": 0.000183981513059085, + "loss": 2.6717, + "step": 3735 + }, + { + "epoch": 0.30150915987410215, + "grad_norm": 0.7642899751663208, + "learning_rate": 0.0001839729417400189, + "loss": 2.6823, + "step": 3736 + }, + { + "epoch": 0.3015898636106852, + "grad_norm": 0.7434508204460144, + "learning_rate": 0.00018396436832812758, + "loss": 2.6441, + "step": 3737 + }, + { + "epoch": 0.30167056734726816, + "grad_norm": 0.7163311839103699, + "learning_rate": 0.00018395579282362473, + "loss": 
2.6736, + "step": 3738 + }, + { + "epoch": 0.3017512710838512, + "grad_norm": 0.6936792731285095, + "learning_rate": 0.00018394721522672404, + "loss": 2.6792, + "step": 3739 + }, + { + "epoch": 0.30183197482043417, + "grad_norm": 0.7791975736618042, + "learning_rate": 0.0001839386355376393, + "loss": 2.653, + "step": 3740 + }, + { + "epoch": 0.3019126785570172, + "grad_norm": 0.7902694940567017, + "learning_rate": 0.00018393005375658437, + "loss": 2.7448, + "step": 3741 + }, + { + "epoch": 0.3019933822936002, + "grad_norm": 0.7405624389648438, + "learning_rate": 0.0001839214698837731, + "loss": 2.6977, + "step": 3742 + }, + { + "epoch": 0.3020740860301832, + "grad_norm": 0.8033632040023804, + "learning_rate": 0.00018391288391941943, + "loss": 2.7468, + "step": 3743 + }, + { + "epoch": 0.3021547897667662, + "grad_norm": 0.8148884177207947, + "learning_rate": 0.00018390429586373735, + "loss": 2.6992, + "step": 3744 + }, + { + "epoch": 0.3022354935033492, + "grad_norm": 0.7633625268936157, + "learning_rate": 0.00018389570571694089, + "loss": 2.6604, + "step": 3745 + }, + { + "epoch": 0.3023161972399322, + "grad_norm": 0.8687180876731873, + "learning_rate": 0.00018388711347924413, + "loss": 2.6808, + "step": 3746 + }, + { + "epoch": 0.3023969009765152, + "grad_norm": 0.6974104046821594, + "learning_rate": 0.0001838785191508612, + "loss": 2.7613, + "step": 3747 + }, + { + "epoch": 0.3024776047130982, + "grad_norm": 0.7919288873672485, + "learning_rate": 0.00018386992273200633, + "loss": 2.664, + "step": 3748 + }, + { + "epoch": 0.30255830844968123, + "grad_norm": 0.7708829045295715, + "learning_rate": 0.00018386132422289374, + "loss": 2.7703, + "step": 3749 + }, + { + "epoch": 0.3026390121862642, + "grad_norm": 0.7099813222885132, + "learning_rate": 0.00018385272362373775, + "loss": 2.6485, + "step": 3750 + }, + { + "epoch": 0.30271971592284724, + "grad_norm": 0.7629622220993042, + "learning_rate": 0.0001838441209347527, + "loss": 2.7339, + "step": 3751 + }, + { + 
"epoch": 0.3028004196594302, + "grad_norm": 0.727275550365448, + "learning_rate": 0.00018383551615615295, + "loss": 2.7194, + "step": 3752 + }, + { + "epoch": 0.30288112339601325, + "grad_norm": 0.7158832550048828, + "learning_rate": 0.00018382690928815302, + "loss": 2.6698, + "step": 3753 + }, + { + "epoch": 0.30296182713259623, + "grad_norm": 0.8075565099716187, + "learning_rate": 0.00018381830033096735, + "loss": 2.7198, + "step": 3754 + }, + { + "epoch": 0.30304253086917926, + "grad_norm": 0.7949094176292419, + "learning_rate": 0.00018380968928481057, + "loss": 2.7048, + "step": 3755 + }, + { + "epoch": 0.30312323460576224, + "grad_norm": 0.7009503841400146, + "learning_rate": 0.00018380107614989724, + "loss": 2.709, + "step": 3756 + }, + { + "epoch": 0.30320393834234527, + "grad_norm": 0.668574869632721, + "learning_rate": 0.00018379246092644204, + "loss": 2.6515, + "step": 3757 + }, + { + "epoch": 0.30328464207892825, + "grad_norm": 0.7470806241035461, + "learning_rate": 0.00018378384361465968, + "loss": 2.7577, + "step": 3758 + }, + { + "epoch": 0.3033653458155113, + "grad_norm": 0.7529913783073425, + "learning_rate": 0.0001837752242147649, + "loss": 2.7189, + "step": 3759 + }, + { + "epoch": 0.30344604955209425, + "grad_norm": 0.7373302578926086, + "learning_rate": 0.00018376660272697258, + "loss": 2.7197, + "step": 3760 + }, + { + "epoch": 0.3035267532886773, + "grad_norm": 0.7650466561317444, + "learning_rate": 0.0001837579791514975, + "loss": 2.6613, + "step": 3761 + }, + { + "epoch": 0.30360745702526026, + "grad_norm": 0.775209903717041, + "learning_rate": 0.00018374935348855468, + "loss": 2.6454, + "step": 3762 + }, + { + "epoch": 0.3036881607618433, + "grad_norm": 0.7049290537834167, + "learning_rate": 0.00018374072573835903, + "loss": 2.6663, + "step": 3763 + }, + { + "epoch": 0.30376886449842627, + "grad_norm": 0.7060630917549133, + "learning_rate": 0.0001837320959011256, + "loss": 2.6908, + "step": 3764 + }, + { + "epoch": 0.3038495682350093, + 
"grad_norm": 0.7561464905738831, + "learning_rate": 0.00018372346397706944, + "loss": 2.673, + "step": 3765 + }, + { + "epoch": 0.3039302719715923, + "grad_norm": 0.7293568849563599, + "learning_rate": 0.0001837148299664057, + "loss": 2.6431, + "step": 3766 + }, + { + "epoch": 0.3040109757081753, + "grad_norm": 0.8460379838943481, + "learning_rate": 0.00018370619386934962, + "loss": 2.7493, + "step": 3767 + }, + { + "epoch": 0.3040916794447583, + "grad_norm": 0.8136082291603088, + "learning_rate": 0.00018369755568611632, + "loss": 2.7298, + "step": 3768 + }, + { + "epoch": 0.3041723831813413, + "grad_norm": 0.6916636824607849, + "learning_rate": 0.00018368891541692116, + "loss": 2.7173, + "step": 3769 + }, + { + "epoch": 0.3042530869179243, + "grad_norm": 0.7547643780708313, + "learning_rate": 0.0001836802730619795, + "loss": 2.6343, + "step": 3770 + }, + { + "epoch": 0.30433379065450733, + "grad_norm": 0.7439205050468445, + "learning_rate": 0.00018367162862150665, + "loss": 2.6627, + "step": 3771 + }, + { + "epoch": 0.3044144943910903, + "grad_norm": 0.7781087756156921, + "learning_rate": 0.0001836629820957181, + "loss": 2.7223, + "step": 3772 + }, + { + "epoch": 0.30449519812767334, + "grad_norm": 0.7876880764961243, + "learning_rate": 0.00018365433348482935, + "loss": 2.7139, + "step": 3773 + }, + { + "epoch": 0.3045759018642563, + "grad_norm": 0.7571346163749695, + "learning_rate": 0.00018364568278905595, + "loss": 2.6939, + "step": 3774 + }, + { + "epoch": 0.3046566056008393, + "grad_norm": 0.9011813402175903, + "learning_rate": 0.00018363703000861346, + "loss": 2.7516, + "step": 3775 + }, + { + "epoch": 0.3047373093374223, + "grad_norm": 0.7809761762619019, + "learning_rate": 0.00018362837514371755, + "loss": 2.7587, + "step": 3776 + }, + { + "epoch": 0.3048180130740053, + "grad_norm": 0.7486867308616638, + "learning_rate": 0.00018361971819458393, + "loss": 2.6617, + "step": 3777 + }, + { + "epoch": 0.30489871681058833, + "grad_norm": 0.7434267401695251, + 
"learning_rate": 0.00018361105916142836, + "loss": 2.7328, + "step": 3778 + }, + { + "epoch": 0.3049794205471713, + "grad_norm": 0.7895822525024414, + "learning_rate": 0.0001836023980444666, + "loss": 2.7038, + "step": 3779 + }, + { + "epoch": 0.30506012428375434, + "grad_norm": 0.7329267263412476, + "learning_rate": 0.00018359373484391458, + "loss": 2.6533, + "step": 3780 + }, + { + "epoch": 0.3051408280203373, + "grad_norm": 0.7578477263450623, + "learning_rate": 0.00018358506955998817, + "loss": 2.723, + "step": 3781 + }, + { + "epoch": 0.30522153175692035, + "grad_norm": 0.7174215316772461, + "learning_rate": 0.0001835764021929033, + "loss": 2.7665, + "step": 3782 + }, + { + "epoch": 0.3053022354935033, + "grad_norm": 0.7261673808097839, + "learning_rate": 0.00018356773274287605, + "loss": 2.7239, + "step": 3783 + }, + { + "epoch": 0.30538293923008636, + "grad_norm": 0.7550768852233887, + "learning_rate": 0.00018355906121012244, + "loss": 2.6952, + "step": 3784 + }, + { + "epoch": 0.30546364296666934, + "grad_norm": 0.7805373668670654, + "learning_rate": 0.0001835503875948586, + "loss": 2.6453, + "step": 3785 + }, + { + "epoch": 0.30554434670325237, + "grad_norm": 0.7753674983978271, + "learning_rate": 0.0001835417118973007, + "loss": 2.7188, + "step": 3786 + }, + { + "epoch": 0.30562505043983534, + "grad_norm": 0.719774603843689, + "learning_rate": 0.00018353303411766496, + "loss": 2.69, + "step": 3787 + }, + { + "epoch": 0.3057057541764184, + "grad_norm": 0.786780059337616, + "learning_rate": 0.00018352435425616763, + "loss": 2.7015, + "step": 3788 + }, + { + "epoch": 0.30578645791300135, + "grad_norm": 0.7481613159179688, + "learning_rate": 0.00018351567231302508, + "loss": 2.6267, + "step": 3789 + }, + { + "epoch": 0.3058671616495844, + "grad_norm": 0.8138384222984314, + "learning_rate": 0.00018350698828845365, + "loss": 2.7301, + "step": 3790 + }, + { + "epoch": 0.30594786538616736, + "grad_norm": 0.7911081314086914, + "learning_rate": 
0.00018349830218266982, + "loss": 2.6661, + "step": 3791 + }, + { + "epoch": 0.3060285691227504, + "grad_norm": 0.763179361820221, + "learning_rate": 0.00018348961399588997, + "loss": 2.6509, + "step": 3792 + }, + { + "epoch": 0.30610927285933337, + "grad_norm": 0.8214982748031616, + "learning_rate": 0.00018348092372833072, + "loss": 2.6951, + "step": 3793 + }, + { + "epoch": 0.3061899765959164, + "grad_norm": 0.7271003127098083, + "learning_rate": 0.00018347223138020865, + "loss": 2.7227, + "step": 3794 + }, + { + "epoch": 0.3062706803324994, + "grad_norm": 0.7727730870246887, + "learning_rate": 0.00018346353695174037, + "loss": 2.721, + "step": 3795 + }, + { + "epoch": 0.3063513840690824, + "grad_norm": 0.844895601272583, + "learning_rate": 0.00018345484044314257, + "loss": 2.6757, + "step": 3796 + }, + { + "epoch": 0.3064320878056654, + "grad_norm": 0.7409898638725281, + "learning_rate": 0.00018344614185463197, + "loss": 2.6798, + "step": 3797 + }, + { + "epoch": 0.3065127915422484, + "grad_norm": 0.8284425139427185, + "learning_rate": 0.00018343744118642542, + "loss": 2.7573, + "step": 3798 + }, + { + "epoch": 0.3065934952788314, + "grad_norm": 0.7535427808761597, + "learning_rate": 0.00018342873843873973, + "loss": 2.7026, + "step": 3799 + }, + { + "epoch": 0.30667419901541443, + "grad_norm": 0.8013898730278015, + "learning_rate": 0.00018342003361179176, + "loss": 2.7331, + "step": 3800 + }, + { + "epoch": 0.3067549027519974, + "grad_norm": 0.7458386421203613, + "learning_rate": 0.0001834113267057985, + "loss": 2.6976, + "step": 3801 + }, + { + "epoch": 0.30683560648858044, + "grad_norm": 0.8333673477172852, + "learning_rate": 0.00018340261772097695, + "loss": 2.7064, + "step": 3802 + }, + { + "epoch": 0.3069163102251634, + "grad_norm": 0.7273485064506531, + "learning_rate": 0.00018339390665754414, + "loss": 2.6619, + "step": 3803 + }, + { + "epoch": 0.30699701396174645, + "grad_norm": 0.8199014067649841, + "learning_rate": 0.0001833851935157172, + "loss": 
2.654, + "step": 3804 + }, + { + "epoch": 0.3070777176983294, + "grad_norm": 0.780197024345398, + "learning_rate": 0.00018337647829571324, + "loss": 2.6814, + "step": 3805 + }, + { + "epoch": 0.30715842143491245, + "grad_norm": 0.7214049100875854, + "learning_rate": 0.0001833677609977495, + "loss": 2.709, + "step": 3806 + }, + { + "epoch": 0.30723912517149543, + "grad_norm": 0.7680457830429077, + "learning_rate": 0.00018335904162204326, + "loss": 2.6628, + "step": 3807 + }, + { + "epoch": 0.30731982890807846, + "grad_norm": 0.760728120803833, + "learning_rate": 0.00018335032016881178, + "loss": 2.7005, + "step": 3808 + }, + { + "epoch": 0.30740053264466144, + "grad_norm": 0.7631687521934509, + "learning_rate": 0.00018334159663827243, + "loss": 2.7012, + "step": 3809 + }, + { + "epoch": 0.30748123638124447, + "grad_norm": 0.7515785694122314, + "learning_rate": 0.00018333287103064266, + "loss": 2.7062, + "step": 3810 + }, + { + "epoch": 0.30756194011782745, + "grad_norm": 0.804500162601471, + "learning_rate": 0.00018332414334613987, + "loss": 2.7888, + "step": 3811 + }, + { + "epoch": 0.3076426438544105, + "grad_norm": 0.7551451325416565, + "learning_rate": 0.00018331541358498164, + "loss": 2.6345, + "step": 3812 + }, + { + "epoch": 0.30772334759099346, + "grad_norm": 0.7342958450317383, + "learning_rate": 0.0001833066817473855, + "loss": 2.6601, + "step": 3813 + }, + { + "epoch": 0.3078040513275765, + "grad_norm": 0.8059296607971191, + "learning_rate": 0.0001832979478335691, + "loss": 2.7694, + "step": 3814 + }, + { + "epoch": 0.30788475506415947, + "grad_norm": 0.7037352919578552, + "learning_rate": 0.0001832892118437501, + "loss": 2.6788, + "step": 3815 + }, + { + "epoch": 0.3079654588007425, + "grad_norm": 0.759509801864624, + "learning_rate": 0.0001832804737781462, + "loss": 2.7115, + "step": 3816 + }, + { + "epoch": 0.3080461625373255, + "grad_norm": 0.7911720871925354, + "learning_rate": 0.00018327173363697524, + "loss": 2.6676, + "step": 3817 + }, + { + 
"epoch": 0.3081268662739085, + "grad_norm": 0.7592991590499878, + "learning_rate": 0.00018326299142045496, + "loss": 2.7245, + "step": 3818 + }, + { + "epoch": 0.3082075700104915, + "grad_norm": 0.7620227932929993, + "learning_rate": 0.00018325424712880333, + "loss": 2.7224, + "step": 3819 + }, + { + "epoch": 0.3082882737470745, + "grad_norm": 0.7834638953208923, + "learning_rate": 0.0001832455007622382, + "loss": 2.7469, + "step": 3820 + }, + { + "epoch": 0.3083689774836575, + "grad_norm": 0.7765992879867554, + "learning_rate": 0.00018323675232097757, + "loss": 2.7193, + "step": 3821 + }, + { + "epoch": 0.3084496812202405, + "grad_norm": 0.7334728837013245, + "learning_rate": 0.00018322800180523949, + "loss": 2.667, + "step": 3822 + }, + { + "epoch": 0.3085303849568235, + "grad_norm": 0.7674607634544373, + "learning_rate": 0.00018321924921524207, + "loss": 2.6479, + "step": 3823 + }, + { + "epoch": 0.30861108869340653, + "grad_norm": 0.7616469860076904, + "learning_rate": 0.0001832104945512034, + "loss": 2.6535, + "step": 3824 + }, + { + "epoch": 0.3086917924299895, + "grad_norm": 0.7693164944648743, + "learning_rate": 0.00018320173781334172, + "loss": 2.7616, + "step": 3825 + }, + { + "epoch": 0.3087724961665725, + "grad_norm": 0.7099221348762512, + "learning_rate": 0.0001831929790018752, + "loss": 2.6729, + "step": 3826 + }, + { + "epoch": 0.3088531999031555, + "grad_norm": 0.7389346957206726, + "learning_rate": 0.00018318421811702222, + "loss": 2.6396, + "step": 3827 + }, + { + "epoch": 0.3089339036397385, + "grad_norm": 0.8302628397941589, + "learning_rate": 0.00018317545515900106, + "loss": 2.6786, + "step": 3828 + }, + { + "epoch": 0.3090146073763215, + "grad_norm": 0.7441998720169067, + "learning_rate": 0.00018316669012803015, + "loss": 2.6769, + "step": 3829 + }, + { + "epoch": 0.3090953111129045, + "grad_norm": 0.8454675674438477, + "learning_rate": 0.00018315792302432788, + "loss": 2.7275, + "step": 3830 + }, + { + "epoch": 0.30917601484948753, + 
"grad_norm": 0.8129739761352539, + "learning_rate": 0.00018314915384811282, + "loss": 2.7603, + "step": 3831 + }, + { + "epoch": 0.3092567185860705, + "grad_norm": 0.7525617480278015, + "learning_rate": 0.00018314038259960349, + "loss": 2.7156, + "step": 3832 + }, + { + "epoch": 0.30933742232265354, + "grad_norm": 0.7319022417068481, + "learning_rate": 0.0001831316092790185, + "loss": 2.676, + "step": 3833 + }, + { + "epoch": 0.3094181260592365, + "grad_norm": 0.7767768502235413, + "learning_rate": 0.00018312283388657646, + "loss": 2.7022, + "step": 3834 + }, + { + "epoch": 0.30949882979581955, + "grad_norm": 0.709293007850647, + "learning_rate": 0.00018311405642249616, + "loss": 2.6241, + "step": 3835 + }, + { + "epoch": 0.30957953353240253, + "grad_norm": 0.715360701084137, + "learning_rate": 0.0001831052768869963, + "loss": 2.6777, + "step": 3836 + }, + { + "epoch": 0.30966023726898556, + "grad_norm": 0.7361319065093994, + "learning_rate": 0.0001830964952802957, + "loss": 2.6539, + "step": 3837 + }, + { + "epoch": 0.30974094100556854, + "grad_norm": 0.7243087291717529, + "learning_rate": 0.0001830877116026132, + "loss": 2.7506, + "step": 3838 + }, + { + "epoch": 0.30982164474215157, + "grad_norm": 0.7361106872558594, + "learning_rate": 0.00018307892585416776, + "loss": 2.697, + "step": 3839 + }, + { + "epoch": 0.30990234847873455, + "grad_norm": 0.7541893720626831, + "learning_rate": 0.00018307013803517833, + "loss": 2.694, + "step": 3840 + }, + { + "epoch": 0.3099830522153176, + "grad_norm": 0.7235575914382935, + "learning_rate": 0.00018306134814586388, + "loss": 2.6711, + "step": 3841 + }, + { + "epoch": 0.31006375595190055, + "grad_norm": 0.7868196368217468, + "learning_rate": 0.00018305255618644354, + "loss": 2.7177, + "step": 3842 + }, + { + "epoch": 0.3101444596884836, + "grad_norm": 0.8074443340301514, + "learning_rate": 0.00018304376215713637, + "loss": 2.7293, + "step": 3843 + }, + { + "epoch": 0.31022516342506656, + "grad_norm": 0.6993385553359985, + 
"learning_rate": 0.00018303496605816158, + "loss": 2.6942, + "step": 3844 + }, + { + "epoch": 0.3103058671616496, + "grad_norm": 0.7272824645042419, + "learning_rate": 0.00018302616788973839, + "loss": 2.7093, + "step": 3845 + }, + { + "epoch": 0.31038657089823257, + "grad_norm": 0.7496963143348694, + "learning_rate": 0.00018301736765208605, + "loss": 2.7096, + "step": 3846 + }, + { + "epoch": 0.3104672746348156, + "grad_norm": 0.7407644987106323, + "learning_rate": 0.00018300856534542387, + "loss": 2.6956, + "step": 3847 + }, + { + "epoch": 0.3105479783713986, + "grad_norm": 0.742382287979126, + "learning_rate": 0.00018299976096997132, + "loss": 2.6744, + "step": 3848 + }, + { + "epoch": 0.3106286821079816, + "grad_norm": 0.7314567565917969, + "learning_rate": 0.0001829909545259477, + "loss": 2.7544, + "step": 3849 + }, + { + "epoch": 0.3107093858445646, + "grad_norm": 0.7550896406173706, + "learning_rate": 0.0001829821460135726, + "loss": 2.714, + "step": 3850 + }, + { + "epoch": 0.3107900895811476, + "grad_norm": 0.7496031522750854, + "learning_rate": 0.00018297333543306548, + "loss": 2.6718, + "step": 3851 + }, + { + "epoch": 0.3108707933177306, + "grad_norm": 0.7600073218345642, + "learning_rate": 0.00018296452278464596, + "loss": 2.7141, + "step": 3852 + }, + { + "epoch": 0.31095149705431363, + "grad_norm": 0.7242388129234314, + "learning_rate": 0.00018295570806853366, + "loss": 2.7407, + "step": 3853 + }, + { + "epoch": 0.3110322007908966, + "grad_norm": 0.723874568939209, + "learning_rate": 0.00018294689128494824, + "loss": 2.7253, + "step": 3854 + }, + { + "epoch": 0.31111290452747964, + "grad_norm": 0.7902834415435791, + "learning_rate": 0.00018293807243410947, + "loss": 2.7118, + "step": 3855 + }, + { + "epoch": 0.3111936082640626, + "grad_norm": 0.7676794528961182, + "learning_rate": 0.00018292925151623717, + "loss": 2.684, + "step": 3856 + }, + { + "epoch": 0.31127431200064565, + "grad_norm": 0.767431378364563, + "learning_rate": 0.0001829204285315511, 
+ "loss": 2.6936, + "step": 3857 + }, + { + "epoch": 0.3113550157372286, + "grad_norm": 0.7802234888076782, + "learning_rate": 0.00018291160348027122, + "loss": 2.7181, + "step": 3858 + }, + { + "epoch": 0.31143571947381166, + "grad_norm": 0.7823610305786133, + "learning_rate": 0.00018290277636261743, + "loss": 2.7014, + "step": 3859 + }, + { + "epoch": 0.31151642321039463, + "grad_norm": 0.8199869394302368, + "learning_rate": 0.00018289394717880978, + "loss": 2.73, + "step": 3860 + }, + { + "epoch": 0.31159712694697766, + "grad_norm": 0.7725761532783508, + "learning_rate": 0.00018288511592906822, + "loss": 2.6978, + "step": 3861 + }, + { + "epoch": 0.31167783068356064, + "grad_norm": 0.752034068107605, + "learning_rate": 0.00018287628261361296, + "loss": 2.6635, + "step": 3862 + }, + { + "epoch": 0.3117585344201437, + "grad_norm": 0.7961714267730713, + "learning_rate": 0.0001828674472326641, + "loss": 2.7047, + "step": 3863 + }, + { + "epoch": 0.31183923815672665, + "grad_norm": 0.7413069605827332, + "learning_rate": 0.00018285860978644182, + "loss": 2.6872, + "step": 3864 + }, + { + "epoch": 0.3119199418933097, + "grad_norm": 0.8943146467208862, + "learning_rate": 0.00018284977027516636, + "loss": 2.7611, + "step": 3865 + }, + { + "epoch": 0.31200064562989266, + "grad_norm": 0.7663856744766235, + "learning_rate": 0.0001828409286990581, + "loss": 2.7541, + "step": 3866 + }, + { + "epoch": 0.3120813493664757, + "grad_norm": 0.7557348608970642, + "learning_rate": 0.00018283208505833731, + "loss": 2.6633, + "step": 3867 + }, + { + "epoch": 0.31216205310305867, + "grad_norm": 0.7690094113349915, + "learning_rate": 0.00018282323935322445, + "loss": 2.7117, + "step": 3868 + }, + { + "epoch": 0.3122427568396417, + "grad_norm": 0.8059033751487732, + "learning_rate": 0.00018281439158393997, + "loss": 2.6743, + "step": 3869 + }, + { + "epoch": 0.3123234605762247, + "grad_norm": 0.7877150774002075, + "learning_rate": 0.00018280554175070438, + "loss": 2.6546, + "step": 3870 + 
}, + { + "epoch": 0.3124041643128077, + "grad_norm": 0.799670934677124, + "learning_rate": 0.0001827966898537382, + "loss": 2.7184, + "step": 3871 + }, + { + "epoch": 0.3124848680493907, + "grad_norm": 0.8353915214538574, + "learning_rate": 0.0001827878358932621, + "loss": 2.7235, + "step": 3872 + }, + { + "epoch": 0.3125655717859737, + "grad_norm": 0.7954776883125305, + "learning_rate": 0.00018277897986949672, + "loss": 2.5992, + "step": 3873 + }, + { + "epoch": 0.3126462755225567, + "grad_norm": 0.7959856986999512, + "learning_rate": 0.00018277012178266277, + "loss": 2.6877, + "step": 3874 + }, + { + "epoch": 0.3127269792591397, + "grad_norm": 0.8220208883285522, + "learning_rate": 0.00018276126163298102, + "loss": 2.6891, + "step": 3875 + }, + { + "epoch": 0.3128076829957227, + "grad_norm": 0.7827965021133423, + "learning_rate": 0.0001827523994206723, + "loss": 2.7271, + "step": 3876 + }, + { + "epoch": 0.3128883867323057, + "grad_norm": 0.764369010925293, + "learning_rate": 0.00018274353514595746, + "loss": 2.6661, + "step": 3877 + }, + { + "epoch": 0.3129690904688887, + "grad_norm": 0.7440944314002991, + "learning_rate": 0.00018273466880905744, + "loss": 2.6621, + "step": 3878 + }, + { + "epoch": 0.3130497942054717, + "grad_norm": 0.8544813394546509, + "learning_rate": 0.00018272580041019319, + "loss": 2.7168, + "step": 3879 + }, + { + "epoch": 0.3131304979420547, + "grad_norm": 0.7232592701911926, + "learning_rate": 0.00018271692994958577, + "loss": 2.6666, + "step": 3880 + }, + { + "epoch": 0.3132112016786377, + "grad_norm": 0.750525712966919, + "learning_rate": 0.00018270805742745617, + "loss": 2.6984, + "step": 3881 + }, + { + "epoch": 0.31329190541522073, + "grad_norm": 0.8195550441741943, + "learning_rate": 0.00018269918284402565, + "loss": 2.7183, + "step": 3882 + }, + { + "epoch": 0.3133726091518037, + "grad_norm": 0.7695632576942444, + "learning_rate": 0.0001826903061995153, + "loss": 2.7092, + "step": 3883 + }, + { + "epoch": 0.31345331288838674, + 
"grad_norm": 0.7631582617759705, + "learning_rate": 0.0001826814274941463, + "loss": 2.7061, + "step": 3884 + }, + { + "epoch": 0.3135340166249697, + "grad_norm": 0.8318471908569336, + "learning_rate": 0.0001826725467281401, + "loss": 2.694, + "step": 3885 + }, + { + "epoch": 0.31361472036155275, + "grad_norm": 0.7313492298126221, + "learning_rate": 0.00018266366390171784, + "loss": 2.6729, + "step": 3886 + }, + { + "epoch": 0.3136954240981357, + "grad_norm": 0.7508631944656372, + "learning_rate": 0.00018265477901510105, + "loss": 2.731, + "step": 3887 + }, + { + "epoch": 0.31377612783471875, + "grad_norm": 0.8106402158737183, + "learning_rate": 0.00018264589206851107, + "loss": 2.7113, + "step": 3888 + }, + { + "epoch": 0.31385683157130173, + "grad_norm": 0.771542489528656, + "learning_rate": 0.00018263700306216945, + "loss": 2.644, + "step": 3889 + }, + { + "epoch": 0.31393753530788476, + "grad_norm": 0.812441885471344, + "learning_rate": 0.00018262811199629768, + "loss": 2.6889, + "step": 3890 + }, + { + "epoch": 0.31401823904446774, + "grad_norm": 0.8231199979782104, + "learning_rate": 0.00018261921887111738, + "loss": 2.6466, + "step": 3891 + }, + { + "epoch": 0.31409894278105077, + "grad_norm": 0.7492454051971436, + "learning_rate": 0.00018261032368685012, + "loss": 2.6693, + "step": 3892 + }, + { + "epoch": 0.31417964651763375, + "grad_norm": 0.7651814222335815, + "learning_rate": 0.00018260142644371772, + "loss": 2.6569, + "step": 3893 + }, + { + "epoch": 0.3142603502542168, + "grad_norm": 0.7504465579986572, + "learning_rate": 0.0001825925271419418, + "loss": 2.684, + "step": 3894 + }, + { + "epoch": 0.31434105399079976, + "grad_norm": 0.749650239944458, + "learning_rate": 0.00018258362578174424, + "loss": 2.6482, + "step": 3895 + }, + { + "epoch": 0.3144217577273828, + "grad_norm": 0.8445256352424622, + "learning_rate": 0.00018257472236334686, + "loss": 2.727, + "step": 3896 + }, + { + "epoch": 0.31450246146396577, + "grad_norm": 0.7628257870674133, + 
"learning_rate": 0.0001825658168869715, + "loss": 2.7314, + "step": 3897 + }, + { + "epoch": 0.3145831652005488, + "grad_norm": 0.7738446593284607, + "learning_rate": 0.00018255690935284019, + "loss": 2.7478, + "step": 3898 + }, + { + "epoch": 0.3146638689371318, + "grad_norm": 0.7578958868980408, + "learning_rate": 0.00018254799976117486, + "loss": 2.6922, + "step": 3899 + }, + { + "epoch": 0.3147445726737148, + "grad_norm": 0.8367362022399902, + "learning_rate": 0.00018253908811219764, + "loss": 2.7347, + "step": 3900 + }, + { + "epoch": 0.3148252764102978, + "grad_norm": 0.7530354857444763, + "learning_rate": 0.00018253017440613057, + "loss": 2.7151, + "step": 3901 + }, + { + "epoch": 0.3149059801468808, + "grad_norm": 0.7168053388595581, + "learning_rate": 0.00018252125864319578, + "loss": 2.7072, + "step": 3902 + }, + { + "epoch": 0.3149866838834638, + "grad_norm": 0.7480056285858154, + "learning_rate": 0.00018251234082361555, + "loss": 2.6489, + "step": 3903 + }, + { + "epoch": 0.3150673876200468, + "grad_norm": 0.8563880324363708, + "learning_rate": 0.0001825034209476121, + "loss": 2.7384, + "step": 3904 + }, + { + "epoch": 0.3151480913566298, + "grad_norm": 0.7959346771240234, + "learning_rate": 0.0001824944990154077, + "loss": 2.631, + "step": 3905 + }, + { + "epoch": 0.31522879509321283, + "grad_norm": 0.7385980486869812, + "learning_rate": 0.00018248557502722476, + "loss": 2.7394, + "step": 3906 + }, + { + "epoch": 0.3153094988297958, + "grad_norm": 0.7682650685310364, + "learning_rate": 0.00018247664898328567, + "loss": 2.7327, + "step": 3907 + }, + { + "epoch": 0.31539020256637884, + "grad_norm": 0.7720316648483276, + "learning_rate": 0.0001824677208838129, + "loss": 2.6442, + "step": 3908 + }, + { + "epoch": 0.3154709063029618, + "grad_norm": 0.7927379608154297, + "learning_rate": 0.00018245879072902895, + "loss": 2.7738, + "step": 3909 + }, + { + "epoch": 0.31555161003954485, + "grad_norm": 0.7506012916564941, + "learning_rate": 
0.00018244985851915637, + "loss": 2.6825, + "step": 3910 + }, + { + "epoch": 0.3156323137761278, + "grad_norm": 0.6996353268623352, + "learning_rate": 0.00018244092425441781, + "loss": 2.6783, + "step": 3911 + }, + { + "epoch": 0.31571301751271086, + "grad_norm": 0.8039344549179077, + "learning_rate": 0.00018243198793503588, + "loss": 2.7628, + "step": 3912 + }, + { + "epoch": 0.31579372124929384, + "grad_norm": 0.7890963554382324, + "learning_rate": 0.0001824230495612334, + "loss": 2.7512, + "step": 3913 + }, + { + "epoch": 0.31587442498587687, + "grad_norm": 0.7470870614051819, + "learning_rate": 0.00018241410913323301, + "loss": 2.7058, + "step": 3914 + }, + { + "epoch": 0.31595512872245984, + "grad_norm": 0.7056336402893066, + "learning_rate": 0.0001824051666512576, + "loss": 2.6091, + "step": 3915 + }, + { + "epoch": 0.3160358324590429, + "grad_norm": 0.7818490862846375, + "learning_rate": 0.00018239622211553002, + "loss": 2.7509, + "step": 3916 + }, + { + "epoch": 0.31611653619562585, + "grad_norm": 0.7590607404708862, + "learning_rate": 0.0001823872755262732, + "loss": 2.7238, + "step": 3917 + }, + { + "epoch": 0.3161972399322089, + "grad_norm": 0.7157841920852661, + "learning_rate": 0.00018237832688371014, + "loss": 2.6639, + "step": 3918 + }, + { + "epoch": 0.31627794366879186, + "grad_norm": 0.7515804171562195, + "learning_rate": 0.00018236937618806382, + "loss": 2.6973, + "step": 3919 + }, + { + "epoch": 0.3163586474053749, + "grad_norm": 0.6691949963569641, + "learning_rate": 0.00018236042343955733, + "loss": 2.727, + "step": 3920 + }, + { + "epoch": 0.31643935114195787, + "grad_norm": 0.8122327327728271, + "learning_rate": 0.0001823514686384138, + "loss": 2.7513, + "step": 3921 + }, + { + "epoch": 0.3165200548785409, + "grad_norm": 0.7813653349876404, + "learning_rate": 0.0001823425117848564, + "loss": 2.7037, + "step": 3922 + }, + { + "epoch": 0.3166007586151239, + "grad_norm": 0.6869354844093323, + "learning_rate": 0.00018233355287910834, + "loss": 
2.693, + "step": 3923 + }, + { + "epoch": 0.3166814623517069, + "grad_norm": 0.7773037552833557, + "learning_rate": 0.00018232459192139296, + "loss": 2.687, + "step": 3924 + }, + { + "epoch": 0.3167621660882899, + "grad_norm": 0.7644256949424744, + "learning_rate": 0.00018231562891193352, + "loss": 2.6753, + "step": 3925 + }, + { + "epoch": 0.3168428698248729, + "grad_norm": 0.8427005410194397, + "learning_rate": 0.00018230666385095343, + "loss": 2.6641, + "step": 3926 + }, + { + "epoch": 0.3169235735614559, + "grad_norm": 0.7194599509239197, + "learning_rate": 0.0001822976967386761, + "loss": 2.7091, + "step": 3927 + }, + { + "epoch": 0.3170042772980389, + "grad_norm": 0.7710655331611633, + "learning_rate": 0.00018228872757532512, + "loss": 2.6938, + "step": 3928 + }, + { + "epoch": 0.3170849810346219, + "grad_norm": 0.8003759980201721, + "learning_rate": 0.0001822797563611239, + "loss": 2.7019, + "step": 3929 + }, + { + "epoch": 0.3171656847712049, + "grad_norm": 0.7960470914840698, + "learning_rate": 0.00018227078309629606, + "loss": 2.661, + "step": 3930 + }, + { + "epoch": 0.3172463885077879, + "grad_norm": 0.7731126546859741, + "learning_rate": 0.00018226180778106526, + "loss": 2.7023, + "step": 3931 + }, + { + "epoch": 0.3173270922443709, + "grad_norm": 0.7561383843421936, + "learning_rate": 0.00018225283041565515, + "loss": 2.6768, + "step": 3932 + }, + { + "epoch": 0.3174077959809539, + "grad_norm": 0.7578409910202026, + "learning_rate": 0.0001822438510002895, + "loss": 2.7145, + "step": 3933 + }, + { + "epoch": 0.3174884997175369, + "grad_norm": 0.7901952862739563, + "learning_rate": 0.00018223486953519214, + "loss": 2.7121, + "step": 3934 + }, + { + "epoch": 0.31756920345411993, + "grad_norm": 0.82305908203125, + "learning_rate": 0.0001822258860205868, + "loss": 2.7553, + "step": 3935 + }, + { + "epoch": 0.3176499071907029, + "grad_norm": 0.748055636882782, + "learning_rate": 0.0001822169004566975, + "loss": 2.7236, + "step": 3936 + }, + { + "epoch": 
0.31773061092728594, + "grad_norm": 0.7981358766555786, + "learning_rate": 0.0001822079128437481, + "loss": 2.7444, + "step": 3937 + }, + { + "epoch": 0.3178113146638689, + "grad_norm": 0.7938945889472961, + "learning_rate": 0.0001821989231819626, + "loss": 2.7512, + "step": 3938 + }, + { + "epoch": 0.31789201840045195, + "grad_norm": 0.7250397205352783, + "learning_rate": 0.0001821899314715651, + "loss": 2.6843, + "step": 3939 + }, + { + "epoch": 0.3179727221370349, + "grad_norm": 0.8844723701477051, + "learning_rate": 0.00018218093771277965, + "loss": 2.6295, + "step": 3940 + }, + { + "epoch": 0.31805342587361796, + "grad_norm": 0.7545698881149292, + "learning_rate": 0.0001821719419058304, + "loss": 2.7478, + "step": 3941 + }, + { + "epoch": 0.31813412961020093, + "grad_norm": 0.7254738807678223, + "learning_rate": 0.00018216294405094157, + "loss": 2.665, + "step": 3942 + }, + { + "epoch": 0.31821483334678397, + "grad_norm": 0.7664754390716553, + "learning_rate": 0.00018215394414833737, + "loss": 2.7431, + "step": 3943 + }, + { + "epoch": 0.31829553708336694, + "grad_norm": 0.8250303864479065, + "learning_rate": 0.00018214494219824217, + "loss": 2.6957, + "step": 3944 + }, + { + "epoch": 0.31837624081995, + "grad_norm": 0.7425532341003418, + "learning_rate": 0.00018213593820088026, + "loss": 2.666, + "step": 3945 + }, + { + "epoch": 0.31845694455653295, + "grad_norm": 0.6943121552467346, + "learning_rate": 0.00018212693215647604, + "loss": 2.716, + "step": 3946 + }, + { + "epoch": 0.318537648293116, + "grad_norm": 0.732829213142395, + "learning_rate": 0.00018211792406525403, + "loss": 2.6557, + "step": 3947 + }, + { + "epoch": 0.31861835202969896, + "grad_norm": 0.7666537165641785, + "learning_rate": 0.00018210891392743866, + "loss": 2.7275, + "step": 3948 + }, + { + "epoch": 0.318699055766282, + "grad_norm": 0.7652621865272522, + "learning_rate": 0.00018209990174325455, + "loss": 2.6372, + "step": 3949 + }, + { + "epoch": 0.31877975950286497, + "grad_norm": 
0.7416055202484131, + "learning_rate": 0.00018209088751292626, + "loss": 2.6688, + "step": 3950 + }, + { + "epoch": 0.318860463239448, + "grad_norm": 0.7504609227180481, + "learning_rate": 0.00018208187123667848, + "loss": 2.6912, + "step": 3951 + }, + { + "epoch": 0.318941166976031, + "grad_norm": 0.7308809757232666, + "learning_rate": 0.00018207285291473588, + "loss": 2.7272, + "step": 3952 + }, + { + "epoch": 0.319021870712614, + "grad_norm": 0.8031618595123291, + "learning_rate": 0.00018206383254732326, + "loss": 2.7354, + "step": 3953 + }, + { + "epoch": 0.319102574449197, + "grad_norm": 0.81386798620224, + "learning_rate": 0.00018205481013466542, + "loss": 2.676, + "step": 3954 + }, + { + "epoch": 0.31918327818578, + "grad_norm": 0.7845911383628845, + "learning_rate": 0.0001820457856769872, + "loss": 2.7094, + "step": 3955 + }, + { + "epoch": 0.319263981922363, + "grad_norm": 0.7189298272132874, + "learning_rate": 0.00018203675917451357, + "loss": 2.6764, + "step": 3956 + }, + { + "epoch": 0.319344685658946, + "grad_norm": 0.8253228664398193, + "learning_rate": 0.00018202773062746944, + "loss": 2.6805, + "step": 3957 + }, + { + "epoch": 0.319425389395529, + "grad_norm": 0.7965289950370789, + "learning_rate": 0.0001820187000360798, + "loss": 2.7148, + "step": 3958 + }, + { + "epoch": 0.31950609313211203, + "grad_norm": 0.7505398988723755, + "learning_rate": 0.0001820096674005698, + "loss": 2.6732, + "step": 3959 + }, + { + "epoch": 0.319586796868695, + "grad_norm": 0.7554877400398254, + "learning_rate": 0.0001820006327211645, + "loss": 2.7467, + "step": 3960 + }, + { + "epoch": 0.31966750060527804, + "grad_norm": 0.7836194038391113, + "learning_rate": 0.00018199159599808907, + "loss": 2.7252, + "step": 3961 + }, + { + "epoch": 0.319748204341861, + "grad_norm": 0.7967261672019958, + "learning_rate": 0.00018198255723156877, + "loss": 2.6814, + "step": 3962 + }, + { + "epoch": 0.31982890807844405, + "grad_norm": 0.7411713600158691, + "learning_rate": 
0.00018197351642182882, + "loss": 2.6928, + "step": 3963 + }, + { + "epoch": 0.31990961181502703, + "grad_norm": 0.6961422562599182, + "learning_rate": 0.00018196447356909454, + "loss": 2.6651, + "step": 3964 + }, + { + "epoch": 0.31999031555161006, + "grad_norm": 0.7245771884918213, + "learning_rate": 0.00018195542867359134, + "loss": 2.6726, + "step": 3965 + }, + { + "epoch": 0.32007101928819304, + "grad_norm": 0.784654974937439, + "learning_rate": 0.00018194638173554462, + "loss": 2.6829, + "step": 3966 + }, + { + "epoch": 0.32015172302477607, + "grad_norm": 0.7373329997062683, + "learning_rate": 0.00018193733275517985, + "loss": 2.6481, + "step": 3967 + }, + { + "epoch": 0.32023242676135905, + "grad_norm": 0.7878682613372803, + "learning_rate": 0.00018192828173272258, + "loss": 2.6701, + "step": 3968 + }, + { + "epoch": 0.3203131304979421, + "grad_norm": 0.759676992893219, + "learning_rate": 0.00018191922866839835, + "loss": 2.7218, + "step": 3969 + }, + { + "epoch": 0.32039383423452505, + "grad_norm": 0.7923088669776917, + "learning_rate": 0.00018191017356243282, + "loss": 2.6841, + "step": 3970 + }, + { + "epoch": 0.3204745379711081, + "grad_norm": 0.7084882855415344, + "learning_rate": 0.00018190111641505164, + "loss": 2.7167, + "step": 3971 + }, + { + "epoch": 0.32055524170769106, + "grad_norm": 0.7166235446929932, + "learning_rate": 0.00018189205722648054, + "loss": 2.6647, + "step": 3972 + }, + { + "epoch": 0.3206359454442741, + "grad_norm": 0.7997722029685974, + "learning_rate": 0.0001818829959969453, + "loss": 2.7199, + "step": 3973 + }, + { + "epoch": 0.32071664918085707, + "grad_norm": 0.8309516310691833, + "learning_rate": 0.0001818739327266718, + "loss": 2.8006, + "step": 3974 + }, + { + "epoch": 0.3207973529174401, + "grad_norm": 0.7164002656936646, + "learning_rate": 0.00018186486741588582, + "loss": 2.6258, + "step": 3975 + }, + { + "epoch": 0.3208780566540231, + "grad_norm": 0.7715865969657898, + "learning_rate": 0.0001818558000648134, + "loss": 
2.7034, + "step": 3976 + }, + { + "epoch": 0.3209587603906061, + "grad_norm": 0.7806593775749207, + "learning_rate": 0.0001818467306736804, + "loss": 2.6758, + "step": 3977 + }, + { + "epoch": 0.3210394641271891, + "grad_norm": 0.8026594519615173, + "learning_rate": 0.00018183765924271298, + "loss": 2.6976, + "step": 3978 + }, + { + "epoch": 0.32112016786377207, + "grad_norm": 0.7971245050430298, + "learning_rate": 0.00018182858577213716, + "loss": 2.7312, + "step": 3979 + }, + { + "epoch": 0.3212008716003551, + "grad_norm": 0.7347297072410583, + "learning_rate": 0.00018181951026217908, + "loss": 2.6664, + "step": 3980 + }, + { + "epoch": 0.3212815753369381, + "grad_norm": 0.7929779291152954, + "learning_rate": 0.0001818104327130649, + "loss": 2.6603, + "step": 3981 + }, + { + "epoch": 0.3213622790735211, + "grad_norm": 0.7465224862098694, + "learning_rate": 0.00018180135312502089, + "loss": 2.6566, + "step": 3982 + }, + { + "epoch": 0.3214429828101041, + "grad_norm": 0.7114695906639099, + "learning_rate": 0.00018179227149827334, + "loss": 2.6492, + "step": 3983 + }, + { + "epoch": 0.3215236865466871, + "grad_norm": 0.7179337739944458, + "learning_rate": 0.00018178318783304857, + "loss": 2.6778, + "step": 3984 + }, + { + "epoch": 0.3216043902832701, + "grad_norm": 0.7182629704475403, + "learning_rate": 0.000181774102129573, + "loss": 2.7057, + "step": 3985 + }, + { + "epoch": 0.3216850940198531, + "grad_norm": 0.7383119463920593, + "learning_rate": 0.000181765014388073, + "loss": 2.6633, + "step": 3986 + }, + { + "epoch": 0.3217657977564361, + "grad_norm": 0.7340527176856995, + "learning_rate": 0.00018175592460877512, + "loss": 2.6838, + "step": 3987 + }, + { + "epoch": 0.32184650149301913, + "grad_norm": 0.7934359312057495, + "learning_rate": 0.00018174683279190593, + "loss": 2.6795, + "step": 3988 + }, + { + "epoch": 0.3219272052296021, + "grad_norm": 0.6960840821266174, + "learning_rate": 0.00018173773893769192, + "loss": 2.6669, + "step": 3989 + }, + { + 
"epoch": 0.32200790896618514, + "grad_norm": 0.7513574361801147, + "learning_rate": 0.00018172864304635985, + "loss": 2.6744, + "step": 3990 + }, + { + "epoch": 0.3220886127027681, + "grad_norm": 0.7516636848449707, + "learning_rate": 0.00018171954511813629, + "loss": 2.6652, + "step": 3991 + }, + { + "epoch": 0.32216931643935115, + "grad_norm": 0.7817716002464294, + "learning_rate": 0.00018171044515324808, + "loss": 2.6671, + "step": 3992 + }, + { + "epoch": 0.3222500201759341, + "grad_norm": 0.6859925389289856, + "learning_rate": 0.000181701343151922, + "loss": 2.6984, + "step": 3993 + }, + { + "epoch": 0.32233072391251716, + "grad_norm": 0.7669627666473389, + "learning_rate": 0.00018169223911438485, + "loss": 2.7102, + "step": 3994 + }, + { + "epoch": 0.32241142764910014, + "grad_norm": 0.784724235534668, + "learning_rate": 0.00018168313304086357, + "loss": 2.7413, + "step": 3995 + }, + { + "epoch": 0.32249213138568317, + "grad_norm": 0.7341497540473938, + "learning_rate": 0.00018167402493158509, + "loss": 2.706, + "step": 3996 + }, + { + "epoch": 0.32257283512226614, + "grad_norm": 0.7975730299949646, + "learning_rate": 0.00018166491478677641, + "loss": 2.6896, + "step": 3997 + }, + { + "epoch": 0.3226535388588492, + "grad_norm": 0.8138537406921387, + "learning_rate": 0.00018165580260666458, + "loss": 2.6986, + "step": 3998 + }, + { + "epoch": 0.32273424259543215, + "grad_norm": 0.6734997034072876, + "learning_rate": 0.0001816466883914767, + "loss": 2.6686, + "step": 3999 + }, + { + "epoch": 0.3228149463320152, + "grad_norm": 0.7742779850959778, + "learning_rate": 0.00018163757214143992, + "loss": 2.7222, + "step": 4000 + }, + { + "epoch": 0.3228149463320152, + "eval_loss": 2.615234375, + "eval_runtime": 783.0394, + "eval_samples_per_second": 3.346, + "eval_steps_per_second": 0.558, + "step": 4000 + }, + { + "epoch": 0.32289565006859816, + "grad_norm": 0.7654715180397034, + "learning_rate": 0.00018162845385678145, + "loss": 2.7016, + "step": 4001 + }, + { + 
"epoch": 0.3229763538051812, + "grad_norm": 0.8698763251304626, + "learning_rate": 0.0001816193335377285, + "loss": 2.6709, + "step": 4002 + }, + { + "epoch": 0.32305705754176417, + "grad_norm": 0.758056640625, + "learning_rate": 0.00018161021118450843, + "loss": 2.7277, + "step": 4003 + }, + { + "epoch": 0.3231377612783472, + "grad_norm": 0.7462654113769531, + "learning_rate": 0.00018160108679734856, + "loss": 2.623, + "step": 4004 + }, + { + "epoch": 0.3232184650149302, + "grad_norm": 0.7274953722953796, + "learning_rate": 0.00018159196037647628, + "loss": 2.6875, + "step": 4005 + }, + { + "epoch": 0.3232991687515132, + "grad_norm": 0.7737346887588501, + "learning_rate": 0.0001815828319221191, + "loss": 2.6967, + "step": 4006 + }, + { + "epoch": 0.3233798724880962, + "grad_norm": 0.7793172001838684, + "learning_rate": 0.00018157370143450448, + "loss": 2.724, + "step": 4007 + }, + { + "epoch": 0.3234605762246792, + "grad_norm": 0.7791805863380432, + "learning_rate": 0.00018156456891385995, + "loss": 2.6653, + "step": 4008 + }, + { + "epoch": 0.3235412799612622, + "grad_norm": 0.7225624918937683, + "learning_rate": 0.0001815554343604132, + "loss": 2.745, + "step": 4009 + }, + { + "epoch": 0.32362198369784523, + "grad_norm": 0.6958494782447815, + "learning_rate": 0.0001815462977743918, + "loss": 2.6856, + "step": 4010 + }, + { + "epoch": 0.3237026874344282, + "grad_norm": 0.7572030425071716, + "learning_rate": 0.0001815371591560235, + "loss": 2.7053, + "step": 4011 + }, + { + "epoch": 0.32378339117101124, + "grad_norm": 0.7133952975273132, + "learning_rate": 0.00018152801850553605, + "loss": 2.6984, + "step": 4012 + }, + { + "epoch": 0.3238640949075942, + "grad_norm": 0.7598705291748047, + "learning_rate": 0.00018151887582315728, + "loss": 2.6632, + "step": 4013 + }, + { + "epoch": 0.32394479864417725, + "grad_norm": 0.7670698165893555, + "learning_rate": 0.00018150973110911503, + "loss": 2.7035, + "step": 4014 + }, + { + "epoch": 0.3240255023807602, + "grad_norm": 
0.7547060251235962, + "learning_rate": 0.00018150058436363723, + "loss": 2.6531, + "step": 4015 + }, + { + "epoch": 0.32410620611734325, + "grad_norm": 0.7943035364151001, + "learning_rate": 0.00018149143558695178, + "loss": 2.766, + "step": 4016 + }, + { + "epoch": 0.32418690985392623, + "grad_norm": 0.864356517791748, + "learning_rate": 0.00018148228477928675, + "loss": 2.7134, + "step": 4017 + }, + { + "epoch": 0.32426761359050926, + "grad_norm": 0.7773902416229248, + "learning_rate": 0.00018147313194087018, + "loss": 2.6948, + "step": 4018 + }, + { + "epoch": 0.32434831732709224, + "grad_norm": 0.839131772518158, + "learning_rate": 0.0001814639770719302, + "loss": 2.7393, + "step": 4019 + }, + { + "epoch": 0.32442902106367527, + "grad_norm": 0.807837963104248, + "learning_rate": 0.00018145482017269498, + "loss": 2.7835, + "step": 4020 + }, + { + "epoch": 0.32450972480025825, + "grad_norm": 0.7133228182792664, + "learning_rate": 0.00018144566124339272, + "loss": 2.6859, + "step": 4021 + }, + { + "epoch": 0.3245904285368413, + "grad_norm": 0.8450621962547302, + "learning_rate": 0.00018143650028425162, + "loss": 2.7548, + "step": 4022 + }, + { + "epoch": 0.32467113227342426, + "grad_norm": 0.8594980835914612, + "learning_rate": 0.00018142733729550013, + "loss": 2.6636, + "step": 4023 + }, + { + "epoch": 0.3247518360100073, + "grad_norm": 0.7134621739387512, + "learning_rate": 0.0001814181722773665, + "loss": 2.6501, + "step": 4024 + }, + { + "epoch": 0.32483253974659027, + "grad_norm": 0.8630430698394775, + "learning_rate": 0.0001814090052300792, + "loss": 2.6994, + "step": 4025 + }, + { + "epoch": 0.3249132434831733, + "grad_norm": 0.7044873237609863, + "learning_rate": 0.00018139983615386666, + "loss": 2.6603, + "step": 4026 + }, + { + "epoch": 0.3249939472197563, + "grad_norm": 0.6896052360534668, + "learning_rate": 0.00018139066504895744, + "loss": 2.6649, + "step": 4027 + }, + { + "epoch": 0.3250746509563393, + "grad_norm": 0.802855372428894, + 
"learning_rate": 0.00018138149191558012, + "loss": 2.7067, + "step": 4028 + }, + { + "epoch": 0.3251553546929223, + "grad_norm": 0.7555437088012695, + "learning_rate": 0.00018137231675396324, + "loss": 2.6471, + "step": 4029 + }, + { + "epoch": 0.32523605842950526, + "grad_norm": 0.6846967339515686, + "learning_rate": 0.00018136313956433552, + "loss": 2.6774, + "step": 4030 + }, + { + "epoch": 0.3253167621660883, + "grad_norm": 0.7435858249664307, + "learning_rate": 0.0001813539603469257, + "loss": 2.7135, + "step": 4031 + }, + { + "epoch": 0.32539746590267127, + "grad_norm": 0.7669098377227783, + "learning_rate": 0.00018134477910196253, + "loss": 2.7014, + "step": 4032 + }, + { + "epoch": 0.3254781696392543, + "grad_norm": 0.7797521352767944, + "learning_rate": 0.00018133559582967482, + "loss": 2.7229, + "step": 4033 + }, + { + "epoch": 0.3255588733758373, + "grad_norm": 0.7377886176109314, + "learning_rate": 0.00018132641053029142, + "loss": 2.7196, + "step": 4034 + }, + { + "epoch": 0.3256395771124203, + "grad_norm": 0.7387986779212952, + "learning_rate": 0.0001813172232040413, + "loss": 2.687, + "step": 4035 + }, + { + "epoch": 0.3257202808490033, + "grad_norm": 0.7276624441146851, + "learning_rate": 0.0001813080338511534, + "loss": 2.6954, + "step": 4036 + }, + { + "epoch": 0.3258009845855863, + "grad_norm": 0.7929670214653015, + "learning_rate": 0.00018129884247185683, + "loss": 2.7431, + "step": 4037 + }, + { + "epoch": 0.3258816883221693, + "grad_norm": 0.7896441221237183, + "learning_rate": 0.0001812896490663805, + "loss": 2.6823, + "step": 4038 + }, + { + "epoch": 0.3259623920587523, + "grad_norm": 0.8642957210540771, + "learning_rate": 0.00018128045363495368, + "loss": 2.7334, + "step": 4039 + }, + { + "epoch": 0.3260430957953353, + "grad_norm": 0.7156081795692444, + "learning_rate": 0.00018127125617780542, + "loss": 2.6886, + "step": 4040 + }, + { + "epoch": 0.32612379953191833, + "grad_norm": 0.8260853290557861, + "learning_rate": 
0.00018126205669516507, + "loss": 2.6802, + "step": 4041 + }, + { + "epoch": 0.3262045032685013, + "grad_norm": 0.6853542327880859, + "learning_rate": 0.00018125285518726182, + "loss": 2.6392, + "step": 4042 + }, + { + "epoch": 0.32628520700508434, + "grad_norm": 0.7574017643928528, + "learning_rate": 0.00018124365165432505, + "loss": 2.7412, + "step": 4043 + }, + { + "epoch": 0.3263659107416673, + "grad_norm": 0.8656191825866699, + "learning_rate": 0.00018123444609658408, + "loss": 2.6903, + "step": 4044 + }, + { + "epoch": 0.32644661447825035, + "grad_norm": 0.7443257570266724, + "learning_rate": 0.00018122523851426837, + "loss": 2.682, + "step": 4045 + }, + { + "epoch": 0.32652731821483333, + "grad_norm": 0.7222229242324829, + "learning_rate": 0.0001812160289076074, + "loss": 2.6196, + "step": 4046 + }, + { + "epoch": 0.32660802195141636, + "grad_norm": 0.8531985878944397, + "learning_rate": 0.00018120681727683066, + "loss": 2.6777, + "step": 4047 + }, + { + "epoch": 0.32668872568799934, + "grad_norm": 0.7380290627479553, + "learning_rate": 0.0001811976036221678, + "loss": 2.6847, + "step": 4048 + }, + { + "epoch": 0.32676942942458237, + "grad_norm": 0.7250707149505615, + "learning_rate": 0.00018118838794384837, + "loss": 2.6846, + "step": 4049 + }, + { + "epoch": 0.32685013316116535, + "grad_norm": 0.763504147529602, + "learning_rate": 0.00018117917024210208, + "loss": 2.69, + "step": 4050 + }, + { + "epoch": 0.3269308368977484, + "grad_norm": 0.7740737795829773, + "learning_rate": 0.00018116995051715867, + "loss": 2.6945, + "step": 4051 + }, + { + "epoch": 0.32701154063433135, + "grad_norm": 0.7777624726295471, + "learning_rate": 0.00018116072876924792, + "loss": 2.6918, + "step": 4052 + }, + { + "epoch": 0.3270922443709144, + "grad_norm": 0.7957910895347595, + "learning_rate": 0.0001811515049985997, + "loss": 2.7237, + "step": 4053 + }, + { + "epoch": 0.32717294810749736, + "grad_norm": 0.7828991413116455, + "learning_rate": 0.00018114227920544375, + "loss": 
2.7008, + "step": 4054 + }, + { + "epoch": 0.3272536518440804, + "grad_norm": 0.6695161461830139, + "learning_rate": 0.00018113305139001016, + "loss": 2.7311, + "step": 4055 + }, + { + "epoch": 0.32733435558066337, + "grad_norm": 0.7693436145782471, + "learning_rate": 0.00018112382155252883, + "loss": 2.7102, + "step": 4056 + }, + { + "epoch": 0.3274150593172464, + "grad_norm": 0.7520042657852173, + "learning_rate": 0.0001811145896932298, + "loss": 2.6455, + "step": 4057 + }, + { + "epoch": 0.3274957630538294, + "grad_norm": 0.786834716796875, + "learning_rate": 0.00018110535581234317, + "loss": 2.6965, + "step": 4058 + }, + { + "epoch": 0.3275764667904124, + "grad_norm": 0.742001473903656, + "learning_rate": 0.00018109611991009905, + "loss": 2.7341, + "step": 4059 + }, + { + "epoch": 0.3276571705269954, + "grad_norm": 0.813522219657898, + "learning_rate": 0.00018108688198672766, + "loss": 2.8116, + "step": 4060 + }, + { + "epoch": 0.3277378742635784, + "grad_norm": 0.7611314058303833, + "learning_rate": 0.00018107764204245916, + "loss": 2.6741, + "step": 4061 + }, + { + "epoch": 0.3278185780001614, + "grad_norm": 0.7285993695259094, + "learning_rate": 0.00018106840007752392, + "loss": 2.671, + "step": 4062 + }, + { + "epoch": 0.32789928173674443, + "grad_norm": 0.773151695728302, + "learning_rate": 0.0001810591560921522, + "loss": 2.7106, + "step": 4063 + }, + { + "epoch": 0.3279799854733274, + "grad_norm": 0.7448920011520386, + "learning_rate": 0.00018104991008657445, + "loss": 2.7176, + "step": 4064 + }, + { + "epoch": 0.32806068920991044, + "grad_norm": 0.7088467478752136, + "learning_rate": 0.0001810406620610211, + "loss": 2.7085, + "step": 4065 + }, + { + "epoch": 0.3281413929464934, + "grad_norm": 0.7507789731025696, + "learning_rate": 0.00018103141201572255, + "loss": 2.7361, + "step": 4066 + }, + { + "epoch": 0.32822209668307645, + "grad_norm": 0.7065643072128296, + "learning_rate": 0.00018102215995090943, + "loss": 2.6573, + "step": 4067 + }, + { + 
"epoch": 0.3283028004196594, + "grad_norm": 0.6888713836669922, + "learning_rate": 0.0001810129058668123, + "loss": 2.6699, + "step": 4068 + }, + { + "epoch": 0.32838350415624246, + "grad_norm": 0.736347496509552, + "learning_rate": 0.00018100364976366174, + "loss": 2.7089, + "step": 4069 + }, + { + "epoch": 0.32846420789282543, + "grad_norm": 0.6854562759399414, + "learning_rate": 0.0001809943916416885, + "loss": 2.7051, + "step": 4070 + }, + { + "epoch": 0.32854491162940846, + "grad_norm": 0.7481048107147217, + "learning_rate": 0.0001809851315011233, + "loss": 2.7428, + "step": 4071 + }, + { + "epoch": 0.32862561536599144, + "grad_norm": 0.7600961923599243, + "learning_rate": 0.0001809758693421969, + "loss": 2.7153, + "step": 4072 + }, + { + "epoch": 0.3287063191025745, + "grad_norm": 0.7545063495635986, + "learning_rate": 0.00018096660516514024, + "loss": 2.6736, + "step": 4073 + }, + { + "epoch": 0.32878702283915745, + "grad_norm": 0.7967175841331482, + "learning_rate": 0.0001809573389701841, + "loss": 2.6711, + "step": 4074 + }, + { + "epoch": 0.3288677265757405, + "grad_norm": 0.7115446925163269, + "learning_rate": 0.00018094807075755943, + "loss": 2.6761, + "step": 4075 + }, + { + "epoch": 0.32894843031232346, + "grad_norm": 0.8230876326560974, + "learning_rate": 0.00018093880052749725, + "loss": 2.6749, + "step": 4076 + }, + { + "epoch": 0.3290291340489065, + "grad_norm": 0.8549706935882568, + "learning_rate": 0.00018092952828022856, + "loss": 2.7084, + "step": 4077 + }, + { + "epoch": 0.32910983778548947, + "grad_norm": 0.7379534244537354, + "learning_rate": 0.00018092025401598448, + "loss": 2.7241, + "step": 4078 + }, + { + "epoch": 0.3291905415220725, + "grad_norm": 0.7659998536109924, + "learning_rate": 0.00018091097773499616, + "loss": 2.7108, + "step": 4079 + }, + { + "epoch": 0.3292712452586555, + "grad_norm": 0.8074536323547363, + "learning_rate": 0.00018090169943749476, + "loss": 2.676, + "step": 4080 + }, + { + "epoch": 0.32935194899523845, + 
"grad_norm": 0.7588536143302917, + "learning_rate": 0.00018089241912371153, + "loss": 2.639, + "step": 4081 + }, + { + "epoch": 0.3294326527318215, + "grad_norm": 0.7510811686515808, + "learning_rate": 0.00018088313679387775, + "loss": 2.6722, + "step": 4082 + }, + { + "epoch": 0.32951335646840446, + "grad_norm": 0.7538900971412659, + "learning_rate": 0.0001808738524482248, + "loss": 2.6917, + "step": 4083 + }, + { + "epoch": 0.3295940602049875, + "grad_norm": 0.8071155548095703, + "learning_rate": 0.00018086456608698402, + "loss": 2.6964, + "step": 4084 + }, + { + "epoch": 0.32967476394157047, + "grad_norm": 0.7778098583221436, + "learning_rate": 0.00018085527771038686, + "loss": 2.7301, + "step": 4085 + }, + { + "epoch": 0.3297554676781535, + "grad_norm": 0.7717564702033997, + "learning_rate": 0.00018084598731866485, + "loss": 2.7484, + "step": 4086 + }, + { + "epoch": 0.3298361714147365, + "grad_norm": 0.7361736297607422, + "learning_rate": 0.00018083669491204948, + "loss": 2.6299, + "step": 4087 + }, + { + "epoch": 0.3299168751513195, + "grad_norm": 0.736681342124939, + "learning_rate": 0.00018082740049077238, + "loss": 2.7521, + "step": 4088 + }, + { + "epoch": 0.3299975788879025, + "grad_norm": 0.8011857867240906, + "learning_rate": 0.00018081810405506517, + "loss": 2.724, + "step": 4089 + }, + { + "epoch": 0.3300782826244855, + "grad_norm": 0.7741932272911072, + "learning_rate": 0.00018080880560515956, + "loss": 2.6766, + "step": 4090 + }, + { + "epoch": 0.3301589863610685, + "grad_norm": 0.7321778535842896, + "learning_rate": 0.00018079950514128724, + "loss": 2.6614, + "step": 4091 + }, + { + "epoch": 0.33023969009765153, + "grad_norm": 0.7916514277458191, + "learning_rate": 0.00018079020266368006, + "loss": 2.7177, + "step": 4092 + }, + { + "epoch": 0.3303203938342345, + "grad_norm": 0.7961388826370239, + "learning_rate": 0.00018078089817256986, + "loss": 2.6671, + "step": 4093 + }, + { + "epoch": 0.33040109757081754, + "grad_norm": 0.7167038321495056, + 
"learning_rate": 0.0001807715916681885, + "loss": 2.6989, + "step": 4094 + }, + { + "epoch": 0.3304818013074005, + "grad_norm": 0.6924864649772644, + "learning_rate": 0.00018076228315076794, + "loss": 2.6484, + "step": 4095 + }, + { + "epoch": 0.33056250504398355, + "grad_norm": 0.777881383895874, + "learning_rate": 0.00018075297262054013, + "loss": 2.6498, + "step": 4096 + }, + { + "epoch": 0.3306432087805665, + "grad_norm": 0.7878376841545105, + "learning_rate": 0.0001807436600777372, + "loss": 2.7745, + "step": 4097 + }, + { + "epoch": 0.33072391251714955, + "grad_norm": 0.8418465256690979, + "learning_rate": 0.0001807343455225912, + "loss": 2.7195, + "step": 4098 + }, + { + "epoch": 0.33080461625373253, + "grad_norm": 0.7780830264091492, + "learning_rate": 0.00018072502895533424, + "loss": 2.6652, + "step": 4099 + }, + { + "epoch": 0.33088531999031556, + "grad_norm": 0.7102445960044861, + "learning_rate": 0.00018071571037619853, + "loss": 2.6618, + "step": 4100 + }, + { + "epoch": 0.33096602372689854, + "grad_norm": 0.7028098106384277, + "learning_rate": 0.00018070638978541633, + "loss": 2.7114, + "step": 4101 + }, + { + "epoch": 0.33104672746348157, + "grad_norm": 0.7529525756835938, + "learning_rate": 0.00018069706718321996, + "loss": 2.7231, + "step": 4102 + }, + { + "epoch": 0.33112743120006455, + "grad_norm": 0.7404564023017883, + "learning_rate": 0.0001806877425698417, + "loss": 2.6564, + "step": 4103 + }, + { + "epoch": 0.3312081349366476, + "grad_norm": 0.7725130319595337, + "learning_rate": 0.00018067841594551401, + "loss": 2.677, + "step": 4104 + }, + { + "epoch": 0.33128883867323056, + "grad_norm": 0.7616425156593323, + "learning_rate": 0.00018066908731046927, + "loss": 2.6586, + "step": 4105 + }, + { + "epoch": 0.3313695424098136, + "grad_norm": 0.7318183779716492, + "learning_rate": 0.00018065975666494002, + "loss": 2.6624, + "step": 4106 + }, + { + "epoch": 0.33145024614639657, + "grad_norm": 0.7012802958488464, + "learning_rate": 
0.00018065042400915878, + "loss": 2.6663, + "step": 4107 + }, + { + "epoch": 0.3315309498829796, + "grad_norm": 0.815226674079895, + "learning_rate": 0.00018064108934335814, + "loss": 2.7248, + "step": 4108 + }, + { + "epoch": 0.3316116536195626, + "grad_norm": 0.68972247838974, + "learning_rate": 0.00018063175266777077, + "loss": 2.6961, + "step": 4109 + }, + { + "epoch": 0.3316923573561456, + "grad_norm": 0.7563794255256653, + "learning_rate": 0.00018062241398262937, + "loss": 2.6526, + "step": 4110 + }, + { + "epoch": 0.3317730610927286, + "grad_norm": 0.7878836989402771, + "learning_rate": 0.00018061307328816662, + "loss": 2.7316, + "step": 4111 + }, + { + "epoch": 0.3318537648293116, + "grad_norm": 0.7189129590988159, + "learning_rate": 0.00018060373058461537, + "loss": 2.6577, + "step": 4112 + }, + { + "epoch": 0.3319344685658946, + "grad_norm": 0.7517561912536621, + "learning_rate": 0.00018059438587220847, + "loss": 2.668, + "step": 4113 + }, + { + "epoch": 0.3320151723024776, + "grad_norm": 0.7602595686912537, + "learning_rate": 0.00018058503915117878, + "loss": 2.6741, + "step": 4114 + }, + { + "epoch": 0.3320958760390606, + "grad_norm": 0.7702187299728394, + "learning_rate": 0.00018057569042175927, + "loss": 2.7082, + "step": 4115 + }, + { + "epoch": 0.33217657977564363, + "grad_norm": 0.7289660573005676, + "learning_rate": 0.00018056633968418294, + "loss": 2.6728, + "step": 4116 + }, + { + "epoch": 0.3322572835122266, + "grad_norm": 0.6936683654785156, + "learning_rate": 0.0001805569869386828, + "loss": 2.6735, + "step": 4117 + }, + { + "epoch": 0.33233798724880964, + "grad_norm": 0.7128138542175293, + "learning_rate": 0.000180547632185492, + "loss": 2.646, + "step": 4118 + }, + { + "epoch": 0.3324186909853926, + "grad_norm": 0.7234248518943787, + "learning_rate": 0.00018053827542484363, + "loss": 2.6497, + "step": 4119 + }, + { + "epoch": 0.33249939472197565, + "grad_norm": 0.7084202170372009, + "learning_rate": 0.0001805289166569709, + "loss": 2.6328, 
+ "step": 4120 + }, + { + "epoch": 0.3325800984585586, + "grad_norm": 0.8068051934242249, + "learning_rate": 0.00018051955588210708, + "loss": 2.6576, + "step": 4121 + }, + { + "epoch": 0.33266080219514166, + "grad_norm": 0.787680447101593, + "learning_rate": 0.00018051019310048544, + "loss": 2.7091, + "step": 4122 + }, + { + "epoch": 0.33274150593172463, + "grad_norm": 0.698946475982666, + "learning_rate": 0.00018050082831233931, + "loss": 2.6657, + "step": 4123 + }, + { + "epoch": 0.33282220966830767, + "grad_norm": 0.7946122288703918, + "learning_rate": 0.00018049146151790215, + "loss": 2.6981, + "step": 4124 + }, + { + "epoch": 0.33290291340489064, + "grad_norm": 0.8025123476982117, + "learning_rate": 0.00018048209271740736, + "loss": 2.6878, + "step": 4125 + }, + { + "epoch": 0.3329836171414737, + "grad_norm": 0.7493376135826111, + "learning_rate": 0.0001804727219110884, + "loss": 2.6556, + "step": 4126 + }, + { + "epoch": 0.33306432087805665, + "grad_norm": 0.7143186926841736, + "learning_rate": 0.00018046334909917886, + "loss": 2.6879, + "step": 4127 + }, + { + "epoch": 0.3331450246146397, + "grad_norm": 0.7375641465187073, + "learning_rate": 0.00018045397428191235, + "loss": 2.6817, + "step": 4128 + }, + { + "epoch": 0.33322572835122266, + "grad_norm": 0.7201291918754578, + "learning_rate": 0.00018044459745952248, + "loss": 2.6765, + "step": 4129 + }, + { + "epoch": 0.3333064320878057, + "grad_norm": 0.7924519777297974, + "learning_rate": 0.00018043521863224296, + "loss": 2.7748, + "step": 4130 + }, + { + "epoch": 0.33338713582438867, + "grad_norm": 0.7773354053497314, + "learning_rate": 0.00018042583780030752, + "loss": 2.6839, + "step": 4131 + }, + { + "epoch": 0.33346783956097165, + "grad_norm": 0.7527397274971008, + "learning_rate": 0.00018041645496394998, + "loss": 2.6749, + "step": 4132 + }, + { + "epoch": 0.3335485432975547, + "grad_norm": 0.7329208254814148, + "learning_rate": 0.00018040707012340418, + "loss": 2.7535, + "step": 4133 + }, + { + 
"epoch": 0.33362924703413765, + "grad_norm": 0.7637773752212524, + "learning_rate": 0.00018039768327890397, + "loss": 2.632, + "step": 4134 + }, + { + "epoch": 0.3337099507707207, + "grad_norm": 0.823623776435852, + "learning_rate": 0.00018038829443068333, + "loss": 2.7122, + "step": 4135 + }, + { + "epoch": 0.33379065450730366, + "grad_norm": 0.8040826916694641, + "learning_rate": 0.00018037890357897632, + "loss": 2.7197, + "step": 4136 + }, + { + "epoch": 0.3338713582438867, + "grad_norm": 0.7483998537063599, + "learning_rate": 0.00018036951072401686, + "loss": 2.6535, + "step": 4137 + }, + { + "epoch": 0.33395206198046967, + "grad_norm": 0.8141106367111206, + "learning_rate": 0.00018036011586603914, + "loss": 2.7127, + "step": 4138 + }, + { + "epoch": 0.3340327657170527, + "grad_norm": 0.7226041555404663, + "learning_rate": 0.00018035071900527724, + "loss": 2.6846, + "step": 4139 + }, + { + "epoch": 0.3341134694536357, + "grad_norm": 0.7624794840812683, + "learning_rate": 0.00018034132014196541, + "loss": 2.6725, + "step": 4140 + }, + { + "epoch": 0.3341941731902187, + "grad_norm": 0.7299962043762207, + "learning_rate": 0.00018033191927633785, + "loss": 2.6728, + "step": 4141 + }, + { + "epoch": 0.3342748769268017, + "grad_norm": 0.7920462489128113, + "learning_rate": 0.0001803225164086289, + "loss": 2.6544, + "step": 4142 + }, + { + "epoch": 0.3343555806633847, + "grad_norm": 0.7469778656959534, + "learning_rate": 0.00018031311153907282, + "loss": 2.7356, + "step": 4143 + }, + { + "epoch": 0.3344362843999677, + "grad_norm": 0.8831696510314941, + "learning_rate": 0.0001803037046679041, + "loss": 2.6584, + "step": 4144 + }, + { + "epoch": 0.33451698813655073, + "grad_norm": 0.8047679662704468, + "learning_rate": 0.00018029429579535715, + "loss": 2.6213, + "step": 4145 + }, + { + "epoch": 0.3345976918731337, + "grad_norm": 0.7109517455101013, + "learning_rate": 0.00018028488492166645, + "loss": 2.6622, + "step": 4146 + }, + { + "epoch": 0.33467839560971674, + 
"grad_norm": 0.7240141034126282, + "learning_rate": 0.0001802754720470665, + "loss": 2.6794, + "step": 4147 + }, + { + "epoch": 0.3347590993462997, + "grad_norm": 0.7292990684509277, + "learning_rate": 0.000180266057171792, + "loss": 2.6079, + "step": 4148 + }, + { + "epoch": 0.33483980308288275, + "grad_norm": 0.8055328130722046, + "learning_rate": 0.00018025664029607756, + "loss": 2.7044, + "step": 4149 + }, + { + "epoch": 0.3349205068194657, + "grad_norm": 0.8348979949951172, + "learning_rate": 0.00018024722142015781, + "loss": 2.6757, + "step": 4150 + }, + { + "epoch": 0.33500121055604876, + "grad_norm": 0.7797044515609741, + "learning_rate": 0.00018023780054426754, + "loss": 2.7125, + "step": 4151 + }, + { + "epoch": 0.33508191429263173, + "grad_norm": 0.802442729473114, + "learning_rate": 0.00018022837766864153, + "loss": 2.7121, + "step": 4152 + }, + { + "epoch": 0.33516261802921476, + "grad_norm": 0.7248829007148743, + "learning_rate": 0.00018021895279351463, + "loss": 2.7344, + "step": 4153 + }, + { + "epoch": 0.33524332176579774, + "grad_norm": 0.7458582520484924, + "learning_rate": 0.00018020952591912175, + "loss": 2.665, + "step": 4154 + }, + { + "epoch": 0.3353240255023808, + "grad_norm": 0.8153703808784485, + "learning_rate": 0.0001802000970456978, + "loss": 2.7416, + "step": 4155 + }, + { + "epoch": 0.33540472923896375, + "grad_norm": 0.7583708763122559, + "learning_rate": 0.00018019066617347779, + "loss": 2.7002, + "step": 4156 + }, + { + "epoch": 0.3354854329755468, + "grad_norm": 0.7522469162940979, + "learning_rate": 0.00018018123330269678, + "loss": 2.7196, + "step": 4157 + }, + { + "epoch": 0.33556613671212976, + "grad_norm": 0.7386923432350159, + "learning_rate": 0.00018017179843358983, + "loss": 2.6947, + "step": 4158 + }, + { + "epoch": 0.3356468404487128, + "grad_norm": 0.7366231083869934, + "learning_rate": 0.00018016236156639205, + "loss": 2.7377, + "step": 4159 + }, + { + "epoch": 0.33572754418529577, + "grad_norm": 0.7727232575416565, + 
"learning_rate": 0.00018015292270133872, + "loss": 2.7566, + "step": 4160 + }, + { + "epoch": 0.3358082479218788, + "grad_norm": 0.6781843304634094, + "learning_rate": 0.000180143481838665, + "loss": 2.6796, + "step": 4161 + }, + { + "epoch": 0.3358889516584618, + "grad_norm": 0.7036039233207703, + "learning_rate": 0.00018013403897860624, + "loss": 2.7012, + "step": 4162 + }, + { + "epoch": 0.3359696553950448, + "grad_norm": 0.8252625465393066, + "learning_rate": 0.00018012459412139776, + "loss": 2.6613, + "step": 4163 + }, + { + "epoch": 0.3360503591316278, + "grad_norm": 0.6924486756324768, + "learning_rate": 0.00018011514726727493, + "loss": 2.6425, + "step": 4164 + }, + { + "epoch": 0.3361310628682108, + "grad_norm": 0.7735962271690369, + "learning_rate": 0.0001801056984164732, + "loss": 2.7235, + "step": 4165 + }, + { + "epoch": 0.3362117666047938, + "grad_norm": 0.7439951300621033, + "learning_rate": 0.0001800962475692281, + "loss": 2.7428, + "step": 4166 + }, + { + "epoch": 0.3362924703413768, + "grad_norm": 0.6830539107322693, + "learning_rate": 0.0001800867947257751, + "loss": 2.5907, + "step": 4167 + }, + { + "epoch": 0.3363731740779598, + "grad_norm": 0.8355144262313843, + "learning_rate": 0.00018007733988634986, + "loss": 2.6978, + "step": 4168 + }, + { + "epoch": 0.33645387781454283, + "grad_norm": 0.6880978941917419, + "learning_rate": 0.00018006788305118798, + "loss": 2.6934, + "step": 4169 + }, + { + "epoch": 0.3365345815511258, + "grad_norm": 0.762709379196167, + "learning_rate": 0.0001800584242205251, + "loss": 2.684, + "step": 4170 + }, + { + "epoch": 0.33661528528770884, + "grad_norm": 0.7543070912361145, + "learning_rate": 0.0001800489633945971, + "loss": 2.6857, + "step": 4171 + }, + { + "epoch": 0.3366959890242918, + "grad_norm": 0.787651777267456, + "learning_rate": 0.00018003950057363964, + "loss": 2.6979, + "step": 4172 + }, + { + "epoch": 0.33677669276087485, + "grad_norm": 0.7831481099128723, + "learning_rate": 0.00018003003575788856, + 
"loss": 2.7158, + "step": 4173 + }, + { + "epoch": 0.33685739649745783, + "grad_norm": 0.844904363155365, + "learning_rate": 0.00018002056894757986, + "loss": 2.6459, + "step": 4174 + }, + { + "epoch": 0.33693810023404086, + "grad_norm": 0.7529420852661133, + "learning_rate": 0.00018001110014294937, + "loss": 2.685, + "step": 4175 + }, + { + "epoch": 0.33701880397062384, + "grad_norm": 0.776719868183136, + "learning_rate": 0.0001800016293442331, + "loss": 2.6353, + "step": 4176 + }, + { + "epoch": 0.33709950770720687, + "grad_norm": 0.7988671660423279, + "learning_rate": 0.00017999215655166716, + "loss": 2.7241, + "step": 4177 + }, + { + "epoch": 0.33718021144378985, + "grad_norm": 0.7190617918968201, + "learning_rate": 0.00017998268176548752, + "loss": 2.7278, + "step": 4178 + }, + { + "epoch": 0.3372609151803729, + "grad_norm": 0.8337060809135437, + "learning_rate": 0.0001799732049859304, + "loss": 2.7059, + "step": 4179 + }, + { + "epoch": 0.33734161891695585, + "grad_norm": 0.7547435164451599, + "learning_rate": 0.0001799637262132319, + "loss": 2.7782, + "step": 4180 + }, + { + "epoch": 0.3374223226535389, + "grad_norm": 0.8067883253097534, + "learning_rate": 0.0001799542454476284, + "loss": 2.7978, + "step": 4181 + }, + { + "epoch": 0.33750302639012186, + "grad_norm": 0.7451581358909607, + "learning_rate": 0.00017994476268935609, + "loss": 2.6931, + "step": 4182 + }, + { + "epoch": 0.33758373012670484, + "grad_norm": 0.7521898746490479, + "learning_rate": 0.00017993527793865125, + "loss": 2.6939, + "step": 4183 + }, + { + "epoch": 0.33766443386328787, + "grad_norm": 0.7608996033668518, + "learning_rate": 0.0001799257911957504, + "loss": 2.715, + "step": 4184 + }, + { + "epoch": 0.33774513759987085, + "grad_norm": 0.7459948658943176, + "learning_rate": 0.00017991630246088987, + "loss": 2.6951, + "step": 4185 + }, + { + "epoch": 0.3378258413364539, + "grad_norm": 0.7549717426300049, + "learning_rate": 0.00017990681173430618, + "loss": 2.7353, + "step": 4186 + }, 
+ { + "epoch": 0.33790654507303686, + "grad_norm": 0.7234344482421875, + "learning_rate": 0.0001798973190162359, + "loss": 2.6491, + "step": 4187 + }, + { + "epoch": 0.3379872488096199, + "grad_norm": 0.7652330994606018, + "learning_rate": 0.00017988782430691553, + "loss": 2.765, + "step": 4188 + }, + { + "epoch": 0.33806795254620287, + "grad_norm": 0.742953360080719, + "learning_rate": 0.00017987832760658177, + "loss": 2.7079, + "step": 4189 + }, + { + "epoch": 0.3381486562827859, + "grad_norm": 0.7440767288208008, + "learning_rate": 0.00017986882891547125, + "loss": 2.6751, + "step": 4190 + }, + { + "epoch": 0.3382293600193689, + "grad_norm": 0.7141925096511841, + "learning_rate": 0.00017985932823382078, + "loss": 2.6249, + "step": 4191 + }, + { + "epoch": 0.3383100637559519, + "grad_norm": 0.7200489044189453, + "learning_rate": 0.00017984982556186707, + "loss": 2.6811, + "step": 4192 + }, + { + "epoch": 0.3383907674925349, + "grad_norm": 0.7677409648895264, + "learning_rate": 0.00017984032089984696, + "loss": 2.6641, + "step": 4193 + }, + { + "epoch": 0.3384714712291179, + "grad_norm": 0.7386545538902283, + "learning_rate": 0.00017983081424799741, + "loss": 2.6504, + "step": 4194 + }, + { + "epoch": 0.3385521749657009, + "grad_norm": 0.7528583407402039, + "learning_rate": 0.00017982130560655526, + "loss": 2.6422, + "step": 4195 + }, + { + "epoch": 0.3386328787022839, + "grad_norm": 0.7339407801628113, + "learning_rate": 0.0001798117949757575, + "loss": 2.7047, + "step": 4196 + }, + { + "epoch": 0.3387135824388669, + "grad_norm": 0.7655882239341736, + "learning_rate": 0.00017980228235584117, + "loss": 2.7644, + "step": 4197 + }, + { + "epoch": 0.33879428617544993, + "grad_norm": 0.7602109909057617, + "learning_rate": 0.00017979276774704342, + "loss": 2.697, + "step": 4198 + }, + { + "epoch": 0.3388749899120329, + "grad_norm": 0.7188911437988281, + "learning_rate": 0.00017978325114960126, + "loss": 2.7147, + "step": 4199 + }, + { + "epoch": 0.33895569364861594, + 
"grad_norm": 0.7672597765922546, + "learning_rate": 0.00017977373256375194, + "loss": 2.6558, + "step": 4200 + }, + { + "epoch": 0.3390363973851989, + "grad_norm": 0.784187912940979, + "learning_rate": 0.0001797642119897327, + "loss": 2.7005, + "step": 4201 + }, + { + "epoch": 0.33911710112178195, + "grad_norm": 0.7359703779220581, + "learning_rate": 0.00017975468942778075, + "loss": 2.6578, + "step": 4202 + }, + { + "epoch": 0.3391978048583649, + "grad_norm": 0.7776080965995789, + "learning_rate": 0.00017974516487813345, + "loss": 2.6747, + "step": 4203 + }, + { + "epoch": 0.33927850859494796, + "grad_norm": 0.6934135556221008, + "learning_rate": 0.00017973563834102824, + "loss": 2.6335, + "step": 4204 + }, + { + "epoch": 0.33935921233153094, + "grad_norm": 0.7715818881988525, + "learning_rate": 0.00017972610981670245, + "loss": 2.6062, + "step": 4205 + }, + { + "epoch": 0.33943991606811397, + "grad_norm": 0.7466367483139038, + "learning_rate": 0.0001797165793053936, + "loss": 2.7243, + "step": 4206 + }, + { + "epoch": 0.33952061980469694, + "grad_norm": 0.7485085129737854, + "learning_rate": 0.00017970704680733926, + "loss": 2.6603, + "step": 4207 + }, + { + "epoch": 0.33960132354128, + "grad_norm": 0.7365782856941223, + "learning_rate": 0.0001796975123227769, + "loss": 2.7179, + "step": 4208 + }, + { + "epoch": 0.33968202727786295, + "grad_norm": 0.8405506014823914, + "learning_rate": 0.00017968797585194422, + "loss": 2.7413, + "step": 4209 + }, + { + "epoch": 0.339762731014446, + "grad_norm": 0.8227888941764832, + "learning_rate": 0.00017967843739507888, + "loss": 2.6814, + "step": 4210 + }, + { + "epoch": 0.33984343475102896, + "grad_norm": 0.8247283697128296, + "learning_rate": 0.0001796688969524186, + "loss": 2.6802, + "step": 4211 + }, + { + "epoch": 0.339924138487612, + "grad_norm": 0.7639476656913757, + "learning_rate": 0.00017965935452420116, + "loss": 2.7422, + "step": 4212 + }, + { + "epoch": 0.34000484222419497, + "grad_norm": 0.7846776247024536, + 
"learning_rate": 0.00017964981011066436, + "loss": 2.7443, + "step": 4213 + }, + { + "epoch": 0.340085545960778, + "grad_norm": 0.7593334913253784, + "learning_rate": 0.00017964026371204608, + "loss": 2.7179, + "step": 4214 + }, + { + "epoch": 0.340166249697361, + "grad_norm": 0.7878177165985107, + "learning_rate": 0.00017963071532858425, + "loss": 2.7118, + "step": 4215 + }, + { + "epoch": 0.340246953433944, + "grad_norm": 0.7728220224380493, + "learning_rate": 0.00017962116496051685, + "loss": 2.6646, + "step": 4216 + }, + { + "epoch": 0.340327657170527, + "grad_norm": 0.8419308066368103, + "learning_rate": 0.00017961161260808187, + "loss": 2.7829, + "step": 4217 + }, + { + "epoch": 0.34040836090711, + "grad_norm": 0.7066153883934021, + "learning_rate": 0.0001796020582715174, + "loss": 2.6498, + "step": 4218 + }, + { + "epoch": 0.340489064643693, + "grad_norm": 0.7976264953613281, + "learning_rate": 0.00017959250195106156, + "loss": 2.7496, + "step": 4219 + }, + { + "epoch": 0.34056976838027603, + "grad_norm": 0.736595630645752, + "learning_rate": 0.0001795829436469525, + "loss": 2.6497, + "step": 4220 + }, + { + "epoch": 0.340650472116859, + "grad_norm": 0.818550705909729, + "learning_rate": 0.0001795733833594285, + "loss": 2.6793, + "step": 4221 + }, + { + "epoch": 0.34073117585344204, + "grad_norm": 0.7712778449058533, + "learning_rate": 0.00017956382108872773, + "loss": 2.6215, + "step": 4222 + }, + { + "epoch": 0.340811879590025, + "grad_norm": 0.746306300163269, + "learning_rate": 0.00017955425683508858, + "loss": 2.7372, + "step": 4223 + }, + { + "epoch": 0.34089258332660805, + "grad_norm": 0.7269306778907776, + "learning_rate": 0.00017954469059874937, + "loss": 2.6438, + "step": 4224 + }, + { + "epoch": 0.340973287063191, + "grad_norm": 0.7426211833953857, + "learning_rate": 0.00017953512237994855, + "loss": 2.6539, + "step": 4225 + }, + { + "epoch": 0.34105399079977405, + "grad_norm": 0.7269948124885559, + "learning_rate": 0.0001795255521789246, + 
"loss": 2.6833, + "step": 4226 + }, + { + "epoch": 0.34113469453635703, + "grad_norm": 0.7279343605041504, + "learning_rate": 0.00017951597999591598, + "loss": 2.7011, + "step": 4227 + }, + { + "epoch": 0.34121539827294006, + "grad_norm": 0.7554663419723511, + "learning_rate": 0.0001795064058311613, + "loss": 2.7036, + "step": 4228 + }, + { + "epoch": 0.34129610200952304, + "grad_norm": 0.7516502141952515, + "learning_rate": 0.00017949682968489912, + "loss": 2.6699, + "step": 4229 + }, + { + "epoch": 0.34137680574610607, + "grad_norm": 0.7931745052337646, + "learning_rate": 0.00017948725155736818, + "loss": 2.6655, + "step": 4230 + }, + { + "epoch": 0.34145750948268905, + "grad_norm": 0.6981344223022461, + "learning_rate": 0.0001794776714488071, + "loss": 2.6987, + "step": 4231 + }, + { + "epoch": 0.3415382132192721, + "grad_norm": 0.7513911724090576, + "learning_rate": 0.00017946808935945474, + "loss": 2.6985, + "step": 4232 + }, + { + "epoch": 0.34161891695585506, + "grad_norm": 0.7373185753822327, + "learning_rate": 0.00017945850528954983, + "loss": 2.7269, + "step": 4233 + }, + { + "epoch": 0.34169962069243803, + "grad_norm": 0.6990259289741516, + "learning_rate": 0.0001794489192393313, + "loss": 2.6763, + "step": 4234 + }, + { + "epoch": 0.34178032442902107, + "grad_norm": 0.7661817669868469, + "learning_rate": 0.00017943933120903797, + "loss": 2.7057, + "step": 4235 + }, + { + "epoch": 0.34186102816560404, + "grad_norm": 0.7570027112960815, + "learning_rate": 0.0001794297411989089, + "loss": 2.7358, + "step": 4236 + }, + { + "epoch": 0.3419417319021871, + "grad_norm": 0.7751824855804443, + "learning_rate": 0.000179420149209183, + "loss": 2.6771, + "step": 4237 + }, + { + "epoch": 0.34202243563877005, + "grad_norm": 0.8028360605239868, + "learning_rate": 0.0001794105552400994, + "loss": 2.6399, + "step": 4238 + }, + { + "epoch": 0.3421031393753531, + "grad_norm": 0.7398171424865723, + "learning_rate": 0.00017940095929189716, + "loss": 2.6532, + "step": 4239 + 
}, + { + "epoch": 0.34218384311193606, + "grad_norm": 0.8300225138664246, + "learning_rate": 0.0001793913613648155, + "loss": 2.6798, + "step": 4240 + }, + { + "epoch": 0.3422645468485191, + "grad_norm": 0.7501145005226135, + "learning_rate": 0.00017938176145909356, + "loss": 2.7132, + "step": 4241 + }, + { + "epoch": 0.34234525058510207, + "grad_norm": 0.7178483605384827, + "learning_rate": 0.00017937215957497063, + "loss": 2.7172, + "step": 4242 + }, + { + "epoch": 0.3424259543216851, + "grad_norm": 0.7207306027412415, + "learning_rate": 0.00017936255571268599, + "loss": 2.629, + "step": 4243 + }, + { + "epoch": 0.3425066580582681, + "grad_norm": 0.7339839935302734, + "learning_rate": 0.00017935294987247899, + "loss": 2.6262, + "step": 4244 + }, + { + "epoch": 0.3425873617948511, + "grad_norm": 0.6977292895317078, + "learning_rate": 0.00017934334205458907, + "loss": 2.6949, + "step": 4245 + }, + { + "epoch": 0.3426680655314341, + "grad_norm": 0.7368096113204956, + "learning_rate": 0.00017933373225925564, + "loss": 2.681, + "step": 4246 + }, + { + "epoch": 0.3427487692680171, + "grad_norm": 0.7234459519386292, + "learning_rate": 0.00017932412048671825, + "loss": 2.6891, + "step": 4247 + }, + { + "epoch": 0.3428294730046001, + "grad_norm": 0.7659995555877686, + "learning_rate": 0.00017931450673721642, + "loss": 2.7394, + "step": 4248 + }, + { + "epoch": 0.3429101767411831, + "grad_norm": 0.7799893617630005, + "learning_rate": 0.00017930489101098974, + "loss": 2.7707, + "step": 4249 + }, + { + "epoch": 0.3429908804777661, + "grad_norm": 0.7063946723937988, + "learning_rate": 0.00017929527330827786, + "loss": 2.6573, + "step": 4250 + }, + { + "epoch": 0.34307158421434913, + "grad_norm": 0.7090561389923096, + "learning_rate": 0.0001792856536293205, + "loss": 2.7095, + "step": 4251 + }, + { + "epoch": 0.3431522879509321, + "grad_norm": 0.8020029067993164, + "learning_rate": 0.0001792760319743574, + "loss": 2.6905, + "step": 4252 + }, + { + "epoch": 0.34323299168751514, 
+ "grad_norm": 0.7221484780311584, + "learning_rate": 0.00017926640834362836, + "loss": 2.6853, + "step": 4253 + }, + { + "epoch": 0.3433136954240981, + "grad_norm": 0.7102623581886292, + "learning_rate": 0.00017925678273737324, + "loss": 2.6821, + "step": 4254 + }, + { + "epoch": 0.34339439916068115, + "grad_norm": 0.7702807784080505, + "learning_rate": 0.00017924715515583187, + "loss": 2.6986, + "step": 4255 + }, + { + "epoch": 0.34347510289726413, + "grad_norm": 0.7938152551651001, + "learning_rate": 0.00017923752559924425, + "loss": 2.7162, + "step": 4256 + }, + { + "epoch": 0.34355580663384716, + "grad_norm": 0.7340937852859497, + "learning_rate": 0.00017922789406785036, + "loss": 2.6904, + "step": 4257 + }, + { + "epoch": 0.34363651037043014, + "grad_norm": 0.7010839581489563, + "learning_rate": 0.00017921826056189026, + "loss": 2.6969, + "step": 4258 + }, + { + "epoch": 0.34371721410701317, + "grad_norm": 0.758178174495697, + "learning_rate": 0.00017920862508160403, + "loss": 2.6391, + "step": 4259 + }, + { + "epoch": 0.34379791784359615, + "grad_norm": 0.7861726880073547, + "learning_rate": 0.0001791989876272318, + "loss": 2.7088, + "step": 4260 + }, + { + "epoch": 0.3438786215801792, + "grad_norm": 0.6764364242553711, + "learning_rate": 0.00017918934819901377, + "loss": 2.6221, + "step": 4261 + }, + { + "epoch": 0.34395932531676215, + "grad_norm": 0.76728355884552, + "learning_rate": 0.00017917970679719018, + "loss": 2.6854, + "step": 4262 + }, + { + "epoch": 0.3440400290533452, + "grad_norm": 0.7161166071891785, + "learning_rate": 0.00017917006342200133, + "loss": 2.7048, + "step": 4263 + }, + { + "epoch": 0.34412073278992816, + "grad_norm": 0.7182073593139648, + "learning_rate": 0.00017916041807368753, + "loss": 2.7559, + "step": 4264 + }, + { + "epoch": 0.3442014365265112, + "grad_norm": 0.832258403301239, + "learning_rate": 0.0001791507707524892, + "loss": 2.6743, + "step": 4265 + }, + { + "epoch": 0.34428214026309417, + "grad_norm": 
0.7048495411872864, + "learning_rate": 0.00017914112145864675, + "loss": 2.693, + "step": 4266 + }, + { + "epoch": 0.3443628439996772, + "grad_norm": 0.7475518584251404, + "learning_rate": 0.00017913147019240068, + "loss": 2.6881, + "step": 4267 + }, + { + "epoch": 0.3444435477362602, + "grad_norm": 0.72830730676651, + "learning_rate": 0.00017912181695399154, + "loss": 2.659, + "step": 4268 + }, + { + "epoch": 0.3445242514728432, + "grad_norm": 0.7183662056922913, + "learning_rate": 0.00017911216174365988, + "loss": 2.6611, + "step": 4269 + }, + { + "epoch": 0.3446049552094262, + "grad_norm": 0.7487103343009949, + "learning_rate": 0.0001791025045616463, + "loss": 2.6518, + "step": 4270 + }, + { + "epoch": 0.3446856589460092, + "grad_norm": 0.7733812928199768, + "learning_rate": 0.0001790928454081916, + "loss": 2.6359, + "step": 4271 + }, + { + "epoch": 0.3447663626825922, + "grad_norm": 0.7774991393089294, + "learning_rate": 0.00017908318428353642, + "loss": 2.6654, + "step": 4272 + }, + { + "epoch": 0.34484706641917523, + "grad_norm": 0.6882895827293396, + "learning_rate": 0.00017907352118792157, + "loss": 2.686, + "step": 4273 + }, + { + "epoch": 0.3449277701557582, + "grad_norm": 0.7571535110473633, + "learning_rate": 0.00017906385612158785, + "loss": 2.7108, + "step": 4274 + }, + { + "epoch": 0.34500847389234124, + "grad_norm": 0.7324517369270325, + "learning_rate": 0.00017905418908477615, + "loss": 2.6663, + "step": 4275 + }, + { + "epoch": 0.3450891776289242, + "grad_norm": 0.7476221919059753, + "learning_rate": 0.00017904452007772744, + "loss": 2.7202, + "step": 4276 + }, + { + "epoch": 0.34516988136550725, + "grad_norm": 0.7648386359214783, + "learning_rate": 0.00017903484910068268, + "loss": 2.6759, + "step": 4277 + }, + { + "epoch": 0.3452505851020902, + "grad_norm": 0.7375434637069702, + "learning_rate": 0.00017902517615388282, + "loss": 2.6603, + "step": 4278 + }, + { + "epoch": 0.34533128883867326, + "grad_norm": 0.7248519062995911, + "learning_rate": 
0.00017901550123756906, + "loss": 2.7147, + "step": 4279 + }, + { + "epoch": 0.34541199257525623, + "grad_norm": 0.7264916896820068, + "learning_rate": 0.0001790058243519824, + "loss": 2.6992, + "step": 4280 + }, + { + "epoch": 0.34549269631183926, + "grad_norm": 0.8370026350021362, + "learning_rate": 0.0001789961454973641, + "loss": 2.7114, + "step": 4281 + }, + { + "epoch": 0.34557340004842224, + "grad_norm": 0.72071373462677, + "learning_rate": 0.00017898646467395538, + "loss": 2.6957, + "step": 4282 + }, + { + "epoch": 0.3456541037850053, + "grad_norm": 0.7355397343635559, + "learning_rate": 0.0001789767818819975, + "loss": 2.6744, + "step": 4283 + }, + { + "epoch": 0.34573480752158825, + "grad_norm": 0.734756588935852, + "learning_rate": 0.00017896709712173173, + "loss": 2.726, + "step": 4284 + }, + { + "epoch": 0.3458155112581712, + "grad_norm": 0.7890543341636658, + "learning_rate": 0.00017895741039339945, + "loss": 2.6726, + "step": 4285 + }, + { + "epoch": 0.34589621499475426, + "grad_norm": 0.7768735885620117, + "learning_rate": 0.00017894772169724216, + "loss": 2.7617, + "step": 4286 + }, + { + "epoch": 0.34597691873133724, + "grad_norm": 0.7306547164916992, + "learning_rate": 0.00017893803103350125, + "loss": 2.6253, + "step": 4287 + }, + { + "epoch": 0.34605762246792027, + "grad_norm": 0.767066478729248, + "learning_rate": 0.00017892833840241828, + "loss": 2.6522, + "step": 4288 + }, + { + "epoch": 0.34613832620450324, + "grad_norm": 0.7018097639083862, + "learning_rate": 0.00017891864380423477, + "loss": 2.7111, + "step": 4289 + }, + { + "epoch": 0.3462190299410863, + "grad_norm": 0.7305615544319153, + "learning_rate": 0.00017890894723919236, + "loss": 2.6924, + "step": 4290 + }, + { + "epoch": 0.34629973367766925, + "grad_norm": 0.7588002681732178, + "learning_rate": 0.00017889924870753275, + "loss": 2.6952, + "step": 4291 + }, + { + "epoch": 0.3463804374142523, + "grad_norm": 0.7162861824035645, + "learning_rate": 0.0001788895482094976, + "loss": 
2.6239, + "step": 4292 + }, + { + "epoch": 0.34646114115083526, + "grad_norm": 0.7494024634361267, + "learning_rate": 0.00017887984574532868, + "loss": 2.6763, + "step": 4293 + }, + { + "epoch": 0.3465418448874183, + "grad_norm": 0.7100037336349487, + "learning_rate": 0.0001788701413152678, + "loss": 2.6378, + "step": 4294 + }, + { + "epoch": 0.34662254862400127, + "grad_norm": 0.7316900491714478, + "learning_rate": 0.00017886043491955684, + "loss": 2.7001, + "step": 4295 + }, + { + "epoch": 0.3467032523605843, + "grad_norm": 0.8467028737068176, + "learning_rate": 0.00017885072655843772, + "loss": 2.7536, + "step": 4296 + }, + { + "epoch": 0.3467839560971673, + "grad_norm": 0.7248796820640564, + "learning_rate": 0.00017884101623215237, + "loss": 2.6956, + "step": 4297 + }, + { + "epoch": 0.3468646598337503, + "grad_norm": 0.7183107137680054, + "learning_rate": 0.0001788313039409428, + "loss": 2.743, + "step": 4298 + }, + { + "epoch": 0.3469453635703333, + "grad_norm": 0.6835163831710815, + "learning_rate": 0.00017882158968505105, + "loss": 2.7016, + "step": 4299 + }, + { + "epoch": 0.3470260673069163, + "grad_norm": 0.7973365783691406, + "learning_rate": 0.00017881187346471925, + "loss": 2.6927, + "step": 4300 + }, + { + "epoch": 0.3471067710434993, + "grad_norm": 0.700040876865387, + "learning_rate": 0.00017880215528018954, + "loss": 2.6961, + "step": 4301 + }, + { + "epoch": 0.34718747478008233, + "grad_norm": 0.8180583119392395, + "learning_rate": 0.00017879243513170415, + "loss": 2.642, + "step": 4302 + }, + { + "epoch": 0.3472681785166653, + "grad_norm": 0.7134599685668945, + "learning_rate": 0.0001787827130195053, + "loss": 2.6901, + "step": 4303 + }, + { + "epoch": 0.34734888225324834, + "grad_norm": 0.767998218536377, + "learning_rate": 0.0001787729889438353, + "loss": 2.6472, + "step": 4304 + }, + { + "epoch": 0.3474295859898313, + "grad_norm": 0.7260780930519104, + "learning_rate": 0.0001787632629049365, + "loss": 2.6791, + "step": 4305 + }, + { + 
"epoch": 0.34751028972641435, + "grad_norm": 0.6918236613273621, + "learning_rate": 0.00017875353490305132, + "loss": 2.6596, + "step": 4306 + }, + { + "epoch": 0.3475909934629973, + "grad_norm": 0.7734197974205017, + "learning_rate": 0.00017874380493842216, + "loss": 2.6402, + "step": 4307 + }, + { + "epoch": 0.34767169719958035, + "grad_norm": 0.7051037549972534, + "learning_rate": 0.00017873407301129154, + "loss": 2.7517, + "step": 4308 + }, + { + "epoch": 0.34775240093616333, + "grad_norm": 0.7026919722557068, + "learning_rate": 0.00017872433912190203, + "loss": 2.7058, + "step": 4309 + }, + { + "epoch": 0.34783310467274636, + "grad_norm": 0.7248546481132507, + "learning_rate": 0.00017871460327049618, + "loss": 2.666, + "step": 4310 + }, + { + "epoch": 0.34791380840932934, + "grad_norm": 0.7348842620849609, + "learning_rate": 0.0001787048654573167, + "loss": 2.7712, + "step": 4311 + }, + { + "epoch": 0.34799451214591237, + "grad_norm": 0.7923693656921387, + "learning_rate": 0.00017869512568260618, + "loss": 2.6469, + "step": 4312 + }, + { + "epoch": 0.34807521588249535, + "grad_norm": 0.7604066729545593, + "learning_rate": 0.00017868538394660743, + "loss": 2.7152, + "step": 4313 + }, + { + "epoch": 0.3481559196190784, + "grad_norm": 0.6811137795448303, + "learning_rate": 0.00017867564024956324, + "loss": 2.715, + "step": 4314 + }, + { + "epoch": 0.34823662335566136, + "grad_norm": 0.7292799353599548, + "learning_rate": 0.00017866589459171643, + "loss": 2.6374, + "step": 4315 + }, + { + "epoch": 0.3483173270922444, + "grad_norm": 0.6961250901222229, + "learning_rate": 0.0001786561469733099, + "loss": 2.6592, + "step": 4316 + }, + { + "epoch": 0.34839803082882737, + "grad_norm": 0.7447086572647095, + "learning_rate": 0.00017864639739458658, + "loss": 2.6965, + "step": 4317 + }, + { + "epoch": 0.3484787345654104, + "grad_norm": 0.7107378244400024, + "learning_rate": 0.00017863664585578942, + "loss": 2.7057, + "step": 4318 + }, + { + "epoch": 0.3485594383019934, + 
"grad_norm": 0.7372235655784607, + "learning_rate": 0.00017862689235716153, + "loss": 2.6289, + "step": 4319 + }, + { + "epoch": 0.3486401420385764, + "grad_norm": 0.7360481023788452, + "learning_rate": 0.00017861713689894593, + "loss": 2.7208, + "step": 4320 + }, + { + "epoch": 0.3487208457751594, + "grad_norm": 0.7378106713294983, + "learning_rate": 0.00017860737948138575, + "loss": 2.6836, + "step": 4321 + }, + { + "epoch": 0.3488015495117424, + "grad_norm": 0.7110548615455627, + "learning_rate": 0.00017859762010472423, + "loss": 2.6941, + "step": 4322 + }, + { + "epoch": 0.3488822532483254, + "grad_norm": 0.7419706583023071, + "learning_rate": 0.00017858785876920455, + "loss": 2.6591, + "step": 4323 + }, + { + "epoch": 0.3489629569849084, + "grad_norm": 0.7759542465209961, + "learning_rate": 0.00017857809547506997, + "loss": 2.6966, + "step": 4324 + }, + { + "epoch": 0.3490436607214914, + "grad_norm": 0.7894207239151001, + "learning_rate": 0.0001785683302225639, + "loss": 2.7298, + "step": 4325 + }, + { + "epoch": 0.34912436445807443, + "grad_norm": 0.7342399954795837, + "learning_rate": 0.0001785585630119296, + "loss": 2.6998, + "step": 4326 + }, + { + "epoch": 0.3492050681946574, + "grad_norm": 0.8684173822402954, + "learning_rate": 0.0001785487938434106, + "loss": 2.7179, + "step": 4327 + }, + { + "epoch": 0.34928577193124044, + "grad_norm": 0.7557523846626282, + "learning_rate": 0.00017853902271725033, + "loss": 2.7081, + "step": 4328 + }, + { + "epoch": 0.3493664756678234, + "grad_norm": 0.7910173535346985, + "learning_rate": 0.0001785292496336923, + "loss": 2.718, + "step": 4329 + }, + { + "epoch": 0.34944717940440645, + "grad_norm": 0.7878917455673218, + "learning_rate": 0.00017851947459298007, + "loss": 2.674, + "step": 4330 + }, + { + "epoch": 0.3495278831409894, + "grad_norm": 0.7290656566619873, + "learning_rate": 0.0001785096975953573, + "loss": 2.6962, + "step": 4331 + }, + { + "epoch": 0.34960858687757246, + "grad_norm": 0.8465737104415894, + 
"learning_rate": 0.00017849991864106763, + "loss": 2.6793, + "step": 4332 + }, + { + "epoch": 0.34968929061415543, + "grad_norm": 0.7183132171630859, + "learning_rate": 0.0001784901377303548, + "loss": 2.6902, + "step": 4333 + }, + { + "epoch": 0.34976999435073847, + "grad_norm": 0.7535461783409119, + "learning_rate": 0.00017848035486346255, + "loss": 2.7153, + "step": 4334 + }, + { + "epoch": 0.34985069808732144, + "grad_norm": 0.778734028339386, + "learning_rate": 0.0001784705700406347, + "loss": 2.6316, + "step": 4335 + }, + { + "epoch": 0.3499314018239044, + "grad_norm": 0.6937401294708252, + "learning_rate": 0.00017846078326211516, + "loss": 2.6902, + "step": 4336 + }, + { + "epoch": 0.35001210556048745, + "grad_norm": 0.7450751066207886, + "learning_rate": 0.00017845099452814774, + "loss": 2.6898, + "step": 4337 + }, + { + "epoch": 0.35009280929707043, + "grad_norm": 0.7535614967346191, + "learning_rate": 0.0001784412038389765, + "loss": 2.6969, + "step": 4338 + }, + { + "epoch": 0.35017351303365346, + "grad_norm": 0.6971385478973389, + "learning_rate": 0.00017843141119484543, + "loss": 2.6517, + "step": 4339 + }, + { + "epoch": 0.35025421677023644, + "grad_norm": 0.7233202457427979, + "learning_rate": 0.00017842161659599858, + "loss": 2.7332, + "step": 4340 + }, + { + "epoch": 0.35033492050681947, + "grad_norm": 0.7870340347290039, + "learning_rate": 0.00017841182004268, + "loss": 2.6485, + "step": 4341 + }, + { + "epoch": 0.35041562424340245, + "grad_norm": 0.7387053966522217, + "learning_rate": 0.0001784020215351339, + "loss": 2.6945, + "step": 4342 + }, + { + "epoch": 0.3504963279799855, + "grad_norm": 0.8357887268066406, + "learning_rate": 0.00017839222107360453, + "loss": 2.703, + "step": 4343 + }, + { + "epoch": 0.35057703171656845, + "grad_norm": 0.7197332978248596, + "learning_rate": 0.000178382418658336, + "loss": 2.6649, + "step": 4344 + }, + { + "epoch": 0.3506577354531515, + "grad_norm": 0.7416980862617493, + "learning_rate": 
0.0001783726142895728, + "loss": 2.7393, + "step": 4345 + }, + { + "epoch": 0.35073843918973446, + "grad_norm": 0.6807832717895508, + "learning_rate": 0.00017836280796755912, + "loss": 2.6619, + "step": 4346 + }, + { + "epoch": 0.3508191429263175, + "grad_norm": 0.6858795285224915, + "learning_rate": 0.00017835299969253945, + "loss": 2.6266, + "step": 4347 + }, + { + "epoch": 0.35089984666290047, + "grad_norm": 0.8432363867759705, + "learning_rate": 0.0001783431894647582, + "loss": 2.6534, + "step": 4348 + }, + { + "epoch": 0.3509805503994835, + "grad_norm": 0.7240749001502991, + "learning_rate": 0.0001783333772844599, + "loss": 2.6851, + "step": 4349 + }, + { + "epoch": 0.3510612541360665, + "grad_norm": 0.7814531326293945, + "learning_rate": 0.00017832356315188906, + "loss": 2.7085, + "step": 4350 + }, + { + "epoch": 0.3511419578726495, + "grad_norm": 0.6989716291427612, + "learning_rate": 0.00017831374706729026, + "loss": 2.6674, + "step": 4351 + }, + { + "epoch": 0.3512226616092325, + "grad_norm": 0.7118446230888367, + "learning_rate": 0.0001783039290309082, + "loss": 2.6837, + "step": 4352 + }, + { + "epoch": 0.3513033653458155, + "grad_norm": 0.7641892433166504, + "learning_rate": 0.00017829410904298754, + "loss": 2.6415, + "step": 4353 + }, + { + "epoch": 0.3513840690823985, + "grad_norm": 0.6975794434547424, + "learning_rate": 0.000178284287103773, + "loss": 2.6679, + "step": 4354 + }, + { + "epoch": 0.35146477281898153, + "grad_norm": 0.7192546725273132, + "learning_rate": 0.00017827446321350943, + "loss": 2.6539, + "step": 4355 + }, + { + "epoch": 0.3515454765555645, + "grad_norm": 0.8749549388885498, + "learning_rate": 0.00017826463737244155, + "loss": 2.7254, + "step": 4356 + }, + { + "epoch": 0.35162618029214754, + "grad_norm": 0.8509732484817505, + "learning_rate": 0.0001782548095808144, + "loss": 2.7679, + "step": 4357 + }, + { + "epoch": 0.3517068840287305, + "grad_norm": 0.7647901773452759, + "learning_rate": 0.00017824497983887278, + "loss": 
2.7049, + "step": 4358 + }, + { + "epoch": 0.35178758776531355, + "grad_norm": 0.7551973462104797, + "learning_rate": 0.00017823514814686178, + "loss": 2.7086, + "step": 4359 + }, + { + "epoch": 0.3518682915018965, + "grad_norm": 0.730140209197998, + "learning_rate": 0.00017822531450502633, + "loss": 2.6334, + "step": 4360 + }, + { + "epoch": 0.35194899523847956, + "grad_norm": 0.8210160136222839, + "learning_rate": 0.00017821547891361158, + "loss": 2.7248, + "step": 4361 + }, + { + "epoch": 0.35202969897506253, + "grad_norm": 0.761972963809967, + "learning_rate": 0.00017820564137286264, + "loss": 2.6502, + "step": 4362 + }, + { + "epoch": 0.35211040271164556, + "grad_norm": 0.7564061284065247, + "learning_rate": 0.00017819580188302466, + "loss": 2.6795, + "step": 4363 + }, + { + "epoch": 0.35219110644822854, + "grad_norm": 0.7382947206497192, + "learning_rate": 0.00017818596044434293, + "loss": 2.6754, + "step": 4364 + }, + { + "epoch": 0.3522718101848116, + "grad_norm": 0.737194836139679, + "learning_rate": 0.00017817611705706266, + "loss": 2.7098, + "step": 4365 + }, + { + "epoch": 0.35235251392139455, + "grad_norm": 0.7183281779289246, + "learning_rate": 0.0001781662717214292, + "loss": 2.6528, + "step": 4366 + }, + { + "epoch": 0.3524332176579776, + "grad_norm": 0.7785990238189697, + "learning_rate": 0.00017815642443768794, + "loss": 2.6419, + "step": 4367 + }, + { + "epoch": 0.35251392139456056, + "grad_norm": 0.7114452719688416, + "learning_rate": 0.00017814657520608427, + "loss": 2.7088, + "step": 4368 + }, + { + "epoch": 0.3525946251311436, + "grad_norm": 0.746969997882843, + "learning_rate": 0.00017813672402686365, + "loss": 2.7199, + "step": 4369 + }, + { + "epoch": 0.35267532886772657, + "grad_norm": 0.7700605988502502, + "learning_rate": 0.00017812687090027165, + "loss": 2.6713, + "step": 4370 + }, + { + "epoch": 0.3527560326043096, + "grad_norm": 0.7733504772186279, + "learning_rate": 0.0001781170158265538, + "loss": 2.6916, + "step": 4371 + }, + { + 
"epoch": 0.3528367363408926, + "grad_norm": 0.7769689559936523, + "learning_rate": 0.00017810715880595566, + "loss": 2.7787, + "step": 4372 + }, + { + "epoch": 0.3529174400774756, + "grad_norm": 0.7538996934890747, + "learning_rate": 0.000178097299838723, + "loss": 2.6964, + "step": 4373 + }, + { + "epoch": 0.3529981438140586, + "grad_norm": 0.7777890563011169, + "learning_rate": 0.00017808743892510146, + "loss": 2.6882, + "step": 4374 + }, + { + "epoch": 0.3530788475506416, + "grad_norm": 0.8331751823425293, + "learning_rate": 0.00017807757606533683, + "loss": 2.7113, + "step": 4375 + }, + { + "epoch": 0.3531595512872246, + "grad_norm": 0.8039207458496094, + "learning_rate": 0.00017806771125967492, + "loss": 2.6694, + "step": 4376 + }, + { + "epoch": 0.3532402550238076, + "grad_norm": 0.7727575898170471, + "learning_rate": 0.00017805784450836154, + "loss": 2.6639, + "step": 4377 + }, + { + "epoch": 0.3533209587603906, + "grad_norm": 0.8247967958450317, + "learning_rate": 0.00017804797581164264, + "loss": 2.6539, + "step": 4378 + }, + { + "epoch": 0.35340166249697363, + "grad_norm": 0.7574009299278259, + "learning_rate": 0.0001780381051697642, + "loss": 2.7163, + "step": 4379 + }, + { + "epoch": 0.3534823662335566, + "grad_norm": 0.7304368615150452, + "learning_rate": 0.0001780282325829721, + "loss": 2.5759, + "step": 4380 + }, + { + "epoch": 0.35356306997013964, + "grad_norm": 0.7133963704109192, + "learning_rate": 0.00017801835805151257, + "loss": 2.7008, + "step": 4381 + }, + { + "epoch": 0.3536437737067226, + "grad_norm": 0.7525407075881958, + "learning_rate": 0.00017800848157563157, + "loss": 2.6785, + "step": 4382 + }, + { + "epoch": 0.35372447744330565, + "grad_norm": 0.7306779623031616, + "learning_rate": 0.00017799860315557528, + "loss": 2.6454, + "step": 4383 + }, + { + "epoch": 0.35380518117988863, + "grad_norm": 0.6657043695449829, + "learning_rate": 0.00017798872279158994, + "loss": 2.708, + "step": 4384 + }, + { + "epoch": 0.35388588491647166, + 
"grad_norm": 0.7655978202819824, + "learning_rate": 0.00017797884048392177, + "loss": 2.727, + "step": 4385 + }, + { + "epoch": 0.35396658865305464, + "grad_norm": 0.6802939176559448, + "learning_rate": 0.00017796895623281702, + "loss": 2.659, + "step": 4386 + }, + { + "epoch": 0.3540472923896376, + "grad_norm": 0.7191160917282104, + "learning_rate": 0.00017795907003852207, + "loss": 2.6335, + "step": 4387 + }, + { + "epoch": 0.35412799612622065, + "grad_norm": 0.7771886587142944, + "learning_rate": 0.00017794918190128337, + "loss": 2.6658, + "step": 4388 + }, + { + "epoch": 0.3542086998628036, + "grad_norm": 0.7133512496948242, + "learning_rate": 0.00017793929182134723, + "loss": 2.6701, + "step": 4389 + }, + { + "epoch": 0.35428940359938665, + "grad_norm": 0.7795221209526062, + "learning_rate": 0.00017792939979896022, + "loss": 2.6932, + "step": 4390 + }, + { + "epoch": 0.35437010733596963, + "grad_norm": 0.726767897605896, + "learning_rate": 0.00017791950583436887, + "loss": 2.676, + "step": 4391 + }, + { + "epoch": 0.35445081107255266, + "grad_norm": 0.7447288632392883, + "learning_rate": 0.00017790960992781972, + "loss": 2.7195, + "step": 4392 + }, + { + "epoch": 0.35453151480913564, + "grad_norm": 0.8053649663925171, + "learning_rate": 0.0001778997120795595, + "loss": 2.6851, + "step": 4393 + }, + { + "epoch": 0.35461221854571867, + "grad_norm": 0.7258884906768799, + "learning_rate": 0.00017788981228983474, + "loss": 2.6819, + "step": 4394 + }, + { + "epoch": 0.35469292228230165, + "grad_norm": 0.7279395461082458, + "learning_rate": 0.0001778799105588923, + "loss": 2.6954, + "step": 4395 + }, + { + "epoch": 0.3547736260188847, + "grad_norm": 0.7372962236404419, + "learning_rate": 0.0001778700068869789, + "loss": 2.7049, + "step": 4396 + }, + { + "epoch": 0.35485432975546766, + "grad_norm": 0.712003767490387, + "learning_rate": 0.00017786010127434135, + "loss": 2.7413, + "step": 4397 + }, + { + "epoch": 0.3549350334920507, + "grad_norm": 0.7487424612045288, + 
"learning_rate": 0.0001778501937212266, + "loss": 2.7231, + "step": 4398 + }, + { + "epoch": 0.35501573722863367, + "grad_norm": 0.73053377866745, + "learning_rate": 0.00017784028422788146, + "loss": 2.7029, + "step": 4399 + }, + { + "epoch": 0.3550964409652167, + "grad_norm": 0.697062611579895, + "learning_rate": 0.00017783037279455298, + "loss": 2.7139, + "step": 4400 + }, + { + "epoch": 0.3551771447017997, + "grad_norm": 0.7750880718231201, + "learning_rate": 0.00017782045942148819, + "loss": 2.6601, + "step": 4401 + }, + { + "epoch": 0.3552578484383827, + "grad_norm": 0.7124977111816406, + "learning_rate": 0.00017781054410893413, + "loss": 2.6119, + "step": 4402 + }, + { + "epoch": 0.3553385521749657, + "grad_norm": 0.7773111462593079, + "learning_rate": 0.00017780062685713785, + "loss": 2.7181, + "step": 4403 + }, + { + "epoch": 0.3554192559115487, + "grad_norm": 0.7282142639160156, + "learning_rate": 0.00017779070766634663, + "loss": 2.7141, + "step": 4404 + }, + { + "epoch": 0.3554999596481317, + "grad_norm": 0.8578598499298096, + "learning_rate": 0.0001777807865368076, + "loss": 2.7628, + "step": 4405 + }, + { + "epoch": 0.3555806633847147, + "grad_norm": 0.7126399874687195, + "learning_rate": 0.00017777086346876809, + "loss": 2.6914, + "step": 4406 + }, + { + "epoch": 0.3556613671212977, + "grad_norm": 0.8026365637779236, + "learning_rate": 0.00017776093846247533, + "loss": 2.7059, + "step": 4407 + }, + { + "epoch": 0.35574207085788073, + "grad_norm": 0.7839884161949158, + "learning_rate": 0.0001777510115181767, + "loss": 2.7265, + "step": 4408 + }, + { + "epoch": 0.3558227745944637, + "grad_norm": 0.7498767971992493, + "learning_rate": 0.00017774108263611966, + "loss": 2.7201, + "step": 4409 + }, + { + "epoch": 0.35590347833104674, + "grad_norm": 0.6996301412582397, + "learning_rate": 0.0001777311518165516, + "loss": 2.6271, + "step": 4410 + }, + { + "epoch": 0.3559841820676297, + "grad_norm": 0.7721461057662964, + "learning_rate": 0.00017772121905972003, 
+ "loss": 2.6739, + "step": 4411 + }, + { + "epoch": 0.35606488580421275, + "grad_norm": 0.8018803000450134, + "learning_rate": 0.00017771128436587256, + "loss": 2.7092, + "step": 4412 + }, + { + "epoch": 0.3561455895407957, + "grad_norm": 0.7185639142990112, + "learning_rate": 0.0001777013477352567, + "loss": 2.6996, + "step": 4413 + }, + { + "epoch": 0.35622629327737876, + "grad_norm": 0.7218519449234009, + "learning_rate": 0.0001776914091681202, + "loss": 2.6555, + "step": 4414 + }, + { + "epoch": 0.35630699701396173, + "grad_norm": 0.7234479188919067, + "learning_rate": 0.00017768146866471062, + "loss": 2.6762, + "step": 4415 + }, + { + "epoch": 0.35638770075054477, + "grad_norm": 0.6723350286483765, + "learning_rate": 0.00017767152622527582, + "loss": 2.6272, + "step": 4416 + }, + { + "epoch": 0.35646840448712774, + "grad_norm": 0.7281947731971741, + "learning_rate": 0.00017766158185006356, + "loss": 2.7216, + "step": 4417 + }, + { + "epoch": 0.3565491082237108, + "grad_norm": 0.8350874781608582, + "learning_rate": 0.00017765163553932166, + "loss": 2.6619, + "step": 4418 + }, + { + "epoch": 0.35662981196029375, + "grad_norm": 0.7454007267951965, + "learning_rate": 0.00017764168729329801, + "loss": 2.6623, + "step": 4419 + }, + { + "epoch": 0.3567105156968768, + "grad_norm": 0.7419041395187378, + "learning_rate": 0.00017763173711224058, + "loss": 2.6773, + "step": 4420 + }, + { + "epoch": 0.35679121943345976, + "grad_norm": 0.7965987920761108, + "learning_rate": 0.0001776217849963973, + "loss": 2.6426, + "step": 4421 + }, + { + "epoch": 0.3568719231700428, + "grad_norm": 0.7093302607536316, + "learning_rate": 0.00017761183094601622, + "loss": 2.6745, + "step": 4422 + }, + { + "epoch": 0.35695262690662577, + "grad_norm": 0.7937216758728027, + "learning_rate": 0.00017760187496134548, + "loss": 2.7275, + "step": 4423 + }, + { + "epoch": 0.3570333306432088, + "grad_norm": 0.9185259938240051, + "learning_rate": 0.00017759191704263313, + "loss": 2.7055, + "step": 
4424 + }, + { + "epoch": 0.3571140343797918, + "grad_norm": 0.7365124821662903, + "learning_rate": 0.00017758195719012743, + "loss": 2.6504, + "step": 4425 + }, + { + "epoch": 0.3571947381163748, + "grad_norm": 0.6992416977882385, + "learning_rate": 0.0001775719954040765, + "loss": 2.6684, + "step": 4426 + }, + { + "epoch": 0.3572754418529578, + "grad_norm": 0.7742372751235962, + "learning_rate": 0.00017756203168472866, + "loss": 2.6877, + "step": 4427 + }, + { + "epoch": 0.3573561455895408, + "grad_norm": 0.7448472380638123, + "learning_rate": 0.0001775520660323323, + "loss": 2.7027, + "step": 4428 + }, + { + "epoch": 0.3574368493261238, + "grad_norm": 0.7201915979385376, + "learning_rate": 0.00017754209844713569, + "loss": 2.7046, + "step": 4429 + }, + { + "epoch": 0.3575175530627068, + "grad_norm": 0.6675081253051758, + "learning_rate": 0.0001775321289293873, + "loss": 2.6503, + "step": 4430 + }, + { + "epoch": 0.3575982567992898, + "grad_norm": 0.7252706289291382, + "learning_rate": 0.0001775221574793356, + "loss": 2.6053, + "step": 4431 + }, + { + "epoch": 0.35767896053587284, + "grad_norm": 0.7134702801704407, + "learning_rate": 0.00017751218409722906, + "loss": 2.6857, + "step": 4432 + }, + { + "epoch": 0.3577596642724558, + "grad_norm": 0.7074102163314819, + "learning_rate": 0.0001775022087833163, + "loss": 2.6871, + "step": 4433 + }, + { + "epoch": 0.35784036800903885, + "grad_norm": 0.693520724773407, + "learning_rate": 0.00017749223153784588, + "loss": 2.6629, + "step": 4434 + }, + { + "epoch": 0.3579210717456218, + "grad_norm": 0.6933221817016602, + "learning_rate": 0.0001774822523610665, + "loss": 2.6793, + "step": 4435 + }, + { + "epoch": 0.35800177548220485, + "grad_norm": 0.75307297706604, + "learning_rate": 0.00017747227125322685, + "loss": 2.7012, + "step": 4436 + }, + { + "epoch": 0.35808247921878783, + "grad_norm": 0.7732915282249451, + "learning_rate": 0.0001774622882145757, + "loss": 2.6908, + "step": 4437 + }, + { + "epoch": 
0.3581631829553708, + "grad_norm": 0.7067054510116577, + "learning_rate": 0.0001774523032453618, + "loss": 2.7494, + "step": 4438 + }, + { + "epoch": 0.35824388669195384, + "grad_norm": 0.7412838935852051, + "learning_rate": 0.00017744231634583406, + "loss": 2.6734, + "step": 4439 + }, + { + "epoch": 0.3583245904285368, + "grad_norm": 0.7663930654525757, + "learning_rate": 0.00017743232751624136, + "loss": 2.6952, + "step": 4440 + }, + { + "epoch": 0.35840529416511985, + "grad_norm": 0.70650714635849, + "learning_rate": 0.00017742233675683268, + "loss": 2.6806, + "step": 4441 + }, + { + "epoch": 0.3584859979017028, + "grad_norm": 0.698310375213623, + "learning_rate": 0.00017741234406785692, + "loss": 2.6471, + "step": 4442 + }, + { + "epoch": 0.35856670163828586, + "grad_norm": 0.7274026274681091, + "learning_rate": 0.00017740234944956323, + "loss": 2.6688, + "step": 4443 + }, + { + "epoch": 0.35864740537486883, + "grad_norm": 0.6944074034690857, + "learning_rate": 0.00017739235290220067, + "loss": 2.6954, + "step": 4444 + }, + { + "epoch": 0.35872810911145186, + "grad_norm": 0.841995358467102, + "learning_rate": 0.00017738235442601834, + "loss": 2.7169, + "step": 4445 + }, + { + "epoch": 0.35880881284803484, + "grad_norm": 0.74863201379776, + "learning_rate": 0.00017737235402126545, + "loss": 2.6534, + "step": 4446 + }, + { + "epoch": 0.3588895165846179, + "grad_norm": 0.7260422110557556, + "learning_rate": 0.00017736235168819126, + "loss": 2.6266, + "step": 4447 + }, + { + "epoch": 0.35897022032120085, + "grad_norm": 0.7450951337814331, + "learning_rate": 0.00017735234742704504, + "loss": 2.7328, + "step": 4448 + }, + { + "epoch": 0.3590509240577839, + "grad_norm": 0.6942493319511414, + "learning_rate": 0.00017734234123807614, + "loss": 2.7219, + "step": 4449 + }, + { + "epoch": 0.35913162779436686, + "grad_norm": 0.7676761746406555, + "learning_rate": 0.00017733233312153393, + "loss": 2.6594, + "step": 4450 + }, + { + "epoch": 0.3592123315309499, + "grad_norm": 
0.7446104288101196, + "learning_rate": 0.00017732232307766778, + "loss": 2.6877, + "step": 4451 + }, + { + "epoch": 0.35929303526753287, + "grad_norm": 0.7551130056381226, + "learning_rate": 0.00017731231110672727, + "loss": 2.672, + "step": 4452 + }, + { + "epoch": 0.3593737390041159, + "grad_norm": 0.6876464486122131, + "learning_rate": 0.00017730229720896182, + "loss": 2.6658, + "step": 4453 + }, + { + "epoch": 0.3594544427406989, + "grad_norm": 0.6992844343185425, + "learning_rate": 0.00017729228138462107, + "loss": 2.6805, + "step": 4454 + }, + { + "epoch": 0.3595351464772819, + "grad_norm": 0.8437497615814209, + "learning_rate": 0.00017728226363395466, + "loss": 2.6884, + "step": 4455 + }, + { + "epoch": 0.3596158502138649, + "grad_norm": 0.7669322490692139, + "learning_rate": 0.00017727224395721217, + "loss": 2.6432, + "step": 4456 + }, + { + "epoch": 0.3596965539504479, + "grad_norm": 0.7613428831100464, + "learning_rate": 0.0001772622223546434, + "loss": 2.6124, + "step": 4457 + }, + { + "epoch": 0.3597772576870309, + "grad_norm": 0.719932496547699, + "learning_rate": 0.00017725219882649807, + "loss": 2.6623, + "step": 4458 + }, + { + "epoch": 0.3598579614236139, + "grad_norm": 0.7650800347328186, + "learning_rate": 0.000177242173373026, + "loss": 2.7551, + "step": 4459 + }, + { + "epoch": 0.3599386651601969, + "grad_norm": 0.7423754930496216, + "learning_rate": 0.0001772321459944771, + "loss": 2.7375, + "step": 4460 + }, + { + "epoch": 0.36001936889677993, + "grad_norm": 0.7602835297584534, + "learning_rate": 0.0001772221166911012, + "loss": 2.7086, + "step": 4461 + }, + { + "epoch": 0.3601000726333629, + "grad_norm": 0.7246943712234497, + "learning_rate": 0.00017721208546314827, + "loss": 2.7068, + "step": 4462 + }, + { + "epoch": 0.36018077636994594, + "grad_norm": 0.715965211391449, + "learning_rate": 0.00017720205231086837, + "loss": 2.689, + "step": 4463 + }, + { + "epoch": 0.3602614801065289, + "grad_norm": 0.7696218490600586, + "learning_rate": 
0.00017719201723451151, + "loss": 2.611, + "step": 4464 + }, + { + "epoch": 0.36034218384311195, + "grad_norm": 0.7599236369132996, + "learning_rate": 0.00017718198023432779, + "loss": 2.6504, + "step": 4465 + }, + { + "epoch": 0.36042288757969493, + "grad_norm": 0.7674956321716309, + "learning_rate": 0.0001771719413105674, + "loss": 2.7559, + "step": 4466 + }, + { + "epoch": 0.36050359131627796, + "grad_norm": 0.7263289093971252, + "learning_rate": 0.00017716190046348045, + "loss": 2.6822, + "step": 4467 + }, + { + "epoch": 0.36058429505286094, + "grad_norm": 0.7564195990562439, + "learning_rate": 0.0001771518576933173, + "loss": 2.7319, + "step": 4468 + }, + { + "epoch": 0.36066499878944397, + "grad_norm": 0.7291253805160522, + "learning_rate": 0.00017714181300032813, + "loss": 2.704, + "step": 4469 + }, + { + "epoch": 0.36074570252602695, + "grad_norm": 0.7354169487953186, + "learning_rate": 0.00017713176638476332, + "loss": 2.6344, + "step": 4470 + }, + { + "epoch": 0.36082640626261, + "grad_norm": 0.7104110717773438, + "learning_rate": 0.0001771217178468733, + "loss": 2.665, + "step": 4471 + }, + { + "epoch": 0.36090710999919295, + "grad_norm": 0.6913934350013733, + "learning_rate": 0.00017711166738690847, + "loss": 2.6674, + "step": 4472 + }, + { + "epoch": 0.360987813735776, + "grad_norm": 0.7999634742736816, + "learning_rate": 0.0001771016150051193, + "loss": 2.6847, + "step": 4473 + }, + { + "epoch": 0.36106851747235896, + "grad_norm": 0.7878915667533875, + "learning_rate": 0.00017709156070175634, + "loss": 2.7125, + "step": 4474 + }, + { + "epoch": 0.361149221208942, + "grad_norm": 0.7145688533782959, + "learning_rate": 0.00017708150447707017, + "loss": 2.6863, + "step": 4475 + }, + { + "epoch": 0.36122992494552497, + "grad_norm": 0.7518604397773743, + "learning_rate": 0.00017707144633131143, + "loss": 2.6616, + "step": 4476 + }, + { + "epoch": 0.361310628682108, + "grad_norm": 0.735634982585907, + "learning_rate": 0.0001770613862647308, + "loss": 2.6315, 
+ "step": 4477 + }, + { + "epoch": 0.361391332418691, + "grad_norm": 0.7925180196762085, + "learning_rate": 0.00017705132427757895, + "loss": 2.6951, + "step": 4478 + }, + { + "epoch": 0.361472036155274, + "grad_norm": 0.6949547529220581, + "learning_rate": 0.00017704126037010667, + "loss": 2.6934, + "step": 4479 + }, + { + "epoch": 0.361552739891857, + "grad_norm": 0.7233577966690063, + "learning_rate": 0.00017703119454256483, + "loss": 2.6773, + "step": 4480 + }, + { + "epoch": 0.36163344362844, + "grad_norm": 0.7303269505500793, + "learning_rate": 0.00017702112679520424, + "loss": 2.6351, + "step": 4481 + }, + { + "epoch": 0.361714147365023, + "grad_norm": 0.7620660066604614, + "learning_rate": 0.00017701105712827583, + "loss": 2.6748, + "step": 4482 + }, + { + "epoch": 0.36179485110160603, + "grad_norm": 0.7744965553283691, + "learning_rate": 0.00017700098554203057, + "loss": 2.7013, + "step": 4483 + }, + { + "epoch": 0.361875554838189, + "grad_norm": 0.8017357587814331, + "learning_rate": 0.00017699091203671947, + "loss": 2.7273, + "step": 4484 + }, + { + "epoch": 0.36195625857477204, + "grad_norm": 0.8014432191848755, + "learning_rate": 0.0001769808366125936, + "loss": 2.6864, + "step": 4485 + }, + { + "epoch": 0.362036962311355, + "grad_norm": 0.6914888620376587, + "learning_rate": 0.00017697075926990406, + "loss": 2.6851, + "step": 4486 + }, + { + "epoch": 0.36211766604793805, + "grad_norm": 0.7472698092460632, + "learning_rate": 0.00017696068000890196, + "loss": 2.695, + "step": 4487 + }, + { + "epoch": 0.362198369784521, + "grad_norm": 0.7506285309791565, + "learning_rate": 0.00017695059882983855, + "loss": 2.7055, + "step": 4488 + }, + { + "epoch": 0.362279073521104, + "grad_norm": 0.7501141428947449, + "learning_rate": 0.00017694051573296507, + "loss": 2.7109, + "step": 4489 + }, + { + "epoch": 0.36235977725768703, + "grad_norm": 0.6654670834541321, + "learning_rate": 0.00017693043071853284, + "loss": 2.6165, + "step": 4490 + }, + { + "epoch": 
0.36244048099427, + "grad_norm": 0.7894664406776428, + "learning_rate": 0.00017692034378679315, + "loss": 2.7274, + "step": 4491 + }, + { + "epoch": 0.36252118473085304, + "grad_norm": 0.7206711173057556, + "learning_rate": 0.00017691025493799743, + "loss": 2.7047, + "step": 4492 + }, + { + "epoch": 0.362601888467436, + "grad_norm": 0.7656282186508179, + "learning_rate": 0.00017690016417239708, + "loss": 2.696, + "step": 4493 + }, + { + "epoch": 0.36268259220401905, + "grad_norm": 0.7357437610626221, + "learning_rate": 0.00017689007149024362, + "loss": 2.7279, + "step": 4494 + }, + { + "epoch": 0.362763295940602, + "grad_norm": 0.7262146472930908, + "learning_rate": 0.00017687997689178864, + "loss": 2.6964, + "step": 4495 + }, + { + "epoch": 0.36284399967718506, + "grad_norm": 0.7839891910552979, + "learning_rate": 0.00017686988037728365, + "loss": 2.651, + "step": 4496 + }, + { + "epoch": 0.36292470341376803, + "grad_norm": 0.7150306105613708, + "learning_rate": 0.00017685978194698028, + "loss": 2.6481, + "step": 4497 + }, + { + "epoch": 0.36300540715035107, + "grad_norm": 0.7144685387611389, + "learning_rate": 0.00017684968160113025, + "loss": 2.7169, + "step": 4498 + }, + { + "epoch": 0.36308611088693404, + "grad_norm": 0.7593061327934265, + "learning_rate": 0.00017683957933998525, + "loss": 2.7543, + "step": 4499 + }, + { + "epoch": 0.3631668146235171, + "grad_norm": 0.7301446199417114, + "learning_rate": 0.00017682947516379707, + "loss": 2.6806, + "step": 4500 + }, + { + "epoch": 0.36324751836010005, + "grad_norm": 0.7314243316650391, + "learning_rate": 0.00017681936907281757, + "loss": 2.7227, + "step": 4501 + }, + { + "epoch": 0.3633282220966831, + "grad_norm": 0.7695817351341248, + "learning_rate": 0.00017680926106729852, + "loss": 2.7229, + "step": 4502 + }, + { + "epoch": 0.36340892583326606, + "grad_norm": 0.6885762810707092, + "learning_rate": 0.00017679915114749198, + "loss": 2.7246, + "step": 4503 + }, + { + "epoch": 0.3634896295698491, + "grad_norm": 
0.6893608570098877, + "learning_rate": 0.0001767890393136498, + "loss": 2.6572, + "step": 4504 + }, + { + "epoch": 0.36357033330643207, + "grad_norm": 0.7011978626251221, + "learning_rate": 0.00017677892556602402, + "loss": 2.6775, + "step": 4505 + }, + { + "epoch": 0.3636510370430151, + "grad_norm": 0.6693406105041504, + "learning_rate": 0.00017676880990486672, + "loss": 2.6183, + "step": 4506 + }, + { + "epoch": 0.3637317407795981, + "grad_norm": 0.7023048996925354, + "learning_rate": 0.00017675869233043002, + "loss": 2.6772, + "step": 4507 + }, + { + "epoch": 0.3638124445161811, + "grad_norm": 0.6903806328773499, + "learning_rate": 0.00017674857284296605, + "loss": 2.6486, + "step": 4508 + }, + { + "epoch": 0.3638931482527641, + "grad_norm": 0.6799258589744568, + "learning_rate": 0.000176738451442727, + "loss": 2.6305, + "step": 4509 + }, + { + "epoch": 0.3639738519893471, + "grad_norm": 0.7935682535171509, + "learning_rate": 0.00017672832812996517, + "loss": 2.7365, + "step": 4510 + }, + { + "epoch": 0.3640545557259301, + "grad_norm": 0.7593684196472168, + "learning_rate": 0.00017671820290493284, + "loss": 2.7029, + "step": 4511 + }, + { + "epoch": 0.36413525946251313, + "grad_norm": 0.7185288667678833, + "learning_rate": 0.00017670807576788234, + "loss": 2.6646, + "step": 4512 + }, + { + "epoch": 0.3642159631990961, + "grad_norm": 0.7260291576385498, + "learning_rate": 0.00017669794671906606, + "loss": 2.6615, + "step": 4513 + }, + { + "epoch": 0.36429666693567914, + "grad_norm": 0.6933417916297913, + "learning_rate": 0.00017668781575873646, + "loss": 2.6678, + "step": 4514 + }, + { + "epoch": 0.3643773706722621, + "grad_norm": 0.7657343149185181, + "learning_rate": 0.00017667768288714603, + "loss": 2.7155, + "step": 4515 + }, + { + "epoch": 0.36445807440884515, + "grad_norm": 0.7326949834823608, + "learning_rate": 0.0001766675481045473, + "loss": 2.732, + "step": 4516 + }, + { + "epoch": 0.3645387781454281, + "grad_norm": 0.7370324730873108, + 
"learning_rate": 0.0001766574114111929, + "loss": 2.6124, + "step": 4517 + }, + { + "epoch": 0.36461948188201115, + "grad_norm": 0.7280072569847107, + "learning_rate": 0.00017664727280733536, + "loss": 2.6793, + "step": 4518 + }, + { + "epoch": 0.36470018561859413, + "grad_norm": 0.7174237370491028, + "learning_rate": 0.00017663713229322748, + "loss": 2.629, + "step": 4519 + }, + { + "epoch": 0.36478088935517716, + "grad_norm": 0.6660771369934082, + "learning_rate": 0.0001766269898691219, + "loss": 2.6862, + "step": 4520 + }, + { + "epoch": 0.36486159309176014, + "grad_norm": 0.7024446725845337, + "learning_rate": 0.00017661684553527143, + "loss": 2.6602, + "step": 4521 + }, + { + "epoch": 0.36494229682834317, + "grad_norm": 0.7419618964195251, + "learning_rate": 0.0001766066992919289, + "loss": 2.6904, + "step": 4522 + }, + { + "epoch": 0.36502300056492615, + "grad_norm": 0.7425804138183594, + "learning_rate": 0.00017659655113934716, + "loss": 2.7312, + "step": 4523 + }, + { + "epoch": 0.3651037043015092, + "grad_norm": 0.7117013931274414, + "learning_rate": 0.00017658640107777915, + "loss": 2.6411, + "step": 4524 + }, + { + "epoch": 0.36518440803809216, + "grad_norm": 0.719613254070282, + "learning_rate": 0.00017657624910747782, + "loss": 2.6799, + "step": 4525 + }, + { + "epoch": 0.3652651117746752, + "grad_norm": 0.7654159665107727, + "learning_rate": 0.0001765660952286962, + "loss": 2.6675, + "step": 4526 + }, + { + "epoch": 0.36534581551125817, + "grad_norm": 0.7111814022064209, + "learning_rate": 0.00017655593944168734, + "loss": 2.6717, + "step": 4527 + }, + { + "epoch": 0.3654265192478412, + "grad_norm": 0.7494712471961975, + "learning_rate": 0.00017654578174670436, + "loss": 2.7181, + "step": 4528 + }, + { + "epoch": 0.3655072229844242, + "grad_norm": 0.8062291145324707, + "learning_rate": 0.0001765356221440004, + "loss": 2.6563, + "step": 4529 + }, + { + "epoch": 0.3655879267210072, + "grad_norm": 0.7923303842544556, + "learning_rate": 
0.00017652546063382866, + "loss": 2.6295, + "step": 4530 + }, + { + "epoch": 0.3656686304575902, + "grad_norm": 0.7417340278625488, + "learning_rate": 0.00017651529721644238, + "loss": 2.6727, + "step": 4531 + }, + { + "epoch": 0.3657493341941732, + "grad_norm": 0.7326166033744812, + "learning_rate": 0.0001765051318920949, + "loss": 2.702, + "step": 4532 + }, + { + "epoch": 0.3658300379307562, + "grad_norm": 0.8133745193481445, + "learning_rate": 0.00017649496466103957, + "loss": 2.7157, + "step": 4533 + }, + { + "epoch": 0.3659107416673392, + "grad_norm": 0.710502564907074, + "learning_rate": 0.00017648479552352973, + "loss": 2.6668, + "step": 4534 + }, + { + "epoch": 0.3659914454039222, + "grad_norm": 0.6947012543678284, + "learning_rate": 0.00017647462447981885, + "loss": 2.6865, + "step": 4535 + }, + { + "epoch": 0.36607214914050523, + "grad_norm": 0.8432720899581909, + "learning_rate": 0.0001764644515301604, + "loss": 2.6226, + "step": 4536 + }, + { + "epoch": 0.3661528528770882, + "grad_norm": 0.7321269512176514, + "learning_rate": 0.00017645427667480802, + "loss": 2.662, + "step": 4537 + }, + { + "epoch": 0.36623355661367124, + "grad_norm": 0.8099743723869324, + "learning_rate": 0.00017644409991401515, + "loss": 2.6853, + "step": 4538 + }, + { + "epoch": 0.3663142603502542, + "grad_norm": 0.6885355114936829, + "learning_rate": 0.0001764339212480355, + "loss": 2.6672, + "step": 4539 + }, + { + "epoch": 0.3663949640868372, + "grad_norm": 0.911396324634552, + "learning_rate": 0.00017642374067712276, + "loss": 2.5778, + "step": 4540 + }, + { + "epoch": 0.3664756678234202, + "grad_norm": 0.7461941838264465, + "learning_rate": 0.0001764135582015306, + "loss": 2.6629, + "step": 4541 + }, + { + "epoch": 0.3665563715600032, + "grad_norm": 0.772741436958313, + "learning_rate": 0.0001764033738215128, + "loss": 2.725, + "step": 4542 + }, + { + "epoch": 0.36663707529658623, + "grad_norm": 0.7256152629852295, + "learning_rate": 0.0001763931875373232, + "loss": 2.6439, + 
"step": 4543 + }, + { + "epoch": 0.3667177790331692, + "grad_norm": 0.8089167475700378, + "learning_rate": 0.0001763829993492157, + "loss": 2.5972, + "step": 4544 + }, + { + "epoch": 0.36679848276975224, + "grad_norm": 0.7115232944488525, + "learning_rate": 0.0001763728092574442, + "loss": 2.633, + "step": 4545 + }, + { + "epoch": 0.3668791865063352, + "grad_norm": 0.7189347147941589, + "learning_rate": 0.00017636261726226266, + "loss": 2.619, + "step": 4546 + }, + { + "epoch": 0.36695989024291825, + "grad_norm": 0.7667742967605591, + "learning_rate": 0.00017635242336392506, + "loss": 2.667, + "step": 4547 + }, + { + "epoch": 0.36704059397950123, + "grad_norm": 0.7982457876205444, + "learning_rate": 0.00017634222756268545, + "loss": 2.6667, + "step": 4548 + }, + { + "epoch": 0.36712129771608426, + "grad_norm": 0.7465574145317078, + "learning_rate": 0.00017633202985879804, + "loss": 2.6436, + "step": 4549 + }, + { + "epoch": 0.36720200145266724, + "grad_norm": 0.7297804951667786, + "learning_rate": 0.00017632183025251686, + "loss": 2.6464, + "step": 4550 + }, + { + "epoch": 0.36728270518925027, + "grad_norm": 0.6885054111480713, + "learning_rate": 0.0001763116287440962, + "loss": 2.6742, + "step": 4551 + }, + { + "epoch": 0.36736340892583325, + "grad_norm": 0.7341574430465698, + "learning_rate": 0.00017630142533379023, + "loss": 2.6688, + "step": 4552 + }, + { + "epoch": 0.3674441126624163, + "grad_norm": 0.8565430045127869, + "learning_rate": 0.0001762912200218533, + "loss": 2.6889, + "step": 4553 + }, + { + "epoch": 0.36752481639899925, + "grad_norm": 0.7509489059448242, + "learning_rate": 0.00017628101280853974, + "loss": 2.6177, + "step": 4554 + }, + { + "epoch": 0.3676055201355823, + "grad_norm": 0.8128334879875183, + "learning_rate": 0.00017627080369410396, + "loss": 2.7301, + "step": 4555 + }, + { + "epoch": 0.36768622387216526, + "grad_norm": 0.7511637210845947, + "learning_rate": 0.00017626059267880035, + "loss": 2.7327, + "step": 4556 + }, + { + "epoch": 
0.3677669276087483, + "grad_norm": 0.8350822925567627, + "learning_rate": 0.00017625037976288347, + "loss": 2.6073, + "step": 4557 + }, + { + "epoch": 0.36784763134533127, + "grad_norm": 0.7743313312530518, + "learning_rate": 0.00017624016494660776, + "loss": 2.7055, + "step": 4558 + }, + { + "epoch": 0.3679283350819143, + "grad_norm": 0.8196439146995544, + "learning_rate": 0.00017622994823022787, + "loss": 2.6565, + "step": 4559 + }, + { + "epoch": 0.3680090388184973, + "grad_norm": 0.7223393321037292, + "learning_rate": 0.00017621972961399837, + "loss": 2.68, + "step": 4560 + }, + { + "epoch": 0.3680897425550803, + "grad_norm": 0.7215418219566345, + "learning_rate": 0.000176209509098174, + "loss": 2.6627, + "step": 4561 + }, + { + "epoch": 0.3681704462916633, + "grad_norm": 0.8050473928451538, + "learning_rate": 0.00017619928668300946, + "loss": 2.5802, + "step": 4562 + }, + { + "epoch": 0.3682511500282463, + "grad_norm": 0.7452750205993652, + "learning_rate": 0.00017618906236875948, + "loss": 2.6524, + "step": 4563 + }, + { + "epoch": 0.3683318537648293, + "grad_norm": 0.7950742244720459, + "learning_rate": 0.00017617883615567888, + "loss": 2.6371, + "step": 4564 + }, + { + "epoch": 0.36841255750141233, + "grad_norm": 0.7185397744178772, + "learning_rate": 0.00017616860804402261, + "loss": 2.6531, + "step": 4565 + }, + { + "epoch": 0.3684932612379953, + "grad_norm": 0.7480553388595581, + "learning_rate": 0.0001761583780340455, + "loss": 2.6727, + "step": 4566 + }, + { + "epoch": 0.36857396497457834, + "grad_norm": 0.7740724086761475, + "learning_rate": 0.00017614814612600251, + "loss": 2.6095, + "step": 4567 + }, + { + "epoch": 0.3686546687111613, + "grad_norm": 0.9159810543060303, + "learning_rate": 0.00017613791232014866, + "loss": 2.7039, + "step": 4568 + }, + { + "epoch": 0.36873537244774435, + "grad_norm": 0.7478305697441101, + "learning_rate": 0.00017612767661673905, + "loss": 2.6307, + "step": 4569 + }, + { + "epoch": 0.3688160761843273, + "grad_norm": 
0.9154726266860962, + "learning_rate": 0.00017611743901602874, + "loss": 2.675, + "step": 4570 + }, + { + "epoch": 0.36889677992091036, + "grad_norm": 0.7903287410736084, + "learning_rate": 0.0001761071995182728, + "loss": 2.6938, + "step": 4571 + }, + { + "epoch": 0.36897748365749333, + "grad_norm": 0.7919119596481323, + "learning_rate": 0.0001760969581237266, + "loss": 2.7092, + "step": 4572 + }, + { + "epoch": 0.36905818739407636, + "grad_norm": 0.8052253723144531, + "learning_rate": 0.00017608671483264522, + "loss": 2.6914, + "step": 4573 + }, + { + "epoch": 0.36913889113065934, + "grad_norm": 0.7660435438156128, + "learning_rate": 0.00017607646964528403, + "loss": 2.674, + "step": 4574 + }, + { + "epoch": 0.3692195948672424, + "grad_norm": 0.8554383516311646, + "learning_rate": 0.00017606622256189836, + "loss": 2.6792, + "step": 4575 + }, + { + "epoch": 0.36930029860382535, + "grad_norm": 0.7719140648841858, + "learning_rate": 0.00017605597358274358, + "loss": 2.6836, + "step": 4576 + }, + { + "epoch": 0.3693810023404084, + "grad_norm": 0.733068585395813, + "learning_rate": 0.00017604572270807513, + "loss": 2.6496, + "step": 4577 + }, + { + "epoch": 0.36946170607699136, + "grad_norm": 0.7622445225715637, + "learning_rate": 0.00017603546993814849, + "loss": 2.7097, + "step": 4578 + }, + { + "epoch": 0.3695424098135744, + "grad_norm": 0.7326679825782776, + "learning_rate": 0.00017602521527321913, + "loss": 2.6786, + "step": 4579 + }, + { + "epoch": 0.36962311355015737, + "grad_norm": 0.7579432129859924, + "learning_rate": 0.00017601495871354272, + "loss": 2.6618, + "step": 4580 + }, + { + "epoch": 0.3697038172867404, + "grad_norm": 0.8812715411186218, + "learning_rate": 0.00017600470025937485, + "loss": 2.6942, + "step": 4581 + }, + { + "epoch": 0.3697845210233234, + "grad_norm": 0.7230449318885803, + "learning_rate": 0.00017599443991097116, + "loss": 2.6374, + "step": 4582 + }, + { + "epoch": 0.3698652247599064, + "grad_norm": 0.8347739577293396, + 
"learning_rate": 0.00017598417766858735, + "loss": 2.6653, + "step": 4583 + }, + { + "epoch": 0.3699459284964894, + "grad_norm": 0.7826598882675171, + "learning_rate": 0.0001759739135324792, + "loss": 2.6342, + "step": 4584 + }, + { + "epoch": 0.3700266322330724, + "grad_norm": 0.749060332775116, + "learning_rate": 0.00017596364750290254, + "loss": 2.7256, + "step": 4585 + }, + { + "epoch": 0.3701073359696554, + "grad_norm": 0.7470815181732178, + "learning_rate": 0.00017595337958011323, + "loss": 2.6485, + "step": 4586 + }, + { + "epoch": 0.3701880397062384, + "grad_norm": 0.7251530289649963, + "learning_rate": 0.00017594310976436716, + "loss": 2.6613, + "step": 4587 + }, + { + "epoch": 0.3702687434428214, + "grad_norm": 0.7143718004226685, + "learning_rate": 0.00017593283805592027, + "loss": 2.6101, + "step": 4588 + }, + { + "epoch": 0.37034944717940443, + "grad_norm": 0.7378203272819519, + "learning_rate": 0.00017592256445502855, + "loss": 2.6735, + "step": 4589 + }, + { + "epoch": 0.3704301509159874, + "grad_norm": 0.7193629741668701, + "learning_rate": 0.00017591228896194808, + "loss": 2.719, + "step": 4590 + }, + { + "epoch": 0.3705108546525704, + "grad_norm": 0.7377258539199829, + "learning_rate": 0.00017590201157693494, + "loss": 2.6789, + "step": 4591 + }, + { + "epoch": 0.3705915583891534, + "grad_norm": 0.7468351721763611, + "learning_rate": 0.00017589173230024522, + "loss": 2.6389, + "step": 4592 + }, + { + "epoch": 0.3706722621257364, + "grad_norm": 0.7612246870994568, + "learning_rate": 0.0001758814511321352, + "loss": 2.7045, + "step": 4593 + }, + { + "epoch": 0.37075296586231943, + "grad_norm": 0.7603838443756104, + "learning_rate": 0.00017587116807286102, + "loss": 2.7323, + "step": 4594 + }, + { + "epoch": 0.3708336695989024, + "grad_norm": 0.7436477541923523, + "learning_rate": 0.000175860883122679, + "loss": 2.7331, + "step": 4595 + }, + { + "epoch": 0.37091437333548544, + "grad_norm": 0.7004369497299194, + "learning_rate": 0.0001758505962818455, 
+ "loss": 2.6418, + "step": 4596 + }, + { + "epoch": 0.3709950770720684, + "grad_norm": 0.711980938911438, + "learning_rate": 0.00017584030755061683, + "loss": 2.6184, + "step": 4597 + }, + { + "epoch": 0.37107578080865145, + "grad_norm": 0.6999367475509644, + "learning_rate": 0.0001758300169292495, + "loss": 2.6584, + "step": 4598 + }, + { + "epoch": 0.3711564845452344, + "grad_norm": 0.6755785942077637, + "learning_rate": 0.0001758197244179999, + "loss": 2.664, + "step": 4599 + }, + { + "epoch": 0.37123718828181745, + "grad_norm": 0.7174055576324463, + "learning_rate": 0.00017580943001712455, + "loss": 2.6821, + "step": 4600 + }, + { + "epoch": 0.37131789201840043, + "grad_norm": 0.8218933343887329, + "learning_rate": 0.00017579913372688005, + "loss": 2.6355, + "step": 4601 + }, + { + "epoch": 0.37139859575498346, + "grad_norm": 0.7417960166931152, + "learning_rate": 0.000175788835547523, + "loss": 2.7226, + "step": 4602 + }, + { + "epoch": 0.37147929949156644, + "grad_norm": 0.824421763420105, + "learning_rate": 0.00017577853547931006, + "loss": 2.6526, + "step": 4603 + }, + { + "epoch": 0.37156000322814947, + "grad_norm": 0.7391949892044067, + "learning_rate": 0.00017576823352249794, + "loss": 2.6702, + "step": 4604 + }, + { + "epoch": 0.37164070696473245, + "grad_norm": 0.7890247106552124, + "learning_rate": 0.00017575792967734337, + "loss": 2.7281, + "step": 4605 + }, + { + "epoch": 0.3717214107013155, + "grad_norm": 0.785527765750885, + "learning_rate": 0.00017574762394410317, + "loss": 2.6728, + "step": 4606 + }, + { + "epoch": 0.37180211443789846, + "grad_norm": 0.7195863127708435, + "learning_rate": 0.00017573731632303415, + "loss": 2.6329, + "step": 4607 + }, + { + "epoch": 0.3718828181744815, + "grad_norm": 0.7896780371665955, + "learning_rate": 0.0001757270068143932, + "loss": 2.6776, + "step": 4608 + }, + { + "epoch": 0.37196352191106447, + "grad_norm": 0.7568275332450867, + "learning_rate": 0.00017571669541843735, + "loss": 2.6668, + "step": 4609 + 
}, + { + "epoch": 0.3720442256476475, + "grad_norm": 0.7923939228057861, + "learning_rate": 0.00017570638213542348, + "loss": 2.7033, + "step": 4610 + }, + { + "epoch": 0.3721249293842305, + "grad_norm": 0.7586569786071777, + "learning_rate": 0.00017569606696560868, + "loss": 2.7286, + "step": 4611 + }, + { + "epoch": 0.3722056331208135, + "grad_norm": 0.8222009539604187, + "learning_rate": 0.00017568574990925004, + "loss": 2.6448, + "step": 4612 + }, + { + "epoch": 0.3722863368573965, + "grad_norm": 0.7144019603729248, + "learning_rate": 0.00017567543096660466, + "loss": 2.6671, + "step": 4613 + }, + { + "epoch": 0.3723670405939795, + "grad_norm": 0.7602240443229675, + "learning_rate": 0.00017566511013792973, + "loss": 2.6492, + "step": 4614 + }, + { + "epoch": 0.3724477443305625, + "grad_norm": 0.7949689626693726, + "learning_rate": 0.00017565478742348245, + "loss": 2.7002, + "step": 4615 + }, + { + "epoch": 0.3725284480671455, + "grad_norm": 0.6922519207000732, + "learning_rate": 0.00017564446282352012, + "loss": 2.6917, + "step": 4616 + }, + { + "epoch": 0.3726091518037285, + "grad_norm": 0.7382915616035461, + "learning_rate": 0.0001756341363383, + "loss": 2.6375, + "step": 4617 + }, + { + "epoch": 0.37268985554031153, + "grad_norm": 0.7511888742446899, + "learning_rate": 0.00017562380796807956, + "loss": 2.6823, + "step": 4618 + }, + { + "epoch": 0.3727705592768945, + "grad_norm": 0.7273457646369934, + "learning_rate": 0.00017561347771311608, + "loss": 2.6124, + "step": 4619 + }, + { + "epoch": 0.37285126301347754, + "grad_norm": 0.689440131187439, + "learning_rate": 0.0001756031455736671, + "loss": 2.6931, + "step": 4620 + }, + { + "epoch": 0.3729319667500605, + "grad_norm": 0.7755659222602844, + "learning_rate": 0.00017559281154999013, + "loss": 2.6273, + "step": 4621 + }, + { + "epoch": 0.37301267048664355, + "grad_norm": 0.6940193176269531, + "learning_rate": 0.00017558247564234265, + "loss": 2.641, + "step": 4622 + }, + { + "epoch": 0.3730933742232265, + 
"grad_norm": 0.7387529015541077, + "learning_rate": 0.00017557213785098232, + "loss": 2.7229, + "step": 4623 + }, + { + "epoch": 0.37317407795980956, + "grad_norm": 0.6807727217674255, + "learning_rate": 0.00017556179817616678, + "loss": 2.6469, + "step": 4624 + }, + { + "epoch": 0.37325478169639253, + "grad_norm": 0.7203819751739502, + "learning_rate": 0.0001755514566181537, + "loss": 2.6239, + "step": 4625 + }, + { + "epoch": 0.37333548543297557, + "grad_norm": 0.9345876574516296, + "learning_rate": 0.0001755411131772008, + "loss": 2.7154, + "step": 4626 + }, + { + "epoch": 0.37341618916955854, + "grad_norm": 0.6787357330322266, + "learning_rate": 0.00017553076785356594, + "loss": 2.6374, + "step": 4627 + }, + { + "epoch": 0.3734968929061416, + "grad_norm": 0.7153670191764832, + "learning_rate": 0.0001755204206475069, + "loss": 2.6734, + "step": 4628 + }, + { + "epoch": 0.37357759664272455, + "grad_norm": 0.736464262008667, + "learning_rate": 0.00017551007155928154, + "loss": 2.7241, + "step": 4629 + }, + { + "epoch": 0.3736583003793076, + "grad_norm": 0.7134939432144165, + "learning_rate": 0.0001754997205891478, + "loss": 2.682, + "step": 4630 + }, + { + "epoch": 0.37373900411589056, + "grad_norm": 0.7071199417114258, + "learning_rate": 0.0001754893677373637, + "loss": 2.7361, + "step": 4631 + }, + { + "epoch": 0.3738197078524736, + "grad_norm": 0.7040621638298035, + "learning_rate": 0.00017547901300418722, + "loss": 2.7031, + "step": 4632 + }, + { + "epoch": 0.37390041158905657, + "grad_norm": 0.7179287075996399, + "learning_rate": 0.00017546865638987642, + "loss": 2.6755, + "step": 4633 + }, + { + "epoch": 0.3739811153256396, + "grad_norm": 0.7579259276390076, + "learning_rate": 0.00017545829789468944, + "loss": 2.6514, + "step": 4634 + }, + { + "epoch": 0.3740618190622226, + "grad_norm": 0.7825835347175598, + "learning_rate": 0.0001754479375188844, + "loss": 2.6876, + "step": 4635 + }, + { + "epoch": 0.3741425227988056, + "grad_norm": 0.7913421988487244, + 
"learning_rate": 0.00017543757526271956, + "loss": 2.7153, + "step": 4636 + }, + { + "epoch": 0.3742232265353886, + "grad_norm": 0.7766042947769165, + "learning_rate": 0.00017542721112645313, + "loss": 2.645, + "step": 4637 + }, + { + "epoch": 0.3743039302719716, + "grad_norm": 0.7363953590393066, + "learning_rate": 0.00017541684511034343, + "loss": 2.6376, + "step": 4638 + }, + { + "epoch": 0.3743846340085546, + "grad_norm": 0.6928617358207703, + "learning_rate": 0.00017540647721464881, + "loss": 2.6882, + "step": 4639 + }, + { + "epoch": 0.3744653377451376, + "grad_norm": 0.7832257747650146, + "learning_rate": 0.0001753961074396277, + "loss": 2.7305, + "step": 4640 + }, + { + "epoch": 0.3745460414817206, + "grad_norm": 0.7180350422859192, + "learning_rate": 0.00017538573578553844, + "loss": 2.6783, + "step": 4641 + }, + { + "epoch": 0.3746267452183036, + "grad_norm": 0.718209981918335, + "learning_rate": 0.00017537536225263964, + "loss": 2.6961, + "step": 4642 + }, + { + "epoch": 0.3747074489548866, + "grad_norm": 0.7056655287742615, + "learning_rate": 0.00017536498684118975, + "loss": 2.7096, + "step": 4643 + }, + { + "epoch": 0.3747881526914696, + "grad_norm": 0.8004828691482544, + "learning_rate": 0.0001753546095514474, + "loss": 2.7168, + "step": 4644 + }, + { + "epoch": 0.3748688564280526, + "grad_norm": 0.7630821466445923, + "learning_rate": 0.0001753442303836712, + "loss": 2.7091, + "step": 4645 + }, + { + "epoch": 0.3749495601646356, + "grad_norm": 0.7539668083190918, + "learning_rate": 0.0001753338493381198, + "loss": 2.651, + "step": 4646 + }, + { + "epoch": 0.37503026390121863, + "grad_norm": 0.7243319749832153, + "learning_rate": 0.000175323466415052, + "loss": 2.6765, + "step": 4647 + }, + { + "epoch": 0.3751109676378016, + "grad_norm": 0.8906281590461731, + "learning_rate": 0.00017531308161472647, + "loss": 2.5938, + "step": 4648 + }, + { + "epoch": 0.37519167137438464, + "grad_norm": 0.787966251373291, + "learning_rate": 0.0001753026949374021, + 
"loss": 2.6011, + "step": 4649 + }, + { + "epoch": 0.3752723751109676, + "grad_norm": 0.7763915061950684, + "learning_rate": 0.00017529230638333772, + "loss": 2.7197, + "step": 4650 + }, + { + "epoch": 0.37535307884755065, + "grad_norm": 0.7717103362083435, + "learning_rate": 0.00017528191595279224, + "loss": 2.6605, + "step": 4651 + }, + { + "epoch": 0.3754337825841336, + "grad_norm": 0.7340055108070374, + "learning_rate": 0.00017527152364602464, + "loss": 2.6856, + "step": 4652 + }, + { + "epoch": 0.37551448632071666, + "grad_norm": 0.7805169820785522, + "learning_rate": 0.0001752611294632939, + "loss": 2.7088, + "step": 4653 + }, + { + "epoch": 0.37559519005729963, + "grad_norm": 0.7894891500473022, + "learning_rate": 0.00017525073340485912, + "loss": 2.6691, + "step": 4654 + }, + { + "epoch": 0.37567589379388266, + "grad_norm": 0.7627872824668884, + "learning_rate": 0.0001752403354709793, + "loss": 2.6536, + "step": 4655 + }, + { + "epoch": 0.37575659753046564, + "grad_norm": 0.8097225427627563, + "learning_rate": 0.00017522993566191367, + "loss": 2.7108, + "step": 4656 + }, + { + "epoch": 0.3758373012670487, + "grad_norm": 0.834449827671051, + "learning_rate": 0.00017521953397792137, + "loss": 2.7565, + "step": 4657 + }, + { + "epoch": 0.37591800500363165, + "grad_norm": 0.7924147844314575, + "learning_rate": 0.00017520913041926166, + "loss": 2.7101, + "step": 4658 + }, + { + "epoch": 0.3759987087402147, + "grad_norm": 0.7407249808311462, + "learning_rate": 0.00017519872498619385, + "loss": 2.6501, + "step": 4659 + }, + { + "epoch": 0.37607941247679766, + "grad_norm": 0.7251791954040527, + "learning_rate": 0.0001751883176789772, + "loss": 2.6786, + "step": 4660 + }, + { + "epoch": 0.3761601162133807, + "grad_norm": 0.7120431661605835, + "learning_rate": 0.00017517790849787116, + "loss": 2.7244, + "step": 4661 + }, + { + "epoch": 0.37624081994996367, + "grad_norm": 0.724836528301239, + "learning_rate": 0.00017516749744313513, + "loss": 2.7099, + "step": 4662 + 
}, + { + "epoch": 0.3763215236865467, + "grad_norm": 0.7788939476013184, + "learning_rate": 0.00017515708451502855, + "loss": 2.6206, + "step": 4663 + }, + { + "epoch": 0.3764022274231297, + "grad_norm": 0.7518914341926575, + "learning_rate": 0.00017514666971381099, + "loss": 2.7505, + "step": 4664 + }, + { + "epoch": 0.3764829311597127, + "grad_norm": 0.8004730939865112, + "learning_rate": 0.00017513625303974194, + "loss": 2.6119, + "step": 4665 + }, + { + "epoch": 0.3765636348962957, + "grad_norm": 0.7661109566688538, + "learning_rate": 0.00017512583449308107, + "loss": 2.724, + "step": 4666 + }, + { + "epoch": 0.3766443386328787, + "grad_norm": 0.7669692635536194, + "learning_rate": 0.00017511541407408805, + "loss": 2.7109, + "step": 4667 + }, + { + "epoch": 0.3767250423694617, + "grad_norm": 0.738608181476593, + "learning_rate": 0.00017510499178302253, + "loss": 2.6642, + "step": 4668 + }, + { + "epoch": 0.3768057461060447, + "grad_norm": 0.7194661498069763, + "learning_rate": 0.00017509456762014432, + "loss": 2.6906, + "step": 4669 + }, + { + "epoch": 0.3768864498426277, + "grad_norm": 0.7025040984153748, + "learning_rate": 0.00017508414158571314, + "loss": 2.6596, + "step": 4670 + }, + { + "epoch": 0.37696715357921073, + "grad_norm": 0.7756575345993042, + "learning_rate": 0.00017507371367998892, + "loss": 2.7114, + "step": 4671 + }, + { + "epoch": 0.3770478573157937, + "grad_norm": 0.834966778755188, + "learning_rate": 0.00017506328390323148, + "loss": 2.7554, + "step": 4672 + }, + { + "epoch": 0.37712856105237674, + "grad_norm": 0.6997280120849609, + "learning_rate": 0.0001750528522557008, + "loss": 2.6285, + "step": 4673 + }, + { + "epoch": 0.3772092647889597, + "grad_norm": 0.7101716995239258, + "learning_rate": 0.0001750424187376569, + "loss": 2.6465, + "step": 4674 + }, + { + "epoch": 0.37728996852554275, + "grad_norm": 0.6577222347259521, + "learning_rate": 0.0001750319833493597, + "loss": 2.6372, + "step": 4675 + }, + { + "epoch": 0.37737067226212573, 
+ "grad_norm": 0.7402529120445251, + "learning_rate": 0.00017502154609106937, + "loss": 2.6464, + "step": 4676 + }, + { + "epoch": 0.37745137599870876, + "grad_norm": 0.6858490705490112, + "learning_rate": 0.00017501110696304596, + "loss": 2.6141, + "step": 4677 + }, + { + "epoch": 0.37753207973529174, + "grad_norm": 0.729468822479248, + "learning_rate": 0.0001750006659655497, + "loss": 2.6671, + "step": 4678 + }, + { + "epoch": 0.37761278347187477, + "grad_norm": 0.7197559475898743, + "learning_rate": 0.0001749902230988408, + "loss": 2.6462, + "step": 4679 + }, + { + "epoch": 0.37769348720845775, + "grad_norm": 0.7171144485473633, + "learning_rate": 0.00017497977836317957, + "loss": 2.6427, + "step": 4680 + }, + { + "epoch": 0.3777741909450408, + "grad_norm": 0.7423805594444275, + "learning_rate": 0.00017496933175882617, + "loss": 2.662, + "step": 4681 + }, + { + "epoch": 0.37785489468162375, + "grad_norm": 0.7498061060905457, + "learning_rate": 0.0001749588832860411, + "loss": 2.6243, + "step": 4682 + }, + { + "epoch": 0.3779355984182068, + "grad_norm": 0.7706165909767151, + "learning_rate": 0.0001749484329450847, + "loss": 2.6928, + "step": 4683 + }, + { + "epoch": 0.37801630215478976, + "grad_norm": 0.723363995552063, + "learning_rate": 0.00017493798073621745, + "loss": 2.6787, + "step": 4684 + }, + { + "epoch": 0.3780970058913728, + "grad_norm": 0.7444875836372375, + "learning_rate": 0.00017492752665969983, + "loss": 2.6789, + "step": 4685 + }, + { + "epoch": 0.37817770962795577, + "grad_norm": 0.6946491599082947, + "learning_rate": 0.00017491707071579237, + "loss": 2.6761, + "step": 4686 + }, + { + "epoch": 0.3782584133645388, + "grad_norm": 0.7171412706375122, + "learning_rate": 0.00017490661290475568, + "loss": 2.6788, + "step": 4687 + }, + { + "epoch": 0.3783391171011218, + "grad_norm": 0.7503272891044617, + "learning_rate": 0.00017489615322685038, + "loss": 2.7057, + "step": 4688 + }, + { + "epoch": 0.3784198208377048, + "grad_norm": 0.7458747625350952, + 
"learning_rate": 0.00017488569168233714, + "loss": 2.6857, + "step": 4689 + }, + { + "epoch": 0.3785005245742878, + "grad_norm": 0.7030516266822815, + "learning_rate": 0.0001748752282714768, + "loss": 2.6522, + "step": 4690 + }, + { + "epoch": 0.3785812283108708, + "grad_norm": 0.7717545628547668, + "learning_rate": 0.00017486476299452994, + "loss": 2.6527, + "step": 4691 + }, + { + "epoch": 0.3786619320474538, + "grad_norm": 0.6788322925567627, + "learning_rate": 0.0001748542958517575, + "loss": 2.6362, + "step": 4692 + }, + { + "epoch": 0.3787426357840368, + "grad_norm": 0.8518630266189575, + "learning_rate": 0.0001748438268434204, + "loss": 2.6812, + "step": 4693 + }, + { + "epoch": 0.3788233395206198, + "grad_norm": 0.7167141437530518, + "learning_rate": 0.00017483335596977945, + "loss": 2.6414, + "step": 4694 + }, + { + "epoch": 0.3789040432572028, + "grad_norm": 0.7748053073883057, + "learning_rate": 0.00017482288323109567, + "loss": 2.7291, + "step": 4695 + }, + { + "epoch": 0.3789847469937858, + "grad_norm": 0.7203041911125183, + "learning_rate": 0.00017481240862763002, + "loss": 2.6957, + "step": 4696 + }, + { + "epoch": 0.3790654507303688, + "grad_norm": 0.7973119020462036, + "learning_rate": 0.00017480193215964362, + "loss": 2.7456, + "step": 4697 + }, + { + "epoch": 0.3791461544669518, + "grad_norm": 0.7851223945617676, + "learning_rate": 0.00017479145382739755, + "loss": 2.6525, + "step": 4698 + }, + { + "epoch": 0.3792268582035348, + "grad_norm": 0.7012068629264832, + "learning_rate": 0.0001747809736311529, + "loss": 2.6662, + "step": 4699 + }, + { + "epoch": 0.37930756194011783, + "grad_norm": 0.7266128659248352, + "learning_rate": 0.00017477049157117093, + "loss": 2.5853, + "step": 4700 + }, + { + "epoch": 0.3793882656767008, + "grad_norm": 0.7264416217803955, + "learning_rate": 0.00017476000764771285, + "loss": 2.6972, + "step": 4701 + }, + { + "epoch": 0.37946896941328384, + "grad_norm": 0.797709047794342, + "learning_rate": 
0.00017474952186103995, + "loss": 2.6997, + "step": 4702 + }, + { + "epoch": 0.3795496731498668, + "grad_norm": 0.7552568912506104, + "learning_rate": 0.00017473903421141358, + "loss": 2.7178, + "step": 4703 + }, + { + "epoch": 0.37963037688644985, + "grad_norm": 0.7611108422279358, + "learning_rate": 0.0001747285446990951, + "loss": 2.6997, + "step": 4704 + }, + { + "epoch": 0.3797110806230328, + "grad_norm": 0.8081753253936768, + "learning_rate": 0.00017471805332434595, + "loss": 2.7242, + "step": 4705 + }, + { + "epoch": 0.37979178435961586, + "grad_norm": 0.728301465511322, + "learning_rate": 0.0001747075600874276, + "loss": 2.5885, + "step": 4706 + }, + { + "epoch": 0.37987248809619883, + "grad_norm": 0.7548539638519287, + "learning_rate": 0.00017469706498860155, + "loss": 2.7038, + "step": 4707 + }, + { + "epoch": 0.37995319183278187, + "grad_norm": 0.7054354548454285, + "learning_rate": 0.00017468656802812938, + "loss": 2.6566, + "step": 4708 + }, + { + "epoch": 0.38003389556936484, + "grad_norm": 0.7231585383415222, + "learning_rate": 0.0001746760692062727, + "loss": 2.6564, + "step": 4709 + }, + { + "epoch": 0.3801145993059479, + "grad_norm": 0.6931934952735901, + "learning_rate": 0.00017466556852329318, + "loss": 2.6403, + "step": 4710 + }, + { + "epoch": 0.38019530304253085, + "grad_norm": 0.7882393598556519, + "learning_rate": 0.00017465506597945255, + "loss": 2.6337, + "step": 4711 + }, + { + "epoch": 0.3802760067791139, + "grad_norm": 0.7015109658241272, + "learning_rate": 0.0001746445615750125, + "loss": 2.6742, + "step": 4712 + }, + { + "epoch": 0.38035671051569686, + "grad_norm": 0.7653505802154541, + "learning_rate": 0.0001746340553102348, + "loss": 2.6742, + "step": 4713 + }, + { + "epoch": 0.3804374142522799, + "grad_norm": 0.7166270613670349, + "learning_rate": 0.0001746235471853814, + "loss": 2.5995, + "step": 4714 + }, + { + "epoch": 0.38051811798886287, + "grad_norm": 0.7612236738204956, + "learning_rate": 0.0001746130372007141, + "loss": 
2.7595, + "step": 4715 + }, + { + "epoch": 0.3805988217254459, + "grad_norm": 0.6783852577209473, + "learning_rate": 0.00017460252535649493, + "loss": 2.6156, + "step": 4716 + }, + { + "epoch": 0.3806795254620289, + "grad_norm": 0.7495827078819275, + "learning_rate": 0.00017459201165298578, + "loss": 2.6847, + "step": 4717 + }, + { + "epoch": 0.3807602291986119, + "grad_norm": 0.814798891544342, + "learning_rate": 0.0001745814960904487, + "loss": 2.6211, + "step": 4718 + }, + { + "epoch": 0.3808409329351949, + "grad_norm": 0.7541367411613464, + "learning_rate": 0.0001745709786691458, + "loss": 2.6214, + "step": 4719 + }, + { + "epoch": 0.3809216366717779, + "grad_norm": 0.7065702676773071, + "learning_rate": 0.00017456045938933921, + "loss": 2.6699, + "step": 4720 + }, + { + "epoch": 0.3810023404083609, + "grad_norm": 0.751960813999176, + "learning_rate": 0.000174549938251291, + "loss": 2.6085, + "step": 4721 + }, + { + "epoch": 0.3810830441449439, + "grad_norm": 0.72068190574646, + "learning_rate": 0.00017453941525526353, + "loss": 2.6201, + "step": 4722 + }, + { + "epoch": 0.3811637478815269, + "grad_norm": 0.7201167941093445, + "learning_rate": 0.00017452889040151892, + "loss": 2.6775, + "step": 4723 + }, + { + "epoch": 0.38124445161810994, + "grad_norm": 0.7904958128929138, + "learning_rate": 0.00017451836369031956, + "loss": 2.7217, + "step": 4724 + }, + { + "epoch": 0.3813251553546929, + "grad_norm": 0.7096366882324219, + "learning_rate": 0.0001745078351219278, + "loss": 2.7004, + "step": 4725 + }, + { + "epoch": 0.38140585909127594, + "grad_norm": 0.6812441945075989, + "learning_rate": 0.00017449730469660602, + "loss": 2.6555, + "step": 4726 + }, + { + "epoch": 0.3814865628278589, + "grad_norm": 0.8037428855895996, + "learning_rate": 0.00017448677241461665, + "loss": 2.7094, + "step": 4727 + }, + { + "epoch": 0.38156726656444195, + "grad_norm": 0.7282679677009583, + "learning_rate": 0.00017447623827622223, + "loss": 2.6699, + "step": 4728 + }, + { + "epoch": 
0.38164797030102493, + "grad_norm": 0.745705783367157, + "learning_rate": 0.00017446570228168523, + "loss": 2.6098, + "step": 4729 + }, + { + "epoch": 0.38172867403760796, + "grad_norm": 0.7098714113235474, + "learning_rate": 0.00017445516443126828, + "loss": 2.6628, + "step": 4730 + }, + { + "epoch": 0.38180937777419094, + "grad_norm": 0.7376620769500732, + "learning_rate": 0.00017444462472523405, + "loss": 2.7086, + "step": 4731 + }, + { + "epoch": 0.38189008151077397, + "grad_norm": 0.717800498008728, + "learning_rate": 0.00017443408316384512, + "loss": 2.6582, + "step": 4732 + }, + { + "epoch": 0.38197078524735695, + "grad_norm": 0.7061530947685242, + "learning_rate": 0.00017442353974736428, + "loss": 2.6817, + "step": 4733 + }, + { + "epoch": 0.38205148898394, + "grad_norm": 0.744667112827301, + "learning_rate": 0.0001744129944760543, + "loss": 2.6649, + "step": 4734 + }, + { + "epoch": 0.38213219272052296, + "grad_norm": 0.7302529215812683, + "learning_rate": 0.00017440244735017797, + "loss": 2.7313, + "step": 4735 + }, + { + "epoch": 0.382212896457106, + "grad_norm": 0.6845258474349976, + "learning_rate": 0.00017439189836999816, + "loss": 2.637, + "step": 4736 + }, + { + "epoch": 0.38229360019368896, + "grad_norm": 0.7060490250587463, + "learning_rate": 0.0001743813475357778, + "loss": 2.6674, + "step": 4737 + }, + { + "epoch": 0.382374303930272, + "grad_norm": 0.7146841287612915, + "learning_rate": 0.00017437079484777977, + "loss": 2.6607, + "step": 4738 + }, + { + "epoch": 0.382455007666855, + "grad_norm": 0.7107662558555603, + "learning_rate": 0.00017436024030626719, + "loss": 2.6777, + "step": 4739 + }, + { + "epoch": 0.382535711403438, + "grad_norm": 0.7356777191162109, + "learning_rate": 0.00017434968391150303, + "loss": 2.5801, + "step": 4740 + }, + { + "epoch": 0.382616415140021, + "grad_norm": 0.6839054226875305, + "learning_rate": 0.00017433912566375037, + "loss": 2.6319, + "step": 4741 + }, + { + "epoch": 0.382697118876604, + "grad_norm": 
0.7049627900123596, + "learning_rate": 0.00017432856556327236, + "loss": 2.741, + "step": 4742 + }, + { + "epoch": 0.382777822613187, + "grad_norm": 0.7926551103591919, + "learning_rate": 0.00017431800361033224, + "loss": 2.64, + "step": 4743 + }, + { + "epoch": 0.38285852634976997, + "grad_norm": 0.734272301197052, + "learning_rate": 0.0001743074398051932, + "loss": 2.6575, + "step": 4744 + }, + { + "epoch": 0.382939230086353, + "grad_norm": 0.6959543824195862, + "learning_rate": 0.00017429687414811847, + "loss": 2.664, + "step": 4745 + }, + { + "epoch": 0.383019933822936, + "grad_norm": 0.7258255481719971, + "learning_rate": 0.00017428630663937148, + "loss": 2.6597, + "step": 4746 + }, + { + "epoch": 0.383100637559519, + "grad_norm": 0.8067473769187927, + "learning_rate": 0.0001742757372792155, + "loss": 2.6798, + "step": 4747 + }, + { + "epoch": 0.383181341296102, + "grad_norm": 0.7000626921653748, + "learning_rate": 0.000174265166067914, + "loss": 2.6561, + "step": 4748 + }, + { + "epoch": 0.383262045032685, + "grad_norm": 0.818914532661438, + "learning_rate": 0.00017425459300573045, + "loss": 2.6491, + "step": 4749 + }, + { + "epoch": 0.383342748769268, + "grad_norm": 0.7060543298721313, + "learning_rate": 0.00017424401809292833, + "loss": 2.6825, + "step": 4750 + }, + { + "epoch": 0.383423452505851, + "grad_norm": 0.893488883972168, + "learning_rate": 0.0001742334413297712, + "loss": 2.7201, + "step": 4751 + }, + { + "epoch": 0.383504156242434, + "grad_norm": 0.8131078481674194, + "learning_rate": 0.00017422286271652265, + "loss": 2.7828, + "step": 4752 + }, + { + "epoch": 0.38358485997901703, + "grad_norm": 0.7735587954521179, + "learning_rate": 0.00017421228225344634, + "loss": 2.6489, + "step": 4753 + }, + { + "epoch": 0.3836655637156, + "grad_norm": 0.713800311088562, + "learning_rate": 0.000174201699940806, + "loss": 2.6686, + "step": 4754 + }, + { + "epoch": 0.38374626745218304, + "grad_norm": 0.8246580362319946, + "learning_rate": 
0.00017419111577886528, + "loss": 2.6771, + "step": 4755 + }, + { + "epoch": 0.383826971188766, + "grad_norm": 0.694542646408081, + "learning_rate": 0.00017418052976788805, + "loss": 2.6632, + "step": 4756 + }, + { + "epoch": 0.38390767492534905, + "grad_norm": 0.7200453281402588, + "learning_rate": 0.0001741699419081381, + "loss": 2.6386, + "step": 4757 + }, + { + "epoch": 0.38398837866193203, + "grad_norm": 0.7002073526382446, + "learning_rate": 0.00017415935219987933, + "loss": 2.6399, + "step": 4758 + }, + { + "epoch": 0.38406908239851506, + "grad_norm": 0.7056967616081238, + "learning_rate": 0.00017414876064337565, + "loss": 2.7048, + "step": 4759 + }, + { + "epoch": 0.38414978613509804, + "grad_norm": 0.7406448721885681, + "learning_rate": 0.000174138167238891, + "loss": 2.6256, + "step": 4760 + }, + { + "epoch": 0.38423048987168107, + "grad_norm": 0.7280529737472534, + "learning_rate": 0.00017412757198668945, + "loss": 2.6393, + "step": 4761 + }, + { + "epoch": 0.38431119360826405, + "grad_norm": 0.7626908421516418, + "learning_rate": 0.00017411697488703502, + "loss": 2.6717, + "step": 4762 + }, + { + "epoch": 0.3843918973448471, + "grad_norm": 0.716345489025116, + "learning_rate": 0.00017410637594019184, + "loss": 2.6457, + "step": 4763 + }, + { + "epoch": 0.38447260108143005, + "grad_norm": 0.8825077414512634, + "learning_rate": 0.00017409577514642405, + "loss": 2.7042, + "step": 4764 + }, + { + "epoch": 0.3845533048180131, + "grad_norm": 0.7301186919212341, + "learning_rate": 0.00017408517250599585, + "loss": 2.7065, + "step": 4765 + }, + { + "epoch": 0.38463400855459606, + "grad_norm": 0.8235788345336914, + "learning_rate": 0.0001740745680191715, + "loss": 2.6315, + "step": 4766 + }, + { + "epoch": 0.3847147122911791, + "grad_norm": 0.7355515956878662, + "learning_rate": 0.00017406396168621527, + "loss": 2.6939, + "step": 4767 + }, + { + "epoch": 0.38479541602776207, + "grad_norm": 0.6781682372093201, + "learning_rate": 0.0001740533535073915, + "loss": 
2.6071, + "step": 4768 + }, + { + "epoch": 0.3848761197643451, + "grad_norm": 0.801191508769989, + "learning_rate": 0.0001740427434829646, + "loss": 2.6635, + "step": 4769 + }, + { + "epoch": 0.3849568235009281, + "grad_norm": 0.759682297706604, + "learning_rate": 0.00017403213161319903, + "loss": 2.6823, + "step": 4770 + }, + { + "epoch": 0.3850375272375111, + "grad_norm": 0.806498110294342, + "learning_rate": 0.00017402151789835916, + "loss": 2.7111, + "step": 4771 + }, + { + "epoch": 0.3851182309740941, + "grad_norm": 0.7677996158599854, + "learning_rate": 0.00017401090233870958, + "loss": 2.6701, + "step": 4772 + }, + { + "epoch": 0.3851989347106771, + "grad_norm": 0.7449933290481567, + "learning_rate": 0.00017400028493451487, + "loss": 2.7037, + "step": 4773 + }, + { + "epoch": 0.3852796384472601, + "grad_norm": 0.7506107091903687, + "learning_rate": 0.0001739896656860396, + "loss": 2.6587, + "step": 4774 + }, + { + "epoch": 0.38536034218384313, + "grad_norm": 0.8781036734580994, + "learning_rate": 0.00017397904459354844, + "loss": 2.7634, + "step": 4775 + }, + { + "epoch": 0.3854410459204261, + "grad_norm": 0.7067514657974243, + "learning_rate": 0.0001739684216573061, + "loss": 2.638, + "step": 4776 + }, + { + "epoch": 0.38552174965700914, + "grad_norm": 0.7742886543273926, + "learning_rate": 0.00017395779687757735, + "loss": 2.7043, + "step": 4777 + }, + { + "epoch": 0.3856024533935921, + "grad_norm": 0.7348291277885437, + "learning_rate": 0.00017394717025462697, + "loss": 2.7404, + "step": 4778 + }, + { + "epoch": 0.38568315713017515, + "grad_norm": 0.7449346780776978, + "learning_rate": 0.00017393654178871984, + "loss": 2.631, + "step": 4779 + }, + { + "epoch": 0.3857638608667581, + "grad_norm": 0.7191200256347656, + "learning_rate": 0.00017392591148012078, + "loss": 2.6776, + "step": 4780 + }, + { + "epoch": 0.38584456460334116, + "grad_norm": 0.7055533528327942, + "learning_rate": 0.00017391527932909476, + "loss": 2.6219, + "step": 4781 + }, + { + 
"epoch": 0.38592526833992413, + "grad_norm": 0.73755943775177, + "learning_rate": 0.0001739046453359068, + "loss": 2.6692, + "step": 4782 + }, + { + "epoch": 0.38600597207650716, + "grad_norm": 0.7469369769096375, + "learning_rate": 0.00017389400950082185, + "loss": 2.6572, + "step": 4783 + }, + { + "epoch": 0.38608667581309014, + "grad_norm": 0.7552534341812134, + "learning_rate": 0.00017388337182410504, + "loss": 2.6853, + "step": 4784 + }, + { + "epoch": 0.3861673795496732, + "grad_norm": 0.7453532814979553, + "learning_rate": 0.00017387273230602145, + "loss": 2.6601, + "step": 4785 + }, + { + "epoch": 0.38624808328625615, + "grad_norm": 0.7259301543235779, + "learning_rate": 0.0001738620909468363, + "loss": 2.6997, + "step": 4786 + }, + { + "epoch": 0.3863287870228392, + "grad_norm": 0.6970019936561584, + "learning_rate": 0.00017385144774681476, + "loss": 2.7497, + "step": 4787 + }, + { + "epoch": 0.38640949075942216, + "grad_norm": 0.7172032594680786, + "learning_rate": 0.00017384080270622208, + "loss": 2.7182, + "step": 4788 + }, + { + "epoch": 0.3864901944960052, + "grad_norm": 0.7184371948242188, + "learning_rate": 0.00017383015582532357, + "loss": 2.6358, + "step": 4789 + }, + { + "epoch": 0.38657089823258817, + "grad_norm": 0.7302096486091614, + "learning_rate": 0.00017381950710438458, + "loss": 2.6066, + "step": 4790 + }, + { + "epoch": 0.3866516019691712, + "grad_norm": 0.7043540477752686, + "learning_rate": 0.00017380885654367053, + "loss": 2.699, + "step": 4791 + }, + { + "epoch": 0.3867323057057542, + "grad_norm": 0.6919732689857483, + "learning_rate": 0.0001737982041434468, + "loss": 2.6025, + "step": 4792 + }, + { + "epoch": 0.3868130094423372, + "grad_norm": 0.7277705669403076, + "learning_rate": 0.00017378754990397894, + "loss": 2.6764, + "step": 4793 + }, + { + "epoch": 0.3868937131789202, + "grad_norm": 0.7546190619468689, + "learning_rate": 0.00017377689382553247, + "loss": 2.5865, + "step": 4794 + }, + { + "epoch": 0.38697441691550316, + 
"grad_norm": 0.7636401653289795, + "learning_rate": 0.00017376623590837294, + "loss": 2.6488, + "step": 4795 + }, + { + "epoch": 0.3870551206520862, + "grad_norm": 0.6945658922195435, + "learning_rate": 0.00017375557615276595, + "loss": 2.6739, + "step": 4796 + }, + { + "epoch": 0.38713582438866917, + "grad_norm": 0.7503637075424194, + "learning_rate": 0.00017374491455897722, + "loss": 2.6854, + "step": 4797 + }, + { + "epoch": 0.3872165281252522, + "grad_norm": 0.7457373142242432, + "learning_rate": 0.00017373425112727247, + "loss": 2.6659, + "step": 4798 + }, + { + "epoch": 0.3872972318618352, + "grad_norm": 0.7742534875869751, + "learning_rate": 0.0001737235858579174, + "loss": 2.6461, + "step": 4799 + }, + { + "epoch": 0.3873779355984182, + "grad_norm": 0.7397909760475159, + "learning_rate": 0.0001737129187511779, + "loss": 2.6779, + "step": 4800 + }, + { + "epoch": 0.3874586393350012, + "grad_norm": 0.7922031879425049, + "learning_rate": 0.00017370224980731974, + "loss": 2.6417, + "step": 4801 + }, + { + "epoch": 0.3875393430715842, + "grad_norm": 0.8503968715667725, + "learning_rate": 0.00017369157902660887, + "loss": 2.7063, + "step": 4802 + }, + { + "epoch": 0.3876200468081672, + "grad_norm": 0.7143701314926147, + "learning_rate": 0.00017368090640931125, + "loss": 2.6152, + "step": 4803 + }, + { + "epoch": 0.38770075054475023, + "grad_norm": 0.8016753196716309, + "learning_rate": 0.0001736702319556928, + "loss": 2.6005, + "step": 4804 + }, + { + "epoch": 0.3877814542813332, + "grad_norm": 0.7329538464546204, + "learning_rate": 0.00017365955566601962, + "loss": 2.6027, + "step": 4805 + }, + { + "epoch": 0.38786215801791624, + "grad_norm": 0.7005148530006409, + "learning_rate": 0.00017364887754055773, + "loss": 2.6585, + "step": 4806 + }, + { + "epoch": 0.3879428617544992, + "grad_norm": 0.7092769145965576, + "learning_rate": 0.00017363819757957333, + "loss": 2.6763, + "step": 4807 + }, + { + "epoch": 0.38802356549108225, + "grad_norm": 0.7475202679634094, + 
"learning_rate": 0.0001736275157833325, + "loss": 2.5969, + "step": 4808 + }, + { + "epoch": 0.3881042692276652, + "grad_norm": 0.822496235370636, + "learning_rate": 0.0001736168321521016, + "loss": 2.6758, + "step": 4809 + }, + { + "epoch": 0.38818497296424825, + "grad_norm": 0.7756842374801636, + "learning_rate": 0.0001736061466861467, + "loss": 2.6676, + "step": 4810 + }, + { + "epoch": 0.38826567670083123, + "grad_norm": 0.7192497849464417, + "learning_rate": 0.00017359545938573428, + "loss": 2.7045, + "step": 4811 + }, + { + "epoch": 0.38834638043741426, + "grad_norm": 0.7064149379730225, + "learning_rate": 0.00017358477025113063, + "loss": 2.6169, + "step": 4812 + }, + { + "epoch": 0.38842708417399724, + "grad_norm": 0.7297258973121643, + "learning_rate": 0.00017357407928260215, + "loss": 2.612, + "step": 4813 + }, + { + "epoch": 0.38850778791058027, + "grad_norm": 0.7011935114860535, + "learning_rate": 0.00017356338648041528, + "loss": 2.6507, + "step": 4814 + }, + { + "epoch": 0.38858849164716325, + "grad_norm": 0.7647256255149841, + "learning_rate": 0.00017355269184483651, + "loss": 2.6838, + "step": 4815 + }, + { + "epoch": 0.3886691953837463, + "grad_norm": 0.690182089805603, + "learning_rate": 0.0001735419953761324, + "loss": 2.6996, + "step": 4816 + }, + { + "epoch": 0.38874989912032926, + "grad_norm": 0.7142173647880554, + "learning_rate": 0.00017353129707456955, + "loss": 2.6705, + "step": 4817 + }, + { + "epoch": 0.3888306028569123, + "grad_norm": 0.801369309425354, + "learning_rate": 0.00017352059694041456, + "loss": 2.7002, + "step": 4818 + }, + { + "epoch": 0.38891130659349527, + "grad_norm": 0.7021649479866028, + "learning_rate": 0.0001735098949739341, + "loss": 2.7042, + "step": 4819 + }, + { + "epoch": 0.3889920103300783, + "grad_norm": 0.6802586317062378, + "learning_rate": 0.00017349919117539488, + "loss": 2.7186, + "step": 4820 + }, + { + "epoch": 0.3890727140666613, + "grad_norm": 0.7723212838172913, + "learning_rate": 
0.0001734884855450637, + "loss": 2.608, + "step": 4821 + }, + { + "epoch": 0.3891534178032443, + "grad_norm": 0.7037193179130554, + "learning_rate": 0.00017347777808320735, + "loss": 2.6198, + "step": 4822 + }, + { + "epoch": 0.3892341215398273, + "grad_norm": 0.7172731161117554, + "learning_rate": 0.00017346706879009272, + "loss": 2.7037, + "step": 4823 + }, + { + "epoch": 0.3893148252764103, + "grad_norm": 0.7421539425849915, + "learning_rate": 0.00017345635766598667, + "loss": 2.6619, + "step": 4824 + }, + { + "epoch": 0.3893955290129933, + "grad_norm": 0.7587071061134338, + "learning_rate": 0.0001734456447111562, + "loss": 2.6229, + "step": 4825 + }, + { + "epoch": 0.3894762327495763, + "grad_norm": 0.6981459259986877, + "learning_rate": 0.00017343492992586822, + "loss": 2.5927, + "step": 4826 + }, + { + "epoch": 0.3895569364861593, + "grad_norm": 0.7628491520881653, + "learning_rate": 0.00017342421331038987, + "loss": 2.7047, + "step": 4827 + }, + { + "epoch": 0.38963764022274233, + "grad_norm": 0.8005064129829407, + "learning_rate": 0.00017341349486498818, + "loss": 2.6918, + "step": 4828 + }, + { + "epoch": 0.3897183439593253, + "grad_norm": 0.7756431102752686, + "learning_rate": 0.0001734027745899303, + "loss": 2.6621, + "step": 4829 + }, + { + "epoch": 0.38979904769590834, + "grad_norm": 0.7317833304405212, + "learning_rate": 0.00017339205248548338, + "loss": 2.7134, + "step": 4830 + }, + { + "epoch": 0.3898797514324913, + "grad_norm": 0.7293959259986877, + "learning_rate": 0.0001733813285519147, + "loss": 2.6865, + "step": 4831 + }, + { + "epoch": 0.38996045516907435, + "grad_norm": 0.7120299935340881, + "learning_rate": 0.00017337060278949147, + "loss": 2.6915, + "step": 4832 + }, + { + "epoch": 0.3900411589056573, + "grad_norm": 0.7255397439002991, + "learning_rate": 0.00017335987519848103, + "loss": 2.6671, + "step": 4833 + }, + { + "epoch": 0.39012186264224036, + "grad_norm": 0.7849408388137817, + "learning_rate": 0.0001733491457791507, + "loss": 
2.6301, + "step": 4834 + }, + { + "epoch": 0.39020256637882333, + "grad_norm": 0.6998472809791565, + "learning_rate": 0.00017333841453176797, + "loss": 2.6587, + "step": 4835 + }, + { + "epoch": 0.39028327011540637, + "grad_norm": 0.7530023455619812, + "learning_rate": 0.00017332768145660024, + "loss": 2.7011, + "step": 4836 + }, + { + "epoch": 0.39036397385198934, + "grad_norm": 0.7251207828521729, + "learning_rate": 0.00017331694655391497, + "loss": 2.6416, + "step": 4837 + }, + { + "epoch": 0.3904446775885724, + "grad_norm": 0.7016854882240295, + "learning_rate": 0.00017330620982397975, + "loss": 2.7224, + "step": 4838 + }, + { + "epoch": 0.39052538132515535, + "grad_norm": 0.7253310084342957, + "learning_rate": 0.00017329547126706217, + "loss": 2.6747, + "step": 4839 + }, + { + "epoch": 0.3906060850617384, + "grad_norm": 0.7114601731300354, + "learning_rate": 0.00017328473088342987, + "loss": 2.6654, + "step": 4840 + }, + { + "epoch": 0.39068678879832136, + "grad_norm": 0.7773289680480957, + "learning_rate": 0.00017327398867335048, + "loss": 2.6625, + "step": 4841 + }, + { + "epoch": 0.3907674925349044, + "grad_norm": 0.7541868686676025, + "learning_rate": 0.00017326324463709175, + "loss": 2.667, + "step": 4842 + }, + { + "epoch": 0.39084819627148737, + "grad_norm": 0.8095890283584595, + "learning_rate": 0.00017325249877492147, + "loss": 2.706, + "step": 4843 + }, + { + "epoch": 0.3909289000080704, + "grad_norm": 0.7019474506378174, + "learning_rate": 0.00017324175108710742, + "loss": 2.6125, + "step": 4844 + }, + { + "epoch": 0.3910096037446534, + "grad_norm": 0.7055396437644958, + "learning_rate": 0.00017323100157391746, + "loss": 2.6373, + "step": 4845 + }, + { + "epoch": 0.39109030748123635, + "grad_norm": 0.7332476377487183, + "learning_rate": 0.00017322025023561955, + "loss": 2.6559, + "step": 4846 + }, + { + "epoch": 0.3911710112178194, + "grad_norm": 0.7740387916564941, + "learning_rate": 0.00017320949707248158, + "loss": 2.7341, + "step": 4847 + }, + { 
+ "epoch": 0.39125171495440236, + "grad_norm": 0.7371044754981995, + "learning_rate": 0.0001731987420847716, + "loss": 2.7318, + "step": 4848 + }, + { + "epoch": 0.3913324186909854, + "grad_norm": 0.7897786498069763, + "learning_rate": 0.00017318798527275758, + "loss": 2.6759, + "step": 4849 + }, + { + "epoch": 0.39141312242756837, + "grad_norm": 0.7149896621704102, + "learning_rate": 0.0001731772266367077, + "loss": 2.7097, + "step": 4850 + }, + { + "epoch": 0.3914938261641514, + "grad_norm": 0.7824358344078064, + "learning_rate": 0.00017316646617689002, + "loss": 2.6376, + "step": 4851 + }, + { + "epoch": 0.3915745299007344, + "grad_norm": 0.7704496383666992, + "learning_rate": 0.00017315570389357272, + "loss": 2.6539, + "step": 4852 + }, + { + "epoch": 0.3916552336373174, + "grad_norm": 0.7489706873893738, + "learning_rate": 0.00017314493978702407, + "loss": 2.6716, + "step": 4853 + }, + { + "epoch": 0.3917359373739004, + "grad_norm": 0.7368690967559814, + "learning_rate": 0.00017313417385751234, + "loss": 2.7171, + "step": 4854 + }, + { + "epoch": 0.3918166411104834, + "grad_norm": 0.7215858697891235, + "learning_rate": 0.00017312340610530579, + "loss": 2.6306, + "step": 4855 + }, + { + "epoch": 0.3918973448470664, + "grad_norm": 0.7622217535972595, + "learning_rate": 0.00017311263653067285, + "loss": 2.6089, + "step": 4856 + }, + { + "epoch": 0.39197804858364943, + "grad_norm": 0.7317889332771301, + "learning_rate": 0.00017310186513388185, + "loss": 2.6831, + "step": 4857 + }, + { + "epoch": 0.3920587523202324, + "grad_norm": 0.894185483455658, + "learning_rate": 0.0001730910919152013, + "loss": 2.684, + "step": 4858 + }, + { + "epoch": 0.39213945605681544, + "grad_norm": 0.7313157916069031, + "learning_rate": 0.00017308031687489968, + "loss": 2.6465, + "step": 4859 + }, + { + "epoch": 0.3922201597933984, + "grad_norm": 0.7765825390815735, + "learning_rate": 0.00017306954001324552, + "loss": 2.6526, + "step": 4860 + }, + { + "epoch": 0.39230086352998145, + 
"grad_norm": 0.7171424031257629, + "learning_rate": 0.00017305876133050742, + "loss": 2.6212, + "step": 4861 + }, + { + "epoch": 0.3923815672665644, + "grad_norm": 0.7215112447738647, + "learning_rate": 0.000173047980826954, + "loss": 2.6329, + "step": 4862 + }, + { + "epoch": 0.39246227100314746, + "grad_norm": 0.7393578886985779, + "learning_rate": 0.00017303719850285396, + "loss": 2.7264, + "step": 4863 + }, + { + "epoch": 0.39254297473973043, + "grad_norm": 0.7620136737823486, + "learning_rate": 0.00017302641435847603, + "loss": 2.6686, + "step": 4864 + }, + { + "epoch": 0.39262367847631346, + "grad_norm": 0.7290963530540466, + "learning_rate": 0.00017301562839408893, + "loss": 2.578, + "step": 4865 + }, + { + "epoch": 0.39270438221289644, + "grad_norm": 0.6978541612625122, + "learning_rate": 0.00017300484060996153, + "loss": 2.6783, + "step": 4866 + }, + { + "epoch": 0.3927850859494795, + "grad_norm": 0.7212007641792297, + "learning_rate": 0.00017299405100636264, + "loss": 2.6282, + "step": 4867 + }, + { + "epoch": 0.39286578968606245, + "grad_norm": 0.757324755191803, + "learning_rate": 0.0001729832595835612, + "loss": 2.6933, + "step": 4868 + }, + { + "epoch": 0.3929464934226455, + "grad_norm": 0.7052869200706482, + "learning_rate": 0.00017297246634182618, + "loss": 2.7152, + "step": 4869 + }, + { + "epoch": 0.39302719715922846, + "grad_norm": 0.7326259016990662, + "learning_rate": 0.0001729616712814265, + "loss": 2.6792, + "step": 4870 + }, + { + "epoch": 0.3931079008958115, + "grad_norm": 0.7540302276611328, + "learning_rate": 0.00017295087440263128, + "loss": 2.6621, + "step": 4871 + }, + { + "epoch": 0.39318860463239447, + "grad_norm": 0.765454888343811, + "learning_rate": 0.00017294007570570956, + "loss": 2.7049, + "step": 4872 + }, + { + "epoch": 0.3932693083689775, + "grad_norm": 0.7303065061569214, + "learning_rate": 0.0001729292751909305, + "loss": 2.6867, + "step": 4873 + }, + { + "epoch": 0.3933500121055605, + "grad_norm": 0.7049854397773743, + 
"learning_rate": 0.00017291847285856325, + "loss": 2.7052, + "step": 4874 + }, + { + "epoch": 0.3934307158421435, + "grad_norm": 0.7199053764343262, + "learning_rate": 0.00017290766870887704, + "loss": 2.7195, + "step": 4875 + }, + { + "epoch": 0.3935114195787265, + "grad_norm": 0.7536180019378662, + "learning_rate": 0.00017289686274214118, + "loss": 2.6861, + "step": 4876 + }, + { + "epoch": 0.3935921233153095, + "grad_norm": 0.7295238971710205, + "learning_rate": 0.00017288605495862492, + "loss": 2.6684, + "step": 4877 + }, + { + "epoch": 0.3936728270518925, + "grad_norm": 0.7575719952583313, + "learning_rate": 0.00017287524535859763, + "loss": 2.6439, + "step": 4878 + }, + { + "epoch": 0.3937535307884755, + "grad_norm": 0.678909182548523, + "learning_rate": 0.00017286443394232874, + "loss": 2.6562, + "step": 4879 + }, + { + "epoch": 0.3938342345250585, + "grad_norm": 0.6908892393112183, + "learning_rate": 0.00017285362071008768, + "loss": 2.6364, + "step": 4880 + }, + { + "epoch": 0.39391493826164153, + "grad_norm": 0.7414079904556274, + "learning_rate": 0.00017284280566214397, + "loss": 2.5872, + "step": 4881 + }, + { + "epoch": 0.3939956419982245, + "grad_norm": 0.6824749112129211, + "learning_rate": 0.0001728319887987671, + "loss": 2.641, + "step": 4882 + }, + { + "epoch": 0.39407634573480754, + "grad_norm": 0.6908513903617859, + "learning_rate": 0.0001728211701202267, + "loss": 2.6977, + "step": 4883 + }, + { + "epoch": 0.3941570494713905, + "grad_norm": 0.7214735746383667, + "learning_rate": 0.0001728103496267924, + "loss": 2.5826, + "step": 4884 + }, + { + "epoch": 0.39423775320797355, + "grad_norm": 0.812781572341919, + "learning_rate": 0.00017279952731873385, + "loss": 2.6806, + "step": 4885 + }, + { + "epoch": 0.39431845694455653, + "grad_norm": 0.7610746026039124, + "learning_rate": 0.00017278870319632078, + "loss": 2.6046, + "step": 4886 + }, + { + "epoch": 0.39439916068113956, + "grad_norm": 0.7151652574539185, + "learning_rate": 
0.00017277787725982293, + "loss": 2.6543, + "step": 4887 + }, + { + "epoch": 0.39447986441772254, + "grad_norm": 0.7293612360954285, + "learning_rate": 0.00017276704950951017, + "loss": 2.6384, + "step": 4888 + }, + { + "epoch": 0.39456056815430557, + "grad_norm": 0.8138254284858704, + "learning_rate": 0.00017275621994565233, + "loss": 2.7208, + "step": 4889 + }, + { + "epoch": 0.39464127189088855, + "grad_norm": 0.7557196021080017, + "learning_rate": 0.00017274538856851924, + "loss": 2.6571, + "step": 4890 + }, + { + "epoch": 0.3947219756274716, + "grad_norm": 0.7297266721725464, + "learning_rate": 0.00017273455537838097, + "loss": 2.6222, + "step": 4891 + }, + { + "epoch": 0.39480267936405455, + "grad_norm": 0.7838431596755981, + "learning_rate": 0.00017272372037550743, + "loss": 2.782, + "step": 4892 + }, + { + "epoch": 0.3948833831006376, + "grad_norm": 0.7799673676490784, + "learning_rate": 0.00017271288356016866, + "loss": 2.6658, + "step": 4893 + }, + { + "epoch": 0.39496408683722056, + "grad_norm": 0.8495545387268066, + "learning_rate": 0.0001727020449326348, + "loss": 2.6552, + "step": 4894 + }, + { + "epoch": 0.3950447905738036, + "grad_norm": 0.7317770719528198, + "learning_rate": 0.00017269120449317588, + "loss": 2.6616, + "step": 4895 + }, + { + "epoch": 0.39512549431038657, + "grad_norm": 0.7518885731697083, + "learning_rate": 0.00017268036224206217, + "loss": 2.6864, + "step": 4896 + }, + { + "epoch": 0.39520619804696955, + "grad_norm": 0.83487468957901, + "learning_rate": 0.00017266951817956382, + "loss": 2.7535, + "step": 4897 + }, + { + "epoch": 0.3952869017835526, + "grad_norm": 0.7440658211708069, + "learning_rate": 0.00017265867230595113, + "loss": 2.6584, + "step": 4898 + }, + { + "epoch": 0.39536760552013556, + "grad_norm": 0.7060485482215881, + "learning_rate": 0.00017264782462149438, + "loss": 2.6892, + "step": 4899 + }, + { + "epoch": 0.3954483092567186, + "grad_norm": 0.8410428166389465, + "learning_rate": 0.00017263697512646394, + 
"loss": 2.6425, + "step": 4900 + }, + { + "epoch": 0.39552901299330157, + "grad_norm": 0.757046639919281, + "learning_rate": 0.0001726261238211302, + "loss": 2.6159, + "step": 4901 + }, + { + "epoch": 0.3956097167298846, + "grad_norm": 0.7288908958435059, + "learning_rate": 0.00017261527070576365, + "loss": 2.6753, + "step": 4902 + }, + { + "epoch": 0.3956904204664676, + "grad_norm": 0.8194541335105896, + "learning_rate": 0.0001726044157806347, + "loss": 2.6673, + "step": 4903 + }, + { + "epoch": 0.3957711242030506, + "grad_norm": 0.7957740426063538, + "learning_rate": 0.00017259355904601393, + "loss": 2.6662, + "step": 4904 + }, + { + "epoch": 0.3958518279396336, + "grad_norm": 0.8790122270584106, + "learning_rate": 0.0001725827005021719, + "loss": 2.7513, + "step": 4905 + }, + { + "epoch": 0.3959325316762166, + "grad_norm": 0.7674984335899353, + "learning_rate": 0.00017257184014937924, + "loss": 2.6375, + "step": 4906 + }, + { + "epoch": 0.3960132354127996, + "grad_norm": 0.7250992655754089, + "learning_rate": 0.00017256097798790663, + "loss": 2.63, + "step": 4907 + }, + { + "epoch": 0.3960939391493826, + "grad_norm": 0.8578312397003174, + "learning_rate": 0.00017255011401802475, + "loss": 2.702, + "step": 4908 + }, + { + "epoch": 0.3961746428859656, + "grad_norm": 0.7365253567695618, + "learning_rate": 0.00017253924824000438, + "loss": 2.6156, + "step": 4909 + }, + { + "epoch": 0.39625534662254863, + "grad_norm": 0.7148925065994263, + "learning_rate": 0.00017252838065411633, + "loss": 2.6658, + "step": 4910 + }, + { + "epoch": 0.3963360503591316, + "grad_norm": 0.7517829537391663, + "learning_rate": 0.00017251751126063148, + "loss": 2.6347, + "step": 4911 + }, + { + "epoch": 0.39641675409571464, + "grad_norm": 0.7880864143371582, + "learning_rate": 0.00017250664005982066, + "loss": 2.7045, + "step": 4912 + }, + { + "epoch": 0.3964974578322976, + "grad_norm": 0.7460693120956421, + "learning_rate": 0.00017249576705195482, + "loss": 2.6976, + "step": 4913 + }, + { 
+ "epoch": 0.39657816156888065, + "grad_norm": 0.7179895043373108, + "learning_rate": 0.00017248489223730496, + "loss": 2.6366, + "step": 4914 + }, + { + "epoch": 0.3966588653054636, + "grad_norm": 0.7737421989440918, + "learning_rate": 0.00017247401561614213, + "loss": 2.7116, + "step": 4915 + }, + { + "epoch": 0.39673956904204666, + "grad_norm": 0.8561483025550842, + "learning_rate": 0.0001724631371887374, + "loss": 2.6591, + "step": 4916 + }, + { + "epoch": 0.39682027277862963, + "grad_norm": 0.7616356611251831, + "learning_rate": 0.00017245225695536182, + "loss": 2.6436, + "step": 4917 + }, + { + "epoch": 0.39690097651521267, + "grad_norm": 0.7754645943641663, + "learning_rate": 0.0001724413749162866, + "loss": 2.6699, + "step": 4918 + }, + { + "epoch": 0.39698168025179564, + "grad_norm": 0.800165593624115, + "learning_rate": 0.000172430491071783, + "loss": 2.7155, + "step": 4919 + }, + { + "epoch": 0.3970623839883787, + "grad_norm": 0.8448799848556519, + "learning_rate": 0.00017241960542212223, + "loss": 2.6991, + "step": 4920 + }, + { + "epoch": 0.39714308772496165, + "grad_norm": 0.7106496095657349, + "learning_rate": 0.00017240871796757556, + "loss": 2.628, + "step": 4921 + }, + { + "epoch": 0.3972237914615447, + "grad_norm": 0.7332959175109863, + "learning_rate": 0.00017239782870841436, + "loss": 2.6159, + "step": 4922 + }, + { + "epoch": 0.39730449519812766, + "grad_norm": 0.7573551535606384, + "learning_rate": 0.00017238693764491002, + "loss": 2.67, + "step": 4923 + }, + { + "epoch": 0.3973851989347107, + "grad_norm": 0.7833136320114136, + "learning_rate": 0.00017237604477733399, + "loss": 2.7276, + "step": 4924 + }, + { + "epoch": 0.39746590267129367, + "grad_norm": 0.7233073711395264, + "learning_rate": 0.00017236515010595773, + "loss": 2.6654, + "step": 4925 + }, + { + "epoch": 0.3975466064078767, + "grad_norm": 0.7920324206352234, + "learning_rate": 0.00017235425363105273, + "loss": 2.7611, + "step": 4926 + }, + { + "epoch": 0.3976273101444597, + 
"grad_norm": 0.7096883058547974, + "learning_rate": 0.00017234335535289063, + "loss": 2.687, + "step": 4927 + }, + { + "epoch": 0.3977080138810427, + "grad_norm": 0.7231960296630859, + "learning_rate": 0.000172332455271743, + "loss": 2.6441, + "step": 4928 + }, + { + "epoch": 0.3977887176176257, + "grad_norm": 0.7852105498313904, + "learning_rate": 0.00017232155338788146, + "loss": 2.5948, + "step": 4929 + }, + { + "epoch": 0.3978694213542087, + "grad_norm": 0.788789689540863, + "learning_rate": 0.0001723106497015778, + "loss": 2.6797, + "step": 4930 + }, + { + "epoch": 0.3979501250907917, + "grad_norm": 0.7082793116569519, + "learning_rate": 0.00017229974421310377, + "loss": 2.6787, + "step": 4931 + }, + { + "epoch": 0.3980308288273747, + "grad_norm": 0.8157992362976074, + "learning_rate": 0.00017228883692273106, + "loss": 2.6367, + "step": 4932 + }, + { + "epoch": 0.3981115325639577, + "grad_norm": 0.7576673030853271, + "learning_rate": 0.00017227792783073157, + "loss": 2.6826, + "step": 4933 + }, + { + "epoch": 0.39819223630054074, + "grad_norm": 0.7225388884544373, + "learning_rate": 0.00017226701693737718, + "loss": 2.668, + "step": 4934 + }, + { + "epoch": 0.3982729400371237, + "grad_norm": 0.7029562592506409, + "learning_rate": 0.00017225610424293985, + "loss": 2.6613, + "step": 4935 + }, + { + "epoch": 0.39835364377370674, + "grad_norm": 0.73081374168396, + "learning_rate": 0.0001722451897476915, + "loss": 2.6378, + "step": 4936 + }, + { + "epoch": 0.3984343475102897, + "grad_norm": 0.744008481502533, + "learning_rate": 0.0001722342734519042, + "loss": 2.6501, + "step": 4937 + }, + { + "epoch": 0.39851505124687275, + "grad_norm": 0.7482618093490601, + "learning_rate": 0.00017222335535584996, + "loss": 2.7287, + "step": 4938 + }, + { + "epoch": 0.39859575498345573, + "grad_norm": 0.6487892866134644, + "learning_rate": 0.00017221243545980093, + "loss": 2.6417, + "step": 4939 + }, + { + "epoch": 0.39867645872003876, + "grad_norm": 0.7894789576530457, + 
"learning_rate": 0.00017220151376402923, + "loss": 2.7431, + "step": 4940 + }, + { + "epoch": 0.39875716245662174, + "grad_norm": 0.8232294321060181, + "learning_rate": 0.00017219059026880708, + "loss": 2.6824, + "step": 4941 + }, + { + "epoch": 0.39883786619320477, + "grad_norm": 0.6844691634178162, + "learning_rate": 0.00017217966497440668, + "loss": 2.6294, + "step": 4942 + }, + { + "epoch": 0.39891856992978775, + "grad_norm": 0.7245259881019592, + "learning_rate": 0.00017216873788110037, + "loss": 2.6815, + "step": 4943 + }, + { + "epoch": 0.3989992736663708, + "grad_norm": 0.7197226881980896, + "learning_rate": 0.00017215780898916045, + "loss": 2.725, + "step": 4944 + }, + { + "epoch": 0.39907997740295376, + "grad_norm": 0.8391285538673401, + "learning_rate": 0.00017214687829885934, + "loss": 2.6724, + "step": 4945 + }, + { + "epoch": 0.3991606811395368, + "grad_norm": 0.7357564568519592, + "learning_rate": 0.00017213594581046938, + "loss": 2.7052, + "step": 4946 + }, + { + "epoch": 0.39924138487611976, + "grad_norm": 0.7611483931541443, + "learning_rate": 0.00017212501152426312, + "loss": 2.7214, + "step": 4947 + }, + { + "epoch": 0.39932208861270274, + "grad_norm": 0.7314950227737427, + "learning_rate": 0.00017211407544051306, + "loss": 2.6594, + "step": 4948 + }, + { + "epoch": 0.3994027923492858, + "grad_norm": 0.774131178855896, + "learning_rate": 0.00017210313755949169, + "loss": 2.6812, + "step": 4949 + }, + { + "epoch": 0.39948349608586875, + "grad_norm": 0.707003116607666, + "learning_rate": 0.00017209219788147167, + "loss": 2.7334, + "step": 4950 + }, + { + "epoch": 0.3995641998224518, + "grad_norm": 0.8179643154144287, + "learning_rate": 0.0001720812564067256, + "loss": 2.6554, + "step": 4951 + }, + { + "epoch": 0.39964490355903476, + "grad_norm": 0.6572005152702332, + "learning_rate": 0.00017207031313552621, + "loss": 2.6423, + "step": 4952 + }, + { + "epoch": 0.3997256072956178, + "grad_norm": 0.7663072943687439, + "learning_rate": 
0.00017205936806814623, + "loss": 2.689, + "step": 4953 + }, + { + "epoch": 0.39980631103220077, + "grad_norm": 0.7351107001304626, + "learning_rate": 0.00017204842120485846, + "loss": 2.631, + "step": 4954 + }, + { + "epoch": 0.3998870147687838, + "grad_norm": 0.7754253149032593, + "learning_rate": 0.00017203747254593564, + "loss": 2.6371, + "step": 4955 + }, + { + "epoch": 0.3999677185053668, + "grad_norm": 0.7471042275428772, + "learning_rate": 0.00017202652209165074, + "loss": 2.6542, + "step": 4956 + }, + { + "epoch": 0.4000484222419498, + "grad_norm": 0.7357343435287476, + "learning_rate": 0.00017201556984227664, + "loss": 2.6226, + "step": 4957 + }, + { + "epoch": 0.4001291259785328, + "grad_norm": 0.8096252679824829, + "learning_rate": 0.00017200461579808626, + "loss": 2.6458, + "step": 4958 + }, + { + "epoch": 0.4002098297151158, + "grad_norm": 0.7622970938682556, + "learning_rate": 0.0001719936599593526, + "loss": 2.7129, + "step": 4959 + }, + { + "epoch": 0.4002905334516988, + "grad_norm": 0.7374953627586365, + "learning_rate": 0.00017198270232634882, + "loss": 2.696, + "step": 4960 + }, + { + "epoch": 0.4003712371882818, + "grad_norm": 0.7897924184799194, + "learning_rate": 0.00017197174289934787, + "loss": 2.7508, + "step": 4961 + }, + { + "epoch": 0.4004519409248648, + "grad_norm": 0.7047984004020691, + "learning_rate": 0.00017196078167862298, + "loss": 2.6733, + "step": 4962 + }, + { + "epoch": 0.40053264466144783, + "grad_norm": 0.7866294980049133, + "learning_rate": 0.0001719498186644473, + "loss": 2.694, + "step": 4963 + }, + { + "epoch": 0.4006133483980308, + "grad_norm": 0.739923894405365, + "learning_rate": 0.00017193885385709409, + "loss": 2.7125, + "step": 4964 + }, + { + "epoch": 0.40069405213461384, + "grad_norm": 0.7506374716758728, + "learning_rate": 0.00017192788725683652, + "loss": 2.627, + "step": 4965 + }, + { + "epoch": 0.4007747558711968, + "grad_norm": 0.6591607928276062, + "learning_rate": 0.00017191691886394802, + "loss": 2.6723, 
+ "step": 4966 + }, + { + "epoch": 0.40085545960777985, + "grad_norm": 0.7748788595199585, + "learning_rate": 0.00017190594867870192, + "loss": 2.6486, + "step": 4967 + }, + { + "epoch": 0.40093616334436283, + "grad_norm": 0.7518232464790344, + "learning_rate": 0.0001718949767013716, + "loss": 2.6879, + "step": 4968 + }, + { + "epoch": 0.40101686708094586, + "grad_norm": 0.7360039949417114, + "learning_rate": 0.00017188400293223052, + "loss": 2.6506, + "step": 4969 + }, + { + "epoch": 0.40109757081752884, + "grad_norm": 0.7217130064964294, + "learning_rate": 0.0001718730273715522, + "loss": 2.6263, + "step": 4970 + }, + { + "epoch": 0.40117827455411187, + "grad_norm": 0.7246078252792358, + "learning_rate": 0.00017186205001961015, + "loss": 2.6222, + "step": 4971 + }, + { + "epoch": 0.40125897829069485, + "grad_norm": 0.7566879391670227, + "learning_rate": 0.00017185107087667794, + "loss": 2.7003, + "step": 4972 + }, + { + "epoch": 0.4013396820272779, + "grad_norm": 0.7881271243095398, + "learning_rate": 0.00017184008994302924, + "loss": 2.6463, + "step": 4973 + }, + { + "epoch": 0.40142038576386085, + "grad_norm": 0.7307420372962952, + "learning_rate": 0.00017182910721893775, + "loss": 2.667, + "step": 4974 + }, + { + "epoch": 0.4015010895004439, + "grad_norm": 0.7088132500648499, + "learning_rate": 0.00017181812270467708, + "loss": 2.6073, + "step": 4975 + }, + { + "epoch": 0.40158179323702686, + "grad_norm": 0.7839647531509399, + "learning_rate": 0.0001718071364005211, + "loss": 2.6594, + "step": 4976 + }, + { + "epoch": 0.4016624969736099, + "grad_norm": 0.7472013235092163, + "learning_rate": 0.00017179614830674353, + "loss": 2.737, + "step": 4977 + }, + { + "epoch": 0.40174320071019287, + "grad_norm": 0.7241616249084473, + "learning_rate": 0.0001717851584236183, + "loss": 2.6615, + "step": 4978 + }, + { + "epoch": 0.4018239044467759, + "grad_norm": 0.7918941378593445, + "learning_rate": 0.00017177416675141929, + "loss": 2.6774, + "step": 4979 + }, + { + 
"epoch": 0.4019046081833589, + "grad_norm": 0.801003098487854, + "learning_rate": 0.00017176317329042039, + "loss": 2.6749, + "step": 4980 + }, + { + "epoch": 0.4019853119199419, + "grad_norm": 0.7556802034378052, + "learning_rate": 0.00017175217804089564, + "loss": 2.6197, + "step": 4981 + }, + { + "epoch": 0.4020660156565249, + "grad_norm": 0.7539604902267456, + "learning_rate": 0.00017174118100311904, + "loss": 2.6222, + "step": 4982 + }, + { + "epoch": 0.4021467193931079, + "grad_norm": 0.741436243057251, + "learning_rate": 0.0001717301821773647, + "loss": 2.6471, + "step": 4983 + }, + { + "epoch": 0.4022274231296909, + "grad_norm": 0.7449339628219604, + "learning_rate": 0.0001717191815639067, + "loss": 2.6448, + "step": 4984 + }, + { + "epoch": 0.40230812686627393, + "grad_norm": 0.7771497964859009, + "learning_rate": 0.0001717081791630192, + "loss": 2.673, + "step": 4985 + }, + { + "epoch": 0.4023888306028569, + "grad_norm": 0.6916669607162476, + "learning_rate": 0.00017169717497497646, + "loss": 2.6025, + "step": 4986 + }, + { + "epoch": 0.40246953433943994, + "grad_norm": 0.7373276948928833, + "learning_rate": 0.0001716861690000527, + "loss": 2.6783, + "step": 4987 + }, + { + "epoch": 0.4025502380760229, + "grad_norm": 0.7756158709526062, + "learning_rate": 0.0001716751612385222, + "loss": 2.7296, + "step": 4988 + }, + { + "epoch": 0.40263094181260595, + "grad_norm": 0.7725681066513062, + "learning_rate": 0.00017166415169065933, + "loss": 2.7169, + "step": 4989 + }, + { + "epoch": 0.4027116455491889, + "grad_norm": 0.7165024280548096, + "learning_rate": 0.00017165314035673846, + "loss": 2.677, + "step": 4990 + }, + { + "epoch": 0.40279234928577196, + "grad_norm": 0.8888981938362122, + "learning_rate": 0.00017164212723703404, + "loss": 2.7694, + "step": 4991 + }, + { + "epoch": 0.40287305302235493, + "grad_norm": 0.7439224720001221, + "learning_rate": 0.00017163111233182052, + "loss": 2.674, + "step": 4992 + }, + { + "epoch": 0.40295375675893796, + 
"grad_norm": 0.6948431730270386, + "learning_rate": 0.00017162009564137244, + "loss": 2.6595, + "step": 4993 + }, + { + "epoch": 0.40303446049552094, + "grad_norm": 0.7274380922317505, + "learning_rate": 0.00017160907716596438, + "loss": 2.649, + "step": 4994 + }, + { + "epoch": 0.403115164232104, + "grad_norm": 0.7127148509025574, + "learning_rate": 0.0001715980569058709, + "loss": 2.6883, + "step": 4995 + }, + { + "epoch": 0.40319586796868695, + "grad_norm": 0.7129155993461609, + "learning_rate": 0.00017158703486136668, + "loss": 2.6516, + "step": 4996 + }, + { + "epoch": 0.40327657170527, + "grad_norm": 0.7848126292228699, + "learning_rate": 0.00017157601103272646, + "loss": 2.6778, + "step": 4997 + }, + { + "epoch": 0.40335727544185296, + "grad_norm": 0.752268373966217, + "learning_rate": 0.0001715649854202249, + "loss": 2.7228, + "step": 4998 + }, + { + "epoch": 0.40343797917843593, + "grad_norm": 0.7750338912010193, + "learning_rate": 0.00017155395802413684, + "loss": 2.6338, + "step": 4999 + }, + { + "epoch": 0.40351868291501897, + "grad_norm": 0.7165457010269165, + "learning_rate": 0.00017154292884473713, + "loss": 2.6195, + "step": 5000 + }, + { + "epoch": 0.40351868291501897, + "eval_loss": 2.585501194000244, + "eval_runtime": 901.8519, + "eval_samples_per_second": 2.905, + "eval_steps_per_second": 0.485, + "step": 5000 + }, + { + "epoch": 0.40359938665160194, + "grad_norm": 0.8118943572044373, + "learning_rate": 0.00017153189788230062, + "loss": 2.6649, + "step": 5001 + }, + { + "epoch": 0.403680090388185, + "grad_norm": 0.722984790802002, + "learning_rate": 0.00017152086513710221, + "loss": 2.6929, + "step": 5002 + }, + { + "epoch": 0.40376079412476795, + "grad_norm": 0.700690507888794, + "learning_rate": 0.00017150983060941686, + "loss": 2.6368, + "step": 5003 + }, + { + "epoch": 0.403841497861351, + "grad_norm": 0.7331504225730896, + "learning_rate": 0.00017149879429951965, + "loss": 2.6826, + "step": 5004 + }, + { + "epoch": 0.40392220159793396, + 
"grad_norm": 0.7312643527984619, + "learning_rate": 0.00017148775620768553, + "loss": 2.6279, + "step": 5005 + }, + { + "epoch": 0.404002905334517, + "grad_norm": 0.7488462924957275, + "learning_rate": 0.00017147671633418972, + "loss": 2.6711, + "step": 5006 + }, + { + "epoch": 0.40408360907109997, + "grad_norm": 0.8620340824127197, + "learning_rate": 0.00017146567467930725, + "loss": 2.6637, + "step": 5007 + }, + { + "epoch": 0.404164312807683, + "grad_norm": 0.683907151222229, + "learning_rate": 0.00017145463124331335, + "loss": 2.6331, + "step": 5008 + }, + { + "epoch": 0.404245016544266, + "grad_norm": 0.7389389276504517, + "learning_rate": 0.0001714435860264833, + "loss": 2.7232, + "step": 5009 + }, + { + "epoch": 0.404325720280849, + "grad_norm": 0.7456515431404114, + "learning_rate": 0.00017143253902909228, + "loss": 2.6363, + "step": 5010 + }, + { + "epoch": 0.404406424017432, + "grad_norm": 0.7044962644577026, + "learning_rate": 0.0001714214902514157, + "loss": 2.6672, + "step": 5011 + }, + { + "epoch": 0.404487127754015, + "grad_norm": 0.7410328984260559, + "learning_rate": 0.00017141043969372887, + "loss": 2.6059, + "step": 5012 + }, + { + "epoch": 0.404567831490598, + "grad_norm": 0.6697140336036682, + "learning_rate": 0.00017139938735630722, + "loss": 2.7151, + "step": 5013 + }, + { + "epoch": 0.404648535227181, + "grad_norm": 0.746675431728363, + "learning_rate": 0.00017138833323942617, + "loss": 2.6792, + "step": 5014 + }, + { + "epoch": 0.404729238963764, + "grad_norm": 0.7724997401237488, + "learning_rate": 0.00017137727734336129, + "loss": 2.6234, + "step": 5015 + }, + { + "epoch": 0.40480994270034704, + "grad_norm": 0.8014429211616516, + "learning_rate": 0.00017136621966838805, + "loss": 2.6795, + "step": 5016 + }, + { + "epoch": 0.40489064643693, + "grad_norm": 0.6900430917739868, + "learning_rate": 0.00017135516021478205, + "loss": 2.7127, + "step": 5017 + }, + { + "epoch": 0.40497135017351304, + "grad_norm": 0.6648666858673096, + 
"learning_rate": 0.00017134409898281896, + "loss": 2.6564, + "step": 5018 + }, + { + "epoch": 0.405052053910096, + "grad_norm": 0.7054181098937988, + "learning_rate": 0.00017133303597277442, + "loss": 2.6652, + "step": 5019 + }, + { + "epoch": 0.40513275764667905, + "grad_norm": 0.6847733855247498, + "learning_rate": 0.00017132197118492414, + "loss": 2.6997, + "step": 5020 + }, + { + "epoch": 0.40521346138326203, + "grad_norm": 0.7047749757766724, + "learning_rate": 0.00017131090461954392, + "loss": 2.6752, + "step": 5021 + }, + { + "epoch": 0.40529416511984506, + "grad_norm": 0.7549976706504822, + "learning_rate": 0.00017129983627690957, + "loss": 2.6736, + "step": 5022 + }, + { + "epoch": 0.40537486885642804, + "grad_norm": 0.7436367273330688, + "learning_rate": 0.00017128876615729686, + "loss": 2.7189, + "step": 5023 + }, + { + "epoch": 0.40545557259301107, + "grad_norm": 0.6515071988105774, + "learning_rate": 0.00017127769426098177, + "loss": 2.6422, + "step": 5024 + }, + { + "epoch": 0.40553627632959405, + "grad_norm": 0.6960858702659607, + "learning_rate": 0.00017126662058824024, + "loss": 2.6619, + "step": 5025 + }, + { + "epoch": 0.4056169800661771, + "grad_norm": 0.8075968623161316, + "learning_rate": 0.0001712555451393482, + "loss": 2.6678, + "step": 5026 + }, + { + "epoch": 0.40569768380276006, + "grad_norm": 0.6864624619483948, + "learning_rate": 0.00017124446791458176, + "loss": 2.6331, + "step": 5027 + }, + { + "epoch": 0.4057783875393431, + "grad_norm": 0.7218763828277588, + "learning_rate": 0.0001712333889142169, + "loss": 2.6316, + "step": 5028 + }, + { + "epoch": 0.40585909127592606, + "grad_norm": 0.7024715542793274, + "learning_rate": 0.0001712223081385298, + "loss": 2.623, + "step": 5029 + }, + { + "epoch": 0.4059397950125091, + "grad_norm": 0.6681575775146484, + "learning_rate": 0.0001712112255877966, + "loss": 2.6786, + "step": 5030 + }, + { + "epoch": 0.4060204987490921, + "grad_norm": 0.7249817848205566, + "learning_rate": 
0.0001712001412622935, + "loss": 2.6179, + "step": 5031 + }, + { + "epoch": 0.4061012024856751, + "grad_norm": 0.7178316116333008, + "learning_rate": 0.00017118905516229677, + "loss": 2.696, + "step": 5032 + }, + { + "epoch": 0.4061819062222581, + "grad_norm": 0.7838767766952515, + "learning_rate": 0.0001711779672880827, + "loss": 2.6881, + "step": 5033 + }, + { + "epoch": 0.4062626099588411, + "grad_norm": 0.799937903881073, + "learning_rate": 0.0001711668776399276, + "loss": 2.7587, + "step": 5034 + }, + { + "epoch": 0.4063433136954241, + "grad_norm": 0.7622246146202087, + "learning_rate": 0.0001711557862181079, + "loss": 2.6621, + "step": 5035 + }, + { + "epoch": 0.4064240174320071, + "grad_norm": 0.7158814072608948, + "learning_rate": 0.00017114469302290003, + "loss": 2.6421, + "step": 5036 + }, + { + "epoch": 0.4065047211685901, + "grad_norm": 0.7913404107093811, + "learning_rate": 0.0001711335980545804, + "loss": 2.6323, + "step": 5037 + }, + { + "epoch": 0.40658542490517313, + "grad_norm": 0.718325138092041, + "learning_rate": 0.00017112250131342556, + "loss": 2.6171, + "step": 5038 + }, + { + "epoch": 0.4066661286417561, + "grad_norm": 0.7793646454811096, + "learning_rate": 0.0001711114027997121, + "loss": 2.7494, + "step": 5039 + }, + { + "epoch": 0.40674683237833914, + "grad_norm": 0.7774816155433655, + "learning_rate": 0.00017110030251371656, + "loss": 2.5534, + "step": 5040 + }, + { + "epoch": 0.4068275361149221, + "grad_norm": 0.8547549247741699, + "learning_rate": 0.00017108920045571564, + "loss": 2.7155, + "step": 5041 + }, + { + "epoch": 0.40690823985150515, + "grad_norm": 0.7685851454734802, + "learning_rate": 0.000171078096625986, + "loss": 2.6109, + "step": 5042 + }, + { + "epoch": 0.4069889435880881, + "grad_norm": 0.7953611016273499, + "learning_rate": 0.00017106699102480445, + "loss": 2.7034, + "step": 5043 + }, + { + "epoch": 0.40706964732467116, + "grad_norm": 0.7550730109214783, + "learning_rate": 0.00017105588365244764, + "loss": 2.7026, + 
"step": 5044 + }, + { + "epoch": 0.40715035106125413, + "grad_norm": 0.7036548256874084, + "learning_rate": 0.0001710447745091925, + "loss": 2.6246, + "step": 5045 + }, + { + "epoch": 0.40723105479783717, + "grad_norm": 0.7154512405395508, + "learning_rate": 0.00017103366359531586, + "loss": 2.6592, + "step": 5046 + }, + { + "epoch": 0.40731175853442014, + "grad_norm": 0.7773932218551636, + "learning_rate": 0.00017102255091109463, + "loss": 2.6458, + "step": 5047 + }, + { + "epoch": 0.4073924622710032, + "grad_norm": 0.7458996176719666, + "learning_rate": 0.0001710114364568058, + "loss": 2.643, + "step": 5048 + }, + { + "epoch": 0.40747316600758615, + "grad_norm": 0.7465376257896423, + "learning_rate": 0.00017100032023272633, + "loss": 2.6677, + "step": 5049 + }, + { + "epoch": 0.40755386974416913, + "grad_norm": 0.7340850830078125, + "learning_rate": 0.0001709892022391333, + "loss": 2.6372, + "step": 5050 + }, + { + "epoch": 0.40763457348075216, + "grad_norm": 0.7189164757728577, + "learning_rate": 0.00017097808247630377, + "loss": 2.6524, + "step": 5051 + }, + { + "epoch": 0.40771527721733514, + "grad_norm": 0.6954184174537659, + "learning_rate": 0.0001709669609445149, + "loss": 2.7383, + "step": 5052 + }, + { + "epoch": 0.40779598095391817, + "grad_norm": 0.736409604549408, + "learning_rate": 0.00017095583764404384, + "loss": 2.6424, + "step": 5053 + }, + { + "epoch": 0.40787668469050115, + "grad_norm": 0.6773545742034912, + "learning_rate": 0.0001709447125751678, + "loss": 2.6557, + "step": 5054 + }, + { + "epoch": 0.4079573884270842, + "grad_norm": 0.718748927116394, + "learning_rate": 0.00017093358573816412, + "loss": 2.6884, + "step": 5055 + }, + { + "epoch": 0.40803809216366715, + "grad_norm": 0.8276848793029785, + "learning_rate": 0.00017092245713331002, + "loss": 2.6642, + "step": 5056 + }, + { + "epoch": 0.4081187959002502, + "grad_norm": 0.7694761157035828, + "learning_rate": 0.00017091132676088294, + "loss": 2.644, + "step": 5057 + }, + { + "epoch": 
0.40819949963683316, + "grad_norm": 0.766724705696106, + "learning_rate": 0.0001709001946211602, + "loss": 2.6918, + "step": 5058 + }, + { + "epoch": 0.4082802033734162, + "grad_norm": 0.7067074775695801, + "learning_rate": 0.00017088906071441927, + "loss": 2.7228, + "step": 5059 + }, + { + "epoch": 0.40836090710999917, + "grad_norm": 0.7216899991035461, + "learning_rate": 0.00017087792504093767, + "loss": 2.7068, + "step": 5060 + }, + { + "epoch": 0.4084416108465822, + "grad_norm": 0.6728984713554382, + "learning_rate": 0.00017086678760099287, + "loss": 2.686, + "step": 5061 + }, + { + "epoch": 0.4085223145831652, + "grad_norm": 0.7546882033348083, + "learning_rate": 0.0001708556483948625, + "loss": 2.6907, + "step": 5062 + }, + { + "epoch": 0.4086030183197482, + "grad_norm": 0.7471179962158203, + "learning_rate": 0.00017084450742282416, + "loss": 2.6857, + "step": 5063 + }, + { + "epoch": 0.4086837220563312, + "grad_norm": 0.7879743576049805, + "learning_rate": 0.00017083336468515548, + "loss": 2.7224, + "step": 5064 + }, + { + "epoch": 0.4087644257929142, + "grad_norm": 0.691343367099762, + "learning_rate": 0.00017082222018213422, + "loss": 2.6561, + "step": 5065 + }, + { + "epoch": 0.4088451295294972, + "grad_norm": 0.7497386336326599, + "learning_rate": 0.00017081107391403805, + "loss": 2.6317, + "step": 5066 + }, + { + "epoch": 0.40892583326608023, + "grad_norm": 0.6846269965171814, + "learning_rate": 0.00017079992588114485, + "loss": 2.6522, + "step": 5067 + }, + { + "epoch": 0.4090065370026632, + "grad_norm": 0.7312905192375183, + "learning_rate": 0.0001707887760837324, + "loss": 2.588, + "step": 5068 + }, + { + "epoch": 0.40908724073924624, + "grad_norm": 0.6966867446899414, + "learning_rate": 0.00017077762452207866, + "loss": 2.6316, + "step": 5069 + }, + { + "epoch": 0.4091679444758292, + "grad_norm": 0.6882073283195496, + "learning_rate": 0.00017076647119646147, + "loss": 2.6977, + "step": 5070 + }, + { + "epoch": 0.40924864821241225, + "grad_norm": 
0.7392483949661255, + "learning_rate": 0.00017075531610715884, + "loss": 2.6768, + "step": 5071 + }, + { + "epoch": 0.4093293519489952, + "grad_norm": 0.7311073541641235, + "learning_rate": 0.00017074415925444876, + "loss": 2.6628, + "step": 5072 + }, + { + "epoch": 0.40941005568557826, + "grad_norm": 0.6769934296607971, + "learning_rate": 0.00017073300063860934, + "loss": 2.6438, + "step": 5073 + }, + { + "epoch": 0.40949075942216123, + "grad_norm": 0.736456573009491, + "learning_rate": 0.00017072184025991862, + "loss": 2.6151, + "step": 5074 + }, + { + "epoch": 0.40957146315874426, + "grad_norm": 0.7026283740997314, + "learning_rate": 0.00017071067811865476, + "loss": 2.6726, + "step": 5075 + }, + { + "epoch": 0.40965216689532724, + "grad_norm": 0.6825234293937683, + "learning_rate": 0.00017069951421509597, + "loss": 2.6795, + "step": 5076 + }, + { + "epoch": 0.4097328706319103, + "grad_norm": 0.7243828773498535, + "learning_rate": 0.0001706883485495205, + "loss": 2.687, + "step": 5077 + }, + { + "epoch": 0.40981357436849325, + "grad_norm": 0.7300469875335693, + "learning_rate": 0.00017067718112220658, + "loss": 2.6268, + "step": 5078 + }, + { + "epoch": 0.4098942781050763, + "grad_norm": 0.698095440864563, + "learning_rate": 0.00017066601193343255, + "loss": 2.6461, + "step": 5079 + }, + { + "epoch": 0.40997498184165926, + "grad_norm": 0.7318777441978455, + "learning_rate": 0.00017065484098347677, + "loss": 2.6817, + "step": 5080 + }, + { + "epoch": 0.4100556855782423, + "grad_norm": 0.7681582570075989, + "learning_rate": 0.00017064366827261772, + "loss": 2.7309, + "step": 5081 + }, + { + "epoch": 0.41013638931482527, + "grad_norm": 0.7690179944038391, + "learning_rate": 0.0001706324938011337, + "loss": 2.6292, + "step": 5082 + }, + { + "epoch": 0.4102170930514083, + "grad_norm": 0.6745284199714661, + "learning_rate": 0.00017062131756930338, + "loss": 2.7133, + "step": 5083 + }, + { + "epoch": 0.4102977967879913, + "grad_norm": 0.7524279952049255, + 
"learning_rate": 0.00017061013957740518, + "loss": 2.6237, + "step": 5084 + }, + { + "epoch": 0.4103785005245743, + "grad_norm": 0.7813692092895508, + "learning_rate": 0.00017059895982571773, + "loss": 2.6953, + "step": 5085 + }, + { + "epoch": 0.4104592042611573, + "grad_norm": 0.7128829956054688, + "learning_rate": 0.00017058777831451967, + "loss": 2.6771, + "step": 5086 + }, + { + "epoch": 0.4105399079977403, + "grad_norm": 0.7249834537506104, + "learning_rate": 0.00017057659504408963, + "loss": 2.6376, + "step": 5087 + }, + { + "epoch": 0.4106206117343233, + "grad_norm": 0.7742593288421631, + "learning_rate": 0.00017056541001470637, + "loss": 2.6227, + "step": 5088 + }, + { + "epoch": 0.4107013154709063, + "grad_norm": 0.6994228959083557, + "learning_rate": 0.00017055422322664863, + "loss": 2.6573, + "step": 5089 + }, + { + "epoch": 0.4107820192074893, + "grad_norm": 0.7144249081611633, + "learning_rate": 0.00017054303468019518, + "loss": 2.6602, + "step": 5090 + }, + { + "epoch": 0.41086272294407233, + "grad_norm": 0.7695099711418152, + "learning_rate": 0.00017053184437562497, + "loss": 2.6516, + "step": 5091 + }, + { + "epoch": 0.4109434266806553, + "grad_norm": 0.7610031962394714, + "learning_rate": 0.00017052065231321678, + "loss": 2.6963, + "step": 5092 + }, + { + "epoch": 0.41102413041723834, + "grad_norm": 0.7117859721183777, + "learning_rate": 0.0001705094584932496, + "loss": 2.6954, + "step": 5093 + }, + { + "epoch": 0.4111048341538213, + "grad_norm": 0.7891486287117004, + "learning_rate": 0.00017049826291600244, + "loss": 2.7265, + "step": 5094 + }, + { + "epoch": 0.41118553789040435, + "grad_norm": 0.7347370386123657, + "learning_rate": 0.00017048706558175423, + "loss": 2.658, + "step": 5095 + }, + { + "epoch": 0.41126624162698733, + "grad_norm": 0.7541289925575256, + "learning_rate": 0.00017047586649078414, + "loss": 2.6596, + "step": 5096 + }, + { + "epoch": 0.41134694536357036, + "grad_norm": 0.7471255660057068, + "learning_rate": 
0.00017046466564337118, + "loss": 2.7008, + "step": 5097 + }, + { + "epoch": 0.41142764910015334, + "grad_norm": 0.7566937208175659, + "learning_rate": 0.00017045346303979457, + "loss": 2.7006, + "step": 5098 + }, + { + "epoch": 0.41150835283673637, + "grad_norm": 0.6991304159164429, + "learning_rate": 0.00017044225868033353, + "loss": 2.6846, + "step": 5099 + }, + { + "epoch": 0.41158905657331935, + "grad_norm": 0.7286314368247986, + "learning_rate": 0.00017043105256526724, + "loss": 2.6219, + "step": 5100 + }, + { + "epoch": 0.4116697603099023, + "grad_norm": 0.6953727006912231, + "learning_rate": 0.000170419844694875, + "loss": 2.6093, + "step": 5101 + }, + { + "epoch": 0.41175046404648535, + "grad_norm": 0.6942756772041321, + "learning_rate": 0.00017040863506943615, + "loss": 2.6399, + "step": 5102 + }, + { + "epoch": 0.41183116778306833, + "grad_norm": 0.7513531446456909, + "learning_rate": 0.00017039742368923005, + "loss": 2.6187, + "step": 5103 + }, + { + "epoch": 0.41191187151965136, + "grad_norm": 0.7530633211135864, + "learning_rate": 0.00017038621055453617, + "loss": 2.6124, + "step": 5104 + }, + { + "epoch": 0.41199257525623434, + "grad_norm": 0.7487555146217346, + "learning_rate": 0.00017037499566563392, + "loss": 2.6331, + "step": 5105 + }, + { + "epoch": 0.41207327899281737, + "grad_norm": 0.7641858458518982, + "learning_rate": 0.00017036377902280282, + "loss": 2.6875, + "step": 5106 + }, + { + "epoch": 0.41215398272940035, + "grad_norm": 0.6962767839431763, + "learning_rate": 0.0001703525606263224, + "loss": 2.6538, + "step": 5107 + }, + { + "epoch": 0.4122346864659834, + "grad_norm": 0.8183409571647644, + "learning_rate": 0.0001703413404764723, + "loss": 2.6204, + "step": 5108 + }, + { + "epoch": 0.41231539020256636, + "grad_norm": 0.7029808759689331, + "learning_rate": 0.00017033011857353207, + "loss": 2.6369, + "step": 5109 + }, + { + "epoch": 0.4123960939391494, + "grad_norm": 0.7171663045883179, + "learning_rate": 0.00017031889491778149, + 
"loss": 2.6211, + "step": 5110 + }, + { + "epoch": 0.41247679767573237, + "grad_norm": 0.7456090450286865, + "learning_rate": 0.0001703076695095002, + "loss": 2.6574, + "step": 5111 + }, + { + "epoch": 0.4125575014123154, + "grad_norm": 0.7468575239181519, + "learning_rate": 0.000170296442348968, + "loss": 2.598, + "step": 5112 + }, + { + "epoch": 0.4126382051488984, + "grad_norm": 0.7106603384017944, + "learning_rate": 0.0001702852134364647, + "loss": 2.6577, + "step": 5113 + }, + { + "epoch": 0.4127189088854814, + "grad_norm": 0.7788330912590027, + "learning_rate": 0.00017027398277227017, + "loss": 2.6797, + "step": 5114 + }, + { + "epoch": 0.4127996126220644, + "grad_norm": 0.7794120907783508, + "learning_rate": 0.00017026275035666427, + "loss": 2.5834, + "step": 5115 + }, + { + "epoch": 0.4128803163586474, + "grad_norm": 0.7270684838294983, + "learning_rate": 0.00017025151618992702, + "loss": 2.7153, + "step": 5116 + }, + { + "epoch": 0.4129610200952304, + "grad_norm": 0.8169006109237671, + "learning_rate": 0.00017024028027233827, + "loss": 2.6786, + "step": 5117 + }, + { + "epoch": 0.4130417238318134, + "grad_norm": 0.8053112626075745, + "learning_rate": 0.00017022904260417815, + "loss": 2.6456, + "step": 5118 + }, + { + "epoch": 0.4131224275683964, + "grad_norm": 0.7646365165710449, + "learning_rate": 0.0001702178031857267, + "loss": 2.6784, + "step": 5119 + }, + { + "epoch": 0.41320313130497943, + "grad_norm": 0.7878902554512024, + "learning_rate": 0.00017020656201726406, + "loss": 2.66, + "step": 5120 + }, + { + "epoch": 0.4132838350415624, + "grad_norm": 0.8602383732795715, + "learning_rate": 0.00017019531909907037, + "loss": 2.7018, + "step": 5121 + }, + { + "epoch": 0.41336453877814544, + "grad_norm": 0.801092267036438, + "learning_rate": 0.00017018407443142585, + "loss": 2.7728, + "step": 5122 + }, + { + "epoch": 0.4134452425147284, + "grad_norm": 0.7372604012489319, + "learning_rate": 0.00017017282801461074, + "loss": 2.6588, + "step": 5123 + }, + { + 
"epoch": 0.41352594625131145, + "grad_norm": 0.7553830146789551, + "learning_rate": 0.0001701615798489053, + "loss": 2.6844, + "step": 5124 + }, + { + "epoch": 0.4136066499878944, + "grad_norm": 0.7699872255325317, + "learning_rate": 0.0001701503299345899, + "loss": 2.6523, + "step": 5125 + }, + { + "epoch": 0.41368735372447746, + "grad_norm": 0.7087047696113586, + "learning_rate": 0.0001701390782719449, + "loss": 2.6785, + "step": 5126 + }, + { + "epoch": 0.41376805746106043, + "grad_norm": 0.7835792303085327, + "learning_rate": 0.0001701278248612507, + "loss": 2.7064, + "step": 5127 + }, + { + "epoch": 0.41384876119764347, + "grad_norm": 0.7833154201507568, + "learning_rate": 0.0001701165697027878, + "loss": 2.6552, + "step": 5128 + }, + { + "epoch": 0.41392946493422644, + "grad_norm": 0.8240615725517273, + "learning_rate": 0.0001701053127968367, + "loss": 2.7074, + "step": 5129 + }, + { + "epoch": 0.4140101686708095, + "grad_norm": 0.7612149119377136, + "learning_rate": 0.0001700940541436779, + "loss": 2.7484, + "step": 5130 + }, + { + "epoch": 0.41409087240739245, + "grad_norm": 0.7795391082763672, + "learning_rate": 0.00017008279374359212, + "loss": 2.6022, + "step": 5131 + }, + { + "epoch": 0.4141715761439755, + "grad_norm": 0.7714587450027466, + "learning_rate": 0.00017007153159685992, + "loss": 2.6529, + "step": 5132 + }, + { + "epoch": 0.41425227988055846, + "grad_norm": 0.7821317911148071, + "learning_rate": 0.00017006026770376194, + "loss": 2.6356, + "step": 5133 + }, + { + "epoch": 0.4143329836171415, + "grad_norm": 0.7300596833229065, + "learning_rate": 0.00017004900206457897, + "loss": 2.6552, + "step": 5134 + }, + { + "epoch": 0.41441368735372447, + "grad_norm": 0.780505359172821, + "learning_rate": 0.00017003773467959174, + "loss": 2.675, + "step": 5135 + }, + { + "epoch": 0.4144943910903075, + "grad_norm": 0.7107391357421875, + "learning_rate": 0.00017002646554908107, + "loss": 2.7096, + "step": 5136 + }, + { + "epoch": 0.4145750948268905, + 
"grad_norm": 0.7358834743499756, + "learning_rate": 0.0001700151946733279, + "loss": 2.6619, + "step": 5137 + }, + { + "epoch": 0.4146557985634735, + "grad_norm": 0.7573859095573425, + "learning_rate": 0.00017000392205261298, + "loss": 2.6234, + "step": 5138 + }, + { + "epoch": 0.4147365023000565, + "grad_norm": 0.7032024264335632, + "learning_rate": 0.00016999264768721738, + "loss": 2.6096, + "step": 5139 + }, + { + "epoch": 0.4148172060366395, + "grad_norm": 0.743813693523407, + "learning_rate": 0.00016998137157742203, + "loss": 2.6782, + "step": 5140 + }, + { + "epoch": 0.4148979097732225, + "grad_norm": 0.8861347436904907, + "learning_rate": 0.00016997009372350793, + "loss": 2.6645, + "step": 5141 + }, + { + "epoch": 0.4149786135098055, + "grad_norm": 0.7598684430122375, + "learning_rate": 0.00016995881412575623, + "loss": 2.649, + "step": 5142 + }, + { + "epoch": 0.4150593172463885, + "grad_norm": 0.7535565495491028, + "learning_rate": 0.00016994753278444798, + "loss": 2.6449, + "step": 5143 + }, + { + "epoch": 0.41514002098297154, + "grad_norm": 0.7073138356208801, + "learning_rate": 0.0001699362496998644, + "loss": 2.6253, + "step": 5144 + }, + { + "epoch": 0.4152207247195545, + "grad_norm": 0.7161526679992676, + "learning_rate": 0.00016992496487228662, + "loss": 2.6623, + "step": 5145 + }, + { + "epoch": 0.41530142845613754, + "grad_norm": 0.8284714818000793, + "learning_rate": 0.00016991367830199595, + "loss": 2.7363, + "step": 5146 + }, + { + "epoch": 0.4153821321927205, + "grad_norm": 0.7127673625946045, + "learning_rate": 0.0001699023899892737, + "loss": 2.6274, + "step": 5147 + }, + { + "epoch": 0.41546283592930355, + "grad_norm": 0.7496370673179626, + "learning_rate": 0.00016989109993440112, + "loss": 2.6364, + "step": 5148 + }, + { + "epoch": 0.41554353966588653, + "grad_norm": 0.7616143822669983, + "learning_rate": 0.00016987980813765963, + "loss": 2.7225, + "step": 5149 + }, + { + "epoch": 0.41562424340246956, + "grad_norm": 0.6935909986495972, + 
"learning_rate": 0.00016986851459933067, + "loss": 2.6109, + "step": 5150 + }, + { + "epoch": 0.41570494713905254, + "grad_norm": 0.721023678779602, + "learning_rate": 0.00016985721931969566, + "loss": 2.6993, + "step": 5151 + }, + { + "epoch": 0.4157856508756355, + "grad_norm": 0.8216699361801147, + "learning_rate": 0.00016984592229903617, + "loss": 2.6512, + "step": 5152 + }, + { + "epoch": 0.41586635461221855, + "grad_norm": 0.7425234913825989, + "learning_rate": 0.00016983462353763372, + "loss": 2.5903, + "step": 5153 + }, + { + "epoch": 0.4159470583488015, + "grad_norm": 0.7292542457580566, + "learning_rate": 0.00016982332303576986, + "loss": 2.692, + "step": 5154 + }, + { + "epoch": 0.41602776208538456, + "grad_norm": 0.7466831803321838, + "learning_rate": 0.0001698120207937263, + "loss": 2.7145, + "step": 5155 + }, + { + "epoch": 0.41610846582196753, + "grad_norm": 0.7271949648857117, + "learning_rate": 0.00016980071681178471, + "loss": 2.655, + "step": 5156 + }, + { + "epoch": 0.41618916955855056, + "grad_norm": 0.7505547404289246, + "learning_rate": 0.00016978941109022677, + "loss": 2.7167, + "step": 5157 + }, + { + "epoch": 0.41626987329513354, + "grad_norm": 0.7307172417640686, + "learning_rate": 0.00016977810362933427, + "loss": 2.6735, + "step": 5158 + }, + { + "epoch": 0.4163505770317166, + "grad_norm": 0.7839170098304749, + "learning_rate": 0.00016976679442938904, + "loss": 2.6818, + "step": 5159 + }, + { + "epoch": 0.41643128076829955, + "grad_norm": 0.7131803631782532, + "learning_rate": 0.00016975548349067293, + "loss": 2.6921, + "step": 5160 + }, + { + "epoch": 0.4165119845048826, + "grad_norm": 0.8129798173904419, + "learning_rate": 0.0001697441708134678, + "loss": 2.6682, + "step": 5161 + }, + { + "epoch": 0.41659268824146556, + "grad_norm": 0.7634746432304382, + "learning_rate": 0.00016973285639805563, + "loss": 2.6684, + "step": 5162 + }, + { + "epoch": 0.4166733919780486, + "grad_norm": 0.7367348074913025, + "learning_rate": 
0.0001697215402447184, + "loss": 2.6424, + "step": 5163 + }, + { + "epoch": 0.41675409571463157, + "grad_norm": 0.7235338687896729, + "learning_rate": 0.00016971022235373815, + "loss": 2.6817, + "step": 5164 + }, + { + "epoch": 0.4168347994512146, + "grad_norm": 0.7764291763305664, + "learning_rate": 0.0001696989027253969, + "loss": 2.6477, + "step": 5165 + }, + { + "epoch": 0.4169155031877976, + "grad_norm": 0.8207562565803528, + "learning_rate": 0.00016968758135997683, + "loss": 2.6408, + "step": 5166 + }, + { + "epoch": 0.4169962069243806, + "grad_norm": 0.7291484475135803, + "learning_rate": 0.00016967625825776005, + "loss": 2.6233, + "step": 5167 + }, + { + "epoch": 0.4170769106609636, + "grad_norm": 0.7060603499412537, + "learning_rate": 0.0001696649334190288, + "loss": 2.6204, + "step": 5168 + }, + { + "epoch": 0.4171576143975466, + "grad_norm": 0.7058241963386536, + "learning_rate": 0.00016965360684406528, + "loss": 2.6212, + "step": 5169 + }, + { + "epoch": 0.4172383181341296, + "grad_norm": 0.8248410224914551, + "learning_rate": 0.00016964227853315177, + "loss": 2.6688, + "step": 5170 + }, + { + "epoch": 0.4173190218707126, + "grad_norm": 0.7287606596946716, + "learning_rate": 0.0001696309484865707, + "loss": 2.6201, + "step": 5171 + }, + { + "epoch": 0.4173997256072956, + "grad_norm": 0.7214288115501404, + "learning_rate": 0.00016961961670460433, + "loss": 2.682, + "step": 5172 + }, + { + "epoch": 0.41748042934387863, + "grad_norm": 0.7133594155311584, + "learning_rate": 0.00016960828318753516, + "loss": 2.7167, + "step": 5173 + }, + { + "epoch": 0.4175611330804616, + "grad_norm": 0.6935842633247375, + "learning_rate": 0.00016959694793564558, + "loss": 2.6134, + "step": 5174 + }, + { + "epoch": 0.41764183681704464, + "grad_norm": 0.6863382458686829, + "learning_rate": 0.00016958561094921815, + "loss": 2.6396, + "step": 5175 + }, + { + "epoch": 0.4177225405536276, + "grad_norm": 0.7659433484077454, + "learning_rate": 0.0001695742722285354, + "loss": 
2.6926, + "step": 5176 + }, + { + "epoch": 0.41780324429021065, + "grad_norm": 0.6997129917144775, + "learning_rate": 0.00016956293177387992, + "loss": 2.6983, + "step": 5177 + }, + { + "epoch": 0.41788394802679363, + "grad_norm": 0.6784526705741882, + "learning_rate": 0.00016955158958553433, + "loss": 2.6961, + "step": 5178 + }, + { + "epoch": 0.41796465176337666, + "grad_norm": 0.8227884769439697, + "learning_rate": 0.00016954024566378132, + "loss": 2.7008, + "step": 5179 + }, + { + "epoch": 0.41804535549995964, + "grad_norm": 0.7733054757118225, + "learning_rate": 0.0001695289000089036, + "loss": 2.6615, + "step": 5180 + }, + { + "epoch": 0.41812605923654267, + "grad_norm": 0.7077545523643494, + "learning_rate": 0.00016951755262118394, + "loss": 2.6388, + "step": 5181 + }, + { + "epoch": 0.41820676297312565, + "grad_norm": 0.7962050437927246, + "learning_rate": 0.00016950620350090513, + "loss": 2.7063, + "step": 5182 + }, + { + "epoch": 0.4182874667097087, + "grad_norm": 0.6950554847717285, + "learning_rate": 0.00016949485264835005, + "loss": 2.7076, + "step": 5183 + }, + { + "epoch": 0.41836817044629165, + "grad_norm": 0.8546960949897766, + "learning_rate": 0.00016948350006380162, + "loss": 2.6533, + "step": 5184 + }, + { + "epoch": 0.4184488741828747, + "grad_norm": 0.7469324469566345, + "learning_rate": 0.00016947214574754272, + "loss": 2.5884, + "step": 5185 + }, + { + "epoch": 0.41852957791945766, + "grad_norm": 0.7125554084777832, + "learning_rate": 0.0001694607896998563, + "loss": 2.6448, + "step": 5186 + }, + { + "epoch": 0.4186102816560407, + "grad_norm": 0.6998329758644104, + "learning_rate": 0.00016944943192102549, + "loss": 2.5569, + "step": 5187 + }, + { + "epoch": 0.41869098539262367, + "grad_norm": 0.9046749472618103, + "learning_rate": 0.00016943807241133328, + "loss": 2.7701, + "step": 5188 + }, + { + "epoch": 0.4187716891292067, + "grad_norm": 0.7842074036598206, + "learning_rate": 0.00016942671117106274, + "loss": 2.7124, + "step": 5189 + }, + 
{ + "epoch": 0.4188523928657897, + "grad_norm": 0.7625874280929565, + "learning_rate": 0.00016941534820049713, + "loss": 2.6626, + "step": 5190 + }, + { + "epoch": 0.4189330966023727, + "grad_norm": 0.7006461024284363, + "learning_rate": 0.00016940398349991957, + "loss": 2.6283, + "step": 5191 + }, + { + "epoch": 0.4190138003389557, + "grad_norm": 0.7081875205039978, + "learning_rate": 0.00016939261706961332, + "loss": 2.69, + "step": 5192 + }, + { + "epoch": 0.4190945040755387, + "grad_norm": 0.7554503083229065, + "learning_rate": 0.00016938124890986166, + "loss": 2.641, + "step": 5193 + }, + { + "epoch": 0.4191752078121217, + "grad_norm": 0.7478535175323486, + "learning_rate": 0.0001693698790209479, + "loss": 2.7035, + "step": 5194 + }, + { + "epoch": 0.41925591154870473, + "grad_norm": 0.7323064208030701, + "learning_rate": 0.00016935850740315545, + "loss": 2.6713, + "step": 5195 + }, + { + "epoch": 0.4193366152852877, + "grad_norm": 0.8011505007743835, + "learning_rate": 0.00016934713405676764, + "loss": 2.6413, + "step": 5196 + }, + { + "epoch": 0.41941731902187074, + "grad_norm": 0.768851637840271, + "learning_rate": 0.00016933575898206804, + "loss": 2.6147, + "step": 5197 + }, + { + "epoch": 0.4194980227584537, + "grad_norm": 0.7255160808563232, + "learning_rate": 0.00016932438217934006, + "loss": 2.6093, + "step": 5198 + }, + { + "epoch": 0.41957872649503675, + "grad_norm": 0.7431769967079163, + "learning_rate": 0.00016931300364886722, + "loss": 2.6658, + "step": 5199 + }, + { + "epoch": 0.4196594302316197, + "grad_norm": 0.7532122731208801, + "learning_rate": 0.00016930162339093318, + "loss": 2.6371, + "step": 5200 + }, + { + "epoch": 0.41974013396820276, + "grad_norm": 0.7253943681716919, + "learning_rate": 0.00016929024140582152, + "loss": 2.6365, + "step": 5201 + }, + { + "epoch": 0.41982083770478573, + "grad_norm": 0.7323265075683594, + "learning_rate": 0.00016927885769381593, + "loss": 2.7096, + "step": 5202 + }, + { + "epoch": 0.4199015414413687, + 
"grad_norm": 0.7340009808540344, + "learning_rate": 0.00016926747225520008, + "loss": 2.6983, + "step": 5203 + }, + { + "epoch": 0.41998224517795174, + "grad_norm": 0.838706374168396, + "learning_rate": 0.00016925608509025776, + "loss": 2.7098, + "step": 5204 + }, + { + "epoch": 0.4200629489145347, + "grad_norm": 0.7320838570594788, + "learning_rate": 0.0001692446961992728, + "loss": 2.6767, + "step": 5205 + }, + { + "epoch": 0.42014365265111775, + "grad_norm": 0.7275335192680359, + "learning_rate": 0.00016923330558252898, + "loss": 2.6754, + "step": 5206 + }, + { + "epoch": 0.4202243563877007, + "grad_norm": 0.7572353482246399, + "learning_rate": 0.00016922191324031017, + "loss": 2.7076, + "step": 5207 + }, + { + "epoch": 0.42030506012428376, + "grad_norm": 0.7991098165512085, + "learning_rate": 0.0001692105191729004, + "loss": 2.7281, + "step": 5208 + }, + { + "epoch": 0.42038576386086673, + "grad_norm": 0.70769202709198, + "learning_rate": 0.00016919912338058356, + "loss": 2.684, + "step": 5209 + }, + { + "epoch": 0.42046646759744977, + "grad_norm": 0.6895349621772766, + "learning_rate": 0.0001691877258636436, + "loss": 2.6723, + "step": 5210 + }, + { + "epoch": 0.42054717133403274, + "grad_norm": 0.7368944883346558, + "learning_rate": 0.00016917632662236476, + "loss": 2.601, + "step": 5211 + }, + { + "epoch": 0.4206278750706158, + "grad_norm": 0.7122060060501099, + "learning_rate": 0.00016916492565703097, + "loss": 2.703, + "step": 5212 + }, + { + "epoch": 0.42070857880719875, + "grad_norm": 0.735251784324646, + "learning_rate": 0.00016915352296792646, + "loss": 2.7715, + "step": 5213 + }, + { + "epoch": 0.4207892825437818, + "grad_norm": 0.7686039805412292, + "learning_rate": 0.00016914211855533536, + "loss": 2.6935, + "step": 5214 + }, + { + "epoch": 0.42086998628036476, + "grad_norm": 0.8457472920417786, + "learning_rate": 0.00016913071241954195, + "loss": 2.6535, + "step": 5215 + }, + { + "epoch": 0.4209506900169478, + "grad_norm": 0.6913465261459351, + 
"learning_rate": 0.00016911930456083046, + "loss": 2.6453, + "step": 5216 + }, + { + "epoch": 0.42103139375353077, + "grad_norm": 0.6939878463745117, + "learning_rate": 0.00016910789497948524, + "loss": 2.6483, + "step": 5217 + }, + { + "epoch": 0.4211120974901138, + "grad_norm": 0.7240888476371765, + "learning_rate": 0.00016909648367579062, + "loss": 2.6649, + "step": 5218 + }, + { + "epoch": 0.4211928012266968, + "grad_norm": 0.7570972442626953, + "learning_rate": 0.00016908507065003102, + "loss": 2.6633, + "step": 5219 + }, + { + "epoch": 0.4212735049632798, + "grad_norm": 0.72161465883255, + "learning_rate": 0.00016907365590249082, + "loss": 2.6999, + "step": 5220 + }, + { + "epoch": 0.4213542086998628, + "grad_norm": 0.7818038463592529, + "learning_rate": 0.00016906223943345458, + "loss": 2.6478, + "step": 5221 + }, + { + "epoch": 0.4214349124364458, + "grad_norm": 0.7292464971542358, + "learning_rate": 0.00016905082124320684, + "loss": 2.6725, + "step": 5222 + }, + { + "epoch": 0.4215156161730288, + "grad_norm": 0.7612937092781067, + "learning_rate": 0.0001690394013320321, + "loss": 2.6474, + "step": 5223 + }, + { + "epoch": 0.4215963199096118, + "grad_norm": 0.7325131297111511, + "learning_rate": 0.000169027979700215, + "loss": 2.6525, + "step": 5224 + }, + { + "epoch": 0.4216770236461948, + "grad_norm": 0.7736644148826599, + "learning_rate": 0.00016901655634804022, + "loss": 2.662, + "step": 5225 + }, + { + "epoch": 0.42175772738277784, + "grad_norm": 0.758522629737854, + "learning_rate": 0.00016900513127579244, + "loss": 2.6558, + "step": 5226 + }, + { + "epoch": 0.4218384311193608, + "grad_norm": 0.7559491991996765, + "learning_rate": 0.00016899370448375642, + "loss": 2.7361, + "step": 5227 + }, + { + "epoch": 0.42191913485594384, + "grad_norm": 0.7791146039962769, + "learning_rate": 0.00016898227597221692, + "loss": 2.6739, + "step": 5228 + }, + { + "epoch": 0.4219998385925268, + "grad_norm": 0.7280717492103577, + "learning_rate": 0.00016897084574145878, 
+ "loss": 2.6316, + "step": 5229 + }, + { + "epoch": 0.42208054232910985, + "grad_norm": 0.7455596327781677, + "learning_rate": 0.0001689594137917669, + "loss": 2.7244, + "step": 5230 + }, + { + "epoch": 0.42216124606569283, + "grad_norm": 0.7965813875198364, + "learning_rate": 0.00016894798012342613, + "loss": 2.6757, + "step": 5231 + }, + { + "epoch": 0.42224194980227586, + "grad_norm": 0.6740596294403076, + "learning_rate": 0.00016893654473672148, + "loss": 2.631, + "step": 5232 + }, + { + "epoch": 0.42232265353885884, + "grad_norm": 0.695105254650116, + "learning_rate": 0.00016892510763193795, + "loss": 2.6563, + "step": 5233 + }, + { + "epoch": 0.42240335727544187, + "grad_norm": 0.7623865008354187, + "learning_rate": 0.00016891366880936051, + "loss": 2.6738, + "step": 5234 + }, + { + "epoch": 0.42248406101202485, + "grad_norm": 0.7545912265777588, + "learning_rate": 0.00016890222826927435, + "loss": 2.6949, + "step": 5235 + }, + { + "epoch": 0.4225647647486079, + "grad_norm": 0.7280749678611755, + "learning_rate": 0.00016889078601196452, + "loss": 2.6571, + "step": 5236 + }, + { + "epoch": 0.42264546848519086, + "grad_norm": 0.6624523401260376, + "learning_rate": 0.00016887934203771625, + "loss": 2.6854, + "step": 5237 + }, + { + "epoch": 0.4227261722217739, + "grad_norm": 0.7835487127304077, + "learning_rate": 0.0001688678963468147, + "loss": 2.6437, + "step": 5238 + }, + { + "epoch": 0.42280687595835686, + "grad_norm": 0.7384940981864929, + "learning_rate": 0.00016885644893954518, + "loss": 2.6584, + "step": 5239 + }, + { + "epoch": 0.4228875796949399, + "grad_norm": 0.8227531313896179, + "learning_rate": 0.00016884499981619292, + "loss": 2.673, + "step": 5240 + }, + { + "epoch": 0.4229682834315229, + "grad_norm": 0.7442220449447632, + "learning_rate": 0.00016883354897704334, + "loss": 2.6729, + "step": 5241 + }, + { + "epoch": 0.4230489871681059, + "grad_norm": 0.7182636857032776, + "learning_rate": 0.00016882209642238175, + "loss": 2.6833, + "step": 5242 
+ }, + { + "epoch": 0.4231296909046889, + "grad_norm": 0.7061870098114014, + "learning_rate": 0.00016881064215249362, + "loss": 2.6696, + "step": 5243 + }, + { + "epoch": 0.4232103946412719, + "grad_norm": 0.6792885065078735, + "learning_rate": 0.00016879918616766445, + "loss": 2.6805, + "step": 5244 + }, + { + "epoch": 0.4232910983778549, + "grad_norm": 0.7439807057380676, + "learning_rate": 0.00016878772846817968, + "loss": 2.6522, + "step": 5245 + }, + { + "epoch": 0.4233718021144379, + "grad_norm": 0.7078969478607178, + "learning_rate": 0.00016877626905432492, + "loss": 2.6549, + "step": 5246 + }, + { + "epoch": 0.4234525058510209, + "grad_norm": 0.7103868126869202, + "learning_rate": 0.00016876480792638577, + "loss": 2.6812, + "step": 5247 + }, + { + "epoch": 0.42353320958760393, + "grad_norm": 0.7224452495574951, + "learning_rate": 0.00016875334508464782, + "loss": 2.6657, + "step": 5248 + }, + { + "epoch": 0.4236139133241869, + "grad_norm": 0.6885106563568115, + "learning_rate": 0.00016874188052939682, + "loss": 2.6421, + "step": 5249 + }, + { + "epoch": 0.42369461706076994, + "grad_norm": 0.6736720204353333, + "learning_rate": 0.00016873041426091845, + "loss": 2.6717, + "step": 5250 + }, + { + "epoch": 0.4237753207973529, + "grad_norm": 0.7597963809967041, + "learning_rate": 0.00016871894627949846, + "loss": 2.6231, + "step": 5251 + }, + { + "epoch": 0.42385602453393595, + "grad_norm": 0.8295687437057495, + "learning_rate": 0.00016870747658542275, + "loss": 2.6631, + "step": 5252 + }, + { + "epoch": 0.4239367282705189, + "grad_norm": 0.6750548481941223, + "learning_rate": 0.0001686960051789771, + "loss": 2.6997, + "step": 5253 + }, + { + "epoch": 0.4240174320071019, + "grad_norm": 0.7229160666465759, + "learning_rate": 0.0001686845320604474, + "loss": 2.6525, + "step": 5254 + }, + { + "epoch": 0.42409813574368493, + "grad_norm": 0.8318623900413513, + "learning_rate": 0.00016867305723011967, + "loss": 2.7774, + "step": 5255 + }, + { + "epoch": 
0.4241788394802679, + "grad_norm": 0.8391026854515076, + "learning_rate": 0.00016866158068827979, + "loss": 2.6712, + "step": 5256 + }, + { + "epoch": 0.42425954321685094, + "grad_norm": 0.691146969795227, + "learning_rate": 0.00016865010243521388, + "loss": 2.6459, + "step": 5257 + }, + { + "epoch": 0.4243402469534339, + "grad_norm": 0.7223602533340454, + "learning_rate": 0.00016863862247120794, + "loss": 2.6675, + "step": 5258 + }, + { + "epoch": 0.42442095069001695, + "grad_norm": 0.8400631546974182, + "learning_rate": 0.0001686271407965481, + "loss": 2.6978, + "step": 5259 + }, + { + "epoch": 0.42450165442659993, + "grad_norm": 0.737684965133667, + "learning_rate": 0.0001686156574115205, + "loss": 2.6992, + "step": 5260 + }, + { + "epoch": 0.42458235816318296, + "grad_norm": 0.7511717677116394, + "learning_rate": 0.0001686041723164114, + "loss": 2.6947, + "step": 5261 + }, + { + "epoch": 0.42466306189976594, + "grad_norm": 0.7434492707252502, + "learning_rate": 0.00016859268551150698, + "loss": 2.7353, + "step": 5262 + }, + { + "epoch": 0.42474376563634897, + "grad_norm": 0.746609628200531, + "learning_rate": 0.00016858119699709353, + "loss": 2.7519, + "step": 5263 + }, + { + "epoch": 0.42482446937293195, + "grad_norm": 0.7709949612617493, + "learning_rate": 0.0001685697067734574, + "loss": 2.7018, + "step": 5264 + }, + { + "epoch": 0.424905173109515, + "grad_norm": 0.7496309876441956, + "learning_rate": 0.00016855821484088488, + "loss": 2.6761, + "step": 5265 + }, + { + "epoch": 0.42498587684609795, + "grad_norm": 0.7071252465248108, + "learning_rate": 0.00016854672119966243, + "loss": 2.6762, + "step": 5266 + }, + { + "epoch": 0.425066580582681, + "grad_norm": 0.7991356253623962, + "learning_rate": 0.00016853522585007658, + "loss": 2.6134, + "step": 5267 + }, + { + "epoch": 0.42514728431926396, + "grad_norm": 0.8194605708122253, + "learning_rate": 0.0001685237287924137, + "loss": 2.6601, + "step": 5268 + }, + { + "epoch": 0.425227988055847, + "grad_norm": 
0.7451688051223755, + "learning_rate": 0.00016851223002696037, + "loss": 2.6631, + "step": 5269 + }, + { + "epoch": 0.42530869179242997, + "grad_norm": 0.7220263481140137, + "learning_rate": 0.0001685007295540032, + "loss": 2.6631, + "step": 5270 + }, + { + "epoch": 0.425389395529013, + "grad_norm": 0.7268854975700378, + "learning_rate": 0.00016848922737382874, + "loss": 2.6752, + "step": 5271 + }, + { + "epoch": 0.425470099265596, + "grad_norm": 0.8841642141342163, + "learning_rate": 0.00016847772348672378, + "loss": 2.7153, + "step": 5272 + }, + { + "epoch": 0.425550803002179, + "grad_norm": 0.7725942134857178, + "learning_rate": 0.00016846621789297489, + "loss": 2.6726, + "step": 5273 + }, + { + "epoch": 0.425631506738762, + "grad_norm": 0.7179448008537292, + "learning_rate": 0.00016845471059286887, + "loss": 2.6659, + "step": 5274 + }, + { + "epoch": 0.425712210475345, + "grad_norm": 0.7630325555801392, + "learning_rate": 0.00016844320158669257, + "loss": 2.7133, + "step": 5275 + }, + { + "epoch": 0.425792914211928, + "grad_norm": 0.7349739670753479, + "learning_rate": 0.00016843169087473272, + "loss": 2.6397, + "step": 5276 + }, + { + "epoch": 0.42587361794851103, + "grad_norm": 0.7670298218727112, + "learning_rate": 0.00016842017845727626, + "loss": 2.6485, + "step": 5277 + }, + { + "epoch": 0.425954321685094, + "grad_norm": 0.692095160484314, + "learning_rate": 0.00016840866433461013, + "loss": 2.6058, + "step": 5278 + }, + { + "epoch": 0.42603502542167704, + "grad_norm": 0.6888624429702759, + "learning_rate": 0.00016839714850702125, + "loss": 2.5757, + "step": 5279 + }, + { + "epoch": 0.42611572915826, + "grad_norm": 0.6816484332084656, + "learning_rate": 0.00016838563097479664, + "loss": 2.6656, + "step": 5280 + }, + { + "epoch": 0.42619643289484305, + "grad_norm": 0.7778486609458923, + "learning_rate": 0.00016837411173822333, + "loss": 2.6738, + "step": 5281 + }, + { + "epoch": 0.426277136631426, + "grad_norm": 0.73436439037323, + "learning_rate": 
0.00016836259079758845, + "loss": 2.6346, + "step": 5282 + }, + { + "epoch": 0.42635784036800906, + "grad_norm": 0.673528254032135, + "learning_rate": 0.00016835106815317908, + "loss": 2.6636, + "step": 5283 + }, + { + "epoch": 0.42643854410459203, + "grad_norm": 0.6892737150192261, + "learning_rate": 0.00016833954380528242, + "loss": 2.6723, + "step": 5284 + }, + { + "epoch": 0.42651924784117506, + "grad_norm": 0.7404607534408569, + "learning_rate": 0.00016832801775418571, + "loss": 2.6751, + "step": 5285 + }, + { + "epoch": 0.42659995157775804, + "grad_norm": 0.7040587663650513, + "learning_rate": 0.00016831649000017618, + "loss": 2.6079, + "step": 5286 + }, + { + "epoch": 0.4266806553143411, + "grad_norm": 0.7295164465904236, + "learning_rate": 0.00016830496054354112, + "loss": 2.5928, + "step": 5287 + }, + { + "epoch": 0.42676135905092405, + "grad_norm": 0.7269962430000305, + "learning_rate": 0.00016829342938456788, + "loss": 2.6648, + "step": 5288 + }, + { + "epoch": 0.4268420627875071, + "grad_norm": 0.7296550273895264, + "learning_rate": 0.0001682818965235439, + "loss": 2.6814, + "step": 5289 + }, + { + "epoch": 0.42692276652409006, + "grad_norm": 0.8376085758209229, + "learning_rate": 0.00016827036196075655, + "loss": 2.702, + "step": 5290 + }, + { + "epoch": 0.4270034702606731, + "grad_norm": 0.7461032271385193, + "learning_rate": 0.00016825882569649332, + "loss": 2.6959, + "step": 5291 + }, + { + "epoch": 0.42708417399725607, + "grad_norm": 0.7218661308288574, + "learning_rate": 0.00016824728773104171, + "loss": 2.7182, + "step": 5292 + }, + { + "epoch": 0.4271648777338391, + "grad_norm": 0.7012860774993896, + "learning_rate": 0.00016823574806468933, + "loss": 2.6989, + "step": 5293 + }, + { + "epoch": 0.4272455814704221, + "grad_norm": 0.7039482593536377, + "learning_rate": 0.0001682242066977237, + "loss": 2.6153, + "step": 5294 + }, + { + "epoch": 0.4273262852070051, + "grad_norm": 0.8783851861953735, + "learning_rate": 0.0001682126636304325, + "loss": 
2.7174, + "step": 5295 + }, + { + "epoch": 0.4274069889435881, + "grad_norm": 0.7266566157341003, + "learning_rate": 0.00016820111886310343, + "loss": 2.6571, + "step": 5296 + }, + { + "epoch": 0.4274876926801711, + "grad_norm": 0.7512212991714478, + "learning_rate": 0.0001681895723960242, + "loss": 2.6802, + "step": 5297 + }, + { + "epoch": 0.4275683964167541, + "grad_norm": 0.7786974310874939, + "learning_rate": 0.00016817802422948254, + "loss": 2.6514, + "step": 5298 + }, + { + "epoch": 0.4276491001533371, + "grad_norm": 0.7454531788825989, + "learning_rate": 0.00016816647436376634, + "loss": 2.6508, + "step": 5299 + }, + { + "epoch": 0.4277298038899201, + "grad_norm": 0.7542992830276489, + "learning_rate": 0.0001681549227991634, + "loss": 2.6455, + "step": 5300 + }, + { + "epoch": 0.42781050762650313, + "grad_norm": 0.7405722141265869, + "learning_rate": 0.0001681433695359616, + "loss": 2.6505, + "step": 5301 + }, + { + "epoch": 0.4278912113630861, + "grad_norm": 0.7120002508163452, + "learning_rate": 0.00016813181457444896, + "loss": 2.6652, + "step": 5302 + }, + { + "epoch": 0.42797191509966914, + "grad_norm": 0.7645997405052185, + "learning_rate": 0.00016812025791491334, + "loss": 2.6456, + "step": 5303 + }, + { + "epoch": 0.4280526188362521, + "grad_norm": 0.7214465141296387, + "learning_rate": 0.00016810869955764286, + "loss": 2.6261, + "step": 5304 + }, + { + "epoch": 0.4281333225728351, + "grad_norm": 0.7653367519378662, + "learning_rate": 0.00016809713950292551, + "loss": 2.7295, + "step": 5305 + }, + { + "epoch": 0.4282140263094181, + "grad_norm": 0.6798970103263855, + "learning_rate": 0.0001680855777510495, + "loss": 2.6549, + "step": 5306 + }, + { + "epoch": 0.4282947300460011, + "grad_norm": 0.7693684101104736, + "learning_rate": 0.00016807401430230288, + "loss": 2.7001, + "step": 5307 + }, + { + "epoch": 0.42837543378258414, + "grad_norm": 0.6962063312530518, + "learning_rate": 0.00016806244915697384, + "loss": 2.6582, + "step": 5308 + }, + { + 
"epoch": 0.4284561375191671, + "grad_norm": 0.7526959776878357, + "learning_rate": 0.00016805088231535068, + "loss": 2.7204, + "step": 5309 + }, + { + "epoch": 0.42853684125575014, + "grad_norm": 0.7403820753097534, + "learning_rate": 0.0001680393137777217, + "loss": 2.6505, + "step": 5310 + }, + { + "epoch": 0.4286175449923331, + "grad_norm": 0.7056909799575806, + "learning_rate": 0.00016802774354437506, + "loss": 2.5981, + "step": 5311 + }, + { + "epoch": 0.42869824872891615, + "grad_norm": 0.6756439805030823, + "learning_rate": 0.0001680161716155993, + "loss": 2.6845, + "step": 5312 + }, + { + "epoch": 0.42877895246549913, + "grad_norm": 0.7634297013282776, + "learning_rate": 0.0001680045979916827, + "loss": 2.6399, + "step": 5313 + }, + { + "epoch": 0.42885965620208216, + "grad_norm": 0.6793022751808167, + "learning_rate": 0.0001679930226729138, + "loss": 2.6808, + "step": 5314 + }, + { + "epoch": 0.42894035993866514, + "grad_norm": 0.7692369222640991, + "learning_rate": 0.00016798144565958103, + "loss": 2.673, + "step": 5315 + }, + { + "epoch": 0.42902106367524817, + "grad_norm": 0.668798565864563, + "learning_rate": 0.00016796986695197293, + "loss": 2.6465, + "step": 5316 + }, + { + "epoch": 0.42910176741183115, + "grad_norm": 0.719160795211792, + "learning_rate": 0.00016795828655037805, + "loss": 2.5876, + "step": 5317 + }, + { + "epoch": 0.4291824711484142, + "grad_norm": 0.7352864742279053, + "learning_rate": 0.000167946704455085, + "loss": 2.625, + "step": 5318 + }, + { + "epoch": 0.42926317488499716, + "grad_norm": 0.7103392481803894, + "learning_rate": 0.00016793512066638254, + "loss": 2.602, + "step": 5319 + }, + { + "epoch": 0.4293438786215802, + "grad_norm": 0.7005727291107178, + "learning_rate": 0.0001679235351845592, + "loss": 2.6723, + "step": 5320 + }, + { + "epoch": 0.42942458235816316, + "grad_norm": 0.7686243653297424, + "learning_rate": 0.00016791194800990387, + "loss": 2.693, + "step": 5321 + }, + { + "epoch": 0.4295052860947462, + 
"grad_norm": 0.7026933431625366, + "learning_rate": 0.00016790035914270526, + "loss": 2.6334, + "step": 5322 + }, + { + "epoch": 0.4295859898313292, + "grad_norm": 0.748938262462616, + "learning_rate": 0.0001678887685832522, + "loss": 2.6757, + "step": 5323 + }, + { + "epoch": 0.4296666935679122, + "grad_norm": 0.7753568887710571, + "learning_rate": 0.00016787717633183355, + "loss": 2.6782, + "step": 5324 + }, + { + "epoch": 0.4297473973044952, + "grad_norm": 0.7605767846107483, + "learning_rate": 0.00016786558238873823, + "loss": 2.6822, + "step": 5325 + }, + { + "epoch": 0.4298281010410782, + "grad_norm": 0.7516531348228455, + "learning_rate": 0.00016785398675425524, + "loss": 2.6802, + "step": 5326 + }, + { + "epoch": 0.4299088047776612, + "grad_norm": 0.7551677227020264, + "learning_rate": 0.0001678423894286735, + "loss": 2.6509, + "step": 5327 + }, + { + "epoch": 0.4299895085142442, + "grad_norm": 0.765364944934845, + "learning_rate": 0.00016783079041228206, + "loss": 2.6552, + "step": 5328 + }, + { + "epoch": 0.4300702122508272, + "grad_norm": 0.7016649842262268, + "learning_rate": 0.00016781918970537002, + "loss": 2.6861, + "step": 5329 + }, + { + "epoch": 0.43015091598741023, + "grad_norm": 0.7266311645507812, + "learning_rate": 0.0001678075873082265, + "loss": 2.7064, + "step": 5330 + }, + { + "epoch": 0.4302316197239932, + "grad_norm": 0.7414532899856567, + "learning_rate": 0.00016779598322114064, + "loss": 2.6273, + "step": 5331 + }, + { + "epoch": 0.43031232346057624, + "grad_norm": 0.7032443881034851, + "learning_rate": 0.00016778437744440167, + "loss": 2.6577, + "step": 5332 + }, + { + "epoch": 0.4303930271971592, + "grad_norm": 0.7150338888168335, + "learning_rate": 0.00016777276997829882, + "loss": 2.6586, + "step": 5333 + }, + { + "epoch": 0.43047373093374225, + "grad_norm": 0.6893971562385559, + "learning_rate": 0.0001677611608231214, + "loss": 2.6713, + "step": 5334 + }, + { + "epoch": 0.4305544346703252, + "grad_norm": 0.861935555934906, + 
"learning_rate": 0.00016774954997915867, + "loss": 2.7037, + "step": 5335 + }, + { + "epoch": 0.43063513840690826, + "grad_norm": 0.7140138745307922, + "learning_rate": 0.00016773793744670012, + "loss": 2.6684, + "step": 5336 + }, + { + "epoch": 0.43071584214349123, + "grad_norm": 0.7245929837226868, + "learning_rate": 0.00016772632322603506, + "loss": 2.6349, + "step": 5337 + }, + { + "epoch": 0.43079654588007427, + "grad_norm": 0.7216203808784485, + "learning_rate": 0.000167714707317453, + "loss": 2.6338, + "step": 5338 + }, + { + "epoch": 0.43087724961665724, + "grad_norm": 0.7076452374458313, + "learning_rate": 0.00016770308972124343, + "loss": 2.6614, + "step": 5339 + }, + { + "epoch": 0.4309579533532403, + "grad_norm": 0.7392035722732544, + "learning_rate": 0.00016769147043769586, + "loss": 2.6697, + "step": 5340 + }, + { + "epoch": 0.43103865708982325, + "grad_norm": 0.7235357761383057, + "learning_rate": 0.00016767984946709994, + "loss": 2.6664, + "step": 5341 + }, + { + "epoch": 0.4311193608264063, + "grad_norm": 0.6985526084899902, + "learning_rate": 0.00016766822680974524, + "loss": 2.6157, + "step": 5342 + }, + { + "epoch": 0.43120006456298926, + "grad_norm": 0.769963264465332, + "learning_rate": 0.0001676566024659214, + "loss": 2.6096, + "step": 5343 + }, + { + "epoch": 0.4312807682995723, + "grad_norm": 0.7504093050956726, + "learning_rate": 0.00016764497643591823, + "loss": 2.5795, + "step": 5344 + }, + { + "epoch": 0.43136147203615527, + "grad_norm": 0.7193379402160645, + "learning_rate": 0.0001676333487200254, + "loss": 2.6158, + "step": 5345 + }, + { + "epoch": 0.4314421757727383, + "grad_norm": 0.777357280254364, + "learning_rate": 0.00016762171931853273, + "loss": 2.6388, + "step": 5346 + }, + { + "epoch": 0.4315228795093213, + "grad_norm": 0.8590179085731506, + "learning_rate": 0.00016761008823173003, + "loss": 2.6597, + "step": 5347 + }, + { + "epoch": 0.4316035832459043, + "grad_norm": 0.7040170431137085, + "learning_rate": 
0.0001675984554599072, + "loss": 2.6447, + "step": 5348 + }, + { + "epoch": 0.4316842869824873, + "grad_norm": 0.7682301998138428, + "learning_rate": 0.00016758682100335417, + "loss": 2.6738, + "step": 5349 + }, + { + "epoch": 0.4317649907190703, + "grad_norm": 0.8342414498329163, + "learning_rate": 0.00016757518486236087, + "loss": 2.7058, + "step": 5350 + }, + { + "epoch": 0.4318456944556533, + "grad_norm": 0.7410600781440735, + "learning_rate": 0.00016756354703721736, + "loss": 2.6597, + "step": 5351 + }, + { + "epoch": 0.4319263981922363, + "grad_norm": 0.7633174061775208, + "learning_rate": 0.00016755190752821363, + "loss": 2.6461, + "step": 5352 + }, + { + "epoch": 0.4320071019288193, + "grad_norm": 0.7855150103569031, + "learning_rate": 0.00016754026633563973, + "loss": 2.6556, + "step": 5353 + }, + { + "epoch": 0.43208780566540234, + "grad_norm": 0.7197602391242981, + "learning_rate": 0.00016752862345978587, + "loss": 2.6511, + "step": 5354 + }, + { + "epoch": 0.4321685094019853, + "grad_norm": 0.7748876810073853, + "learning_rate": 0.00016751697890094223, + "loss": 2.7, + "step": 5355 + }, + { + "epoch": 0.4322492131385683, + "grad_norm": 0.7457308173179626, + "learning_rate": 0.00016750533265939895, + "loss": 2.6934, + "step": 5356 + }, + { + "epoch": 0.4323299168751513, + "grad_norm": 0.8003394603729248, + "learning_rate": 0.00016749368473544633, + "loss": 2.6273, + "step": 5357 + }, + { + "epoch": 0.4324106206117343, + "grad_norm": 0.7163615822792053, + "learning_rate": 0.00016748203512937464, + "loss": 2.6605, + "step": 5358 + }, + { + "epoch": 0.43249132434831733, + "grad_norm": 0.6859120726585388, + "learning_rate": 0.00016747038384147422, + "loss": 2.6748, + "step": 5359 + }, + { + "epoch": 0.4325720280849003, + "grad_norm": 0.7169440984725952, + "learning_rate": 0.0001674587308720355, + "loss": 2.6674, + "step": 5360 + }, + { + "epoch": 0.43265273182148334, + "grad_norm": 0.7762351036071777, + "learning_rate": 0.00016744707622134888, + "loss": 
2.6673, + "step": 5361 + }, + { + "epoch": 0.4327334355580663, + "grad_norm": 0.7169542908668518, + "learning_rate": 0.0001674354198897048, + "loss": 2.7341, + "step": 5362 + }, + { + "epoch": 0.43281413929464935, + "grad_norm": 0.7903403043746948, + "learning_rate": 0.00016742376187739376, + "loss": 2.6019, + "step": 5363 + }, + { + "epoch": 0.4328948430312323, + "grad_norm": 0.8395403027534485, + "learning_rate": 0.00016741210218470634, + "loss": 2.6519, + "step": 5364 + }, + { + "epoch": 0.43297554676781536, + "grad_norm": 0.7521546483039856, + "learning_rate": 0.0001674004408119331, + "loss": 2.6067, + "step": 5365 + }, + { + "epoch": 0.43305625050439833, + "grad_norm": 0.7186779975891113, + "learning_rate": 0.0001673887777593647, + "loss": 2.6435, + "step": 5366 + }, + { + "epoch": 0.43313695424098136, + "grad_norm": 0.7362968921661377, + "learning_rate": 0.0001673771130272918, + "loss": 2.6031, + "step": 5367 + }, + { + "epoch": 0.43321765797756434, + "grad_norm": 0.8033537864685059, + "learning_rate": 0.0001673654466160051, + "loss": 2.7234, + "step": 5368 + }, + { + "epoch": 0.4332983617141474, + "grad_norm": 0.7109711766242981, + "learning_rate": 0.0001673537785257954, + "loss": 2.6621, + "step": 5369 + }, + { + "epoch": 0.43337906545073035, + "grad_norm": 0.7499226927757263, + "learning_rate": 0.0001673421087569535, + "loss": 2.706, + "step": 5370 + }, + { + "epoch": 0.4334597691873134, + "grad_norm": 0.7192875146865845, + "learning_rate": 0.00016733043730977017, + "loss": 2.6053, + "step": 5371 + }, + { + "epoch": 0.43354047292389636, + "grad_norm": 0.6939374208450317, + "learning_rate": 0.00016731876418453636, + "loss": 2.6621, + "step": 5372 + }, + { + "epoch": 0.4336211766604794, + "grad_norm": 0.720741331577301, + "learning_rate": 0.00016730708938154297, + "loss": 2.6358, + "step": 5373 + }, + { + "epoch": 0.43370188039706237, + "grad_norm": 0.6979780793190002, + "learning_rate": 0.00016729541290108095, + "loss": 2.6162, + "step": 5374 + }, + { + 
"epoch": 0.4337825841336454, + "grad_norm": 0.8014200925827026, + "learning_rate": 0.00016728373474344136, + "loss": 2.6255, + "step": 5375 + }, + { + "epoch": 0.4338632878702284, + "grad_norm": 0.7780057787895203, + "learning_rate": 0.0001672720549089152, + "loss": 2.6257, + "step": 5376 + }, + { + "epoch": 0.4339439916068114, + "grad_norm": 0.7111102938652039, + "learning_rate": 0.00016726037339779358, + "loss": 2.6384, + "step": 5377 + }, + { + "epoch": 0.4340246953433944, + "grad_norm": 0.7077106833457947, + "learning_rate": 0.00016724869021036764, + "loss": 2.6293, + "step": 5378 + }, + { + "epoch": 0.4341053990799774, + "grad_norm": 0.8328250646591187, + "learning_rate": 0.00016723700534692853, + "loss": 2.6186, + "step": 5379 + }, + { + "epoch": 0.4341861028165604, + "grad_norm": 0.6942149996757507, + "learning_rate": 0.00016722531880776752, + "loss": 2.6032, + "step": 5380 + }, + { + "epoch": 0.4342668065531434, + "grad_norm": 0.7180305123329163, + "learning_rate": 0.00016721363059317583, + "loss": 2.6166, + "step": 5381 + }, + { + "epoch": 0.4343475102897264, + "grad_norm": 0.8093443512916565, + "learning_rate": 0.00016720194070344476, + "loss": 2.6596, + "step": 5382 + }, + { + "epoch": 0.43442821402630943, + "grad_norm": 0.7337743043899536, + "learning_rate": 0.00016719024913886568, + "loss": 2.6137, + "step": 5383 + }, + { + "epoch": 0.4345089177628924, + "grad_norm": 0.7590384483337402, + "learning_rate": 0.00016717855589972993, + "loss": 2.6541, + "step": 5384 + }, + { + "epoch": 0.43458962149947544, + "grad_norm": 0.6945257186889648, + "learning_rate": 0.00016716686098632898, + "loss": 2.686, + "step": 5385 + }, + { + "epoch": 0.4346703252360584, + "grad_norm": 0.7175764441490173, + "learning_rate": 0.00016715516439895424, + "loss": 2.6081, + "step": 5386 + }, + { + "epoch": 0.43475102897264145, + "grad_norm": 0.7287259697914124, + "learning_rate": 0.00016714346613789732, + "loss": 2.6462, + "step": 5387 + }, + { + "epoch": 0.43483173270922443, + 
"grad_norm": 0.6864096522331238, + "learning_rate": 0.00016713176620344964, + "loss": 2.7104, + "step": 5388 + }, + { + "epoch": 0.43491243644580746, + "grad_norm": 0.6554383039474487, + "learning_rate": 0.00016712006459590289, + "loss": 2.6153, + "step": 5389 + }, + { + "epoch": 0.43499314018239044, + "grad_norm": 0.6415165662765503, + "learning_rate": 0.00016710836131554867, + "loss": 2.6198, + "step": 5390 + }, + { + "epoch": 0.43507384391897347, + "grad_norm": 0.6998475193977356, + "learning_rate": 0.00016709665636267869, + "loss": 2.6774, + "step": 5391 + }, + { + "epoch": 0.43515454765555645, + "grad_norm": 0.7437679171562195, + "learning_rate": 0.00016708494973758465, + "loss": 2.6176, + "step": 5392 + }, + { + "epoch": 0.4352352513921395, + "grad_norm": 0.6898311376571655, + "learning_rate": 0.00016707324144055825, + "loss": 2.6194, + "step": 5393 + }, + { + "epoch": 0.43531595512872245, + "grad_norm": 0.7536425590515137, + "learning_rate": 0.00016706153147189138, + "loss": 2.672, + "step": 5394 + }, + { + "epoch": 0.4353966588653055, + "grad_norm": 0.7576118111610413, + "learning_rate": 0.00016704981983187581, + "loss": 2.6473, + "step": 5395 + }, + { + "epoch": 0.43547736260188846, + "grad_norm": 0.7452495098114014, + "learning_rate": 0.00016703810652080349, + "loss": 2.6487, + "step": 5396 + }, + { + "epoch": 0.4355580663384715, + "grad_norm": 0.7817744612693787, + "learning_rate": 0.0001670263915389663, + "loss": 2.61, + "step": 5397 + }, + { + "epoch": 0.43563877007505447, + "grad_norm": 0.7195492386817932, + "learning_rate": 0.00016701467488665624, + "loss": 2.6745, + "step": 5398 + }, + { + "epoch": 0.4357194738116375, + "grad_norm": 0.7703930735588074, + "learning_rate": 0.0001670029565641653, + "loss": 2.7196, + "step": 5399 + }, + { + "epoch": 0.4358001775482205, + "grad_norm": 0.6859520673751831, + "learning_rate": 0.00016699123657178553, + "loss": 2.6317, + "step": 5400 + }, + { + "epoch": 0.4358808812848035, + "grad_norm": 0.7380268573760986, + 
"learning_rate": 0.00016697951490980903, + "loss": 2.6008, + "step": 5401 + }, + { + "epoch": 0.4359615850213865, + "grad_norm": 0.7903439402580261, + "learning_rate": 0.00016696779157852792, + "loss": 2.6411, + "step": 5402 + }, + { + "epoch": 0.4360422887579695, + "grad_norm": 0.7022606134414673, + "learning_rate": 0.0001669560665782344, + "loss": 2.6153, + "step": 5403 + }, + { + "epoch": 0.4361229924945525, + "grad_norm": 0.8196203112602234, + "learning_rate": 0.00016694433990922068, + "loss": 2.6128, + "step": 5404 + }, + { + "epoch": 0.43620369623113553, + "grad_norm": 0.7342696189880371, + "learning_rate": 0.000166932611571779, + "loss": 2.6802, + "step": 5405 + }, + { + "epoch": 0.4362843999677185, + "grad_norm": 0.7475131154060364, + "learning_rate": 0.0001669208815662017, + "loss": 2.6106, + "step": 5406 + }, + { + "epoch": 0.4363651037043015, + "grad_norm": 0.7067655324935913, + "learning_rate": 0.00016690914989278107, + "loss": 2.6362, + "step": 5407 + }, + { + "epoch": 0.4364458074408845, + "grad_norm": 0.7550163865089417, + "learning_rate": 0.00016689741655180956, + "loss": 2.6256, + "step": 5408 + }, + { + "epoch": 0.4365265111774675, + "grad_norm": 0.7341828346252441, + "learning_rate": 0.00016688568154357952, + "loss": 2.6912, + "step": 5409 + }, + { + "epoch": 0.4366072149140505, + "grad_norm": 0.7501869201660156, + "learning_rate": 0.00016687394486838349, + "loss": 2.7122, + "step": 5410 + }, + { + "epoch": 0.4366879186506335, + "grad_norm": 0.7041562795639038, + "learning_rate": 0.00016686220652651392, + "loss": 2.6755, + "step": 5411 + }, + { + "epoch": 0.43676862238721653, + "grad_norm": 0.7218217253684998, + "learning_rate": 0.00016685046651826338, + "loss": 2.693, + "step": 5412 + }, + { + "epoch": 0.4368493261237995, + "grad_norm": 0.6880577206611633, + "learning_rate": 0.00016683872484392448, + "loss": 2.638, + "step": 5413 + }, + { + "epoch": 0.43693002986038254, + "grad_norm": 0.6864475607872009, + "learning_rate": 0.0001668269815037898, 
+ "loss": 2.6497, + "step": 5414 + }, + { + "epoch": 0.4370107335969655, + "grad_norm": 0.7326167821884155, + "learning_rate": 0.00016681523649815212, + "loss": 2.6858, + "step": 5415 + }, + { + "epoch": 0.43709143733354855, + "grad_norm": 0.6773428320884705, + "learning_rate": 0.00016680348982730405, + "loss": 2.6489, + "step": 5416 + }, + { + "epoch": 0.4371721410701315, + "grad_norm": 0.7117835283279419, + "learning_rate": 0.00016679174149153837, + "loss": 2.6607, + "step": 5417 + }, + { + "epoch": 0.43725284480671456, + "grad_norm": 0.7268334031105042, + "learning_rate": 0.00016677999149114793, + "loss": 2.703, + "step": 5418 + }, + { + "epoch": 0.43733354854329753, + "grad_norm": 0.7672972679138184, + "learning_rate": 0.00016676823982642554, + "loss": 2.5803, + "step": 5419 + }, + { + "epoch": 0.43741425227988057, + "grad_norm": 0.6966733932495117, + "learning_rate": 0.00016675648649766407, + "loss": 2.6149, + "step": 5420 + }, + { + "epoch": 0.43749495601646354, + "grad_norm": 0.752896249294281, + "learning_rate": 0.00016674473150515644, + "loss": 2.7108, + "step": 5421 + }, + { + "epoch": 0.4375756597530466, + "grad_norm": 0.7094796895980835, + "learning_rate": 0.00016673297484919565, + "loss": 2.6989, + "step": 5422 + }, + { + "epoch": 0.43765636348962955, + "grad_norm": 0.7631612420082092, + "learning_rate": 0.00016672121653007465, + "loss": 2.6673, + "step": 5423 + }, + { + "epoch": 0.4377370672262126, + "grad_norm": 0.7083843946456909, + "learning_rate": 0.00016670945654808655, + "loss": 2.6529, + "step": 5424 + }, + { + "epoch": 0.43781777096279556, + "grad_norm": 0.7291569709777832, + "learning_rate": 0.0001666976949035244, + "loss": 2.633, + "step": 5425 + }, + { + "epoch": 0.4378984746993786, + "grad_norm": 0.8351448774337769, + "learning_rate": 0.00016668593159668138, + "loss": 2.5993, + "step": 5426 + }, + { + "epoch": 0.43797917843596157, + "grad_norm": 0.7339642643928528, + "learning_rate": 0.00016667416662785058, + "loss": 2.6486, + "step": 5427 
+ }, + { + "epoch": 0.4380598821725446, + "grad_norm": 0.7257512211799622, + "learning_rate": 0.00016666239999732526, + "loss": 2.6453, + "step": 5428 + }, + { + "epoch": 0.4381405859091276, + "grad_norm": 0.7282476425170898, + "learning_rate": 0.00016665063170539872, + "loss": 2.6654, + "step": 5429 + }, + { + "epoch": 0.4382212896457106, + "grad_norm": 0.726685643196106, + "learning_rate": 0.00016663886175236417, + "loss": 2.65, + "step": 5430 + }, + { + "epoch": 0.4383019933822936, + "grad_norm": 0.7478880286216736, + "learning_rate": 0.000166627090138515, + "loss": 2.623, + "step": 5431 + }, + { + "epoch": 0.4383826971188766, + "grad_norm": 0.7624948024749756, + "learning_rate": 0.00016661531686414457, + "loss": 2.6438, + "step": 5432 + }, + { + "epoch": 0.4384634008554596, + "grad_norm": 0.8098936676979065, + "learning_rate": 0.00016660354192954633, + "loss": 2.6226, + "step": 5433 + }, + { + "epoch": 0.4385441045920426, + "grad_norm": 0.7305725812911987, + "learning_rate": 0.0001665917653350137, + "loss": 2.6425, + "step": 5434 + }, + { + "epoch": 0.4386248083286256, + "grad_norm": 0.7064421772956848, + "learning_rate": 0.00016657998708084027, + "loss": 2.6069, + "step": 5435 + }, + { + "epoch": 0.43870551206520864, + "grad_norm": 0.8279524445533752, + "learning_rate": 0.00016656820716731945, + "loss": 2.6609, + "step": 5436 + }, + { + "epoch": 0.4387862158017916, + "grad_norm": 0.742659866809845, + "learning_rate": 0.00016655642559474488, + "loss": 2.64, + "step": 5437 + }, + { + "epoch": 0.43886691953837464, + "grad_norm": 0.757780909538269, + "learning_rate": 0.00016654464236341026, + "loss": 2.6546, + "step": 5438 + }, + { + "epoch": 0.4389476232749576, + "grad_norm": 0.7439742684364319, + "learning_rate": 0.00016653285747360918, + "loss": 2.6717, + "step": 5439 + }, + { + "epoch": 0.43902832701154065, + "grad_norm": 0.7529581189155579, + "learning_rate": 0.0001665210709256354, + "loss": 2.6204, + "step": 5440 + }, + { + "epoch": 0.43910903074812363, + 
"grad_norm": 0.7224153876304626, + "learning_rate": 0.00016650928271978258, + "loss": 2.6417, + "step": 5441 + }, + { + "epoch": 0.43918973448470666, + "grad_norm": 0.6792185306549072, + "learning_rate": 0.00016649749285634462, + "loss": 2.6382, + "step": 5442 + }, + { + "epoch": 0.43927043822128964, + "grad_norm": 0.6887058019638062, + "learning_rate": 0.00016648570133561533, + "loss": 2.6302, + "step": 5443 + }, + { + "epoch": 0.43935114195787267, + "grad_norm": 0.7373671531677246, + "learning_rate": 0.00016647390815788853, + "loss": 2.625, + "step": 5444 + }, + { + "epoch": 0.43943184569445565, + "grad_norm": 0.7595719695091248, + "learning_rate": 0.0001664621133234582, + "loss": 2.6444, + "step": 5445 + }, + { + "epoch": 0.4395125494310387, + "grad_norm": 0.7331473231315613, + "learning_rate": 0.00016645031683261825, + "loss": 2.6308, + "step": 5446 + }, + { + "epoch": 0.43959325316762166, + "grad_norm": 0.7724922895431519, + "learning_rate": 0.0001664385186856627, + "loss": 2.6646, + "step": 5447 + }, + { + "epoch": 0.4396739569042047, + "grad_norm": 0.6960163712501526, + "learning_rate": 0.00016642671888288563, + "loss": 2.6196, + "step": 5448 + }, + { + "epoch": 0.43975466064078766, + "grad_norm": 0.6769189834594727, + "learning_rate": 0.00016641491742458103, + "loss": 2.6558, + "step": 5449 + }, + { + "epoch": 0.4398353643773707, + "grad_norm": 0.7435783743858337, + "learning_rate": 0.0001664031143110431, + "loss": 2.6717, + "step": 5450 + }, + { + "epoch": 0.4399160681139537, + "grad_norm": 0.7234118580818176, + "learning_rate": 0.00016639130954256603, + "loss": 2.6549, + "step": 5451 + }, + { + "epoch": 0.4399967718505367, + "grad_norm": 0.720825731754303, + "learning_rate": 0.00016637950311944392, + "loss": 2.6098, + "step": 5452 + }, + { + "epoch": 0.4400774755871197, + "grad_norm": 0.6977505087852478, + "learning_rate": 0.0001663676950419711, + "loss": 2.6351, + "step": 5453 + }, + { + "epoch": 0.4401581793237027, + "grad_norm": 0.6959076523780823, + 
"learning_rate": 0.00016635588531044185, + "loss": 2.6918, + "step": 5454 + }, + { + "epoch": 0.4402388830602857, + "grad_norm": 0.7022189497947693, + "learning_rate": 0.00016634407392515044, + "loss": 2.6218, + "step": 5455 + }, + { + "epoch": 0.4403195867968687, + "grad_norm": 0.7147775292396545, + "learning_rate": 0.0001663322608863913, + "loss": 2.6966, + "step": 5456 + }, + { + "epoch": 0.4404002905334517, + "grad_norm": 0.7592755556106567, + "learning_rate": 0.00016632044619445882, + "loss": 2.6326, + "step": 5457 + }, + { + "epoch": 0.4404809942700347, + "grad_norm": 0.6914302110671997, + "learning_rate": 0.00016630862984964745, + "loss": 2.603, + "step": 5458 + }, + { + "epoch": 0.4405616980066177, + "grad_norm": 0.7735368609428406, + "learning_rate": 0.0001662968118522517, + "loss": 2.6666, + "step": 5459 + }, + { + "epoch": 0.4406424017432007, + "grad_norm": 0.7175899744033813, + "learning_rate": 0.00016628499220256612, + "loss": 2.666, + "step": 5460 + }, + { + "epoch": 0.4407231054797837, + "grad_norm": 0.6735796332359314, + "learning_rate": 0.00016627317090088523, + "loss": 2.6451, + "step": 5461 + }, + { + "epoch": 0.4408038092163667, + "grad_norm": 0.72022545337677, + "learning_rate": 0.0001662613479475037, + "loss": 2.6295, + "step": 5462 + }, + { + "epoch": 0.4408845129529497, + "grad_norm": 0.7084751725196838, + "learning_rate": 0.00016624952334271616, + "loss": 2.6633, + "step": 5463 + }, + { + "epoch": 0.4409652166895327, + "grad_norm": 0.7399250864982605, + "learning_rate": 0.00016623769708681735, + "loss": 2.6076, + "step": 5464 + }, + { + "epoch": 0.44104592042611573, + "grad_norm": 0.6904892325401306, + "learning_rate": 0.00016622586918010193, + "loss": 2.6799, + "step": 5465 + }, + { + "epoch": 0.4411266241626987, + "grad_norm": 0.7419006824493408, + "learning_rate": 0.00016621403962286478, + "loss": 2.65, + "step": 5466 + }, + { + "epoch": 0.44120732789928174, + "grad_norm": 0.7201282978057861, + "learning_rate": 0.00016620220841540064, + 
"loss": 2.6769, + "step": 5467 + }, + { + "epoch": 0.4412880316358647, + "grad_norm": 0.7223218679428101, + "learning_rate": 0.00016619037555800443, + "loss": 2.6342, + "step": 5468 + }, + { + "epoch": 0.44136873537244775, + "grad_norm": 0.7517585754394531, + "learning_rate": 0.00016617854105097104, + "loss": 2.6103, + "step": 5469 + }, + { + "epoch": 0.44144943910903073, + "grad_norm": 0.6765139698982239, + "learning_rate": 0.0001661667048945954, + "loss": 2.624, + "step": 5470 + }, + { + "epoch": 0.44153014284561376, + "grad_norm": 0.7197677493095398, + "learning_rate": 0.00016615486708917255, + "loss": 2.5786, + "step": 5471 + }, + { + "epoch": 0.44161084658219674, + "grad_norm": 0.7196774482727051, + "learning_rate": 0.00016614302763499742, + "loss": 2.6147, + "step": 5472 + }, + { + "epoch": 0.44169155031877977, + "grad_norm": 0.7210293412208557, + "learning_rate": 0.00016613118653236518, + "loss": 2.6526, + "step": 5473 + }, + { + "epoch": 0.44177225405536275, + "grad_norm": 0.6870129108428955, + "learning_rate": 0.00016611934378157092, + "loss": 2.665, + "step": 5474 + }, + { + "epoch": 0.4418529577919458, + "grad_norm": 0.6925365328788757, + "learning_rate": 0.00016610749938290975, + "loss": 2.5734, + "step": 5475 + }, + { + "epoch": 0.44193366152852875, + "grad_norm": 0.7399131655693054, + "learning_rate": 0.0001660956533366769, + "loss": 2.6935, + "step": 5476 + }, + { + "epoch": 0.4420143652651118, + "grad_norm": 0.7348966002464294, + "learning_rate": 0.00016608380564316758, + "loss": 2.6788, + "step": 5477 + }, + { + "epoch": 0.44209506900169476, + "grad_norm": 0.7597334980964661, + "learning_rate": 0.00016607195630267708, + "loss": 2.6732, + "step": 5478 + }, + { + "epoch": 0.4421757727382778, + "grad_norm": 0.6847043037414551, + "learning_rate": 0.00016606010531550072, + "loss": 2.6475, + "step": 5479 + }, + { + "epoch": 0.44225647647486077, + "grad_norm": 0.7065151929855347, + "learning_rate": 0.00016604825268193388, + "loss": 2.6674, + "step": 5480 
+ }, + { + "epoch": 0.4423371802114438, + "grad_norm": 0.7102208137512207, + "learning_rate": 0.0001660363984022719, + "loss": 2.6723, + "step": 5481 + }, + { + "epoch": 0.4424178839480268, + "grad_norm": 0.6912767887115479, + "learning_rate": 0.00016602454247681024, + "loss": 2.628, + "step": 5482 + }, + { + "epoch": 0.4424985876846098, + "grad_norm": 0.7265123128890991, + "learning_rate": 0.0001660126849058444, + "loss": 2.5935, + "step": 5483 + }, + { + "epoch": 0.4425792914211928, + "grad_norm": 0.8177923560142517, + "learning_rate": 0.0001660008256896699, + "loss": 2.6402, + "step": 5484 + }, + { + "epoch": 0.4426599951577758, + "grad_norm": 0.7196556925773621, + "learning_rate": 0.00016598896482858231, + "loss": 2.6939, + "step": 5485 + }, + { + "epoch": 0.4427406988943588, + "grad_norm": 0.7459850907325745, + "learning_rate": 0.0001659771023228772, + "loss": 2.6343, + "step": 5486 + }, + { + "epoch": 0.44282140263094183, + "grad_norm": 0.7399095892906189, + "learning_rate": 0.00016596523817285024, + "loss": 2.6139, + "step": 5487 + }, + { + "epoch": 0.4429021063675248, + "grad_norm": 0.7517558336257935, + "learning_rate": 0.0001659533723787971, + "loss": 2.6609, + "step": 5488 + }, + { + "epoch": 0.44298281010410784, + "grad_norm": 0.7073537707328796, + "learning_rate": 0.00016594150494101355, + "loss": 2.6326, + "step": 5489 + }, + { + "epoch": 0.4430635138406908, + "grad_norm": 0.7414752244949341, + "learning_rate": 0.0001659296358597953, + "loss": 2.6759, + "step": 5490 + }, + { + "epoch": 0.44314421757727385, + "grad_norm": 0.7636380195617676, + "learning_rate": 0.0001659177651354382, + "loss": 2.5743, + "step": 5491 + }, + { + "epoch": 0.4432249213138568, + "grad_norm": 0.6839539408683777, + "learning_rate": 0.00016590589276823804, + "loss": 2.631, + "step": 5492 + }, + { + "epoch": 0.44330562505043986, + "grad_norm": 0.8057516813278198, + "learning_rate": 0.0001658940187584908, + "loss": 2.6916, + "step": 5493 + }, + { + "epoch": 0.44338632878702283, + 
"grad_norm": 0.7479767799377441, + "learning_rate": 0.00016588214310649232, + "loss": 2.6811, + "step": 5494 + }, + { + "epoch": 0.44346703252360586, + "grad_norm": 0.7854729294776917, + "learning_rate": 0.00016587026581253866, + "loss": 2.6746, + "step": 5495 + }, + { + "epoch": 0.44354773626018884, + "grad_norm": 0.7782836556434631, + "learning_rate": 0.00016585838687692577, + "loss": 2.61, + "step": 5496 + }, + { + "epoch": 0.4436284399967719, + "grad_norm": 0.7047034502029419, + "learning_rate": 0.00016584650629994968, + "loss": 2.6573, + "step": 5497 + }, + { + "epoch": 0.44370914373335485, + "grad_norm": 0.7398735880851746, + "learning_rate": 0.0001658346240819066, + "loss": 2.6338, + "step": 5498 + }, + { + "epoch": 0.4437898474699379, + "grad_norm": 0.7243468165397644, + "learning_rate": 0.00016582274022309258, + "loss": 2.5898, + "step": 5499 + }, + { + "epoch": 0.44387055120652086, + "grad_norm": 0.7415906190872192, + "learning_rate": 0.00016581085472380376, + "loss": 2.5893, + "step": 5500 + }, + { + "epoch": 0.4439512549431039, + "grad_norm": 0.6935107707977295, + "learning_rate": 0.00016579896758433645, + "loss": 2.6704, + "step": 5501 + }, + { + "epoch": 0.44403195867968687, + "grad_norm": 0.7188034653663635, + "learning_rate": 0.00016578707880498685, + "loss": 2.643, + "step": 5502 + }, + { + "epoch": 0.4441126624162699, + "grad_norm": 0.6697022914886475, + "learning_rate": 0.0001657751883860513, + "loss": 2.6313, + "step": 5503 + }, + { + "epoch": 0.4441933661528529, + "grad_norm": 0.760154664516449, + "learning_rate": 0.00016576329632782613, + "loss": 2.6604, + "step": 5504 + }, + { + "epoch": 0.4442740698894359, + "grad_norm": 0.6883447170257568, + "learning_rate": 0.00016575140263060765, + "loss": 2.64, + "step": 5505 + }, + { + "epoch": 0.4443547736260189, + "grad_norm": 0.8628804683685303, + "learning_rate": 0.0001657395072946924, + "loss": 2.6651, + "step": 5506 + }, + { + "epoch": 0.4444354773626019, + "grad_norm": 0.7125170230865479, + 
"learning_rate": 0.0001657276103203768, + "loss": 2.7132, + "step": 5507 + }, + { + "epoch": 0.4445161810991849, + "grad_norm": 0.6965304613113403, + "learning_rate": 0.00016571571170795725, + "loss": 2.7109, + "step": 5508 + }, + { + "epoch": 0.44459688483576787, + "grad_norm": 0.720327615737915, + "learning_rate": 0.00016570381145773042, + "loss": 2.6323, + "step": 5509 + }, + { + "epoch": 0.4446775885723509, + "grad_norm": 0.7097898125648499, + "learning_rate": 0.00016569190956999287, + "loss": 2.6461, + "step": 5510 + }, + { + "epoch": 0.4447582923089339, + "grad_norm": 0.7142884731292725, + "learning_rate": 0.0001656800060450412, + "loss": 2.6894, + "step": 5511 + }, + { + "epoch": 0.4448389960455169, + "grad_norm": 0.6992002725601196, + "learning_rate": 0.0001656681008831721, + "loss": 2.6116, + "step": 5512 + }, + { + "epoch": 0.4449196997820999, + "grad_norm": 0.763841450214386, + "learning_rate": 0.00016565619408468227, + "loss": 2.6441, + "step": 5513 + }, + { + "epoch": 0.4450004035186829, + "grad_norm": 0.6958404183387756, + "learning_rate": 0.00016564428564986848, + "loss": 2.5751, + "step": 5514 + }, + { + "epoch": 0.4450811072552659, + "grad_norm": 0.8804046511650085, + "learning_rate": 0.00016563237557902744, + "loss": 2.6353, + "step": 5515 + }, + { + "epoch": 0.4451618109918489, + "grad_norm": 0.744864821434021, + "learning_rate": 0.00016562046387245608, + "loss": 2.6887, + "step": 5516 + }, + { + "epoch": 0.4452425147284319, + "grad_norm": 0.7627978920936584, + "learning_rate": 0.0001656085505304512, + "loss": 2.6347, + "step": 5517 + }, + { + "epoch": 0.44532321846501494, + "grad_norm": 0.7728918194770813, + "learning_rate": 0.00016559663555330975, + "loss": 2.6344, + "step": 5518 + }, + { + "epoch": 0.4454039222015979, + "grad_norm": 0.7853842377662659, + "learning_rate": 0.00016558471894132865, + "loss": 2.7239, + "step": 5519 + }, + { + "epoch": 0.44548462593818094, + "grad_norm": 0.7981860041618347, + "learning_rate": 0.00016557280069480495, 
+ "loss": 2.66, + "step": 5520 + }, + { + "epoch": 0.4455653296747639, + "grad_norm": 0.7555295825004578, + "learning_rate": 0.0001655608808140356, + "loss": 2.6636, + "step": 5521 + }, + { + "epoch": 0.44564603341134695, + "grad_norm": 0.6893854141235352, + "learning_rate": 0.00016554895929931778, + "loss": 2.5999, + "step": 5522 + }, + { + "epoch": 0.44572673714792993, + "grad_norm": 0.7740506529808044, + "learning_rate": 0.0001655370361509485, + "loss": 2.6308, + "step": 5523 + }, + { + "epoch": 0.44580744088451296, + "grad_norm": 0.6956021785736084, + "learning_rate": 0.00016552511136922498, + "loss": 2.6376, + "step": 5524 + }, + { + "epoch": 0.44588814462109594, + "grad_norm": 0.7408841252326965, + "learning_rate": 0.00016551318495444445, + "loss": 2.6644, + "step": 5525 + }, + { + "epoch": 0.44596884835767897, + "grad_norm": 0.7715663313865662, + "learning_rate": 0.000165501256906904, + "loss": 2.6791, + "step": 5526 + }, + { + "epoch": 0.44604955209426195, + "grad_norm": 0.6880629062652588, + "learning_rate": 0.0001654893272269011, + "loss": 2.7209, + "step": 5527 + }, + { + "epoch": 0.446130255830845, + "grad_norm": 0.6765853762626648, + "learning_rate": 0.0001654773959147329, + "loss": 2.6548, + "step": 5528 + }, + { + "epoch": 0.44621095956742796, + "grad_norm": 0.739248514175415, + "learning_rate": 0.00016546546297069688, + "loss": 2.69, + "step": 5529 + }, + { + "epoch": 0.446291663304011, + "grad_norm": 0.7655714750289917, + "learning_rate": 0.00016545352839509038, + "loss": 2.6238, + "step": 5530 + }, + { + "epoch": 0.44637236704059396, + "grad_norm": 0.706068217754364, + "learning_rate": 0.00016544159218821088, + "loss": 2.6528, + "step": 5531 + }, + { + "epoch": 0.446453070777177, + "grad_norm": 0.7411316633224487, + "learning_rate": 0.00016542965435035578, + "loss": 2.7034, + "step": 5532 + }, + { + "epoch": 0.44653377451376, + "grad_norm": 0.6550690531730652, + "learning_rate": 0.0001654177148818227, + "loss": 2.6388, + "step": 5533 + }, + { + 
"epoch": 0.446614478250343, + "grad_norm": 0.7151147127151489, + "learning_rate": 0.00016540577378290915, + "loss": 2.7382, + "step": 5534 + }, + { + "epoch": 0.446695181986926, + "grad_norm": 0.7343939542770386, + "learning_rate": 0.00016539383105391276, + "loss": 2.6316, + "step": 5535 + }, + { + "epoch": 0.446775885723509, + "grad_norm": 0.702036440372467, + "learning_rate": 0.00016538188669513115, + "loss": 2.6465, + "step": 5536 + }, + { + "epoch": 0.446856589460092, + "grad_norm": 0.7212840914726257, + "learning_rate": 0.00016536994070686197, + "loss": 2.6471, + "step": 5537 + }, + { + "epoch": 0.446937293196675, + "grad_norm": 0.7345479130744934, + "learning_rate": 0.00016535799308940304, + "loss": 2.6746, + "step": 5538 + }, + { + "epoch": 0.447017996933258, + "grad_norm": 0.7447341084480286, + "learning_rate": 0.00016534604384305207, + "loss": 2.6487, + "step": 5539 + }, + { + "epoch": 0.44709870066984103, + "grad_norm": 0.6865687370300293, + "learning_rate": 0.00016533409296810687, + "loss": 2.6202, + "step": 5540 + }, + { + "epoch": 0.447179404406424, + "grad_norm": 0.8210769891738892, + "learning_rate": 0.0001653221404648653, + "loss": 2.7155, + "step": 5541 + }, + { + "epoch": 0.44726010814300704, + "grad_norm": 0.7768925428390503, + "learning_rate": 0.0001653101863336252, + "loss": 2.6011, + "step": 5542 + }, + { + "epoch": 0.44734081187959, + "grad_norm": 0.7160049080848694, + "learning_rate": 0.00016529823057468456, + "loss": 2.6541, + "step": 5543 + }, + { + "epoch": 0.44742151561617305, + "grad_norm": 0.7386900782585144, + "learning_rate": 0.00016528627318834134, + "loss": 2.6586, + "step": 5544 + }, + { + "epoch": 0.447502219352756, + "grad_norm": 0.7415460348129272, + "learning_rate": 0.0001652743141748935, + "loss": 2.7032, + "step": 5545 + }, + { + "epoch": 0.44758292308933906, + "grad_norm": 0.8483054637908936, + "learning_rate": 0.00016526235353463912, + "loss": 2.6145, + "step": 5546 + }, + { + "epoch": 0.44766362682592203, + "grad_norm": 
0.7428778409957886, + "learning_rate": 0.00016525039126787629, + "loss": 2.7005, + "step": 5547 + }, + { + "epoch": 0.44774433056250507, + "grad_norm": 0.7214285731315613, + "learning_rate": 0.00016523842737490316, + "loss": 2.6267, + "step": 5548 + }, + { + "epoch": 0.44782503429908804, + "grad_norm": 0.6753950715065002, + "learning_rate": 0.0001652264618560179, + "loss": 2.6732, + "step": 5549 + }, + { + "epoch": 0.4479057380356711, + "grad_norm": 0.6969403028488159, + "learning_rate": 0.00016521449471151867, + "loss": 2.6218, + "step": 5550 + }, + { + "epoch": 0.44798644177225405, + "grad_norm": 0.7562664151191711, + "learning_rate": 0.00016520252594170377, + "loss": 2.69, + "step": 5551 + }, + { + "epoch": 0.4480671455088371, + "grad_norm": 0.6831937432289124, + "learning_rate": 0.0001651905555468715, + "loss": 2.709, + "step": 5552 + }, + { + "epoch": 0.44814784924542006, + "grad_norm": 0.6753427386283875, + "learning_rate": 0.00016517858352732017, + "loss": 2.5852, + "step": 5553 + }, + { + "epoch": 0.4482285529820031, + "grad_norm": 0.7573871612548828, + "learning_rate": 0.00016516660988334815, + "loss": 2.6187, + "step": 5554 + }, + { + "epoch": 0.44830925671858607, + "grad_norm": 0.6424254775047302, + "learning_rate": 0.00016515463461525383, + "loss": 2.6411, + "step": 5555 + }, + { + "epoch": 0.4483899604551691, + "grad_norm": 0.7460073232650757, + "learning_rate": 0.0001651426577233358, + "loss": 2.6239, + "step": 5556 + }, + { + "epoch": 0.4484706641917521, + "grad_norm": 0.6980866193771362, + "learning_rate": 0.0001651306792078924, + "loss": 2.605, + "step": 5557 + }, + { + "epoch": 0.4485513679283351, + "grad_norm": 0.7376009225845337, + "learning_rate": 0.00016511869906922217, + "loss": 2.7114, + "step": 5558 + }, + { + "epoch": 0.4486320716649181, + "grad_norm": 0.7227364778518677, + "learning_rate": 0.0001651067173076238, + "loss": 2.6212, + "step": 5559 + }, + { + "epoch": 0.44871277540150106, + "grad_norm": 0.8989635705947876, + "learning_rate": 
0.00016509473392339584, + "loss": 2.671, + "step": 5560 + }, + { + "epoch": 0.4487934791380841, + "grad_norm": 0.7273553609848022, + "learning_rate": 0.0001650827489168369, + "loss": 2.6556, + "step": 5561 + }, + { + "epoch": 0.44887418287466707, + "grad_norm": 0.839439868927002, + "learning_rate": 0.00016507076228824578, + "loss": 2.6959, + "step": 5562 + }, + { + "epoch": 0.4489548866112501, + "grad_norm": 0.6912770867347717, + "learning_rate": 0.00016505877403792115, + "loss": 2.6709, + "step": 5563 + }, + { + "epoch": 0.4490355903478331, + "grad_norm": 0.7850949168205261, + "learning_rate": 0.00016504678416616182, + "loss": 2.7257, + "step": 5564 + }, + { + "epoch": 0.4491162940844161, + "grad_norm": 0.7768355011940002, + "learning_rate": 0.0001650347926732666, + "loss": 2.5939, + "step": 5565 + }, + { + "epoch": 0.4491969978209991, + "grad_norm": 0.6518398523330688, + "learning_rate": 0.0001650227995595343, + "loss": 2.6589, + "step": 5566 + }, + { + "epoch": 0.4492777015575821, + "grad_norm": 0.6855975389480591, + "learning_rate": 0.0001650108048252639, + "loss": 2.6372, + "step": 5567 + }, + { + "epoch": 0.4493584052941651, + "grad_norm": 0.7176938056945801, + "learning_rate": 0.0001649988084707543, + "loss": 2.6506, + "step": 5568 + }, + { + "epoch": 0.44943910903074813, + "grad_norm": 0.735335648059845, + "learning_rate": 0.00016498681049630448, + "loss": 2.608, + "step": 5569 + }, + { + "epoch": 0.4495198127673311, + "grad_norm": 0.6862306594848633, + "learning_rate": 0.00016497481090221346, + "loss": 2.5982, + "step": 5570 + }, + { + "epoch": 0.44960051650391414, + "grad_norm": 0.7213380336761475, + "learning_rate": 0.0001649628096887803, + "loss": 2.6457, + "step": 5571 + }, + { + "epoch": 0.4496812202404971, + "grad_norm": 0.7118985652923584, + "learning_rate": 0.0001649508068563041, + "loss": 2.6321, + "step": 5572 + }, + { + "epoch": 0.44976192397708015, + "grad_norm": 0.7663396596908569, + "learning_rate": 0.00016493880240508405, + "loss": 2.5865, + 
"step": 5573 + }, + { + "epoch": 0.4498426277136631, + "grad_norm": 0.6854543089866638, + "learning_rate": 0.00016492679633541926, + "loss": 2.6536, + "step": 5574 + }, + { + "epoch": 0.44992333145024616, + "grad_norm": 0.7071701884269714, + "learning_rate": 0.000164914788647609, + "loss": 2.6149, + "step": 5575 + }, + { + "epoch": 0.45000403518682913, + "grad_norm": 0.7610478401184082, + "learning_rate": 0.00016490277934195252, + "loss": 2.6326, + "step": 5576 + }, + { + "epoch": 0.45008473892341216, + "grad_norm": 0.7117596864700317, + "learning_rate": 0.0001648907684187491, + "loss": 2.6938, + "step": 5577 + }, + { + "epoch": 0.45016544265999514, + "grad_norm": 0.6980494856834412, + "learning_rate": 0.00016487875587829813, + "loss": 2.6798, + "step": 5578 + }, + { + "epoch": 0.4502461463965782, + "grad_norm": 0.7957972288131714, + "learning_rate": 0.00016486674172089898, + "loss": 2.6029, + "step": 5579 + }, + { + "epoch": 0.45032685013316115, + "grad_norm": 0.7258082032203674, + "learning_rate": 0.00016485472594685103, + "loss": 2.6785, + "step": 5580 + }, + { + "epoch": 0.4504075538697442, + "grad_norm": 0.7402041554450989, + "learning_rate": 0.0001648427085564538, + "loss": 2.6263, + "step": 5581 + }, + { + "epoch": 0.45048825760632716, + "grad_norm": 0.6943814158439636, + "learning_rate": 0.00016483068955000673, + "loss": 2.6761, + "step": 5582 + }, + { + "epoch": 0.4505689613429102, + "grad_norm": 0.8021644353866577, + "learning_rate": 0.00016481866892780947, + "loss": 2.6376, + "step": 5583 + }, + { + "epoch": 0.45064966507949317, + "grad_norm": 0.7748533487319946, + "learning_rate": 0.0001648066466901615, + "loss": 2.7465, + "step": 5584 + }, + { + "epoch": 0.4507303688160762, + "grad_norm": 0.7432222366333008, + "learning_rate": 0.00016479462283736248, + "loss": 2.6368, + "step": 5585 + }, + { + "epoch": 0.4508110725526592, + "grad_norm": 0.7835286259651184, + "learning_rate": 0.00016478259736971214, + "loss": 2.6449, + "step": 5586 + }, + { + "epoch": 
0.4508917762892422, + "grad_norm": 0.7372995018959045, + "learning_rate": 0.00016477057028751007, + "loss": 2.6091, + "step": 5587 + }, + { + "epoch": 0.4509724800258252, + "grad_norm": 0.8230665326118469, + "learning_rate": 0.0001647585415910561, + "loss": 2.6345, + "step": 5588 + }, + { + "epoch": 0.4510531837624082, + "grad_norm": 0.7490825057029724, + "learning_rate": 0.00016474651128065002, + "loss": 2.5996, + "step": 5589 + }, + { + "epoch": 0.4511338874989912, + "grad_norm": 0.7950569987297058, + "learning_rate": 0.00016473447935659157, + "loss": 2.7109, + "step": 5590 + }, + { + "epoch": 0.4512145912355742, + "grad_norm": 0.7648342251777649, + "learning_rate": 0.00016472244581918074, + "loss": 2.6268, + "step": 5591 + }, + { + "epoch": 0.4512952949721572, + "grad_norm": 0.726828396320343, + "learning_rate": 0.00016471041066871733, + "loss": 2.5959, + "step": 5592 + }, + { + "epoch": 0.45137599870874023, + "grad_norm": 0.7855841517448425, + "learning_rate": 0.00016469837390550133, + "loss": 2.6671, + "step": 5593 + }, + { + "epoch": 0.4514567024453232, + "grad_norm": 0.6858882904052734, + "learning_rate": 0.00016468633552983275, + "loss": 2.6003, + "step": 5594 + }, + { + "epoch": 0.45153740618190624, + "grad_norm": 0.710926353931427, + "learning_rate": 0.0001646742955420116, + "loss": 2.6049, + "step": 5595 + }, + { + "epoch": 0.4516181099184892, + "grad_norm": 0.8359978199005127, + "learning_rate": 0.0001646622539423379, + "loss": 2.6636, + "step": 5596 + }, + { + "epoch": 0.45169881365507225, + "grad_norm": 0.7628041505813599, + "learning_rate": 0.00016465021073111186, + "loss": 2.6586, + "step": 5597 + }, + { + "epoch": 0.4517795173916552, + "grad_norm": 0.7723419666290283, + "learning_rate": 0.00016463816590863356, + "loss": 2.6213, + "step": 5598 + }, + { + "epoch": 0.45186022112823826, + "grad_norm": 0.7210986018180847, + "learning_rate": 0.0001646261194752032, + "loss": 2.6674, + "step": 5599 + }, + { + "epoch": 0.45194092486482124, + "grad_norm": 
0.7665949463844299, + "learning_rate": 0.00016461407143112097, + "loss": 2.68, + "step": 5600 + }, + { + "epoch": 0.45202162860140427, + "grad_norm": 0.7225117087364197, + "learning_rate": 0.00016460202177668722, + "loss": 2.6473, + "step": 5601 + }, + { + "epoch": 0.45210233233798724, + "grad_norm": 0.6831738948822021, + "learning_rate": 0.0001645899705122022, + "loss": 2.6863, + "step": 5602 + }, + { + "epoch": 0.4521830360745703, + "grad_norm": 0.7006321549415588, + "learning_rate": 0.00016457791763796627, + "loss": 2.6242, + "step": 5603 + }, + { + "epoch": 0.45226373981115325, + "grad_norm": 0.7245663404464722, + "learning_rate": 0.00016456586315427983, + "loss": 2.6201, + "step": 5604 + }, + { + "epoch": 0.4523444435477363, + "grad_norm": 0.7444287538528442, + "learning_rate": 0.00016455380706144332, + "loss": 2.6684, + "step": 5605 + }, + { + "epoch": 0.45242514728431926, + "grad_norm": 0.6562673449516296, + "learning_rate": 0.00016454174935975714, + "loss": 2.5912, + "step": 5606 + }, + { + "epoch": 0.4525058510209023, + "grad_norm": 0.6494336724281311, + "learning_rate": 0.0001645296900495219, + "loss": 2.6245, + "step": 5607 + }, + { + "epoch": 0.45258655475748527, + "grad_norm": 0.6968161463737488, + "learning_rate": 0.0001645176291310381, + "loss": 2.6494, + "step": 5608 + }, + { + "epoch": 0.4526672584940683, + "grad_norm": 0.7351142764091492, + "learning_rate": 0.00016450556660460632, + "loss": 2.574, + "step": 5609 + }, + { + "epoch": 0.4527479622306513, + "grad_norm": 0.7522323131561279, + "learning_rate": 0.0001644935024705272, + "loss": 2.6512, + "step": 5610 + }, + { + "epoch": 0.45282866596723426, + "grad_norm": 0.6744225025177002, + "learning_rate": 0.0001644814367291014, + "loss": 2.6288, + "step": 5611 + }, + { + "epoch": 0.4529093697038173, + "grad_norm": 0.6933234333992004, + "learning_rate": 0.00016446936938062967, + "loss": 2.6076, + "step": 5612 + }, + { + "epoch": 0.45299007344040026, + "grad_norm": 0.7101204991340637, + 
"learning_rate": 0.00016445730042541272, + "loss": 2.6322, + "step": 5613 + }, + { + "epoch": 0.4530707771769833, + "grad_norm": 0.7647581696510315, + "learning_rate": 0.00016444522986375134, + "loss": 2.7021, + "step": 5614 + }, + { + "epoch": 0.4531514809135663, + "grad_norm": 0.7028820514678955, + "learning_rate": 0.00016443315769594635, + "loss": 2.6171, + "step": 5615 + }, + { + "epoch": 0.4532321846501493, + "grad_norm": 0.6933851838111877, + "learning_rate": 0.00016442108392229868, + "loss": 2.6119, + "step": 5616 + }, + { + "epoch": 0.4533128883867323, + "grad_norm": 0.7218462824821472, + "learning_rate": 0.0001644090085431092, + "loss": 2.6661, + "step": 5617 + }, + { + "epoch": 0.4533935921233153, + "grad_norm": 0.7390525341033936, + "learning_rate": 0.00016439693155867883, + "loss": 2.7084, + "step": 5618 + }, + { + "epoch": 0.4534742958598983, + "grad_norm": 0.734136164188385, + "learning_rate": 0.0001643848529693086, + "loss": 2.6896, + "step": 5619 + }, + { + "epoch": 0.4535549995964813, + "grad_norm": 0.8082060813903809, + "learning_rate": 0.00016437277277529954, + "loss": 2.5828, + "step": 5620 + }, + { + "epoch": 0.4536357033330643, + "grad_norm": 0.695988655090332, + "learning_rate": 0.0001643606909769527, + "loss": 2.6383, + "step": 5621 + }, + { + "epoch": 0.45371640706964733, + "grad_norm": 0.7415786385536194, + "learning_rate": 0.00016434860757456922, + "loss": 2.6388, + "step": 5622 + }, + { + "epoch": 0.4537971108062303, + "grad_norm": 0.7378649115562439, + "learning_rate": 0.0001643365225684502, + "loss": 2.6534, + "step": 5623 + }, + { + "epoch": 0.45387781454281334, + "grad_norm": 0.7686129808425903, + "learning_rate": 0.0001643244359588969, + "loss": 2.6637, + "step": 5624 + }, + { + "epoch": 0.4539585182793963, + "grad_norm": 0.7305558323860168, + "learning_rate": 0.00016431234774621047, + "loss": 2.6525, + "step": 5625 + }, + { + "epoch": 0.45403922201597935, + "grad_norm": 0.7994235157966614, + "learning_rate": 0.00016430025793069225, 
+ "loss": 2.6316, + "step": 5626 + }, + { + "epoch": 0.4541199257525623, + "grad_norm": 0.6945801377296448, + "learning_rate": 0.0001642881665126435, + "loss": 2.6367, + "step": 5627 + }, + { + "epoch": 0.45420062948914536, + "grad_norm": 0.6855447292327881, + "learning_rate": 0.00016427607349236558, + "loss": 2.6317, + "step": 5628 + }, + { + "epoch": 0.45428133322572833, + "grad_norm": 0.6961888670921326, + "learning_rate": 0.00016426397887015992, + "loss": 2.6477, + "step": 5629 + }, + { + "epoch": 0.45436203696231137, + "grad_norm": 0.7531994581222534, + "learning_rate": 0.0001642518826463279, + "loss": 2.7219, + "step": 5630 + }, + { + "epoch": 0.45444274069889434, + "grad_norm": 0.7442335486412048, + "learning_rate": 0.00016423978482117102, + "loss": 2.706, + "step": 5631 + }, + { + "epoch": 0.4545234444354774, + "grad_norm": 0.7075700759887695, + "learning_rate": 0.00016422768539499076, + "loss": 2.6481, + "step": 5632 + }, + { + "epoch": 0.45460414817206035, + "grad_norm": 0.7831876873970032, + "learning_rate": 0.0001642155843680887, + "loss": 2.616, + "step": 5633 + }, + { + "epoch": 0.4546848519086434, + "grad_norm": 0.7514604926109314, + "learning_rate": 0.00016420348174076642, + "loss": 2.6282, + "step": 5634 + }, + { + "epoch": 0.45476555564522636, + "grad_norm": 0.7136685252189636, + "learning_rate": 0.0001641913775133255, + "loss": 2.6764, + "step": 5635 + }, + { + "epoch": 0.4548462593818094, + "grad_norm": 0.7406740784645081, + "learning_rate": 0.00016417927168606771, + "loss": 2.6126, + "step": 5636 + }, + { + "epoch": 0.45492696311839237, + "grad_norm": 0.7257869839668274, + "learning_rate": 0.0001641671642592947, + "loss": 2.6035, + "step": 5637 + }, + { + "epoch": 0.4550076668549754, + "grad_norm": 0.8378798961639404, + "learning_rate": 0.00016415505523330822, + "loss": 2.6657, + "step": 5638 + }, + { + "epoch": 0.4550883705915584, + "grad_norm": 0.7218836545944214, + "learning_rate": 0.00016414294460841003, + "loss": 2.6209, + "step": 5639 + 
}, + { + "epoch": 0.4551690743281414, + "grad_norm": 0.7792766690254211, + "learning_rate": 0.00016413083238490204, + "loss": 2.7208, + "step": 5640 + }, + { + "epoch": 0.4552497780647244, + "grad_norm": 0.7800823450088501, + "learning_rate": 0.000164118718563086, + "loss": 2.6351, + "step": 5641 + }, + { + "epoch": 0.4553304818013074, + "grad_norm": 0.7593275904655457, + "learning_rate": 0.00016410660314326395, + "loss": 2.7025, + "step": 5642 + }, + { + "epoch": 0.4554111855378904, + "grad_norm": 0.7561587691307068, + "learning_rate": 0.00016409448612573772, + "loss": 2.6188, + "step": 5643 + }, + { + "epoch": 0.4554918892744734, + "grad_norm": 0.7674516439437866, + "learning_rate": 0.00016408236751080937, + "loss": 2.629, + "step": 5644 + }, + { + "epoch": 0.4555725930110564, + "grad_norm": 0.7112495303153992, + "learning_rate": 0.00016407024729878095, + "loss": 2.6261, + "step": 5645 + }, + { + "epoch": 0.45565329674763944, + "grad_norm": 0.6861695647239685, + "learning_rate": 0.00016405812548995444, + "loss": 2.6984, + "step": 5646 + }, + { + "epoch": 0.4557340004842224, + "grad_norm": 0.7711648941040039, + "learning_rate": 0.000164046002084632, + "loss": 2.6839, + "step": 5647 + }, + { + "epoch": 0.45581470422080544, + "grad_norm": 0.6862967014312744, + "learning_rate": 0.00016403387708311578, + "loss": 2.5964, + "step": 5648 + }, + { + "epoch": 0.4558954079573884, + "grad_norm": 0.707374632358551, + "learning_rate": 0.00016402175048570793, + "loss": 2.6191, + "step": 5649 + }, + { + "epoch": 0.45597611169397145, + "grad_norm": 0.7980892658233643, + "learning_rate": 0.00016400962229271072, + "loss": 2.6288, + "step": 5650 + }, + { + "epoch": 0.45605681543055443, + "grad_norm": 0.686187744140625, + "learning_rate": 0.0001639974925044264, + "loss": 2.6277, + "step": 5651 + }, + { + "epoch": 0.45613751916713746, + "grad_norm": 0.6970425844192505, + "learning_rate": 0.0001639853611211573, + "loss": 2.5726, + "step": 5652 + }, + { + "epoch": 0.45621822290372044, + 
"grad_norm": 0.701500415802002, + "learning_rate": 0.00016397322814320573, + "loss": 2.6275, + "step": 5653 + }, + { + "epoch": 0.45629892664030347, + "grad_norm": 0.8432207107543945, + "learning_rate": 0.00016396109357087407, + "loss": 2.6185, + "step": 5654 + }, + { + "epoch": 0.45637963037688645, + "grad_norm": 0.7049770951271057, + "learning_rate": 0.00016394895740446476, + "loss": 2.674, + "step": 5655 + }, + { + "epoch": 0.4564603341134695, + "grad_norm": 0.7068646550178528, + "learning_rate": 0.00016393681964428026, + "loss": 2.6072, + "step": 5656 + }, + { + "epoch": 0.45654103785005246, + "grad_norm": 0.7698760032653809, + "learning_rate": 0.00016392468029062312, + "loss": 2.6547, + "step": 5657 + }, + { + "epoch": 0.4566217415866355, + "grad_norm": 0.7381031513214111, + "learning_rate": 0.00016391253934379583, + "loss": 2.6125, + "step": 5658 + }, + { + "epoch": 0.45670244532321846, + "grad_norm": 0.7367781400680542, + "learning_rate": 0.00016390039680410097, + "loss": 2.6763, + "step": 5659 + }, + { + "epoch": 0.4567831490598015, + "grad_norm": 0.7416272759437561, + "learning_rate": 0.00016388825267184121, + "loss": 2.7059, + "step": 5660 + }, + { + "epoch": 0.4568638527963845, + "grad_norm": 0.6933416724205017, + "learning_rate": 0.0001638761069473192, + "loss": 2.6028, + "step": 5661 + }, + { + "epoch": 0.45694455653296745, + "grad_norm": 0.7311314940452576, + "learning_rate": 0.00016386395963083756, + "loss": 2.6266, + "step": 5662 + }, + { + "epoch": 0.4570252602695505, + "grad_norm": 0.7172734141349792, + "learning_rate": 0.00016385181072269917, + "loss": 2.6754, + "step": 5663 + }, + { + "epoch": 0.45710596400613346, + "grad_norm": 0.7286428213119507, + "learning_rate": 0.00016383966022320671, + "loss": 2.6637, + "step": 5664 + }, + { + "epoch": 0.4571866677427165, + "grad_norm": 0.7296474575996399, + "learning_rate": 0.00016382750813266308, + "loss": 2.6655, + "step": 5665 + }, + { + "epoch": 0.45726737147929947, + "grad_norm": 0.6929224133491516, 
+ "learning_rate": 0.00016381535445137105, + "loss": 2.6376, + "step": 5666 + }, + { + "epoch": 0.4573480752158825, + "grad_norm": 0.7012765407562256, + "learning_rate": 0.0001638031991796336, + "loss": 2.6222, + "step": 5667 + }, + { + "epoch": 0.4574287789524655, + "grad_norm": 0.7360745668411255, + "learning_rate": 0.00016379104231775368, + "loss": 2.6304, + "step": 5668 + }, + { + "epoch": 0.4575094826890485, + "grad_norm": 0.7276801466941833, + "learning_rate": 0.00016377888386603419, + "loss": 2.7046, + "step": 5669 + }, + { + "epoch": 0.4575901864256315, + "grad_norm": 0.688432514667511, + "learning_rate": 0.0001637667238247782, + "loss": 2.6598, + "step": 5670 + }, + { + "epoch": 0.4576708901622145, + "grad_norm": 0.6874414682388306, + "learning_rate": 0.00016375456219428877, + "loss": 2.7, + "step": 5671 + }, + { + "epoch": 0.4577515938987975, + "grad_norm": 0.711091160774231, + "learning_rate": 0.000163742398974869, + "loss": 2.6063, + "step": 5672 + }, + { + "epoch": 0.4578322976353805, + "grad_norm": 0.7131791710853577, + "learning_rate": 0.000163730234166822, + "loss": 2.5948, + "step": 5673 + }, + { + "epoch": 0.4579130013719635, + "grad_norm": 0.7166630625724792, + "learning_rate": 0.000163718067770451, + "loss": 2.6488, + "step": 5674 + }, + { + "epoch": 0.45799370510854653, + "grad_norm": 0.7285952568054199, + "learning_rate": 0.00016370589978605916, + "loss": 2.6445, + "step": 5675 + }, + { + "epoch": 0.4580744088451295, + "grad_norm": 0.728050172328949, + "learning_rate": 0.0001636937302139498, + "loss": 2.5425, + "step": 5676 + }, + { + "epoch": 0.45815511258171254, + "grad_norm": 0.7196047902107239, + "learning_rate": 0.00016368155905442615, + "loss": 2.7426, + "step": 5677 + }, + { + "epoch": 0.4582358163182955, + "grad_norm": 0.6844602823257446, + "learning_rate": 0.0001636693863077916, + "loss": 2.6157, + "step": 5678 + }, + { + "epoch": 0.45831652005487855, + "grad_norm": 0.7375781536102295, + "learning_rate": 0.0001636572119743495, + 
"loss": 2.7069, + "step": 5679 + }, + { + "epoch": 0.4583972237914615, + "grad_norm": 0.7667750120162964, + "learning_rate": 0.0001636450360544033, + "loss": 2.6589, + "step": 5680 + }, + { + "epoch": 0.45847792752804456, + "grad_norm": 0.6569861173629761, + "learning_rate": 0.00016363285854825642, + "loss": 2.6197, + "step": 5681 + }, + { + "epoch": 0.45855863126462754, + "grad_norm": 0.7177335023880005, + "learning_rate": 0.00016362067945621239, + "loss": 2.6104, + "step": 5682 + }, + { + "epoch": 0.45863933500121057, + "grad_norm": 0.7260481715202332, + "learning_rate": 0.00016360849877857469, + "loss": 2.6435, + "step": 5683 + }, + { + "epoch": 0.45872003873779355, + "grad_norm": 0.7083989381790161, + "learning_rate": 0.00016359631651564693, + "loss": 2.6366, + "step": 5684 + }, + { + "epoch": 0.4588007424743766, + "grad_norm": 0.6417020559310913, + "learning_rate": 0.00016358413266773271, + "loss": 2.6311, + "step": 5685 + }, + { + "epoch": 0.45888144621095955, + "grad_norm": 0.737856924533844, + "learning_rate": 0.0001635719472351357, + "loss": 2.6647, + "step": 5686 + }, + { + "epoch": 0.4589621499475426, + "grad_norm": 0.6774190068244934, + "learning_rate": 0.0001635597602181596, + "loss": 2.6366, + "step": 5687 + }, + { + "epoch": 0.45904285368412556, + "grad_norm": 0.6480480432510376, + "learning_rate": 0.0001635475716171081, + "loss": 2.6501, + "step": 5688 + }, + { + "epoch": 0.4591235574207086, + "grad_norm": 0.7886860370635986, + "learning_rate": 0.0001635353814322851, + "loss": 2.7239, + "step": 5689 + }, + { + "epoch": 0.45920426115729157, + "grad_norm": 0.7579021453857422, + "learning_rate": 0.0001635231896639942, + "loss": 2.6155, + "step": 5690 + }, + { + "epoch": 0.4592849648938746, + "grad_norm": 0.6853809356689453, + "learning_rate": 0.0001635109963125394, + "loss": 2.5933, + "step": 5691 + }, + { + "epoch": 0.4593656686304576, + "grad_norm": 0.661342978477478, + "learning_rate": 0.00016349880137822456, + "loss": 2.6277, + "step": 5692 + }, + 
{ + "epoch": 0.4594463723670406, + "grad_norm": 0.6795682311058044, + "learning_rate": 0.0001634866048613536, + "loss": 2.6221, + "step": 5693 + }, + { + "epoch": 0.4595270761036236, + "grad_norm": 0.7375383377075195, + "learning_rate": 0.00016347440676223047, + "loss": 2.6082, + "step": 5694 + }, + { + "epoch": 0.4596077798402066, + "grad_norm": 0.7565153241157532, + "learning_rate": 0.0001634622070811592, + "loss": 2.6615, + "step": 5695 + }, + { + "epoch": 0.4596884835767896, + "grad_norm": 0.6869745254516602, + "learning_rate": 0.00016345000581844386, + "loss": 2.6172, + "step": 5696 + }, + { + "epoch": 0.45976918731337263, + "grad_norm": 0.7192853689193726, + "learning_rate": 0.0001634378029743885, + "loss": 2.6324, + "step": 5697 + }, + { + "epoch": 0.4598498910499556, + "grad_norm": 0.6919218301773071, + "learning_rate": 0.00016342559854929726, + "loss": 2.5965, + "step": 5698 + }, + { + "epoch": 0.45993059478653864, + "grad_norm": 0.6715282797813416, + "learning_rate": 0.00016341339254347432, + "loss": 2.6225, + "step": 5699 + }, + { + "epoch": 0.4600112985231216, + "grad_norm": 0.6768380999565125, + "learning_rate": 0.00016340118495722388, + "loss": 2.6376, + "step": 5700 + }, + { + "epoch": 0.46009200225970465, + "grad_norm": 0.6898325681686401, + "learning_rate": 0.00016338897579085018, + "loss": 2.667, + "step": 5701 + }, + { + "epoch": 0.4601727059962876, + "grad_norm": 0.7171810865402222, + "learning_rate": 0.00016337676504465747, + "loss": 2.678, + "step": 5702 + }, + { + "epoch": 0.46025340973287066, + "grad_norm": 0.7050724029541016, + "learning_rate": 0.00016336455271895016, + "loss": 2.619, + "step": 5703 + }, + { + "epoch": 0.46033411346945363, + "grad_norm": 0.8287240862846375, + "learning_rate": 0.00016335233881403248, + "loss": 2.71, + "step": 5704 + }, + { + "epoch": 0.46041481720603666, + "grad_norm": 0.6880568861961365, + "learning_rate": 0.000163340123330209, + "loss": 2.6516, + "step": 5705 + }, + { + "epoch": 0.46049552094261964, + 
"grad_norm": 0.7222896218299866, + "learning_rate": 0.00016332790626778402, + "loss": 2.5899, + "step": 5706 + }, + { + "epoch": 0.4605762246792027, + "grad_norm": 0.7707448601722717, + "learning_rate": 0.00016331568762706207, + "loss": 2.6116, + "step": 5707 + }, + { + "epoch": 0.46065692841578565, + "grad_norm": 0.7780653834342957, + "learning_rate": 0.0001633034674083477, + "loss": 2.6072, + "step": 5708 + }, + { + "epoch": 0.4607376321523687, + "grad_norm": 0.7551524639129639, + "learning_rate": 0.00016329124561194545, + "loss": 2.548, + "step": 5709 + }, + { + "epoch": 0.46081833588895166, + "grad_norm": 0.9312284588813782, + "learning_rate": 0.0001632790222381599, + "loss": 2.6557, + "step": 5710 + }, + { + "epoch": 0.4608990396255347, + "grad_norm": 0.7404753565788269, + "learning_rate": 0.0001632667972872957, + "loss": 2.6889, + "step": 5711 + }, + { + "epoch": 0.46097974336211767, + "grad_norm": 0.7423726916313171, + "learning_rate": 0.00016325457075965752, + "loss": 2.6265, + "step": 5712 + }, + { + "epoch": 0.46106044709870064, + "grad_norm": 1.0683187246322632, + "learning_rate": 0.0001632423426555501, + "loss": 2.6827, + "step": 5713 + }, + { + "epoch": 0.4611411508352837, + "grad_norm": 0.7204160094261169, + "learning_rate": 0.0001632301129752782, + "loss": 2.702, + "step": 5714 + }, + { + "epoch": 0.46122185457186665, + "grad_norm": 0.7591153383255005, + "learning_rate": 0.0001632178817191466, + "loss": 2.6031, + "step": 5715 + }, + { + "epoch": 0.4613025583084497, + "grad_norm": 0.8147456645965576, + "learning_rate": 0.00016320564888746013, + "loss": 2.6117, + "step": 5716 + }, + { + "epoch": 0.46138326204503266, + "grad_norm": 0.7880246639251709, + "learning_rate": 0.00016319341448052364, + "loss": 2.5896, + "step": 5717 + }, + { + "epoch": 0.4614639657816157, + "grad_norm": 0.6875137686729431, + "learning_rate": 0.00016318117849864206, + "loss": 2.6258, + "step": 5718 + }, + { + "epoch": 0.46154466951819867, + "grad_norm": 0.7197960615158081, + 
"learning_rate": 0.00016316894094212044, + "loss": 2.6656, + "step": 5719 + }, + { + "epoch": 0.4616253732547817, + "grad_norm": 0.7049540281295776, + "learning_rate": 0.0001631567018112636, + "loss": 2.6698, + "step": 5720 + }, + { + "epoch": 0.4617060769913647, + "grad_norm": 0.7128825783729553, + "learning_rate": 0.00016314446110637668, + "loss": 2.6552, + "step": 5721 + }, + { + "epoch": 0.4617867807279477, + "grad_norm": 0.7956201434135437, + "learning_rate": 0.00016313221882776477, + "loss": 2.6747, + "step": 5722 + }, + { + "epoch": 0.4618674844645307, + "grad_norm": 0.7598347663879395, + "learning_rate": 0.0001631199749757329, + "loss": 2.6187, + "step": 5723 + }, + { + "epoch": 0.4619481882011137, + "grad_norm": 0.6587582230567932, + "learning_rate": 0.00016310772955058627, + "loss": 2.596, + "step": 5724 + }, + { + "epoch": 0.4620288919376967, + "grad_norm": 0.700136125087738, + "learning_rate": 0.00016309548255263003, + "loss": 2.6527, + "step": 5725 + }, + { + "epoch": 0.4621095956742797, + "grad_norm": 0.7246582508087158, + "learning_rate": 0.00016308323398216945, + "loss": 2.6577, + "step": 5726 + }, + { + "epoch": 0.4621902994108627, + "grad_norm": 0.6951557993888855, + "learning_rate": 0.00016307098383950977, + "loss": 2.5816, + "step": 5727 + }, + { + "epoch": 0.46227100314744574, + "grad_norm": 0.7109191417694092, + "learning_rate": 0.0001630587321249563, + "loss": 2.6586, + "step": 5728 + }, + { + "epoch": 0.4623517068840287, + "grad_norm": 0.7357863783836365, + "learning_rate": 0.0001630464788388144, + "loss": 2.691, + "step": 5729 + }, + { + "epoch": 0.46243241062061174, + "grad_norm": 0.7916350960731506, + "learning_rate": 0.00016303422398138945, + "loss": 2.6584, + "step": 5730 + }, + { + "epoch": 0.4625131143571947, + "grad_norm": 0.6543231010437012, + "learning_rate": 0.00016302196755298685, + "loss": 2.6482, + "step": 5731 + }, + { + "epoch": 0.46259381809377775, + "grad_norm": 0.6978787183761597, + "learning_rate": 0.00016300970955391208, 
+ "loss": 2.5956, + "step": 5732 + }, + { + "epoch": 0.46267452183036073, + "grad_norm": 0.7301886677742004, + "learning_rate": 0.00016299744998447065, + "loss": 2.6178, + "step": 5733 + }, + { + "epoch": 0.46275522556694376, + "grad_norm": 0.7381030321121216, + "learning_rate": 0.00016298518884496808, + "loss": 2.6712, + "step": 5734 + }, + { + "epoch": 0.46283592930352674, + "grad_norm": 0.7769027948379517, + "learning_rate": 0.00016297292613570995, + "loss": 2.6082, + "step": 5735 + }, + { + "epoch": 0.46291663304010977, + "grad_norm": 0.7698354721069336, + "learning_rate": 0.0001629606618570019, + "loss": 2.6543, + "step": 5736 + }, + { + "epoch": 0.46299733677669275, + "grad_norm": 0.7001554369926453, + "learning_rate": 0.00016294839600914957, + "loss": 2.6174, + "step": 5737 + }, + { + "epoch": 0.4630780405132758, + "grad_norm": 0.7589300274848938, + "learning_rate": 0.00016293612859245868, + "loss": 2.6338, + "step": 5738 + }, + { + "epoch": 0.46315874424985876, + "grad_norm": 0.7083945274353027, + "learning_rate": 0.00016292385960723493, + "loss": 2.6793, + "step": 5739 + }, + { + "epoch": 0.4632394479864418, + "grad_norm": 0.739439845085144, + "learning_rate": 0.00016291158905378412, + "loss": 2.7335, + "step": 5740 + }, + { + "epoch": 0.46332015172302476, + "grad_norm": 0.6868166923522949, + "learning_rate": 0.00016289931693241205, + "loss": 2.6139, + "step": 5741 + }, + { + "epoch": 0.4634008554596078, + "grad_norm": 0.7385871410369873, + "learning_rate": 0.0001628870432434246, + "loss": 2.6783, + "step": 5742 + }, + { + "epoch": 0.4634815591961908, + "grad_norm": 0.7227835655212402, + "learning_rate": 0.00016287476798712764, + "loss": 2.6732, + "step": 5743 + }, + { + "epoch": 0.4635622629327738, + "grad_norm": 0.6662411689758301, + "learning_rate": 0.00016286249116382709, + "loss": 2.6645, + "step": 5744 + }, + { + "epoch": 0.4636429666693568, + "grad_norm": 0.8110263347625732, + "learning_rate": 0.00016285021277382894, + "loss": 2.6448, + "step": 5745 
+ }, + { + "epoch": 0.4637236704059398, + "grad_norm": 0.7419269680976868, + "learning_rate": 0.0001628379328174392, + "loss": 2.7286, + "step": 5746 + }, + { + "epoch": 0.4638043741425228, + "grad_norm": 0.6518125534057617, + "learning_rate": 0.0001628256512949639, + "loss": 2.6545, + "step": 5747 + }, + { + "epoch": 0.4638850778791058, + "grad_norm": 0.6816060543060303, + "learning_rate": 0.00016281336820670917, + "loss": 2.6167, + "step": 5748 + }, + { + "epoch": 0.4639657816156888, + "grad_norm": 0.6537362337112427, + "learning_rate": 0.0001628010835529811, + "loss": 2.6522, + "step": 5749 + }, + { + "epoch": 0.46404648535227183, + "grad_norm": 0.6720992922782898, + "learning_rate": 0.00016278879733408585, + "loss": 2.6028, + "step": 5750 + }, + { + "epoch": 0.4641271890888548, + "grad_norm": 0.6778908371925354, + "learning_rate": 0.00016277650955032967, + "loss": 2.5591, + "step": 5751 + }, + { + "epoch": 0.46420789282543784, + "grad_norm": 0.6908471584320068, + "learning_rate": 0.0001627642202020187, + "loss": 2.6574, + "step": 5752 + }, + { + "epoch": 0.4642885965620208, + "grad_norm": 0.7034298181533813, + "learning_rate": 0.00016275192928945936, + "loss": 2.657, + "step": 5753 + }, + { + "epoch": 0.46436930029860385, + "grad_norm": 0.7245952486991882, + "learning_rate": 0.0001627396368129579, + "loss": 2.6572, + "step": 5754 + }, + { + "epoch": 0.4644500040351868, + "grad_norm": 0.6764482855796814, + "learning_rate": 0.0001627273427728207, + "loss": 2.6576, + "step": 5755 + }, + { + "epoch": 0.46453070777176986, + "grad_norm": 0.7074379920959473, + "learning_rate": 0.0001627150471693541, + "loss": 2.614, + "step": 5756 + }, + { + "epoch": 0.46461141150835283, + "grad_norm": 0.7292052507400513, + "learning_rate": 0.0001627027500028646, + "loss": 2.673, + "step": 5757 + }, + { + "epoch": 0.46469211524493587, + "grad_norm": 0.7554025650024414, + "learning_rate": 0.0001626904512736587, + "loss": 2.5919, + "step": 5758 + }, + { + "epoch": 0.46477281898151884, + 
"grad_norm": 0.6829606890678406, + "learning_rate": 0.00016267815098204284, + "loss": 2.7206, + "step": 5759 + }, + { + "epoch": 0.4648535227181019, + "grad_norm": 0.7201548218727112, + "learning_rate": 0.00016266584912832363, + "loss": 2.6651, + "step": 5760 + }, + { + "epoch": 0.46493422645468485, + "grad_norm": 0.6889227628707886, + "learning_rate": 0.00016265354571280764, + "loss": 2.6776, + "step": 5761 + }, + { + "epoch": 0.4650149301912679, + "grad_norm": 0.7286190986633301, + "learning_rate": 0.00016264124073580156, + "loss": 2.591, + "step": 5762 + }, + { + "epoch": 0.46509563392785086, + "grad_norm": 0.7222036123275757, + "learning_rate": 0.00016262893419761196, + "loss": 2.6422, + "step": 5763 + }, + { + "epoch": 0.46517633766443384, + "grad_norm": 0.6822768449783325, + "learning_rate": 0.00016261662609854562, + "loss": 2.6126, + "step": 5764 + }, + { + "epoch": 0.46525704140101687, + "grad_norm": 0.7263356447219849, + "learning_rate": 0.00016260431643890929, + "loss": 2.6304, + "step": 5765 + }, + { + "epoch": 0.46533774513759985, + "grad_norm": 0.7152180075645447, + "learning_rate": 0.00016259200521900972, + "loss": 2.6489, + "step": 5766 + }, + { + "epoch": 0.4654184488741829, + "grad_norm": 0.6988116502761841, + "learning_rate": 0.00016257969243915378, + "loss": 2.6151, + "step": 5767 + }, + { + "epoch": 0.46549915261076585, + "grad_norm": 0.7131790518760681, + "learning_rate": 0.00016256737809964831, + "loss": 2.6284, + "step": 5768 + }, + { + "epoch": 0.4655798563473489, + "grad_norm": 0.674196183681488, + "learning_rate": 0.00016255506220080025, + "loss": 2.5815, + "step": 5769 + }, + { + "epoch": 0.46566056008393186, + "grad_norm": 0.7166198492050171, + "learning_rate": 0.0001625427447429165, + "loss": 2.6594, + "step": 5770 + }, + { + "epoch": 0.4657412638205149, + "grad_norm": 0.6997127532958984, + "learning_rate": 0.00016253042572630407, + "loss": 2.6502, + "step": 5771 + }, + { + "epoch": 0.46582196755709787, + "grad_norm": 
0.7761591076850891, + "learning_rate": 0.00016251810515126994, + "loss": 2.624, + "step": 5772 + }, + { + "epoch": 0.4659026712936809, + "grad_norm": 0.7038728594779968, + "learning_rate": 0.00016250578301812125, + "loss": 2.6096, + "step": 5773 + }, + { + "epoch": 0.4659833750302639, + "grad_norm": 0.7080080509185791, + "learning_rate": 0.00016249345932716505, + "loss": 2.6196, + "step": 5774 + }, + { + "epoch": 0.4660640787668469, + "grad_norm": 0.7461444735527039, + "learning_rate": 0.00016248113407870847, + "loss": 2.65, + "step": 5775 + }, + { + "epoch": 0.4661447825034299, + "grad_norm": 0.7914463877677917, + "learning_rate": 0.00016246880727305868, + "loss": 2.6539, + "step": 5776 + }, + { + "epoch": 0.4662254862400129, + "grad_norm": 0.7067776918411255, + "learning_rate": 0.00016245647891052295, + "loss": 2.72, + "step": 5777 + }, + { + "epoch": 0.4663061899765959, + "grad_norm": 0.7190818190574646, + "learning_rate": 0.00016244414899140852, + "loss": 2.7029, + "step": 5778 + }, + { + "epoch": 0.46638689371317893, + "grad_norm": 0.6740003824234009, + "learning_rate": 0.00016243181751602261, + "loss": 2.6404, + "step": 5779 + }, + { + "epoch": 0.4664675974497619, + "grad_norm": 0.7942661643028259, + "learning_rate": 0.00016241948448467267, + "loss": 2.6333, + "step": 5780 + }, + { + "epoch": 0.46654830118634494, + "grad_norm": 0.6415690183639526, + "learning_rate": 0.00016240714989766597, + "loss": 2.6354, + "step": 5781 + }, + { + "epoch": 0.4666290049229279, + "grad_norm": 0.7287769913673401, + "learning_rate": 0.00016239481375530997, + "loss": 2.6721, + "step": 5782 + }, + { + "epoch": 0.46670970865951095, + "grad_norm": 0.8197699189186096, + "learning_rate": 0.00016238247605791212, + "loss": 2.7577, + "step": 5783 + }, + { + "epoch": 0.4667904123960939, + "grad_norm": 0.8182012438774109, + "learning_rate": 0.0001623701368057799, + "loss": 2.6475, + "step": 5784 + }, + { + "epoch": 0.46687111613267696, + "grad_norm": 0.6974665522575378, + "learning_rate": 
0.00016235779599922082, + "loss": 2.5897, + "step": 5785 + }, + { + "epoch": 0.46695181986925993, + "grad_norm": 0.7156379222869873, + "learning_rate": 0.00016234545363854247, + "loss": 2.5981, + "step": 5786 + }, + { + "epoch": 0.46703252360584296, + "grad_norm": 0.6875364780426025, + "learning_rate": 0.0001623331097240524, + "loss": 2.6333, + "step": 5787 + }, + { + "epoch": 0.46711322734242594, + "grad_norm": 0.7222917675971985, + "learning_rate": 0.00016232076425605835, + "loss": 2.5865, + "step": 5788 + }, + { + "epoch": 0.467193931079009, + "grad_norm": 0.7224915027618408, + "learning_rate": 0.00016230841723486792, + "loss": 2.667, + "step": 5789 + }, + { + "epoch": 0.46727463481559195, + "grad_norm": 0.7125402688980103, + "learning_rate": 0.00016229606866078887, + "loss": 2.6548, + "step": 5790 + }, + { + "epoch": 0.467355338552175, + "grad_norm": 0.6866132616996765, + "learning_rate": 0.00016228371853412894, + "loss": 2.6381, + "step": 5791 + }, + { + "epoch": 0.46743604228875796, + "grad_norm": 0.7573552131652832, + "learning_rate": 0.00016227136685519593, + "loss": 2.6766, + "step": 5792 + }, + { + "epoch": 0.467516746025341, + "grad_norm": 0.7565932273864746, + "learning_rate": 0.00016225901362429767, + "loss": 2.5965, + "step": 5793 + }, + { + "epoch": 0.46759744976192397, + "grad_norm": 0.7279250621795654, + "learning_rate": 0.00016224665884174207, + "loss": 2.6599, + "step": 5794 + }, + { + "epoch": 0.467678153498507, + "grad_norm": 0.7501276731491089, + "learning_rate": 0.000162234302507837, + "loss": 2.636, + "step": 5795 + }, + { + "epoch": 0.46775885723509, + "grad_norm": 0.7823930978775024, + "learning_rate": 0.00016222194462289042, + "loss": 2.6277, + "step": 5796 + }, + { + "epoch": 0.467839560971673, + "grad_norm": 0.7168415784835815, + "learning_rate": 0.00016220958518721034, + "loss": 2.6868, + "step": 5797 + }, + { + "epoch": 0.467920264708256, + "grad_norm": 0.7468454241752625, + "learning_rate": 0.00016219722420110478, + "loss": 2.7209, + 
"step": 5798 + }, + { + "epoch": 0.468000968444839, + "grad_norm": 0.6915228962898254, + "learning_rate": 0.0001621848616648818, + "loss": 2.6356, + "step": 5799 + }, + { + "epoch": 0.468081672181422, + "grad_norm": 0.7731573581695557, + "learning_rate": 0.00016217249757884955, + "loss": 2.6396, + "step": 5800 + }, + { + "epoch": 0.468162375918005, + "grad_norm": 0.6579388380050659, + "learning_rate": 0.0001621601319433161, + "loss": 2.6077, + "step": 5801 + }, + { + "epoch": 0.468243079654588, + "grad_norm": 0.7136246562004089, + "learning_rate": 0.00016214776475858967, + "loss": 2.6602, + "step": 5802 + }, + { + "epoch": 0.46832378339117103, + "grad_norm": 0.6929461359977722, + "learning_rate": 0.0001621353960249785, + "loss": 2.6851, + "step": 5803 + }, + { + "epoch": 0.468404487127754, + "grad_norm": 0.8001779913902283, + "learning_rate": 0.00016212302574279087, + "loss": 2.6577, + "step": 5804 + }, + { + "epoch": 0.46848519086433704, + "grad_norm": 0.7637671828269958, + "learning_rate": 0.00016211065391233498, + "loss": 2.6923, + "step": 5805 + }, + { + "epoch": 0.46856589460092, + "grad_norm": 0.6879906058311462, + "learning_rate": 0.0001620982805339193, + "loss": 2.6555, + "step": 5806 + }, + { + "epoch": 0.46864659833750305, + "grad_norm": 0.7731223702430725, + "learning_rate": 0.0001620859056078521, + "loss": 2.6301, + "step": 5807 + }, + { + "epoch": 0.468727302074086, + "grad_norm": 0.7351491451263428, + "learning_rate": 0.00016207352913444185, + "loss": 2.6154, + "step": 5808 + }, + { + "epoch": 0.46880800581066906, + "grad_norm": 0.716314435005188, + "learning_rate": 0.000162061151113997, + "loss": 2.6294, + "step": 5809 + }, + { + "epoch": 0.46888870954725204, + "grad_norm": 0.6974702477455139, + "learning_rate": 0.00016204877154682605, + "loss": 2.6046, + "step": 5810 + }, + { + "epoch": 0.46896941328383507, + "grad_norm": 0.7456035614013672, + "learning_rate": 0.00016203639043323745, + "loss": 2.6308, + "step": 5811 + }, + { + "epoch": 
0.46905011702041804, + "grad_norm": 0.7198047637939453, + "learning_rate": 0.0001620240077735399, + "loss": 2.6303, + "step": 5812 + }, + { + "epoch": 0.4691308207570011, + "grad_norm": 0.7098269462585449, + "learning_rate": 0.00016201162356804192, + "loss": 2.6352, + "step": 5813 + }, + { + "epoch": 0.46921152449358405, + "grad_norm": 0.7060410976409912, + "learning_rate": 0.0001619992378170522, + "loss": 2.6489, + "step": 5814 + }, + { + "epoch": 0.46929222823016703, + "grad_norm": 0.7126092314720154, + "learning_rate": 0.0001619868505208794, + "loss": 2.66, + "step": 5815 + }, + { + "epoch": 0.46937293196675006, + "grad_norm": 0.7391123175621033, + "learning_rate": 0.00016197446167983223, + "loss": 2.6066, + "step": 5816 + }, + { + "epoch": 0.46945363570333304, + "grad_norm": 0.7282211780548096, + "learning_rate": 0.0001619620712942195, + "loss": 2.6422, + "step": 5817 + }, + { + "epoch": 0.46953433943991607, + "grad_norm": 0.7581801414489746, + "learning_rate": 0.00016194967936434998, + "loss": 2.702, + "step": 5818 + }, + { + "epoch": 0.46961504317649905, + "grad_norm": 0.6649011373519897, + "learning_rate": 0.00016193728589053248, + "loss": 2.6235, + "step": 5819 + }, + { + "epoch": 0.4696957469130821, + "grad_norm": 0.720312237739563, + "learning_rate": 0.00016192489087307592, + "loss": 2.5961, + "step": 5820 + }, + { + "epoch": 0.46977645064966506, + "grad_norm": 0.72076016664505, + "learning_rate": 0.0001619124943122892, + "loss": 2.6793, + "step": 5821 + }, + { + "epoch": 0.4698571543862481, + "grad_norm": 0.6695740818977356, + "learning_rate": 0.0001619000962084813, + "loss": 2.6325, + "step": 5822 + }, + { + "epoch": 0.46993785812283106, + "grad_norm": 0.7678804993629456, + "learning_rate": 0.0001618876965619612, + "loss": 2.7473, + "step": 5823 + }, + { + "epoch": 0.4700185618594141, + "grad_norm": 0.782349169254303, + "learning_rate": 0.00016187529537303792, + "loss": 2.6139, + "step": 5824 + }, + { + "epoch": 0.4700992655959971, + "grad_norm": 
0.6906631588935852, + "learning_rate": 0.00016186289264202052, + "loss": 2.6529, + "step": 5825 + }, + { + "epoch": 0.4701799693325801, + "grad_norm": 0.732947051525116, + "learning_rate": 0.00016185048836921814, + "loss": 2.6416, + "step": 5826 + }, + { + "epoch": 0.4702606730691631, + "grad_norm": 0.8306718468666077, + "learning_rate": 0.0001618380825549399, + "loss": 2.6566, + "step": 5827 + }, + { + "epoch": 0.4703413768057461, + "grad_norm": 0.725764811038971, + "learning_rate": 0.00016182567519949502, + "loss": 2.6664, + "step": 5828 + }, + { + "epoch": 0.4704220805423291, + "grad_norm": 0.7301872372627258, + "learning_rate": 0.00016181326630319268, + "loss": 2.6666, + "step": 5829 + }, + { + "epoch": 0.4705027842789121, + "grad_norm": 0.7297122478485107, + "learning_rate": 0.00016180085586634216, + "loss": 2.6415, + "step": 5830 + }, + { + "epoch": 0.4705834880154951, + "grad_norm": 0.7445664405822754, + "learning_rate": 0.00016178844388925278, + "loss": 2.6112, + "step": 5831 + }, + { + "epoch": 0.47066419175207813, + "grad_norm": 0.7787267565727234, + "learning_rate": 0.00016177603037223384, + "loss": 2.6452, + "step": 5832 + }, + { + "epoch": 0.4707448954886611, + "grad_norm": 0.7386903762817383, + "learning_rate": 0.00016176361531559474, + "loss": 2.6919, + "step": 5833 + }, + { + "epoch": 0.47082559922524414, + "grad_norm": 0.7991776466369629, + "learning_rate": 0.0001617511987196449, + "loss": 2.6728, + "step": 5834 + }, + { + "epoch": 0.4709063029618271, + "grad_norm": 0.7196263670921326, + "learning_rate": 0.00016173878058469375, + "loss": 2.6008, + "step": 5835 + }, + { + "epoch": 0.47098700669841015, + "grad_norm": 0.6773477792739868, + "learning_rate": 0.00016172636091105086, + "loss": 2.6184, + "step": 5836 + }, + { + "epoch": 0.4710677104349931, + "grad_norm": 0.7238345742225647, + "learning_rate": 0.00016171393969902567, + "loss": 2.6221, + "step": 5837 + }, + { + "epoch": 0.47114841417157616, + "grad_norm": 0.702104926109314, + 
"learning_rate": 0.00016170151694892777, + "loss": 2.5909, + "step": 5838 + }, + { + "epoch": 0.47122911790815913, + "grad_norm": 0.7571590542793274, + "learning_rate": 0.00016168909266106677, + "loss": 2.6044, + "step": 5839 + }, + { + "epoch": 0.47130982164474217, + "grad_norm": 0.7408227324485779, + "learning_rate": 0.00016167666683575234, + "loss": 2.5771, + "step": 5840 + }, + { + "epoch": 0.47139052538132514, + "grad_norm": 0.6760764122009277, + "learning_rate": 0.00016166423947329414, + "loss": 2.6202, + "step": 5841 + }, + { + "epoch": 0.4714712291179082, + "grad_norm": 0.7085632681846619, + "learning_rate": 0.00016165181057400192, + "loss": 2.5887, + "step": 5842 + }, + { + "epoch": 0.47155193285449115, + "grad_norm": 0.7298943400382996, + "learning_rate": 0.00016163938013818538, + "loss": 2.609, + "step": 5843 + }, + { + "epoch": 0.4716326365910742, + "grad_norm": 0.7591157555580139, + "learning_rate": 0.0001616269481661544, + "loss": 2.6582, + "step": 5844 + }, + { + "epoch": 0.47171334032765716, + "grad_norm": 0.6727088093757629, + "learning_rate": 0.00016161451465821877, + "loss": 2.6289, + "step": 5845 + }, + { + "epoch": 0.4717940440642402, + "grad_norm": 0.6782706379890442, + "learning_rate": 0.00016160207961468835, + "loss": 2.6875, + "step": 5846 + }, + { + "epoch": 0.47187474780082317, + "grad_norm": 0.6839444041252136, + "learning_rate": 0.00016158964303587313, + "loss": 2.5687, + "step": 5847 + }, + { + "epoch": 0.4719554515374062, + "grad_norm": 0.7565997838973999, + "learning_rate": 0.00016157720492208295, + "loss": 2.6855, + "step": 5848 + }, + { + "epoch": 0.4720361552739892, + "grad_norm": 0.7286611199378967, + "learning_rate": 0.0001615647652736279, + "loss": 2.5906, + "step": 5849 + }, + { + "epoch": 0.4721168590105722, + "grad_norm": 0.7503396272659302, + "learning_rate": 0.00016155232409081793, + "loss": 2.6419, + "step": 5850 + }, + { + "epoch": 0.4721975627471552, + "grad_norm": 0.6924198865890503, + "learning_rate": 
0.00016153988137396317, + "loss": 2.661, + "step": 5851 + }, + { + "epoch": 0.4722782664837382, + "grad_norm": 0.7731672525405884, + "learning_rate": 0.0001615274371233737, + "loss": 2.6993, + "step": 5852 + }, + { + "epoch": 0.4723589702203212, + "grad_norm": 0.7422799468040466, + "learning_rate": 0.00016151499133935964, + "loss": 2.6134, + "step": 5853 + }, + { + "epoch": 0.4724396739569042, + "grad_norm": 0.6924546957015991, + "learning_rate": 0.0001615025440222312, + "loss": 2.672, + "step": 5854 + }, + { + "epoch": 0.4725203776934872, + "grad_norm": 0.7205976843833923, + "learning_rate": 0.00016149009517229862, + "loss": 2.6722, + "step": 5855 + }, + { + "epoch": 0.47260108143007024, + "grad_norm": 0.6898519992828369, + "learning_rate": 0.0001614776447898721, + "loss": 2.6474, + "step": 5856 + }, + { + "epoch": 0.4726817851666532, + "grad_norm": 0.7512481212615967, + "learning_rate": 0.00016146519287526197, + "loss": 2.7413, + "step": 5857 + }, + { + "epoch": 0.47276248890323624, + "grad_norm": 0.6734220385551453, + "learning_rate": 0.0001614527394287786, + "loss": 2.6114, + "step": 5858 + }, + { + "epoch": 0.4728431926398192, + "grad_norm": 0.6745339632034302, + "learning_rate": 0.00016144028445073228, + "loss": 2.6039, + "step": 5859 + }, + { + "epoch": 0.47292389637640225, + "grad_norm": 0.7463086843490601, + "learning_rate": 0.0001614278279414335, + "loss": 2.6109, + "step": 5860 + }, + { + "epoch": 0.47300460011298523, + "grad_norm": 0.7203261256217957, + "learning_rate": 0.00016141536990119264, + "loss": 2.651, + "step": 5861 + }, + { + "epoch": 0.47308530384956826, + "grad_norm": 0.7718746066093445, + "learning_rate": 0.00016140291033032024, + "loss": 2.6953, + "step": 5862 + }, + { + "epoch": 0.47316600758615124, + "grad_norm": 0.7854858040809631, + "learning_rate": 0.0001613904492291268, + "loss": 2.5941, + "step": 5863 + }, + { + "epoch": 0.47324671132273427, + "grad_norm": 0.7218664288520813, + "learning_rate": 0.0001613779865979229, + "loss": 
2.6447, + "step": 5864 + }, + { + "epoch": 0.47332741505931725, + "grad_norm": 0.7479045987129211, + "learning_rate": 0.0001613655224370191, + "loss": 2.6662, + "step": 5865 + }, + { + "epoch": 0.4734081187959002, + "grad_norm": 0.7335021495819092, + "learning_rate": 0.00016135305674672612, + "loss": 2.6283, + "step": 5866 + }, + { + "epoch": 0.47348882253248326, + "grad_norm": 0.7650331258773804, + "learning_rate": 0.00016134058952735453, + "loss": 2.7168, + "step": 5867 + }, + { + "epoch": 0.47356952626906623, + "grad_norm": 0.733383297920227, + "learning_rate": 0.00016132812077921513, + "loss": 2.6352, + "step": 5868 + }, + { + "epoch": 0.47365023000564926, + "grad_norm": 1.3944146633148193, + "learning_rate": 0.00016131565050261866, + "loss": 2.7518, + "step": 5869 + }, + { + "epoch": 0.47373093374223224, + "grad_norm": 0.746112585067749, + "learning_rate": 0.0001613031786978759, + "loss": 2.6253, + "step": 5870 + }, + { + "epoch": 0.4738116374788153, + "grad_norm": 0.9859737753868103, + "learning_rate": 0.00016129070536529766, + "loss": 2.6682, + "step": 5871 + }, + { + "epoch": 0.47389234121539825, + "grad_norm": 0.7358877062797546, + "learning_rate": 0.00016127823050519484, + "loss": 2.6712, + "step": 5872 + }, + { + "epoch": 0.4739730449519813, + "grad_norm": 0.7379923462867737, + "learning_rate": 0.0001612657541178783, + "loss": 2.6268, + "step": 5873 + }, + { + "epoch": 0.47405374868856426, + "grad_norm": 0.7671005725860596, + "learning_rate": 0.00016125327620365907, + "loss": 2.6127, + "step": 5874 + }, + { + "epoch": 0.4741344524251473, + "grad_norm": 0.8007156252861023, + "learning_rate": 0.00016124079676284805, + "loss": 2.6173, + "step": 5875 + }, + { + "epoch": 0.47421515616173027, + "grad_norm": 0.7930500507354736, + "learning_rate": 0.00016122831579575627, + "loss": 2.589, + "step": 5876 + }, + { + "epoch": 0.4742958598983133, + "grad_norm": 0.788006603717804, + "learning_rate": 0.00016121583330269484, + "loss": 2.6731, + "step": 5877 + }, + { + 
"epoch": 0.4743765636348963, + "grad_norm": 0.742148220539093, + "learning_rate": 0.00016120334928397483, + "loss": 2.674, + "step": 5878 + }, + { + "epoch": 0.4744572673714793, + "grad_norm": 0.6823038458824158, + "learning_rate": 0.00016119086373990736, + "loss": 2.6153, + "step": 5879 + }, + { + "epoch": 0.4745379711080623, + "grad_norm": 0.7542331218719482, + "learning_rate": 0.00016117837667080356, + "loss": 2.6739, + "step": 5880 + }, + { + "epoch": 0.4746186748446453, + "grad_norm": 0.8163543343544006, + "learning_rate": 0.00016116588807697476, + "loss": 2.6558, + "step": 5881 + }, + { + "epoch": 0.4746993785812283, + "grad_norm": 0.7528213858604431, + "learning_rate": 0.0001611533979587321, + "loss": 2.6243, + "step": 5882 + }, + { + "epoch": 0.4747800823178113, + "grad_norm": 0.7476626038551331, + "learning_rate": 0.00016114090631638695, + "loss": 2.5984, + "step": 5883 + }, + { + "epoch": 0.4748607860543943, + "grad_norm": 0.7436621785163879, + "learning_rate": 0.00016112841315025055, + "loss": 2.6118, + "step": 5884 + }, + { + "epoch": 0.47494148979097733, + "grad_norm": 0.8024004101753235, + "learning_rate": 0.0001611159184606343, + "loss": 2.6926, + "step": 5885 + }, + { + "epoch": 0.4750221935275603, + "grad_norm": 0.7475626468658447, + "learning_rate": 0.00016110342224784962, + "loss": 2.6175, + "step": 5886 + }, + { + "epoch": 0.47510289726414334, + "grad_norm": 0.7900637984275818, + "learning_rate": 0.00016109092451220796, + "loss": 2.6503, + "step": 5887 + }, + { + "epoch": 0.4751836010007263, + "grad_norm": 0.6988356113433838, + "learning_rate": 0.00016107842525402074, + "loss": 2.6494, + "step": 5888 + }, + { + "epoch": 0.47526430473730935, + "grad_norm": 1.0214186906814575, + "learning_rate": 0.00016106592447359948, + "loss": 2.6476, + "step": 5889 + }, + { + "epoch": 0.4753450084738923, + "grad_norm": 0.741527795791626, + "learning_rate": 0.00016105342217125578, + "loss": 2.6054, + "step": 5890 + }, + { + "epoch": 0.47542571221047536, + 
"grad_norm": 0.7196603417396545, + "learning_rate": 0.0001610409183473012, + "loss": 2.6146, + "step": 5891 + }, + { + "epoch": 0.47550641594705834, + "grad_norm": 0.8130923509597778, + "learning_rate": 0.00016102841300204737, + "loss": 2.6505, + "step": 5892 + }, + { + "epoch": 0.47558711968364137, + "grad_norm": 0.7929537892341614, + "learning_rate": 0.00016101590613580596, + "loss": 2.6725, + "step": 5893 + }, + { + "epoch": 0.47566782342022434, + "grad_norm": 0.7149303555488586, + "learning_rate": 0.00016100339774888865, + "loss": 2.6272, + "step": 5894 + }, + { + "epoch": 0.4757485271568074, + "grad_norm": 0.7242792248725891, + "learning_rate": 0.00016099088784160724, + "loss": 2.5948, + "step": 5895 + }, + { + "epoch": 0.47582923089339035, + "grad_norm": 0.7571540474891663, + "learning_rate": 0.00016097837641427346, + "loss": 2.689, + "step": 5896 + }, + { + "epoch": 0.4759099346299734, + "grad_norm": 0.7402021288871765, + "learning_rate": 0.00016096586346719916, + "loss": 2.7035, + "step": 5897 + }, + { + "epoch": 0.47599063836655636, + "grad_norm": 0.7195574045181274, + "learning_rate": 0.00016095334900069613, + "loss": 2.5862, + "step": 5898 + }, + { + "epoch": 0.4760713421031394, + "grad_norm": 0.7677412033081055, + "learning_rate": 0.00016094083301507634, + "loss": 2.6715, + "step": 5899 + }, + { + "epoch": 0.47615204583972237, + "grad_norm": 0.7131708860397339, + "learning_rate": 0.0001609283155106517, + "loss": 2.6555, + "step": 5900 + }, + { + "epoch": 0.4762327495763054, + "grad_norm": 0.6774055361747742, + "learning_rate": 0.00016091579648773414, + "loss": 2.621, + "step": 5901 + }, + { + "epoch": 0.4763134533128884, + "grad_norm": 0.6873257160186768, + "learning_rate": 0.00016090327594663571, + "loss": 2.6719, + "step": 5902 + }, + { + "epoch": 0.4763941570494714, + "grad_norm": 0.8004229068756104, + "learning_rate": 0.00016089075388766845, + "loss": 2.6926, + "step": 5903 + }, + { + "epoch": 0.4764748607860544, + "grad_norm": 0.7196173667907715, + 
"learning_rate": 0.00016087823031114438, + "loss": 2.6032, + "step": 5904 + }, + { + "epoch": 0.4765555645226374, + "grad_norm": 0.7665518522262573, + "learning_rate": 0.00016086570521737573, + "loss": 2.6359, + "step": 5905 + }, + { + "epoch": 0.4766362682592204, + "grad_norm": 0.7240240573883057, + "learning_rate": 0.0001608531786066746, + "loss": 2.6489, + "step": 5906 + }, + { + "epoch": 0.47671697199580343, + "grad_norm": 0.7603839039802551, + "learning_rate": 0.00016084065047935317, + "loss": 2.6064, + "step": 5907 + }, + { + "epoch": 0.4767976757323864, + "grad_norm": 0.7394058704376221, + "learning_rate": 0.0001608281208357237, + "loss": 2.6643, + "step": 5908 + }, + { + "epoch": 0.47687837946896944, + "grad_norm": 0.7183148860931396, + "learning_rate": 0.00016081558967609845, + "loss": 2.56, + "step": 5909 + }, + { + "epoch": 0.4769590832055524, + "grad_norm": 0.7181926965713501, + "learning_rate": 0.00016080305700078972, + "loss": 2.6665, + "step": 5910 + }, + { + "epoch": 0.47703978694213545, + "grad_norm": 0.7634081840515137, + "learning_rate": 0.00016079052281010988, + "loss": 2.7076, + "step": 5911 + }, + { + "epoch": 0.4771204906787184, + "grad_norm": 0.7928739190101624, + "learning_rate": 0.0001607779871043713, + "loss": 2.6512, + "step": 5912 + }, + { + "epoch": 0.47720119441530146, + "grad_norm": 0.7192893028259277, + "learning_rate": 0.00016076544988388643, + "loss": 2.6453, + "step": 5913 + }, + { + "epoch": 0.47728189815188443, + "grad_norm": 0.7171720862388611, + "learning_rate": 0.00016075291114896767, + "loss": 2.6501, + "step": 5914 + }, + { + "epoch": 0.47736260188846746, + "grad_norm": 0.6787160038948059, + "learning_rate": 0.00016074037089992756, + "loss": 2.6566, + "step": 5915 + }, + { + "epoch": 0.47744330562505044, + "grad_norm": 0.8118634819984436, + "learning_rate": 0.00016072782913707868, + "loss": 2.6635, + "step": 5916 + }, + { + "epoch": 0.4775240093616334, + "grad_norm": 0.7188509702682495, + "learning_rate": 
0.0001607152858607335, + "loss": 2.6899, + "step": 5917 + }, + { + "epoch": 0.47760471309821645, + "grad_norm": 0.6742647290229797, + "learning_rate": 0.00016070274107120468, + "loss": 2.6221, + "step": 5918 + }, + { + "epoch": 0.4776854168347994, + "grad_norm": 0.7274083495140076, + "learning_rate": 0.00016069019476880488, + "loss": 2.6588, + "step": 5919 + }, + { + "epoch": 0.47776612057138246, + "grad_norm": 0.6984386444091797, + "learning_rate": 0.00016067764695384682, + "loss": 2.6376, + "step": 5920 + }, + { + "epoch": 0.47784682430796543, + "grad_norm": 0.7260883450508118, + "learning_rate": 0.00016066509762664315, + "loss": 2.6623, + "step": 5921 + }, + { + "epoch": 0.47792752804454847, + "grad_norm": 0.7540579438209534, + "learning_rate": 0.00016065254678750666, + "loss": 2.695, + "step": 5922 + }, + { + "epoch": 0.47800823178113144, + "grad_norm": 0.7032651305198669, + "learning_rate": 0.00016063999443675017, + "loss": 2.6791, + "step": 5923 + }, + { + "epoch": 0.4780889355177145, + "grad_norm": 0.682842493057251, + "learning_rate": 0.0001606274405746865, + "loss": 2.6198, + "step": 5924 + }, + { + "epoch": 0.47816963925429745, + "grad_norm": 0.6843859553337097, + "learning_rate": 0.00016061488520162853, + "loss": 2.6432, + "step": 5925 + }, + { + "epoch": 0.4782503429908805, + "grad_norm": 0.652119517326355, + "learning_rate": 0.00016060232831788918, + "loss": 2.6461, + "step": 5926 + }, + { + "epoch": 0.47833104672746346, + "grad_norm": 0.6986887454986572, + "learning_rate": 0.0001605897699237814, + "loss": 2.5885, + "step": 5927 + }, + { + "epoch": 0.4784117504640465, + "grad_norm": 0.7156725525856018, + "learning_rate": 0.00016057721001961817, + "loss": 2.6526, + "step": 5928 + }, + { + "epoch": 0.47849245420062947, + "grad_norm": 0.7367579936981201, + "learning_rate": 0.0001605646486057125, + "loss": 2.5842, + "step": 5929 + }, + { + "epoch": 0.4785731579372125, + "grad_norm": 0.7059770822525024, + "learning_rate": 0.00016055208568237746, + "loss": 
2.617, + "step": 5930 + }, + { + "epoch": 0.4786538616737955, + "grad_norm": 0.7225117087364197, + "learning_rate": 0.00016053952124992619, + "loss": 2.6499, + "step": 5931 + }, + { + "epoch": 0.4787345654103785, + "grad_norm": 0.7027475237846375, + "learning_rate": 0.00016052695530867177, + "loss": 2.5934, + "step": 5932 + }, + { + "epoch": 0.4788152691469615, + "grad_norm": 0.7031852602958679, + "learning_rate": 0.00016051438785892743, + "loss": 2.5947, + "step": 5933 + }, + { + "epoch": 0.4788959728835445, + "grad_norm": 0.6731768846511841, + "learning_rate": 0.00016050181890100635, + "loss": 2.6811, + "step": 5934 + }, + { + "epoch": 0.4789766766201275, + "grad_norm": 0.7120038866996765, + "learning_rate": 0.0001604892484352218, + "loss": 2.6625, + "step": 5935 + }, + { + "epoch": 0.4790573803567105, + "grad_norm": 0.6895150542259216, + "learning_rate": 0.00016047667646188702, + "loss": 2.6784, + "step": 5936 + }, + { + "epoch": 0.4791380840932935, + "grad_norm": 0.7080708742141724, + "learning_rate": 0.0001604641029813154, + "loss": 2.6491, + "step": 5937 + }, + { + "epoch": 0.47921878782987654, + "grad_norm": 0.6522819399833679, + "learning_rate": 0.00016045152799382025, + "loss": 2.6113, + "step": 5938 + }, + { + "epoch": 0.4792994915664595, + "grad_norm": 0.6988112926483154, + "learning_rate": 0.00016043895149971506, + "loss": 2.6892, + "step": 5939 + }, + { + "epoch": 0.47938019530304254, + "grad_norm": 0.7545368671417236, + "learning_rate": 0.00016042637349931318, + "loss": 2.6872, + "step": 5940 + }, + { + "epoch": 0.4794608990396255, + "grad_norm": 0.7083707451820374, + "learning_rate": 0.0001604137939929281, + "loss": 2.6726, + "step": 5941 + }, + { + "epoch": 0.47954160277620855, + "grad_norm": 0.8198027014732361, + "learning_rate": 0.00016040121298087337, + "loss": 2.647, + "step": 5942 + }, + { + "epoch": 0.47962230651279153, + "grad_norm": 0.7296201586723328, + "learning_rate": 0.00016038863046346252, + "loss": 2.7122, + "step": 5943 + }, + { + 
"epoch": 0.47970301024937456, + "grad_norm": 0.7262474298477173, + "learning_rate": 0.00016037604644100913, + "loss": 2.6903, + "step": 5944 + }, + { + "epoch": 0.47978371398595754, + "grad_norm": 0.8010182976722717, + "learning_rate": 0.00016036346091382686, + "loss": 2.6942, + "step": 5945 + }, + { + "epoch": 0.47986441772254057, + "grad_norm": 0.7227098345756531, + "learning_rate": 0.00016035087388222932, + "loss": 2.6661, + "step": 5946 + }, + { + "epoch": 0.47994512145912355, + "grad_norm": 0.7374662756919861, + "learning_rate": 0.00016033828534653028, + "loss": 2.6233, + "step": 5947 + }, + { + "epoch": 0.4800258251957066, + "grad_norm": 0.7139650583267212, + "learning_rate": 0.00016032569530704342, + "loss": 2.5859, + "step": 5948 + }, + { + "epoch": 0.48010652893228956, + "grad_norm": 0.7067660689353943, + "learning_rate": 0.00016031310376408254, + "loss": 2.6677, + "step": 5949 + }, + { + "epoch": 0.4801872326688726, + "grad_norm": 0.694715142250061, + "learning_rate": 0.00016030051071796146, + "loss": 2.6415, + "step": 5950 + }, + { + "epoch": 0.48026793640545556, + "grad_norm": 0.728918194770813, + "learning_rate": 0.00016028791616899403, + "loss": 2.6274, + "step": 5951 + }, + { + "epoch": 0.4803486401420386, + "grad_norm": 0.699846088886261, + "learning_rate": 0.00016027532011749412, + "loss": 2.6613, + "step": 5952 + }, + { + "epoch": 0.4804293438786216, + "grad_norm": 0.7177432179450989, + "learning_rate": 0.0001602627225637757, + "loss": 2.6107, + "step": 5953 + }, + { + "epoch": 0.4805100476152046, + "grad_norm": 0.7502370476722717, + "learning_rate": 0.00016025012350815267, + "loss": 2.6534, + "step": 5954 + }, + { + "epoch": 0.4805907513517876, + "grad_norm": 0.7730218172073364, + "learning_rate": 0.0001602375229509391, + "loss": 2.7037, + "step": 5955 + }, + { + "epoch": 0.4806714550883706, + "grad_norm": 0.7046666145324707, + "learning_rate": 0.00016022492089244898, + "loss": 2.6336, + "step": 5956 + }, + { + "epoch": 0.4807521588249536, + 
"grad_norm": 0.7991104125976562, + "learning_rate": 0.0001602123173329964, + "loss": 2.7024, + "step": 5957 + }, + { + "epoch": 0.4808328625615366, + "grad_norm": 0.7056288123130798, + "learning_rate": 0.00016019971227289548, + "loss": 2.6088, + "step": 5958 + }, + { + "epoch": 0.4809135662981196, + "grad_norm": 0.7277925610542297, + "learning_rate": 0.00016018710571246038, + "loss": 2.6245, + "step": 5959 + }, + { + "epoch": 0.48099427003470263, + "grad_norm": 0.7545790672302246, + "learning_rate": 0.00016017449765200526, + "loss": 2.6076, + "step": 5960 + }, + { + "epoch": 0.4810749737712856, + "grad_norm": 0.7106321454048157, + "learning_rate": 0.00016016188809184434, + "loss": 2.5561, + "step": 5961 + }, + { + "epoch": 0.48115567750786864, + "grad_norm": 0.7464704513549805, + "learning_rate": 0.0001601492770322919, + "loss": 2.6336, + "step": 5962 + }, + { + "epoch": 0.4812363812444516, + "grad_norm": 0.7531768083572388, + "learning_rate": 0.00016013666447366228, + "loss": 2.6236, + "step": 5963 + }, + { + "epoch": 0.48131708498103465, + "grad_norm": 0.7412876486778259, + "learning_rate": 0.00016012405041626978, + "loss": 2.6309, + "step": 5964 + }, + { + "epoch": 0.4813977887176176, + "grad_norm": 0.7030940055847168, + "learning_rate": 0.00016011143486042878, + "loss": 2.6252, + "step": 5965 + }, + { + "epoch": 0.48147849245420066, + "grad_norm": 0.7932302951812744, + "learning_rate": 0.00016009881780645367, + "loss": 2.6797, + "step": 5966 + }, + { + "epoch": 0.48155919619078363, + "grad_norm": 0.7366262078285217, + "learning_rate": 0.00016008619925465893, + "loss": 2.6616, + "step": 5967 + }, + { + "epoch": 0.4816398999273666, + "grad_norm": 0.6938421130180359, + "learning_rate": 0.00016007357920535902, + "loss": 2.6888, + "step": 5968 + }, + { + "epoch": 0.48172060366394964, + "grad_norm": 0.7560005784034729, + "learning_rate": 0.00016006095765886853, + "loss": 2.6044, + "step": 5969 + }, + { + "epoch": 0.4818013074005326, + "grad_norm": 0.7330430150032043, 
+ "learning_rate": 0.0001600483346155019, + "loss": 2.7023, + "step": 5970 + }, + { + "epoch": 0.48188201113711565, + "grad_norm": 0.7257955074310303, + "learning_rate": 0.00016003571007557388, + "loss": 2.6763, + "step": 5971 + }, + { + "epoch": 0.4819627148736986, + "grad_norm": 0.704187273979187, + "learning_rate": 0.000160023084039399, + "loss": 2.6229, + "step": 5972 + }, + { + "epoch": 0.48204341861028166, + "grad_norm": 0.7014813423156738, + "learning_rate": 0.00016001045650729196, + "loss": 2.6207, + "step": 5973 + }, + { + "epoch": 0.48212412234686464, + "grad_norm": 0.8039405941963196, + "learning_rate": 0.00015999782747956747, + "loss": 2.6198, + "step": 5974 + }, + { + "epoch": 0.48220482608344767, + "grad_norm": 0.7114945650100708, + "learning_rate": 0.0001599851969565403, + "loss": 2.6154, + "step": 5975 + }, + { + "epoch": 0.48228552982003065, + "grad_norm": 0.7603329420089722, + "learning_rate": 0.00015997256493852517, + "loss": 2.6217, + "step": 5976 + }, + { + "epoch": 0.4823662335566137, + "grad_norm": 0.7773346900939941, + "learning_rate": 0.000159959931425837, + "loss": 2.7054, + "step": 5977 + }, + { + "epoch": 0.48244693729319665, + "grad_norm": 0.8022029399871826, + "learning_rate": 0.0001599472964187906, + "loss": 2.6844, + "step": 5978 + }, + { + "epoch": 0.4825276410297797, + "grad_norm": 0.7384541630744934, + "learning_rate": 0.00015993465991770087, + "loss": 2.6516, + "step": 5979 + }, + { + "epoch": 0.48260834476636266, + "grad_norm": 0.6993509531021118, + "learning_rate": 0.00015992202192288273, + "loss": 2.6837, + "step": 5980 + }, + { + "epoch": 0.4826890485029457, + "grad_norm": 0.7430509328842163, + "learning_rate": 0.00015990938243465116, + "loss": 2.6717, + "step": 5981 + }, + { + "epoch": 0.48276975223952867, + "grad_norm": 0.7544847726821899, + "learning_rate": 0.0001598967414533212, + "loss": 2.6573, + "step": 5982 + }, + { + "epoch": 0.4828504559761117, + "grad_norm": 0.736955463886261, + "learning_rate": 
0.00015988409897920786, + "loss": 2.6865, + "step": 5983 + }, + { + "epoch": 0.4829311597126947, + "grad_norm": 0.7771684527397156, + "learning_rate": 0.00015987145501262622, + "loss": 2.6173, + "step": 5984 + }, + { + "epoch": 0.4830118634492777, + "grad_norm": 0.7504391670227051, + "learning_rate": 0.00015985880955389143, + "loss": 2.6218, + "step": 5985 + }, + { + "epoch": 0.4830925671858607, + "grad_norm": 0.7025442123413086, + "learning_rate": 0.00015984616260331861, + "loss": 2.6107, + "step": 5986 + }, + { + "epoch": 0.4831732709224437, + "grad_norm": 0.6906485557556152, + "learning_rate": 0.000159833514161223, + "loss": 2.633, + "step": 5987 + }, + { + "epoch": 0.4832539746590267, + "grad_norm": 0.7771004438400269, + "learning_rate": 0.00015982086422791983, + "loss": 2.5956, + "step": 5988 + }, + { + "epoch": 0.48333467839560973, + "grad_norm": 0.6927372813224792, + "learning_rate": 0.00015980821280372432, + "loss": 2.5984, + "step": 5989 + }, + { + "epoch": 0.4834153821321927, + "grad_norm": 0.7196357846260071, + "learning_rate": 0.00015979555988895184, + "loss": 2.6386, + "step": 5990 + }, + { + "epoch": 0.48349608586877574, + "grad_norm": 0.7601087689399719, + "learning_rate": 0.0001597829054839177, + "loss": 2.6707, + "step": 5991 + }, + { + "epoch": 0.4835767896053587, + "grad_norm": 0.7783588767051697, + "learning_rate": 0.00015977024958893722, + "loss": 2.5815, + "step": 5992 + }, + { + "epoch": 0.48365749334194175, + "grad_norm": 0.7651833891868591, + "learning_rate": 0.00015975759220432592, + "loss": 2.6235, + "step": 5993 + }, + { + "epoch": 0.4837381970785247, + "grad_norm": 0.7158511877059937, + "learning_rate": 0.0001597449333303992, + "loss": 2.6813, + "step": 5994 + }, + { + "epoch": 0.48381890081510776, + "grad_norm": 0.7411341667175293, + "learning_rate": 0.0001597322729674726, + "loss": 2.7231, + "step": 5995 + }, + { + "epoch": 0.48389960455169073, + "grad_norm": 0.7168158292770386, + "learning_rate": 0.0001597196111158616, + "loss": 
2.6408, + "step": 5996 + }, + { + "epoch": 0.48398030828827376, + "grad_norm": 0.7603393793106079, + "learning_rate": 0.00015970694777588175, + "loss": 2.7821, + "step": 5997 + }, + { + "epoch": 0.48406101202485674, + "grad_norm": 0.7298564910888672, + "learning_rate": 0.0001596942829478487, + "loss": 2.6828, + "step": 5998 + }, + { + "epoch": 0.4841417157614398, + "grad_norm": 0.7850572466850281, + "learning_rate": 0.0001596816166320781, + "loss": 2.6191, + "step": 5999 + }, + { + "epoch": 0.48422241949802275, + "grad_norm": 0.7697601914405823, + "learning_rate": 0.00015966894882888562, + "loss": 2.6768, + "step": 6000 + }, + { + "epoch": 0.48422241949802275, + "eval_loss": 2.5610127449035645, + "eval_runtime": 760.0481, + "eval_samples_per_second": 3.447, + "eval_steps_per_second": 0.575, + "step": 6000 + }, + { + "epoch": 0.4843031232346058, + "grad_norm": 0.7212432026863098, + "learning_rate": 0.00015965627953858693, + "loss": 2.5967, + "step": 6001 + }, + { + "epoch": 0.48438382697118876, + "grad_norm": 0.7629631757736206, + "learning_rate": 0.0001596436087614978, + "loss": 2.7005, + "step": 6002 + }, + { + "epoch": 0.4844645307077718, + "grad_norm": 0.7154754400253296, + "learning_rate": 0.00015963093649793404, + "loss": 2.6909, + "step": 6003 + }, + { + "epoch": 0.48454523444435477, + "grad_norm": 0.7365279793739319, + "learning_rate": 0.00015961826274821147, + "loss": 2.6268, + "step": 6004 + }, + { + "epoch": 0.4846259381809378, + "grad_norm": 0.8114632964134216, + "learning_rate": 0.00015960558751264596, + "loss": 2.6647, + "step": 6005 + }, + { + "epoch": 0.4847066419175208, + "grad_norm": 0.7411556243896484, + "learning_rate": 0.00015959291079155338, + "loss": 2.6378, + "step": 6006 + }, + { + "epoch": 0.4847873456541038, + "grad_norm": 0.7137390375137329, + "learning_rate": 0.00015958023258524968, + "loss": 2.6454, + "step": 6007 + }, + { + "epoch": 0.4848680493906868, + "grad_norm": 0.7477054595947266, + "learning_rate": 0.00015956755289405088, + 
"loss": 2.6463, + "step": 6008 + }, + { + "epoch": 0.4849487531272698, + "grad_norm": 0.7198071479797363, + "learning_rate": 0.0001595548717182729, + "loss": 2.6537, + "step": 6009 + }, + { + "epoch": 0.4850294568638528, + "grad_norm": 0.6697781085968018, + "learning_rate": 0.00015954218905823186, + "loss": 2.7018, + "step": 6010 + }, + { + "epoch": 0.4851101606004358, + "grad_norm": 0.7577201724052429, + "learning_rate": 0.00015952950491424382, + "loss": 2.6531, + "step": 6011 + }, + { + "epoch": 0.4851908643370188, + "grad_norm": 0.6852774024009705, + "learning_rate": 0.0001595168192866249, + "loss": 2.5819, + "step": 6012 + }, + { + "epoch": 0.48527156807360183, + "grad_norm": 0.7116097807884216, + "learning_rate": 0.0001595041321756913, + "loss": 2.5691, + "step": 6013 + }, + { + "epoch": 0.4853522718101848, + "grad_norm": 0.7478477954864502, + "learning_rate": 0.00015949144358175916, + "loss": 2.6658, + "step": 6014 + }, + { + "epoch": 0.48543297554676784, + "grad_norm": 0.816969633102417, + "learning_rate": 0.0001594787535051447, + "loss": 2.6709, + "step": 6015 + }, + { + "epoch": 0.4855136792833508, + "grad_norm": 0.6953164339065552, + "learning_rate": 0.00015946606194616427, + "loss": 2.6139, + "step": 6016 + }, + { + "epoch": 0.48559438301993385, + "grad_norm": 0.6698834300041199, + "learning_rate": 0.0001594533689051341, + "loss": 2.574, + "step": 6017 + }, + { + "epoch": 0.4856750867565168, + "grad_norm": 0.7686784267425537, + "learning_rate": 0.0001594406743823706, + "loss": 2.6271, + "step": 6018 + }, + { + "epoch": 0.4857557904930998, + "grad_norm": 0.7713280916213989, + "learning_rate": 0.00015942797837819009, + "loss": 2.6682, + "step": 6019 + }, + { + "epoch": 0.48583649422968284, + "grad_norm": 0.8102596998214722, + "learning_rate": 0.00015941528089290902, + "loss": 2.6771, + "step": 6020 + }, + { + "epoch": 0.4859171979662658, + "grad_norm": 0.7140331864356995, + "learning_rate": 0.00015940258192684382, + "loss": 2.6267, + "step": 6021 + }, + { 
+ "epoch": 0.48599790170284884, + "grad_norm": 0.7057615518569946, + "learning_rate": 0.000159389881480311, + "loss": 2.6011, + "step": 6022 + }, + { + "epoch": 0.4860786054394318, + "grad_norm": 0.7106850147247314, + "learning_rate": 0.0001593771795536271, + "loss": 2.6681, + "step": 6023 + }, + { + "epoch": 0.48615930917601485, + "grad_norm": 0.7618210315704346, + "learning_rate": 0.00015936447614710867, + "loss": 2.6545, + "step": 6024 + }, + { + "epoch": 0.48624001291259783, + "grad_norm": 0.7577608227729797, + "learning_rate": 0.00015935177126107233, + "loss": 2.6479, + "step": 6025 + }, + { + "epoch": 0.48632071664918086, + "grad_norm": 0.758745551109314, + "learning_rate": 0.00015933906489583468, + "loss": 2.7057, + "step": 6026 + }, + { + "epoch": 0.48640142038576384, + "grad_norm": 0.785906970500946, + "learning_rate": 0.00015932635705171241, + "loss": 2.7081, + "step": 6027 + }, + { + "epoch": 0.48648212412234687, + "grad_norm": 0.6744558215141296, + "learning_rate": 0.00015931364772902228, + "loss": 2.6438, + "step": 6028 + }, + { + "epoch": 0.48656282785892985, + "grad_norm": 0.7451377511024475, + "learning_rate": 0.00015930093692808099, + "loss": 2.6509, + "step": 6029 + }, + { + "epoch": 0.4866435315955129, + "grad_norm": 0.6590149402618408, + "learning_rate": 0.0001592882246492053, + "loss": 2.5683, + "step": 6030 + }, + { + "epoch": 0.48672423533209586, + "grad_norm": 0.7433840036392212, + "learning_rate": 0.0001592755108927121, + "loss": 2.6647, + "step": 6031 + }, + { + "epoch": 0.4868049390686789, + "grad_norm": 0.876806378364563, + "learning_rate": 0.00015926279565891822, + "loss": 2.6482, + "step": 6032 + }, + { + "epoch": 0.48688564280526186, + "grad_norm": 0.7495005130767822, + "learning_rate": 0.00015925007894814058, + "loss": 2.6346, + "step": 6033 + }, + { + "epoch": 0.4869663465418449, + "grad_norm": 0.7005730271339417, + "learning_rate": 0.00015923736076069604, + "loss": 2.6241, + "step": 6034 + }, + { + "epoch": 0.4870470502784279, + 
"grad_norm": 0.664098858833313, + "learning_rate": 0.00015922464109690166, + "loss": 2.6281, + "step": 6035 + }, + { + "epoch": 0.4871277540150109, + "grad_norm": 0.7482514977455139, + "learning_rate": 0.00015921191995707442, + "loss": 2.5764, + "step": 6036 + }, + { + "epoch": 0.4872084577515939, + "grad_norm": 0.7450351715087891, + "learning_rate": 0.0001591991973415313, + "loss": 2.6433, + "step": 6037 + }, + { + "epoch": 0.4872891614881769, + "grad_norm": 0.6738519072532654, + "learning_rate": 0.00015918647325058948, + "loss": 2.6688, + "step": 6038 + }, + { + "epoch": 0.4873698652247599, + "grad_norm": 0.7999960780143738, + "learning_rate": 0.000159173747684566, + "loss": 2.6309, + "step": 6039 + }, + { + "epoch": 0.4874505689613429, + "grad_norm": 0.7249687910079956, + "learning_rate": 0.00015916102064377806, + "loss": 2.5808, + "step": 6040 + }, + { + "epoch": 0.4875312726979259, + "grad_norm": 0.7014601826667786, + "learning_rate": 0.00015914829212854286, + "loss": 2.6646, + "step": 6041 + }, + { + "epoch": 0.48761197643450893, + "grad_norm": 0.7091174721717834, + "learning_rate": 0.00015913556213917757, + "loss": 2.6576, + "step": 6042 + }, + { + "epoch": 0.4876926801710919, + "grad_norm": 0.6949019432067871, + "learning_rate": 0.00015912283067599952, + "loss": 2.5883, + "step": 6043 + }, + { + "epoch": 0.48777338390767494, + "grad_norm": 0.6990448236465454, + "learning_rate": 0.00015911009773932598, + "loss": 2.6413, + "step": 6044 + }, + { + "epoch": 0.4878540876442579, + "grad_norm": 0.7106831073760986, + "learning_rate": 0.00015909736332947425, + "loss": 2.6122, + "step": 6045 + }, + { + "epoch": 0.48793479138084095, + "grad_norm": 0.7052395343780518, + "learning_rate": 0.00015908462744676177, + "loss": 2.572, + "step": 6046 + }, + { + "epoch": 0.4880154951174239, + "grad_norm": 0.7250158190727234, + "learning_rate": 0.00015907189009150592, + "loss": 2.6582, + "step": 6047 + }, + { + "epoch": 0.48809619885400696, + "grad_norm": 0.7213590145111084, + 
"learning_rate": 0.00015905915126402414, + "loss": 2.7025, + "step": 6048 + }, + { + "epoch": 0.48817690259058993, + "grad_norm": 0.7136254906654358, + "learning_rate": 0.00015904641096463394, + "loss": 2.6823, + "step": 6049 + }, + { + "epoch": 0.48825760632717297, + "grad_norm": 0.7163361310958862, + "learning_rate": 0.00015903366919365282, + "loss": 2.6642, + "step": 6050 + }, + { + "epoch": 0.48833831006375594, + "grad_norm": 0.6842724680900574, + "learning_rate": 0.00015902092595139838, + "loss": 2.6599, + "step": 6051 + }, + { + "epoch": 0.488419013800339, + "grad_norm": 0.7426519393920898, + "learning_rate": 0.0001590081812381882, + "loss": 2.6271, + "step": 6052 + }, + { + "epoch": 0.48849971753692195, + "grad_norm": 0.7415586709976196, + "learning_rate": 0.00015899543505433985, + "loss": 2.6105, + "step": 6053 + }, + { + "epoch": 0.488580421273505, + "grad_norm": 0.7286739945411682, + "learning_rate": 0.00015898268740017105, + "loss": 2.6304, + "step": 6054 + }, + { + "epoch": 0.48866112501008796, + "grad_norm": 0.6898483633995056, + "learning_rate": 0.00015896993827599947, + "loss": 2.6237, + "step": 6055 + }, + { + "epoch": 0.488741828746671, + "grad_norm": 0.7020056247711182, + "learning_rate": 0.00015895718768214293, + "loss": 2.6166, + "step": 6056 + }, + { + "epoch": 0.48882253248325397, + "grad_norm": 0.7145286798477173, + "learning_rate": 0.00015894443561891914, + "loss": 2.6729, + "step": 6057 + }, + { + "epoch": 0.488903236219837, + "grad_norm": 0.6888289451599121, + "learning_rate": 0.00015893168208664594, + "loss": 2.6154, + "step": 6058 + }, + { + "epoch": 0.48898393995642, + "grad_norm": 0.6929970383644104, + "learning_rate": 0.00015891892708564116, + "loss": 2.6748, + "step": 6059 + }, + { + "epoch": 0.489064643693003, + "grad_norm": 0.679853618144989, + "learning_rate": 0.0001589061706162227, + "loss": 2.605, + "step": 6060 + }, + { + "epoch": 0.489145347429586, + "grad_norm": 0.71812504529953, + "learning_rate": 0.0001588934126787085, + 
"loss": 2.7249, + "step": 6061 + }, + { + "epoch": 0.489226051166169, + "grad_norm": 0.7083466053009033, + "learning_rate": 0.00015888065327341648, + "loss": 2.5986, + "step": 6062 + }, + { + "epoch": 0.489306754902752, + "grad_norm": 0.7476792931556702, + "learning_rate": 0.00015886789240066466, + "loss": 2.5942, + "step": 6063 + }, + { + "epoch": 0.489387458639335, + "grad_norm": 0.7197855114936829, + "learning_rate": 0.00015885513006077114, + "loss": 2.6198, + "step": 6064 + }, + { + "epoch": 0.489468162375918, + "grad_norm": 0.6678233742713928, + "learning_rate": 0.00015884236625405385, + "loss": 2.5793, + "step": 6065 + }, + { + "epoch": 0.48954886611250104, + "grad_norm": 0.7371037602424622, + "learning_rate": 0.00015882960098083105, + "loss": 2.6231, + "step": 6066 + }, + { + "epoch": 0.489629569849084, + "grad_norm": 0.7087417244911194, + "learning_rate": 0.00015881683424142078, + "loss": 2.6483, + "step": 6067 + }, + { + "epoch": 0.48971027358566704, + "grad_norm": 0.7300292253494263, + "learning_rate": 0.00015880406603614126, + "loss": 2.6778, + "step": 6068 + }, + { + "epoch": 0.48979097732225, + "grad_norm": 0.8347866535186768, + "learning_rate": 0.0001587912963653107, + "loss": 2.554, + "step": 6069 + }, + { + "epoch": 0.489871681058833, + "grad_norm": 0.7717794179916382, + "learning_rate": 0.00015877852522924732, + "loss": 2.6904, + "step": 6070 + }, + { + "epoch": 0.48995238479541603, + "grad_norm": 0.6960952281951904, + "learning_rate": 0.00015876575262826944, + "loss": 2.6059, + "step": 6071 + }, + { + "epoch": 0.490033088531999, + "grad_norm": 0.7316592931747437, + "learning_rate": 0.00015875297856269543, + "loss": 2.6685, + "step": 6072 + }, + { + "epoch": 0.49011379226858204, + "grad_norm": 0.6775457859039307, + "learning_rate": 0.00015874020303284362, + "loss": 2.6232, + "step": 6073 + }, + { + "epoch": 0.490194496005165, + "grad_norm": 0.7741925120353699, + "learning_rate": 0.00015872742603903237, + "loss": 2.6767, + "step": 6074 + }, + { + 
"epoch": 0.49027519974174805, + "grad_norm": 0.857490599155426, + "learning_rate": 0.00015871464758158017, + "loss": 2.6649, + "step": 6075 + }, + { + "epoch": 0.490355903478331, + "grad_norm": 0.7474274039268494, + "learning_rate": 0.00015870186766080545, + "loss": 2.6926, + "step": 6076 + }, + { + "epoch": 0.49043660721491406, + "grad_norm": 0.7266567945480347, + "learning_rate": 0.00015868908627702675, + "loss": 2.5919, + "step": 6077 + }, + { + "epoch": 0.49051731095149703, + "grad_norm": 0.7247830629348755, + "learning_rate": 0.0001586763034305626, + "loss": 2.6158, + "step": 6078 + }, + { + "epoch": 0.49059801468808006, + "grad_norm": 0.7654951214790344, + "learning_rate": 0.00015866351912173157, + "loss": 2.7236, + "step": 6079 + }, + { + "epoch": 0.49067871842466304, + "grad_norm": 0.732431948184967, + "learning_rate": 0.00015865073335085236, + "loss": 2.6349, + "step": 6080 + }, + { + "epoch": 0.4907594221612461, + "grad_norm": 0.7240673303604126, + "learning_rate": 0.0001586379461182435, + "loss": 2.6282, + "step": 6081 + }, + { + "epoch": 0.49084012589782905, + "grad_norm": 0.767473042011261, + "learning_rate": 0.00015862515742422374, + "loss": 2.6939, + "step": 6082 + }, + { + "epoch": 0.4909208296344121, + "grad_norm": 0.6977359056472778, + "learning_rate": 0.00015861236726911183, + "loss": 2.6591, + "step": 6083 + }, + { + "epoch": 0.49100153337099506, + "grad_norm": 0.7676639556884766, + "learning_rate": 0.00015859957565322655, + "loss": 2.6189, + "step": 6084 + }, + { + "epoch": 0.4910822371075781, + "grad_norm": 0.7157976031303406, + "learning_rate": 0.0001585867825768866, + "loss": 2.644, + "step": 6085 + }, + { + "epoch": 0.49116294084416107, + "grad_norm": 0.7080803513526917, + "learning_rate": 0.0001585739880404109, + "loss": 2.6099, + "step": 6086 + }, + { + "epoch": 0.4912436445807441, + "grad_norm": 0.7109760046005249, + "learning_rate": 0.0001585611920441183, + "loss": 2.7087, + "step": 6087 + }, + { + "epoch": 0.4913243483173271, + 
"grad_norm": 0.7274255156517029, + "learning_rate": 0.00015854839458832772, + "loss": 2.6394, + "step": 6088 + }, + { + "epoch": 0.4914050520539101, + "grad_norm": 0.7407883405685425, + "learning_rate": 0.00015853559567335812, + "loss": 2.6729, + "step": 6089 + }, + { + "epoch": 0.4914857557904931, + "grad_norm": 0.6879885196685791, + "learning_rate": 0.00015852279529952843, + "loss": 2.5971, + "step": 6090 + }, + { + "epoch": 0.4915664595270761, + "grad_norm": 0.7678415179252625, + "learning_rate": 0.00015850999346715772, + "loss": 2.6606, + "step": 6091 + }, + { + "epoch": 0.4916471632636591, + "grad_norm": 0.7108608484268188, + "learning_rate": 0.00015849719017656504, + "loss": 2.6494, + "step": 6092 + }, + { + "epoch": 0.4917278670002421, + "grad_norm": 0.7238833904266357, + "learning_rate": 0.00015848438542806945, + "loss": 2.6742, + "step": 6093 + }, + { + "epoch": 0.4918085707368251, + "grad_norm": 0.7316902279853821, + "learning_rate": 0.0001584715792219901, + "loss": 2.6757, + "step": 6094 + }, + { + "epoch": 0.49188927447340813, + "grad_norm": 0.7339446544647217, + "learning_rate": 0.00015845877155864612, + "loss": 2.607, + "step": 6095 + }, + { + "epoch": 0.4919699782099911, + "grad_norm": 0.6931337714195251, + "learning_rate": 0.0001584459624383568, + "loss": 2.6203, + "step": 6096 + }, + { + "epoch": 0.49205068194657414, + "grad_norm": 0.734229326248169, + "learning_rate": 0.00015843315186144126, + "loss": 2.646, + "step": 6097 + }, + { + "epoch": 0.4921313856831571, + "grad_norm": 0.7764919400215149, + "learning_rate": 0.00015842033982821883, + "loss": 2.6698, + "step": 6098 + }, + { + "epoch": 0.49221208941974015, + "grad_norm": 0.7707986235618591, + "learning_rate": 0.00015840752633900887, + "loss": 2.6995, + "step": 6099 + }, + { + "epoch": 0.4922927931563231, + "grad_norm": 0.7321949601173401, + "learning_rate": 0.00015839471139413066, + "loss": 2.6517, + "step": 6100 + }, + { + "epoch": 0.49237349689290616, + "grad_norm": 0.7087488770484924, + 
"learning_rate": 0.00015838189499390353, + "loss": 2.6153, + "step": 6101 + }, + { + "epoch": 0.49245420062948914, + "grad_norm": 0.7300730347633362, + "learning_rate": 0.00015836907713864706, + "loss": 2.5868, + "step": 6102 + }, + { + "epoch": 0.49253490436607217, + "grad_norm": 0.8476536273956299, + "learning_rate": 0.00015835625782868054, + "loss": 2.7158, + "step": 6103 + }, + { + "epoch": 0.49261560810265514, + "grad_norm": 0.8062012791633606, + "learning_rate": 0.0001583434370643236, + "loss": 2.6896, + "step": 6104 + }, + { + "epoch": 0.4926963118392382, + "grad_norm": 0.7336686849594116, + "learning_rate": 0.00015833061484589562, + "loss": 2.6416, + "step": 6105 + }, + { + "epoch": 0.49277701557582115, + "grad_norm": 0.6976929306983948, + "learning_rate": 0.00015831779117371627, + "loss": 2.6279, + "step": 6106 + }, + { + "epoch": 0.4928577193124042, + "grad_norm": 0.7262609601020813, + "learning_rate": 0.00015830496604810513, + "loss": 2.6144, + "step": 6107 + }, + { + "epoch": 0.49293842304898716, + "grad_norm": 0.7274572253227234, + "learning_rate": 0.00015829213946938183, + "loss": 2.7409, + "step": 6108 + }, + { + "epoch": 0.4930191267855702, + "grad_norm": 0.7438454031944275, + "learning_rate": 0.000158279311437866, + "loss": 2.5928, + "step": 6109 + }, + { + "epoch": 0.49309983052215317, + "grad_norm": 0.6885421872138977, + "learning_rate": 0.00015826648195387742, + "loss": 2.6659, + "step": 6110 + }, + { + "epoch": 0.4931805342587362, + "grad_norm": 0.6781450510025024, + "learning_rate": 0.0001582536510177358, + "loss": 2.6068, + "step": 6111 + }, + { + "epoch": 0.4932612379953192, + "grad_norm": 0.7618128657341003, + "learning_rate": 0.0001582408186297609, + "loss": 2.6705, + "step": 6112 + }, + { + "epoch": 0.4933419417319022, + "grad_norm": 0.7011203765869141, + "learning_rate": 0.00015822798479027256, + "loss": 2.596, + "step": 6113 + }, + { + "epoch": 0.4934226454684852, + "grad_norm": 0.7727806568145752, + "learning_rate": 
0.00015821514949959065, + "loss": 2.6458, + "step": 6114 + }, + { + "epoch": 0.4935033492050682, + "grad_norm": 0.7318129539489746, + "learning_rate": 0.00015820231275803502, + "loss": 2.6009, + "step": 6115 + }, + { + "epoch": 0.4935840529416512, + "grad_norm": 0.6836227178573608, + "learning_rate": 0.00015818947456592563, + "loss": 2.6311, + "step": 6116 + }, + { + "epoch": 0.49366475667823423, + "grad_norm": 0.7657275199890137, + "learning_rate": 0.0001581766349235824, + "loss": 2.6079, + "step": 6117 + }, + { + "epoch": 0.4937454604148172, + "grad_norm": 0.74736487865448, + "learning_rate": 0.0001581637938313254, + "loss": 2.6752, + "step": 6118 + }, + { + "epoch": 0.49382616415140024, + "grad_norm": 0.716708242893219, + "learning_rate": 0.00015815095128947454, + "loss": 2.5896, + "step": 6119 + }, + { + "epoch": 0.4939068678879832, + "grad_norm": 0.740727424621582, + "learning_rate": 0.00015813810729835002, + "loss": 2.6528, + "step": 6120 + }, + { + "epoch": 0.4939875716245662, + "grad_norm": 0.6746687293052673, + "learning_rate": 0.0001581252618582719, + "loss": 2.6438, + "step": 6121 + }, + { + "epoch": 0.4940682753611492, + "grad_norm": 0.7547900080680847, + "learning_rate": 0.00015811241496956028, + "loss": 2.631, + "step": 6122 + }, + { + "epoch": 0.4941489790977322, + "grad_norm": 0.7500903606414795, + "learning_rate": 0.0001580995666325354, + "loss": 2.7039, + "step": 6123 + }, + { + "epoch": 0.49422968283431523, + "grad_norm": 0.7692849636077881, + "learning_rate": 0.00015808671684751743, + "loss": 2.5922, + "step": 6124 + }, + { + "epoch": 0.4943103865708982, + "grad_norm": 0.6964236497879028, + "learning_rate": 0.00015807386561482662, + "loss": 2.6239, + "step": 6125 + }, + { + "epoch": 0.49439109030748124, + "grad_norm": 0.7094165086746216, + "learning_rate": 0.0001580610129347833, + "loss": 2.6239, + "step": 6126 + }, + { + "epoch": 0.4944717940440642, + "grad_norm": 0.7579131126403809, + "learning_rate": 0.00015804815880770775, + "loss": 2.6654, 
+ "step": 6127 + }, + { + "epoch": 0.49455249778064725, + "grad_norm": 0.7687693238258362, + "learning_rate": 0.00015803530323392034, + "loss": 2.6557, + "step": 6128 + }, + { + "epoch": 0.4946332015172302, + "grad_norm": 0.6913540363311768, + "learning_rate": 0.0001580224462137415, + "loss": 2.6299, + "step": 6129 + }, + { + "epoch": 0.49471390525381326, + "grad_norm": 0.7574129700660706, + "learning_rate": 0.0001580095877474916, + "loss": 2.6327, + "step": 6130 + }, + { + "epoch": 0.49479460899039623, + "grad_norm": 0.6834598183631897, + "learning_rate": 0.0001579967278354911, + "loss": 2.6402, + "step": 6131 + }, + { + "epoch": 0.49487531272697927, + "grad_norm": 0.7872750163078308, + "learning_rate": 0.00015798386647806057, + "loss": 2.6647, + "step": 6132 + }, + { + "epoch": 0.49495601646356224, + "grad_norm": 0.705211341381073, + "learning_rate": 0.00015797100367552055, + "loss": 2.6288, + "step": 6133 + }, + { + "epoch": 0.4950367202001453, + "grad_norm": 0.7302640080451965, + "learning_rate": 0.00015795813942819155, + "loss": 2.6683, + "step": 6134 + }, + { + "epoch": 0.49511742393672825, + "grad_norm": 0.7522360682487488, + "learning_rate": 0.0001579452737363942, + "loss": 2.5885, + "step": 6135 + }, + { + "epoch": 0.4951981276733113, + "grad_norm": 0.657376229763031, + "learning_rate": 0.0001579324066004492, + "loss": 2.5775, + "step": 6136 + }, + { + "epoch": 0.49527883140989426, + "grad_norm": 0.7539556622505188, + "learning_rate": 0.00015791953802067715, + "loss": 2.6236, + "step": 6137 + }, + { + "epoch": 0.4953595351464773, + "grad_norm": 0.7090374827384949, + "learning_rate": 0.00015790666799739883, + "loss": 2.5845, + "step": 6138 + }, + { + "epoch": 0.49544023888306027, + "grad_norm": 0.6883948445320129, + "learning_rate": 0.00015789379653093497, + "loss": 2.6621, + "step": 6139 + }, + { + "epoch": 0.4955209426196433, + "grad_norm": 0.7466424107551575, + "learning_rate": 0.00015788092362160633, + "loss": 2.6289, + "step": 6140 + }, + { + "epoch": 
0.4956016463562263, + "grad_norm": 0.7424437403678894, + "learning_rate": 0.00015786804926973383, + "loss": 2.6405, + "step": 6141 + }, + { + "epoch": 0.4956823500928093, + "grad_norm": 0.7227851748466492, + "learning_rate": 0.00015785517347563822, + "loss": 2.6537, + "step": 6142 + }, + { + "epoch": 0.4957630538293923, + "grad_norm": 0.7548653483390808, + "learning_rate": 0.00015784229623964048, + "loss": 2.7377, + "step": 6143 + }, + { + "epoch": 0.4958437575659753, + "grad_norm": 0.7086976170539856, + "learning_rate": 0.00015782941756206152, + "loss": 2.6194, + "step": 6144 + }, + { + "epoch": 0.4959244613025583, + "grad_norm": 0.6605533957481384, + "learning_rate": 0.0001578165374432223, + "loss": 2.6265, + "step": 6145 + }, + { + "epoch": 0.4960051650391413, + "grad_norm": 0.7187899947166443, + "learning_rate": 0.00015780365588344384, + "loss": 2.5639, + "step": 6146 + }, + { + "epoch": 0.4960858687757243, + "grad_norm": 0.7014074921607971, + "learning_rate": 0.00015779077288304716, + "loss": 2.6011, + "step": 6147 + }, + { + "epoch": 0.49616657251230734, + "grad_norm": 0.7463840842247009, + "learning_rate": 0.00015777788844235335, + "loss": 2.6059, + "step": 6148 + }, + { + "epoch": 0.4962472762488903, + "grad_norm": 0.8022417426109314, + "learning_rate": 0.00015776500256168356, + "loss": 2.6011, + "step": 6149 + }, + { + "epoch": 0.49632797998547334, + "grad_norm": 0.7140083909034729, + "learning_rate": 0.0001577521152413589, + "loss": 2.6891, + "step": 6150 + }, + { + "epoch": 0.4964086837220563, + "grad_norm": 0.7266198992729187, + "learning_rate": 0.00015773922648170053, + "loss": 2.6561, + "step": 6151 + }, + { + "epoch": 0.49648938745863935, + "grad_norm": 0.7241406440734863, + "learning_rate": 0.0001577263362830297, + "loss": 2.6835, + "step": 6152 + }, + { + "epoch": 0.49657009119522233, + "grad_norm": 0.7422344088554382, + "learning_rate": 0.0001577134446456677, + "loss": 2.6039, + "step": 6153 + }, + { + "epoch": 0.49665079493180536, + "grad_norm": 
0.8764764666557312, + "learning_rate": 0.0001577005515699358, + "loss": 2.68, + "step": 6154 + }, + { + "epoch": 0.49673149866838834, + "grad_norm": 0.7224323749542236, + "learning_rate": 0.0001576876570561553, + "loss": 2.5824, + "step": 6155 + }, + { + "epoch": 0.49681220240497137, + "grad_norm": 0.7601075172424316, + "learning_rate": 0.00015767476110464758, + "loss": 2.7124, + "step": 6156 + }, + { + "epoch": 0.49689290614155435, + "grad_norm": 0.7425428628921509, + "learning_rate": 0.0001576618637157341, + "loss": 2.5913, + "step": 6157 + }, + { + "epoch": 0.4969736098781374, + "grad_norm": 0.721969723701477, + "learning_rate": 0.0001576489648897362, + "loss": 2.6482, + "step": 6158 + }, + { + "epoch": 0.49705431361472036, + "grad_norm": 0.8142126798629761, + "learning_rate": 0.00015763606462697544, + "loss": 2.6231, + "step": 6159 + }, + { + "epoch": 0.4971350173513034, + "grad_norm": 0.6636359691619873, + "learning_rate": 0.00015762316292777326, + "loss": 2.6388, + "step": 6160 + }, + { + "epoch": 0.49721572108788636, + "grad_norm": 0.7093132734298706, + "learning_rate": 0.00015761025979245123, + "loss": 2.6562, + "step": 6161 + }, + { + "epoch": 0.4972964248244694, + "grad_norm": 0.7130851745605469, + "learning_rate": 0.00015759735522133094, + "loss": 2.6856, + "step": 6162 + }, + { + "epoch": 0.4973771285610524, + "grad_norm": 0.7303292155265808, + "learning_rate": 0.000157584449214734, + "loss": 2.6077, + "step": 6163 + }, + { + "epoch": 0.4974578322976354, + "grad_norm": 0.6742258071899414, + "learning_rate": 0.00015757154177298204, + "loss": 2.6644, + "step": 6164 + }, + { + "epoch": 0.4975385360342184, + "grad_norm": 0.6882894039154053, + "learning_rate": 0.00015755863289639677, + "loss": 2.6462, + "step": 6165 + }, + { + "epoch": 0.4976192397708014, + "grad_norm": 0.7882276773452759, + "learning_rate": 0.00015754572258529993, + "loss": 2.6509, + "step": 6166 + }, + { + "epoch": 0.4976999435073844, + "grad_norm": 0.7163859009742737, + "learning_rate": 
0.00015753281084001324, + "loss": 2.627, + "step": 6167 + }, + { + "epoch": 0.4977806472439674, + "grad_norm": 0.7194411158561707, + "learning_rate": 0.0001575198976608585, + "loss": 2.6798, + "step": 6168 + }, + { + "epoch": 0.4978613509805504, + "grad_norm": 0.7233198881149292, + "learning_rate": 0.0001575069830481576, + "loss": 2.6616, + "step": 6169 + }, + { + "epoch": 0.49794205471713343, + "grad_norm": 0.7246997952461243, + "learning_rate": 0.00015749406700223231, + "loss": 2.6262, + "step": 6170 + }, + { + "epoch": 0.4980227584537164, + "grad_norm": 0.7509368658065796, + "learning_rate": 0.00015748114952340457, + "loss": 2.6148, + "step": 6171 + }, + { + "epoch": 0.4981034621902994, + "grad_norm": 0.7079075574874878, + "learning_rate": 0.00015746823061199637, + "loss": 2.6712, + "step": 6172 + }, + { + "epoch": 0.4981841659268824, + "grad_norm": 0.6821560859680176, + "learning_rate": 0.0001574553102683296, + "loss": 2.6253, + "step": 6173 + }, + { + "epoch": 0.4982648696634654, + "grad_norm": 0.7623000741004944, + "learning_rate": 0.00015744238849272634, + "loss": 2.6252, + "step": 6174 + }, + { + "epoch": 0.4983455734000484, + "grad_norm": 0.709434449672699, + "learning_rate": 0.00015742946528550858, + "loss": 2.555, + "step": 6175 + }, + { + "epoch": 0.4984262771366314, + "grad_norm": 0.7277799844741821, + "learning_rate": 0.00015741654064699846, + "loss": 2.6551, + "step": 6176 + }, + { + "epoch": 0.49850698087321443, + "grad_norm": 0.7208690643310547, + "learning_rate": 0.00015740361457751802, + "loss": 2.6747, + "step": 6177 + }, + { + "epoch": 0.4985876846097974, + "grad_norm": 0.8458136916160583, + "learning_rate": 0.00015739068707738946, + "loss": 2.6551, + "step": 6178 + }, + { + "epoch": 0.49866838834638044, + "grad_norm": 0.7718539834022522, + "learning_rate": 0.00015737775814693498, + "loss": 2.6246, + "step": 6179 + }, + { + "epoch": 0.4987490920829634, + "grad_norm": 0.6982735395431519, + "learning_rate": 0.00015736482778647674, + "loss": 
2.5726, + "step": 6180 + }, + { + "epoch": 0.49882979581954645, + "grad_norm": 0.6759411692619324, + "learning_rate": 0.00015735189599633707, + "loss": 2.6603, + "step": 6181 + }, + { + "epoch": 0.4989104995561294, + "grad_norm": 0.7016656994819641, + "learning_rate": 0.0001573389627768382, + "loss": 2.6045, + "step": 6182 + }, + { + "epoch": 0.49899120329271246, + "grad_norm": 0.7170618176460266, + "learning_rate": 0.00015732602812830253, + "loss": 2.6419, + "step": 6183 + }, + { + "epoch": 0.49907190702929544, + "grad_norm": 0.6963300704956055, + "learning_rate": 0.00015731309205105237, + "loss": 2.6377, + "step": 6184 + }, + { + "epoch": 0.49915261076587847, + "grad_norm": 0.7437995672225952, + "learning_rate": 0.00015730015454541014, + "loss": 2.7013, + "step": 6185 + }, + { + "epoch": 0.49923331450246144, + "grad_norm": 0.6846518516540527, + "learning_rate": 0.00015728721561169827, + "loss": 2.5526, + "step": 6186 + }, + { + "epoch": 0.4993140182390445, + "grad_norm": 0.7343618273735046, + "learning_rate": 0.00015727427525023924, + "loss": 2.6567, + "step": 6187 + }, + { + "epoch": 0.49939472197562745, + "grad_norm": 0.6947566270828247, + "learning_rate": 0.00015726133346135554, + "loss": 2.6642, + "step": 6188 + }, + { + "epoch": 0.4994754257122105, + "grad_norm": 0.7402610778808594, + "learning_rate": 0.00015724839024536976, + "loss": 2.6964, + "step": 6189 + }, + { + "epoch": 0.49955612944879346, + "grad_norm": 0.7318306565284729, + "learning_rate": 0.00015723544560260444, + "loss": 2.5864, + "step": 6190 + }, + { + "epoch": 0.4996368331853765, + "grad_norm": 0.752216100692749, + "learning_rate": 0.00015722249953338215, + "loss": 2.6357, + "step": 6191 + }, + { + "epoch": 0.49971753692195947, + "grad_norm": 0.70283442735672, + "learning_rate": 0.00015720955203802565, + "loss": 2.5892, + "step": 6192 + }, + { + "epoch": 0.4997982406585425, + "grad_norm": 0.7457823753356934, + "learning_rate": 0.00015719660311685755, + "loss": 2.6663, + "step": 6193 + }, + { 
+ "epoch": 0.4998789443951255, + "grad_norm": 0.7296229600906372, + "learning_rate": 0.00015718365277020058, + "loss": 2.6238, + "step": 6194 + }, + { + "epoch": 0.4999596481317085, + "grad_norm": 0.6963346004486084, + "learning_rate": 0.0001571707009983775, + "loss": 2.6303, + "step": 6195 + }, + { + "epoch": 0.5000403518682915, + "grad_norm": 0.7074694633483887, + "learning_rate": 0.0001571577478017111, + "loss": 2.6077, + "step": 6196 + }, + { + "epoch": 0.5001210556048745, + "grad_norm": 0.7826260328292847, + "learning_rate": 0.00015714479318052423, + "loss": 2.6668, + "step": 6197 + }, + { + "epoch": 0.5002017593414575, + "grad_norm": 0.6908758282661438, + "learning_rate": 0.00015713183713513974, + "loss": 2.6195, + "step": 6198 + }, + { + "epoch": 0.5002824630780405, + "grad_norm": 0.7571602463722229, + "learning_rate": 0.0001571188796658805, + "loss": 2.6546, + "step": 6199 + }, + { + "epoch": 0.5003631668146236, + "grad_norm": 0.7359431385993958, + "learning_rate": 0.0001571059207730695, + "loss": 2.5792, + "step": 6200 + }, + { + "epoch": 0.5004438705512065, + "grad_norm": 0.6886340379714966, + "learning_rate": 0.00015709296045702967, + "loss": 2.6099, + "step": 6201 + }, + { + "epoch": 0.5005245742877895, + "grad_norm": 0.6900473833084106, + "learning_rate": 0.000157079998718084, + "loss": 2.6461, + "step": 6202 + }, + { + "epoch": 0.5006052780243725, + "grad_norm": 0.66212397813797, + "learning_rate": 0.00015706703555655555, + "loss": 2.6178, + "step": 6203 + }, + { + "epoch": 0.5006859817609556, + "grad_norm": 0.7666565179824829, + "learning_rate": 0.00015705407097276744, + "loss": 2.7097, + "step": 6204 + }, + { + "epoch": 0.5007666854975386, + "grad_norm": 0.7294591069221497, + "learning_rate": 0.0001570411049670427, + "loss": 2.5995, + "step": 6205 + }, + { + "epoch": 0.5008473892341215, + "grad_norm": 0.7279765009880066, + "learning_rate": 0.00015702813753970453, + "loss": 2.5554, + "step": 6206 + }, + { + "epoch": 0.5009280929707045, + "grad_norm": 
0.7174742817878723, + "learning_rate": 0.0001570151686910761, + "loss": 2.6523, + "step": 6207 + }, + { + "epoch": 0.5010087967072876, + "grad_norm": 0.67017662525177, + "learning_rate": 0.00015700219842148063, + "loss": 2.5613, + "step": 6208 + }, + { + "epoch": 0.5010895004438706, + "grad_norm": 0.7000258564949036, + "learning_rate": 0.00015698922673124138, + "loss": 2.5658, + "step": 6209 + }, + { + "epoch": 0.5011702041804535, + "grad_norm": 0.6894544363021851, + "learning_rate": 0.00015697625362068164, + "loss": 2.6925, + "step": 6210 + }, + { + "epoch": 0.5012509079170365, + "grad_norm": 0.6742957234382629, + "learning_rate": 0.00015696327909012466, + "loss": 2.6429, + "step": 6211 + }, + { + "epoch": 0.5013316116536196, + "grad_norm": 0.7039656639099121, + "learning_rate": 0.0001569503031398939, + "loss": 2.6313, + "step": 6212 + }, + { + "epoch": 0.5014123153902026, + "grad_norm": 0.720003604888916, + "learning_rate": 0.00015693732577031272, + "loss": 2.6207, + "step": 6213 + }, + { + "epoch": 0.5014930191267856, + "grad_norm": 0.8611499071121216, + "learning_rate": 0.00015692434698170456, + "loss": 2.6855, + "step": 6214 + }, + { + "epoch": 0.5015737228633685, + "grad_norm": 0.6664702296257019, + "learning_rate": 0.00015691136677439284, + "loss": 2.6174, + "step": 6215 + }, + { + "epoch": 0.5016544265999516, + "grad_norm": 0.7258509993553162, + "learning_rate": 0.00015689838514870111, + "loss": 2.6558, + "step": 6216 + }, + { + "epoch": 0.5017351303365346, + "grad_norm": 0.6972211599349976, + "learning_rate": 0.0001568854021049529, + "loss": 2.5913, + "step": 6217 + }, + { + "epoch": 0.5018158340731176, + "grad_norm": 0.7927280068397522, + "learning_rate": 0.00015687241764347177, + "loss": 2.6466, + "step": 6218 + }, + { + "epoch": 0.5018965378097006, + "grad_norm": 0.7044646143913269, + "learning_rate": 0.00015685943176458128, + "loss": 2.6195, + "step": 6219 + }, + { + "epoch": 0.5019772415462836, + "grad_norm": 0.6935598254203796, + "learning_rate": 
0.00015684644446860516, + "loss": 2.6486, + "step": 6220 + }, + { + "epoch": 0.5020579452828666, + "grad_norm": 0.7965792417526245, + "learning_rate": 0.00015683345575586704, + "loss": 2.6265, + "step": 6221 + }, + { + "epoch": 0.5021386490194496, + "grad_norm": 0.727053701877594, + "learning_rate": 0.00015682046562669064, + "loss": 2.6714, + "step": 6222 + }, + { + "epoch": 0.5022193527560326, + "grad_norm": 0.7919184565544128, + "learning_rate": 0.0001568074740813997, + "loss": 2.7115, + "step": 6223 + }, + { + "epoch": 0.5023000564926156, + "grad_norm": 0.7724714279174805, + "learning_rate": 0.00015679448112031801, + "loss": 2.6636, + "step": 6224 + }, + { + "epoch": 0.5023807602291986, + "grad_norm": 0.6893701553344727, + "learning_rate": 0.0001567814867437694, + "loss": 2.6562, + "step": 6225 + }, + { + "epoch": 0.5024614639657816, + "grad_norm": 0.7089633345603943, + "learning_rate": 0.00015676849095207769, + "loss": 2.6125, + "step": 6226 + }, + { + "epoch": 0.5025421677023646, + "grad_norm": 0.7620012760162354, + "learning_rate": 0.00015675549374556682, + "loss": 2.6935, + "step": 6227 + }, + { + "epoch": 0.5026228714389476, + "grad_norm": 0.7293741703033447, + "learning_rate": 0.00015674249512456065, + "loss": 2.66, + "step": 6228 + }, + { + "epoch": 0.5027035751755307, + "grad_norm": 0.7366519570350647, + "learning_rate": 0.00015672949508938318, + "loss": 2.5968, + "step": 6229 + }, + { + "epoch": 0.5027842789121136, + "grad_norm": 0.6646310091018677, + "learning_rate": 0.00015671649364035846, + "loss": 2.5751, + "step": 6230 + }, + { + "epoch": 0.5028649826486966, + "grad_norm": 0.6682632565498352, + "learning_rate": 0.00015670349077781038, + "loss": 2.5902, + "step": 6231 + }, + { + "epoch": 0.5029456863852796, + "grad_norm": 0.7327528595924377, + "learning_rate": 0.00015669048650206313, + "loss": 2.6487, + "step": 6232 + }, + { + "epoch": 0.5030263901218627, + "grad_norm": 0.7114281058311462, + "learning_rate": 0.00015667748081344074, + "loss": 2.5779, 
+ "step": 6233 + }, + { + "epoch": 0.5031070938584457, + "grad_norm": 0.7908105850219727, + "learning_rate": 0.00015666447371226737, + "loss": 2.6099, + "step": 6234 + }, + { + "epoch": 0.5031877975950286, + "grad_norm": 0.7823575139045715, + "learning_rate": 0.00015665146519886725, + "loss": 2.6339, + "step": 6235 + }, + { + "epoch": 0.5032685013316116, + "grad_norm": 0.7404836416244507, + "learning_rate": 0.00015663845527356447, + "loss": 2.6035, + "step": 6236 + }, + { + "epoch": 0.5033492050681947, + "grad_norm": 0.7448995113372803, + "learning_rate": 0.00015662544393668334, + "loss": 2.6566, + "step": 6237 + }, + { + "epoch": 0.5034299088047777, + "grad_norm": 0.7209747433662415, + "learning_rate": 0.00015661243118854815, + "loss": 2.682, + "step": 6238 + }, + { + "epoch": 0.5035106125413606, + "grad_norm": 0.691759467124939, + "learning_rate": 0.00015659941702948315, + "loss": 2.6435, + "step": 6239 + }, + { + "epoch": 0.5035913162779436, + "grad_norm": 0.7646063566207886, + "learning_rate": 0.00015658640145981275, + "loss": 2.591, + "step": 6240 + }, + { + "epoch": 0.5036720200145267, + "grad_norm": 0.8319387435913086, + "learning_rate": 0.00015657338447986133, + "loss": 2.5937, + "step": 6241 + }, + { + "epoch": 0.5037527237511097, + "grad_norm": 0.729193389415741, + "learning_rate": 0.00015656036608995323, + "loss": 2.651, + "step": 6242 + }, + { + "epoch": 0.5038334274876927, + "grad_norm": 0.720098614692688, + "learning_rate": 0.000156547346290413, + "loss": 2.681, + "step": 6243 + }, + { + "epoch": 0.5039141312242756, + "grad_norm": 0.7172541618347168, + "learning_rate": 0.00015653432508156508, + "loss": 2.5906, + "step": 6244 + }, + { + "epoch": 0.5039948349608587, + "grad_norm": 0.7352481484413147, + "learning_rate": 0.00015652130246373398, + "loss": 2.6376, + "step": 6245 + }, + { + "epoch": 0.5040755386974417, + "grad_norm": 0.6664925813674927, + "learning_rate": 0.0001565082784372443, + "loss": 2.706, + "step": 6246 + }, + { + "epoch": 
0.5041562424340247, + "grad_norm": 0.7292987704277039, + "learning_rate": 0.0001564952530024206, + "loss": 2.6149, + "step": 6247 + }, + { + "epoch": 0.5042369461706077, + "grad_norm": 0.6904531121253967, + "learning_rate": 0.00015648222615958747, + "loss": 2.579, + "step": 6248 + }, + { + "epoch": 0.5043176499071907, + "grad_norm": 0.7385311722755432, + "learning_rate": 0.00015646919790906965, + "loss": 2.6137, + "step": 6249 + }, + { + "epoch": 0.5043983536437737, + "grad_norm": 0.7869507074356079, + "learning_rate": 0.0001564561682511918, + "loss": 2.6831, + "step": 6250 + }, + { + "epoch": 0.5044790573803567, + "grad_norm": 0.723680317401886, + "learning_rate": 0.00015644313718627867, + "loss": 2.6083, + "step": 6251 + }, + { + "epoch": 0.5045597611169397, + "grad_norm": 0.7029969692230225, + "learning_rate": 0.00015643010471465502, + "loss": 2.6462, + "step": 6252 + }, + { + "epoch": 0.5046404648535228, + "grad_norm": 0.818975031375885, + "learning_rate": 0.00015641707083664566, + "loss": 2.6393, + "step": 6253 + }, + { + "epoch": 0.5047211685901057, + "grad_norm": 0.7237667441368103, + "learning_rate": 0.0001564040355525754, + "loss": 2.5995, + "step": 6254 + }, + { + "epoch": 0.5048018723266887, + "grad_norm": 0.8613824248313904, + "learning_rate": 0.00015639099886276912, + "loss": 2.748, + "step": 6255 + }, + { + "epoch": 0.5048825760632717, + "grad_norm": 0.6802194118499756, + "learning_rate": 0.00015637796076755178, + "loss": 2.6393, + "step": 6256 + }, + { + "epoch": 0.5049632797998548, + "grad_norm": 0.7816255688667297, + "learning_rate": 0.00015636492126724823, + "loss": 2.6218, + "step": 6257 + }, + { + "epoch": 0.5050439835364378, + "grad_norm": 0.7443990707397461, + "learning_rate": 0.00015635188036218356, + "loss": 2.6181, + "step": 6258 + }, + { + "epoch": 0.5051246872730207, + "grad_norm": 0.7869458794593811, + "learning_rate": 0.0001563388380526827, + "loss": 2.6641, + "step": 6259 + }, + { + "epoch": 0.5052053910096037, + "grad_norm": 
0.7423158288002014, + "learning_rate": 0.00015632579433907072, + "loss": 2.5849, + "step": 6260 + }, + { + "epoch": 0.5052860947461868, + "grad_norm": 0.7888280153274536, + "learning_rate": 0.00015631274922167272, + "loss": 2.7095, + "step": 6261 + }, + { + "epoch": 0.5053667984827698, + "grad_norm": 0.7053405046463013, + "learning_rate": 0.0001562997027008138, + "loss": 2.5747, + "step": 6262 + }, + { + "epoch": 0.5054475022193528, + "grad_norm": 0.7930825352668762, + "learning_rate": 0.0001562866547768191, + "loss": 2.6359, + "step": 6263 + }, + { + "epoch": 0.5055282059559357, + "grad_norm": 0.7431469559669495, + "learning_rate": 0.0001562736054500139, + "loss": 2.6167, + "step": 6264 + }, + { + "epoch": 0.5056089096925188, + "grad_norm": 0.8395694494247437, + "learning_rate": 0.00015626055472072324, + "loss": 2.7217, + "step": 6265 + }, + { + "epoch": 0.5056896134291018, + "grad_norm": 0.7318898439407349, + "learning_rate": 0.0001562475025892726, + "loss": 2.6866, + "step": 6266 + }, + { + "epoch": 0.5057703171656848, + "grad_norm": 0.7487025856971741, + "learning_rate": 0.0001562344490559871, + "loss": 2.7206, + "step": 6267 + }, + { + "epoch": 0.5058510209022677, + "grad_norm": 0.8187269568443298, + "learning_rate": 0.00015622139412119212, + "loss": 2.658, + "step": 6268 + }, + { + "epoch": 0.5059317246388508, + "grad_norm": 0.6714495420455933, + "learning_rate": 0.00015620833778521307, + "loss": 2.6182, + "step": 6269 + }, + { + "epoch": 0.5060124283754338, + "grad_norm": 0.7556246519088745, + "learning_rate": 0.00015619528004837528, + "loss": 2.6502, + "step": 6270 + }, + { + "epoch": 0.5060931321120168, + "grad_norm": 0.6989960074424744, + "learning_rate": 0.00015618222091100424, + "loss": 2.6031, + "step": 6271 + }, + { + "epoch": 0.5061738358485998, + "grad_norm": 0.7002139091491699, + "learning_rate": 0.0001561691603734254, + "loss": 2.6563, + "step": 6272 + }, + { + "epoch": 0.5062545395851827, + "grad_norm": 0.7064816355705261, + "learning_rate": 
0.00015615609843596423, + "loss": 2.6482, + "step": 6273 + }, + { + "epoch": 0.5063352433217658, + "grad_norm": 0.6971433162689209, + "learning_rate": 0.00015614303509894634, + "loss": 2.6522, + "step": 6274 + }, + { + "epoch": 0.5064159470583488, + "grad_norm": 0.6982942223548889, + "learning_rate": 0.0001561299703626972, + "loss": 2.6477, + "step": 6275 + }, + { + "epoch": 0.5064966507949318, + "grad_norm": 0.7219811081886292, + "learning_rate": 0.0001561169042275425, + "loss": 2.6514, + "step": 6276 + }, + { + "epoch": 0.5065773545315148, + "grad_norm": 0.7391932010650635, + "learning_rate": 0.00015610383669380787, + "loss": 2.698, + "step": 6277 + }, + { + "epoch": 0.5066580582680978, + "grad_norm": 0.7852853536605835, + "learning_rate": 0.00015609076776181894, + "loss": 2.6281, + "step": 6278 + }, + { + "epoch": 0.5067387620046808, + "grad_norm": 0.7435647249221802, + "learning_rate": 0.00015607769743190147, + "loss": 2.6403, + "step": 6279 + }, + { + "epoch": 0.5068194657412638, + "grad_norm": 0.7300949096679688, + "learning_rate": 0.00015606462570438119, + "loss": 2.6125, + "step": 6280 + }, + { + "epoch": 0.5069001694778468, + "grad_norm": 0.7081549167633057, + "learning_rate": 0.00015605155257958388, + "loss": 2.6192, + "step": 6281 + }, + { + "epoch": 0.5069808732144299, + "grad_norm": 0.709020733833313, + "learning_rate": 0.00015603847805783537, + "loss": 2.6745, + "step": 6282 + }, + { + "epoch": 0.5070615769510128, + "grad_norm": 0.691684901714325, + "learning_rate": 0.0001560254021394615, + "loss": 2.5638, + "step": 6283 + }, + { + "epoch": 0.5071422806875958, + "grad_norm": 0.8338537812232971, + "learning_rate": 0.00015601232482478813, + "loss": 2.5835, + "step": 6284 + }, + { + "epoch": 0.5072229844241788, + "grad_norm": 0.659436047077179, + "learning_rate": 0.00015599924611414126, + "loss": 2.601, + "step": 6285 + }, + { + "epoch": 0.5073036881607619, + "grad_norm": 0.72590172290802, + "learning_rate": 0.00015598616600784676, + "loss": 2.602, + 
"step": 6286 + }, + { + "epoch": 0.5073843918973449, + "grad_norm": 0.6704443693161011, + "learning_rate": 0.00015597308450623066, + "loss": 2.5703, + "step": 6287 + }, + { + "epoch": 0.5074650956339278, + "grad_norm": 0.7298632264137268, + "learning_rate": 0.00015596000160961898, + "loss": 2.6859, + "step": 6288 + }, + { + "epoch": 0.5075457993705108, + "grad_norm": 0.6900345087051392, + "learning_rate": 0.00015594691731833776, + "loss": 2.6264, + "step": 6289 + }, + { + "epoch": 0.5076265031070939, + "grad_norm": 0.6705992221832275, + "learning_rate": 0.0001559338316327131, + "loss": 2.6135, + "step": 6290 + }, + { + "epoch": 0.5077072068436769, + "grad_norm": 0.691545307636261, + "learning_rate": 0.0001559207445530712, + "loss": 2.6538, + "step": 6291 + }, + { + "epoch": 0.5077879105802598, + "grad_norm": 0.6579985618591309, + "learning_rate": 0.00015590765607973811, + "loss": 2.6224, + "step": 6292 + }, + { + "epoch": 0.5078686143168428, + "grad_norm": 0.6938790678977966, + "learning_rate": 0.00015589456621304014, + "loss": 2.5932, + "step": 6293 + }, + { + "epoch": 0.5079493180534259, + "grad_norm": 0.7421671748161316, + "learning_rate": 0.00015588147495330346, + "loss": 2.7098, + "step": 6294 + }, + { + "epoch": 0.5080300217900089, + "grad_norm": 0.7076674699783325, + "learning_rate": 0.0001558683823008543, + "loss": 2.664, + "step": 6295 + }, + { + "epoch": 0.5081107255265919, + "grad_norm": 0.6829726696014404, + "learning_rate": 0.00015585528825601906, + "loss": 2.6029, + "step": 6296 + }, + { + "epoch": 0.5081914292631748, + "grad_norm": 0.6968080401420593, + "learning_rate": 0.000155842192819124, + "loss": 2.6256, + "step": 6297 + }, + { + "epoch": 0.5082721329997579, + "grad_norm": 0.7453410625457764, + "learning_rate": 0.00015582909599049554, + "loss": 2.6577, + "step": 6298 + }, + { + "epoch": 0.5083528367363409, + "grad_norm": 0.6603519916534424, + "learning_rate": 0.00015581599777046007, + "loss": 2.6066, + "step": 6299 + }, + { + "epoch": 
0.5084335404729239, + "grad_norm": 0.7096173763275146, + "learning_rate": 0.00015580289815934401, + "loss": 2.5488, + "step": 6300 + }, + { + "epoch": 0.5085142442095069, + "grad_norm": 0.799298107624054, + "learning_rate": 0.0001557897971574739, + "loss": 2.6021, + "step": 6301 + }, + { + "epoch": 0.50859494794609, + "grad_norm": 0.6820314526557922, + "learning_rate": 0.00015577669476517618, + "loss": 2.6276, + "step": 6302 + }, + { + "epoch": 0.5086756516826729, + "grad_norm": 0.7119347453117371, + "learning_rate": 0.00015576359098277742, + "loss": 2.6627, + "step": 6303 + }, + { + "epoch": 0.5087563554192559, + "grad_norm": 0.7638720273971558, + "learning_rate": 0.00015575048581060422, + "loss": 2.6824, + "step": 6304 + }, + { + "epoch": 0.5088370591558389, + "grad_norm": 0.7360339164733887, + "learning_rate": 0.00015573737924898316, + "loss": 2.5805, + "step": 6305 + }, + { + "epoch": 0.508917762892422, + "grad_norm": 0.7220984697341919, + "learning_rate": 0.00015572427129824091, + "loss": 2.6374, + "step": 6306 + }, + { + "epoch": 0.5089984666290049, + "grad_norm": 0.670964777469635, + "learning_rate": 0.00015571116195870418, + "loss": 2.6371, + "step": 6307 + }, + { + "epoch": 0.5090791703655879, + "grad_norm": 0.7826075553894043, + "learning_rate": 0.00015569805123069968, + "loss": 2.7666, + "step": 6308 + }, + { + "epoch": 0.5091598741021709, + "grad_norm": 0.7691593766212463, + "learning_rate": 0.00015568493911455412, + "loss": 2.6242, + "step": 6309 + }, + { + "epoch": 0.509240577838754, + "grad_norm": 0.714500367641449, + "learning_rate": 0.0001556718256105943, + "loss": 2.6551, + "step": 6310 + }, + { + "epoch": 0.509321281575337, + "grad_norm": 0.7634009718894958, + "learning_rate": 0.00015565871071914706, + "loss": 2.7069, + "step": 6311 + }, + { + "epoch": 0.5094019853119199, + "grad_norm": 0.7134168148040771, + "learning_rate": 0.00015564559444053926, + "loss": 2.5816, + "step": 6312 + }, + { + "epoch": 0.5094826890485029, + "grad_norm": 
0.6548121571540833, + "learning_rate": 0.0001556324767750978, + "loss": 2.6192, + "step": 6313 + }, + { + "epoch": 0.509563392785086, + "grad_norm": 0.7244428992271423, + "learning_rate": 0.0001556193577231496, + "loss": 2.6072, + "step": 6314 + }, + { + "epoch": 0.509644096521669, + "grad_norm": 0.6976662278175354, + "learning_rate": 0.0001556062372850216, + "loss": 2.6148, + "step": 6315 + }, + { + "epoch": 0.509724800258252, + "grad_norm": 0.772726833820343, + "learning_rate": 0.00015559311546104083, + "loss": 2.6458, + "step": 6316 + }, + { + "epoch": 0.5098055039948349, + "grad_norm": 0.7976188659667969, + "learning_rate": 0.00015557999225153428, + "loss": 2.6772, + "step": 6317 + }, + { + "epoch": 0.509886207731418, + "grad_norm": 0.6458039283752441, + "learning_rate": 0.00015556686765682903, + "loss": 2.6143, + "step": 6318 + }, + { + "epoch": 0.509966911468001, + "grad_norm": 0.7295405268669128, + "learning_rate": 0.0001555537416772522, + "loss": 2.5919, + "step": 6319 + }, + { + "epoch": 0.510047615204584, + "grad_norm": 0.657978355884552, + "learning_rate": 0.00015554061431313093, + "loss": 2.6245, + "step": 6320 + }, + { + "epoch": 0.510128318941167, + "grad_norm": 0.6726922392845154, + "learning_rate": 0.00015552748556479232, + "loss": 2.6207, + "step": 6321 + }, + { + "epoch": 0.51020902267775, + "grad_norm": 0.7954673767089844, + "learning_rate": 0.00015551435543256363, + "loss": 2.7177, + "step": 6322 + }, + { + "epoch": 0.510289726414333, + "grad_norm": 0.7186735272407532, + "learning_rate": 0.00015550122391677211, + "loss": 2.5953, + "step": 6323 + }, + { + "epoch": 0.510370430150916, + "grad_norm": 0.7835420966148376, + "learning_rate": 0.00015548809101774498, + "loss": 2.7039, + "step": 6324 + }, + { + "epoch": 0.510451133887499, + "grad_norm": 0.6966592073440552, + "learning_rate": 0.00015547495673580962, + "loss": 2.6287, + "step": 6325 + }, + { + "epoch": 0.5105318376240819, + "grad_norm": 0.6676180362701416, + "learning_rate": 
0.00015546182107129328, + "loss": 2.638, + "step": 6326 + }, + { + "epoch": 0.510612541360665, + "grad_norm": 0.7285657525062561, + "learning_rate": 0.0001554486840245234, + "loss": 2.6661, + "step": 6327 + }, + { + "epoch": 0.510693245097248, + "grad_norm": 0.6453657150268555, + "learning_rate": 0.00015543554559582735, + "loss": 2.715, + "step": 6328 + }, + { + "epoch": 0.510773948833831, + "grad_norm": 0.7364684343338013, + "learning_rate": 0.0001554224057855326, + "loss": 2.6475, + "step": 6329 + }, + { + "epoch": 0.510854652570414, + "grad_norm": 0.670894980430603, + "learning_rate": 0.00015540926459396665, + "loss": 2.6091, + "step": 6330 + }, + { + "epoch": 0.510935356306997, + "grad_norm": 0.6750168204307556, + "learning_rate": 0.00015539612202145696, + "loss": 2.6473, + "step": 6331 + }, + { + "epoch": 0.51101606004358, + "grad_norm": 0.6552454233169556, + "learning_rate": 0.0001553829780683311, + "loss": 2.6158, + "step": 6332 + }, + { + "epoch": 0.511096763780163, + "grad_norm": 0.7387828230857849, + "learning_rate": 0.00015536983273491668, + "loss": 2.6219, + "step": 6333 + }, + { + "epoch": 0.511177467516746, + "grad_norm": 0.6993975639343262, + "learning_rate": 0.00015535668602154127, + "loss": 2.6446, + "step": 6334 + }, + { + "epoch": 0.5112581712533291, + "grad_norm": 0.6491217613220215, + "learning_rate": 0.00015534353792853254, + "loss": 2.6404, + "step": 6335 + }, + { + "epoch": 0.511338874989912, + "grad_norm": 0.7165521383285522, + "learning_rate": 0.0001553303884562182, + "loss": 2.6339, + "step": 6336 + }, + { + "epoch": 0.511419578726495, + "grad_norm": 0.7363756895065308, + "learning_rate": 0.0001553172376049259, + "loss": 2.6411, + "step": 6337 + }, + { + "epoch": 0.511500282463078, + "grad_norm": 0.7148438096046448, + "learning_rate": 0.00015530408537498347, + "loss": 2.5617, + "step": 6338 + }, + { + "epoch": 0.5115809861996611, + "grad_norm": 0.7140451669692993, + "learning_rate": 0.00015529093176671864, + "loss": 2.5898, + "step": 6339 
+ }, + { + "epoch": 0.5116616899362441, + "grad_norm": 0.7799252271652222, + "learning_rate": 0.00015527777678045926, + "loss": 2.6176, + "step": 6340 + }, + { + "epoch": 0.511742393672827, + "grad_norm": 0.7292928099632263, + "learning_rate": 0.00015526462041653323, + "loss": 2.6722, + "step": 6341 + }, + { + "epoch": 0.51182309740941, + "grad_norm": 0.6986904740333557, + "learning_rate": 0.00015525146267526837, + "loss": 2.6154, + "step": 6342 + }, + { + "epoch": 0.5119038011459931, + "grad_norm": 0.7239612936973572, + "learning_rate": 0.00015523830355699262, + "loss": 2.5664, + "step": 6343 + }, + { + "epoch": 0.5119845048825761, + "grad_norm": 0.6805121898651123, + "learning_rate": 0.00015522514306203395, + "loss": 2.6204, + "step": 6344 + }, + { + "epoch": 0.512065208619159, + "grad_norm": 0.7036689519882202, + "learning_rate": 0.00015521198119072035, + "loss": 2.6211, + "step": 6345 + }, + { + "epoch": 0.512145912355742, + "grad_norm": 0.7155849933624268, + "learning_rate": 0.00015519881794337988, + "loss": 2.6074, + "step": 6346 + }, + { + "epoch": 0.5122266160923251, + "grad_norm": 0.7183938026428223, + "learning_rate": 0.00015518565332034057, + "loss": 2.6148, + "step": 6347 + }, + { + "epoch": 0.5123073198289081, + "grad_norm": 0.7053570747375488, + "learning_rate": 0.0001551724873219305, + "loss": 2.6476, + "step": 6348 + }, + { + "epoch": 0.5123880235654911, + "grad_norm": 0.714846670627594, + "learning_rate": 0.00015515931994847785, + "loss": 2.5728, + "step": 6349 + }, + { + "epoch": 0.512468727302074, + "grad_norm": 0.7504729628562927, + "learning_rate": 0.00015514615120031076, + "loss": 2.6415, + "step": 6350 + }, + { + "epoch": 0.5125494310386571, + "grad_norm": 0.6940335035324097, + "learning_rate": 0.0001551329810777574, + "loss": 2.6115, + "step": 6351 + }, + { + "epoch": 0.5126301347752401, + "grad_norm": 0.7166119813919067, + "learning_rate": 0.00015511980958114608, + "loss": 2.6284, + "step": 6352 + }, + { + "epoch": 0.5127108385118231, + 
"grad_norm": 0.7787839770317078, + "learning_rate": 0.00015510663671080497, + "loss": 2.6385, + "step": 6353 + }, + { + "epoch": 0.5127915422484061, + "grad_norm": 0.7298412322998047, + "learning_rate": 0.00015509346246706245, + "loss": 2.629, + "step": 6354 + }, + { + "epoch": 0.5128722459849892, + "grad_norm": 0.7918897271156311, + "learning_rate": 0.00015508028685024683, + "loss": 2.6777, + "step": 6355 + }, + { + "epoch": 0.5129529497215721, + "grad_norm": 0.6867843866348267, + "learning_rate": 0.00015506710986068646, + "loss": 2.6101, + "step": 6356 + }, + { + "epoch": 0.5130336534581551, + "grad_norm": 0.716468870639801, + "learning_rate": 0.00015505393149870978, + "loss": 2.6558, + "step": 6357 + }, + { + "epoch": 0.5131143571947381, + "grad_norm": 0.6704092621803284, + "learning_rate": 0.0001550407517646452, + "loss": 2.6128, + "step": 6358 + }, + { + "epoch": 0.5131950609313212, + "grad_norm": 0.820716381072998, + "learning_rate": 0.00015502757065882124, + "loss": 2.6052, + "step": 6359 + }, + { + "epoch": 0.5132757646679041, + "grad_norm": 0.7328094840049744, + "learning_rate": 0.00015501438818156635, + "loss": 2.6399, + "step": 6360 + }, + { + "epoch": 0.5133564684044871, + "grad_norm": 0.6602808833122253, + "learning_rate": 0.00015500120433320911, + "loss": 2.5509, + "step": 6361 + }, + { + "epoch": 0.5134371721410701, + "grad_norm": 0.7013166546821594, + "learning_rate": 0.00015498801911407805, + "loss": 2.6439, + "step": 6362 + }, + { + "epoch": 0.5135178758776532, + "grad_norm": 0.7415499091148376, + "learning_rate": 0.00015497483252450186, + "loss": 2.575, + "step": 6363 + }, + { + "epoch": 0.5135985796142362, + "grad_norm": 0.7262336015701294, + "learning_rate": 0.00015496164456480912, + "loss": 2.6815, + "step": 6364 + }, + { + "epoch": 0.5136792833508191, + "grad_norm": 0.7353699803352356, + "learning_rate": 0.0001549484552353285, + "loss": 2.6172, + "step": 6365 + }, + { + "epoch": 0.5137599870874021, + "grad_norm": 0.7005086541175842, + 
"learning_rate": 0.00015493526453638879, + "loss": 2.5945, + "step": 6366 + }, + { + "epoch": 0.5138406908239852, + "grad_norm": 0.7469770908355713, + "learning_rate": 0.00015492207246831864, + "loss": 2.6797, + "step": 6367 + }, + { + "epoch": 0.5139213945605682, + "grad_norm": 0.6768934726715088, + "learning_rate": 0.00015490887903144693, + "loss": 2.6369, + "step": 6368 + }, + { + "epoch": 0.5140020982971512, + "grad_norm": 0.7625820636749268, + "learning_rate": 0.00015489568422610237, + "loss": 2.6182, + "step": 6369 + }, + { + "epoch": 0.5140828020337341, + "grad_norm": 0.749351978302002, + "learning_rate": 0.00015488248805261388, + "loss": 2.6066, + "step": 6370 + }, + { + "epoch": 0.5141635057703172, + "grad_norm": 0.8369480967521667, + "learning_rate": 0.00015486929051131032, + "loss": 2.7627, + "step": 6371 + }, + { + "epoch": 0.5142442095069002, + "grad_norm": 0.6482037305831909, + "learning_rate": 0.0001548560916025206, + "loss": 2.609, + "step": 6372 + }, + { + "epoch": 0.5143249132434832, + "grad_norm": 0.6801851391792297, + "learning_rate": 0.0001548428913265737, + "loss": 2.5878, + "step": 6373 + }, + { + "epoch": 0.5144056169800661, + "grad_norm": 0.744926929473877, + "learning_rate": 0.0001548296896837986, + "loss": 2.6569, + "step": 6374 + }, + { + "epoch": 0.5144863207166491, + "grad_norm": 0.6862614750862122, + "learning_rate": 0.00015481648667452425, + "loss": 2.5626, + "step": 6375 + }, + { + "epoch": 0.5145670244532322, + "grad_norm": 0.7186449766159058, + "learning_rate": 0.0001548032822990798, + "loss": 2.6783, + "step": 6376 + }, + { + "epoch": 0.5146477281898152, + "grad_norm": 0.699715256690979, + "learning_rate": 0.0001547900765577943, + "loss": 2.6709, + "step": 6377 + }, + { + "epoch": 0.5147284319263982, + "grad_norm": 0.7272205352783203, + "learning_rate": 0.00015477686945099687, + "loss": 2.6076, + "step": 6378 + }, + { + "epoch": 0.5148091356629811, + "grad_norm": 0.7667459845542908, + "learning_rate": 0.00015476366097901667, + 
"loss": 2.6541, + "step": 6379 + }, + { + "epoch": 0.5148898393995642, + "grad_norm": 0.6538121700286865, + "learning_rate": 0.00015475045114218285, + "loss": 2.5806, + "step": 6380 + }, + { + "epoch": 0.5149705431361472, + "grad_norm": 0.7388994097709656, + "learning_rate": 0.00015473723994082473, + "loss": 2.6293, + "step": 6381 + }, + { + "epoch": 0.5150512468727302, + "grad_norm": 0.7044215202331543, + "learning_rate": 0.00015472402737527142, + "loss": 2.5755, + "step": 6382 + }, + { + "epoch": 0.5151319506093132, + "grad_norm": 0.6807994246482849, + "learning_rate": 0.00015471081344585236, + "loss": 2.6493, + "step": 6383 + }, + { + "epoch": 0.5152126543458962, + "grad_norm": 0.676278293132782, + "learning_rate": 0.00015469759815289681, + "loss": 2.6319, + "step": 6384 + }, + { + "epoch": 0.5152933580824792, + "grad_norm": 0.7515453696250916, + "learning_rate": 0.00015468438149673412, + "loss": 2.6415, + "step": 6385 + }, + { + "epoch": 0.5153740618190622, + "grad_norm": 0.8694239854812622, + "learning_rate": 0.0001546711634776937, + "loss": 2.5818, + "step": 6386 + }, + { + "epoch": 0.5154547655556452, + "grad_norm": 0.717090368270874, + "learning_rate": 0.000154657944096105, + "loss": 2.7132, + "step": 6387 + }, + { + "epoch": 0.5155354692922283, + "grad_norm": 0.7098804116249084, + "learning_rate": 0.00015464472335229742, + "loss": 2.564, + "step": 6388 + }, + { + "epoch": 0.5156161730288112, + "grad_norm": 0.6879690289497375, + "learning_rate": 0.0001546315012466005, + "loss": 2.6094, + "step": 6389 + }, + { + "epoch": 0.5156968767653942, + "grad_norm": 0.7110763788223267, + "learning_rate": 0.00015461827777934377, + "loss": 2.5982, + "step": 6390 + }, + { + "epoch": 0.5157775805019772, + "grad_norm": 0.7168039679527283, + "learning_rate": 0.00015460505295085677, + "loss": 2.5451, + "step": 6391 + }, + { + "epoch": 0.5158582842385603, + "grad_norm": 0.7059877514839172, + "learning_rate": 0.00015459182676146914, + "loss": 2.6655, + "step": 6392 + }, + { + 
"epoch": 0.5159389879751433, + "grad_norm": 0.7278143763542175, + "learning_rate": 0.00015457859921151043, + "loss": 2.6587, + "step": 6393 + }, + { + "epoch": 0.5160196917117262, + "grad_norm": 0.7301023602485657, + "learning_rate": 0.0001545653703013104, + "loss": 2.7672, + "step": 6394 + }, + { + "epoch": 0.5161003954483092, + "grad_norm": 0.6933302283287048, + "learning_rate": 0.0001545521400311987, + "loss": 2.5924, + "step": 6395 + }, + { + "epoch": 0.5161810991848923, + "grad_norm": 0.7074775099754333, + "learning_rate": 0.00015453890840150508, + "loss": 2.6663, + "step": 6396 + }, + { + "epoch": 0.5162618029214753, + "grad_norm": 0.7069801092147827, + "learning_rate": 0.00015452567541255924, + "loss": 2.6791, + "step": 6397 + }, + { + "epoch": 0.5163425066580583, + "grad_norm": 0.6586462259292603, + "learning_rate": 0.00015451244106469108, + "loss": 2.6368, + "step": 6398 + }, + { + "epoch": 0.5164232103946412, + "grad_norm": 0.6862531900405884, + "learning_rate": 0.00015449920535823042, + "loss": 2.7099, + "step": 6399 + }, + { + "epoch": 0.5165039141312243, + "grad_norm": 0.7177795767784119, + "learning_rate": 0.00015448596829350706, + "loss": 2.5921, + "step": 6400 + }, + { + "epoch": 0.5165846178678073, + "grad_norm": 0.6936569213867188, + "learning_rate": 0.00015447272987085094, + "loss": 2.5739, + "step": 6401 + }, + { + "epoch": 0.5166653216043903, + "grad_norm": 0.7394363284111023, + "learning_rate": 0.00015445949009059202, + "loss": 2.5941, + "step": 6402 + }, + { + "epoch": 0.5167460253409732, + "grad_norm": 0.6713366508483887, + "learning_rate": 0.00015444624895306027, + "loss": 2.574, + "step": 6403 + }, + { + "epoch": 0.5168267290775563, + "grad_norm": 0.679128885269165, + "learning_rate": 0.0001544330064585856, + "loss": 2.6422, + "step": 6404 + }, + { + "epoch": 0.5169074328141393, + "grad_norm": 0.6803367137908936, + "learning_rate": 0.0001544197626074982, + "loss": 2.6503, + "step": 6405 + }, + { + "epoch": 0.5169881365507223, + 
"grad_norm": 0.8009794354438782, + "learning_rate": 0.000154406517400128, + "loss": 2.6434, + "step": 6406 + }, + { + "epoch": 0.5170688402873053, + "grad_norm": 0.7292529344558716, + "learning_rate": 0.00015439327083680517, + "loss": 2.6333, + "step": 6407 + }, + { + "epoch": 0.5171495440238884, + "grad_norm": 0.67046719789505, + "learning_rate": 0.00015438002291785988, + "loss": 2.5791, + "step": 6408 + }, + { + "epoch": 0.5172302477604713, + "grad_norm": 0.755501925945282, + "learning_rate": 0.00015436677364362225, + "loss": 2.5558, + "step": 6409 + }, + { + "epoch": 0.5173109514970543, + "grad_norm": 0.6957115530967712, + "learning_rate": 0.0001543535230144225, + "loss": 2.5839, + "step": 6410 + }, + { + "epoch": 0.5173916552336373, + "grad_norm": 0.6629074215888977, + "learning_rate": 0.0001543402710305909, + "loss": 2.6529, + "step": 6411 + }, + { + "epoch": 0.5174723589702204, + "grad_norm": 0.6647019386291504, + "learning_rate": 0.00015432701769245766, + "loss": 2.589, + "step": 6412 + }, + { + "epoch": 0.5175530627068033, + "grad_norm": 0.6472512483596802, + "learning_rate": 0.00015431376300035316, + "loss": 2.6184, + "step": 6413 + }, + { + "epoch": 0.5176337664433863, + "grad_norm": 0.6900136470794678, + "learning_rate": 0.0001543005069546077, + "loss": 2.7029, + "step": 6414 + }, + { + "epoch": 0.5177144701799693, + "grad_norm": 0.7702177166938782, + "learning_rate": 0.00015428724955555165, + "loss": 2.6189, + "step": 6415 + }, + { + "epoch": 0.5177951739165524, + "grad_norm": 0.641655445098877, + "learning_rate": 0.00015427399080351545, + "loss": 2.6486, + "step": 6416 + }, + { + "epoch": 0.5178758776531354, + "grad_norm": 0.6826485991477966, + "learning_rate": 0.00015426073069882952, + "loss": 2.6105, + "step": 6417 + }, + { + "epoch": 0.5179565813897183, + "grad_norm": 0.749812662601471, + "learning_rate": 0.00015424746924182434, + "loss": 2.5644, + "step": 6418 + }, + { + "epoch": 0.5180372851263013, + "grad_norm": 0.6737890243530273, + 
"learning_rate": 0.0001542342064328304, + "loss": 2.686, + "step": 6419 + }, + { + "epoch": 0.5181179888628844, + "grad_norm": 0.7131822109222412, + "learning_rate": 0.0001542209422721783, + "loss": 2.697, + "step": 6420 + }, + { + "epoch": 0.5181986925994674, + "grad_norm": 0.7543746829032898, + "learning_rate": 0.0001542076767601986, + "loss": 2.6349, + "step": 6421 + }, + { + "epoch": 0.5182793963360504, + "grad_norm": 0.7589309215545654, + "learning_rate": 0.00015419440989722184, + "loss": 2.63, + "step": 6422 + }, + { + "epoch": 0.5183601000726333, + "grad_norm": 0.7036365866661072, + "learning_rate": 0.00015418114168357872, + "loss": 2.605, + "step": 6423 + }, + { + "epoch": 0.5184408038092164, + "grad_norm": 0.733161985874176, + "learning_rate": 0.00015416787211959998, + "loss": 2.6708, + "step": 6424 + }, + { + "epoch": 0.5185215075457994, + "grad_norm": 0.6928101181983948, + "learning_rate": 0.00015415460120561623, + "loss": 2.6549, + "step": 6425 + }, + { + "epoch": 0.5186022112823824, + "grad_norm": 0.6557250022888184, + "learning_rate": 0.00015414132894195825, + "loss": 2.6185, + "step": 6426 + }, + { + "epoch": 0.5186829150189654, + "grad_norm": 0.7236297726631165, + "learning_rate": 0.00015412805532895684, + "loss": 2.6185, + "step": 6427 + }, + { + "epoch": 0.5187636187555483, + "grad_norm": 0.7194060683250427, + "learning_rate": 0.0001541147803669428, + "loss": 2.6123, + "step": 6428 + }, + { + "epoch": 0.5188443224921314, + "grad_norm": 0.7077342867851257, + "learning_rate": 0.00015410150405624696, + "loss": 2.6628, + "step": 6429 + }, + { + "epoch": 0.5189250262287144, + "grad_norm": 0.7036150693893433, + "learning_rate": 0.00015408822639720023, + "loss": 2.5966, + "step": 6430 + }, + { + "epoch": 0.5190057299652974, + "grad_norm": 0.7047349810600281, + "learning_rate": 0.00015407494739013352, + "loss": 2.6626, + "step": 6431 + }, + { + "epoch": 0.5190864337018803, + "grad_norm": 0.7537584900856018, + "learning_rate": 0.00015406166703537777, + 
"loss": 2.6452, + "step": 6432 + }, + { + "epoch": 0.5191671374384634, + "grad_norm": 0.7944707870483398, + "learning_rate": 0.00015404838533326394, + "loss": 2.6834, + "step": 6433 + }, + { + "epoch": 0.5192478411750464, + "grad_norm": 0.8602458238601685, + "learning_rate": 0.00015403510228412305, + "loss": 2.6238, + "step": 6434 + }, + { + "epoch": 0.5193285449116294, + "grad_norm": 0.7181896567344666, + "learning_rate": 0.0001540218178882862, + "loss": 2.652, + "step": 6435 + }, + { + "epoch": 0.5194092486482124, + "grad_norm": 0.7470960021018982, + "learning_rate": 0.0001540085321460844, + "loss": 2.6703, + "step": 6436 + }, + { + "epoch": 0.5194899523847955, + "grad_norm": 0.8249944448471069, + "learning_rate": 0.00015399524505784883, + "loss": 2.5945, + "step": 6437 + }, + { + "epoch": 0.5195706561213784, + "grad_norm": 0.7332444190979004, + "learning_rate": 0.00015398195662391057, + "loss": 2.6472, + "step": 6438 + }, + { + "epoch": 0.5196513598579614, + "grad_norm": 0.7727739810943604, + "learning_rate": 0.0001539686668446009, + "loss": 2.6276, + "step": 6439 + }, + { + "epoch": 0.5197320635945444, + "grad_norm": 0.7161617279052734, + "learning_rate": 0.00015395537572025094, + "loss": 2.624, + "step": 6440 + }, + { + "epoch": 0.5198127673311275, + "grad_norm": 0.7657529711723328, + "learning_rate": 0.00015394208325119198, + "loss": 2.6604, + "step": 6441 + }, + { + "epoch": 0.5198934710677104, + "grad_norm": 0.732904314994812, + "learning_rate": 0.00015392878943775527, + "loss": 2.6334, + "step": 6442 + }, + { + "epoch": 0.5199741748042934, + "grad_norm": 0.7058991193771362, + "learning_rate": 0.0001539154942802722, + "loss": 2.5936, + "step": 6443 + }, + { + "epoch": 0.5200548785408764, + "grad_norm": 0.7328821420669556, + "learning_rate": 0.00015390219777907405, + "loss": 2.5969, + "step": 6444 + }, + { + "epoch": 0.5201355822774595, + "grad_norm": 0.7899969220161438, + "learning_rate": 0.00015388889993449224, + "loss": 2.5856, + "step": 6445 + }, + { + 
"epoch": 0.5202162860140425, + "grad_norm": 0.6963860392570496, + "learning_rate": 0.00015387560074685817, + "loss": 2.6139, + "step": 6446 + }, + { + "epoch": 0.5202969897506254, + "grad_norm": 0.812053918838501, + "learning_rate": 0.00015386230021650327, + "loss": 2.716, + "step": 6447 + }, + { + "epoch": 0.5203776934872084, + "grad_norm": 0.766781210899353, + "learning_rate": 0.0001538489983437591, + "loss": 2.6509, + "step": 6448 + }, + { + "epoch": 0.5204583972237915, + "grad_norm": 0.6877299547195435, + "learning_rate": 0.00015383569512895712, + "loss": 2.6076, + "step": 6449 + }, + { + "epoch": 0.5205391009603745, + "grad_norm": 0.7009176015853882, + "learning_rate": 0.00015382239057242888, + "loss": 2.608, + "step": 6450 + }, + { + "epoch": 0.5206198046969575, + "grad_norm": 0.7187578678131104, + "learning_rate": 0.000153809084674506, + "loss": 2.5946, + "step": 6451 + }, + { + "epoch": 0.5207005084335404, + "grad_norm": 0.7242687344551086, + "learning_rate": 0.00015379577743552001, + "loss": 2.6752, + "step": 6452 + }, + { + "epoch": 0.5207812121701235, + "grad_norm": 0.7668174505233765, + "learning_rate": 0.00015378246885580266, + "loss": 2.6694, + "step": 6453 + }, + { + "epoch": 0.5208619159067065, + "grad_norm": 0.7676039338111877, + "learning_rate": 0.00015376915893568557, + "loss": 2.6379, + "step": 6454 + }, + { + "epoch": 0.5209426196432895, + "grad_norm": 0.7394412159919739, + "learning_rate": 0.00015375584767550053, + "loss": 2.6046, + "step": 6455 + }, + { + "epoch": 0.5210233233798724, + "grad_norm": 0.7246636748313904, + "learning_rate": 0.00015374253507557923, + "loss": 2.592, + "step": 6456 + }, + { + "epoch": 0.5211040271164555, + "grad_norm": 0.7121255993843079, + "learning_rate": 0.00015372922113625345, + "loss": 2.634, + "step": 6457 + }, + { + "epoch": 0.5211847308530385, + "grad_norm": 0.7378345131874084, + "learning_rate": 0.00015371590585785505, + "loss": 2.5753, + "step": 6458 + }, + { + "epoch": 0.5212654345896215, + "grad_norm": 
0.6682030558586121, + "learning_rate": 0.00015370258924071587, + "loss": 2.6305, + "step": 6459 + }, + { + "epoch": 0.5213461383262045, + "grad_norm": 0.7164177894592285, + "learning_rate": 0.00015368927128516776, + "loss": 2.7188, + "step": 6460 + }, + { + "epoch": 0.5214268420627876, + "grad_norm": 0.7341115474700928, + "learning_rate": 0.00015367595199154273, + "loss": 2.6204, + "step": 6461 + }, + { + "epoch": 0.5215075457993705, + "grad_norm": 0.6781840920448303, + "learning_rate": 0.00015366263136017258, + "loss": 2.6104, + "step": 6462 + }, + { + "epoch": 0.5215882495359535, + "grad_norm": 0.7029077410697937, + "learning_rate": 0.0001536493093913894, + "loss": 2.6055, + "step": 6463 + }, + { + "epoch": 0.5216689532725365, + "grad_norm": 0.6958553194999695, + "learning_rate": 0.00015363598608552522, + "loss": 2.5991, + "step": 6464 + }, + { + "epoch": 0.5217496570091196, + "grad_norm": 0.6919750571250916, + "learning_rate": 0.00015362266144291207, + "loss": 2.6022, + "step": 6465 + }, + { + "epoch": 0.5218303607457025, + "grad_norm": 0.6980622410774231, + "learning_rate": 0.000153609335463882, + "loss": 2.6289, + "step": 6466 + }, + { + "epoch": 0.5219110644822855, + "grad_norm": 0.7468248009681702, + "learning_rate": 0.00015359600814876715, + "loss": 2.6327, + "step": 6467 + }, + { + "epoch": 0.5219917682188685, + "grad_norm": 0.7183729410171509, + "learning_rate": 0.00015358267949789966, + "loss": 2.6389, + "step": 6468 + }, + { + "epoch": 0.5220724719554516, + "grad_norm": 0.6558868885040283, + "learning_rate": 0.00015356934951161178, + "loss": 2.6261, + "step": 6469 + }, + { + "epoch": 0.5221531756920346, + "grad_norm": 0.8000216484069824, + "learning_rate": 0.00015355601819023562, + "loss": 2.6908, + "step": 6470 + }, + { + "epoch": 0.5222338794286175, + "grad_norm": 0.775056004524231, + "learning_rate": 0.00015354268553410355, + "loss": 2.6763, + "step": 6471 + }, + { + "epoch": 0.5223145831652005, + "grad_norm": 0.7345123291015625, + "learning_rate": 
0.00015352935154354776, + "loss": 2.582, + "step": 6472 + }, + { + "epoch": 0.5223952869017836, + "grad_norm": 0.731311023235321, + "learning_rate": 0.0001535160162189006, + "loss": 2.6519, + "step": 6473 + }, + { + "epoch": 0.5224759906383666, + "grad_norm": 0.6481007933616638, + "learning_rate": 0.00015350267956049443, + "loss": 2.5695, + "step": 6474 + }, + { + "epoch": 0.5225566943749496, + "grad_norm": 0.7698814868927002, + "learning_rate": 0.00015348934156866163, + "loss": 2.5732, + "step": 6475 + }, + { + "epoch": 0.5226373981115325, + "grad_norm": 0.7404680848121643, + "learning_rate": 0.00015347600224373462, + "loss": 2.5826, + "step": 6476 + }, + { + "epoch": 0.5227181018481155, + "grad_norm": 0.6965613961219788, + "learning_rate": 0.00015346266158604584, + "loss": 2.6069, + "step": 6477 + }, + { + "epoch": 0.5227988055846986, + "grad_norm": 0.6611152291297913, + "learning_rate": 0.00015344931959592777, + "loss": 2.4937, + "step": 6478 + }, + { + "epoch": 0.5228795093212816, + "grad_norm": 0.7418150305747986, + "learning_rate": 0.00015343597627371296, + "loss": 2.5747, + "step": 6479 + }, + { + "epoch": 0.5229602130578646, + "grad_norm": 0.6847610473632812, + "learning_rate": 0.00015342263161973393, + "loss": 2.5906, + "step": 6480 + }, + { + "epoch": 0.5230409167944475, + "grad_norm": 0.7054881453514099, + "learning_rate": 0.00015340928563432326, + "loss": 2.5914, + "step": 6481 + }, + { + "epoch": 0.5231216205310306, + "grad_norm": 0.6918888092041016, + "learning_rate": 0.0001533959383178136, + "loss": 2.6412, + "step": 6482 + }, + { + "epoch": 0.5232023242676136, + "grad_norm": 0.7232856154441833, + "learning_rate": 0.00015338258967053755, + "loss": 2.6364, + "step": 6483 + }, + { + "epoch": 0.5232830280041966, + "grad_norm": 0.7345031499862671, + "learning_rate": 0.00015336923969282786, + "loss": 2.6649, + "step": 6484 + }, + { + "epoch": 0.5233637317407795, + "grad_norm": 0.7644383907318115, + "learning_rate": 0.0001533558883850172, + "loss": 2.6949, 
+ "step": 6485 + }, + { + "epoch": 0.5234444354773626, + "grad_norm": 0.6532372832298279, + "learning_rate": 0.0001533425357474383, + "loss": 2.5915, + "step": 6486 + }, + { + "epoch": 0.5235251392139456, + "grad_norm": 0.7089118361473083, + "learning_rate": 0.000153329181780424, + "loss": 2.6446, + "step": 6487 + }, + { + "epoch": 0.5236058429505286, + "grad_norm": 0.6966068148612976, + "learning_rate": 0.00015331582648430705, + "loss": 2.6764, + "step": 6488 + }, + { + "epoch": 0.5236865466871116, + "grad_norm": 0.7130835056304932, + "learning_rate": 0.00015330246985942035, + "loss": 2.6279, + "step": 6489 + }, + { + "epoch": 0.5237672504236947, + "grad_norm": 0.729727029800415, + "learning_rate": 0.00015328911190609678, + "loss": 2.612, + "step": 6490 + }, + { + "epoch": 0.5238479541602776, + "grad_norm": 0.6804213523864746, + "learning_rate": 0.0001532757526246692, + "loss": 2.6113, + "step": 6491 + }, + { + "epoch": 0.5239286578968606, + "grad_norm": 0.7324437499046326, + "learning_rate": 0.0001532623920154707, + "loss": 2.6054, + "step": 6492 + }, + { + "epoch": 0.5240093616334436, + "grad_norm": 0.6166699528694153, + "learning_rate": 0.00015324903007883406, + "loss": 2.5822, + "step": 6493 + }, + { + "epoch": 0.5240900653700267, + "grad_norm": 0.7339944839477539, + "learning_rate": 0.00015323566681509242, + "loss": 2.6204, + "step": 6494 + }, + { + "epoch": 0.5241707691066096, + "grad_norm": 0.7267727255821228, + "learning_rate": 0.00015322230222457886, + "loss": 2.6094, + "step": 6495 + }, + { + "epoch": 0.5242514728431926, + "grad_norm": 0.6417120695114136, + "learning_rate": 0.00015320893630762635, + "loss": 2.6044, + "step": 6496 + }, + { + "epoch": 0.5243321765797756, + "grad_norm": 0.7092922329902649, + "learning_rate": 0.00015319556906456808, + "loss": 2.6428, + "step": 6497 + }, + { + "epoch": 0.5244128803163587, + "grad_norm": 0.7482922673225403, + "learning_rate": 0.00015318220049573714, + "loss": 2.6025, + "step": 6498 + }, + { + "epoch": 
0.5244935840529417, + "grad_norm": 0.691925048828125, + "learning_rate": 0.00015316883060146675, + "loss": 2.6308, + "step": 6499 + }, + { + "epoch": 0.5245742877895246, + "grad_norm": 0.7084488272666931, + "learning_rate": 0.00015315545938209015, + "loss": 2.6535, + "step": 6500 + }, + { + "epoch": 0.5246549915261076, + "grad_norm": 0.7182802557945251, + "learning_rate": 0.00015314208683794056, + "loss": 2.6045, + "step": 6501 + }, + { + "epoch": 0.5247356952626907, + "grad_norm": 0.7043096423149109, + "learning_rate": 0.00015312871296935122, + "loss": 2.6465, + "step": 6502 + }, + { + "epoch": 0.5248163989992737, + "grad_norm": 0.7679466009140015, + "learning_rate": 0.00015311533777665547, + "loss": 2.6624, + "step": 6503 + }, + { + "epoch": 0.5248971027358567, + "grad_norm": 0.6825870275497437, + "learning_rate": 0.00015310196126018668, + "loss": 2.5548, + "step": 6504 + }, + { + "epoch": 0.5249778064724396, + "grad_norm": 0.7364058494567871, + "learning_rate": 0.00015308858342027816, + "loss": 2.6495, + "step": 6505 + }, + { + "epoch": 0.5250585102090227, + "grad_norm": 0.7333239316940308, + "learning_rate": 0.00015307520425726341, + "loss": 2.5835, + "step": 6506 + }, + { + "epoch": 0.5251392139456057, + "grad_norm": 0.7479620575904846, + "learning_rate": 0.00015306182377147583, + "loss": 2.6065, + "step": 6507 + }, + { + "epoch": 0.5252199176821887, + "grad_norm": 0.7347591519355774, + "learning_rate": 0.00015304844196324888, + "loss": 2.6624, + "step": 6508 + }, + { + "epoch": 0.5253006214187717, + "grad_norm": 0.6879193782806396, + "learning_rate": 0.0001530350588329161, + "loss": 2.6598, + "step": 6509 + }, + { + "epoch": 0.5253813251553547, + "grad_norm": 0.7841597199440002, + "learning_rate": 0.000153021674380811, + "loss": 2.53, + "step": 6510 + }, + { + "epoch": 0.5254620288919377, + "grad_norm": 0.7916845679283142, + "learning_rate": 0.0001530082886072672, + "loss": 2.6995, + "step": 6511 + }, + { + "epoch": 0.5255427326285207, + "grad_norm": 
0.7066318988800049, + "learning_rate": 0.0001529949015126183, + "loss": 2.58, + "step": 6512 + }, + { + "epoch": 0.5256234363651037, + "grad_norm": 0.6871134638786316, + "learning_rate": 0.00015298151309719787, + "loss": 2.6095, + "step": 6513 + }, + { + "epoch": 0.5257041401016868, + "grad_norm": 0.7479702830314636, + "learning_rate": 0.00015296812336133963, + "loss": 2.608, + "step": 6514 + }, + { + "epoch": 0.5257848438382697, + "grad_norm": 0.6772119402885437, + "learning_rate": 0.00015295473230537735, + "loss": 2.5679, + "step": 6515 + }, + { + "epoch": 0.5258655475748527, + "grad_norm": 0.7365416884422302, + "learning_rate": 0.0001529413399296447, + "loss": 2.6722, + "step": 6516 + }, + { + "epoch": 0.5259462513114357, + "grad_norm": 0.7538040280342102, + "learning_rate": 0.00015292794623447545, + "loss": 2.5562, + "step": 6517 + }, + { + "epoch": 0.5260269550480188, + "grad_norm": 0.7471820712089539, + "learning_rate": 0.00015291455122020344, + "loss": 2.7079, + "step": 6518 + }, + { + "epoch": 0.5261076587846018, + "grad_norm": 0.7605932354927063, + "learning_rate": 0.00015290115488716247, + "loss": 2.6696, + "step": 6519 + }, + { + "epoch": 0.5261883625211847, + "grad_norm": 0.7081854939460754, + "learning_rate": 0.00015288775723568647, + "loss": 2.6502, + "step": 6520 + }, + { + "epoch": 0.5262690662577677, + "grad_norm": 0.7236372828483582, + "learning_rate": 0.0001528743582661093, + "loss": 2.662, + "step": 6521 + }, + { + "epoch": 0.5263497699943508, + "grad_norm": 0.6710047721862793, + "learning_rate": 0.0001528609579787649, + "loss": 2.5947, + "step": 6522 + }, + { + "epoch": 0.5264304737309338, + "grad_norm": 0.709381103515625, + "learning_rate": 0.00015284755637398726, + "loss": 2.5922, + "step": 6523 + }, + { + "epoch": 0.5265111774675167, + "grad_norm": 0.7029775381088257, + "learning_rate": 0.00015283415345211033, + "loss": 2.6777, + "step": 6524 + }, + { + "epoch": 0.5265918812040997, + "grad_norm": 0.7250857949256897, + "learning_rate": 
0.00015282074921346825, + "loss": 2.6027, + "step": 6525 + }, + { + "epoch": 0.5266725849406828, + "grad_norm": 0.7192760705947876, + "learning_rate": 0.00015280734365839498, + "loss": 2.6544, + "step": 6526 + }, + { + "epoch": 0.5267532886772658, + "grad_norm": 0.693583071231842, + "learning_rate": 0.0001527939367872247, + "loss": 2.6302, + "step": 6527 + }, + { + "epoch": 0.5268339924138488, + "grad_norm": 0.7031428217887878, + "learning_rate": 0.00015278052860029145, + "loss": 2.6944, + "step": 6528 + }, + { + "epoch": 0.5269146961504317, + "grad_norm": 0.6986895799636841, + "learning_rate": 0.00015276711909792949, + "loss": 2.6595, + "step": 6529 + }, + { + "epoch": 0.5269953998870147, + "grad_norm": 0.7375979423522949, + "learning_rate": 0.000152753708280473, + "loss": 2.6839, + "step": 6530 + }, + { + "epoch": 0.5270761036235978, + "grad_norm": 0.7126755714416504, + "learning_rate": 0.0001527402961482562, + "loss": 2.5597, + "step": 6531 + }, + { + "epoch": 0.5271568073601808, + "grad_norm": 0.6631070971488953, + "learning_rate": 0.00015272688270161338, + "loss": 2.5566, + "step": 6532 + }, + { + "epoch": 0.5272375110967638, + "grad_norm": 0.6896609663963318, + "learning_rate": 0.00015271346794087874, + "loss": 2.5801, + "step": 6533 + }, + { + "epoch": 0.5273182148333467, + "grad_norm": 0.7437502145767212, + "learning_rate": 0.00015270005186638673, + "loss": 2.6572, + "step": 6534 + }, + { + "epoch": 0.5273989185699298, + "grad_norm": 0.7013052701950073, + "learning_rate": 0.00015268663447847166, + "loss": 2.621, + "step": 6535 + }, + { + "epoch": 0.5274796223065128, + "grad_norm": 0.7161773443222046, + "learning_rate": 0.00015267321577746795, + "loss": 2.5989, + "step": 6536 + }, + { + "epoch": 0.5275603260430958, + "grad_norm": 0.7654534578323364, + "learning_rate": 0.00015265979576371, + "loss": 2.6338, + "step": 6537 + }, + { + "epoch": 0.5276410297796787, + "grad_norm": 0.694646954536438, + "learning_rate": 0.0001526463744375323, + "loss": 2.6036, + 
"step": 6538 + }, + { + "epoch": 0.5277217335162618, + "grad_norm": 0.6594679355621338, + "learning_rate": 0.0001526329517992693, + "loss": 2.6256, + "step": 6539 + }, + { + "epoch": 0.5278024372528448, + "grad_norm": 0.6424389481544495, + "learning_rate": 0.00015261952784925557, + "loss": 2.6389, + "step": 6540 + }, + { + "epoch": 0.5278831409894278, + "grad_norm": 0.7465235590934753, + "learning_rate": 0.0001526061025878257, + "loss": 2.5449, + "step": 6541 + }, + { + "epoch": 0.5279638447260108, + "grad_norm": 0.6900132298469543, + "learning_rate": 0.0001525926760153142, + "loss": 2.5597, + "step": 6542 + }, + { + "epoch": 0.5280445484625939, + "grad_norm": 0.7505282163619995, + "learning_rate": 0.00015257924813205572, + "loss": 2.6526, + "step": 6543 + }, + { + "epoch": 0.5281252521991768, + "grad_norm": 0.72642582654953, + "learning_rate": 0.00015256581893838495, + "loss": 2.6593, + "step": 6544 + }, + { + "epoch": 0.5282059559357598, + "grad_norm": 0.6901132464408875, + "learning_rate": 0.00015255238843463656, + "loss": 2.6726, + "step": 6545 + }, + { + "epoch": 0.5282866596723428, + "grad_norm": 0.7741395831108093, + "learning_rate": 0.0001525389566211453, + "loss": 2.5929, + "step": 6546 + }, + { + "epoch": 0.5283673634089259, + "grad_norm": 0.7282403111457825, + "learning_rate": 0.00015252552349824585, + "loss": 2.5696, + "step": 6547 + }, + { + "epoch": 0.5284480671455088, + "grad_norm": 0.7421764731407166, + "learning_rate": 0.0001525120890662731, + "loss": 2.5593, + "step": 6548 + }, + { + "epoch": 0.5285287708820918, + "grad_norm": 0.6830468773841858, + "learning_rate": 0.00015249865332556182, + "loss": 2.6396, + "step": 6549 + }, + { + "epoch": 0.5286094746186748, + "grad_norm": 0.6758440732955933, + "learning_rate": 0.00015248521627644684, + "loss": 2.5375, + "step": 6550 + }, + { + "epoch": 0.5286901783552579, + "grad_norm": 0.6897253394126892, + "learning_rate": 0.00015247177791926308, + "loss": 2.6148, + "step": 6551 + }, + { + "epoch": 
0.5287708820918409, + "grad_norm": 0.6391426920890808, + "learning_rate": 0.00015245833825434547, + "loss": 2.5563, + "step": 6552 + }, + { + "epoch": 0.5288515858284238, + "grad_norm": 0.7213610410690308, + "learning_rate": 0.00015244489728202893, + "loss": 2.6158, + "step": 6553 + }, + { + "epoch": 0.5289322895650068, + "grad_norm": 0.6678160429000854, + "learning_rate": 0.00015243145500264845, + "loss": 2.6177, + "step": 6554 + }, + { + "epoch": 0.5290129933015899, + "grad_norm": 0.7041724324226379, + "learning_rate": 0.00015241801141653905, + "loss": 2.6504, + "step": 6555 + }, + { + "epoch": 0.5290936970381729, + "grad_norm": 0.6551648378372192, + "learning_rate": 0.0001524045665240358, + "loss": 2.577, + "step": 6556 + }, + { + "epoch": 0.5291744007747559, + "grad_norm": 0.7190412878990173, + "learning_rate": 0.00015239112032547377, + "loss": 2.596, + "step": 6557 + }, + { + "epoch": 0.5292551045113388, + "grad_norm": 0.6936302781105042, + "learning_rate": 0.00015237767282118807, + "loss": 2.6551, + "step": 6558 + }, + { + "epoch": 0.5293358082479219, + "grad_norm": 0.6901839971542358, + "learning_rate": 0.0001523642240115138, + "loss": 2.6263, + "step": 6559 + }, + { + "epoch": 0.5294165119845049, + "grad_norm": 0.6905068159103394, + "learning_rate": 0.00015235077389678624, + "loss": 2.6323, + "step": 6560 + }, + { + "epoch": 0.5294972157210879, + "grad_norm": 0.7495188117027283, + "learning_rate": 0.00015233732247734057, + "loss": 2.6243, + "step": 6561 + }, + { + "epoch": 0.5295779194576709, + "grad_norm": 0.6758708357810974, + "learning_rate": 0.00015232386975351197, + "loss": 2.6184, + "step": 6562 + }, + { + "epoch": 0.5296586231942539, + "grad_norm": 0.6443266868591309, + "learning_rate": 0.00015231041572563573, + "loss": 2.6543, + "step": 6563 + }, + { + "epoch": 0.5297393269308369, + "grad_norm": 0.7384275794029236, + "learning_rate": 0.00015229696039404723, + "loss": 2.6117, + "step": 6564 + }, + { + "epoch": 0.5298200306674199, + "grad_norm": 
0.6873897314071655, + "learning_rate": 0.00015228350375908178, + "loss": 2.5689, + "step": 6565 + }, + { + "epoch": 0.5299007344040029, + "grad_norm": 0.6715645790100098, + "learning_rate": 0.00015227004582107472, + "loss": 2.5943, + "step": 6566 + }, + { + "epoch": 0.529981438140586, + "grad_norm": 0.6814208030700684, + "learning_rate": 0.00015225658658036151, + "loss": 2.5562, + "step": 6567 + }, + { + "epoch": 0.5300621418771689, + "grad_norm": 0.6942310929298401, + "learning_rate": 0.00015224312603727755, + "loss": 2.5902, + "step": 6568 + }, + { + "epoch": 0.5301428456137519, + "grad_norm": 0.6856299042701721, + "learning_rate": 0.0001522296641921583, + "loss": 2.6115, + "step": 6569 + }, + { + "epoch": 0.5302235493503349, + "grad_norm": 0.870833694934845, + "learning_rate": 0.0001522162010453393, + "loss": 2.7492, + "step": 6570 + }, + { + "epoch": 0.530304253086918, + "grad_norm": 0.6796989440917969, + "learning_rate": 0.0001522027365971561, + "loss": 2.6957, + "step": 6571 + }, + { + "epoch": 0.530384956823501, + "grad_norm": 0.7043026685714722, + "learning_rate": 0.00015218927084794423, + "loss": 2.604, + "step": 6572 + }, + { + "epoch": 0.5304656605600839, + "grad_norm": 0.7533933520317078, + "learning_rate": 0.00015217580379803933, + "loss": 2.6271, + "step": 6573 + }, + { + "epoch": 0.5305463642966669, + "grad_norm": 0.7526697516441345, + "learning_rate": 0.000152162335447777, + "loss": 2.553, + "step": 6574 + }, + { + "epoch": 0.53062706803325, + "grad_norm": 0.6942071318626404, + "learning_rate": 0.00015214886579749284, + "loss": 2.7206, + "step": 6575 + }, + { + "epoch": 0.530707771769833, + "grad_norm": 0.7133236527442932, + "learning_rate": 0.00015213539484752273, + "loss": 2.6545, + "step": 6576 + }, + { + "epoch": 0.530788475506416, + "grad_norm": 0.7229849696159363, + "learning_rate": 0.00015212192259820222, + "loss": 2.6647, + "step": 6577 + }, + { + "epoch": 0.5308691792429989, + "grad_norm": 0.7142449617385864, + "learning_rate": 
0.0001521084490498672, + "loss": 2.5777, + "step": 6578 + }, + { + "epoch": 0.5309498829795819, + "grad_norm": 0.6950247287750244, + "learning_rate": 0.00015209497420285342, + "loss": 2.6159, + "step": 6579 + }, + { + "epoch": 0.531030586716165, + "grad_norm": 0.7492622137069702, + "learning_rate": 0.00015208149805749668, + "loss": 2.6927, + "step": 6580 + }, + { + "epoch": 0.531111290452748, + "grad_norm": 0.7618215084075928, + "learning_rate": 0.00015206802061413287, + "loss": 2.5831, + "step": 6581 + }, + { + "epoch": 0.5311919941893309, + "grad_norm": 0.7448660731315613, + "learning_rate": 0.0001520545418730979, + "loss": 2.6123, + "step": 6582 + }, + { + "epoch": 0.5312726979259139, + "grad_norm": 0.7450618147850037, + "learning_rate": 0.00015204106183472766, + "loss": 2.5768, + "step": 6583 + }, + { + "epoch": 0.531353401662497, + "grad_norm": 0.7426019310951233, + "learning_rate": 0.0001520275804993581, + "loss": 2.603, + "step": 6584 + }, + { + "epoch": 0.53143410539908, + "grad_norm": 0.7503333687782288, + "learning_rate": 0.00015201409786732526, + "loss": 2.6159, + "step": 6585 + }, + { + "epoch": 0.531514809135663, + "grad_norm": 0.6944373846054077, + "learning_rate": 0.00015200061393896513, + "loss": 2.5201, + "step": 6586 + }, + { + "epoch": 0.5315955128722459, + "grad_norm": 0.6958110332489014, + "learning_rate": 0.00015198712871461375, + "loss": 2.5592, + "step": 6587 + }, + { + "epoch": 0.531676216608829, + "grad_norm": 0.7838244438171387, + "learning_rate": 0.00015197364219460727, + "loss": 2.6663, + "step": 6588 + }, + { + "epoch": 0.531756920345412, + "grad_norm": 0.754338800907135, + "learning_rate": 0.00015196015437928174, + "loss": 2.6183, + "step": 6589 + }, + { + "epoch": 0.531837624081995, + "grad_norm": 0.7394337058067322, + "learning_rate": 0.00015194666526897332, + "loss": 2.5622, + "step": 6590 + }, + { + "epoch": 0.531918327818578, + "grad_norm": 0.7352069020271301, + "learning_rate": 0.00015193317486401824, + "loss": 2.6173, + "step": 
6591 + }, + { + "epoch": 0.531999031555161, + "grad_norm": 0.6318944096565247, + "learning_rate": 0.00015191968316475267, + "loss": 2.6159, + "step": 6592 + }, + { + "epoch": 0.532079735291744, + "grad_norm": 0.7071281671524048, + "learning_rate": 0.00015190619017151291, + "loss": 2.633, + "step": 6593 + }, + { + "epoch": 0.532160439028327, + "grad_norm": 0.7762585282325745, + "learning_rate": 0.00015189269588463517, + "loss": 2.6445, + "step": 6594 + }, + { + "epoch": 0.53224114276491, + "grad_norm": 0.7979930639266968, + "learning_rate": 0.0001518792003044558, + "loss": 2.5825, + "step": 6595 + }, + { + "epoch": 0.5323218465014931, + "grad_norm": 0.7355580925941467, + "learning_rate": 0.00015186570343131114, + "loss": 2.6197, + "step": 6596 + }, + { + "epoch": 0.532402550238076, + "grad_norm": 0.7286938428878784, + "learning_rate": 0.0001518522052655376, + "loss": 2.6385, + "step": 6597 + }, + { + "epoch": 0.532483253974659, + "grad_norm": 0.689143180847168, + "learning_rate": 0.00015183870580747156, + "loss": 2.6593, + "step": 6598 + }, + { + "epoch": 0.532563957711242, + "grad_norm": 0.714746356010437, + "learning_rate": 0.00015182520505744945, + "loss": 2.6059, + "step": 6599 + }, + { + "epoch": 0.5326446614478251, + "grad_norm": 0.8055040240287781, + "learning_rate": 0.00015181170301580777, + "loss": 2.6983, + "step": 6600 + }, + { + "epoch": 0.532725365184408, + "grad_norm": 0.7104170918464661, + "learning_rate": 0.00015179819968288297, + "loss": 2.6578, + "step": 6601 + }, + { + "epoch": 0.532806068920991, + "grad_norm": 0.7175524830818176, + "learning_rate": 0.0001517846950590117, + "loss": 2.6263, + "step": 6602 + }, + { + "epoch": 0.532886772657574, + "grad_norm": 0.6755492091178894, + "learning_rate": 0.00015177118914453042, + "loss": 2.5752, + "step": 6603 + }, + { + "epoch": 0.5329674763941571, + "grad_norm": 0.7020289897918701, + "learning_rate": 0.00015175768193977578, + "loss": 2.6186, + "step": 6604 + }, + { + "epoch": 0.5330481801307401, + 
"grad_norm": 0.7550958395004272, + "learning_rate": 0.0001517441734450844, + "loss": 2.628, + "step": 6605 + }, + { + "epoch": 0.533128883867323, + "grad_norm": 0.6697603464126587, + "learning_rate": 0.00015173066366079297, + "loss": 2.6433, + "step": 6606 + }, + { + "epoch": 0.533209587603906, + "grad_norm": 0.715372622013092, + "learning_rate": 0.0001517171525872382, + "loss": 2.6022, + "step": 6607 + }, + { + "epoch": 0.5332902913404891, + "grad_norm": 0.7081933617591858, + "learning_rate": 0.00015170364022475675, + "loss": 2.675, + "step": 6608 + }, + { + "epoch": 0.5333709950770721, + "grad_norm": 0.7074152231216431, + "learning_rate": 0.00015169012657368546, + "loss": 2.6637, + "step": 6609 + }, + { + "epoch": 0.5334516988136551, + "grad_norm": 0.6692848801612854, + "learning_rate": 0.00015167661163436108, + "loss": 2.5855, + "step": 6610 + }, + { + "epoch": 0.533532402550238, + "grad_norm": 0.7307556867599487, + "learning_rate": 0.00015166309540712048, + "loss": 2.6105, + "step": 6611 + }, + { + "epoch": 0.5336131062868211, + "grad_norm": 0.7026669383049011, + "learning_rate": 0.00015164957789230048, + "loss": 2.6656, + "step": 6612 + }, + { + "epoch": 0.5336938100234041, + "grad_norm": 0.6579706072807312, + "learning_rate": 0.000151636059090238, + "loss": 2.6456, + "step": 6613 + }, + { + "epoch": 0.5337745137599871, + "grad_norm": 0.6854498386383057, + "learning_rate": 0.00015162253900126993, + "loss": 2.5969, + "step": 6614 + }, + { + "epoch": 0.5338552174965701, + "grad_norm": 0.7542434334754944, + "learning_rate": 0.00015160901762573323, + "loss": 2.6333, + "step": 6615 + }, + { + "epoch": 0.5339359212331531, + "grad_norm": 0.6795105934143066, + "learning_rate": 0.0001515954949639649, + "loss": 2.6268, + "step": 6616 + }, + { + "epoch": 0.5340166249697361, + "grad_norm": 0.6395254135131836, + "learning_rate": 0.000151581971016302, + "loss": 2.5684, + "step": 6617 + }, + { + "epoch": 0.5340973287063191, + "grad_norm": 0.7069850564002991, + 
"learning_rate": 0.00015156844578308155, + "loss": 2.64, + "step": 6618 + }, + { + "epoch": 0.5341780324429021, + "grad_norm": 0.6779203414916992, + "learning_rate": 0.0001515549192646406, + "loss": 2.6255, + "step": 6619 + }, + { + "epoch": 0.5342587361794852, + "grad_norm": 0.6403560638427734, + "learning_rate": 0.00015154139146131632, + "loss": 2.611, + "step": 6620 + }, + { + "epoch": 0.5343394399160681, + "grad_norm": 0.7532669901847839, + "learning_rate": 0.00015152786237344583, + "loss": 2.5641, + "step": 6621 + }, + { + "epoch": 0.5344201436526511, + "grad_norm": 0.6827573776245117, + "learning_rate": 0.00015151433200136629, + "loss": 2.6096, + "step": 6622 + }, + { + "epoch": 0.5345008473892341, + "grad_norm": 0.6691904067993164, + "learning_rate": 0.000151500800345415, + "loss": 2.6602, + "step": 6623 + }, + { + "epoch": 0.5345815511258172, + "grad_norm": 0.7288634777069092, + "learning_rate": 0.00015148726740592906, + "loss": 2.6468, + "step": 6624 + }, + { + "epoch": 0.5346622548624002, + "grad_norm": 0.7087839245796204, + "learning_rate": 0.00015147373318324586, + "loss": 2.5795, + "step": 6625 + }, + { + "epoch": 0.5347429585989831, + "grad_norm": 0.6618373394012451, + "learning_rate": 0.00015146019767770267, + "loss": 2.638, + "step": 6626 + }, + { + "epoch": 0.5348236623355661, + "grad_norm": 0.7384989857673645, + "learning_rate": 0.00015144666088963684, + "loss": 2.6104, + "step": 6627 + }, + { + "epoch": 0.5349043660721492, + "grad_norm": 0.6662275195121765, + "learning_rate": 0.00015143312281938576, + "loss": 2.6174, + "step": 6628 + }, + { + "epoch": 0.5349850698087322, + "grad_norm": 0.6617184281349182, + "learning_rate": 0.0001514195834672868, + "loss": 2.6154, + "step": 6629 + }, + { + "epoch": 0.5350657735453151, + "grad_norm": 0.7173622846603394, + "learning_rate": 0.0001514060428336774, + "loss": 2.5741, + "step": 6630 + }, + { + "epoch": 0.5351464772818981, + "grad_norm": 0.7773584127426147, + "learning_rate": 0.00015139250091889502, + 
"loss": 2.6333, + "step": 6631 + }, + { + "epoch": 0.5352271810184811, + "grad_norm": 0.7255204916000366, + "learning_rate": 0.0001513789577232772, + "loss": 2.5459, + "step": 6632 + }, + { + "epoch": 0.5353078847550642, + "grad_norm": 0.7308403849601746, + "learning_rate": 0.00015136541324716144, + "loss": 2.5934, + "step": 6633 + }, + { + "epoch": 0.5353885884916472, + "grad_norm": 0.699367880821228, + "learning_rate": 0.0001513518674908853, + "loss": 2.6797, + "step": 6634 + }, + { + "epoch": 0.5354692922282301, + "grad_norm": 0.7236449718475342, + "learning_rate": 0.0001513383204547864, + "loss": 2.6289, + "step": 6635 + }, + { + "epoch": 0.5355499959648131, + "grad_norm": 0.6860557794570923, + "learning_rate": 0.00015132477213920234, + "loss": 2.6736, + "step": 6636 + }, + { + "epoch": 0.5356306997013962, + "grad_norm": 0.6724153161048889, + "learning_rate": 0.00015131122254447084, + "loss": 2.5581, + "step": 6637 + }, + { + "epoch": 0.5357114034379792, + "grad_norm": 0.6818630695343018, + "learning_rate": 0.00015129767167092949, + "loss": 2.5979, + "step": 6638 + }, + { + "epoch": 0.5357921071745622, + "grad_norm": 0.6956631541252136, + "learning_rate": 0.00015128411951891607, + "loss": 2.6116, + "step": 6639 + }, + { + "epoch": 0.5358728109111451, + "grad_norm": 0.6698076128959656, + "learning_rate": 0.00015127056608876837, + "loss": 2.65, + "step": 6640 + }, + { + "epoch": 0.5359535146477282, + "grad_norm": 0.7763264179229736, + "learning_rate": 0.00015125701138082415, + "loss": 2.6164, + "step": 6641 + }, + { + "epoch": 0.5360342183843112, + "grad_norm": 0.7148340940475464, + "learning_rate": 0.00015124345539542118, + "loss": 2.6467, + "step": 6642 + }, + { + "epoch": 0.5361149221208942, + "grad_norm": 0.7350041270256042, + "learning_rate": 0.00015122989813289733, + "loss": 2.6477, + "step": 6643 + }, + { + "epoch": 0.5361956258574772, + "grad_norm": 0.6993441581726074, + "learning_rate": 0.00015121633959359055, + "loss": 2.7526, + "step": 6644 + }, + { + 
"epoch": 0.5362763295940602, + "grad_norm": 0.6828470826148987, + "learning_rate": 0.00015120277977783873, + "loss": 2.6439, + "step": 6645 + }, + { + "epoch": 0.5363570333306432, + "grad_norm": 0.7076796889305115, + "learning_rate": 0.0001511892186859797, + "loss": 2.6375, + "step": 6646 + }, + { + "epoch": 0.5364377370672262, + "grad_norm": 0.6830769777297974, + "learning_rate": 0.0001511756563183516, + "loss": 2.6052, + "step": 6647 + }, + { + "epoch": 0.5365184408038092, + "grad_norm": 0.6482179760932922, + "learning_rate": 0.00015116209267529237, + "loss": 2.6251, + "step": 6648 + }, + { + "epoch": 0.5365991445403923, + "grad_norm": 0.6687620878219604, + "learning_rate": 0.00015114852775714, + "loss": 2.659, + "step": 6649 + }, + { + "epoch": 0.5366798482769752, + "grad_norm": 0.734108030796051, + "learning_rate": 0.0001511349615642327, + "loss": 2.6542, + "step": 6650 + }, + { + "epoch": 0.5367605520135582, + "grad_norm": 0.7092111706733704, + "learning_rate": 0.00015112139409690842, + "loss": 2.6228, + "step": 6651 + }, + { + "epoch": 0.5368412557501412, + "grad_norm": 0.6544996500015259, + "learning_rate": 0.0001511078253555054, + "loss": 2.5661, + "step": 6652 + }, + { + "epoch": 0.5369219594867243, + "grad_norm": 0.7012531161308289, + "learning_rate": 0.00015109425534036176, + "loss": 2.6447, + "step": 6653 + }, + { + "epoch": 0.5370026632233073, + "grad_norm": 0.6813335418701172, + "learning_rate": 0.0001510806840518157, + "loss": 2.5723, + "step": 6654 + }, + { + "epoch": 0.5370833669598902, + "grad_norm": 0.6711288094520569, + "learning_rate": 0.0001510671114902055, + "loss": 2.6096, + "step": 6655 + }, + { + "epoch": 0.5371640706964732, + "grad_norm": 0.721866250038147, + "learning_rate": 0.00015105353765586935, + "loss": 2.6167, + "step": 6656 + }, + { + "epoch": 0.5372447744330563, + "grad_norm": 0.8140639066696167, + "learning_rate": 0.00015103996254914562, + "loss": 2.5768, + "step": 6657 + }, + { + "epoch": 0.5373254781696393, + "grad_norm": 
0.6859177947044373, + "learning_rate": 0.0001510263861703726, + "loss": 2.5638, + "step": 6658 + }, + { + "epoch": 0.5374061819062222, + "grad_norm": 0.7254204154014587, + "learning_rate": 0.00015101280851988864, + "loss": 2.5855, + "step": 6659 + }, + { + "epoch": 0.5374868856428052, + "grad_norm": 0.7181829810142517, + "learning_rate": 0.00015099922959803218, + "loss": 2.5358, + "step": 6660 + }, + { + "epoch": 0.5375675893793883, + "grad_norm": 0.7092663645744324, + "learning_rate": 0.00015098564940514155, + "loss": 2.679, + "step": 6661 + }, + { + "epoch": 0.5376482931159713, + "grad_norm": 0.7126225233078003, + "learning_rate": 0.00015097206794155527, + "loss": 2.6167, + "step": 6662 + }, + { + "epoch": 0.5377289968525543, + "grad_norm": 0.7469925880432129, + "learning_rate": 0.00015095848520761186, + "loss": 2.5906, + "step": 6663 + }, + { + "epoch": 0.5378097005891372, + "grad_norm": 0.6911186575889587, + "learning_rate": 0.00015094490120364973, + "loss": 2.6488, + "step": 6664 + }, + { + "epoch": 0.5378904043257203, + "grad_norm": 0.6579635143280029, + "learning_rate": 0.00015093131593000753, + "loss": 2.5894, + "step": 6665 + }, + { + "epoch": 0.5379711080623033, + "grad_norm": 0.7107242345809937, + "learning_rate": 0.00015091772938702377, + "loss": 2.6568, + "step": 6666 + }, + { + "epoch": 0.5380518117988863, + "grad_norm": 0.6845428943634033, + "learning_rate": 0.00015090414157503714, + "loss": 2.5697, + "step": 6667 + }, + { + "epoch": 0.5381325155354693, + "grad_norm": 0.6713212132453918, + "learning_rate": 0.00015089055249438622, + "loss": 2.5747, + "step": 6668 + }, + { + "epoch": 0.5382132192720523, + "grad_norm": 0.7091513276100159, + "learning_rate": 0.0001508769621454097, + "loss": 2.6765, + "step": 6669 + }, + { + "epoch": 0.5382939230086353, + "grad_norm": 0.7403436899185181, + "learning_rate": 0.00015086337052844627, + "loss": 2.6841, + "step": 6670 + }, + { + "epoch": 0.5383746267452183, + "grad_norm": 0.6745626330375671, + "learning_rate": 
0.0001508497776438347, + "loss": 2.6436, + "step": 6671 + }, + { + "epoch": 0.5384553304818013, + "grad_norm": 0.7491294145584106, + "learning_rate": 0.00015083618349191372, + "loss": 2.6376, + "step": 6672 + }, + { + "epoch": 0.5385360342183844, + "grad_norm": 0.719761848449707, + "learning_rate": 0.00015082258807302222, + "loss": 2.5885, + "step": 6673 + }, + { + "epoch": 0.5386167379549673, + "grad_norm": 0.7302667498588562, + "learning_rate": 0.00015080899138749895, + "loss": 2.7019, + "step": 6674 + }, + { + "epoch": 0.5386974416915503, + "grad_norm": 0.7640584111213684, + "learning_rate": 0.0001507953934356828, + "loss": 2.6404, + "step": 6675 + }, + { + "epoch": 0.5387781454281333, + "grad_norm": 0.699515700340271, + "learning_rate": 0.0001507817942179127, + "loss": 2.6407, + "step": 6676 + }, + { + "epoch": 0.5388588491647164, + "grad_norm": 0.7305224537849426, + "learning_rate": 0.00015076819373452746, + "loss": 2.5994, + "step": 6677 + }, + { + "epoch": 0.5389395529012994, + "grad_norm": 0.7125952243804932, + "learning_rate": 0.00015075459198586616, + "loss": 2.6472, + "step": 6678 + }, + { + "epoch": 0.5390202566378823, + "grad_norm": 0.7077293395996094, + "learning_rate": 0.00015074098897226778, + "loss": 2.6168, + "step": 6679 + }, + { + "epoch": 0.5391009603744653, + "grad_norm": 0.6713843941688538, + "learning_rate": 0.00015072738469407127, + "loss": 2.5736, + "step": 6680 + }, + { + "epoch": 0.5391816641110483, + "grad_norm": 0.7101294994354248, + "learning_rate": 0.00015071377915161578, + "loss": 2.6994, + "step": 6681 + }, + { + "epoch": 0.5392623678476314, + "grad_norm": 0.7132740020751953, + "learning_rate": 0.00015070017234524032, + "loss": 2.586, + "step": 6682 + }, + { + "epoch": 0.5393430715842144, + "grad_norm": 0.7043401598930359, + "learning_rate": 0.00015068656427528402, + "loss": 2.6025, + "step": 6683 + }, + { + "epoch": 0.5394237753207973, + "grad_norm": 0.6831551194190979, + "learning_rate": 0.00015067295494208607, + "loss": 2.6183, 
+ "step": 6684 + }, + { + "epoch": 0.5395044790573803, + "grad_norm": 0.7066370844841003, + "learning_rate": 0.0001506593443459856, + "loss": 2.6467, + "step": 6685 + }, + { + "epoch": 0.5395851827939634, + "grad_norm": 0.7908033132553101, + "learning_rate": 0.0001506457324873219, + "loss": 2.6929, + "step": 6686 + }, + { + "epoch": 0.5396658865305464, + "grad_norm": 0.7186774611473083, + "learning_rate": 0.00015063211936643407, + "loss": 2.5841, + "step": 6687 + }, + { + "epoch": 0.5397465902671293, + "grad_norm": 0.6634512543678284, + "learning_rate": 0.0001506185049836615, + "loss": 2.5517, + "step": 6688 + }, + { + "epoch": 0.5398272940037123, + "grad_norm": 0.734406590461731, + "learning_rate": 0.00015060488933934353, + "loss": 2.6317, + "step": 6689 + }, + { + "epoch": 0.5399079977402954, + "grad_norm": 0.7754772305488586, + "learning_rate": 0.00015059127243381937, + "loss": 2.6885, + "step": 6690 + }, + { + "epoch": 0.5399887014768784, + "grad_norm": 0.7636603713035583, + "learning_rate": 0.00015057765426742848, + "loss": 2.5767, + "step": 6691 + }, + { + "epoch": 0.5400694052134614, + "grad_norm": 0.6621577143669128, + "learning_rate": 0.00015056403484051017, + "loss": 2.5905, + "step": 6692 + }, + { + "epoch": 0.5401501089500443, + "grad_norm": 0.7605881094932556, + "learning_rate": 0.00015055041415340404, + "loss": 2.6166, + "step": 6693 + }, + { + "epoch": 0.5402308126866274, + "grad_norm": 0.7603485584259033, + "learning_rate": 0.0001505367922064494, + "loss": 2.6123, + "step": 6694 + }, + { + "epoch": 0.5403115164232104, + "grad_norm": 0.7021469473838806, + "learning_rate": 0.0001505231689999858, + "loss": 2.6754, + "step": 6695 + }, + { + "epoch": 0.5403922201597934, + "grad_norm": 0.7291955947875977, + "learning_rate": 0.00015050954453435273, + "loss": 2.6393, + "step": 6696 + }, + { + "epoch": 0.5404729238963764, + "grad_norm": 0.6658700704574585, + "learning_rate": 0.00015049591880988977, + "loss": 2.5888, + "step": 6697 + }, + { + "epoch": 
0.5405536276329594, + "grad_norm": 0.7080146074295044, + "learning_rate": 0.00015048229182693657, + "loss": 2.6318, + "step": 6698 + }, + { + "epoch": 0.5406343313695424, + "grad_norm": 0.7440849542617798, + "learning_rate": 0.00015046866358583267, + "loss": 2.596, + "step": 6699 + }, + { + "epoch": 0.5407150351061254, + "grad_norm": 0.886578381061554, + "learning_rate": 0.00015045503408691775, + "loss": 2.6479, + "step": 6700 + }, + { + "epoch": 0.5407957388427084, + "grad_norm": 0.7221408486366272, + "learning_rate": 0.00015044140333053148, + "loss": 2.625, + "step": 6701 + }, + { + "epoch": 0.5408764425792915, + "grad_norm": 0.7193209528923035, + "learning_rate": 0.0001504277713170136, + "loss": 2.6044, + "step": 6702 + }, + { + "epoch": 0.5409571463158744, + "grad_norm": 0.7139819860458374, + "learning_rate": 0.00015041413804670384, + "loss": 2.5572, + "step": 6703 + }, + { + "epoch": 0.5410378500524574, + "grad_norm": 0.728875994682312, + "learning_rate": 0.00015040050351994196, + "loss": 2.6373, + "step": 6704 + }, + { + "epoch": 0.5411185537890404, + "grad_norm": 0.6794858574867249, + "learning_rate": 0.0001503868677370678, + "loss": 2.6265, + "step": 6705 + }, + { + "epoch": 0.5411992575256235, + "grad_norm": 0.6874774098396301, + "learning_rate": 0.00015037323069842117, + "loss": 2.6146, + "step": 6706 + }, + { + "epoch": 0.5412799612622065, + "grad_norm": 0.7064409255981445, + "learning_rate": 0.00015035959240434197, + "loss": 2.6126, + "step": 6707 + }, + { + "epoch": 0.5413606649987894, + "grad_norm": 0.7212977409362793, + "learning_rate": 0.00015034595285517006, + "loss": 2.6836, + "step": 6708 + }, + { + "epoch": 0.5414413687353724, + "grad_norm": 0.7826492190361023, + "learning_rate": 0.0001503323120512454, + "loss": 2.6648, + "step": 6709 + }, + { + "epoch": 0.5415220724719555, + "grad_norm": 0.7228415608406067, + "learning_rate": 0.000150318669992908, + "loss": 2.5734, + "step": 6710 + }, + { + "epoch": 0.5416027762085385, + "grad_norm": 
0.6929590702056885, + "learning_rate": 0.00015030502668049778, + "loss": 2.6023, + "step": 6711 + }, + { + "epoch": 0.5416834799451214, + "grad_norm": 0.679990291595459, + "learning_rate": 0.0001502913821143548, + "loss": 2.5867, + "step": 6712 + }, + { + "epoch": 0.5417641836817044, + "grad_norm": 0.7324180603027344, + "learning_rate": 0.00015027773629481907, + "loss": 2.5722, + "step": 6713 + }, + { + "epoch": 0.5418448874182875, + "grad_norm": 0.686826765537262, + "learning_rate": 0.00015026408922223078, + "loss": 2.6138, + "step": 6714 + }, + { + "epoch": 0.5419255911548705, + "grad_norm": 0.7045193314552307, + "learning_rate": 0.00015025044089693, + "loss": 2.619, + "step": 6715 + }, + { + "epoch": 0.5420062948914535, + "grad_norm": 0.6839936375617981, + "learning_rate": 0.00015023679131925683, + "loss": 2.5778, + "step": 6716 + }, + { + "epoch": 0.5420869986280364, + "grad_norm": 0.7613961696624756, + "learning_rate": 0.00015022314048955153, + "loss": 2.6262, + "step": 6717 + }, + { + "epoch": 0.5421677023646195, + "grad_norm": 0.7867478728294373, + "learning_rate": 0.00015020948840815428, + "loss": 2.6576, + "step": 6718 + }, + { + "epoch": 0.5422484061012025, + "grad_norm": 0.7371038794517517, + "learning_rate": 0.0001501958350754053, + "loss": 2.6495, + "step": 6719 + }, + { + "epoch": 0.5423291098377855, + "grad_norm": 0.7146512269973755, + "learning_rate": 0.00015018218049164494, + "loss": 2.6514, + "step": 6720 + }, + { + "epoch": 0.5424098135743685, + "grad_norm": 0.7507650256156921, + "learning_rate": 0.00015016852465721346, + "loss": 2.6509, + "step": 6721 + }, + { + "epoch": 0.5424905173109515, + "grad_norm": 0.6786547303199768, + "learning_rate": 0.0001501548675724512, + "loss": 2.5983, + "step": 6722 + }, + { + "epoch": 0.5425712210475345, + "grad_norm": 0.7077932357788086, + "learning_rate": 0.0001501412092376985, + "loss": 2.622, + "step": 6723 + }, + { + "epoch": 0.5426519247841175, + "grad_norm": 0.7191271781921387, + "learning_rate": 
0.00015012754965329584, + "loss": 2.6632, + "step": 6724 + }, + { + "epoch": 0.5427326285207005, + "grad_norm": 0.6785906553268433, + "learning_rate": 0.00015011388881958356, + "loss": 2.6312, + "step": 6725 + }, + { + "epoch": 0.5428133322572836, + "grad_norm": 0.6880263090133667, + "learning_rate": 0.00015010022673690222, + "loss": 2.5951, + "step": 6726 + }, + { + "epoch": 0.5428940359938665, + "grad_norm": 0.7769095301628113, + "learning_rate": 0.0001500865634055923, + "loss": 2.5503, + "step": 6727 + }, + { + "epoch": 0.5429747397304495, + "grad_norm": 0.6847476959228516, + "learning_rate": 0.0001500728988259942, + "loss": 2.6824, + "step": 6728 + }, + { + "epoch": 0.5430554434670325, + "grad_norm": 0.6829310059547424, + "learning_rate": 0.00015005923299844863, + "loss": 2.5683, + "step": 6729 + }, + { + "epoch": 0.5431361472036156, + "grad_norm": 0.7436082363128662, + "learning_rate": 0.0001500455659232961, + "loss": 2.6165, + "step": 6730 + }, + { + "epoch": 0.5432168509401986, + "grad_norm": 0.7876375913619995, + "learning_rate": 0.00015003189760087724, + "loss": 2.6203, + "step": 6731 + }, + { + "epoch": 0.5432975546767815, + "grad_norm": 0.6869253516197205, + "learning_rate": 0.0001500182280315327, + "loss": 2.6136, + "step": 6732 + }, + { + "epoch": 0.5433782584133645, + "grad_norm": 0.7179432511329651, + "learning_rate": 0.00015000455721560316, + "loss": 2.6049, + "step": 6733 + }, + { + "epoch": 0.5434589621499475, + "grad_norm": 0.7286917567253113, + "learning_rate": 0.00014999088515342939, + "loss": 2.5704, + "step": 6734 + }, + { + "epoch": 0.5435396658865306, + "grad_norm": 0.6841779351234436, + "learning_rate": 0.00014997721184535206, + "loss": 2.6095, + "step": 6735 + }, + { + "epoch": 0.5436203696231136, + "grad_norm": 0.7661791443824768, + "learning_rate": 0.00014996353729171196, + "loss": 2.6193, + "step": 6736 + }, + { + "epoch": 0.5437010733596965, + "grad_norm": 0.7365885376930237, + "learning_rate": 0.0001499498614928499, + "loss": 2.586, 
+ "step": 6737 + }, + { + "epoch": 0.5437817770962795, + "grad_norm": 0.7423815131187439, + "learning_rate": 0.00014993618444910674, + "loss": 2.6199, + "step": 6738 + }, + { + "epoch": 0.5438624808328626, + "grad_norm": 0.7667781114578247, + "learning_rate": 0.0001499225061608233, + "loss": 2.6584, + "step": 6739 + }, + { + "epoch": 0.5439431845694456, + "grad_norm": 0.7148830890655518, + "learning_rate": 0.00014990882662834057, + "loss": 2.7172, + "step": 6740 + }, + { + "epoch": 0.5440238883060285, + "grad_norm": 0.7206205725669861, + "learning_rate": 0.00014989514585199936, + "loss": 2.5682, + "step": 6741 + }, + { + "epoch": 0.5441045920426115, + "grad_norm": 0.7306448221206665, + "learning_rate": 0.0001498814638321407, + "loss": 2.6724, + "step": 6742 + }, + { + "epoch": 0.5441852957791946, + "grad_norm": 0.7058824896812439, + "learning_rate": 0.00014986778056910556, + "loss": 2.6573, + "step": 6743 + }, + { + "epoch": 0.5442659995157776, + "grad_norm": 0.770588755607605, + "learning_rate": 0.000149854096063235, + "loss": 2.658, + "step": 6744 + }, + { + "epoch": 0.5443467032523606, + "grad_norm": 0.8283931612968445, + "learning_rate": 0.00014984041031487001, + "loss": 2.6624, + "step": 6745 + }, + { + "epoch": 0.5444274069889435, + "grad_norm": 0.6814693808555603, + "learning_rate": 0.00014982672332435176, + "loss": 2.5835, + "step": 6746 + }, + { + "epoch": 0.5445081107255266, + "grad_norm": 0.7059363722801208, + "learning_rate": 0.00014981303509202127, + "loss": 2.5977, + "step": 6747 + }, + { + "epoch": 0.5445888144621096, + "grad_norm": 0.6678106188774109, + "learning_rate": 0.00014979934561821975, + "loss": 2.6479, + "step": 6748 + }, + { + "epoch": 0.5446695181986926, + "grad_norm": 0.8167592883110046, + "learning_rate": 0.00014978565490328835, + "loss": 2.6529, + "step": 6749 + }, + { + "epoch": 0.5447502219352756, + "grad_norm": 0.807209849357605, + "learning_rate": 0.00014977196294756832, + "loss": 2.6546, + "step": 6750 + }, + { + "epoch": 
0.5448309256718586, + "grad_norm": 0.7099517583847046, + "learning_rate": 0.00014975826975140085, + "loss": 2.6178, + "step": 6751 + }, + { + "epoch": 0.5449116294084416, + "grad_norm": 0.7900758981704712, + "learning_rate": 0.0001497445753151272, + "loss": 2.586, + "step": 6752 + }, + { + "epoch": 0.5449923331450246, + "grad_norm": 0.6826134920120239, + "learning_rate": 0.00014973087963908875, + "loss": 2.5914, + "step": 6753 + }, + { + "epoch": 0.5450730368816076, + "grad_norm": 0.7383863925933838, + "learning_rate": 0.0001497171827236268, + "loss": 2.6357, + "step": 6754 + }, + { + "epoch": 0.5451537406181907, + "grad_norm": 0.7208051085472107, + "learning_rate": 0.0001497034845690826, + "loss": 2.5435, + "step": 6755 + }, + { + "epoch": 0.5452344443547736, + "grad_norm": 0.680794894695282, + "learning_rate": 0.00014968978517579772, + "loss": 2.5691, + "step": 6756 + }, + { + "epoch": 0.5453151480913566, + "grad_norm": 0.680759847164154, + "learning_rate": 0.00014967608454411347, + "loss": 2.5761, + "step": 6757 + }, + { + "epoch": 0.5453958518279396, + "grad_norm": 0.719634473323822, + "learning_rate": 0.00014966238267437134, + "loss": 2.637, + "step": 6758 + }, + { + "epoch": 0.5454765555645227, + "grad_norm": 0.777302086353302, + "learning_rate": 0.0001496486795669128, + "loss": 2.6457, + "step": 6759 + }, + { + "epoch": 0.5455572593011057, + "grad_norm": 0.6875059604644775, + "learning_rate": 0.0001496349752220794, + "loss": 2.6116, + "step": 6760 + }, + { + "epoch": 0.5456379630376886, + "grad_norm": 0.6884258985519409, + "learning_rate": 0.0001496212696402127, + "loss": 2.5863, + "step": 6761 + }, + { + "epoch": 0.5457186667742716, + "grad_norm": 0.6667922139167786, + "learning_rate": 0.00014960756282165422, + "loss": 2.5892, + "step": 6762 + }, + { + "epoch": 0.5457993705108547, + "grad_norm": 0.6712725162506104, + "learning_rate": 0.00014959385476674559, + "loss": 2.5478, + "step": 6763 + }, + { + "epoch": 0.5458800742474377, + "grad_norm": 
0.6803874969482422, + "learning_rate": 0.00014958014547582845, + "loss": 2.5785, + "step": 6764 + }, + { + "epoch": 0.5459607779840207, + "grad_norm": 0.6975811123847961, + "learning_rate": 0.0001495664349492445, + "loss": 2.5765, + "step": 6765 + }, + { + "epoch": 0.5460414817206036, + "grad_norm": 0.7676273584365845, + "learning_rate": 0.00014955272318733544, + "loss": 2.634, + "step": 6766 + }, + { + "epoch": 0.5461221854571867, + "grad_norm": 0.7044547200202942, + "learning_rate": 0.000149539010190443, + "loss": 2.646, + "step": 6767 + }, + { + "epoch": 0.5462028891937697, + "grad_norm": 0.7453166842460632, + "learning_rate": 0.00014952529595890887, + "loss": 2.6137, + "step": 6768 + }, + { + "epoch": 0.5462835929303527, + "grad_norm": 0.7281681299209595, + "learning_rate": 0.00014951158049307493, + "loss": 2.6558, + "step": 6769 + }, + { + "epoch": 0.5463642966669356, + "grad_norm": 0.7131047248840332, + "learning_rate": 0.00014949786379328298, + "loss": 2.6441, + "step": 6770 + }, + { + "epoch": 0.5464450004035187, + "grad_norm": 0.7072219848632812, + "learning_rate": 0.00014948414585987487, + "loss": 2.5861, + "step": 6771 + }, + { + "epoch": 0.5465257041401017, + "grad_norm": 0.7270335555076599, + "learning_rate": 0.00014947042669319252, + "loss": 2.6703, + "step": 6772 + }, + { + "epoch": 0.5466064078766847, + "grad_norm": 0.7314150929450989, + "learning_rate": 0.0001494567062935778, + "loss": 2.6101, + "step": 6773 + }, + { + "epoch": 0.5466871116132677, + "grad_norm": 0.8168460130691528, + "learning_rate": 0.00014944298466137266, + "loss": 2.662, + "step": 6774 + }, + { + "epoch": 0.5467678153498507, + "grad_norm": 0.7338390350341797, + "learning_rate": 0.00014942926179691913, + "loss": 2.6481, + "step": 6775 + }, + { + "epoch": 0.5468485190864337, + "grad_norm": 0.7065639495849609, + "learning_rate": 0.00014941553770055917, + "loss": 2.6192, + "step": 6776 + }, + { + "epoch": 0.5469292228230167, + "grad_norm": 0.7675396203994751, + "learning_rate": 
0.00014940181237263483, + "loss": 2.5828, + "step": 6777 + }, + { + "epoch": 0.5470099265595997, + "grad_norm": 0.7085692286491394, + "learning_rate": 0.0001493880858134882, + "loss": 2.5815, + "step": 6778 + }, + { + "epoch": 0.5470906302961828, + "grad_norm": 0.757591187953949, + "learning_rate": 0.00014937435802346135, + "loss": 2.691, + "step": 6779 + }, + { + "epoch": 0.5471713340327657, + "grad_norm": 0.7299168705940247, + "learning_rate": 0.00014936062900289647, + "loss": 2.6246, + "step": 6780 + }, + { + "epoch": 0.5472520377693487, + "grad_norm": 0.693692684173584, + "learning_rate": 0.00014934689875213564, + "loss": 2.6149, + "step": 6781 + }, + { + "epoch": 0.5473327415059317, + "grad_norm": 0.733657956123352, + "learning_rate": 0.00014933316727152113, + "loss": 2.582, + "step": 6782 + }, + { + "epoch": 0.5474134452425147, + "grad_norm": 0.6881953477859497, + "learning_rate": 0.00014931943456139514, + "loss": 2.6023, + "step": 6783 + }, + { + "epoch": 0.5474941489790978, + "grad_norm": 0.7102411985397339, + "learning_rate": 0.00014930570062209988, + "loss": 2.6296, + "step": 6784 + }, + { + "epoch": 0.5475748527156807, + "grad_norm": 0.7263364791870117, + "learning_rate": 0.00014929196545397771, + "loss": 2.6414, + "step": 6785 + }, + { + "epoch": 0.5476555564522637, + "grad_norm": 0.7239066958427429, + "learning_rate": 0.00014927822905737092, + "loss": 2.6174, + "step": 6786 + }, + { + "epoch": 0.5477362601888467, + "grad_norm": 0.6909911632537842, + "learning_rate": 0.0001492644914326218, + "loss": 2.6036, + "step": 6787 + }, + { + "epoch": 0.5478169639254298, + "grad_norm": 0.719693124294281, + "learning_rate": 0.00014925075258007283, + "loss": 2.6507, + "step": 6788 + }, + { + "epoch": 0.5478976676620128, + "grad_norm": 0.7722225785255432, + "learning_rate": 0.0001492370125000663, + "loss": 2.6268, + "step": 6789 + }, + { + "epoch": 0.5479783713985957, + "grad_norm": 0.7456568479537964, + "learning_rate": 0.00014922327119294476, + "loss": 2.6426, + 
"step": 6790 + }, + { + "epoch": 0.5480590751351787, + "grad_norm": 0.7430242300033569, + "learning_rate": 0.00014920952865905062, + "loss": 2.6632, + "step": 6791 + }, + { + "epoch": 0.5481397788717618, + "grad_norm": 0.7363260388374329, + "learning_rate": 0.0001491957848987264, + "loss": 2.6021, + "step": 6792 + }, + { + "epoch": 0.5482204826083448, + "grad_norm": 0.6903972029685974, + "learning_rate": 0.00014918203991231462, + "loss": 2.6086, + "step": 6793 + }, + { + "epoch": 0.5483011863449277, + "grad_norm": 0.6765161752700806, + "learning_rate": 0.00014916829370015781, + "loss": 2.5806, + "step": 6794 + }, + { + "epoch": 0.5483818900815107, + "grad_norm": 0.7533403635025024, + "learning_rate": 0.0001491545462625986, + "loss": 2.6351, + "step": 6795 + }, + { + "epoch": 0.5484625938180938, + "grad_norm": 0.6841829419136047, + "learning_rate": 0.00014914079759997963, + "loss": 2.606, + "step": 6796 + }, + { + "epoch": 0.5485432975546768, + "grad_norm": 0.7671411037445068, + "learning_rate": 0.00014912704771264353, + "loss": 2.6645, + "step": 6797 + }, + { + "epoch": 0.5486240012912598, + "grad_norm": 0.7218797206878662, + "learning_rate": 0.00014911329660093295, + "loss": 2.6302, + "step": 6798 + }, + { + "epoch": 0.5487047050278427, + "grad_norm": 0.7269994020462036, + "learning_rate": 0.00014909954426519067, + "loss": 2.6261, + "step": 6799 + }, + { + "epoch": 0.5487854087644258, + "grad_norm": 0.765353262424469, + "learning_rate": 0.00014908579070575936, + "loss": 2.5787, + "step": 6800 + }, + { + "epoch": 0.5488661125010088, + "grad_norm": 0.6503065228462219, + "learning_rate": 0.00014907203592298189, + "loss": 2.6404, + "step": 6801 + }, + { + "epoch": 0.5489468162375918, + "grad_norm": 0.6869633197784424, + "learning_rate": 0.00014905827991720097, + "loss": 2.6463, + "step": 6802 + }, + { + "epoch": 0.5490275199741748, + "grad_norm": 0.7221426963806152, + "learning_rate": 0.00014904452268875947, + "loss": 2.6686, + "step": 6803 + }, + { + "epoch": 
0.5491082237107578, + "grad_norm": 0.6781399250030518, + "learning_rate": 0.00014903076423800028, + "loss": 2.6274, + "step": 6804 + }, + { + "epoch": 0.5491889274473408, + "grad_norm": 0.7451084852218628, + "learning_rate": 0.00014901700456526626, + "loss": 2.6449, + "step": 6805 + }, + { + "epoch": 0.5492696311839238, + "grad_norm": 0.7159574627876282, + "learning_rate": 0.0001490032436709004, + "loss": 2.6664, + "step": 6806 + }, + { + "epoch": 0.5493503349205068, + "grad_norm": 0.724039614200592, + "learning_rate": 0.00014898948155524558, + "loss": 2.5816, + "step": 6807 + }, + { + "epoch": 0.5494310386570899, + "grad_norm": 0.7194633483886719, + "learning_rate": 0.0001489757182186448, + "loss": 2.5625, + "step": 6808 + }, + { + "epoch": 0.5495117423936728, + "grad_norm": 0.704133927822113, + "learning_rate": 0.0001489619536614411, + "loss": 2.6295, + "step": 6809 + }, + { + "epoch": 0.5495924461302558, + "grad_norm": 0.6717158555984497, + "learning_rate": 0.00014894818788397757, + "loss": 2.6168, + "step": 6810 + }, + { + "epoch": 0.5496731498668388, + "grad_norm": 0.7096573710441589, + "learning_rate": 0.0001489344208865972, + "loss": 2.6316, + "step": 6811 + }, + { + "epoch": 0.5497538536034219, + "grad_norm": 0.6383458375930786, + "learning_rate": 0.00014892065266964316, + "loss": 2.5577, + "step": 6812 + }, + { + "epoch": 0.5498345573400049, + "grad_norm": 0.7606377601623535, + "learning_rate": 0.0001489068832334586, + "loss": 2.7078, + "step": 6813 + }, + { + "epoch": 0.5499152610765878, + "grad_norm": 0.649162232875824, + "learning_rate": 0.00014889311257838665, + "loss": 2.6023, + "step": 6814 + }, + { + "epoch": 0.5499959648131708, + "grad_norm": 0.6445025205612183, + "learning_rate": 0.00014887934070477053, + "loss": 2.6, + "step": 6815 + }, + { + "epoch": 0.5500766685497539, + "grad_norm": 0.6873729825019836, + "learning_rate": 0.00014886556761295342, + "loss": 2.6398, + "step": 6816 + }, + { + "epoch": 0.5501573722863369, + "grad_norm": 
0.7814947366714478, + "learning_rate": 0.0001488517933032787, + "loss": 2.5803, + "step": 6817 + }, + { + "epoch": 0.5502380760229199, + "grad_norm": 0.7140909433364868, + "learning_rate": 0.00014883801777608953, + "loss": 2.6051, + "step": 6818 + }, + { + "epoch": 0.5503187797595028, + "grad_norm": 0.7326326370239258, + "learning_rate": 0.00014882424103172936, + "loss": 2.6123, + "step": 6819 + }, + { + "epoch": 0.5503994834960859, + "grad_norm": 0.7093667387962341, + "learning_rate": 0.00014881046307054142, + "loss": 2.6527, + "step": 6820 + }, + { + "epoch": 0.5504801872326689, + "grad_norm": 0.6877567768096924, + "learning_rate": 0.00014879668389286915, + "loss": 2.6086, + "step": 6821 + }, + { + "epoch": 0.5505608909692519, + "grad_norm": 0.7095615863800049, + "learning_rate": 0.000148782903499056, + "loss": 2.6469, + "step": 6822 + }, + { + "epoch": 0.5506415947058348, + "grad_norm": 0.6931191086769104, + "learning_rate": 0.00014876912188944535, + "loss": 2.6842, + "step": 6823 + }, + { + "epoch": 0.5507222984424179, + "grad_norm": 0.7016414403915405, + "learning_rate": 0.00014875533906438072, + "loss": 2.5753, + "step": 6824 + }, + { + "epoch": 0.5508030021790009, + "grad_norm": 0.6813814640045166, + "learning_rate": 0.00014874155502420558, + "loss": 2.5739, + "step": 6825 + }, + { + "epoch": 0.5508837059155839, + "grad_norm": 0.7068608403205872, + "learning_rate": 0.00014872776976926347, + "loss": 2.6325, + "step": 6826 + }, + { + "epoch": 0.5509644096521669, + "grad_norm": 0.6978127360343933, + "learning_rate": 0.00014871398329989796, + "loss": 2.5614, + "step": 6827 + }, + { + "epoch": 0.55104511338875, + "grad_norm": 0.6923051476478577, + "learning_rate": 0.00014870019561645265, + "loss": 2.6075, + "step": 6828 + }, + { + "epoch": 0.5511258171253329, + "grad_norm": 0.6708533763885498, + "learning_rate": 0.00014868640671927117, + "loss": 2.5883, + "step": 6829 + }, + { + "epoch": 0.5512065208619159, + "grad_norm": 0.7679650783538818, + "learning_rate": 
0.00014867261660869713, + "loss": 2.6105, + "step": 6830 + }, + { + "epoch": 0.5512872245984989, + "grad_norm": 0.7080917358398438, + "learning_rate": 0.0001486588252850743, + "loss": 2.5855, + "step": 6831 + }, + { + "epoch": 0.551367928335082, + "grad_norm": 0.7218755483627319, + "learning_rate": 0.00014864503274874635, + "loss": 2.5872, + "step": 6832 + }, + { + "epoch": 0.551448632071665, + "grad_norm": 0.689038872718811, + "learning_rate": 0.000148631239000057, + "loss": 2.5902, + "step": 6833 + }, + { + "epoch": 0.5515293358082479, + "grad_norm": 0.6810954213142395, + "learning_rate": 0.00014861744403935005, + "loss": 2.5938, + "step": 6834 + }, + { + "epoch": 0.5516100395448309, + "grad_norm": 0.7509457468986511, + "learning_rate": 0.00014860364786696933, + "loss": 2.593, + "step": 6835 + }, + { + "epoch": 0.5516907432814139, + "grad_norm": 0.739536702632904, + "learning_rate": 0.00014858985048325863, + "loss": 2.6668, + "step": 6836 + }, + { + "epoch": 0.551771447017997, + "grad_norm": 0.661829948425293, + "learning_rate": 0.00014857605188856184, + "loss": 2.6407, + "step": 6837 + }, + { + "epoch": 0.5518521507545799, + "grad_norm": 0.6869735717773438, + "learning_rate": 0.00014856225208322287, + "loss": 2.535, + "step": 6838 + }, + { + "epoch": 0.5519328544911629, + "grad_norm": 0.6724792122840881, + "learning_rate": 0.00014854845106758563, + "loss": 2.5629, + "step": 6839 + }, + { + "epoch": 0.5520135582277459, + "grad_norm": 0.7066503763198853, + "learning_rate": 0.00014853464884199407, + "loss": 2.6002, + "step": 6840 + }, + { + "epoch": 0.552094261964329, + "grad_norm": 0.7354215979576111, + "learning_rate": 0.0001485208454067922, + "loss": 2.6032, + "step": 6841 + }, + { + "epoch": 0.552174965700912, + "grad_norm": 0.8124571442604065, + "learning_rate": 0.00014850704076232405, + "loss": 2.5884, + "step": 6842 + }, + { + "epoch": 0.5522556694374949, + "grad_norm": 0.6941336393356323, + "learning_rate": 0.00014849323490893364, + "loss": 2.6461, + 
"step": 6843 + }, + { + "epoch": 0.5523363731740779, + "grad_norm": 0.6848790049552917, + "learning_rate": 0.00014847942784696505, + "loss": 2.6098, + "step": 6844 + }, + { + "epoch": 0.552417076910661, + "grad_norm": 0.6688000559806824, + "learning_rate": 0.00014846561957676237, + "loss": 2.6115, + "step": 6845 + }, + { + "epoch": 0.552497780647244, + "grad_norm": 0.6647306084632874, + "learning_rate": 0.00014845181009866975, + "loss": 2.597, + "step": 6846 + }, + { + "epoch": 0.552578484383827, + "grad_norm": 0.7277785539627075, + "learning_rate": 0.0001484379994130314, + "loss": 2.6223, + "step": 6847 + }, + { + "epoch": 0.5526591881204099, + "grad_norm": 0.6623761057853699, + "learning_rate": 0.00014842418752019146, + "loss": 2.5657, + "step": 6848 + }, + { + "epoch": 0.552739891856993, + "grad_norm": 0.7207754254341125, + "learning_rate": 0.00014841037442049423, + "loss": 2.5711, + "step": 6849 + }, + { + "epoch": 0.552820595593576, + "grad_norm": 0.6963560581207275, + "learning_rate": 0.00014839656011428389, + "loss": 2.6078, + "step": 6850 + }, + { + "epoch": 0.552901299330159, + "grad_norm": 0.6875078678131104, + "learning_rate": 0.00014838274460190475, + "loss": 2.6109, + "step": 6851 + }, + { + "epoch": 0.552982003066742, + "grad_norm": 0.7049943804740906, + "learning_rate": 0.00014836892788370118, + "loss": 2.5755, + "step": 6852 + }, + { + "epoch": 0.553062706803325, + "grad_norm": 0.6941191554069519, + "learning_rate": 0.00014835510996001744, + "loss": 2.6694, + "step": 6853 + }, + { + "epoch": 0.553143410539908, + "grad_norm": 0.7589484453201294, + "learning_rate": 0.000148341290831198, + "loss": 2.5677, + "step": 6854 + }, + { + "epoch": 0.553224114276491, + "grad_norm": 0.6594784259796143, + "learning_rate": 0.00014832747049758723, + "loss": 2.6209, + "step": 6855 + }, + { + "epoch": 0.553304818013074, + "grad_norm": 0.726598858833313, + "learning_rate": 0.00014831364895952952, + "loss": 2.6492, + "step": 6856 + }, + { + "epoch": 0.553385521749657, 
+ "grad_norm": 0.6668030023574829, + "learning_rate": 0.0001482998262173694, + "loss": 2.6057, + "step": 6857 + }, + { + "epoch": 0.55346622548624, + "grad_norm": 0.7698997855186462, + "learning_rate": 0.0001482860022714514, + "loss": 2.6215, + "step": 6858 + }, + { + "epoch": 0.553546929222823, + "grad_norm": 0.6805251836776733, + "learning_rate": 0.00014827217712211997, + "loss": 2.5855, + "step": 6859 + }, + { + "epoch": 0.553627632959406, + "grad_norm": 0.8481020331382751, + "learning_rate": 0.00014825835076971968, + "loss": 2.6218, + "step": 6860 + }, + { + "epoch": 0.5537083366959891, + "grad_norm": 0.6801722645759583, + "learning_rate": 0.00014824452321459517, + "loss": 2.5998, + "step": 6861 + }, + { + "epoch": 0.553789040432572, + "grad_norm": 0.7174597978591919, + "learning_rate": 0.00014823069445709104, + "loss": 2.5782, + "step": 6862 + }, + { + "epoch": 0.553869744169155, + "grad_norm": 0.7607117891311646, + "learning_rate": 0.0001482168644975519, + "loss": 2.6492, + "step": 6863 + }, + { + "epoch": 0.553950447905738, + "grad_norm": 0.7554265856742859, + "learning_rate": 0.00014820303333632246, + "loss": 2.6511, + "step": 6864 + }, + { + "epoch": 0.5540311516423211, + "grad_norm": 0.7520260214805603, + "learning_rate": 0.00014818920097374745, + "loss": 2.6258, + "step": 6865 + }, + { + "epoch": 0.5541118553789041, + "grad_norm": 0.7897995114326477, + "learning_rate": 0.00014817536741017152, + "loss": 2.6153, + "step": 6866 + }, + { + "epoch": 0.554192559115487, + "grad_norm": 0.7444615960121155, + "learning_rate": 0.00014816153264593957, + "loss": 2.5892, + "step": 6867 + }, + { + "epoch": 0.55427326285207, + "grad_norm": 0.6593222618103027, + "learning_rate": 0.0001481476966813963, + "loss": 2.6048, + "step": 6868 + }, + { + "epoch": 0.5543539665886531, + "grad_norm": 0.7517102360725403, + "learning_rate": 0.0001481338595168866, + "loss": 2.6496, + "step": 6869 + }, + { + "epoch": 0.5544346703252361, + "grad_norm": 0.7314056754112244, + 
"learning_rate": 0.00014812002115275529, + "loss": 2.6009, + "step": 6870 + }, + { + "epoch": 0.554515374061819, + "grad_norm": 0.6718037724494934, + "learning_rate": 0.00014810618158934722, + "loss": 2.6279, + "step": 6871 + }, + { + "epoch": 0.554596077798402, + "grad_norm": 0.6853529810905457, + "learning_rate": 0.00014809234082700735, + "loss": 2.6562, + "step": 6872 + }, + { + "epoch": 0.5546767815349851, + "grad_norm": 0.713599443435669, + "learning_rate": 0.0001480784988660807, + "loss": 2.5783, + "step": 6873 + }, + { + "epoch": 0.5547574852715681, + "grad_norm": 0.6820243000984192, + "learning_rate": 0.00014806465570691213, + "loss": 2.5753, + "step": 6874 + }, + { + "epoch": 0.5548381890081511, + "grad_norm": 0.6999152302742004, + "learning_rate": 0.00014805081134984673, + "loss": 2.5839, + "step": 6875 + }, + { + "epoch": 0.554918892744734, + "grad_norm": 0.7145923376083374, + "learning_rate": 0.00014803696579522948, + "loss": 2.6153, + "step": 6876 + }, + { + "epoch": 0.5549995964813171, + "grad_norm": 0.7569223046302795, + "learning_rate": 0.00014802311904340548, + "loss": 2.5879, + "step": 6877 + }, + { + "epoch": 0.5550803002179001, + "grad_norm": 0.6977131962776184, + "learning_rate": 0.00014800927109471983, + "loss": 2.6587, + "step": 6878 + }, + { + "epoch": 0.5551610039544831, + "grad_norm": 0.6693562865257263, + "learning_rate": 0.00014799542194951764, + "loss": 2.6271, + "step": 6879 + }, + { + "epoch": 0.5552417076910661, + "grad_norm": 0.6937456130981445, + "learning_rate": 0.00014798157160814406, + "loss": 2.6213, + "step": 6880 + }, + { + "epoch": 0.5553224114276492, + "grad_norm": 0.761538565158844, + "learning_rate": 0.0001479677200709443, + "loss": 2.6053, + "step": 6881 + }, + { + "epoch": 0.5554031151642321, + "grad_norm": 0.707457959651947, + "learning_rate": 0.00014795386733826356, + "loss": 2.5763, + "step": 6882 + }, + { + "epoch": 0.5554838189008151, + "grad_norm": 0.7323198318481445, + "learning_rate": 0.0001479400134104471, + 
"loss": 2.6899, + "step": 6883 + }, + { + "epoch": 0.5555645226373981, + "grad_norm": 0.7181541323661804, + "learning_rate": 0.0001479261582878402, + "loss": 2.5743, + "step": 6884 + }, + { + "epoch": 0.5556452263739811, + "grad_norm": 0.7683241367340088, + "learning_rate": 0.00014791230197078813, + "loss": 2.5295, + "step": 6885 + }, + { + "epoch": 0.5557259301105641, + "grad_norm": 0.7248150706291199, + "learning_rate": 0.00014789844445963626, + "loss": 2.6131, + "step": 6886 + }, + { + "epoch": 0.5558066338471471, + "grad_norm": 0.6868402361869812, + "learning_rate": 0.00014788458575472997, + "loss": 2.6182, + "step": 6887 + }, + { + "epoch": 0.5558873375837301, + "grad_norm": 0.6995798945426941, + "learning_rate": 0.0001478707258564146, + "loss": 2.5969, + "step": 6888 + }, + { + "epoch": 0.5559680413203131, + "grad_norm": 0.6912558078765869, + "learning_rate": 0.00014785686476503565, + "loss": 2.6264, + "step": 6889 + }, + { + "epoch": 0.5560487450568962, + "grad_norm": 0.7485123872756958, + "learning_rate": 0.00014784300248093848, + "loss": 2.6036, + "step": 6890 + }, + { + "epoch": 0.5561294487934791, + "grad_norm": 0.7150819897651672, + "learning_rate": 0.00014782913900446864, + "loss": 2.5807, + "step": 6891 + }, + { + "epoch": 0.5562101525300621, + "grad_norm": 0.6715224385261536, + "learning_rate": 0.00014781527433597167, + "loss": 2.6164, + "step": 6892 + }, + { + "epoch": 0.5562908562666451, + "grad_norm": 0.6951256394386292, + "learning_rate": 0.000147801408475793, + "loss": 2.6106, + "step": 6893 + }, + { + "epoch": 0.5563715600032282, + "grad_norm": 0.7296997904777527, + "learning_rate": 0.00014778754142427832, + "loss": 2.6182, + "step": 6894 + }, + { + "epoch": 0.5564522637398112, + "grad_norm": 0.7484713196754456, + "learning_rate": 0.0001477736731817732, + "loss": 2.6384, + "step": 6895 + }, + { + "epoch": 0.5565329674763941, + "grad_norm": 0.6967526078224182, + "learning_rate": 0.00014775980374862326, + "loss": 2.5889, + "step": 6896 + }, + { + 
"epoch": 0.5566136712129771, + "grad_norm": 0.7004885077476501, + "learning_rate": 0.00014774593312517415, + "loss": 2.6549, + "step": 6897 + }, + { + "epoch": 0.5566943749495602, + "grad_norm": 0.7069302201271057, + "learning_rate": 0.00014773206131177158, + "loss": 2.6408, + "step": 6898 + }, + { + "epoch": 0.5567750786861432, + "grad_norm": 0.7048566341400146, + "learning_rate": 0.00014771818830876127, + "loss": 2.5909, + "step": 6899 + }, + { + "epoch": 0.5568557824227262, + "grad_norm": 0.7386630773544312, + "learning_rate": 0.00014770431411648897, + "loss": 2.6402, + "step": 6900 + }, + { + "epoch": 0.5569364861593091, + "grad_norm": 0.7244876027107239, + "learning_rate": 0.00014769043873530047, + "loss": 2.5548, + "step": 6901 + }, + { + "epoch": 0.5570171898958922, + "grad_norm": 0.6820651888847351, + "learning_rate": 0.00014767656216554156, + "loss": 2.682, + "step": 6902 + }, + { + "epoch": 0.5570978936324752, + "grad_norm": 0.7281784415245056, + "learning_rate": 0.00014766268440755812, + "loss": 2.622, + "step": 6903 + }, + { + "epoch": 0.5571785973690582, + "grad_norm": 0.6525030136108398, + "learning_rate": 0.00014764880546169594, + "loss": 2.5809, + "step": 6904 + }, + { + "epoch": 0.5572593011056411, + "grad_norm": 0.6735210418701172, + "learning_rate": 0.00014763492532830102, + "loss": 2.6645, + "step": 6905 + }, + { + "epoch": 0.5573400048422242, + "grad_norm": 0.674700140953064, + "learning_rate": 0.00014762104400771922, + "loss": 2.6466, + "step": 6906 + }, + { + "epoch": 0.5574207085788072, + "grad_norm": 0.7570134401321411, + "learning_rate": 0.00014760716150029652, + "loss": 2.57, + "step": 6907 + }, + { + "epoch": 0.5575014123153902, + "grad_norm": 0.6532449722290039, + "learning_rate": 0.00014759327780637893, + "loss": 2.6207, + "step": 6908 + }, + { + "epoch": 0.5575821160519732, + "grad_norm": 0.7697737812995911, + "learning_rate": 0.00014757939292631242, + "loss": 2.5846, + "step": 6909 + }, + { + "epoch": 0.5576628197885563, + 
"grad_norm": 0.6750194430351257, + "learning_rate": 0.00014756550686044308, + "loss": 2.6421, + "step": 6910 + }, + { + "epoch": 0.5577435235251392, + "grad_norm": 0.7357683777809143, + "learning_rate": 0.00014755161960911697, + "loss": 2.6173, + "step": 6911 + }, + { + "epoch": 0.5578242272617222, + "grad_norm": 0.6812090277671814, + "learning_rate": 0.0001475377311726802, + "loss": 2.5556, + "step": 6912 + }, + { + "epoch": 0.5579049309983052, + "grad_norm": 0.7633040547370911, + "learning_rate": 0.00014752384155147888, + "loss": 2.6505, + "step": 6913 + }, + { + "epoch": 0.5579856347348883, + "grad_norm": 0.7426417469978333, + "learning_rate": 0.00014750995074585922, + "loss": 2.5575, + "step": 6914 + }, + { + "epoch": 0.5580663384714712, + "grad_norm": 0.6926711201667786, + "learning_rate": 0.00014749605875616744, + "loss": 2.5751, + "step": 6915 + }, + { + "epoch": 0.5581470422080542, + "grad_norm": 0.70630943775177, + "learning_rate": 0.00014748216558274966, + "loss": 2.6228, + "step": 6916 + }, + { + "epoch": 0.5582277459446372, + "grad_norm": 0.7183346748352051, + "learning_rate": 0.0001474682712259522, + "loss": 2.5704, + "step": 6917 + }, + { + "epoch": 0.5583084496812203, + "grad_norm": 0.7622792720794678, + "learning_rate": 0.00014745437568612136, + "loss": 2.6031, + "step": 6918 + }, + { + "epoch": 0.5583891534178033, + "grad_norm": 0.6967802047729492, + "learning_rate": 0.00014744047896360344, + "loss": 2.6031, + "step": 6919 + }, + { + "epoch": 0.5584698571543862, + "grad_norm": 0.7827191948890686, + "learning_rate": 0.00014742658105874475, + "loss": 2.5427, + "step": 6920 + }, + { + "epoch": 0.5585505608909692, + "grad_norm": 0.6865705847740173, + "learning_rate": 0.0001474126819718917, + "loss": 2.6514, + "step": 6921 + }, + { + "epoch": 0.5586312646275523, + "grad_norm": 0.7181665897369385, + "learning_rate": 0.0001473987817033906, + "loss": 2.613, + "step": 6922 + }, + { + "epoch": 0.5587119683641353, + "grad_norm": 0.7198463082313538, + 
"learning_rate": 0.00014738488025358806, + "loss": 2.6423, + "step": 6923 + }, + { + "epoch": 0.5587926721007183, + "grad_norm": 0.773078441619873, + "learning_rate": 0.00014737097762283042, + "loss": 2.5946, + "step": 6924 + }, + { + "epoch": 0.5588733758373012, + "grad_norm": 0.7732799649238586, + "learning_rate": 0.00014735707381146416, + "loss": 2.6778, + "step": 6925 + }, + { + "epoch": 0.5589540795738843, + "grad_norm": 0.7639997601509094, + "learning_rate": 0.00014734316881983585, + "loss": 2.6064, + "step": 6926 + }, + { + "epoch": 0.5590347833104673, + "grad_norm": 0.7912085652351379, + "learning_rate": 0.00014732926264829198, + "loss": 2.5765, + "step": 6927 + }, + { + "epoch": 0.5591154870470503, + "grad_norm": 0.7460121512413025, + "learning_rate": 0.0001473153552971792, + "loss": 2.6724, + "step": 6928 + }, + { + "epoch": 0.5591961907836333, + "grad_norm": 0.6853603720664978, + "learning_rate": 0.00014730144676684408, + "loss": 2.5846, + "step": 6929 + }, + { + "epoch": 0.5592768945202163, + "grad_norm": 0.7368159294128418, + "learning_rate": 0.00014728753705763324, + "loss": 2.6626, + "step": 6930 + }, + { + "epoch": 0.5593575982567993, + "grad_norm": 0.6888907551765442, + "learning_rate": 0.0001472736261698934, + "loss": 2.6169, + "step": 6931 + }, + { + "epoch": 0.5594383019933823, + "grad_norm": 0.6978163719177246, + "learning_rate": 0.0001472597141039712, + "loss": 2.6367, + "step": 6932 + }, + { + "epoch": 0.5595190057299653, + "grad_norm": 0.7829774618148804, + "learning_rate": 0.00014724580086021335, + "loss": 2.5983, + "step": 6933 + }, + { + "epoch": 0.5595997094665484, + "grad_norm": 0.7872018218040466, + "learning_rate": 0.0001472318864389667, + "loss": 2.5418, + "step": 6934 + }, + { + "epoch": 0.5596804132031313, + "grad_norm": 0.6994973421096802, + "learning_rate": 0.00014721797084057793, + "loss": 2.6062, + "step": 6935 + }, + { + "epoch": 0.5597611169397143, + "grad_norm": 0.7281144857406616, + "learning_rate": 0.00014720405406539394, 
+ "loss": 2.573, + "step": 6936 + }, + { + "epoch": 0.5598418206762973, + "grad_norm": 0.713513970375061, + "learning_rate": 0.0001471901361137615, + "loss": 2.6589, + "step": 6937 + }, + { + "epoch": 0.5599225244128803, + "grad_norm": 0.7752750515937805, + "learning_rate": 0.00014717621698602754, + "loss": 2.6478, + "step": 6938 + }, + { + "epoch": 0.5600032281494634, + "grad_norm": 0.6876000165939331, + "learning_rate": 0.00014716229668253889, + "loss": 2.6092, + "step": 6939 + }, + { + "epoch": 0.5600839318860463, + "grad_norm": 0.6371028423309326, + "learning_rate": 0.00014714837520364256, + "loss": 2.606, + "step": 6940 + }, + { + "epoch": 0.5601646356226293, + "grad_norm": 0.6488915085792542, + "learning_rate": 0.00014713445254968546, + "loss": 2.5769, + "step": 6941 + }, + { + "epoch": 0.5602453393592123, + "grad_norm": 0.7286413908004761, + "learning_rate": 0.00014712052872101458, + "loss": 2.6267, + "step": 6942 + }, + { + "epoch": 0.5603260430957954, + "grad_norm": 0.6863759160041809, + "learning_rate": 0.00014710660371797696, + "loss": 2.641, + "step": 6943 + }, + { + "epoch": 0.5604067468323783, + "grad_norm": 0.706900417804718, + "learning_rate": 0.00014709267754091964, + "loss": 2.6344, + "step": 6944 + }, + { + "epoch": 0.5604874505689613, + "grad_norm": 0.6462892293930054, + "learning_rate": 0.0001470787501901897, + "loss": 2.5561, + "step": 6945 + }, + { + "epoch": 0.5605681543055443, + "grad_norm": 0.7342472076416016, + "learning_rate": 0.00014706482166613425, + "loss": 2.583, + "step": 6946 + }, + { + "epoch": 0.5606488580421274, + "grad_norm": 0.7132803797721863, + "learning_rate": 0.00014705089196910038, + "loss": 2.558, + "step": 6947 + }, + { + "epoch": 0.5607295617787104, + "grad_norm": 0.7709125876426697, + "learning_rate": 0.00014703696109943533, + "loss": 2.6165, + "step": 6948 + }, + { + "epoch": 0.5608102655152933, + "grad_norm": 0.7108885645866394, + "learning_rate": 0.00014702302905748619, + "loss": 2.5788, + "step": 6949 + }, + { + 
"epoch": 0.5608909692518763, + "grad_norm": 0.7295591235160828, + "learning_rate": 0.0001470090958436003, + "loss": 2.6526, + "step": 6950 + }, + { + "epoch": 0.5609716729884594, + "grad_norm": 0.7235364317893982, + "learning_rate": 0.00014699516145812486, + "loss": 2.604, + "step": 6951 + }, + { + "epoch": 0.5610523767250424, + "grad_norm": 0.6723269820213318, + "learning_rate": 0.00014698122590140714, + "loss": 2.5838, + "step": 6952 + }, + { + "epoch": 0.5611330804616254, + "grad_norm": 0.7022266983985901, + "learning_rate": 0.00014696728917379447, + "loss": 2.6086, + "step": 6953 + }, + { + "epoch": 0.5612137841982083, + "grad_norm": 0.6923824548721313, + "learning_rate": 0.00014695335127563414, + "loss": 2.6678, + "step": 6954 + }, + { + "epoch": 0.5612944879347914, + "grad_norm": 0.6909339427947998, + "learning_rate": 0.0001469394122072736, + "loss": 2.6397, + "step": 6955 + }, + { + "epoch": 0.5613751916713744, + "grad_norm": 0.710299015045166, + "learning_rate": 0.00014692547196906022, + "loss": 2.5973, + "step": 6956 + }, + { + "epoch": 0.5614558954079574, + "grad_norm": 0.7141178250312805, + "learning_rate": 0.00014691153056134136, + "loss": 2.6111, + "step": 6957 + }, + { + "epoch": 0.5615365991445403, + "grad_norm": 0.6994750499725342, + "learning_rate": 0.00014689758798446456, + "loss": 2.6498, + "step": 6958 + }, + { + "epoch": 0.5616173028811234, + "grad_norm": 0.6951611638069153, + "learning_rate": 0.00014688364423877726, + "loss": 2.6208, + "step": 6959 + }, + { + "epoch": 0.5616980066177064, + "grad_norm": 0.6610642075538635, + "learning_rate": 0.000146869699324627, + "loss": 2.5725, + "step": 6960 + }, + { + "epoch": 0.5617787103542894, + "grad_norm": 0.6771267056465149, + "learning_rate": 0.00014685575324236135, + "loss": 2.6336, + "step": 6961 + }, + { + "epoch": 0.5618594140908724, + "grad_norm": 0.7431008815765381, + "learning_rate": 0.0001468418059923278, + "loss": 2.6782, + "step": 6962 + }, + { + "epoch": 0.5619401178274555, + "grad_norm": 
0.7399705648422241, + "learning_rate": 0.000146827857574874, + "loss": 2.6212, + "step": 6963 + }, + { + "epoch": 0.5620208215640384, + "grad_norm": 0.7237067222595215, + "learning_rate": 0.00014681390799034763, + "loss": 2.6261, + "step": 6964 + }, + { + "epoch": 0.5621015253006214, + "grad_norm": 0.7033257484436035, + "learning_rate": 0.00014679995723909623, + "loss": 2.6912, + "step": 6965 + }, + { + "epoch": 0.5621822290372044, + "grad_norm": 0.6953759789466858, + "learning_rate": 0.00014678600532146762, + "loss": 2.6022, + "step": 6966 + }, + { + "epoch": 0.5622629327737875, + "grad_norm": 0.8338057994842529, + "learning_rate": 0.0001467720522378094, + "loss": 2.595, + "step": 6967 + }, + { + "epoch": 0.5623436365103704, + "grad_norm": 0.6506100296974182, + "learning_rate": 0.00014675809798846942, + "loss": 2.6033, + "step": 6968 + }, + { + "epoch": 0.5624243402469534, + "grad_norm": 0.7122468948364258, + "learning_rate": 0.0001467441425737954, + "loss": 2.56, + "step": 6969 + }, + { + "epoch": 0.5625050439835364, + "grad_norm": 0.7012680172920227, + "learning_rate": 0.00014673018599413516, + "loss": 2.6052, + "step": 6970 + }, + { + "epoch": 0.5625857477201195, + "grad_norm": 0.668187141418457, + "learning_rate": 0.00014671622824983653, + "loss": 2.6675, + "step": 6971 + }, + { + "epoch": 0.5626664514567025, + "grad_norm": 0.7259203791618347, + "learning_rate": 0.00014670226934124738, + "loss": 2.5977, + "step": 6972 + }, + { + "epoch": 0.5627471551932854, + "grad_norm": 0.6705875396728516, + "learning_rate": 0.00014668830926871555, + "loss": 2.649, + "step": 6973 + }, + { + "epoch": 0.5628278589298684, + "grad_norm": 0.682731568813324, + "learning_rate": 0.00014667434803258906, + "loss": 2.6084, + "step": 6974 + }, + { + "epoch": 0.5629085626664515, + "grad_norm": 0.7061700224876404, + "learning_rate": 0.00014666038563321577, + "loss": 2.6256, + "step": 6975 + }, + { + "epoch": 0.5629892664030345, + "grad_norm": 0.6839977502822876, + "learning_rate": 
0.00014664642207094374, + "loss": 2.6342, + "step": 6976 + }, + { + "epoch": 0.5630699701396175, + "grad_norm": 0.7376503348350525, + "learning_rate": 0.00014663245734612094, + "loss": 2.6001, + "step": 6977 + }, + { + "epoch": 0.5631506738762004, + "grad_norm": 0.6901546716690063, + "learning_rate": 0.0001466184914590954, + "loss": 2.6715, + "step": 6978 + }, + { + "epoch": 0.5632313776127835, + "grad_norm": 0.816223680973053, + "learning_rate": 0.00014660452441021512, + "loss": 2.6407, + "step": 6979 + }, + { + "epoch": 0.5633120813493665, + "grad_norm": 0.6904644966125488, + "learning_rate": 0.00014659055619982835, + "loss": 2.5543, + "step": 6980 + }, + { + "epoch": 0.5633927850859495, + "grad_norm": 0.6784235239028931, + "learning_rate": 0.0001465765868282831, + "loss": 2.6184, + "step": 6981 + }, + { + "epoch": 0.5634734888225325, + "grad_norm": 0.7689006328582764, + "learning_rate": 0.00014656261629592755, + "loss": 2.644, + "step": 6982 + }, + { + "epoch": 0.5635541925591155, + "grad_norm": 0.7608775496482849, + "learning_rate": 0.0001465486446031099, + "loss": 2.5952, + "step": 6983 + }, + { + "epoch": 0.5636348962956985, + "grad_norm": 0.7266525626182556, + "learning_rate": 0.00014653467175017833, + "loss": 2.6479, + "step": 6984 + }, + { + "epoch": 0.5637156000322815, + "grad_norm": 0.6907477974891663, + "learning_rate": 0.00014652069773748113, + "loss": 2.5825, + "step": 6985 + }, + { + "epoch": 0.5637963037688645, + "grad_norm": 0.7790403366088867, + "learning_rate": 0.00014650672256536648, + "loss": 2.5948, + "step": 6986 + }, + { + "epoch": 0.5638770075054474, + "grad_norm": 0.7072858214378357, + "learning_rate": 0.00014649274623418278, + "loss": 2.6017, + "step": 6987 + }, + { + "epoch": 0.5639577112420305, + "grad_norm": 0.7140414118766785, + "learning_rate": 0.0001464787687442783, + "loss": 2.5709, + "step": 6988 + }, + { + "epoch": 0.5640384149786135, + "grad_norm": 0.857783317565918, + "learning_rate": 0.00014646479009600139, + "loss": 2.7049, + 
"step": 6989 + }, + { + "epoch": 0.5641191187151965, + "grad_norm": 0.7599344253540039, + "learning_rate": 0.00014645081028970047, + "loss": 2.6369, + "step": 6990 + }, + { + "epoch": 0.5641998224517795, + "grad_norm": 0.7286150455474854, + "learning_rate": 0.00014643682932572393, + "loss": 2.6238, + "step": 6991 + }, + { + "epoch": 0.5642805261883626, + "grad_norm": 0.7095075249671936, + "learning_rate": 0.0001464228472044202, + "loss": 2.5924, + "step": 6992 + }, + { + "epoch": 0.5643612299249455, + "grad_norm": 0.7583668828010559, + "learning_rate": 0.0001464088639261378, + "loss": 2.6098, + "step": 6993 + }, + { + "epoch": 0.5644419336615285, + "grad_norm": 0.7393970489501953, + "learning_rate": 0.00014639487949122515, + "loss": 2.6036, + "step": 6994 + }, + { + "epoch": 0.5645226373981115, + "grad_norm": 0.6789388656616211, + "learning_rate": 0.00014638089390003086, + "loss": 2.642, + "step": 6995 + }, + { + "epoch": 0.5646033411346946, + "grad_norm": 0.8021289706230164, + "learning_rate": 0.00014636690715290346, + "loss": 2.6851, + "step": 6996 + }, + { + "epoch": 0.5646840448712775, + "grad_norm": 0.6931039094924927, + "learning_rate": 0.00014635291925019152, + "loss": 2.6358, + "step": 6997 + }, + { + "epoch": 0.5647647486078605, + "grad_norm": 0.7356590032577515, + "learning_rate": 0.00014633893019224366, + "loss": 2.5661, + "step": 6998 + }, + { + "epoch": 0.5648454523444435, + "grad_norm": 0.6777941584587097, + "learning_rate": 0.0001463249399794085, + "loss": 2.5578, + "step": 6999 + }, + { + "epoch": 0.5649261560810266, + "grad_norm": 0.7163615822792053, + "learning_rate": 0.0001463109486120348, + "loss": 2.5582, + "step": 7000 + }, + { + "epoch": 0.5649261560810266, + "eval_loss": 2.5298855304718018, + "eval_runtime": 757.774, + "eval_samples_per_second": 3.457, + "eval_steps_per_second": 0.577, + "step": 7000 + }, + { + "epoch": 0.5650068598176096, + "grad_norm": 0.7175148129463196, + "learning_rate": 0.0001462969560904712, + "loss": 2.568, + "step": 
7001 + }, + { + "epoch": 0.5650875635541925, + "grad_norm": 0.6998937129974365, + "learning_rate": 0.00014628296241506636, + "loss": 2.6347, + "step": 7002 + }, + { + "epoch": 0.5651682672907755, + "grad_norm": 0.8140312433242798, + "learning_rate": 0.00014626896758616916, + "loss": 2.6566, + "step": 7003 + }, + { + "epoch": 0.5652489710273586, + "grad_norm": 0.7218164205551147, + "learning_rate": 0.00014625497160412833, + "loss": 2.5693, + "step": 7004 + }, + { + "epoch": 0.5653296747639416, + "grad_norm": 0.6974074244499207, + "learning_rate": 0.0001462409744692927, + "loss": 2.6084, + "step": 7005 + }, + { + "epoch": 0.5654103785005246, + "grad_norm": 0.7475053071975708, + "learning_rate": 0.00014622697618201113, + "loss": 2.6534, + "step": 7006 + }, + { + "epoch": 0.5654910822371075, + "grad_norm": 0.6768492460250854, + "learning_rate": 0.00014621297674263247, + "loss": 2.585, + "step": 7007 + }, + { + "epoch": 0.5655717859736906, + "grad_norm": 0.7023029923439026, + "learning_rate": 0.0001461989761515056, + "loss": 2.6219, + "step": 7008 + }, + { + "epoch": 0.5656524897102736, + "grad_norm": 0.7248445749282837, + "learning_rate": 0.0001461849744089795, + "loss": 2.6382, + "step": 7009 + }, + { + "epoch": 0.5657331934468566, + "grad_norm": 0.6961148381233215, + "learning_rate": 0.00014617097151540308, + "loss": 2.7184, + "step": 7010 + }, + { + "epoch": 0.5658138971834396, + "grad_norm": 0.6649057269096375, + "learning_rate": 0.0001461569674711254, + "loss": 2.6059, + "step": 7011 + }, + { + "epoch": 0.5658946009200226, + "grad_norm": 0.7451788783073425, + "learning_rate": 0.00014614296227649542, + "loss": 2.5697, + "step": 7012 + }, + { + "epoch": 0.5659753046566056, + "grad_norm": 0.6880216598510742, + "learning_rate": 0.0001461289559318622, + "loss": 2.5785, + "step": 7013 + }, + { + "epoch": 0.5660560083931886, + "grad_norm": 0.7505971789360046, + "learning_rate": 0.00014611494843757482, + "loss": 2.5479, + "step": 7014 + }, + { + "epoch": 
0.5661367121297716, + "grad_norm": 0.745914876461029, + "learning_rate": 0.00014610093979398235, + "loss": 2.6367, + "step": 7015 + }, + { + "epoch": 0.5662174158663547, + "grad_norm": 0.6758660674095154, + "learning_rate": 0.000146086930001434, + "loss": 2.5673, + "step": 7016 + }, + { + "epoch": 0.5662981196029376, + "grad_norm": 0.7114273309707642, + "learning_rate": 0.00014607291906027886, + "loss": 2.6188, + "step": 7017 + }, + { + "epoch": 0.5663788233395206, + "grad_norm": 0.6791165471076965, + "learning_rate": 0.00014605890697086613, + "loss": 2.6197, + "step": 7018 + }, + { + "epoch": 0.5664595270761036, + "grad_norm": 0.6948217153549194, + "learning_rate": 0.00014604489373354503, + "loss": 2.5996, + "step": 7019 + }, + { + "epoch": 0.5665402308126867, + "grad_norm": 0.6993576884269714, + "learning_rate": 0.00014603087934866483, + "loss": 2.565, + "step": 7020 + }, + { + "epoch": 0.5666209345492697, + "grad_norm": 0.6936905384063721, + "learning_rate": 0.0001460168638165748, + "loss": 2.6524, + "step": 7021 + }, + { + "epoch": 0.5667016382858526, + "grad_norm": 0.6810741424560547, + "learning_rate": 0.00014600284713762424, + "loss": 2.6519, + "step": 7022 + }, + { + "epoch": 0.5667823420224356, + "grad_norm": 0.7540227770805359, + "learning_rate": 0.00014598882931216245, + "loss": 2.659, + "step": 7023 + }, + { + "epoch": 0.5668630457590187, + "grad_norm": 0.6520613431930542, + "learning_rate": 0.0001459748103405388, + "loss": 2.5341, + "step": 7024 + }, + { + "epoch": 0.5669437494956017, + "grad_norm": 0.7159109711647034, + "learning_rate": 0.00014596079022310277, + "loss": 2.6548, + "step": 7025 + }, + { + "epoch": 0.5670244532321846, + "grad_norm": 0.803284227848053, + "learning_rate": 0.00014594676896020366, + "loss": 2.705, + "step": 7026 + }, + { + "epoch": 0.5671051569687676, + "grad_norm": 0.7069976925849915, + "learning_rate": 0.00014593274655219095, + "loss": 2.5733, + "step": 7027 + }, + { + "epoch": 0.5671858607053507, + "grad_norm": 
0.7085167169570923, + "learning_rate": 0.00014591872299941417, + "loss": 2.6247, + "step": 7028 + }, + { + "epoch": 0.5672665644419337, + "grad_norm": 0.6748499274253845, + "learning_rate": 0.00014590469830222272, + "loss": 2.6446, + "step": 7029 + }, + { + "epoch": 0.5673472681785167, + "grad_norm": 0.6885821223258972, + "learning_rate": 0.00014589067246096623, + "loss": 2.5879, + "step": 7030 + }, + { + "epoch": 0.5674279719150996, + "grad_norm": 0.7220324277877808, + "learning_rate": 0.0001458766454759942, + "loss": 2.6249, + "step": 7031 + }, + { + "epoch": 0.5675086756516827, + "grad_norm": 0.6712783575057983, + "learning_rate": 0.00014586261734765628, + "loss": 2.5971, + "step": 7032 + }, + { + "epoch": 0.5675893793882657, + "grad_norm": 0.6582161784172058, + "learning_rate": 0.00014584858807630203, + "loss": 2.6224, + "step": 7033 + }, + { + "epoch": 0.5676700831248487, + "grad_norm": 0.6699219346046448, + "learning_rate": 0.0001458345576622811, + "loss": 2.5926, + "step": 7034 + }, + { + "epoch": 0.5677507868614317, + "grad_norm": 0.6508033871650696, + "learning_rate": 0.0001458205261059432, + "loss": 2.6311, + "step": 7035 + }, + { + "epoch": 0.5678314905980147, + "grad_norm": 0.7551338076591492, + "learning_rate": 0.00014580649340763802, + "loss": 2.5729, + "step": 7036 + }, + { + "epoch": 0.5679121943345977, + "grad_norm": 0.6875829100608826, + "learning_rate": 0.00014579245956771527, + "loss": 2.6253, + "step": 7037 + }, + { + "epoch": 0.5679928980711807, + "grad_norm": 0.698204517364502, + "learning_rate": 0.00014577842458652474, + "loss": 2.6218, + "step": 7038 + }, + { + "epoch": 0.5680736018077637, + "grad_norm": 0.8258630037307739, + "learning_rate": 0.00014576438846441615, + "loss": 2.6307, + "step": 7039 + }, + { + "epoch": 0.5681543055443466, + "grad_norm": 0.753105878829956, + "learning_rate": 0.00014575035120173942, + "loss": 2.5664, + "step": 7040 + }, + { + "epoch": 0.5682350092809297, + "grad_norm": 0.6999726295471191, + "learning_rate": 
0.00014573631279884435, + "loss": 2.6857, + "step": 7041 + }, + { + "epoch": 0.5683157130175127, + "grad_norm": 0.6484847068786621, + "learning_rate": 0.00014572227325608078, + "loss": 2.6068, + "step": 7042 + }, + { + "epoch": 0.5683964167540957, + "grad_norm": 0.7098011374473572, + "learning_rate": 0.00014570823257379866, + "loss": 2.6591, + "step": 7043 + }, + { + "epoch": 0.5684771204906787, + "grad_norm": 0.8304192423820496, + "learning_rate": 0.0001456941907523479, + "loss": 2.6582, + "step": 7044 + }, + { + "epoch": 0.5685578242272618, + "grad_norm": 0.763214111328125, + "learning_rate": 0.00014568014779207844, + "loss": 2.6605, + "step": 7045 + }, + { + "epoch": 0.5686385279638447, + "grad_norm": 0.6805880665779114, + "learning_rate": 0.00014566610369334032, + "loss": 2.6362, + "step": 7046 + }, + { + "epoch": 0.5687192317004277, + "grad_norm": 0.6753434538841248, + "learning_rate": 0.00014565205845648352, + "loss": 2.6352, + "step": 7047 + }, + { + "epoch": 0.5687999354370107, + "grad_norm": 0.7065438032150269, + "learning_rate": 0.00014563801208185807, + "loss": 2.5975, + "step": 7048 + }, + { + "epoch": 0.5688806391735938, + "grad_norm": 0.6863527894020081, + "learning_rate": 0.00014562396456981407, + "loss": 2.576, + "step": 7049 + }, + { + "epoch": 0.5689613429101767, + "grad_norm": 0.7344440817832947, + "learning_rate": 0.00014560991592070158, + "loss": 2.5933, + "step": 7050 + }, + { + "epoch": 0.5690420466467597, + "grad_norm": 0.699992835521698, + "learning_rate": 0.00014559586613487082, + "loss": 2.6161, + "step": 7051 + }, + { + "epoch": 0.5691227503833427, + "grad_norm": 0.7287258505821228, + "learning_rate": 0.00014558181521267185, + "loss": 2.665, + "step": 7052 + }, + { + "epoch": 0.5692034541199258, + "grad_norm": 0.7304692268371582, + "learning_rate": 0.0001455677631544549, + "loss": 2.5696, + "step": 7053 + }, + { + "epoch": 0.5692841578565088, + "grad_norm": 0.6556086540222168, + "learning_rate": 0.00014555370996057016, + "loss": 2.6405, 
+ "step": 7054 + }, + { + "epoch": 0.5693648615930917, + "grad_norm": 0.6796221137046814, + "learning_rate": 0.0001455396556313679, + "loss": 2.6475, + "step": 7055 + }, + { + "epoch": 0.5694455653296747, + "grad_norm": 0.7067505717277527, + "learning_rate": 0.00014552560016719838, + "loss": 2.6344, + "step": 7056 + }, + { + "epoch": 0.5695262690662578, + "grad_norm": 0.7108997106552124, + "learning_rate": 0.00014551154356841193, + "loss": 2.6543, + "step": 7057 + }, + { + "epoch": 0.5696069728028408, + "grad_norm": 0.7296212911605835, + "learning_rate": 0.0001454974858353588, + "loss": 2.6152, + "step": 7058 + }, + { + "epoch": 0.5696876765394238, + "grad_norm": 0.7329154014587402, + "learning_rate": 0.00014548342696838943, + "loss": 2.6338, + "step": 7059 + }, + { + "epoch": 0.5697683802760067, + "grad_norm": 0.6880258321762085, + "learning_rate": 0.00014546936696785412, + "loss": 2.5834, + "step": 7060 + }, + { + "epoch": 0.5698490840125898, + "grad_norm": 0.7140741348266602, + "learning_rate": 0.00014545530583410336, + "loss": 2.6361, + "step": 7061 + }, + { + "epoch": 0.5699297877491728, + "grad_norm": 0.6419476866722107, + "learning_rate": 0.00014544124356748755, + "loss": 2.4982, + "step": 7062 + }, + { + "epoch": 0.5700104914857558, + "grad_norm": 0.6934036612510681, + "learning_rate": 0.00014542718016835718, + "loss": 2.5748, + "step": 7063 + }, + { + "epoch": 0.5700911952223388, + "grad_norm": 0.721663236618042, + "learning_rate": 0.0001454131156370627, + "loss": 2.5419, + "step": 7064 + }, + { + "epoch": 0.5701718989589218, + "grad_norm": 0.734062671661377, + "learning_rate": 0.00014539904997395468, + "loss": 2.6288, + "step": 7065 + }, + { + "epoch": 0.5702526026955048, + "grad_norm": 0.7927694320678711, + "learning_rate": 0.00014538498317938367, + "loss": 2.6331, + "step": 7066 + }, + { + "epoch": 0.5703333064320878, + "grad_norm": 0.715929388999939, + "learning_rate": 0.00014537091525370025, + "loss": 2.6333, + "step": 7067 + }, + { + "epoch": 
0.5704140101686708, + "grad_norm": 0.772230327129364, + "learning_rate": 0.00014535684619725498, + "loss": 2.6019, + "step": 7068 + }, + { + "epoch": 0.5704947139052539, + "grad_norm": 0.7277318239212036, + "learning_rate": 0.0001453427760103986, + "loss": 2.6062, + "step": 7069 + }, + { + "epoch": 0.5705754176418368, + "grad_norm": 0.6708227396011353, + "learning_rate": 0.00014532870469348164, + "loss": 2.6613, + "step": 7070 + }, + { + "epoch": 0.5706561213784198, + "grad_norm": 0.7507323622703552, + "learning_rate": 0.0001453146322468549, + "loss": 2.6456, + "step": 7071 + }, + { + "epoch": 0.5707368251150028, + "grad_norm": 0.6864063739776611, + "learning_rate": 0.00014530055867086912, + "loss": 2.6361, + "step": 7072 + }, + { + "epoch": 0.5708175288515859, + "grad_norm": 0.6805310249328613, + "learning_rate": 0.00014528648396587498, + "loss": 2.6088, + "step": 7073 + }, + { + "epoch": 0.5708982325881689, + "grad_norm": 0.7946523427963257, + "learning_rate": 0.00014527240813222325, + "loss": 2.6533, + "step": 7074 + }, + { + "epoch": 0.5709789363247518, + "grad_norm": 0.6814306974411011, + "learning_rate": 0.00014525833117026474, + "loss": 2.6478, + "step": 7075 + }, + { + "epoch": 0.5710596400613348, + "grad_norm": 0.749664843082428, + "learning_rate": 0.00014524425308035034, + "loss": 2.6296, + "step": 7076 + }, + { + "epoch": 0.5711403437979179, + "grad_norm": 0.6774656772613525, + "learning_rate": 0.00014523017386283091, + "loss": 2.5867, + "step": 7077 + }, + { + "epoch": 0.5712210475345009, + "grad_norm": 0.7331634163856506, + "learning_rate": 0.00014521609351805733, + "loss": 2.6484, + "step": 7078 + }, + { + "epoch": 0.5713017512710838, + "grad_norm": 0.7076910734176636, + "learning_rate": 0.00014520201204638045, + "loss": 2.6464, + "step": 7079 + }, + { + "epoch": 0.5713824550076668, + "grad_norm": 0.74099200963974, + "learning_rate": 0.00014518792944815127, + "loss": 2.6304, + "step": 7080 + }, + { + "epoch": 0.5714631587442499, + "grad_norm": 
0.6673823595046997, + "learning_rate": 0.00014517384572372078, + "loss": 2.5903, + "step": 7081 + }, + { + "epoch": 0.5715438624808329, + "grad_norm": 0.6872609257698059, + "learning_rate": 0.00014515976087343997, + "loss": 2.6189, + "step": 7082 + }, + { + "epoch": 0.5716245662174159, + "grad_norm": 0.7363224625587463, + "learning_rate": 0.0001451456748976599, + "loss": 2.5845, + "step": 7083 + }, + { + "epoch": 0.5717052699539988, + "grad_norm": 0.7672157287597656, + "learning_rate": 0.00014513158779673157, + "loss": 2.6331, + "step": 7084 + }, + { + "epoch": 0.5717859736905819, + "grad_norm": 0.661195695400238, + "learning_rate": 0.00014511749957100612, + "loss": 2.5827, + "step": 7085 + }, + { + "epoch": 0.5718666774271649, + "grad_norm": 0.8034788370132446, + "learning_rate": 0.0001451034102208346, + "loss": 2.6209, + "step": 7086 + }, + { + "epoch": 0.5719473811637479, + "grad_norm": 0.7318302392959595, + "learning_rate": 0.00014508931974656822, + "loss": 2.5898, + "step": 7087 + }, + { + "epoch": 0.5720280849003309, + "grad_norm": 0.7334744930267334, + "learning_rate": 0.00014507522814855814, + "loss": 2.5893, + "step": 7088 + }, + { + "epoch": 0.5721087886369138, + "grad_norm": 0.783051609992981, + "learning_rate": 0.00014506113542715553, + "loss": 2.6284, + "step": 7089 + }, + { + "epoch": 0.5721894923734969, + "grad_norm": 0.7319497466087341, + "learning_rate": 0.00014504704158271165, + "loss": 2.5705, + "step": 7090 + }, + { + "epoch": 0.5722701961100799, + "grad_norm": 0.7886925935745239, + "learning_rate": 0.00014503294661557772, + "loss": 2.641, + "step": 7091 + }, + { + "epoch": 0.5723508998466629, + "grad_norm": 0.6882795691490173, + "learning_rate": 0.00014501885052610502, + "loss": 2.5714, + "step": 7092 + }, + { + "epoch": 0.5724316035832459, + "grad_norm": 0.7089235186576843, + "learning_rate": 0.00014500475331464494, + "loss": 2.6073, + "step": 7093 + }, + { + "epoch": 0.5725123073198289, + "grad_norm": 0.7261029481887817, + "learning_rate": 
0.00014499065498154874, + "loss": 2.5595, + "step": 7094 + }, + { + "epoch": 0.5725930110564119, + "grad_norm": 0.7625105977058411, + "learning_rate": 0.0001449765555271678, + "loss": 2.5978, + "step": 7095 + }, + { + "epoch": 0.5726737147929949, + "grad_norm": 0.7853986024856567, + "learning_rate": 0.00014496245495185353, + "loss": 2.6378, + "step": 7096 + }, + { + "epoch": 0.5727544185295779, + "grad_norm": 0.8070923686027527, + "learning_rate": 0.00014494835325595736, + "loss": 2.7062, + "step": 7097 + }, + { + "epoch": 0.572835122266161, + "grad_norm": 0.7074965834617615, + "learning_rate": 0.00014493425043983073, + "loss": 2.5177, + "step": 7098 + }, + { + "epoch": 0.5729158260027439, + "grad_norm": 0.6890520453453064, + "learning_rate": 0.00014492014650382512, + "loss": 2.6058, + "step": 7099 + }, + { + "epoch": 0.5729965297393269, + "grad_norm": 0.6979860067367554, + "learning_rate": 0.00014490604144829202, + "loss": 2.5274, + "step": 7100 + }, + { + "epoch": 0.5730772334759099, + "grad_norm": 0.7972229719161987, + "learning_rate": 0.000144891935273583, + "loss": 2.6369, + "step": 7101 + }, + { + "epoch": 0.573157937212493, + "grad_norm": 0.6994345188140869, + "learning_rate": 0.0001448778279800496, + "loss": 2.5975, + "step": 7102 + }, + { + "epoch": 0.573238640949076, + "grad_norm": 0.7943929433822632, + "learning_rate": 0.0001448637195680434, + "loss": 2.6317, + "step": 7103 + }, + { + "epoch": 0.5733193446856589, + "grad_norm": 0.6975306272506714, + "learning_rate": 0.00014484961003791605, + "loss": 2.6264, + "step": 7104 + }, + { + "epoch": 0.5734000484222419, + "grad_norm": 0.6889060735702515, + "learning_rate": 0.00014483549939001917, + "loss": 2.5974, + "step": 7105 + }, + { + "epoch": 0.573480752158825, + "grad_norm": 0.7372777462005615, + "learning_rate": 0.00014482138762470444, + "loss": 2.5851, + "step": 7106 + }, + { + "epoch": 0.573561455895408, + "grad_norm": 0.7045157551765442, + "learning_rate": 0.00014480727474232362, + "loss": 2.6451, + 
"step": 7107 + }, + { + "epoch": 0.5736421596319909, + "grad_norm": 0.6974517107009888, + "learning_rate": 0.00014479316074322832, + "loss": 2.6796, + "step": 7108 + }, + { + "epoch": 0.5737228633685739, + "grad_norm": 0.7328097224235535, + "learning_rate": 0.00014477904562777038, + "loss": 2.5923, + "step": 7109 + }, + { + "epoch": 0.573803567105157, + "grad_norm": 0.7288877964019775, + "learning_rate": 0.0001447649293963016, + "loss": 2.6012, + "step": 7110 + }, + { + "epoch": 0.57388427084174, + "grad_norm": 0.7054389119148254, + "learning_rate": 0.00014475081204917372, + "loss": 2.6666, + "step": 7111 + }, + { + "epoch": 0.573964974578323, + "grad_norm": 0.7447949647903442, + "learning_rate": 0.00014473669358673865, + "loss": 2.6093, + "step": 7112 + }, + { + "epoch": 0.5740456783149059, + "grad_norm": 0.6431592106819153, + "learning_rate": 0.0001447225740093482, + "loss": 2.6242, + "step": 7113 + }, + { + "epoch": 0.574126382051489, + "grad_norm": 0.7096747756004333, + "learning_rate": 0.00014470845331735434, + "loss": 2.6297, + "step": 7114 + }, + { + "epoch": 0.574207085788072, + "grad_norm": 0.6918880939483643, + "learning_rate": 0.00014469433151110894, + "loss": 2.5849, + "step": 7115 + }, + { + "epoch": 0.574287789524655, + "grad_norm": 0.6617783308029175, + "learning_rate": 0.00014468020859096395, + "loss": 2.5972, + "step": 7116 + }, + { + "epoch": 0.574368493261238, + "grad_norm": 0.6525121927261353, + "learning_rate": 0.0001446660845572714, + "loss": 2.5888, + "step": 7117 + }, + { + "epoch": 0.574449196997821, + "grad_norm": 0.7024720907211304, + "learning_rate": 0.00014465195941038326, + "loss": 2.6135, + "step": 7118 + }, + { + "epoch": 0.574529900734404, + "grad_norm": 0.7660520672798157, + "learning_rate": 0.00014463783315065153, + "loss": 2.5837, + "step": 7119 + }, + { + "epoch": 0.574610604470987, + "grad_norm": 0.8206443190574646, + "learning_rate": 0.00014462370577842838, + "loss": 2.6749, + "step": 7120 + }, + { + "epoch": 0.57469130820757, 
+ "grad_norm": 0.7176216840744019, + "learning_rate": 0.00014460957729406577, + "loss": 2.5814, + "step": 7121 + }, + { + "epoch": 0.5747720119441531, + "grad_norm": 0.7867588400840759, + "learning_rate": 0.0001445954476979159, + "loss": 2.5697, + "step": 7122 + }, + { + "epoch": 0.574852715680736, + "grad_norm": 0.7150471806526184, + "learning_rate": 0.0001445813169903309, + "loss": 2.5689, + "step": 7123 + }, + { + "epoch": 0.574933419417319, + "grad_norm": 0.7082479596138, + "learning_rate": 0.00014456718517166296, + "loss": 2.6081, + "step": 7124 + }, + { + "epoch": 0.575014123153902, + "grad_norm": 0.7207253575325012, + "learning_rate": 0.00014455305224226426, + "loss": 2.6573, + "step": 7125 + }, + { + "epoch": 0.5750948268904851, + "grad_norm": 0.7451751232147217, + "learning_rate": 0.00014453891820248704, + "loss": 2.6057, + "step": 7126 + }, + { + "epoch": 0.575175530627068, + "grad_norm": 0.7030230164527893, + "learning_rate": 0.0001445247830526835, + "loss": 2.6122, + "step": 7127 + }, + { + "epoch": 0.575256234363651, + "grad_norm": 0.7233754396438599, + "learning_rate": 0.00014451064679320605, + "loss": 2.5937, + "step": 7128 + }, + { + "epoch": 0.575336938100234, + "grad_norm": 0.6943942904472351, + "learning_rate": 0.0001444965094244069, + "loss": 2.6327, + "step": 7129 + }, + { + "epoch": 0.5754176418368171, + "grad_norm": 0.682056725025177, + "learning_rate": 0.00014448237094663843, + "loss": 2.6212, + "step": 7130 + }, + { + "epoch": 0.5754983455734001, + "grad_norm": 0.7424136400222778, + "learning_rate": 0.00014446823136025298, + "loss": 2.6031, + "step": 7131 + }, + { + "epoch": 0.575579049309983, + "grad_norm": 0.7464002370834351, + "learning_rate": 0.00014445409066560298, + "loss": 2.6363, + "step": 7132 + }, + { + "epoch": 0.575659753046566, + "grad_norm": 0.7137650847434998, + "learning_rate": 0.00014443994886304085, + "loss": 2.5343, + "step": 7133 + }, + { + "epoch": 0.5757404567831491, + "grad_norm": 0.6744158864021301, + 
"learning_rate": 0.00014442580595291901, + "loss": 2.6463, + "step": 7134 + }, + { + "epoch": 0.5758211605197321, + "grad_norm": 0.6947084069252014, + "learning_rate": 0.00014441166193558991, + "loss": 2.6074, + "step": 7135 + }, + { + "epoch": 0.5759018642563151, + "grad_norm": 0.6981585621833801, + "learning_rate": 0.00014439751681140616, + "loss": 2.6257, + "step": 7136 + }, + { + "epoch": 0.575982567992898, + "grad_norm": 0.6800102591514587, + "learning_rate": 0.00014438337058072023, + "loss": 2.6447, + "step": 7137 + }, + { + "epoch": 0.5760632717294811, + "grad_norm": 0.6952316164970398, + "learning_rate": 0.00014436922324388465, + "loss": 2.5739, + "step": 7138 + }, + { + "epoch": 0.5761439754660641, + "grad_norm": 0.709170937538147, + "learning_rate": 0.0001443550748012521, + "loss": 2.5918, + "step": 7139 + }, + { + "epoch": 0.5762246792026471, + "grad_norm": 0.7677363157272339, + "learning_rate": 0.00014434092525317512, + "loss": 2.6322, + "step": 7140 + }, + { + "epoch": 0.5763053829392301, + "grad_norm": 0.6730263233184814, + "learning_rate": 0.00014432677460000636, + "loss": 2.6764, + "step": 7141 + }, + { + "epoch": 0.576386086675813, + "grad_norm": 0.6782239675521851, + "learning_rate": 0.0001443126228420985, + "loss": 2.5208, + "step": 7142 + }, + { + "epoch": 0.5764667904123961, + "grad_norm": 0.7737600207328796, + "learning_rate": 0.00014429846997980424, + "loss": 2.6964, + "step": 7143 + }, + { + "epoch": 0.5765474941489791, + "grad_norm": 0.7456403374671936, + "learning_rate": 0.00014428431601347635, + "loss": 2.6163, + "step": 7144 + }, + { + "epoch": 0.5766281978855621, + "grad_norm": 0.7824606895446777, + "learning_rate": 0.00014427016094346754, + "loss": 2.6499, + "step": 7145 + }, + { + "epoch": 0.576708901622145, + "grad_norm": 0.7233635187149048, + "learning_rate": 0.00014425600477013055, + "loss": 2.6064, + "step": 7146 + }, + { + "epoch": 0.5767896053587281, + "grad_norm": 0.7008275389671326, + "learning_rate": 0.00014424184749381824, + 
"loss": 2.5585, + "step": 7147 + }, + { + "epoch": 0.5768703090953111, + "grad_norm": 0.6817710995674133, + "learning_rate": 0.00014422768911488346, + "loss": 2.6215, + "step": 7148 + }, + { + "epoch": 0.5769510128318941, + "grad_norm": 0.6860779523849487, + "learning_rate": 0.00014421352963367906, + "loss": 2.5877, + "step": 7149 + }, + { + "epoch": 0.5770317165684771, + "grad_norm": 0.732865035533905, + "learning_rate": 0.00014419936905055793, + "loss": 2.5704, + "step": 7150 + }, + { + "epoch": 0.5771124203050602, + "grad_norm": 0.6992458701133728, + "learning_rate": 0.00014418520736587297, + "loss": 2.6654, + "step": 7151 + }, + { + "epoch": 0.5771931240416431, + "grad_norm": 0.6865053176879883, + "learning_rate": 0.00014417104457997715, + "loss": 2.6389, + "step": 7152 + }, + { + "epoch": 0.5772738277782261, + "grad_norm": 0.7652727365493774, + "learning_rate": 0.00014415688069322345, + "loss": 2.6478, + "step": 7153 + }, + { + "epoch": 0.5773545315148091, + "grad_norm": 0.708692193031311, + "learning_rate": 0.0001441427157059648, + "loss": 2.6065, + "step": 7154 + }, + { + "epoch": 0.5774352352513922, + "grad_norm": 0.7549232244491577, + "learning_rate": 0.00014412854961855435, + "loss": 2.6484, + "step": 7155 + }, + { + "epoch": 0.5775159389879752, + "grad_norm": 0.6410655975341797, + "learning_rate": 0.00014411438243134506, + "loss": 2.6061, + "step": 7156 + }, + { + "epoch": 0.5775966427245581, + "grad_norm": 0.7711724042892456, + "learning_rate": 0.00014410021414469005, + "loss": 2.628, + "step": 7157 + }, + { + "epoch": 0.5776773464611411, + "grad_norm": 0.6723695993423462, + "learning_rate": 0.0001440860447589424, + "loss": 2.6214, + "step": 7158 + }, + { + "epoch": 0.5777580501977242, + "grad_norm": 0.7359206676483154, + "learning_rate": 0.0001440718742744553, + "loss": 2.6157, + "step": 7159 + }, + { + "epoch": 0.5778387539343072, + "grad_norm": 0.7320525050163269, + "learning_rate": 0.0001440577026915819, + "loss": 2.6081, + "step": 7160 + }, + { + 
"epoch": 0.5779194576708901, + "grad_norm": 0.7728561162948608, + "learning_rate": 0.00014404353001067535, + "loss": 2.5989, + "step": 7161 + }, + { + "epoch": 0.5780001614074731, + "grad_norm": 0.7380329370498657, + "learning_rate": 0.0001440293562320889, + "loss": 2.6337, + "step": 7162 + }, + { + "epoch": 0.5780808651440562, + "grad_norm": 0.667789876461029, + "learning_rate": 0.00014401518135617581, + "loss": 2.6324, + "step": 7163 + }, + { + "epoch": 0.5781615688806392, + "grad_norm": 0.6907219886779785, + "learning_rate": 0.00014400100538328935, + "loss": 2.5897, + "step": 7164 + }, + { + "epoch": 0.5782422726172222, + "grad_norm": 0.9051530957221985, + "learning_rate": 0.00014398682831378283, + "loss": 2.6895, + "step": 7165 + }, + { + "epoch": 0.5783229763538051, + "grad_norm": 0.7189533114433289, + "learning_rate": 0.00014397265014800956, + "loss": 2.5948, + "step": 7166 + }, + { + "epoch": 0.5784036800903882, + "grad_norm": 0.7003059983253479, + "learning_rate": 0.00014395847088632285, + "loss": 2.5814, + "step": 7167 + }, + { + "epoch": 0.5784843838269712, + "grad_norm": 0.8083534240722656, + "learning_rate": 0.0001439442905290762, + "loss": 2.6131, + "step": 7168 + }, + { + "epoch": 0.5785650875635542, + "grad_norm": 0.7068585157394409, + "learning_rate": 0.0001439301090766229, + "loss": 2.6027, + "step": 7169 + }, + { + "epoch": 0.5786457913001372, + "grad_norm": 0.7010494470596313, + "learning_rate": 0.00014391592652931653, + "loss": 2.5296, + "step": 7170 + }, + { + "epoch": 0.5787264950367202, + "grad_norm": 0.7577467560768127, + "learning_rate": 0.00014390174288751045, + "loss": 2.6347, + "step": 7171 + }, + { + "epoch": 0.5788071987733032, + "grad_norm": 0.643799364566803, + "learning_rate": 0.00014388755815155813, + "loss": 2.6152, + "step": 7172 + }, + { + "epoch": 0.5788879025098862, + "grad_norm": 0.740352988243103, + "learning_rate": 0.00014387337232181315, + "loss": 2.6123, + "step": 7173 + }, + { + "epoch": 0.5789686062464692, + 
"grad_norm": 0.7309309840202332, + "learning_rate": 0.00014385918539862907, + "loss": 2.6072, + "step": 7174 + }, + { + "epoch": 0.5790493099830523, + "grad_norm": 0.7237016558647156, + "learning_rate": 0.00014384499738235941, + "loss": 2.6375, + "step": 7175 + }, + { + "epoch": 0.5791300137196352, + "grad_norm": 0.6600970029830933, + "learning_rate": 0.00014383080827335784, + "loss": 2.5285, + "step": 7176 + }, + { + "epoch": 0.5792107174562182, + "grad_norm": 0.6822233200073242, + "learning_rate": 0.00014381661807197794, + "loss": 2.5497, + "step": 7177 + }, + { + "epoch": 0.5792914211928012, + "grad_norm": 0.6990383863449097, + "learning_rate": 0.00014380242677857337, + "loss": 2.6283, + "step": 7178 + }, + { + "epoch": 0.5793721249293843, + "grad_norm": 0.64422208070755, + "learning_rate": 0.00014378823439349783, + "loss": 2.5762, + "step": 7179 + }, + { + "epoch": 0.5794528286659673, + "grad_norm": 0.63804692029953, + "learning_rate": 0.00014377404091710501, + "loss": 2.5523, + "step": 7180 + }, + { + "epoch": 0.5795335324025502, + "grad_norm": 0.6978863477706909, + "learning_rate": 0.0001437598463497487, + "loss": 2.5089, + "step": 7181 + }, + { + "epoch": 0.5796142361391332, + "grad_norm": 0.7091087698936462, + "learning_rate": 0.00014374565069178257, + "loss": 2.7005, + "step": 7182 + }, + { + "epoch": 0.5796949398757163, + "grad_norm": 0.683659553527832, + "learning_rate": 0.00014373145394356053, + "loss": 2.5988, + "step": 7183 + }, + { + "epoch": 0.5797756436122993, + "grad_norm": 0.7352960705757141, + "learning_rate": 0.00014371725610543633, + "loss": 2.5671, + "step": 7184 + }, + { + "epoch": 0.5798563473488823, + "grad_norm": 0.6951913237571716, + "learning_rate": 0.00014370305717776382, + "loss": 2.5917, + "step": 7185 + }, + { + "epoch": 0.5799370510854652, + "grad_norm": 0.6644465923309326, + "learning_rate": 0.0001436888571608969, + "loss": 2.5954, + "step": 7186 + }, + { + "epoch": 0.5800177548220483, + "grad_norm": 0.7406458258628845, + 
"learning_rate": 0.00014367465605518942, + "loss": 2.6369, + "step": 7187 + }, + { + "epoch": 0.5800984585586313, + "grad_norm": 0.6724697351455688, + "learning_rate": 0.00014366045386099535, + "loss": 2.6227, + "step": 7188 + }, + { + "epoch": 0.5801791622952143, + "grad_norm": 0.6804977059364319, + "learning_rate": 0.00014364625057866867, + "loss": 2.6445, + "step": 7189 + }, + { + "epoch": 0.5802598660317972, + "grad_norm": 0.7020019888877869, + "learning_rate": 0.00014363204620856335, + "loss": 2.6733, + "step": 7190 + }, + { + "epoch": 0.5803405697683802, + "grad_norm": 0.6458491086959839, + "learning_rate": 0.00014361784075103332, + "loss": 2.572, + "step": 7191 + }, + { + "epoch": 0.5804212735049633, + "grad_norm": 0.7078056335449219, + "learning_rate": 0.00014360363420643272, + "loss": 2.7032, + "step": 7192 + }, + { + "epoch": 0.5805019772415463, + "grad_norm": 0.6367471814155579, + "learning_rate": 0.00014358942657511557, + "loss": 2.5369, + "step": 7193 + }, + { + "epoch": 0.5805826809781293, + "grad_norm": 0.7311955094337463, + "learning_rate": 0.00014357521785743596, + "loss": 2.6513, + "step": 7194 + }, + { + "epoch": 0.5806633847147122, + "grad_norm": 0.6957442164421082, + "learning_rate": 0.00014356100805374805, + "loss": 2.6512, + "step": 7195 + }, + { + "epoch": 0.5807440884512953, + "grad_norm": 0.7026693224906921, + "learning_rate": 0.0001435467971644059, + "loss": 2.6049, + "step": 7196 + }, + { + "epoch": 0.5808247921878783, + "grad_norm": 0.7337697744369507, + "learning_rate": 0.00014353258518976376, + "loss": 2.5516, + "step": 7197 + }, + { + "epoch": 0.5809054959244613, + "grad_norm": 0.6891856789588928, + "learning_rate": 0.00014351837213017577, + "loss": 2.5894, + "step": 7198 + }, + { + "epoch": 0.5809861996610443, + "grad_norm": 0.6710659265518188, + "learning_rate": 0.0001435041579859962, + "loss": 2.596, + "step": 7199 + }, + { + "epoch": 0.5810669033976273, + "grad_norm": 0.7637245059013367, + "learning_rate": 0.00014348994275757931, 
+ "loss": 2.6278, + "step": 7200 + }, + { + "epoch": 0.5811476071342103, + "grad_norm": 0.7558664679527283, + "learning_rate": 0.00014347572644527934, + "loss": 2.6917, + "step": 7201 + }, + { + "epoch": 0.5812283108707933, + "grad_norm": 0.7254986763000488, + "learning_rate": 0.00014346150904945065, + "loss": 2.6161, + "step": 7202 + }, + { + "epoch": 0.5813090146073763, + "grad_norm": 0.7177211046218872, + "learning_rate": 0.00014344729057044753, + "loss": 2.555, + "step": 7203 + }, + { + "epoch": 0.5813897183439594, + "grad_norm": 0.6408729553222656, + "learning_rate": 0.00014343307100862432, + "loss": 2.6071, + "step": 7204 + }, + { + "epoch": 0.5814704220805423, + "grad_norm": 0.7399997711181641, + "learning_rate": 0.0001434188503643355, + "loss": 2.6013, + "step": 7205 + }, + { + "epoch": 0.5815511258171253, + "grad_norm": 0.7796236276626587, + "learning_rate": 0.00014340462863793543, + "loss": 2.603, + "step": 7206 + }, + { + "epoch": 0.5816318295537083, + "grad_norm": 0.7420137524604797, + "learning_rate": 0.00014339040582977855, + "loss": 2.5858, + "step": 7207 + }, + { + "epoch": 0.5817125332902914, + "grad_norm": 0.738042414188385, + "learning_rate": 0.00014337618194021928, + "loss": 2.592, + "step": 7208 + }, + { + "epoch": 0.5817932370268744, + "grad_norm": 0.6910614371299744, + "learning_rate": 0.00014336195696961222, + "loss": 2.6448, + "step": 7209 + }, + { + "epoch": 0.5818739407634573, + "grad_norm": 0.7838915586471558, + "learning_rate": 0.00014334773091831185, + "loss": 2.6257, + "step": 7210 + }, + { + "epoch": 0.5819546445000403, + "grad_norm": 0.7362141013145447, + "learning_rate": 0.0001433335037866727, + "loss": 2.6505, + "step": 7211 + }, + { + "epoch": 0.5820353482366234, + "grad_norm": 0.6892269253730774, + "learning_rate": 0.00014331927557504934, + "loss": 2.6518, + "step": 7212 + }, + { + "epoch": 0.5821160519732064, + "grad_norm": 0.7444556951522827, + "learning_rate": 0.0001433050462837964, + "loss": 2.6785, + "step": 7213 + }, + { + 
"epoch": 0.5821967557097893, + "grad_norm": 0.6948450207710266, + "learning_rate": 0.00014329081591326853, + "loss": 2.5753, + "step": 7214 + }, + { + "epoch": 0.5822774594463723, + "grad_norm": 0.713741660118103, + "learning_rate": 0.00014327658446382032, + "loss": 2.6425, + "step": 7215 + }, + { + "epoch": 0.5823581631829554, + "grad_norm": 0.7352245450019836, + "learning_rate": 0.00014326235193580657, + "loss": 2.6859, + "step": 7216 + }, + { + "epoch": 0.5824388669195384, + "grad_norm": 0.7151867151260376, + "learning_rate": 0.00014324811832958187, + "loss": 2.6106, + "step": 7217 + }, + { + "epoch": 0.5825195706561214, + "grad_norm": 0.7003469467163086, + "learning_rate": 0.000143233883645501, + "loss": 2.618, + "step": 7218 + }, + { + "epoch": 0.5826002743927043, + "grad_norm": 0.7139034867286682, + "learning_rate": 0.00014321964788391878, + "loss": 2.5772, + "step": 7219 + }, + { + "epoch": 0.5826809781292874, + "grad_norm": 0.6368305683135986, + "learning_rate": 0.00014320541104518992, + "loss": 2.5259, + "step": 7220 + }, + { + "epoch": 0.5827616818658704, + "grad_norm": 0.6921548247337341, + "learning_rate": 0.0001431911731296693, + "loss": 2.6403, + "step": 7221 + }, + { + "epoch": 0.5828423856024534, + "grad_norm": 0.6995570659637451, + "learning_rate": 0.00014317693413771175, + "loss": 2.6172, + "step": 7222 + }, + { + "epoch": 0.5829230893390364, + "grad_norm": 0.7557246088981628, + "learning_rate": 0.0001431626940696721, + "loss": 2.6347, + "step": 7223 + }, + { + "epoch": 0.5830037930756194, + "grad_norm": 0.6912205219268799, + "learning_rate": 0.00014314845292590528, + "loss": 2.5958, + "step": 7224 + }, + { + "epoch": 0.5830844968122024, + "grad_norm": 0.6896184682846069, + "learning_rate": 0.00014313421070676625, + "loss": 2.569, + "step": 7225 + }, + { + "epoch": 0.5831652005487854, + "grad_norm": 0.6900814771652222, + "learning_rate": 0.00014311996741260994, + "loss": 2.5466, + "step": 7226 + }, + { + "epoch": 0.5832459042853684, + "grad_norm": 
0.7319771647453308, + "learning_rate": 0.00014310572304379132, + "loss": 2.6181, + "step": 7227 + }, + { + "epoch": 0.5833266080219515, + "grad_norm": 0.728138267993927, + "learning_rate": 0.0001430914776006654, + "loss": 2.6644, + "step": 7228 + }, + { + "epoch": 0.5834073117585344, + "grad_norm": 0.7361802458763123, + "learning_rate": 0.0001430772310835872, + "loss": 2.6079, + "step": 7229 + }, + { + "epoch": 0.5834880154951174, + "grad_norm": 0.6893376708030701, + "learning_rate": 0.00014306298349291182, + "loss": 2.5615, + "step": 7230 + }, + { + "epoch": 0.5835687192317004, + "grad_norm": 0.6661401987075806, + "learning_rate": 0.00014304873482899431, + "loss": 2.6028, + "step": 7231 + }, + { + "epoch": 0.5836494229682835, + "grad_norm": 0.6571504473686218, + "learning_rate": 0.0001430344850921898, + "loss": 2.5553, + "step": 7232 + }, + { + "epoch": 0.5837301267048665, + "grad_norm": 0.6878423690795898, + "learning_rate": 0.00014302023428285342, + "loss": 2.5336, + "step": 7233 + }, + { + "epoch": 0.5838108304414494, + "grad_norm": 0.768117368221283, + "learning_rate": 0.00014300598240134035, + "loss": 2.6036, + "step": 7234 + }, + { + "epoch": 0.5838915341780324, + "grad_norm": 0.6876625418663025, + "learning_rate": 0.0001429917294480058, + "loss": 2.6314, + "step": 7235 + }, + { + "epoch": 0.5839722379146155, + "grad_norm": 0.7146790027618408, + "learning_rate": 0.00014297747542320495, + "loss": 2.6029, + "step": 7236 + }, + { + "epoch": 0.5840529416511985, + "grad_norm": 0.7032392024993896, + "learning_rate": 0.00014296322032729308, + "loss": 2.6163, + "step": 7237 + }, + { + "epoch": 0.5841336453877815, + "grad_norm": 0.7323551177978516, + "learning_rate": 0.00014294896416062544, + "loss": 2.6706, + "step": 7238 + }, + { + "epoch": 0.5842143491243644, + "grad_norm": 0.7647258639335632, + "learning_rate": 0.00014293470692355734, + "loss": 2.6744, + "step": 7239 + }, + { + "epoch": 0.5842950528609475, + "grad_norm": 0.6824506521224976, + "learning_rate": 
0.00014292044861644414, + "loss": 2.579, + "step": 7240 + }, + { + "epoch": 0.5843757565975305, + "grad_norm": 0.7553619742393494, + "learning_rate": 0.00014290618923964115, + "loss": 2.6196, + "step": 7241 + }, + { + "epoch": 0.5844564603341135, + "grad_norm": 0.6872109770774841, + "learning_rate": 0.00014289192879350375, + "loss": 2.555, + "step": 7242 + }, + { + "epoch": 0.5845371640706964, + "grad_norm": 0.664658784866333, + "learning_rate": 0.00014287766727838735, + "loss": 2.5781, + "step": 7243 + }, + { + "epoch": 0.5846178678072794, + "grad_norm": 0.6709543466567993, + "learning_rate": 0.00014286340469464744, + "loss": 2.6022, + "step": 7244 + }, + { + "epoch": 0.5846985715438625, + "grad_norm": 0.7236210107803345, + "learning_rate": 0.00014284914104263941, + "loss": 2.5609, + "step": 7245 + }, + { + "epoch": 0.5847792752804455, + "grad_norm": 0.6751740574836731, + "learning_rate": 0.0001428348763227188, + "loss": 2.5792, + "step": 7246 + }, + { + "epoch": 0.5848599790170285, + "grad_norm": 0.6684607267379761, + "learning_rate": 0.0001428206105352411, + "loss": 2.5705, + "step": 7247 + }, + { + "epoch": 0.5849406827536114, + "grad_norm": 0.6876732707023621, + "learning_rate": 0.00014280634368056186, + "loss": 2.6576, + "step": 7248 + }, + { + "epoch": 0.5850213864901945, + "grad_norm": 0.758637547492981, + "learning_rate": 0.0001427920757590366, + "loss": 2.6215, + "step": 7249 + }, + { + "epoch": 0.5851020902267775, + "grad_norm": 0.6839025020599365, + "learning_rate": 0.00014277780677102097, + "loss": 2.5898, + "step": 7250 + }, + { + "epoch": 0.5851827939633605, + "grad_norm": 0.6912671327590942, + "learning_rate": 0.00014276353671687056, + "loss": 2.5879, + "step": 7251 + }, + { + "epoch": 0.5852634976999435, + "grad_norm": 0.6727048754692078, + "learning_rate": 0.00014274926559694107, + "loss": 2.5501, + "step": 7252 + }, + { + "epoch": 0.5853442014365265, + "grad_norm": 0.7031945586204529, + "learning_rate": 0.00014273499341158812, + "loss": 2.625, + 
"step": 7253 + }, + { + "epoch": 0.5854249051731095, + "grad_norm": 0.6886943578720093, + "learning_rate": 0.0001427207201611674, + "loss": 2.6141, + "step": 7254 + }, + { + "epoch": 0.5855056089096925, + "grad_norm": 0.7906915545463562, + "learning_rate": 0.00014270644584603466, + "loss": 2.7189, + "step": 7255 + }, + { + "epoch": 0.5855863126462755, + "grad_norm": 0.6873704195022583, + "learning_rate": 0.00014269217046654567, + "loss": 2.6031, + "step": 7256 + }, + { + "epoch": 0.5856670163828586, + "grad_norm": 0.6655381321907043, + "learning_rate": 0.00014267789402305618, + "loss": 2.5747, + "step": 7257 + }, + { + "epoch": 0.5857477201194415, + "grad_norm": 0.6655673384666443, + "learning_rate": 0.00014266361651592204, + "loss": 2.625, + "step": 7258 + }, + { + "epoch": 0.5858284238560245, + "grad_norm": 0.6752866506576538, + "learning_rate": 0.00014264933794549901, + "loss": 2.5914, + "step": 7259 + }, + { + "epoch": 0.5859091275926075, + "grad_norm": 0.6680975556373596, + "learning_rate": 0.00014263505831214302, + "loss": 2.5572, + "step": 7260 + }, + { + "epoch": 0.5859898313291906, + "grad_norm": 0.6873607039451599, + "learning_rate": 0.00014262077761620994, + "loss": 2.6696, + "step": 7261 + }, + { + "epoch": 0.5860705350657736, + "grad_norm": 0.6745384335517883, + "learning_rate": 0.00014260649585805566, + "loss": 2.5738, + "step": 7262 + }, + { + "epoch": 0.5861512388023565, + "grad_norm": 0.6524637937545776, + "learning_rate": 0.0001425922130380361, + "loss": 2.6209, + "step": 7263 + }, + { + "epoch": 0.5862319425389395, + "grad_norm": 0.6729850172996521, + "learning_rate": 0.00014257792915650728, + "loss": 2.652, + "step": 7264 + }, + { + "epoch": 0.5863126462755226, + "grad_norm": 0.6713503003120422, + "learning_rate": 0.00014256364421382514, + "loss": 2.5658, + "step": 7265 + }, + { + "epoch": 0.5863933500121056, + "grad_norm": 0.6835616827011108, + "learning_rate": 0.00014254935821034575, + "loss": 2.5535, + "step": 7266 + }, + { + "epoch": 
0.5864740537486886, + "grad_norm": 0.7425376176834106, + "learning_rate": 0.00014253507114642515, + "loss": 2.6369, + "step": 7267 + }, + { + "epoch": 0.5865547574852715, + "grad_norm": 0.6788069605827332, + "learning_rate": 0.00014252078302241932, + "loss": 2.601, + "step": 7268 + }, + { + "epoch": 0.5866354612218546, + "grad_norm": 0.6828538179397583, + "learning_rate": 0.0001425064938386845, + "loss": 2.5861, + "step": 7269 + }, + { + "epoch": 0.5867161649584376, + "grad_norm": 0.6763372421264648, + "learning_rate": 0.0001424922035955767, + "loss": 2.6035, + "step": 7270 + }, + { + "epoch": 0.5867968686950206, + "grad_norm": 0.6517930626869202, + "learning_rate": 0.0001424779122934521, + "loss": 2.5564, + "step": 7271 + }, + { + "epoch": 0.5868775724316035, + "grad_norm": 0.6633113622665405, + "learning_rate": 0.00014246361993266692, + "loss": 2.6163, + "step": 7272 + }, + { + "epoch": 0.5869582761681866, + "grad_norm": 0.684822678565979, + "learning_rate": 0.00014244932651357733, + "loss": 2.6057, + "step": 7273 + }, + { + "epoch": 0.5870389799047696, + "grad_norm": 0.7679704427719116, + "learning_rate": 0.00014243503203653952, + "loss": 2.6522, + "step": 7274 + }, + { + "epoch": 0.5871196836413526, + "grad_norm": 0.6834188103675842, + "learning_rate": 0.00014242073650190984, + "loss": 2.652, + "step": 7275 + }, + { + "epoch": 0.5872003873779356, + "grad_norm": 0.6903846859931946, + "learning_rate": 0.00014240643991004449, + "loss": 2.5894, + "step": 7276 + }, + { + "epoch": 0.5872810911145186, + "grad_norm": 0.7060866951942444, + "learning_rate": 0.0001423921422612998, + "loss": 2.5994, + "step": 7277 + }, + { + "epoch": 0.5873617948511016, + "grad_norm": 0.6646741628646851, + "learning_rate": 0.0001423778435560321, + "loss": 2.6432, + "step": 7278 + }, + { + "epoch": 0.5874424985876846, + "grad_norm": 0.6930218935012817, + "learning_rate": 0.0001423635437945978, + "loss": 2.6233, + "step": 7279 + }, + { + "epoch": 0.5875232023242676, + "grad_norm": 
0.6914143562316895, + "learning_rate": 0.00014234924297735322, + "loss": 2.6143, + "step": 7280 + }, + { + "epoch": 0.5876039060608507, + "grad_norm": 0.7351366281509399, + "learning_rate": 0.0001423349411046548, + "loss": 2.6323, + "step": 7281 + }, + { + "epoch": 0.5876846097974336, + "grad_norm": 0.6813770532608032, + "learning_rate": 0.000142320638176859, + "loss": 2.5964, + "step": 7282 + }, + { + "epoch": 0.5877653135340166, + "grad_norm": 0.7049702405929565, + "learning_rate": 0.00014230633419432226, + "loss": 2.6284, + "step": 7283 + }, + { + "epoch": 0.5878460172705996, + "grad_norm": 0.7140446901321411, + "learning_rate": 0.00014229202915740107, + "loss": 2.6113, + "step": 7284 + }, + { + "epoch": 0.5879267210071827, + "grad_norm": 0.696588933467865, + "learning_rate": 0.00014227772306645196, + "loss": 2.6384, + "step": 7285 + }, + { + "epoch": 0.5880074247437657, + "grad_norm": 0.6800615787506104, + "learning_rate": 0.0001422634159218315, + "loss": 2.5743, + "step": 7286 + }, + { + "epoch": 0.5880881284803486, + "grad_norm": 0.7586596608161926, + "learning_rate": 0.00014224910772389624, + "loss": 2.6504, + "step": 7287 + }, + { + "epoch": 0.5881688322169316, + "grad_norm": 0.73286372423172, + "learning_rate": 0.00014223479847300278, + "loss": 2.6026, + "step": 7288 + }, + { + "epoch": 0.5882495359535147, + "grad_norm": 0.6808766722679138, + "learning_rate": 0.00014222048816950772, + "loss": 2.5822, + "step": 7289 + }, + { + "epoch": 0.5883302396900977, + "grad_norm": 0.7424919009208679, + "learning_rate": 0.0001422061768137677, + "loss": 2.6474, + "step": 7290 + }, + { + "epoch": 0.5884109434266807, + "grad_norm": 0.658183753490448, + "learning_rate": 0.00014219186440613948, + "loss": 2.6051, + "step": 7291 + }, + { + "epoch": 0.5884916471632636, + "grad_norm": 0.6693006157875061, + "learning_rate": 0.0001421775509469797, + "loss": 2.5774, + "step": 7292 + }, + { + "epoch": 0.5885723508998466, + "grad_norm": 0.7298646569252014, + "learning_rate": 
0.00014216323643664508, + "loss": 2.5688, + "step": 7293 + }, + { + "epoch": 0.5886530546364297, + "grad_norm": 0.6665881276130676, + "learning_rate": 0.00014214892087549238, + "loss": 2.608, + "step": 7294 + }, + { + "epoch": 0.5887337583730127, + "grad_norm": 0.7220060229301453, + "learning_rate": 0.00014213460426387841, + "loss": 2.6078, + "step": 7295 + }, + { + "epoch": 0.5888144621095956, + "grad_norm": 0.6693970561027527, + "learning_rate": 0.00014212028660215997, + "loss": 2.597, + "step": 7296 + }, + { + "epoch": 0.5888951658461786, + "grad_norm": 0.682331919670105, + "learning_rate": 0.00014210596789069387, + "loss": 2.5752, + "step": 7297 + }, + { + "epoch": 0.5889758695827617, + "grad_norm": 0.7586890459060669, + "learning_rate": 0.000142091648129837, + "loss": 2.6878, + "step": 7298 + }, + { + "epoch": 0.5890565733193447, + "grad_norm": 0.6740901470184326, + "learning_rate": 0.00014207732731994624, + "loss": 2.6083, + "step": 7299 + }, + { + "epoch": 0.5891372770559277, + "grad_norm": 0.6959021091461182, + "learning_rate": 0.00014206300546137842, + "loss": 2.5765, + "step": 7300 + }, + { + "epoch": 0.5892179807925106, + "grad_norm": 0.7446078658103943, + "learning_rate": 0.0001420486825544906, + "loss": 2.662, + "step": 7301 + }, + { + "epoch": 0.5892986845290937, + "grad_norm": 0.7418847680091858, + "learning_rate": 0.0001420343585996397, + "loss": 2.6606, + "step": 7302 + }, + { + "epoch": 0.5893793882656767, + "grad_norm": 0.7185709476470947, + "learning_rate": 0.00014202003359718273, + "loss": 2.563, + "step": 7303 + }, + { + "epoch": 0.5894600920022597, + "grad_norm": 0.6960515379905701, + "learning_rate": 0.00014200570754747664, + "loss": 2.6182, + "step": 7304 + }, + { + "epoch": 0.5895407957388427, + "grad_norm": 0.6589705348014832, + "learning_rate": 0.00014199138045087849, + "loss": 2.6714, + "step": 7305 + }, + { + "epoch": 0.5896214994754257, + "grad_norm": 0.7027507424354553, + "learning_rate": 0.00014197705230774543, + "loss": 2.6145, + 
"step": 7306 + }, + { + "epoch": 0.5897022032120087, + "grad_norm": 0.6761246919631958, + "learning_rate": 0.00014196272311843447, + "loss": 2.5688, + "step": 7307 + }, + { + "epoch": 0.5897829069485917, + "grad_norm": 0.6618059277534485, + "learning_rate": 0.00014194839288330277, + "loss": 2.6194, + "step": 7308 + }, + { + "epoch": 0.5898636106851747, + "grad_norm": 0.7182614803314209, + "learning_rate": 0.00014193406160270747, + "loss": 2.5452, + "step": 7309 + }, + { + "epoch": 0.5899443144217578, + "grad_norm": 0.6830565333366394, + "learning_rate": 0.0001419197292770057, + "loss": 2.5728, + "step": 7310 + }, + { + "epoch": 0.5900250181583407, + "grad_norm": 0.6744499802589417, + "learning_rate": 0.00014190539590655475, + "loss": 2.5736, + "step": 7311 + }, + { + "epoch": 0.5901057218949237, + "grad_norm": 0.7177874445915222, + "learning_rate": 0.00014189106149171176, + "loss": 2.6271, + "step": 7312 + }, + { + "epoch": 0.5901864256315067, + "grad_norm": 0.6770105361938477, + "learning_rate": 0.000141876726032834, + "loss": 2.5924, + "step": 7313 + }, + { + "epoch": 0.5902671293680898, + "grad_norm": 0.7295818328857422, + "learning_rate": 0.0001418623895302788, + "loss": 2.644, + "step": 7314 + }, + { + "epoch": 0.5903478331046728, + "grad_norm": 0.7244859933853149, + "learning_rate": 0.00014184805198440338, + "loss": 2.5892, + "step": 7315 + }, + { + "epoch": 0.5904285368412557, + "grad_norm": 0.7067728638648987, + "learning_rate": 0.00014183371339556512, + "loss": 2.5985, + "step": 7316 + }, + { + "epoch": 0.5905092405778387, + "grad_norm": 0.6732490062713623, + "learning_rate": 0.0001418193737641214, + "loss": 2.5771, + "step": 7317 + }, + { + "epoch": 0.5905899443144218, + "grad_norm": 0.7087544202804565, + "learning_rate": 0.00014180503309042957, + "loss": 2.6373, + "step": 7318 + }, + { + "epoch": 0.5906706480510048, + "grad_norm": 0.772174596786499, + "learning_rate": 0.00014179069137484703, + "loss": 2.6262, + "step": 7319 + }, + { + "epoch": 
0.5907513517875878, + "grad_norm": 0.6855718493461609, + "learning_rate": 0.00014177634861773118, + "loss": 2.6268, + "step": 7320 + }, + { + "epoch": 0.5908320555241707, + "grad_norm": 0.7168720364570618, + "learning_rate": 0.00014176200481943953, + "loss": 2.5892, + "step": 7321 + }, + { + "epoch": 0.5909127592607538, + "grad_norm": 0.7126333713531494, + "learning_rate": 0.0001417476599803296, + "loss": 2.6079, + "step": 7322 + }, + { + "epoch": 0.5909934629973368, + "grad_norm": 0.7451913952827454, + "learning_rate": 0.0001417333141007588, + "loss": 2.635, + "step": 7323 + }, + { + "epoch": 0.5910741667339198, + "grad_norm": 0.7405436038970947, + "learning_rate": 0.00014171896718108475, + "loss": 2.6014, + "step": 7324 + }, + { + "epoch": 0.5911548704705027, + "grad_norm": 0.7583999037742615, + "learning_rate": 0.00014170461922166498, + "loss": 2.6815, + "step": 7325 + }, + { + "epoch": 0.5912355742070858, + "grad_norm": 0.6653509140014648, + "learning_rate": 0.00014169027022285706, + "loss": 2.6153, + "step": 7326 + }, + { + "epoch": 0.5913162779436688, + "grad_norm": 0.7145548462867737, + "learning_rate": 0.00014167592018501864, + "loss": 2.6022, + "step": 7327 + }, + { + "epoch": 0.5913969816802518, + "grad_norm": 0.6996089816093445, + "learning_rate": 0.00014166156910850737, + "loss": 2.6586, + "step": 7328 + }, + { + "epoch": 0.5914776854168348, + "grad_norm": 0.735653281211853, + "learning_rate": 0.0001416472169936809, + "loss": 2.6084, + "step": 7329 + }, + { + "epoch": 0.5915583891534179, + "grad_norm": 0.695036768913269, + "learning_rate": 0.00014163286384089686, + "loss": 2.5058, + "step": 7330 + }, + { + "epoch": 0.5916390928900008, + "grad_norm": 0.9014756679534912, + "learning_rate": 0.00014161850965051307, + "loss": 2.5991, + "step": 7331 + }, + { + "epoch": 0.5917197966265838, + "grad_norm": 0.7079846858978271, + "learning_rate": 0.0001416041544228872, + "loss": 2.6067, + "step": 7332 + }, + { + "epoch": 0.5918005003631668, + "grad_norm": 
0.7681204080581665, + "learning_rate": 0.00014158979815837705, + "loss": 2.5414, + "step": 7333 + }, + { + "epoch": 0.5918812040997499, + "grad_norm": 0.6501670479774475, + "learning_rate": 0.00014157544085734042, + "loss": 2.617, + "step": 7334 + }, + { + "epoch": 0.5919619078363328, + "grad_norm": 0.7573496103286743, + "learning_rate": 0.00014156108252013513, + "loss": 2.6341, + "step": 7335 + }, + { + "epoch": 0.5920426115729158, + "grad_norm": 0.6865558624267578, + "learning_rate": 0.00014154672314711903, + "loss": 2.6229, + "step": 7336 + }, + { + "epoch": 0.5921233153094988, + "grad_norm": 0.6859166622161865, + "learning_rate": 0.00014153236273864995, + "loss": 2.6149, + "step": 7337 + }, + { + "epoch": 0.5922040190460819, + "grad_norm": 0.7603647112846375, + "learning_rate": 0.00014151800129508585, + "loss": 2.5645, + "step": 7338 + }, + { + "epoch": 0.5922847227826649, + "grad_norm": 0.6740217208862305, + "learning_rate": 0.00014150363881678464, + "loss": 2.5883, + "step": 7339 + }, + { + "epoch": 0.5923654265192478, + "grad_norm": 0.6412263512611389, + "learning_rate": 0.00014148927530410426, + "loss": 2.576, + "step": 7340 + }, + { + "epoch": 0.5924461302558308, + "grad_norm": 0.669834315776825, + "learning_rate": 0.00014147491075740265, + "loss": 2.542, + "step": 7341 + }, + { + "epoch": 0.5925268339924139, + "grad_norm": 0.720024049282074, + "learning_rate": 0.00014146054517703786, + "loss": 2.6491, + "step": 7342 + }, + { + "epoch": 0.5926075377289969, + "grad_norm": 0.7191612720489502, + "learning_rate": 0.00014144617856336794, + "loss": 2.5933, + "step": 7343 + }, + { + "epoch": 0.5926882414655799, + "grad_norm": 0.7012050747871399, + "learning_rate": 0.00014143181091675087, + "loss": 2.5253, + "step": 7344 + }, + { + "epoch": 0.5927689452021628, + "grad_norm": 0.7825081944465637, + "learning_rate": 0.00014141744223754478, + "loss": 2.6225, + "step": 7345 + }, + { + "epoch": 0.5928496489387458, + "grad_norm": 0.6699295043945312, + "learning_rate": 
0.00014140307252610775, + "loss": 2.5893, + "step": 7346 + }, + { + "epoch": 0.5929303526753289, + "grad_norm": 0.6668846011161804, + "learning_rate": 0.00014138870178279794, + "loss": 2.5944, + "step": 7347 + }, + { + "epoch": 0.5930110564119119, + "grad_norm": 0.7681072950363159, + "learning_rate": 0.0001413743300079735, + "loss": 2.5715, + "step": 7348 + }, + { + "epoch": 0.5930917601484949, + "grad_norm": 0.653075635433197, + "learning_rate": 0.00014135995720199258, + "loss": 2.5924, + "step": 7349 + }, + { + "epoch": 0.5931724638850778, + "grad_norm": 0.6807504892349243, + "learning_rate": 0.00014134558336521342, + "loss": 2.5395, + "step": 7350 + }, + { + "epoch": 0.5932531676216609, + "grad_norm": 0.681175708770752, + "learning_rate": 0.00014133120849799423, + "loss": 2.5401, + "step": 7351 + }, + { + "epoch": 0.5933338713582439, + "grad_norm": 0.7159900665283203, + "learning_rate": 0.0001413168326006933, + "loss": 2.5684, + "step": 7352 + }, + { + "epoch": 0.5934145750948269, + "grad_norm": 0.6517181992530823, + "learning_rate": 0.00014130245567366888, + "loss": 2.5887, + "step": 7353 + }, + { + "epoch": 0.5934952788314098, + "grad_norm": 0.6982731223106384, + "learning_rate": 0.00014128807771727936, + "loss": 2.5707, + "step": 7354 + }, + { + "epoch": 0.5935759825679929, + "grad_norm": 0.7003650069236755, + "learning_rate": 0.00014127369873188296, + "loss": 2.6415, + "step": 7355 + }, + { + "epoch": 0.5936566863045759, + "grad_norm": 0.7408339977264404, + "learning_rate": 0.0001412593187178381, + "loss": 2.5655, + "step": 7356 + }, + { + "epoch": 0.5937373900411589, + "grad_norm": 0.717218279838562, + "learning_rate": 0.00014124493767550317, + "loss": 2.586, + "step": 7357 + }, + { + "epoch": 0.5938180937777419, + "grad_norm": 0.6723458766937256, + "learning_rate": 0.00014123055560523657, + "loss": 2.593, + "step": 7358 + }, + { + "epoch": 0.593898797514325, + "grad_norm": 0.6861262321472168, + "learning_rate": 0.00014121617250739677, + "loss": 2.612, + 
"step": 7359 + }, + { + "epoch": 0.5939795012509079, + "grad_norm": 0.6811453104019165, + "learning_rate": 0.00014120178838234222, + "loss": 2.5708, + "step": 7360 + }, + { + "epoch": 0.5940602049874909, + "grad_norm": 0.6249656677246094, + "learning_rate": 0.00014118740323043136, + "loss": 2.5604, + "step": 7361 + }, + { + "epoch": 0.5941409087240739, + "grad_norm": 0.7671588659286499, + "learning_rate": 0.00014117301705202274, + "loss": 2.547, + "step": 7362 + }, + { + "epoch": 0.594221612460657, + "grad_norm": 0.6856057643890381, + "learning_rate": 0.00014115862984747496, + "loss": 2.6108, + "step": 7363 + }, + { + "epoch": 0.5943023161972399, + "grad_norm": 0.692331850528717, + "learning_rate": 0.0001411442416171465, + "loss": 2.6347, + "step": 7364 + }, + { + "epoch": 0.5943830199338229, + "grad_norm": 0.7256516814231873, + "learning_rate": 0.000141129852361396, + "loss": 2.6098, + "step": 7365 + }, + { + "epoch": 0.5944637236704059, + "grad_norm": 0.7522590160369873, + "learning_rate": 0.00014111546208058203, + "loss": 2.5688, + "step": 7366 + }, + { + "epoch": 0.594544427406989, + "grad_norm": 0.6915806531906128, + "learning_rate": 0.0001411010707750633, + "loss": 2.5899, + "step": 7367 + }, + { + "epoch": 0.594625131143572, + "grad_norm": 0.7355465292930603, + "learning_rate": 0.00014108667844519844, + "loss": 2.5212, + "step": 7368 + }, + { + "epoch": 0.5947058348801549, + "grad_norm": 0.731002926826477, + "learning_rate": 0.00014107228509134615, + "loss": 2.6369, + "step": 7369 + }, + { + "epoch": 0.5947865386167379, + "grad_norm": 0.6764423251152039, + "learning_rate": 0.0001410578907138652, + "loss": 2.6012, + "step": 7370 + }, + { + "epoch": 0.594867242353321, + "grad_norm": 0.7466071844100952, + "learning_rate": 0.0001410434953131142, + "loss": 2.5822, + "step": 7371 + }, + { + "epoch": 0.594947946089904, + "grad_norm": 0.7276137471199036, + "learning_rate": 0.00014102909888945205, + "loss": 2.6055, + "step": 7372 + }, + { + "epoch": 
0.595028649826487, + "grad_norm": 0.7411746978759766, + "learning_rate": 0.00014101470144323752, + "loss": 2.6489, + "step": 7373 + }, + { + "epoch": 0.5951093535630699, + "grad_norm": 0.7511908411979675, + "learning_rate": 0.0001410003029748294, + "loss": 2.6268, + "step": 7374 + }, + { + "epoch": 0.595190057299653, + "grad_norm": 0.6623562574386597, + "learning_rate": 0.0001409859034845866, + "loss": 2.58, + "step": 7375 + }, + { + "epoch": 0.595270761036236, + "grad_norm": 0.6948572397232056, + "learning_rate": 0.00014097150297286785, + "loss": 2.5811, + "step": 7376 + }, + { + "epoch": 0.595351464772819, + "grad_norm": 0.6836786270141602, + "learning_rate": 0.0001409571014400322, + "loss": 2.5861, + "step": 7377 + }, + { + "epoch": 0.595432168509402, + "grad_norm": 0.6644341945648193, + "learning_rate": 0.00014094269888643854, + "loss": 2.6339, + "step": 7378 + }, + { + "epoch": 0.595512872245985, + "grad_norm": 0.6434289813041687, + "learning_rate": 0.0001409282953124458, + "loss": 2.4897, + "step": 7379 + }, + { + "epoch": 0.595593575982568, + "grad_norm": 0.6745082139968872, + "learning_rate": 0.0001409138907184129, + "loss": 2.522, + "step": 7380 + }, + { + "epoch": 0.595674279719151, + "grad_norm": 0.725321352481842, + "learning_rate": 0.0001408994851046989, + "loss": 2.5711, + "step": 7381 + }, + { + "epoch": 0.595754983455734, + "grad_norm": 0.7485500574111938, + "learning_rate": 0.00014088507847166283, + "loss": 2.6095, + "step": 7382 + }, + { + "epoch": 0.595835687192317, + "grad_norm": 0.721125602722168, + "learning_rate": 0.00014087067081966376, + "loss": 2.6762, + "step": 7383 + }, + { + "epoch": 0.5959163909289, + "grad_norm": 0.7099901437759399, + "learning_rate": 0.00014085626214906073, + "loss": 2.5667, + "step": 7384 + }, + { + "epoch": 0.595997094665483, + "grad_norm": 0.6889060139656067, + "learning_rate": 0.00014084185246021283, + "loss": 2.6723, + "step": 7385 + }, + { + "epoch": 0.596077798402066, + "grad_norm": 0.735698938369751, + 
"learning_rate": 0.00014082744175347923, + "loss": 2.6434, + "step": 7386 + }, + { + "epoch": 0.5961585021386491, + "grad_norm": 0.7603070735931396, + "learning_rate": 0.00014081303002921902, + "loss": 2.665, + "step": 7387 + }, + { + "epoch": 0.596239205875232, + "grad_norm": 0.6786355376243591, + "learning_rate": 0.00014079861728779141, + "loss": 2.5842, + "step": 7388 + }, + { + "epoch": 0.596319909611815, + "grad_norm": 0.6693331003189087, + "learning_rate": 0.00014078420352955565, + "loss": 2.6211, + "step": 7389 + }, + { + "epoch": 0.596400613348398, + "grad_norm": 0.74013751745224, + "learning_rate": 0.0001407697887548709, + "loss": 2.5886, + "step": 7390 + }, + { + "epoch": 0.5964813170849811, + "grad_norm": 0.739507257938385, + "learning_rate": 0.00014075537296409646, + "loss": 2.607, + "step": 7391 + }, + { + "epoch": 0.5965620208215641, + "grad_norm": 0.7121848464012146, + "learning_rate": 0.00014074095615759156, + "loss": 2.6052, + "step": 7392 + }, + { + "epoch": 0.596642724558147, + "grad_norm": 0.7526760697364807, + "learning_rate": 0.00014072653833571556, + "loss": 2.6051, + "step": 7393 + }, + { + "epoch": 0.59672342829473, + "grad_norm": 0.7867496609687805, + "learning_rate": 0.00014071211949882777, + "loss": 2.6228, + "step": 7394 + }, + { + "epoch": 0.596804132031313, + "grad_norm": 0.7527757883071899, + "learning_rate": 0.00014069769964728752, + "loss": 2.6793, + "step": 7395 + }, + { + "epoch": 0.5968848357678961, + "grad_norm": 0.7096899747848511, + "learning_rate": 0.00014068327878145423, + "loss": 2.5207, + "step": 7396 + }, + { + "epoch": 0.5969655395044791, + "grad_norm": 0.6863983869552612, + "learning_rate": 0.00014066885690168726, + "loss": 2.7059, + "step": 7397 + }, + { + "epoch": 0.597046243241062, + "grad_norm": 0.7782251834869385, + "learning_rate": 0.0001406544340083461, + "loss": 2.6232, + "step": 7398 + }, + { + "epoch": 0.597126946977645, + "grad_norm": 0.6944136619567871, + "learning_rate": 0.00014064001010179013, + "loss": 
2.6134, + "step": 7399 + }, + { + "epoch": 0.5972076507142281, + "grad_norm": 0.7629704475402832, + "learning_rate": 0.00014062558518237892, + "loss": 2.5358, + "step": 7400 + }, + { + "epoch": 0.5972883544508111, + "grad_norm": 0.6922330260276794, + "learning_rate": 0.0001406111592504719, + "loss": 2.5457, + "step": 7401 + }, + { + "epoch": 0.597369058187394, + "grad_norm": 0.6992952227592468, + "learning_rate": 0.00014059673230642865, + "loss": 2.6241, + "step": 7402 + }, + { + "epoch": 0.597449761923977, + "grad_norm": 0.6587642431259155, + "learning_rate": 0.0001405823043506087, + "loss": 2.5867, + "step": 7403 + }, + { + "epoch": 0.5975304656605601, + "grad_norm": 0.6993013024330139, + "learning_rate": 0.00014056787538337164, + "loss": 2.6194, + "step": 7404 + }, + { + "epoch": 0.5976111693971431, + "grad_norm": 0.7605414986610413, + "learning_rate": 0.0001405534454050771, + "loss": 2.607, + "step": 7405 + }, + { + "epoch": 0.5976918731337261, + "grad_norm": 0.6624562740325928, + "learning_rate": 0.00014053901441608466, + "loss": 2.5962, + "step": 7406 + }, + { + "epoch": 0.597772576870309, + "grad_norm": 0.7432621717453003, + "learning_rate": 0.000140524582416754, + "loss": 2.6434, + "step": 7407 + }, + { + "epoch": 0.5978532806068921, + "grad_norm": 0.7184053659439087, + "learning_rate": 0.00014051014940744488, + "loss": 2.6139, + "step": 7408 + }, + { + "epoch": 0.5979339843434751, + "grad_norm": 0.7567455768585205, + "learning_rate": 0.00014049571538851687, + "loss": 2.5788, + "step": 7409 + }, + { + "epoch": 0.5980146880800581, + "grad_norm": 0.6759883761405945, + "learning_rate": 0.00014048128036032984, + "loss": 2.5584, + "step": 7410 + }, + { + "epoch": 0.5980953918166411, + "grad_norm": 0.7607424855232239, + "learning_rate": 0.00014046684432324343, + "loss": 2.5675, + "step": 7411 + }, + { + "epoch": 0.5981760955532242, + "grad_norm": 0.7134036421775818, + "learning_rate": 0.00014045240727761748, + "loss": 2.6805, + "step": 7412 + }, + { + "epoch": 
0.5982567992898071, + "grad_norm": 0.6996984481811523, + "learning_rate": 0.00014043796922381184, + "loss": 2.5874, + "step": 7413 + }, + { + "epoch": 0.5983375030263901, + "grad_norm": 0.7098252177238464, + "learning_rate": 0.00014042353016218627, + "loss": 2.5895, + "step": 7414 + }, + { + "epoch": 0.5984182067629731, + "grad_norm": 0.7160520553588867, + "learning_rate": 0.00014040909009310068, + "loss": 2.6042, + "step": 7415 + }, + { + "epoch": 0.5984989104995562, + "grad_norm": 0.6727281212806702, + "learning_rate": 0.00014039464901691493, + "loss": 2.5356, + "step": 7416 + }, + { + "epoch": 0.5985796142361391, + "grad_norm": 0.7052881717681885, + "learning_rate": 0.00014038020693398891, + "loss": 2.6093, + "step": 7417 + }, + { + "epoch": 0.5986603179727221, + "grad_norm": 0.7151781916618347, + "learning_rate": 0.00014036576384468262, + "loss": 2.5776, + "step": 7418 + }, + { + "epoch": 0.5987410217093051, + "grad_norm": 0.7376574873924255, + "learning_rate": 0.0001403513197493559, + "loss": 2.6246, + "step": 7419 + }, + { + "epoch": 0.5988217254458882, + "grad_norm": 0.6882135272026062, + "learning_rate": 0.00014033687464836892, + "loss": 2.6028, + "step": 7420 + }, + { + "epoch": 0.5989024291824712, + "grad_norm": 0.6603999137878418, + "learning_rate": 0.00014032242854208153, + "loss": 2.5897, + "step": 7421 + }, + { + "epoch": 0.5989831329190541, + "grad_norm": 0.7001559734344482, + "learning_rate": 0.0001403079814308538, + "loss": 2.6033, + "step": 7422 + }, + { + "epoch": 0.5990638366556371, + "grad_norm": 0.7184363603591919, + "learning_rate": 0.00014029353331504582, + "loss": 2.7464, + "step": 7423 + }, + { + "epoch": 0.5991445403922202, + "grad_norm": 0.6794769167900085, + "learning_rate": 0.00014027908419501767, + "loss": 2.569, + "step": 7424 + }, + { + "epoch": 0.5992252441288032, + "grad_norm": 0.6846041083335876, + "learning_rate": 0.00014026463407112942, + "loss": 2.5995, + "step": 7425 + }, + { + "epoch": 0.5993059478653862, + "grad_norm": 
0.6539658308029175, + "learning_rate": 0.00014025018294374129, + "loss": 2.5749, + "step": 7426 + }, + { + "epoch": 0.5993866516019691, + "grad_norm": 0.6572301983833313, + "learning_rate": 0.00014023573081321336, + "loss": 2.5312, + "step": 7427 + }, + { + "epoch": 0.5994673553385522, + "grad_norm": 0.7010765671730042, + "learning_rate": 0.00014022127767990581, + "loss": 2.5088, + "step": 7428 + }, + { + "epoch": 0.5995480590751352, + "grad_norm": 0.7193396091461182, + "learning_rate": 0.0001402068235441789, + "loss": 2.6193, + "step": 7429 + }, + { + "epoch": 0.5996287628117182, + "grad_norm": 0.6928533315658569, + "learning_rate": 0.00014019236840639288, + "loss": 2.6149, + "step": 7430 + }, + { + "epoch": 0.5997094665483012, + "grad_norm": 0.743658185005188, + "learning_rate": 0.00014017791226690794, + "loss": 2.5466, + "step": 7431 + }, + { + "epoch": 0.5997901702848842, + "grad_norm": 0.752082347869873, + "learning_rate": 0.0001401634551260844, + "loss": 2.6605, + "step": 7432 + }, + { + "epoch": 0.5998708740214672, + "grad_norm": 0.7280415296554565, + "learning_rate": 0.00014014899698428255, + "loss": 2.6128, + "step": 7433 + }, + { + "epoch": 0.5999515777580502, + "grad_norm": 0.7037710547447205, + "learning_rate": 0.0001401345378418628, + "loss": 2.6157, + "step": 7434 + }, + { + "epoch": 0.6000322814946332, + "grad_norm": 0.6984395980834961, + "learning_rate": 0.00014012007769918542, + "loss": 2.5579, + "step": 7435 + }, + { + "epoch": 0.6001129852312163, + "grad_norm": 0.6853601336479187, + "learning_rate": 0.00014010561655661085, + "loss": 2.6316, + "step": 7436 + }, + { + "epoch": 0.6001936889677992, + "grad_norm": 0.7551750540733337, + "learning_rate": 0.00014009115441449948, + "loss": 2.6671, + "step": 7437 + }, + { + "epoch": 0.6002743927043822, + "grad_norm": 0.7680155038833618, + "learning_rate": 0.0001400766912732117, + "loss": 2.6301, + "step": 7438 + }, + { + "epoch": 0.6003550964409652, + "grad_norm": 0.6757175922393799, + "learning_rate": 
0.00014006222713310807, + "loss": 2.5584, + "step": 7439 + }, + { + "epoch": 0.6004358001775483, + "grad_norm": 0.6636163592338562, + "learning_rate": 0.00014004776199454897, + "loss": 2.5437, + "step": 7440 + }, + { + "epoch": 0.6005165039141312, + "grad_norm": 0.7317774891853333, + "learning_rate": 0.00014003329585789498, + "loss": 2.594, + "step": 7441 + }, + { + "epoch": 0.6005972076507142, + "grad_norm": 0.6903451681137085, + "learning_rate": 0.0001400188287235066, + "loss": 2.6175, + "step": 7442 + }, + { + "epoch": 0.6006779113872972, + "grad_norm": 0.7137858867645264, + "learning_rate": 0.00014000436059174437, + "loss": 2.6411, + "step": 7443 + }, + { + "epoch": 0.6007586151238803, + "grad_norm": 0.7124149203300476, + "learning_rate": 0.00013998989146296893, + "loss": 2.6562, + "step": 7444 + }, + { + "epoch": 0.6008393188604633, + "grad_norm": 0.7518175840377808, + "learning_rate": 0.00013997542133754087, + "loss": 2.6213, + "step": 7445 + }, + { + "epoch": 0.6009200225970462, + "grad_norm": 0.6843053698539734, + "learning_rate": 0.0001399609502158208, + "loss": 2.6099, + "step": 7446 + }, + { + "epoch": 0.6010007263336292, + "grad_norm": 0.6668025255203247, + "learning_rate": 0.0001399464780981694, + "loss": 2.609, + "step": 7447 + }, + { + "epoch": 0.6010814300702122, + "grad_norm": 0.6849119067192078, + "learning_rate": 0.00013993200498494735, + "loss": 2.6097, + "step": 7448 + }, + { + "epoch": 0.6011621338067953, + "grad_norm": 0.7767381072044373, + "learning_rate": 0.0001399175308765153, + "loss": 2.6351, + "step": 7449 + }, + { + "epoch": 0.6012428375433783, + "grad_norm": 0.6630256772041321, + "learning_rate": 0.0001399030557732341, + "loss": 2.5924, + "step": 7450 + }, + { + "epoch": 0.6013235412799612, + "grad_norm": 0.6918755769729614, + "learning_rate": 0.00013988857967546444, + "loss": 2.6205, + "step": 7451 + }, + { + "epoch": 0.6014042450165442, + "grad_norm": 0.7179181575775146, + "learning_rate": 0.00013987410258356708, + "loss": 2.5971, + 
"step": 7452 + }, + { + "epoch": 0.6014849487531273, + "grad_norm": 0.7233672738075256, + "learning_rate": 0.00013985962449790284, + "loss": 2.595, + "step": 7453 + }, + { + "epoch": 0.6015656524897103, + "grad_norm": 0.6861593127250671, + "learning_rate": 0.0001398451454188326, + "loss": 2.6127, + "step": 7454 + }, + { + "epoch": 0.6016463562262933, + "grad_norm": 0.6818981170654297, + "learning_rate": 0.00013983066534671714, + "loss": 2.5923, + "step": 7455 + }, + { + "epoch": 0.6017270599628762, + "grad_norm": 0.700036346912384, + "learning_rate": 0.0001398161842819174, + "loss": 2.5474, + "step": 7456 + }, + { + "epoch": 0.6018077636994593, + "grad_norm": 0.6884824633598328, + "learning_rate": 0.00013980170222479426, + "loss": 2.6041, + "step": 7457 + }, + { + "epoch": 0.6018884674360423, + "grad_norm": 0.6745120286941528, + "learning_rate": 0.00013978721917570866, + "loss": 2.6638, + "step": 7458 + }, + { + "epoch": 0.6019691711726253, + "grad_norm": 0.6886256337165833, + "learning_rate": 0.00013977273513502157, + "loss": 2.5733, + "step": 7459 + }, + { + "epoch": 0.6020498749092082, + "grad_norm": 0.7220930457115173, + "learning_rate": 0.00013975825010309394, + "loss": 2.5739, + "step": 7460 + }, + { + "epoch": 0.6021305786457913, + "grad_norm": 0.7281780242919922, + "learning_rate": 0.0001397437640802868, + "loss": 2.5646, + "step": 7461 + }, + { + "epoch": 0.6022112823823743, + "grad_norm": 0.7316896915435791, + "learning_rate": 0.00013972927706696115, + "loss": 2.6532, + "step": 7462 + }, + { + "epoch": 0.6022919861189573, + "grad_norm": 0.6288646459579468, + "learning_rate": 0.00013971478906347806, + "loss": 2.5753, + "step": 7463 + }, + { + "epoch": 0.6023726898555403, + "grad_norm": 0.7110145688056946, + "learning_rate": 0.00013970030007019862, + "loss": 2.6421, + "step": 7464 + }, + { + "epoch": 0.6024533935921234, + "grad_norm": 0.7437754273414612, + "learning_rate": 0.00013968581008748393, + "loss": 2.585, + "step": 7465 + }, + { + "epoch": 
0.6025340973287063, + "grad_norm": 0.6839718222618103, + "learning_rate": 0.00013967131911569514, + "loss": 2.6249, + "step": 7466 + }, + { + "epoch": 0.6026148010652893, + "grad_norm": 0.7358397841453552, + "learning_rate": 0.00013965682715519332, + "loss": 2.597, + "step": 7467 + }, + { + "epoch": 0.6026955048018723, + "grad_norm": 0.673651397228241, + "learning_rate": 0.00013964233420633973, + "loss": 2.6111, + "step": 7468 + }, + { + "epoch": 0.6027762085384554, + "grad_norm": 0.7390083074569702, + "learning_rate": 0.00013962784026949553, + "loss": 2.6131, + "step": 7469 + }, + { + "epoch": 0.6028569122750383, + "grad_norm": 0.6902220249176025, + "learning_rate": 0.00013961334534502197, + "loss": 2.6116, + "step": 7470 + }, + { + "epoch": 0.6029376160116213, + "grad_norm": 0.6946651935577393, + "learning_rate": 0.00013959884943328033, + "loss": 2.6307, + "step": 7471 + }, + { + "epoch": 0.6030183197482043, + "grad_norm": 0.7277294993400574, + "learning_rate": 0.00013958435253463183, + "loss": 2.6065, + "step": 7472 + }, + { + "epoch": 0.6030990234847874, + "grad_norm": 0.743833601474762, + "learning_rate": 0.00013956985464943776, + "loss": 2.6644, + "step": 7473 + }, + { + "epoch": 0.6031797272213704, + "grad_norm": 0.6480288505554199, + "learning_rate": 0.0001395553557780595, + "loss": 2.5386, + "step": 7474 + }, + { + "epoch": 0.6032604309579533, + "grad_norm": 0.799443781375885, + "learning_rate": 0.00013954085592085834, + "loss": 2.5653, + "step": 7475 + }, + { + "epoch": 0.6033411346945363, + "grad_norm": 0.6790705323219299, + "learning_rate": 0.00013952635507819575, + "loss": 2.6229, + "step": 7476 + }, + { + "epoch": 0.6034218384311194, + "grad_norm": 0.6871588826179504, + "learning_rate": 0.00013951185325043302, + "loss": 2.6514, + "step": 7477 + }, + { + "epoch": 0.6035025421677024, + "grad_norm": 0.7236921787261963, + "learning_rate": 0.00013949735043793164, + "loss": 2.5931, + "step": 7478 + }, + { + "epoch": 0.6035832459042854, + "grad_norm": 
0.6888518929481506, + "learning_rate": 0.00013948284664105305, + "loss": 2.6408, + "step": 7479 + }, + { + "epoch": 0.6036639496408683, + "grad_norm": 0.7292625904083252, + "learning_rate": 0.00013946834186015868, + "loss": 2.5829, + "step": 7480 + }, + { + "epoch": 0.6037446533774514, + "grad_norm": 0.6755293607711792, + "learning_rate": 0.00013945383609561009, + "loss": 2.5917, + "step": 7481 + }, + { + "epoch": 0.6038253571140344, + "grad_norm": 0.6808032989501953, + "learning_rate": 0.00013943932934776877, + "loss": 2.6103, + "step": 7482 + }, + { + "epoch": 0.6039060608506174, + "grad_norm": 0.747173547744751, + "learning_rate": 0.00013942482161699625, + "loss": 2.624, + "step": 7483 + }, + { + "epoch": 0.6039867645872004, + "grad_norm": 0.7265594005584717, + "learning_rate": 0.00013941031290365413, + "loss": 2.5672, + "step": 7484 + }, + { + "epoch": 0.6040674683237834, + "grad_norm": 0.6434060335159302, + "learning_rate": 0.000139395803208104, + "loss": 2.5885, + "step": 7485 + }, + { + "epoch": 0.6041481720603664, + "grad_norm": 0.7148730754852295, + "learning_rate": 0.00013938129253070747, + "loss": 2.6466, + "step": 7486 + }, + { + "epoch": 0.6042288757969494, + "grad_norm": 0.7724708318710327, + "learning_rate": 0.00013936678087182616, + "loss": 2.6364, + "step": 7487 + }, + { + "epoch": 0.6043095795335324, + "grad_norm": 0.6886702179908752, + "learning_rate": 0.0001393522682318218, + "loss": 2.5844, + "step": 7488 + }, + { + "epoch": 0.6043902832701155, + "grad_norm": 0.6501082181930542, + "learning_rate": 0.00013933775461105603, + "loss": 2.5767, + "step": 7489 + }, + { + "epoch": 0.6044709870066984, + "grad_norm": 0.7333959341049194, + "learning_rate": 0.00013932324000989058, + "loss": 2.5735, + "step": 7490 + }, + { + "epoch": 0.6045516907432814, + "grad_norm": 0.7057361602783203, + "learning_rate": 0.00013930872442868722, + "loss": 2.627, + "step": 7491 + }, + { + "epoch": 0.6046323944798644, + "grad_norm": 0.705078661441803, + "learning_rate": 
0.00013929420786780767, + "loss": 2.6012, + "step": 7492 + }, + { + "epoch": 0.6047130982164475, + "grad_norm": 0.7192156314849854, + "learning_rate": 0.00013927969032761378, + "loss": 2.5594, + "step": 7493 + }, + { + "epoch": 0.6047938019530305, + "grad_norm": 0.703116774559021, + "learning_rate": 0.00013926517180846726, + "loss": 2.6099, + "step": 7494 + }, + { + "epoch": 0.6048745056896134, + "grad_norm": 0.6970264315605164, + "learning_rate": 0.00013925065231073006, + "loss": 2.5832, + "step": 7495 + }, + { + "epoch": 0.6049552094261964, + "grad_norm": 0.7308031320571899, + "learning_rate": 0.00013923613183476402, + "loss": 2.586, + "step": 7496 + }, + { + "epoch": 0.6050359131627794, + "grad_norm": 0.7212777137756348, + "learning_rate": 0.00013922161038093097, + "loss": 2.6374, + "step": 7497 + }, + { + "epoch": 0.6051166168993625, + "grad_norm": 0.6644641757011414, + "learning_rate": 0.0001392070879495929, + "loss": 2.5226, + "step": 7498 + }, + { + "epoch": 0.6051973206359454, + "grad_norm": 0.6683016419410706, + "learning_rate": 0.0001391925645411117, + "loss": 2.5279, + "step": 7499 + }, + { + "epoch": 0.6052780243725284, + "grad_norm": 0.7341439127922058, + "learning_rate": 0.00013917804015584932, + "loss": 2.5995, + "step": 7500 + }, + { + "epoch": 0.6053587281091114, + "grad_norm": 0.753942608833313, + "learning_rate": 0.0001391635147941678, + "loss": 2.5706, + "step": 7501 + }, + { + "epoch": 0.6054394318456945, + "grad_norm": 0.7541958093643188, + "learning_rate": 0.00013914898845642908, + "loss": 2.6365, + "step": 7502 + }, + { + "epoch": 0.6055201355822775, + "grad_norm": 0.6583349108695984, + "learning_rate": 0.00013913446114299528, + "loss": 2.534, + "step": 7503 + }, + { + "epoch": 0.6056008393188604, + "grad_norm": 0.6545756459236145, + "learning_rate": 0.00013911993285422835, + "loss": 2.5443, + "step": 7504 + }, + { + "epoch": 0.6056815430554434, + "grad_norm": 0.8290210366249084, + "learning_rate": 0.00013910540359049045, + "loss": 2.6196, + 
"step": 7505 + }, + { + "epoch": 0.6057622467920265, + "grad_norm": 0.7032577395439148, + "learning_rate": 0.0001390908733521437, + "loss": 2.6575, + "step": 7506 + }, + { + "epoch": 0.6058429505286095, + "grad_norm": 0.7018071413040161, + "learning_rate": 0.0001390763421395502, + "loss": 2.6272, + "step": 7507 + }, + { + "epoch": 0.6059236542651925, + "grad_norm": 0.6288552284240723, + "learning_rate": 0.00013906180995307206, + "loss": 2.5295, + "step": 7508 + }, + { + "epoch": 0.6060043580017754, + "grad_norm": 0.7013774514198303, + "learning_rate": 0.00013904727679307153, + "loss": 2.5669, + "step": 7509 + }, + { + "epoch": 0.6060850617383585, + "grad_norm": 0.6811630129814148, + "learning_rate": 0.00013903274265991082, + "loss": 2.5827, + "step": 7510 + }, + { + "epoch": 0.6061657654749415, + "grad_norm": 0.6690269112586975, + "learning_rate": 0.0001390182075539521, + "loss": 2.5947, + "step": 7511 + }, + { + "epoch": 0.6062464692115245, + "grad_norm": 0.6946289539337158, + "learning_rate": 0.00013900367147555768, + "loss": 2.59, + "step": 7512 + }, + { + "epoch": 0.6063271729481075, + "grad_norm": 0.7302843332290649, + "learning_rate": 0.0001389891344250898, + "loss": 2.5994, + "step": 7513 + }, + { + "epoch": 0.6064078766846905, + "grad_norm": 0.7462306022644043, + "learning_rate": 0.00013897459640291074, + "loss": 2.5983, + "step": 7514 + }, + { + "epoch": 0.6064885804212735, + "grad_norm": 0.6948123574256897, + "learning_rate": 0.0001389600574093829, + "loss": 2.5737, + "step": 7515 + }, + { + "epoch": 0.6065692841578565, + "grad_norm": 0.6897372007369995, + "learning_rate": 0.00013894551744486857, + "loss": 2.607, + "step": 7516 + }, + { + "epoch": 0.6066499878944395, + "grad_norm": 0.6808069348335266, + "learning_rate": 0.00013893097650973015, + "loss": 2.5712, + "step": 7517 + }, + { + "epoch": 0.6067306916310226, + "grad_norm": 0.7000731229782104, + "learning_rate": 0.00013891643460433, + "loss": 2.5654, + "step": 7518 + }, + { + "epoch": 
0.6068113953676055, + "grad_norm": 0.7197545766830444, + "learning_rate": 0.0001389018917290306, + "loss": 2.5705, + "step": 7519 + }, + { + "epoch": 0.6068920991041885, + "grad_norm": 0.7001069188117981, + "learning_rate": 0.00013888734788419433, + "loss": 2.5934, + "step": 7520 + }, + { + "epoch": 0.6069728028407715, + "grad_norm": 0.7480459213256836, + "learning_rate": 0.00013887280307018377, + "loss": 2.5211, + "step": 7521 + }, + { + "epoch": 0.6070535065773546, + "grad_norm": 0.6913945078849792, + "learning_rate": 0.00013885825728736132, + "loss": 2.6013, + "step": 7522 + }, + { + "epoch": 0.6071342103139376, + "grad_norm": 0.6527336239814758, + "learning_rate": 0.00013884371053608948, + "loss": 2.5901, + "step": 7523 + }, + { + "epoch": 0.6072149140505205, + "grad_norm": 0.6897335052490234, + "learning_rate": 0.00013882916281673086, + "loss": 2.5389, + "step": 7524 + }, + { + "epoch": 0.6072956177871035, + "grad_norm": 0.7159501910209656, + "learning_rate": 0.00013881461412964798, + "loss": 2.5399, + "step": 7525 + }, + { + "epoch": 0.6073763215236866, + "grad_norm": 0.6744364500045776, + "learning_rate": 0.00013880006447520346, + "loss": 2.5658, + "step": 7526 + }, + { + "epoch": 0.6074570252602696, + "grad_norm": 0.819950520992279, + "learning_rate": 0.00013878551385375994, + "loss": 2.6143, + "step": 7527 + }, + { + "epoch": 0.6075377289968525, + "grad_norm": 0.744293212890625, + "learning_rate": 0.00013877096226568, + "loss": 2.6565, + "step": 7528 + }, + { + "epoch": 0.6076184327334355, + "grad_norm": 0.7121254205703735, + "learning_rate": 0.00013875640971132636, + "loss": 2.6151, + "step": 7529 + }, + { + "epoch": 0.6076991364700186, + "grad_norm": 0.7616204023361206, + "learning_rate": 0.00013874185619106163, + "loss": 2.6395, + "step": 7530 + }, + { + "epoch": 0.6077798402066016, + "grad_norm": 0.7481076121330261, + "learning_rate": 0.0001387273017052486, + "loss": 2.597, + "step": 7531 + }, + { + "epoch": 0.6078605439431846, + "grad_norm": 
0.6660816073417664, + "learning_rate": 0.00013871274625425, + "loss": 2.5696, + "step": 7532 + }, + { + "epoch": 0.6079412476797675, + "grad_norm": 0.7491411566734314, + "learning_rate": 0.00013869818983842854, + "loss": 2.552, + "step": 7533 + }, + { + "epoch": 0.6080219514163506, + "grad_norm": 0.7130792140960693, + "learning_rate": 0.00013868363245814704, + "loss": 2.5959, + "step": 7534 + }, + { + "epoch": 0.6081026551529336, + "grad_norm": 0.7157341241836548, + "learning_rate": 0.00013866907411376827, + "loss": 2.5598, + "step": 7535 + }, + { + "epoch": 0.6081833588895166, + "grad_norm": 0.7750656008720398, + "learning_rate": 0.00013865451480565513, + "loss": 2.6217, + "step": 7536 + }, + { + "epoch": 0.6082640626260996, + "grad_norm": 0.6915080547332764, + "learning_rate": 0.00013863995453417043, + "loss": 2.6211, + "step": 7537 + }, + { + "epoch": 0.6083447663626826, + "grad_norm": 0.7245940566062927, + "learning_rate": 0.00013862539329967706, + "loss": 2.5619, + "step": 7538 + }, + { + "epoch": 0.6084254700992656, + "grad_norm": 0.8884119391441345, + "learning_rate": 0.0001386108311025379, + "loss": 2.6349, + "step": 7539 + }, + { + "epoch": 0.6085061738358486, + "grad_norm": 0.7889477610588074, + "learning_rate": 0.0001385962679431159, + "loss": 2.6169, + "step": 7540 + }, + { + "epoch": 0.6085868775724316, + "grad_norm": 0.7187505960464478, + "learning_rate": 0.00013858170382177403, + "loss": 2.5582, + "step": 7541 + }, + { + "epoch": 0.6086675813090147, + "grad_norm": 0.7502198219299316, + "learning_rate": 0.00013856713873887526, + "loss": 2.5418, + "step": 7542 + }, + { + "epoch": 0.6087482850455976, + "grad_norm": 0.797704815864563, + "learning_rate": 0.00013855257269478256, + "loss": 2.5764, + "step": 7543 + }, + { + "epoch": 0.6088289887821806, + "grad_norm": 0.7651431560516357, + "learning_rate": 0.00013853800568985896, + "loss": 2.5995, + "step": 7544 + }, + { + "epoch": 0.6089096925187636, + "grad_norm": 0.7048482298851013, + "learning_rate": 
0.00013852343772446753, + "loss": 2.5656, + "step": 7545 + }, + { + "epoch": 0.6089903962553467, + "grad_norm": 0.7252251505851746, + "learning_rate": 0.00013850886879897135, + "loss": 2.6509, + "step": 7546 + }, + { + "epoch": 0.6090710999919297, + "grad_norm": 0.7220067381858826, + "learning_rate": 0.00013849429891373344, + "loss": 2.5558, + "step": 7547 + }, + { + "epoch": 0.6091518037285126, + "grad_norm": 0.7672600746154785, + "learning_rate": 0.000138479728069117, + "loss": 2.5682, + "step": 7548 + }, + { + "epoch": 0.6092325074650956, + "grad_norm": 0.7753601670265198, + "learning_rate": 0.0001384651562654852, + "loss": 2.6459, + "step": 7549 + }, + { + "epoch": 0.6093132112016786, + "grad_norm": 0.7346559166908264, + "learning_rate": 0.00013845058350320108, + "loss": 2.5988, + "step": 7550 + }, + { + "epoch": 0.6093939149382617, + "grad_norm": 0.7386072874069214, + "learning_rate": 0.00013843600978262797, + "loss": 2.6366, + "step": 7551 + }, + { + "epoch": 0.6094746186748446, + "grad_norm": 0.7114188075065613, + "learning_rate": 0.00013842143510412898, + "loss": 2.5515, + "step": 7552 + }, + { + "epoch": 0.6095553224114276, + "grad_norm": 0.6836373209953308, + "learning_rate": 0.00013840685946806742, + "loss": 2.6301, + "step": 7553 + }, + { + "epoch": 0.6096360261480106, + "grad_norm": 0.7548927068710327, + "learning_rate": 0.00013839228287480652, + "loss": 2.6508, + "step": 7554 + }, + { + "epoch": 0.6097167298845937, + "grad_norm": 0.6931679248809814, + "learning_rate": 0.00013837770532470957, + "loss": 2.5535, + "step": 7555 + }, + { + "epoch": 0.6097974336211767, + "grad_norm": 0.7621145248413086, + "learning_rate": 0.00013836312681813988, + "loss": 2.6831, + "step": 7556 + }, + { + "epoch": 0.6098781373577596, + "grad_norm": 0.6735427975654602, + "learning_rate": 0.00013834854735546079, + "loss": 2.5338, + "step": 7557 + }, + { + "epoch": 0.6099588410943426, + "grad_norm": 0.7157600522041321, + "learning_rate": 0.00013833396693703565, + "loss": 
2.5713, + "step": 7558 + }, + { + "epoch": 0.6100395448309257, + "grad_norm": 0.718032956123352, + "learning_rate": 0.00013831938556322789, + "loss": 2.5625, + "step": 7559 + }, + { + "epoch": 0.6101202485675087, + "grad_norm": 0.7290309071540833, + "learning_rate": 0.0001383048032344008, + "loss": 2.5956, + "step": 7560 + }, + { + "epoch": 0.6102009523040917, + "grad_norm": 0.675470769405365, + "learning_rate": 0.00013829021995091792, + "loss": 2.6053, + "step": 7561 + }, + { + "epoch": 0.6102816560406746, + "grad_norm": 0.7348767518997192, + "learning_rate": 0.00013827563571314268, + "loss": 2.6174, + "step": 7562 + }, + { + "epoch": 0.6103623597772577, + "grad_norm": 0.64495849609375, + "learning_rate": 0.00013826105052143852, + "loss": 2.5923, + "step": 7563 + }, + { + "epoch": 0.6104430635138407, + "grad_norm": 0.7379264235496521, + "learning_rate": 0.000138246464376169, + "loss": 2.6438, + "step": 7564 + }, + { + "epoch": 0.6105237672504237, + "grad_norm": 0.7802134156227112, + "learning_rate": 0.00013823187727769756, + "loss": 2.5884, + "step": 7565 + }, + { + "epoch": 0.6106044709870067, + "grad_norm": 0.6907222867012024, + "learning_rate": 0.00013821728922638782, + "loss": 2.596, + "step": 7566 + }, + { + "epoch": 0.6106851747235897, + "grad_norm": 0.6924182176589966, + "learning_rate": 0.00013820270022260335, + "loss": 2.5631, + "step": 7567 + }, + { + "epoch": 0.6107658784601727, + "grad_norm": 0.729258120059967, + "learning_rate": 0.0001381881102667077, + "loss": 2.5761, + "step": 7568 + }, + { + "epoch": 0.6108465821967557, + "grad_norm": 0.7141425013542175, + "learning_rate": 0.00013817351935906455, + "loss": 2.6214, + "step": 7569 + }, + { + "epoch": 0.6109272859333387, + "grad_norm": 0.7564505338668823, + "learning_rate": 0.00013815892750003748, + "loss": 2.6338, + "step": 7570 + }, + { + "epoch": 0.6110079896699218, + "grad_norm": 0.674705982208252, + "learning_rate": 0.00013814433468999022, + "loss": 2.5604, + "step": 7571 + }, + { + "epoch": 
0.6110886934065047, + "grad_norm": 0.6956657767295837, + "learning_rate": 0.00013812974092928642, + "loss": 2.5805, + "step": 7572 + }, + { + "epoch": 0.6111693971430877, + "grad_norm": 0.7393823862075806, + "learning_rate": 0.0001381151462182898, + "loss": 2.6312, + "step": 7573 + }, + { + "epoch": 0.6112501008796707, + "grad_norm": 0.7048184275627136, + "learning_rate": 0.00013810055055736407, + "loss": 2.5948, + "step": 7574 + }, + { + "epoch": 0.6113308046162538, + "grad_norm": 0.748798668384552, + "learning_rate": 0.0001380859539468731, + "loss": 2.5815, + "step": 7575 + }, + { + "epoch": 0.6114115083528368, + "grad_norm": 0.7146531343460083, + "learning_rate": 0.00013807135638718048, + "loss": 2.5803, + "step": 7576 + }, + { + "epoch": 0.6114922120894197, + "grad_norm": 0.6883770823478699, + "learning_rate": 0.00013805675787865025, + "loss": 2.6005, + "step": 7577 + }, + { + "epoch": 0.6115729158260027, + "grad_norm": 0.7808375358581543, + "learning_rate": 0.0001380421584216461, + "loss": 2.6539, + "step": 7578 + }, + { + "epoch": 0.6116536195625858, + "grad_norm": 0.6919417977333069, + "learning_rate": 0.00013802755801653192, + "loss": 2.5812, + "step": 7579 + }, + { + "epoch": 0.6117343232991688, + "grad_norm": 0.6651085615158081, + "learning_rate": 0.0001380129566636716, + "loss": 2.5952, + "step": 7580 + }, + { + "epoch": 0.6118150270357517, + "grad_norm": 0.7806586623191833, + "learning_rate": 0.00013799835436342897, + "loss": 2.6509, + "step": 7581 + }, + { + "epoch": 0.6118957307723347, + "grad_norm": 0.6522969007492065, + "learning_rate": 0.0001379837511161681, + "loss": 2.606, + "step": 7582 + }, + { + "epoch": 0.6119764345089178, + "grad_norm": 0.7566540837287903, + "learning_rate": 0.0001379691469222528, + "loss": 2.6625, + "step": 7583 + }, + { + "epoch": 0.6120571382455008, + "grad_norm": 0.7126421928405762, + "learning_rate": 0.00013795454178204715, + "loss": 2.6396, + "step": 7584 + }, + { + "epoch": 0.6121378419820838, + "grad_norm": 
0.6534276008605957, + "learning_rate": 0.0001379399356959151, + "loss": 2.5841, + "step": 7585 + }, + { + "epoch": 0.6122185457186667, + "grad_norm": 0.7663385272026062, + "learning_rate": 0.00013792532866422065, + "loss": 2.6685, + "step": 7586 + }, + { + "epoch": 0.6122992494552498, + "grad_norm": 0.6971656084060669, + "learning_rate": 0.0001379107206873279, + "loss": 2.6036, + "step": 7587 + }, + { + "epoch": 0.6123799531918328, + "grad_norm": 0.6807122230529785, + "learning_rate": 0.00013789611176560088, + "loss": 2.6499, + "step": 7588 + }, + { + "epoch": 0.6124606569284158, + "grad_norm": 0.6712431311607361, + "learning_rate": 0.0001378815018994037, + "loss": 2.6725, + "step": 7589 + }, + { + "epoch": 0.6125413606649988, + "grad_norm": 0.6986604928970337, + "learning_rate": 0.00013786689108910045, + "loss": 2.6159, + "step": 7590 + }, + { + "epoch": 0.6126220644015818, + "grad_norm": 0.7004108428955078, + "learning_rate": 0.0001378522793350553, + "loss": 2.5743, + "step": 7591 + }, + { + "epoch": 0.6127027681381648, + "grad_norm": 0.6782098412513733, + "learning_rate": 0.00013783766663763239, + "loss": 2.5776, + "step": 7592 + }, + { + "epoch": 0.6127834718747478, + "grad_norm": 0.6697036027908325, + "learning_rate": 0.00013782305299719593, + "loss": 2.6195, + "step": 7593 + }, + { + "epoch": 0.6128641756113308, + "grad_norm": 0.6894395351409912, + "learning_rate": 0.00013780843841411014, + "loss": 2.662, + "step": 7594 + }, + { + "epoch": 0.6129448793479139, + "grad_norm": 0.6775636672973633, + "learning_rate": 0.00013779382288873918, + "loss": 2.6083, + "step": 7595 + }, + { + "epoch": 0.6130255830844968, + "grad_norm": 0.7143577337265015, + "learning_rate": 0.00013777920642144738, + "loss": 2.581, + "step": 7596 + }, + { + "epoch": 0.6131062868210798, + "grad_norm": 0.6143797636032104, + "learning_rate": 0.00013776458901259905, + "loss": 2.541, + "step": 7597 + }, + { + "epoch": 0.6131869905576628, + "grad_norm": 0.7003727555274963, + "learning_rate": 
0.00013774997066255839, + "loss": 2.5748, + "step": 7598 + }, + { + "epoch": 0.6132676942942458, + "grad_norm": 0.6796504259109497, + "learning_rate": 0.0001377353513716898, + "loss": 2.596, + "step": 7599 + }, + { + "epoch": 0.6133483980308289, + "grad_norm": 0.7011274695396423, + "learning_rate": 0.00013772073114035762, + "loss": 2.5318, + "step": 7600 + }, + { + "epoch": 0.6134291017674118, + "grad_norm": 0.6584382057189941, + "learning_rate": 0.0001377061099689262, + "loss": 2.5793, + "step": 7601 + }, + { + "epoch": 0.6135098055039948, + "grad_norm": 0.6586211919784546, + "learning_rate": 0.00013769148785775995, + "loss": 2.5969, + "step": 7602 + }, + { + "epoch": 0.6135905092405778, + "grad_norm": 0.7187132835388184, + "learning_rate": 0.0001376768648072233, + "loss": 2.6407, + "step": 7603 + }, + { + "epoch": 0.6136712129771609, + "grad_norm": 0.7394679188728333, + "learning_rate": 0.00013766224081768072, + "loss": 2.5959, + "step": 7604 + }, + { + "epoch": 0.6137519167137439, + "grad_norm": 0.6802375912666321, + "learning_rate": 0.00013764761588949665, + "loss": 2.5956, + "step": 7605 + }, + { + "epoch": 0.6138326204503268, + "grad_norm": 0.6949049234390259, + "learning_rate": 0.00013763299002303553, + "loss": 2.556, + "step": 7606 + }, + { + "epoch": 0.6139133241869098, + "grad_norm": 0.7406589388847351, + "learning_rate": 0.00013761836321866196, + "loss": 2.5495, + "step": 7607 + }, + { + "epoch": 0.6139940279234929, + "grad_norm": 0.742499053478241, + "learning_rate": 0.0001376037354767404, + "loss": 2.589, + "step": 7608 + }, + { + "epoch": 0.6140747316600759, + "grad_norm": 0.7669157385826111, + "learning_rate": 0.00013758910679763551, + "loss": 2.576, + "step": 7609 + }, + { + "epoch": 0.6141554353966588, + "grad_norm": 0.6506752967834473, + "learning_rate": 0.00013757447718171182, + "loss": 2.5792, + "step": 7610 + }, + { + "epoch": 0.6142361391332418, + "grad_norm": 0.698514461517334, + "learning_rate": 0.00013755984662933393, + "loss": 2.5809, + 
"step": 7611 + }, + { + "epoch": 0.6143168428698249, + "grad_norm": 0.6541082262992859, + "learning_rate": 0.00013754521514086645, + "loss": 2.5755, + "step": 7612 + }, + { + "epoch": 0.6143975466064079, + "grad_norm": 0.6619362235069275, + "learning_rate": 0.0001375305827166741, + "loss": 2.5886, + "step": 7613 + }, + { + "epoch": 0.6144782503429909, + "grad_norm": 0.7205569744110107, + "learning_rate": 0.00013751594935712148, + "loss": 2.6293, + "step": 7614 + }, + { + "epoch": 0.6145589540795738, + "grad_norm": 0.7382494211196899, + "learning_rate": 0.00013750131506257339, + "loss": 2.6977, + "step": 7615 + }, + { + "epoch": 0.6146396578161569, + "grad_norm": 0.7492627501487732, + "learning_rate": 0.00013748667983339444, + "loss": 2.6492, + "step": 7616 + }, + { + "epoch": 0.6147203615527399, + "grad_norm": 0.6627328991889954, + "learning_rate": 0.00013747204366994947, + "loss": 2.5458, + "step": 7617 + }, + { + "epoch": 0.6148010652893229, + "grad_norm": 0.7039626836776733, + "learning_rate": 0.00013745740657260323, + "loss": 2.6578, + "step": 7618 + }, + { + "epoch": 0.6148817690259059, + "grad_norm": 0.6999295353889465, + "learning_rate": 0.00013744276854172046, + "loss": 2.6189, + "step": 7619 + }, + { + "epoch": 0.6149624727624889, + "grad_norm": 0.7604365348815918, + "learning_rate": 0.00013742812957766607, + "loss": 2.5344, + "step": 7620 + }, + { + "epoch": 0.6150431764990719, + "grad_norm": 0.6860831379890442, + "learning_rate": 0.0001374134896808048, + "loss": 2.6309, + "step": 7621 + }, + { + "epoch": 0.6151238802356549, + "grad_norm": 0.6628854274749756, + "learning_rate": 0.0001373988488515016, + "loss": 2.6339, + "step": 7622 + }, + { + "epoch": 0.6152045839722379, + "grad_norm": 0.7112562656402588, + "learning_rate": 0.00013738420709012134, + "loss": 2.6064, + "step": 7623 + }, + { + "epoch": 0.615285287708821, + "grad_norm": 0.7068392634391785, + "learning_rate": 0.0001373695643970289, + "loss": 2.624, + "step": 7624 + }, + { + "epoch": 
0.6153659914454039, + "grad_norm": 0.6534786224365234, + "learning_rate": 0.00013735492077258924, + "loss": 2.5582, + "step": 7625 + }, + { + "epoch": 0.6154466951819869, + "grad_norm": 0.7433418035507202, + "learning_rate": 0.00013734027621716729, + "loss": 2.5803, + "step": 7626 + }, + { + "epoch": 0.6155273989185699, + "grad_norm": 0.7172532081604004, + "learning_rate": 0.00013732563073112804, + "loss": 2.5906, + "step": 7627 + }, + { + "epoch": 0.615608102655153, + "grad_norm": 0.6712297201156616, + "learning_rate": 0.00013731098431483653, + "loss": 2.5597, + "step": 7628 + }, + { + "epoch": 0.615688806391736, + "grad_norm": 0.7079061269760132, + "learning_rate": 0.00013729633696865775, + "loss": 2.5538, + "step": 7629 + }, + { + "epoch": 0.6157695101283189, + "grad_norm": 0.6968971490859985, + "learning_rate": 0.00013728168869295678, + "loss": 2.6429, + "step": 7630 + }, + { + "epoch": 0.6158502138649019, + "grad_norm": 0.7123236060142517, + "learning_rate": 0.00013726703948809864, + "loss": 2.5607, + "step": 7631 + }, + { + "epoch": 0.615930917601485, + "grad_norm": 0.6441208124160767, + "learning_rate": 0.00013725238935444843, + "loss": 2.6176, + "step": 7632 + }, + { + "epoch": 0.616011621338068, + "grad_norm": 0.7145917415618896, + "learning_rate": 0.00013723773829237137, + "loss": 2.5698, + "step": 7633 + }, + { + "epoch": 0.616092325074651, + "grad_norm": 0.6397334337234497, + "learning_rate": 0.00013722308630223252, + "loss": 2.596, + "step": 7634 + }, + { + "epoch": 0.6161730288112339, + "grad_norm": 0.6372843980789185, + "learning_rate": 0.00013720843338439702, + "loss": 2.5679, + "step": 7635 + }, + { + "epoch": 0.616253732547817, + "grad_norm": 0.707842230796814, + "learning_rate": 0.00013719377953923012, + "loss": 2.6296, + "step": 7636 + }, + { + "epoch": 0.6163344362844, + "grad_norm": 0.6629409193992615, + "learning_rate": 0.000137179124767097, + "loss": 2.542, + "step": 7637 + }, + { + "epoch": 0.616415140020983, + "grad_norm": 
0.753646194934845, + "learning_rate": 0.00013716446906836288, + "loss": 2.5741, + "step": 7638 + }, + { + "epoch": 0.6164958437575659, + "grad_norm": 0.6409948468208313, + "learning_rate": 0.0001371498124433931, + "loss": 2.6723, + "step": 7639 + }, + { + "epoch": 0.616576547494149, + "grad_norm": 0.6489264965057373, + "learning_rate": 0.0001371351548925528, + "loss": 2.5806, + "step": 7640 + }, + { + "epoch": 0.616657251230732, + "grad_norm": 0.6857934594154358, + "learning_rate": 0.00013712049641620745, + "loss": 2.6406, + "step": 7641 + }, + { + "epoch": 0.616737954967315, + "grad_norm": 0.6754183769226074, + "learning_rate": 0.00013710583701472226, + "loss": 2.5576, + "step": 7642 + }, + { + "epoch": 0.616818658703898, + "grad_norm": 0.7083800435066223, + "learning_rate": 0.0001370911766884626, + "loss": 2.5747, + "step": 7643 + }, + { + "epoch": 0.616899362440481, + "grad_norm": 0.7281948924064636, + "learning_rate": 0.0001370765154377939, + "loss": 2.5627, + "step": 7644 + }, + { + "epoch": 0.616980066177064, + "grad_norm": 0.655414342880249, + "learning_rate": 0.00013706185326308148, + "loss": 2.5897, + "step": 7645 + }, + { + "epoch": 0.617060769913647, + "grad_norm": 0.6771859526634216, + "learning_rate": 0.0001370471901646908, + "loss": 2.5761, + "step": 7646 + }, + { + "epoch": 0.61714147365023, + "grad_norm": 0.6813557147979736, + "learning_rate": 0.00013703252614298732, + "loss": 2.5807, + "step": 7647 + }, + { + "epoch": 0.6172221773868131, + "grad_norm": 0.6948046684265137, + "learning_rate": 0.00013701786119833646, + "loss": 2.586, + "step": 7648 + }, + { + "epoch": 0.617302881123396, + "grad_norm": 0.643455982208252, + "learning_rate": 0.00013700319533110377, + "loss": 2.592, + "step": 7649 + }, + { + "epoch": 0.617383584859979, + "grad_norm": 0.7292457818984985, + "learning_rate": 0.0001369885285416547, + "loss": 2.6396, + "step": 7650 + }, + { + "epoch": 0.617464288596562, + "grad_norm": 0.642902672290802, + "learning_rate": 
0.00013697386083035478, + "loss": 2.6115, + "step": 7651 + }, + { + "epoch": 0.617544992333145, + "grad_norm": 0.6536445021629333, + "learning_rate": 0.00013695919219756966, + "loss": 2.5406, + "step": 7652 + }, + { + "epoch": 0.6176256960697281, + "grad_norm": 0.6643723249435425, + "learning_rate": 0.0001369445226436648, + "loss": 2.6188, + "step": 7653 + }, + { + "epoch": 0.617706399806311, + "grad_norm": 0.6481621265411377, + "learning_rate": 0.00013692985216900592, + "loss": 2.5489, + "step": 7654 + }, + { + "epoch": 0.617787103542894, + "grad_norm": 0.6828036904335022, + "learning_rate": 0.00013691518077395856, + "loss": 2.5114, + "step": 7655 + }, + { + "epoch": 0.617867807279477, + "grad_norm": 0.6802895665168762, + "learning_rate": 0.00013690050845888838, + "loss": 2.5973, + "step": 7656 + }, + { + "epoch": 0.6179485110160601, + "grad_norm": 0.6980829238891602, + "learning_rate": 0.00013688583522416107, + "loss": 2.6032, + "step": 7657 + }, + { + "epoch": 0.618029214752643, + "grad_norm": 0.7157626748085022, + "learning_rate": 0.00013687116107014236, + "loss": 2.5552, + "step": 7658 + }, + { + "epoch": 0.618109918489226, + "grad_norm": 0.69700688123703, + "learning_rate": 0.00013685648599719792, + "loss": 2.5988, + "step": 7659 + }, + { + "epoch": 0.618190622225809, + "grad_norm": 0.6859539151191711, + "learning_rate": 0.0001368418100056935, + "loss": 2.6268, + "step": 7660 + }, + { + "epoch": 0.6182713259623921, + "grad_norm": 0.6812828183174133, + "learning_rate": 0.00013682713309599487, + "loss": 2.6002, + "step": 7661 + }, + { + "epoch": 0.6183520296989751, + "grad_norm": 0.6461766362190247, + "learning_rate": 0.00013681245526846783, + "loss": 2.6064, + "step": 7662 + }, + { + "epoch": 0.618432733435558, + "grad_norm": 0.7198306322097778, + "learning_rate": 0.00013679777652347814, + "loss": 2.6012, + "step": 7663 + }, + { + "epoch": 0.618513437172141, + "grad_norm": 0.7367191910743713, + "learning_rate": 0.00013678309686139168, + "loss": 2.6661, + 
"step": 7664 + }, + { + "epoch": 0.6185941409087241, + "grad_norm": 0.6975768804550171, + "learning_rate": 0.0001367684162825743, + "loss": 2.6394, + "step": 7665 + }, + { + "epoch": 0.6186748446453071, + "grad_norm": 0.7545140385627747, + "learning_rate": 0.0001367537347873919, + "loss": 2.624, + "step": 7666 + }, + { + "epoch": 0.6187555483818901, + "grad_norm": 0.6683520674705505, + "learning_rate": 0.0001367390523762103, + "loss": 2.6345, + "step": 7667 + }, + { + "epoch": 0.618836252118473, + "grad_norm": 0.6964975595474243, + "learning_rate": 0.00013672436904939552, + "loss": 2.591, + "step": 7668 + }, + { + "epoch": 0.6189169558550561, + "grad_norm": 0.7033975124359131, + "learning_rate": 0.00013670968480731344, + "loss": 2.566, + "step": 7669 + }, + { + "epoch": 0.6189976595916391, + "grad_norm": 0.706136167049408, + "learning_rate": 0.00013669499965033007, + "loss": 2.6073, + "step": 7670 + }, + { + "epoch": 0.6190783633282221, + "grad_norm": 0.7146300673484802, + "learning_rate": 0.0001366803135788114, + "loss": 2.6602, + "step": 7671 + }, + { + "epoch": 0.6191590670648051, + "grad_norm": 0.7603063583374023, + "learning_rate": 0.00013666562659312342, + "loss": 2.5286, + "step": 7672 + }, + { + "epoch": 0.6192397708013881, + "grad_norm": 0.744955837726593, + "learning_rate": 0.00013665093869363217, + "loss": 2.5678, + "step": 7673 + }, + { + "epoch": 0.6193204745379711, + "grad_norm": 0.7548620104789734, + "learning_rate": 0.00013663624988070373, + "loss": 2.6081, + "step": 7674 + }, + { + "epoch": 0.6194011782745541, + "grad_norm": 0.7367276549339294, + "learning_rate": 0.0001366215601547042, + "loss": 2.5559, + "step": 7675 + }, + { + "epoch": 0.6194818820111371, + "grad_norm": 0.7243839502334595, + "learning_rate": 0.00013660686951599962, + "loss": 2.5545, + "step": 7676 + }, + { + "epoch": 0.6195625857477202, + "grad_norm": 0.7595756649971008, + "learning_rate": 0.00013659217796495616, + "loss": 2.6547, + "step": 7677 + }, + { + "epoch": 
0.6196432894843031, + "grad_norm": 0.7566717863082886, + "learning_rate": 0.00013657748550193998, + "loss": 2.6521, + "step": 7678 + }, + { + "epoch": 0.6197239932208861, + "grad_norm": 0.8441942930221558, + "learning_rate": 0.00013656279212731728, + "loss": 2.6325, + "step": 7679 + }, + { + "epoch": 0.6198046969574691, + "grad_norm": 0.7481170296669006, + "learning_rate": 0.00013654809784145418, + "loss": 2.6037, + "step": 7680 + }, + { + "epoch": 0.6198854006940522, + "grad_norm": 0.6626241207122803, + "learning_rate": 0.00013653340264471695, + "loss": 2.6028, + "step": 7681 + }, + { + "epoch": 0.6199661044306352, + "grad_norm": 0.7658020853996277, + "learning_rate": 0.00013651870653747186, + "loss": 2.5553, + "step": 7682 + }, + { + "epoch": 0.6200468081672181, + "grad_norm": 0.8218126893043518, + "learning_rate": 0.0001365040095200851, + "loss": 2.5661, + "step": 7683 + }, + { + "epoch": 0.6201275119038011, + "grad_norm": 0.6481068134307861, + "learning_rate": 0.00013648931159292304, + "loss": 2.5675, + "step": 7684 + }, + { + "epoch": 0.6202082156403842, + "grad_norm": 0.7529950141906738, + "learning_rate": 0.0001364746127563519, + "loss": 2.6137, + "step": 7685 + }, + { + "epoch": 0.6202889193769672, + "grad_norm": 0.7133232355117798, + "learning_rate": 0.00013645991301073816, + "loss": 2.6004, + "step": 7686 + }, + { + "epoch": 0.6203696231135502, + "grad_norm": 0.7809340953826904, + "learning_rate": 0.000136445212356448, + "loss": 2.6317, + "step": 7687 + }, + { + "epoch": 0.6204503268501331, + "grad_norm": 0.7106895446777344, + "learning_rate": 0.00013643051079384789, + "loss": 2.6086, + "step": 7688 + }, + { + "epoch": 0.6205310305867162, + "grad_norm": 0.6960744261741638, + "learning_rate": 0.00013641580832330423, + "loss": 2.5554, + "step": 7689 + }, + { + "epoch": 0.6206117343232992, + "grad_norm": 0.7078820466995239, + "learning_rate": 0.00013640110494518343, + "loss": 2.5902, + "step": 7690 + }, + { + "epoch": 0.6206924380598822, + "grad_norm": 
0.7150746583938599, + "learning_rate": 0.00013638640065985195, + "loss": 2.5947, + "step": 7691 + }, + { + "epoch": 0.6207731417964651, + "grad_norm": 0.7507869601249695, + "learning_rate": 0.00013637169546767625, + "loss": 2.559, + "step": 7692 + }, + { + "epoch": 0.6208538455330482, + "grad_norm": 0.7453179359436035, + "learning_rate": 0.00013635698936902282, + "loss": 2.5612, + "step": 7693 + }, + { + "epoch": 0.6209345492696312, + "grad_norm": 0.7174177765846252, + "learning_rate": 0.00013634228236425816, + "loss": 2.6221, + "step": 7694 + }, + { + "epoch": 0.6210152530062142, + "grad_norm": 0.7394092679023743, + "learning_rate": 0.00013632757445374884, + "loss": 2.6045, + "step": 7695 + }, + { + "epoch": 0.6210959567427972, + "grad_norm": 0.7346367239952087, + "learning_rate": 0.0001363128656378614, + "loss": 2.677, + "step": 7696 + }, + { + "epoch": 0.6211766604793802, + "grad_norm": 0.6697696447372437, + "learning_rate": 0.00013629815591696245, + "loss": 2.5741, + "step": 7697 + }, + { + "epoch": 0.6212573642159632, + "grad_norm": 0.6993793845176697, + "learning_rate": 0.00013628344529141852, + "loss": 2.5206, + "step": 7698 + }, + { + "epoch": 0.6213380679525462, + "grad_norm": 0.6946697235107422, + "learning_rate": 0.00013626873376159631, + "loss": 2.6046, + "step": 7699 + }, + { + "epoch": 0.6214187716891292, + "grad_norm": 0.7641928195953369, + "learning_rate": 0.00013625402132786248, + "loss": 2.5459, + "step": 7700 + }, + { + "epoch": 0.6214994754257122, + "grad_norm": 0.6513504981994629, + "learning_rate": 0.00013623930799058363, + "loss": 2.6137, + "step": 7701 + }, + { + "epoch": 0.6215801791622952, + "grad_norm": 0.6745209097862244, + "learning_rate": 0.00013622459375012651, + "loss": 2.5285, + "step": 7702 + }, + { + "epoch": 0.6216608828988782, + "grad_norm": 0.7162348628044128, + "learning_rate": 0.0001362098786068578, + "loss": 2.6224, + "step": 7703 + }, + { + "epoch": 0.6217415866354612, + "grad_norm": 0.7387436032295227, + "learning_rate": 
0.00013619516256114427, + "loss": 2.6216, + "step": 7704 + }, + { + "epoch": 0.6218222903720442, + "grad_norm": 0.764955461025238, + "learning_rate": 0.00013618044561335268, + "loss": 2.612, + "step": 7705 + }, + { + "epoch": 0.6219029941086273, + "grad_norm": 0.6492719054222107, + "learning_rate": 0.00013616572776384983, + "loss": 2.5532, + "step": 7706 + }, + { + "epoch": 0.6219836978452102, + "grad_norm": 0.6870293617248535, + "learning_rate": 0.0001361510090130025, + "loss": 2.5705, + "step": 7707 + }, + { + "epoch": 0.6220644015817932, + "grad_norm": 0.6899540424346924, + "learning_rate": 0.0001361362893611775, + "loss": 2.5768, + "step": 7708 + }, + { + "epoch": 0.6221451053183762, + "grad_norm": 0.658941924571991, + "learning_rate": 0.0001361215688087417, + "loss": 2.5664, + "step": 7709 + }, + { + "epoch": 0.6222258090549593, + "grad_norm": 0.6875531673431396, + "learning_rate": 0.000136106847356062, + "loss": 2.6128, + "step": 7710 + }, + { + "epoch": 0.6223065127915423, + "grad_norm": 0.657073974609375, + "learning_rate": 0.0001360921250035053, + "loss": 2.6449, + "step": 7711 + }, + { + "epoch": 0.6223872165281252, + "grad_norm": 0.7051201462745667, + "learning_rate": 0.00013607740175143848, + "loss": 2.5925, + "step": 7712 + }, + { + "epoch": 0.6224679202647082, + "grad_norm": 0.702877938747406, + "learning_rate": 0.0001360626776002285, + "loss": 2.5338, + "step": 7713 + }, + { + "epoch": 0.6225486240012913, + "grad_norm": 0.650935709476471, + "learning_rate": 0.00013604795255024233, + "loss": 2.5799, + "step": 7714 + }, + { + "epoch": 0.6226293277378743, + "grad_norm": 0.7035139203071594, + "learning_rate": 0.00013603322660184694, + "loss": 2.5476, + "step": 7715 + }, + { + "epoch": 0.6227100314744572, + "grad_norm": 0.6549977660179138, + "learning_rate": 0.0001360184997554094, + "loss": 2.6117, + "step": 7716 + }, + { + "epoch": 0.6227907352110402, + "grad_norm": 0.6882792115211487, + "learning_rate": 0.00013600377201129662, + "loss": 2.53, + "step": 
7717 + }, + { + "epoch": 0.6228714389476233, + "grad_norm": 0.7390840649604797, + "learning_rate": 0.0001359890433698758, + "loss": 2.6345, + "step": 7718 + }, + { + "epoch": 0.6229521426842063, + "grad_norm": 0.7577612400054932, + "learning_rate": 0.00013597431383151386, + "loss": 2.6386, + "step": 7719 + }, + { + "epoch": 0.6230328464207893, + "grad_norm": 0.6818724870681763, + "learning_rate": 0.00013595958339657804, + "loss": 2.5806, + "step": 7720 + }, + { + "epoch": 0.6231135501573722, + "grad_norm": 0.6954349279403687, + "learning_rate": 0.0001359448520654354, + "loss": 2.5913, + "step": 7721 + }, + { + "epoch": 0.6231942538939553, + "grad_norm": 0.7976544499397278, + "learning_rate": 0.00013593011983845308, + "loss": 2.5686, + "step": 7722 + }, + { + "epoch": 0.6232749576305383, + "grad_norm": 0.7362754940986633, + "learning_rate": 0.00013591538671599824, + "loss": 2.5596, + "step": 7723 + }, + { + "epoch": 0.6233556613671213, + "grad_norm": 0.6842390298843384, + "learning_rate": 0.00013590065269843805, + "loss": 2.5793, + "step": 7724 + }, + { + "epoch": 0.6234363651037043, + "grad_norm": 0.6816275715827942, + "learning_rate": 0.0001358859177861398, + "loss": 2.5948, + "step": 7725 + }, + { + "epoch": 0.6235170688402873, + "grad_norm": 0.6892915964126587, + "learning_rate": 0.00013587118197947066, + "loss": 2.6287, + "step": 7726 + }, + { + "epoch": 0.6235977725768703, + "grad_norm": 0.6851752996444702, + "learning_rate": 0.00013585644527879792, + "loss": 2.5781, + "step": 7727 + }, + { + "epoch": 0.6236784763134533, + "grad_norm": 0.7022164463996887, + "learning_rate": 0.00013584170768448877, + "loss": 2.5856, + "step": 7728 + }, + { + "epoch": 0.6237591800500363, + "grad_norm": 0.6752299070358276, + "learning_rate": 0.0001358269691969106, + "loss": 2.6042, + "step": 7729 + }, + { + "epoch": 0.6238398837866194, + "grad_norm": 0.6861466765403748, + "learning_rate": 0.00013581222981643074, + "loss": 2.5887, + "step": 7730 + }, + { + "epoch": 
0.6239205875232023, + "grad_norm": 0.7147940397262573, + "learning_rate": 0.00013579748954341647, + "loss": 2.5796, + "step": 7731 + }, + { + "epoch": 0.6240012912597853, + "grad_norm": 0.6704726219177246, + "learning_rate": 0.0001357827483782352, + "loss": 2.6027, + "step": 7732 + }, + { + "epoch": 0.6240819949963683, + "grad_norm": 0.6984317898750305, + "learning_rate": 0.0001357680063212543, + "loss": 2.635, + "step": 7733 + }, + { + "epoch": 0.6241626987329514, + "grad_norm": 0.6205787658691406, + "learning_rate": 0.00013575326337284115, + "loss": 2.5715, + "step": 7734 + }, + { + "epoch": 0.6242434024695344, + "grad_norm": 0.7214726805686951, + "learning_rate": 0.00013573851953336326, + "loss": 2.5605, + "step": 7735 + }, + { + "epoch": 0.6243241062061173, + "grad_norm": 0.6716169714927673, + "learning_rate": 0.000135723774803188, + "loss": 2.6766, + "step": 7736 + }, + { + "epoch": 0.6244048099427003, + "grad_norm": 0.6446832418441772, + "learning_rate": 0.00013570902918268293, + "loss": 2.5629, + "step": 7737 + }, + { + "epoch": 0.6244855136792834, + "grad_norm": 0.6721374988555908, + "learning_rate": 0.0001356942826722155, + "loss": 2.6093, + "step": 7738 + }, + { + "epoch": 0.6245662174158664, + "grad_norm": 0.7430365681648254, + "learning_rate": 0.0001356795352721532, + "loss": 2.5966, + "step": 7739 + }, + { + "epoch": 0.6246469211524494, + "grad_norm": 0.6787518858909607, + "learning_rate": 0.00013566478698286366, + "loss": 2.5519, + "step": 7740 + }, + { + "epoch": 0.6247276248890323, + "grad_norm": 0.6340047121047974, + "learning_rate": 0.0001356500378047144, + "loss": 2.5181, + "step": 7741 + }, + { + "epoch": 0.6248083286256154, + "grad_norm": 0.7559040188789368, + "learning_rate": 0.000135635287738073, + "loss": 2.6068, + "step": 7742 + }, + { + "epoch": 0.6248890323621984, + "grad_norm": 0.6819902062416077, + "learning_rate": 0.00013562053678330707, + "loss": 2.5754, + "step": 7743 + }, + { + "epoch": 0.6249697360987814, + "grad_norm": 
0.6463500261306763, + "learning_rate": 0.00013560578494078423, + "loss": 2.5915, + "step": 7744 + }, + { + "epoch": 0.6250504398353643, + "grad_norm": 0.7510617971420288, + "learning_rate": 0.0001355910322108722, + "loss": 2.5738, + "step": 7745 + }, + { + "epoch": 0.6251311435719474, + "grad_norm": 0.75312739610672, + "learning_rate": 0.00013557627859393855, + "loss": 2.5938, + "step": 7746 + }, + { + "epoch": 0.6252118473085304, + "grad_norm": 0.7784396409988403, + "learning_rate": 0.0001355615240903511, + "loss": 2.6634, + "step": 7747 + }, + { + "epoch": 0.6252925510451134, + "grad_norm": 0.7174746990203857, + "learning_rate": 0.00013554676870047752, + "loss": 2.5973, + "step": 7748 + }, + { + "epoch": 0.6253732547816964, + "grad_norm": 0.6854952573776245, + "learning_rate": 0.0001355320124246855, + "loss": 2.5397, + "step": 7749 + }, + { + "epoch": 0.6254539585182795, + "grad_norm": 0.6584961414337158, + "learning_rate": 0.00013551725526334284, + "loss": 2.5574, + "step": 7750 + }, + { + "epoch": 0.6255346622548624, + "grad_norm": 0.7067389488220215, + "learning_rate": 0.00013550249721681738, + "loss": 2.5524, + "step": 7751 + }, + { + "epoch": 0.6256153659914454, + "grad_norm": 0.6923872232437134, + "learning_rate": 0.00013548773828547686, + "loss": 2.5651, + "step": 7752 + }, + { + "epoch": 0.6256960697280284, + "grad_norm": 0.6612355709075928, + "learning_rate": 0.00013547297846968915, + "loss": 2.6075, + "step": 7753 + }, + { + "epoch": 0.6257767734646114, + "grad_norm": 0.6762828826904297, + "learning_rate": 0.00013545821776982206, + "loss": 2.6136, + "step": 7754 + }, + { + "epoch": 0.6258574772011944, + "grad_norm": 0.6940783858299255, + "learning_rate": 0.0001354434561862435, + "loss": 2.5566, + "step": 7755 + }, + { + "epoch": 0.6259381809377774, + "grad_norm": 0.7874250411987305, + "learning_rate": 0.0001354286937193214, + "loss": 2.6732, + "step": 7756 + }, + { + "epoch": 0.6260188846743604, + "grad_norm": 0.6974111795425415, + "learning_rate": 
0.0001354139303694236, + "loss": 2.5455, + "step": 7757 + }, + { + "epoch": 0.6260995884109434, + "grad_norm": 0.6710802316665649, + "learning_rate": 0.0001353991661369181, + "loss": 2.5608, + "step": 7758 + }, + { + "epoch": 0.6261802921475265, + "grad_norm": 0.681635320186615, + "learning_rate": 0.00013538440102217286, + "loss": 2.6107, + "step": 7759 + }, + { + "epoch": 0.6262609958841094, + "grad_norm": 0.7229577898979187, + "learning_rate": 0.0001353696350255558, + "loss": 2.5936, + "step": 7760 + }, + { + "epoch": 0.6263416996206924, + "grad_norm": 0.6909681558609009, + "learning_rate": 0.00013535486814743504, + "loss": 2.5521, + "step": 7761 + }, + { + "epoch": 0.6264224033572754, + "grad_norm": 0.7003746032714844, + "learning_rate": 0.0001353401003881785, + "loss": 2.5606, + "step": 7762 + }, + { + "epoch": 0.6265031070938585, + "grad_norm": 0.6883233785629272, + "learning_rate": 0.0001353253317481543, + "loss": 2.5971, + "step": 7763 + }, + { + "epoch": 0.6265838108304415, + "grad_norm": 0.7382355332374573, + "learning_rate": 0.0001353105622277305, + "loss": 2.5449, + "step": 7764 + }, + { + "epoch": 0.6266645145670244, + "grad_norm": 0.7090556621551514, + "learning_rate": 0.00013529579182727515, + "loss": 2.5988, + "step": 7765 + }, + { + "epoch": 0.6267452183036074, + "grad_norm": 0.6842581629753113, + "learning_rate": 0.00013528102054715643, + "loss": 2.6214, + "step": 7766 + }, + { + "epoch": 0.6268259220401905, + "grad_norm": 0.6969670653343201, + "learning_rate": 0.00013526624838774246, + "loss": 2.5443, + "step": 7767 + }, + { + "epoch": 0.6269066257767735, + "grad_norm": 0.7244827151298523, + "learning_rate": 0.00013525147534940138, + "loss": 2.5967, + "step": 7768 + }, + { + "epoch": 0.6269873295133565, + "grad_norm": 0.7022162675857544, + "learning_rate": 0.0001352367014325014, + "loss": 2.599, + "step": 7769 + }, + { + "epoch": 0.6270680332499394, + "grad_norm": 0.7065250873565674, + "learning_rate": 0.00013522192663741067, + "loss": 2.6105, + 
"step": 7770 + }, + { + "epoch": 0.6271487369865225, + "grad_norm": 0.6690711975097656, + "learning_rate": 0.0001352071509644975, + "loss": 2.55, + "step": 7771 + }, + { + "epoch": 0.6272294407231055, + "grad_norm": 0.6405982971191406, + "learning_rate": 0.00013519237441413011, + "loss": 2.6078, + "step": 7772 + }, + { + "epoch": 0.6273101444596885, + "grad_norm": 0.7340127229690552, + "learning_rate": 0.00013517759698667672, + "loss": 2.6244, + "step": 7773 + }, + { + "epoch": 0.6273908481962714, + "grad_norm": 0.6609435677528381, + "learning_rate": 0.00013516281868250566, + "loss": 2.5746, + "step": 7774 + }, + { + "epoch": 0.6274715519328545, + "grad_norm": 0.6681997179985046, + "learning_rate": 0.00013514803950198523, + "loss": 2.6181, + "step": 7775 + }, + { + "epoch": 0.6275522556694375, + "grad_norm": 0.7120032906532288, + "learning_rate": 0.0001351332594454838, + "loss": 2.6018, + "step": 7776 + }, + { + "epoch": 0.6276329594060205, + "grad_norm": 0.6618601679801941, + "learning_rate": 0.0001351184785133697, + "loss": 2.5342, + "step": 7777 + }, + { + "epoch": 0.6277136631426035, + "grad_norm": 0.7250192165374756, + "learning_rate": 0.00013510369670601132, + "loss": 2.5795, + "step": 7778 + }, + { + "epoch": 0.6277943668791865, + "grad_norm": 0.7918543219566345, + "learning_rate": 0.00013508891402377708, + "loss": 2.6544, + "step": 7779 + }, + { + "epoch": 0.6278750706157695, + "grad_norm": 0.678895890712738, + "learning_rate": 0.00013507413046703534, + "loss": 2.5937, + "step": 7780 + }, + { + "epoch": 0.6279557743523525, + "grad_norm": 0.7336576581001282, + "learning_rate": 0.00013505934603615457, + "loss": 2.598, + "step": 7781 + }, + { + "epoch": 0.6280364780889355, + "grad_norm": 0.6891419291496277, + "learning_rate": 0.00013504456073150332, + "loss": 2.5063, + "step": 7782 + }, + { + "epoch": 0.6281171818255186, + "grad_norm": 0.7949386835098267, + "learning_rate": 0.00013502977455344997, + "loss": 2.5703, + "step": 7783 + }, + { + "epoch": 
0.6281978855621015, + "grad_norm": 0.7917985320091248, + "learning_rate": 0.00013501498750236306, + "loss": 2.639, + "step": 7784 + }, + { + "epoch": 0.6282785892986845, + "grad_norm": 0.7387086749076843, + "learning_rate": 0.00013500019957861113, + "loss": 2.5864, + "step": 7785 + }, + { + "epoch": 0.6283592930352675, + "grad_norm": 0.7189435958862305, + "learning_rate": 0.00013498541078256273, + "loss": 2.5627, + "step": 7786 + }, + { + "epoch": 0.6284399967718506, + "grad_norm": 0.6709900498390198, + "learning_rate": 0.00013497062111458646, + "loss": 2.5973, + "step": 7787 + }, + { + "epoch": 0.6285207005084336, + "grad_norm": 0.6925386190414429, + "learning_rate": 0.0001349558305750509, + "loss": 2.615, + "step": 7788 + }, + { + "epoch": 0.6286014042450165, + "grad_norm": 0.7191932201385498, + "learning_rate": 0.00013494103916432466, + "loss": 2.576, + "step": 7789 + }, + { + "epoch": 0.6286821079815995, + "grad_norm": 0.6798804402351379, + "learning_rate": 0.00013492624688277638, + "loss": 2.5661, + "step": 7790 + }, + { + "epoch": 0.6287628117181826, + "grad_norm": 0.6514562964439392, + "learning_rate": 0.00013491145373077475, + "loss": 2.6135, + "step": 7791 + }, + { + "epoch": 0.6288435154547656, + "grad_norm": 0.7345223426818848, + "learning_rate": 0.00013489665970868838, + "loss": 2.6015, + "step": 7792 + }, + { + "epoch": 0.6289242191913486, + "grad_norm": 0.7102675437927246, + "learning_rate": 0.0001348818648168861, + "loss": 2.5545, + "step": 7793 + }, + { + "epoch": 0.6290049229279315, + "grad_norm": 0.7151654362678528, + "learning_rate": 0.0001348670690557365, + "loss": 2.6464, + "step": 7794 + }, + { + "epoch": 0.6290856266645146, + "grad_norm": 0.7344057559967041, + "learning_rate": 0.00013485227242560844, + "loss": 2.6777, + "step": 7795 + }, + { + "epoch": 0.6291663304010976, + "grad_norm": 0.6622766852378845, + "learning_rate": 0.00013483747492687065, + "loss": 2.5713, + "step": 7796 + }, + { + "epoch": 0.6292470341376806, + "grad_norm": 
0.6899346709251404, + "learning_rate": 0.0001348226765598919, + "loss": 2.5188, + "step": 7797 + }, + { + "epoch": 0.6293277378742635, + "grad_norm": 0.6711421012878418, + "learning_rate": 0.000134807877325041, + "loss": 2.5603, + "step": 7798 + }, + { + "epoch": 0.6294084416108466, + "grad_norm": 0.6973204016685486, + "learning_rate": 0.00013479307722268687, + "loss": 2.6621, + "step": 7799 + }, + { + "epoch": 0.6294891453474296, + "grad_norm": 0.7782350778579712, + "learning_rate": 0.00013477827625319824, + "loss": 2.5929, + "step": 7800 + }, + { + "epoch": 0.6295698490840126, + "grad_norm": 0.8703733682632446, + "learning_rate": 0.0001347634744169441, + "loss": 2.6884, + "step": 7801 + }, + { + "epoch": 0.6296505528205956, + "grad_norm": 0.7196036577224731, + "learning_rate": 0.00013474867171429326, + "loss": 2.6002, + "step": 7802 + }, + { + "epoch": 0.6297312565571785, + "grad_norm": 0.7224054932594299, + "learning_rate": 0.00013473386814561475, + "loss": 2.6007, + "step": 7803 + }, + { + "epoch": 0.6298119602937616, + "grad_norm": 0.7615752816200256, + "learning_rate": 0.00013471906371127743, + "loss": 2.6459, + "step": 7804 + }, + { + "epoch": 0.6298926640303446, + "grad_norm": 0.7189914584159851, + "learning_rate": 0.00013470425841165024, + "loss": 2.5692, + "step": 7805 + }, + { + "epoch": 0.6299733677669276, + "grad_norm": 0.7101845741271973, + "learning_rate": 0.00013468945224710225, + "loss": 2.5776, + "step": 7806 + }, + { + "epoch": 0.6300540715035106, + "grad_norm": 0.6860305666923523, + "learning_rate": 0.00013467464521800244, + "loss": 2.5567, + "step": 7807 + }, + { + "epoch": 0.6301347752400936, + "grad_norm": 0.7003797292709351, + "learning_rate": 0.0001346598373247198, + "loss": 2.6444, + "step": 7808 + }, + { + "epoch": 0.6302154789766766, + "grad_norm": 0.6341832876205444, + "learning_rate": 0.00013464502856762344, + "loss": 2.5475, + "step": 7809 + }, + { + "epoch": 0.6302961827132596, + "grad_norm": 0.6255922317504883, + "learning_rate": 
0.00013463021894708242, + "loss": 2.5875, + "step": 7810 + }, + { + "epoch": 0.6303768864498426, + "grad_norm": 0.7136420607566833, + "learning_rate": 0.00013461540846346575, + "loss": 2.5708, + "step": 7811 + }, + { + "epoch": 0.6304575901864257, + "grad_norm": 0.7164542078971863, + "learning_rate": 0.00013460059711714267, + "loss": 2.4975, + "step": 7812 + }, + { + "epoch": 0.6305382939230086, + "grad_norm": 0.7667872905731201, + "learning_rate": 0.00013458578490848226, + "loss": 2.6124, + "step": 7813 + }, + { + "epoch": 0.6306189976595916, + "grad_norm": 0.6631812453269958, + "learning_rate": 0.0001345709718378537, + "loss": 2.5318, + "step": 7814 + }, + { + "epoch": 0.6306997013961746, + "grad_norm": 0.696864664554596, + "learning_rate": 0.0001345561579056261, + "loss": 2.6171, + "step": 7815 + }, + { + "epoch": 0.6307804051327577, + "grad_norm": 0.7368598580360413, + "learning_rate": 0.00013454134311216873, + "loss": 2.5734, + "step": 7816 + }, + { + "epoch": 0.6308611088693407, + "grad_norm": 0.7279712557792664, + "learning_rate": 0.00013452652745785083, + "loss": 2.6231, + "step": 7817 + }, + { + "epoch": 0.6309418126059236, + "grad_norm": 0.8070993423461914, + "learning_rate": 0.00013451171094304158, + "loss": 2.5486, + "step": 7818 + }, + { + "epoch": 0.6310225163425066, + "grad_norm": 0.7522621750831604, + "learning_rate": 0.0001344968935681103, + "loss": 2.5576, + "step": 7819 + }, + { + "epoch": 0.6311032200790897, + "grad_norm": 0.8185423612594604, + "learning_rate": 0.00013448207533342624, + "loss": 2.6068, + "step": 7820 + }, + { + "epoch": 0.6311839238156727, + "grad_norm": 0.7542584538459778, + "learning_rate": 0.0001344672562393587, + "loss": 2.643, + "step": 7821 + }, + { + "epoch": 0.6312646275522557, + "grad_norm": 0.7892276644706726, + "learning_rate": 0.00013445243628627712, + "loss": 2.6211, + "step": 7822 + }, + { + "epoch": 0.6313453312888386, + "grad_norm": 0.7216602563858032, + "learning_rate": 0.00013443761547455072, + "loss": 2.5725, 
+ "step": 7823 + }, + { + "epoch": 0.6314260350254217, + "grad_norm": 0.6750743985176086, + "learning_rate": 0.0001344227938045489, + "loss": 2.5319, + "step": 7824 + }, + { + "epoch": 0.6315067387620047, + "grad_norm": 0.6711540222167969, + "learning_rate": 0.0001344079712766411, + "loss": 2.5957, + "step": 7825 + }, + { + "epoch": 0.6315874424985877, + "grad_norm": 0.6923524737358093, + "learning_rate": 0.00013439314789119667, + "loss": 2.6084, + "step": 7826 + }, + { + "epoch": 0.6316681462351706, + "grad_norm": 0.6859166026115417, + "learning_rate": 0.00013437832364858517, + "loss": 2.5608, + "step": 7827 + }, + { + "epoch": 0.6317488499717537, + "grad_norm": 0.7340966463088989, + "learning_rate": 0.0001343634985491759, + "loss": 2.531, + "step": 7828 + }, + { + "epoch": 0.6318295537083367, + "grad_norm": 0.7374520301818848, + "learning_rate": 0.00013434867259333848, + "loss": 2.5972, + "step": 7829 + }, + { + "epoch": 0.6319102574449197, + "grad_norm": 0.7252814769744873, + "learning_rate": 0.00013433384578144232, + "loss": 2.5874, + "step": 7830 + }, + { + "epoch": 0.6319909611815027, + "grad_norm": 0.7000489830970764, + "learning_rate": 0.000134319018113857, + "loss": 2.6137, + "step": 7831 + }, + { + "epoch": 0.6320716649180858, + "grad_norm": 0.805981457233429, + "learning_rate": 0.00013430418959095198, + "loss": 2.5581, + "step": 7832 + }, + { + "epoch": 0.6321523686546687, + "grad_norm": 0.7459721565246582, + "learning_rate": 0.00013428936021309693, + "loss": 2.5284, + "step": 7833 + }, + { + "epoch": 0.6322330723912517, + "grad_norm": 0.749794065952301, + "learning_rate": 0.00013427452998066136, + "loss": 2.5927, + "step": 7834 + }, + { + "epoch": 0.6323137761278347, + "grad_norm": 0.6925346255302429, + "learning_rate": 0.00013425969889401494, + "loss": 2.5703, + "step": 7835 + }, + { + "epoch": 0.6323944798644178, + "grad_norm": 0.6647117137908936, + "learning_rate": 0.00013424486695352728, + "loss": 2.5649, + "step": 7836 + }, + { + "epoch": 
0.6324751836010007, + "grad_norm": 0.7358147501945496, + "learning_rate": 0.00013423003415956796, + "loss": 2.6122, + "step": 7837 + }, + { + "epoch": 0.6325558873375837, + "grad_norm": 0.7798088788986206, + "learning_rate": 0.00013421520051250675, + "loss": 2.5805, + "step": 7838 + }, + { + "epoch": 0.6326365910741667, + "grad_norm": 0.7108271718025208, + "learning_rate": 0.00013420036601271334, + "loss": 2.5457, + "step": 7839 + }, + { + "epoch": 0.6327172948107498, + "grad_norm": 0.7108528017997742, + "learning_rate": 0.00013418553066055734, + "loss": 2.6313, + "step": 7840 + }, + { + "epoch": 0.6327979985473328, + "grad_norm": 0.7325249910354614, + "learning_rate": 0.00013417069445640858, + "loss": 2.5598, + "step": 7841 + }, + { + "epoch": 0.6328787022839157, + "grad_norm": 0.6861844062805176, + "learning_rate": 0.0001341558574006368, + "loss": 2.5899, + "step": 7842 + }, + { + "epoch": 0.6329594060204987, + "grad_norm": 0.7576130628585815, + "learning_rate": 0.00013414101949361175, + "loss": 2.6077, + "step": 7843 + }, + { + "epoch": 0.6330401097570818, + "grad_norm": 0.7756128907203674, + "learning_rate": 0.0001341261807357033, + "loss": 2.6111, + "step": 7844 + }, + { + "epoch": 0.6331208134936648, + "grad_norm": 0.7131127715110779, + "learning_rate": 0.00013411134112728114, + "loss": 2.5227, + "step": 7845 + }, + { + "epoch": 0.6332015172302478, + "grad_norm": 0.6517898440361023, + "learning_rate": 0.00013409650066871525, + "loss": 2.5825, + "step": 7846 + }, + { + "epoch": 0.6332822209668307, + "grad_norm": 0.8452722430229187, + "learning_rate": 0.0001340816593603754, + "loss": 2.6037, + "step": 7847 + }, + { + "epoch": 0.6333629247034138, + "grad_norm": 0.7421110272407532, + "learning_rate": 0.00013406681720263153, + "loss": 2.5684, + "step": 7848 + }, + { + "epoch": 0.6334436284399968, + "grad_norm": 0.695139467716217, + "learning_rate": 0.0001340519741958535, + "loss": 2.5648, + "step": 7849 + }, + { + "epoch": 0.6335243321765798, + "grad_norm": 
0.7780016660690308, + "learning_rate": 0.0001340371303404113, + "loss": 2.6849, + "step": 7850 + }, + { + "epoch": 0.6336050359131628, + "grad_norm": 0.7276864051818848, + "learning_rate": 0.00013402228563667482, + "loss": 2.6198, + "step": 7851 + }, + { + "epoch": 0.6336857396497458, + "grad_norm": 0.7566827535629272, + "learning_rate": 0.00013400744008501404, + "loss": 2.5803, + "step": 7852 + }, + { + "epoch": 0.6337664433863288, + "grad_norm": 0.7933458089828491, + "learning_rate": 0.00013399259368579894, + "loss": 2.6029, + "step": 7853 + }, + { + "epoch": 0.6338471471229118, + "grad_norm": 0.6849822402000427, + "learning_rate": 0.00013397774643939957, + "loss": 2.5454, + "step": 7854 + }, + { + "epoch": 0.6339278508594948, + "grad_norm": 0.7054651379585266, + "learning_rate": 0.00013396289834618594, + "loss": 2.5905, + "step": 7855 + }, + { + "epoch": 0.6340085545960777, + "grad_norm": 0.7036863565444946, + "learning_rate": 0.00013394804940652813, + "loss": 2.6342, + "step": 7856 + }, + { + "epoch": 0.6340892583326608, + "grad_norm": 0.7101735472679138, + "learning_rate": 0.00013393319962079614, + "loss": 2.6402, + "step": 7857 + }, + { + "epoch": 0.6341699620692438, + "grad_norm": 0.7053956389427185, + "learning_rate": 0.0001339183489893601, + "loss": 2.5841, + "step": 7858 + }, + { + "epoch": 0.6342506658058268, + "grad_norm": 0.7734887003898621, + "learning_rate": 0.0001339034975125902, + "loss": 2.652, + "step": 7859 + }, + { + "epoch": 0.6343313695424098, + "grad_norm": 0.6714119911193848, + "learning_rate": 0.0001338886451908565, + "loss": 2.5927, + "step": 7860 + }, + { + "epoch": 0.6344120732789928, + "grad_norm": 0.6580910682678223, + "learning_rate": 0.00013387379202452917, + "loss": 2.6114, + "step": 7861 + }, + { + "epoch": 0.6344927770155758, + "grad_norm": 0.6810200214385986, + "learning_rate": 0.00013385893801397836, + "loss": 2.5616, + "step": 7862 + }, + { + "epoch": 0.6345734807521588, + "grad_norm": 0.6989572048187256, + "learning_rate": 
0.00013384408315957432, + "loss": 2.5954, + "step": 7863 + }, + { + "epoch": 0.6346541844887418, + "grad_norm": 0.7033671736717224, + "learning_rate": 0.00013382922746168728, + "loss": 2.6015, + "step": 7864 + }, + { + "epoch": 0.6347348882253249, + "grad_norm": 0.6873033046722412, + "learning_rate": 0.0001338143709206875, + "loss": 2.562, + "step": 7865 + }, + { + "epoch": 0.6348155919619078, + "grad_norm": 0.7361463904380798, + "learning_rate": 0.00013379951353694513, + "loss": 2.6175, + "step": 7866 + }, + { + "epoch": 0.6348962956984908, + "grad_norm": 0.7623226046562195, + "learning_rate": 0.00013378465531083055, + "loss": 2.7342, + "step": 7867 + }, + { + "epoch": 0.6349769994350738, + "grad_norm": 0.7427035570144653, + "learning_rate": 0.0001337697962427141, + "loss": 2.5468, + "step": 7868 + }, + { + "epoch": 0.6350577031716569, + "grad_norm": 0.6865772008895874, + "learning_rate": 0.00013375493633296598, + "loss": 2.6112, + "step": 7869 + }, + { + "epoch": 0.6351384069082399, + "grad_norm": 0.663567304611206, + "learning_rate": 0.00013374007558195666, + "loss": 2.5896, + "step": 7870 + }, + { + "epoch": 0.6352191106448228, + "grad_norm": 0.6804360151290894, + "learning_rate": 0.00013372521399005643, + "loss": 2.58, + "step": 7871 + }, + { + "epoch": 0.6352998143814058, + "grad_norm": 0.6755216121673584, + "learning_rate": 0.0001337103515576357, + "loss": 2.5593, + "step": 7872 + }, + { + "epoch": 0.6353805181179889, + "grad_norm": 0.8148807883262634, + "learning_rate": 0.00013369548828506491, + "loss": 2.6473, + "step": 7873 + }, + { + "epoch": 0.6354612218545719, + "grad_norm": 0.713009774684906, + "learning_rate": 0.00013368062417271447, + "loss": 2.6002, + "step": 7874 + }, + { + "epoch": 0.6355419255911549, + "grad_norm": 0.6390172839164734, + "learning_rate": 0.00013366575922095484, + "loss": 2.5794, + "step": 7875 + }, + { + "epoch": 0.6356226293277378, + "grad_norm": 0.7228195667266846, + "learning_rate": 0.00013365089343015649, + "loss": 2.6051, + 
"step": 7876 + }, + { + "epoch": 0.6357033330643209, + "grad_norm": 0.7563474178314209, + "learning_rate": 0.00013363602680068986, + "loss": 2.6308, + "step": 7877 + }, + { + "epoch": 0.6357840368009039, + "grad_norm": 0.7366798520088196, + "learning_rate": 0.00013362115933292557, + "loss": 2.5589, + "step": 7878 + }, + { + "epoch": 0.6358647405374869, + "grad_norm": 0.7137070894241333, + "learning_rate": 0.00013360629102723409, + "loss": 2.6428, + "step": 7879 + }, + { + "epoch": 0.6359454442740698, + "grad_norm": 0.6799132823944092, + "learning_rate": 0.000133591421883986, + "loss": 2.5549, + "step": 7880 + }, + { + "epoch": 0.6360261480106529, + "grad_norm": 0.7031344771385193, + "learning_rate": 0.00013357655190355188, + "loss": 2.6298, + "step": 7881 + }, + { + "epoch": 0.6361068517472359, + "grad_norm": 0.7441670298576355, + "learning_rate": 0.00013356168108630227, + "loss": 2.5844, + "step": 7882 + }, + { + "epoch": 0.6361875554838189, + "grad_norm": 0.7281978726387024, + "learning_rate": 0.00013354680943260784, + "loss": 2.5773, + "step": 7883 + }, + { + "epoch": 0.6362682592204019, + "grad_norm": 0.6969650983810425, + "learning_rate": 0.00013353193694283928, + "loss": 2.6156, + "step": 7884 + }, + { + "epoch": 0.636348962956985, + "grad_norm": 0.6668435335159302, + "learning_rate": 0.00013351706361736714, + "loss": 2.6328, + "step": 7885 + }, + { + "epoch": 0.6364296666935679, + "grad_norm": 0.6909573078155518, + "learning_rate": 0.0001335021894565622, + "loss": 2.5772, + "step": 7886 + }, + { + "epoch": 0.6365103704301509, + "grad_norm": 0.6740022897720337, + "learning_rate": 0.0001334873144607951, + "loss": 2.6435, + "step": 7887 + }, + { + "epoch": 0.6365910741667339, + "grad_norm": 0.7203185558319092, + "learning_rate": 0.0001334724386304366, + "loss": 2.5401, + "step": 7888 + }, + { + "epoch": 0.636671777903317, + "grad_norm": 0.7343020439147949, + "learning_rate": 0.0001334575619658574, + "loss": 2.5811, + "step": 7889 + }, + { + "epoch": 
0.6367524816399, + "grad_norm": 0.6941348314285278, + "learning_rate": 0.00013344268446742835, + "loss": 2.6267, + "step": 7890 + }, + { + "epoch": 0.6368331853764829, + "grad_norm": 0.6983792185783386, + "learning_rate": 0.00013342780613552016, + "loss": 2.533, + "step": 7891 + }, + { + "epoch": 0.6369138891130659, + "grad_norm": 0.7093533277511597, + "learning_rate": 0.00013341292697050365, + "loss": 2.6616, + "step": 7892 + }, + { + "epoch": 0.636994592849649, + "grad_norm": 0.7377648949623108, + "learning_rate": 0.00013339804697274965, + "loss": 2.6032, + "step": 7893 + }, + { + "epoch": 0.637075296586232, + "grad_norm": 0.6669821739196777, + "learning_rate": 0.00013338316614262903, + "loss": 2.6082, + "step": 7894 + }, + { + "epoch": 0.6371560003228149, + "grad_norm": 0.6665576100349426, + "learning_rate": 0.00013336828448051263, + "loss": 2.6114, + "step": 7895 + }, + { + "epoch": 0.6372367040593979, + "grad_norm": 0.6893584132194519, + "learning_rate": 0.0001333534019867714, + "loss": 2.5886, + "step": 7896 + }, + { + "epoch": 0.637317407795981, + "grad_norm": 0.7651494741439819, + "learning_rate": 0.00013333851866177617, + "loss": 2.5622, + "step": 7897 + }, + { + "epoch": 0.637398111532564, + "grad_norm": 0.8124055862426758, + "learning_rate": 0.00013332363450589788, + "loss": 2.6036, + "step": 7898 + }, + { + "epoch": 0.637478815269147, + "grad_norm": 0.7394436597824097, + "learning_rate": 0.00013330874951950755, + "loss": 2.6214, + "step": 7899 + }, + { + "epoch": 0.6375595190057299, + "grad_norm": 0.6279659867286682, + "learning_rate": 0.00013329386370297615, + "loss": 2.5652, + "step": 7900 + }, + { + "epoch": 0.637640222742313, + "grad_norm": 0.7289649248123169, + "learning_rate": 0.00013327897705667455, + "loss": 2.5628, + "step": 7901 + }, + { + "epoch": 0.637720926478896, + "grad_norm": 0.7267701625823975, + "learning_rate": 0.0001332640895809739, + "loss": 2.5475, + "step": 7902 + }, + { + "epoch": 0.637801630215479, + "grad_norm": 
0.7470490336418152, + "learning_rate": 0.00013324920127624515, + "loss": 2.5054, + "step": 7903 + }, + { + "epoch": 0.637882333952062, + "grad_norm": 0.6963294148445129, + "learning_rate": 0.00013323431214285944, + "loss": 2.5992, + "step": 7904 + }, + { + "epoch": 0.6379630376886449, + "grad_norm": 0.6993808746337891, + "learning_rate": 0.00013321942218118778, + "loss": 2.6044, + "step": 7905 + }, + { + "epoch": 0.638043741425228, + "grad_norm": 0.6620917916297913, + "learning_rate": 0.00013320453139160126, + "loss": 2.5278, + "step": 7906 + }, + { + "epoch": 0.638124445161811, + "grad_norm": 0.6535444855690002, + "learning_rate": 0.00013318963977447106, + "loss": 2.6069, + "step": 7907 + }, + { + "epoch": 0.638205148898394, + "grad_norm": 0.6913008689880371, + "learning_rate": 0.00013317474733016824, + "loss": 2.5271, + "step": 7908 + }, + { + "epoch": 0.638285852634977, + "grad_norm": 0.6760269403457642, + "learning_rate": 0.000133159854059064, + "loss": 2.7029, + "step": 7909 + }, + { + "epoch": 0.63836655637156, + "grad_norm": 0.7026536464691162, + "learning_rate": 0.0001331449599615295, + "loss": 2.592, + "step": 7910 + }, + { + "epoch": 0.638447260108143, + "grad_norm": 0.7935923933982849, + "learning_rate": 0.000133130065037936, + "loss": 2.5674, + "step": 7911 + }, + { + "epoch": 0.638527963844726, + "grad_norm": 0.694675087928772, + "learning_rate": 0.00013311516928865466, + "loss": 2.6727, + "step": 7912 + }, + { + "epoch": 0.638608667581309, + "grad_norm": 0.7378186583518982, + "learning_rate": 0.00013310027271405672, + "loss": 2.5691, + "step": 7913 + }, + { + "epoch": 0.638689371317892, + "grad_norm": 0.7684193849563599, + "learning_rate": 0.00013308537531451345, + "loss": 2.5796, + "step": 7914 + }, + { + "epoch": 0.638770075054475, + "grad_norm": 0.6881510019302368, + "learning_rate": 0.00013307047709039619, + "loss": 2.6, + "step": 7915 + }, + { + "epoch": 0.638850778791058, + "grad_norm": 0.7341364026069641, + "learning_rate": 
0.00013305557804207618, + "loss": 2.622, + "step": 7916 + }, + { + "epoch": 0.638931482527641, + "grad_norm": 0.7620663642883301, + "learning_rate": 0.00013304067816992474, + "loss": 2.5571, + "step": 7917 + }, + { + "epoch": 0.6390121862642241, + "grad_norm": 0.6929789781570435, + "learning_rate": 0.00013302577747431322, + "loss": 2.6204, + "step": 7918 + }, + { + "epoch": 0.639092890000807, + "grad_norm": 0.6942943334579468, + "learning_rate": 0.000133010875955613, + "loss": 2.6737, + "step": 7919 + }, + { + "epoch": 0.63917359373739, + "grad_norm": 0.69537752866745, + "learning_rate": 0.0001329959736141955, + "loss": 2.6105, + "step": 7920 + }, + { + "epoch": 0.639254297473973, + "grad_norm": 0.6690821051597595, + "learning_rate": 0.00013298107045043203, + "loss": 2.6279, + "step": 7921 + }, + { + "epoch": 0.6393350012105561, + "grad_norm": 0.7748103141784668, + "learning_rate": 0.00013296616646469412, + "loss": 2.6307, + "step": 7922 + }, + { + "epoch": 0.6394157049471391, + "grad_norm": 0.7509558200836182, + "learning_rate": 0.00013295126165735311, + "loss": 2.6388, + "step": 7923 + }, + { + "epoch": 0.639496408683722, + "grad_norm": 0.7641764283180237, + "learning_rate": 0.0001329363560287806, + "loss": 2.5819, + "step": 7924 + }, + { + "epoch": 0.639577112420305, + "grad_norm": 0.6912327408790588, + "learning_rate": 0.00013292144957934794, + "loss": 2.5588, + "step": 7925 + }, + { + "epoch": 0.6396578161568881, + "grad_norm": 0.7568803429603577, + "learning_rate": 0.0001329065423094267, + "loss": 2.5627, + "step": 7926 + }, + { + "epoch": 0.6397385198934711, + "grad_norm": 0.7272306084632874, + "learning_rate": 0.00013289163421938843, + "loss": 2.6101, + "step": 7927 + }, + { + "epoch": 0.6398192236300541, + "grad_norm": 0.6965963840484619, + "learning_rate": 0.00013287672530960465, + "loss": 2.5967, + "step": 7928 + }, + { + "epoch": 0.639899927366637, + "grad_norm": 0.7729843854904175, + "learning_rate": 0.00013286181558044694, + "loss": 2.6222, + "step": 
7929 + }, + { + "epoch": 0.6399806311032201, + "grad_norm": 0.6876606941223145, + "learning_rate": 0.00013284690503228687, + "loss": 2.6162, + "step": 7930 + }, + { + "epoch": 0.6400613348398031, + "grad_norm": 0.7555204629898071, + "learning_rate": 0.0001328319936654961, + "loss": 2.588, + "step": 7931 + }, + { + "epoch": 0.6401420385763861, + "grad_norm": 0.7324720621109009, + "learning_rate": 0.0001328170814804462, + "loss": 2.6111, + "step": 7932 + }, + { + "epoch": 0.640222742312969, + "grad_norm": 0.6802392601966858, + "learning_rate": 0.0001328021684775088, + "loss": 2.5955, + "step": 7933 + }, + { + "epoch": 0.6403034460495521, + "grad_norm": 0.7564330697059631, + "learning_rate": 0.00013278725465705568, + "loss": 2.5355, + "step": 7934 + }, + { + "epoch": 0.6403841497861351, + "grad_norm": 0.6916235089302063, + "learning_rate": 0.00013277234001945844, + "loss": 2.6037, + "step": 7935 + }, + { + "epoch": 0.6404648535227181, + "grad_norm": 0.688819169998169, + "learning_rate": 0.00013275742456508885, + "loss": 2.5626, + "step": 7936 + }, + { + "epoch": 0.6405455572593011, + "grad_norm": 0.6647922992706299, + "learning_rate": 0.0001327425082943186, + "loss": 2.6166, + "step": 7937 + }, + { + "epoch": 0.6406262609958842, + "grad_norm": 0.6792626976966858, + "learning_rate": 0.00013272759120751943, + "loss": 2.6206, + "step": 7938 + }, + { + "epoch": 0.6407069647324671, + "grad_norm": 0.6482827663421631, + "learning_rate": 0.00013271267330506312, + "loss": 2.5558, + "step": 7939 + }, + { + "epoch": 0.6407876684690501, + "grad_norm": 0.6628372073173523, + "learning_rate": 0.0001326977545873215, + "loss": 2.5904, + "step": 7940 + }, + { + "epoch": 0.6408683722056331, + "grad_norm": 0.7168916463851929, + "learning_rate": 0.00013268283505466635, + "loss": 2.5189, + "step": 7941 + }, + { + "epoch": 0.6409490759422162, + "grad_norm": 0.6691678762435913, + "learning_rate": 0.00013266791470746957, + "loss": 2.608, + "step": 7942 + }, + { + "epoch": 0.6410297796787991, 
+ "grad_norm": 0.6850359439849854, + "learning_rate": 0.00013265299354610292, + "loss": 2.5929, + "step": 7943 + }, + { + "epoch": 0.6411104834153821, + "grad_norm": 0.6807669401168823, + "learning_rate": 0.0001326380715709383, + "loss": 2.6016, + "step": 7944 + }, + { + "epoch": 0.6411911871519651, + "grad_norm": 0.6450446844100952, + "learning_rate": 0.00013262314878234767, + "loss": 2.6129, + "step": 7945 + }, + { + "epoch": 0.6412718908885482, + "grad_norm": 0.679115355014801, + "learning_rate": 0.00013260822518070285, + "loss": 2.6049, + "step": 7946 + }, + { + "epoch": 0.6413525946251312, + "grad_norm": 0.7082008123397827, + "learning_rate": 0.00013259330076637583, + "loss": 2.5673, + "step": 7947 + }, + { + "epoch": 0.6414332983617141, + "grad_norm": 0.7357851266860962, + "learning_rate": 0.00013257837553973855, + "loss": 2.6118, + "step": 7948 + }, + { + "epoch": 0.6415140020982971, + "grad_norm": 0.687035083770752, + "learning_rate": 0.000132563449501163, + "loss": 2.5359, + "step": 7949 + }, + { + "epoch": 0.6415947058348802, + "grad_norm": 0.6950698494911194, + "learning_rate": 0.00013254852265102117, + "loss": 2.5527, + "step": 7950 + }, + { + "epoch": 0.6416754095714632, + "grad_norm": 0.6878959536552429, + "learning_rate": 0.00013253359498968507, + "loss": 2.611, + "step": 7951 + }, + { + "epoch": 0.6417561133080462, + "grad_norm": 0.7224605083465576, + "learning_rate": 0.00013251866651752675, + "loss": 2.5459, + "step": 7952 + }, + { + "epoch": 0.6418368170446291, + "grad_norm": 0.7299731969833374, + "learning_rate": 0.00013250373723491826, + "loss": 2.5651, + "step": 7953 + }, + { + "epoch": 0.6419175207812122, + "grad_norm": 0.7663037776947021, + "learning_rate": 0.00013248880714223163, + "loss": 2.6073, + "step": 7954 + }, + { + "epoch": 0.6419982245177952, + "grad_norm": 0.6532007455825806, + "learning_rate": 0.00013247387623983902, + "loss": 2.6087, + "step": 7955 + }, + { + "epoch": 0.6420789282543782, + "grad_norm": 0.7520449757575989, + 
"learning_rate": 0.00013245894452811255, + "loss": 2.5998, + "step": 7956 + }, + { + "epoch": 0.6421596319909612, + "grad_norm": 0.7196050882339478, + "learning_rate": 0.0001324440120074243, + "loss": 2.6448, + "step": 7957 + }, + { + "epoch": 0.6422403357275441, + "grad_norm": 0.7093806862831116, + "learning_rate": 0.0001324290786781465, + "loss": 2.5935, + "step": 7958 + }, + { + "epoch": 0.6423210394641272, + "grad_norm": 0.695541501045227, + "learning_rate": 0.00013241414454065125, + "loss": 2.5872, + "step": 7959 + }, + { + "epoch": 0.6424017432007102, + "grad_norm": 0.6763006448745728, + "learning_rate": 0.0001323992095953108, + "loss": 2.572, + "step": 7960 + }, + { + "epoch": 0.6424824469372932, + "grad_norm": 0.6403522491455078, + "learning_rate": 0.00013238427384249738, + "loss": 2.6137, + "step": 7961 + }, + { + "epoch": 0.6425631506738761, + "grad_norm": 0.6647571325302124, + "learning_rate": 0.00013236933728258315, + "loss": 2.5904, + "step": 7962 + }, + { + "epoch": 0.6426438544104592, + "grad_norm": 0.6931071877479553, + "learning_rate": 0.0001323543999159405, + "loss": 2.6085, + "step": 7963 + }, + { + "epoch": 0.6427245581470422, + "grad_norm": 0.6899439096450806, + "learning_rate": 0.00013233946174294155, + "loss": 2.5555, + "step": 7964 + }, + { + "epoch": 0.6428052618836252, + "grad_norm": 0.6564984321594238, + "learning_rate": 0.0001323245227639587, + "loss": 2.576, + "step": 7965 + }, + { + "epoch": 0.6428859656202082, + "grad_norm": 0.7427607774734497, + "learning_rate": 0.00013230958297936427, + "loss": 2.6178, + "step": 7966 + }, + { + "epoch": 0.6429666693567913, + "grad_norm": 0.6884508728981018, + "learning_rate": 0.00013229464238953054, + "loss": 2.6519, + "step": 7967 + }, + { + "epoch": 0.6430473730933742, + "grad_norm": 0.692442774772644, + "learning_rate": 0.00013227970099482993, + "loss": 2.5784, + "step": 7968 + }, + { + "epoch": 0.6431280768299572, + "grad_norm": 0.6637876629829407, + "learning_rate": 0.00013226475879563477, + 
"loss": 2.5785, + "step": 7969 + }, + { + "epoch": 0.6432087805665402, + "grad_norm": 0.6844972372055054, + "learning_rate": 0.0001322498157923175, + "loss": 2.5745, + "step": 7970 + }, + { + "epoch": 0.6432894843031233, + "grad_norm": 0.7259756922721863, + "learning_rate": 0.0001322348719852505, + "loss": 2.5696, + "step": 7971 + }, + { + "epoch": 0.6433701880397062, + "grad_norm": 0.6719023585319519, + "learning_rate": 0.00013221992737480625, + "loss": 2.6049, + "step": 7972 + }, + { + "epoch": 0.6434508917762892, + "grad_norm": 0.7160155773162842, + "learning_rate": 0.00013220498196135717, + "loss": 2.572, + "step": 7973 + }, + { + "epoch": 0.6435315955128722, + "grad_norm": 0.6920225620269775, + "learning_rate": 0.00013219003574527576, + "loss": 2.6576, + "step": 7974 + }, + { + "epoch": 0.6436122992494553, + "grad_norm": 0.698518693447113, + "learning_rate": 0.0001321750887269345, + "loss": 2.6074, + "step": 7975 + }, + { + "epoch": 0.6436930029860383, + "grad_norm": 0.7607932090759277, + "learning_rate": 0.00013216014090670594, + "loss": 2.6173, + "step": 7976 + }, + { + "epoch": 0.6437737067226212, + "grad_norm": 0.8130847811698914, + "learning_rate": 0.0001321451922849626, + "loss": 2.6023, + "step": 7977 + }, + { + "epoch": 0.6438544104592042, + "grad_norm": 0.676675021648407, + "learning_rate": 0.00013213024286207702, + "loss": 2.6174, + "step": 7978 + }, + { + "epoch": 0.6439351141957873, + "grad_norm": 0.7018851041793823, + "learning_rate": 0.00013211529263842183, + "loss": 2.5713, + "step": 7979 + }, + { + "epoch": 0.6440158179323703, + "grad_norm": 0.796097457408905, + "learning_rate": 0.00013210034161436954, + "loss": 2.5937, + "step": 7980 + }, + { + "epoch": 0.6440965216689533, + "grad_norm": 0.7118527293205261, + "learning_rate": 0.0001320853897902929, + "loss": 2.5721, + "step": 7981 + }, + { + "epoch": 0.6441772254055362, + "grad_norm": 0.7282249331474304, + "learning_rate": 0.00013207043716656445, + "loss": 2.5975, + "step": 7982 + }, + { + 
"epoch": 0.6442579291421193, + "grad_norm": 0.6710900664329529, + "learning_rate": 0.00013205548374355686, + "loss": 2.5809, + "step": 7983 + }, + { + "epoch": 0.6443386328787023, + "grad_norm": 0.7045658230781555, + "learning_rate": 0.00013204052952164278, + "loss": 2.5715, + "step": 7984 + }, + { + "epoch": 0.6444193366152853, + "grad_norm": 0.719507098197937, + "learning_rate": 0.00013202557450119504, + "loss": 2.5948, + "step": 7985 + }, + { + "epoch": 0.6445000403518683, + "grad_norm": 0.7603922486305237, + "learning_rate": 0.0001320106186825862, + "loss": 2.6176, + "step": 7986 + }, + { + "epoch": 0.6445807440884513, + "grad_norm": 0.7057444453239441, + "learning_rate": 0.0001319956620661891, + "loss": 2.5905, + "step": 7987 + }, + { + "epoch": 0.6446614478250343, + "grad_norm": 0.7884874939918518, + "learning_rate": 0.00013198070465237645, + "loss": 2.5892, + "step": 7988 + }, + { + "epoch": 0.6447421515616173, + "grad_norm": 0.6932834386825562, + "learning_rate": 0.00013196574644152103, + "loss": 2.6032, + "step": 7989 + }, + { + "epoch": 0.6448228552982003, + "grad_norm": 0.7361180186271667, + "learning_rate": 0.00013195078743399568, + "loss": 2.5877, + "step": 7990 + }, + { + "epoch": 0.6449035590347834, + "grad_norm": 0.6843615174293518, + "learning_rate": 0.00013193582763017315, + "loss": 2.5804, + "step": 7991 + }, + { + "epoch": 0.6449842627713663, + "grad_norm": 0.7592078447341919, + "learning_rate": 0.00013192086703042635, + "loss": 2.6464, + "step": 7992 + }, + { + "epoch": 0.6450649665079493, + "grad_norm": 0.7362154126167297, + "learning_rate": 0.0001319059056351281, + "loss": 2.6154, + "step": 7993 + }, + { + "epoch": 0.6451456702445323, + "grad_norm": 0.6721758246421814, + "learning_rate": 0.00013189094344465125, + "loss": 2.5735, + "step": 7994 + }, + { + "epoch": 0.6452263739811154, + "grad_norm": 0.6221550107002258, + "learning_rate": 0.00013187598045936874, + "loss": 2.5612, + "step": 7995 + }, + { + "epoch": 0.6453070777176984, + 
"grad_norm": 0.7225528359413147, + "learning_rate": 0.00013186101667965344, + "loss": 2.6263, + "step": 7996 + }, + { + "epoch": 0.6453877814542813, + "grad_norm": 0.7599418759346008, + "learning_rate": 0.00013184605210587837, + "loss": 2.5814, + "step": 7997 + }, + { + "epoch": 0.6454684851908643, + "grad_norm": 0.6778777837753296, + "learning_rate": 0.00013183108673841642, + "loss": 2.6158, + "step": 7998 + }, + { + "epoch": 0.6455491889274474, + "grad_norm": 0.6860963106155396, + "learning_rate": 0.00013181612057764058, + "loss": 2.6207, + "step": 7999 + }, + { + "epoch": 0.6456298926640304, + "grad_norm": 0.6615182757377625, + "learning_rate": 0.00013180115362392382, + "loss": 2.5571, + "step": 8000 + }, + { + "epoch": 0.6456298926640304, + "eval_loss": 2.5128066539764404, + "eval_runtime": 754.3655, + "eval_samples_per_second": 3.473, + "eval_steps_per_second": 0.579, + "step": 8000 + }, + { + "epoch": 0.6457105964006133, + "grad_norm": 0.688169538974762, + "learning_rate": 0.0001317861858776392, + "loss": 2.6513, + "step": 8001 + }, + { + "epoch": 0.6457913001371963, + "grad_norm": 0.6726182103157043, + "learning_rate": 0.00013177121733915975, + "loss": 2.5909, + "step": 8002 + }, + { + "epoch": 0.6458720038737794, + "grad_norm": 0.7348085641860962, + "learning_rate": 0.00013175624800885853, + "loss": 2.577, + "step": 8003 + }, + { + "epoch": 0.6459527076103624, + "grad_norm": 0.677435040473938, + "learning_rate": 0.00013174127788710856, + "loss": 2.5056, + "step": 8004 + }, + { + "epoch": 0.6460334113469454, + "grad_norm": 0.6864951848983765, + "learning_rate": 0.000131726306974283, + "loss": 2.5733, + "step": 8005 + }, + { + "epoch": 0.6461141150835283, + "grad_norm": 0.7070075869560242, + "learning_rate": 0.0001317113352707549, + "loss": 2.5359, + "step": 8006 + }, + { + "epoch": 0.6461948188201113, + "grad_norm": 0.7065049409866333, + "learning_rate": 0.00013169636277689746, + "loss": 2.6261, + "step": 8007 + }, + { + "epoch": 0.6462755225566944, + 
"grad_norm": 0.6691577434539795, + "learning_rate": 0.0001316813894930838, + "loss": 2.6015, + "step": 8008 + }, + { + "epoch": 0.6463562262932774, + "grad_norm": 0.6754019260406494, + "learning_rate": 0.0001316664154196871, + "loss": 2.5954, + "step": 8009 + }, + { + "epoch": 0.6464369300298604, + "grad_norm": 0.6172776818275452, + "learning_rate": 0.00013165144055708055, + "loss": 2.5599, + "step": 8010 + }, + { + "epoch": 0.6465176337664433, + "grad_norm": 0.6778094172477722, + "learning_rate": 0.00013163646490563737, + "loss": 2.5407, + "step": 8011 + }, + { + "epoch": 0.6465983375030264, + "grad_norm": 0.7363924980163574, + "learning_rate": 0.00013162148846573076, + "loss": 2.6075, + "step": 8012 + }, + { + "epoch": 0.6466790412396094, + "grad_norm": 0.6662711501121521, + "learning_rate": 0.00013160651123773404, + "loss": 2.5611, + "step": 8013 + }, + { + "epoch": 0.6467597449761924, + "grad_norm": 0.699670135974884, + "learning_rate": 0.00013159153322202043, + "loss": 2.5612, + "step": 8014 + }, + { + "epoch": 0.6468404487127754, + "grad_norm": 0.7382899522781372, + "learning_rate": 0.0001315765544189632, + "loss": 2.6017, + "step": 8015 + }, + { + "epoch": 0.6469211524493584, + "grad_norm": 0.7624868154525757, + "learning_rate": 0.0001315615748289357, + "loss": 2.6174, + "step": 8016 + }, + { + "epoch": 0.6470018561859414, + "grad_norm": 0.704622745513916, + "learning_rate": 0.00013154659445231129, + "loss": 2.5367, + "step": 8017 + }, + { + "epoch": 0.6470825599225244, + "grad_norm": 0.7117413878440857, + "learning_rate": 0.00013153161328946324, + "loss": 2.5958, + "step": 8018 + }, + { + "epoch": 0.6471632636591074, + "grad_norm": 0.6825408339500427, + "learning_rate": 0.00013151663134076497, + "loss": 2.5118, + "step": 8019 + }, + { + "epoch": 0.6472439673956905, + "grad_norm": 0.6732384562492371, + "learning_rate": 0.00013150164860658986, + "loss": 2.6312, + "step": 8020 + }, + { + "epoch": 0.6473246711322734, + "grad_norm": 0.712812602519989, + 
"learning_rate": 0.00013148666508731134, + "loss": 2.576, + "step": 8021 + }, + { + "epoch": 0.6474053748688564, + "grad_norm": 0.8128857612609863, + "learning_rate": 0.0001314716807833028, + "loss": 2.5333, + "step": 8022 + }, + { + "epoch": 0.6474860786054394, + "grad_norm": 0.7817162275314331, + "learning_rate": 0.00013145669569493773, + "loss": 2.6835, + "step": 8023 + }, + { + "epoch": 0.6475667823420225, + "grad_norm": 0.7164301872253418, + "learning_rate": 0.00013144170982258956, + "loss": 2.5573, + "step": 8024 + }, + { + "epoch": 0.6476474860786054, + "grad_norm": 0.67625892162323, + "learning_rate": 0.00013142672316663177, + "loss": 2.5976, + "step": 8025 + }, + { + "epoch": 0.6477281898151884, + "grad_norm": 0.6919494867324829, + "learning_rate": 0.0001314117357274379, + "loss": 2.6179, + "step": 8026 + }, + { + "epoch": 0.6478088935517714, + "grad_norm": 0.6787464618682861, + "learning_rate": 0.0001313967475053815, + "loss": 2.5405, + "step": 8027 + }, + { + "epoch": 0.6478895972883545, + "grad_norm": 0.6305621862411499, + "learning_rate": 0.00013138175850083605, + "loss": 2.6016, + "step": 8028 + }, + { + "epoch": 0.6479703010249375, + "grad_norm": 0.7456182837486267, + "learning_rate": 0.00013136676871417516, + "loss": 2.6091, + "step": 8029 + }, + { + "epoch": 0.6480510047615204, + "grad_norm": 0.7047890424728394, + "learning_rate": 0.00013135177814577238, + "loss": 2.6108, + "step": 8030 + }, + { + "epoch": 0.6481317084981034, + "grad_norm": 0.7509389519691467, + "learning_rate": 0.00013133678679600133, + "loss": 2.6396, + "step": 8031 + }, + { + "epoch": 0.6482124122346865, + "grad_norm": 0.63836270570755, + "learning_rate": 0.00013132179466523566, + "loss": 2.5759, + "step": 8032 + }, + { + "epoch": 0.6482931159712695, + "grad_norm": 0.6994885206222534, + "learning_rate": 0.000131306801753849, + "loss": 2.61, + "step": 8033 + }, + { + "epoch": 0.6483738197078525, + "grad_norm": 0.6762083768844604, + "learning_rate": 0.00013129180806221497, + 
"loss": 2.5431, + "step": 8034 + }, + { + "epoch": 0.6484545234444354, + "grad_norm": 0.6890944242477417, + "learning_rate": 0.0001312768135907073, + "loss": 2.5922, + "step": 8035 + }, + { + "epoch": 0.6485352271810185, + "grad_norm": 0.7409473061561584, + "learning_rate": 0.0001312618183396997, + "loss": 2.6132, + "step": 8036 + }, + { + "epoch": 0.6486159309176015, + "grad_norm": 0.6660643815994263, + "learning_rate": 0.00013124682230956585, + "loss": 2.5816, + "step": 8037 + }, + { + "epoch": 0.6486966346541845, + "grad_norm": 0.714235246181488, + "learning_rate": 0.0001312318255006795, + "loss": 2.5613, + "step": 8038 + }, + { + "epoch": 0.6487773383907675, + "grad_norm": 0.6568472385406494, + "learning_rate": 0.00013121682791341442, + "loss": 2.6382, + "step": 8039 + }, + { + "epoch": 0.6488580421273505, + "grad_norm": 0.6874251961708069, + "learning_rate": 0.00013120182954814438, + "loss": 2.593, + "step": 8040 + }, + { + "epoch": 0.6489387458639335, + "grad_norm": 0.7620158791542053, + "learning_rate": 0.0001311868304052432, + "loss": 2.589, + "step": 8041 + }, + { + "epoch": 0.6490194496005165, + "grad_norm": 0.6755926609039307, + "learning_rate": 0.00013117183048508467, + "loss": 2.5876, + "step": 8042 + }, + { + "epoch": 0.6491001533370995, + "grad_norm": 0.6952808499336243, + "learning_rate": 0.00013115682978804264, + "loss": 2.5909, + "step": 8043 + }, + { + "epoch": 0.6491808570736826, + "grad_norm": 0.6599535346031189, + "learning_rate": 0.00013114182831449098, + "loss": 2.6031, + "step": 8044 + }, + { + "epoch": 0.6492615608102655, + "grad_norm": 0.7816598415374756, + "learning_rate": 0.00013112682606480355, + "loss": 2.5633, + "step": 8045 + }, + { + "epoch": 0.6493422645468485, + "grad_norm": 0.7188639640808105, + "learning_rate": 0.00013111182303935425, + "loss": 2.6292, + "step": 8046 + }, + { + "epoch": 0.6494229682834315, + "grad_norm": 0.7131505608558655, + "learning_rate": 0.00013109681923851698, + "loss": 2.5729, + "step": 8047 + }, + { + 
"epoch": 0.6495036720200146, + "grad_norm": 0.7466408014297485, + "learning_rate": 0.00013108181466266568, + "loss": 2.5742, + "step": 8048 + }, + { + "epoch": 0.6495843757565976, + "grad_norm": 0.6707943677902222, + "learning_rate": 0.00013106680931217437, + "loss": 2.5506, + "step": 8049 + }, + { + "epoch": 0.6496650794931805, + "grad_norm": 0.6913424730300903, + "learning_rate": 0.0001310518031874169, + "loss": 2.5639, + "step": 8050 + }, + { + "epoch": 0.6497457832297635, + "grad_norm": 0.8261755704879761, + "learning_rate": 0.00013103679628876733, + "loss": 2.601, + "step": 8051 + }, + { + "epoch": 0.6498264869663466, + "grad_norm": 0.7410566806793213, + "learning_rate": 0.0001310217886165997, + "loss": 2.5326, + "step": 8052 + }, + { + "epoch": 0.6499071907029296, + "grad_norm": 0.7032365202903748, + "learning_rate": 0.00013100678017128798, + "loss": 2.5907, + "step": 8053 + }, + { + "epoch": 0.6499878944395125, + "grad_norm": 0.7074568271636963, + "learning_rate": 0.00013099177095320626, + "loss": 2.6193, + "step": 8054 + }, + { + "epoch": 0.6500685981760955, + "grad_norm": 0.7754546999931335, + "learning_rate": 0.00013097676096272855, + "loss": 2.5832, + "step": 8055 + }, + { + "epoch": 0.6501493019126786, + "grad_norm": 0.7475717663764954, + "learning_rate": 0.00013096175020022903, + "loss": 2.6233, + "step": 8056 + }, + { + "epoch": 0.6502300056492616, + "grad_norm": 0.7863949537277222, + "learning_rate": 0.00013094673866608173, + "loss": 2.5745, + "step": 8057 + }, + { + "epoch": 0.6503107093858446, + "grad_norm": 0.69294673204422, + "learning_rate": 0.0001309317263606608, + "loss": 2.5982, + "step": 8058 + }, + { + "epoch": 0.6503914131224275, + "grad_norm": 0.7096135020256042, + "learning_rate": 0.00013091671328434046, + "loss": 2.5944, + "step": 8059 + }, + { + "epoch": 0.6504721168590105, + "grad_norm": 0.7001097202301025, + "learning_rate": 0.00013090169943749476, + "loss": 2.5435, + "step": 8060 + }, + { + "epoch": 0.6505528205955936, + 
"grad_norm": 0.7522539496421814, + "learning_rate": 0.00013088668482049792, + "loss": 2.5843, + "step": 8061 + }, + { + "epoch": 0.6506335243321766, + "grad_norm": 0.6675420999526978, + "learning_rate": 0.00013087166943372418, + "loss": 2.5623, + "step": 8062 + }, + { + "epoch": 0.6507142280687596, + "grad_norm": 0.7779181599617004, + "learning_rate": 0.00013085665327754772, + "loss": 2.6087, + "step": 8063 + }, + { + "epoch": 0.6507949318053425, + "grad_norm": 0.7385239005088806, + "learning_rate": 0.00013084163635234284, + "loss": 2.5725, + "step": 8064 + }, + { + "epoch": 0.6508756355419256, + "grad_norm": 0.6966612339019775, + "learning_rate": 0.00013082661865848375, + "loss": 2.5745, + "step": 8065 + }, + { + "epoch": 0.6509563392785086, + "grad_norm": 0.7098337411880493, + "learning_rate": 0.00013081160019634468, + "loss": 2.5461, + "step": 8066 + }, + { + "epoch": 0.6510370430150916, + "grad_norm": 0.6514503359794617, + "learning_rate": 0.00013079658096630002, + "loss": 2.5869, + "step": 8067 + }, + { + "epoch": 0.6511177467516746, + "grad_norm": 0.680422306060791, + "learning_rate": 0.0001307815609687241, + "loss": 2.6316, + "step": 8068 + }, + { + "epoch": 0.6511984504882576, + "grad_norm": 0.6892665028572083, + "learning_rate": 0.00013076654020399117, + "loss": 2.5862, + "step": 8069 + }, + { + "epoch": 0.6512791542248406, + "grad_norm": 0.7605568170547485, + "learning_rate": 0.00013075151867247568, + "loss": 2.5342, + "step": 8070 + }, + { + "epoch": 0.6513598579614236, + "grad_norm": 0.7571204900741577, + "learning_rate": 0.00013073649637455192, + "loss": 2.5762, + "step": 8071 + }, + { + "epoch": 0.6514405616980066, + "grad_norm": 0.6910812258720398, + "learning_rate": 0.00013072147331059431, + "loss": 2.6635, + "step": 8072 + }, + { + "epoch": 0.6515212654345897, + "grad_norm": 0.765559196472168, + "learning_rate": 0.00013070644948097733, + "loss": 2.5885, + "step": 8073 + }, + { + "epoch": 0.6516019691711726, + "grad_norm": 0.7533665299415588, + 
"learning_rate": 0.00013069142488607532, + "loss": 2.6545, + "step": 8074 + }, + { + "epoch": 0.6516826729077556, + "grad_norm": 0.685089647769928, + "learning_rate": 0.0001306763995262628, + "loss": 2.5955, + "step": 8075 + }, + { + "epoch": 0.6517633766443386, + "grad_norm": 0.7280653715133667, + "learning_rate": 0.00013066137340191422, + "loss": 2.5548, + "step": 8076 + }, + { + "epoch": 0.6518440803809217, + "grad_norm": 0.6881482601165771, + "learning_rate": 0.00013064634651340404, + "loss": 2.6143, + "step": 8077 + }, + { + "epoch": 0.6519247841175047, + "grad_norm": 0.6878265142440796, + "learning_rate": 0.0001306313188611068, + "loss": 2.5681, + "step": 8078 + }, + { + "epoch": 0.6520054878540876, + "grad_norm": 0.685238242149353, + "learning_rate": 0.00013061629044539702, + "loss": 2.5517, + "step": 8079 + }, + { + "epoch": 0.6520861915906706, + "grad_norm": 0.6689820885658264, + "learning_rate": 0.00013060126126664928, + "loss": 2.6201, + "step": 8080 + }, + { + "epoch": 0.6521668953272537, + "grad_norm": 0.7128999829292297, + "learning_rate": 0.00013058623132523807, + "loss": 2.5829, + "step": 8081 + }, + { + "epoch": 0.6522475990638367, + "grad_norm": 0.6835216879844666, + "learning_rate": 0.00013057120062153805, + "loss": 2.6312, + "step": 8082 + }, + { + "epoch": 0.6523283028004196, + "grad_norm": 0.7140012383460999, + "learning_rate": 0.00013055616915592382, + "loss": 2.6148, + "step": 8083 + }, + { + "epoch": 0.6524090065370026, + "grad_norm": 0.7378252148628235, + "learning_rate": 0.00013054113692876994, + "loss": 2.5805, + "step": 8084 + }, + { + "epoch": 0.6524897102735857, + "grad_norm": 0.7569258213043213, + "learning_rate": 0.0001305261039404511, + "loss": 2.6088, + "step": 8085 + }, + { + "epoch": 0.6525704140101687, + "grad_norm": 0.6909007430076599, + "learning_rate": 0.00013051107019134195, + "loss": 2.5285, + "step": 8086 + }, + { + "epoch": 0.6526511177467517, + "grad_norm": 0.6785587072372437, + "learning_rate": 0.0001304960356818172, + 
"loss": 2.5527, + "step": 8087 + }, + { + "epoch": 0.6527318214833346, + "grad_norm": 0.7058801054954529, + "learning_rate": 0.0001304810004122515, + "loss": 2.6789, + "step": 8088 + }, + { + "epoch": 0.6528125252199177, + "grad_norm": 0.6920512318611145, + "learning_rate": 0.0001304659643830196, + "loss": 2.5748, + "step": 8089 + }, + { + "epoch": 0.6528932289565007, + "grad_norm": 0.6829244494438171, + "learning_rate": 0.00013045092759449625, + "loss": 2.5389, + "step": 8090 + }, + { + "epoch": 0.6529739326930837, + "grad_norm": 0.6942421793937683, + "learning_rate": 0.00013043589004705614, + "loss": 2.5851, + "step": 8091 + }, + { + "epoch": 0.6530546364296667, + "grad_norm": 0.6473072171211243, + "learning_rate": 0.0001304208517410741, + "loss": 2.56, + "step": 8092 + }, + { + "epoch": 0.6531353401662497, + "grad_norm": 0.6692056655883789, + "learning_rate": 0.00013040581267692494, + "loss": 2.5977, + "step": 8093 + }, + { + "epoch": 0.6532160439028327, + "grad_norm": 0.6918915510177612, + "learning_rate": 0.00013039077285498344, + "loss": 2.551, + "step": 8094 + }, + { + "epoch": 0.6532967476394157, + "grad_norm": 0.7432852387428284, + "learning_rate": 0.00013037573227562443, + "loss": 2.5537, + "step": 8095 + }, + { + "epoch": 0.6533774513759987, + "grad_norm": 0.6737081408500671, + "learning_rate": 0.0001303606909392228, + "loss": 2.5947, + "step": 8096 + }, + { + "epoch": 0.6534581551125818, + "grad_norm": 0.6810599565505981, + "learning_rate": 0.0001303456488461533, + "loss": 2.5704, + "step": 8097 + }, + { + "epoch": 0.6535388588491647, + "grad_norm": 0.675240159034729, + "learning_rate": 0.00013033060599679098, + "loss": 2.591, + "step": 8098 + }, + { + "epoch": 0.6536195625857477, + "grad_norm": 0.6888695359230042, + "learning_rate": 0.00013031556239151066, + "loss": 2.5403, + "step": 8099 + }, + { + "epoch": 0.6537002663223307, + "grad_norm": 0.7154796719551086, + "learning_rate": 0.00013030051803068727, + "loss": 2.5654, + "step": 8100 + }, + { + 
"epoch": 0.6537809700589138, + "grad_norm": 0.6655243635177612, + "learning_rate": 0.0001302854729146958, + "loss": 2.5867, + "step": 8101 + }, + { + "epoch": 0.6538616737954968, + "grad_norm": 0.7070788145065308, + "learning_rate": 0.00013027042704391115, + "loss": 2.5593, + "step": 8102 + }, + { + "epoch": 0.6539423775320797, + "grad_norm": 0.7071834206581116, + "learning_rate": 0.0001302553804187083, + "loss": 2.536, + "step": 8103 + }, + { + "epoch": 0.6540230812686627, + "grad_norm": 0.7086542248725891, + "learning_rate": 0.00013024033303946233, + "loss": 2.5644, + "step": 8104 + }, + { + "epoch": 0.6541037850052458, + "grad_norm": 0.6714556813240051, + "learning_rate": 0.00013022528490654818, + "loss": 2.5167, + "step": 8105 + }, + { + "epoch": 0.6541844887418288, + "grad_norm": 0.6905114054679871, + "learning_rate": 0.00013021023602034095, + "loss": 2.5227, + "step": 8106 + }, + { + "epoch": 0.6542651924784118, + "grad_norm": 0.7050586342811584, + "learning_rate": 0.00013019518638121563, + "loss": 2.5725, + "step": 8107 + }, + { + "epoch": 0.6543458962149947, + "grad_norm": 0.6940500736236572, + "learning_rate": 0.00013018013598954737, + "loss": 2.5912, + "step": 8108 + }, + { + "epoch": 0.6544265999515777, + "grad_norm": 0.7136965990066528, + "learning_rate": 0.00013016508484571122, + "loss": 2.6101, + "step": 8109 + }, + { + "epoch": 0.6545073036881608, + "grad_norm": 0.7205774188041687, + "learning_rate": 0.0001301500329500823, + "loss": 2.5869, + "step": 8110 + }, + { + "epoch": 0.6545880074247438, + "grad_norm": 0.6831154823303223, + "learning_rate": 0.00013013498030303575, + "loss": 2.5309, + "step": 8111 + }, + { + "epoch": 0.6546687111613267, + "grad_norm": 0.6778538823127747, + "learning_rate": 0.0001301199269049467, + "loss": 2.6297, + "step": 8112 + }, + { + "epoch": 0.6547494148979097, + "grad_norm": 0.705055832862854, + "learning_rate": 0.00013010487275619034, + "loss": 2.6188, + "step": 8113 + }, + { + "epoch": 0.6548301186344928, + 
"grad_norm": 0.6927980780601501, + "learning_rate": 0.00013008981785714188, + "loss": 2.5744, + "step": 8114 + }, + { + "epoch": 0.6549108223710758, + "grad_norm": 0.7070884108543396, + "learning_rate": 0.0001300747622081765, + "loss": 2.618, + "step": 8115 + }, + { + "epoch": 0.6549915261076588, + "grad_norm": 0.723479688167572, + "learning_rate": 0.0001300597058096694, + "loss": 2.5928, + "step": 8116 + }, + { + "epoch": 0.6550722298442417, + "grad_norm": 0.6689562201499939, + "learning_rate": 0.00013004464866199587, + "loss": 2.5592, + "step": 8117 + }, + { + "epoch": 0.6551529335808248, + "grad_norm": 0.6685079336166382, + "learning_rate": 0.00013002959076553115, + "loss": 2.558, + "step": 8118 + }, + { + "epoch": 0.6552336373174078, + "grad_norm": 0.678105890750885, + "learning_rate": 0.00013001453212065057, + "loss": 2.6176, + "step": 8119 + }, + { + "epoch": 0.6553143410539908, + "grad_norm": 0.7355597019195557, + "learning_rate": 0.00012999947272772933, + "loss": 2.6293, + "step": 8120 + }, + { + "epoch": 0.6553950447905738, + "grad_norm": 0.735862672328949, + "learning_rate": 0.00012998441258714284, + "loss": 2.635, + "step": 8121 + }, + { + "epoch": 0.6554757485271568, + "grad_norm": 0.6766025424003601, + "learning_rate": 0.0001299693516992664, + "loss": 2.5829, + "step": 8122 + }, + { + "epoch": 0.6555564522637398, + "grad_norm": 0.6701885461807251, + "learning_rate": 0.00012995429006447542, + "loss": 2.5996, + "step": 8123 + }, + { + "epoch": 0.6556371560003228, + "grad_norm": 0.6814082264900208, + "learning_rate": 0.00012993922768314518, + "loss": 2.5906, + "step": 8124 + }, + { + "epoch": 0.6557178597369058, + "grad_norm": 0.7104958295822144, + "learning_rate": 0.00012992416455565113, + "loss": 2.6708, + "step": 8125 + }, + { + "epoch": 0.6557985634734889, + "grad_norm": 0.6451221108436584, + "learning_rate": 0.0001299091006823687, + "loss": 2.5512, + "step": 8126 + }, + { + "epoch": 0.6558792672100718, + "grad_norm": 0.6736068725585938, + 
"learning_rate": 0.0001298940360636733, + "loss": 2.5839, + "step": 8127 + }, + { + "epoch": 0.6559599709466548, + "grad_norm": 0.6873149871826172, + "learning_rate": 0.00012987897069994031, + "loss": 2.5804, + "step": 8128 + }, + { + "epoch": 0.6560406746832378, + "grad_norm": 0.6937728524208069, + "learning_rate": 0.00012986390459154533, + "loss": 2.5648, + "step": 8129 + }, + { + "epoch": 0.6561213784198209, + "grad_norm": 0.7109464406967163, + "learning_rate": 0.00012984883773886377, + "loss": 2.6132, + "step": 8130 + }, + { + "epoch": 0.6562020821564039, + "grad_norm": 0.7134159803390503, + "learning_rate": 0.00012983377014227115, + "loss": 2.6029, + "step": 8131 + }, + { + "epoch": 0.6562827858929868, + "grad_norm": 0.6788110733032227, + "learning_rate": 0.000129818701802143, + "loss": 2.6344, + "step": 8132 + }, + { + "epoch": 0.6563634896295698, + "grad_norm": 0.6798231601715088, + "learning_rate": 0.00012980363271885483, + "loss": 2.5758, + "step": 8133 + }, + { + "epoch": 0.6564441933661529, + "grad_norm": 0.6586930155754089, + "learning_rate": 0.00012978856289278226, + "loss": 2.5918, + "step": 8134 + }, + { + "epoch": 0.6565248971027359, + "grad_norm": 0.6614218950271606, + "learning_rate": 0.0001297734923243008, + "loss": 2.5777, + "step": 8135 + }, + { + "epoch": 0.6566056008393188, + "grad_norm": 0.6874340176582336, + "learning_rate": 0.0001297584210137861, + "loss": 2.5528, + "step": 8136 + }, + { + "epoch": 0.6566863045759018, + "grad_norm": 0.6972174048423767, + "learning_rate": 0.00012974334896161376, + "loss": 2.6551, + "step": 8137 + }, + { + "epoch": 0.6567670083124849, + "grad_norm": 0.7414106726646423, + "learning_rate": 0.0001297282761681594, + "loss": 2.5719, + "step": 8138 + }, + { + "epoch": 0.6568477120490679, + "grad_norm": 0.6678279042243958, + "learning_rate": 0.00012971320263379868, + "loss": 2.555, + "step": 8139 + }, + { + "epoch": 0.6569284157856509, + "grad_norm": 0.692149817943573, + "learning_rate": 0.0001296981283589073, + 
"loss": 2.5991, + "step": 8140 + }, + { + "epoch": 0.6570091195222338, + "grad_norm": 0.6937025189399719, + "learning_rate": 0.00012968305334386094, + "loss": 2.5635, + "step": 8141 + }, + { + "epoch": 0.6570898232588169, + "grad_norm": 0.6250358819961548, + "learning_rate": 0.00012966797758903528, + "loss": 2.55, + "step": 8142 + }, + { + "epoch": 0.6571705269953999, + "grad_norm": 0.7388221025466919, + "learning_rate": 0.00012965290109480607, + "loss": 2.5307, + "step": 8143 + }, + { + "epoch": 0.6572512307319829, + "grad_norm": 0.7165891528129578, + "learning_rate": 0.00012963782386154904, + "loss": 2.5482, + "step": 8144 + }, + { + "epoch": 0.6573319344685659, + "grad_norm": 0.7605282068252563, + "learning_rate": 0.00012962274588963996, + "loss": 2.5839, + "step": 8145 + }, + { + "epoch": 0.657412638205149, + "grad_norm": 0.7259613275527954, + "learning_rate": 0.00012960766717945465, + "loss": 2.5612, + "step": 8146 + }, + { + "epoch": 0.6574933419417319, + "grad_norm": 0.7301480770111084, + "learning_rate": 0.00012959258773136885, + "loss": 2.5365, + "step": 8147 + }, + { + "epoch": 0.6575740456783149, + "grad_norm": 0.6800966262817383, + "learning_rate": 0.0001295775075457584, + "loss": 2.5663, + "step": 8148 + }, + { + "epoch": 0.6576547494148979, + "grad_norm": 0.6968960165977478, + "learning_rate": 0.0001295624266229992, + "loss": 2.5626, + "step": 8149 + }, + { + "epoch": 0.657735453151481, + "grad_norm": 0.9044952392578125, + "learning_rate": 0.00012954734496346704, + "loss": 2.6479, + "step": 8150 + }, + { + "epoch": 0.6578161568880639, + "grad_norm": 0.6955156922340393, + "learning_rate": 0.00012953226256753777, + "loss": 2.5879, + "step": 8151 + }, + { + "epoch": 0.6578968606246469, + "grad_norm": 0.6535033583641052, + "learning_rate": 0.00012951717943558735, + "loss": 2.5372, + "step": 8152 + }, + { + "epoch": 0.6579775643612299, + "grad_norm": 0.720730721950531, + "learning_rate": 0.0001295020955679916, + "loss": 2.5813, + "step": 8153 + }, + { + 
"epoch": 0.658058268097813, + "grad_norm": 0.7190384268760681, + "learning_rate": 0.00012948701096512655, + "loss": 2.5923, + "step": 8154 + }, + { + "epoch": 0.658138971834396, + "grad_norm": 0.6624464988708496, + "learning_rate": 0.0001294719256273681, + "loss": 2.5548, + "step": 8155 + }, + { + "epoch": 0.6582196755709789, + "grad_norm": 0.7839831709861755, + "learning_rate": 0.00012945683955509224, + "loss": 2.531, + "step": 8156 + }, + { + "epoch": 0.6583003793075619, + "grad_norm": 0.694970965385437, + "learning_rate": 0.00012944175274867497, + "loss": 2.4693, + "step": 8157 + }, + { + "epoch": 0.658381083044145, + "grad_norm": 0.7409366965293884, + "learning_rate": 0.0001294266652084922, + "loss": 2.5706, + "step": 8158 + }, + { + "epoch": 0.658461786780728, + "grad_norm": 0.7502163052558899, + "learning_rate": 0.00012941157693492002, + "loss": 2.6137, + "step": 8159 + }, + { + "epoch": 0.658542490517311, + "grad_norm": 0.6627129912376404, + "learning_rate": 0.00012939648792833447, + "loss": 2.5781, + "step": 8160 + }, + { + "epoch": 0.6586231942538939, + "grad_norm": 0.6775660514831543, + "learning_rate": 0.00012938139818911157, + "loss": 2.5441, + "step": 8161 + }, + { + "epoch": 0.6587038979904769, + "grad_norm": 0.7150553464889526, + "learning_rate": 0.00012936630771762748, + "loss": 2.5763, + "step": 8162 + }, + { + "epoch": 0.65878460172706, + "grad_norm": 0.7461466193199158, + "learning_rate": 0.0001293512165142582, + "loss": 2.54, + "step": 8163 + }, + { + "epoch": 0.658865305463643, + "grad_norm": 0.7635199427604675, + "learning_rate": 0.00012933612457937988, + "loss": 2.5763, + "step": 8164 + }, + { + "epoch": 0.658946009200226, + "grad_norm": 0.7360543608665466, + "learning_rate": 0.00012932103191336865, + "loss": 2.5968, + "step": 8165 + }, + { + "epoch": 0.6590267129368089, + "grad_norm": 0.6482167840003967, + "learning_rate": 0.0001293059385166007, + "loss": 2.5704, + "step": 8166 + }, + { + "epoch": 0.659107416673392, + "grad_norm": 
0.7024737596511841, + "learning_rate": 0.00012929084438945208, + "loss": 2.6221, + "step": 8167 + }, + { + "epoch": 0.659188120409975, + "grad_norm": 0.7192068696022034, + "learning_rate": 0.0001292757495322991, + "loss": 2.5574, + "step": 8168 + }, + { + "epoch": 0.659268824146558, + "grad_norm": 0.6900508403778076, + "learning_rate": 0.0001292606539455179, + "loss": 2.5969, + "step": 8169 + }, + { + "epoch": 0.6593495278831409, + "grad_norm": 0.7522475719451904, + "learning_rate": 0.00012924555762948474, + "loss": 2.592, + "step": 8170 + }, + { + "epoch": 0.659430231619724, + "grad_norm": 0.6610947251319885, + "learning_rate": 0.00012923046058457583, + "loss": 2.5404, + "step": 8171 + }, + { + "epoch": 0.659510935356307, + "grad_norm": 0.667628288269043, + "learning_rate": 0.00012921536281116738, + "loss": 2.5551, + "step": 8172 + }, + { + "epoch": 0.65959163909289, + "grad_norm": 0.7119980454444885, + "learning_rate": 0.00012920026430963578, + "loss": 2.6002, + "step": 8173 + }, + { + "epoch": 0.659672342829473, + "grad_norm": 0.712166428565979, + "learning_rate": 0.00012918516508035724, + "loss": 2.626, + "step": 8174 + }, + { + "epoch": 0.659753046566056, + "grad_norm": 0.6993290185928345, + "learning_rate": 0.0001291700651237081, + "loss": 2.6311, + "step": 8175 + }, + { + "epoch": 0.659833750302639, + "grad_norm": 0.6889405250549316, + "learning_rate": 0.0001291549644400647, + "loss": 2.6483, + "step": 8176 + }, + { + "epoch": 0.659914454039222, + "grad_norm": 0.7120937705039978, + "learning_rate": 0.00012913986302980334, + "loss": 2.5489, + "step": 8177 + }, + { + "epoch": 0.659995157775805, + "grad_norm": 0.7112947106361389, + "learning_rate": 0.00012912476089330043, + "loss": 2.6393, + "step": 8178 + }, + { + "epoch": 0.6600758615123881, + "grad_norm": 0.710342526435852, + "learning_rate": 0.00012910965803093237, + "loss": 2.5897, + "step": 8179 + }, + { + "epoch": 0.660156565248971, + "grad_norm": 0.6506931185722351, + "learning_rate": 
0.0001290945544430755, + "loss": 2.6429, + "step": 8180 + }, + { + "epoch": 0.660237268985554, + "grad_norm": 0.7147021293640137, + "learning_rate": 0.00012907945013010633, + "loss": 2.5521, + "step": 8181 + }, + { + "epoch": 0.660317972722137, + "grad_norm": 0.6802387833595276, + "learning_rate": 0.0001290643450924012, + "loss": 2.581, + "step": 8182 + }, + { + "epoch": 0.6603986764587201, + "grad_norm": 0.7599670886993408, + "learning_rate": 0.00012904923933033664, + "loss": 2.5532, + "step": 8183 + }, + { + "epoch": 0.6604793801953031, + "grad_norm": 0.7105657458305359, + "learning_rate": 0.0001290341328442891, + "loss": 2.5744, + "step": 8184 + }, + { + "epoch": 0.660560083931886, + "grad_norm": 0.6786425113677979, + "learning_rate": 0.00012901902563463506, + "loss": 2.5326, + "step": 8185 + }, + { + "epoch": 0.660640787668469, + "grad_norm": 0.7305583357810974, + "learning_rate": 0.00012900391770175106, + "loss": 2.6103, + "step": 8186 + }, + { + "epoch": 0.6607214914050521, + "grad_norm": 0.6578992605209351, + "learning_rate": 0.00012898880904601363, + "loss": 2.5833, + "step": 8187 + }, + { + "epoch": 0.6608021951416351, + "grad_norm": 0.6498856544494629, + "learning_rate": 0.00012897369966779926, + "loss": 2.6333, + "step": 8188 + }, + { + "epoch": 0.660882898878218, + "grad_norm": 0.7065569162368774, + "learning_rate": 0.00012895858956748458, + "loss": 2.5326, + "step": 8189 + }, + { + "epoch": 0.660963602614801, + "grad_norm": 0.7676446437835693, + "learning_rate": 0.00012894347874544613, + "loss": 2.6233, + "step": 8190 + }, + { + "epoch": 0.6610443063513841, + "grad_norm": 0.6794395446777344, + "learning_rate": 0.00012892836720206056, + "loss": 2.5426, + "step": 8191 + }, + { + "epoch": 0.6611250100879671, + "grad_norm": 0.7448986768722534, + "learning_rate": 0.00012891325493770444, + "loss": 2.5832, + "step": 8192 + }, + { + "epoch": 0.6612057138245501, + "grad_norm": 0.7789760231971741, + "learning_rate": 0.0001288981419527544, + "loss": 2.6393, + 
"step": 8193 + }, + { + "epoch": 0.661286417561133, + "grad_norm": 0.7425827980041504, + "learning_rate": 0.00012888302824758718, + "loss": 2.6159, + "step": 8194 + }, + { + "epoch": 0.6613671212977161, + "grad_norm": 0.6677481532096863, + "learning_rate": 0.00012886791382257936, + "loss": 2.5399, + "step": 8195 + }, + { + "epoch": 0.6614478250342991, + "grad_norm": 0.698397159576416, + "learning_rate": 0.0001288527986781077, + "loss": 2.5443, + "step": 8196 + }, + { + "epoch": 0.6615285287708821, + "grad_norm": 0.6862680315971375, + "learning_rate": 0.00012883768281454885, + "loss": 2.5843, + "step": 8197 + }, + { + "epoch": 0.6616092325074651, + "grad_norm": 0.7421948313713074, + "learning_rate": 0.00012882256623227955, + "loss": 2.5885, + "step": 8198 + }, + { + "epoch": 0.6616899362440481, + "grad_norm": 0.7453073859214783, + "learning_rate": 0.00012880744893167654, + "loss": 2.5821, + "step": 8199 + }, + { + "epoch": 0.6617706399806311, + "grad_norm": 0.668218195438385, + "learning_rate": 0.00012879233091311667, + "loss": 2.5941, + "step": 8200 + }, + { + "epoch": 0.6618513437172141, + "grad_norm": 0.6864587664604187, + "learning_rate": 0.00012877721217697657, + "loss": 2.5321, + "step": 8201 + }, + { + "epoch": 0.6619320474537971, + "grad_norm": 0.6521022319793701, + "learning_rate": 0.00012876209272363317, + "loss": 2.5945, + "step": 8202 + }, + { + "epoch": 0.6620127511903802, + "grad_norm": 0.7564631104469299, + "learning_rate": 0.00012874697255346325, + "loss": 2.5901, + "step": 8203 + }, + { + "epoch": 0.6620934549269631, + "grad_norm": 0.731991171836853, + "learning_rate": 0.00012873185166684356, + "loss": 2.649, + "step": 8204 + }, + { + "epoch": 0.6621741586635461, + "grad_norm": 0.6804815530776978, + "learning_rate": 0.00012871673006415108, + "loss": 2.5417, + "step": 8205 + }, + { + "epoch": 0.6622548624001291, + "grad_norm": 0.6862792372703552, + "learning_rate": 0.0001287016077457626, + "loss": 2.6118, + "step": 8206 + }, + { + "epoch": 
0.6623355661367122, + "grad_norm": 0.7013735175132751, + "learning_rate": 0.00012868648471205503, + "loss": 2.6296, + "step": 8207 + }, + { + "epoch": 0.6624162698732952, + "grad_norm": 0.7284584045410156, + "learning_rate": 0.00012867136096340529, + "loss": 2.6547, + "step": 8208 + }, + { + "epoch": 0.6624969736098781, + "grad_norm": 0.714546799659729, + "learning_rate": 0.00012865623650019025, + "loss": 2.5955, + "step": 8209 + }, + { + "epoch": 0.6625776773464611, + "grad_norm": 0.7645453214645386, + "learning_rate": 0.0001286411113227869, + "loss": 2.6132, + "step": 8210 + }, + { + "epoch": 0.6626583810830441, + "grad_norm": 0.6615093946456909, + "learning_rate": 0.0001286259854315722, + "loss": 2.5701, + "step": 8211 + }, + { + "epoch": 0.6627390848196272, + "grad_norm": 0.6565523147583008, + "learning_rate": 0.0001286108588269231, + "loss": 2.57, + "step": 8212 + }, + { + "epoch": 0.6628197885562102, + "grad_norm": 0.7173478007316589, + "learning_rate": 0.00012859573150921666, + "loss": 2.589, + "step": 8213 + }, + { + "epoch": 0.6629004922927931, + "grad_norm": 0.7069580554962158, + "learning_rate": 0.00012858060347882975, + "loss": 2.6146, + "step": 8214 + }, + { + "epoch": 0.6629811960293761, + "grad_norm": 0.7004678249359131, + "learning_rate": 0.00012856547473613953, + "loss": 2.5735, + "step": 8215 + }, + { + "epoch": 0.6630618997659592, + "grad_norm": 0.6589130163192749, + "learning_rate": 0.00012855034528152305, + "loss": 2.5731, + "step": 8216 + }, + { + "epoch": 0.6631426035025422, + "grad_norm": 0.7223117351531982, + "learning_rate": 0.0001285352151153573, + "loss": 2.5262, + "step": 8217 + }, + { + "epoch": 0.6632233072391251, + "grad_norm": 0.7045131325721741, + "learning_rate": 0.0001285200842380194, + "loss": 2.5789, + "step": 8218 + }, + { + "epoch": 0.6633040109757081, + "grad_norm": 0.7002174854278564, + "learning_rate": 0.00012850495264988645, + "loss": 2.6386, + "step": 8219 + }, + { + "epoch": 0.6633847147122912, + "grad_norm": 
0.6844584941864014, + "learning_rate": 0.00012848982035133555, + "loss": 2.5394, + "step": 8220 + }, + { + "epoch": 0.6634654184488742, + "grad_norm": 0.7154871821403503, + "learning_rate": 0.00012847468734274387, + "loss": 2.5927, + "step": 8221 + }, + { + "epoch": 0.6635461221854572, + "grad_norm": 0.6856776475906372, + "learning_rate": 0.00012845955362448855, + "loss": 2.5694, + "step": 8222 + }, + { + "epoch": 0.6636268259220401, + "grad_norm": 0.7069089412689209, + "learning_rate": 0.00012844441919694676, + "loss": 2.5856, + "step": 8223 + }, + { + "epoch": 0.6637075296586232, + "grad_norm": 0.7084143161773682, + "learning_rate": 0.00012842928406049567, + "loss": 2.6301, + "step": 8224 + }, + { + "epoch": 0.6637882333952062, + "grad_norm": 0.6790862679481506, + "learning_rate": 0.00012841414821551252, + "loss": 2.5586, + "step": 8225 + }, + { + "epoch": 0.6638689371317892, + "grad_norm": 0.6537249684333801, + "learning_rate": 0.00012839901166237453, + "loss": 2.5652, + "step": 8226 + }, + { + "epoch": 0.6639496408683722, + "grad_norm": 0.6670125126838684, + "learning_rate": 0.00012838387440145893, + "loss": 2.5438, + "step": 8227 + }, + { + "epoch": 0.6640303446049552, + "grad_norm": 0.7202955484390259, + "learning_rate": 0.00012836873643314297, + "loss": 2.5632, + "step": 8228 + }, + { + "epoch": 0.6641110483415382, + "grad_norm": 0.6844765543937683, + "learning_rate": 0.00012835359775780394, + "loss": 2.5595, + "step": 8229 + }, + { + "epoch": 0.6641917520781212, + "grad_norm": 0.6557698249816895, + "learning_rate": 0.00012833845837581916, + "loss": 2.5998, + "step": 8230 + }, + { + "epoch": 0.6642724558147042, + "grad_norm": 0.6741784811019897, + "learning_rate": 0.0001283233182875659, + "loss": 2.5591, + "step": 8231 + }, + { + "epoch": 0.6643531595512873, + "grad_norm": 0.6926484704017639, + "learning_rate": 0.00012830817749342154, + "loss": 2.5557, + "step": 8232 + }, + { + "epoch": 0.6644338632878702, + "grad_norm": 0.6866984367370605, + 
"learning_rate": 0.00012829303599376336, + "loss": 2.5646, + "step": 8233 + }, + { + "epoch": 0.6645145670244532, + "grad_norm": 0.6772707104682922, + "learning_rate": 0.0001282778937889688, + "loss": 2.6028, + "step": 8234 + }, + { + "epoch": 0.6645952707610362, + "grad_norm": 0.693236768245697, + "learning_rate": 0.00012826275087941518, + "loss": 2.611, + "step": 8235 + }, + { + "epoch": 0.6646759744976193, + "grad_norm": 0.7181996703147888, + "learning_rate": 0.00012824760726547993, + "loss": 2.6081, + "step": 8236 + }, + { + "epoch": 0.6647566782342023, + "grad_norm": 0.6845484375953674, + "learning_rate": 0.00012823246294754048, + "loss": 2.5544, + "step": 8237 + }, + { + "epoch": 0.6648373819707852, + "grad_norm": 0.7106444239616394, + "learning_rate": 0.00012821731792597425, + "loss": 2.552, + "step": 8238 + }, + { + "epoch": 0.6649180857073682, + "grad_norm": 0.6930601000785828, + "learning_rate": 0.0001282021722011587, + "loss": 2.5401, + "step": 8239 + }, + { + "epoch": 0.6649987894439513, + "grad_norm": 0.6658228039741516, + "learning_rate": 0.00012818702577347129, + "loss": 2.6287, + "step": 8240 + }, + { + "epoch": 0.6650794931805343, + "grad_norm": 0.6919803619384766, + "learning_rate": 0.0001281718786432895, + "loss": 2.6142, + "step": 8241 + }, + { + "epoch": 0.6651601969171173, + "grad_norm": 0.6675698757171631, + "learning_rate": 0.00012815673081099086, + "loss": 2.5325, + "step": 8242 + }, + { + "epoch": 0.6652409006537002, + "grad_norm": 0.6669798493385315, + "learning_rate": 0.0001281415822769529, + "loss": 2.5355, + "step": 8243 + }, + { + "epoch": 0.6653216043902833, + "grad_norm": 0.6449857950210571, + "learning_rate": 0.00012812643304155316, + "loss": 2.5968, + "step": 8244 + }, + { + "epoch": 0.6654023081268663, + "grad_norm": 0.6972789168357849, + "learning_rate": 0.00012811128310516914, + "loss": 2.6133, + "step": 8245 + }, + { + "epoch": 0.6654830118634493, + "grad_norm": 0.7179878354072571, + "learning_rate": 0.0001280961324681785, + 
"loss": 2.5793, + "step": 8246 + }, + { + "epoch": 0.6655637156000322, + "grad_norm": 0.6736378073692322, + "learning_rate": 0.0001280809811309588, + "loss": 2.5543, + "step": 8247 + }, + { + "epoch": 0.6656444193366153, + "grad_norm": 0.7376420497894287, + "learning_rate": 0.00012806582909388763, + "loss": 2.5501, + "step": 8248 + }, + { + "epoch": 0.6657251230731983, + "grad_norm": 0.7163094878196716, + "learning_rate": 0.00012805067635734263, + "loss": 2.5538, + "step": 8249 + }, + { + "epoch": 0.6658058268097813, + "grad_norm": 0.7699353694915771, + "learning_rate": 0.00012803552292170144, + "loss": 2.5925, + "step": 8250 + }, + { + "epoch": 0.6658865305463643, + "grad_norm": 0.6504995822906494, + "learning_rate": 0.00012802036878734177, + "loss": 2.5944, + "step": 8251 + }, + { + "epoch": 0.6659672342829474, + "grad_norm": 0.7150379419326782, + "learning_rate": 0.0001280052139546412, + "loss": 2.5959, + "step": 8252 + }, + { + "epoch": 0.6660479380195303, + "grad_norm": 0.7562555074691772, + "learning_rate": 0.00012799005842397757, + "loss": 2.6041, + "step": 8253 + }, + { + "epoch": 0.6661286417561133, + "grad_norm": 0.7242838740348816, + "learning_rate": 0.00012797490219572846, + "loss": 2.6152, + "step": 8254 + }, + { + "epoch": 0.6662093454926963, + "grad_norm": 0.7062848210334778, + "learning_rate": 0.00012795974527027168, + "loss": 2.596, + "step": 8255 + }, + { + "epoch": 0.6662900492292794, + "grad_norm": 0.8179726004600525, + "learning_rate": 0.00012794458764798497, + "loss": 2.5792, + "step": 8256 + }, + { + "epoch": 0.6663707529658623, + "grad_norm": 0.692166268825531, + "learning_rate": 0.00012792942932924608, + "loss": 2.6025, + "step": 8257 + }, + { + "epoch": 0.6664514567024453, + "grad_norm": 0.6540334224700928, + "learning_rate": 0.0001279142703144328, + "loss": 2.5119, + "step": 8258 + }, + { + "epoch": 0.6665321604390283, + "grad_norm": 0.7087461352348328, + "learning_rate": 0.00012789911060392294, + "loss": 2.5808, + "step": 8259 + }, + { + 
"epoch": 0.6666128641756114, + "grad_norm": 0.6897622346878052, + "learning_rate": 0.0001278839501980943, + "loss": 2.5811, + "step": 8260 + }, + { + "epoch": 0.6666935679121944, + "grad_norm": 0.6653634905815125, + "learning_rate": 0.00012786878909732473, + "loss": 2.5498, + "step": 8261 + }, + { + "epoch": 0.6667742716487773, + "grad_norm": 0.6541483402252197, + "learning_rate": 0.0001278536273019921, + "loss": 2.605, + "step": 8262 + }, + { + "epoch": 0.6668549753853603, + "grad_norm": 0.6748146414756775, + "learning_rate": 0.00012783846481247428, + "loss": 2.5571, + "step": 8263 + }, + { + "epoch": 0.6669356791219433, + "grad_norm": 0.7258282899856567, + "learning_rate": 0.00012782330162914915, + "loss": 2.5562, + "step": 8264 + }, + { + "epoch": 0.6670163828585264, + "grad_norm": 0.6963080167770386, + "learning_rate": 0.00012780813775239457, + "loss": 2.6467, + "step": 8265 + }, + { + "epoch": 0.6670970865951094, + "grad_norm": 0.6627718806266785, + "learning_rate": 0.00012779297318258855, + "loss": 2.5369, + "step": 8266 + }, + { + "epoch": 0.6671777903316923, + "grad_norm": 0.7026168704032898, + "learning_rate": 0.00012777780792010897, + "loss": 2.5639, + "step": 8267 + }, + { + "epoch": 0.6672584940682753, + "grad_norm": 0.6969077587127686, + "learning_rate": 0.0001277626419653338, + "loss": 2.517, + "step": 8268 + }, + { + "epoch": 0.6673391978048584, + "grad_norm": 0.6918485760688782, + "learning_rate": 0.00012774747531864102, + "loss": 2.6388, + "step": 8269 + }, + { + "epoch": 0.6674199015414414, + "grad_norm": 0.6661256551742554, + "learning_rate": 0.00012773230798040862, + "loss": 2.5477, + "step": 8270 + }, + { + "epoch": 0.6675006052780244, + "grad_norm": 0.6778402328491211, + "learning_rate": 0.0001277171399510146, + "loss": 2.6032, + "step": 8271 + }, + { + "epoch": 0.6675813090146073, + "grad_norm": 0.6464864611625671, + "learning_rate": 0.00012770197123083702, + "loss": 2.5396, + "step": 8272 + }, + { + "epoch": 0.6676620127511904, + 
"grad_norm": 0.7154508233070374, + "learning_rate": 0.0001276868018202539, + "loss": 2.6163, + "step": 8273 + }, + { + "epoch": 0.6677427164877734, + "grad_norm": 0.6849631071090698, + "learning_rate": 0.0001276716317196433, + "loss": 2.549, + "step": 8274 + }, + { + "epoch": 0.6678234202243564, + "grad_norm": 0.6696017980575562, + "learning_rate": 0.00012765646092938334, + "loss": 2.5046, + "step": 8275 + }, + { + "epoch": 0.6679041239609393, + "grad_norm": 0.668153703212738, + "learning_rate": 0.00012764128944985203, + "loss": 2.5422, + "step": 8276 + }, + { + "epoch": 0.6679848276975224, + "grad_norm": 0.6600282192230225, + "learning_rate": 0.00012762611728142756, + "loss": 2.6117, + "step": 8277 + }, + { + "epoch": 0.6680655314341054, + "grad_norm": 0.6691608428955078, + "learning_rate": 0.000127610944424488, + "loss": 2.5761, + "step": 8278 + }, + { + "epoch": 0.6681462351706884, + "grad_norm": 0.695142924785614, + "learning_rate": 0.00012759577087941156, + "loss": 2.6123, + "step": 8279 + }, + { + "epoch": 0.6682269389072714, + "grad_norm": 0.6846559643745422, + "learning_rate": 0.00012758059664657635, + "loss": 2.5882, + "step": 8280 + }, + { + "epoch": 0.6683076426438544, + "grad_norm": 0.7616459131240845, + "learning_rate": 0.0001275654217263606, + "loss": 2.5559, + "step": 8281 + }, + { + "epoch": 0.6683883463804374, + "grad_norm": 0.6995570063591003, + "learning_rate": 0.00012755024611914246, + "loss": 2.5336, + "step": 8282 + }, + { + "epoch": 0.6684690501170204, + "grad_norm": 0.7199691534042358, + "learning_rate": 0.0001275350698253002, + "loss": 2.6618, + "step": 8283 + }, + { + "epoch": 0.6685497538536034, + "grad_norm": 0.6938748955726624, + "learning_rate": 0.000127519892845212, + "loss": 2.574, + "step": 8284 + }, + { + "epoch": 0.6686304575901865, + "grad_norm": 0.6827714443206787, + "learning_rate": 0.00012750471517925614, + "loss": 2.5647, + "step": 8285 + }, + { + "epoch": 0.6687111613267694, + "grad_norm": 0.6684606671333313, + 
"learning_rate": 0.00012748953682781083, + "loss": 2.528, + "step": 8286 + }, + { + "epoch": 0.6687918650633524, + "grad_norm": 0.6842156052589417, + "learning_rate": 0.00012747435779125448, + "loss": 2.5521, + "step": 8287 + }, + { + "epoch": 0.6688725687999354, + "grad_norm": 0.7440506219863892, + "learning_rate": 0.0001274591780699653, + "loss": 2.5646, + "step": 8288 + }, + { + "epoch": 0.6689532725365185, + "grad_norm": 0.769922137260437, + "learning_rate": 0.0001274439976643216, + "loss": 2.6104, + "step": 8289 + }, + { + "epoch": 0.6690339762731015, + "grad_norm": 0.7793089747428894, + "learning_rate": 0.00012742881657470175, + "loss": 2.6348, + "step": 8290 + }, + { + "epoch": 0.6691146800096844, + "grad_norm": 0.695060133934021, + "learning_rate": 0.0001274136348014841, + "loss": 2.5797, + "step": 8291 + }, + { + "epoch": 0.6691953837462674, + "grad_norm": 0.7089917659759521, + "learning_rate": 0.00012739845234504697, + "loss": 2.5431, + "step": 8292 + }, + { + "epoch": 0.6692760874828505, + "grad_norm": 0.7542717456817627, + "learning_rate": 0.00012738326920576885, + "loss": 2.6172, + "step": 8293 + }, + { + "epoch": 0.6693567912194335, + "grad_norm": 0.6947969794273376, + "learning_rate": 0.00012736808538402802, + "loss": 2.6026, + "step": 8294 + }, + { + "epoch": 0.6694374949560165, + "grad_norm": 0.6696321368217468, + "learning_rate": 0.00012735290088020302, + "loss": 2.5592, + "step": 8295 + }, + { + "epoch": 0.6695181986925994, + "grad_norm": 0.7001518607139587, + "learning_rate": 0.0001273377156946722, + "loss": 2.5994, + "step": 8296 + }, + { + "epoch": 0.6695989024291825, + "grad_norm": 0.6708101630210876, + "learning_rate": 0.000127322529827814, + "loss": 2.6392, + "step": 8297 + }, + { + "epoch": 0.6696796061657655, + "grad_norm": 0.6282601952552795, + "learning_rate": 0.000127307343280007, + "loss": 2.5762, + "step": 8298 + }, + { + "epoch": 0.6697603099023485, + "grad_norm": 0.6879595518112183, + "learning_rate": 0.0001272921560516296, + 
"loss": 2.5507, + "step": 8299 + }, + { + "epoch": 0.6698410136389314, + "grad_norm": 0.6108266115188599, + "learning_rate": 0.00012727696814306033, + "loss": 2.5865, + "step": 8300 + }, + { + "epoch": 0.6699217173755145, + "grad_norm": 0.6763970851898193, + "learning_rate": 0.0001272617795546777, + "loss": 2.6439, + "step": 8301 + }, + { + "epoch": 0.6700024211120975, + "grad_norm": 0.6997560858726501, + "learning_rate": 0.00012724659028686027, + "loss": 2.5291, + "step": 8302 + }, + { + "epoch": 0.6700831248486805, + "grad_norm": 0.675714910030365, + "learning_rate": 0.0001272314003399866, + "loss": 2.5452, + "step": 8303 + }, + { + "epoch": 0.6701638285852635, + "grad_norm": 0.6847789883613586, + "learning_rate": 0.00012721620971443525, + "loss": 2.6111, + "step": 8304 + }, + { + "epoch": 0.6702445323218466, + "grad_norm": 0.7283920645713806, + "learning_rate": 0.0001272010184105848, + "loss": 2.6322, + "step": 8305 + }, + { + "epoch": 0.6703252360584295, + "grad_norm": 0.7551796436309814, + "learning_rate": 0.00012718582642881382, + "loss": 2.5728, + "step": 8306 + }, + { + "epoch": 0.6704059397950125, + "grad_norm": 0.694526195526123, + "learning_rate": 0.00012717063376950104, + "loss": 2.6241, + "step": 8307 + }, + { + "epoch": 0.6704866435315955, + "grad_norm": 0.6956443190574646, + "learning_rate": 0.00012715544043302504, + "loss": 2.5531, + "step": 8308 + }, + { + "epoch": 0.6705673472681786, + "grad_norm": 0.7649452686309814, + "learning_rate": 0.00012714024641976446, + "loss": 2.5462, + "step": 8309 + }, + { + "epoch": 0.6706480510047615, + "grad_norm": 0.7711065411567688, + "learning_rate": 0.00012712505173009797, + "loss": 2.5878, + "step": 8310 + }, + { + "epoch": 0.6707287547413445, + "grad_norm": 0.68077552318573, + "learning_rate": 0.00012710985636440434, + "loss": 2.5668, + "step": 8311 + }, + { + "epoch": 0.6708094584779275, + "grad_norm": 0.7181024551391602, + "learning_rate": 0.0001270946603230622, + "loss": 2.6104, + "step": 8312 + }, + { + 
"epoch": 0.6708901622145105, + "grad_norm": 0.7136553525924683, + "learning_rate": 0.0001270794636064503, + "loss": 2.5282, + "step": 8313 + }, + { + "epoch": 0.6709708659510936, + "grad_norm": 0.880094587802887, + "learning_rate": 0.00012706426621494736, + "loss": 2.5837, + "step": 8314 + }, + { + "epoch": 0.6710515696876765, + "grad_norm": 0.7438541054725647, + "learning_rate": 0.00012704906814893217, + "loss": 2.5577, + "step": 8315 + }, + { + "epoch": 0.6711322734242595, + "grad_norm": 0.8197470903396606, + "learning_rate": 0.00012703386940878352, + "loss": 2.569, + "step": 8316 + }, + { + "epoch": 0.6712129771608425, + "grad_norm": 0.7728317975997925, + "learning_rate": 0.00012701866999488014, + "loss": 2.6407, + "step": 8317 + }, + { + "epoch": 0.6712936808974256, + "grad_norm": 0.7594823837280273, + "learning_rate": 0.0001270034699076009, + "loss": 2.5789, + "step": 8318 + }, + { + "epoch": 0.6713743846340086, + "grad_norm": 0.7502284646034241, + "learning_rate": 0.0001269882691473246, + "loss": 2.6068, + "step": 8319 + }, + { + "epoch": 0.6714550883705915, + "grad_norm": 0.7355664372444153, + "learning_rate": 0.0001269730677144301, + "loss": 2.6055, + "step": 8320 + }, + { + "epoch": 0.6715357921071745, + "grad_norm": 0.7218407392501831, + "learning_rate": 0.0001269578656092962, + "loss": 2.5953, + "step": 8321 + }, + { + "epoch": 0.6716164958437576, + "grad_norm": 0.6932538747787476, + "learning_rate": 0.00012694266283230185, + "loss": 2.5795, + "step": 8322 + }, + { + "epoch": 0.6716971995803406, + "grad_norm": 0.7337260246276855, + "learning_rate": 0.00012692745938382591, + "loss": 2.5606, + "step": 8323 + }, + { + "epoch": 0.6717779033169236, + "grad_norm": 0.6959026455879211, + "learning_rate": 0.00012691225526424731, + "loss": 2.5688, + "step": 8324 + }, + { + "epoch": 0.6718586070535065, + "grad_norm": 0.7352995872497559, + "learning_rate": 0.00012689705047394493, + "loss": 2.6308, + "step": 8325 + }, + { + "epoch": 0.6719393107900896, + "grad_norm": 
0.7023616433143616, + "learning_rate": 0.00012688184501329777, + "loss": 2.6462, + "step": 8326 + }, + { + "epoch": 0.6720200145266726, + "grad_norm": 0.6581354737281799, + "learning_rate": 0.00012686663888268474, + "loss": 2.5997, + "step": 8327 + }, + { + "epoch": 0.6721007182632556, + "grad_norm": 0.6332606077194214, + "learning_rate": 0.00012685143208248484, + "loss": 2.6348, + "step": 8328 + }, + { + "epoch": 0.6721814219998385, + "grad_norm": 0.6826457977294922, + "learning_rate": 0.00012683622461307707, + "loss": 2.5092, + "step": 8329 + }, + { + "epoch": 0.6722621257364216, + "grad_norm": 0.7641614079475403, + "learning_rate": 0.00012682101647484042, + "loss": 2.7098, + "step": 8330 + }, + { + "epoch": 0.6723428294730046, + "grad_norm": 0.7153630256652832, + "learning_rate": 0.00012680580766815394, + "loss": 2.5647, + "step": 8331 + }, + { + "epoch": 0.6724235332095876, + "grad_norm": 0.6746379137039185, + "learning_rate": 0.00012679059819339664, + "loss": 2.6187, + "step": 8332 + }, + { + "epoch": 0.6725042369461706, + "grad_norm": 0.6748883128166199, + "learning_rate": 0.00012677538805094764, + "loss": 2.6045, + "step": 8333 + }, + { + "epoch": 0.6725849406827537, + "grad_norm": 0.7366370558738708, + "learning_rate": 0.00012676017724118596, + "loss": 2.5789, + "step": 8334 + }, + { + "epoch": 0.6726656444193366, + "grad_norm": 0.7381749153137207, + "learning_rate": 0.00012674496576449074, + "loss": 2.5958, + "step": 8335 + }, + { + "epoch": 0.6727463481559196, + "grad_norm": 0.7109243869781494, + "learning_rate": 0.00012672975362124103, + "loss": 2.5874, + "step": 8336 + }, + { + "epoch": 0.6728270518925026, + "grad_norm": 0.6904270052909851, + "learning_rate": 0.00012671454081181595, + "loss": 2.5891, + "step": 8337 + }, + { + "epoch": 0.6729077556290857, + "grad_norm": 0.6809365749359131, + "learning_rate": 0.00012669932733659476, + "loss": 2.5904, + "step": 8338 + }, + { + "epoch": 0.6729884593656686, + "grad_norm": 0.7527552843093872, + 
"learning_rate": 0.00012668411319595647, + "loss": 2.5602, + "step": 8339 + }, + { + "epoch": 0.6730691631022516, + "grad_norm": 0.6746577620506287, + "learning_rate": 0.00012666889839028038, + "loss": 2.5468, + "step": 8340 + }, + { + "epoch": 0.6731498668388346, + "grad_norm": 0.6904895305633545, + "learning_rate": 0.00012665368291994562, + "loss": 2.623, + "step": 8341 + }, + { + "epoch": 0.6732305705754177, + "grad_norm": 0.6495908498764038, + "learning_rate": 0.00012663846678533135, + "loss": 2.5843, + "step": 8342 + }, + { + "epoch": 0.6733112743120007, + "grad_norm": 0.6782342195510864, + "learning_rate": 0.00012662324998681692, + "loss": 2.6141, + "step": 8343 + }, + { + "epoch": 0.6733919780485836, + "grad_norm": 0.7090504765510559, + "learning_rate": 0.0001266080325247815, + "loss": 2.6654, + "step": 8344 + }, + { + "epoch": 0.6734726817851666, + "grad_norm": 0.7085515856742859, + "learning_rate": 0.00012659281439960434, + "loss": 2.5394, + "step": 8345 + }, + { + "epoch": 0.6735533855217497, + "grad_norm": 0.6813806295394897, + "learning_rate": 0.00012657759561166473, + "loss": 2.6522, + "step": 8346 + }, + { + "epoch": 0.6736340892583327, + "grad_norm": 0.726378858089447, + "learning_rate": 0.00012656237616134197, + "loss": 2.5922, + "step": 8347 + }, + { + "epoch": 0.6737147929949157, + "grad_norm": 0.6323714256286621, + "learning_rate": 0.00012654715604901534, + "loss": 2.4938, + "step": 8348 + }, + { + "epoch": 0.6737954967314986, + "grad_norm": 0.6925889253616333, + "learning_rate": 0.0001265319352750642, + "loss": 2.635, + "step": 8349 + }, + { + "epoch": 0.6738762004680817, + "grad_norm": 0.6676003932952881, + "learning_rate": 0.00012651671383986788, + "loss": 2.558, + "step": 8350 + }, + { + "epoch": 0.6739569042046647, + "grad_norm": 0.7464616298675537, + "learning_rate": 0.00012650149174380575, + "loss": 2.5777, + "step": 8351 + }, + { + "epoch": 0.6740376079412477, + "grad_norm": 0.6611667275428772, + "learning_rate": 0.00012648626898725715, + 
"loss": 2.5779, + "step": 8352 + }, + { + "epoch": 0.6741183116778307, + "grad_norm": 0.7391866445541382, + "learning_rate": 0.00012647104557060148, + "loss": 2.5624, + "step": 8353 + }, + { + "epoch": 0.6741990154144137, + "grad_norm": 0.7107826471328735, + "learning_rate": 0.00012645582149421817, + "loss": 2.5744, + "step": 8354 + }, + { + "epoch": 0.6742797191509967, + "grad_norm": 0.7385339736938477, + "learning_rate": 0.00012644059675848666, + "loss": 2.5752, + "step": 8355 + }, + { + "epoch": 0.6743604228875797, + "grad_norm": 0.6887345314025879, + "learning_rate": 0.00012642537136378634, + "loss": 2.5794, + "step": 8356 + }, + { + "epoch": 0.6744411266241627, + "grad_norm": 0.6934933662414551, + "learning_rate": 0.00012641014531049666, + "loss": 2.5361, + "step": 8357 + }, + { + "epoch": 0.6745218303607458, + "grad_norm": 0.7437291741371155, + "learning_rate": 0.00012639491859899716, + "loss": 2.5741, + "step": 8358 + }, + { + "epoch": 0.6746025340973287, + "grad_norm": 0.7088494896888733, + "learning_rate": 0.00012637969122966729, + "loss": 2.6449, + "step": 8359 + }, + { + "epoch": 0.6746832378339117, + "grad_norm": 0.7496390342712402, + "learning_rate": 0.00012636446320288654, + "loss": 2.6109, + "step": 8360 + }, + { + "epoch": 0.6747639415704947, + "grad_norm": 0.6949843764305115, + "learning_rate": 0.00012634923451903447, + "loss": 2.5769, + "step": 8361 + }, + { + "epoch": 0.6748446453070778, + "grad_norm": 0.7192673087120056, + "learning_rate": 0.00012633400517849056, + "loss": 2.6053, + "step": 8362 + }, + { + "epoch": 0.6749253490436607, + "grad_norm": 0.7003379464149475, + "learning_rate": 0.00012631877518163442, + "loss": 2.5745, + "step": 8363 + }, + { + "epoch": 0.6750060527802437, + "grad_norm": 0.7499879002571106, + "learning_rate": 0.00012630354452884563, + "loss": 2.6077, + "step": 8364 + }, + { + "epoch": 0.6750867565168267, + "grad_norm": 0.7047405242919922, + "learning_rate": 0.00012628831322050377, + "loss": 2.5955, + "step": 8365 + }, 
+ { + "epoch": 0.6751674602534097, + "grad_norm": 0.7463203072547913, + "learning_rate": 0.00012627308125698838, + "loss": 2.5421, + "step": 8366 + }, + { + "epoch": 0.6752481639899928, + "grad_norm": 0.7377086877822876, + "learning_rate": 0.00012625784863867914, + "loss": 2.5804, + "step": 8367 + }, + { + "epoch": 0.6753288677265757, + "grad_norm": 0.7136400938034058, + "learning_rate": 0.00012624261536595566, + "loss": 2.5673, + "step": 8368 + }, + { + "epoch": 0.6754095714631587, + "grad_norm": 0.6923615336418152, + "learning_rate": 0.0001262273814391976, + "loss": 2.5832, + "step": 8369 + }, + { + "epoch": 0.6754902751997417, + "grad_norm": 0.7495028972625732, + "learning_rate": 0.00012621214685878469, + "loss": 2.5943, + "step": 8370 + }, + { + "epoch": 0.6755709789363248, + "grad_norm": 0.6751434206962585, + "learning_rate": 0.0001261969116250965, + "loss": 2.5495, + "step": 8371 + }, + { + "epoch": 0.6756516826729078, + "grad_norm": 0.7055973410606384, + "learning_rate": 0.00012618167573851284, + "loss": 2.5651, + "step": 8372 + }, + { + "epoch": 0.6757323864094907, + "grad_norm": 0.7479640245437622, + "learning_rate": 0.00012616643919941337, + "loss": 2.653, + "step": 8373 + }, + { + "epoch": 0.6758130901460737, + "grad_norm": 0.7075015902519226, + "learning_rate": 0.00012615120200817778, + "loss": 2.5787, + "step": 8374 + }, + { + "epoch": 0.6758937938826568, + "grad_norm": 0.7513934969902039, + "learning_rate": 0.00012613596416518593, + "loss": 2.6099, + "step": 8375 + }, + { + "epoch": 0.6759744976192398, + "grad_norm": 0.6742326021194458, + "learning_rate": 0.00012612072567081754, + "loss": 2.5335, + "step": 8376 + }, + { + "epoch": 0.6760552013558228, + "grad_norm": 0.7271459698677063, + "learning_rate": 0.00012610548652545239, + "loss": 2.6082, + "step": 8377 + }, + { + "epoch": 0.6761359050924057, + "grad_norm": 0.7481515407562256, + "learning_rate": 0.00012609024672947022, + "loss": 2.5805, + "step": 8378 + }, + { + "epoch": 0.6762166088289888, + 
"grad_norm": 0.7484803199768066, + "learning_rate": 0.00012607500628325093, + "loss": 2.6099, + "step": 8379 + }, + { + "epoch": 0.6762973125655718, + "grad_norm": 0.7462390661239624, + "learning_rate": 0.00012605976518717435, + "loss": 2.6054, + "step": 8380 + }, + { + "epoch": 0.6763780163021548, + "grad_norm": 0.7014410495758057, + "learning_rate": 0.00012604452344162028, + "loss": 2.5614, + "step": 8381 + }, + { + "epoch": 0.6764587200387377, + "grad_norm": 0.6902963519096375, + "learning_rate": 0.0001260292810469686, + "loss": 2.5813, + "step": 8382 + }, + { + "epoch": 0.6765394237753208, + "grad_norm": 0.6646186113357544, + "learning_rate": 0.00012601403800359919, + "loss": 2.545, + "step": 8383 + }, + { + "epoch": 0.6766201275119038, + "grad_norm": 0.7067462801933289, + "learning_rate": 0.00012599879431189197, + "loss": 2.6195, + "step": 8384 + }, + { + "epoch": 0.6767008312484868, + "grad_norm": 0.7263965010643005, + "learning_rate": 0.0001259835499722268, + "loss": 2.5929, + "step": 8385 + }, + { + "epoch": 0.6767815349850698, + "grad_norm": 0.6672000885009766, + "learning_rate": 0.0001259683049849837, + "loss": 2.5561, + "step": 8386 + }, + { + "epoch": 0.6768622387216529, + "grad_norm": 0.6543236374855042, + "learning_rate": 0.0001259530593505425, + "loss": 2.6256, + "step": 8387 + }, + { + "epoch": 0.6769429424582358, + "grad_norm": 0.6532339453697205, + "learning_rate": 0.00012593781306928324, + "loss": 2.5074, + "step": 8388 + }, + { + "epoch": 0.6770236461948188, + "grad_norm": 0.7442833185195923, + "learning_rate": 0.00012592256614158591, + "loss": 2.6124, + "step": 8389 + }, + { + "epoch": 0.6771043499314018, + "grad_norm": 0.786685585975647, + "learning_rate": 0.00012590731856783043, + "loss": 2.6077, + "step": 8390 + }, + { + "epoch": 0.6771850536679849, + "grad_norm": 0.7952337265014648, + "learning_rate": 0.00012589207034839687, + "loss": 2.5894, + "step": 8391 + }, + { + "epoch": 0.6772657574045678, + "grad_norm": 0.7847954034805298, + 
"learning_rate": 0.00012587682148366524, + "loss": 2.4934, + "step": 8392 + }, + { + "epoch": 0.6773464611411508, + "grad_norm": 0.6769007444381714, + "learning_rate": 0.00012586157197401552, + "loss": 2.5695, + "step": 8393 + }, + { + "epoch": 0.6774271648777338, + "grad_norm": 0.6583757996559143, + "learning_rate": 0.00012584632181982788, + "loss": 2.5866, + "step": 8394 + }, + { + "epoch": 0.6775078686143169, + "grad_norm": 0.7375823855400085, + "learning_rate": 0.0001258310710214823, + "loss": 2.5141, + "step": 8395 + }, + { + "epoch": 0.6775885723508999, + "grad_norm": 0.6901078224182129, + "learning_rate": 0.00012581581957935896, + "loss": 2.5732, + "step": 8396 + }, + { + "epoch": 0.6776692760874828, + "grad_norm": 0.687152624130249, + "learning_rate": 0.0001258005674938379, + "loss": 2.5916, + "step": 8397 + }, + { + "epoch": 0.6777499798240658, + "grad_norm": 0.7198586463928223, + "learning_rate": 0.00012578531476529917, + "loss": 2.5626, + "step": 8398 + }, + { + "epoch": 0.6778306835606489, + "grad_norm": 0.7417474985122681, + "learning_rate": 0.00012577006139412309, + "loss": 2.5486, + "step": 8399 + }, + { + "epoch": 0.6779113872972319, + "grad_norm": 0.6588087677955627, + "learning_rate": 0.0001257548073806897, + "loss": 2.6123, + "step": 8400 + }, + { + "epoch": 0.6779920910338149, + "grad_norm": 0.7211382389068604, + "learning_rate": 0.00012573955272537915, + "loss": 2.6402, + "step": 8401 + }, + { + "epoch": 0.6780727947703978, + "grad_norm": 0.7196084856987, + "learning_rate": 0.00012572429742857167, + "loss": 2.51, + "step": 8402 + }, + { + "epoch": 0.6781534985069809, + "grad_norm": 0.6399394273757935, + "learning_rate": 0.00012570904149064748, + "loss": 2.5309, + "step": 8403 + }, + { + "epoch": 0.6782342022435639, + "grad_norm": 0.6969572305679321, + "learning_rate": 0.00012569378491198674, + "loss": 2.5829, + "step": 8404 + }, + { + "epoch": 0.6783149059801469, + "grad_norm": 0.8005492091178894, + "learning_rate": 0.00012567852769296975, + 
"loss": 2.6277, + "step": 8405 + }, + { + "epoch": 0.6783956097167299, + "grad_norm": 0.6786207556724548, + "learning_rate": 0.0001256632698339767, + "loss": 2.5839, + "step": 8406 + }, + { + "epoch": 0.6784763134533129, + "grad_norm": 0.7047130465507507, + "learning_rate": 0.0001256480113353879, + "loss": 2.533, + "step": 8407 + }, + { + "epoch": 0.6785570171898959, + "grad_norm": 0.7640479803085327, + "learning_rate": 0.0001256327521975836, + "loss": 2.5855, + "step": 8408 + }, + { + "epoch": 0.6786377209264789, + "grad_norm": 0.728111207485199, + "learning_rate": 0.00012561749242094412, + "loss": 2.6184, + "step": 8409 + }, + { + "epoch": 0.6787184246630619, + "grad_norm": 0.7842772603034973, + "learning_rate": 0.00012560223200584975, + "loss": 2.5915, + "step": 8410 + }, + { + "epoch": 0.678799128399645, + "grad_norm": 0.7129092812538147, + "learning_rate": 0.00012558697095268085, + "loss": 2.6526, + "step": 8411 + }, + { + "epoch": 0.6788798321362279, + "grad_norm": 0.751103401184082, + "learning_rate": 0.00012557170926181773, + "loss": 2.605, + "step": 8412 + }, + { + "epoch": 0.6789605358728109, + "grad_norm": 0.6850594878196716, + "learning_rate": 0.0001255564469336408, + "loss": 2.6047, + "step": 8413 + }, + { + "epoch": 0.6790412396093939, + "grad_norm": 0.703037679195404, + "learning_rate": 0.00012554118396853036, + "loss": 2.653, + "step": 8414 + }, + { + "epoch": 0.6791219433459769, + "grad_norm": 0.8097915053367615, + "learning_rate": 0.0001255259203668669, + "loss": 2.5937, + "step": 8415 + }, + { + "epoch": 0.67920264708256, + "grad_norm": 0.700351357460022, + "learning_rate": 0.00012551065612903076, + "loss": 2.6089, + "step": 8416 + }, + { + "epoch": 0.6792833508191429, + "grad_norm": 0.6760888695716858, + "learning_rate": 0.00012549539125540236, + "loss": 2.547, + "step": 8417 + }, + { + "epoch": 0.6793640545557259, + "grad_norm": 0.6751723289489746, + "learning_rate": 0.0001254801257463622, + "loss": 2.625, + "step": 8418 + }, + { + "epoch": 
0.6794447582923089, + "grad_norm": 0.6928921937942505, + "learning_rate": 0.00012546485960229065, + "loss": 2.5671, + "step": 8419 + }, + { + "epoch": 0.679525462028892, + "grad_norm": 0.6541565656661987, + "learning_rate": 0.0001254495928235683, + "loss": 2.5837, + "step": 8420 + }, + { + "epoch": 0.679606165765475, + "grad_norm": 0.6228676438331604, + "learning_rate": 0.00012543432541057555, + "loss": 2.5798, + "step": 8421 + }, + { + "epoch": 0.6796868695020579, + "grad_norm": 0.7620853185653687, + "learning_rate": 0.0001254190573636929, + "loss": 2.5885, + "step": 8422 + }, + { + "epoch": 0.6797675732386409, + "grad_norm": 0.7425604462623596, + "learning_rate": 0.0001254037886833009, + "loss": 2.6124, + "step": 8423 + }, + { + "epoch": 0.679848276975224, + "grad_norm": 0.7150974273681641, + "learning_rate": 0.0001253885193697801, + "loss": 2.5423, + "step": 8424 + }, + { + "epoch": 0.679928980711807, + "grad_norm": 0.672649621963501, + "learning_rate": 0.000125373249423511, + "loss": 2.5563, + "step": 8425 + }, + { + "epoch": 0.6800096844483899, + "grad_norm": 0.6913620829582214, + "learning_rate": 0.00012535797884487425, + "loss": 2.5261, + "step": 8426 + }, + { + "epoch": 0.6800903881849729, + "grad_norm": 0.712123692035675, + "learning_rate": 0.00012534270763425034, + "loss": 2.5958, + "step": 8427 + }, + { + "epoch": 0.680171091921556, + "grad_norm": 0.7593061327934265, + "learning_rate": 0.00012532743579201993, + "loss": 2.6036, + "step": 8428 + }, + { + "epoch": 0.680251795658139, + "grad_norm": 0.7108714580535889, + "learning_rate": 0.0001253121633185636, + "loss": 2.6004, + "step": 8429 + }, + { + "epoch": 0.680332499394722, + "grad_norm": 0.7142449021339417, + "learning_rate": 0.00012529689021426198, + "loss": 2.588, + "step": 8430 + }, + { + "epoch": 0.6804132031313049, + "grad_norm": 0.7579841017723083, + "learning_rate": 0.00012528161647949574, + "loss": 2.5927, + "step": 8431 + }, + { + "epoch": 0.680493906867888, + "grad_norm": 0.6522083878517151, 
+ "learning_rate": 0.00012526634211464555, + "loss": 2.5619, + "step": 8432 + }, + { + "epoch": 0.680574610604471, + "grad_norm": 0.7681782245635986, + "learning_rate": 0.00012525106712009203, + "loss": 2.6065, + "step": 8433 + }, + { + "epoch": 0.680655314341054, + "grad_norm": 0.6900169253349304, + "learning_rate": 0.00012523579149621594, + "loss": 2.5507, + "step": 8434 + }, + { + "epoch": 0.680736018077637, + "grad_norm": 0.6907666325569153, + "learning_rate": 0.00012522051524339794, + "loss": 2.5213, + "step": 8435 + }, + { + "epoch": 0.68081672181422, + "grad_norm": 0.7202023267745972, + "learning_rate": 0.0001252052383620188, + "loss": 2.6367, + "step": 8436 + }, + { + "epoch": 0.680897425550803, + "grad_norm": 0.7893621325492859, + "learning_rate": 0.00012518996085245925, + "loss": 2.6066, + "step": 8437 + }, + { + "epoch": 0.680978129287386, + "grad_norm": 0.7693532109260559, + "learning_rate": 0.00012517468271509998, + "loss": 2.5346, + "step": 8438 + }, + { + "epoch": 0.681058833023969, + "grad_norm": 0.7976840734481812, + "learning_rate": 0.0001251594039503218, + "loss": 2.5991, + "step": 8439 + }, + { + "epoch": 0.6811395367605521, + "grad_norm": 0.7671225666999817, + "learning_rate": 0.00012514412455850554, + "loss": 2.5959, + "step": 8440 + }, + { + "epoch": 0.681220240497135, + "grad_norm": 0.7143450975418091, + "learning_rate": 0.00012512884454003194, + "loss": 2.5828, + "step": 8441 + }, + { + "epoch": 0.681300944233718, + "grad_norm": 0.6821861863136292, + "learning_rate": 0.00012511356389528192, + "loss": 2.5908, + "step": 8442 + }, + { + "epoch": 0.681381647970301, + "grad_norm": 0.7279960513114929, + "learning_rate": 0.00012509828262463615, + "loss": 2.578, + "step": 8443 + }, + { + "epoch": 0.6814623517068841, + "grad_norm": 0.6503065824508667, + "learning_rate": 0.0001250830007284756, + "loss": 2.525, + "step": 8444 + }, + { + "epoch": 0.681543055443467, + "grad_norm": 0.7276029586791992, + "learning_rate": 0.00012506771820718112, + "loss": 
2.584, + "step": 8445 + }, + { + "epoch": 0.68162375918005, + "grad_norm": 0.7635578513145447, + "learning_rate": 0.00012505243506113356, + "loss": 2.627, + "step": 8446 + }, + { + "epoch": 0.681704462916633, + "grad_norm": 0.7086981534957886, + "learning_rate": 0.00012503715129071386, + "loss": 2.6164, + "step": 8447 + }, + { + "epoch": 0.6817851666532161, + "grad_norm": 0.7144165635108948, + "learning_rate": 0.00012502186689630285, + "loss": 2.5642, + "step": 8448 + }, + { + "epoch": 0.6818658703897991, + "grad_norm": 0.8135093450546265, + "learning_rate": 0.00012500658187828155, + "loss": 2.6161, + "step": 8449 + }, + { + "epoch": 0.681946574126382, + "grad_norm": 0.7223377227783203, + "learning_rate": 0.00012499129623703086, + "loss": 2.6192, + "step": 8450 + }, + { + "epoch": 0.682027277862965, + "grad_norm": 0.7189127206802368, + "learning_rate": 0.00012497600997293172, + "loss": 2.6086, + "step": 8451 + }, + { + "epoch": 0.6821079815995481, + "grad_norm": 0.6742144823074341, + "learning_rate": 0.00012496072308636514, + "loss": 2.5747, + "step": 8452 + }, + { + "epoch": 0.6821886853361311, + "grad_norm": 0.7432419657707214, + "learning_rate": 0.0001249454355777121, + "loss": 2.5687, + "step": 8453 + }, + { + "epoch": 0.6822693890727141, + "grad_norm": 0.6140317320823669, + "learning_rate": 0.00012493014744735357, + "loss": 2.5371, + "step": 8454 + }, + { + "epoch": 0.682350092809297, + "grad_norm": 0.7215768098831177, + "learning_rate": 0.0001249148586956706, + "loss": 2.6806, + "step": 8455 + }, + { + "epoch": 0.6824307965458801, + "grad_norm": 0.7485790252685547, + "learning_rate": 0.0001248995693230442, + "loss": 2.575, + "step": 8456 + }, + { + "epoch": 0.6825115002824631, + "grad_norm": 0.744349479675293, + "learning_rate": 0.00012488427932985552, + "loss": 2.5961, + "step": 8457 + }, + { + "epoch": 0.6825922040190461, + "grad_norm": 0.6784959435462952, + "learning_rate": 0.0001248689887164855, + "loss": 2.5501, + "step": 8458 + }, + { + "epoch": 
0.682672907755629, + "grad_norm": 0.6664010286331177, + "learning_rate": 0.0001248536974833153, + "loss": 2.5741, + "step": 8459 + }, + { + "epoch": 0.6827536114922121, + "grad_norm": 0.7185953259468079, + "learning_rate": 0.00012483840563072592, + "loss": 2.5875, + "step": 8460 + }, + { + "epoch": 0.6828343152287951, + "grad_norm": 0.6553035378456116, + "learning_rate": 0.00012482311315909864, + "loss": 2.5321, + "step": 8461 + }, + { + "epoch": 0.6829150189653781, + "grad_norm": 0.6713398694992065, + "learning_rate": 0.00012480782006881442, + "loss": 2.6207, + "step": 8462 + }, + { + "epoch": 0.6829957227019611, + "grad_norm": 0.6733734607696533, + "learning_rate": 0.00012479252636025452, + "loss": 2.5746, + "step": 8463 + }, + { + "epoch": 0.6830764264385442, + "grad_norm": 0.7257994413375854, + "learning_rate": 0.00012477723203380004, + "loss": 2.5837, + "step": 8464 + }, + { + "epoch": 0.6831571301751271, + "grad_norm": 0.716242253780365, + "learning_rate": 0.00012476193708983214, + "loss": 2.5611, + "step": 8465 + }, + { + "epoch": 0.6832378339117101, + "grad_norm": 0.6797829866409302, + "learning_rate": 0.0001247466415287321, + "loss": 2.5763, + "step": 8466 + }, + { + "epoch": 0.6833185376482931, + "grad_norm": 0.679931640625, + "learning_rate": 0.000124731345350881, + "loss": 2.606, + "step": 8467 + }, + { + "epoch": 0.6833992413848761, + "grad_norm": 0.6767866611480713, + "learning_rate": 0.00012471604855666016, + "loss": 2.5682, + "step": 8468 + }, + { + "epoch": 0.6834799451214592, + "grad_norm": 0.7297048568725586, + "learning_rate": 0.00012470075114645078, + "loss": 2.5527, + "step": 8469 + }, + { + "epoch": 0.6835606488580421, + "grad_norm": 0.6882644295692444, + "learning_rate": 0.0001246854531206341, + "loss": 2.5712, + "step": 8470 + }, + { + "epoch": 0.6836413525946251, + "grad_norm": 0.7129159569740295, + "learning_rate": 0.00012467015447959143, + "loss": 2.5627, + "step": 8471 + }, + { + "epoch": 0.6837220563312081, + "grad_norm": 
0.6671481728553772, + "learning_rate": 0.000124654855223704, + "loss": 2.6226, + "step": 8472 + }, + { + "epoch": 0.6838027600677912, + "grad_norm": 0.7096946835517883, + "learning_rate": 0.00012463955535335313, + "loss": 2.5373, + "step": 8473 + }, + { + "epoch": 0.6838834638043741, + "grad_norm": 0.6781395077705383, + "learning_rate": 0.00012462425486892012, + "loss": 2.5607, + "step": 8474 + }, + { + "epoch": 0.6839641675409571, + "grad_norm": 0.6777891516685486, + "learning_rate": 0.00012460895377078632, + "loss": 2.5991, + "step": 8475 + }, + { + "epoch": 0.6840448712775401, + "grad_norm": 0.7175275087356567, + "learning_rate": 0.00012459365205933306, + "loss": 2.6006, + "step": 8476 + }, + { + "epoch": 0.6841255750141232, + "grad_norm": 0.6832807660102844, + "learning_rate": 0.00012457834973494174, + "loss": 2.5757, + "step": 8477 + }, + { + "epoch": 0.6842062787507062, + "grad_norm": 0.7002938985824585, + "learning_rate": 0.00012456304679799366, + "loss": 2.554, + "step": 8478 + }, + { + "epoch": 0.6842869824872891, + "grad_norm": 0.7236241698265076, + "learning_rate": 0.00012454774324887027, + "loss": 2.6054, + "step": 8479 + }, + { + "epoch": 0.6843676862238721, + "grad_norm": 0.7327216267585754, + "learning_rate": 0.00012453243908795288, + "loss": 2.6101, + "step": 8480 + }, + { + "epoch": 0.6844483899604552, + "grad_norm": 0.7414156794548035, + "learning_rate": 0.00012451713431562306, + "loss": 2.5505, + "step": 8481 + }, + { + "epoch": 0.6845290936970382, + "grad_norm": 0.697795569896698, + "learning_rate": 0.00012450182893226214, + "loss": 2.539, + "step": 8482 + }, + { + "epoch": 0.6846097974336212, + "grad_norm": 0.7053593397140503, + "learning_rate": 0.00012448652293825158, + "loss": 2.6045, + "step": 8483 + }, + { + "epoch": 0.6846905011702041, + "grad_norm": 0.6710856556892395, + "learning_rate": 0.00012447121633397287, + "loss": 2.554, + "step": 8484 + }, + { + "epoch": 0.6847712049067872, + "grad_norm": 0.754454493522644, + "learning_rate": 
0.0001244559091198075, + "loss": 2.5523, + "step": 8485 + }, + { + "epoch": 0.6848519086433702, + "grad_norm": 0.6468656659126282, + "learning_rate": 0.0001244406012961369, + "loss": 2.5931, + "step": 8486 + }, + { + "epoch": 0.6849326123799532, + "grad_norm": 0.7169063091278076, + "learning_rate": 0.00012442529286334266, + "loss": 2.5743, + "step": 8487 + }, + { + "epoch": 0.6850133161165362, + "grad_norm": 0.6737040877342224, + "learning_rate": 0.00012440998382180627, + "loss": 2.5734, + "step": 8488 + }, + { + "epoch": 0.6850940198531192, + "grad_norm": 0.7026428580284119, + "learning_rate": 0.0001243946741719093, + "loss": 2.4994, + "step": 8489 + }, + { + "epoch": 0.6851747235897022, + "grad_norm": 0.7378512024879456, + "learning_rate": 0.00012437936391403322, + "loss": 2.5611, + "step": 8490 + }, + { + "epoch": 0.6852554273262852, + "grad_norm": 0.7379863262176514, + "learning_rate": 0.0001243640530485597, + "loss": 2.538, + "step": 8491 + }, + { + "epoch": 0.6853361310628682, + "grad_norm": 0.68398118019104, + "learning_rate": 0.00012434874157587027, + "loss": 2.5593, + "step": 8492 + }, + { + "epoch": 0.6854168347994513, + "grad_norm": 0.6780444383621216, + "learning_rate": 0.0001243334294963466, + "loss": 2.5068, + "step": 8493 + }, + { + "epoch": 0.6854975385360342, + "grad_norm": 0.7425427436828613, + "learning_rate": 0.0001243181168103702, + "loss": 2.6607, + "step": 8494 + }, + { + "epoch": 0.6855782422726172, + "grad_norm": 0.7563300132751465, + "learning_rate": 0.0001243028035183228, + "loss": 2.5915, + "step": 8495 + }, + { + "epoch": 0.6856589460092002, + "grad_norm": 0.6746618151664734, + "learning_rate": 0.000124287489620586, + "loss": 2.5399, + "step": 8496 + }, + { + "epoch": 0.6857396497457833, + "grad_norm": 0.7100487947463989, + "learning_rate": 0.00012427217511754146, + "loss": 2.5927, + "step": 8497 + }, + { + "epoch": 0.6858203534823663, + "grad_norm": 0.6487080454826355, + "learning_rate": 0.00012425686000957088, + "loss": 2.5582, + 
"step": 8498 + }, + { + "epoch": 0.6859010572189492, + "grad_norm": 0.6577199697494507, + "learning_rate": 0.00012424154429705592, + "loss": 2.5589, + "step": 8499 + }, + { + "epoch": 0.6859817609555322, + "grad_norm": 0.6748726963996887, + "learning_rate": 0.00012422622798037832, + "loss": 2.5651, + "step": 8500 + }, + { + "epoch": 0.6860624646921153, + "grad_norm": 0.7159377336502075, + "learning_rate": 0.0001242109110599198, + "loss": 2.569, + "step": 8501 + }, + { + "epoch": 0.6861431684286983, + "grad_norm": 0.6772934198379517, + "learning_rate": 0.00012419559353606208, + "loss": 2.5533, + "step": 8502 + }, + { + "epoch": 0.6862238721652812, + "grad_norm": 0.6776062846183777, + "learning_rate": 0.00012418027540918693, + "loss": 2.5704, + "step": 8503 + }, + { + "epoch": 0.6863045759018642, + "grad_norm": 0.7009913921356201, + "learning_rate": 0.00012416495667967608, + "loss": 2.5928, + "step": 8504 + }, + { + "epoch": 0.6863852796384473, + "grad_norm": 0.607571005821228, + "learning_rate": 0.00012414963734791137, + "loss": 2.5459, + "step": 8505 + }, + { + "epoch": 0.6864659833750303, + "grad_norm": 0.6798292398452759, + "learning_rate": 0.00012413431741427458, + "loss": 2.6585, + "step": 8506 + }, + { + "epoch": 0.6865466871116133, + "grad_norm": 0.7892771363258362, + "learning_rate": 0.00012411899687914747, + "loss": 2.5781, + "step": 8507 + }, + { + "epoch": 0.6866273908481962, + "grad_norm": 0.6683816909790039, + "learning_rate": 0.00012410367574291199, + "loss": 2.5598, + "step": 8508 + }, + { + "epoch": 0.6867080945847793, + "grad_norm": 0.7591805458068848, + "learning_rate": 0.00012408835400594983, + "loss": 2.6478, + "step": 8509 + }, + { + "epoch": 0.6867887983213623, + "grad_norm": 0.6896353960037231, + "learning_rate": 0.00012407303166864293, + "loss": 2.5418, + "step": 8510 + }, + { + "epoch": 0.6868695020579453, + "grad_norm": 0.6657233834266663, + "learning_rate": 0.00012405770873137316, + "loss": 2.5753, + "step": 8511 + }, + { + "epoch": 
0.6869502057945283, + "grad_norm": 0.6775455474853516, + "learning_rate": 0.00012404238519452237, + "loss": 2.4902, + "step": 8512 + }, + { + "epoch": 0.6870309095311113, + "grad_norm": 0.6572847962379456, + "learning_rate": 0.00012402706105847254, + "loss": 2.6189, + "step": 8513 + }, + { + "epoch": 0.6871116132676943, + "grad_norm": 0.7159940004348755, + "learning_rate": 0.00012401173632360557, + "loss": 2.5928, + "step": 8514 + }, + { + "epoch": 0.6871923170042773, + "grad_norm": 0.7178850173950195, + "learning_rate": 0.0001239964109903033, + "loss": 2.5342, + "step": 8515 + }, + { + "epoch": 0.6872730207408603, + "grad_norm": 0.6761649250984192, + "learning_rate": 0.00012398108505894774, + "loss": 2.5716, + "step": 8516 + }, + { + "epoch": 0.6873537244774433, + "grad_norm": 0.6831200122833252, + "learning_rate": 0.0001239657585299209, + "loss": 2.5506, + "step": 8517 + }, + { + "epoch": 0.6874344282140263, + "grad_norm": 0.7064316868782043, + "learning_rate": 0.00012395043140360468, + "loss": 2.541, + "step": 8518 + }, + { + "epoch": 0.6875151319506093, + "grad_norm": 0.7269963026046753, + "learning_rate": 0.00012393510368038113, + "loss": 2.541, + "step": 8519 + }, + { + "epoch": 0.6875958356871923, + "grad_norm": 0.6651471257209778, + "learning_rate": 0.00012391977536063218, + "loss": 2.5476, + "step": 8520 + }, + { + "epoch": 0.6876765394237753, + "grad_norm": 0.7649257779121399, + "learning_rate": 0.00012390444644473994, + "loss": 2.601, + "step": 8521 + }, + { + "epoch": 0.6877572431603584, + "grad_norm": 0.6637376546859741, + "learning_rate": 0.0001238891169330864, + "loss": 2.5582, + "step": 8522 + }, + { + "epoch": 0.6878379468969413, + "grad_norm": 0.6609189510345459, + "learning_rate": 0.0001238737868260536, + "loss": 2.5795, + "step": 8523 + }, + { + "epoch": 0.6879186506335243, + "grad_norm": 0.657494843006134, + "learning_rate": 0.00012385845612402363, + "loss": 2.6005, + "step": 8524 + }, + { + "epoch": 0.6879993543701073, + "grad_norm": 
0.6780641674995422, + "learning_rate": 0.00012384312482737858, + "loss": 2.514, + "step": 8525 + }, + { + "epoch": 0.6880800581066904, + "grad_norm": 0.7310795187950134, + "learning_rate": 0.00012382779293650052, + "loss": 2.5707, + "step": 8526 + }, + { + "epoch": 0.6881607618432733, + "grad_norm": 0.6722557544708252, + "learning_rate": 0.0001238124604517716, + "loss": 2.5897, + "step": 8527 + }, + { + "epoch": 0.6882414655798563, + "grad_norm": 0.6502346992492676, + "learning_rate": 0.0001237971273735739, + "loss": 2.5554, + "step": 8528 + }, + { + "epoch": 0.6883221693164393, + "grad_norm": 0.6993897557258606, + "learning_rate": 0.0001237817937022896, + "loss": 2.6328, + "step": 8529 + }, + { + "epoch": 0.6884028730530224, + "grad_norm": 0.7069644331932068, + "learning_rate": 0.00012376645943830083, + "loss": 2.5957, + "step": 8530 + }, + { + "epoch": 0.6884835767896054, + "grad_norm": 0.7193333506584167, + "learning_rate": 0.00012375112458198973, + "loss": 2.6505, + "step": 8531 + }, + { + "epoch": 0.6885642805261883, + "grad_norm": 0.6821088194847107, + "learning_rate": 0.00012373578913373853, + "loss": 2.6129, + "step": 8532 + }, + { + "epoch": 0.6886449842627713, + "grad_norm": 0.6499428749084473, + "learning_rate": 0.00012372045309392947, + "loss": 2.6053, + "step": 8533 + }, + { + "epoch": 0.6887256879993544, + "grad_norm": 0.7469449639320374, + "learning_rate": 0.00012370511646294464, + "loss": 2.6423, + "step": 8534 + }, + { + "epoch": 0.6888063917359374, + "grad_norm": 0.7326325178146362, + "learning_rate": 0.00012368977924116637, + "loss": 2.5708, + "step": 8535 + }, + { + "epoch": 0.6888870954725204, + "grad_norm": 0.7459580302238464, + "learning_rate": 0.00012367444142897686, + "loss": 2.544, + "step": 8536 + }, + { + "epoch": 0.6889677992091033, + "grad_norm": 0.7198929786682129, + "learning_rate": 0.00012365910302675843, + "loss": 2.6295, + "step": 8537 + }, + { + "epoch": 0.6890485029456864, + "grad_norm": 0.8139802813529968, + "learning_rate": 
0.0001236437640348933, + "loss": 2.549, + "step": 8538 + }, + { + "epoch": 0.6891292066822694, + "grad_norm": 0.6497162580490112, + "learning_rate": 0.00012362842445376372, + "loss": 2.5849, + "step": 8539 + }, + { + "epoch": 0.6892099104188524, + "grad_norm": 0.7378165125846863, + "learning_rate": 0.00012361308428375208, + "loss": 2.606, + "step": 8540 + }, + { + "epoch": 0.6892906141554354, + "grad_norm": 0.6807567477226257, + "learning_rate": 0.00012359774352524062, + "loss": 2.5892, + "step": 8541 + }, + { + "epoch": 0.6893713178920184, + "grad_norm": 0.6639370918273926, + "learning_rate": 0.0001235824021786117, + "loss": 2.5249, + "step": 8542 + }, + { + "epoch": 0.6894520216286014, + "grad_norm": 0.7140880823135376, + "learning_rate": 0.00012356706024424773, + "loss": 2.5877, + "step": 8543 + }, + { + "epoch": 0.6895327253651844, + "grad_norm": 0.7079257965087891, + "learning_rate": 0.00012355171772253097, + "loss": 2.6011, + "step": 8544 + }, + { + "epoch": 0.6896134291017674, + "grad_norm": 0.7150856852531433, + "learning_rate": 0.00012353637461384387, + "loss": 2.549, + "step": 8545 + }, + { + "epoch": 0.6896941328383505, + "grad_norm": 0.6896397471427917, + "learning_rate": 0.00012352103091856876, + "loss": 2.5452, + "step": 8546 + }, + { + "epoch": 0.6897748365749334, + "grad_norm": 0.696964681148529, + "learning_rate": 0.00012350568663708808, + "loss": 2.5075, + "step": 8547 + }, + { + "epoch": 0.6898555403115164, + "grad_norm": 0.6926069855690002, + "learning_rate": 0.00012349034176978427, + "loss": 2.5905, + "step": 8548 + }, + { + "epoch": 0.6899362440480994, + "grad_norm": 0.6949423551559448, + "learning_rate": 0.00012347499631703968, + "loss": 2.5284, + "step": 8549 + }, + { + "epoch": 0.6900169477846825, + "grad_norm": 0.6480536460876465, + "learning_rate": 0.0001234596502792369, + "loss": 2.5713, + "step": 8550 + }, + { + "epoch": 0.6900976515212655, + "grad_norm": 0.6990019679069519, + "learning_rate": 0.00012344430365675825, + "loss": 2.5826, + 
"step": 8551 + }, + { + "epoch": 0.6901783552578484, + "grad_norm": 0.7063903212547302, + "learning_rate": 0.00012342895644998627, + "loss": 2.5271, + "step": 8552 + }, + { + "epoch": 0.6902590589944314, + "grad_norm": 0.7037132978439331, + "learning_rate": 0.0001234136086593035, + "loss": 2.5855, + "step": 8553 + }, + { + "epoch": 0.6903397627310145, + "grad_norm": 0.679701030254364, + "learning_rate": 0.00012339826028509235, + "loss": 2.5577, + "step": 8554 + }, + { + "epoch": 0.6904204664675975, + "grad_norm": 0.7088965773582458, + "learning_rate": 0.0001233829113277354, + "loss": 2.5767, + "step": 8555 + }, + { + "epoch": 0.6905011702041804, + "grad_norm": 0.7115551829338074, + "learning_rate": 0.00012336756178761517, + "loss": 2.5651, + "step": 8556 + }, + { + "epoch": 0.6905818739407634, + "grad_norm": 0.6778836250305176, + "learning_rate": 0.00012335221166511425, + "loss": 2.6388, + "step": 8557 + }, + { + "epoch": 0.6906625776773465, + "grad_norm": 0.6358879804611206, + "learning_rate": 0.00012333686096061515, + "loss": 2.5493, + "step": 8558 + }, + { + "epoch": 0.6907432814139295, + "grad_norm": 0.688197135925293, + "learning_rate": 0.00012332150967450046, + "loss": 2.5707, + "step": 8559 + }, + { + "epoch": 0.6908239851505125, + "grad_norm": 0.6931524872779846, + "learning_rate": 0.0001233061578071528, + "loss": 2.5561, + "step": 8560 + }, + { + "epoch": 0.6909046888870954, + "grad_norm": 0.6684975624084473, + "learning_rate": 0.00012329080535895478, + "loss": 2.6442, + "step": 8561 + }, + { + "epoch": 0.6909853926236785, + "grad_norm": 0.6865811347961426, + "learning_rate": 0.00012327545233028898, + "loss": 2.564, + "step": 8562 + }, + { + "epoch": 0.6910660963602615, + "grad_norm": 0.6999006867408752, + "learning_rate": 0.0001232600987215381, + "loss": 2.5607, + "step": 8563 + }, + { + "epoch": 0.6911468000968445, + "grad_norm": 0.6734526753425598, + "learning_rate": 0.0001232447445330847, + "loss": 2.5261, + "step": 8564 + }, + { + "epoch": 
0.6912275038334275, + "grad_norm": 0.7447343468666077, + "learning_rate": 0.00012322938976531153, + "loss": 2.5359, + "step": 8565 + }, + { + "epoch": 0.6913082075700105, + "grad_norm": 0.6498517394065857, + "learning_rate": 0.00012321403441860126, + "loss": 2.5345, + "step": 8566 + }, + { + "epoch": 0.6913889113065935, + "grad_norm": 0.692933976650238, + "learning_rate": 0.00012319867849333658, + "loss": 2.6293, + "step": 8567 + }, + { + "epoch": 0.6914696150431765, + "grad_norm": 0.728430449962616, + "learning_rate": 0.00012318332198990015, + "loss": 2.618, + "step": 8568 + }, + { + "epoch": 0.6915503187797595, + "grad_norm": 0.7029061913490295, + "learning_rate": 0.00012316796490867478, + "loss": 2.6151, + "step": 8569 + }, + { + "epoch": 0.6916310225163425, + "grad_norm": 0.6692330241203308, + "learning_rate": 0.00012315260725004313, + "loss": 2.5511, + "step": 8570 + }, + { + "epoch": 0.6917117262529255, + "grad_norm": 0.6811983585357666, + "learning_rate": 0.000123137249014388, + "loss": 2.6337, + "step": 8571 + }, + { + "epoch": 0.6917924299895085, + "grad_norm": 0.7387441992759705, + "learning_rate": 0.00012312189020209212, + "loss": 2.5679, + "step": 8572 + }, + { + "epoch": 0.6918731337260915, + "grad_norm": 0.7180185914039612, + "learning_rate": 0.0001231065308135383, + "loss": 2.639, + "step": 8573 + }, + { + "epoch": 0.6919538374626745, + "grad_norm": 0.6997829079627991, + "learning_rate": 0.00012309117084910936, + "loss": 2.5392, + "step": 8574 + }, + { + "epoch": 0.6920345411992576, + "grad_norm": 0.7004552483558655, + "learning_rate": 0.00012307581030918807, + "loss": 2.6033, + "step": 8575 + }, + { + "epoch": 0.6921152449358405, + "grad_norm": 0.7183418273925781, + "learning_rate": 0.00012306044919415724, + "loss": 2.6302, + "step": 8576 + }, + { + "epoch": 0.6921959486724235, + "grad_norm": 0.6645712852478027, + "learning_rate": 0.00012304508750439976, + "loss": 2.5401, + "step": 8577 + }, + { + "epoch": 0.6922766524090065, + "grad_norm": 
0.6455898284912109, + "learning_rate": 0.00012302972524029848, + "loss": 2.5084, + "step": 8578 + }, + { + "epoch": 0.6923573561455896, + "grad_norm": 0.6933849453926086, + "learning_rate": 0.00012301436240223622, + "loss": 2.5734, + "step": 8579 + }, + { + "epoch": 0.6924380598821726, + "grad_norm": 0.7967655658721924, + "learning_rate": 0.00012299899899059587, + "loss": 2.5721, + "step": 8580 + }, + { + "epoch": 0.6925187636187555, + "grad_norm": 0.706730306148529, + "learning_rate": 0.0001229836350057604, + "loss": 2.6216, + "step": 8581 + }, + { + "epoch": 0.6925994673553385, + "grad_norm": 0.7021105885505676, + "learning_rate": 0.0001229682704481126, + "loss": 2.4877, + "step": 8582 + }, + { + "epoch": 0.6926801710919216, + "grad_norm": 0.7197253108024597, + "learning_rate": 0.00012295290531803553, + "loss": 2.6124, + "step": 8583 + }, + { + "epoch": 0.6927608748285046, + "grad_norm": 0.7559605836868286, + "learning_rate": 0.00012293753961591198, + "loss": 2.6391, + "step": 8584 + }, + { + "epoch": 0.6928415785650875, + "grad_norm": 0.7074676752090454, + "learning_rate": 0.00012292217334212505, + "loss": 2.5949, + "step": 8585 + }, + { + "epoch": 0.6929222823016705, + "grad_norm": 0.6843528747558594, + "learning_rate": 0.00012290680649705763, + "loss": 2.4981, + "step": 8586 + }, + { + "epoch": 0.6930029860382536, + "grad_norm": 0.6853117942810059, + "learning_rate": 0.00012289143908109266, + "loss": 2.6352, + "step": 8587 + }, + { + "epoch": 0.6930836897748366, + "grad_norm": 0.6545630097389221, + "learning_rate": 0.00012287607109461325, + "loss": 2.5344, + "step": 8588 + }, + { + "epoch": 0.6931643935114196, + "grad_norm": 0.7377945184707642, + "learning_rate": 0.00012286070253800233, + "loss": 2.5895, + "step": 8589 + }, + { + "epoch": 0.6932450972480025, + "grad_norm": 0.6919971108436584, + "learning_rate": 0.00012284533341164295, + "loss": 2.5825, + "step": 8590 + }, + { + "epoch": 0.6933258009845856, + "grad_norm": 0.6911910176277161, + "learning_rate": 
0.00012282996371591816, + "loss": 2.6008, + "step": 8591 + }, + { + "epoch": 0.6934065047211686, + "grad_norm": 0.7486373782157898, + "learning_rate": 0.00012281459345121095, + "loss": 2.6056, + "step": 8592 + }, + { + "epoch": 0.6934872084577516, + "grad_norm": 0.6829040050506592, + "learning_rate": 0.00012279922261790443, + "loss": 2.5161, + "step": 8593 + }, + { + "epoch": 0.6935679121943346, + "grad_norm": 0.7410104870796204, + "learning_rate": 0.00012278385121638173, + "loss": 2.6114, + "step": 8594 + }, + { + "epoch": 0.6936486159309176, + "grad_norm": 0.7355940937995911, + "learning_rate": 0.00012276847924702587, + "loss": 2.6371, + "step": 8595 + }, + { + "epoch": 0.6937293196675006, + "grad_norm": 0.650641679763794, + "learning_rate": 0.00012275310671022003, + "loss": 2.5568, + "step": 8596 + }, + { + "epoch": 0.6938100234040836, + "grad_norm": 0.661573052406311, + "learning_rate": 0.00012273773360634726, + "loss": 2.5828, + "step": 8597 + }, + { + "epoch": 0.6938907271406666, + "grad_norm": 0.6848435401916504, + "learning_rate": 0.00012272235993579072, + "loss": 2.5226, + "step": 8598 + }, + { + "epoch": 0.6939714308772497, + "grad_norm": 0.7015430927276611, + "learning_rate": 0.0001227069856989336, + "loss": 2.6156, + "step": 8599 + }, + { + "epoch": 0.6940521346138326, + "grad_norm": 0.7058628797531128, + "learning_rate": 0.000122691610896159, + "loss": 2.6007, + "step": 8600 + }, + { + "epoch": 0.6941328383504156, + "grad_norm": 0.6589432954788208, + "learning_rate": 0.0001226762355278502, + "loss": 2.5551, + "step": 8601 + }, + { + "epoch": 0.6942135420869986, + "grad_norm": 0.6875284910202026, + "learning_rate": 0.0001226608595943903, + "loss": 2.5537, + "step": 8602 + }, + { + "epoch": 0.6942942458235817, + "grad_norm": 0.7178356051445007, + "learning_rate": 0.00012264548309616252, + "loss": 2.655, + "step": 8603 + }, + { + "epoch": 0.6943749495601647, + "grad_norm": 0.7327077388763428, + "learning_rate": 0.00012263010603355017, + "loss": 2.5574, + 
"step": 8604 + }, + { + "epoch": 0.6944556532967476, + "grad_norm": 0.6318337917327881, + "learning_rate": 0.0001226147284069364, + "loss": 2.577, + "step": 8605 + }, + { + "epoch": 0.6945363570333306, + "grad_norm": 0.674872875213623, + "learning_rate": 0.00012259935021670444, + "loss": 2.6225, + "step": 8606 + }, + { + "epoch": 0.6946170607699137, + "grad_norm": 0.6554198861122131, + "learning_rate": 0.0001225839714632376, + "loss": 2.5951, + "step": 8607 + }, + { + "epoch": 0.6946977645064967, + "grad_norm": 0.7086453437805176, + "learning_rate": 0.00012256859214691918, + "loss": 2.622, + "step": 8608 + }, + { + "epoch": 0.6947784682430796, + "grad_norm": 0.6609488129615784, + "learning_rate": 0.00012255321226813245, + "loss": 2.5623, + "step": 8609 + }, + { + "epoch": 0.6948591719796626, + "grad_norm": 0.7504609823226929, + "learning_rate": 0.00012253783182726075, + "loss": 2.5264, + "step": 8610 + }, + { + "epoch": 0.6949398757162457, + "grad_norm": 0.6702934503555298, + "learning_rate": 0.00012252245082468733, + "loss": 2.5877, + "step": 8611 + }, + { + "epoch": 0.6950205794528287, + "grad_norm": 0.7116326689720154, + "learning_rate": 0.00012250706926079553, + "loss": 2.5629, + "step": 8612 + }, + { + "epoch": 0.6951012831894117, + "grad_norm": 0.7495368719100952, + "learning_rate": 0.00012249168713596875, + "loss": 2.5731, + "step": 8613 + }, + { + "epoch": 0.6951819869259946, + "grad_norm": 0.7434844970703125, + "learning_rate": 0.0001224763044505904, + "loss": 2.6008, + "step": 8614 + }, + { + "epoch": 0.6952626906625777, + "grad_norm": 0.719667375087738, + "learning_rate": 0.00012246092120504371, + "loss": 2.6051, + "step": 8615 + }, + { + "epoch": 0.6953433943991607, + "grad_norm": 0.7189086079597473, + "learning_rate": 0.00012244553739971216, + "loss": 2.5662, + "step": 8616 + }, + { + "epoch": 0.6954240981357437, + "grad_norm": 0.7222673892974854, + "learning_rate": 0.00012243015303497917, + "loss": 2.609, + "step": 8617 + }, + { + "epoch": 
0.6955048018723267, + "grad_norm": 0.7323142290115356, + "learning_rate": 0.00012241476811122813, + "loss": 2.5458, + "step": 8618 + }, + { + "epoch": 0.6955855056089096, + "grad_norm": 0.7374032735824585, + "learning_rate": 0.00012239938262884246, + "loss": 2.6147, + "step": 8619 + }, + { + "epoch": 0.6956662093454927, + "grad_norm": 0.6707843542098999, + "learning_rate": 0.00012238399658820562, + "loss": 2.6462, + "step": 8620 + }, + { + "epoch": 0.6957469130820757, + "grad_norm": 0.7603243589401245, + "learning_rate": 0.0001223686099897011, + "loss": 2.6295, + "step": 8621 + }, + { + "epoch": 0.6958276168186587, + "grad_norm": 0.6966906785964966, + "learning_rate": 0.00012235322283371232, + "loss": 2.545, + "step": 8622 + }, + { + "epoch": 0.6959083205552417, + "grad_norm": 0.6757891774177551, + "learning_rate": 0.0001223378351206228, + "loss": 2.5548, + "step": 8623 + }, + { + "epoch": 0.6959890242918247, + "grad_norm": 0.6901456713676453, + "learning_rate": 0.00012232244685081605, + "loss": 2.5734, + "step": 8624 + }, + { + "epoch": 0.6960697280284077, + "grad_norm": 0.6942903995513916, + "learning_rate": 0.00012230705802467558, + "loss": 2.5495, + "step": 8625 + }, + { + "epoch": 0.6961504317649907, + "grad_norm": 0.6774815320968628, + "learning_rate": 0.0001222916686425849, + "loss": 2.5076, + "step": 8626 + }, + { + "epoch": 0.6962311355015737, + "grad_norm": 0.8037571310997009, + "learning_rate": 0.00012227627870492754, + "loss": 2.6737, + "step": 8627 + }, + { + "epoch": 0.6963118392381568, + "grad_norm": 0.7027560472488403, + "learning_rate": 0.0001222608882120871, + "loss": 2.5401, + "step": 8628 + }, + { + "epoch": 0.6963925429747397, + "grad_norm": 0.6651299595832825, + "learning_rate": 0.00012224549716444714, + "loss": 2.5835, + "step": 8629 + }, + { + "epoch": 0.6964732467113227, + "grad_norm": 0.7082433104515076, + "learning_rate": 0.00012223010556239124, + "loss": 2.5622, + "step": 8630 + }, + { + "epoch": 0.6965539504479057, + "grad_norm": 
0.7993464469909668, + "learning_rate": 0.00012221471340630305, + "loss": 2.655, + "step": 8631 + }, + { + "epoch": 0.6966346541844888, + "grad_norm": 0.7375298142433167, + "learning_rate": 0.00012219932069656606, + "loss": 2.598, + "step": 8632 + }, + { + "epoch": 0.6967153579210718, + "grad_norm": 0.6915456652641296, + "learning_rate": 0.00012218392743356397, + "loss": 2.5649, + "step": 8633 + }, + { + "epoch": 0.6967960616576547, + "grad_norm": 0.679256021976471, + "learning_rate": 0.00012216853361768045, + "loss": 2.545, + "step": 8634 + }, + { + "epoch": 0.6968767653942377, + "grad_norm": 0.7234694361686707, + "learning_rate": 0.0001221531392492991, + "loss": 2.5863, + "step": 8635 + }, + { + "epoch": 0.6969574691308208, + "grad_norm": 0.7053319811820984, + "learning_rate": 0.00012213774432880364, + "loss": 2.5829, + "step": 8636 + }, + { + "epoch": 0.6970381728674038, + "grad_norm": 0.7584449648857117, + "learning_rate": 0.00012212234885657772, + "loss": 2.5855, + "step": 8637 + }, + { + "epoch": 0.6971188766039867, + "grad_norm": 0.7098579406738281, + "learning_rate": 0.00012210695283300501, + "loss": 2.6057, + "step": 8638 + }, + { + "epoch": 0.6971995803405697, + "grad_norm": 0.7350205779075623, + "learning_rate": 0.00012209155625846928, + "loss": 2.546, + "step": 8639 + }, + { + "epoch": 0.6972802840771528, + "grad_norm": 0.6842331290245056, + "learning_rate": 0.0001220761591333542, + "loss": 2.5602, + "step": 8640 + }, + { + "epoch": 0.6973609878137358, + "grad_norm": 0.6731252074241638, + "learning_rate": 0.00012206076145804354, + "loss": 2.4676, + "step": 8641 + }, + { + "epoch": 0.6974416915503188, + "grad_norm": 0.7271167635917664, + "learning_rate": 0.00012204536323292104, + "loss": 2.5605, + "step": 8642 + }, + { + "epoch": 0.6975223952869017, + "grad_norm": 0.6860780715942383, + "learning_rate": 0.00012202996445837043, + "loss": 2.5041, + "step": 8643 + }, + { + "epoch": 0.6976030990234848, + "grad_norm": 0.7134578824043274, + "learning_rate": 
0.00012201456513477554, + "loss": 2.614, + "step": 8644 + }, + { + "epoch": 0.6976838027600678, + "grad_norm": 0.6995248198509216, + "learning_rate": 0.00012199916526252014, + "loss": 2.5087, + "step": 8645 + }, + { + "epoch": 0.6977645064966508, + "grad_norm": 0.7280197143554688, + "learning_rate": 0.00012198376484198803, + "loss": 2.5723, + "step": 8646 + }, + { + "epoch": 0.6978452102332338, + "grad_norm": 0.6898967623710632, + "learning_rate": 0.00012196836387356306, + "loss": 2.6073, + "step": 8647 + }, + { + "epoch": 0.6979259139698168, + "grad_norm": 0.6670758128166199, + "learning_rate": 0.00012195296235762901, + "loss": 2.5276, + "step": 8648 + }, + { + "epoch": 0.6980066177063998, + "grad_norm": 0.6862780451774597, + "learning_rate": 0.00012193756029456973, + "loss": 2.5363, + "step": 8649 + }, + { + "epoch": 0.6980873214429828, + "grad_norm": 0.6568876504898071, + "learning_rate": 0.00012192215768476916, + "loss": 2.5828, + "step": 8650 + }, + { + "epoch": 0.6981680251795658, + "grad_norm": 0.7237746119499207, + "learning_rate": 0.00012190675452861107, + "loss": 2.6076, + "step": 8651 + }, + { + "epoch": 0.6982487289161489, + "grad_norm": 0.6831536293029785, + "learning_rate": 0.00012189135082647943, + "loss": 2.5199, + "step": 8652 + }, + { + "epoch": 0.6983294326527318, + "grad_norm": 0.6767029166221619, + "learning_rate": 0.00012187594657875805, + "loss": 2.5859, + "step": 8653 + }, + { + "epoch": 0.6984101363893148, + "grad_norm": 0.6977167129516602, + "learning_rate": 0.00012186054178583092, + "loss": 2.5831, + "step": 8654 + }, + { + "epoch": 0.6984908401258978, + "grad_norm": 0.6369525194168091, + "learning_rate": 0.00012184513644808197, + "loss": 2.5839, + "step": 8655 + }, + { + "epoch": 0.6985715438624809, + "grad_norm": 0.6814634203910828, + "learning_rate": 0.00012182973056589508, + "loss": 2.5493, + "step": 8656 + }, + { + "epoch": 0.6986522475990639, + "grad_norm": 0.6895000338554382, + "learning_rate": 0.00012181432413965428, + "loss": 
2.5616, + "step": 8657 + }, + { + "epoch": 0.6987329513356468, + "grad_norm": 0.6689717769622803, + "learning_rate": 0.00012179891716974345, + "loss": 2.5481, + "step": 8658 + }, + { + "epoch": 0.6988136550722298, + "grad_norm": 0.6945160031318665, + "learning_rate": 0.00012178350965654666, + "loss": 2.5781, + "step": 8659 + }, + { + "epoch": 0.6988943588088129, + "grad_norm": 0.7226110696792603, + "learning_rate": 0.00012176810160044785, + "loss": 2.5767, + "step": 8660 + }, + { + "epoch": 0.6989750625453959, + "grad_norm": 0.6810569167137146, + "learning_rate": 0.00012175269300183105, + "loss": 2.5184, + "step": 8661 + }, + { + "epoch": 0.6990557662819789, + "grad_norm": 0.727281928062439, + "learning_rate": 0.0001217372838610803, + "loss": 2.5972, + "step": 8662 + }, + { + "epoch": 0.6991364700185618, + "grad_norm": 0.7111573219299316, + "learning_rate": 0.00012172187417857959, + "loss": 2.6445, + "step": 8663 + }, + { + "epoch": 0.6992171737551449, + "grad_norm": 0.6808965802192688, + "learning_rate": 0.00012170646395471296, + "loss": 2.5191, + "step": 8664 + }, + { + "epoch": 0.6992978774917279, + "grad_norm": 0.7063688635826111, + "learning_rate": 0.00012169105318986455, + "loss": 2.6021, + "step": 8665 + }, + { + "epoch": 0.6993785812283109, + "grad_norm": 0.6522886753082275, + "learning_rate": 0.0001216756418844184, + "loss": 2.5697, + "step": 8666 + }, + { + "epoch": 0.6994592849648938, + "grad_norm": 0.6706095337867737, + "learning_rate": 0.00012166023003875859, + "loss": 2.5706, + "step": 8667 + }, + { + "epoch": 0.6995399887014769, + "grad_norm": 0.6744416356086731, + "learning_rate": 0.00012164481765326923, + "loss": 2.5713, + "step": 8668 + }, + { + "epoch": 0.6996206924380599, + "grad_norm": 0.7385411858558655, + "learning_rate": 0.0001216294047283344, + "loss": 2.5543, + "step": 8669 + }, + { + "epoch": 0.6997013961746429, + "grad_norm": 0.7286678552627563, + "learning_rate": 0.0001216139912643383, + "loss": 2.588, + "step": 8670 + }, + { + "epoch": 
0.6997820999112259, + "grad_norm": 0.7065937519073486, + "learning_rate": 0.00012159857726166503, + "loss": 2.5475, + "step": 8671 + }, + { + "epoch": 0.6998628036478088, + "grad_norm": 0.6609788537025452, + "learning_rate": 0.00012158316272069874, + "loss": 2.5664, + "step": 8672 + }, + { + "epoch": 0.6999435073843919, + "grad_norm": 0.7360579371452332, + "learning_rate": 0.00012156774764182364, + "loss": 2.5822, + "step": 8673 + }, + { + "epoch": 0.7000242111209749, + "grad_norm": 0.6265058517456055, + "learning_rate": 0.00012155233202542384, + "loss": 2.5849, + "step": 8674 + }, + { + "epoch": 0.7001049148575579, + "grad_norm": 0.646976888179779, + "learning_rate": 0.00012153691587188363, + "loss": 2.5839, + "step": 8675 + }, + { + "epoch": 0.7001856185941409, + "grad_norm": 0.6634985208511353, + "learning_rate": 0.0001215214991815872, + "loss": 2.5434, + "step": 8676 + }, + { + "epoch": 0.700266322330724, + "grad_norm": 0.6757560968399048, + "learning_rate": 0.00012150608195491871, + "loss": 2.6186, + "step": 8677 + }, + { + "epoch": 0.7003470260673069, + "grad_norm": 0.7077112197875977, + "learning_rate": 0.00012149066419226247, + "loss": 2.5757, + "step": 8678 + }, + { + "epoch": 0.7004277298038899, + "grad_norm": 0.698226273059845, + "learning_rate": 0.00012147524589400268, + "loss": 2.5307, + "step": 8679 + }, + { + "epoch": 0.7005084335404729, + "grad_norm": 0.6782405376434326, + "learning_rate": 0.00012145982706052361, + "loss": 2.5582, + "step": 8680 + }, + { + "epoch": 0.700589137277056, + "grad_norm": 0.6832882165908813, + "learning_rate": 0.0001214444076922096, + "loss": 2.574, + "step": 8681 + }, + { + "epoch": 0.7006698410136389, + "grad_norm": 0.7182612419128418, + "learning_rate": 0.00012142898778944485, + "loss": 2.6457, + "step": 8682 + }, + { + "epoch": 0.7007505447502219, + "grad_norm": 0.7043644785881042, + "learning_rate": 0.00012141356735261373, + "loss": 2.5244, + "step": 8683 + }, + { + "epoch": 0.7008312484868049, + "grad_norm": 
0.6942669749259949, + "learning_rate": 0.00012139814638210054, + "loss": 2.5507, + "step": 8684 + }, + { + "epoch": 0.700911952223388, + "grad_norm": 0.8412066102027893, + "learning_rate": 0.00012138272487828959, + "loss": 2.6025, + "step": 8685 + }, + { + "epoch": 0.700992655959971, + "grad_norm": 0.6906788945198059, + "learning_rate": 0.00012136730284156525, + "loss": 2.5259, + "step": 8686 + }, + { + "epoch": 0.7010733596965539, + "grad_norm": 0.7258631587028503, + "learning_rate": 0.00012135188027231188, + "loss": 2.6311, + "step": 8687 + }, + { + "epoch": 0.7011540634331369, + "grad_norm": 0.6294744610786438, + "learning_rate": 0.00012133645717091382, + "loss": 2.5969, + "step": 8688 + }, + { + "epoch": 0.70123476716972, + "grad_norm": 0.6994131207466125, + "learning_rate": 0.00012132103353775548, + "loss": 2.5954, + "step": 8689 + }, + { + "epoch": 0.701315470906303, + "grad_norm": 0.671441912651062, + "learning_rate": 0.00012130560937322124, + "loss": 2.5628, + "step": 8690 + }, + { + "epoch": 0.701396174642886, + "grad_norm": 0.6915482878684998, + "learning_rate": 0.00012129018467769555, + "loss": 2.5173, + "step": 8691 + }, + { + "epoch": 0.7014768783794689, + "grad_norm": 0.6810318231582642, + "learning_rate": 0.00012127475945156279, + "loss": 2.6186, + "step": 8692 + }, + { + "epoch": 0.701557582116052, + "grad_norm": 0.7931910157203674, + "learning_rate": 0.00012125933369520741, + "loss": 2.6243, + "step": 8693 + }, + { + "epoch": 0.701638285852635, + "grad_norm": 0.6843162178993225, + "learning_rate": 0.00012124390740901386, + "loss": 2.6072, + "step": 8694 + }, + { + "epoch": 0.701718989589218, + "grad_norm": 0.672115683555603, + "learning_rate": 0.0001212284805933666, + "loss": 2.6027, + "step": 8695 + }, + { + "epoch": 0.7017996933258009, + "grad_norm": 0.65242600440979, + "learning_rate": 0.00012121305324865014, + "loss": 2.5128, + "step": 8696 + }, + { + "epoch": 0.701880397062384, + "grad_norm": 0.7253173589706421, + "learning_rate": 
0.00012119762537524893, + "loss": 2.5776, + "step": 8697 + }, + { + "epoch": 0.701961100798967, + "grad_norm": 0.6536431312561035, + "learning_rate": 0.00012118219697354745, + "loss": 2.5656, + "step": 8698 + }, + { + "epoch": 0.70204180453555, + "grad_norm": 0.7121500372886658, + "learning_rate": 0.00012116676804393028, + "loss": 2.5878, + "step": 8699 + }, + { + "epoch": 0.702122508272133, + "grad_norm": 0.676449716091156, + "learning_rate": 0.00012115133858678191, + "loss": 2.6624, + "step": 8700 + }, + { + "epoch": 0.702203212008716, + "grad_norm": 0.7230382561683655, + "learning_rate": 0.0001211359086024869, + "loss": 2.5461, + "step": 8701 + }, + { + "epoch": 0.702283915745299, + "grad_norm": 0.6679937839508057, + "learning_rate": 0.00012112047809142979, + "loss": 2.5568, + "step": 8702 + }, + { + "epoch": 0.702364619481882, + "grad_norm": 0.6627704501152039, + "learning_rate": 0.0001211050470539952, + "loss": 2.4819, + "step": 8703 + }, + { + "epoch": 0.702445323218465, + "grad_norm": 0.6680646538734436, + "learning_rate": 0.0001210896154905676, + "loss": 2.5722, + "step": 8704 + }, + { + "epoch": 0.7025260269550481, + "grad_norm": 0.7406336665153503, + "learning_rate": 0.00012107418340153167, + "loss": 2.5722, + "step": 8705 + }, + { + "epoch": 0.702606730691631, + "grad_norm": 0.6634557247161865, + "learning_rate": 0.00012105875078727203, + "loss": 2.5747, + "step": 8706 + }, + { + "epoch": 0.702687434428214, + "grad_norm": 0.6521568894386292, + "learning_rate": 0.00012104331764817325, + "loss": 2.555, + "step": 8707 + }, + { + "epoch": 0.702768138164797, + "grad_norm": 0.677606463432312, + "learning_rate": 0.00012102788398461999, + "loss": 2.5544, + "step": 8708 + }, + { + "epoch": 0.7028488419013801, + "grad_norm": 0.6593700051307678, + "learning_rate": 0.0001210124497969969, + "loss": 2.5252, + "step": 8709 + }, + { + "epoch": 0.7029295456379631, + "grad_norm": 0.686903715133667, + "learning_rate": 0.00012099701508568863, + "loss": 2.6513, + "step": 
8710 + }, + { + "epoch": 0.703010249374546, + "grad_norm": 0.6395620107650757, + "learning_rate": 0.00012098157985107987, + "loss": 2.5169, + "step": 8711 + }, + { + "epoch": 0.703090953111129, + "grad_norm": 0.7387555837631226, + "learning_rate": 0.00012096614409355526, + "loss": 2.5741, + "step": 8712 + }, + { + "epoch": 0.7031716568477121, + "grad_norm": 0.665900707244873, + "learning_rate": 0.00012095070781349957, + "loss": 2.5068, + "step": 8713 + }, + { + "epoch": 0.7032523605842951, + "grad_norm": 0.6983458399772644, + "learning_rate": 0.00012093527101129745, + "loss": 2.5028, + "step": 8714 + }, + { + "epoch": 0.703333064320878, + "grad_norm": 0.6250826120376587, + "learning_rate": 0.00012091983368733366, + "loss": 2.5765, + "step": 8715 + }, + { + "epoch": 0.703413768057461, + "grad_norm": 0.7031501531600952, + "learning_rate": 0.00012090439584199294, + "loss": 2.5885, + "step": 8716 + }, + { + "epoch": 0.7034944717940441, + "grad_norm": 0.7140926122665405, + "learning_rate": 0.00012088895747566002, + "loss": 2.6278, + "step": 8717 + }, + { + "epoch": 0.7035751755306271, + "grad_norm": 0.6753602027893066, + "learning_rate": 0.00012087351858871969, + "loss": 2.5664, + "step": 8718 + }, + { + "epoch": 0.7036558792672101, + "grad_norm": 0.7150039076805115, + "learning_rate": 0.0001208580791815567, + "loss": 2.6739, + "step": 8719 + }, + { + "epoch": 0.703736583003793, + "grad_norm": 0.7120389342308044, + "learning_rate": 0.00012084263925455583, + "loss": 2.565, + "step": 8720 + }, + { + "epoch": 0.703817286740376, + "grad_norm": 0.7775784134864807, + "learning_rate": 0.00012082719880810194, + "loss": 2.5861, + "step": 8721 + }, + { + "epoch": 0.7038979904769591, + "grad_norm": 0.6704322695732117, + "learning_rate": 0.0001208117578425798, + "loss": 2.5957, + "step": 8722 + }, + { + "epoch": 0.7039786942135421, + "grad_norm": 0.6761276721954346, + "learning_rate": 0.00012079631635837426, + "loss": 2.5472, + "step": 8723 + }, + { + "epoch": 0.7040593979501251, + 
"grad_norm": 0.7639868855476379, + "learning_rate": 0.00012078087435587016, + "loss": 2.6053, + "step": 8724 + }, + { + "epoch": 0.704140101686708, + "grad_norm": 0.7490074038505554, + "learning_rate": 0.0001207654318354523, + "loss": 2.5517, + "step": 8725 + }, + { + "epoch": 0.7042208054232911, + "grad_norm": 0.7068852782249451, + "learning_rate": 0.00012074998879750566, + "loss": 2.5357, + "step": 8726 + }, + { + "epoch": 0.7043015091598741, + "grad_norm": 0.7273775935173035, + "learning_rate": 0.00012073454524241503, + "loss": 2.6028, + "step": 8727 + }, + { + "epoch": 0.7043822128964571, + "grad_norm": 0.7146363258361816, + "learning_rate": 0.00012071910117056533, + "loss": 2.5982, + "step": 8728 + }, + { + "epoch": 0.7044629166330401, + "grad_norm": 0.7631390690803528, + "learning_rate": 0.00012070365658234149, + "loss": 2.6021, + "step": 8729 + }, + { + "epoch": 0.7045436203696231, + "grad_norm": 0.7065283060073853, + "learning_rate": 0.00012068821147812839, + "loss": 2.5538, + "step": 8730 + }, + { + "epoch": 0.7046243241062061, + "grad_norm": 0.7914319634437561, + "learning_rate": 0.00012067276585831097, + "loss": 2.5617, + "step": 8731 + }, + { + "epoch": 0.7047050278427891, + "grad_norm": 0.7036565542221069, + "learning_rate": 0.0001206573197232742, + "loss": 2.5354, + "step": 8732 + }, + { + "epoch": 0.7047857315793721, + "grad_norm": 0.657116711139679, + "learning_rate": 0.00012064187307340303, + "loss": 2.5084, + "step": 8733 + }, + { + "epoch": 0.7048664353159552, + "grad_norm": 0.7246817946434021, + "learning_rate": 0.00012062642590908242, + "loss": 2.5737, + "step": 8734 + }, + { + "epoch": 0.7049471390525381, + "grad_norm": 0.6895857453346252, + "learning_rate": 0.00012061097823069736, + "loss": 2.5792, + "step": 8735 + }, + { + "epoch": 0.7050278427891211, + "grad_norm": 0.7654988169670105, + "learning_rate": 0.00012059553003863282, + "loss": 2.5302, + "step": 8736 + }, + { + "epoch": 0.7051085465257041, + "grad_norm": 0.7611668109893799, + 
"learning_rate": 0.00012058008133327387, + "loss": 2.6073, + "step": 8737 + }, + { + "epoch": 0.7051892502622872, + "grad_norm": 0.728729784488678, + "learning_rate": 0.00012056463211500546, + "loss": 2.5714, + "step": 8738 + }, + { + "epoch": 0.7052699539988702, + "grad_norm": 0.7251634001731873, + "learning_rate": 0.00012054918238421271, + "loss": 2.627, + "step": 8739 + }, + { + "epoch": 0.7053506577354531, + "grad_norm": 0.827745795249939, + "learning_rate": 0.00012053373214128056, + "loss": 2.6303, + "step": 8740 + }, + { + "epoch": 0.7054313614720361, + "grad_norm": 0.6837510466575623, + "learning_rate": 0.00012051828138659416, + "loss": 2.5837, + "step": 8741 + }, + { + "epoch": 0.7055120652086192, + "grad_norm": 0.6763553619384766, + "learning_rate": 0.00012050283012053856, + "loss": 2.575, + "step": 8742 + }, + { + "epoch": 0.7055927689452022, + "grad_norm": 0.6779605150222778, + "learning_rate": 0.00012048737834349886, + "loss": 2.588, + "step": 8743 + }, + { + "epoch": 0.7056734726817852, + "grad_norm": 0.7207251191139221, + "learning_rate": 0.00012047192605586008, + "loss": 2.6182, + "step": 8744 + }, + { + "epoch": 0.7057541764183681, + "grad_norm": 0.6681165099143982, + "learning_rate": 0.00012045647325800742, + "loss": 2.5595, + "step": 8745 + }, + { + "epoch": 0.7058348801549512, + "grad_norm": 0.7520970702171326, + "learning_rate": 0.00012044101995032594, + "loss": 2.6306, + "step": 8746 + }, + { + "epoch": 0.7059155838915342, + "grad_norm": 0.7148429155349731, + "learning_rate": 0.00012042556613320087, + "loss": 2.5749, + "step": 8747 + }, + { + "epoch": 0.7059962876281172, + "grad_norm": 0.619369626045227, + "learning_rate": 0.00012041011180701729, + "loss": 2.5382, + "step": 8748 + }, + { + "epoch": 0.7060769913647001, + "grad_norm": 0.7450816035270691, + "learning_rate": 0.00012039465697216032, + "loss": 2.5547, + "step": 8749 + }, + { + "epoch": 0.7061576951012832, + "grad_norm": 0.7324537634849548, + "learning_rate": 0.00012037920162901521, + 
"loss": 2.5756, + "step": 8750 + }, + { + "epoch": 0.7062383988378662, + "grad_norm": 0.7881754636764526, + "learning_rate": 0.00012036374577796715, + "loss": 2.6376, + "step": 8751 + }, + { + "epoch": 0.7063191025744492, + "grad_norm": 0.7095965147018433, + "learning_rate": 0.00012034828941940128, + "loss": 2.5454, + "step": 8752 + }, + { + "epoch": 0.7063998063110322, + "grad_norm": 0.7142949104309082, + "learning_rate": 0.00012033283255370287, + "loss": 2.5738, + "step": 8753 + }, + { + "epoch": 0.7064805100476153, + "grad_norm": 0.6592378616333008, + "learning_rate": 0.0001203173751812571, + "loss": 2.5473, + "step": 8754 + }, + { + "epoch": 0.7065612137841982, + "grad_norm": 0.6964332461357117, + "learning_rate": 0.00012030191730244926, + "loss": 2.5829, + "step": 8755 + }, + { + "epoch": 0.7066419175207812, + "grad_norm": 0.707539975643158, + "learning_rate": 0.00012028645891766455, + "loss": 2.5652, + "step": 8756 + }, + { + "epoch": 0.7067226212573642, + "grad_norm": 0.6991387009620667, + "learning_rate": 0.00012027100002728824, + "loss": 2.5874, + "step": 8757 + }, + { + "epoch": 0.7068033249939473, + "grad_norm": 0.665746808052063, + "learning_rate": 0.00012025554063170566, + "loss": 2.5163, + "step": 8758 + }, + { + "epoch": 0.7068840287305302, + "grad_norm": 0.696130096912384, + "learning_rate": 0.00012024008073130204, + "loss": 2.5748, + "step": 8759 + }, + { + "epoch": 0.7069647324671132, + "grad_norm": 0.698885440826416, + "learning_rate": 0.00012022462032646269, + "loss": 2.5561, + "step": 8760 + }, + { + "epoch": 0.7070454362036962, + "grad_norm": 0.7052211761474609, + "learning_rate": 0.00012020915941757292, + "loss": 2.5979, + "step": 8761 + }, + { + "epoch": 0.7071261399402793, + "grad_norm": 0.7370811104774475, + "learning_rate": 0.00012019369800501808, + "loss": 2.5623, + "step": 8762 + }, + { + "epoch": 0.7072068436768623, + "grad_norm": 0.6699148416519165, + "learning_rate": 0.00012017823608918352, + "loss": 2.5816, + "step": 8763 + }, + { + 
"epoch": 0.7072875474134452, + "grad_norm": 0.6712930798530579, + "learning_rate": 0.00012016277367045457, + "loss": 2.5495, + "step": 8764 + }, + { + "epoch": 0.7073682511500282, + "grad_norm": 0.7238204479217529, + "learning_rate": 0.00012014731074921659, + "loss": 2.5936, + "step": 8765 + }, + { + "epoch": 0.7074489548866113, + "grad_norm": 0.7303668856620789, + "learning_rate": 0.00012013184732585494, + "loss": 2.6366, + "step": 8766 + }, + { + "epoch": 0.7075296586231943, + "grad_norm": 0.6883132457733154, + "learning_rate": 0.00012011638340075505, + "loss": 2.534, + "step": 8767 + }, + { + "epoch": 0.7076103623597773, + "grad_norm": 0.7057133316993713, + "learning_rate": 0.00012010091897430229, + "loss": 2.6035, + "step": 8768 + }, + { + "epoch": 0.7076910660963602, + "grad_norm": 0.7069352269172668, + "learning_rate": 0.0001200854540468821, + "loss": 2.5047, + "step": 8769 + }, + { + "epoch": 0.7077717698329433, + "grad_norm": 0.7192478775978088, + "learning_rate": 0.00012006998861887985, + "loss": 2.5698, + "step": 8770 + }, + { + "epoch": 0.7078524735695263, + "grad_norm": 0.6992887854576111, + "learning_rate": 0.00012005452269068107, + "loss": 2.5631, + "step": 8771 + }, + { + "epoch": 0.7079331773061093, + "grad_norm": 0.676154613494873, + "learning_rate": 0.00012003905626267114, + "loss": 2.5255, + "step": 8772 + }, + { + "epoch": 0.7080138810426923, + "grad_norm": 0.672269880771637, + "learning_rate": 0.00012002358933523555, + "loss": 2.5766, + "step": 8773 + }, + { + "epoch": 0.7080945847792752, + "grad_norm": 0.7334566712379456, + "learning_rate": 0.00012000812190875976, + "loss": 2.6068, + "step": 8774 + }, + { + "epoch": 0.7081752885158583, + "grad_norm": 0.6599388122558594, + "learning_rate": 0.00011999265398362931, + "loss": 2.6032, + "step": 8775 + }, + { + "epoch": 0.7082559922524413, + "grad_norm": 0.7158498167991638, + "learning_rate": 0.00011997718556022958, + "loss": 2.599, + "step": 8776 + }, + { + "epoch": 0.7083366959890243, + 
"grad_norm": 0.7470360994338989, + "learning_rate": 0.00011996171663894624, + "loss": 2.58, + "step": 8777 + }, + { + "epoch": 0.7084173997256072, + "grad_norm": 0.6251266002655029, + "learning_rate": 0.00011994624722016472, + "loss": 2.5996, + "step": 8778 + }, + { + "epoch": 0.7084981034621903, + "grad_norm": 0.6649689078330994, + "learning_rate": 0.00011993077730427058, + "loss": 2.6025, + "step": 8779 + }, + { + "epoch": 0.7085788071987733, + "grad_norm": 0.7554693818092346, + "learning_rate": 0.00011991530689164939, + "loss": 2.6207, + "step": 8780 + }, + { + "epoch": 0.7086595109353563, + "grad_norm": 0.7941430807113647, + "learning_rate": 0.00011989983598268661, + "loss": 2.584, + "step": 8781 + }, + { + "epoch": 0.7087402146719393, + "grad_norm": 0.7257998585700989, + "learning_rate": 0.00011988436457776799, + "loss": 2.6152, + "step": 8782 + }, + { + "epoch": 0.7088209184085223, + "grad_norm": 0.716354489326477, + "learning_rate": 0.00011986889267727899, + "loss": 2.585, + "step": 8783 + }, + { + "epoch": 0.7089016221451053, + "grad_norm": 0.7094400525093079, + "learning_rate": 0.00011985342028160525, + "loss": 2.5759, + "step": 8784 + }, + { + "epoch": 0.7089823258816883, + "grad_norm": 0.7211421728134155, + "learning_rate": 0.0001198379473911324, + "loss": 2.5645, + "step": 8785 + }, + { + "epoch": 0.7090630296182713, + "grad_norm": 0.7166693806648254, + "learning_rate": 0.000119822474006246, + "loss": 2.5357, + "step": 8786 + }, + { + "epoch": 0.7091437333548544, + "grad_norm": 0.6702254414558411, + "learning_rate": 0.00011980700012733175, + "loss": 2.5353, + "step": 8787 + }, + { + "epoch": 0.7092244370914373, + "grad_norm": 0.6784049868583679, + "learning_rate": 0.0001197915257547753, + "loss": 2.4942, + "step": 8788 + }, + { + "epoch": 0.7093051408280203, + "grad_norm": 0.6914299726486206, + "learning_rate": 0.00011977605088896226, + "loss": 2.5682, + "step": 8789 + }, + { + "epoch": 0.7093858445646033, + "grad_norm": 0.7324358820915222, + 
"learning_rate": 0.00011976057553027837, + "loss": 2.564, + "step": 8790 + }, + { + "epoch": 0.7094665483011864, + "grad_norm": 0.6927928924560547, + "learning_rate": 0.00011974509967910927, + "loss": 2.5728, + "step": 8791 + }, + { + "epoch": 0.7095472520377694, + "grad_norm": 0.6795603036880493, + "learning_rate": 0.00011972962333584066, + "loss": 2.588, + "step": 8792 + }, + { + "epoch": 0.7096279557743523, + "grad_norm": 0.7132226228713989, + "learning_rate": 0.00011971414650085828, + "loss": 2.5759, + "step": 8793 + }, + { + "epoch": 0.7097086595109353, + "grad_norm": 0.737195611000061, + "learning_rate": 0.00011969866917454782, + "loss": 2.5721, + "step": 8794 + }, + { + "epoch": 0.7097893632475184, + "grad_norm": 0.6776021718978882, + "learning_rate": 0.00011968319135729507, + "loss": 2.5794, + "step": 8795 + }, + { + "epoch": 0.7098700669841014, + "grad_norm": 0.7113735675811768, + "learning_rate": 0.0001196677130494857, + "loss": 2.5595, + "step": 8796 + }, + { + "epoch": 0.7099507707206844, + "grad_norm": 0.6277747750282288, + "learning_rate": 0.0001196522342515055, + "loss": 2.5003, + "step": 8797 + }, + { + "epoch": 0.7100314744572673, + "grad_norm": 0.6982879042625427, + "learning_rate": 0.00011963675496374028, + "loss": 2.542, + "step": 8798 + }, + { + "epoch": 0.7101121781938504, + "grad_norm": 0.7019705176353455, + "learning_rate": 0.00011962127518657578, + "loss": 2.5723, + "step": 8799 + }, + { + "epoch": 0.7101928819304334, + "grad_norm": 0.6831088662147522, + "learning_rate": 0.00011960579492039783, + "loss": 2.5676, + "step": 8800 + }, + { + "epoch": 0.7102735856670164, + "grad_norm": 0.6744031310081482, + "learning_rate": 0.0001195903141655922, + "loss": 2.58, + "step": 8801 + }, + { + "epoch": 0.7103542894035993, + "grad_norm": 0.6873177289962769, + "learning_rate": 0.00011957483292254473, + "loss": 2.6289, + "step": 8802 + }, + { + "epoch": 0.7104349931401824, + "grad_norm": 0.6340685486793518, + "learning_rate": 0.00011955935119164125, + 
"loss": 2.5688, + "step": 8803 + }, + { + "epoch": 0.7105156968767654, + "grad_norm": 0.7147708535194397, + "learning_rate": 0.00011954386897326764, + "loss": 2.5471, + "step": 8804 + }, + { + "epoch": 0.7105964006133484, + "grad_norm": 0.699605405330658, + "learning_rate": 0.00011952838626780971, + "loss": 2.6122, + "step": 8805 + }, + { + "epoch": 0.7106771043499314, + "grad_norm": 0.6685385704040527, + "learning_rate": 0.00011951290307565335, + "loss": 2.5423, + "step": 8806 + }, + { + "epoch": 0.7107578080865145, + "grad_norm": 0.6884726881980896, + "learning_rate": 0.00011949741939718439, + "loss": 2.5243, + "step": 8807 + }, + { + "epoch": 0.7108385118230974, + "grad_norm": 0.6991142630577087, + "learning_rate": 0.00011948193523278884, + "loss": 2.6271, + "step": 8808 + }, + { + "epoch": 0.7109192155596804, + "grad_norm": 0.6964353919029236, + "learning_rate": 0.00011946645058285253, + "loss": 2.6296, + "step": 8809 + }, + { + "epoch": 0.7109999192962634, + "grad_norm": 0.7592040300369263, + "learning_rate": 0.00011945096544776136, + "loss": 2.6601, + "step": 8810 + }, + { + "epoch": 0.7110806230328465, + "grad_norm": 0.7146934866905212, + "learning_rate": 0.00011943547982790131, + "loss": 2.54, + "step": 8811 + }, + { + "epoch": 0.7111613267694294, + "grad_norm": 0.6991123557090759, + "learning_rate": 0.00011941999372365827, + "loss": 2.5978, + "step": 8812 + }, + { + "epoch": 0.7112420305060124, + "grad_norm": 0.6835920810699463, + "learning_rate": 0.00011940450713541822, + "loss": 2.6096, + "step": 8813 + }, + { + "epoch": 0.7113227342425954, + "grad_norm": 0.6913917660713196, + "learning_rate": 0.00011938902006356716, + "loss": 2.5624, + "step": 8814 + }, + { + "epoch": 0.7114034379791785, + "grad_norm": 0.6620622873306274, + "learning_rate": 0.00011937353250849102, + "loss": 2.6211, + "step": 8815 + }, + { + "epoch": 0.7114841417157615, + "grad_norm": 0.6738792061805725, + "learning_rate": 0.00011935804447057581, + "loss": 2.5889, + "step": 8816 + }, + { 
+ "epoch": 0.7115648454523444, + "grad_norm": 0.7101936936378479, + "learning_rate": 0.00011934255595020751, + "loss": 2.5846, + "step": 8817 + }, + { + "epoch": 0.7116455491889274, + "grad_norm": 0.6843911409378052, + "learning_rate": 0.00011932706694777216, + "loss": 2.5757, + "step": 8818 + }, + { + "epoch": 0.7117262529255105, + "grad_norm": 0.7217971086502075, + "learning_rate": 0.0001193115774636558, + "loss": 2.6174, + "step": 8819 + }, + { + "epoch": 0.7118069566620935, + "grad_norm": 0.6706245541572571, + "learning_rate": 0.00011929608749824445, + "loss": 2.5893, + "step": 8820 + }, + { + "epoch": 0.7118876603986765, + "grad_norm": 0.7057672739028931, + "learning_rate": 0.00011928059705192413, + "loss": 2.5426, + "step": 8821 + }, + { + "epoch": 0.7119683641352594, + "grad_norm": 0.7354697585105896, + "learning_rate": 0.00011926510612508095, + "loss": 2.5741, + "step": 8822 + }, + { + "epoch": 0.7120490678718424, + "grad_norm": 0.6618186235427856, + "learning_rate": 0.00011924961471810096, + "loss": 2.6007, + "step": 8823 + }, + { + "epoch": 0.7121297716084255, + "grad_norm": 0.6733995676040649, + "learning_rate": 0.00011923412283137028, + "loss": 2.5739, + "step": 8824 + }, + { + "epoch": 0.7122104753450085, + "grad_norm": 0.7324833869934082, + "learning_rate": 0.00011921863046527497, + "loss": 2.5461, + "step": 8825 + }, + { + "epoch": 0.7122911790815915, + "grad_norm": 0.6753048896789551, + "learning_rate": 0.00011920313762020113, + "loss": 2.5066, + "step": 8826 + }, + { + "epoch": 0.7123718828181744, + "grad_norm": 0.7861250638961792, + "learning_rate": 0.00011918764429653489, + "loss": 2.5229, + "step": 8827 + }, + { + "epoch": 0.7124525865547575, + "grad_norm": 0.7037342190742493, + "learning_rate": 0.00011917215049466244, + "loss": 2.5443, + "step": 8828 + }, + { + "epoch": 0.7125332902913405, + "grad_norm": 0.7112773060798645, + "learning_rate": 0.00011915665621496985, + "loss": 2.5656, + "step": 8829 + }, + { + "epoch": 0.7126139940279235, + 
"grad_norm": 0.6384316682815552, + "learning_rate": 0.00011914116145784333, + "loss": 2.5526, + "step": 8830 + }, + { + "epoch": 0.7126946977645064, + "grad_norm": 0.6673600077629089, + "learning_rate": 0.000119125666223669, + "loss": 2.5868, + "step": 8831 + }, + { + "epoch": 0.7127754015010895, + "grad_norm": 0.6927722692489624, + "learning_rate": 0.0001191101705128331, + "loss": 2.6237, + "step": 8832 + }, + { + "epoch": 0.7128561052376725, + "grad_norm": 0.7410106658935547, + "learning_rate": 0.00011909467432572182, + "loss": 2.5652, + "step": 8833 + }, + { + "epoch": 0.7129368089742555, + "grad_norm": 0.6780139803886414, + "learning_rate": 0.0001190791776627213, + "loss": 2.5343, + "step": 8834 + }, + { + "epoch": 0.7130175127108385, + "grad_norm": 0.7147949934005737, + "learning_rate": 0.00011906368052421781, + "loss": 2.5368, + "step": 8835 + }, + { + "epoch": 0.7130982164474216, + "grad_norm": 0.7092324495315552, + "learning_rate": 0.00011904818291059759, + "loss": 2.538, + "step": 8836 + }, + { + "epoch": 0.7131789201840045, + "grad_norm": 0.761763870716095, + "learning_rate": 0.00011903268482224684, + "loss": 2.5984, + "step": 8837 + }, + { + "epoch": 0.7132596239205875, + "grad_norm": 0.7011365294456482, + "learning_rate": 0.00011901718625955182, + "loss": 2.5383, + "step": 8838 + }, + { + "epoch": 0.7133403276571705, + "grad_norm": 0.7982703447341919, + "learning_rate": 0.00011900168722289882, + "loss": 2.5714, + "step": 8839 + }, + { + "epoch": 0.7134210313937536, + "grad_norm": 0.6788253784179688, + "learning_rate": 0.00011898618771267412, + "loss": 2.5675, + "step": 8840 + }, + { + "epoch": 0.7135017351303365, + "grad_norm": 0.6245018243789673, + "learning_rate": 0.00011897068772926397, + "loss": 2.5497, + "step": 8841 + }, + { + "epoch": 0.7135824388669195, + "grad_norm": 0.732109785079956, + "learning_rate": 0.0001189551872730547, + "loss": 2.5043, + "step": 8842 + }, + { + "epoch": 0.7136631426035025, + "grad_norm": 0.7640885710716248, + 
"learning_rate": 0.0001189396863444326, + "loss": 2.5974, + "step": 8843 + }, + { + "epoch": 0.7137438463400856, + "grad_norm": 0.6806808710098267, + "learning_rate": 0.00011892418494378403, + "loss": 2.5911, + "step": 8844 + }, + { + "epoch": 0.7138245500766686, + "grad_norm": 0.6730000376701355, + "learning_rate": 0.00011890868307149528, + "loss": 2.5405, + "step": 8845 + }, + { + "epoch": 0.7139052538132515, + "grad_norm": 0.6881929636001587, + "learning_rate": 0.00011889318072795275, + "loss": 2.6083, + "step": 8846 + }, + { + "epoch": 0.7139859575498345, + "grad_norm": 0.7079598307609558, + "learning_rate": 0.00011887767791354275, + "loss": 2.5743, + "step": 8847 + }, + { + "epoch": 0.7140666612864176, + "grad_norm": 0.6760475635528564, + "learning_rate": 0.00011886217462865166, + "loss": 2.5925, + "step": 8848 + }, + { + "epoch": 0.7141473650230006, + "grad_norm": 0.6851043701171875, + "learning_rate": 0.00011884667087366587, + "loss": 2.5839, + "step": 8849 + }, + { + "epoch": 0.7142280687595836, + "grad_norm": 0.6805267930030823, + "learning_rate": 0.00011883116664897178, + "loss": 2.562, + "step": 8850 + }, + { + "epoch": 0.7143087724961665, + "grad_norm": 0.6720704436302185, + "learning_rate": 0.00011881566195495581, + "loss": 2.5381, + "step": 8851 + }, + { + "epoch": 0.7143894762327496, + "grad_norm": 0.718166172504425, + "learning_rate": 0.00011880015679200436, + "loss": 2.5912, + "step": 8852 + }, + { + "epoch": 0.7144701799693326, + "grad_norm": 0.6643497943878174, + "learning_rate": 0.00011878465116050383, + "loss": 2.5122, + "step": 8853 + }, + { + "epoch": 0.7145508837059156, + "grad_norm": 0.705186665058136, + "learning_rate": 0.00011876914506084074, + "loss": 2.617, + "step": 8854 + }, + { + "epoch": 0.7146315874424986, + "grad_norm": 0.6417848467826843, + "learning_rate": 0.00011875363849340144, + "loss": 2.5552, + "step": 8855 + }, + { + "epoch": 0.7147122911790816, + "grad_norm": 0.6861358880996704, + "learning_rate": 0.00011873813145857249, 
+ "loss": 2.6324, + "step": 8856 + }, + { + "epoch": 0.7147929949156646, + "grad_norm": 0.7134111523628235, + "learning_rate": 0.00011872262395674027, + "loss": 2.5892, + "step": 8857 + }, + { + "epoch": 0.7148736986522476, + "grad_norm": 0.7177506685256958, + "learning_rate": 0.00011870711598829135, + "loss": 2.5677, + "step": 8858 + }, + { + "epoch": 0.7149544023888306, + "grad_norm": 0.6435763835906982, + "learning_rate": 0.00011869160755361219, + "loss": 2.5452, + "step": 8859 + }, + { + "epoch": 0.7150351061254137, + "grad_norm": 0.6443132758140564, + "learning_rate": 0.00011867609865308935, + "loss": 2.5566, + "step": 8860 + }, + { + "epoch": 0.7151158098619966, + "grad_norm": 0.7132347822189331, + "learning_rate": 0.00011866058928710925, + "loss": 2.565, + "step": 8861 + }, + { + "epoch": 0.7151965135985796, + "grad_norm": 0.7803207039833069, + "learning_rate": 0.00011864507945605854, + "loss": 2.556, + "step": 8862 + }, + { + "epoch": 0.7152772173351626, + "grad_norm": 0.7277950644493103, + "learning_rate": 0.00011862956916032367, + "loss": 2.5623, + "step": 8863 + }, + { + "epoch": 0.7153579210717457, + "grad_norm": 0.6812277436256409, + "learning_rate": 0.00011861405840029125, + "loss": 2.6146, + "step": 8864 + }, + { + "epoch": 0.7154386248083286, + "grad_norm": 0.7170509099960327, + "learning_rate": 0.00011859854717634786, + "loss": 2.52, + "step": 8865 + }, + { + "epoch": 0.7155193285449116, + "grad_norm": 0.7282906174659729, + "learning_rate": 0.00011858303548888004, + "loss": 2.5605, + "step": 8866 + }, + { + "epoch": 0.7156000322814946, + "grad_norm": 0.7290246486663818, + "learning_rate": 0.00011856752333827439, + "loss": 2.6292, + "step": 8867 + }, + { + "epoch": 0.7156807360180777, + "grad_norm": 0.6870024800300598, + "learning_rate": 0.00011855201072491752, + "loss": 2.6396, + "step": 8868 + }, + { + "epoch": 0.7157614397546607, + "grad_norm": 0.7336156964302063, + "learning_rate": 0.00011853649764919605, + "loss": 2.6356, + "step": 8869 + }, + 
{ + "epoch": 0.7158421434912436, + "grad_norm": 0.7181294560432434, + "learning_rate": 0.00011852098411149661, + "loss": 2.5163, + "step": 8870 + }, + { + "epoch": 0.7159228472278266, + "grad_norm": 0.7355513572692871, + "learning_rate": 0.00011850547011220583, + "loss": 2.5485, + "step": 8871 + }, + { + "epoch": 0.7160035509644097, + "grad_norm": 0.7005351185798645, + "learning_rate": 0.00011848995565171038, + "loss": 2.5187, + "step": 8872 + }, + { + "epoch": 0.7160842547009927, + "grad_norm": 0.6550194025039673, + "learning_rate": 0.00011847444073039686, + "loss": 2.5174, + "step": 8873 + }, + { + "epoch": 0.7161649584375757, + "grad_norm": 0.6568251252174377, + "learning_rate": 0.00011845892534865202, + "loss": 2.5128, + "step": 8874 + }, + { + "epoch": 0.7162456621741586, + "grad_norm": 0.6359419226646423, + "learning_rate": 0.0001184434095068625, + "loss": 2.5967, + "step": 8875 + }, + { + "epoch": 0.7163263659107416, + "grad_norm": 0.6730023622512817, + "learning_rate": 0.00011842789320541504, + "loss": 2.5243, + "step": 8876 + }, + { + "epoch": 0.7164070696473247, + "grad_norm": 0.6750187277793884, + "learning_rate": 0.00011841237644469625, + "loss": 2.602, + "step": 8877 + }, + { + "epoch": 0.7164877733839077, + "grad_norm": 0.7039143443107605, + "learning_rate": 0.00011839685922509291, + "loss": 2.5345, + "step": 8878 + }, + { + "epoch": 0.7165684771204907, + "grad_norm": 0.6602306962013245, + "learning_rate": 0.00011838134154699177, + "loss": 2.5995, + "step": 8879 + }, + { + "epoch": 0.7166491808570736, + "grad_norm": 0.6744598150253296, + "learning_rate": 0.00011836582341077955, + "loss": 2.6005, + "step": 8880 + }, + { + "epoch": 0.7167298845936567, + "grad_norm": 0.7136051058769226, + "learning_rate": 0.00011835030481684302, + "loss": 2.5424, + "step": 8881 + }, + { + "epoch": 0.7168105883302397, + "grad_norm": 0.7085986137390137, + "learning_rate": 0.00011833478576556889, + "loss": 2.5912, + "step": 8882 + }, + { + "epoch": 0.7168912920668227, + 
"grad_norm": 0.7635689377784729, + "learning_rate": 0.00011831926625734398, + "loss": 2.5836, + "step": 8883 + }, + { + "epoch": 0.7169719958034056, + "grad_norm": 0.6543256640434265, + "learning_rate": 0.00011830374629255508, + "loss": 2.5442, + "step": 8884 + }, + { + "epoch": 0.7170526995399887, + "grad_norm": 0.663840115070343, + "learning_rate": 0.00011828822587158896, + "loss": 2.5529, + "step": 8885 + }, + { + "epoch": 0.7171334032765717, + "grad_norm": 0.6868027448654175, + "learning_rate": 0.00011827270499483247, + "loss": 2.6678, + "step": 8886 + }, + { + "epoch": 0.7172141070131547, + "grad_norm": 0.649172842502594, + "learning_rate": 0.00011825718366267238, + "loss": 2.57, + "step": 8887 + }, + { + "epoch": 0.7172948107497377, + "grad_norm": 0.6818440556526184, + "learning_rate": 0.00011824166187549554, + "loss": 2.5602, + "step": 8888 + }, + { + "epoch": 0.7173755144863208, + "grad_norm": 0.7222314476966858, + "learning_rate": 0.00011822613963368885, + "loss": 2.5526, + "step": 8889 + }, + { + "epoch": 0.7174562182229037, + "grad_norm": 0.7309598922729492, + "learning_rate": 0.00011821061693763909, + "loss": 2.5515, + "step": 8890 + }, + { + "epoch": 0.7175369219594867, + "grad_norm": 0.6935746669769287, + "learning_rate": 0.00011819509378773314, + "loss": 2.5506, + "step": 8891 + }, + { + "epoch": 0.7176176256960697, + "grad_norm": 0.6754423975944519, + "learning_rate": 0.00011817957018435792, + "loss": 2.5621, + "step": 8892 + }, + { + "epoch": 0.7176983294326528, + "grad_norm": 0.7087355852127075, + "learning_rate": 0.00011816404612790026, + "loss": 2.5708, + "step": 8893 + }, + { + "epoch": 0.7177790331692357, + "grad_norm": 0.726820707321167, + "learning_rate": 0.0001181485216187471, + "loss": 2.5741, + "step": 8894 + }, + { + "epoch": 0.7178597369058187, + "grad_norm": 0.6539922952651978, + "learning_rate": 0.00011813299665728532, + "loss": 2.613, + "step": 8895 + }, + { + "epoch": 0.7179404406424017, + "grad_norm": 0.7008066773414612, + 
"learning_rate": 0.00011811747124390189, + "loss": 2.6029, + "step": 8896 + }, + { + "epoch": 0.7180211443789848, + "grad_norm": 0.6900522708892822, + "learning_rate": 0.00011810194537898374, + "loss": 2.5716, + "step": 8897 + }, + { + "epoch": 0.7181018481155678, + "grad_norm": 0.675345242023468, + "learning_rate": 0.00011808641906291776, + "loss": 2.5742, + "step": 8898 + }, + { + "epoch": 0.7181825518521507, + "grad_norm": 0.6697559356689453, + "learning_rate": 0.00011807089229609092, + "loss": 2.5717, + "step": 8899 + }, + { + "epoch": 0.7182632555887337, + "grad_norm": 0.6874344944953918, + "learning_rate": 0.00011805536507889021, + "loss": 2.5394, + "step": 8900 + }, + { + "epoch": 0.7183439593253168, + "grad_norm": 0.6675494313240051, + "learning_rate": 0.00011803983741170263, + "loss": 2.5655, + "step": 8901 + }, + { + "epoch": 0.7184246630618998, + "grad_norm": 0.6937244534492493, + "learning_rate": 0.00011802430929491517, + "loss": 2.5676, + "step": 8902 + }, + { + "epoch": 0.7185053667984828, + "grad_norm": 0.7591496109962463, + "learning_rate": 0.00011800878072891474, + "loss": 2.5849, + "step": 8903 + }, + { + "epoch": 0.7185860705350657, + "grad_norm": 0.6503129005432129, + "learning_rate": 0.00011799325171408846, + "loss": 2.5416, + "step": 8904 + }, + { + "epoch": 0.7186667742716488, + "grad_norm": 0.6450222134590149, + "learning_rate": 0.00011797772225082333, + "loss": 2.5395, + "step": 8905 + }, + { + "epoch": 0.7187474780082318, + "grad_norm": 0.7317619919776917, + "learning_rate": 0.00011796219233950632, + "loss": 2.609, + "step": 8906 + }, + { + "epoch": 0.7188281817448148, + "grad_norm": 0.7585787773132324, + "learning_rate": 0.00011794666198052455, + "loss": 2.5556, + "step": 8907 + }, + { + "epoch": 0.7189088854813978, + "grad_norm": 0.6718214750289917, + "learning_rate": 0.00011793113117426505, + "loss": 2.5914, + "step": 8908 + }, + { + "epoch": 0.7189895892179808, + "grad_norm": 0.6459314823150635, + "learning_rate": 
0.00011791559992111487, + "loss": 2.5956, + "step": 8909 + }, + { + "epoch": 0.7190702929545638, + "grad_norm": 0.6592775583267212, + "learning_rate": 0.00011790006822146113, + "loss": 2.5568, + "step": 8910 + }, + { + "epoch": 0.7191509966911468, + "grad_norm": 0.7277452349662781, + "learning_rate": 0.0001178845360756909, + "loss": 2.5989, + "step": 8911 + }, + { + "epoch": 0.7192317004277298, + "grad_norm": 0.7020131945610046, + "learning_rate": 0.00011786900348419128, + "loss": 2.645, + "step": 8912 + }, + { + "epoch": 0.7193124041643129, + "grad_norm": 0.6746636629104614, + "learning_rate": 0.00011785347044734938, + "loss": 2.5173, + "step": 8913 + }, + { + "epoch": 0.7193931079008958, + "grad_norm": 0.6782798171043396, + "learning_rate": 0.0001178379369655523, + "loss": 2.6007, + "step": 8914 + }, + { + "epoch": 0.7194738116374788, + "grad_norm": 0.705498218536377, + "learning_rate": 0.00011782240303918724, + "loss": 2.5408, + "step": 8915 + }, + { + "epoch": 0.7195545153740618, + "grad_norm": 0.675532341003418, + "learning_rate": 0.00011780686866864128, + "loss": 2.5188, + "step": 8916 + }, + { + "epoch": 0.7196352191106449, + "grad_norm": 0.6552390456199646, + "learning_rate": 0.00011779133385430161, + "loss": 2.5409, + "step": 8917 + }, + { + "epoch": 0.7197159228472279, + "grad_norm": 0.6589654088020325, + "learning_rate": 0.00011777579859655544, + "loss": 2.5447, + "step": 8918 + }, + { + "epoch": 0.7197966265838108, + "grad_norm": 0.7548382878303528, + "learning_rate": 0.00011776026289578985, + "loss": 2.5239, + "step": 8919 + }, + { + "epoch": 0.7198773303203938, + "grad_norm": 0.697325587272644, + "learning_rate": 0.00011774472675239207, + "loss": 2.5887, + "step": 8920 + }, + { + "epoch": 0.7199580340569769, + "grad_norm": 0.734462320804596, + "learning_rate": 0.00011772919016674934, + "loss": 2.5847, + "step": 8921 + }, + { + "epoch": 0.7200387377935599, + "grad_norm": 0.6736955642700195, + "learning_rate": 0.00011771365313924886, + "loss": 2.558, + 
"step": 8922 + }, + { + "epoch": 0.7201194415301428, + "grad_norm": 0.7157856822013855, + "learning_rate": 0.00011769811567027784, + "loss": 2.6199, + "step": 8923 + }, + { + "epoch": 0.7202001452667258, + "grad_norm": 0.7045830488204956, + "learning_rate": 0.0001176825777602235, + "loss": 2.576, + "step": 8924 + }, + { + "epoch": 0.7202808490033088, + "grad_norm": 0.6875419020652771, + "learning_rate": 0.00011766703940947308, + "loss": 2.6045, + "step": 8925 + }, + { + "epoch": 0.7203615527398919, + "grad_norm": 0.7313494086265564, + "learning_rate": 0.00011765150061841387, + "loss": 2.5388, + "step": 8926 + }, + { + "epoch": 0.7204422564764749, + "grad_norm": 0.7223608493804932, + "learning_rate": 0.00011763596138743313, + "loss": 2.5466, + "step": 8927 + }, + { + "epoch": 0.7205229602130578, + "grad_norm": 0.7289614081382751, + "learning_rate": 0.00011762042171691816, + "loss": 2.5862, + "step": 8928 + }, + { + "epoch": 0.7206036639496408, + "grad_norm": 0.7098878026008606, + "learning_rate": 0.00011760488160725617, + "loss": 2.5497, + "step": 8929 + }, + { + "epoch": 0.7206843676862239, + "grad_norm": 0.7096838355064392, + "learning_rate": 0.00011758934105883452, + "loss": 2.558, + "step": 8930 + }, + { + "epoch": 0.7207650714228069, + "grad_norm": 0.7334743738174438, + "learning_rate": 0.00011757380007204055, + "loss": 2.5966, + "step": 8931 + }, + { + "epoch": 0.7208457751593899, + "grad_norm": 0.7192476391792297, + "learning_rate": 0.00011755825864726149, + "loss": 2.5307, + "step": 8932 + }, + { + "epoch": 0.7209264788959728, + "grad_norm": 0.7329632043838501, + "learning_rate": 0.00011754271678488478, + "loss": 2.6453, + "step": 8933 + }, + { + "epoch": 0.7210071826325559, + "grad_norm": 0.6827974915504456, + "learning_rate": 0.00011752717448529766, + "loss": 2.5507, + "step": 8934 + }, + { + "epoch": 0.7210878863691389, + "grad_norm": 0.8292449116706848, + "learning_rate": 0.00011751163174888756, + "loss": 2.6178, + "step": 8935 + }, + { + "epoch": 
0.7211685901057219, + "grad_norm": 0.6504058837890625, + "learning_rate": 0.00011749608857604183, + "loss": 2.574, + "step": 8936 + }, + { + "epoch": 0.7212492938423049, + "grad_norm": 0.6567742824554443, + "learning_rate": 0.00011748054496714785, + "loss": 2.45, + "step": 8937 + }, + { + "epoch": 0.7213299975788879, + "grad_norm": 0.6699101328849792, + "learning_rate": 0.00011746500092259296, + "loss": 2.5827, + "step": 8938 + }, + { + "epoch": 0.7214107013154709, + "grad_norm": 0.7664934992790222, + "learning_rate": 0.0001174494564427646, + "loss": 2.5246, + "step": 8939 + }, + { + "epoch": 0.7214914050520539, + "grad_norm": 0.7276309132575989, + "learning_rate": 0.00011743391152805017, + "loss": 2.6096, + "step": 8940 + }, + { + "epoch": 0.7215721087886369, + "grad_norm": 0.7248005867004395, + "learning_rate": 0.0001174183661788371, + "loss": 2.6362, + "step": 8941 + }, + { + "epoch": 0.72165281252522, + "grad_norm": 0.7773801684379578, + "learning_rate": 0.00011740282039551282, + "loss": 2.547, + "step": 8942 + }, + { + "epoch": 0.7217335162618029, + "grad_norm": 0.7346466779708862, + "learning_rate": 0.00011738727417846476, + "loss": 2.5635, + "step": 8943 + }, + { + "epoch": 0.7218142199983859, + "grad_norm": 0.7042707800865173, + "learning_rate": 0.0001173717275280804, + "loss": 2.5593, + "step": 8944 + }, + { + "epoch": 0.7218949237349689, + "grad_norm": 0.6894899010658264, + "learning_rate": 0.00011735618044474712, + "loss": 2.5272, + "step": 8945 + }, + { + "epoch": 0.721975627471552, + "grad_norm": 0.6643744111061096, + "learning_rate": 0.00011734063292885249, + "loss": 2.6001, + "step": 8946 + }, + { + "epoch": 0.722056331208135, + "grad_norm": 0.7543076276779175, + "learning_rate": 0.00011732508498078396, + "loss": 2.558, + "step": 8947 + }, + { + "epoch": 0.7221370349447179, + "grad_norm": 0.7065596580505371, + "learning_rate": 0.00011730953660092903, + "loss": 2.6255, + "step": 8948 + }, + { + "epoch": 0.7222177386813009, + "grad_norm": 
0.6968158483505249, + "learning_rate": 0.0001172939877896752, + "loss": 2.5277, + "step": 8949 + }, + { + "epoch": 0.722298442417884, + "grad_norm": 0.6918557286262512, + "learning_rate": 0.00011727843854740996, + "loss": 2.5456, + "step": 8950 + }, + { + "epoch": 0.722379146154467, + "grad_norm": 0.7262142300605774, + "learning_rate": 0.00011726288887452088, + "loss": 2.5345, + "step": 8951 + }, + { + "epoch": 0.7224598498910499, + "grad_norm": 0.7423329949378967, + "learning_rate": 0.00011724733877139548, + "loss": 2.6335, + "step": 8952 + }, + { + "epoch": 0.7225405536276329, + "grad_norm": 0.7734495997428894, + "learning_rate": 0.00011723178823842136, + "loss": 2.5951, + "step": 8953 + }, + { + "epoch": 0.722621257364216, + "grad_norm": 0.6792804598808289, + "learning_rate": 0.00011721623727598597, + "loss": 2.5927, + "step": 8954 + }, + { + "epoch": 0.722701961100799, + "grad_norm": 0.7971853017807007, + "learning_rate": 0.00011720068588447697, + "loss": 2.5451, + "step": 8955 + }, + { + "epoch": 0.722782664837382, + "grad_norm": 0.7264395356178284, + "learning_rate": 0.00011718513406428189, + "loss": 2.5769, + "step": 8956 + }, + { + "epoch": 0.7228633685739649, + "grad_norm": 0.6536725759506226, + "learning_rate": 0.0001171695818157884, + "loss": 2.6285, + "step": 8957 + }, + { + "epoch": 0.722944072310548, + "grad_norm": 0.6676235198974609, + "learning_rate": 0.000117154029139384, + "loss": 2.5896, + "step": 8958 + }, + { + "epoch": 0.723024776047131, + "grad_norm": 0.7104088664054871, + "learning_rate": 0.00011713847603545636, + "loss": 2.5606, + "step": 8959 + }, + { + "epoch": 0.723105479783714, + "grad_norm": 0.6646785140037537, + "learning_rate": 0.0001171229225043931, + "loss": 2.5617, + "step": 8960 + }, + { + "epoch": 0.723186183520297, + "grad_norm": 0.7148672342300415, + "learning_rate": 0.00011710736854658186, + "loss": 2.5855, + "step": 8961 + }, + { + "epoch": 0.72326688725688, + "grad_norm": 0.6864955425262451, + "learning_rate": 
0.00011709181416241028, + "loss": 2.6098, + "step": 8962 + }, + { + "epoch": 0.723347590993463, + "grad_norm": 0.7049087285995483, + "learning_rate": 0.00011707625935226602, + "loss": 2.506, + "step": 8963 + }, + { + "epoch": 0.723428294730046, + "grad_norm": 0.6419759392738342, + "learning_rate": 0.00011706070411653672, + "loss": 2.5485, + "step": 8964 + }, + { + "epoch": 0.723508998466629, + "grad_norm": 0.6879174709320068, + "learning_rate": 0.00011704514845561007, + "loss": 2.5373, + "step": 8965 + }, + { + "epoch": 0.7235897022032121, + "grad_norm": 0.6473780274391174, + "learning_rate": 0.00011702959236987378, + "loss": 2.5479, + "step": 8966 + }, + { + "epoch": 0.723670405939795, + "grad_norm": 0.6924241185188293, + "learning_rate": 0.00011701403585971553, + "loss": 2.5679, + "step": 8967 + }, + { + "epoch": 0.723751109676378, + "grad_norm": 0.7452483773231506, + "learning_rate": 0.00011699847892552305, + "loss": 2.5043, + "step": 8968 + }, + { + "epoch": 0.723831813412961, + "grad_norm": 0.7517218589782715, + "learning_rate": 0.00011698292156768402, + "loss": 2.5554, + "step": 8969 + }, + { + "epoch": 0.7239125171495441, + "grad_norm": 0.6492432355880737, + "learning_rate": 0.00011696736378658618, + "loss": 2.6091, + "step": 8970 + }, + { + "epoch": 0.723993220886127, + "grad_norm": 0.740093469619751, + "learning_rate": 0.0001169518055826173, + "loss": 2.5629, + "step": 8971 + }, + { + "epoch": 0.72407392462271, + "grad_norm": 0.7186923027038574, + "learning_rate": 0.00011693624695616509, + "loss": 2.5537, + "step": 8972 + }, + { + "epoch": 0.724154628359293, + "grad_norm": 0.7066059112548828, + "learning_rate": 0.00011692068790761737, + "loss": 2.5115, + "step": 8973 + }, + { + "epoch": 0.7242353320958761, + "grad_norm": 0.7031805515289307, + "learning_rate": 0.00011690512843736185, + "loss": 2.596, + "step": 8974 + }, + { + "epoch": 0.7243160358324591, + "grad_norm": 0.7308956384658813, + "learning_rate": 0.00011688956854578635, + "loss": 2.6311, + 
"step": 8975 + }, + { + "epoch": 0.724396739569042, + "grad_norm": 0.6926052570343018, + "learning_rate": 0.00011687400823327863, + "loss": 2.5659, + "step": 8976 + }, + { + "epoch": 0.724477443305625, + "grad_norm": 0.69638991355896, + "learning_rate": 0.00011685844750022654, + "loss": 2.4792, + "step": 8977 + }, + { + "epoch": 0.724558147042208, + "grad_norm": 0.6858355402946472, + "learning_rate": 0.00011684288634701785, + "loss": 2.5707, + "step": 8978 + }, + { + "epoch": 0.7246388507787911, + "grad_norm": 0.6673639416694641, + "learning_rate": 0.00011682732477404044, + "loss": 2.5627, + "step": 8979 + }, + { + "epoch": 0.7247195545153741, + "grad_norm": 0.7174322605133057, + "learning_rate": 0.00011681176278168206, + "loss": 2.5801, + "step": 8980 + }, + { + "epoch": 0.724800258251957, + "grad_norm": 0.6840930581092834, + "learning_rate": 0.00011679620037033064, + "loss": 2.4994, + "step": 8981 + }, + { + "epoch": 0.72488096198854, + "grad_norm": 0.7179884910583496, + "learning_rate": 0.00011678063754037399, + "loss": 2.6408, + "step": 8982 + }, + { + "epoch": 0.7249616657251231, + "grad_norm": 0.6564825773239136, + "learning_rate": 0.00011676507429219998, + "loss": 2.5412, + "step": 8983 + }, + { + "epoch": 0.7250423694617061, + "grad_norm": 0.7020624876022339, + "learning_rate": 0.00011674951062619652, + "loss": 2.5778, + "step": 8984 + }, + { + "epoch": 0.7251230731982891, + "grad_norm": 0.8061255812644958, + "learning_rate": 0.00011673394654275145, + "loss": 2.5581, + "step": 8985 + }, + { + "epoch": 0.725203776934872, + "grad_norm": 0.7653982043266296, + "learning_rate": 0.00011671838204225267, + "loss": 2.5324, + "step": 8986 + }, + { + "epoch": 0.7252844806714551, + "grad_norm": 0.7168377041816711, + "learning_rate": 0.00011670281712508816, + "loss": 2.6357, + "step": 8987 + }, + { + "epoch": 0.7253651844080381, + "grad_norm": 0.6860470771789551, + "learning_rate": 0.00011668725179164575, + "loss": 2.5367, + "step": 8988 + }, + { + "epoch": 
0.7254458881446211, + "grad_norm": 0.7175878286361694, + "learning_rate": 0.00011667168604231342, + "loss": 2.549, + "step": 8989 + }, + { + "epoch": 0.725526591881204, + "grad_norm": 0.7124783992767334, + "learning_rate": 0.00011665611987747907, + "loss": 2.5566, + "step": 8990 + }, + { + "epoch": 0.7256072956177871, + "grad_norm": 0.6575417518615723, + "learning_rate": 0.00011664055329753067, + "loss": 2.5455, + "step": 8991 + }, + { + "epoch": 0.7256879993543701, + "grad_norm": 0.6576877236366272, + "learning_rate": 0.00011662498630285623, + "loss": 2.5596, + "step": 8992 + }, + { + "epoch": 0.7257687030909531, + "grad_norm": 0.7235110402107239, + "learning_rate": 0.00011660941889384365, + "loss": 2.6199, + "step": 8993 + }, + { + "epoch": 0.7258494068275361, + "grad_norm": 0.6623982787132263, + "learning_rate": 0.00011659385107088092, + "loss": 2.5642, + "step": 8994 + }, + { + "epoch": 0.7259301105641192, + "grad_norm": 0.7113857865333557, + "learning_rate": 0.00011657828283435605, + "loss": 2.5631, + "step": 8995 + }, + { + "epoch": 0.7260108143007021, + "grad_norm": 0.7076124548912048, + "learning_rate": 0.00011656271418465702, + "loss": 2.5141, + "step": 8996 + }, + { + "epoch": 0.7260915180372851, + "grad_norm": 0.7534562349319458, + "learning_rate": 0.00011654714512217188, + "loss": 2.5896, + "step": 8997 + }, + { + "epoch": 0.7261722217738681, + "grad_norm": 0.7393170595169067, + "learning_rate": 0.00011653157564728865, + "loss": 2.5848, + "step": 8998 + }, + { + "epoch": 0.7262529255104512, + "grad_norm": 0.6829591989517212, + "learning_rate": 0.0001165160057603953, + "loss": 2.5439, + "step": 8999 + }, + { + "epoch": 0.7263336292470342, + "grad_norm": 0.6527189016342163, + "learning_rate": 0.00011650043546187995, + "loss": 2.5655, + "step": 9000 + }, + { + "epoch": 0.7263336292470342, + "eval_loss": 2.487652063369751, + "eval_runtime": 845.9129, + "eval_samples_per_second": 3.097, + "eval_steps_per_second": 0.517, + "step": 9000 + }, + { + "epoch": 
0.7264143329836171, + "grad_norm": 0.6545615196228027, + "learning_rate": 0.00011648486475213058, + "loss": 2.5366, + "step": 9001 + }, + { + "epoch": 0.7264950367202001, + "grad_norm": 0.6854971647262573, + "learning_rate": 0.00011646929363153529, + "loss": 2.5832, + "step": 9002 + }, + { + "epoch": 0.7265757404567832, + "grad_norm": 0.7745552062988281, + "learning_rate": 0.00011645372210048218, + "loss": 2.5854, + "step": 9003 + }, + { + "epoch": 0.7266564441933662, + "grad_norm": 0.7159156203269958, + "learning_rate": 0.00011643815015935928, + "loss": 2.614, + "step": 9004 + }, + { + "epoch": 0.7267371479299491, + "grad_norm": 0.700074315071106, + "learning_rate": 0.00011642257780855475, + "loss": 2.6124, + "step": 9005 + }, + { + "epoch": 0.7268178516665321, + "grad_norm": 0.7367869019508362, + "learning_rate": 0.0001164070050484566, + "loss": 2.5512, + "step": 9006 + }, + { + "epoch": 0.7268985554031152, + "grad_norm": 0.6623905897140503, + "learning_rate": 0.00011639143187945301, + "loss": 2.5724, + "step": 9007 + }, + { + "epoch": 0.7269792591396982, + "grad_norm": 0.7111610770225525, + "learning_rate": 0.0001163758583019321, + "loss": 2.547, + "step": 9008 + }, + { + "epoch": 0.7270599628762812, + "grad_norm": 0.6860959529876709, + "learning_rate": 0.00011636028431628199, + "loss": 2.532, + "step": 9009 + }, + { + "epoch": 0.7271406666128641, + "grad_norm": 0.7606309056282043, + "learning_rate": 0.00011634470992289084, + "loss": 2.5214, + "step": 9010 + }, + { + "epoch": 0.7272213703494472, + "grad_norm": 0.6440508365631104, + "learning_rate": 0.00011632913512214677, + "loss": 2.5554, + "step": 9011 + }, + { + "epoch": 0.7273020740860302, + "grad_norm": 0.6770462393760681, + "learning_rate": 0.00011631355991443796, + "loss": 2.5877, + "step": 9012 + }, + { + "epoch": 0.7273827778226132, + "grad_norm": 0.6419155597686768, + "learning_rate": 0.00011629798430015262, + "loss": 2.5337, + "step": 9013 + }, + { + "epoch": 0.7274634815591962, + "grad_norm": 
0.6782121658325195, + "learning_rate": 0.00011628240827967891, + "loss": 2.5152, + "step": 9014 + }, + { + "epoch": 0.7275441852957792, + "grad_norm": 0.6972285509109497, + "learning_rate": 0.00011626683185340501, + "loss": 2.5628, + "step": 9015 + }, + { + "epoch": 0.7276248890323622, + "grad_norm": 0.6823342442512512, + "learning_rate": 0.00011625125502171914, + "loss": 2.5977, + "step": 9016 + }, + { + "epoch": 0.7277055927689452, + "grad_norm": 0.723311722278595, + "learning_rate": 0.0001162356777850095, + "loss": 2.5772, + "step": 9017 + }, + { + "epoch": 0.7277862965055282, + "grad_norm": 0.7395427227020264, + "learning_rate": 0.00011622010014366435, + "loss": 2.6068, + "step": 9018 + }, + { + "epoch": 0.7278670002421113, + "grad_norm": 0.6970974206924438, + "learning_rate": 0.00011620452209807192, + "loss": 2.5577, + "step": 9019 + }, + { + "epoch": 0.7279477039786942, + "grad_norm": 0.6921418309211731, + "learning_rate": 0.0001161889436486204, + "loss": 2.5476, + "step": 9020 + }, + { + "epoch": 0.7280284077152772, + "grad_norm": 0.7243841886520386, + "learning_rate": 0.0001161733647956981, + "loss": 2.579, + "step": 9021 + }, + { + "epoch": 0.7281091114518602, + "grad_norm": 0.7240262627601624, + "learning_rate": 0.0001161577855396933, + "loss": 2.5959, + "step": 9022 + }, + { + "epoch": 0.7281898151884433, + "grad_norm": 0.7215476632118225, + "learning_rate": 0.0001161422058809942, + "loss": 2.5979, + "step": 9023 + }, + { + "epoch": 0.7282705189250263, + "grad_norm": 0.7109708786010742, + "learning_rate": 0.00011612662581998917, + "loss": 2.5912, + "step": 9024 + }, + { + "epoch": 0.7283512226616092, + "grad_norm": 0.6814073920249939, + "learning_rate": 0.00011611104535706645, + "loss": 2.5742, + "step": 9025 + }, + { + "epoch": 0.7284319263981922, + "grad_norm": 0.6788144707679749, + "learning_rate": 0.0001160954644926144, + "loss": 2.5656, + "step": 9026 + }, + { + "epoch": 0.7285126301347752, + "grad_norm": 0.7312989830970764, + "learning_rate": 
0.00011607988322702126, + "loss": 2.5877, + "step": 9027 + }, + { + "epoch": 0.7285933338713583, + "grad_norm": 0.6725338697433472, + "learning_rate": 0.0001160643015606754, + "loss": 2.5261, + "step": 9028 + }, + { + "epoch": 0.7286740376079412, + "grad_norm": 0.7439326047897339, + "learning_rate": 0.00011604871949396516, + "loss": 2.603, + "step": 9029 + }, + { + "epoch": 0.7287547413445242, + "grad_norm": 0.7091783285140991, + "learning_rate": 0.00011603313702727889, + "loss": 2.5227, + "step": 9030 + }, + { + "epoch": 0.7288354450811072, + "grad_norm": 0.7474398016929626, + "learning_rate": 0.00011601755416100492, + "loss": 2.616, + "step": 9031 + }, + { + "epoch": 0.7289161488176903, + "grad_norm": 0.6904098987579346, + "learning_rate": 0.00011600197089553162, + "loss": 2.556, + "step": 9032 + }, + { + "epoch": 0.7289968525542733, + "grad_norm": 0.7305783033370972, + "learning_rate": 0.00011598638723124739, + "loss": 2.5633, + "step": 9033 + }, + { + "epoch": 0.7290775562908562, + "grad_norm": 0.6626651883125305, + "learning_rate": 0.00011597080316854062, + "loss": 2.5862, + "step": 9034 + }, + { + "epoch": 0.7291582600274392, + "grad_norm": 0.683102548122406, + "learning_rate": 0.00011595521870779968, + "loss": 2.5629, + "step": 9035 + }, + { + "epoch": 0.7292389637640223, + "grad_norm": 0.7486757636070251, + "learning_rate": 0.00011593963384941295, + "loss": 2.5831, + "step": 9036 + }, + { + "epoch": 0.7293196675006053, + "grad_norm": 0.8059591054916382, + "learning_rate": 0.00011592404859376888, + "loss": 2.6414, + "step": 9037 + }, + { + "epoch": 0.7294003712371883, + "grad_norm": 0.8371721506118774, + "learning_rate": 0.00011590846294125594, + "loss": 2.643, + "step": 9038 + }, + { + "epoch": 0.7294810749737712, + "grad_norm": 0.7216931581497192, + "learning_rate": 0.00011589287689226246, + "loss": 2.6, + "step": 9039 + }, + { + "epoch": 0.7295617787103543, + "grad_norm": 0.6940354704856873, + "learning_rate": 0.00011587729044717701, + "loss": 2.546, + 
"step": 9040 + }, + { + "epoch": 0.7296424824469373, + "grad_norm": 0.6888829469680786, + "learning_rate": 0.00011586170360638792, + "loss": 2.5878, + "step": 9041 + }, + { + "epoch": 0.7297231861835203, + "grad_norm": 0.6863886117935181, + "learning_rate": 0.00011584611637028373, + "loss": 2.5389, + "step": 9042 + }, + { + "epoch": 0.7298038899201033, + "grad_norm": 0.6670756936073303, + "learning_rate": 0.00011583052873925294, + "loss": 2.5465, + "step": 9043 + }, + { + "epoch": 0.7298845936566863, + "grad_norm": 0.7441220879554749, + "learning_rate": 0.00011581494071368392, + "loss": 2.5679, + "step": 9044 + }, + { + "epoch": 0.7299652973932693, + "grad_norm": 0.7135717272758484, + "learning_rate": 0.0001157993522939653, + "loss": 2.5341, + "step": 9045 + }, + { + "epoch": 0.7300460011298523, + "grad_norm": 0.6837992072105408, + "learning_rate": 0.00011578376348048547, + "loss": 2.5233, + "step": 9046 + }, + { + "epoch": 0.7301267048664353, + "grad_norm": 0.706666886806488, + "learning_rate": 0.00011576817427363302, + "loss": 2.6109, + "step": 9047 + }, + { + "epoch": 0.7302074086030184, + "grad_norm": 0.6856269240379333, + "learning_rate": 0.00011575258467379646, + "loss": 2.5651, + "step": 9048 + }, + { + "epoch": 0.7302881123396013, + "grad_norm": 0.6931480169296265, + "learning_rate": 0.00011573699468136427, + "loss": 2.6031, + "step": 9049 + }, + { + "epoch": 0.7303688160761843, + "grad_norm": 0.6558480858802795, + "learning_rate": 0.00011572140429672508, + "loss": 2.5661, + "step": 9050 + }, + { + "epoch": 0.7304495198127673, + "grad_norm": 0.6468425393104553, + "learning_rate": 0.00011570581352026742, + "loss": 2.5171, + "step": 9051 + }, + { + "epoch": 0.7305302235493504, + "grad_norm": 0.7204702496528625, + "learning_rate": 0.00011569022235237974, + "loss": 2.5861, + "step": 9052 + }, + { + "epoch": 0.7306109272859334, + "grad_norm": 0.7536416053771973, + "learning_rate": 0.00011567463079345078, + "loss": 2.633, + "step": 9053 + }, + { + "epoch": 
0.7306916310225163, + "grad_norm": 0.6597960591316223, + "learning_rate": 0.00011565903884386904, + "loss": 2.5327, + "step": 9054 + }, + { + "epoch": 0.7307723347590993, + "grad_norm": 0.689153254032135, + "learning_rate": 0.0001156434465040231, + "loss": 2.5397, + "step": 9055 + }, + { + "epoch": 0.7308530384956824, + "grad_norm": 0.7664844393730164, + "learning_rate": 0.00011562785377430159, + "loss": 2.4852, + "step": 9056 + }, + { + "epoch": 0.7309337422322654, + "grad_norm": 0.7122881412506104, + "learning_rate": 0.0001156122606550931, + "loss": 2.5401, + "step": 9057 + }, + { + "epoch": 0.7310144459688483, + "grad_norm": 0.6937551498413086, + "learning_rate": 0.00011559666714678627, + "loss": 2.5705, + "step": 9058 + }, + { + "epoch": 0.7310951497054313, + "grad_norm": 0.6504047513008118, + "learning_rate": 0.00011558107324976974, + "loss": 2.5638, + "step": 9059 + }, + { + "epoch": 0.7311758534420144, + "grad_norm": 0.7759538888931274, + "learning_rate": 0.0001155654789644321, + "loss": 2.5864, + "step": 9060 + }, + { + "epoch": 0.7312565571785974, + "grad_norm": 0.719859778881073, + "learning_rate": 0.00011554988429116207, + "loss": 2.519, + "step": 9061 + }, + { + "epoch": 0.7313372609151804, + "grad_norm": 0.7159178853034973, + "learning_rate": 0.00011553428923034826, + "loss": 2.5301, + "step": 9062 + }, + { + "epoch": 0.7314179646517633, + "grad_norm": 0.6584001183509827, + "learning_rate": 0.00011551869378237934, + "loss": 2.4716, + "step": 9063 + }, + { + "epoch": 0.7314986683883464, + "grad_norm": 0.6548463702201843, + "learning_rate": 0.00011550309794764405, + "loss": 2.5637, + "step": 9064 + }, + { + "epoch": 0.7315793721249294, + "grad_norm": 0.73887699842453, + "learning_rate": 0.000115487501726531, + "loss": 2.5813, + "step": 9065 + }, + { + "epoch": 0.7316600758615124, + "grad_norm": 0.7856181859970093, + "learning_rate": 0.00011547190511942893, + "loss": 2.592, + "step": 9066 + }, + { + "epoch": 0.7317407795980954, + "grad_norm": 
0.7040740847587585, + "learning_rate": 0.00011545630812672654, + "loss": 2.5324, + "step": 9067 + }, + { + "epoch": 0.7318214833346784, + "grad_norm": 0.7316064238548279, + "learning_rate": 0.00011544071074881253, + "loss": 2.5487, + "step": 9068 + }, + { + "epoch": 0.7319021870712614, + "grad_norm": 0.7020413279533386, + "learning_rate": 0.00011542511298607568, + "loss": 2.5179, + "step": 9069 + }, + { + "epoch": 0.7319828908078444, + "grad_norm": 0.672605574131012, + "learning_rate": 0.00011540951483890468, + "loss": 2.5367, + "step": 9070 + }, + { + "epoch": 0.7320635945444274, + "grad_norm": 0.7668856382369995, + "learning_rate": 0.00011539391630768828, + "loss": 2.6089, + "step": 9071 + }, + { + "epoch": 0.7321442982810105, + "grad_norm": 0.6641809940338135, + "learning_rate": 0.00011537831739281524, + "loss": 2.5411, + "step": 9072 + }, + { + "epoch": 0.7322250020175934, + "grad_norm": 0.7142000198364258, + "learning_rate": 0.00011536271809467434, + "loss": 2.5469, + "step": 9073 + }, + { + "epoch": 0.7323057057541764, + "grad_norm": 0.7266140580177307, + "learning_rate": 0.00011534711841365435, + "loss": 2.5565, + "step": 9074 + }, + { + "epoch": 0.7323864094907594, + "grad_norm": 0.6763899326324463, + "learning_rate": 0.00011533151835014407, + "loss": 2.551, + "step": 9075 + }, + { + "epoch": 0.7324671132273425, + "grad_norm": 0.6517418026924133, + "learning_rate": 0.00011531591790453224, + "loss": 2.5415, + "step": 9076 + }, + { + "epoch": 0.7325478169639255, + "grad_norm": 0.6602214574813843, + "learning_rate": 0.00011530031707720772, + "loss": 2.593, + "step": 9077 + }, + { + "epoch": 0.7326285207005084, + "grad_norm": 0.7448844313621521, + "learning_rate": 0.00011528471586855931, + "loss": 2.5598, + "step": 9078 + }, + { + "epoch": 0.7327092244370914, + "grad_norm": 0.7197073698043823, + "learning_rate": 0.00011526911427897579, + "loss": 2.5128, + "step": 9079 + }, + { + "epoch": 0.7327899281736744, + "grad_norm": 0.7245968580245972, + "learning_rate": 
0.00011525351230884606, + "loss": 2.5016, + "step": 9080 + }, + { + "epoch": 0.7328706319102575, + "grad_norm": 0.6715837717056274, + "learning_rate": 0.00011523790995855892, + "loss": 2.5469, + "step": 9081 + }, + { + "epoch": 0.7329513356468405, + "grad_norm": 0.7143638730049133, + "learning_rate": 0.00011522230722850325, + "loss": 2.5164, + "step": 9082 + }, + { + "epoch": 0.7330320393834234, + "grad_norm": 0.6809647083282471, + "learning_rate": 0.00011520670411906787, + "loss": 2.6071, + "step": 9083 + }, + { + "epoch": 0.7331127431200064, + "grad_norm": 0.7160956859588623, + "learning_rate": 0.00011519110063064167, + "loss": 2.5346, + "step": 9084 + }, + { + "epoch": 0.7331934468565895, + "grad_norm": 0.6814724802970886, + "learning_rate": 0.00011517549676361357, + "loss": 2.5499, + "step": 9085 + }, + { + "epoch": 0.7332741505931725, + "grad_norm": 0.6914821267127991, + "learning_rate": 0.00011515989251837239, + "loss": 2.5386, + "step": 9086 + }, + { + "epoch": 0.7333548543297554, + "grad_norm": 0.7292554378509521, + "learning_rate": 0.00011514428789530705, + "loss": 2.5642, + "step": 9087 + }, + { + "epoch": 0.7334355580663384, + "grad_norm": 0.6894826292991638, + "learning_rate": 0.00011512868289480647, + "loss": 2.6131, + "step": 9088 + }, + { + "epoch": 0.7335162618029215, + "grad_norm": 0.658770740032196, + "learning_rate": 0.00011511307751725957, + "loss": 2.5594, + "step": 9089 + }, + { + "epoch": 0.7335969655395045, + "grad_norm": 0.7508681416511536, + "learning_rate": 0.0001150974717630553, + "loss": 2.595, + "step": 9090 + }, + { + "epoch": 0.7336776692760875, + "grad_norm": 0.69661545753479, + "learning_rate": 0.00011508186563258256, + "loss": 2.5803, + "step": 9091 + }, + { + "epoch": 0.7337583730126704, + "grad_norm": 0.7277412414550781, + "learning_rate": 0.00011506625912623028, + "loss": 2.5456, + "step": 9092 + }, + { + "epoch": 0.7338390767492535, + "grad_norm": 0.658329963684082, + "learning_rate": 0.00011505065224438745, + "loss": 2.5177, 
+ "step": 9093 + }, + { + "epoch": 0.7339197804858365, + "grad_norm": 0.7277211546897888, + "learning_rate": 0.00011503504498744302, + "loss": 2.553, + "step": 9094 + }, + { + "epoch": 0.7340004842224195, + "grad_norm": 0.7240201830863953, + "learning_rate": 0.00011501943735578598, + "loss": 2.5851, + "step": 9095 + }, + { + "epoch": 0.7340811879590025, + "grad_norm": 0.6565662026405334, + "learning_rate": 0.00011500382934980529, + "loss": 2.5865, + "step": 9096 + }, + { + "epoch": 0.7341618916955855, + "grad_norm": 0.658268392086029, + "learning_rate": 0.00011498822096988995, + "loss": 2.5402, + "step": 9097 + }, + { + "epoch": 0.7342425954321685, + "grad_norm": 0.7305087447166443, + "learning_rate": 0.00011497261221642894, + "loss": 2.5483, + "step": 9098 + }, + { + "epoch": 0.7343232991687515, + "grad_norm": 0.7271504402160645, + "learning_rate": 0.00011495700308981134, + "loss": 2.5303, + "step": 9099 + }, + { + "epoch": 0.7344040029053345, + "grad_norm": 0.70429527759552, + "learning_rate": 0.0001149413935904261, + "loss": 2.5878, + "step": 9100 + }, + { + "epoch": 0.7344847066419176, + "grad_norm": 0.7168769836425781, + "learning_rate": 0.00011492578371866229, + "loss": 2.6017, + "step": 9101 + }, + { + "epoch": 0.7345654103785005, + "grad_norm": 0.7131996154785156, + "learning_rate": 0.00011491017347490891, + "loss": 2.5439, + "step": 9102 + }, + { + "epoch": 0.7346461141150835, + "grad_norm": 0.660321056842804, + "learning_rate": 0.00011489456285955504, + "loss": 2.5236, + "step": 9103 + }, + { + "epoch": 0.7347268178516665, + "grad_norm": 0.6742995977401733, + "learning_rate": 0.00011487895187298977, + "loss": 2.5375, + "step": 9104 + }, + { + "epoch": 0.7348075215882496, + "grad_norm": 0.6380610466003418, + "learning_rate": 0.00011486334051560206, + "loss": 2.5173, + "step": 9105 + }, + { + "epoch": 0.7348882253248326, + "grad_norm": 0.6948198080062866, + "learning_rate": 0.0001148477287877811, + "loss": 2.5247, + "step": 9106 + }, + { + "epoch": 
0.7349689290614155, + "grad_norm": 0.7088696360588074, + "learning_rate": 0.00011483211668991591, + "loss": 2.587, + "step": 9107 + }, + { + "epoch": 0.7350496327979985, + "grad_norm": 0.6278921961784363, + "learning_rate": 0.00011481650422239556, + "loss": 2.5652, + "step": 9108 + }, + { + "epoch": 0.7351303365345816, + "grad_norm": 0.6901956796646118, + "learning_rate": 0.00011480089138560926, + "loss": 2.5964, + "step": 9109 + }, + { + "epoch": 0.7352110402711646, + "grad_norm": 0.7264819145202637, + "learning_rate": 0.00011478527817994604, + "loss": 2.5437, + "step": 9110 + }, + { + "epoch": 0.7352917440077475, + "grad_norm": 0.6940708756446838, + "learning_rate": 0.00011476966460579501, + "loss": 2.5761, + "step": 9111 + }, + { + "epoch": 0.7353724477443305, + "grad_norm": 0.689588189125061, + "learning_rate": 0.00011475405066354536, + "loss": 2.5457, + "step": 9112 + }, + { + "epoch": 0.7354531514809136, + "grad_norm": 0.6938436031341553, + "learning_rate": 0.00011473843635358618, + "loss": 2.6026, + "step": 9113 + }, + { + "epoch": 0.7355338552174966, + "grad_norm": 0.7122177481651306, + "learning_rate": 0.00011472282167630663, + "loss": 2.5701, + "step": 9114 + }, + { + "epoch": 0.7356145589540796, + "grad_norm": 0.6667213439941406, + "learning_rate": 0.00011470720663209591, + "loss": 2.5944, + "step": 9115 + }, + { + "epoch": 0.7356952626906625, + "grad_norm": 0.705910861492157, + "learning_rate": 0.00011469159122134314, + "loss": 2.6183, + "step": 9116 + }, + { + "epoch": 0.7357759664272456, + "grad_norm": 0.709937572479248, + "learning_rate": 0.00011467597544443751, + "loss": 2.5153, + "step": 9117 + }, + { + "epoch": 0.7358566701638286, + "grad_norm": 0.6870958805084229, + "learning_rate": 0.00011466035930176822, + "loss": 2.5334, + "step": 9118 + }, + { + "epoch": 0.7359373739004116, + "grad_norm": 0.7274392247200012, + "learning_rate": 0.00011464474279372443, + "loss": 2.5336, + "step": 9119 + }, + { + "epoch": 0.7360180776369946, + "grad_norm": 
0.6360952258110046, + "learning_rate": 0.0001146291259206954, + "loss": 2.5604, + "step": 9120 + }, + { + "epoch": 0.7360987813735776, + "grad_norm": 0.7990559935569763, + "learning_rate": 0.00011461350868307028, + "loss": 2.624, + "step": 9121 + }, + { + "epoch": 0.7361794851101606, + "grad_norm": 0.6670079827308655, + "learning_rate": 0.00011459789108123835, + "loss": 2.5761, + "step": 9122 + }, + { + "epoch": 0.7362601888467436, + "grad_norm": 0.6994437575340271, + "learning_rate": 0.00011458227311558877, + "loss": 2.5679, + "step": 9123 + }, + { + "epoch": 0.7363408925833266, + "grad_norm": 0.7428358197212219, + "learning_rate": 0.00011456665478651087, + "loss": 2.5874, + "step": 9124 + }, + { + "epoch": 0.7364215963199097, + "grad_norm": 0.7079486846923828, + "learning_rate": 0.00011455103609439387, + "loss": 2.5999, + "step": 9125 + }, + { + "epoch": 0.7365023000564926, + "grad_norm": 0.646244466304779, + "learning_rate": 0.00011453541703962695, + "loss": 2.5053, + "step": 9126 + }, + { + "epoch": 0.7365830037930756, + "grad_norm": 0.6671318411827087, + "learning_rate": 0.0001145197976225995, + "loss": 2.5277, + "step": 9127 + }, + { + "epoch": 0.7366637075296586, + "grad_norm": 0.7060399055480957, + "learning_rate": 0.00011450417784370072, + "loss": 2.6092, + "step": 9128 + }, + { + "epoch": 0.7367444112662416, + "grad_norm": 0.741547703742981, + "learning_rate": 0.00011448855770331989, + "loss": 2.6121, + "step": 9129 + }, + { + "epoch": 0.7368251150028247, + "grad_norm": 0.710267961025238, + "learning_rate": 0.00011447293720184636, + "loss": 2.5141, + "step": 9130 + }, + { + "epoch": 0.7369058187394076, + "grad_norm": 0.6914308071136475, + "learning_rate": 0.0001144573163396694, + "loss": 2.5489, + "step": 9131 + }, + { + "epoch": 0.7369865224759906, + "grad_norm": 0.7051414847373962, + "learning_rate": 0.0001144416951171783, + "loss": 2.5925, + "step": 9132 + }, + { + "epoch": 0.7370672262125736, + "grad_norm": 0.6765387058258057, + "learning_rate": 
0.00011442607353476245, + "loss": 2.5864, + "step": 9133 + }, + { + "epoch": 0.7371479299491567, + "grad_norm": 0.706672191619873, + "learning_rate": 0.00011441045159281108, + "loss": 2.4823, + "step": 9134 + }, + { + "epoch": 0.7372286336857397, + "grad_norm": 0.7534066438674927, + "learning_rate": 0.00011439482929171362, + "loss": 2.5728, + "step": 9135 + }, + { + "epoch": 0.7373093374223226, + "grad_norm": 0.6628777384757996, + "learning_rate": 0.00011437920663185939, + "loss": 2.5538, + "step": 9136 + }, + { + "epoch": 0.7373900411589056, + "grad_norm": 0.6575733423233032, + "learning_rate": 0.00011436358361363773, + "loss": 2.4802, + "step": 9137 + }, + { + "epoch": 0.7374707448954887, + "grad_norm": 0.7629329562187195, + "learning_rate": 0.00011434796023743803, + "loss": 2.6169, + "step": 9138 + }, + { + "epoch": 0.7375514486320717, + "grad_norm": 0.7148225903511047, + "learning_rate": 0.00011433233650364965, + "loss": 2.6335, + "step": 9139 + }, + { + "epoch": 0.7376321523686546, + "grad_norm": 0.705210268497467, + "learning_rate": 0.00011431671241266198, + "loss": 2.6261, + "step": 9140 + }, + { + "epoch": 0.7377128561052376, + "grad_norm": 0.7137441635131836, + "learning_rate": 0.00011430108796486441, + "loss": 2.5021, + "step": 9141 + }, + { + "epoch": 0.7377935598418207, + "grad_norm": 0.6979854702949524, + "learning_rate": 0.00011428546316064635, + "loss": 2.5436, + "step": 9142 + }, + { + "epoch": 0.7378742635784037, + "grad_norm": 0.6568784713745117, + "learning_rate": 0.00011426983800039721, + "loss": 2.5882, + "step": 9143 + }, + { + "epoch": 0.7379549673149867, + "grad_norm": 0.666606605052948, + "learning_rate": 0.00011425421248450638, + "loss": 2.5472, + "step": 9144 + }, + { + "epoch": 0.7380356710515696, + "grad_norm": 0.7240840792655945, + "learning_rate": 0.00011423858661336333, + "loss": 2.6057, + "step": 9145 + }, + { + "epoch": 0.7381163747881527, + "grad_norm": 0.7342149615287781, + "learning_rate": 0.0001142229603873575, + "loss": 2.508, 
+ "step": 9146 + }, + { + "epoch": 0.7381970785247357, + "grad_norm": 0.7089941501617432, + "learning_rate": 0.0001142073338068783, + "loss": 2.6115, + "step": 9147 + }, + { + "epoch": 0.7382777822613187, + "grad_norm": 0.6883555054664612, + "learning_rate": 0.00011419170687231519, + "loss": 2.5254, + "step": 9148 + }, + { + "epoch": 0.7383584859979017, + "grad_norm": 0.6819528937339783, + "learning_rate": 0.00011417607958405765, + "loss": 2.5498, + "step": 9149 + }, + { + "epoch": 0.7384391897344847, + "grad_norm": 0.7348979711532593, + "learning_rate": 0.00011416045194249516, + "loss": 2.5547, + "step": 9150 + }, + { + "epoch": 0.7385198934710677, + "grad_norm": 0.6733320355415344, + "learning_rate": 0.00011414482394801719, + "loss": 2.5985, + "step": 9151 + }, + { + "epoch": 0.7386005972076507, + "grad_norm": 0.714771032333374, + "learning_rate": 0.00011412919560101327, + "loss": 2.571, + "step": 9152 + }, + { + "epoch": 0.7386813009442337, + "grad_norm": 0.7010024189949036, + "learning_rate": 0.0001141135669018728, + "loss": 2.5755, + "step": 9153 + }, + { + "epoch": 0.7387620046808168, + "grad_norm": 0.7014826536178589, + "learning_rate": 0.00011409793785098536, + "loss": 2.6033, + "step": 9154 + }, + { + "epoch": 0.7388427084173997, + "grad_norm": 0.7286051511764526, + "learning_rate": 0.0001140823084487405, + "loss": 2.515, + "step": 9155 + }, + { + "epoch": 0.7389234121539827, + "grad_norm": 0.669365406036377, + "learning_rate": 0.00011406667869552768, + "loss": 2.506, + "step": 9156 + }, + { + "epoch": 0.7390041158905657, + "grad_norm": 0.6886852979660034, + "learning_rate": 0.00011405104859173645, + "loss": 2.6123, + "step": 9157 + }, + { + "epoch": 0.7390848196271488, + "grad_norm": 0.6344162225723267, + "learning_rate": 0.00011403541813775635, + "loss": 2.5483, + "step": 9158 + }, + { + "epoch": 0.7391655233637318, + "grad_norm": 0.7043579816818237, + "learning_rate": 0.00011401978733397694, + "loss": 2.5545, + "step": 9159 + }, + { + "epoch": 
0.7392462271003147, + "grad_norm": 0.7960262298583984, + "learning_rate": 0.00011400415618078781, + "loss": 2.5666, + "step": 9160 + }, + { + "epoch": 0.7393269308368977, + "grad_norm": 0.6771546006202698, + "learning_rate": 0.00011398852467857848, + "loss": 2.6016, + "step": 9161 + }, + { + "epoch": 0.7394076345734808, + "grad_norm": 0.6522069573402405, + "learning_rate": 0.00011397289282773855, + "loss": 2.5493, + "step": 9162 + }, + { + "epoch": 0.7394883383100638, + "grad_norm": 0.6804657578468323, + "learning_rate": 0.00011395726062865762, + "loss": 2.5856, + "step": 9163 + }, + { + "epoch": 0.7395690420466468, + "grad_norm": 0.7562841176986694, + "learning_rate": 0.00011394162808172526, + "loss": 2.557, + "step": 9164 + }, + { + "epoch": 0.7396497457832297, + "grad_norm": 0.6464113593101501, + "learning_rate": 0.00011392599518733107, + "loss": 2.5292, + "step": 9165 + }, + { + "epoch": 0.7397304495198128, + "grad_norm": 0.7469549775123596, + "learning_rate": 0.00011391036194586466, + "loss": 2.6168, + "step": 9166 + }, + { + "epoch": 0.7398111532563958, + "grad_norm": 0.7095946669578552, + "learning_rate": 0.00011389472835771572, + "loss": 2.5468, + "step": 9167 + }, + { + "epoch": 0.7398918569929788, + "grad_norm": 0.7376375794410706, + "learning_rate": 0.00011387909442327382, + "loss": 2.5576, + "step": 9168 + }, + { + "epoch": 0.7399725607295617, + "grad_norm": 0.736727774143219, + "learning_rate": 0.00011386346014292859, + "loss": 2.6034, + "step": 9169 + }, + { + "epoch": 0.7400532644661448, + "grad_norm": 0.7026904821395874, + "learning_rate": 0.00011384782551706967, + "loss": 2.5848, + "step": 9170 + }, + { + "epoch": 0.7401339682027278, + "grad_norm": 0.6894888877868652, + "learning_rate": 0.00011383219054608678, + "loss": 2.5475, + "step": 9171 + }, + { + "epoch": 0.7402146719393108, + "grad_norm": 0.6754137277603149, + "learning_rate": 0.00011381655523036954, + "loss": 2.5124, + "step": 9172 + }, + { + "epoch": 0.7402953756758938, + "grad_norm": 
0.7935643196105957, + "learning_rate": 0.00011380091957030762, + "loss": 2.5898, + "step": 9173 + }, + { + "epoch": 0.7403760794124769, + "grad_norm": 0.7017118334770203, + "learning_rate": 0.0001137852835662907, + "loss": 2.6139, + "step": 9174 + }, + { + "epoch": 0.7404567831490598, + "grad_norm": 0.7246189117431641, + "learning_rate": 0.00011376964721870847, + "loss": 2.4627, + "step": 9175 + }, + { + "epoch": 0.7405374868856428, + "grad_norm": 0.6835598349571228, + "learning_rate": 0.00011375401052795064, + "loss": 2.5707, + "step": 9176 + }, + { + "epoch": 0.7406181906222258, + "grad_norm": 0.6439787745475769, + "learning_rate": 0.00011373837349440693, + "loss": 2.5161, + "step": 9177 + }, + { + "epoch": 0.7406988943588089, + "grad_norm": 0.7249091267585754, + "learning_rate": 0.00011372273611846704, + "loss": 2.5054, + "step": 9178 + }, + { + "epoch": 0.7407795980953918, + "grad_norm": 0.7653267979621887, + "learning_rate": 0.0001137070984005207, + "loss": 2.6016, + "step": 9179 + }, + { + "epoch": 0.7408603018319748, + "grad_norm": 0.7195165157318115, + "learning_rate": 0.0001136914603409576, + "loss": 2.5931, + "step": 9180 + }, + { + "epoch": 0.7409410055685578, + "grad_norm": 0.7093746662139893, + "learning_rate": 0.00011367582194016756, + "loss": 2.5567, + "step": 9181 + }, + { + "epoch": 0.7410217093051408, + "grad_norm": 0.6868107318878174, + "learning_rate": 0.00011366018319854026, + "loss": 2.5769, + "step": 9182 + }, + { + "epoch": 0.7411024130417239, + "grad_norm": 0.6870261430740356, + "learning_rate": 0.00011364454411646552, + "loss": 2.5418, + "step": 9183 + }, + { + "epoch": 0.7411831167783068, + "grad_norm": 0.7034662365913391, + "learning_rate": 0.00011362890469433306, + "loss": 2.5798, + "step": 9184 + }, + { + "epoch": 0.7412638205148898, + "grad_norm": 0.7200794816017151, + "learning_rate": 0.00011361326493253264, + "loss": 2.5523, + "step": 9185 + }, + { + "epoch": 0.7413445242514728, + "grad_norm": 0.7034540772438049, + "learning_rate": 
0.0001135976248314541, + "loss": 2.5107, + "step": 9186 + }, + { + "epoch": 0.7414252279880559, + "grad_norm": 0.7155053019523621, + "learning_rate": 0.00011358198439148721, + "loss": 2.5804, + "step": 9187 + }, + { + "epoch": 0.7415059317246389, + "grad_norm": 0.6965398788452148, + "learning_rate": 0.00011356634361302175, + "loss": 2.5532, + "step": 9188 + }, + { + "epoch": 0.7415866354612218, + "grad_norm": 0.65416419506073, + "learning_rate": 0.00011355070249644755, + "loss": 2.5411, + "step": 9189 + }, + { + "epoch": 0.7416673391978048, + "grad_norm": 0.6798486709594727, + "learning_rate": 0.0001135350610421544, + "loss": 2.4957, + "step": 9190 + }, + { + "epoch": 0.7417480429343879, + "grad_norm": 0.6839874386787415, + "learning_rate": 0.00011351941925053218, + "loss": 2.5745, + "step": 9191 + }, + { + "epoch": 0.7418287466709709, + "grad_norm": 0.7374398708343506, + "learning_rate": 0.00011350377712197068, + "loss": 2.4923, + "step": 9192 + }, + { + "epoch": 0.7419094504075538, + "grad_norm": 0.7517396807670593, + "learning_rate": 0.00011348813465685974, + "loss": 2.538, + "step": 9193 + }, + { + "epoch": 0.7419901541441368, + "grad_norm": 0.6670863628387451, + "learning_rate": 0.00011347249185558926, + "loss": 2.5442, + "step": 9194 + }, + { + "epoch": 0.7420708578807199, + "grad_norm": 0.6508080363273621, + "learning_rate": 0.00011345684871854905, + "loss": 2.6665, + "step": 9195 + }, + { + "epoch": 0.7421515616173029, + "grad_norm": 0.6935258507728577, + "learning_rate": 0.00011344120524612898, + "loss": 2.5388, + "step": 9196 + }, + { + "epoch": 0.7422322653538859, + "grad_norm": 0.696067750453949, + "learning_rate": 0.00011342556143871897, + "loss": 2.574, + "step": 9197 + }, + { + "epoch": 0.7423129690904688, + "grad_norm": 0.7486966252326965, + "learning_rate": 0.00011340991729670882, + "loss": 2.5924, + "step": 9198 + }, + { + "epoch": 0.7423936728270519, + "grad_norm": 0.676407516002655, + "learning_rate": 0.00011339427282048854, + "loss": 2.5907, + 
"step": 9199 + }, + { + "epoch": 0.7424743765636349, + "grad_norm": 0.7241318225860596, + "learning_rate": 0.00011337862801044792, + "loss": 2.5685, + "step": 9200 + }, + { + "epoch": 0.7425550803002179, + "grad_norm": 0.7012883424758911, + "learning_rate": 0.00011336298286697692, + "loss": 2.56, + "step": 9201 + }, + { + "epoch": 0.7426357840368009, + "grad_norm": 0.7313060164451599, + "learning_rate": 0.0001133473373904655, + "loss": 2.632, + "step": 9202 + }, + { + "epoch": 0.742716487773384, + "grad_norm": 0.6829206943511963, + "learning_rate": 0.00011333169158130353, + "loss": 2.5006, + "step": 9203 + }, + { + "epoch": 0.7427971915099669, + "grad_norm": 0.7324578166007996, + "learning_rate": 0.00011331604543988093, + "loss": 2.5004, + "step": 9204 + }, + { + "epoch": 0.7428778952465499, + "grad_norm": 0.6761097311973572, + "learning_rate": 0.00011330039896658766, + "loss": 2.5516, + "step": 9205 + }, + { + "epoch": 0.7429585989831329, + "grad_norm": 0.6909754276275635, + "learning_rate": 0.00011328475216181369, + "loss": 2.5273, + "step": 9206 + }, + { + "epoch": 0.743039302719716, + "grad_norm": 0.6420674324035645, + "learning_rate": 0.00011326910502594899, + "loss": 2.5507, + "step": 9207 + }, + { + "epoch": 0.7431200064562989, + "grad_norm": 0.6442455053329468, + "learning_rate": 0.0001132534575593835, + "loss": 2.542, + "step": 9208 + }, + { + "epoch": 0.7432007101928819, + "grad_norm": 0.7053101658821106, + "learning_rate": 0.0001132378097625072, + "loss": 2.5116, + "step": 9209 + }, + { + "epoch": 0.7432814139294649, + "grad_norm": 0.7570765614509583, + "learning_rate": 0.00011322216163571007, + "loss": 2.5576, + "step": 9210 + }, + { + "epoch": 0.743362117666048, + "grad_norm": 0.6937675476074219, + "learning_rate": 0.00011320651317938214, + "loss": 2.6212, + "step": 9211 + }, + { + "epoch": 0.743442821402631, + "grad_norm": 0.6741313934326172, + "learning_rate": 0.00011319086439391333, + "loss": 2.5723, + "step": 9212 + }, + { + "epoch": 
0.7435235251392139, + "grad_norm": 0.711358904838562, + "learning_rate": 0.00011317521527969374, + "loss": 2.5713, + "step": 9213 + }, + { + "epoch": 0.7436042288757969, + "grad_norm": 0.7443268895149231, + "learning_rate": 0.00011315956583711331, + "loss": 2.5301, + "step": 9214 + }, + { + "epoch": 0.74368493261238, + "grad_norm": 0.7001742720603943, + "learning_rate": 0.00011314391606656212, + "loss": 2.5545, + "step": 9215 + }, + { + "epoch": 0.743765636348963, + "grad_norm": 0.7294990420341492, + "learning_rate": 0.00011312826596843019, + "loss": 2.5897, + "step": 9216 + }, + { + "epoch": 0.743846340085546, + "grad_norm": 0.706924319267273, + "learning_rate": 0.00011311261554310753, + "loss": 2.6477, + "step": 9217 + }, + { + "epoch": 0.7439270438221289, + "grad_norm": 0.7065039277076721, + "learning_rate": 0.00011309696479098423, + "loss": 2.5326, + "step": 9218 + }, + { + "epoch": 0.744007747558712, + "grad_norm": 0.6502599716186523, + "learning_rate": 0.00011308131371245037, + "loss": 2.5833, + "step": 9219 + }, + { + "epoch": 0.744088451295295, + "grad_norm": 0.7135158181190491, + "learning_rate": 0.00011306566230789592, + "loss": 2.5686, + "step": 9220 + }, + { + "epoch": 0.744169155031878, + "grad_norm": 0.7239195108413696, + "learning_rate": 0.00011305001057771101, + "loss": 2.6303, + "step": 9221 + }, + { + "epoch": 0.744249858768461, + "grad_norm": 0.6442604660987854, + "learning_rate": 0.00011303435852228574, + "loss": 2.5495, + "step": 9222 + }, + { + "epoch": 0.744330562505044, + "grad_norm": 0.6700316071510315, + "learning_rate": 0.0001130187061420102, + "loss": 2.5575, + "step": 9223 + }, + { + "epoch": 0.744411266241627, + "grad_norm": 0.7532816529273987, + "learning_rate": 0.00011300305343727446, + "loss": 2.5174, + "step": 9224 + }, + { + "epoch": 0.74449196997821, + "grad_norm": 0.7614738941192627, + "learning_rate": 0.00011298740040846862, + "loss": 2.5995, + "step": 9225 + }, + { + "epoch": 0.744572673714793, + "grad_norm": 
0.6781208515167236, + "learning_rate": 0.00011297174705598283, + "loss": 2.5225, + "step": 9226 + }, + { + "epoch": 0.744653377451376, + "grad_norm": 0.680525541305542, + "learning_rate": 0.0001129560933802072, + "loss": 2.5844, + "step": 9227 + }, + { + "epoch": 0.744734081187959, + "grad_norm": 0.7196657657623291, + "learning_rate": 0.00011294043938153185, + "loss": 2.564, + "step": 9228 + }, + { + "epoch": 0.744814784924542, + "grad_norm": 0.6997412443161011, + "learning_rate": 0.00011292478506034694, + "loss": 2.6486, + "step": 9229 + }, + { + "epoch": 0.744895488661125, + "grad_norm": 0.7438939809799194, + "learning_rate": 0.00011290913041704256, + "loss": 2.5667, + "step": 9230 + }, + { + "epoch": 0.744976192397708, + "grad_norm": 0.7391374707221985, + "learning_rate": 0.00011289347545200892, + "loss": 2.5974, + "step": 9231 + }, + { + "epoch": 0.745056896134291, + "grad_norm": 0.7845481634140015, + "learning_rate": 0.0001128778201656362, + "loss": 2.5168, + "step": 9232 + }, + { + "epoch": 0.745137599870874, + "grad_norm": 0.728712797164917, + "learning_rate": 0.00011286216455831449, + "loss": 2.5241, + "step": 9233 + }, + { + "epoch": 0.745218303607457, + "grad_norm": 0.7310191988945007, + "learning_rate": 0.00011284650863043407, + "loss": 2.5777, + "step": 9234 + }, + { + "epoch": 0.74529900734404, + "grad_norm": 0.6661474704742432, + "learning_rate": 0.00011283085238238503, + "loss": 2.5471, + "step": 9235 + }, + { + "epoch": 0.7453797110806231, + "grad_norm": 0.7697983384132385, + "learning_rate": 0.00011281519581455761, + "loss": 2.587, + "step": 9236 + }, + { + "epoch": 0.745460414817206, + "grad_norm": 0.7336567640304565, + "learning_rate": 0.00011279953892734203, + "loss": 2.5756, + "step": 9237 + }, + { + "epoch": 0.745541118553789, + "grad_norm": 0.6192059516906738, + "learning_rate": 0.00011278388172112848, + "loss": 2.5038, + "step": 9238 + }, + { + "epoch": 0.745621822290372, + "grad_norm": 0.7180300354957581, + "learning_rate": 
0.00011276822419630719, + "loss": 2.5469, + "step": 9239 + }, + { + "epoch": 0.7457025260269551, + "grad_norm": 0.7583367824554443, + "learning_rate": 0.00011275256635326837, + "loss": 2.6274, + "step": 9240 + }, + { + "epoch": 0.7457832297635381, + "grad_norm": 0.6848096251487732, + "learning_rate": 0.00011273690819240221, + "loss": 2.5117, + "step": 9241 + }, + { + "epoch": 0.745863933500121, + "grad_norm": 0.6830503344535828, + "learning_rate": 0.00011272124971409907, + "loss": 2.5114, + "step": 9242 + }, + { + "epoch": 0.745944637236704, + "grad_norm": 0.780240535736084, + "learning_rate": 0.0001127055909187491, + "loss": 2.6432, + "step": 9243 + }, + { + "epoch": 0.7460253409732871, + "grad_norm": 0.7421274185180664, + "learning_rate": 0.00011268993180674261, + "loss": 2.5723, + "step": 9244 + }, + { + "epoch": 0.7461060447098701, + "grad_norm": 0.6695685386657715, + "learning_rate": 0.00011267427237846986, + "loss": 2.5335, + "step": 9245 + }, + { + "epoch": 0.746186748446453, + "grad_norm": 0.8390316963195801, + "learning_rate": 0.00011265861263432104, + "loss": 2.5125, + "step": 9246 + }, + { + "epoch": 0.746267452183036, + "grad_norm": 0.7030535936355591, + "learning_rate": 0.00011264295257468658, + "loss": 2.5986, + "step": 9247 + }, + { + "epoch": 0.7463481559196191, + "grad_norm": 0.6754253506660461, + "learning_rate": 0.00011262729219995669, + "loss": 2.5067, + "step": 9248 + }, + { + "epoch": 0.7464288596562021, + "grad_norm": 0.6809592843055725, + "learning_rate": 0.00011261163151052163, + "loss": 2.5359, + "step": 9249 + }, + { + "epoch": 0.7465095633927851, + "grad_norm": 0.6546878218650818, + "learning_rate": 0.00011259597050677178, + "loss": 2.5357, + "step": 9250 + }, + { + "epoch": 0.746590267129368, + "grad_norm": 0.6514731645584106, + "learning_rate": 0.00011258030918909739, + "loss": 2.5591, + "step": 9251 + }, + { + "epoch": 0.7466709708659511, + "grad_norm": 0.6981258392333984, + "learning_rate": 0.0001125646475578888, + "loss": 2.6171, + 
"step": 9252 + }, + { + "epoch": 0.7467516746025341, + "grad_norm": 0.6763784885406494, + "learning_rate": 0.00011254898561353639, + "loss": 2.5455, + "step": 9253 + }, + { + "epoch": 0.7468323783391171, + "grad_norm": 0.6241726279258728, + "learning_rate": 0.00011253332335643043, + "loss": 2.6073, + "step": 9254 + }, + { + "epoch": 0.7469130820757001, + "grad_norm": 0.6810312271118164, + "learning_rate": 0.00011251766078696132, + "loss": 2.5285, + "step": 9255 + }, + { + "epoch": 0.7469937858122832, + "grad_norm": 0.6603971123695374, + "learning_rate": 0.00011250199790551934, + "loss": 2.5985, + "step": 9256 + }, + { + "epoch": 0.7470744895488661, + "grad_norm": 0.69618159532547, + "learning_rate": 0.0001124863347124949, + "loss": 2.5728, + "step": 9257 + }, + { + "epoch": 0.7471551932854491, + "grad_norm": 0.6878889203071594, + "learning_rate": 0.00011247067120827837, + "loss": 2.5459, + "step": 9258 + }, + { + "epoch": 0.7472358970220321, + "grad_norm": 0.6613149046897888, + "learning_rate": 0.00011245500739326011, + "loss": 2.6559, + "step": 9259 + }, + { + "epoch": 0.7473166007586152, + "grad_norm": 0.6397448778152466, + "learning_rate": 0.00011243934326783053, + "loss": 2.5712, + "step": 9260 + }, + { + "epoch": 0.7473973044951981, + "grad_norm": 0.6804259419441223, + "learning_rate": 0.00011242367883237996, + "loss": 2.6143, + "step": 9261 + }, + { + "epoch": 0.7474780082317811, + "grad_norm": 0.8029066324234009, + "learning_rate": 0.00011240801408729884, + "loss": 2.5702, + "step": 9262 + }, + { + "epoch": 0.7475587119683641, + "grad_norm": 0.7086285948753357, + "learning_rate": 0.00011239234903297761, + "loss": 2.6113, + "step": 9263 + }, + { + "epoch": 0.7476394157049472, + "grad_norm": 0.6980452537536621, + "learning_rate": 0.00011237668366980665, + "loss": 2.6355, + "step": 9264 + }, + { + "epoch": 0.7477201194415302, + "grad_norm": 0.6906906962394714, + "learning_rate": 0.00011236101799817636, + "loss": 2.5605, + "step": 9265 + }, + { + "epoch": 
0.7478008231781131, + "grad_norm": 0.7412894368171692, + "learning_rate": 0.00011234535201847716, + "loss": 2.6073, + "step": 9266 + }, + { + "epoch": 0.7478815269146961, + "grad_norm": 0.6949330568313599, + "learning_rate": 0.00011232968573109955, + "loss": 2.5623, + "step": 9267 + }, + { + "epoch": 0.7479622306512792, + "grad_norm": 0.6916515827178955, + "learning_rate": 0.00011231401913643393, + "loss": 2.5348, + "step": 9268 + }, + { + "epoch": 0.7480429343878622, + "grad_norm": 0.7576180696487427, + "learning_rate": 0.0001122983522348708, + "loss": 2.5968, + "step": 9269 + }, + { + "epoch": 0.7481236381244452, + "grad_norm": 0.6734197735786438, + "learning_rate": 0.00011228268502680052, + "loss": 2.5185, + "step": 9270 + }, + { + "epoch": 0.7482043418610281, + "grad_norm": 0.6952544450759888, + "learning_rate": 0.00011226701751261367, + "loss": 2.57, + "step": 9271 + }, + { + "epoch": 0.7482850455976112, + "grad_norm": 0.6504654884338379, + "learning_rate": 0.00011225134969270068, + "loss": 2.5677, + "step": 9272 + }, + { + "epoch": 0.7483657493341942, + "grad_norm": 0.6843643188476562, + "learning_rate": 0.00011223568156745198, + "loss": 2.5686, + "step": 9273 + }, + { + "epoch": 0.7484464530707772, + "grad_norm": 0.6786371469497681, + "learning_rate": 0.00011222001313725816, + "loss": 2.5024, + "step": 9274 + }, + { + "epoch": 0.7485271568073602, + "grad_norm": 0.6431117057800293, + "learning_rate": 0.00011220434440250967, + "loss": 2.5206, + "step": 9275 + }, + { + "epoch": 0.7486078605439432, + "grad_norm": 0.699547290802002, + "learning_rate": 0.000112188675363597, + "loss": 2.5974, + "step": 9276 + }, + { + "epoch": 0.7486885642805262, + "grad_norm": 0.6870436072349548, + "learning_rate": 0.00011217300602091067, + "loss": 2.5303, + "step": 9277 + }, + { + "epoch": 0.7487692680171092, + "grad_norm": 0.7032173871994019, + "learning_rate": 0.0001121573363748412, + "loss": 2.5045, + "step": 9278 + }, + { + "epoch": 0.7488499717536922, + "grad_norm": 
0.6890417337417603, + "learning_rate": 0.00011214166642577917, + "loss": 2.5945, + "step": 9279 + }, + { + "epoch": 0.7489306754902753, + "grad_norm": 0.7257806062698364, + "learning_rate": 0.00011212599617411506, + "loss": 2.6013, + "step": 9280 + }, + { + "epoch": 0.7490113792268582, + "grad_norm": 0.722561240196228, + "learning_rate": 0.0001121103256202394, + "loss": 2.5809, + "step": 9281 + }, + { + "epoch": 0.7490920829634412, + "grad_norm": 0.7360994219779968, + "learning_rate": 0.00011209465476454277, + "loss": 2.5036, + "step": 9282 + }, + { + "epoch": 0.7491727867000242, + "grad_norm": 0.6561676263809204, + "learning_rate": 0.00011207898360741574, + "loss": 2.5302, + "step": 9283 + }, + { + "epoch": 0.7492534904366072, + "grad_norm": 0.7454147338867188, + "learning_rate": 0.00011206331214924887, + "loss": 2.5511, + "step": 9284 + }, + { + "epoch": 0.7493341941731902, + "grad_norm": 0.7085482478141785, + "learning_rate": 0.00011204764039043275, + "loss": 2.5743, + "step": 9285 + }, + { + "epoch": 0.7494148979097732, + "grad_norm": 0.691872775554657, + "learning_rate": 0.0001120319683313579, + "loss": 2.5414, + "step": 9286 + }, + { + "epoch": 0.7494956016463562, + "grad_norm": 0.6661050915718079, + "learning_rate": 0.00011201629597241496, + "loss": 2.5418, + "step": 9287 + }, + { + "epoch": 0.7495763053829392, + "grad_norm": 0.7440990805625916, + "learning_rate": 0.00011200062331399452, + "loss": 2.5543, + "step": 9288 + }, + { + "epoch": 0.7496570091195223, + "grad_norm": 0.6655303835868835, + "learning_rate": 0.00011198495035648715, + "loss": 2.5629, + "step": 9289 + }, + { + "epoch": 0.7497377128561052, + "grad_norm": 0.7550996541976929, + "learning_rate": 0.00011196927710028353, + "loss": 2.5376, + "step": 9290 + }, + { + "epoch": 0.7498184165926882, + "grad_norm": 0.692915678024292, + "learning_rate": 0.00011195360354577422, + "loss": 2.4661, + "step": 9291 + }, + { + "epoch": 0.7498991203292712, + "grad_norm": 0.7572253346443176, + "learning_rate": 
0.00011193792969334985, + "loss": 2.5641, + "step": 9292 + }, + { + "epoch": 0.7499798240658543, + "grad_norm": 0.6550531387329102, + "learning_rate": 0.00011192225554340107, + "loss": 2.5591, + "step": 9293 + }, + { + "epoch": 0.7500605278024373, + "grad_norm": 0.677130401134491, + "learning_rate": 0.0001119065810963185, + "loss": 2.5859, + "step": 9294 + }, + { + "epoch": 0.7501412315390202, + "grad_norm": 0.680673360824585, + "learning_rate": 0.00011189090635249287, + "loss": 2.5343, + "step": 9295 + }, + { + "epoch": 0.7502219352756032, + "grad_norm": 0.7574957609176636, + "learning_rate": 0.00011187523131231472, + "loss": 2.5966, + "step": 9296 + }, + { + "epoch": 0.7503026390121863, + "grad_norm": 0.7099971175193787, + "learning_rate": 0.00011185955597617474, + "loss": 2.5547, + "step": 9297 + }, + { + "epoch": 0.7503833427487693, + "grad_norm": 0.7153162956237793, + "learning_rate": 0.00011184388034446367, + "loss": 2.5986, + "step": 9298 + }, + { + "epoch": 0.7504640464853523, + "grad_norm": 0.7154852747917175, + "learning_rate": 0.00011182820441757212, + "loss": 2.5214, + "step": 9299 + }, + { + "epoch": 0.7505447502219352, + "grad_norm": 0.6899208426475525, + "learning_rate": 0.00011181252819589081, + "loss": 2.5026, + "step": 9300 + }, + { + "epoch": 0.7506254539585183, + "grad_norm": 0.6719048023223877, + "learning_rate": 0.00011179685167981041, + "loss": 2.5915, + "step": 9301 + }, + { + "epoch": 0.7507061576951013, + "grad_norm": 0.6664413213729858, + "learning_rate": 0.00011178117486972164, + "loss": 2.5479, + "step": 9302 + }, + { + "epoch": 0.7507868614316843, + "grad_norm": 0.7433286905288696, + "learning_rate": 0.00011176549776601517, + "loss": 2.5941, + "step": 9303 + }, + { + "epoch": 0.7508675651682672, + "grad_norm": 0.7868518233299255, + "learning_rate": 0.00011174982036908177, + "loss": 2.5537, + "step": 9304 + }, + { + "epoch": 0.7509482689048503, + "grad_norm": 0.7037336826324463, + "learning_rate": 0.0001117341426793121, + "loss": 2.568, 
+ "step": 9305 + }, + { + "epoch": 0.7510289726414333, + "grad_norm": 0.6630405783653259, + "learning_rate": 0.00011171846469709697, + "loss": 2.4906, + "step": 9306 + }, + { + "epoch": 0.7511096763780163, + "grad_norm": 0.7398669719696045, + "learning_rate": 0.00011170278642282701, + "loss": 2.574, + "step": 9307 + }, + { + "epoch": 0.7511903801145993, + "grad_norm": 0.7557641267776489, + "learning_rate": 0.00011168710785689304, + "loss": 2.5237, + "step": 9308 + }, + { + "epoch": 0.7512710838511824, + "grad_norm": 0.6883708238601685, + "learning_rate": 0.00011167142899968581, + "loss": 2.5643, + "step": 9309 + }, + { + "epoch": 0.7513517875877653, + "grad_norm": 0.6623669862747192, + "learning_rate": 0.00011165574985159606, + "loss": 2.5319, + "step": 9310 + }, + { + "epoch": 0.7514324913243483, + "grad_norm": 0.6938778758049011, + "learning_rate": 0.00011164007041301454, + "loss": 2.5083, + "step": 9311 + }, + { + "epoch": 0.7515131950609313, + "grad_norm": 0.718534529209137, + "learning_rate": 0.00011162439068433204, + "loss": 2.4791, + "step": 9312 + }, + { + "epoch": 0.7515938987975144, + "grad_norm": 0.672113299369812, + "learning_rate": 0.00011160871066593934, + "loss": 2.5264, + "step": 9313 + }, + { + "epoch": 0.7516746025340973, + "grad_norm": 0.6854343414306641, + "learning_rate": 0.00011159303035822723, + "loss": 2.5734, + "step": 9314 + }, + { + "epoch": 0.7517553062706803, + "grad_norm": 0.6494589447975159, + "learning_rate": 0.0001115773497615865, + "loss": 2.5564, + "step": 9315 + }, + { + "epoch": 0.7518360100072633, + "grad_norm": 0.7219608426094055, + "learning_rate": 0.00011156166887640793, + "loss": 2.6049, + "step": 9316 + }, + { + "epoch": 0.7519167137438464, + "grad_norm": 0.6892502903938293, + "learning_rate": 0.00011154598770308236, + "loss": 2.5333, + "step": 9317 + }, + { + "epoch": 0.7519974174804294, + "grad_norm": 0.6670175790786743, + "learning_rate": 0.0001115303062420006, + "loss": 2.5882, + "step": 9318 + }, + { + "epoch": 
0.7520781212170123, + "grad_norm": 0.7367776036262512, + "learning_rate": 0.00011151462449355347, + "loss": 2.5634, + "step": 9319 + }, + { + "epoch": 0.7521588249535953, + "grad_norm": 0.6971952319145203, + "learning_rate": 0.00011149894245813182, + "loss": 2.5323, + "step": 9320 + }, + { + "epoch": 0.7522395286901784, + "grad_norm": 0.6555755734443665, + "learning_rate": 0.00011148326013612642, + "loss": 2.5597, + "step": 9321 + }, + { + "epoch": 0.7523202324267614, + "grad_norm": 0.7004384994506836, + "learning_rate": 0.00011146757752792819, + "loss": 2.4761, + "step": 9322 + }, + { + "epoch": 0.7524009361633444, + "grad_norm": 0.7151978015899658, + "learning_rate": 0.00011145189463392791, + "loss": 2.5825, + "step": 9323 + }, + { + "epoch": 0.7524816398999273, + "grad_norm": 0.7176918387413025, + "learning_rate": 0.00011143621145451653, + "loss": 2.6112, + "step": 9324 + }, + { + "epoch": 0.7525623436365104, + "grad_norm": 0.7156146168708801, + "learning_rate": 0.00011142052799008487, + "loss": 2.5293, + "step": 9325 + }, + { + "epoch": 0.7526430473730934, + "grad_norm": 0.7360113263130188, + "learning_rate": 0.00011140484424102375, + "loss": 2.5703, + "step": 9326 + }, + { + "epoch": 0.7527237511096764, + "grad_norm": 0.65630042552948, + "learning_rate": 0.00011138916020772414, + "loss": 2.5224, + "step": 9327 + }, + { + "epoch": 0.7528044548462594, + "grad_norm": 0.7088161110877991, + "learning_rate": 0.00011137347589057687, + "loss": 2.6673, + "step": 9328 + }, + { + "epoch": 0.7528851585828424, + "grad_norm": 0.7335243821144104, + "learning_rate": 0.00011135779128997283, + "loss": 2.5693, + "step": 9329 + }, + { + "epoch": 0.7529658623194254, + "grad_norm": 0.7166211605072021, + "learning_rate": 0.00011134210640630298, + "loss": 2.5612, + "step": 9330 + }, + { + "epoch": 0.7530465660560084, + "grad_norm": 0.7324960231781006, + "learning_rate": 0.00011132642123995816, + "loss": 2.5682, + "step": 9331 + }, + { + "epoch": 0.7531272697925914, + "grad_norm": 
0.7133917808532715, + "learning_rate": 0.00011131073579132936, + "loss": 2.6131, + "step": 9332 + }, + { + "epoch": 0.7532079735291743, + "grad_norm": 0.678741455078125, + "learning_rate": 0.0001112950500608074, + "loss": 2.6109, + "step": 9333 + }, + { + "epoch": 0.7532886772657574, + "grad_norm": 0.7000784277915955, + "learning_rate": 0.0001112793640487833, + "loss": 2.5087, + "step": 9334 + }, + { + "epoch": 0.7533693810023404, + "grad_norm": 0.719976544380188, + "learning_rate": 0.00011126367775564795, + "loss": 2.4665, + "step": 9335 + }, + { + "epoch": 0.7534500847389234, + "grad_norm": 0.7127155065536499, + "learning_rate": 0.00011124799118179232, + "loss": 2.5254, + "step": 9336 + }, + { + "epoch": 0.7535307884755064, + "grad_norm": 0.6306474804878235, + "learning_rate": 0.00011123230432760734, + "loss": 2.5487, + "step": 9337 + }, + { + "epoch": 0.7536114922120895, + "grad_norm": 0.667019784450531, + "learning_rate": 0.00011121661719348397, + "loss": 2.5576, + "step": 9338 + }, + { + "epoch": 0.7536921959486724, + "grad_norm": 0.6869673132896423, + "learning_rate": 0.00011120092977981318, + "loss": 2.544, + "step": 9339 + }, + { + "epoch": 0.7537728996852554, + "grad_norm": 0.6688670516014099, + "learning_rate": 0.00011118524208698596, + "loss": 2.6017, + "step": 9340 + }, + { + "epoch": 0.7538536034218384, + "grad_norm": 0.6717860102653503, + "learning_rate": 0.00011116955411539325, + "loss": 2.5571, + "step": 9341 + }, + { + "epoch": 0.7539343071584215, + "grad_norm": 0.7113999724388123, + "learning_rate": 0.00011115386586542604, + "loss": 2.5684, + "step": 9342 + }, + { + "epoch": 0.7540150108950044, + "grad_norm": 0.6687907576560974, + "learning_rate": 0.00011113817733747536, + "loss": 2.548, + "step": 9343 + }, + { + "epoch": 0.7540957146315874, + "grad_norm": 0.6828920841217041, + "learning_rate": 0.00011112248853193219, + "loss": 2.5544, + "step": 9344 + }, + { + "epoch": 0.7541764183681704, + "grad_norm": 0.6793262362480164, + "learning_rate": 
0.00011110679944918749, + "loss": 2.4655, + "step": 9345 + }, + { + "epoch": 0.7542571221047535, + "grad_norm": 0.6812230348587036, + "learning_rate": 0.00011109111008963235, + "loss": 2.5473, + "step": 9346 + }, + { + "epoch": 0.7543378258413365, + "grad_norm": 0.6838300824165344, + "learning_rate": 0.00011107542045365775, + "loss": 2.5248, + "step": 9347 + }, + { + "epoch": 0.7544185295779194, + "grad_norm": 0.7101932764053345, + "learning_rate": 0.0001110597305416547, + "loss": 2.5235, + "step": 9348 + }, + { + "epoch": 0.7544992333145024, + "grad_norm": 0.7136144042015076, + "learning_rate": 0.0001110440403540143, + "loss": 2.5592, + "step": 9349 + }, + { + "epoch": 0.7545799370510855, + "grad_norm": 0.6673154234886169, + "learning_rate": 0.00011102834989112751, + "loss": 2.4962, + "step": 9350 + }, + { + "epoch": 0.7546606407876685, + "grad_norm": 0.6849049925804138, + "learning_rate": 0.00011101265915338544, + "loss": 2.5793, + "step": 9351 + }, + { + "epoch": 0.7547413445242515, + "grad_norm": 0.7239733338356018, + "learning_rate": 0.0001109969681411791, + "loss": 2.5556, + "step": 9352 + }, + { + "epoch": 0.7548220482608344, + "grad_norm": 0.6738215684890747, + "learning_rate": 0.00011098127685489955, + "loss": 2.6181, + "step": 9353 + }, + { + "epoch": 0.7549027519974175, + "grad_norm": 0.6212114095687866, + "learning_rate": 0.00011096558529493787, + "loss": 2.5509, + "step": 9354 + }, + { + "epoch": 0.7549834557340005, + "grad_norm": 0.6801952123641968, + "learning_rate": 0.00011094989346168517, + "loss": 2.6454, + "step": 9355 + }, + { + "epoch": 0.7550641594705835, + "grad_norm": 0.6605944037437439, + "learning_rate": 0.0001109342013555325, + "loss": 2.5218, + "step": 9356 + }, + { + "epoch": 0.7551448632071665, + "grad_norm": 0.6486438512802124, + "learning_rate": 0.00011091850897687096, + "loss": 2.5431, + "step": 9357 + }, + { + "epoch": 0.7552255669437495, + "grad_norm": 0.6701794266700745, + "learning_rate": 0.0001109028163260916, + "loss": 2.563, 
+ "step": 9358 + }, + { + "epoch": 0.7553062706803325, + "grad_norm": 0.6486446261405945, + "learning_rate": 0.00011088712340358555, + "loss": 2.5147, + "step": 9359 + }, + { + "epoch": 0.7553869744169155, + "grad_norm": 0.695197582244873, + "learning_rate": 0.00011087143020974396, + "loss": 2.5707, + "step": 9360 + }, + { + "epoch": 0.7554676781534985, + "grad_norm": 0.6910821199417114, + "learning_rate": 0.00011085573674495791, + "loss": 2.5797, + "step": 9361 + }, + { + "epoch": 0.7555483818900816, + "grad_norm": 0.7084208726882935, + "learning_rate": 0.00011084004300961852, + "loss": 2.5362, + "step": 9362 + }, + { + "epoch": 0.7556290856266645, + "grad_norm": 0.6750916242599487, + "learning_rate": 0.00011082434900411691, + "loss": 2.5554, + "step": 9363 + }, + { + "epoch": 0.7557097893632475, + "grad_norm": 0.6711466908454895, + "learning_rate": 0.0001108086547288442, + "loss": 2.5577, + "step": 9364 + }, + { + "epoch": 0.7557904930998305, + "grad_norm": 0.7267118096351624, + "learning_rate": 0.00011079296018419163, + "loss": 2.5422, + "step": 9365 + }, + { + "epoch": 0.7558711968364136, + "grad_norm": 0.692730188369751, + "learning_rate": 0.00011077726537055021, + "loss": 2.5281, + "step": 9366 + }, + { + "epoch": 0.7559519005729965, + "grad_norm": 0.7071926593780518, + "learning_rate": 0.00011076157028831122, + "loss": 2.5273, + "step": 9367 + }, + { + "epoch": 0.7560326043095795, + "grad_norm": 0.7662521600723267, + "learning_rate": 0.00011074587493786574, + "loss": 2.5433, + "step": 9368 + }, + { + "epoch": 0.7561133080461625, + "grad_norm": 0.7173436880111694, + "learning_rate": 0.00011073017931960496, + "loss": 2.579, + "step": 9369 + }, + { + "epoch": 0.7561940117827456, + "grad_norm": 0.6401154398918152, + "learning_rate": 0.00011071448343392008, + "loss": 2.5189, + "step": 9370 + }, + { + "epoch": 0.7562747155193286, + "grad_norm": 0.6510714292526245, + "learning_rate": 0.00011069878728120224, + "loss": 2.5682, + "step": 9371 + }, + { + "epoch": 
0.7563554192559115, + "grad_norm": 0.7189988493919373, + "learning_rate": 0.00011068309086184269, + "loss": 2.5247, + "step": 9372 + }, + { + "epoch": 0.7564361229924945, + "grad_norm": 0.678753137588501, + "learning_rate": 0.00011066739417623258, + "loss": 2.5083, + "step": 9373 + }, + { + "epoch": 0.7565168267290776, + "grad_norm": 0.6903115510940552, + "learning_rate": 0.0001106516972247631, + "loss": 2.5658, + "step": 9374 + }, + { + "epoch": 0.7565975304656606, + "grad_norm": 0.6772382855415344, + "learning_rate": 0.0001106360000078255, + "loss": 2.5445, + "step": 9375 + }, + { + "epoch": 0.7566782342022436, + "grad_norm": 0.6655055284500122, + "learning_rate": 0.00011062030252581097, + "loss": 2.5186, + "step": 9376 + }, + { + "epoch": 0.7567589379388265, + "grad_norm": 0.7173851728439331, + "learning_rate": 0.00011060460477911074, + "loss": 2.5297, + "step": 9377 + }, + { + "epoch": 0.7568396416754096, + "grad_norm": 0.6891282200813293, + "learning_rate": 0.00011058890676811606, + "loss": 2.5706, + "step": 9378 + }, + { + "epoch": 0.7569203454119926, + "grad_norm": 0.7053082585334778, + "learning_rate": 0.0001105732084932181, + "loss": 2.5475, + "step": 9379 + }, + { + "epoch": 0.7570010491485756, + "grad_norm": 0.7503373622894287, + "learning_rate": 0.00011055750995480818, + "loss": 2.6438, + "step": 9380 + }, + { + "epoch": 0.7570817528851586, + "grad_norm": 0.6703453660011292, + "learning_rate": 0.0001105418111532775, + "loss": 2.5485, + "step": 9381 + }, + { + "epoch": 0.7571624566217416, + "grad_norm": 0.6651757955551147, + "learning_rate": 0.00011052611208901733, + "loss": 2.6079, + "step": 9382 + }, + { + "epoch": 0.7572431603583246, + "grad_norm": 0.6738902926445007, + "learning_rate": 0.00011051041276241895, + "loss": 2.5279, + "step": 9383 + }, + { + "epoch": 0.7573238640949076, + "grad_norm": 0.6803816556930542, + "learning_rate": 0.00011049471317387357, + "loss": 2.5972, + "step": 9384 + }, + { + "epoch": 0.7574045678314906, + "grad_norm": 
0.7127584218978882, + "learning_rate": 0.00011047901332377253, + "loss": 2.5275, + "step": 9385 + }, + { + "epoch": 0.7574852715680735, + "grad_norm": 0.7655676007270813, + "learning_rate": 0.00011046331321250711, + "loss": 2.6491, + "step": 9386 + }, + { + "epoch": 0.7575659753046566, + "grad_norm": 0.7005762457847595, + "learning_rate": 0.00011044761284046854, + "loss": 2.5266, + "step": 9387 + }, + { + "epoch": 0.7576466790412396, + "grad_norm": 0.701931357383728, + "learning_rate": 0.00011043191220804817, + "loss": 2.5556, + "step": 9388 + }, + { + "epoch": 0.7577273827778226, + "grad_norm": 0.6888757944107056, + "learning_rate": 0.00011041621131563724, + "loss": 2.5654, + "step": 9389 + }, + { + "epoch": 0.7578080865144056, + "grad_norm": 0.7119149565696716, + "learning_rate": 0.00011040051016362711, + "loss": 2.5925, + "step": 9390 + }, + { + "epoch": 0.7578887902509887, + "grad_norm": 0.7378301024436951, + "learning_rate": 0.00011038480875240911, + "loss": 2.5604, + "step": 9391 + }, + { + "epoch": 0.7579694939875716, + "grad_norm": 0.7221272587776184, + "learning_rate": 0.00011036910708237449, + "loss": 2.5293, + "step": 9392 + }, + { + "epoch": 0.7580501977241546, + "grad_norm": 0.6895891427993774, + "learning_rate": 0.00011035340515391465, + "loss": 2.5177, + "step": 9393 + }, + { + "epoch": 0.7581309014607376, + "grad_norm": 0.6812298893928528, + "learning_rate": 0.00011033770296742086, + "loss": 2.6345, + "step": 9394 + }, + { + "epoch": 0.7582116051973207, + "grad_norm": 0.6733750700950623, + "learning_rate": 0.00011032200052328449, + "loss": 2.5548, + "step": 9395 + }, + { + "epoch": 0.7582923089339036, + "grad_norm": 0.7667728066444397, + "learning_rate": 0.00011030629782189692, + "loss": 2.5858, + "step": 9396 + }, + { + "epoch": 0.7583730126704866, + "grad_norm": 0.6809018850326538, + "learning_rate": 0.00011029059486364946, + "loss": 2.6028, + "step": 9397 + }, + { + "epoch": 0.7584537164070696, + "grad_norm": 0.6817305684089661, + 
"learning_rate": 0.00011027489164893345, + "loss": 2.5594, + "step": 9398 + }, + { + "epoch": 0.7585344201436527, + "grad_norm": 0.6936343908309937, + "learning_rate": 0.00011025918817814027, + "loss": 2.4997, + "step": 9399 + }, + { + "epoch": 0.7586151238802357, + "grad_norm": 0.7046801447868347, + "learning_rate": 0.00011024348445166133, + "loss": 2.5199, + "step": 9400 + }, + { + "epoch": 0.7586958276168186, + "grad_norm": 0.7247316241264343, + "learning_rate": 0.00011022778046988798, + "loss": 2.5233, + "step": 9401 + }, + { + "epoch": 0.7587765313534016, + "grad_norm": 0.675652265548706, + "learning_rate": 0.00011021207623321162, + "loss": 2.5213, + "step": 9402 + }, + { + "epoch": 0.7588572350899847, + "grad_norm": 0.6866120100021362, + "learning_rate": 0.0001101963717420236, + "loss": 2.6026, + "step": 9403 + }, + { + "epoch": 0.7589379388265677, + "grad_norm": 0.7168806791305542, + "learning_rate": 0.00011018066699671534, + "loss": 2.5707, + "step": 9404 + }, + { + "epoch": 0.7590186425631507, + "grad_norm": 0.6858265995979309, + "learning_rate": 0.00011016496199767825, + "loss": 2.5313, + "step": 9405 + }, + { + "epoch": 0.7590993462997336, + "grad_norm": 0.7064315676689148, + "learning_rate": 0.00011014925674530375, + "loss": 2.5362, + "step": 9406 + }, + { + "epoch": 0.7591800500363167, + "grad_norm": 0.658385694026947, + "learning_rate": 0.00011013355123998324, + "loss": 2.5773, + "step": 9407 + }, + { + "epoch": 0.7592607537728997, + "grad_norm": 0.7112493515014648, + "learning_rate": 0.00011011784548210813, + "loss": 2.589, + "step": 9408 + }, + { + "epoch": 0.7593414575094827, + "grad_norm": 0.6835871934890747, + "learning_rate": 0.00011010213947206986, + "loss": 2.5952, + "step": 9409 + }, + { + "epoch": 0.7594221612460657, + "grad_norm": 0.6920506358146667, + "learning_rate": 0.00011008643321025989, + "loss": 2.5433, + "step": 9410 + }, + { + "epoch": 0.7595028649826487, + "grad_norm": 0.7239150404930115, + "learning_rate": 0.00011007072669706962, 
+ "loss": 2.5291, + "step": 9411 + }, + { + "epoch": 0.7595835687192317, + "grad_norm": 0.644568145275116, + "learning_rate": 0.00011005501993289052, + "loss": 2.5324, + "step": 9412 + }, + { + "epoch": 0.7596642724558147, + "grad_norm": 0.6604863405227661, + "learning_rate": 0.00011003931291811405, + "loss": 2.561, + "step": 9413 + }, + { + "epoch": 0.7597449761923977, + "grad_norm": 0.7056753635406494, + "learning_rate": 0.00011002360565313164, + "loss": 2.6537, + "step": 9414 + }, + { + "epoch": 0.7598256799289808, + "grad_norm": 0.6712720394134521, + "learning_rate": 0.00011000789813833476, + "loss": 2.5222, + "step": 9415 + }, + { + "epoch": 0.7599063836655637, + "grad_norm": 0.6829253435134888, + "learning_rate": 0.00010999219037411492, + "loss": 2.5156, + "step": 9416 + }, + { + "epoch": 0.7599870874021467, + "grad_norm": 0.7386518120765686, + "learning_rate": 0.00010997648236086359, + "loss": 2.5378, + "step": 9417 + }, + { + "epoch": 0.7600677911387297, + "grad_norm": 0.6711105108261108, + "learning_rate": 0.00010996077409897223, + "loss": 2.4985, + "step": 9418 + }, + { + "epoch": 0.7601484948753128, + "grad_norm": 0.6936883926391602, + "learning_rate": 0.00010994506558883233, + "loss": 2.4912, + "step": 9419 + }, + { + "epoch": 0.7602291986118958, + "grad_norm": 0.6927978992462158, + "learning_rate": 0.00010992935683083541, + "loss": 2.5526, + "step": 9420 + }, + { + "epoch": 0.7603099023484787, + "grad_norm": 0.7661495804786682, + "learning_rate": 0.00010991364782537297, + "loss": 2.5778, + "step": 9421 + }, + { + "epoch": 0.7603906060850617, + "grad_norm": 0.7092108726501465, + "learning_rate": 0.0001098979385728365, + "loss": 2.6557, + "step": 9422 + }, + { + "epoch": 0.7604713098216448, + "grad_norm": 0.696666419506073, + "learning_rate": 0.00010988222907361754, + "loss": 2.4897, + "step": 9423 + }, + { + "epoch": 0.7605520135582278, + "grad_norm": 0.6836280822753906, + "learning_rate": 0.00010986651932810756, + "loss": 2.5146, + "step": 9424 + }, + 
{ + "epoch": 0.7606327172948107, + "grad_norm": 0.7269579768180847, + "learning_rate": 0.00010985080933669815, + "loss": 2.5314, + "step": 9425 + }, + { + "epoch": 0.7607134210313937, + "grad_norm": 0.6862092018127441, + "learning_rate": 0.00010983509909978085, + "loss": 2.5415, + "step": 9426 + }, + { + "epoch": 0.7607941247679768, + "grad_norm": 0.7068747878074646, + "learning_rate": 0.00010981938861774713, + "loss": 2.5919, + "step": 9427 + }, + { + "epoch": 0.7608748285045598, + "grad_norm": 0.699999213218689, + "learning_rate": 0.0001098036778909886, + "loss": 2.5175, + "step": 9428 + }, + { + "epoch": 0.7609555322411428, + "grad_norm": 0.6642772555351257, + "learning_rate": 0.0001097879669198968, + "loss": 2.5721, + "step": 9429 + }, + { + "epoch": 0.7610362359777257, + "grad_norm": 0.7100533843040466, + "learning_rate": 0.00010977225570486323, + "loss": 2.5189, + "step": 9430 + }, + { + "epoch": 0.7611169397143088, + "grad_norm": 0.7289063930511475, + "learning_rate": 0.00010975654424627955, + "loss": 2.6139, + "step": 9431 + }, + { + "epoch": 0.7611976434508918, + "grad_norm": 0.7289659380912781, + "learning_rate": 0.00010974083254453726, + "loss": 2.5201, + "step": 9432 + }, + { + "epoch": 0.7612783471874748, + "grad_norm": 0.7389557957649231, + "learning_rate": 0.000109725120600028, + "loss": 2.559, + "step": 9433 + }, + { + "epoch": 0.7613590509240578, + "grad_norm": 0.7021538615226746, + "learning_rate": 0.00010970940841314327, + "loss": 2.6353, + "step": 9434 + }, + { + "epoch": 0.7614397546606407, + "grad_norm": 0.6614113450050354, + "learning_rate": 0.0001096936959842747, + "loss": 2.54, + "step": 9435 + }, + { + "epoch": 0.7615204583972238, + "grad_norm": 0.6905426979064941, + "learning_rate": 0.00010967798331381392, + "loss": 2.5845, + "step": 9436 + }, + { + "epoch": 0.7616011621338068, + "grad_norm": 0.8183904886245728, + "learning_rate": 0.00010966227040215247, + "loss": 2.5255, + "step": 9437 + }, + { + "epoch": 0.7616818658703898, + 
"grad_norm": 0.7404630780220032, + "learning_rate": 0.00010964655724968199, + "loss": 2.5726, + "step": 9438 + }, + { + "epoch": 0.7617625696069728, + "grad_norm": 0.657127320766449, + "learning_rate": 0.0001096308438567941, + "loss": 2.6233, + "step": 9439 + }, + { + "epoch": 0.7618432733435558, + "grad_norm": 0.7417906522750854, + "learning_rate": 0.00010961513022388039, + "loss": 2.6361, + "step": 9440 + }, + { + "epoch": 0.7619239770801388, + "grad_norm": 0.6930029988288879, + "learning_rate": 0.00010959941635133249, + "loss": 2.5164, + "step": 9441 + }, + { + "epoch": 0.7620046808167218, + "grad_norm": 0.6897261738777161, + "learning_rate": 0.00010958370223954207, + "loss": 2.5626, + "step": 9442 + }, + { + "epoch": 0.7620853845533048, + "grad_norm": 0.6737398505210876, + "learning_rate": 0.00010956798788890072, + "loss": 2.5342, + "step": 9443 + }, + { + "epoch": 0.7621660882898879, + "grad_norm": 0.6550001502037048, + "learning_rate": 0.0001095522732998001, + "loss": 2.5604, + "step": 9444 + }, + { + "epoch": 0.7622467920264708, + "grad_norm": 0.7184637784957886, + "learning_rate": 0.00010953655847263187, + "loss": 2.6006, + "step": 9445 + }, + { + "epoch": 0.7623274957630538, + "grad_norm": 0.6188609600067139, + "learning_rate": 0.00010952084340778766, + "loss": 2.4875, + "step": 9446 + }, + { + "epoch": 0.7624081994996368, + "grad_norm": 0.6550862789154053, + "learning_rate": 0.00010950512810565917, + "loss": 2.5794, + "step": 9447 + }, + { + "epoch": 0.7624889032362199, + "grad_norm": 0.6659231781959534, + "learning_rate": 0.000109489412566638, + "loss": 2.5137, + "step": 9448 + }, + { + "epoch": 0.7625696069728028, + "grad_norm": 0.749376118183136, + "learning_rate": 0.00010947369679111592, + "loss": 2.5923, + "step": 9449 + }, + { + "epoch": 0.7626503107093858, + "grad_norm": 0.6597894430160522, + "learning_rate": 0.0001094579807794845, + "loss": 2.5677, + "step": 9450 + }, + { + "epoch": 0.7627310144459688, + "grad_norm": 0.7194519639015198, + 
"learning_rate": 0.00010944226453213548, + "loss": 2.5754, + "step": 9451 + }, + { + "epoch": 0.7628117181825519, + "grad_norm": 0.6734583377838135, + "learning_rate": 0.00010942654804946057, + "loss": 2.535, + "step": 9452 + }, + { + "epoch": 0.7628924219191349, + "grad_norm": 0.7171904444694519, + "learning_rate": 0.00010941083133185146, + "loss": 2.5431, + "step": 9453 + }, + { + "epoch": 0.7629731256557178, + "grad_norm": 0.6760339736938477, + "learning_rate": 0.00010939511437969978, + "loss": 2.5163, + "step": 9454 + }, + { + "epoch": 0.7630538293923008, + "grad_norm": 0.6720966696739197, + "learning_rate": 0.00010937939719339731, + "loss": 2.5621, + "step": 9455 + }, + { + "epoch": 0.7631345331288839, + "grad_norm": 0.6374503970146179, + "learning_rate": 0.00010936367977333574, + "loss": 2.5007, + "step": 9456 + }, + { + "epoch": 0.7632152368654669, + "grad_norm": 0.6407146453857422, + "learning_rate": 0.00010934796211990684, + "loss": 2.5724, + "step": 9457 + }, + { + "epoch": 0.7632959406020499, + "grad_norm": 0.6685383319854736, + "learning_rate": 0.00010933224423350225, + "loss": 2.501, + "step": 9458 + }, + { + "epoch": 0.7633766443386328, + "grad_norm": 0.664806604385376, + "learning_rate": 0.00010931652611451373, + "loss": 2.6174, + "step": 9459 + }, + { + "epoch": 0.7634573480752159, + "grad_norm": 0.6383369565010071, + "learning_rate": 0.00010930080776333303, + "loss": 2.557, + "step": 9460 + }, + { + "epoch": 0.7635380518117989, + "grad_norm": 0.6747864484786987, + "learning_rate": 0.0001092850891803519, + "loss": 2.5406, + "step": 9461 + }, + { + "epoch": 0.7636187555483819, + "grad_norm": 0.7312811613082886, + "learning_rate": 0.00010926937036596205, + "loss": 2.5903, + "step": 9462 + }, + { + "epoch": 0.7636994592849649, + "grad_norm": 0.645847737789154, + "learning_rate": 0.00010925365132055529, + "loss": 2.5254, + "step": 9463 + }, + { + "epoch": 0.7637801630215479, + "grad_norm": 0.6466063857078552, + "learning_rate": 0.00010923793204452335, + 
"loss": 2.5322, + "step": 9464 + }, + { + "epoch": 0.7638608667581309, + "grad_norm": 0.6450574994087219, + "learning_rate": 0.000109222212538258, + "loss": 2.522, + "step": 9465 + }, + { + "epoch": 0.7639415704947139, + "grad_norm": 0.6491848826408386, + "learning_rate": 0.00010920649280215096, + "loss": 2.5545, + "step": 9466 + }, + { + "epoch": 0.7640222742312969, + "grad_norm": 0.6888336539268494, + "learning_rate": 0.0001091907728365941, + "loss": 2.5217, + "step": 9467 + }, + { + "epoch": 0.76410297796788, + "grad_norm": 0.702557384967804, + "learning_rate": 0.00010917505264197914, + "loss": 2.5351, + "step": 9468 + }, + { + "epoch": 0.7641836817044629, + "grad_norm": 0.6552408933639526, + "learning_rate": 0.0001091593322186979, + "loss": 2.5115, + "step": 9469 + }, + { + "epoch": 0.7642643854410459, + "grad_norm": 0.7514002919197083, + "learning_rate": 0.00010914361156714212, + "loss": 2.5196, + "step": 9470 + }, + { + "epoch": 0.7643450891776289, + "grad_norm": 0.6692500710487366, + "learning_rate": 0.00010912789068770366, + "loss": 2.5639, + "step": 9471 + }, + { + "epoch": 0.764425792914212, + "grad_norm": 0.6567397117614746, + "learning_rate": 0.0001091121695807743, + "loss": 2.5027, + "step": 9472 + }, + { + "epoch": 0.764506496650795, + "grad_norm": 0.6876057982444763, + "learning_rate": 0.00010909644824674587, + "loss": 2.519, + "step": 9473 + }, + { + "epoch": 0.7645872003873779, + "grad_norm": 0.747949481010437, + "learning_rate": 0.00010908072668601017, + "loss": 2.5604, + "step": 9474 + }, + { + "epoch": 0.7646679041239609, + "grad_norm": 0.6371368169784546, + "learning_rate": 0.000109065004898959, + "loss": 2.5853, + "step": 9475 + }, + { + "epoch": 0.764748607860544, + "grad_norm": 0.6472185254096985, + "learning_rate": 0.00010904928288598422, + "loss": 2.5662, + "step": 9476 + }, + { + "epoch": 0.764829311597127, + "grad_norm": 0.7009313702583313, + "learning_rate": 0.00010903356064747765, + "loss": 2.5244, + "step": 9477 + }, + { + "epoch": 
0.76491001533371, + "grad_norm": 0.7405661940574646, + "learning_rate": 0.00010901783818383116, + "loss": 2.4963, + "step": 9478 + }, + { + "epoch": 0.7649907190702929, + "grad_norm": 0.7693421840667725, + "learning_rate": 0.00010900211549543658, + "loss": 2.6018, + "step": 9479 + }, + { + "epoch": 0.765071422806876, + "grad_norm": 0.6965410709381104, + "learning_rate": 0.00010898639258268571, + "loss": 2.627, + "step": 9480 + }, + { + "epoch": 0.765152126543459, + "grad_norm": 0.7167130708694458, + "learning_rate": 0.00010897066944597046, + "loss": 2.5298, + "step": 9481 + }, + { + "epoch": 0.765232830280042, + "grad_norm": 0.7159689664840698, + "learning_rate": 0.00010895494608568268, + "loss": 2.5179, + "step": 9482 + }, + { + "epoch": 0.7653135340166249, + "grad_norm": 0.7329332232475281, + "learning_rate": 0.00010893922250221423, + "loss": 2.6498, + "step": 9483 + }, + { + "epoch": 0.765394237753208, + "grad_norm": 0.6912567019462585, + "learning_rate": 0.000108923498695957, + "loss": 2.5679, + "step": 9484 + }, + { + "epoch": 0.765474941489791, + "grad_norm": 0.7030324935913086, + "learning_rate": 0.00010890777466730285, + "loss": 2.5678, + "step": 9485 + }, + { + "epoch": 0.765555645226374, + "grad_norm": 0.7238864898681641, + "learning_rate": 0.00010889205041664365, + "loss": 2.5525, + "step": 9486 + }, + { + "epoch": 0.765636348962957, + "grad_norm": 0.6623672842979431, + "learning_rate": 0.00010887632594437134, + "loss": 2.4857, + "step": 9487 + }, + { + "epoch": 0.7657170526995399, + "grad_norm": 0.726645827293396, + "learning_rate": 0.00010886060125087776, + "loss": 2.5405, + "step": 9488 + }, + { + "epoch": 0.765797756436123, + "grad_norm": 0.6624459624290466, + "learning_rate": 0.00010884487633655487, + "loss": 2.5538, + "step": 9489 + }, + { + "epoch": 0.765878460172706, + "grad_norm": 0.7198002934455872, + "learning_rate": 0.00010882915120179453, + "loss": 2.5808, + "step": 9490 + }, + { + "epoch": 0.765959163909289, + "grad_norm": 
0.7545582056045532, + "learning_rate": 0.00010881342584698862, + "loss": 2.6059, + "step": 9491 + }, + { + "epoch": 0.766039867645872, + "grad_norm": 0.6748257279396057, + "learning_rate": 0.00010879770027252915, + "loss": 2.5203, + "step": 9492 + }, + { + "epoch": 0.766120571382455, + "grad_norm": 0.7376208901405334, + "learning_rate": 0.00010878197447880796, + "loss": 2.5255, + "step": 9493 + }, + { + "epoch": 0.766201275119038, + "grad_norm": 0.7589401006698608, + "learning_rate": 0.00010876624846621704, + "loss": 2.6304, + "step": 9494 + }, + { + "epoch": 0.766281978855621, + "grad_norm": 0.6963146924972534, + "learning_rate": 0.00010875052223514827, + "loss": 2.5547, + "step": 9495 + }, + { + "epoch": 0.766362682592204, + "grad_norm": 0.6660788059234619, + "learning_rate": 0.00010873479578599361, + "loss": 2.5922, + "step": 9496 + }, + { + "epoch": 0.7664433863287871, + "grad_norm": 0.7506482005119324, + "learning_rate": 0.00010871906911914502, + "loss": 2.5383, + "step": 9497 + }, + { + "epoch": 0.76652409006537, + "grad_norm": 0.7514285445213318, + "learning_rate": 0.00010870334223499443, + "loss": 2.5551, + "step": 9498 + }, + { + "epoch": 0.766604793801953, + "grad_norm": 0.6461809873580933, + "learning_rate": 0.00010868761513393379, + "loss": 2.5367, + "step": 9499 + }, + { + "epoch": 0.766685497538536, + "grad_norm": 0.6328238844871521, + "learning_rate": 0.00010867188781635512, + "loss": 2.5505, + "step": 9500 + }, + { + "epoch": 0.7667662012751191, + "grad_norm": 0.7090224027633667, + "learning_rate": 0.00010865616028265027, + "loss": 2.5921, + "step": 9501 + }, + { + "epoch": 0.766846905011702, + "grad_norm": 0.6404605507850647, + "learning_rate": 0.0001086404325332113, + "loss": 2.5357, + "step": 9502 + }, + { + "epoch": 0.766927608748285, + "grad_norm": 0.652477502822876, + "learning_rate": 0.00010862470456843016, + "loss": 2.5277, + "step": 9503 + }, + { + "epoch": 0.767008312484868, + "grad_norm": 0.7045448422431946, + "learning_rate": 
0.00010860897638869887, + "loss": 2.5712, + "step": 9504 + }, + { + "epoch": 0.7670890162214511, + "grad_norm": 0.7024295926094055, + "learning_rate": 0.00010859324799440936, + "loss": 2.5976, + "step": 9505 + }, + { + "epoch": 0.7671697199580341, + "grad_norm": 0.7165585160255432, + "learning_rate": 0.00010857751938595364, + "loss": 2.5378, + "step": 9506 + }, + { + "epoch": 0.767250423694617, + "grad_norm": 0.7037522196769714, + "learning_rate": 0.0001085617905637237, + "loss": 2.554, + "step": 9507 + }, + { + "epoch": 0.7673311274312, + "grad_norm": 0.738210916519165, + "learning_rate": 0.00010854606152811163, + "loss": 2.5102, + "step": 9508 + }, + { + "epoch": 0.7674118311677831, + "grad_norm": 0.7500020861625671, + "learning_rate": 0.0001085303322795093, + "loss": 2.5908, + "step": 9509 + }, + { + "epoch": 0.7674925349043661, + "grad_norm": 0.7669610977172852, + "learning_rate": 0.00010851460281830883, + "loss": 2.5119, + "step": 9510 + }, + { + "epoch": 0.7675732386409491, + "grad_norm": 0.6619212031364441, + "learning_rate": 0.00010849887314490217, + "loss": 2.5622, + "step": 9511 + }, + { + "epoch": 0.767653942377532, + "grad_norm": 0.7142546772956848, + "learning_rate": 0.00010848314325968136, + "loss": 2.596, + "step": 9512 + }, + { + "epoch": 0.7677346461141151, + "grad_norm": 0.7365403175354004, + "learning_rate": 0.0001084674131630385, + "loss": 2.5695, + "step": 9513 + }, + { + "epoch": 0.7678153498506981, + "grad_norm": 0.7843711972236633, + "learning_rate": 0.00010845168285536555, + "loss": 2.5707, + "step": 9514 + }, + { + "epoch": 0.7678960535872811, + "grad_norm": 0.6391385197639465, + "learning_rate": 0.00010843595233705454, + "loss": 2.5523, + "step": 9515 + }, + { + "epoch": 0.7679767573238641, + "grad_norm": 0.6955631971359253, + "learning_rate": 0.00010842022160849758, + "loss": 2.5072, + "step": 9516 + }, + { + "epoch": 0.7680574610604471, + "grad_norm": 0.7291388511657715, + "learning_rate": 0.00010840449067008665, + "loss": 2.5786, + 
"step": 9517 + }, + { + "epoch": 0.7681381647970301, + "grad_norm": 0.7988889813423157, + "learning_rate": 0.00010838875952221387, + "loss": 2.5622, + "step": 9518 + }, + { + "epoch": 0.7682188685336131, + "grad_norm": 0.726271390914917, + "learning_rate": 0.00010837302816527129, + "loss": 2.5479, + "step": 9519 + }, + { + "epoch": 0.7682995722701961, + "grad_norm": 0.7305205464363098, + "learning_rate": 0.00010835729659965095, + "loss": 2.5946, + "step": 9520 + }, + { + "epoch": 0.7683802760067792, + "grad_norm": 0.7843366265296936, + "learning_rate": 0.00010834156482574493, + "loss": 2.5212, + "step": 9521 + }, + { + "epoch": 0.7684609797433621, + "grad_norm": 0.6988845467567444, + "learning_rate": 0.00010832583284394529, + "loss": 2.5174, + "step": 9522 + }, + { + "epoch": 0.7685416834799451, + "grad_norm": 0.7088077068328857, + "learning_rate": 0.00010831010065464414, + "loss": 2.5253, + "step": 9523 + }, + { + "epoch": 0.7686223872165281, + "grad_norm": 0.7447031140327454, + "learning_rate": 0.00010829436825823358, + "loss": 2.6045, + "step": 9524 + }, + { + "epoch": 0.7687030909531112, + "grad_norm": 0.6865237951278687, + "learning_rate": 0.00010827863565510566, + "loss": 2.558, + "step": 9525 + }, + { + "epoch": 0.7687837946896942, + "grad_norm": 0.7748900651931763, + "learning_rate": 0.0001082629028456525, + "loss": 2.5694, + "step": 9526 + }, + { + "epoch": 0.7688644984262771, + "grad_norm": 0.7031759023666382, + "learning_rate": 0.00010824716983026622, + "loss": 2.5171, + "step": 9527 + }, + { + "epoch": 0.7689452021628601, + "grad_norm": 0.7627702355384827, + "learning_rate": 0.00010823143660933888, + "loss": 2.5715, + "step": 9528 + }, + { + "epoch": 0.7690259058994432, + "grad_norm": 0.707815945148468, + "learning_rate": 0.00010821570318326264, + "loss": 2.5281, + "step": 9529 + }, + { + "epoch": 0.7691066096360262, + "grad_norm": 0.6833841800689697, + "learning_rate": 0.00010819996955242962, + "loss": 2.5702, + "step": 9530 + }, + { + "epoch": 
0.7691873133726091, + "grad_norm": 0.7029415369033813, + "learning_rate": 0.00010818423571723189, + "loss": 2.5331, + "step": 9531 + }, + { + "epoch": 0.7692680171091921, + "grad_norm": 0.6442921161651611, + "learning_rate": 0.00010816850167806161, + "loss": 2.5423, + "step": 9532 + }, + { + "epoch": 0.7693487208457752, + "grad_norm": 0.7259004712104797, + "learning_rate": 0.00010815276743531093, + "loss": 2.6014, + "step": 9533 + }, + { + "epoch": 0.7694294245823582, + "grad_norm": 0.6483473777770996, + "learning_rate": 0.00010813703298937199, + "loss": 2.5268, + "step": 9534 + }, + { + "epoch": 0.7695101283189412, + "grad_norm": 0.6805520057678223, + "learning_rate": 0.00010812129834063691, + "loss": 2.5536, + "step": 9535 + }, + { + "epoch": 0.7695908320555241, + "grad_norm": 0.7120587825775146, + "learning_rate": 0.00010810556348949783, + "loss": 2.518, + "step": 9536 + }, + { + "epoch": 0.7696715357921071, + "grad_norm": 0.7280872464179993, + "learning_rate": 0.00010808982843634692, + "loss": 2.5525, + "step": 9537 + }, + { + "epoch": 0.7697522395286902, + "grad_norm": 0.68332439661026, + "learning_rate": 0.00010807409318157636, + "loss": 2.6318, + "step": 9538 + }, + { + "epoch": 0.7698329432652732, + "grad_norm": 0.655352771282196, + "learning_rate": 0.00010805835772557826, + "loss": 2.5781, + "step": 9539 + }, + { + "epoch": 0.7699136470018562, + "grad_norm": 0.7675400972366333, + "learning_rate": 0.00010804262206874484, + "loss": 2.5542, + "step": 9540 + }, + { + "epoch": 0.7699943507384391, + "grad_norm": 0.6676837205886841, + "learning_rate": 0.00010802688621146826, + "loss": 2.5411, + "step": 9541 + }, + { + "epoch": 0.7700750544750222, + "grad_norm": 0.7378436326980591, + "learning_rate": 0.00010801115015414067, + "loss": 2.5416, + "step": 9542 + }, + { + "epoch": 0.7701557582116052, + "grad_norm": 0.7330371141433716, + "learning_rate": 0.0001079954138971543, + "loss": 2.5154, + "step": 9543 + }, + { + "epoch": 0.7702364619481882, + "grad_norm": 
0.6792974472045898, + "learning_rate": 0.00010797967744090131, + "loss": 2.5328, + "step": 9544 + }, + { + "epoch": 0.7703171656847712, + "grad_norm": 0.7129618525505066, + "learning_rate": 0.00010796394078577392, + "loss": 2.5688, + "step": 9545 + }, + { + "epoch": 0.7703978694213542, + "grad_norm": 0.6900608539581299, + "learning_rate": 0.00010794820393216429, + "loss": 2.5659, + "step": 9546 + }, + { + "epoch": 0.7704785731579372, + "grad_norm": 0.6798564195632935, + "learning_rate": 0.00010793246688046464, + "loss": 2.5746, + "step": 9547 + }, + { + "epoch": 0.7705592768945202, + "grad_norm": 0.7132395505905151, + "learning_rate": 0.00010791672963106715, + "loss": 2.6277, + "step": 9548 + }, + { + "epoch": 0.7706399806311032, + "grad_norm": 0.6762476563453674, + "learning_rate": 0.0001079009921843641, + "loss": 2.5265, + "step": 9549 + }, + { + "epoch": 0.7707206843676863, + "grad_norm": 0.7223351001739502, + "learning_rate": 0.00010788525454074765, + "loss": 2.6255, + "step": 9550 + }, + { + "epoch": 0.7708013881042692, + "grad_norm": 0.7383624315261841, + "learning_rate": 0.00010786951670061008, + "loss": 2.5744, + "step": 9551 + }, + { + "epoch": 0.7708820918408522, + "grad_norm": 0.6677328944206238, + "learning_rate": 0.00010785377866434355, + "loss": 2.5594, + "step": 9552 + }, + { + "epoch": 0.7709627955774352, + "grad_norm": 0.6572195887565613, + "learning_rate": 0.00010783804043234032, + "loss": 2.5582, + "step": 9553 + }, + { + "epoch": 0.7710434993140183, + "grad_norm": 0.6837800741195679, + "learning_rate": 0.00010782230200499265, + "loss": 2.5311, + "step": 9554 + }, + { + "epoch": 0.7711242030506013, + "grad_norm": 0.7232153415679932, + "learning_rate": 0.00010780656338269277, + "loss": 2.5074, + "step": 9555 + }, + { + "epoch": 0.7712049067871842, + "grad_norm": 0.6722296476364136, + "learning_rate": 0.00010779082456583291, + "loss": 2.551, + "step": 9556 + }, + { + "epoch": 0.7712856105237672, + "grad_norm": 0.6461100578308105, + "learning_rate": 
0.00010777508555480535, + "loss": 2.5723, + "step": 9557 + }, + { + "epoch": 0.7713663142603503, + "grad_norm": 0.6573290824890137, + "learning_rate": 0.0001077593463500023, + "loss": 2.4967, + "step": 9558 + }, + { + "epoch": 0.7714470179969333, + "grad_norm": 0.7184738516807556, + "learning_rate": 0.0001077436069518161, + "loss": 2.6703, + "step": 9559 + }, + { + "epoch": 0.7715277217335162, + "grad_norm": 0.7226557731628418, + "learning_rate": 0.00010772786736063895, + "loss": 2.6118, + "step": 9560 + }, + { + "epoch": 0.7716084254700992, + "grad_norm": 0.6800956130027771, + "learning_rate": 0.00010771212757686318, + "loss": 2.578, + "step": 9561 + }, + { + "epoch": 0.7716891292066823, + "grad_norm": 0.6657535433769226, + "learning_rate": 0.00010769638760088099, + "loss": 2.5291, + "step": 9562 + }, + { + "epoch": 0.7717698329432653, + "grad_norm": 0.620527982711792, + "learning_rate": 0.00010768064743308471, + "loss": 2.5518, + "step": 9563 + }, + { + "epoch": 0.7718505366798483, + "grad_norm": 0.693760097026825, + "learning_rate": 0.00010766490707386663, + "loss": 2.52, + "step": 9564 + }, + { + "epoch": 0.7719312404164312, + "grad_norm": 0.6674148440361023, + "learning_rate": 0.000107649166523619, + "loss": 2.5197, + "step": 9565 + }, + { + "epoch": 0.7720119441530143, + "grad_norm": 0.6844033598899841, + "learning_rate": 0.00010763342578273419, + "loss": 2.5842, + "step": 9566 + }, + { + "epoch": 0.7720926478895973, + "grad_norm": 0.6891880035400391, + "learning_rate": 0.00010761768485160442, + "loss": 2.5349, + "step": 9567 + }, + { + "epoch": 0.7721733516261803, + "grad_norm": 0.7157394289970398, + "learning_rate": 0.00010760194373062204, + "loss": 2.5762, + "step": 9568 + }, + { + "epoch": 0.7722540553627633, + "grad_norm": 0.7522526383399963, + "learning_rate": 0.00010758620242017936, + "loss": 2.5348, + "step": 9569 + }, + { + "epoch": 0.7723347590993463, + "grad_norm": 0.6817746162414551, + "learning_rate": 0.00010757046092066869, + "loss": 2.5836, + 
"step": 9570 + }, + { + "epoch": 0.7724154628359293, + "grad_norm": 0.7274518013000488, + "learning_rate": 0.00010755471923248232, + "loss": 2.5276, + "step": 9571 + }, + { + "epoch": 0.7724961665725123, + "grad_norm": 0.6735557913780212, + "learning_rate": 0.00010753897735601264, + "loss": 2.6116, + "step": 9572 + }, + { + "epoch": 0.7725768703090953, + "grad_norm": 0.6626406908035278, + "learning_rate": 0.00010752323529165186, + "loss": 2.5778, + "step": 9573 + }, + { + "epoch": 0.7726575740456784, + "grad_norm": 0.6627367734909058, + "learning_rate": 0.00010750749303979246, + "loss": 2.5839, + "step": 9574 + }, + { + "epoch": 0.7727382777822613, + "grad_norm": 0.6658251881599426, + "learning_rate": 0.0001074917506008267, + "loss": 2.5233, + "step": 9575 + }, + { + "epoch": 0.7728189815188443, + "grad_norm": 0.6969848871231079, + "learning_rate": 0.00010747600797514692, + "loss": 2.5169, + "step": 9576 + }, + { + "epoch": 0.7728996852554273, + "grad_norm": 0.7313554883003235, + "learning_rate": 0.00010746026516314549, + "loss": 2.5528, + "step": 9577 + }, + { + "epoch": 0.7729803889920104, + "grad_norm": 0.6467077136039734, + "learning_rate": 0.00010744452216521472, + "loss": 2.5158, + "step": 9578 + }, + { + "epoch": 0.7730610927285934, + "grad_norm": 0.6808056235313416, + "learning_rate": 0.00010742877898174702, + "loss": 2.5346, + "step": 9579 + }, + { + "epoch": 0.7731417964651763, + "grad_norm": 0.7537400722503662, + "learning_rate": 0.00010741303561313474, + "loss": 2.5621, + "step": 9580 + }, + { + "epoch": 0.7732225002017593, + "grad_norm": 0.6715610027313232, + "learning_rate": 0.00010739729205977021, + "loss": 2.5384, + "step": 9581 + }, + { + "epoch": 0.7733032039383424, + "grad_norm": 0.7129234075546265, + "learning_rate": 0.00010738154832204586, + "loss": 2.5639, + "step": 9582 + }, + { + "epoch": 0.7733839076749254, + "grad_norm": 0.7156025171279907, + "learning_rate": 0.00010736580440035397, + "loss": 2.5427, + "step": 9583 + }, + { + "epoch": 
0.7734646114115084, + "grad_norm": 0.7394191026687622, + "learning_rate": 0.00010735006029508703, + "loss": 2.5809, + "step": 9584 + }, + { + "epoch": 0.7735453151480913, + "grad_norm": 0.7117684483528137, + "learning_rate": 0.00010733431600663737, + "loss": 2.5807, + "step": 9585 + }, + { + "epoch": 0.7736260188846744, + "grad_norm": 0.6622862219810486, + "learning_rate": 0.00010731857153539737, + "loss": 2.5277, + "step": 9586 + }, + { + "epoch": 0.7737067226212574, + "grad_norm": 0.7744547128677368, + "learning_rate": 0.00010730282688175943, + "loss": 2.6119, + "step": 9587 + }, + { + "epoch": 0.7737874263578404, + "grad_norm": 0.6804926991462708, + "learning_rate": 0.00010728708204611597, + "loss": 2.534, + "step": 9588 + }, + { + "epoch": 0.7738681300944233, + "grad_norm": 0.7115367650985718, + "learning_rate": 0.00010727133702885937, + "loss": 2.542, + "step": 9589 + }, + { + "epoch": 0.7739488338310063, + "grad_norm": 0.7623847723007202, + "learning_rate": 0.00010725559183038205, + "loss": 2.587, + "step": 9590 + }, + { + "epoch": 0.7740295375675894, + "grad_norm": 0.6612982153892517, + "learning_rate": 0.00010723984645107641, + "loss": 2.5257, + "step": 9591 + }, + { + "epoch": 0.7741102413041724, + "grad_norm": 0.7553900480270386, + "learning_rate": 0.00010722410089133488, + "loss": 2.6311, + "step": 9592 + }, + { + "epoch": 0.7741909450407554, + "grad_norm": 0.7541414499282837, + "learning_rate": 0.00010720835515154983, + "loss": 2.5978, + "step": 9593 + }, + { + "epoch": 0.7742716487773383, + "grad_norm": 0.6690947413444519, + "learning_rate": 0.00010719260923211376, + "loss": 2.568, + "step": 9594 + }, + { + "epoch": 0.7743523525139214, + "grad_norm": 0.7282151579856873, + "learning_rate": 0.00010717686313341909, + "loss": 2.5375, + "step": 9595 + }, + { + "epoch": 0.7744330562505044, + "grad_norm": 0.6862902045249939, + "learning_rate": 0.00010716111685585821, + "loss": 2.5503, + "step": 9596 + }, + { + "epoch": 0.7745137599870874, + "grad_norm": 
0.7076265811920166, + "learning_rate": 0.00010714537039982357, + "loss": 2.4766, + "step": 9597 + }, + { + "epoch": 0.7745944637236704, + "grad_norm": 0.7063891887664795, + "learning_rate": 0.00010712962376570761, + "loss": 2.5822, + "step": 9598 + }, + { + "epoch": 0.7746751674602534, + "grad_norm": 0.6975609064102173, + "learning_rate": 0.00010711387695390282, + "loss": 2.597, + "step": 9599 + }, + { + "epoch": 0.7747558711968364, + "grad_norm": 0.6790002584457397, + "learning_rate": 0.0001070981299648016, + "loss": 2.5705, + "step": 9600 + }, + { + "epoch": 0.7748365749334194, + "grad_norm": 0.6493679881095886, + "learning_rate": 0.00010708238279879643, + "loss": 2.49, + "step": 9601 + }, + { + "epoch": 0.7749172786700024, + "grad_norm": 0.6741142868995667, + "learning_rate": 0.00010706663545627977, + "loss": 2.6008, + "step": 9602 + }, + { + "epoch": 0.7749979824065855, + "grad_norm": 0.6753309965133667, + "learning_rate": 0.00010705088793764408, + "loss": 2.536, + "step": 9603 + }, + { + "epoch": 0.7750786861431684, + "grad_norm": 0.6879377365112305, + "learning_rate": 0.00010703514024328183, + "loss": 2.5884, + "step": 9604 + }, + { + "epoch": 0.7751593898797514, + "grad_norm": 0.6535949110984802, + "learning_rate": 0.00010701939237358549, + "loss": 2.5489, + "step": 9605 + }, + { + "epoch": 0.7752400936163344, + "grad_norm": 0.7308230400085449, + "learning_rate": 0.00010700364432894756, + "loss": 2.5679, + "step": 9606 + }, + { + "epoch": 0.7753207973529175, + "grad_norm": 0.7016584277153015, + "learning_rate": 0.00010698789610976052, + "loss": 2.5678, + "step": 9607 + }, + { + "epoch": 0.7754015010895005, + "grad_norm": 0.7181541323661804, + "learning_rate": 0.00010697214771641682, + "loss": 2.5004, + "step": 9608 + }, + { + "epoch": 0.7754822048260834, + "grad_norm": 0.6414844989776611, + "learning_rate": 0.00010695639914930895, + "loss": 2.4896, + "step": 9609 + }, + { + "epoch": 0.7755629085626664, + "grad_norm": 0.7288017868995667, + "learning_rate": 
0.00010694065040882943, + "loss": 2.5945, + "step": 9610 + }, + { + "epoch": 0.7756436122992495, + "grad_norm": 0.6808066368103027, + "learning_rate": 0.00010692490149537079, + "loss": 2.5973, + "step": 9611 + }, + { + "epoch": 0.7757243160358325, + "grad_norm": 0.7924454212188721, + "learning_rate": 0.00010690915240932553, + "loss": 2.5448, + "step": 9612 + }, + { + "epoch": 0.7758050197724154, + "grad_norm": 0.6466094851493835, + "learning_rate": 0.00010689340315108606, + "loss": 2.5065, + "step": 9613 + }, + { + "epoch": 0.7758857235089984, + "grad_norm": 0.6775460243225098, + "learning_rate": 0.00010687765372104502, + "loss": 2.5238, + "step": 9614 + }, + { + "epoch": 0.7759664272455815, + "grad_norm": 0.6901230812072754, + "learning_rate": 0.00010686190411959484, + "loss": 2.5109, + "step": 9615 + }, + { + "epoch": 0.7760471309821645, + "grad_norm": 0.7032039165496826, + "learning_rate": 0.00010684615434712808, + "loss": 2.6094, + "step": 9616 + }, + { + "epoch": 0.7761278347187475, + "grad_norm": 0.7008969187736511, + "learning_rate": 0.00010683040440403727, + "loss": 2.5758, + "step": 9617 + }, + { + "epoch": 0.7762085384553304, + "grad_norm": 0.6909677386283875, + "learning_rate": 0.00010681465429071491, + "loss": 2.5373, + "step": 9618 + }, + { + "epoch": 0.7762892421919135, + "grad_norm": 0.699030339717865, + "learning_rate": 0.00010679890400755355, + "loss": 2.577, + "step": 9619 + }, + { + "epoch": 0.7763699459284965, + "grad_norm": 0.7012344598770142, + "learning_rate": 0.00010678315355494575, + "loss": 2.5205, + "step": 9620 + }, + { + "epoch": 0.7764506496650795, + "grad_norm": 0.7693915367126465, + "learning_rate": 0.000106767402933284, + "loss": 2.5947, + "step": 9621 + }, + { + "epoch": 0.7765313534016625, + "grad_norm": 0.7635772228240967, + "learning_rate": 0.00010675165214296093, + "loss": 2.6221, + "step": 9622 + }, + { + "epoch": 0.7766120571382455, + "grad_norm": 0.701411783695221, + "learning_rate": 0.000106735901184369, + "loss": 2.5236, + 
"step": 9623 + }, + { + "epoch": 0.7766927608748285, + "grad_norm": 0.7283998727798462, + "learning_rate": 0.00010672015005790079, + "loss": 2.5581, + "step": 9624 + }, + { + "epoch": 0.7767734646114115, + "grad_norm": 0.7069897055625916, + "learning_rate": 0.0001067043987639489, + "loss": 2.5541, + "step": 9625 + }, + { + "epoch": 0.7768541683479945, + "grad_norm": 0.7419753074645996, + "learning_rate": 0.00010668864730290586, + "loss": 2.5992, + "step": 9626 + }, + { + "epoch": 0.7769348720845776, + "grad_norm": 0.6651501059532166, + "learning_rate": 0.00010667289567516426, + "loss": 2.546, + "step": 9627 + }, + { + "epoch": 0.7770155758211605, + "grad_norm": 0.7265670895576477, + "learning_rate": 0.00010665714388111665, + "loss": 2.611, + "step": 9628 + }, + { + "epoch": 0.7770962795577435, + "grad_norm": 0.6520028114318848, + "learning_rate": 0.00010664139192115559, + "loss": 2.5433, + "step": 9629 + }, + { + "epoch": 0.7771769832943265, + "grad_norm": 0.6990057826042175, + "learning_rate": 0.0001066256397956737, + "loss": 2.5325, + "step": 9630 + }, + { + "epoch": 0.7772576870309096, + "grad_norm": 0.7353312373161316, + "learning_rate": 0.00010660988750506355, + "loss": 2.4707, + "step": 9631 + }, + { + "epoch": 0.7773383907674926, + "grad_norm": 0.6810272932052612, + "learning_rate": 0.00010659413504971774, + "loss": 2.5618, + "step": 9632 + }, + { + "epoch": 0.7774190945040755, + "grad_norm": 0.6480081081390381, + "learning_rate": 0.00010657838243002883, + "loss": 2.4543, + "step": 9633 + }, + { + "epoch": 0.7774997982406585, + "grad_norm": 0.6617380976676941, + "learning_rate": 0.00010656262964638942, + "loss": 2.5628, + "step": 9634 + }, + { + "epoch": 0.7775805019772416, + "grad_norm": 0.6761382222175598, + "learning_rate": 0.00010654687669919212, + "loss": 2.5433, + "step": 9635 + }, + { + "epoch": 0.7776612057138246, + "grad_norm": 0.6733867526054382, + "learning_rate": 0.00010653112358882957, + "loss": 2.5282, + "step": 9636 + }, + { + "epoch": 
0.7777419094504076, + "grad_norm": 0.6854631304740906, + "learning_rate": 0.00010651537031569433, + "loss": 2.5997, + "step": 9637 + }, + { + "epoch": 0.7778226131869905, + "grad_norm": 0.7451226115226746, + "learning_rate": 0.00010649961688017904, + "loss": 2.5058, + "step": 9638 + }, + { + "epoch": 0.7779033169235735, + "grad_norm": 0.6744229197502136, + "learning_rate": 0.0001064838632826763, + "loss": 2.5962, + "step": 9639 + }, + { + "epoch": 0.7779840206601566, + "grad_norm": 0.7568119764328003, + "learning_rate": 0.00010646810952357873, + "loss": 2.5896, + "step": 9640 + }, + { + "epoch": 0.7780647243967396, + "grad_norm": 0.6860085725784302, + "learning_rate": 0.00010645235560327899, + "loss": 2.5675, + "step": 9641 + }, + { + "epoch": 0.7781454281333225, + "grad_norm": 0.6491742134094238, + "learning_rate": 0.00010643660152216965, + "loss": 2.5374, + "step": 9642 + }, + { + "epoch": 0.7782261318699055, + "grad_norm": 0.6664023399353027, + "learning_rate": 0.0001064208472806434, + "loss": 2.4679, + "step": 9643 + }, + { + "epoch": 0.7783068356064886, + "grad_norm": 0.6595140099525452, + "learning_rate": 0.00010640509287909284, + "loss": 2.5045, + "step": 9644 + }, + { + "epoch": 0.7783875393430716, + "grad_norm": 0.6788576245307922, + "learning_rate": 0.0001063893383179106, + "loss": 2.5706, + "step": 9645 + }, + { + "epoch": 0.7784682430796546, + "grad_norm": 0.6741334199905396, + "learning_rate": 0.00010637358359748939, + "loss": 2.5763, + "step": 9646 + }, + { + "epoch": 0.7785489468162375, + "grad_norm": 0.6837517023086548, + "learning_rate": 0.0001063578287182218, + "loss": 2.5484, + "step": 9647 + }, + { + "epoch": 0.7786296505528206, + "grad_norm": 0.6604229211807251, + "learning_rate": 0.00010634207368050048, + "loss": 2.5465, + "step": 9648 + }, + { + "epoch": 0.7787103542894036, + "grad_norm": 0.6528951525688171, + "learning_rate": 0.00010632631848471813, + "loss": 2.5409, + "step": 9649 + }, + { + "epoch": 0.7787910580259866, + "grad_norm": 
0.6615377068519592, + "learning_rate": 0.00010631056313126734, + "loss": 2.5545, + "step": 9650 + }, + { + "epoch": 0.7788717617625696, + "grad_norm": 0.666033923625946, + "learning_rate": 0.00010629480762054089, + "loss": 2.5341, + "step": 9651 + }, + { + "epoch": 0.7789524654991526, + "grad_norm": 0.7022622227668762, + "learning_rate": 0.00010627905195293135, + "loss": 2.5206, + "step": 9652 + }, + { + "epoch": 0.7790331692357356, + "grad_norm": 0.7175850868225098, + "learning_rate": 0.00010626329612883141, + "loss": 2.5912, + "step": 9653 + }, + { + "epoch": 0.7791138729723186, + "grad_norm": 0.6592069268226624, + "learning_rate": 0.00010624754014863379, + "loss": 2.5076, + "step": 9654 + }, + { + "epoch": 0.7791945767089016, + "grad_norm": 0.645893931388855, + "learning_rate": 0.0001062317840127311, + "loss": 2.5124, + "step": 9655 + }, + { + "epoch": 0.7792752804454847, + "grad_norm": 0.6638232469558716, + "learning_rate": 0.00010621602772151607, + "loss": 2.5182, + "step": 9656 + }, + { + "epoch": 0.7793559841820676, + "grad_norm": 0.6718387603759766, + "learning_rate": 0.0001062002712753814, + "loss": 2.4773, + "step": 9657 + }, + { + "epoch": 0.7794366879186506, + "grad_norm": 0.6402876377105713, + "learning_rate": 0.00010618451467471972, + "loss": 2.5557, + "step": 9658 + }, + { + "epoch": 0.7795173916552336, + "grad_norm": 0.6898398399353027, + "learning_rate": 0.00010616875791992382, + "loss": 2.5557, + "step": 9659 + }, + { + "epoch": 0.7795980953918167, + "grad_norm": 0.6718475222587585, + "learning_rate": 0.00010615300101138633, + "loss": 2.5335, + "step": 9660 + }, + { + "epoch": 0.7796787991283997, + "grad_norm": 0.6436911225318909, + "learning_rate": 0.00010613724394949995, + "loss": 2.5214, + "step": 9661 + }, + { + "epoch": 0.7797595028649826, + "grad_norm": 0.7554156184196472, + "learning_rate": 0.00010612148673465743, + "loss": 2.5526, + "step": 9662 + }, + { + "epoch": 0.7798402066015656, + "grad_norm": 0.6728504300117493, + "learning_rate": 
0.00010610572936725147, + "loss": 2.5935, + "step": 9663 + }, + { + "epoch": 0.7799209103381487, + "grad_norm": 0.6793323159217834, + "learning_rate": 0.00010608997184767476, + "loss": 2.5515, + "step": 9664 + }, + { + "epoch": 0.7800016140747317, + "grad_norm": 0.7242898941040039, + "learning_rate": 0.00010607421417631999, + "loss": 2.5332, + "step": 9665 + }, + { + "epoch": 0.7800823178113147, + "grad_norm": 0.6719244718551636, + "learning_rate": 0.00010605845635357996, + "loss": 2.5191, + "step": 9666 + }, + { + "epoch": 0.7801630215478976, + "grad_norm": 0.6836631894111633, + "learning_rate": 0.00010604269837984737, + "loss": 2.6489, + "step": 9667 + }, + { + "epoch": 0.7802437252844807, + "grad_norm": 0.6833824515342712, + "learning_rate": 0.00010602694025551496, + "loss": 2.4906, + "step": 9668 + }, + { + "epoch": 0.7803244290210637, + "grad_norm": 0.7449159026145935, + "learning_rate": 0.0001060111819809754, + "loss": 2.5301, + "step": 9669 + }, + { + "epoch": 0.7804051327576467, + "grad_norm": 0.7149158120155334, + "learning_rate": 0.00010599542355662149, + "loss": 2.5097, + "step": 9670 + }, + { + "epoch": 0.7804858364942296, + "grad_norm": 0.6616973876953125, + "learning_rate": 0.00010597966498284595, + "loss": 2.5928, + "step": 9671 + }, + { + "epoch": 0.7805665402308127, + "grad_norm": 0.6556531190872192, + "learning_rate": 0.00010596390626004154, + "loss": 2.5543, + "step": 9672 + }, + { + "epoch": 0.7806472439673957, + "grad_norm": 0.6585283875465393, + "learning_rate": 0.000105948147388601, + "loss": 2.5244, + "step": 9673 + }, + { + "epoch": 0.7807279477039787, + "grad_norm": 0.6484133005142212, + "learning_rate": 0.00010593238836891704, + "loss": 2.4996, + "step": 9674 + }, + { + "epoch": 0.7808086514405617, + "grad_norm": 0.6681119799613953, + "learning_rate": 0.00010591662920138248, + "loss": 2.5322, + "step": 9675 + }, + { + "epoch": 0.7808893551771448, + "grad_norm": 0.709403395652771, + "learning_rate": 0.00010590086988639005, + "loss": 
2.5554, + "step": 9676 + }, + { + "epoch": 0.7809700589137277, + "grad_norm": 0.6734669804573059, + "learning_rate": 0.00010588511042433251, + "loss": 2.5452, + "step": 9677 + }, + { + "epoch": 0.7810507626503107, + "grad_norm": 0.6800141930580139, + "learning_rate": 0.00010586935081560268, + "loss": 2.5154, + "step": 9678 + }, + { + "epoch": 0.7811314663868937, + "grad_norm": 0.7757244110107422, + "learning_rate": 0.00010585359106059326, + "loss": 2.5935, + "step": 9679 + }, + { + "epoch": 0.7812121701234768, + "grad_norm": 0.7288491725921631, + "learning_rate": 0.00010583783115969699, + "loss": 2.5276, + "step": 9680 + }, + { + "epoch": 0.7812928738600597, + "grad_norm": 0.6785164475440979, + "learning_rate": 0.00010582207111330678, + "loss": 2.5907, + "step": 9681 + }, + { + "epoch": 0.7813735775966427, + "grad_norm": 0.6651367545127869, + "learning_rate": 0.0001058063109218153, + "loss": 2.545, + "step": 9682 + }, + { + "epoch": 0.7814542813332257, + "grad_norm": 0.6657043695449829, + "learning_rate": 0.0001057905505856154, + "loss": 2.5548, + "step": 9683 + }, + { + "epoch": 0.7815349850698088, + "grad_norm": 0.6486692428588867, + "learning_rate": 0.00010577479010509986, + "loss": 2.5589, + "step": 9684 + }, + { + "epoch": 0.7816156888063918, + "grad_norm": 0.700749397277832, + "learning_rate": 0.0001057590294806614, + "loss": 2.6008, + "step": 9685 + }, + { + "epoch": 0.7816963925429747, + "grad_norm": 0.647051215171814, + "learning_rate": 0.00010574326871269289, + "loss": 2.4894, + "step": 9686 + }, + { + "epoch": 0.7817770962795577, + "grad_norm": 0.6932066679000854, + "learning_rate": 0.00010572750780158713, + "loss": 2.5256, + "step": 9687 + }, + { + "epoch": 0.7818578000161408, + "grad_norm": 0.6330733895301819, + "learning_rate": 0.00010571174674773689, + "loss": 2.5242, + "step": 9688 + }, + { + "epoch": 0.7819385037527238, + "grad_norm": 0.6476379036903381, + "learning_rate": 0.00010569598555153499, + "loss": 2.552, + "step": 9689 + }, + { + "epoch": 
0.7820192074893068, + "grad_norm": 0.661204993724823, + "learning_rate": 0.00010568022421337424, + "loss": 2.4869, + "step": 9690 + }, + { + "epoch": 0.7820999112258897, + "grad_norm": 0.6663263440132141, + "learning_rate": 0.00010566446273364746, + "loss": 2.5134, + "step": 9691 + }, + { + "epoch": 0.7821806149624727, + "grad_norm": 0.6982834339141846, + "learning_rate": 0.00010564870111274748, + "loss": 2.5755, + "step": 9692 + }, + { + "epoch": 0.7822613186990558, + "grad_norm": 0.6266167759895325, + "learning_rate": 0.00010563293935106706, + "loss": 2.5413, + "step": 9693 + }, + { + "epoch": 0.7823420224356388, + "grad_norm": 0.6484279632568359, + "learning_rate": 0.0001056171774489991, + "loss": 2.5579, + "step": 9694 + }, + { + "epoch": 0.7824227261722217, + "grad_norm": 0.674933910369873, + "learning_rate": 0.00010560141540693638, + "loss": 2.5364, + "step": 9695 + }, + { + "epoch": 0.7825034299088047, + "grad_norm": 0.7961840033531189, + "learning_rate": 0.00010558565322527174, + "loss": 2.5143, + "step": 9696 + }, + { + "epoch": 0.7825841336453878, + "grad_norm": 0.697158694267273, + "learning_rate": 0.00010556989090439804, + "loss": 2.5341, + "step": 9697 + }, + { + "epoch": 0.7826648373819708, + "grad_norm": 0.6912708282470703, + "learning_rate": 0.00010555412844470806, + "loss": 2.5331, + "step": 9698 + }, + { + "epoch": 0.7827455411185538, + "grad_norm": 0.7078350186347961, + "learning_rate": 0.00010553836584659474, + "loss": 2.5752, + "step": 9699 + }, + { + "epoch": 0.7828262448551367, + "grad_norm": 0.6421065926551819, + "learning_rate": 0.00010552260311045082, + "loss": 2.5393, + "step": 9700 + }, + { + "epoch": 0.7829069485917198, + "grad_norm": 0.644120454788208, + "learning_rate": 0.00010550684023666918, + "loss": 2.5062, + "step": 9701 + }, + { + "epoch": 0.7829876523283028, + "grad_norm": 0.7038589715957642, + "learning_rate": 0.00010549107722564275, + "loss": 2.6074, + "step": 9702 + }, + { + "epoch": 0.7830683560648858, + "grad_norm": 
0.6692953109741211, + "learning_rate": 0.00010547531407776427, + "loss": 2.5801, + "step": 9703 + }, + { + "epoch": 0.7831490598014688, + "grad_norm": 0.7059200406074524, + "learning_rate": 0.00010545955079342669, + "loss": 2.5579, + "step": 9704 + }, + { + "epoch": 0.7832297635380518, + "grad_norm": 0.7126718759536743, + "learning_rate": 0.0001054437873730228, + "loss": 2.5764, + "step": 9705 + }, + { + "epoch": 0.7833104672746348, + "grad_norm": 0.696784257888794, + "learning_rate": 0.0001054280238169455, + "loss": 2.5256, + "step": 9706 + }, + { + "epoch": 0.7833911710112178, + "grad_norm": 0.7473082542419434, + "learning_rate": 0.00010541226012558767, + "loss": 2.5983, + "step": 9707 + }, + { + "epoch": 0.7834718747478008, + "grad_norm": 0.6598967909812927, + "learning_rate": 0.00010539649629934219, + "loss": 2.5267, + "step": 9708 + }, + { + "epoch": 0.7835525784843839, + "grad_norm": 0.7168934345245361, + "learning_rate": 0.00010538073233860188, + "loss": 2.5278, + "step": 9709 + }, + { + "epoch": 0.7836332822209668, + "grad_norm": 0.6848951578140259, + "learning_rate": 0.00010536496824375968, + "loss": 2.5267, + "step": 9710 + }, + { + "epoch": 0.7837139859575498, + "grad_norm": 0.7276272773742676, + "learning_rate": 0.0001053492040152084, + "loss": 2.5706, + "step": 9711 + }, + { + "epoch": 0.7837946896941328, + "grad_norm": 0.6929399371147156, + "learning_rate": 0.00010533343965334101, + "loss": 2.5184, + "step": 9712 + }, + { + "epoch": 0.7838753934307159, + "grad_norm": 0.7497181296348572, + "learning_rate": 0.00010531767515855037, + "loss": 2.5626, + "step": 9713 + }, + { + "epoch": 0.7839560971672989, + "grad_norm": 0.6536200046539307, + "learning_rate": 0.00010530191053122935, + "loss": 2.5909, + "step": 9714 + }, + { + "epoch": 0.7840368009038818, + "grad_norm": 0.6750395894050598, + "learning_rate": 0.00010528614577177087, + "loss": 2.5119, + "step": 9715 + }, + { + "epoch": 0.7841175046404648, + "grad_norm": 0.6284878849983215, + "learning_rate": 
0.00010527038088056782, + "loss": 2.5417, + "step": 9716 + }, + { + "epoch": 0.7841982083770479, + "grad_norm": 0.6529444456100464, + "learning_rate": 0.00010525461585801308, + "loss": 2.5865, + "step": 9717 + }, + { + "epoch": 0.7842789121136309, + "grad_norm": 0.7332968711853027, + "learning_rate": 0.00010523885070449959, + "loss": 2.561, + "step": 9718 + }, + { + "epoch": 0.7843596158502139, + "grad_norm": 0.7054178714752197, + "learning_rate": 0.00010522308542042025, + "loss": 2.623, + "step": 9719 + }, + { + "epoch": 0.7844403195867968, + "grad_norm": 0.6837820410728455, + "learning_rate": 0.00010520732000616798, + "loss": 2.5586, + "step": 9720 + }, + { + "epoch": 0.7845210233233799, + "grad_norm": 0.7339439392089844, + "learning_rate": 0.00010519155446213565, + "loss": 2.5374, + "step": 9721 + }, + { + "epoch": 0.7846017270599629, + "grad_norm": 0.7625028491020203, + "learning_rate": 0.00010517578878871624, + "loss": 2.5663, + "step": 9722 + }, + { + "epoch": 0.7846824307965459, + "grad_norm": 0.6749752759933472, + "learning_rate": 0.00010516002298630263, + "loss": 2.5744, + "step": 9723 + }, + { + "epoch": 0.7847631345331288, + "grad_norm": 0.6702882647514343, + "learning_rate": 0.00010514425705528776, + "loss": 2.6247, + "step": 9724 + }, + { + "epoch": 0.7848438382697119, + "grad_norm": 0.6641737222671509, + "learning_rate": 0.00010512849099606457, + "loss": 2.5792, + "step": 9725 + }, + { + "epoch": 0.7849245420062949, + "grad_norm": 0.7522993683815002, + "learning_rate": 0.00010511272480902597, + "loss": 2.5941, + "step": 9726 + }, + { + "epoch": 0.7850052457428779, + "grad_norm": 0.7507709860801697, + "learning_rate": 0.00010509695849456487, + "loss": 2.5312, + "step": 9727 + }, + { + "epoch": 0.7850859494794609, + "grad_norm": 0.7101978063583374, + "learning_rate": 0.0001050811920530743, + "loss": 2.5833, + "step": 9728 + }, + { + "epoch": 0.785166653216044, + "grad_norm": 0.6814672946929932, + "learning_rate": 0.0001050654254849471, + "loss": 2.5466, 
+ "step": 9729 + }, + { + "epoch": 0.7852473569526269, + "grad_norm": 0.7250106930732727, + "learning_rate": 0.0001050496587905763, + "loss": 2.5144, + "step": 9730 + }, + { + "epoch": 0.7853280606892099, + "grad_norm": 0.7125658392906189, + "learning_rate": 0.00010503389197035474, + "loss": 2.5384, + "step": 9731 + }, + { + "epoch": 0.7854087644257929, + "grad_norm": 0.7076827883720398, + "learning_rate": 0.00010501812502467547, + "loss": 2.4879, + "step": 9732 + }, + { + "epoch": 0.785489468162376, + "grad_norm": 0.632216215133667, + "learning_rate": 0.00010500235795393141, + "loss": 2.5678, + "step": 9733 + }, + { + "epoch": 0.785570171898959, + "grad_norm": 0.7376949191093445, + "learning_rate": 0.00010498659075851551, + "loss": 2.5024, + "step": 9734 + }, + { + "epoch": 0.7856508756355419, + "grad_norm": 0.6730546951293945, + "learning_rate": 0.00010497082343882072, + "loss": 2.5001, + "step": 9735 + }, + { + "epoch": 0.7857315793721249, + "grad_norm": 0.6958187818527222, + "learning_rate": 0.00010495505599524002, + "loss": 2.538, + "step": 9736 + }, + { + "epoch": 0.785812283108708, + "grad_norm": 0.6882508397102356, + "learning_rate": 0.00010493928842816638, + "loss": 2.5247, + "step": 9737 + }, + { + "epoch": 0.785892986845291, + "grad_norm": 0.711086630821228, + "learning_rate": 0.00010492352073799276, + "loss": 2.5721, + "step": 9738 + }, + { + "epoch": 0.7859736905818739, + "grad_norm": 0.7217094898223877, + "learning_rate": 0.00010490775292511214, + "loss": 2.5827, + "step": 9739 + }, + { + "epoch": 0.7860543943184569, + "grad_norm": 0.6812087893486023, + "learning_rate": 0.0001048919849899175, + "loss": 2.532, + "step": 9740 + }, + { + "epoch": 0.7861350980550399, + "grad_norm": 0.7449110150337219, + "learning_rate": 0.00010487621693280176, + "loss": 2.5611, + "step": 9741 + }, + { + "epoch": 0.786215801791623, + "grad_norm": 0.7297104001045227, + "learning_rate": 0.00010486044875415797, + "loss": 2.5173, + "step": 9742 + }, + { + "epoch": 
0.786296505528206, + "grad_norm": 0.6741474270820618, + "learning_rate": 0.0001048446804543791, + "loss": 2.5451, + "step": 9743 + }, + { + "epoch": 0.7863772092647889, + "grad_norm": 0.6450859308242798, + "learning_rate": 0.00010482891203385812, + "loss": 2.551, + "step": 9744 + }, + { + "epoch": 0.7864579130013719, + "grad_norm": 0.6867123246192932, + "learning_rate": 0.00010481314349298805, + "loss": 2.4875, + "step": 9745 + }, + { + "epoch": 0.786538616737955, + "grad_norm": 0.6951552629470825, + "learning_rate": 0.00010479737483216183, + "loss": 2.6253, + "step": 9746 + }, + { + "epoch": 0.786619320474538, + "grad_norm": 0.6786869764328003, + "learning_rate": 0.0001047816060517725, + "loss": 2.5551, + "step": 9747 + }, + { + "epoch": 0.786700024211121, + "grad_norm": 0.698957622051239, + "learning_rate": 0.00010476583715221306, + "loss": 2.5554, + "step": 9748 + }, + { + "epoch": 0.7867807279477039, + "grad_norm": 0.6407502889633179, + "learning_rate": 0.00010475006813387648, + "loss": 2.5112, + "step": 9749 + }, + { + "epoch": 0.786861431684287, + "grad_norm": 0.660418689250946, + "learning_rate": 0.00010473429899715581, + "loss": 2.5557, + "step": 9750 + }, + { + "epoch": 0.78694213542087, + "grad_norm": 0.71445631980896, + "learning_rate": 0.00010471852974244403, + "loss": 2.5169, + "step": 9751 + }, + { + "epoch": 0.787022839157453, + "grad_norm": 0.6620494723320007, + "learning_rate": 0.00010470276037013414, + "loss": 2.5517, + "step": 9752 + }, + { + "epoch": 0.787103542894036, + "grad_norm": 0.6921235918998718, + "learning_rate": 0.00010468699088061917, + "loss": 2.5246, + "step": 9753 + }, + { + "epoch": 0.787184246630619, + "grad_norm": 0.6617140769958496, + "learning_rate": 0.00010467122127429214, + "loss": 2.4941, + "step": 9754 + }, + { + "epoch": 0.787264950367202, + "grad_norm": 0.6549816727638245, + "learning_rate": 0.00010465545155154608, + "loss": 2.5189, + "step": 9755 + }, + { + "epoch": 0.787345654103785, + "grad_norm": 0.7030060887336731, 
+ "learning_rate": 0.00010463968171277396, + "loss": 2.5058, + "step": 9756 + }, + { + "epoch": 0.787426357840368, + "grad_norm": 0.7294049859046936, + "learning_rate": 0.00010462391175836886, + "loss": 2.5166, + "step": 9757 + }, + { + "epoch": 0.787507061576951, + "grad_norm": 0.6407562494277954, + "learning_rate": 0.00010460814168872382, + "loss": 2.5391, + "step": 9758 + }, + { + "epoch": 0.787587765313534, + "grad_norm": 0.8024646639823914, + "learning_rate": 0.0001045923715042318, + "loss": 2.7034, + "step": 9759 + }, + { + "epoch": 0.787668469050117, + "grad_norm": 0.7160943150520325, + "learning_rate": 0.00010457660120528592, + "loss": 2.6016, + "step": 9760 + }, + { + "epoch": 0.7877491727867, + "grad_norm": 0.6987707018852234, + "learning_rate": 0.00010456083079227916, + "loss": 2.5428, + "step": 9761 + }, + { + "epoch": 0.7878298765232831, + "grad_norm": 0.7235369086265564, + "learning_rate": 0.00010454506026560453, + "loss": 2.517, + "step": 9762 + }, + { + "epoch": 0.787910580259866, + "grad_norm": 0.6827502846717834, + "learning_rate": 0.00010452928962565518, + "loss": 2.5777, + "step": 9763 + }, + { + "epoch": 0.787991283996449, + "grad_norm": 0.71755450963974, + "learning_rate": 0.00010451351887282408, + "loss": 2.6004, + "step": 9764 + }, + { + "epoch": 0.788071987733032, + "grad_norm": 0.6988046765327454, + "learning_rate": 0.00010449774800750427, + "loss": 2.6116, + "step": 9765 + }, + { + "epoch": 0.7881526914696151, + "grad_norm": 0.6959548592567444, + "learning_rate": 0.00010448197703008884, + "loss": 2.5856, + "step": 9766 + }, + { + "epoch": 0.7882333952061981, + "grad_norm": 0.687042772769928, + "learning_rate": 0.00010446620594097079, + "loss": 2.5167, + "step": 9767 + }, + { + "epoch": 0.788314098942781, + "grad_norm": 0.6950173377990723, + "learning_rate": 0.00010445043474054325, + "loss": 2.5157, + "step": 9768 + }, + { + "epoch": 0.788394802679364, + "grad_norm": 0.680768609046936, + "learning_rate": 0.00010443466342919926, + "loss": 
2.6177, + "step": 9769 + }, + { + "epoch": 0.7884755064159471, + "grad_norm": 0.7790142893791199, + "learning_rate": 0.00010441889200733181, + "loss": 2.5761, + "step": 9770 + }, + { + "epoch": 0.7885562101525301, + "grad_norm": 0.6207798719406128, + "learning_rate": 0.00010440312047533406, + "loss": 2.5305, + "step": 9771 + }, + { + "epoch": 0.7886369138891131, + "grad_norm": 0.7143635749816895, + "learning_rate": 0.00010438734883359903, + "loss": 2.5922, + "step": 9772 + }, + { + "epoch": 0.788717617625696, + "grad_norm": 0.7234248518943787, + "learning_rate": 0.00010437157708251977, + "loss": 2.6051, + "step": 9773 + }, + { + "epoch": 0.7887983213622791, + "grad_norm": 0.6602753400802612, + "learning_rate": 0.00010435580522248942, + "loss": 2.6002, + "step": 9774 + }, + { + "epoch": 0.7888790250988621, + "grad_norm": 0.6929246783256531, + "learning_rate": 0.00010434003325390101, + "loss": 2.5798, + "step": 9775 + }, + { + "epoch": 0.7889597288354451, + "grad_norm": 0.7355811595916748, + "learning_rate": 0.00010432426117714762, + "loss": 2.5859, + "step": 9776 + }, + { + "epoch": 0.789040432572028, + "grad_norm": 0.7009611129760742, + "learning_rate": 0.00010430848899262233, + "loss": 2.5535, + "step": 9777 + }, + { + "epoch": 0.7891211363086111, + "grad_norm": 0.6699070930480957, + "learning_rate": 0.00010429271670071823, + "loss": 2.5687, + "step": 9778 + }, + { + "epoch": 0.7892018400451941, + "grad_norm": 0.6632630228996277, + "learning_rate": 0.00010427694430182844, + "loss": 2.5359, + "step": 9779 + }, + { + "epoch": 0.7892825437817771, + "grad_norm": 0.7256911993026733, + "learning_rate": 0.000104261171796346, + "loss": 2.5432, + "step": 9780 + }, + { + "epoch": 0.7893632475183601, + "grad_norm": 0.6654312610626221, + "learning_rate": 0.000104245399184664, + "loss": 2.5432, + "step": 9781 + }, + { + "epoch": 0.7894439512549432, + "grad_norm": 0.6808900237083435, + "learning_rate": 0.00010422962646717557, + "loss": 2.4951, + "step": 9782 + }, + { + "epoch": 
0.7895246549915261, + "grad_norm": 0.6655945181846619, + "learning_rate": 0.00010421385364427378, + "loss": 2.5152, + "step": 9783 + }, + { + "epoch": 0.7896053587281091, + "grad_norm": 0.8399274349212646, + "learning_rate": 0.00010419808071635178, + "loss": 2.5688, + "step": 9784 + }, + { + "epoch": 0.7896860624646921, + "grad_norm": 0.6412226557731628, + "learning_rate": 0.00010418230768380262, + "loss": 2.5527, + "step": 9785 + }, + { + "epoch": 0.7897667662012752, + "grad_norm": 0.6505058407783508, + "learning_rate": 0.0001041665345470194, + "loss": 2.5768, + "step": 9786 + }, + { + "epoch": 0.7898474699378581, + "grad_norm": 0.6297653317451477, + "learning_rate": 0.00010415076130639526, + "loss": 2.5372, + "step": 9787 + }, + { + "epoch": 0.7899281736744411, + "grad_norm": 0.6524460315704346, + "learning_rate": 0.00010413498796232331, + "loss": 2.5047, + "step": 9788 + }, + { + "epoch": 0.7900088774110241, + "grad_norm": 0.6637924313545227, + "learning_rate": 0.00010411921451519662, + "loss": 2.508, + "step": 9789 + }, + { + "epoch": 0.7900895811476072, + "grad_norm": 0.6423435211181641, + "learning_rate": 0.00010410344096540836, + "loss": 2.4597, + "step": 9790 + }, + { + "epoch": 0.7901702848841902, + "grad_norm": 0.6361977458000183, + "learning_rate": 0.00010408766731335163, + "loss": 2.5921, + "step": 9791 + }, + { + "epoch": 0.7902509886207731, + "grad_norm": 0.6792182922363281, + "learning_rate": 0.00010407189355941953, + "loss": 2.5543, + "step": 9792 + }, + { + "epoch": 0.7903316923573561, + "grad_norm": 0.6998419761657715, + "learning_rate": 0.00010405611970400519, + "loss": 2.5333, + "step": 9793 + }, + { + "epoch": 0.7904123960939391, + "grad_norm": 0.6730015873908997, + "learning_rate": 0.00010404034574750174, + "loss": 2.596, + "step": 9794 + }, + { + "epoch": 0.7904930998305222, + "grad_norm": 0.7120258808135986, + "learning_rate": 0.00010402457169030235, + "loss": 2.5314, + "step": 9795 + }, + { + "epoch": 0.7905738035671052, + "grad_norm": 
0.6553651690483093, + "learning_rate": 0.0001040087975328001, + "loss": 2.4973, + "step": 9796 + }, + { + "epoch": 0.7906545073036881, + "grad_norm": 0.6506681442260742, + "learning_rate": 0.00010399302327538812, + "loss": 2.588, + "step": 9797 + }, + { + "epoch": 0.7907352110402711, + "grad_norm": 0.6737257242202759, + "learning_rate": 0.00010397724891845957, + "loss": 2.5454, + "step": 9798 + }, + { + "epoch": 0.7908159147768542, + "grad_norm": 0.670120894908905, + "learning_rate": 0.00010396147446240756, + "loss": 2.4926, + "step": 9799 + }, + { + "epoch": 0.7908966185134372, + "grad_norm": 0.7028468251228333, + "learning_rate": 0.00010394569990762529, + "loss": 2.5727, + "step": 9800 + }, + { + "epoch": 0.7909773222500202, + "grad_norm": 0.7084455490112305, + "learning_rate": 0.00010392992525450584, + "loss": 2.547, + "step": 9801 + }, + { + "epoch": 0.7910580259866031, + "grad_norm": 0.732694685459137, + "learning_rate": 0.0001039141505034424, + "loss": 2.5871, + "step": 9802 + }, + { + "epoch": 0.7911387297231862, + "grad_norm": 0.7214515209197998, + "learning_rate": 0.00010389837565482807, + "loss": 2.5672, + "step": 9803 + }, + { + "epoch": 0.7912194334597692, + "grad_norm": 0.6495330333709717, + "learning_rate": 0.00010388260070905604, + "loss": 2.5266, + "step": 9804 + }, + { + "epoch": 0.7913001371963522, + "grad_norm": 0.6930941343307495, + "learning_rate": 0.00010386682566651945, + "loss": 2.5734, + "step": 9805 + }, + { + "epoch": 0.7913808409329351, + "grad_norm": 0.714214563369751, + "learning_rate": 0.00010385105052761148, + "loss": 2.4987, + "step": 9806 + }, + { + "epoch": 0.7914615446695182, + "grad_norm": 0.7525388598442078, + "learning_rate": 0.00010383527529272523, + "loss": 2.5427, + "step": 9807 + }, + { + "epoch": 0.7915422484061012, + "grad_norm": 0.6088642477989197, + "learning_rate": 0.00010381949996225389, + "loss": 2.5018, + "step": 9808 + }, + { + "epoch": 0.7916229521426842, + "grad_norm": 0.6797540187835693, + "learning_rate": 
0.00010380372453659066, + "loss": 2.5235, + "step": 9809 + }, + { + "epoch": 0.7917036558792672, + "grad_norm": 0.6754054427146912, + "learning_rate": 0.00010378794901612865, + "loss": 2.5343, + "step": 9810 + }, + { + "epoch": 0.7917843596158503, + "grad_norm": 0.7375015020370483, + "learning_rate": 0.00010377217340126106, + "loss": 2.6101, + "step": 9811 + }, + { + "epoch": 0.7918650633524332, + "grad_norm": 0.6487904191017151, + "learning_rate": 0.00010375639769238103, + "loss": 2.5408, + "step": 9812 + }, + { + "epoch": 0.7919457670890162, + "grad_norm": 0.7280275821685791, + "learning_rate": 0.00010374062188988176, + "loss": 2.5503, + "step": 9813 + }, + { + "epoch": 0.7920264708255992, + "grad_norm": 0.6944922208786011, + "learning_rate": 0.00010372484599415644, + "loss": 2.5815, + "step": 9814 + }, + { + "epoch": 0.7921071745621823, + "grad_norm": 0.6970139741897583, + "learning_rate": 0.00010370907000559818, + "loss": 2.546, + "step": 9815 + }, + { + "epoch": 0.7921878782987652, + "grad_norm": 0.7338151335716248, + "learning_rate": 0.00010369329392460023, + "loss": 2.5449, + "step": 9816 + }, + { + "epoch": 0.7922685820353482, + "grad_norm": 0.7763465642929077, + "learning_rate": 0.00010367751775155574, + "loss": 2.5331, + "step": 9817 + }, + { + "epoch": 0.7923492857719312, + "grad_norm": 0.6892645955085754, + "learning_rate": 0.00010366174148685786, + "loss": 2.5617, + "step": 9818 + }, + { + "epoch": 0.7924299895085143, + "grad_norm": 0.7388250231742859, + "learning_rate": 0.00010364596513089984, + "loss": 2.5236, + "step": 9819 + }, + { + "epoch": 0.7925106932450973, + "grad_norm": 0.7035132646560669, + "learning_rate": 0.00010363018868407482, + "loss": 2.5711, + "step": 9820 + }, + { + "epoch": 0.7925913969816802, + "grad_norm": 0.7087043523788452, + "learning_rate": 0.00010361441214677603, + "loss": 2.5416, + "step": 9821 + }, + { + "epoch": 0.7926721007182632, + "grad_norm": 0.7173168063163757, + "learning_rate": 0.00010359863551939664, + "loss": 
2.529, + "step": 9822 + }, + { + "epoch": 0.7927528044548463, + "grad_norm": 0.7007408738136292, + "learning_rate": 0.00010358285880232983, + "loss": 2.5287, + "step": 9823 + }, + { + "epoch": 0.7928335081914293, + "grad_norm": 0.7731965780258179, + "learning_rate": 0.0001035670819959688, + "loss": 2.5913, + "step": 9824 + }, + { + "epoch": 0.7929142119280123, + "grad_norm": 0.6625120639801025, + "learning_rate": 0.00010355130510070681, + "loss": 2.5815, + "step": 9825 + }, + { + "epoch": 0.7929949156645952, + "grad_norm": 0.6628395318984985, + "learning_rate": 0.00010353552811693699, + "loss": 2.512, + "step": 9826 + }, + { + "epoch": 0.7930756194011783, + "grad_norm": 0.6565915942192078, + "learning_rate": 0.00010351975104505256, + "loss": 2.54, + "step": 9827 + }, + { + "epoch": 0.7931563231377613, + "grad_norm": 0.6581636667251587, + "learning_rate": 0.00010350397388544672, + "loss": 2.5462, + "step": 9828 + }, + { + "epoch": 0.7932370268743443, + "grad_norm": 0.705668568611145, + "learning_rate": 0.0001034881966385127, + "loss": 2.5241, + "step": 9829 + }, + { + "epoch": 0.7933177306109273, + "grad_norm": 0.7047126293182373, + "learning_rate": 0.00010347241930464373, + "loss": 2.5275, + "step": 9830 + }, + { + "epoch": 0.7933984343475103, + "grad_norm": 0.6285849213600159, + "learning_rate": 0.00010345664188423296, + "loss": 2.518, + "step": 9831 + }, + { + "epoch": 0.7934791380840933, + "grad_norm": 0.697542130947113, + "learning_rate": 0.00010344086437767366, + "loss": 2.5219, + "step": 9832 + }, + { + "epoch": 0.7935598418206763, + "grad_norm": 0.6349283456802368, + "learning_rate": 0.00010342508678535903, + "loss": 2.5277, + "step": 9833 + }, + { + "epoch": 0.7936405455572593, + "grad_norm": 0.7084335088729858, + "learning_rate": 0.00010340930910768225, + "loss": 2.476, + "step": 9834 + }, + { + "epoch": 0.7937212492938424, + "grad_norm": 0.6714156866073608, + "learning_rate": 0.00010339353134503662, + "loss": 2.556, + "step": 9835 + }, + { + "epoch": 
0.7938019530304253, + "grad_norm": 0.6687895059585571, + "learning_rate": 0.00010337775349781527, + "loss": 2.5756, + "step": 9836 + }, + { + "epoch": 0.7938826567670083, + "grad_norm": 0.669784665107727, + "learning_rate": 0.00010336197556641152, + "loss": 2.5545, + "step": 9837 + }, + { + "epoch": 0.7939633605035913, + "grad_norm": 0.6738600134849548, + "learning_rate": 0.0001033461975512185, + "loss": 2.5807, + "step": 9838 + }, + { + "epoch": 0.7940440642401744, + "grad_norm": 0.691443681716919, + "learning_rate": 0.00010333041945262953, + "loss": 2.5279, + "step": 9839 + }, + { + "epoch": 0.7941247679767574, + "grad_norm": 0.6283861398696899, + "learning_rate": 0.0001033146412710378, + "loss": 2.5355, + "step": 9840 + }, + { + "epoch": 0.7942054717133403, + "grad_norm": 0.6491204500198364, + "learning_rate": 0.00010329886300683655, + "loss": 2.5431, + "step": 9841 + }, + { + "epoch": 0.7942861754499233, + "grad_norm": 0.6673988103866577, + "learning_rate": 0.00010328308466041898, + "loss": 2.5845, + "step": 9842 + }, + { + "epoch": 0.7943668791865063, + "grad_norm": 0.6669130325317383, + "learning_rate": 0.00010326730623217837, + "loss": 2.5348, + "step": 9843 + }, + { + "epoch": 0.7944475829230894, + "grad_norm": 0.7003189921379089, + "learning_rate": 0.00010325152772250795, + "loss": 2.5779, + "step": 9844 + }, + { + "epoch": 0.7945282866596723, + "grad_norm": 0.6602177619934082, + "learning_rate": 0.00010323574913180097, + "loss": 2.5527, + "step": 9845 + }, + { + "epoch": 0.7946089903962553, + "grad_norm": 0.7053726315498352, + "learning_rate": 0.00010321997046045066, + "loss": 2.566, + "step": 9846 + }, + { + "epoch": 0.7946896941328383, + "grad_norm": 0.7428076863288879, + "learning_rate": 0.00010320419170885025, + "loss": 2.5348, + "step": 9847 + }, + { + "epoch": 0.7947703978694214, + "grad_norm": 0.7029163837432861, + "learning_rate": 0.00010318841287739303, + "loss": 2.5387, + "step": 9848 + }, + { + "epoch": 0.7948511016060044, + "grad_norm": 
0.6159133911132812, + "learning_rate": 0.00010317263396647221, + "loss": 2.5408, + "step": 9849 + }, + { + "epoch": 0.7949318053425873, + "grad_norm": 0.6748857498168945, + "learning_rate": 0.00010315685497648106, + "loss": 2.5299, + "step": 9850 + }, + { + "epoch": 0.7950125090791703, + "grad_norm": 0.6281898021697998, + "learning_rate": 0.00010314107590781284, + "loss": 2.5202, + "step": 9851 + }, + { + "epoch": 0.7950932128157534, + "grad_norm": 0.6602163910865784, + "learning_rate": 0.00010312529676086078, + "loss": 2.5119, + "step": 9852 + }, + { + "epoch": 0.7951739165523364, + "grad_norm": 0.6665403246879578, + "learning_rate": 0.00010310951753601818, + "loss": 2.5913, + "step": 9853 + }, + { + "epoch": 0.7952546202889194, + "grad_norm": 0.6705873012542725, + "learning_rate": 0.00010309373823367827, + "loss": 2.6039, + "step": 9854 + }, + { + "epoch": 0.7953353240255023, + "grad_norm": 0.6571313738822937, + "learning_rate": 0.0001030779588542343, + "loss": 2.5629, + "step": 9855 + }, + { + "epoch": 0.7954160277620854, + "grad_norm": 0.6597230434417725, + "learning_rate": 0.00010306217939807956, + "loss": 2.5569, + "step": 9856 + }, + { + "epoch": 0.7954967314986684, + "grad_norm": 0.7098817229270935, + "learning_rate": 0.00010304639986560733, + "loss": 2.4736, + "step": 9857 + }, + { + "epoch": 0.7955774352352514, + "grad_norm": 0.628663957118988, + "learning_rate": 0.00010303062025721082, + "loss": 2.5241, + "step": 9858 + }, + { + "epoch": 0.7956581389718343, + "grad_norm": 0.630843460559845, + "learning_rate": 0.00010301484057328333, + "loss": 2.5604, + "step": 9859 + }, + { + "epoch": 0.7957388427084174, + "grad_norm": 0.7457596659660339, + "learning_rate": 0.00010299906081421813, + "loss": 2.5675, + "step": 9860 + }, + { + "epoch": 0.7958195464450004, + "grad_norm": 0.6566091775894165, + "learning_rate": 0.00010298328098040851, + "loss": 2.4918, + "step": 9861 + }, + { + "epoch": 0.7959002501815834, + "grad_norm": 0.657357931137085, + "learning_rate": 
0.00010296750107224773, + "loss": 2.5268, + "step": 9862 + }, + { + "epoch": 0.7959809539181664, + "grad_norm": 0.7021927833557129, + "learning_rate": 0.00010295172109012905, + "loss": 2.528, + "step": 9863 + }, + { + "epoch": 0.7960616576547495, + "grad_norm": 0.662053108215332, + "learning_rate": 0.00010293594103444578, + "loss": 2.5483, + "step": 9864 + }, + { + "epoch": 0.7961423613913324, + "grad_norm": 0.776407778263092, + "learning_rate": 0.00010292016090559118, + "loss": 2.6089, + "step": 9865 + }, + { + "epoch": 0.7962230651279154, + "grad_norm": 0.6499512791633606, + "learning_rate": 0.00010290438070395854, + "loss": 2.5609, + "step": 9866 + }, + { + "epoch": 0.7963037688644984, + "grad_norm": 0.6802246570587158, + "learning_rate": 0.00010288860042994113, + "loss": 2.5217, + "step": 9867 + }, + { + "epoch": 0.7963844726010815, + "grad_norm": 0.6371235847473145, + "learning_rate": 0.00010287282008393224, + "loss": 2.4783, + "step": 9868 + }, + { + "epoch": 0.7964651763376644, + "grad_norm": 0.7070169448852539, + "learning_rate": 0.00010285703966632518, + "loss": 2.5006, + "step": 9869 + }, + { + "epoch": 0.7965458800742474, + "grad_norm": 0.657738208770752, + "learning_rate": 0.00010284125917751323, + "loss": 2.551, + "step": 9870 + }, + { + "epoch": 0.7966265838108304, + "grad_norm": 0.7936853170394897, + "learning_rate": 0.00010282547861788964, + "loss": 2.574, + "step": 9871 + }, + { + "epoch": 0.7967072875474135, + "grad_norm": 0.675715982913971, + "learning_rate": 0.00010280969798784779, + "loss": 2.5288, + "step": 9872 + }, + { + "epoch": 0.7967879912839965, + "grad_norm": 0.6980394124984741, + "learning_rate": 0.00010279391728778092, + "loss": 2.5437, + "step": 9873 + }, + { + "epoch": 0.7968686950205794, + "grad_norm": 0.6580469608306885, + "learning_rate": 0.00010277813651808226, + "loss": 2.5574, + "step": 9874 + }, + { + "epoch": 0.7969493987571624, + "grad_norm": 0.6960238218307495, + "learning_rate": 0.00010276235567914522, + "loss": 2.5477, + 
"step": 9875 + }, + { + "epoch": 0.7970301024937455, + "grad_norm": 0.704140841960907, + "learning_rate": 0.00010274657477136304, + "loss": 2.5099, + "step": 9876 + }, + { + "epoch": 0.7971108062303285, + "grad_norm": 0.7238990068435669, + "learning_rate": 0.00010273079379512906, + "loss": 2.6182, + "step": 9877 + }, + { + "epoch": 0.7971915099669115, + "grad_norm": 0.6527700424194336, + "learning_rate": 0.00010271501275083657, + "loss": 2.5148, + "step": 9878 + }, + { + "epoch": 0.7972722137034944, + "grad_norm": 0.6665365695953369, + "learning_rate": 0.00010269923163887884, + "loss": 2.5624, + "step": 9879 + }, + { + "epoch": 0.7973529174400775, + "grad_norm": 0.7304019927978516, + "learning_rate": 0.0001026834504596492, + "loss": 2.5537, + "step": 9880 + }, + { + "epoch": 0.7974336211766605, + "grad_norm": 0.6645877957344055, + "learning_rate": 0.00010266766921354099, + "loss": 2.5381, + "step": 9881 + }, + { + "epoch": 0.7975143249132435, + "grad_norm": 0.6817314624786377, + "learning_rate": 0.00010265188790094744, + "loss": 2.5399, + "step": 9882 + }, + { + "epoch": 0.7975950286498265, + "grad_norm": 0.7477232217788696, + "learning_rate": 0.00010263610652226194, + "loss": 2.6461, + "step": 9883 + }, + { + "epoch": 0.7976757323864095, + "grad_norm": 0.7087170481681824, + "learning_rate": 0.00010262032507787777, + "loss": 2.5469, + "step": 9884 + }, + { + "epoch": 0.7977564361229925, + "grad_norm": 0.7093435525894165, + "learning_rate": 0.00010260454356818825, + "loss": 2.5606, + "step": 9885 + }, + { + "epoch": 0.7978371398595755, + "grad_norm": 0.6662636399269104, + "learning_rate": 0.00010258876199358672, + "loss": 2.5415, + "step": 9886 + }, + { + "epoch": 0.7979178435961585, + "grad_norm": 0.6829736232757568, + "learning_rate": 0.00010257298035446644, + "loss": 2.5618, + "step": 9887 + }, + { + "epoch": 0.7979985473327416, + "grad_norm": 0.6872264742851257, + "learning_rate": 0.00010255719865122077, + "loss": 2.5629, + "step": 9888 + }, + { + "epoch": 
0.7980792510693245, + "grad_norm": 0.6988633871078491, + "learning_rate": 0.00010254141688424303, + "loss": 2.5191, + "step": 9889 + }, + { + "epoch": 0.7981599548059075, + "grad_norm": 0.6787285804748535, + "learning_rate": 0.00010252563505392654, + "loss": 2.5003, + "step": 9890 + }, + { + "epoch": 0.7982406585424905, + "grad_norm": 0.6703466773033142, + "learning_rate": 0.00010250985316066461, + "loss": 2.5442, + "step": 9891 + }, + { + "epoch": 0.7983213622790736, + "grad_norm": 0.6463642120361328, + "learning_rate": 0.0001024940712048506, + "loss": 2.5236, + "step": 9892 + }, + { + "epoch": 0.7984020660156566, + "grad_norm": 0.6835207939147949, + "learning_rate": 0.0001024782891868778, + "loss": 2.5094, + "step": 9893 + }, + { + "epoch": 0.7984827697522395, + "grad_norm": 0.6621001958847046, + "learning_rate": 0.00010246250710713956, + "loss": 2.5456, + "step": 9894 + }, + { + "epoch": 0.7985634734888225, + "grad_norm": 0.6675469875335693, + "learning_rate": 0.0001024467249660292, + "loss": 2.5312, + "step": 9895 + }, + { + "epoch": 0.7986441772254055, + "grad_norm": 0.7357796430587769, + "learning_rate": 0.00010243094276394007, + "loss": 2.5374, + "step": 9896 + }, + { + "epoch": 0.7987248809619886, + "grad_norm": 0.7005879878997803, + "learning_rate": 0.00010241516050126549, + "loss": 2.5667, + "step": 9897 + }, + { + "epoch": 0.7988055846985715, + "grad_norm": 0.669870913028717, + "learning_rate": 0.0001023993781783988, + "loss": 2.533, + "step": 9898 + }, + { + "epoch": 0.7988862884351545, + "grad_norm": 0.7584091424942017, + "learning_rate": 0.00010238359579573333, + "loss": 2.5995, + "step": 9899 + }, + { + "epoch": 0.7989669921717375, + "grad_norm": 0.6931570172309875, + "learning_rate": 0.00010236781335366239, + "loss": 2.5506, + "step": 9900 + }, + { + "epoch": 0.7990476959083206, + "grad_norm": 0.6810948848724365, + "learning_rate": 0.0001023520308525794, + "loss": 2.5048, + "step": 9901 + }, + { + "epoch": 0.7991283996449036, + "grad_norm": 
0.6857194900512695, + "learning_rate": 0.00010233624829287765, + "loss": 2.5559, + "step": 9902 + }, + { + "epoch": 0.7992091033814865, + "grad_norm": 0.6685707569122314, + "learning_rate": 0.00010232046567495046, + "loss": 2.5661, + "step": 9903 + }, + { + "epoch": 0.7992898071180695, + "grad_norm": 0.6626694202423096, + "learning_rate": 0.00010230468299919121, + "loss": 2.6293, + "step": 9904 + }, + { + "epoch": 0.7993705108546526, + "grad_norm": 0.6407302021980286, + "learning_rate": 0.00010228890026599323, + "loss": 2.5552, + "step": 9905 + }, + { + "epoch": 0.7994512145912356, + "grad_norm": 0.762235701084137, + "learning_rate": 0.00010227311747574986, + "loss": 2.4904, + "step": 9906 + }, + { + "epoch": 0.7995319183278186, + "grad_norm": 0.703507661819458, + "learning_rate": 0.0001022573346288545, + "loss": 2.5684, + "step": 9907 + }, + { + "epoch": 0.7996126220644015, + "grad_norm": 0.82541823387146, + "learning_rate": 0.00010224155172570043, + "loss": 2.521, + "step": 9908 + }, + { + "epoch": 0.7996933258009846, + "grad_norm": 0.6836804747581482, + "learning_rate": 0.00010222576876668104, + "loss": 2.5364, + "step": 9909 + }, + { + "epoch": 0.7997740295375676, + "grad_norm": 0.7388977408409119, + "learning_rate": 0.00010220998575218966, + "loss": 2.5724, + "step": 9910 + }, + { + "epoch": 0.7998547332741506, + "grad_norm": 0.7380896806716919, + "learning_rate": 0.00010219420268261966, + "loss": 2.5918, + "step": 9911 + }, + { + "epoch": 0.7999354370107336, + "grad_norm": 0.7303522825241089, + "learning_rate": 0.00010217841955836442, + "loss": 2.5432, + "step": 9912 + }, + { + "epoch": 0.8000161407473166, + "grad_norm": 0.6859301924705505, + "learning_rate": 0.00010216263637981727, + "loss": 2.5734, + "step": 9913 + }, + { + "epoch": 0.8000968444838996, + "grad_norm": 0.731910228729248, + "learning_rate": 0.00010214685314737154, + "loss": 2.5227, + "step": 9914 + }, + { + "epoch": 0.8001775482204826, + "grad_norm": 0.7105006575584412, + "learning_rate": 
0.00010213106986142062, + "loss": 2.5335, + "step": 9915 + }, + { + "epoch": 0.8002582519570656, + "grad_norm": 0.7337056994438171, + "learning_rate": 0.00010211528652235786, + "loss": 2.6204, + "step": 9916 + }, + { + "epoch": 0.8003389556936487, + "grad_norm": 0.7350614666938782, + "learning_rate": 0.00010209950313057668, + "loss": 2.5264, + "step": 9917 + }, + { + "epoch": 0.8004196594302316, + "grad_norm": 0.6411921977996826, + "learning_rate": 0.00010208371968647036, + "loss": 2.4642, + "step": 9918 + }, + { + "epoch": 0.8005003631668146, + "grad_norm": 0.7601611018180847, + "learning_rate": 0.00010206793619043229, + "loss": 2.6249, + "step": 9919 + }, + { + "epoch": 0.8005810669033976, + "grad_norm": 0.7086012363433838, + "learning_rate": 0.00010205215264285585, + "loss": 2.5508, + "step": 9920 + }, + { + "epoch": 0.8006617706399807, + "grad_norm": 0.7267128825187683, + "learning_rate": 0.00010203636904413443, + "loss": 2.5109, + "step": 9921 + }, + { + "epoch": 0.8007424743765637, + "grad_norm": 0.7606067657470703, + "learning_rate": 0.00010202058539466132, + "loss": 2.5172, + "step": 9922 + }, + { + "epoch": 0.8008231781131466, + "grad_norm": 0.7610498666763306, + "learning_rate": 0.00010200480169483, + "loss": 2.5085, + "step": 9923 + }, + { + "epoch": 0.8009038818497296, + "grad_norm": 0.7604225873947144, + "learning_rate": 0.00010198901794503373, + "loss": 2.5615, + "step": 9924 + }, + { + "epoch": 0.8009845855863127, + "grad_norm": 0.739532470703125, + "learning_rate": 0.00010197323414566596, + "loss": 2.5574, + "step": 9925 + }, + { + "epoch": 0.8010652893228957, + "grad_norm": 0.6913303136825562, + "learning_rate": 0.00010195745029712003, + "loss": 2.5403, + "step": 9926 + }, + { + "epoch": 0.8011459930594786, + "grad_norm": 0.6963592767715454, + "learning_rate": 0.0001019416663997893, + "loss": 2.5615, + "step": 9927 + }, + { + "epoch": 0.8012266967960616, + "grad_norm": 0.681481122970581, + "learning_rate": 0.0001019258824540672, + "loss": 2.5125, + 
"step": 9928 + }, + { + "epoch": 0.8013074005326447, + "grad_norm": 0.7192744016647339, + "learning_rate": 0.00010191009846034709, + "loss": 2.5952, + "step": 9929 + }, + { + "epoch": 0.8013881042692277, + "grad_norm": 0.7030046582221985, + "learning_rate": 0.00010189431441902228, + "loss": 2.5445, + "step": 9930 + }, + { + "epoch": 0.8014688080058107, + "grad_norm": 0.6180598139762878, + "learning_rate": 0.00010187853033048622, + "loss": 2.4902, + "step": 9931 + }, + { + "epoch": 0.8015495117423936, + "grad_norm": 0.7479971051216125, + "learning_rate": 0.0001018627461951323, + "loss": 2.5703, + "step": 9932 + }, + { + "epoch": 0.8016302154789767, + "grad_norm": 0.7339857220649719, + "learning_rate": 0.00010184696201335387, + "loss": 2.5744, + "step": 9933 + }, + { + "epoch": 0.8017109192155597, + "grad_norm": 0.6741397380828857, + "learning_rate": 0.00010183117778554432, + "loss": 2.5777, + "step": 9934 + }, + { + "epoch": 0.8017916229521427, + "grad_norm": 0.6731706857681274, + "learning_rate": 0.00010181539351209699, + "loss": 2.5438, + "step": 9935 + }, + { + "epoch": 0.8018723266887257, + "grad_norm": 0.6929418444633484, + "learning_rate": 0.00010179960919340535, + "loss": 2.5308, + "step": 9936 + }, + { + "epoch": 0.8019530304253087, + "grad_norm": 0.7383175492286682, + "learning_rate": 0.00010178382482986271, + "loss": 2.5623, + "step": 9937 + }, + { + "epoch": 0.8020337341618917, + "grad_norm": 0.6872193217277527, + "learning_rate": 0.00010176804042186252, + "loss": 2.5271, + "step": 9938 + }, + { + "epoch": 0.8021144378984747, + "grad_norm": 0.7354295253753662, + "learning_rate": 0.00010175225596979816, + "loss": 2.5122, + "step": 9939 + }, + { + "epoch": 0.8021951416350577, + "grad_norm": 0.7589237689971924, + "learning_rate": 0.00010173647147406297, + "loss": 2.5529, + "step": 9940 + }, + { + "epoch": 0.8022758453716408, + "grad_norm": 0.6998353004455566, + "learning_rate": 0.00010172068693505037, + "loss": 2.4683, + "step": 9941 + }, + { + "epoch": 
0.8023565491082237, + "grad_norm": 0.6816055178642273, + "learning_rate": 0.00010170490235315377, + "loss": 2.567, + "step": 9942 + }, + { + "epoch": 0.8024372528448067, + "grad_norm": 0.7188318371772766, + "learning_rate": 0.00010168911772876652, + "loss": 2.5631, + "step": 9943 + }, + { + "epoch": 0.8025179565813897, + "grad_norm": 0.6925922632217407, + "learning_rate": 0.00010167333306228209, + "loss": 2.4872, + "step": 9944 + }, + { + "epoch": 0.8025986603179727, + "grad_norm": 0.7081493735313416, + "learning_rate": 0.00010165754835409377, + "loss": 2.5482, + "step": 9945 + }, + { + "epoch": 0.8026793640545558, + "grad_norm": 0.6838935613632202, + "learning_rate": 0.00010164176360459505, + "loss": 2.541, + "step": 9946 + }, + { + "epoch": 0.8027600677911387, + "grad_norm": 0.6959214210510254, + "learning_rate": 0.00010162597881417928, + "loss": 2.4574, + "step": 9947 + }, + { + "epoch": 0.8028407715277217, + "grad_norm": 0.693004310131073, + "learning_rate": 0.00010161019398323986, + "loss": 2.5553, + "step": 9948 + }, + { + "epoch": 0.8029214752643047, + "grad_norm": 0.6683690547943115, + "learning_rate": 0.00010159440911217022, + "loss": 2.5501, + "step": 9949 + }, + { + "epoch": 0.8030021790008878, + "grad_norm": 0.6797001361846924, + "learning_rate": 0.0001015786242013637, + "loss": 2.5731, + "step": 9950 + }, + { + "epoch": 0.8030828827374707, + "grad_norm": 0.6621012091636658, + "learning_rate": 0.00010156283925121375, + "loss": 2.5278, + "step": 9951 + }, + { + "epoch": 0.8031635864740537, + "grad_norm": 0.7024650573730469, + "learning_rate": 0.00010154705426211377, + "loss": 2.5939, + "step": 9952 + }, + { + "epoch": 0.8032442902106367, + "grad_norm": 0.6756548285484314, + "learning_rate": 0.00010153126923445714, + "loss": 2.5797, + "step": 9953 + }, + { + "epoch": 0.8033249939472198, + "grad_norm": 0.6560662984848022, + "learning_rate": 0.00010151548416863732, + "loss": 2.5358, + "step": 9954 + }, + { + "epoch": 0.8034056976838028, + "grad_norm": 
0.7172456979751587, + "learning_rate": 0.00010149969906504766, + "loss": 2.5054, + "step": 9955 + }, + { + "epoch": 0.8034864014203857, + "grad_norm": 0.6379461288452148, + "learning_rate": 0.00010148391392408152, + "loss": 2.5341, + "step": 9956 + }, + { + "epoch": 0.8035671051569687, + "grad_norm": 0.6553892493247986, + "learning_rate": 0.00010146812874613243, + "loss": 2.5618, + "step": 9957 + }, + { + "epoch": 0.8036478088935518, + "grad_norm": 0.6940072178840637, + "learning_rate": 0.00010145234353159372, + "loss": 2.5686, + "step": 9958 + }, + { + "epoch": 0.8037285126301348, + "grad_norm": 0.6641896963119507, + "learning_rate": 0.00010143655828085878, + "loss": 2.5188, + "step": 9959 + }, + { + "epoch": 0.8038092163667178, + "grad_norm": 0.6622887253761292, + "learning_rate": 0.00010142077299432111, + "loss": 2.54, + "step": 9960 + }, + { + "epoch": 0.8038899201033007, + "grad_norm": 0.7216808795928955, + "learning_rate": 0.000101404987672374, + "loss": 2.5775, + "step": 9961 + }, + { + "epoch": 0.8039706238398838, + "grad_norm": 0.6544952988624573, + "learning_rate": 0.00010138920231541095, + "loss": 2.6066, + "step": 9962 + }, + { + "epoch": 0.8040513275764668, + "grad_norm": 0.6869354248046875, + "learning_rate": 0.00010137341692382539, + "loss": 2.5157, + "step": 9963 + }, + { + "epoch": 0.8041320313130498, + "grad_norm": 0.6731898784637451, + "learning_rate": 0.00010135763149801063, + "loss": 2.4369, + "step": 9964 + }, + { + "epoch": 0.8042127350496328, + "grad_norm": 0.6943373084068298, + "learning_rate": 0.00010134184603836017, + "loss": 2.5529, + "step": 9965 + }, + { + "epoch": 0.8042934387862158, + "grad_norm": 0.729928195476532, + "learning_rate": 0.00010132606054526739, + "loss": 2.5814, + "step": 9966 + }, + { + "epoch": 0.8043741425227988, + "grad_norm": 0.6491130590438843, + "learning_rate": 0.00010131027501912571, + "loss": 2.5246, + "step": 9967 + }, + { + "epoch": 0.8044548462593818, + "grad_norm": 0.747756838798523, + "learning_rate": 
0.00010129448946032857, + "loss": 2.513, + "step": 9968 + }, + { + "epoch": 0.8045355499959648, + "grad_norm": 0.6449645757675171, + "learning_rate": 0.00010127870386926935, + "loss": 2.5232, + "step": 9969 + }, + { + "epoch": 0.8046162537325479, + "grad_norm": 0.6425037980079651, + "learning_rate": 0.0001012629182463415, + "loss": 2.5065, + "step": 9970 + }, + { + "epoch": 0.8046969574691308, + "grad_norm": 0.7340624332427979, + "learning_rate": 0.00010124713259193843, + "loss": 2.5325, + "step": 9971 + }, + { + "epoch": 0.8047776612057138, + "grad_norm": 0.7308940291404724, + "learning_rate": 0.00010123134690645352, + "loss": 2.5717, + "step": 9972 + }, + { + "epoch": 0.8048583649422968, + "grad_norm": 0.7128338813781738, + "learning_rate": 0.00010121556119028028, + "loss": 2.5548, + "step": 9973 + }, + { + "epoch": 0.8049390686788799, + "grad_norm": 0.7027677893638611, + "learning_rate": 0.00010119977544381207, + "loss": 2.5311, + "step": 9974 + }, + { + "epoch": 0.8050197724154629, + "grad_norm": 0.7022054195404053, + "learning_rate": 0.00010118398966744229, + "loss": 2.5177, + "step": 9975 + }, + { + "epoch": 0.8051004761520458, + "grad_norm": 0.7382696270942688, + "learning_rate": 0.00010116820386156441, + "loss": 2.532, + "step": 9976 + }, + { + "epoch": 0.8051811798886288, + "grad_norm": 0.6968613862991333, + "learning_rate": 0.00010115241802657181, + "loss": 2.536, + "step": 9977 + }, + { + "epoch": 0.8052618836252119, + "grad_norm": 0.8277899026870728, + "learning_rate": 0.00010113663216285798, + "loss": 2.5963, + "step": 9978 + }, + { + "epoch": 0.8053425873617949, + "grad_norm": 0.677707314491272, + "learning_rate": 0.00010112084627081629, + "loss": 2.5041, + "step": 9979 + }, + { + "epoch": 0.8054232910983778, + "grad_norm": 0.6943314075469971, + "learning_rate": 0.00010110506035084017, + "loss": 2.4776, + "step": 9980 + }, + { + "epoch": 0.8055039948349608, + "grad_norm": 0.6948177218437195, + "learning_rate": 0.00010108927440332306, + "loss": 2.5306, 
+ "step": 9981 + }, + { + "epoch": 0.8055846985715439, + "grad_norm": 0.6873918771743774, + "learning_rate": 0.0001010734884286584, + "loss": 2.5783, + "step": 9982 + }, + { + "epoch": 0.8056654023081269, + "grad_norm": 0.6370649933815002, + "learning_rate": 0.00010105770242723958, + "loss": 2.5584, + "step": 9983 + }, + { + "epoch": 0.8057461060447099, + "grad_norm": 0.7594422698020935, + "learning_rate": 0.00010104191639946008, + "loss": 2.543, + "step": 9984 + }, + { + "epoch": 0.8058268097812928, + "grad_norm": 0.697380542755127, + "learning_rate": 0.00010102613034571327, + "loss": 2.5295, + "step": 9985 + }, + { + "epoch": 0.8059075135178759, + "grad_norm": 0.6597251892089844, + "learning_rate": 0.00010101034426639264, + "loss": 2.5917, + "step": 9986 + }, + { + "epoch": 0.8059882172544589, + "grad_norm": 0.6583479046821594, + "learning_rate": 0.00010099455816189156, + "loss": 2.6206, + "step": 9987 + }, + { + "epoch": 0.8060689209910419, + "grad_norm": 0.6603943705558777, + "learning_rate": 0.00010097877203260349, + "loss": 2.5223, + "step": 9988 + }, + { + "epoch": 0.8061496247276249, + "grad_norm": 0.716454267501831, + "learning_rate": 0.00010096298587892188, + "loss": 2.5572, + "step": 9989 + }, + { + "epoch": 0.806230328464208, + "grad_norm": 0.6511488556861877, + "learning_rate": 0.00010094719970124016, + "loss": 2.5815, + "step": 9990 + }, + { + "epoch": 0.8063110322007909, + "grad_norm": 0.6969261169433594, + "learning_rate": 0.00010093141349995173, + "loss": 2.5902, + "step": 9991 + }, + { + "epoch": 0.8063917359373739, + "grad_norm": 0.7012695074081421, + "learning_rate": 0.00010091562727545001, + "loss": 2.5134, + "step": 9992 + }, + { + "epoch": 0.8064724396739569, + "grad_norm": 0.6368406414985657, + "learning_rate": 0.00010089984102812848, + "loss": 2.568, + "step": 9993 + }, + { + "epoch": 0.80655314341054, + "grad_norm": 0.6552153825759888, + "learning_rate": 0.00010088405475838059, + "loss": 2.5101, + "step": 9994 + }, + { + "epoch": 
0.8066338471471229, + "grad_norm": 0.6949633359909058, + "learning_rate": 0.00010086826846659974, + "loss": 2.5427, + "step": 9995 + }, + { + "epoch": 0.8067145508837059, + "grad_norm": 0.6593093872070312, + "learning_rate": 0.00010085248215317935, + "loss": 2.5551, + "step": 9996 + }, + { + "epoch": 0.8067952546202889, + "grad_norm": 0.6963745355606079, + "learning_rate": 0.00010083669581851287, + "loss": 2.4956, + "step": 9997 + }, + { + "epoch": 0.8068759583568719, + "grad_norm": 0.7093523144721985, + "learning_rate": 0.00010082090946299377, + "loss": 2.5876, + "step": 9998 + }, + { + "epoch": 0.806956662093455, + "grad_norm": 0.6796671152114868, + "learning_rate": 0.00010080512308701544, + "loss": 2.5302, + "step": 9999 + }, + { + "epoch": 0.8070373658300379, + "grad_norm": 0.7170542478561401, + "learning_rate": 0.00010078933669097135, + "loss": 2.5886, + "step": 10000 + }, + { + "epoch": 0.8070373658300379, + "eval_loss": 2.4734926223754883, + "eval_runtime": 788.2594, + "eval_samples_per_second": 3.324, + "eval_steps_per_second": 0.554, + "step": 10000 + }, + { + "epoch": 0.8071180695666209, + "grad_norm": 0.6566126346588135, + "learning_rate": 0.0001007735502752549, + "loss": 2.4441, + "step": 10001 + }, + { + "epoch": 0.8071987733032039, + "grad_norm": 0.6739515662193298, + "learning_rate": 0.00010075776384025957, + "loss": 2.5767, + "step": 10002 + }, + { + "epoch": 0.807279477039787, + "grad_norm": 0.6334208846092224, + "learning_rate": 0.00010074197738637881, + "loss": 2.5321, + "step": 10003 + }, + { + "epoch": 0.80736018077637, + "grad_norm": 0.6764520406723022, + "learning_rate": 0.000100726190914006, + "loss": 2.5144, + "step": 10004 + }, + { + "epoch": 0.8074408845129529, + "grad_norm": 0.7090082764625549, + "learning_rate": 0.00010071040442353464, + "loss": 2.5626, + "step": 10005 + }, + { + "epoch": 0.8075215882495359, + "grad_norm": 0.6915304064750671, + "learning_rate": 0.00010069461791535814, + "loss": 2.5261, + "step": 10006 + }, + { + 
"epoch": 0.807602291986119, + "grad_norm": 0.6685747504234314, + "learning_rate": 0.00010067883138986991, + "loss": 2.492, + "step": 10007 + }, + { + "epoch": 0.807682995722702, + "grad_norm": 0.7179074883460999, + "learning_rate": 0.00010066304484746347, + "loss": 2.4601, + "step": 10008 + }, + { + "epoch": 0.807763699459285, + "grad_norm": 0.7032761573791504, + "learning_rate": 0.00010064725828853219, + "loss": 2.578, + "step": 10009 + }, + { + "epoch": 0.8078444031958679, + "grad_norm": 0.710322916507721, + "learning_rate": 0.00010063147171346959, + "loss": 2.5514, + "step": 10010 + }, + { + "epoch": 0.807925106932451, + "grad_norm": 0.6552841067314148, + "learning_rate": 0.00010061568512266903, + "loss": 2.5474, + "step": 10011 + }, + { + "epoch": 0.808005810669034, + "grad_norm": 0.6862452626228333, + "learning_rate": 0.00010059989851652398, + "loss": 2.5772, + "step": 10012 + }, + { + "epoch": 0.808086514405617, + "grad_norm": 0.7123851180076599, + "learning_rate": 0.00010058411189542788, + "loss": 2.4936, + "step": 10013 + }, + { + "epoch": 0.8081672181421999, + "grad_norm": 0.6889944672584534, + "learning_rate": 0.00010056832525977422, + "loss": 2.5041, + "step": 10014 + }, + { + "epoch": 0.808247921878783, + "grad_norm": 0.6986924409866333, + "learning_rate": 0.0001005525386099564, + "loss": 2.5591, + "step": 10015 + }, + { + "epoch": 0.808328625615366, + "grad_norm": 0.6935306787490845, + "learning_rate": 0.00010053675194636787, + "loss": 2.5423, + "step": 10016 + }, + { + "epoch": 0.808409329351949, + "grad_norm": 0.6751969456672668, + "learning_rate": 0.00010052096526940207, + "loss": 2.5666, + "step": 10017 + }, + { + "epoch": 0.808490033088532, + "grad_norm": 0.676909327507019, + "learning_rate": 0.00010050517857945243, + "loss": 2.5394, + "step": 10018 + }, + { + "epoch": 0.808570736825115, + "grad_norm": 0.7439377307891846, + "learning_rate": 0.00010048939187691246, + "loss": 2.5011, + "step": 10019 + }, + { + "epoch": 0.808651440561698, + 
"grad_norm": 0.6594791412353516, + "learning_rate": 0.00010047360516217554, + "loss": 2.5159, + "step": 10020 + }, + { + "epoch": 0.808732144298281, + "grad_norm": 0.7013304233551025, + "learning_rate": 0.00010045781843563517, + "loss": 2.5439, + "step": 10021 + }, + { + "epoch": 0.808812848034864, + "grad_norm": 0.7537491917610168, + "learning_rate": 0.00010044203169768476, + "loss": 2.5837, + "step": 10022 + }, + { + "epoch": 0.8088935517714471, + "grad_norm": 0.7273866534233093, + "learning_rate": 0.00010042624494871773, + "loss": 2.5546, + "step": 10023 + }, + { + "epoch": 0.80897425550803, + "grad_norm": 0.6716369986534119, + "learning_rate": 0.0001004104581891276, + "loss": 2.5264, + "step": 10024 + }, + { + "epoch": 0.809054959244613, + "grad_norm": 0.7544769644737244, + "learning_rate": 0.00010039467141930777, + "loss": 2.5502, + "step": 10025 + }, + { + "epoch": 0.809135662981196, + "grad_norm": 0.8713179230690002, + "learning_rate": 0.0001003788846396517, + "loss": 2.5178, + "step": 10026 + }, + { + "epoch": 0.8092163667177791, + "grad_norm": 0.6704887747764587, + "learning_rate": 0.00010036309785055283, + "loss": 2.5136, + "step": 10027 + }, + { + "epoch": 0.809297070454362, + "grad_norm": 0.7308552861213684, + "learning_rate": 0.00010034731105240458, + "loss": 2.4781, + "step": 10028 + }, + { + "epoch": 0.809377774190945, + "grad_norm": 0.7214144468307495, + "learning_rate": 0.00010033152424560049, + "loss": 2.5946, + "step": 10029 + }, + { + "epoch": 0.809458477927528, + "grad_norm": 0.6946821808815002, + "learning_rate": 0.00010031573743053393, + "loss": 2.4937, + "step": 10030 + }, + { + "epoch": 0.8095391816641111, + "grad_norm": 0.7348416447639465, + "learning_rate": 0.00010029995060759833, + "loss": 2.5959, + "step": 10031 + }, + { + "epoch": 0.8096198854006941, + "grad_norm": 0.7482579350471497, + "learning_rate": 0.00010028416377718721, + "loss": 2.6, + "step": 10032 + }, + { + "epoch": 0.809700589137277, + "grad_norm": 0.7114939093589783, + 
"learning_rate": 0.00010026837693969397, + "loss": 2.5376, + "step": 10033 + }, + { + "epoch": 0.80978129287386, + "grad_norm": 0.6559228897094727, + "learning_rate": 0.00010025259009551209, + "loss": 2.4961, + "step": 10034 + }, + { + "epoch": 0.8098619966104431, + "grad_norm": 0.7494906187057495, + "learning_rate": 0.00010023680324503501, + "loss": 2.5723, + "step": 10035 + }, + { + "epoch": 0.8099427003470261, + "grad_norm": 0.7207093834877014, + "learning_rate": 0.00010022101638865618, + "loss": 2.5523, + "step": 10036 + }, + { + "epoch": 0.8100234040836091, + "grad_norm": 0.6730504035949707, + "learning_rate": 0.00010020522952676903, + "loss": 2.5135, + "step": 10037 + }, + { + "epoch": 0.810104107820192, + "grad_norm": 0.6805168390274048, + "learning_rate": 0.000100189442659767, + "loss": 2.5598, + "step": 10038 + }, + { + "epoch": 0.8101848115567751, + "grad_norm": 0.6639137268066406, + "learning_rate": 0.00010017365578804358, + "loss": 2.5152, + "step": 10039 + }, + { + "epoch": 0.8102655152933581, + "grad_norm": 0.6604194641113281, + "learning_rate": 0.00010015786891199221, + "loss": 2.5302, + "step": 10040 + }, + { + "epoch": 0.8103462190299411, + "grad_norm": 0.7664934992790222, + "learning_rate": 0.00010014208203200634, + "loss": 2.5437, + "step": 10041 + }, + { + "epoch": 0.8104269227665241, + "grad_norm": 0.7404079437255859, + "learning_rate": 0.00010012629514847942, + "loss": 2.6559, + "step": 10042 + }, + { + "epoch": 0.8105076265031071, + "grad_norm": 0.694006085395813, + "learning_rate": 0.00010011050826180488, + "loss": 2.5571, + "step": 10043 + }, + { + "epoch": 0.8105883302396901, + "grad_norm": 0.7007058262825012, + "learning_rate": 0.00010009472137237616, + "loss": 2.5639, + "step": 10044 + }, + { + "epoch": 0.8106690339762731, + "grad_norm": 0.7331913113594055, + "learning_rate": 0.00010007893448058678, + "loss": 2.5499, + "step": 10045 + }, + { + "epoch": 0.8107497377128561, + "grad_norm": 0.7636487483978271, + "learning_rate": 
0.00010006314758683015, + "loss": 2.6068, + "step": 10046 + }, + { + "epoch": 0.810830441449439, + "grad_norm": 0.6505223512649536, + "learning_rate": 0.0001000473606914997, + "loss": 2.5313, + "step": 10047 + }, + { + "epoch": 0.8109111451860221, + "grad_norm": 0.6425966620445251, + "learning_rate": 0.00010003157379498886, + "loss": 2.5998, + "step": 10048 + }, + { + "epoch": 0.8109918489226051, + "grad_norm": 0.7163281440734863, + "learning_rate": 0.00010001578689769116, + "loss": 2.5493, + "step": 10049 + }, + { + "epoch": 0.8110725526591881, + "grad_norm": 0.7345306873321533, + "learning_rate": 0.0001, + "loss": 2.5609, + "step": 10050 + }, + { + "epoch": 0.8111532563957711, + "grad_norm": 0.6808427572250366, + "learning_rate": 9.998421310230884e-05, + "loss": 2.4823, + "step": 10051 + }, + { + "epoch": 0.8112339601323542, + "grad_norm": 0.7456082105636597, + "learning_rate": 9.996842620501115e-05, + "loss": 2.4782, + "step": 10052 + }, + { + "epoch": 0.8113146638689371, + "grad_norm": 0.7061728239059448, + "learning_rate": 9.995263930850034e-05, + "loss": 2.4906, + "step": 10053 + }, + { + "epoch": 0.8113953676055201, + "grad_norm": 0.691663920879364, + "learning_rate": 9.993685241316986e-05, + "loss": 2.5842, + "step": 10054 + }, + { + "epoch": 0.8114760713421031, + "grad_norm": 0.6899400353431702, + "learning_rate": 9.992106551941325e-05, + "loss": 2.5628, + "step": 10055 + }, + { + "epoch": 0.8115567750786862, + "grad_norm": 0.6909289360046387, + "learning_rate": 9.990527862762385e-05, + "loss": 2.5173, + "step": 10056 + }, + { + "epoch": 0.8116374788152692, + "grad_norm": 0.6507968306541443, + "learning_rate": 9.988949173819514e-05, + "loss": 2.5763, + "step": 10057 + }, + { + "epoch": 0.8117181825518521, + "grad_norm": 0.6972371339797974, + "learning_rate": 9.98737048515206e-05, + "loss": 2.604, + "step": 10058 + }, + { + "epoch": 0.8117988862884351, + "grad_norm": 0.6500107049942017, + "learning_rate": 9.985791796799368e-05, + "loss": 2.509, + "step": 
10059 + }, + { + "epoch": 0.8118795900250182, + "grad_norm": 0.704501211643219, + "learning_rate": 9.98421310880078e-05, + "loss": 2.5773, + "step": 10060 + }, + { + "epoch": 0.8119602937616012, + "grad_norm": 0.7037203311920166, + "learning_rate": 9.982634421195641e-05, + "loss": 2.5968, + "step": 10061 + }, + { + "epoch": 0.8120409974981841, + "grad_norm": 0.7161232829093933, + "learning_rate": 9.981055734023304e-05, + "loss": 2.5373, + "step": 10062 + }, + { + "epoch": 0.8121217012347671, + "grad_norm": 0.6602928638458252, + "learning_rate": 9.979477047323099e-05, + "loss": 2.5851, + "step": 10063 + }, + { + "epoch": 0.8122024049713502, + "grad_norm": 0.6685947775840759, + "learning_rate": 9.977898361134383e-05, + "loss": 2.5543, + "step": 10064 + }, + { + "epoch": 0.8122831087079332, + "grad_norm": 0.6772760152816772, + "learning_rate": 9.976319675496502e-05, + "loss": 2.5355, + "step": 10065 + }, + { + "epoch": 0.8123638124445162, + "grad_norm": 0.6140885949134827, + "learning_rate": 9.974740990448792e-05, + "loss": 2.489, + "step": 10066 + }, + { + "epoch": 0.8124445161810991, + "grad_norm": 0.6597142219543457, + "learning_rate": 9.973162306030604e-05, + "loss": 2.5619, + "step": 10067 + }, + { + "epoch": 0.8125252199176822, + "grad_norm": 0.6768592000007629, + "learning_rate": 9.971583622281281e-05, + "loss": 2.5107, + "step": 10068 + }, + { + "epoch": 0.8126059236542652, + "grad_norm": 0.682296633720398, + "learning_rate": 9.970004939240168e-05, + "loss": 2.5003, + "step": 10069 + }, + { + "epoch": 0.8126866273908482, + "grad_norm": 0.7356325387954712, + "learning_rate": 9.96842625694661e-05, + "loss": 2.5864, + "step": 10070 + }, + { + "epoch": 0.8127673311274312, + "grad_norm": 0.6818091869354248, + "learning_rate": 9.966847575439956e-05, + "loss": 2.5375, + "step": 10071 + }, + { + "epoch": 0.8128480348640142, + "grad_norm": 0.6954368352890015, + "learning_rate": 9.965268894759543e-05, + "loss": 2.5314, + "step": 10072 + }, + { + "epoch": 
0.8129287386005972, + "grad_norm": 0.6759306192398071, + "learning_rate": 9.963690214944721e-05, + "loss": 2.5881, + "step": 10073 + }, + { + "epoch": 0.8130094423371802, + "grad_norm": 0.6546545624732971, + "learning_rate": 9.962111536034832e-05, + "loss": 2.5264, + "step": 10074 + }, + { + "epoch": 0.8130901460737632, + "grad_norm": 0.6709586977958679, + "learning_rate": 9.960532858069226e-05, + "loss": 2.5906, + "step": 10075 + }, + { + "epoch": 0.8131708498103463, + "grad_norm": 0.7310851812362671, + "learning_rate": 9.958954181087241e-05, + "loss": 2.5134, + "step": 10076 + }, + { + "epoch": 0.8132515535469292, + "grad_norm": 0.6793027520179749, + "learning_rate": 9.957375505128227e-05, + "loss": 2.5387, + "step": 10077 + }, + { + "epoch": 0.8133322572835122, + "grad_norm": 0.6965875029563904, + "learning_rate": 9.955796830231528e-05, + "loss": 2.5649, + "step": 10078 + }, + { + "epoch": 0.8134129610200952, + "grad_norm": 0.6597574353218079, + "learning_rate": 9.954218156436485e-05, + "loss": 2.5281, + "step": 10079 + }, + { + "epoch": 0.8134936647566783, + "grad_norm": 0.7911555171012878, + "learning_rate": 9.952639483782445e-05, + "loss": 2.535, + "step": 10080 + }, + { + "epoch": 0.8135743684932613, + "grad_norm": 0.7405688762664795, + "learning_rate": 9.951060812308757e-05, + "loss": 2.5303, + "step": 10081 + }, + { + "epoch": 0.8136550722298442, + "grad_norm": 0.6961480379104614, + "learning_rate": 9.949482142054758e-05, + "loss": 2.4959, + "step": 10082 + }, + { + "epoch": 0.8137357759664272, + "grad_norm": 0.6761718392372131, + "learning_rate": 9.947903473059797e-05, + "loss": 2.5591, + "step": 10083 + }, + { + "epoch": 0.8138164797030103, + "grad_norm": 0.7383104562759399, + "learning_rate": 9.946324805363218e-05, + "loss": 2.5848, + "step": 10084 + }, + { + "epoch": 0.8138971834395933, + "grad_norm": 0.6495873928070068, + "learning_rate": 9.944746139004364e-05, + "loss": 2.4972, + "step": 10085 + }, + { + "epoch": 0.8139778871761763, + "grad_norm": 
0.7247152328491211, + "learning_rate": 9.94316747402258e-05, + "loss": 2.5361, + "step": 10086 + }, + { + "epoch": 0.8140585909127592, + "grad_norm": 0.6965751051902771, + "learning_rate": 9.941588810457215e-05, + "loss": 2.4997, + "step": 10087 + }, + { + "epoch": 0.8141392946493423, + "grad_norm": 0.7138223648071289, + "learning_rate": 9.940010148347603e-05, + "loss": 2.5226, + "step": 10088 + }, + { + "epoch": 0.8142199983859253, + "grad_norm": 0.6571210622787476, + "learning_rate": 9.938431487733099e-05, + "loss": 2.5388, + "step": 10089 + }, + { + "epoch": 0.8143007021225083, + "grad_norm": 0.6721277832984924, + "learning_rate": 9.936852828653042e-05, + "loss": 2.5219, + "step": 10090 + }, + { + "epoch": 0.8143814058590912, + "grad_norm": 0.647520124912262, + "learning_rate": 9.935274171146782e-05, + "loss": 2.6199, + "step": 10091 + }, + { + "epoch": 0.8144621095956743, + "grad_norm": 0.6892204284667969, + "learning_rate": 9.933695515253654e-05, + "loss": 2.5132, + "step": 10092 + }, + { + "epoch": 0.8145428133322573, + "grad_norm": 0.6979050636291504, + "learning_rate": 9.932116861013008e-05, + "loss": 2.5148, + "step": 10093 + }, + { + "epoch": 0.8146235170688403, + "grad_norm": 0.6682664752006531, + "learning_rate": 9.930538208464189e-05, + "loss": 2.5795, + "step": 10094 + }, + { + "epoch": 0.8147042208054233, + "grad_norm": 0.734121561050415, + "learning_rate": 9.928959557646537e-05, + "loss": 2.5469, + "step": 10095 + }, + { + "epoch": 0.8147849245420064, + "grad_norm": 0.6669620275497437, + "learning_rate": 9.9273809085994e-05, + "loss": 2.5277, + "step": 10096 + }, + { + "epoch": 0.8148656282785893, + "grad_norm": 0.6750600934028625, + "learning_rate": 9.925802261362124e-05, + "loss": 2.5869, + "step": 10097 + }, + { + "epoch": 0.8149463320151723, + "grad_norm": 0.6813061237335205, + "learning_rate": 9.924223615974044e-05, + "loss": 2.585, + "step": 10098 + }, + { + "epoch": 0.8150270357517553, + "grad_norm": 0.6775497794151306, + "learning_rate": 
9.92264497247451e-05, + "loss": 2.5353, + "step": 10099 + }, + { + "epoch": 0.8151077394883383, + "grad_norm": 0.6877530813217163, + "learning_rate": 9.92106633090287e-05, + "loss": 2.5349, + "step": 10100 + }, + { + "epoch": 0.8151884432249213, + "grad_norm": 0.6984169483184814, + "learning_rate": 9.91948769129846e-05, + "loss": 2.5986, + "step": 10101 + }, + { + "epoch": 0.8152691469615043, + "grad_norm": 0.7144806981086731, + "learning_rate": 9.917909053700626e-05, + "loss": 2.5797, + "step": 10102 + }, + { + "epoch": 0.8153498506980873, + "grad_norm": 0.6494203209877014, + "learning_rate": 9.916330418148715e-05, + "loss": 2.5035, + "step": 10103 + }, + { + "epoch": 0.8154305544346703, + "grad_norm": 0.6669752597808838, + "learning_rate": 9.914751784682069e-05, + "loss": 2.5489, + "step": 10104 + }, + { + "epoch": 0.8155112581712534, + "grad_norm": 0.6557981371879578, + "learning_rate": 9.913173153340029e-05, + "loss": 2.5266, + "step": 10105 + }, + { + "epoch": 0.8155919619078363, + "grad_norm": 0.6633948087692261, + "learning_rate": 9.911594524161941e-05, + "loss": 2.5263, + "step": 10106 + }, + { + "epoch": 0.8156726656444193, + "grad_norm": 0.7191522717475891, + "learning_rate": 9.910015897187154e-05, + "loss": 2.5625, + "step": 10107 + }, + { + "epoch": 0.8157533693810023, + "grad_norm": 0.7089062929153442, + "learning_rate": 9.908437272455001e-05, + "loss": 2.5644, + "step": 10108 + }, + { + "epoch": 0.8158340731175854, + "grad_norm": 0.7662761211395264, + "learning_rate": 9.906858650004831e-05, + "loss": 2.5875, + "step": 10109 + }, + { + "epoch": 0.8159147768541684, + "grad_norm": 0.6658861041069031, + "learning_rate": 9.905280029875988e-05, + "loss": 2.5818, + "step": 10110 + }, + { + "epoch": 0.8159954805907513, + "grad_norm": 0.7229514718055725, + "learning_rate": 9.903701412107815e-05, + "loss": 2.5421, + "step": 10111 + }, + { + "epoch": 0.8160761843273343, + "grad_norm": 0.7295149564743042, + "learning_rate": 9.902122796739652e-05, + "loss": 
2.5298, + "step": 10112 + }, + { + "epoch": 0.8161568880639174, + "grad_norm": 0.6805420517921448, + "learning_rate": 9.900544183810849e-05, + "loss": 2.6693, + "step": 10113 + }, + { + "epoch": 0.8162375918005004, + "grad_norm": 0.6560602188110352, + "learning_rate": 9.898965573360738e-05, + "loss": 2.5445, + "step": 10114 + }, + { + "epoch": 0.8163182955370833, + "grad_norm": 0.690396785736084, + "learning_rate": 9.897386965428674e-05, + "loss": 2.5281, + "step": 10115 + }, + { + "epoch": 0.8163989992736663, + "grad_norm": 0.6905054450035095, + "learning_rate": 9.895808360053998e-05, + "loss": 2.5406, + "step": 10116 + }, + { + "epoch": 0.8164797030102494, + "grad_norm": 0.6905301213264465, + "learning_rate": 9.894229757276045e-05, + "loss": 2.5458, + "step": 10117 + }, + { + "epoch": 0.8165604067468324, + "grad_norm": 0.6827620267868042, + "learning_rate": 9.892651157134162e-05, + "loss": 2.4403, + "step": 10118 + }, + { + "epoch": 0.8166411104834154, + "grad_norm": 0.7614343166351318, + "learning_rate": 9.891072559667697e-05, + "loss": 2.6369, + "step": 10119 + }, + { + "epoch": 0.8167218142199983, + "grad_norm": 0.6913704872131348, + "learning_rate": 9.889493964915985e-05, + "loss": 2.5914, + "step": 10120 + }, + { + "epoch": 0.8168025179565814, + "grad_norm": 0.7026088237762451, + "learning_rate": 9.887915372918372e-05, + "loss": 2.5139, + "step": 10121 + }, + { + "epoch": 0.8168832216931644, + "grad_norm": 0.7064465284347534, + "learning_rate": 9.886336783714203e-05, + "loss": 2.549, + "step": 10122 + }, + { + "epoch": 0.8169639254297474, + "grad_norm": 0.7345553040504456, + "learning_rate": 9.884758197342821e-05, + "loss": 2.5887, + "step": 10123 + }, + { + "epoch": 0.8170446291663304, + "grad_norm": 0.6916251182556152, + "learning_rate": 9.883179613843563e-05, + "loss": 2.5659, + "step": 10124 + }, + { + "epoch": 0.8171253329029134, + "grad_norm": 0.6428200602531433, + "learning_rate": 9.881601033255771e-05, + "loss": 2.5379, + "step": 10125 + }, + { + 
"epoch": 0.8172060366394964, + "grad_norm": 0.7433571815490723, + "learning_rate": 9.880022455618796e-05, + "loss": 2.5751, + "step": 10126 + }, + { + "epoch": 0.8172867403760794, + "grad_norm": 0.733256995677948, + "learning_rate": 9.878443880971974e-05, + "loss": 2.4971, + "step": 10127 + }, + { + "epoch": 0.8173674441126624, + "grad_norm": 0.708289384841919, + "learning_rate": 9.876865309354646e-05, + "loss": 2.635, + "step": 10128 + }, + { + "epoch": 0.8174481478492455, + "grad_norm": 0.6877188682556152, + "learning_rate": 9.87528674080616e-05, + "loss": 2.5827, + "step": 10129 + }, + { + "epoch": 0.8175288515858284, + "grad_norm": 0.7108712792396545, + "learning_rate": 9.873708175365852e-05, + "loss": 2.5643, + "step": 10130 + }, + { + "epoch": 0.8176095553224114, + "grad_norm": 0.7435629367828369, + "learning_rate": 9.872129613073065e-05, + "loss": 2.5267, + "step": 10131 + }, + { + "epoch": 0.8176902590589944, + "grad_norm": 0.669913113117218, + "learning_rate": 9.870551053967148e-05, + "loss": 2.5684, + "step": 10132 + }, + { + "epoch": 0.8177709627955775, + "grad_norm": 0.6981424689292908, + "learning_rate": 9.868972498087431e-05, + "loss": 2.592, + "step": 10133 + }, + { + "epoch": 0.8178516665321605, + "grad_norm": 0.6661834716796875, + "learning_rate": 9.867393945473263e-05, + "loss": 2.5082, + "step": 10134 + }, + { + "epoch": 0.8179323702687434, + "grad_norm": 0.6611261367797852, + "learning_rate": 9.865815396163987e-05, + "loss": 2.556, + "step": 10135 + }, + { + "epoch": 0.8180130740053264, + "grad_norm": 0.6732283234596252, + "learning_rate": 9.86423685019894e-05, + "loss": 2.5668, + "step": 10136 + }, + { + "epoch": 0.8180937777419095, + "grad_norm": 0.6768637299537659, + "learning_rate": 9.862658307617465e-05, + "loss": 2.5467, + "step": 10137 + }, + { + "epoch": 0.8181744814784925, + "grad_norm": 0.6943596601486206, + "learning_rate": 9.861079768458904e-05, + "loss": 2.5989, + "step": 10138 + }, + { + "epoch": 0.8182551852150755, + "grad_norm": 
0.7369638681411743, + "learning_rate": 9.859501232762601e-05, + "loss": 2.5189, + "step": 10139 + }, + { + "epoch": 0.8183358889516584, + "grad_norm": 0.7443112730979919, + "learning_rate": 9.857922700567892e-05, + "loss": 2.5979, + "step": 10140 + }, + { + "epoch": 0.8184165926882415, + "grad_norm": 0.6726163029670715, + "learning_rate": 9.85634417191412e-05, + "loss": 2.5451, + "step": 10141 + }, + { + "epoch": 0.8184972964248245, + "grad_norm": 0.720492422580719, + "learning_rate": 9.854765646840632e-05, + "loss": 2.6116, + "step": 10142 + }, + { + "epoch": 0.8185780001614075, + "grad_norm": 0.6998233795166016, + "learning_rate": 9.85318712538676e-05, + "loss": 2.556, + "step": 10143 + }, + { + "epoch": 0.8186587038979904, + "grad_norm": 0.7580110430717468, + "learning_rate": 9.851608607591848e-05, + "loss": 2.5222, + "step": 10144 + }, + { + "epoch": 0.8187394076345735, + "grad_norm": 0.6893007755279541, + "learning_rate": 9.85003009349524e-05, + "loss": 2.4639, + "step": 10145 + }, + { + "epoch": 0.8188201113711565, + "grad_norm": 0.6448441743850708, + "learning_rate": 9.84845158313627e-05, + "loss": 2.5249, + "step": 10146 + }, + { + "epoch": 0.8189008151077395, + "grad_norm": 0.7591872215270996, + "learning_rate": 9.846873076554285e-05, + "loss": 2.5173, + "step": 10147 + }, + { + "epoch": 0.8189815188443225, + "grad_norm": 0.6994685530662537, + "learning_rate": 9.845294573788626e-05, + "loss": 2.5181, + "step": 10148 + }, + { + "epoch": 0.8190622225809054, + "grad_norm": 0.6822378635406494, + "learning_rate": 9.843716074878628e-05, + "loss": 2.5109, + "step": 10149 + }, + { + "epoch": 0.8191429263174885, + "grad_norm": 0.6730359792709351, + "learning_rate": 9.842137579863632e-05, + "loss": 2.5402, + "step": 10150 + }, + { + "epoch": 0.8192236300540715, + "grad_norm": 0.6280627846717834, + "learning_rate": 9.840559088782984e-05, + "loss": 2.4806, + "step": 10151 + }, + { + "epoch": 0.8193043337906545, + "grad_norm": 0.6887876391410828, + "learning_rate": 
9.838980601676017e-05, + "loss": 2.5498, + "step": 10152 + }, + { + "epoch": 0.8193850375272375, + "grad_norm": 0.7823790907859802, + "learning_rate": 9.837402118582075e-05, + "loss": 2.467, + "step": 10153 + }, + { + "epoch": 0.8194657412638205, + "grad_norm": 0.8109384179115295, + "learning_rate": 9.835823639540496e-05, + "loss": 2.5898, + "step": 10154 + }, + { + "epoch": 0.8195464450004035, + "grad_norm": 0.6883066892623901, + "learning_rate": 9.834245164590624e-05, + "loss": 2.5589, + "step": 10155 + }, + { + "epoch": 0.8196271487369865, + "grad_norm": 0.7291175723075867, + "learning_rate": 9.832666693771794e-05, + "loss": 2.5317, + "step": 10156 + }, + { + "epoch": 0.8197078524735695, + "grad_norm": 0.6819449663162231, + "learning_rate": 9.831088227123346e-05, + "loss": 2.5513, + "step": 10157 + }, + { + "epoch": 0.8197885562101526, + "grad_norm": 0.7038870453834534, + "learning_rate": 9.829509764684626e-05, + "loss": 2.5301, + "step": 10158 + }, + { + "epoch": 0.8198692599467355, + "grad_norm": 0.7483033537864685, + "learning_rate": 9.827931306494965e-05, + "loss": 2.5273, + "step": 10159 + }, + { + "epoch": 0.8199499636833185, + "grad_norm": 0.6998303532600403, + "learning_rate": 9.826352852593705e-05, + "loss": 2.5083, + "step": 10160 + }, + { + "epoch": 0.8200306674199015, + "grad_norm": 0.6865512728691101, + "learning_rate": 9.824774403020188e-05, + "loss": 2.5693, + "step": 10161 + }, + { + "epoch": 0.8201113711564846, + "grad_norm": 0.8144257068634033, + "learning_rate": 9.823195957813749e-05, + "loss": 2.6052, + "step": 10162 + }, + { + "epoch": 0.8201920748930676, + "grad_norm": 0.6920810341835022, + "learning_rate": 9.821617517013729e-05, + "loss": 2.5467, + "step": 10163 + }, + { + "epoch": 0.8202727786296505, + "grad_norm": 0.7538061141967773, + "learning_rate": 9.820039080659469e-05, + "loss": 2.5933, + "step": 10164 + }, + { + "epoch": 0.8203534823662335, + "grad_norm": 0.6744310259819031, + "learning_rate": 9.818460648790302e-05, + "loss": 
2.5633, + "step": 10165 + }, + { + "epoch": 0.8204341861028166, + "grad_norm": 0.6943854689598083, + "learning_rate": 9.816882221445571e-05, + "loss": 2.5868, + "step": 10166 + }, + { + "epoch": 0.8205148898393996, + "grad_norm": 0.6486902832984924, + "learning_rate": 9.815303798664614e-05, + "loss": 2.4983, + "step": 10167 + }, + { + "epoch": 0.8205955935759826, + "grad_norm": 0.6699065566062927, + "learning_rate": 9.813725380486773e-05, + "loss": 2.563, + "step": 10168 + }, + { + "epoch": 0.8206762973125655, + "grad_norm": 0.6547110080718994, + "learning_rate": 9.812146966951379e-05, + "loss": 2.5404, + "step": 10169 + }, + { + "epoch": 0.8207570010491486, + "grad_norm": 0.692592203617096, + "learning_rate": 9.810568558097774e-05, + "loss": 2.5625, + "step": 10170 + }, + { + "epoch": 0.8208377047857316, + "grad_norm": 0.6696702837944031, + "learning_rate": 9.808990153965296e-05, + "loss": 2.5866, + "step": 10171 + }, + { + "epoch": 0.8209184085223146, + "grad_norm": 0.6425998210906982, + "learning_rate": 9.807411754593282e-05, + "loss": 2.5487, + "step": 10172 + }, + { + "epoch": 0.8209991122588975, + "grad_norm": 0.6849769949913025, + "learning_rate": 9.805833360021069e-05, + "loss": 2.5772, + "step": 10173 + }, + { + "epoch": 0.8210798159954806, + "grad_norm": 0.7451414465904236, + "learning_rate": 9.804254970288001e-05, + "loss": 2.5089, + "step": 10174 + }, + { + "epoch": 0.8211605197320636, + "grad_norm": 0.7134390473365784, + "learning_rate": 9.802676585433408e-05, + "loss": 2.541, + "step": 10175 + }, + { + "epoch": 0.8212412234686466, + "grad_norm": 0.7490564584732056, + "learning_rate": 9.801098205496627e-05, + "loss": 2.5299, + "step": 10176 + }, + { + "epoch": 0.8213219272052296, + "grad_norm": 0.6614408493041992, + "learning_rate": 9.799519830517005e-05, + "loss": 2.5252, + "step": 10177 + }, + { + "epoch": 0.8214026309418127, + "grad_norm": 0.761049211025238, + "learning_rate": 9.797941460533869e-05, + "loss": 2.5153, + "step": 10178 + }, + { + 
"epoch": 0.8214833346783956, + "grad_norm": 0.6352702379226685, + "learning_rate": 9.796363095586561e-05, + "loss": 2.5407, + "step": 10179 + }, + { + "epoch": 0.8215640384149786, + "grad_norm": 0.684212863445282, + "learning_rate": 9.794784735714417e-05, + "loss": 2.5425, + "step": 10180 + }, + { + "epoch": 0.8216447421515616, + "grad_norm": 0.652987539768219, + "learning_rate": 9.793206380956772e-05, + "loss": 2.5542, + "step": 10181 + }, + { + "epoch": 0.8217254458881447, + "grad_norm": 0.6912897229194641, + "learning_rate": 9.791628031352966e-05, + "loss": 2.5041, + "step": 10182 + }, + { + "epoch": 0.8218061496247276, + "grad_norm": 0.7025408744812012, + "learning_rate": 9.790049686942333e-05, + "loss": 2.5296, + "step": 10183 + }, + { + "epoch": 0.8218868533613106, + "grad_norm": 0.7580777406692505, + "learning_rate": 9.788471347764215e-05, + "loss": 2.578, + "step": 10184 + }, + { + "epoch": 0.8219675570978936, + "grad_norm": 0.7044378519058228, + "learning_rate": 9.78689301385794e-05, + "loss": 2.5093, + "step": 10185 + }, + { + "epoch": 0.8220482608344767, + "grad_norm": 0.7339754700660706, + "learning_rate": 9.785314685262849e-05, + "loss": 2.5202, + "step": 10186 + }, + { + "epoch": 0.8221289645710597, + "grad_norm": 0.6872244477272034, + "learning_rate": 9.783736362018277e-05, + "loss": 2.541, + "step": 10187 + }, + { + "epoch": 0.8222096683076426, + "grad_norm": 0.7052434682846069, + "learning_rate": 9.78215804416356e-05, + "loss": 2.4968, + "step": 10188 + }, + { + "epoch": 0.8222903720442256, + "grad_norm": 0.6739610433578491, + "learning_rate": 9.780579731738033e-05, + "loss": 2.5137, + "step": 10189 + }, + { + "epoch": 0.8223710757808087, + "grad_norm": 0.6842939853668213, + "learning_rate": 9.779001424781035e-05, + "loss": 2.5329, + "step": 10190 + }, + { + "epoch": 0.8224517795173917, + "grad_norm": 0.7057977914810181, + "learning_rate": 9.777423123331898e-05, + "loss": 2.5657, + "step": 10191 + }, + { + "epoch": 0.8225324832539747, + 
"grad_norm": 0.6748424172401428, + "learning_rate": 9.775844827429958e-05, + "loss": 2.6104, + "step": 10192 + }, + { + "epoch": 0.8226131869905576, + "grad_norm": 0.6492514610290527, + "learning_rate": 9.774266537114555e-05, + "loss": 2.58, + "step": 10193 + }, + { + "epoch": 0.8226938907271407, + "grad_norm": 0.6987641453742981, + "learning_rate": 9.772688252425016e-05, + "loss": 2.5301, + "step": 10194 + }, + { + "epoch": 0.8227745944637237, + "grad_norm": 0.710921585559845, + "learning_rate": 9.771109973400679e-05, + "loss": 2.6245, + "step": 10195 + }, + { + "epoch": 0.8228552982003067, + "grad_norm": 0.6673738360404968, + "learning_rate": 9.769531700080883e-05, + "loss": 2.5205, + "step": 10196 + }, + { + "epoch": 0.8229360019368896, + "grad_norm": 0.6705252528190613, + "learning_rate": 9.767953432504958e-05, + "loss": 2.4932, + "step": 10197 + }, + { + "epoch": 0.8230167056734727, + "grad_norm": 0.6587076783180237, + "learning_rate": 9.766375170712237e-05, + "loss": 2.5085, + "step": 10198 + }, + { + "epoch": 0.8230974094100557, + "grad_norm": 0.7285338640213013, + "learning_rate": 9.764796914742061e-05, + "loss": 2.5481, + "step": 10199 + }, + { + "epoch": 0.8231781131466387, + "grad_norm": 0.6971831321716309, + "learning_rate": 9.763218664633763e-05, + "loss": 2.6092, + "step": 10200 + }, + { + "epoch": 0.8232588168832217, + "grad_norm": 0.6940265893936157, + "learning_rate": 9.761640420426669e-05, + "loss": 2.5325, + "step": 10201 + }, + { + "epoch": 0.8233395206198046, + "grad_norm": 0.6612978577613831, + "learning_rate": 9.76006218216012e-05, + "loss": 2.5532, + "step": 10202 + }, + { + "epoch": 0.8234202243563877, + "grad_norm": 0.6707638502120972, + "learning_rate": 9.758483949873453e-05, + "loss": 2.512, + "step": 10203 + }, + { + "epoch": 0.8235009280929707, + "grad_norm": 0.6636764407157898, + "learning_rate": 9.756905723605994e-05, + "loss": 2.5446, + "step": 10204 + }, + { + "epoch": 0.8235816318295537, + "grad_norm": 0.6996643543243408, + 
"learning_rate": 9.755327503397081e-05, + "loss": 2.5504, + "step": 10205 + }, + { + "epoch": 0.8236623355661367, + "grad_norm": 0.604487955570221, + "learning_rate": 9.753749289286046e-05, + "loss": 2.4767, + "step": 10206 + }, + { + "epoch": 0.8237430393027197, + "grad_norm": 0.6484553217887878, + "learning_rate": 9.752171081312222e-05, + "loss": 2.5522, + "step": 10207 + }, + { + "epoch": 0.8238237430393027, + "grad_norm": 0.6890987753868103, + "learning_rate": 9.75059287951494e-05, + "loss": 2.5545, + "step": 10208 + }, + { + "epoch": 0.8239044467758857, + "grad_norm": 0.6786034107208252, + "learning_rate": 9.749014683933541e-05, + "loss": 2.591, + "step": 10209 + }, + { + "epoch": 0.8239851505124687, + "grad_norm": 0.751192033290863, + "learning_rate": 9.747436494607349e-05, + "loss": 2.5335, + "step": 10210 + }, + { + "epoch": 0.8240658542490518, + "grad_norm": 0.6611589789390564, + "learning_rate": 9.7458583115757e-05, + "loss": 2.5104, + "step": 10211 + }, + { + "epoch": 0.8241465579856347, + "grad_norm": 0.6602892875671387, + "learning_rate": 9.744280134877926e-05, + "loss": 2.5319, + "step": 10212 + }, + { + "epoch": 0.8242272617222177, + "grad_norm": 0.6856467127799988, + "learning_rate": 9.742701964553359e-05, + "loss": 2.5418, + "step": 10213 + }, + { + "epoch": 0.8243079654588007, + "grad_norm": 0.6810153126716614, + "learning_rate": 9.741123800641332e-05, + "loss": 2.5691, + "step": 10214 + }, + { + "epoch": 0.8243886691953838, + "grad_norm": 0.7044229507446289, + "learning_rate": 9.739545643181175e-05, + "loss": 2.5911, + "step": 10215 + }, + { + "epoch": 0.8244693729319668, + "grad_norm": 0.6689271330833435, + "learning_rate": 9.737967492212225e-05, + "loss": 2.5374, + "step": 10216 + }, + { + "epoch": 0.8245500766685497, + "grad_norm": 0.6558904051780701, + "learning_rate": 9.736389347773807e-05, + "loss": 2.5118, + "step": 10217 + }, + { + "epoch": 0.8246307804051327, + "grad_norm": 0.6900291442871094, + "learning_rate": 9.734811209905255e-05, + 
"loss": 2.515, + "step": 10218 + }, + { + "epoch": 0.8247114841417158, + "grad_norm": 0.7129492163658142, + "learning_rate": 9.733233078645907e-05, + "loss": 2.5191, + "step": 10219 + }, + { + "epoch": 0.8247921878782988, + "grad_norm": 0.7031866908073425, + "learning_rate": 9.731654954035082e-05, + "loss": 2.5616, + "step": 10220 + }, + { + "epoch": 0.8248728916148818, + "grad_norm": 0.6418820023536682, + "learning_rate": 9.730076836112118e-05, + "loss": 2.537, + "step": 10221 + }, + { + "epoch": 0.8249535953514647, + "grad_norm": 0.6731035113334656, + "learning_rate": 9.728498724916347e-05, + "loss": 2.5483, + "step": 10222 + }, + { + "epoch": 0.8250342990880478, + "grad_norm": 0.6941342353820801, + "learning_rate": 9.726920620487096e-05, + "loss": 2.5314, + "step": 10223 + }, + { + "epoch": 0.8251150028246308, + "grad_norm": 0.6808927059173584, + "learning_rate": 9.725342522863696e-05, + "loss": 2.5521, + "step": 10224 + }, + { + "epoch": 0.8251957065612138, + "grad_norm": 0.6873155832290649, + "learning_rate": 9.723764432085481e-05, + "loss": 2.5205, + "step": 10225 + }, + { + "epoch": 0.8252764102977967, + "grad_norm": 0.8590287566184998, + "learning_rate": 9.722186348191776e-05, + "loss": 2.5378, + "step": 10226 + }, + { + "epoch": 0.8253571140343798, + "grad_norm": 0.691523015499115, + "learning_rate": 9.720608271221912e-05, + "loss": 2.5062, + "step": 10227 + }, + { + "epoch": 0.8254378177709628, + "grad_norm": 0.6695523262023926, + "learning_rate": 9.719030201215226e-05, + "loss": 2.5164, + "step": 10228 + }, + { + "epoch": 0.8255185215075458, + "grad_norm": 0.745516300201416, + "learning_rate": 9.717452138211037e-05, + "loss": 2.5207, + "step": 10229 + }, + { + "epoch": 0.8255992252441288, + "grad_norm": 0.6628115773200989, + "learning_rate": 9.715874082248679e-05, + "loss": 2.5293, + "step": 10230 + }, + { + "epoch": 0.8256799289807119, + "grad_norm": 0.6531884074211121, + "learning_rate": 9.714296033367482e-05, + "loss": 2.4812, + "step": 10231 + }, + { 
+ "epoch": 0.8257606327172948, + "grad_norm": 0.7444833517074585, + "learning_rate": 9.712717991606777e-05, + "loss": 2.5422, + "step": 10232 + }, + { + "epoch": 0.8258413364538778, + "grad_norm": 0.7013139128684998, + "learning_rate": 9.711139957005888e-05, + "loss": 2.5117, + "step": 10233 + }, + { + "epoch": 0.8259220401904608, + "grad_norm": 0.6588132977485657, + "learning_rate": 9.709561929604147e-05, + "loss": 2.5257, + "step": 10234 + }, + { + "epoch": 0.8260027439270439, + "grad_norm": 0.7538537383079529, + "learning_rate": 9.707983909440886e-05, + "loss": 2.5225, + "step": 10235 + }, + { + "epoch": 0.8260834476636268, + "grad_norm": null, + "learning_rate": 9.707983909440886e-05, + "loss": 2.5532, + "step": 10236 + }, + { + "epoch": 0.8261641514002098, + "grad_norm": 0.7414929270744324, + "learning_rate": 9.706405896555425e-05, + "loss": 2.5653, + "step": 10237 + }, + { + "epoch": 0.8262448551367928, + "grad_norm": 0.757057785987854, + "learning_rate": 9.704827890987097e-05, + "loss": 2.5732, + "step": 10238 + }, + { + "epoch": 0.8263255588733759, + "grad_norm": 0.730721652507782, + "learning_rate": 9.703249892775232e-05, + "loss": 2.5317, + "step": 10239 + }, + { + "epoch": 0.8264062626099589, + "grad_norm": 0.6943208575248718, + "learning_rate": 9.701671901959151e-05, + "loss": 2.5849, + "step": 10240 + }, + { + "epoch": 0.8264869663465418, + "grad_norm": 0.7111102938652039, + "learning_rate": 9.700093918578188e-05, + "loss": 2.5007, + "step": 10241 + }, + { + "epoch": 0.8265676700831248, + "grad_norm": 0.7240251302719116, + "learning_rate": 9.69851594267167e-05, + "loss": 2.5002, + "step": 10242 + }, + { + "epoch": 0.8266483738197079, + "grad_norm": 0.6624411344528198, + "learning_rate": 9.696937974278922e-05, + "loss": 2.5175, + "step": 10243 + }, + { + "epoch": 0.8267290775562909, + "grad_norm": 0.6972576975822449, + "learning_rate": 9.695360013439269e-05, + "loss": 2.5285, + "step": 10244 + }, + { + "epoch": 0.8268097812928739, + "grad_norm": 
0.684446394443512, + "learning_rate": 9.693782060192046e-05, + "loss": 2.57, + "step": 10245 + }, + { + "epoch": 0.8268904850294568, + "grad_norm": 0.6920011639595032, + "learning_rate": 9.692204114576573e-05, + "loss": 2.5042, + "step": 10246 + }, + { + "epoch": 0.8269711887660399, + "grad_norm": 0.7526013851165771, + "learning_rate": 9.690626176632176e-05, + "loss": 2.5878, + "step": 10247 + }, + { + "epoch": 0.8270518925026229, + "grad_norm": 0.6936177611351013, + "learning_rate": 9.689048246398184e-05, + "loss": 2.5572, + "step": 10248 + }, + { + "epoch": 0.8271325962392059, + "grad_norm": 0.672168493270874, + "learning_rate": 9.687470323913922e-05, + "loss": 2.5127, + "step": 10249 + }, + { + "epoch": 0.8272132999757889, + "grad_norm": 0.6847899556159973, + "learning_rate": 9.685892409218717e-05, + "loss": 2.5443, + "step": 10250 + }, + { + "epoch": 0.8272940037123718, + "grad_norm": 0.6877103447914124, + "learning_rate": 9.684314502351894e-05, + "loss": 2.4924, + "step": 10251 + }, + { + "epoch": 0.8273747074489549, + "grad_norm": 0.6894243359565735, + "learning_rate": 9.682736603352783e-05, + "loss": 2.5107, + "step": 10252 + }, + { + "epoch": 0.8274554111855379, + "grad_norm": 0.7318278551101685, + "learning_rate": 9.681158712260698e-05, + "loss": 2.5276, + "step": 10253 + }, + { + "epoch": 0.8275361149221209, + "grad_norm": 0.6949039101600647, + "learning_rate": 9.679580829114975e-05, + "loss": 2.5128, + "step": 10254 + }, + { + "epoch": 0.8276168186587038, + "grad_norm": 0.6523800492286682, + "learning_rate": 9.678002953954939e-05, + "loss": 2.5584, + "step": 10255 + }, + { + "epoch": 0.8276975223952869, + "grad_norm": 0.6914480328559875, + "learning_rate": 9.676425086819905e-05, + "loss": 2.5597, + "step": 10256 + }, + { + "epoch": 0.8277782261318699, + "grad_norm": 0.7107869982719421, + "learning_rate": 9.674847227749206e-05, + "loss": 2.5009, + "step": 10257 + }, + { + "epoch": 0.8278589298684529, + "grad_norm": 0.7066758275032043, + "learning_rate": 
9.673269376782166e-05, + "loss": 2.4599, + "step": 10258 + }, + { + "epoch": 0.8279396336050359, + "grad_norm": 0.7147037982940674, + "learning_rate": 9.671691533958104e-05, + "loss": 2.4478, + "step": 10259 + }, + { + "epoch": 0.828020337341619, + "grad_norm": 0.666265606880188, + "learning_rate": 9.670113699316347e-05, + "loss": 2.5652, + "step": 10260 + }, + { + "epoch": 0.8281010410782019, + "grad_norm": 0.7026315927505493, + "learning_rate": 9.668535872896225e-05, + "loss": 2.5397, + "step": 10261 + }, + { + "epoch": 0.8281817448147849, + "grad_norm": 0.6611438393592834, + "learning_rate": 9.66695805473705e-05, + "loss": 2.5628, + "step": 10262 + }, + { + "epoch": 0.8282624485513679, + "grad_norm": 0.7211201190948486, + "learning_rate": 9.66538024487815e-05, + "loss": 2.5551, + "step": 10263 + }, + { + "epoch": 0.828343152287951, + "grad_norm": 0.7224553227424622, + "learning_rate": 9.663802443358849e-05, + "loss": 2.5329, + "step": 10264 + }, + { + "epoch": 0.8284238560245339, + "grad_norm": 0.6805843710899353, + "learning_rate": 9.662224650218474e-05, + "loss": 2.5744, + "step": 10265 + }, + { + "epoch": 0.8285045597611169, + "grad_norm": 0.7101335525512695, + "learning_rate": 9.66064686549634e-05, + "loss": 2.5281, + "step": 10266 + }, + { + "epoch": 0.8285852634976999, + "grad_norm": 0.7208443284034729, + "learning_rate": 9.659069089231774e-05, + "loss": 2.5326, + "step": 10267 + }, + { + "epoch": 0.828665967234283, + "grad_norm": 0.747894287109375, + "learning_rate": 9.6574913214641e-05, + "loss": 2.4909, + "step": 10268 + }, + { + "epoch": 0.828746670970866, + "grad_norm": 0.6618027091026306, + "learning_rate": 9.655913562232635e-05, + "loss": 2.6091, + "step": 10269 + }, + { + "epoch": 0.8288273747074489, + "grad_norm": 0.7101535201072693, + "learning_rate": 9.654335811576704e-05, + "loss": 2.5194, + "step": 10270 + }, + { + "epoch": 0.8289080784440319, + "grad_norm": 0.727763831615448, + "learning_rate": 9.652758069535631e-05, + "loss": 2.5767, + 
"step": 10271 + }, + { + "epoch": 0.828988782180615, + "grad_norm": 0.6936737895011902, + "learning_rate": 9.65118033614873e-05, + "loss": 2.498, + "step": 10272 + }, + { + "epoch": 0.829069485917198, + "grad_norm": 0.699462354183197, + "learning_rate": 9.64960261145533e-05, + "loss": 2.5033, + "step": 10273 + }, + { + "epoch": 0.829150189653781, + "grad_norm": 0.7024868726730347, + "learning_rate": 9.648024895494749e-05, + "loss": 2.5937, + "step": 10274 + }, + { + "epoch": 0.8292308933903639, + "grad_norm": 0.7028421759605408, + "learning_rate": 9.646447188306305e-05, + "loss": 2.5528, + "step": 10275 + }, + { + "epoch": 0.829311597126947, + "grad_norm": 0.7216476202011108, + "learning_rate": 9.644869489929321e-05, + "loss": 2.5298, + "step": 10276 + }, + { + "epoch": 0.82939230086353, + "grad_norm": 0.6815251111984253, + "learning_rate": 9.643291800403123e-05, + "loss": 2.5138, + "step": 10277 + }, + { + "epoch": 0.829473004600113, + "grad_norm": 0.6961970925331116, + "learning_rate": 9.64171411976702e-05, + "loss": 2.5441, + "step": 10278 + }, + { + "epoch": 0.829553708336696, + "grad_norm": 0.7317311763763428, + "learning_rate": 9.640136448060337e-05, + "loss": 2.5885, + "step": 10279 + }, + { + "epoch": 0.829634412073279, + "grad_norm": 0.729086697101593, + "learning_rate": 9.638558785322396e-05, + "loss": 2.475, + "step": 10280 + }, + { + "epoch": 0.829715115809862, + "grad_norm": 0.7790165543556213, + "learning_rate": 9.636981131592521e-05, + "loss": 2.5538, + "step": 10281 + }, + { + "epoch": 0.829795819546445, + "grad_norm": 0.7066864967346191, + "learning_rate": 9.635403486910018e-05, + "loss": 2.5916, + "step": 10282 + }, + { + "epoch": 0.829876523283028, + "grad_norm": 0.7070252299308777, + "learning_rate": 9.633825851314215e-05, + "loss": 2.5879, + "step": 10283 + }, + { + "epoch": 0.829957227019611, + "grad_norm": 0.7604004740715027, + "learning_rate": 9.63224822484443e-05, + "loss": 2.5298, + "step": 10284 + }, + { + "epoch": 0.830037930756194, + 
"grad_norm": 0.7548386454582214, + "learning_rate": 9.63067060753998e-05, + "loss": 2.5313, + "step": 10285 + }, + { + "epoch": 0.830118634492777, + "grad_norm": 0.7241540551185608, + "learning_rate": 9.629092999440183e-05, + "loss": 2.5498, + "step": 10286 + }, + { + "epoch": 0.83019933822936, + "grad_norm": 0.6748291850090027, + "learning_rate": 9.627515400584361e-05, + "loss": 2.523, + "step": 10287 + }, + { + "epoch": 0.8302800419659431, + "grad_norm": 0.6624683141708374, + "learning_rate": 9.625937811011826e-05, + "loss": 2.568, + "step": 10288 + }, + { + "epoch": 0.830360745702526, + "grad_norm": 0.6681114435195923, + "learning_rate": 9.624360230761899e-05, + "loss": 2.5255, + "step": 10289 + }, + { + "epoch": 0.830441449439109, + "grad_norm": 0.6895325183868408, + "learning_rate": 9.622782659873899e-05, + "loss": 2.5275, + "step": 10290 + }, + { + "epoch": 0.830522153175692, + "grad_norm": 0.7257826924324036, + "learning_rate": 9.621205098387137e-05, + "loss": 2.5102, + "step": 10291 + }, + { + "epoch": 0.8306028569122751, + "grad_norm": 0.6567066311836243, + "learning_rate": 9.619627546340935e-05, + "loss": 2.5721, + "step": 10292 + }, + { + "epoch": 0.8306835606488581, + "grad_norm": 0.6571428179740906, + "learning_rate": 9.61805000377461e-05, + "loss": 2.5014, + "step": 10293 + }, + { + "epoch": 0.830764264385441, + "grad_norm": 0.7807042598724365, + "learning_rate": 9.61647247072748e-05, + "loss": 2.632, + "step": 10294 + }, + { + "epoch": 0.830844968122024, + "grad_norm": 0.6688913702964783, + "learning_rate": 9.614894947238854e-05, + "loss": 2.5457, + "step": 10295 + }, + { + "epoch": 0.8309256718586071, + "grad_norm": 0.7769338488578796, + "learning_rate": 9.613317433348055e-05, + "loss": 2.4775, + "step": 10296 + }, + { + "epoch": 0.8310063755951901, + "grad_norm": 0.7089162468910217, + "learning_rate": 9.611739929094399e-05, + "loss": 2.4887, + "step": 10297 + }, + { + "epoch": 0.8310870793317731, + "grad_norm": 0.6901174783706665, + 
"learning_rate": 9.610162434517196e-05, + "loss": 2.6127, + "step": 10298 + }, + { + "epoch": 0.831167783068356, + "grad_norm": 0.6862173676490784, + "learning_rate": 9.608584949655764e-05, + "loss": 2.5432, + "step": 10299 + }, + { + "epoch": 0.8312484868049391, + "grad_norm": 0.6789367198944092, + "learning_rate": 9.607007474549418e-05, + "loss": 2.5135, + "step": 10300 + }, + { + "epoch": 0.8313291905415221, + "grad_norm": 0.6548805832862854, + "learning_rate": 9.605430009237474e-05, + "loss": 2.5466, + "step": 10301 + }, + { + "epoch": 0.8314098942781051, + "grad_norm": 0.6873800158500671, + "learning_rate": 9.603852553759244e-05, + "loss": 2.4954, + "step": 10302 + }, + { + "epoch": 0.831490598014688, + "grad_norm": 0.6816138029098511, + "learning_rate": 9.602275108154046e-05, + "loss": 2.5556, + "step": 10303 + }, + { + "epoch": 0.831571301751271, + "grad_norm": 0.6890314221382141, + "learning_rate": 9.600697672461189e-05, + "loss": 2.5253, + "step": 10304 + }, + { + "epoch": 0.8316520054878541, + "grad_norm": 0.6217427849769592, + "learning_rate": 9.599120246719992e-05, + "loss": 2.53, + "step": 10305 + }, + { + "epoch": 0.8317327092244371, + "grad_norm": 0.6638299226760864, + "learning_rate": 9.59754283096977e-05, + "loss": 2.5323, + "step": 10306 + }, + { + "epoch": 0.8318134129610201, + "grad_norm": 0.6834245920181274, + "learning_rate": 9.595965425249828e-05, + "loss": 2.5339, + "step": 10307 + }, + { + "epoch": 0.831894116697603, + "grad_norm": 0.8013476729393005, + "learning_rate": 9.594388029599484e-05, + "loss": 2.4925, + "step": 10308 + }, + { + "epoch": 0.8319748204341861, + "grad_norm": 0.7677187323570251, + "learning_rate": 9.592810644058049e-05, + "loss": 2.5717, + "step": 10309 + }, + { + "epoch": 0.8320555241707691, + "grad_norm": 0.6558046340942383, + "learning_rate": 9.591233268664841e-05, + "loss": 2.5631, + "step": 10310 + }, + { + "epoch": 0.8321362279073521, + "grad_norm": 0.6648481488227844, + "learning_rate": 9.589655903459165e-05, + 
"loss": 2.5232, + "step": 10311 + }, + { + "epoch": 0.8322169316439351, + "grad_norm": 0.6907756328582764, + "learning_rate": 9.588078548480338e-05, + "loss": 2.4804, + "step": 10312 + }, + { + "epoch": 0.8322976353805182, + "grad_norm": 0.6924928426742554, + "learning_rate": 9.586501203767675e-05, + "loss": 2.4648, + "step": 10313 + }, + { + "epoch": 0.8323783391171011, + "grad_norm": 0.7654799222946167, + "learning_rate": 9.584923869360477e-05, + "loss": 2.6184, + "step": 10314 + }, + { + "epoch": 0.8324590428536841, + "grad_norm": 0.7056179046630859, + "learning_rate": 9.58334654529806e-05, + "loss": 2.5862, + "step": 10315 + }, + { + "epoch": 0.8325397465902671, + "grad_norm": 0.7245064973831177, + "learning_rate": 9.581769231619743e-05, + "loss": 2.4866, + "step": 10316 + }, + { + "epoch": 0.8326204503268502, + "grad_norm": 0.6782355308532715, + "learning_rate": 9.580191928364824e-05, + "loss": 2.5519, + "step": 10317 + }, + { + "epoch": 0.8327011540634331, + "grad_norm": 0.6910805106163025, + "learning_rate": 9.578614635572621e-05, + "loss": 2.542, + "step": 10318 + }, + { + "epoch": 0.8327818578000161, + "grad_norm": 0.6858026385307312, + "learning_rate": 9.577037353282444e-05, + "loss": 2.5601, + "step": 10319 + }, + { + "epoch": 0.8328625615365991, + "grad_norm": 0.6886423230171204, + "learning_rate": 9.5754600815336e-05, + "loss": 2.5817, + "step": 10320 + }, + { + "epoch": 0.8329432652731822, + "grad_norm": 0.7585750818252563, + "learning_rate": 9.573882820365402e-05, + "loss": 2.5153, + "step": 10321 + }, + { + "epoch": 0.8330239690097652, + "grad_norm": 0.7004472613334656, + "learning_rate": 9.57230556981716e-05, + "loss": 2.5456, + "step": 10322 + }, + { + "epoch": 0.8331046727463481, + "grad_norm": 0.6530508399009705, + "learning_rate": 9.570728329928179e-05, + "loss": 2.5453, + "step": 10323 + }, + { + "epoch": 0.8331853764829311, + "grad_norm": 0.6767956614494324, + "learning_rate": 9.569151100737769e-05, + "loss": 2.5311, + "step": 10324 + }, + { 
+ "epoch": 0.8332660802195142, + "grad_norm": 0.6835905909538269, + "learning_rate": 9.56757388228524e-05, + "loss": 2.5417, + "step": 10325 + }, + { + "epoch": 0.8333467839560972, + "grad_norm": 0.6582748889923096, + "learning_rate": 9.565996674609901e-05, + "loss": 2.5144, + "step": 10326 + }, + { + "epoch": 0.8334274876926802, + "grad_norm": 0.6815205216407776, + "learning_rate": 9.56441947775106e-05, + "loss": 2.5272, + "step": 10327 + }, + { + "epoch": 0.8335081914292631, + "grad_norm": 0.6810150146484375, + "learning_rate": 9.562842291748022e-05, + "loss": 2.5475, + "step": 10328 + }, + { + "epoch": 0.8335888951658462, + "grad_norm": 0.7220990657806396, + "learning_rate": 9.5612651166401e-05, + "loss": 2.54, + "step": 10329 + }, + { + "epoch": 0.8336695989024292, + "grad_norm": 0.6840164065361023, + "learning_rate": 9.559687952466596e-05, + "loss": 2.5987, + "step": 10330 + }, + { + "epoch": 0.8337503026390122, + "grad_norm": 0.7085031867027283, + "learning_rate": 9.558110799266819e-05, + "loss": 2.5674, + "step": 10331 + }, + { + "epoch": 0.8338310063755952, + "grad_norm": 0.6658117175102234, + "learning_rate": 9.55653365708008e-05, + "loss": 2.5793, + "step": 10332 + }, + { + "epoch": 0.8339117101121782, + "grad_norm": 0.782648503780365, + "learning_rate": 9.554956525945677e-05, + "loss": 2.5463, + "step": 10333 + }, + { + "epoch": 0.8339924138487612, + "grad_norm": 0.6999937891960144, + "learning_rate": 9.553379405902922e-05, + "loss": 2.5961, + "step": 10334 + }, + { + "epoch": 0.8340731175853442, + "grad_norm": 0.6681220531463623, + "learning_rate": 9.55180229699112e-05, + "loss": 2.6055, + "step": 10335 + }, + { + "epoch": 0.8341538213219272, + "grad_norm": 0.7127133011817932, + "learning_rate": 9.550225199249577e-05, + "loss": 2.5571, + "step": 10336 + }, + { + "epoch": 0.8342345250585103, + "grad_norm": 0.6939001679420471, + "learning_rate": 9.548648112717596e-05, + "loss": 2.5653, + "step": 10337 + }, + { + "epoch": 0.8343152287950932, + "grad_norm": 
0.7483924031257629, + "learning_rate": 9.547071037434487e-05, + "loss": 2.5316, + "step": 10338 + }, + { + "epoch": 0.8343959325316762, + "grad_norm": 0.7975850105285645, + "learning_rate": 9.545493973439548e-05, + "loss": 2.6039, + "step": 10339 + }, + { + "epoch": 0.8344766362682592, + "grad_norm": 0.6893026232719421, + "learning_rate": 9.543916920772087e-05, + "loss": 2.5797, + "step": 10340 + }, + { + "epoch": 0.8345573400048423, + "grad_norm": 0.752869188785553, + "learning_rate": 9.542339879471409e-05, + "loss": 2.5677, + "step": 10341 + }, + { + "epoch": 0.8346380437414253, + "grad_norm": 0.7336339354515076, + "learning_rate": 9.540762849576822e-05, + "loss": 2.5212, + "step": 10342 + }, + { + "epoch": 0.8347187474780082, + "grad_norm": 0.7742713689804077, + "learning_rate": 9.539185831127621e-05, + "loss": 2.5599, + "step": 10343 + }, + { + "epoch": 0.8347994512145912, + "grad_norm": 0.7205352783203125, + "learning_rate": 9.537608824163114e-05, + "loss": 2.5591, + "step": 10344 + }, + { + "epoch": 0.8348801549511743, + "grad_norm": 0.7794787287712097, + "learning_rate": 9.536031828722605e-05, + "loss": 2.5858, + "step": 10345 + }, + { + "epoch": 0.8349608586877573, + "grad_norm": 0.7129528522491455, + "learning_rate": 9.534454844845396e-05, + "loss": 2.5591, + "step": 10346 + }, + { + "epoch": 0.8350415624243402, + "grad_norm": 0.731038510799408, + "learning_rate": 9.532877872570787e-05, + "loss": 2.5774, + "step": 10347 + }, + { + "epoch": 0.8351222661609232, + "grad_norm": 0.7706510424613953, + "learning_rate": 9.531300911938087e-05, + "loss": 2.6102, + "step": 10348 + }, + { + "epoch": 0.8352029698975063, + "grad_norm": 0.6890363097190857, + "learning_rate": 9.52972396298659e-05, + "loss": 2.5393, + "step": 10349 + }, + { + "epoch": 0.8352836736340893, + "grad_norm": 0.6792402863502502, + "learning_rate": 9.528147025755601e-05, + "loss": 2.5607, + "step": 10350 + }, + { + "epoch": 0.8353643773706723, + "grad_norm": 0.7097377777099609, + "learning_rate": 
9.526570100284422e-05, + "loss": 2.5681, + "step": 10351 + }, + { + "epoch": 0.8354450811072552, + "grad_norm": 0.7530940771102905, + "learning_rate": 9.524993186612353e-05, + "loss": 2.5405, + "step": 10352 + }, + { + "epoch": 0.8355257848438382, + "grad_norm": 0.714080810546875, + "learning_rate": 9.523416284778696e-05, + "loss": 2.5365, + "step": 10353 + }, + { + "epoch": 0.8356064885804213, + "grad_norm": 0.6745832562446594, + "learning_rate": 9.521839394822752e-05, + "loss": 2.5553, + "step": 10354 + }, + { + "epoch": 0.8356871923170043, + "grad_norm": 0.7163450121879578, + "learning_rate": 9.52026251678382e-05, + "loss": 2.5074, + "step": 10355 + }, + { + "epoch": 0.8357678960535873, + "grad_norm": 0.6876534223556519, + "learning_rate": 9.518685650701197e-05, + "loss": 2.5652, + "step": 10356 + }, + { + "epoch": 0.8358485997901702, + "grad_norm": 0.6424533128738403, + "learning_rate": 9.517108796614187e-05, + "loss": 2.4823, + "step": 10357 + }, + { + "epoch": 0.8359293035267533, + "grad_norm": 0.646802544593811, + "learning_rate": 9.515531954562094e-05, + "loss": 2.5602, + "step": 10358 + }, + { + "epoch": 0.8360100072633363, + "grad_norm": 0.7266993522644043, + "learning_rate": 9.513955124584205e-05, + "loss": 2.5384, + "step": 10359 + }, + { + "epoch": 0.8360907109999193, + "grad_norm": 0.7358742356300354, + "learning_rate": 9.512378306719826e-05, + "loss": 2.5798, + "step": 10360 + }, + { + "epoch": 0.8361714147365022, + "grad_norm": 0.7191498279571533, + "learning_rate": 9.510801501008256e-05, + "loss": 2.5229, + "step": 10361 + }, + { + "epoch": 0.8362521184730853, + "grad_norm": 0.7058876156806946, + "learning_rate": 9.509224707488788e-05, + "loss": 2.5146, + "step": 10362 + }, + { + "epoch": 0.8363328222096683, + "grad_norm": 0.7348346710205078, + "learning_rate": 9.507647926200725e-05, + "loss": 2.5878, + "step": 10363 + }, + { + "epoch": 0.8364135259462513, + "grad_norm": 0.7464115619659424, + "learning_rate": 9.506071157183366e-05, + "loss": 
2.6056, + "step": 10364 + }, + { + "epoch": 0.8364942296828343, + "grad_norm": 0.7077332139015198, + "learning_rate": 9.504494400476e-05, + "loss": 2.5161, + "step": 10365 + }, + { + "epoch": 0.8365749334194174, + "grad_norm": 0.7381827235221863, + "learning_rate": 9.502917656117928e-05, + "loss": 2.519, + "step": 10366 + }, + { + "epoch": 0.8366556371560003, + "grad_norm": 0.743180513381958, + "learning_rate": 9.501340924148452e-05, + "loss": 2.6149, + "step": 10367 + }, + { + "epoch": 0.8367363408925833, + "grad_norm": 0.6496078372001648, + "learning_rate": 9.499764204606863e-05, + "loss": 2.4969, + "step": 10368 + }, + { + "epoch": 0.8368170446291663, + "grad_norm": 0.6796541810035706, + "learning_rate": 9.498187497532454e-05, + "loss": 2.5304, + "step": 10369 + }, + { + "epoch": 0.8368977483657494, + "grad_norm": 0.6555948853492737, + "learning_rate": 9.496610802964529e-05, + "loss": 2.6029, + "step": 10370 + }, + { + "epoch": 0.8369784521023323, + "grad_norm": 0.6990405321121216, + "learning_rate": 9.495034120942374e-05, + "loss": 2.5286, + "step": 10371 + }, + { + "epoch": 0.8370591558389153, + "grad_norm": 0.7417613863945007, + "learning_rate": 9.49345745150529e-05, + "loss": 2.5301, + "step": 10372 + }, + { + "epoch": 0.8371398595754983, + "grad_norm": 0.6809872388839722, + "learning_rate": 9.49188079469257e-05, + "loss": 2.5075, + "step": 10373 + }, + { + "epoch": 0.8372205633120814, + "grad_norm": 0.6537099480628967, + "learning_rate": 9.490304150543514e-05, + "loss": 2.5515, + "step": 10374 + }, + { + "epoch": 0.8373012670486644, + "grad_norm": 0.6660431027412415, + "learning_rate": 9.488727519097407e-05, + "loss": 2.549, + "step": 10375 + }, + { + "epoch": 0.8373819707852473, + "grad_norm": 0.7257838249206543, + "learning_rate": 9.487150900393546e-05, + "loss": 2.546, + "step": 10376 + }, + { + "epoch": 0.8374626745218303, + "grad_norm": 0.742085874080658, + "learning_rate": 9.485574294471226e-05, + "loss": 2.5302, + "step": 10377 + }, + { + "epoch": 
0.8375433782584134, + "grad_norm": 0.659934401512146, + "learning_rate": 9.48399770136974e-05, + "loss": 2.5553, + "step": 10378 + }, + { + "epoch": 0.8376240819949964, + "grad_norm": 0.7219613790512085, + "learning_rate": 9.482421121128377e-05, + "loss": 2.6186, + "step": 10379 + }, + { + "epoch": 0.8377047857315794, + "grad_norm": 0.706444263458252, + "learning_rate": 9.480844553786436e-05, + "loss": 2.5082, + "step": 10380 + }, + { + "epoch": 0.8377854894681623, + "grad_norm": 0.7527014017105103, + "learning_rate": 9.479267999383204e-05, + "loss": 2.5625, + "step": 10381 + }, + { + "epoch": 0.8378661932047454, + "grad_norm": 0.7488746643066406, + "learning_rate": 9.477691457957976e-05, + "loss": 2.528, + "step": 10382 + }, + { + "epoch": 0.8379468969413284, + "grad_norm": 0.7394229173660278, + "learning_rate": 9.476114929550045e-05, + "loss": 2.5387, + "step": 10383 + }, + { + "epoch": 0.8380276006779114, + "grad_norm": 0.7490981817245483, + "learning_rate": 9.474538414198695e-05, + "loss": 2.548, + "step": 10384 + }, + { + "epoch": 0.8381083044144944, + "grad_norm": 0.7203173041343689, + "learning_rate": 9.472961911943222e-05, + "loss": 2.5547, + "step": 10385 + }, + { + "epoch": 0.8381890081510774, + "grad_norm": 0.6929850578308105, + "learning_rate": 9.471385422822917e-05, + "loss": 2.4831, + "step": 10386 + }, + { + "epoch": 0.8382697118876604, + "grad_norm": 0.6303263902664185, + "learning_rate": 9.469808946877067e-05, + "loss": 2.4569, + "step": 10387 + }, + { + "epoch": 0.8383504156242434, + "grad_norm": 0.6986981630325317, + "learning_rate": 9.468232484144964e-05, + "loss": 2.5278, + "step": 10388 + }, + { + "epoch": 0.8384311193608264, + "grad_norm": 0.6910964846611023, + "learning_rate": 9.466656034665898e-05, + "loss": 2.5657, + "step": 10389 + }, + { + "epoch": 0.8385118230974095, + "grad_norm": 0.6571134924888611, + "learning_rate": 9.465079598479163e-05, + "loss": 2.6017, + "step": 10390 + }, + { + "epoch": 0.8385925268339924, + "grad_norm": 
0.7117733359336853, + "learning_rate": 9.463503175624034e-05, + "loss": 2.56, + "step": 10391 + }, + { + "epoch": 0.8386732305705754, + "grad_norm": 0.7052998542785645, + "learning_rate": 9.461926766139813e-05, + "loss": 2.4998, + "step": 10392 + }, + { + "epoch": 0.8387539343071584, + "grad_norm": 0.7306597232818604, + "learning_rate": 9.460350370065786e-05, + "loss": 2.5292, + "step": 10393 + }, + { + "epoch": 0.8388346380437415, + "grad_norm": 0.681069552898407, + "learning_rate": 9.458773987441235e-05, + "loss": 2.5469, + "step": 10394 + }, + { + "epoch": 0.8389153417803245, + "grad_norm": 0.6681767702102661, + "learning_rate": 9.45719761830545e-05, + "loss": 2.5476, + "step": 10395 + }, + { + "epoch": 0.8389960455169074, + "grad_norm": 0.6759339570999146, + "learning_rate": 9.455621262697723e-05, + "loss": 2.4806, + "step": 10396 + }, + { + "epoch": 0.8390767492534904, + "grad_norm": 0.695829451084137, + "learning_rate": 9.454044920657333e-05, + "loss": 2.5255, + "step": 10397 + }, + { + "epoch": 0.8391574529900735, + "grad_norm": 0.686568558216095, + "learning_rate": 9.452468592223572e-05, + "loss": 2.5655, + "step": 10398 + }, + { + "epoch": 0.8392381567266565, + "grad_norm": 0.6529035568237305, + "learning_rate": 9.45089227743573e-05, + "loss": 2.5026, + "step": 10399 + }, + { + "epoch": 0.8393188604632394, + "grad_norm": 0.6809061765670776, + "learning_rate": 9.449315976333082e-05, + "loss": 2.5549, + "step": 10400 + }, + { + "epoch": 0.8393995641998224, + "grad_norm": 0.6920269727706909, + "learning_rate": 9.447739688954919e-05, + "loss": 2.517, + "step": 10401 + }, + { + "epoch": 0.8394802679364055, + "grad_norm": 0.6626712083816528, + "learning_rate": 9.446163415340526e-05, + "loss": 2.605, + "step": 10402 + }, + { + "epoch": 0.8395609716729885, + "grad_norm": 0.6912916898727417, + "learning_rate": 9.444587155529195e-05, + "loss": 2.588, + "step": 10403 + }, + { + "epoch": 0.8396416754095715, + "grad_norm": 0.6771352291107178, + "learning_rate": 
9.443010909560198e-05, + "loss": 2.5148, + "step": 10404 + }, + { + "epoch": 0.8397223791461544, + "grad_norm": 0.7015509009361267, + "learning_rate": 9.441434677472827e-05, + "loss": 2.5425, + "step": 10405 + }, + { + "epoch": 0.8398030828827374, + "grad_norm": 0.6789976358413696, + "learning_rate": 9.439858459306364e-05, + "loss": 2.598, + "step": 10406 + }, + { + "epoch": 0.8398837866193205, + "grad_norm": 0.674391508102417, + "learning_rate": 9.438282255100091e-05, + "loss": 2.5581, + "step": 10407 + }, + { + "epoch": 0.8399644903559035, + "grad_norm": 0.6944772005081177, + "learning_rate": 9.436706064893294e-05, + "loss": 2.5591, + "step": 10408 + }, + { + "epoch": 0.8400451940924865, + "grad_norm": 0.6750832200050354, + "learning_rate": 9.435129888725259e-05, + "loss": 2.533, + "step": 10409 + }, + { + "epoch": 0.8401258978290694, + "grad_norm": 0.6927465200424194, + "learning_rate": 9.433553726635257e-05, + "loss": 2.536, + "step": 10410 + }, + { + "epoch": 0.8402066015656525, + "grad_norm": 0.6399651765823364, + "learning_rate": 9.431977578662578e-05, + "loss": 2.5123, + "step": 10411 + }, + { + "epoch": 0.8402873053022355, + "grad_norm": 0.7588143944740295, + "learning_rate": 9.430401444846505e-05, + "loss": 2.6133, + "step": 10412 + }, + { + "epoch": 0.8403680090388185, + "grad_norm": 0.8010972738265991, + "learning_rate": 9.428825325226313e-05, + "loss": 2.5407, + "step": 10413 + }, + { + "epoch": 0.8404487127754015, + "grad_norm": 0.6847307085990906, + "learning_rate": 9.427249219841288e-05, + "loss": 2.5912, + "step": 10414 + }, + { + "epoch": 0.8405294165119845, + "grad_norm": 0.7005963325500488, + "learning_rate": 9.425673128730716e-05, + "loss": 2.5059, + "step": 10415 + }, + { + "epoch": 0.8406101202485675, + "grad_norm": 0.7383962273597717, + "learning_rate": 9.424097051933862e-05, + "loss": 2.5157, + "step": 10416 + }, + { + "epoch": 0.8406908239851505, + "grad_norm": 0.7078843712806702, + "learning_rate": 9.422520989490018e-05, + "loss": 2.6093, 
+ "step": 10417 + }, + { + "epoch": 0.8407715277217335, + "grad_norm": 0.7449501752853394, + "learning_rate": 9.42094494143846e-05, + "loss": 2.594, + "step": 10418 + }, + { + "epoch": 0.8408522314583166, + "grad_norm": 0.6823872923851013, + "learning_rate": 9.419368907818473e-05, + "loss": 2.5653, + "step": 10419 + }, + { + "epoch": 0.8409329351948995, + "grad_norm": 0.7403056025505066, + "learning_rate": 9.417792888669325e-05, + "loss": 2.5296, + "step": 10420 + }, + { + "epoch": 0.8410136389314825, + "grad_norm": 0.6858980655670166, + "learning_rate": 9.4162168840303e-05, + "loss": 2.5401, + "step": 10421 + }, + { + "epoch": 0.8410943426680655, + "grad_norm": 0.692348837852478, + "learning_rate": 9.41464089394068e-05, + "loss": 2.4797, + "step": 10422 + }, + { + "epoch": 0.8411750464046486, + "grad_norm": 0.6939836144447327, + "learning_rate": 9.413064918439736e-05, + "loss": 2.505, + "step": 10423 + }, + { + "epoch": 0.8412557501412316, + "grad_norm": 0.7334314584732056, + "learning_rate": 9.411488957566748e-05, + "loss": 2.5792, + "step": 10424 + }, + { + "epoch": 0.8413364538778145, + "grad_norm": 0.6977920532226562, + "learning_rate": 9.409913011360999e-05, + "loss": 2.5204, + "step": 10425 + }, + { + "epoch": 0.8414171576143975, + "grad_norm": 0.7121822834014893, + "learning_rate": 9.408337079861756e-05, + "loss": 2.571, + "step": 10426 + }, + { + "epoch": 0.8414978613509806, + "grad_norm": 0.761476993560791, + "learning_rate": 9.406761163108297e-05, + "loss": 2.5845, + "step": 10427 + }, + { + "epoch": 0.8415785650875636, + "grad_norm": 0.7160221934318542, + "learning_rate": 9.405185261139906e-05, + "loss": 2.5331, + "step": 10428 + }, + { + "epoch": 0.8416592688241465, + "grad_norm": 0.6828827857971191, + "learning_rate": 9.40360937399585e-05, + "loss": 2.5596, + "step": 10429 + }, + { + "epoch": 0.8417399725607295, + "grad_norm": 0.756473183631897, + "learning_rate": 9.402033501715406e-05, + "loss": 2.6107, + "step": 10430 + }, + { + "epoch": 
0.8418206762973126, + "grad_norm": 0.7486895322799683, + "learning_rate": 9.400457644337853e-05, + "loss": 2.5388, + "step": 10431 + }, + { + "epoch": 0.8419013800338956, + "grad_norm": 0.7759146690368652, + "learning_rate": 9.398881801902461e-05, + "loss": 2.5559, + "step": 10432 + }, + { + "epoch": 0.8419820837704786, + "grad_norm": 0.71756911277771, + "learning_rate": 9.397305974448506e-05, + "loss": 2.6109, + "step": 10433 + }, + { + "epoch": 0.8420627875070615, + "grad_norm": 0.7741644382476807, + "learning_rate": 9.395730162015261e-05, + "loss": 2.5664, + "step": 10434 + }, + { + "epoch": 0.8421434912436446, + "grad_norm": 0.7155938744544983, + "learning_rate": 9.394154364642006e-05, + "loss": 2.5693, + "step": 10435 + }, + { + "epoch": 0.8422241949802276, + "grad_norm": 0.6862725019454956, + "learning_rate": 9.392578582368002e-05, + "loss": 2.4942, + "step": 10436 + }, + { + "epoch": 0.8423048987168106, + "grad_norm": 0.6698417067527771, + "learning_rate": 9.391002815232528e-05, + "loss": 2.5258, + "step": 10437 + }, + { + "epoch": 0.8423856024533936, + "grad_norm": 0.7756468057632446, + "learning_rate": 9.389427063274858e-05, + "loss": 2.5008, + "step": 10438 + }, + { + "epoch": 0.8424663061899766, + "grad_norm": 0.6579857468605042, + "learning_rate": 9.387851326534259e-05, + "loss": 2.5335, + "step": 10439 + }, + { + "epoch": 0.8425470099265596, + "grad_norm": 0.7673436403274536, + "learning_rate": 9.386275605050006e-05, + "loss": 2.5646, + "step": 10440 + }, + { + "epoch": 0.8426277136631426, + "grad_norm": 0.7377188205718994, + "learning_rate": 9.384699898861372e-05, + "loss": 2.568, + "step": 10441 + }, + { + "epoch": 0.8427084173997256, + "grad_norm": 0.6502123475074768, + "learning_rate": 9.38312420800762e-05, + "loss": 2.6091, + "step": 10442 + }, + { + "epoch": 0.8427891211363087, + "grad_norm": 0.729852020740509, + "learning_rate": 9.381548532528026e-05, + "loss": 2.4873, + "step": 10443 + }, + { + "epoch": 0.8428698248728916, + "grad_norm": 
0.7419102191925049, + "learning_rate": 9.379972872461865e-05, + "loss": 2.4966, + "step": 10444 + }, + { + "epoch": 0.8429505286094746, + "grad_norm": 0.6921093463897705, + "learning_rate": 9.378397227848395e-05, + "loss": 2.4895, + "step": 10445 + }, + { + "epoch": 0.8430312323460576, + "grad_norm": 0.7697325944900513, + "learning_rate": 9.376821598726892e-05, + "loss": 2.5779, + "step": 10446 + }, + { + "epoch": 0.8431119360826407, + "grad_norm": 0.6441029906272888, + "learning_rate": 9.375245985136626e-05, + "loss": 2.4909, + "step": 10447 + }, + { + "epoch": 0.8431926398192237, + "grad_norm": 0.6962057948112488, + "learning_rate": 9.373670387116861e-05, + "loss": 2.5602, + "step": 10448 + }, + { + "epoch": 0.8432733435558066, + "grad_norm": 0.7030641436576843, + "learning_rate": 9.372094804706867e-05, + "loss": 2.5641, + "step": 10449 + }, + { + "epoch": 0.8433540472923896, + "grad_norm": 0.6969063878059387, + "learning_rate": 9.370519237945912e-05, + "loss": 2.5555, + "step": 10450 + }, + { + "epoch": 0.8434347510289727, + "grad_norm": 0.7169879674911499, + "learning_rate": 9.368943686873267e-05, + "loss": 2.5258, + "step": 10451 + }, + { + "epoch": 0.8435154547655557, + "grad_norm": 0.7198735475540161, + "learning_rate": 9.36736815152819e-05, + "loss": 2.5192, + "step": 10452 + }, + { + "epoch": 0.8435961585021386, + "grad_norm": 0.6613535284996033, + "learning_rate": 9.365792631949951e-05, + "loss": 2.5596, + "step": 10453 + }, + { + "epoch": 0.8436768622387216, + "grad_norm": 0.6377065777778625, + "learning_rate": 9.364217128177824e-05, + "loss": 2.5518, + "step": 10454 + }, + { + "epoch": 0.8437575659753046, + "grad_norm": 0.6670635938644409, + "learning_rate": 9.362641640251063e-05, + "loss": 2.4793, + "step": 10455 + }, + { + "epoch": 0.8438382697118877, + "grad_norm": 0.6556122899055481, + "learning_rate": 9.361066168208939e-05, + "loss": 2.5492, + "step": 10456 + }, + { + "epoch": 0.8439189734484707, + "grad_norm": 0.7262280583381653, + 
"learning_rate": 9.35949071209072e-05, + "loss": 2.6059, + "step": 10457 + }, + { + "epoch": 0.8439996771850536, + "grad_norm": 0.702953040599823, + "learning_rate": 9.357915271935662e-05, + "loss": 2.5445, + "step": 10458 + }, + { + "epoch": 0.8440803809216366, + "grad_norm": 0.6619930267333984, + "learning_rate": 9.356339847783036e-05, + "loss": 2.5688, + "step": 10459 + }, + { + "epoch": 0.8441610846582197, + "grad_norm": 0.7038032412528992, + "learning_rate": 9.354764439672106e-05, + "loss": 2.5195, + "step": 10460 + }, + { + "epoch": 0.8442417883948027, + "grad_norm": 0.6615132689476013, + "learning_rate": 9.353189047642129e-05, + "loss": 2.5176, + "step": 10461 + }, + { + "epoch": 0.8443224921313857, + "grad_norm": 0.6524826288223267, + "learning_rate": 9.351613671732372e-05, + "loss": 2.4294, + "step": 10462 + }, + { + "epoch": 0.8444031958679686, + "grad_norm": 0.6526279449462891, + "learning_rate": 9.350038311982099e-05, + "loss": 2.595, + "step": 10463 + }, + { + "epoch": 0.8444838996045517, + "grad_norm": 0.6610859632492065, + "learning_rate": 9.348462968430569e-05, + "loss": 2.5311, + "step": 10464 + }, + { + "epoch": 0.8445646033411347, + "grad_norm": 0.6835470795631409, + "learning_rate": 9.346887641117045e-05, + "loss": 2.5694, + "step": 10465 + }, + { + "epoch": 0.8446453070777177, + "grad_norm": 0.6768551468849182, + "learning_rate": 9.345312330080787e-05, + "loss": 2.6082, + "step": 10466 + }, + { + "epoch": 0.8447260108143007, + "grad_norm": 0.6368672847747803, + "learning_rate": 9.343737035361059e-05, + "loss": 2.5221, + "step": 10467 + }, + { + "epoch": 0.8448067145508837, + "grad_norm": 0.6952844858169556, + "learning_rate": 9.34216175699712e-05, + "loss": 2.5003, + "step": 10468 + }, + { + "epoch": 0.8448874182874667, + "grad_norm": 0.6663931012153625, + "learning_rate": 9.340586495028227e-05, + "loss": 2.5469, + "step": 10469 + }, + { + "epoch": 0.8449681220240497, + "grad_norm": 0.6840688586235046, + "learning_rate": 9.339011249493647e-05, 
+ "loss": 2.5499, + "step": 10470 + }, + { + "epoch": 0.8450488257606327, + "grad_norm": 0.6832869052886963, + "learning_rate": 9.337436020432632e-05, + "loss": 2.5492, + "step": 10471 + }, + { + "epoch": 0.8451295294972158, + "grad_norm": 0.7444044947624207, + "learning_rate": 9.335860807884442e-05, + "loss": 2.5791, + "step": 10472 + }, + { + "epoch": 0.8452102332337987, + "grad_norm": 0.6821839809417725, + "learning_rate": 9.334285611888339e-05, + "loss": 2.4772, + "step": 10473 + }, + { + "epoch": 0.8452909369703817, + "grad_norm": 0.6209141612052917, + "learning_rate": 9.332710432483577e-05, + "loss": 2.5656, + "step": 10474 + }, + { + "epoch": 0.8453716407069647, + "grad_norm": 0.6531212329864502, + "learning_rate": 9.331135269709415e-05, + "loss": 2.5285, + "step": 10475 + }, + { + "epoch": 0.8454523444435478, + "grad_norm": 0.6418079137802124, + "learning_rate": 9.329560123605115e-05, + "loss": 2.5503, + "step": 10476 + }, + { + "epoch": 0.8455330481801308, + "grad_norm": 0.6636360287666321, + "learning_rate": 9.327984994209924e-05, + "loss": 2.528, + "step": 10477 + }, + { + "epoch": 0.8456137519167137, + "grad_norm": 0.6196488738059998, + "learning_rate": 9.326409881563102e-05, + "loss": 2.4907, + "step": 10478 + }, + { + "epoch": 0.8456944556532967, + "grad_norm": 0.6339137554168701, + "learning_rate": 9.324834785703913e-05, + "loss": 2.4672, + "step": 10479 + }, + { + "epoch": 0.8457751593898798, + "grad_norm": 0.6803932189941406, + "learning_rate": 9.323259706671602e-05, + "loss": 2.5538, + "step": 10480 + }, + { + "epoch": 0.8458558631264628, + "grad_norm": 0.6815275549888611, + "learning_rate": 9.321684644505429e-05, + "loss": 2.5291, + "step": 10481 + }, + { + "epoch": 0.8459365668630457, + "grad_norm": 0.6497374773025513, + "learning_rate": 9.320109599244646e-05, + "loss": 2.5499, + "step": 10482 + }, + { + "epoch": 0.8460172705996287, + "grad_norm": 0.7966926097869873, + "learning_rate": 9.318534570928512e-05, + "loss": 2.523, + "step": 10483 + }, 
+ { + "epoch": 0.8460979743362118, + "grad_norm": 0.6532156467437744, + "learning_rate": 9.316959559596276e-05, + "loss": 2.5138, + "step": 10484 + }, + { + "epoch": 0.8461786780727948, + "grad_norm": 0.7292522192001343, + "learning_rate": 9.315384565287193e-05, + "loss": 2.5413, + "step": 10485 + }, + { + "epoch": 0.8462593818093778, + "grad_norm": 0.7610795497894287, + "learning_rate": 9.313809588040519e-05, + "loss": 2.5071, + "step": 10486 + }, + { + "epoch": 0.8463400855459607, + "grad_norm": 0.7038258910179138, + "learning_rate": 9.312234627895502e-05, + "loss": 2.5568, + "step": 10487 + }, + { + "epoch": 0.8464207892825438, + "grad_norm": 0.7136046290397644, + "learning_rate": 9.310659684891395e-05, + "loss": 2.5372, + "step": 10488 + }, + { + "epoch": 0.8465014930191268, + "grad_norm": 0.7512896060943604, + "learning_rate": 9.309084759067452e-05, + "loss": 2.5821, + "step": 10489 + }, + { + "epoch": 0.8465821967557098, + "grad_norm": 0.7436400651931763, + "learning_rate": 9.307509850462922e-05, + "loss": 2.5489, + "step": 10490 + }, + { + "epoch": 0.8466629004922928, + "grad_norm": 0.6858603954315186, + "learning_rate": 9.305934959117056e-05, + "loss": 2.5622, + "step": 10491 + }, + { + "epoch": 0.8467436042288758, + "grad_norm": 0.707185685634613, + "learning_rate": 9.304360085069107e-05, + "loss": 2.5275, + "step": 10492 + }, + { + "epoch": 0.8468243079654588, + "grad_norm": 0.7207933068275452, + "learning_rate": 9.302785228358322e-05, + "loss": 2.5877, + "step": 10493 + }, + { + "epoch": 0.8469050117020418, + "grad_norm": 0.6470080614089966, + "learning_rate": 9.30121038902395e-05, + "loss": 2.5117, + "step": 10494 + }, + { + "epoch": 0.8469857154386248, + "grad_norm": 0.75248783826828, + "learning_rate": 9.299635567105247e-05, + "loss": 2.5259, + "step": 10495 + }, + { + "epoch": 0.8470664191752079, + "grad_norm": 0.7150708436965942, + "learning_rate": 9.298060762641452e-05, + "loss": 2.551, + "step": 10496 + }, + { + "epoch": 0.8471471229117908, + 
"grad_norm": 0.6865069270133972, + "learning_rate": 9.296485975671818e-05, + "loss": 2.5184, + "step": 10497 + }, + { + "epoch": 0.8472278266483738, + "grad_norm": 0.7188237309455872, + "learning_rate": 9.294911206235593e-05, + "loss": 2.5207, + "step": 10498 + }, + { + "epoch": 0.8473085303849568, + "grad_norm": 0.6907880902290344, + "learning_rate": 9.293336454372026e-05, + "loss": 2.5544, + "step": 10499 + }, + { + "epoch": 0.8473892341215399, + "grad_norm": 0.7626079320907593, + "learning_rate": 9.291761720120358e-05, + "loss": 2.5741, + "step": 10500 + }, + { + "epoch": 0.8474699378581229, + "grad_norm": 0.6731963753700256, + "learning_rate": 9.29018700351984e-05, + "loss": 2.5433, + "step": 10501 + }, + { + "epoch": 0.8475506415947058, + "grad_norm": 0.7256288528442383, + "learning_rate": 9.288612304609723e-05, + "loss": 2.5131, + "step": 10502 + }, + { + "epoch": 0.8476313453312888, + "grad_norm": 0.7129119634628296, + "learning_rate": 9.287037623429242e-05, + "loss": 2.5054, + "step": 10503 + }, + { + "epoch": 0.8477120490678719, + "grad_norm": 0.6711156964302063, + "learning_rate": 9.285462960017644e-05, + "loss": 2.5671, + "step": 10504 + }, + { + "epoch": 0.8477927528044549, + "grad_norm": 0.7268081903457642, + "learning_rate": 9.283888314414184e-05, + "loss": 2.5627, + "step": 10505 + }, + { + "epoch": 0.8478734565410379, + "grad_norm": 0.8635050058364868, + "learning_rate": 9.282313686658094e-05, + "loss": 2.517, + "step": 10506 + }, + { + "epoch": 0.8479541602776208, + "grad_norm": 0.7077138423919678, + "learning_rate": 9.280739076788624e-05, + "loss": 2.5551, + "step": 10507 + }, + { + "epoch": 0.8480348640142038, + "grad_norm": 0.6312204599380493, + "learning_rate": 9.279164484845018e-05, + "loss": 2.5329, + "step": 10508 + }, + { + "epoch": 0.8481155677507869, + "grad_norm": 0.6749829649925232, + "learning_rate": 9.277589910866516e-05, + "loss": 2.5092, + "step": 10509 + }, + { + "epoch": 0.8481962714873699, + "grad_norm": 0.753391683101654, + 
"learning_rate": 9.27601535489236e-05, + "loss": 2.6244, + "step": 10510 + }, + { + "epoch": 0.8482769752239528, + "grad_norm": 0.7230119109153748, + "learning_rate": 9.2744408169618e-05, + "loss": 2.5021, + "step": 10511 + }, + { + "epoch": 0.8483576789605358, + "grad_norm": 0.6759157776832581, + "learning_rate": 9.272866297114067e-05, + "loss": 2.5399, + "step": 10512 + }, + { + "epoch": 0.8484383826971189, + "grad_norm": 0.7049473524093628, + "learning_rate": 9.271291795388406e-05, + "loss": 2.5024, + "step": 10513 + }, + { + "epoch": 0.8485190864337019, + "grad_norm": 0.6579850912094116, + "learning_rate": 9.269717311824058e-05, + "loss": 2.5019, + "step": 10514 + }, + { + "epoch": 0.8485997901702849, + "grad_norm": 0.7091391086578369, + "learning_rate": 9.268142846460265e-05, + "loss": 2.5785, + "step": 10515 + }, + { + "epoch": 0.8486804939068678, + "grad_norm": 0.6612898707389832, + "learning_rate": 9.266568399336266e-05, + "loss": 2.5046, + "step": 10516 + }, + { + "epoch": 0.8487611976434509, + "grad_norm": 0.6348623633384705, + "learning_rate": 9.264993970491298e-05, + "loss": 2.543, + "step": 10517 + }, + { + "epoch": 0.8488419013800339, + "grad_norm": 0.688360869884491, + "learning_rate": 9.263419559964604e-05, + "loss": 2.5294, + "step": 10518 + }, + { + "epoch": 0.8489226051166169, + "grad_norm": 0.6483190059661865, + "learning_rate": 9.261845167795418e-05, + "loss": 2.5623, + "step": 10519 + }, + { + "epoch": 0.8490033088531999, + "grad_norm": 0.689379096031189, + "learning_rate": 9.26027079402298e-05, + "loss": 2.4871, + "step": 10520 + }, + { + "epoch": 0.8490840125897829, + "grad_norm": 0.6627655625343323, + "learning_rate": 9.25869643868653e-05, + "loss": 2.5353, + "step": 10521 + }, + { + "epoch": 0.8491647163263659, + "grad_norm": 0.6701192259788513, + "learning_rate": 9.2571221018253e-05, + "loss": 2.5003, + "step": 10522 + }, + { + "epoch": 0.8492454200629489, + "grad_norm": 0.7413944005966187, + "learning_rate": 9.255547783478529e-05, + 
"loss": 2.5473, + "step": 10523 + }, + { + "epoch": 0.8493261237995319, + "grad_norm": 0.6490365266799927, + "learning_rate": 9.253973483685455e-05, + "loss": 2.5168, + "step": 10524 + }, + { + "epoch": 0.849406827536115, + "grad_norm": 0.7303688526153564, + "learning_rate": 9.25239920248531e-05, + "loss": 2.5953, + "step": 10525 + }, + { + "epoch": 0.8494875312726979, + "grad_norm": 0.7132991552352905, + "learning_rate": 9.250824939917331e-05, + "loss": 2.475, + "step": 10526 + }, + { + "epoch": 0.8495682350092809, + "grad_norm": 0.6935676336288452, + "learning_rate": 9.249250696020753e-05, + "loss": 2.5212, + "step": 10527 + }, + { + "epoch": 0.8496489387458639, + "grad_norm": 0.732961118221283, + "learning_rate": 9.247676470834814e-05, + "loss": 2.5848, + "step": 10528 + }, + { + "epoch": 0.849729642482447, + "grad_norm": 0.6899160146713257, + "learning_rate": 9.246102264398739e-05, + "loss": 2.4551, + "step": 10529 + }, + { + "epoch": 0.84981034621903, + "grad_norm": 0.6941123604774475, + "learning_rate": 9.244528076751766e-05, + "loss": 2.5441, + "step": 10530 + }, + { + "epoch": 0.8498910499556129, + "grad_norm": 0.7351016998291016, + "learning_rate": 9.242953907933134e-05, + "loss": 2.6519, + "step": 10531 + }, + { + "epoch": 0.8499717536921959, + "grad_norm": 0.7156691551208496, + "learning_rate": 9.241379757982065e-05, + "loss": 2.573, + "step": 10532 + }, + { + "epoch": 0.850052457428779, + "grad_norm": 0.7137688994407654, + "learning_rate": 9.239805626937797e-05, + "loss": 2.5688, + "step": 10533 + }, + { + "epoch": 0.850133161165362, + "grad_norm": 0.7018687129020691, + "learning_rate": 9.238231514839559e-05, + "loss": 2.5725, + "step": 10534 + }, + { + "epoch": 0.850213864901945, + "grad_norm": 0.6723659634590149, + "learning_rate": 9.236657421726583e-05, + "loss": 2.5661, + "step": 10535 + }, + { + "epoch": 0.8502945686385279, + "grad_norm": 0.7105850577354431, + "learning_rate": 9.235083347638098e-05, + "loss": 2.5676, + "step": 10536 + }, + { + 
"epoch": 0.850375272375111, + "grad_norm": 0.682601809501648, + "learning_rate": 9.233509292613341e-05, + "loss": 2.5489, + "step": 10537 + }, + { + "epoch": 0.850455976111694, + "grad_norm": 0.6703988313674927, + "learning_rate": 9.231935256691531e-05, + "loss": 2.5349, + "step": 10538 + }, + { + "epoch": 0.850536679848277, + "grad_norm": 0.6430882215499878, + "learning_rate": 9.230361239911903e-05, + "loss": 2.4959, + "step": 10539 + }, + { + "epoch": 0.8506173835848599, + "grad_norm": 0.7164519429206848, + "learning_rate": 9.228787242313687e-05, + "loss": 2.4999, + "step": 10540 + }, + { + "epoch": 0.850698087321443, + "grad_norm": 0.7463028430938721, + "learning_rate": 9.227213263936107e-05, + "loss": 2.545, + "step": 10541 + }, + { + "epoch": 0.850778791058026, + "grad_norm": 0.650577187538147, + "learning_rate": 9.22563930481839e-05, + "loss": 2.5707, + "step": 10542 + }, + { + "epoch": 0.850859494794609, + "grad_norm": 0.6808211207389832, + "learning_rate": 9.224065364999768e-05, + "loss": 2.5236, + "step": 10543 + }, + { + "epoch": 0.850940198531192, + "grad_norm": 0.6947758793830872, + "learning_rate": 9.222491444519467e-05, + "loss": 2.555, + "step": 10544 + }, + { + "epoch": 0.851020902267775, + "grad_norm": 0.6805624961853027, + "learning_rate": 9.22091754341671e-05, + "loss": 2.517, + "step": 10545 + }, + { + "epoch": 0.851101606004358, + "grad_norm": 0.6645655035972595, + "learning_rate": 9.219343661730724e-05, + "loss": 2.5237, + "step": 10546 + }, + { + "epoch": 0.851182309740941, + "grad_norm": 0.6912586092948914, + "learning_rate": 9.217769799500738e-05, + "loss": 2.5345, + "step": 10547 + }, + { + "epoch": 0.851263013477524, + "grad_norm": 0.6713781356811523, + "learning_rate": 9.21619595676597e-05, + "loss": 2.56, + "step": 10548 + }, + { + "epoch": 0.8513437172141071, + "grad_norm": 0.7031502723693848, + "learning_rate": 9.214622133565648e-05, + "loss": 2.4885, + "step": 10549 + }, + { + "epoch": 0.85142442095069, + "grad_norm": 
0.6616455316543579, + "learning_rate": 9.213048329938997e-05, + "loss": 2.5101, + "step": 10550 + }, + { + "epoch": 0.851505124687273, + "grad_norm": 0.711077094078064, + "learning_rate": 9.211474545925236e-05, + "loss": 2.6264, + "step": 10551 + }, + { + "epoch": 0.851585828423856, + "grad_norm": 0.7534502744674683, + "learning_rate": 9.209900781563592e-05, + "loss": 2.5417, + "step": 10552 + }, + { + "epoch": 0.8516665321604391, + "grad_norm": 0.7405222058296204, + "learning_rate": 9.208327036893288e-05, + "loss": 2.546, + "step": 10553 + }, + { + "epoch": 0.8517472358970221, + "grad_norm": 0.7014057040214539, + "learning_rate": 9.20675331195354e-05, + "loss": 2.5211, + "step": 10554 + }, + { + "epoch": 0.851827939633605, + "grad_norm": 0.6984074115753174, + "learning_rate": 9.205179606783573e-05, + "loss": 2.5181, + "step": 10555 + }, + { + "epoch": 0.851908643370188, + "grad_norm": 0.7312670350074768, + "learning_rate": 9.203605921422613e-05, + "loss": 2.5345, + "step": 10556 + }, + { + "epoch": 0.851989347106771, + "grad_norm": 0.6861104369163513, + "learning_rate": 9.202032255909871e-05, + "loss": 2.5426, + "step": 10557 + }, + { + "epoch": 0.8520700508433541, + "grad_norm": 0.6989030838012695, + "learning_rate": 9.200458610284571e-05, + "loss": 2.5221, + "step": 10558 + }, + { + "epoch": 0.852150754579937, + "grad_norm": 0.6645115613937378, + "learning_rate": 9.198884984585932e-05, + "loss": 2.4755, + "step": 10559 + }, + { + "epoch": 0.85223145831652, + "grad_norm": 0.6577785015106201, + "learning_rate": 9.197311378853176e-05, + "loss": 2.5491, + "step": 10560 + }, + { + "epoch": 0.852312162053103, + "grad_norm": 0.7311568856239319, + "learning_rate": 9.195737793125517e-05, + "loss": 2.5653, + "step": 10561 + }, + { + "epoch": 0.8523928657896861, + "grad_norm": 0.6469970345497131, + "learning_rate": 9.194164227442174e-05, + "loss": 2.5384, + "step": 10562 + }, + { + "epoch": 0.8524735695262691, + "grad_norm": 0.6562933325767517, + "learning_rate": 
9.19259068184237e-05, + "loss": 2.5644, + "step": 10563 + }, + { + "epoch": 0.852554273262852, + "grad_norm": 0.7740273475646973, + "learning_rate": 9.19101715636531e-05, + "loss": 2.5868, + "step": 10564 + }, + { + "epoch": 0.852634976999435, + "grad_norm": 0.6461195349693298, + "learning_rate": 9.18944365105022e-05, + "loss": 2.4862, + "step": 10565 + }, + { + "epoch": 0.8527156807360181, + "grad_norm": 0.7230537533760071, + "learning_rate": 9.187870165936313e-05, + "loss": 2.5125, + "step": 10566 + }, + { + "epoch": 0.8527963844726011, + "grad_norm": 0.6858233213424683, + "learning_rate": 9.186296701062805e-05, + "loss": 2.5463, + "step": 10567 + }, + { + "epoch": 0.8528770882091841, + "grad_norm": 0.717407763004303, + "learning_rate": 9.184723256468908e-05, + "loss": 2.5399, + "step": 10568 + }, + { + "epoch": 0.852957791945767, + "grad_norm": 0.7537745237350464, + "learning_rate": 9.18314983219384e-05, + "loss": 2.5164, + "step": 10569 + }, + { + "epoch": 0.8530384956823501, + "grad_norm": 0.7068665027618408, + "learning_rate": 9.181576428276814e-05, + "loss": 2.5747, + "step": 10570 + }, + { + "epoch": 0.8531191994189331, + "grad_norm": 0.8013456463813782, + "learning_rate": 9.18000304475704e-05, + "loss": 2.5401, + "step": 10571 + }, + { + "epoch": 0.8531999031555161, + "grad_norm": 0.6458969712257385, + "learning_rate": 9.178429681673741e-05, + "loss": 2.4781, + "step": 10572 + }, + { + "epoch": 0.8532806068920991, + "grad_norm": 0.7235112190246582, + "learning_rate": 9.176856339066114e-05, + "loss": 2.5753, + "step": 10573 + }, + { + "epoch": 0.8533613106286821, + "grad_norm": 0.6815706491470337, + "learning_rate": 9.175283016973382e-05, + "loss": 2.5526, + "step": 10574 + }, + { + "epoch": 0.8534420143652651, + "grad_norm": 0.739747166633606, + "learning_rate": 9.173709715434751e-05, + "loss": 2.5631, + "step": 10575 + }, + { + "epoch": 0.8535227181018481, + "grad_norm": 0.7325060963630676, + "learning_rate": 9.172136434489437e-05, + "loss": 2.4925, + 
"step": 10576 + }, + { + "epoch": 0.8536034218384311, + "grad_norm": 0.6505454182624817, + "learning_rate": 9.170563174176645e-05, + "loss": 2.5423, + "step": 10577 + }, + { + "epoch": 0.8536841255750142, + "grad_norm": 0.7267098426818848, + "learning_rate": 9.168989934535586e-05, + "loss": 2.5687, + "step": 10578 + }, + { + "epoch": 0.8537648293115971, + "grad_norm": 0.7264497876167297, + "learning_rate": 9.167416715605476e-05, + "loss": 2.5165, + "step": 10579 + }, + { + "epoch": 0.8538455330481801, + "grad_norm": 0.7473852634429932, + "learning_rate": 9.165843517425509e-05, + "loss": 2.5837, + "step": 10580 + }, + { + "epoch": 0.8539262367847631, + "grad_norm": 0.7249133586883545, + "learning_rate": 9.164270340034906e-05, + "loss": 2.5805, + "step": 10581 + }, + { + "epoch": 0.8540069405213462, + "grad_norm": 0.7463760375976562, + "learning_rate": 9.162697183472875e-05, + "loss": 2.5067, + "step": 10582 + }, + { + "epoch": 0.8540876442579292, + "grad_norm": 0.7125511169433594, + "learning_rate": 9.161124047778614e-05, + "loss": 2.5093, + "step": 10583 + }, + { + "epoch": 0.8541683479945121, + "grad_norm": 0.7247455716133118, + "learning_rate": 9.159550932991335e-05, + "loss": 2.5356, + "step": 10584 + }, + { + "epoch": 0.8542490517310951, + "grad_norm": 0.7593860030174255, + "learning_rate": 9.157977839150246e-05, + "loss": 2.5477, + "step": 10585 + }, + { + "epoch": 0.8543297554676782, + "grad_norm": 0.6758295297622681, + "learning_rate": 9.156404766294547e-05, + "loss": 2.4748, + "step": 10586 + }, + { + "epoch": 0.8544104592042612, + "grad_norm": 0.7114073634147644, + "learning_rate": 9.154831714463447e-05, + "loss": 2.5479, + "step": 10587 + }, + { + "epoch": 0.8544911629408442, + "grad_norm": 0.6881263256072998, + "learning_rate": 9.153258683696156e-05, + "loss": 2.5471, + "step": 10588 + }, + { + "epoch": 0.8545718666774271, + "grad_norm": 0.6509317755699158, + "learning_rate": 9.151685674031866e-05, + "loss": 2.5239, + "step": 10589 + }, + { + "epoch": 
0.8546525704140102, + "grad_norm": 0.7754644751548767, + "learning_rate": 9.150112685509787e-05, + "loss": 2.5572, + "step": 10590 + }, + { + "epoch": 0.8547332741505932, + "grad_norm": 0.707080602645874, + "learning_rate": 9.148539718169118e-05, + "loss": 2.5572, + "step": 10591 + }, + { + "epoch": 0.8548139778871762, + "grad_norm": 0.6996685266494751, + "learning_rate": 9.146966772049073e-05, + "loss": 2.4968, + "step": 10592 + }, + { + "epoch": 0.8548946816237591, + "grad_norm": 0.6830589771270752, + "learning_rate": 9.145393847188841e-05, + "loss": 2.5795, + "step": 10593 + }, + { + "epoch": 0.8549753853603422, + "grad_norm": 0.7507784366607666, + "learning_rate": 9.143820943627628e-05, + "loss": 2.6135, + "step": 10594 + }, + { + "epoch": 0.8550560890969252, + "grad_norm": 0.673218309879303, + "learning_rate": 9.142248061404638e-05, + "loss": 2.5875, + "step": 10595 + }, + { + "epoch": 0.8551367928335082, + "grad_norm": 0.6861804723739624, + "learning_rate": 9.140675200559065e-05, + "loss": 2.5892, + "step": 10596 + }, + { + "epoch": 0.8552174965700912, + "grad_norm": 0.6928709149360657, + "learning_rate": 9.139102361130114e-05, + "loss": 2.5303, + "step": 10597 + }, + { + "epoch": 0.8552982003066743, + "grad_norm": 0.6958343386650085, + "learning_rate": 9.137529543156986e-05, + "loss": 2.5567, + "step": 10598 + }, + { + "epoch": 0.8553789040432572, + "grad_norm": 0.703845739364624, + "learning_rate": 9.135956746678873e-05, + "loss": 2.5215, + "step": 10599 + }, + { + "epoch": 0.8554596077798402, + "grad_norm": 0.7108649015426636, + "learning_rate": 9.134383971734975e-05, + "loss": 2.5687, + "step": 10600 + }, + { + "epoch": 0.8555403115164232, + "grad_norm": 0.7249850034713745, + "learning_rate": 9.132811218364495e-05, + "loss": 2.565, + "step": 10601 + }, + { + "epoch": 0.8556210152530063, + "grad_norm": 0.7060014009475708, + "learning_rate": 9.131238486606623e-05, + "loss": 2.5366, + "step": 10602 + }, + { + "epoch": 0.8557017189895892, + "grad_norm": 
0.6915088891983032, + "learning_rate": 9.129665776500559e-05, + "loss": 2.527, + "step": 10603 + }, + { + "epoch": 0.8557824227261722, + "grad_norm": 0.7226938605308533, + "learning_rate": 9.128093088085503e-05, + "loss": 2.5999, + "step": 10604 + }, + { + "epoch": 0.8558631264627552, + "grad_norm": 0.6802428364753723, + "learning_rate": 9.126520421400641e-05, + "loss": 2.4788, + "step": 10605 + }, + { + "epoch": 0.8559438301993383, + "grad_norm": 0.7855350375175476, + "learning_rate": 9.124947776485175e-05, + "loss": 2.5349, + "step": 10606 + }, + { + "epoch": 0.8560245339359213, + "grad_norm": 0.6758337020874023, + "learning_rate": 9.123375153378296e-05, + "loss": 2.5874, + "step": 10607 + }, + { + "epoch": 0.8561052376725042, + "grad_norm": 0.675061821937561, + "learning_rate": 9.121802552119206e-05, + "loss": 2.5343, + "step": 10608 + }, + { + "epoch": 0.8561859414090872, + "grad_norm": 0.7044726014137268, + "learning_rate": 9.120229972747087e-05, + "loss": 2.5361, + "step": 10609 + }, + { + "epoch": 0.8562666451456702, + "grad_norm": 0.6324402689933777, + "learning_rate": 9.118657415301137e-05, + "loss": 2.5039, + "step": 10610 + }, + { + "epoch": 0.8563473488822533, + "grad_norm": 0.6621509790420532, + "learning_rate": 9.11708487982055e-05, + "loss": 2.5346, + "step": 10611 + }, + { + "epoch": 0.8564280526188363, + "grad_norm": 0.6709887981414795, + "learning_rate": 9.115512366344516e-05, + "loss": 2.5409, + "step": 10612 + }, + { + "epoch": 0.8565087563554192, + "grad_norm": 0.7237712740898132, + "learning_rate": 9.113939874912223e-05, + "loss": 2.5051, + "step": 10613 + }, + { + "epoch": 0.8565894600920022, + "grad_norm": 0.6646109223365784, + "learning_rate": 9.11236740556287e-05, + "loss": 2.5866, + "step": 10614 + }, + { + "epoch": 0.8566701638285853, + "grad_norm": 0.7131930589675903, + "learning_rate": 9.110794958335637e-05, + "loss": 2.5472, + "step": 10615 + }, + { + "epoch": 0.8567508675651683, + "grad_norm": 0.6662428975105286, + "learning_rate": 
9.109222533269715e-05, + "loss": 2.4863, + "step": 10616 + }, + { + "epoch": 0.8568315713017512, + "grad_norm": 0.6527226567268372, + "learning_rate": 9.107650130404304e-05, + "loss": 2.5594, + "step": 10617 + }, + { + "epoch": 0.8569122750383342, + "grad_norm": 0.6639060378074646, + "learning_rate": 9.106077749778578e-05, + "loss": 2.5519, + "step": 10618 + }, + { + "epoch": 0.8569929787749173, + "grad_norm": 0.7088096737861633, + "learning_rate": 9.104505391431734e-05, + "loss": 2.5404, + "step": 10619 + }, + { + "epoch": 0.8570736825115003, + "grad_norm": 0.7155873775482178, + "learning_rate": 9.102933055402957e-05, + "loss": 2.5636, + "step": 10620 + }, + { + "epoch": 0.8571543862480833, + "grad_norm": 0.6522316932678223, + "learning_rate": 9.101360741731431e-05, + "loss": 2.5216, + "step": 10621 + }, + { + "epoch": 0.8572350899846662, + "grad_norm": 0.6515649557113647, + "learning_rate": 9.099788450456345e-05, + "loss": 2.5804, + "step": 10622 + }, + { + "epoch": 0.8573157937212493, + "grad_norm": 0.6791853904724121, + "learning_rate": 9.098216181616883e-05, + "loss": 2.5353, + "step": 10623 + }, + { + "epoch": 0.8573964974578323, + "grad_norm": 0.6946877241134644, + "learning_rate": 9.096643935252236e-05, + "loss": 2.5492, + "step": 10624 + }, + { + "epoch": 0.8574772011944153, + "grad_norm": 0.7235898375511169, + "learning_rate": 9.095071711401581e-05, + "loss": 2.5178, + "step": 10625 + }, + { + "epoch": 0.8575579049309983, + "grad_norm": 0.6740610003471375, + "learning_rate": 9.093499510104102e-05, + "loss": 2.5699, + "step": 10626 + }, + { + "epoch": 0.8576386086675813, + "grad_norm": 0.7441792488098145, + "learning_rate": 9.091927331398988e-05, + "loss": 2.579, + "step": 10627 + }, + { + "epoch": 0.8577193124041643, + "grad_norm": 0.6986937522888184, + "learning_rate": 9.090355175325416e-05, + "loss": 2.5556, + "step": 10628 + }, + { + "epoch": 0.8578000161407473, + "grad_norm": 0.6960151791572571, + "learning_rate": 9.08878304192257e-05, + "loss": 
2.5448, + "step": 10629 + }, + { + "epoch": 0.8578807198773303, + "grad_norm": 0.6376819014549255, + "learning_rate": 9.087210931229636e-05, + "loss": 2.4636, + "step": 10630 + }, + { + "epoch": 0.8579614236139134, + "grad_norm": 0.752473771572113, + "learning_rate": 9.08563884328579e-05, + "loss": 2.5451, + "step": 10631 + }, + { + "epoch": 0.8580421273504963, + "grad_norm": 0.6879361867904663, + "learning_rate": 9.084066778130213e-05, + "loss": 2.5365, + "step": 10632 + }, + { + "epoch": 0.8581228310870793, + "grad_norm": 0.6630483865737915, + "learning_rate": 9.082494735802091e-05, + "loss": 2.5085, + "step": 10633 + }, + { + "epoch": 0.8582035348236623, + "grad_norm": 0.689602792263031, + "learning_rate": 9.080922716340594e-05, + "loss": 2.5087, + "step": 10634 + }, + { + "epoch": 0.8582842385602454, + "grad_norm": 0.7333599925041199, + "learning_rate": 9.079350719784905e-05, + "loss": 2.5476, + "step": 10635 + }, + { + "epoch": 0.8583649422968284, + "grad_norm": 0.6895802021026611, + "learning_rate": 9.077778746174204e-05, + "loss": 2.5099, + "step": 10636 + }, + { + "epoch": 0.8584456460334113, + "grad_norm": 0.7202162146568298, + "learning_rate": 9.076206795547668e-05, + "loss": 2.5197, + "step": 10637 + }, + { + "epoch": 0.8585263497699943, + "grad_norm": 0.6454200148582458, + "learning_rate": 9.074634867944472e-05, + "loss": 2.5303, + "step": 10638 + }, + { + "epoch": 0.8586070535065774, + "grad_norm": 0.6842506527900696, + "learning_rate": 9.073062963403795e-05, + "loss": 2.5051, + "step": 10639 + }, + { + "epoch": 0.8586877572431604, + "grad_norm": 0.6979129314422607, + "learning_rate": 9.071491081964815e-05, + "loss": 2.5209, + "step": 10640 + }, + { + "epoch": 0.8587684609797434, + "grad_norm": 0.6851540803909302, + "learning_rate": 9.0699192236667e-05, + "loss": 2.5003, + "step": 10641 + }, + { + "epoch": 0.8588491647163263, + "grad_norm": 0.7528585195541382, + "learning_rate": 9.068347388548627e-05, + "loss": 2.5524, + "step": 10642 + }, + { + 
"epoch": 0.8589298684529094, + "grad_norm": 0.6297397613525391, + "learning_rate": 9.06677557664978e-05, + "loss": 2.5412, + "step": 10643 + }, + { + "epoch": 0.8590105721894924, + "grad_norm": 0.7034026980400085, + "learning_rate": 9.06520378800932e-05, + "loss": 2.4958, + "step": 10644 + }, + { + "epoch": 0.8590912759260754, + "grad_norm": 0.690258800983429, + "learning_rate": 9.063632022666425e-05, + "loss": 2.4894, + "step": 10645 + }, + { + "epoch": 0.8591719796626583, + "grad_norm": 0.6449949145317078, + "learning_rate": 9.06206028066027e-05, + "loss": 2.507, + "step": 10646 + }, + { + "epoch": 0.8592526833992414, + "grad_norm": 0.6328588724136353, + "learning_rate": 9.060488562030023e-05, + "loss": 2.5503, + "step": 10647 + }, + { + "epoch": 0.8593333871358244, + "grad_norm": 0.6570547819137573, + "learning_rate": 9.058916866814858e-05, + "loss": 2.4993, + "step": 10648 + }, + { + "epoch": 0.8594140908724074, + "grad_norm": 0.7689602375030518, + "learning_rate": 9.057345195053945e-05, + "loss": 2.5498, + "step": 10649 + }, + { + "epoch": 0.8594947946089904, + "grad_norm": 0.6727081537246704, + "learning_rate": 9.055773546786454e-05, + "loss": 2.5172, + "step": 10650 + }, + { + "epoch": 0.8595754983455735, + "grad_norm": 0.694722056388855, + "learning_rate": 9.054201922051552e-05, + "loss": 2.5485, + "step": 10651 + }, + { + "epoch": 0.8596562020821564, + "grad_norm": 0.6638815999031067, + "learning_rate": 9.052630320888411e-05, + "loss": 2.5134, + "step": 10652 + }, + { + "epoch": 0.8597369058187394, + "grad_norm": 0.6600833535194397, + "learning_rate": 9.0510587433362e-05, + "loss": 2.5206, + "step": 10653 + }, + { + "epoch": 0.8598176095553224, + "grad_norm": 0.7193894386291504, + "learning_rate": 9.049487189434084e-05, + "loss": 2.5485, + "step": 10654 + }, + { + "epoch": 0.8598983132919055, + "grad_norm": 0.6651753187179565, + "learning_rate": 9.047915659221233e-05, + "loss": 2.5703, + "step": 10655 + }, + { + "epoch": 0.8599790170284884, + "grad_norm": 
0.7346364855766296, + "learning_rate": 9.046344152736815e-05, + "loss": 2.5301, + "step": 10656 + }, + { + "epoch": 0.8600597207650714, + "grad_norm": 0.6681811809539795, + "learning_rate": 9.04477267001999e-05, + "loss": 2.5124, + "step": 10657 + }, + { + "epoch": 0.8601404245016544, + "grad_norm": 0.6928461790084839, + "learning_rate": 9.043201211109929e-05, + "loss": 2.5153, + "step": 10658 + }, + { + "epoch": 0.8602211282382374, + "grad_norm": 0.6957700252532959, + "learning_rate": 9.041629776045797e-05, + "loss": 2.4697, + "step": 10659 + }, + { + "epoch": 0.8603018319748205, + "grad_norm": 0.6361939311027527, + "learning_rate": 9.040058364866752e-05, + "loss": 2.5162, + "step": 10660 + }, + { + "epoch": 0.8603825357114034, + "grad_norm": 0.6827390193939209, + "learning_rate": 9.038486977611964e-05, + "loss": 2.4856, + "step": 10661 + }, + { + "epoch": 0.8604632394479864, + "grad_norm": 0.6638801097869873, + "learning_rate": 9.036915614320595e-05, + "loss": 2.5224, + "step": 10662 + }, + { + "epoch": 0.8605439431845694, + "grad_norm": 0.7249652743339539, + "learning_rate": 9.035344275031802e-05, + "loss": 2.5461, + "step": 10663 + }, + { + "epoch": 0.8606246469211525, + "grad_norm": 0.6693316102027893, + "learning_rate": 9.033772959784754e-05, + "loss": 2.5676, + "step": 10664 + }, + { + "epoch": 0.8607053506577355, + "grad_norm": 0.6787340641021729, + "learning_rate": 9.032201668618614e-05, + "loss": 2.5374, + "step": 10665 + }, + { + "epoch": 0.8607860543943184, + "grad_norm": 0.6581670641899109, + "learning_rate": 9.030630401572533e-05, + "loss": 2.5052, + "step": 10666 + }, + { + "epoch": 0.8608667581309014, + "grad_norm": 0.6975873112678528, + "learning_rate": 9.029059158685675e-05, + "loss": 2.4823, + "step": 10667 + }, + { + "epoch": 0.8609474618674845, + "grad_norm": 0.6632521748542786, + "learning_rate": 9.027487939997201e-05, + "loss": 2.5992, + "step": 10668 + }, + { + "epoch": 0.8610281656040675, + "grad_norm": 0.6793977618217468, + 
"learning_rate": 9.025916745546276e-05, + "loss": 2.5308, + "step": 10669 + }, + { + "epoch": 0.8611088693406505, + "grad_norm": 0.6499481797218323, + "learning_rate": 9.024345575372046e-05, + "loss": 2.4964, + "step": 10670 + }, + { + "epoch": 0.8611895730772334, + "grad_norm": 0.6858868598937988, + "learning_rate": 9.022774429513677e-05, + "loss": 2.5388, + "step": 10671 + }, + { + "epoch": 0.8612702768138165, + "grad_norm": 0.7586160898208618, + "learning_rate": 9.021203308010324e-05, + "loss": 2.5166, + "step": 10672 + }, + { + "epoch": 0.8613509805503995, + "grad_norm": 0.7179701328277588, + "learning_rate": 9.019632210901141e-05, + "loss": 2.5501, + "step": 10673 + }, + { + "epoch": 0.8614316842869825, + "grad_norm": 0.6830369830131531, + "learning_rate": 9.018061138225287e-05, + "loss": 2.4956, + "step": 10674 + }, + { + "epoch": 0.8615123880235654, + "grad_norm": 0.6710512042045593, + "learning_rate": 9.01649009002192e-05, + "loss": 2.5722, + "step": 10675 + }, + { + "epoch": 0.8615930917601485, + "grad_norm": 0.640011727809906, + "learning_rate": 9.014919066330186e-05, + "loss": 2.5197, + "step": 10676 + }, + { + "epoch": 0.8616737954967315, + "grad_norm": 0.6803860664367676, + "learning_rate": 9.013348067189245e-05, + "loss": 2.4794, + "step": 10677 + }, + { + "epoch": 0.8617544992333145, + "grad_norm": 0.6734865307807922, + "learning_rate": 9.011777092638251e-05, + "loss": 2.5831, + "step": 10678 + }, + { + "epoch": 0.8618352029698975, + "grad_norm": 0.6525718569755554, + "learning_rate": 9.010206142716353e-05, + "loss": 2.4925, + "step": 10679 + }, + { + "epoch": 0.8619159067064806, + "grad_norm": 0.6886672377586365, + "learning_rate": 9.008635217462706e-05, + "loss": 2.491, + "step": 10680 + }, + { + "epoch": 0.8619966104430635, + "grad_norm": 0.6397131085395813, + "learning_rate": 9.007064316916461e-05, + "loss": 2.4684, + "step": 10681 + }, + { + "epoch": 0.8620773141796465, + "grad_norm": 0.6308462023735046, + "learning_rate": 9.005493441116768e-05, 
+ "loss": 2.504, + "step": 10682 + }, + { + "epoch": 0.8621580179162295, + "grad_norm": 0.7223808169364929, + "learning_rate": 9.003922590102778e-05, + "loss": 2.5342, + "step": 10683 + }, + { + "epoch": 0.8622387216528126, + "grad_norm": 0.687515914440155, + "learning_rate": 9.002351763913642e-05, + "loss": 2.4822, + "step": 10684 + }, + { + "epoch": 0.8623194253893955, + "grad_norm": 0.6888468265533447, + "learning_rate": 9.00078096258851e-05, + "loss": 2.5497, + "step": 10685 + }, + { + "epoch": 0.8624001291259785, + "grad_norm": 0.7429301738739014, + "learning_rate": 8.999210186166525e-05, + "loss": 2.624, + "step": 10686 + }, + { + "epoch": 0.8624808328625615, + "grad_norm": 0.6901945471763611, + "learning_rate": 8.997639434686839e-05, + "loss": 2.5268, + "step": 10687 + }, + { + "epoch": 0.8625615365991446, + "grad_norm": 0.7396681308746338, + "learning_rate": 8.9960687081886e-05, + "loss": 2.5427, + "step": 10688 + }, + { + "epoch": 0.8626422403357276, + "grad_norm": 0.6825531125068665, + "learning_rate": 8.99449800671095e-05, + "loss": 2.5722, + "step": 10689 + }, + { + "epoch": 0.8627229440723105, + "grad_norm": 0.6719860434532166, + "learning_rate": 8.992927330293039e-05, + "loss": 2.4939, + "step": 10690 + }, + { + "epoch": 0.8628036478088935, + "grad_norm": 0.644567608833313, + "learning_rate": 8.991356678974017e-05, + "loss": 2.5495, + "step": 10691 + }, + { + "epoch": 0.8628843515454766, + "grad_norm": 0.7066643834114075, + "learning_rate": 8.989786052793015e-05, + "loss": 2.5508, + "step": 10692 + }, + { + "epoch": 0.8629650552820596, + "grad_norm": 0.6697196364402771, + "learning_rate": 8.988215451789187e-05, + "loss": 2.5231, + "step": 10693 + }, + { + "epoch": 0.8630457590186426, + "grad_norm": 0.7143658399581909, + "learning_rate": 8.986644876001681e-05, + "loss": 2.5368, + "step": 10694 + }, + { + "epoch": 0.8631264627552255, + "grad_norm": 0.7597684264183044, + "learning_rate": 8.985074325469628e-05, + "loss": 2.5983, + "step": 10695 + }, + { + 
"epoch": 0.8632071664918086, + "grad_norm": 0.7418014407157898, + "learning_rate": 8.983503800232176e-05, + "loss": 2.5736, + "step": 10696 + }, + { + "epoch": 0.8632878702283916, + "grad_norm": 0.654435932636261, + "learning_rate": 8.981933300328468e-05, + "loss": 2.5389, + "step": 10697 + }, + { + "epoch": 0.8633685739649746, + "grad_norm": 0.658203661441803, + "learning_rate": 8.980362825797643e-05, + "loss": 2.5204, + "step": 10698 + }, + { + "epoch": 0.8634492777015575, + "grad_norm": 0.7132784724235535, + "learning_rate": 8.97879237667884e-05, + "loss": 2.4982, + "step": 10699 + }, + { + "epoch": 0.8635299814381406, + "grad_norm": 0.6901868581771851, + "learning_rate": 8.9772219530112e-05, + "loss": 2.5599, + "step": 10700 + }, + { + "epoch": 0.8636106851747236, + "grad_norm": 0.6241179704666138, + "learning_rate": 8.975651554833869e-05, + "loss": 2.5185, + "step": 10701 + }, + { + "epoch": 0.8636913889113066, + "grad_norm": 0.693692147731781, + "learning_rate": 8.974081182185974e-05, + "loss": 2.506, + "step": 10702 + }, + { + "epoch": 0.8637720926478896, + "grad_norm": 0.6699246168136597, + "learning_rate": 8.972510835106658e-05, + "loss": 2.557, + "step": 10703 + }, + { + "epoch": 0.8638527963844727, + "grad_norm": 0.7339062094688416, + "learning_rate": 8.970940513635059e-05, + "loss": 2.5614, + "step": 10704 + }, + { + "epoch": 0.8639335001210556, + "grad_norm": 0.7558815479278564, + "learning_rate": 8.969370217810311e-05, + "loss": 2.5949, + "step": 10705 + }, + { + "epoch": 0.8640142038576386, + "grad_norm": 0.6992602348327637, + "learning_rate": 8.96779994767155e-05, + "loss": 2.4755, + "step": 10706 + }, + { + "epoch": 0.8640949075942216, + "grad_norm": 0.6836397647857666, + "learning_rate": 8.966229703257915e-05, + "loss": 2.5172, + "step": 10707 + }, + { + "epoch": 0.8641756113308047, + "grad_norm": 0.7054563760757446, + "learning_rate": 8.964659484608537e-05, + "loss": 2.5186, + "step": 10708 + }, + { + "epoch": 0.8642563150673876, + "grad_norm": 
0.7096611261367798, + "learning_rate": 8.963089291762551e-05, + "loss": 2.5157, + "step": 10709 + }, + { + "epoch": 0.8643370188039706, + "grad_norm": 0.657465934753418, + "learning_rate": 8.961519124759094e-05, + "loss": 2.5332, + "step": 10710 + }, + { + "epoch": 0.8644177225405536, + "grad_norm": 0.7490121126174927, + "learning_rate": 8.959948983637291e-05, + "loss": 2.512, + "step": 10711 + }, + { + "epoch": 0.8644984262771366, + "grad_norm": 0.7074166536331177, + "learning_rate": 8.958378868436279e-05, + "loss": 2.4745, + "step": 10712 + }, + { + "epoch": 0.8645791300137197, + "grad_norm": 0.7496227025985718, + "learning_rate": 8.956808779195188e-05, + "loss": 2.5533, + "step": 10713 + }, + { + "epoch": 0.8646598337503026, + "grad_norm": 0.6624657511711121, + "learning_rate": 8.95523871595315e-05, + "loss": 2.5346, + "step": 10714 + }, + { + "epoch": 0.8647405374868856, + "grad_norm": 0.6829125881195068, + "learning_rate": 8.953668678749292e-05, + "loss": 2.558, + "step": 10715 + }, + { + "epoch": 0.8648212412234686, + "grad_norm": 0.6954498887062073, + "learning_rate": 8.952098667622745e-05, + "loss": 2.5617, + "step": 10716 + }, + { + "epoch": 0.8649019449600517, + "grad_norm": 0.6722636818885803, + "learning_rate": 8.950528682612645e-05, + "loss": 2.5565, + "step": 10717 + }, + { + "epoch": 0.8649826486966347, + "grad_norm": 0.6793767213821411, + "learning_rate": 8.948958723758107e-05, + "loss": 2.5803, + "step": 10718 + }, + { + "epoch": 0.8650633524332176, + "grad_norm": 0.7159373760223389, + "learning_rate": 8.947388791098266e-05, + "loss": 2.5465, + "step": 10719 + }, + { + "epoch": 0.8651440561698006, + "grad_norm": 0.6823835372924805, + "learning_rate": 8.945818884672253e-05, + "loss": 2.5079, + "step": 10720 + }, + { + "epoch": 0.8652247599063837, + "grad_norm": 0.7521452903747559, + "learning_rate": 8.944249004519185e-05, + "loss": 2.5628, + "step": 10721 + }, + { + "epoch": 0.8653054636429667, + "grad_norm": 0.6774886846542358, + "learning_rate": 
8.94267915067819e-05, + "loss": 2.6042, + "step": 10722 + }, + { + "epoch": 0.8653861673795497, + "grad_norm": 0.6915935277938843, + "learning_rate": 8.941109323188398e-05, + "loss": 2.5563, + "step": 10723 + }, + { + "epoch": 0.8654668711161326, + "grad_norm": 0.6609061360359192, + "learning_rate": 8.939539522088927e-05, + "loss": 2.5083, + "step": 10724 + }, + { + "epoch": 0.8655475748527157, + "grad_norm": 0.6457223892211914, + "learning_rate": 8.937969747418903e-05, + "loss": 2.573, + "step": 10725 + }, + { + "epoch": 0.8656282785892987, + "grad_norm": 0.6960360407829285, + "learning_rate": 8.936399999217455e-05, + "loss": 2.516, + "step": 10726 + }, + { + "epoch": 0.8657089823258817, + "grad_norm": 0.7269721627235413, + "learning_rate": 8.934830277523693e-05, + "loss": 2.5932, + "step": 10727 + }, + { + "epoch": 0.8657896860624646, + "grad_norm": 0.7057532668113708, + "learning_rate": 8.933260582376745e-05, + "loss": 2.5022, + "step": 10728 + }, + { + "epoch": 0.8658703897990477, + "grad_norm": 0.6698749661445618, + "learning_rate": 8.931690913815735e-05, + "loss": 2.5357, + "step": 10729 + }, + { + "epoch": 0.8659510935356307, + "grad_norm": 0.6616599559783936, + "learning_rate": 8.930121271879777e-05, + "loss": 2.4776, + "step": 10730 + }, + { + "epoch": 0.8660317972722137, + "grad_norm": 0.7457093000411987, + "learning_rate": 8.928551656607993e-05, + "loss": 2.5799, + "step": 10731 + }, + { + "epoch": 0.8661125010087967, + "grad_norm": 0.7199469804763794, + "learning_rate": 8.926982068039505e-05, + "loss": 2.5278, + "step": 10732 + }, + { + "epoch": 0.8661932047453798, + "grad_norm": 0.7579182386398315, + "learning_rate": 8.925412506213428e-05, + "loss": 2.5227, + "step": 10733 + }, + { + "epoch": 0.8662739084819627, + "grad_norm": 0.687455952167511, + "learning_rate": 8.92384297116888e-05, + "loss": 2.5099, + "step": 10734 + }, + { + "epoch": 0.8663546122185457, + "grad_norm": 0.7616521120071411, + "learning_rate": 8.922273462944978e-05, + "loss": 2.598, + 
"step": 10735 + }, + { + "epoch": 0.8664353159551287, + "grad_norm": 0.6730697751045227, + "learning_rate": 8.920703981580842e-05, + "loss": 2.5517, + "step": 10736 + }, + { + "epoch": 0.8665160196917118, + "grad_norm": 0.6769895553588867, + "learning_rate": 8.91913452711558e-05, + "loss": 2.5535, + "step": 10737 + }, + { + "epoch": 0.8665967234282947, + "grad_norm": 0.6284549832344055, + "learning_rate": 8.917565099588312e-05, + "loss": 2.4597, + "step": 10738 + }, + { + "epoch": 0.8666774271648777, + "grad_norm": 0.6900805830955505, + "learning_rate": 8.915995699038152e-05, + "loss": 2.5236, + "step": 10739 + }, + { + "epoch": 0.8667581309014607, + "grad_norm": 0.6842896938323975, + "learning_rate": 8.914426325504211e-05, + "loss": 2.5199, + "step": 10740 + }, + { + "epoch": 0.8668388346380438, + "grad_norm": 0.6637243628501892, + "learning_rate": 8.912856979025604e-05, + "loss": 2.5368, + "step": 10741 + }, + { + "epoch": 0.8669195383746268, + "grad_norm": 0.7474464178085327, + "learning_rate": 8.911287659641449e-05, + "loss": 2.4902, + "step": 10742 + }, + { + "epoch": 0.8670002421112097, + "grad_norm": 0.6977849006652832, + "learning_rate": 8.909718367390843e-05, + "loss": 2.5034, + "step": 10743 + }, + { + "epoch": 0.8670809458477927, + "grad_norm": 0.6968807578086853, + "learning_rate": 8.908149102312907e-05, + "loss": 2.5396, + "step": 10744 + }, + { + "epoch": 0.8671616495843758, + "grad_norm": 0.6656209230422974, + "learning_rate": 8.906579864446755e-05, + "loss": 2.5702, + "step": 10745 + }, + { + "epoch": 0.8672423533209588, + "grad_norm": 0.7079079151153564, + "learning_rate": 8.905010653831486e-05, + "loss": 2.5344, + "step": 10746 + }, + { + "epoch": 0.8673230570575418, + "grad_norm": 0.7423387765884399, + "learning_rate": 8.903441470506214e-05, + "loss": 2.5635, + "step": 10747 + }, + { + "epoch": 0.8674037607941247, + "grad_norm": 0.6607224941253662, + "learning_rate": 8.901872314510046e-05, + "loss": 2.54, + "step": 10748 + }, + { + "epoch": 
0.8674844645307078, + "grad_norm": 0.6646947860717773, + "learning_rate": 8.900303185882095e-05, + "loss": 2.4661, + "step": 10749 + }, + { + "epoch": 0.8675651682672908, + "grad_norm": 0.6943496465682983, + "learning_rate": 8.89873408466146e-05, + "loss": 2.5213, + "step": 10750 + }, + { + "epoch": 0.8676458720038738, + "grad_norm": 0.7048123478889465, + "learning_rate": 8.89716501088725e-05, + "loss": 2.5529, + "step": 10751 + }, + { + "epoch": 0.8677265757404568, + "grad_norm": 0.654617428779602, + "learning_rate": 8.895595964598574e-05, + "loss": 2.5535, + "step": 10752 + }, + { + "epoch": 0.8678072794770398, + "grad_norm": 0.672063410282135, + "learning_rate": 8.894026945834531e-05, + "loss": 2.5279, + "step": 10753 + }, + { + "epoch": 0.8678879832136228, + "grad_norm": 0.7134148478507996, + "learning_rate": 8.892457954634225e-05, + "loss": 2.5403, + "step": 10754 + }, + { + "epoch": 0.8679686869502058, + "grad_norm": 0.6457598805427551, + "learning_rate": 8.890888991036768e-05, + "loss": 2.515, + "step": 10755 + }, + { + "epoch": 0.8680493906867888, + "grad_norm": 0.6725220084190369, + "learning_rate": 8.889320055081252e-05, + "loss": 2.4829, + "step": 10756 + }, + { + "epoch": 0.8681300944233719, + "grad_norm": 0.6425862312316895, + "learning_rate": 8.887751146806785e-05, + "loss": 2.4965, + "step": 10757 + }, + { + "epoch": 0.8682107981599548, + "grad_norm": 0.6654682755470276, + "learning_rate": 8.886182266252468e-05, + "loss": 2.48, + "step": 10758 + }, + { + "epoch": 0.8682915018965378, + "grad_norm": 0.7102493643760681, + "learning_rate": 8.884613413457398e-05, + "loss": 2.5415, + "step": 10759 + }, + { + "epoch": 0.8683722056331208, + "grad_norm": 0.6996567249298096, + "learning_rate": 8.883044588460677e-05, + "loss": 2.542, + "step": 10760 + }, + { + "epoch": 0.8684529093697038, + "grad_norm": 0.7011905312538147, + "learning_rate": 8.881475791301405e-05, + "loss": 2.5391, + "step": 10761 + }, + { + "epoch": 0.8685336131062869, + "grad_norm": 
0.6508356928825378, + "learning_rate": 8.879907022018686e-05, + "loss": 2.4892, + "step": 10762 + }, + { + "epoch": 0.8686143168428698, + "grad_norm": 0.7104009985923767, + "learning_rate": 8.878338280651605e-05, + "loss": 2.5152, + "step": 10763 + }, + { + "epoch": 0.8686950205794528, + "grad_norm": 0.6501138210296631, + "learning_rate": 8.876769567239268e-05, + "loss": 2.5767, + "step": 10764 + }, + { + "epoch": 0.8687757243160358, + "grad_norm": 0.6463173031806946, + "learning_rate": 8.875200881820771e-05, + "loss": 2.4758, + "step": 10765 + }, + { + "epoch": 0.8688564280526189, + "grad_norm": 0.6494991779327393, + "learning_rate": 8.873632224435206e-05, + "loss": 2.5364, + "step": 10766 + }, + { + "epoch": 0.8689371317892018, + "grad_norm": 0.6926043033599854, + "learning_rate": 8.872063595121671e-05, + "loss": 2.5288, + "step": 10767 + }, + { + "epoch": 0.8690178355257848, + "grad_norm": 0.7076035737991333, + "learning_rate": 8.870494993919261e-05, + "loss": 2.5118, + "step": 10768 + }, + { + "epoch": 0.8690985392623678, + "grad_norm": 0.6456892490386963, + "learning_rate": 8.868926420867068e-05, + "loss": 2.4957, + "step": 10769 + }, + { + "epoch": 0.8691792429989509, + "grad_norm": 0.6585200428962708, + "learning_rate": 8.867357876004183e-05, + "loss": 2.5049, + "step": 10770 + }, + { + "epoch": 0.8692599467355339, + "grad_norm": 0.6893252730369568, + "learning_rate": 8.865789359369706e-05, + "loss": 2.4808, + "step": 10771 + }, + { + "epoch": 0.8693406504721168, + "grad_norm": 0.6700639724731445, + "learning_rate": 8.864220871002719e-05, + "loss": 2.5475, + "step": 10772 + }, + { + "epoch": 0.8694213542086998, + "grad_norm": 0.6551913619041443, + "learning_rate": 8.862652410942315e-05, + "loss": 2.5063, + "step": 10773 + }, + { + "epoch": 0.8695020579452829, + "grad_norm": 0.6870427131652832, + "learning_rate": 8.86108397922759e-05, + "loss": 2.5785, + "step": 10774 + }, + { + "epoch": 0.8695827616818659, + "grad_norm": 0.6489934325218201, + 
"learning_rate": 8.859515575897626e-05, + "loss": 2.5584, + "step": 10775 + }, + { + "epoch": 0.8696634654184489, + "grad_norm": 0.6726663112640381, + "learning_rate": 8.857947200991517e-05, + "loss": 2.5707, + "step": 10776 + }, + { + "epoch": 0.8697441691550318, + "grad_norm": 0.7696183323860168, + "learning_rate": 8.856378854548347e-05, + "loss": 2.501, + "step": 10777 + }, + { + "epoch": 0.8698248728916149, + "grad_norm": 0.7002642154693604, + "learning_rate": 8.854810536607212e-05, + "loss": 2.5792, + "step": 10778 + }, + { + "epoch": 0.8699055766281979, + "grad_norm": 0.6429435610771179, + "learning_rate": 8.853242247207185e-05, + "loss": 2.5463, + "step": 10779 + }, + { + "epoch": 0.8699862803647809, + "grad_norm": 0.7006216645240784, + "learning_rate": 8.851673986387358e-05, + "loss": 2.5698, + "step": 10780 + }, + { + "epoch": 0.8700669841013638, + "grad_norm": 0.7053292989730835, + "learning_rate": 8.850105754186824e-05, + "loss": 2.5468, + "step": 10781 + }, + { + "epoch": 0.8701476878379469, + "grad_norm": 0.6592122912406921, + "learning_rate": 8.848537550644654e-05, + "loss": 2.5271, + "step": 10782 + }, + { + "epoch": 0.8702283915745299, + "grad_norm": 0.679132342338562, + "learning_rate": 8.846969375799941e-05, + "loss": 2.5281, + "step": 10783 + }, + { + "epoch": 0.8703090953111129, + "grad_norm": 0.6868568062782288, + "learning_rate": 8.845401229691765e-05, + "loss": 2.5415, + "step": 10784 + }, + { + "epoch": 0.8703897990476959, + "grad_norm": 0.7060674428939819, + "learning_rate": 8.843833112359208e-05, + "loss": 2.5649, + "step": 10785 + }, + { + "epoch": 0.870470502784279, + "grad_norm": 0.6663981676101685, + "learning_rate": 8.842265023841352e-05, + "loss": 2.5055, + "step": 10786 + }, + { + "epoch": 0.8705512065208619, + "grad_norm": 0.7095218896865845, + "learning_rate": 8.840696964177282e-05, + "loss": 2.5442, + "step": 10787 + }, + { + "epoch": 0.8706319102574449, + "grad_norm": 0.6884104013442993, + "learning_rate": 8.839128933406069e-05, 
+ "loss": 2.5285, + "step": 10788 + }, + { + "epoch": 0.8707126139940279, + "grad_norm": 0.6427462697029114, + "learning_rate": 8.837560931566798e-05, + "loss": 2.5197, + "step": 10789 + }, + { + "epoch": 0.870793317730611, + "grad_norm": 0.6870493292808533, + "learning_rate": 8.835992958698548e-05, + "loss": 2.4937, + "step": 10790 + }, + { + "epoch": 0.870874021467194, + "grad_norm": 0.7006319761276245, + "learning_rate": 8.834425014840398e-05, + "loss": 2.5148, + "step": 10791 + }, + { + "epoch": 0.8709547252037769, + "grad_norm": 0.690601646900177, + "learning_rate": 8.83285710003142e-05, + "loss": 2.5454, + "step": 10792 + }, + { + "epoch": 0.8710354289403599, + "grad_norm": 0.7205955982208252, + "learning_rate": 8.831289214310695e-05, + "loss": 2.5221, + "step": 10793 + }, + { + "epoch": 0.871116132676943, + "grad_norm": 0.7134295105934143, + "learning_rate": 8.8297213577173e-05, + "loss": 2.5626, + "step": 10794 + }, + { + "epoch": 0.871196836413526, + "grad_norm": 0.6560496091842651, + "learning_rate": 8.828153530290307e-05, + "loss": 2.5408, + "step": 10795 + }, + { + "epoch": 0.8712775401501089, + "grad_norm": 0.7055882215499878, + "learning_rate": 8.82658573206879e-05, + "loss": 2.5173, + "step": 10796 + }, + { + "epoch": 0.8713582438866919, + "grad_norm": 0.6751883029937744, + "learning_rate": 8.825017963091827e-05, + "loss": 2.5378, + "step": 10797 + }, + { + "epoch": 0.871438947623275, + "grad_norm": 0.6794824600219727, + "learning_rate": 8.823450223398485e-05, + "loss": 2.592, + "step": 10798 + }, + { + "epoch": 0.871519651359858, + "grad_norm": 0.675729513168335, + "learning_rate": 8.821882513027838e-05, + "loss": 2.5253, + "step": 10799 + }, + { + "epoch": 0.871600355096441, + "grad_norm": 0.7185894250869751, + "learning_rate": 8.820314832018962e-05, + "loss": 2.5073, + "step": 10800 + }, + { + "epoch": 0.8716810588330239, + "grad_norm": 0.6605187654495239, + "learning_rate": 8.818747180410921e-05, + "loss": 2.5141, + "step": 10801 + }, + { + 
"epoch": 0.871761762569607, + "grad_norm": 0.6955205798149109, + "learning_rate": 8.817179558242788e-05, + "loss": 2.5313, + "step": 10802 + }, + { + "epoch": 0.87184246630619, + "grad_norm": 0.6307928562164307, + "learning_rate": 8.815611965553638e-05, + "loss": 2.4975, + "step": 10803 + }, + { + "epoch": 0.871923170042773, + "grad_norm": 0.7283728122711182, + "learning_rate": 8.814044402382527e-05, + "loss": 2.4623, + "step": 10804 + }, + { + "epoch": 0.872003873779356, + "grad_norm": 0.7019702792167664, + "learning_rate": 8.81247686876853e-05, + "loss": 2.4755, + "step": 10805 + }, + { + "epoch": 0.872084577515939, + "grad_norm": 0.6769137382507324, + "learning_rate": 8.81090936475072e-05, + "loss": 2.59, + "step": 10806 + }, + { + "epoch": 0.872165281252522, + "grad_norm": 0.6185588836669922, + "learning_rate": 8.80934189036815e-05, + "loss": 2.5308, + "step": 10807 + }, + { + "epoch": 0.872245984989105, + "grad_norm": 0.7127000689506531, + "learning_rate": 8.807774445659894e-05, + "loss": 2.5301, + "step": 10808 + }, + { + "epoch": 0.872326688725688, + "grad_norm": 0.7039114236831665, + "learning_rate": 8.806207030665016e-05, + "loss": 2.5176, + "step": 10809 + }, + { + "epoch": 0.8724073924622711, + "grad_norm": 0.6763370633125305, + "learning_rate": 8.804639645422582e-05, + "loss": 2.5324, + "step": 10810 + }, + { + "epoch": 0.872488096198854, + "grad_norm": 0.7546409368515015, + "learning_rate": 8.803072289971648e-05, + "loss": 2.5446, + "step": 10811 + }, + { + "epoch": 0.872568799935437, + "grad_norm": 0.6916004419326782, + "learning_rate": 8.801504964351284e-05, + "loss": 2.5056, + "step": 10812 + }, + { + "epoch": 0.87264950367202, + "grad_norm": 0.7108416557312012, + "learning_rate": 8.799937668600552e-05, + "loss": 2.5966, + "step": 10813 + }, + { + "epoch": 0.872730207408603, + "grad_norm": 0.7146576046943665, + "learning_rate": 8.798370402758506e-05, + "loss": 2.5152, + "step": 10814 + }, + { + "epoch": 0.872810911145186, + "grad_norm": 
0.6708142757415771, + "learning_rate": 8.796803166864211e-05, + "loss": 2.5248, + "step": 10815 + }, + { + "epoch": 0.872891614881769, + "grad_norm": 0.6687600612640381, + "learning_rate": 8.795235960956729e-05, + "loss": 2.4451, + "step": 10816 + }, + { + "epoch": 0.872972318618352, + "grad_norm": 0.724012553691864, + "learning_rate": 8.793668785075114e-05, + "loss": 2.4816, + "step": 10817 + }, + { + "epoch": 0.873053022354935, + "grad_norm": 0.6938769221305847, + "learning_rate": 8.792101639258426e-05, + "loss": 2.5435, + "step": 10818 + }, + { + "epoch": 0.8731337260915181, + "grad_norm": 0.7066235542297363, + "learning_rate": 8.790534523545724e-05, + "loss": 2.5167, + "step": 10819 + }, + { + "epoch": 0.873214429828101, + "grad_norm": 0.7129037380218506, + "learning_rate": 8.788967437976062e-05, + "loss": 2.5079, + "step": 10820 + }, + { + "epoch": 0.873295133564684, + "grad_norm": 0.6949728727340698, + "learning_rate": 8.787400382588497e-05, + "loss": 2.5564, + "step": 10821 + }, + { + "epoch": 0.873375837301267, + "grad_norm": 0.7924233675003052, + "learning_rate": 8.785833357422088e-05, + "loss": 2.5748, + "step": 10822 + }, + { + "epoch": 0.8734565410378501, + "grad_norm": 0.7486331462860107, + "learning_rate": 8.784266362515882e-05, + "loss": 2.565, + "step": 10823 + }, + { + "epoch": 0.8735372447744331, + "grad_norm": 0.7036460638046265, + "learning_rate": 8.782699397908935e-05, + "loss": 2.5101, + "step": 10824 + }, + { + "epoch": 0.873617948511016, + "grad_norm": 0.6691471338272095, + "learning_rate": 8.781132463640302e-05, + "loss": 2.5262, + "step": 10825 + }, + { + "epoch": 0.873698652247599, + "grad_norm": 0.6836682558059692, + "learning_rate": 8.779565559749037e-05, + "loss": 2.5651, + "step": 10826 + }, + { + "epoch": 0.8737793559841821, + "grad_norm": 0.6634507775306702, + "learning_rate": 8.777998686274185e-05, + "loss": 2.5383, + "step": 10827 + }, + { + "epoch": 0.8738600597207651, + "grad_norm": 0.6903105974197388, + "learning_rate": 
8.7764318432548e-05, + "loss": 2.5659, + "step": 10828 + }, + { + "epoch": 0.8739407634573481, + "grad_norm": 0.737859308719635, + "learning_rate": 8.774865030729937e-05, + "loss": 2.5859, + "step": 10829 + }, + { + "epoch": 0.874021467193931, + "grad_norm": 0.696843683719635, + "learning_rate": 8.773298248738633e-05, + "loss": 2.5244, + "step": 10830 + }, + { + "epoch": 0.8741021709305141, + "grad_norm": 0.7342235445976257, + "learning_rate": 8.771731497319946e-05, + "loss": 2.5073, + "step": 10831 + }, + { + "epoch": 0.8741828746670971, + "grad_norm": 0.6676939725875854, + "learning_rate": 8.770164776512926e-05, + "loss": 2.5408, + "step": 10832 + }, + { + "epoch": 0.8742635784036801, + "grad_norm": 0.6957886219024658, + "learning_rate": 8.768598086356608e-05, + "loss": 2.5566, + "step": 10833 + }, + { + "epoch": 0.874344282140263, + "grad_norm": 0.6938990950584412, + "learning_rate": 8.767031426890046e-05, + "loss": 2.517, + "step": 10834 + }, + { + "epoch": 0.8744249858768461, + "grad_norm": 0.8387169241905212, + "learning_rate": 8.765464798152286e-05, + "loss": 2.5507, + "step": 10835 + }, + { + "epoch": 0.8745056896134291, + "grad_norm": 0.6396276354789734, + "learning_rate": 8.763898200182368e-05, + "loss": 2.5063, + "step": 10836 + }, + { + "epoch": 0.8745863933500121, + "grad_norm": 0.7122719883918762, + "learning_rate": 8.762331633019339e-05, + "loss": 2.5816, + "step": 10837 + }, + { + "epoch": 0.8746670970865951, + "grad_norm": 0.6807141304016113, + "learning_rate": 8.760765096702244e-05, + "loss": 2.6004, + "step": 10838 + }, + { + "epoch": 0.8747478008231782, + "grad_norm": 0.6764848232269287, + "learning_rate": 8.759198591270117e-05, + "loss": 2.5303, + "step": 10839 + }, + { + "epoch": 0.8748285045597611, + "grad_norm": 0.718515932559967, + "learning_rate": 8.757632116762006e-05, + "loss": 2.5088, + "step": 10840 + }, + { + "epoch": 0.8749092082963441, + "grad_norm": 0.7084362506866455, + "learning_rate": 8.75606567321695e-05, + "loss": 2.5496, + 
"step": 10841 + }, + { + "epoch": 0.8749899120329271, + "grad_norm": 0.7191734910011292, + "learning_rate": 8.754499260673991e-05, + "loss": 2.5525, + "step": 10842 + }, + { + "epoch": 0.8750706157695102, + "grad_norm": 0.7167977094650269, + "learning_rate": 8.752932879172164e-05, + "loss": 2.5479, + "step": 10843 + }, + { + "epoch": 0.8751513195060932, + "grad_norm": 0.6994979381561279, + "learning_rate": 8.751366528750511e-05, + "loss": 2.4942, + "step": 10844 + }, + { + "epoch": 0.8752320232426761, + "grad_norm": 0.7192725539207458, + "learning_rate": 8.749800209448068e-05, + "loss": 2.5233, + "step": 10845 + }, + { + "epoch": 0.8753127269792591, + "grad_norm": 0.7728807330131531, + "learning_rate": 8.748233921303871e-05, + "loss": 2.5698, + "step": 10846 + }, + { + "epoch": 0.8753934307158422, + "grad_norm": 0.7305434942245483, + "learning_rate": 8.746667664356956e-05, + "loss": 2.5096, + "step": 10847 + }, + { + "epoch": 0.8754741344524252, + "grad_norm": 0.7117629051208496, + "learning_rate": 8.745101438646365e-05, + "loss": 2.5272, + "step": 10848 + }, + { + "epoch": 0.8755548381890081, + "grad_norm": 0.7180361151695251, + "learning_rate": 8.743535244211121e-05, + "loss": 2.4718, + "step": 10849 + }, + { + "epoch": 0.8756355419255911, + "grad_norm": 0.6419457793235779, + "learning_rate": 8.741969081090263e-05, + "loss": 2.5407, + "step": 10850 + }, + { + "epoch": 0.8757162456621742, + "grad_norm": 0.7928328514099121, + "learning_rate": 8.740402949322827e-05, + "loss": 2.488, + "step": 10851 + }, + { + "epoch": 0.8757969493987572, + "grad_norm": 0.7449139952659607, + "learning_rate": 8.738836848947839e-05, + "loss": 2.5943, + "step": 10852 + }, + { + "epoch": 0.8758776531353402, + "grad_norm": 0.7919576168060303, + "learning_rate": 8.737270780004334e-05, + "loss": 2.5556, + "step": 10853 + }, + { + "epoch": 0.8759583568719231, + "grad_norm": 0.6867526769638062, + "learning_rate": 8.735704742531346e-05, + "loss": 2.5395, + "step": 10854 + }, + { + "epoch": 
0.8760390606085062, + "grad_norm": 0.7195394039154053, + "learning_rate": 8.734138736567896e-05, + "loss": 2.4404, + "step": 10855 + }, + { + "epoch": 0.8761197643450892, + "grad_norm": 0.68385910987854, + "learning_rate": 8.732572762153016e-05, + "loss": 2.502, + "step": 10856 + }, + { + "epoch": 0.8762004680816722, + "grad_norm": 0.6957393884658813, + "learning_rate": 8.731006819325739e-05, + "loss": 2.5788, + "step": 10857 + }, + { + "epoch": 0.8762811718182552, + "grad_norm": 0.6973037123680115, + "learning_rate": 8.729440908125092e-05, + "loss": 2.4927, + "step": 10858 + }, + { + "epoch": 0.8763618755548382, + "grad_norm": 0.6535985469818115, + "learning_rate": 8.727875028590095e-05, + "loss": 2.596, + "step": 10859 + }, + { + "epoch": 0.8764425792914212, + "grad_norm": 0.7447848320007324, + "learning_rate": 8.726309180759777e-05, + "loss": 2.5825, + "step": 10860 + }, + { + "epoch": 0.8765232830280042, + "grad_norm": 0.7155942320823669, + "learning_rate": 8.724743364673168e-05, + "loss": 2.5105, + "step": 10861 + }, + { + "epoch": 0.8766039867645872, + "grad_norm": 0.6664694547653198, + "learning_rate": 8.723177580369285e-05, + "loss": 2.5244, + "step": 10862 + }, + { + "epoch": 0.8766846905011701, + "grad_norm": 0.7437852025032043, + "learning_rate": 8.721611827887153e-05, + "loss": 2.534, + "step": 10863 + }, + { + "epoch": 0.8767653942377532, + "grad_norm": 0.6752577424049377, + "learning_rate": 8.7200461072658e-05, + "loss": 2.5025, + "step": 10864 + }, + { + "epoch": 0.8768460979743362, + "grad_norm": 0.7420764565467834, + "learning_rate": 8.718480418544241e-05, + "loss": 2.5261, + "step": 10865 + }, + { + "epoch": 0.8769268017109192, + "grad_norm": 0.669384777545929, + "learning_rate": 8.7169147617615e-05, + "loss": 2.5258, + "step": 10866 + }, + { + "epoch": 0.8770075054475022, + "grad_norm": 0.6649587750434875, + "learning_rate": 8.715349136956599e-05, + "loss": 2.5308, + "step": 10867 + }, + { + "epoch": 0.8770882091840853, + "grad_norm": 
0.728922426700592, + "learning_rate": 8.713783544168552e-05, + "loss": 2.5251, + "step": 10868 + }, + { + "epoch": 0.8771689129206682, + "grad_norm": 0.6957671642303467, + "learning_rate": 8.712217983436384e-05, + "loss": 2.5818, + "step": 10869 + }, + { + "epoch": 0.8772496166572512, + "grad_norm": 0.6796830892562866, + "learning_rate": 8.710652454799108e-05, + "loss": 2.5122, + "step": 10870 + }, + { + "epoch": 0.8773303203938342, + "grad_norm": 0.7230980396270752, + "learning_rate": 8.709086958295746e-05, + "loss": 2.5836, + "step": 10871 + }, + { + "epoch": 0.8774110241304173, + "grad_norm": 0.6992264986038208, + "learning_rate": 8.707521493965309e-05, + "loss": 2.5907, + "step": 10872 + }, + { + "epoch": 0.8774917278670002, + "grad_norm": 0.7066535353660583, + "learning_rate": 8.705956061846816e-05, + "loss": 2.5508, + "step": 10873 + }, + { + "epoch": 0.8775724316035832, + "grad_norm": 0.6559327244758606, + "learning_rate": 8.704390661979283e-05, + "loss": 2.611, + "step": 10874 + }, + { + "epoch": 0.8776531353401662, + "grad_norm": 0.6673287749290466, + "learning_rate": 8.70282529440172e-05, + "loss": 2.5778, + "step": 10875 + }, + { + "epoch": 0.8777338390767493, + "grad_norm": 0.6715971231460571, + "learning_rate": 8.701259959153139e-05, + "loss": 2.5342, + "step": 10876 + }, + { + "epoch": 0.8778145428133323, + "grad_norm": 0.7456488609313965, + "learning_rate": 8.699694656272557e-05, + "loss": 2.5365, + "step": 10877 + }, + { + "epoch": 0.8778952465499152, + "grad_norm": 0.6658159494400024, + "learning_rate": 8.698129385798983e-05, + "loss": 2.4387, + "step": 10878 + }, + { + "epoch": 0.8779759502864982, + "grad_norm": 0.6653816103935242, + "learning_rate": 8.696564147771427e-05, + "loss": 2.5791, + "step": 10879 + }, + { + "epoch": 0.8780566540230813, + "grad_norm": 0.6763200163841248, + "learning_rate": 8.694998942228902e-05, + "loss": 2.5356, + "step": 10880 + }, + { + "epoch": 0.8781373577596643, + "grad_norm": 0.6534504890441895, + "learning_rate": 
8.69343376921041e-05, + "loss": 2.5358, + "step": 10881 + }, + { + "epoch": 0.8782180614962473, + "grad_norm": 0.6341667771339417, + "learning_rate": 8.691868628754967e-05, + "loss": 2.4927, + "step": 10882 + }, + { + "epoch": 0.8782987652328302, + "grad_norm": 0.6215559244155884, + "learning_rate": 8.690303520901579e-05, + "loss": 2.4312, + "step": 10883 + }, + { + "epoch": 0.8783794689694133, + "grad_norm": 0.6705841422080994, + "learning_rate": 8.688738445689248e-05, + "loss": 2.4778, + "step": 10884 + }, + { + "epoch": 0.8784601727059963, + "grad_norm": 0.680275559425354, + "learning_rate": 8.687173403156982e-05, + "loss": 2.5577, + "step": 10885 + }, + { + "epoch": 0.8785408764425793, + "grad_norm": 0.6918728351593018, + "learning_rate": 8.685608393343789e-05, + "loss": 2.5212, + "step": 10886 + }, + { + "epoch": 0.8786215801791623, + "grad_norm": 0.623636782169342, + "learning_rate": 8.68404341628867e-05, + "loss": 2.5131, + "step": 10887 + }, + { + "epoch": 0.8787022839157453, + "grad_norm": 0.7200562357902527, + "learning_rate": 8.682478472030628e-05, + "loss": 2.5517, + "step": 10888 + }, + { + "epoch": 0.8787829876523283, + "grad_norm": 0.6902644634246826, + "learning_rate": 8.680913560608666e-05, + "loss": 2.511, + "step": 10889 + }, + { + "epoch": 0.8788636913889113, + "grad_norm": 0.6855802536010742, + "learning_rate": 8.679348682061792e-05, + "loss": 2.5169, + "step": 10890 + }, + { + "epoch": 0.8789443951254943, + "grad_norm": 0.7229284048080444, + "learning_rate": 8.677783836428995e-05, + "loss": 2.5634, + "step": 10891 + }, + { + "epoch": 0.8790250988620774, + "grad_norm": 0.6350376605987549, + "learning_rate": 8.676219023749281e-05, + "loss": 2.443, + "step": 10892 + }, + { + "epoch": 0.8791058025986603, + "grad_norm": 0.6884307265281677, + "learning_rate": 8.674654244061653e-05, + "loss": 2.524, + "step": 10893 + }, + { + "epoch": 0.8791865063352433, + "grad_norm": 0.6571067571640015, + "learning_rate": 8.673089497405102e-05, + "loss": 2.5322, + 
"step": 10894 + }, + { + "epoch": 0.8792672100718263, + "grad_norm": 0.7078021764755249, + "learning_rate": 8.67152478381863e-05, + "loss": 2.5317, + "step": 10895 + }, + { + "epoch": 0.8793479138084094, + "grad_norm": 0.6809059381484985, + "learning_rate": 8.669960103341236e-05, + "loss": 2.5767, + "step": 10896 + }, + { + "epoch": 0.8794286175449924, + "grad_norm": 0.7399441003799438, + "learning_rate": 8.66839545601191e-05, + "loss": 2.5194, + "step": 10897 + }, + { + "epoch": 0.8795093212815753, + "grad_norm": 0.6762270927429199, + "learning_rate": 8.66683084186965e-05, + "loss": 2.5306, + "step": 10898 + }, + { + "epoch": 0.8795900250181583, + "grad_norm": 0.7394620776176453, + "learning_rate": 8.665266260953455e-05, + "loss": 2.4516, + "step": 10899 + }, + { + "epoch": 0.8796707287547414, + "grad_norm": 0.6775416135787964, + "learning_rate": 8.663701713302309e-05, + "loss": 2.5574, + "step": 10900 + }, + { + "epoch": 0.8797514324913244, + "grad_norm": 0.7630520462989807, + "learning_rate": 8.66213719895521e-05, + "loss": 2.5516, + "step": 10901 + }, + { + "epoch": 0.8798321362279073, + "grad_norm": 0.6555768847465515, + "learning_rate": 8.660572717951149e-05, + "loss": 2.5267, + "step": 10902 + }, + { + "epoch": 0.8799128399644903, + "grad_norm": 0.6899500489234924, + "learning_rate": 8.659008270329119e-05, + "loss": 2.4938, + "step": 10903 + }, + { + "epoch": 0.8799935437010734, + "grad_norm": 0.6939221024513245, + "learning_rate": 8.657443856128107e-05, + "loss": 2.5358, + "step": 10904 + }, + { + "epoch": 0.8800742474376564, + "grad_norm": 0.6454630494117737, + "learning_rate": 8.655879475387102e-05, + "loss": 2.5528, + "step": 10905 + }, + { + "epoch": 0.8801549511742394, + "grad_norm": 0.7142425775527954, + "learning_rate": 8.654315128145099e-05, + "loss": 2.5668, + "step": 10906 + }, + { + "epoch": 0.8802356549108223, + "grad_norm": 0.7512764930725098, + "learning_rate": 8.652750814441075e-05, + "loss": 2.5224, + "step": 10907 + }, + { + "epoch": 
0.8803163586474054, + "grad_norm": 0.6599575877189636, + "learning_rate": 8.651186534314026e-05, + "loss": 2.5363, + "step": 10908 + }, + { + "epoch": 0.8803970623839884, + "grad_norm": 0.6787410974502563, + "learning_rate": 8.649622287802935e-05, + "loss": 2.4587, + "step": 10909 + }, + { + "epoch": 0.8804777661205714, + "grad_norm": 0.7124783396720886, + "learning_rate": 8.648058074946786e-05, + "loss": 2.5842, + "step": 10910 + }, + { + "epoch": 0.8805584698571544, + "grad_norm": 0.6698839664459229, + "learning_rate": 8.646493895784562e-05, + "loss": 2.513, + "step": 10911 + }, + { + "epoch": 0.8806391735937374, + "grad_norm": 0.6660044193267822, + "learning_rate": 8.644929750355249e-05, + "loss": 2.4996, + "step": 10912 + }, + { + "epoch": 0.8807198773303204, + "grad_norm": 0.7060455083847046, + "learning_rate": 8.643365638697828e-05, + "loss": 2.5497, + "step": 10913 + }, + { + "epoch": 0.8808005810669034, + "grad_norm": 0.6835277676582336, + "learning_rate": 8.641801560851281e-05, + "loss": 2.5198, + "step": 10914 + }, + { + "epoch": 0.8808812848034864, + "grad_norm": 0.6994042992591858, + "learning_rate": 8.640237516854595e-05, + "loss": 2.5692, + "step": 10915 + }, + { + "epoch": 0.8809619885400694, + "grad_norm": 0.6583377718925476, + "learning_rate": 8.63867350674674e-05, + "loss": 2.5025, + "step": 10916 + }, + { + "epoch": 0.8810426922766524, + "grad_norm": 0.6882332563400269, + "learning_rate": 8.637109530566698e-05, + "loss": 2.5343, + "step": 10917 + }, + { + "epoch": 0.8811233960132354, + "grad_norm": 0.6329876184463501, + "learning_rate": 8.635545588353449e-05, + "loss": 2.5335, + "step": 10918 + }, + { + "epoch": 0.8812040997498184, + "grad_norm": 0.713196337223053, + "learning_rate": 8.633981680145975e-05, + "loss": 2.4814, + "step": 10919 + }, + { + "epoch": 0.8812848034864014, + "grad_norm": 0.7388820648193359, + "learning_rate": 8.632417805983246e-05, + "loss": 2.4927, + "step": 10920 + }, + { + "epoch": 0.8813655072229845, + "grad_norm": 
0.7316160798072815, + "learning_rate": 8.63085396590424e-05, + "loss": 2.508, + "step": 10921 + }, + { + "epoch": 0.8814462109595674, + "grad_norm": 0.6690139174461365, + "learning_rate": 8.629290159947934e-05, + "loss": 2.5719, + "step": 10922 + }, + { + "epoch": 0.8815269146961504, + "grad_norm": 0.6369553208351135, + "learning_rate": 8.627726388153297e-05, + "loss": 2.5277, + "step": 10923 + }, + { + "epoch": 0.8816076184327334, + "grad_norm": 0.6870365738868713, + "learning_rate": 8.626162650559306e-05, + "loss": 2.4731, + "step": 10924 + }, + { + "epoch": 0.8816883221693165, + "grad_norm": 0.6890872716903687, + "learning_rate": 8.624598947204938e-05, + "loss": 2.5417, + "step": 10925 + }, + { + "epoch": 0.8817690259058995, + "grad_norm": 0.6548230051994324, + "learning_rate": 8.623035278129156e-05, + "loss": 2.4888, + "step": 10926 + }, + { + "epoch": 0.8818497296424824, + "grad_norm": 0.6835262775421143, + "learning_rate": 8.621471643370933e-05, + "loss": 2.531, + "step": 10927 + }, + { + "epoch": 0.8819304333790654, + "grad_norm": 0.6910626292228699, + "learning_rate": 8.619908042969243e-05, + "loss": 2.4864, + "step": 10928 + }, + { + "epoch": 0.8820111371156485, + "grad_norm": 0.6727725267410278, + "learning_rate": 8.618344476963049e-05, + "loss": 2.5063, + "step": 10929 + }, + { + "epoch": 0.8820918408522315, + "grad_norm": 0.7285245656967163, + "learning_rate": 8.616780945391323e-05, + "loss": 2.5036, + "step": 10930 + }, + { + "epoch": 0.8821725445888144, + "grad_norm": 0.6561840176582336, + "learning_rate": 8.615217448293035e-05, + "loss": 2.5152, + "step": 10931 + }, + { + "epoch": 0.8822532483253974, + "grad_norm": 0.6524627208709717, + "learning_rate": 8.613653985707144e-05, + "loss": 2.4827, + "step": 10932 + }, + { + "epoch": 0.8823339520619805, + "grad_norm": 0.6815671920776367, + "learning_rate": 8.612090557672619e-05, + "loss": 2.5385, + "step": 10933 + }, + { + "epoch": 0.8824146557985635, + "grad_norm": 0.7479865550994873, + "learning_rate": 
8.610527164228429e-05, + "loss": 2.5311, + "step": 10934 + }, + { + "epoch": 0.8824953595351465, + "grad_norm": 0.699504554271698, + "learning_rate": 8.608963805413535e-05, + "loss": 2.5332, + "step": 10935 + }, + { + "epoch": 0.8825760632717294, + "grad_norm": 0.7081198692321777, + "learning_rate": 8.607400481266896e-05, + "loss": 2.5636, + "step": 10936 + }, + { + "epoch": 0.8826567670083125, + "grad_norm": 0.7020730972290039, + "learning_rate": 8.605837191827478e-05, + "loss": 2.498, + "step": 10937 + }, + { + "epoch": 0.8827374707448955, + "grad_norm": 0.8004096150398254, + "learning_rate": 8.604273937134242e-05, + "loss": 2.5352, + "step": 10938 + }, + { + "epoch": 0.8828181744814785, + "grad_norm": 0.6399645209312439, + "learning_rate": 8.602710717226147e-05, + "loss": 2.5673, + "step": 10939 + }, + { + "epoch": 0.8828988782180615, + "grad_norm": 0.683195173740387, + "learning_rate": 8.601147532142153e-05, + "loss": 2.4812, + "step": 10940 + }, + { + "epoch": 0.8829795819546445, + "grad_norm": 0.7783642411231995, + "learning_rate": 8.599584381921224e-05, + "loss": 2.4812, + "step": 10941 + }, + { + "epoch": 0.8830602856912275, + "grad_norm": 0.7107423543930054, + "learning_rate": 8.598021266602308e-05, + "loss": 2.5527, + "step": 10942 + }, + { + "epoch": 0.8831409894278105, + "grad_norm": 0.6419345140457153, + "learning_rate": 8.596458186224365e-05, + "loss": 2.5642, + "step": 10943 + }, + { + "epoch": 0.8832216931643935, + "grad_norm": 0.6897309422492981, + "learning_rate": 8.59489514082636e-05, + "loss": 2.5743, + "step": 10944 + }, + { + "epoch": 0.8833023969009766, + "grad_norm": 0.6901495456695557, + "learning_rate": 8.593332130447236e-05, + "loss": 2.5139, + "step": 10945 + }, + { + "epoch": 0.8833831006375595, + "grad_norm": 0.6865388751029968, + "learning_rate": 8.591769155125953e-05, + "loss": 2.5281, + "step": 10946 + }, + { + "epoch": 0.8834638043741425, + "grad_norm": 0.7070403099060059, + "learning_rate": 8.590206214901465e-05, + "loss": 2.4648, 
+ "step": 10947 + }, + { + "epoch": 0.8835445081107255, + "grad_norm": 0.6846395134925842, + "learning_rate": 8.588643309812721e-05, + "loss": 2.4792, + "step": 10948 + }, + { + "epoch": 0.8836252118473086, + "grad_norm": 0.6875495314598083, + "learning_rate": 8.587080439898675e-05, + "loss": 2.5126, + "step": 10949 + }, + { + "epoch": 0.8837059155838916, + "grad_norm": 0.670098066329956, + "learning_rate": 8.58551760519828e-05, + "loss": 2.4922, + "step": 10950 + }, + { + "epoch": 0.8837866193204745, + "grad_norm": 0.6675527691841125, + "learning_rate": 8.583954805750487e-05, + "loss": 2.499, + "step": 10951 + }, + { + "epoch": 0.8838673230570575, + "grad_norm": 0.6694127321243286, + "learning_rate": 8.582392041594236e-05, + "loss": 2.5286, + "step": 10952 + }, + { + "epoch": 0.8839480267936406, + "grad_norm": 0.7291092872619629, + "learning_rate": 8.580829312768482e-05, + "loss": 2.5705, + "step": 10953 + }, + { + "epoch": 0.8840287305302236, + "grad_norm": 0.709904670715332, + "learning_rate": 8.579266619312174e-05, + "loss": 2.5238, + "step": 10954 + }, + { + "epoch": 0.8841094342668065, + "grad_norm": 0.7037622332572937, + "learning_rate": 8.577703961264254e-05, + "loss": 2.5491, + "step": 10955 + }, + { + "epoch": 0.8841901380033895, + "grad_norm": 0.7553049325942993, + "learning_rate": 8.576141338663668e-05, + "loss": 2.5643, + "step": 10956 + }, + { + "epoch": 0.8842708417399726, + "grad_norm": 0.7177377343177795, + "learning_rate": 8.574578751549364e-05, + "loss": 2.49, + "step": 10957 + }, + { + "epoch": 0.8843515454765556, + "grad_norm": 0.682668149471283, + "learning_rate": 8.573016199960283e-05, + "loss": 2.5221, + "step": 10958 + }, + { + "epoch": 0.8844322492131386, + "grad_norm": 0.7508956789970398, + "learning_rate": 8.571453683935366e-05, + "loss": 2.5766, + "step": 10959 + }, + { + "epoch": 0.8845129529497215, + "grad_norm": 0.6495946645736694, + "learning_rate": 8.569891203513562e-05, + "loss": 2.534, + "step": 10960 + }, + { + "epoch": 
0.8845936566863046, + "grad_norm": 0.7362824082374573, + "learning_rate": 8.568328758733806e-05, + "loss": 2.4614, + "step": 10961 + }, + { + "epoch": 0.8846743604228876, + "grad_norm": 0.6571496725082397, + "learning_rate": 8.566766349635037e-05, + "loss": 2.4393, + "step": 10962 + }, + { + "epoch": 0.8847550641594706, + "grad_norm": 0.7088329195976257, + "learning_rate": 8.5652039762562e-05, + "loss": 2.5476, + "step": 10963 + }, + { + "epoch": 0.8848357678960536, + "grad_norm": 0.6414440274238586, + "learning_rate": 8.56364163863623e-05, + "loss": 2.4668, + "step": 10964 + }, + { + "epoch": 0.8849164716326365, + "grad_norm": 0.7333478331565857, + "learning_rate": 8.562079336814063e-05, + "loss": 2.5151, + "step": 10965 + }, + { + "epoch": 0.8849971753692196, + "grad_norm": 0.638038694858551, + "learning_rate": 8.560517070828638e-05, + "loss": 2.5063, + "step": 10966 + }, + { + "epoch": 0.8850778791058026, + "grad_norm": 0.638921320438385, + "learning_rate": 8.558954840718896e-05, + "loss": 2.4769, + "step": 10967 + }, + { + "epoch": 0.8851585828423856, + "grad_norm": 0.6923465728759766, + "learning_rate": 8.557392646523759e-05, + "loss": 2.5388, + "step": 10968 + }, + { + "epoch": 0.8852392865789686, + "grad_norm": 0.7095212936401367, + "learning_rate": 8.555830488282169e-05, + "loss": 2.4955, + "step": 10969 + }, + { + "epoch": 0.8853199903155516, + "grad_norm": 0.689908504486084, + "learning_rate": 8.554268366033065e-05, + "loss": 2.4998, + "step": 10970 + }, + { + "epoch": 0.8854006940521346, + "grad_norm": 0.6551975011825562, + "learning_rate": 8.552706279815366e-05, + "loss": 2.4965, + "step": 10971 + }, + { + "epoch": 0.8854813977887176, + "grad_norm": 0.7239118218421936, + "learning_rate": 8.551144229668012e-05, + "loss": 2.5785, + "step": 10972 + }, + { + "epoch": 0.8855621015253006, + "grad_norm": 0.6743230819702148, + "learning_rate": 8.549582215629932e-05, + "loss": 2.5146, + "step": 10973 + }, + { + "epoch": 0.8856428052618837, + "grad_norm": 
0.6991584300994873, + "learning_rate": 8.548020237740052e-05, + "loss": 2.5524, + "step": 10974 + }, + { + "epoch": 0.8857235089984666, + "grad_norm": 0.6605305075645447, + "learning_rate": 8.546458296037304e-05, + "loss": 2.5505, + "step": 10975 + }, + { + "epoch": 0.8858042127350496, + "grad_norm": 0.7011568546295166, + "learning_rate": 8.54489639056062e-05, + "loss": 2.4381, + "step": 10976 + }, + { + "epoch": 0.8858849164716326, + "grad_norm": 0.7015339136123657, + "learning_rate": 8.543334521348916e-05, + "loss": 2.5432, + "step": 10977 + }, + { + "epoch": 0.8859656202082157, + "grad_norm": 0.6892278790473938, + "learning_rate": 8.541772688441124e-05, + "loss": 2.5286, + "step": 10978 + }, + { + "epoch": 0.8860463239447987, + "grad_norm": 0.6680187582969666, + "learning_rate": 8.540210891876168e-05, + "loss": 2.439, + "step": 10979 + }, + { + "epoch": 0.8861270276813816, + "grad_norm": 0.7043240666389465, + "learning_rate": 8.538649131692975e-05, + "loss": 2.5558, + "step": 10980 + }, + { + "epoch": 0.8862077314179646, + "grad_norm": 0.6940229535102844, + "learning_rate": 8.537087407930463e-05, + "loss": 2.5219, + "step": 10981 + }, + { + "epoch": 0.8862884351545477, + "grad_norm": 0.6571553945541382, + "learning_rate": 8.535525720627558e-05, + "loss": 2.5054, + "step": 10982 + }, + { + "epoch": 0.8863691388911307, + "grad_norm": 0.6846656203269958, + "learning_rate": 8.533964069823182e-05, + "loss": 2.497, + "step": 10983 + }, + { + "epoch": 0.8864498426277136, + "grad_norm": 0.6838627457618713, + "learning_rate": 8.53240245555625e-05, + "loss": 2.5495, + "step": 10984 + }, + { + "epoch": 0.8865305463642966, + "grad_norm": 0.6825091242790222, + "learning_rate": 8.530840877865687e-05, + "loss": 2.5656, + "step": 10985 + }, + { + "epoch": 0.8866112501008797, + "grad_norm": 0.7368674278259277, + "learning_rate": 8.529279336790414e-05, + "loss": 2.5378, + "step": 10986 + }, + { + "epoch": 0.8866919538374627, + "grad_norm": 0.7333693504333496, + "learning_rate": 
8.527717832369338e-05, + "loss": 2.506, + "step": 10987 + }, + { + "epoch": 0.8867726575740457, + "grad_norm": 0.6623306274414062, + "learning_rate": 8.526156364641384e-05, + "loss": 2.4824, + "step": 10988 + }, + { + "epoch": 0.8868533613106286, + "grad_norm": 0.6863973140716553, + "learning_rate": 8.524594933645468e-05, + "loss": 2.536, + "step": 10989 + }, + { + "epoch": 0.8869340650472117, + "grad_norm": 0.6805100440979004, + "learning_rate": 8.523033539420501e-05, + "loss": 2.4954, + "step": 10990 + }, + { + "epoch": 0.8870147687837947, + "grad_norm": 0.6672216653823853, + "learning_rate": 8.521472182005399e-05, + "loss": 2.4893, + "step": 10991 + }, + { + "epoch": 0.8870954725203777, + "grad_norm": 0.7310158610343933, + "learning_rate": 8.519910861439079e-05, + "loss": 2.5317, + "step": 10992 + }, + { + "epoch": 0.8871761762569607, + "grad_norm": 0.6820743083953857, + "learning_rate": 8.518349577760445e-05, + "loss": 2.4482, + "step": 10993 + }, + { + "epoch": 0.8872568799935437, + "grad_norm": 0.6660269498825073, + "learning_rate": 8.516788331008411e-05, + "loss": 2.5353, + "step": 10994 + }, + { + "epoch": 0.8873375837301267, + "grad_norm": 0.676243007183075, + "learning_rate": 8.51522712122189e-05, + "loss": 2.531, + "step": 10995 + }, + { + "epoch": 0.8874182874667097, + "grad_norm": 0.6677152514457703, + "learning_rate": 8.513665948439796e-05, + "loss": 2.4732, + "step": 10996 + }, + { + "epoch": 0.8874989912032927, + "grad_norm": 0.7341045141220093, + "learning_rate": 8.512104812701027e-05, + "loss": 2.5668, + "step": 10997 + }, + { + "epoch": 0.8875796949398758, + "grad_norm": 0.6475326418876648, + "learning_rate": 8.510543714044496e-05, + "loss": 2.5026, + "step": 10998 + }, + { + "epoch": 0.8876603986764587, + "grad_norm": 0.7335529923439026, + "learning_rate": 8.50898265250911e-05, + "loss": 2.4946, + "step": 10999 + }, + { + "epoch": 0.8877411024130417, + "grad_norm": 0.760108232498169, + "learning_rate": 8.507421628133772e-05, + "loss": 2.5697, + 
"step": 11000 + }, + { + "epoch": 0.8877411024130417, + "eval_loss": 2.450413465499878, + "eval_runtime": 975.281, + "eval_samples_per_second": 2.686, + "eval_steps_per_second": 0.448, + "step": 11000 + }, + { + "epoch": 0.8878218061496247, + "grad_norm": 0.6420160531997681, + "learning_rate": 8.505860640957391e-05, + "loss": 2.5842, + "step": 11001 + }, + { + "epoch": 0.8879025098862078, + "grad_norm": 0.6625204086303711, + "learning_rate": 8.50429969101887e-05, + "loss": 2.4771, + "step": 11002 + }, + { + "epoch": 0.8879832136227908, + "grad_norm": 0.7430149912834167, + "learning_rate": 8.502738778357107e-05, + "loss": 2.5509, + "step": 11003 + }, + { + "epoch": 0.8880639173593737, + "grad_norm": 0.663624107837677, + "learning_rate": 8.501177903011008e-05, + "loss": 2.504, + "step": 11004 + }, + { + "epoch": 0.8881446210959567, + "grad_norm": 0.6638087630271912, + "learning_rate": 8.499617065019476e-05, + "loss": 2.492, + "step": 11005 + }, + { + "epoch": 0.8882253248325398, + "grad_norm": 0.7321780323982239, + "learning_rate": 8.498056264421406e-05, + "loss": 2.5808, + "step": 11006 + }, + { + "epoch": 0.8883060285691228, + "grad_norm": 0.7108619809150696, + "learning_rate": 8.4964955012557e-05, + "loss": 2.6185, + "step": 11007 + }, + { + "epoch": 0.8883867323057058, + "grad_norm": 0.6745856404304504, + "learning_rate": 8.494934775561258e-05, + "loss": 2.576, + "step": 11008 + }, + { + "epoch": 0.8884674360422887, + "grad_norm": 0.8002225756645203, + "learning_rate": 8.493374087376976e-05, + "loss": 2.5598, + "step": 11009 + }, + { + "epoch": 0.8885481397788718, + "grad_norm": 0.6848840713500977, + "learning_rate": 8.491813436741746e-05, + "loss": 2.5218, + "step": 11010 + }, + { + "epoch": 0.8886288435154548, + "grad_norm": 0.6464105248451233, + "learning_rate": 8.490252823694471e-05, + "loss": 2.5503, + "step": 11011 + }, + { + "epoch": 0.8887095472520378, + "grad_norm": 0.7165790796279907, + "learning_rate": 8.488692248274045e-05, + "loss": 2.5104, + "step": 
11012 + }, + { + "epoch": 0.8887902509886207, + "grad_norm": 0.6832898259162903, + "learning_rate": 8.487131710519355e-05, + "loss": 2.5379, + "step": 11013 + }, + { + "epoch": 0.8888709547252038, + "grad_norm": 0.6992432475090027, + "learning_rate": 8.485571210469296e-05, + "loss": 2.5388, + "step": 11014 + }, + { + "epoch": 0.8889516584617868, + "grad_norm": 0.6410119533538818, + "learning_rate": 8.484010748162765e-05, + "loss": 2.5237, + "step": 11015 + }, + { + "epoch": 0.8890323621983698, + "grad_norm": 0.716248095035553, + "learning_rate": 8.482450323638647e-05, + "loss": 2.4977, + "step": 11016 + }, + { + "epoch": 0.8891130659349528, + "grad_norm": 0.6620567440986633, + "learning_rate": 8.480889936935833e-05, + "loss": 2.5088, + "step": 11017 + }, + { + "epoch": 0.8891937696715357, + "grad_norm": 0.7311015129089355, + "learning_rate": 8.479329588093217e-05, + "loss": 2.5547, + "step": 11018 + }, + { + "epoch": 0.8892744734081188, + "grad_norm": 0.757203996181488, + "learning_rate": 8.477769277149676e-05, + "loss": 2.5681, + "step": 11019 + }, + { + "epoch": 0.8893551771447018, + "grad_norm": 0.6941282153129578, + "learning_rate": 8.476209004144107e-05, + "loss": 2.5078, + "step": 11020 + }, + { + "epoch": 0.8894358808812848, + "grad_norm": 0.6381667256355286, + "learning_rate": 8.474648769115396e-05, + "loss": 2.5371, + "step": 11021 + }, + { + "epoch": 0.8895165846178678, + "grad_norm": 0.7978621125221252, + "learning_rate": 8.473088572102422e-05, + "loss": 2.5384, + "step": 11022 + }, + { + "epoch": 0.8895972883544508, + "grad_norm": 0.7229189872741699, + "learning_rate": 8.471528413144072e-05, + "loss": 2.5469, + "step": 11023 + }, + { + "epoch": 0.8896779920910338, + "grad_norm": 0.705545961856842, + "learning_rate": 8.469968292279231e-05, + "loss": 2.5281, + "step": 11024 + }, + { + "epoch": 0.8897586958276168, + "grad_norm": 0.7259972095489502, + "learning_rate": 8.468408209546777e-05, + "loss": 2.5485, + "step": 11025 + }, + { + "epoch": 
0.8898393995641998, + "grad_norm": 0.6859608888626099, + "learning_rate": 8.466848164985594e-05, + "loss": 2.5548, + "step": 11026 + }, + { + "epoch": 0.8899201033007829, + "grad_norm": 0.7036644816398621, + "learning_rate": 8.465288158634565e-05, + "loss": 2.5159, + "step": 11027 + }, + { + "epoch": 0.8900008070373658, + "grad_norm": 0.6899380087852478, + "learning_rate": 8.463728190532569e-05, + "loss": 2.5037, + "step": 11028 + }, + { + "epoch": 0.8900815107739488, + "grad_norm": 0.7428410649299622, + "learning_rate": 8.462168260718477e-05, + "loss": 2.5074, + "step": 11029 + }, + { + "epoch": 0.8901622145105318, + "grad_norm": 0.6724158525466919, + "learning_rate": 8.460608369231173e-05, + "loss": 2.5544, + "step": 11030 + }, + { + "epoch": 0.8902429182471149, + "grad_norm": 0.6516450643539429, + "learning_rate": 8.459048516109535e-05, + "loss": 2.5152, + "step": 11031 + }, + { + "epoch": 0.8903236219836979, + "grad_norm": 0.7013405561447144, + "learning_rate": 8.457488701392434e-05, + "loss": 2.5116, + "step": 11032 + }, + { + "epoch": 0.8904043257202808, + "grad_norm": 0.7207479476928711, + "learning_rate": 8.455928925118747e-05, + "loss": 2.6041, + "step": 11033 + }, + { + "epoch": 0.8904850294568638, + "grad_norm": 0.69600510597229, + "learning_rate": 8.454369187327348e-05, + "loss": 2.5794, + "step": 11034 + }, + { + "epoch": 0.8905657331934469, + "grad_norm": 0.6831288933753967, + "learning_rate": 8.452809488057108e-05, + "loss": 2.4682, + "step": 11035 + }, + { + "epoch": 0.8906464369300299, + "grad_norm": 0.6978991627693176, + "learning_rate": 8.451249827346901e-05, + "loss": 2.4862, + "step": 11036 + }, + { + "epoch": 0.8907271406666128, + "grad_norm": 0.6772337555885315, + "learning_rate": 8.4496902052356e-05, + "loss": 2.5357, + "step": 11037 + }, + { + "epoch": 0.8908078444031958, + "grad_norm": 0.6735778450965881, + "learning_rate": 8.448130621762067e-05, + "loss": 2.5115, + "step": 11038 + }, + { + "epoch": 0.8908885481397789, + "grad_norm": 
0.6695345044136047, + "learning_rate": 8.446571076965177e-05, + "loss": 2.5083, + "step": 11039 + }, + { + "epoch": 0.8909692518763619, + "grad_norm": 0.685343325138092, + "learning_rate": 8.445011570883796e-05, + "loss": 2.5221, + "step": 11040 + }, + { + "epoch": 0.8910499556129449, + "grad_norm": 0.7030319571495056, + "learning_rate": 8.443452103556792e-05, + "loss": 2.5708, + "step": 11041 + }, + { + "epoch": 0.8911306593495278, + "grad_norm": 0.6910343766212463, + "learning_rate": 8.441892675023029e-05, + "loss": 2.5373, + "step": 11042 + }, + { + "epoch": 0.8912113630861109, + "grad_norm": 0.7207868099212646, + "learning_rate": 8.440333285321374e-05, + "loss": 2.5862, + "step": 11043 + }, + { + "epoch": 0.8912920668226939, + "grad_norm": 0.6780788898468018, + "learning_rate": 8.438773934490692e-05, + "loss": 2.562, + "step": 11044 + }, + { + "epoch": 0.8913727705592769, + "grad_norm": 0.7010074257850647, + "learning_rate": 8.437214622569842e-05, + "loss": 2.4556, + "step": 11045 + }, + { + "epoch": 0.8914534742958599, + "grad_norm": 0.6763667464256287, + "learning_rate": 8.435655349597689e-05, + "loss": 2.5402, + "step": 11046 + }, + { + "epoch": 0.891534178032443, + "grad_norm": 0.6870944499969482, + "learning_rate": 8.4340961156131e-05, + "loss": 2.5307, + "step": 11047 + }, + { + "epoch": 0.8916148817690259, + "grad_norm": 0.7835623025894165, + "learning_rate": 8.432536920654923e-05, + "loss": 2.4974, + "step": 11048 + }, + { + "epoch": 0.8916955855056089, + "grad_norm": 0.7551318407058716, + "learning_rate": 8.430977764762024e-05, + "loss": 2.5206, + "step": 11049 + }, + { + "epoch": 0.8917762892421919, + "grad_norm": 0.6486842632293701, + "learning_rate": 8.429418647973265e-05, + "loss": 2.4909, + "step": 11050 + }, + { + "epoch": 0.891856992978775, + "grad_norm": 0.6894064545631409, + "learning_rate": 8.427859570327494e-05, + "loss": 2.5846, + "step": 11051 + }, + { + "epoch": 0.8919376967153579, + "grad_norm": 0.7597395181655884, + "learning_rate": 
8.426300531863571e-05, + "loss": 2.5259, + "step": 11052 + }, + { + "epoch": 0.8920184004519409, + "grad_norm": 0.6784652471542358, + "learning_rate": 8.42474153262036e-05, + "loss": 2.5048, + "step": 11053 + }, + { + "epoch": 0.8920991041885239, + "grad_norm": 0.7703847885131836, + "learning_rate": 8.4231825726367e-05, + "loss": 2.4962, + "step": 11054 + }, + { + "epoch": 0.892179807925107, + "grad_norm": 0.6646561026573181, + "learning_rate": 8.421623651951454e-05, + "loss": 2.491, + "step": 11055 + }, + { + "epoch": 0.89226051166169, + "grad_norm": 0.6901054978370667, + "learning_rate": 8.420064770603475e-05, + "loss": 2.515, + "step": 11056 + }, + { + "epoch": 0.8923412153982729, + "grad_norm": 0.6789328455924988, + "learning_rate": 8.41850592863161e-05, + "loss": 2.5481, + "step": 11057 + }, + { + "epoch": 0.8924219191348559, + "grad_norm": 0.6211017370223999, + "learning_rate": 8.41694712607471e-05, + "loss": 2.51, + "step": 11058 + }, + { + "epoch": 0.892502622871439, + "grad_norm": 0.6482260823249817, + "learning_rate": 8.415388362971626e-05, + "loss": 2.5418, + "step": 11059 + }, + { + "epoch": 0.892583326608022, + "grad_norm": 0.7627651691436768, + "learning_rate": 8.413829639361209e-05, + "loss": 2.5033, + "step": 11060 + }, + { + "epoch": 0.892664030344605, + "grad_norm": 0.6560852527618408, + "learning_rate": 8.412270955282302e-05, + "loss": 2.5442, + "step": 11061 + }, + { + "epoch": 0.8927447340811879, + "grad_norm": 0.7479087114334106, + "learning_rate": 8.410712310773752e-05, + "loss": 2.5189, + "step": 11062 + }, + { + "epoch": 0.892825437817771, + "grad_norm": 0.6970879435539246, + "learning_rate": 8.409153705874411e-05, + "loss": 2.5418, + "step": 11063 + }, + { + "epoch": 0.892906141554354, + "grad_norm": 0.6514548659324646, + "learning_rate": 8.407595140623113e-05, + "loss": 2.5277, + "step": 11064 + }, + { + "epoch": 0.892986845290937, + "grad_norm": 0.6745554804801941, + "learning_rate": 8.406036615058707e-05, + "loss": 2.5085, + "step": 
11065 + }, + { + "epoch": 0.89306754902752, + "grad_norm": 0.7510363459587097, + "learning_rate": 8.404478129220037e-05, + "loss": 2.4941, + "step": 11066 + }, + { + "epoch": 0.8931482527641029, + "grad_norm": 0.6531470417976379, + "learning_rate": 8.402919683145941e-05, + "loss": 2.5363, + "step": 11067 + }, + { + "epoch": 0.893228956500686, + "grad_norm": 0.6861493587493896, + "learning_rate": 8.401361276875262e-05, + "loss": 2.6369, + "step": 11068 + }, + { + "epoch": 0.893309660237269, + "grad_norm": 0.6029497981071472, + "learning_rate": 8.39980291044684e-05, + "loss": 2.4953, + "step": 11069 + }, + { + "epoch": 0.893390363973852, + "grad_norm": 0.6831715106964111, + "learning_rate": 8.39824458389951e-05, + "loss": 2.5074, + "step": 11070 + }, + { + "epoch": 0.8934710677104349, + "grad_norm": 0.7076299786567688, + "learning_rate": 8.396686297272112e-05, + "loss": 2.5934, + "step": 11071 + }, + { + "epoch": 0.893551771447018, + "grad_norm": 0.6941438913345337, + "learning_rate": 8.395128050603487e-05, + "loss": 2.5338, + "step": 11072 + }, + { + "epoch": 0.893632475183601, + "grad_norm": 0.6867249011993408, + "learning_rate": 8.393569843932463e-05, + "loss": 2.5311, + "step": 11073 + }, + { + "epoch": 0.893713178920184, + "grad_norm": 0.623991847038269, + "learning_rate": 8.392011677297877e-05, + "loss": 2.5133, + "step": 11074 + }, + { + "epoch": 0.893793882656767, + "grad_norm": 0.6808422803878784, + "learning_rate": 8.390453550738564e-05, + "loss": 2.5398, + "step": 11075 + }, + { + "epoch": 0.89387458639335, + "grad_norm": 0.7136701345443726, + "learning_rate": 8.388895464293357e-05, + "loss": 2.5415, + "step": 11076 + }, + { + "epoch": 0.893955290129933, + "grad_norm": 0.6814287304878235, + "learning_rate": 8.387337418001084e-05, + "loss": 2.4782, + "step": 11077 + }, + { + "epoch": 0.894035993866516, + "grad_norm": 0.8101940155029297, + "learning_rate": 8.385779411900579e-05, + "loss": 2.5292, + "step": 11078 + }, + { + "epoch": 0.894116697603099, + 
"grad_norm": 0.7106796503067017, + "learning_rate": 8.384221446030676e-05, + "loss": 2.5819, + "step": 11079 + }, + { + "epoch": 0.8941974013396821, + "grad_norm": 0.7840015292167664, + "learning_rate": 8.382663520430191e-05, + "loss": 2.5243, + "step": 11080 + }, + { + "epoch": 0.894278105076265, + "grad_norm": 0.7037288546562195, + "learning_rate": 8.381105635137959e-05, + "loss": 2.5606, + "step": 11081 + }, + { + "epoch": 0.894358808812848, + "grad_norm": 0.671558678150177, + "learning_rate": 8.379547790192812e-05, + "loss": 2.4923, + "step": 11082 + }, + { + "epoch": 0.894439512549431, + "grad_norm": 0.6789675951004028, + "learning_rate": 8.377989985633567e-05, + "loss": 2.5281, + "step": 11083 + }, + { + "epoch": 0.8945202162860141, + "grad_norm": 0.6777840852737427, + "learning_rate": 8.37643222149905e-05, + "loss": 2.5159, + "step": 11084 + }, + { + "epoch": 0.8946009200225971, + "grad_norm": 0.6920693516731262, + "learning_rate": 8.374874497828089e-05, + "loss": 2.4952, + "step": 11085 + }, + { + "epoch": 0.89468162375918, + "grad_norm": 0.7394022941589355, + "learning_rate": 8.373316814659502e-05, + "loss": 2.5035, + "step": 11086 + }, + { + "epoch": 0.894762327495763, + "grad_norm": 0.625960648059845, + "learning_rate": 8.37175917203211e-05, + "loss": 2.5324, + "step": 11087 + }, + { + "epoch": 0.8948430312323461, + "grad_norm": 0.6848758459091187, + "learning_rate": 8.370201569984742e-05, + "loss": 2.5312, + "step": 11088 + }, + { + "epoch": 0.8949237349689291, + "grad_norm": 0.7207037210464478, + "learning_rate": 8.368644008556205e-05, + "loss": 2.5807, + "step": 11089 + }, + { + "epoch": 0.895004438705512, + "grad_norm": 0.7582261562347412, + "learning_rate": 8.367086487785326e-05, + "loss": 2.532, + "step": 11090 + }, + { + "epoch": 0.895085142442095, + "grad_norm": 0.6916806101799011, + "learning_rate": 8.36552900771092e-05, + "loss": 2.4772, + "step": 11091 + }, + { + "epoch": 0.8951658461786781, + "grad_norm": 0.6457386016845703, + 
"learning_rate": 8.363971568371805e-05, + "loss": 2.4952, + "step": 11092 + }, + { + "epoch": 0.8952465499152611, + "grad_norm": 0.7006754279136658, + "learning_rate": 8.362414169806792e-05, + "loss": 2.5818, + "step": 11093 + }, + { + "epoch": 0.8953272536518441, + "grad_norm": 0.6939932703971863, + "learning_rate": 8.3608568120547e-05, + "loss": 2.5411, + "step": 11094 + }, + { + "epoch": 0.895407957388427, + "grad_norm": 0.6314546465873718, + "learning_rate": 8.359299495154343e-05, + "loss": 2.5408, + "step": 11095 + }, + { + "epoch": 0.8954886611250101, + "grad_norm": 0.7202826738357544, + "learning_rate": 8.357742219144529e-05, + "loss": 2.4925, + "step": 11096 + }, + { + "epoch": 0.8955693648615931, + "grad_norm": 0.6475295424461365, + "learning_rate": 8.356184984064071e-05, + "loss": 2.5023, + "step": 11097 + }, + { + "epoch": 0.8956500685981761, + "grad_norm": 0.6161238551139832, + "learning_rate": 8.354627789951785e-05, + "loss": 2.5053, + "step": 11098 + }, + { + "epoch": 0.8957307723347591, + "grad_norm": 0.6919825077056885, + "learning_rate": 8.353070636846472e-05, + "loss": 2.5387, + "step": 11099 + }, + { + "epoch": 0.8958114760713421, + "grad_norm": 0.6374878883361816, + "learning_rate": 8.351513524786944e-05, + "loss": 2.5526, + "step": 11100 + }, + { + "epoch": 0.8958921798079251, + "grad_norm": 0.7041093707084656, + "learning_rate": 8.349956453812009e-05, + "loss": 2.5282, + "step": 11101 + }, + { + "epoch": 0.8959728835445081, + "grad_norm": 0.7252324819564819, + "learning_rate": 8.348399423960471e-05, + "loss": 2.5723, + "step": 11102 + }, + { + "epoch": 0.8960535872810911, + "grad_norm": 0.681682825088501, + "learning_rate": 8.346842435271137e-05, + "loss": 2.5284, + "step": 11103 + }, + { + "epoch": 0.8961342910176742, + "grad_norm": 0.7293850183486938, + "learning_rate": 8.34528548778281e-05, + "loss": 2.5014, + "step": 11104 + }, + { + "epoch": 0.8962149947542571, + "grad_norm": 0.7057846188545227, + "learning_rate": 8.343728581534299e-05, + 
"loss": 2.5502, + "step": 11105 + }, + { + "epoch": 0.8962956984908401, + "grad_norm": 0.6740830540657043, + "learning_rate": 8.342171716564398e-05, + "loss": 2.5205, + "step": 11106 + }, + { + "epoch": 0.8963764022274231, + "grad_norm": 0.6917470097541809, + "learning_rate": 8.340614892911907e-05, + "loss": 2.5216, + "step": 11107 + }, + { + "epoch": 0.8964571059640062, + "grad_norm": 0.7495635151863098, + "learning_rate": 8.339058110615638e-05, + "loss": 2.5509, + "step": 11108 + }, + { + "epoch": 0.8965378097005892, + "grad_norm": 0.6687765717506409, + "learning_rate": 8.33750136971438e-05, + "loss": 2.5286, + "step": 11109 + }, + { + "epoch": 0.8966185134371721, + "grad_norm": 0.6901381015777588, + "learning_rate": 8.335944670246931e-05, + "loss": 2.5545, + "step": 11110 + }, + { + "epoch": 0.8966992171737551, + "grad_norm": 0.6645506024360657, + "learning_rate": 8.334388012252094e-05, + "loss": 2.4883, + "step": 11111 + }, + { + "epoch": 0.8967799209103382, + "grad_norm": 0.6427997350692749, + "learning_rate": 8.332831395768662e-05, + "loss": 2.5103, + "step": 11112 + }, + { + "epoch": 0.8968606246469212, + "grad_norm": 0.7224035263061523, + "learning_rate": 8.331274820835425e-05, + "loss": 2.5086, + "step": 11113 + }, + { + "epoch": 0.8969413283835042, + "grad_norm": 0.6918233036994934, + "learning_rate": 8.329718287491188e-05, + "loss": 2.5222, + "step": 11114 + }, + { + "epoch": 0.8970220321200871, + "grad_norm": 0.735583484172821, + "learning_rate": 8.328161795774734e-05, + "loss": 2.5277, + "step": 11115 + }, + { + "epoch": 0.8971027358566702, + "grad_norm": 0.6624864339828491, + "learning_rate": 8.326605345724857e-05, + "loss": 2.532, + "step": 11116 + }, + { + "epoch": 0.8971834395932532, + "grad_norm": 0.6227770447731018, + "learning_rate": 8.325048937380352e-05, + "loss": 2.5386, + "step": 11117 + }, + { + "epoch": 0.8972641433298362, + "grad_norm": 0.6483022570610046, + "learning_rate": 8.323492570780004e-05, + "loss": 2.4958, + "step": 11118 + }, + 
{ + "epoch": 0.8973448470664191, + "grad_norm": 0.7072618007659912, + "learning_rate": 8.321936245962602e-05, + "loss": 2.4931, + "step": 11119 + }, + { + "epoch": 0.8974255508030021, + "grad_norm": 0.6848764419555664, + "learning_rate": 8.320379962966937e-05, + "loss": 2.4549, + "step": 11120 + }, + { + "epoch": 0.8975062545395852, + "grad_norm": 0.6819620132446289, + "learning_rate": 8.318823721831795e-05, + "loss": 2.5156, + "step": 11121 + }, + { + "epoch": 0.8975869582761682, + "grad_norm": 0.6834476590156555, + "learning_rate": 8.31726752259596e-05, + "loss": 2.507, + "step": 11122 + }, + { + "epoch": 0.8976676620127512, + "grad_norm": 0.6785772442817688, + "learning_rate": 8.315711365298214e-05, + "loss": 2.5086, + "step": 11123 + }, + { + "epoch": 0.8977483657493341, + "grad_norm": 0.6303566098213196, + "learning_rate": 8.314155249977351e-05, + "loss": 2.5087, + "step": 11124 + }, + { + "epoch": 0.8978290694859172, + "grad_norm": 0.6544361710548401, + "learning_rate": 8.31259917667214e-05, + "loss": 2.505, + "step": 11125 + }, + { + "epoch": 0.8979097732225002, + "grad_norm": 0.8135818243026733, + "learning_rate": 8.311043145421369e-05, + "loss": 2.5139, + "step": 11126 + }, + { + "epoch": 0.8979904769590832, + "grad_norm": 0.6744341254234314, + "learning_rate": 8.309487156263818e-05, + "loss": 2.4797, + "step": 11127 + }, + { + "epoch": 0.8980711806956662, + "grad_norm": 0.6138790845870972, + "learning_rate": 8.307931209238267e-05, + "loss": 2.5334, + "step": 11128 + }, + { + "epoch": 0.8981518844322492, + "grad_norm": 0.702434241771698, + "learning_rate": 8.306375304383492e-05, + "loss": 2.5343, + "step": 11129 + }, + { + "epoch": 0.8982325881688322, + "grad_norm": 0.6787155270576477, + "learning_rate": 8.304819441738275e-05, + "loss": 2.507, + "step": 11130 + }, + { + "epoch": 0.8983132919054152, + "grad_norm": 0.6963719129562378, + "learning_rate": 8.303263621341386e-05, + "loss": 2.5238, + "step": 11131 + }, + { + "epoch": 0.8983939956419982, + 
"grad_norm": 0.6623271107673645, + "learning_rate": 8.3017078432316e-05, + "loss": 2.5206, + "step": 11132 + }, + { + "epoch": 0.8984746993785813, + "grad_norm": 0.777222752571106, + "learning_rate": 8.300152107447701e-05, + "loss": 2.5004, + "step": 11133 + }, + { + "epoch": 0.8985554031151642, + "grad_norm": 0.6788455247879028, + "learning_rate": 8.29859641402845e-05, + "loss": 2.5735, + "step": 11134 + }, + { + "epoch": 0.8986361068517472, + "grad_norm": 0.6595063209533691, + "learning_rate": 8.297040763012624e-05, + "loss": 2.4988, + "step": 11135 + }, + { + "epoch": 0.8987168105883302, + "grad_norm": 0.7105697989463806, + "learning_rate": 8.295485154438994e-05, + "loss": 2.5531, + "step": 11136 + }, + { + "epoch": 0.8987975143249133, + "grad_norm": 0.6884949803352356, + "learning_rate": 8.29392958834633e-05, + "loss": 2.5158, + "step": 11137 + }, + { + "epoch": 0.8988782180614963, + "grad_norm": 0.7178345322608948, + "learning_rate": 8.2923740647734e-05, + "loss": 2.5836, + "step": 11138 + }, + { + "epoch": 0.8989589217980792, + "grad_norm": 0.7000541687011719, + "learning_rate": 8.290818583758973e-05, + "loss": 2.5345, + "step": 11139 + }, + { + "epoch": 0.8990396255346622, + "grad_norm": 0.6808128952980042, + "learning_rate": 8.289263145341816e-05, + "loss": 2.5227, + "step": 11140 + }, + { + "epoch": 0.8991203292712453, + "grad_norm": 0.7047473788261414, + "learning_rate": 8.287707749560691e-05, + "loss": 2.477, + "step": 11141 + }, + { + "epoch": 0.8992010330078283, + "grad_norm": 0.6654812693595886, + "learning_rate": 8.286152396454365e-05, + "loss": 2.4575, + "step": 11142 + }, + { + "epoch": 0.8992817367444113, + "grad_norm": 0.6690360307693481, + "learning_rate": 8.284597086061603e-05, + "loss": 2.4755, + "step": 11143 + }, + { + "epoch": 0.8993624404809942, + "grad_norm": 0.7270147204399109, + "learning_rate": 8.283041818421164e-05, + "loss": 2.5893, + "step": 11144 + }, + { + "epoch": 0.8994431442175773, + "grad_norm": 0.5977498888969421, + 
"learning_rate": 8.28148659357181e-05, + "loss": 2.5108, + "step": 11145 + }, + { + "epoch": 0.8995238479541603, + "grad_norm": 0.694593071937561, + "learning_rate": 8.279931411552307e-05, + "loss": 2.5036, + "step": 11146 + }, + { + "epoch": 0.8996045516907433, + "grad_norm": 0.7395440936088562, + "learning_rate": 8.278376272401404e-05, + "loss": 2.5244, + "step": 11147 + }, + { + "epoch": 0.8996852554273262, + "grad_norm": 0.6483517289161682, + "learning_rate": 8.276821176157867e-05, + "loss": 2.5619, + "step": 11148 + }, + { + "epoch": 0.8997659591639093, + "grad_norm": 0.6996768116950989, + "learning_rate": 8.275266122860454e-05, + "loss": 2.5275, + "step": 11149 + }, + { + "epoch": 0.8998466629004923, + "grad_norm": 0.661122739315033, + "learning_rate": 8.273711112547914e-05, + "loss": 2.5053, + "step": 11150 + }, + { + "epoch": 0.8999273666370753, + "grad_norm": 0.6919111609458923, + "learning_rate": 8.272156145259006e-05, + "loss": 2.578, + "step": 11151 + }, + { + "epoch": 0.9000080703736583, + "grad_norm": 0.6680958867073059, + "learning_rate": 8.270601221032482e-05, + "loss": 2.4942, + "step": 11152 + }, + { + "epoch": 0.9000887741102414, + "grad_norm": 0.6782989501953125, + "learning_rate": 8.269046339907101e-05, + "loss": 2.5461, + "step": 11153 + }, + { + "epoch": 0.9001694778468243, + "grad_norm": 0.743468165397644, + "learning_rate": 8.267491501921605e-05, + "loss": 2.629, + "step": 11154 + }, + { + "epoch": 0.9002501815834073, + "grad_norm": 0.709562361240387, + "learning_rate": 8.265936707114751e-05, + "loss": 2.566, + "step": 11155 + }, + { + "epoch": 0.9003308853199903, + "grad_norm": 0.7075676918029785, + "learning_rate": 8.264381955525291e-05, + "loss": 2.5409, + "step": 11156 + }, + { + "epoch": 0.9004115890565734, + "grad_norm": 0.7021335959434509, + "learning_rate": 8.262827247191963e-05, + "loss": 2.5606, + "step": 11157 + }, + { + "epoch": 0.9004922927931563, + "grad_norm": 0.6507331132888794, + "learning_rate": 8.261272582153524e-05, + 
"loss": 2.5557, + "step": 11158 + }, + { + "epoch": 0.9005729965297393, + "grad_norm": 0.7182760238647461, + "learning_rate": 8.25971796044872e-05, + "loss": 2.5567, + "step": 11159 + }, + { + "epoch": 0.9006537002663223, + "grad_norm": 0.6632338762283325, + "learning_rate": 8.258163382116291e-05, + "loss": 2.5081, + "step": 11160 + }, + { + "epoch": 0.9007344040029054, + "grad_norm": 0.6889928579330444, + "learning_rate": 8.256608847194983e-05, + "loss": 2.5034, + "step": 11161 + }, + { + "epoch": 0.9008151077394884, + "grad_norm": 0.6374824047088623, + "learning_rate": 8.255054355723542e-05, + "loss": 2.4826, + "step": 11162 + }, + { + "epoch": 0.9008958114760713, + "grad_norm": 0.7100771069526672, + "learning_rate": 8.253499907740706e-05, + "loss": 2.4666, + "step": 11163 + }, + { + "epoch": 0.9009765152126543, + "grad_norm": 0.8141123652458191, + "learning_rate": 8.251945503285218e-05, + "loss": 2.5339, + "step": 11164 + }, + { + "epoch": 0.9010572189492374, + "grad_norm": 0.6621670722961426, + "learning_rate": 8.250391142395822e-05, + "loss": 2.4805, + "step": 11165 + }, + { + "epoch": 0.9011379226858204, + "grad_norm": 0.6624772548675537, + "learning_rate": 8.248836825111245e-05, + "loss": 2.5148, + "step": 11166 + }, + { + "epoch": 0.9012186264224034, + "grad_norm": 0.6783565282821655, + "learning_rate": 8.247282551470235e-05, + "loss": 2.4481, + "step": 11167 + }, + { + "epoch": 0.9012993301589863, + "grad_norm": 0.700089156627655, + "learning_rate": 8.245728321511525e-05, + "loss": 2.5649, + "step": 11168 + }, + { + "epoch": 0.9013800338955693, + "grad_norm": 0.6765339970588684, + "learning_rate": 8.244174135273852e-05, + "loss": 2.5221, + "step": 11169 + }, + { + "epoch": 0.9014607376321524, + "grad_norm": 0.6896056532859802, + "learning_rate": 8.242619992795948e-05, + "loss": 2.4742, + "step": 11170 + }, + { + "epoch": 0.9015414413687354, + "grad_norm": 0.7134374976158142, + "learning_rate": 8.241065894116547e-05, + "loss": 2.5231, + "step": 11171 + }, + 
{ + "epoch": 0.9016221451053184, + "grad_norm": 0.6939442753791809, + "learning_rate": 8.239511839274385e-05, + "loss": 2.5159, + "step": 11172 + }, + { + "epoch": 0.9017028488419013, + "grad_norm": 0.6780345439910889, + "learning_rate": 8.237957828308187e-05, + "loss": 2.5474, + "step": 11173 + }, + { + "epoch": 0.9017835525784844, + "grad_norm": 0.6532382965087891, + "learning_rate": 8.236403861256687e-05, + "loss": 2.4982, + "step": 11174 + }, + { + "epoch": 0.9018642563150674, + "grad_norm": 0.6918137073516846, + "learning_rate": 8.234849938158615e-05, + "loss": 2.4657, + "step": 11175 + }, + { + "epoch": 0.9019449600516504, + "grad_norm": 0.6838762164115906, + "learning_rate": 8.233296059052695e-05, + "loss": 2.5405, + "step": 11176 + }, + { + "epoch": 0.9020256637882333, + "grad_norm": 0.7560290098190308, + "learning_rate": 8.231742223977653e-05, + "loss": 2.5379, + "step": 11177 + }, + { + "epoch": 0.9021063675248164, + "grad_norm": 0.6673319339752197, + "learning_rate": 8.230188432972221e-05, + "loss": 2.4669, + "step": 11178 + }, + { + "epoch": 0.9021870712613994, + "grad_norm": 0.7486294507980347, + "learning_rate": 8.228634686075116e-05, + "loss": 2.526, + "step": 11179 + }, + { + "epoch": 0.9022677749979824, + "grad_norm": 0.7012811303138733, + "learning_rate": 8.227080983325067e-05, + "loss": 2.5544, + "step": 11180 + }, + { + "epoch": 0.9023484787345654, + "grad_norm": 0.6807447075843811, + "learning_rate": 8.225527324760796e-05, + "loss": 2.5139, + "step": 11181 + }, + { + "epoch": 0.9024291824711484, + "grad_norm": 0.7594932317733765, + "learning_rate": 8.223973710421018e-05, + "loss": 2.539, + "step": 11182 + }, + { + "epoch": 0.9025098862077314, + "grad_norm": 0.6764204502105713, + "learning_rate": 8.22242014034446e-05, + "loss": 2.6128, + "step": 11183 + }, + { + "epoch": 0.9025905899443144, + "grad_norm": 0.6499967575073242, + "learning_rate": 8.220866614569837e-05, + "loss": 2.5459, + "step": 11184 + }, + { + "epoch": 0.9026712936808974, + 
"grad_norm": 0.673076331615448, + "learning_rate": 8.219313133135876e-05, + "loss": 2.5852, + "step": 11185 + }, + { + "epoch": 0.9027519974174805, + "grad_norm": 0.784854531288147, + "learning_rate": 8.21775969608128e-05, + "loss": 2.5586, + "step": 11186 + }, + { + "epoch": 0.9028327011540634, + "grad_norm": 0.658963680267334, + "learning_rate": 8.216206303444771e-05, + "loss": 2.4376, + "step": 11187 + }, + { + "epoch": 0.9029134048906464, + "grad_norm": 0.6456249356269836, + "learning_rate": 8.214652955265067e-05, + "loss": 2.5166, + "step": 11188 + }, + { + "epoch": 0.9029941086272294, + "grad_norm": 0.6940007209777832, + "learning_rate": 8.213099651580874e-05, + "loss": 2.4992, + "step": 11189 + }, + { + "epoch": 0.9030748123638125, + "grad_norm": 0.6661425828933716, + "learning_rate": 8.211546392430911e-05, + "loss": 2.5177, + "step": 11190 + }, + { + "epoch": 0.9031555161003955, + "grad_norm": 0.647834300994873, + "learning_rate": 8.20999317785389e-05, + "loss": 2.4666, + "step": 11191 + }, + { + "epoch": 0.9032362198369784, + "grad_norm": 0.7673383355140686, + "learning_rate": 8.208440007888515e-05, + "loss": 2.4852, + "step": 11192 + }, + { + "epoch": 0.9033169235735614, + "grad_norm": 0.7033390998840332, + "learning_rate": 8.206886882573498e-05, + "loss": 2.5549, + "step": 11193 + }, + { + "epoch": 0.9033976273101445, + "grad_norm": 0.6871141195297241, + "learning_rate": 8.205333801947548e-05, + "loss": 2.4585, + "step": 11194 + }, + { + "epoch": 0.9034783310467275, + "grad_norm": 0.7201984524726868, + "learning_rate": 8.20378076604937e-05, + "loss": 2.5271, + "step": 11195 + }, + { + "epoch": 0.9035590347833105, + "grad_norm": 0.704060971736908, + "learning_rate": 8.202227774917671e-05, + "loss": 2.4915, + "step": 11196 + }, + { + "epoch": 0.9036397385198934, + "grad_norm": 0.6833879947662354, + "learning_rate": 8.200674828591156e-05, + "loss": 2.4496, + "step": 11197 + }, + { + "epoch": 0.9037204422564765, + "grad_norm": 0.6564866304397583, + 
"learning_rate": 8.199121927108527e-05, + "loss": 2.4818, + "step": 11198 + }, + { + "epoch": 0.9038011459930595, + "grad_norm": 0.6970151662826538, + "learning_rate": 8.197569070508486e-05, + "loss": 2.5812, + "step": 11199 + }, + { + "epoch": 0.9038818497296425, + "grad_norm": 0.7147194743156433, + "learning_rate": 8.196016258829737e-05, + "loss": 2.5543, + "step": 11200 + }, + { + "epoch": 0.9039625534662254, + "grad_norm": 0.6357648968696594, + "learning_rate": 8.194463492110981e-05, + "loss": 2.5254, + "step": 11201 + }, + { + "epoch": 0.9040432572028085, + "grad_norm": 0.7113756537437439, + "learning_rate": 8.19291077039091e-05, + "loss": 2.5179, + "step": 11202 + }, + { + "epoch": 0.9041239609393915, + "grad_norm": 0.7252987623214722, + "learning_rate": 8.191358093708228e-05, + "loss": 2.5658, + "step": 11203 + }, + { + "epoch": 0.9042046646759745, + "grad_norm": 0.7095803618431091, + "learning_rate": 8.189805462101631e-05, + "loss": 2.583, + "step": 11204 + }, + { + "epoch": 0.9042853684125575, + "grad_norm": 0.7447760105133057, + "learning_rate": 8.188252875609812e-05, + "loss": 2.5608, + "step": 11205 + }, + { + "epoch": 0.9043660721491406, + "grad_norm": 0.6578439474105835, + "learning_rate": 8.186700334271468e-05, + "loss": 2.508, + "step": 11206 + }, + { + "epoch": 0.9044467758857235, + "grad_norm": 0.6776832938194275, + "learning_rate": 8.185147838125296e-05, + "loss": 2.6188, + "step": 11207 + }, + { + "epoch": 0.9045274796223065, + "grad_norm": 0.6559253931045532, + "learning_rate": 8.183595387209976e-05, + "loss": 2.5307, + "step": 11208 + }, + { + "epoch": 0.9046081833588895, + "grad_norm": 0.7078405022621155, + "learning_rate": 8.18204298156421e-05, + "loss": 2.5545, + "step": 11209 + }, + { + "epoch": 0.9046888870954726, + "grad_norm": 0.6790273189544678, + "learning_rate": 8.18049062122669e-05, + "loss": 2.4963, + "step": 11210 + }, + { + "epoch": 0.9047695908320555, + "grad_norm": 0.6888250708580017, + "learning_rate": 8.178938306236095e-05, + 
"loss": 2.5108, + "step": 11211 + }, + { + "epoch": 0.9048502945686385, + "grad_norm": 0.6438474059104919, + "learning_rate": 8.177386036631119e-05, + "loss": 2.4976, + "step": 11212 + }, + { + "epoch": 0.9049309983052215, + "grad_norm": 0.6786646842956543, + "learning_rate": 8.175833812450445e-05, + "loss": 2.4584, + "step": 11213 + }, + { + "epoch": 0.9050117020418046, + "grad_norm": 0.6480324268341064, + "learning_rate": 8.174281633732764e-05, + "loss": 2.5021, + "step": 11214 + }, + { + "epoch": 0.9050924057783876, + "grad_norm": 0.7232171893119812, + "learning_rate": 8.172729500516756e-05, + "loss": 2.4742, + "step": 11215 + }, + { + "epoch": 0.9051731095149705, + "grad_norm": 0.7048845291137695, + "learning_rate": 8.171177412841105e-05, + "loss": 2.518, + "step": 11216 + }, + { + "epoch": 0.9052538132515535, + "grad_norm": 0.6363180875778198, + "learning_rate": 8.169625370744496e-05, + "loss": 2.5154, + "step": 11217 + }, + { + "epoch": 0.9053345169881366, + "grad_norm": 0.7176045179367065, + "learning_rate": 8.168073374265605e-05, + "loss": 2.5182, + "step": 11218 + }, + { + "epoch": 0.9054152207247196, + "grad_norm": 0.7011643052101135, + "learning_rate": 8.166521423443112e-05, + "loss": 2.5615, + "step": 11219 + }, + { + "epoch": 0.9054959244613026, + "grad_norm": 0.6853327751159668, + "learning_rate": 8.164969518315704e-05, + "loss": 2.5057, + "step": 11220 + }, + { + "epoch": 0.9055766281978855, + "grad_norm": 0.6972528696060181, + "learning_rate": 8.163417658922049e-05, + "loss": 2.4949, + "step": 11221 + }, + { + "epoch": 0.9056573319344685, + "grad_norm": 0.6780978441238403, + "learning_rate": 8.161865845300824e-05, + "loss": 2.5601, + "step": 11222 + }, + { + "epoch": 0.9057380356710516, + "grad_norm": 0.6454098224639893, + "learning_rate": 8.160314077490711e-05, + "loss": 2.4203, + "step": 11223 + }, + { + "epoch": 0.9058187394076346, + "grad_norm": 0.7300907969474792, + "learning_rate": 8.158762355530378e-05, + "loss": 2.4818, + "step": 11224 + }, 
+ { + "epoch": 0.9058994431442176, + "grad_norm": 0.682475745677948, + "learning_rate": 8.1572106794585e-05, + "loss": 2.4852, + "step": 11225 + }, + { + "epoch": 0.9059801468808005, + "grad_norm": 0.6666192412376404, + "learning_rate": 8.155659049313754e-05, + "loss": 2.5642, + "step": 11226 + }, + { + "epoch": 0.9060608506173836, + "grad_norm": 0.6873177886009216, + "learning_rate": 8.154107465134801e-05, + "loss": 2.5163, + "step": 11227 + }, + { + "epoch": 0.9061415543539666, + "grad_norm": 0.6704845428466797, + "learning_rate": 8.152555926960315e-05, + "loss": 2.5481, + "step": 11228 + }, + { + "epoch": 0.9062222580905496, + "grad_norm": 0.6340618133544922, + "learning_rate": 8.151004434828963e-05, + "loss": 2.4701, + "step": 11229 + }, + { + "epoch": 0.9063029618271325, + "grad_norm": 0.7886226177215576, + "learning_rate": 8.14945298877942e-05, + "loss": 2.5322, + "step": 11230 + }, + { + "epoch": 0.9063836655637156, + "grad_norm": 0.7086018919944763, + "learning_rate": 8.14790158885034e-05, + "loss": 2.4909, + "step": 11231 + }, + { + "epoch": 0.9064643693002986, + "grad_norm": 0.6791329979896545, + "learning_rate": 8.146350235080396e-05, + "loss": 2.4438, + "step": 11232 + }, + { + "epoch": 0.9065450730368816, + "grad_norm": 0.7070720791816711, + "learning_rate": 8.14479892750825e-05, + "loss": 2.528, + "step": 11233 + }, + { + "epoch": 0.9066257767734646, + "grad_norm": 0.6551348567008972, + "learning_rate": 8.143247666172564e-05, + "loss": 2.4747, + "step": 11234 + }, + { + "epoch": 0.9067064805100477, + "grad_norm": 0.6691645979881287, + "learning_rate": 8.141696451111997e-05, + "loss": 2.5038, + "step": 11235 + }, + { + "epoch": 0.9067871842466306, + "grad_norm": 0.6814864277839661, + "learning_rate": 8.14014528236522e-05, + "loss": 2.5737, + "step": 11236 + }, + { + "epoch": 0.9068678879832136, + "grad_norm": 0.7442377209663391, + "learning_rate": 8.138594159970877e-05, + "loss": 2.5839, + "step": 11237 + }, + { + "epoch": 0.9069485917197966, + 
"grad_norm": 0.6861338019371033, + "learning_rate": 8.137043083967634e-05, + "loss": 2.567, + "step": 11238 + }, + { + "epoch": 0.9070292954563797, + "grad_norm": 0.7056479454040527, + "learning_rate": 8.135492054394151e-05, + "loss": 2.5297, + "step": 11239 + }, + { + "epoch": 0.9071099991929626, + "grad_norm": 0.7166962623596191, + "learning_rate": 8.133941071289076e-05, + "loss": 2.4834, + "step": 11240 + }, + { + "epoch": 0.9071907029295456, + "grad_norm": 0.6285616159439087, + "learning_rate": 8.132390134691068e-05, + "loss": 2.5066, + "step": 11241 + }, + { + "epoch": 0.9072714066661286, + "grad_norm": 0.681915283203125, + "learning_rate": 8.130839244638783e-05, + "loss": 2.5387, + "step": 11242 + }, + { + "epoch": 0.9073521104027117, + "grad_norm": 0.6876898407936096, + "learning_rate": 8.129288401170866e-05, + "loss": 2.4465, + "step": 11243 + }, + { + "epoch": 0.9074328141392947, + "grad_norm": 0.657132625579834, + "learning_rate": 8.127737604325975e-05, + "loss": 2.499, + "step": 11244 + }, + { + "epoch": 0.9075135178758776, + "grad_norm": 0.6678825616836548, + "learning_rate": 8.126186854142752e-05, + "loss": 2.4872, + "step": 11245 + }, + { + "epoch": 0.9075942216124606, + "grad_norm": 0.7296879291534424, + "learning_rate": 8.124636150659858e-05, + "loss": 2.4783, + "step": 11246 + }, + { + "epoch": 0.9076749253490437, + "grad_norm": 0.7087056040763855, + "learning_rate": 8.12308549391593e-05, + "loss": 2.507, + "step": 11247 + }, + { + "epoch": 0.9077556290856267, + "grad_norm": 0.7099738121032715, + "learning_rate": 8.121534883949616e-05, + "loss": 2.5317, + "step": 11248 + }, + { + "epoch": 0.9078363328222097, + "grad_norm": 0.6421170830726624, + "learning_rate": 8.119984320799566e-05, + "loss": 2.5291, + "step": 11249 + }, + { + "epoch": 0.9079170365587926, + "grad_norm": 0.6835018396377563, + "learning_rate": 8.11843380450442e-05, + "loss": 2.5523, + "step": 11250 + }, + { + "epoch": 0.9079977402953757, + "grad_norm": 0.6638229489326477, + 
"learning_rate": 8.11688333510282e-05, + "loss": 2.5128, + "step": 11251 + }, + { + "epoch": 0.9080784440319587, + "grad_norm": 0.6783459186553955, + "learning_rate": 8.115332912633415e-05, + "loss": 2.5485, + "step": 11252 + }, + { + "epoch": 0.9081591477685417, + "grad_norm": 0.65911865234375, + "learning_rate": 8.113782537134838e-05, + "loss": 2.5408, + "step": 11253 + }, + { + "epoch": 0.9082398515051247, + "grad_norm": 0.6844244003295898, + "learning_rate": 8.112232208645729e-05, + "loss": 2.6067, + "step": 11254 + }, + { + "epoch": 0.9083205552417077, + "grad_norm": 0.6896870136260986, + "learning_rate": 8.110681927204729e-05, + "loss": 2.5444, + "step": 11255 + }, + { + "epoch": 0.9084012589782907, + "grad_norm": 0.6693820953369141, + "learning_rate": 8.109131692850473e-05, + "loss": 2.5118, + "step": 11256 + }, + { + "epoch": 0.9084819627148737, + "grad_norm": 0.6401854753494263, + "learning_rate": 8.107581505621599e-05, + "loss": 2.4811, + "step": 11257 + }, + { + "epoch": 0.9085626664514567, + "grad_norm": 0.6861663460731506, + "learning_rate": 8.106031365556743e-05, + "loss": 2.4633, + "step": 11258 + }, + { + "epoch": 0.9086433701880398, + "grad_norm": 0.6631655097007751, + "learning_rate": 8.104481272694533e-05, + "loss": 2.5748, + "step": 11259 + }, + { + "epoch": 0.9087240739246227, + "grad_norm": 0.6499454975128174, + "learning_rate": 8.102931227073604e-05, + "loss": 2.5573, + "step": 11260 + }, + { + "epoch": 0.9088047776612057, + "grad_norm": 0.7214524149894714, + "learning_rate": 8.10138122873259e-05, + "loss": 2.4905, + "step": 11261 + }, + { + "epoch": 0.9088854813977887, + "grad_norm": 0.6481152176856995, + "learning_rate": 8.099831277710122e-05, + "loss": 2.5073, + "step": 11262 + }, + { + "epoch": 0.9089661851343718, + "grad_norm": 0.6666486859321594, + "learning_rate": 8.09828137404482e-05, + "loss": 2.5379, + "step": 11263 + }, + { + "epoch": 0.9090468888709548, + "grad_norm": 0.7186474800109863, + "learning_rate": 8.096731517775319e-05, + 
"loss": 2.5164, + "step": 11264 + }, + { + "epoch": 0.9091275926075377, + "grad_norm": 0.6838653087615967, + "learning_rate": 8.095181708940245e-05, + "loss": 2.49, + "step": 11265 + }, + { + "epoch": 0.9092082963441207, + "grad_norm": 0.7740866541862488, + "learning_rate": 8.093631947578221e-05, + "loss": 2.5487, + "step": 11266 + }, + { + "epoch": 0.9092890000807038, + "grad_norm": 0.7198607325553894, + "learning_rate": 8.092082233727871e-05, + "loss": 2.4477, + "step": 11267 + }, + { + "epoch": 0.9093697038172868, + "grad_norm": 0.6454673409461975, + "learning_rate": 8.090532567427825e-05, + "loss": 2.523, + "step": 11268 + }, + { + "epoch": 0.9094504075538697, + "grad_norm": 0.6169581413269043, + "learning_rate": 8.088982948716692e-05, + "loss": 2.4924, + "step": 11269 + }, + { + "epoch": 0.9095311112904527, + "grad_norm": 0.7034861445426941, + "learning_rate": 8.0874333776331e-05, + "loss": 2.4756, + "step": 11270 + }, + { + "epoch": 0.9096118150270357, + "grad_norm": 0.7231355309486389, + "learning_rate": 8.085883854215671e-05, + "loss": 2.4963, + "step": 11271 + }, + { + "epoch": 0.9096925187636188, + "grad_norm": 0.6597892045974731, + "learning_rate": 8.084334378503017e-05, + "loss": 2.5617, + "step": 11272 + }, + { + "epoch": 0.9097732225002018, + "grad_norm": 0.7257365584373474, + "learning_rate": 8.082784950533759e-05, + "loss": 2.5293, + "step": 11273 + }, + { + "epoch": 0.9098539262367847, + "grad_norm": 0.7305313944816589, + "learning_rate": 8.081235570346512e-05, + "loss": 2.5355, + "step": 11274 + }, + { + "epoch": 0.9099346299733677, + "grad_norm": 0.6814435720443726, + "learning_rate": 8.07968623797989e-05, + "loss": 2.4842, + "step": 11275 + }, + { + "epoch": 0.9100153337099508, + "grad_norm": 0.7342902421951294, + "learning_rate": 8.078136953472506e-05, + "loss": 2.4817, + "step": 11276 + }, + { + "epoch": 0.9100960374465338, + "grad_norm": 0.6456516981124878, + "learning_rate": 8.076587716862973e-05, + "loss": 2.5119, + "step": 11277 + }, + { + 
"epoch": 0.9101767411831168, + "grad_norm": 0.7268881797790527, + "learning_rate": 8.075038528189906e-05, + "loss": 2.4614, + "step": 11278 + }, + { + "epoch": 0.9102574449196997, + "grad_norm": 0.6901549696922302, + "learning_rate": 8.073489387491906e-05, + "loss": 2.5411, + "step": 11279 + }, + { + "epoch": 0.9103381486562828, + "grad_norm": 0.6850160956382751, + "learning_rate": 8.071940294807588e-05, + "loss": 2.5078, + "step": 11280 + }, + { + "epoch": 0.9104188523928658, + "grad_norm": 0.6550731658935547, + "learning_rate": 8.070391250175558e-05, + "loss": 2.5502, + "step": 11281 + }, + { + "epoch": 0.9104995561294488, + "grad_norm": 0.7524412274360657, + "learning_rate": 8.068842253634421e-05, + "loss": 2.4699, + "step": 11282 + }, + { + "epoch": 0.9105802598660317, + "grad_norm": 0.6659243702888489, + "learning_rate": 8.067293305222784e-05, + "loss": 2.557, + "step": 11283 + }, + { + "epoch": 0.9106609636026148, + "grad_norm": 0.67015540599823, + "learning_rate": 8.065744404979251e-05, + "loss": 2.5929, + "step": 11284 + }, + { + "epoch": 0.9107416673391978, + "grad_norm": 0.7139000296592712, + "learning_rate": 8.064195552942422e-05, + "loss": 2.5262, + "step": 11285 + }, + { + "epoch": 0.9108223710757808, + "grad_norm": 0.6918016672134399, + "learning_rate": 8.062646749150899e-05, + "loss": 2.5161, + "step": 11286 + }, + { + "epoch": 0.9109030748123638, + "grad_norm": 0.7395541667938232, + "learning_rate": 8.061097993643289e-05, + "loss": 2.5351, + "step": 11287 + }, + { + "epoch": 0.9109837785489469, + "grad_norm": 0.6794499158859253, + "learning_rate": 8.05954928645818e-05, + "loss": 2.4617, + "step": 11288 + }, + { + "epoch": 0.9110644822855298, + "grad_norm": 0.6906577348709106, + "learning_rate": 8.058000627634176e-05, + "loss": 2.5701, + "step": 11289 + }, + { + "epoch": 0.9111451860221128, + "grad_norm": 0.6954079866409302, + "learning_rate": 8.056452017209874e-05, + "loss": 2.5137, + "step": 11290 + }, + { + "epoch": 0.9112258897586958, + 
"grad_norm": 0.7381381988525391, + "learning_rate": 8.054903455223866e-05, + "loss": 2.6666, + "step": 11291 + }, + { + "epoch": 0.9113065934952789, + "grad_norm": 0.6731518507003784, + "learning_rate": 8.053354941714749e-05, + "loss": 2.5173, + "step": 11292 + }, + { + "epoch": 0.9113872972318618, + "grad_norm": 0.6976885795593262, + "learning_rate": 8.051806476721116e-05, + "loss": 2.5089, + "step": 11293 + }, + { + "epoch": 0.9114680009684448, + "grad_norm": 0.6401965618133545, + "learning_rate": 8.050258060281562e-05, + "loss": 2.5295, + "step": 11294 + }, + { + "epoch": 0.9115487047050278, + "grad_norm": 0.7409671545028687, + "learning_rate": 8.048709692434667e-05, + "loss": 2.5074, + "step": 11295 + }, + { + "epoch": 0.9116294084416109, + "grad_norm": 0.6028234958648682, + "learning_rate": 8.04716137321903e-05, + "loss": 2.5437, + "step": 11296 + }, + { + "epoch": 0.9117101121781939, + "grad_norm": 0.727643609046936, + "learning_rate": 8.04561310267324e-05, + "loss": 2.5272, + "step": 11297 + }, + { + "epoch": 0.9117908159147768, + "grad_norm": 0.6912926435470581, + "learning_rate": 8.044064880835876e-05, + "loss": 2.5166, + "step": 11298 + }, + { + "epoch": 0.9118715196513598, + "grad_norm": 0.6971367001533508, + "learning_rate": 8.042516707745528e-05, + "loss": 2.5421, + "step": 11299 + }, + { + "epoch": 0.9119522233879429, + "grad_norm": 0.6722451448440552, + "learning_rate": 8.040968583440783e-05, + "loss": 2.5088, + "step": 11300 + }, + { + "epoch": 0.9120329271245259, + "grad_norm": 0.6469144225120544, + "learning_rate": 8.03942050796022e-05, + "loss": 2.4921, + "step": 11301 + }, + { + "epoch": 0.9121136308611089, + "grad_norm": 0.6709008812904358, + "learning_rate": 8.037872481342423e-05, + "loss": 2.4553, + "step": 11302 + }, + { + "epoch": 0.9121943345976918, + "grad_norm": 0.6540920734405518, + "learning_rate": 8.036324503625977e-05, + "loss": 2.489, + "step": 11303 + }, + { + "epoch": 0.9122750383342749, + "grad_norm": 0.6589755415916443, + 
"learning_rate": 8.034776574849453e-05, + "loss": 2.5195, + "step": 11304 + }, + { + "epoch": 0.9123557420708579, + "grad_norm": 0.676943838596344, + "learning_rate": 8.033228695051434e-05, + "loss": 2.4877, + "step": 11305 + }, + { + "epoch": 0.9124364458074409, + "grad_norm": 0.6509177088737488, + "learning_rate": 8.031680864270498e-05, + "loss": 2.5229, + "step": 11306 + }, + { + "epoch": 0.9125171495440239, + "grad_norm": 0.7480820417404175, + "learning_rate": 8.030133082545219e-05, + "loss": 2.5016, + "step": 11307 + }, + { + "epoch": 0.9125978532806069, + "grad_norm": 0.7130550742149353, + "learning_rate": 8.028585349914174e-05, + "loss": 2.5251, + "step": 11308 + }, + { + "epoch": 0.9126785570171899, + "grad_norm": 0.6959688067436218, + "learning_rate": 8.027037666415934e-05, + "loss": 2.4776, + "step": 11309 + }, + { + "epoch": 0.9127592607537729, + "grad_norm": 0.7540854215621948, + "learning_rate": 8.025490032089076e-05, + "loss": 2.5097, + "step": 11310 + }, + { + "epoch": 0.9128399644903559, + "grad_norm": 0.6921199560165405, + "learning_rate": 8.023942446972165e-05, + "loss": 2.5354, + "step": 11311 + }, + { + "epoch": 0.912920668226939, + "grad_norm": 0.649824857711792, + "learning_rate": 8.022394911103774e-05, + "loss": 2.5398, + "step": 11312 + }, + { + "epoch": 0.9130013719635219, + "grad_norm": 0.6951068639755249, + "learning_rate": 8.020847424522474e-05, + "loss": 2.5302, + "step": 11313 + }, + { + "epoch": 0.9130820757001049, + "grad_norm": 0.6906851530075073, + "learning_rate": 8.019299987266827e-05, + "loss": 2.581, + "step": 11314 + }, + { + "epoch": 0.9131627794366879, + "grad_norm": 0.6758459210395813, + "learning_rate": 8.0177525993754e-05, + "loss": 2.5208, + "step": 11315 + }, + { + "epoch": 0.913243483173271, + "grad_norm": 0.6915175318717957, + "learning_rate": 8.016205260886766e-05, + "loss": 2.5386, + "step": 11316 + }, + { + "epoch": 0.913324186909854, + "grad_norm": 0.7083550691604614, + "learning_rate": 8.014657971839476e-05, + 
"loss": 2.4895, + "step": 11317 + }, + { + "epoch": 0.9134048906464369, + "grad_norm": 0.7052562832832336, + "learning_rate": 8.013110732272102e-05, + "loss": 2.4896, + "step": 11318 + }, + { + "epoch": 0.9134855943830199, + "grad_norm": 0.7811834216117859, + "learning_rate": 8.011563542223206e-05, + "loss": 2.5082, + "step": 11319 + }, + { + "epoch": 0.913566298119603, + "grad_norm": 0.6207153797149658, + "learning_rate": 8.01001640173134e-05, + "loss": 2.4967, + "step": 11320 + }, + { + "epoch": 0.913647001856186, + "grad_norm": 0.7637950778007507, + "learning_rate": 8.008469310835065e-05, + "loss": 2.4907, + "step": 11321 + }, + { + "epoch": 0.913727705592769, + "grad_norm": 0.7263950705528259, + "learning_rate": 8.006922269572947e-05, + "loss": 2.5259, + "step": 11322 + }, + { + "epoch": 0.9138084093293519, + "grad_norm": 0.6965721845626831, + "learning_rate": 8.005375277983531e-05, + "loss": 2.5648, + "step": 11323 + }, + { + "epoch": 0.9138891130659349, + "grad_norm": 0.7146127223968506, + "learning_rate": 8.003828336105377e-05, + "loss": 2.53, + "step": 11324 + }, + { + "epoch": 0.913969816802518, + "grad_norm": 0.7083697319030762, + "learning_rate": 8.00228144397704e-05, + "loss": 2.4923, + "step": 11325 + }, + { + "epoch": 0.914050520539101, + "grad_norm": 0.7259312868118286, + "learning_rate": 8.000734601637074e-05, + "loss": 2.5303, + "step": 11326 + }, + { + "epoch": 0.9141312242756839, + "grad_norm": 0.7072086930274963, + "learning_rate": 7.999187809124025e-05, + "loss": 2.4662, + "step": 11327 + }, + { + "epoch": 0.9142119280122669, + "grad_norm": 0.7216035723686218, + "learning_rate": 7.997641066476445e-05, + "loss": 2.5069, + "step": 11328 + }, + { + "epoch": 0.91429263174885, + "grad_norm": 0.6925712823867798, + "learning_rate": 7.99609437373289e-05, + "loss": 2.5107, + "step": 11329 + }, + { + "epoch": 0.914373335485433, + "grad_norm": 0.6672701835632324, + "learning_rate": 7.994547730931896e-05, + "loss": 2.5248, + "step": 11330 + }, + { + 
"epoch": 0.914454039222016, + "grad_norm": 0.8058515787124634, + "learning_rate": 7.993001138112016e-05, + "loss": 2.4427, + "step": 11331 + }, + { + "epoch": 0.9145347429585989, + "grad_norm": 0.6942592859268188, + "learning_rate": 7.991454595311795e-05, + "loss": 2.6163, + "step": 11332 + }, + { + "epoch": 0.914615446695182, + "grad_norm": 0.7051894068717957, + "learning_rate": 7.989908102569774e-05, + "loss": 2.5327, + "step": 11333 + }, + { + "epoch": 0.914696150431765, + "grad_norm": 0.6824771761894226, + "learning_rate": 7.988361659924496e-05, + "loss": 2.4843, + "step": 11334 + }, + { + "epoch": 0.914776854168348, + "grad_norm": 0.6756488084793091, + "learning_rate": 7.98681526741451e-05, + "loss": 2.5215, + "step": 11335 + }, + { + "epoch": 0.914857557904931, + "grad_norm": 0.6988239288330078, + "learning_rate": 7.985268925078344e-05, + "loss": 2.5153, + "step": 11336 + }, + { + "epoch": 0.914938261641514, + "grad_norm": 0.6446006298065186, + "learning_rate": 7.983722632954544e-05, + "loss": 2.5081, + "step": 11337 + }, + { + "epoch": 0.915018965378097, + "grad_norm": 0.6828100681304932, + "learning_rate": 7.982176391081649e-05, + "loss": 2.5607, + "step": 11338 + }, + { + "epoch": 0.91509966911468, + "grad_norm": 0.659721851348877, + "learning_rate": 7.980630199498193e-05, + "loss": 2.531, + "step": 11339 + }, + { + "epoch": 0.915180372851263, + "grad_norm": 0.6298564076423645, + "learning_rate": 7.979084058242709e-05, + "loss": 2.513, + "step": 11340 + }, + { + "epoch": 0.9152610765878461, + "grad_norm": 0.664299726486206, + "learning_rate": 7.977537967353735e-05, + "loss": 2.5533, + "step": 11341 + }, + { + "epoch": 0.915341780324429, + "grad_norm": 0.7035108804702759, + "learning_rate": 7.975991926869801e-05, + "loss": 2.4868, + "step": 11342 + }, + { + "epoch": 0.915422484061012, + "grad_norm": 0.7428407073020935, + "learning_rate": 7.974445936829438e-05, + "loss": 2.5694, + "step": 11343 + }, + { + "epoch": 0.915503187797595, + "grad_norm": 
0.6845505237579346, + "learning_rate": 7.972899997271176e-05, + "loss": 2.5092, + "step": 11344 + }, + { + "epoch": 0.9155838915341781, + "grad_norm": 0.7135340571403503, + "learning_rate": 7.971354108233551e-05, + "loss": 2.5157, + "step": 11345 + }, + { + "epoch": 0.915664595270761, + "grad_norm": 0.7032433152198792, + "learning_rate": 7.969808269755077e-05, + "loss": 2.5292, + "step": 11346 + }, + { + "epoch": 0.915745299007344, + "grad_norm": 0.6874690651893616, + "learning_rate": 7.96826248187429e-05, + "loss": 2.5312, + "step": 11347 + }, + { + "epoch": 0.915826002743927, + "grad_norm": 0.6497030258178711, + "learning_rate": 7.966716744629718e-05, + "loss": 2.505, + "step": 11348 + }, + { + "epoch": 0.9159067064805101, + "grad_norm": 0.6618520021438599, + "learning_rate": 7.965171058059874e-05, + "loss": 2.5287, + "step": 11349 + }, + { + "epoch": 0.9159874102170931, + "grad_norm": 0.6737041473388672, + "learning_rate": 7.963625422203288e-05, + "loss": 2.5494, + "step": 11350 + }, + { + "epoch": 0.916068113953676, + "grad_norm": 0.705646276473999, + "learning_rate": 7.96207983709848e-05, + "loss": 2.5402, + "step": 11351 + }, + { + "epoch": 0.916148817690259, + "grad_norm": 0.6852068901062012, + "learning_rate": 7.96053430278397e-05, + "loss": 2.51, + "step": 11352 + }, + { + "epoch": 0.9162295214268421, + "grad_norm": 0.7166822552680969, + "learning_rate": 7.958988819298274e-05, + "loss": 2.576, + "step": 11353 + }, + { + "epoch": 0.9163102251634251, + "grad_norm": 0.6349207162857056, + "learning_rate": 7.957443386679913e-05, + "loss": 2.5219, + "step": 11354 + }, + { + "epoch": 0.9163909289000081, + "grad_norm": 0.6504647135734558, + "learning_rate": 7.955898004967406e-05, + "loss": 2.4593, + "step": 11355 + }, + { + "epoch": 0.916471632636591, + "grad_norm": 0.7313871383666992, + "learning_rate": 7.95435267419926e-05, + "loss": 2.5616, + "step": 11356 + }, + { + "epoch": 0.9165523363731741, + "grad_norm": 0.6948587894439697, + "learning_rate": 
7.95280739441399e-05, + "loss": 2.4608, + "step": 11357 + }, + { + "epoch": 0.9166330401097571, + "grad_norm": 0.6130328178405762, + "learning_rate": 7.95126216565012e-05, + "loss": 2.5563, + "step": 11358 + }, + { + "epoch": 0.9167137438463401, + "grad_norm": 0.7149228453636169, + "learning_rate": 7.949716987946145e-05, + "loss": 2.5664, + "step": 11359 + }, + { + "epoch": 0.916794447582923, + "grad_norm": 0.7452285289764404, + "learning_rate": 7.948171861340584e-05, + "loss": 2.525, + "step": 11360 + }, + { + "epoch": 0.9168751513195061, + "grad_norm": 0.6840611100196838, + "learning_rate": 7.946626785871945e-05, + "loss": 2.537, + "step": 11361 + }, + { + "epoch": 0.9169558550560891, + "grad_norm": 0.7269708514213562, + "learning_rate": 7.945081761578732e-05, + "loss": 2.5227, + "step": 11362 + }, + { + "epoch": 0.9170365587926721, + "grad_norm": 0.6521697044372559, + "learning_rate": 7.943536788499452e-05, + "loss": 2.54, + "step": 11363 + }, + { + "epoch": 0.9171172625292551, + "grad_norm": 0.6516863107681274, + "learning_rate": 7.941991866672618e-05, + "loss": 2.4788, + "step": 11364 + }, + { + "epoch": 0.9171979662658382, + "grad_norm": 0.7673580050468445, + "learning_rate": 7.94044699613672e-05, + "loss": 2.4678, + "step": 11365 + }, + { + "epoch": 0.9172786700024211, + "grad_norm": 0.6666994690895081, + "learning_rate": 7.938902176930268e-05, + "loss": 2.5251, + "step": 11366 + }, + { + "epoch": 0.9173593737390041, + "grad_norm": 0.7261863946914673, + "learning_rate": 7.937357409091761e-05, + "loss": 2.4977, + "step": 11367 + }, + { + "epoch": 0.9174400774755871, + "grad_norm": 0.6920679807662964, + "learning_rate": 7.9358126926597e-05, + "loss": 2.5367, + "step": 11368 + }, + { + "epoch": 0.9175207812121702, + "grad_norm": 0.6715712547302246, + "learning_rate": 7.93426802767258e-05, + "loss": 2.4898, + "step": 11369 + }, + { + "epoch": 0.9176014849487532, + "grad_norm": 0.7014333605766296, + "learning_rate": 7.932723414168904e-05, + "loss": 2.4507, + 
"step": 11370 + }, + { + "epoch": 0.9176821886853361, + "grad_norm": 0.6755761504173279, + "learning_rate": 7.931178852187163e-05, + "loss": 2.5895, + "step": 11371 + }, + { + "epoch": 0.9177628924219191, + "grad_norm": 0.6846731305122375, + "learning_rate": 7.929634341765852e-05, + "loss": 2.5002, + "step": 11372 + }, + { + "epoch": 0.9178435961585021, + "grad_norm": 0.6422831416130066, + "learning_rate": 7.928089882943466e-05, + "loss": 2.5326, + "step": 11373 + }, + { + "epoch": 0.9179242998950852, + "grad_norm": 0.7256442308425903, + "learning_rate": 7.9265454757585e-05, + "loss": 2.5706, + "step": 11374 + }, + { + "epoch": 0.9180050036316681, + "grad_norm": 0.6514387130737305, + "learning_rate": 7.925001120249436e-05, + "loss": 2.5349, + "step": 11375 + }, + { + "epoch": 0.9180857073682511, + "grad_norm": 0.7596457600593567, + "learning_rate": 7.923456816454768e-05, + "loss": 2.4767, + "step": 11376 + }, + { + "epoch": 0.9181664111048341, + "grad_norm": 0.673283040523529, + "learning_rate": 7.921912564412988e-05, + "loss": 2.5156, + "step": 11377 + }, + { + "epoch": 0.9182471148414172, + "grad_norm": 0.6964103579521179, + "learning_rate": 7.920368364162575e-05, + "loss": 2.5293, + "step": 11378 + }, + { + "epoch": 0.9183278185780002, + "grad_norm": 0.6765062212944031, + "learning_rate": 7.91882421574202e-05, + "loss": 2.5757, + "step": 11379 + }, + { + "epoch": 0.9184085223145831, + "grad_norm": 0.7039035558700562, + "learning_rate": 7.917280119189811e-05, + "loss": 2.513, + "step": 11380 + }, + { + "epoch": 0.9184892260511661, + "grad_norm": 0.6523976922035217, + "learning_rate": 7.915736074544419e-05, + "loss": 2.4712, + "step": 11381 + }, + { + "epoch": 0.9185699297877492, + "grad_norm": 0.7159552574157715, + "learning_rate": 7.914192081844334e-05, + "loss": 2.4713, + "step": 11382 + }, + { + "epoch": 0.9186506335243322, + "grad_norm": 0.7071694731712341, + "learning_rate": 7.912648141128036e-05, + "loss": 2.5367, + "step": 11383 + }, + { + "epoch": 
0.9187313372609152, + "grad_norm": 0.6675183773040771, + "learning_rate": 7.911104252434e-05, + "loss": 2.5372, + "step": 11384 + }, + { + "epoch": 0.9188120409974981, + "grad_norm": 0.7293995022773743, + "learning_rate": 7.909560415800707e-05, + "loss": 2.5469, + "step": 11385 + }, + { + "epoch": 0.9188927447340812, + "grad_norm": 0.6774035096168518, + "learning_rate": 7.908016631266635e-05, + "loss": 2.5655, + "step": 11386 + }, + { + "epoch": 0.9189734484706642, + "grad_norm": 0.7068144083023071, + "learning_rate": 7.906472898870256e-05, + "loss": 2.5265, + "step": 11387 + }, + { + "epoch": 0.9190541522072472, + "grad_norm": 0.6756324172019958, + "learning_rate": 7.904929218650044e-05, + "loss": 2.4966, + "step": 11388 + }, + { + "epoch": 0.9191348559438302, + "grad_norm": 0.6964625120162964, + "learning_rate": 7.903385590644473e-05, + "loss": 2.5646, + "step": 11389 + }, + { + "epoch": 0.9192155596804132, + "grad_norm": 0.6760976314544678, + "learning_rate": 7.901842014892018e-05, + "loss": 2.5159, + "step": 11390 + }, + { + "epoch": 0.9192962634169962, + "grad_norm": 0.6648714542388916, + "learning_rate": 7.900298491431139e-05, + "loss": 2.5715, + "step": 11391 + }, + { + "epoch": 0.9193769671535792, + "grad_norm": 0.7492914199829102, + "learning_rate": 7.898755020300312e-05, + "loss": 2.5226, + "step": 11392 + }, + { + "epoch": 0.9194576708901622, + "grad_norm": 0.7041164040565491, + "learning_rate": 7.897211601538004e-05, + "loss": 2.5809, + "step": 11393 + }, + { + "epoch": 0.9195383746267453, + "grad_norm": 0.6746383309364319, + "learning_rate": 7.895668235182677e-05, + "loss": 2.5369, + "step": 11394 + }, + { + "epoch": 0.9196190783633282, + "grad_norm": 0.6486156582832336, + "learning_rate": 7.894124921272798e-05, + "loss": 2.5406, + "step": 11395 + }, + { + "epoch": 0.9196997820999112, + "grad_norm": 0.6828807592391968, + "learning_rate": 7.892581659846834e-05, + "loss": 2.5241, + "step": 11396 + }, + { + "epoch": 0.9197804858364942, + "grad_norm": 
0.694970428943634, + "learning_rate": 7.891038450943242e-05, + "loss": 2.4402, + "step": 11397 + }, + { + "epoch": 0.9198611895730773, + "grad_norm": 0.7187039852142334, + "learning_rate": 7.889495294600484e-05, + "loss": 2.5052, + "step": 11398 + }, + { + "epoch": 0.9199418933096603, + "grad_norm": 0.6919832825660706, + "learning_rate": 7.887952190857024e-05, + "loss": 2.5078, + "step": 11399 + }, + { + "epoch": 0.9200225970462432, + "grad_norm": 0.7129504084587097, + "learning_rate": 7.886409139751313e-05, + "loss": 2.5047, + "step": 11400 + }, + { + "epoch": 0.9201033007828262, + "grad_norm": 0.6755272746086121, + "learning_rate": 7.88486614132181e-05, + "loss": 2.4821, + "step": 11401 + }, + { + "epoch": 0.9201840045194093, + "grad_norm": 0.7253937125205994, + "learning_rate": 7.883323195606973e-05, + "loss": 2.5062, + "step": 11402 + }, + { + "epoch": 0.9202647082559923, + "grad_norm": 0.7057155966758728, + "learning_rate": 7.881780302645257e-05, + "loss": 2.5475, + "step": 11403 + }, + { + "epoch": 0.9203454119925752, + "grad_norm": 0.713869571685791, + "learning_rate": 7.880237462475111e-05, + "loss": 2.5335, + "step": 11404 + }, + { + "epoch": 0.9204261157291582, + "grad_norm": 0.769648551940918, + "learning_rate": 7.878694675134987e-05, + "loss": 2.4944, + "step": 11405 + }, + { + "epoch": 0.9205068194657413, + "grad_norm": 0.6444964408874512, + "learning_rate": 7.877151940663343e-05, + "loss": 2.5755, + "step": 11406 + }, + { + "epoch": 0.9205875232023243, + "grad_norm": 0.6811819672584534, + "learning_rate": 7.875609259098618e-05, + "loss": 2.5475, + "step": 11407 + }, + { + "epoch": 0.9206682269389073, + "grad_norm": 0.6959417462348938, + "learning_rate": 7.874066630479259e-05, + "loss": 2.5095, + "step": 11408 + }, + { + "epoch": 0.9207489306754902, + "grad_norm": 0.6721363067626953, + "learning_rate": 7.872524054843724e-05, + "loss": 2.5166, + "step": 11409 + }, + { + "epoch": 0.9208296344120733, + "grad_norm": 0.713122546672821, + "learning_rate": 
7.870981532230447e-05, + "loss": 2.5084, + "step": 11410 + }, + { + "epoch": 0.9209103381486563, + "grad_norm": 0.7059469819068909, + "learning_rate": 7.869439062677876e-05, + "loss": 2.437, + "step": 11411 + }, + { + "epoch": 0.9209910418852393, + "grad_norm": 0.6808314323425293, + "learning_rate": 7.867896646224454e-05, + "loss": 2.5658, + "step": 11412 + }, + { + "epoch": 0.9210717456218223, + "grad_norm": 0.7060894966125488, + "learning_rate": 7.86635428290862e-05, + "loss": 2.515, + "step": 11413 + }, + { + "epoch": 0.9211524493584053, + "grad_norm": 0.7538465857505798, + "learning_rate": 7.864811972768813e-05, + "loss": 2.4448, + "step": 11414 + }, + { + "epoch": 0.9212331530949883, + "grad_norm": 0.6824522018432617, + "learning_rate": 7.863269715843478e-05, + "loss": 2.503, + "step": 11415 + }, + { + "epoch": 0.9213138568315713, + "grad_norm": 0.7068174481391907, + "learning_rate": 7.861727512171044e-05, + "loss": 2.5198, + "step": 11416 + }, + { + "epoch": 0.9213945605681543, + "grad_norm": 0.6742961406707764, + "learning_rate": 7.860185361789948e-05, + "loss": 2.5167, + "step": 11417 + }, + { + "epoch": 0.9214752643047374, + "grad_norm": 0.7643383741378784, + "learning_rate": 7.858643264738628e-05, + "loss": 2.5508, + "step": 11418 + }, + { + "epoch": 0.9215559680413203, + "grad_norm": 0.6737802028656006, + "learning_rate": 7.857101221055518e-05, + "loss": 2.589, + "step": 11419 + }, + { + "epoch": 0.9216366717779033, + "grad_norm": 0.668214738368988, + "learning_rate": 7.855559230779043e-05, + "loss": 2.4747, + "step": 11420 + }, + { + "epoch": 0.9217173755144863, + "grad_norm": 0.6933084726333618, + "learning_rate": 7.854017293947638e-05, + "loss": 2.5171, + "step": 11421 + }, + { + "epoch": 0.9217980792510694, + "grad_norm": 0.6320228576660156, + "learning_rate": 7.852475410599736e-05, + "loss": 2.5213, + "step": 11422 + }, + { + "epoch": 0.9218787829876524, + "grad_norm": 0.6578245759010315, + "learning_rate": 7.850933580773756e-05, + "loss": 2.5085, + 
"step": 11423 + }, + { + "epoch": 0.9219594867242353, + "grad_norm": 0.6741796135902405, + "learning_rate": 7.849391804508129e-05, + "loss": 2.5294, + "step": 11424 + }, + { + "epoch": 0.9220401904608183, + "grad_norm": 0.6875781416893005, + "learning_rate": 7.847850081841285e-05, + "loss": 2.5034, + "step": 11425 + }, + { + "epoch": 0.9221208941974013, + "grad_norm": 0.6515244245529175, + "learning_rate": 7.846308412811638e-05, + "loss": 2.4707, + "step": 11426 + }, + { + "epoch": 0.9222015979339844, + "grad_norm": 0.7326812148094177, + "learning_rate": 7.844766797457615e-05, + "loss": 2.5049, + "step": 11427 + }, + { + "epoch": 0.9222823016705674, + "grad_norm": 0.7539918422698975, + "learning_rate": 7.84322523581764e-05, + "loss": 2.4726, + "step": 11428 + }, + { + "epoch": 0.9223630054071503, + "grad_norm": 0.745468020439148, + "learning_rate": 7.841683727930129e-05, + "loss": 2.5003, + "step": 11429 + }, + { + "epoch": 0.9224437091437333, + "grad_norm": 0.726362943649292, + "learning_rate": 7.840142273833499e-05, + "loss": 2.5056, + "step": 11430 + }, + { + "epoch": 0.9225244128803164, + "grad_norm": 0.7275403738021851, + "learning_rate": 7.838600873566175e-05, + "loss": 2.5188, + "step": 11431 + }, + { + "epoch": 0.9226051166168994, + "grad_norm": 0.6908789873123169, + "learning_rate": 7.837059527166563e-05, + "loss": 2.5349, + "step": 11432 + }, + { + "epoch": 0.9226858203534823, + "grad_norm": 0.7220396399497986, + "learning_rate": 7.835518234673079e-05, + "loss": 2.4863, + "step": 11433 + }, + { + "epoch": 0.9227665240900653, + "grad_norm": 0.6516178846359253, + "learning_rate": 7.833976996124142e-05, + "loss": 2.556, + "step": 11434 + }, + { + "epoch": 0.9228472278266484, + "grad_norm": 0.6958726644515991, + "learning_rate": 7.832435811558163e-05, + "loss": 2.5286, + "step": 11435 + }, + { + "epoch": 0.9229279315632314, + "grad_norm": 0.7734121680259705, + "learning_rate": 7.830894681013546e-05, + "loss": 2.5087, + "step": 11436 + }, + { + "epoch": 
0.9230086352998144, + "grad_norm": 0.709064245223999, + "learning_rate": 7.829353604528703e-05, + "loss": 2.4817, + "step": 11437 + }, + { + "epoch": 0.9230893390363973, + "grad_norm": 0.7224971652030945, + "learning_rate": 7.827812582142045e-05, + "loss": 2.5179, + "step": 11438 + }, + { + "epoch": 0.9231700427729804, + "grad_norm": 0.7139936685562134, + "learning_rate": 7.826271613891973e-05, + "loss": 2.537, + "step": 11439 + }, + { + "epoch": 0.9232507465095634, + "grad_norm": 0.671138346195221, + "learning_rate": 7.824730699816896e-05, + "loss": 2.4865, + "step": 11440 + }, + { + "epoch": 0.9233314502461464, + "grad_norm": 0.6547425389289856, + "learning_rate": 7.823189839955218e-05, + "loss": 2.509, + "step": 11441 + }, + { + "epoch": 0.9234121539827294, + "grad_norm": 0.719765305519104, + "learning_rate": 7.821649034345338e-05, + "loss": 2.591, + "step": 11442 + }, + { + "epoch": 0.9234928577193124, + "grad_norm": 0.7128504514694214, + "learning_rate": 7.820108283025656e-05, + "loss": 2.541, + "step": 11443 + }, + { + "epoch": 0.9235735614558954, + "grad_norm": 0.7711538672447205, + "learning_rate": 7.818567586034577e-05, + "loss": 2.5388, + "step": 11444 + }, + { + "epoch": 0.9236542651924784, + "grad_norm": 0.7151121497154236, + "learning_rate": 7.817026943410494e-05, + "loss": 2.5539, + "step": 11445 + }, + { + "epoch": 0.9237349689290614, + "grad_norm": 0.7009569406509399, + "learning_rate": 7.815486355191805e-05, + "loss": 2.4793, + "step": 11446 + }, + { + "epoch": 0.9238156726656445, + "grad_norm": 0.7251109480857849, + "learning_rate": 7.813945821416909e-05, + "loss": 2.5406, + "step": 11447 + }, + { + "epoch": 0.9238963764022274, + "grad_norm": 0.6907934546470642, + "learning_rate": 7.812405342124196e-05, + "loss": 2.5069, + "step": 11448 + }, + { + "epoch": 0.9239770801388104, + "grad_norm": 0.699207067489624, + "learning_rate": 7.810864917352061e-05, + "loss": 2.4844, + "step": 11449 + }, + { + "epoch": 0.9240577838753934, + "grad_norm": 
0.718386173248291, + "learning_rate": 7.809324547138893e-05, + "loss": 2.5666, + "step": 11450 + }, + { + "epoch": 0.9241384876119765, + "grad_norm": 0.6420444846153259, + "learning_rate": 7.807784231523089e-05, + "loss": 2.506, + "step": 11451 + }, + { + "epoch": 0.9242191913485595, + "grad_norm": 0.6777252554893494, + "learning_rate": 7.806243970543028e-05, + "loss": 2.487, + "step": 11452 + }, + { + "epoch": 0.9242998950851424, + "grad_norm": 0.6907702684402466, + "learning_rate": 7.804703764237102e-05, + "loss": 2.5284, + "step": 11453 + }, + { + "epoch": 0.9243805988217254, + "grad_norm": 0.6383422613143921, + "learning_rate": 7.803163612643698e-05, + "loss": 2.4704, + "step": 11454 + }, + { + "epoch": 0.9244613025583085, + "grad_norm": 0.6879577040672302, + "learning_rate": 7.801623515801198e-05, + "loss": 2.5103, + "step": 11455 + }, + { + "epoch": 0.9245420062948915, + "grad_norm": 0.6856719851493835, + "learning_rate": 7.800083473747986e-05, + "loss": 2.5086, + "step": 11456 + }, + { + "epoch": 0.9246227100314744, + "grad_norm": 0.7463707327842712, + "learning_rate": 7.79854348652245e-05, + "loss": 2.5456, + "step": 11457 + }, + { + "epoch": 0.9247034137680574, + "grad_norm": 0.7352643013000488, + "learning_rate": 7.79700355416296e-05, + "loss": 2.5335, + "step": 11458 + }, + { + "epoch": 0.9247841175046405, + "grad_norm": 0.7525908350944519, + "learning_rate": 7.795463676707897e-05, + "loss": 2.5855, + "step": 11459 + }, + { + "epoch": 0.9248648212412235, + "grad_norm": 0.7323870658874512, + "learning_rate": 7.79392385419565e-05, + "loss": 2.5471, + "step": 11460 + }, + { + "epoch": 0.9249455249778065, + "grad_norm": 0.7443860769271851, + "learning_rate": 7.792384086664582e-05, + "loss": 2.5449, + "step": 11461 + }, + { + "epoch": 0.9250262287143894, + "grad_norm": 0.6928641200065613, + "learning_rate": 7.790844374153073e-05, + "loss": 2.505, + "step": 11462 + }, + { + "epoch": 0.9251069324509725, + "grad_norm": 0.6491222381591797, + "learning_rate": 
7.789304716699498e-05, + "loss": 2.5447, + "step": 11463 + }, + { + "epoch": 0.9251876361875555, + "grad_norm": 0.7351166009902954, + "learning_rate": 7.78776511434223e-05, + "loss": 2.524, + "step": 11464 + }, + { + "epoch": 0.9252683399241385, + "grad_norm": 0.6680036783218384, + "learning_rate": 7.786225567119637e-05, + "loss": 2.5019, + "step": 11465 + }, + { + "epoch": 0.9253490436607215, + "grad_norm": 0.7070801258087158, + "learning_rate": 7.784686075070089e-05, + "loss": 2.5052, + "step": 11466 + }, + { + "epoch": 0.9254297473973045, + "grad_norm": 0.7095211148262024, + "learning_rate": 7.783146638231957e-05, + "loss": 2.4998, + "step": 11467 + }, + { + "epoch": 0.9255104511338875, + "grad_norm": 0.6725812554359436, + "learning_rate": 7.781607256643604e-05, + "loss": 2.4909, + "step": 11468 + }, + { + "epoch": 0.9255911548704705, + "grad_norm": 0.684177577495575, + "learning_rate": 7.780067930343396e-05, + "loss": 2.5636, + "step": 11469 + }, + { + "epoch": 0.9256718586070535, + "grad_norm": 0.703419029712677, + "learning_rate": 7.778528659369702e-05, + "loss": 2.4295, + "step": 11470 + }, + { + "epoch": 0.9257525623436366, + "grad_norm": 0.6850195527076721, + "learning_rate": 7.776989443760877e-05, + "loss": 2.5143, + "step": 11471 + }, + { + "epoch": 0.9258332660802195, + "grad_norm": 0.7322348952293396, + "learning_rate": 7.775450283555286e-05, + "loss": 2.5616, + "step": 11472 + }, + { + "epoch": 0.9259139698168025, + "grad_norm": 0.6924510598182678, + "learning_rate": 7.77391117879129e-05, + "loss": 2.4796, + "step": 11473 + }, + { + "epoch": 0.9259946735533855, + "grad_norm": 0.7006441354751587, + "learning_rate": 7.772372129507249e-05, + "loss": 2.5142, + "step": 11474 + }, + { + "epoch": 0.9260753772899685, + "grad_norm": 0.6379218697547913, + "learning_rate": 7.770833135741513e-05, + "loss": 2.5366, + "step": 11475 + }, + { + "epoch": 0.9261560810265516, + "grad_norm": 0.676163375377655, + "learning_rate": 7.769294197532448e-05, + "loss": 2.4936, + 
"step": 11476 + }, + { + "epoch": 0.9262367847631345, + "grad_norm": 0.6964210271835327, + "learning_rate": 7.767755314918399e-05, + "loss": 2.429, + "step": 11477 + }, + { + "epoch": 0.9263174884997175, + "grad_norm": 0.7017048597335815, + "learning_rate": 7.766216487937722e-05, + "loss": 2.5488, + "step": 11478 + }, + { + "epoch": 0.9263981922363005, + "grad_norm": 0.6742509603500366, + "learning_rate": 7.76467771662877e-05, + "loss": 2.5121, + "step": 11479 + }, + { + "epoch": 0.9264788959728836, + "grad_norm": 0.6751403212547302, + "learning_rate": 7.763139001029893e-05, + "loss": 2.5897, + "step": 11480 + }, + { + "epoch": 0.9265595997094666, + "grad_norm": 0.6639657616615295, + "learning_rate": 7.761600341179439e-05, + "loss": 2.5015, + "step": 11481 + }, + { + "epoch": 0.9266403034460495, + "grad_norm": 0.6332827210426331, + "learning_rate": 7.760061737115756e-05, + "loss": 2.5518, + "step": 11482 + }, + { + "epoch": 0.9267210071826325, + "grad_norm": 0.6751062870025635, + "learning_rate": 7.758523188877192e-05, + "loss": 2.4252, + "step": 11483 + }, + { + "epoch": 0.9268017109192156, + "grad_norm": 0.6763231754302979, + "learning_rate": 7.756984696502084e-05, + "loss": 2.5683, + "step": 11484 + }, + { + "epoch": 0.9268824146557986, + "grad_norm": 0.6480380296707153, + "learning_rate": 7.755446260028784e-05, + "loss": 2.558, + "step": 11485 + }, + { + "epoch": 0.9269631183923815, + "grad_norm": 0.6925072073936462, + "learning_rate": 7.753907879495634e-05, + "loss": 2.5374, + "step": 11486 + }, + { + "epoch": 0.9270438221289645, + "grad_norm": 0.6771834492683411, + "learning_rate": 7.752369554940966e-05, + "loss": 2.5652, + "step": 11487 + }, + { + "epoch": 0.9271245258655476, + "grad_norm": 0.6747026443481445, + "learning_rate": 7.750831286403124e-05, + "loss": 2.5076, + "step": 11488 + }, + { + "epoch": 0.9272052296021306, + "grad_norm": 0.6727211475372314, + "learning_rate": 7.749293073920448e-05, + "loss": 2.4774, + "step": 11489 + }, + { + "epoch": 
0.9272859333387136, + "grad_norm": 0.6334055066108704, + "learning_rate": 7.747754917531272e-05, + "loss": 2.5245, + "step": 11490 + }, + { + "epoch": 0.9273666370752965, + "grad_norm": 0.740700900554657, + "learning_rate": 7.746216817273928e-05, + "loss": 2.5485, + "step": 11491 + }, + { + "epoch": 0.9274473408118796, + "grad_norm": 0.6500691771507263, + "learning_rate": 7.744678773186757e-05, + "loss": 2.5277, + "step": 11492 + }, + { + "epoch": 0.9275280445484626, + "grad_norm": 0.6592985987663269, + "learning_rate": 7.743140785308084e-05, + "loss": 2.5304, + "step": 11493 + }, + { + "epoch": 0.9276087482850456, + "grad_norm": 0.6980452537536621, + "learning_rate": 7.741602853676241e-05, + "loss": 2.544, + "step": 11494 + }, + { + "epoch": 0.9276894520216286, + "grad_norm": 0.643190860748291, + "learning_rate": 7.740064978329555e-05, + "loss": 2.5167, + "step": 11495 + }, + { + "epoch": 0.9277701557582116, + "grad_norm": 0.6789804100990295, + "learning_rate": 7.738527159306366e-05, + "loss": 2.5117, + "step": 11496 + }, + { + "epoch": 0.9278508594947946, + "grad_norm": 0.7109663486480713, + "learning_rate": 7.736989396644987e-05, + "loss": 2.5294, + "step": 11497 + }, + { + "epoch": 0.9279315632313776, + "grad_norm": 0.6752706170082092, + "learning_rate": 7.735451690383746e-05, + "loss": 2.4851, + "step": 11498 + }, + { + "epoch": 0.9280122669679606, + "grad_norm": 0.6947829723358154, + "learning_rate": 7.733914040560972e-05, + "loss": 2.5792, + "step": 11499 + }, + { + "epoch": 0.9280929707045437, + "grad_norm": 0.6701157689094543, + "learning_rate": 7.732376447214981e-05, + "loss": 2.4884, + "step": 11500 + }, + { + "epoch": 0.9281736744411266, + "grad_norm": 0.64533531665802, + "learning_rate": 7.730838910384097e-05, + "loss": 2.4644, + "step": 11501 + }, + { + "epoch": 0.9282543781777096, + "grad_norm": 0.6664395332336426, + "learning_rate": 7.729301430106644e-05, + "loss": 2.5286, + "step": 11502 + }, + { + "epoch": 0.9283350819142926, + "grad_norm": 
0.6982395648956299, + "learning_rate": 7.72776400642093e-05, + "loss": 2.5092, + "step": 11503 + }, + { + "epoch": 0.9284157856508757, + "grad_norm": 0.6656171679496765, + "learning_rate": 7.726226639365278e-05, + "loss": 2.4945, + "step": 11504 + }, + { + "epoch": 0.9284964893874587, + "grad_norm": 0.6213308572769165, + "learning_rate": 7.724689328978001e-05, + "loss": 2.5042, + "step": 11505 + }, + { + "epoch": 0.9285771931240416, + "grad_norm": 0.6855599880218506, + "learning_rate": 7.723152075297414e-05, + "loss": 2.5207, + "step": 11506 + }, + { + "epoch": 0.9286578968606246, + "grad_norm": 0.7724171280860901, + "learning_rate": 7.721614878361828e-05, + "loss": 2.4842, + "step": 11507 + }, + { + "epoch": 0.9287386005972077, + "grad_norm": 0.708634614944458, + "learning_rate": 7.720077738209559e-05, + "loss": 2.58, + "step": 11508 + }, + { + "epoch": 0.9288193043337907, + "grad_norm": 0.6766082644462585, + "learning_rate": 7.718540654878907e-05, + "loss": 2.492, + "step": 11509 + }, + { + "epoch": 0.9289000080703737, + "grad_norm": 0.6856982707977295, + "learning_rate": 7.717003628408187e-05, + "loss": 2.5186, + "step": 11510 + }, + { + "epoch": 0.9289807118069566, + "grad_norm": 0.680647611618042, + "learning_rate": 7.715466658835705e-05, + "loss": 2.5305, + "step": 11511 + }, + { + "epoch": 0.9290614155435397, + "grad_norm": 0.7174721360206604, + "learning_rate": 7.713929746199771e-05, + "loss": 2.4498, + "step": 11512 + }, + { + "epoch": 0.9291421192801227, + "grad_norm": 0.6507031321525574, + "learning_rate": 7.712392890538676e-05, + "loss": 2.5334, + "step": 11513 + }, + { + "epoch": 0.9292228230167057, + "grad_norm": 0.7545748353004456, + "learning_rate": 7.710856091890732e-05, + "loss": 2.505, + "step": 11514 + }, + { + "epoch": 0.9293035267532886, + "grad_norm": 0.6978560090065002, + "learning_rate": 7.709319350294242e-05, + "loss": 2.5243, + "step": 11515 + }, + { + "epoch": 0.9293842304898717, + "grad_norm": 0.6620199084281921, + "learning_rate": 
7.707782665787497e-05, + "loss": 2.5114, + "step": 11516 + }, + { + "epoch": 0.9294649342264547, + "grad_norm": 0.7160476446151733, + "learning_rate": 7.7062460384088e-05, + "loss": 2.5322, + "step": 11517 + }, + { + "epoch": 0.9295456379630377, + "grad_norm": 0.6637005805969238, + "learning_rate": 7.704709468196454e-05, + "loss": 2.456, + "step": 11518 + }, + { + "epoch": 0.9296263416996207, + "grad_norm": 0.6668851375579834, + "learning_rate": 7.703172955188742e-05, + "loss": 2.5251, + "step": 11519 + }, + { + "epoch": 0.9297070454362037, + "grad_norm": 0.6840329170227051, + "learning_rate": 7.701636499423965e-05, + "loss": 2.5068, + "step": 11520 + }, + { + "epoch": 0.9297877491727867, + "grad_norm": 0.695122241973877, + "learning_rate": 7.700100100940415e-05, + "loss": 2.4822, + "step": 11521 + }, + { + "epoch": 0.9298684529093697, + "grad_norm": 0.6784923672676086, + "learning_rate": 7.698563759776382e-05, + "loss": 2.4978, + "step": 11522 + }, + { + "epoch": 0.9299491566459527, + "grad_norm": 0.6949357986450195, + "learning_rate": 7.697027475970154e-05, + "loss": 2.5392, + "step": 11523 + }, + { + "epoch": 0.9300298603825358, + "grad_norm": 0.7128093242645264, + "learning_rate": 7.695491249560025e-05, + "loss": 2.455, + "step": 11524 + }, + { + "epoch": 0.9301105641191187, + "grad_norm": 0.6534962058067322, + "learning_rate": 7.693955080584277e-05, + "loss": 2.5272, + "step": 11525 + }, + { + "epoch": 0.9301912678557017, + "grad_norm": 0.6893511414527893, + "learning_rate": 7.692418969081194e-05, + "loss": 2.5366, + "step": 11526 + }, + { + "epoch": 0.9302719715922847, + "grad_norm": 0.6335335373878479, + "learning_rate": 7.690882915089064e-05, + "loss": 2.5781, + "step": 11527 + }, + { + "epoch": 0.9303526753288677, + "grad_norm": 0.7264769077301025, + "learning_rate": 7.689346918646172e-05, + "loss": 2.5322, + "step": 11528 + }, + { + "epoch": 0.9304333790654508, + "grad_norm": 0.7156329154968262, + "learning_rate": 7.68781097979079e-05, + "loss": 2.5558, + 
"step": 11529 + }, + { + "epoch": 0.9305140828020337, + "grad_norm": 0.6914563775062561, + "learning_rate": 7.686275098561203e-05, + "loss": 2.5058, + "step": 11530 + }, + { + "epoch": 0.9305947865386167, + "grad_norm": 0.6939939260482788, + "learning_rate": 7.684739274995691e-05, + "loss": 2.4764, + "step": 11531 + }, + { + "epoch": 0.9306754902751997, + "grad_norm": 0.7103014588356018, + "learning_rate": 7.683203509132526e-05, + "loss": 2.5062, + "step": 11532 + }, + { + "epoch": 0.9307561940117828, + "grad_norm": 0.6558870077133179, + "learning_rate": 7.681667801009985e-05, + "loss": 2.4869, + "step": 11533 + }, + { + "epoch": 0.9308368977483658, + "grad_norm": 0.7280104160308838, + "learning_rate": 7.680132150666348e-05, + "loss": 2.566, + "step": 11534 + }, + { + "epoch": 0.9309176014849487, + "grad_norm": 0.6814180612564087, + "learning_rate": 7.678596558139875e-05, + "loss": 2.4926, + "step": 11535 + }, + { + "epoch": 0.9309983052215317, + "grad_norm": 0.6916589736938477, + "learning_rate": 7.677061023468846e-05, + "loss": 2.5189, + "step": 11536 + }, + { + "epoch": 0.9310790089581148, + "grad_norm": 0.6527554988861084, + "learning_rate": 7.675525546691533e-05, + "loss": 2.4969, + "step": 11537 + }, + { + "epoch": 0.9311597126946978, + "grad_norm": 0.6458954811096191, + "learning_rate": 7.673990127846196e-05, + "loss": 2.5159, + "step": 11538 + }, + { + "epoch": 0.9312404164312807, + "grad_norm": 0.6704902052879333, + "learning_rate": 7.672454766971105e-05, + "loss": 2.49, + "step": 11539 + }, + { + "epoch": 0.9313211201678637, + "grad_norm": 0.6599698066711426, + "learning_rate": 7.670919464104527e-05, + "loss": 2.4872, + "step": 11540 + }, + { + "epoch": 0.9314018239044468, + "grad_norm": 0.7638888955116272, + "learning_rate": 7.669384219284722e-05, + "loss": 2.5228, + "step": 11541 + }, + { + "epoch": 0.9314825276410298, + "grad_norm": 0.6911981105804443, + "learning_rate": 7.667849032549954e-05, + "loss": 2.4675, + "step": 11542 + }, + { + "epoch": 
0.9315632313776128, + "grad_norm": 0.6414669156074524, + "learning_rate": 7.666313903938486e-05, + "loss": 2.5137, + "step": 11543 + }, + { + "epoch": 0.9316439351141957, + "grad_norm": 0.7552139759063721, + "learning_rate": 7.66477883348858e-05, + "loss": 2.5778, + "step": 11544 + }, + { + "epoch": 0.9317246388507788, + "grad_norm": 0.6738760471343994, + "learning_rate": 7.663243821238484e-05, + "loss": 2.5326, + "step": 11545 + }, + { + "epoch": 0.9318053425873618, + "grad_norm": 0.7406899333000183, + "learning_rate": 7.661708867226459e-05, + "loss": 2.4608, + "step": 11546 + }, + { + "epoch": 0.9318860463239448, + "grad_norm": 0.7261415719985962, + "learning_rate": 7.660173971490769e-05, + "loss": 2.5684, + "step": 11547 + }, + { + "epoch": 0.9319667500605278, + "grad_norm": 0.636542797088623, + "learning_rate": 7.658639134069654e-05, + "loss": 2.5159, + "step": 11548 + }, + { + "epoch": 0.9320474537971108, + "grad_norm": 0.7730209231376648, + "learning_rate": 7.657104355001373e-05, + "loss": 2.487, + "step": 11549 + }, + { + "epoch": 0.9321281575336938, + "grad_norm": 0.6553641557693481, + "learning_rate": 7.655569634324178e-05, + "loss": 2.5105, + "step": 11550 + }, + { + "epoch": 0.9322088612702768, + "grad_norm": 0.7008326649665833, + "learning_rate": 7.654034972076314e-05, + "loss": 2.492, + "step": 11551 + }, + { + "epoch": 0.9322895650068598, + "grad_norm": 0.7074279189109802, + "learning_rate": 7.65250036829603e-05, + "loss": 2.5221, + "step": 11552 + }, + { + "epoch": 0.9323702687434429, + "grad_norm": 0.7235530018806458, + "learning_rate": 7.650965823021578e-05, + "loss": 2.5285, + "step": 11553 + }, + { + "epoch": 0.9324509724800258, + "grad_norm": 0.7601436376571655, + "learning_rate": 7.649431336291194e-05, + "loss": 2.5071, + "step": 11554 + }, + { + "epoch": 0.9325316762166088, + "grad_norm": 0.6446424126625061, + "learning_rate": 7.647896908143127e-05, + "loss": 2.5032, + "step": 11555 + }, + { + "epoch": 0.9326123799531918, + "grad_norm": 
0.7032139897346497, + "learning_rate": 7.646362538615614e-05, + "loss": 2.6096, + "step": 11556 + }, + { + "epoch": 0.9326930836897749, + "grad_norm": 0.6727899312973022, + "learning_rate": 7.644828227746904e-05, + "loss": 2.5041, + "step": 11557 + }, + { + "epoch": 0.9327737874263579, + "grad_norm": 0.6817529201507568, + "learning_rate": 7.643293975575229e-05, + "loss": 2.4474, + "step": 11558 + }, + { + "epoch": 0.9328544911629408, + "grad_norm": 0.6374444365501404, + "learning_rate": 7.641759782138827e-05, + "loss": 2.5204, + "step": 11559 + }, + { + "epoch": 0.9329351948995238, + "grad_norm": 0.6889457702636719, + "learning_rate": 7.640225647475939e-05, + "loss": 2.6344, + "step": 11560 + }, + { + "epoch": 0.9330158986361069, + "grad_norm": 0.6657958626747131, + "learning_rate": 7.638691571624794e-05, + "loss": 2.4672, + "step": 11561 + }, + { + "epoch": 0.9330966023726899, + "grad_norm": 0.6425464749336243, + "learning_rate": 7.637157554623627e-05, + "loss": 2.4756, + "step": 11562 + }, + { + "epoch": 0.9331773061092729, + "grad_norm": 0.7193450927734375, + "learning_rate": 7.635623596510675e-05, + "loss": 2.4969, + "step": 11563 + }, + { + "epoch": 0.9332580098458558, + "grad_norm": 0.6595252156257629, + "learning_rate": 7.634089697324159e-05, + "loss": 2.4647, + "step": 11564 + }, + { + "epoch": 0.9333387135824389, + "grad_norm": 0.6505268812179565, + "learning_rate": 7.632555857102312e-05, + "loss": 2.5059, + "step": 11565 + }, + { + "epoch": 0.9334194173190219, + "grad_norm": 0.6877838969230652, + "learning_rate": 7.631022075883365e-05, + "loss": 2.4855, + "step": 11566 + }, + { + "epoch": 0.9335001210556049, + "grad_norm": 0.6376198530197144, + "learning_rate": 7.629488353705538e-05, + "loss": 2.5024, + "step": 11567 + }, + { + "epoch": 0.9335808247921878, + "grad_norm": 0.6807642579078674, + "learning_rate": 7.627954690607058e-05, + "loss": 2.4954, + "step": 11568 + }, + { + "epoch": 0.9336615285287709, + "grad_norm": 0.6785219311714172, + 
"learning_rate": 7.62642108662615e-05, + "loss": 2.4854, + "step": 11569 + }, + { + "epoch": 0.9337422322653539, + "grad_norm": 0.8159591555595398, + "learning_rate": 7.624887541801032e-05, + "loss": 2.524, + "step": 11570 + }, + { + "epoch": 0.9338229360019369, + "grad_norm": 0.6912592053413391, + "learning_rate": 7.62335405616992e-05, + "loss": 2.5111, + "step": 11571 + }, + { + "epoch": 0.9339036397385199, + "grad_norm": 0.6772454977035522, + "learning_rate": 7.621820629771041e-05, + "loss": 2.5603, + "step": 11572 + }, + { + "epoch": 0.933984343475103, + "grad_norm": 0.6720221638679504, + "learning_rate": 7.620287262642613e-05, + "loss": 2.5016, + "step": 11573 + }, + { + "epoch": 0.9340650472116859, + "grad_norm": 0.651935338973999, + "learning_rate": 7.618753954822841e-05, + "loss": 2.445, + "step": 11574 + }, + { + "epoch": 0.9341457509482689, + "grad_norm": 0.6731166839599609, + "learning_rate": 7.617220706349947e-05, + "loss": 2.4703, + "step": 11575 + }, + { + "epoch": 0.9342264546848519, + "grad_norm": 0.6283879280090332, + "learning_rate": 7.615687517262143e-05, + "loss": 2.5232, + "step": 11576 + }, + { + "epoch": 0.9343071584214349, + "grad_norm": 0.7193455696105957, + "learning_rate": 7.614154387597638e-05, + "loss": 2.5268, + "step": 11577 + }, + { + "epoch": 0.934387862158018, + "grad_norm": 0.6992828845977783, + "learning_rate": 7.61262131739464e-05, + "loss": 2.5834, + "step": 11578 + }, + { + "epoch": 0.9344685658946009, + "grad_norm": 0.6501220464706421, + "learning_rate": 7.611088306691365e-05, + "loss": 2.5146, + "step": 11579 + }, + { + "epoch": 0.9345492696311839, + "grad_norm": 0.7246220111846924, + "learning_rate": 7.60955535552601e-05, + "loss": 2.5665, + "step": 11580 + }, + { + "epoch": 0.9346299733677669, + "grad_norm": 0.7190428376197815, + "learning_rate": 7.608022463936783e-05, + "loss": 2.5061, + "step": 11581 + }, + { + "epoch": 0.93471067710435, + "grad_norm": 0.7144324779510498, + "learning_rate": 7.606489631961893e-05, + 
"loss": 2.4982, + "step": 11582 + }, + { + "epoch": 0.9347913808409329, + "grad_norm": 0.7144657373428345, + "learning_rate": 7.604956859639535e-05, + "loss": 2.5506, + "step": 11583 + }, + { + "epoch": 0.9348720845775159, + "grad_norm": 0.6596626043319702, + "learning_rate": 7.603424147007913e-05, + "loss": 2.4911, + "step": 11584 + }, + { + "epoch": 0.9349527883140989, + "grad_norm": 0.7090883851051331, + "learning_rate": 7.601891494105227e-05, + "loss": 2.5087, + "step": 11585 + }, + { + "epoch": 0.935033492050682, + "grad_norm": 0.6679760217666626, + "learning_rate": 7.600358900969671e-05, + "loss": 2.497, + "step": 11586 + }, + { + "epoch": 0.935114195787265, + "grad_norm": 0.6795344948768616, + "learning_rate": 7.598826367639447e-05, + "loss": 2.4839, + "step": 11587 + }, + { + "epoch": 0.9351948995238479, + "grad_norm": 0.6378790736198425, + "learning_rate": 7.597293894152744e-05, + "loss": 2.4656, + "step": 11588 + }, + { + "epoch": 0.9352756032604309, + "grad_norm": 0.6646658182144165, + "learning_rate": 7.595761480547762e-05, + "loss": 2.4739, + "step": 11589 + }, + { + "epoch": 0.935356306997014, + "grad_norm": 0.6662073731422424, + "learning_rate": 7.594229126862687e-05, + "loss": 2.4872, + "step": 11590 + }, + { + "epoch": 0.935437010733597, + "grad_norm": 0.6698113679885864, + "learning_rate": 7.592696833135708e-05, + "loss": 2.4964, + "step": 11591 + }, + { + "epoch": 0.93551771447018, + "grad_norm": 0.6520004272460938, + "learning_rate": 7.59116459940502e-05, + "loss": 2.5616, + "step": 11592 + }, + { + "epoch": 0.9355984182067629, + "grad_norm": 0.6675869226455688, + "learning_rate": 7.589632425708806e-05, + "loss": 2.4854, + "step": 11593 + }, + { + "epoch": 0.935679121943346, + "grad_norm": 0.6914103031158447, + "learning_rate": 7.588100312085251e-05, + "loss": 2.5252, + "step": 11594 + }, + { + "epoch": 0.935759825679929, + "grad_norm": 0.7283286452293396, + "learning_rate": 7.586568258572546e-05, + "loss": 2.543, + "step": 11595 + }, + { + 
"epoch": 0.935840529416512, + "grad_norm": 0.6881958246231079, + "learning_rate": 7.585036265208864e-05, + "loss": 2.4499, + "step": 11596 + }, + { + "epoch": 0.935921233153095, + "grad_norm": 0.7733677625656128, + "learning_rate": 7.58350433203239e-05, + "loss": 2.5595, + "step": 11597 + }, + { + "epoch": 0.936001936889678, + "grad_norm": 0.672711968421936, + "learning_rate": 7.58197245908131e-05, + "loss": 2.4757, + "step": 11598 + }, + { + "epoch": 0.936082640626261, + "grad_norm": 0.691780686378479, + "learning_rate": 7.580440646393794e-05, + "loss": 2.5134, + "step": 11599 + }, + { + "epoch": 0.936163344362844, + "grad_norm": 0.6935102343559265, + "learning_rate": 7.578908894008021e-05, + "loss": 2.5128, + "step": 11600 + }, + { + "epoch": 0.936244048099427, + "grad_norm": 0.7005696892738342, + "learning_rate": 7.57737720196217e-05, + "loss": 2.5338, + "step": 11601 + }, + { + "epoch": 0.93632475183601, + "grad_norm": 0.6729815602302551, + "learning_rate": 7.575845570294409e-05, + "loss": 2.5373, + "step": 11602 + }, + { + "epoch": 0.936405455572593, + "grad_norm": 0.6694760918617249, + "learning_rate": 7.574313999042913e-05, + "loss": 2.5165, + "step": 11603 + }, + { + "epoch": 0.936486159309176, + "grad_norm": 0.6425337791442871, + "learning_rate": 7.572782488245854e-05, + "loss": 2.5102, + "step": 11604 + }, + { + "epoch": 0.936566863045759, + "grad_norm": 0.6613046526908875, + "learning_rate": 7.571251037941405e-05, + "loss": 2.5108, + "step": 11605 + }, + { + "epoch": 0.9366475667823421, + "grad_norm": 0.7396309971809387, + "learning_rate": 7.569719648167723e-05, + "loss": 2.5261, + "step": 11606 + }, + { + "epoch": 0.936728270518925, + "grad_norm": 0.6783239245414734, + "learning_rate": 7.568188318962981e-05, + "loss": 2.5725, + "step": 11607 + }, + { + "epoch": 0.936808974255508, + "grad_norm": 0.7591684460639954, + "learning_rate": 7.566657050365345e-05, + "loss": 2.5085, + "step": 11608 + }, + { + "epoch": 0.936889677992091, + "grad_norm": 
0.6805615425109863, + "learning_rate": 7.565125842412974e-05, + "loss": 2.5598, + "step": 11609 + }, + { + "epoch": 0.9369703817286741, + "grad_norm": 0.680203378200531, + "learning_rate": 7.563594695144032e-05, + "loss": 2.5072, + "step": 11610 + }, + { + "epoch": 0.9370510854652571, + "grad_norm": 0.7035777568817139, + "learning_rate": 7.56206360859668e-05, + "loss": 2.4882, + "step": 11611 + }, + { + "epoch": 0.93713178920184, + "grad_norm": 0.7457048892974854, + "learning_rate": 7.560532582809075e-05, + "loss": 2.4975, + "step": 11612 + }, + { + "epoch": 0.937212492938423, + "grad_norm": 0.702055037021637, + "learning_rate": 7.559001617819374e-05, + "loss": 2.5522, + "step": 11613 + }, + { + "epoch": 0.9372931966750061, + "grad_norm": 0.7618527412414551, + "learning_rate": 7.557470713665738e-05, + "loss": 2.5503, + "step": 11614 + }, + { + "epoch": 0.9373739004115891, + "grad_norm": 0.8611559867858887, + "learning_rate": 7.555939870386312e-05, + "loss": 2.4866, + "step": 11615 + }, + { + "epoch": 0.937454604148172, + "grad_norm": 0.7285227179527283, + "learning_rate": 7.554409088019254e-05, + "loss": 2.4855, + "step": 11616 + }, + { + "epoch": 0.937535307884755, + "grad_norm": 0.7512121796607971, + "learning_rate": 7.552878366602716e-05, + "loss": 2.5496, + "step": 11617 + }, + { + "epoch": 0.9376160116213381, + "grad_norm": 0.7353625297546387, + "learning_rate": 7.551347706174844e-05, + "loss": 2.5754, + "step": 11618 + }, + { + "epoch": 0.9376967153579211, + "grad_norm": 0.7131205797195435, + "learning_rate": 7.549817106773788e-05, + "loss": 2.4927, + "step": 11619 + }, + { + "epoch": 0.9377774190945041, + "grad_norm": 0.6562477946281433, + "learning_rate": 7.548286568437695e-05, + "loss": 2.5247, + "step": 11620 + }, + { + "epoch": 0.937858122831087, + "grad_norm": 0.7094948887825012, + "learning_rate": 7.546756091204713e-05, + "loss": 2.5084, + "step": 11621 + }, + { + "epoch": 0.9379388265676701, + "grad_norm": 0.6890475153923035, + "learning_rate": 
7.545225675112977e-05, + "loss": 2.5178, + "step": 11622 + }, + { + "epoch": 0.9380195303042531, + "grad_norm": 0.6801474094390869, + "learning_rate": 7.543695320200634e-05, + "loss": 2.5457, + "step": 11623 + }, + { + "epoch": 0.9381002340408361, + "grad_norm": 0.7093712687492371, + "learning_rate": 7.54216502650583e-05, + "loss": 2.6122, + "step": 11624 + }, + { + "epoch": 0.9381809377774191, + "grad_norm": 0.7246927618980408, + "learning_rate": 7.540634794066695e-05, + "loss": 2.5251, + "step": 11625 + }, + { + "epoch": 0.9382616415140022, + "grad_norm": 0.7358111143112183, + "learning_rate": 7.539104622921368e-05, + "loss": 2.5444, + "step": 11626 + }, + { + "epoch": 0.9383423452505851, + "grad_norm": 0.6915993690490723, + "learning_rate": 7.53757451310799e-05, + "loss": 2.448, + "step": 11627 + }, + { + "epoch": 0.9384230489871681, + "grad_norm": 0.6864039301872253, + "learning_rate": 7.536044464664689e-05, + "loss": 2.5267, + "step": 11628 + }, + { + "epoch": 0.9385037527237511, + "grad_norm": 0.664799690246582, + "learning_rate": 7.534514477629602e-05, + "loss": 2.5602, + "step": 11629 + }, + { + "epoch": 0.9385844564603341, + "grad_norm": 0.6770062446594238, + "learning_rate": 7.532984552040862e-05, + "loss": 2.5034, + "step": 11630 + }, + { + "epoch": 0.9386651601969171, + "grad_norm": 0.6961095929145813, + "learning_rate": 7.531454687936592e-05, + "loss": 2.4523, + "step": 11631 + }, + { + "epoch": 0.9387458639335001, + "grad_norm": 0.6776804327964783, + "learning_rate": 7.529924885354924e-05, + "loss": 2.5526, + "step": 11632 + }, + { + "epoch": 0.9388265676700831, + "grad_norm": 0.785796582698822, + "learning_rate": 7.528395144333988e-05, + "loss": 2.5256, + "step": 11633 + }, + { + "epoch": 0.9389072714066661, + "grad_norm": 0.7016655206680298, + "learning_rate": 7.526865464911902e-05, + "loss": 2.4781, + "step": 11634 + }, + { + "epoch": 0.9389879751432492, + "grad_norm": 0.7027767300605774, + "learning_rate": 7.525335847126795e-05, + "loss": 2.5287, 
+ "step": 11635 + }, + { + "epoch": 0.9390686788798321, + "grad_norm": 0.710624098777771, + "learning_rate": 7.523806291016787e-05, + "loss": 2.5486, + "step": 11636 + }, + { + "epoch": 0.9391493826164151, + "grad_norm": 0.7029656767845154, + "learning_rate": 7.52227679662e-05, + "loss": 2.5244, + "step": 11637 + }, + { + "epoch": 0.9392300863529981, + "grad_norm": 0.7417333722114563, + "learning_rate": 7.520747363974551e-05, + "loss": 2.5561, + "step": 11638 + }, + { + "epoch": 0.9393107900895812, + "grad_norm": 0.6595067381858826, + "learning_rate": 7.519217993118559e-05, + "loss": 2.617, + "step": 11639 + }, + { + "epoch": 0.9393914938261642, + "grad_norm": 0.6808187365531921, + "learning_rate": 7.517688684090141e-05, + "loss": 2.5279, + "step": 11640 + }, + { + "epoch": 0.9394721975627471, + "grad_norm": 0.6618706583976746, + "learning_rate": 7.516159436927408e-05, + "loss": 2.4976, + "step": 11641 + }, + { + "epoch": 0.9395529012993301, + "grad_norm": 0.6979385018348694, + "learning_rate": 7.514630251668475e-05, + "loss": 2.4542, + "step": 11642 + }, + { + "epoch": 0.9396336050359132, + "grad_norm": 0.6380844116210938, + "learning_rate": 7.513101128351454e-05, + "loss": 2.48, + "step": 11643 + }, + { + "epoch": 0.9397143087724962, + "grad_norm": 0.6390014290809631, + "learning_rate": 7.511572067014452e-05, + "loss": 2.5111, + "step": 11644 + }, + { + "epoch": 0.9397950125090792, + "grad_norm": 0.7592498064041138, + "learning_rate": 7.510043067695578e-05, + "loss": 2.5161, + "step": 11645 + }, + { + "epoch": 0.9398757162456621, + "grad_norm": 0.6269322037696838, + "learning_rate": 7.508514130432945e-05, + "loss": 2.491, + "step": 11646 + }, + { + "epoch": 0.9399564199822452, + "grad_norm": 0.6372053623199463, + "learning_rate": 7.506985255264646e-05, + "loss": 2.4826, + "step": 11647 + }, + { + "epoch": 0.9400371237188282, + "grad_norm": 0.6962460875511169, + "learning_rate": 7.505456442228794e-05, + "loss": 2.5605, + "step": 11648 + }, + { + "epoch": 
0.9401178274554112, + "grad_norm": 0.7931656241416931, + "learning_rate": 7.503927691363491e-05, + "loss": 2.4909, + "step": 11649 + }, + { + "epoch": 0.9401985311919941, + "grad_norm": 0.688792884349823, + "learning_rate": 7.502399002706832e-05, + "loss": 2.4888, + "step": 11650 + }, + { + "epoch": 0.9402792349285772, + "grad_norm": 0.6683691143989563, + "learning_rate": 7.500870376296918e-05, + "loss": 2.5233, + "step": 11651 + }, + { + "epoch": 0.9403599386651602, + "grad_norm": 0.6537527441978455, + "learning_rate": 7.499341812171846e-05, + "loss": 2.5061, + "step": 11652 + }, + { + "epoch": 0.9404406424017432, + "grad_norm": 0.6657658219337463, + "learning_rate": 7.497813310369717e-05, + "loss": 2.4844, + "step": 11653 + }, + { + "epoch": 0.9405213461383262, + "grad_norm": 0.6865110993385315, + "learning_rate": 7.496284870928618e-05, + "loss": 2.4986, + "step": 11654 + }, + { + "epoch": 0.9406020498749093, + "grad_norm": 0.6724923849105835, + "learning_rate": 7.494756493886644e-05, + "loss": 2.4818, + "step": 11655 + }, + { + "epoch": 0.9406827536114922, + "grad_norm": 0.6478626728057861, + "learning_rate": 7.493228179281892e-05, + "loss": 2.5321, + "step": 11656 + }, + { + "epoch": 0.9407634573480752, + "grad_norm": 0.6474425792694092, + "learning_rate": 7.491699927152443e-05, + "loss": 2.5276, + "step": 11657 + }, + { + "epoch": 0.9408441610846582, + "grad_norm": 0.6736220717430115, + "learning_rate": 7.490171737536387e-05, + "loss": 2.4734, + "step": 11658 + }, + { + "epoch": 0.9409248648212413, + "grad_norm": 0.6714746952056885, + "learning_rate": 7.488643610471815e-05, + "loss": 2.5754, + "step": 11659 + }, + { + "epoch": 0.9410055685578242, + "grad_norm": 0.6714532375335693, + "learning_rate": 7.487115545996805e-05, + "loss": 2.4855, + "step": 11660 + }, + { + "epoch": 0.9410862722944072, + "grad_norm": 0.7601683139801025, + "learning_rate": 7.485587544149447e-05, + "loss": 2.4887, + "step": 11661 + }, + { + "epoch": 0.9411669760309902, + "grad_norm": 
0.7655646204948425, + "learning_rate": 7.484059604967821e-05, + "loss": 2.4904, + "step": 11662 + }, + { + "epoch": 0.9412476797675733, + "grad_norm": 0.6841822862625122, + "learning_rate": 7.482531728490006e-05, + "loss": 2.5272, + "step": 11663 + }, + { + "epoch": 0.9413283835041563, + "grad_norm": 0.7683621048927307, + "learning_rate": 7.481003914754078e-05, + "loss": 2.5218, + "step": 11664 + }, + { + "epoch": 0.9414090872407392, + "grad_norm": 0.6597647070884705, + "learning_rate": 7.479476163798124e-05, + "loss": 2.4925, + "step": 11665 + }, + { + "epoch": 0.9414897909773222, + "grad_norm": 0.6573941111564636, + "learning_rate": 7.477948475660208e-05, + "loss": 2.4854, + "step": 11666 + }, + { + "epoch": 0.9415704947139053, + "grad_norm": 0.6639125943183899, + "learning_rate": 7.476420850378407e-05, + "loss": 2.5207, + "step": 11667 + }, + { + "epoch": 0.9416511984504883, + "grad_norm": 0.6770366430282593, + "learning_rate": 7.474893287990796e-05, + "loss": 2.5167, + "step": 11668 + }, + { + "epoch": 0.9417319021870713, + "grad_norm": 0.6908389925956726, + "learning_rate": 7.473365788535447e-05, + "loss": 2.4606, + "step": 11669 + }, + { + "epoch": 0.9418126059236542, + "grad_norm": 0.6625069975852966, + "learning_rate": 7.471838352050427e-05, + "loss": 2.5344, + "step": 11670 + }, + { + "epoch": 0.9418933096602373, + "grad_norm": 0.6690869331359863, + "learning_rate": 7.470310978573803e-05, + "loss": 2.4507, + "step": 11671 + }, + { + "epoch": 0.9419740133968203, + "grad_norm": 0.6741886734962463, + "learning_rate": 7.468783668143645e-05, + "loss": 2.5755, + "step": 11672 + }, + { + "epoch": 0.9420547171334033, + "grad_norm": 0.6876424551010132, + "learning_rate": 7.467256420798009e-05, + "loss": 2.483, + "step": 11673 + }, + { + "epoch": 0.9421354208699863, + "grad_norm": 0.7044318318367004, + "learning_rate": 7.465729236574965e-05, + "loss": 2.5025, + "step": 11674 + }, + { + "epoch": 0.9422161246065693, + "grad_norm": 0.6608660817146301, + 
"learning_rate": 7.46420211551258e-05, + "loss": 2.5253, + "step": 11675 + }, + { + "epoch": 0.9422968283431523, + "grad_norm": 0.6944260001182556, + "learning_rate": 7.4626750576489e-05, + "loss": 2.5002, + "step": 11676 + }, + { + "epoch": 0.9423775320797353, + "grad_norm": 0.7304964065551758, + "learning_rate": 7.46114806302199e-05, + "loss": 2.5501, + "step": 11677 + }, + { + "epoch": 0.9424582358163183, + "grad_norm": 0.688525378704071, + "learning_rate": 7.459621131669911e-05, + "loss": 2.5291, + "step": 11678 + }, + { + "epoch": 0.9425389395529012, + "grad_norm": 0.7388432025909424, + "learning_rate": 7.45809426363071e-05, + "loss": 2.5391, + "step": 11679 + }, + { + "epoch": 0.9426196432894843, + "grad_norm": 0.6777819991111755, + "learning_rate": 7.456567458942447e-05, + "loss": 2.5425, + "step": 11680 + }, + { + "epoch": 0.9427003470260673, + "grad_norm": 0.7208845615386963, + "learning_rate": 7.455040717643169e-05, + "loss": 2.5306, + "step": 11681 + }, + { + "epoch": 0.9427810507626503, + "grad_norm": 0.745384693145752, + "learning_rate": 7.453514039770934e-05, + "loss": 2.4695, + "step": 11682 + }, + { + "epoch": 0.9428617544992333, + "grad_norm": 0.7088115215301514, + "learning_rate": 7.451987425363782e-05, + "loss": 2.5413, + "step": 11683 + }, + { + "epoch": 0.9429424582358163, + "grad_norm": 0.7287998795509338, + "learning_rate": 7.450460874459762e-05, + "loss": 2.5773, + "step": 11684 + }, + { + "epoch": 0.9430231619723993, + "grad_norm": 0.6897092461585999, + "learning_rate": 7.448934387096928e-05, + "loss": 2.5255, + "step": 11685 + }, + { + "epoch": 0.9431038657089823, + "grad_norm": 0.6227227449417114, + "learning_rate": 7.447407963313313e-05, + "loss": 2.5027, + "step": 11686 + }, + { + "epoch": 0.9431845694455653, + "grad_norm": 0.6954305768013, + "learning_rate": 7.445881603146964e-05, + "loss": 2.5477, + "step": 11687 + }, + { + "epoch": 0.9432652731821484, + "grad_norm": 0.7860052585601807, + "learning_rate": 7.444355306635924e-05, + 
"loss": 2.469, + "step": 11688 + }, + { + "epoch": 0.9433459769187313, + "grad_norm": 0.6851965188980103, + "learning_rate": 7.442829073818227e-05, + "loss": 2.4997, + "step": 11689 + }, + { + "epoch": 0.9434266806553143, + "grad_norm": 0.7011744379997253, + "learning_rate": 7.441302904731916e-05, + "loss": 2.5399, + "step": 11690 + }, + { + "epoch": 0.9435073843918973, + "grad_norm": 0.703167200088501, + "learning_rate": 7.439776799415028e-05, + "loss": 2.5323, + "step": 11691 + }, + { + "epoch": 0.9435880881284804, + "grad_norm": 0.6747310161590576, + "learning_rate": 7.438250757905591e-05, + "loss": 2.5406, + "step": 11692 + }, + { + "epoch": 0.9436687918650634, + "grad_norm": 0.8631153106689453, + "learning_rate": 7.436724780241642e-05, + "loss": 2.5215, + "step": 11693 + }, + { + "epoch": 0.9437494956016463, + "grad_norm": 0.6919798254966736, + "learning_rate": 7.435198866461214e-05, + "loss": 2.4654, + "step": 11694 + }, + { + "epoch": 0.9438301993382293, + "grad_norm": 0.6747070550918579, + "learning_rate": 7.433673016602332e-05, + "loss": 2.5186, + "step": 11695 + }, + { + "epoch": 0.9439109030748124, + "grad_norm": 0.7368776798248291, + "learning_rate": 7.432147230703026e-05, + "loss": 2.5365, + "step": 11696 + }, + { + "epoch": 0.9439916068113954, + "grad_norm": 0.7443639636039734, + "learning_rate": 7.430621508801325e-05, + "loss": 2.4966, + "step": 11697 + }, + { + "epoch": 0.9440723105479784, + "grad_norm": 0.7371395230293274, + "learning_rate": 7.429095850935255e-05, + "loss": 2.4638, + "step": 11698 + }, + { + "epoch": 0.9441530142845613, + "grad_norm": 0.6917321681976318, + "learning_rate": 7.427570257142832e-05, + "loss": 2.5341, + "step": 11699 + }, + { + "epoch": 0.9442337180211444, + "grad_norm": 0.7704101800918579, + "learning_rate": 7.426044727462085e-05, + "loss": 2.5144, + "step": 11700 + }, + { + "epoch": 0.9443144217577274, + "grad_norm": 0.692197859287262, + "learning_rate": 7.424519261931036e-05, + "loss": 2.5293, + "step": 11701 + }, + 
{ + "epoch": 0.9443951254943104, + "grad_norm": 0.7305885553359985, + "learning_rate": 7.422993860587695e-05, + "loss": 2.5236, + "step": 11702 + }, + { + "epoch": 0.9444758292308933, + "grad_norm": 0.6955052018165588, + "learning_rate": 7.421468523470081e-05, + "loss": 2.4765, + "step": 11703 + }, + { + "epoch": 0.9445565329674764, + "grad_norm": 0.7394432425498962, + "learning_rate": 7.419943250616216e-05, + "loss": 2.5053, + "step": 11704 + }, + { + "epoch": 0.9446372367040594, + "grad_norm": 0.679044246673584, + "learning_rate": 7.418418042064108e-05, + "loss": 2.5413, + "step": 11705 + }, + { + "epoch": 0.9447179404406424, + "grad_norm": 0.7153440117835999, + "learning_rate": 7.41689289785177e-05, + "loss": 2.4938, + "step": 11706 + }, + { + "epoch": 0.9447986441772254, + "grad_norm": 0.697068452835083, + "learning_rate": 7.415367818017217e-05, + "loss": 2.5157, + "step": 11707 + }, + { + "epoch": 0.9448793479138085, + "grad_norm": 0.664616048336029, + "learning_rate": 7.41384280259845e-05, + "loss": 2.4859, + "step": 11708 + }, + { + "epoch": 0.9449600516503914, + "grad_norm": 0.7275365591049194, + "learning_rate": 7.412317851633479e-05, + "loss": 2.523, + "step": 11709 + }, + { + "epoch": 0.9450407553869744, + "grad_norm": 0.7408944368362427, + "learning_rate": 7.410792965160318e-05, + "loss": 2.4994, + "step": 11710 + }, + { + "epoch": 0.9451214591235574, + "grad_norm": 0.7222678065299988, + "learning_rate": 7.40926814321696e-05, + "loss": 2.5084, + "step": 11711 + }, + { + "epoch": 0.9452021628601405, + "grad_norm": 0.7242292761802673, + "learning_rate": 7.407743385841412e-05, + "loss": 2.5165, + "step": 11712 + }, + { + "epoch": 0.9452828665967234, + "grad_norm": 0.6634014844894409, + "learning_rate": 7.406218693071677e-05, + "loss": 2.4947, + "step": 11713 + }, + { + "epoch": 0.9453635703333064, + "grad_norm": 0.8126605153083801, + "learning_rate": 7.404694064945751e-05, + "loss": 2.5553, + "step": 11714 + }, + { + "epoch": 0.9454442740698894, + 
"grad_norm": 0.679344654083252, + "learning_rate": 7.403169501501632e-05, + "loss": 2.5475, + "step": 11715 + }, + { + "epoch": 0.9455249778064725, + "grad_norm": 0.7584314346313477, + "learning_rate": 7.401645002777318e-05, + "loss": 2.5498, + "step": 11716 + }, + { + "epoch": 0.9456056815430555, + "grad_norm": 0.7191590666770935, + "learning_rate": 7.400120568810806e-05, + "loss": 2.5161, + "step": 11717 + }, + { + "epoch": 0.9456863852796384, + "grad_norm": 0.6738762855529785, + "learning_rate": 7.398596199640084e-05, + "loss": 2.4819, + "step": 11718 + }, + { + "epoch": 0.9457670890162214, + "grad_norm": 0.7305885553359985, + "learning_rate": 7.397071895303143e-05, + "loss": 2.4842, + "step": 11719 + }, + { + "epoch": 0.9458477927528045, + "grad_norm": 0.6885291337966919, + "learning_rate": 7.395547655837976e-05, + "loss": 2.5016, + "step": 11720 + }, + { + "epoch": 0.9459284964893875, + "grad_norm": 0.6807307600975037, + "learning_rate": 7.394023481282568e-05, + "loss": 2.4949, + "step": 11721 + }, + { + "epoch": 0.9460092002259705, + "grad_norm": 0.6683849096298218, + "learning_rate": 7.392499371674907e-05, + "loss": 2.4974, + "step": 11722 + }, + { + "epoch": 0.9460899039625534, + "grad_norm": 0.6615697741508484, + "learning_rate": 7.39097532705298e-05, + "loss": 2.4744, + "step": 11723 + }, + { + "epoch": 0.9461706076991365, + "grad_norm": 0.6463690996170044, + "learning_rate": 7.389451347454765e-05, + "loss": 2.478, + "step": 11724 + }, + { + "epoch": 0.9462513114357195, + "grad_norm": 0.6848269701004028, + "learning_rate": 7.387927432918247e-05, + "loss": 2.5491, + "step": 11725 + }, + { + "epoch": 0.9463320151723025, + "grad_norm": 0.7251551747322083, + "learning_rate": 7.386403583481409e-05, + "loss": 2.4936, + "step": 11726 + }, + { + "epoch": 0.9464127189088855, + "grad_norm": 0.6562095284461975, + "learning_rate": 7.384879799182223e-05, + "loss": 2.4895, + "step": 11727 + }, + { + "epoch": 0.9464934226454685, + "grad_norm": 0.6891352534294128, + 
"learning_rate": 7.383356080058668e-05, + "loss": 2.508, + "step": 11728 + }, + { + "epoch": 0.9465741263820515, + "grad_norm": 0.7220255136489868, + "learning_rate": 7.381832426148719e-05, + "loss": 2.5181, + "step": 11729 + }, + { + "epoch": 0.9466548301186345, + "grad_norm": 0.7213689088821411, + "learning_rate": 7.38030883749035e-05, + "loss": 2.5136, + "step": 11730 + }, + { + "epoch": 0.9467355338552175, + "grad_norm": 0.6711129546165466, + "learning_rate": 7.378785314121535e-05, + "loss": 2.5463, + "step": 11731 + }, + { + "epoch": 0.9468162375918004, + "grad_norm": 0.6380139589309692, + "learning_rate": 7.377261856080239e-05, + "loss": 2.5092, + "step": 11732 + }, + { + "epoch": 0.9468969413283835, + "grad_norm": 0.66046142578125, + "learning_rate": 7.375738463404437e-05, + "loss": 2.5561, + "step": 11733 + }, + { + "epoch": 0.9469776450649665, + "grad_norm": 0.6857354044914246, + "learning_rate": 7.37421513613209e-05, + "loss": 2.5774, + "step": 11734 + }, + { + "epoch": 0.9470583488015495, + "grad_norm": 0.6811589598655701, + "learning_rate": 7.372691874301163e-05, + "loss": 2.4918, + "step": 11735 + }, + { + "epoch": 0.9471390525381325, + "grad_norm": 0.6401017308235168, + "learning_rate": 7.37116867794963e-05, + "loss": 2.4994, + "step": 11736 + }, + { + "epoch": 0.9472197562747156, + "grad_norm": 0.6967078447341919, + "learning_rate": 7.369645547115438e-05, + "loss": 2.5809, + "step": 11737 + }, + { + "epoch": 0.9473004600112985, + "grad_norm": 0.6695219278335571, + "learning_rate": 7.368122481836557e-05, + "loss": 2.4735, + "step": 11738 + }, + { + "epoch": 0.9473811637478815, + "grad_norm": 0.6540528535842896, + "learning_rate": 7.366599482150944e-05, + "loss": 2.4998, + "step": 11739 + }, + { + "epoch": 0.9474618674844645, + "grad_norm": 0.700683057308197, + "learning_rate": 7.365076548096556e-05, + "loss": 2.5258, + "step": 11740 + }, + { + "epoch": 0.9475425712210476, + "grad_norm": 0.7125419974327087, + "learning_rate": 7.363553679711347e-05, + 
"loss": 2.4653, + "step": 11741 + }, + { + "epoch": 0.9476232749576305, + "grad_norm": 0.7285346984863281, + "learning_rate": 7.362030877033275e-05, + "loss": 2.5523, + "step": 11742 + }, + { + "epoch": 0.9477039786942135, + "grad_norm": 0.7310814261436462, + "learning_rate": 7.360508140100288e-05, + "loss": 2.5027, + "step": 11743 + }, + { + "epoch": 0.9477846824307965, + "grad_norm": 0.746961772441864, + "learning_rate": 7.358985468950335e-05, + "loss": 2.5485, + "step": 11744 + }, + { + "epoch": 0.9478653861673796, + "grad_norm": 0.6880186796188354, + "learning_rate": 7.357462863621369e-05, + "loss": 2.5243, + "step": 11745 + }, + { + "epoch": 0.9479460899039626, + "grad_norm": 0.6406471133232117, + "learning_rate": 7.355940324151339e-05, + "loss": 2.512, + "step": 11746 + }, + { + "epoch": 0.9480267936405455, + "grad_norm": 0.6503005027770996, + "learning_rate": 7.354417850578184e-05, + "loss": 2.5318, + "step": 11747 + }, + { + "epoch": 0.9481074973771285, + "grad_norm": 0.6458879113197327, + "learning_rate": 7.352895442939852e-05, + "loss": 2.5451, + "step": 11748 + }, + { + "epoch": 0.9481882011137116, + "grad_norm": 0.7382936477661133, + "learning_rate": 7.351373101274288e-05, + "loss": 2.5393, + "step": 11749 + }, + { + "epoch": 0.9482689048502946, + "grad_norm": 0.7366087436676025, + "learning_rate": 7.349850825619429e-05, + "loss": 2.5591, + "step": 11750 + }, + { + "epoch": 0.9483496085868776, + "grad_norm": 0.6652588248252869, + "learning_rate": 7.348328616013213e-05, + "loss": 2.5348, + "step": 11751 + }, + { + "epoch": 0.9484303123234605, + "grad_norm": 0.7515435814857483, + "learning_rate": 7.346806472493584e-05, + "loss": 2.5208, + "step": 11752 + }, + { + "epoch": 0.9485110160600436, + "grad_norm": 0.7161263227462769, + "learning_rate": 7.345284395098469e-05, + "loss": 2.5518, + "step": 11753 + }, + { + "epoch": 0.9485917197966266, + "grad_norm": 0.7433953285217285, + "learning_rate": 7.343762383865807e-05, + "loss": 2.5914, + "step": 11754 + }, + 
{ + "epoch": 0.9486724235332096, + "grad_norm": 0.674991250038147, + "learning_rate": 7.342240438833532e-05, + "loss": 2.5566, + "step": 11755 + }, + { + "epoch": 0.9487531272697926, + "grad_norm": 0.7511670589447021, + "learning_rate": 7.34071856003957e-05, + "loss": 2.5253, + "step": 11756 + }, + { + "epoch": 0.9488338310063756, + "grad_norm": 0.6672492623329163, + "learning_rate": 7.339196747521853e-05, + "loss": 2.4887, + "step": 11757 + }, + { + "epoch": 0.9489145347429586, + "grad_norm": 0.6826158761978149, + "learning_rate": 7.337675001318312e-05, + "loss": 2.5072, + "step": 11758 + }, + { + "epoch": 0.9489952384795416, + "grad_norm": 0.7189450860023499, + "learning_rate": 7.336153321466867e-05, + "loss": 2.5583, + "step": 11759 + }, + { + "epoch": 0.9490759422161246, + "grad_norm": 0.6923015117645264, + "learning_rate": 7.33463170800544e-05, + "loss": 2.5416, + "step": 11760 + }, + { + "epoch": 0.9491566459527077, + "grad_norm": 0.690060555934906, + "learning_rate": 7.333110160971963e-05, + "loss": 2.4931, + "step": 11761 + }, + { + "epoch": 0.9492373496892906, + "grad_norm": 0.6887977719306946, + "learning_rate": 7.331588680404354e-05, + "loss": 2.4676, + "step": 11762 + }, + { + "epoch": 0.9493180534258736, + "grad_norm": 0.8573753237724304, + "learning_rate": 7.330067266340528e-05, + "loss": 2.5074, + "step": 11763 + }, + { + "epoch": 0.9493987571624566, + "grad_norm": 0.6760974526405334, + "learning_rate": 7.328545918818403e-05, + "loss": 2.5395, + "step": 11764 + }, + { + "epoch": 0.9494794608990397, + "grad_norm": 0.6946160197257996, + "learning_rate": 7.327024637875901e-05, + "loss": 2.535, + "step": 11765 + }, + { + "epoch": 0.9495601646356226, + "grad_norm": 0.6851378679275513, + "learning_rate": 7.32550342355093e-05, + "loss": 2.487, + "step": 11766 + }, + { + "epoch": 0.9496408683722056, + "grad_norm": 0.6480168104171753, + "learning_rate": 7.323982275881404e-05, + "loss": 2.513, + "step": 11767 + }, + { + "epoch": 0.9497215721087886, + 
"grad_norm": 0.6492218971252441, + "learning_rate": 7.322461194905239e-05, + "loss": 2.4532, + "step": 11768 + }, + { + "epoch": 0.9498022758453717, + "grad_norm": 0.6670051217079163, + "learning_rate": 7.320940180660337e-05, + "loss": 2.5258, + "step": 11769 + }, + { + "epoch": 0.9498829795819547, + "grad_norm": 0.6678066253662109, + "learning_rate": 7.319419233184608e-05, + "loss": 2.5388, + "step": 11770 + }, + { + "epoch": 0.9499636833185376, + "grad_norm": 0.693545937538147, + "learning_rate": 7.31789835251596e-05, + "loss": 2.5304, + "step": 11771 + }, + { + "epoch": 0.9500443870551206, + "grad_norm": 0.680486798286438, + "learning_rate": 7.316377538692297e-05, + "loss": 2.5024, + "step": 11772 + }, + { + "epoch": 0.9501250907917037, + "grad_norm": 0.7271847128868103, + "learning_rate": 7.314856791751518e-05, + "loss": 2.5947, + "step": 11773 + }, + { + "epoch": 0.9502057945282867, + "grad_norm": 0.6889839172363281, + "learning_rate": 7.31333611173153e-05, + "loss": 2.5135, + "step": 11774 + }, + { + "epoch": 0.9502864982648697, + "grad_norm": 0.7431777119636536, + "learning_rate": 7.311815498670226e-05, + "loss": 2.5856, + "step": 11775 + }, + { + "epoch": 0.9503672020014526, + "grad_norm": 0.7168101072311401, + "learning_rate": 7.310294952605508e-05, + "loss": 2.4383, + "step": 11776 + }, + { + "epoch": 0.9504479057380357, + "grad_norm": 0.654803454875946, + "learning_rate": 7.308774473575271e-05, + "loss": 2.4908, + "step": 11777 + }, + { + "epoch": 0.9505286094746187, + "grad_norm": 0.6810718774795532, + "learning_rate": 7.307254061617412e-05, + "loss": 2.5073, + "step": 11778 + }, + { + "epoch": 0.9506093132112017, + "grad_norm": 0.637980043888092, + "learning_rate": 7.305733716769817e-05, + "loss": 2.5686, + "step": 11779 + }, + { + "epoch": 0.9506900169477847, + "grad_norm": 0.6549471020698547, + "learning_rate": 7.30421343907038e-05, + "loss": 2.5502, + "step": 11780 + }, + { + "epoch": 0.9507707206843676, + "grad_norm": 0.7087163329124451, + 
"learning_rate": 7.302693228556994e-05, + "loss": 2.4773, + "step": 11781 + }, + { + "epoch": 0.9508514244209507, + "grad_norm": 0.6230717897415161, + "learning_rate": 7.301173085267541e-05, + "loss": 2.4806, + "step": 11782 + }, + { + "epoch": 0.9509321281575337, + "grad_norm": 0.7145688533782959, + "learning_rate": 7.299653009239911e-05, + "loss": 2.5259, + "step": 11783 + }, + { + "epoch": 0.9510128318941167, + "grad_norm": 0.679100513458252, + "learning_rate": 7.298133000511988e-05, + "loss": 2.5012, + "step": 11784 + }, + { + "epoch": 0.9510935356306996, + "grad_norm": 0.7057691216468811, + "learning_rate": 7.29661305912165e-05, + "loss": 2.4826, + "step": 11785 + }, + { + "epoch": 0.9511742393672827, + "grad_norm": 0.65343177318573, + "learning_rate": 7.295093185106782e-05, + "loss": 2.4553, + "step": 11786 + }, + { + "epoch": 0.9512549431038657, + "grad_norm": 0.7948461174964905, + "learning_rate": 7.293573378505268e-05, + "loss": 2.478, + "step": 11787 + }, + { + "epoch": 0.9513356468404487, + "grad_norm": 0.6511468887329102, + "learning_rate": 7.292053639354975e-05, + "loss": 2.4862, + "step": 11788 + }, + { + "epoch": 0.9514163505770317, + "grad_norm": 0.7293919324874878, + "learning_rate": 7.290533967693782e-05, + "loss": 2.5956, + "step": 11789 + }, + { + "epoch": 0.9514970543136148, + "grad_norm": 0.6691277623176575, + "learning_rate": 7.289014363559567e-05, + "loss": 2.5659, + "step": 11790 + }, + { + "epoch": 0.9515777580501977, + "grad_norm": 0.7054625749588013, + "learning_rate": 7.287494826990203e-05, + "loss": 2.5875, + "step": 11791 + }, + { + "epoch": 0.9516584617867807, + "grad_norm": 0.6597220301628113, + "learning_rate": 7.285975358023555e-05, + "loss": 2.5215, + "step": 11792 + }, + { + "epoch": 0.9517391655233637, + "grad_norm": 0.6719489097595215, + "learning_rate": 7.284455956697497e-05, + "loss": 2.4752, + "step": 11793 + }, + { + "epoch": 0.9518198692599468, + "grad_norm": 0.7325637340545654, + "learning_rate": 7.2829366230499e-05, + 
"loss": 2.5504, + "step": 11794 + }, + { + "epoch": 0.9519005729965297, + "grad_norm": 0.637668788433075, + "learning_rate": 7.281417357118619e-05, + "loss": 2.5105, + "step": 11795 + }, + { + "epoch": 0.9519812767331127, + "grad_norm": 0.7815340161323547, + "learning_rate": 7.279898158941525e-05, + "loss": 2.4998, + "step": 11796 + }, + { + "epoch": 0.9520619804696957, + "grad_norm": 0.6555821299552917, + "learning_rate": 7.278379028556481e-05, + "loss": 2.4326, + "step": 11797 + }, + { + "epoch": 0.9521426842062788, + "grad_norm": 0.7298933863639832, + "learning_rate": 7.276859966001344e-05, + "loss": 2.4779, + "step": 11798 + }, + { + "epoch": 0.9522233879428618, + "grad_norm": 0.683455765247345, + "learning_rate": 7.275340971313974e-05, + "loss": 2.4416, + "step": 11799 + }, + { + "epoch": 0.9523040916794447, + "grad_norm": 0.6353151798248291, + "learning_rate": 7.273822044532232e-05, + "loss": 2.4777, + "step": 11800 + }, + { + "epoch": 0.9523847954160277, + "grad_norm": 0.6898894309997559, + "learning_rate": 7.27230318569397e-05, + "loss": 2.5351, + "step": 11801 + }, + { + "epoch": 0.9524654991526108, + "grad_norm": 0.6528690457344055, + "learning_rate": 7.270784394837041e-05, + "loss": 2.5145, + "step": 11802 + }, + { + "epoch": 0.9525462028891938, + "grad_norm": 0.6432619094848633, + "learning_rate": 7.269265671999304e-05, + "loss": 2.5002, + "step": 11803 + }, + { + "epoch": 0.9526269066257768, + "grad_norm": 0.7317861318588257, + "learning_rate": 7.267747017218601e-05, + "loss": 2.5318, + "step": 11804 + }, + { + "epoch": 0.9527076103623597, + "grad_norm": 0.7581185698509216, + "learning_rate": 7.266228430532785e-05, + "loss": 2.5313, + "step": 11805 + }, + { + "epoch": 0.9527883140989428, + "grad_norm": 0.7316486239433289, + "learning_rate": 7.264709911979702e-05, + "loss": 2.5147, + "step": 11806 + }, + { + "epoch": 0.9528690178355258, + "grad_norm": 0.7378978729248047, + "learning_rate": 7.263191461597199e-05, + "loss": 2.5149, + "step": 11807 + }, + 
{ + "epoch": 0.9529497215721088, + "grad_norm": 0.6603738069534302, + "learning_rate": 7.26167307942312e-05, + "loss": 2.4684, + "step": 11808 + }, + { + "epoch": 0.9530304253086918, + "grad_norm": 0.7566502690315247, + "learning_rate": 7.260154765495302e-05, + "loss": 2.5535, + "step": 11809 + }, + { + "epoch": 0.9531111290452748, + "grad_norm": 0.693067729473114, + "learning_rate": 7.258636519851596e-05, + "loss": 2.5103, + "step": 11810 + }, + { + "epoch": 0.9531918327818578, + "grad_norm": 0.7049208283424377, + "learning_rate": 7.257118342529826e-05, + "loss": 2.5482, + "step": 11811 + }, + { + "epoch": 0.9532725365184408, + "grad_norm": 0.6986998319625854, + "learning_rate": 7.25560023356784e-05, + "loss": 2.4921, + "step": 11812 + }, + { + "epoch": 0.9533532402550238, + "grad_norm": 0.7079482674598694, + "learning_rate": 7.254082193003476e-05, + "loss": 2.5339, + "step": 11813 + }, + { + "epoch": 0.9534339439916069, + "grad_norm": 0.7283922433853149, + "learning_rate": 7.252564220874553e-05, + "loss": 2.5056, + "step": 11814 + }, + { + "epoch": 0.9535146477281898, + "grad_norm": 0.6965533494949341, + "learning_rate": 7.251046317218914e-05, + "loss": 2.5512, + "step": 11815 + }, + { + "epoch": 0.9535953514647728, + "grad_norm": 0.7367159128189087, + "learning_rate": 7.24952848207439e-05, + "loss": 2.5015, + "step": 11816 + }, + { + "epoch": 0.9536760552013558, + "grad_norm": 0.6959818601608276, + "learning_rate": 7.248010715478802e-05, + "loss": 2.4969, + "step": 11817 + }, + { + "epoch": 0.9537567589379389, + "grad_norm": 0.69304358959198, + "learning_rate": 7.246493017469981e-05, + "loss": 2.5098, + "step": 11818 + }, + { + "epoch": 0.9538374626745219, + "grad_norm": 0.6830596327781677, + "learning_rate": 7.244975388085757e-05, + "loss": 2.5206, + "step": 11819 + }, + { + "epoch": 0.9539181664111048, + "grad_norm": 0.7354303598403931, + "learning_rate": 7.243457827363944e-05, + "loss": 2.5223, + "step": 11820 + }, + { + "epoch": 0.9539988701476878, + 
"grad_norm": 0.7046182751655579, + "learning_rate": 7.241940335342366e-05, + "loss": 2.4931, + "step": 11821 + }, + { + "epoch": 0.9540795738842709, + "grad_norm": 0.6990540623664856, + "learning_rate": 7.240422912058843e-05, + "loss": 2.4302, + "step": 11822 + }, + { + "epoch": 0.9541602776208539, + "grad_norm": 0.7562115788459778, + "learning_rate": 7.238905557551202e-05, + "loss": 2.5118, + "step": 11823 + }, + { + "epoch": 0.9542409813574368, + "grad_norm": 0.8212862014770508, + "learning_rate": 7.237388271857248e-05, + "loss": 2.5476, + "step": 11824 + }, + { + "epoch": 0.9543216850940198, + "grad_norm": 0.7095397710800171, + "learning_rate": 7.235871055014798e-05, + "loss": 2.5073, + "step": 11825 + }, + { + "epoch": 0.9544023888306029, + "grad_norm": 0.7174660563468933, + "learning_rate": 7.23435390706167e-05, + "loss": 2.4553, + "step": 11826 + }, + { + "epoch": 0.9544830925671859, + "grad_norm": 0.7121314406394958, + "learning_rate": 7.23283682803567e-05, + "loss": 2.5164, + "step": 11827 + }, + { + "epoch": 0.9545637963037689, + "grad_norm": 0.7354126572608948, + "learning_rate": 7.231319817974609e-05, + "loss": 2.5413, + "step": 11828 + }, + { + "epoch": 0.9546445000403518, + "grad_norm": 0.7770543694496155, + "learning_rate": 7.2298028769163e-05, + "loss": 2.5244, + "step": 11829 + }, + { + "epoch": 0.9547252037769349, + "grad_norm": 0.6770393252372742, + "learning_rate": 7.228286004898541e-05, + "loss": 2.4707, + "step": 11830 + }, + { + "epoch": 0.9548059075135179, + "grad_norm": 0.6916880011558533, + "learning_rate": 7.22676920195914e-05, + "loss": 2.506, + "step": 11831 + }, + { + "epoch": 0.9548866112501009, + "grad_norm": 0.6299161314964294, + "learning_rate": 7.225252468135901e-05, + "loss": 2.5042, + "step": 11832 + }, + { + "epoch": 0.9549673149866839, + "grad_norm": 0.7081227898597717, + "learning_rate": 7.223735803466623e-05, + "loss": 2.5537, + "step": 11833 + }, + { + "epoch": 0.9550480187232668, + "grad_norm": 0.6600900888442993, + 
"learning_rate": 7.222219207989104e-05, + "loss": 2.5329, + "step": 11834 + }, + { + "epoch": 0.9551287224598499, + "grad_norm": 0.6715366244316101, + "learning_rate": 7.22070268174115e-05, + "loss": 2.5273, + "step": 11835 + }, + { + "epoch": 0.9552094261964329, + "grad_norm": 0.6655930280685425, + "learning_rate": 7.219186224760543e-05, + "loss": 2.4254, + "step": 11836 + }, + { + "epoch": 0.9552901299330159, + "grad_norm": 0.6925715208053589, + "learning_rate": 7.217669837085088e-05, + "loss": 2.5104, + "step": 11837 + }, + { + "epoch": 0.9553708336695989, + "grad_norm": 0.7132978439331055, + "learning_rate": 7.216153518752571e-05, + "loss": 2.5238, + "step": 11838 + }, + { + "epoch": 0.9554515374061819, + "grad_norm": 0.661651611328125, + "learning_rate": 7.214637269800791e-05, + "loss": 2.445, + "step": 11839 + }, + { + "epoch": 0.9555322411427649, + "grad_norm": 0.6635430455207825, + "learning_rate": 7.213121090267528e-05, + "loss": 2.4707, + "step": 11840 + }, + { + "epoch": 0.9556129448793479, + "grad_norm": 0.6303616166114807, + "learning_rate": 7.211604980190571e-05, + "loss": 2.4923, + "step": 11841 + }, + { + "epoch": 0.9556936486159309, + "grad_norm": 0.7027459144592285, + "learning_rate": 7.210088939607708e-05, + "loss": 2.5592, + "step": 11842 + }, + { + "epoch": 0.955774352352514, + "grad_norm": 0.6539996862411499, + "learning_rate": 7.208572968556722e-05, + "loss": 2.5256, + "step": 11843 + }, + { + "epoch": 0.9558550560890969, + "grad_norm": 0.7019872069358826, + "learning_rate": 7.207057067075393e-05, + "loss": 2.488, + "step": 11844 + }, + { + "epoch": 0.9559357598256799, + "grad_norm": 0.6848211288452148, + "learning_rate": 7.205541235201507e-05, + "loss": 2.4883, + "step": 11845 + }, + { + "epoch": 0.9560164635622629, + "grad_norm": 0.7806351184844971, + "learning_rate": 7.204025472972834e-05, + "loss": 2.5563, + "step": 11846 + }, + { + "epoch": 0.956097167298846, + "grad_norm": 0.7327724695205688, + "learning_rate": 7.202509780427156e-05, + 
"loss": 2.5275, + "step": 11847 + }, + { + "epoch": 0.956177871035429, + "grad_norm": 0.6805681586265564, + "learning_rate": 7.200994157602248e-05, + "loss": 2.4723, + "step": 11848 + }, + { + "epoch": 0.9562585747720119, + "grad_norm": 0.7053409814834595, + "learning_rate": 7.19947860453588e-05, + "loss": 2.4471, + "step": 11849 + }, + { + "epoch": 0.9563392785085949, + "grad_norm": 0.6783127188682556, + "learning_rate": 7.197963121265826e-05, + "loss": 2.4586, + "step": 11850 + }, + { + "epoch": 0.956419982245178, + "grad_norm": 0.6639916300773621, + "learning_rate": 7.196447707829857e-05, + "loss": 2.4966, + "step": 11851 + }, + { + "epoch": 0.956500685981761, + "grad_norm": 0.684066891670227, + "learning_rate": 7.194932364265739e-05, + "loss": 2.5676, + "step": 11852 + }, + { + "epoch": 0.9565813897183439, + "grad_norm": 0.7872990965843201, + "learning_rate": 7.193417090611239e-05, + "loss": 2.5101, + "step": 11853 + }, + { + "epoch": 0.9566620934549269, + "grad_norm": 0.7543401122093201, + "learning_rate": 7.19190188690412e-05, + "loss": 2.5503, + "step": 11854 + }, + { + "epoch": 0.95674279719151, + "grad_norm": 0.6514382362365723, + "learning_rate": 7.190386753182152e-05, + "loss": 2.4902, + "step": 11855 + }, + { + "epoch": 0.956823500928093, + "grad_norm": 0.6867108345031738, + "learning_rate": 7.188871689483087e-05, + "loss": 2.5054, + "step": 11856 + }, + { + "epoch": 0.956904204664676, + "grad_norm": 0.6536040306091309, + "learning_rate": 7.187356695844687e-05, + "loss": 2.5462, + "step": 11857 + }, + { + "epoch": 0.9569849084012589, + "grad_norm": 0.690237820148468, + "learning_rate": 7.185841772304711e-05, + "loss": 2.5673, + "step": 11858 + }, + { + "epoch": 0.957065612137842, + "grad_norm": 0.6699091196060181, + "learning_rate": 7.184326918900915e-05, + "loss": 2.4733, + "step": 11859 + }, + { + "epoch": 0.957146315874425, + "grad_norm": 0.6482241153717041, + "learning_rate": 7.18281213567105e-05, + "loss": 2.4897, + "step": 11860 + }, + { + 
"epoch": 0.957227019611008, + "grad_norm": 0.686130166053772, + "learning_rate": 7.181297422652874e-05, + "loss": 2.4596, + "step": 11861 + }, + { + "epoch": 0.957307723347591, + "grad_norm": 0.6507205367088318, + "learning_rate": 7.179782779884132e-05, + "loss": 2.5527, + "step": 11862 + }, + { + "epoch": 0.957388427084174, + "grad_norm": 0.6578813195228577, + "learning_rate": 7.178268207402577e-05, + "loss": 2.4975, + "step": 11863 + }, + { + "epoch": 0.957469130820757, + "grad_norm": 0.6931977272033691, + "learning_rate": 7.176753705245956e-05, + "loss": 2.5533, + "step": 11864 + }, + { + "epoch": 0.95754983455734, + "grad_norm": 0.7306256890296936, + "learning_rate": 7.17523927345201e-05, + "loss": 2.534, + "step": 11865 + }, + { + "epoch": 0.957630538293923, + "grad_norm": 0.6337448358535767, + "learning_rate": 7.173724912058483e-05, + "loss": 2.5015, + "step": 11866 + }, + { + "epoch": 0.9577112420305061, + "grad_norm": 0.6561456322669983, + "learning_rate": 7.172210621103124e-05, + "loss": 2.4946, + "step": 11867 + }, + { + "epoch": 0.957791945767089, + "grad_norm": 0.6341130137443542, + "learning_rate": 7.170696400623666e-05, + "loss": 2.5611, + "step": 11868 + }, + { + "epoch": 0.957872649503672, + "grad_norm": 0.7202804088592529, + "learning_rate": 7.169182250657849e-05, + "loss": 2.5209, + "step": 11869 + }, + { + "epoch": 0.957953353240255, + "grad_norm": 0.6620556712150574, + "learning_rate": 7.167668171243408e-05, + "loss": 2.4895, + "step": 11870 + }, + { + "epoch": 0.9580340569768381, + "grad_norm": 0.6842508912086487, + "learning_rate": 7.166154162418087e-05, + "loss": 2.4417, + "step": 11871 + }, + { + "epoch": 0.958114760713421, + "grad_norm": 0.7539907693862915, + "learning_rate": 7.164640224219608e-05, + "loss": 2.5153, + "step": 11872 + }, + { + "epoch": 0.958195464450004, + "grad_norm": 0.6524286270141602, + "learning_rate": 7.163126356685703e-05, + "loss": 2.509, + "step": 11873 + }, + { + "epoch": 0.958276168186587, + "grad_norm": 
0.7022691965103149, + "learning_rate": 7.16161255985411e-05, + "loss": 2.5223, + "step": 11874 + }, + { + "epoch": 0.9583568719231701, + "grad_norm": 0.6659076809883118, + "learning_rate": 7.160098833762549e-05, + "loss": 2.5231, + "step": 11875 + }, + { + "epoch": 0.9584375756597531, + "grad_norm": 0.6756494641304016, + "learning_rate": 7.15858517844875e-05, + "loss": 2.5017, + "step": 11876 + }, + { + "epoch": 0.958518279396336, + "grad_norm": 0.729850709438324, + "learning_rate": 7.157071593950436e-05, + "loss": 2.4583, + "step": 11877 + }, + { + "epoch": 0.958598983132919, + "grad_norm": 0.7155230641365051, + "learning_rate": 7.155558080305326e-05, + "loss": 2.4753, + "step": 11878 + }, + { + "epoch": 0.9586796868695021, + "grad_norm": 0.6553284525871277, + "learning_rate": 7.154044637551147e-05, + "loss": 2.5093, + "step": 11879 + }, + { + "epoch": 0.9587603906060851, + "grad_norm": 0.6516379117965698, + "learning_rate": 7.152531265725617e-05, + "loss": 2.4996, + "step": 11880 + }, + { + "epoch": 0.9588410943426681, + "grad_norm": 0.6871184706687927, + "learning_rate": 7.151017964866449e-05, + "loss": 2.5322, + "step": 11881 + }, + { + "epoch": 0.958921798079251, + "grad_norm": 0.6998933553695679, + "learning_rate": 7.149504735011358e-05, + "loss": 2.5328, + "step": 11882 + }, + { + "epoch": 0.959002501815834, + "grad_norm": 0.7065120935440063, + "learning_rate": 7.147991576198065e-05, + "loss": 2.5251, + "step": 11883 + }, + { + "epoch": 0.9590832055524171, + "grad_norm": 0.6718337535858154, + "learning_rate": 7.146478488464275e-05, + "loss": 2.5596, + "step": 11884 + }, + { + "epoch": 0.9591639092890001, + "grad_norm": 0.6394883990287781, + "learning_rate": 7.144965471847698e-05, + "loss": 2.5022, + "step": 11885 + }, + { + "epoch": 0.9592446130255831, + "grad_norm": 0.6867207288742065, + "learning_rate": 7.143452526386045e-05, + "loss": 2.4927, + "step": 11886 + }, + { + "epoch": 0.959325316762166, + "grad_norm": 0.6710157990455627, + "learning_rate": 
7.141939652117026e-05, + "loss": 2.5127, + "step": 11887 + }, + { + "epoch": 0.9594060204987491, + "grad_norm": 0.6286540627479553, + "learning_rate": 7.14042684907834e-05, + "loss": 2.4966, + "step": 11888 + }, + { + "epoch": 0.9594867242353321, + "grad_norm": 0.7295787334442139, + "learning_rate": 7.13891411730769e-05, + "loss": 2.5127, + "step": 11889 + }, + { + "epoch": 0.9595674279719151, + "grad_norm": 0.646084189414978, + "learning_rate": 7.137401456842784e-05, + "loss": 2.5575, + "step": 11890 + }, + { + "epoch": 0.959648131708498, + "grad_norm": 0.7884495258331299, + "learning_rate": 7.135888867721312e-05, + "loss": 2.4807, + "step": 11891 + }, + { + "epoch": 0.9597288354450811, + "grad_norm": 0.638469934463501, + "learning_rate": 7.134376349980977e-05, + "loss": 2.4989, + "step": 11892 + }, + { + "epoch": 0.9598095391816641, + "grad_norm": 0.6802849769592285, + "learning_rate": 7.132863903659476e-05, + "loss": 2.5139, + "step": 11893 + }, + { + "epoch": 0.9598902429182471, + "grad_norm": 0.6657521724700928, + "learning_rate": 7.131351528794499e-05, + "loss": 2.4488, + "step": 11894 + }, + { + "epoch": 0.9599709466548301, + "grad_norm": 0.6537562012672424, + "learning_rate": 7.129839225423741e-05, + "loss": 2.4664, + "step": 11895 + }, + { + "epoch": 0.9600516503914132, + "grad_norm": 0.689637303352356, + "learning_rate": 7.128326993584897e-05, + "loss": 2.582, + "step": 11896 + }, + { + "epoch": 0.9601323541279961, + "grad_norm": 0.6701640486717224, + "learning_rate": 7.126814833315646e-05, + "loss": 2.4963, + "step": 11897 + }, + { + "epoch": 0.9602130578645791, + "grad_norm": 0.7466658353805542, + "learning_rate": 7.125302744653677e-05, + "loss": 2.5015, + "step": 11898 + }, + { + "epoch": 0.9602937616011621, + "grad_norm": 0.6487225294113159, + "learning_rate": 7.123790727636685e-05, + "loss": 2.5393, + "step": 11899 + }, + { + "epoch": 0.9603744653377452, + "grad_norm": 0.7204654216766357, + "learning_rate": 7.122278782302343e-05, + "loss": 2.4668, + 
"step": 11900 + }, + { + "epoch": 0.9604551690743282, + "grad_norm": 0.6852861046791077, + "learning_rate": 7.120766908688336e-05, + "loss": 2.5893, + "step": 11901 + }, + { + "epoch": 0.9605358728109111, + "grad_norm": 0.6483901739120483, + "learning_rate": 7.119255106832344e-05, + "loss": 2.48, + "step": 11902 + }, + { + "epoch": 0.9606165765474941, + "grad_norm": 0.6670375466346741, + "learning_rate": 7.117743376772049e-05, + "loss": 2.5225, + "step": 11903 + }, + { + "epoch": 0.9606972802840772, + "grad_norm": 0.6805974841117859, + "learning_rate": 7.116231718545118e-05, + "loss": 2.4652, + "step": 11904 + }, + { + "epoch": 0.9607779840206602, + "grad_norm": 0.6700397729873657, + "learning_rate": 7.114720132189232e-05, + "loss": 2.5115, + "step": 11905 + }, + { + "epoch": 0.9608586877572431, + "grad_norm": 0.7167409062385559, + "learning_rate": 7.113208617742066e-05, + "loss": 2.5062, + "step": 11906 + }, + { + "epoch": 0.9609393914938261, + "grad_norm": 0.7337077856063843, + "learning_rate": 7.111697175241286e-05, + "loss": 2.5768, + "step": 11907 + }, + { + "epoch": 0.9610200952304092, + "grad_norm": 0.6681819558143616, + "learning_rate": 7.110185804724558e-05, + "loss": 2.5058, + "step": 11908 + }, + { + "epoch": 0.9611007989669922, + "grad_norm": 0.7235603332519531, + "learning_rate": 7.10867450622956e-05, + "loss": 2.4606, + "step": 11909 + }, + { + "epoch": 0.9611815027035752, + "grad_norm": 0.6931360363960266, + "learning_rate": 7.107163279793947e-05, + "loss": 2.5129, + "step": 11910 + }, + { + "epoch": 0.9612622064401581, + "grad_norm": 0.7331648468971252, + "learning_rate": 7.105652125455388e-05, + "loss": 2.4916, + "step": 11911 + }, + { + "epoch": 0.9613429101767412, + "grad_norm": 0.6538143754005432, + "learning_rate": 7.104141043251545e-05, + "loss": 2.5184, + "step": 11912 + }, + { + "epoch": 0.9614236139133242, + "grad_norm": 0.7018921375274658, + "learning_rate": 7.102630033220077e-05, + "loss": 2.5446, + "step": 11913 + }, + { + "epoch": 
0.9615043176499072, + "grad_norm": 0.7528507709503174, + "learning_rate": 7.10111909539864e-05, + "loss": 2.4404, + "step": 11914 + }, + { + "epoch": 0.9615850213864902, + "grad_norm": 0.7258831858634949, + "learning_rate": 7.099608229824894e-05, + "loss": 2.4758, + "step": 11915 + }, + { + "epoch": 0.9616657251230732, + "grad_norm": 0.6954349875450134, + "learning_rate": 7.098097436536498e-05, + "loss": 2.4894, + "step": 11916 + }, + { + "epoch": 0.9617464288596562, + "grad_norm": 0.691584050655365, + "learning_rate": 7.096586715571092e-05, + "loss": 2.544, + "step": 11917 + }, + { + "epoch": 0.9618271325962392, + "grad_norm": 0.7107009291648865, + "learning_rate": 7.095076066966337e-05, + "loss": 2.4994, + "step": 11918 + }, + { + "epoch": 0.9619078363328222, + "grad_norm": 0.6492058634757996, + "learning_rate": 7.093565490759881e-05, + "loss": 2.5751, + "step": 11919 + }, + { + "epoch": 0.9619885400694053, + "grad_norm": 0.6817753314971924, + "learning_rate": 7.092054986989371e-05, + "loss": 2.5129, + "step": 11920 + }, + { + "epoch": 0.9620692438059882, + "grad_norm": 0.6991822123527527, + "learning_rate": 7.090544555692448e-05, + "loss": 2.5728, + "step": 11921 + }, + { + "epoch": 0.9621499475425712, + "grad_norm": 0.6627625226974487, + "learning_rate": 7.089034196906768e-05, + "loss": 2.4479, + "step": 11922 + }, + { + "epoch": 0.9622306512791542, + "grad_norm": 0.6889652013778687, + "learning_rate": 7.087523910669957e-05, + "loss": 2.5323, + "step": 11923 + }, + { + "epoch": 0.9623113550157373, + "grad_norm": 0.7863786816596985, + "learning_rate": 7.086013697019667e-05, + "loss": 2.5146, + "step": 11924 + }, + { + "epoch": 0.9623920587523203, + "grad_norm": 0.6885324716567993, + "learning_rate": 7.084503555993536e-05, + "loss": 2.5072, + "step": 11925 + }, + { + "epoch": 0.9624727624889032, + "grad_norm": 0.619239091873169, + "learning_rate": 7.082993487629192e-05, + "loss": 2.4622, + "step": 11926 + }, + { + "epoch": 0.9625534662254862, + "grad_norm": 
0.6762447953224182, + "learning_rate": 7.081483491964278e-05, + "loss": 2.5155, + "step": 11927 + }, + { + "epoch": 0.9626341699620693, + "grad_norm": 0.6559715867042542, + "learning_rate": 7.079973569036424e-05, + "loss": 2.4729, + "step": 11928 + }, + { + "epoch": 0.9627148736986523, + "grad_norm": 0.633280873298645, + "learning_rate": 7.078463718883261e-05, + "loss": 2.4715, + "step": 11929 + }, + { + "epoch": 0.9627955774352353, + "grad_norm": 0.7740094065666199, + "learning_rate": 7.07695394154242e-05, + "loss": 2.4871, + "step": 11930 + }, + { + "epoch": 0.9628762811718182, + "grad_norm": 0.7103284597396851, + "learning_rate": 7.075444237051527e-05, + "loss": 2.5299, + "step": 11931 + }, + { + "epoch": 0.9629569849084013, + "grad_norm": 0.6800934076309204, + "learning_rate": 7.073934605448212e-05, + "loss": 2.5919, + "step": 11932 + }, + { + "epoch": 0.9630376886449843, + "grad_norm": 0.6680917143821716, + "learning_rate": 7.072425046770092e-05, + "loss": 2.4942, + "step": 11933 + }, + { + "epoch": 0.9631183923815673, + "grad_norm": 0.7248062491416931, + "learning_rate": 7.070915561054792e-05, + "loss": 2.4956, + "step": 11934 + }, + { + "epoch": 0.9631990961181502, + "grad_norm": 0.6635782122612, + "learning_rate": 7.069406148339936e-05, + "loss": 2.4658, + "step": 11935 + }, + { + "epoch": 0.9632797998547332, + "grad_norm": 0.6751061081886292, + "learning_rate": 7.067896808663137e-05, + "loss": 2.4912, + "step": 11936 + }, + { + "epoch": 0.9633605035913163, + "grad_norm": 0.7476027607917786, + "learning_rate": 7.066387542062013e-05, + "loss": 2.4858, + "step": 11937 + }, + { + "epoch": 0.9634412073278993, + "grad_norm": 0.6770931482315063, + "learning_rate": 7.064878348574183e-05, + "loss": 2.4574, + "step": 11938 + }, + { + "epoch": 0.9635219110644823, + "grad_norm": 0.7105392813682556, + "learning_rate": 7.063369228237255e-05, + "loss": 2.5523, + "step": 11939 + }, + { + "epoch": 0.9636026148010652, + "grad_norm": 0.6806207299232483, + "learning_rate": 
7.061860181088842e-05, + "loss": 2.4992, + "step": 11940 + }, + { + "epoch": 0.9636833185376483, + "grad_norm": 0.7059600353240967, + "learning_rate": 7.060351207166558e-05, + "loss": 2.5778, + "step": 11941 + }, + { + "epoch": 0.9637640222742313, + "grad_norm": 0.6306884288787842, + "learning_rate": 7.058842306508002e-05, + "loss": 2.5389, + "step": 11942 + }, + { + "epoch": 0.9638447260108143, + "grad_norm": 0.6997150778770447, + "learning_rate": 7.057333479150783e-05, + "loss": 2.5077, + "step": 11943 + }, + { + "epoch": 0.9639254297473973, + "grad_norm": 0.7073743343353271, + "learning_rate": 7.05582472513251e-05, + "loss": 2.5274, + "step": 11944 + }, + { + "epoch": 0.9640061334839803, + "grad_norm": 0.6768803596496582, + "learning_rate": 7.054316044490777e-05, + "loss": 2.5155, + "step": 11945 + }, + { + "epoch": 0.9640868372205633, + "grad_norm": 0.6792057752609253, + "learning_rate": 7.052807437263189e-05, + "loss": 2.5509, + "step": 11946 + }, + { + "epoch": 0.9641675409571463, + "grad_norm": 0.6883981823921204, + "learning_rate": 7.051298903487344e-05, + "loss": 2.5176, + "step": 11947 + }, + { + "epoch": 0.9642482446937293, + "grad_norm": 0.6934401392936707, + "learning_rate": 7.049790443200844e-05, + "loss": 2.502, + "step": 11948 + }, + { + "epoch": 0.9643289484303124, + "grad_norm": 0.6882597804069519, + "learning_rate": 7.048282056441269e-05, + "loss": 2.487, + "step": 11949 + }, + { + "epoch": 0.9644096521668953, + "grad_norm": 0.6972896456718445, + "learning_rate": 7.046773743246225e-05, + "loss": 2.5304, + "step": 11950 + }, + { + "epoch": 0.9644903559034783, + "grad_norm": 0.6591988205909729, + "learning_rate": 7.045265503653303e-05, + "loss": 2.4734, + "step": 11951 + }, + { + "epoch": 0.9645710596400613, + "grad_norm": 0.6890063285827637, + "learning_rate": 7.043757337700082e-05, + "loss": 2.5289, + "step": 11952 + }, + { + "epoch": 0.9646517633766444, + "grad_norm": 0.6931065917015076, + "learning_rate": 7.042249245424157e-05, + "loss": 2.484, 
+ "step": 11953 + }, + { + "epoch": 0.9647324671132274, + "grad_norm": 0.6943762898445129, + "learning_rate": 7.040741226863117e-05, + "loss": 2.501, + "step": 11954 + }, + { + "epoch": 0.9648131708498103, + "grad_norm": 0.677154004573822, + "learning_rate": 7.039233282054536e-05, + "loss": 2.4976, + "step": 11955 + }, + { + "epoch": 0.9648938745863933, + "grad_norm": 0.6662883758544922, + "learning_rate": 7.037725411036003e-05, + "loss": 2.4928, + "step": 11956 + }, + { + "epoch": 0.9649745783229764, + "grad_norm": 0.6854663491249084, + "learning_rate": 7.0362176138451e-05, + "loss": 2.4657, + "step": 11957 + }, + { + "epoch": 0.9650552820595594, + "grad_norm": 0.6703238487243652, + "learning_rate": 7.034709890519397e-05, + "loss": 2.4879, + "step": 11958 + }, + { + "epoch": 0.9651359857961423, + "grad_norm": 0.7023652791976929, + "learning_rate": 7.033202241096474e-05, + "loss": 2.4619, + "step": 11959 + }, + { + "epoch": 0.9652166895327253, + "grad_norm": 0.6950454711914062, + "learning_rate": 7.031694665613911e-05, + "loss": 2.5125, + "step": 11960 + }, + { + "epoch": 0.9652973932693084, + "grad_norm": 0.6740411520004272, + "learning_rate": 7.030187164109272e-05, + "loss": 2.436, + "step": 11961 + }, + { + "epoch": 0.9653780970058914, + "grad_norm": 0.6697152256965637, + "learning_rate": 7.028679736620132e-05, + "loss": 2.5513, + "step": 11962 + }, + { + "epoch": 0.9654588007424744, + "grad_norm": 0.6920599937438965, + "learning_rate": 7.027172383184061e-05, + "loss": 2.5264, + "step": 11963 + }, + { + "epoch": 0.9655395044790573, + "grad_norm": 0.6493465304374695, + "learning_rate": 7.025665103838627e-05, + "loss": 2.4834, + "step": 11964 + }, + { + "epoch": 0.9656202082156404, + "grad_norm": 0.684092104434967, + "learning_rate": 7.02415789862139e-05, + "loss": 2.4662, + "step": 11965 + }, + { + "epoch": 0.9657009119522234, + "grad_norm": 0.7161515355110168, + "learning_rate": 7.022650767569921e-05, + "loss": 2.4648, + "step": 11966 + }, + { + "epoch": 
0.9657816156888064, + "grad_norm": 0.6994524002075195, + "learning_rate": 7.021143710721778e-05, + "loss": 2.5186, + "step": 11967 + }, + { + "epoch": 0.9658623194253894, + "grad_norm": 0.7105295062065125, + "learning_rate": 7.019636728114518e-05, + "loss": 2.5132, + "step": 11968 + }, + { + "epoch": 0.9659430231619724, + "grad_norm": 0.7182292938232422, + "learning_rate": 7.018129819785702e-05, + "loss": 2.5469, + "step": 11969 + }, + { + "epoch": 0.9660237268985554, + "grad_norm": 0.7021759152412415, + "learning_rate": 7.016622985772887e-05, + "loss": 2.5477, + "step": 11970 + }, + { + "epoch": 0.9661044306351384, + "grad_norm": 0.6751413941383362, + "learning_rate": 7.015116226113624e-05, + "loss": 2.5174, + "step": 11971 + }, + { + "epoch": 0.9661851343717214, + "grad_norm": 0.6341918110847473, + "learning_rate": 7.013609540845468e-05, + "loss": 2.4778, + "step": 11972 + }, + { + "epoch": 0.9662658381083045, + "grad_norm": 0.7080956697463989, + "learning_rate": 7.012102930005971e-05, + "loss": 2.5304, + "step": 11973 + }, + { + "epoch": 0.9663465418448874, + "grad_norm": 0.6367003321647644, + "learning_rate": 7.010596393632674e-05, + "loss": 2.4857, + "step": 11974 + }, + { + "epoch": 0.9664272455814704, + "grad_norm": 0.6841328740119934, + "learning_rate": 7.009089931763131e-05, + "loss": 2.5365, + "step": 11975 + }, + { + "epoch": 0.9665079493180534, + "grad_norm": 0.6568236351013184, + "learning_rate": 7.00758354443489e-05, + "loss": 2.5286, + "step": 11976 + }, + { + "epoch": 0.9665886530546365, + "grad_norm": 0.7071812152862549, + "learning_rate": 7.006077231685485e-05, + "loss": 2.458, + "step": 11977 + }, + { + "epoch": 0.9666693567912195, + "grad_norm": 0.6997712850570679, + "learning_rate": 7.004570993552462e-05, + "loss": 2.4571, + "step": 11978 + }, + { + "epoch": 0.9667500605278024, + "grad_norm": 0.6920793056488037, + "learning_rate": 7.003064830073359e-05, + "loss": 2.4172, + "step": 11979 + }, + { + "epoch": 0.9668307642643854, + "grad_norm": 
0.6823387742042542, + "learning_rate": 7.001558741285718e-05, + "loss": 2.4895, + "step": 11980 + }, + { + "epoch": 0.9669114680009685, + "grad_norm": 0.7309569716453552, + "learning_rate": 7.000052727227068e-05, + "loss": 2.502, + "step": 11981 + }, + { + "epoch": 0.9669921717375515, + "grad_norm": 0.734708845615387, + "learning_rate": 6.998546787934946e-05, + "loss": 2.4918, + "step": 11982 + }, + { + "epoch": 0.9670728754741345, + "grad_norm": 0.690406084060669, + "learning_rate": 6.997040923446889e-05, + "loss": 2.4994, + "step": 11983 + }, + { + "epoch": 0.9671535792107174, + "grad_norm": 0.7126687169075012, + "learning_rate": 6.995535133800416e-05, + "loss": 2.4824, + "step": 11984 + }, + { + "epoch": 0.9672342829473004, + "grad_norm": 0.7020599246025085, + "learning_rate": 6.994029419033062e-05, + "loss": 2.4889, + "step": 11985 + }, + { + "epoch": 0.9673149866838835, + "grad_norm": 0.7690796852111816, + "learning_rate": 6.992523779182356e-05, + "loss": 2.4997, + "step": 11986 + }, + { + "epoch": 0.9673956904204665, + "grad_norm": 0.6635778546333313, + "learning_rate": 6.991018214285816e-05, + "loss": 2.4989, + "step": 11987 + }, + { + "epoch": 0.9674763941570494, + "grad_norm": 0.7088577747344971, + "learning_rate": 6.989512724380967e-05, + "loss": 2.549, + "step": 11988 + }, + { + "epoch": 0.9675570978936324, + "grad_norm": 0.6420924663543701, + "learning_rate": 6.988007309505333e-05, + "loss": 2.4585, + "step": 11989 + }, + { + "epoch": 0.9676378016302155, + "grad_norm": 0.7902400493621826, + "learning_rate": 6.986501969696428e-05, + "loss": 2.5009, + "step": 11990 + }, + { + "epoch": 0.9677185053667985, + "grad_norm": 0.700907289981842, + "learning_rate": 6.984996704991773e-05, + "loss": 2.4778, + "step": 11991 + }, + { + "epoch": 0.9677992091033815, + "grad_norm": 0.664378821849823, + "learning_rate": 6.983491515428883e-05, + "loss": 2.5116, + "step": 11992 + }, + { + "epoch": 0.9678799128399644, + "grad_norm": 0.6314663887023926, + "learning_rate": 
6.981986401045266e-05, + "loss": 2.4588, + "step": 11993 + }, + { + "epoch": 0.9679606165765475, + "grad_norm": 0.6521078944206238, + "learning_rate": 6.980481361878438e-05, + "loss": 2.5224, + "step": 11994 + }, + { + "epoch": 0.9680413203131305, + "grad_norm": 0.6336014270782471, + "learning_rate": 6.978976397965907e-05, + "loss": 2.4297, + "step": 11995 + }, + { + "epoch": 0.9681220240497135, + "grad_norm": 0.7321500778198242, + "learning_rate": 6.977471509345183e-05, + "loss": 2.5252, + "step": 11996 + }, + { + "epoch": 0.9682027277862965, + "grad_norm": 0.686950147151947, + "learning_rate": 6.97596669605377e-05, + "loss": 2.5188, + "step": 11997 + }, + { + "epoch": 0.9682834315228795, + "grad_norm": 0.729343056678772, + "learning_rate": 6.97446195812917e-05, + "loss": 2.5157, + "step": 11998 + }, + { + "epoch": 0.9683641352594625, + "grad_norm": 0.6447068452835083, + "learning_rate": 6.972957295608889e-05, + "loss": 2.5041, + "step": 11999 + }, + { + "epoch": 0.9684448389960455, + "grad_norm": 0.6847280859947205, + "learning_rate": 6.971452708530423e-05, + "loss": 2.443, + "step": 12000 + }, + { + "epoch": 0.9684448389960455, + "eval_loss": 2.431878089904785, + "eval_runtime": 758.167, + "eval_samples_per_second": 3.456, + "eval_steps_per_second": 0.576, + "step": 12000 + }, + { + "epoch": 0.9685255427326285, + "grad_norm": 0.6440466046333313, + "learning_rate": 6.969948196931272e-05, + "loss": 2.5091, + "step": 12001 + }, + { + "epoch": 0.9686062464692116, + "grad_norm": 0.6570029258728027, + "learning_rate": 6.968443760848937e-05, + "loss": 2.491, + "step": 12002 + }, + { + "epoch": 0.9686869502057945, + "grad_norm": 0.7610877752304077, + "learning_rate": 6.966939400320905e-05, + "loss": 2.4713, + "step": 12003 + }, + { + "epoch": 0.9687676539423775, + "grad_norm": 0.7187781929969788, + "learning_rate": 6.965435115384669e-05, + "loss": 2.4303, + "step": 12004 + }, + { + "epoch": 0.9688483576789605, + "grad_norm": 0.7668420672416687, + "learning_rate": 
6.963930906077727e-05, + "loss": 2.5513, + "step": 12005 + }, + { + "epoch": 0.9689290614155436, + "grad_norm": 0.7025619745254517, + "learning_rate": 6.96242677243756e-05, + "loss": 2.4349, + "step": 12006 + }, + { + "epoch": 0.9690097651521266, + "grad_norm": 0.7066935896873474, + "learning_rate": 6.960922714501657e-05, + "loss": 2.5465, + "step": 12007 + }, + { + "epoch": 0.9690904688887095, + "grad_norm": 0.6758970618247986, + "learning_rate": 6.95941873230751e-05, + "loss": 2.4827, + "step": 12008 + }, + { + "epoch": 0.9691711726252925, + "grad_norm": 0.7108862996101379, + "learning_rate": 6.957914825892591e-05, + "loss": 2.5412, + "step": 12009 + }, + { + "epoch": 0.9692518763618756, + "grad_norm": 0.660784125328064, + "learning_rate": 6.956410995294389e-05, + "loss": 2.5173, + "step": 12010 + }, + { + "epoch": 0.9693325800984586, + "grad_norm": 0.6966561079025269, + "learning_rate": 6.954907240550377e-05, + "loss": 2.5196, + "step": 12011 + }, + { + "epoch": 0.9694132838350416, + "grad_norm": 0.6889416575431824, + "learning_rate": 6.953403561698042e-05, + "loss": 2.5351, + "step": 12012 + }, + { + "epoch": 0.9694939875716245, + "grad_norm": 0.7578341960906982, + "learning_rate": 6.951899958774852e-05, + "loss": 2.5184, + "step": 12013 + }, + { + "epoch": 0.9695746913082076, + "grad_norm": 0.6735317707061768, + "learning_rate": 6.950396431818282e-05, + "loss": 2.4592, + "step": 12014 + }, + { + "epoch": 0.9696553950447906, + "grad_norm": 0.6903232932090759, + "learning_rate": 6.948892980865806e-05, + "loss": 2.5212, + "step": 12015 + }, + { + "epoch": 0.9697360987813736, + "grad_norm": 0.6477165818214417, + "learning_rate": 6.94738960595489e-05, + "loss": 2.4423, + "step": 12016 + }, + { + "epoch": 0.9698168025179565, + "grad_norm": 0.6778751015663147, + "learning_rate": 6.945886307123007e-05, + "loss": 2.547, + "step": 12017 + }, + { + "epoch": 0.9698975062545396, + "grad_norm": 0.690558135509491, + "learning_rate": 6.944383084407623e-05, + "loss": 2.5081, + 
"step": 12018 + }, + { + "epoch": 0.9699782099911226, + "grad_norm": 0.7210639119148254, + "learning_rate": 6.942879937846196e-05, + "loss": 2.496, + "step": 12019 + }, + { + "epoch": 0.9700589137277056, + "grad_norm": 0.7182444930076599, + "learning_rate": 6.941376867476194e-05, + "loss": 2.6138, + "step": 12020 + }, + { + "epoch": 0.9701396174642886, + "grad_norm": 0.6929295063018799, + "learning_rate": 6.939873873335077e-05, + "loss": 2.4828, + "step": 12021 + }, + { + "epoch": 0.9702203212008716, + "grad_norm": 0.6919693350791931, + "learning_rate": 6.938370955460298e-05, + "loss": 2.5123, + "step": 12022 + }, + { + "epoch": 0.9703010249374546, + "grad_norm": 0.6475244164466858, + "learning_rate": 6.93686811388932e-05, + "loss": 2.4992, + "step": 12023 + }, + { + "epoch": 0.9703817286740376, + "grad_norm": 0.6728265881538391, + "learning_rate": 6.935365348659597e-05, + "loss": 2.4486, + "step": 12024 + }, + { + "epoch": 0.9704624324106206, + "grad_norm": 0.6791470646858215, + "learning_rate": 6.933862659808582e-05, + "loss": 2.4657, + "step": 12025 + }, + { + "epoch": 0.9705431361472037, + "grad_norm": 0.7611662745475769, + "learning_rate": 6.932360047373721e-05, + "loss": 2.5243, + "step": 12026 + }, + { + "epoch": 0.9706238398837866, + "grad_norm": 0.6642355918884277, + "learning_rate": 6.930857511392467e-05, + "loss": 2.5308, + "step": 12027 + }, + { + "epoch": 0.9707045436203696, + "grad_norm": 0.7270805239677429, + "learning_rate": 6.92935505190227e-05, + "loss": 2.4708, + "step": 12028 + }, + { + "epoch": 0.9707852473569526, + "grad_norm": 0.6706295013427734, + "learning_rate": 6.927852668940568e-05, + "loss": 2.5136, + "step": 12029 + }, + { + "epoch": 0.9708659510935357, + "grad_norm": 0.6923376321792603, + "learning_rate": 6.92635036254481e-05, + "loss": 2.5238, + "step": 12030 + }, + { + "epoch": 0.9709466548301187, + "grad_norm": 0.7154483199119568, + "learning_rate": 6.924848132752436e-05, + "loss": 2.488, + "step": 12031 + }, + { + "epoch": 
0.9710273585667016, + "grad_norm": 0.6675701141357422, + "learning_rate": 6.923345979600884e-05, + "loss": 2.5066, + "step": 12032 + }, + { + "epoch": 0.9711080623032846, + "grad_norm": 0.7282043695449829, + "learning_rate": 6.921843903127592e-05, + "loss": 2.5096, + "step": 12033 + }, + { + "epoch": 0.9711887660398677, + "grad_norm": 0.663526177406311, + "learning_rate": 6.92034190337e-05, + "loss": 2.5276, + "step": 12034 + }, + { + "epoch": 0.9712694697764507, + "grad_norm": 0.7491087913513184, + "learning_rate": 6.918839980365534e-05, + "loss": 2.5044, + "step": 12035 + }, + { + "epoch": 0.9713501735130337, + "grad_norm": 0.6977766156196594, + "learning_rate": 6.917338134151629e-05, + "loss": 2.6102, + "step": 12036 + }, + { + "epoch": 0.9714308772496166, + "grad_norm": 0.6447446346282959, + "learning_rate": 6.915836364765722e-05, + "loss": 2.5137, + "step": 12037 + }, + { + "epoch": 0.9715115809861996, + "grad_norm": 0.6801442503929138, + "learning_rate": 6.91433467224523e-05, + "loss": 2.5145, + "step": 12038 + }, + { + "epoch": 0.9715922847227827, + "grad_norm": 0.6843627691268921, + "learning_rate": 6.912833056627583e-05, + "loss": 2.6099, + "step": 12039 + }, + { + "epoch": 0.9716729884593657, + "grad_norm": 0.6862856149673462, + "learning_rate": 6.911331517950209e-05, + "loss": 2.5358, + "step": 12040 + }, + { + "epoch": 0.9717536921959486, + "grad_norm": 0.6835047602653503, + "learning_rate": 6.909830056250527e-05, + "loss": 2.5257, + "step": 12041 + }, + { + "epoch": 0.9718343959325316, + "grad_norm": 0.6958080530166626, + "learning_rate": 6.908328671565956e-05, + "loss": 2.5008, + "step": 12042 + }, + { + "epoch": 0.9719150996691147, + "grad_norm": 0.7556219100952148, + "learning_rate": 6.906827363933917e-05, + "loss": 2.5283, + "step": 12043 + }, + { + "epoch": 0.9719958034056977, + "grad_norm": 0.7074917554855347, + "learning_rate": 6.90532613339183e-05, + "loss": 2.4898, + "step": 12044 + }, + { + "epoch": 0.9720765071422807, + "grad_norm": 
0.6456350684165955, + "learning_rate": 6.903824979977101e-05, + "loss": 2.4989, + "step": 12045 + }, + { + "epoch": 0.9721572108788636, + "grad_norm": 0.6609941720962524, + "learning_rate": 6.902323903727146e-05, + "loss": 2.4883, + "step": 12046 + }, + { + "epoch": 0.9722379146154467, + "grad_norm": 0.7132936716079712, + "learning_rate": 6.90082290467938e-05, + "loss": 2.4983, + "step": 12047 + }, + { + "epoch": 0.9723186183520297, + "grad_norm": 0.6686434745788574, + "learning_rate": 6.899321982871206e-05, + "loss": 2.4862, + "step": 12048 + }, + { + "epoch": 0.9723993220886127, + "grad_norm": 0.6792194247245789, + "learning_rate": 6.897821138340033e-05, + "loss": 2.5368, + "step": 12049 + }, + { + "epoch": 0.9724800258251957, + "grad_norm": 0.6829379796981812, + "learning_rate": 6.896320371123268e-05, + "loss": 2.4842, + "step": 12050 + }, + { + "epoch": 0.9725607295617787, + "grad_norm": 0.7459573745727539, + "learning_rate": 6.894819681258312e-05, + "loss": 2.5023, + "step": 12051 + }, + { + "epoch": 0.9726414332983617, + "grad_norm": 0.6700068712234497, + "learning_rate": 6.893319068782566e-05, + "loss": 2.552, + "step": 12052 + }, + { + "epoch": 0.9727221370349447, + "grad_norm": 0.7093638777732849, + "learning_rate": 6.891818533733434e-05, + "loss": 2.445, + "step": 12053 + }, + { + "epoch": 0.9728028407715277, + "grad_norm": 0.703599214553833, + "learning_rate": 6.890318076148304e-05, + "loss": 2.5536, + "step": 12054 + }, + { + "epoch": 0.9728835445081108, + "grad_norm": 0.6214482188224792, + "learning_rate": 6.888817696064578e-05, + "loss": 2.5188, + "step": 12055 + }, + { + "epoch": 0.9729642482446937, + "grad_norm": 0.6893547773361206, + "learning_rate": 6.887317393519645e-05, + "loss": 2.5596, + "step": 12056 + }, + { + "epoch": 0.9730449519812767, + "grad_norm": 0.6282656788825989, + "learning_rate": 6.885817168550903e-05, + "loss": 2.4873, + "step": 12057 + }, + { + "epoch": 0.9731256557178597, + "grad_norm": 0.6979188323020935, + "learning_rate": 
6.884317021195737e-05, + "loss": 2.5358, + "step": 12058 + }, + { + "epoch": 0.9732063594544428, + "grad_norm": 0.7925785183906555, + "learning_rate": 6.882816951491533e-05, + "loss": 2.5358, + "step": 12059 + }, + { + "epoch": 0.9732870631910258, + "grad_norm": 0.6449821591377258, + "learning_rate": 6.881316959475684e-05, + "loss": 2.4784, + "step": 12060 + }, + { + "epoch": 0.9733677669276087, + "grad_norm": 0.7013393044471741, + "learning_rate": 6.879817045185565e-05, + "loss": 2.4804, + "step": 12061 + }, + { + "epoch": 0.9734484706641917, + "grad_norm": 0.8338057398796082, + "learning_rate": 6.878317208658559e-05, + "loss": 2.512, + "step": 12062 + }, + { + "epoch": 0.9735291744007748, + "grad_norm": 0.6815133094787598, + "learning_rate": 6.876817449932054e-05, + "loss": 2.467, + "step": 12063 + }, + { + "epoch": 0.9736098781373578, + "grad_norm": 0.659156858921051, + "learning_rate": 6.87531776904342e-05, + "loss": 2.503, + "step": 12064 + }, + { + "epoch": 0.9736905818739408, + "grad_norm": 0.7149603962898254, + "learning_rate": 6.873818166030033e-05, + "loss": 2.5135, + "step": 12065 + }, + { + "epoch": 0.9737712856105237, + "grad_norm": 0.7010510563850403, + "learning_rate": 6.872318640929272e-05, + "loss": 2.5133, + "step": 12066 + }, + { + "epoch": 0.9738519893471068, + "grad_norm": 0.6247616410255432, + "learning_rate": 6.870819193778504e-05, + "loss": 2.5189, + "step": 12067 + }, + { + "epoch": 0.9739326930836898, + "grad_norm": 0.6938940286636353, + "learning_rate": 6.869319824615101e-05, + "loss": 2.5053, + "step": 12068 + }, + { + "epoch": 0.9740133968202728, + "grad_norm": 0.7636895179748535, + "learning_rate": 6.867820533476436e-05, + "loss": 2.4989, + "step": 12069 + }, + { + "epoch": 0.9740941005568557, + "grad_norm": 0.6489234566688538, + "learning_rate": 6.866321320399869e-05, + "loss": 2.4935, + "step": 12070 + }, + { + "epoch": 0.9741748042934388, + "grad_norm": 0.6752095818519592, + "learning_rate": 6.864822185422764e-05, + "loss": 2.4835, 
+ "step": 12071 + }, + { + "epoch": 0.9742555080300218, + "grad_norm": 0.6947118639945984, + "learning_rate": 6.863323128582486e-05, + "loss": 2.504, + "step": 12072 + }, + { + "epoch": 0.9743362117666048, + "grad_norm": 0.6815536618232727, + "learning_rate": 6.861824149916398e-05, + "loss": 2.5369, + "step": 12073 + }, + { + "epoch": 0.9744169155031878, + "grad_norm": 0.6550236344337463, + "learning_rate": 6.860325249461852e-05, + "loss": 2.4753, + "step": 12074 + }, + { + "epoch": 0.9744976192397709, + "grad_norm": 0.6833250522613525, + "learning_rate": 6.858826427256209e-05, + "loss": 2.4687, + "step": 12075 + }, + { + "epoch": 0.9745783229763538, + "grad_norm": 0.6925075650215149, + "learning_rate": 6.857327683336824e-05, + "loss": 2.5363, + "step": 12076 + }, + { + "epoch": 0.9746590267129368, + "grad_norm": 0.6754821538925171, + "learning_rate": 6.855829017741046e-05, + "loss": 2.4696, + "step": 12077 + }, + { + "epoch": 0.9747397304495198, + "grad_norm": 0.7360671162605286, + "learning_rate": 6.854330430506228e-05, + "loss": 2.5144, + "step": 12078 + }, + { + "epoch": 0.9748204341861029, + "grad_norm": 0.6814733743667603, + "learning_rate": 6.852831921669723e-05, + "loss": 2.5059, + "step": 12079 + }, + { + "epoch": 0.9749011379226858, + "grad_norm": 0.7106744647026062, + "learning_rate": 6.851333491268869e-05, + "loss": 2.453, + "step": 12080 + }, + { + "epoch": 0.9749818416592688, + "grad_norm": 0.6623831987380981, + "learning_rate": 6.849835139341015e-05, + "loss": 2.5244, + "step": 12081 + }, + { + "epoch": 0.9750625453958518, + "grad_norm": 0.6723372936248779, + "learning_rate": 6.848336865923506e-05, + "loss": 2.5159, + "step": 12082 + }, + { + "epoch": 0.9751432491324349, + "grad_norm": 0.7256618142127991, + "learning_rate": 6.84683867105368e-05, + "loss": 2.494, + "step": 12083 + }, + { + "epoch": 0.9752239528690179, + "grad_norm": 0.6881731152534485, + "learning_rate": 6.845340554768874e-05, + "loss": 2.4374, + "step": 12084 + }, + { + "epoch": 
0.9753046566056008, + "grad_norm": 0.6759666204452515, + "learning_rate": 6.843842517106434e-05, + "loss": 2.5082, + "step": 12085 + }, + { + "epoch": 0.9753853603421838, + "grad_norm": 0.6983315348625183, + "learning_rate": 6.842344558103684e-05, + "loss": 2.5191, + "step": 12086 + }, + { + "epoch": 0.9754660640787668, + "grad_norm": 0.6805596351623535, + "learning_rate": 6.840846677797959e-05, + "loss": 2.5289, + "step": 12087 + }, + { + "epoch": 0.9755467678153499, + "grad_norm": 0.712942361831665, + "learning_rate": 6.839348876226595e-05, + "loss": 2.5544, + "step": 12088 + }, + { + "epoch": 0.9756274715519329, + "grad_norm": 0.6931124329566956, + "learning_rate": 6.837851153426924e-05, + "loss": 2.5407, + "step": 12089 + }, + { + "epoch": 0.9757081752885158, + "grad_norm": 0.6939486265182495, + "learning_rate": 6.836353509436264e-05, + "loss": 2.5236, + "step": 12090 + }, + { + "epoch": 0.9757888790250988, + "grad_norm": 0.7434083223342896, + "learning_rate": 6.834855944291944e-05, + "loss": 2.4903, + "step": 12091 + }, + { + "epoch": 0.9758695827616819, + "grad_norm": 0.672177255153656, + "learning_rate": 6.833358458031292e-05, + "loss": 2.4995, + "step": 12092 + }, + { + "epoch": 0.9759502864982649, + "grad_norm": 0.6631280779838562, + "learning_rate": 6.831861050691619e-05, + "loss": 2.4689, + "step": 12093 + }, + { + "epoch": 0.9760309902348479, + "grad_norm": 0.7485793232917786, + "learning_rate": 6.830363722310253e-05, + "loss": 2.5526, + "step": 12094 + }, + { + "epoch": 0.9761116939714308, + "grad_norm": 0.6592193245887756, + "learning_rate": 6.828866472924511e-05, + "loss": 2.4425, + "step": 12095 + }, + { + "epoch": 0.9761923977080139, + "grad_norm": 0.6479860544204712, + "learning_rate": 6.827369302571703e-05, + "loss": 2.4637, + "step": 12096 + }, + { + "epoch": 0.9762731014445969, + "grad_norm": 0.6694966554641724, + "learning_rate": 6.825872211289146e-05, + "loss": 2.5256, + "step": 12097 + }, + { + "epoch": 0.9763538051811799, + "grad_norm": 
0.675751805305481, + "learning_rate": 6.82437519911415e-05, + "loss": 2.5021, + "step": 12098 + }, + { + "epoch": 0.9764345089177628, + "grad_norm": 0.7255450487136841, + "learning_rate": 6.822878266084026e-05, + "loss": 2.5275, + "step": 12099 + }, + { + "epoch": 0.9765152126543459, + "grad_norm": 0.7034213542938232, + "learning_rate": 6.821381412236079e-05, + "loss": 2.5432, + "step": 12100 + }, + { + "epoch": 0.9765959163909289, + "grad_norm": 0.6808038949966431, + "learning_rate": 6.819884637607619e-05, + "loss": 2.5044, + "step": 12101 + }, + { + "epoch": 0.9766766201275119, + "grad_norm": 0.6601580381393433, + "learning_rate": 6.818387942235945e-05, + "loss": 2.4602, + "step": 12102 + }, + { + "epoch": 0.9767573238640949, + "grad_norm": 0.7163928151130676, + "learning_rate": 6.816891326158359e-05, + "loss": 2.4785, + "step": 12103 + }, + { + "epoch": 0.976838027600678, + "grad_norm": 0.6616904735565186, + "learning_rate": 6.815394789412164e-05, + "loss": 2.5081, + "step": 12104 + }, + { + "epoch": 0.9769187313372609, + "grad_norm": 0.6476422548294067, + "learning_rate": 6.813898332034657e-05, + "loss": 2.4624, + "step": 12105 + }, + { + "epoch": 0.9769994350738439, + "grad_norm": 0.6468440890312195, + "learning_rate": 6.812401954063131e-05, + "loss": 2.4948, + "step": 12106 + }, + { + "epoch": 0.9770801388104269, + "grad_norm": 0.6988391876220703, + "learning_rate": 6.810905655534878e-05, + "loss": 2.4958, + "step": 12107 + }, + { + "epoch": 0.97716084254701, + "grad_norm": 0.6777953505516052, + "learning_rate": 6.809409436487196e-05, + "loss": 2.5304, + "step": 12108 + }, + { + "epoch": 0.9772415462835929, + "grad_norm": 0.7115550637245178, + "learning_rate": 6.807913296957368e-05, + "loss": 2.5321, + "step": 12109 + }, + { + "epoch": 0.9773222500201759, + "grad_norm": 0.737823486328125, + "learning_rate": 6.806417236982684e-05, + "loss": 2.5121, + "step": 12110 + }, + { + "epoch": 0.9774029537567589, + "grad_norm": 0.6797437071800232, + "learning_rate": 
6.804921256600439e-05, + "loss": 2.4783, + "step": 12111 + }, + { + "epoch": 0.977483657493342, + "grad_norm": 0.7240802645683289, + "learning_rate": 6.803425355847897e-05, + "loss": 2.4949, + "step": 12112 + }, + { + "epoch": 0.977564361229925, + "grad_norm": 0.6433781981468201, + "learning_rate": 6.801929534762357e-05, + "loss": 2.4937, + "step": 12113 + }, + { + "epoch": 0.9776450649665079, + "grad_norm": 0.6935293078422546, + "learning_rate": 6.800433793381095e-05, + "loss": 2.5025, + "step": 12114 + }, + { + "epoch": 0.9777257687030909, + "grad_norm": 0.699780285358429, + "learning_rate": 6.798938131741383e-05, + "loss": 2.5231, + "step": 12115 + }, + { + "epoch": 0.977806472439674, + "grad_norm": 0.6414729952812195, + "learning_rate": 6.7974425498805e-05, + "loss": 2.4422, + "step": 12116 + }, + { + "epoch": 0.977887176176257, + "grad_norm": 0.6733608841896057, + "learning_rate": 6.795947047835722e-05, + "loss": 2.4873, + "step": 12117 + }, + { + "epoch": 0.97796787991284, + "grad_norm": 0.6985765099525452, + "learning_rate": 6.794451625644318e-05, + "loss": 2.4994, + "step": 12118 + }, + { + "epoch": 0.9780485836494229, + "grad_norm": 0.6429893374443054, + "learning_rate": 6.792956283343559e-05, + "loss": 2.4968, + "step": 12119 + }, + { + "epoch": 0.978129287386006, + "grad_norm": 0.7129024267196655, + "learning_rate": 6.79146102097071e-05, + "loss": 2.5457, + "step": 12120 + }, + { + "epoch": 0.978209991122589, + "grad_norm": 0.6811943650245667, + "learning_rate": 6.789965838563047e-05, + "loss": 2.5012, + "step": 12121 + }, + { + "epoch": 0.978290694859172, + "grad_norm": 0.7269948720932007, + "learning_rate": 6.788470736157821e-05, + "loss": 2.5124, + "step": 12122 + }, + { + "epoch": 0.978371398595755, + "grad_norm": 0.7396084666252136, + "learning_rate": 6.786975713792299e-05, + "loss": 2.5631, + "step": 12123 + }, + { + "epoch": 0.978452102332338, + "grad_norm": 0.6880094408988953, + "learning_rate": 6.785480771503745e-05, + "loss": 2.5103, + "step": 
12124 + }, + { + "epoch": 0.978532806068921, + "grad_norm": 0.737095057964325, + "learning_rate": 6.783985909329409e-05, + "loss": 2.5062, + "step": 12125 + }, + { + "epoch": 0.978613509805504, + "grad_norm": 0.6540948152542114, + "learning_rate": 6.782491127306552e-05, + "loss": 2.5568, + "step": 12126 + }, + { + "epoch": 0.978694213542087, + "grad_norm": 0.669706404209137, + "learning_rate": 6.780996425472427e-05, + "loss": 2.5156, + "step": 12127 + }, + { + "epoch": 0.97877491727867, + "grad_norm": 0.6722843647003174, + "learning_rate": 6.779501803864286e-05, + "loss": 2.4784, + "step": 12128 + }, + { + "epoch": 0.978855621015253, + "grad_norm": 0.6545475125312805, + "learning_rate": 6.778007262519377e-05, + "loss": 2.5159, + "step": 12129 + }, + { + "epoch": 0.978936324751836, + "grad_norm": 0.7010136246681213, + "learning_rate": 6.776512801474953e-05, + "loss": 2.5244, + "step": 12130 + }, + { + "epoch": 0.979017028488419, + "grad_norm": 0.6912714242935181, + "learning_rate": 6.775018420768253e-05, + "loss": 2.5223, + "step": 12131 + }, + { + "epoch": 0.9790977322250021, + "grad_norm": 0.6864827275276184, + "learning_rate": 6.773524120436525e-05, + "loss": 2.5027, + "step": 12132 + }, + { + "epoch": 0.979178435961585, + "grad_norm": 0.7586981058120728, + "learning_rate": 6.77202990051701e-05, + "loss": 2.4554, + "step": 12133 + }, + { + "epoch": 0.979259139698168, + "grad_norm": 0.6487839818000793, + "learning_rate": 6.770535761046948e-05, + "loss": 2.5035, + "step": 12134 + }, + { + "epoch": 0.979339843434751, + "grad_norm": 0.7193071246147156, + "learning_rate": 6.769041702063575e-05, + "loss": 2.4669, + "step": 12135 + }, + { + "epoch": 0.9794205471713341, + "grad_norm": 0.7118960618972778, + "learning_rate": 6.76754772360413e-05, + "loss": 2.493, + "step": 12136 + }, + { + "epoch": 0.9795012509079171, + "grad_norm": 0.6617394685745239, + "learning_rate": 6.766053825705847e-05, + "loss": 2.4771, + "step": 12137 + }, + { + "epoch": 0.9795819546445, + 
"grad_norm": 0.7664859294891357, + "learning_rate": 6.764560008405953e-05, + "loss": 2.5191, + "step": 12138 + }, + { + "epoch": 0.979662658381083, + "grad_norm": 0.708063542842865, + "learning_rate": 6.763066271741682e-05, + "loss": 2.5521, + "step": 12139 + }, + { + "epoch": 0.979743362117666, + "grad_norm": 0.6951049566268921, + "learning_rate": 6.761572615750267e-05, + "loss": 2.4708, + "step": 12140 + }, + { + "epoch": 0.9798240658542491, + "grad_norm": 0.6914932727813721, + "learning_rate": 6.760079040468921e-05, + "loss": 2.5101, + "step": 12141 + }, + { + "epoch": 0.9799047695908321, + "grad_norm": 0.6843075752258301, + "learning_rate": 6.758585545934876e-05, + "loss": 2.4932, + "step": 12142 + }, + { + "epoch": 0.979985473327415, + "grad_norm": 0.6567733883857727, + "learning_rate": 6.757092132185354e-05, + "loss": 2.4577, + "step": 12143 + }, + { + "epoch": 0.980066177063998, + "grad_norm": 0.6874415874481201, + "learning_rate": 6.75559879925757e-05, + "loss": 2.4818, + "step": 12144 + }, + { + "epoch": 0.9801468808005811, + "grad_norm": 0.7274627685546875, + "learning_rate": 6.754105547188746e-05, + "loss": 2.523, + "step": 12145 + }, + { + "epoch": 0.9802275845371641, + "grad_norm": 0.6991173028945923, + "learning_rate": 6.7526123760161e-05, + "loss": 2.4864, + "step": 12146 + }, + { + "epoch": 0.980308288273747, + "grad_norm": 0.670078456401825, + "learning_rate": 6.75111928577684e-05, + "loss": 2.4889, + "step": 12147 + }, + { + "epoch": 0.98038899201033, + "grad_norm": 0.6653482913970947, + "learning_rate": 6.749626276508178e-05, + "loss": 2.4652, + "step": 12148 + }, + { + "epoch": 0.9804696957469131, + "grad_norm": 0.7329251766204834, + "learning_rate": 6.748133348247326e-05, + "loss": 2.518, + "step": 12149 + }, + { + "epoch": 0.9805503994834961, + "grad_norm": 0.7792871594429016, + "learning_rate": 6.746640501031495e-05, + "loss": 2.5018, + "step": 12150 + }, + { + "epoch": 0.9806311032200791, + "grad_norm": 0.6962797045707703, + "learning_rate": 
6.745147734897883e-05, + "loss": 2.4388, + "step": 12151 + }, + { + "epoch": 0.980711806956662, + "grad_norm": 0.6981272101402283, + "learning_rate": 6.7436550498837e-05, + "loss": 2.4886, + "step": 12152 + }, + { + "epoch": 0.9807925106932451, + "grad_norm": 0.6696565747261047, + "learning_rate": 6.742162446026146e-05, + "loss": 2.5258, + "step": 12153 + }, + { + "epoch": 0.9808732144298281, + "grad_norm": 0.6922139525413513, + "learning_rate": 6.740669923362417e-05, + "loss": 2.493, + "step": 12154 + }, + { + "epoch": 0.9809539181664111, + "grad_norm": 0.6745694875717163, + "learning_rate": 6.739177481929715e-05, + "loss": 2.5209, + "step": 12155 + }, + { + "epoch": 0.9810346219029941, + "grad_norm": 0.7023215889930725, + "learning_rate": 6.737685121765238e-05, + "loss": 2.4987, + "step": 12156 + }, + { + "epoch": 0.9811153256395772, + "grad_norm": 0.6337805390357971, + "learning_rate": 6.73619284290617e-05, + "loss": 2.4838, + "step": 12157 + }, + { + "epoch": 0.9811960293761601, + "grad_norm": 0.6747817397117615, + "learning_rate": 6.73470064538971e-05, + "loss": 2.4834, + "step": 12158 + }, + { + "epoch": 0.9812767331127431, + "grad_norm": 0.6714580655097961, + "learning_rate": 6.733208529253047e-05, + "loss": 2.4724, + "step": 12159 + }, + { + "epoch": 0.9813574368493261, + "grad_norm": 0.6927861571311951, + "learning_rate": 6.731716494533364e-05, + "loss": 2.495, + "step": 12160 + }, + { + "epoch": 0.9814381405859092, + "grad_norm": 0.6576036214828491, + "learning_rate": 6.73022454126785e-05, + "loss": 2.5415, + "step": 12161 + }, + { + "epoch": 0.9815188443224921, + "grad_norm": 0.6495294570922852, + "learning_rate": 6.728732669493691e-05, + "loss": 2.4889, + "step": 12162 + }, + { + "epoch": 0.9815995480590751, + "grad_norm": 0.6680364012718201, + "learning_rate": 6.72724087924806e-05, + "loss": 2.4733, + "step": 12163 + }, + { + "epoch": 0.9816802517956581, + "grad_norm": 0.6816582083702087, + "learning_rate": 6.725749170568143e-05, + "loss": 2.4688, + 
"step": 12164 + }, + { + "epoch": 0.9817609555322412, + "grad_norm": 0.6995956897735596, + "learning_rate": 6.724257543491116e-05, + "loss": 2.4962, + "step": 12165 + }, + { + "epoch": 0.9818416592688242, + "grad_norm": 0.6728340983390808, + "learning_rate": 6.722765998054157e-05, + "loss": 2.5218, + "step": 12166 + }, + { + "epoch": 0.9819223630054071, + "grad_norm": 0.6835319995880127, + "learning_rate": 6.721274534294433e-05, + "loss": 2.4845, + "step": 12167 + }, + { + "epoch": 0.9820030667419901, + "grad_norm": 0.6969910264015198, + "learning_rate": 6.719783152249119e-05, + "loss": 2.4983, + "step": 12168 + }, + { + "epoch": 0.9820837704785732, + "grad_norm": 0.7327036261558533, + "learning_rate": 6.718291851955383e-05, + "loss": 2.5893, + "step": 12169 + }, + { + "epoch": 0.9821644742151562, + "grad_norm": 0.7092839479446411, + "learning_rate": 6.716800633450393e-05, + "loss": 2.5104, + "step": 12170 + }, + { + "epoch": 0.9822451779517392, + "grad_norm": 0.7384308576583862, + "learning_rate": 6.715309496771311e-05, + "loss": 2.5066, + "step": 12171 + }, + { + "epoch": 0.9823258816883221, + "grad_norm": 0.6744845509529114, + "learning_rate": 6.713818441955308e-05, + "loss": 2.469, + "step": 12172 + }, + { + "epoch": 0.9824065854249052, + "grad_norm": 0.6497980952262878, + "learning_rate": 6.712327469039536e-05, + "loss": 2.4943, + "step": 12173 + }, + { + "epoch": 0.9824872891614882, + "grad_norm": 0.6550357937812805, + "learning_rate": 6.710836578061156e-05, + "loss": 2.5019, + "step": 12174 + }, + { + "epoch": 0.9825679928980712, + "grad_norm": 0.6813549995422363, + "learning_rate": 6.709345769057331e-05, + "loss": 2.4314, + "step": 12175 + }, + { + "epoch": 0.9826486966346542, + "grad_norm": 0.6636531352996826, + "learning_rate": 6.707855042065209e-05, + "loss": 2.5202, + "step": 12176 + }, + { + "epoch": 0.9827294003712372, + "grad_norm": 0.6684894561767578, + "learning_rate": 6.706364397121944e-05, + "loss": 2.4353, + "step": 12177 + }, + { + "epoch": 
0.9828101041078202, + "grad_norm": 0.6813677549362183, + "learning_rate": 6.704873834264688e-05, + "loss": 2.4254, + "step": 12178 + }, + { + "epoch": 0.9828908078444032, + "grad_norm": 0.6584975719451904, + "learning_rate": 6.70338335353059e-05, + "loss": 2.5647, + "step": 12179 + }, + { + "epoch": 0.9829715115809862, + "grad_norm": 0.6959114074707031, + "learning_rate": 6.701892954956796e-05, + "loss": 2.5203, + "step": 12180 + }, + { + "epoch": 0.9830522153175693, + "grad_norm": 0.6399044990539551, + "learning_rate": 6.700402638580452e-05, + "loss": 2.4697, + "step": 12181 + }, + { + "epoch": 0.9831329190541522, + "grad_norm": 0.6838750839233398, + "learning_rate": 6.698912404438702e-05, + "loss": 2.5261, + "step": 12182 + }, + { + "epoch": 0.9832136227907352, + "grad_norm": 0.6286367177963257, + "learning_rate": 6.697422252568679e-05, + "loss": 2.4264, + "step": 12183 + }, + { + "epoch": 0.9832943265273182, + "grad_norm": 0.901637852191925, + "learning_rate": 6.695932183007528e-05, + "loss": 2.4908, + "step": 12184 + }, + { + "epoch": 0.9833750302639013, + "grad_norm": 0.8361458778381348, + "learning_rate": 6.694442195792386e-05, + "loss": 2.5183, + "step": 12185 + }, + { + "epoch": 0.9834557340004842, + "grad_norm": 0.7033401727676392, + "learning_rate": 6.692952290960384e-05, + "loss": 2.5702, + "step": 12186 + }, + { + "epoch": 0.9835364377370672, + "grad_norm": 0.669486939907074, + "learning_rate": 6.691462468548653e-05, + "loss": 2.5143, + "step": 12187 + }, + { + "epoch": 0.9836171414736502, + "grad_norm": 0.7043797969818115, + "learning_rate": 6.689972728594329e-05, + "loss": 2.5638, + "step": 12188 + }, + { + "epoch": 0.9836978452102332, + "grad_norm": 0.6532511115074158, + "learning_rate": 6.688483071134537e-05, + "loss": 2.5227, + "step": 12189 + }, + { + "epoch": 0.9837785489468163, + "grad_norm": 0.7363922595977783, + "learning_rate": 6.6869934962064e-05, + "loss": 2.4953, + "step": 12190 + }, + { + "epoch": 0.9838592526833992, + "grad_norm": 
0.6746651530265808, + "learning_rate": 6.685504003847051e-05, + "loss": 2.5021, + "step": 12191 + }, + { + "epoch": 0.9839399564199822, + "grad_norm": 0.665459930896759, + "learning_rate": 6.684014594093604e-05, + "loss": 2.5126, + "step": 12192 + }, + { + "epoch": 0.9840206601565652, + "grad_norm": 0.6618975400924683, + "learning_rate": 6.682525266983179e-05, + "loss": 2.5046, + "step": 12193 + }, + { + "epoch": 0.9841013638931483, + "grad_norm": 0.6536173224449158, + "learning_rate": 6.6810360225529e-05, + "loss": 2.4222, + "step": 12194 + }, + { + "epoch": 0.9841820676297313, + "grad_norm": 0.6882187724113464, + "learning_rate": 6.679546860839876e-05, + "loss": 2.475, + "step": 12195 + }, + { + "epoch": 0.9842627713663142, + "grad_norm": 0.6941187977790833, + "learning_rate": 6.678057781881224e-05, + "loss": 2.5642, + "step": 12196 + }, + { + "epoch": 0.9843434751028972, + "grad_norm": 0.7057064175605774, + "learning_rate": 6.676568785714057e-05, + "loss": 2.4817, + "step": 12197 + }, + { + "epoch": 0.9844241788394803, + "grad_norm": 0.6455948352813721, + "learning_rate": 6.675079872375487e-05, + "loss": 2.5206, + "step": 12198 + }, + { + "epoch": 0.9845048825760633, + "grad_norm": 0.6559014320373535, + "learning_rate": 6.673591041902613e-05, + "loss": 2.4082, + "step": 12199 + }, + { + "epoch": 0.9845855863126463, + "grad_norm": 0.6732046008110046, + "learning_rate": 6.672102294332542e-05, + "loss": 2.5472, + "step": 12200 + }, + { + "epoch": 0.9846662900492292, + "grad_norm": 0.7074914574623108, + "learning_rate": 6.670613629702391e-05, + "loss": 2.5243, + "step": 12201 + }, + { + "epoch": 0.9847469937858123, + "grad_norm": 0.6780694127082825, + "learning_rate": 6.669125048049246e-05, + "loss": 2.494, + "step": 12202 + }, + { + "epoch": 0.9848276975223953, + "grad_norm": 0.6361132264137268, + "learning_rate": 6.66763654941021e-05, + "loss": 2.4764, + "step": 12203 + }, + { + "epoch": 0.9849084012589783, + "grad_norm": 0.752727210521698, + "learning_rate": 
6.666148133822387e-05, + "loss": 2.4942, + "step": 12204 + }, + { + "epoch": 0.9849891049955612, + "grad_norm": 0.7282724976539612, + "learning_rate": 6.664659801322863e-05, + "loss": 2.471, + "step": 12205 + }, + { + "epoch": 0.9850698087321443, + "grad_norm": 0.6977601051330566, + "learning_rate": 6.663171551948736e-05, + "loss": 2.4695, + "step": 12206 + }, + { + "epoch": 0.9851505124687273, + "grad_norm": 0.6957824230194092, + "learning_rate": 6.661683385737101e-05, + "loss": 2.5096, + "step": 12207 + }, + { + "epoch": 0.9852312162053103, + "grad_norm": 0.6197221279144287, + "learning_rate": 6.660195302725037e-05, + "loss": 2.4199, + "step": 12208 + }, + { + "epoch": 0.9853119199418933, + "grad_norm": 0.747558057308197, + "learning_rate": 6.658707302949638e-05, + "loss": 2.5988, + "step": 12209 + }, + { + "epoch": 0.9853926236784764, + "grad_norm": 0.6593184471130371, + "learning_rate": 6.657219386447989e-05, + "loss": 2.4837, + "step": 12210 + }, + { + "epoch": 0.9854733274150593, + "grad_norm": 0.6795992255210876, + "learning_rate": 6.655731553257169e-05, + "loss": 2.498, + "step": 12211 + }, + { + "epoch": 0.9855540311516423, + "grad_norm": 0.7588422298431396, + "learning_rate": 6.65424380341426e-05, + "loss": 2.444, + "step": 12212 + }, + { + "epoch": 0.9856347348882253, + "grad_norm": 0.7791433930397034, + "learning_rate": 6.652756136956342e-05, + "loss": 2.4893, + "step": 12213 + }, + { + "epoch": 0.9857154386248084, + "grad_norm": 0.6320767998695374, + "learning_rate": 6.651268553920493e-05, + "loss": 2.4831, + "step": 12214 + }, + { + "epoch": 0.9857961423613913, + "grad_norm": 0.6818140745162964, + "learning_rate": 6.649781054343783e-05, + "loss": 2.4316, + "step": 12215 + }, + { + "epoch": 0.9858768460979743, + "grad_norm": 0.7460113763809204, + "learning_rate": 6.648293638263285e-05, + "loss": 2.5335, + "step": 12216 + }, + { + "epoch": 0.9859575498345573, + "grad_norm": 0.714074432849884, + "learning_rate": 6.646806305716079e-05, + "loss": 2.4573, + 
"step": 12217 + }, + { + "epoch": 0.9860382535711404, + "grad_norm": 0.6815951466560364, + "learning_rate": 6.645319056739217e-05, + "loss": 2.4758, + "step": 12218 + }, + { + "epoch": 0.9861189573077234, + "grad_norm": 0.6842799782752991, + "learning_rate": 6.643831891369775e-05, + "loss": 2.4998, + "step": 12219 + }, + { + "epoch": 0.9861996610443063, + "grad_norm": 0.6725212335586548, + "learning_rate": 6.642344809644818e-05, + "loss": 2.5179, + "step": 12220 + }, + { + "epoch": 0.9862803647808893, + "grad_norm": 0.7859417796134949, + "learning_rate": 6.640857811601402e-05, + "loss": 2.5801, + "step": 12221 + }, + { + "epoch": 0.9863610685174724, + "grad_norm": 0.6438577771186829, + "learning_rate": 6.639370897276591e-05, + "loss": 2.4659, + "step": 12222 + }, + { + "epoch": 0.9864417722540554, + "grad_norm": 0.7036609053611755, + "learning_rate": 6.637884066707447e-05, + "loss": 2.5637, + "step": 12223 + }, + { + "epoch": 0.9865224759906384, + "grad_norm": 0.6756969094276428, + "learning_rate": 6.636397319931016e-05, + "loss": 2.5381, + "step": 12224 + }, + { + "epoch": 0.9866031797272213, + "grad_norm": 0.6907589435577393, + "learning_rate": 6.634910656984354e-05, + "loss": 2.4927, + "step": 12225 + }, + { + "epoch": 0.9866838834638044, + "grad_norm": 0.7347010374069214, + "learning_rate": 6.63342407790452e-05, + "loss": 2.5131, + "step": 12226 + }, + { + "epoch": 0.9867645872003874, + "grad_norm": 0.6835876107215881, + "learning_rate": 6.631937582728555e-05, + "loss": 2.4611, + "step": 12227 + }, + { + "epoch": 0.9868452909369704, + "grad_norm": 0.8199172616004944, + "learning_rate": 6.630451171493511e-05, + "loss": 2.5341, + "step": 12228 + }, + { + "epoch": 0.9869259946735534, + "grad_norm": 0.7537188529968262, + "learning_rate": 6.62896484423643e-05, + "loss": 2.5218, + "step": 12229 + }, + { + "epoch": 0.9870066984101364, + "grad_norm": 0.7254310250282288, + "learning_rate": 6.62747860099436e-05, + "loss": 2.4766, + "step": 12230 + }, + { + "epoch": 
0.9870874021467194, + "grad_norm": 0.6852995157241821, + "learning_rate": 6.625992441804338e-05, + "loss": 2.548, + "step": 12231 + }, + { + "epoch": 0.9871681058833024, + "grad_norm": 0.7089388966560364, + "learning_rate": 6.624506366703402e-05, + "loss": 2.5125, + "step": 12232 + }, + { + "epoch": 0.9872488096198854, + "grad_norm": 0.7114216685295105, + "learning_rate": 6.623020375728597e-05, + "loss": 2.5408, + "step": 12233 + }, + { + "epoch": 0.9873295133564685, + "grad_norm": 0.7891978025436401, + "learning_rate": 6.621534468916946e-05, + "loss": 2.5946, + "step": 12234 + }, + { + "epoch": 0.9874102170930514, + "grad_norm": 0.671399712562561, + "learning_rate": 6.620048646305488e-05, + "loss": 2.4732, + "step": 12235 + }, + { + "epoch": 0.9874909208296344, + "grad_norm": 0.6712855696678162, + "learning_rate": 6.618562907931256e-05, + "loss": 2.4376, + "step": 12236 + }, + { + "epoch": 0.9875716245662174, + "grad_norm": 0.7183727025985718, + "learning_rate": 6.617077253831272e-05, + "loss": 2.5406, + "step": 12237 + }, + { + "epoch": 0.9876523283028005, + "grad_norm": 0.6857761144638062, + "learning_rate": 6.615591684042568e-05, + "loss": 2.5279, + "step": 12238 + }, + { + "epoch": 0.9877330320393835, + "grad_norm": 0.7268103957176208, + "learning_rate": 6.614106198602165e-05, + "loss": 2.5283, + "step": 12239 + }, + { + "epoch": 0.9878137357759664, + "grad_norm": 0.6703717708587646, + "learning_rate": 6.612620797547087e-05, + "loss": 2.4254, + "step": 12240 + }, + { + "epoch": 0.9878944395125494, + "grad_norm": 0.7110719680786133, + "learning_rate": 6.611135480914352e-05, + "loss": 2.496, + "step": 12241 + }, + { + "epoch": 0.9879751432491324, + "grad_norm": 0.7268263697624207, + "learning_rate": 6.609650248740983e-05, + "loss": 2.5489, + "step": 12242 + }, + { + "epoch": 0.9880558469857155, + "grad_norm": 0.7413432598114014, + "learning_rate": 6.60816510106399e-05, + "loss": 2.4998, + "step": 12243 + }, + { + "epoch": 0.9881365507222984, + "grad_norm": 
0.7443360090255737, + "learning_rate": 6.606680037920389e-05, + "loss": 2.5282, + "step": 12244 + }, + { + "epoch": 0.9882172544588814, + "grad_norm": 0.7787832021713257, + "learning_rate": 6.605195059347191e-05, + "loss": 2.5221, + "step": 12245 + }, + { + "epoch": 0.9882979581954644, + "grad_norm": 0.6921473741531372, + "learning_rate": 6.603710165381409e-05, + "loss": 2.5434, + "step": 12246 + }, + { + "epoch": 0.9883786619320475, + "grad_norm": 0.737328827381134, + "learning_rate": 6.602225356060044e-05, + "loss": 2.5222, + "step": 12247 + }, + { + "epoch": 0.9884593656686305, + "grad_norm": 0.698823094367981, + "learning_rate": 6.600740631420106e-05, + "loss": 2.528, + "step": 12248 + }, + { + "epoch": 0.9885400694052134, + "grad_norm": 0.6735067963600159, + "learning_rate": 6.599255991498601e-05, + "loss": 2.4942, + "step": 12249 + }, + { + "epoch": 0.9886207731417964, + "grad_norm": 0.659622311592102, + "learning_rate": 6.59777143633252e-05, + "loss": 2.4822, + "step": 12250 + }, + { + "epoch": 0.9887014768783795, + "grad_norm": 0.6973726153373718, + "learning_rate": 6.596286965958872e-05, + "loss": 2.5499, + "step": 12251 + }, + { + "epoch": 0.9887821806149625, + "grad_norm": 0.6771909594535828, + "learning_rate": 6.594802580414651e-05, + "loss": 2.4968, + "step": 12252 + }, + { + "epoch": 0.9888628843515455, + "grad_norm": 0.68080073595047, + "learning_rate": 6.593318279736849e-05, + "loss": 2.5142, + "step": 12253 + }, + { + "epoch": 0.9889435880881284, + "grad_norm": NaN, + "learning_rate": 6.593318279736849e-05, + "loss": 2.466, + "step": 12254 + }, + { + "epoch": 0.9890242918247115, + "grad_norm": 0.6865221858024597, + "learning_rate": 6.591834063962461e-05, + "loss": 2.4894, + "step": 12255 + }, + { + "epoch": 0.9891049955612945, + "grad_norm": 0.7050445079803467, + "learning_rate": 6.590349933128478e-05, + "loss": 2.5733, + "step": 12256 + }, + { + "epoch": 0.9891856992978775, + "grad_norm": 0.6971526741981506, + "learning_rate": 
6.588865887271887e-05, + "loss": 2.4997, + "step": 12257 + }, + { + "epoch": 0.9892664030344605, + "grad_norm": 0.6465088725090027, + "learning_rate": 6.587381926429674e-05, + "loss": 2.5155, + "step": 12258 + }, + { + "epoch": 0.9893471067710435, + "grad_norm": 0.6521422266960144, + "learning_rate": 6.585898050638823e-05, + "loss": 2.4803, + "step": 12259 + }, + { + "epoch": 0.9894278105076265, + "grad_norm": 0.6798849105834961, + "learning_rate": 6.584414259936324e-05, + "loss": 2.5301, + "step": 12260 + }, + { + "epoch": 0.9895085142442095, + "grad_norm": 0.6903446912765503, + "learning_rate": 6.582930554359144e-05, + "loss": 2.4662, + "step": 12261 + }, + { + "epoch": 0.9895892179807925, + "grad_norm": 0.7183516621589661, + "learning_rate": 6.581446933944267e-05, + "loss": 2.4711, + "step": 12262 + }, + { + "epoch": 0.9896699217173756, + "grad_norm": 0.702738344669342, + "learning_rate": 6.579963398728671e-05, + "loss": 2.531, + "step": 12263 + }, + { + "epoch": 0.9897506254539585, + "grad_norm": 0.7187048196792603, + "learning_rate": 6.578479948749325e-05, + "loss": 2.4933, + "step": 12264 + }, + { + "epoch": 0.9898313291905415, + "grad_norm": 0.6988784670829773, + "learning_rate": 6.576996584043202e-05, + "loss": 2.5179, + "step": 12265 + }, + { + "epoch": 0.9899120329271245, + "grad_norm": 0.7434641122817993, + "learning_rate": 6.575513304647276e-05, + "loss": 2.5157, + "step": 12266 + }, + { + "epoch": 0.9899927366637076, + "grad_norm": 0.667881429195404, + "learning_rate": 6.574030110598505e-05, + "loss": 2.5152, + "step": 12267 + }, + { + "epoch": 0.9900734404002905, + "grad_norm": 0.6766676902770996, + "learning_rate": 6.572547001933862e-05, + "loss": 2.5041, + "step": 12268 + }, + { + "epoch": 0.9901541441368735, + "grad_norm": 0.6531797051429749, + "learning_rate": 6.571063978690311e-05, + "loss": 2.5457, + "step": 12269 + }, + { + "epoch": 0.9902348478734565, + "grad_norm": 0.6557255983352661, + "learning_rate": 6.569581040904804e-05, + "loss": 
2.5253, + "step": 12270 + }, + { + "epoch": 0.9903155516100396, + "grad_norm": 0.6818893551826477, + "learning_rate": 6.568098188614304e-05, + "loss": 2.5031, + "step": 12271 + }, + { + "epoch": 0.9903962553466226, + "grad_norm": 0.6644853949546814, + "learning_rate": 6.56661542185577e-05, + "loss": 2.5285, + "step": 12272 + }, + { + "epoch": 0.9904769590832055, + "grad_norm": 0.6035603284835815, + "learning_rate": 6.565132740666155e-05, + "loss": 2.46, + "step": 12273 + }, + { + "epoch": 0.9905576628197885, + "grad_norm": 0.7061343193054199, + "learning_rate": 6.56365014508241e-05, + "loss": 2.4731, + "step": 12274 + }, + { + "epoch": 0.9906383665563716, + "grad_norm": 0.6981248259544373, + "learning_rate": 6.562167635141486e-05, + "loss": 2.4518, + "step": 12275 + }, + { + "epoch": 0.9907190702929546, + "grad_norm": 0.6718073487281799, + "learning_rate": 6.560685210880334e-05, + "loss": 2.4919, + "step": 12276 + }, + { + "epoch": 0.9907997740295376, + "grad_norm": 0.7095392942428589, + "learning_rate": 6.559202872335893e-05, + "loss": 2.5284, + "step": 12277 + }, + { + "epoch": 0.9908804777661205, + "grad_norm": 0.7052092552185059, + "learning_rate": 6.557720619545111e-05, + "loss": 2.4781, + "step": 12278 + }, + { + "epoch": 0.9909611815027036, + "grad_norm": 0.653570830821991, + "learning_rate": 6.556238452544934e-05, + "loss": 2.5293, + "step": 12279 + }, + { + "epoch": 0.9910418852392866, + "grad_norm": 0.6705330610275269, + "learning_rate": 6.554756371372293e-05, + "loss": 2.4437, + "step": 12280 + }, + { + "epoch": 0.9911225889758696, + "grad_norm": 0.6494189500808716, + "learning_rate": 6.553274376064127e-05, + "loss": 2.4833, + "step": 12281 + }, + { + "epoch": 0.9912032927124526, + "grad_norm": 0.6497724652290344, + "learning_rate": 6.551792466657378e-05, + "loss": 2.4803, + "step": 12282 + }, + { + "epoch": 0.9912839964490356, + "grad_norm": 0.7740494608879089, + "learning_rate": 6.550310643188972e-05, + "loss": 2.4907, + "step": 12283 + }, + { + 
"epoch": 0.9913647001856186, + "grad_norm": 0.699562668800354, + "learning_rate": 6.548828905695843e-05, + "loss": 2.4576, + "step": 12284 + }, + { + "epoch": 0.9914454039222016, + "grad_norm": 0.8123162984848022, + "learning_rate": 6.547347254214921e-05, + "loss": 2.5118, + "step": 12285 + }, + { + "epoch": 0.9915261076587846, + "grad_norm": 0.7227715253829956, + "learning_rate": 6.545865688783129e-05, + "loss": 2.4688, + "step": 12286 + }, + { + "epoch": 0.9916068113953677, + "grad_norm": 0.6498493552207947, + "learning_rate": 6.544384209437392e-05, + "loss": 2.477, + "step": 12287 + }, + { + "epoch": 0.9916875151319506, + "grad_norm": 0.6427823901176453, + "learning_rate": 6.542902816214636e-05, + "loss": 2.4388, + "step": 12288 + }, + { + "epoch": 0.9917682188685336, + "grad_norm": 0.6803679466247559, + "learning_rate": 6.541421509151778e-05, + "loss": 2.5095, + "step": 12289 + }, + { + "epoch": 0.9918489226051166, + "grad_norm": 0.7025790810585022, + "learning_rate": 6.539940288285734e-05, + "loss": 2.4881, + "step": 12290 + }, + { + "epoch": 0.9919296263416996, + "grad_norm": 0.6899270415306091, + "learning_rate": 6.538459153653424e-05, + "loss": 2.486, + "step": 12291 + }, + { + "epoch": 0.9920103300782827, + "grad_norm": 0.7379609942436218, + "learning_rate": 6.536978105291762e-05, + "loss": 2.5368, + "step": 12292 + }, + { + "epoch": 0.9920910338148656, + "grad_norm": 0.7279202342033386, + "learning_rate": 6.535497143237657e-05, + "loss": 2.5275, + "step": 12293 + }, + { + "epoch": 0.9921717375514486, + "grad_norm": 0.6810527443885803, + "learning_rate": 6.53401626752802e-05, + "loss": 2.5053, + "step": 12294 + }, + { + "epoch": 0.9922524412880316, + "grad_norm": 0.6578424572944641, + "learning_rate": 6.532535478199759e-05, + "loss": 2.5334, + "step": 12295 + }, + { + "epoch": 0.9923331450246147, + "grad_norm": 0.6819284558296204, + "learning_rate": 6.531054775289778e-05, + "loss": 2.4879, + "step": 12296 + }, + { + "epoch": 0.9924138487611976, + 
"grad_norm": 0.6524500846862793, + "learning_rate": 6.529574158834977e-05, + "loss": 2.5349, + "step": 12297 + }, + { + "epoch": 0.9924945524977806, + "grad_norm": 0.6853352785110474, + "learning_rate": 6.528093628872263e-05, + "loss": 2.4217, + "step": 12298 + }, + { + "epoch": 0.9925752562343636, + "grad_norm": 0.6731893420219421, + "learning_rate": 6.526613185438529e-05, + "loss": 2.4739, + "step": 12299 + }, + { + "epoch": 0.9926559599709467, + "grad_norm": 0.6515606641769409, + "learning_rate": 6.525132828570673e-05, + "loss": 2.5348, + "step": 12300 + }, + { + "epoch": 0.9927366637075297, + "grad_norm": 0.6819963455200195, + "learning_rate": 6.523652558305596e-05, + "loss": 2.5052, + "step": 12301 + }, + { + "epoch": 0.9928173674441126, + "grad_norm": 0.6521475911140442, + "learning_rate": 6.522172374680177e-05, + "loss": 2.5283, + "step": 12302 + }, + { + "epoch": 0.9928980711806956, + "grad_norm": 0.6488186717033386, + "learning_rate": 6.520692277731315e-05, + "loss": 2.4779, + "step": 12303 + }, + { + "epoch": 0.9929787749172787, + "grad_norm": 0.6509760022163391, + "learning_rate": 6.519212267495903e-05, + "loss": 2.5426, + "step": 12304 + }, + { + "epoch": 0.9930594786538617, + "grad_norm": 0.621366560459137, + "learning_rate": 6.517732344010814e-05, + "loss": 2.4804, + "step": 12305 + }, + { + "epoch": 0.9931401823904447, + "grad_norm": 0.6907268166542053, + "learning_rate": 6.516252507312938e-05, + "loss": 2.4883, + "step": 12306 + }, + { + "epoch": 0.9932208861270276, + "grad_norm": 0.7739343643188477, + "learning_rate": 6.514772757439157e-05, + "loss": 2.481, + "step": 12307 + }, + { + "epoch": 0.9933015898636107, + "grad_norm": 0.6794601082801819, + "learning_rate": 6.513293094426352e-05, + "loss": 2.5244, + "step": 12308 + }, + { + "epoch": 0.9933822936001937, + "grad_norm": 0.7189902663230896, + "learning_rate": 6.511813518311394e-05, + "loss": 2.5221, + "step": 12309 + }, + { + "epoch": 0.9934629973367767, + "grad_norm": 0.733318030834198, + 
"learning_rate": 6.510334029131163e-05, + "loss": 2.521, + "step": 12310 + }, + { + "epoch": 0.9935437010733597, + "grad_norm": 0.7584299445152283, + "learning_rate": 6.508854626922531e-05, + "loss": 2.4962, + "step": 12311 + }, + { + "epoch": 0.9936244048099427, + "grad_norm": 0.6442410349845886, + "learning_rate": 6.507375311722366e-05, + "loss": 2.4775, + "step": 12312 + }, + { + "epoch": 0.9937051085465257, + "grad_norm": 0.6609243154525757, + "learning_rate": 6.505896083567536e-05, + "loss": 2.4706, + "step": 12313 + }, + { + "epoch": 0.9937858122831087, + "grad_norm": 0.6527631878852844, + "learning_rate": 6.504416942494914e-05, + "loss": 2.4612, + "step": 12314 + }, + { + "epoch": 0.9938665160196917, + "grad_norm": 0.6798218488693237, + "learning_rate": 6.502937888541357e-05, + "loss": 2.5502, + "step": 12315 + }, + { + "epoch": 0.9939472197562748, + "grad_norm": 0.6573790907859802, + "learning_rate": 6.501458921743728e-05, + "loss": 2.5598, + "step": 12316 + }, + { + "epoch": 0.9940279234928577, + "grad_norm": 0.6945913434028625, + "learning_rate": 6.49998004213889e-05, + "loss": 2.5323, + "step": 12317 + }, + { + "epoch": 0.9941086272294407, + "grad_norm": 0.7609078288078308, + "learning_rate": 6.498501249763697e-05, + "loss": 2.5211, + "step": 12318 + }, + { + "epoch": 0.9941893309660237, + "grad_norm": 0.6878666281700134, + "learning_rate": 6.497022544655006e-05, + "loss": 2.5366, + "step": 12319 + }, + { + "epoch": 0.9942700347026068, + "grad_norm": 0.6675810813903809, + "learning_rate": 6.495543926849674e-05, + "loss": 2.512, + "step": 12320 + }, + { + "epoch": 0.9943507384391898, + "grad_norm": 0.7285950779914856, + "learning_rate": 6.494065396384544e-05, + "loss": 2.4741, + "step": 12321 + }, + { + "epoch": 0.9944314421757727, + "grad_norm": 0.6287158131599426, + "learning_rate": 6.49258695329647e-05, + "loss": 2.4824, + "step": 12322 + }, + { + "epoch": 0.9945121459123557, + "grad_norm": 0.6506727337837219, + "learning_rate": 6.491108597622296e-05, 
+ "loss": 2.5126, + "step": 12323 + }, + { + "epoch": 0.9945928496489388, + "grad_norm": 0.7679052352905273, + "learning_rate": 6.489630329398869e-05, + "loss": 2.5503, + "step": 12324 + }, + { + "epoch": 0.9946735533855218, + "grad_norm": 0.637184202671051, + "learning_rate": 6.488152148663029e-05, + "loss": 2.5098, + "step": 12325 + }, + { + "epoch": 0.9947542571221047, + "grad_norm": 0.6747186779975891, + "learning_rate": 6.486674055451619e-05, + "loss": 2.5154, + "step": 12326 + }, + { + "epoch": 0.9948349608586877, + "grad_norm": 0.7288245558738708, + "learning_rate": 6.485196049801476e-05, + "loss": 2.5077, + "step": 12327 + }, + { + "epoch": 0.9949156645952708, + "grad_norm": 0.6914251446723938, + "learning_rate": 6.483718131749435e-05, + "loss": 2.4877, + "step": 12328 + }, + { + "epoch": 0.9949963683318538, + "grad_norm": 0.7224392294883728, + "learning_rate": 6.48224030133233e-05, + "loss": 2.4862, + "step": 12329 + }, + { + "epoch": 0.9950770720684368, + "grad_norm": 0.7365561723709106, + "learning_rate": 6.480762558586995e-05, + "loss": 2.477, + "step": 12330 + }, + { + "epoch": 0.9951577758050197, + "grad_norm": 0.7673236131668091, + "learning_rate": 6.47928490355025e-05, + "loss": 2.5423, + "step": 12331 + }, + { + "epoch": 0.9952384795416028, + "grad_norm": 0.6638002395629883, + "learning_rate": 6.477807336258931e-05, + "loss": 2.5007, + "step": 12332 + }, + { + "epoch": 0.9953191832781858, + "grad_norm": 0.6415974497795105, + "learning_rate": 6.476329856749864e-05, + "loss": 2.4924, + "step": 12333 + }, + { + "epoch": 0.9953998870147688, + "grad_norm": 0.7129398584365845, + "learning_rate": 6.474852465059864e-05, + "loss": 2.5313, + "step": 12334 + }, + { + "epoch": 0.9954805907513518, + "grad_norm": 0.6896344423294067, + "learning_rate": 6.473375161225756e-05, + "loss": 2.5073, + "step": 12335 + }, + { + "epoch": 0.9955612944879348, + "grad_norm": 0.7009317874908447, + "learning_rate": 6.47189794528436e-05, + "loss": 2.574, + "step": 12336 + }, + { 
+ "epoch": 0.9956419982245178, + "grad_norm": 0.6555172801017761, + "learning_rate": 6.470420817272488e-05, + "loss": 2.4769, + "step": 12337 + }, + { + "epoch": 0.9957227019611008, + "grad_norm": 0.7569532990455627, + "learning_rate": 6.468943777226954e-05, + "loss": 2.4691, + "step": 12338 + }, + { + "epoch": 0.9958034056976838, + "grad_norm": 0.68092280626297, + "learning_rate": 6.467466825184569e-05, + "loss": 2.4793, + "step": 12339 + }, + { + "epoch": 0.9958841094342669, + "grad_norm": 0.6977378726005554, + "learning_rate": 6.465989961182152e-05, + "loss": 2.4678, + "step": 12340 + }, + { + "epoch": 0.9959648131708498, + "grad_norm": 0.6702281832695007, + "learning_rate": 6.4645131852565e-05, + "loss": 2.5398, + "step": 12341 + }, + { + "epoch": 0.9960455169074328, + "grad_norm": 0.7584038972854614, + "learning_rate": 6.46303649744442e-05, + "loss": 2.5355, + "step": 12342 + }, + { + "epoch": 0.9961262206440158, + "grad_norm": 0.6779505610466003, + "learning_rate": 6.461559897782718e-05, + "loss": 2.4828, + "step": 12343 + }, + { + "epoch": 0.9962069243805988, + "grad_norm": 0.6968233585357666, + "learning_rate": 6.460083386308192e-05, + "loss": 2.5108, + "step": 12344 + }, + { + "epoch": 0.9962876281171819, + "grad_norm": 0.7114594578742981, + "learning_rate": 6.45860696305764e-05, + "loss": 2.5236, + "step": 12345 + }, + { + "epoch": 0.9963683318537648, + "grad_norm": 0.6850530505180359, + "learning_rate": 6.457130628067865e-05, + "loss": 2.458, + "step": 12346 + }, + { + "epoch": 0.9964490355903478, + "grad_norm": 0.7135400772094727, + "learning_rate": 6.455654381375651e-05, + "loss": 2.539, + "step": 12347 + }, + { + "epoch": 0.9965297393269308, + "grad_norm": 0.6736366748809814, + "learning_rate": 6.454178223017797e-05, + "loss": 2.4721, + "step": 12348 + }, + { + "epoch": 0.9966104430635139, + "grad_norm": 0.6806206107139587, + "learning_rate": 6.45270215303109e-05, + "loss": 2.5035, + "step": 12349 + }, + { + "epoch": 0.9966911468000968, + "grad_norm": 
0.7120711803436279, + "learning_rate": 6.451226171452318e-05, + "loss": 2.5344, + "step": 12350 + }, + { + "epoch": 0.9967718505366798, + "grad_norm": 0.6865986585617065, + "learning_rate": 6.449750278318264e-05, + "loss": 2.4807, + "step": 12351 + }, + { + "epoch": 0.9968525542732628, + "grad_norm": 0.6461294889450073, + "learning_rate": 6.448274473665717e-05, + "loss": 2.4878, + "step": 12352 + }, + { + "epoch": 0.9969332580098459, + "grad_norm": 0.7090638279914856, + "learning_rate": 6.446798757531454e-05, + "loss": 2.4599, + "step": 12353 + }, + { + "epoch": 0.9970139617464289, + "grad_norm": 0.6933324337005615, + "learning_rate": 6.445323129952252e-05, + "loss": 2.5398, + "step": 12354 + }, + { + "epoch": 0.9970946654830118, + "grad_norm": 0.7018197774887085, + "learning_rate": 6.443847590964888e-05, + "loss": 2.5159, + "step": 12355 + }, + { + "epoch": 0.9971753692195948, + "grad_norm": 0.7292604446411133, + "learning_rate": 6.442372140606145e-05, + "loss": 2.4934, + "step": 12356 + }, + { + "epoch": 0.9972560729561779, + "grad_norm": 0.6686378121376038, + "learning_rate": 6.440896778912783e-05, + "loss": 2.5076, + "step": 12357 + }, + { + "epoch": 0.9973367766927609, + "grad_norm": 0.7194764018058777, + "learning_rate": 6.439421505921576e-05, + "loss": 2.4958, + "step": 12358 + }, + { + "epoch": 0.9974174804293439, + "grad_norm": 0.662467360496521, + "learning_rate": 6.437946321669296e-05, + "loss": 2.5202, + "step": 12359 + }, + { + "epoch": 0.9974981841659268, + "grad_norm": 0.7222515940666199, + "learning_rate": 6.436471226192703e-05, + "loss": 2.5058, + "step": 12360 + }, + { + "epoch": 0.9975788879025099, + "grad_norm": 0.6354855895042419, + "learning_rate": 6.434996219528562e-05, + "loss": 2.4849, + "step": 12361 + }, + { + "epoch": 0.9976595916390929, + "grad_norm": 0.7689539790153503, + "learning_rate": 6.433521301713636e-05, + "loss": 2.4959, + "step": 12362 + }, + { + "epoch": 0.9977402953756759, + "grad_norm": 0.6894338130950928, + 
"learning_rate": 6.43204647278468e-05, + "loss": 2.5098, + "step": 12363 + }, + { + "epoch": 0.9978209991122589, + "grad_norm": 0.7694165110588074, + "learning_rate": 6.430571732778451e-05, + "loss": 2.513, + "step": 12364 + }, + { + "epoch": 0.9979017028488419, + "grad_norm": 0.6512044668197632, + "learning_rate": 6.42909708173171e-05, + "loss": 2.4785, + "step": 12365 + }, + { + "epoch": 0.9979824065854249, + "grad_norm": 0.6605672836303711, + "learning_rate": 6.427622519681201e-05, + "loss": 2.4804, + "step": 12366 + }, + { + "epoch": 0.9980631103220079, + "grad_norm": 0.7123624086380005, + "learning_rate": 6.426148046663677e-05, + "loss": 2.4854, + "step": 12367 + }, + { + "epoch": 0.9981438140585909, + "grad_norm": 0.662645697593689, + "learning_rate": 6.424673662715886e-05, + "loss": 2.5314, + "step": 12368 + }, + { + "epoch": 0.998224517795174, + "grad_norm": 0.6482149362564087, + "learning_rate": 6.423199367874573e-05, + "loss": 2.4492, + "step": 12369 + }, + { + "epoch": 0.9983052215317569, + "grad_norm": 0.6545752286911011, + "learning_rate": 6.421725162176482e-05, + "loss": 2.5042, + "step": 12370 + }, + { + "epoch": 0.9983859252683399, + "grad_norm": 0.6698874235153198, + "learning_rate": 6.420251045658353e-05, + "loss": 2.4523, + "step": 12371 + }, + { + "epoch": 0.9984666290049229, + "grad_norm": 0.6961477398872375, + "learning_rate": 6.418777018356929e-05, + "loss": 2.556, + "step": 12372 + }, + { + "epoch": 0.998547332741506, + "grad_norm": 0.67090904712677, + "learning_rate": 6.41730308030894e-05, + "loss": 2.5237, + "step": 12373 + }, + { + "epoch": 0.998628036478089, + "grad_norm": 0.6828685402870178, + "learning_rate": 6.415829231551124e-05, + "loss": 2.453, + "step": 12374 + }, + { + "epoch": 0.9987087402146719, + "grad_norm": 0.6699565649032593, + "learning_rate": 6.414355472120213e-05, + "loss": 2.4632, + "step": 12375 + }, + { + "epoch": 0.9987894439512549, + "grad_norm": 0.6918730735778809, + "learning_rate": 6.412881802052936e-05, + 
"loss": 2.4532, + "step": 12376 + }, + { + "epoch": 0.998870147687838, + "grad_norm": 0.7222442030906677, + "learning_rate": 6.411408221386021e-05, + "loss": 2.5113, + "step": 12377 + }, + { + "epoch": 0.998950851424421, + "grad_norm": 0.7479627132415771, + "learning_rate": 6.409934730156195e-05, + "loss": 2.4857, + "step": 12378 + }, + { + "epoch": 0.999031555161004, + "grad_norm": 0.6552882194519043, + "learning_rate": 6.40846132840018e-05, + "loss": 2.4816, + "step": 12379 + }, + { + "epoch": 0.9991122588975869, + "grad_norm": 0.5990073084831238, + "learning_rate": 6.406988016154694e-05, + "loss": 2.4753, + "step": 12380 + }, + { + "epoch": 0.99919296263417, + "grad_norm": 0.6671901941299438, + "learning_rate": 6.405514793456465e-05, + "loss": 2.5298, + "step": 12381 + }, + { + "epoch": 0.999273666370753, + "grad_norm": 0.6630427241325378, + "learning_rate": 6.4040416603422e-05, + "loss": 2.485, + "step": 12382 + }, + { + "epoch": 0.999354370107336, + "grad_norm": 0.6873636841773987, + "learning_rate": 6.402568616848614e-05, + "loss": 2.4902, + "step": 12383 + }, + { + "epoch": 0.9994350738439189, + "grad_norm": 0.6912413239479065, + "learning_rate": 6.401095663012424e-05, + "loss": 2.5339, + "step": 12384 + }, + { + "epoch": 0.999515777580502, + "grad_norm": 0.6491912603378296, + "learning_rate": 6.39962279887034e-05, + "loss": 2.5367, + "step": 12385 + }, + { + "epoch": 0.999596481317085, + "grad_norm": 0.6668288111686707, + "learning_rate": 6.398150024459065e-05, + "loss": 2.5294, + "step": 12386 + }, + { + "epoch": 0.999677185053668, + "grad_norm": 0.6603856086730957, + "learning_rate": 6.396677339815306e-05, + "loss": 2.4378, + "step": 12387 + }, + { + "epoch": 0.999757888790251, + "grad_norm": 0.6461218595504761, + "learning_rate": 6.395204744975772e-05, + "loss": 2.4835, + "step": 12388 + }, + { + "epoch": 0.999838592526834, + "grad_norm": 0.6621688604354858, + "learning_rate": 6.39373223997715e-05, + "loss": 2.4834, + "step": 12389 + }, + { + "epoch": 
0.999919296263417, + "grad_norm": 0.6758724451065063, + "learning_rate": 6.392259824856153e-05, + "loss": 2.4549, + "step": 12390 + }, + { + "epoch": 1.0, + "grad_norm": 1.1304112672805786, + "learning_rate": 6.390787499649473e-05, + "loss": 2.5547, + "step": 12391 + }, + { + "epoch": 1.000080703736583, + "grad_norm": 0.6919478178024292, + "learning_rate": 6.389315264393801e-05, + "loss": 2.47, + "step": 12392 + }, + { + "epoch": 1.000161407473166, + "grad_norm": 0.6916815638542175, + "learning_rate": 6.38784311912583e-05, + "loss": 2.4636, + "step": 12393 + }, + { + "epoch": 1.000242111209749, + "grad_norm": 0.6627040505409241, + "learning_rate": 6.386371063882252e-05, + "loss": 2.5094, + "step": 12394 + }, + { + "epoch": 1.000322814946332, + "grad_norm": 0.6408648490905762, + "learning_rate": 6.384899098699754e-05, + "loss": 2.426, + "step": 12395 + }, + { + "epoch": 1.000403518682915, + "grad_norm": 0.70432448387146, + "learning_rate": 6.38342722361502e-05, + "loss": 2.4861, + "step": 12396 + }, + { + "epoch": 1.000484222419498, + "grad_norm": 0.7115964889526367, + "learning_rate": 6.381955438664735e-05, + "loss": 2.4824, + "step": 12397 + }, + { + "epoch": 1.000564926156081, + "grad_norm": 0.6547040939331055, + "learning_rate": 6.380483743885574e-05, + "loss": 2.488, + "step": 12398 + }, + { + "epoch": 1.000645629892664, + "grad_norm": 0.6916625499725342, + "learning_rate": 6.379012139314223e-05, + "loss": 2.4864, + "step": 12399 + }, + { + "epoch": 1.0007263336292471, + "grad_norm": 0.6311133503913879, + "learning_rate": 6.377540624987352e-05, + "loss": 2.4672, + "step": 12400 + }, + { + "epoch": 1.00080703736583, + "grad_norm": 0.7115580439567566, + "learning_rate": 6.376069200941642e-05, + "loss": 2.4359, + "step": 12401 + }, + { + "epoch": 1.000887741102413, + "grad_norm": 0.6734051704406738, + "learning_rate": 6.374597867213756e-05, + "loss": 2.4896, + "step": 12402 + }, + { + "epoch": 1.000968444838996, + "grad_norm": 0.6910715699195862, + 
"learning_rate": 6.373126623840368e-05, + "loss": 2.4502, + "step": 12403 + }, + { + "epoch": 1.001049148575579, + "grad_norm": 0.6807514429092407, + "learning_rate": 6.37165547085815e-05, + "loss": 2.4791, + "step": 12404 + }, + { + "epoch": 1.0011298523121621, + "grad_norm": 0.679350733757019, + "learning_rate": 6.370184408303759e-05, + "loss": 2.4758, + "step": 12405 + }, + { + "epoch": 1.001210556048745, + "grad_norm": 0.6516300439834595, + "learning_rate": 6.36871343621386e-05, + "loss": 2.4338, + "step": 12406 + }, + { + "epoch": 1.001291259785328, + "grad_norm": 0.7033620476722717, + "learning_rate": 6.367242554625119e-05, + "loss": 2.429, + "step": 12407 + }, + { + "epoch": 1.0013719635219112, + "grad_norm": 0.6750274896621704, + "learning_rate": 6.365771763574186e-05, + "loss": 2.4283, + "step": 12408 + }, + { + "epoch": 1.001452667258494, + "grad_norm": 0.7188721895217896, + "learning_rate": 6.364301063097722e-05, + "loss": 2.4509, + "step": 12409 + }, + { + "epoch": 1.001533370995077, + "grad_norm": 0.6936308741569519, + "learning_rate": 6.362830453232379e-05, + "loss": 2.4469, + "step": 12410 + }, + { + "epoch": 1.00161407473166, + "grad_norm": 0.673060953617096, + "learning_rate": 6.361359934014808e-05, + "loss": 2.4444, + "step": 12411 + }, + { + "epoch": 1.001694778468243, + "grad_norm": 0.7465113997459412, + "learning_rate": 6.359889505481658e-05, + "loss": 2.4376, + "step": 12412 + }, + { + "epoch": 1.0017754822048262, + "grad_norm": 0.7180366516113281, + "learning_rate": 6.358419167669582e-05, + "loss": 2.4223, + "step": 12413 + }, + { + "epoch": 1.001856185941409, + "grad_norm": 0.6582302451133728, + "learning_rate": 6.356948920615214e-05, + "loss": 2.4723, + "step": 12414 + }, + { + "epoch": 1.001936889677992, + "grad_norm": 0.6452654600143433, + "learning_rate": 6.3554787643552e-05, + "loss": 2.4609, + "step": 12415 + }, + { + "epoch": 1.0020175934145752, + "grad_norm": 0.7170321345329285, + "learning_rate": 6.354008698926185e-05, + "loss": 
2.5377, + "step": 12416 + }, + { + "epoch": 1.002098297151158, + "grad_norm": 0.6483680605888367, + "learning_rate": 6.352538724364809e-05, + "loss": 2.4349, + "step": 12417 + }, + { + "epoch": 1.0021790008877411, + "grad_norm": 0.6567494869232178, + "learning_rate": 6.351068840707697e-05, + "loss": 2.4421, + "step": 12418 + }, + { + "epoch": 1.002259704624324, + "grad_norm": 0.7498565912246704, + "learning_rate": 6.349599047991488e-05, + "loss": 2.4212, + "step": 12419 + }, + { + "epoch": 1.002340408360907, + "grad_norm": 0.6894906759262085, + "learning_rate": 6.348129346252816e-05, + "loss": 2.4356, + "step": 12420 + }, + { + "epoch": 1.0024211120974902, + "grad_norm": 0.657361626625061, + "learning_rate": 6.346659735528304e-05, + "loss": 2.4164, + "step": 12421 + }, + { + "epoch": 1.002501815834073, + "grad_norm": 0.6369211673736572, + "learning_rate": 6.345190215854581e-05, + "loss": 2.4229, + "step": 12422 + }, + { + "epoch": 1.0025825195706561, + "grad_norm": 0.7033721208572388, + "learning_rate": 6.343720787268277e-05, + "loss": 2.5052, + "step": 12423 + }, + { + "epoch": 1.0026632233072392, + "grad_norm": 0.7125518918037415, + "learning_rate": 6.342251449806003e-05, + "loss": 2.514, + "step": 12424 + }, + { + "epoch": 1.002743927043822, + "grad_norm": 0.7355595827102661, + "learning_rate": 6.340782203504385e-05, + "loss": 2.4459, + "step": 12425 + }, + { + "epoch": 1.0028246307804052, + "grad_norm": 0.7244594693183899, + "learning_rate": 6.339313048400042e-05, + "loss": 2.452, + "step": 12426 + }, + { + "epoch": 1.002905334516988, + "grad_norm": 0.7112728357315063, + "learning_rate": 6.337843984529585e-05, + "loss": 2.4951, + "step": 12427 + }, + { + "epoch": 1.0029860382535711, + "grad_norm": 0.7235615849494934, + "learning_rate": 6.336375011929628e-05, + "loss": 2.4697, + "step": 12428 + }, + { + "epoch": 1.0030667419901542, + "grad_norm": 0.653865396976471, + "learning_rate": 6.334906130636784e-05, + "loss": 2.4804, + "step": 12429 + }, + { + "epoch": 
1.003147445726737, + "grad_norm": 0.7845149636268616, + "learning_rate": 6.33343734068766e-05, + "loss": 2.5415, + "step": 12430 + }, + { + "epoch": 1.0032281494633202, + "grad_norm": 0.7356342077255249, + "learning_rate": 6.33196864211886e-05, + "loss": 2.5321, + "step": 12431 + }, + { + "epoch": 1.0033088531999033, + "grad_norm": 0.6828265190124512, + "learning_rate": 6.330500034966991e-05, + "loss": 2.3849, + "step": 12432 + }, + { + "epoch": 1.0033895569364861, + "grad_norm": 0.7226579189300537, + "learning_rate": 6.329031519268658e-05, + "loss": 2.512, + "step": 12433 + }, + { + "epoch": 1.0034702606730692, + "grad_norm": 0.6490235924720764, + "learning_rate": 6.327563095060449e-05, + "loss": 2.487, + "step": 12434 + }, + { + "epoch": 1.003550964409652, + "grad_norm": 0.6889309883117676, + "learning_rate": 6.326094762378969e-05, + "loss": 2.4677, + "step": 12435 + }, + { + "epoch": 1.0036316681462352, + "grad_norm": 0.695854127407074, + "learning_rate": 6.324626521260815e-05, + "loss": 2.4362, + "step": 12436 + }, + { + "epoch": 1.0037123718828183, + "grad_norm": 0.7045256495475769, + "learning_rate": 6.32315837174257e-05, + "loss": 2.4307, + "step": 12437 + }, + { + "epoch": 1.0037930756194011, + "grad_norm": 0.662604570388794, + "learning_rate": 6.321690313860833e-05, + "loss": 2.4271, + "step": 12438 + }, + { + "epoch": 1.0038737793559842, + "grad_norm": 0.7682240009307861, + "learning_rate": 6.320222347652191e-05, + "loss": 2.4617, + "step": 12439 + }, + { + "epoch": 1.0039544830925673, + "grad_norm": 0.6599584817886353, + "learning_rate": 6.318754473153221e-05, + "loss": 2.405, + "step": 12440 + }, + { + "epoch": 1.0040351868291502, + "grad_norm": 0.7423116564750671, + "learning_rate": 6.317286690400515e-05, + "loss": 2.5496, + "step": 12441 + }, + { + "epoch": 1.0041158905657332, + "grad_norm": 0.6928953528404236, + "learning_rate": 6.315818999430654e-05, + "loss": 2.4265, + "step": 12442 + }, + { + "epoch": 1.0041965943023161, + "grad_norm": 
0.699990451335907, + "learning_rate": 6.314351400280211e-05, + "loss": 2.4747, + "step": 12443 + }, + { + "epoch": 1.0042772980388992, + "grad_norm": 0.673384964466095, + "learning_rate": 6.312883892985765e-05, + "loss": 2.4891, + "step": 12444 + }, + { + "epoch": 1.0043580017754823, + "grad_norm": 0.6668596863746643, + "learning_rate": 6.311416477583893e-05, + "loss": 2.4312, + "step": 12445 + }, + { + "epoch": 1.0044387055120652, + "grad_norm": 0.6931218504905701, + "learning_rate": 6.309949154111163e-05, + "loss": 2.4907, + "step": 12446 + }, + { + "epoch": 1.0045194092486482, + "grad_norm": 0.687683641910553, + "learning_rate": 6.308481922604146e-05, + "loss": 2.4302, + "step": 12447 + }, + { + "epoch": 1.004600112985231, + "grad_norm": 0.6887302398681641, + "learning_rate": 6.30701478309941e-05, + "loss": 2.4749, + "step": 12448 + }, + { + "epoch": 1.0046808167218142, + "grad_norm": 0.6713404655456543, + "learning_rate": 6.305547735633522e-05, + "loss": 2.5046, + "step": 12449 + }, + { + "epoch": 1.0047615204583973, + "grad_norm": 0.7147336006164551, + "learning_rate": 6.304080780243038e-05, + "loss": 2.4578, + "step": 12450 + }, + { + "epoch": 1.0048422241949801, + "grad_norm": 0.87425297498703, + "learning_rate": 6.30261391696452e-05, + "loss": 2.4487, + "step": 12451 + }, + { + "epoch": 1.0049229279315632, + "grad_norm": 0.6641440987586975, + "learning_rate": 6.301147145834534e-05, + "loss": 2.4657, + "step": 12452 + }, + { + "epoch": 1.0050036316681463, + "grad_norm": 0.7311998009681702, + "learning_rate": 6.299680466889626e-05, + "loss": 2.4784, + "step": 12453 + }, + { + "epoch": 1.0050843354047292, + "grad_norm": 0.6722697615623474, + "learning_rate": 6.298213880166354e-05, + "loss": 2.4653, + "step": 12454 + }, + { + "epoch": 1.0051650391413123, + "grad_norm": 0.6886328458786011, + "learning_rate": 6.29674738570127e-05, + "loss": 2.3949, + "step": 12455 + }, + { + "epoch": 1.0052457428778951, + "grad_norm": 0.684688925743103, + "learning_rate": 
6.295280983530921e-05, + "loss": 2.4334, + "step": 12456 + }, + { + "epoch": 1.0053264466144782, + "grad_norm": 0.7436798214912415, + "learning_rate": 6.293814673691853e-05, + "loss": 2.5316, + "step": 12457 + }, + { + "epoch": 1.0054071503510613, + "grad_norm": 0.7401304244995117, + "learning_rate": 6.292348456220615e-05, + "loss": 2.4556, + "step": 12458 + }, + { + "epoch": 1.0054878540876442, + "grad_norm": 0.7330329418182373, + "learning_rate": 6.290882331153742e-05, + "loss": 2.4321, + "step": 12459 + }, + { + "epoch": 1.0055685578242273, + "grad_norm": 0.8005052208900452, + "learning_rate": 6.289416298527776e-05, + "loss": 2.415, + "step": 12460 + }, + { + "epoch": 1.0056492615608104, + "grad_norm": 0.8047310709953308, + "learning_rate": 6.28795035837926e-05, + "loss": 2.4144, + "step": 12461 + }, + { + "epoch": 1.0057299652973932, + "grad_norm": 0.7384032011032104, + "learning_rate": 6.28648451074472e-05, + "loss": 2.5237, + "step": 12462 + }, + { + "epoch": 1.0058106690339763, + "grad_norm": 0.7240314483642578, + "learning_rate": 6.285018755660695e-05, + "loss": 2.4894, + "step": 12463 + }, + { + "epoch": 1.0058913727705592, + "grad_norm": 0.6901080012321472, + "learning_rate": 6.283553093163712e-05, + "loss": 2.4244, + "step": 12464 + }, + { + "epoch": 1.0059720765071423, + "grad_norm": 0.6572268605232239, + "learning_rate": 6.282087523290304e-05, + "loss": 2.456, + "step": 12465 + }, + { + "epoch": 1.0060527802437254, + "grad_norm": 0.7207481861114502, + "learning_rate": 6.28062204607699e-05, + "loss": 2.4153, + "step": 12466 + }, + { + "epoch": 1.0061334839803082, + "grad_norm": 0.6901980042457581, + "learning_rate": 6.279156661560299e-05, + "loss": 2.4776, + "step": 12467 + }, + { + "epoch": 1.0062141877168913, + "grad_norm": 0.7003545761108398, + "learning_rate": 6.277691369776752e-05, + "loss": 2.4206, + "step": 12468 + }, + { + "epoch": 1.0062948914534744, + "grad_norm": 0.6978366374969482, + "learning_rate": 6.276226170762865e-05, + "loss": 2.3866, 
+ "step": 12469 + }, + { + "epoch": 1.0063755951900573, + "grad_norm": 0.6763097643852234, + "learning_rate": 6.274761064555154e-05, + "loss": 2.5439, + "step": 12470 + }, + { + "epoch": 1.0064562989266403, + "grad_norm": 0.7146836519241333, + "learning_rate": 6.273296051190139e-05, + "loss": 2.5486, + "step": 12471 + }, + { + "epoch": 1.0065370026632232, + "grad_norm": 0.7448136806488037, + "learning_rate": 6.271831130704326e-05, + "loss": 2.4539, + "step": 12472 + }, + { + "epoch": 1.0066177063998063, + "grad_norm": 0.6918472051620483, + "learning_rate": 6.270366303134226e-05, + "loss": 2.4756, + "step": 12473 + }, + { + "epoch": 1.0066984101363894, + "grad_norm": 0.7067514657974243, + "learning_rate": 6.26890156851635e-05, + "loss": 2.4925, + "step": 12474 + }, + { + "epoch": 1.0067791138729723, + "grad_norm": 0.6517517566680908, + "learning_rate": 6.267436926887197e-05, + "loss": 2.4339, + "step": 12475 + }, + { + "epoch": 1.0068598176095553, + "grad_norm": 0.673367977142334, + "learning_rate": 6.265972378283274e-05, + "loss": 2.416, + "step": 12476 + }, + { + "epoch": 1.0069405213461384, + "grad_norm": 0.7190212607383728, + "learning_rate": 6.26450792274108e-05, + "loss": 2.4822, + "step": 12477 + }, + { + "epoch": 1.0070212250827213, + "grad_norm": 0.7568029165267944, + "learning_rate": 6.263043560297112e-05, + "loss": 2.4607, + "step": 12478 + }, + { + "epoch": 1.0071019288193044, + "grad_norm": 0.6860609650611877, + "learning_rate": 6.261579290987866e-05, + "loss": 2.4429, + "step": 12479 + }, + { + "epoch": 1.0071826325558872, + "grad_norm": 0.7066059112548828, + "learning_rate": 6.260115114849839e-05, + "loss": 2.5504, + "step": 12480 + }, + { + "epoch": 1.0072633362924703, + "grad_norm": 0.6857946515083313, + "learning_rate": 6.25865103191952e-05, + "loss": 2.4776, + "step": 12481 + }, + { + "epoch": 1.0073440400290534, + "grad_norm": 0.6879859566688538, + "learning_rate": 6.257187042233396e-05, + "loss": 2.3651, + "step": 12482 + }, + { + "epoch": 
1.0074247437656363, + "grad_norm": 0.6900867223739624, + "learning_rate": 6.255723145827954e-05, + "loss": 2.4644, + "step": 12483 + }, + { + "epoch": 1.0075054475022194, + "grad_norm": 0.7144716382026672, + "learning_rate": 6.254259342739683e-05, + "loss": 2.4219, + "step": 12484 + }, + { + "epoch": 1.0075861512388025, + "grad_norm": 0.674619197845459, + "learning_rate": 6.252795633005056e-05, + "loss": 2.5038, + "step": 12485 + }, + { + "epoch": 1.0076668549753853, + "grad_norm": 0.7036965489387512, + "learning_rate": 6.251332016660558e-05, + "loss": 2.4784, + "step": 12486 + }, + { + "epoch": 1.0077475587119684, + "grad_norm": 0.7046369910240173, + "learning_rate": 6.249868493742668e-05, + "loss": 2.514, + "step": 12487 + }, + { + "epoch": 1.0078282624485513, + "grad_norm": 0.6933087110519409, + "learning_rate": 6.248405064287854e-05, + "loss": 2.4855, + "step": 12488 + }, + { + "epoch": 1.0079089661851344, + "grad_norm": 0.7210546731948853, + "learning_rate": 6.246941728332594e-05, + "loss": 2.5101, + "step": 12489 + }, + { + "epoch": 1.0079896699217175, + "grad_norm": 0.6738288402557373, + "learning_rate": 6.245478485913361e-05, + "loss": 2.4891, + "step": 12490 + }, + { + "epoch": 1.0080703736583003, + "grad_norm": 0.7023273706436157, + "learning_rate": 6.244015337066611e-05, + "loss": 2.4977, + "step": 12491 + }, + { + "epoch": 1.0081510773948834, + "grad_norm": 0.6761355996131897, + "learning_rate": 6.24255228182882e-05, + "loss": 2.4948, + "step": 12492 + }, + { + "epoch": 1.0082317811314665, + "grad_norm": 0.6427976489067078, + "learning_rate": 6.241089320236448e-05, + "loss": 2.466, + "step": 12493 + }, + { + "epoch": 1.0083124848680494, + "grad_norm": 0.6907719969749451, + "learning_rate": 6.23962645232596e-05, + "loss": 2.437, + "step": 12494 + }, + { + "epoch": 1.0083931886046325, + "grad_norm": 0.709032416343689, + "learning_rate": 6.238163678133807e-05, + "loss": 2.4298, + "step": 12495 + }, + { + "epoch": 1.0084738923412153, + "grad_norm": 
0.7395734786987305, + "learning_rate": 6.236700997696448e-05, + "loss": 2.4502, + "step": 12496 + }, + { + "epoch": 1.0085545960777984, + "grad_norm": 0.6535435914993286, + "learning_rate": 6.23523841105034e-05, + "loss": 2.4494, + "step": 12497 + }, + { + "epoch": 1.0086352998143815, + "grad_norm": 0.6597761511802673, + "learning_rate": 6.23377591823193e-05, + "loss": 2.4377, + "step": 12498 + }, + { + "epoch": 1.0087160035509644, + "grad_norm": 0.6610515713691711, + "learning_rate": 6.232313519277668e-05, + "loss": 2.4328, + "step": 12499 + }, + { + "epoch": 1.0087967072875474, + "grad_norm": 0.6785424947738647, + "learning_rate": 6.230851214224009e-05, + "loss": 2.457, + "step": 12500 + }, + { + "epoch": 1.0088774110241303, + "grad_norm": 0.6939748525619507, + "learning_rate": 6.229389003107383e-05, + "loss": 2.383, + "step": 12501 + }, + { + "epoch": 1.0089581147607134, + "grad_norm": 0.7592256665229797, + "learning_rate": 6.22792688596424e-05, + "loss": 2.4665, + "step": 12502 + }, + { + "epoch": 1.0090388184972965, + "grad_norm": 0.6751298308372498, + "learning_rate": 6.226464862831023e-05, + "loss": 2.491, + "step": 12503 + }, + { + "epoch": 1.0091195222338794, + "grad_norm": 0.682771623134613, + "learning_rate": 6.225002933744164e-05, + "loss": 2.4275, + "step": 12504 + }, + { + "epoch": 1.0092002259704624, + "grad_norm": 0.7314651608467102, + "learning_rate": 6.223541098740098e-05, + "loss": 2.4489, + "step": 12505 + }, + { + "epoch": 1.0092809297070455, + "grad_norm": 0.7132120132446289, + "learning_rate": 6.222079357855261e-05, + "loss": 2.4819, + "step": 12506 + }, + { + "epoch": 1.0093616334436284, + "grad_norm": 0.6571424007415771, + "learning_rate": 6.220617711126082e-05, + "loss": 2.455, + "step": 12507 + }, + { + "epoch": 1.0094423371802115, + "grad_norm": 0.7675301432609558, + "learning_rate": 6.21915615858899e-05, + "loss": 2.5282, + "step": 12508 + }, + { + "epoch": 1.0095230409167943, + "grad_norm": 0.6907868385314941, + "learning_rate": 
6.217694700280408e-05, + "loss": 2.4639, + "step": 12509 + }, + { + "epoch": 1.0096037446533774, + "grad_norm": 0.7223815321922302, + "learning_rate": 6.216233336236764e-05, + "loss": 2.4682, + "step": 12510 + }, + { + "epoch": 1.0096844483899605, + "grad_norm": 0.7325109839439392, + "learning_rate": 6.214772066494474e-05, + "loss": 2.4591, + "step": 12511 + }, + { + "epoch": 1.0097651521265434, + "grad_norm": 0.6589400768280029, + "learning_rate": 6.213310891089957e-05, + "loss": 2.4883, + "step": 12512 + }, + { + "epoch": 1.0098458558631265, + "grad_norm": 0.6692262291908264, + "learning_rate": 6.211849810059635e-05, + "loss": 2.4635, + "step": 12513 + }, + { + "epoch": 1.0099265595997096, + "grad_norm": 0.7352520823478699, + "learning_rate": 6.210388823439914e-05, + "loss": 2.4743, + "step": 12514 + }, + { + "epoch": 1.0100072633362924, + "grad_norm": 0.6631996035575867, + "learning_rate": 6.208927931267212e-05, + "loss": 2.4848, + "step": 12515 + }, + { + "epoch": 1.0100879670728755, + "grad_norm": 0.6985767483711243, + "learning_rate": 6.207467133577937e-05, + "loss": 2.5044, + "step": 12516 + }, + { + "epoch": 1.0101686708094584, + "grad_norm": 0.665635347366333, + "learning_rate": 6.206006430408494e-05, + "loss": 2.4718, + "step": 12517 + }, + { + "epoch": 1.0102493745460415, + "grad_norm": 0.6859133243560791, + "learning_rate": 6.204545821795286e-05, + "loss": 2.4702, + "step": 12518 + }, + { + "epoch": 1.0103300782826246, + "grad_norm": 0.6578841805458069, + "learning_rate": 6.203085307774722e-05, + "loss": 2.4614, + "step": 12519 + }, + { + "epoch": 1.0104107820192074, + "grad_norm": 0.717523455619812, + "learning_rate": 6.201624888383194e-05, + "loss": 2.4412, + "step": 12520 + }, + { + "epoch": 1.0104914857557905, + "grad_norm": 0.7333831787109375, + "learning_rate": 6.200164563657103e-05, + "loss": 2.4157, + "step": 12521 + }, + { + "epoch": 1.0105721894923736, + "grad_norm": 0.6968720555305481, + "learning_rate": 6.198704333632845e-05, + "loss": 
2.4556, + "step": 12522 + }, + { + "epoch": 1.0106528932289565, + "grad_norm": 0.6533070802688599, + "learning_rate": 6.19724419834681e-05, + "loss": 2.43, + "step": 12523 + }, + { + "epoch": 1.0107335969655395, + "grad_norm": 0.7341824769973755, + "learning_rate": 6.195784157835391e-05, + "loss": 2.5326, + "step": 12524 + }, + { + "epoch": 1.0108143007021224, + "grad_norm": 0.752912163734436, + "learning_rate": 6.194324212134974e-05, + "loss": 2.4282, + "step": 12525 + }, + { + "epoch": 1.0108950044387055, + "grad_norm": 0.6538611650466919, + "learning_rate": 6.192864361281951e-05, + "loss": 2.4135, + "step": 12526 + }, + { + "epoch": 1.0109757081752886, + "grad_norm": 0.6931454539299011, + "learning_rate": 6.191404605312695e-05, + "loss": 2.5097, + "step": 12527 + }, + { + "epoch": 1.0110564119118715, + "grad_norm": 0.6317688822746277, + "learning_rate": 6.18994494426359e-05, + "loss": 2.4977, + "step": 12528 + }, + { + "epoch": 1.0111371156484545, + "grad_norm": 0.6793715953826904, + "learning_rate": 6.188485378171024e-05, + "loss": 2.4619, + "step": 12529 + }, + { + "epoch": 1.0112178193850376, + "grad_norm": 0.6696654558181763, + "learning_rate": 6.187025907071361e-05, + "loss": 2.4658, + "step": 12530 + }, + { + "epoch": 1.0112985231216205, + "grad_norm": 0.6788807511329651, + "learning_rate": 6.185566531000979e-05, + "loss": 2.4793, + "step": 12531 + }, + { + "epoch": 1.0113792268582036, + "grad_norm": 0.6933971643447876, + "learning_rate": 6.184107249996253e-05, + "loss": 2.4772, + "step": 12532 + }, + { + "epoch": 1.0114599305947864, + "grad_norm": 0.6866000294685364, + "learning_rate": 6.182648064093546e-05, + "loss": 2.428, + "step": 12533 + }, + { + "epoch": 1.0115406343313695, + "grad_norm": 0.7013841271400452, + "learning_rate": 6.181188973329229e-05, + "loss": 2.5273, + "step": 12534 + }, + { + "epoch": 1.0116213380679526, + "grad_norm": 0.6569108963012695, + "learning_rate": 6.179729977739669e-05, + "loss": 2.4125, + "step": 12535 + }, + { + 
"epoch": 1.0117020418045355, + "grad_norm": 0.7503486275672913, + "learning_rate": 6.17827107736122e-05, + "loss": 2.4385, + "step": 12536 + }, + { + "epoch": 1.0117827455411186, + "grad_norm": 0.6757314205169678, + "learning_rate": 6.176812272230246e-05, + "loss": 2.4364, + "step": 12537 + }, + { + "epoch": 1.0118634492777017, + "grad_norm": 0.6567254662513733, + "learning_rate": 6.175353562383106e-05, + "loss": 2.4992, + "step": 12538 + }, + { + "epoch": 1.0119441530142845, + "grad_norm": 0.7564988732337952, + "learning_rate": 6.17389494785615e-05, + "loss": 2.4777, + "step": 12539 + }, + { + "epoch": 1.0120248567508676, + "grad_norm": 0.6972391605377197, + "learning_rate": 6.172436428685735e-05, + "loss": 2.5041, + "step": 12540 + }, + { + "epoch": 1.0121055604874505, + "grad_norm": 0.6861580610275269, + "learning_rate": 6.170978004908209e-05, + "loss": 2.4684, + "step": 12541 + }, + { + "epoch": 1.0121862642240336, + "grad_norm": 0.6621903777122498, + "learning_rate": 6.169519676559921e-05, + "loss": 2.4614, + "step": 12542 + }, + { + "epoch": 1.0122669679606167, + "grad_norm": 0.6879795789718628, + "learning_rate": 6.168061443677215e-05, + "loss": 2.4765, + "step": 12543 + }, + { + "epoch": 1.0123476716971995, + "grad_norm": 0.6361081004142761, + "learning_rate": 6.166603306296434e-05, + "loss": 2.4792, + "step": 12544 + }, + { + "epoch": 1.0124283754337826, + "grad_norm": 0.6660729050636292, + "learning_rate": 6.165145264453924e-05, + "loss": 2.489, + "step": 12545 + }, + { + "epoch": 1.0125090791703655, + "grad_norm": 0.6900594234466553, + "learning_rate": 6.163687318186015e-05, + "loss": 2.4543, + "step": 12546 + }, + { + "epoch": 1.0125897829069486, + "grad_norm": 0.7195869088172913, + "learning_rate": 6.162229467529046e-05, + "loss": 2.4137, + "step": 12547 + }, + { + "epoch": 1.0126704866435317, + "grad_norm": 0.7030326128005981, + "learning_rate": 6.16077171251935e-05, + "loss": 2.4657, + "step": 12548 + }, + { + "epoch": 1.0127511903801145, + 
"grad_norm": 0.6712052822113037, + "learning_rate": 6.15931405319326e-05, + "loss": 2.4718, + "step": 12549 + }, + { + "epoch": 1.0128318941166976, + "grad_norm": 0.7471029162406921, + "learning_rate": 6.157856489587102e-05, + "loss": 2.4705, + "step": 12550 + }, + { + "epoch": 1.0129125978532807, + "grad_norm": 0.6813762187957764, + "learning_rate": 6.15639902173721e-05, + "loss": 2.4479, + "step": 12551 + }, + { + "epoch": 1.0129933015898636, + "grad_norm": 0.6657249927520752, + "learning_rate": 6.154941649679894e-05, + "loss": 2.4911, + "step": 12552 + }, + { + "epoch": 1.0130740053264466, + "grad_norm": 0.6700132489204407, + "learning_rate": 6.153484373451483e-05, + "loss": 2.4962, + "step": 12553 + }, + { + "epoch": 1.0131547090630295, + "grad_norm": 0.7058695554733276, + "learning_rate": 6.152027193088302e-05, + "loss": 2.3935, + "step": 12554 + }, + { + "epoch": 1.0132354127996126, + "grad_norm": 0.7390396595001221, + "learning_rate": 6.150570108626658e-05, + "loss": 2.4454, + "step": 12555 + }, + { + "epoch": 1.0133161165361957, + "grad_norm": 0.7251414060592651, + "learning_rate": 6.149113120102869e-05, + "loss": 2.4146, + "step": 12556 + }, + { + "epoch": 1.0133968202727786, + "grad_norm": 0.8262537717819214, + "learning_rate": 6.14765622755325e-05, + "loss": 2.4638, + "step": 12557 + }, + { + "epoch": 1.0134775240093616, + "grad_norm": 0.7184064984321594, + "learning_rate": 6.146199431014106e-05, + "loss": 2.3958, + "step": 12558 + }, + { + "epoch": 1.0135582277459447, + "grad_norm": 0.7544865012168884, + "learning_rate": 6.144742730521746e-05, + "loss": 2.4662, + "step": 12559 + }, + { + "epoch": 1.0136389314825276, + "grad_norm": 0.6866207718849182, + "learning_rate": 6.143286126112475e-05, + "loss": 2.4951, + "step": 12560 + }, + { + "epoch": 1.0137196352191107, + "grad_norm": 0.6566087603569031, + "learning_rate": 6.1418296178226e-05, + "loss": 2.4002, + "step": 12561 + }, + { + "epoch": 1.0138003389556935, + "grad_norm": 0.6999008059501648, + 
"learning_rate": 6.140373205688411e-05, + "loss": 2.5306, + "step": 12562 + }, + { + "epoch": 1.0138810426922766, + "grad_norm": 0.6682353615760803, + "learning_rate": 6.138916889746212e-05, + "loss": 2.5565, + "step": 12563 + }, + { + "epoch": 1.0139617464288597, + "grad_norm": 0.7443362474441528, + "learning_rate": 6.137460670032298e-05, + "loss": 2.3958, + "step": 12564 + }, + { + "epoch": 1.0140424501654426, + "grad_norm": 0.6542403697967529, + "learning_rate": 6.136004546582958e-05, + "loss": 2.4394, + "step": 12565 + }, + { + "epoch": 1.0141231539020257, + "grad_norm": 0.6524317264556885, + "learning_rate": 6.134548519434488e-05, + "loss": 2.4979, + "step": 12566 + }, + { + "epoch": 1.0142038576386088, + "grad_norm": 0.6605600118637085, + "learning_rate": 6.133092588623174e-05, + "loss": 2.4827, + "step": 12567 + }, + { + "epoch": 1.0142845613751916, + "grad_norm": 0.7114397883415222, + "learning_rate": 6.1316367541853e-05, + "loss": 2.4799, + "step": 12568 + }, + { + "epoch": 1.0143652651117747, + "grad_norm": 0.6607296466827393, + "learning_rate": 6.130181016157148e-05, + "loss": 2.4991, + "step": 12569 + }, + { + "epoch": 1.0144459688483576, + "grad_norm": 0.6750844717025757, + "learning_rate": 6.128725374575005e-05, + "loss": 2.4451, + "step": 12570 + }, + { + "epoch": 1.0145266725849407, + "grad_norm": 0.6978901624679565, + "learning_rate": 6.127269829475141e-05, + "loss": 2.4608, + "step": 12571 + }, + { + "epoch": 1.0146073763215238, + "grad_norm": 0.676343560218811, + "learning_rate": 6.125814380893838e-05, + "loss": 2.4536, + "step": 12572 + }, + { + "epoch": 1.0146880800581066, + "grad_norm": 0.7082604765892029, + "learning_rate": 6.124359028867368e-05, + "loss": 2.45, + "step": 12573 + }, + { + "epoch": 1.0147687837946897, + "grad_norm": 0.7049853205680847, + "learning_rate": 6.122903773432003e-05, + "loss": 2.4378, + "step": 12574 + }, + { + "epoch": 1.0148494875312728, + "grad_norm": 0.6329593062400818, + "learning_rate": 6.121448614624009e-05, + 
"loss": 2.4386, + "step": 12575 + }, + { + "epoch": 1.0149301912678557, + "grad_norm": 0.7249468564987183, + "learning_rate": 6.119993552479655e-05, + "loss": 2.5191, + "step": 12576 + }, + { + "epoch": 1.0150108950044388, + "grad_norm": 0.7028193473815918, + "learning_rate": 6.118538587035206e-05, + "loss": 2.4376, + "step": 12577 + }, + { + "epoch": 1.0150915987410216, + "grad_norm": 0.697382926940918, + "learning_rate": 6.117083718326917e-05, + "loss": 2.4797, + "step": 12578 + }, + { + "epoch": 1.0151723024776047, + "grad_norm": 0.7386965155601501, + "learning_rate": 6.115628946391055e-05, + "loss": 2.4512, + "step": 12579 + }, + { + "epoch": 1.0152530062141878, + "grad_norm": 0.6614577174186707, + "learning_rate": 6.114174271263875e-05, + "loss": 2.4404, + "step": 12580 + }, + { + "epoch": 1.0153337099507707, + "grad_norm": 0.6927464604377747, + "learning_rate": 6.112719692981627e-05, + "loss": 2.47, + "step": 12581 + }, + { + "epoch": 1.0154144136873537, + "grad_norm": 0.7004262208938599, + "learning_rate": 6.111265211580566e-05, + "loss": 2.4212, + "step": 12582 + }, + { + "epoch": 1.0154951174239368, + "grad_norm": 0.71146559715271, + "learning_rate": 6.109810827096942e-05, + "loss": 2.4431, + "step": 12583 + }, + { + "epoch": 1.0155758211605197, + "grad_norm": 0.6857032775878906, + "learning_rate": 6.108356539567e-05, + "loss": 2.453, + "step": 12584 + }, + { + "epoch": 1.0156565248971028, + "grad_norm": 0.6976168155670166, + "learning_rate": 6.106902349026986e-05, + "loss": 2.4718, + "step": 12585 + }, + { + "epoch": 1.0157372286336857, + "grad_norm": 0.7158414125442505, + "learning_rate": 6.105448255513146e-05, + "loss": 2.425, + "step": 12586 + }, + { + "epoch": 1.0158179323702687, + "grad_norm": 0.6611737608909607, + "learning_rate": 6.103994259061714e-05, + "loss": 2.4563, + "step": 12587 + }, + { + "epoch": 1.0158986361068518, + "grad_norm": 0.7262980937957764, + "learning_rate": 6.102540359708926e-05, + "loss": 2.4538, + "step": 12588 + }, + { + 
"epoch": 1.0159793398434347, + "grad_norm": 0.7123451828956604, + "learning_rate": 6.10108655749102e-05, + "loss": 2.4677, + "step": 12589 + }, + { + "epoch": 1.0160600435800178, + "grad_norm": 0.7135589122772217, + "learning_rate": 6.099632852444235e-05, + "loss": 2.4312, + "step": 12590 + }, + { + "epoch": 1.0161407473166009, + "grad_norm": 0.6509461998939514, + "learning_rate": 6.09817924460479e-05, + "loss": 2.4716, + "step": 12591 + }, + { + "epoch": 1.0162214510531837, + "grad_norm": 0.8835915923118591, + "learning_rate": 6.096725734008919e-05, + "loss": 2.4817, + "step": 12592 + }, + { + "epoch": 1.0163021547897668, + "grad_norm": 0.7084136605262756, + "learning_rate": 6.095272320692846e-05, + "loss": 2.483, + "step": 12593 + }, + { + "epoch": 1.0163828585263497, + "grad_norm": 0.6866818070411682, + "learning_rate": 6.0938190046927934e-05, + "loss": 2.4838, + "step": 12594 + }, + { + "epoch": 1.0164635622629328, + "grad_norm": 0.7297510504722595, + "learning_rate": 6.0923657860449824e-05, + "loss": 2.4675, + "step": 12595 + }, + { + "epoch": 1.0165442659995159, + "grad_norm": 0.6735619306564331, + "learning_rate": 6.090912664785633e-05, + "loss": 2.444, + "step": 12596 + }, + { + "epoch": 1.0166249697360987, + "grad_norm": 0.7046451568603516, + "learning_rate": 6.0894596409509565e-05, + "loss": 2.4757, + "step": 12597 + }, + { + "epoch": 1.0167056734726818, + "grad_norm": 0.6646085977554321, + "learning_rate": 6.0880067145771656e-05, + "loss": 2.4772, + "step": 12598 + }, + { + "epoch": 1.0167863772092647, + "grad_norm": 0.7217094302177429, + "learning_rate": 6.086553885700478e-05, + "loss": 2.4589, + "step": 12599 + }, + { + "epoch": 1.0168670809458478, + "grad_norm": 0.647378146648407, + "learning_rate": 6.085101154357093e-05, + "loss": 2.4327, + "step": 12600 + }, + { + "epoch": 1.0169477846824309, + "grad_norm": 0.6907125115394592, + "learning_rate": 6.083648520583223e-05, + "loss": 2.467, + "step": 12601 + }, + { + "epoch": 1.0170284884190137, + 
"grad_norm": 0.690433919429779, + "learning_rate": 6.0821959844150687e-05, + "loss": 2.488, + "step": 12602 + }, + { + "epoch": 1.0171091921555968, + "grad_norm": 0.6528738737106323, + "learning_rate": 6.080743545888833e-05, + "loss": 2.5028, + "step": 12603 + }, + { + "epoch": 1.01718989589218, + "grad_norm": 0.6962323784828186, + "learning_rate": 6.079291205040711e-05, + "loss": 2.5381, + "step": 12604 + }, + { + "epoch": 1.0172705996287628, + "grad_norm": 0.7386075854301453, + "learning_rate": 6.077838961906902e-05, + "loss": 2.4445, + "step": 12605 + }, + { + "epoch": 1.0173513033653458, + "grad_norm": 0.7382189631462097, + "learning_rate": 6.0763868165236025e-05, + "loss": 2.4926, + "step": 12606 + }, + { + "epoch": 1.0174320071019287, + "grad_norm": 0.7291865944862366, + "learning_rate": 6.074934768926995e-05, + "loss": 2.4624, + "step": 12607 + }, + { + "epoch": 1.0175127108385118, + "grad_norm": 0.754843533039093, + "learning_rate": 6.073482819153275e-05, + "loss": 2.4291, + "step": 12608 + }, + { + "epoch": 1.017593414575095, + "grad_norm": 0.6827771663665771, + "learning_rate": 6.072030967238628e-05, + "loss": 2.453, + "step": 12609 + }, + { + "epoch": 1.0176741183116778, + "grad_norm": 0.7138541340827942, + "learning_rate": 6.0705792132192355e-05, + "loss": 2.5172, + "step": 12610 + }, + { + "epoch": 1.0177548220482608, + "grad_norm": 0.6539924740791321, + "learning_rate": 6.06912755713128e-05, + "loss": 2.4393, + "step": 12611 + }, + { + "epoch": 1.017835525784844, + "grad_norm": 0.7021273970603943, + "learning_rate": 6.067675999010945e-05, + "loss": 2.4519, + "step": 12612 + }, + { + "epoch": 1.0179162295214268, + "grad_norm": 0.7124225497245789, + "learning_rate": 6.0662245388944004e-05, + "loss": 2.4417, + "step": 12613 + }, + { + "epoch": 1.0179969332580099, + "grad_norm": 0.7214948534965515, + "learning_rate": 6.064773176817823e-05, + "loss": 2.4708, + "step": 12614 + }, + { + "epoch": 1.0180776369945927, + "grad_norm": 0.6738584041595459, + 
"learning_rate": 6.063321912817386e-05, + "loss": 2.4574, + "step": 12615 + }, + { + "epoch": 1.0181583407311758, + "grad_norm": 0.7215890884399414, + "learning_rate": 6.061870746929257e-05, + "loss": 2.4903, + "step": 12616 + }, + { + "epoch": 1.018239044467759, + "grad_norm": 0.6720155477523804, + "learning_rate": 6.0604196791896016e-05, + "loss": 2.4251, + "step": 12617 + }, + { + "epoch": 1.0183197482043418, + "grad_norm": 0.7046420574188232, + "learning_rate": 6.058968709634587e-05, + "loss": 2.446, + "step": 12618 + }, + { + "epoch": 1.0184004519409249, + "grad_norm": 0.6419540047645569, + "learning_rate": 6.0575178383003764e-05, + "loss": 2.4052, + "step": 12619 + }, + { + "epoch": 1.018481155677508, + "grad_norm": 0.6948695182800293, + "learning_rate": 6.0560670652231235e-05, + "loss": 2.5068, + "step": 12620 + }, + { + "epoch": 1.0185618594140908, + "grad_norm": 0.7274870276451111, + "learning_rate": 6.05461639043899e-05, + "loss": 2.4705, + "step": 12621 + }, + { + "epoch": 1.018642563150674, + "grad_norm": 0.6809766292572021, + "learning_rate": 6.053165813984134e-05, + "loss": 2.3767, + "step": 12622 + }, + { + "epoch": 1.0187232668872568, + "grad_norm": 0.6197625994682312, + "learning_rate": 6.0517153358946985e-05, + "loss": 2.4639, + "step": 12623 + }, + { + "epoch": 1.0188039706238399, + "grad_norm": 0.6613010764122009, + "learning_rate": 6.050264956206837e-05, + "loss": 2.5155, + "step": 12624 + }, + { + "epoch": 1.018884674360423, + "grad_norm": 0.7335553765296936, + "learning_rate": 6.0488146749567e-05, + "loss": 2.5344, + "step": 12625 + }, + { + "epoch": 1.0189653780970058, + "grad_norm": 0.7175146341323853, + "learning_rate": 6.047364492180428e-05, + "loss": 2.4972, + "step": 12626 + }, + { + "epoch": 1.019046081833589, + "grad_norm": 0.6825357675552368, + "learning_rate": 6.045914407914166e-05, + "loss": 2.4356, + "step": 12627 + }, + { + "epoch": 1.019126785570172, + "grad_norm": 0.6369633078575134, + "learning_rate": 6.044464422194056e-05, + 
"loss": 2.4692, + "step": 12628 + }, + { + "epoch": 1.0192074893067549, + "grad_norm": 0.7407073378562927, + "learning_rate": 6.0430145350562264e-05, + "loss": 2.4565, + "step": 12629 + }, + { + "epoch": 1.019288193043338, + "grad_norm": 0.6836552619934082, + "learning_rate": 6.041564746536821e-05, + "loss": 2.4357, + "step": 12630 + }, + { + "epoch": 1.0193688967799208, + "grad_norm": 0.6778741478919983, + "learning_rate": 6.040115056671972e-05, + "loss": 2.424, + "step": 12631 + }, + { + "epoch": 1.019449600516504, + "grad_norm": 0.6440724730491638, + "learning_rate": 6.0386654654978035e-05, + "loss": 2.4455, + "step": 12632 + }, + { + "epoch": 1.019530304253087, + "grad_norm": 0.681376039981842, + "learning_rate": 6.0372159730504476e-05, + "loss": 2.4562, + "step": 12633 + }, + { + "epoch": 1.0196110079896699, + "grad_norm": 0.657462477684021, + "learning_rate": 6.035766579366029e-05, + "loss": 2.4315, + "step": 12634 + }, + { + "epoch": 1.019691711726253, + "grad_norm": 0.6540380716323853, + "learning_rate": 6.0343172844806706e-05, + "loss": 2.4789, + "step": 12635 + }, + { + "epoch": 1.019772415462836, + "grad_norm": 0.711883008480072, + "learning_rate": 6.03286808843049e-05, + "loss": 2.4178, + "step": 12636 + }, + { + "epoch": 1.019853119199419, + "grad_norm": 0.6746736168861389, + "learning_rate": 6.031418991251607e-05, + "loss": 2.4351, + "step": 12637 + }, + { + "epoch": 1.019933822936002, + "grad_norm": 0.677237331867218, + "learning_rate": 6.02996999298014e-05, + "loss": 2.4335, + "step": 12638 + }, + { + "epoch": 1.0200145266725849, + "grad_norm": 0.6950497627258301, + "learning_rate": 6.0285210936521955e-05, + "loss": 2.5178, + "step": 12639 + }, + { + "epoch": 1.020095230409168, + "grad_norm": 0.6349243521690369, + "learning_rate": 6.027072293303885e-05, + "loss": 2.4405, + "step": 12640 + }, + { + "epoch": 1.020175934145751, + "grad_norm": 0.744276762008667, + "learning_rate": 6.0256235919713236e-05, + "loss": 2.5156, + "step": 12641 + }, + { + 
"epoch": 1.020256637882334, + "grad_norm": 0.7697997689247131, + "learning_rate": 6.0241749896906075e-05, + "loss": 2.4393, + "step": 12642 + }, + { + "epoch": 1.020337341618917, + "grad_norm": 0.7784204483032227, + "learning_rate": 6.022726486497844e-05, + "loss": 2.4565, + "step": 12643 + }, + { + "epoch": 1.0204180453555, + "grad_norm": 0.7434312701225281, + "learning_rate": 6.021278082429136e-05, + "loss": 2.4637, + "step": 12644 + }, + { + "epoch": 1.020498749092083, + "grad_norm": 0.7770118117332458, + "learning_rate": 6.019829777520575e-05, + "loss": 2.4998, + "step": 12645 + }, + { + "epoch": 1.020579452828666, + "grad_norm": 0.7021752595901489, + "learning_rate": 6.01838157180826e-05, + "loss": 2.4661, + "step": 12646 + }, + { + "epoch": 1.0206601565652489, + "grad_norm": 0.6812437176704407, + "learning_rate": 6.0169334653282895e-05, + "loss": 2.4611, + "step": 12647 + }, + { + "epoch": 1.020740860301832, + "grad_norm": 0.757724940776825, + "learning_rate": 6.0154854581167455e-05, + "loss": 2.4427, + "step": 12648 + }, + { + "epoch": 1.020821564038415, + "grad_norm": 0.7386252880096436, + "learning_rate": 6.014037550209718e-05, + "loss": 2.424, + "step": 12649 + }, + { + "epoch": 1.020902267774998, + "grad_norm": 0.7138059735298157, + "learning_rate": 6.012589741643295e-05, + "loss": 2.4951, + "step": 12650 + }, + { + "epoch": 1.020982971511581, + "grad_norm": 0.714022159576416, + "learning_rate": 6.011142032453561e-05, + "loss": 2.4398, + "step": 12651 + }, + { + "epoch": 1.0210636752481639, + "grad_norm": 0.6961550712585449, + "learning_rate": 6.00969442267659e-05, + "loss": 2.4495, + "step": 12652 + }, + { + "epoch": 1.021144378984747, + "grad_norm": 0.7196643948554993, + "learning_rate": 6.008246912348467e-05, + "loss": 2.4449, + "step": 12653 + }, + { + "epoch": 1.02122508272133, + "grad_norm": 0.6163341999053955, + "learning_rate": 6.006799501505268e-05, + "loss": 2.4108, + "step": 12654 + }, + { + "epoch": 1.021305786457913, + "grad_norm": 
0.6657030582427979, + "learning_rate": 6.005352190183061e-05, + "loss": 2.4328, + "step": 12655 + }, + { + "epoch": 1.021386490194496, + "grad_norm": 0.7183353900909424, + "learning_rate": 6.00390497841792e-05, + "loss": 2.4912, + "step": 12656 + }, + { + "epoch": 1.021467193931079, + "grad_norm": 0.6912575364112854, + "learning_rate": 6.002457866245916e-05, + "loss": 2.4597, + "step": 12657 + }, + { + "epoch": 1.021547897667662, + "grad_norm": 0.7395210266113281, + "learning_rate": 6.0010108537031084e-05, + "loss": 2.4823, + "step": 12658 + }, + { + "epoch": 1.021628601404245, + "grad_norm": 0.722618043422699, + "learning_rate": 5.9995639408255636e-05, + "loss": 2.4924, + "step": 12659 + }, + { + "epoch": 1.021709305140828, + "grad_norm": 0.739009439945221, + "learning_rate": 5.998117127649344e-05, + "loss": 2.4454, + "step": 12660 + }, + { + "epoch": 1.021790008877411, + "grad_norm": 0.7017633318901062, + "learning_rate": 5.996670414210506e-05, + "loss": 2.5058, + "step": 12661 + }, + { + "epoch": 1.021870712613994, + "grad_norm": 0.742664635181427, + "learning_rate": 5.9952238005451046e-05, + "loss": 2.436, + "step": 12662 + }, + { + "epoch": 1.021951416350577, + "grad_norm": 0.6865660548210144, + "learning_rate": 5.9937772866892e-05, + "loss": 2.4364, + "step": 12663 + }, + { + "epoch": 1.02203212008716, + "grad_norm": 0.7376219034194946, + "learning_rate": 5.992330872678833e-05, + "loss": 2.4975, + "step": 12664 + }, + { + "epoch": 1.0221128238237431, + "grad_norm": 0.6496078372001648, + "learning_rate": 5.990884558550054e-05, + "loss": 2.4651, + "step": 12665 + }, + { + "epoch": 1.022193527560326, + "grad_norm": 0.7178322076797485, + "learning_rate": 5.989438344338915e-05, + "loss": 2.5015, + "step": 12666 + }, + { + "epoch": 1.022274231296909, + "grad_norm": 0.7084102034568787, + "learning_rate": 5.987992230081459e-05, + "loss": 2.4741, + "step": 12667 + }, + { + "epoch": 1.022354935033492, + "grad_norm": 0.6634935736656189, + "learning_rate": 
5.986546215813722e-05, + "loss": 2.4255, + "step": 12668 + }, + { + "epoch": 1.022435638770075, + "grad_norm": 0.6897543668746948, + "learning_rate": 5.985100301571742e-05, + "loss": 2.4682, + "step": 12669 + }, + { + "epoch": 1.0225163425066581, + "grad_norm": 0.6643948554992676, + "learning_rate": 5.9836544873915614e-05, + "loss": 2.4009, + "step": 12670 + }, + { + "epoch": 1.022597046243241, + "grad_norm": 0.681252658367157, + "learning_rate": 5.982208773309208e-05, + "loss": 2.4542, + "step": 12671 + }, + { + "epoch": 1.022677749979824, + "grad_norm": 0.7608681917190552, + "learning_rate": 5.980763159360714e-05, + "loss": 2.5614, + "step": 12672 + }, + { + "epoch": 1.0227584537164072, + "grad_norm": 0.6855095028877258, + "learning_rate": 5.979317645582112e-05, + "loss": 2.4505, + "step": 12673 + }, + { + "epoch": 1.02283915745299, + "grad_norm": 0.6846089363098145, + "learning_rate": 5.97787223200942e-05, + "loss": 2.4438, + "step": 12674 + }, + { + "epoch": 1.0229198611895731, + "grad_norm": 0.7198090553283691, + "learning_rate": 5.9764269186786684e-05, + "loss": 2.4469, + "step": 12675 + }, + { + "epoch": 1.023000564926156, + "grad_norm": 0.7120245099067688, + "learning_rate": 5.9749817056258764e-05, + "loss": 2.4626, + "step": 12676 + }, + { + "epoch": 1.023081268662739, + "grad_norm": 0.6839897036552429, + "learning_rate": 5.973536592887059e-05, + "loss": 2.4384, + "step": 12677 + }, + { + "epoch": 1.0231619723993222, + "grad_norm": 0.7053773999214172, + "learning_rate": 5.9720915804982356e-05, + "loss": 2.4554, + "step": 12678 + }, + { + "epoch": 1.023242676135905, + "grad_norm": 0.7114294767379761, + "learning_rate": 5.970646668495421e-05, + "loss": 2.3964, + "step": 12679 + }, + { + "epoch": 1.0233233798724881, + "grad_norm": 0.7001516819000244, + "learning_rate": 5.9692018569146224e-05, + "loss": 2.5216, + "step": 12680 + }, + { + "epoch": 1.0234040836090712, + "grad_norm": 0.6715773940086365, + "learning_rate": 5.96775714579185e-05, + "loss": 2.4595, + 
"step": 12681 + }, + { + "epoch": 1.023484787345654, + "grad_norm": 0.6856278777122498, + "learning_rate": 5.96631253516311e-05, + "loss": 2.4637, + "step": 12682 + }, + { + "epoch": 1.0235654910822372, + "grad_norm": 0.6785625219345093, + "learning_rate": 5.96486802506441e-05, + "loss": 2.4615, + "step": 12683 + }, + { + "epoch": 1.02364619481882, + "grad_norm": 0.6834213137626648, + "learning_rate": 5.963423615531743e-05, + "loss": 2.4729, + "step": 12684 + }, + { + "epoch": 1.023726898555403, + "grad_norm": 0.6729516386985779, + "learning_rate": 5.961979306601109e-05, + "loss": 2.4013, + "step": 12685 + }, + { + "epoch": 1.0238076022919862, + "grad_norm": 0.6785775423049927, + "learning_rate": 5.960535098308511e-05, + "loss": 2.4825, + "step": 12686 + }, + { + "epoch": 1.023888306028569, + "grad_norm": 0.67277991771698, + "learning_rate": 5.959090990689934e-05, + "loss": 2.4606, + "step": 12687 + }, + { + "epoch": 1.0239690097651521, + "grad_norm": 0.7679588198661804, + "learning_rate": 5.957646983781373e-05, + "loss": 2.5234, + "step": 12688 + }, + { + "epoch": 1.0240497135017352, + "grad_norm": 0.6597407460212708, + "learning_rate": 5.956203077618821e-05, + "loss": 2.4699, + "step": 12689 + }, + { + "epoch": 1.024130417238318, + "grad_norm": 0.6743008494377136, + "learning_rate": 5.9547592722382525e-05, + "loss": 2.4266, + "step": 12690 + }, + { + "epoch": 1.0242111209749012, + "grad_norm": 0.7223396897315979, + "learning_rate": 5.953315567675657e-05, + "loss": 2.5117, + "step": 12691 + }, + { + "epoch": 1.024291824711484, + "grad_norm": 0.6729528307914734, + "learning_rate": 5.951871963967022e-05, + "loss": 2.4586, + "step": 12692 + }, + { + "epoch": 1.0243725284480671, + "grad_norm": 0.6523739695549011, + "learning_rate": 5.950428461148314e-05, + "loss": 2.4408, + "step": 12693 + }, + { + "epoch": 1.0244532321846502, + "grad_norm": 0.6830984950065613, + "learning_rate": 5.9489850592555164e-05, + "loss": 2.4094, + "step": 12694 + }, + { + "epoch": 
1.024533935921233, + "grad_norm": 0.6223493814468384, + "learning_rate": 5.9475417583246006e-05, + "loss": 2.4105, + "step": 12695 + }, + { + "epoch": 1.0246146396578162, + "grad_norm": 0.6506635546684265, + "learning_rate": 5.9460985583915374e-05, + "loss": 2.4451, + "step": 12696 + }, + { + "epoch": 1.024695343394399, + "grad_norm": 0.7626760005950928, + "learning_rate": 5.944655459492293e-05, + "loss": 2.4643, + "step": 12697 + }, + { + "epoch": 1.0247760471309821, + "grad_norm": 0.7074631452560425, + "learning_rate": 5.943212461662837e-05, + "loss": 2.4662, + "step": 12698 + }, + { + "epoch": 1.0248567508675652, + "grad_norm": 0.718083918094635, + "learning_rate": 5.9417695649391346e-05, + "loss": 2.4686, + "step": 12699 + }, + { + "epoch": 1.024937454604148, + "grad_norm": 0.6850628852844238, + "learning_rate": 5.9403267693571384e-05, + "loss": 2.4542, + "step": 12700 + }, + { + "epoch": 1.0250181583407312, + "grad_norm": 0.6662585735321045, + "learning_rate": 5.938884074952812e-05, + "loss": 2.4676, + "step": 12701 + }, + { + "epoch": 1.0250988620773143, + "grad_norm": 0.6806240677833557, + "learning_rate": 5.9374414817621114e-05, + "loss": 2.4243, + "step": 12702 + }, + { + "epoch": 1.0251795658138971, + "grad_norm": 0.6763548851013184, + "learning_rate": 5.9359989898209876e-05, + "loss": 2.4389, + "step": 12703 + }, + { + "epoch": 1.0252602695504802, + "grad_norm": 0.7390143275260925, + "learning_rate": 5.934556599165393e-05, + "loss": 2.4667, + "step": 12704 + }, + { + "epoch": 1.025340973287063, + "grad_norm": 0.6159299612045288, + "learning_rate": 5.933114309831276e-05, + "loss": 2.3832, + "step": 12705 + }, + { + "epoch": 1.0254216770236462, + "grad_norm": 0.6779586672782898, + "learning_rate": 5.931672121854579e-05, + "loss": 2.4615, + "step": 12706 + }, + { + "epoch": 1.0255023807602293, + "grad_norm": 0.643800675868988, + "learning_rate": 5.930230035271247e-05, + "loss": 2.4725, + "step": 12707 + }, + { + "epoch": 1.0255830844968121, + "grad_norm": 
0.6605903506278992, + "learning_rate": 5.928788050117227e-05, + "loss": 2.4332, + "step": 12708 + }, + { + "epoch": 1.0256637882333952, + "grad_norm": 0.7046334743499756, + "learning_rate": 5.927346166428446e-05, + "loss": 2.4445, + "step": 12709 + }, + { + "epoch": 1.0257444919699783, + "grad_norm": 0.6536325216293335, + "learning_rate": 5.925904384240843e-05, + "loss": 2.4168, + "step": 12710 + }, + { + "epoch": 1.0258251957065612, + "grad_norm": 0.6861097812652588, + "learning_rate": 5.9244627035903564e-05, + "loss": 2.512, + "step": 12711 + }, + { + "epoch": 1.0259058994431443, + "grad_norm": 0.6782278418540955, + "learning_rate": 5.923021124512911e-05, + "loss": 2.4667, + "step": 12712 + }, + { + "epoch": 1.0259866031797271, + "grad_norm": 0.724435031414032, + "learning_rate": 5.921579647044436e-05, + "loss": 2.4828, + "step": 12713 + }, + { + "epoch": 1.0260673069163102, + "grad_norm": 0.6690630316734314, + "learning_rate": 5.9201382712208575e-05, + "loss": 2.4832, + "step": 12714 + }, + { + "epoch": 1.0261480106528933, + "grad_norm": 0.7045348286628723, + "learning_rate": 5.9186969970781015e-05, + "loss": 2.4576, + "step": 12715 + }, + { + "epoch": 1.0262287143894762, + "grad_norm": 0.673321008682251, + "learning_rate": 5.9172558246520796e-05, + "loss": 2.3986, + "step": 12716 + }, + { + "epoch": 1.0263094181260592, + "grad_norm": 0.7184785008430481, + "learning_rate": 5.915814753978717e-05, + "loss": 2.4008, + "step": 12717 + }, + { + "epoch": 1.0263901218626423, + "grad_norm": 0.6971293091773987, + "learning_rate": 5.914373785093931e-05, + "loss": 2.4559, + "step": 12718 + }, + { + "epoch": 1.0264708255992252, + "grad_norm": 0.6941563487052917, + "learning_rate": 5.912932918033626e-05, + "loss": 2.4787, + "step": 12719 + }, + { + "epoch": 1.0265515293358083, + "grad_norm": 0.6276142001152039, + "learning_rate": 5.911492152833715e-05, + "loss": 2.4275, + "step": 12720 + }, + { + "epoch": 1.0266322330723912, + "grad_norm": 0.715928316116333, + 
"learning_rate": 5.9100514895301106e-05, + "loss": 2.4127, + "step": 12721 + }, + { + "epoch": 1.0267129368089742, + "grad_norm": 0.7004076838493347, + "learning_rate": 5.908610928158713e-05, + "loss": 2.4651, + "step": 12722 + }, + { + "epoch": 1.0267936405455573, + "grad_norm": 0.6761921048164368, + "learning_rate": 5.907170468755425e-05, + "loss": 2.4245, + "step": 12723 + }, + { + "epoch": 1.0268743442821402, + "grad_norm": 0.7246574759483337, + "learning_rate": 5.9057301113561515e-05, + "loss": 2.4489, + "step": 12724 + }, + { + "epoch": 1.0269550480187233, + "grad_norm": 0.7196606397628784, + "learning_rate": 5.904289855996783e-05, + "loss": 2.4357, + "step": 12725 + }, + { + "epoch": 1.0270357517553064, + "grad_norm": 0.7142692804336548, + "learning_rate": 5.902849702713216e-05, + "loss": 2.4821, + "step": 12726 + }, + { + "epoch": 1.0271164554918892, + "grad_norm": 0.7207832336425781, + "learning_rate": 5.9014096515413454e-05, + "loss": 2.4337, + "step": 12727 + }, + { + "epoch": 1.0271971592284723, + "grad_norm": 0.6865695714950562, + "learning_rate": 5.899969702517063e-05, + "loss": 2.4549, + "step": 12728 + }, + { + "epoch": 1.0272778629650552, + "grad_norm": 0.7136662006378174, + "learning_rate": 5.898529855676249e-05, + "loss": 2.4606, + "step": 12729 + }, + { + "epoch": 1.0273585667016383, + "grad_norm": 0.701885998249054, + "learning_rate": 5.897090111054795e-05, + "loss": 2.4913, + "step": 12730 + }, + { + "epoch": 1.0274392704382214, + "grad_norm": 0.6671354174613953, + "learning_rate": 5.8956504686885805e-05, + "loss": 2.4064, + "step": 12731 + }, + { + "epoch": 1.0275199741748042, + "grad_norm": 0.6720621585845947, + "learning_rate": 5.894210928613484e-05, + "loss": 2.4908, + "step": 12732 + }, + { + "epoch": 1.0276006779113873, + "grad_norm": 0.7530980706214905, + "learning_rate": 5.892771490865383e-05, + "loss": 2.4486, + "step": 12733 + }, + { + "epoch": 1.0276813816479704, + "grad_norm": 0.6771122813224792, + "learning_rate": 
5.891332155480158e-05, + "loss": 2.3954, + "step": 12734 + }, + { + "epoch": 1.0277620853845533, + "grad_norm": 0.6779236793518066, + "learning_rate": 5.889892922493671e-05, + "loss": 2.4404, + "step": 12735 + }, + { + "epoch": 1.0278427891211364, + "grad_norm": 0.7593358755111694, + "learning_rate": 5.8884537919417974e-05, + "loss": 2.4997, + "step": 12736 + }, + { + "epoch": 1.0279234928577192, + "grad_norm": 0.672686755657196, + "learning_rate": 5.8870147638604044e-05, + "loss": 2.5394, + "step": 12737 + }, + { + "epoch": 1.0280041965943023, + "grad_norm": 0.6727546453475952, + "learning_rate": 5.885575838285353e-05, + "loss": 2.4554, + "step": 12738 + }, + { + "epoch": 1.0280849003308854, + "grad_norm": 0.7092764377593994, + "learning_rate": 5.884137015252507e-05, + "loss": 2.4568, + "step": 12739 + }, + { + "epoch": 1.0281656040674683, + "grad_norm": 0.6988070011138916, + "learning_rate": 5.882698294797728e-05, + "loss": 2.4453, + "step": 12740 + }, + { + "epoch": 1.0282463078040514, + "grad_norm": 0.7578697204589844, + "learning_rate": 5.8812596769568676e-05, + "loss": 2.5648, + "step": 12741 + }, + { + "epoch": 1.0283270115406344, + "grad_norm": 0.6523683667182922, + "learning_rate": 5.879821161765782e-05, + "loss": 2.4088, + "step": 12742 + }, + { + "epoch": 1.0284077152772173, + "grad_norm": 0.6797270178794861, + "learning_rate": 5.878382749260323e-05, + "loss": 2.4465, + "step": 12743 + }, + { + "epoch": 1.0284884190138004, + "grad_norm": 0.6823786497116089, + "learning_rate": 5.876944439476345e-05, + "loss": 2.5053, + "step": 12744 + }, + { + "epoch": 1.0285691227503833, + "grad_norm": 0.6840088367462158, + "learning_rate": 5.875506232449686e-05, + "loss": 2.3771, + "step": 12745 + }, + { + "epoch": 1.0286498264869663, + "grad_norm": 0.6985318064689636, + "learning_rate": 5.8740681282161914e-05, + "loss": 2.4456, + "step": 12746 + }, + { + "epoch": 1.0287305302235494, + "grad_norm": 0.7102388739585876, + "learning_rate": 5.872630126811707e-05, + "loss": 
2.4802, + "step": 12747 + }, + { + "epoch": 1.0288112339601323, + "grad_norm": 0.7917937636375427, + "learning_rate": 5.871192228272067e-05, + "loss": 2.4606, + "step": 12748 + }, + { + "epoch": 1.0288919376967154, + "grad_norm": 0.683397114276886, + "learning_rate": 5.86975443263311e-05, + "loss": 2.5011, + "step": 12749 + }, + { + "epoch": 1.0289726414332985, + "grad_norm": 0.7543408870697021, + "learning_rate": 5.8683167399306724e-05, + "loss": 2.4705, + "step": 12750 + }, + { + "epoch": 1.0290533451698813, + "grad_norm": 0.6946283578872681, + "learning_rate": 5.866879150200579e-05, + "loss": 2.4986, + "step": 12751 + }, + { + "epoch": 1.0291340489064644, + "grad_norm": 0.6535125374794006, + "learning_rate": 5.8654416634786605e-05, + "loss": 2.4203, + "step": 12752 + }, + { + "epoch": 1.0292147526430473, + "grad_norm": 0.7470195889472961, + "learning_rate": 5.8640042798007455e-05, + "loss": 2.5103, + "step": 12753 + }, + { + "epoch": 1.0292954563796304, + "grad_norm": 0.6782363653182983, + "learning_rate": 5.8625669992026535e-05, + "loss": 2.4087, + "step": 12754 + }, + { + "epoch": 1.0293761601162135, + "grad_norm": 0.7601497173309326, + "learning_rate": 5.861129821720207e-05, + "loss": 2.4752, + "step": 12755 + }, + { + "epoch": 1.0294568638527963, + "grad_norm": 0.6875388026237488, + "learning_rate": 5.859692747389227e-05, + "loss": 2.448, + "step": 12756 + }, + { + "epoch": 1.0295375675893794, + "grad_norm": 0.7153629064559937, + "learning_rate": 5.858255776245525e-05, + "loss": 2.4641, + "step": 12757 + }, + { + "epoch": 1.0296182713259623, + "grad_norm": 0.682954728603363, + "learning_rate": 5.8568189083249145e-05, + "loss": 2.441, + "step": 12758 + }, + { + "epoch": 1.0296989750625454, + "grad_norm": 0.6959100961685181, + "learning_rate": 5.855382143663209e-05, + "loss": 2.4316, + "step": 12759 + }, + { + "epoch": 1.0297796787991285, + "grad_norm": 0.7062023878097534, + "learning_rate": 5.8539454822962167e-05, + "loss": 2.4287, + "step": 12760 + }, + { + 
"epoch": 1.0298603825357113, + "grad_norm": 0.706523597240448, + "learning_rate": 5.852508924259736e-05, + "loss": 2.4596, + "step": 12761 + }, + { + "epoch": 1.0299410862722944, + "grad_norm": 0.6908385753631592, + "learning_rate": 5.851072469589578e-05, + "loss": 2.4428, + "step": 12762 + }, + { + "epoch": 1.0300217900088775, + "grad_norm": 0.6810726523399353, + "learning_rate": 5.8496361183215386e-05, + "loss": 2.4902, + "step": 12763 + }, + { + "epoch": 1.0301024937454604, + "grad_norm": 0.661613941192627, + "learning_rate": 5.8481998704914156e-05, + "loss": 2.4256, + "step": 12764 + }, + { + "epoch": 1.0301831974820435, + "grad_norm": 0.6633132100105286, + "learning_rate": 5.846763726135005e-05, + "loss": 2.4512, + "step": 12765 + }, + { + "epoch": 1.0302639012186263, + "grad_norm": 0.6991820335388184, + "learning_rate": 5.8453276852881025e-05, + "loss": 2.3747, + "step": 12766 + }, + { + "epoch": 1.0303446049552094, + "grad_norm": 0.7392076253890991, + "learning_rate": 5.843891747986487e-05, + "loss": 2.438, + "step": 12767 + }, + { + "epoch": 1.0304253086917925, + "grad_norm": 0.6371724605560303, + "learning_rate": 5.842455914265958e-05, + "loss": 2.4627, + "step": 12768 + }, + { + "epoch": 1.0305060124283754, + "grad_norm": 0.6475048661231995, + "learning_rate": 5.841020184162298e-05, + "loss": 2.4883, + "step": 12769 + }, + { + "epoch": 1.0305867161649584, + "grad_norm": 0.6848995685577393, + "learning_rate": 5.839584557711283e-05, + "loss": 2.4452, + "step": 12770 + }, + { + "epoch": 1.0306674199015415, + "grad_norm": 0.7345505952835083, + "learning_rate": 5.838149034948697e-05, + "loss": 2.5121, + "step": 12771 + }, + { + "epoch": 1.0307481236381244, + "grad_norm": 0.715373158454895, + "learning_rate": 5.836713615910318e-05, + "loss": 2.4549, + "step": 12772 + }, + { + "epoch": 1.0308288273747075, + "grad_norm": 0.7371035814285278, + "learning_rate": 5.8352783006319166e-05, + "loss": 2.4633, + "step": 12773 + }, + { + "epoch": 1.0309095311112904, + 
"grad_norm": 0.6843077540397644, + "learning_rate": 5.833843089149267e-05, + "loss": 2.4067, + "step": 12774 + }, + { + "epoch": 1.0309902348478734, + "grad_norm": 0.7398965954780579, + "learning_rate": 5.832407981498136e-05, + "loss": 2.5199, + "step": 12775 + }, + { + "epoch": 1.0310709385844565, + "grad_norm": 0.6860283017158508, + "learning_rate": 5.830972977714294e-05, + "loss": 2.4564, + "step": 12776 + }, + { + "epoch": 1.0311516423210394, + "grad_norm": 0.683893084526062, + "learning_rate": 5.829538077833503e-05, + "loss": 2.4635, + "step": 12777 + }, + { + "epoch": 1.0312323460576225, + "grad_norm": 0.6412089467048645, + "learning_rate": 5.828103281891525e-05, + "loss": 2.4806, + "step": 12778 + }, + { + "epoch": 1.0313130497942056, + "grad_norm": 0.646393895149231, + "learning_rate": 5.826668589924123e-05, + "loss": 2.4674, + "step": 12779 + }, + { + "epoch": 1.0313937535307884, + "grad_norm": 0.6805605292320251, + "learning_rate": 5.825234001967044e-05, + "loss": 2.5145, + "step": 12780 + }, + { + "epoch": 1.0314744572673715, + "grad_norm": 0.681532084941864, + "learning_rate": 5.8237995180560455e-05, + "loss": 2.5041, + "step": 12781 + }, + { + "epoch": 1.0315551610039544, + "grad_norm": 0.6971312165260315, + "learning_rate": 5.8223651382268865e-05, + "loss": 2.5324, + "step": 12782 + }, + { + "epoch": 1.0316358647405375, + "grad_norm": 0.6634463667869568, + "learning_rate": 5.8209308625153026e-05, + "loss": 2.5086, + "step": 12783 + }, + { + "epoch": 1.0317165684771206, + "grad_norm": 0.6752117276191711, + "learning_rate": 5.819496690957047e-05, + "loss": 2.4805, + "step": 12784 + }, + { + "epoch": 1.0317972722137034, + "grad_norm": 0.7242109775543213, + "learning_rate": 5.818062623587861e-05, + "loss": 2.4205, + "step": 12785 + }, + { + "epoch": 1.0318779759502865, + "grad_norm": 0.7338563203811646, + "learning_rate": 5.816628660443486e-05, + "loss": 2.4277, + "step": 12786 + }, + { + "epoch": 1.0319586796868696, + "grad_norm": 0.6764293313026428, + 
"learning_rate": 5.81519480155966e-05, + "loss": 2.5096, + "step": 12787 + }, + { + "epoch": 1.0320393834234525, + "grad_norm": 0.6757099032402039, + "learning_rate": 5.813761046972124e-05, + "loss": 2.468, + "step": 12788 + }, + { + "epoch": 1.0321200871600356, + "grad_norm": 0.7072502374649048, + "learning_rate": 5.8123273967166017e-05, + "loss": 2.4642, + "step": 12789 + }, + { + "epoch": 1.0322007908966184, + "grad_norm": 0.6470256447792053, + "learning_rate": 5.810893850828827e-05, + "loss": 2.4146, + "step": 12790 + }, + { + "epoch": 1.0322814946332015, + "grad_norm": 0.7403351068496704, + "learning_rate": 5.809460409344527e-05, + "loss": 2.512, + "step": 12791 + }, + { + "epoch": 1.0323621983697846, + "grad_norm": 0.6711490154266357, + "learning_rate": 5.808027072299432e-05, + "loss": 2.4602, + "step": 12792 + }, + { + "epoch": 1.0324429021063675, + "grad_norm": 0.7920248508453369, + "learning_rate": 5.806593839729258e-05, + "loss": 2.4512, + "step": 12793 + }, + { + "epoch": 1.0325236058429506, + "grad_norm": 0.6442045569419861, + "learning_rate": 5.805160711669725e-05, + "loss": 2.4165, + "step": 12794 + }, + { + "epoch": 1.0326043095795336, + "grad_norm": 0.6681340932846069, + "learning_rate": 5.803727688156553e-05, + "loss": 2.4296, + "step": 12795 + }, + { + "epoch": 1.0326850133161165, + "grad_norm": 0.6653337478637695, + "learning_rate": 5.802294769225457e-05, + "loss": 2.5165, + "step": 12796 + }, + { + "epoch": 1.0327657170526996, + "grad_norm": 0.6444782018661499, + "learning_rate": 5.8008619549121476e-05, + "loss": 2.4266, + "step": 12797 + }, + { + "epoch": 1.0328464207892825, + "grad_norm": 0.6741451621055603, + "learning_rate": 5.7994292452523394e-05, + "loss": 2.4837, + "step": 12798 + }, + { + "epoch": 1.0329271245258655, + "grad_norm": 0.6629341840744019, + "learning_rate": 5.797996640281731e-05, + "loss": 2.4368, + "step": 12799 + }, + { + "epoch": 1.0330078282624486, + "grad_norm": 0.6755850315093994, + "learning_rate": 
5.796564140036029e-05, + "loss": 2.4834, + "step": 12800 + }, + { + "epoch": 1.0330885319990315, + "grad_norm": 0.7271782755851746, + "learning_rate": 5.795131744550942e-05, + "loss": 2.5025, + "step": 12801 + }, + { + "epoch": 1.0331692357356146, + "grad_norm": 0.6870545744895935, + "learning_rate": 5.7936994538621605e-05, + "loss": 2.4443, + "step": 12802 + }, + { + "epoch": 1.0332499394721975, + "grad_norm": 0.7231935858726501, + "learning_rate": 5.792267268005382e-05, + "loss": 2.4917, + "step": 12803 + }, + { + "epoch": 1.0333306432087805, + "grad_norm": 0.6905832290649414, + "learning_rate": 5.790835187016307e-05, + "loss": 2.4902, + "step": 12804 + }, + { + "epoch": 1.0334113469453636, + "grad_norm": 0.711814284324646, + "learning_rate": 5.789403210930613e-05, + "loss": 2.4579, + "step": 12805 + }, + { + "epoch": 1.0334920506819465, + "grad_norm": 0.6982280015945435, + "learning_rate": 5.787971339784004e-05, + "loss": 2.5275, + "step": 12806 + }, + { + "epoch": 1.0335727544185296, + "grad_norm": 0.6871493458747864, + "learning_rate": 5.7865395736121575e-05, + "loss": 2.4401, + "step": 12807 + }, + { + "epoch": 1.0336534581551127, + "grad_norm": 0.6898353099822998, + "learning_rate": 5.785107912450763e-05, + "loss": 2.4005, + "step": 12808 + }, + { + "epoch": 1.0337341618916955, + "grad_norm": 0.6264411807060242, + "learning_rate": 5.7836763563354946e-05, + "loss": 2.4497, + "step": 12809 + }, + { + "epoch": 1.0338148656282786, + "grad_norm": 0.6997092962265015, + "learning_rate": 5.782244905302032e-05, + "loss": 2.4388, + "step": 12810 + }, + { + "epoch": 1.0338955693648615, + "grad_norm": 0.6834601759910583, + "learning_rate": 5.7808135593860555e-05, + "loss": 2.4298, + "step": 12811 + }, + { + "epoch": 1.0339762731014446, + "grad_norm": 0.664315402507782, + "learning_rate": 5.77938231862323e-05, + "loss": 2.4289, + "step": 12812 + }, + { + "epoch": 1.0340569768380277, + "grad_norm": 0.6660603284835815, + "learning_rate": 5.7779511830492306e-05, + "loss": 
2.4772, + "step": 12813 + }, + { + "epoch": 1.0341376805746105, + "grad_norm": 0.6457028388977051, + "learning_rate": 5.776520152699728e-05, + "loss": 2.4408, + "step": 12814 + }, + { + "epoch": 1.0342183843111936, + "grad_norm": 0.7132207155227661, + "learning_rate": 5.7750892276103794e-05, + "loss": 2.4953, + "step": 12815 + }, + { + "epoch": 1.0342990880477767, + "grad_norm": 0.7397382259368896, + "learning_rate": 5.773658407816848e-05, + "loss": 2.4396, + "step": 12816 + }, + { + "epoch": 1.0343797917843596, + "grad_norm": 0.6951746344566345, + "learning_rate": 5.7722276933548034e-05, + "loss": 2.5021, + "step": 12817 + }, + { + "epoch": 1.0344604955209427, + "grad_norm": 0.6789736151695251, + "learning_rate": 5.7707970842598935e-05, + "loss": 2.4883, + "step": 12818 + }, + { + "epoch": 1.0345411992575255, + "grad_norm": 0.7231541872024536, + "learning_rate": 5.7693665805677747e-05, + "loss": 2.4761, + "step": 12819 + }, + { + "epoch": 1.0346219029941086, + "grad_norm": 0.685943603515625, + "learning_rate": 5.767936182314104e-05, + "loss": 2.4489, + "step": 12820 + }, + { + "epoch": 1.0347026067306917, + "grad_norm": 0.7081817984580994, + "learning_rate": 5.7665058895345236e-05, + "loss": 2.4329, + "step": 12821 + }, + { + "epoch": 1.0347833104672746, + "grad_norm": 0.6700818538665771, + "learning_rate": 5.7650757022646804e-05, + "loss": 2.4252, + "step": 12822 + }, + { + "epoch": 1.0348640142038577, + "grad_norm": 0.6712214946746826, + "learning_rate": 5.763645620540223e-05, + "loss": 2.419, + "step": 12823 + }, + { + "epoch": 1.0349447179404407, + "grad_norm": 0.6732817888259888, + "learning_rate": 5.762215644396793e-05, + "loss": 2.3928, + "step": 12824 + }, + { + "epoch": 1.0350254216770236, + "grad_norm": 0.6689301133155823, + "learning_rate": 5.760785773870024e-05, + "loss": 2.3981, + "step": 12825 + }, + { + "epoch": 1.0351061254136067, + "grad_norm": 0.6822957992553711, + "learning_rate": 5.759356008995556e-05, + "loss": 2.5265, + "step": 12826 + }, + { 
+ "epoch": 1.0351868291501896, + "grad_norm": 0.7316287755966187, + "learning_rate": 5.7579263498090194e-05, + "loss": 2.4132, + "step": 12827 + }, + { + "epoch": 1.0352675328867726, + "grad_norm": 0.6688703894615173, + "learning_rate": 5.756496796346047e-05, + "loss": 2.4195, + "step": 12828 + }, + { + "epoch": 1.0353482366233557, + "grad_norm": 0.6894570589065552, + "learning_rate": 5.755067348642268e-05, + "loss": 2.4897, + "step": 12829 + }, + { + "epoch": 1.0354289403599386, + "grad_norm": 0.7635753750801086, + "learning_rate": 5.753638006733311e-05, + "loss": 2.4643, + "step": 12830 + }, + { + "epoch": 1.0355096440965217, + "grad_norm": 0.6353672742843628, + "learning_rate": 5.75220877065479e-05, + "loss": 2.4533, + "step": 12831 + }, + { + "epoch": 1.0355903478331048, + "grad_norm": 0.6725208759307861, + "learning_rate": 5.750779640442332e-05, + "loss": 2.4958, + "step": 12832 + }, + { + "epoch": 1.0356710515696876, + "grad_norm": 0.7350767254829407, + "learning_rate": 5.749350616131556e-05, + "loss": 2.4192, + "step": 12833 + }, + { + "epoch": 1.0357517553062707, + "grad_norm": 0.7322222590446472, + "learning_rate": 5.7479216977580695e-05, + "loss": 2.4719, + "step": 12834 + }, + { + "epoch": 1.0358324590428536, + "grad_norm": 0.7233425974845886, + "learning_rate": 5.7464928853574904e-05, + "loss": 2.4707, + "step": 12835 + }, + { + "epoch": 1.0359131627794367, + "grad_norm": 0.7117420434951782, + "learning_rate": 5.745064178965427e-05, + "loss": 2.4463, + "step": 12836 + }, + { + "epoch": 1.0359938665160198, + "grad_norm": 0.7615050077438354, + "learning_rate": 5.743635578617486e-05, + "loss": 2.4256, + "step": 12837 + }, + { + "epoch": 1.0360745702526026, + "grad_norm": 0.7056093215942383, + "learning_rate": 5.7422070843492734e-05, + "loss": 2.4628, + "step": 12838 + }, + { + "epoch": 1.0361552739891857, + "grad_norm": 0.685989499092102, + "learning_rate": 5.740778696196389e-05, + "loss": 2.4271, + "step": 12839 + }, + { + "epoch": 1.0362359777257688, + 
"grad_norm": 0.7286686301231384, + "learning_rate": 5.739350414194439e-05, + "loss": 2.4984, + "step": 12840 + }, + { + "epoch": 1.0363166814623517, + "grad_norm": 0.6939802765846252, + "learning_rate": 5.737922238379009e-05, + "loss": 2.4601, + "step": 12841 + }, + { + "epoch": 1.0363973851989348, + "grad_norm": 0.7077060341835022, + "learning_rate": 5.736494168785698e-05, + "loss": 2.4264, + "step": 12842 + }, + { + "epoch": 1.0364780889355176, + "grad_norm": 0.667086124420166, + "learning_rate": 5.7350662054501016e-05, + "loss": 2.4733, + "step": 12843 + }, + { + "epoch": 1.0365587926721007, + "grad_norm": 0.6531338691711426, + "learning_rate": 5.7336383484078004e-05, + "loss": 2.4709, + "step": 12844 + }, + { + "epoch": 1.0366394964086838, + "grad_norm": 0.7141630053520203, + "learning_rate": 5.732210597694383e-05, + "loss": 2.4747, + "step": 12845 + }, + { + "epoch": 1.0367202001452667, + "grad_norm": 0.7186396718025208, + "learning_rate": 5.730782953345435e-05, + "loss": 2.4401, + "step": 12846 + }, + { + "epoch": 1.0368009038818498, + "grad_norm": 0.6709686517715454, + "learning_rate": 5.7293554153965345e-05, + "loss": 2.456, + "step": 12847 + }, + { + "epoch": 1.0368816076184326, + "grad_norm": 0.6867267489433289, + "learning_rate": 5.727927983883261e-05, + "loss": 2.4522, + "step": 12848 + }, + { + "epoch": 1.0369623113550157, + "grad_norm": 0.7016724348068237, + "learning_rate": 5.7265006588411926e-05, + "loss": 2.4348, + "step": 12849 + }, + { + "epoch": 1.0370430150915988, + "grad_norm": 0.6764764785766602, + "learning_rate": 5.725073440305896e-05, + "loss": 2.4241, + "step": 12850 + }, + { + "epoch": 1.0371237188281817, + "grad_norm": 0.6965062618255615, + "learning_rate": 5.7236463283129435e-05, + "loss": 2.4559, + "step": 12851 + }, + { + "epoch": 1.0372044225647647, + "grad_norm": 0.6878135800361633, + "learning_rate": 5.7222193228979037e-05, + "loss": 2.4874, + "step": 12852 + }, + { + "epoch": 1.0372851263013478, + "grad_norm": 0.6576557755470276, 
+ "learning_rate": 5.720792424096344e-05, + "loss": 2.4273, + "step": 12853 + }, + { + "epoch": 1.0373658300379307, + "grad_norm": 0.7463123798370361, + "learning_rate": 5.719365631943818e-05, + "loss": 2.4933, + "step": 12854 + }, + { + "epoch": 1.0374465337745138, + "grad_norm": 0.6920896768569946, + "learning_rate": 5.7179389464758914e-05, + "loss": 2.4799, + "step": 12855 + }, + { + "epoch": 1.0375272375110969, + "grad_norm": 0.7330591082572937, + "learning_rate": 5.71651236772812e-05, + "loss": 2.469, + "step": 12856 + }, + { + "epoch": 1.0376079412476797, + "grad_norm": 0.6766076683998108, + "learning_rate": 5.715085895736057e-05, + "loss": 2.4787, + "step": 12857 + }, + { + "epoch": 1.0376886449842628, + "grad_norm": 0.724278450012207, + "learning_rate": 5.713659530535255e-05, + "loss": 2.4524, + "step": 12858 + }, + { + "epoch": 1.0377693487208457, + "grad_norm": 0.6816281676292419, + "learning_rate": 5.712233272161265e-05, + "loss": 2.4993, + "step": 12859 + }, + { + "epoch": 1.0378500524574288, + "grad_norm": 0.7186439633369446, + "learning_rate": 5.710807120649626e-05, + "loss": 2.4108, + "step": 12860 + }, + { + "epoch": 1.0379307561940119, + "grad_norm": 0.6616777181625366, + "learning_rate": 5.709381076035887e-05, + "loss": 2.4797, + "step": 12861 + }, + { + "epoch": 1.0380114599305947, + "grad_norm": 0.6956895589828491, + "learning_rate": 5.7079551383555906e-05, + "loss": 2.4017, + "step": 12862 + }, + { + "epoch": 1.0380921636671778, + "grad_norm": 0.6650584936141968, + "learning_rate": 5.706529307644268e-05, + "loss": 2.4808, + "step": 12863 + }, + { + "epoch": 1.0381728674037607, + "grad_norm": 0.6362698674201965, + "learning_rate": 5.705103583937458e-05, + "loss": 2.4077, + "step": 12864 + }, + { + "epoch": 1.0382535711403438, + "grad_norm": 0.6962565183639526, + "learning_rate": 5.703677967270697e-05, + "loss": 2.4715, + "step": 12865 + }, + { + "epoch": 1.0383342748769269, + "grad_norm": 0.6927294135093689, + "learning_rate": 
5.702252457679509e-05, + "loss": 2.4983, + "step": 12866 + }, + { + "epoch": 1.0384149786135097, + "grad_norm": 0.7107497453689575, + "learning_rate": 5.70082705519942e-05, + "loss": 2.4198, + "step": 12867 + }, + { + "epoch": 1.0384956823500928, + "grad_norm": 0.6459221243858337, + "learning_rate": 5.6994017598659634e-05, + "loss": 2.4423, + "step": 12868 + }, + { + "epoch": 1.038576386086676, + "grad_norm": 0.705563485622406, + "learning_rate": 5.697976571714658e-05, + "loss": 2.5346, + "step": 12869 + }, + { + "epoch": 1.0386570898232588, + "grad_norm": 0.7424784898757935, + "learning_rate": 5.696551490781021e-05, + "loss": 2.4824, + "step": 12870 + }, + { + "epoch": 1.0387377935598419, + "grad_norm": 0.6820988059043884, + "learning_rate": 5.695126517100569e-05, + "loss": 2.4965, + "step": 12871 + }, + { + "epoch": 1.0388184972964247, + "grad_norm": 0.8209595680236816, + "learning_rate": 5.6937016507088225e-05, + "loss": 2.475, + "step": 12872 + }, + { + "epoch": 1.0388992010330078, + "grad_norm": 0.7407695055007935, + "learning_rate": 5.6922768916412815e-05, + "loss": 2.4683, + "step": 12873 + }, + { + "epoch": 1.038979904769591, + "grad_norm": 0.7335677742958069, + "learning_rate": 5.690852239933462e-05, + "loss": 2.4621, + "step": 12874 + }, + { + "epoch": 1.0390606085061738, + "grad_norm": 0.6731325387954712, + "learning_rate": 5.689427695620873e-05, + "loss": 2.4882, + "step": 12875 + }, + { + "epoch": 1.0391413122427569, + "grad_norm": 0.7256175875663757, + "learning_rate": 5.68800325873901e-05, + "loss": 2.4827, + "step": 12876 + }, + { + "epoch": 1.03922201597934, + "grad_norm": 0.711928129196167, + "learning_rate": 5.686578929323377e-05, + "loss": 2.4447, + "step": 12877 + }, + { + "epoch": 1.0393027197159228, + "grad_norm": 0.6445996165275574, + "learning_rate": 5.685154707409473e-05, + "loss": 2.453, + "step": 12878 + }, + { + "epoch": 1.039383423452506, + "grad_norm": 0.6656066179275513, + "learning_rate": 5.6837305930327923e-05, + "loss": 2.4863, + 
"step": 12879 + }, + { + "epoch": 1.0394641271890888, + "grad_norm": 0.6844663619995117, + "learning_rate": 5.682306586228828e-05, + "loss": 2.4524, + "step": 12880 + }, + { + "epoch": 1.0395448309256718, + "grad_norm": 0.6436383724212646, + "learning_rate": 5.6808826870330746e-05, + "loss": 2.4137, + "step": 12881 + }, + { + "epoch": 1.039625534662255, + "grad_norm": 0.6731196641921997, + "learning_rate": 5.6794588954810104e-05, + "loss": 2.4176, + "step": 12882 + }, + { + "epoch": 1.0397062383988378, + "grad_norm": 0.6994587779045105, + "learning_rate": 5.678035211608125e-05, + "loss": 2.4651, + "step": 12883 + }, + { + "epoch": 1.0397869421354209, + "grad_norm": 0.6912599205970764, + "learning_rate": 5.6766116354499e-05, + "loss": 2.3918, + "step": 12884 + }, + { + "epoch": 1.039867645872004, + "grad_norm": 0.7627033591270447, + "learning_rate": 5.6751881670418185e-05, + "loss": 2.4278, + "step": 12885 + }, + { + "epoch": 1.0399483496085868, + "grad_norm": 0.7107213139533997, + "learning_rate": 5.6737648064193485e-05, + "loss": 2.5249, + "step": 12886 + }, + { + "epoch": 1.04002905334517, + "grad_norm": 0.7254211902618408, + "learning_rate": 5.672341553617968e-05, + "loss": 2.4454, + "step": 12887 + }, + { + "epoch": 1.0401097570817528, + "grad_norm": 0.6776205897331238, + "learning_rate": 5.670918408673149e-05, + "loss": 2.4333, + "step": 12888 + }, + { + "epoch": 1.0401904608183359, + "grad_norm": 0.6824465394020081, + "learning_rate": 5.669495371620359e-05, + "loss": 2.427, + "step": 12889 + }, + { + "epoch": 1.040271164554919, + "grad_norm": 0.6633001565933228, + "learning_rate": 5.668072442495066e-05, + "loss": 2.4874, + "step": 12890 + }, + { + "epoch": 1.0403518682915018, + "grad_norm": 0.6655289530754089, + "learning_rate": 5.666649621332735e-05, + "loss": 2.5023, + "step": 12891 + }, + { + "epoch": 1.040432572028085, + "grad_norm": 0.6892853379249573, + "learning_rate": 5.665226908168818e-05, + "loss": 2.4505, + "step": 12892 + }, + { + "epoch": 
1.040513275764668, + "grad_norm": 0.7154649496078491, + "learning_rate": 5.6638043030387774e-05, + "loss": 2.4916, + "step": 12893 + }, + { + "epoch": 1.0405939795012509, + "grad_norm": 0.6780592799186707, + "learning_rate": 5.662381805978074e-05, + "loss": 2.4116, + "step": 12894 + }, + { + "epoch": 1.040674683237834, + "grad_norm": 0.6737352013587952, + "learning_rate": 5.66095941702215e-05, + "loss": 2.3903, + "step": 12895 + }, + { + "epoch": 1.0407553869744168, + "grad_norm": 0.7623820304870605, + "learning_rate": 5.659537136206461e-05, + "loss": 2.4334, + "step": 12896 + }, + { + "epoch": 1.040836090711, + "grad_norm": 0.7043081521987915, + "learning_rate": 5.65811496356645e-05, + "loss": 2.4403, + "step": 12897 + }, + { + "epoch": 1.040916794447583, + "grad_norm": 0.6704873442649841, + "learning_rate": 5.6566928991375654e-05, + "loss": 2.4416, + "step": 12898 + }, + { + "epoch": 1.0409974981841659, + "grad_norm": 0.6556837558746338, + "learning_rate": 5.6552709429552474e-05, + "loss": 2.4904, + "step": 12899 + }, + { + "epoch": 1.041078201920749, + "grad_norm": 0.6926451325416565, + "learning_rate": 5.653849095054935e-05, + "loss": 2.4889, + "step": 12900 + }, + { + "epoch": 1.041158905657332, + "grad_norm": 0.6407613158226013, + "learning_rate": 5.6524273554720674e-05, + "loss": 2.3951, + "step": 12901 + }, + { + "epoch": 1.041239609393915, + "grad_norm": 0.7812615633010864, + "learning_rate": 5.651005724242071e-05, + "loss": 2.4535, + "step": 12902 + }, + { + "epoch": 1.041320313130498, + "grad_norm": 0.6868990659713745, + "learning_rate": 5.6495842014003796e-05, + "loss": 2.4373, + "step": 12903 + }, + { + "epoch": 1.0414010168670809, + "grad_norm": 0.6467776894569397, + "learning_rate": 5.648162786982427e-05, + "loss": 2.4929, + "step": 12904 + }, + { + "epoch": 1.041481720603664, + "grad_norm": 0.6588063836097717, + "learning_rate": 5.64674148102363e-05, + "loss": 2.4445, + "step": 12905 + }, + { + "epoch": 1.041562424340247, + "grad_norm": 
0.6880654096603394, + "learning_rate": 5.6453202835594136e-05, + "loss": 2.4298, + "step": 12906 + }, + { + "epoch": 1.04164312807683, + "grad_norm": 0.7471407055854797, + "learning_rate": 5.6438991946251996e-05, + "loss": 2.4669, + "step": 12907 + }, + { + "epoch": 1.041723831813413, + "grad_norm": 0.7069533467292786, + "learning_rate": 5.6424782142564034e-05, + "loss": 2.4498, + "step": 12908 + }, + { + "epoch": 1.0418045355499959, + "grad_norm": 0.7013602256774902, + "learning_rate": 5.641057342488443e-05, + "loss": 2.4993, + "step": 12909 + }, + { + "epoch": 1.041885239286579, + "grad_norm": 0.6870697736740112, + "learning_rate": 5.6396365793567305e-05, + "loss": 2.5338, + "step": 12910 + }, + { + "epoch": 1.041965943023162, + "grad_norm": 0.6569130420684814, + "learning_rate": 5.638215924896669e-05, + "loss": 2.4538, + "step": 12911 + }, + { + "epoch": 1.042046646759745, + "grad_norm": 0.6900331377983093, + "learning_rate": 5.636795379143669e-05, + "loss": 2.4013, + "step": 12912 + }, + { + "epoch": 1.042127350496328, + "grad_norm": 0.6800071001052856, + "learning_rate": 5.635374942133136e-05, + "loss": 2.4733, + "step": 12913 + }, + { + "epoch": 1.042208054232911, + "grad_norm": 0.703601598739624, + "learning_rate": 5.6339546139004663e-05, + "loss": 2.432, + "step": 12914 + }, + { + "epoch": 1.042288757969494, + "grad_norm": 0.6781988739967346, + "learning_rate": 5.6325343944810594e-05, + "loss": 2.4418, + "step": 12915 + }, + { + "epoch": 1.042369461706077, + "grad_norm": 0.7247167825698853, + "learning_rate": 5.6311142839103125e-05, + "loss": 2.5133, + "step": 12916 + }, + { + "epoch": 1.04245016544266, + "grad_norm": 0.7738155126571655, + "learning_rate": 5.629694282223619e-05, + "loss": 2.5137, + "step": 12917 + }, + { + "epoch": 1.042530869179243, + "grad_norm": 0.74723219871521, + "learning_rate": 5.628274389456367e-05, + "loss": 2.3996, + "step": 12918 + }, + { + "epoch": 1.042611572915826, + "grad_norm": 0.7245466709136963, + "learning_rate": 
5.6268546056439456e-05, + "loss": 2.4213, + "step": 12919 + }, + { + "epoch": 1.042692276652409, + "grad_norm": 0.6307608485221863, + "learning_rate": 5.625434930821742e-05, + "loss": 2.4195, + "step": 12920 + }, + { + "epoch": 1.042772980388992, + "grad_norm": 0.7138007879257202, + "learning_rate": 5.6240153650251326e-05, + "loss": 2.463, + "step": 12921 + }, + { + "epoch": 1.042853684125575, + "grad_norm": 0.779659628868103, + "learning_rate": 5.622595908289498e-05, + "loss": 2.4898, + "step": 12922 + }, + { + "epoch": 1.042934387862158, + "grad_norm": 0.7144278287887573, + "learning_rate": 5.621176560650221e-05, + "loss": 2.4083, + "step": 12923 + }, + { + "epoch": 1.043015091598741, + "grad_norm": 0.7724754214286804, + "learning_rate": 5.619757322142667e-05, + "loss": 2.3917, + "step": 12924 + }, + { + "epoch": 1.043095795335324, + "grad_norm": 0.7667245268821716, + "learning_rate": 5.618338192802208e-05, + "loss": 2.4943, + "step": 12925 + }, + { + "epoch": 1.043176499071907, + "grad_norm": 0.6528030037879944, + "learning_rate": 5.616919172664221e-05, + "loss": 2.4323, + "step": 12926 + }, + { + "epoch": 1.04325720280849, + "grad_norm": 0.6790263652801514, + "learning_rate": 5.6155002617640615e-05, + "loss": 2.4304, + "step": 12927 + }, + { + "epoch": 1.043337906545073, + "grad_norm": 0.7554369568824768, + "learning_rate": 5.614081460137097e-05, + "loss": 2.4637, + "step": 12928 + }, + { + "epoch": 1.043418610281656, + "grad_norm": 0.7126293182373047, + "learning_rate": 5.612662767818686e-05, + "loss": 2.4765, + "step": 12929 + }, + { + "epoch": 1.0434993140182391, + "grad_norm": 0.6705749034881592, + "learning_rate": 5.611244184844189e-05, + "loss": 2.4746, + "step": 12930 + }, + { + "epoch": 1.043580017754822, + "grad_norm": 0.6595145463943481, + "learning_rate": 5.609825711248958e-05, + "loss": 2.463, + "step": 12931 + }, + { + "epoch": 1.043660721491405, + "grad_norm": 0.6942049860954285, + "learning_rate": 5.6084073470683476e-05, + "loss": 2.5101, + 
"step": 12932 + }, + { + "epoch": 1.043741425227988, + "grad_norm": 0.7285810708999634, + "learning_rate": 5.6069890923377087e-05, + "loss": 2.467, + "step": 12933 + }, + { + "epoch": 1.043822128964571, + "grad_norm": 0.7702928185462952, + "learning_rate": 5.605570947092382e-05, + "loss": 2.4998, + "step": 12934 + }, + { + "epoch": 1.0439028327011541, + "grad_norm": 0.6631895899772644, + "learning_rate": 5.604152911367713e-05, + "loss": 2.4277, + "step": 12935 + }, + { + "epoch": 1.043983536437737, + "grad_norm": 0.6447882652282715, + "learning_rate": 5.6027349851990494e-05, + "loss": 2.4868, + "step": 12936 + }, + { + "epoch": 1.04406424017432, + "grad_norm": 0.695160448551178, + "learning_rate": 5.6013171686217205e-05, + "loss": 2.3917, + "step": 12937 + }, + { + "epoch": 1.0441449439109032, + "grad_norm": 0.6579271554946899, + "learning_rate": 5.5998994616710656e-05, + "loss": 2.4245, + "step": 12938 + }, + { + "epoch": 1.044225647647486, + "grad_norm": 0.7053574323654175, + "learning_rate": 5.598481864382419e-05, + "loss": 2.4809, + "step": 12939 + }, + { + "epoch": 1.0443063513840691, + "grad_norm": 0.7008736729621887, + "learning_rate": 5.5970643767911105e-05, + "loss": 2.4481, + "step": 12940 + }, + { + "epoch": 1.044387055120652, + "grad_norm": 0.6577918529510498, + "learning_rate": 5.5956469989324644e-05, + "loss": 2.4211, + "step": 12941 + }, + { + "epoch": 1.044467758857235, + "grad_norm": 0.6662739515304565, + "learning_rate": 5.594229730841815e-05, + "loss": 2.4607, + "step": 12942 + }, + { + "epoch": 1.0445484625938182, + "grad_norm": 0.6637060046195984, + "learning_rate": 5.592812572554471e-05, + "loss": 2.4388, + "step": 12943 + }, + { + "epoch": 1.044629166330401, + "grad_norm": 0.7282097935676575, + "learning_rate": 5.5913955241057605e-05, + "loss": 2.4536, + "step": 12944 + }, + { + "epoch": 1.0447098700669841, + "grad_norm": 0.6470810174942017, + "learning_rate": 5.589978585530997e-05, + "loss": 2.4032, + "step": 12945 + }, + { + "epoch": 
1.0447905738035672, + "grad_norm": 0.6958881616592407, + "learning_rate": 5.588561756865498e-05, + "loss": 2.4577, + "step": 12946 + }, + { + "epoch": 1.04487127754015, + "grad_norm": 0.6999812722206116, + "learning_rate": 5.587145038144569e-05, + "loss": 2.454, + "step": 12947 + }, + { + "epoch": 1.0449519812767332, + "grad_norm": 0.6919988989830017, + "learning_rate": 5.58572842940352e-05, + "loss": 2.4505, + "step": 12948 + }, + { + "epoch": 1.045032685013316, + "grad_norm": 0.6813084483146667, + "learning_rate": 5.584311930677659e-05, + "loss": 2.4873, + "step": 12949 + }, + { + "epoch": 1.0451133887498991, + "grad_norm": 0.6587427854537964, + "learning_rate": 5.582895542002286e-05, + "loss": 2.4658, + "step": 12950 + }, + { + "epoch": 1.0451940924864822, + "grad_norm": 0.6942041516304016, + "learning_rate": 5.581479263412703e-05, + "loss": 2.47, + "step": 12951 + }, + { + "epoch": 1.045274796223065, + "grad_norm": 0.7330117225646973, + "learning_rate": 5.58006309494421e-05, + "loss": 2.4826, + "step": 12952 + }, + { + "epoch": 1.0453554999596482, + "grad_norm": 0.7197144031524658, + "learning_rate": 5.578647036632096e-05, + "loss": 2.4425, + "step": 12953 + }, + { + "epoch": 1.045436203696231, + "grad_norm": 0.7442573308944702, + "learning_rate": 5.577231088511654e-05, + "loss": 2.4946, + "step": 12954 + }, + { + "epoch": 1.0455169074328141, + "grad_norm": 0.7039753198623657, + "learning_rate": 5.575815250618179e-05, + "loss": 2.4188, + "step": 12955 + }, + { + "epoch": 1.0455976111693972, + "grad_norm": 0.7374606728553772, + "learning_rate": 5.574399522986951e-05, + "loss": 2.3916, + "step": 12956 + }, + { + "epoch": 1.04567831490598, + "grad_norm": 0.6358140707015991, + "learning_rate": 5.572983905653253e-05, + "loss": 2.4502, + "step": 12957 + }, + { + "epoch": 1.0457590186425632, + "grad_norm": 0.712858259677887, + "learning_rate": 5.5715683986523694e-05, + "loss": 2.4746, + "step": 12958 + }, + { + "epoch": 1.0458397223791462, + "grad_norm": 
0.6757933497428894, + "learning_rate": 5.5701530020195756e-05, + "loss": 2.4836, + "step": 12959 + }, + { + "epoch": 1.045920426115729, + "grad_norm": 0.7509831786155701, + "learning_rate": 5.568737715790151e-05, + "loss": 2.4061, + "step": 12960 + }, + { + "epoch": 1.0460011298523122, + "grad_norm": 0.7120335102081299, + "learning_rate": 5.5673225399993646e-05, + "loss": 2.4772, + "step": 12961 + }, + { + "epoch": 1.046081833588895, + "grad_norm": 0.7213751673698425, + "learning_rate": 5.5659074746824924e-05, + "loss": 2.4637, + "step": 12962 + }, + { + "epoch": 1.0461625373254781, + "grad_norm": 0.7161290645599365, + "learning_rate": 5.5644925198747934e-05, + "loss": 2.4552, + "step": 12963 + }, + { + "epoch": 1.0462432410620612, + "grad_norm": 0.7303922772407532, + "learning_rate": 5.563077675611534e-05, + "loss": 2.5091, + "step": 12964 + }, + { + "epoch": 1.046323944798644, + "grad_norm": 0.7051636576652527, + "learning_rate": 5.561662941927981e-05, + "loss": 2.3717, + "step": 12965 + }, + { + "epoch": 1.0464046485352272, + "grad_norm": 0.6880733370780945, + "learning_rate": 5.5602483188593866e-05, + "loss": 2.4205, + "step": 12966 + }, + { + "epoch": 1.0464853522718103, + "grad_norm": 0.6942360401153564, + "learning_rate": 5.558833806441008e-05, + "loss": 2.4601, + "step": 12967 + }, + { + "epoch": 1.0465660560083931, + "grad_norm": 0.7264992594718933, + "learning_rate": 5.5574194047081016e-05, + "loss": 2.4612, + "step": 12968 + }, + { + "epoch": 1.0466467597449762, + "grad_norm": 0.7502472996711731, + "learning_rate": 5.5560051136959166e-05, + "loss": 2.4099, + "step": 12969 + }, + { + "epoch": 1.046727463481559, + "grad_norm": 0.691694438457489, + "learning_rate": 5.5545909334397004e-05, + "loss": 2.5071, + "step": 12970 + }, + { + "epoch": 1.0468081672181422, + "grad_norm": 0.7120653986930847, + "learning_rate": 5.5531768639747026e-05, + "loss": 2.4066, + "step": 12971 + }, + { + "epoch": 1.0468888709547253, + "grad_norm": 0.6501363515853882, + 
"learning_rate": 5.551762905336159e-05, + "loss": 2.4186, + "step": 12972 + }, + { + "epoch": 1.0469695746913081, + "grad_norm": 0.6924965977668762, + "learning_rate": 5.5503490575593095e-05, + "loss": 2.4864, + "step": 12973 + }, + { + "epoch": 1.0470502784278912, + "grad_norm": 0.6772900819778442, + "learning_rate": 5.548935320679398e-05, + "loss": 2.4101, + "step": 12974 + }, + { + "epoch": 1.0471309821644743, + "grad_norm": 0.6950967311859131, + "learning_rate": 5.54752169473165e-05, + "loss": 2.4893, + "step": 12975 + }, + { + "epoch": 1.0472116859010572, + "grad_norm": 0.6663516163825989, + "learning_rate": 5.5461081797512994e-05, + "loss": 2.4136, + "step": 12976 + }, + { + "epoch": 1.0472923896376403, + "grad_norm": 0.7337449789047241, + "learning_rate": 5.5446947757735754e-05, + "loss": 2.473, + "step": 12977 + }, + { + "epoch": 1.0473730933742231, + "grad_norm": 0.6808840036392212, + "learning_rate": 5.543281482833709e-05, + "loss": 2.4473, + "step": 12978 + }, + { + "epoch": 1.0474537971108062, + "grad_norm": 0.6472508907318115, + "learning_rate": 5.5418683009669124e-05, + "loss": 2.4077, + "step": 12979 + }, + { + "epoch": 1.0475345008473893, + "grad_norm": 0.6904192566871643, + "learning_rate": 5.540455230208409e-05, + "loss": 2.482, + "step": 12980 + }, + { + "epoch": 1.0476152045839722, + "grad_norm": 0.6781610250473022, + "learning_rate": 5.5390422705934264e-05, + "loss": 2.4458, + "step": 12981 + }, + { + "epoch": 1.0476959083205553, + "grad_norm": 0.7130050659179688, + "learning_rate": 5.5376294221571666e-05, + "loss": 2.5136, + "step": 12982 + }, + { + "epoch": 1.0477766120571383, + "grad_norm": 0.7727184891700745, + "learning_rate": 5.536216684934846e-05, + "loss": 2.5346, + "step": 12983 + }, + { + "epoch": 1.0478573157937212, + "grad_norm": 0.7177208662033081, + "learning_rate": 5.534804058961679e-05, + "loss": 2.4153, + "step": 12984 + }, + { + "epoch": 1.0479380195303043, + "grad_norm": 0.7333023548126221, + "learning_rate": 
5.5333915442728634e-05, + "loss": 2.4171, + "step": 12985 + }, + { + "epoch": 1.0480187232668872, + "grad_norm": 0.658423125743866, + "learning_rate": 5.5319791409036046e-05, + "loss": 2.446, + "step": 12986 + }, + { + "epoch": 1.0480994270034703, + "grad_norm": 0.8305184841156006, + "learning_rate": 5.5305668488891114e-05, + "loss": 2.5026, + "step": 12987 + }, + { + "epoch": 1.0481801307400533, + "grad_norm": 0.7083305716514587, + "learning_rate": 5.52915466826457e-05, + "loss": 2.5366, + "step": 12988 + }, + { + "epoch": 1.0482608344766362, + "grad_norm": 0.7924454212188721, + "learning_rate": 5.5277425990651824e-05, + "loss": 2.528, + "step": 12989 + }, + { + "epoch": 1.0483415382132193, + "grad_norm": 0.633376955986023, + "learning_rate": 5.5263306413261384e-05, + "loss": 2.4442, + "step": 12990 + }, + { + "epoch": 1.0484222419498024, + "grad_norm": 0.7387240529060364, + "learning_rate": 5.5249187950826295e-05, + "loss": 2.4761, + "step": 12991 + }, + { + "epoch": 1.0485029456863852, + "grad_norm": 0.6796224117279053, + "learning_rate": 5.523507060369843e-05, + "loss": 2.4828, + "step": 12992 + }, + { + "epoch": 1.0485836494229683, + "grad_norm": 0.6925581097602844, + "learning_rate": 5.5220954372229604e-05, + "loss": 2.4861, + "step": 12993 + }, + { + "epoch": 1.0486643531595512, + "grad_norm": 0.6854318380355835, + "learning_rate": 5.5206839256771704e-05, + "loss": 2.473, + "step": 12994 + }, + { + "epoch": 1.0487450568961343, + "grad_norm": 0.706375241279602, + "learning_rate": 5.519272525767643e-05, + "loss": 2.4284, + "step": 12995 + }, + { + "epoch": 1.0488257606327174, + "grad_norm": 0.6917428374290466, + "learning_rate": 5.517861237529556e-05, + "loss": 2.4702, + "step": 12996 + }, + { + "epoch": 1.0489064643693002, + "grad_norm": 0.6903818845748901, + "learning_rate": 5.516450060998086e-05, + "loss": 2.4679, + "step": 12997 + }, + { + "epoch": 1.0489871681058833, + "grad_norm": 0.6403356194496155, + "learning_rate": 5.515038996208398e-05, + "loss": 
2.396, + "step": 12998 + }, + { + "epoch": 1.0490678718424662, + "grad_norm": 0.6491792798042297, + "learning_rate": 5.513628043195662e-05, + "loss": 2.4543, + "step": 12999 + }, + { + "epoch": 1.0491485755790493, + "grad_norm": 0.687303900718689, + "learning_rate": 5.512217201995043e-05, + "loss": 2.4716, + "step": 13000 + }, + { + "epoch": 1.0491485755790493, + "eval_loss": 2.4177169799804688, + "eval_runtime": 763.9215, + "eval_samples_per_second": 3.43, + "eval_steps_per_second": 0.572, + "step": 13000 + }, + { + "epoch": 1.0492292793156324, + "grad_norm": 0.7020761370658875, + "learning_rate": 5.510806472641701e-05, + "loss": 2.3591, + "step": 13001 + }, + { + "epoch": 1.0493099830522152, + "grad_norm": 0.6978075504302979, + "learning_rate": 5.509395855170798e-05, + "loss": 2.4585, + "step": 13002 + }, + { + "epoch": 1.0493906867887983, + "grad_norm": 0.7327752113342285, + "learning_rate": 5.5079853496174925e-05, + "loss": 2.5265, + "step": 13003 + }, + { + "epoch": 1.0494713905253814, + "grad_norm": 0.7552505135536194, + "learning_rate": 5.50657495601693e-05, + "loss": 2.4821, + "step": 13004 + }, + { + "epoch": 1.0495520942619643, + "grad_norm": 0.7100770473480225, + "learning_rate": 5.5051646744042664e-05, + "loss": 2.4566, + "step": 13005 + }, + { + "epoch": 1.0496327979985474, + "grad_norm": 0.7008209824562073, + "learning_rate": 5.503754504814651e-05, + "loss": 2.4476, + "step": 13006 + }, + { + "epoch": 1.0497135017351304, + "grad_norm": 0.640724241733551, + "learning_rate": 5.502344447283223e-05, + "loss": 2.437, + "step": 13007 + }, + { + "epoch": 1.0497942054717133, + "grad_norm": 0.7064981460571289, + "learning_rate": 5.5009345018451297e-05, + "loss": 2.5129, + "step": 13008 + }, + { + "epoch": 1.0498749092082964, + "grad_norm": 0.6729782223701477, + "learning_rate": 5.49952466853551e-05, + "loss": 2.4867, + "step": 13009 + }, + { + "epoch": 1.0499556129448793, + "grad_norm": 0.7245302200317383, + "learning_rate": 5.4981149473894966e-05, + "loss": 
2.4485, + "step": 13010 + }, + { + "epoch": 1.0500363166814624, + "grad_norm": 0.6686248779296875, + "learning_rate": 5.4967053384422294e-05, + "loss": 2.4314, + "step": 13011 + }, + { + "epoch": 1.0501170204180454, + "grad_norm": 0.6790863871574402, + "learning_rate": 5.495295841728836e-05, + "loss": 2.4847, + "step": 13012 + }, + { + "epoch": 1.0501977241546283, + "grad_norm": 0.6516931653022766, + "learning_rate": 5.49388645728445e-05, + "loss": 2.4306, + "step": 13013 + }, + { + "epoch": 1.0502784278912114, + "grad_norm": 0.6967600584030151, + "learning_rate": 5.492477185144189e-05, + "loss": 2.4942, + "step": 13014 + }, + { + "epoch": 1.0503591316277943, + "grad_norm": 0.696246325969696, + "learning_rate": 5.491068025343178e-05, + "loss": 2.4647, + "step": 13015 + }, + { + "epoch": 1.0504398353643774, + "grad_norm": 0.6962751150131226, + "learning_rate": 5.489658977916543e-05, + "loss": 2.5095, + "step": 13016 + }, + { + "epoch": 1.0505205391009604, + "grad_norm": 0.6982631087303162, + "learning_rate": 5.488250042899392e-05, + "loss": 2.4327, + "step": 13017 + }, + { + "epoch": 1.0506012428375433, + "grad_norm": 0.6932644844055176, + "learning_rate": 5.486841220326845e-05, + "loss": 2.4777, + "step": 13018 + }, + { + "epoch": 1.0506819465741264, + "grad_norm": 0.6923339366912842, + "learning_rate": 5.485432510234012e-05, + "loss": 2.4321, + "step": 13019 + }, + { + "epoch": 1.0507626503107095, + "grad_norm": 0.7445859313011169, + "learning_rate": 5.4840239126560015e-05, + "loss": 2.4425, + "step": 13020 + }, + { + "epoch": 1.0508433540472923, + "grad_norm": 0.7122324705123901, + "learning_rate": 5.48261542762792e-05, + "loss": 2.4545, + "step": 13021 + }, + { + "epoch": 1.0509240577838754, + "grad_norm": 0.734779417514801, + "learning_rate": 5.4812070551848736e-05, + "loss": 2.4764, + "step": 13022 + }, + { + "epoch": 1.0510047615204583, + "grad_norm": 0.6544109582901001, + "learning_rate": 5.4797987953619566e-05, + "loss": 2.4492, + "step": 13023 + }, + { + 
"epoch": 1.0510854652570414, + "grad_norm": 0.6366097331047058, + "learning_rate": 5.4783906481942704e-05, + "loss": 2.4695, + "step": 13024 + }, + { + "epoch": 1.0511661689936245, + "grad_norm": 0.6966270804405212, + "learning_rate": 5.476982613716908e-05, + "loss": 2.4505, + "step": 13025 + }, + { + "epoch": 1.0512468727302073, + "grad_norm": 0.7010120153427124, + "learning_rate": 5.4755746919649665e-05, + "loss": 2.4545, + "step": 13026 + }, + { + "epoch": 1.0513275764667904, + "grad_norm": 0.6704719662666321, + "learning_rate": 5.474166882973526e-05, + "loss": 2.3899, + "step": 13027 + }, + { + "epoch": 1.0514082802033735, + "grad_norm": 0.757152259349823, + "learning_rate": 5.472759186777679e-05, + "loss": 2.5112, + "step": 13028 + }, + { + "epoch": 1.0514889839399564, + "grad_norm": 0.6668868660926819, + "learning_rate": 5.471351603412509e-05, + "loss": 2.4797, + "step": 13029 + }, + { + "epoch": 1.0515696876765395, + "grad_norm": 0.7919496893882751, + "learning_rate": 5.4699441329130887e-05, + "loss": 2.4874, + "step": 13030 + }, + { + "epoch": 1.0516503914131223, + "grad_norm": 0.7595484852790833, + "learning_rate": 5.468536775314506e-05, + "loss": 2.4621, + "step": 13031 + }, + { + "epoch": 1.0517310951497054, + "grad_norm": 0.6575995683670044, + "learning_rate": 5.467129530651835e-05, + "loss": 2.4474, + "step": 13032 + }, + { + "epoch": 1.0518117988862885, + "grad_norm": 0.6817733645439148, + "learning_rate": 5.4657223989601425e-05, + "loss": 2.4329, + "step": 13033 + }, + { + "epoch": 1.0518925026228714, + "grad_norm": 0.722882091999054, + "learning_rate": 5.464315380274501e-05, + "loss": 2.4544, + "step": 13034 + }, + { + "epoch": 1.0519732063594545, + "grad_norm": 0.6957377791404724, + "learning_rate": 5.4629084746299796e-05, + "loss": 2.5669, + "step": 13035 + }, + { + "epoch": 1.0520539100960375, + "grad_norm": 0.6749420166015625, + "learning_rate": 5.461501682061636e-05, + "loss": 2.5053, + "step": 13036 + }, + { + "epoch": 1.0521346138326204, + 
"grad_norm": 0.8158369064331055, + "learning_rate": 5.4600950026045326e-05, + "loss": 2.429, + "step": 13037 + }, + { + "epoch": 1.0522153175692035, + "grad_norm": 0.6960736513137817, + "learning_rate": 5.458688436293735e-05, + "loss": 2.4731, + "step": 13038 + }, + { + "epoch": 1.0522960213057864, + "grad_norm": 0.6686301231384277, + "learning_rate": 5.457281983164287e-05, + "loss": 2.4495, + "step": 13039 + }, + { + "epoch": 1.0523767250423695, + "grad_norm": 0.6691476106643677, + "learning_rate": 5.455875643251248e-05, + "loss": 2.4329, + "step": 13040 + }, + { + "epoch": 1.0524574287789525, + "grad_norm": 0.7737297415733337, + "learning_rate": 5.454469416589666e-05, + "loss": 2.4664, + "step": 13041 + }, + { + "epoch": 1.0525381325155354, + "grad_norm": 0.7848188281059265, + "learning_rate": 5.453063303214588e-05, + "loss": 2.4799, + "step": 13042 + }, + { + "epoch": 1.0526188362521185, + "grad_norm": 0.7831119894981384, + "learning_rate": 5.45165730316106e-05, + "loss": 2.5076, + "step": 13043 + }, + { + "epoch": 1.0526995399887016, + "grad_norm": 0.691635012626648, + "learning_rate": 5.4502514164641196e-05, + "loss": 2.4866, + "step": 13044 + }, + { + "epoch": 1.0527802437252844, + "grad_norm": 0.6667110919952393, + "learning_rate": 5.4488456431588106e-05, + "loss": 2.4162, + "step": 13045 + }, + { + "epoch": 1.0528609474618675, + "grad_norm": 0.7201905846595764, + "learning_rate": 5.447439983280163e-05, + "loss": 2.498, + "step": 13046 + }, + { + "epoch": 1.0529416511984504, + "grad_norm": 0.8538106083869934, + "learning_rate": 5.44603443686321e-05, + "loss": 2.4477, + "step": 13047 + }, + { + "epoch": 1.0530223549350335, + "grad_norm": 0.6661962270736694, + "learning_rate": 5.444629003942987e-05, + "loss": 2.5253, + "step": 13048 + }, + { + "epoch": 1.0531030586716166, + "grad_norm": 0.7239834666252136, + "learning_rate": 5.4432236845545146e-05, + "loss": 2.4786, + "step": 13049 + }, + { + "epoch": 1.0531837624081994, + "grad_norm": 0.7328412532806396, + 
"learning_rate": 5.4418184787328186e-05, + "loss": 2.4841, + "step": 13050 + }, + { + "epoch": 1.0532644661447825, + "grad_norm": 0.6395559310913086, + "learning_rate": 5.440413386512922e-05, + "loss": 2.3544, + "step": 13051 + }, + { + "epoch": 1.0533451698813656, + "grad_norm": 0.6632471084594727, + "learning_rate": 5.43900840792984e-05, + "loss": 2.4753, + "step": 13052 + }, + { + "epoch": 1.0534258736179485, + "grad_norm": 0.7262828350067139, + "learning_rate": 5.4376035430185935e-05, + "loss": 2.4162, + "step": 13053 + }, + { + "epoch": 1.0535065773545316, + "grad_norm": 0.7897952198982239, + "learning_rate": 5.436198791814196e-05, + "loss": 2.4571, + "step": 13054 + }, + { + "epoch": 1.0535872810911144, + "grad_norm": 0.7281489372253418, + "learning_rate": 5.434794154351651e-05, + "loss": 2.4531, + "step": 13055 + }, + { + "epoch": 1.0536679848276975, + "grad_norm": 0.7322356700897217, + "learning_rate": 5.4333896306659694e-05, + "loss": 2.4102, + "step": 13056 + }, + { + "epoch": 1.0537486885642806, + "grad_norm": 0.7657945156097412, + "learning_rate": 5.4319852207921554e-05, + "loss": 2.4526, + "step": 13057 + }, + { + "epoch": 1.0538293923008635, + "grad_norm": 0.6732973456382751, + "learning_rate": 5.430580924765214e-05, + "loss": 2.4516, + "step": 13058 + }, + { + "epoch": 1.0539100960374466, + "grad_norm": 0.663398027420044, + "learning_rate": 5.429176742620137e-05, + "loss": 2.4437, + "step": 13059 + }, + { + "epoch": 1.0539907997740294, + "grad_norm": 0.6363258957862854, + "learning_rate": 5.4277726743919244e-05, + "loss": 2.414, + "step": 13060 + }, + { + "epoch": 1.0540715035106125, + "grad_norm": 0.6600647568702698, + "learning_rate": 5.426368720115568e-05, + "loss": 2.4319, + "step": 13061 + }, + { + "epoch": 1.0541522072471956, + "grad_norm": 0.6941983699798584, + "learning_rate": 5.4249648798260574e-05, + "loss": 2.5247, + "step": 13062 + }, + { + "epoch": 1.0542329109837785, + "grad_norm": 0.7419719099998474, + "learning_rate": 
5.423561153558383e-05, + "loss": 2.5088, + "step": 13063 + }, + { + "epoch": 1.0543136147203616, + "grad_norm": 0.708073079586029, + "learning_rate": 5.4221575413475326e-05, + "loss": 2.4037, + "step": 13064 + }, + { + "epoch": 1.0543943184569446, + "grad_norm": 0.7081628441810608, + "learning_rate": 5.4207540432284764e-05, + "loss": 2.4556, + "step": 13065 + }, + { + "epoch": 1.0544750221935275, + "grad_norm": 0.7058689594268799, + "learning_rate": 5.419350659236201e-05, + "loss": 2.4244, + "step": 13066 + }, + { + "epoch": 1.0545557259301106, + "grad_norm": 0.6858707070350647, + "learning_rate": 5.417947389405684e-05, + "loss": 2.4431, + "step": 13067 + }, + { + "epoch": 1.0546364296666935, + "grad_norm": 0.6769983768463135, + "learning_rate": 5.416544233771893e-05, + "loss": 2.4257, + "step": 13068 + }, + { + "epoch": 1.0547171334032766, + "grad_norm": 0.7128089070320129, + "learning_rate": 5.4151411923698e-05, + "loss": 2.4558, + "step": 13069 + }, + { + "epoch": 1.0547978371398596, + "grad_norm": 0.6419198513031006, + "learning_rate": 5.413738265234374e-05, + "loss": 2.4421, + "step": 13070 + }, + { + "epoch": 1.0548785408764425, + "grad_norm": 0.760848879814148, + "learning_rate": 5.4123354524005784e-05, + "loss": 2.4427, + "step": 13071 + }, + { + "epoch": 1.0549592446130256, + "grad_norm": 0.6749173998832703, + "learning_rate": 5.410932753903377e-05, + "loss": 2.4902, + "step": 13072 + }, + { + "epoch": 1.0550399483496087, + "grad_norm": 0.6908800601959229, + "learning_rate": 5.4095301697777265e-05, + "loss": 2.4219, + "step": 13073 + }, + { + "epoch": 1.0551206520861915, + "grad_norm": 0.6779965758323669, + "learning_rate": 5.408127700058587e-05, + "loss": 2.4533, + "step": 13074 + }, + { + "epoch": 1.0552013558227746, + "grad_norm": 0.6832355260848999, + "learning_rate": 5.406725344780906e-05, + "loss": 2.418, + "step": 13075 + }, + { + "epoch": 1.0552820595593575, + "grad_norm": 0.6766698956489563, + "learning_rate": 5.4053231039796357e-05, + "loss": 
2.4493, + "step": 13076 + }, + { + "epoch": 1.0553627632959406, + "grad_norm": 0.7256276607513428, + "learning_rate": 5.4039209776897285e-05, + "loss": 2.4126, + "step": 13077 + }, + { + "epoch": 1.0554434670325237, + "grad_norm": 0.6687275171279907, + "learning_rate": 5.4025189659461196e-05, + "loss": 2.435, + "step": 13078 + }, + { + "epoch": 1.0555241707691065, + "grad_norm": 0.6800444722175598, + "learning_rate": 5.401117068783758e-05, + "loss": 2.4608, + "step": 13079 + }, + { + "epoch": 1.0556048745056896, + "grad_norm": 0.6947116851806641, + "learning_rate": 5.399715286237583e-05, + "loss": 2.4908, + "step": 13080 + }, + { + "epoch": 1.0556855782422727, + "grad_norm": 0.6907915472984314, + "learning_rate": 5.398313618342521e-05, + "loss": 2.4805, + "step": 13081 + }, + { + "epoch": 1.0557662819788556, + "grad_norm": 0.7429100275039673, + "learning_rate": 5.396912065133516e-05, + "loss": 2.458, + "step": 13082 + }, + { + "epoch": 1.0558469857154387, + "grad_norm": 0.7186924815177917, + "learning_rate": 5.3955106266454994e-05, + "loss": 2.4924, + "step": 13083 + }, + { + "epoch": 1.0559276894520215, + "grad_norm": 0.7017999887466431, + "learning_rate": 5.394109302913391e-05, + "loss": 2.4103, + "step": 13084 + }, + { + "epoch": 1.0560083931886046, + "grad_norm": 0.7318955659866333, + "learning_rate": 5.392708093972117e-05, + "loss": 2.4424, + "step": 13085 + }, + { + "epoch": 1.0560890969251877, + "grad_norm": 0.6278600692749023, + "learning_rate": 5.391306999856602e-05, + "loss": 2.4433, + "step": 13086 + }, + { + "epoch": 1.0561698006617706, + "grad_norm": 0.6895800232887268, + "learning_rate": 5.389906020601767e-05, + "loss": 2.4275, + "step": 13087 + }, + { + "epoch": 1.0562505043983537, + "grad_norm": 0.7197345495223999, + "learning_rate": 5.388505156242522e-05, + "loss": 2.4309, + "step": 13088 + }, + { + "epoch": 1.0563312081349367, + "grad_norm": 0.636433482170105, + "learning_rate": 5.3871044068137824e-05, + "loss": 2.4258, + "step": 13089 + }, + { + 
"epoch": 1.0564119118715196, + "grad_norm": 0.6884748339653015, + "learning_rate": 5.3857037723504634e-05, + "loss": 2.4543, + "step": 13090 + }, + { + "epoch": 1.0564926156081027, + "grad_norm": 0.7277036309242249, + "learning_rate": 5.384303252887464e-05, + "loss": 2.4911, + "step": 13091 + }, + { + "epoch": 1.0565733193446856, + "grad_norm": 0.6940809488296509, + "learning_rate": 5.38290284845969e-05, + "loss": 2.4112, + "step": 13092 + }, + { + "epoch": 1.0566540230812687, + "grad_norm": 0.6729177236557007, + "learning_rate": 5.3815025591020526e-05, + "loss": 2.4394, + "step": 13093 + }, + { + "epoch": 1.0567347268178517, + "grad_norm": 0.6941854357719421, + "learning_rate": 5.3801023848494416e-05, + "loss": 2.4263, + "step": 13094 + }, + { + "epoch": 1.0568154305544346, + "grad_norm": 0.7046812772750854, + "learning_rate": 5.3787023257367554e-05, + "loss": 2.5196, + "step": 13095 + }, + { + "epoch": 1.0568961342910177, + "grad_norm": 0.6896177530288696, + "learning_rate": 5.377302381798891e-05, + "loss": 2.4178, + "step": 13096 + }, + { + "epoch": 1.0569768380276008, + "grad_norm": 0.6693699955940247, + "learning_rate": 5.375902553070731e-05, + "loss": 2.4908, + "step": 13097 + }, + { + "epoch": 1.0570575417641837, + "grad_norm": 0.6751677989959717, + "learning_rate": 5.3745028395871674e-05, + "loss": 2.4222, + "step": 13098 + }, + { + "epoch": 1.0571382455007667, + "grad_norm": 0.7666265368461609, + "learning_rate": 5.373103241383088e-05, + "loss": 2.4965, + "step": 13099 + }, + { + "epoch": 1.0572189492373496, + "grad_norm": 0.8069329857826233, + "learning_rate": 5.3717037584933674e-05, + "loss": 2.4988, + "step": 13100 + }, + { + "epoch": 1.0572996529739327, + "grad_norm": 0.7160749435424805, + "learning_rate": 5.370304390952887e-05, + "loss": 2.4311, + "step": 13101 + }, + { + "epoch": 1.0573803567105158, + "grad_norm": 0.6936448812484741, + "learning_rate": 5.368905138796523e-05, + "loss": 2.4877, + "step": 13102 + }, + { + "epoch": 1.0574610604470986, + 
"grad_norm": 0.7202793955802917, + "learning_rate": 5.3675060020591494e-05, + "loss": 2.4841, + "step": 13103 + }, + { + "epoch": 1.0575417641836817, + "grad_norm": 0.7750168442726135, + "learning_rate": 5.366106980775636e-05, + "loss": 2.4828, + "step": 13104 + }, + { + "epoch": 1.0576224679202646, + "grad_norm": 0.7079972624778748, + "learning_rate": 5.364708074980849e-05, + "loss": 2.4912, + "step": 13105 + }, + { + "epoch": 1.0577031716568477, + "grad_norm": 0.704066276550293, + "learning_rate": 5.363309284709657e-05, + "loss": 2.4731, + "step": 13106 + }, + { + "epoch": 1.0577838753934308, + "grad_norm": 0.7040490508079529, + "learning_rate": 5.361910609996915e-05, + "loss": 2.3811, + "step": 13107 + }, + { + "epoch": 1.0578645791300136, + "grad_norm": 0.6669453978538513, + "learning_rate": 5.360512050877484e-05, + "loss": 2.5372, + "step": 13108 + }, + { + "epoch": 1.0579452828665967, + "grad_norm": 0.7197996973991394, + "learning_rate": 5.359113607386226e-05, + "loss": 2.4612, + "step": 13109 + }, + { + "epoch": 1.0580259866031798, + "grad_norm": 0.7192320823669434, + "learning_rate": 5.3577152795579824e-05, + "loss": 2.4636, + "step": 13110 + }, + { + "epoch": 1.0581066903397627, + "grad_norm": 0.6907937526702881, + "learning_rate": 5.35631706742761e-05, + "loss": 2.4791, + "step": 13111 + }, + { + "epoch": 1.0581873940763458, + "grad_norm": 0.687035083770752, + "learning_rate": 5.354918971029954e-05, + "loss": 2.4706, + "step": 13112 + }, + { + "epoch": 1.0582680978129286, + "grad_norm": 0.6666533350944519, + "learning_rate": 5.353520990399861e-05, + "loss": 2.4789, + "step": 13113 + }, + { + "epoch": 1.0583488015495117, + "grad_norm": 0.6261809468269348, + "learning_rate": 5.35212312557217e-05, + "loss": 2.4485, + "step": 13114 + }, + { + "epoch": 1.0584295052860948, + "grad_norm": 0.6740814447402954, + "learning_rate": 5.350725376581725e-05, + "loss": 2.47, + "step": 13115 + }, + { + "epoch": 1.0585102090226777, + "grad_norm": 0.7634154558181763, + 
"learning_rate": 5.3493277434633526e-05, + "loss": 2.4685, + "step": 13116 + }, + { + "epoch": 1.0585909127592608, + "grad_norm": 0.6674611568450928, + "learning_rate": 5.34793022625189e-05, + "loss": 2.4362, + "step": 13117 + }, + { + "epoch": 1.0586716164958438, + "grad_norm": 0.7584757804870605, + "learning_rate": 5.346532824982167e-05, + "loss": 2.499, + "step": 13118 + }, + { + "epoch": 1.0587523202324267, + "grad_norm": 0.6453456282615662, + "learning_rate": 5.345135539689015e-05, + "loss": 2.4341, + "step": 13119 + }, + { + "epoch": 1.0588330239690098, + "grad_norm": 0.70013427734375, + "learning_rate": 5.343738370407247e-05, + "loss": 2.3448, + "step": 13120 + }, + { + "epoch": 1.0589137277055927, + "grad_norm": 0.6763362884521484, + "learning_rate": 5.342341317171693e-05, + "loss": 2.4234, + "step": 13121 + }, + { + "epoch": 1.0589944314421758, + "grad_norm": 0.6896576881408691, + "learning_rate": 5.3409443800171664e-05, + "loss": 2.4753, + "step": 13122 + }, + { + "epoch": 1.0590751351787588, + "grad_norm": 0.6984997987747192, + "learning_rate": 5.339547558978486e-05, + "loss": 2.4581, + "step": 13123 + }, + { + "epoch": 1.0591558389153417, + "grad_norm": 0.7276118993759155, + "learning_rate": 5.338150854090462e-05, + "loss": 2.4765, + "step": 13124 + }, + { + "epoch": 1.0592365426519248, + "grad_norm": 0.6943252086639404, + "learning_rate": 5.336754265387911e-05, + "loss": 2.4514, + "step": 13125 + }, + { + "epoch": 1.0593172463885079, + "grad_norm": 0.7070014476776123, + "learning_rate": 5.335357792905628e-05, + "loss": 2.4365, + "step": 13126 + }, + { + "epoch": 1.0593979501250907, + "grad_norm": 0.6887189149856567, + "learning_rate": 5.333961436678422e-05, + "loss": 2.4834, + "step": 13127 + }, + { + "epoch": 1.0594786538616738, + "grad_norm": 0.8150162696838379, + "learning_rate": 5.332565196741098e-05, + "loss": 2.4474, + "step": 13128 + }, + { + "epoch": 1.0595593575982567, + "grad_norm": 0.6681316494941711, + "learning_rate": 
5.331169073128447e-05, + "loss": 2.4888, + "step": 13129 + }, + { + "epoch": 1.0596400613348398, + "grad_norm": 0.6696690320968628, + "learning_rate": 5.329773065875267e-05, + "loss": 2.3874, + "step": 13130 + }, + { + "epoch": 1.0597207650714229, + "grad_norm": 0.729807436466217, + "learning_rate": 5.32837717501635e-05, + "loss": 2.4442, + "step": 13131 + }, + { + "epoch": 1.0598014688080057, + "grad_norm": 0.6959047913551331, + "learning_rate": 5.326981400586486e-05, + "loss": 2.4697, + "step": 13132 + }, + { + "epoch": 1.0598821725445888, + "grad_norm": 0.667294442653656, + "learning_rate": 5.3255857426204606e-05, + "loss": 2.3986, + "step": 13133 + }, + { + "epoch": 1.059962876281172, + "grad_norm": 0.6953842639923096, + "learning_rate": 5.3241902011530566e-05, + "loss": 2.396, + "step": 13134 + }, + { + "epoch": 1.0600435800177548, + "grad_norm": 0.6544597148895264, + "learning_rate": 5.32279477621906e-05, + "loss": 2.426, + "step": 13135 + }, + { + "epoch": 1.0601242837543379, + "grad_norm": 0.708017885684967, + "learning_rate": 5.321399467853241e-05, + "loss": 2.4931, + "step": 13136 + }, + { + "epoch": 1.0602049874909207, + "grad_norm": 0.6669809818267822, + "learning_rate": 5.3200042760903764e-05, + "loss": 2.4354, + "step": 13137 + }, + { + "epoch": 1.0602856912275038, + "grad_norm": 1.0144098997116089, + "learning_rate": 5.3186092009652435e-05, + "loss": 2.4803, + "step": 13138 + }, + { + "epoch": 1.060366394964087, + "grad_norm": 0.7213768362998962, + "learning_rate": 5.317214242512601e-05, + "loss": 2.4318, + "step": 13139 + }, + { + "epoch": 1.0604470987006698, + "grad_norm": 0.6429069638252258, + "learning_rate": 5.315819400767223e-05, + "loss": 2.458, + "step": 13140 + }, + { + "epoch": 1.0605278024372529, + "grad_norm": 0.6480485796928406, + "learning_rate": 5.3144246757638714e-05, + "loss": 2.4586, + "step": 13141 + }, + { + "epoch": 1.060608506173836, + "grad_norm": 0.7037697434425354, + "learning_rate": 5.3130300675373035e-05, + "loss": 2.4698, 
+ "step": 13142 + }, + { + "epoch": 1.0606892099104188, + "grad_norm": 0.7307559251785278, + "learning_rate": 5.3116355761222725e-05, + "loss": 2.4027, + "step": 13143 + }, + { + "epoch": 1.060769913647002, + "grad_norm": 0.6684615612030029, + "learning_rate": 5.310241201553547e-05, + "loss": 2.478, + "step": 13144 + }, + { + "epoch": 1.0608506173835848, + "grad_norm": 0.7018016576766968, + "learning_rate": 5.308846943865866e-05, + "loss": 2.4229, + "step": 13145 + }, + { + "epoch": 1.0609313211201679, + "grad_norm": 0.7538621425628662, + "learning_rate": 5.307452803093982e-05, + "loss": 2.5201, + "step": 13146 + }, + { + "epoch": 1.061012024856751, + "grad_norm": 0.6957963109016418, + "learning_rate": 5.306058779272645e-05, + "loss": 2.4233, + "step": 13147 + }, + { + "epoch": 1.0610927285933338, + "grad_norm": 0.6280590295791626, + "learning_rate": 5.304664872436588e-05, + "loss": 2.5117, + "step": 13148 + }, + { + "epoch": 1.061173432329917, + "grad_norm": 0.6937280297279358, + "learning_rate": 5.3032710826205564e-05, + "loss": 2.4889, + "step": 13149 + }, + { + "epoch": 1.0612541360664998, + "grad_norm": 0.6750391125679016, + "learning_rate": 5.3018774098592884e-05, + "loss": 2.4472, + "step": 13150 + }, + { + "epoch": 1.0613348398030829, + "grad_norm": 0.6931902766227722, + "learning_rate": 5.300483854187519e-05, + "loss": 2.3883, + "step": 13151 + }, + { + "epoch": 1.061415543539666, + "grad_norm": 0.6982774138450623, + "learning_rate": 5.2990904156399726e-05, + "loss": 2.4688, + "step": 13152 + }, + { + "epoch": 1.0614962472762488, + "grad_norm": 0.6873522996902466, + "learning_rate": 5.297697094251382e-05, + "loss": 2.4818, + "step": 13153 + }, + { + "epoch": 1.061576951012832, + "grad_norm": 0.635377049446106, + "learning_rate": 5.296303890056471e-05, + "loss": 2.3906, + "step": 13154 + }, + { + "epoch": 1.061657654749415, + "grad_norm": 0.6368159651756287, + "learning_rate": 5.294910803089963e-05, + "loss": 2.4714, + "step": 13155 + }, + { + "epoch": 
1.0617383584859978, + "grad_norm": 0.7147238254547119, + "learning_rate": 5.293517833386576e-05, + "loss": 2.4746, + "step": 13156 + }, + { + "epoch": 1.061819062222581, + "grad_norm": 0.742189884185791, + "learning_rate": 5.2921249809810326e-05, + "loss": 2.3913, + "step": 13157 + }, + { + "epoch": 1.061899765959164, + "grad_norm": 0.6665734648704529, + "learning_rate": 5.290732245908038e-05, + "loss": 2.4263, + "step": 13158 + }, + { + "epoch": 1.0619804696957469, + "grad_norm": 0.6894757747650146, + "learning_rate": 5.2893396282023055e-05, + "loss": 2.4204, + "step": 13159 + }, + { + "epoch": 1.06206117343233, + "grad_norm": 0.6394561529159546, + "learning_rate": 5.287947127898546e-05, + "loss": 2.4183, + "step": 13160 + }, + { + "epoch": 1.0621418771689128, + "grad_norm": 0.7422548532485962, + "learning_rate": 5.2865547450314576e-05, + "loss": 2.4454, + "step": 13161 + }, + { + "epoch": 1.062222580905496, + "grad_norm": 0.7486133575439453, + "learning_rate": 5.285162479635748e-05, + "loss": 2.4856, + "step": 13162 + }, + { + "epoch": 1.062303284642079, + "grad_norm": 0.6743031144142151, + "learning_rate": 5.283770331746112e-05, + "loss": 2.4318, + "step": 13163 + }, + { + "epoch": 1.0623839883786619, + "grad_norm": 0.6461686491966248, + "learning_rate": 5.282378301397248e-05, + "loss": 2.4133, + "step": 13164 + }, + { + "epoch": 1.062464692115245, + "grad_norm": 0.6745431423187256, + "learning_rate": 5.28098638862385e-05, + "loss": 2.4463, + "step": 13165 + }, + { + "epoch": 1.0625453958518278, + "grad_norm": 0.6646310687065125, + "learning_rate": 5.279594593460606e-05, + "loss": 2.4211, + "step": 13166 + }, + { + "epoch": 1.062626099588411, + "grad_norm": 0.6789249777793884, + "learning_rate": 5.278202915942207e-05, + "loss": 2.4832, + "step": 13167 + }, + { + "epoch": 1.062706803324994, + "grad_norm": 0.7082679867744446, + "learning_rate": 5.2768113561033326e-05, + "loss": 2.4303, + "step": 13168 + }, + { + "epoch": 1.0627875070615769, + "grad_norm": 
0.6875587701797485, + "learning_rate": 5.275419913978664e-05, + "loss": 2.4601, + "step": 13169 + }, + { + "epoch": 1.06286821079816, + "grad_norm": 0.6556203961372375, + "learning_rate": 5.274028589602886e-05, + "loss": 2.4359, + "step": 13170 + }, + { + "epoch": 1.062948914534743, + "grad_norm": 0.7280015349388123, + "learning_rate": 5.272637383010666e-05, + "loss": 2.4999, + "step": 13171 + }, + { + "epoch": 1.063029618271326, + "grad_norm": 0.664654016494751, + "learning_rate": 5.271246294236678e-05, + "loss": 2.3951, + "step": 13172 + }, + { + "epoch": 1.063110322007909, + "grad_norm": 0.6941719055175781, + "learning_rate": 5.2698553233155945e-05, + "loss": 2.45, + "step": 13173 + }, + { + "epoch": 1.0631910257444919, + "grad_norm": 0.7212931513786316, + "learning_rate": 5.268464470282082e-05, + "loss": 2.4615, + "step": 13174 + }, + { + "epoch": 1.063271729481075, + "grad_norm": 0.6877106428146362, + "learning_rate": 5.2670737351708014e-05, + "loss": 2.4495, + "step": 13175 + }, + { + "epoch": 1.063352433217658, + "grad_norm": 0.737718939781189, + "learning_rate": 5.26568311801642e-05, + "loss": 2.4971, + "step": 13176 + }, + { + "epoch": 1.063433136954241, + "grad_norm": 0.6909129619598389, + "learning_rate": 5.264292618853587e-05, + "loss": 2.4889, + "step": 13177 + }, + { + "epoch": 1.063513840690824, + "grad_norm": 0.6750304102897644, + "learning_rate": 5.262902237716961e-05, + "loss": 2.4779, + "step": 13178 + }, + { + "epoch": 1.063594544427407, + "grad_norm": 0.7256019115447998, + "learning_rate": 5.2615119746411954e-05, + "loss": 2.4904, + "step": 13179 + }, + { + "epoch": 1.06367524816399, + "grad_norm": 0.7335983514785767, + "learning_rate": 5.26012182966094e-05, + "loss": 2.4357, + "step": 13180 + }, + { + "epoch": 1.063755951900573, + "grad_norm": 0.6534200310707092, + "learning_rate": 5.258731802810837e-05, + "loss": 2.4213, + "step": 13181 + }, + { + "epoch": 1.063836655637156, + "grad_norm": 0.6899768114089966, + "learning_rate": 
5.257341894125529e-05, + "loss": 2.4963, + "step": 13182 + }, + { + "epoch": 1.063917359373739, + "grad_norm": 0.7016159892082214, + "learning_rate": 5.25595210363966e-05, + "loss": 2.4583, + "step": 13183 + }, + { + "epoch": 1.063998063110322, + "grad_norm": 0.6868152022361755, + "learning_rate": 5.2545624313878636e-05, + "loss": 2.4523, + "step": 13184 + }, + { + "epoch": 1.064078766846905, + "grad_norm": 0.7442622184753418, + "learning_rate": 5.2531728774047785e-05, + "loss": 2.425, + "step": 13185 + }, + { + "epoch": 1.064159470583488, + "grad_norm": 0.6900869011878967, + "learning_rate": 5.251783441725037e-05, + "loss": 2.459, + "step": 13186 + }, + { + "epoch": 1.0642401743200711, + "grad_norm": 0.6910288333892822, + "learning_rate": 5.25039412438326e-05, + "loss": 2.4882, + "step": 13187 + }, + { + "epoch": 1.064320878056654, + "grad_norm": 0.7644359469413757, + "learning_rate": 5.249004925414076e-05, + "loss": 2.4663, + "step": 13188 + }, + { + "epoch": 1.064401581793237, + "grad_norm": 0.6703082919120789, + "learning_rate": 5.247615844852114e-05, + "loss": 2.4309, + "step": 13189 + }, + { + "epoch": 1.06448228552982, + "grad_norm": 0.6449835896492004, + "learning_rate": 5.246226882731983e-05, + "loss": 2.4307, + "step": 13190 + }, + { + "epoch": 1.064562989266403, + "grad_norm": 0.7332713603973389, + "learning_rate": 5.244838039088305e-05, + "loss": 2.3763, + "step": 13191 + }, + { + "epoch": 1.0646436930029861, + "grad_norm": 0.7626641988754272, + "learning_rate": 5.2434493139556974e-05, + "loss": 2.4167, + "step": 13192 + }, + { + "epoch": 1.064724396739569, + "grad_norm": 0.6924002170562744, + "learning_rate": 5.2420607073687614e-05, + "loss": 2.4751, + "step": 13193 + }, + { + "epoch": 1.064805100476152, + "grad_norm": 0.6815003156661987, + "learning_rate": 5.2406722193621074e-05, + "loss": 2.4731, + "step": 13194 + }, + { + "epoch": 1.064885804212735, + "grad_norm": 0.7632609009742737, + "learning_rate": 5.239283849970347e-05, + "loss": 2.4562, + 
"step": 13195 + }, + { + "epoch": 1.064966507949318, + "grad_norm": 0.7157592177391052, + "learning_rate": 5.23789559922808e-05, + "loss": 2.4507, + "step": 13196 + }, + { + "epoch": 1.065047211685901, + "grad_norm": 0.7035543918609619, + "learning_rate": 5.2365074671699e-05, + "loss": 2.4616, + "step": 13197 + }, + { + "epoch": 1.065127915422484, + "grad_norm": 0.7566644549369812, + "learning_rate": 5.235119453830406e-05, + "loss": 2.4751, + "step": 13198 + }, + { + "epoch": 1.065208619159067, + "grad_norm": 0.7030916213989258, + "learning_rate": 5.233731559244194e-05, + "loss": 2.381, + "step": 13199 + }, + { + "epoch": 1.0652893228956501, + "grad_norm": 0.7663755416870117, + "learning_rate": 5.232343783445847e-05, + "loss": 2.4822, + "step": 13200 + }, + { + "epoch": 1.065370026632233, + "grad_norm": 0.717767596244812, + "learning_rate": 5.230956126469955e-05, + "loss": 2.4807, + "step": 13201 + }, + { + "epoch": 1.065450730368816, + "grad_norm": 0.6920818090438843, + "learning_rate": 5.229568588351108e-05, + "loss": 2.4643, + "step": 13202 + }, + { + "epoch": 1.0655314341053992, + "grad_norm": 0.6812553405761719, + "learning_rate": 5.228181169123877e-05, + "loss": 2.4443, + "step": 13203 + }, + { + "epoch": 1.065612137841982, + "grad_norm": 0.7241889834403992, + "learning_rate": 5.226793868822846e-05, + "loss": 2.4581, + "step": 13204 + }, + { + "epoch": 1.0656928415785651, + "grad_norm": 0.7254642248153687, + "learning_rate": 5.225406687482588e-05, + "loss": 2.4999, + "step": 13205 + }, + { + "epoch": 1.065773545315148, + "grad_norm": 0.7316950559616089, + "learning_rate": 5.2240196251376764e-05, + "loss": 2.4493, + "step": 13206 + }, + { + "epoch": 1.065854249051731, + "grad_norm": 0.7208307385444641, + "learning_rate": 5.22263268182268e-05, + "loss": 2.5083, + "step": 13207 + }, + { + "epoch": 1.0659349527883142, + "grad_norm": 0.6552214622497559, + "learning_rate": 5.22124585757217e-05, + "loss": 2.4662, + "step": 13208 + }, + { + "epoch": 
1.066015656524897, + "grad_norm": 0.7949681878089905, + "learning_rate": 5.219859152420701e-05, + "loss": 2.4584, + "step": 13209 + }, + { + "epoch": 1.0660963602614801, + "grad_norm": 0.7012154459953308, + "learning_rate": 5.2184725664028366e-05, + "loss": 2.4702, + "step": 13210 + }, + { + "epoch": 1.066177063998063, + "grad_norm": 0.7431927919387817, + "learning_rate": 5.217086099553136e-05, + "loss": 2.4422, + "step": 13211 + }, + { + "epoch": 1.066257767734646, + "grad_norm": 0.7235366702079773, + "learning_rate": 5.2156997519061554e-05, + "loss": 2.4173, + "step": 13212 + }, + { + "epoch": 1.0663384714712292, + "grad_norm": 0.7475029826164246, + "learning_rate": 5.214313523496439e-05, + "loss": 2.4924, + "step": 13213 + }, + { + "epoch": 1.066419175207812, + "grad_norm": 0.6326786875724792, + "learning_rate": 5.212927414358542e-05, + "loss": 2.4154, + "step": 13214 + }, + { + "epoch": 1.0664998789443951, + "grad_norm": 0.6755837798118591, + "learning_rate": 5.211541424527004e-05, + "loss": 2.4248, + "step": 13215 + }, + { + "epoch": 1.0665805826809782, + "grad_norm": 0.645395040512085, + "learning_rate": 5.210155554036373e-05, + "loss": 2.4078, + "step": 13216 + }, + { + "epoch": 1.066661286417561, + "grad_norm": 0.799913763999939, + "learning_rate": 5.208769802921185e-05, + "loss": 2.5067, + "step": 13217 + }, + { + "epoch": 1.0667419901541442, + "grad_norm": 0.7056344747543335, + "learning_rate": 5.207384171215983e-05, + "loss": 2.4817, + "step": 13218 + }, + { + "epoch": 1.0668226938907273, + "grad_norm": 0.7082187533378601, + "learning_rate": 5.205998658955291e-05, + "loss": 2.4495, + "step": 13219 + }, + { + "epoch": 1.0669033976273101, + "grad_norm": 0.6948464512825012, + "learning_rate": 5.204613266173646e-05, + "loss": 2.4584, + "step": 13220 + }, + { + "epoch": 1.0669841013638932, + "grad_norm": 0.7812542915344238, + "learning_rate": 5.203227992905575e-05, + "loss": 2.4803, + "step": 13221 + }, + { + "epoch": 1.067064805100476, + "grad_norm": 
0.6892200708389282, + "learning_rate": 5.201842839185598e-05, + "loss": 2.4424, + "step": 13222 + }, + { + "epoch": 1.0671455088370592, + "grad_norm": 0.6982070803642273, + "learning_rate": 5.20045780504824e-05, + "loss": 2.4654, + "step": 13223 + }, + { + "epoch": 1.0672262125736423, + "grad_norm": 0.6799101233482361, + "learning_rate": 5.1990728905280205e-05, + "loss": 2.4748, + "step": 13224 + }, + { + "epoch": 1.0673069163102251, + "grad_norm": 0.6703687906265259, + "learning_rate": 5.1976880956594544e-05, + "loss": 2.4459, + "step": 13225 + }, + { + "epoch": 1.0673876200468082, + "grad_norm": 0.6821435689926147, + "learning_rate": 5.196303420477053e-05, + "loss": 2.4517, + "step": 13226 + }, + { + "epoch": 1.067468323783391, + "grad_norm": 0.6369695067405701, + "learning_rate": 5.194918865015328e-05, + "loss": 2.4388, + "step": 13227 + }, + { + "epoch": 1.0675490275199742, + "grad_norm": 0.6465736627578735, + "learning_rate": 5.1935344293087885e-05, + "loss": 2.3839, + "step": 13228 + }, + { + "epoch": 1.0676297312565572, + "grad_norm": 0.6745415329933167, + "learning_rate": 5.192150113391933e-05, + "loss": 2.4676, + "step": 13229 + }, + { + "epoch": 1.0677104349931401, + "grad_norm": 0.7605211138725281, + "learning_rate": 5.190765917299263e-05, + "loss": 2.4764, + "step": 13230 + }, + { + "epoch": 1.0677911387297232, + "grad_norm": 0.7040959596633911, + "learning_rate": 5.1893818410652825e-05, + "loss": 2.4727, + "step": 13231 + }, + { + "epoch": 1.0678718424663063, + "grad_norm": 0.6718928813934326, + "learning_rate": 5.1879978847244785e-05, + "loss": 2.4308, + "step": 13232 + }, + { + "epoch": 1.0679525462028892, + "grad_norm": 0.6788188219070435, + "learning_rate": 5.1866140483113445e-05, + "loss": 2.4278, + "step": 13233 + }, + { + "epoch": 1.0680332499394722, + "grad_norm": 0.7310218811035156, + "learning_rate": 5.185230331860371e-05, + "loss": 2.4585, + "step": 13234 + }, + { + "epoch": 1.068113953676055, + "grad_norm": 0.8092277646064758, + 
"learning_rate": 5.183846735406044e-05, + "loss": 2.4128, + "step": 13235 + }, + { + "epoch": 1.0681946574126382, + "grad_norm": 0.6469862461090088, + "learning_rate": 5.182463258982846e-05, + "loss": 2.4315, + "step": 13236 + }, + { + "epoch": 1.0682753611492213, + "grad_norm": 0.7948115468025208, + "learning_rate": 5.181079902625261e-05, + "loss": 2.5127, + "step": 13237 + }, + { + "epoch": 1.0683560648858041, + "grad_norm": 0.6988852620124817, + "learning_rate": 5.179696666367757e-05, + "loss": 2.432, + "step": 13238 + }, + { + "epoch": 1.0684367686223872, + "grad_norm": 0.6914555430412292, + "learning_rate": 5.1783135502448124e-05, + "loss": 2.4748, + "step": 13239 + }, + { + "epoch": 1.0685174723589703, + "grad_norm": 0.7586313486099243, + "learning_rate": 5.176930554290902e-05, + "loss": 2.4522, + "step": 13240 + }, + { + "epoch": 1.0685981760955532, + "grad_norm": 0.6763948798179626, + "learning_rate": 5.175547678540487e-05, + "loss": 2.4477, + "step": 13241 + }, + { + "epoch": 1.0686788798321363, + "grad_norm": 0.7625983357429504, + "learning_rate": 5.1741649230280334e-05, + "loss": 2.4725, + "step": 13242 + }, + { + "epoch": 1.0687595835687191, + "grad_norm": 0.6574710011482239, + "learning_rate": 5.172782287788005e-05, + "loss": 2.4212, + "step": 13243 + }, + { + "epoch": 1.0688402873053022, + "grad_norm": 0.770062267780304, + "learning_rate": 5.1713997728548615e-05, + "loss": 2.5065, + "step": 13244 + }, + { + "epoch": 1.0689209910418853, + "grad_norm": 0.7719037532806396, + "learning_rate": 5.170017378263057e-05, + "loss": 2.5082, + "step": 13245 + }, + { + "epoch": 1.0690016947784682, + "grad_norm": 0.7106119394302368, + "learning_rate": 5.168635104047046e-05, + "loss": 2.4922, + "step": 13246 + }, + { + "epoch": 1.0690823985150513, + "grad_norm": 0.711815595626831, + "learning_rate": 5.167252950241281e-05, + "loss": 2.498, + "step": 13247 + }, + { + "epoch": 1.0691631022516344, + "grad_norm": 0.6926038265228271, + "learning_rate": 
5.165870916880201e-05, + "loss": 2.4464, + "step": 13248 + }, + { + "epoch": 1.0692438059882172, + "grad_norm": 0.6959360241889954, + "learning_rate": 5.164489003998254e-05, + "loss": 2.4668, + "step": 13249 + }, + { + "epoch": 1.0693245097248003, + "grad_norm": 0.7165184617042542, + "learning_rate": 5.1631072116298875e-05, + "loss": 2.4198, + "step": 13250 + }, + { + "epoch": 1.0694052134613832, + "grad_norm": 0.7133236527442932, + "learning_rate": 5.161725539809527e-05, + "loss": 2.4691, + "step": 13251 + }, + { + "epoch": 1.0694859171979663, + "grad_norm": 0.7057758569717407, + "learning_rate": 5.160343988571613e-05, + "loss": 2.466, + "step": 13252 + }, + { + "epoch": 1.0695666209345494, + "grad_norm": 0.6808326244354248, + "learning_rate": 5.158962557950583e-05, + "loss": 2.4248, + "step": 13253 + }, + { + "epoch": 1.0696473246711322, + "grad_norm": 0.7166025638580322, + "learning_rate": 5.1575812479808563e-05, + "loss": 2.4753, + "step": 13254 + }, + { + "epoch": 1.0697280284077153, + "grad_norm": 0.7395358085632324, + "learning_rate": 5.156200058696863e-05, + "loss": 2.485, + "step": 13255 + }, + { + "epoch": 1.0698087321442982, + "grad_norm": 0.681106686592102, + "learning_rate": 5.154818990133026e-05, + "loss": 2.5077, + "step": 13256 + }, + { + "epoch": 1.0698894358808813, + "grad_norm": 0.7517002820968628, + "learning_rate": 5.153438042323766e-05, + "loss": 2.5093, + "step": 13257 + }, + { + "epoch": 1.0699701396174643, + "grad_norm": 0.6516926288604736, + "learning_rate": 5.152057215303499e-05, + "loss": 2.4416, + "step": 13258 + }, + { + "epoch": 1.0700508433540472, + "grad_norm": 0.6930893063545227, + "learning_rate": 5.150676509106638e-05, + "loss": 2.506, + "step": 13259 + }, + { + "epoch": 1.0701315470906303, + "grad_norm": 0.7737041115760803, + "learning_rate": 5.1492959237675986e-05, + "loss": 2.4355, + "step": 13260 + }, + { + "epoch": 1.0702122508272134, + "grad_norm": 0.7274872660636902, + "learning_rate": 5.14791545932078e-05, + "loss": 
2.5552, + "step": 13261 + }, + { + "epoch": 1.0702929545637963, + "grad_norm": 0.7112408876419067, + "learning_rate": 5.146535115800593e-05, + "loss": 2.4041, + "step": 13262 + }, + { + "epoch": 1.0703736583003793, + "grad_norm": 0.6822024583816528, + "learning_rate": 5.1451548932414415e-05, + "loss": 2.4346, + "step": 13263 + }, + { + "epoch": 1.0704543620369624, + "grad_norm": 0.6590598225593567, + "learning_rate": 5.1437747916777165e-05, + "loss": 2.3946, + "step": 13264 + }, + { + "epoch": 1.0705350657735453, + "grad_norm": 0.643014132976532, + "learning_rate": 5.142394811143818e-05, + "loss": 2.4455, + "step": 13265 + }, + { + "epoch": 1.0706157695101284, + "grad_norm": 0.6480194926261902, + "learning_rate": 5.141014951674139e-05, + "loss": 2.4304, + "step": 13266 + }, + { + "epoch": 1.0706964732467112, + "grad_norm": 0.6933526992797852, + "learning_rate": 5.139635213303069e-05, + "loss": 2.4627, + "step": 13267 + }, + { + "epoch": 1.0707771769832943, + "grad_norm": 0.6832638382911682, + "learning_rate": 5.138255596064995e-05, + "loss": 2.4645, + "step": 13268 + }, + { + "epoch": 1.0708578807198774, + "grad_norm": 0.6579757928848267, + "learning_rate": 5.1368760999943034e-05, + "loss": 2.3928, + "step": 13269 + }, + { + "epoch": 1.0709385844564603, + "grad_norm": 0.6658132672309875, + "learning_rate": 5.1354967251253684e-05, + "loss": 2.4732, + "step": 13270 + }, + { + "epoch": 1.0710192881930434, + "grad_norm": 0.7610828876495361, + "learning_rate": 5.13411747149257e-05, + "loss": 2.4781, + "step": 13271 + }, + { + "epoch": 1.0710999919296262, + "grad_norm": 0.682858943939209, + "learning_rate": 5.1327383391302895e-05, + "loss": 2.4545, + "step": 13272 + }, + { + "epoch": 1.0711806956662093, + "grad_norm": 0.7461360692977905, + "learning_rate": 5.131359328072887e-05, + "loss": 2.4647, + "step": 13273 + }, + { + "epoch": 1.0712613994027924, + "grad_norm": 0.6767961382865906, + "learning_rate": 5.129980438354738e-05, + "loss": 2.4562, + "step": 13274 + }, + { + 
"epoch": 1.0713421031393753, + "grad_norm": 0.6768184304237366, + "learning_rate": 5.1286016700102066e-05, + "loss": 2.4662, + "step": 13275 + }, + { + "epoch": 1.0714228068759584, + "grad_norm": 0.7022743225097656, + "learning_rate": 5.1272230230736554e-05, + "loss": 2.4321, + "step": 13276 + }, + { + "epoch": 1.0715035106125415, + "grad_norm": 0.725488007068634, + "learning_rate": 5.125844497579444e-05, + "loss": 2.457, + "step": 13277 + }, + { + "epoch": 1.0715842143491243, + "grad_norm": 0.7542931437492371, + "learning_rate": 5.124466093561928e-05, + "loss": 2.4302, + "step": 13278 + }, + { + "epoch": 1.0716649180857074, + "grad_norm": 0.6598316431045532, + "learning_rate": 5.123087811055467e-05, + "loss": 2.4552, + "step": 13279 + }, + { + "epoch": 1.0717456218222903, + "grad_norm": 0.7533490061759949, + "learning_rate": 5.1217096500944017e-05, + "loss": 2.4778, + "step": 13280 + }, + { + "epoch": 1.0718263255588734, + "grad_norm": 0.6890795826911926, + "learning_rate": 5.1203316107130825e-05, + "loss": 2.4349, + "step": 13281 + }, + { + "epoch": 1.0719070292954564, + "grad_norm": 0.7004082202911377, + "learning_rate": 5.118953692945862e-05, + "loss": 2.4645, + "step": 13282 + }, + { + "epoch": 1.0719877330320393, + "grad_norm": 0.7409259676933289, + "learning_rate": 5.117575896827068e-05, + "loss": 2.4734, + "step": 13283 + }, + { + "epoch": 1.0720684367686224, + "grad_norm": 0.7035481929779053, + "learning_rate": 5.116198222391046e-05, + "loss": 2.5027, + "step": 13284 + }, + { + "epoch": 1.0721491405052055, + "grad_norm": 0.7146698236465454, + "learning_rate": 5.114820669672132e-05, + "loss": 2.4623, + "step": 13285 + }, + { + "epoch": 1.0722298442417884, + "grad_norm": 0.7813882231712341, + "learning_rate": 5.113443238704656e-05, + "loss": 2.4644, + "step": 13286 + }, + { + "epoch": 1.0723105479783714, + "grad_norm": 0.6592430472373962, + "learning_rate": 5.1120659295229486e-05, + "loss": 2.4682, + "step": 13287 + }, + { + "epoch": 1.0723912517149543, + 
"grad_norm": 0.7047967910766602, + "learning_rate": 5.1106887421613395e-05, + "loss": 2.4368, + "step": 13288 + }, + { + "epoch": 1.0724719554515374, + "grad_norm": 0.700977087020874, + "learning_rate": 5.109311676654143e-05, + "loss": 2.4471, + "step": 13289 + }, + { + "epoch": 1.0725526591881205, + "grad_norm": 0.6821093559265137, + "learning_rate": 5.107934733035684e-05, + "loss": 2.433, + "step": 13290 + }, + { + "epoch": 1.0726333629247033, + "grad_norm": 0.6579930186271667, + "learning_rate": 5.1065579113402794e-05, + "loss": 2.4527, + "step": 13291 + }, + { + "epoch": 1.0727140666612864, + "grad_norm": 0.658514678478241, + "learning_rate": 5.105181211602248e-05, + "loss": 2.4443, + "step": 13292 + }, + { + "epoch": 1.0727947703978695, + "grad_norm": 0.6963977217674255, + "learning_rate": 5.103804633855891e-05, + "loss": 2.4699, + "step": 13293 + }, + { + "epoch": 1.0728754741344524, + "grad_norm": 0.6670787334442139, + "learning_rate": 5.102428178135522e-05, + "loss": 2.4672, + "step": 13294 + }, + { + "epoch": 1.0729561778710355, + "grad_norm": 0.6959822773933411, + "learning_rate": 5.1010518444754454e-05, + "loss": 2.4338, + "step": 13295 + }, + { + "epoch": 1.0730368816076183, + "grad_norm": 0.6534817218780518, + "learning_rate": 5.0996756329099614e-05, + "loss": 2.4491, + "step": 13296 + }, + { + "epoch": 1.0731175853442014, + "grad_norm": 0.7265146970748901, + "learning_rate": 5.098299543473371e-05, + "loss": 2.4718, + "step": 13297 + }, + { + "epoch": 1.0731982890807845, + "grad_norm": 0.6554745435714722, + "learning_rate": 5.0969235761999746e-05, + "loss": 2.4286, + "step": 13298 + }, + { + "epoch": 1.0732789928173674, + "grad_norm": 0.7003172039985657, + "learning_rate": 5.095547731124053e-05, + "loss": 2.4182, + "step": 13299 + }, + { + "epoch": 1.0733596965539505, + "grad_norm": 0.6700341105461121, + "learning_rate": 5.094172008279904e-05, + "loss": 2.428, + "step": 13300 + }, + { + "epoch": 1.0734404002905333, + "grad_norm": 0.7290289402008057, + 
"learning_rate": 5.0927964077018164e-05, + "loss": 2.4324, + "step": 13301 + }, + { + "epoch": 1.0735211040271164, + "grad_norm": 0.6999204158782959, + "learning_rate": 5.0914209294240644e-05, + "loss": 2.5386, + "step": 13302 + }, + { + "epoch": 1.0736018077636995, + "grad_norm": 0.7008000612258911, + "learning_rate": 5.090045573480935e-05, + "loss": 2.5295, + "step": 13303 + }, + { + "epoch": 1.0736825115002824, + "grad_norm": 0.7023071646690369, + "learning_rate": 5.088670339906705e-05, + "loss": 2.4418, + "step": 13304 + }, + { + "epoch": 1.0737632152368655, + "grad_norm": 0.627174437046051, + "learning_rate": 5.0872952287356525e-05, + "loss": 2.3782, + "step": 13305 + }, + { + "epoch": 1.0738439189734486, + "grad_norm": 0.6992766857147217, + "learning_rate": 5.0859202400020364e-05, + "loss": 2.4698, + "step": 13306 + }, + { + "epoch": 1.0739246227100314, + "grad_norm": 0.7189817428588867, + "learning_rate": 5.084545373740138e-05, + "loss": 2.5248, + "step": 13307 + }, + { + "epoch": 1.0740053264466145, + "grad_norm": 0.6849164962768555, + "learning_rate": 5.0831706299842216e-05, + "loss": 2.4084, + "step": 13308 + }, + { + "epoch": 1.0740860301831976, + "grad_norm": 0.6985825300216675, + "learning_rate": 5.0817960087685424e-05, + "loss": 2.4893, + "step": 13309 + }, + { + "epoch": 1.0741667339197805, + "grad_norm": 0.6519783139228821, + "learning_rate": 5.080421510127362e-05, + "loss": 2.5144, + "step": 13310 + }, + { + "epoch": 1.0742474376563635, + "grad_norm": 0.6605731248855591, + "learning_rate": 5.079047134094941e-05, + "loss": 2.4487, + "step": 13311 + }, + { + "epoch": 1.0743281413929464, + "grad_norm": 0.7236705422401428, + "learning_rate": 5.077672880705526e-05, + "loss": 2.4578, + "step": 13312 + }, + { + "epoch": 1.0744088451295295, + "grad_norm": 0.7126381397247314, + "learning_rate": 5.07629874999337e-05, + "loss": 2.4528, + "step": 13313 + }, + { + "epoch": 1.0744895488661126, + "grad_norm": 0.7247878313064575, + "learning_rate": 
5.0749247419927236e-05, + "loss": 2.563, + "step": 13314 + }, + { + "epoch": 1.0745702526026955, + "grad_norm": 0.728349506855011, + "learning_rate": 5.0735508567378234e-05, + "loss": 2.4229, + "step": 13315 + }, + { + "epoch": 1.0746509563392785, + "grad_norm": 0.6593719124794006, + "learning_rate": 5.072177094262913e-05, + "loss": 2.4853, + "step": 13316 + }, + { + "epoch": 1.0747316600758614, + "grad_norm": 0.6519735455513, + "learning_rate": 5.070803454602231e-05, + "loss": 2.4507, + "step": 13317 + }, + { + "epoch": 1.0748123638124445, + "grad_norm": 0.6660017371177673, + "learning_rate": 5.0694299377900115e-05, + "loss": 2.4286, + "step": 13318 + }, + { + "epoch": 1.0748930675490276, + "grad_norm": 0.7506695985794067, + "learning_rate": 5.0680565438604876e-05, + "loss": 2.4841, + "step": 13319 + }, + { + "epoch": 1.0749737712856104, + "grad_norm": 0.6855955719947815, + "learning_rate": 5.0666832728478863e-05, + "loss": 2.3817, + "step": 13320 + }, + { + "epoch": 1.0750544750221935, + "grad_norm": 0.7151634693145752, + "learning_rate": 5.065310124786438e-05, + "loss": 2.3984, + "step": 13321 + }, + { + "epoch": 1.0751351787587766, + "grad_norm": 0.6551649570465088, + "learning_rate": 5.063937099710356e-05, + "loss": 2.4574, + "step": 13322 + }, + { + "epoch": 1.0752158824953595, + "grad_norm": 0.7443479895591736, + "learning_rate": 5.062564197653865e-05, + "loss": 2.52, + "step": 13323 + }, + { + "epoch": 1.0752965862319426, + "grad_norm": 0.7554972767829895, + "learning_rate": 5.061191418651186e-05, + "loss": 2.483, + "step": 13324 + }, + { + "epoch": 1.0753772899685254, + "grad_norm": 0.7661007642745972, + "learning_rate": 5.059818762736521e-05, + "loss": 2.566, + "step": 13325 + }, + { + "epoch": 1.0754579937051085, + "grad_norm": 0.7416480183601379, + "learning_rate": 5.058446229944087e-05, + "loss": 2.465, + "step": 13326 + }, + { + "epoch": 1.0755386974416916, + "grad_norm": 0.6997848749160767, + "learning_rate": 5.057073820308089e-05, + "loss": 2.4936, 
+ "step": 13327 + }, + { + "epoch": 1.0756194011782745, + "grad_norm": 0.7570235133171082, + "learning_rate": 5.0557015338627345e-05, + "loss": 2.519, + "step": 13328 + }, + { + "epoch": 1.0757001049148576, + "grad_norm": 0.7910803556442261, + "learning_rate": 5.0543293706422214e-05, + "loss": 2.4932, + "step": 13329 + }, + { + "epoch": 1.0757808086514407, + "grad_norm": 0.7068312168121338, + "learning_rate": 5.052957330680752e-05, + "loss": 2.4489, + "step": 13330 + }, + { + "epoch": 1.0758615123880235, + "grad_norm": 0.7818215489387512, + "learning_rate": 5.051585414012514e-05, + "loss": 2.4467, + "step": 13331 + }, + { + "epoch": 1.0759422161246066, + "grad_norm": 0.7359446287155151, + "learning_rate": 5.0502136206717046e-05, + "loss": 2.4348, + "step": 13332 + }, + { + "epoch": 1.0760229198611895, + "grad_norm": 0.694726824760437, + "learning_rate": 5.0488419506925124e-05, + "loss": 2.4554, + "step": 13333 + }, + { + "epoch": 1.0761036235977726, + "grad_norm": 0.6776530742645264, + "learning_rate": 5.047470404109118e-05, + "loss": 2.4206, + "step": 13334 + }, + { + "epoch": 1.0761843273343557, + "grad_norm": 0.6977556943893433, + "learning_rate": 5.0460989809557066e-05, + "loss": 2.4748, + "step": 13335 + }, + { + "epoch": 1.0762650310709385, + "grad_norm": 0.6888061761856079, + "learning_rate": 5.044727681266459e-05, + "loss": 2.4129, + "step": 13336 + }, + { + "epoch": 1.0763457348075216, + "grad_norm": 0.744110643863678, + "learning_rate": 5.043356505075549e-05, + "loss": 2.4815, + "step": 13337 + }, + { + "epoch": 1.0764264385441047, + "grad_norm": 0.6726455688476562, + "learning_rate": 5.041985452417154e-05, + "loss": 2.4299, + "step": 13338 + }, + { + "epoch": 1.0765071422806876, + "grad_norm": 0.6755545735359192, + "learning_rate": 5.040614523325441e-05, + "loss": 2.4188, + "step": 13339 + }, + { + "epoch": 1.0765878460172706, + "grad_norm": 0.7152739763259888, + "learning_rate": 5.039243717834582e-05, + "loss": 2.4366, + "step": 13340 + }, + { + 
"epoch": 1.0766685497538535, + "grad_norm": 0.7253085374832153, + "learning_rate": 5.037873035978733e-05, + "loss": 2.4681, + "step": 13341 + }, + { + "epoch": 1.0767492534904366, + "grad_norm": 0.6780266165733337, + "learning_rate": 5.03650247779206e-05, + "loss": 2.5163, + "step": 13342 + }, + { + "epoch": 1.0768299572270197, + "grad_norm": 0.7440996170043945, + "learning_rate": 5.035132043308722e-05, + "loss": 2.4831, + "step": 13343 + }, + { + "epoch": 1.0769106609636026, + "grad_norm": 0.6619833111763, + "learning_rate": 5.0337617325628695e-05, + "loss": 2.433, + "step": 13344 + }, + { + "epoch": 1.0769913647001856, + "grad_norm": 0.7518059015274048, + "learning_rate": 5.032391545588656e-05, + "loss": 2.4241, + "step": 13345 + }, + { + "epoch": 1.0770720684367687, + "grad_norm": 0.6592784523963928, + "learning_rate": 5.031021482420231e-05, + "loss": 2.4902, + "step": 13346 + }, + { + "epoch": 1.0771527721733516, + "grad_norm": 0.7192299365997314, + "learning_rate": 5.029651543091739e-05, + "loss": 2.4445, + "step": 13347 + }, + { + "epoch": 1.0772334759099347, + "grad_norm": 0.7376793622970581, + "learning_rate": 5.028281727637323e-05, + "loss": 2.4532, + "step": 13348 + }, + { + "epoch": 1.0773141796465175, + "grad_norm": 0.7344524264335632, + "learning_rate": 5.026912036091127e-05, + "loss": 2.4193, + "step": 13349 + }, + { + "epoch": 1.0773948833831006, + "grad_norm": 0.7343986630439758, + "learning_rate": 5.0255424684872785e-05, + "loss": 2.4912, + "step": 13350 + }, + { + "epoch": 1.0774755871196837, + "grad_norm": 0.7103631496429443, + "learning_rate": 5.024173024859916e-05, + "loss": 2.4611, + "step": 13351 + }, + { + "epoch": 1.0775562908562666, + "grad_norm": 0.7554094791412354, + "learning_rate": 5.022803705243169e-05, + "loss": 2.4875, + "step": 13352 + }, + { + "epoch": 1.0776369945928497, + "grad_norm": 0.6754978895187378, + "learning_rate": 5.0214345096711655e-05, + "loss": 2.4585, + "step": 13353 + }, + { + "epoch": 1.0777176983294328, + 
"grad_norm": 0.690747857093811, + "learning_rate": 5.020065438178026e-05, + "loss": 2.4751, + "step": 13354 + }, + { + "epoch": 1.0777984020660156, + "grad_norm": 0.7012028694152832, + "learning_rate": 5.018696490797874e-05, + "loss": 2.4443, + "step": 13355 + }, + { + "epoch": 1.0778791058025987, + "grad_norm": 0.6788459420204163, + "learning_rate": 5.017327667564831e-05, + "loss": 2.4135, + "step": 13356 + }, + { + "epoch": 1.0779598095391816, + "grad_norm": 0.6662794351577759, + "learning_rate": 5.015958968512997e-05, + "loss": 2.3801, + "step": 13357 + }, + { + "epoch": 1.0780405132757647, + "grad_norm": 0.7873939275741577, + "learning_rate": 5.0145903936764994e-05, + "loss": 2.4629, + "step": 13358 + }, + { + "epoch": 1.0781212170123478, + "grad_norm": 0.7484980225563049, + "learning_rate": 5.0132219430894455e-05, + "loss": 2.4307, + "step": 13359 + }, + { + "epoch": 1.0782019207489306, + "grad_norm": 0.7559076547622681, + "learning_rate": 5.011853616785932e-05, + "loss": 2.4846, + "step": 13360 + }, + { + "epoch": 1.0782826244855137, + "grad_norm": 0.6822710633277893, + "learning_rate": 5.010485414800066e-05, + "loss": 2.4448, + "step": 13361 + }, + { + "epoch": 1.0783633282220966, + "grad_norm": 0.6665955185890198, + "learning_rate": 5.0091173371659496e-05, + "loss": 2.4562, + "step": 13362 + }, + { + "epoch": 1.0784440319586797, + "grad_norm": 0.6645659804344177, + "learning_rate": 5.0077493839176714e-05, + "loss": 2.4545, + "step": 13363 + }, + { + "epoch": 1.0785247356952627, + "grad_norm": 0.6648181080818176, + "learning_rate": 5.0063815550893276e-05, + "loss": 2.4565, + "step": 13364 + }, + { + "epoch": 1.0786054394318456, + "grad_norm": 0.6679299473762512, + "learning_rate": 5.005013850715014e-05, + "loss": 2.4301, + "step": 13365 + }, + { + "epoch": 1.0786861431684287, + "grad_norm": 0.7116484642028809, + "learning_rate": 5.003646270828808e-05, + "loss": 2.4174, + "step": 13366 + }, + { + "epoch": 1.0787668469050118, + "grad_norm": 0.6850735545158386, 
+ "learning_rate": 5.002278815464798e-05, + "loss": 2.4386, + "step": 13367 + }, + { + "epoch": 1.0788475506415947, + "grad_norm": 0.6613513827323914, + "learning_rate": 5.00091148465706e-05, + "loss": 2.4038, + "step": 13368 + }, + { + "epoch": 1.0789282543781777, + "grad_norm": 0.659635603427887, + "learning_rate": 4.9995442784396827e-05, + "loss": 2.4346, + "step": 13369 + }, + { + "epoch": 1.0790089581147608, + "grad_norm": 0.6775132417678833, + "learning_rate": 4.998177196846731e-05, + "loss": 2.4853, + "step": 13370 + }, + { + "epoch": 1.0790896618513437, + "grad_norm": 0.719860851764679, + "learning_rate": 4.996810239912277e-05, + "loss": 2.4018, + "step": 13371 + }, + { + "epoch": 1.0791703655879268, + "grad_norm": 0.7316389083862305, + "learning_rate": 4.9954434076703946e-05, + "loss": 2.424, + "step": 13372 + }, + { + "epoch": 1.0792510693245096, + "grad_norm": 0.6779622435569763, + "learning_rate": 4.99407670015514e-05, + "loss": 2.4743, + "step": 13373 + }, + { + "epoch": 1.0793317730610927, + "grad_norm": 0.7357139587402344, + "learning_rate": 4.992710117400581e-05, + "loss": 2.4385, + "step": 13374 + }, + { + "epoch": 1.0794124767976758, + "grad_norm": 0.671441912651062, + "learning_rate": 4.9913436594407784e-05, + "loss": 2.3988, + "step": 13375 + }, + { + "epoch": 1.0794931805342587, + "grad_norm": 0.7205149531364441, + "learning_rate": 4.9899773263097804e-05, + "loss": 2.4594, + "step": 13376 + }, + { + "epoch": 1.0795738842708418, + "grad_norm": 0.702910840511322, + "learning_rate": 4.988611118041644e-05, + "loss": 2.4831, + "step": 13377 + }, + { + "epoch": 1.0796545880074246, + "grad_norm": 0.6977962255477905, + "learning_rate": 4.987245034670418e-05, + "loss": 2.422, + "step": 13378 + }, + { + "epoch": 1.0797352917440077, + "grad_norm": 0.7106757760047913, + "learning_rate": 4.985879076230149e-05, + "loss": 2.4073, + "step": 13379 + }, + { + "epoch": 1.0798159954805908, + "grad_norm": 0.7046806812286377, + "learning_rate": 
4.9845132427548814e-05, + "loss": 2.4065, + "step": 13380 + }, + { + "epoch": 1.0798966992171737, + "grad_norm": 0.7476605772972107, + "learning_rate": 4.9831475342786574e-05, + "loss": 2.4886, + "step": 13381 + }, + { + "epoch": 1.0799774029537568, + "grad_norm": 0.696977972984314, + "learning_rate": 4.981781950835508e-05, + "loss": 2.4732, + "step": 13382 + }, + { + "epoch": 1.0800581066903399, + "grad_norm": 0.6596804857254028, + "learning_rate": 4.98041649245947e-05, + "loss": 2.4497, + "step": 13383 + }, + { + "epoch": 1.0801388104269227, + "grad_norm": 0.7216050028800964, + "learning_rate": 4.979051159184573e-05, + "loss": 2.4745, + "step": 13384 + }, + { + "epoch": 1.0802195141635058, + "grad_norm": 0.6636630296707153, + "learning_rate": 4.977685951044852e-05, + "loss": 2.4904, + "step": 13385 + }, + { + "epoch": 1.0803002179000887, + "grad_norm": 0.7030208110809326, + "learning_rate": 4.97632086807432e-05, + "loss": 2.4302, + "step": 13386 + }, + { + "epoch": 1.0803809216366718, + "grad_norm": 0.7158327102661133, + "learning_rate": 4.974955910307004e-05, + "loss": 2.4735, + "step": 13387 + }, + { + "epoch": 1.0804616253732549, + "grad_norm": 0.6736464500427246, + "learning_rate": 4.9735910777769234e-05, + "loss": 2.4334, + "step": 13388 + }, + { + "epoch": 1.0805423291098377, + "grad_norm": 0.6913403272628784, + "learning_rate": 4.972226370518092e-05, + "loss": 2.468, + "step": 13389 + }, + { + "epoch": 1.0806230328464208, + "grad_norm": 0.7006524205207825, + "learning_rate": 4.970861788564522e-05, + "loss": 2.4598, + "step": 13390 + }, + { + "epoch": 1.080703736583004, + "grad_norm": 0.6892947554588318, + "learning_rate": 4.969497331950227e-05, + "loss": 2.4297, + "step": 13391 + }, + { + "epoch": 1.0807844403195868, + "grad_norm": 0.7270283699035645, + "learning_rate": 4.968133000709203e-05, + "loss": 2.5344, + "step": 13392 + }, + { + "epoch": 1.0808651440561698, + "grad_norm": 0.735342264175415, + "learning_rate": 4.9667687948754594e-05, + "loss": 
2.4431, + "step": 13393 + }, + { + "epoch": 1.0809458477927527, + "grad_norm": 0.6869279146194458, + "learning_rate": 4.9654047144829974e-05, + "loss": 2.5581, + "step": 13394 + }, + { + "epoch": 1.0810265515293358, + "grad_norm": 0.6975715160369873, + "learning_rate": 4.964040759565808e-05, + "loss": 2.4328, + "step": 13395 + }, + { + "epoch": 1.0811072552659189, + "grad_norm": 0.7312532067298889, + "learning_rate": 4.9626769301578856e-05, + "loss": 2.4686, + "step": 13396 + }, + { + "epoch": 1.0811879590025018, + "grad_norm": 0.7824496626853943, + "learning_rate": 4.9613132262932215e-05, + "loss": 2.4564, + "step": 13397 + }, + { + "epoch": 1.0812686627390848, + "grad_norm": 0.7337941527366638, + "learning_rate": 4.959949648005805e-05, + "loss": 2.4752, + "step": 13398 + }, + { + "epoch": 1.081349366475668, + "grad_norm": 0.7450836300849915, + "learning_rate": 4.958586195329617e-05, + "loss": 2.4457, + "step": 13399 + }, + { + "epoch": 1.0814300702122508, + "grad_norm": 0.6990504860877991, + "learning_rate": 4.9572228682986385e-05, + "loss": 2.4172, + "step": 13400 + }, + { + "epoch": 1.0815107739488339, + "grad_norm": 0.7293999791145325, + "learning_rate": 4.955859666946853e-05, + "loss": 2.5295, + "step": 13401 + }, + { + "epoch": 1.0815914776854167, + "grad_norm": 0.6872537136077881, + "learning_rate": 4.9544965913082264e-05, + "loss": 2.5029, + "step": 13402 + }, + { + "epoch": 1.0816721814219998, + "grad_norm": 0.6821706891059875, + "learning_rate": 4.953133641416733e-05, + "loss": 2.4738, + "step": 13403 + }, + { + "epoch": 1.081752885158583, + "grad_norm": 0.6811527609825134, + "learning_rate": 4.951770817306346e-05, + "loss": 2.4323, + "step": 13404 + }, + { + "epoch": 1.0818335888951658, + "grad_norm": 0.7138943076133728, + "learning_rate": 4.950408119011023e-05, + "loss": 2.5155, + "step": 13405 + }, + { + "epoch": 1.0819142926317489, + "grad_norm": 0.6777952909469604, + "learning_rate": 4.949045546564729e-05, + "loss": 2.4414, + "step": 13406 + }, + { 
+ "epoch": 1.0819949963683317, + "grad_norm": 0.7065548896789551, + "learning_rate": 4.9476831000014276e-05, + "loss": 2.4913, + "step": 13407 + }, + { + "epoch": 1.0820757001049148, + "grad_norm": 0.7286355495452881, + "learning_rate": 4.9463207793550626e-05, + "loss": 2.4171, + "step": 13408 + }, + { + "epoch": 1.082156403841498, + "grad_norm": 0.6703049540519714, + "learning_rate": 4.944958584659597e-05, + "loss": 2.4387, + "step": 13409 + }, + { + "epoch": 1.0822371075780808, + "grad_norm": 0.6572019457817078, + "learning_rate": 4.943596515948983e-05, + "loss": 2.4324, + "step": 13410 + }, + { + "epoch": 1.0823178113146639, + "grad_norm": 0.6722360849380493, + "learning_rate": 4.942234573257156e-05, + "loss": 2.4802, + "step": 13411 + }, + { + "epoch": 1.082398515051247, + "grad_norm": 0.7122535109519958, + "learning_rate": 4.9408727566180655e-05, + "loss": 2.4531, + "step": 13412 + }, + { + "epoch": 1.0824792187878298, + "grad_norm": 0.6769903898239136, + "learning_rate": 4.9395110660656505e-05, + "loss": 2.4549, + "step": 13413 + }, + { + "epoch": 1.082559922524413, + "grad_norm": 0.766251266002655, + "learning_rate": 4.938149501633852e-05, + "loss": 2.4416, + "step": 13414 + }, + { + "epoch": 1.082640626260996, + "grad_norm": 0.6677987575531006, + "learning_rate": 4.936788063356596e-05, + "loss": 2.4578, + "step": 13415 + }, + { + "epoch": 1.0827213299975789, + "grad_norm": 0.7461380362510681, + "learning_rate": 4.9354267512678156e-05, + "loss": 2.4776, + "step": 13416 + }, + { + "epoch": 1.082802033734162, + "grad_norm": 0.6681976914405823, + "learning_rate": 4.934065565401443e-05, + "loss": 2.5044, + "step": 13417 + }, + { + "epoch": 1.0828827374707448, + "grad_norm": 0.6809324622154236, + "learning_rate": 4.932704505791397e-05, + "loss": 2.4651, + "step": 13418 + }, + { + "epoch": 1.082963441207328, + "grad_norm": 0.6926563382148743, + "learning_rate": 4.931343572471596e-05, + "loss": 2.4633, + "step": 13419 + }, + { + "epoch": 1.083044144943911, + 
"grad_norm": 0.6451820135116577, + "learning_rate": 4.929982765475971e-05, + "loss": 2.474, + "step": 13420 + }, + { + "epoch": 1.0831248486804939, + "grad_norm": 0.7088493704795837, + "learning_rate": 4.9286220848384247e-05, + "loss": 2.462, + "step": 13421 + }, + { + "epoch": 1.083205552417077, + "grad_norm": 0.7819172739982605, + "learning_rate": 4.9272615305928725e-05, + "loss": 2.4534, + "step": 13422 + }, + { + "epoch": 1.0832862561536598, + "grad_norm": 0.6579666137695312, + "learning_rate": 4.925901102773227e-05, + "loss": 2.4101, + "step": 13423 + }, + { + "epoch": 1.083366959890243, + "grad_norm": 0.6999555230140686, + "learning_rate": 4.924540801413385e-05, + "loss": 2.4534, + "step": 13424 + }, + { + "epoch": 1.083447663626826, + "grad_norm": 0.7034400105476379, + "learning_rate": 4.9231806265472555e-05, + "loss": 2.4741, + "step": 13425 + }, + { + "epoch": 1.0835283673634089, + "grad_norm": 0.6595034599304199, + "learning_rate": 4.921820578208739e-05, + "loss": 2.4011, + "step": 13426 + }, + { + "epoch": 1.083609071099992, + "grad_norm": 0.666419267654419, + "learning_rate": 4.920460656431723e-05, + "loss": 2.4399, + "step": 13427 + }, + { + "epoch": 1.083689774836575, + "grad_norm": 0.7058294415473938, + "learning_rate": 4.919100861250108e-05, + "loss": 2.434, + "step": 13428 + }, + { + "epoch": 1.083770478573158, + "grad_norm": 0.7045806050300598, + "learning_rate": 4.917741192697779e-05, + "loss": 2.4616, + "step": 13429 + }, + { + "epoch": 1.083851182309741, + "grad_norm": 0.6565639972686768, + "learning_rate": 4.916381650808626e-05, + "loss": 2.3864, + "step": 13430 + }, + { + "epoch": 1.0839318860463238, + "grad_norm": 0.6939674615859985, + "learning_rate": 4.9150222356165295e-05, + "loss": 2.4217, + "step": 13431 + }, + { + "epoch": 1.084012589782907, + "grad_norm": 0.7240599989891052, + "learning_rate": 4.913662947155373e-05, + "loss": 2.447, + "step": 13432 + }, + { + "epoch": 1.08409329351949, + "grad_norm": 0.7369012832641602, + 
"learning_rate": 4.9123037854590336e-05, + "loss": 2.4588, + "step": 13433 + }, + { + "epoch": 1.0841739972560729, + "grad_norm": 0.714269757270813, + "learning_rate": 4.9109447505613803e-05, + "loss": 2.4921, + "step": 13434 + }, + { + "epoch": 1.084254700992656, + "grad_norm": 0.7541659474372864, + "learning_rate": 4.909585842496287e-05, + "loss": 2.4191, + "step": 13435 + }, + { + "epoch": 1.084335404729239, + "grad_norm": 0.7245596051216125, + "learning_rate": 4.9082270612976243e-05, + "loss": 2.4904, + "step": 13436 + }, + { + "epoch": 1.084416108465822, + "grad_norm": 0.7301090359687805, + "learning_rate": 4.90686840699925e-05, + "loss": 2.4461, + "step": 13437 + }, + { + "epoch": 1.084496812202405, + "grad_norm": 0.7404102683067322, + "learning_rate": 4.905509879635028e-05, + "loss": 2.4826, + "step": 13438 + }, + { + "epoch": 1.0845775159389879, + "grad_norm": 0.7053710222244263, + "learning_rate": 4.9041514792388175e-05, + "loss": 2.4231, + "step": 13439 + }, + { + "epoch": 1.084658219675571, + "grad_norm": 0.6171362400054932, + "learning_rate": 4.9027932058444724e-05, + "loss": 2.4472, + "step": 13440 + }, + { + "epoch": 1.084738923412154, + "grad_norm": 0.7367038130760193, + "learning_rate": 4.901435059485845e-05, + "loss": 2.4847, + "step": 13441 + }, + { + "epoch": 1.084819627148737, + "grad_norm": 0.754828691482544, + "learning_rate": 4.900077040196788e-05, + "loss": 2.4731, + "step": 13442 + }, + { + "epoch": 1.08490033088532, + "grad_norm": 0.7380684018135071, + "learning_rate": 4.8987191480111386e-05, + "loss": 2.4227, + "step": 13443 + }, + { + "epoch": 1.084981034621903, + "grad_norm": 0.6711444854736328, + "learning_rate": 4.897361382962742e-05, + "loss": 2.4744, + "step": 13444 + }, + { + "epoch": 1.085061738358486, + "grad_norm": 0.7709227204322815, + "learning_rate": 4.896003745085438e-05, + "loss": 2.5422, + "step": 13445 + }, + { + "epoch": 1.085142442095069, + "grad_norm": 0.6778519153594971, + "learning_rate": 4.8946462344130675e-05, + 
"loss": 2.4757, + "step": 13446 + }, + { + "epoch": 1.085223145831652, + "grad_norm": 0.7390698194503784, + "learning_rate": 4.893288850979454e-05, + "loss": 2.4214, + "step": 13447 + }, + { + "epoch": 1.085303849568235, + "grad_norm": 0.6632684469223022, + "learning_rate": 4.891931594818432e-05, + "loss": 2.4689, + "step": 13448 + }, + { + "epoch": 1.085384553304818, + "grad_norm": 0.68693608045578, + "learning_rate": 4.890574465963827e-05, + "loss": 2.4788, + "step": 13449 + }, + { + "epoch": 1.085465257041401, + "grad_norm": 0.6910344362258911, + "learning_rate": 4.8892174644494625e-05, + "loss": 2.4611, + "step": 13450 + }, + { + "epoch": 1.085545960777984, + "grad_norm": 0.6935380101203918, + "learning_rate": 4.887860590309158e-05, + "loss": 2.4481, + "step": 13451 + }, + { + "epoch": 1.085626664514567, + "grad_norm": 0.7086954712867737, + "learning_rate": 4.886503843576735e-05, + "loss": 2.4583, + "step": 13452 + }, + { + "epoch": 1.08570736825115, + "grad_norm": 0.7447777986526489, + "learning_rate": 4.8851472242859994e-05, + "loss": 2.5035, + "step": 13453 + }, + { + "epoch": 1.085788071987733, + "grad_norm": 0.6896036267280579, + "learning_rate": 4.8837907324707656e-05, + "loss": 2.4622, + "step": 13454 + }, + { + "epoch": 1.085868775724316, + "grad_norm": 0.7261155247688293, + "learning_rate": 4.882434368164843e-05, + "loss": 2.4958, + "step": 13455 + }, + { + "epoch": 1.085949479460899, + "grad_norm": 0.6868197321891785, + "learning_rate": 4.881078131402031e-05, + "loss": 2.4952, + "step": 13456 + }, + { + "epoch": 1.0860301831974821, + "grad_norm": 0.6338867545127869, + "learning_rate": 4.879722022216132e-05, + "loss": 2.4553, + "step": 13457 + }, + { + "epoch": 1.086110886934065, + "grad_norm": 0.7214454412460327, + "learning_rate": 4.878366040640946e-05, + "loss": 2.4433, + "step": 13458 + }, + { + "epoch": 1.086191590670648, + "grad_norm": 0.6871301531791687, + "learning_rate": 4.877010186710266e-05, + "loss": 2.4118, + "step": 13459 + }, + { + 
"epoch": 1.0862722944072312, + "grad_norm": 0.6845650672912598, + "learning_rate": 4.875654460457883e-05, + "loss": 2.4684, + "step": 13460 + }, + { + "epoch": 1.086352998143814, + "grad_norm": 0.7027513980865479, + "learning_rate": 4.8742988619175865e-05, + "loss": 2.4569, + "step": 13461 + }, + { + "epoch": 1.0864337018803971, + "grad_norm": 0.6428621411323547, + "learning_rate": 4.8729433911231646e-05, + "loss": 2.4211, + "step": 13462 + }, + { + "epoch": 1.08651440561698, + "grad_norm": 0.6921488046646118, + "learning_rate": 4.8715880481083934e-05, + "loss": 2.4668, + "step": 13463 + }, + { + "epoch": 1.086595109353563, + "grad_norm": 0.7001025676727295, + "learning_rate": 4.870232832907051e-05, + "loss": 2.4685, + "step": 13464 + }, + { + "epoch": 1.0866758130901462, + "grad_norm": 0.7460644245147705, + "learning_rate": 4.868877745552922e-05, + "loss": 2.3922, + "step": 13465 + }, + { + "epoch": 1.086756516826729, + "grad_norm": 0.7418891191482544, + "learning_rate": 4.867522786079768e-05, + "loss": 2.3777, + "step": 13466 + }, + { + "epoch": 1.0868372205633121, + "grad_norm": 0.6430083513259888, + "learning_rate": 4.8661679545213625e-05, + "loss": 2.4385, + "step": 13467 + }, + { + "epoch": 1.086917924299895, + "grad_norm": 0.6963593363761902, + "learning_rate": 4.864813250911475e-05, + "loss": 2.4083, + "step": 13468 + }, + { + "epoch": 1.086998628036478, + "grad_norm": 0.6796097159385681, + "learning_rate": 4.8634586752838606e-05, + "loss": 2.4984, + "step": 13469 + }, + { + "epoch": 1.0870793317730612, + "grad_norm": 0.6845307946205139, + "learning_rate": 4.862104227672281e-05, + "loss": 2.4168, + "step": 13470 + }, + { + "epoch": 1.087160035509644, + "grad_norm": 0.705348014831543, + "learning_rate": 4.8607499081105e-05, + "loss": 2.4216, + "step": 13471 + }, + { + "epoch": 1.087240739246227, + "grad_norm": 0.6906474828720093, + "learning_rate": 4.8593957166322636e-05, + "loss": 2.4955, + "step": 13472 + }, + { + "epoch": 1.0873214429828102, + 
"grad_norm": 0.696489691734314, + "learning_rate": 4.858041653271323e-05, + "loss": 2.4186, + "step": 13473 + }, + { + "epoch": 1.087402146719393, + "grad_norm": 0.6997761726379395, + "learning_rate": 4.856687718061429e-05, + "loss": 2.441, + "step": 13474 + }, + { + "epoch": 1.0874828504559761, + "grad_norm": 0.6515649557113647, + "learning_rate": 4.8553339110363184e-05, + "loss": 2.3997, + "step": 13475 + }, + { + "epoch": 1.087563554192559, + "grad_norm": 0.6902725696563721, + "learning_rate": 4.853980232229734e-05, + "loss": 2.4765, + "step": 13476 + }, + { + "epoch": 1.087644257929142, + "grad_norm": 0.6832055449485779, + "learning_rate": 4.852626681675415e-05, + "loss": 2.411, + "step": 13477 + }, + { + "epoch": 1.0877249616657252, + "grad_norm": 0.668520987033844, + "learning_rate": 4.8512732594070984e-05, + "loss": 2.4742, + "step": 13478 + }, + { + "epoch": 1.087805665402308, + "grad_norm": 0.7019832134246826, + "learning_rate": 4.849919965458507e-05, + "loss": 2.4638, + "step": 13479 + }, + { + "epoch": 1.0878863691388911, + "grad_norm": 0.6986027359962463, + "learning_rate": 4.8485667998633724e-05, + "loss": 2.4866, + "step": 13480 + }, + { + "epoch": 1.0879670728754742, + "grad_norm": 0.659037709236145, + "learning_rate": 4.8472137626554195e-05, + "loss": 2.4821, + "step": 13481 + }, + { + "epoch": 1.088047776612057, + "grad_norm": 0.6506801247596741, + "learning_rate": 4.8458608538683694e-05, + "loss": 2.4686, + "step": 13482 + }, + { + "epoch": 1.0881284803486402, + "grad_norm": 0.7136878967285156, + "learning_rate": 4.844508073535939e-05, + "loss": 2.4523, + "step": 13483 + }, + { + "epoch": 1.088209184085223, + "grad_norm": 0.6663414239883423, + "learning_rate": 4.843155421691848e-05, + "loss": 2.4287, + "step": 13484 + }, + { + "epoch": 1.0882898878218061, + "grad_norm": 0.7192783355712891, + "learning_rate": 4.8418028983698006e-05, + "loss": 2.4433, + "step": 13485 + }, + { + "epoch": 1.0883705915583892, + "grad_norm": 0.6620980501174927, + 
"learning_rate": 4.8404505036035086e-05, + "loss": 2.4823, + "step": 13486 + }, + { + "epoch": 1.088451295294972, + "grad_norm": 0.6282123327255249, + "learning_rate": 4.83909823742668e-05, + "loss": 2.4641, + "step": 13487 + }, + { + "epoch": 1.0885319990315552, + "grad_norm": 0.6384354829788208, + "learning_rate": 4.837746099873012e-05, + "loss": 2.4234, + "step": 13488 + }, + { + "epoch": 1.0886127027681383, + "grad_norm": 0.6550076603889465, + "learning_rate": 4.836394090976204e-05, + "loss": 2.4743, + "step": 13489 + }, + { + "epoch": 1.0886934065047211, + "grad_norm": 0.6987888216972351, + "learning_rate": 4.8350422107699545e-05, + "loss": 2.4263, + "step": 13490 + }, + { + "epoch": 1.0887741102413042, + "grad_norm": 0.7012613415718079, + "learning_rate": 4.833690459287953e-05, + "loss": 2.4801, + "step": 13491 + }, + { + "epoch": 1.088854813977887, + "grad_norm": 0.6986923217773438, + "learning_rate": 4.832338836563891e-05, + "loss": 2.426, + "step": 13492 + }, + { + "epoch": 1.0889355177144702, + "grad_norm": 0.6936241984367371, + "learning_rate": 4.830987342631453e-05, + "loss": 2.4361, + "step": 13493 + }, + { + "epoch": 1.0890162214510533, + "grad_norm": 0.6612359881401062, + "learning_rate": 4.8296359775243275e-05, + "loss": 2.4385, + "step": 13494 + }, + { + "epoch": 1.0890969251876361, + "grad_norm": 0.6927692294120789, + "learning_rate": 4.828284741276183e-05, + "loss": 2.4692, + "step": 13495 + }, + { + "epoch": 1.0891776289242192, + "grad_norm": 0.6710225343704224, + "learning_rate": 4.8269336339207036e-05, + "loss": 2.4078, + "step": 13496 + }, + { + "epoch": 1.0892583326608023, + "grad_norm": 0.639076828956604, + "learning_rate": 4.825582655491564e-05, + "loss": 2.4368, + "step": 13497 + }, + { + "epoch": 1.0893390363973852, + "grad_norm": 0.7050483226776123, + "learning_rate": 4.824231806022426e-05, + "loss": 2.4308, + "step": 13498 + }, + { + "epoch": 1.0894197401339683, + "grad_norm": 0.7097769975662231, + "learning_rate": 
4.822881085546962e-05, + "loss": 2.4378, + "step": 13499 + }, + { + "epoch": 1.0895004438705511, + "grad_norm": 0.6939458847045898, + "learning_rate": 4.821530494098834e-05, + "loss": 2.4678, + "step": 13500 + }, + { + "epoch": 1.0895811476071342, + "grad_norm": 0.6797441840171814, + "learning_rate": 4.8201800317117016e-05, + "loss": 2.4837, + "step": 13501 + }, + { + "epoch": 1.0896618513437173, + "grad_norm": 0.7451521158218384, + "learning_rate": 4.818829698419225e-05, + "loss": 2.4651, + "step": 13502 + }, + { + "epoch": 1.0897425550803002, + "grad_norm": 0.6749109625816345, + "learning_rate": 4.8174794942550585e-05, + "loss": 2.4569, + "step": 13503 + }, + { + "epoch": 1.0898232588168832, + "grad_norm": 0.6321636438369751, + "learning_rate": 4.8161294192528474e-05, + "loss": 2.4049, + "step": 13504 + }, + { + "epoch": 1.0899039625534663, + "grad_norm": 0.7002367377281189, + "learning_rate": 4.8147794734462415e-05, + "loss": 2.4489, + "step": 13505 + }, + { + "epoch": 1.0899846662900492, + "grad_norm": 0.758057713508606, + "learning_rate": 4.813429656868889e-05, + "loss": 2.436, + "step": 13506 + }, + { + "epoch": 1.0900653700266323, + "grad_norm": 0.6665529012680054, + "learning_rate": 4.812079969554424e-05, + "loss": 2.3805, + "step": 13507 + }, + { + "epoch": 1.0901460737632152, + "grad_norm": 0.6962547898292542, + "learning_rate": 4.810730411536487e-05, + "loss": 2.4203, + "step": 13508 + }, + { + "epoch": 1.0902267774997982, + "grad_norm": 0.6860647201538086, + "learning_rate": 4.809380982848712e-05, + "loss": 2.4482, + "step": 13509 + }, + { + "epoch": 1.0903074812363813, + "grad_norm": 0.7045090198516846, + "learning_rate": 4.808031683524733e-05, + "loss": 2.4155, + "step": 13510 + }, + { + "epoch": 1.0903881849729642, + "grad_norm": 0.6609304547309875, + "learning_rate": 4.806682513598176e-05, + "loss": 2.4295, + "step": 13511 + }, + { + "epoch": 1.0904688887095473, + "grad_norm": 0.7647323608398438, + "learning_rate": 4.8053334731026665e-05, + "loss": 
2.4704, + "step": 13512 + }, + { + "epoch": 1.0905495924461301, + "grad_norm": 0.677449643611908, + "learning_rate": 4.803984562071829e-05, + "loss": 2.4501, + "step": 13513 + }, + { + "epoch": 1.0906302961827132, + "grad_norm": 0.645866334438324, + "learning_rate": 4.8026357805392754e-05, + "loss": 2.427, + "step": 13514 + }, + { + "epoch": 1.0907109999192963, + "grad_norm": 0.6968488097190857, + "learning_rate": 4.801287128538624e-05, + "loss": 2.3933, + "step": 13515 + }, + { + "epoch": 1.0907917036558792, + "grad_norm": 0.7137444615364075, + "learning_rate": 4.799938606103491e-05, + "loss": 2.4611, + "step": 13516 + }, + { + "epoch": 1.0908724073924623, + "grad_norm": 0.6860007047653198, + "learning_rate": 4.7985902132674765e-05, + "loss": 2.4252, + "step": 13517 + }, + { + "epoch": 1.0909531111290454, + "grad_norm": 0.726290762424469, + "learning_rate": 4.797241950064192e-05, + "loss": 2.44, + "step": 13518 + }, + { + "epoch": 1.0910338148656282, + "grad_norm": 0.6833362579345703, + "learning_rate": 4.795893816527241e-05, + "loss": 2.4199, + "step": 13519 + }, + { + "epoch": 1.0911145186022113, + "grad_norm": 0.7412242293357849, + "learning_rate": 4.794545812690212e-05, + "loss": 2.5412, + "step": 13520 + }, + { + "epoch": 1.0911952223387944, + "grad_norm": 0.6882274150848389, + "learning_rate": 4.793197938586712e-05, + "loss": 2.473, + "step": 13521 + }, + { + "epoch": 1.0912759260753773, + "grad_norm": 0.7334007024765015, + "learning_rate": 4.791850194250335e-05, + "loss": 2.4357, + "step": 13522 + }, + { + "epoch": 1.0913566298119604, + "grad_norm": 0.6564081311225891, + "learning_rate": 4.790502579714661e-05, + "loss": 2.4425, + "step": 13523 + }, + { + "epoch": 1.0914373335485432, + "grad_norm": 0.7045762538909912, + "learning_rate": 4.78915509501328e-05, + "loss": 2.4929, + "step": 13524 + }, + { + "epoch": 1.0915180372851263, + "grad_norm": 0.7512505650520325, + "learning_rate": 4.787807740179776e-05, + "loss": 2.4187, + "step": 13525 + }, + { + 
"epoch": 1.0915987410217094, + "grad_norm": 0.6592997908592224, + "learning_rate": 4.786460515247732e-05, + "loss": 2.4344, + "step": 13526 + }, + { + "epoch": 1.0916794447582923, + "grad_norm": 0.6721770763397217, + "learning_rate": 4.785113420250715e-05, + "loss": 2.4415, + "step": 13527 + }, + { + "epoch": 1.0917601484948753, + "grad_norm": 0.7544431686401367, + "learning_rate": 4.783766455222305e-05, + "loss": 2.4831, + "step": 13528 + }, + { + "epoch": 1.0918408522314582, + "grad_norm": 0.7226355671882629, + "learning_rate": 4.782419620196073e-05, + "loss": 2.4807, + "step": 13529 + }, + { + "epoch": 1.0919215559680413, + "grad_norm": 0.6386340260505676, + "learning_rate": 4.78107291520558e-05, + "loss": 2.4062, + "step": 13530 + }, + { + "epoch": 1.0920022597046244, + "grad_norm": 0.6670595407485962, + "learning_rate": 4.7797263402843926e-05, + "loss": 2.4009, + "step": 13531 + }, + { + "epoch": 1.0920829634412073, + "grad_norm": 0.6600756049156189, + "learning_rate": 4.778379895466071e-05, + "loss": 2.4321, + "step": 13532 + }, + { + "epoch": 1.0921636671777903, + "grad_norm": 0.7190701961517334, + "learning_rate": 4.77703358078417e-05, + "loss": 2.4229, + "step": 13533 + }, + { + "epoch": 1.0922443709143734, + "grad_norm": 0.6554828882217407, + "learning_rate": 4.775687396272247e-05, + "loss": 2.442, + "step": 13534 + }, + { + "epoch": 1.0923250746509563, + "grad_norm": 0.6720205545425415, + "learning_rate": 4.774341341963853e-05, + "loss": 2.4994, + "step": 13535 + }, + { + "epoch": 1.0924057783875394, + "grad_norm": 0.7161003947257996, + "learning_rate": 4.7729954178925295e-05, + "loss": 2.4666, + "step": 13536 + }, + { + "epoch": 1.0924864821241222, + "grad_norm": 0.6817156672477722, + "learning_rate": 4.771649624091824e-05, + "loss": 2.4203, + "step": 13537 + }, + { + "epoch": 1.0925671858607053, + "grad_norm": 0.7167035937309265, + "learning_rate": 4.770303960595277e-05, + "loss": 2.4214, + "step": 13538 + }, + { + "epoch": 1.0926478895972884, + 
"grad_norm": 0.6373945474624634, + "learning_rate": 4.768958427436429e-05, + "loss": 2.485, + "step": 13539 + }, + { + "epoch": 1.0927285933338713, + "grad_norm": 0.7361387014389038, + "learning_rate": 4.767613024648808e-05, + "loss": 2.5192, + "step": 13540 + }, + { + "epoch": 1.0928092970704544, + "grad_norm": 0.7034375667572021, + "learning_rate": 4.766267752265947e-05, + "loss": 2.4324, + "step": 13541 + }, + { + "epoch": 1.0928900008070375, + "grad_norm": 0.7355689406394958, + "learning_rate": 4.7649226103213765e-05, + "loss": 2.5048, + "step": 13542 + }, + { + "epoch": 1.0929707045436203, + "grad_norm": 0.7120445966720581, + "learning_rate": 4.7635775988486176e-05, + "loss": 2.449, + "step": 13543 + }, + { + "epoch": 1.0930514082802034, + "grad_norm": 0.695888876914978, + "learning_rate": 4.7622327178811935e-05, + "loss": 2.4974, + "step": 13544 + }, + { + "epoch": 1.0931321120167863, + "grad_norm": 0.6953639984130859, + "learning_rate": 4.760887967452625e-05, + "loss": 2.3927, + "step": 13545 + }, + { + "epoch": 1.0932128157533694, + "grad_norm": 0.6457183957099915, + "learning_rate": 4.759543347596421e-05, + "loss": 2.4501, + "step": 13546 + }, + { + "epoch": 1.0932935194899525, + "grad_norm": 0.7259296774864197, + "learning_rate": 4.7581988583460946e-05, + "loss": 2.4896, + "step": 13547 + }, + { + "epoch": 1.0933742232265353, + "grad_norm": 0.6897724270820618, + "learning_rate": 4.7568544997351586e-05, + "loss": 2.4181, + "step": 13548 + }, + { + "epoch": 1.0934549269631184, + "grad_norm": 0.6723688840866089, + "learning_rate": 4.755510271797111e-05, + "loss": 2.5097, + "step": 13549 + }, + { + "epoch": 1.0935356306997015, + "grad_norm": 0.7353307604789734, + "learning_rate": 4.754166174565456e-05, + "loss": 2.4548, + "step": 13550 + }, + { + "epoch": 1.0936163344362844, + "grad_norm": 0.7334069013595581, + "learning_rate": 4.752822208073693e-05, + "loss": 2.5113, + "step": 13551 + }, + { + "epoch": 1.0936970381728675, + "grad_norm": 0.6581420302391052, + 
"learning_rate": 4.751478372355317e-05, + "loss": 2.4546, + "step": 13552 + }, + { + "epoch": 1.0937777419094503, + "grad_norm": 0.7890802621841431, + "learning_rate": 4.75013466744382e-05, + "loss": 2.4092, + "step": 13553 + }, + { + "epoch": 1.0938584456460334, + "grad_norm": 0.7226595282554626, + "learning_rate": 4.7487910933726895e-05, + "loss": 2.457, + "step": 13554 + }, + { + "epoch": 1.0939391493826165, + "grad_norm": 0.7108014225959778, + "learning_rate": 4.7474476501754165e-05, + "loss": 2.471, + "step": 13555 + }, + { + "epoch": 1.0940198531191994, + "grad_norm": 0.6864863038063049, + "learning_rate": 4.746104337885473e-05, + "loss": 2.4778, + "step": 13556 + }, + { + "epoch": 1.0941005568557824, + "grad_norm": 0.6890624165534973, + "learning_rate": 4.744761156536345e-05, + "loss": 2.456, + "step": 13557 + }, + { + "epoch": 1.0941812605923653, + "grad_norm": 0.7052781581878662, + "learning_rate": 4.743418106161509e-05, + "loss": 2.4796, + "step": 13558 + }, + { + "epoch": 1.0942619643289484, + "grad_norm": 0.6569164991378784, + "learning_rate": 4.742075186794431e-05, + "loss": 2.469, + "step": 13559 + }, + { + "epoch": 1.0943426680655315, + "grad_norm": 0.7302874326705933, + "learning_rate": 4.7407323984685836e-05, + "loss": 2.4543, + "step": 13560 + }, + { + "epoch": 1.0944233718021144, + "grad_norm": 0.6499345898628235, + "learning_rate": 4.7393897412174335e-05, + "loss": 2.4037, + "step": 13561 + }, + { + "epoch": 1.0945040755386974, + "grad_norm": 0.6643944382667542, + "learning_rate": 4.7380472150744416e-05, + "loss": 2.4067, + "step": 13562 + }, + { + "epoch": 1.0945847792752805, + "grad_norm": 0.7491872906684875, + "learning_rate": 4.736704820073069e-05, + "loss": 2.4277, + "step": 13563 + }, + { + "epoch": 1.0946654830118634, + "grad_norm": 0.7319512367248535, + "learning_rate": 4.735362556246773e-05, + "loss": 2.4588, + "step": 13564 + }, + { + "epoch": 1.0947461867484465, + "grad_norm": 0.7404350638389587, + "learning_rate": 
4.734020423629001e-05, + "loss": 2.432, + "step": 13565 + }, + { + "epoch": 1.0948268904850296, + "grad_norm": 0.6462193727493286, + "learning_rate": 4.732678422253206e-05, + "loss": 2.4417, + "step": 13566 + }, + { + "epoch": 1.0949075942216124, + "grad_norm": 0.6711323857307434, + "learning_rate": 4.731336552152836e-05, + "loss": 2.4023, + "step": 13567 + }, + { + "epoch": 1.0949882979581955, + "grad_norm": 0.658261239528656, + "learning_rate": 4.729994813361329e-05, + "loss": 2.4132, + "step": 13568 + }, + { + "epoch": 1.0950690016947784, + "grad_norm": 0.8081904053688049, + "learning_rate": 4.728653205912127e-05, + "loss": 2.4412, + "step": 13569 + }, + { + "epoch": 1.0951497054313615, + "grad_norm": 0.6620786786079407, + "learning_rate": 4.727311729838666e-05, + "loss": 2.4357, + "step": 13570 + }, + { + "epoch": 1.0952304091679446, + "grad_norm": 0.7026848793029785, + "learning_rate": 4.725970385174381e-05, + "loss": 2.4159, + "step": 13571 + }, + { + "epoch": 1.0953111129045274, + "grad_norm": 0.7017392516136169, + "learning_rate": 4.7246291719526995e-05, + "loss": 2.4253, + "step": 13572 + }, + { + "epoch": 1.0953918166411105, + "grad_norm": 0.710172712802887, + "learning_rate": 4.7232880902070483e-05, + "loss": 2.4057, + "step": 13573 + }, + { + "epoch": 1.0954725203776934, + "grad_norm": 0.7208876013755798, + "learning_rate": 4.721947139970856e-05, + "loss": 2.4803, + "step": 13574 + }, + { + "epoch": 1.0955532241142765, + "grad_norm": 0.693219006061554, + "learning_rate": 4.720606321277534e-05, + "loss": 2.3611, + "step": 13575 + }, + { + "epoch": 1.0956339278508596, + "grad_norm": 0.737206757068634, + "learning_rate": 4.7192656341605026e-05, + "loss": 2.3873, + "step": 13576 + }, + { + "epoch": 1.0957146315874424, + "grad_norm": 0.6605268120765686, + "learning_rate": 4.717925078653179e-05, + "loss": 2.4155, + "step": 13577 + }, + { + "epoch": 1.0957953353240255, + "grad_norm": 0.7143047451972961, + "learning_rate": 4.716584654788967e-05, + "loss": 
2.4526, + "step": 13578 + }, + { + "epoch": 1.0958760390606086, + "grad_norm": 0.6980953216552734, + "learning_rate": 4.715244362601277e-05, + "loss": 2.4422, + "step": 13579 + }, + { + "epoch": 1.0959567427971915, + "grad_norm": 0.6852009892463684, + "learning_rate": 4.713904202123515e-05, + "loss": 2.4599, + "step": 13580 + }, + { + "epoch": 1.0960374465337746, + "grad_norm": 0.7436656355857849, + "learning_rate": 4.712564173389074e-05, + "loss": 2.4441, + "step": 13581 + }, + { + "epoch": 1.0961181502703574, + "grad_norm": 0.7090624570846558, + "learning_rate": 4.711224276431352e-05, + "loss": 2.4741, + "step": 13582 + }, + { + "epoch": 1.0961988540069405, + "grad_norm": 0.6611043810844421, + "learning_rate": 4.709884511283753e-05, + "loss": 2.4589, + "step": 13583 + }, + { + "epoch": 1.0962795577435236, + "grad_norm": 0.6932426691055298, + "learning_rate": 4.708544877979658e-05, + "loss": 2.4199, + "step": 13584 + }, + { + "epoch": 1.0963602614801065, + "grad_norm": 0.7629422545433044, + "learning_rate": 4.707205376552456e-05, + "loss": 2.4588, + "step": 13585 + }, + { + "epoch": 1.0964409652166895, + "grad_norm": 0.8116739392280579, + "learning_rate": 4.705866007035531e-05, + "loss": 2.472, + "step": 13586 + }, + { + "epoch": 1.0965216689532726, + "grad_norm": 0.6711297631263733, + "learning_rate": 4.704526769462269e-05, + "loss": 2.4086, + "step": 13587 + }, + { + "epoch": 1.0966023726898555, + "grad_norm": 0.716015636920929, + "learning_rate": 4.703187663866037e-05, + "loss": 2.4411, + "step": 13588 + }, + { + "epoch": 1.0966830764264386, + "grad_norm": 0.6982430219650269, + "learning_rate": 4.701848690280215e-05, + "loss": 2.4438, + "step": 13589 + }, + { + "epoch": 1.0967637801630215, + "grad_norm": 0.7183159589767456, + "learning_rate": 4.7005098487381785e-05, + "loss": 2.4464, + "step": 13590 + }, + { + "epoch": 1.0968444838996045, + "grad_norm": 0.6983399391174316, + "learning_rate": 4.699171139273284e-05, + "loss": 2.4354, + "step": 13591 + }, + { + 
"epoch": 1.0969251876361876, + "grad_norm": 0.7157938480377197, + "learning_rate": 4.697832561918901e-05, + "loss": 2.4393, + "step": 13592 + }, + { + "epoch": 1.0970058913727705, + "grad_norm": 0.6991363763809204, + "learning_rate": 4.696494116708392e-05, + "loss": 2.4723, + "step": 13593 + }, + { + "epoch": 1.0970865951093536, + "grad_norm": 0.6722309589385986, + "learning_rate": 4.695155803675112e-05, + "loss": 2.447, + "step": 13594 + }, + { + "epoch": 1.0971672988459367, + "grad_norm": 0.6492688655853271, + "learning_rate": 4.6938176228524175e-05, + "loss": 2.4213, + "step": 13595 + }, + { + "epoch": 1.0972480025825195, + "grad_norm": 0.6941642165184021, + "learning_rate": 4.6924795742736616e-05, + "loss": 2.4714, + "step": 13596 + }, + { + "epoch": 1.0973287063191026, + "grad_norm": 0.7506042122840881, + "learning_rate": 4.691141657972185e-05, + "loss": 2.4563, + "step": 13597 + }, + { + "epoch": 1.0974094100556855, + "grad_norm": 0.7032836675643921, + "learning_rate": 4.6898038739813356e-05, + "loss": 2.4824, + "step": 13598 + }, + { + "epoch": 1.0974901137922686, + "grad_norm": 0.6908734440803528, + "learning_rate": 4.6884662223344575e-05, + "loss": 2.4486, + "step": 13599 + }, + { + "epoch": 1.0975708175288517, + "grad_norm": 0.714971661567688, + "learning_rate": 4.687128703064883e-05, + "loss": 2.4372, + "step": 13600 + }, + { + "epoch": 1.0976515212654345, + "grad_norm": 0.6989198327064514, + "learning_rate": 4.6857913162059486e-05, + "loss": 2.395, + "step": 13601 + }, + { + "epoch": 1.0977322250020176, + "grad_norm": 0.7163406014442444, + "learning_rate": 4.684454061790987e-05, + "loss": 2.4868, + "step": 13602 + }, + { + "epoch": 1.0978129287386005, + "grad_norm": 0.6600626707077026, + "learning_rate": 4.6831169398533245e-05, + "loss": 2.5134, + "step": 13603 + }, + { + "epoch": 1.0978936324751836, + "grad_norm": 0.6657080054283142, + "learning_rate": 4.681779950426286e-05, + "loss": 2.4701, + "step": 13604 + }, + { + "epoch": 1.0979743362117667, + 
"grad_norm": 0.665860116481781, + "learning_rate": 4.680443093543194e-05, + "loss": 2.4593, + "step": 13605 + }, + { + "epoch": 1.0980550399483495, + "grad_norm": 0.7000327110290527, + "learning_rate": 4.679106369237368e-05, + "loss": 2.4523, + "step": 13606 + }, + { + "epoch": 1.0981357436849326, + "grad_norm": 0.6969157457351685, + "learning_rate": 4.677769777542118e-05, + "loss": 2.4935, + "step": 13607 + }, + { + "epoch": 1.0982164474215157, + "grad_norm": 0.6864836812019348, + "learning_rate": 4.676433318490757e-05, + "loss": 2.457, + "step": 13608 + }, + { + "epoch": 1.0982971511580986, + "grad_norm": 0.7331364750862122, + "learning_rate": 4.675096992116598e-05, + "loss": 2.4253, + "step": 13609 + }, + { + "epoch": 1.0983778548946816, + "grad_norm": 0.75, + "learning_rate": 4.673760798452936e-05, + "loss": 2.4147, + "step": 13610 + }, + { + "epoch": 1.0984585586312647, + "grad_norm": 0.6589440703392029, + "learning_rate": 4.6724247375330786e-05, + "loss": 2.4718, + "step": 13611 + }, + { + "epoch": 1.0985392623678476, + "grad_norm": 0.7032667994499207, + "learning_rate": 4.671088809390324e-05, + "loss": 2.4724, + "step": 13612 + }, + { + "epoch": 1.0986199661044307, + "grad_norm": 0.7544135451316833, + "learning_rate": 4.6697530140579646e-05, + "loss": 2.4804, + "step": 13613 + }, + { + "epoch": 1.0987006698410136, + "grad_norm": 0.6503081917762756, + "learning_rate": 4.668417351569295e-05, + "loss": 2.3829, + "step": 13614 + }, + { + "epoch": 1.0987813735775966, + "grad_norm": 0.6928786039352417, + "learning_rate": 4.667081821957605e-05, + "loss": 2.5678, + "step": 13615 + }, + { + "epoch": 1.0988620773141797, + "grad_norm": 0.6652864217758179, + "learning_rate": 4.665746425256173e-05, + "loss": 2.4585, + "step": 13616 + }, + { + "epoch": 1.0989427810507626, + "grad_norm": 0.700265109539032, + "learning_rate": 4.664411161498283e-05, + "loss": 2.4785, + "step": 13617 + }, + { + "epoch": 1.0990234847873457, + "grad_norm": 0.7443608045578003, + "learning_rate": 
4.663076030717216e-05, + "loss": 2.4869, + "step": 13618 + }, + { + "epoch": 1.0991041885239285, + "grad_norm": 0.7037705779075623, + "learning_rate": 4.6617410329462477e-05, + "loss": 2.4518, + "step": 13619 + }, + { + "epoch": 1.0991848922605116, + "grad_norm": 0.7528365850448608, + "learning_rate": 4.660406168218643e-05, + "loss": 2.4616, + "step": 13620 + }, + { + "epoch": 1.0992655959970947, + "grad_norm": 0.7149221301078796, + "learning_rate": 4.659071436567676e-05, + "loss": 2.4661, + "step": 13621 + }, + { + "epoch": 1.0993462997336776, + "grad_norm": 0.7212862968444824, + "learning_rate": 4.657736838026608e-05, + "loss": 2.4424, + "step": 13622 + }, + { + "epoch": 1.0994270034702607, + "grad_norm": 0.6934216022491455, + "learning_rate": 4.6564023726287045e-05, + "loss": 2.4633, + "step": 13623 + }, + { + "epoch": 1.0995077072068438, + "grad_norm": 0.7244036793708801, + "learning_rate": 4.655068040407221e-05, + "loss": 2.409, + "step": 13624 + }, + { + "epoch": 1.0995884109434266, + "grad_norm": 0.6911318898200989, + "learning_rate": 4.653733841395419e-05, + "loss": 2.5117, + "step": 13625 + }, + { + "epoch": 1.0996691146800097, + "grad_norm": 0.7579816579818726, + "learning_rate": 4.65239977562654e-05, + "loss": 2.4927, + "step": 13626 + }, + { + "epoch": 1.0997498184165928, + "grad_norm": 0.7699651122093201, + "learning_rate": 4.651065843133837e-05, + "loss": 2.4083, + "step": 13627 + }, + { + "epoch": 1.0998305221531757, + "grad_norm": 0.6669431328773499, + "learning_rate": 4.649732043950561e-05, + "loss": 2.4402, + "step": 13628 + }, + { + "epoch": 1.0999112258897588, + "grad_norm": 0.7134940028190613, + "learning_rate": 4.6483983781099426e-05, + "loss": 2.4275, + "step": 13629 + }, + { + "epoch": 1.0999919296263416, + "grad_norm": 0.7107651233673096, + "learning_rate": 4.647064845645227e-05, + "loss": 2.4654, + "step": 13630 + }, + { + "epoch": 1.1000726333629247, + "grad_norm": 0.7101391553878784, + "learning_rate": 4.645731446589652e-05, + "loss": 
2.4357, + "step": 13631 + }, + { + "epoch": 1.1001533370995078, + "grad_norm": 0.7511606216430664, + "learning_rate": 4.6443981809764405e-05, + "loss": 2.5016, + "step": 13632 + }, + { + "epoch": 1.1002340408360907, + "grad_norm": 0.7315953373908997, + "learning_rate": 4.6430650488388226e-05, + "loss": 2.4541, + "step": 13633 + }, + { + "epoch": 1.1003147445726738, + "grad_norm": 0.6701769232749939, + "learning_rate": 4.6417320502100316e-05, + "loss": 2.4071, + "step": 13634 + }, + { + "epoch": 1.1003954483092566, + "grad_norm": 0.7164294123649597, + "learning_rate": 4.6403991851232876e-05, + "loss": 2.478, + "step": 13635 + }, + { + "epoch": 1.1004761520458397, + "grad_norm": 0.7003894448280334, + "learning_rate": 4.639066453611802e-05, + "loss": 2.4686, + "step": 13636 + }, + { + "epoch": 1.1005568557824228, + "grad_norm": 0.6855250000953674, + "learning_rate": 4.6377338557087957e-05, + "loss": 2.4531, + "step": 13637 + }, + { + "epoch": 1.1006375595190057, + "grad_norm": 0.6581299901008606, + "learning_rate": 4.6364013914474816e-05, + "loss": 2.4511, + "step": 13638 + }, + { + "epoch": 1.1007182632555887, + "grad_norm": 0.7599080204963684, + "learning_rate": 4.6350690608610604e-05, + "loss": 2.5143, + "step": 13639 + }, + { + "epoch": 1.1007989669921718, + "grad_norm": 0.7029981017112732, + "learning_rate": 4.633736863982744e-05, + "loss": 2.4541, + "step": 13640 + }, + { + "epoch": 1.1008796707287547, + "grad_norm": 0.7378708720207214, + "learning_rate": 4.6324048008457357e-05, + "loss": 2.4319, + "step": 13641 + }, + { + "epoch": 1.1009603744653378, + "grad_norm": 0.7087826728820801, + "learning_rate": 4.631072871483226e-05, + "loss": 2.4148, + "step": 13642 + }, + { + "epoch": 1.1010410782019207, + "grad_norm": 0.7000819444656372, + "learning_rate": 4.629741075928415e-05, + "loss": 2.4692, + "step": 13643 + }, + { + "epoch": 1.1011217819385037, + "grad_norm": 0.7363965511322021, + "learning_rate": 4.628409414214496e-05, + "loss": 2.4584, + "step": 13644 + }, 
+ { + "epoch": 1.1012024856750868, + "grad_norm": 0.6691753268241882, + "learning_rate": 4.627077886374656e-05, + "loss": 2.4356, + "step": 13645 + }, + { + "epoch": 1.1012831894116697, + "grad_norm": 0.6864185929298401, + "learning_rate": 4.625746492442078e-05, + "loss": 2.4713, + "step": 13646 + }, + { + "epoch": 1.1013638931482528, + "grad_norm": 0.714318573474884, + "learning_rate": 4.624415232449947e-05, + "loss": 2.4482, + "step": 13647 + }, + { + "epoch": 1.1014445968848359, + "grad_norm": 0.6383495330810547, + "learning_rate": 4.623084106431444e-05, + "loss": 2.4248, + "step": 13648 + }, + { + "epoch": 1.1015253006214187, + "grad_norm": 0.7014495730400085, + "learning_rate": 4.6217531144197365e-05, + "loss": 2.4393, + "step": 13649 + }, + { + "epoch": 1.1016060043580018, + "grad_norm": 0.8128634095191956, + "learning_rate": 4.620422256448e-05, + "loss": 2.4741, + "step": 13650 + }, + { + "epoch": 1.1016867080945847, + "grad_norm": 0.7333208322525024, + "learning_rate": 4.619091532549408e-05, + "loss": 2.4288, + "step": 13651 + }, + { + "epoch": 1.1017674118311678, + "grad_norm": 0.7023218274116516, + "learning_rate": 4.617760942757117e-05, + "loss": 2.5025, + "step": 13652 + }, + { + "epoch": 1.1018481155677509, + "grad_norm": 0.6420873403549194, + "learning_rate": 4.616430487104292e-05, + "loss": 2.4165, + "step": 13653 + }, + { + "epoch": 1.1019288193043337, + "grad_norm": 0.6767684817314148, + "learning_rate": 4.615100165624092e-05, + "loss": 2.4642, + "step": 13654 + }, + { + "epoch": 1.1020095230409168, + "grad_norm": 0.7361159920692444, + "learning_rate": 4.613769978349672e-05, + "loss": 2.5343, + "step": 13655 + }, + { + "epoch": 1.1020902267775, + "grad_norm": 0.6642624735832214, + "learning_rate": 4.6124399253141846e-05, + "loss": 2.3769, + "step": 13656 + }, + { + "epoch": 1.1021709305140828, + "grad_norm": 0.6912256479263306, + "learning_rate": 4.611110006550781e-05, + "loss": 2.455, + "step": 13657 + }, + { + "epoch": 1.1022516342506659, + 
"grad_norm": 0.7419310212135315, + "learning_rate": 4.609780222092599e-05, + "loss": 2.4171, + "step": 13658 + }, + { + "epoch": 1.1023323379872487, + "grad_norm": 0.718953549861908, + "learning_rate": 4.6084505719727835e-05, + "loss": 2.4791, + "step": 13659 + }, + { + "epoch": 1.1024130417238318, + "grad_norm": 0.7904248237609863, + "learning_rate": 4.607121056224477e-05, + "loss": 2.4429, + "step": 13660 + }, + { + "epoch": 1.102493745460415, + "grad_norm": 0.6743534803390503, + "learning_rate": 4.605791674880808e-05, + "loss": 2.4481, + "step": 13661 + }, + { + "epoch": 1.1025744491969978, + "grad_norm": 0.6829143166542053, + "learning_rate": 4.6044624279749106e-05, + "loss": 2.4078, + "step": 13662 + }, + { + "epoch": 1.1026551529335809, + "grad_norm": 0.6803167462348938, + "learning_rate": 4.6031333155399136e-05, + "loss": 2.4509, + "step": 13663 + }, + { + "epoch": 1.1027358566701637, + "grad_norm": 0.7474592328071594, + "learning_rate": 4.601804337608943e-05, + "loss": 2.4563, + "step": 13664 + }, + { + "epoch": 1.1028165604067468, + "grad_norm": 0.6753630042076111, + "learning_rate": 4.6004754942151174e-05, + "loss": 2.4285, + "step": 13665 + }, + { + "epoch": 1.10289726414333, + "grad_norm": 0.7990161180496216, + "learning_rate": 4.599146785391558e-05, + "loss": 2.4907, + "step": 13666 + }, + { + "epoch": 1.1029779678799128, + "grad_norm": 0.8161290287971497, + "learning_rate": 4.597818211171383e-05, + "loss": 2.4599, + "step": 13667 + }, + { + "epoch": 1.1030586716164958, + "grad_norm": 0.6813610792160034, + "learning_rate": 4.596489771587695e-05, + "loss": 2.4484, + "step": 13668 + }, + { + "epoch": 1.103139375353079, + "grad_norm": 0.6598966121673584, + "learning_rate": 4.5951614666736076e-05, + "loss": 2.4326, + "step": 13669 + }, + { + "epoch": 1.1032200790896618, + "grad_norm": 0.7084827423095703, + "learning_rate": 4.593833296462228e-05, + "loss": 2.4188, + "step": 13670 + }, + { + "epoch": 1.1033007828262449, + "grad_norm": 0.6876685619354248, + 
"learning_rate": 4.59250526098665e-05, + "loss": 2.4482, + "step": 13671 + }, + { + "epoch": 1.103381486562828, + "grad_norm": 0.7292699813842773, + "learning_rate": 4.591177360279978e-05, + "loss": 2.4452, + "step": 13672 + }, + { + "epoch": 1.1034621902994108, + "grad_norm": 0.7057675123214722, + "learning_rate": 4.589849594375304e-05, + "loss": 2.4336, + "step": 13673 + }, + { + "epoch": 1.103542894035994, + "grad_norm": 0.7684180736541748, + "learning_rate": 4.5885219633057196e-05, + "loss": 2.4453, + "step": 13674 + }, + { + "epoch": 1.1036235977725768, + "grad_norm": 0.7107112407684326, + "learning_rate": 4.5871944671043154e-05, + "loss": 2.4116, + "step": 13675 + }, + { + "epoch": 1.1037043015091599, + "grad_norm": 0.659501314163208, + "learning_rate": 4.585867105804177e-05, + "loss": 2.4907, + "step": 13676 + }, + { + "epoch": 1.103785005245743, + "grad_norm": 0.7553967833518982, + "learning_rate": 4.5845398794383786e-05, + "loss": 2.3982, + "step": 13677 + }, + { + "epoch": 1.1038657089823258, + "grad_norm": 0.6861104965209961, + "learning_rate": 4.583212788040003e-05, + "loss": 2.416, + "step": 13678 + }, + { + "epoch": 1.103946412718909, + "grad_norm": 0.6546811461448669, + "learning_rate": 4.5818858316421254e-05, + "loss": 2.4506, + "step": 13679 + }, + { + "epoch": 1.1040271164554918, + "grad_norm": 0.7012909650802612, + "learning_rate": 4.58055901027782e-05, + "loss": 2.439, + "step": 13680 + }, + { + "epoch": 1.1041078201920749, + "grad_norm": 0.7594780325889587, + "learning_rate": 4.5792323239801446e-05, + "loss": 2.4437, + "step": 13681 + }, + { + "epoch": 1.104188523928658, + "grad_norm": 0.6576492190361023, + "learning_rate": 4.577905772782172e-05, + "loss": 2.443, + "step": 13682 + }, + { + "epoch": 1.1042692276652408, + "grad_norm": 0.6751925349235535, + "learning_rate": 4.576579356716963e-05, + "loss": 2.507, + "step": 13683 + }, + { + "epoch": 1.104349931401824, + "grad_norm": 0.7206710577011108, + "learning_rate": 4.575253075817567e-05, + 
"loss": 2.4236, + "step": 13684 + }, + { + "epoch": 1.104430635138407, + "grad_norm": 0.7736170291900635, + "learning_rate": 4.5739269301170485e-05, + "loss": 2.4095, + "step": 13685 + }, + { + "epoch": 1.1045113388749899, + "grad_norm": 0.6901736855506897, + "learning_rate": 4.572600919648457e-05, + "loss": 2.4519, + "step": 13686 + }, + { + "epoch": 1.104592042611573, + "grad_norm": 0.7762539982795715, + "learning_rate": 4.571275044444836e-05, + "loss": 2.5018, + "step": 13687 + }, + { + "epoch": 1.1046727463481558, + "grad_norm": 0.7231423854827881, + "learning_rate": 4.569949304539232e-05, + "loss": 2.4553, + "step": 13688 + }, + { + "epoch": 1.104753450084739, + "grad_norm": 0.7713531255722046, + "learning_rate": 4.568623699964688e-05, + "loss": 2.49, + "step": 13689 + }, + { + "epoch": 1.104834153821322, + "grad_norm": 0.7355079650878906, + "learning_rate": 4.5672982307542354e-05, + "loss": 2.5191, + "step": 13690 + }, + { + "epoch": 1.1049148575579049, + "grad_norm": 0.6916452050209045, + "learning_rate": 4.565972896940913e-05, + "loss": 2.3867, + "step": 13691 + }, + { + "epoch": 1.104995561294488, + "grad_norm": 0.6622549295425415, + "learning_rate": 4.5646476985577544e-05, + "loss": 2.4364, + "step": 13692 + }, + { + "epoch": 1.105076265031071, + "grad_norm": 0.6683297157287598, + "learning_rate": 4.563322635637779e-05, + "loss": 2.43, + "step": 13693 + }, + { + "epoch": 1.105156968767654, + "grad_norm": 0.6857880353927612, + "learning_rate": 4.561997708214015e-05, + "loss": 2.4515, + "step": 13694 + }, + { + "epoch": 1.105237672504237, + "grad_norm": 0.7473817467689514, + "learning_rate": 4.5606729163194807e-05, + "loss": 2.442, + "step": 13695 + }, + { + "epoch": 1.1053183762408199, + "grad_norm": 0.6988846063613892, + "learning_rate": 4.559348259987203e-05, + "loss": 2.3886, + "step": 13696 + }, + { + "epoch": 1.105399079977403, + "grad_norm": 0.6450650691986084, + "learning_rate": 4.5580237392501836e-05, + "loss": 2.4647, + "step": 13697 + }, + { + 
"epoch": 1.105479783713986, + "grad_norm": 0.7669623494148254, + "learning_rate": 4.556699354141439e-05, + "loss": 2.4362, + "step": 13698 + }, + { + "epoch": 1.105560487450569, + "grad_norm": 0.7019730806350708, + "learning_rate": 4.55537510469398e-05, + "loss": 2.49, + "step": 13699 + }, + { + "epoch": 1.105641191187152, + "grad_norm": 0.6736636757850647, + "learning_rate": 4.5540509909408e-05, + "loss": 2.43, + "step": 13700 + }, + { + "epoch": 1.105721894923735, + "grad_norm": 0.6872034668922424, + "learning_rate": 4.552727012914907e-05, + "loss": 2.4507, + "step": 13701 + }, + { + "epoch": 1.105802598660318, + "grad_norm": 0.6726621985435486, + "learning_rate": 4.5514031706492986e-05, + "loss": 2.4193, + "step": 13702 + }, + { + "epoch": 1.105883302396901, + "grad_norm": 0.7345453500747681, + "learning_rate": 4.550079464176963e-05, + "loss": 2.4257, + "step": 13703 + }, + { + "epoch": 1.105964006133484, + "grad_norm": 0.6764804124832153, + "learning_rate": 4.548755893530894e-05, + "loss": 2.4656, + "step": 13704 + }, + { + "epoch": 1.106044709870067, + "grad_norm": 0.6915058493614197, + "learning_rate": 4.5474324587440766e-05, + "loss": 2.4148, + "step": 13705 + }, + { + "epoch": 1.10612541360665, + "grad_norm": 0.7960236668586731, + "learning_rate": 4.5461091598494954e-05, + "loss": 2.4148, + "step": 13706 + }, + { + "epoch": 1.106206117343233, + "grad_norm": 0.7058970928192139, + "learning_rate": 4.544785996880131e-05, + "loss": 2.4795, + "step": 13707 + }, + { + "epoch": 1.106286821079816, + "grad_norm": 0.6979549527168274, + "learning_rate": 4.5434629698689634e-05, + "loss": 2.4329, + "step": 13708 + }, + { + "epoch": 1.1063675248163989, + "grad_norm": 0.6805241107940674, + "learning_rate": 4.5421400788489586e-05, + "loss": 2.4303, + "step": 13709 + }, + { + "epoch": 1.106448228552982, + "grad_norm": 0.7566354274749756, + "learning_rate": 4.5408173238530905e-05, + "loss": 2.4769, + "step": 13710 + }, + { + "epoch": 1.106528932289565, + "grad_norm": 
0.647773802280426, + "learning_rate": 4.539494704914324e-05, + "loss": 2.4037, + "step": 13711 + }, + { + "epoch": 1.106609636026148, + "grad_norm": 0.7248135209083557, + "learning_rate": 4.538172222065628e-05, + "loss": 2.4366, + "step": 13712 + }, + { + "epoch": 1.106690339762731, + "grad_norm": 0.6861057281494141, + "learning_rate": 4.536849875339953e-05, + "loss": 2.456, + "step": 13713 + }, + { + "epoch": 1.106771043499314, + "grad_norm": 0.7386166453361511, + "learning_rate": 4.5355276647702605e-05, + "loss": 2.4806, + "step": 13714 + }, + { + "epoch": 1.106851747235897, + "grad_norm": 0.664402961730957, + "learning_rate": 4.534205590389503e-05, + "loss": 2.4846, + "step": 13715 + }, + { + "epoch": 1.10693245097248, + "grad_norm": 0.8123969435691833, + "learning_rate": 4.5328836522306296e-05, + "loss": 2.4945, + "step": 13716 + }, + { + "epoch": 1.1070131547090631, + "grad_norm": 0.7375624775886536, + "learning_rate": 4.5315618503265865e-05, + "loss": 2.4533, + "step": 13717 + }, + { + "epoch": 1.107093858445646, + "grad_norm": 0.70960932970047, + "learning_rate": 4.53024018471032e-05, + "loss": 2.4351, + "step": 13718 + }, + { + "epoch": 1.107174562182229, + "grad_norm": 0.7170885801315308, + "learning_rate": 4.5289186554147645e-05, + "loss": 2.4654, + "step": 13719 + }, + { + "epoch": 1.107255265918812, + "grad_norm": 0.6986895203590393, + "learning_rate": 4.5275972624728556e-05, + "loss": 2.4079, + "step": 13720 + }, + { + "epoch": 1.107335969655395, + "grad_norm": 0.6948813796043396, + "learning_rate": 4.526276005917532e-05, + "loss": 2.4981, + "step": 13721 + }, + { + "epoch": 1.1074166733919781, + "grad_norm": 0.7719457149505615, + "learning_rate": 4.524954885781717e-05, + "loss": 2.4853, + "step": 13722 + }, + { + "epoch": 1.107497377128561, + "grad_norm": 0.652686357498169, + "learning_rate": 4.5236339020983363e-05, + "loss": 2.3672, + "step": 13723 + }, + { + "epoch": 1.107578080865144, + "grad_norm": 0.7517427802085876, + "learning_rate": 
4.5223130549003144e-05, + "loss": 2.3947, + "step": 13724 + }, + { + "epoch": 1.107658784601727, + "grad_norm": 0.6755498647689819, + "learning_rate": 4.5209923442205705e-05, + "loss": 2.4173, + "step": 13725 + }, + { + "epoch": 1.10773948833831, + "grad_norm": 0.6801806688308716, + "learning_rate": 4.519671770092019e-05, + "loss": 2.4366, + "step": 13726 + }, + { + "epoch": 1.1078201920748931, + "grad_norm": 0.6665045619010925, + "learning_rate": 4.5183513325475724e-05, + "loss": 2.4797, + "step": 13727 + }, + { + "epoch": 1.107900895811476, + "grad_norm": 0.7303451299667358, + "learning_rate": 4.517031031620145e-05, + "loss": 2.4487, + "step": 13728 + }, + { + "epoch": 1.107981599548059, + "grad_norm": 0.7241206765174866, + "learning_rate": 4.515710867342632e-05, + "loss": 2.4632, + "step": 13729 + }, + { + "epoch": 1.1080623032846422, + "grad_norm": 0.738835334777832, + "learning_rate": 4.514390839747941e-05, + "loss": 2.3937, + "step": 13730 + }, + { + "epoch": 1.108143007021225, + "grad_norm": 0.7062843441963196, + "learning_rate": 4.5130709488689726e-05, + "loss": 2.4576, + "step": 13731 + }, + { + "epoch": 1.1082237107578081, + "grad_norm": 0.7074100971221924, + "learning_rate": 4.511751194738616e-05, + "loss": 2.4843, + "step": 13732 + }, + { + "epoch": 1.108304414494391, + "grad_norm": 0.751742959022522, + "learning_rate": 4.510431577389765e-05, + "loss": 2.4607, + "step": 13733 + }, + { + "epoch": 1.108385118230974, + "grad_norm": 0.7370054125785828, + "learning_rate": 4.50911209685531e-05, + "loss": 2.4877, + "step": 13734 + }, + { + "epoch": 1.1084658219675572, + "grad_norm": 0.6410251259803772, + "learning_rate": 4.507792753168135e-05, + "loss": 2.4254, + "step": 13735 + }, + { + "epoch": 1.10854652570414, + "grad_norm": 0.7141317129135132, + "learning_rate": 4.506473546361121e-05, + "loss": 2.4962, + "step": 13736 + }, + { + "epoch": 1.1086272294407231, + "grad_norm": 0.6903412342071533, + "learning_rate": 4.50515447646715e-05, + "loss": 2.4315, + 
"step": 13737 + }, + { + "epoch": 1.1087079331773062, + "grad_norm": 0.7068564891815186, + "learning_rate": 4.50383554351909e-05, + "loss": 2.5795, + "step": 13738 + }, + { + "epoch": 1.108788636913889, + "grad_norm": 0.6880627274513245, + "learning_rate": 4.5025167475498154e-05, + "loss": 2.4399, + "step": 13739 + }, + { + "epoch": 1.1088693406504722, + "grad_norm": 0.6721192598342896, + "learning_rate": 4.5011980885921965e-05, + "loss": 2.4651, + "step": 13740 + }, + { + "epoch": 1.108950044387055, + "grad_norm": 0.7084259986877441, + "learning_rate": 4.499879566679093e-05, + "loss": 2.4121, + "step": 13741 + }, + { + "epoch": 1.109030748123638, + "grad_norm": 0.6809335947036743, + "learning_rate": 4.498561181843368e-05, + "loss": 2.4714, + "step": 13742 + }, + { + "epoch": 1.1091114518602212, + "grad_norm": 0.690416693687439, + "learning_rate": 4.497242934117879e-05, + "loss": 2.4744, + "step": 13743 + }, + { + "epoch": 1.109192155596804, + "grad_norm": 0.728522002696991, + "learning_rate": 4.495924823535483e-05, + "loss": 2.4374, + "step": 13744 + }, + { + "epoch": 1.1092728593333872, + "grad_norm": 0.7000796794891357, + "learning_rate": 4.494606850129026e-05, + "loss": 2.4635, + "step": 13745 + }, + { + "epoch": 1.1093535630699702, + "grad_norm": 0.824645459651947, + "learning_rate": 4.493289013931353e-05, + "loss": 2.3724, + "step": 13746 + }, + { + "epoch": 1.109434266806553, + "grad_norm": 0.6561198830604553, + "learning_rate": 4.491971314975321e-05, + "loss": 2.3726, + "step": 13747 + }, + { + "epoch": 1.1095149705431362, + "grad_norm": 0.7067599892616272, + "learning_rate": 4.490653753293757e-05, + "loss": 2.4285, + "step": 13748 + }, + { + "epoch": 1.109595674279719, + "grad_norm": 0.6954898834228516, + "learning_rate": 4.489336328919503e-05, + "loss": 2.4252, + "step": 13749 + }, + { + "epoch": 1.1096763780163021, + "grad_norm": 0.6683667302131653, + "learning_rate": 4.4880190418853974e-05, + "loss": 2.4815, + "step": 13750 + }, + { + "epoch": 
1.1097570817528852, + "grad_norm": 0.7554971575737, + "learning_rate": 4.486701892224261e-05, + "loss": 2.5036, + "step": 13751 + }, + { + "epoch": 1.109837785489468, + "grad_norm": 0.7043242454528809, + "learning_rate": 4.485384879968926e-05, + "loss": 2.3757, + "step": 13752 + }, + { + "epoch": 1.1099184892260512, + "grad_norm": 0.8016893863677979, + "learning_rate": 4.4840680051522186e-05, + "loss": 2.4655, + "step": 13753 + }, + { + "epoch": 1.1099991929626343, + "grad_norm": 0.7022131085395813, + "learning_rate": 4.4827512678069515e-05, + "loss": 2.475, + "step": 13754 + }, + { + "epoch": 1.1100798966992171, + "grad_norm": 0.6963247656822205, + "learning_rate": 4.4814346679659455e-05, + "loss": 2.4866, + "step": 13755 + }, + { + "epoch": 1.1101606004358002, + "grad_norm": 0.6980907917022705, + "learning_rate": 4.4801182056620125e-05, + "loss": 2.4322, + "step": 13756 + }, + { + "epoch": 1.110241304172383, + "grad_norm": 0.68063884973526, + "learning_rate": 4.478801880927964e-05, + "loss": 2.426, + "step": 13757 + }, + { + "epoch": 1.1103220079089662, + "grad_norm": 0.7454195618629456, + "learning_rate": 4.477485693796605e-05, + "loss": 2.5042, + "step": 13758 + }, + { + "epoch": 1.1104027116455493, + "grad_norm": 0.685975193977356, + "learning_rate": 4.476169644300737e-05, + "loss": 2.4874, + "step": 13759 + }, + { + "epoch": 1.1104834153821321, + "grad_norm": 0.7060961723327637, + "learning_rate": 4.4748537324731664e-05, + "loss": 2.4126, + "step": 13760 + }, + { + "epoch": 1.1105641191187152, + "grad_norm": 0.6794416904449463, + "learning_rate": 4.4735379583466795e-05, + "loss": 2.4112, + "step": 13761 + }, + { + "epoch": 1.1106448228552983, + "grad_norm": 0.6854961514472961, + "learning_rate": 4.472222321954073e-05, + "loss": 2.4909, + "step": 13762 + }, + { + "epoch": 1.1107255265918812, + "grad_norm": 0.7660776972770691, + "learning_rate": 4.470906823328139e-05, + "loss": 2.5021, + "step": 13763 + }, + { + "epoch": 1.1108062303284643, + "grad_norm": 
0.7027743458747864, + "learning_rate": 4.4695914625016564e-05, + "loss": 2.4375, + "step": 13764 + }, + { + "epoch": 1.1108869340650471, + "grad_norm": 0.6896719336509705, + "learning_rate": 4.468276239507413e-05, + "loss": 2.4574, + "step": 13765 + }, + { + "epoch": 1.1109676378016302, + "grad_norm": 0.685141384601593, + "learning_rate": 4.4669611543781844e-05, + "loss": 2.4311, + "step": 13766 + }, + { + "epoch": 1.1110483415382133, + "grad_norm": 0.7108263373374939, + "learning_rate": 4.465646207146746e-05, + "loss": 2.4565, + "step": 13767 + }, + { + "epoch": 1.1111290452747962, + "grad_norm": 0.63578861951828, + "learning_rate": 4.464331397845873e-05, + "loss": 2.449, + "step": 13768 + }, + { + "epoch": 1.1112097490113793, + "grad_norm": 0.6917306780815125, + "learning_rate": 4.463016726508335e-05, + "loss": 2.4681, + "step": 13769 + }, + { + "epoch": 1.1112904527479621, + "grad_norm": 0.7328054308891296, + "learning_rate": 4.4617021931668914e-05, + "loss": 2.404, + "step": 13770 + }, + { + "epoch": 1.1113711564845452, + "grad_norm": 0.6501660943031311, + "learning_rate": 4.460387797854305e-05, + "loss": 2.4228, + "step": 13771 + }, + { + "epoch": 1.1114518602211283, + "grad_norm": 0.6656771302223206, + "learning_rate": 4.459073540603336e-05, + "loss": 2.4814, + "step": 13772 + }, + { + "epoch": 1.1115325639577112, + "grad_norm": 0.671017587184906, + "learning_rate": 4.457759421446742e-05, + "loss": 2.4605, + "step": 13773 + }, + { + "epoch": 1.1116132676942942, + "grad_norm": 0.6715343594551086, + "learning_rate": 4.456445440417267e-05, + "loss": 2.424, + "step": 13774 + }, + { + "epoch": 1.1116939714308773, + "grad_norm": 0.7051515579223633, + "learning_rate": 4.4551315975476626e-05, + "loss": 2.4358, + "step": 13775 + }, + { + "epoch": 1.1117746751674602, + "grad_norm": 0.7810437679290771, + "learning_rate": 4.453817892870673e-05, + "loss": 2.4718, + "step": 13776 + }, + { + "epoch": 1.1118553789040433, + "grad_norm": 0.7072561383247375, + "learning_rate": 
4.4525043264190405e-05, + "loss": 2.4429, + "step": 13777 + }, + { + "epoch": 1.1119360826406264, + "grad_norm": 0.7949702143669128, + "learning_rate": 4.4511908982255e-05, + "loss": 2.4413, + "step": 13778 + }, + { + "epoch": 1.1120167863772092, + "grad_norm": 0.6716235876083374, + "learning_rate": 4.449877608322792e-05, + "loss": 2.427, + "step": 13779 + }, + { + "epoch": 1.1120974901137923, + "grad_norm": 0.7332563996315002, + "learning_rate": 4.448564456743638e-05, + "loss": 2.4567, + "step": 13780 + }, + { + "epoch": 1.1121781938503752, + "grad_norm": 0.7264607548713684, + "learning_rate": 4.447251443520769e-05, + "loss": 2.4844, + "step": 13781 + }, + { + "epoch": 1.1122588975869583, + "grad_norm": 0.7819967865943909, + "learning_rate": 4.4459385686869136e-05, + "loss": 2.5129, + "step": 13782 + }, + { + "epoch": 1.1123396013235414, + "grad_norm": 0.7587651610374451, + "learning_rate": 4.4446258322747824e-05, + "loss": 2.4714, + "step": 13783 + }, + { + "epoch": 1.1124203050601242, + "grad_norm": 0.6392871141433716, + "learning_rate": 4.443313234317099e-05, + "loss": 2.462, + "step": 13784 + }, + { + "epoch": 1.1125010087967073, + "grad_norm": 0.6609585881233215, + "learning_rate": 4.442000774846574e-05, + "loss": 2.4566, + "step": 13785 + }, + { + "epoch": 1.1125817125332902, + "grad_norm": 0.762924075126648, + "learning_rate": 4.440688453895919e-05, + "loss": 2.4613, + "step": 13786 + }, + { + "epoch": 1.1126624162698733, + "grad_norm": 0.7096089124679565, + "learning_rate": 4.4393762714978394e-05, + "loss": 2.4195, + "step": 13787 + }, + { + "epoch": 1.1127431200064564, + "grad_norm": 0.6663284301757812, + "learning_rate": 4.438064227685039e-05, + "loss": 2.422, + "step": 13788 + }, + { + "epoch": 1.1128238237430392, + "grad_norm": 0.6653628945350647, + "learning_rate": 4.436752322490221e-05, + "loss": 2.4477, + "step": 13789 + }, + { + "epoch": 1.1129045274796223, + "grad_norm": 0.6527605056762695, + "learning_rate": 4.435440555946073e-05, + "loss": 
2.3874, + "step": 13790 + }, + { + "epoch": 1.1129852312162054, + "grad_norm": 0.6801275014877319, + "learning_rate": 4.4341289280852935e-05, + "loss": 2.4474, + "step": 13791 + }, + { + "epoch": 1.1130659349527883, + "grad_norm": 0.729905366897583, + "learning_rate": 4.432817438940574e-05, + "loss": 2.4711, + "step": 13792 + }, + { + "epoch": 1.1131466386893714, + "grad_norm": 0.7074751853942871, + "learning_rate": 4.431506088544593e-05, + "loss": 2.451, + "step": 13793 + }, + { + "epoch": 1.1132273424259542, + "grad_norm": 0.7241154313087463, + "learning_rate": 4.430194876930035e-05, + "loss": 2.4883, + "step": 13794 + }, + { + "epoch": 1.1133080461625373, + "grad_norm": 0.6549142003059387, + "learning_rate": 4.428883804129586e-05, + "loss": 2.4243, + "step": 13795 + }, + { + "epoch": 1.1133887498991204, + "grad_norm": 0.7046780586242676, + "learning_rate": 4.427572870175907e-05, + "loss": 2.4143, + "step": 13796 + }, + { + "epoch": 1.1134694536357033, + "grad_norm": 0.6563952565193176, + "learning_rate": 4.426262075101682e-05, + "loss": 2.416, + "step": 13797 + }, + { + "epoch": 1.1135501573722864, + "grad_norm": 0.7002081871032715, + "learning_rate": 4.4249514189395803e-05, + "loss": 2.3673, + "step": 13798 + }, + { + "epoch": 1.1136308611088694, + "grad_norm": 0.6766571998596191, + "learning_rate": 4.423640901722259e-05, + "loss": 2.4941, + "step": 13799 + }, + { + "epoch": 1.1137115648454523, + "grad_norm": 0.7404381632804871, + "learning_rate": 4.422330523482383e-05, + "loss": 2.4794, + "step": 13800 + }, + { + "epoch": 1.1137922685820354, + "grad_norm": 0.6670998930931091, + "learning_rate": 4.421020284252614e-05, + "loss": 2.5131, + "step": 13801 + }, + { + "epoch": 1.1138729723186183, + "grad_norm": 0.803720235824585, + "learning_rate": 4.4197101840655995e-05, + "loss": 2.4751, + "step": 13802 + }, + { + "epoch": 1.1139536760552013, + "grad_norm": 0.6532074809074402, + "learning_rate": 4.4184002229539947e-05, + "loss": 2.4147, + "step": 13803 + }, + { + 
"epoch": 1.1140343797917844, + "grad_norm": 0.6548035144805908, + "learning_rate": 4.417090400950447e-05, + "loss": 2.4601, + "step": 13804 + }, + { + "epoch": 1.1141150835283673, + "grad_norm": 0.6971763968467712, + "learning_rate": 4.415780718087603e-05, + "loss": 2.4752, + "step": 13805 + }, + { + "epoch": 1.1141957872649504, + "grad_norm": 0.6624024510383606, + "learning_rate": 4.414471174398098e-05, + "loss": 2.4183, + "step": 13806 + }, + { + "epoch": 1.1142764910015335, + "grad_norm": 0.6571507453918457, + "learning_rate": 4.4131617699145714e-05, + "loss": 2.4747, + "step": 13807 + }, + { + "epoch": 1.1143571947381163, + "grad_norm": 0.7165808081626892, + "learning_rate": 4.411852504669658e-05, + "loss": 2.453, + "step": 13808 + }, + { + "epoch": 1.1144378984746994, + "grad_norm": 0.6708057522773743, + "learning_rate": 4.410543378695988e-05, + "loss": 2.4858, + "step": 13809 + }, + { + "epoch": 1.1145186022112823, + "grad_norm": 0.889302134513855, + "learning_rate": 4.409234392026187e-05, + "loss": 2.4333, + "step": 13810 + }, + { + "epoch": 1.1145993059478654, + "grad_norm": 0.7440677881240845, + "learning_rate": 4.407925544692884e-05, + "loss": 2.49, + "step": 13811 + }, + { + "epoch": 1.1146800096844485, + "grad_norm": 0.6688372492790222, + "learning_rate": 4.406616836728691e-05, + "loss": 2.4663, + "step": 13812 + }, + { + "epoch": 1.1147607134210313, + "grad_norm": 0.7108204364776611, + "learning_rate": 4.4053082681662264e-05, + "loss": 2.4843, + "step": 13813 + }, + { + "epoch": 1.1148414171576144, + "grad_norm": 0.7270475029945374, + "learning_rate": 4.4039998390381087e-05, + "loss": 2.4158, + "step": 13814 + }, + { + "epoch": 1.1149221208941973, + "grad_norm": 0.7243396639823914, + "learning_rate": 4.402691549376939e-05, + "loss": 2.3969, + "step": 13815 + }, + { + "epoch": 1.1150028246307804, + "grad_norm": 0.6687803268432617, + "learning_rate": 4.4013833992153285e-05, + "loss": 2.42, + "step": 13816 + }, + { + "epoch": 1.1150835283673635, + 
"grad_norm": 0.6892626285552979, + "learning_rate": 4.400075388585877e-05, + "loss": 2.4086, + "step": 13817 + }, + { + "epoch": 1.1151642321039463, + "grad_norm": 0.7556231021881104, + "learning_rate": 4.398767517521186e-05, + "loss": 2.4201, + "step": 13818 + }, + { + "epoch": 1.1152449358405294, + "grad_norm": 0.6872838735580444, + "learning_rate": 4.397459786053851e-05, + "loss": 2.4143, + "step": 13819 + }, + { + "epoch": 1.1153256395771125, + "grad_norm": 0.6681817770004272, + "learning_rate": 4.396152194216463e-05, + "loss": 2.4404, + "step": 13820 + }, + { + "epoch": 1.1154063433136954, + "grad_norm": 0.7107201218605042, + "learning_rate": 4.394844742041614e-05, + "loss": 2.4503, + "step": 13821 + }, + { + "epoch": 1.1154870470502785, + "grad_norm": 0.706541121006012, + "learning_rate": 4.3935374295618824e-05, + "loss": 2.5106, + "step": 13822 + }, + { + "epoch": 1.1155677507868615, + "grad_norm": 0.6659905910491943, + "learning_rate": 4.392230256809854e-05, + "loss": 2.3839, + "step": 13823 + }, + { + "epoch": 1.1156484545234444, + "grad_norm": 0.7125810980796814, + "learning_rate": 4.3909232238181095e-05, + "loss": 2.4463, + "step": 13824 + }, + { + "epoch": 1.1157291582600275, + "grad_norm": 0.6581901907920837, + "learning_rate": 4.389616330619217e-05, + "loss": 2.4004, + "step": 13825 + }, + { + "epoch": 1.1158098619966104, + "grad_norm": 0.7660872340202332, + "learning_rate": 4.388309577245752e-05, + "loss": 2.4685, + "step": 13826 + }, + { + "epoch": 1.1158905657331935, + "grad_norm": 0.699526846408844, + "learning_rate": 4.387002963730281e-05, + "loss": 2.4131, + "step": 13827 + }, + { + "epoch": 1.1159712694697765, + "grad_norm": 0.7031015753746033, + "learning_rate": 4.3856964901053685e-05, + "loss": 2.4476, + "step": 13828 + }, + { + "epoch": 1.1160519732063594, + "grad_norm": 0.6876828074455261, + "learning_rate": 4.384390156403575e-05, + "loss": 2.4402, + "step": 13829 + }, + { + "epoch": 1.1161326769429425, + "grad_norm": 0.7188935279846191, + 
"learning_rate": 4.3830839626574626e-05, + "loss": 2.4473, + "step": 13830 + }, + { + "epoch": 1.1162133806795254, + "grad_norm": 0.6825287938117981, + "learning_rate": 4.381777908899577e-05, + "loss": 2.4757, + "step": 13831 + }, + { + "epoch": 1.1162940844161084, + "grad_norm": 0.718267560005188, + "learning_rate": 4.380471995162472e-05, + "loss": 2.483, + "step": 13832 + }, + { + "epoch": 1.1163747881526915, + "grad_norm": 0.6526767611503601, + "learning_rate": 4.379166221478697e-05, + "loss": 2.4161, + "step": 13833 + }, + { + "epoch": 1.1164554918892744, + "grad_norm": 0.7541480660438538, + "learning_rate": 4.37786058788079e-05, + "loss": 2.4876, + "step": 13834 + }, + { + "epoch": 1.1165361956258575, + "grad_norm": 0.7144232988357544, + "learning_rate": 4.376555094401294e-05, + "loss": 2.4153, + "step": 13835 + }, + { + "epoch": 1.1166168993624406, + "grad_norm": 0.7544882297515869, + "learning_rate": 4.3752497410727445e-05, + "loss": 2.4634, + "step": 13836 + }, + { + "epoch": 1.1166976030990234, + "grad_norm": 0.7263267040252686, + "learning_rate": 4.373944527927674e-05, + "loss": 2.5189, + "step": 13837 + }, + { + "epoch": 1.1167783068356065, + "grad_norm": 0.7709252834320068, + "learning_rate": 4.3726394549986135e-05, + "loss": 2.5036, + "step": 13838 + }, + { + "epoch": 1.1168590105721894, + "grad_norm": 0.6849128007888794, + "learning_rate": 4.3713345223180866e-05, + "loss": 2.414, + "step": 13839 + }, + { + "epoch": 1.1169397143087725, + "grad_norm": 0.6807512044906616, + "learning_rate": 4.3700297299186224e-05, + "loss": 2.4924, + "step": 13840 + }, + { + "epoch": 1.1170204180453556, + "grad_norm": 0.6894977688789368, + "learning_rate": 4.3687250778327294e-05, + "loss": 2.4183, + "step": 13841 + }, + { + "epoch": 1.1171011217819384, + "grad_norm": 0.6657617092132568, + "learning_rate": 4.367420566092928e-05, + "loss": 2.448, + "step": 13842 + }, + { + "epoch": 1.1171818255185215, + "grad_norm": 0.7104446291923523, + "learning_rate": 
4.366116194731733e-05, + "loss": 2.4862, + "step": 13843 + }, + { + "epoch": 1.1172625292551046, + "grad_norm": 0.7485257387161255, + "learning_rate": 4.3648119637816465e-05, + "loss": 2.4253, + "step": 13844 + }, + { + "epoch": 1.1173432329916875, + "grad_norm": 0.7079899907112122, + "learning_rate": 4.363507873275177e-05, + "loss": 2.4235, + "step": 13845 + }, + { + "epoch": 1.1174239367282706, + "grad_norm": 0.6891573667526245, + "learning_rate": 4.3622039232448274e-05, + "loss": 2.4382, + "step": 13846 + }, + { + "epoch": 1.1175046404648534, + "grad_norm": 0.6886103749275208, + "learning_rate": 4.360900113723086e-05, + "loss": 2.5115, + "step": 13847 + }, + { + "epoch": 1.1175853442014365, + "grad_norm": 0.7511457800865173, + "learning_rate": 4.35959644474246e-05, + "loss": 2.4071, + "step": 13848 + }, + { + "epoch": 1.1176660479380196, + "grad_norm": 0.6526182293891907, + "learning_rate": 4.358292916335437e-05, + "loss": 2.4242, + "step": 13849 + }, + { + "epoch": 1.1177467516746025, + "grad_norm": 0.7385138273239136, + "learning_rate": 4.356989528534499e-05, + "loss": 2.4459, + "step": 13850 + }, + { + "epoch": 1.1178274554111856, + "grad_norm": 0.6668610572814941, + "learning_rate": 4.355686281372132e-05, + "loss": 2.4188, + "step": 13851 + }, + { + "epoch": 1.1179081591477686, + "grad_norm": 0.6950691342353821, + "learning_rate": 4.354383174880818e-05, + "loss": 2.4339, + "step": 13852 + }, + { + "epoch": 1.1179888628843515, + "grad_norm": 0.7017496824264526, + "learning_rate": 4.3530802090930375e-05, + "loss": 2.4733, + "step": 13853 + }, + { + "epoch": 1.1180695666209346, + "grad_norm": 0.8118221759796143, + "learning_rate": 4.351777384041254e-05, + "loss": 2.4826, + "step": 13854 + }, + { + "epoch": 1.1181502703575175, + "grad_norm": 0.7233164310455322, + "learning_rate": 4.350474699757945e-05, + "loss": 2.4637, + "step": 13855 + }, + { + "epoch": 1.1182309740941005, + "grad_norm": 0.6354575157165527, + "learning_rate": 4.349172156275576e-05, + "loss": 
2.4487, + "step": 13856 + }, + { + "epoch": 1.1183116778306836, + "grad_norm": 0.6776937246322632, + "learning_rate": 4.347869753626606e-05, + "loss": 2.4292, + "step": 13857 + }, + { + "epoch": 1.1183923815672665, + "grad_norm": 0.6656864881515503, + "learning_rate": 4.3465674918434953e-05, + "loss": 2.484, + "step": 13858 + }, + { + "epoch": 1.1184730853038496, + "grad_norm": 0.7659650444984436, + "learning_rate": 4.345265370958702e-05, + "loss": 2.4181, + "step": 13859 + }, + { + "epoch": 1.1185537890404325, + "grad_norm": 0.6546063423156738, + "learning_rate": 4.3439633910046764e-05, + "loss": 2.4657, + "step": 13860 + }, + { + "epoch": 1.1186344927770155, + "grad_norm": 0.6869762539863586, + "learning_rate": 4.342661552013869e-05, + "loss": 2.513, + "step": 13861 + }, + { + "epoch": 1.1187151965135986, + "grad_norm": 0.6633490324020386, + "learning_rate": 4.3413598540187275e-05, + "loss": 2.4716, + "step": 13862 + }, + { + "epoch": 1.1187959002501815, + "grad_norm": 0.7238267660140991, + "learning_rate": 4.340058297051687e-05, + "loss": 2.4353, + "step": 13863 + }, + { + "epoch": 1.1188766039867646, + "grad_norm": 0.67429119348526, + "learning_rate": 4.3387568811451875e-05, + "loss": 2.4808, + "step": 13864 + }, + { + "epoch": 1.1189573077233477, + "grad_norm": 0.6901153326034546, + "learning_rate": 4.33745560633167e-05, + "loss": 2.4785, + "step": 13865 + }, + { + "epoch": 1.1190380114599305, + "grad_norm": 0.7227689027786255, + "learning_rate": 4.336154472643556e-05, + "loss": 2.4414, + "step": 13866 + }, + { + "epoch": 1.1191187151965136, + "grad_norm": 0.713793933391571, + "learning_rate": 4.33485348011328e-05, + "loss": 2.5136, + "step": 13867 + }, + { + "epoch": 1.1191994189330967, + "grad_norm": 0.6495655179023743, + "learning_rate": 4.333552628773263e-05, + "loss": 2.4267, + "step": 13868 + }, + { + "epoch": 1.1192801226696796, + "grad_norm": 0.7265790104866028, + "learning_rate": 4.3322519186559274e-05, + "loss": 2.4406, + "step": 13869 + }, + { + 
"epoch": 1.1193608264062627, + "grad_norm": 0.6700571179389954, + "learning_rate": 4.330951349793688e-05, + "loss": 2.4457, + "step": 13870 + }, + { + "epoch": 1.1194415301428455, + "grad_norm": 0.7112334966659546, + "learning_rate": 4.3296509222189616e-05, + "loss": 2.4788, + "step": 13871 + }, + { + "epoch": 1.1195222338794286, + "grad_norm": 0.7056662440299988, + "learning_rate": 4.32835063596416e-05, + "loss": 2.5195, + "step": 13872 + }, + { + "epoch": 1.1196029376160117, + "grad_norm": 0.7198836207389832, + "learning_rate": 4.327050491061683e-05, + "loss": 2.4827, + "step": 13873 + }, + { + "epoch": 1.1196836413525946, + "grad_norm": 0.7384079694747925, + "learning_rate": 4.325750487543936e-05, + "loss": 2.4556, + "step": 13874 + }, + { + "epoch": 1.1197643450891777, + "grad_norm": 0.7315430641174316, + "learning_rate": 4.324450625443324e-05, + "loss": 2.4302, + "step": 13875 + }, + { + "epoch": 1.1198450488257605, + "grad_norm": 0.6692587733268738, + "learning_rate": 4.323150904792234e-05, + "loss": 2.5283, + "step": 13876 + }, + { + "epoch": 1.1199257525623436, + "grad_norm": 0.7407168745994568, + "learning_rate": 4.321851325623063e-05, + "loss": 2.4757, + "step": 13877 + }, + { + "epoch": 1.1200064562989267, + "grad_norm": 0.7387246489524841, + "learning_rate": 4.3205518879682e-05, + "loss": 2.5025, + "step": 13878 + }, + { + "epoch": 1.1200871600355096, + "grad_norm": 0.8058405518531799, + "learning_rate": 4.319252591860031e-05, + "loss": 2.4951, + "step": 13879 + }, + { + "epoch": 1.1201678637720927, + "grad_norm": 0.6964818835258484, + "learning_rate": 4.317953437330936e-05, + "loss": 2.4462, + "step": 13880 + }, + { + "epoch": 1.1202485675086757, + "grad_norm": 0.6904557347297668, + "learning_rate": 4.316654424413294e-05, + "loss": 2.3981, + "step": 13881 + }, + { + "epoch": 1.1203292712452586, + "grad_norm": 0.6555196046829224, + "learning_rate": 4.315355553139485e-05, + "loss": 2.418, + "step": 13882 + }, + { + "epoch": 1.1204099749818417, + 
"grad_norm": 0.7745094299316406, + "learning_rate": 4.3140568235418724e-05, + "loss": 2.4635, + "step": 13883 + }, + { + "epoch": 1.1204906787184246, + "grad_norm": 0.686676025390625, + "learning_rate": 4.312758235652825e-05, + "loss": 2.4847, + "step": 13884 + }, + { + "epoch": 1.1205713824550076, + "grad_norm": 0.6937002539634705, + "learning_rate": 4.311459789504714e-05, + "loss": 2.4632, + "step": 13885 + }, + { + "epoch": 1.1206520861915907, + "grad_norm": 0.7024590373039246, + "learning_rate": 4.310161485129891e-05, + "loss": 2.4268, + "step": 13886 + }, + { + "epoch": 1.1207327899281736, + "grad_norm": 0.6848484873771667, + "learning_rate": 4.308863322560717e-05, + "loss": 2.4895, + "step": 13887 + }, + { + "epoch": 1.1208134936647567, + "grad_norm": 0.7071602940559387, + "learning_rate": 4.307565301829546e-05, + "loss": 2.4348, + "step": 13888 + }, + { + "epoch": 1.1208941974013398, + "grad_norm": 0.6868199706077576, + "learning_rate": 4.3062674229687274e-05, + "loss": 2.4613, + "step": 13889 + }, + { + "epoch": 1.1209749011379226, + "grad_norm": 0.7283496260643005, + "learning_rate": 4.304969686010608e-05, + "loss": 2.478, + "step": 13890 + }, + { + "epoch": 1.1210556048745057, + "grad_norm": 0.6907255053520203, + "learning_rate": 4.303672090987535e-05, + "loss": 2.4431, + "step": 13891 + }, + { + "epoch": 1.1211363086110886, + "grad_norm": 0.675089418888092, + "learning_rate": 4.302374637931841e-05, + "loss": 2.4398, + "step": 13892 + }, + { + "epoch": 1.1212170123476717, + "grad_norm": 0.6929863095283508, + "learning_rate": 4.301077326875863e-05, + "loss": 2.3909, + "step": 13893 + }, + { + "epoch": 1.1212977160842548, + "grad_norm": 0.6746132969856262, + "learning_rate": 4.29978015785194e-05, + "loss": 2.4726, + "step": 13894 + }, + { + "epoch": 1.1213784198208376, + "grad_norm": 0.720781147480011, + "learning_rate": 4.298483130892392e-05, + "loss": 2.4445, + "step": 13895 + }, + { + "epoch": 1.1214591235574207, + "grad_norm": 0.6624416708946228, + 
"learning_rate": 4.297186246029549e-05, + "loss": 2.3868, + "step": 13896 + }, + { + "epoch": 1.1215398272940038, + "grad_norm": 0.7849127054214478, + "learning_rate": 4.295889503295731e-05, + "loss": 2.4479, + "step": 13897 + }, + { + "epoch": 1.1216205310305867, + "grad_norm": 0.6655337810516357, + "learning_rate": 4.294592902723259e-05, + "loss": 2.5093, + "step": 13898 + }, + { + "epoch": 1.1217012347671698, + "grad_norm": 0.7055402398109436, + "learning_rate": 4.293296444344445e-05, + "loss": 2.4385, + "step": 13899 + }, + { + "epoch": 1.1217819385037526, + "grad_norm": 0.7388767600059509, + "learning_rate": 4.2920001281916e-05, + "loss": 2.4863, + "step": 13900 + }, + { + "epoch": 1.1218626422403357, + "grad_norm": 0.6915223002433777, + "learning_rate": 4.2907039542970373e-05, + "loss": 2.4218, + "step": 13901 + }, + { + "epoch": 1.1219433459769188, + "grad_norm": 0.7124893665313721, + "learning_rate": 4.289407922693053e-05, + "loss": 2.4514, + "step": 13902 + }, + { + "epoch": 1.1220240497135017, + "grad_norm": 0.6552406549453735, + "learning_rate": 4.28811203341195e-05, + "loss": 2.4558, + "step": 13903 + }, + { + "epoch": 1.1221047534500848, + "grad_norm": 0.6641791462898254, + "learning_rate": 4.286816286486031e-05, + "loss": 2.4277, + "step": 13904 + }, + { + "epoch": 1.1221854571866678, + "grad_norm": 0.677733838558197, + "learning_rate": 4.285520681947579e-05, + "loss": 2.4861, + "step": 13905 + }, + { + "epoch": 1.1222661609232507, + "grad_norm": 0.6572888493537903, + "learning_rate": 4.284225219828891e-05, + "loss": 2.4657, + "step": 13906 + }, + { + "epoch": 1.1223468646598338, + "grad_norm": 0.6923860907554626, + "learning_rate": 4.2829299001622546e-05, + "loss": 2.4857, + "step": 13907 + }, + { + "epoch": 1.1224275683964167, + "grad_norm": 0.6971977949142456, + "learning_rate": 4.281634722979947e-05, + "loss": 2.4434, + "step": 13908 + }, + { + "epoch": 1.1225082721329998, + "grad_norm": 0.6828060746192932, + "learning_rate": 
4.2803396883142456e-05, + "loss": 2.4342, + "step": 13909 + }, + { + "epoch": 1.1225889758695828, + "grad_norm": 0.7001270651817322, + "learning_rate": 4.279044796197438e-05, + "loss": 2.5222, + "step": 13910 + }, + { + "epoch": 1.1226696796061657, + "grad_norm": 0.6425578594207764, + "learning_rate": 4.277750046661785e-05, + "loss": 2.42, + "step": 13911 + }, + { + "epoch": 1.1227503833427488, + "grad_norm": 0.6498209834098816, + "learning_rate": 4.2764554397395585e-05, + "loss": 2.4448, + "step": 13912 + }, + { + "epoch": 1.1228310870793319, + "grad_norm": 0.6894031763076782, + "learning_rate": 4.275160975463025e-05, + "loss": 2.4508, + "step": 13913 + }, + { + "epoch": 1.1229117908159147, + "grad_norm": 0.7286608219146729, + "learning_rate": 4.273866653864448e-05, + "loss": 2.4557, + "step": 13914 + }, + { + "epoch": 1.1229924945524978, + "grad_norm": 0.753826379776001, + "learning_rate": 4.272572474976079e-05, + "loss": 2.4635, + "step": 13915 + }, + { + "epoch": 1.1230731982890807, + "grad_norm": 0.6715937256813049, + "learning_rate": 4.271278438830174e-05, + "loss": 2.5107, + "step": 13916 + }, + { + "epoch": 1.1231539020256638, + "grad_norm": 0.6833200454711914, + "learning_rate": 4.26998454545899e-05, + "loss": 2.4883, + "step": 13917 + }, + { + "epoch": 1.1232346057622469, + "grad_norm": 0.6763597130775452, + "learning_rate": 4.2686907948947666e-05, + "loss": 2.4178, + "step": 13918 + }, + { + "epoch": 1.1233153094988297, + "grad_norm": 0.7336227297782898, + "learning_rate": 4.26739718716975e-05, + "loss": 2.4542, + "step": 13919 + }, + { + "epoch": 1.1233960132354128, + "grad_norm": 0.6583260297775269, + "learning_rate": 4.2661037223161806e-05, + "loss": 2.3998, + "step": 13920 + }, + { + "epoch": 1.1234767169719957, + "grad_norm": 0.6444356441497803, + "learning_rate": 4.264810400366295e-05, + "loss": 2.4354, + "step": 13921 + }, + { + "epoch": 1.1235574207085788, + "grad_norm": 0.6786002516746521, + "learning_rate": 4.2635172213523255e-05, + "loss": 
2.3989, + "step": 13922 + }, + { + "epoch": 1.1236381244451619, + "grad_norm": 0.6838372349739075, + "learning_rate": 4.262224185306507e-05, + "loss": 2.4431, + "step": 13923 + }, + { + "epoch": 1.1237188281817447, + "grad_norm": 0.7516793012619019, + "learning_rate": 4.260931292261056e-05, + "loss": 2.4373, + "step": 13924 + }, + { + "epoch": 1.1237995319183278, + "grad_norm": 0.6860260367393494, + "learning_rate": 4.2596385422481985e-05, + "loss": 2.4457, + "step": 13925 + }, + { + "epoch": 1.123880235654911, + "grad_norm": 0.6556448936462402, + "learning_rate": 4.2583459353001595e-05, + "loss": 2.4165, + "step": 13926 + }, + { + "epoch": 1.1239609393914938, + "grad_norm": 0.729131281375885, + "learning_rate": 4.257053471449144e-05, + "loss": 2.4124, + "step": 13927 + }, + { + "epoch": 1.1240416431280769, + "grad_norm": 0.6941910982131958, + "learning_rate": 4.2557611507273684e-05, + "loss": 2.4095, + "step": 13928 + }, + { + "epoch": 1.12412234686466, + "grad_norm": 0.6390536427497864, + "learning_rate": 4.25446897316704e-05, + "loss": 2.4221, + "step": 13929 + }, + { + "epoch": 1.1242030506012428, + "grad_norm": 0.7034881114959717, + "learning_rate": 4.253176938800365e-05, + "loss": 2.4685, + "step": 13930 + }, + { + "epoch": 1.124283754337826, + "grad_norm": 0.6975526809692383, + "learning_rate": 4.251885047659542e-05, + "loss": 2.4771, + "step": 13931 + }, + { + "epoch": 1.1243644580744088, + "grad_norm": 0.7020023465156555, + "learning_rate": 4.2505932997767695e-05, + "loss": 2.4746, + "step": 13932 + }, + { + "epoch": 1.1244451618109919, + "grad_norm": 0.7207093238830566, + "learning_rate": 4.2493016951842444e-05, + "loss": 2.4707, + "step": 13933 + }, + { + "epoch": 1.124525865547575, + "grad_norm": 0.7711251974105835, + "learning_rate": 4.24801023391415e-05, + "loss": 2.5104, + "step": 13934 + }, + { + "epoch": 1.1246065692841578, + "grad_norm": 0.7324040532112122, + "learning_rate": 4.246718915998677e-05, + "loss": 2.4257, + "step": 13935 + }, + { + 
"epoch": 1.124687273020741, + "grad_norm": 0.6532757878303528, + "learning_rate": 4.2454277414700116e-05, + "loss": 2.3708, + "step": 13936 + }, + { + "epoch": 1.1247679767573238, + "grad_norm": 0.6933012008666992, + "learning_rate": 4.244136710360325e-05, + "loss": 2.4985, + "step": 13937 + }, + { + "epoch": 1.1248486804939068, + "grad_norm": 0.6787589192390442, + "learning_rate": 4.242845822701798e-05, + "loss": 2.402, + "step": 13938 + }, + { + "epoch": 1.12492938423049, + "grad_norm": 0.6567786931991577, + "learning_rate": 4.241555078526602e-05, + "loss": 2.4295, + "step": 13939 + }, + { + "epoch": 1.1250100879670728, + "grad_norm": 0.6962547302246094, + "learning_rate": 4.2402644778669074e-05, + "loss": 2.4006, + "step": 13940 + }, + { + "epoch": 1.125090791703656, + "grad_norm": 0.7152721285820007, + "learning_rate": 4.238974020754877e-05, + "loss": 2.4757, + "step": 13941 + }, + { + "epoch": 1.125171495440239, + "grad_norm": 0.6869861483573914, + "learning_rate": 4.237683707222677e-05, + "loss": 2.3877, + "step": 13942 + }, + { + "epoch": 1.1252521991768218, + "grad_norm": 0.6951470971107483, + "learning_rate": 4.236393537302459e-05, + "loss": 2.3755, + "step": 13943 + }, + { + "epoch": 1.125332902913405, + "grad_norm": 0.6997567415237427, + "learning_rate": 4.2351035110263805e-05, + "loss": 2.4731, + "step": 13944 + }, + { + "epoch": 1.125413606649988, + "grad_norm": 0.6765854358673096, + "learning_rate": 4.23381362842659e-05, + "loss": 2.4004, + "step": 13945 + }, + { + "epoch": 1.1254943103865709, + "grad_norm": 0.7046722173690796, + "learning_rate": 4.2325238895352426e-05, + "loss": 2.4379, + "step": 13946 + }, + { + "epoch": 1.125575014123154, + "grad_norm": 0.6862985491752625, + "learning_rate": 4.231234294384472e-05, + "loss": 2.4614, + "step": 13947 + }, + { + "epoch": 1.1256557178597368, + "grad_norm": 0.6637778282165527, + "learning_rate": 4.229944843006422e-05, + "loss": 2.4412, + "step": 13948 + }, + { + "epoch": 1.12573642159632, + "grad_norm": 
0.7042228579521179, + "learning_rate": 4.228655535433231e-05, + "loss": 2.4296, + "step": 13949 + }, + { + "epoch": 1.1258171253329028, + "grad_norm": 0.6767764687538147, + "learning_rate": 4.227366371697029e-05, + "loss": 2.409, + "step": 13950 + }, + { + "epoch": 1.1258978290694859, + "grad_norm": 0.6886798143386841, + "learning_rate": 4.226077351829948e-05, + "loss": 2.4786, + "step": 13951 + }, + { + "epoch": 1.125978532806069, + "grad_norm": 0.7723653316497803, + "learning_rate": 4.224788475864115e-05, + "loss": 2.4111, + "step": 13952 + }, + { + "epoch": 1.1260592365426518, + "grad_norm": 0.7614055275917053, + "learning_rate": 4.2234997438316473e-05, + "loss": 2.5055, + "step": 13953 + }, + { + "epoch": 1.126139940279235, + "grad_norm": 0.7195241451263428, + "learning_rate": 4.222211155764665e-05, + "loss": 2.411, + "step": 13954 + }, + { + "epoch": 1.126220644015818, + "grad_norm": 0.7130021452903748, + "learning_rate": 4.220922711695288e-05, + "loss": 2.4819, + "step": 13955 + }, + { + "epoch": 1.1263013477524009, + "grad_norm": 0.6972241401672363, + "learning_rate": 4.2196344116556194e-05, + "loss": 2.4611, + "step": 13956 + }, + { + "epoch": 1.126382051488984, + "grad_norm": 0.7023231387138367, + "learning_rate": 4.218346255677772e-05, + "loss": 2.4509, + "step": 13957 + }, + { + "epoch": 1.126462755225567, + "grad_norm": 0.6959301829338074, + "learning_rate": 4.2170582437938534e-05, + "loss": 2.4441, + "step": 13958 + }, + { + "epoch": 1.12654345896215, + "grad_norm": 0.7423149347305298, + "learning_rate": 4.2157703760359555e-05, + "loss": 2.4452, + "step": 13959 + }, + { + "epoch": 1.126624162698733, + "grad_norm": 0.6587820053100586, + "learning_rate": 4.214482652436177e-05, + "loss": 2.3936, + "step": 13960 + }, + { + "epoch": 1.1267048664353159, + "grad_norm": 0.6601768136024475, + "learning_rate": 4.213195073026618e-05, + "loss": 2.453, + "step": 13961 + }, + { + "epoch": 1.126785570171899, + "grad_norm": 0.6986891031265259, + "learning_rate": 
4.2119076378393676e-05, + "loss": 2.452, + "step": 13962 + }, + { + "epoch": 1.126866273908482, + "grad_norm": 0.7207025289535522, + "learning_rate": 4.2106203469065055e-05, + "loss": 2.4048, + "step": 13963 + }, + { + "epoch": 1.126946977645065, + "grad_norm": 0.6731177568435669, + "learning_rate": 4.2093332002601184e-05, + "loss": 2.4573, + "step": 13964 + }, + { + "epoch": 1.127027681381648, + "grad_norm": 0.7330070734024048, + "learning_rate": 4.208046197932288e-05, + "loss": 2.4274, + "step": 13965 + }, + { + "epoch": 1.1271083851182309, + "grad_norm": 0.7008770704269409, + "learning_rate": 4.206759339955084e-05, + "loss": 2.4933, + "step": 13966 + }, + { + "epoch": 1.127189088854814, + "grad_norm": 0.8309584259986877, + "learning_rate": 4.20547262636058e-05, + "loss": 2.3857, + "step": 13967 + }, + { + "epoch": 1.127269792591397, + "grad_norm": 0.6705843210220337, + "learning_rate": 4.204186057180849e-05, + "loss": 2.4303, + "step": 13968 + }, + { + "epoch": 1.12735049632798, + "grad_norm": 0.7526851296424866, + "learning_rate": 4.202899632447949e-05, + "loss": 2.455, + "step": 13969 + }, + { + "epoch": 1.127431200064563, + "grad_norm": 0.6690995097160339, + "learning_rate": 4.201613352193943e-05, + "loss": 2.4398, + "step": 13970 + }, + { + "epoch": 1.127511903801146, + "grad_norm": 0.6946840286254883, + "learning_rate": 4.20032721645089e-05, + "loss": 2.4032, + "step": 13971 + }, + { + "epoch": 1.127592607537729, + "grad_norm": 0.7438863515853882, + "learning_rate": 4.1990412252508426e-05, + "loss": 2.4644, + "step": 13972 + }, + { + "epoch": 1.127673311274312, + "grad_norm": 0.6975359916687012, + "learning_rate": 4.197755378625852e-05, + "loss": 2.3991, + "step": 13973 + }, + { + "epoch": 1.1277540150108951, + "grad_norm": 0.6799279451370239, + "learning_rate": 4.196469676607968e-05, + "loss": 2.4328, + "step": 13974 + }, + { + "epoch": 1.127834718747478, + "grad_norm": 0.7014481425285339, + "learning_rate": 4.1951841192292274e-05, + "loss": 2.5045, + 
"step": 13975 + }, + { + "epoch": 1.127915422484061, + "grad_norm": 0.7074011564254761, + "learning_rate": 4.1938987065216716e-05, + "loss": 2.4583, + "step": 13976 + }, + { + "epoch": 1.127996126220644, + "grad_norm": 0.7246339917182922, + "learning_rate": 4.192613438517338e-05, + "loss": 2.447, + "step": 13977 + }, + { + "epoch": 1.128076829957227, + "grad_norm": 0.6757462620735168, + "learning_rate": 4.191328315248262e-05, + "loss": 2.4181, + "step": 13978 + }, + { + "epoch": 1.12815753369381, + "grad_norm": 0.6758493185043335, + "learning_rate": 4.1900433367464644e-05, + "loss": 2.4837, + "step": 13979 + }, + { + "epoch": 1.128238237430393, + "grad_norm": 0.6782165765762329, + "learning_rate": 4.1887585030439736e-05, + "loss": 2.3946, + "step": 13980 + }, + { + "epoch": 1.128318941166976, + "grad_norm": 0.7176415324211121, + "learning_rate": 4.187473814172812e-05, + "loss": 2.4538, + "step": 13981 + }, + { + "epoch": 1.128399644903559, + "grad_norm": 0.6636224985122681, + "learning_rate": 4.186189270164997e-05, + "loss": 2.4493, + "step": 13982 + }, + { + "epoch": 1.128480348640142, + "grad_norm": 0.6613143086433411, + "learning_rate": 4.184904871052544e-05, + "loss": 2.4994, + "step": 13983 + }, + { + "epoch": 1.128561052376725, + "grad_norm": 0.7148364186286926, + "learning_rate": 4.183620616867465e-05, + "loss": 2.4673, + "step": 13984 + }, + { + "epoch": 1.128641756113308, + "grad_norm": 0.6657952070236206, + "learning_rate": 4.1823365076417606e-05, + "loss": 2.3915, + "step": 13985 + }, + { + "epoch": 1.128722459849891, + "grad_norm": 0.7135687470436096, + "learning_rate": 4.181052543407439e-05, + "loss": 2.4961, + "step": 13986 + }, + { + "epoch": 1.1288031635864741, + "grad_norm": 0.7245377898216248, + "learning_rate": 4.179768724196501e-05, + "loss": 2.4519, + "step": 13987 + }, + { + "epoch": 1.128883867323057, + "grad_norm": 0.6832938194274902, + "learning_rate": 4.1784850500409376e-05, + "loss": 2.4471, + "step": 13988 + }, + { + "epoch": 
1.12896457105964, + "grad_norm": 0.7303032279014587, + "learning_rate": 4.177201520972746e-05, + "loss": 2.3906, + "step": 13989 + }, + { + "epoch": 1.1290452747962232, + "grad_norm": 0.698581874370575, + "learning_rate": 4.175918137023911e-05, + "loss": 2.4667, + "step": 13990 + }, + { + "epoch": 1.129125978532806, + "grad_norm": 0.69133061170578, + "learning_rate": 4.174634898226422e-05, + "loss": 2.4285, + "step": 13991 + }, + { + "epoch": 1.1292066822693891, + "grad_norm": 0.7029501795768738, + "learning_rate": 4.1733518046122576e-05, + "loss": 2.4839, + "step": 13992 + }, + { + "epoch": 1.129287386005972, + "grad_norm": 0.7566521167755127, + "learning_rate": 4.172068856213398e-05, + "loss": 2.5019, + "step": 13993 + }, + { + "epoch": 1.129368089742555, + "grad_norm": 0.697998046875, + "learning_rate": 4.1707860530618204e-05, + "loss": 2.4305, + "step": 13994 + }, + { + "epoch": 1.1294487934791382, + "grad_norm": 0.674194872379303, + "learning_rate": 4.169503395189489e-05, + "loss": 2.4361, + "step": 13995 + }, + { + "epoch": 1.129529497215721, + "grad_norm": 0.6936436891555786, + "learning_rate": 4.168220882628373e-05, + "loss": 2.518, + "step": 13996 + }, + { + "epoch": 1.1296102009523041, + "grad_norm": 0.6831670999526978, + "learning_rate": 4.166938515410442e-05, + "loss": 2.4197, + "step": 13997 + }, + { + "epoch": 1.129690904688887, + "grad_norm": 0.7323662638664246, + "learning_rate": 4.165656293567647e-05, + "loss": 2.4555, + "step": 13998 + }, + { + "epoch": 1.12977160842547, + "grad_norm": 0.7699782848358154, + "learning_rate": 4.164374217131948e-05, + "loss": 2.4456, + "step": 13999 + }, + { + "epoch": 1.1298523121620532, + "grad_norm": 0.7009051442146301, + "learning_rate": 4.163092286135297e-05, + "loss": 2.4429, + "step": 14000 + }, + { + "epoch": 1.1298523121620532, + "eval_loss": 2.4034411907196045, + "eval_runtime": 771.1158, + "eval_samples_per_second": 3.398, + "eval_steps_per_second": 0.567, + "step": 14000 + }, + { + "epoch": 
1.129933015898636, + "grad_norm": 0.674665093421936, + "learning_rate": 4.1618105006096456e-05, + "loss": 2.4127, + "step": 14001 + }, + { + "epoch": 1.1300137196352191, + "grad_norm": 0.7332403659820557, + "learning_rate": 4.1605288605869365e-05, + "loss": 2.4854, + "step": 14002 + }, + { + "epoch": 1.1300944233718022, + "grad_norm": 0.70233553647995, + "learning_rate": 4.159247366099117e-05, + "loss": 2.4433, + "step": 14003 + }, + { + "epoch": 1.130175127108385, + "grad_norm": 0.6259445548057556, + "learning_rate": 4.157966017178118e-05, + "loss": 2.3605, + "step": 14004 + }, + { + "epoch": 1.1302558308449682, + "grad_norm": 0.717408299446106, + "learning_rate": 4.1566848138558755e-05, + "loss": 2.4378, + "step": 14005 + }, + { + "epoch": 1.130336534581551, + "grad_norm": 0.6973297595977783, + "learning_rate": 4.155403756164323e-05, + "loss": 2.4363, + "step": 14006 + }, + { + "epoch": 1.1304172383181341, + "grad_norm": 0.7204940915107727, + "learning_rate": 4.154122844135391e-05, + "loss": 2.4814, + "step": 14007 + }, + { + "epoch": 1.1304979420547172, + "grad_norm": 0.8976696133613586, + "learning_rate": 4.1528420778009935e-05, + "loss": 2.4654, + "step": 14008 + }, + { + "epoch": 1.1305786457913, + "grad_norm": 0.7270354628562927, + "learning_rate": 4.151561457193057e-05, + "loss": 2.4088, + "step": 14009 + }, + { + "epoch": 1.1306593495278832, + "grad_norm": 0.7200367450714111, + "learning_rate": 4.1502809823434985e-05, + "loss": 2.4412, + "step": 14010 + }, + { + "epoch": 1.130740053264466, + "grad_norm": 0.7593986392021179, + "learning_rate": 4.149000653284227e-05, + "loss": 2.5058, + "step": 14011 + }, + { + "epoch": 1.1308207570010491, + "grad_norm": 0.7322795987129211, + "learning_rate": 4.147720470047155e-05, + "loss": 2.4899, + "step": 14012 + }, + { + "epoch": 1.1309014607376322, + "grad_norm": 0.6649030447006226, + "learning_rate": 4.1464404326641905e-05, + "loss": 2.4358, + "step": 14013 + }, + { + "epoch": 1.130982164474215, + "grad_norm": 
0.7258814573287964, + "learning_rate": 4.145160541167228e-05, + "loss": 2.4732, + "step": 14014 + }, + { + "epoch": 1.1310628682107982, + "grad_norm": 0.7414976358413696, + "learning_rate": 4.1438807955881695e-05, + "loss": 2.4157, + "step": 14015 + }, + { + "epoch": 1.1311435719473812, + "grad_norm": 0.6813236474990845, + "learning_rate": 4.142601195958914e-05, + "loss": 2.3966, + "step": 14016 + }, + { + "epoch": 1.131224275683964, + "grad_norm": 0.6715923547744751, + "learning_rate": 4.141321742311344e-05, + "loss": 2.4358, + "step": 14017 + }, + { + "epoch": 1.1313049794205472, + "grad_norm": 0.7174912691116333, + "learning_rate": 4.14004243467735e-05, + "loss": 2.4838, + "step": 14018 + }, + { + "epoch": 1.1313856831571303, + "grad_norm": 0.6945109963417053, + "learning_rate": 4.138763273088821e-05, + "loss": 2.4674, + "step": 14019 + }, + { + "epoch": 1.1314663868937131, + "grad_norm": 0.6759494543075562, + "learning_rate": 4.137484257577629e-05, + "loss": 2.4659, + "step": 14020 + }, + { + "epoch": 1.1315470906302962, + "grad_norm": 0.7077876925468445, + "learning_rate": 4.1362053881756534e-05, + "loss": 2.4731, + "step": 14021 + }, + { + "epoch": 1.131627794366879, + "grad_norm": 0.6769500970840454, + "learning_rate": 4.1349266649147654e-05, + "loss": 2.3606, + "step": 14022 + }, + { + "epoch": 1.1317084981034622, + "grad_norm": 0.7104208469390869, + "learning_rate": 4.1336480878268424e-05, + "loss": 2.4626, + "step": 14023 + }, + { + "epoch": 1.1317892018400453, + "grad_norm": 0.7102686762809753, + "learning_rate": 4.132369656943741e-05, + "loss": 2.4545, + "step": 14024 + }, + { + "epoch": 1.1318699055766281, + "grad_norm": 0.7773897647857666, + "learning_rate": 4.1310913722973256e-05, + "loss": 2.5107, + "step": 14025 + }, + { + "epoch": 1.1319506093132112, + "grad_norm": 0.6427130103111267, + "learning_rate": 4.1298132339194585e-05, + "loss": 2.4349, + "step": 14026 + }, + { + "epoch": 1.132031313049794, + "grad_norm": 0.6725162863731384, + 
"learning_rate": 4.128535241841987e-05, + "loss": 2.4566, + "step": 14027 + }, + { + "epoch": 1.1321120167863772, + "grad_norm": 0.7182251214981079, + "learning_rate": 4.127257396096764e-05, + "loss": 2.4472, + "step": 14028 + }, + { + "epoch": 1.1321927205229603, + "grad_norm": 0.6712302565574646, + "learning_rate": 4.1259796967156426e-05, + "loss": 2.4326, + "step": 14029 + }, + { + "epoch": 1.1322734242595431, + "grad_norm": 0.7726041078567505, + "learning_rate": 4.124702143730459e-05, + "loss": 2.4994, + "step": 14030 + }, + { + "epoch": 1.1323541279961262, + "grad_norm": 0.651899516582489, + "learning_rate": 4.123424737173056e-05, + "loss": 2.4244, + "step": 14031 + }, + { + "epoch": 1.1324348317327093, + "grad_norm": 0.6646261215209961, + "learning_rate": 4.12214747707527e-05, + "loss": 2.5027, + "step": 14032 + }, + { + "epoch": 1.1325155354692922, + "grad_norm": 0.729098916053772, + "learning_rate": 4.120870363468933e-05, + "loss": 2.5117, + "step": 14033 + }, + { + "epoch": 1.1325962392058753, + "grad_norm": 0.7056638598442078, + "learning_rate": 4.119593396385876e-05, + "loss": 2.4279, + "step": 14034 + }, + { + "epoch": 1.1326769429424584, + "grad_norm": 0.7051844000816345, + "learning_rate": 4.1183165758579255e-05, + "loss": 2.3844, + "step": 14035 + }, + { + "epoch": 1.1327576466790412, + "grad_norm": 0.6954311728477478, + "learning_rate": 4.1170399019168984e-05, + "loss": 2.4041, + "step": 14036 + }, + { + "epoch": 1.1328383504156243, + "grad_norm": 0.650044858455658, + "learning_rate": 4.1157633745946135e-05, + "loss": 2.4397, + "step": 14037 + }, + { + "epoch": 1.1329190541522072, + "grad_norm": 0.6974380016326904, + "learning_rate": 4.114486993922888e-05, + "loss": 2.4391, + "step": 14038 + }, + { + "epoch": 1.1329997578887903, + "grad_norm": 0.7252807021141052, + "learning_rate": 4.113210759933536e-05, + "loss": 2.4471, + "step": 14039 + }, + { + "epoch": 1.1330804616253733, + "grad_norm": 0.7001414895057678, + "learning_rate": 
4.111934672658354e-05, + "loss": 2.402, + "step": 14040 + }, + { + "epoch": 1.1331611653619562, + "grad_norm": 0.7420533895492554, + "learning_rate": 4.110658732129153e-05, + "loss": 2.4987, + "step": 14041 + }, + { + "epoch": 1.1332418690985393, + "grad_norm": 0.6850644946098328, + "learning_rate": 4.1093829383777315e-05, + "loss": 2.4355, + "step": 14042 + }, + { + "epoch": 1.1333225728351222, + "grad_norm": 0.6905977725982666, + "learning_rate": 4.108107291435885e-05, + "loss": 2.4818, + "step": 14043 + }, + { + "epoch": 1.1334032765717053, + "grad_norm": 0.6555112600326538, + "learning_rate": 4.106831791335407e-05, + "loss": 2.425, + "step": 14044 + }, + { + "epoch": 1.1334839803082883, + "grad_norm": 0.6570355892181396, + "learning_rate": 4.105556438108089e-05, + "loss": 2.4232, + "step": 14045 + }, + { + "epoch": 1.1335646840448712, + "grad_norm": 0.7910747528076172, + "learning_rate": 4.104281231785708e-05, + "loss": 2.484, + "step": 14046 + }, + { + "epoch": 1.1336453877814543, + "grad_norm": 0.6581952571868896, + "learning_rate": 4.103006172400052e-05, + "loss": 2.4102, + "step": 14047 + }, + { + "epoch": 1.1337260915180374, + "grad_norm": 0.6834773421287537, + "learning_rate": 4.1017312599828994e-05, + "loss": 2.4602, + "step": 14048 + }, + { + "epoch": 1.1338067952546202, + "grad_norm": 0.7588350772857666, + "learning_rate": 4.1004564945660195e-05, + "loss": 2.5059, + "step": 14049 + }, + { + "epoch": 1.1338874989912033, + "grad_norm": 0.6604699492454529, + "learning_rate": 4.099181876181185e-05, + "loss": 2.4403, + "step": 14050 + }, + { + "epoch": 1.1339682027277862, + "grad_norm": 0.6957669258117676, + "learning_rate": 4.097907404860163e-05, + "loss": 2.4218, + "step": 14051 + }, + { + "epoch": 1.1340489064643693, + "grad_norm": 0.7091849446296692, + "learning_rate": 4.0966330806347166e-05, + "loss": 2.4396, + "step": 14052 + }, + { + "epoch": 1.1341296102009524, + "grad_norm": 0.6637482047080994, + "learning_rate": 4.095358903536605e-05, + "loss": 
2.4514, + "step": 14053 + }, + { + "epoch": 1.1342103139375352, + "grad_norm": 0.7485960125923157, + "learning_rate": 4.0940848735975846e-05, + "loss": 2.4401, + "step": 14054 + }, + { + "epoch": 1.1342910176741183, + "grad_norm": 0.6509774327278137, + "learning_rate": 4.092810990849411e-05, + "loss": 2.4575, + "step": 14055 + }, + { + "epoch": 1.1343717214107012, + "grad_norm": 0.7151626348495483, + "learning_rate": 4.091537255323825e-05, + "loss": 2.45, + "step": 14056 + }, + { + "epoch": 1.1344524251472843, + "grad_norm": 0.7536267042160034, + "learning_rate": 4.0902636670525764e-05, + "loss": 2.497, + "step": 14057 + }, + { + "epoch": 1.1345331288838674, + "grad_norm": 0.7779545783996582, + "learning_rate": 4.0889902260674086e-05, + "loss": 2.412, + "step": 14058 + }, + { + "epoch": 1.1346138326204502, + "grad_norm": 0.7211748957633972, + "learning_rate": 4.087716932400052e-05, + "loss": 2.4727, + "step": 14059 + }, + { + "epoch": 1.1346945363570333, + "grad_norm": 0.6710701584815979, + "learning_rate": 4.086443786082245e-05, + "loss": 2.4318, + "step": 14060 + }, + { + "epoch": 1.1347752400936164, + "grad_norm": 0.7072857022285461, + "learning_rate": 4.085170787145717e-05, + "loss": 2.4672, + "step": 14061 + }, + { + "epoch": 1.1348559438301993, + "grad_norm": 0.6475152969360352, + "learning_rate": 4.083897935622194e-05, + "loss": 2.4104, + "step": 14062 + }, + { + "epoch": 1.1349366475667824, + "grad_norm": 0.7408067584037781, + "learning_rate": 4.0826252315433986e-05, + "loss": 2.4129, + "step": 14063 + }, + { + "epoch": 1.1350173513033655, + "grad_norm": 0.732540488243103, + "learning_rate": 4.081352674941056e-05, + "loss": 2.4209, + "step": 14064 + }, + { + "epoch": 1.1350980550399483, + "grad_norm": 0.6933332681655884, + "learning_rate": 4.080080265846872e-05, + "loss": 2.3797, + "step": 14065 + }, + { + "epoch": 1.1351787587765314, + "grad_norm": 0.6507896780967712, + "learning_rate": 4.078808004292561e-05, + "loss": 2.4372, + "step": 14066 + }, + { + 
"epoch": 1.1352594625131143, + "grad_norm": 0.729292094707489, + "learning_rate": 4.0775358903098384e-05, + "loss": 2.5513, + "step": 14067 + }, + { + "epoch": 1.1353401662496974, + "grad_norm": 0.692757248878479, + "learning_rate": 4.076263923930398e-05, + "loss": 2.4228, + "step": 14068 + }, + { + "epoch": 1.1354208699862804, + "grad_norm": 0.7028260231018066, + "learning_rate": 4.074992105185946e-05, + "loss": 2.4478, + "step": 14069 + }, + { + "epoch": 1.1355015737228633, + "grad_norm": 0.65067058801651, + "learning_rate": 4.073720434108179e-05, + "loss": 2.3729, + "step": 14070 + }, + { + "epoch": 1.1355822774594464, + "grad_norm": 0.6884061098098755, + "learning_rate": 4.0724489107287933e-05, + "loss": 2.3693, + "step": 14071 + }, + { + "epoch": 1.1356629811960293, + "grad_norm": 0.70686936378479, + "learning_rate": 4.071177535079472e-05, + "loss": 2.4989, + "step": 14072 + }, + { + "epoch": 1.1357436849326124, + "grad_norm": 0.6792482733726501, + "learning_rate": 4.0699063071919016e-05, + "loss": 2.393, + "step": 14073 + }, + { + "epoch": 1.1358243886691954, + "grad_norm": 0.7231085896492004, + "learning_rate": 4.0686352270977745e-05, + "loss": 2.4597, + "step": 14074 + }, + { + "epoch": 1.1359050924057783, + "grad_norm": 0.8024532198905945, + "learning_rate": 4.067364294828758e-05, + "loss": 2.4409, + "step": 14075 + }, + { + "epoch": 1.1359857961423614, + "grad_norm": 0.6761424541473389, + "learning_rate": 4.066093510416532e-05, + "loss": 2.4598, + "step": 14076 + }, + { + "epoch": 1.1360664998789445, + "grad_norm": 0.7075559496879578, + "learning_rate": 4.064822873892771e-05, + "loss": 2.4649, + "step": 14077 + }, + { + "epoch": 1.1361472036155273, + "grad_norm": 0.6292272806167603, + "learning_rate": 4.063552385289134e-05, + "loss": 2.445, + "step": 14078 + }, + { + "epoch": 1.1362279073521104, + "grad_norm": 0.6435273885726929, + "learning_rate": 4.06228204463729e-05, + "loss": 2.4105, + "step": 14079 + }, + { + "epoch": 1.1363086110886935, + 
"grad_norm": 0.7135637402534485, + "learning_rate": 4.061011851968903e-05, + "loss": 2.3907, + "step": 14080 + }, + { + "epoch": 1.1363893148252764, + "grad_norm": 0.7424013614654541, + "learning_rate": 4.059741807315621e-05, + "loss": 2.4405, + "step": 14081 + }, + { + "epoch": 1.1364700185618595, + "grad_norm": 0.6649916768074036, + "learning_rate": 4.0584719107091016e-05, + "loss": 2.4314, + "step": 14082 + }, + { + "epoch": 1.1365507222984423, + "grad_norm": 0.6700563430786133, + "learning_rate": 4.0572021621809944e-05, + "loss": 2.4093, + "step": 14083 + }, + { + "epoch": 1.1366314260350254, + "grad_norm": 0.6740709543228149, + "learning_rate": 4.055932561762942e-05, + "loss": 2.4301, + "step": 14084 + }, + { + "epoch": 1.1367121297716085, + "grad_norm": 0.7039555907249451, + "learning_rate": 4.0546631094865895e-05, + "loss": 2.4427, + "step": 14085 + }, + { + "epoch": 1.1367928335081914, + "grad_norm": 0.7461164593696594, + "learning_rate": 4.053393805383573e-05, + "loss": 2.3865, + "step": 14086 + }, + { + "epoch": 1.1368735372447745, + "grad_norm": 0.6808290481567383, + "learning_rate": 4.0521246494855316e-05, + "loss": 2.3738, + "step": 14087 + }, + { + "epoch": 1.1369542409813573, + "grad_norm": 0.6942760944366455, + "learning_rate": 4.0508556418240875e-05, + "loss": 2.4351, + "step": 14088 + }, + { + "epoch": 1.1370349447179404, + "grad_norm": 0.7615510821342468, + "learning_rate": 4.049586782430872e-05, + "loss": 2.3968, + "step": 14089 + }, + { + "epoch": 1.1371156484545235, + "grad_norm": 0.7240662574768066, + "learning_rate": 4.048318071337512e-05, + "loss": 2.4046, + "step": 14090 + }, + { + "epoch": 1.1371963521911064, + "grad_norm": 0.7286471128463745, + "learning_rate": 4.047049508575621e-05, + "loss": 2.4039, + "step": 14091 + }, + { + "epoch": 1.1372770559276895, + "grad_norm": 0.7031459212303162, + "learning_rate": 4.045781094176816e-05, + "loss": 2.4494, + "step": 14092 + }, + { + "epoch": 1.1373577596642725, + "grad_norm": 
0.7116301655769348, + "learning_rate": 4.0445128281727116e-05, + "loss": 2.3991, + "step": 14093 + }, + { + "epoch": 1.1374384634008554, + "grad_norm": 0.6719788312911987, + "learning_rate": 4.043244710594914e-05, + "loss": 2.4823, + "step": 14094 + }, + { + "epoch": 1.1375191671374385, + "grad_norm": 0.6770508885383606, + "learning_rate": 4.041976741475031e-05, + "loss": 2.4362, + "step": 14095 + }, + { + "epoch": 1.1375998708740216, + "grad_norm": 0.6808609962463379, + "learning_rate": 4.040708920844666e-05, + "loss": 2.435, + "step": 14096 + }, + { + "epoch": 1.1376805746106045, + "grad_norm": 0.7445514798164368, + "learning_rate": 4.0394412487354074e-05, + "loss": 2.4749, + "step": 14097 + }, + { + "epoch": 1.1377612783471875, + "grad_norm": 0.7024775743484497, + "learning_rate": 4.038173725178854e-05, + "loss": 2.4354, + "step": 14098 + }, + { + "epoch": 1.1378419820837704, + "grad_norm": 0.6925685405731201, + "learning_rate": 4.0369063502066e-05, + "loss": 2.4462, + "step": 14099 + }, + { + "epoch": 1.1379226858203535, + "grad_norm": 0.6970539689064026, + "learning_rate": 4.035639123850223e-05, + "loss": 2.3842, + "step": 14100 + }, + { + "epoch": 1.1380033895569364, + "grad_norm": 0.6571836471557617, + "learning_rate": 4.0343720461413107e-05, + "loss": 2.4213, + "step": 14101 + }, + { + "epoch": 1.1380840932935194, + "grad_norm": 0.7264918684959412, + "learning_rate": 4.033105117111441e-05, + "loss": 2.4697, + "step": 14102 + }, + { + "epoch": 1.1381647970301025, + "grad_norm": 0.6929560899734497, + "learning_rate": 4.03183833679219e-05, + "loss": 2.461, + "step": 14103 + }, + { + "epoch": 1.1382455007666854, + "grad_norm": 0.6533559560775757, + "learning_rate": 4.030571705215128e-05, + "loss": 2.4336, + "step": 14104 + }, + { + "epoch": 1.1383262045032685, + "grad_norm": 0.7372364401817322, + "learning_rate": 4.0293052224118234e-05, + "loss": 2.4396, + "step": 14105 + }, + { + "epoch": 1.1384069082398516, + "grad_norm": 0.6736310720443726, + 
"learning_rate": 4.028038888413844e-05, + "loss": 2.4123, + "step": 14106 + }, + { + "epoch": 1.1384876119764344, + "grad_norm": 0.6898338794708252, + "learning_rate": 4.026772703252742e-05, + "loss": 2.431, + "step": 14107 + }, + { + "epoch": 1.1385683157130175, + "grad_norm": 0.7933369278907776, + "learning_rate": 4.02550666696008e-05, + "loss": 2.4669, + "step": 14108 + }, + { + "epoch": 1.1386490194496006, + "grad_norm": 0.7218122482299805, + "learning_rate": 4.024240779567412e-05, + "loss": 2.3761, + "step": 14109 + }, + { + "epoch": 1.1387297231861835, + "grad_norm": 0.7018248438835144, + "learning_rate": 4.022975041106281e-05, + "loss": 2.4011, + "step": 14110 + }, + { + "epoch": 1.1388104269227666, + "grad_norm": 0.6709668040275574, + "learning_rate": 4.0217094516082364e-05, + "loss": 2.426, + "step": 14111 + }, + { + "epoch": 1.1388911306593494, + "grad_norm": 0.7241504192352295, + "learning_rate": 4.0204440111048195e-05, + "loss": 2.4085, + "step": 14112 + }, + { + "epoch": 1.1389718343959325, + "grad_norm": 0.731347382068634, + "learning_rate": 4.0191787196275675e-05, + "loss": 2.502, + "step": 14113 + }, + { + "epoch": 1.1390525381325156, + "grad_norm": 0.6630167365074158, + "learning_rate": 4.0179135772080166e-05, + "loss": 2.3999, + "step": 14114 + }, + { + "epoch": 1.1391332418690985, + "grad_norm": 0.7094748616218567, + "learning_rate": 4.016648583877698e-05, + "loss": 2.4666, + "step": 14115 + }, + { + "epoch": 1.1392139456056816, + "grad_norm": 0.7262436151504517, + "learning_rate": 4.0153837396681395e-05, + "loss": 2.4369, + "step": 14116 + }, + { + "epoch": 1.1392946493422644, + "grad_norm": 0.6796039938926697, + "learning_rate": 4.014119044610859e-05, + "loss": 2.4607, + "step": 14117 + }, + { + "epoch": 1.1393753530788475, + "grad_norm": 0.6690036058425903, + "learning_rate": 4.0128544987373785e-05, + "loss": 2.4145, + "step": 14118 + }, + { + "epoch": 1.1394560568154306, + "grad_norm": 0.6987181305885315, + "learning_rate": 
4.011590102079219e-05, + "loss": 2.4294, + "step": 14119 + }, + { + "epoch": 1.1395367605520135, + "grad_norm": 0.6756789684295654, + "learning_rate": 4.0103258546678836e-05, + "loss": 2.396, + "step": 14120 + }, + { + "epoch": 1.1396174642885966, + "grad_norm": 0.7027772068977356, + "learning_rate": 4.009061756534885e-05, + "loss": 2.3971, + "step": 14121 + }, + { + "epoch": 1.1396981680251796, + "grad_norm": 0.6872174143791199, + "learning_rate": 4.007797807711732e-05, + "loss": 2.4297, + "step": 14122 + }, + { + "epoch": 1.1397788717617625, + "grad_norm": 0.7213007211685181, + "learning_rate": 4.006534008229914e-05, + "loss": 2.4792, + "step": 14123 + }, + { + "epoch": 1.1398595754983456, + "grad_norm": 0.6771649122238159, + "learning_rate": 4.0052703581209395e-05, + "loss": 2.4397, + "step": 14124 + }, + { + "epoch": 1.1399402792349287, + "grad_norm": 0.6577184796333313, + "learning_rate": 4.0040068574163013e-05, + "loss": 2.4113, + "step": 14125 + }, + { + "epoch": 1.1400209829715116, + "grad_norm": 0.7493160367012024, + "learning_rate": 4.002743506147483e-05, + "loss": 2.4454, + "step": 14126 + }, + { + "epoch": 1.1401016867080946, + "grad_norm": 0.6820357441902161, + "learning_rate": 4.0014803043459726e-05, + "loss": 2.4126, + "step": 14127 + }, + { + "epoch": 1.1401823904446775, + "grad_norm": 0.7177188992500305, + "learning_rate": 4.000217252043258e-05, + "loss": 2.4355, + "step": 14128 + }, + { + "epoch": 1.1402630941812606, + "grad_norm": 0.654371440410614, + "learning_rate": 3.998954349270808e-05, + "loss": 2.4932, + "step": 14129 + }, + { + "epoch": 1.1403437979178437, + "grad_norm": 0.7029837965965271, + "learning_rate": 3.997691596060104e-05, + "loss": 2.4341, + "step": 14130 + }, + { + "epoch": 1.1404245016544265, + "grad_norm": 0.7971171140670776, + "learning_rate": 3.996428992442615e-05, + "loss": 2.4466, + "step": 14131 + }, + { + "epoch": 1.1405052053910096, + "grad_norm": 0.6941849589347839, + "learning_rate": 3.9951665384498114e-05, + "loss": 
2.4861, + "step": 14132 + }, + { + "epoch": 1.1405859091275925, + "grad_norm": 0.6657733917236328, + "learning_rate": 3.993904234113153e-05, + "loss": 2.4266, + "step": 14133 + }, + { + "epoch": 1.1406666128641756, + "grad_norm": 0.6780329346656799, + "learning_rate": 3.9926420794641e-05, + "loss": 2.458, + "step": 14134 + }, + { + "epoch": 1.1407473166007587, + "grad_norm": 0.7070702910423279, + "learning_rate": 3.991380074534109e-05, + "loss": 2.368, + "step": 14135 + }, + { + "epoch": 1.1408280203373415, + "grad_norm": 0.7186575531959534, + "learning_rate": 3.990118219354635e-05, + "loss": 2.4611, + "step": 14136 + }, + { + "epoch": 1.1409087240739246, + "grad_norm": 0.7171763777732849, + "learning_rate": 3.988856513957123e-05, + "loss": 2.4315, + "step": 14137 + }, + { + "epoch": 1.1409894278105077, + "grad_norm": 0.7090228796005249, + "learning_rate": 3.987594958373025e-05, + "loss": 2.4668, + "step": 14138 + }, + { + "epoch": 1.1410701315470906, + "grad_norm": 0.6523951888084412, + "learning_rate": 3.986333552633773e-05, + "loss": 2.4392, + "step": 14139 + }, + { + "epoch": 1.1411508352836737, + "grad_norm": 0.706000804901123, + "learning_rate": 3.98507229677081e-05, + "loss": 2.4382, + "step": 14140 + }, + { + "epoch": 1.1412315390202568, + "grad_norm": 0.6537537574768066, + "learning_rate": 3.983811190815571e-05, + "loss": 2.456, + "step": 14141 + }, + { + "epoch": 1.1413122427568396, + "grad_norm": 0.7509549856185913, + "learning_rate": 3.982550234799479e-05, + "loss": 2.4744, + "step": 14142 + }, + { + "epoch": 1.1413929464934227, + "grad_norm": 0.7188650965690613, + "learning_rate": 3.981289428753967e-05, + "loss": 2.4632, + "step": 14143 + }, + { + "epoch": 1.1414736502300056, + "grad_norm": 0.7563674449920654, + "learning_rate": 3.9800287727104544e-05, + "loss": 2.5063, + "step": 14144 + }, + { + "epoch": 1.1415543539665887, + "grad_norm": 0.8374128341674805, + "learning_rate": 3.978768266700361e-05, + "loss": 2.4942, + "step": 14145 + }, + { + 
"epoch": 1.1416350577031718, + "grad_norm": 0.7020177841186523, + "learning_rate": 3.9775079107551027e-05, + "loss": 2.4404, + "step": 14146 + }, + { + "epoch": 1.1417157614397546, + "grad_norm": 0.7326170802116394, + "learning_rate": 3.9762477049060895e-05, + "loss": 2.4127, + "step": 14147 + }, + { + "epoch": 1.1417964651763377, + "grad_norm": 0.6661173105239868, + "learning_rate": 3.974987649184734e-05, + "loss": 2.4649, + "step": 14148 + }, + { + "epoch": 1.1418771689129206, + "grad_norm": 0.7186033129692078, + "learning_rate": 3.973727743622432e-05, + "loss": 2.4275, + "step": 14149 + }, + { + "epoch": 1.1419578726495037, + "grad_norm": 0.7193881869316101, + "learning_rate": 3.972467988250588e-05, + "loss": 2.4997, + "step": 14150 + }, + { + "epoch": 1.1420385763860867, + "grad_norm": 0.7139542102813721, + "learning_rate": 3.971208383100601e-05, + "loss": 2.4211, + "step": 14151 + }, + { + "epoch": 1.1421192801226696, + "grad_norm": 0.6840166449546814, + "learning_rate": 3.969948928203856e-05, + "loss": 2.4504, + "step": 14152 + }, + { + "epoch": 1.1421999838592527, + "grad_norm": 0.8261072039604187, + "learning_rate": 3.968689623591747e-05, + "loss": 2.4901, + "step": 14153 + }, + { + "epoch": 1.1422806875958358, + "grad_norm": 0.7636086940765381, + "learning_rate": 3.96743046929566e-05, + "loss": 2.4202, + "step": 14154 + }, + { + "epoch": 1.1423613913324187, + "grad_norm": 0.7477976679801941, + "learning_rate": 3.966171465346973e-05, + "loss": 2.492, + "step": 14155 + }, + { + "epoch": 1.1424420950690017, + "grad_norm": 0.7516389489173889, + "learning_rate": 3.9649126117770665e-05, + "loss": 2.4512, + "step": 14156 + }, + { + "epoch": 1.1425227988055846, + "grad_norm": 0.6987521648406982, + "learning_rate": 3.9636539086173174e-05, + "loss": 2.4005, + "step": 14157 + }, + { + "epoch": 1.1426035025421677, + "grad_norm": 0.7242532968521118, + "learning_rate": 3.962395355899088e-05, + "loss": 2.4414, + "step": 14158 + }, + { + "epoch": 1.1426842062787508, + 
"grad_norm": 0.6616180539131165, + "learning_rate": 3.961136953653749e-05, + "loss": 2.4442, + "step": 14159 + }, + { + "epoch": 1.1427649100153336, + "grad_norm": 0.7165415287017822, + "learning_rate": 3.959878701912667e-05, + "loss": 2.4658, + "step": 14160 + }, + { + "epoch": 1.1428456137519167, + "grad_norm": 0.6619318127632141, + "learning_rate": 3.9586206007071926e-05, + "loss": 2.3803, + "step": 14161 + }, + { + "epoch": 1.1429263174884996, + "grad_norm": 0.6654838919639587, + "learning_rate": 3.957362650068684e-05, + "loss": 2.4584, + "step": 14162 + }, + { + "epoch": 1.1430070212250827, + "grad_norm": 0.6947140097618103, + "learning_rate": 3.956104850028496e-05, + "loss": 2.4236, + "step": 14163 + }, + { + "epoch": 1.1430877249616658, + "grad_norm": 0.6510412096977234, + "learning_rate": 3.954847200617973e-05, + "loss": 2.3589, + "step": 14164 + }, + { + "epoch": 1.1431684286982486, + "grad_norm": 0.7550667524337769, + "learning_rate": 3.95358970186846e-05, + "loss": 2.419, + "step": 14165 + }, + { + "epoch": 1.1432491324348317, + "grad_norm": 0.7898361682891846, + "learning_rate": 3.9523323538112975e-05, + "loss": 2.4549, + "step": 14166 + }, + { + "epoch": 1.1433298361714148, + "grad_norm": 0.7162390947341919, + "learning_rate": 3.9510751564778246e-05, + "loss": 2.4493, + "step": 14167 + }, + { + "epoch": 1.1434105399079977, + "grad_norm": 0.8251990079879761, + "learning_rate": 3.949818109899367e-05, + "loss": 2.4474, + "step": 14168 + }, + { + "epoch": 1.1434912436445808, + "grad_norm": 0.6739209890365601, + "learning_rate": 3.948561214107258e-05, + "loss": 2.4564, + "step": 14169 + }, + { + "epoch": 1.1435719473811639, + "grad_norm": 0.6606340408325195, + "learning_rate": 3.9473044691328254e-05, + "loss": 2.3838, + "step": 14170 + }, + { + "epoch": 1.1436526511177467, + "grad_norm": 0.7297452092170715, + "learning_rate": 3.946047875007384e-05, + "loss": 2.4673, + "step": 14171 + }, + { + "epoch": 1.1437333548543298, + "grad_norm": 0.7382420301437378, + 
"learning_rate": 3.9447914317622546e-05, + "loss": 2.4279, + "step": 14172 + }, + { + "epoch": 1.1438140585909127, + "grad_norm": 0.6947354674339294, + "learning_rate": 3.9435351394287546e-05, + "loss": 2.4553, + "step": 14173 + }, + { + "epoch": 1.1438947623274958, + "grad_norm": 0.670369565486908, + "learning_rate": 3.942278998038183e-05, + "loss": 2.4285, + "step": 14174 + }, + { + "epoch": 1.1439754660640788, + "grad_norm": 0.7097954154014587, + "learning_rate": 3.941023007621859e-05, + "loss": 2.477, + "step": 14175 + }, + { + "epoch": 1.1440561698006617, + "grad_norm": 0.6490213871002197, + "learning_rate": 3.9397671682110826e-05, + "loss": 2.3943, + "step": 14176 + }, + { + "epoch": 1.1441368735372448, + "grad_norm": 0.6505936980247498, + "learning_rate": 3.938511479837147e-05, + "loss": 2.4188, + "step": 14177 + }, + { + "epoch": 1.1442175772738277, + "grad_norm": 0.6696773767471313, + "learning_rate": 3.9372559425313496e-05, + "loss": 2.4377, + "step": 14178 + }, + { + "epoch": 1.1442982810104108, + "grad_norm": 0.6747034192085266, + "learning_rate": 3.936000556324982e-05, + "loss": 2.4111, + "step": 14179 + }, + { + "epoch": 1.1443789847469938, + "grad_norm": 0.7766546607017517, + "learning_rate": 3.934745321249336e-05, + "loss": 2.3873, + "step": 14180 + }, + { + "epoch": 1.1444596884835767, + "grad_norm": 0.7608100175857544, + "learning_rate": 3.933490237335688e-05, + "loss": 2.4567, + "step": 14181 + }, + { + "epoch": 1.1445403922201598, + "grad_norm": 0.7724356055259705, + "learning_rate": 3.9322353046153205e-05, + "loss": 2.4729, + "step": 14182 + }, + { + "epoch": 1.1446210959567429, + "grad_norm": 0.6908414363861084, + "learning_rate": 3.930980523119515e-05, + "loss": 2.41, + "step": 14183 + }, + { + "epoch": 1.1447017996933257, + "grad_norm": 0.7209733128547668, + "learning_rate": 3.9297258928795356e-05, + "loss": 2.4629, + "step": 14184 + }, + { + "epoch": 1.1447825034299088, + "grad_norm": 0.7116519212722778, + "learning_rate": 
3.928471413926651e-05, + "loss": 2.5081, + "step": 14185 + }, + { + "epoch": 1.144863207166492, + "grad_norm": 0.6704578995704651, + "learning_rate": 3.9272170862921365e-05, + "loss": 2.494, + "step": 14186 + }, + { + "epoch": 1.1449439109030748, + "grad_norm": 0.6914607882499695, + "learning_rate": 3.9259629100072435e-05, + "loss": 2.3979, + "step": 14187 + }, + { + "epoch": 1.1450246146396579, + "grad_norm": 0.7413245439529419, + "learning_rate": 3.924708885103233e-05, + "loss": 2.4534, + "step": 14188 + }, + { + "epoch": 1.1451053183762407, + "grad_norm": 0.7411661744117737, + "learning_rate": 3.923455011611362e-05, + "loss": 2.4191, + "step": 14189 + }, + { + "epoch": 1.1451860221128238, + "grad_norm": 0.6581972241401672, + "learning_rate": 3.9222012895628716e-05, + "loss": 2.4494, + "step": 14190 + }, + { + "epoch": 1.145266725849407, + "grad_norm": 0.6628647446632385, + "learning_rate": 3.920947718989013e-05, + "loss": 2.4483, + "step": 14191 + }, + { + "epoch": 1.1453474295859898, + "grad_norm": 0.7068151831626892, + "learning_rate": 3.9196942999210316e-05, + "loss": 2.4549, + "step": 14192 + }, + { + "epoch": 1.1454281333225729, + "grad_norm": 0.6727713942527771, + "learning_rate": 3.918441032390159e-05, + "loss": 2.4261, + "step": 14193 + }, + { + "epoch": 1.1455088370591557, + "grad_norm": 0.6680718660354614, + "learning_rate": 3.9171879164276334e-05, + "loss": 2.4705, + "step": 14194 + }, + { + "epoch": 1.1455895407957388, + "grad_norm": 0.710096538066864, + "learning_rate": 3.915934952064685e-05, + "loss": 2.474, + "step": 14195 + }, + { + "epoch": 1.145670244532322, + "grad_norm": 0.6927496790885925, + "learning_rate": 3.9146821393325414e-05, + "loss": 2.3979, + "step": 14196 + }, + { + "epoch": 1.1457509482689048, + "grad_norm": 0.6887550354003906, + "learning_rate": 3.913429478262427e-05, + "loss": 2.4588, + "step": 14197 + }, + { + "epoch": 1.1458316520054879, + "grad_norm": 0.6847062706947327, + "learning_rate": 3.912176968885559e-05, + "loss": 
2.4602, + "step": 14198 + }, + { + "epoch": 1.145912355742071, + "grad_norm": 0.6832349300384521, + "learning_rate": 3.91092461123316e-05, + "loss": 2.4672, + "step": 14199 + }, + { + "epoch": 1.1459930594786538, + "grad_norm": 0.6789066791534424, + "learning_rate": 3.909672405336432e-05, + "loss": 2.5029, + "step": 14200 + }, + { + "epoch": 1.146073763215237, + "grad_norm": 0.6953951120376587, + "learning_rate": 3.9084203512265885e-05, + "loss": 2.4223, + "step": 14201 + }, + { + "epoch": 1.1461544669518198, + "grad_norm": 0.6629688739776611, + "learning_rate": 3.907168448934836e-05, + "loss": 2.4028, + "step": 14202 + }, + { + "epoch": 1.1462351706884029, + "grad_norm": 0.6661216020584106, + "learning_rate": 3.90591669849237e-05, + "loss": 2.4668, + "step": 14203 + }, + { + "epoch": 1.146315874424986, + "grad_norm": 0.6814442276954651, + "learning_rate": 3.9046650999303894e-05, + "loss": 2.4273, + "step": 14204 + }, + { + "epoch": 1.1463965781615688, + "grad_norm": 0.6678626537322998, + "learning_rate": 3.903413653280088e-05, + "loss": 2.444, + "step": 14205 + }, + { + "epoch": 1.146477281898152, + "grad_norm": 0.6703703999519348, + "learning_rate": 3.902162358572655e-05, + "loss": 2.4273, + "step": 14206 + }, + { + "epoch": 1.1465579856347348, + "grad_norm": 0.7052578926086426, + "learning_rate": 3.900911215839276e-05, + "loss": 2.4397, + "step": 14207 + }, + { + "epoch": 1.1466386893713179, + "grad_norm": 0.6792036294937134, + "learning_rate": 3.899660225111136e-05, + "loss": 2.439, + "step": 14208 + }, + { + "epoch": 1.146719393107901, + "grad_norm": 0.6995401978492737, + "learning_rate": 3.898409386419407e-05, + "loss": 2.5002, + "step": 14209 + }, + { + "epoch": 1.1468000968444838, + "grad_norm": 0.6527338027954102, + "learning_rate": 3.897158699795265e-05, + "loss": 2.4523, + "step": 14210 + }, + { + "epoch": 1.146880800581067, + "grad_norm": 0.7509400248527527, + "learning_rate": 3.8959081652698814e-05, + "loss": 2.4193, + "step": 14211 + }, + { + "epoch": 
1.14696150431765, + "grad_norm": 0.6985350251197815, + "learning_rate": 3.894657782874426e-05, + "loss": 2.4251, + "step": 14212 + }, + { + "epoch": 1.1470422080542328, + "grad_norm": 0.6831483840942383, + "learning_rate": 3.893407552640055e-05, + "loss": 2.4172, + "step": 14213 + }, + { + "epoch": 1.147122911790816, + "grad_norm": 0.7281469702720642, + "learning_rate": 3.892157474597929e-05, + "loss": 2.4451, + "step": 14214 + }, + { + "epoch": 1.147203615527399, + "grad_norm": 0.7326027750968933, + "learning_rate": 3.8909075487792066e-05, + "loss": 2.3926, + "step": 14215 + }, + { + "epoch": 1.1472843192639819, + "grad_norm": 0.7030496597290039, + "learning_rate": 3.889657775215036e-05, + "loss": 2.435, + "step": 14216 + }, + { + "epoch": 1.147365023000565, + "grad_norm": 0.6915596127510071, + "learning_rate": 3.888408153936568e-05, + "loss": 2.4622, + "step": 14217 + }, + { + "epoch": 1.1474457267371478, + "grad_norm": 0.678600013256073, + "learning_rate": 3.8871586849749474e-05, + "loss": 2.4264, + "step": 14218 + }, + { + "epoch": 1.147526430473731, + "grad_norm": 0.7487786412239075, + "learning_rate": 3.885909368361308e-05, + "loss": 2.4038, + "step": 14219 + }, + { + "epoch": 1.147607134210314, + "grad_norm": 0.6658064723014832, + "learning_rate": 3.8846602041267886e-05, + "loss": 2.4079, + "step": 14220 + }, + { + "epoch": 1.1476878379468969, + "grad_norm": 0.6985111832618713, + "learning_rate": 3.883411192302527e-05, + "loss": 2.481, + "step": 14221 + }, + { + "epoch": 1.14776854168348, + "grad_norm": 0.7056208848953247, + "learning_rate": 3.8821623329196445e-05, + "loss": 2.4409, + "step": 14222 + }, + { + "epoch": 1.1478492454200628, + "grad_norm": 0.7107830047607422, + "learning_rate": 3.880913626009268e-05, + "loss": 2.4578, + "step": 14223 + }, + { + "epoch": 1.147929949156646, + "grad_norm": 0.6678555607795715, + "learning_rate": 3.87966507160252e-05, + "loss": 2.4548, + "step": 14224 + }, + { + "epoch": 1.148010652893229, + "grad_norm": 
0.6699830293655396, + "learning_rate": 3.8784166697305157e-05, + "loss": 2.3763, + "step": 14225 + }, + { + "epoch": 1.1480913566298119, + "grad_norm": 0.7695464491844177, + "learning_rate": 3.8771684204243716e-05, + "loss": 2.4774, + "step": 14226 + }, + { + "epoch": 1.148172060366395, + "grad_norm": 0.7801330089569092, + "learning_rate": 3.8759203237151954e-05, + "loss": 2.4598, + "step": 14227 + }, + { + "epoch": 1.148252764102978, + "grad_norm": 0.7029622793197632, + "learning_rate": 3.8746723796340955e-05, + "loss": 2.3901, + "step": 14228 + }, + { + "epoch": 1.148333467839561, + "grad_norm": 0.7472359538078308, + "learning_rate": 3.873424588212169e-05, + "loss": 2.4724, + "step": 14229 + }, + { + "epoch": 1.148414171576144, + "grad_norm": 0.6621725559234619, + "learning_rate": 3.872176949480517e-05, + "loss": 2.4523, + "step": 14230 + }, + { + "epoch": 1.148494875312727, + "grad_norm": 0.722658634185791, + "learning_rate": 3.8709294634702376e-05, + "loss": 2.4032, + "step": 14231 + }, + { + "epoch": 1.14857557904931, + "grad_norm": 0.7743202447891235, + "learning_rate": 3.869682130212413e-05, + "loss": 2.4373, + "step": 14232 + }, + { + "epoch": 1.148656282785893, + "grad_norm": 0.6906178593635559, + "learning_rate": 3.868434949738136e-05, + "loss": 2.4765, + "step": 14233 + }, + { + "epoch": 1.148736986522476, + "grad_norm": 0.6708275675773621, + "learning_rate": 3.86718792207849e-05, + "loss": 2.4263, + "step": 14234 + }, + { + "epoch": 1.148817690259059, + "grad_norm": 0.6992776989936829, + "learning_rate": 3.8659410472645494e-05, + "loss": 2.378, + "step": 14235 + }, + { + "epoch": 1.148898393995642, + "grad_norm": 0.7229011058807373, + "learning_rate": 3.864694325327389e-05, + "loss": 2.4075, + "step": 14236 + }, + { + "epoch": 1.148979097732225, + "grad_norm": 0.6622509956359863, + "learning_rate": 3.863447756298091e-05, + "loss": 2.3954, + "step": 14237 + }, + { + "epoch": 1.149059801468808, + "grad_norm": 0.7233534455299377, + "learning_rate": 
3.862201340207712e-05, + "loss": 2.4506, + "step": 14238 + }, + { + "epoch": 1.149140505205391, + "grad_norm": 0.716869056224823, + "learning_rate": 3.860955077087321e-05, + "loss": 2.4304, + "step": 14239 + }, + { + "epoch": 1.149221208941974, + "grad_norm": 0.6550257205963135, + "learning_rate": 3.8597089669679766e-05, + "loss": 2.4261, + "step": 14240 + }, + { + "epoch": 1.149301912678557, + "grad_norm": 0.6981741786003113, + "learning_rate": 3.858463009880738e-05, + "loss": 2.4115, + "step": 14241 + }, + { + "epoch": 1.14938261641514, + "grad_norm": 0.6792196035385132, + "learning_rate": 3.8572172058566534e-05, + "loss": 2.4195, + "step": 14242 + }, + { + "epoch": 1.149463320151723, + "grad_norm": 0.7278807163238525, + "learning_rate": 3.855971554926773e-05, + "loss": 2.418, + "step": 14243 + }, + { + "epoch": 1.1495440238883061, + "grad_norm": 0.6451076865196228, + "learning_rate": 3.8547260571221456e-05, + "loss": 2.4591, + "step": 14244 + }, + { + "epoch": 1.149624727624889, + "grad_norm": 0.7052451968193054, + "learning_rate": 3.853480712473805e-05, + "loss": 2.4023, + "step": 14245 + }, + { + "epoch": 1.149705431361472, + "grad_norm": 0.7016182541847229, + "learning_rate": 3.852235521012793e-05, + "loss": 2.4959, + "step": 14246 + }, + { + "epoch": 1.1497861350980552, + "grad_norm": 0.7287492156028748, + "learning_rate": 3.850990482770141e-05, + "loss": 2.3884, + "step": 14247 + }, + { + "epoch": 1.149866838834638, + "grad_norm": 0.6648508310317993, + "learning_rate": 3.84974559777688e-05, + "loss": 2.4632, + "step": 14248 + }, + { + "epoch": 1.1499475425712211, + "grad_norm": 0.7387828230857849, + "learning_rate": 3.848500866064036e-05, + "loss": 2.4053, + "step": 14249 + }, + { + "epoch": 1.150028246307804, + "grad_norm": 0.7230356931686401, + "learning_rate": 3.847256287662635e-05, + "loss": 2.5128, + "step": 14250 + }, + { + "epoch": 1.150108950044387, + "grad_norm": 0.7209547162055969, + "learning_rate": 3.846011862603686e-05, + "loss": 2.4626, + 
"step": 14251 + }, + { + "epoch": 1.1501896537809702, + "grad_norm": 0.7177916765213013, + "learning_rate": 3.844767590918209e-05, + "loss": 2.4469, + "step": 14252 + }, + { + "epoch": 1.150270357517553, + "grad_norm": 0.7850151658058167, + "learning_rate": 3.843523472637216e-05, + "loss": 2.4731, + "step": 14253 + }, + { + "epoch": 1.150351061254136, + "grad_norm": 0.7051519155502319, + "learning_rate": 3.8422795077917084e-05, + "loss": 2.3696, + "step": 14254 + }, + { + "epoch": 1.150431764990719, + "grad_norm": 0.7434025406837463, + "learning_rate": 3.841035696412692e-05, + "loss": 2.444, + "step": 14255 + }, + { + "epoch": 1.150512468727302, + "grad_norm": 0.7404719591140747, + "learning_rate": 3.839792038531166e-05, + "loss": 2.4415, + "step": 14256 + }, + { + "epoch": 1.1505931724638851, + "grad_norm": 0.6883764266967773, + "learning_rate": 3.838548534178125e-05, + "loss": 2.4887, + "step": 14257 + }, + { + "epoch": 1.150673876200468, + "grad_norm": 0.6697155237197876, + "learning_rate": 3.83730518338456e-05, + "loss": 2.3721, + "step": 14258 + }, + { + "epoch": 1.150754579937051, + "grad_norm": 0.68825763463974, + "learning_rate": 3.836061986181459e-05, + "loss": 2.4712, + "step": 14259 + }, + { + "epoch": 1.1508352836736342, + "grad_norm": 0.6810611486434937, + "learning_rate": 3.8348189425998114e-05, + "loss": 2.3995, + "step": 14260 + }, + { + "epoch": 1.150915987410217, + "grad_norm": 0.6718329787254333, + "learning_rate": 3.8335760526705866e-05, + "loss": 2.4068, + "step": 14261 + }, + { + "epoch": 1.1509966911468001, + "grad_norm": 0.694618284702301, + "learning_rate": 3.832333316424767e-05, + "loss": 2.458, + "step": 14262 + }, + { + "epoch": 1.151077394883383, + "grad_norm": 0.6824250817298889, + "learning_rate": 3.8310907338933266e-05, + "loss": 2.4623, + "step": 14263 + }, + { + "epoch": 1.151158098619966, + "grad_norm": 0.6875178217887878, + "learning_rate": 3.8298483051072264e-05, + "loss": 2.4827, + "step": 14264 + }, + { + "epoch": 
1.1512388023565492, + "grad_norm": 0.7868281602859497, + "learning_rate": 3.828606030097437e-05, + "loss": 2.4638, + "step": 14265 + }, + { + "epoch": 1.151319506093132, + "grad_norm": 0.7003639936447144, + "learning_rate": 3.8273639088949165e-05, + "loss": 2.4885, + "step": 14266 + }, + { + "epoch": 1.1514002098297151, + "grad_norm": 0.6965197920799255, + "learning_rate": 3.826121941530623e-05, + "loss": 2.3983, + "step": 14267 + }, + { + "epoch": 1.151480913566298, + "grad_norm": 0.7241101264953613, + "learning_rate": 3.824880128035509e-05, + "loss": 2.4598, + "step": 14268 + }, + { + "epoch": 1.151561617302881, + "grad_norm": 0.700764536857605, + "learning_rate": 3.823638468440528e-05, + "loss": 2.3627, + "step": 14269 + }, + { + "epoch": 1.1516423210394642, + "grad_norm": 0.6889846324920654, + "learning_rate": 3.822396962776619e-05, + "loss": 2.4442, + "step": 14270 + }, + { + "epoch": 1.151723024776047, + "grad_norm": 0.6660009026527405, + "learning_rate": 3.8211556110747245e-05, + "loss": 2.403, + "step": 14271 + }, + { + "epoch": 1.1518037285126301, + "grad_norm": 0.6537240743637085, + "learning_rate": 3.819914413365785e-05, + "loss": 2.4358, + "step": 14272 + }, + { + "epoch": 1.1518844322492132, + "grad_norm": 0.6852741837501526, + "learning_rate": 3.818673369680735e-05, + "loss": 2.4272, + "step": 14273 + }, + { + "epoch": 1.151965135985796, + "grad_norm": 0.701874852180481, + "learning_rate": 3.817432480050501e-05, + "loss": 2.4419, + "step": 14274 + }, + { + "epoch": 1.1520458397223792, + "grad_norm": 0.7089500427246094, + "learning_rate": 3.816191744506011e-05, + "loss": 2.4537, + "step": 14275 + }, + { + "epoch": 1.1521265434589623, + "grad_norm": 0.698564887046814, + "learning_rate": 3.8149511630781866e-05, + "loss": 2.3991, + "step": 14276 + }, + { + "epoch": 1.1522072471955451, + "grad_norm": 0.6940335035324097, + "learning_rate": 3.813710735797947e-05, + "loss": 2.5022, + "step": 14277 + }, + { + "epoch": 1.1522879509321282, + "grad_norm": 
0.6916826367378235, + "learning_rate": 3.812470462696208e-05, + "loss": 2.4449, + "step": 14278 + }, + { + "epoch": 1.152368654668711, + "grad_norm": 0.7115256190299988, + "learning_rate": 3.811230343803882e-05, + "loss": 2.4371, + "step": 14279 + }, + { + "epoch": 1.1524493584052942, + "grad_norm": 0.6857369542121887, + "learning_rate": 3.80999037915187e-05, + "loss": 2.4426, + "step": 14280 + }, + { + "epoch": 1.1525300621418773, + "grad_norm": 0.7605363130569458, + "learning_rate": 3.808750568771079e-05, + "loss": 2.4999, + "step": 14281 + }, + { + "epoch": 1.1526107658784601, + "grad_norm": 0.6604358553886414, + "learning_rate": 3.8075109126924115e-05, + "loss": 2.419, + "step": 14282 + }, + { + "epoch": 1.1526914696150432, + "grad_norm": 0.6945412755012512, + "learning_rate": 3.806271410946756e-05, + "loss": 2.4555, + "step": 14283 + }, + { + "epoch": 1.152772173351626, + "grad_norm": 0.7205908894538879, + "learning_rate": 3.805032063565007e-05, + "loss": 2.4745, + "step": 14284 + }, + { + "epoch": 1.1528528770882092, + "grad_norm": 0.7198025584220886, + "learning_rate": 3.8037928705780554e-05, + "loss": 2.4358, + "step": 14285 + }, + { + "epoch": 1.1529335808247922, + "grad_norm": 0.7231044769287109, + "learning_rate": 3.802553832016781e-05, + "loss": 2.4713, + "step": 14286 + }, + { + "epoch": 1.1530142845613751, + "grad_norm": 0.6878815293312073, + "learning_rate": 3.80131494791206e-05, + "loss": 2.4479, + "step": 14287 + }, + { + "epoch": 1.1530949882979582, + "grad_norm": 0.6930533647537231, + "learning_rate": 3.800076218294779e-05, + "loss": 2.3912, + "step": 14288 + }, + { + "epoch": 1.1531756920345413, + "grad_norm": 0.703521192073822, + "learning_rate": 3.798837643195808e-05, + "loss": 2.451, + "step": 14289 + }, + { + "epoch": 1.1532563957711242, + "grad_norm": 0.7099746465682983, + "learning_rate": 3.79759922264601e-05, + "loss": 2.4957, + "step": 14290 + }, + { + "epoch": 1.1533370995077072, + "grad_norm": 0.7268218398094177, + "learning_rate": 
3.7963609566762527e-05, + "loss": 2.4242, + "step": 14291 + }, + { + "epoch": 1.1534178032442903, + "grad_norm": 0.7465239763259888, + "learning_rate": 3.7951228453174004e-05, + "loss": 2.3867, + "step": 14292 + }, + { + "epoch": 1.1534985069808732, + "grad_norm": 0.704584002494812, + "learning_rate": 3.793884888600302e-05, + "loss": 2.5009, + "step": 14293 + }, + { + "epoch": 1.1535792107174563, + "grad_norm": 0.7057262063026428, + "learning_rate": 3.792647086555816e-05, + "loss": 2.4381, + "step": 14294 + }, + { + "epoch": 1.1536599144540391, + "grad_norm": 0.7045955061912537, + "learning_rate": 3.791409439214794e-05, + "loss": 2.4456, + "step": 14295 + }, + { + "epoch": 1.1537406181906222, + "grad_norm": 0.705476701259613, + "learning_rate": 3.790171946608074e-05, + "loss": 2.466, + "step": 14296 + }, + { + "epoch": 1.1538213219272053, + "grad_norm": 0.7128286957740784, + "learning_rate": 3.788934608766503e-05, + "loss": 2.4891, + "step": 14297 + }, + { + "epoch": 1.1539020256637882, + "grad_norm": 0.678144633769989, + "learning_rate": 3.787697425720918e-05, + "loss": 2.4453, + "step": 14298 + }, + { + "epoch": 1.1539827294003713, + "grad_norm": 0.754216730594635, + "learning_rate": 3.786460397502151e-05, + "loss": 2.4331, + "step": 14299 + }, + { + "epoch": 1.1540634331369541, + "grad_norm": 0.6881092190742493, + "learning_rate": 3.7852235241410325e-05, + "loss": 2.3692, + "step": 14300 + }, + { + "epoch": 1.1541441368735372, + "grad_norm": 0.7498507499694824, + "learning_rate": 3.783986805668395e-05, + "loss": 2.4556, + "step": 14301 + }, + { + "epoch": 1.1542248406101203, + "grad_norm": 0.6312216520309448, + "learning_rate": 3.7827502421150496e-05, + "loss": 2.4727, + "step": 14302 + }, + { + "epoch": 1.1543055443467032, + "grad_norm": 0.7156404256820679, + "learning_rate": 3.781513833511822e-05, + "loss": 2.4003, + "step": 14303 + }, + { + "epoch": 1.1543862480832863, + "grad_norm": 0.6589376926422119, + "learning_rate": 3.7802775798895226e-05, + "loss": 
2.4461, + "step": 14304 + }, + { + "epoch": 1.1544669518198694, + "grad_norm": 0.7259865999221802, + "learning_rate": 3.77904148127897e-05, + "loss": 2.4021, + "step": 14305 + }, + { + "epoch": 1.1545476555564522, + "grad_norm": 0.7248456478118896, + "learning_rate": 3.777805537710961e-05, + "loss": 2.4784, + "step": 14306 + }, + { + "epoch": 1.1546283592930353, + "grad_norm": 0.7085593342781067, + "learning_rate": 3.7765697492163034e-05, + "loss": 2.4394, + "step": 14307 + }, + { + "epoch": 1.1547090630296182, + "grad_norm": 0.7394313216209412, + "learning_rate": 3.775334115825796e-05, + "loss": 2.5055, + "step": 14308 + }, + { + "epoch": 1.1547897667662013, + "grad_norm": 0.7231999039649963, + "learning_rate": 3.7740986375702336e-05, + "loss": 2.4551, + "step": 14309 + }, + { + "epoch": 1.1548704705027844, + "grad_norm": 0.6875953078269958, + "learning_rate": 3.7728633144804084e-05, + "loss": 2.4641, + "step": 14310 + }, + { + "epoch": 1.1549511742393672, + "grad_norm": 0.7477203607559204, + "learning_rate": 3.7716281465871094e-05, + "loss": 2.4929, + "step": 14311 + }, + { + "epoch": 1.1550318779759503, + "grad_norm": 0.6653971076011658, + "learning_rate": 3.770393133921115e-05, + "loss": 2.4819, + "step": 14312 + }, + { + "epoch": 1.1551125817125332, + "grad_norm": 0.7267318964004517, + "learning_rate": 3.769158276513209e-05, + "loss": 2.4568, + "step": 14313 + }, + { + "epoch": 1.1551932854491163, + "grad_norm": 0.6675654053688049, + "learning_rate": 3.76792357439417e-05, + "loss": 2.4789, + "step": 14314 + }, + { + "epoch": 1.1552739891856993, + "grad_norm": 0.6847487688064575, + "learning_rate": 3.7666890275947616e-05, + "loss": 2.4034, + "step": 14315 + }, + { + "epoch": 1.1553546929222822, + "grad_norm": 0.811553418636322, + "learning_rate": 3.765454636145758e-05, + "loss": 2.5051, + "step": 14316 + }, + { + "epoch": 1.1554353966588653, + "grad_norm": 0.690026581287384, + "learning_rate": 3.7642204000779204e-05, + "loss": 2.4477, + "step": 14317 + }, + { + 
"epoch": 1.1555161003954484, + "grad_norm": 0.695810079574585, + "learning_rate": 3.762986319422013e-05, + "loss": 2.4516, + "step": 14318 + }, + { + "epoch": 1.1555968041320313, + "grad_norm": 0.6869217753410339, + "learning_rate": 3.7617523942087886e-05, + "loss": 2.3802, + "step": 14319 + }, + { + "epoch": 1.1556775078686143, + "grad_norm": 0.7109078764915466, + "learning_rate": 3.7605186244690016e-05, + "loss": 2.4306, + "step": 14320 + }, + { + "epoch": 1.1557582116051974, + "grad_norm": 0.7385044693946838, + "learning_rate": 3.759285010233404e-05, + "loss": 2.4288, + "step": 14321 + }, + { + "epoch": 1.1558389153417803, + "grad_norm": 0.6775605082511902, + "learning_rate": 3.7580515515327355e-05, + "loss": 2.4155, + "step": 14322 + }, + { + "epoch": 1.1559196190783634, + "grad_norm": 0.7325694561004639, + "learning_rate": 3.7568182483977375e-05, + "loss": 2.5035, + "step": 14323 + }, + { + "epoch": 1.1560003228149462, + "grad_norm": 0.6896799206733704, + "learning_rate": 3.7555851008591526e-05, + "loss": 2.4739, + "step": 14324 + }, + { + "epoch": 1.1560810265515293, + "grad_norm": 0.7086506485939026, + "learning_rate": 3.7543521089477065e-05, + "loss": 2.4815, + "step": 14325 + }, + { + "epoch": 1.1561617302881124, + "grad_norm": 0.6886687874794006, + "learning_rate": 3.753119272694132e-05, + "loss": 2.4261, + "step": 14326 + }, + { + "epoch": 1.1562424340246953, + "grad_norm": 0.675136148929596, + "learning_rate": 3.751886592129155e-05, + "loss": 2.3946, + "step": 14327 + }, + { + "epoch": 1.1563231377612784, + "grad_norm": 0.706729531288147, + "learning_rate": 3.7506540672834964e-05, + "loss": 2.4199, + "step": 14328 + }, + { + "epoch": 1.1564038414978612, + "grad_norm": 0.6790904998779297, + "learning_rate": 3.749421698187875e-05, + "loss": 2.4419, + "step": 14329 + }, + { + "epoch": 1.1564845452344443, + "grad_norm": 0.6688171029090881, + "learning_rate": 3.748189484873007e-05, + "loss": 2.4516, + "step": 14330 + }, + { + "epoch": 1.1565652489710274, + 
"grad_norm": 0.6782420873641968, + "learning_rate": 3.746957427369596e-05, + "loss": 2.4586, + "step": 14331 + }, + { + "epoch": 1.1566459527076103, + "grad_norm": 0.7633399367332458, + "learning_rate": 3.7457255257083514e-05, + "loss": 2.3776, + "step": 14332 + }, + { + "epoch": 1.1567266564441934, + "grad_norm": 0.680000364780426, + "learning_rate": 3.744493779919976e-05, + "loss": 2.4978, + "step": 14333 + }, + { + "epoch": 1.1568073601807765, + "grad_norm": 0.6993350386619568, + "learning_rate": 3.743262190035171e-05, + "loss": 2.3974, + "step": 14334 + }, + { + "epoch": 1.1568880639173593, + "grad_norm": 0.7316375374794006, + "learning_rate": 3.7420307560846234e-05, + "loss": 2.4423, + "step": 14335 + }, + { + "epoch": 1.1569687676539424, + "grad_norm": 0.7384842038154602, + "learning_rate": 3.7407994780990285e-05, + "loss": 2.4604, + "step": 14336 + }, + { + "epoch": 1.1570494713905255, + "grad_norm": 0.6980708837509155, + "learning_rate": 3.739568356109072e-05, + "loss": 2.4408, + "step": 14337 + }, + { + "epoch": 1.1571301751271084, + "grad_norm": 0.6510182619094849, + "learning_rate": 3.738337390145438e-05, + "loss": 2.4076, + "step": 14338 + }, + { + "epoch": 1.1572108788636915, + "grad_norm": 0.7458614706993103, + "learning_rate": 3.737106580238804e-05, + "loss": 2.4976, + "step": 14339 + }, + { + "epoch": 1.1572915826002743, + "grad_norm": 0.6663469672203064, + "learning_rate": 3.735875926419849e-05, + "loss": 2.4414, + "step": 14340 + }, + { + "epoch": 1.1573722863368574, + "grad_norm": 0.6611858606338501, + "learning_rate": 3.7346454287192355e-05, + "loss": 2.3783, + "step": 14341 + }, + { + "epoch": 1.1574529900734405, + "grad_norm": 0.6605291366577148, + "learning_rate": 3.7334150871676364e-05, + "loss": 2.4291, + "step": 14342 + }, + { + "epoch": 1.1575336938100234, + "grad_norm": 0.6879985928535461, + "learning_rate": 3.7321849017957186e-05, + "loss": 2.4229, + "step": 14343 + }, + { + "epoch": 1.1576143975466064, + "grad_norm": 
0.7466493844985962, + "learning_rate": 3.7309548726341334e-05, + "loss": 2.4278, + "step": 14344 + }, + { + "epoch": 1.1576951012831893, + "grad_norm": 0.7476457357406616, + "learning_rate": 3.72972499971354e-05, + "loss": 2.4944, + "step": 14345 + }, + { + "epoch": 1.1577758050197724, + "grad_norm": 0.6339364647865295, + "learning_rate": 3.728495283064594e-05, + "loss": 2.3753, + "step": 14346 + }, + { + "epoch": 1.1578565087563555, + "grad_norm": 0.6885230541229248, + "learning_rate": 3.7272657227179355e-05, + "loss": 2.4519, + "step": 14347 + }, + { + "epoch": 1.1579372124929384, + "grad_norm": 0.7561741471290588, + "learning_rate": 3.7260363187042126e-05, + "loss": 2.4808, + "step": 14348 + }, + { + "epoch": 1.1580179162295214, + "grad_norm": 0.8007705211639404, + "learning_rate": 3.724807071054062e-05, + "loss": 2.4649, + "step": 14349 + }, + { + "epoch": 1.1580986199661045, + "grad_norm": 0.6920937895774841, + "learning_rate": 3.72357797979813e-05, + "loss": 2.4145, + "step": 14350 + }, + { + "epoch": 1.1581793237026874, + "grad_norm": 0.7310675978660583, + "learning_rate": 3.7223490449670364e-05, + "loss": 2.4475, + "step": 14351 + }, + { + "epoch": 1.1582600274392705, + "grad_norm": 0.6600463390350342, + "learning_rate": 3.7211202665914155e-05, + "loss": 2.3938, + "step": 14352 + }, + { + "epoch": 1.1583407311758536, + "grad_norm": 0.690258800983429, + "learning_rate": 3.719891644701894e-05, + "loss": 2.3944, + "step": 14353 + }, + { + "epoch": 1.1584214349124364, + "grad_norm": 0.7075135111808777, + "learning_rate": 3.718663179329085e-05, + "loss": 2.3931, + "step": 14354 + }, + { + "epoch": 1.1585021386490195, + "grad_norm": 0.7416332960128784, + "learning_rate": 3.71743487050361e-05, + "loss": 2.4566, + "step": 14355 + }, + { + "epoch": 1.1585828423856024, + "grad_norm": 0.7459710836410522, + "learning_rate": 3.7162067182560846e-05, + "loss": 2.4232, + "step": 14356 + }, + { + "epoch": 1.1586635461221855, + "grad_norm": 0.7265400886535645, + 
"learning_rate": 3.71497872261711e-05, + "loss": 2.4798, + "step": 14357 + }, + { + "epoch": 1.1587442498587683, + "grad_norm": 0.7142636775970459, + "learning_rate": 3.713750883617294e-05, + "loss": 2.4576, + "step": 14358 + }, + { + "epoch": 1.1588249535953514, + "grad_norm": 0.7279871702194214, + "learning_rate": 3.712523201287239e-05, + "loss": 2.439, + "step": 14359 + }, + { + "epoch": 1.1589056573319345, + "grad_norm": 0.7151274681091309, + "learning_rate": 3.7112956756575414e-05, + "loss": 2.4684, + "step": 14360 + }, + { + "epoch": 1.1589863610685174, + "grad_norm": 0.7142657041549683, + "learning_rate": 3.7100683067587946e-05, + "loss": 2.4582, + "step": 14361 + }, + { + "epoch": 1.1590670648051005, + "grad_norm": 0.7716035842895508, + "learning_rate": 3.7088410946215914e-05, + "loss": 2.5038, + "step": 14362 + }, + { + "epoch": 1.1591477685416836, + "grad_norm": 0.7232338190078735, + "learning_rate": 3.707614039276509e-05, + "loss": 2.4558, + "step": 14363 + }, + { + "epoch": 1.1592284722782664, + "grad_norm": 0.7388719916343689, + "learning_rate": 3.706387140754134e-05, + "loss": 2.4535, + "step": 14364 + }, + { + "epoch": 1.1593091760148495, + "grad_norm": 0.7022652626037598, + "learning_rate": 3.7051603990850425e-05, + "loss": 2.4479, + "step": 14365 + }, + { + "epoch": 1.1593898797514326, + "grad_norm": 0.7861798405647278, + "learning_rate": 3.703933814299813e-05, + "loss": 2.4219, + "step": 14366 + }, + { + "epoch": 1.1594705834880155, + "grad_norm": 0.6928723454475403, + "learning_rate": 3.7027073864290074e-05, + "loss": 2.4401, + "step": 14367 + }, + { + "epoch": 1.1595512872245985, + "grad_norm": 0.6312821507453918, + "learning_rate": 3.701481115503194e-05, + "loss": 2.3975, + "step": 14368 + }, + { + "epoch": 1.1596319909611814, + "grad_norm": 0.7008257508277893, + "learning_rate": 3.700255001552937e-05, + "loss": 2.4988, + "step": 14369 + }, + { + "epoch": 1.1597126946977645, + "grad_norm": 0.6664693355560303, + "learning_rate": 
3.699029044608792e-05, + "loss": 2.4123, + "step": 14370 + }, + { + "epoch": 1.1597933984343476, + "grad_norm": 0.6613842844963074, + "learning_rate": 3.6978032447013145e-05, + "loss": 2.4802, + "step": 14371 + }, + { + "epoch": 1.1598741021709305, + "grad_norm": 0.707788348197937, + "learning_rate": 3.696577601861057e-05, + "loss": 2.4432, + "step": 14372 + }, + { + "epoch": 1.1599548059075135, + "grad_norm": 0.6547604203224182, + "learning_rate": 3.695352116118561e-05, + "loss": 2.412, + "step": 14373 + }, + { + "epoch": 1.1600355096440964, + "grad_norm": 0.7238109707832336, + "learning_rate": 3.69412678750437e-05, + "loss": 2.4858, + "step": 14374 + }, + { + "epoch": 1.1601162133806795, + "grad_norm": 0.8156580328941345, + "learning_rate": 3.692901616049026e-05, + "loss": 2.4063, + "step": 14375 + }, + { + "epoch": 1.1601969171172626, + "grad_norm": 0.7035481333732605, + "learning_rate": 3.6916766017830585e-05, + "loss": 2.4586, + "step": 14376 + }, + { + "epoch": 1.1602776208538454, + "grad_norm": 0.7523401379585266, + "learning_rate": 3.690451744736999e-05, + "loss": 2.4262, + "step": 14377 + }, + { + "epoch": 1.1603583245904285, + "grad_norm": 0.6740732192993164, + "learning_rate": 3.689227044941376e-05, + "loss": 2.5215, + "step": 14378 + }, + { + "epoch": 1.1604390283270116, + "grad_norm": 0.6502695083618164, + "learning_rate": 3.6880025024267115e-05, + "loss": 2.4292, + "step": 14379 + }, + { + "epoch": 1.1605197320635945, + "grad_norm": 0.7000409364700317, + "learning_rate": 3.686778117223524e-05, + "loss": 2.4323, + "step": 14380 + }, + { + "epoch": 1.1606004358001776, + "grad_norm": 0.7415478229522705, + "learning_rate": 3.68555388936233e-05, + "loss": 2.4515, + "step": 14381 + }, + { + "epoch": 1.1606811395367607, + "grad_norm": 0.6890547871589661, + "learning_rate": 3.684329818873641e-05, + "loss": 2.4115, + "step": 14382 + }, + { + "epoch": 1.1607618432733435, + "grad_norm": 0.8238685727119446, + "learning_rate": 3.68310590578796e-05, + "loss": 
2.4666, + "step": 14383 + }, + { + "epoch": 1.1608425470099266, + "grad_norm": 0.8098889589309692, + "learning_rate": 3.681882150135791e-05, + "loss": 2.4667, + "step": 14384 + }, + { + "epoch": 1.1609232507465095, + "grad_norm": 0.6932713985443115, + "learning_rate": 3.680658551947639e-05, + "loss": 2.4574, + "step": 14385 + }, + { + "epoch": 1.1610039544830926, + "grad_norm": 0.7062943577766418, + "learning_rate": 3.6794351112539915e-05, + "loss": 2.4408, + "step": 14386 + }, + { + "epoch": 1.1610846582196757, + "grad_norm": 0.7859255075454712, + "learning_rate": 3.678211828085343e-05, + "loss": 2.3946, + "step": 14387 + }, + { + "epoch": 1.1611653619562585, + "grad_norm": 0.674609899520874, + "learning_rate": 3.676988702472181e-05, + "loss": 2.4456, + "step": 14388 + }, + { + "epoch": 1.1612460656928416, + "grad_norm": 0.7068402171134949, + "learning_rate": 3.675765734444989e-05, + "loss": 2.4393, + "step": 14389 + }, + { + "epoch": 1.1613267694294245, + "grad_norm": 0.7276526689529419, + "learning_rate": 3.674542924034246e-05, + "loss": 2.456, + "step": 14390 + }, + { + "epoch": 1.1614074731660076, + "grad_norm": 0.7670585513114929, + "learning_rate": 3.673320271270433e-05, + "loss": 2.3774, + "step": 14391 + }, + { + "epoch": 1.1614881769025907, + "grad_norm": 0.702173113822937, + "learning_rate": 3.672097776184013e-05, + "loss": 2.3974, + "step": 14392 + }, + { + "epoch": 1.1615688806391735, + "grad_norm": 0.6922066807746887, + "learning_rate": 3.670875438805457e-05, + "loss": 2.4035, + "step": 14393 + }, + { + "epoch": 1.1616495843757566, + "grad_norm": 0.6675707697868347, + "learning_rate": 3.6696532591652335e-05, + "loss": 2.4369, + "step": 14394 + }, + { + "epoch": 1.1617302881123397, + "grad_norm": 0.6939712762832642, + "learning_rate": 3.668431237293796e-05, + "loss": 2.4265, + "step": 14395 + }, + { + "epoch": 1.1618109918489226, + "grad_norm": 0.719510018825531, + "learning_rate": 3.667209373221602e-05, + "loss": 2.4686, + "step": 14396 + }, + { + 
"epoch": 1.1618916955855056, + "grad_norm": 0.7167489528656006, + "learning_rate": 3.665987666979104e-05, + "loss": 2.5077, + "step": 14397 + }, + { + "epoch": 1.1619723993220887, + "grad_norm": 0.6539514064788818, + "learning_rate": 3.664766118596754e-05, + "loss": 2.4476, + "step": 14398 + }, + { + "epoch": 1.1620531030586716, + "grad_norm": 0.6926440596580505, + "learning_rate": 3.6635447281049876e-05, + "loss": 2.4336, + "step": 14399 + }, + { + "epoch": 1.1621338067952547, + "grad_norm": 0.7124993205070496, + "learning_rate": 3.662323495534252e-05, + "loss": 2.3938, + "step": 14400 + }, + { + "epoch": 1.1622145105318376, + "grad_norm": 0.7073954939842224, + "learning_rate": 3.661102420914986e-05, + "loss": 2.4232, + "step": 14401 + }, + { + "epoch": 1.1622952142684206, + "grad_norm": 0.7491076588630676, + "learning_rate": 3.659881504277613e-05, + "loss": 2.5047, + "step": 14402 + }, + { + "epoch": 1.1623759180050037, + "grad_norm": 0.6698675155639648, + "learning_rate": 3.658660745652568e-05, + "loss": 2.4164, + "step": 14403 + }, + { + "epoch": 1.1624566217415866, + "grad_norm": 0.6576815843582153, + "learning_rate": 3.657440145070276e-05, + "loss": 2.4368, + "step": 14404 + }, + { + "epoch": 1.1625373254781697, + "grad_norm": 0.8236953020095825, + "learning_rate": 3.6562197025611524e-05, + "loss": 2.5041, + "step": 14405 + }, + { + "epoch": 1.1626180292147525, + "grad_norm": 0.7391532063484192, + "learning_rate": 3.6549994181556157e-05, + "loss": 2.4556, + "step": 14406 + }, + { + "epoch": 1.1626987329513356, + "grad_norm": 0.6529936790466309, + "learning_rate": 3.653779291884084e-05, + "loss": 2.4559, + "step": 14407 + }, + { + "epoch": 1.1627794366879187, + "grad_norm": 0.7101796269416809, + "learning_rate": 3.652559323776957e-05, + "loss": 2.3937, + "step": 14408 + }, + { + "epoch": 1.1628601404245016, + "grad_norm": 0.6890308260917664, + "learning_rate": 3.651339513864645e-05, + "loss": 2.4694, + "step": 14409 + }, + { + "epoch": 1.1629408441610847, + 
"grad_norm": 0.6919918060302734, + "learning_rate": 3.650119862177548e-05, + "loss": 2.4793, + "step": 14410 + }, + { + "epoch": 1.1630215478976678, + "grad_norm": 0.6553575992584229, + "learning_rate": 3.6489003687460624e-05, + "loss": 2.454, + "step": 14411 + }, + { + "epoch": 1.1631022516342506, + "grad_norm": 0.7095460891723633, + "learning_rate": 3.6476810336005804e-05, + "loss": 2.4672, + "step": 14412 + }, + { + "epoch": 1.1631829553708337, + "grad_norm": 0.738301694393158, + "learning_rate": 3.6464618567714935e-05, + "loss": 2.4369, + "step": 14413 + }, + { + "epoch": 1.1632636591074166, + "grad_norm": 0.7574542760848999, + "learning_rate": 3.645242838289189e-05, + "loss": 2.4981, + "step": 14414 + }, + { + "epoch": 1.1633443628439997, + "grad_norm": 0.6780585646629333, + "learning_rate": 3.64402397818404e-05, + "loss": 2.4811, + "step": 14415 + }, + { + "epoch": 1.1634250665805828, + "grad_norm": 0.7050060629844666, + "learning_rate": 3.6428052764864287e-05, + "loss": 2.4607, + "step": 14416 + }, + { + "epoch": 1.1635057703171656, + "grad_norm": 0.6946923136711121, + "learning_rate": 3.6415867332267316e-05, + "loss": 2.4482, + "step": 14417 + }, + { + "epoch": 1.1635864740537487, + "grad_norm": 0.7202015519142151, + "learning_rate": 3.64036834843531e-05, + "loss": 2.4764, + "step": 14418 + }, + { + "epoch": 1.1636671777903316, + "grad_norm": 0.7845996618270874, + "learning_rate": 3.639150122142534e-05, + "loss": 2.4926, + "step": 14419 + }, + { + "epoch": 1.1637478815269147, + "grad_norm": 0.6924630403518677, + "learning_rate": 3.6379320543787645e-05, + "loss": 2.4664, + "step": 14420 + }, + { + "epoch": 1.1638285852634978, + "grad_norm": 0.7225920557975769, + "learning_rate": 3.636714145174358e-05, + "loss": 2.4638, + "step": 14421 + }, + { + "epoch": 1.1639092890000806, + "grad_norm": 0.6587103605270386, + "learning_rate": 3.63549639455967e-05, + "loss": 2.3629, + "step": 14422 + }, + { + "epoch": 1.1639899927366637, + "grad_norm": 0.7537658214569092, + 
"learning_rate": 3.634278802565051e-05, + "loss": 2.4971, + "step": 14423 + }, + { + "epoch": 1.1640706964732468, + "grad_norm": 0.6881381273269653, + "learning_rate": 3.633061369220841e-05, + "loss": 2.3737, + "step": 14424 + }, + { + "epoch": 1.1641514002098297, + "grad_norm": 0.693779468536377, + "learning_rate": 3.6318440945573864e-05, + "loss": 2.4346, + "step": 14425 + }, + { + "epoch": 1.1642321039464127, + "grad_norm": 0.777563750743866, + "learning_rate": 3.6306269786050265e-05, + "loss": 2.4288, + "step": 14426 + }, + { + "epoch": 1.1643128076829958, + "grad_norm": 0.6786738634109497, + "learning_rate": 3.629410021394087e-05, + "loss": 2.4094, + "step": 14427 + }, + { + "epoch": 1.1643935114195787, + "grad_norm": 0.7478442788124084, + "learning_rate": 3.628193222954904e-05, + "loss": 2.4163, + "step": 14428 + }, + { + "epoch": 1.1644742151561618, + "grad_norm": 0.6530766487121582, + "learning_rate": 3.626976583317803e-05, + "loss": 2.4328, + "step": 14429 + }, + { + "epoch": 1.1645549188927447, + "grad_norm": 0.6665371060371399, + "learning_rate": 3.6257601025131026e-05, + "loss": 2.4006, + "step": 14430 + }, + { + "epoch": 1.1646356226293277, + "grad_norm": 0.7184741497039795, + "learning_rate": 3.624543780571125e-05, + "loss": 2.462, + "step": 14431 + }, + { + "epoch": 1.1647163263659108, + "grad_norm": 0.7039462327957153, + "learning_rate": 3.6233276175221794e-05, + "loss": 2.4321, + "step": 14432 + }, + { + "epoch": 1.1647970301024937, + "grad_norm": 0.7039144039154053, + "learning_rate": 3.622111613396584e-05, + "loss": 2.4399, + "step": 14433 + }, + { + "epoch": 1.1648777338390768, + "grad_norm": 0.6690253615379333, + "learning_rate": 3.620895768224635e-05, + "loss": 2.3976, + "step": 14434 + }, + { + "epoch": 1.1649584375756596, + "grad_norm": 0.7048032879829407, + "learning_rate": 3.6196800820366384e-05, + "loss": 2.4848, + "step": 14435 + }, + { + "epoch": 1.1650391413122427, + "grad_norm": 0.668971836566925, + "learning_rate": 
3.618464554862896e-05, + "loss": 2.4614, + "step": 14436 + }, + { + "epoch": 1.1651198450488258, + "grad_norm": 0.704858660697937, + "learning_rate": 3.617249186733695e-05, + "loss": 2.3962, + "step": 14437 + }, + { + "epoch": 1.1652005487854087, + "grad_norm": 0.692435085773468, + "learning_rate": 3.6160339776793296e-05, + "loss": 2.4059, + "step": 14438 + }, + { + "epoch": 1.1652812525219918, + "grad_norm": 0.6774182319641113, + "learning_rate": 3.614818927730085e-05, + "loss": 2.4975, + "step": 14439 + }, + { + "epoch": 1.1653619562585749, + "grad_norm": 0.6507411003112793, + "learning_rate": 3.613604036916243e-05, + "loss": 2.5029, + "step": 14440 + }, + { + "epoch": 1.1654426599951577, + "grad_norm": 0.7223206162452698, + "learning_rate": 3.612389305268084e-05, + "loss": 2.4599, + "step": 14441 + }, + { + "epoch": 1.1655233637317408, + "grad_norm": 0.6523364186286926, + "learning_rate": 3.611174732815883e-05, + "loss": 2.4521, + "step": 14442 + }, + { + "epoch": 1.165604067468324, + "grad_norm": 0.6668452024459839, + "learning_rate": 3.6099603195899046e-05, + "loss": 2.4082, + "step": 14443 + }, + { + "epoch": 1.1656847712049068, + "grad_norm": 0.6878299117088318, + "learning_rate": 3.60874606562042e-05, + "loss": 2.4144, + "step": 14444 + }, + { + "epoch": 1.1657654749414899, + "grad_norm": 0.6662277579307556, + "learning_rate": 3.6075319709376895e-05, + "loss": 2.438, + "step": 14445 + }, + { + "epoch": 1.1658461786780727, + "grad_norm": 0.721422553062439, + "learning_rate": 3.606318035571976e-05, + "loss": 2.4414, + "step": 14446 + }, + { + "epoch": 1.1659268824146558, + "grad_norm": 0.6739782691001892, + "learning_rate": 3.6051042595535264e-05, + "loss": 2.4093, + "step": 14447 + }, + { + "epoch": 1.166007586151239, + "grad_norm": 0.6890884637832642, + "learning_rate": 3.603890642912596e-05, + "loss": 2.4385, + "step": 14448 + }, + { + "epoch": 1.1660882898878218, + "grad_norm": 0.6503998637199402, + "learning_rate": 3.602677185679433e-05, + "loss": 
2.4498, + "step": 14449 + }, + { + "epoch": 1.1661689936244048, + "grad_norm": 0.6748046875, + "learning_rate": 3.601463887884271e-05, + "loss": 2.3739, + "step": 14450 + }, + { + "epoch": 1.1662496973609877, + "grad_norm": 0.6843422651290894, + "learning_rate": 3.600250749557358e-05, + "loss": 2.4323, + "step": 14451 + }, + { + "epoch": 1.1663304010975708, + "grad_norm": 0.7061208486557007, + "learning_rate": 3.599037770728929e-05, + "loss": 2.4611, + "step": 14452 + }, + { + "epoch": 1.166411104834154, + "grad_norm": 0.6614537239074707, + "learning_rate": 3.597824951429208e-05, + "loss": 2.4656, + "step": 14453 + }, + { + "epoch": 1.1664918085707368, + "grad_norm": 0.6620328426361084, + "learning_rate": 3.596612291688424e-05, + "loss": 2.415, + "step": 14454 + }, + { + "epoch": 1.1665725123073198, + "grad_norm": 0.6936565041542053, + "learning_rate": 3.595399791536804e-05, + "loss": 2.4655, + "step": 14455 + }, + { + "epoch": 1.166653216043903, + "grad_norm": 0.6766063570976257, + "learning_rate": 3.594187451004559e-05, + "loss": 2.4628, + "step": 14456 + }, + { + "epoch": 1.1667339197804858, + "grad_norm": 0.6588734984397888, + "learning_rate": 3.592975270121909e-05, + "loss": 2.4503, + "step": 14457 + }, + { + "epoch": 1.1668146235170689, + "grad_norm": 0.7290894985198975, + "learning_rate": 3.591763248919062e-05, + "loss": 2.5075, + "step": 14458 + }, + { + "epoch": 1.1668953272536517, + "grad_norm": 0.6952784657478333, + "learning_rate": 3.590551387426231e-05, + "loss": 2.4258, + "step": 14459 + }, + { + "epoch": 1.1669760309902348, + "grad_norm": 0.6737042665481567, + "learning_rate": 3.5893396856736096e-05, + "loss": 2.4459, + "step": 14460 + }, + { + "epoch": 1.167056734726818, + "grad_norm": 0.6616976857185364, + "learning_rate": 3.588128143691397e-05, + "loss": 2.4726, + "step": 14461 + }, + { + "epoch": 1.1671374384634008, + "grad_norm": 0.7017171382904053, + "learning_rate": 3.5869167615098e-05, + "loss": 2.375, + "step": 14462 + }, + { + "epoch": 
1.1672181421999839, + "grad_norm": 0.7153809666633606, + "learning_rate": 3.585705539158997e-05, + "loss": 2.4271, + "step": 14463 + }, + { + "epoch": 1.1672988459365667, + "grad_norm": 0.749196469783783, + "learning_rate": 3.584494476669179e-05, + "loss": 2.4713, + "step": 14464 + }, + { + "epoch": 1.1673795496731498, + "grad_norm": 0.6593676209449768, + "learning_rate": 3.583283574070533e-05, + "loss": 2.4276, + "step": 14465 + }, + { + "epoch": 1.167460253409733, + "grad_norm": 0.6949084401130676, + "learning_rate": 3.5820728313932295e-05, + "loss": 2.4128, + "step": 14466 + }, + { + "epoch": 1.1675409571463158, + "grad_norm": 0.6795482039451599, + "learning_rate": 3.5808622486674484e-05, + "loss": 2.485, + "step": 14467 + }, + { + "epoch": 1.1676216608828989, + "grad_norm": 0.6763483881950378, + "learning_rate": 3.5796518259233625e-05, + "loss": 2.4063, + "step": 14468 + }, + { + "epoch": 1.167702364619482, + "grad_norm": 0.665687620639801, + "learning_rate": 3.578441563191133e-05, + "loss": 2.437, + "step": 14469 + }, + { + "epoch": 1.1677830683560648, + "grad_norm": 0.6338435411453247, + "learning_rate": 3.577231460500926e-05, + "loss": 2.3747, + "step": 14470 + }, + { + "epoch": 1.167863772092648, + "grad_norm": 0.7031865119934082, + "learning_rate": 3.5760215178829e-05, + "loss": 2.3952, + "step": 14471 + }, + { + "epoch": 1.167944475829231, + "grad_norm": 0.7544599771499634, + "learning_rate": 3.5748117353672106e-05, + "loss": 2.3941, + "step": 14472 + }, + { + "epoch": 1.1680251795658139, + "grad_norm": 0.7271532416343689, + "learning_rate": 3.5736021129840083e-05, + "loss": 2.4371, + "step": 14473 + }, + { + "epoch": 1.168105883302397, + "grad_norm": 0.709048867225647, + "learning_rate": 3.572392650763441e-05, + "loss": 2.482, + "step": 14474 + }, + { + "epoch": 1.1681865870389798, + "grad_norm": 0.6894589066505432, + "learning_rate": 3.571183348735653e-05, + "loss": 2.4347, + "step": 14475 + }, + { + "epoch": 1.168267290775563, + "grad_norm": 
0.6680620908737183, + "learning_rate": 3.5699742069307774e-05, + "loss": 2.3995, + "step": 14476 + }, + { + "epoch": 1.168347994512146, + "grad_norm": 0.701669454574585, + "learning_rate": 3.568765225378954e-05, + "loss": 2.4045, + "step": 14477 + }, + { + "epoch": 1.1684286982487289, + "grad_norm": 0.7102392911911011, + "learning_rate": 3.567556404110315e-05, + "loss": 2.4695, + "step": 14478 + }, + { + "epoch": 1.168509401985312, + "grad_norm": 0.6820430755615234, + "learning_rate": 3.566347743154982e-05, + "loss": 2.4155, + "step": 14479 + }, + { + "epoch": 1.1685901057218948, + "grad_norm": 0.6611022353172302, + "learning_rate": 3.565139242543081e-05, + "loss": 2.3992, + "step": 14480 + }, + { + "epoch": 1.168670809458478, + "grad_norm": 0.6844382882118225, + "learning_rate": 3.5639309023047306e-05, + "loss": 2.4345, + "step": 14481 + }, + { + "epoch": 1.168751513195061, + "grad_norm": 0.7557988166809082, + "learning_rate": 3.5627227224700464e-05, + "loss": 2.4454, + "step": 14482 + }, + { + "epoch": 1.1688322169316439, + "grad_norm": 0.6652555465698242, + "learning_rate": 3.5615147030691384e-05, + "loss": 2.3749, + "step": 14483 + }, + { + "epoch": 1.168912920668227, + "grad_norm": 0.6912989020347595, + "learning_rate": 3.56030684413212e-05, + "loss": 2.4737, + "step": 14484 + }, + { + "epoch": 1.16899362440481, + "grad_norm": 0.735103964805603, + "learning_rate": 3.559099145689083e-05, + "loss": 2.4098, + "step": 14485 + }, + { + "epoch": 1.169074328141393, + "grad_norm": 0.6873028874397278, + "learning_rate": 3.557891607770133e-05, + "loss": 2.4247, + "step": 14486 + }, + { + "epoch": 1.169155031877976, + "grad_norm": 0.7364680171012878, + "learning_rate": 3.556684230405367e-05, + "loss": 2.4314, + "step": 14487 + }, + { + "epoch": 1.169235735614559, + "grad_norm": 0.679122269153595, + "learning_rate": 3.55547701362487e-05, + "loss": 2.4196, + "step": 14488 + }, + { + "epoch": 1.169316439351142, + "grad_norm": 0.6783872246742249, + "learning_rate": 
3.554269957458731e-05, + "loss": 2.4212, + "step": 14489 + }, + { + "epoch": 1.169397143087725, + "grad_norm": 0.7434942126274109, + "learning_rate": 3.553063061937034e-05, + "loss": 2.4139, + "step": 14490 + }, + { + "epoch": 1.1694778468243079, + "grad_norm": 0.6799852252006531, + "learning_rate": 3.55185632708986e-05, + "loss": 2.4252, + "step": 14491 + }, + { + "epoch": 1.169558550560891, + "grad_norm": 0.7040107250213623, + "learning_rate": 3.5506497529472795e-05, + "loss": 2.3937, + "step": 14492 + }, + { + "epoch": 1.169639254297474, + "grad_norm": 0.7350315451622009, + "learning_rate": 3.549443339539368e-05, + "loss": 2.4063, + "step": 14493 + }, + { + "epoch": 1.169719958034057, + "grad_norm": 0.694521963596344, + "learning_rate": 3.548237086896192e-05, + "loss": 2.4715, + "step": 14494 + }, + { + "epoch": 1.16980066177064, + "grad_norm": 0.6648221015930176, + "learning_rate": 3.5470309950478096e-05, + "loss": 2.4365, + "step": 14495 + }, + { + "epoch": 1.1698813655072229, + "grad_norm": 0.688024640083313, + "learning_rate": 3.545825064024284e-05, + "loss": 2.449, + "step": 14496 + }, + { + "epoch": 1.169962069243806, + "grad_norm": 0.6743311882019043, + "learning_rate": 3.544619293855672e-05, + "loss": 2.4283, + "step": 14497 + }, + { + "epoch": 1.170042772980389, + "grad_norm": 0.669119119644165, + "learning_rate": 3.543413684572019e-05, + "loss": 2.4363, + "step": 14498 + }, + { + "epoch": 1.170123476716972, + "grad_norm": 0.6998667120933533, + "learning_rate": 3.5422082362033745e-05, + "loss": 2.425, + "step": 14499 + }, + { + "epoch": 1.170204180453555, + "grad_norm": 0.7681630253791809, + "learning_rate": 3.5410029487797845e-05, + "loss": 2.4382, + "step": 14500 + }, + { + "epoch": 1.170284884190138, + "grad_norm": 0.6925049424171448, + "learning_rate": 3.539797822331279e-05, + "loss": 2.4261, + "step": 14501 + }, + { + "epoch": 1.170365587926721, + "grad_norm": 0.7145542502403259, + "learning_rate": 3.538592856887901e-05, + "loss": 2.4681, + "step": 
14502 + }, + { + "epoch": 1.170446291663304, + "grad_norm": 0.6441611647605896, + "learning_rate": 3.537388052479684e-05, + "loss": 2.4187, + "step": 14503 + }, + { + "epoch": 1.1705269953998871, + "grad_norm": 0.6622560620307922, + "learning_rate": 3.5361834091366466e-05, + "loss": 2.4615, + "step": 14504 + }, + { + "epoch": 1.17060769913647, + "grad_norm": 0.6987677812576294, + "learning_rate": 3.5349789268888144e-05, + "loss": 2.413, + "step": 14505 + }, + { + "epoch": 1.170688402873053, + "grad_norm": 0.668358325958252, + "learning_rate": 3.533774605766207e-05, + "loss": 2.5146, + "step": 14506 + }, + { + "epoch": 1.170769106609636, + "grad_norm": 0.7514958381652832, + "learning_rate": 3.532570445798844e-05, + "loss": 2.4474, + "step": 14507 + }, + { + "epoch": 1.170849810346219, + "grad_norm": 0.6454465389251709, + "learning_rate": 3.5313664470167276e-05, + "loss": 2.3911, + "step": 14508 + }, + { + "epoch": 1.170930514082802, + "grad_norm": 0.6653602719306946, + "learning_rate": 3.5301626094498674e-05, + "loss": 2.4223, + "step": 14509 + }, + { + "epoch": 1.171011217819385, + "grad_norm": 0.6782815456390381, + "learning_rate": 3.5289589331282715e-05, + "loss": 2.457, + "step": 14510 + }, + { + "epoch": 1.171091921555968, + "grad_norm": 0.720973014831543, + "learning_rate": 3.527755418081932e-05, + "loss": 2.4541, + "step": 14511 + }, + { + "epoch": 1.171172625292551, + "grad_norm": 0.6300156712532043, + "learning_rate": 3.526552064340841e-05, + "loss": 2.4451, + "step": 14512 + }, + { + "epoch": 1.171253329029134, + "grad_norm": 0.7660964727401733, + "learning_rate": 3.5253488719350026e-05, + "loss": 2.5031, + "step": 14513 + }, + { + "epoch": 1.1713340327657171, + "grad_norm": 0.6931602358818054, + "learning_rate": 3.5241458408943905e-05, + "loss": 2.4249, + "step": 14514 + }, + { + "epoch": 1.1714147365023, + "grad_norm": 0.6863045692443848, + "learning_rate": 3.522942971248993e-05, + "loss": 2.4429, + "step": 14515 + }, + { + "epoch": 1.171495440238883, + 
"grad_norm": 0.6993531584739685, + "learning_rate": 3.521740263028791e-05, + "loss": 2.3864, + "step": 14516 + }, + { + "epoch": 1.1715761439754662, + "grad_norm": 0.807991087436676, + "learning_rate": 3.520537716263753e-05, + "loss": 2.459, + "step": 14517 + }, + { + "epoch": 1.171656847712049, + "grad_norm": 0.6722908020019531, + "learning_rate": 3.519335330983852e-05, + "loss": 2.4426, + "step": 14518 + }, + { + "epoch": 1.1717375514486321, + "grad_norm": 0.6934377551078796, + "learning_rate": 3.5181331072190585e-05, + "loss": 2.4326, + "step": 14519 + }, + { + "epoch": 1.171818255185215, + "grad_norm": 0.6532938480377197, + "learning_rate": 3.516931044999329e-05, + "loss": 2.3778, + "step": 14520 + }, + { + "epoch": 1.171898958921798, + "grad_norm": 0.6779183745384216, + "learning_rate": 3.5157291443546247e-05, + "loss": 2.4089, + "step": 14521 + }, + { + "epoch": 1.1719796626583812, + "grad_norm": 0.687005877494812, + "learning_rate": 3.514527405314899e-05, + "loss": 2.4669, + "step": 14522 + }, + { + "epoch": 1.172060366394964, + "grad_norm": 0.6804830431938171, + "learning_rate": 3.5133258279101045e-05, + "loss": 2.4789, + "step": 14523 + }, + { + "epoch": 1.1721410701315471, + "grad_norm": 0.8345538973808289, + "learning_rate": 3.512124412170187e-05, + "loss": 2.4506, + "step": 14524 + }, + { + "epoch": 1.17222177386813, + "grad_norm": 0.6571901440620422, + "learning_rate": 3.510923158125088e-05, + "loss": 2.4911, + "step": 14525 + }, + { + "epoch": 1.172302477604713, + "grad_norm": 0.6607047915458679, + "learning_rate": 3.5097220658047504e-05, + "loss": 2.4882, + "step": 14526 + }, + { + "epoch": 1.1723831813412962, + "grad_norm": 0.6883669495582581, + "learning_rate": 3.508521135239101e-05, + "loss": 2.4083, + "step": 14527 + }, + { + "epoch": 1.172463885077879, + "grad_norm": 0.6792941689491272, + "learning_rate": 3.5073203664580746e-05, + "loss": 2.368, + "step": 14528 + }, + { + "epoch": 1.172544588814462, + "grad_norm": 0.6675198674201965, + 
"learning_rate": 3.506119759491598e-05, + "loss": 2.4193, + "step": 14529 + }, + { + "epoch": 1.1726252925510452, + "grad_norm": 0.7267464399337769, + "learning_rate": 3.504919314369591e-05, + "loss": 2.3906, + "step": 14530 + }, + { + "epoch": 1.172705996287628, + "grad_norm": 0.6927710175514221, + "learning_rate": 3.503719031121973e-05, + "loss": 2.4082, + "step": 14531 + }, + { + "epoch": 1.1727867000242111, + "grad_norm": 0.7231000065803528, + "learning_rate": 3.502518909778656e-05, + "loss": 2.4845, + "step": 14532 + }, + { + "epoch": 1.1728674037607942, + "grad_norm": 0.7087520360946655, + "learning_rate": 3.5013189503695544e-05, + "loss": 2.4622, + "step": 14533 + }, + { + "epoch": 1.172948107497377, + "grad_norm": 0.6669846177101135, + "learning_rate": 3.5001191529245716e-05, + "loss": 2.4151, + "step": 14534 + }, + { + "epoch": 1.1730288112339602, + "grad_norm": 0.7338447570800781, + "learning_rate": 3.4989195174736134e-05, + "loss": 2.4274, + "step": 14535 + }, + { + "epoch": 1.173109514970543, + "grad_norm": 0.7032054662704468, + "learning_rate": 3.497720044046572e-05, + "loss": 2.4066, + "step": 14536 + }, + { + "epoch": 1.1731902187071261, + "grad_norm": 0.6571083068847656, + "learning_rate": 3.496520732673344e-05, + "loss": 2.4581, + "step": 14537 + }, + { + "epoch": 1.1732709224437092, + "grad_norm": 0.6618444919586182, + "learning_rate": 3.495321583383819e-05, + "loss": 2.3675, + "step": 14538 + }, + { + "epoch": 1.173351626180292, + "grad_norm": 0.6597652435302734, + "learning_rate": 3.4941225962078885e-05, + "loss": 2.416, + "step": 14539 + }, + { + "epoch": 1.1734323299168752, + "grad_norm": 0.682634711265564, + "learning_rate": 3.492923771175425e-05, + "loss": 2.5081, + "step": 14540 + }, + { + "epoch": 1.173513033653458, + "grad_norm": 0.7046132683753967, + "learning_rate": 3.49172510831631e-05, + "loss": 2.4439, + "step": 14541 + }, + { + "epoch": 1.1735937373900411, + "grad_norm": 0.6734833717346191, + "learning_rate": 3.4905266076604196e-05, 
+ "loss": 2.4348, + "step": 14542 + }, + { + "epoch": 1.1736744411266242, + "grad_norm": 0.6624744534492493, + "learning_rate": 3.4893282692376214e-05, + "loss": 2.4364, + "step": 14543 + }, + { + "epoch": 1.173755144863207, + "grad_norm": 0.8425754308700562, + "learning_rate": 3.4881300930777815e-05, + "loss": 2.4803, + "step": 14544 + }, + { + "epoch": 1.1738358485997902, + "grad_norm": 0.6438888311386108, + "learning_rate": 3.486932079210766e-05, + "loss": 2.3973, + "step": 14545 + }, + { + "epoch": 1.1739165523363733, + "grad_norm": 0.650399923324585, + "learning_rate": 3.485734227666424e-05, + "loss": 2.4183, + "step": 14546 + }, + { + "epoch": 1.1739972560729561, + "grad_norm": 0.6857002973556519, + "learning_rate": 3.4845365384746144e-05, + "loss": 2.4061, + "step": 14547 + }, + { + "epoch": 1.1740779598095392, + "grad_norm": 0.6680994629859924, + "learning_rate": 3.483339011665189e-05, + "loss": 2.421, + "step": 14548 + }, + { + "epoch": 1.1741586635461223, + "grad_norm": 0.6440950632095337, + "learning_rate": 3.482141647267987e-05, + "loss": 2.3914, + "step": 14549 + }, + { + "epoch": 1.1742393672827052, + "grad_norm": 0.7329740524291992, + "learning_rate": 3.480944445312853e-05, + "loss": 2.4805, + "step": 14550 + }, + { + "epoch": 1.1743200710192883, + "grad_norm": 0.6848189234733582, + "learning_rate": 3.4797474058296245e-05, + "loss": 2.3611, + "step": 14551 + }, + { + "epoch": 1.1744007747558711, + "grad_norm": 0.6994072794914246, + "learning_rate": 3.478550528848134e-05, + "loss": 2.5106, + "step": 14552 + }, + { + "epoch": 1.1744814784924542, + "grad_norm": 0.6826444268226624, + "learning_rate": 3.477353814398212e-05, + "loss": 2.467, + "step": 14553 + }, + { + "epoch": 1.1745621822290373, + "grad_norm": 0.6658408045768738, + "learning_rate": 3.476157262509683e-05, + "loss": 2.423, + "step": 14554 + }, + { + "epoch": 1.1746428859656202, + "grad_norm": 0.6963697075843811, + "learning_rate": 3.474960873212372e-05, + "loss": 2.457, + "step": 14555 + }, 
+ { + "epoch": 1.1747235897022033, + "grad_norm": 0.7574479579925537, + "learning_rate": 3.4737646465360894e-05, + "loss": 2.4292, + "step": 14556 + }, + { + "epoch": 1.1748042934387861, + "grad_norm": 0.7494931817054749, + "learning_rate": 3.472568582510652e-05, + "loss": 2.4395, + "step": 14557 + }, + { + "epoch": 1.1748849971753692, + "grad_norm": 0.7062687873840332, + "learning_rate": 3.471372681165872e-05, + "loss": 2.4561, + "step": 14558 + }, + { + "epoch": 1.1749657009119523, + "grad_norm": 0.6875349879264832, + "learning_rate": 3.4701769425315465e-05, + "loss": 2.4728, + "step": 14559 + }, + { + "epoch": 1.1750464046485352, + "grad_norm": 0.7009960412979126, + "learning_rate": 3.46898136663748e-05, + "loss": 2.5364, + "step": 14560 + }, + { + "epoch": 1.1751271083851182, + "grad_norm": 0.673791766166687, + "learning_rate": 3.467785953513475e-05, + "loss": 2.4611, + "step": 14561 + }, + { + "epoch": 1.1752078121217013, + "grad_norm": 0.7166882753372192, + "learning_rate": 3.4665907031893164e-05, + "loss": 2.4451, + "step": 14562 + }, + { + "epoch": 1.1752885158582842, + "grad_norm": 0.6868429780006409, + "learning_rate": 3.465395615694791e-05, + "loss": 2.4282, + "step": 14563 + }, + { + "epoch": 1.1753692195948673, + "grad_norm": 0.7212893962860107, + "learning_rate": 3.464200691059697e-05, + "loss": 2.4239, + "step": 14564 + }, + { + "epoch": 1.1754499233314502, + "grad_norm": 0.7213432192802429, + "learning_rate": 3.463005929313802e-05, + "loss": 2.4872, + "step": 14565 + }, + { + "epoch": 1.1755306270680332, + "grad_norm": 0.6805179119110107, + "learning_rate": 3.461811330486887e-05, + "loss": 2.4192, + "step": 14566 + }, + { + "epoch": 1.1756113308046163, + "grad_norm": 0.6746333241462708, + "learning_rate": 3.460616894608725e-05, + "loss": 2.3911, + "step": 14567 + }, + { + "epoch": 1.1756920345411992, + "grad_norm": 0.7388630509376526, + "learning_rate": 3.459422621709088e-05, + "loss": 2.4758, + "step": 14568 + }, + { + "epoch": 1.1757727382777823, 
+ "grad_norm": 0.7730274200439453, + "learning_rate": 3.458228511817731e-05, + "loss": 2.4159, + "step": 14569 + }, + { + "epoch": 1.1758534420143651, + "grad_norm": 0.721075177192688, + "learning_rate": 3.457034564964422e-05, + "loss": 2.4673, + "step": 14570 + }, + { + "epoch": 1.1759341457509482, + "grad_norm": 0.6647645235061646, + "learning_rate": 3.4558407811789184e-05, + "loss": 2.395, + "step": 14571 + }, + { + "epoch": 1.1760148494875313, + "grad_norm": 0.7155466675758362, + "learning_rate": 3.454647160490965e-05, + "loss": 2.503, + "step": 14572 + }, + { + "epoch": 1.1760955532241142, + "grad_norm": 0.6789268851280212, + "learning_rate": 3.453453702930314e-05, + "loss": 2.401, + "step": 14573 + }, + { + "epoch": 1.1761762569606973, + "grad_norm": 0.7488093376159668, + "learning_rate": 3.4522604085267105e-05, + "loss": 2.4434, + "step": 14574 + }, + { + "epoch": 1.1762569606972804, + "grad_norm": 0.7954889535903931, + "learning_rate": 3.451067277309893e-05, + "loss": 2.5302, + "step": 14575 + }, + { + "epoch": 1.1763376644338632, + "grad_norm": 0.7008484601974487, + "learning_rate": 3.4498743093095975e-05, + "loss": 2.3935, + "step": 14576 + }, + { + "epoch": 1.1764183681704463, + "grad_norm": 0.6725437641143799, + "learning_rate": 3.448681504555561e-05, + "loss": 2.399, + "step": 14577 + }, + { + "epoch": 1.1764990719070294, + "grad_norm": 0.6778931617736816, + "learning_rate": 3.4474888630775026e-05, + "loss": 2.4178, + "step": 14578 + }, + { + "epoch": 1.1765797756436123, + "grad_norm": 0.7043762803077698, + "learning_rate": 3.44629638490515e-05, + "loss": 2.5581, + "step": 14579 + }, + { + "epoch": 1.1766604793801954, + "grad_norm": 0.6848085522651672, + "learning_rate": 3.445104070068227e-05, + "loss": 2.436, + "step": 14580 + }, + { + "epoch": 1.1767411831167782, + "grad_norm": 0.7504082322120667, + "learning_rate": 3.443911918596441e-05, + "loss": 2.4138, + "step": 14581 + }, + { + "epoch": 1.1768218868533613, + "grad_norm": 0.7441161870956421, + 
"learning_rate": 3.442719930519508e-05, + "loss": 2.4333, + "step": 14582 + }, + { + "epoch": 1.1769025905899444, + "grad_norm": 0.663894772529602, + "learning_rate": 3.4415281058671354e-05, + "loss": 2.4672, + "step": 14583 + }, + { + "epoch": 1.1769832943265273, + "grad_norm": 0.6814345121383667, + "learning_rate": 3.440336444669027e-05, + "loss": 2.4196, + "step": 14584 + }, + { + "epoch": 1.1770639980631104, + "grad_norm": 0.7566598057746887, + "learning_rate": 3.439144946954881e-05, + "loss": 2.4586, + "step": 14585 + }, + { + "epoch": 1.1771447017996932, + "grad_norm": 0.7324996590614319, + "learning_rate": 3.4379536127543934e-05, + "loss": 2.4286, + "step": 14586 + }, + { + "epoch": 1.1772254055362763, + "grad_norm": 0.6632608771324158, + "learning_rate": 3.436762442097259e-05, + "loss": 2.4713, + "step": 14587 + }, + { + "epoch": 1.1773061092728594, + "grad_norm": 0.7246156930923462, + "learning_rate": 3.4355714350131564e-05, + "loss": 2.4374, + "step": 14588 + }, + { + "epoch": 1.1773868130094423, + "grad_norm": 0.7096351981163025, + "learning_rate": 3.4343805915317737e-05, + "loss": 2.4649, + "step": 14589 + }, + { + "epoch": 1.1774675167460253, + "grad_norm": 0.7090620398521423, + "learning_rate": 3.433189911682793e-05, + "loss": 2.396, + "step": 14590 + }, + { + "epoch": 1.1775482204826084, + "grad_norm": 0.7782440185546875, + "learning_rate": 3.431999395495882e-05, + "loss": 2.4506, + "step": 14591 + }, + { + "epoch": 1.1776289242191913, + "grad_norm": 0.6933457851409912, + "learning_rate": 3.4308090430007155e-05, + "loss": 2.3985, + "step": 14592 + }, + { + "epoch": 1.1777096279557744, + "grad_norm": 0.6935414671897888, + "learning_rate": 3.429618854226959e-05, + "loss": 2.4372, + "step": 14593 + }, + { + "epoch": 1.1777903316923575, + "grad_norm": 0.6971156597137451, + "learning_rate": 3.428428829204276e-05, + "loss": 2.4837, + "step": 14594 + }, + { + "epoch": 1.1778710354289403, + "grad_norm": 0.6460022926330566, + "learning_rate": 
3.427238967962325e-05, + "loss": 2.3742, + "step": 14595 + }, + { + "epoch": 1.1779517391655234, + "grad_norm": 0.6941941976547241, + "learning_rate": 3.426049270530763e-05, + "loss": 2.4706, + "step": 14596 + }, + { + "epoch": 1.1780324429021063, + "grad_norm": 0.7062166333198547, + "learning_rate": 3.424859736939236e-05, + "loss": 2.3893, + "step": 14597 + }, + { + "epoch": 1.1781131466386894, + "grad_norm": 0.6586433053016663, + "learning_rate": 3.42367036721739e-05, + "loss": 2.4385, + "step": 14598 + }, + { + "epoch": 1.1781938503752725, + "grad_norm": 0.6781242489814758, + "learning_rate": 3.422481161394869e-05, + "loss": 2.3876, + "step": 14599 + }, + { + "epoch": 1.1782745541118553, + "grad_norm": 0.710127592086792, + "learning_rate": 3.421292119501316e-05, + "loss": 2.4067, + "step": 14600 + }, + { + "epoch": 1.1783552578484384, + "grad_norm": 0.6856096982955933, + "learning_rate": 3.420103241566357e-05, + "loss": 2.4855, + "step": 14601 + }, + { + "epoch": 1.1784359615850213, + "grad_norm": 0.7173380851745605, + "learning_rate": 3.4189145276196245e-05, + "loss": 2.4871, + "step": 14602 + }, + { + "epoch": 1.1785166653216044, + "grad_norm": 0.6895382404327393, + "learning_rate": 3.417725977690745e-05, + "loss": 2.4066, + "step": 14603 + }, + { + "epoch": 1.1785973690581875, + "grad_norm": 0.7417690753936768, + "learning_rate": 3.416537591809341e-05, + "loss": 2.3779, + "step": 14604 + }, + { + "epoch": 1.1786780727947703, + "grad_norm": 0.7258411049842834, + "learning_rate": 3.4153493700050286e-05, + "loss": 2.4334, + "step": 14605 + }, + { + "epoch": 1.1787587765313534, + "grad_norm": 0.65704345703125, + "learning_rate": 3.414161312307427e-05, + "loss": 2.4531, + "step": 14606 + }, + { + "epoch": 1.1788394802679365, + "grad_norm": 0.6937118172645569, + "learning_rate": 3.4129734187461374e-05, + "loss": 2.4562, + "step": 14607 + }, + { + "epoch": 1.1789201840045194, + "grad_norm": 0.7331998348236084, + "learning_rate": 3.411785689350768e-05, + "loss": 
2.4418, + "step": 14608 + }, + { + "epoch": 1.1790008877411025, + "grad_norm": 0.666582465171814, + "learning_rate": 3.410598124150924e-05, + "loss": 2.4154, + "step": 14609 + }, + { + "epoch": 1.1790815914776853, + "grad_norm": 0.6684321165084839, + "learning_rate": 3.409410723176197e-05, + "loss": 2.4155, + "step": 14610 + }, + { + "epoch": 1.1791622952142684, + "grad_norm": 0.6413382291793823, + "learning_rate": 3.408223486456184e-05, + "loss": 2.3924, + "step": 14611 + }, + { + "epoch": 1.1792429989508515, + "grad_norm": 0.7081305384635925, + "learning_rate": 3.407036414020475e-05, + "loss": 2.3811, + "step": 14612 + }, + { + "epoch": 1.1793237026874344, + "grad_norm": 0.7550063133239746, + "learning_rate": 3.405849505898645e-05, + "loss": 2.4425, + "step": 14613 + }, + { + "epoch": 1.1794044064240174, + "grad_norm": 0.677200198173523, + "learning_rate": 3.404662762120288e-05, + "loss": 2.5182, + "step": 14614 + }, + { + "epoch": 1.1794851101606003, + "grad_norm": 0.6829770803451538, + "learning_rate": 3.4034761827149745e-05, + "loss": 2.5068, + "step": 14615 + }, + { + "epoch": 1.1795658138971834, + "grad_norm": 0.7069409489631653, + "learning_rate": 3.4022897677122815e-05, + "loss": 2.4449, + "step": 14616 + }, + { + "epoch": 1.1796465176337665, + "grad_norm": 0.6604448556900024, + "learning_rate": 3.4011035171417696e-05, + "loss": 2.3996, + "step": 14617 + }, + { + "epoch": 1.1797272213703494, + "grad_norm": 0.6577324271202087, + "learning_rate": 3.3999174310330084e-05, + "loss": 2.4723, + "step": 14618 + }, + { + "epoch": 1.1798079251069324, + "grad_norm": 0.8159187436103821, + "learning_rate": 3.398731509415561e-05, + "loss": 2.4655, + "step": 14619 + }, + { + "epoch": 1.1798886288435155, + "grad_norm": 0.7170652747154236, + "learning_rate": 3.397545752318977e-05, + "loss": 2.5095, + "step": 14620 + }, + { + "epoch": 1.1799693325800984, + "grad_norm": 0.6865009665489197, + "learning_rate": 3.396360159772812e-05, + "loss": 2.4358, + "step": 14621 + }, + { + 
"epoch": 1.1800500363166815, + "grad_norm": 0.6485020518302917, + "learning_rate": 3.3951747318066175e-05, + "loss": 2.4576, + "step": 14622 + }, + { + "epoch": 1.1801307400532646, + "grad_norm": 0.6626582145690918, + "learning_rate": 3.39398946844993e-05, + "loss": 2.4824, + "step": 14623 + }, + { + "epoch": 1.1802114437898474, + "grad_norm": 0.718588650226593, + "learning_rate": 3.392804369732293e-05, + "loss": 2.4211, + "step": 14624 + }, + { + "epoch": 1.1802921475264305, + "grad_norm": 0.7449582815170288, + "learning_rate": 3.391619435683243e-05, + "loss": 2.444, + "step": 14625 + }, + { + "epoch": 1.1803728512630134, + "grad_norm": 0.6988492012023926, + "learning_rate": 3.3904346663323115e-05, + "loss": 2.4262, + "step": 14626 + }, + { + "epoch": 1.1804535549995965, + "grad_norm": 0.6779490113258362, + "learning_rate": 3.389250061709025e-05, + "loss": 2.4751, + "step": 14627 + }, + { + "epoch": 1.1805342587361796, + "grad_norm": 0.6883673667907715, + "learning_rate": 3.388065621842912e-05, + "loss": 2.4995, + "step": 14628 + }, + { + "epoch": 1.1806149624727624, + "grad_norm": 0.7112017273902893, + "learning_rate": 3.386881346763483e-05, + "loss": 2.4181, + "step": 14629 + }, + { + "epoch": 1.1806956662093455, + "grad_norm": 0.6960459351539612, + "learning_rate": 3.385697236500258e-05, + "loss": 2.4888, + "step": 14630 + }, + { + "epoch": 1.1807763699459284, + "grad_norm": 0.6874156594276428, + "learning_rate": 3.3845132910827484e-05, + "loss": 2.4175, + "step": 14631 + }, + { + "epoch": 1.1808570736825115, + "grad_norm": 0.7075642347335815, + "learning_rate": 3.383329510540463e-05, + "loss": 2.4315, + "step": 14632 + }, + { + "epoch": 1.1809377774190946, + "grad_norm": 0.674907386302948, + "learning_rate": 3.3821458949028995e-05, + "loss": 2.4216, + "step": 14633 + }, + { + "epoch": 1.1810184811556774, + "grad_norm": 0.7008463740348816, + "learning_rate": 3.380962444199559e-05, + "loss": 2.4114, + "step": 14634 + }, + { + "epoch": 1.1810991848922605, + 
"grad_norm": 0.6784217953681946, + "learning_rate": 3.379779158459937e-05, + "loss": 2.3663, + "step": 14635 + }, + { + "epoch": 1.1811798886288436, + "grad_norm": 0.7174829244613647, + "learning_rate": 3.378596037713525e-05, + "loss": 2.4582, + "step": 14636 + }, + { + "epoch": 1.1812605923654265, + "grad_norm": 0.7106035947799683, + "learning_rate": 3.3774130819898065e-05, + "loss": 2.5095, + "step": 14637 + }, + { + "epoch": 1.1813412961020096, + "grad_norm": 0.809107780456543, + "learning_rate": 3.3762302913182696e-05, + "loss": 2.4942, + "step": 14638 + }, + { + "epoch": 1.1814219998385926, + "grad_norm": 0.7150272727012634, + "learning_rate": 3.375047665728386e-05, + "loss": 2.378, + "step": 14639 + }, + { + "epoch": 1.1815027035751755, + "grad_norm": 0.7016271352767944, + "learning_rate": 3.373865205249632e-05, + "loss": 2.4393, + "step": 14640 + }, + { + "epoch": 1.1815834073117586, + "grad_norm": 0.6387282013893127, + "learning_rate": 3.372682909911481e-05, + "loss": 2.4399, + "step": 14641 + }, + { + "epoch": 1.1816641110483415, + "grad_norm": 0.834181010723114, + "learning_rate": 3.371500779743393e-05, + "loss": 2.4312, + "step": 14642 + }, + { + "epoch": 1.1817448147849245, + "grad_norm": 0.6690472960472107, + "learning_rate": 3.370318814774832e-05, + "loss": 2.407, + "step": 14643 + }, + { + "epoch": 1.1818255185215076, + "grad_norm": 0.6594302654266357, + "learning_rate": 3.369137015035256e-05, + "loss": 2.4275, + "step": 14644 + }, + { + "epoch": 1.1819062222580905, + "grad_norm": 0.7284699082374573, + "learning_rate": 3.3679553805541194e-05, + "loss": 2.3981, + "step": 14645 + }, + { + "epoch": 1.1819869259946736, + "grad_norm": 0.7109572291374207, + "learning_rate": 3.366773911360871e-05, + "loss": 2.4345, + "step": 14646 + }, + { + "epoch": 1.1820676297312565, + "grad_norm": 0.6874241828918457, + "learning_rate": 3.3655926074849566e-05, + "loss": 2.4488, + "step": 14647 + }, + { + "epoch": 1.1821483334678395, + "grad_norm": 0.6698973178863525, + 
"learning_rate": 3.364411468955819e-05, + "loss": 2.42, + "step": 14648 + }, + { + "epoch": 1.1822290372044226, + "grad_norm": 0.7816089391708374, + "learning_rate": 3.3632304958028915e-05, + "loss": 2.4638, + "step": 14649 + }, + { + "epoch": 1.1823097409410055, + "grad_norm": 0.6718220710754395, + "learning_rate": 3.3620496880556075e-05, + "loss": 2.413, + "step": 14650 + }, + { + "epoch": 1.1823904446775886, + "grad_norm": 0.753463089466095, + "learning_rate": 3.360869045743401e-05, + "loss": 2.3772, + "step": 14651 + }, + { + "epoch": 1.1824711484141717, + "grad_norm": 0.7031456828117371, + "learning_rate": 3.359688568895689e-05, + "loss": 2.4198, + "step": 14652 + }, + { + "epoch": 1.1825518521507545, + "grad_norm": 0.7857323288917542, + "learning_rate": 3.358508257541897e-05, + "loss": 2.4223, + "step": 14653 + }, + { + "epoch": 1.1826325558873376, + "grad_norm": 0.7779297828674316, + "learning_rate": 3.357328111711439e-05, + "loss": 2.5266, + "step": 14654 + }, + { + "epoch": 1.1827132596239207, + "grad_norm": 0.7382386326789856, + "learning_rate": 3.356148131433728e-05, + "loss": 2.4673, + "step": 14655 + }, + { + "epoch": 1.1827939633605036, + "grad_norm": 0.7868054509162903, + "learning_rate": 3.354968316738174e-05, + "loss": 2.4285, + "step": 14656 + }, + { + "epoch": 1.1828746670970867, + "grad_norm": 0.7007591724395752, + "learning_rate": 3.353788667654183e-05, + "loss": 2.4054, + "step": 14657 + }, + { + "epoch": 1.1829553708336695, + "grad_norm": 0.6627741456031799, + "learning_rate": 3.352609184211148e-05, + "loss": 2.4224, + "step": 14658 + }, + { + "epoch": 1.1830360745702526, + "grad_norm": 0.6865360736846924, + "learning_rate": 3.351429866438469e-05, + "loss": 2.4084, + "step": 14659 + }, + { + "epoch": 1.1831167783068357, + "grad_norm": 0.7572095990180969, + "learning_rate": 3.3502507143655404e-05, + "loss": 2.4339, + "step": 14660 + }, + { + "epoch": 1.1831974820434186, + "grad_norm": 0.6907969117164612, + "learning_rate": 
3.349071728021743e-05, + "loss": 2.4578, + "step": 14661 + }, + { + "epoch": 1.1832781857800017, + "grad_norm": 0.6618743538856506, + "learning_rate": 3.347892907436465e-05, + "loss": 2.4131, + "step": 14662 + }, + { + "epoch": 1.1833588895165845, + "grad_norm": 0.777159571647644, + "learning_rate": 3.346714252639084e-05, + "loss": 2.419, + "step": 14663 + }, + { + "epoch": 1.1834395932531676, + "grad_norm": 0.666344165802002, + "learning_rate": 3.345535763658975e-05, + "loss": 2.4155, + "step": 14664 + }, + { + "epoch": 1.1835202969897507, + "grad_norm": 0.708848774433136, + "learning_rate": 3.3443574405255095e-05, + "loss": 2.4794, + "step": 14665 + }, + { + "epoch": 1.1836010007263336, + "grad_norm": 0.7247438430786133, + "learning_rate": 3.3431792832680555e-05, + "loss": 2.4445, + "step": 14666 + }, + { + "epoch": 1.1836817044629167, + "grad_norm": 0.6870034337043762, + "learning_rate": 3.342001291915978e-05, + "loss": 2.4309, + "step": 14667 + }, + { + "epoch": 1.1837624081994997, + "grad_norm": 0.7088049650192261, + "learning_rate": 3.340823466498629e-05, + "loss": 2.4456, + "step": 14668 + }, + { + "epoch": 1.1838431119360826, + "grad_norm": 0.695148229598999, + "learning_rate": 3.3396458070453676e-05, + "loss": 2.4018, + "step": 14669 + }, + { + "epoch": 1.1839238156726657, + "grad_norm": 0.7947117686271667, + "learning_rate": 3.3384683135855444e-05, + "loss": 2.4099, + "step": 14670 + }, + { + "epoch": 1.1840045194092486, + "grad_norm": 0.7268195748329163, + "learning_rate": 3.337290986148502e-05, + "loss": 2.3955, + "step": 14671 + }, + { + "epoch": 1.1840852231458316, + "grad_norm": 0.6932024955749512, + "learning_rate": 3.336113824763585e-05, + "loss": 2.4046, + "step": 14672 + }, + { + "epoch": 1.1841659268824147, + "grad_norm": 0.7408114671707153, + "learning_rate": 3.3349368294601334e-05, + "loss": 2.4186, + "step": 14673 + }, + { + "epoch": 1.1842466306189976, + "grad_norm": 0.6678428053855896, + "learning_rate": 3.3337600002674765e-05, + "loss": 
2.4324, + "step": 14674 + }, + { + "epoch": 1.1843273343555807, + "grad_norm": 0.7221381664276123, + "learning_rate": 3.3325833372149416e-05, + "loss": 2.4474, + "step": 14675 + }, + { + "epoch": 1.1844080380921636, + "grad_norm": 0.6971224546432495, + "learning_rate": 3.3314068403318654e-05, + "loss": 2.4197, + "step": 14676 + }, + { + "epoch": 1.1844887418287466, + "grad_norm": 0.65053391456604, + "learning_rate": 3.3302305096475604e-05, + "loss": 2.4169, + "step": 14677 + }, + { + "epoch": 1.1845694455653297, + "grad_norm": 0.7231155633926392, + "learning_rate": 3.3290543451913457e-05, + "loss": 2.4222, + "step": 14678 + }, + { + "epoch": 1.1846501493019126, + "grad_norm": 0.6458824872970581, + "learning_rate": 3.3278783469925345e-05, + "loss": 2.422, + "step": 14679 + }, + { + "epoch": 1.1847308530384957, + "grad_norm": 0.6783488392829895, + "learning_rate": 3.32670251508044e-05, + "loss": 2.4231, + "step": 14680 + }, + { + "epoch": 1.1848115567750788, + "grad_norm": 0.6742293238639832, + "learning_rate": 3.3255268494843586e-05, + "loss": 2.409, + "step": 14681 + }, + { + "epoch": 1.1848922605116616, + "grad_norm": 0.7455186247825623, + "learning_rate": 3.3243513502335956e-05, + "loss": 2.4121, + "step": 14682 + }, + { + "epoch": 1.1849729642482447, + "grad_norm": 0.7042234539985657, + "learning_rate": 3.323176017357451e-05, + "loss": 2.4574, + "step": 14683 + }, + { + "epoch": 1.1850536679848278, + "grad_norm": 0.7897992134094238, + "learning_rate": 3.3220008508852094e-05, + "loss": 2.4796, + "step": 14684 + }, + { + "epoch": 1.1851343717214107, + "grad_norm": 0.6894058585166931, + "learning_rate": 3.3208258508461644e-05, + "loss": 2.4125, + "step": 14685 + }, + { + "epoch": 1.1852150754579938, + "grad_norm": 0.7574072480201721, + "learning_rate": 3.319651017269597e-05, + "loss": 2.4714, + "step": 14686 + }, + { + "epoch": 1.1852957791945766, + "grad_norm": 0.7457531094551086, + "learning_rate": 3.3184763501847905e-05, + "loss": 2.4793, + "step": 14687 + }, + 
{ + "epoch": 1.1853764829311597, + "grad_norm": 0.6819709539413452, + "learning_rate": 3.317301849621018e-05, + "loss": 2.4563, + "step": 14688 + }, + { + "epoch": 1.1854571866677428, + "grad_norm": 0.6998026371002197, + "learning_rate": 3.316127515607555e-05, + "loss": 2.4548, + "step": 14689 + }, + { + "epoch": 1.1855378904043257, + "grad_norm": 0.7148768305778503, + "learning_rate": 3.314953348173664e-05, + "loss": 2.4897, + "step": 14690 + }, + { + "epoch": 1.1856185941409088, + "grad_norm": 0.6581987738609314, + "learning_rate": 3.31377934734861e-05, + "loss": 2.4683, + "step": 14691 + }, + { + "epoch": 1.1856992978774916, + "grad_norm": 0.7493093609809875, + "learning_rate": 3.312605513161653e-05, + "loss": 2.4564, + "step": 14692 + }, + { + "epoch": 1.1857800016140747, + "grad_norm": 0.7095562219619751, + "learning_rate": 3.311431845642051e-05, + "loss": 2.4595, + "step": 14693 + }, + { + "epoch": 1.1858607053506578, + "grad_norm": 0.8045323491096497, + "learning_rate": 3.310258344819047e-05, + "loss": 2.5044, + "step": 14694 + }, + { + "epoch": 1.1859414090872407, + "grad_norm": 0.7381219267845154, + "learning_rate": 3.3090850107218943e-05, + "loss": 2.415, + "step": 14695 + }, + { + "epoch": 1.1860221128238237, + "grad_norm": 0.6859883069992065, + "learning_rate": 3.307911843379832e-05, + "loss": 2.4314, + "step": 14696 + }, + { + "epoch": 1.1861028165604068, + "grad_norm": 0.7084196209907532, + "learning_rate": 3.306738842822099e-05, + "loss": 2.4404, + "step": 14697 + }, + { + "epoch": 1.1861835202969897, + "grad_norm": 0.6964806318283081, + "learning_rate": 3.305566009077932e-05, + "loss": 2.4391, + "step": 14698 + }, + { + "epoch": 1.1862642240335728, + "grad_norm": 0.7272049188613892, + "learning_rate": 3.304393342176562e-05, + "loss": 2.4395, + "step": 14699 + }, + { + "epoch": 1.1863449277701559, + "grad_norm": 0.6651458144187927, + "learning_rate": 3.303220842147209e-05, + "loss": 2.4059, + "step": 14700 + }, + { + "epoch": 1.1864256315067387, + 
"grad_norm": 0.7599130868911743, + "learning_rate": 3.302048509019099e-05, + "loss": 2.5044, + "step": 14701 + }, + { + "epoch": 1.1865063352433218, + "grad_norm": 0.6694391965866089, + "learning_rate": 3.3008763428214505e-05, + "loss": 2.4817, + "step": 14702 + }, + { + "epoch": 1.1865870389799047, + "grad_norm": 0.7176856398582458, + "learning_rate": 3.299704343583473e-05, + "loss": 2.4702, + "step": 14703 + }, + { + "epoch": 1.1866677427164878, + "grad_norm": 0.7133145332336426, + "learning_rate": 3.298532511334378e-05, + "loss": 2.4685, + "step": 14704 + }, + { + "epoch": 1.1867484464530709, + "grad_norm": 0.7170277833938599, + "learning_rate": 3.297360846103371e-05, + "loss": 2.4203, + "step": 14705 + }, + { + "epoch": 1.1868291501896537, + "grad_norm": 0.6853376626968384, + "learning_rate": 3.296189347919652e-05, + "loss": 2.4067, + "step": 14706 + }, + { + "epoch": 1.1869098539262368, + "grad_norm": 0.7269156575202942, + "learning_rate": 3.2950180168124175e-05, + "loss": 2.4211, + "step": 14707 + }, + { + "epoch": 1.1869905576628197, + "grad_norm": 0.8649005889892578, + "learning_rate": 3.2938468528108626e-05, + "loss": 2.4611, + "step": 14708 + }, + { + "epoch": 1.1870712613994028, + "grad_norm": 0.7256221771240234, + "learning_rate": 3.292675855944177e-05, + "loss": 2.4618, + "step": 14709 + }, + { + "epoch": 1.1871519651359859, + "grad_norm": 0.6854279637336731, + "learning_rate": 3.291505026241539e-05, + "loss": 2.4466, + "step": 14710 + }, + { + "epoch": 1.1872326688725687, + "grad_norm": 0.7182712554931641, + "learning_rate": 3.2903343637321316e-05, + "loss": 2.4847, + "step": 14711 + }, + { + "epoch": 1.1873133726091518, + "grad_norm": 0.6795300841331482, + "learning_rate": 3.289163868445134e-05, + "loss": 2.4407, + "step": 14712 + }, + { + "epoch": 1.187394076345735, + "grad_norm": 0.685146689414978, + "learning_rate": 3.287993540409713e-05, + "loss": 2.4537, + "step": 14713 + }, + { + "epoch": 1.1874747800823178, + "grad_norm": 0.7891005873680115, + 
"learning_rate": 3.2868233796550375e-05, + "loss": 2.4085, + "step": 14714 + }, + { + "epoch": 1.1875554838189009, + "grad_norm": 0.6521769762039185, + "learning_rate": 3.2856533862102724e-05, + "loss": 2.4174, + "step": 14715 + }, + { + "epoch": 1.1876361875554837, + "grad_norm": 0.7486612200737, + "learning_rate": 3.284483560104575e-05, + "loss": 2.4072, + "step": 14716 + }, + { + "epoch": 1.1877168912920668, + "grad_norm": 0.6895913481712341, + "learning_rate": 3.283313901367103e-05, + "loss": 2.4398, + "step": 14717 + }, + { + "epoch": 1.18779759502865, + "grad_norm": 0.6595678329467773, + "learning_rate": 3.282144410027009e-05, + "loss": 2.4407, + "step": 14718 + }, + { + "epoch": 1.1878782987652328, + "grad_norm": 0.7724249958992004, + "learning_rate": 3.280975086113435e-05, + "loss": 2.464, + "step": 14719 + }, + { + "epoch": 1.1879590025018159, + "grad_norm": 0.659472644329071, + "learning_rate": 3.279805929655524e-05, + "loss": 2.4774, + "step": 14720 + }, + { + "epoch": 1.1880397062383987, + "grad_norm": 0.7187919020652771, + "learning_rate": 3.27863694068242e-05, + "loss": 2.4767, + "step": 14721 + }, + { + "epoch": 1.1881204099749818, + "grad_norm": 0.7740198373794556, + "learning_rate": 3.2774681192232506e-05, + "loss": 2.4762, + "step": 14722 + }, + { + "epoch": 1.188201113711565, + "grad_norm": 0.700591504573822, + "learning_rate": 3.2762994653071464e-05, + "loss": 2.448, + "step": 14723 + }, + { + "epoch": 1.1882818174481478, + "grad_norm": 0.7168558239936829, + "learning_rate": 3.275130978963237e-05, + "loss": 2.4084, + "step": 14724 + }, + { + "epoch": 1.1883625211847308, + "grad_norm": 0.8039551973342896, + "learning_rate": 3.273962660220646e-05, + "loss": 2.3849, + "step": 14725 + }, + { + "epoch": 1.188443224921314, + "grad_norm": 0.6453016400337219, + "learning_rate": 3.27279450910848e-05, + "loss": 2.3856, + "step": 14726 + }, + { + "epoch": 1.1885239286578968, + "grad_norm": 0.7194651365280151, + "learning_rate": 3.2716265256558644e-05, + 
"loss": 2.4337, + "step": 14727 + }, + { + "epoch": 1.1886046323944799, + "grad_norm": 0.7298597097396851, + "learning_rate": 3.270458709891906e-05, + "loss": 2.4491, + "step": 14728 + }, + { + "epoch": 1.188685336131063, + "grad_norm": 0.7127524614334106, + "learning_rate": 3.269291061845705e-05, + "loss": 2.4319, + "step": 14729 + }, + { + "epoch": 1.1887660398676458, + "grad_norm": 0.6782705783843994, + "learning_rate": 3.2681235815463654e-05, + "loss": 2.4375, + "step": 14730 + }, + { + "epoch": 1.188846743604229, + "grad_norm": 0.7418326735496521, + "learning_rate": 3.266956269022987e-05, + "loss": 2.4149, + "step": 14731 + }, + { + "epoch": 1.1889274473408118, + "grad_norm": 0.7442455291748047, + "learning_rate": 3.265789124304654e-05, + "loss": 2.3935, + "step": 14732 + }, + { + "epoch": 1.1890081510773949, + "grad_norm": 0.7238253951072693, + "learning_rate": 3.264622147420461e-05, + "loss": 2.4592, + "step": 14733 + }, + { + "epoch": 1.189088854813978, + "grad_norm": 0.6488127708435059, + "learning_rate": 3.2634553383994925e-05, + "loss": 2.3468, + "step": 14734 + }, + { + "epoch": 1.1891695585505608, + "grad_norm": 0.7182446718215942, + "learning_rate": 3.2622886972708246e-05, + "loss": 2.4457, + "step": 14735 + }, + { + "epoch": 1.189250262287144, + "grad_norm": 0.6885523796081543, + "learning_rate": 3.261122224063534e-05, + "loss": 2.3943, + "step": 14736 + }, + { + "epoch": 1.1893309660237268, + "grad_norm": 0.653367817401886, + "learning_rate": 3.259955918806693e-05, + "loss": 2.4188, + "step": 14737 + }, + { + "epoch": 1.1894116697603099, + "grad_norm": 0.6968675851821899, + "learning_rate": 3.2587897815293686e-05, + "loss": 2.4276, + "step": 14738 + }, + { + "epoch": 1.189492373496893, + "grad_norm": 0.6827409267425537, + "learning_rate": 3.257623812260626e-05, + "loss": 2.4417, + "step": 14739 + }, + { + "epoch": 1.1895730772334758, + "grad_norm": 0.6807438731193542, + "learning_rate": 3.256458011029523e-05, + "loss": 2.4495, + "step": 14740 + }, + 
{ + "epoch": 1.189653780970059, + "grad_norm": 0.6692882180213928, + "learning_rate": 3.255292377865116e-05, + "loss": 2.3789, + "step": 14741 + }, + { + "epoch": 1.189734484706642, + "grad_norm": 0.6581685543060303, + "learning_rate": 3.2541269127964515e-05, + "loss": 2.4073, + "step": 14742 + }, + { + "epoch": 1.1898151884432249, + "grad_norm": 0.6458544731140137, + "learning_rate": 3.252961615852578e-05, + "loss": 2.4657, + "step": 14743 + }, + { + "epoch": 1.189895892179808, + "grad_norm": 0.6971322298049927, + "learning_rate": 3.251796487062541e-05, + "loss": 2.4404, + "step": 14744 + }, + { + "epoch": 1.189976595916391, + "grad_norm": 0.6770374178886414, + "learning_rate": 3.2506315264553724e-05, + "loss": 2.4329, + "step": 14745 + }, + { + "epoch": 1.190057299652974, + "grad_norm": 0.7634715437889099, + "learning_rate": 3.2494667340601085e-05, + "loss": 2.4234, + "step": 14746 + }, + { + "epoch": 1.190138003389557, + "grad_norm": 0.7717967629432678, + "learning_rate": 3.24830210990578e-05, + "loss": 2.5009, + "step": 14747 + }, + { + "epoch": 1.1902187071261399, + "grad_norm": 0.7133559584617615, + "learning_rate": 3.2471376540214124e-05, + "loss": 2.4272, + "step": 14748 + }, + { + "epoch": 1.190299410862723, + "grad_norm": 0.7273291349411011, + "learning_rate": 3.245973366436027e-05, + "loss": 2.4174, + "step": 14749 + }, + { + "epoch": 1.190380114599306, + "grad_norm": 0.6955052614212036, + "learning_rate": 3.244809247178643e-05, + "loss": 2.3605, + "step": 14750 + }, + { + "epoch": 1.190460818335889, + "grad_norm": 0.7072615027427673, + "learning_rate": 3.2436452962782685e-05, + "loss": 2.4897, + "step": 14751 + }, + { + "epoch": 1.190541522072472, + "grad_norm": 0.7095344662666321, + "learning_rate": 3.242481513763913e-05, + "loss": 2.4172, + "step": 14752 + }, + { + "epoch": 1.1906222258090549, + "grad_norm": 0.7260944247245789, + "learning_rate": 3.2413178996645864e-05, + "loss": 2.4272, + "step": 14753 + }, + { + "epoch": 1.190702929545638, + 
"grad_norm": 0.6601141691207886, + "learning_rate": 3.2401544540092824e-05, + "loss": 2.4072, + "step": 14754 + }, + { + "epoch": 1.190783633282221, + "grad_norm": 0.6684936881065369, + "learning_rate": 3.238991176827e-05, + "loss": 2.3968, + "step": 14755 + }, + { + "epoch": 1.190864337018804, + "grad_norm": 0.7264483571052551, + "learning_rate": 3.23782806814673e-05, + "loss": 2.4263, + "step": 14756 + }, + { + "epoch": 1.190945040755387, + "grad_norm": 0.6927621960639954, + "learning_rate": 3.2366651279974614e-05, + "loss": 2.4495, + "step": 14757 + }, + { + "epoch": 1.19102574449197, + "grad_norm": 0.7007272243499756, + "learning_rate": 3.2355023564081775e-05, + "loss": 2.4373, + "step": 14758 + }, + { + "epoch": 1.191106448228553, + "grad_norm": 0.6756663918495178, + "learning_rate": 3.234339753407857e-05, + "loss": 2.4148, + "step": 14759 + }, + { + "epoch": 1.191187151965136, + "grad_norm": 0.6741094589233398, + "learning_rate": 3.233177319025479e-05, + "loss": 2.3976, + "step": 14760 + }, + { + "epoch": 1.1912678557017191, + "grad_norm": 0.7098578810691833, + "learning_rate": 3.2320150532900085e-05, + "loss": 2.4326, + "step": 14761 + }, + { + "epoch": 1.191348559438302, + "grad_norm": 0.750271737575531, + "learning_rate": 3.230852956230413e-05, + "loss": 2.4766, + "step": 14762 + }, + { + "epoch": 1.191429263174885, + "grad_norm": 0.68764728307724, + "learning_rate": 3.229691027875661e-05, + "loss": 2.4128, + "step": 14763 + }, + { + "epoch": 1.191509966911468, + "grad_norm": 0.656295657157898, + "learning_rate": 3.228529268254702e-05, + "loss": 2.3928, + "step": 14764 + }, + { + "epoch": 1.191590670648051, + "grad_norm": 0.6690353155136108, + "learning_rate": 3.2273676773964955e-05, + "loss": 2.408, + "step": 14765 + }, + { + "epoch": 1.1916713743846339, + "grad_norm": 0.8111640214920044, + "learning_rate": 3.22620625532999e-05, + "loss": 2.4644, + "step": 14766 + }, + { + "epoch": 1.191752078121217, + "grad_norm": 0.7329768538475037, + "learning_rate": 
3.2250450020841316e-05, + "loss": 2.4235, + "step": 14767 + }, + { + "epoch": 1.1918327818578, + "grad_norm": 0.6902688145637512, + "learning_rate": 3.223883917687861e-05, + "loss": 2.3883, + "step": 14768 + }, + { + "epoch": 1.191913485594383, + "grad_norm": 0.797249972820282, + "learning_rate": 3.2227230021701205e-05, + "loss": 2.523, + "step": 14769 + }, + { + "epoch": 1.191994189330966, + "grad_norm": 0.6294408440589905, + "learning_rate": 3.221562255559834e-05, + "loss": 2.4156, + "step": 14770 + }, + { + "epoch": 1.192074893067549, + "grad_norm": 0.7326164245605469, + "learning_rate": 3.220401677885936e-05, + "loss": 2.3828, + "step": 14771 + }, + { + "epoch": 1.192155596804132, + "grad_norm": 0.783747673034668, + "learning_rate": 3.219241269177351e-05, + "loss": 2.4321, + "step": 14772 + }, + { + "epoch": 1.192236300540715, + "grad_norm": 0.7415335178375244, + "learning_rate": 3.2180810294630005e-05, + "loss": 2.4446, + "step": 14773 + }, + { + "epoch": 1.1923170042772981, + "grad_norm": 0.7125591039657593, + "learning_rate": 3.2169209587717966e-05, + "loss": 2.3914, + "step": 14774 + }, + { + "epoch": 1.192397708013881, + "grad_norm": 0.6714075207710266, + "learning_rate": 3.215761057132652e-05, + "loss": 2.3918, + "step": 14775 + }, + { + "epoch": 1.192478411750464, + "grad_norm": 0.7147830724716187, + "learning_rate": 3.214601324574481e-05, + "loss": 2.4389, + "step": 14776 + }, + { + "epoch": 1.192559115487047, + "grad_norm": 0.6780480146408081, + "learning_rate": 3.2134417611261755e-05, + "loss": 2.4119, + "step": 14777 + }, + { + "epoch": 1.19263981922363, + "grad_norm": 0.7473881840705872, + "learning_rate": 3.212282366816645e-05, + "loss": 2.4547, + "step": 14778 + }, + { + "epoch": 1.1927205229602131, + "grad_norm": 0.7418377995491028, + "learning_rate": 3.211123141674784e-05, + "loss": 2.4156, + "step": 14779 + }, + { + "epoch": 1.192801226696796, + "grad_norm": 0.687524139881134, + "learning_rate": 3.209964085729477e-05, + "loss": 2.4309, + 
"step": 14780 + }, + { + "epoch": 1.192881930433379, + "grad_norm": 0.6965883374214172, + "learning_rate": 3.208805199009615e-05, + "loss": 2.4028, + "step": 14781 + }, + { + "epoch": 1.192962634169962, + "grad_norm": 0.7024682760238647, + "learning_rate": 3.207646481544082e-05, + "loss": 2.4482, + "step": 14782 + }, + { + "epoch": 1.193043337906545, + "grad_norm": 0.6835834383964539, + "learning_rate": 3.2064879333617514e-05, + "loss": 2.3898, + "step": 14783 + }, + { + "epoch": 1.1931240416431281, + "grad_norm": 0.7002003788948059, + "learning_rate": 3.2053295544915e-05, + "loss": 2.487, + "step": 14784 + }, + { + "epoch": 1.193204745379711, + "grad_norm": 0.7128168940544128, + "learning_rate": 3.2041713449622e-05, + "loss": 2.4591, + "step": 14785 + }, + { + "epoch": 1.193285449116294, + "grad_norm": 0.6897242665290833, + "learning_rate": 3.203013304802712e-05, + "loss": 2.4458, + "step": 14786 + }, + { + "epoch": 1.1933661528528772, + "grad_norm": 0.7281817197799683, + "learning_rate": 3.2018554340419004e-05, + "loss": 2.3772, + "step": 14787 + }, + { + "epoch": 1.19344685658946, + "grad_norm": 0.6956086754798889, + "learning_rate": 3.200697732708619e-05, + "loss": 2.4316, + "step": 14788 + }, + { + "epoch": 1.1935275603260431, + "grad_norm": 0.7679805159568787, + "learning_rate": 3.199540200831729e-05, + "loss": 2.4464, + "step": 14789 + }, + { + "epoch": 1.1936082640626262, + "grad_norm": 0.6993041634559631, + "learning_rate": 3.19838283844007e-05, + "loss": 2.3881, + "step": 14790 + }, + { + "epoch": 1.193688967799209, + "grad_norm": 0.689618706703186, + "learning_rate": 3.197225645562493e-05, + "loss": 2.4184, + "step": 14791 + }, + { + "epoch": 1.1937696715357922, + "grad_norm": 0.6896520853042603, + "learning_rate": 3.1960686222278354e-05, + "loss": 2.4484, + "step": 14792 + }, + { + "epoch": 1.193850375272375, + "grad_norm": 0.6743811368942261, + "learning_rate": 3.1949117684649334e-05, + "loss": 2.4636, + "step": 14793 + }, + { + "epoch": 
1.1939310790089581, + "grad_norm": 0.7028046250343323, + "learning_rate": 3.1937550843026163e-05, + "loss": 2.4576, + "step": 14794 + }, + { + "epoch": 1.1940117827455412, + "grad_norm": 0.7219679951667786, + "learning_rate": 3.192598569769718e-05, + "loss": 2.4495, + "step": 14795 + }, + { + "epoch": 1.194092486482124, + "grad_norm": 0.731438159942627, + "learning_rate": 3.191442224895056e-05, + "loss": 2.4699, + "step": 14796 + }, + { + "epoch": 1.1941731902187072, + "grad_norm": 0.6731431484222412, + "learning_rate": 3.19028604970745e-05, + "loss": 2.4292, + "step": 14797 + }, + { + "epoch": 1.19425389395529, + "grad_norm": 0.6720147728919983, + "learning_rate": 3.1891300442357174e-05, + "loss": 2.4482, + "step": 14798 + }, + { + "epoch": 1.1943345976918731, + "grad_norm": 0.7504273653030396, + "learning_rate": 3.187974208508667e-05, + "loss": 2.4233, + "step": 14799 + }, + { + "epoch": 1.1944153014284562, + "grad_norm": 0.6882641315460205, + "learning_rate": 3.186818542555108e-05, + "loss": 2.4633, + "step": 14800 + }, + { + "epoch": 1.194496005165039, + "grad_norm": 0.7337899208068848, + "learning_rate": 3.1856630464038385e-05, + "loss": 2.4257, + "step": 14801 + }, + { + "epoch": 1.1945767089016222, + "grad_norm": 0.7026493549346924, + "learning_rate": 3.1845077200836636e-05, + "loss": 2.482, + "step": 14802 + }, + { + "epoch": 1.1946574126382052, + "grad_norm": 0.763351321220398, + "learning_rate": 3.1833525636233675e-05, + "loss": 2.4428, + "step": 14803 + }, + { + "epoch": 1.194738116374788, + "grad_norm": 0.6568076610565186, + "learning_rate": 3.182197577051745e-05, + "loss": 2.4373, + "step": 14804 + }, + { + "epoch": 1.1948188201113712, + "grad_norm": 0.6954717040061951, + "learning_rate": 3.1810427603975844e-05, + "loss": 2.4582, + "step": 14805 + }, + { + "epoch": 1.1948995238479543, + "grad_norm": 0.7130215167999268, + "learning_rate": 3.179888113689661e-05, + "loss": 2.443, + "step": 14806 + }, + { + "epoch": 1.1949802275845371, + "grad_norm": 
0.6789865493774414, + "learning_rate": 3.178733636956752e-05, + "loss": 2.4138, + "step": 14807 + }, + { + "epoch": 1.1950609313211202, + "grad_norm": 0.7725361585617065, + "learning_rate": 3.177579330227633e-05, + "loss": 2.4783, + "step": 14808 + }, + { + "epoch": 1.195141635057703, + "grad_norm": 0.6952371001243591, + "learning_rate": 3.17642519353107e-05, + "loss": 2.4571, + "step": 14809 + }, + { + "epoch": 1.1952223387942862, + "grad_norm": 0.7541885375976562, + "learning_rate": 3.1752712268958275e-05, + "loss": 2.4075, + "step": 14810 + }, + { + "epoch": 1.1953030425308693, + "grad_norm": 0.6974624395370483, + "learning_rate": 3.174117430350671e-05, + "loss": 2.4525, + "step": 14811 + }, + { + "epoch": 1.1953837462674521, + "grad_norm": 0.7293709516525269, + "learning_rate": 3.172963803924347e-05, + "loss": 2.4646, + "step": 14812 + }, + { + "epoch": 1.1954644500040352, + "grad_norm": 0.6944144368171692, + "learning_rate": 3.1718103476456106e-05, + "loss": 2.462, + "step": 14813 + }, + { + "epoch": 1.195545153740618, + "grad_norm": 0.6415363550186157, + "learning_rate": 3.170657061543214e-05, + "loss": 2.4086, + "step": 14814 + }, + { + "epoch": 1.1956258574772012, + "grad_norm": 0.6511349081993103, + "learning_rate": 3.169503945645892e-05, + "loss": 2.4376, + "step": 14815 + }, + { + "epoch": 1.1957065612137843, + "grad_norm": 0.7420210242271423, + "learning_rate": 3.1683509999823854e-05, + "loss": 2.4317, + "step": 14816 + }, + { + "epoch": 1.1957872649503671, + "grad_norm": 0.7291967272758484, + "learning_rate": 3.1671982245814316e-05, + "loss": 2.4369, + "step": 14817 + }, + { + "epoch": 1.1958679686869502, + "grad_norm": 0.685743510723114, + "learning_rate": 3.166045619471758e-05, + "loss": 2.465, + "step": 14818 + }, + { + "epoch": 1.1959486724235333, + "grad_norm": 0.7130060195922852, + "learning_rate": 3.164893184682093e-05, + "loss": 2.4305, + "step": 14819 + }, + { + "epoch": 1.1960293761601162, + "grad_norm": 0.694508969783783, + "learning_rate": 
3.163740920241156e-05, + "loss": 2.4278, + "step": 14820 + }, + { + "epoch": 1.1961100798966993, + "grad_norm": 0.6478514075279236, + "learning_rate": 3.162588826177669e-05, + "loss": 2.4721, + "step": 14821 + }, + { + "epoch": 1.1961907836332821, + "grad_norm": 0.6586465835571289, + "learning_rate": 3.1614369025203386e-05, + "loss": 2.4716, + "step": 14822 + }, + { + "epoch": 1.1962714873698652, + "grad_norm": 0.7558106184005737, + "learning_rate": 3.160285149297876e-05, + "loss": 2.4656, + "step": 14823 + }, + { + "epoch": 1.1963521911064483, + "grad_norm": 0.7208340764045715, + "learning_rate": 3.1591335665389896e-05, + "loss": 2.4374, + "step": 14824 + }, + { + "epoch": 1.1964328948430312, + "grad_norm": 0.70301353931427, + "learning_rate": 3.157982154272375e-05, + "loss": 2.397, + "step": 14825 + }, + { + "epoch": 1.1965135985796143, + "grad_norm": 0.6857609152793884, + "learning_rate": 3.15683091252673e-05, + "loss": 2.4258, + "step": 14826 + }, + { + "epoch": 1.1965943023161971, + "grad_norm": 0.6954602003097534, + "learning_rate": 3.155679841330747e-05, + "loss": 2.4566, + "step": 14827 + }, + { + "epoch": 1.1966750060527802, + "grad_norm": 0.6923913955688477, + "learning_rate": 3.154528940713113e-05, + "loss": 2.4, + "step": 14828 + }, + { + "epoch": 1.1967557097893633, + "grad_norm": 0.6641134023666382, + "learning_rate": 3.1533782107025124e-05, + "loss": 2.4721, + "step": 14829 + }, + { + "epoch": 1.1968364135259462, + "grad_norm": 0.7470134496688843, + "learning_rate": 3.152227651327627e-05, + "loss": 2.4253, + "step": 14830 + }, + { + "epoch": 1.1969171172625293, + "grad_norm": 0.7234545350074768, + "learning_rate": 3.151077262617126e-05, + "loss": 2.4109, + "step": 14831 + }, + { + "epoch": 1.1969978209991123, + "grad_norm": 0.7814013957977295, + "learning_rate": 3.149927044599682e-05, + "loss": 2.4522, + "step": 14832 + }, + { + "epoch": 1.1970785247356952, + "grad_norm": 0.6825435161590576, + "learning_rate": 3.1487769973039624e-05, + "loss": 
2.4728, + "step": 14833 + }, + { + "epoch": 1.1971592284722783, + "grad_norm": 0.7091361880302429, + "learning_rate": 3.147627120758634e-05, + "loss": 2.4615, + "step": 14834 + }, + { + "epoch": 1.1972399322088614, + "grad_norm": 0.7271433472633362, + "learning_rate": 3.146477414992346e-05, + "loss": 2.4154, + "step": 14835 + }, + { + "epoch": 1.1973206359454442, + "grad_norm": 0.6557306051254272, + "learning_rate": 3.145327880033756e-05, + "loss": 2.4348, + "step": 14836 + }, + { + "epoch": 1.1974013396820273, + "grad_norm": 0.6667891144752502, + "learning_rate": 3.1441785159115166e-05, + "loss": 2.4123, + "step": 14837 + }, + { + "epoch": 1.1974820434186102, + "grad_norm": 0.6755266189575195, + "learning_rate": 3.143029322654266e-05, + "loss": 2.4287, + "step": 14838 + }, + { + "epoch": 1.1975627471551933, + "grad_norm": 0.7647396922111511, + "learning_rate": 3.1418803002906475e-05, + "loss": 2.4343, + "step": 14839 + }, + { + "epoch": 1.1976434508917764, + "grad_norm": 0.7288243174552917, + "learning_rate": 3.140731448849305e-05, + "loss": 2.4536, + "step": 14840 + }, + { + "epoch": 1.1977241546283592, + "grad_norm": 0.6126244068145752, + "learning_rate": 3.1395827683588605e-05, + "loss": 2.4187, + "step": 14841 + }, + { + "epoch": 1.1978048583649423, + "grad_norm": 0.6773896217346191, + "learning_rate": 3.138434258847948e-05, + "loss": 2.3916, + "step": 14842 + }, + { + "epoch": 1.1978855621015252, + "grad_norm": 0.724413275718689, + "learning_rate": 3.1372859203451934e-05, + "loss": 2.4614, + "step": 14843 + }, + { + "epoch": 1.1979662658381083, + "grad_norm": 0.7043039798736572, + "learning_rate": 3.136137752879209e-05, + "loss": 2.4343, + "step": 14844 + }, + { + "epoch": 1.1980469695746914, + "grad_norm": 0.7543383240699768, + "learning_rate": 3.134989756478615e-05, + "loss": 2.4345, + "step": 14845 + }, + { + "epoch": 1.1981276733112742, + "grad_norm": 0.7193408608436584, + "learning_rate": 3.1338419311720244e-05, + "loss": 2.4728, + "step": 14846 + }, + { 
+ "epoch": 1.1982083770478573, + "grad_norm": 0.8090186715126038, + "learning_rate": 3.132694276988038e-05, + "loss": 2.4246, + "step": 14847 + }, + { + "epoch": 1.1982890807844404, + "grad_norm": 0.7154600620269775, + "learning_rate": 3.131546793955261e-05, + "loss": 2.4061, + "step": 14848 + }, + { + "epoch": 1.1983697845210233, + "grad_norm": 0.6987032890319824, + "learning_rate": 3.130399482102293e-05, + "loss": 2.4525, + "step": 14849 + }, + { + "epoch": 1.1984504882576064, + "grad_norm": 0.7123507261276245, + "learning_rate": 3.129252341457727e-05, + "loss": 2.4017, + "step": 14850 + }, + { + "epoch": 1.1985311919941894, + "grad_norm": 0.6475987434387207, + "learning_rate": 3.128105372050153e-05, + "loss": 2.4617, + "step": 14851 + }, + { + "epoch": 1.1986118957307723, + "grad_norm": 0.6799046993255615, + "learning_rate": 3.126958573908156e-05, + "loss": 2.4337, + "step": 14852 + }, + { + "epoch": 1.1986925994673554, + "grad_norm": 0.6910607218742371, + "learning_rate": 3.125811947060322e-05, + "loss": 2.415, + "step": 14853 + }, + { + "epoch": 1.1987733032039383, + "grad_norm": 0.6879963278770447, + "learning_rate": 3.124665491535219e-05, + "loss": 2.4912, + "step": 14854 + }, + { + "epoch": 1.1988540069405214, + "grad_norm": 0.7038810849189758, + "learning_rate": 3.123519207361425e-05, + "loss": 2.4528, + "step": 14855 + }, + { + "epoch": 1.1989347106771044, + "grad_norm": 0.6771957278251648, + "learning_rate": 3.1223730945675104e-05, + "loss": 2.4524, + "step": 14856 + }, + { + "epoch": 1.1990154144136873, + "grad_norm": 0.7529320120811462, + "learning_rate": 3.1212271531820336e-05, + "loss": 2.4667, + "step": 14857 + }, + { + "epoch": 1.1990961181502704, + "grad_norm": 0.6498474478721619, + "learning_rate": 3.1200813832335574e-05, + "loss": 2.3863, + "step": 14858 + }, + { + "epoch": 1.1991768218868533, + "grad_norm": 0.7587705850601196, + "learning_rate": 3.1189357847506383e-05, + "loss": 2.4962, + "step": 14859 + }, + { + "epoch": 1.1992575256234363, + 
"grad_norm": 0.674013078212738, + "learning_rate": 3.117790357761825e-05, + "loss": 2.3939, + "step": 14860 + }, + { + "epoch": 1.1993382293600194, + "grad_norm": 0.6546844840049744, + "learning_rate": 3.116645102295668e-05, + "loss": 2.4775, + "step": 14861 + }, + { + "epoch": 1.1994189330966023, + "grad_norm": 0.7558320760726929, + "learning_rate": 3.11550001838071e-05, + "loss": 2.3918, + "step": 14862 + }, + { + "epoch": 1.1994996368331854, + "grad_norm": 0.7074883580207825, + "learning_rate": 3.114355106045486e-05, + "loss": 2.3969, + "step": 14863 + }, + { + "epoch": 1.1995803405697685, + "grad_norm": 0.706078290939331, + "learning_rate": 3.1132103653185305e-05, + "loss": 2.5028, + "step": 14864 + }, + { + "epoch": 1.1996610443063513, + "grad_norm": 0.6883544921875, + "learning_rate": 3.1120657962283764e-05, + "loss": 2.4407, + "step": 14865 + }, + { + "epoch": 1.1997417480429344, + "grad_norm": 0.6905466914176941, + "learning_rate": 3.110921398803551e-05, + "loss": 2.3893, + "step": 14866 + }, + { + "epoch": 1.1998224517795173, + "grad_norm": 0.6584910154342651, + "learning_rate": 3.109777173072569e-05, + "loss": 2.4515, + "step": 14867 + }, + { + "epoch": 1.1999031555161004, + "grad_norm": 0.6957471370697021, + "learning_rate": 3.108633119063951e-05, + "loss": 2.4483, + "step": 14868 + }, + { + "epoch": 1.1999838592526835, + "grad_norm": 0.6716276407241821, + "learning_rate": 3.1074892368062095e-05, + "loss": 2.4298, + "step": 14869 + }, + { + "epoch": 1.2000645629892663, + "grad_norm": 0.7350820302963257, + "learning_rate": 3.1063455263278543e-05, + "loss": 2.4088, + "step": 14870 + }, + { + "epoch": 1.2001452667258494, + "grad_norm": 0.7409771680831909, + "learning_rate": 3.105201987657388e-05, + "loss": 2.4089, + "step": 14871 + }, + { + "epoch": 1.2002259704624323, + "grad_norm": 0.7273266911506653, + "learning_rate": 3.104058620823315e-05, + "loss": 2.5149, + "step": 14872 + }, + { + "epoch": 1.2003066741990154, + "grad_norm": 0.6793962717056274, + 
"learning_rate": 3.102915425854124e-05, + "loss": 2.4422, + "step": 14873 + }, + { + "epoch": 1.2003873779355985, + "grad_norm": 0.72386234998703, + "learning_rate": 3.101772402778309e-05, + "loss": 2.4756, + "step": 14874 + }, + { + "epoch": 1.2004680816721813, + "grad_norm": 0.6530055999755859, + "learning_rate": 3.1006295516243625e-05, + "loss": 2.4145, + "step": 14875 + }, + { + "epoch": 1.2005487854087644, + "grad_norm": 0.7288365960121155, + "learning_rate": 3.099486872420758e-05, + "loss": 2.4565, + "step": 14876 + }, + { + "epoch": 1.2006294891453475, + "grad_norm": 0.6982102394104004, + "learning_rate": 3.09834436519598e-05, + "loss": 2.4788, + "step": 14877 + }, + { + "epoch": 1.2007101928819304, + "grad_norm": 0.7208256125450134, + "learning_rate": 3.0972020299785007e-05, + "loss": 2.4186, + "step": 14878 + }, + { + "epoch": 1.2007908966185135, + "grad_norm": 0.6928278803825378, + "learning_rate": 3.096059866796791e-05, + "loss": 2.4177, + "step": 14879 + }, + { + "epoch": 1.2008716003550965, + "grad_norm": 0.7145438194274902, + "learning_rate": 3.094917875679317e-05, + "loss": 2.4796, + "step": 14880 + }, + { + "epoch": 1.2009523040916794, + "grad_norm": 0.7126322388648987, + "learning_rate": 3.093776056654539e-05, + "loss": 2.4926, + "step": 14881 + }, + { + "epoch": 1.2010330078282625, + "grad_norm": 0.7775046825408936, + "learning_rate": 3.092634409750919e-05, + "loss": 2.4386, + "step": 14882 + }, + { + "epoch": 1.2011137115648454, + "grad_norm": 0.6387330889701843, + "learning_rate": 3.091492934996901e-05, + "loss": 2.4302, + "step": 14883 + }, + { + "epoch": 1.2011944153014285, + "grad_norm": 0.6883525252342224, + "learning_rate": 3.090351632420939e-05, + "loss": 2.4644, + "step": 14884 + }, + { + "epoch": 1.2012751190380115, + "grad_norm": 0.6698900461196899, + "learning_rate": 3.0892105020514795e-05, + "loss": 2.414, + "step": 14885 + }, + { + "epoch": 1.2013558227745944, + "grad_norm": 0.7124409079551697, + "learning_rate": 
3.088069543916956e-05, + "loss": 2.4275, + "step": 14886 + }, + { + "epoch": 1.2014365265111775, + "grad_norm": 0.6996601223945618, + "learning_rate": 3.0869287580458076e-05, + "loss": 2.4725, + "step": 14887 + }, + { + "epoch": 1.2015172302477604, + "grad_norm": 0.653087317943573, + "learning_rate": 3.085788144466468e-05, + "loss": 2.383, + "step": 14888 + }, + { + "epoch": 1.2015979339843434, + "grad_norm": 0.7426899671554565, + "learning_rate": 3.0846477032073554e-05, + "loss": 2.4064, + "step": 14889 + }, + { + "epoch": 1.2016786377209265, + "grad_norm": 0.6417646408081055, + "learning_rate": 3.083507434296903e-05, + "loss": 2.3964, + "step": 14890 + }, + { + "epoch": 1.2017593414575094, + "grad_norm": 0.6301923394203186, + "learning_rate": 3.0823673377635274e-05, + "loss": 2.4285, + "step": 14891 + }, + { + "epoch": 1.2018400451940925, + "grad_norm": 0.7621259093284607, + "learning_rate": 3.081227413635638e-05, + "loss": 2.4731, + "step": 14892 + }, + { + "epoch": 1.2019207489306756, + "grad_norm": 0.6637598872184753, + "learning_rate": 3.080087661941648e-05, + "loss": 2.4126, + "step": 14893 + }, + { + "epoch": 1.2020014526672584, + "grad_norm": 0.6820287108421326, + "learning_rate": 3.078948082709964e-05, + "loss": 2.4108, + "step": 14894 + }, + { + "epoch": 1.2020821564038415, + "grad_norm": 0.7090989351272583, + "learning_rate": 3.077808675968983e-05, + "loss": 2.4678, + "step": 14895 + }, + { + "epoch": 1.2021628601404246, + "grad_norm": 0.7242181897163391, + "learning_rate": 3.076669441747105e-05, + "loss": 2.5346, + "step": 14896 + }, + { + "epoch": 1.2022435638770075, + "grad_norm": 0.7790088653564453, + "learning_rate": 3.075530380072722e-05, + "loss": 2.4436, + "step": 14897 + }, + { + "epoch": 1.2023242676135906, + "grad_norm": 0.6828821301460266, + "learning_rate": 3.074391490974225e-05, + "loss": 2.3767, + "step": 14898 + }, + { + "epoch": 1.2024049713501734, + "grad_norm": 0.709815502166748, + "learning_rate": 3.0732527744799945e-05, + "loss": 
2.4139, + "step": 14899 + }, + { + "epoch": 1.2024856750867565, + "grad_norm": 0.6561180353164673, + "learning_rate": 3.07211423061841e-05, + "loss": 2.399, + "step": 14900 + }, + { + "epoch": 1.2025663788233396, + "grad_norm": 0.7122004628181458, + "learning_rate": 3.0709758594178495e-05, + "loss": 2.4314, + "step": 14901 + }, + { + "epoch": 1.2026470825599225, + "grad_norm": 0.6817516684532166, + "learning_rate": 3.0698376609066825e-05, + "loss": 2.4241, + "step": 14902 + }, + { + "epoch": 1.2027277862965056, + "grad_norm": 0.6848475337028503, + "learning_rate": 3.068699635113277e-05, + "loss": 2.4583, + "step": 14903 + }, + { + "epoch": 1.2028084900330884, + "grad_norm": 0.6567823886871338, + "learning_rate": 3.067561782065999e-05, + "loss": 2.3818, + "step": 14904 + }, + { + "epoch": 1.2028891937696715, + "grad_norm": 0.7373961806297302, + "learning_rate": 3.066424101793198e-05, + "loss": 2.4075, + "step": 14905 + }, + { + "epoch": 1.2029698975062546, + "grad_norm": 0.6968079209327698, + "learning_rate": 3.0652865943232346e-05, + "loss": 2.4701, + "step": 14906 + }, + { + "epoch": 1.2030506012428375, + "grad_norm": 0.7356292009353638, + "learning_rate": 3.064149259684459e-05, + "loss": 2.4188, + "step": 14907 + }, + { + "epoch": 1.2031313049794206, + "grad_norm": 0.7144857048988342, + "learning_rate": 3.063012097905211e-05, + "loss": 2.4411, + "step": 14908 + }, + { + "epoch": 1.2032120087160036, + "grad_norm": 0.734531044960022, + "learning_rate": 3.0618751090138365e-05, + "loss": 2.4595, + "step": 14909 + }, + { + "epoch": 1.2032927124525865, + "grad_norm": 0.6658234000205994, + "learning_rate": 3.060738293038669e-05, + "loss": 2.4206, + "step": 14910 + }, + { + "epoch": 1.2033734161891696, + "grad_norm": 0.678424596786499, + "learning_rate": 3.059601650008044e-05, + "loss": 2.4704, + "step": 14911 + }, + { + "epoch": 1.2034541199257527, + "grad_norm": 0.6852440237998962, + "learning_rate": 3.058465179950287e-05, + "loss": 2.46, + "step": 14912 + }, + { + 
"epoch": 1.2035348236623356, + "grad_norm": 0.702881395816803, + "learning_rate": 3.057328882893724e-05, + "loss": 2.4372, + "step": 14913 + }, + { + "epoch": 1.2036155273989186, + "grad_norm": 0.6978999972343445, + "learning_rate": 3.056192758866676e-05, + "loss": 2.401, + "step": 14914 + }, + { + "epoch": 1.2036962311355015, + "grad_norm": 0.7070993185043335, + "learning_rate": 3.055056807897454e-05, + "loss": 2.3967, + "step": 14915 + }, + { + "epoch": 1.2037769348720846, + "grad_norm": 0.7159305810928345, + "learning_rate": 3.0539210300143693e-05, + "loss": 2.4388, + "step": 14916 + }, + { + "epoch": 1.2038576386086675, + "grad_norm": 0.6920869946479797, + "learning_rate": 3.0527854252457333e-05, + "loss": 2.441, + "step": 14917 + }, + { + "epoch": 1.2039383423452505, + "grad_norm": 0.7014884352684021, + "learning_rate": 3.0516499936198417e-05, + "loss": 2.4115, + "step": 14918 + }, + { + "epoch": 1.2040190460818336, + "grad_norm": 0.6754150986671448, + "learning_rate": 3.0505147351649955e-05, + "loss": 2.3722, + "step": 14919 + }, + { + "epoch": 1.2040997498184165, + "grad_norm": 0.7681791186332703, + "learning_rate": 3.0493796499094874e-05, + "loss": 2.4331, + "step": 14920 + }, + { + "epoch": 1.2041804535549996, + "grad_norm": 0.7265221476554871, + "learning_rate": 3.0482447378816082e-05, + "loss": 2.4806, + "step": 14921 + }, + { + "epoch": 1.2042611572915827, + "grad_norm": 0.6841520667076111, + "learning_rate": 3.047109999109642e-05, + "loss": 2.3896, + "step": 14922 + }, + { + "epoch": 1.2043418610281655, + "grad_norm": 0.746347963809967, + "learning_rate": 3.0459754336218737e-05, + "loss": 2.4081, + "step": 14923 + }, + { + "epoch": 1.2044225647647486, + "grad_norm": 0.6679818034172058, + "learning_rate": 3.0448410414465712e-05, + "loss": 2.4206, + "step": 14924 + }, + { + "epoch": 1.2045032685013317, + "grad_norm": 0.7122265100479126, + "learning_rate": 3.0437068226120114e-05, + "loss": 2.4217, + "step": 14925 + }, + { + "epoch": 1.2045839722379146, + 
"grad_norm": 0.7023499011993408, + "learning_rate": 3.0425727771464618e-05, + "loss": 2.4597, + "step": 14926 + }, + { + "epoch": 1.2046646759744977, + "grad_norm": 0.7304259538650513, + "learning_rate": 3.0414389050781876e-05, + "loss": 2.4915, + "step": 14927 + }, + { + "epoch": 1.2047453797110805, + "grad_norm": 0.7209908962249756, + "learning_rate": 3.0403052064354442e-05, + "loss": 2.4163, + "step": 14928 + }, + { + "epoch": 1.2048260834476636, + "grad_norm": 0.7367275953292847, + "learning_rate": 3.0391716812464865e-05, + "loss": 2.4192, + "step": 14929 + }, + { + "epoch": 1.2049067871842467, + "grad_norm": 0.6576591730117798, + "learning_rate": 3.0380383295395674e-05, + "loss": 2.4606, + "step": 14930 + }, + { + "epoch": 1.2049874909208296, + "grad_norm": 0.7082500457763672, + "learning_rate": 3.0369051513429315e-05, + "loss": 2.4079, + "step": 14931 + }, + { + "epoch": 1.2050681946574127, + "grad_norm": 0.6770346760749817, + "learning_rate": 3.03577214668482e-05, + "loss": 2.45, + "step": 14932 + }, + { + "epoch": 1.2051488983939955, + "grad_norm": 0.6979790925979614, + "learning_rate": 3.034639315593476e-05, + "loss": 2.3966, + "step": 14933 + }, + { + "epoch": 1.2052296021305786, + "grad_norm": 0.6863394975662231, + "learning_rate": 3.033506658097124e-05, + "loss": 2.4637, + "step": 14934 + }, + { + "epoch": 1.2053103058671617, + "grad_norm": 0.7522799372673035, + "learning_rate": 3.0323741742239963e-05, + "loss": 2.4585, + "step": 14935 + }, + { + "epoch": 1.2053910096037446, + "grad_norm": 0.7119878530502319, + "learning_rate": 3.031241864002321e-05, + "loss": 2.4473, + "step": 14936 + }, + { + "epoch": 1.2054717133403277, + "grad_norm": 0.690861701965332, + "learning_rate": 3.030109727460312e-05, + "loss": 2.4564, + "step": 14937 + }, + { + "epoch": 1.2055524170769107, + "grad_norm": 0.6825447082519531, + "learning_rate": 3.0289777646261886e-05, + "loss": 2.4511, + "step": 14938 + }, + { + "epoch": 1.2056331208134936, + "grad_norm": 0.7404600977897644, 
+ "learning_rate": 3.027845975528164e-05, + "loss": 2.4461, + "step": 14939 + }, + { + "epoch": 1.2057138245500767, + "grad_norm": 0.6871766448020935, + "learning_rate": 3.026714360194437e-05, + "loss": 2.4486, + "step": 14940 + }, + { + "epoch": 1.2057945282866598, + "grad_norm": 0.6646476984024048, + "learning_rate": 3.02558291865322e-05, + "loss": 2.378, + "step": 14941 + }, + { + "epoch": 1.2058752320232426, + "grad_norm": 0.6998385787010193, + "learning_rate": 3.024451650932707e-05, + "loss": 2.4646, + "step": 14942 + }, + { + "epoch": 1.2059559357598257, + "grad_norm": 0.6763097047805786, + "learning_rate": 3.023320557061098e-05, + "loss": 2.3971, + "step": 14943 + }, + { + "epoch": 1.2060366394964086, + "grad_norm": 0.7409633994102478, + "learning_rate": 3.0221896370665736e-05, + "loss": 2.4405, + "step": 14944 + }, + { + "epoch": 1.2061173432329917, + "grad_norm": 0.6972076892852783, + "learning_rate": 3.0210588909773242e-05, + "loss": 2.3935, + "step": 14945 + }, + { + "epoch": 1.2061980469695748, + "grad_norm": 0.6898512840270996, + "learning_rate": 3.0199283188215333e-05, + "loss": 2.4173, + "step": 14946 + }, + { + "epoch": 1.2062787507061576, + "grad_norm": 0.6878097057342529, + "learning_rate": 3.0187979206273707e-05, + "loss": 2.44, + "step": 14947 + }, + { + "epoch": 1.2063594544427407, + "grad_norm": 0.6629695296287537, + "learning_rate": 3.0176676964230143e-05, + "loss": 2.3836, + "step": 14948 + }, + { + "epoch": 1.2064401581793236, + "grad_norm": 0.717654824256897, + "learning_rate": 3.0165376462366336e-05, + "loss": 2.415, + "step": 14949 + }, + { + "epoch": 1.2065208619159067, + "grad_norm": 0.7526129484176636, + "learning_rate": 3.0154077700963867e-05, + "loss": 2.4985, + "step": 14950 + }, + { + "epoch": 1.2066015656524898, + "grad_norm": 0.6867300271987915, + "learning_rate": 3.014278068030435e-05, + "loss": 2.395, + "step": 14951 + }, + { + "epoch": 1.2066822693890726, + "grad_norm": 0.7321466207504272, + "learning_rate": 
3.0131485400669356e-05, + "loss": 2.4503, + "step": 14952 + }, + { + "epoch": 1.2067629731256557, + "grad_norm": 0.6915534734725952, + "learning_rate": 3.0120191862340387e-05, + "loss": 2.398, + "step": 14953 + }, + { + "epoch": 1.2068436768622388, + "grad_norm": 0.7017377018928528, + "learning_rate": 3.01089000655989e-05, + "loss": 2.4367, + "step": 14954 + }, + { + "epoch": 1.2069243805988217, + "grad_norm": 0.7032245397567749, + "learning_rate": 3.0097610010726353e-05, + "loss": 2.4078, + "step": 14955 + }, + { + "epoch": 1.2070050843354048, + "grad_norm": 0.6795478463172913, + "learning_rate": 3.008632169800406e-05, + "loss": 2.3508, + "step": 14956 + }, + { + "epoch": 1.2070857880719879, + "grad_norm": 0.7149559855461121, + "learning_rate": 3.007503512771339e-05, + "loss": 2.4023, + "step": 14957 + }, + { + "epoch": 1.2071664918085707, + "grad_norm": 0.724756121635437, + "learning_rate": 3.006375030013563e-05, + "loss": 2.4439, + "step": 14958 + }, + { + "epoch": 1.2072471955451538, + "grad_norm": 0.7233348488807678, + "learning_rate": 3.005246721555205e-05, + "loss": 2.3819, + "step": 14959 + }, + { + "epoch": 1.2073278992817367, + "grad_norm": 0.700322151184082, + "learning_rate": 3.0041185874243815e-05, + "loss": 2.4222, + "step": 14960 + }, + { + "epoch": 1.2074086030183198, + "grad_norm": 0.7268145680427551, + "learning_rate": 3.002990627649209e-05, + "loss": 2.4698, + "step": 14961 + }, + { + "epoch": 1.2074893067549028, + "grad_norm": 0.6885111331939697, + "learning_rate": 3.001862842257801e-05, + "loss": 2.4505, + "step": 14962 + }, + { + "epoch": 1.2075700104914857, + "grad_norm": 0.7237974405288696, + "learning_rate": 3.0007352312782632e-05, + "loss": 2.422, + "step": 14963 + }, + { + "epoch": 1.2076507142280688, + "grad_norm": 0.7214741110801697, + "learning_rate": 2.9996077947387015e-05, + "loss": 2.4428, + "step": 14964 + }, + { + "epoch": 1.2077314179646517, + "grad_norm": 0.7264460921287537, + "learning_rate": 2.998480532667215e-05, + "loss": 
2.4669, + "step": 14965 + }, + { + "epoch": 1.2078121217012348, + "grad_norm": 0.7055517435073853, + "learning_rate": 2.9973534450918928e-05, + "loss": 2.5082, + "step": 14966 + }, + { + "epoch": 1.2078928254378178, + "grad_norm": 0.6886781454086304, + "learning_rate": 2.9962265320408268e-05, + "loss": 2.4697, + "step": 14967 + }, + { + "epoch": 1.2079735291744007, + "grad_norm": 0.6875878572463989, + "learning_rate": 2.9950997935421076e-05, + "loss": 2.4384, + "step": 14968 + }, + { + "epoch": 1.2080542329109838, + "grad_norm": 0.7586886882781982, + "learning_rate": 2.99397322962381e-05, + "loss": 2.4088, + "step": 14969 + }, + { + "epoch": 1.2081349366475669, + "grad_norm": 0.6744365096092224, + "learning_rate": 2.992846840314013e-05, + "loss": 2.4109, + "step": 14970 + }, + { + "epoch": 1.2082156403841497, + "grad_norm": 0.6589661240577698, + "learning_rate": 2.9917206256407893e-05, + "loss": 2.4386, + "step": 14971 + }, + { + "epoch": 1.2082963441207328, + "grad_norm": 0.6787264943122864, + "learning_rate": 2.990594585632208e-05, + "loss": 2.401, + "step": 14972 + }, + { + "epoch": 1.2083770478573157, + "grad_norm": 0.710517406463623, + "learning_rate": 2.9894687203163317e-05, + "loss": 2.4813, + "step": 14973 + }, + { + "epoch": 1.2084577515938988, + "grad_norm": 0.676110029220581, + "learning_rate": 2.988343029721221e-05, + "loss": 2.4654, + "step": 14974 + }, + { + "epoch": 1.2085384553304819, + "grad_norm": 0.6940518617630005, + "learning_rate": 2.9872175138749336e-05, + "loss": 2.4188, + "step": 14975 + }, + { + "epoch": 1.2086191590670647, + "grad_norm": 0.6849910020828247, + "learning_rate": 2.9860921728055147e-05, + "loss": 2.384, + "step": 14976 + }, + { + "epoch": 1.2086998628036478, + "grad_norm": 0.6902467608451843, + "learning_rate": 2.9849670065410128e-05, + "loss": 2.4364, + "step": 14977 + }, + { + "epoch": 1.2087805665402307, + "grad_norm": 0.6742224097251892, + "learning_rate": 2.9838420151094747e-05, + "loss": 2.5085, + "step": 14978 + }, + { 
+ "epoch": 1.2088612702768138, + "grad_norm": 0.6635094285011292, + "learning_rate": 2.9827171985389303e-05, + "loss": 2.3635, + "step": 14979 + }, + { + "epoch": 1.2089419740133969, + "grad_norm": 0.7189158201217651, + "learning_rate": 2.9815925568574165e-05, + "loss": 2.458, + "step": 14980 + }, + { + "epoch": 1.2090226777499797, + "grad_norm": 0.7370143532752991, + "learning_rate": 2.9804680900929628e-05, + "loss": 2.4543, + "step": 14981 + }, + { + "epoch": 1.2091033814865628, + "grad_norm": 0.7410217523574829, + "learning_rate": 2.979343798273593e-05, + "loss": 2.4537, + "step": 14982 + }, + { + "epoch": 1.209184085223146, + "grad_norm": 0.7525770664215088, + "learning_rate": 2.9782196814273277e-05, + "loss": 2.5147, + "step": 14983 + }, + { + "epoch": 1.2092647889597288, + "grad_norm": 0.7302291393280029, + "learning_rate": 2.9770957395821863e-05, + "loss": 2.4711, + "step": 14984 + }, + { + "epoch": 1.2093454926963119, + "grad_norm": 0.7154920101165771, + "learning_rate": 2.975971972766175e-05, + "loss": 2.5224, + "step": 14985 + }, + { + "epoch": 1.209426196432895, + "grad_norm": 0.6827684640884399, + "learning_rate": 2.9748483810073025e-05, + "loss": 2.4477, + "step": 14986 + }, + { + "epoch": 1.2095069001694778, + "grad_norm": 0.7753484845161438, + "learning_rate": 2.973724964333575e-05, + "loss": 2.4257, + "step": 14987 + }, + { + "epoch": 1.209587603906061, + "grad_norm": 0.7146809101104736, + "learning_rate": 2.9726017227729862e-05, + "loss": 2.3953, + "step": 14988 + }, + { + "epoch": 1.2096683076426438, + "grad_norm": 0.7360730767250061, + "learning_rate": 2.9714786563535313e-05, + "loss": 2.3774, + "step": 14989 + }, + { + "epoch": 1.2097490113792269, + "grad_norm": 0.7159923911094666, + "learning_rate": 2.970355765103201e-05, + "loss": 2.4068, + "step": 14990 + }, + { + "epoch": 1.20982971511581, + "grad_norm": 0.6732171773910522, + "learning_rate": 2.969233049049982e-05, + "loss": 2.4215, + "step": 14991 + }, + { + "epoch": 1.2099104188523928, + 
"grad_norm": 0.749812126159668, + "learning_rate": 2.968110508221853e-05, + "loss": 2.4415, + "step": 14992 + }, + { + "epoch": 1.209991122588976, + "grad_norm": 0.7185530662536621, + "learning_rate": 2.9669881426467916e-05, + "loss": 2.4536, + "step": 14993 + }, + { + "epoch": 1.2100718263255588, + "grad_norm": 0.6757143139839172, + "learning_rate": 2.9658659523527733e-05, + "loss": 2.3892, + "step": 14994 + }, + { + "epoch": 1.2101525300621419, + "grad_norm": 0.7187495231628418, + "learning_rate": 2.96474393736776e-05, + "loss": 2.434, + "step": 14995 + }, + { + "epoch": 1.210233233798725, + "grad_norm": 0.7016372680664062, + "learning_rate": 2.9636220977197182e-05, + "loss": 2.4903, + "step": 14996 + }, + { + "epoch": 1.2103139375353078, + "grad_norm": 0.7528983950614929, + "learning_rate": 2.9625004334366103e-05, + "loss": 2.3829, + "step": 14997 + }, + { + "epoch": 1.210394641271891, + "grad_norm": 0.6735692024230957, + "learning_rate": 2.9613789445463837e-05, + "loss": 2.3844, + "step": 14998 + }, + { + "epoch": 1.210475345008474, + "grad_norm": 0.6825322508811951, + "learning_rate": 2.9602576310769935e-05, + "loss": 2.4691, + "step": 14999 + }, + { + "epoch": 1.2105560487450568, + "grad_norm": 0.7507675290107727, + "learning_rate": 2.959136493056389e-05, + "loss": 2.4605, + "step": 15000 + }, + { + "epoch": 1.2105560487450568, + "eval_loss": 2.3882925510406494, + "eval_runtime": 1014.0781, + "eval_samples_per_second": 2.584, + "eval_steps_per_second": 0.431, + "step": 15000 + }, + { + "epoch": 1.21063675248164, + "grad_norm": 0.6937146782875061, + "learning_rate": 2.9580155305125044e-05, + "loss": 2.4444, + "step": 15001 + }, + { + "epoch": 1.210717456218223, + "grad_norm": 0.6572179794311523, + "learning_rate": 2.9568947434732775e-05, + "loss": 2.4373, + "step": 15002 + }, + { + "epoch": 1.2107981599548059, + "grad_norm": 0.7420738935470581, + "learning_rate": 2.955774131966651e-05, + "loss": 2.4046, + "step": 15003 + }, + { + "epoch": 1.210878863691389, + 
"grad_norm": 0.7952237129211426, + "learning_rate": 2.954653696020543e-05, + "loss": 2.4082, + "step": 15004 + }, + { + "epoch": 1.2109595674279718, + "grad_norm": 0.6640750765800476, + "learning_rate": 2.9535334356628817e-05, + "loss": 2.4109, + "step": 15005 + }, + { + "epoch": 1.211040271164555, + "grad_norm": 0.6968019008636475, + "learning_rate": 2.952413350921588e-05, + "loss": 2.3991, + "step": 15006 + }, + { + "epoch": 1.211120974901138, + "grad_norm": 0.7174221277236938, + "learning_rate": 2.9512934418245787e-05, + "loss": 2.3909, + "step": 15007 + }, + { + "epoch": 1.2112016786377209, + "grad_norm": 0.6854268908500671, + "learning_rate": 2.9501737083997595e-05, + "loss": 2.4321, + "step": 15008 + }, + { + "epoch": 1.211282382374304, + "grad_norm": 0.6705672740936279, + "learning_rate": 2.949054150675039e-05, + "loss": 2.4749, + "step": 15009 + }, + { + "epoch": 1.2113630861108868, + "grad_norm": 0.7871068716049194, + "learning_rate": 2.9479347686783244e-05, + "loss": 2.424, + "step": 15010 + }, + { + "epoch": 1.21144378984747, + "grad_norm": 0.8194620609283447, + "learning_rate": 2.946815562437506e-05, + "loss": 2.461, + "step": 15011 + }, + { + "epoch": 1.211524493584053, + "grad_norm": 0.673367977142334, + "learning_rate": 2.9456965319804818e-05, + "loss": 2.4212, + "step": 15012 + }, + { + "epoch": 1.2116051973206359, + "grad_norm": 0.6630001068115234, + "learning_rate": 2.9445776773351397e-05, + "loss": 2.4393, + "step": 15013 + }, + { + "epoch": 1.211685901057219, + "grad_norm": 0.676170825958252, + "learning_rate": 2.943458998529365e-05, + "loss": 2.3889, + "step": 15014 + }, + { + "epoch": 1.211766604793802, + "grad_norm": 0.6951417326927185, + "learning_rate": 2.942340495591037e-05, + "loss": 2.4088, + "step": 15015 + }, + { + "epoch": 1.211847308530385, + "grad_norm": 0.6909857988357544, + "learning_rate": 2.941222168548037e-05, + "loss": 2.4282, + "step": 15016 + }, + { + "epoch": 1.211928012266968, + "grad_norm": 0.653264045715332, + 
"learning_rate": 2.9401040174282292e-05, + "loss": 2.4369, + "step": 15017 + }, + { + "epoch": 1.2120087160035509, + "grad_norm": 0.6994543075561523, + "learning_rate": 2.938986042259484e-05, + "loss": 2.419, + "step": 15018 + }, + { + "epoch": 1.212089419740134, + "grad_norm": 0.709015965461731, + "learning_rate": 2.9378682430696668e-05, + "loss": 2.4747, + "step": 15019 + }, + { + "epoch": 1.212170123476717, + "grad_norm": 0.6899579167366028, + "learning_rate": 2.9367506198866313e-05, + "loss": 2.4134, + "step": 15020 + }, + { + "epoch": 1.2122508272133, + "grad_norm": 0.6811912059783936, + "learning_rate": 2.9356331727382337e-05, + "loss": 2.449, + "step": 15021 + }, + { + "epoch": 1.212331530949883, + "grad_norm": 0.8119748830795288, + "learning_rate": 2.9345159016523237e-05, + "loss": 2.4463, + "step": 15022 + }, + { + "epoch": 1.2124122346864659, + "grad_norm": 0.7323578000068665, + "learning_rate": 2.9333988066567463e-05, + "loss": 2.4305, + "step": 15023 + }, + { + "epoch": 1.212492938423049, + "grad_norm": 0.6639837622642517, + "learning_rate": 2.9322818877793436e-05, + "loss": 2.4237, + "step": 15024 + }, + { + "epoch": 1.212573642159632, + "grad_norm": 0.669623076915741, + "learning_rate": 2.9311651450479516e-05, + "loss": 2.4436, + "step": 15025 + }, + { + "epoch": 1.212654345896215, + "grad_norm": 0.7200437784194946, + "learning_rate": 2.9300485784904054e-05, + "loss": 2.4399, + "step": 15026 + }, + { + "epoch": 1.212735049632798, + "grad_norm": 0.7015525102615356, + "learning_rate": 2.9289321881345254e-05, + "loss": 2.4696, + "step": 15027 + }, + { + "epoch": 1.212815753369381, + "grad_norm": 0.74539715051651, + "learning_rate": 2.9278159740081402e-05, + "loss": 2.4204, + "step": 15028 + }, + { + "epoch": 1.212896457105964, + "grad_norm": 0.6373662352561951, + "learning_rate": 2.9266999361390713e-05, + "loss": 2.4273, + "step": 15029 + }, + { + "epoch": 1.212977160842547, + "grad_norm": 0.8213370442390442, + "learning_rate": 2.9255840745551256e-05, + 
"loss": 2.4166, + "step": 15030 + }, + { + "epoch": 1.2130578645791301, + "grad_norm": 0.7386181354522705, + "learning_rate": 2.9244683892841185e-05, + "loss": 2.3973, + "step": 15031 + }, + { + "epoch": 1.213138568315713, + "grad_norm": 0.7939273118972778, + "learning_rate": 2.9233528803538534e-05, + "loss": 2.5593, + "step": 15032 + }, + { + "epoch": 1.213219272052296, + "grad_norm": 0.7580689191818237, + "learning_rate": 2.9222375477921347e-05, + "loss": 2.4255, + "step": 15033 + }, + { + "epoch": 1.213299975788879, + "grad_norm": 0.7680409550666809, + "learning_rate": 2.9211223916267573e-05, + "loss": 2.4447, + "step": 15034 + }, + { + "epoch": 1.213380679525462, + "grad_norm": 0.6998565196990967, + "learning_rate": 2.9200074118855135e-05, + "loss": 2.4061, + "step": 15035 + }, + { + "epoch": 1.2134613832620451, + "grad_norm": 0.6673001050949097, + "learning_rate": 2.9188926085961954e-05, + "loss": 2.3989, + "step": 15036 + }, + { + "epoch": 1.213542086998628, + "grad_norm": 0.683215320110321, + "learning_rate": 2.9177779817865815e-05, + "loss": 2.4078, + "step": 15037 + }, + { + "epoch": 1.213622790735211, + "grad_norm": 0.696967363357544, + "learning_rate": 2.9166635314844527e-05, + "loss": 2.4224, + "step": 15038 + }, + { + "epoch": 1.213703494471794, + "grad_norm": 0.6930364370346069, + "learning_rate": 2.915549257717588e-05, + "loss": 2.4112, + "step": 15039 + }, + { + "epoch": 1.213784198208377, + "grad_norm": 0.7387405633926392, + "learning_rate": 2.914435160513752e-05, + "loss": 2.4458, + "step": 15040 + }, + { + "epoch": 1.21386490194496, + "grad_norm": 0.6615941524505615, + "learning_rate": 2.913321239900714e-05, + "loss": 2.4406, + "step": 15041 + }, + { + "epoch": 1.213945605681543, + "grad_norm": 0.7520569562911987, + "learning_rate": 2.912207495906235e-05, + "loss": 2.3991, + "step": 15042 + }, + { + "epoch": 1.214026309418126, + "grad_norm": 0.6952454447746277, + "learning_rate": 2.911093928558072e-05, + "loss": 2.4404, + "step": 15043 + }, + { + 
"epoch": 1.2141070131547091, + "grad_norm": 0.7595344185829163, + "learning_rate": 2.9099805378839794e-05, + "loss": 2.551, + "step": 15044 + }, + { + "epoch": 1.214187716891292, + "grad_norm": 0.6645220518112183, + "learning_rate": 2.9088673239117094e-05, + "loss": 2.4167, + "step": 15045 + }, + { + "epoch": 1.214268420627875, + "grad_norm": 0.6433377861976624, + "learning_rate": 2.907754286668998e-05, + "loss": 2.3873, + "step": 15046 + }, + { + "epoch": 1.2143491243644582, + "grad_norm": 0.6806936860084534, + "learning_rate": 2.9066414261835894e-05, + "loss": 2.3868, + "step": 15047 + }, + { + "epoch": 1.214429828101041, + "grad_norm": 0.7261343598365784, + "learning_rate": 2.905528742483222e-05, + "loss": 2.4785, + "step": 15048 + }, + { + "epoch": 1.2145105318376241, + "grad_norm": 0.6495440602302551, + "learning_rate": 2.9044162355956196e-05, + "loss": 2.4167, + "step": 15049 + }, + { + "epoch": 1.214591235574207, + "grad_norm": 0.6816607117652893, + "learning_rate": 2.9033039055485135e-05, + "loss": 2.459, + "step": 15050 + }, + { + "epoch": 1.21467193931079, + "grad_norm": 0.6624214053153992, + "learning_rate": 2.902191752369624e-05, + "loss": 2.4498, + "step": 15051 + }, + { + "epoch": 1.2147526430473732, + "grad_norm": 0.6800024509429932, + "learning_rate": 2.9010797760866737e-05, + "loss": 2.4442, + "step": 15052 + }, + { + "epoch": 1.214833346783956, + "grad_norm": 0.711705207824707, + "learning_rate": 2.8999679767273667e-05, + "loss": 2.422, + "step": 15053 + }, + { + "epoch": 1.2149140505205391, + "grad_norm": 0.6854784488677979, + "learning_rate": 2.898856354319419e-05, + "loss": 2.4567, + "step": 15054 + }, + { + "epoch": 1.214994754257122, + "grad_norm": 0.6676114797592163, + "learning_rate": 2.8977449088905373e-05, + "loss": 2.3913, + "step": 15055 + }, + { + "epoch": 1.215075457993705, + "grad_norm": 0.6893348693847656, + "learning_rate": 2.8966336404684145e-05, + "loss": 2.4407, + "step": 15056 + }, + { + "epoch": 1.2151561617302882, + 
"grad_norm": 0.6749289035797119, + "learning_rate": 2.8955225490807514e-05, + "loss": 2.409, + "step": 15057 + }, + { + "epoch": 1.215236865466871, + "grad_norm": 0.6998956203460693, + "learning_rate": 2.8944116347552387e-05, + "loss": 2.4297, + "step": 15058 + }, + { + "epoch": 1.2153175692034541, + "grad_norm": 0.7040024399757385, + "learning_rate": 2.8933008975195596e-05, + "loss": 2.4262, + "step": 15059 + }, + { + "epoch": 1.2153982729400372, + "grad_norm": 0.6638362407684326, + "learning_rate": 2.8921903374014005e-05, + "loss": 2.4355, + "step": 15060 + }, + { + "epoch": 1.21547897667662, + "grad_norm": 0.6864547729492188, + "learning_rate": 2.8910799544284407e-05, + "loss": 2.4493, + "step": 15061 + }, + { + "epoch": 1.2155596804132032, + "grad_norm": 0.707383394241333, + "learning_rate": 2.8899697486283474e-05, + "loss": 2.4604, + "step": 15062 + }, + { + "epoch": 1.2156403841497863, + "grad_norm": 0.7121397852897644, + "learning_rate": 2.888859720028795e-05, + "loss": 2.4272, + "step": 15063 + }, + { + "epoch": 1.2157210878863691, + "grad_norm": 0.7600439786911011, + "learning_rate": 2.8877498686574455e-05, + "loss": 2.4499, + "step": 15064 + }, + { + "epoch": 1.2158017916229522, + "grad_norm": 0.6654962301254272, + "learning_rate": 2.886640194541962e-05, + "loss": 2.4632, + "step": 15065 + }, + { + "epoch": 1.215882495359535, + "grad_norm": 0.7138063311576843, + "learning_rate": 2.8855306977099994e-05, + "loss": 2.4321, + "step": 15066 + }, + { + "epoch": 1.2159631990961182, + "grad_norm": 0.672604501247406, + "learning_rate": 2.884421378189208e-05, + "loss": 2.4026, + "step": 15067 + }, + { + "epoch": 1.2160439028327013, + "grad_norm": 0.6894693970680237, + "learning_rate": 2.8833122360072405e-05, + "loss": 2.4213, + "step": 15068 + }, + { + "epoch": 1.2161246065692841, + "grad_norm": 0.6784985065460205, + "learning_rate": 2.8822032711917325e-05, + "loss": 2.4207, + "step": 15069 + }, + { + "epoch": 1.2162053103058672, + "grad_norm": 0.6569294929504395, 
+ "learning_rate": 2.8810944837703248e-05, + "loss": 2.4142, + "step": 15070 + }, + { + "epoch": 1.21628601404245, + "grad_norm": 0.7240702509880066, + "learning_rate": 2.879985873770654e-05, + "loss": 2.4173, + "step": 15071 + }, + { + "epoch": 1.2163667177790332, + "grad_norm": 0.6935575604438782, + "learning_rate": 2.8788774412203444e-05, + "loss": 2.4487, + "step": 15072 + }, + { + "epoch": 1.2164474215156162, + "grad_norm": 0.6903246641159058, + "learning_rate": 2.8777691861470234e-05, + "loss": 2.4193, + "step": 15073 + }, + { + "epoch": 1.216528125252199, + "grad_norm": 0.7982182502746582, + "learning_rate": 2.8766611085783123e-05, + "loss": 2.492, + "step": 15074 + }, + { + "epoch": 1.2166088289887822, + "grad_norm": 0.6958058476448059, + "learning_rate": 2.875553208541827e-05, + "loss": 2.4198, + "step": 15075 + }, + { + "epoch": 1.2166895327253653, + "grad_norm": 0.6869969964027405, + "learning_rate": 2.8744454860651794e-05, + "loss": 2.3768, + "step": 15076 + }, + { + "epoch": 1.2167702364619482, + "grad_norm": 0.7263007760047913, + "learning_rate": 2.8733379411759796e-05, + "loss": 2.386, + "step": 15077 + }, + { + "epoch": 1.2168509401985312, + "grad_norm": 0.7010302543640137, + "learning_rate": 2.872230573901825e-05, + "loss": 2.4417, + "step": 15078 + }, + { + "epoch": 1.216931643935114, + "grad_norm": 0.818980872631073, + "learning_rate": 2.8711233842703156e-05, + "loss": 2.433, + "step": 15079 + }, + { + "epoch": 1.2170123476716972, + "grad_norm": 0.6937929391860962, + "learning_rate": 2.87001637230905e-05, + "loss": 2.379, + "step": 15080 + }, + { + "epoch": 1.2170930514082803, + "grad_norm": 0.6954175233840942, + "learning_rate": 2.868909538045612e-05, + "loss": 2.4296, + "step": 15081 + }, + { + "epoch": 1.2171737551448631, + "grad_norm": 0.7177354097366333, + "learning_rate": 2.8678028815075887e-05, + "loss": 2.3978, + "step": 15082 + }, + { + "epoch": 1.2172544588814462, + "grad_norm": 0.7100846171379089, + "learning_rate": 
2.8666964027225607e-05, + "loss": 2.4566, + "step": 15083 + }, + { + "epoch": 1.217335162618029, + "grad_norm": 0.6909635066986084, + "learning_rate": 2.8655901017181064e-05, + "loss": 2.4772, + "step": 15084 + }, + { + "epoch": 1.2174158663546122, + "grad_norm": 0.7319501638412476, + "learning_rate": 2.8644839785217947e-05, + "loss": 2.4402, + "step": 15085 + }, + { + "epoch": 1.2174965700911953, + "grad_norm": 0.6691421270370483, + "learning_rate": 2.8633780331611958e-05, + "loss": 2.4465, + "step": 15086 + }, + { + "epoch": 1.2175772738277781, + "grad_norm": 0.7028824687004089, + "learning_rate": 2.8622722656638745e-05, + "loss": 2.4765, + "step": 15087 + }, + { + "epoch": 1.2176579775643612, + "grad_norm": 0.7428398728370667, + "learning_rate": 2.861166676057383e-05, + "loss": 2.441, + "step": 15088 + }, + { + "epoch": 1.2177386813009443, + "grad_norm": 0.6715269684791565, + "learning_rate": 2.8600612643692803e-05, + "loss": 2.4621, + "step": 15089 + }, + { + "epoch": 1.2178193850375272, + "grad_norm": 0.6768512725830078, + "learning_rate": 2.8589560306271168e-05, + "loss": 2.4257, + "step": 15090 + }, + { + "epoch": 1.2179000887741103, + "grad_norm": 0.7442535758018494, + "learning_rate": 2.8578509748584326e-05, + "loss": 2.424, + "step": 15091 + }, + { + "epoch": 1.2179807925106934, + "grad_norm": 0.7275974154472351, + "learning_rate": 2.8567460970907722e-05, + "loss": 2.4698, + "step": 15092 + }, + { + "epoch": 1.2180614962472762, + "grad_norm": 0.7050346732139587, + "learning_rate": 2.8556413973516727e-05, + "loss": 2.4734, + "step": 15093 + }, + { + "epoch": 1.2181421999838593, + "grad_norm": 0.7325939536094666, + "learning_rate": 2.854536875668664e-05, + "loss": 2.4166, + "step": 15094 + }, + { + "epoch": 1.2182229037204422, + "grad_norm": 0.6764184236526489, + "learning_rate": 2.8534325320692746e-05, + "loss": 2.4742, + "step": 15095 + }, + { + "epoch": 1.2183036074570253, + "grad_norm": 0.7405500411987305, + "learning_rate": 2.8523283665810318e-05, + 
"loss": 2.3959, + "step": 15096 + }, + { + "epoch": 1.2183843111936083, + "grad_norm": 0.6714199185371399, + "learning_rate": 2.8512243792314465e-05, + "loss": 2.4571, + "step": 15097 + }, + { + "epoch": 1.2184650149301912, + "grad_norm": 0.6779391169548035, + "learning_rate": 2.8501205700480372e-05, + "loss": 2.3745, + "step": 15098 + }, + { + "epoch": 1.2185457186667743, + "grad_norm": 0.6876079440116882, + "learning_rate": 2.8490169390583134e-05, + "loss": 2.4432, + "step": 15099 + }, + { + "epoch": 1.2186264224033572, + "grad_norm": 0.7092362642288208, + "learning_rate": 2.8479134862897826e-05, + "loss": 2.4716, + "step": 15100 + }, + { + "epoch": 1.2187071261399403, + "grad_norm": 0.6901989579200745, + "learning_rate": 2.8468102117699414e-05, + "loss": 2.417, + "step": 15101 + }, + { + "epoch": 1.2187878298765233, + "grad_norm": 0.7011592984199524, + "learning_rate": 2.8457071155262884e-05, + "loss": 2.4439, + "step": 15102 + }, + { + "epoch": 1.2188685336131062, + "grad_norm": 0.6923472285270691, + "learning_rate": 2.8446041975863146e-05, + "loss": 2.4247, + "step": 15103 + }, + { + "epoch": 1.2189492373496893, + "grad_norm": 0.6948748230934143, + "learning_rate": 2.843501457977509e-05, + "loss": 2.3902, + "step": 15104 + }, + { + "epoch": 1.2190299410862724, + "grad_norm": 0.7034386396408081, + "learning_rate": 2.842398896727354e-05, + "loss": 2.4277, + "step": 15105 + }, + { + "epoch": 1.2191106448228552, + "grad_norm": 0.7965617775917053, + "learning_rate": 2.8412965138633318e-05, + "loss": 2.435, + "step": 15106 + }, + { + "epoch": 1.2191913485594383, + "grad_norm": 0.7371121644973755, + "learning_rate": 2.8401943094129112e-05, + "loss": 2.3928, + "step": 15107 + }, + { + "epoch": 1.2192720522960214, + "grad_norm": 0.7079561352729797, + "learning_rate": 2.839092283403564e-05, + "loss": 2.4706, + "step": 15108 + }, + { + "epoch": 1.2193527560326043, + "grad_norm": 0.6711337566375732, + "learning_rate": 2.8379904358627584e-05, + "loss": 2.4272, + "step": 
15109 + }, + { + "epoch": 1.2194334597691874, + "grad_norm": 0.6840410828590393, + "learning_rate": 2.836888766817951e-05, + "loss": 2.4174, + "step": 15110 + }, + { + "epoch": 1.2195141635057702, + "grad_norm": 0.700366199016571, + "learning_rate": 2.8357872762965986e-05, + "loss": 2.4667, + "step": 15111 + }, + { + "epoch": 1.2195948672423533, + "grad_norm": 0.7090682983398438, + "learning_rate": 2.8346859643261593e-05, + "loss": 2.3748, + "step": 15112 + }, + { + "epoch": 1.2196755709789364, + "grad_norm": 0.7965148687362671, + "learning_rate": 2.8335848309340717e-05, + "loss": 2.5138, + "step": 15113 + }, + { + "epoch": 1.2197562747155193, + "grad_norm": 0.7845773696899414, + "learning_rate": 2.8324838761477833e-05, + "loss": 2.4274, + "step": 15114 + }, + { + "epoch": 1.2198369784521024, + "grad_norm": 0.6545087099075317, + "learning_rate": 2.831383099994731e-05, + "loss": 2.4311, + "step": 15115 + }, + { + "epoch": 1.2199176821886852, + "grad_norm": 0.6846331357955933, + "learning_rate": 2.830282502502356e-05, + "loss": 2.4239, + "step": 15116 + }, + { + "epoch": 1.2199983859252683, + "grad_norm": 0.7062236070632935, + "learning_rate": 2.8291820836980798e-05, + "loss": 2.4429, + "step": 15117 + }, + { + "epoch": 1.2200790896618514, + "grad_norm": 0.7526285648345947, + "learning_rate": 2.8280818436093315e-05, + "loss": 2.4882, + "step": 15118 + }, + { + "epoch": 1.2201597933984343, + "grad_norm": 0.6853364109992981, + "learning_rate": 2.8269817822635337e-05, + "loss": 2.3803, + "step": 15119 + }, + { + "epoch": 1.2202404971350174, + "grad_norm": 0.7796143293380737, + "learning_rate": 2.8258818996880964e-05, + "loss": 2.4157, + "step": 15120 + }, + { + "epoch": 1.2203212008716005, + "grad_norm": 0.7202157378196716, + "learning_rate": 2.824782195910437e-05, + "loss": 2.5101, + "step": 15121 + }, + { + "epoch": 1.2204019046081833, + "grad_norm": 0.6730707287788391, + "learning_rate": 2.8236826709579644e-05, + "loss": 2.4397, + "step": 15122 + }, + { + "epoch": 
1.2204826083447664, + "grad_norm": 0.7840865850448608, + "learning_rate": 2.8225833248580745e-05, + "loss": 2.4452, + "step": 15123 + }, + { + "epoch": 1.2205633120813493, + "grad_norm": 0.8323497772216797, + "learning_rate": 2.821484157638171e-05, + "loss": 2.4775, + "step": 15124 + }, + { + "epoch": 1.2206440158179324, + "grad_norm": 0.6699438691139221, + "learning_rate": 2.8203851693256466e-05, + "loss": 2.3958, + "step": 15125 + }, + { + "epoch": 1.2207247195545154, + "grad_norm": 0.6711557507514954, + "learning_rate": 2.8192863599478923e-05, + "loss": 2.477, + "step": 15126 + }, + { + "epoch": 1.2208054232910983, + "grad_norm": 0.6255797743797302, + "learning_rate": 2.8181877295322922e-05, + "loss": 2.4222, + "step": 15127 + }, + { + "epoch": 1.2208861270276814, + "grad_norm": 0.7313731908798218, + "learning_rate": 2.8170892781062297e-05, + "loss": 2.4343, + "step": 15128 + }, + { + "epoch": 1.2209668307642643, + "grad_norm": 0.6611476540565491, + "learning_rate": 2.815991005697076e-05, + "loss": 2.3844, + "step": 15129 + }, + { + "epoch": 1.2210475345008474, + "grad_norm": 0.7293661236763, + "learning_rate": 2.8148929123322065e-05, + "loss": 2.3912, + "step": 15130 + }, + { + "epoch": 1.2211282382374304, + "grad_norm": 0.7150777578353882, + "learning_rate": 2.8137949980389866e-05, + "loss": 2.4227, + "step": 15131 + }, + { + "epoch": 1.2212089419740133, + "grad_norm": 0.7001000642776489, + "learning_rate": 2.8126972628447845e-05, + "loss": 2.4751, + "step": 15132 + }, + { + "epoch": 1.2212896457105964, + "grad_norm": 0.7106043100357056, + "learning_rate": 2.8115997067769505e-05, + "loss": 2.4127, + "step": 15133 + }, + { + "epoch": 1.2213703494471795, + "grad_norm": 0.6969115138053894, + "learning_rate": 2.810502329862842e-05, + "loss": 2.4073, + "step": 15134 + }, + { + "epoch": 1.2214510531837623, + "grad_norm": 0.7493317127227783, + "learning_rate": 2.8094051321298098e-05, + "loss": 2.4541, + "step": 15135 + }, + { + "epoch": 1.2215317569203454, + 
"grad_norm": 0.6499322652816772, + "learning_rate": 2.808308113605198e-05, + "loss": 2.4057, + "step": 15136 + }, + { + "epoch": 1.2216124606569285, + "grad_norm": 0.6716788411140442, + "learning_rate": 2.807211274316347e-05, + "loss": 2.3856, + "step": 15137 + }, + { + "epoch": 1.2216931643935114, + "grad_norm": 0.7724741101264954, + "learning_rate": 2.8061146142905958e-05, + "loss": 2.4652, + "step": 15138 + }, + { + "epoch": 1.2217738681300945, + "grad_norm": 0.7014325261116028, + "learning_rate": 2.8050181335552718e-05, + "loss": 2.4506, + "step": 15139 + }, + { + "epoch": 1.2218545718666773, + "grad_norm": 0.6705317497253418, + "learning_rate": 2.8039218321377026e-05, + "loss": 2.4581, + "step": 15140 + }, + { + "epoch": 1.2219352756032604, + "grad_norm": 0.709973931312561, + "learning_rate": 2.8028257100652156e-05, + "loss": 2.427, + "step": 15141 + }, + { + "epoch": 1.2220159793398435, + "grad_norm": 0.7021297812461853, + "learning_rate": 2.801729767365122e-05, + "loss": 2.3784, + "step": 15142 + }, + { + "epoch": 1.2220966830764264, + "grad_norm": 0.7431899905204773, + "learning_rate": 2.8006340040647393e-05, + "loss": 2.4135, + "step": 15143 + }, + { + "epoch": 1.2221773868130095, + "grad_norm": 0.6724472045898438, + "learning_rate": 2.7995384201913765e-05, + "loss": 2.3966, + "step": 15144 + }, + { + "epoch": 1.2222580905495923, + "grad_norm": 0.7381375432014465, + "learning_rate": 2.7984430157723384e-05, + "loss": 2.4853, + "step": 15145 + }, + { + "epoch": 1.2223387942861754, + "grad_norm": 0.6809988617897034, + "learning_rate": 2.7973477908349255e-05, + "loss": 2.408, + "step": 15146 + }, + { + "epoch": 1.2224194980227585, + "grad_norm": 0.7042898535728455, + "learning_rate": 2.7962527454064337e-05, + "loss": 2.3981, + "step": 15147 + }, + { + "epoch": 1.2225002017593414, + "grad_norm": 0.7096118330955505, + "learning_rate": 2.7951578795141576e-05, + "loss": 2.4175, + "step": 15148 + }, + { + "epoch": 1.2225809054959245, + "grad_norm": 
0.7271720767021179, + "learning_rate": 2.794063193185378e-05, + "loss": 2.4193, + "step": 15149 + }, + { + "epoch": 1.2226616092325076, + "grad_norm": 0.7000352740287781, + "learning_rate": 2.7929686864473792e-05, + "loss": 2.422, + "step": 15150 + }, + { + "epoch": 1.2227423129690904, + "grad_norm": 0.6983076333999634, + "learning_rate": 2.791874359327443e-05, + "loss": 2.4613, + "step": 15151 + }, + { + "epoch": 1.2228230167056735, + "grad_norm": 0.7520100474357605, + "learning_rate": 2.7907802118528383e-05, + "loss": 2.4147, + "step": 15152 + }, + { + "epoch": 1.2229037204422566, + "grad_norm": 0.7056650519371033, + "learning_rate": 2.789686244050834e-05, + "loss": 2.4568, + "step": 15153 + }, + { + "epoch": 1.2229844241788395, + "grad_norm": 0.7092614769935608, + "learning_rate": 2.7885924559486975e-05, + "loss": 2.4758, + "step": 15154 + }, + { + "epoch": 1.2230651279154225, + "grad_norm": 0.702521562576294, + "learning_rate": 2.7874988475736885e-05, + "loss": 2.4893, + "step": 15155 + }, + { + "epoch": 1.2231458316520054, + "grad_norm": 0.7454921007156372, + "learning_rate": 2.786405418953061e-05, + "loss": 2.4277, + "step": 15156 + }, + { + "epoch": 1.2232265353885885, + "grad_norm": 0.659503161907196, + "learning_rate": 2.7853121701140694e-05, + "loss": 2.4664, + "step": 15157 + }, + { + "epoch": 1.2233072391251716, + "grad_norm": 0.6368914842605591, + "learning_rate": 2.7842191010839556e-05, + "loss": 2.3728, + "step": 15158 + }, + { + "epoch": 1.2233879428617545, + "grad_norm": 0.7076737880706787, + "learning_rate": 2.783126211889965e-05, + "loss": 2.4204, + "step": 15159 + }, + { + "epoch": 1.2234686465983375, + "grad_norm": 0.718100905418396, + "learning_rate": 2.7820335025593325e-05, + "loss": 2.478, + "step": 15160 + }, + { + "epoch": 1.2235493503349204, + "grad_norm": 0.6804678440093994, + "learning_rate": 2.7809409731192972e-05, + "loss": 2.3755, + "step": 15161 + }, + { + "epoch": 1.2236300540715035, + "grad_norm": 0.7068643569946289, + 
"learning_rate": 2.77984862359708e-05, + "loss": 2.3713, + "step": 15162 + }, + { + "epoch": 1.2237107578080866, + "grad_norm": 0.7047072052955627, + "learning_rate": 2.7787564540199097e-05, + "loss": 2.4264, + "step": 15163 + }, + { + "epoch": 1.2237914615446694, + "grad_norm": 0.6985021829605103, + "learning_rate": 2.7776644644150076e-05, + "loss": 2.4101, + "step": 15164 + }, + { + "epoch": 1.2238721652812525, + "grad_norm": 0.7543687224388123, + "learning_rate": 2.776572654809583e-05, + "loss": 2.3722, + "step": 15165 + }, + { + "epoch": 1.2239528690178356, + "grad_norm": 0.7199926972389221, + "learning_rate": 2.7754810252308473e-05, + "loss": 2.3819, + "step": 15166 + }, + { + "epoch": 1.2240335727544185, + "grad_norm": 0.696756899356842, + "learning_rate": 2.7743895757060156e-05, + "loss": 2.4245, + "step": 15167 + }, + { + "epoch": 1.2241142764910016, + "grad_norm": 0.7848933339118958, + "learning_rate": 2.773298306262281e-05, + "loss": 2.4725, + "step": 15168 + }, + { + "epoch": 1.2241949802275847, + "grad_norm": 0.6819389462471008, + "learning_rate": 2.7722072169268432e-05, + "loss": 2.4338, + "step": 15169 + }, + { + "epoch": 1.2242756839641675, + "grad_norm": 0.7185801267623901, + "learning_rate": 2.7711163077268977e-05, + "loss": 2.4745, + "step": 15170 + }, + { + "epoch": 1.2243563877007506, + "grad_norm": 0.7645030617713928, + "learning_rate": 2.7700255786896278e-05, + "loss": 2.4677, + "step": 15171 + }, + { + "epoch": 1.2244370914373335, + "grad_norm": 0.6559275388717651, + "learning_rate": 2.7689350298422202e-05, + "loss": 2.386, + "step": 15172 + }, + { + "epoch": 1.2245177951739166, + "grad_norm": 0.6965066194534302, + "learning_rate": 2.767844661211856e-05, + "loss": 2.4022, + "step": 15173 + }, + { + "epoch": 1.2245984989104994, + "grad_norm": 0.6618858575820923, + "learning_rate": 2.7667544728257057e-05, + "loss": 2.3541, + "step": 15174 + }, + { + "epoch": 1.2246792026470825, + "grad_norm": 0.6635501980781555, + "learning_rate": 
2.765664464710941e-05, + "loss": 2.3984, + "step": 15175 + }, + { + "epoch": 1.2247599063836656, + "grad_norm": 0.6987191438674927, + "learning_rate": 2.764574636894729e-05, + "loss": 2.4637, + "step": 15176 + }, + { + "epoch": 1.2248406101202485, + "grad_norm": 0.7289232611656189, + "learning_rate": 2.7634849894042303e-05, + "loss": 2.4033, + "step": 15177 + }, + { + "epoch": 1.2249213138568316, + "grad_norm": 0.7245565056800842, + "learning_rate": 2.762395522266602e-05, + "loss": 2.4281, + "step": 15178 + }, + { + "epoch": 1.2250020175934146, + "grad_norm": 0.6946065425872803, + "learning_rate": 2.761306235508997e-05, + "loss": 2.3869, + "step": 15179 + }, + { + "epoch": 1.2250827213299975, + "grad_norm": 0.6381784677505493, + "learning_rate": 2.7602171291585666e-05, + "loss": 2.404, + "step": 15180 + }, + { + "epoch": 1.2251634250665806, + "grad_norm": 0.6893685460090637, + "learning_rate": 2.759128203242446e-05, + "loss": 2.4807, + "step": 15181 + }, + { + "epoch": 1.2252441288031637, + "grad_norm": 0.6640260815620422, + "learning_rate": 2.7580394577877787e-05, + "loss": 2.4036, + "step": 15182 + }, + { + "epoch": 1.2253248325397466, + "grad_norm": 0.7125177979469299, + "learning_rate": 2.7569508928217026e-05, + "loss": 2.3869, + "step": 15183 + }, + { + "epoch": 1.2254055362763296, + "grad_norm": 0.657865583896637, + "learning_rate": 2.7558625083713397e-05, + "loss": 2.3869, + "step": 15184 + }, + { + "epoch": 1.2254862400129125, + "grad_norm": 0.6776065230369568, + "learning_rate": 2.7547743044638197e-05, + "loss": 2.4128, + "step": 15185 + }, + { + "epoch": 1.2255669437494956, + "grad_norm": 0.7126299738883972, + "learning_rate": 2.753686281126263e-05, + "loss": 2.4465, + "step": 15186 + }, + { + "epoch": 1.2256476474860787, + "grad_norm": 0.6918273568153381, + "learning_rate": 2.7525984383857873e-05, + "loss": 2.428, + "step": 15187 + }, + { + "epoch": 1.2257283512226615, + "grad_norm": 0.7742759585380554, + "learning_rate": 2.7515107762695025e-05, + 
"loss": 2.4299, + "step": 15188 + }, + { + "epoch": 1.2258090549592446, + "grad_norm": 0.7194607853889465, + "learning_rate": 2.7504232948045205e-05, + "loss": 2.4315, + "step": 15189 + }, + { + "epoch": 1.2258897586958275, + "grad_norm": 0.6962646245956421, + "learning_rate": 2.7493359940179363e-05, + "loss": 2.4494, + "step": 15190 + }, + { + "epoch": 1.2259704624324106, + "grad_norm": 0.6681686639785767, + "learning_rate": 2.7482488739368538e-05, + "loss": 2.427, + "step": 15191 + }, + { + "epoch": 1.2260511661689937, + "grad_norm": 0.6589877009391785, + "learning_rate": 2.747161934588366e-05, + "loss": 2.4333, + "step": 15192 + }, + { + "epoch": 1.2261318699055765, + "grad_norm": 0.7415218949317932, + "learning_rate": 2.746075175999564e-05, + "loss": 2.4203, + "step": 15193 + }, + { + "epoch": 1.2262125736421596, + "grad_norm": 0.7371910214424133, + "learning_rate": 2.7449885981975276e-05, + "loss": 2.4684, + "step": 15194 + }, + { + "epoch": 1.2262932773787427, + "grad_norm": 0.7010802626609802, + "learning_rate": 2.7439022012093407e-05, + "loss": 2.4625, + "step": 15195 + }, + { + "epoch": 1.2263739811153256, + "grad_norm": 0.7125125527381897, + "learning_rate": 2.7428159850620773e-05, + "loss": 2.4075, + "step": 15196 + }, + { + "epoch": 1.2264546848519087, + "grad_norm": 0.701133668422699, + "learning_rate": 2.7417299497828107e-05, + "loss": 2.4525, + "step": 15197 + }, + { + "epoch": 1.2265353885884918, + "grad_norm": 0.7543410658836365, + "learning_rate": 2.7406440953986078e-05, + "loss": 2.474, + "step": 15198 + }, + { + "epoch": 1.2266160923250746, + "grad_norm": 0.69012051820755, + "learning_rate": 2.7395584219365323e-05, + "loss": 2.4853, + "step": 15199 + }, + { + "epoch": 1.2266967960616577, + "grad_norm": 0.6559048295021057, + "learning_rate": 2.7384729294236378e-05, + "loss": 2.4252, + "step": 15200 + }, + { + "epoch": 1.2267774997982406, + "grad_norm": 0.6603518128395081, + "learning_rate": 2.7373876178869794e-05, + "loss": 2.4047, + "step": 
15201 + }, + { + "epoch": 1.2268582035348237, + "grad_norm": 0.7159265279769897, + "learning_rate": 2.736302487353609e-05, + "loss": 2.4352, + "step": 15202 + }, + { + "epoch": 1.2269389072714068, + "grad_norm": 0.6784560084342957, + "learning_rate": 2.735217537850565e-05, + "loss": 2.3933, + "step": 15203 + }, + { + "epoch": 1.2270196110079896, + "grad_norm": 0.7341950535774231, + "learning_rate": 2.7341327694048903e-05, + "loss": 2.4514, + "step": 15204 + }, + { + "epoch": 1.2271003147445727, + "grad_norm": 0.726046621799469, + "learning_rate": 2.7330481820436204e-05, + "loss": 2.4427, + "step": 15205 + }, + { + "epoch": 1.2271810184811556, + "grad_norm": 0.6897192001342773, + "learning_rate": 2.7319637757937854e-05, + "loss": 2.4587, + "step": 15206 + }, + { + "epoch": 1.2272617222177387, + "grad_norm": 0.6981058716773987, + "learning_rate": 2.7308795506824124e-05, + "loss": 2.4297, + "step": 15207 + }, + { + "epoch": 1.2273424259543217, + "grad_norm": 0.694583535194397, + "learning_rate": 2.729795506736522e-05, + "loss": 2.3608, + "step": 15208 + }, + { + "epoch": 1.2274231296909046, + "grad_norm": 0.710192084312439, + "learning_rate": 2.728711643983136e-05, + "loss": 2.3733, + "step": 15209 + }, + { + "epoch": 1.2275038334274877, + "grad_norm": 0.7203633785247803, + "learning_rate": 2.7276279624492595e-05, + "loss": 2.389, + "step": 15210 + }, + { + "epoch": 1.2275845371640708, + "grad_norm": 0.7298668622970581, + "learning_rate": 2.726544462161905e-05, + "loss": 2.3981, + "step": 15211 + }, + { + "epoch": 1.2276652409006537, + "grad_norm": 0.6640039682388306, + "learning_rate": 2.725461143148078e-05, + "loss": 2.4073, + "step": 15212 + }, + { + "epoch": 1.2277459446372367, + "grad_norm": 0.7203015685081482, + "learning_rate": 2.724378005434772e-05, + "loss": 2.4901, + "step": 15213 + }, + { + "epoch": 1.2278266483738198, + "grad_norm": 0.6668895483016968, + "learning_rate": 2.723295049048985e-05, + "loss": 2.4482, + "step": 15214 + }, + { + "epoch": 
1.2279073521104027, + "grad_norm": 0.7551584839820862, + "learning_rate": 2.7222122740177103e-05, + "loss": 2.4877, + "step": 15215 + }, + { + "epoch": 1.2279880558469858, + "grad_norm": 0.707202672958374, + "learning_rate": 2.721129680367923e-05, + "loss": 2.4577, + "step": 15216 + }, + { + "epoch": 1.2280687595835686, + "grad_norm": 0.685153603553772, + "learning_rate": 2.7200472681266155e-05, + "loss": 2.476, + "step": 15217 + }, + { + "epoch": 1.2281494633201517, + "grad_norm": 0.6843041181564331, + "learning_rate": 2.718965037320762e-05, + "loss": 2.4164, + "step": 15218 + }, + { + "epoch": 1.2282301670567348, + "grad_norm": 0.6548978686332703, + "learning_rate": 2.7178829879773306e-05, + "loss": 2.4187, + "step": 15219 + }, + { + "epoch": 1.2283108707933177, + "grad_norm": 0.7037245035171509, + "learning_rate": 2.7168011201232902e-05, + "loss": 2.3621, + "step": 15220 + }, + { + "epoch": 1.2283915745299008, + "grad_norm": 0.6540676951408386, + "learning_rate": 2.7157194337856074e-05, + "loss": 2.4542, + "step": 15221 + }, + { + "epoch": 1.2284722782664836, + "grad_norm": 0.7699899673461914, + "learning_rate": 2.7146379289912338e-05, + "loss": 2.4639, + "step": 15222 + }, + { + "epoch": 1.2285529820030667, + "grad_norm": 0.7178743481636047, + "learning_rate": 2.713556605767128e-05, + "loss": 2.4222, + "step": 15223 + }, + { + "epoch": 1.2286336857396498, + "grad_norm": 0.6749793887138367, + "learning_rate": 2.7124754641402383e-05, + "loss": 2.4323, + "step": 15224 + }, + { + "epoch": 1.2287143894762327, + "grad_norm": 0.7035594582557678, + "learning_rate": 2.711394504137513e-05, + "loss": 2.4466, + "step": 15225 + }, + { + "epoch": 1.2287950932128158, + "grad_norm": 0.6518487930297852, + "learning_rate": 2.7103137257858868e-05, + "loss": 2.4969, + "step": 15226 + }, + { + "epoch": 1.2288757969493989, + "grad_norm": 0.6739057898521423, + "learning_rate": 2.7092331291122974e-05, + "loss": 2.406, + "step": 15227 + }, + { + "epoch": 1.2289565006859817, + 
"grad_norm": 0.6584770083427429, + "learning_rate": 2.7081527141436767e-05, + "loss": 2.4304, + "step": 15228 + }, + { + "epoch": 1.2290372044225648, + "grad_norm": 0.6846301555633545, + "learning_rate": 2.7070724809069514e-05, + "loss": 2.3995, + "step": 15229 + }, + { + "epoch": 1.2291179081591477, + "grad_norm": 0.6778364777565002, + "learning_rate": 2.705992429429044e-05, + "loss": 2.38, + "step": 15230 + }, + { + "epoch": 1.2291986118957308, + "grad_norm": 0.6957302689552307, + "learning_rate": 2.7049125597368753e-05, + "loss": 2.3973, + "step": 15231 + }, + { + "epoch": 1.2292793156323139, + "grad_norm": 0.730269193649292, + "learning_rate": 2.7038328718573514e-05, + "loss": 2.4829, + "step": 15232 + }, + { + "epoch": 1.2293600193688967, + "grad_norm": 0.7114049196243286, + "learning_rate": 2.702753365817384e-05, + "loss": 2.3902, + "step": 15233 + }, + { + "epoch": 1.2294407231054798, + "grad_norm": 0.7137531638145447, + "learning_rate": 2.7016740416438823e-05, + "loss": 2.3957, + "step": 15234 + }, + { + "epoch": 1.2295214268420627, + "grad_norm": 0.7178330421447754, + "learning_rate": 2.7005948993637386e-05, + "loss": 2.4429, + "step": 15235 + }, + { + "epoch": 1.2296021305786458, + "grad_norm": 0.6767767071723938, + "learning_rate": 2.6995159390038506e-05, + "loss": 2.4009, + "step": 15236 + }, + { + "epoch": 1.2296828343152288, + "grad_norm": 0.7713541984558105, + "learning_rate": 2.6984371605911086e-05, + "loss": 2.4326, + "step": 15237 + }, + { + "epoch": 1.2297635380518117, + "grad_norm": 0.7218228578567505, + "learning_rate": 2.6973585641523992e-05, + "loss": 2.4358, + "step": 15238 + }, + { + "epoch": 1.2298442417883948, + "grad_norm": 0.6782575249671936, + "learning_rate": 2.696280149714604e-05, + "loss": 2.3844, + "step": 15239 + }, + { + "epoch": 1.2299249455249779, + "grad_norm": 0.6825734972953796, + "learning_rate": 2.6952019173045982e-05, + "loss": 2.4621, + "step": 15240 + }, + { + "epoch": 1.2300056492615608, + "grad_norm": 
0.6587522625923157, + "learning_rate": 2.6941238669492608e-05, + "loss": 2.4465, + "step": 15241 + }, + { + "epoch": 1.2300863529981438, + "grad_norm": 0.6898796558380127, + "learning_rate": 2.6930459986754498e-05, + "loss": 2.4469, + "step": 15242 + }, + { + "epoch": 1.230167056734727, + "grad_norm": 0.6764062643051147, + "learning_rate": 2.6919683125100338e-05, + "loss": 2.4476, + "step": 15243 + }, + { + "epoch": 1.2302477604713098, + "grad_norm": 0.6647047400474548, + "learning_rate": 2.6908908084798733e-05, + "loss": 2.3677, + "step": 15244 + }, + { + "epoch": 1.2303284642078929, + "grad_norm": 0.7091608047485352, + "learning_rate": 2.6898134866118174e-05, + "loss": 2.4605, + "step": 15245 + }, + { + "epoch": 1.2304091679444757, + "grad_norm": 0.691007137298584, + "learning_rate": 2.6887363469327188e-05, + "loss": 2.4397, + "step": 15246 + }, + { + "epoch": 1.2304898716810588, + "grad_norm": 0.6685532927513123, + "learning_rate": 2.6876593894694214e-05, + "loss": 2.4279, + "step": 15247 + }, + { + "epoch": 1.230570575417642, + "grad_norm": 0.684474766254425, + "learning_rate": 2.686582614248767e-05, + "loss": 2.4162, + "step": 15248 + }, + { + "epoch": 1.2306512791542248, + "grad_norm": 0.657293975353241, + "learning_rate": 2.6855060212975915e-05, + "loss": 2.4337, + "step": 15249 + }, + { + "epoch": 1.2307319828908079, + "grad_norm": 0.7136504650115967, + "learning_rate": 2.684429610642729e-05, + "loss": 2.4156, + "step": 15250 + }, + { + "epoch": 1.2308126866273907, + "grad_norm": 0.6564410924911499, + "learning_rate": 2.6833533823110013e-05, + "loss": 2.5101, + "step": 15251 + }, + { + "epoch": 1.2308933903639738, + "grad_norm": 0.6628747582435608, + "learning_rate": 2.682277336329233e-05, + "loss": 2.3933, + "step": 15252 + }, + { + "epoch": 1.230974094100557, + "grad_norm": 0.7362595796585083, + "learning_rate": 2.681201472724244e-05, + "loss": 2.4541, + "step": 15253 + }, + { + "epoch": 1.2310547978371398, + "grad_norm": 0.7604697346687317, + 
"learning_rate": 2.680125791522844e-05, + "loss": 2.4383, + "step": 15254 + }, + { + "epoch": 1.2311355015737229, + "grad_norm": 0.7128429412841797, + "learning_rate": 2.6790502927518434e-05, + "loss": 2.4492, + "step": 15255 + }, + { + "epoch": 1.231216205310306, + "grad_norm": 0.6761955618858337, + "learning_rate": 2.677974976438047e-05, + "loss": 2.4355, + "step": 15256 + }, + { + "epoch": 1.2312969090468888, + "grad_norm": 0.6687077879905701, + "learning_rate": 2.6768998426082538e-05, + "loss": 2.4317, + "step": 15257 + }, + { + "epoch": 1.231377612783472, + "grad_norm": 0.7423825860023499, + "learning_rate": 2.675824891289259e-05, + "loss": 2.4216, + "step": 15258 + }, + { + "epoch": 1.231458316520055, + "grad_norm": 0.671130359172821, + "learning_rate": 2.6747501225078542e-05, + "loss": 2.4775, + "step": 15259 + }, + { + "epoch": 1.2315390202566379, + "grad_norm": 0.7421461939811707, + "learning_rate": 2.6736755362908273e-05, + "loss": 2.4042, + "step": 15260 + }, + { + "epoch": 1.231619723993221, + "grad_norm": 0.7084131240844727, + "learning_rate": 2.6726011326649547e-05, + "loss": 2.4506, + "step": 15261 + }, + { + "epoch": 1.2317004277298038, + "grad_norm": 0.641852855682373, + "learning_rate": 2.671526911657015e-05, + "loss": 2.4261, + "step": 15262 + }, + { + "epoch": 1.231781131466387, + "grad_norm": 0.7627724409103394, + "learning_rate": 2.670452873293785e-05, + "loss": 2.4647, + "step": 15263 + }, + { + "epoch": 1.23186183520297, + "grad_norm": 0.6638163924217224, + "learning_rate": 2.669379017602026e-05, + "loss": 2.4208, + "step": 15264 + }, + { + "epoch": 1.2319425389395529, + "grad_norm": 0.6815361380577087, + "learning_rate": 2.668305344608505e-05, + "loss": 2.4404, + "step": 15265 + }, + { + "epoch": 1.232023242676136, + "grad_norm": 0.6466485857963562, + "learning_rate": 2.6672318543399823e-05, + "loss": 2.4327, + "step": 15266 + }, + { + "epoch": 1.2321039464127188, + "grad_norm": 0.7119305729866028, + "learning_rate": 2.6661585468232042e-05, 
+ "loss": 2.4266, + "step": 15267 + }, + { + "epoch": 1.232184650149302, + "grad_norm": 0.7245718836784363, + "learning_rate": 2.6650854220849286e-05, + "loss": 2.4484, + "step": 15268 + }, + { + "epoch": 1.232265353885885, + "grad_norm": 0.7050287127494812, + "learning_rate": 2.6640124801518972e-05, + "loss": 2.4441, + "step": 15269 + }, + { + "epoch": 1.2323460576224678, + "grad_norm": 0.6906494498252869, + "learning_rate": 2.6629397210508556e-05, + "loss": 2.4297, + "step": 15270 + }, + { + "epoch": 1.232426761359051, + "grad_norm": 0.7224171757698059, + "learning_rate": 2.661867144808532e-05, + "loss": 2.4279, + "step": 15271 + }, + { + "epoch": 1.232507465095634, + "grad_norm": 0.688804030418396, + "learning_rate": 2.6607947514516606e-05, + "loss": 2.4741, + "step": 15272 + }, + { + "epoch": 1.232588168832217, + "grad_norm": 0.6462350487709045, + "learning_rate": 2.6597225410069726e-05, + "loss": 2.4499, + "step": 15273 + }, + { + "epoch": 1.2326688725688, + "grad_norm": 0.6860110759735107, + "learning_rate": 2.658650513501184e-05, + "loss": 2.4488, + "step": 15274 + }, + { + "epoch": 1.2327495763053828, + "grad_norm": 0.7158305644989014, + "learning_rate": 2.6575786689610138e-05, + "loss": 2.4318, + "step": 15275 + }, + { + "epoch": 1.232830280041966, + "grad_norm": 0.7740959525108337, + "learning_rate": 2.6565070074131804e-05, + "loss": 2.4824, + "step": 15276 + }, + { + "epoch": 1.232910983778549, + "grad_norm": 0.7573856711387634, + "learning_rate": 2.6554355288843847e-05, + "loss": 2.4034, + "step": 15277 + }, + { + "epoch": 1.2329916875151319, + "grad_norm": 0.6809369921684265, + "learning_rate": 2.654364233401332e-05, + "loss": 2.5085, + "step": 15278 + }, + { + "epoch": 1.233072391251715, + "grad_norm": 0.6695643067359924, + "learning_rate": 2.6532931209907307e-05, + "loss": 2.4697, + "step": 15279 + }, + { + "epoch": 1.2331530949882978, + "grad_norm": 0.7218750715255737, + "learning_rate": 2.6522221916792655e-05, + "loss": 2.4753, + "step": 15280 + }, 
+ { + "epoch": 1.233233798724881, + "grad_norm": 0.8171822428703308, + "learning_rate": 2.6511514454936314e-05, + "loss": 2.45, + "step": 15281 + }, + { + "epoch": 1.233314502461464, + "grad_norm": 0.7234573364257812, + "learning_rate": 2.6500808824605162e-05, + "loss": 2.3963, + "step": 15282 + }, + { + "epoch": 1.2333952061980469, + "grad_norm": 0.6993409395217896, + "learning_rate": 2.6490105026065948e-05, + "loss": 2.4449, + "step": 15283 + }, + { + "epoch": 1.23347590993463, + "grad_norm": 0.7984449863433838, + "learning_rate": 2.6479403059585472e-05, + "loss": 2.4322, + "step": 15284 + }, + { + "epoch": 1.233556613671213, + "grad_norm": 0.683971107006073, + "learning_rate": 2.6468702925430466e-05, + "loss": 2.4125, + "step": 15285 + }, + { + "epoch": 1.233637317407796, + "grad_norm": 0.6739822626113892, + "learning_rate": 2.6458004623867617e-05, + "loss": 2.4487, + "step": 15286 + }, + { + "epoch": 1.233718021144379, + "grad_norm": 0.7003912925720215, + "learning_rate": 2.644730815516351e-05, + "loss": 2.4437, + "step": 15287 + }, + { + "epoch": 1.233798724880962, + "grad_norm": 0.7011744379997253, + "learning_rate": 2.643661351958474e-05, + "loss": 2.4798, + "step": 15288 + }, + { + "epoch": 1.233879428617545, + "grad_norm": 0.7003397941589355, + "learning_rate": 2.6425920717397867e-05, + "loss": 2.4554, + "step": 15289 + }, + { + "epoch": 1.233960132354128, + "grad_norm": 0.6682165265083313, + "learning_rate": 2.6415229748869374e-05, + "loss": 2.4252, + "step": 15290 + }, + { + "epoch": 1.234040836090711, + "grad_norm": 0.6712457537651062, + "learning_rate": 2.6404540614265715e-05, + "loss": 2.4225, + "step": 15291 + }, + { + "epoch": 1.234121539827294, + "grad_norm": 0.654464602470398, + "learning_rate": 2.63938533138533e-05, + "loss": 2.4462, + "step": 15292 + }, + { + "epoch": 1.234202243563877, + "grad_norm": 0.7311797738075256, + "learning_rate": 2.638316784789845e-05, + "loss": 2.502, + "step": 15293 + }, + { + "epoch": 1.23428294730046, + 
"grad_norm": 0.6836559176445007, + "learning_rate": 2.6372484216667492e-05, + "loss": 2.5134, + "step": 15294 + }, + { + "epoch": 1.234363651037043, + "grad_norm": 0.6961826086044312, + "learning_rate": 2.636180242042672e-05, + "loss": 2.4479, + "step": 15295 + }, + { + "epoch": 1.234444354773626, + "grad_norm": 0.6824259161949158, + "learning_rate": 2.635112245944229e-05, + "loss": 2.4299, + "step": 15296 + }, + { + "epoch": 1.234525058510209, + "grad_norm": 0.7594609260559082, + "learning_rate": 2.634044433398042e-05, + "loss": 2.4469, + "step": 15297 + }, + { + "epoch": 1.234605762246792, + "grad_norm": 0.7044653296470642, + "learning_rate": 2.632976804430721e-05, + "loss": 2.447, + "step": 15298 + }, + { + "epoch": 1.234686465983375, + "grad_norm": 0.6986916065216064, + "learning_rate": 2.631909359068876e-05, + "loss": 2.4705, + "step": 15299 + }, + { + "epoch": 1.234767169719958, + "grad_norm": 0.7025431990623474, + "learning_rate": 2.630842097339111e-05, + "loss": 2.3951, + "step": 15300 + }, + { + "epoch": 1.2348478734565411, + "grad_norm": 0.6533786058425903, + "learning_rate": 2.6297750192680237e-05, + "loss": 2.3769, + "step": 15301 + }, + { + "epoch": 1.234928577193124, + "grad_norm": 0.6575472354888916, + "learning_rate": 2.628708124882212e-05, + "loss": 2.4293, + "step": 15302 + }, + { + "epoch": 1.235009280929707, + "grad_norm": 0.6712046265602112, + "learning_rate": 2.6276414142082584e-05, + "loss": 2.4819, + "step": 15303 + }, + { + "epoch": 1.2350899846662902, + "grad_norm": 0.6947652101516724, + "learning_rate": 2.6265748872727535e-05, + "loss": 2.449, + "step": 15304 + }, + { + "epoch": 1.235170688402873, + "grad_norm": 0.6881443858146667, + "learning_rate": 2.62550854410228e-05, + "loss": 2.3991, + "step": 15305 + }, + { + "epoch": 1.2352513921394561, + "grad_norm": 0.6681519746780396, + "learning_rate": 2.624442384723407e-05, + "loss": 2.4005, + "step": 15306 + }, + { + "epoch": 1.235332095876039, + "grad_norm": 0.6728120446205139, + 
"learning_rate": 2.62337640916271e-05, + "loss": 2.4242, + "step": 15307 + }, + { + "epoch": 1.235412799612622, + "grad_norm": 0.707360029220581, + "learning_rate": 2.622310617446755e-05, + "loss": 2.4385, + "step": 15308 + }, + { + "epoch": 1.2354935033492052, + "grad_norm": 0.6890079975128174, + "learning_rate": 2.6212450096021058e-05, + "loss": 2.443, + "step": 15309 + }, + { + "epoch": 1.235574207085788, + "grad_norm": 0.7022379636764526, + "learning_rate": 2.620179585655318e-05, + "loss": 2.3982, + "step": 15310 + }, + { + "epoch": 1.235654910822371, + "grad_norm": 0.7283182740211487, + "learning_rate": 2.61911434563295e-05, + "loss": 2.4197, + "step": 15311 + }, + { + "epoch": 1.235735614558954, + "grad_norm": 0.6721852421760559, + "learning_rate": 2.6180492895615426e-05, + "loss": 2.4356, + "step": 15312 + }, + { + "epoch": 1.235816318295537, + "grad_norm": 0.6817916631698608, + "learning_rate": 2.616984417467645e-05, + "loss": 2.4325, + "step": 15313 + }, + { + "epoch": 1.2358970220321202, + "grad_norm": 0.6826596260070801, + "learning_rate": 2.6159197293777972e-05, + "loss": 2.4043, + "step": 15314 + }, + { + "epoch": 1.235977725768703, + "grad_norm": 0.7135530114173889, + "learning_rate": 2.6148552253185288e-05, + "loss": 2.4269, + "step": 15315 + }, + { + "epoch": 1.236058429505286, + "grad_norm": 0.7027753591537476, + "learning_rate": 2.6137909053163722e-05, + "loss": 2.4266, + "step": 15316 + }, + { + "epoch": 1.2361391332418692, + "grad_norm": 0.6597041487693787, + "learning_rate": 2.6127267693978552e-05, + "loss": 2.4073, + "step": 15317 + }, + { + "epoch": 1.236219836978452, + "grad_norm": 0.6450026631355286, + "learning_rate": 2.6116628175894974e-05, + "loss": 2.4299, + "step": 15318 + }, + { + "epoch": 1.2363005407150351, + "grad_norm": 0.7740476727485657, + "learning_rate": 2.6105990499178156e-05, + "loss": 2.4088, + "step": 15319 + }, + { + "epoch": 1.2363812444516182, + "grad_norm": 0.6460183262825012, + "learning_rate": 2.609535466409322e-05, 
+ "loss": 2.4311, + "step": 15320 + }, + { + "epoch": 1.236461948188201, + "grad_norm": 0.6514838337898254, + "learning_rate": 2.608472067090525e-05, + "loss": 2.4069, + "step": 15321 + }, + { + "epoch": 1.2365426519247842, + "grad_norm": 0.7281234860420227, + "learning_rate": 2.6074088519879237e-05, + "loss": 2.4245, + "step": 15322 + }, + { + "epoch": 1.236623355661367, + "grad_norm": 0.752983570098877, + "learning_rate": 2.606345821128018e-05, + "loss": 2.4149, + "step": 15323 + }, + { + "epoch": 1.2367040593979501, + "grad_norm": 0.6912856101989746, + "learning_rate": 2.6052829745373054e-05, + "loss": 2.4489, + "step": 15324 + }, + { + "epoch": 1.236784763134533, + "grad_norm": 0.6719293594360352, + "learning_rate": 2.604220312242267e-05, + "loss": 2.457, + "step": 15325 + }, + { + "epoch": 1.236865466871116, + "grad_norm": 0.7440586090087891, + "learning_rate": 2.6031578342693918e-05, + "loss": 2.4657, + "step": 15326 + }, + { + "epoch": 1.2369461706076992, + "grad_norm": 0.694442629814148, + "learning_rate": 2.602095540645162e-05, + "loss": 2.4422, + "step": 15327 + }, + { + "epoch": 1.237026874344282, + "grad_norm": 0.7186843752861023, + "learning_rate": 2.601033431396046e-05, + "loss": 2.4229, + "step": 15328 + }, + { + "epoch": 1.2371075780808651, + "grad_norm": 0.7401825785636902, + "learning_rate": 2.5999715065485153e-05, + "loss": 2.45, + "step": 15329 + }, + { + "epoch": 1.2371882818174482, + "grad_norm": 0.6710138916969299, + "learning_rate": 2.598909766129045e-05, + "loss": 2.4074, + "step": 15330 + }, + { + "epoch": 1.237268985554031, + "grad_norm": 0.7867769598960876, + "learning_rate": 2.5978482101640867e-05, + "loss": 2.4709, + "step": 15331 + }, + { + "epoch": 1.2373496892906142, + "grad_norm": 0.7076219916343689, + "learning_rate": 2.5967868386801e-05, + "loss": 2.4887, + "step": 15332 + }, + { + "epoch": 1.2374303930271973, + "grad_norm": 0.7277626991271973, + "learning_rate": 2.5957256517035378e-05, + "loss": 2.4295, + "step": 15333 + }, + { 
+ "epoch": 1.2375110967637801, + "grad_norm": 0.7339804768562317, + "learning_rate": 2.5946646492608506e-05, + "loss": 2.4624, + "step": 15334 + }, + { + "epoch": 1.2375918005003632, + "grad_norm": 0.6707656383514404, + "learning_rate": 2.593603831378475e-05, + "loss": 2.4159, + "step": 15335 + }, + { + "epoch": 1.237672504236946, + "grad_norm": 0.7118813991546631, + "learning_rate": 2.592543198082852e-05, + "loss": 2.4496, + "step": 15336 + }, + { + "epoch": 1.2377532079735292, + "grad_norm": 0.675167977809906, + "learning_rate": 2.591482749400419e-05, + "loss": 2.4519, + "step": 15337 + }, + { + "epoch": 1.2378339117101123, + "grad_norm": 0.8245306611061096, + "learning_rate": 2.5904224853575986e-05, + "loss": 2.4732, + "step": 15338 + }, + { + "epoch": 1.2379146154466951, + "grad_norm": 0.7411863207817078, + "learning_rate": 2.5893624059808184e-05, + "loss": 2.4458, + "step": 15339 + }, + { + "epoch": 1.2379953191832782, + "grad_norm": 0.6864522695541382, + "learning_rate": 2.5883025112964997e-05, + "loss": 2.4264, + "step": 15340 + }, + { + "epoch": 1.238076022919861, + "grad_norm": 0.6585919260978699, + "learning_rate": 2.5872428013310567e-05, + "loss": 2.3904, + "step": 15341 + }, + { + "epoch": 1.2381567266564442, + "grad_norm": 0.6605508327484131, + "learning_rate": 2.5861832761108995e-05, + "loss": 2.4828, + "step": 15342 + }, + { + "epoch": 1.2382374303930272, + "grad_norm": 0.7353223562240601, + "learning_rate": 2.5851239356624392e-05, + "loss": 2.4335, + "step": 15343 + }, + { + "epoch": 1.2383181341296101, + "grad_norm": 0.6907783150672913, + "learning_rate": 2.5840647800120688e-05, + "loss": 2.4394, + "step": 15344 + }, + { + "epoch": 1.2383988378661932, + "grad_norm": 0.7239590287208557, + "learning_rate": 2.5830058091861896e-05, + "loss": 2.4221, + "step": 15345 + }, + { + "epoch": 1.2384795416027763, + "grad_norm": 0.7001412510871887, + "learning_rate": 2.5819470232111975e-05, + "loss": 2.4521, + "step": 15346 + }, + { + "epoch": 
1.2385602453393592, + "grad_norm": 0.6983658671379089, + "learning_rate": 2.580888422113473e-05, + "loss": 2.4839, + "step": 15347 + }, + { + "epoch": 1.2386409490759422, + "grad_norm": 0.7829005718231201, + "learning_rate": 2.5798300059194037e-05, + "loss": 2.4546, + "step": 15348 + }, + { + "epoch": 1.2387216528125253, + "grad_norm": 0.7248061299324036, + "learning_rate": 2.5787717746553664e-05, + "loss": 2.4341, + "step": 15349 + }, + { + "epoch": 1.2388023565491082, + "grad_norm": 0.7921163439750671, + "learning_rate": 2.577713728347736e-05, + "loss": 2.475, + "step": 15350 + }, + { + "epoch": 1.2388830602856913, + "grad_norm": 0.6571238040924072, + "learning_rate": 2.5766558670228813e-05, + "loss": 2.4636, + "step": 15351 + }, + { + "epoch": 1.2389637640222741, + "grad_norm": 0.7436683177947998, + "learning_rate": 2.575598190707168e-05, + "loss": 2.4868, + "step": 15352 + }, + { + "epoch": 1.2390444677588572, + "grad_norm": 0.6471900939941406, + "learning_rate": 2.5745406994269573e-05, + "loss": 2.4349, + "step": 15353 + }, + { + "epoch": 1.2391251714954403, + "grad_norm": 0.6612011194229126, + "learning_rate": 2.5734833932086012e-05, + "loss": 2.4088, + "step": 15354 + }, + { + "epoch": 1.2392058752320232, + "grad_norm": 0.6882977485656738, + "learning_rate": 2.572426272078451e-05, + "loss": 2.4344, + "step": 15355 + }, + { + "epoch": 1.2392865789686063, + "grad_norm": 0.6836830973625183, + "learning_rate": 2.5713693360628565e-05, + "loss": 2.4325, + "step": 15356 + }, + { + "epoch": 1.2393672827051891, + "grad_norm": 0.712127149105072, + "learning_rate": 2.5703125851881536e-05, + "loss": 2.4505, + "step": 15357 + }, + { + "epoch": 1.2394479864417722, + "grad_norm": 0.7162468433380127, + "learning_rate": 2.5692560194806837e-05, + "loss": 2.4167, + "step": 15358 + }, + { + "epoch": 1.2395286901783553, + "grad_norm": 0.7770177125930786, + "learning_rate": 2.568199638966777e-05, + "loss": 2.4072, + "step": 15359 + }, + { + "epoch": 1.2396093939149382, + 
"grad_norm": 0.7049651741981506, + "learning_rate": 2.5671434436727636e-05, + "loss": 2.434, + "step": 15360 + }, + { + "epoch": 1.2396900976515213, + "grad_norm": 0.7793349027633667, + "learning_rate": 2.566087433624964e-05, + "loss": 2.4762, + "step": 15361 + }, + { + "epoch": 1.2397708013881044, + "grad_norm": 0.6776690483093262, + "learning_rate": 2.5650316088497018e-05, + "loss": 2.402, + "step": 15362 + }, + { + "epoch": 1.2398515051246872, + "grad_norm": 0.7207701802253723, + "learning_rate": 2.5639759693732834e-05, + "loss": 2.4398, + "step": 15363 + }, + { + "epoch": 1.2399322088612703, + "grad_norm": 0.759787917137146, + "learning_rate": 2.5629205152220215e-05, + "loss": 2.4268, + "step": 15364 + }, + { + "epoch": 1.2400129125978534, + "grad_norm": 0.6906142830848694, + "learning_rate": 2.5618652464222215e-05, + "loss": 2.4075, + "step": 15365 + }, + { + "epoch": 1.2400936163344363, + "grad_norm": 0.7002954483032227, + "learning_rate": 2.560810163000187e-05, + "loss": 2.4516, + "step": 15366 + }, + { + "epoch": 1.2401743200710194, + "grad_norm": 0.7287559509277344, + "learning_rate": 2.5597552649822053e-05, + "loss": 2.4975, + "step": 15367 + }, + { + "epoch": 1.2402550238076022, + "grad_norm": 0.6523926854133606, + "learning_rate": 2.558700552394572e-05, + "loss": 2.4085, + "step": 15368 + }, + { + "epoch": 1.2403357275441853, + "grad_norm": 0.7289387583732605, + "learning_rate": 2.5576460252635727e-05, + "loss": 2.4789, + "step": 15369 + }, + { + "epoch": 1.2404164312807684, + "grad_norm": 0.6613432765007019, + "learning_rate": 2.5565916836154878e-05, + "loss": 2.4263, + "step": 15370 + }, + { + "epoch": 1.2404971350173513, + "grad_norm": 0.7275245785713196, + "learning_rate": 2.555537527476597e-05, + "loss": 2.4652, + "step": 15371 + }, + { + "epoch": 1.2405778387539343, + "grad_norm": 0.6726976037025452, + "learning_rate": 2.554483556873173e-05, + "loss": 2.4092, + "step": 15372 + }, + { + "epoch": 1.2406585424905172, + "grad_norm": 
0.6908233761787415, + "learning_rate": 2.5534297718314794e-05, + "loss": 2.3678, + "step": 15373 + }, + { + "epoch": 1.2407392462271003, + "grad_norm": 0.6893147826194763, + "learning_rate": 2.5523761723777806e-05, + "loss": 2.4625, + "step": 15374 + }, + { + "epoch": 1.2408199499636834, + "grad_norm": 0.7640267014503479, + "learning_rate": 2.551322758538339e-05, + "loss": 2.446, + "step": 15375 + }, + { + "epoch": 1.2409006537002663, + "grad_norm": 0.7187458276748657, + "learning_rate": 2.550269530339402e-05, + "loss": 2.4215, + "step": 15376 + }, + { + "epoch": 1.2409813574368493, + "grad_norm": 0.8041789531707764, + "learning_rate": 2.5492164878072234e-05, + "loss": 2.5085, + "step": 15377 + }, + { + "epoch": 1.2410620611734324, + "grad_norm": 0.6582188010215759, + "learning_rate": 2.5481636309680445e-05, + "loss": 2.467, + "step": 15378 + }, + { + "epoch": 1.2411427649100153, + "grad_norm": 0.705731213092804, + "learning_rate": 2.5471109598481112e-05, + "loss": 2.3764, + "step": 15379 + }, + { + "epoch": 1.2412234686465984, + "grad_norm": 0.6918940544128418, + "learning_rate": 2.5460584744736495e-05, + "loss": 2.4513, + "step": 15380 + }, + { + "epoch": 1.2413041723831812, + "grad_norm": 0.7402673959732056, + "learning_rate": 2.5450061748708975e-05, + "loss": 2.5133, + "step": 15381 + }, + { + "epoch": 1.2413848761197643, + "grad_norm": 0.6740667223930359, + "learning_rate": 2.543954061066083e-05, + "loss": 2.4649, + "step": 15382 + }, + { + "epoch": 1.2414655798563474, + "grad_norm": 0.6665407419204712, + "learning_rate": 2.5429021330854197e-05, + "loss": 2.4321, + "step": 15383 + }, + { + "epoch": 1.2415462835929303, + "grad_norm": 0.7324530482292175, + "learning_rate": 2.5418503909551296e-05, + "loss": 2.3574, + "step": 15384 + }, + { + "epoch": 1.2416269873295134, + "grad_norm": 0.7117868661880493, + "learning_rate": 2.5407988347014255e-05, + "loss": 2.4552, + "step": 15385 + }, + { + "epoch": 1.2417076910660962, + "grad_norm": 0.7162930965423584, + 
"learning_rate": 2.5397474643505103e-05, + "loss": 2.4135, + "step": 15386 + }, + { + "epoch": 1.2417883948026793, + "grad_norm": 0.7301257848739624, + "learning_rate": 2.5386962799285895e-05, + "loss": 2.4277, + "step": 15387 + }, + { + "epoch": 1.2418690985392624, + "grad_norm": 0.7404977679252625, + "learning_rate": 2.5376452814618645e-05, + "loss": 2.478, + "step": 15388 + }, + { + "epoch": 1.2419498022758453, + "grad_norm": 0.6546272039413452, + "learning_rate": 2.536594468976522e-05, + "loss": 2.4879, + "step": 15389 + }, + { + "epoch": 1.2420305060124284, + "grad_norm": 0.6501599550247192, + "learning_rate": 2.5355438424987565e-05, + "loss": 2.3964, + "step": 15390 + }, + { + "epoch": 1.2421112097490115, + "grad_norm": 0.6711748242378235, + "learning_rate": 2.5344934020547496e-05, + "loss": 2.4123, + "step": 15391 + }, + { + "epoch": 1.2421919134855943, + "grad_norm": 0.6803534030914307, + "learning_rate": 2.5334431476706823e-05, + "loss": 2.4271, + "step": 15392 + }, + { + "epoch": 1.2422726172221774, + "grad_norm": 0.7407296299934387, + "learning_rate": 2.5323930793727302e-05, + "loss": 2.49, + "step": 15393 + }, + { + "epoch": 1.2423533209587605, + "grad_norm": 0.701870858669281, + "learning_rate": 2.5313431971870617e-05, + "loss": 2.4534, + "step": 15394 + }, + { + "epoch": 1.2424340246953434, + "grad_norm": 0.6658090353012085, + "learning_rate": 2.5302935011398475e-05, + "loss": 2.4581, + "step": 15395 + }, + { + "epoch": 1.2425147284319265, + "grad_norm": 0.6616473197937012, + "learning_rate": 2.529243991257243e-05, + "loss": 2.4169, + "step": 15396 + }, + { + "epoch": 1.2425954321685093, + "grad_norm": 0.6714773178100586, + "learning_rate": 2.5281946675654067e-05, + "loss": 2.4159, + "step": 15397 + }, + { + "epoch": 1.2426761359050924, + "grad_norm": 0.6789337396621704, + "learning_rate": 2.5271455300904935e-05, + "loss": 2.4211, + "step": 15398 + }, + { + "epoch": 1.2427568396416755, + "grad_norm": 0.6793739795684814, + "learning_rate": 
2.5260965788586456e-05, + "loss": 2.4337, + "step": 15399 + }, + { + "epoch": 1.2428375433782584, + "grad_norm": 0.6432294249534607, + "learning_rate": 2.5250478138960076e-05, + "loss": 2.4268, + "step": 15400 + }, + { + "epoch": 1.2429182471148414, + "grad_norm": 0.6960669159889221, + "learning_rate": 2.523999235228718e-05, + "loss": 2.3535, + "step": 15401 + }, + { + "epoch": 1.2429989508514243, + "grad_norm": 0.6724488735198975, + "learning_rate": 2.5229508428829096e-05, + "loss": 2.4294, + "step": 15402 + }, + { + "epoch": 1.2430796545880074, + "grad_norm": 0.636105477809906, + "learning_rate": 2.521902636884711e-05, + "loss": 2.4438, + "step": 15403 + }, + { + "epoch": 1.2431603583245905, + "grad_norm": 0.6865580677986145, + "learning_rate": 2.52085461726025e-05, + "loss": 2.4473, + "step": 15404 + }, + { + "epoch": 1.2432410620611734, + "grad_norm": 0.6740261316299438, + "learning_rate": 2.5198067840356398e-05, + "loss": 2.4642, + "step": 15405 + }, + { + "epoch": 1.2433217657977564, + "grad_norm": 0.7241789698600769, + "learning_rate": 2.518759137236998e-05, + "loss": 2.4294, + "step": 15406 + }, + { + "epoch": 1.2434024695343395, + "grad_norm": 0.6839794516563416, + "learning_rate": 2.5177116768904373e-05, + "loss": 2.4697, + "step": 15407 + }, + { + "epoch": 1.2434831732709224, + "grad_norm": 0.677390992641449, + "learning_rate": 2.5166644030220578e-05, + "loss": 2.4411, + "step": 15408 + }, + { + "epoch": 1.2435638770075055, + "grad_norm": 0.709065854549408, + "learning_rate": 2.515617315657962e-05, + "loss": 2.4392, + "step": 15409 + }, + { + "epoch": 1.2436445807440886, + "grad_norm": 0.6735498905181885, + "learning_rate": 2.514570414824249e-05, + "loss": 2.3924, + "step": 15410 + }, + { + "epoch": 1.2437252844806714, + "grad_norm": 0.6729374527931213, + "learning_rate": 2.513523700547007e-05, + "loss": 2.4464, + "step": 15411 + }, + { + "epoch": 1.2438059882172545, + "grad_norm": 0.7232720851898193, + "learning_rate": 2.5124771728523244e-05, + "loss": 
2.3975, + "step": 15412 + }, + { + "epoch": 1.2438866919538374, + "grad_norm": 0.7467584609985352, + "learning_rate": 2.5114308317662837e-05, + "loss": 2.4191, + "step": 15413 + }, + { + "epoch": 1.2439673956904205, + "grad_norm": 0.6951141953468323, + "learning_rate": 2.5103846773149642e-05, + "loss": 2.4207, + "step": 15414 + }, + { + "epoch": 1.2440480994270036, + "grad_norm": 0.6427489519119263, + "learning_rate": 2.5093387095244336e-05, + "loss": 2.3539, + "step": 15415 + }, + { + "epoch": 1.2441288031635864, + "grad_norm": 0.729580283164978, + "learning_rate": 2.5082929284207644e-05, + "loss": 2.4464, + "step": 15416 + }, + { + "epoch": 1.2442095069001695, + "grad_norm": 0.7247009873390198, + "learning_rate": 2.5072473340300207e-05, + "loss": 2.4294, + "step": 15417 + }, + { + "epoch": 1.2442902106367524, + "grad_norm": 0.7037674784660339, + "learning_rate": 2.5062019263782577e-05, + "loss": 2.4294, + "step": 15418 + }, + { + "epoch": 1.2443709143733355, + "grad_norm": 0.6997841596603394, + "learning_rate": 2.5051567054915303e-05, + "loss": 2.4976, + "step": 15419 + }, + { + "epoch": 1.2444516181099186, + "grad_norm": 0.7001172304153442, + "learning_rate": 2.504111671395891e-05, + "loss": 2.371, + "step": 15420 + }, + { + "epoch": 1.2445323218465014, + "grad_norm": 0.6781473159790039, + "learning_rate": 2.5030668241173827e-05, + "loss": 2.4124, + "step": 15421 + }, + { + "epoch": 1.2446130255830845, + "grad_norm": 0.7053182125091553, + "learning_rate": 2.5020221636820463e-05, + "loss": 2.4109, + "step": 15422 + }, + { + "epoch": 1.2446937293196676, + "grad_norm": 0.68635493516922, + "learning_rate": 2.50097769011592e-05, + "loss": 2.4548, + "step": 15423 + }, + { + "epoch": 1.2447744330562505, + "grad_norm": 0.7015564441680908, + "learning_rate": 2.4999334034450293e-05, + "loss": 2.4537, + "step": 15424 + }, + { + "epoch": 1.2448551367928335, + "grad_norm": 0.694054901599884, + "learning_rate": 2.4988893036954043e-05, + "loss": 2.4396, + "step": 15425 + }, + 
{ + "epoch": 1.2449358405294164, + "grad_norm": 0.702518880367279, + "learning_rate": 2.4978453908930665e-05, + "loss": 2.4015, + "step": 15426 + }, + { + "epoch": 1.2450165442659995, + "grad_norm": 0.7237387895584106, + "learning_rate": 2.4968016650640348e-05, + "loss": 2.4257, + "step": 15427 + }, + { + "epoch": 1.2450972480025826, + "grad_norm": 0.7133163809776306, + "learning_rate": 2.4957581262343154e-05, + "loss": 2.4532, + "step": 15428 + }, + { + "epoch": 1.2451779517391655, + "grad_norm": 0.8339287042617798, + "learning_rate": 2.4947147744299203e-05, + "loss": 2.4621, + "step": 15429 + }, + { + "epoch": 1.2452586554757485, + "grad_norm": 0.7620034217834473, + "learning_rate": 2.493671609676852e-05, + "loss": 2.365, + "step": 15430 + }, + { + "epoch": 1.2453393592123314, + "grad_norm": 0.7445465922355652, + "learning_rate": 2.4926286320011094e-05, + "loss": 2.4764, + "step": 15431 + }, + { + "epoch": 1.2454200629489145, + "grad_norm": 0.7366160154342651, + "learning_rate": 2.4915858414286852e-05, + "loss": 2.4597, + "step": 15432 + }, + { + "epoch": 1.2455007666854976, + "grad_norm": 0.7098437547683716, + "learning_rate": 2.490543237985572e-05, + "loss": 2.4202, + "step": 15433 + }, + { + "epoch": 1.2455814704220805, + "grad_norm": 0.6483333706855774, + "learning_rate": 2.4895008216977478e-05, + "loss": 2.4108, + "step": 15434 + }, + { + "epoch": 1.2456621741586635, + "grad_norm": 0.6797904968261719, + "learning_rate": 2.4884585925911963e-05, + "loss": 2.4414, + "step": 15435 + }, + { + "epoch": 1.2457428778952466, + "grad_norm": 0.6853424310684204, + "learning_rate": 2.4874165506918957e-05, + "loss": 2.4226, + "step": 15436 + }, + { + "epoch": 1.2458235816318295, + "grad_norm": 0.6861590147018433, + "learning_rate": 2.4863746960258094e-05, + "loss": 2.3748, + "step": 15437 + }, + { + "epoch": 1.2459042853684126, + "grad_norm": 0.7360263466835022, + "learning_rate": 2.4853330286189058e-05, + "loss": 2.4441, + "step": 15438 + }, + { + "epoch": 
1.2459849891049957, + "grad_norm": 0.6894183158874512, + "learning_rate": 2.4842915484971496e-05, + "loss": 2.3495, + "step": 15439 + }, + { + "epoch": 1.2460656928415785, + "grad_norm": 0.7570669651031494, + "learning_rate": 2.4832502556864923e-05, + "loss": 2.4622, + "step": 15440 + }, + { + "epoch": 1.2461463965781616, + "grad_norm": 0.6986069083213806, + "learning_rate": 2.4822091502128876e-05, + "loss": 2.3647, + "step": 15441 + }, + { + "epoch": 1.2462271003147445, + "grad_norm": 0.681450366973877, + "learning_rate": 2.481168232102279e-05, + "loss": 2.3872, + "step": 15442 + }, + { + "epoch": 1.2463078040513276, + "grad_norm": 0.7241837978363037, + "learning_rate": 2.480127501380618e-05, + "loss": 2.4692, + "step": 15443 + }, + { + "epoch": 1.2463885077879107, + "grad_norm": 0.6575295329093933, + "learning_rate": 2.479086958073834e-05, + "loss": 2.5057, + "step": 15444 + }, + { + "epoch": 1.2464692115244935, + "grad_norm": 0.7289770841598511, + "learning_rate": 2.478046602207864e-05, + "loss": 2.4164, + "step": 15445 + }, + { + "epoch": 1.2465499152610766, + "grad_norm": 0.6682024598121643, + "learning_rate": 2.4770064338086374e-05, + "loss": 2.4466, + "step": 15446 + }, + { + "epoch": 1.2466306189976595, + "grad_norm": 0.7238918542861938, + "learning_rate": 2.475966452902072e-05, + "loss": 2.4367, + "step": 15447 + }, + { + "epoch": 1.2467113227342426, + "grad_norm": 0.6825705170631409, + "learning_rate": 2.4749266595140918e-05, + "loss": 2.4337, + "step": 15448 + }, + { + "epoch": 1.2467920264708257, + "grad_norm": 0.7352269887924194, + "learning_rate": 2.4738870536706126e-05, + "loss": 2.4103, + "step": 15449 + }, + { + "epoch": 1.2468727302074085, + "grad_norm": 0.658930778503418, + "learning_rate": 2.4728476353975394e-05, + "loss": 2.4281, + "step": 15450 + }, + { + "epoch": 1.2469534339439916, + "grad_norm": 0.6933601498603821, + "learning_rate": 2.4718084047207778e-05, + "loss": 2.4502, + "step": 15451 + }, + { + "epoch": 1.2470341376805747, + 
"grad_norm": 0.6901879906654358, + "learning_rate": 2.4707693616662308e-05, + "loss": 2.4057, + "step": 15452 + }, + { + "epoch": 1.2471148414171576, + "grad_norm": 0.7648913860321045, + "learning_rate": 2.469730506259792e-05, + "loss": 2.4163, + "step": 15453 + }, + { + "epoch": 1.2471955451537406, + "grad_norm": 0.6496175527572632, + "learning_rate": 2.4686918385273537e-05, + "loss": 2.4373, + "step": 15454 + }, + { + "epoch": 1.2472762488903237, + "grad_norm": 0.6949105858802795, + "learning_rate": 2.4676533584948048e-05, + "loss": 2.4108, + "step": 15455 + }, + { + "epoch": 1.2473569526269066, + "grad_norm": 0.7018688321113586, + "learning_rate": 2.4666150661880206e-05, + "loss": 2.4589, + "step": 15456 + }, + { + "epoch": 1.2474376563634897, + "grad_norm": 0.7141219973564148, + "learning_rate": 2.4655769616328827e-05, + "loss": 2.4022, + "step": 15457 + }, + { + "epoch": 1.2475183601000726, + "grad_norm": 0.7276743054389954, + "learning_rate": 2.4645390448552608e-05, + "loss": 2.4443, + "step": 15458 + }, + { + "epoch": 1.2475990638366556, + "grad_norm": 0.6861153244972229, + "learning_rate": 2.463501315881027e-05, + "loss": 2.4478, + "step": 15459 + }, + { + "epoch": 1.2476797675732387, + "grad_norm": 0.7252256274223328, + "learning_rate": 2.462463774736038e-05, + "loss": 2.446, + "step": 15460 + }, + { + "epoch": 1.2477604713098216, + "grad_norm": 0.6914857625961304, + "learning_rate": 2.4614264214461557e-05, + "loss": 2.4294, + "step": 15461 + }, + { + "epoch": 1.2478411750464047, + "grad_norm": 0.6815036535263062, + "learning_rate": 2.460389256037232e-05, + "loss": 2.4389, + "step": 15462 + }, + { + "epoch": 1.2479218787829875, + "grad_norm": 0.7420194745063782, + "learning_rate": 2.4593522785351176e-05, + "loss": 2.4932, + "step": 15463 + }, + { + "epoch": 1.2480025825195706, + "grad_norm": 0.6622182130813599, + "learning_rate": 2.4583154889656556e-05, + "loss": 2.4327, + "step": 15464 + }, + { + "epoch": 1.2480832862561537, + "grad_norm": 
0.6527934074401855, + "learning_rate": 2.457278887354689e-05, + "loss": 2.3857, + "step": 15465 + }, + { + "epoch": 1.2481639899927366, + "grad_norm": 0.6942344903945923, + "learning_rate": 2.4562424737280465e-05, + "loss": 2.4181, + "step": 15466 + }, + { + "epoch": 1.2482446937293197, + "grad_norm": 0.7449823021888733, + "learning_rate": 2.45520624811156e-05, + "loss": 2.4575, + "step": 15467 + }, + { + "epoch": 1.2483253974659028, + "grad_norm": 0.6905208826065063, + "learning_rate": 2.4541702105310605e-05, + "loss": 2.3858, + "step": 15468 + }, + { + "epoch": 1.2484061012024856, + "grad_norm": 0.6928502917289734, + "learning_rate": 2.4531343610123603e-05, + "loss": 2.4212, + "step": 15469 + }, + { + "epoch": 1.2484868049390687, + "grad_norm": 0.7182145118713379, + "learning_rate": 2.45209869958128e-05, + "loss": 2.4063, + "step": 15470 + }, + { + "epoch": 1.2485675086756518, + "grad_norm": 0.7379452586174011, + "learning_rate": 2.4510632262636314e-05, + "loss": 2.4612, + "step": 15471 + }, + { + "epoch": 1.2486482124122347, + "grad_norm": 0.6663349270820618, + "learning_rate": 2.450027941085219e-05, + "loss": 2.4583, + "step": 15472 + }, + { + "epoch": 1.2487289161488178, + "grad_norm": 0.7266560792922974, + "learning_rate": 2.4489928440718467e-05, + "loss": 2.4483, + "step": 15473 + }, + { + "epoch": 1.2488096198854006, + "grad_norm": 0.7046550512313843, + "learning_rate": 2.447957935249311e-05, + "loss": 2.4087, + "step": 15474 + }, + { + "epoch": 1.2488903236219837, + "grad_norm": 0.684248685836792, + "learning_rate": 2.4469232146434084e-05, + "loss": 2.4352, + "step": 15475 + }, + { + "epoch": 1.2489710273585668, + "grad_norm": 0.6864973902702332, + "learning_rate": 2.4458886822799198e-05, + "loss": 2.3872, + "step": 15476 + }, + { + "epoch": 1.2490517310951497, + "grad_norm": 0.6964752674102783, + "learning_rate": 2.444854338184631e-05, + "loss": 2.437, + "step": 15477 + }, + { + "epoch": 1.2491324348317328, + "grad_norm": 0.6755973100662231, + 
"learning_rate": 2.4438201823833252e-05, + "loss": 2.4302, + "step": 15478 + }, + { + "epoch": 1.2492131385683156, + "grad_norm": 0.6434857249259949, + "learning_rate": 2.44278621490177e-05, + "loss": 2.406, + "step": 15479 + }, + { + "epoch": 1.2492938423048987, + "grad_norm": 0.7342328429222107, + "learning_rate": 2.441752435765736e-05, + "loss": 2.451, + "step": 15480 + }, + { + "epoch": 1.2493745460414818, + "grad_norm": 0.7486860752105713, + "learning_rate": 2.44071884500099e-05, + "loss": 2.4536, + "step": 15481 + }, + { + "epoch": 1.2494552497780647, + "grad_norm": 0.7274537086486816, + "learning_rate": 2.4396854426332903e-05, + "loss": 2.4599, + "step": 15482 + }, + { + "epoch": 1.2495359535146477, + "grad_norm": 0.7580124735832214, + "learning_rate": 2.4386522286883918e-05, + "loss": 2.4038, + "step": 15483 + }, + { + "epoch": 1.2496166572512308, + "grad_norm": 0.6776975393295288, + "learning_rate": 2.4376192031920488e-05, + "loss": 2.4246, + "step": 15484 + }, + { + "epoch": 1.2496973609878137, + "grad_norm": 0.6899511814117432, + "learning_rate": 2.4365863661699996e-05, + "loss": 2.3922, + "step": 15485 + }, + { + "epoch": 1.2497780647243968, + "grad_norm": 0.7487930059432983, + "learning_rate": 2.4355537176479903e-05, + "loss": 2.4573, + "step": 15486 + }, + { + "epoch": 1.2498587684609797, + "grad_norm": 0.7306599617004395, + "learning_rate": 2.4345212576517575e-05, + "loss": 2.4745, + "step": 15487 + }, + { + "epoch": 1.2499394721975627, + "grad_norm": 0.7152543067932129, + "learning_rate": 2.43348898620703e-05, + "loss": 2.4768, + "step": 15488 + }, + { + "epoch": 1.2500201759341458, + "grad_norm": 0.6576277017593384, + "learning_rate": 2.432456903339535e-05, + "loss": 2.4289, + "step": 15489 + }, + { + "epoch": 1.2501008796707287, + "grad_norm": 0.6974572539329529, + "learning_rate": 2.4314250090749956e-05, + "loss": 2.4218, + "step": 15490 + }, + { + "epoch": 1.2501815834073118, + "grad_norm": 0.7869577407836914, + "learning_rate": 
2.4303933034391323e-05, + "loss": 2.3899, + "step": 15491 + }, + { + "epoch": 1.2502622871438946, + "grad_norm": 0.6723129749298096, + "learning_rate": 2.42936178645765e-05, + "loss": 2.4238, + "step": 15492 + }, + { + "epoch": 1.2503429908804777, + "grad_norm": 0.6839526891708374, + "learning_rate": 2.428330458156265e-05, + "loss": 2.4037, + "step": 15493 + }, + { + "epoch": 1.2504236946170608, + "grad_norm": 0.6866093277931213, + "learning_rate": 2.4272993185606796e-05, + "loss": 2.4228, + "step": 15494 + }, + { + "epoch": 1.2505043983536437, + "grad_norm": 0.6992947459220886, + "learning_rate": 2.426268367696588e-05, + "loss": 2.4248, + "step": 15495 + }, + { + "epoch": 1.2505851020902268, + "grad_norm": 0.6836698651313782, + "learning_rate": 2.4252376055896862e-05, + "loss": 2.5387, + "step": 15496 + }, + { + "epoch": 1.2506658058268099, + "grad_norm": 0.6990752816200256, + "learning_rate": 2.4242070322656663e-05, + "loss": 2.4438, + "step": 15497 + }, + { + "epoch": 1.2507465095633927, + "grad_norm": 0.7143029570579529, + "learning_rate": 2.4231766477502082e-05, + "loss": 2.4, + "step": 15498 + }, + { + "epoch": 1.2508272132999758, + "grad_norm": 0.6585043668746948, + "learning_rate": 2.422146452068994e-05, + "loss": 2.4256, + "step": 15499 + }, + { + "epoch": 1.250907917036559, + "grad_norm": 0.739107072353363, + "learning_rate": 2.421116445247702e-05, + "loss": 2.428, + "step": 15500 + }, + { + "epoch": 1.2509886207731418, + "grad_norm": 0.6675287485122681, + "learning_rate": 2.420086627311997e-05, + "loss": 2.5095, + "step": 15501 + }, + { + "epoch": 1.2510693245097249, + "grad_norm": 0.7133405804634094, + "learning_rate": 2.4190569982875467e-05, + "loss": 2.4719, + "step": 15502 + }, + { + "epoch": 1.2511500282463077, + "grad_norm": 0.710904061794281, + "learning_rate": 2.4180275582000134e-05, + "loss": 2.4449, + "step": 15503 + }, + { + "epoch": 1.2512307319828908, + "grad_norm": 0.7088729739189148, + "learning_rate": 2.4169983070750525e-05, + "loss": 
2.4059, + "step": 15504 + }, + { + "epoch": 1.2513114357194737, + "grad_norm": 0.7187358736991882, + "learning_rate": 2.4159692449383152e-05, + "loss": 2.4577, + "step": 15505 + }, + { + "epoch": 1.2513921394560568, + "grad_norm": 0.7531955242156982, + "learning_rate": 2.4149403718154497e-05, + "loss": 2.4101, + "step": 15506 + }, + { + "epoch": 1.2514728431926398, + "grad_norm": 0.7565199136734009, + "learning_rate": 2.413911687732101e-05, + "loss": 2.4805, + "step": 15507 + }, + { + "epoch": 1.2515535469292227, + "grad_norm": 0.706471860408783, + "learning_rate": 2.4128831927139008e-05, + "loss": 2.4494, + "step": 15508 + }, + { + "epoch": 1.2516342506658058, + "grad_norm": 0.7022314667701721, + "learning_rate": 2.4118548867864832e-05, + "loss": 2.4442, + "step": 15509 + }, + { + "epoch": 1.251714954402389, + "grad_norm": 0.6885591745376587, + "learning_rate": 2.4108267699754806e-05, + "loss": 2.4186, + "step": 15510 + }, + { + "epoch": 1.2517956581389718, + "grad_norm": 0.6963610649108887, + "learning_rate": 2.409798842306511e-05, + "loss": 2.4209, + "step": 15511 + }, + { + "epoch": 1.2518763618755548, + "grad_norm": 0.7117185592651367, + "learning_rate": 2.4087711038051942e-05, + "loss": 2.4106, + "step": 15512 + }, + { + "epoch": 1.251957065612138, + "grad_norm": 0.6944519281387329, + "learning_rate": 2.407743554497146e-05, + "loss": 2.4493, + "step": 15513 + }, + { + "epoch": 1.2520377693487208, + "grad_norm": 0.689818263053894, + "learning_rate": 2.406716194407974e-05, + "loss": 2.4358, + "step": 15514 + }, + { + "epoch": 1.2521184730853039, + "grad_norm": 0.8132768273353577, + "learning_rate": 2.4056890235632846e-05, + "loss": 2.4574, + "step": 15515 + }, + { + "epoch": 1.252199176821887, + "grad_norm": 0.6855002045631409, + "learning_rate": 2.4046620419886777e-05, + "loss": 2.4118, + "step": 15516 + }, + { + "epoch": 1.2522798805584698, + "grad_norm": 0.6616373658180237, + "learning_rate": 2.4036352497097458e-05, + "loss": 2.4332, + "step": 15517 + }, + { 
+ "epoch": 1.252360584295053, + "grad_norm": 0.6657225489616394, + "learning_rate": 2.4026086467520803e-05, + "loss": 2.3989, + "step": 15518 + }, + { + "epoch": 1.2524412880316358, + "grad_norm": 0.6796447038650513, + "learning_rate": 2.4015822331412664e-05, + "loss": 2.4269, + "step": 15519 + }, + { + "epoch": 1.2525219917682189, + "grad_norm": 0.7168079614639282, + "learning_rate": 2.400556008902889e-05, + "loss": 2.4263, + "step": 15520 + }, + { + "epoch": 1.2526026955048017, + "grad_norm": 0.6985058188438416, + "learning_rate": 2.3995299740625186e-05, + "loss": 2.437, + "step": 15521 + }, + { + "epoch": 1.2526833992413848, + "grad_norm": 0.7078086137771606, + "learning_rate": 2.3985041286457287e-05, + "loss": 2.3996, + "step": 15522 + }, + { + "epoch": 1.252764102977968, + "grad_norm": 0.6989054083824158, + "learning_rate": 2.3974784726780865e-05, + "loss": 2.4717, + "step": 15523 + }, + { + "epoch": 1.2528448067145508, + "grad_norm": 0.747606098651886, + "learning_rate": 2.396453006185153e-05, + "loss": 2.4228, + "step": 15524 + }, + { + "epoch": 1.2529255104511339, + "grad_norm": 0.7500887513160706, + "learning_rate": 2.3954277291924876e-05, + "loss": 2.4636, + "step": 15525 + }, + { + "epoch": 1.253006214187717, + "grad_norm": 0.7710712552070618, + "learning_rate": 2.3944026417256437e-05, + "loss": 2.4405, + "step": 15526 + }, + { + "epoch": 1.2530869179242998, + "grad_norm": 0.7278285622596741, + "learning_rate": 2.3933777438101657e-05, + "loss": 2.4279, + "step": 15527 + }, + { + "epoch": 1.253167621660883, + "grad_norm": 0.6979010701179504, + "learning_rate": 2.3923530354715973e-05, + "loss": 2.4272, + "step": 15528 + }, + { + "epoch": 1.253248325397466, + "grad_norm": 0.7330336570739746, + "learning_rate": 2.3913285167354804e-05, + "loss": 2.3861, + "step": 15529 + }, + { + "epoch": 1.2533290291340489, + "grad_norm": 0.675499677658081, + "learning_rate": 2.3903041876273436e-05, + "loss": 2.3987, + "step": 15530 + }, + { + "epoch": 1.253409732870632, + 
"grad_norm": 0.6854682564735413, + "learning_rate": 2.3892800481727186e-05, + "loss": 2.4085, + "step": 15531 + }, + { + "epoch": 1.253490436607215, + "grad_norm": 0.713810384273529, + "learning_rate": 2.388256098397129e-05, + "loss": 2.3897, + "step": 15532 + }, + { + "epoch": 1.253571140343798, + "grad_norm": 0.683214545249939, + "learning_rate": 2.3872323383260953e-05, + "loss": 2.4526, + "step": 15533 + }, + { + "epoch": 1.253651844080381, + "grad_norm": 0.6718357801437378, + "learning_rate": 2.3862087679851318e-05, + "loss": 2.4612, + "step": 15534 + }, + { + "epoch": 1.2537325478169639, + "grad_norm": 0.722283124923706, + "learning_rate": 2.3851853873997488e-05, + "loss": 2.4163, + "step": 15535 + }, + { + "epoch": 1.253813251553547, + "grad_norm": 0.689393162727356, + "learning_rate": 2.384162196595453e-05, + "loss": 2.3984, + "step": 15536 + }, + { + "epoch": 1.2538939552901298, + "grad_norm": 0.7146410346031189, + "learning_rate": 2.3831391955977412e-05, + "loss": 2.4442, + "step": 15537 + }, + { + "epoch": 1.253974659026713, + "grad_norm": 0.6651021838188171, + "learning_rate": 2.3821163844321104e-05, + "loss": 2.4064, + "step": 15538 + }, + { + "epoch": 1.254055362763296, + "grad_norm": 0.7088985443115234, + "learning_rate": 2.381093763124056e-05, + "loss": 2.4831, + "step": 15539 + }, + { + "epoch": 1.2541360664998789, + "grad_norm": 0.661375105381012, + "learning_rate": 2.3800713316990588e-05, + "loss": 2.3657, + "step": 15540 + }, + { + "epoch": 1.254216770236462, + "grad_norm": 0.6870979070663452, + "learning_rate": 2.3790490901826012e-05, + "loss": 2.4208, + "step": 15541 + }, + { + "epoch": 1.254297473973045, + "grad_norm": 0.6256219148635864, + "learning_rate": 2.3780270386001657e-05, + "loss": 2.4182, + "step": 15542 + }, + { + "epoch": 1.254378177709628, + "grad_norm": 0.7070638537406921, + "learning_rate": 2.377005176977215e-05, + "loss": 2.3758, + "step": 15543 + }, + { + "epoch": 1.254458881446211, + "grad_norm": 0.6571370363235474, + 
"learning_rate": 2.3759835053392242e-05, + "loss": 2.3927, + "step": 15544 + }, + { + "epoch": 1.254539585182794, + "grad_norm": 0.644263744354248, + "learning_rate": 2.3749620237116565e-05, + "loss": 2.3992, + "step": 15545 + }, + { + "epoch": 1.254620288919377, + "grad_norm": 0.7127394676208496, + "learning_rate": 2.3739407321199648e-05, + "loss": 2.3942, + "step": 15546 + }, + { + "epoch": 1.25470099265596, + "grad_norm": 0.7274866104125977, + "learning_rate": 2.372919630589605e-05, + "loss": 2.5232, + "step": 15547 + }, + { + "epoch": 1.2547816963925431, + "grad_norm": 0.690138041973114, + "learning_rate": 2.3718987191460274e-05, + "loss": 2.4371, + "step": 15548 + }, + { + "epoch": 1.254862400129126, + "grad_norm": 0.6990681886672974, + "learning_rate": 2.3708779978146724e-05, + "loss": 2.4568, + "step": 15549 + }, + { + "epoch": 1.254943103865709, + "grad_norm": 0.7430790662765503, + "learning_rate": 2.3698574666209793e-05, + "loss": 2.423, + "step": 15550 + }, + { + "epoch": 1.255023807602292, + "grad_norm": 0.6991416215896606, + "learning_rate": 2.3688371255903828e-05, + "loss": 2.4529, + "step": 15551 + }, + { + "epoch": 1.255104511338875, + "grad_norm": 0.6733322739601135, + "learning_rate": 2.367816974748317e-05, + "loss": 2.4531, + "step": 15552 + }, + { + "epoch": 1.2551852150754579, + "grad_norm": 0.7460463047027588, + "learning_rate": 2.3667970141202e-05, + "loss": 2.4267, + "step": 15553 + }, + { + "epoch": 1.255265918812041, + "grad_norm": 0.6784021854400635, + "learning_rate": 2.3657772437314517e-05, + "loss": 2.4996, + "step": 15554 + }, + { + "epoch": 1.255346622548624, + "grad_norm": 0.7499529719352722, + "learning_rate": 2.3647576636074975e-05, + "loss": 2.4749, + "step": 15555 + }, + { + "epoch": 1.255427326285207, + "grad_norm": 0.6698335409164429, + "learning_rate": 2.3637382737737368e-05, + "loss": 2.4499, + "step": 15556 + }, + { + "epoch": 1.25550803002179, + "grad_norm": 0.6644846200942993, + "learning_rate": 2.3627190742555806e-05, + 
"loss": 2.397, + "step": 15557 + }, + { + "epoch": 1.255588733758373, + "grad_norm": 0.7041488289833069, + "learning_rate": 2.3617000650784315e-05, + "loss": 2.4012, + "step": 15558 + }, + { + "epoch": 1.255669437494956, + "grad_norm": 0.72523033618927, + "learning_rate": 2.3606812462676798e-05, + "loss": 2.4151, + "step": 15559 + }, + { + "epoch": 1.255750141231539, + "grad_norm": 0.77669757604599, + "learning_rate": 2.3596626178487225e-05, + "loss": 2.4478, + "step": 15560 + }, + { + "epoch": 1.2558308449681221, + "grad_norm": 0.6919559836387634, + "learning_rate": 2.3586441798469462e-05, + "loss": 2.4548, + "step": 15561 + }, + { + "epoch": 1.255911548704705, + "grad_norm": 0.7613349556922913, + "learning_rate": 2.3576259322877292e-05, + "loss": 2.4475, + "step": 15562 + }, + { + "epoch": 1.255992252441288, + "grad_norm": 0.6738333106040955, + "learning_rate": 2.3566078751964515e-05, + "loss": 2.4242, + "step": 15563 + }, + { + "epoch": 1.256072956177871, + "grad_norm": 0.7242118716239929, + "learning_rate": 2.355590008598486e-05, + "loss": 2.4047, + "step": 15564 + }, + { + "epoch": 1.256153659914454, + "grad_norm": 0.7117685675621033, + "learning_rate": 2.354572332519199e-05, + "loss": 2.4473, + "step": 15565 + }, + { + "epoch": 1.256234363651037, + "grad_norm": 0.7466531991958618, + "learning_rate": 2.3535548469839564e-05, + "loss": 2.453, + "step": 15566 + }, + { + "epoch": 1.25631506738762, + "grad_norm": 0.6750668883323669, + "learning_rate": 2.3525375520181136e-05, + "loss": 2.4367, + "step": 15567 + }, + { + "epoch": 1.256395771124203, + "grad_norm": 0.7640851736068726, + "learning_rate": 2.35152044764703e-05, + "loss": 2.5014, + "step": 15568 + }, + { + "epoch": 1.256476474860786, + "grad_norm": 0.7198928594589233, + "learning_rate": 2.3505035338960456e-05, + "loss": 2.5138, + "step": 15569 + }, + { + "epoch": 1.256557178597369, + "grad_norm": 0.7079946398735046, + "learning_rate": 2.349486810790511e-05, + "loss": 2.4172, + "step": 15570 + }, + { + 
"epoch": 1.2566378823339521, + "grad_norm": 0.7477186918258667, + "learning_rate": 2.3484702783557655e-05, + "loss": 2.4224, + "step": 15571 + }, + { + "epoch": 1.256718586070535, + "grad_norm": 0.6875394582748413, + "learning_rate": 2.3474539366171388e-05, + "loss": 2.4621, + "step": 15572 + }, + { + "epoch": 1.256799289807118, + "grad_norm": 0.7164824604988098, + "learning_rate": 2.346437785599964e-05, + "loss": 2.4416, + "step": 15573 + }, + { + "epoch": 1.2568799935437012, + "grad_norm": 0.7031935453414917, + "learning_rate": 2.3454218253295668e-05, + "loss": 2.3943, + "step": 15574 + }, + { + "epoch": 1.256960697280284, + "grad_norm": 0.6739614009857178, + "learning_rate": 2.3444060558312665e-05, + "loss": 2.4114, + "step": 15575 + }, + { + "epoch": 1.2570414010168671, + "grad_norm": 0.6710866689682007, + "learning_rate": 2.3433904771303794e-05, + "loss": 2.4077, + "step": 15576 + }, + { + "epoch": 1.2571221047534502, + "grad_norm": 0.6589750051498413, + "learning_rate": 2.342375089252219e-05, + "loss": 2.3494, + "step": 15577 + }, + { + "epoch": 1.257202808490033, + "grad_norm": 0.7018333077430725, + "learning_rate": 2.3413598922220857e-05, + "loss": 2.459, + "step": 15578 + }, + { + "epoch": 1.2572835122266162, + "grad_norm": 0.7735301852226257, + "learning_rate": 2.3403448860652842e-05, + "loss": 2.4524, + "step": 15579 + }, + { + "epoch": 1.257364215963199, + "grad_norm": 0.7009726762771606, + "learning_rate": 2.339330070807113e-05, + "loss": 2.4244, + "step": 15580 + }, + { + "epoch": 1.2574449196997821, + "grad_norm": 0.671521008014679, + "learning_rate": 2.3383154464728595e-05, + "loss": 2.3808, + "step": 15581 + }, + { + "epoch": 1.257525623436365, + "grad_norm": 0.7736711502075195, + "learning_rate": 2.3373010130878126e-05, + "loss": 2.4936, + "step": 15582 + }, + { + "epoch": 1.257606327172948, + "grad_norm": 0.6987056136131287, + "learning_rate": 2.336286770677255e-05, + "loss": 2.4484, + "step": 15583 + }, + { + "epoch": 1.2576870309095312, + 
"grad_norm": 0.6337067484855652, + "learning_rate": 2.3352727192664635e-05, + "loss": 2.4196, + "step": 15584 + }, + { + "epoch": 1.257767734646114, + "grad_norm": 0.6832795143127441, + "learning_rate": 2.3342588588807123e-05, + "loss": 2.3681, + "step": 15585 + }, + { + "epoch": 1.257848438382697, + "grad_norm": 0.7208079695701599, + "learning_rate": 2.3332451895452688e-05, + "loss": 2.4436, + "step": 15586 + }, + { + "epoch": 1.2579291421192802, + "grad_norm": 0.6607621312141418, + "learning_rate": 2.3322317112853986e-05, + "loss": 2.4088, + "step": 15587 + }, + { + "epoch": 1.258009845855863, + "grad_norm": 0.7261247038841248, + "learning_rate": 2.331218424126356e-05, + "loss": 2.4389, + "step": 15588 + }, + { + "epoch": 1.2580905495924462, + "grad_norm": 0.6187729239463806, + "learning_rate": 2.3302053280933954e-05, + "loss": 2.3568, + "step": 15589 + }, + { + "epoch": 1.2581712533290292, + "grad_norm": 0.6196430921554565, + "learning_rate": 2.3291924232117713e-05, + "loss": 2.4285, + "step": 15590 + }, + { + "epoch": 1.258251957065612, + "grad_norm": 0.7271853685379028, + "learning_rate": 2.3281797095067193e-05, + "loss": 2.4058, + "step": 15591 + }, + { + "epoch": 1.2583326608021952, + "grad_norm": 0.7141130566596985, + "learning_rate": 2.327167187003484e-05, + "loss": 2.3971, + "step": 15592 + }, + { + "epoch": 1.2584133645387783, + "grad_norm": 0.680743932723999, + "learning_rate": 2.3261548557273027e-05, + "loss": 2.4387, + "step": 15593 + }, + { + "epoch": 1.2584940682753611, + "grad_norm": 0.718173086643219, + "learning_rate": 2.3251427157033955e-05, + "loss": 2.43, + "step": 15594 + }, + { + "epoch": 1.2585747720119442, + "grad_norm": 0.7600045800209045, + "learning_rate": 2.324130766956998e-05, + "loss": 2.4584, + "step": 15595 + }, + { + "epoch": 1.258655475748527, + "grad_norm": 0.7432500123977661, + "learning_rate": 2.3231190095133294e-05, + "loss": 2.4717, + "step": 15596 + }, + { + "epoch": 1.2587361794851102, + "grad_norm": 0.6603000164031982, + 
"learning_rate": 2.3221074433975988e-05, + "loss": 2.3952, + "step": 15597 + }, + { + "epoch": 1.258816883221693, + "grad_norm": 0.7020140290260315, + "learning_rate": 2.3210960686350213e-05, + "loss": 2.4064, + "step": 15598 + }, + { + "epoch": 1.2588975869582761, + "grad_norm": 0.7434887290000916, + "learning_rate": 2.320084885250804e-05, + "loss": 2.4708, + "step": 15599 + }, + { + "epoch": 1.2589782906948592, + "grad_norm": 0.6626797318458557, + "learning_rate": 2.3190738932701482e-05, + "loss": 2.4503, + "step": 15600 + }, + { + "epoch": 1.259058994431442, + "grad_norm": 0.7880598902702332, + "learning_rate": 2.3180630927182466e-05, + "loss": 2.384, + "step": 15601 + }, + { + "epoch": 1.2591396981680252, + "grad_norm": 0.7766147255897522, + "learning_rate": 2.3170524836202933e-05, + "loss": 2.4019, + "step": 15602 + }, + { + "epoch": 1.2592204019046083, + "grad_norm": 0.7817980051040649, + "learning_rate": 2.3160420660014792e-05, + "loss": 2.4729, + "step": 15603 + }, + { + "epoch": 1.2593011056411911, + "grad_norm": 0.6915614604949951, + "learning_rate": 2.3150318398869787e-05, + "loss": 2.4028, + "step": 15604 + }, + { + "epoch": 1.2593818093777742, + "grad_norm": 0.690882682800293, + "learning_rate": 2.3140218053019714e-05, + "loss": 2.4386, + "step": 15605 + }, + { + "epoch": 1.2594625131143573, + "grad_norm": 0.6670350432395935, + "learning_rate": 2.3130119622716382e-05, + "loss": 2.4224, + "step": 15606 + }, + { + "epoch": 1.2595432168509402, + "grad_norm": 0.6680006980895996, + "learning_rate": 2.3120023108211375e-05, + "loss": 2.3475, + "step": 15607 + }, + { + "epoch": 1.2596239205875233, + "grad_norm": 0.7003577947616577, + "learning_rate": 2.310992850975636e-05, + "loss": 2.4198, + "step": 15608 + }, + { + "epoch": 1.2597046243241061, + "grad_norm": 0.7444167733192444, + "learning_rate": 2.3099835827602944e-05, + "loss": 2.3756, + "step": 15609 + }, + { + "epoch": 1.2597853280606892, + "grad_norm": 0.6757989525794983, + "learning_rate": 
2.3089745062002612e-05, + "loss": 2.3955, + "step": 15610 + }, + { + "epoch": 1.259866031797272, + "grad_norm": 0.6955820322036743, + "learning_rate": 2.3079656213206878e-05, + "loss": 2.4031, + "step": 15611 + }, + { + "epoch": 1.2599467355338552, + "grad_norm": 0.6646408438682556, + "learning_rate": 2.3069569281467184e-05, + "loss": 2.4246, + "step": 15612 + }, + { + "epoch": 1.2600274392704383, + "grad_norm": 0.6922882199287415, + "learning_rate": 2.3059484267034958e-05, + "loss": 2.4157, + "step": 15613 + }, + { + "epoch": 1.2601081430070211, + "grad_norm": 0.8092310428619385, + "learning_rate": 2.3049401170161468e-05, + "loss": 2.4137, + "step": 15614 + }, + { + "epoch": 1.2601888467436042, + "grad_norm": 0.7024559378623962, + "learning_rate": 2.3039319991098063e-05, + "loss": 2.4497, + "step": 15615 + }, + { + "epoch": 1.2602695504801873, + "grad_norm": 0.7096099853515625, + "learning_rate": 2.302924073009597e-05, + "loss": 2.4045, + "step": 15616 + }, + { + "epoch": 1.2603502542167702, + "grad_norm": 0.6777564287185669, + "learning_rate": 2.3019163387406406e-05, + "loss": 2.4607, + "step": 15617 + }, + { + "epoch": 1.2604309579533532, + "grad_norm": 0.7564159035682678, + "learning_rate": 2.300908796328052e-05, + "loss": 2.4985, + "step": 15618 + }, + { + "epoch": 1.2605116616899363, + "grad_norm": 0.7432986497879028, + "learning_rate": 2.2999014457969447e-05, + "loss": 2.4326, + "step": 15619 + }, + { + "epoch": 1.2605923654265192, + "grad_norm": 0.7178141474723816, + "learning_rate": 2.2988942871724182e-05, + "loss": 2.4118, + "step": 15620 + }, + { + "epoch": 1.2606730691631023, + "grad_norm": 0.7074497938156128, + "learning_rate": 2.2978873204795782e-05, + "loss": 2.4163, + "step": 15621 + }, + { + "epoch": 1.2607537728996854, + "grad_norm": 0.670200765132904, + "learning_rate": 2.2968805457435217e-05, + "loss": 2.4081, + "step": 15622 + }, + { + "epoch": 1.2608344766362682, + "grad_norm": 0.7258187532424927, + "learning_rate": 2.2958739629893355e-05, + 
"loss": 2.4889, + "step": 15623 + }, + { + "epoch": 1.2609151803728513, + "grad_norm": 0.6999781727790833, + "learning_rate": 2.2948675722421086e-05, + "loss": 2.3945, + "step": 15624 + }, + { + "epoch": 1.2609958841094342, + "grad_norm": 0.7030084133148193, + "learning_rate": 2.2938613735269243e-05, + "loss": 2.4509, + "step": 15625 + }, + { + "epoch": 1.2610765878460173, + "grad_norm": 0.6875420212745667, + "learning_rate": 2.292855366868858e-05, + "loss": 2.3658, + "step": 15626 + }, + { + "epoch": 1.2611572915826001, + "grad_norm": 0.7375235557556152, + "learning_rate": 2.2918495522929817e-05, + "loss": 2.4308, + "step": 15627 + }, + { + "epoch": 1.2612379953191832, + "grad_norm": 0.7021106481552124, + "learning_rate": 2.2908439298243644e-05, + "loss": 2.4046, + "step": 15628 + }, + { + "epoch": 1.2613186990557663, + "grad_norm": 0.76661616563797, + "learning_rate": 2.2898384994880716e-05, + "loss": 2.5156, + "step": 15629 + }, + { + "epoch": 1.2613994027923492, + "grad_norm": 0.6684869527816772, + "learning_rate": 2.2888332613091558e-05, + "loss": 2.4342, + "step": 15630 + }, + { + "epoch": 1.2614801065289323, + "grad_norm": 0.6878669261932373, + "learning_rate": 2.2878282153126706e-05, + "loss": 2.4544, + "step": 15631 + }, + { + "epoch": 1.2615608102655154, + "grad_norm": 0.6659132838249207, + "learning_rate": 2.2868233615236702e-05, + "loss": 2.4341, + "step": 15632 + }, + { + "epoch": 1.2616415140020982, + "grad_norm": 0.657474160194397, + "learning_rate": 2.2858186999671905e-05, + "loss": 2.3515, + "step": 15633 + }, + { + "epoch": 1.2617222177386813, + "grad_norm": 0.7245650291442871, + "learning_rate": 2.284814230668274e-05, + "loss": 2.3983, + "step": 15634 + }, + { + "epoch": 1.2618029214752644, + "grad_norm": 0.6400195360183716, + "learning_rate": 2.2838099536519554e-05, + "loss": 2.3535, + "step": 15635 + }, + { + "epoch": 1.2618836252118473, + "grad_norm": 0.6719450950622559, + "learning_rate": 2.282805868943262e-05, + "loss": 2.3906, + "step": 
15636 + }, + { + "epoch": 1.2619643289484304, + "grad_norm": 0.682746946811676, + "learning_rate": 2.2818019765672207e-05, + "loss": 2.4045, + "step": 15637 + }, + { + "epoch": 1.2620450326850134, + "grad_norm": 0.6631760597229004, + "learning_rate": 2.2807982765488513e-05, + "loss": 2.4896, + "step": 15638 + }, + { + "epoch": 1.2621257364215963, + "grad_norm": 0.782202422618866, + "learning_rate": 2.279794768913164e-05, + "loss": 2.4628, + "step": 15639 + }, + { + "epoch": 1.2622064401581794, + "grad_norm": 0.7579823732376099, + "learning_rate": 2.278791453685173e-05, + "loss": 2.4635, + "step": 15640 + }, + { + "epoch": 1.2622871438947623, + "grad_norm": 0.665096640586853, + "learning_rate": 2.277788330889884e-05, + "loss": 2.4899, + "step": 15641 + }, + { + "epoch": 1.2623678476313454, + "grad_norm": 0.7635685205459595, + "learning_rate": 2.2767854005522936e-05, + "loss": 2.4146, + "step": 15642 + }, + { + "epoch": 1.2624485513679282, + "grad_norm": 0.7579118609428406, + "learning_rate": 2.2757826626974e-05, + "loss": 2.3692, + "step": 15643 + }, + { + "epoch": 1.2625292551045113, + "grad_norm": 0.6772074699401855, + "learning_rate": 2.2747801173501938e-05, + "loss": 2.3954, + "step": 15644 + }, + { + "epoch": 1.2626099588410944, + "grad_norm": 0.7028382420539856, + "learning_rate": 2.2737777645356606e-05, + "loss": 2.4799, + "step": 15645 + }, + { + "epoch": 1.2626906625776773, + "grad_norm": 0.7152617573738098, + "learning_rate": 2.2727756042787818e-05, + "loss": 2.4095, + "step": 15646 + }, + { + "epoch": 1.2627713663142603, + "grad_norm": 0.7286608219146729, + "learning_rate": 2.271773636604535e-05, + "loss": 2.4496, + "step": 15647 + }, + { + "epoch": 1.2628520700508434, + "grad_norm": 0.7006896734237671, + "learning_rate": 2.2707718615378935e-05, + "loss": 2.4128, + "step": 15648 + }, + { + "epoch": 1.2629327737874263, + "grad_norm": 0.6856697797775269, + "learning_rate": 2.2697702791038177e-05, + "loss": 2.4169, + "step": 15649 + }, + { + "epoch": 
1.2630134775240094, + "grad_norm": 0.7582918405532837, + "learning_rate": 2.268768889327275e-05, + "loss": 2.4007, + "step": 15650 + }, + { + "epoch": 1.2630941812605925, + "grad_norm": 0.664633572101593, + "learning_rate": 2.2677676922332237e-05, + "loss": 2.3876, + "step": 15651 + }, + { + "epoch": 1.2631748849971753, + "grad_norm": 0.7283070087432861, + "learning_rate": 2.266766687846611e-05, + "loss": 2.4175, + "step": 15652 + }, + { + "epoch": 1.2632555887337584, + "grad_norm": 0.7309537529945374, + "learning_rate": 2.2657658761923863e-05, + "loss": 2.3998, + "step": 15653 + }, + { + "epoch": 1.2633362924703415, + "grad_norm": 0.6386510133743286, + "learning_rate": 2.2647652572954968e-05, + "loss": 2.3723, + "step": 15654 + }, + { + "epoch": 1.2634169962069244, + "grad_norm": 0.6805689930915833, + "learning_rate": 2.263764831180876e-05, + "loss": 2.3989, + "step": 15655 + }, + { + "epoch": 1.2634976999435072, + "grad_norm": 0.7147208452224731, + "learning_rate": 2.2627645978734536e-05, + "loss": 2.4748, + "step": 15656 + }, + { + "epoch": 1.2635784036800903, + "grad_norm": 0.6835155487060547, + "learning_rate": 2.2617645573981683e-05, + "loss": 2.4266, + "step": 15657 + }, + { + "epoch": 1.2636591074166734, + "grad_norm": 0.7631552219390869, + "learning_rate": 2.2607647097799368e-05, + "loss": 2.4152, + "step": 15658 + }, + { + "epoch": 1.2637398111532563, + "grad_norm": 0.6793624758720398, + "learning_rate": 2.2597650550436777e-05, + "loss": 2.3491, + "step": 15659 + }, + { + "epoch": 1.2638205148898394, + "grad_norm": 0.6465637683868408, + "learning_rate": 2.2587655932143083e-05, + "loss": 2.3774, + "step": 15660 + }, + { + "epoch": 1.2639012186264225, + "grad_norm": 0.6920284628868103, + "learning_rate": 2.2577663243167368e-05, + "loss": 2.4321, + "step": 15661 + }, + { + "epoch": 1.2639819223630053, + "grad_norm": 0.6922522783279419, + "learning_rate": 2.256767248375866e-05, + "loss": 2.4242, + "step": 15662 + }, + { + "epoch": 1.2640626260995884, + 
"grad_norm": 0.6811214089393616, + "learning_rate": 2.255768365416595e-05, + "loss": 2.4101, + "step": 15663 + }, + { + "epoch": 1.2641433298361715, + "grad_norm": 0.6704947352409363, + "learning_rate": 2.2547696754638238e-05, + "loss": 2.4792, + "step": 15664 + }, + { + "epoch": 1.2642240335727544, + "grad_norm": 0.6814701557159424, + "learning_rate": 2.2537711785424354e-05, + "loss": 2.4429, + "step": 15665 + }, + { + "epoch": 1.2643047373093375, + "grad_norm": 0.6778244972229004, + "learning_rate": 2.252772874677318e-05, + "loss": 2.3882, + "step": 15666 + }, + { + "epoch": 1.2643854410459205, + "grad_norm": 0.6570093035697937, + "learning_rate": 2.2517747638933518e-05, + "loss": 2.4162, + "step": 15667 + }, + { + "epoch": 1.2644661447825034, + "grad_norm": 0.6973466873168945, + "learning_rate": 2.2507768462154133e-05, + "loss": 2.3646, + "step": 15668 + }, + { + "epoch": 1.2645468485190865, + "grad_norm": 0.7258623242378235, + "learning_rate": 2.2497791216683715e-05, + "loss": 2.404, + "step": 15669 + }, + { + "epoch": 1.2646275522556694, + "grad_norm": 0.7462170124053955, + "learning_rate": 2.248781590277097e-05, + "loss": 2.5076, + "step": 15670 + }, + { + "epoch": 1.2647082559922525, + "grad_norm": 0.7070441246032715, + "learning_rate": 2.247784252066444e-05, + "loss": 2.3817, + "step": 15671 + }, + { + "epoch": 1.2647889597288353, + "grad_norm": 0.7150183916091919, + "learning_rate": 2.246787107061272e-05, + "loss": 2.461, + "step": 15672 + }, + { + "epoch": 1.2648696634654184, + "grad_norm": 0.668436586856842, + "learning_rate": 2.2457901552864347e-05, + "loss": 2.466, + "step": 15673 + }, + { + "epoch": 1.2649503672020015, + "grad_norm": 0.7011097073554993, + "learning_rate": 2.2447933967667745e-05, + "loss": 2.4582, + "step": 15674 + }, + { + "epoch": 1.2650310709385844, + "grad_norm": 0.7149096727371216, + "learning_rate": 2.243796831527134e-05, + "loss": 2.4461, + "step": 15675 + }, + { + "epoch": 1.2651117746751674, + "grad_norm": 0.6810914278030396, 
+ "learning_rate": 2.2428004595923525e-05, + "loss": 2.4043, + "step": 15676 + }, + { + "epoch": 1.2651924784117505, + "grad_norm": 0.7700765132904053, + "learning_rate": 2.241804280987261e-05, + "loss": 2.4197, + "step": 15677 + }, + { + "epoch": 1.2652731821483334, + "grad_norm": 0.6897448897361755, + "learning_rate": 2.240808295736686e-05, + "loss": 2.4052, + "step": 15678 + }, + { + "epoch": 1.2653538858849165, + "grad_norm": 0.7092932462692261, + "learning_rate": 2.2398125038654515e-05, + "loss": 2.4088, + "step": 15679 + }, + { + "epoch": 1.2654345896214996, + "grad_norm": 0.6930294632911682, + "learning_rate": 2.2388169053983777e-05, + "loss": 2.4504, + "step": 15680 + }, + { + "epoch": 1.2655152933580824, + "grad_norm": 0.7056782245635986, + "learning_rate": 2.237821500360271e-05, + "loss": 2.3975, + "step": 15681 + }, + { + "epoch": 1.2655959970946655, + "grad_norm": 0.651772141456604, + "learning_rate": 2.236826288775944e-05, + "loss": 2.3941, + "step": 15682 + }, + { + "epoch": 1.2656767008312486, + "grad_norm": 0.7254980206489563, + "learning_rate": 2.2358312706702012e-05, + "loss": 2.4149, + "step": 15683 + }, + { + "epoch": 1.2657574045678315, + "grad_norm": 0.6553635597229004, + "learning_rate": 2.2348364460678373e-05, + "loss": 2.4099, + "step": 15684 + }, + { + "epoch": 1.2658381083044146, + "grad_norm": 0.6952616572380066, + "learning_rate": 2.233841814993646e-05, + "loss": 2.384, + "step": 15685 + }, + { + "epoch": 1.2659188120409974, + "grad_norm": 0.72947096824646, + "learning_rate": 2.2328473774724178e-05, + "loss": 2.5033, + "step": 15686 + }, + { + "epoch": 1.2659995157775805, + "grad_norm": 0.7419683933258057, + "learning_rate": 2.231853133528937e-05, + "loss": 2.4881, + "step": 15687 + }, + { + "epoch": 1.2660802195141634, + "grad_norm": 0.7125211358070374, + "learning_rate": 2.2308590831879827e-05, + "loss": 2.4334, + "step": 15688 + }, + { + "epoch": 1.2661609232507465, + "grad_norm": 0.6668617129325867, + "learning_rate": 
2.2298652264743315e-05, + "loss": 2.4144, + "step": 15689 + }, + { + "epoch": 1.2662416269873296, + "grad_norm": 0.8075512051582336, + "learning_rate": 2.2288715634127465e-05, + "loss": 2.421, + "step": 15690 + }, + { + "epoch": 1.2663223307239124, + "grad_norm": 0.6894629001617432, + "learning_rate": 2.2278780940279965e-05, + "loss": 2.4142, + "step": 15691 + }, + { + "epoch": 1.2664030344604955, + "grad_norm": 0.7418074011802673, + "learning_rate": 2.226884818344841e-05, + "loss": 2.4214, + "step": 15692 + }, + { + "epoch": 1.2664837381970786, + "grad_norm": 0.6724219918251038, + "learning_rate": 2.225891736388037e-05, + "loss": 2.4455, + "step": 15693 + }, + { + "epoch": 1.2665644419336615, + "grad_norm": 0.7202882766723633, + "learning_rate": 2.224898848182331e-05, + "loss": 2.4017, + "step": 15694 + }, + { + "epoch": 1.2666451456702446, + "grad_norm": 0.7671259641647339, + "learning_rate": 2.2239061537524698e-05, + "loss": 2.4386, + "step": 15695 + }, + { + "epoch": 1.2667258494068276, + "grad_norm": 0.7154317498207092, + "learning_rate": 2.222913653123194e-05, + "loss": 2.3754, + "step": 15696 + }, + { + "epoch": 1.2668065531434105, + "grad_norm": 0.7203264236450195, + "learning_rate": 2.221921346319239e-05, + "loss": 2.3926, + "step": 15697 + }, + { + "epoch": 1.2668872568799936, + "grad_norm": 0.7104187607765198, + "learning_rate": 2.2209292333653365e-05, + "loss": 2.4528, + "step": 15698 + }, + { + "epoch": 1.2669679606165767, + "grad_norm": 0.7650138139724731, + "learning_rate": 2.2199373142862158e-05, + "loss": 2.4372, + "step": 15699 + }, + { + "epoch": 1.2670486643531595, + "grad_norm": 0.6796044111251831, + "learning_rate": 2.2189455891065903e-05, + "loss": 2.415, + "step": 15700 + }, + { + "epoch": 1.2671293680897426, + "grad_norm": 0.6749297380447388, + "learning_rate": 2.2179540578511813e-05, + "loss": 2.4337, + "step": 15701 + }, + { + "epoch": 1.2672100718263255, + "grad_norm": 0.7330272793769836, + "learning_rate": 2.216962720544703e-05, + 
"loss": 2.4322, + "step": 15702 + }, + { + "epoch": 1.2672907755629086, + "grad_norm": 0.6793510913848877, + "learning_rate": 2.215971577211855e-05, + "loss": 2.4473, + "step": 15703 + }, + { + "epoch": 1.2673714792994915, + "grad_norm": 0.7477267384529114, + "learning_rate": 2.2149806278773433e-05, + "loss": 2.4699, + "step": 15704 + }, + { + "epoch": 1.2674521830360745, + "grad_norm": 0.7048643827438354, + "learning_rate": 2.213989872565867e-05, + "loss": 2.4341, + "step": 15705 + }, + { + "epoch": 1.2675328867726576, + "grad_norm": 0.647433340549469, + "learning_rate": 2.2129993113021108e-05, + "loss": 2.423, + "step": 15706 + }, + { + "epoch": 1.2676135905092405, + "grad_norm": 0.6886507272720337, + "learning_rate": 2.2120089441107706e-05, + "loss": 2.4185, + "step": 15707 + }, + { + "epoch": 1.2676942942458236, + "grad_norm": 0.6720516085624695, + "learning_rate": 2.2110187710165242e-05, + "loss": 2.4587, + "step": 15708 + }, + { + "epoch": 1.2677749979824067, + "grad_norm": 0.676665723323822, + "learning_rate": 2.2100287920440543e-05, + "loss": 2.4241, + "step": 15709 + }, + { + "epoch": 1.2678557017189895, + "grad_norm": 0.6939559578895569, + "learning_rate": 2.209039007218028e-05, + "loss": 2.3974, + "step": 15710 + }, + { + "epoch": 1.2679364054555726, + "grad_norm": 0.6485786437988281, + "learning_rate": 2.2080494165631137e-05, + "loss": 2.4041, + "step": 15711 + }, + { + "epoch": 1.2680171091921557, + "grad_norm": 0.668319582939148, + "learning_rate": 2.2070600201039802e-05, + "loss": 2.4705, + "step": 15712 + }, + { + "epoch": 1.2680978129287386, + "grad_norm": 0.6837478280067444, + "learning_rate": 2.206070817865279e-05, + "loss": 2.4474, + "step": 15713 + }, + { + "epoch": 1.2681785166653217, + "grad_norm": 0.7000131011009216, + "learning_rate": 2.2050818098716664e-05, + "loss": 2.4463, + "step": 15714 + }, + { + "epoch": 1.2682592204019045, + "grad_norm": 0.7063068151473999, + "learning_rate": 2.204092996147794e-05, + "loss": 2.4226, + "step": 15715 
+ }, + { + "epoch": 1.2683399241384876, + "grad_norm": 0.6497172117233276, + "learning_rate": 2.2031043767183003e-05, + "loss": 2.3678, + "step": 15716 + }, + { + "epoch": 1.2684206278750705, + "grad_norm": 0.6558645963668823, + "learning_rate": 2.2021159516078262e-05, + "loss": 2.4021, + "step": 15717 + }, + { + "epoch": 1.2685013316116536, + "grad_norm": 0.7411713600158691, + "learning_rate": 2.2011277208410062e-05, + "loss": 2.4346, + "step": 15718 + }, + { + "epoch": 1.2685820353482367, + "grad_norm": 0.7275578379631042, + "learning_rate": 2.2001396844424714e-05, + "loss": 2.4262, + "step": 15719 + }, + { + "epoch": 1.2686627390848195, + "grad_norm": 0.7010936141014099, + "learning_rate": 2.199151842436844e-05, + "loss": 2.4774, + "step": 15720 + }, + { + "epoch": 1.2687434428214026, + "grad_norm": 0.7551137208938599, + "learning_rate": 2.1981641948487462e-05, + "loss": 2.5286, + "step": 15721 + }, + { + "epoch": 1.2688241465579857, + "grad_norm": 0.6510799527168274, + "learning_rate": 2.1971767417027888e-05, + "loss": 2.3813, + "step": 15722 + }, + { + "epoch": 1.2689048502945686, + "grad_norm": 0.636050283908844, + "learning_rate": 2.196189483023584e-05, + "loss": 2.4226, + "step": 15723 + }, + { + "epoch": 1.2689855540311517, + "grad_norm": 0.6939265131950378, + "learning_rate": 2.1952024188357368e-05, + "loss": 2.4516, + "step": 15724 + }, + { + "epoch": 1.2690662577677347, + "grad_norm": 0.6715239882469177, + "learning_rate": 2.1942155491638494e-05, + "loss": 2.4358, + "step": 15725 + }, + { + "epoch": 1.2691469615043176, + "grad_norm": 0.740680456161499, + "learning_rate": 2.1932288740325123e-05, + "loss": 2.4135, + "step": 15726 + }, + { + "epoch": 1.2692276652409007, + "grad_norm": 0.6969335079193115, + "learning_rate": 2.1922423934663193e-05, + "loss": 2.43, + "step": 15727 + }, + { + "epoch": 1.2693083689774838, + "grad_norm": 0.6390758156776428, + "learning_rate": 2.1912561074898554e-05, + "loss": 2.4492, + "step": 15728 + }, + { + "epoch": 
1.2693890727140666, + "grad_norm": 0.7129701375961304, + "learning_rate": 2.190270016127701e-05, + "loss": 2.3799, + "step": 15729 + }, + { + "epoch": 1.2694697764506497, + "grad_norm": 0.7309553027153015, + "learning_rate": 2.1892841194044332e-05, + "loss": 2.4955, + "step": 15730 + }, + { + "epoch": 1.2695504801872326, + "grad_norm": 0.7257225513458252, + "learning_rate": 2.1882984173446252e-05, + "loss": 2.4184, + "step": 15731 + }, + { + "epoch": 1.2696311839238157, + "grad_norm": 0.7434510588645935, + "learning_rate": 2.1873129099728384e-05, + "loss": 2.453, + "step": 15732 + }, + { + "epoch": 1.2697118876603986, + "grad_norm": 0.6643160581588745, + "learning_rate": 2.1863275973136356e-05, + "loss": 2.3619, + "step": 15733 + }, + { + "epoch": 1.2697925913969816, + "grad_norm": 0.6677344441413879, + "learning_rate": 2.1853424793915778e-05, + "loss": 2.406, + "step": 15734 + }, + { + "epoch": 1.2698732951335647, + "grad_norm": 0.760028064250946, + "learning_rate": 2.1843575562312092e-05, + "loss": 2.5479, + "step": 15735 + }, + { + "epoch": 1.2699539988701476, + "grad_norm": 0.6668389439582825, + "learning_rate": 2.183372827857082e-05, + "loss": 2.4104, + "step": 15736 + }, + { + "epoch": 1.2700347026067307, + "grad_norm": 0.651155412197113, + "learning_rate": 2.182388294293736e-05, + "loss": 2.3738, + "step": 15737 + }, + { + "epoch": 1.2701154063433138, + "grad_norm": 0.736907958984375, + "learning_rate": 2.1814039555657084e-05, + "loss": 2.4179, + "step": 15738 + }, + { + "epoch": 1.2701961100798966, + "grad_norm": 0.7068225741386414, + "learning_rate": 2.180419811697534e-05, + "loss": 2.3911, + "step": 15739 + }, + { + "epoch": 1.2702768138164797, + "grad_norm": 0.6959261894226074, + "learning_rate": 2.1794358627137368e-05, + "loss": 2.452, + "step": 15740 + }, + { + "epoch": 1.2703575175530628, + "grad_norm": 0.6886181235313416, + "learning_rate": 2.1784521086388442e-05, + "loss": 2.4166, + "step": 15741 + }, + { + "epoch": 1.2704382212896457, + 
"grad_norm": 0.6494541168212891, + "learning_rate": 2.177468549497369e-05, + "loss": 2.3589, + "step": 15742 + }, + { + "epoch": 1.2705189250262288, + "grad_norm": 0.7008326649665833, + "learning_rate": 2.1764851853138247e-05, + "loss": 2.3697, + "step": 15743 + }, + { + "epoch": 1.2705996287628119, + "grad_norm": 0.6800456643104553, + "learning_rate": 2.1755020161127238e-05, + "loss": 2.4162, + "step": 15744 + }, + { + "epoch": 1.2706803324993947, + "grad_norm": 0.6836018562316895, + "learning_rate": 2.1745190419185634e-05, + "loss": 2.3977, + "step": 15745 + }, + { + "epoch": 1.2707610362359778, + "grad_norm": 0.6489691138267517, + "learning_rate": 2.173536262755844e-05, + "loss": 2.464, + "step": 15746 + }, + { + "epoch": 1.2708417399725607, + "grad_norm": 0.7309786677360535, + "learning_rate": 2.172553678649061e-05, + "loss": 2.4065, + "step": 15747 + }, + { + "epoch": 1.2709224437091438, + "grad_norm": 0.6752686500549316, + "learning_rate": 2.1715712896227004e-05, + "loss": 2.3935, + "step": 15748 + }, + { + "epoch": 1.2710031474457266, + "grad_norm": 0.7039850354194641, + "learning_rate": 2.1705890957012465e-05, + "loss": 2.4605, + "step": 15749 + }, + { + "epoch": 1.2710838511823097, + "grad_norm": 0.6904652714729309, + "learning_rate": 2.169607096909182e-05, + "loss": 2.4264, + "step": 15750 + }, + { + "epoch": 1.2711645549188928, + "grad_norm": 0.7104331254959106, + "learning_rate": 2.168625293270974e-05, + "loss": 2.378, + "step": 15751 + }, + { + "epoch": 1.2712452586554757, + "grad_norm": 0.6732800602912903, + "learning_rate": 2.167643684811096e-05, + "loss": 2.4216, + "step": 15752 + }, + { + "epoch": 1.2713259623920588, + "grad_norm": 0.7207335829734802, + "learning_rate": 2.166662271554011e-05, + "loss": 2.3861, + "step": 15753 + }, + { + "epoch": 1.2714066661286418, + "grad_norm": 0.7561055421829224, + "learning_rate": 2.1656810535241813e-05, + "loss": 2.4753, + "step": 15754 + }, + { + "epoch": 1.2714873698652247, + "grad_norm": 0.7018210887908936, 
+ "learning_rate": 2.1647000307460564e-05, + "loss": 2.401, + "step": 15755 + }, + { + "epoch": 1.2715680736018078, + "grad_norm": 0.6908013224601746, + "learning_rate": 2.163719203244089e-05, + "loss": 2.4451, + "step": 15756 + }, + { + "epoch": 1.2716487773383909, + "grad_norm": 0.734909176826477, + "learning_rate": 2.162738571042723e-05, + "loss": 2.4221, + "step": 15757 + }, + { + "epoch": 1.2717294810749737, + "grad_norm": 0.7047279477119446, + "learning_rate": 2.1617581341663973e-05, + "loss": 2.4149, + "step": 15758 + }, + { + "epoch": 1.2718101848115568, + "grad_norm": 0.6875640749931335, + "learning_rate": 2.1607778926395496e-05, + "loss": 2.3874, + "step": 15759 + }, + { + "epoch": 1.2718908885481397, + "grad_norm": 0.7300851345062256, + "learning_rate": 2.159797846486611e-05, + "loss": 2.4706, + "step": 15760 + }, + { + "epoch": 1.2719715922847228, + "grad_norm": 0.733775794506073, + "learning_rate": 2.1588179957320022e-05, + "loss": 2.4208, + "step": 15761 + }, + { + "epoch": 1.2720522960213057, + "grad_norm": 0.8375213742256165, + "learning_rate": 2.1578383404001458e-05, + "loss": 2.4672, + "step": 15762 + }, + { + "epoch": 1.2721329997578887, + "grad_norm": 0.7276780009269714, + "learning_rate": 2.15685888051546e-05, + "loss": 2.4536, + "step": 15763 + }, + { + "epoch": 1.2722137034944718, + "grad_norm": 0.7765224575996399, + "learning_rate": 2.1558796161023508e-05, + "loss": 2.3671, + "step": 15764 + }, + { + "epoch": 1.2722944072310547, + "grad_norm": 0.7225642204284668, + "learning_rate": 2.1549005471852256e-05, + "loss": 2.4316, + "step": 15765 + }, + { + "epoch": 1.2723751109676378, + "grad_norm": 0.6959484219551086, + "learning_rate": 2.1539216737884904e-05, + "loss": 2.4581, + "step": 15766 + }, + { + "epoch": 1.2724558147042209, + "grad_norm": 0.6943621039390564, + "learning_rate": 2.1529429959365332e-05, + "loss": 2.4372, + "step": 15767 + }, + { + "epoch": 1.2725365184408037, + "grad_norm": 0.7067148089408875, + "learning_rate": 
2.151964513653746e-05, + "loss": 2.431, + "step": 15768 + }, + { + "epoch": 1.2726172221773868, + "grad_norm": 0.8317076563835144, + "learning_rate": 2.150986226964521e-05, + "loss": 2.4177, + "step": 15769 + }, + { + "epoch": 1.27269792591397, + "grad_norm": 0.7390087246894836, + "learning_rate": 2.150008135893239e-05, + "loss": 2.4711, + "step": 15770 + }, + { + "epoch": 1.2727786296505528, + "grad_norm": 0.6829150915145874, + "learning_rate": 2.1490302404642725e-05, + "loss": 2.4477, + "step": 15771 + }, + { + "epoch": 1.2728593333871359, + "grad_norm": 0.7355613708496094, + "learning_rate": 2.148052540701995e-05, + "loss": 2.493, + "step": 15772 + }, + { + "epoch": 1.272940037123719, + "grad_norm": 0.6872289776802063, + "learning_rate": 2.1470750366307747e-05, + "loss": 2.4363, + "step": 15773 + }, + { + "epoch": 1.2730207408603018, + "grad_norm": 0.7753220796585083, + "learning_rate": 2.1460977282749705e-05, + "loss": 2.4376, + "step": 15774 + }, + { + "epoch": 1.273101444596885, + "grad_norm": 0.6717056632041931, + "learning_rate": 2.145120615658942e-05, + "loss": 2.4383, + "step": 15775 + }, + { + "epoch": 1.2731821483334678, + "grad_norm": 0.7441569566726685, + "learning_rate": 2.1441436988070428e-05, + "loss": 2.462, + "step": 15776 + }, + { + "epoch": 1.2732628520700509, + "grad_norm": 0.6824371814727783, + "learning_rate": 2.143166977743615e-05, + "loss": 2.4173, + "step": 15777 + }, + { + "epoch": 1.2733435558066337, + "grad_norm": 0.7310225963592529, + "learning_rate": 2.1421904524930038e-05, + "loss": 2.4222, + "step": 15778 + }, + { + "epoch": 1.2734242595432168, + "grad_norm": 0.7198066115379333, + "learning_rate": 2.141214123079548e-05, + "loss": 2.4262, + "step": 15779 + }, + { + "epoch": 1.2735049632798, + "grad_norm": 0.7081776857376099, + "learning_rate": 2.1402379895275783e-05, + "loss": 2.4473, + "step": 15780 + }, + { + "epoch": 1.2735856670163828, + "grad_norm": 0.6909368634223938, + "learning_rate": 2.1392620518614235e-05, + "loss": 
2.4528, + "step": 15781 + }, + { + "epoch": 1.2736663707529658, + "grad_norm": 0.7170675992965698, + "learning_rate": 2.1382863101054107e-05, + "loss": 2.4214, + "step": 15782 + }, + { + "epoch": 1.273747074489549, + "grad_norm": 0.6992846727371216, + "learning_rate": 2.1373107642838497e-05, + "loss": 2.4397, + "step": 15783 + }, + { + "epoch": 1.2738277782261318, + "grad_norm": 0.7245237231254578, + "learning_rate": 2.1363354144210578e-05, + "loss": 2.373, + "step": 15784 + }, + { + "epoch": 1.273908481962715, + "grad_norm": 0.6929232478141785, + "learning_rate": 2.1353602605413435e-05, + "loss": 2.4297, + "step": 15785 + }, + { + "epoch": 1.273989185699298, + "grad_norm": 0.7243950366973877, + "learning_rate": 2.134385302669013e-05, + "loss": 2.3856, + "step": 15786 + }, + { + "epoch": 1.2740698894358808, + "grad_norm": 0.6712679266929626, + "learning_rate": 2.133410540828359e-05, + "loss": 2.3818, + "step": 15787 + }, + { + "epoch": 1.274150593172464, + "grad_norm": 0.7433474063873291, + "learning_rate": 2.1324359750436774e-05, + "loss": 2.4148, + "step": 15788 + }, + { + "epoch": 1.274231296909047, + "grad_norm": 0.7225894927978516, + "learning_rate": 2.1314616053392577e-05, + "loss": 2.395, + "step": 15789 + }, + { + "epoch": 1.2743120006456299, + "grad_norm": 0.7026889324188232, + "learning_rate": 2.130487431739383e-05, + "loss": 2.4693, + "step": 15790 + }, + { + "epoch": 1.274392704382213, + "grad_norm": 0.6898565292358398, + "learning_rate": 2.1295134542683325e-05, + "loss": 2.3643, + "step": 15791 + }, + { + "epoch": 1.2744734081187958, + "grad_norm": 0.7212820649147034, + "learning_rate": 2.1285396729503826e-05, + "loss": 2.4178, + "step": 15792 + }, + { + "epoch": 1.274554111855379, + "grad_norm": 0.7149149179458618, + "learning_rate": 2.127566087809798e-05, + "loss": 2.4023, + "step": 15793 + }, + { + "epoch": 1.2746348155919618, + "grad_norm": 0.7039671540260315, + "learning_rate": 2.126592698870846e-05, + "loss": 2.4667, + "step": 15794 + }, + { + 
"epoch": 1.2747155193285449, + "grad_norm": 0.806849479675293, + "learning_rate": 2.1256195061577877e-05, + "loss": 2.4741, + "step": 15795 + }, + { + "epoch": 1.274796223065128, + "grad_norm": 0.7544776797294617, + "learning_rate": 2.124646509694872e-05, + "loss": 2.4258, + "step": 15796 + }, + { + "epoch": 1.2748769268017108, + "grad_norm": 0.6946810483932495, + "learning_rate": 2.1236737095063518e-05, + "loss": 2.4088, + "step": 15797 + }, + { + "epoch": 1.274957630538294, + "grad_norm": 0.7714219093322754, + "learning_rate": 2.1227011056164714e-05, + "loss": 2.4705, + "step": 15798 + }, + { + "epoch": 1.275038334274877, + "grad_norm": 0.6789658665657043, + "learning_rate": 2.121728698049471e-05, + "loss": 2.4692, + "step": 15799 + }, + { + "epoch": 1.2751190380114599, + "grad_norm": 0.7003477215766907, + "learning_rate": 2.120756486829586e-05, + "loss": 2.4437, + "step": 15800 + }, + { + "epoch": 1.275199741748043, + "grad_norm": 0.6802948117256165, + "learning_rate": 2.1197844719810455e-05, + "loss": 2.4002, + "step": 15801 + }, + { + "epoch": 1.275280445484626, + "grad_norm": 0.67823326587677, + "learning_rate": 2.1188126535280773e-05, + "loss": 2.5119, + "step": 15802 + }, + { + "epoch": 1.275361149221209, + "grad_norm": 0.6580843925476074, + "learning_rate": 2.1178410314948972e-05, + "loss": 2.3814, + "step": 15803 + }, + { + "epoch": 1.275441852957792, + "grad_norm": 0.681642472743988, + "learning_rate": 2.1168696059057226e-05, + "loss": 2.4206, + "step": 15804 + }, + { + "epoch": 1.275522556694375, + "grad_norm": 0.7483543753623962, + "learning_rate": 2.1158983767847674e-05, + "loss": 2.4633, + "step": 15805 + }, + { + "epoch": 1.275603260430958, + "grad_norm": 0.6565235257148743, + "learning_rate": 2.11492734415623e-05, + "loss": 2.4145, + "step": 15806 + }, + { + "epoch": 1.275683964167541, + "grad_norm": 0.6606764793395996, + "learning_rate": 2.1139565080443157e-05, + "loss": 2.3935, + "step": 15807 + }, + { + "epoch": 1.275764667904124, + "grad_norm": 
0.7915800213813782, + "learning_rate": 2.1129858684732206e-05, + "loss": 2.4288, + "step": 15808 + }, + { + "epoch": 1.275845371640707, + "grad_norm": 0.6763594746589661, + "learning_rate": 2.112015425467133e-05, + "loss": 2.4147, + "step": 15809 + }, + { + "epoch": 1.2759260753772899, + "grad_norm": 0.6886053085327148, + "learning_rate": 2.1110451790502405e-05, + "loss": 2.3798, + "step": 15810 + }, + { + "epoch": 1.276006779113873, + "grad_norm": 0.686122715473175, + "learning_rate": 2.110075129246728e-05, + "loss": 2.3896, + "step": 15811 + }, + { + "epoch": 1.276087482850456, + "grad_norm": 0.6989614367485046, + "learning_rate": 2.109105276080764e-05, + "loss": 2.4533, + "step": 15812 + }, + { + "epoch": 1.276168186587039, + "grad_norm": 0.6818450689315796, + "learning_rate": 2.1081356195765232e-05, + "loss": 2.4012, + "step": 15813 + }, + { + "epoch": 1.276248890323622, + "grad_norm": 0.7492663860321045, + "learning_rate": 2.107166159758176e-05, + "loss": 2.4269, + "step": 15814 + }, + { + "epoch": 1.276329594060205, + "grad_norm": 0.6752359867095947, + "learning_rate": 2.1061968966498767e-05, + "loss": 2.4478, + "step": 15815 + }, + { + "epoch": 1.276410297796788, + "grad_norm": 0.6784162521362305, + "learning_rate": 2.1052278302757854e-05, + "loss": 2.4853, + "step": 15816 + }, + { + "epoch": 1.276491001533371, + "grad_norm": 0.7273215651512146, + "learning_rate": 2.104258960660055e-05, + "loss": 2.4365, + "step": 15817 + }, + { + "epoch": 1.2765717052699541, + "grad_norm": 0.7021621465682983, + "learning_rate": 2.1032902878268323e-05, + "loss": 2.4665, + "step": 15818 + }, + { + "epoch": 1.276652409006537, + "grad_norm": 0.666828989982605, + "learning_rate": 2.102321811800253e-05, + "loss": 2.3922, + "step": 15819 + }, + { + "epoch": 1.27673311274312, + "grad_norm": 0.6780487298965454, + "learning_rate": 2.1013535326044608e-05, + "loss": 2.4072, + "step": 15820 + }, + { + "epoch": 1.276813816479703, + "grad_norm": 0.6474688053131104, + "learning_rate": 
2.1003854502635888e-05, + "loss": 2.4145, + "step": 15821 + }, + { + "epoch": 1.276894520216286, + "grad_norm": 0.6712753772735596, + "learning_rate": 2.0994175648017587e-05, + "loss": 2.4349, + "step": 15822 + }, + { + "epoch": 1.2769752239528689, + "grad_norm": 0.6705189943313599, + "learning_rate": 2.098449876243096e-05, + "loss": 2.4376, + "step": 15823 + }, + { + "epoch": 1.277055927689452, + "grad_norm": 0.6794685125350952, + "learning_rate": 2.0974823846117197e-05, + "loss": 2.3717, + "step": 15824 + }, + { + "epoch": 1.277136631426035, + "grad_norm": 0.7145677804946899, + "learning_rate": 2.0965150899317364e-05, + "loss": 2.3829, + "step": 15825 + }, + { + "epoch": 1.277217335162618, + "grad_norm": 0.7043245434761047, + "learning_rate": 2.095547992227257e-05, + "loss": 2.405, + "step": 15826 + }, + { + "epoch": 1.277298038899201, + "grad_norm": 0.7969205379486084, + "learning_rate": 2.0945810915223873e-05, + "loss": 2.4115, + "step": 15827 + }, + { + "epoch": 1.277378742635784, + "grad_norm": 0.657482385635376, + "learning_rate": 2.0936143878412186e-05, + "loss": 2.372, + "step": 15828 + }, + { + "epoch": 1.277459446372367, + "grad_norm": 0.7315167784690857, + "learning_rate": 2.0926478812078466e-05, + "loss": 2.4372, + "step": 15829 + }, + { + "epoch": 1.27754015010895, + "grad_norm": 0.6985061764717102, + "learning_rate": 2.09168157164636e-05, + "loss": 2.3901, + "step": 15830 + }, + { + "epoch": 1.2776208538455331, + "grad_norm": 0.6906184554100037, + "learning_rate": 2.0907154591808408e-05, + "loss": 2.4562, + "step": 15831 + }, + { + "epoch": 1.277701557582116, + "grad_norm": 0.655094563961029, + "learning_rate": 2.0897495438353676e-05, + "loss": 2.451, + "step": 15832 + }, + { + "epoch": 1.277782261318699, + "grad_norm": 0.7663134932518005, + "learning_rate": 2.0887838256340143e-05, + "loss": 2.4634, + "step": 15833 + }, + { + "epoch": 1.2778629650552822, + "grad_norm": 0.7164491415023804, + "learning_rate": 2.087818304600849e-05, + "loss": 2.4624, + 
"step": 15834 + }, + { + "epoch": 1.277943668791865, + "grad_norm": 0.6962822079658508, + "learning_rate": 2.0868529807599336e-05, + "loss": 2.4325, + "step": 15835 + }, + { + "epoch": 1.2780243725284481, + "grad_norm": 0.702985405921936, + "learning_rate": 2.0858878541353255e-05, + "loss": 2.4219, + "step": 15836 + }, + { + "epoch": 1.278105076265031, + "grad_norm": 0.7605595588684082, + "learning_rate": 2.0849229247510826e-05, + "loss": 2.4201, + "step": 15837 + }, + { + "epoch": 1.278185780001614, + "grad_norm": 0.8479344248771667, + "learning_rate": 2.083958192631249e-05, + "loss": 2.4689, + "step": 15838 + }, + { + "epoch": 1.278266483738197, + "grad_norm": 0.7241235375404358, + "learning_rate": 2.082993657799869e-05, + "loss": 2.4861, + "step": 15839 + }, + { + "epoch": 1.27834718747478, + "grad_norm": 0.7069835066795349, + "learning_rate": 2.0820293202809827e-05, + "loss": 2.3759, + "step": 15840 + }, + { + "epoch": 1.2784278912113631, + "grad_norm": 0.6606370210647583, + "learning_rate": 2.0810651800986237e-05, + "loss": 2.4444, + "step": 15841 + }, + { + "epoch": 1.278508594947946, + "grad_norm": 0.6608174443244934, + "learning_rate": 2.08010123727682e-05, + "loss": 2.4339, + "step": 15842 + }, + { + "epoch": 1.278589298684529, + "grad_norm": 0.751000702381134, + "learning_rate": 2.0791374918396e-05, + "loss": 2.4327, + "step": 15843 + }, + { + "epoch": 1.2786700024211122, + "grad_norm": 0.7223808765411377, + "learning_rate": 2.0781739438109748e-05, + "loss": 2.3573, + "step": 15844 + }, + { + "epoch": 1.278750706157695, + "grad_norm": 0.6872109770774841, + "learning_rate": 2.0772105932149642e-05, + "loss": 2.3973, + "step": 15845 + }, + { + "epoch": 1.2788314098942781, + "grad_norm": 0.6967385411262512, + "learning_rate": 2.0762474400755762e-05, + "loss": 2.4622, + "step": 15846 + }, + { + "epoch": 1.2789121136308612, + "grad_norm": 0.7289159893989563, + "learning_rate": 2.0752844844168163e-05, + "loss": 2.4507, + "step": 15847 + }, + { + "epoch": 
1.278992817367444, + "grad_norm": 0.7735978364944458, + "learning_rate": 2.0743217262626802e-05, + "loss": 2.4341, + "step": 15848 + }, + { + "epoch": 1.2790735211040272, + "grad_norm": 0.7209177017211914, + "learning_rate": 2.0733591656371655e-05, + "loss": 2.4024, + "step": 15849 + }, + { + "epoch": 1.2791542248406103, + "grad_norm": 0.6789259314537048, + "learning_rate": 2.0723968025642604e-05, + "loss": 2.3809, + "step": 15850 + }, + { + "epoch": 1.2792349285771931, + "grad_norm": 0.6972812414169312, + "learning_rate": 2.0714346370679495e-05, + "loss": 2.3986, + "step": 15851 + }, + { + "epoch": 1.2793156323137762, + "grad_norm": 0.7144166827201843, + "learning_rate": 2.070472669172213e-05, + "loss": 2.4241, + "step": 15852 + }, + { + "epoch": 1.279396336050359, + "grad_norm": 0.7325223088264465, + "learning_rate": 2.0695108989010282e-05, + "loss": 2.452, + "step": 15853 + }, + { + "epoch": 1.2794770397869422, + "grad_norm": 0.6900116205215454, + "learning_rate": 2.0685493262783608e-05, + "loss": 2.4091, + "step": 15854 + }, + { + "epoch": 1.279557743523525, + "grad_norm": 0.6846197843551636, + "learning_rate": 2.0675879513281758e-05, + "loss": 2.4337, + "step": 15855 + }, + { + "epoch": 1.2796384472601081, + "grad_norm": 0.6901541352272034, + "learning_rate": 2.0666267740744372e-05, + "loss": 2.4586, + "step": 15856 + }, + { + "epoch": 1.2797191509966912, + "grad_norm": 0.6842665672302246, + "learning_rate": 2.0656657945410953e-05, + "loss": 2.4383, + "step": 15857 + }, + { + "epoch": 1.279799854733274, + "grad_norm": 0.7450493574142456, + "learning_rate": 2.0647050127521028e-05, + "loss": 2.4308, + "step": 15858 + }, + { + "epoch": 1.2798805584698572, + "grad_norm": 0.6928436160087585, + "learning_rate": 2.0637444287314033e-05, + "loss": 2.4726, + "step": 15859 + }, + { + "epoch": 1.2799612622064402, + "grad_norm": 0.6539968252182007, + "learning_rate": 2.06278404250294e-05, + "loss": 2.3983, + "step": 15860 + }, + { + "epoch": 1.280041965943023, + 
"grad_norm": 0.7183163166046143, + "learning_rate": 2.0618238540906444e-05, + "loss": 2.4172, + "step": 15861 + }, + { + "epoch": 1.2801226696796062, + "grad_norm": 0.7070814371109009, + "learning_rate": 2.0608638635184507e-05, + "loss": 2.4018, + "step": 15862 + }, + { + "epoch": 1.2802033734161893, + "grad_norm": 0.7589142918586731, + "learning_rate": 2.0599040708102847e-05, + "loss": 2.4175, + "step": 15863 + }, + { + "epoch": 1.2802840771527721, + "grad_norm": 0.6945414543151855, + "learning_rate": 2.0589444759900613e-05, + "loss": 2.4093, + "step": 15864 + }, + { + "epoch": 1.2803647808893552, + "grad_norm": 0.685482919216156, + "learning_rate": 2.0579850790817003e-05, + "loss": 2.4388, + "step": 15865 + }, + { + "epoch": 1.280445484625938, + "grad_norm": 0.7089706063270569, + "learning_rate": 2.0570258801091148e-05, + "loss": 2.3779, + "step": 15866 + }, + { + "epoch": 1.2805261883625212, + "grad_norm": 0.6994217038154602, + "learning_rate": 2.0560668790962046e-05, + "loss": 2.3757, + "step": 15867 + }, + { + "epoch": 1.280606892099104, + "grad_norm": 0.7170232534408569, + "learning_rate": 2.055108076066874e-05, + "loss": 2.4087, + "step": 15868 + }, + { + "epoch": 1.2806875958356871, + "grad_norm": 0.7008751034736633, + "learning_rate": 2.0541494710450206e-05, + "loss": 2.4384, + "step": 15869 + }, + { + "epoch": 1.2807682995722702, + "grad_norm": 0.6795800924301147, + "learning_rate": 2.053191064054527e-05, + "loss": 2.415, + "step": 15870 + }, + { + "epoch": 1.280849003308853, + "grad_norm": 0.6650210022926331, + "learning_rate": 2.0522328551192882e-05, + "loss": 2.4421, + "step": 15871 + }, + { + "epoch": 1.2809297070454362, + "grad_norm": 0.7045374512672424, + "learning_rate": 2.0512748442631858e-05, + "loss": 2.4285, + "step": 15872 + }, + { + "epoch": 1.2810104107820193, + "grad_norm": 0.6585350632667542, + "learning_rate": 2.0503170315100883e-05, + "loss": 2.3806, + "step": 15873 + }, + { + "epoch": 1.2810911145186021, + "grad_norm": 
0.7833496332168579, + "learning_rate": 2.0493594168838725e-05, + "loss": 2.4557, + "step": 15874 + }, + { + "epoch": 1.2811718182551852, + "grad_norm": 0.7237457036972046, + "learning_rate": 2.0484020004084048e-05, + "loss": 2.3966, + "step": 15875 + }, + { + "epoch": 1.2812525219917683, + "grad_norm": 0.7416609525680542, + "learning_rate": 2.0474447821075426e-05, + "loss": 2.3729, + "step": 15876 + }, + { + "epoch": 1.2813332257283512, + "grad_norm": 0.7148095369338989, + "learning_rate": 2.046487762005146e-05, + "loss": 2.4163, + "step": 15877 + }, + { + "epoch": 1.2814139294649343, + "grad_norm": 0.670281171798706, + "learning_rate": 2.0455309401250632e-05, + "loss": 2.383, + "step": 15878 + }, + { + "epoch": 1.2814946332015174, + "grad_norm": 0.6968950629234314, + "learning_rate": 2.0445743164911457e-05, + "loss": 2.3967, + "step": 15879 + }, + { + "epoch": 1.2815753369381002, + "grad_norm": 0.783441960811615, + "learning_rate": 2.0436178911272298e-05, + "loss": 2.455, + "step": 15880 + }, + { + "epoch": 1.2816560406746833, + "grad_norm": 0.709032416343689, + "learning_rate": 2.0426616640571518e-05, + "loss": 2.4207, + "step": 15881 + }, + { + "epoch": 1.2817367444112662, + "grad_norm": 0.6727990508079529, + "learning_rate": 2.0417056353047504e-05, + "loss": 2.4115, + "step": 15882 + }, + { + "epoch": 1.2818174481478493, + "grad_norm": 0.7336034774780273, + "learning_rate": 2.0407498048938445e-05, + "loss": 2.43, + "step": 15883 + }, + { + "epoch": 1.2818981518844321, + "grad_norm": 0.7649042010307312, + "learning_rate": 2.0397941728482604e-05, + "loss": 2.4655, + "step": 15884 + }, + { + "epoch": 1.2819788556210152, + "grad_norm": 0.7218052744865417, + "learning_rate": 2.038838739191816e-05, + "loss": 2.4872, + "step": 15885 + }, + { + "epoch": 1.2820595593575983, + "grad_norm": 0.7192350625991821, + "learning_rate": 2.0378835039483178e-05, + "loss": 2.4751, + "step": 15886 + }, + { + "epoch": 1.2821402630941812, + "grad_norm": 0.7059212923049927, + 
"learning_rate": 2.0369284671415768e-05, + "loss": 2.43, + "step": 15887 + }, + { + "epoch": 1.2822209668307643, + "grad_norm": 0.7387098073959351, + "learning_rate": 2.0359736287953956e-05, + "loss": 2.4281, + "step": 15888 + }, + { + "epoch": 1.2823016705673473, + "grad_norm": 0.7454321980476379, + "learning_rate": 2.035018988933568e-05, + "loss": 2.4372, + "step": 15889 + }, + { + "epoch": 1.2823823743039302, + "grad_norm": 0.6822765469551086, + "learning_rate": 2.034064547579888e-05, + "loss": 2.3728, + "step": 15890 + }, + { + "epoch": 1.2824630780405133, + "grad_norm": 0.6917527914047241, + "learning_rate": 2.0331103047581412e-05, + "loss": 2.3997, + "step": 15891 + }, + { + "epoch": 1.2825437817770964, + "grad_norm": 0.6734376549720764, + "learning_rate": 2.032156260492113e-05, + "loss": 2.4495, + "step": 15892 + }, + { + "epoch": 1.2826244855136792, + "grad_norm": 0.7222443222999573, + "learning_rate": 2.0312024148055776e-05, + "loss": 2.3466, + "step": 15893 + }, + { + "epoch": 1.2827051892502623, + "grad_norm": 0.703714907169342, + "learning_rate": 2.030248767722309e-05, + "loss": 2.4599, + "step": 15894 + }, + { + "epoch": 1.2827858929868454, + "grad_norm": 0.655161440372467, + "learning_rate": 2.029295319266078e-05, + "loss": 2.3896, + "step": 15895 + }, + { + "epoch": 1.2828665967234283, + "grad_norm": 0.6449242234230042, + "learning_rate": 2.028342069460639e-05, + "loss": 2.3511, + "step": 15896 + }, + { + "epoch": 1.2829473004600114, + "grad_norm": 0.6578382849693298, + "learning_rate": 2.027389018329755e-05, + "loss": 2.3678, + "step": 15897 + }, + { + "epoch": 1.2830280041965942, + "grad_norm": 0.7047572731971741, + "learning_rate": 2.0264361658971797e-05, + "loss": 2.4522, + "step": 15898 + }, + { + "epoch": 1.2831087079331773, + "grad_norm": 0.7310267090797424, + "learning_rate": 2.0254835121866554e-05, + "loss": 2.4117, + "step": 15899 + }, + { + "epoch": 1.2831894116697602, + "grad_norm": 0.7020776867866516, + "learning_rate": 
2.024531057221927e-05, + "loss": 2.4033, + "step": 15900 + }, + { + "epoch": 1.2832701154063433, + "grad_norm": 0.6967746615409851, + "learning_rate": 2.023578801026733e-05, + "loss": 2.3491, + "step": 15901 + }, + { + "epoch": 1.2833508191429264, + "grad_norm": 0.7062339782714844, + "learning_rate": 2.022626743624807e-05, + "loss": 2.4598, + "step": 15902 + }, + { + "epoch": 1.2834315228795092, + "grad_norm": 0.730625331401825, + "learning_rate": 2.0216748850398748e-05, + "loss": 2.4995, + "step": 15903 + }, + { + "epoch": 1.2835122266160923, + "grad_norm": 0.6634403467178345, + "learning_rate": 2.020723225295662e-05, + "loss": 2.3843, + "step": 15904 + }, + { + "epoch": 1.2835929303526754, + "grad_norm": 0.6924816966056824, + "learning_rate": 2.019771764415883e-05, + "loss": 2.4258, + "step": 15905 + }, + { + "epoch": 1.2836736340892583, + "grad_norm": 0.7127227187156677, + "learning_rate": 2.018820502424251e-05, + "loss": 2.4038, + "step": 15906 + }, + { + "epoch": 1.2837543378258414, + "grad_norm": 0.7108431458473206, + "learning_rate": 2.0178694393444785e-05, + "loss": 2.4571, + "step": 15907 + }, + { + "epoch": 1.2838350415624245, + "grad_norm": 0.7478229999542236, + "learning_rate": 2.016918575200262e-05, + "loss": 2.4526, + "step": 15908 + }, + { + "epoch": 1.2839157452990073, + "grad_norm": 0.65651935338974, + "learning_rate": 2.015967910015303e-05, + "loss": 2.434, + "step": 15909 + }, + { + "epoch": 1.2839964490355904, + "grad_norm": 0.7285312414169312, + "learning_rate": 2.015017443813294e-05, + "loss": 2.3857, + "step": 15910 + }, + { + "epoch": 1.2840771527721733, + "grad_norm": 0.6947231292724609, + "learning_rate": 2.014067176617923e-05, + "loss": 2.4294, + "step": 15911 + }, + { + "epoch": 1.2841578565087564, + "grad_norm": 0.6965867877006531, + "learning_rate": 2.0131171084528744e-05, + "loss": 2.4514, + "step": 15912 + }, + { + "epoch": 1.2842385602453392, + "grad_norm": 0.6962311863899231, + "learning_rate": 2.0121672393418246e-05, + "loss": 
2.4391, + "step": 15913 + }, + { + "epoch": 1.2843192639819223, + "grad_norm": 0.6687992215156555, + "learning_rate": 2.01121756930845e-05, + "loss": 2.4266, + "step": 15914 + }, + { + "epoch": 1.2843999677185054, + "grad_norm": 0.7118954658508301, + "learning_rate": 2.0102680983764145e-05, + "loss": 2.3436, + "step": 15915 + }, + { + "epoch": 1.2844806714550883, + "grad_norm": 0.6866199970245361, + "learning_rate": 2.009318826569382e-05, + "loss": 2.3719, + "step": 15916 + }, + { + "epoch": 1.2845613751916714, + "grad_norm": 0.6701404452323914, + "learning_rate": 2.008369753911016e-05, + "loss": 2.4875, + "step": 15917 + }, + { + "epoch": 1.2846420789282544, + "grad_norm": 0.7020917534828186, + "learning_rate": 2.007420880424963e-05, + "loss": 2.3871, + "step": 15918 + }, + { + "epoch": 1.2847227826648373, + "grad_norm": 0.6865704655647278, + "learning_rate": 2.006472206134875e-05, + "loss": 2.3815, + "step": 15919 + }, + { + "epoch": 1.2848034864014204, + "grad_norm": 0.7106871008872986, + "learning_rate": 2.0055237310643948e-05, + "loss": 2.4276, + "step": 15920 + }, + { + "epoch": 1.2848841901380035, + "grad_norm": 0.6891976594924927, + "learning_rate": 2.004575455237161e-05, + "loss": 2.3641, + "step": 15921 + }, + { + "epoch": 1.2849648938745863, + "grad_norm": 0.6385056972503662, + "learning_rate": 2.0036273786768067e-05, + "loss": 2.3898, + "step": 15922 + }, + { + "epoch": 1.2850455976111694, + "grad_norm": 0.7038321495056152, + "learning_rate": 2.0026795014069633e-05, + "loss": 2.4688, + "step": 15923 + }, + { + "epoch": 1.2851263013477525, + "grad_norm": 0.6310208439826965, + "learning_rate": 2.0017318234512494e-05, + "loss": 2.3821, + "step": 15924 + }, + { + "epoch": 1.2852070050843354, + "grad_norm": 0.6989426016807556, + "learning_rate": 2.0007843448332865e-05, + "loss": 2.434, + "step": 15925 + }, + { + "epoch": 1.2852877088209185, + "grad_norm": 0.6666426658630371, + "learning_rate": 1.9998370655766886e-05, + "loss": 2.4687, + "step": 15926 + }, + 
{ + "epoch": 1.2853684125575013, + "grad_norm": 0.6421633958816528, + "learning_rate": 1.9988899857050648e-05, + "loss": 2.4269, + "step": 15927 + }, + { + "epoch": 1.2854491162940844, + "grad_norm": 0.7229343056678772, + "learning_rate": 1.997943105242016e-05, + "loss": 2.4139, + "step": 15928 + }, + { + "epoch": 1.2855298200306673, + "grad_norm": 0.7168964743614197, + "learning_rate": 1.9969964242111427e-05, + "loss": 2.405, + "step": 15929 + }, + { + "epoch": 1.2856105237672504, + "grad_norm": 0.6824480891227722, + "learning_rate": 1.99604994263604e-05, + "loss": 2.3955, + "step": 15930 + }, + { + "epoch": 1.2856912275038335, + "grad_norm": 0.670956552028656, + "learning_rate": 1.995103660540294e-05, + "loss": 2.3743, + "step": 15931 + }, + { + "epoch": 1.2857719312404163, + "grad_norm": 0.7057971954345703, + "learning_rate": 1.9941575779474864e-05, + "loss": 2.4496, + "step": 15932 + }, + { + "epoch": 1.2858526349769994, + "grad_norm": 0.7802264094352722, + "learning_rate": 1.9932116948812052e-05, + "loss": 2.4231, + "step": 15933 + }, + { + "epoch": 1.2859333387135825, + "grad_norm": 0.7151160836219788, + "learning_rate": 1.992266011365016e-05, + "loss": 2.4319, + "step": 15934 + }, + { + "epoch": 1.2860140424501654, + "grad_norm": 0.7078769207000732, + "learning_rate": 1.991320527422489e-05, + "loss": 2.4037, + "step": 15935 + }, + { + "epoch": 1.2860947461867485, + "grad_norm": 0.7483938336372375, + "learning_rate": 1.9903752430771927e-05, + "loss": 2.4946, + "step": 15936 + }, + { + "epoch": 1.2861754499233315, + "grad_norm": 0.7774620056152344, + "learning_rate": 1.9894301583526808e-05, + "loss": 2.4536, + "step": 15937 + }, + { + "epoch": 1.2862561536599144, + "grad_norm": 0.7311348915100098, + "learning_rate": 1.988485273272509e-05, + "loss": 2.4178, + "step": 15938 + }, + { + "epoch": 1.2863368573964975, + "grad_norm": 0.6821309328079224, + "learning_rate": 1.9875405878602282e-05, + "loss": 2.4851, + "step": 15939 + }, + { + "epoch": 1.2864175611330806, 
+ "grad_norm": 0.7081651091575623, + "learning_rate": 1.9865961021393785e-05, + "loss": 2.4377, + "step": 15940 + }, + { + "epoch": 1.2864982648696635, + "grad_norm": 0.8093439340591431, + "learning_rate": 1.9856518161335014e-05, + "loss": 2.4681, + "step": 15941 + }, + { + "epoch": 1.2865789686062465, + "grad_norm": 0.6769521832466125, + "learning_rate": 1.984707729866131e-05, + "loss": 2.4231, + "step": 15942 + }, + { + "epoch": 1.2866596723428294, + "grad_norm": 0.6973356604576111, + "learning_rate": 1.983763843360795e-05, + "loss": 2.4144, + "step": 15943 + }, + { + "epoch": 1.2867403760794125, + "grad_norm": 0.7814682722091675, + "learning_rate": 1.9828201566410197e-05, + "loss": 2.3935, + "step": 15944 + }, + { + "epoch": 1.2868210798159954, + "grad_norm": 0.7545498609542847, + "learning_rate": 1.9818766697303236e-05, + "loss": 2.4136, + "step": 15945 + }, + { + "epoch": 1.2869017835525784, + "grad_norm": 0.7165581583976746, + "learning_rate": 1.9809333826522225e-05, + "loss": 2.3757, + "step": 15946 + }, + { + "epoch": 1.2869824872891615, + "grad_norm": 0.6812456846237183, + "learning_rate": 1.9799902954302208e-05, + "loss": 2.4143, + "step": 15947 + }, + { + "epoch": 1.2870631910257444, + "grad_norm": 0.7231366634368896, + "learning_rate": 1.9790474080878262e-05, + "loss": 2.4837, + "step": 15948 + }, + { + "epoch": 1.2871438947623275, + "grad_norm": 0.690916121006012, + "learning_rate": 1.9781047206485393e-05, + "loss": 2.4513, + "step": 15949 + }, + { + "epoch": 1.2872245984989106, + "grad_norm": 0.6608129143714905, + "learning_rate": 1.9771622331358485e-05, + "loss": 2.3908, + "step": 15950 + }, + { + "epoch": 1.2873053022354934, + "grad_norm": 0.7194501161575317, + "learning_rate": 1.976219945573249e-05, + "loss": 2.38, + "step": 15951 + }, + { + "epoch": 1.2873860059720765, + "grad_norm": 0.7315083146095276, + "learning_rate": 1.9752778579842213e-05, + "loss": 2.4351, + "step": 15952 + }, + { + "epoch": 1.2874667097086596, + "grad_norm": 
0.7313492298126221, + "learning_rate": 1.974335970392246e-05, + "loss": 2.3531, + "step": 15953 + }, + { + "epoch": 1.2875474134452425, + "grad_norm": 0.6982418894767761, + "learning_rate": 1.9733942828207985e-05, + "loss": 2.4319, + "step": 15954 + }, + { + "epoch": 1.2876281171818256, + "grad_norm": 0.6664792895317078, + "learning_rate": 1.972452795293347e-05, + "loss": 2.3981, + "step": 15955 + }, + { + "epoch": 1.2877088209184087, + "grad_norm": 0.6849696040153503, + "learning_rate": 1.9715115078333578e-05, + "loss": 2.3952, + "step": 15956 + }, + { + "epoch": 1.2877895246549915, + "grad_norm": 0.7355225086212158, + "learning_rate": 1.9705704204642873e-05, + "loss": 2.4556, + "step": 15957 + }, + { + "epoch": 1.2878702283915746, + "grad_norm": 0.6850876808166504, + "learning_rate": 1.9696295332095906e-05, + "loss": 2.3873, + "step": 15958 + }, + { + "epoch": 1.2879509321281575, + "grad_norm": 0.6449069976806641, + "learning_rate": 1.9686888460927198e-05, + "loss": 2.4226, + "step": 15959 + }, + { + "epoch": 1.2880316358647406, + "grad_norm": 0.7517794966697693, + "learning_rate": 1.967748359137114e-05, + "loss": 2.377, + "step": 15960 + }, + { + "epoch": 1.2881123396013234, + "grad_norm": 0.6861303448677063, + "learning_rate": 1.9668080723662162e-05, + "loss": 2.4451, + "step": 15961 + }, + { + "epoch": 1.2881930433379065, + "grad_norm": 0.7025154829025269, + "learning_rate": 1.9658679858034602e-05, + "loss": 2.3856, + "step": 15962 + }, + { + "epoch": 1.2882737470744896, + "grad_norm": 0.6775577068328857, + "learning_rate": 1.964928099472275e-05, + "loss": 2.4383, + "step": 15963 + }, + { + "epoch": 1.2883544508110725, + "grad_norm": 0.6889605522155762, + "learning_rate": 1.963988413396086e-05, + "loss": 2.3766, + "step": 15964 + }, + { + "epoch": 1.2884351545476556, + "grad_norm": 0.6697166562080383, + "learning_rate": 1.9630489275983156e-05, + "loss": 2.44, + "step": 15965 + }, + { + "epoch": 1.2885158582842386, + "grad_norm": 0.6895437836647034, + 
"learning_rate": 1.96210964210237e-05, + "loss": 2.4242, + "step": 15966 + }, + { + "epoch": 1.2885965620208215, + "grad_norm": 0.6955164670944214, + "learning_rate": 1.9611705569316652e-05, + "loss": 2.3915, + "step": 15967 + }, + { + "epoch": 1.2886772657574046, + "grad_norm": 0.7133461236953735, + "learning_rate": 1.960231672109605e-05, + "loss": 2.4307, + "step": 15968 + }, + { + "epoch": 1.2887579694939877, + "grad_norm": 0.6874761581420898, + "learning_rate": 1.9592929876595857e-05, + "loss": 2.4371, + "step": 15969 + }, + { + "epoch": 1.2888386732305706, + "grad_norm": 0.7168406248092651, + "learning_rate": 1.9583545036050044e-05, + "loss": 2.4681, + "step": 15970 + }, + { + "epoch": 1.2889193769671536, + "grad_norm": 0.701874852180481, + "learning_rate": 1.9574162199692492e-05, + "loss": 2.4746, + "step": 15971 + }, + { + "epoch": 1.2890000807037365, + "grad_norm": 0.7118390202522278, + "learning_rate": 1.9564781367757058e-05, + "loss": 2.4139, + "step": 15972 + }, + { + "epoch": 1.2890807844403196, + "grad_norm": 0.6597239971160889, + "learning_rate": 1.955540254047753e-05, + "loss": 2.4346, + "step": 15973 + }, + { + "epoch": 1.2891614881769025, + "grad_norm": 0.7461068630218506, + "learning_rate": 1.9546025718087645e-05, + "loss": 2.4331, + "step": 15974 + }, + { + "epoch": 1.2892421919134855, + "grad_norm": 0.6992977857589722, + "learning_rate": 1.953665090082115e-05, + "loss": 2.424, + "step": 15975 + }, + { + "epoch": 1.2893228956500686, + "grad_norm": 0.6674031615257263, + "learning_rate": 1.9527278088911617e-05, + "loss": 2.4545, + "step": 15976 + }, + { + "epoch": 1.2894035993866515, + "grad_norm": 0.7377402782440186, + "learning_rate": 1.9517907282592662e-05, + "loss": 2.4625, + "step": 15977 + }, + { + "epoch": 1.2894843031232346, + "grad_norm": 0.720579206943512, + "learning_rate": 1.950853848209788e-05, + "loss": 2.4073, + "step": 15978 + }, + { + "epoch": 1.2895650068598177, + "grad_norm": 0.7221893668174744, + "learning_rate": 
1.9499171687660688e-05, + "loss": 2.4056, + "step": 15979 + }, + { + "epoch": 1.2896457105964005, + "grad_norm": 0.7409725189208984, + "learning_rate": 1.9489806899514574e-05, + "loss": 2.3899, + "step": 15980 + }, + { + "epoch": 1.2897264143329836, + "grad_norm": 0.6946583986282349, + "learning_rate": 1.948044411789296e-05, + "loss": 2.4832, + "step": 15981 + }, + { + "epoch": 1.2898071180695667, + "grad_norm": 0.7031306028366089, + "learning_rate": 1.9471083343029096e-05, + "loss": 2.4265, + "step": 15982 + }, + { + "epoch": 1.2898878218061496, + "grad_norm": 0.660093367099762, + "learning_rate": 1.946172457515637e-05, + "loss": 2.4883, + "step": 15983 + }, + { + "epoch": 1.2899685255427327, + "grad_norm": 0.700641930103302, + "learning_rate": 1.945236781450802e-05, + "loss": 2.4096, + "step": 15984 + }, + { + "epoch": 1.2900492292793158, + "grad_norm": 0.7350760698318481, + "learning_rate": 1.9443013061317205e-05, + "loss": 2.4161, + "step": 15985 + }, + { + "epoch": 1.2901299330158986, + "grad_norm": 0.7567386031150818, + "learning_rate": 1.9433660315817072e-05, + "loss": 2.3978, + "step": 15986 + }, + { + "epoch": 1.2902106367524817, + "grad_norm": 0.7471369504928589, + "learning_rate": 1.9424309578240717e-05, + "loss": 2.4079, + "step": 15987 + }, + { + "epoch": 1.2902913404890646, + "grad_norm": 0.6630815267562866, + "learning_rate": 1.941496084882124e-05, + "loss": 2.4223, + "step": 15988 + }, + { + "epoch": 1.2903720442256477, + "grad_norm": 0.687224268913269, + "learning_rate": 1.940561412779155e-05, + "loss": 2.4413, + "step": 15989 + }, + { + "epoch": 1.2904527479622305, + "grad_norm": 0.6989685297012329, + "learning_rate": 1.9396269415384637e-05, + "loss": 2.3651, + "step": 15990 + }, + { + "epoch": 1.2905334516988136, + "grad_norm": 0.7256720066070557, + "learning_rate": 1.938692671183342e-05, + "loss": 2.4526, + "step": 15991 + }, + { + "epoch": 1.2906141554353967, + "grad_norm": 0.692032516002655, + "learning_rate": 1.9377586017370685e-05, + "loss": 
2.3936, + "step": 15992 + }, + { + "epoch": 1.2906948591719796, + "grad_norm": 0.6733511686325073, + "learning_rate": 1.936824733222925e-05, + "loss": 2.4691, + "step": 15993 + }, + { + "epoch": 1.2907755629085627, + "grad_norm": 0.6698563098907471, + "learning_rate": 1.935891065664187e-05, + "loss": 2.3904, + "step": 15994 + }, + { + "epoch": 1.2908562666451457, + "grad_norm": 0.660521388053894, + "learning_rate": 1.934957599084123e-05, + "loss": 2.4647, + "step": 15995 + }, + { + "epoch": 1.2909369703817286, + "grad_norm": 0.6714615821838379, + "learning_rate": 1.9340243335059982e-05, + "loss": 2.403, + "step": 15996 + }, + { + "epoch": 1.2910176741183117, + "grad_norm": 0.726099967956543, + "learning_rate": 1.9330912689530746e-05, + "loss": 2.4101, + "step": 15997 + }, + { + "epoch": 1.2910983778548948, + "grad_norm": 0.6585896015167236, + "learning_rate": 1.932158405448601e-05, + "loss": 2.3813, + "step": 15998 + }, + { + "epoch": 1.2911790815914777, + "grad_norm": 0.7967908382415771, + "learning_rate": 1.9312257430158286e-05, + "loss": 2.4188, + "step": 15999 + }, + { + "epoch": 1.2912597853280607, + "grad_norm": 0.7340367436408997, + "learning_rate": 1.9302932816780063e-05, + "loss": 2.4642, + "step": 16000 + }, + { + "epoch": 1.2912597853280607, + "eval_loss": 2.3791537284851074, + "eval_runtime": 780.6124, + "eval_samples_per_second": 3.356, + "eval_steps_per_second": 0.56, + "step": 16000 + }, + { + "epoch": 1.2913404890646438, + "grad_norm": 0.6778663992881775, + "learning_rate": 1.929361021458367e-05, + "loss": 2.4057, + "step": 16001 + }, + { + "epoch": 1.2914211928012267, + "grad_norm": 0.6982381343841553, + "learning_rate": 1.9284289623801477e-05, + "loss": 2.4376, + "step": 16002 + }, + { + "epoch": 1.2915018965378098, + "grad_norm": 0.6956612467765808, + "learning_rate": 1.927497104466578e-05, + "loss": 2.4485, + "step": 16003 + }, + { + "epoch": 1.2915826002743926, + "grad_norm": 0.6780211925506592, + "learning_rate": 1.9265654477408825e-05, + 
"loss": 2.4233, + "step": 16004 + }, + { + "epoch": 1.2916633040109757, + "grad_norm": 0.6869028806686401, + "learning_rate": 1.92563399222628e-05, + "loss": 2.4156, + "step": 16005 + }, + { + "epoch": 1.2917440077475586, + "grad_norm": 0.6402696967124939, + "learning_rate": 1.9247027379459848e-05, + "loss": 2.4208, + "step": 16006 + }, + { + "epoch": 1.2918247114841417, + "grad_norm": 0.6868177652359009, + "learning_rate": 1.92377168492321e-05, + "loss": 2.4067, + "step": 16007 + }, + { + "epoch": 1.2919054152207248, + "grad_norm": 0.7152438759803772, + "learning_rate": 1.922840833181152e-05, + "loss": 2.3944, + "step": 16008 + }, + { + "epoch": 1.2919861189573076, + "grad_norm": 0.6467335820198059, + "learning_rate": 1.921910182743015e-05, + "loss": 2.4064, + "step": 16009 + }, + { + "epoch": 1.2920668226938907, + "grad_norm": 0.6918551325798035, + "learning_rate": 1.9209797336319956e-05, + "loss": 2.4457, + "step": 16010 + }, + { + "epoch": 1.2921475264304738, + "grad_norm": 0.7308588027954102, + "learning_rate": 1.920049485871278e-05, + "loss": 2.3785, + "step": 16011 + }, + { + "epoch": 1.2922282301670567, + "grad_norm": 0.6918718814849854, + "learning_rate": 1.9191194394840472e-05, + "loss": 2.4645, + "step": 16012 + }, + { + "epoch": 1.2923089339036398, + "grad_norm": 0.7048078775405884, + "learning_rate": 1.9181895944934848e-05, + "loss": 2.4082, + "step": 16013 + }, + { + "epoch": 1.2923896376402229, + "grad_norm": 0.7175794839859009, + "learning_rate": 1.917259950922763e-05, + "loss": 2.4521, + "step": 16014 + }, + { + "epoch": 1.2924703413768057, + "grad_norm": 0.6895543932914734, + "learning_rate": 1.916330508795051e-05, + "loss": 2.4058, + "step": 16015 + }, + { + "epoch": 1.2925510451133888, + "grad_norm": 0.6951895952224731, + "learning_rate": 1.9154012681335176e-05, + "loss": 2.4274, + "step": 16016 + }, + { + "epoch": 1.2926317488499717, + "grad_norm": 0.6807428598403931, + "learning_rate": 1.9144722289613148e-05, + "loss": 2.4008, + "step": 16017 
+ }, + { + "epoch": 1.2927124525865548, + "grad_norm": 0.6643410325050354, + "learning_rate": 1.9135433913015997e-05, + "loss": 2.4036, + "step": 16018 + }, + { + "epoch": 1.2927931563231376, + "grad_norm": 0.7283294796943665, + "learning_rate": 1.912614755177522e-05, + "loss": 2.4118, + "step": 16019 + }, + { + "epoch": 1.2928738600597207, + "grad_norm": 0.7516021132469177, + "learning_rate": 1.911686320612227e-05, + "loss": 2.3983, + "step": 16020 + }, + { + "epoch": 1.2929545637963038, + "grad_norm": 0.7314203381538391, + "learning_rate": 1.91075808762885e-05, + "loss": 2.4352, + "step": 16021 + }, + { + "epoch": 1.2930352675328867, + "grad_norm": 0.6904106736183167, + "learning_rate": 1.9098300562505266e-05, + "loss": 2.3734, + "step": 16022 + }, + { + "epoch": 1.2931159712694698, + "grad_norm": 0.6936709880828857, + "learning_rate": 1.9089022265003863e-05, + "loss": 2.4356, + "step": 16023 + }, + { + "epoch": 1.2931966750060528, + "grad_norm": 0.6753442883491516, + "learning_rate": 1.9079745984015528e-05, + "loss": 2.4713, + "step": 16024 + }, + { + "epoch": 1.2932773787426357, + "grad_norm": 0.7185340523719788, + "learning_rate": 1.9070471719771445e-05, + "loss": 2.4021, + "step": 16025 + }, + { + "epoch": 1.2933580824792188, + "grad_norm": 0.7486871480941772, + "learning_rate": 1.9061199472502798e-05, + "loss": 2.4144, + "step": 16026 + }, + { + "epoch": 1.2934387862158019, + "grad_norm": 0.6790735721588135, + "learning_rate": 1.90519292424406e-05, + "loss": 2.413, + "step": 16027 + }, + { + "epoch": 1.2935194899523847, + "grad_norm": 0.7104402780532837, + "learning_rate": 1.9042661029815922e-05, + "loss": 2.452, + "step": 16028 + }, + { + "epoch": 1.2936001936889678, + "grad_norm": 0.6975364685058594, + "learning_rate": 1.9033394834859796e-05, + "loss": 2.4169, + "step": 16029 + }, + { + "epoch": 1.293680897425551, + "grad_norm": 0.7619667649269104, + "learning_rate": 1.9024130657803085e-05, + "loss": 2.4106, + "step": 16030 + }, + { + "epoch": 
1.2937616011621338, + "grad_norm": 0.6600254774093628, + "learning_rate": 1.9014868498876716e-05, + "loss": 2.3955, + "step": 16031 + }, + { + "epoch": 1.2938423048987169, + "grad_norm": 0.6790784597396851, + "learning_rate": 1.9005608358311533e-05, + "loss": 2.437, + "step": 16032 + }, + { + "epoch": 1.2939230086352997, + "grad_norm": 0.7085568308830261, + "learning_rate": 1.899635023633828e-05, + "loss": 2.4729, + "step": 16033 + }, + { + "epoch": 1.2940037123718828, + "grad_norm": 0.6940603256225586, + "learning_rate": 1.8987094133187732e-05, + "loss": 2.4099, + "step": 16034 + }, + { + "epoch": 1.2940844161084657, + "grad_norm": 0.7387171387672424, + "learning_rate": 1.897784004909058e-05, + "loss": 2.4509, + "step": 16035 + }, + { + "epoch": 1.2941651198450488, + "grad_norm": 0.8263981938362122, + "learning_rate": 1.8968587984277463e-05, + "loss": 2.4208, + "step": 16036 + }, + { + "epoch": 1.2942458235816319, + "grad_norm": 0.7393552660942078, + "learning_rate": 1.8959337938978937e-05, + "loss": 2.4458, + "step": 16037 + }, + { + "epoch": 1.2943265273182147, + "grad_norm": 0.652787983417511, + "learning_rate": 1.895008991342555e-05, + "loss": 2.3593, + "step": 16038 + }, + { + "epoch": 1.2944072310547978, + "grad_norm": 0.6533015370368958, + "learning_rate": 1.8940843907847817e-05, + "loss": 2.4538, + "step": 16039 + }, + { + "epoch": 1.294487934791381, + "grad_norm": 0.6723785400390625, + "learning_rate": 1.8931599922476106e-05, + "loss": 2.4528, + "step": 16040 + }, + { + "epoch": 1.2945686385279638, + "grad_norm": 0.693242073059082, + "learning_rate": 1.892235795754085e-05, + "loss": 2.4006, + "step": 16041 + }, + { + "epoch": 1.2946493422645469, + "grad_norm": 0.6849604845046997, + "learning_rate": 1.8913118013272403e-05, + "loss": 2.3758, + "step": 16042 + }, + { + "epoch": 1.29473004600113, + "grad_norm": 0.7252739667892456, + "learning_rate": 1.8903880089900983e-05, + "loss": 2.4101, + "step": 16043 + }, + { + "epoch": 1.2948107497377128, + 
"grad_norm": 0.720431923866272, + "learning_rate": 1.8894644187656864e-05, + "loss": 2.4241, + "step": 16044 + }, + { + "epoch": 1.294891453474296, + "grad_norm": 0.6936169862747192, + "learning_rate": 1.8885410306770225e-05, + "loss": 2.4225, + "step": 16045 + }, + { + "epoch": 1.294972157210879, + "grad_norm": 0.7698646187782288, + "learning_rate": 1.8876178447471193e-05, + "loss": 2.4031, + "step": 16046 + }, + { + "epoch": 1.2950528609474619, + "grad_norm": 0.6800495982170105, + "learning_rate": 1.8866948609989854e-05, + "loss": 2.3679, + "step": 16047 + }, + { + "epoch": 1.295133564684045, + "grad_norm": 0.7348111867904663, + "learning_rate": 1.8857720794556267e-05, + "loss": 2.4263, + "step": 16048 + }, + { + "epoch": 1.2952142684206278, + "grad_norm": 0.6614782214164734, + "learning_rate": 1.8848495001400356e-05, + "loss": 2.4396, + "step": 16049 + }, + { + "epoch": 1.295294972157211, + "grad_norm": 0.6683650612831116, + "learning_rate": 1.8839271230752075e-05, + "loss": 2.4189, + "step": 16050 + }, + { + "epoch": 1.2953756758937938, + "grad_norm": 0.711040198802948, + "learning_rate": 1.8830049482841328e-05, + "loss": 2.3974, + "step": 16051 + }, + { + "epoch": 1.2954563796303769, + "grad_norm": 0.6663193702697754, + "learning_rate": 1.882082975789795e-05, + "loss": 2.4196, + "step": 16052 + }, + { + "epoch": 1.29553708336696, + "grad_norm": 0.6551210284233093, + "learning_rate": 1.881161205615166e-05, + "loss": 2.3793, + "step": 16053 + }, + { + "epoch": 1.2956177871035428, + "grad_norm": 0.6849039793014526, + "learning_rate": 1.8802396377832243e-05, + "loss": 2.3941, + "step": 16054 + }, + { + "epoch": 1.295698490840126, + "grad_norm": 0.7642949223518372, + "learning_rate": 1.8793182723169357e-05, + "loss": 2.4296, + "step": 16055 + }, + { + "epoch": 1.295779194576709, + "grad_norm": 0.7104716897010803, + "learning_rate": 1.878397109239263e-05, + "loss": 2.4124, + "step": 16056 + }, + { + "epoch": 1.2958598983132918, + "grad_norm": 0.6822344064712524, + 
"learning_rate": 1.877476148573164e-05, + "loss": 2.4072, + "step": 16057 + }, + { + "epoch": 1.295940602049875, + "grad_norm": 0.6824066042900085, + "learning_rate": 1.8765553903415956e-05, + "loss": 2.4137, + "step": 16058 + }, + { + "epoch": 1.296021305786458, + "grad_norm": 0.7083307504653931, + "learning_rate": 1.875634834567498e-05, + "loss": 2.4423, + "step": 16059 + }, + { + "epoch": 1.2961020095230409, + "grad_norm": 0.7301077246665955, + "learning_rate": 1.874714481273818e-05, + "loss": 2.3926, + "step": 16060 + }, + { + "epoch": 1.296182713259624, + "grad_norm": 0.685656726360321, + "learning_rate": 1.873794330483496e-05, + "loss": 2.4409, + "step": 16061 + }, + { + "epoch": 1.296263416996207, + "grad_norm": 0.6916719675064087, + "learning_rate": 1.8728743822194584e-05, + "loss": 2.4141, + "step": 16062 + }, + { + "epoch": 1.29634412073279, + "grad_norm": 0.7188845276832581, + "learning_rate": 1.871954636504636e-05, + "loss": 2.4186, + "step": 16063 + }, + { + "epoch": 1.2964248244693728, + "grad_norm": 0.6637440919876099, + "learning_rate": 1.8710350933619504e-05, + "loss": 2.4526, + "step": 16064 + }, + { + "epoch": 1.2965055282059559, + "grad_norm": 0.7000349760055542, + "learning_rate": 1.87011575281432e-05, + "loss": 2.4096, + "step": 16065 + }, + { + "epoch": 1.296586231942539, + "grad_norm": 0.693513810634613, + "learning_rate": 1.8691966148846573e-05, + "loss": 2.3931, + "step": 16066 + }, + { + "epoch": 1.2966669356791218, + "grad_norm": 0.6928985118865967, + "learning_rate": 1.8682776795958678e-05, + "loss": 2.4384, + "step": 16067 + }, + { + "epoch": 1.296747639415705, + "grad_norm": 0.6474096179008484, + "learning_rate": 1.8673589469708585e-05, + "loss": 2.3985, + "step": 16068 + }, + { + "epoch": 1.296828343152288, + "grad_norm": 0.6827313899993896, + "learning_rate": 1.866440417032521e-05, + "loss": 2.4607, + "step": 16069 + }, + { + "epoch": 1.2969090468888709, + "grad_norm": 0.7183445692062378, + "learning_rate": 1.8655220898037485e-05, + 
"loss": 2.4396, + "step": 16070 + }, + { + "epoch": 1.296989750625454, + "grad_norm": 0.6997376680374146, + "learning_rate": 1.8646039653074333e-05, + "loss": 2.4627, + "step": 16071 + }, + { + "epoch": 1.297070454362037, + "grad_norm": 0.7358444333076477, + "learning_rate": 1.8636860435664493e-05, + "loss": 2.4165, + "step": 16072 + }, + { + "epoch": 1.29715115809862, + "grad_norm": 0.8126270771026611, + "learning_rate": 1.8627683246036787e-05, + "loss": 2.4681, + "step": 16073 + }, + { + "epoch": 1.297231861835203, + "grad_norm": 0.7364177107810974, + "learning_rate": 1.8618508084419918e-05, + "loss": 2.44, + "step": 16074 + }, + { + "epoch": 1.297312565571786, + "grad_norm": 0.7480010390281677, + "learning_rate": 1.8609334951042567e-05, + "loss": 2.4759, + "step": 16075 + }, + { + "epoch": 1.297393269308369, + "grad_norm": 0.6563693284988403, + "learning_rate": 1.8600163846133335e-05, + "loss": 2.3865, + "step": 16076 + }, + { + "epoch": 1.297473973044952, + "grad_norm": 0.6961230039596558, + "learning_rate": 1.8590994769920832e-05, + "loss": 2.3851, + "step": 16077 + }, + { + "epoch": 1.297554676781535, + "grad_norm": 0.7137415409088135, + "learning_rate": 1.8581827722633527e-05, + "loss": 2.4115, + "step": 16078 + }, + { + "epoch": 1.297635380518118, + "grad_norm": 0.6579335331916809, + "learning_rate": 1.85726627044999e-05, + "loss": 2.4464, + "step": 16079 + }, + { + "epoch": 1.2977160842547009, + "grad_norm": 0.7069905400276184, + "learning_rate": 1.8563499715748366e-05, + "loss": 2.4057, + "step": 16080 + }, + { + "epoch": 1.297796787991284, + "grad_norm": 0.771925687789917, + "learning_rate": 1.8554338756607325e-05, + "loss": 2.4696, + "step": 16081 + }, + { + "epoch": 1.297877491727867, + "grad_norm": 0.7268456816673279, + "learning_rate": 1.8545179827305048e-05, + "loss": 2.3949, + "step": 16082 + }, + { + "epoch": 1.29795819546445, + "grad_norm": 0.7049130797386169, + "learning_rate": 1.8536022928069796e-05, + "loss": 2.4448, + "step": 16083 + }, + { + 
"epoch": 1.298038899201033, + "grad_norm": 0.6716888546943665, + "learning_rate": 1.852686805912982e-05, + "loss": 2.3356, + "step": 16084 + }, + { + "epoch": 1.298119602937616, + "grad_norm": 0.666386604309082, + "learning_rate": 1.851771522071325e-05, + "loss": 2.4226, + "step": 16085 + }, + { + "epoch": 1.298200306674199, + "grad_norm": 0.7084901332855225, + "learning_rate": 1.8508564413048223e-05, + "loss": 2.4452, + "step": 16086 + }, + { + "epoch": 1.298281010410782, + "grad_norm": 0.6615412831306458, + "learning_rate": 1.8499415636362815e-05, + "loss": 2.4193, + "step": 16087 + }, + { + "epoch": 1.2983617141473651, + "grad_norm": 0.7143606543540955, + "learning_rate": 1.849026889088499e-05, + "loss": 2.4513, + "step": 16088 + }, + { + "epoch": 1.298442417883948, + "grad_norm": 0.7241482734680176, + "learning_rate": 1.8481124176842723e-05, + "loss": 2.458, + "step": 16089 + }, + { + "epoch": 1.298523121620531, + "grad_norm": 0.6762149930000305, + "learning_rate": 1.8471981494463963e-05, + "loss": 2.4386, + "step": 16090 + }, + { + "epoch": 1.2986038253571142, + "grad_norm": 0.6672768592834473, + "learning_rate": 1.8462840843976525e-05, + "loss": 2.375, + "step": 16091 + }, + { + "epoch": 1.298684529093697, + "grad_norm": 0.6871693134307861, + "learning_rate": 1.8453702225608226e-05, + "loss": 2.4342, + "step": 16092 + }, + { + "epoch": 1.2987652328302801, + "grad_norm": 0.6771275401115417, + "learning_rate": 1.8444565639586864e-05, + "loss": 2.402, + "step": 16093 + }, + { + "epoch": 1.298845936566863, + "grad_norm": 0.6627403497695923, + "learning_rate": 1.8435431086140077e-05, + "loss": 2.4667, + "step": 16094 + }, + { + "epoch": 1.298926640303446, + "grad_norm": 0.7001610398292542, + "learning_rate": 1.8426298565495538e-05, + "loss": 2.4396, + "step": 16095 + }, + { + "epoch": 1.299007344040029, + "grad_norm": 0.7574489712715149, + "learning_rate": 1.8417168077880908e-05, + "loss": 2.4601, + "step": 16096 + }, + { + "epoch": 1.299088047776612, + 
"grad_norm": 0.7771055698394775, + "learning_rate": 1.840803962352372e-05, + "loss": 2.4371, + "step": 16097 + }, + { + "epoch": 1.299168751513195, + "grad_norm": 0.6738649606704712, + "learning_rate": 1.8398913202651457e-05, + "loss": 2.3921, + "step": 16098 + }, + { + "epoch": 1.299249455249778, + "grad_norm": 0.7014862895011902, + "learning_rate": 1.8389788815491583e-05, + "loss": 2.451, + "step": 16099 + }, + { + "epoch": 1.299330158986361, + "grad_norm": 0.7026070952415466, + "learning_rate": 1.8380666462271523e-05, + "loss": 2.4583, + "step": 16100 + }, + { + "epoch": 1.2994108627229441, + "grad_norm": 0.6904535293579102, + "learning_rate": 1.8371546143218588e-05, + "loss": 2.4453, + "step": 16101 + }, + { + "epoch": 1.299491566459527, + "grad_norm": 0.6974804997444153, + "learning_rate": 1.8362427858560093e-05, + "loss": 2.4291, + "step": 16102 + }, + { + "epoch": 1.29957227019611, + "grad_norm": 0.6826989650726318, + "learning_rate": 1.8353311608523326e-05, + "loss": 2.4183, + "step": 16103 + }, + { + "epoch": 1.2996529739326932, + "grad_norm": 0.6804787516593933, + "learning_rate": 1.8344197393335448e-05, + "loss": 2.434, + "step": 16104 + }, + { + "epoch": 1.299733677669276, + "grad_norm": 0.7144587635993958, + "learning_rate": 1.8335085213223613e-05, + "loss": 2.4296, + "step": 16105 + }, + { + "epoch": 1.2998143814058591, + "grad_norm": 0.7228755354881287, + "learning_rate": 1.8325975068414924e-05, + "loss": 2.3987, + "step": 16106 + }, + { + "epoch": 1.2998950851424422, + "grad_norm": 0.7417716383934021, + "learning_rate": 1.8316866959136438e-05, + "loss": 2.4076, + "step": 16107 + }, + { + "epoch": 1.299975788879025, + "grad_norm": 0.6737387776374817, + "learning_rate": 1.8307760885615154e-05, + "loss": 2.4175, + "step": 16108 + }, + { + "epoch": 1.3000564926156082, + "grad_norm": 0.7294918298721313, + "learning_rate": 1.8298656848078035e-05, + "loss": 2.4022, + "step": 16109 + }, + { + "epoch": 1.300137196352191, + "grad_norm": 0.7200861573219299, + 
"learning_rate": 1.828955484675193e-05, + "loss": 2.4018, + "step": 16110 + }, + { + "epoch": 1.3002179000887741, + "grad_norm": 0.7704176306724548, + "learning_rate": 1.8280454881863718e-05, + "loss": 2.4539, + "step": 16111 + }, + { + "epoch": 1.300298603825357, + "grad_norm": 0.6790730953216553, + "learning_rate": 1.8271356953640184e-05, + "loss": 2.4196, + "step": 16112 + }, + { + "epoch": 1.30037930756194, + "grad_norm": 0.7165740132331848, + "learning_rate": 1.8262261062308096e-05, + "loss": 2.4234, + "step": 16113 + }, + { + "epoch": 1.3004600112985232, + "grad_norm": 0.7716830372810364, + "learning_rate": 1.82531672080941e-05, + "loss": 2.4255, + "step": 16114 + }, + { + "epoch": 1.300540715035106, + "grad_norm": 0.6525317430496216, + "learning_rate": 1.824407539122488e-05, + "loss": 2.4482, + "step": 16115 + }, + { + "epoch": 1.3006214187716891, + "grad_norm": 0.7397769093513489, + "learning_rate": 1.8234985611927003e-05, + "loss": 2.33, + "step": 16116 + }, + { + "epoch": 1.3007021225082722, + "grad_norm": 0.7106032967567444, + "learning_rate": 1.822589787042702e-05, + "loss": 2.485, + "step": 16117 + }, + { + "epoch": 1.300782826244855, + "grad_norm": 0.7030045390129089, + "learning_rate": 1.8216812166951425e-05, + "loss": 2.454, + "step": 16118 + }, + { + "epoch": 1.3008635299814382, + "grad_norm": 0.7075662612915039, + "learning_rate": 1.8207728501726683e-05, + "loss": 2.4589, + "step": 16119 + }, + { + "epoch": 1.3009442337180213, + "grad_norm": 0.6700533032417297, + "learning_rate": 1.819864687497912e-05, + "loss": 2.4398, + "step": 16120 + }, + { + "epoch": 1.3010249374546041, + "grad_norm": 0.6951712369918823, + "learning_rate": 1.8189567286935117e-05, + "loss": 2.3998, + "step": 16121 + }, + { + "epoch": 1.3011056411911872, + "grad_norm": 0.708344578742981, + "learning_rate": 1.818048973782097e-05, + "loss": 2.4142, + "step": 16122 + }, + { + "epoch": 1.30118634492777, + "grad_norm": 0.7078592777252197, + "learning_rate": 1.817141422786287e-05, + 
"loss": 2.451, + "step": 16123 + }, + { + "epoch": 1.3012670486643532, + "grad_norm": 0.7111849784851074, + "learning_rate": 1.816234075728703e-05, + "loss": 2.4762, + "step": 16124 + }, + { + "epoch": 1.301347752400936, + "grad_norm": 0.6716348528862, + "learning_rate": 1.8153269326319588e-05, + "loss": 2.4373, + "step": 16125 + }, + { + "epoch": 1.3014284561375191, + "grad_norm": 0.6592512130737305, + "learning_rate": 1.8144199935186623e-05, + "loss": 2.412, + "step": 16126 + }, + { + "epoch": 1.3015091598741022, + "grad_norm": 0.6958334445953369, + "learning_rate": 1.8135132584114167e-05, + "loss": 2.4077, + "step": 16127 + }, + { + "epoch": 1.301589863610685, + "grad_norm": 0.6911341547966003, + "learning_rate": 1.8126067273328207e-05, + "loss": 2.409, + "step": 16128 + }, + { + "epoch": 1.3016705673472682, + "grad_norm": 0.676114022731781, + "learning_rate": 1.8117004003054693e-05, + "loss": 2.4463, + "step": 16129 + }, + { + "epoch": 1.3017512710838512, + "grad_norm": 0.6493322849273682, + "learning_rate": 1.810794277351947e-05, + "loss": 2.4377, + "step": 16130 + }, + { + "epoch": 1.3018319748204341, + "grad_norm": 0.6938454508781433, + "learning_rate": 1.8098883584948367e-05, + "loss": 2.4298, + "step": 16131 + }, + { + "epoch": 1.3019126785570172, + "grad_norm": 0.69407719373703, + "learning_rate": 1.8089826437567214e-05, + "loss": 2.4107, + "step": 16132 + }, + { + "epoch": 1.3019933822936003, + "grad_norm": 0.6898862719535828, + "learning_rate": 1.8080771331601664e-05, + "loss": 2.4182, + "step": 16133 + }, + { + "epoch": 1.3020740860301832, + "grad_norm": 0.7377758026123047, + "learning_rate": 1.807171826727744e-05, + "loss": 2.4112, + "step": 16134 + }, + { + "epoch": 1.3021547897667662, + "grad_norm": 0.674057126045227, + "learning_rate": 1.8062667244820154e-05, + "loss": 2.4276, + "step": 16135 + }, + { + "epoch": 1.3022354935033493, + "grad_norm": 0.7087522745132446, + "learning_rate": 1.8053618264455384e-05, + "loss": 2.4338, + "step": 16136 + }, + 
{ + "epoch": 1.3023161972399322, + "grad_norm": 0.70958411693573, + "learning_rate": 1.8044571326408667e-05, + "loss": 2.4369, + "step": 16137 + }, + { + "epoch": 1.3023969009765153, + "grad_norm": 0.7023837566375732, + "learning_rate": 1.803552643090548e-05, + "loss": 2.4185, + "step": 16138 + }, + { + "epoch": 1.3024776047130981, + "grad_norm": 0.708543598651886, + "learning_rate": 1.8026483578171216e-05, + "loss": 2.4053, + "step": 16139 + }, + { + "epoch": 1.3025583084496812, + "grad_norm": 0.748601496219635, + "learning_rate": 1.8017442768431257e-05, + "loss": 2.3948, + "step": 16140 + }, + { + "epoch": 1.302639012186264, + "grad_norm": 0.6626949310302734, + "learning_rate": 1.800840400191096e-05, + "loss": 2.4636, + "step": 16141 + }, + { + "epoch": 1.3027197159228472, + "grad_norm": 0.7079617977142334, + "learning_rate": 1.7999367278835534e-05, + "loss": 2.4091, + "step": 16142 + }, + { + "epoch": 1.3028004196594303, + "grad_norm": 0.7025624513626099, + "learning_rate": 1.7990332599430225e-05, + "loss": 2.3732, + "step": 16143 + }, + { + "epoch": 1.3028811233960131, + "grad_norm": 0.7365758419036865, + "learning_rate": 1.7981299963920205e-05, + "loss": 2.4725, + "step": 16144 + }, + { + "epoch": 1.3029618271325962, + "grad_norm": 0.7511963248252869, + "learning_rate": 1.7972269372530615e-05, + "loss": 2.4304, + "step": 16145 + }, + { + "epoch": 1.3030425308691793, + "grad_norm": 0.7055985331535339, + "learning_rate": 1.796324082548644e-05, + "loss": 2.4259, + "step": 16146 + }, + { + "epoch": 1.3031232346057622, + "grad_norm": 0.691162645816803, + "learning_rate": 1.7954214323012775e-05, + "loss": 2.4262, + "step": 16147 + }, + { + "epoch": 1.3032039383423453, + "grad_norm": 0.7179710268974304, + "learning_rate": 1.7945189865334587e-05, + "loss": 2.4301, + "step": 16148 + }, + { + "epoch": 1.3032846420789284, + "grad_norm": 0.7391623258590698, + "learning_rate": 1.7936167452676744e-05, + "loss": 2.4302, + "step": 16149 + }, + { + "epoch": 1.3033653458155112, 
+ "grad_norm": 0.7297981381416321, + "learning_rate": 1.7927147085264117e-05, + "loss": 2.3911, + "step": 16150 + }, + { + "epoch": 1.3034460495520943, + "grad_norm": 0.7571932673454285, + "learning_rate": 1.7918128763321552e-05, + "loss": 2.4348, + "step": 16151 + }, + { + "epoch": 1.3035267532886774, + "grad_norm": 0.7074765563011169, + "learning_rate": 1.7909112487073754e-05, + "loss": 2.4164, + "step": 16152 + }, + { + "epoch": 1.3036074570252603, + "grad_norm": 0.7534131407737732, + "learning_rate": 1.7900098256745467e-05, + "loss": 2.3784, + "step": 16153 + }, + { + "epoch": 1.3036881607618434, + "grad_norm": 0.675398588180542, + "learning_rate": 1.789108607256136e-05, + "loss": 2.4305, + "step": 16154 + }, + { + "epoch": 1.3037688644984262, + "grad_norm": 0.7099249362945557, + "learning_rate": 1.7882075934746002e-05, + "loss": 2.4053, + "step": 16155 + }, + { + "epoch": 1.3038495682350093, + "grad_norm": 0.6914681196212769, + "learning_rate": 1.787306784352397e-05, + "loss": 2.3902, + "step": 16156 + }, + { + "epoch": 1.3039302719715922, + "grad_norm": 0.6956958770751953, + "learning_rate": 1.786406179911977e-05, + "loss": 2.4026, + "step": 16157 + }, + { + "epoch": 1.3040109757081753, + "grad_norm": 0.6873000860214233, + "learning_rate": 1.7855057801757857e-05, + "loss": 2.4082, + "step": 16158 + }, + { + "epoch": 1.3040916794447583, + "grad_norm": 0.7340587377548218, + "learning_rate": 1.7846055851662625e-05, + "loss": 2.4894, + "step": 16159 + }, + { + "epoch": 1.3041723831813412, + "grad_norm": 0.6956963539123535, + "learning_rate": 1.7837055949058444e-05, + "loss": 2.3976, + "step": 16160 + }, + { + "epoch": 1.3042530869179243, + "grad_norm": 0.7654300332069397, + "learning_rate": 1.782805809416962e-05, + "loss": 2.4272, + "step": 16161 + }, + { + "epoch": 1.3043337906545074, + "grad_norm": 0.7735971212387085, + "learning_rate": 1.7819062287220368e-05, + "loss": 2.4513, + "step": 16162 + }, + { + "epoch": 1.3044144943910903, + "grad_norm": 
0.6897203326225281, + "learning_rate": 1.7810068528434908e-05, + "loss": 2.3974, + "step": 16163 + }, + { + "epoch": 1.3044951981276733, + "grad_norm": 0.7328432202339172, + "learning_rate": 1.780107681803741e-05, + "loss": 2.4455, + "step": 16164 + }, + { + "epoch": 1.3045759018642564, + "grad_norm": 0.7098489999771118, + "learning_rate": 1.7792087156251924e-05, + "loss": 2.4173, + "step": 16165 + }, + { + "epoch": 1.3046566056008393, + "grad_norm": 0.6593194007873535, + "learning_rate": 1.7783099543302518e-05, + "loss": 2.4102, + "step": 16166 + }, + { + "epoch": 1.3047373093374224, + "grad_norm": 0.7329291105270386, + "learning_rate": 1.7774113979413188e-05, + "loss": 2.4856, + "step": 16167 + }, + { + "epoch": 1.3048180130740052, + "grad_norm": 0.7033355236053467, + "learning_rate": 1.776513046480788e-05, + "loss": 2.4503, + "step": 16168 + }, + { + "epoch": 1.3048987168105883, + "grad_norm": 0.7063608765602112, + "learning_rate": 1.7756148999710486e-05, + "loss": 2.4523, + "step": 16169 + }, + { + "epoch": 1.3049794205471712, + "grad_norm": 0.6905883550643921, + "learning_rate": 1.774716958434487e-05, + "loss": 2.4149, + "step": 16170 + }, + { + "epoch": 1.3050601242837543, + "grad_norm": 0.694551408290863, + "learning_rate": 1.7738192218934778e-05, + "loss": 2.437, + "step": 16171 + }, + { + "epoch": 1.3051408280203374, + "grad_norm": 0.7173176407814026, + "learning_rate": 1.772921690370396e-05, + "loss": 2.4817, + "step": 16172 + }, + { + "epoch": 1.3052215317569202, + "grad_norm": 0.7197130918502808, + "learning_rate": 1.7720243638876153e-05, + "loss": 2.4481, + "step": 16173 + }, + { + "epoch": 1.3053022354935033, + "grad_norm": 0.710811197757721, + "learning_rate": 1.771127242467493e-05, + "loss": 2.397, + "step": 16174 + }, + { + "epoch": 1.3053829392300864, + "grad_norm": 0.9194550514221191, + "learning_rate": 1.7702303261323894e-05, + "loss": 2.5206, + "step": 16175 + }, + { + "epoch": 1.3054636429666693, + "grad_norm": 0.7003832459449768, + 
"learning_rate": 1.769333614904659e-05, + "loss": 2.4175, + "step": 16176 + }, + { + "epoch": 1.3055443467032524, + "grad_norm": 0.7161554098129272, + "learning_rate": 1.768437108806651e-05, + "loss": 2.3892, + "step": 16177 + }, + { + "epoch": 1.3056250504398355, + "grad_norm": 0.6516181826591492, + "learning_rate": 1.767540807860707e-05, + "loss": 2.4361, + "step": 16178 + }, + { + "epoch": 1.3057057541764183, + "grad_norm": 0.7518061399459839, + "learning_rate": 1.7666447120891662e-05, + "loss": 2.4572, + "step": 16179 + }, + { + "epoch": 1.3057864579130014, + "grad_norm": 0.735388994216919, + "learning_rate": 1.7657488215143637e-05, + "loss": 2.3965, + "step": 16180 + }, + { + "epoch": 1.3058671616495845, + "grad_norm": 0.6994282007217407, + "learning_rate": 1.764853136158622e-05, + "loss": 2.4052, + "step": 16181 + }, + { + "epoch": 1.3059478653861674, + "grad_norm": 0.7095311880111694, + "learning_rate": 1.7639576560442684e-05, + "loss": 2.4818, + "step": 16182 + }, + { + "epoch": 1.3060285691227504, + "grad_norm": 0.6527207493782043, + "learning_rate": 1.7630623811936208e-05, + "loss": 2.3962, + "step": 16183 + }, + { + "epoch": 1.3061092728593333, + "grad_norm": 0.6668451428413391, + "learning_rate": 1.7621673116289882e-05, + "loss": 2.4514, + "step": 16184 + }, + { + "epoch": 1.3061899765959164, + "grad_norm": 0.7119911909103394, + "learning_rate": 1.7612724473726795e-05, + "loss": 2.4313, + "step": 16185 + }, + { + "epoch": 1.3062706803324993, + "grad_norm": 0.706249475479126, + "learning_rate": 1.7603777884469984e-05, + "loss": 2.4131, + "step": 16186 + }, + { + "epoch": 1.3063513840690824, + "grad_norm": 0.6634086966514587, + "learning_rate": 1.759483334874241e-05, + "loss": 2.3532, + "step": 16187 + }, + { + "epoch": 1.3064320878056654, + "grad_norm": 0.8096393942832947, + "learning_rate": 1.7585890866766995e-05, + "loss": 2.4485, + "step": 16188 + }, + { + "epoch": 1.3065127915422483, + "grad_norm": 0.675308883190155, + "learning_rate": 
1.7576950438766615e-05, + "loss": 2.388, + "step": 16189 + }, + { + "epoch": 1.3065934952788314, + "grad_norm": 0.738275408744812, + "learning_rate": 1.756801206496411e-05, + "loss": 2.4485, + "step": 16190 + }, + { + "epoch": 1.3066741990154145, + "grad_norm": 0.7045620083808899, + "learning_rate": 1.755907574558221e-05, + "loss": 2.3985, + "step": 16191 + }, + { + "epoch": 1.3067549027519973, + "grad_norm": 0.6499879360198975, + "learning_rate": 1.755014148084363e-05, + "loss": 2.3992, + "step": 16192 + }, + { + "epoch": 1.3068356064885804, + "grad_norm": 0.7101179361343384, + "learning_rate": 1.7541209270971083e-05, + "loss": 2.4217, + "step": 16193 + }, + { + "epoch": 1.3069163102251635, + "grad_norm": 0.6865181922912598, + "learning_rate": 1.7532279116187124e-05, + "loss": 2.4805, + "step": 16194 + }, + { + "epoch": 1.3069970139617464, + "grad_norm": 0.7710141539573669, + "learning_rate": 1.752335101671434e-05, + "loss": 2.3654, + "step": 16195 + }, + { + "epoch": 1.3070777176983295, + "grad_norm": 0.695936381816864, + "learning_rate": 1.7514424972775244e-05, + "loss": 2.4315, + "step": 16196 + }, + { + "epoch": 1.3071584214349126, + "grad_norm": 0.6781535148620605, + "learning_rate": 1.7505500984592304e-05, + "loss": 2.4238, + "step": 16197 + }, + { + "epoch": 1.3072391251714954, + "grad_norm": 0.6549252271652222, + "learning_rate": 1.7496579052387918e-05, + "loss": 2.3766, + "step": 16198 + }, + { + "epoch": 1.3073198289080785, + "grad_norm": 0.6599059700965881, + "learning_rate": 1.7487659176384474e-05, + "loss": 2.4613, + "step": 16199 + }, + { + "epoch": 1.3074005326446614, + "grad_norm": 0.6742514967918396, + "learning_rate": 1.7478741356804228e-05, + "loss": 2.3917, + "step": 16200 + }, + { + "epoch": 1.3074812363812445, + "grad_norm": 0.6542397141456604, + "learning_rate": 1.746982559386946e-05, + "loss": 2.44, + "step": 16201 + }, + { + "epoch": 1.3075619401178273, + "grad_norm": 0.7200478315353394, + "learning_rate": 1.74609118878024e-05, + "loss": 
2.4324, + "step": 16202 + }, + { + "epoch": 1.3076426438544104, + "grad_norm": 0.717628002166748, + "learning_rate": 1.745200023882515e-05, + "loss": 2.3996, + "step": 16203 + }, + { + "epoch": 1.3077233475909935, + "grad_norm": 0.7350025177001953, + "learning_rate": 1.744309064715983e-05, + "loss": 2.4812, + "step": 16204 + }, + { + "epoch": 1.3078040513275764, + "grad_norm": 0.7253599762916565, + "learning_rate": 1.74341831130285e-05, + "loss": 2.4454, + "step": 16205 + }, + { + "epoch": 1.3078847550641595, + "grad_norm": 0.7537909746170044, + "learning_rate": 1.7425277636653193e-05, + "loss": 2.4247, + "step": 16206 + }, + { + "epoch": 1.3079654588007426, + "grad_norm": 0.7563284039497375, + "learning_rate": 1.7416374218255783e-05, + "loss": 2.3893, + "step": 16207 + }, + { + "epoch": 1.3080461625373254, + "grad_norm": 0.7118926048278809, + "learning_rate": 1.740747285805818e-05, + "loss": 2.4146, + "step": 16208 + }, + { + "epoch": 1.3081268662739085, + "grad_norm": 0.7805569171905518, + "learning_rate": 1.7398573556282304e-05, + "loss": 2.396, + "step": 16209 + }, + { + "epoch": 1.3082075700104916, + "grad_norm": 0.7357630133628845, + "learning_rate": 1.738967631314987e-05, + "loss": 2.5405, + "step": 16210 + }, + { + "epoch": 1.3082882737470745, + "grad_norm": 0.6670438647270203, + "learning_rate": 1.7380781128882652e-05, + "loss": 2.4452, + "step": 16211 + }, + { + "epoch": 1.3083689774836575, + "grad_norm": 0.7374427318572998, + "learning_rate": 1.7371888003702353e-05, + "loss": 2.5143, + "step": 16212 + }, + { + "epoch": 1.3084496812202406, + "grad_norm": 0.672207236289978, + "learning_rate": 1.736299693783058e-05, + "loss": 2.4178, + "step": 16213 + }, + { + "epoch": 1.3085303849568235, + "grad_norm": 0.6926576495170593, + "learning_rate": 1.735410793148894e-05, + "loss": 2.3466, + "step": 16214 + }, + { + "epoch": 1.3086110886934066, + "grad_norm": 0.6928917169570923, + "learning_rate": 1.734522098489899e-05, + "loss": 2.4654, + "step": 16215 + }, + { + 
"epoch": 1.3086917924299895, + "grad_norm": 0.6536242961883545, + "learning_rate": 1.733633609828217e-05, + "loss": 2.3761, + "step": 16216 + }, + { + "epoch": 1.3087724961665725, + "grad_norm": 0.6993953585624695, + "learning_rate": 1.732745327185994e-05, + "loss": 2.3963, + "step": 16217 + }, + { + "epoch": 1.3088531999031554, + "grad_norm": 0.6851957440376282, + "learning_rate": 1.731857250585368e-05, + "loss": 2.4253, + "step": 16218 + }, + { + "epoch": 1.3089339036397385, + "grad_norm": 0.6620005965232849, + "learning_rate": 1.7309693800484728e-05, + "loss": 2.4302, + "step": 16219 + }, + { + "epoch": 1.3090146073763216, + "grad_norm": 0.6704410314559937, + "learning_rate": 1.7300817155974356e-05, + "loss": 2.4065, + "step": 16220 + }, + { + "epoch": 1.3090953111129044, + "grad_norm": 0.6882327198982239, + "learning_rate": 1.7291942572543807e-05, + "loss": 2.4526, + "step": 16221 + }, + { + "epoch": 1.3091760148494875, + "grad_norm": 0.6971533298492432, + "learning_rate": 1.7283070050414275e-05, + "loss": 2.4076, + "step": 16222 + }, + { + "epoch": 1.3092567185860706, + "grad_norm": 0.6662544012069702, + "learning_rate": 1.7274199589806827e-05, + "loss": 2.3678, + "step": 16223 + }, + { + "epoch": 1.3093374223226535, + "grad_norm": 0.6342894434928894, + "learning_rate": 1.726533119094258e-05, + "loss": 2.3424, + "step": 16224 + }, + { + "epoch": 1.3094181260592366, + "grad_norm": 0.6808488965034485, + "learning_rate": 1.7256464854042577e-05, + "loss": 2.4286, + "step": 16225 + }, + { + "epoch": 1.3094988297958197, + "grad_norm": 0.6417922973632812, + "learning_rate": 1.7247600579327738e-05, + "loss": 2.3677, + "step": 16226 + }, + { + "epoch": 1.3095795335324025, + "grad_norm": 0.7267102599143982, + "learning_rate": 1.7238738367019002e-05, + "loss": 2.3974, + "step": 16227 + }, + { + "epoch": 1.3096602372689856, + "grad_norm": 0.6915002465248108, + "learning_rate": 1.722987821733725e-05, + "loss": 2.4429, + "step": 16228 + }, + { + "epoch": 1.3097409410055685, 
+ "grad_norm": 0.6930112242698669, + "learning_rate": 1.7221020130503296e-05, + "loss": 2.4272, + "step": 16229 + }, + { + "epoch": 1.3098216447421516, + "grad_norm": 0.7049465179443359, + "learning_rate": 1.7212164106737904e-05, + "loss": 2.4089, + "step": 16230 + }, + { + "epoch": 1.3099023484787344, + "grad_norm": 0.7230044603347778, + "learning_rate": 1.720331014626182e-05, + "loss": 2.4313, + "step": 16231 + }, + { + "epoch": 1.3099830522153175, + "grad_norm": 0.6513530015945435, + "learning_rate": 1.7194458249295665e-05, + "loss": 2.3293, + "step": 16232 + }, + { + "epoch": 1.3100637559519006, + "grad_norm": 0.6880534291267395, + "learning_rate": 1.718560841606005e-05, + "loss": 2.4556, + "step": 16233 + }, + { + "epoch": 1.3101444596884835, + "grad_norm": 0.7075292468070984, + "learning_rate": 1.717676064677559e-05, + "loss": 2.4747, + "step": 16234 + }, + { + "epoch": 1.3102251634250666, + "grad_norm": 0.7713594436645508, + "learning_rate": 1.7167914941662723e-05, + "loss": 2.4135, + "step": 16235 + }, + { + "epoch": 1.3103058671616497, + "grad_norm": 0.7883979082107544, + "learning_rate": 1.7159071300941943e-05, + "loss": 2.418, + "step": 16236 + }, + { + "epoch": 1.3103865708982325, + "grad_norm": 0.6588975787162781, + "learning_rate": 1.7150229724833655e-05, + "loss": 2.3295, + "step": 16237 + }, + { + "epoch": 1.3104672746348156, + "grad_norm": 0.679086446762085, + "learning_rate": 1.7141390213558217e-05, + "loss": 2.413, + "step": 16238 + }, + { + "epoch": 1.3105479783713987, + "grad_norm": 0.6803067326545715, + "learning_rate": 1.713255276733592e-05, + "loss": 2.4338, + "step": 16239 + }, + { + "epoch": 1.3106286821079816, + "grad_norm": 0.7041650414466858, + "learning_rate": 1.712371738638704e-05, + "loss": 2.469, + "step": 16240 + }, + { + "epoch": 1.3107093858445646, + "grad_norm": 0.6560962796211243, + "learning_rate": 1.711488407093178e-05, + "loss": 2.4353, + "step": 16241 + }, + { + "epoch": 1.3107900895811477, + "grad_norm": 
0.6637921333312988, + "learning_rate": 1.7106052821190244e-05, + "loss": 2.3996, + "step": 16242 + }, + { + "epoch": 1.3108707933177306, + "grad_norm": 0.8131709098815918, + "learning_rate": 1.7097223637382565e-05, + "loss": 2.466, + "step": 16243 + }, + { + "epoch": 1.3109514970543137, + "grad_norm": 0.6637253165245056, + "learning_rate": 1.708839651972881e-05, + "loss": 2.3811, + "step": 16244 + }, + { + "epoch": 1.3110322007908966, + "grad_norm": 0.71912682056427, + "learning_rate": 1.7079571468448917e-05, + "loss": 2.4175, + "step": 16245 + }, + { + "epoch": 1.3111129045274796, + "grad_norm": 0.7028010487556458, + "learning_rate": 1.7070748483762854e-05, + "loss": 2.41, + "step": 16246 + }, + { + "epoch": 1.3111936082640625, + "grad_norm": 0.7241945862770081, + "learning_rate": 1.7061927565890522e-05, + "loss": 2.4171, + "step": 16247 + }, + { + "epoch": 1.3112743120006456, + "grad_norm": 0.7039221525192261, + "learning_rate": 1.705310871505177e-05, + "loss": 2.4154, + "step": 16248 + }, + { + "epoch": 1.3113550157372287, + "grad_norm": 0.672444760799408, + "learning_rate": 1.704429193146636e-05, + "loss": 2.4025, + "step": 16249 + }, + { + "epoch": 1.3114357194738115, + "grad_norm": 0.7240859866142273, + "learning_rate": 1.7035477215354068e-05, + "loss": 2.3864, + "step": 16250 + }, + { + "epoch": 1.3115164232103946, + "grad_norm": 0.7379294633865356, + "learning_rate": 1.7026664566934536e-05, + "loss": 2.4663, + "step": 16251 + }, + { + "epoch": 1.3115971269469777, + "grad_norm": 0.6928708553314209, + "learning_rate": 1.7017853986427425e-05, + "loss": 2.4407, + "step": 16252 + }, + { + "epoch": 1.3116778306835606, + "grad_norm": 0.6304093599319458, + "learning_rate": 1.7009045474052298e-05, + "loss": 2.4755, + "step": 16253 + }, + { + "epoch": 1.3117585344201437, + "grad_norm": 0.6945829391479492, + "learning_rate": 1.700023903002872e-05, + "loss": 2.3817, + "step": 16254 + }, + { + "epoch": 1.3118392381567268, + "grad_norm": 0.6899009346961975, + 
"learning_rate": 1.6991434654576133e-05, + "loss": 2.3989, + "step": 16255 + }, + { + "epoch": 1.3119199418933096, + "grad_norm": 0.7359157204627991, + "learning_rate": 1.6982632347913985e-05, + "loss": 2.3788, + "step": 16256 + }, + { + "epoch": 1.3120006456298927, + "grad_norm": 0.6562486886978149, + "learning_rate": 1.6973832110261658e-05, + "loss": 2.3955, + "step": 16257 + }, + { + "epoch": 1.3120813493664758, + "grad_norm": 0.6772989630699158, + "learning_rate": 1.696503394183846e-05, + "loss": 2.4788, + "step": 16258 + }, + { + "epoch": 1.3121620531030587, + "grad_norm": 0.7214391231536865, + "learning_rate": 1.695623784286363e-05, + "loss": 2.3836, + "step": 16259 + }, + { + "epoch": 1.3122427568396418, + "grad_norm": 0.7041679620742798, + "learning_rate": 1.6947443813556495e-05, + "loss": 2.4547, + "step": 16260 + }, + { + "epoch": 1.3123234605762246, + "grad_norm": 0.6819555163383484, + "learning_rate": 1.6938651854136135e-05, + "loss": 2.468, + "step": 16261 + }, + { + "epoch": 1.3124041643128077, + "grad_norm": 0.6466858983039856, + "learning_rate": 1.6929861964821693e-05, + "loss": 2.4572, + "step": 16262 + }, + { + "epoch": 1.3124848680493906, + "grad_norm": 0.688709557056427, + "learning_rate": 1.6921074145832248e-05, + "loss": 2.3891, + "step": 16263 + }, + { + "epoch": 1.3125655717859737, + "grad_norm": 0.6896470785140991, + "learning_rate": 1.69122883973868e-05, + "loss": 2.3825, + "step": 16264 + }, + { + "epoch": 1.3126462755225567, + "grad_norm": 0.8242524266242981, + "learning_rate": 1.690350471970431e-05, + "loss": 2.4804, + "step": 16265 + }, + { + "epoch": 1.3127269792591396, + "grad_norm": 0.7506044507026672, + "learning_rate": 1.689472311300373e-05, + "loss": 2.4671, + "step": 16266 + }, + { + "epoch": 1.3128076829957227, + "grad_norm": 0.6776263117790222, + "learning_rate": 1.688594357750386e-05, + "loss": 2.4646, + "step": 16267 + }, + { + "epoch": 1.3128883867323058, + "grad_norm": 0.6843759417533875, + "learning_rate": 
1.6877166113423548e-05, + "loss": 2.4147, + "step": 16268 + }, + { + "epoch": 1.3129690904688887, + "grad_norm": 0.6650474667549133, + "learning_rate": 1.686839072098153e-05, + "loss": 2.4379, + "step": 16269 + }, + { + "epoch": 1.3130497942054717, + "grad_norm": 0.6636466383934021, + "learning_rate": 1.6859617400396533e-05, + "loss": 2.4334, + "step": 16270 + }, + { + "epoch": 1.3131304979420548, + "grad_norm": 0.649217963218689, + "learning_rate": 1.685084615188719e-05, + "loss": 2.319, + "step": 16271 + }, + { + "epoch": 1.3132112016786377, + "grad_norm": 0.7343039512634277, + "learning_rate": 1.6842076975672126e-05, + "loss": 2.3844, + "step": 16272 + }, + { + "epoch": 1.3132919054152208, + "grad_norm": 0.6916847825050354, + "learning_rate": 1.6833309871969894e-05, + "loss": 2.4544, + "step": 16273 + }, + { + "epoch": 1.3133726091518036, + "grad_norm": 0.6762102842330933, + "learning_rate": 1.6824544840998967e-05, + "loss": 2.3912, + "step": 16274 + }, + { + "epoch": 1.3134533128883867, + "grad_norm": 0.7327221035957336, + "learning_rate": 1.68157818829778e-05, + "loss": 2.4403, + "step": 16275 + }, + { + "epoch": 1.3135340166249696, + "grad_norm": 0.7362363338470459, + "learning_rate": 1.6807020998124812e-05, + "loss": 2.5169, + "step": 16276 + }, + { + "epoch": 1.3136147203615527, + "grad_norm": 0.6882300972938538, + "learning_rate": 1.679826218665832e-05, + "loss": 2.4139, + "step": 16277 + }, + { + "epoch": 1.3136954240981358, + "grad_norm": 0.7146984934806824, + "learning_rate": 1.6789505448796615e-05, + "loss": 2.4738, + "step": 16278 + }, + { + "epoch": 1.3137761278347186, + "grad_norm": 0.6581223607063293, + "learning_rate": 1.6780750784757947e-05, + "loss": 2.4617, + "step": 16279 + }, + { + "epoch": 1.3138568315713017, + "grad_norm": 0.7729318141937256, + "learning_rate": 1.6771998194760518e-05, + "loss": 2.4541, + "step": 16280 + }, + { + "epoch": 1.3139375353078848, + "grad_norm": 0.7617159485816956, + "learning_rate": 1.6763247679022442e-05, + 
"loss": 2.4727, + "step": 16281 + }, + { + "epoch": 1.3140182390444677, + "grad_norm": 0.6640555262565613, + "learning_rate": 1.6754499237761844e-05, + "loss": 2.4717, + "step": 16282 + }, + { + "epoch": 1.3140989427810508, + "grad_norm": 0.7289882898330688, + "learning_rate": 1.6745752871196707e-05, + "loss": 2.4515, + "step": 16283 + }, + { + "epoch": 1.3141796465176339, + "grad_norm": 0.7075887322425842, + "learning_rate": 1.6737008579545043e-05, + "loss": 2.4586, + "step": 16284 + }, + { + "epoch": 1.3142603502542167, + "grad_norm": 0.7152252197265625, + "learning_rate": 1.672826636302477e-05, + "loss": 2.512, + "step": 16285 + }, + { + "epoch": 1.3143410539907998, + "grad_norm": 0.6875295639038086, + "learning_rate": 1.6719526221853808e-05, + "loss": 2.4049, + "step": 16286 + }, + { + "epoch": 1.314421757727383, + "grad_norm": 0.6812484860420227, + "learning_rate": 1.671078815624991e-05, + "loss": 2.3705, + "step": 16287 + }, + { + "epoch": 1.3145024614639658, + "grad_norm": 0.664282500743866, + "learning_rate": 1.6702052166430904e-05, + "loss": 2.3776, + "step": 16288 + }, + { + "epoch": 1.3145831652005489, + "grad_norm": 0.7460842728614807, + "learning_rate": 1.66933182526145e-05, + "loss": 2.4525, + "step": 16289 + }, + { + "epoch": 1.3146638689371317, + "grad_norm": 0.6555477380752563, + "learning_rate": 1.6684586415018366e-05, + "loss": 2.3902, + "step": 16290 + }, + { + "epoch": 1.3147445726737148, + "grad_norm": 0.7191921472549438, + "learning_rate": 1.6675856653860135e-05, + "loss": 2.4957, + "step": 16291 + }, + { + "epoch": 1.3148252764102977, + "grad_norm": 0.738667368888855, + "learning_rate": 1.666712896935738e-05, + "loss": 2.4182, + "step": 16292 + }, + { + "epoch": 1.3149059801468808, + "grad_norm": 0.6764421463012695, + "learning_rate": 1.6658403361727593e-05, + "loss": 2.4179, + "step": 16293 + }, + { + "epoch": 1.3149866838834638, + "grad_norm": 0.6981594562530518, + "learning_rate": 1.6649679831188247e-05, + "loss": 2.4288, + "step": 16294 
+ }, + { + "epoch": 1.3150673876200467, + "grad_norm": 0.6657801866531372, + "learning_rate": 1.6640958377956784e-05, + "loss": 2.3716, + "step": 16295 + }, + { + "epoch": 1.3151480913566298, + "grad_norm": 0.7238973379135132, + "learning_rate": 1.6632239002250505e-05, + "loss": 2.438, + "step": 16296 + }, + { + "epoch": 1.3152287950932129, + "grad_norm": 0.6727766990661621, + "learning_rate": 1.6623521704286772e-05, + "loss": 2.4406, + "step": 16297 + }, + { + "epoch": 1.3153094988297958, + "grad_norm": 0.6741603016853333, + "learning_rate": 1.661480648428282e-05, + "loss": 2.4379, + "step": 16298 + }, + { + "epoch": 1.3153902025663788, + "grad_norm": 0.7174610495567322, + "learning_rate": 1.6606093342455865e-05, + "loss": 2.4368, + "step": 16299 + }, + { + "epoch": 1.315470906302962, + "grad_norm": 0.6604920029640198, + "learning_rate": 1.6597382279023057e-05, + "loss": 2.4431, + "step": 16300 + }, + { + "epoch": 1.3155516100395448, + "grad_norm": 0.6930821537971497, + "learning_rate": 1.6588673294201494e-05, + "loss": 2.4064, + "step": 16301 + }, + { + "epoch": 1.3156323137761279, + "grad_norm": 0.6489799618721008, + "learning_rate": 1.657996638820826e-05, + "loss": 2.4256, + "step": 16302 + }, + { + "epoch": 1.315713017512711, + "grad_norm": 0.6781083345413208, + "learning_rate": 1.65712615612603e-05, + "loss": 2.4731, + "step": 16303 + }, + { + "epoch": 1.3157937212492938, + "grad_norm": 0.6710748076438904, + "learning_rate": 1.656255881357458e-05, + "loss": 2.4065, + "step": 16304 + }, + { + "epoch": 1.315874424985877, + "grad_norm": 0.7099822163581848, + "learning_rate": 1.655385814536804e-05, + "loss": 2.3978, + "step": 16305 + }, + { + "epoch": 1.3159551287224598, + "grad_norm": 0.7215133905410767, + "learning_rate": 1.6545159556857447e-05, + "loss": 2.4655, + "step": 16306 + }, + { + "epoch": 1.3160358324590429, + "grad_norm": 0.7705253958702087, + "learning_rate": 1.6536463048259643e-05, + "loss": 2.4576, + "step": 16307 + }, + { + "epoch": 
1.3161165361956257, + "grad_norm": 0.6232311725616455, + "learning_rate": 1.6527768619791372e-05, + "loss": 2.3923, + "step": 16308 + }, + { + "epoch": 1.3161972399322088, + "grad_norm": 0.6599528789520264, + "learning_rate": 1.6519076271669264e-05, + "loss": 2.4236, + "step": 16309 + }, + { + "epoch": 1.316277943668792, + "grad_norm": 0.6598034501075745, + "learning_rate": 1.6510386004110023e-05, + "loss": 2.368, + "step": 16310 + }, + { + "epoch": 1.3163586474053748, + "grad_norm": 0.6949655413627625, + "learning_rate": 1.650169781733022e-05, + "loss": 2.4277, + "step": 16311 + }, + { + "epoch": 1.3164393511419579, + "grad_norm": 0.6838186383247375, + "learning_rate": 1.6493011711546358e-05, + "loss": 2.4413, + "step": 16312 + }, + { + "epoch": 1.316520054878541, + "grad_norm": 0.7026765942573547, + "learning_rate": 1.6484327686974933e-05, + "loss": 2.4628, + "step": 16313 + }, + { + "epoch": 1.3166007586151238, + "grad_norm": 0.745360791683197, + "learning_rate": 1.647564574383237e-05, + "loss": 2.4358, + "step": 16314 + }, + { + "epoch": 1.316681462351707, + "grad_norm": 0.676225483417511, + "learning_rate": 1.6466965882335083e-05, + "loss": 2.4119, + "step": 16315 + }, + { + "epoch": 1.31676216608829, + "grad_norm": 0.6767755150794983, + "learning_rate": 1.6458288102699325e-05, + "loss": 2.4322, + "step": 16316 + }, + { + "epoch": 1.3168428698248729, + "grad_norm": 0.6957309246063232, + "learning_rate": 1.6449612405141424e-05, + "loss": 2.4327, + "step": 16317 + }, + { + "epoch": 1.316923573561456, + "grad_norm": 0.6773050427436829, + "learning_rate": 1.64409387898776e-05, + "loss": 2.4207, + "step": 16318 + }, + { + "epoch": 1.3170042772980388, + "grad_norm": 0.7319278717041016, + "learning_rate": 1.6432267257123978e-05, + "loss": 2.445, + "step": 16319 + }, + { + "epoch": 1.317084981034622, + "grad_norm": 0.7531326413154602, + "learning_rate": 1.6423597807096714e-05, + "loss": 2.3948, + "step": 16320 + }, + { + "epoch": 1.3171656847712048, + "grad_norm": 
0.6741669178009033, + "learning_rate": 1.6414930440011854e-05, + "loss": 2.4177, + "step": 16321 + }, + { + "epoch": 1.3172463885077879, + "grad_norm": 0.6814963221549988, + "learning_rate": 1.640626515608543e-05, + "loss": 2.4419, + "step": 16322 + }, + { + "epoch": 1.317327092244371, + "grad_norm": 0.6740893721580505, + "learning_rate": 1.6397601955533392e-05, + "loss": 2.3516, + "step": 16323 + }, + { + "epoch": 1.3174077959809538, + "grad_norm": 0.7172163724899292, + "learning_rate": 1.6388940838571675e-05, + "loss": 2.4665, + "step": 16324 + }, + { + "epoch": 1.317488499717537, + "grad_norm": 0.6690489053726196, + "learning_rate": 1.6380281805416085e-05, + "loss": 2.3957, + "step": 16325 + }, + { + "epoch": 1.31756920345412, + "grad_norm": 0.7182994484901428, + "learning_rate": 1.6371624856282462e-05, + "loss": 2.4456, + "step": 16326 + }, + { + "epoch": 1.3176499071907029, + "grad_norm": 0.6324366927146912, + "learning_rate": 1.636296999138659e-05, + "loss": 2.4111, + "step": 16327 + }, + { + "epoch": 1.317730610927286, + "grad_norm": 0.6740162372589111, + "learning_rate": 1.6354317210944093e-05, + "loss": 2.451, + "step": 16328 + }, + { + "epoch": 1.317811314663869, + "grad_norm": 0.6964122653007507, + "learning_rate": 1.6345666515170665e-05, + "loss": 2.4269, + "step": 16329 + }, + { + "epoch": 1.317892018400452, + "grad_norm": 0.7093058824539185, + "learning_rate": 1.6337017904281915e-05, + "loss": 2.4686, + "step": 16330 + }, + { + "epoch": 1.317972722137035, + "grad_norm": 0.693233072757721, + "learning_rate": 1.6328371378493367e-05, + "loss": 2.4149, + "step": 16331 + }, + { + "epoch": 1.318053425873618, + "grad_norm": 0.6418019533157349, + "learning_rate": 1.631972693802052e-05, + "loss": 2.4268, + "step": 16332 + }, + { + "epoch": 1.318134129610201, + "grad_norm": 0.6815310120582581, + "learning_rate": 1.631108458307883e-05, + "loss": 2.4274, + "step": 16333 + }, + { + "epoch": 1.318214833346784, + "grad_norm": 0.6774280071258545, + "learning_rate": 
1.630244431388369e-05, + "loss": 2.3927, + "step": 16334 + }, + { + "epoch": 1.3182955370833669, + "grad_norm": 0.688090443611145, + "learning_rate": 1.6293806130650413e-05, + "loss": 2.4013, + "step": 16335 + }, + { + "epoch": 1.31837624081995, + "grad_norm": 0.7300553321838379, + "learning_rate": 1.6285170033594288e-05, + "loss": 2.4716, + "step": 16336 + }, + { + "epoch": 1.3184569445565328, + "grad_norm": 0.6798286437988281, + "learning_rate": 1.627653602293059e-05, + "loss": 2.3893, + "step": 16337 + }, + { + "epoch": 1.318537648293116, + "grad_norm": 0.6699275970458984, + "learning_rate": 1.6267904098874442e-05, + "loss": 2.4446, + "step": 16338 + }, + { + "epoch": 1.318618352029699, + "grad_norm": 0.7632322311401367, + "learning_rate": 1.6259274261641e-05, + "loss": 2.4434, + "step": 16339 + }, + { + "epoch": 1.3186990557662819, + "grad_norm": 0.7156099677085876, + "learning_rate": 1.6250646511445343e-05, + "loss": 2.4142, + "step": 16340 + }, + { + "epoch": 1.318779759502865, + "grad_norm": 0.7525599598884583, + "learning_rate": 1.6242020848502505e-05, + "loss": 2.3543, + "step": 16341 + }, + { + "epoch": 1.318860463239448, + "grad_norm": 0.7063113451004028, + "learning_rate": 1.623339727302745e-05, + "loss": 2.4754, + "step": 16342 + }, + { + "epoch": 1.318941166976031, + "grad_norm": 0.7138137221336365, + "learning_rate": 1.6224775785235123e-05, + "loss": 2.4223, + "step": 16343 + }, + { + "epoch": 1.319021870712614, + "grad_norm": 0.6976706981658936, + "learning_rate": 1.6216156385340352e-05, + "loss": 2.4878, + "step": 16344 + }, + { + "epoch": 1.319102574449197, + "grad_norm": 0.6931003332138062, + "learning_rate": 1.6207539073557974e-05, + "loss": 2.39, + "step": 16345 + }, + { + "epoch": 1.31918327818578, + "grad_norm": 0.6919357180595398, + "learning_rate": 1.6198923850102765e-05, + "loss": 2.4197, + "step": 16346 + }, + { + "epoch": 1.319263981922363, + "grad_norm": 0.7453805804252625, + "learning_rate": 1.619031071518945e-05, + "loss": 2.4226, + 
"step": 16347 + }, + { + "epoch": 1.3193446856589461, + "grad_norm": 0.6990562677383423, + "learning_rate": 1.6181699669032658e-05, + "loss": 2.3925, + "step": 16348 + }, + { + "epoch": 1.319425389395529, + "grad_norm": 0.6974303126335144, + "learning_rate": 1.6173090711847006e-05, + "loss": 2.445, + "step": 16349 + }, + { + "epoch": 1.319506093132112, + "grad_norm": 0.7278286814689636, + "learning_rate": 1.6164483843847057e-05, + "loss": 2.3869, + "step": 16350 + }, + { + "epoch": 1.319586796868695, + "grad_norm": 0.7282646298408508, + "learning_rate": 1.6155879065247326e-05, + "loss": 2.3694, + "step": 16351 + }, + { + "epoch": 1.319667500605278, + "grad_norm": 0.7329844832420349, + "learning_rate": 1.6147276376262255e-05, + "loss": 2.4369, + "step": 16352 + }, + { + "epoch": 1.319748204341861, + "grad_norm": 0.6499385833740234, + "learning_rate": 1.613867577710627e-05, + "loss": 2.441, + "step": 16353 + }, + { + "epoch": 1.319828908078444, + "grad_norm": 0.7026061415672302, + "learning_rate": 1.6130077267993683e-05, + "loss": 2.4117, + "step": 16354 + }, + { + "epoch": 1.319909611815027, + "grad_norm": 0.7007814049720764, + "learning_rate": 1.6121480849138803e-05, + "loss": 2.4287, + "step": 16355 + }, + { + "epoch": 1.31999031555161, + "grad_norm": 0.6525697708129883, + "learning_rate": 1.611288652075591e-05, + "loss": 2.3969, + "step": 16356 + }, + { + "epoch": 1.320071019288193, + "grad_norm": 0.7268216609954834, + "learning_rate": 1.610429428305914e-05, + "loss": 2.4227, + "step": 16357 + }, + { + "epoch": 1.3201517230247761, + "grad_norm": 0.6665107011795044, + "learning_rate": 1.6095704136262668e-05, + "loss": 2.3694, + "step": 16358 + }, + { + "epoch": 1.320232426761359, + "grad_norm": 0.6832399368286133, + "learning_rate": 1.60871160805806e-05, + "loss": 2.4001, + "step": 16359 + }, + { + "epoch": 1.320313130497942, + "grad_norm": 0.6788592338562012, + "learning_rate": 1.6078530116226897e-05, + "loss": 2.4294, + "step": 16360 + }, + { + "epoch": 
1.3203938342345252, + "grad_norm": 0.7147449254989624, + "learning_rate": 1.6069946243415625e-05, + "loss": 2.3904, + "step": 16361 + }, + { + "epoch": 1.320474537971108, + "grad_norm": 0.7014418840408325, + "learning_rate": 1.6061364462360683e-05, + "loss": 2.4026, + "step": 16362 + }, + { + "epoch": 1.3205552417076911, + "grad_norm": 0.6867612600326538, + "learning_rate": 1.6052784773275987e-05, + "loss": 2.4092, + "step": 16363 + }, + { + "epoch": 1.3206359454442742, + "grad_norm": 0.6588961482048035, + "learning_rate": 1.6044207176375303e-05, + "loss": 2.4588, + "step": 16364 + }, + { + "epoch": 1.320716649180857, + "grad_norm": 0.688671350479126, + "learning_rate": 1.6035631671872444e-05, + "loss": 2.3957, + "step": 16365 + }, + { + "epoch": 1.3207973529174402, + "grad_norm": 0.7548064589500427, + "learning_rate": 1.6027058259981154e-05, + "loss": 2.4168, + "step": 16366 + }, + { + "epoch": 1.320878056654023, + "grad_norm": 0.7251972556114197, + "learning_rate": 1.6018486940915044e-05, + "loss": 2.4704, + "step": 16367 + }, + { + "epoch": 1.3209587603906061, + "grad_norm": 0.73149174451828, + "learning_rate": 1.6009917714887778e-05, + "loss": 2.4597, + "step": 16368 + }, + { + "epoch": 1.321039464127189, + "grad_norm": 0.6741003394126892, + "learning_rate": 1.600135058211294e-05, + "loss": 2.3876, + "step": 16369 + }, + { + "epoch": 1.321120167863772, + "grad_norm": 0.6891310214996338, + "learning_rate": 1.5992785542804e-05, + "loss": 2.4229, + "step": 16370 + }, + { + "epoch": 1.3212008716003552, + "grad_norm": 0.7529458403587341, + "learning_rate": 1.5984222597174415e-05, + "loss": 2.45, + "step": 16371 + }, + { + "epoch": 1.321281575336938, + "grad_norm": 0.708134651184082, + "learning_rate": 1.5975661745437664e-05, + "loss": 2.454, + "step": 16372 + }, + { + "epoch": 1.321362279073521, + "grad_norm": 0.7511130571365356, + "learning_rate": 1.596710298780705e-05, + "loss": 2.4201, + "step": 16373 + }, + { + "epoch": 1.3214429828101042, + "grad_norm": 
0.6599537134170532, + "learning_rate": 1.595854632449588e-05, + "loss": 2.3982, + "step": 16374 + }, + { + "epoch": 1.321523686546687, + "grad_norm": 0.6821228861808777, + "learning_rate": 1.5949991755717453e-05, + "loss": 2.4525, + "step": 16375 + }, + { + "epoch": 1.3216043902832701, + "grad_norm": 0.6872302293777466, + "learning_rate": 1.5941439281684923e-05, + "loss": 2.3631, + "step": 16376 + }, + { + "epoch": 1.3216850940198532, + "grad_norm": 0.6650066375732422, + "learning_rate": 1.5932888902611453e-05, + "loss": 2.3718, + "step": 16377 + }, + { + "epoch": 1.321765797756436, + "grad_norm": 0.6620016694068909, + "learning_rate": 1.5924340618710143e-05, + "loss": 2.4076, + "step": 16378 + }, + { + "epoch": 1.3218465014930192, + "grad_norm": 0.694807231426239, + "learning_rate": 1.5915794430194066e-05, + "loss": 2.4369, + "step": 16379 + }, + { + "epoch": 1.321927205229602, + "grad_norm": 0.6810131669044495, + "learning_rate": 1.590725033727616e-05, + "loss": 2.4151, + "step": 16380 + }, + { + "epoch": 1.3220079089661851, + "grad_norm": 0.768846333026886, + "learning_rate": 1.58987083401694e-05, + "loss": 2.4991, + "step": 16381 + }, + { + "epoch": 1.322088612702768, + "grad_norm": 0.6581698656082153, + "learning_rate": 1.5890168439086672e-05, + "loss": 2.4263, + "step": 16382 + }, + { + "epoch": 1.322169316439351, + "grad_norm": 0.7267034649848938, + "learning_rate": 1.5881630634240818e-05, + "loss": 2.4219, + "step": 16383 + }, + { + "epoch": 1.3222500201759342, + "grad_norm": 0.7391555905342102, + "learning_rate": 1.5873094925844612e-05, + "loss": 2.427, + "step": 16384 + }, + { + "epoch": 1.322330723912517, + "grad_norm": 0.6612021923065186, + "learning_rate": 1.5864561314110815e-05, + "loss": 2.4108, + "step": 16385 + }, + { + "epoch": 1.3224114276491001, + "grad_norm": 0.7118437886238098, + "learning_rate": 1.585602979925206e-05, + "loss": 2.3839, + "step": 16386 + }, + { + "epoch": 1.3224921313856832, + "grad_norm": 0.6663616299629211, + 
"learning_rate": 1.5847500381480997e-05, + "loss": 2.4302, + "step": 16387 + }, + { + "epoch": 1.322572835122266, + "grad_norm": 0.6848715543746948, + "learning_rate": 1.583897306101022e-05, + "loss": 2.4228, + "step": 16388 + }, + { + "epoch": 1.3226535388588492, + "grad_norm": 0.680895209312439, + "learning_rate": 1.5830447838052208e-05, + "loss": 2.4457, + "step": 16389 + }, + { + "epoch": 1.3227342425954323, + "grad_norm": 0.683276891708374, + "learning_rate": 1.582192471281946e-05, + "loss": 2.4412, + "step": 16390 + }, + { + "epoch": 1.3228149463320151, + "grad_norm": 0.7311880588531494, + "learning_rate": 1.5813403685524396e-05, + "loss": 2.4604, + "step": 16391 + }, + { + "epoch": 1.3228956500685982, + "grad_norm": 0.6769095659255981, + "learning_rate": 1.580488475637937e-05, + "loss": 2.4311, + "step": 16392 + }, + { + "epoch": 1.3229763538051813, + "grad_norm": 0.6683096289634705, + "learning_rate": 1.579636792559671e-05, + "loss": 2.445, + "step": 16393 + }, + { + "epoch": 1.3230570575417642, + "grad_norm": 0.7268782258033752, + "learning_rate": 1.5787853193388667e-05, + "loss": 2.4176, + "step": 16394 + }, + { + "epoch": 1.3231377612783473, + "grad_norm": 0.6878541707992554, + "learning_rate": 1.5779340559967494e-05, + "loss": 2.4615, + "step": 16395 + }, + { + "epoch": 1.3232184650149301, + "grad_norm": 0.7031291127204895, + "learning_rate": 1.577083002554527e-05, + "loss": 2.3726, + "step": 16396 + }, + { + "epoch": 1.3232991687515132, + "grad_norm": 0.7738708853721619, + "learning_rate": 1.5762321590334138e-05, + "loss": 2.5046, + "step": 16397 + }, + { + "epoch": 1.323379872488096, + "grad_norm": 0.6660913228988647, + "learning_rate": 1.575381525454619e-05, + "loss": 2.3759, + "step": 16398 + }, + { + "epoch": 1.3234605762246792, + "grad_norm": 0.6534021496772766, + "learning_rate": 1.574531101839335e-05, + "loss": 2.3983, + "step": 16399 + }, + { + "epoch": 1.3235412799612623, + "grad_norm": 0.6645511388778687, + "learning_rate": 
1.5736808882087606e-05, + "loss": 2.3958, + "step": 16400 + }, + { + "epoch": 1.3236219836978451, + "grad_norm": 0.6723225712776184, + "learning_rate": 1.5728308845840855e-05, + "loss": 2.4248, + "step": 16401 + }, + { + "epoch": 1.3237026874344282, + "grad_norm": 0.6609976887702942, + "learning_rate": 1.5719810909864942e-05, + "loss": 2.3888, + "step": 16402 + }, + { + "epoch": 1.3237833911710113, + "grad_norm": 0.6713845729827881, + "learning_rate": 1.5711315074371635e-05, + "loss": 2.4474, + "step": 16403 + }, + { + "epoch": 1.3238640949075942, + "grad_norm": 0.701438307762146, + "learning_rate": 1.5702821339572726e-05, + "loss": 2.4673, + "step": 16404 + }, + { + "epoch": 1.3239447986441772, + "grad_norm": 0.7235428094863892, + "learning_rate": 1.5694329705679834e-05, + "loss": 2.3825, + "step": 16405 + }, + { + "epoch": 1.3240255023807603, + "grad_norm": 0.6785053610801697, + "learning_rate": 1.568584017290462e-05, + "loss": 2.4668, + "step": 16406 + }, + { + "epoch": 1.3241062061173432, + "grad_norm": 0.6918929815292358, + "learning_rate": 1.5677352741458705e-05, + "loss": 2.4329, + "step": 16407 + }, + { + "epoch": 1.3241869098539263, + "grad_norm": 0.7194826006889343, + "learning_rate": 1.5668867411553544e-05, + "loss": 2.3717, + "step": 16408 + }, + { + "epoch": 1.3242676135905094, + "grad_norm": 0.7299134731292725, + "learning_rate": 1.5660384183400658e-05, + "loss": 2.4695, + "step": 16409 + }, + { + "epoch": 1.3243483173270922, + "grad_norm": 0.7047600746154785, + "learning_rate": 1.565190305721147e-05, + "loss": 2.4525, + "step": 16410 + }, + { + "epoch": 1.3244290210636753, + "grad_norm": 0.685001015663147, + "learning_rate": 1.5643424033197328e-05, + "loss": 2.322, + "step": 16411 + }, + { + "epoch": 1.3245097248002582, + "grad_norm": 0.7696635127067566, + "learning_rate": 1.5634947111569588e-05, + "loss": 2.4464, + "step": 16412 + }, + { + "epoch": 1.3245904285368413, + "grad_norm": 0.7066066265106201, + "learning_rate": 1.5626472292539485e-05, + 
"loss": 2.4315, + "step": 16413 + }, + { + "epoch": 1.3246711322734241, + "grad_norm": 0.6553033590316772, + "learning_rate": 1.5617999576318276e-05, + "loss": 2.4296, + "step": 16414 + }, + { + "epoch": 1.3247518360100072, + "grad_norm": 0.7031354308128357, + "learning_rate": 1.560952896311707e-05, + "loss": 2.4565, + "step": 16415 + }, + { + "epoch": 1.3248325397465903, + "grad_norm": 0.7826353311538696, + "learning_rate": 1.560106045314701e-05, + "loss": 2.4275, + "step": 16416 + }, + { + "epoch": 1.3249132434831732, + "grad_norm": 0.6408981084823608, + "learning_rate": 1.559259404661916e-05, + "loss": 2.3869, + "step": 16417 + }, + { + "epoch": 1.3249939472197563, + "grad_norm": 0.7487547993659973, + "learning_rate": 1.558412974374448e-05, + "loss": 2.3678, + "step": 16418 + }, + { + "epoch": 1.3250746509563394, + "grad_norm": 0.7163991332054138, + "learning_rate": 1.5575667544733963e-05, + "loss": 2.397, + "step": 16419 + }, + { + "epoch": 1.3251553546929222, + "grad_norm": 0.6933553814888, + "learning_rate": 1.5567207449798515e-05, + "loss": 2.424, + "step": 16420 + }, + { + "epoch": 1.3252360584295053, + "grad_norm": 0.687406063079834, + "learning_rate": 1.5558749459148945e-05, + "loss": 2.4346, + "step": 16421 + }, + { + "epoch": 1.3253167621660884, + "grad_norm": 0.6781243681907654, + "learning_rate": 1.5550293572996054e-05, + "loss": 2.4526, + "step": 16422 + }, + { + "epoch": 1.3253974659026713, + "grad_norm": 0.6632506847381592, + "learning_rate": 1.5541839791550616e-05, + "loss": 2.4559, + "step": 16423 + }, + { + "epoch": 1.3254781696392544, + "grad_norm": 0.668396532535553, + "learning_rate": 1.5533388115023327e-05, + "loss": 2.4463, + "step": 16424 + }, + { + "epoch": 1.3255588733758372, + "grad_norm": 0.6853309869766235, + "learning_rate": 1.552493854362479e-05, + "loss": 2.429, + "step": 16425 + }, + { + "epoch": 1.3256395771124203, + "grad_norm": 0.7443413138389587, + "learning_rate": 1.5516491077565597e-05, + "loss": 2.4091, + "step": 16426 + }, 
+ { + "epoch": 1.3257202808490032, + "grad_norm": 0.690170168876648, + "learning_rate": 1.550804571705632e-05, + "loss": 2.3942, + "step": 16427 + }, + { + "epoch": 1.3258009845855863, + "grad_norm": NaN, + "learning_rate": 1.550804571705632e-05, + "loss": 2.3788, + "step": 16428 + }, + { + "epoch": 1.3258816883221693, + "grad_norm": 0.6901132464408875, + "learning_rate": 1.5499602462307373e-05, + "loss": 2.3859, + "step": 16429 + }, + { + "epoch": 1.3259623920587522, + "grad_norm": 0.6639334559440613, + "learning_rate": 1.5491161313529223e-05, + "loss": 2.4271, + "step": 16430 + }, + { + "epoch": 1.3260430957953353, + "grad_norm": 0.7121936678886414, + "learning_rate": 1.548272227093227e-05, + "loss": 2.3818, + "step": 16431 + }, + { + "epoch": 1.3261237995319184, + "grad_norm": 0.6863218545913696, + "learning_rate": 1.5474285334726778e-05, + "loss": 2.3744, + "step": 16432 + }, + { + "epoch": 1.3262045032685013, + "grad_norm": 0.6697081327438354, + "learning_rate": 1.5465850505123057e-05, + "loss": 2.4001, + "step": 16433 + }, + { + "epoch": 1.3262852070050843, + "grad_norm": 0.7258912324905396, + "learning_rate": 1.5457417782331308e-05, + "loss": 2.4556, + "step": 16434 + }, + { + "epoch": 1.3263659107416674, + "grad_norm": 0.6930057406425476, + "learning_rate": 1.5448987166561712e-05, + "loss": 2.4979, + "step": 16435 + }, + { + "epoch": 1.3264466144782503, + "grad_norm": 0.6475574970245361, + "learning_rate": 1.5440558658024363e-05, + "loss": 2.3821, + "step": 16436 + }, + { + "epoch": 1.3265273182148334, + "grad_norm": 0.7489237785339355, + "learning_rate": 1.5432132256929367e-05, + "loss": 2.465, + "step": 16437 + }, + { + "epoch": 1.3266080219514165, + "grad_norm": 0.704391360282898, + "learning_rate": 1.5423707963486667e-05, + "loss": 2.433, + "step": 16438 + }, + { + "epoch": 1.3266887256879993, + "grad_norm": 0.669452965259552, + "learning_rate": 1.5415285777906253e-05, + "loss": 2.3981, + "step": 16439 + }, + { + "epoch": 1.3267694294245824, + 
"grad_norm": 0.6961604356765747, + "learning_rate": 1.540686570039802e-05, + "loss": 2.4684, + "step": 16440 + }, + { + "epoch": 1.3268501331611653, + "grad_norm": 0.6613924503326416, + "learning_rate": 1.539844773117185e-05, + "loss": 2.3711, + "step": 16441 + }, + { + "epoch": 1.3269308368977484, + "grad_norm": 0.7019763588905334, + "learning_rate": 1.5390031870437492e-05, + "loss": 2.3716, + "step": 16442 + }, + { + "epoch": 1.3270115406343312, + "grad_norm": 0.700176477432251, + "learning_rate": 1.5381618118404707e-05, + "loss": 2.4305, + "step": 16443 + }, + { + "epoch": 1.3270922443709143, + "grad_norm": 0.6716598272323608, + "learning_rate": 1.5373206475283197e-05, + "loss": 2.3835, + "step": 16444 + }, + { + "epoch": 1.3271729481074974, + "grad_norm": 0.6449697017669678, + "learning_rate": 1.53647969412826e-05, + "loss": 2.3707, + "step": 16445 + }, + { + "epoch": 1.3272536518440803, + "grad_norm": 0.7276685237884521, + "learning_rate": 1.535638951661249e-05, + "loss": 2.4313, + "step": 16446 + }, + { + "epoch": 1.3273343555806634, + "grad_norm": 0.7144705057144165, + "learning_rate": 1.5347984201482456e-05, + "loss": 2.4122, + "step": 16447 + }, + { + "epoch": 1.3274150593172465, + "grad_norm": 0.660225510597229, + "learning_rate": 1.53395809961019e-05, + "loss": 2.4282, + "step": 16448 + }, + { + "epoch": 1.3274957630538293, + "grad_norm": 0.7431676983833313, + "learning_rate": 1.5331179900680293e-05, + "loss": 2.3863, + "step": 16449 + }, + { + "epoch": 1.3275764667904124, + "grad_norm": 0.6670290231704712, + "learning_rate": 1.5322780915427036e-05, + "loss": 2.4266, + "step": 16450 + }, + { + "epoch": 1.3276571705269955, + "grad_norm": 0.711098313331604, + "learning_rate": 1.531438404055141e-05, + "loss": 2.4431, + "step": 16451 + }, + { + "epoch": 1.3277378742635784, + "grad_norm": 0.6908091902732849, + "learning_rate": 1.5305989276262688e-05, + "loss": 2.4153, + "step": 16452 + }, + { + "epoch": 1.3278185780001615, + "grad_norm": 0.7458107471466064, + 
"learning_rate": 1.5297596622770115e-05, + "loss": 2.4076, + "step": 16453 + }, + { + "epoch": 1.3278992817367445, + "grad_norm": 0.7406951189041138, + "learning_rate": 1.528920608028285e-05, + "loss": 2.3585, + "step": 16454 + }, + { + "epoch": 1.3279799854733274, + "grad_norm": 0.718824565410614, + "learning_rate": 1.5280817649010005e-05, + "loss": 2.4092, + "step": 16455 + }, + { + "epoch": 1.3280606892099105, + "grad_norm": 0.7163959741592407, + "learning_rate": 1.527243132916064e-05, + "loss": 2.4344, + "step": 16456 + }, + { + "epoch": 1.3281413929464934, + "grad_norm": 0.6695916652679443, + "learning_rate": 1.5264047120943793e-05, + "loss": 2.4144, + "step": 16457 + }, + { + "epoch": 1.3282220966830764, + "grad_norm": 0.6858509182929993, + "learning_rate": 1.5255665024568366e-05, + "loss": 2.4345, + "step": 16458 + }, + { + "epoch": 1.3283028004196593, + "grad_norm": 0.7277235388755798, + "learning_rate": 1.5247285040243297e-05, + "loss": 2.4219, + "step": 16459 + }, + { + "epoch": 1.3283835041562424, + "grad_norm": 0.6481949090957642, + "learning_rate": 1.5238907168177441e-05, + "loss": 2.4483, + "step": 16460 + }, + { + "epoch": 1.3284642078928255, + "grad_norm": 0.6956833600997925, + "learning_rate": 1.5230531408579574e-05, + "loss": 2.4241, + "step": 16461 + }, + { + "epoch": 1.3285449116294084, + "grad_norm": 0.7266185879707336, + "learning_rate": 1.522215776165845e-05, + "loss": 2.4577, + "step": 16462 + }, + { + "epoch": 1.3286256153659914, + "grad_norm": 0.725574254989624, + "learning_rate": 1.5213786227622773e-05, + "loss": 2.4451, + "step": 16463 + }, + { + "epoch": 1.3287063191025745, + "grad_norm": 0.7550850510597229, + "learning_rate": 1.5205416806681172e-05, + "loss": 2.4262, + "step": 16464 + }, + { + "epoch": 1.3287870228391574, + "grad_norm": 0.6391028761863708, + "learning_rate": 1.5197049499042237e-05, + "loss": 2.4116, + "step": 16465 + }, + { + "epoch": 1.3288677265757405, + "grad_norm": 0.6899027824401855, + "learning_rate": 
1.5188684304914524e-05, + "loss": 2.3754, + "step": 16466 + }, + { + "epoch": 1.3289484303123236, + "grad_norm": 0.696681022644043, + "learning_rate": 1.518032122450649e-05, + "loss": 2.471, + "step": 16467 + }, + { + "epoch": 1.3290291340489064, + "grad_norm": 0.7090939283370972, + "learning_rate": 1.5171960258026551e-05, + "loss": 2.4153, + "step": 16468 + }, + { + "epoch": 1.3291098377854895, + "grad_norm": 0.7125746607780457, + "learning_rate": 1.5163601405683148e-05, + "loss": 2.4102, + "step": 16469 + }, + { + "epoch": 1.3291905415220726, + "grad_norm": 0.7407518029212952, + "learning_rate": 1.5155244667684531e-05, + "loss": 2.429, + "step": 16470 + }, + { + "epoch": 1.3292712452586555, + "grad_norm": 0.7401885390281677, + "learning_rate": 1.5146890044239004e-05, + "loss": 2.4577, + "step": 16471 + }, + { + "epoch": 1.3293519489952383, + "grad_norm": 0.7625757455825806, + "learning_rate": 1.5138537535554786e-05, + "loss": 2.3813, + "step": 16472 + }, + { + "epoch": 1.3294326527318214, + "grad_norm": 0.7423396706581116, + "learning_rate": 1.5130187141840057e-05, + "loss": 2.3797, + "step": 16473 + }, + { + "epoch": 1.3295133564684045, + "grad_norm": 0.7029228806495667, + "learning_rate": 1.5121838863302884e-05, + "loss": 2.4203, + "step": 16474 + }, + { + "epoch": 1.3295940602049874, + "grad_norm": 0.8062863349914551, + "learning_rate": 1.5113492700151378e-05, + "loss": 2.3743, + "step": 16475 + }, + { + "epoch": 1.3296747639415705, + "grad_norm": 0.7113343477249146, + "learning_rate": 1.5105148652593548e-05, + "loss": 2.3837, + "step": 16476 + }, + { + "epoch": 1.3297554676781536, + "grad_norm": 0.6733126044273376, + "learning_rate": 1.5096806720837309e-05, + "loss": 2.4677, + "step": 16477 + }, + { + "epoch": 1.3298361714147364, + "grad_norm": 0.6936657428741455, + "learning_rate": 1.5088466905090593e-05, + "loss": 2.3677, + "step": 16478 + }, + { + "epoch": 1.3299168751513195, + "grad_norm": 0.746746301651001, + "learning_rate": 1.5080129205561255e-05, + 
"loss": 2.423, + "step": 16479 + }, + { + "epoch": 1.3299975788879026, + "grad_norm": 0.6879116296768188, + "learning_rate": 1.5071793622457065e-05, + "loss": 2.4867, + "step": 16480 + }, + { + "epoch": 1.3300782826244855, + "grad_norm": 0.6841214299201965, + "learning_rate": 1.5063460155985776e-05, + "loss": 2.5015, + "step": 16481 + }, + { + "epoch": 1.3301589863610686, + "grad_norm": 0.6955111622810364, + "learning_rate": 1.5055128806355123e-05, + "loss": 2.3975, + "step": 16482 + }, + { + "epoch": 1.3302396900976516, + "grad_norm": 0.7084987163543701, + "learning_rate": 1.5046799573772673e-05, + "loss": 2.4511, + "step": 16483 + }, + { + "epoch": 1.3303203938342345, + "grad_norm": 0.6905840039253235, + "learning_rate": 1.5038472458446051e-05, + "loss": 2.3542, + "step": 16484 + }, + { + "epoch": 1.3304010975708176, + "grad_norm": 0.7182672023773193, + "learning_rate": 1.5030147460582788e-05, + "loss": 2.3673, + "step": 16485 + }, + { + "epoch": 1.3304818013074005, + "grad_norm": 0.6805183291435242, + "learning_rate": 1.5021824580390353e-05, + "loss": 2.3751, + "step": 16486 + }, + { + "epoch": 1.3305625050439835, + "grad_norm": 0.6278836727142334, + "learning_rate": 1.5013503818076202e-05, + "loss": 2.3508, + "step": 16487 + }, + { + "epoch": 1.3306432087805664, + "grad_norm": 0.664000391960144, + "learning_rate": 1.500518517384768e-05, + "loss": 2.4039, + "step": 16488 + }, + { + "epoch": 1.3307239125171495, + "grad_norm": 0.6906681060791016, + "learning_rate": 1.4996868647912155e-05, + "loss": 2.4068, + "step": 16489 + }, + { + "epoch": 1.3308046162537326, + "grad_norm": 0.6756102442741394, + "learning_rate": 1.4988554240476826e-05, + "loss": 2.4423, + "step": 16490 + }, + { + "epoch": 1.3308853199903155, + "grad_norm": 0.7013095021247864, + "learning_rate": 1.4980241951748964e-05, + "loss": 2.3536, + "step": 16491 + }, + { + "epoch": 1.3309660237268985, + "grad_norm": 0.6689851880073547, + "learning_rate": 1.4971931781935732e-05, + "loss": 2.4192, + "step": 
16492 + }, + { + "epoch": 1.3310467274634816, + "grad_norm": 0.6411572694778442, + "learning_rate": 1.4963623731244202e-05, + "loss": 2.4012, + "step": 16493 + }, + { + "epoch": 1.3311274312000645, + "grad_norm": 0.7209812998771667, + "learning_rate": 1.4955317799881453e-05, + "loss": 2.378, + "step": 16494 + }, + { + "epoch": 1.3312081349366476, + "grad_norm": 0.7041119933128357, + "learning_rate": 1.4947013988054504e-05, + "loss": 2.4047, + "step": 16495 + }, + { + "epoch": 1.3312888386732307, + "grad_norm": 0.6928852796554565, + "learning_rate": 1.4938712295970292e-05, + "loss": 2.4489, + "step": 16496 + }, + { + "epoch": 1.3313695424098135, + "grad_norm": 0.6923524141311646, + "learning_rate": 1.4930412723835718e-05, + "loss": 2.3752, + "step": 16497 + }, + { + "epoch": 1.3314502461463966, + "grad_norm": 0.7034686803817749, + "learning_rate": 1.4922115271857662e-05, + "loss": 2.3898, + "step": 16498 + }, + { + "epoch": 1.3315309498829797, + "grad_norm": 0.6717320084571838, + "learning_rate": 1.4913819940242856e-05, + "loss": 2.3629, + "step": 16499 + }, + { + "epoch": 1.3316116536195626, + "grad_norm": 0.6885079741477966, + "learning_rate": 1.4905526729198083e-05, + "loss": 2.4321, + "step": 16500 + }, + { + "epoch": 1.3316923573561457, + "grad_norm": 0.662452757358551, + "learning_rate": 1.489723563893004e-05, + "loss": 2.4532, + "step": 16501 + }, + { + "epoch": 1.3317730610927285, + "grad_norm": 0.6650903224945068, + "learning_rate": 1.4888946669645332e-05, + "loss": 2.4347, + "step": 16502 + }, + { + "epoch": 1.3318537648293116, + "grad_norm": 0.7217590808868408, + "learning_rate": 1.4880659821550546e-05, + "loss": 2.4641, + "step": 16503 + }, + { + "epoch": 1.3319344685658945, + "grad_norm": 0.7063763737678528, + "learning_rate": 1.4872375094852232e-05, + "loss": 2.4365, + "step": 16504 + }, + { + "epoch": 1.3320151723024776, + "grad_norm": 0.7366454005241394, + "learning_rate": 1.4864092489756853e-05, + "loss": 2.4223, + "step": 16505 + }, + { + "epoch": 
1.3320958760390607, + "grad_norm": 0.7132206559181213, + "learning_rate": 1.4855812006470838e-05, + "loss": 2.4404, + "step": 16506 + }, + { + "epoch": 1.3321765797756435, + "grad_norm": 0.665553867816925, + "learning_rate": 1.484753364520055e-05, + "loss": 2.3818, + "step": 16507 + }, + { + "epoch": 1.3322572835122266, + "grad_norm": 0.7854028344154358, + "learning_rate": 1.483925740615234e-05, + "loss": 2.4111, + "step": 16508 + }, + { + "epoch": 1.3323379872488097, + "grad_norm": 0.7331317663192749, + "learning_rate": 1.4830983289532418e-05, + "loss": 2.4446, + "step": 16509 + }, + { + "epoch": 1.3324186909853926, + "grad_norm": 0.670315146446228, + "learning_rate": 1.4822711295547042e-05, + "loss": 2.4017, + "step": 16510 + }, + { + "epoch": 1.3324993947219756, + "grad_norm": 0.7242144346237183, + "learning_rate": 1.481444142440237e-05, + "loss": 2.4281, + "step": 16511 + }, + { + "epoch": 1.3325800984585587, + "grad_norm": 0.7108538746833801, + "learning_rate": 1.4806173676304468e-05, + "loss": 2.4331, + "step": 16512 + }, + { + "epoch": 1.3326608021951416, + "grad_norm": 0.658989667892456, + "learning_rate": 1.479790805145943e-05, + "loss": 2.4321, + "step": 16513 + }, + { + "epoch": 1.3327415059317247, + "grad_norm": 0.6596404314041138, + "learning_rate": 1.4789644550073233e-05, + "loss": 2.3817, + "step": 16514 + }, + { + "epoch": 1.3328222096683078, + "grad_norm": 0.6922028064727783, + "learning_rate": 1.4781383172351837e-05, + "loss": 2.399, + "step": 16515 + }, + { + "epoch": 1.3329029134048906, + "grad_norm": 0.750747799873352, + "learning_rate": 1.4773123918501141e-05, + "loss": 2.4502, + "step": 16516 + }, + { + "epoch": 1.3329836171414737, + "grad_norm": 0.6887632608413696, + "learning_rate": 1.4764866788727006e-05, + "loss": 2.3636, + "step": 16517 + }, + { + "epoch": 1.3330643208780566, + "grad_norm": 0.6751166582107544, + "learning_rate": 1.4756611783235163e-05, + "loss": 2.3956, + "step": 16518 + }, + { + "epoch": 1.3331450246146397, + 
"grad_norm": 0.679040253162384, + "learning_rate": 1.4748358902231395e-05, + "loss": 2.4044, + "step": 16519 + }, + { + "epoch": 1.3332257283512225, + "grad_norm": 0.6396780610084534, + "learning_rate": 1.4740108145921373e-05, + "loss": 2.4114, + "step": 16520 + }, + { + "epoch": 1.3333064320878056, + "grad_norm": 0.6686230301856995, + "learning_rate": 1.4731859514510738e-05, + "loss": 2.4535, + "step": 16521 + }, + { + "epoch": 1.3333871358243887, + "grad_norm": 0.6693681478500366, + "learning_rate": 1.472361300820505e-05, + "loss": 2.3885, + "step": 16522 + }, + { + "epoch": 1.3334678395609716, + "grad_norm": 0.7700718641281128, + "learning_rate": 1.4715368627209836e-05, + "loss": 2.3939, + "step": 16523 + }, + { + "epoch": 1.3335485432975547, + "grad_norm": 0.7203121781349182, + "learning_rate": 1.4707126371730561e-05, + "loss": 2.4644, + "step": 16524 + }, + { + "epoch": 1.3336292470341378, + "grad_norm": 0.7798308730125427, + "learning_rate": 1.4698886241972665e-05, + "loss": 2.4293, + "step": 16525 + }, + { + "epoch": 1.3337099507707206, + "grad_norm": 0.7017160654067993, + "learning_rate": 1.4690648238141503e-05, + "loss": 2.4327, + "step": 16526 + }, + { + "epoch": 1.3337906545073037, + "grad_norm": 0.6522603631019592, + "learning_rate": 1.468241236044241e-05, + "loss": 2.3955, + "step": 16527 + }, + { + "epoch": 1.3338713582438868, + "grad_norm": 0.766222357749939, + "learning_rate": 1.4674178609080602e-05, + "loss": 2.4652, + "step": 16528 + }, + { + "epoch": 1.3339520619804697, + "grad_norm": 0.7351565361022949, + "learning_rate": 1.4665946984261303e-05, + "loss": 2.4607, + "step": 16529 + }, + { + "epoch": 1.3340327657170528, + "grad_norm": 0.6817728281021118, + "learning_rate": 1.4657717486189693e-05, + "loss": 2.3687, + "step": 16530 + }, + { + "epoch": 1.3341134694536356, + "grad_norm": 0.7401643395423889, + "learning_rate": 1.464949011507083e-05, + "loss": 2.4179, + "step": 16531 + }, + { + "epoch": 1.3341941731902187, + "grad_norm": 
0.7783530354499817, + "learning_rate": 1.4641264871109784e-05, + "loss": 2.4088, + "step": 16532 + }, + { + "epoch": 1.3342748769268016, + "grad_norm": 0.6761943697929382, + "learning_rate": 1.4633041754511534e-05, + "loss": 2.4141, + "step": 16533 + }, + { + "epoch": 1.3343555806633847, + "grad_norm": 0.6842260360717773, + "learning_rate": 1.4624820765481073e-05, + "loss": 2.4918, + "step": 16534 + }, + { + "epoch": 1.3344362843999678, + "grad_norm": 0.6906094551086426, + "learning_rate": 1.4616601904223225e-05, + "loss": 2.4576, + "step": 16535 + }, + { + "epoch": 1.3345169881365506, + "grad_norm": 0.6549125909805298, + "learning_rate": 1.4608385170942829e-05, + "loss": 2.3748, + "step": 16536 + }, + { + "epoch": 1.3345976918731337, + "grad_norm": 0.6603896617889404, + "learning_rate": 1.4600170565844728e-05, + "loss": 2.3739, + "step": 16537 + }, + { + "epoch": 1.3346783956097168, + "grad_norm": 0.6413096189498901, + "learning_rate": 1.4591958089133606e-05, + "loss": 2.3979, + "step": 16538 + }, + { + "epoch": 1.3347590993462997, + "grad_norm": 0.7085204720497131, + "learning_rate": 1.4583747741014142e-05, + "loss": 2.4185, + "step": 16539 + }, + { + "epoch": 1.3348398030828827, + "grad_norm": 0.6517937183380127, + "learning_rate": 1.4575539521690983e-05, + "loss": 2.3938, + "step": 16540 + }, + { + "epoch": 1.3349205068194658, + "grad_norm": 0.6326449513435364, + "learning_rate": 1.4567333431368658e-05, + "loss": 2.4613, + "step": 16541 + }, + { + "epoch": 1.3350012105560487, + "grad_norm": 0.8046317100524902, + "learning_rate": 1.4559129470251708e-05, + "loss": 2.4547, + "step": 16542 + }, + { + "epoch": 1.3350819142926318, + "grad_norm": 0.6661570072174072, + "learning_rate": 1.455092763854462e-05, + "loss": 2.3636, + "step": 16543 + }, + { + "epoch": 1.3351626180292149, + "grad_norm": 0.6806541085243225, + "learning_rate": 1.454272793645176e-05, + "loss": 2.4309, + "step": 16544 + }, + { + "epoch": 1.3352433217657977, + "grad_norm": 0.651836097240448, + 
"learning_rate": 1.45345303641775e-05, + "loss": 2.3862, + "step": 16545 + }, + { + "epoch": 1.3353240255023808, + "grad_norm": 0.7448983192443848, + "learning_rate": 1.4526334921926165e-05, + "loss": 2.4654, + "step": 16546 + }, + { + "epoch": 1.3354047292389637, + "grad_norm": 0.6885285973548889, + "learning_rate": 1.4518141609901992e-05, + "loss": 2.3943, + "step": 16547 + }, + { + "epoch": 1.3354854329755468, + "grad_norm": 0.7204004526138306, + "learning_rate": 1.450995042830917e-05, + "loss": 2.4117, + "step": 16548 + }, + { + "epoch": 1.3355661367121296, + "grad_norm": 0.6551961898803711, + "learning_rate": 1.4501761377351864e-05, + "loss": 2.4269, + "step": 16549 + }, + { + "epoch": 1.3356468404487127, + "grad_norm": 0.7191253304481506, + "learning_rate": 1.4493574457234182e-05, + "loss": 2.3472, + "step": 16550 + }, + { + "epoch": 1.3357275441852958, + "grad_norm": 0.6793580651283264, + "learning_rate": 1.4485389668160121e-05, + "loss": 2.4264, + "step": 16551 + }, + { + "epoch": 1.3358082479218787, + "grad_norm": 0.704250693321228, + "learning_rate": 1.4477207010333682e-05, + "loss": 2.5236, + "step": 16552 + }, + { + "epoch": 1.3358889516584618, + "grad_norm": 0.6826470494270325, + "learning_rate": 1.4469026483958837e-05, + "loss": 2.4473, + "step": 16553 + }, + { + "epoch": 1.3359696553950449, + "grad_norm": 0.6646167039871216, + "learning_rate": 1.4460848089239399e-05, + "loss": 2.4232, + "step": 16554 + }, + { + "epoch": 1.3360503591316277, + "grad_norm": 0.7604451179504395, + "learning_rate": 1.4452671826379227e-05, + "loss": 2.4208, + "step": 16555 + }, + { + "epoch": 1.3361310628682108, + "grad_norm": 0.7129300236701965, + "learning_rate": 1.4444497695582093e-05, + "loss": 2.4304, + "step": 16556 + }, + { + "epoch": 1.336211766604794, + "grad_norm": 0.6769927740097046, + "learning_rate": 1.4436325697051733e-05, + "loss": 2.3467, + "step": 16557 + }, + { + "epoch": 1.3362924703413768, + "grad_norm": 0.6568608283996582, + "learning_rate": 
1.4428155830991797e-05, + "loss": 2.4285, + "step": 16558 + }, + { + "epoch": 1.3363731740779599, + "grad_norm": 0.7687276005744934, + "learning_rate": 1.4419988097605919e-05, + "loss": 2.4815, + "step": 16559 + }, + { + "epoch": 1.336453877814543, + "grad_norm": 0.7001463770866394, + "learning_rate": 1.4411822497097638e-05, + "loss": 2.4629, + "step": 16560 + }, + { + "epoch": 1.3365345815511258, + "grad_norm": 0.7211995720863342, + "learning_rate": 1.4403659029670458e-05, + "loss": 2.4323, + "step": 16561 + }, + { + "epoch": 1.336615285287709, + "grad_norm": 0.7371769547462463, + "learning_rate": 1.439549769552787e-05, + "loss": 2.3962, + "step": 16562 + }, + { + "epoch": 1.3366959890242918, + "grad_norm": 0.7475463151931763, + "learning_rate": 1.4387338494873237e-05, + "loss": 2.3593, + "step": 16563 + }, + { + "epoch": 1.3367766927608749, + "grad_norm": 0.7215834856033325, + "learning_rate": 1.4379181427909916e-05, + "loss": 2.3687, + "step": 16564 + }, + { + "epoch": 1.3368573964974577, + "grad_norm": 0.7160200476646423, + "learning_rate": 1.4371026494841211e-05, + "loss": 2.3652, + "step": 16565 + }, + { + "epoch": 1.3369381002340408, + "grad_norm": 0.6636231541633606, + "learning_rate": 1.436287369587036e-05, + "loss": 2.4628, + "step": 16566 + }, + { + "epoch": 1.337018803970624, + "grad_norm": 0.657774806022644, + "learning_rate": 1.4354723031200556e-05, + "loss": 2.4082, + "step": 16567 + }, + { + "epoch": 1.3370995077072068, + "grad_norm": 0.7020300626754761, + "learning_rate": 1.4346574501034936e-05, + "loss": 2.3821, + "step": 16568 + }, + { + "epoch": 1.3371802114437898, + "grad_norm": 0.6800786256790161, + "learning_rate": 1.4338428105576595e-05, + "loss": 2.3839, + "step": 16569 + }, + { + "epoch": 1.337260915180373, + "grad_norm": 0.7176932692527771, + "learning_rate": 1.4330283845028536e-05, + "loss": 2.4614, + "step": 16570 + }, + { + "epoch": 1.3373416189169558, + "grad_norm": 0.7233355641365051, + "learning_rate": 1.432214171959374e-05, + 
"loss": 2.4048, + "step": 16571 + }, + { + "epoch": 1.3374223226535389, + "grad_norm": 0.7721874117851257, + "learning_rate": 1.4314001729475157e-05, + "loss": 2.4169, + "step": 16572 + }, + { + "epoch": 1.337503026390122, + "grad_norm": 0.7123380303382874, + "learning_rate": 1.4305863874875613e-05, + "loss": 2.3799, + "step": 16573 + }, + { + "epoch": 1.3375837301267048, + "grad_norm": 0.7297765016555786, + "learning_rate": 1.4297728155997958e-05, + "loss": 2.4655, + "step": 16574 + }, + { + "epoch": 1.337664433863288, + "grad_norm": 0.6806401610374451, + "learning_rate": 1.428959457304493e-05, + "loss": 2.4102, + "step": 16575 + }, + { + "epoch": 1.3377451375998708, + "grad_norm": 0.6811275482177734, + "learning_rate": 1.4281463126219264e-05, + "loss": 2.4298, + "step": 16576 + }, + { + "epoch": 1.3378258413364539, + "grad_norm": 0.6900678277015686, + "learning_rate": 1.427333381572361e-05, + "loss": 2.4745, + "step": 16577 + }, + { + "epoch": 1.3379065450730367, + "grad_norm": 0.7815307974815369, + "learning_rate": 1.4265206641760587e-05, + "loss": 2.3624, + "step": 16578 + }, + { + "epoch": 1.3379872488096198, + "grad_norm": 0.6948800683021545, + "learning_rate": 1.4257081604532708e-05, + "loss": 2.4142, + "step": 16579 + }, + { + "epoch": 1.338067952546203, + "grad_norm": 0.7387657165527344, + "learning_rate": 1.4248958704242488e-05, + "loss": 2.4241, + "step": 16580 + }, + { + "epoch": 1.3381486562827858, + "grad_norm": 0.7158597111701965, + "learning_rate": 1.4240837941092367e-05, + "loss": 2.4473, + "step": 16581 + }, + { + "epoch": 1.3382293600193689, + "grad_norm": 0.758674144744873, + "learning_rate": 1.423271931528477e-05, + "loss": 2.4504, + "step": 16582 + }, + { + "epoch": 1.338310063755952, + "grad_norm": 0.6904417872428894, + "learning_rate": 1.4224602827021982e-05, + "loss": 2.4288, + "step": 16583 + }, + { + "epoch": 1.3383907674925348, + "grad_norm": 0.6988760828971863, + "learning_rate": 1.4216488476506307e-05, + "loss": 2.3874, + "step": 16584 
+ }, + { + "epoch": 1.338471471229118, + "grad_norm": 0.6969872117042542, + "learning_rate": 1.4208376263940003e-05, + "loss": 2.3388, + "step": 16585 + }, + { + "epoch": 1.338552174965701, + "grad_norm": 0.687179684638977, + "learning_rate": 1.420026618952518e-05, + "loss": 2.431, + "step": 16586 + }, + { + "epoch": 1.3386328787022839, + "grad_norm": 0.6319810152053833, + "learning_rate": 1.4192158253464038e-05, + "loss": 2.4415, + "step": 16587 + }, + { + "epoch": 1.338713582438867, + "grad_norm": 0.7554977536201477, + "learning_rate": 1.4184052455958629e-05, + "loss": 2.3863, + "step": 16588 + }, + { + "epoch": 1.33879428617545, + "grad_norm": 0.7025974988937378, + "learning_rate": 1.4175948797210936e-05, + "loss": 2.3957, + "step": 16589 + }, + { + "epoch": 1.338874989912033, + "grad_norm": 0.7270370721817017, + "learning_rate": 1.4167847277422952e-05, + "loss": 2.4309, + "step": 16590 + }, + { + "epoch": 1.338955693648616, + "grad_norm": 0.7017608284950256, + "learning_rate": 1.4159747896796593e-05, + "loss": 2.4142, + "step": 16591 + }, + { + "epoch": 1.3390363973851989, + "grad_norm": 0.7114055156707764, + "learning_rate": 1.4151650655533687e-05, + "loss": 2.473, + "step": 16592 + }, + { + "epoch": 1.339117101121782, + "grad_norm": 0.6420357823371887, + "learning_rate": 1.4143555553836063e-05, + "loss": 2.3671, + "step": 16593 + }, + { + "epoch": 1.3391978048583648, + "grad_norm": 0.7067350745201111, + "learning_rate": 1.413546259190548e-05, + "loss": 2.4422, + "step": 16594 + }, + { + "epoch": 1.339278508594948, + "grad_norm": 0.7376763224601746, + "learning_rate": 1.4127371769943598e-05, + "loss": 2.4443, + "step": 16595 + }, + { + "epoch": 1.339359212331531, + "grad_norm": 0.646515965461731, + "learning_rate": 1.4119283088152092e-05, + "loss": 2.3949, + "step": 16596 + }, + { + "epoch": 1.3394399160681139, + "grad_norm": 0.6896061301231384, + "learning_rate": 1.411119654673254e-05, + "loss": 2.4535, + "step": 16597 + }, + { + "epoch": 1.339520619804697, + 
"grad_norm": 0.6992611289024353, + "learning_rate": 1.4103112145886489e-05, + "loss": 2.3983, + "step": 16598 + }, + { + "epoch": 1.33960132354128, + "grad_norm": 0.7176348567008972, + "learning_rate": 1.4095029885815426e-05, + "loss": 2.4671, + "step": 16599 + }, + { + "epoch": 1.339682027277863, + "grad_norm": 0.6635856628417969, + "learning_rate": 1.4086949766720759e-05, + "loss": 2.4235, + "step": 16600 + }, + { + "epoch": 1.339762731014446, + "grad_norm": 0.673332154750824, + "learning_rate": 1.4078871788803915e-05, + "loss": 2.4328, + "step": 16601 + }, + { + "epoch": 1.339843434751029, + "grad_norm": 0.6738821864128113, + "learning_rate": 1.407079595226617e-05, + "loss": 2.4786, + "step": 16602 + }, + { + "epoch": 1.339924138487612, + "grad_norm": 0.690605103969574, + "learning_rate": 1.4062722257308803e-05, + "loss": 2.4025, + "step": 16603 + }, + { + "epoch": 1.340004842224195, + "grad_norm": 0.7186758518218994, + "learning_rate": 1.4054650704133066e-05, + "loss": 2.4793, + "step": 16604 + }, + { + "epoch": 1.3400855459607781, + "grad_norm": 0.6484951376914978, + "learning_rate": 1.4046581292940075e-05, + "loss": 2.3855, + "step": 16605 + }, + { + "epoch": 1.340166249697361, + "grad_norm": 0.6993771195411682, + "learning_rate": 1.403851402393096e-05, + "loss": 2.3872, + "step": 16606 + }, + { + "epoch": 1.340246953433944, + "grad_norm": 0.7446531653404236, + "learning_rate": 1.403044889730678e-05, + "loss": 2.4253, + "step": 16607 + }, + { + "epoch": 1.340327657170527, + "grad_norm": 0.6873160004615784, + "learning_rate": 1.4022385913268542e-05, + "loss": 2.464, + "step": 16608 + }, + { + "epoch": 1.34040836090711, + "grad_norm": 0.6570948362350464, + "learning_rate": 1.4014325072017198e-05, + "loss": 2.4063, + "step": 16609 + }, + { + "epoch": 1.3404890646436929, + "grad_norm": 0.7209224104881287, + "learning_rate": 1.4006266373753651e-05, + "loss": 2.4827, + "step": 16610 + }, + { + "epoch": 1.340569768380276, + "grad_norm": 0.7283413410186768, + 
"learning_rate": 1.3998209818678732e-05, + "loss": 2.4009, + "step": 16611 + }, + { + "epoch": 1.340650472116859, + "grad_norm": 0.6650960445404053, + "learning_rate": 1.3990155406993221e-05, + "loss": 2.3576, + "step": 16612 + }, + { + "epoch": 1.340731175853442, + "grad_norm": 0.6857860088348389, + "learning_rate": 1.3982103138897873e-05, + "loss": 2.4686, + "step": 16613 + }, + { + "epoch": 1.340811879590025, + "grad_norm": 0.7065873146057129, + "learning_rate": 1.3974053014593402e-05, + "loss": 2.3999, + "step": 16614 + }, + { + "epoch": 1.340892583326608, + "grad_norm": 0.8093010783195496, + "learning_rate": 1.3966005034280372e-05, + "loss": 2.4273, + "step": 16615 + }, + { + "epoch": 1.340973287063191, + "grad_norm": 0.649132251739502, + "learning_rate": 1.3957959198159387e-05, + "loss": 2.3418, + "step": 16616 + }, + { + "epoch": 1.341053990799774, + "grad_norm": 0.7114978432655334, + "learning_rate": 1.3949915506430976e-05, + "loss": 2.4393, + "step": 16617 + }, + { + "epoch": 1.3411346945363571, + "grad_norm": 0.7989282608032227, + "learning_rate": 1.3941873959295615e-05, + "loss": 2.4044, + "step": 16618 + }, + { + "epoch": 1.34121539827294, + "grad_norm": 0.7373676896095276, + "learning_rate": 1.3933834556953707e-05, + "loss": 2.4758, + "step": 16619 + }, + { + "epoch": 1.341296102009523, + "grad_norm": 0.7076435089111328, + "learning_rate": 1.3925797299605647e-05, + "loss": 2.4429, + "step": 16620 + }, + { + "epoch": 1.3413768057461062, + "grad_norm": 0.6739028692245483, + "learning_rate": 1.39177621874517e-05, + "loss": 2.4275, + "step": 16621 + }, + { + "epoch": 1.341457509482689, + "grad_norm": 0.7134198546409607, + "learning_rate": 1.3909729220692125e-05, + "loss": 2.4541, + "step": 16622 + }, + { + "epoch": 1.3415382132192721, + "grad_norm": 0.6770301461219788, + "learning_rate": 1.3901698399527175e-05, + "loss": 2.4143, + "step": 16623 + }, + { + "epoch": 1.341618916955855, + "grad_norm": 0.7146373987197876, + "learning_rate": 
1.3893669724156943e-05, + "loss": 2.4886, + "step": 16624 + }, + { + "epoch": 1.341699620692438, + "grad_norm": 0.6801536083221436, + "learning_rate": 1.3885643194781539e-05, + "loss": 2.4154, + "step": 16625 + }, + { + "epoch": 1.341780324429021, + "grad_norm": 0.7350363731384277, + "learning_rate": 1.3877618811601024e-05, + "loss": 2.3918, + "step": 16626 + }, + { + "epoch": 1.341861028165604, + "grad_norm": 0.7088882327079773, + "learning_rate": 1.3869596574815358e-05, + "loss": 2.412, + "step": 16627 + }, + { + "epoch": 1.3419417319021871, + "grad_norm": 0.7199791669845581, + "learning_rate": 1.3861576484624506e-05, + "loss": 2.3912, + "step": 16628 + }, + { + "epoch": 1.34202243563877, + "grad_norm": 0.692971408367157, + "learning_rate": 1.3853558541228328e-05, + "loss": 2.3826, + "step": 16629 + }, + { + "epoch": 1.342103139375353, + "grad_norm": 0.7524722814559937, + "learning_rate": 1.3845542744826679e-05, + "loss": 2.4227, + "step": 16630 + }, + { + "epoch": 1.3421838431119362, + "grad_norm": 0.6624585390090942, + "learning_rate": 1.3837529095619307e-05, + "loss": 2.3649, + "step": 16631 + }, + { + "epoch": 1.342264546848519, + "grad_norm": 0.6884489059448242, + "learning_rate": 1.3829517593805929e-05, + "loss": 2.3687, + "step": 16632 + }, + { + "epoch": 1.3423452505851021, + "grad_norm": 0.6766197085380554, + "learning_rate": 1.3821508239586246e-05, + "loss": 2.4191, + "step": 16633 + }, + { + "epoch": 1.3424259543216852, + "grad_norm": 0.6744453310966492, + "learning_rate": 1.3813501033159837e-05, + "loss": 2.4254, + "step": 16634 + }, + { + "epoch": 1.342506658058268, + "grad_norm": 0.6906216144561768, + "learning_rate": 1.3805495974726267e-05, + "loss": 2.4763, + "step": 16635 + }, + { + "epoch": 1.3425873617948512, + "grad_norm": 0.7052608132362366, + "learning_rate": 1.3797493064485078e-05, + "loss": 2.4307, + "step": 16636 + }, + { + "epoch": 1.342668065531434, + "grad_norm": 0.6701127290725708, + "learning_rate": 1.3789492302635653e-05, + "loss": 
2.4529, + "step": 16637 + }, + { + "epoch": 1.3427487692680171, + "grad_norm": 0.7440397143363953, + "learning_rate": 1.3781493689377455e-05, + "loss": 2.4471, + "step": 16638 + }, + { + "epoch": 1.3428294730046, + "grad_norm": 0.7340207695960999, + "learning_rate": 1.3773497224909848e-05, + "loss": 2.4434, + "step": 16639 + }, + { + "epoch": 1.342910176741183, + "grad_norm": 0.6836793422698975, + "learning_rate": 1.376550290943205e-05, + "loss": 2.4072, + "step": 16640 + }, + { + "epoch": 1.3429908804777662, + "grad_norm": 0.6820472478866577, + "learning_rate": 1.3757510743143342e-05, + "loss": 2.4078, + "step": 16641 + }, + { + "epoch": 1.343071584214349, + "grad_norm": 0.6608061194419861, + "learning_rate": 1.3749520726242938e-05, + "loss": 2.3995, + "step": 16642 + }, + { + "epoch": 1.3431522879509321, + "grad_norm": 0.6582421064376831, + "learning_rate": 1.3741532858929906e-05, + "loss": 2.3768, + "step": 16643 + }, + { + "epoch": 1.3432329916875152, + "grad_norm": 0.7032744288444519, + "learning_rate": 1.3733547141403358e-05, + "loss": 2.4367, + "step": 16644 + }, + { + "epoch": 1.343313695424098, + "grad_norm": 0.7149307727813721, + "learning_rate": 1.3725563573862321e-05, + "loss": 2.4425, + "step": 16645 + }, + { + "epoch": 1.3433943991606812, + "grad_norm": 0.7375392913818359, + "learning_rate": 1.3717582156505793e-05, + "loss": 2.409, + "step": 16646 + }, + { + "epoch": 1.3434751028972642, + "grad_norm": 0.8422170877456665, + "learning_rate": 1.3709602889532624e-05, + "loss": 2.4758, + "step": 16647 + }, + { + "epoch": 1.343555806633847, + "grad_norm": 0.6542177796363831, + "learning_rate": 1.3701625773141712e-05, + "loss": 2.4199, + "step": 16648 + }, + { + "epoch": 1.3436365103704302, + "grad_norm": 0.6639342904090881, + "learning_rate": 1.3693650807531898e-05, + "loss": 2.4366, + "step": 16649 + }, + { + "epoch": 1.3437172141070133, + "grad_norm": 0.7270925045013428, + "learning_rate": 1.3685677992901901e-05, + "loss": 2.3745, + "step": 16650 + }, + { 
+ "epoch": 1.3437979178435961, + "grad_norm": 0.7325547337532043, + "learning_rate": 1.367770732945044e-05, + "loss": 2.5053, + "step": 16651 + }, + { + "epoch": 1.3438786215801792, + "grad_norm": 0.7752320766448975, + "learning_rate": 1.3669738817376177e-05, + "loss": 2.4505, + "step": 16652 + }, + { + "epoch": 1.343959325316762, + "grad_norm": 0.6538182497024536, + "learning_rate": 1.3661772456877675e-05, + "loss": 2.4164, + "step": 16653 + }, + { + "epoch": 1.3440400290533452, + "grad_norm": 0.6886051297187805, + "learning_rate": 1.3653808248153487e-05, + "loss": 2.4156, + "step": 16654 + }, + { + "epoch": 1.344120732789928, + "grad_norm": 0.6990679502487183, + "learning_rate": 1.3645846191402134e-05, + "loss": 2.418, + "step": 16655 + }, + { + "epoch": 1.3442014365265111, + "grad_norm": 0.7006608247756958, + "learning_rate": 1.3637886286821999e-05, + "loss": 2.3987, + "step": 16656 + }, + { + "epoch": 1.3442821402630942, + "grad_norm": 0.6858758926391602, + "learning_rate": 1.3629928534611502e-05, + "loss": 2.3571, + "step": 16657 + }, + { + "epoch": 1.344362843999677, + "grad_norm": 0.7273774147033691, + "learning_rate": 1.3621972934968951e-05, + "loss": 2.4141, + "step": 16658 + }, + { + "epoch": 1.3444435477362602, + "grad_norm": 0.6770352721214294, + "learning_rate": 1.3614019488092633e-05, + "loss": 2.4602, + "step": 16659 + }, + { + "epoch": 1.3445242514728433, + "grad_norm": 0.7473095655441284, + "learning_rate": 1.3606068194180766e-05, + "loss": 2.3884, + "step": 16660 + }, + { + "epoch": 1.3446049552094261, + "grad_norm": 0.7271387577056885, + "learning_rate": 1.3598119053431512e-05, + "loss": 2.4705, + "step": 16661 + }, + { + "epoch": 1.3446856589460092, + "grad_norm": 0.658349335193634, + "learning_rate": 1.3590172066043006e-05, + "loss": 2.4271, + "step": 16662 + }, + { + "epoch": 1.3447663626825923, + "grad_norm": 0.6479319930076599, + "learning_rate": 1.3582227232213273e-05, + "loss": 2.3428, + "step": 16663 + }, + { + "epoch": 
1.3448470664191752, + "grad_norm": 0.700951874256134, + "learning_rate": 1.3574284552140337e-05, + "loss": 2.4926, + "step": 16664 + }, + { + "epoch": 1.3449277701557583, + "grad_norm": 0.6699960231781006, + "learning_rate": 1.3566344026022171e-05, + "loss": 2.4372, + "step": 16665 + }, + { + "epoch": 1.3450084738923413, + "grad_norm": 0.6743033528327942, + "learning_rate": 1.3558405654056617e-05, + "loss": 2.4142, + "step": 16666 + }, + { + "epoch": 1.3450891776289242, + "grad_norm": 0.6619464755058289, + "learning_rate": 1.355046943644157e-05, + "loss": 2.4099, + "step": 16667 + }, + { + "epoch": 1.3451698813655073, + "grad_norm": 0.668084442615509, + "learning_rate": 1.3542535373374798e-05, + "loss": 2.3895, + "step": 16668 + }, + { + "epoch": 1.3452505851020902, + "grad_norm": 0.7954626679420471, + "learning_rate": 1.3534603465054052e-05, + "loss": 2.479, + "step": 16669 + }, + { + "epoch": 1.3453312888386733, + "grad_norm": 0.6742919683456421, + "learning_rate": 1.3526673711677008e-05, + "loss": 2.4289, + "step": 16670 + }, + { + "epoch": 1.3454119925752561, + "grad_norm": 0.6564723253250122, + "learning_rate": 1.3518746113441316e-05, + "loss": 2.404, + "step": 16671 + }, + { + "epoch": 1.3454926963118392, + "grad_norm": 0.6955705881118774, + "learning_rate": 1.3510820670544521e-05, + "loss": 2.4274, + "step": 16672 + }, + { + "epoch": 1.3455734000484223, + "grad_norm": 0.6687749028205872, + "learning_rate": 1.3502897383184154e-05, + "loss": 2.4564, + "step": 16673 + }, + { + "epoch": 1.3456541037850052, + "grad_norm": 0.7984250783920288, + "learning_rate": 1.34949762515577e-05, + "loss": 2.3426, + "step": 16674 + }, + { + "epoch": 1.3457348075215882, + "grad_norm": 0.7334223389625549, + "learning_rate": 1.348705727586258e-05, + "loss": 2.4712, + "step": 16675 + }, + { + "epoch": 1.3458155112581713, + "grad_norm": 0.6732765436172485, + "learning_rate": 1.3479140456296114e-05, + "loss": 2.424, + "step": 16676 + }, + { + "epoch": 1.3458962149947542, + 
"grad_norm": 0.7944334149360657, + "learning_rate": 1.3471225793055641e-05, + "loss": 2.3951, + "step": 16677 + }, + { + "epoch": 1.3459769187313373, + "grad_norm": 0.6829007863998413, + "learning_rate": 1.3463313286338408e-05, + "loss": 2.4158, + "step": 16678 + }, + { + "epoch": 1.3460576224679204, + "grad_norm": 0.7019640207290649, + "learning_rate": 1.345540293634161e-05, + "loss": 2.4093, + "step": 16679 + }, + { + "epoch": 1.3461383262045032, + "grad_norm": 0.6839374303817749, + "learning_rate": 1.3447494743262412e-05, + "loss": 2.3959, + "step": 16680 + }, + { + "epoch": 1.3462190299410863, + "grad_norm": 0.7211155295372009, + "learning_rate": 1.3439588707297911e-05, + "loss": 2.4052, + "step": 16681 + }, + { + "epoch": 1.3462997336776692, + "grad_norm": 0.73811274766922, + "learning_rate": 1.3431684828645109e-05, + "loss": 2.4179, + "step": 16682 + }, + { + "epoch": 1.3463804374142523, + "grad_norm": 0.6634721159934998, + "learning_rate": 1.3423783107501009e-05, + "loss": 2.379, + "step": 16683 + }, + { + "epoch": 1.3464611411508352, + "grad_norm": 0.6884057521820068, + "learning_rate": 1.3415883544062579e-05, + "loss": 2.4144, + "step": 16684 + }, + { + "epoch": 1.3465418448874182, + "grad_norm": 0.7239587306976318, + "learning_rate": 1.340798613852664e-05, + "loss": 2.3856, + "step": 16685 + }, + { + "epoch": 1.3466225486240013, + "grad_norm": 0.7201077342033386, + "learning_rate": 1.3400090891090033e-05, + "loss": 2.4552, + "step": 16686 + }, + { + "epoch": 1.3467032523605842, + "grad_norm": 0.7049584984779358, + "learning_rate": 1.3392197801949558e-05, + "loss": 2.4424, + "step": 16687 + }, + { + "epoch": 1.3467839560971673, + "grad_norm": 0.7240790128707886, + "learning_rate": 1.3384306871301877e-05, + "loss": 2.4156, + "step": 16688 + }, + { + "epoch": 1.3468646598337504, + "grad_norm": 0.7276458740234375, + "learning_rate": 1.337641809934369e-05, + "loss": 2.3882, + "step": 16689 + }, + { + "epoch": 1.3469453635703332, + "grad_norm": 
0.6650896072387695, + "learning_rate": 1.3368531486271607e-05, + "loss": 2.396, + "step": 16690 + }, + { + "epoch": 1.3470260673069163, + "grad_norm": 0.6946447491645813, + "learning_rate": 1.3360647032282203e-05, + "loss": 2.3779, + "step": 16691 + }, + { + "epoch": 1.3471067710434994, + "grad_norm": 0.7507699728012085, + "learning_rate": 1.3352764737571932e-05, + "loss": 2.4378, + "step": 16692 + }, + { + "epoch": 1.3471874747800823, + "grad_norm": 0.6548876762390137, + "learning_rate": 1.334488460233725e-05, + "loss": 2.4181, + "step": 16693 + }, + { + "epoch": 1.3472681785166654, + "grad_norm": 0.7000874280929565, + "learning_rate": 1.3337006626774595e-05, + "loss": 2.4463, + "step": 16694 + }, + { + "epoch": 1.3473488822532484, + "grad_norm": 0.6487517356872559, + "learning_rate": 1.3329130811080249e-05, + "loss": 2.3703, + "step": 16695 + }, + { + "epoch": 1.3474295859898313, + "grad_norm": 0.6447827219963074, + "learning_rate": 1.3321257155450517e-05, + "loss": 2.3779, + "step": 16696 + }, + { + "epoch": 1.3475102897264144, + "grad_norm": 0.6309572458267212, + "learning_rate": 1.3313385660081667e-05, + "loss": 2.4443, + "step": 16697 + }, + { + "epoch": 1.3475909934629973, + "grad_norm": 0.6366227865219116, + "learning_rate": 1.330551632516982e-05, + "loss": 2.3418, + "step": 16698 + }, + { + "epoch": 1.3476716971995804, + "grad_norm": 0.6864019632339478, + "learning_rate": 1.3297649150911117e-05, + "loss": 2.4416, + "step": 16699 + }, + { + "epoch": 1.3477524009361632, + "grad_norm": 0.6807940006256104, + "learning_rate": 1.3289784137501671e-05, + "loss": 2.4465, + "step": 16700 + }, + { + "epoch": 1.3478331046727463, + "grad_norm": 0.6991185545921326, + "learning_rate": 1.3281921285137455e-05, + "loss": 2.3929, + "step": 16701 + }, + { + "epoch": 1.3479138084093294, + "grad_norm": 0.691908061504364, + "learning_rate": 1.3274060594014437e-05, + "loss": 2.4237, + "step": 16702 + }, + { + "epoch": 1.3479945121459123, + "grad_norm": 0.6909685730934143, + 
"learning_rate": 1.3266202064328548e-05, + "loss": 2.3695, + "step": 16703 + }, + { + "epoch": 1.3480752158824953, + "grad_norm": 0.6473715901374817, + "learning_rate": 1.325834569627562e-05, + "loss": 2.384, + "step": 16704 + }, + { + "epoch": 1.3481559196190784, + "grad_norm": 0.7433453798294067, + "learning_rate": 1.3250491490051454e-05, + "loss": 2.4546, + "step": 16705 + }, + { + "epoch": 1.3482366233556613, + "grad_norm": 0.7432501316070557, + "learning_rate": 1.3242639445851812e-05, + "loss": 2.4204, + "step": 16706 + }, + { + "epoch": 1.3483173270922444, + "grad_norm": 0.6661228537559509, + "learning_rate": 1.3234789563872397e-05, + "loss": 2.4454, + "step": 16707 + }, + { + "epoch": 1.3483980308288275, + "grad_norm": 0.7481260895729065, + "learning_rate": 1.3226941844308816e-05, + "loss": 2.4348, + "step": 16708 + }, + { + "epoch": 1.3484787345654103, + "grad_norm": 0.6986531019210815, + "learning_rate": 1.3219096287356669e-05, + "loss": 2.3622, + "step": 16709 + }, + { + "epoch": 1.3485594383019934, + "grad_norm": 0.7457645535469055, + "learning_rate": 1.321125289321149e-05, + "loss": 2.4399, + "step": 16710 + }, + { + "epoch": 1.3486401420385765, + "grad_norm": 0.6710307598114014, + "learning_rate": 1.3203411662068754e-05, + "loss": 2.3857, + "step": 16711 + }, + { + "epoch": 1.3487208457751594, + "grad_norm": 0.767304539680481, + "learning_rate": 1.3195572594123884e-05, + "loss": 2.4666, + "step": 16712 + }, + { + "epoch": 1.3488015495117425, + "grad_norm": 0.6720963716506958, + "learning_rate": 1.3187735689572289e-05, + "loss": 2.3952, + "step": 16713 + }, + { + "epoch": 1.3488822532483253, + "grad_norm": 0.6381734609603882, + "learning_rate": 1.3179900948609213e-05, + "loss": 2.3632, + "step": 16714 + }, + { + "epoch": 1.3489629569849084, + "grad_norm": 0.6697315573692322, + "learning_rate": 1.317206837142997e-05, + "loss": 2.4117, + "step": 16715 + }, + { + "epoch": 1.3490436607214913, + "grad_norm": 0.723676323890686, + "learning_rate": 
1.3164237958229764e-05, + "loss": 2.3772, + "step": 16716 + }, + { + "epoch": 1.3491243644580744, + "grad_norm": 0.7021055817604065, + "learning_rate": 1.3156409709203732e-05, + "loss": 2.3808, + "step": 16717 + }, + { + "epoch": 1.3492050681946575, + "grad_norm": 0.7128920555114746, + "learning_rate": 1.3148583624546962e-05, + "loss": 2.3854, + "step": 16718 + }, + { + "epoch": 1.3492857719312403, + "grad_norm": 0.6684797406196594, + "learning_rate": 1.314075970445453e-05, + "loss": 2.3722, + "step": 16719 + }, + { + "epoch": 1.3493664756678234, + "grad_norm": 0.6710386276245117, + "learning_rate": 1.3132937949121426e-05, + "loss": 2.412, + "step": 16720 + }, + { + "epoch": 1.3494471794044065, + "grad_norm": 0.7207252979278564, + "learning_rate": 1.3125118358742572e-05, + "loss": 2.4506, + "step": 16721 + }, + { + "epoch": 1.3495278831409894, + "grad_norm": 0.685516893863678, + "learning_rate": 1.3117300933512865e-05, + "loss": 2.435, + "step": 16722 + }, + { + "epoch": 1.3496085868775725, + "grad_norm": 0.71708744764328, + "learning_rate": 1.3109485673627154e-05, + "loss": 2.4735, + "step": 16723 + }, + { + "epoch": 1.3496892906141555, + "grad_norm": 0.7293861508369446, + "learning_rate": 1.3101672579280166e-05, + "loss": 2.4545, + "step": 16724 + }, + { + "epoch": 1.3497699943507384, + "grad_norm": 0.6448976993560791, + "learning_rate": 1.3093861650666661e-05, + "loss": 2.386, + "step": 16725 + }, + { + "epoch": 1.3498506980873215, + "grad_norm": 0.8111226558685303, + "learning_rate": 1.3086052887981315e-05, + "loss": 2.4733, + "step": 16726 + }, + { + "epoch": 1.3499314018239044, + "grad_norm": 0.7673875093460083, + "learning_rate": 1.3078246291418706e-05, + "loss": 2.4119, + "step": 16727 + }, + { + "epoch": 1.3500121055604875, + "grad_norm": 0.7296731472015381, + "learning_rate": 1.307044186117341e-05, + "loss": 2.3724, + "step": 16728 + }, + { + "epoch": 1.3500928092970703, + "grad_norm": 0.6947155594825745, + "learning_rate": 1.306263959743994e-05, + 
"loss": 2.3989, + "step": 16729 + }, + { + "epoch": 1.3501735130336534, + "grad_norm": 0.6781659722328186, + "learning_rate": 1.3054839500412753e-05, + "loss": 2.429, + "step": 16730 + }, + { + "epoch": 1.3502542167702365, + "grad_norm": 0.7498819231987, + "learning_rate": 1.3047041570286244e-05, + "loss": 2.459, + "step": 16731 + }, + { + "epoch": 1.3503349205068194, + "grad_norm": 0.6651057004928589, + "learning_rate": 1.3039245807254774e-05, + "loss": 2.4049, + "step": 16732 + }, + { + "epoch": 1.3504156242434024, + "grad_norm": 0.6998507380485535, + "learning_rate": 1.3031452211512596e-05, + "loss": 2.4083, + "step": 16733 + }, + { + "epoch": 1.3504963279799855, + "grad_norm": 0.6522402167320251, + "learning_rate": 1.3023660783253966e-05, + "loss": 2.3987, + "step": 16734 + }, + { + "epoch": 1.3505770317165684, + "grad_norm": 0.6618130207061768, + "learning_rate": 1.3015871522673096e-05, + "loss": 2.4514, + "step": 16735 + }, + { + "epoch": 1.3506577354531515, + "grad_norm": 0.7139489650726318, + "learning_rate": 1.300808442996405e-05, + "loss": 2.484, + "step": 16736 + }, + { + "epoch": 1.3507384391897346, + "grad_norm": 0.6582522988319397, + "learning_rate": 1.3000299505320956e-05, + "loss": 2.4463, + "step": 16737 + }, + { + "epoch": 1.3508191429263174, + "grad_norm": 0.7115446329116821, + "learning_rate": 1.2992516748937811e-05, + "loss": 2.4795, + "step": 16738 + }, + { + "epoch": 1.3508998466629005, + "grad_norm": 0.7243752479553223, + "learning_rate": 1.2984736161008581e-05, + "loss": 2.4151, + "step": 16739 + }, + { + "epoch": 1.3509805503994836, + "grad_norm": 0.758084774017334, + "learning_rate": 1.297695774172719e-05, + "loss": 2.4028, + "step": 16740 + }, + { + "epoch": 1.3510612541360665, + "grad_norm": 0.6555618643760681, + "learning_rate": 1.2969181491287496e-05, + "loss": 2.4184, + "step": 16741 + }, + { + "epoch": 1.3511419578726496, + "grad_norm": 0.6657842993736267, + "learning_rate": 1.2961407409883331e-05, + "loss": 2.375, + "step": 16742 + 
}, + { + "epoch": 1.3512226616092324, + "grad_norm": 0.6355723142623901, + "learning_rate": 1.2953635497708382e-05, + "loss": 2.4202, + "step": 16743 + }, + { + "epoch": 1.3513033653458155, + "grad_norm": 0.7384408116340637, + "learning_rate": 1.2945865754956377e-05, + "loss": 2.4298, + "step": 16744 + }, + { + "epoch": 1.3513840690823984, + "grad_norm": 0.7300455570220947, + "learning_rate": 1.2938098181820979e-05, + "loss": 2.3842, + "step": 16745 + }, + { + "epoch": 1.3514647728189815, + "grad_norm": 0.7378895282745361, + "learning_rate": 1.2930332778495735e-05, + "loss": 2.4025, + "step": 16746 + }, + { + "epoch": 1.3515454765555646, + "grad_norm": 0.6542565822601318, + "learning_rate": 1.2922569545174212e-05, + "loss": 2.3995, + "step": 16747 + }, + { + "epoch": 1.3516261802921474, + "grad_norm": 0.669829249382019, + "learning_rate": 1.291480848204989e-05, + "loss": 2.3843, + "step": 16748 + }, + { + "epoch": 1.3517068840287305, + "grad_norm": 0.6747604608535767, + "learning_rate": 1.2907049589316167e-05, + "loss": 2.4108, + "step": 16749 + }, + { + "epoch": 1.3517875877653136, + "grad_norm": 0.7003559470176697, + "learning_rate": 1.2899292867166402e-05, + "loss": 2.4233, + "step": 16750 + }, + { + "epoch": 1.3518682915018965, + "grad_norm": 0.7365099191665649, + "learning_rate": 1.2891538315793994e-05, + "loss": 2.3592, + "step": 16751 + }, + { + "epoch": 1.3519489952384796, + "grad_norm": 0.6849377751350403, + "learning_rate": 1.2883785935392123e-05, + "loss": 2.3943, + "step": 16752 + }, + { + "epoch": 1.3520296989750626, + "grad_norm": 0.7263002395629883, + "learning_rate": 1.2876035726154045e-05, + "loss": 2.4078, + "step": 16753 + }, + { + "epoch": 1.3521104027116455, + "grad_norm": 0.7341182827949524, + "learning_rate": 1.2868287688272884e-05, + "loss": 2.3568, + "step": 16754 + }, + { + "epoch": 1.3521911064482286, + "grad_norm": 0.7281078100204468, + "learning_rate": 1.2860541821941796e-05, + "loss": 2.4073, + "step": 16755 + }, + { + "epoch": 
1.3522718101848117, + "grad_norm": 0.6302868127822876, + "learning_rate": 1.285279812735376e-05, + "loss": 2.3946, + "step": 16756 + }, + { + "epoch": 1.3523525139213946, + "grad_norm": 0.7333062887191772, + "learning_rate": 1.28450566047018e-05, + "loss": 2.3892, + "step": 16757 + }, + { + "epoch": 1.3524332176579776, + "grad_norm": 0.74838787317276, + "learning_rate": 1.2837317254178882e-05, + "loss": 2.4844, + "step": 16758 + }, + { + "epoch": 1.3525139213945605, + "grad_norm": 0.7085757255554199, + "learning_rate": 1.2829580075977843e-05, + "loss": 2.3583, + "step": 16759 + }, + { + "epoch": 1.3525946251311436, + "grad_norm": 0.7182579040527344, + "learning_rate": 1.2821845070291527e-05, + "loss": 2.4326, + "step": 16760 + }, + { + "epoch": 1.3526753288677265, + "grad_norm": 0.6857885718345642, + "learning_rate": 1.2814112237312714e-05, + "loss": 2.4406, + "step": 16761 + }, + { + "epoch": 1.3527560326043095, + "grad_norm": 0.7629652619361877, + "learning_rate": 1.2806381577234139e-05, + "loss": 2.4839, + "step": 16762 + }, + { + "epoch": 1.3528367363408926, + "grad_norm": 0.6940319538116455, + "learning_rate": 1.2798653090248458e-05, + "loss": 2.3918, + "step": 16763 + }, + { + "epoch": 1.3529174400774755, + "grad_norm": 0.6825633645057678, + "learning_rate": 1.2790926776548318e-05, + "loss": 2.3828, + "step": 16764 + }, + { + "epoch": 1.3529981438140586, + "grad_norm": 0.6830280423164368, + "learning_rate": 1.278320263632622e-05, + "loss": 2.3727, + "step": 16765 + }, + { + "epoch": 1.3530788475506417, + "grad_norm": 0.6782984733581543, + "learning_rate": 1.2775480669774698e-05, + "loss": 2.3984, + "step": 16766 + }, + { + "epoch": 1.3531595512872245, + "grad_norm": 0.6939808130264282, + "learning_rate": 1.276776087708621e-05, + "loss": 2.3724, + "step": 16767 + }, + { + "epoch": 1.3532402550238076, + "grad_norm": 0.7562546133995056, + "learning_rate": 1.276004325845317e-05, + "loss": 2.4178, + "step": 16768 + }, + { + "epoch": 1.3533209587603907, + 
"grad_norm": 0.6692922115325928, + "learning_rate": 1.2752327814067877e-05, + "loss": 2.4072, + "step": 16769 + }, + { + "epoch": 1.3534016624969736, + "grad_norm": 0.6783415079116821, + "learning_rate": 1.2744614544122635e-05, + "loss": 2.3993, + "step": 16770 + }, + { + "epoch": 1.3534823662335567, + "grad_norm": 0.6608997583389282, + "learning_rate": 1.27369034488097e-05, + "loss": 2.3883, + "step": 16771 + }, + { + "epoch": 1.3535630699701398, + "grad_norm": 0.6849228739738464, + "learning_rate": 1.2729194528321231e-05, + "loss": 2.4009, + "step": 16772 + }, + { + "epoch": 1.3536437737067226, + "grad_norm": 0.7059305906295776, + "learning_rate": 1.2721487782849362e-05, + "loss": 2.508, + "step": 16773 + }, + { + "epoch": 1.3537244774433057, + "grad_norm": 0.6471492052078247, + "learning_rate": 1.2713783212586183e-05, + "loss": 2.3813, + "step": 16774 + }, + { + "epoch": 1.3538051811798886, + "grad_norm": 0.7108949422836304, + "learning_rate": 1.2706080817723687e-05, + "loss": 2.4189, + "step": 16775 + }, + { + "epoch": 1.3538858849164717, + "grad_norm": 0.6623945236206055, + "learning_rate": 1.269838059845383e-05, + "loss": 2.4128, + "step": 16776 + }, + { + "epoch": 1.3539665886530545, + "grad_norm": 0.6595518589019775, + "learning_rate": 1.269068255496857e-05, + "loss": 2.3984, + "step": 16777 + }, + { + "epoch": 1.3540472923896376, + "grad_norm": 0.6932248473167419, + "learning_rate": 1.2682986687459708e-05, + "loss": 2.3951, + "step": 16778 + }, + { + "epoch": 1.3541279961262207, + "grad_norm": 0.6914867162704468, + "learning_rate": 1.2675292996119059e-05, + "loss": 2.4602, + "step": 16779 + }, + { + "epoch": 1.3542086998628036, + "grad_norm": 0.6633034348487854, + "learning_rate": 1.266760148113838e-05, + "loss": 2.43, + "step": 16780 + }, + { + "epoch": 1.3542894035993867, + "grad_norm": 0.6987594366073608, + "learning_rate": 1.2659912142709363e-05, + "loss": 2.3962, + "step": 16781 + }, + { + "epoch": 1.3543701073359697, + "grad_norm": 
0.7429597973823547, + "learning_rate": 1.2652224981023652e-05, + "loss": 2.4838, + "step": 16782 + }, + { + "epoch": 1.3544508110725526, + "grad_norm": 0.6402504444122314, + "learning_rate": 1.2644539996272808e-05, + "loss": 2.43, + "step": 16783 + }, + { + "epoch": 1.3545315148091357, + "grad_norm": 0.6763156652450562, + "learning_rate": 1.263685718864841e-05, + "loss": 2.4911, + "step": 16784 + }, + { + "epoch": 1.3546122185457188, + "grad_norm": 0.8133900165557861, + "learning_rate": 1.2629176558341881e-05, + "loss": 2.45, + "step": 16785 + }, + { + "epoch": 1.3546929222823016, + "grad_norm": 0.6946277022361755, + "learning_rate": 1.262149810554465e-05, + "loss": 2.43, + "step": 16786 + }, + { + "epoch": 1.3547736260188847, + "grad_norm": 0.7667170166969299, + "learning_rate": 1.2613821830448125e-05, + "loss": 2.4464, + "step": 16787 + }, + { + "epoch": 1.3548543297554676, + "grad_norm": 0.672662615776062, + "learning_rate": 1.2606147733243567e-05, + "loss": 2.3653, + "step": 16788 + }, + { + "epoch": 1.3549350334920507, + "grad_norm": 0.6856412291526794, + "learning_rate": 1.2598475814122258e-05, + "loss": 2.3924, + "step": 16789 + }, + { + "epoch": 1.3550157372286336, + "grad_norm": 0.6966650485992432, + "learning_rate": 1.2590806073275407e-05, + "loss": 2.4039, + "step": 16790 + }, + { + "epoch": 1.3550964409652166, + "grad_norm": 0.7397874593734741, + "learning_rate": 1.2583138510894143e-05, + "loss": 2.4769, + "step": 16791 + }, + { + "epoch": 1.3551771447017997, + "grad_norm": 0.6960996985435486, + "learning_rate": 1.2575473127169591e-05, + "loss": 2.4342, + "step": 16792 + }, + { + "epoch": 1.3552578484383826, + "grad_norm": 0.7324376702308655, + "learning_rate": 1.2567809922292795e-05, + "loss": 2.4779, + "step": 16793 + }, + { + "epoch": 1.3553385521749657, + "grad_norm": 0.6891930103302002, + "learning_rate": 1.2560148896454704e-05, + "loss": 2.4228, + "step": 16794 + }, + { + "epoch": 1.3554192559115488, + "grad_norm": 0.6919474601745605, + 
"learning_rate": 1.2552490049846278e-05, + "loss": 2.4178, + "step": 16795 + }, + { + "epoch": 1.3554999596481316, + "grad_norm": 0.7067604660987854, + "learning_rate": 1.2544833382658405e-05, + "loss": 2.457, + "step": 16796 + }, + { + "epoch": 1.3555806633847147, + "grad_norm": 0.7667992115020752, + "learning_rate": 1.253717889508188e-05, + "loss": 2.3951, + "step": 16797 + }, + { + "epoch": 1.3556613671212978, + "grad_norm": 0.6337998509407043, + "learning_rate": 1.2529526587307482e-05, + "loss": 2.3788, + "step": 16798 + }, + { + "epoch": 1.3557420708578807, + "grad_norm": 0.6591900587081909, + "learning_rate": 1.2521876459525927e-05, + "loss": 2.4101, + "step": 16799 + }, + { + "epoch": 1.3558227745944638, + "grad_norm": 0.7115298509597778, + "learning_rate": 1.2514228511927895e-05, + "loss": 2.4417, + "step": 16800 + }, + { + "epoch": 1.3559034783310469, + "grad_norm": 0.6851321458816528, + "learning_rate": 1.2506582744703965e-05, + "loss": 2.4081, + "step": 16801 + }, + { + "epoch": 1.3559841820676297, + "grad_norm": 0.7469603419303894, + "learning_rate": 1.249893915804471e-05, + "loss": 2.3703, + "step": 16802 + }, + { + "epoch": 1.3560648858042128, + "grad_norm": 0.6972614526748657, + "learning_rate": 1.2491297752140641e-05, + "loss": 2.3549, + "step": 16803 + }, + { + "epoch": 1.3561455895407957, + "grad_norm": 0.6669485569000244, + "learning_rate": 1.2483658527182151e-05, + "loss": 2.4261, + "step": 16804 + }, + { + "epoch": 1.3562262932773788, + "grad_norm": 0.7516919374465942, + "learning_rate": 1.247602148335968e-05, + "loss": 2.4323, + "step": 16805 + }, + { + "epoch": 1.3563069970139616, + "grad_norm": 0.7191836833953857, + "learning_rate": 1.2468386620863548e-05, + "loss": 2.4242, + "step": 16806 + }, + { + "epoch": 1.3563877007505447, + "grad_norm": 0.660237729549408, + "learning_rate": 1.2460753939884017e-05, + "loss": 2.4154, + "step": 16807 + }, + { + "epoch": 1.3564684044871278, + "grad_norm": 0.749531626701355, + "learning_rate": 
1.2453123440611325e-05, + "loss": 2.4138, + "step": 16808 + }, + { + "epoch": 1.3565491082237107, + "grad_norm": 0.6808986067771912, + "learning_rate": 1.2445495123235673e-05, + "loss": 2.3918, + "step": 16809 + }, + { + "epoch": 1.3566298119602938, + "grad_norm": 0.686183750629425, + "learning_rate": 1.2437868987947133e-05, + "loss": 2.4172, + "step": 16810 + }, + { + "epoch": 1.3567105156968768, + "grad_norm": 0.6487868428230286, + "learning_rate": 1.2430245034935784e-05, + "loss": 2.4199, + "step": 16811 + }, + { + "epoch": 1.3567912194334597, + "grad_norm": 0.7352244257926941, + "learning_rate": 1.242262326439163e-05, + "loss": 2.3779, + "step": 16812 + }, + { + "epoch": 1.3568719231700428, + "grad_norm": 0.7250565886497498, + "learning_rate": 1.2415003676504644e-05, + "loss": 2.4106, + "step": 16813 + }, + { + "epoch": 1.3569526269066259, + "grad_norm": 0.6843926906585693, + "learning_rate": 1.2407386271464716e-05, + "loss": 2.3725, + "step": 16814 + }, + { + "epoch": 1.3570333306432087, + "grad_norm": 0.686326801776886, + "learning_rate": 1.2399771049461684e-05, + "loss": 2.3709, + "step": 16815 + }, + { + "epoch": 1.3571140343797918, + "grad_norm": 0.6796969771385193, + "learning_rate": 1.2392158010685373e-05, + "loss": 2.4545, + "step": 16816 + }, + { + "epoch": 1.357194738116375, + "grad_norm": 0.6469466090202332, + "learning_rate": 1.2384547155325466e-05, + "loss": 2.4263, + "step": 16817 + }, + { + "epoch": 1.3572754418529578, + "grad_norm": 0.7089909911155701, + "learning_rate": 1.2376938483571688e-05, + "loss": 2.378, + "step": 16818 + }, + { + "epoch": 1.3573561455895409, + "grad_norm": 0.7313235402107239, + "learning_rate": 1.2369331995613665e-05, + "loss": 2.46, + "step": 16819 + }, + { + "epoch": 1.3574368493261237, + "grad_norm": 0.7555651664733887, + "learning_rate": 1.2361727691640934e-05, + "loss": 2.531, + "step": 16820 + }, + { + "epoch": 1.3575175530627068, + "grad_norm": 0.7563485503196716, + "learning_rate": 1.2354125571843033e-05, + 
"loss": 2.4205, + "step": 16821 + }, + { + "epoch": 1.3575982567992897, + "grad_norm": 0.7996519804000854, + "learning_rate": 1.2346525636409434e-05, + "loss": 2.4223, + "step": 16822 + }, + { + "epoch": 1.3576789605358728, + "grad_norm": 0.7141731977462769, + "learning_rate": 1.233892788552955e-05, + "loss": 2.4554, + "step": 16823 + }, + { + "epoch": 1.3577596642724559, + "grad_norm": 0.6715070605278015, + "learning_rate": 1.233133231939273e-05, + "loss": 2.4386, + "step": 16824 + }, + { + "epoch": 1.3578403680090387, + "grad_norm": 0.6893020272254944, + "learning_rate": 1.2323738938188301e-05, + "loss": 2.4065, + "step": 16825 + }, + { + "epoch": 1.3579210717456218, + "grad_norm": 0.7542821764945984, + "learning_rate": 1.2316147742105454e-05, + "loss": 2.3974, + "step": 16826 + }, + { + "epoch": 1.358001775482205, + "grad_norm": 0.7177664041519165, + "learning_rate": 1.230855873133343e-05, + "loss": 2.4306, + "step": 16827 + }, + { + "epoch": 1.3580824792187878, + "grad_norm": 0.7056576013565063, + "learning_rate": 1.2300971906061354e-05, + "loss": 2.4238, + "step": 16828 + }, + { + "epoch": 1.3581631829553709, + "grad_norm": 0.686903715133667, + "learning_rate": 1.2293387266478296e-05, + "loss": 2.3902, + "step": 16829 + }, + { + "epoch": 1.358243886691954, + "grad_norm": 0.7377725839614868, + "learning_rate": 1.2285804812773293e-05, + "loss": 2.4294, + "step": 16830 + }, + { + "epoch": 1.3583245904285368, + "grad_norm": 0.6537891030311584, + "learning_rate": 1.227822454513532e-05, + "loss": 2.374, + "step": 16831 + }, + { + "epoch": 1.35840529416512, + "grad_norm": 0.684699296951294, + "learning_rate": 1.2270646463753288e-05, + "loss": 2.4105, + "step": 16832 + }, + { + "epoch": 1.3584859979017028, + "grad_norm": 0.7042316794395447, + "learning_rate": 1.2263070568816081e-05, + "loss": 2.4246, + "step": 16833 + }, + { + "epoch": 1.3585667016382859, + "grad_norm": 0.7610476613044739, + "learning_rate": 1.2255496860512505e-05, + "loss": 2.4581, + "step": 16834 + 
}, + { + "epoch": 1.3586474053748687, + "grad_norm": 0.6620839834213257, + "learning_rate": 1.224792533903134e-05, + "loss": 2.4138, + "step": 16835 + }, + { + "epoch": 1.3587281091114518, + "grad_norm": 0.6861035823822021, + "learning_rate": 1.2240356004561227e-05, + "loss": 2.4195, + "step": 16836 + }, + { + "epoch": 1.358808812848035, + "grad_norm": 0.7186882495880127, + "learning_rate": 1.2232788857290855e-05, + "loss": 2.404, + "step": 16837 + }, + { + "epoch": 1.3588895165846178, + "grad_norm": 0.7219386696815491, + "learning_rate": 1.2225223897408833e-05, + "loss": 2.3778, + "step": 16838 + }, + { + "epoch": 1.3589702203212009, + "grad_norm": 0.6935911774635315, + "learning_rate": 1.2217661125103663e-05, + "loss": 2.4617, + "step": 16839 + }, + { + "epoch": 1.359050924057784, + "grad_norm": 0.7885910272598267, + "learning_rate": 1.2210100540563828e-05, + "loss": 2.4467, + "step": 16840 + }, + { + "epoch": 1.3591316277943668, + "grad_norm": 0.6690255403518677, + "learning_rate": 1.220254214397778e-05, + "loss": 2.381, + "step": 16841 + }, + { + "epoch": 1.35921233153095, + "grad_norm": 0.7592741847038269, + "learning_rate": 1.2194985935533887e-05, + "loss": 2.4459, + "step": 16842 + }, + { + "epoch": 1.359293035267533, + "grad_norm": 0.827460527420044, + "learning_rate": 1.2187431915420466e-05, + "loss": 2.3842, + "step": 16843 + }, + { + "epoch": 1.3593737390041158, + "grad_norm": 0.7313764691352844, + "learning_rate": 1.2179880083825811e-05, + "loss": 2.3938, + "step": 16844 + }, + { + "epoch": 1.359454442740699, + "grad_norm": 0.7093486189842224, + "learning_rate": 1.2172330440938084e-05, + "loss": 2.4316, + "step": 16845 + }, + { + "epoch": 1.359535146477282, + "grad_norm": 0.6805742383003235, + "learning_rate": 1.2164782986945467e-05, + "loss": 2.4372, + "step": 16846 + }, + { + "epoch": 1.3596158502138649, + "grad_norm": 0.7525961399078369, + "learning_rate": 1.2157237722036064e-05, + "loss": 2.3867, + "step": 16847 + }, + { + "epoch": 
1.359696553950448, + "grad_norm": 0.723896861076355, + "learning_rate": 1.2149694646397947e-05, + "loss": 2.4685, + "step": 16848 + }, + { + "epoch": 1.3597772576870308, + "grad_norm": 0.704448938369751, + "learning_rate": 1.2142153760219055e-05, + "loss": 2.4463, + "step": 16849 + }, + { + "epoch": 1.359857961423614, + "grad_norm": 0.7207927703857422, + "learning_rate": 1.2134615063687349e-05, + "loss": 2.3549, + "step": 16850 + }, + { + "epoch": 1.3599386651601968, + "grad_norm": 0.7106234431266785, + "learning_rate": 1.2127078556990724e-05, + "loss": 2.4145, + "step": 16851 + }, + { + "epoch": 1.3600193688967799, + "grad_norm": 0.7740694284439087, + "learning_rate": 1.2119544240316993e-05, + "loss": 2.3999, + "step": 16852 + }, + { + "epoch": 1.360100072633363, + "grad_norm": 0.6696181297302246, + "learning_rate": 1.2112012113853954e-05, + "loss": 2.4046, + "step": 16853 + }, + { + "epoch": 1.3601807763699458, + "grad_norm": 0.6758043169975281, + "learning_rate": 1.2104482177789334e-05, + "loss": 2.4021, + "step": 16854 + }, + { + "epoch": 1.360261480106529, + "grad_norm": 0.6659380793571472, + "learning_rate": 1.2096954432310758e-05, + "loss": 2.4145, + "step": 16855 + }, + { + "epoch": 1.360342183843112, + "grad_norm": 0.6889290809631348, + "learning_rate": 1.2089428877605858e-05, + "loss": 2.3486, + "step": 16856 + }, + { + "epoch": 1.3604228875796949, + "grad_norm": 0.6755563020706177, + "learning_rate": 1.2081905513862201e-05, + "loss": 2.4294, + "step": 16857 + }, + { + "epoch": 1.360503591316278, + "grad_norm": 0.7662243843078613, + "learning_rate": 1.2074384341267276e-05, + "loss": 2.414, + "step": 16858 + }, + { + "epoch": 1.360584295052861, + "grad_norm": 0.7432721853256226, + "learning_rate": 1.2066865360008517e-05, + "loss": 2.4314, + "step": 16859 + }, + { + "epoch": 1.360664998789444, + "grad_norm": 0.6465074419975281, + "learning_rate": 1.2059348570273366e-05, + "loss": 2.3349, + "step": 16860 + }, + { + "epoch": 1.360745702526027, + "grad_norm": 
0.6940968632698059, + "learning_rate": 1.2051833972249105e-05, + "loss": 2.4539, + "step": 16861 + }, + { + "epoch": 1.36082640626261, + "grad_norm": 0.7211138010025024, + "learning_rate": 1.2044321566123019e-05, + "loss": 2.4041, + "step": 16862 + }, + { + "epoch": 1.360907109999193, + "grad_norm": 0.6746649146080017, + "learning_rate": 1.2036811352082367e-05, + "loss": 2.4329, + "step": 16863 + }, + { + "epoch": 1.360987813735776, + "grad_norm": 0.7502184510231018, + "learning_rate": 1.2029303330314345e-05, + "loss": 2.407, + "step": 16864 + }, + { + "epoch": 1.361068517472359, + "grad_norm": 0.7192596793174744, + "learning_rate": 1.2021797501006027e-05, + "loss": 2.3907, + "step": 16865 + }, + { + "epoch": 1.361149221208942, + "grad_norm": 0.6682254672050476, + "learning_rate": 1.2014293864344483e-05, + "loss": 2.391, + "step": 16866 + }, + { + "epoch": 1.3612299249455249, + "grad_norm": 0.680969774723053, + "learning_rate": 1.2006792420516755e-05, + "loss": 2.3479, + "step": 16867 + }, + { + "epoch": 1.361310628682108, + "grad_norm": 0.682671308517456, + "learning_rate": 1.1999293169709757e-05, + "loss": 2.4097, + "step": 16868 + }, + { + "epoch": 1.361391332418691, + "grad_norm": 0.7030573487281799, + "learning_rate": 1.199179611211041e-05, + "loss": 2.4514, + "step": 16869 + }, + { + "epoch": 1.361472036155274, + "grad_norm": 0.670630693435669, + "learning_rate": 1.1984301247905582e-05, + "loss": 2.3982, + "step": 16870 + }, + { + "epoch": 1.361552739891857, + "grad_norm": 0.6993644833564758, + "learning_rate": 1.1976808577282017e-05, + "loss": 2.4297, + "step": 16871 + }, + { + "epoch": 1.36163344362844, + "grad_norm": 0.7448122501373291, + "learning_rate": 1.1969318100426486e-05, + "loss": 2.3612, + "step": 16872 + }, + { + "epoch": 1.361714147365023, + "grad_norm": 0.7014498114585876, + "learning_rate": 1.1961829817525649e-05, + "loss": 2.3451, + "step": 16873 + }, + { + "epoch": 1.361794851101606, + "grad_norm": 0.7140750885009766, + "learning_rate": 
1.195434372876616e-05, + "loss": 2.4231, + "step": 16874 + }, + { + "epoch": 1.3618755548381891, + "grad_norm": 0.7377427816390991, + "learning_rate": 1.1946859834334567e-05, + "loss": 2.4055, + "step": 16875 + }, + { + "epoch": 1.361956258574772, + "grad_norm": 0.7969191670417786, + "learning_rate": 1.1939378134417433e-05, + "loss": 2.3503, + "step": 16876 + }, + { + "epoch": 1.362036962311355, + "grad_norm": 0.6821554899215698, + "learning_rate": 1.1931898629201155e-05, + "loss": 2.4259, + "step": 16877 + }, + { + "epoch": 1.3621176660479382, + "grad_norm": 0.6598221659660339, + "learning_rate": 1.1924421318872182e-05, + "loss": 2.3833, + "step": 16878 + }, + { + "epoch": 1.362198369784521, + "grad_norm": 0.8031432628631592, + "learning_rate": 1.1916946203616863e-05, + "loss": 2.5077, + "step": 16879 + }, + { + "epoch": 1.362279073521104, + "grad_norm": 0.7247405648231506, + "learning_rate": 1.190947328362152e-05, + "loss": 2.426, + "step": 16880 + }, + { + "epoch": 1.362359777257687, + "grad_norm": 0.7256691455841064, + "learning_rate": 1.1902002559072344e-05, + "loss": 2.474, + "step": 16881 + }, + { + "epoch": 1.36244048099427, + "grad_norm": 0.7382180094718933, + "learning_rate": 1.1894534030155558e-05, + "loss": 2.4487, + "step": 16882 + }, + { + "epoch": 1.362521184730853, + "grad_norm": 0.700179398059845, + "learning_rate": 1.1887067697057297e-05, + "loss": 2.3836, + "step": 16883 + }, + { + "epoch": 1.362601888467436, + "grad_norm": 0.706106424331665, + "learning_rate": 1.1879603559963638e-05, + "loss": 2.4304, + "step": 16884 + }, + { + "epoch": 1.362682592204019, + "grad_norm": 0.7514815926551819, + "learning_rate": 1.1872141619060606e-05, + "loss": 2.4895, + "step": 16885 + }, + { + "epoch": 1.362763295940602, + "grad_norm": 0.6605612635612488, + "learning_rate": 1.1864681874534201e-05, + "loss": 2.3569, + "step": 16886 + }, + { + "epoch": 1.362843999677185, + "grad_norm": 0.6366496682167053, + "learning_rate": 1.1857224326570283e-05, + "loss": 2.3919, 
+ "step": 16887 + }, + { + "epoch": 1.3629247034137681, + "grad_norm": 0.8100820183753967, + "learning_rate": 1.1849768975354736e-05, + "loss": 2.5063, + "step": 16888 + }, + { + "epoch": 1.363005407150351, + "grad_norm": 0.685127854347229, + "learning_rate": 1.1842315821073403e-05, + "loss": 2.4647, + "step": 16889 + }, + { + "epoch": 1.363086110886934, + "grad_norm": 0.696172833442688, + "learning_rate": 1.1834864863911987e-05, + "loss": 2.4224, + "step": 16890 + }, + { + "epoch": 1.3631668146235172, + "grad_norm": 0.6558032035827637, + "learning_rate": 1.1827416104056199e-05, + "loss": 2.3619, + "step": 16891 + }, + { + "epoch": 1.3632475183601, + "grad_norm": 0.744687020778656, + "learning_rate": 1.1819969541691689e-05, + "loss": 2.4669, + "step": 16892 + }, + { + "epoch": 1.3633282220966831, + "grad_norm": 0.6925212740898132, + "learning_rate": 1.1812525177004052e-05, + "loss": 2.3967, + "step": 16893 + }, + { + "epoch": 1.363408925833266, + "grad_norm": 0.6861244440078735, + "learning_rate": 1.1805083010178797e-05, + "loss": 2.3979, + "step": 16894 + }, + { + "epoch": 1.363489629569849, + "grad_norm": 0.6987108588218689, + "learning_rate": 1.179764304140143e-05, + "loss": 2.4263, + "step": 16895 + }, + { + "epoch": 1.363570333306432, + "grad_norm": 0.6940091848373413, + "learning_rate": 1.179020527085738e-05, + "loss": 2.4328, + "step": 16896 + }, + { + "epoch": 1.363651037043015, + "grad_norm": 0.6831968426704407, + "learning_rate": 1.1782769698731966e-05, + "loss": 2.427, + "step": 16897 + }, + { + "epoch": 1.3637317407795981, + "grad_norm": 0.7370985746383667, + "learning_rate": 1.177533632521054e-05, + "loss": 2.3711, + "step": 16898 + }, + { + "epoch": 1.363812444516181, + "grad_norm": 0.8176774978637695, + "learning_rate": 1.1767905150478376e-05, + "loss": 2.4337, + "step": 16899 + }, + { + "epoch": 1.363893148252764, + "grad_norm": 0.786318302154541, + "learning_rate": 1.1760476174720637e-05, + "loss": 2.5099, + "step": 16900 + }, + { + "epoch": 
1.3639738519893472, + "grad_norm": 0.7309854626655579, + "learning_rate": 1.1753049398122495e-05, + "loss": 2.46, + "step": 16901 + }, + { + "epoch": 1.36405455572593, + "grad_norm": 0.7410863637924194, + "learning_rate": 1.1745624820869039e-05, + "loss": 2.4249, + "step": 16902 + }, + { + "epoch": 1.3641352594625131, + "grad_norm": 0.7059988379478455, + "learning_rate": 1.1738202443145308e-05, + "loss": 2.4964, + "step": 16903 + }, + { + "epoch": 1.3642159631990962, + "grad_norm": 0.7351845502853394, + "learning_rate": 1.1730782265136287e-05, + "loss": 2.4694, + "step": 16904 + }, + { + "epoch": 1.364296666935679, + "grad_norm": 0.6928153038024902, + "learning_rate": 1.1723364287026938e-05, + "loss": 2.426, + "step": 16905 + }, + { + "epoch": 1.3643773706722622, + "grad_norm": 0.759920060634613, + "learning_rate": 1.1715948509002083e-05, + "loss": 2.4359, + "step": 16906 + }, + { + "epoch": 1.3644580744088453, + "grad_norm": 0.6655696630477905, + "learning_rate": 1.1708534931246573e-05, + "loss": 2.4118, + "step": 16907 + }, + { + "epoch": 1.3645387781454281, + "grad_norm": 0.6912528872489929, + "learning_rate": 1.170112355394517e-05, + "loss": 2.4257, + "step": 16908 + }, + { + "epoch": 1.3646194818820112, + "grad_norm": 0.6612871289253235, + "learning_rate": 1.1693714377282604e-05, + "loss": 2.4192, + "step": 16909 + }, + { + "epoch": 1.364700185618594, + "grad_norm": 0.6548018455505371, + "learning_rate": 1.1686307401443486e-05, + "loss": 2.4054, + "step": 16910 + }, + { + "epoch": 1.3647808893551772, + "grad_norm": 0.7749961018562317, + "learning_rate": 1.1678902626612443e-05, + "loss": 2.44, + "step": 16911 + }, + { + "epoch": 1.36486159309176, + "grad_norm": 0.7187496423721313, + "learning_rate": 1.1671500052974039e-05, + "loss": 2.4033, + "step": 16912 + }, + { + "epoch": 1.3649422968283431, + "grad_norm": 0.7002814412117004, + "learning_rate": 1.1664099680712715e-05, + "loss": 2.4442, + "step": 16913 + }, + { + "epoch": 1.3650230005649262, + "grad_norm": 
0.6852529644966125, + "learning_rate": 1.1656701510012946e-05, + "loss": 2.4253, + "step": 16914 + }, + { + "epoch": 1.365103704301509, + "grad_norm": 0.6922035813331604, + "learning_rate": 1.1649305541059142e-05, + "loss": 2.4406, + "step": 16915 + }, + { + "epoch": 1.3651844080380922, + "grad_norm": 0.6883397698402405, + "learning_rate": 1.1641911774035563e-05, + "loss": 2.4064, + "step": 16916 + }, + { + "epoch": 1.3652651117746752, + "grad_norm": 0.7101531624794006, + "learning_rate": 1.163452020912652e-05, + "loss": 2.4068, + "step": 16917 + }, + { + "epoch": 1.365345815511258, + "grad_norm": 0.728369951248169, + "learning_rate": 1.1627130846516231e-05, + "loss": 2.4319, + "step": 16918 + }, + { + "epoch": 1.3654265192478412, + "grad_norm": 0.6765053272247314, + "learning_rate": 1.161974368638884e-05, + "loss": 2.3922, + "step": 16919 + }, + { + "epoch": 1.3655072229844243, + "grad_norm": 0.6909242868423462, + "learning_rate": 1.1612358728928475e-05, + "loss": 2.4124, + "step": 16920 + }, + { + "epoch": 1.3655879267210072, + "grad_norm": 0.735650897026062, + "learning_rate": 1.1604975974319177e-05, + "loss": 2.5137, + "step": 16921 + }, + { + "epoch": 1.3656686304575902, + "grad_norm": 0.6587653756141663, + "learning_rate": 1.1597595422744934e-05, + "loss": 2.4163, + "step": 16922 + }, + { + "epoch": 1.3657493341941733, + "grad_norm": 0.700282096862793, + "learning_rate": 1.159021707438971e-05, + "loss": 2.4272, + "step": 16923 + }, + { + "epoch": 1.3658300379307562, + "grad_norm": 0.7175682783126831, + "learning_rate": 1.1582840929437365e-05, + "loss": 2.4598, + "step": 16924 + }, + { + "epoch": 1.3659107416673393, + "grad_norm": 0.6725881695747375, + "learning_rate": 1.157546698807176e-05, + "loss": 2.4064, + "step": 16925 + }, + { + "epoch": 1.3659914454039221, + "grad_norm": 0.7130467295646667, + "learning_rate": 1.1568095250476651e-05, + "loss": 2.3851, + "step": 16926 + }, + { + "epoch": 1.3660721491405052, + "grad_norm": 0.6859269142150879, + 
"learning_rate": 1.1560725716835785e-05, + "loss": 2.3577, + "step": 16927 + }, + { + "epoch": 1.366152852877088, + "grad_norm": 0.7037541270256042, + "learning_rate": 1.1553358387332824e-05, + "loss": 2.4402, + "step": 16928 + }, + { + "epoch": 1.3662335566136712, + "grad_norm": 0.7094031572341919, + "learning_rate": 1.1545993262151366e-05, + "loss": 2.4036, + "step": 16929 + }, + { + "epoch": 1.3663142603502543, + "grad_norm": 0.6953302025794983, + "learning_rate": 1.1538630341474965e-05, + "loss": 2.4192, + "step": 16930 + }, + { + "epoch": 1.3663949640868371, + "grad_norm": 0.7012252807617188, + "learning_rate": 1.1531269625487163e-05, + "loss": 2.4207, + "step": 16931 + }, + { + "epoch": 1.3664756678234202, + "grad_norm": 0.6616495847702026, + "learning_rate": 1.1523911114371366e-05, + "loss": 2.4187, + "step": 16932 + }, + { + "epoch": 1.3665563715600033, + "grad_norm": 0.6819868087768555, + "learning_rate": 1.1516554808310975e-05, + "loss": 2.448, + "step": 16933 + }, + { + "epoch": 1.3666370752965862, + "grad_norm": 0.6869969964027405, + "learning_rate": 1.1509200707489343e-05, + "loss": 2.4134, + "step": 16934 + }, + { + "epoch": 1.3667177790331693, + "grad_norm": 0.6600778698921204, + "learning_rate": 1.1501848812089733e-05, + "loss": 2.4159, + "step": 16935 + }, + { + "epoch": 1.3667984827697524, + "grad_norm": 0.668712317943573, + "learning_rate": 1.1494499122295398e-05, + "loss": 2.41, + "step": 16936 + }, + { + "epoch": 1.3668791865063352, + "grad_norm": 0.767365574836731, + "learning_rate": 1.1487151638289518e-05, + "loss": 2.3856, + "step": 16937 + }, + { + "epoch": 1.3669598902429183, + "grad_norm": 0.721546471118927, + "learning_rate": 1.1479806360255174e-05, + "loss": 2.4038, + "step": 16938 + }, + { + "epoch": 1.3670405939795012, + "grad_norm": 0.6796963810920715, + "learning_rate": 1.1472463288375456e-05, + "loss": 2.3698, + "step": 16939 + }, + { + "epoch": 1.3671212977160843, + "grad_norm": 0.7340671420097351, + "learning_rate": 
1.1465122422833363e-05, + "loss": 2.4296, + "step": 16940 + }, + { + "epoch": 1.3672020014526671, + "grad_norm": 0.7173369526863098, + "learning_rate": 1.145778376381187e-05, + "loss": 2.3923, + "step": 16941 + }, + { + "epoch": 1.3672827051892502, + "grad_norm": 0.6683956980705261, + "learning_rate": 1.1450447311493839e-05, + "loss": 2.4092, + "step": 16942 + }, + { + "epoch": 1.3673634089258333, + "grad_norm": 0.6457851529121399, + "learning_rate": 1.1443113066062129e-05, + "loss": 2.3467, + "step": 16943 + }, + { + "epoch": 1.3674441126624162, + "grad_norm": 0.6870608925819397, + "learning_rate": 1.1435781027699532e-05, + "loss": 2.3766, + "step": 16944 + }, + { + "epoch": 1.3675248163989993, + "grad_norm": 0.6496049165725708, + "learning_rate": 1.1428451196588775e-05, + "loss": 2.4464, + "step": 16945 + }, + { + "epoch": 1.3676055201355823, + "grad_norm": 0.7554739117622375, + "learning_rate": 1.1421123572912551e-05, + "loss": 2.4243, + "step": 16946 + }, + { + "epoch": 1.3676862238721652, + "grad_norm": 0.7208122611045837, + "learning_rate": 1.1413798156853495e-05, + "loss": 2.3699, + "step": 16947 + }, + { + "epoch": 1.3677669276087483, + "grad_norm": 0.7072176337242126, + "learning_rate": 1.1406474948594126e-05, + "loss": 2.4011, + "step": 16948 + }, + { + "epoch": 1.3678476313453314, + "grad_norm": 0.7316476106643677, + "learning_rate": 1.1399153948316999e-05, + "loss": 2.4508, + "step": 16949 + }, + { + "epoch": 1.3679283350819142, + "grad_norm": 0.8518069386482239, + "learning_rate": 1.1391835156204577e-05, + "loss": 2.4197, + "step": 16950 + }, + { + "epoch": 1.3680090388184973, + "grad_norm": 0.6700364947319031, + "learning_rate": 1.1384518572439228e-05, + "loss": 2.4272, + "step": 16951 + }, + { + "epoch": 1.3680897425550804, + "grad_norm": 0.7007749676704407, + "learning_rate": 1.1377204197203317e-05, + "loss": 2.3777, + "step": 16952 + }, + { + "epoch": 1.3681704462916633, + "grad_norm": 0.6792053580284119, + "learning_rate": 1.1369892030679141e-05, 
+ "loss": 2.4487, + "step": 16953 + }, + { + "epoch": 1.3682511500282464, + "grad_norm": 0.6913022398948669, + "learning_rate": 1.1362582073048932e-05, + "loss": 2.3757, + "step": 16954 + }, + { + "epoch": 1.3683318537648292, + "grad_norm": 0.648248016834259, + "learning_rate": 1.135527432449488e-05, + "loss": 2.3482, + "step": 16955 + }, + { + "epoch": 1.3684125575014123, + "grad_norm": 0.6711798906326294, + "learning_rate": 1.1347968785199115e-05, + "loss": 2.4096, + "step": 16956 + }, + { + "epoch": 1.3684932612379952, + "grad_norm": 0.6932381987571716, + "learning_rate": 1.1340665455343724e-05, + "loss": 2.3834, + "step": 16957 + }, + { + "epoch": 1.3685739649745783, + "grad_norm": 0.6890178918838501, + "learning_rate": 1.1333364335110697e-05, + "loss": 2.4182, + "step": 16958 + }, + { + "epoch": 1.3686546687111614, + "grad_norm": 0.6612519025802612, + "learning_rate": 1.1326065424681997e-05, + "loss": 2.3691, + "step": 16959 + }, + { + "epoch": 1.3687353724477442, + "grad_norm": 0.7123190760612488, + "learning_rate": 1.131876872423957e-05, + "loss": 2.3919, + "step": 16960 + }, + { + "epoch": 1.3688160761843273, + "grad_norm": 0.6615463495254517, + "learning_rate": 1.1311474233965214e-05, + "loss": 2.4266, + "step": 16961 + }, + { + "epoch": 1.3688967799209104, + "grad_norm": 0.7320190668106079, + "learning_rate": 1.130418195404076e-05, + "loss": 2.4268, + "step": 16962 + }, + { + "epoch": 1.3689774836574933, + "grad_norm": 0.6845116019248962, + "learning_rate": 1.1296891884647965e-05, + "loss": 2.3972, + "step": 16963 + }, + { + "epoch": 1.3690581873940764, + "grad_norm": 0.70455002784729, + "learning_rate": 1.1289604025968448e-05, + "loss": 2.4183, + "step": 16964 + }, + { + "epoch": 1.3691388911306595, + "grad_norm": 0.6952407956123352, + "learning_rate": 1.128231837818392e-05, + "loss": 2.4276, + "step": 16965 + }, + { + "epoch": 1.3692195948672423, + "grad_norm": 0.7939464449882507, + "learning_rate": 1.1275034941475938e-05, + "loss": 2.4072, + "step": 
16966 + }, + { + "epoch": 1.3693002986038254, + "grad_norm": 0.6974930763244629, + "learning_rate": 1.1267753716026007e-05, + "loss": 2.4133, + "step": 16967 + }, + { + "epoch": 1.3693810023404085, + "grad_norm": 0.7187508344650269, + "learning_rate": 1.126047470201559e-05, + "loss": 2.3588, + "step": 16968 + }, + { + "epoch": 1.3694617060769914, + "grad_norm": 0.6887609958648682, + "learning_rate": 1.1253197899626134e-05, + "loss": 2.4322, + "step": 16969 + }, + { + "epoch": 1.3695424098135744, + "grad_norm": 0.679957389831543, + "learning_rate": 1.1245923309038964e-05, + "loss": 2.3907, + "step": 16970 + }, + { + "epoch": 1.3696231135501573, + "grad_norm": 0.7540870308876038, + "learning_rate": 1.1238650930435378e-05, + "loss": 2.4752, + "step": 16971 + }, + { + "epoch": 1.3697038172867404, + "grad_norm": 0.7697634100914001, + "learning_rate": 1.1231380763996635e-05, + "loss": 2.4366, + "step": 16972 + }, + { + "epoch": 1.3697845210233233, + "grad_norm": 0.6836850643157959, + "learning_rate": 1.1224112809903954e-05, + "loss": 2.3511, + "step": 16973 + }, + { + "epoch": 1.3698652247599064, + "grad_norm": 0.6904506683349609, + "learning_rate": 1.1216847068338421e-05, + "loss": 2.4109, + "step": 16974 + }, + { + "epoch": 1.3699459284964894, + "grad_norm": 0.6579318046569824, + "learning_rate": 1.1209583539481127e-05, + "loss": 2.4391, + "step": 16975 + }, + { + "epoch": 1.3700266322330723, + "grad_norm": 0.7107192277908325, + "learning_rate": 1.120232222351314e-05, + "loss": 2.399, + "step": 16976 + }, + { + "epoch": 1.3701073359696554, + "grad_norm": 0.7581583261489868, + "learning_rate": 1.119506312061539e-05, + "loss": 2.4817, + "step": 16977 + }, + { + "epoch": 1.3701880397062385, + "grad_norm": 0.6836642622947693, + "learning_rate": 1.11878062309688e-05, + "loss": 2.4415, + "step": 16978 + }, + { + "epoch": 1.3702687434428213, + "grad_norm": 0.6842699646949768, + "learning_rate": 1.118055155475426e-05, + "loss": 2.4045, + "step": 16979 + }, + { + "epoch": 
1.3703494471794044, + "grad_norm": 0.7630519270896912, + "learning_rate": 1.1173299092152534e-05, + "loss": 2.4314, + "step": 16980 + }, + { + "epoch": 1.3704301509159875, + "grad_norm": 0.7334303259849548, + "learning_rate": 1.116604884334439e-05, + "loss": 2.3564, + "step": 16981 + }, + { + "epoch": 1.3705108546525704, + "grad_norm": 0.6929439306259155, + "learning_rate": 1.1158800808510538e-05, + "loss": 2.4258, + "step": 16982 + }, + { + "epoch": 1.3705915583891535, + "grad_norm": 0.6387187838554382, + "learning_rate": 1.1151554987831591e-05, + "loss": 2.3263, + "step": 16983 + }, + { + "epoch": 1.3706722621257363, + "grad_norm": 0.7279032468795776, + "learning_rate": 1.1144311381488136e-05, + "loss": 2.4074, + "step": 16984 + }, + { + "epoch": 1.3707529658623194, + "grad_norm": 0.7066916227340698, + "learning_rate": 1.113706998966072e-05, + "loss": 2.4358, + "step": 16985 + }, + { + "epoch": 1.3708336695989023, + "grad_norm": 0.6753098964691162, + "learning_rate": 1.1129830812529807e-05, + "loss": 2.4195, + "step": 16986 + }, + { + "epoch": 1.3709143733354854, + "grad_norm": 0.6728894114494324, + "learning_rate": 1.112259385027582e-05, + "loss": 2.3712, + "step": 16987 + }, + { + "epoch": 1.3709950770720685, + "grad_norm": 0.7251775860786438, + "learning_rate": 1.1115359103079115e-05, + "loss": 2.4063, + "step": 16988 + }, + { + "epoch": 1.3710757808086513, + "grad_norm": 0.6797254085540771, + "learning_rate": 1.1108126571120036e-05, + "loss": 2.395, + "step": 16989 + }, + { + "epoch": 1.3711564845452344, + "grad_norm": 0.7505605220794678, + "learning_rate": 1.1100896254578786e-05, + "loss": 2.4044, + "step": 16990 + }, + { + "epoch": 1.3712371882818175, + "grad_norm": 0.7126416563987732, + "learning_rate": 1.1093668153635594e-05, + "loss": 2.4043, + "step": 16991 + }, + { + "epoch": 1.3713178920184004, + "grad_norm": 0.6550771594047546, + "learning_rate": 1.1086442268470609e-05, + "loss": 2.3515, + "step": 16992 + }, + { + "epoch": 1.3713985957549835, + 
"grad_norm": 0.7253621816635132, + "learning_rate": 1.1079218599263874e-05, + "loss": 2.4109, + "step": 16993 + }, + { + "epoch": 1.3714792994915666, + "grad_norm": 0.7272186875343323, + "learning_rate": 1.1071997146195468e-05, + "loss": 2.3531, + "step": 16994 + }, + { + "epoch": 1.3715600032281494, + "grad_norm": 0.6841129660606384, + "learning_rate": 1.1064777909445345e-05, + "loss": 2.4031, + "step": 16995 + }, + { + "epoch": 1.3716407069647325, + "grad_norm": 0.692945659160614, + "learning_rate": 1.1057560889193441e-05, + "loss": 2.3858, + "step": 16996 + }, + { + "epoch": 1.3717214107013156, + "grad_norm": 0.721182644367218, + "learning_rate": 1.1050346085619612e-05, + "loss": 2.3871, + "step": 16997 + }, + { + "epoch": 1.3718021144378985, + "grad_norm": 0.722960889339447, + "learning_rate": 1.1043133498903702e-05, + "loss": 2.3452, + "step": 16998 + }, + { + "epoch": 1.3718828181744815, + "grad_norm": 0.7148451805114746, + "learning_rate": 1.1035923129225412e-05, + "loss": 2.3905, + "step": 16999 + }, + { + "epoch": 1.3719635219110644, + "grad_norm": 0.7118532061576843, + "learning_rate": 1.1028714976764486e-05, + "loss": 2.3894, + "step": 17000 + }, + { + "epoch": 1.3719635219110644, + "eval_loss": 2.3730249404907227, + "eval_runtime": 769.4165, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.568, + "step": 17000 + } + ], + "logging_steps": 1, + "max_steps": 20000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.95407905485312e+17, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/out/checkpoint-17000/training_args.bin b/out/checkpoint-17000/training_args.bin new file mode 100644 index 
0000000000000000000000000000000000000000..ae4a8b118e2a671c30e37a5d24a42d8090b49055 --- /dev/null +++ b/out/checkpoint-17000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c2928f4418c9a306cbe65ca0c1b156ae660c125ec9122008a9f527a50891704 +size 5112 diff --git a/out/checkpoint-18000/config.json b/out/checkpoint-18000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..16f06bb1cdbf882eb90d57ea1906b3790e298a3f --- /dev/null +++ b/out/checkpoint-18000/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "./models/checkpoint-10000", + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1877, + "pad_token_id": 1026, + "reorder_and_upcast_attn": false, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": false, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 50 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.41.2", + "use_cache": true, + "vocab_size": 6027 +} diff --git a/out/checkpoint-18000/generation_config.json b/out/checkpoint-18000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..51f4dbe1c89cfa9da69401685604ff16254d9d20 --- /dev/null +++ b/out/checkpoint-18000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 50256, + "eos_token_id": 50256, + "pad_token_id": 1026, + "transformers_version": "4.41.2" +} diff --git a/out/checkpoint-18000/model.safetensors 
b/out/checkpoint-18000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..50d8f3b91e08f5a41345ac59ee2384403ed5b443 --- /dev/null +++ b/out/checkpoint-18000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:198fc04ddbaa953c5bcc9a4e3599dd47c8c90f85dddfd34a59e22b5a84b6a590 +size 364520064 diff --git a/out/checkpoint-18000/optimizer.pt b/out/checkpoint-18000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3bae89b83469a628b42963fe989449356cf806b4 --- /dev/null +++ b/out/checkpoint-18000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35be891a7563a210bf433f95cd5e7c0ac2001a1c82dfded1743d075e7bcf189c +size 729134010 diff --git a/out/checkpoint-18000/rng_state.pth b/out/checkpoint-18000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7570a9f1324838cc9c69ac4ec0d8217778304a0e --- /dev/null +++ b/out/checkpoint-18000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f6db0bb394067f9edd733615c5d1f1d6ecc586cc8ad6604014075d45f865eb6 +size 14244 diff --git a/out/checkpoint-18000/scheduler.pt b/out/checkpoint-18000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..56f1678e265b6a87d18ba96540e88992b0cdd790 --- /dev/null +++ b/out/checkpoint-18000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09e257775ded399712f9738ac755f4bbc39ac3bdaa44edd0cf546d60bda535ba +size 1064 diff --git a/out/checkpoint-18000/special_tokens_map.json b/out/checkpoint-18000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1b9fa6207c25267215ce16bfacdcb9089df3e897 --- /dev/null +++ b/out/checkpoint-18000/special_tokens_map.json @@ -0,0 +1,9 @@ +{ + "pad_token": { + "content": "<|padding|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff 
--git a/out/checkpoint-18000/tokenizer.json b/out/checkpoint-18000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..2bf66a33fda75b69f9b1a9597987f418f5acfb49 --- /dev/null +++ b/out/checkpoint-18000/tokenizer.json @@ -0,0 +1,20279 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|audio:0|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|audio:1|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|audio:2|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 3, + "content": "<|audio:3|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 4, + "content": "<|audio:4|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 5, + "content": "<|audio:5|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 6, + "content": "<|audio:6|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 7, + "content": "<|audio:7|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 8, + "content": "<|audio:8|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 9, + "content": "<|audio:9|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 10, + "content": "<|audio:10|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 11, + "content": "<|audio:11|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 12, + "content": "<|audio:12|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 13, + "content": "<|audio:13|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 14, + "content": "<|audio:14|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 15, + "content": "<|audio:15|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 16, + "content": "<|audio:16|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 17, + "content": "<|audio:17|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 18, + "content": "<|audio:18|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 19, + "content": "<|audio:19|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 20, + "content": "<|audio:20|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 21, + "content": "<|audio:21|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 22, + "content": "<|audio:22|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 23, + "content": "<|audio:23|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": 
true + }, + { + "id": 24, + "content": "<|audio:24|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 25, + "content": "<|audio:25|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 26, + "content": "<|audio:26|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 27, + "content": "<|audio:27|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 28, + "content": "<|audio:28|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 29, + "content": "<|audio:29|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 30, + "content": "<|audio:30|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 31, + "content": "<|audio:31|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 32, + "content": "<|audio:32|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 33, + "content": "<|audio:33|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 34, + "content": "<|audio:34|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 35, + "content": "<|audio:35|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 36, + "content": "<|audio:36|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, 
+ { + "id": 37, + "content": "<|audio:37|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 38, + "content": "<|audio:38|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 39, + "content": "<|audio:39|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 40, + "content": "<|audio:40|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 41, + "content": "<|audio:41|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 42, + "content": "<|audio:42|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 43, + "content": "<|audio:43|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 44, + "content": "<|audio:44|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 45, + "content": "<|audio:45|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 46, + "content": "<|audio:46|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 47, + "content": "<|audio:47|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 48, + "content": "<|audio:48|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 49, + "content": "<|audio:49|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + 
"id": 50, + "content": "<|audio:50|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 51, + "content": "<|audio:51|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 52, + "content": "<|audio:52|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 53, + "content": "<|audio:53|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 54, + "content": "<|audio:54|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 55, + "content": "<|audio:55|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 56, + "content": "<|audio:56|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 57, + "content": "<|audio:57|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 58, + "content": "<|audio:58|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 59, + "content": "<|audio:59|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 60, + "content": "<|audio:60|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 61, + "content": "<|audio:61|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 62, + "content": "<|audio:62|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 63, 
+ "content": "<|audio:63|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 64, + "content": "<|audio:64|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 65, + "content": "<|audio:65|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 66, + "content": "<|audio:66|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 67, + "content": "<|audio:67|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 68, + "content": "<|audio:68|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 69, + "content": "<|audio:69|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 70, + "content": "<|audio:70|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 71, + "content": "<|audio:71|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 72, + "content": "<|audio:72|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 73, + "content": "<|audio:73|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 74, + "content": "<|audio:74|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 75, + "content": "<|audio:75|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 76, + 
"content": "<|audio:76|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 77, + "content": "<|audio:77|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 78, + "content": "<|audio:78|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 79, + "content": "<|audio:79|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 80, + "content": "<|audio:80|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 81, + "content": "<|audio:81|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 82, + "content": "<|audio:82|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 83, + "content": "<|audio:83|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 84, + "content": "<|audio:84|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 85, + "content": "<|audio:85|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 86, + "content": "<|audio:86|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 87, + "content": "<|audio:87|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 88, + "content": "<|audio:88|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 89, + "content": 
"<|audio:89|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 90, + "content": "<|audio:90|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 91, + "content": "<|audio:91|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 92, + "content": "<|audio:92|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 93, + "content": "<|audio:93|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 94, + "content": "<|audio:94|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 95, + "content": "<|audio:95|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 96, + "content": "<|audio:96|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 97, + "content": "<|audio:97|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 98, + "content": "<|audio:98|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 99, + "content": "<|audio:99|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 100, + "content": "<|audio:100|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 101, + "content": "<|audio:101|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 102, + "content": 
"<|audio:102|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 103, + "content": "<|audio:103|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 104, + "content": "<|audio:104|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 105, + "content": "<|audio:105|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 106, + "content": "<|audio:106|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 107, + "content": "<|audio:107|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 108, + "content": "<|audio:108|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 109, + "content": "<|audio:109|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 110, + "content": "<|audio:110|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 111, + "content": "<|audio:111|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 112, + "content": "<|audio:112|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 113, + "content": "<|audio:113|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 114, + "content": "<|audio:114|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
115, + "content": "<|audio:115|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 116, + "content": "<|audio:116|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 117, + "content": "<|audio:117|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 118, + "content": "<|audio:118|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 119, + "content": "<|audio:119|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 120, + "content": "<|audio:120|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 121, + "content": "<|audio:121|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 122, + "content": "<|audio:122|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 123, + "content": "<|audio:123|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 124, + "content": "<|audio:124|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 125, + "content": "<|audio:125|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 126, + "content": "<|audio:126|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127, + "content": "<|audio:127|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 128, + "content": "<|audio:128|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 129, + "content": "<|audio:129|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 130, + "content": "<|audio:130|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 131, + "content": "<|audio:131|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 132, + "content": "<|audio:132|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 133, + "content": "<|audio:133|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 134, + "content": "<|audio:134|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 135, + "content": "<|audio:135|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 136, + "content": "<|audio:136|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 137, + "content": "<|audio:137|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 138, + "content": "<|audio:138|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 139, + "content": "<|audio:139|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 140, + "content": "<|audio:140|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 141, + "content": "<|audio:141|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 142, + "content": "<|audio:142|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 143, + "content": "<|audio:143|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 144, + "content": "<|audio:144|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 145, + "content": "<|audio:145|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 146, + "content": "<|audio:146|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 147, + "content": "<|audio:147|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 148, + "content": "<|audio:148|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 149, + "content": "<|audio:149|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 150, + "content": "<|audio:150|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 151, + "content": "<|audio:151|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 152, + "content": "<|audio:152|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 153, + "content": "<|audio:153|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 154, + "content": "<|audio:154|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 155, + "content": "<|audio:155|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 156, + "content": "<|audio:156|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 157, + "content": "<|audio:157|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 158, + "content": "<|audio:158|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 159, + "content": "<|audio:159|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 160, + "content": "<|audio:160|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 161, + "content": "<|audio:161|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 162, + "content": "<|audio:162|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 163, + "content": "<|audio:163|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 164, + "content": "<|audio:164|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 165, + "content": "<|audio:165|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 166, + "content": "<|audio:166|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 167, + "content": "<|audio:167|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 168, + "content": "<|audio:168|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 169, + "content": "<|audio:169|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 170, + "content": "<|audio:170|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 171, + "content": "<|audio:171|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 172, + "content": "<|audio:172|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 173, + "content": "<|audio:173|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 174, + "content": "<|audio:174|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 175, + "content": "<|audio:175|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 176, + "content": "<|audio:176|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 177, + "content": "<|audio:177|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 178, + "content": "<|audio:178|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 179, + "content": "<|audio:179|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 180, + "content": "<|audio:180|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 181, + "content": "<|audio:181|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 182, + "content": "<|audio:182|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 183, + "content": "<|audio:183|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 184, + "content": "<|audio:184|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 185, + "content": "<|audio:185|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 186, + "content": "<|audio:186|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 187, + "content": "<|audio:187|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 188, + "content": "<|audio:188|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 189, + "content": "<|audio:189|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 190, + "content": "<|audio:190|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 191, + "content": "<|audio:191|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 192, + "content": "<|audio:192|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 193, + "content": "<|audio:193|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 194, + "content": "<|audio:194|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 195, + "content": "<|audio:195|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 196, + "content": "<|audio:196|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 197, + "content": "<|audio:197|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 198, + "content": "<|audio:198|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 199, + "content": "<|audio:199|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 200, + "content": "<|audio:200|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 201, + "content": "<|audio:201|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 202, + "content": "<|audio:202|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 203, + "content": "<|audio:203|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 204, + "content": "<|audio:204|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 205, + "content": 
"<|audio:205|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 206, + "content": "<|audio:206|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 207, + "content": "<|audio:207|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 208, + "content": "<|audio:208|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 209, + "content": "<|audio:209|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 210, + "content": "<|audio:210|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 211, + "content": "<|audio:211|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 212, + "content": "<|audio:212|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 213, + "content": "<|audio:213|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 214, + "content": "<|audio:214|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 215, + "content": "<|audio:215|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 216, + "content": "<|audio:216|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 217, + "content": "<|audio:217|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
218, + "content": "<|audio:218|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 219, + "content": "<|audio:219|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 220, + "content": "<|audio:220|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 221, + "content": "<|audio:221|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 222, + "content": "<|audio:222|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 223, + "content": "<|audio:223|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 224, + "content": "<|audio:224|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 225, + "content": "<|audio:225|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 226, + "content": "<|audio:226|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 227, + "content": "<|audio:227|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 228, + "content": "<|audio:228|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 229, + "content": "<|audio:229|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 230, + "content": "<|audio:230|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 231, + "content": "<|audio:231|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 232, + "content": "<|audio:232|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 233, + "content": "<|audio:233|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 234, + "content": "<|audio:234|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 235, + "content": "<|audio:235|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 236, + "content": "<|audio:236|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 237, + "content": "<|audio:237|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 238, + "content": "<|audio:238|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 239, + "content": "<|audio:239|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 240, + "content": "<|audio:240|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 241, + "content": "<|audio:241|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 242, + "content": "<|audio:242|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 243, + "content": "<|audio:243|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 244, + "content": "<|audio:244|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 245, + "content": "<|audio:245|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 246, + "content": "<|audio:246|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 247, + "content": "<|audio:247|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 248, + "content": "<|audio:248|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 249, + "content": "<|audio:249|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 250, + "content": "<|audio:250|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 251, + "content": "<|audio:251|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 252, + "content": "<|audio:252|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 253, + "content": "<|audio:253|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 254, + "content": "<|audio:254|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 255, + "content": "<|audio:255|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 256, + "content": "<|audio:256|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 257, + "content": "<|audio:257|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 258, + "content": "<|audio:258|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 259, + "content": "<|audio:259|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 260, + "content": "<|audio:260|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 261, + "content": "<|audio:261|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 262, + "content": "<|audio:262|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 263, + "content": "<|audio:263|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 264, + "content": "<|audio:264|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 265, + "content": "<|audio:265|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 266, + "content": "<|audio:266|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 267, + "content": "<|audio:267|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 268, + "content": "<|audio:268|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 269, + "content": "<|audio:269|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 270, + "content": "<|audio:270|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 271, + "content": "<|audio:271|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 272, + "content": "<|audio:272|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 273, + "content": "<|audio:273|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 274, + "content": "<|audio:274|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 275, + "content": "<|audio:275|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 276, + "content": "<|audio:276|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 277, + "content": "<|audio:277|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 278, + "content": "<|audio:278|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 279, + "content": "<|audio:279|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 280, + "content": "<|audio:280|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 281, + "content": "<|audio:281|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 282, + "content": "<|audio:282|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 283, + "content": "<|audio:283|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 284, + "content": "<|audio:284|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 285, + "content": "<|audio:285|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 286, + "content": "<|audio:286|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 287, + "content": "<|audio:287|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 288, + "content": "<|audio:288|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 289, + "content": "<|audio:289|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 290, + "content": "<|audio:290|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 291, + "content": "<|audio:291|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 292, + "content": "<|audio:292|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 293, + "content": "<|audio:293|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 294, + "content": "<|audio:294|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 295, + "content": "<|audio:295|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 296, + "content": "<|audio:296|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 297, + "content": "<|audio:297|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 298, + "content": "<|audio:298|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 299, + "content": "<|audio:299|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 300, + "content": "<|audio:300|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 301, + "content": "<|audio:301|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 302, + "content": "<|audio:302|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 303, + "content": "<|audio:303|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 304, + "content": "<|audio:304|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 305, + "content": "<|audio:305|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 306, + "content": "<|audio:306|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 307, + "content": "<|audio:307|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 308, + "content": 
"<|audio:308|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 309, + "content": "<|audio:309|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 310, + "content": "<|audio:310|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 311, + "content": "<|audio:311|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 312, + "content": "<|audio:312|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 313, + "content": "<|audio:313|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 314, + "content": "<|audio:314|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 315, + "content": "<|audio:315|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 316, + "content": "<|audio:316|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 317, + "content": "<|audio:317|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 318, + "content": "<|audio:318|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 319, + "content": "<|audio:319|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 320, + "content": "<|audio:320|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
321, + "content": "<|audio:321|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 322, + "content": "<|audio:322|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 323, + "content": "<|audio:323|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 324, + "content": "<|audio:324|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 325, + "content": "<|audio:325|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 326, + "content": "<|audio:326|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 327, + "content": "<|audio:327|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 328, + "content": "<|audio:328|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 329, + "content": "<|audio:329|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 330, + "content": "<|audio:330|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 331, + "content": "<|audio:331|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 332, + "content": "<|audio:332|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 333, + "content": "<|audio:333|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 334, + "content": "<|audio:334|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 335, + "content": "<|audio:335|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 336, + "content": "<|audio:336|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 337, + "content": "<|audio:337|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 338, + "content": "<|audio:338|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 339, + "content": "<|audio:339|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 340, + "content": "<|audio:340|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 341, + "content": "<|audio:341|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 342, + "content": "<|audio:342|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 343, + "content": "<|audio:343|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 344, + "content": "<|audio:344|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 345, + "content": "<|audio:345|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 346, + "content": "<|audio:346|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 347, + "content": "<|audio:347|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 348, + "content": "<|audio:348|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 349, + "content": "<|audio:349|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 350, + "content": "<|audio:350|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 351, + "content": "<|audio:351|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 352, + "content": "<|audio:352|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 353, + "content": "<|audio:353|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 354, + "content": "<|audio:354|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 355, + "content": "<|audio:355|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 356, + "content": "<|audio:356|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 357, + "content": "<|audio:357|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 358, + "content": "<|audio:358|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 359, + "content": "<|audio:359|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 360, + "content": "<|audio:360|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 361, + "content": "<|audio:361|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 362, + "content": "<|audio:362|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 363, + "content": "<|audio:363|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 364, + "content": "<|audio:364|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 365, + "content": "<|audio:365|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 366, + "content": "<|audio:366|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 367, + "content": "<|audio:367|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 368, + "content": "<|audio:368|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 369, + "content": "<|audio:369|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 370, + "content": "<|audio:370|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 371, + "content": "<|audio:371|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 372, + "content": "<|audio:372|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 373, + "content": "<|audio:373|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 374, + "content": "<|audio:374|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 375, + "content": "<|audio:375|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 376, + "content": "<|audio:376|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 377, + "content": "<|audio:377|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 378, + "content": "<|audio:378|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 379, + "content": "<|audio:379|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 380, + "content": "<|audio:380|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 381, + "content": "<|audio:381|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 382, + "content": "<|audio:382|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 383, + "content": "<|audio:383|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 384, + "content": "<|audio:384|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 385, + "content": "<|audio:385|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 386, + "content": "<|audio:386|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 387, + "content": "<|audio:387|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 388, + "content": "<|audio:388|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 389, + "content": "<|audio:389|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 390, + "content": "<|audio:390|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 391, + "content": "<|audio:391|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 392, + "content": "<|audio:392|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 393, + "content": "<|audio:393|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 394, + "content": "<|audio:394|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 395, + "content": "<|audio:395|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 396, + "content": "<|audio:396|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 397, + "content": "<|audio:397|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 398, + "content": "<|audio:398|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 399, + "content": "<|audio:399|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 400, + "content": "<|audio:400|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 401, + "content": "<|audio:401|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 402, + "content": "<|audio:402|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 403, + "content": "<|audio:403|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 404, + "content": "<|audio:404|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 405, + "content": "<|audio:405|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 406, + "content": "<|audio:406|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 407, + "content": "<|audio:407|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 408, + "content": "<|audio:408|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 409, + "content": "<|audio:409|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 410, + "content": "<|audio:410|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 411, + "content": 
"<|audio:411|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 412, + "content": "<|audio:412|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 413, + "content": "<|audio:413|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 414, + "content": "<|audio:414|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 415, + "content": "<|audio:415|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 416, + "content": "<|audio:416|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 417, + "content": "<|audio:417|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 418, + "content": "<|audio:418|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 419, + "content": "<|audio:419|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 420, + "content": "<|audio:420|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 421, + "content": "<|audio:421|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 422, + "content": "<|audio:422|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 423, + "content": "<|audio:423|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
424, + "content": "<|audio:424|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 425, + "content": "<|audio:425|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 426, + "content": "<|audio:426|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 427, + "content": "<|audio:427|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 428, + "content": "<|audio:428|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 429, + "content": "<|audio:429|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 430, + "content": "<|audio:430|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 431, + "content": "<|audio:431|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 432, + "content": "<|audio:432|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 433, + "content": "<|audio:433|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 434, + "content": "<|audio:434|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 435, + "content": "<|audio:435|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 436, + "content": "<|audio:436|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 437, + "content": "<|audio:437|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 438, + "content": "<|audio:438|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 439, + "content": "<|audio:439|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 440, + "content": "<|audio:440|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 441, + "content": "<|audio:441|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 442, + "content": "<|audio:442|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 443, + "content": "<|audio:443|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 444, + "content": "<|audio:444|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 445, + "content": "<|audio:445|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 446, + "content": "<|audio:446|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 447, + "content": "<|audio:447|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 448, + "content": "<|audio:448|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 449, + "content": "<|audio:449|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 450, + "content": "<|audio:450|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 451, + "content": "<|audio:451|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 452, + "content": "<|audio:452|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 453, + "content": "<|audio:453|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 454, + "content": "<|audio:454|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 455, + "content": "<|audio:455|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 456, + "content": "<|audio:456|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 457, + "content": "<|audio:457|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 458, + "content": "<|audio:458|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 459, + "content": "<|audio:459|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 460, + "content": "<|audio:460|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 461, + "content": "<|audio:461|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 462, + "content": "<|audio:462|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 463, + "content": "<|audio:463|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 464, + "content": "<|audio:464|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 465, + "content": "<|audio:465|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 466, + "content": "<|audio:466|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 467, + "content": "<|audio:467|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 468, + "content": "<|audio:468|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 469, + "content": "<|audio:469|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 470, + "content": "<|audio:470|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 471, + "content": "<|audio:471|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 472, + "content": "<|audio:472|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 473, + "content": "<|audio:473|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 474, + "content": "<|audio:474|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 475, + "content": "<|audio:475|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 476, + "content": "<|audio:476|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 477, + "content": "<|audio:477|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 478, + "content": "<|audio:478|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 479, + "content": "<|audio:479|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 480, + "content": "<|audio:480|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 481, + "content": "<|audio:481|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 482, + "content": "<|audio:482|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 483, + "content": "<|audio:483|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 484, + "content": "<|audio:484|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 485, + "content": "<|audio:485|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 486, + "content": "<|audio:486|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 487, + "content": "<|audio:487|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 488, + "content": "<|audio:488|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 489, + "content": "<|audio:489|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 490, + "content": "<|audio:490|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 491, + "content": "<|audio:491|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 492, + "content": "<|audio:492|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 493, + "content": "<|audio:493|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 494, + "content": "<|audio:494|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 495, + "content": "<|audio:495|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 496, + "content": "<|audio:496|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 497, + "content": "<|audio:497|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 498, + "content": "<|audio:498|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 499, + "content": "<|audio:499|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 500, + "content": "<|audio:500|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 501, + "content": "<|audio:501|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 502, + "content": "<|audio:502|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 503, + "content": "<|audio:503|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 504, + "content": "<|audio:504|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 505, + "content": "<|audio:505|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 506, + "content": "<|audio:506|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 507, + "content": "<|audio:507|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 508, + "content": "<|audio:508|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 509, + "content": "<|audio:509|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 510, + "content": "<|audio:510|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 511, + "content": "<|audio:511|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 512, + "content": "<|audio:512|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 513, + "content": "<|audio:513|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 514, + "content": 
"<|audio:514|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 515, + "content": "<|audio:515|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 516, + "content": "<|audio:516|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 517, + "content": "<|audio:517|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 518, + "content": "<|audio:518|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 519, + "content": "<|audio:519|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 520, + "content": "<|audio:520|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 521, + "content": "<|audio:521|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 522, + "content": "<|audio:522|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 523, + "content": "<|audio:523|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 524, + "content": "<|audio:524|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 525, + "content": "<|audio:525|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 526, + "content": "<|audio:526|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
527, + "content": "<|audio:527|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 528, + "content": "<|audio:528|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 529, + "content": "<|audio:529|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 530, + "content": "<|audio:530|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 531, + "content": "<|audio:531|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 532, + "content": "<|audio:532|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 533, + "content": "<|audio:533|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 534, + "content": "<|audio:534|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 535, + "content": "<|audio:535|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 536, + "content": "<|audio:536|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 537, + "content": "<|audio:537|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 538, + "content": "<|audio:538|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 539, + "content": "<|audio:539|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 540, + "content": "<|audio:540|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 541, + "content": "<|audio:541|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 542, + "content": "<|audio:542|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 543, + "content": "<|audio:543|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 544, + "content": "<|audio:544|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 545, + "content": "<|audio:545|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 546, + "content": "<|audio:546|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 547, + "content": "<|audio:547|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 548, + "content": "<|audio:548|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 549, + "content": "<|audio:549|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 550, + "content": "<|audio:550|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 551, + "content": "<|audio:551|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 552, + "content": "<|audio:552|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 553, + "content": "<|audio:553|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 554, + "content": "<|audio:554|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 555, + "content": "<|audio:555|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 556, + "content": "<|audio:556|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 557, + "content": "<|audio:557|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 558, + "content": "<|audio:558|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 559, + "content": "<|audio:559|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 560, + "content": "<|audio:560|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 561, + "content": "<|audio:561|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 562, + "content": "<|audio:562|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 563, + "content": "<|audio:563|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 564, + "content": "<|audio:564|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 565, + "content": "<|audio:565|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 566, + "content": "<|audio:566|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 567, + "content": "<|audio:567|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 568, + "content": "<|audio:568|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 569, + "content": "<|audio:569|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 570, + "content": "<|audio:570|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 571, + "content": "<|audio:571|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 572, + "content": "<|audio:572|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 573, + "content": "<|audio:573|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 574, + "content": "<|audio:574|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 575, + "content": "<|audio:575|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 576, + "content": "<|audio:576|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 577, + "content": "<|audio:577|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 578, + "content": "<|audio:578|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 579, + "content": "<|audio:579|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 580, + "content": "<|audio:580|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 581, + "content": "<|audio:581|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 582, + "content": "<|audio:582|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 583, + "content": "<|audio:583|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 584, + "content": "<|audio:584|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 585, + "content": "<|audio:585|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 586, + "content": "<|audio:586|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 587, + "content": "<|audio:587|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 588, + "content": "<|audio:588|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 589, + "content": "<|audio:589|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 590, + "content": "<|audio:590|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 591, + "content": "<|audio:591|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 592, + "content": "<|audio:592|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 593, + "content": "<|audio:593|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 594, + "content": "<|audio:594|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 595, + "content": "<|audio:595|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 596, + "content": "<|audio:596|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 597, + "content": "<|audio:597|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 598, + "content": "<|audio:598|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 599, + "content": "<|audio:599|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 600, + "content": "<|audio:600|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 601, + "content": "<|audio:601|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 602, + "content": "<|audio:602|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 603, + "content": "<|audio:603|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 604, + "content": "<|audio:604|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 605, + "content": "<|audio:605|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 606, + "content": "<|audio:606|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 607, + "content": "<|audio:607|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 608, + "content": "<|audio:608|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 609, + "content": "<|audio:609|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 610, + "content": "<|audio:610|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 611, + "content": "<|audio:611|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 612, + "content": "<|audio:612|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 613, + "content": "<|audio:613|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 614, + "content": "<|audio:614|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 615, + "content": "<|audio:615|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 616, + "content": "<|audio:616|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 617, + "content": 
"<|audio:617|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 618, + "content": "<|audio:618|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 619, + "content": "<|audio:619|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 620, + "content": "<|audio:620|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 621, + "content": "<|audio:621|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 622, + "content": "<|audio:622|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 623, + "content": "<|audio:623|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 624, + "content": "<|audio:624|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 625, + "content": "<|audio:625|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 626, + "content": "<|audio:626|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 627, + "content": "<|audio:627|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 628, + "content": "<|audio:628|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 629, + "content": "<|audio:629|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
630, + "content": "<|audio:630|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 631, + "content": "<|audio:631|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 632, + "content": "<|audio:632|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 633, + "content": "<|audio:633|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 634, + "content": "<|audio:634|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 635, + "content": "<|audio:635|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 636, + "content": "<|audio:636|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 637, + "content": "<|audio:637|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 638, + "content": "<|audio:638|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 639, + "content": "<|audio:639|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 640, + "content": "<|audio:640|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 641, + "content": "<|audio:641|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 642, + "content": "<|audio:642|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 643, + "content": "<|audio:643|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 644, + "content": "<|audio:644|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 645, + "content": "<|audio:645|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 646, + "content": "<|audio:646|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 647, + "content": "<|audio:647|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 648, + "content": "<|audio:648|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 649, + "content": "<|audio:649|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 650, + "content": "<|audio:650|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 651, + "content": "<|audio:651|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 652, + "content": "<|audio:652|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 653, + "content": "<|audio:653|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 654, + "content": "<|audio:654|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 655, + "content": "<|audio:655|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 656, + "content": "<|audio:656|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 657, + "content": "<|audio:657|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 658, + "content": "<|audio:658|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 659, + "content": "<|audio:659|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 660, + "content": "<|audio:660|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 661, + "content": "<|audio:661|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 662, + "content": "<|audio:662|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 663, + "content": "<|audio:663|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 664, + "content": "<|audio:664|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 665, + "content": "<|audio:665|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 666, + "content": "<|audio:666|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 667, + "content": "<|audio:667|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 668, + "content": "<|audio:668|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 669, + "content": "<|audio:669|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 670, + "content": "<|audio:670|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 671, + "content": "<|audio:671|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 672, + "content": "<|audio:672|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 673, + "content": "<|audio:673|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 674, + "content": "<|audio:674|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 675, + "content": "<|audio:675|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 676, + "content": "<|audio:676|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 677, + "content": "<|audio:677|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 678, + "content": "<|audio:678|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 679, + "content": "<|audio:679|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 680, + "content": "<|audio:680|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 681, + "content": "<|audio:681|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 682, + "content": "<|audio:682|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 683, + "content": "<|audio:683|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 684, + "content": "<|audio:684|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 685, + "content": "<|audio:685|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 686, + "content": "<|audio:686|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 687, + "content": "<|audio:687|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 688, + "content": "<|audio:688|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 689, + "content": "<|audio:689|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 690, + "content": "<|audio:690|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 691, + "content": "<|audio:691|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 692, + "content": "<|audio:692|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 693, + "content": "<|audio:693|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 694, + "content": "<|audio:694|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 695, + "content": "<|audio:695|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 696, + "content": "<|audio:696|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 697, + "content": "<|audio:697|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 698, + "content": "<|audio:698|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 699, + "content": "<|audio:699|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 700, + "content": "<|audio:700|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 701, + "content": "<|audio:701|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 702, + "content": "<|audio:702|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 703, + "content": "<|audio:703|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 704, + "content": "<|audio:704|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 705, + "content": "<|audio:705|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 706, + "content": "<|audio:706|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 707, + "content": "<|audio:707|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 708, + "content": "<|audio:708|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 709, + "content": "<|audio:709|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 710, + "content": "<|audio:710|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 711, + "content": "<|audio:711|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 712, + "content": "<|audio:712|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 713, + "content": "<|audio:713|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 714, + "content": "<|audio:714|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 715, + "content": "<|audio:715|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 716, + "content": "<|audio:716|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 717, + "content": "<|audio:717|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 718, + "content": "<|audio:718|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 719, + "content": "<|audio:719|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 720, + "content": 
"<|audio:720|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 721, + "content": "<|audio:721|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 722, + "content": "<|audio:722|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 723, + "content": "<|audio:723|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 724, + "content": "<|audio:724|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 725, + "content": "<|audio:725|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 726, + "content": "<|audio:726|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 727, + "content": "<|audio:727|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 728, + "content": "<|audio:728|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 729, + "content": "<|audio:729|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 730, + "content": "<|audio:730|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 731, + "content": "<|audio:731|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 732, + "content": "<|audio:732|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
733, + "content": "<|audio:733|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 734, + "content": "<|audio:734|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 735, + "content": "<|audio:735|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 736, + "content": "<|audio:736|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 737, + "content": "<|audio:737|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 738, + "content": "<|audio:738|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 739, + "content": "<|audio:739|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 740, + "content": "<|audio:740|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 741, + "content": "<|audio:741|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 742, + "content": "<|audio:742|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 743, + "content": "<|audio:743|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 744, + "content": "<|audio:744|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 745, + "content": "<|audio:745|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 746, + "content": "<|audio:746|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 747, + "content": "<|audio:747|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 748, + "content": "<|audio:748|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 749, + "content": "<|audio:749|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 750, + "content": "<|audio:750|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 751, + "content": "<|audio:751|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 752, + "content": "<|audio:752|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 753, + "content": "<|audio:753|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 754, + "content": "<|audio:754|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 755, + "content": "<|audio:755|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 756, + "content": "<|audio:756|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 757, + "content": "<|audio:757|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 758, + "content": "<|audio:758|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 759, + "content": "<|audio:759|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 760, + "content": "<|audio:760|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 761, + "content": "<|audio:761|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 762, + "content": "<|audio:762|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 763, + "content": "<|audio:763|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 764, + "content": "<|audio:764|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 765, + "content": "<|audio:765|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 766, + "content": "<|audio:766|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 767, + "content": "<|audio:767|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 768, + "content": "<|audio:768|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 769, + "content": "<|audio:769|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 770, + "content": "<|audio:770|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 771, + "content": "<|audio:771|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 772, + "content": "<|audio:772|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 773, + "content": "<|audio:773|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 774, + "content": "<|audio:774|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 775, + "content": "<|audio:775|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 776, + "content": "<|audio:776|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 777, + "content": "<|audio:777|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 778, + "content": "<|audio:778|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 779, + "content": "<|audio:779|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 780, + "content": "<|audio:780|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 781, + "content": "<|audio:781|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 782, + "content": "<|audio:782|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 783, + "content": "<|audio:783|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 784, + "content": "<|audio:784|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 785, + "content": "<|audio:785|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 786, + "content": "<|audio:786|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 787, + "content": "<|audio:787|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 788, + "content": "<|audio:788|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 789, + "content": "<|audio:789|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 790, + "content": "<|audio:790|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 791, + "content": "<|audio:791|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 792, + "content": "<|audio:792|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 793, + "content": "<|audio:793|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 794, + "content": "<|audio:794|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 795, + "content": "<|audio:795|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 796, + "content": "<|audio:796|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 797, + "content": "<|audio:797|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 798, + "content": "<|audio:798|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 799, + "content": "<|audio:799|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 800, + "content": "<|audio:800|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 801, + "content": "<|audio:801|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 802, + "content": "<|audio:802|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 803, + "content": "<|audio:803|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 804, + "content": "<|audio:804|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 805, + "content": "<|audio:805|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 806, + "content": "<|audio:806|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 807, + "content": "<|audio:807|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 808, + "content": "<|audio:808|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 809, + "content": "<|audio:809|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 810, + "content": "<|audio:810|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 811, + "content": "<|audio:811|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 812, + "content": "<|audio:812|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 813, + "content": "<|audio:813|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 814, + "content": "<|audio:814|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 815, + "content": "<|audio:815|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 816, + "content": "<|audio:816|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 817, + "content": "<|audio:817|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 818, + "content": "<|audio:818|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 819, + "content": "<|audio:819|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 820, + "content": "<|audio:820|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 821, + "content": "<|audio:821|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 822, + "content": "<|audio:822|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 823, + "content": 
"<|audio:823|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 824, + "content": "<|audio:824|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 825, + "content": "<|audio:825|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 826, + "content": "<|audio:826|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 827, + "content": "<|audio:827|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 828, + "content": "<|audio:828|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 829, + "content": "<|audio:829|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 830, + "content": "<|audio:830|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 831, + "content": "<|audio:831|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 832, + "content": "<|audio:832|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 833, + "content": "<|audio:833|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 834, + "content": "<|audio:834|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 835, + "content": "<|audio:835|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
836, + "content": "<|audio:836|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 837, + "content": "<|audio:837|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 838, + "content": "<|audio:838|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 839, + "content": "<|audio:839|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 840, + "content": "<|audio:840|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 841, + "content": "<|audio:841|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 842, + "content": "<|audio:842|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 843, + "content": "<|audio:843|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 844, + "content": "<|audio:844|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 845, + "content": "<|audio:845|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 846, + "content": "<|audio:846|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 847, + "content": "<|audio:847|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 848, + "content": "<|audio:848|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 849, + "content": "<|audio:849|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 850, + "content": "<|audio:850|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 851, + "content": "<|audio:851|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 852, + "content": "<|audio:852|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 853, + "content": "<|audio:853|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 854, + "content": "<|audio:854|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 855, + "content": "<|audio:855|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 856, + "content": "<|audio:856|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 857, + "content": "<|audio:857|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 858, + "content": "<|audio:858|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 859, + "content": "<|audio:859|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 860, + "content": "<|audio:860|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 861, + "content": "<|audio:861|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 862, + "content": "<|audio:862|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 863, + "content": "<|audio:863|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 864, + "content": "<|audio:864|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 865, + "content": "<|audio:865|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 866, + "content": "<|audio:866|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 867, + "content": "<|audio:867|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 868, + "content": "<|audio:868|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 869, + "content": "<|audio:869|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 870, + "content": "<|audio:870|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 871, + "content": "<|audio:871|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 872, + "content": "<|audio:872|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 873, + "content": "<|audio:873|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 874, + "content": "<|audio:874|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 875, + "content": "<|audio:875|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 876, + "content": "<|audio:876|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 877, + "content": "<|audio:877|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 878, + "content": "<|audio:878|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 879, + "content": "<|audio:879|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 880, + "content": "<|audio:880|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 881, + "content": "<|audio:881|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 882, + "content": "<|audio:882|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 883, + "content": "<|audio:883|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 884, + "content": "<|audio:884|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 885, + "content": "<|audio:885|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 886, + "content": "<|audio:886|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 887, + "content": "<|audio:887|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 888, + "content": "<|audio:888|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 889, + "content": "<|audio:889|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 890, + "content": "<|audio:890|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 891, + "content": "<|audio:891|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 892, + "content": "<|audio:892|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 893, + "content": "<|audio:893|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 894, + "content": "<|audio:894|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 895, + "content": "<|audio:895|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 896, + "content": "<|audio:896|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 897, + "content": "<|audio:897|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 898, + "content": "<|audio:898|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 899, + "content": "<|audio:899|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 900, + "content": "<|audio:900|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 901, + "content": "<|audio:901|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 902, + "content": "<|audio:902|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 903, + "content": "<|audio:903|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 904, + "content": "<|audio:904|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 905, + "content": "<|audio:905|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 906, + "content": "<|audio:906|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 907, + "content": "<|audio:907|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 908, + "content": "<|audio:908|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 909, + "content": "<|audio:909|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 910, + "content": "<|audio:910|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 911, + "content": "<|audio:911|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 912, + "content": "<|audio:912|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 913, + "content": "<|audio:913|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 914, + "content": "<|audio:914|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 915, + "content": "<|audio:915|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 916, + "content": "<|audio:916|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 917, + "content": "<|audio:917|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 918, + "content": "<|audio:918|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 919, + "content": "<|audio:919|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 920, + "content": "<|audio:920|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 921, + "content": "<|audio:921|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 922, + "content": "<|audio:922|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 923, + "content": "<|audio:923|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 924, + "content": "<|audio:924|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 925, + "content": "<|audio:925|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 926, + "content": 
"<|audio:926|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 927, + "content": "<|audio:927|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 928, + "content": "<|audio:928|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 929, + "content": "<|audio:929|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 930, + "content": "<|audio:930|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 931, + "content": "<|audio:931|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 932, + "content": "<|audio:932|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 933, + "content": "<|audio:933|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 934, + "content": "<|audio:934|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 935, + "content": "<|audio:935|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 936, + "content": "<|audio:936|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 937, + "content": "<|audio:937|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 938, + "content": "<|audio:938|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
939, + "content": "<|audio:939|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 940, + "content": "<|audio:940|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 941, + "content": "<|audio:941|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 942, + "content": "<|audio:942|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 943, + "content": "<|audio:943|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 944, + "content": "<|audio:944|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 945, + "content": "<|audio:945|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 946, + "content": "<|audio:946|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 947, + "content": "<|audio:947|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 948, + "content": "<|audio:948|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 949, + "content": "<|audio:949|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 950, + "content": "<|audio:950|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 951, + "content": "<|audio:951|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 952, + "content": "<|audio:952|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 953, + "content": "<|audio:953|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 954, + "content": "<|audio:954|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 955, + "content": "<|audio:955|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 956, + "content": "<|audio:956|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 957, + "content": "<|audio:957|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 958, + "content": "<|audio:958|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 959, + "content": "<|audio:959|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 960, + "content": "<|audio:960|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 961, + "content": "<|audio:961|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 962, + "content": "<|audio:962|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 963, + "content": "<|audio:963|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 964, + "content": "<|audio:964|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 965, + "content": "<|audio:965|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 966, + "content": "<|audio:966|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 967, + "content": "<|audio:967|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 968, + "content": "<|audio:968|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 969, + "content": "<|audio:969|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 970, + "content": "<|audio:970|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 971, + "content": "<|audio:971|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 972, + "content": "<|audio:972|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 973, + "content": "<|audio:973|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 974, + "content": "<|audio:974|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 975, + "content": "<|audio:975|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 976, + "content": "<|audio:976|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 977, + "content": "<|audio:977|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 978, + "content": "<|audio:978|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 979, + "content": "<|audio:979|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 980, + "content": "<|audio:980|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 981, + "content": "<|audio:981|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 982, + "content": "<|audio:982|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 983, + "content": "<|audio:983|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 984, + "content": "<|audio:984|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 985, + "content": "<|audio:985|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 986, + "content": "<|audio:986|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 987, + "content": "<|audio:987|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 988, + "content": "<|audio:988|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 989, + "content": "<|audio:989|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 990, + "content": "<|audio:990|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 991, + "content": "<|audio:991|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 992, + "content": "<|audio:992|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 993, + "content": "<|audio:993|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 994, + "content": "<|audio:994|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 995, + "content": "<|audio:995|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 996, + "content": "<|audio:996|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 997, + "content": "<|audio:997|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 998, + "content": "<|audio:998|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 999, + "content": "<|audio:999|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1000, + "content": "<|audio:1000|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1001, + "content": "<|audio:1001|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1002, + "content": "<|audio:1002|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1003, + "content": "<|audio:1003|>", + "single_word": 
false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1004, + "content": "<|audio:1004|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1005, + "content": "<|audio:1005|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1006, + "content": "<|audio:1006|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1007, + "content": "<|audio:1007|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1008, + "content": "<|audio:1008|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1009, + "content": "<|audio:1009|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1010, + "content": "<|audio:1010|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1011, + "content": "<|audio:1011|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1012, + "content": "<|audio:1012|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1013, + "content": "<|audio:1013|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1014, + "content": "<|audio:1014|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1015, + "content": "<|audio:1015|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1016, + 
"content": "<|audio:1016|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1017, + "content": "<|audio:1017|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1018, + "content": "<|audio:1018|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1019, + "content": "<|audio:1019|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1020, + "content": "<|audio:1020|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1021, + "content": "<|audio:1021|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1022, + "content": "<|audio:1022|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1023, + "content": "<|audio:1023|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1024, + "content": "<|startoftranscript|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1025, + "content": "<|endoftranscript|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1026, + "content": "<|padding|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFKC" + }, + "pre_tokenizer": { + "type": "Metaspace", + "replacement": "▁", + "prepend_scheme": "always", + "split": true + }, + "post_processor": null, + "decoder": { + "type": "Metaspace", + "replacement": "▁", + "prepend_scheme": "always", + 
"split": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|audio:0|>": 0, + "<|audio:1|>": 1, + "<|audio:2|>": 2, + "<|audio:3|>": 3, + "<|audio:4|>": 4, + "<|audio:5|>": 5, + "<|audio:6|>": 6, + "<|audio:7|>": 7, + "<|audio:8|>": 8, + "<|audio:9|>": 9, + "<|audio:10|>": 10, + "<|audio:11|>": 11, + "<|audio:12|>": 12, + "<|audio:13|>": 13, + "<|audio:14|>": 14, + "<|audio:15|>": 15, + "<|audio:16|>": 16, + "<|audio:17|>": 17, + "<|audio:18|>": 18, + "<|audio:19|>": 19, + "<|audio:20|>": 20, + "<|audio:21|>": 21, + "<|audio:22|>": 22, + "<|audio:23|>": 23, + "<|audio:24|>": 24, + "<|audio:25|>": 25, + "<|audio:26|>": 26, + "<|audio:27|>": 27, + "<|audio:28|>": 28, + "<|audio:29|>": 29, + "<|audio:30|>": 30, + "<|audio:31|>": 31, + "<|audio:32|>": 32, + "<|audio:33|>": 33, + "<|audio:34|>": 34, + "<|audio:35|>": 35, + "<|audio:36|>": 36, + "<|audio:37|>": 37, + "<|audio:38|>": 38, + "<|audio:39|>": 39, + "<|audio:40|>": 40, + "<|audio:41|>": 41, + "<|audio:42|>": 42, + "<|audio:43|>": 43, + "<|audio:44|>": 44, + "<|audio:45|>": 45, + "<|audio:46|>": 46, + "<|audio:47|>": 47, + "<|audio:48|>": 48, + "<|audio:49|>": 49, + "<|audio:50|>": 50, + "<|audio:51|>": 51, + "<|audio:52|>": 52, + "<|audio:53|>": 53, + "<|audio:54|>": 54, + "<|audio:55|>": 55, + "<|audio:56|>": 56, + "<|audio:57|>": 57, + "<|audio:58|>": 58, + "<|audio:59|>": 59, + "<|audio:60|>": 60, + "<|audio:61|>": 61, + "<|audio:62|>": 62, + "<|audio:63|>": 63, + "<|audio:64|>": 64, + "<|audio:65|>": 65, + "<|audio:66|>": 66, + "<|audio:67|>": 67, + "<|audio:68|>": 68, + "<|audio:69|>": 69, + "<|audio:70|>": 70, + "<|audio:71|>": 71, + "<|audio:72|>": 72, + "<|audio:73|>": 73, + "<|audio:74|>": 74, + "<|audio:75|>": 75, + "<|audio:76|>": 76, + "<|audio:77|>": 77, + "<|audio:78|>": 78, + "<|audio:79|>": 79, + "<|audio:80|>": 
80, + "<|audio:81|>": 81, + "<|audio:82|>": 82, + "<|audio:83|>": 83, + "<|audio:84|>": 84, + "<|audio:85|>": 85, + "<|audio:86|>": 86, + "<|audio:87|>": 87, + "<|audio:88|>": 88, + "<|audio:89|>": 89, + "<|audio:90|>": 90, + "<|audio:91|>": 91, + "<|audio:92|>": 92, + "<|audio:93|>": 93, + "<|audio:94|>": 94, + "<|audio:95|>": 95, + "<|audio:96|>": 96, + "<|audio:97|>": 97, + "<|audio:98|>": 98, + "<|audio:99|>": 99, + "<|audio:100|>": 100, + "<|audio:101|>": 101, + "<|audio:102|>": 102, + "<|audio:103|>": 103, + "<|audio:104|>": 104, + "<|audio:105|>": 105, + "<|audio:106|>": 106, + "<|audio:107|>": 107, + "<|audio:108|>": 108, + "<|audio:109|>": 109, + "<|audio:110|>": 110, + "<|audio:111|>": 111, + "<|audio:112|>": 112, + "<|audio:113|>": 113, + "<|audio:114|>": 114, + "<|audio:115|>": 115, + "<|audio:116|>": 116, + "<|audio:117|>": 117, + "<|audio:118|>": 118, + "<|audio:119|>": 119, + "<|audio:120|>": 120, + "<|audio:121|>": 121, + "<|audio:122|>": 122, + "<|audio:123|>": 123, + "<|audio:124|>": 124, + "<|audio:125|>": 125, + "<|audio:126|>": 126, + "<|audio:127|>": 127, + "<|audio:128|>": 128, + "<|audio:129|>": 129, + "<|audio:130|>": 130, + "<|audio:131|>": 131, + "<|audio:132|>": 132, + "<|audio:133|>": 133, + "<|audio:134|>": 134, + "<|audio:135|>": 135, + "<|audio:136|>": 136, + "<|audio:137|>": 137, + "<|audio:138|>": 138, + "<|audio:139|>": 139, + "<|audio:140|>": 140, + "<|audio:141|>": 141, + "<|audio:142|>": 142, + "<|audio:143|>": 143, + "<|audio:144|>": 144, + "<|audio:145|>": 145, + "<|audio:146|>": 146, + "<|audio:147|>": 147, + "<|audio:148|>": 148, + "<|audio:149|>": 149, + "<|audio:150|>": 150, + "<|audio:151|>": 151, + "<|audio:152|>": 152, + "<|audio:153|>": 153, + "<|audio:154|>": 154, + "<|audio:155|>": 155, + "<|audio:156|>": 156, + "<|audio:157|>": 157, + "<|audio:158|>": 158, + "<|audio:159|>": 159, + "<|audio:160|>": 160, + "<|audio:161|>": 161, + "<|audio:162|>": 162, + "<|audio:163|>": 163, + "<|audio:164|>": 164, + 
"<|audio:165|>": 165, + "<|audio:166|>": 166, + "<|audio:167|>": 167, + "<|audio:168|>": 168, + "<|audio:169|>": 169, + "<|audio:170|>": 170, + "<|audio:171|>": 171, + "<|audio:172|>": 172, + "<|audio:173|>": 173, + "<|audio:174|>": 174, + "<|audio:175|>": 175, + "<|audio:176|>": 176, + "<|audio:177|>": 177, + "<|audio:178|>": 178, + "<|audio:179|>": 179, + "<|audio:180|>": 180, + "<|audio:181|>": 181, + "<|audio:182|>": 182, + "<|audio:183|>": 183, + "<|audio:184|>": 184, + "<|audio:185|>": 185, + "<|audio:186|>": 186, + "<|audio:187|>": 187, + "<|audio:188|>": 188, + "<|audio:189|>": 189, + "<|audio:190|>": 190, + "<|audio:191|>": 191, + "<|audio:192|>": 192, + "<|audio:193|>": 193, + "<|audio:194|>": 194, + "<|audio:195|>": 195, + "<|audio:196|>": 196, + "<|audio:197|>": 197, + "<|audio:198|>": 198, + "<|audio:199|>": 199, + "<|audio:200|>": 200, + "<|audio:201|>": 201, + "<|audio:202|>": 202, + "<|audio:203|>": 203, + "<|audio:204|>": 204, + "<|audio:205|>": 205, + "<|audio:206|>": 206, + "<|audio:207|>": 207, + "<|audio:208|>": 208, + "<|audio:209|>": 209, + "<|audio:210|>": 210, + "<|audio:211|>": 211, + "<|audio:212|>": 212, + "<|audio:213|>": 213, + "<|audio:214|>": 214, + "<|audio:215|>": 215, + "<|audio:216|>": 216, + "<|audio:217|>": 217, + "<|audio:218|>": 218, + "<|audio:219|>": 219, + "<|audio:220|>": 220, + "<|audio:221|>": 221, + "<|audio:222|>": 222, + "<|audio:223|>": 223, + "<|audio:224|>": 224, + "<|audio:225|>": 225, + "<|audio:226|>": 226, + "<|audio:227|>": 227, + "<|audio:228|>": 228, + "<|audio:229|>": 229, + "<|audio:230|>": 230, + "<|audio:231|>": 231, + "<|audio:232|>": 232, + "<|audio:233|>": 233, + "<|audio:234|>": 234, + "<|audio:235|>": 235, + "<|audio:236|>": 236, + "<|audio:237|>": 237, + "<|audio:238|>": 238, + "<|audio:239|>": 239, + "<|audio:240|>": 240, + "<|audio:241|>": 241, + "<|audio:242|>": 242, + "<|audio:243|>": 243, + "<|audio:244|>": 244, + "<|audio:245|>": 245, + "<|audio:246|>": 246, + "<|audio:247|>": 247, + 
"<|audio:248|>": 248, + "<|audio:249|>": 249, + "<|audio:250|>": 250, + "<|audio:251|>": 251, + "<|audio:252|>": 252, + "<|audio:253|>": 253, + "<|audio:254|>": 254, + "<|audio:255|>": 255, + "<|audio:256|>": 256, + "<|audio:257|>": 257, + "<|audio:258|>": 258, + "<|audio:259|>": 259, + "<|audio:260|>": 260, + "<|audio:261|>": 261, + "<|audio:262|>": 262, + "<|audio:263|>": 263, + "<|audio:264|>": 264, + "<|audio:265|>": 265, + "<|audio:266|>": 266, + "<|audio:267|>": 267, + "<|audio:268|>": 268, + "<|audio:269|>": 269, + "<|audio:270|>": 270, + "<|audio:271|>": 271, + "<|audio:272|>": 272, + "<|audio:273|>": 273, + "<|audio:274|>": 274, + "<|audio:275|>": 275, + "<|audio:276|>": 276, + "<|audio:277|>": 277, + "<|audio:278|>": 278, + "<|audio:279|>": 279, + "<|audio:280|>": 280, + "<|audio:281|>": 281, + "<|audio:282|>": 282, + "<|audio:283|>": 283, + "<|audio:284|>": 284, + "<|audio:285|>": 285, + "<|audio:286|>": 286, + "<|audio:287|>": 287, + "<|audio:288|>": 288, + "<|audio:289|>": 289, + "<|audio:290|>": 290, + "<|audio:291|>": 291, + "<|audio:292|>": 292, + "<|audio:293|>": 293, + "<|audio:294|>": 294, + "<|audio:295|>": 295, + "<|audio:296|>": 296, + "<|audio:297|>": 297, + "<|audio:298|>": 298, + "<|audio:299|>": 299, + "<|audio:300|>": 300, + "<|audio:301|>": 301, + "<|audio:302|>": 302, + "<|audio:303|>": 303, + "<|audio:304|>": 304, + "<|audio:305|>": 305, + "<|audio:306|>": 306, + "<|audio:307|>": 307, + "<|audio:308|>": 308, + "<|audio:309|>": 309, + "<|audio:310|>": 310, + "<|audio:311|>": 311, + "<|audio:312|>": 312, + "<|audio:313|>": 313, + "<|audio:314|>": 314, + "<|audio:315|>": 315, + "<|audio:316|>": 316, + "<|audio:317|>": 317, + "<|audio:318|>": 318, + "<|audio:319|>": 319, + "<|audio:320|>": 320, + "<|audio:321|>": 321, + "<|audio:322|>": 322, + "<|audio:323|>": 323, + "<|audio:324|>": 324, + "<|audio:325|>": 325, + "<|audio:326|>": 326, + "<|audio:327|>": 327, + "<|audio:328|>": 328, + "<|audio:329|>": 329, + "<|audio:330|>": 330, + 
"<|audio:331|>": 331, + "<|audio:332|>": 332, + "<|audio:333|>": 333, + "<|audio:334|>": 334, + "<|audio:335|>": 335, + "<|audio:336|>": 336, + "<|audio:337|>": 337, + "<|audio:338|>": 338, + "<|audio:339|>": 339, + "<|audio:340|>": 340, + "<|audio:341|>": 341, + "<|audio:342|>": 342, + "<|audio:343|>": 343, + "<|audio:344|>": 344, + "<|audio:345|>": 345, + "<|audio:346|>": 346, + "<|audio:347|>": 347, + "<|audio:348|>": 348, + "<|audio:349|>": 349, + "<|audio:350|>": 350, + "<|audio:351|>": 351, + "<|audio:352|>": 352, + "<|audio:353|>": 353, + "<|audio:354|>": 354, + "<|audio:355|>": 355, + "<|audio:356|>": 356, + "<|audio:357|>": 357, + "<|audio:358|>": 358, + "<|audio:359|>": 359, + "<|audio:360|>": 360, + "<|audio:361|>": 361, + "<|audio:362|>": 362, + "<|audio:363|>": 363, + "<|audio:364|>": 364, + "<|audio:365|>": 365, + "<|audio:366|>": 366, + "<|audio:367|>": 367, + "<|audio:368|>": 368, + "<|audio:369|>": 369, + "<|audio:370|>": 370, + "<|audio:371|>": 371, + "<|audio:372|>": 372, + "<|audio:373|>": 373, + "<|audio:374|>": 374, + "<|audio:375|>": 375, + "<|audio:376|>": 376, + "<|audio:377|>": 377, + "<|audio:378|>": 378, + "<|audio:379|>": 379, + "<|audio:380|>": 380, + "<|audio:381|>": 381, + "<|audio:382|>": 382, + "<|audio:383|>": 383, + "<|audio:384|>": 384, + "<|audio:385|>": 385, + "<|audio:386|>": 386, + "<|audio:387|>": 387, + "<|audio:388|>": 388, + "<|audio:389|>": 389, + "<|audio:390|>": 390, + "<|audio:391|>": 391, + "<|audio:392|>": 392, + "<|audio:393|>": 393, + "<|audio:394|>": 394, + "<|audio:395|>": 395, + "<|audio:396|>": 396, + "<|audio:397|>": 397, + "<|audio:398|>": 398, + "<|audio:399|>": 399, + "<|audio:400|>": 400, + "<|audio:401|>": 401, + "<|audio:402|>": 402, + "<|audio:403|>": 403, + "<|audio:404|>": 404, + "<|audio:405|>": 405, + "<|audio:406|>": 406, + "<|audio:407|>": 407, + "<|audio:408|>": 408, + "<|audio:409|>": 409, + "<|audio:410|>": 410, + "<|audio:411|>": 411, + "<|audio:412|>": 412, + "<|audio:413|>": 413, + 
"<|audio:414|>": 414, + "<|audio:415|>": 415, + "<|audio:416|>": 416, + "<|audio:417|>": 417, + "<|audio:418|>": 418, + "<|audio:419|>": 419, + "<|audio:420|>": 420, + "<|audio:421|>": 421, + "<|audio:422|>": 422, + "<|audio:423|>": 423, + "<|audio:424|>": 424, + "<|audio:425|>": 425, + "<|audio:426|>": 426, + "<|audio:427|>": 427, + "<|audio:428|>": 428, + "<|audio:429|>": 429, + "<|audio:430|>": 430, + "<|audio:431|>": 431, + "<|audio:432|>": 432, + "<|audio:433|>": 433, + "<|audio:434|>": 434, + "<|audio:435|>": 435, + "<|audio:436|>": 436, + "<|audio:437|>": 437, + "<|audio:438|>": 438, + "<|audio:439|>": 439, + "<|audio:440|>": 440, + "<|audio:441|>": 441, + "<|audio:442|>": 442, + "<|audio:443|>": 443, + "<|audio:444|>": 444, + "<|audio:445|>": 445, + "<|audio:446|>": 446, + "<|audio:447|>": 447, + "<|audio:448|>": 448, + "<|audio:449|>": 449, + "<|audio:450|>": 450, + "<|audio:451|>": 451, + "<|audio:452|>": 452, + "<|audio:453|>": 453, + "<|audio:454|>": 454, + "<|audio:455|>": 455, + "<|audio:456|>": 456, + "<|audio:457|>": 457, + "<|audio:458|>": 458, + "<|audio:459|>": 459, + "<|audio:460|>": 460, + "<|audio:461|>": 461, + "<|audio:462|>": 462, + "<|audio:463|>": 463, + "<|audio:464|>": 464, + "<|audio:465|>": 465, + "<|audio:466|>": 466, + "<|audio:467|>": 467, + "<|audio:468|>": 468, + "<|audio:469|>": 469, + "<|audio:470|>": 470, + "<|audio:471|>": 471, + "<|audio:472|>": 472, + "<|audio:473|>": 473, + "<|audio:474|>": 474, + "<|audio:475|>": 475, + "<|audio:476|>": 476, + "<|audio:477|>": 477, + "<|audio:478|>": 478, + "<|audio:479|>": 479, + "<|audio:480|>": 480, + "<|audio:481|>": 481, + "<|audio:482|>": 482, + "<|audio:483|>": 483, + "<|audio:484|>": 484, + "<|audio:485|>": 485, + "<|audio:486|>": 486, + "<|audio:487|>": 487, + "<|audio:488|>": 488, + "<|audio:489|>": 489, + "<|audio:490|>": 490, + "<|audio:491|>": 491, + "<|audio:492|>": 492, + "<|audio:493|>": 493, + "<|audio:494|>": 494, + "<|audio:495|>": 495, + "<|audio:496|>": 496, + 
"<|audio:497|>": 497, + "<|audio:498|>": 498, + "<|audio:499|>": 499, + "<|audio:500|>": 500, + "<|audio:501|>": 501, + "<|audio:502|>": 502, + "<|audio:503|>": 503, + "<|audio:504|>": 504, + "<|audio:505|>": 505, + "<|audio:506|>": 506, + "<|audio:507|>": 507, + "<|audio:508|>": 508, + "<|audio:509|>": 509, + "<|audio:510|>": 510, + "<|audio:511|>": 511, + "<|audio:512|>": 512, + "<|audio:513|>": 513, + "<|audio:514|>": 514, + "<|audio:515|>": 515, + "<|audio:516|>": 516, + "<|audio:517|>": 517, + "<|audio:518|>": 518, + "<|audio:519|>": 519, + "<|audio:520|>": 520, + "<|audio:521|>": 521, + "<|audio:522|>": 522, + "<|audio:523|>": 523, + "<|audio:524|>": 524, + "<|audio:525|>": 525, + "<|audio:526|>": 526, + "<|audio:527|>": 527, + "<|audio:528|>": 528, + "<|audio:529|>": 529, + "<|audio:530|>": 530, + "<|audio:531|>": 531, + "<|audio:532|>": 532, + "<|audio:533|>": 533, + "<|audio:534|>": 534, + "<|audio:535|>": 535, + "<|audio:536|>": 536, + "<|audio:537|>": 537, + "<|audio:538|>": 538, + "<|audio:539|>": 539, + "<|audio:540|>": 540, + "<|audio:541|>": 541, + "<|audio:542|>": 542, + "<|audio:543|>": 543, + "<|audio:544|>": 544, + "<|audio:545|>": 545, + "<|audio:546|>": 546, + "<|audio:547|>": 547, + "<|audio:548|>": 548, + "<|audio:549|>": 549, + "<|audio:550|>": 550, + "<|audio:551|>": 551, + "<|audio:552|>": 552, + "<|audio:553|>": 553, + "<|audio:554|>": 554, + "<|audio:555|>": 555, + "<|audio:556|>": 556, + "<|audio:557|>": 557, + "<|audio:558|>": 558, + "<|audio:559|>": 559, + "<|audio:560|>": 560, + "<|audio:561|>": 561, + "<|audio:562|>": 562, + "<|audio:563|>": 563, + "<|audio:564|>": 564, + "<|audio:565|>": 565, + "<|audio:566|>": 566, + "<|audio:567|>": 567, + "<|audio:568|>": 568, + "<|audio:569|>": 569, + "<|audio:570|>": 570, + "<|audio:571|>": 571, + "<|audio:572|>": 572, + "<|audio:573|>": 573, + "<|audio:574|>": 574, + "<|audio:575|>": 575, + "<|audio:576|>": 576, + "<|audio:577|>": 577, + "<|audio:578|>": 578, + "<|audio:579|>": 579, + 
"<|audio:580|>": 580, + "<|audio:581|>": 581, + "<|audio:582|>": 582, + "<|audio:583|>": 583, + "<|audio:584|>": 584, + "<|audio:585|>": 585, + "<|audio:586|>": 586, + "<|audio:587|>": 587, + "<|audio:588|>": 588, + "<|audio:589|>": 589, + "<|audio:590|>": 590, + "<|audio:591|>": 591, + "<|audio:592|>": 592, + "<|audio:593|>": 593, + "<|audio:594|>": 594, + "<|audio:595|>": 595, + "<|audio:596|>": 596, + "<|audio:597|>": 597, + "<|audio:598|>": 598, + "<|audio:599|>": 599, + "<|audio:600|>": 600, + "<|audio:601|>": 601, + "<|audio:602|>": 602, + "<|audio:603|>": 603, + "<|audio:604|>": 604, + "<|audio:605|>": 605, + "<|audio:606|>": 606, + "<|audio:607|>": 607, + "<|audio:608|>": 608, + "<|audio:609|>": 609, + "<|audio:610|>": 610, + "<|audio:611|>": 611, + "<|audio:612|>": 612, + "<|audio:613|>": 613, + "<|audio:614|>": 614, + "<|audio:615|>": 615, + "<|audio:616|>": 616, + "<|audio:617|>": 617, + "<|audio:618|>": 618, + "<|audio:619|>": 619, + "<|audio:620|>": 620, + "<|audio:621|>": 621, + "<|audio:622|>": 622, + "<|audio:623|>": 623, + "<|audio:624|>": 624, + "<|audio:625|>": 625, + "<|audio:626|>": 626, + "<|audio:627|>": 627, + "<|audio:628|>": 628, + "<|audio:629|>": 629, + "<|audio:630|>": 630, + "<|audio:631|>": 631, + "<|audio:632|>": 632, + "<|audio:633|>": 633, + "<|audio:634|>": 634, + "<|audio:635|>": 635, + "<|audio:636|>": 636, + "<|audio:637|>": 637, + "<|audio:638|>": 638, + "<|audio:639|>": 639, + "<|audio:640|>": 640, + "<|audio:641|>": 641, + "<|audio:642|>": 642, + "<|audio:643|>": 643, + "<|audio:644|>": 644, + "<|audio:645|>": 645, + "<|audio:646|>": 646, + "<|audio:647|>": 647, + "<|audio:648|>": 648, + "<|audio:649|>": 649, + "<|audio:650|>": 650, + "<|audio:651|>": 651, + "<|audio:652|>": 652, + "<|audio:653|>": 653, + "<|audio:654|>": 654, + "<|audio:655|>": 655, + "<|audio:656|>": 656, + "<|audio:657|>": 657, + "<|audio:658|>": 658, + "<|audio:659|>": 659, + "<|audio:660|>": 660, + "<|audio:661|>": 661, + "<|audio:662|>": 662, + 
"<|audio:663|>": 663, + "<|audio:664|>": 664, + "<|audio:665|>": 665, + "<|audio:666|>": 666, + "<|audio:667|>": 667, + "<|audio:668|>": 668, + "<|audio:669|>": 669, + "<|audio:670|>": 670, + "<|audio:671|>": 671, + "<|audio:672|>": 672, + "<|audio:673|>": 673, + "<|audio:674|>": 674, + "<|audio:675|>": 675, + "<|audio:676|>": 676, + "<|audio:677|>": 677, + "<|audio:678|>": 678, + "<|audio:679|>": 679, + "<|audio:680|>": 680, + "<|audio:681|>": 681, + "<|audio:682|>": 682, + "<|audio:683|>": 683, + "<|audio:684|>": 684, + "<|audio:685|>": 685, + "<|audio:686|>": 686, + "<|audio:687|>": 687, + "<|audio:688|>": 688, + "<|audio:689|>": 689, + "<|audio:690|>": 690, + "<|audio:691|>": 691, + "<|audio:692|>": 692, + "<|audio:693|>": 693, + "<|audio:694|>": 694, + "<|audio:695|>": 695, + "<|audio:696|>": 696, + "<|audio:697|>": 697, + "<|audio:698|>": 698, + "<|audio:699|>": 699, + "<|audio:700|>": 700, + "<|audio:701|>": 701, + "<|audio:702|>": 702, + "<|audio:703|>": 703, + "<|audio:704|>": 704, + "<|audio:705|>": 705, + "<|audio:706|>": 706, + "<|audio:707|>": 707, + "<|audio:708|>": 708, + "<|audio:709|>": 709, + "<|audio:710|>": 710, + "<|audio:711|>": 711, + "<|audio:712|>": 712, + "<|audio:713|>": 713, + "<|audio:714|>": 714, + "<|audio:715|>": 715, + "<|audio:716|>": 716, + "<|audio:717|>": 717, + "<|audio:718|>": 718, + "<|audio:719|>": 719, + "<|audio:720|>": 720, + "<|audio:721|>": 721, + "<|audio:722|>": 722, + "<|audio:723|>": 723, + "<|audio:724|>": 724, + "<|audio:725|>": 725, + "<|audio:726|>": 726, + "<|audio:727|>": 727, + "<|audio:728|>": 728, + "<|audio:729|>": 729, + "<|audio:730|>": 730, + "<|audio:731|>": 731, + "<|audio:732|>": 732, + "<|audio:733|>": 733, + "<|audio:734|>": 734, + "<|audio:735|>": 735, + "<|audio:736|>": 736, + "<|audio:737|>": 737, + "<|audio:738|>": 738, + "<|audio:739|>": 739, + "<|audio:740|>": 740, + "<|audio:741|>": 741, + "<|audio:742|>": 742, + "<|audio:743|>": 743, + "<|audio:744|>": 744, + "<|audio:745|>": 745, + 
"<|audio:746|>": 746, + "<|audio:747|>": 747, + "<|audio:748|>": 748, + "<|audio:749|>": 749, + "<|audio:750|>": 750, + "<|audio:751|>": 751, + "<|audio:752|>": 752, + "<|audio:753|>": 753, + "<|audio:754|>": 754, + "<|audio:755|>": 755, + "<|audio:756|>": 756, + "<|audio:757|>": 757, + "<|audio:758|>": 758, + "<|audio:759|>": 759, + "<|audio:760|>": 760, + "<|audio:761|>": 761, + "<|audio:762|>": 762, + "<|audio:763|>": 763, + "<|audio:764|>": 764, + "<|audio:765|>": 765, + "<|audio:766|>": 766, + "<|audio:767|>": 767, + "<|audio:768|>": 768, + "<|audio:769|>": 769, + "<|audio:770|>": 770, + "<|audio:771|>": 771, + "<|audio:772|>": 772, + "<|audio:773|>": 773, + "<|audio:774|>": 774, + "<|audio:775|>": 775, + "<|audio:776|>": 776, + "<|audio:777|>": 777, + "<|audio:778|>": 778, + "<|audio:779|>": 779, + "<|audio:780|>": 780, + "<|audio:781|>": 781, + "<|audio:782|>": 782, + "<|audio:783|>": 783, + "<|audio:784|>": 784, + "<|audio:785|>": 785, + "<|audio:786|>": 786, + "<|audio:787|>": 787, + "<|audio:788|>": 788, + "<|audio:789|>": 789, + "<|audio:790|>": 790, + "<|audio:791|>": 791, + "<|audio:792|>": 792, + "<|audio:793|>": 793, + "<|audio:794|>": 794, + "<|audio:795|>": 795, + "<|audio:796|>": 796, + "<|audio:797|>": 797, + "<|audio:798|>": 798, + "<|audio:799|>": 799, + "<|audio:800|>": 800, + "<|audio:801|>": 801, + "<|audio:802|>": 802, + "<|audio:803|>": 803, + "<|audio:804|>": 804, + "<|audio:805|>": 805, + "<|audio:806|>": 806, + "<|audio:807|>": 807, + "<|audio:808|>": 808, + "<|audio:809|>": 809, + "<|audio:810|>": 810, + "<|audio:811|>": 811, + "<|audio:812|>": 812, + "<|audio:813|>": 813, + "<|audio:814|>": 814, + "<|audio:815|>": 815, + "<|audio:816|>": 816, + "<|audio:817|>": 817, + "<|audio:818|>": 818, + "<|audio:819|>": 819, + "<|audio:820|>": 820, + "<|audio:821|>": 821, + "<|audio:822|>": 822, + "<|audio:823|>": 823, + "<|audio:824|>": 824, + "<|audio:825|>": 825, + "<|audio:826|>": 826, + "<|audio:827|>": 827, + "<|audio:828|>": 828, + 
"<|audio:829|>": 829, + "<|audio:830|>": 830, + "<|audio:831|>": 831, + "<|audio:832|>": 832, + "<|audio:833|>": 833, + "<|audio:834|>": 834, + "<|audio:835|>": 835, + "<|audio:836|>": 836, + "<|audio:837|>": 837, + "<|audio:838|>": 838, + "<|audio:839|>": 839, + "<|audio:840|>": 840, + "<|audio:841|>": 841, + "<|audio:842|>": 842, + "<|audio:843|>": 843, + "<|audio:844|>": 844, + "<|audio:845|>": 845, + "<|audio:846|>": 846, + "<|audio:847|>": 847, + "<|audio:848|>": 848, + "<|audio:849|>": 849, + "<|audio:850|>": 850, + "<|audio:851|>": 851, + "<|audio:852|>": 852, + "<|audio:853|>": 853, + "<|audio:854|>": 854, + "<|audio:855|>": 855, + "<|audio:856|>": 856, + "<|audio:857|>": 857, + "<|audio:858|>": 858, + "<|audio:859|>": 859, + "<|audio:860|>": 860, + "<|audio:861|>": 861, + "<|audio:862|>": 862, + "<|audio:863|>": 863, + "<|audio:864|>": 864, + "<|audio:865|>": 865, + "<|audio:866|>": 866, + "<|audio:867|>": 867, + "<|audio:868|>": 868, + "<|audio:869|>": 869, + "<|audio:870|>": 870, + "<|audio:871|>": 871, + "<|audio:872|>": 872, + "<|audio:873|>": 873, + "<|audio:874|>": 874, + "<|audio:875|>": 875, + "<|audio:876|>": 876, + "<|audio:877|>": 877, + "<|audio:878|>": 878, + "<|audio:879|>": 879, + "<|audio:880|>": 880, + "<|audio:881|>": 881, + "<|audio:882|>": 882, + "<|audio:883|>": 883, + "<|audio:884|>": 884, + "<|audio:885|>": 885, + "<|audio:886|>": 886, + "<|audio:887|>": 887, + "<|audio:888|>": 888, + "<|audio:889|>": 889, + "<|audio:890|>": 890, + "<|audio:891|>": 891, + "<|audio:892|>": 892, + "<|audio:893|>": 893, + "<|audio:894|>": 894, + "<|audio:895|>": 895, + "<|audio:896|>": 896, + "<|audio:897|>": 897, + "<|audio:898|>": 898, + "<|audio:899|>": 899, + "<|audio:900|>": 900, + "<|audio:901|>": 901, + "<|audio:902|>": 902, + "<|audio:903|>": 903, + "<|audio:904|>": 904, + "<|audio:905|>": 905, + "<|audio:906|>": 906, + "<|audio:907|>": 907, + "<|audio:908|>": 908, + "<|audio:909|>": 909, + "<|audio:910|>": 910, + "<|audio:911|>": 911, + 
"<|audio:912|>": 912, + "<|audio:913|>": 913, + "<|audio:914|>": 914, + "<|audio:915|>": 915, + "<|audio:916|>": 916, + "<|audio:917|>": 917, + "<|audio:918|>": 918, + "<|audio:919|>": 919, + "<|audio:920|>": 920, + "<|audio:921|>": 921, + "<|audio:922|>": 922, + "<|audio:923|>": 923, + "<|audio:924|>": 924, + "<|audio:925|>": 925, + "<|audio:926|>": 926, + "<|audio:927|>": 927, + "<|audio:928|>": 928, + "<|audio:929|>": 929, + "<|audio:930|>": 930, + "<|audio:931|>": 931, + "<|audio:932|>": 932, + "<|audio:933|>": 933, + "<|audio:934|>": 934, + "<|audio:935|>": 935, + "<|audio:936|>": 936, + "<|audio:937|>": 937, + "<|audio:938|>": 938, + "<|audio:939|>": 939, + "<|audio:940|>": 940, + "<|audio:941|>": 941, + "<|audio:942|>": 942, + "<|audio:943|>": 943, + "<|audio:944|>": 944, + "<|audio:945|>": 945, + "<|audio:946|>": 946, + "<|audio:947|>": 947, + "<|audio:948|>": 948, + "<|audio:949|>": 949, + "<|audio:950|>": 950, + "<|audio:951|>": 951, + "<|audio:952|>": 952, + "<|audio:953|>": 953, + "<|audio:954|>": 954, + "<|audio:955|>": 955, + "<|audio:956|>": 956, + "<|audio:957|>": 957, + "<|audio:958|>": 958, + "<|audio:959|>": 959, + "<|audio:960|>": 960, + "<|audio:961|>": 961, + "<|audio:962|>": 962, + "<|audio:963|>": 963, + "<|audio:964|>": 964, + "<|audio:965|>": 965, + "<|audio:966|>": 966, + "<|audio:967|>": 967, + "<|audio:968|>": 968, + "<|audio:969|>": 969, + "<|audio:970|>": 970, + "<|audio:971|>": 971, + "<|audio:972|>": 972, + "<|audio:973|>": 973, + "<|audio:974|>": 974, + "<|audio:975|>": 975, + "<|audio:976|>": 976, + "<|audio:977|>": 977, + "<|audio:978|>": 978, + "<|audio:979|>": 979, + "<|audio:980|>": 980, + "<|audio:981|>": 981, + "<|audio:982|>": 982, + "<|audio:983|>": 983, + "<|audio:984|>": 984, + "<|audio:985|>": 985, + "<|audio:986|>": 986, + "<|audio:987|>": 987, + "<|audio:988|>": 988, + "<|audio:989|>": 989, + "<|audio:990|>": 990, + "<|audio:991|>": 991, + "<|audio:992|>": 992, + "<|audio:993|>": 993, + "<|audio:994|>": 994, + 
"<|audio:995|>": 995, + "<|audio:996|>": 996, + "<|audio:997|>": 997, + "<|audio:998|>": 998, + "<|audio:999|>": 999, + "<|audio:1000|>": 1000, + "<|audio:1001|>": 1001, + "<|audio:1002|>": 1002, + "<|audio:1003|>": 1003, + "<|audio:1004|>": 1004, + "<|audio:1005|>": 1005, + "<|audio:1006|>": 1006, + "<|audio:1007|>": 1007, + "<|audio:1008|>": 1008, + "<|audio:1009|>": 1009, + "<|audio:1010|>": 1010, + "<|audio:1011|>": 1011, + "<|audio:1012|>": 1012, + "<|audio:1013|>": 1013, + "<|audio:1014|>": 1014, + "<|audio:1015|>": 1015, + "<|audio:1016|>": 1016, + "<|audio:1017|>": 1017, + "<|audio:1018|>": 1018, + "<|audio:1019|>": 1019, + "<|audio:1020|>": 1020, + "<|audio:1021|>": 1021, + "<|audio:1022|>": 1022, + "<|audio:1023|>": 1023, + "<|startoftranscript|>": 1024, + "<|endoftranscript|>": 1025, + "<|padding|>": 1026, + "'": 1027, + "a": 1028, + "b": 1029, + "c": 1030, + "d": 1031, + "e": 1032, + "f": 1033, + "g": 1034, + "h": 1035, + "i": 1036, + "j": 1037, + "k": 1038, + "l": 1039, + "m": 1040, + "n": 1041, + "o": 1042, + "p": 1043, + "q": 1044, + "r": 1045, + "s": 1046, + "t": 1047, + "u": 1048, + "v": 1049, + "w": 1050, + "x": 1051, + "y": 1052, + "z": 1053, + "▁": 1054, + "▁t": 1055, + "he": 1056, + "▁a": 1057, + "▁the": 1058, + "in": 1059, + "▁s": 1060, + "▁w": 1061, + "▁o": 1062, + "re": 1063, + "nd": 1064, + "▁b": 1065, + "▁h": 1066, + "er": 1067, + "▁m": 1068, + "▁i": 1069, + "ou": 1070, + "▁c": 1071, + "▁f": 1072, + "at": 1073, + "ed": 1074, + "▁and": 1075, + "en": 1076, + "▁to": 1077, + "▁of": 1078, + "on": 1079, + "is": 1080, + "▁d": 1081, + "ing": 1082, + "▁th": 1083, + "▁p": 1084, + "▁he": 1085, + "or": 1086, + "▁l": 1087, + "es": 1088, + "▁in": 1089, + "ll": 1090, + "it": 1091, + "ar": 1092, + "as": 1093, + "an": 1094, + "▁n": 1095, + "▁g": 1096, + "om": 1097, + "▁be": 1098, + "▁ha": 1099, + "▁e": 1100, + "le": 1101, + "ot": 1102, + "▁y": 1103, + "ut": 1104, + "ow": 1105, + "ic": 1106, + "▁wh": 1107, + "▁it": 1108, + "ld": 1109, + "ve": 1110, + 
"▁that": 1111, + "ly": 1112, + "▁was": 1113, + "id": 1114, + "se": 1115, + "st": 1116, + "▁on": 1117, + "gh": 1118, + "ent": 1119, + "▁re": 1120, + "▁you": 1121, + "im": 1122, + "ce": 1123, + "▁u": 1124, + "ver": 1125, + "ion": 1126, + "▁as": 1127, + "et": 1128, + "▁for": 1129, + "ay": 1130, + "▁his": 1131, + "▁we": 1132, + "ith": 1133, + "al": 1134, + "ir": 1135, + "▁r": 1136, + "▁with": 1137, + "▁st": 1138, + "ad": 1139, + "ur": 1140, + "ght": 1141, + "▁an": 1142, + "▁her": 1143, + "▁not": 1144, + "▁is": 1145, + "▁had": 1146, + "ter": 1147, + "her": 1148, + "ac": 1149, + "am": 1150, + "▁at": 1151, + "oo": 1152, + "▁but": 1153, + "ould": 1154, + "▁she": 1155, + "▁k": 1156, + "▁se": 1157, + "▁sa": 1158, + "▁sh": 1159, + "▁fr": 1160, + "▁him": 1161, + "▁so": 1162, + "▁me": 1163, + "ill": 1164, + "ain": 1165, + "▁su": 1166, + "ight": 1167, + "ch": 1168, + "red": 1169, + "ct": 1170, + "all": 1171, + "ro": 1172, + "ke": 1173, + "ess": 1174, + "il": 1175, + "'s": 1176, + "ore": 1177, + "▁de": 1178, + "▁my": 1179, + "▁they": 1180, + "▁whe": 1181, + "▁all": 1182, + "ich": 1183, + "▁ne": 1184, + "ri": 1185, + "▁by": 1186, + "▁have": 1187, + "ome": 1188, + "pp": 1189, + "▁this": 1190, + "▁li": 1191, + "▁do": 1192, + "▁con": 1193, + "us": 1194, + "▁which": 1195, + "▁ch": 1196, + "ul": 1197, + "qu": 1198, + "▁j": 1199, + "▁up": 1200, + "▁said": 1201, + "▁from": 1202, + "ard": 1203, + "ge": 1204, + "▁or": 1205, + "▁v": 1206, + "▁one": 1207, + "▁no": 1208, + "th": 1209, + "▁ex": 1210, + "▁were": 1211, + "▁there": 1212, + "pe": 1213, + "and": 1214, + "est": 1215, + "▁man": 1216, + "▁who": 1217, + "ble": 1218, + "ie": 1219, + "▁al": 1220, + "ant": 1221, + "res": 1222, + "ous": 1223, + "ust": 1224, + "very": 1225, + "ation": 1226, + "▁fe": 1227, + "▁them": 1228, + "lf": 1229, + "▁when": 1230, + "nt": 1231, + "ame": 1232, + "ind": 1233, + "ra": 1234, + "▁go": 1235, + "ers": 1236, + "ast": 1237, + "fe": 1238, + "ood": 1239, + "▁kn": 1240, + "▁int": 1241, + "ist": 1242, + "▁are": 
1243, + "art": 1244, + "out": 1245, + "▁would": 1246, + "▁le": 1247, + "▁what": 1248, + "os": 1249, + "▁their": 1250, + "ong": 1251, + "our": 1252, + "▁if": 1253, + "▁com": 1254, + "ound": 1255, + "▁ab": 1256, + "▁out": 1257, + "▁wor": 1258, + "em": 1259, + "▁will": 1260, + "ak": 1261, + "▁mis": 1262, + "ate": 1263, + "ol": 1264, + "um": 1265, + "un": 1266, + "itt": 1267, + "ough": 1268, + "ked": 1269, + "ig": 1270, + "ap": 1271, + "one": 1272, + "▁been": 1273, + "own": 1274, + "ive": 1275, + "▁then": 1276, + "▁br": 1277, + "ven": 1278, + "if": 1279, + "▁ar": 1280, + "'t": 1281, + "self": 1282, + "▁tr": 1283, + "▁pl": 1284, + "▁ro": 1285, + "▁pr": 1286, + "ther": 1287, + "reat": 1288, + "▁un": 1289, + "▁af": 1290, + "▁sp": 1291, + "▁qu": 1292, + "▁pro": 1293, + "ity": 1294, + "hed": 1295, + "▁tw": 1296, + "▁ag": 1297, + "▁could": 1298, + "ost": 1299, + "ace": 1300, + "ort": 1301, + "ure": 1302, + "ake": 1303, + "▁am": 1304, + "ack": 1305, + "▁any": 1306, + "▁some": 1307, + "▁your": 1308, + "▁more": 1309, + "▁can": 1310, + "au": 1311, + "▁tim": 1312, + "ep": 1313, + "ag": 1314, + "▁en": 1315, + "ck": 1316, + "▁into": 1317, + "▁cl": 1318, + "ry": 1319, + "▁now": 1320, + "hing": 1321, + "nder": 1322, + "are": 1323, + "▁very": 1324, + "▁gr": 1325, + "el": 1326, + "ose": 1327, + "▁loo": 1328, + "▁bo": 1329, + "ved": 1330, + "op": 1331, + "▁other": 1332, + "▁did": 1333, + "ance": 1334, + "▁than": 1335, + "ittle": 1336, + "▁little": 1337, + "ine": 1338, + "ies": 1339, + "way": 1340, + "ite": 1341, + "▁like": 1342, + "ide": 1343, + "▁lo": 1344, + "ass": 1345, + "▁bl": 1346, + "able": 1347, + "urn": 1348, + "ought": 1349, + "▁know": 1350, + "other": 1351, + "▁time": 1352, + "▁im": 1353, + "▁dis": 1354, + "▁us": 1355, + "▁co": 1356, + "fore": 1357, + "▁how": 1358, + "▁te": 1359, + "ence": 1360, + "▁day": 1361, + "▁ad": 1362, + "ade": 1363, + "ice": 1364, + "▁about": 1365, + "▁see": 1366, + "▁over": 1367, + "pt": 1368, + "cc": 1369, + "▁too": 1370, + "ink": 1371, + "▁fl": 
1372, + "wn": 1373, + "▁great": 1374, + "▁after": 1375, + "pl": 1376, + "de": 1377, + "▁per": 1378, + "ment": 1379, + "▁again": 1380, + "▁upon": 1381, + "▁hand": 1382, + "ab": 1383, + "▁has": 1384, + "ree": 1385, + "ish": 1386, + "ci": 1387, + "▁only": 1388, + "ally": 1389, + "▁well": 1390, + "▁should": 1391, + "▁po": 1392, + "▁mar": 1393, + "ress": 1394, + "▁say": 1395, + "▁good": 1396, + "ather": 1397, + "▁two": 1398, + "ings": 1399, + "▁pe": 1400, + "ount": 1401, + "▁our": 1402, + "ire": 1403, + "ving": 1404, + "▁down": 1405, + "ars": 1406, + "ert": 1407, + "we": 1408, + "▁before": 1409, + "ile": 1410, + "ves": 1411, + "▁app": 1412, + "▁every": 1413, + "▁its": 1414, + "▁old": 1415, + "▁thr": 1416, + "▁mu": 1417, + "▁made": 1418, + "ied": 1419, + "ick": 1420, + "▁long": 1421, + "age": 1422, + "te": 1423, + "ft": 1424, + "▁where": 1425, + "ang": 1426, + "▁never": 1427, + "▁must": 1428, + "▁pre": 1429, + "▁sm": 1430, + "ful": 1431, + "▁such": 1432, + "ull": 1433, + "▁str": 1434, + "ions": 1435, + "▁off": 1436, + "▁sc": 1437, + "▁came": 1438, + "ious": 1439, + "ue": 1440, + "▁miss": 1441, + "ward": 1442, + "ild": 1443, + "▁fir": 1444, + "▁even": 1445, + "▁under": 1446, + "act": 1447, + "▁these": 1448, + "▁come": 1449, + "▁part": 1450, + "▁fo": 1451, + "ated": 1452, + "ness": 1453, + "▁rem": 1454, + "ord": 1455, + "▁bec": 1456, + "ty": 1457, + "▁may": 1458, + "▁much": 1459, + "▁think": 1460, + "per": 1461, + "▁way": 1462, + "▁mister": 1463, + "led": 1464, + "▁let": 1465, + "orn": 1466, + "▁ey": 1467, + "▁gl": 1468, + "▁cont": 1469, + "▁thought": 1470, + "▁look": 1471, + "ect": 1472, + "▁spe": 1473, + "ise": 1474, + "▁back": 1475, + "▁bet": 1476, + "ady": 1477, + "▁ye": 1478, + "ans": 1479, + "ach": 1480, + "▁here": 1481, + "▁just": 1482, + "ren": 1483, + "▁first": 1484, + "▁ho": 1485, + "▁own": 1486, + "▁des": 1487, + "▁ob": 1488, + "ried": 1489, + "ud": 1490, + "ary": 1491, + "▁went": 1492, + "▁mo": 1493, + "▁himself": 1494, + "▁men": 1495, + "air": 1496, + "cl": 
1497, + "ave": 1498, + "ath": 1499, + "ff": 1500, + "▁sl": 1501, + "co": 1502, + "on't": 1503, + "llow": 1504, + "▁cr": 1505, + "▁res": 1506, + "▁i'": 1507, + "▁might": 1508, + "ily": 1509, + "▁seem": 1510, + "int": 1511, + "ip": 1512, + "▁beg": 1513, + "ouse": 1514, + "anc": 1515, + "n't": 1516, + "▁wat": 1517, + "▁through": 1518, + "▁comp": 1519, + "ber": 1520, + "▁away": 1521, + "▁car": 1522, + "▁em": 1523, + "▁get": 1524, + "▁imp": 1525, + "▁head": 1526, + "oss": 1527, + "▁life": 1528, + "▁bel": 1529, + "▁without": 1530, + "▁most": 1531, + "▁pass": 1532, + "▁make": 1533, + "▁cons": 1534, + "ened": 1535, + "▁som": 1536, + "▁turn": 1537, + "av": 1538, + "ng": 1539, + "▁shall": 1540, + "▁acc": 1541, + "▁those": 1542, + "▁pres": 1543, + "▁eyes": 1544, + "▁house": 1545, + "iz": 1546, + "▁somet": 1547, + "▁jo": 1548, + "▁still": 1549, + "▁call": 1550, + "▁night": 1551, + "hes": 1552, + "▁op": 1553, + "ause": 1554, + "▁wom": 1555, + "▁last": 1556, + "ks": 1557, + "less": 1558, + "ared": 1559, + "▁comm": 1560, + "▁don't": 1561, + "▁tell": 1562, + "▁ent": 1563, + "▁nothing": 1564, + "▁new": 1565, + "ign": 1566, + "▁take": 1567, + "▁being": 1568, + "▁many": 1569, + "▁word": 1570, + "ons": 1571, + "▁found": 1572, + "▁ret": 1573, + "ase": 1574, + "▁ear": 1575, + "▁while": 1576, + "▁att": 1577, + "ory": 1578, + "ix": 1579, + "▁ser": 1580, + "▁saw": 1581, + "▁put": 1582, + "ne": 1583, + "oth": 1584, + "iend": 1585, + "▁peop": 1586, + "▁wr": 1587, + "▁young": 1588, + "ark": 1589, + "dy": 1590, + "aking": 1591, + "les": 1592, + "▁count": 1593, + "▁once": 1594, + "▁friend": 1595, + "▁la": 1596, + "ens": 1597, + "▁people": 1598, + "pect": 1599, + "ors": 1600, + "fect": 1601, + "▁mat": 1602, + "ince": 1603, + "ible": 1604, + "ered": 1605, + "▁room": 1606, + "▁three": 1607, + "▁yet": 1608, + "ail": 1609, + "▁same": 1610, + "▁father": 1611, + "▁right": 1612, + "▁child": 1613, + "▁cour": 1614, + "igh": 1615, + "▁place": 1616, + "▁another": 1617, + "ult": 1618, + "iv": 1619, + 
"ition": 1620, + "▁ind": 1621, + "▁want": 1622, + "▁though": 1623, + "▁nor": 1624, + "▁far": 1625, + "▁king": 1626, + "▁happ": 1627, + "▁heart": 1628, + "▁face": 1629, + "▁end": 1630, + "▁ever": 1631, + "▁nat": 1632, + "thing": 1633, + "▁love": 1634, + "get": 1635, + "▁took": 1636, + "▁dist": 1637, + "ever": 1638, + "ian": 1639, + "▁hu": 1640, + "ew": 1641, + "▁arm": 1642, + "▁inst": 1643, + "man": 1644, + "▁work": 1645, + "▁light": 1646, + "▁char": 1647, + "▁ple": 1648, + "ict": 1649, + "▁set": 1650, + "▁ac": 1651, + "▁looked": 1652, + "▁missus": 1653, + "▁asked": 1654, + "▁mind": 1655, + "▁yes": 1656, + "▁supp": 1657, + "▁inte": 1658, + "▁rep": 1659, + "cess": 1660, + "ently": 1661, + "▁left": 1662, + "gg": 1663, + "ertain": 1664, + "▁ke": 1665, + "ished": 1666, + "ub": 1667, + "▁pers": 1668, + "ways": 1669, + "▁things": 1670, + "alk": 1671, + "irl": 1672, + "▁mom": 1673, + "▁sir": 1674, + "▁wa": 1675, + "▁moment": 1676, + "ations": 1677, + "▁sat": 1678, + "sel": 1679, + "▁find": 1680, + "ower": 1681, + "ia": 1682, + "vent": 1683, + "rew": 1684, + "▁world": 1685, + "ject": 1686, + "▁give": 1687, + "▁cap": 1688, + "▁why": 1689, + "so": 1690, + "▁gu": 1691, + "▁mother": 1692, + "▁gen": 1693, + "▁sw": 1694, + "▁always": 1695, + "der": 1696, + "lt": 1697, + "ling": 1698, + "▁ans": 1699, + "pped": 1700, + "▁soon": 1701, + "▁act": 1702, + "▁form": 1703, + "▁el": 1704, + "dd": 1705, + "▁heard": 1706, + "ret": 1707, + "▁thing": 1708, + "▁something": 1709, + "▁seemed": 1710, + "▁sub": 1711, + "▁door": 1712, + "ange": 1713, + "▁girl": 1714, + "ced": 1715, + "▁appe": 1716, + "ither": 1717, + "▁wind": 1718, + "▁because": 1719, + "▁dif": 1720, + "▁mon": 1721, + "ss": 1722, + "▁going": 1723, + "▁told": 1724, + "orm": 1725, + "▁home": 1726, + "ained": 1727, + "▁got": 1728, + "▁war": 1729, + "▁god": 1730, + "aught": 1731, + "▁gi": 1732, + "▁eng": 1733, + "▁sur": 1734, + "ning": 1735, + "▁hands": 1736, + "▁woman": 1737, + "▁follow": 1738, + "land": 1739, + "aut": 1740, + "▁vo": 
1741, + "▁feel": 1742, + "▁rel": 1743, + "▁poss": 1744, + "ched": 1745, + "ical": 1746, + "ple": 1747, + "ph": 1748, + "▁boy": 1749, + "▁return": 1750, + "▁reg": 1751, + "▁rest": 1752, + "ook": 1753, + "▁knew": 1754, + "ner": 1755, + "▁each": 1756, + "▁oh": 1757, + "▁sil": 1758, + "▁kind": 1759, + "▁exp": 1760, + "▁ma": 1761, + "▁cle": 1762, + "▁hel": 1763, + "iver": 1764, + "ting": 1765, + "▁del": 1766, + "ual": 1767, + "▁inf": 1768, + "▁ass": 1769, + "▁water": 1770, + "▁conf": 1771, + "▁bre": 1772, + "▁wo": 1773, + "cept": 1774, + "▁belie": 1775, + "▁certain": 1776, + "▁against": 1777, + "▁hard": 1778, + "▁ph": 1779, + "row": 1780, + "▁unt": 1781, + "▁years": 1782, + "▁quite": 1783, + "▁side": 1784, + "iness": 1785, + "ined": 1786, + "▁near": 1787, + "▁hor": 1788, + "ters": 1789, + "ired": 1790, + "ool": 1791, + "▁four": 1792, + "▁few": 1793, + "▁done": 1794, + "ier": 1795, + "▁che": 1796, + "rest": 1797, + "ited": 1798, + "most": 1799, + "▁better": 1800, + "▁half": 1801, + "▁min": 1802, + "▁tre": 1803, + "ps": 1804, + "▁also": 1805, + "▁care": 1806, + "ock": 1807, + "uck": 1808, + "oub": 1809, + "▁began": 1810, + "ully": 1811, + "▁enough": 1812, + "ised": 1813, + "ru": 1814, + "▁having": 1815, + "▁seen": 1816, + "▁gener": 1817, + "▁lady": 1818, + "▁dra": 1819, + "▁hum": 1820, + "aps": 1821, + "ott": 1822, + "▁pur": 1823, + "aken": 1824, + "ross": 1825, + "ying": 1826, + "▁ter": 1827, + "▁hour": 1828, + "▁inde": 1829, + "ank": 1830, + "▁called": 1831, + "ial": 1832, + "ason": 1833, + "▁beh": 1834, + "▁does": 1835, + "▁whole": 1836, + "▁morn": 1837, + "▁turned": 1838, + "▁pleas": 1839, + "▁ste": 1840, + "▁ref": 1841, + "▁gave": 1842, + "ense": 1843, + "▁occ": 1844, + "ib": 1845, + "▁course": 1846, + "▁ins": 1847, + "ream": 1848, + "gether": 1849, + "uth": 1850, + "▁both": 1851, + "▁sou": 1852, + "▁cur": 1853, + "▁add": 1854, + "een": 1855, + "▁col": 1856, + "▁read": 1857, + "ween": 1858, + "selves": 1859, + "▁among": 1860, + "▁between": 1861, + "▁inc": 1862, + 
"▁keep": 1863, + "▁beaut": 1864, + "ular": 1865, + "▁poor": 1866, + "▁it's": 1867, + "▁sure": 1868, + "▁morning": 1869, + "▁white": 1870, + "ged": 1871, + "▁name": 1872, + "▁dear": 1873, + "▁toward": 1874, + "ute": 1875, + "▁small": 1876, + "▁whom": 1877, + "▁repl": 1878, + "▁sk": 1879, + "▁lar": 1880, + "▁felt": 1881, + "bo": 1882, + "osed": 1883, + "ating": 1884, + "▁myself": 1885, + "▁open": 1886, + "▁six": 1887, + "▁herself": 1888, + "▁however": 1889, + "▁bu": 1890, + "ond": 1891, + "aint": 1892, + "xt": 1893, + "▁fore": 1894, + "▁inter": 1895, + "▁ev": 1896, + "▁high": 1897, + "ction": 1898, + "▁hund": 1899, + "▁stood": 1900, + "▁hundred": 1901, + "aster": 1902, + "▁tra": 1903, + "▁show": 1904, + "▁sent": 1905, + "ife": 1906, + "▁round": 1907, + "▁sim": 1908, + "▁dr": 1909, + "▁gra": 1910, + "▁words": 1911, + "▁days": 1912, + "▁almost": 1913, + "ale": 1914, + "vel": 1915, + "▁point": 1916, + "ents": 1917, + "▁gre": 1918, + "▁eight": 1919, + "ces": 1920, + "ates": 1921, + "dden": 1922, + "▁fam": 1923, + "▁stand": 1924, + "▁bus": 1925, + "▁land": 1926, + "▁ed": 1927, + "▁mean": 1928, + "ung": 1929, + "haps": 1930, + "▁sun": 1931, + "ures": 1932, + "▁since": 1933, + "iet": 1934, + "ird": 1935, + "▁perhaps": 1936, + "ned": 1937, + "▁sle": 1938, + "iss": 1939, + "▁best": 1940, + "▁sudden": 1941, + "▁dark": 1942, + "▁replied": 1943, + "▁voice": 1944, + "▁met": 1945, + "▁anything": 1946, + "▁till": 1947, + "▁underst": 1948, + "▁bar": 1949, + "its": 1950, + "▁until": 1951, + "ins": 1952, + "oud": 1953, + "▁black": 1954, + "▁bro": 1955, + "▁hear": 1956, + "▁looking": 1957, + "▁cried": 1958, + "▁you'": 1959, + "▁fact": 1960, + "amp": 1961, + "▁prin": 1962, + "▁less": 1963, + "▁lay": 1964, + "▁next": 1965, + "▁law": 1966, + "up": 1967, + "▁power": 1968, + "▁prop": 1969, + "not": 1970, + "rent": 1971, + "▁brought": 1972, + "ately": 1973, + "enty": 1974, + "▁country": 1975, + "▁help": 1976, + "als": 1977, + "▁quest": 1978, + "med": 1979, + "▁use": 1980, + "▁vis": 1981, + 
"▁sn": 1982, + "▁i'm": 1983, + "fully": 1984, + "▁spo": 1985, + "▁together": 1986, + "▁need": 1987, + "▁air": 1988, + "▁adv": 1989, + "▁person": 1990, + "▁indeed": 1991, + "▁contin": 1992, + "▁unc": 1993, + "oney": 1994, + "▁gent": 1995, + "▁present": 1996, + "▁aw": 1997, + "▁par": 1998, + "ows": 1999, + "ured": 2000, + "▁full": 2001, + "tain": 2002, + "▁run": 2003, + "▁rather": 2004, + "▁ide": 2005, + "▁cond": 2006, + "nded": 2007, + "▁lat": 2008, + "▁sy": 2009, + "be": 2010, + "du": 2011, + "▁har": 2012, + "▁feet": 2013, + "▁fin": 2014, + "eter": 2015, + "▁fall": 2016, + "cei": 2017, + "▁five": 2018, + "▁mil": 2019, + "▁bed": 2020, + "oc": 2021, + "▁doct": 2022, + "▁interest": 2023, + "ressed": 2024, + "▁matter": 2025, + "▁lord": 2026, + "▁gone": 2027, + "▁es": 2028, + "fort": 2029, + "▁death": 2030, + "▁wife": 2031, + "▁serv": 2032, + "▁pat": 2033, + "ering": 2034, + "oubt": 2035, + "▁adm": 2036, + "▁talk": 2037, + "▁taken": 2038, + "▁art": 2039, + "▁tri": 2040, + "▁others": 2041, + "▁hope": 2042, + "ash": 2043, + "az": 2044, + "▁ext": 2045, + "▁cannot": 2046, + "ief": 2047, + "▁speak": 2048, + "▁lau": 2049, + "▁themselves": 2050, + "▁along": 2051, + "▁dire": 2052, + "ove": 2053, + "mb": 2054, + "pr": 2055, + "▁bes": 2056, + "▁cou": 2057, + "▁mor": 2058, + "ten": 2059, + "▁gentle": 2060, + "uring": 2061, + "▁fire": 2062, + "▁large": 2063, + "▁pol": 2064, + "▁cat": 2065, + "▁swe": 2066, + "ention": 2067, + "vers": 2068, + "▁thus": 2069, + "app": 2070, + "▁sec": 2071, + "▁play": 2072, + "▁real": 2073, + "▁prom": 2074, + "ments": 2075, + "wered": 2076, + "ield": 2077, + "ains": 2078, + "ison": 2079, + "ached": 2080, + "▁thou": 2081, + "▁reason": 2082, + "▁thous": 2083, + "iting": 2084, + "▁brother": 2085, + "akes": 2086, + "▁thousand": 2087, + "ont": 2088, + "▁money": 2089, + "▁remem": 2090, + "▁dep": 2091, + "▁answered": 2092, + "▁true": 2093, + "▁children": 2094, + "▁behind": 2095, + "oy": 2096, + "▁sound": 2097, + "ants": 2098, + "ably": 2099, + "▁wood": 2100, + 
"used": 2101, + "▁dec": 2102, + "▁whose": 2103, + "od": 2104, + "▁ele": 2105, + "▁twenty": 2106, + "▁ra": 2107, + "itu": 2108, + "▁believe": 2109, + "▁wonder": 2110, + "ene": 2111, + "▁inv": 2112, + "▁hon": 2113, + "aring": 2114, + "sh": 2115, + "ued": 2116, + "▁suff": 2117, + "▁opp": 2118, + "▁doubt": 2119, + "▁rec": 2120, + "ton": 2121, + "▁hold": 2122, + "▁diffe": 2123, + "▁passed": 2124, + "▁cor": 2125, + "me": 2126, + "ided": 2127, + "ities": 2128, + "▁mer": 2129, + "▁sing": 2130, + "▁nature": 2131, + "▁alone": 2132, + "▁dead": 2133, + "▁pri": 2134, + "ken": 2135, + "lic": 2136, + "▁red": 2137, + "▁bur": 2138, + "aces": 2139, + "▁close": 2140, + "▁gold": 2141, + "▁start": 2142, + "▁hur": 2143, + "▁fur": 2144, + "og": 2145, + "ances": 2146, + "▁ask": 2147, + "▁doctor": 2148, + "▁son": 2149, + "▁ground": 2150, + "wer": 2151, + "ets": 2152, + "▁sea": 2153, + "▁strong": 2154, + "▁leave": 2155, + "▁compan": 2156, + "▁i'll": 2157, + "ery": 2158, + "cy": 2159, + "illed": 2160, + "ept": 2161, + "ides": 2162, + "tle": 2163, + "▁ce": 2164, + "▁obs": 2165, + "body": 2166, + "▁fell": 2167, + "▁sign": 2168, + "cond": 2169, + "▁mount": 2170, + "▁fair": 2171, + "▁given": 2172, + "▁therefore": 2173, + "ane": 2174, + "▁ir": 2175, + "▁deep": 2176, + "iful": 2177, + "fic": 2178, + "ys": 2179, + "▁often": 2180, + "▁body": 2181, + "unt": 2182, + "▁short": 2183, + "▁tem": 2184, + "▁fa": 2185, + "▁master": 2186, + "▁earth": 2187, + "▁pap": 2188, + "ceed": 2189, + "▁stre": 2190, + "▁second": 2191, + "▁fort": 2192, + "bed": 2193, + "gth": 2194, + "owed": 2195, + "▁horse": 2196, + "idd": 2197, + "▁mad": 2198, + "ually": 2199, + "▁pa": 2200, + "▁chr": 2201, + "▁order": 2202, + "▁ten": 2203, + "vered": 2204, + "▁const": 2205, + "▁wish": 2206, + "▁fif": 2207, + "▁eas": 2208, + "▁cir": 2209, + "▁dro": 2210, + "aim": 2211, + "hen": 2212, + "▁ca": 2213, + "▁really": 2214, + "read": 2215, + "ceived": 2216, + "▁ill": 2217, + "▁fear": 2218, + "osition": 2219, + "▁understand": 2220, + "▁spir": 
2221, + "▁list": 2222, + "▁abs": 2223, + "▁spr": 2224, + "aced": 2225, + "▁question": 2226, + "anger": 2227, + "▁everything": 2228, + "aughter": 2229, + "▁aff": 2230, + "▁wall": 2231, + "▁coming": 2232, + "ching": 2233, + "ready": 2234, + "ider": 2235, + "▁above": 2236, + "▁prince": 2237, + "▁already": 2238, + "▁least": 2239, + "▁reco": 2240, + "▁expl": 2241, + "▁step": 2242, + "▁used": 2243, + "▁ru": 2244, + "▁itself": 2245, + "ister": 2246, + "▁necess": 2247, + "▁case": 2248, + "▁around": 2249, + "hn": 2250, + "▁soul": 2251, + "▁suddenly": 2252, + "ger": 2253, + "▁lad": 2254, + "▁evening": 2255, + "▁mag": 2256, + "▁general": 2257, + "▁num": 2258, + "imes": 2259, + "▁known": 2260, + "▁wal": 2261, + "▁quick": 2262, + "ized": 2263, + "▁mus": 2264, + "▁sch": 2265, + "▁captain": 2266, + "▁that's": 2267, + "ific": 2268, + "▁whether": 2269, + "▁lear": 2270, + "gn": 2271, + "▁within": 2272, + "men": 2273, + "▁live": 2274, + "vern": 2275, + "▁times": 2276, + "▁expect": 2277, + "▁state": 2278, + "▁friends": 2279, + "▁bring": 2280, + "▁sort": 2281, + "▁women": 2282, + "▁table": 2283, + "▁meet": 2284, + "▁john": 2285, + "▁circ": 2286, + "▁sum": 2287, + "▁returned": 2288, + "iled": 2289, + "▁dri": 2290, + "▁held": 2291, + "▁exc": 2292, + "▁big": 2293, + "▁says": 2294, + "▁perfect": 2295, + "▁lea": 2296, + "▁obser": 2297, + "▁else": 2298, + "▁during": 2299, + "ident": 2300, + "▁hus": 2301, + "ted": 2302, + "▁beautiful": 2303, + "▁clear": 2304, + "▁either": 2305, + "▁town": 2306, + "▁sight": 2307, + "▁lost": 2308, + "▁sleep": 2309, + "▁means": 2310, + "▁foot": 2311, + "▁cut": 2312, + "▁cal": 2313, + "▁kept": 2314, + "▁ran": 2315, + "ience": 2316, + "▁prof": 2317, + "tered": 2318, + "here": 2319, + "ety": 2320, + "▁fellow": 2321, + "▁can't": 2322, + "▁mist": 2323, + "▁past": 2324, + "▁dream": 2325, + "ages": 2326, + "▁became": 2327, + "▁pret": 2328, + "▁disc": 2329, + "▁bad": 2330, + "▁making": 2331, + "ution": 2332, + "▁object": 2333, + "▁towards": 2334, + "▁low": 2335, + 
"ught": 2336, + "▁dev": 2337, + "▁human": 2338, + "▁manner": 2339, + "▁strange": 2340, + "▁year": 2341, + "old": 2342, + "ient": 2343, + "ines": 2344, + "▁sever": 2345, + "mon": 2346, + "▁ann": 2347, + "airs": 2348, + "ches": 2349, + "▁city": 2350, + "▁sometimes": 2351, + "'d": 2352, + "▁rose": 2353, + "▁est": 2354, + "ility": 2355, + "▁walk": 2356, + "▁ready": 2357, + "▁pal": 2358, + "▁leg": 2359, + "▁road": 2360, + "ians": 2361, + "cious": 2362, + "▁corn": 2363, + "▁thy": 2364, + "▁cold": 2365, + "lly": 2366, + "iously": 2367, + "lish": 2368, + "▁stra": 2369, + "mer": 2370, + "▁bat": 2371, + "owing": 2372, + "iew": 2373, + "▁christ": 2374, + "▁squ": 2375, + "▁truth": 2376, + "cri": 2377, + "lled": 2378, + "▁thir": 2379, + "▁didn't": 2380, + "bert": 2381, + "▁soci": 2382, + "br": 2383, + "▁bit": 2384, + "▁subject": 2385, + "▁ship": 2386, + "▁mur": 2387, + "▁appro": 2388, + "▁pie": 2389, + "▁answer": 2390, + "▁free": 2391, + "▁business": 2392, + "▁ut": 2393, + "ape": 2394, + "▁appear": 2395, + "▁river": 2396, + "▁sto": 2397, + "▁cast": 2398, + "▁family": 2399, + "▁jud": 2400, + "▁excl": 2401, + "▁letter": 2402, + "ingly": 2403, + "rie": 2404, + "▁hair": 2405, + "ote": 2406, + "▁arms": 2407, + "▁become": 2408, + "ern": 2409, + "ouble": 2410, + "▁different": 2411, + "▁val": 2412, + "ffect": 2413, + "▁natur": 2414, + "▁possible": 2415, + "▁several": 2416, + "▁fine": 2417, + "ah": 2418, + "▁lead": 2419, + "▁forg": 2420, + "▁express": 2421, + "li": 2422, + "▁sus": 2423, + "▁glad": 2424, + "oon": 2425, + "▁arri": 2426, + "▁blood": 2427, + "itting": 2428, + "▁quiet": 2429, + "rence": 2430, + "▁idea": 2431, + "▁able": 2432, + "itted": 2433, + "ster": 2434, + "▁charac": 2435, + "▁begin": 2436, + "▁chur": 2437, + "▁tou": 2438, + "▁story": 2439, + "▁eye": 2440, + "band": 2441, + "ative": 2442, + "▁grand": 2443, + "▁consider": 2444, + "▁across": 2445, + "▁pen": 2446, + "▁except": 2447, + "▁fre": 2448, + "▁win": 2449, + "▁equ": 2450, + "eth": 2451, + "▁cent": 2452, + "isf": 
2453, + "▁partic": 2454, + "▁diffic": 2455, + "▁window": 2456, + "▁surpr": 2457, + "llect": 2458, + "▁prov": 2459, + "▁direct": 2460, + "▁conc": 2461, + "ey": 2462, + "aw": 2463, + "▁govern": 2464, + "▁disco": 2465, + "▁wild": 2466, + "▁dog": 2467, + "▁flo": 2468, + "▁soft": 2469, + "teen": 2470, + "▁cross": 2471, + "ased": 2472, + "▁effect": 2473, + "▁sor": 2474, + "▁longer": 2475, + "▁hen": 2476, + "▁followed": 2477, + "▁sold": 2478, + "▁thee": 2479, + "▁pub": 2480, + "▁husband": 2481, + "ards": 2482, + "antly": 2483, + "by": 2484, + "▁ap": 2485, + "▁suppose": 2486, + "▁respect": 2487, + "ts": 2488, + "▁hast": 2489, + "▁sal": 2490, + "▁comple": 2491, + "▁heav": 2492, + "▁happy": 2493, + "▁rich": 2494, + "▁creat": 2495, + "une": 2496, + "▁taking": 2497, + "▁requ": 2498, + "▁stay": 2499, + "▁spoke": 2500, + "▁daughter": 2501, + "▁wee": 2502, + "▁ve": 2503, + "▁du": 2504, + "▁green": 2505, + "▁anim": 2506, + "▁din": 2507, + "'ll": 2508, + "▁bird": 2509, + "alth": 2510, + "▁mere": 2511, + "▁gard": 2512, + "ny": 2513, + "ley": 2514, + "▁possess": 2515, + "empt": 2516, + "▁reached": 2517, + "▁appeared": 2518, + "ov": 2519, + "▁exist": 2520, + "ination": 2521, + "▁pretty": 2522, + "▁remember": 2523, + "▁hea": 2524, + "▁opened": 2525, + "▁tom": 2526, + "anged": 2527, + "▁slow": 2528, + "▁imag": 2529, + "▁i've": 2530, + "ract": 2531, + "▁saying": 2532, + "king": 2533, + "utes": 2534, + "▁common": 2535, + "▁occas": 2536, + "▁book": 2537, + "▁rus": 2538, + "ames": 2539, + "ices": 2540, + "▁bright": 2541, + "ms": 2542, + "▁satisf": 2543, + "▁sense": 2544, + "▁fav": 2545, + "▁succ": 2546, + "ump": 2547, + "ising": 2548, + "▁lu": 2549, + "▁accord": 2550, + "tern": 2551, + "▁break": 2552, + "▁exper": 2553, + "▁month": 2554, + "use": 2555, + "▁dem": 2556, + "▁scar": 2557, + "▁continued": 2558, + "▁secret": 2559, + "▁church": 2560, + "▁tree": 2561, + "▁stri": 2562, + "▁carried": 2563, + "▁cry": 2564, + "nding": 2565, + "▁spirit": 2566, + "▁wanted": 2567, + "eric": 2568, + 
"▁certainly": 2569, + "▁command": 2570, + "▁dest": 2571, + "▁move": 2572, + "oun": 2573, + "▁sweet": 2574, + "▁street": 2575, + "▁ought": 2576, + "▁account": 2577, + "▁def": 2578, + "ham": 2579, + "▁prep": 2580, + "▁sens": 2581, + "▁esc": 2582, + "▁rock": 2583, + "ots": 2584, + "▁decl": 2585, + "▁purp": 2586, + "riage": 2587, + "outh": 2588, + "owers": 2589, + "▁draw": 2590, + "▁eat": 2591, + "▁breat": 2592, + "▁character": 2593, + "ime": 2594, + "cul": 2595, + "medi": 2596, + "▁stud": 2597, + "▁school": 2598, + "itude": 2599, + "▁heaven": 2600, + "▁feeling": 2601, + "▁sad": 2602, + "▁regard": 2603, + "ement": 2604, + "▁pain": 2605, + "▁worth": 2606, + "▁bra": 2607, + "ney": 2608, + "▁dut": 2609, + "▁smo": 2610, + "aimed": 2611, + "▁trans": 2612, + "▁delight": 2613, + "▁quar": 2614, + "▁hung": 2615, + "▁mot": 2616, + "▁blue": 2617, + "▁hot": 2618, + "▁hill": 2619, + "▁div": 2620, + "umb": 2621, + "▁disapp": 2622, + "▁marg": 2623, + "▁laugh": 2624, + "idence": 2625, + "▁produ": 2626, + "▁success": 2627, + "ury": 2628, + "son": 2629, + "▁fast": 2630, + "▁english": 2631, + "▁dress": 2632, + "▁hat": 2633, + "▁terri": 2634, + "▁port": 2635, + "▁neither": 2636, + "▁court": 2637, + "▁seven": 2638, + "▁fight": 2639, + "▁princess": 2640, + "▁lived": 2641, + "▁view": 2642, + "▁immedi": 2643, + "▁self": 2644, + "▁var": 2645, + "▁hours": 2646, + "▁mill": 2647, + "▁sol": 2648, + "▁exam": 2649, + "▁tried": 2650, + "▁won't": 2651, + "▁entered": 2652, + "▁disp": 2653, + "to": 2654, + "ric": 2655, + "▁carry": 2656, + "▁import": 2657, + "▁ang": 2658, + "ze": 2659, + "ony": 2660, + "▁danger": 2661, + "ledge": 2662, + "▁offic": 2663, + "▁cause": 2664, + "▁none": 2665, + "▁forward": 2666, + "▁uncle": 2667, + "▁tor": 2668, + "▁det": 2669, + "ask": 2670, + "▁len": 2671, + "▁further": 2672, + "▁pay": 2673, + "▁added": 2674, + "▁front": 2675, + "ror": 2676, + "▁ge": 2677, + "▁particular": 2678, + "▁deal": 2679, + "▁prot": 2680, + "▁led": 2681, + "▁acqu": 2682, + "▁pray": 2683, + "▁eff": 
2684, + "▁happened": 2685, + "▁chief": 2686, + "lect": 2687, + "▁walked": 2688, + "▁later": 2689, + "▁joy": 2690, + "iar": 2691, + "day": 2692, + "▁ord": 2693, + "▁alth": 2694, + "▁comfort": 2695, + "▁prob": 2696, + "▁maj": 2697, + "▁affect": 2698, + "▁public": 2699, + "▁bene": 2700, + "ening": 2701, + "▁although": 2702, + "gr": 2703, + "▁sho": 2704, + "▁fig": 2705, + "resh": 2706, + "▁fail": 2707, + "uct": 2708, + "ug": 2709, + "ality": 2710, + "▁mem": 2711, + "▁seems": 2712, + "▁yourself": 2713, + "ship": 2714, + "ead": 2715, + "iam": 2716, + "▁number": 2717, + "side": 2718, + "▁ah": 2719, + "▁doing": 2720, + "▁living": 2721, + "arent": 2722, + "▁desp": 2723, + "ize": 2724, + "oof": 2725, + "▁field": 2726, + "▁received": 2727, + "▁shad": 2728, + "▁bey": 2729, + "▁beyond": 2730, + "▁phil": 2731, + "▁line": 2732, + "▁visit": 2733, + "inct": 2734, + "rig": 2735, + "▁party": 2736, + "▁garden": 2737, + "▁je": 2738, + "▁mouth": 2739, + "▁hall": 2740, + "▁queen": 2741, + "▁boat": 2742, + "▁bear": 2743, + "▁americ": 2744, + "ism": 2745, + "▁gentleman": 2746, + "▁vi": 2747, + "irt": 2748, + "uff": 2749, + "▁laid": 2750, + "raid": 2751, + "▁occasion": 2752, + "▁entire": 2753, + "▁age": 2754, + "▁sister": 2755, + "▁clot": 2756, + "▁repe": 2757, + "ously": 2758, + "▁prison": 2759, + "▁accom": 2760, + "▁whis": 2761, + "▁nearly": 2762, + "▁trees": 2763, + "iling": 2764, + "iff": 2765, + "▁eighteen": 2766, + "bit": 2767, + "wards": 2768, + "▁early": 2769, + "▁tal": 2770, + "▁lab": 2771, + "▁forth": 2772, + "ming": 2773, + "ones": 2774, + "▁med": 2775, + "▁try": 2776, + "▁da": 2777, + "ilt": 2778, + "anced": 2779, + "▁princi": 2780, + "▁enem": 2781, + "▁thinking": 2782, + "▁chance": 2783, + "where": 2784, + "▁cre": 2785, + "▁minutes": 2786, + "▁anx": 2787, + "▁mary": 2788, + "▁pict": 2789, + "▁wait": 2790, + "▁vill": 2791, + "▁stren": 2792, + "▁afraid": 2793, + "▁crow": 2794, + "▁smile": 2795, + "▁late": 2796, + "▁england": 2797, + "▁pleasure": 2798, + "▁aunt": 2799, + "▁news": 
2800, + "▁wis": 2801, + "▁fle": 2802, + "▁seeing": 2803, + "▁super": 2804, + "▁faith": 2805, + "▁rob": 2806, + "iment": 2807, + "oint": 2808, + "▁bill": 2809, + "lling": 2810, + "▁neigh": 2811, + "▁trouble": 2812, + "▁silence": 2813, + "▁plain": 2814, + "▁there's": 2815, + "aret": 2816, + "pend": 2817, + "▁exclaimed": 2818, + "rench": 2819, + "gy": 2820, + "▁miles": 2821, + "ply": 2822, + "▁glass": 2823, + "▁drew": 2824, + "▁neighb": 2825, + "els": 2826, + "▁mine": 2827, + "▁pract": 2828, + "▁heavy": 2829, + "▁standing": 2830, + "▁sevent": 2831, + "▁shar": 2832, + "▁change": 2833, + "▁necessary": 2834, + "▁chap": 2835, + "▁purpose": 2836, + "▁inqu": 2837, + "▁natural": 2838, + "▁deter": 2839, + "icked": 2840, + "▁bott": 2841, + "▁hardly": 2842, + "▁bell": 2843, + "▁top": 2844, + "▁caught": 2845, + "fered": 2846, + "wh": 2847, + "ives": 2848, + "ounded": 2849, + "▁auth": 2850, + "▁circum": 2851, + "▁fing": 2852, + "▁stopped": 2853, + "uc": 2854, + "▁wit": 2855, + "ament": 2856, + "▁opin": 2857, + "▁av": 2858, + "▁priv": 2859, + "aining": 2860, + "▁instead": 2861, + "rupt": 2862, + "▁grew": 2863, + "▁loved": 2864, + "▁island": 2865, + "▁knight": 2866, + "▁ago": 2867, + "▁length": 2868, + "▁inn": 2869, + "▁peace": 2870, + "ls": 2871, + "inary": 2872, + "ior": 2873, + "ues": 2874, + "▁third": 2875, + "ush": 2876, + "▁beauty": 2877, + "▁hig": 2878, + "▁he's": 2879, + "the": 2880, + "form": 2881, + "head": 2882, + "ically": 2883, + "asp": 2884, + "ancy": 2885, + "▁determ": 2886, + "▁straight": 2887, + "▁cra": 2888, + "ining": 2889, + "pper": 2890, + "ler": 2891, + "▁infl": 2892, + "▁thor": 2893, + "▁convers": 2894, + "▁besides": 2895, + "▁position": 2896, + "▁thirty": 2897, + "▁den": 2898, + "rage": 2899, + "▁attention": 2900, + "ma": 2901, + "▁conv": 2902, + "ager": 2903, + "▁hist": 2904, + "ored": 2905, + "▁comes": 2906, + "aged": 2907, + "▁force": 2908, + "▁sitting": 2909, + "▁please": 2910, + "tend": 2911, + "iter": 2912, + "▁whatever": 2913, + "▁inform": 2914, + 
"▁hop": 2915, + "▁chair": 2916, + "▁build": 2917, + "▁bab": 2918, + "ustom": 2919, + "▁girls": 2920, + "▁rom": 2921, + "▁french": 2922, + "▁struck": 2923, + "▁pull": 2924, + "▁ast": 2925, + "▁lie": 2926, + "▁wrong": 2927, + "▁knowledge": 2928, + "▁grace": 2929, + "▁scarce": 2930, + "ghed": 2931, + "▁resol": 2932, + "▁watch": 2933, + "▁thoughts": 2934, + "▁rid": 2935, + "▁attempt": 2936, + "▁fifty": 2937, + "▁rap": 2938, + "▁box": 2939, + "hood": 2940, + "▁getting": 2941, + "▁ver": 2942, + "▁fat": 2943, + "▁company": 2944, + "▁arr": 2945, + "▁crowd": 2946, + "▁burn": 2947, + "▁slight": 2948, + "▁class": 2949, + "▁south": 2950, + "▁die": 2951, + "▁exact": 2952, + "▁drink": 2953, + "▁enj": 2954, + "▁thick": 2955, + "▁dinner": 2956, + "▁save": 2957, + "▁maid": 2958, + "▁plan": 2959, + "▁saint": 2960, + "▁immediately": 2961, + "iers": 2962, + "▁born": 2963, + "ius": 2964, + "▁rev": 2965, + "▁tears": 2966, + "ists": 2967, + "▁treat": 2968, + "usion": 2969, + "▁meant": 2970, + "▁boys": 2971, + "pping": 2972, + "▁slowly": 2973, + "▁incl": 2974, + "▁lim": 2975, + "▁died": 2976, + "iced": 2977, + "▁compl": 2978, + "▁fool": 2979, + "▁forest": 2980, + "▁sugg": 2981, + "▁post": 2982, + "▁accept": 2983, + "▁result": 2984, + "▁author": 2985, + "ndon": 2986, + "ceive": 2987, + "▁suggest": 2988, + "cient": 2989, + "▁stone": 2990, + "▁fright": 2991, + "▁paper": 2992, + "▁conse": 2993, + "▁jour": 2994, + "▁ty": 2995, + "▁enc": 2996, + "▁quickly": 2997, + "▁contr": 2998, + "▁youth": 2999, + "▁send": 3000, + "▁vict": 3001, + "ified": 3002, + "▁belong": 3003, + "▁warm": 3004, + "▁fix": 3005, + "▁imposs": 3006, + "▁beside": 3007, + "▁er": 3008, + "▁tone": 3009, + "▁camp": 3010, + "▁desire": 3011, + "▁bound": 3012, + "▁makes": 3013, + "▁margaret": 3014, + "▁north": 3015, + "▁brown": 3016, + "▁moon": 3017, + "▁lips": 3018, + "▁placed": 3019, + "val": 3020, + "▁circumst": 3021, + "▁food": 3022, + "▁filled": 3023, + "ics": 3024, + "ift": 3025, + "ann": 3026, + "▁london": 3027, + "▁distance": 
3028, + "ging": 3029, + "▁strength": 3030, + "▁id": 3031, + "▁floor": 3032, + "▁forget": 3033, + "▁obl": 3034, + "▁mid": 3035, + "ries": 3036, + "itions": 3037, + "bs": 3038, + "▁spring": 3039, + "▁you're": 3040, + "▁viol": 3041, + "▁jack": 3042, + "▁pock": 3043, + "ooks": 3044, + "▁following": 3045, + "▁sac": 3046, + "▁remained": 3047, + "arch": 3048, + "▁grow": 3049, + "▁snow": 3050, + "▁government": 3051, + "▁ball": 3052, + "▁hors": 3053, + "▁nar": 3054, + "aded": 3055, + "▁broken": 3056, + "▁laughed": 3057, + "▁descri": 3058, + "▁safe": 3059, + "itten": 3060, + "ively": 3061, + "▁profess": 3062, + "▁o'": 3063, + "amed": 3064, + "▁depart": 3065, + "▁easy": 3066, + "oured": 3067, + "▁und": 3068, + "▁coun": 3069, + "▁thank": 3070, + "▁knows": 3071, + "▁waiting": 3072, + "dom": 3073, + "ats": 3074, + "▁ger": 3075, + "▁van": 3076, + "▁anne": 3077, + "▁horses": 3078, + "ugg": 3079, + "▁dread": 3080, + "▁une": 3081, + "ges": 3082, + "acy": 3083, + "▁proceed": 3084, + "▁gaz": 3085, + "▁shout": 3086, + "▁started": 3087, + "ented": 3088, + "▁complete": 3089, + "ope": 3090, + "▁gall": 3091, + "dered": 3092, + "▁wide": 3093, + "ires": 3094, + "▁neck": 3095, + "asure": 3096, + "isted": 3097, + "▁service": 3098, + "▁piece": 3099, + "cially": 3100, + "ences": 3101, + "▁sail": 3102, + "▁palace": 3103, + "erv": 3104, + "▁guard": 3105, + "▁doll": 3106, + "▁talking": 3107, + "▁man's": 3108, + "▁lift": 3109, + "▁grave": 3110, + "▁week": 3111, + "let": 3112, + "▁impossible": 3113, + "▁effort": 3114, + "▁imm": 3115, + "▁army": 3116, + "well": 3117, + "▁difficult": 3118, + "und": 3119, + "▁fresh": 3120, + "▁fun": 3121, + "reme": 3122, + "▁stop": 3123, + "▁mess": 3124, + "▁gar": 3125, + "▁deg": 3126, + "▁incre": 3127, + "▁corner": 3128, + "▁society": 3129, + "▁weak": 3130, + "▁shut": 3131, + "▁hy": 3132, + "▁proper": 3133, + "aching": 3134, + "▁cloud": 3135, + "iddle": 3136, + "ivid": 3137, + "▁demand": 3138, + "▁nine": 3139, + "▁sit": 3140, + "▁recogn": 3141, + "▁beat": 3142, + 
"uss": 3143, + "▁turning": 3144, + "▁sky": 3145, + "▁opinion": 3146, + "▁single": 3147, + "pic": 3148, + "▁fly": 3149, + "▁lang": 3150, + "▁mass": 3151, + "cell": 3152, + "▁outside": 3153, + "▁kiss": 3154, + "▁trust": 3155, + "▁occup": 3156, + "▁evil": 3157, + "▁below": 3158, + "▁appearance": 3159, + "uit": 3160, + "▁aftern": 3161, + "▁glo": 3162, + "▁gun": 3163, + "▁west": 3164, + "ency": 3165, + "par": 3166, + "▁showed": 3167, + "▁conversation": 3168, + "ises": 3169, + "▁conn": 3170, + "▁couldn't": 3171, + "▁running": 3172, + "▁mention": 3173, + "▁greater": 3174, + "▁music": 3175, + "▁breath": 3176, + "ases": 3177, + "▁nin": 3178, + "▁ant": 3179, + "arer": 3180, + "▁morrow": 3181, + "▁bank": 3182, + "▁espe": 3183, + "▁peter": 3184, + "ork": 3185, + "cial": 3186, + "▁presence": 3187, + "▁battle": 3188, + "▁winter": 3189, + "hered": 3190, + "▁probably": 3191, + "▁clothes": 3192, + "▁fash": 3193, + "▁mark": 3194, + "▁wished": 3195, + "vere": 3196, + "▁coll": 3197, + "▁emb": 3198, + "▁kne": 3199, + "▁married": 3200, + "▁arrived": 3201, + "▁pun": 3202, + "▁event": 3203, + "ushed": 3204, + "▁suffic": 3205, + "▁eager": 3206, + "▁former": 3207, + "▁giving": 3208, + "▁pop": 3209, + "▁sand": 3210, + "▁neg": 3211, + "▁usual": 3212, + "▁relig": 3213, + "▁simple": 3214, + "▁sym": 3215, + "itation": 3216, + "▁gro": 3217, + "ories": 3218, + "▁moved": 3219, + "▁months": 3220, + "▁speaking": 3221, + "▁pet": 3222, + "▁silent": 3223, + "▁cab": 3224, + "▁mountain": 3225, + "▁expression": 3226, + "gar": 3227, + "▁covered": 3228, + "▁hunt": 3229, + "▁afternoon": 3230, + "aped": 3231, + "▁occur": 3232, + "rief": 3233, + "▁states": 3234, + "▁z": 3235, + "str": 3236, + "▁loc": 3237, + "light": 3238, + "▁shore": 3239, + "che": 3240, + "▁easily": 3241, + "▁pale": 3242, + "unity": 3243, + "▁remark": 3244, + "▁phys": 3245, + "▁beginning": 3246, + "▁duty": 3247, + "▁chapter": 3248, + "▁influ": 3249, + "cho": 3250, + "▁concl": 3251, + "amb": 3252, + "▁instant": 3253, + "▁polit": 3254, + "zz": 
3255, + "▁enjoy": 3256, + "▁sick": 3257, + "▁remain": 3258, + "uel": 3259, + "▁stream": 3260, + "▁figure": 3261, + "ald": 3262, + "▁tur": 3263, + "▁path": 3264, + "▁vol": 3265, + "▁minute": 3266, + "▁pleasant": 3267, + "▁scarcely": 3268, + "▁conscious": 3269, + "▁terrible": 3270, + "▁kill": 3271, + "▁raised": 3272, + "▁fashion": 3273, + "▁twel": 3274, + "yal": 3275, + "▁leaving": 3276, + "▁twelve": 3277, + "ature": 3278, + "▁fut": 3279, + "▁threw": 3280, + "▁star": 3281, + "▁flowers": 3282, + "olog": 3283, + "▁trying": 3284, + "rib": 3285, + "▁sword": 3286, + "▁tall": 3287, + "▁marry": 3288, + "▁ben": 3289, + "▁expected": 3290, + "▁according": 3291, + "▁forty": 3292, + "▁stick": 3293, + "inal": 3294, + "▁guess": 3295, + "▁silver": 3296, + "▁iron": 3297, + "▁oblig": 3298, + "▁office": 3299, + "▁rapid": 3300, + "▁ladies": 3301, + "▁especially": 3302, + "ipped": 3303, + "orted": 3304, + "▁bread": 3305, + "ech": 3306, + "▁tender": 3307, + "orth": 3308, + "▁learned": 3309, + "▁books": 3310, + "▁isn't": 3311, + "▁surprise": 3312, + "▁write": 3313, + "▁purs": 3314, + "pered": 3315, + "▁written": 3316, + "▁killed": 3317, + "▁consequ": 3318, + "▁exh": 3319, + "▁places": 3320, + "▁condition": 3321, + "▁direction": 3322, + "▁cho": 3323, + "ulty": 3324, + "jo": 3325, + "mit": 3326, + "▁entirely": 3327, + "tering": 3328, + "▁enter": 3329, + "▁action": 3330, + "wise": 3331, + "▁suc": 3332, + "ibly": 3333, + "▁happiness": 3334, + "▁decided": 3335, + "▁golden": 3336, + "▁langu": 3337, + "eness": 3338, + "▁note": 3339, + "▁unless": 3340, + "uous": 3341, + "▁fal": 3342, + "aled": 3343, + "▁you'll": 3344, + "▁wonderful": 3345, + "ounds": 3346, + "ume": 3347, + "'re": 3348, + "▁shook": 3349, + "er's": 3350, + "oop": 3351, + "onel": 3352, + "▁perfectly": 3353, + "▁geor": 3354, + "ndered": 3355, + "▁broad": 3356, + "atic": 3357, + "▁closed": 3358, + "a's": 3359, + "▁spot": 3360, + "tended": 3361, + "▁latter": 3362, + "▁steps": 3363, + "▁merely": 3364, + "▁history": 3365, + "fer": 3366, 
+ "▁wise": 3367, + "ishing": 3368, + "osing": 3369, + "▁middle": 3370, + "idered": 3371, + "▁understood": 3372, + "▁enemy": 3373, + "▁sole": 3374, + "llig": 3375, + "▁jew": 3376, + "▁simply": 3377, + "gan": 3378, + "▁conduct": 3379, + "▁tast": 3380, + "▁board": 3381, + "▁sav": 3382, + "▁wouldn't": 3383, + "▁shot": 3384, + "▁reply": 3385, + "▁changed": 3386, + "mn": 3387, + "▁grass": 3388, + "▁finally": 3389, + "▁admir": 3390, + "ital": 3391, + "▁sharp": 3392, + "itch": 3393, + "▁fortune": 3394, + "▁summer": 3395, + "▁experience": 3396, + "▁succeed": 3397, + "gress": 3398, + "uted": 3399, + "▁orig": 3400, + "retched": 3401, + "▁journey": 3402, + "▁excell": 3403, + "▁observed": 3404, + "ax": 3405, + "▁afterwards": 3406, + "fast": 3407, + "sy": 3408, + "▁bow": 3409, + "▁flat": 3410, + "▁persons": 3411, + "▁lean": 3412, + "▁earn": 3413, + "▁broke": 3414, + "▁mir": 3415, + "▁fit": 3416, + "osp": 3417, + "▁marriage": 3418, + "▁repres": 3419, + "io": 3420, + "▁lying": 3421, + "unk": 3422, + "▁trave": 3423, + "▁situ": 3424, + "▁listen": 3425, + "▁acquaint": 3426, + "▁ring": 3427, + "cience": 3428, + "▁faint": 3429, + "olute": 3430, + "▁calm": 3431, + "bered": 3432, + "▁lives": 3433, + "▁escape": 3434, + "▁beneath": 3435, + "ouses": 3436, + "▁clim": 3437, + "▁bless": 3438, + "▁repeated": 3439, + "▁pocket": 3440, + "ests": 3441, + "▁tail": 3442, + "▁passion": 3443, + "▁dick": 3444, + "▁ven": 3445, + "oses": 3446, + "clock": 3447, + "▁mut": 3448, + "▁becom": 3449, + "▁oper": 3450, + "▁o'clock": 3451, + "▁fish": 3452, + "▁lou": 3453, + "semb": 3454, + "▁prev": 3455, + "▁allowed": 3456, + "▁famil": 3457, + "hel": 3458, + "▁gate": 3459, + "▁spite": 3460, + "ivers": 3461, + "▁health": 3462, + "ission": 3463, + "▁ign": 3464, + "▁reach": 3465, + "▁cand": 3466, + "▁rain": 3467, + "▁empl": 3468, + "▁ban": 3469, + "▁strugg": 3470, + "▁firm": 3471, + "▁bitter": 3472, + "▁sorry": 3473, + "bing": 3474, + "▁father's": 3475, + "▁temper": 3476, + "▁madame": 3477, + "ples": 3478, + "▁furn": 
3479, + "▁future": 3480, + "umed": 3481, + "▁nice": 3482, + "▁separ": 3483, + "▁presently": 3484, + "▁circumstances": 3485, + "▁connect": 3486, + "iding": 3487, + "▁sett": 3488, + "kes": 3489, + "▁loud": 3490, + "▁worse": 3491, + "▁wand": 3492, + "▁spread": 3493, + "▁i'd": 3494, + "▁letters": 3495, + "▁yellow": 3496, + "▁magn": 3497, + "▁passing": 3498, + "▁kit": 3499, + "▁pleased": 3500, + "▁darkness": 3501, + "▁remar": 3502, + "idden": 3503, + "come": 3504, + "▁tea": 3505, + "▁civ": 3506, + "▁apart": 3507, + "▁disappe": 3508, + "▁important": 3509, + "▁legs": 3510, + "▁nation": 3511, + "▁delic": 3512, + "▁dressed": 3513, + "▁game": 3514, + "▁walls": 3515, + "ec": 3516, + "▁dry": 3517, + "▁virt": 3518, + "▁dim": 3519, + "idently": 3520, + "rel": 3521, + "▁rub": 3522, + "▁absolute": 3523, + "▁blind": 3524, + "▁discovered": 3525, + "▁exactly": 3526, + "▁dam": 3527, + "otten": 3528, + "▁sorrow": 3529, + "my": 3530, + "▁cost": 3531, + "ference": 3532, + "▁employ": 3533, + "velop": 3534, + "▁cous": 3535, + "▁beast": 3536, + "▁spec": 3537, + "▁opport": 3538, + "▁ears": 3539, + "▁dropped": 3540, + "▁subst": 3541, + "▁chee": 3542, + "▁protect": 3543, + "ils": 3544, + "▁smiled": 3545, + "ina": 3546, + "▁resp": 3547, + "▁promise": 3548, + "▁bag": 3549, + "▁host": 3550, + "urs": 3551, + "▁creature": 3552, + "▁notice": 3553, + "▁knowing": 3554, + "▁heads": 3555, + "▁concer": 3556, + "▁seat": 3557, + "ishment": 3558, + "▁individ": 3559, + "▁existence": 3560, + "▁determined": 3561, + "lend": 3562, + "▁storm": 3563, + "roy": 3564, + "ours": 3565, + "▁conce": 3566, + "anging": 3567, + "▁fixed": 3568, + "▁press": 3569, + "▁major": 3570, + "oved": 3571, + "▁ves": 3572, + "iod": 3573, + "▁learn": 3574, + "▁motion": 3575, + "▁empt": 3576, + "▁leaves": 3577, + "▁bottom": 3578, + "▁arg": 3579, + "iety": 3580, + "▁nobody": 3581, + "▁pros": 3582, + "que": 3583, + "▁utter": 3584, + "▁pick": 3585, + "acked": 3586, + "▁intellig": 3587, + "▁hes": 3588, + "▁stir": 3589, + "▁prevent": 3590, + 
"▁assist": 3591, + "▁dom": 3592, + "▁disg": 3593, + "▁advant": 3594, + "erable": 3595, + "▁vent": 3596, + "ument": 3597, + "▁tired": 3598, + "rect": 3599, + "ashed": 3600, + "action": 3601, + "▁considered": 3602, + "▁wrote": 3603, + "▁houses": 3604, + "▁suit": 3605, + "▁cheer": 3606, + "▁castle": 3607, + "▁pra": 3608, + "▁perform": 3609, + "ancing": 3610, + "▁clean": 3611, + "ruct": 3612, + "▁stro": 3613, + "▁frequ": 3614, + "▁drawing": 3615, + "▁luck": 3616, + "▁habit": 3617, + "idge": 3618, + "ell": 3619, + "▁ones": 3620, + "▁noble": 3621, + "▁splend": 3622, + "▁honor": 3623, + "zen": 3624, + "▁paid": 3625, + "▁speech": 3626, + "▁estab": 3627, + "▁ur": 3628, + "istr": 3629, + "▁individual": 3630, + "inite": 3631, + "▁vall": 3632, + "▁birds": 3633, + "rodu": 3634, + "▁dar": 3635, + "▁allow": 3636, + "▁confess": 3637, + "▁impress": 3638, + "▁propert": 3639, + "▁jane": 3640, + "▁song": 3641, + "▁various": 3642, + "▁narrow": 3643, + "▁moder": 3644, + "▁believed": 3645, + "ays": 3646, + "▁extra": 3647, + "▁pure": 3648, + "arily": 3649, + "▁period": 3650, + "▁shadow": 3651, + "▁somewh": 3652, + "▁mal": 3653, + "▁cott": 3654, + "▁extreme": 3655, + "▁judge": 3656, + "▁village": 3657, + "▁royal": 3658, + "▁somewhat": 3659, + "▁lower": 3660, + "▁ham": 3661, + "▁agree": 3662, + "▁remembered": 3663, + "▁aston": 3664, + "enth": 3665, + "▁declared": 3666, + "pan": 3667, + "▁train": 3668, + "▁parts": 3669, + "▁colonel": 3670, + "amber": 3671, + "▁breakfast": 3672, + "▁surely": 3673, + "▁sin": 3674, + "ayed": 3675, + "▁scene": 3676, + "go": 3677, + "▁greatest": 3678, + "▁influence": 3679, + "▁custom": 3680, + "itary": 3681, + "▁animal": 3682, + "▁sake": 3683, + "▁mod": 3684, + "▁soldiers": 3685, + "iny": 3686, + "▁ancient": 3687, + "▁drawn": 3688, + "▁evidently": 3689, + "▁ways": 3690, + "▁looks": 3691, + "▁revol": 3692, + "ator": 3693, + "anted": 3694, + "▁reflect": 3695, + "▁picture": 3696, + "▁likely": 3697, + "▁shr": 3698, + "▁laws": 3699, + "▁holding": 3700, + 
"▁difficulty": 3701, + "▁inj": 3702, + "▁mel": 3703, + "▁courage": 3704, + "nes": 3705, + "▁mort": 3706, + "▁troub": 3707, + "▁burst": 3708, + "▁angry": 3709, + "▁proud": 3710, + "gged": 3711, + "▁spoken": 3712, + "ision": 3713, + "▁desert": 3714, + "ption": 3715, + "▁comb": 3716, + "▁apparent": 3717, + "ring": 3718, + "▁watched": 3719, + "na": 3720, + "▁east": 3721, + "▁shop": 3722, + "▁agre": 3723, + "▁private": 3724, + "esty": 3725, + "▁jul": 3726, + "▁finished": 3727, + "▁anxious": 3728, + "otion": 3729, + "▁fifteen": 3730, + "▁social": 3731, + "under": 3732, + "▁dism": 3733, + "▁touch": 3734, + "▁wine": 3735, + "▁attack": 3736, + "▁ideas": 3737, + "▁george": 3738, + "af": 3739, + "rer": 3740, + "oose": 3741, + "▁space": 3742, + "▁scr": 3743, + "▁inside": 3744, + "▁gentlemen": 3745, + "▁civil": 3746, + "iently": 3747, + "▁formed": 3748, + "▁fol": 3749, + "▁goes": 3750, + "▁you've": 3751, + "▁thin": 3752, + "▁surf": 3753, + "▁servant": 3754, + "▁bal": 3755, + "▁cover": 3756, + "▁ourselves": 3757, + "▁fallen": 3758, + "▁henry": 3759, + "▁lot": 3760, + "ium": 3761, + "▁advent": 3762, + "▁carriage": 3763, + "▁baby": 3764, + "▁elect": 3765, + "▁tong": 3766, + "▁appre": 3767, + "▁everybody": 3768, + "uded": 3769, + "▁commun": 3770, + "▁ine": 3771, + "itive": 3772, + "▁waited": 3773, + "cise": 3774, + "▁grou": 3775, + "het": 3776, + "▁vain": 3777, + "▁impro": 3778, + "▁favor": 3779, + "erial": 3780, + "▁speed": 3781, + "▁windows": 3782, + "▁carefully": 3783, + "▁ice": 3784, + "▁noise": 3785, + "▁hero": 3786, + "▁jim": 3787, + "▁william": 3788, + "▁pecul": 3789, + "▁promised": 3790, + "▁walking": 3791, + "▁forgotten": 3792, + "▁obliged": 3793, + "▁earnest": 3794, + "▁main": 3795, + "▁lose": 3796, + "▁glance": 3797, + "▁vessel": 3798, + "▁grad": 3799, + "▁thro": 3800, + "▁bod": 3801, + "▁shoulder": 3802, + "▁meth": 3803, + "▁animals": 3804, + "▁noticed": 3805, + "ables": 3806, + "▁peculiar": 3807, + "▁fier": 3808, + "▁pot": 3809, + "▁quietly": 3810, + "▁cup": 3811, + 
"▁serious": 3812, + "▁tremb": 3813, + "▁generally": 3814, + "▁american": 3815, + "▁symp": 3816, + "ral": 3817, + "▁don": 3818, + "▁france": 3819, + "iction": 3820, + "▁property": 3821, + "▁shoulders": 3822, + "▁stranger": 3823, + "▁san": 3824, + "▁cow": 3825, + "▁what's": 3826, + "▁dust": 3827, + "▁affection": 3828, + "▁handsome": 3829, + "▁higher": 3830, + "iant": 3831, + "nday": 3832, + "▁wel": 3833, + "▁poet": 3834, + "▁sla": 3835, + "▁distinct": 3836, + "▁mam": 3837, + "▁pier": 3838, + "acing": 3839, + "ague": 3840, + "▁grown": 3841, + "uly": 3842, + "▁d'": 3843, + "▁chamber": 3844, + "▁desce": 3845, + "▁murm": 3846, + "stem": 3847, + "▁personal": 3848, + "▁fancy": 3849, + "▁offered": 3850, + "osite": 3851, + "onsie": 3852, + "▁built": 3853, + "▁edge": 3854, + "▁whispered": 3855, + "▁skin": 3856, + "▁pieces": 3857, + "itated": 3858, + "cher": 3859, + "osity": 3860, + "▁pit": 3861, + "▁contro": 3862, + "▁faces": 3863, + "▁spent": 3864, + "▁interrupt": 3865, + "how": 3866, + "isters": 3867, + "▁butter": 3868, + "▁develop": 3869, + "▁unk": 3870, + "hip": 3871, + "▁heat": 3872, + "▁fond": 3873, + "▁coat": 3874, + "▁touched": 3875, + "▁hol": 3876, + "ingu": 3877, + "▁pi": 3878, + "▁race": 3879, + "▁jump": 3880, + "▁surprised": 3881, + "oted": 3882, + "▁defe": 3883, + "enced": 3884, + "▁wasn't": 3885, + "▁wear": 3886, + "andon": 3887, + "▁fan": 3888, + "acher": 3889, + "▁arch": 3890, + "▁educ": 3891, + "▁brave": 3892, + "athered": 3893, + "▁eld": 3894, + "▁wealth": 3895, + "▁system": 3896, + "▁german": 3897, + "▁false": 3898, + "wood": 3899, + "▁dare": 3900, + "aked": 3901, + "▁cousin": 3902, + "▁fer": 3903, + "key": 3904, + "▁lin": 3905, + "▁intellect": 3906, + "▁prepared": 3907, + "▁fingers": 3908, + "▁surr": 3909, + "▁mountains": 3910, + "ipp": 3911, + "▁opportunity": 3912, + "aff": 3913, + "▁bare": 3914, + "▁dor": 3915, + "▁introdu": 3916, + "▁collect": 3917, + "▁lovely": 3918, + "▁rag": 3919, + "▁crown": 3920, + "▁matters": 3921, + "▁companion": 3922, + 
"▁weather": 3923, + "▁alar": 3924, + "▁innoc": 3925, + "▁ris": 3926, + "▁mix": 3927, + "▁lake": 3928, + "▁store": 3929, + "▁unh": 3930, + "▁meaning": 3931, + "▁memory": 3932, + "over": 3933, + "▁band": 3934, + "leep": 3935, + "▁finding": 3936, + "ee": 3937, + "▁charge": 3938, + "▁grat": 3939, + "▁attract": 3940, + "▁gray": 3941, + "▁quarter": 3942, + "▁avo": 3943, + "▁greatly": 3944, + "▁mach": 3945, + "▁inh": 3946, + "▁asleep": 3947, + "▁paris": 3948, + "▁dav": 3949, + "▁alto": 3950, + "▁offer": 3951, + "▁opposite": 3952, + "ounced": 3953, + "erve": 3954, + "▁breast": 3955, + "nown": 3956, + "▁reading": 3957, + "▁altogether": 3958, + "▁writing": 3959, + "pected": 3960, + "▁degree": 3961, + "cing": 3962, + "night": 3963, + "▁exec": 3964, + "fortun": 3965, + "▁stat": 3966, + "▁feelings": 3967, + "▁hath": 3968, + "▁cook": 3969, + "▁rail": 3970, + "▁honour": 3971, + "ding": 3972, + "▁fate": 3973, + "▁por": 3974, + "▁frank": 3975, + "▁meeting": 3976, + "▁rough": 3977, + "▁alive": 3978, + "▁hide": 3979, + "ites": 3980, + "ilar": 3981, + "▁blow": 3982, + "▁cruel": 3983, + "raph": 3984, + "▁hurt": 3985, + "▁loss": 3986, + "▁thrown": 3987, + "▁caused": 3988, + "▁we'll": 3989, + "▁serve": 3990, + "▁duke": 3991, + "▁bent": 3992, + "▁united": 3993, + "▁seek": 3994, + "▁kingdom": 3995, + "▁situation": 3996, + "▁empty": 3997, + "ners": 3998, + "▁due": 3999, + "▁liked": 4000, + "▁swift": 4001, + "▁opening": 4002, + "▁servants": 4003, + "chen": 4004, + "oura": 4005, + "▁gh": 4006, + "▁suspic": 4007, + "▁freed": 4008, + "ointed": 4009, + "▁surface": 4010, + "cil": 4011, + "▁questions": 4012, + "▁ess": 4013, + "▁curious": 4014, + "▁constit": 4015, + "▁accompan": 4016, + "▁christian": 4017, + "▁fill": 4018, + "arest": 4019, + "▁satisfied": 4020, + "ron": 4021, + "▁sides": 4022, + "▁pity": 4023, + "▁reve": 4024, + "▁equal": 4025, + "▁height": 4026, + "▁ordered": 4027, + "osop": 4028, + "▁grey": 4029, + "▁listened": 4030, + "pet": 4031, + "▁rejo": 4032, + "▁capt": 4033, + "ibility": 
4034, + "ob": 4035, + "▁mart": 4036, + "▁happen": 4037, + "▁hurried": 4038, + "▁dollars": 4039, + "▁language": 4040, + "▁ange": 4041, + "▁yours": 4042, + "▁supposed": 4043, + "▁laughing": 4044, + "▁settled": 4045, + "▁rode": 4046, + "▁perm": 4047, + "▁distingu": 4048, + "▁hurry": 4049, + "▁destroy": 4050, + "▁talked": 4051, + "▁lifted": 4052, + "ocr": 4053, + "▁square": 4054, + "▁value": 4055, + "▁taste": 4056, + "▁vast": 4057, + "▁king's": 4058, + "▁rul": 4059, + "▁roof": 4060, + "▁telling": 4061, + "▁study": 4062, + "▁ow": 4063, + "▁pan": 4064, + "▁bas": 4065, + "▁rising": 4066, + "▁sufficient": 4067, + "▁forced": 4068, + "▁rise": 4069, + "▁attend": 4070, + "▁philosop": 4071, + "▁nose": 4072, + "▁sixty": 4073, + "hest": 4074, + "▁pin": 4075, + "▁egg": 4076, + "▁amb": 4077, + "▁fault": 4078, + "bur": 4079, + "▁station": 4080, + "▁distur": 4081, + "▁regular": 4082, + "ille": 4083, + "▁pack": 4084, + "▁special": 4085, + "▁honest": 4086, + "▁building": 4087, + "▁season": 4088, + "▁shape": 4089, + "▁pride": 4090, + "▁smiling": 4091, + "like": 4092, + "▁orders": 4093, + "yn": 4094, + "▁woods": 4095, + "▁accompl": 4096, + "con": 4097, + "▁sam": 4098, + "▁usually": 4099, + "▁watching": 4100, + "▁sacri": 4101, + "erved": 4102, + "▁passage": 4103, + "▁material": 4104, + "▁valley": 4105, + "yr": 4106, + "▁stairs": 4107, + "▁libert": 4108, + "▁frightened": 4109, + "▁remarked": 4110, + "▁tit": 4111, + "▁wed": 4112, + "▁mistress": 4113, + "▁directly": 4114, + "▁suffer": 4115, + "▁gloom": 4116, + "▁lines": 4117, + "▁stock": 4118, + "▁justice": 4119, + "▁diam": 4120, + "ested": 4121, + "▁growing": 4122, + "▁doesn't": 4123, + "▁gathered": 4124, + "▁ordinary": 4125, + "uce": 4126, + "▁eur": 4127, + "▁unf": 4128, + "▁kitchen": 4129, + "▁threat": 4130, + "▁depend": 4131, + "▁weeks": 4132, + "▁despair": 4133, + "▁method": 4134, + "▁seized": 4135, + "▁discuss": 4136, + "▁exer": 4137, + "ify": 4138, + "▁flower": 4139, + "▁ignor": 4140, + "eer": 4141, + "ades": 4142, + "▁deb": 4143, + 
"eping": 4144, + "▁ale": 4145, + "▁yo": 4146, + "chief": 4147, + "▁supper": 4148, + "ik": 4149, + "▁bold": 4150, + "▁putting": 4151, + "▁nearer": 4152, + "uses": 4153, + "▁one's": 4154, + "▁ble": 4155, + "▁york": 4156, + "▁ende": 4157, + "▁affairs": 4158, + "▁soldier": 4159, + "▁contrary": 4160, + "▁moving": 4161, + "▁streets": 4162, + "▁bir": 4163, + "rance": 4164, + "hens": 4165, + "▁cit": 4166, + "icated": 4167, + "▁catch": 4168, + "▁imagine": 4169, + "eds": 4170, + "▁march": 4171, + "▁search": 4172, + "ara": 4173, + "▁receive": 4174, + "imate": 4175, + "▁monsie": 4176, + "▁twice": 4177, + "▁papa": 4178, + "▁monsieur": 4179, + "▁reck": 4180, + "min": 4181, + "ude": 4182, + "▁process": 4183, + "▁hole": 4184, + "aly": 4185, + "lin": 4186, + "▁cro": 4187, + "▁favour": 4188, + "▁dign": 4189, + "▁working": 4190, + "▁harm": 4191, + "▁europe": 4192, + "antic": 4193, + "▁proved": 4194, + "ocked": 4195, + "▁prove": 4196, + "▁cler": 4197, + "▁lod": 4198, + "ception": 4199, + "▁pulled": 4200, + "▁arth": 4201, + "▁authority": 4202, + "▁haven": 4203, + "▁jer": 4204, + "▁uns": 4205, + "▁movement": 4206, + "usted": 4207, + "▁engaged": 4208, + "▁brothers": 4209, + "▁advantage": 4210, + "lished": 4211, + "ole": 4212, + "▁arthur": 4213, + "▁aut": 4214, + "▁stones": 4215, + "▁farm": 4216, + "▁difference": 4217, + "▁fart": 4218, + "▁aside": 4219, + "▁mas": 4220, + "▁observ": 4221, + "▁hence": 4222, + "▁possession": 4223, + "▁hills": 4224, + "▁fortun": 4225, + "uls": 4226, + "ails": 4227, + "▁instance": 4228, + "▁she's": 4229, + "▁ol": 4230, + "▁holy": 4231, + "▁flew": 4232, + "ky": 4233, + "▁color": 4234, + "▁rate": 4235, + "▁doors": 4236, + "▁busy": 4237, + "set": 4238, + "▁address": 4239, + "▁familiar": 4240, + "▁weight": 4241, + "▁aware": 4242, + "▁played": 4243, + "▁sympath": 4244, + "lls": 4245, + "▁solemn": 4246, + "▁liter": 4247, + "▁test": 4248, + "▁emper": 4249, + "▁indian": 4250, + "▁distant": 4251, + "▁interesting": 4252, + "▁bull": 4253, + "▁thorough": 4254, + "▁wore": 
4255, + "▁worked": 4256, + "▁explained": 4257, + "▁excellent": 4258, + "▁splendid": 4259, + "▁tongue": 4260, + "▁di": 4261, + "▁pard": 4262, + "▁named": 4263, + "▁shame": 4264, + "▁franc": 4265, + "▁spect": 4266, + "▁moments": 4267, + "bers": 4268, + "▁wil": 4269, + "▁myster": 4270, + "▁seated": 4271, + "▁instantly": 4272, + "▁similar": 4273, + "▁endeav": 4274, + "▁measure": 4275, + "▁naturally": 4276, + "nds": 4277, + "▁suf": 4278, + "▁amount": 4279, + "▁imper": 4280, + "▁dogs": 4281, + "itable": 4282, + "▁brit": 4283, + "▁necessity": 4284, + "rid": 4285, + "ulous": 4286, + "▁confidence": 4287, + "den": 4288, + "▁parent": 4289, + "▁wid": 4290, + "▁vir": 4291, + "▁neverthe": 4292, + "▁agreed": 4293, + "▁nevertheless": 4294, + "unch": 4295, + "▁hearing": 4296, + "▁takes": 4297, + "▁aug": 4298, + "▁univers": 4299, + "enance": 4300, + "▁unw": 4301, + "▁earl": 4302, + "▁keeping": 4303, + "▁drive": 4304, + "▁produced": 4305, + "▁aud": 4306, + "on's": 4307, + "▁names": 4308, + "agn": 4309, + "▁disappeared": 4310, + "▁throw": 4311, + "▁president": 4312, + "▁gods": 4313, + "▁magic": 4314, + "▁represent": 4315, + "▁unknown": 4316, + "por": 4317, + "▁terror": 4318, + "▁haven't": 4319, + "asc": 4320, + "▁support": 4321, + "▁smoke": 4322, + "▁wicked": 4323, + "ker": 4324, + "▁works": 4325, + "▁artic": 4326, + "▁dull": 4327, + "▁yester": 4328, + "▁falling": 4329, + "▁worthy": 4330, + "▁liberty": 4331, + "ulation": 4332, + "▁design": 4333, + "▁wants": 4334, + "▁evidence": 4335, + "▁companions": 4336, + "▁spirits": 4337, + "▁coast": 4338, + "▁mighty": 4339, + "▁particularly": 4340, + "▁witness": 4341, + "▁discover": 4342, + "▁sought": 4343, + "▁span": 4344, + "'ve": 4345, + "▁rare": 4346, + "▁officers": 4347, + "lv": 4348, + "zy": 4349, + "▁yesterday": 4350, + "vey": 4351, + "cent": 4352, + "▁powers": 4353, + "▁yield": 4354, + "▁cool": 4355, + "▁organ": 4356, + "▁amaz": 4357, + "▁pointed": 4358, + "ford": 4359, + "▁claim": 4360, + "▁content": 4361, + "▁possibly": 4362, + 
"▁terms": 4363, + "▁trium": 4364, + "▁officer": 4365, + "▁persu": 4366, + "▁ceased": 4367, + "▁drove": 4368, + "▁occurred": 4369, + "▁gree": 4370, + "▁lies": 4371, + "▁otherwise": 4372, + "▁emperor": 4373, + "▁hom": 4374, + "▁stars": 4375, + "▁knees": 4376, + "▁triumph": 4377, + "ruction": 4378, + "▁paused": 4379, + "oms": 4380, + "▁required": 4381, + "▁failed": 4382, + "▁unhapp": 4383, + "▁diamond": 4384, + "▁rat": 4385, + "▁ali": 4386, + "▁double": 4387, + "▁forms": 4388, + "▁gives": 4389, + "▁finger": 4390, + "race": 4391, + "▁pair": 4392, + "alous": 4393, + "illa": 4394, + "▁bob": 4395, + "▁eliz": 4396, + "▁travel": 4397, + "▁carrying": 4398, + "▁gle": 4399, + "iles": 4400, + "▁teeth": 4401, + "esh": 4402, + "▁shown": 4403, + "▁fruit": 4404, + "▁waters": 4405, + "▁entertain": 4406, + "▁hearts": 4407, + "umn": 4408, + "▁labor": 4409, + "in't": 4410, + "▁pill": 4411, + "▁ener": 4412, + "soci": 4413, + "▁example": 4414, + "▁upper": 4415, + "▁foreign": 4416, + "▁moral": 4417, + "▁softly": 4418, + "rose": 4419, + "▁huge": 4420, + "▁charles": 4421, + "▁priest": 4422, + "▁excit": 4423, + "▁fet": 4424, + "▁mother's": 4425, + "▁possessed": 4426, + "▁cases": 4427, + "▁report": 4428, + "▁milk": 4429, + "▁affair": 4430, + "▁principle": 4431, + "▁inhab": 4432, + "▁freedom": 4433, + "▁proof": 4434, + "▁intended": 4435, + "▁satisfaction": 4436, + "▁shouted": 4437, + "isc": 4438, + "▁plat": 4439, + "▁bask": 4440, + "ental": 4441, + "▁group": 4442, + "▁farther": 4443, + "asm": 4444, + "▁unfortun": 4445, + "▁unto": 4446, + "▁singing": 4447, + "▁arrange": 4448, + "▁religion": 4449, + "▁ber": 4450, + "▁rocks": 4451, + "▁seventeen": 4452, + "▁der": 4453, + "▁james": 4454, + "▁buy": 4455, + "▁succeeded": 4456, + "▁rooms": 4457, + "▁leading": 4458, + "▁majesty": 4459, + "▁events": 4460, + "▁dance": 4461, + "▁paint": 4462, + "▁gently": 4463, + "acle": 4464, + "▁tele": 4465, + "▁pardon": 4466, + "using": 4467, + "▁drop": 4468, + "father": 4469, + "▁invent": 4470, + "▁key": 4471, + 
"▁mentioned": 4472, + "▁seventy": 4473, + "▁ros": 4474, + "▁suffering": 4475, + "▁record": 4476, + "▁cabin": 4477, + "road": 4478, + "▁diss": 4479, + "ival": 4480, + "▁demanded": 4481, + "▁excitement": 4482, + "▁associ": 4483, + "▁progress": 4484, + "angers": 4485, + "▁curi": 4486, + "▁america": 4487, + "▁rule": 4488, + "▁bor": 4489, + "▁vig": 4490, + "lessly": 4491, + "▁clearly": 4492, + "▁bore": 4493, + "▁sheep": 4494, + "▁regret": 4495, + "▁neighbour": 4496, + "bly": 4497, + "iance": 4498, + "▁instinct": 4499, + "▁advice": 4500, + "▁awful": 4501, + "▁sen": 4502, + "▁fully": 4503, + "▁gather": 4504, + "▁papers": 4505, + "▁hidden": 4506, + "▁chest": 4507, + "▁birth": 4508, + "hy": 4509, + "pap": 4510, + "▁hither": 4511, + "▁stuff": 4512, + "▁impat": 4513, + "▁calling": 4514, + "▁fourth": 4515, + "▁dreadful": 4516, + "▁pos": 4517, + "▁grief": 4518, + "▁brill": 4519, + "▁powerful": 4520, + "▁presented": 4521, + "▁fairy": 4522, + "▁explain": 4523, + "▁shoot": 4524, + "▁prisoner": 4525, + "▁joined": 4526, + "▁afford": 4527, + "mond": 4528, + "attered": 4529, + "▁ing": 4530, + "iments": 4531, + "▁shel": 4532, + "▁prefer": 4533, + "▁considerable": 4534, + "▁obey": 4535, + "▁voices": 4536, + "▁interv": 4537, + "▁interested": 4538, + "▁virg": 4539, + "▁cred": 4540, + "▁card": 4541, + "▁ep": 4542, + "▁needed": 4543, + "▁pounds": 4544, + "▁conqu": 4545, + "▁clever": 4546, + "▁advanced": 4547, + "▁cord": 4548, + "ighed": 4549, + "▁undert": 4550, + "▁resolved": 4551, + "▁wag": 4552, + "istic": 4553, + "▁paul": 4554, + "▁excited": 4555, + "▁conditions": 4556, + "▁pictures": 4557, + "acious": 4558, + "▁shining": 4559, + "▁sunday": 4560, + "▁served": 4561, + "▁steam": 4562, + "▁police": 4563, + "▁sprang": 4564, + "sie": 4565, + "ora": 4566, + "ese": 4567, + "▁jes": 4568, + "▁nodd": 4569, + "▁salt": 4570, + "▁fields": 4571, + "▁cart": 4572, + "▁indians": 4573, + "▁fierce": 4574, + "dle": 4575, + "▁ride": 4576, + "▁desired": 4577, + "▁edward": 4578, + "▁importance": 4579, + 
"▁information": 4580, + "ture": 4581, + "▁hosp": 4582, + "▁memb": 4583, + "▁perceived": 4584, + "▁yard": 4585, + "▁crit": 4586, + "ternal": 4587, + "▁task": 4588, + "▁fold": 4589, + "rant": 4590, + "▁sooner": 4591, + "▁merch": 4592, + "▁absolutely": 4593, + "▁citiz": 4594, + "▁suffered": 4595, + "▁tight": 4596, + "▁dur": 4597, + "▁iss": 4598, + "illy": 4599, + "▁log": 4600, + "▁completely": 4601, + "hold": 4602, + "▁rad": 4603, + "▁share": 4604, + "▁willing": 4605, + "▁devil": 4606, + "▁ships": 4607, + "▁imagination": 4608, + "▁superior": 4609, + "com": 4610, + "ams": 4611, + "▁anybody": 4612, + "▁env": 4613, + "▁appl": 4614, + "▁drag": 4615, + "▁dawn": 4616, + "asped": 4617, + "▁occupied": 4618, + "▁curiosity": 4619, + "iest": 4620, + "▁sigh": 4621, + "▁fox": 4622, + "asant": 4623, + "▁myst": 4624, + "▁stead": 4625, + "ett": 4626, + "▁couple": 4627, + "▁type": 4628, + "▁extraord": 4629, + "▁apparently": 4630, + "▁welcome": 4631, + "▁daily": 4632, + "▁modern": 4633, + "iot": 4634, + "▁ain't": 4635, + "▁dying": 4636, + "llen": 4637, + "▁feat": 4638, + "▁accident": 4639, + "▁countenance": 4640, + "▁abandon": 4641, + "ortion": 4642, + "▁lock": 4643, + "▁crime": 4644, + "pir": 4645, + "▁mult": 4646, + "▁alas": 4647, + "▁refused": 4648, + "▁hate": 4649, + "▁dw": 4650, + "▁whenever": 4651, + "▁thanks": 4652, + "▁slave": 4653, + "▁regarded": 4654, + "▁suggested": 4655, + "ulf": 4656, + "▁actually": 4657, + "gment": 4658, + "▁size": 4659, + "reg": 4660, + "▁cult": 4661, + "▁kat": 4662, + "▁bodies": 4663, + "hus": 4664, + "▁bay": 4665, + "▁truly": 4666, + "▁flesh": 4667, + "ishop": 4668, + "▁smith": 4669, + "▁betr": 4670, + "with": 4671, + "▁wet": 4672, + "▁rapidly": 4673, + "gers": 4674, + "▁odd": 4675, + "asons": 4676, + "ette": 4677, + "▁club": 4678, + "abel": 4679, + "▁horror": 4680, + "▁mile": 4681, + "▁flight": 4682, + "▁crossed": 4683, + "▁professor": 4684, + "▁oce": 4685, + "▁worst": 4686, + "ization": 4687, + "▁rushed": 4688, + "▁science": 4689, + "▁brief": 4690, + 
"▁stepped": 4691, + "▁midst": 4692, + "ha": 4693, + "▁sour": 4694, + "▁maint": 4695, + "▁brain": 4696, + "▁cottage": 4697, + "▁expressed": 4698, + "▁equally": 4699, + "▁education": 4700, + "▁august": 4701, + "▁buck": 4702, + "▁nay": 4703, + "ids": 4704, + "▁tempt": 4705, + "▁inquir": 4706, + "▁foolish": 4707, + "▁taught": 4708, + "▁cop": 4709, + "▁dun": 4710, + "▁picked": 4711, + "▁elsie": 4712, + "▁lands": 4713, + "▁driven": 4714, + "▁political": 4715, + "mas": 4716, + "▁deck": 4717, + "▁resist": 4718, + "▁instr": 4719, + "▁bon": 4720, + "▁ken": 4721, + "ips": 4722, + "▁hotel": 4723, + "▁dangerous": 4724, + "ially": 4725, + "now": 4726, + "▁dozen": 4727, + "▁trade": 4728, + "▁points": 4729, + "▁ninet": 4730, + "ability": 4731, + "▁crim": 4732, + "▁relations": 4733, + "▁interp": 4734, + "▁barb": 4735, + "▁delighted": 4736, + "▁members": 4737, + "▁sisters": 4738, + "▁sty": 4739, + "▁anger": 4740, + "▁belief": 4741, + "▁asking": 4742, + "▁meat": 4743, + "▁displ": 4744, + "▁relief": 4745, + "ification": 4746, + "▁hunting": 4747, + "▁alex": 4748, + "aries": 4749, + "▁obst": 4750, + "▁behold": 4751, + "▁mistake": 4752, + "▁inquired": 4753, + "▁remarkable": 4754, + "▁origin": 4755, + "cked": 4756, + "▁nerv": 4757, + "acks": 4758, + "vert": 4759, + "rop": 4760, + "▁careful": 4761, + "▁wounded": 4762, + "ading": 4763, + "▁cere": 4764, + "▁enemies": 4765, + "▁gradually": 4766, + "▁interrupted": 4767, + "▁fis": 4768, + "▁stup": 4769, + "▁severe": 4770, + "▁keen": 4771, + "▁sixteen": 4772, + "kins": 4773, + "resp": 4774, + "▁worn": 4775, + "▁flour": 4776, + "▁sylv": 4777, + "▁control": 4778, + "kin": 4779, + "▁lone": 4780, + "asing": 4781, + "▁nap": 4782, + "▁assert": 4783, + "▁depth": 4784, + "▁kindly": 4785, + "▁murder": 4786, + "acity": 4787, + "▁eleven": 4788, + "▁invol": 4789, + "▁d'art": 4790, + "▁wings": 4791, + "▁oak": 4792, + "▁et": 4793, + "▁begun": 4794, + "▁dreams": 4795, + "while": 4796, + "▁moreover": 4797, + "▁exped": 4798, + "▁independ": 4799, + "▁buried": 
4800, + "▁approached": 4801, + "agnan": 4802, + "▁d'artagnan": 4803, + "▁sex": 4804, + "▁saved": 4805, + "▁harry": 4806, + "▁physical": 4807, + "▁species": 4808, + "cer": 4809, + "oe": 4810, + "▁glory": 4811, + "▁creatures": 4812, + "▁newspap": 4813, + "▁sang": 4814, + "▁plenty": 4815, + "▁useful": 4816, + "▁shoes": 4817, + "▁hoped": 4818, + "▁frequently": 4819, + "▁saf": 4820, + "▁distr": 4821, + "▁princip": 4822, + "▁pu": 4823, + "y's": 4824, + "aunt": 4825, + "▁lover": 4826, + "▁famous": 4827, + "▁recollect": 4828, + "▁nur": 4829, + "▁grim": 4830, + "▁indif": 4831, + "▁charming": 4832, + "▁aim": 4833, + "▁loose": 4834, + "▁consciousness": 4835, + "▁mamma": 4836, + "▁enthus": 4837, + "▁slept": 4838, + "▁smooth": 4839, + "▁fighting": 4840, + "▁hyp": 4841, + "▁enthusi": 4842, + "▁dig": 4843, + "aling": 4844, + "▁stage": 4845, + "▁anyone": 4846, + "▁thrust": 4847, + "▁desper": 4848, + "▁tar": 4849, + "▁lamp": 4850, + "stone": 4851, + "▁stern": 4852, + "▁evident": 4853, + "▁meanwhile": 4854, + "▁forgive": 4855, + "▁accepted": 4856, + "▁ocean": 4857, + "▁tot": 4858, + "▁they're": 4859, + "▁wondered": 4860, + "▁playing": 4861, + "▁detect": 4862, + "▁hale": 4863, + "▁knife": 4864, + "ailed": 4865, + "▁closely": 4866, + "▁meas": 4867, + "▁proceeded": 4868, + "▁message": 4869, + "▁mour": 4870, + "▁fac": 4871, + "▁union": 4872, + "ustomed": 4873, + "hem": 4874, + "aming": 4875, + "▁exceed": 4876, + "▁feather": 4877, + "▁precious": 4878, + "▁century": 4879, + "▁unex": 4880, + "▁park": 4881, + "ication": 4882, + "▁everywhere": 4883, + "▁minds": 4884, + "▁extraordinary": 4885, + "▁arose": 4886, + "▁entrance": 4887, + "▁capital": 4888, + "▁recall": 4889, + "▁burning": 4890, + "▁magnific": 4891, + "oes": 4892, + "orious": 4893, + "stand": 4894, + "▁assemb": 4895, + "▁plant": 4896, + "▁neighbor": 4897, + "▁lest": 4898, + "uments": 4899, + "▁colle": 4900, + "▁virtue": 4901, + "▁bew": 4902, + "▁forb": 4903, + "▁retreat": 4904, + "▁capable": 4905, + "▁assured": 4906, + "▁constant": 
4907, + "▁governor": 4908, + "▁increased": 4909, + "▁horn": 4910, + "▁removed": 4911, + "▁facts": 4912, + "▁absence": 4913, + "▁explan": 4914, + "▁ack": 4915, + "▁somebody": 4916, + "▁awa": 4917, + "▁admit": 4918, + "▁correct": 4919, + "▁forgot": 4920, + "▁jealous": 4921, + "▁kissed": 4922, + "▁popular": 4923, + "▁hut": 4924, + "▁ug": 4925, + "pelled": 4926, + "▁grant": 4927, + "▁friendship": 4928, + "▁indign": 4929, + "▁sympathy": 4930, + "iable": 4931, + "erous": 4932, + "▁thom": 4933, + "▁alice": 4934, + "▁level": 4935, + "▁objects": 4936, + "▁pressed": 4937, + "▁sha": 4938, + "room": 4939, + "▁qual": 4940, + "▁begged": 4941, + "▁emp": 4942, + "▁hind": 4943, + "▁highest": 4944, + "▁clouds": 4945, + "▁ghost": 4946, + "▁acknow": 4947, + "oused": 4948, + "▁strike": 4949, + "▁wishes": 4950, + "▁becomes": 4951, + "▁trembling": 4952, + "▁nob": 4953, + "▁kindness": 4954, + "▁accordingly": 4955, + "▁throat": 4956, + "ration": 4957, + "▁fare": 4958, + "▁we're": 4959, + "▁stretched": 4960, + "▁frag": 4961, + "▁wheel": 4962, + "▁queer": 4963, + "▁grandfather": 4964, + "for": 4965, + "▁choose": 4966, + "▁helen": 4967, + "▁eighty": 4968, + "▁ly": 4969, + "▁miserable": 4970, + "▁contempt": 4971, + "igned": 4972, + "▁military": 4973, + "▁russ": 4974, + "▁basket": 4975, + "▁ahead": 4976, + "oops": 4977, + "ivered": 4978, + "▁listening": 4979, + "▁fro": 4980, + "▁larger": 4981, + "▁divine": 4982, + "iber": 4983, + "▁stories": 4984, + "anches": 4985, + "ushing": 4986, + "izing": 4987, + "▁treasure": 4988, + "▁excuse": 4989, + "▁innocent": 4990, + "▁aid": 4991, + "▁remind": 4992, + "▁slaves": 4993, + "rit": 4994, + "stairs": 4995, + "▁reward": 4996, + "ograph": 4997, + "▁manage": 4998, + "▁dish": 4999, + "▁throughout": 5000, + "▁waves": 5001, + "▁judgment": 5002, + "▁arrival": 5003, + "▁choice": 5004, + "▁unhappy": 5005, + "astic": 5006, + "▁blank": 5007, + "▁advance": 5008, + "▁informed": 5009, + "▁acquaintance": 5010, + "▁impression": 5011, + "▁mysterious": 5012, + "bb": 5013, + 
"▁ara": 5014, + "▁notes": 5015, + "▁hadn't": 5016, + "▁sell": 5017, + "▁comr": 5018, + "▁impl": 5019, + "▁indust": 5020, + "▁ended": 5021, + "▁lights": 5022, + "▁nurse": 5023, + "▁sout": 5024, + "▁bought": 5025, + "▁fred": 5026, + "▁marked": 5027, + "▁scream": 5028, + "mend": 5029, + "▁uneas": 5030, + "▁delicate": 5031, + "▁weary": 5032, + "estic": 5033, + "▁prompt": 5034, + "▁experi": 5035, + "▁hungry": 5036, + "▁flying": 5037, + "▁pow": 5038, + "▁bridge": 5039, + "▁join": 5040, + "▁visible": 5041, + "▁understanding": 5042, + "▁crying": 5043, + "▁avoid": 5044, + "▁tis": 5045, + "▁stiff": 5046, + "aches": 5047, + "▁restr": 5048, + "▁sounds": 5049, + "▁bowed": 5050, + "▁caut": 5051, + "▁goods": 5052, + "▁david": 5053, + "▁unable": 5054, + "▁you'd": 5055, + "hamed": 5056, + "▁bos": 5057, + "eral": 5058, + "▁ashamed": 5059, + "▁somewhere": 5060, + "▁infinite": 5061, + "ocks": 5062, + "▁dignity": 5063, + "▁gay": 5064, + "▁vic": 5065, + "▁amid": 5066, + "▁hollow": 5067, + "▁emotion": 5068, + "▁admitted": 5069, + "▁parents": 5070, + "▁wra": 5071, + "▁hint": 5072, + "▁temple": 5073, + "▁comfortable": 5074, + "▁intelligence": 5075, + "orous": 5076, + "▁bearing": 5077, + "▁hers": 5078, + "abeth": 5079, + "▁remains": 5080, + "▁contem": 5081, + "▁settle": 5082, + "▁immense": 5083, + "ffe": 5084, + "pher": 5085, + "▁cher": 5086, + "ldom": 5087, + "▁weap": 5088, + "ulated": 5089, + "▁lighted": 5090, + "gypt": 5091, + "▁adventure": 5092, + "▁thoroughly": 5093, + "▁egypt": 5094, + "ilst": 5095, + "anges": 5096, + "▁obt": 5097, + "▁friendly": 5098, + "▁reckon": 5099, + "▁stupid": 5100, + "▁fed": 5101, + "▁rome": 5102, + "▁meal": 5103, + "▁intention": 5104, + "▁returning": 5105, + "▁convin": 5106, + "▁coo": 5107, + "lection": 5108, + "▁ash": 5109, + "achel": 5110, + "▁rope": 5111, + "▁price": 5112, + "▁project": 5113, + "elt": 5114, + "rows": 5115, + "▁secure": 5116, + "▁escaped": 5117, + "▁hopes": 5118, + "▁elizabeth": 5119, + "▁safety": 5120, + "▁wound": 5121, + "▁sup": 5122, + 
"▁unus": 5123, + "onscious": 5124, + "▁horri": 5125, + "▁minister": 5126, + "▁ox": 5127, + "lla": 5128, + "ensive": 5129, + "▁helped": 5130, + "▁plainly": 5131, + "▁seldom": 5132, + "▁thinks": 5133, + "▁fellows": 5134, + "▁mood": 5135, + "▁pushed": 5136, + "▁exhib": 5137, + "inging": 5138, + "▁thunder": 5139, + "aud": 5140, + "iana": 5141, + "▁fairly": 5142, + "▁elder": 5143, + "▁eggs": 5144, + "irm": 5145, + "▁maiden": 5146, + "mother": 5147, + "▁appears": 5148, + "▁cheeks": 5149, + "▁won": 5150, + "▁ease": 5151, + "▁redu": 5152, + "▁skill": 5153, + "▁extent": 5154, + "▁practice": 5155, + "▁religious": 5156, + "▁becoming": 5157, + "▁virgin": 5158, + "▁features": 5159, + "▁tied": 5160, + "▁whence": 5161, + "▁somehow": 5162, + "▁greet": 5163, + "▁faithful": 5164, + "▁concerned": 5165, + "▁theat": 5166, + "▁bishop": 5167, + "▁pink": 5168, + "▁eagerly": 5169, + "rees": 5170, + "▁eating": 5171, + "▁waste": 5172, + "▁rank": 5173, + "▁fem": 5174, + "▁bride": 5175, + "▁unl": 5176, + "otted": 5177, + "ceiving": 5178, + "▁trib": 5179, + "▁original": 5180, + "▁concerning": 5181, + "▁hab": 5182, + "▁accustomed": 5183, + "▁patient": 5184, + "▁recom": 5185, + "▁cell": 5186, + "ointment": 5187, + "▁arranged": 5188, + "ville": 5189, + "iture": 5190, + "▁wholly": 5191, + "▁older": 5192, + "▁colour": 5193, + "▁provided": 5194, + "▁ate": 5195, + "▁partly": 5196, + "▁mont": 5197, + "ology": 5198, + "▁prospect": 5199, + "▁ceremon": 5200, + "▁ze": 5201, + "▁laughter": 5202, + "▁fee": 5203, + "▁branches": 5204, + "▁fled": 5205, + "right": 5206, + "▁whilst": 5207, + "▁slipped": 5208, + "▁violent": 5209, + "▁inhabit": 5210, + "▁sons": 5211, + "▁engage": 5212, + "▁uncom": 5213, + "▁deeply": 5214, + "▁substance": 5215, + "▁tale": 5216, + "▁tiny": 5217, + "▁dan": 5218, + "▁ga": 5219, + "▁bee": 5220, + "▁yards": 5221, + "icks": 5222, + "▁hastily": 5223, + "held": 5224, + "▁wes": 5225, + "▁vague": 5226, + "▁amuse": 5227, + "▁mud": 5228, + "▁wolf": 5229, + "▁hans": 5230, + "illing": 5231, + 
"▁supply": 5232, + "▁silk": 5233, + "▁constantly": 5234, + "▁christmas": 5235, + "▁million": 5236, + "▁whisper": 5237, + "▁mental": 5238, + "▁washing": 5239, + "verse": 5240, + "▁cloth": 5241, + "▁baron": 5242, + "▁corresp": 5243, + "▁nodded": 5244, + "▁correspond": 5245, + "ka": 5246, + "▁hell": 5247, + "▁gain": 5248, + "▁rust": 5249, + "▁obtain": 5250, + "▁unconscious": 5251, + "▁struggle": 5252, + "▁established": 5253, + "▁lawy": 5254, + "ols": 5255, + "▁signs": 5256, + "▁uttered": 5257, + "▁roman": 5258, + "▁constitution": 5259, + "pes": 5260, + "▁cave": 5261, + "▁spare": 5262, + "▁quant": 5263, + "▁image": 5264, + "▁merry": 5265, + "▁treated": 5266, + "▁efforts": 5267, + "▁lonely": 5268, + "rated": 5269, + "▁nut": 5270, + "▁glanced": 5271, + "▁portion": 5272, + "itor": 5273, + "▁resemb": 5274, + "▁withd": 5275, + "▁mead": 5276, + "▁feast": 5277, + "▁prim": 5278, + "▁cliff": 5279, + "▁emer": 5280, + "▁proportion": 5281, + "▁consideration": 5282, + "▁haste": 5283, + "▁gaze": 5284, + "▁savage": 5285, + "▁crew": 5286, + "▁tower": 5287, + "▁lack": 5288, + "▁conscience": 5289, + "▁mercy": 5290, + "▁exha": 5291, + "▁consent": 5292, + "ators": 5293, + "urd": 5294, + "▁outl": 5295, + "▁clo": 5296, + "▁adop": 5297, + "▁amongst": 5298, + "▁hanging": 5299, + "▁circle": 5300, + "▁prepar": 5301, + "▁brilliant": 5302, + "fl": 5303, + "▁gained": 5304, + "▁row": 5305, + "▁troops": 5306, + "▁repro": 5307, + "▁ming": 5308, + "oul": 5309, + "▁dared": 5310, + "▁lion": 5311, + "▁joe": 5312, + "▁winds": 5313, + "▁bringing": 5314, + "▁anxiety": 5315, + "▁billy": 5316, + "▁consequence": 5317, + "fice": 5318, + "pse": 5319, + "▁fought": 5320, + "▁pred": 5321, + "▁scra": 5322, + "▁glim": 5323, + "▁victory": 5324, + "ped": 5325, + "▁rab": 5326, + "▁scot": 5327, + "▁obv": 5328, + "▁shock": 5329, + "chan": 5330, + "▁knock": 5331, + "ourse": 5332, + "▁handed": 5333, + "▁indul": 5334, + "▁patience": 5335, + "▁souther": 5336, + "▁jose": 5337, + "▁fever": 5338, + "▁rolled": 5339, + "icted": 
5340, + "▁setting": 5341, + "▁profession": 5342, + "▁sylvia": 5343, + "▁hun": 5344, + "utions": 5345, + "▁feared": 5346, + "▁brand": 5347, + "▁boots": 5348, + "▁forehead": 5349, + "▁principles": 5350, + "▁sink": 5351, + "▁rig": 5352, + "aval": 5353, + "▁purch": 5354, + "▁gazed": 5355, + "▁employed": 5356, + "▁murmured": 5357, + "more": 5358, + "▁sar": 5359, + "ashing": 5360, + "ural": 5361, + "acles": 5362, + "▁trad": 5363, + "▁active": 5364, + "▁benef": 5365, + "▁bottle": 5366, + "▁rage": 5367, + "▁invest": 5368, + "▁lux": 5369, + "▁sank": 5370, + "▁hang": 5371, + "▁beard": 5372, + "ential": 5373, + "▁loving": 5374, + "▁native": 5375, + "▁instruct": 5376, + "▁waist": 5377, + "▁relation": 5378, + "▁discovery": 5379, + "▁melan": 5380, + "▁nervous": 5381, + "▁obtained": 5382, + "▁pig": 5383, + "▁sear": 5384, + "▁flag": 5385, + "▁trail": 5386, + "▁distinguished": 5387, + "▁stared": 5388, + "▁misery": 5389, + "▁print": 5390, + "▁guil": 5391, + "▁jumped": 5392, + "▁swim": 5393, + "▁approaching": 5394, + "▁suspicion": 5395, + "▁iv": 5396, + "▁managed": 5397, + "aker": 5398, + "▁teach": 5399, + "▁match": 5400, + "▁guilty": 5401, + "▁wretched": 5402, + "▁rum": 5403, + "▁compar": 5404, + "▁theory": 5405, + "▁sher": 5406, + "▁bree": 5407, + "▁kings": 5408, + "▁shone": 5409, + "atherine": 5410, + "▁throne": 5411, + "▁showing": 5412, + "aws": 5413, + "▁robin": 5414, + "▁embar": 5415, + "utation": 5416, + "▁woman's": 5417, + "▁addressed": 5418, + "▁protest": 5419, + "▁admiration": 5420, + "▁troubled": 5421, + "▁ugly": 5422, + "oom": 5423, + "erves": 5424, + "▁flung": 5425, + "▁subs": 5426, + "▁relie": 5427, + "▁thousands": 5428, + "nce": 5429, + "▁od": 5430, + "▁current": 5431, + "▁wooden": 5432, + "▁sacrifice": 5433, + "urity": 5434, + "cip": 5435, + "▁pear": 5436, + "▁farmer": 5437, + "▁needs": 5438, + "▁condem": 5439, + "▁member": 5440, + "▁bade": 5441, + "▁dancing": 5442, + "▁reasons": 5443, + "▁consult": 5444, + "▁swall": 5445, + "▁shadows": 5446, + "▁angel": 5447, + 
"▁nineteen": 5448, + "▁style": 5449, + "field": 5450, + "▁lan": 5451, + "▁manif": 5452, + "▁robert": 5453, + "▁grate": 5454, + "▁engine": 5455, + "▁wisdom": 5456, + "▁jesus": 5457, + "▁convent": 5458, + "▁preced": 5459, + "▁interests": 5460, + "▁trial": 5461, + "bor": 5462, + "iven": 5463, + "▁nest": 5464, + "▁exch": 5465, + "▁voy": 5466, + "▁illust": 5467, + "▁worship": 5468, + "▁adam": 5469, + "▁phr": 5470, + "▁principal": 5471, + "▁hit": 5472, + "▁spend": 5473, + "▁stands": 5474, + "▁respons": 5475, + "▁ay": 5476, + "▁haw": 5477, + "▁whist": 5478, + "▁arrest": 5479, + "▁kinds": 5480, + "▁require": 5481, + "▁described": 5482, + "▁lit": 5483, + "▁precise": 5484, + "▁proposed": 5485, + "▁produce": 5486, + "▁utterly": 5487, + "ulse": 5488, + "▁novel": 5489, + "▁blame": 5490, + "▁credit": 5491, + "▁pause": 5492, + "osen": 5493, + "▁household": 5494, + "▁armed": 5495, + "▁follows": 5496, + "upon": 5497, + "▁approach": 5498, + "▁ninety": 5499, + "▁pir": 5500, + "▁flore": 5501, + "ivity": 5502, + "▁refuse": 5503, + "▁sensible": 5504, + "choly": 5505, + "▁national": 5506, + "▁grie": 5507, + "▁reven": 5508, + "▁let's": 5509, + "▁delightful": 5510, + "▁extremely": 5511, + "▁melancholy": 5512, + "uing": 5513, + "▁enorm": 5514, + "cles": 5515, + "▁slightly": 5516, + "▁sacred": 5517, + "▁recognized": 5518, + "▁mystery": 5519, + "▁gri": 5520, + "▁compre": 5521, + "▁distress": 5522, + "▁warri": 5523, + "▁useless": 5524, + "▁trif": 5525, + "▁mounted": 5526, + "▁philip": 5527, + "▁energy": 5528, + "▁explanation": 5529, + "▁cas": 5530, + "atory": 5531, + "▁pour": 5532, + "▁ric": 5533, + "▁chosen": 5534, + "▁everyone": 5535, + "umbled": 5536, + "▁apr": 5537, + "▁cam": 5538, + "▁proc": 5539, + "▁resumed": 5540, + "▁appreci": 5541, + "▁alexand": 5542, + "▁aven": 5543, + "▁wing": 5544, + "▁intense": 5545, + "▁highly": 5546, + "▁lucy": 5547, + "▁solid": 5548, + "▁departure": 5549, + "▁agreeable": 5550, + "▁exercise": 5551, + "apped": 5552, + "▁ward": 5553, + "▁bud": 5554, + "▁dwell": 
5555, + "icate": 5556, + "▁dece": 5557, + "▁teacher": 5558, + "tending": 5559, + "▁max": 5560, + "▁request": 5561, + "▁unexpected": 5562, + "▁joseph": 5563, + "col": 5564, + "▁leap": 5565, + "▁victim": 5566, + "▁sighed": 5567, + "▁forces": 5568, + "chie": 5569, + "▁feed": 5570, + "▁sport": 5571, + "▁drift": 5572, + "▁wedding": 5573, + "▁british": 5574, + "sec": 5575, + "▁attitude": 5576, + "▁vision": 5577, + "▁pipe": 5578, + "▁tow": 5579, + "▁halt": 5580, + "▁manners": 5581, + "▁tend": 5582, + "▁flood": 5583, + "▁commission": 5584, + "▁guide": 5585, + "▁observe": 5586, + "▁concern": 5587, + "▁rush": 5588, + "▁affected": 5589, + "fall": 5590, + "▁stret": 5591, + "▁coach": 5592, + "▁poison": 5593, + "▁directed": 5594, + "▁medic": 5595, + "▁gest": 5596, + "▁echo": 5597, + "▁younger": 5598, + "▁confusion": 5599, + "▁continue": 5600, + "▁parli": 5601, + "▁absor": 5602, + "▁centre": 5603, + "conom": 5604, + "▁horrible": 5605, + "rison": 5606, + "▁bol": 5607, + "▁bath": 5608, + "▁gown": 5609, + "▁bye": 5610, + "▁aloud": 5611, + "▁suppl": 5612, + "▁profound": 5613, + "▁err": 5614, + "▁cheerful": 5615, + "worth": 5616, + "▁sentence": 5617, + "▁mistaken": 5618, + "▁torn": 5619, + "▁figures": 5620, + "▁accompanied": 5621, + "▁catherine": 5622, + "▁econom": 5623, + "▁atm": 5624, + "▁shaking": 5625, + "umber": 5626, + "▁council": 5627, + "lot": 5628, + "▁asce": 5629, + "ilities": 5630, + "▁spar": 5631, + "▁ends": 5632, + "▁straw": 5633, + "▁knights": 5634, + "▁atmosp": 5635, + "▁shade": 5636, + "▁brow": 5637, + "▁spark": 5638, + "▁rested": 5639, + "▁sentiment": 5640, + "▁recovered": 5641, + "▁subjects": 5642, + "▁duties": 5643, + "▁composed": 5644, + "▁swept": 5645, + "▁reality": 5646, + "▁singular": 5647, + "▁transp": 5648, + "▁locked": 5649, + "▁louis": 5650, + "▁assistance": 5651, + "▁wake": 5652, + "rem": 5653, + "▁sovere": 5654, + "▁unp": 5655, + "▁loves": 5656, + "▁absurd": 5657, + "▁souls": 5658, + "▁immediate": 5659, + "▁riding": 5660, + "▁connection": 5661, + "▁cheek": 
5662, + "▁magnificent": 5663, + "▁ere": 5664, + "▁sugar": 5665, + "▁plans": 5666, + "▁prud": 5667, + "▁dise": 5668, + "▁adj": 5669, + "▁leaning": 5670, + "▁surrounded": 5671, + "▁we've": 5672, + "▁orn": 5673, + "▁roll": 5674, + "▁proble": 5675, + "▁strict": 5676, + "▁awake": 5677, + "▁praise": 5678, + "▁convinced": 5679, + "▁rele": 5680, + "▁frame": 5681, + "▁breaking": 5682, + "▁curtain": 5683, + "▁stayed": 5684, + "▁divided": 5685, + "▁craw": 5686, + "▁inclined": 5687, + "▁previous": 5688, + "ault": 5689, + "omen": 5690, + "▁stair": 5691, + "▁sees": 5692, + "▁pron": 5693, + "board": 5694, + "▁complex": 5695, + "▁prayer": 5696, + "▁pierre": 5697, + "▁unfortunate": 5698, + "gs": 5699, + "▁genius": 5700, + "▁increase": 5701, + "▁sufficiently": 5702, + "▁banks": 5703, + "▁revolution": 5704, + "▁southern": 5705, + "ki": 5706, + "oke": 5707, + "▁aust": 5708, + "edy": 5709, + "▁ling": 5710, + "▁countess": 5711, + "▁sleeping": 5712, + "▁devoted": 5713, + "▁utmost": 5714, + "▁market": 5715, + "▁bosom": 5716, + "▁bark": 5717, + "▁cath": 5718, + "alt": 5719, + "char": 5720, + "▁clock": 5721, + "▁handker": 5722, + "▁admin": 5723, + "▁senses": 5724, + "▁ident": 5725, + "▁midnight": 5726, + "▁connected": 5727, + "▁permitted": 5728, + "▁hid": 5729, + "▁fil": 5730, + "▁faced": 5731, + "▁gift": 5732, + "▁chat": 5733, + "▁brid": 5734, + "▁norther": 5735, + "▁horiz": 5736, + "▁college": 5737, + "▁handkerchief": 5738, + "isions": 5739, + "▁rebe": 5740, + "▁polic": 5741, + "▁announced": 5742, + "ounce": 5743, + "▁nons": 5744, + "▁nurs": 5745, + "ales": 5746, + "▁fleet": 5747, + "▁ragged": 5748, + "▁coffe": 5749, + "▁parties": 5750, + "▁delay": 5751, + "▁sounded": 5752, + "▁cities": 5753, + "▁wash": 5754, + "▁appointed": 5755, + "▁nights": 5756, + "▁instit": 5757, + "▁god's": 5758, + "▁striking": 5759, + "▁guns": 5760, + "▁astonishment": 5761, + "▁merchant": 5762, + "▁parliament": 5763, + "nal": 5764, + "▁ax": 5765, + "atched": 5766, + "▁pil": 5767, + "▁page": 5768, + "iform": 5769, + 
"▁plate": 5770, + "▁thirst": 5771, + "▁negro": 5772, + "▁ruin": 5773, + "▁inhabitants": 5774, + "win": 5775, + "arf": 5776, + "▁rib": 5777, + "▁addition": 5778, + "▁argument": 5779, + "bour": 5780, + "▁tad": 5781, + "▁scen": 5782, + "▁guests": 5783, + "▁wondering": 5784, + "▁acquainted": 5785, + "▁intent": 5786, + "pless": 5787, + "▁destroyed": 5788, + "▁coffee": 5789, + "inent": 5790, + "lebr": 5791, + "▁render": 5792, + "▁sob": 5793, + "▁demon": 5794, + "▁desir": 5795, + "uding": 5796, + "▁gets": 5797, + "▁assure": 5798, + "▁raise": 5799, + "▁sharply": 5800, + "▁privile": 5801, + "▁alarm": 5802, + "▁machine": 5803, + "fied": 5804, + "▁contract": 5805, + "▁deliber": 5806, + "▁drown": 5807, + "▁afterward": 5808, + "▁guest": 5809, + "▁conclusion": 5810, + "▁risk": 5811, + "▁ignorant": 5812, + "bury": 5813, + "kind": 5814, + "▁pian": 5815, + "an's": 5816, + "uries": 5817, + "▁soil": 5818, + "▁refer": 5819, + "▁commanded": 5820, + "▁practical": 5821, + "▁toss": 5822, + "▁offe": 5823, + "▁beheld": 5824, + "▁arist": 5825, + "▁quarters": 5826, + "▁degrees": 5827, + "▁fisher": 5828, + "▁nonsense": 5829, + "▁mc": 5830, + "isp": 5831, + "▁mechan": 5832, + "keep": 5833, + "▁doubtless": 5834, + "▁violence": 5835, + "▁neglect": 5836, + "▁folk": 5837, + "liness": 5838, + "▁bul": 5839, + "▁easter": 5840, + "▁loft": 5841, + "▁contained": 5842, + "▁reflection": 5843, + "▁celebr": 5844, + "▁leaf": 5845, + "▁concluded": 5846, + "▁district": 5847, + "iation": 5848, + "rs": 5849, + "▁scient": 5850, + "▁he'd": 5851, + "▁scorn": 5852, + "▁crack": 5853, + "▁steep": 5854, + "▁muttered": 5855, + "▁establish": 5856, + "▁darling": 5857, + "▁andrew": 5858, + "▁chim": 5859, + "quis": 5860, + "▁quality": 5861, + "▁polly": 5862, + "▁check": 5863, + "▁craft": 5864, + "▁travell": 5865, + "▁universal": 5866, + "inate": 5867, + "▁cig": 5868, + "atives": 5869, + "omp": 5870, + "uten": 5871, + "▁jac": 5872, + "▁job": 5873, + "▁subm": 5874, + "▁reader": 5875, + "▁leis": 5876, + "▁emph": 5877, + 
"▁surround": 5878, + "ox": 5879, + "pent": 5880, + "itate": 5881, + "▁extended": 5882, + "▁lev": 5883, + "▁overt": 5884, + "▁retired": 5885, + "▁puzz": 5886, + "uable": 5887, + "▁libr": 5888, + "▁chin": 5889, + "▁spl": 5890, + "▁realized": 5891, + "▁causes": 5892, + "▁punishment": 5893, + "▁physic": 5894, + "▁leisure": 5895, + "can": 5896, + "▁wave": 5897, + "▁shake": 5898, + "▁charm": 5899, + "▁belonged": 5900, + "mber": 5901, + "▁bones": 5902, + "▁gas": 5903, + "▁range": 5904, + "▁prec": 5905, + "▁smell": 5906, + "▁maybe": 5907, + "▁invited": 5908, + "▁troubles": 5909, + "▁tables": 5910, + "anch": 5911, + "icip": 5912, + "▁june": 5913, + "▁abo": 5914, + "▁ages": 5915, + "▁anywhere": 5916, + "ffin": 5917, + "▁drunk": 5918, + "▁properly": 5919, + "▁local": 5920, + "▁improve": 5921, + "▁atmosphere": 5922, + "▁dir": 5923, + "▁he'll": 5924, + "▁reb": 5925, + "▁rang": 5926, + "▁compass": 5927, + "▁lieuten": 5928, + "▁leaned": 5929, + "▁firmly": 5930, + "▁nations": 5931, + "▁hay": 5932, + "▁wept": 5933, + "▁ral": 5934, + "▁conven": 5935, + "▁uniform": 5936, + "▁julia": 5937, + "eem": 5938, + "rass": 5939, + "▁track": 5940, + "▁commer": 5941, + "▁bushes": 5942, + "▁obsc": 5943, + "▁sorts": 5944, + "▁difficulties": 5945, + "▁intellectual": 5946, + "▁introduced": 5947, + "mith": 5948, + "▁tro": 5949, + "iday": 5950, + "▁rendered": 5951, + "▁rout": 5952, + "add": 5953, + "▁plun": 5954, + "▁throwing": 5955, + "▁humble": 5956, + "▁polite": 5957, + "▁numerous": 5958, + "▁movements": 5959, + "▁successful": 5960, + "▁candle": 5961, + "▁separate": 5962, + "▁protection": 5963, + "▁thomas": 5964, + "▁enormous": 5965, + "▁unb": 5966, + "▁repub": 5967, + "▁sunsh": 5968, + "▁descended": 5969, + "▁unusual": 5970, + "ived": 5971, + "▁blaz": 5972, + "▁shows": 5973, + "▁simpl": 5974, + "▁cattle": 5975, + "▁crept": 5976, + "▁astonished": 5977, + "▁deserted": 5978, + "▁lap": 5979, + "arse": 5980, + "▁nearest": 5981, + "udes": 5982, + "▁entering": 5983, + "▁ideal": 5984, + "standing": 5985, 
+ "nders": 5986, + "▁sore": 5987, + "aine": 5988, + "▁clos": 5989, + "▁ours": 5990, + "▁wherever": 5991, + "▁term": 5992, + "▁visited": 5993, + "▁calcul": 5994, + "ds": 5995, + "▁base": 5996, + "▁gates": 5997, + "▁stamp": 5998, + "▁liber": 5999, + "▁official": 6000, + "▁erect": 6001, + "▁alt": 6002, + "elia": 6003, + "▁harmon": 6004, + "▁painful": 6005, + "▁burned": 6006, + "▁republic": 6007, + "uer": 6008, + "▁lately": 6009, + "▁ital": 6010, + "amm": 6011, + "▁tear": 6012, + "▁actions": 6013, + "▁final": 6014, + "▁startled": 6015, + "▁sensation": 6016, + "▁fatal": 6017, + "olic": 6018, + "▁flash": 6019, + "▁appet": 6020, + "▁stronger": 6021, + "▁numbers": 6022, + "▁gratitude": 6023, + "▁female": 6024, + "▁western": 6025, + "lest": 6026 + }, + "merges": [ + "▁ t", + "h e", + "▁ a", + "▁t he", + "i n", + "▁ s", + "▁ w", + "▁ o", + "r e", + "n d", + "▁ b", + "▁ h", + "e r", + "▁ m", + "▁ i", + "o u", + "▁ c", + "▁ f", + "a t", + "e d", + "▁a nd", + "e n", + "▁t o", + "▁o f", + "o n", + "i s", + "▁ d", + "in g", + "▁t h", + "▁ p", + "▁ he", + "o r", + "▁ l", + "e s", + "▁ in", + "l l", + "i t", + "a r", + "a s", + "a n", + "▁ n", + "▁ g", + "o m", + "▁b e", + "▁h a", + "▁ e", + "l e", + "o t", + "▁ y", + "u t", + "o w", + "i c", + "▁w h", + "▁i t", + "l d", + "v e", + "▁th at", + "l y", + "▁w as", + "i d", + "s e", + "s t", + "▁o n", + "g h", + "en t", + "▁ re", + "▁y ou", + "i m", + "c e", + "▁ u", + "v er", + "i on", + "▁a s", + "e t", + "▁f or", + "a y", + "▁h is", + "▁w e", + "it h", + "a l", + "i r", + "▁ r", + "▁w ith", + "▁s t", + "a d", + "u r", + "gh t", + "▁a n", + "▁he r", + "▁n ot", + "▁i s", + "▁ha d", + "t er", + "he r", + "a c", + "a m", + "▁a t", + "o o", + "▁b ut", + "ou ld", + "▁s he", + "▁ k", + "▁s e", + "▁s a", + "▁s h", + "▁f r", + "▁h im", + "▁s o", + "▁m e", + "i ll", + "a in", + "▁s u", + "i ght", + "c h", + "re d", + "c t", + "a ll", + "r o", + "k e", + "es s", + "i l", + "' s", + "o re", + "▁d e", + "▁m y", + "▁the y", + "▁w he", + "▁a ll", 
+ "ic h", + "▁n e", + "r i", + "▁b y", + "▁ha ve", + "om e", + "p p", + "▁th is", + "▁l i", + "▁d o", + "▁c on", + "u s", + "▁wh ich", + "▁c h", + "u l", + "q u", + "▁ j", + "▁u p", + "▁sa id", + "▁fr om", + "ar d", + "g e", + "▁o r", + "▁ v", + "▁on e", + "▁n o", + "t h", + "▁e x", + "▁we re", + "▁the re", + "p e", + "a nd", + "es t", + "▁m an", + "▁wh o", + "b le", + "i e", + "▁a l", + "an t", + "re s", + "ou s", + "u st", + "ver y", + "at ion", + "▁f e", + "▁the m", + "l f", + "▁whe n", + "n t", + "am e", + "in d", + "r a", + "▁g o", + "er s", + "as t", + "f e", + "oo d", + "▁k n", + "▁in t", + "is t", + "▁a re", + "ar t", + "ou t", + "▁w ould", + "▁l e", + "▁wh at", + "o s", + "▁the ir", + "on g", + "ou r", + "▁i f", + "▁c om", + "ou nd", + "▁a b", + "▁o ut", + "▁w or", + "e m", + "▁w ill", + "a k", + "▁m is", + "at e", + "o l", + "u m", + "u n", + "it t", + "ou gh", + "k ed", + "i g", + "a p", + "on e", + "▁be en", + "ow n", + "i ve", + "▁the n", + "▁b r", + "v en", + "i f", + "▁a r", + "' t", + "se lf", + "▁t r", + "▁p l", + "▁r o", + "▁p r", + "t her", + "re at", + "▁u n", + "▁a f", + "▁s p", + "▁ qu", + "▁p ro", + "it y", + "he d", + "▁t w", + "▁a g", + "▁c ould", + "o st", + "a ce", + "or t", + "u re", + "a ke", + "▁a m", + "ac k", + "▁an y", + "▁s ome", + "▁you r", + "▁m ore", + "▁c an", + "a u", + "▁t im", + "e p", + "a g", + "▁ en", + "c k", + "▁int o", + "▁c l", + "r y", + "▁n ow", + "h ing", + "nd er", + "a re", + "▁ very", + "▁g r", + "e l", + "o se", + "▁l oo", + "▁b o", + "v ed", + "o p", + "▁o ther", + "▁d id", + "an ce", + "▁th an", + "itt le", + "▁l ittle", + "in e", + "i es", + "w ay", + "it e", + "▁li ke", + "id e", + "▁l o", + "as s", + "▁b l", + "a ble", + "ur n", + "ou ght", + "▁kn ow", + "ot her", + "▁tim e", + "▁i m", + "▁d is", + "▁u s", + "▁c o", + "f ore", + "▁h ow", + "▁t e", + "en ce", + "▁d ay", + "▁a d", + "ad e", + "ic e", + "▁ab out", + "▁se e", + "▁o ver", + "p t", + "c c", + "▁to o", + "in k", + "▁f l", + "w n", + "▁g reat", + 
"▁af ter", + "p l", + "d e", + "▁p er", + "m ent", + "▁ag ain", + "▁up on", + "▁ha nd", + "a b", + "▁h as", + "re e", + "is h", + "c i", + "▁on ly", + "all y", + "▁we ll", + "▁sh ould", + "▁p o", + "▁m ar", + "res s", + "▁s ay", + "▁g ood", + "at her", + "▁tw o", + "ing s", + "▁p e", + "ou nt", + "▁o ur", + "i re", + "v ing", + "▁d own", + "ar s", + "er t", + "w e", + "▁be fore", + "i le", + "v es", + "▁a pp", + "▁e very", + "▁it s", + "▁o ld", + "▁th r", + "▁m u", + "▁m ade", + "i ed", + "ic k", + "▁l ong", + "a ge", + "t e", + "f t", + "▁whe re", + "an g", + "▁ne ver", + "▁m ust", + "▁p re", + "▁s m", + "f ul", + "▁su ch", + "u ll", + "▁st r", + "ion s", + "▁of f", + "▁s c", + "▁c ame", + "i ous", + "u e", + "▁mis s", + "w ard", + "i ld", + "▁f ir", + "▁e ven", + "▁u nder", + "ac t", + "▁the se", + "▁c ome", + "▁p art", + "▁f o", + "at ed", + "n ess", + "▁re m", + "or d", + "▁be c", + "t y", + "▁m ay", + "▁mu ch", + "▁th ink", + "p er", + "▁w ay", + "▁mis ter", + "l ed", + "▁l et", + "or n", + "▁e y", + "▁g l", + "▁con t", + "▁th ought", + "▁loo k", + "e ct", + "▁s pe", + "is e", + "▁b ack", + "▁be t", + "ad y", + "▁y e", + "an s", + "ac h", + "▁he re", + "▁j ust", + "re n", + "▁fir st", + "▁h o", + "▁o wn", + "▁d es", + "▁o b", + "ri ed", + "u d", + "ar y", + "▁w ent", + "▁m o", + "▁him self", + "▁m en", + "a ir", + "c l", + "a ve", + "at h", + "f f", + "▁s l", + "c o", + "on 't", + "ll ow", + "▁c r", + "▁re s", + "▁i '", + "▁m ight", + "i ly", + "▁se em", + "in t", + "i p", + "▁be g", + "ou se", + "an c", + "n 't", + "▁w at", + "▁thr ough", + "▁com p", + "b er", + "▁a way", + "▁c ar", + "▁e m", + "▁g et", + "▁im p", + "▁he ad", + "os s", + "▁li fe", + "▁be l", + "▁with out", + "▁m ost", + "▁p ass", + "▁m ake", + "▁con s", + "en ed", + "▁s om", + "▁t urn", + "a v", + "n g", + "▁sh all", + "▁a cc", + "▁th ose", + "▁p res", + "▁ey es", + "▁h ouse", + "i z", + "▁som et", + "▁j o", + "▁st ill", + "▁c all", + "▁n ight", + "he s", + "▁o p", + "au se", + "▁w om", + "▁l 
ast", + "k s", + "l ess", + "a red", + "▁com m", + "▁d on't", + "▁te ll", + "▁ ent", + "▁not hing", + "▁ne w", + "ig n", + "▁t ake", + "▁be ing", + "▁man y", + "▁wor d", + "on s", + "▁f ound", + "▁re t", + "as e", + "▁e ar", + "▁wh ile", + "▁at t", + "or y", + "i x", + "▁s er", + "▁sa w", + "▁p ut", + "n e", + "ot h", + "ie nd", + "▁pe op", + "▁w r", + "▁you ng", + "ar k", + "d y", + "ak ing", + "l es", + "▁c ount", + "▁on ce", + "▁fr iend", + "▁l a", + "en s", + "▁peop le", + "pe ct", + "or s", + "fe ct", + "▁m at", + "in ce", + "i ble", + "e red", + "▁ro om", + "▁th ree", + "▁y et", + "a il", + "▁s ame", + "▁f ather", + "▁r ight", + "▁ch ild", + "▁c our", + "i gh", + "▁pl ace", + "▁an other", + "ul t", + "i v", + "it ion", + "▁in d", + "▁w ant", + "▁th ough", + "▁n or", + "▁f ar", + "▁k ing", + "▁ha pp", + "▁he art", + "▁f ace", + "▁e nd", + "▁e ver", + "▁n at", + "th ing", + "▁lo ve", + "g et", + "▁too k", + "▁d ist", + "e ver", + "i an", + "▁h u", + "e w", + "▁ar m", + "▁in st", + "m an", + "▁wor k", + "▁l ight", + "▁ch ar", + "▁p le", + "ic t", + "▁s et", + "▁a c", + "▁loo ked", + "▁miss us", + "▁as ked", + "▁m ind", + "▁y es", + "▁su pp", + "▁int e", + "▁re p", + "c ess", + "ent ly", + "▁le ft", + "g g", + "ert ain", + "▁k e", + "is hed", + "u b", + "▁p ers", + "way s", + "▁th ings", + "al k", + "ir l", + "▁m om", + "▁s ir", + "▁w a", + "▁mom ent", + "ation s", + "▁s at", + "se l", + "▁f ind", + "ow er", + "i a", + "v ent", + "re w", + "▁wor ld", + "j ect", + "▁g ive", + "▁c ap", + "▁wh y", + "s o", + "▁g u", + "▁m other", + "▁g en", + "▁s w", + "▁al ways", + "d er", + "l t", + "l ing", + "▁an s", + "pp ed", + "▁so on", + "▁a ct", + "▁for m", + "▁e l", + "d d", + "▁he ard", + "re t", + "▁th ing", + "▁somet hing", + "▁seem ed", + "▁su b", + "▁do or", + "an ge", + "▁g irl", + "c ed", + "▁app e", + "it her", + "▁w ind", + "▁bec ause", + "▁d if", + "▁m on", + "s s", + "▁go ing", + "▁to ld", + "or m", + "▁h ome", + "ain ed", + "▁g ot", + "▁w ar", + "▁go d", + "au 
ght", + "▁g i", + "▁en g", + "▁s ur", + "n ing", + "▁hand s", + "▁wom an", + "▁fo llow", + "l and", + "a ut", + "▁v o", + "▁fe el", + "▁re l", + "▁p oss", + "c hed", + "ic al", + "p le", + "p h", + "▁bo y", + "▁ret urn", + "▁re g", + "▁re st", + "oo k", + "▁kn ew", + "n er", + "▁e ach", + "▁o h", + "▁s il", + "▁k ind", + "▁ex p", + "▁m a", + "▁c le", + "▁he l", + "i ver", + "t ing", + "▁de l", + "u al", + "▁in f", + "▁as s", + "▁wat er", + "▁con f", + "▁b re", + "▁w o", + "ce pt", + "▁bel ie", + "▁c ertain", + "▁again st", + "▁h ard", + "▁p h", + "r ow", + "▁u nt", + "▁ye ars", + "▁qu ite", + "▁s ide", + "in ess", + "in ed", + "▁ne ar", + "▁h or", + "ter s", + "i red", + "oo l", + "▁f our", + "▁fe w", + "▁d one", + "i er", + "▁c he", + "re st", + "it ed", + "m ost", + "▁bet ter", + "▁ha lf", + "▁m in", + "▁t re", + "p s", + "▁al so", + "▁c are", + "o ck", + "u ck", + "ou b", + "▁beg an", + "ull y", + "▁en ough", + "is ed", + "r u", + "▁ha ving", + "▁se en", + "▁gen er", + "▁l ady", + "▁d ra", + "▁h um", + "ap s", + "ot t", + "▁p ur", + "ak en", + "ro ss", + "y ing", + "▁t er", + "▁h our", + "▁in de", + "an k", + "▁call ed", + "i al", + "as on", + "▁be h", + "▁do es", + "▁who le", + "▁m orn", + "▁turn ed", + "▁ple as", + "▁st e", + "▁re f", + "▁g ave", + "en se", + "▁o cc", + "i b", + "▁cour se", + "▁in s", + "re am", + "get her", + "ut h", + "▁b oth", + "▁s ou", + "▁c ur", + "▁ad d", + "e en", + "▁c ol", + "▁re ad", + "we en", + "sel ves", + "▁am ong", + "▁bet ween", + "▁in c", + "▁ke ep", + "▁be aut", + "ul ar", + "▁po or", + "▁it 's", + "▁su re", + "▁morn ing", + "▁wh ite", + "g ed", + "▁n ame", + "▁de ar", + "▁to ward", + "ut e", + "▁sm all", + "▁wh om", + "▁re pl", + "▁s k", + "▁l ar", + "▁fe lt", + "b o", + "os ed", + "at ing", + "▁my self", + "▁op en", + "▁s ix", + "▁her self", + "▁how ever", + "▁b u", + "o nd", + "ain t", + "x t", + "▁f ore", + "▁in ter", + "▁e v", + "▁h igh", + "ct ion", + "▁hu nd", + "▁st ood", + "▁hund red", + "as ter", + "▁t ra", + "▁sh 
ow", + "▁s ent", + "i fe", + "▁r ound", + "▁s im", + "▁d r", + "▁g ra", + "▁word s", + "▁day s", + "▁al most", + "a le", + "ve l", + "▁po int", + "ent s", + "▁g re", + "▁e ight", + "c es", + "at es", + "dd en", + "▁f am", + "▁st and", + "▁b us", + "▁l and", + "▁ ed", + "▁me an", + "un g", + "h aps", + "▁su n", + "u res", + "▁s ince", + "i et", + "ir d", + "▁per haps", + "n ed", + "▁s le", + "is s", + "▁b est", + "▁su dden", + "▁d ark", + "▁repl ied", + "▁vo ice", + "▁m et", + "▁any thing", + "▁t ill", + "▁under st", + "▁b ar", + "it s", + "▁unt il", + "in s", + "ou d", + "▁bl ack", + "▁b ro", + "▁he ar", + "▁look ing", + "▁c ried", + "▁you '", + "▁f act", + "am p", + "▁pr in", + "▁l ess", + "▁l ay", + "▁ne xt", + "▁la w", + "u p", + "▁p ower", + "▁pro p", + "n ot", + "re nt", + "▁br ought", + "ate ly", + "ent y", + "▁count ry", + "▁hel p", + "al s", + "▁qu est", + "m ed", + "▁u se", + "▁v is", + "▁s n", + "▁i' m", + "f ully", + "▁sp o", + "▁to gether", + "▁ne ed", + "▁a ir", + "▁ad v", + "▁pers on", + "▁inde ed", + "▁cont in", + "▁un c", + "one y", + "▁g ent", + "▁pres ent", + "▁a w", + "▁p ar", + "ow s", + "u red", + "▁f ull", + "t ain", + "▁r un", + "▁r ather", + "▁i de", + "▁co nd", + "nd ed", + "▁l at", + "▁s y", + "b e", + "d u", + "▁h ar", + "▁fe et", + "▁f in", + "et er", + "▁f all", + "ce i", + "▁f ive", + "▁m il", + "▁b ed", + "o c", + "▁do ct", + "▁inte rest", + "ress ed", + "▁mat ter", + "▁l ord", + "▁g one", + "▁ es", + "f ort", + "▁de ath", + "▁w ife", + "▁ser v", + "▁p at", + "er ing", + "oub t", + "▁ad m", + "▁t alk", + "▁t aken", + "▁ar t", + "▁t ri", + "▁other s", + "▁ho pe", + "as h", + "a z", + "▁ex t", + "▁can not", + "ie f", + "▁spe ak", + "▁l au", + "▁them selves", + "▁al ong", + "▁d ire", + "o ve", + "m b", + "p r", + "▁b es", + "▁c ou", + "▁m or", + "t en", + "▁gent le", + "ur ing", + "▁f ire", + "▁lar ge", + "▁p ol", + "▁c at", + "▁s we", + "ent ion", + "ver s", + "▁th us", + "a pp", + "▁se c", + "▁pl ay", + "▁re al", + "▁pr om", + "ment 
s", + "we red", + "ie ld", + "ain s", + "is on", + "ac hed", + "▁th ou", + "▁re ason", + "▁th ous", + "it ing", + "▁br other", + "ak es", + "▁thous and", + "on t", + "▁m oney", + "▁rem em", + "▁de p", + "▁ans wered", + "▁tr ue", + "▁child ren", + "▁beh ind", + "o y", + "▁s ound", + "ant s", + "ab ly", + "▁w ood", + "us ed", + "▁de c", + "▁who se", + "o d", + "▁e le", + "▁tw enty", + "▁r a", + "it u", + "▁belie ve", + "▁wo nder", + "en e", + "▁in v", + "▁h on", + "ar ing", + "s h", + "u ed", + "▁su ff", + "▁o pp", + "▁d oubt", + "▁re c", + "t on", + "▁ho ld", + "▁dif fe", + "▁pass ed", + "▁c or", + "m e", + "id ed", + "it ies", + "▁m er", + "▁s ing", + "▁nat ure", + "▁al one", + "▁de ad", + "▁p ri", + "k en", + "l ic", + "▁re d", + "▁b ur", + "ac es", + "▁cl ose", + "▁go ld", + "▁st art", + "▁h ur", + "▁f ur", + "o g", + "anc es", + "▁as k", + "▁doct or", + "▁s on", + "▁gr ound", + "w er", + "et s", + "▁se a", + "▁str ong", + "▁le ave", + "▁comp an", + "▁i' ll", + "er y", + "c y", + "ill ed", + "ep t", + "id es", + "t le", + "▁c e", + "▁ob s", + "bo dy", + "▁fe ll", + "▁s ign", + "co nd", + "▁m ount", + "▁f air", + "▁gi ven", + "▁there fore", + "an e", + "▁i r", + "▁de ep", + "if ul", + "f ic", + "y s", + "▁of ten", + "▁bo dy", + "u nt", + "▁sh ort", + "▁t em", + "▁f a", + "▁m aster", + "▁ear th", + "▁p ap", + "ce ed", + "▁st re", + "▁se cond", + "▁for t", + "b ed", + "g th", + "ow ed", + "▁hor se", + "id d", + "▁m ad", + "u ally", + "▁p a", + "▁ch r", + "▁or der", + "▁t en", + "ve red", + "▁con st", + "▁w ish", + "▁f if", + "▁e as", + "▁c ir", + "▁d ro", + "a im", + "he n", + "▁c a", + "▁re ally", + "re ad", + "cei ved", + "▁i ll", + "▁fe ar", + "os ition", + "▁underst and", + "▁sp ir", + "▁l ist", + "▁ab s", + "▁sp r", + "ac ed", + "▁quest ion", + "ang er", + "▁every thing", + "aught er", + "▁af f", + "▁w all", + "▁com ing", + "ch ing", + "re ady", + "id er", + "▁ab ove", + "▁pr ince", + "▁al ready", + "▁le ast", + "▁re co", + "▁ex pl", + "▁st ep", + "▁us ed", + 
"▁r u", + "▁it self", + "is ter", + "▁ne cess", + "▁c ase", + "▁ar ound", + "h n", + "▁sou l", + "▁sudden ly", + "g er", + "▁l ad", + "▁even ing", + "▁m ag", + "▁gener al", + "▁n um", + "im es", + "▁kn own", + "▁w al", + "▁qu ick", + "iz ed", + "▁m us", + "▁s ch", + "▁cap tain", + "▁that 's", + "if ic", + "▁whe ther", + "▁le ar", + "g n", + "▁with in", + "m en", + "▁li ve", + "ver n", + "▁tim es", + "▁ex pect", + "▁st ate", + "▁friend s", + "▁br ing", + "▁s ort", + "▁wom en", + "▁t able", + "▁me et", + "▁jo hn", + "▁cir c", + "▁su m", + "▁return ed", + "il ed", + "▁d ri", + "▁he ld", + "▁ex c", + "▁b ig", + "▁say s", + "▁per fect", + "▁le a", + "▁obs er", + "▁el se", + "▁d uring", + "id ent", + "▁h us", + "t ed", + "▁beaut iful", + "▁cle ar", + "▁e ither", + "▁to wn", + "▁s ight", + "▁l ost", + "▁sle ep", + "▁me ans", + "▁fo ot", + "▁c ut", + "▁c al", + "▁k ept", + "▁r an", + "i ence", + "▁pro f", + "te red", + "he re", + "et y", + "▁fe llow", + "▁can 't", + "▁m ist", + "▁p ast", + "▁d ream", + "ag es", + "▁bec ame", + "▁pre t", + "▁dis c", + "▁b ad", + "▁m aking", + "ut ion", + "▁ob ject", + "▁toward s", + "▁l ow", + "u ght", + "▁de v", + "▁hum an", + "▁man ner", + "▁str ange", + "▁ye ar", + "o ld", + "i ent", + "in es", + "▁se ver", + "m on", + "▁an n", + "air s", + "c hes", + "▁c ity", + "▁somet imes", + "' d", + "▁ro se", + "▁ est", + "il ity", + "▁w alk", + "▁re ady", + "▁p al", + "▁le g", + "▁ro ad", + "i ans", + "ci ous", + "▁c orn", + "▁th y", + "▁co ld", + "ll y", + "ious ly", + "l ish", + "▁st ra", + "m er", + "▁b at", + "ow ing", + "ie w", + "▁chr ist", + "▁s qu", + "▁tr uth", + "c ri", + "ll ed", + "▁th ir", + "▁did n't", + "b ert", + "▁so ci", + "b r", + "▁b it", + "▁sub ject", + "▁sh ip", + "▁m ur", + "▁app ro", + "▁p ie", + "▁ans wer", + "▁f ree", + "▁bus iness", + "▁ ut", + "a pe", + "▁appe ar", + "▁r iver", + "▁st o", + "▁c ast", + "▁fam ily", + "▁j ud", + "▁ex cl", + "▁let ter", + "ing ly", + "ri e", + "▁ha ir", + "ot e", + "▁arm s", + "▁bec ome", 
+ "er n", + "ou ble", + "▁diffe rent", + "▁v al", + "f fect", + "▁nat ur", + "▁poss ible", + "▁sever al", + "▁f ine", + "a h", + "▁le ad", + "▁for g", + "▁exp ress", + "l i", + "▁su s", + "▁gl ad", + "o on", + "▁ar ri", + "▁bl ood", + "itt ing", + "▁qu iet", + "ren ce", + "▁ide a", + "▁a ble", + "itt ed", + "st er", + "▁char ac", + "▁beg in", + "▁ch ur", + "▁t ou", + "▁st ory", + "▁ey e", + "b and", + "at ive", + "▁gr and", + "▁cons ider", + "▁ac ross", + "▁p en", + "▁ex cept", + "▁f re", + "▁w in", + "▁e qu", + "et h", + "▁c ent", + "is f", + "▁part ic", + "▁dif fic", + "▁wind ow", + "▁sur pr", + "ll ect", + "▁pro v", + "▁dire ct", + "▁con c", + "e y", + "a w", + "▁go vern", + "▁dis co", + "▁w ild", + "▁do g", + "▁fl o", + "▁so ft", + "te en", + "▁c ross", + "as ed", + "▁e ffect", + "▁s or", + "▁long er", + "▁he n", + "▁follow ed", + "▁so ld", + "▁the e", + "▁p ub", + "▁hus band", + "ard s", + "ant ly", + "b y", + "▁a p", + "▁supp ose", + "▁res pect", + "t s", + "▁h ast", + "▁s al", + "▁comp le", + "▁he av", + "▁happ y", + "▁r ich", + "▁c reat", + "un e", + "▁t aking", + "▁re qu", + "▁st ay", + "▁spo ke", + "▁d aughter", + "▁we e", + "▁ ve", + "▁d u", + "▁gre en", + "▁an im", + "▁d in", + "' ll", + "▁b ird", + "al th", + "▁me re", + "▁g ard", + "n y", + "le y", + "▁poss ess", + "em pt", + "▁re ached", + "▁appe ared", + "o v", + "▁ex ist", + "in ation", + "▁pret ty", + "▁remem ber", + "▁he a", + "▁op ened", + "▁to m", + "ang ed", + "▁sl ow", + "▁im ag", + "▁i' ve", + "r act", + "▁say ing", + "k ing", + "ut es", + "▁comm on", + "▁occ as", + "▁b ook", + "▁r us", + "am es", + "ic es", + "▁br ight", + "m s", + "▁sat isf", + "▁s ense", + "▁f av", + "▁su cc", + "um p", + "is ing", + "▁l u", + "▁acc ord", + "ter n", + "▁bre ak", + "▁ex per", + "▁mon th", + "u se", + "▁de m", + "▁sc ar", + "▁contin ued", + "▁sec ret", + "▁chur ch", + "▁t ree", + "▁st ri", + "▁car ried", + "▁c ry", + "nd ing", + "▁spir it", + "▁want ed", + "er ic", + "▁certain ly", + "▁comm and", + "▁d 
est", + "▁mo ve", + "ou n", + "▁swe et", + "▁stre et", + "▁o ught", + "▁acc ount", + "▁de f", + "h am", + "▁pre p", + "▁s ens", + "▁es c", + "▁ro ck", + "ot s", + "▁de cl", + "▁pur p", + "ri age", + "ou th", + "ow ers", + "▁dra w", + "▁e at", + "▁b reat", + "▁charac ter", + "im e", + "c ul", + "med i", + "▁st ud", + "▁sch ool", + "itu de", + "▁hea ven", + "▁feel ing", + "▁s ad", + "▁reg ard", + "em ent", + "▁p ain", + "▁wor th", + "▁b ra", + "ne y", + "▁d ut", + "▁sm o", + "aim ed", + "▁tr ans", + "▁del ight", + "▁qu ar", + "▁h ung", + "▁m ot", + "▁bl ue", + "▁h ot", + "▁h ill", + "▁d iv", + "um b", + "▁dis app", + "▁mar g", + "▁lau gh", + "id ence", + "▁pro du", + "▁succ ess", + "ur y", + "s on", + "▁f ast", + "▁eng lish", + "▁d ress", + "▁h at", + "▁ter ri", + "▁p ort", + "▁ne ither", + "▁cour t", + "▁se ven", + "▁f ight", + "▁prin cess", + "▁li ved", + "▁v iew", + "▁im medi", + "▁se lf", + "▁v ar", + "▁hour s", + "▁m ill", + "▁so l", + "▁ex am", + "▁t ried", + "▁w on't", + "▁ent ered", + "▁dis p", + "t o", + "r ic", + "▁car ry", + "▁imp ort", + "▁an g", + "z e", + "on y", + "▁d anger", + "led ge", + "▁off ic", + "▁c ause", + "▁n one", + "▁for ward", + "▁unc le", + "▁to r", + "▁d et", + "as k", + "▁l en", + "▁fur ther", + "▁p ay", + "▁add ed", + "▁fr ont", + "r or", + "▁g e", + "▁partic ular", + "▁de al", + "▁pr ot", + "▁l ed", + "▁ac qu", + "▁pr ay", + "▁e ff", + "▁happ ened", + "▁ch ief", + "le ct", + "▁wal ked", + "▁lat er", + "▁jo y", + "i ar", + "d ay", + "▁or d", + "▁al th", + "▁com fort", + "▁pro b", + "▁ma j", + "▁af fect", + "▁pub lic", + "▁b ene", + "en ing", + "▁alth ough", + "g r", + "▁sh o", + "▁f ig", + "res h", + "▁f ail", + "u ct", + "u g", + "al ity", + "▁me m", + "▁seem s", + "▁your self", + "sh ip", + "e ad", + "i am", + "▁num ber", + "s ide", + "▁a h", + "▁do ing", + "▁li ving", + "are nt", + "▁des p", + "iz e", + "oo f", + "▁f ield", + "▁re ceived", + "▁sh ad", + "▁be y", + "▁bey ond", + "▁ph il", + "▁l ine", + "▁vis it", + "in ct", + "ri g", 
+ "▁part y", + "▁gard en", + "▁j e", + "▁m outh", + "▁ha ll", + "▁qu een", + "▁bo at", + "▁be ar", + "▁am eric", + "is m", + "▁gentle man", + "▁v i", + "ir t", + "u ff", + "▁la id", + "ra id", + "▁occas ion", + "▁ent ire", + "▁a ge", + "▁s ister", + "▁cl ot", + "▁re pe", + "ous ly", + "▁pr ison", + "▁acc om", + "▁wh is", + "▁near ly", + "▁tre es", + "il ing", + "if f", + "▁eight een", + "b it", + "ward s", + "▁ear ly", + "▁t al", + "▁l ab", + "▁for th", + "m ing", + "on es", + "▁m ed", + "▁tr y", + "▁d a", + "il t", + "anc ed", + "▁prin ci", + "▁en em", + "▁think ing", + "▁ch ance", + "w here", + "▁c re", + "▁min utes", + "▁an x", + "▁mar y", + "▁p ict", + "▁wa it", + "▁v ill", + "▁st ren", + "▁af raid", + "▁cr ow", + "▁sm ile", + "▁l ate", + "▁eng land", + "▁pleas ure", + "▁a unt", + "▁new s", + "▁w is", + "▁f le", + "▁see ing", + "▁su per", + "▁fa ith", + "▁ro b", + "im ent", + "o int", + "▁b ill", + "ll ing", + "▁ne igh", + "▁tr ouble", + "▁sil ence", + "▁pl ain", + "▁there 's", + "are t", + "pe nd", + "▁excl aimed", + "ren ch", + "g y", + "▁mil es", + "p ly", + "▁gl ass", + "▁d rew", + "▁neigh b", + "el s", + "▁m ine", + "▁pr act", + "▁heav y", + "▁stand ing", + "▁se vent", + "▁sh ar", + "▁ch ange", + "▁necess ary", + "▁ch ap", + "▁purp ose", + "▁in qu", + "▁natur al", + "▁d eter", + "ic ked", + "▁b ott", + "▁hard ly", + "▁be ll", + "▁to p", + "▁c aught", + "fe red", + "w h", + "i ves", + "ound ed", + "▁a uth", + "▁circ um", + "▁f ing", + "▁sto pped", + "u c", + "▁w it", + "am ent", + "▁op in", + "▁a v", + "▁pri v", + "ain ing", + "▁inst ead", + "ru pt", + "▁g rew", + "▁lo ved", + "▁is land", + "▁kn ight", + "▁ag o", + "▁len gth", + "▁in n", + "▁pe ace", + "l s", + "in ary", + "i or", + "u es", + "▁th ird", + "us h", + "▁beaut y", + "▁h ig", + "▁he 's", + "t he", + "f orm", + "he ad", + "ic ally", + "as p", + "anc y", + "▁deter m", + "▁stra ight", + "▁c ra", + "in ing", + "pp er", + "l er", + "▁inf l", + "▁th or", + "▁con vers", + "▁bes ides", + "▁p osition", + 
"▁thir ty", + "▁d en", + "ra ge", + "▁att ention", + "m a", + "▁con v", + "ag er", + "▁his t", + "o red", + "▁com es", + "ag ed", + "▁for ce", + "▁s itting", + "▁ple ase", + "te nd", + "it er", + "▁what ever", + "▁inf orm", + "▁h op", + "▁ch air", + "▁bu ild", + "▁b ab", + "ust om", + "▁girl s", + "▁r om", + "▁f rench", + "▁str uck", + "▁p ull", + "▁a st", + "▁li e", + "▁wr ong", + "▁know ledge", + "▁gra ce", + "▁scar ce", + "g hed", + "▁res ol", + "▁wat ch", + "▁thought s", + "▁r id", + "▁att empt", + "▁fif ty", + "▁r ap", + "▁bo x", + "h ood", + "▁get ting", + "▁ ver", + "▁f at", + "▁compan y", + "▁ar r", + "▁crow d", + "▁b urn", + "▁sl ight", + "▁cl ass", + "▁sou th", + "▁d ie", + "▁ex act", + "▁dr ink", + "▁en j", + "▁th ick", + "▁din ner", + "▁sa ve", + "▁ma id", + "▁pl an", + "▁sa int", + "▁immedi ately", + "i ers", + "▁b orn", + "i us", + "▁re v", + "▁te ars", + "ist s", + "▁t reat", + "us ion", + "▁me ant", + "▁boy s", + "pp ing", + "▁slow ly", + "▁in cl", + "▁l im", + "▁d ied", + "ic ed", + "▁com pl", + "▁f ool", + "▁fore st", + "▁su gg", + "▁p ost", + "▁ac cept", + "▁res ult", + "▁auth or", + "nd on", + "ce ive", + "▁sugg est", + "ci ent", + "▁st one", + "▁fr ight", + "▁pap er", + "▁con se", + "▁j our", + "▁t y", + "▁en c", + "▁quick ly", + "▁cont r", + "▁you th", + "▁se nd", + "▁v ict", + "if ied", + "▁bel ong", + "▁war m", + "▁f ix", + "▁imp oss", + "▁bes ide", + "▁ er", + "▁to ne", + "▁c amp", + "▁des ire", + "▁b ound", + "▁m akes", + "▁marg aret", + "▁nor th", + "▁br own", + "▁mo on", + "▁li ps", + "▁pl aced", + "v al", + "▁circum st", + "▁f ood", + "▁f illed", + "ic s", + "if t", + "an n", + "▁lo ndon", + "▁dist ance", + "g ing", + "▁stren gth", + "▁i d", + "▁flo or", + "▁for get", + "▁ob l", + "▁m id", + "ri es", + "it ions", + "b s", + "▁spr ing", + "▁you' re", + "▁vi ol", + "▁j ack", + "▁po ck", + "oo ks", + "▁follow ing", + "▁s ac", + "▁rem ained", + "ar ch", + "▁gr ow", + "▁sn ow", + "▁govern ment", + "▁b all", + "▁h ors", + "▁n ar", + "ad ed", 
+ "▁bro ken", + "▁lau ghed", + "▁des cri", + "▁sa fe", + "itt en", + "ive ly", + "▁prof ess", + "▁o '", + "am ed", + "▁dep art", + "▁eas y", + "ou red", + "▁u nd", + "▁cou n", + "▁than k", + "▁know s", + "▁wa iting", + "d om", + "at s", + "▁g er", + "▁v an", + "▁an ne", + "▁hors es", + "u gg", + "▁d read", + "▁un e", + "g es", + "ac y", + "▁pro ceed", + "▁g az", + "▁sh out", + "▁start ed", + "ent ed", + "▁comple te", + "o pe", + "▁g all", + "de red", + "▁w ide", + "i res", + "▁ne ck", + "as ure", + "ist ed", + "▁serv ice", + "▁pie ce", + "ci ally", + "en ces", + "▁sa il", + "▁pal ace", + "er v", + "▁gu ard", + "▁do ll", + "▁talk ing", + "▁man 's", + "▁li ft", + "▁gra ve", + "▁wee k", + "le t", + "▁imposs ible", + "▁eff ort", + "▁im m", + "▁arm y", + "we ll", + "▁diffic ult", + "u nd", + "▁f resh", + "▁f un", + "re me", + "▁st op", + "▁m ess", + "▁g ar", + "▁de g", + "▁inc re", + "▁corn er", + "▁soci ety", + "▁we ak", + "▁sh ut", + "▁h y", + "▁pro per", + "ac hing", + "▁cl oud", + "idd le", + "iv id", + "▁dem and", + "▁n ine", + "▁s it", + "▁reco gn", + "▁be at", + "us s", + "▁turn ing", + "▁sk y", + "▁opin ion", + "▁sing le", + "p ic", + "▁f ly", + "▁l ang", + "▁m ass", + "ce ll", + "▁out side", + "▁k iss", + "▁tr ust", + "▁occ up", + "▁ev il", + "▁bel ow", + "▁appear ance", + "u it", + "▁after n", + "▁gl o", + "▁g un", + "▁w est", + "en cy", + "p ar", + "▁show ed", + "▁convers ation", + "is es", + "▁con n", + "▁could n't", + "▁run ning", + "▁m ention", + "▁great er", + "▁mus ic", + "▁breat h", + "as es", + "▁n in", + "▁an t", + "are r", + "▁mor row", + "▁b ank", + "▁es pe", + "▁p eter", + "or k", + "ci al", + "▁pres ence", + "▁bat tle", + "▁win ter", + "he red", + "▁prob ably", + "▁clot hes", + "▁f ash", + "▁mar k", + "▁w ished", + "ve re", + "▁co ll", + "▁em b", + "▁kn e", + "▁mar ried", + "▁arri ved", + "▁p un", + "▁e vent", + "us hed", + "▁suff ic", + "▁e ager", + "▁form er", + "▁gi ving", + "▁p op", + "▁sa nd", + "▁ne g", + "▁us ual", + "▁rel ig", + "▁sim 
ple", + "▁sy m", + "it ation", + "▁g ro", + "or ies", + "▁mo ved", + "▁month s", + "▁spe aking", + "▁p et", + "▁sil ent", + "▁c ab", + "▁mount ain", + "▁express ion", + "g ar", + "▁co vered", + "▁hu nt", + "▁aftern oon", + "ap ed", + "▁occ ur", + "rie f", + "▁st ates", + "▁ z", + "st r", + "▁lo c", + "l ight", + "▁sh ore", + "c he", + "▁eas ily", + "▁p ale", + "un ity", + "▁rem ark", + "▁ph ys", + "▁begin ning", + "▁dut y", + "▁chap ter", + "▁infl u", + "ch o", + "▁con cl", + "am b", + "▁inst ant", + "▁pol it", + "z z", + "▁enj oy", + "▁s ick", + "▁rem ain", + "u el", + "▁st ream", + "▁fig ure", + "a ld", + "▁t ur", + "▁p ath", + "▁v ol", + "▁min ute", + "▁pleas ant", + "▁scarce ly", + "▁cons cious", + "▁terri ble", + "▁k ill", + "▁ra ised", + "▁fash ion", + "▁tw el", + "y al", + "▁lea ving", + "▁twel ve", + "at ure", + "▁f ut", + "▁th rew", + "▁st ar", + "▁fl owers", + "ol og", + "▁tr ying", + "ri b", + "▁sw ord", + "▁t all", + "▁mar ry", + "▁b en", + "▁expect ed", + "▁accord ing", + "▁for ty", + "▁st ick", + "in al", + "▁gu ess", + "▁sil ver", + "▁ir on", + "▁obl ig", + "▁off ice", + "▁rap id", + "▁lad ies", + "▁espe cially", + "i pped", + "ort ed", + "▁bre ad", + "e ch", + "▁te nder", + "or th", + "▁lear ned", + "▁b ooks", + "▁is n't", + "▁surpr ise", + "▁wr ite", + "▁pur s", + "pe red", + "▁wr itten", + "▁k illed", + "▁conse qu", + "▁ex h", + "▁pl aces", + "▁cond ition", + "▁dire ction", + "▁ch o", + "ul ty", + "j o", + "m it", + "▁entire ly", + "ter ing", + "▁ent er", + "▁act ion", + "w ise", + "▁su c", + "ib ly", + "▁happ iness", + "▁dec ided", + "▁gold en", + "▁lang u", + "en ess", + "▁not e", + "▁un less", + "u ous", + "▁f al", + "al ed", + "▁you' ll", + "▁wonder ful", + "ound s", + "um e", + "' re", + "▁sh ook", + "er 's", + "oo p", + "one l", + "▁perfect ly", + "▁ge or", + "nd ered", + "▁bro ad", + "at ic", + "▁cl osed", + "a 's", + "▁sp ot", + "te nded", + "▁lat ter", + "▁step s", + "▁mere ly", + "▁hist ory", + "f er", + "▁w ise", + "is hing", + "os 
ing", + "▁m iddle", + "ide red", + "▁underst ood", + "▁enem y", + "▁so le", + "ll ig", + "▁j ew", + "▁sim ply", + "g an", + "▁cond uct", + "▁t ast", + "▁bo ard", + "▁sa v", + "▁would n't", + "▁sh ot", + "▁rep ly", + "▁ch anged", + "m n", + "▁gr ass", + "▁fin ally", + "▁adm ir", + "it al", + "▁shar p", + "it ch", + "▁fort une", + "▁sum mer", + "▁exper ience", + "▁suc ceed", + "g ress", + "ut ed", + "▁o rig", + "ret ched", + "▁jour ney", + "▁ex cell", + "▁obser ved", + "a x", + "▁after wards", + "f ast", + "s y", + "▁b ow", + "▁fl at", + "▁pers ons", + "▁le an", + "▁ear n", + "▁bro ke", + "▁m ir", + "▁f it", + "os p", + "▁mar riage", + "▁rep res", + "i o", + "▁l ying", + "un k", + "▁tra ve", + "▁s itu", + "▁list en", + "▁acqu aint", + "▁r ing", + "ci ence", + "▁f aint", + "ol ute", + "▁cal m", + "b ered", + "▁li ves", + "▁esc ape", + "▁bene ath", + "ous es", + "▁cl im", + "▁bl ess", + "▁repe ated", + "▁pock et", + "est s", + "▁t ail", + "▁pass ion", + "▁d ick", + "▁v en", + "os es", + "cl ock", + "▁m ut", + "▁bec om", + "▁o per", + "▁o' clock", + "▁f ish", + "▁l ou", + "se mb", + "▁pre v", + "▁all owed", + "▁fam il", + "he l", + "▁g ate", + "▁sp ite", + "iver s", + "▁he alth", + "iss ion", + "▁i gn", + "▁re ach", + "▁c and", + "▁r ain", + "▁em pl", + "▁b an", + "▁str ugg", + "▁fir m", + "▁bit ter", + "▁sor ry", + "b ing", + "▁father 's", + "▁tem per", + "▁mad ame", + "pl es", + "▁f urn", + "▁fut ure", + "um ed", + "▁n ice", + "▁se par", + "▁pres ently", + "▁circumst ances", + "▁conn ect", + "id ing", + "▁set t", + "k es", + "▁l oud", + "▁wor se", + "▁w and", + "▁sp read", + "▁i' d", + "▁let ters", + "▁ye llow", + "▁mag n", + "▁pass ing", + "▁k it", + "▁pleas ed", + "▁dark ness", + "▁rem ar", + "idd en", + "c ome", + "▁te a", + "▁c iv", + "▁ap art", + "▁disapp e", + "▁import ant", + "▁leg s", + "▁n ation", + "▁del ic", + "▁d ressed", + "▁g ame", + "▁wall s", + "e c", + "▁d ry", + "▁v irt", + "▁d im", + "id ently", + "re l", + "▁r ub", + "▁abs olute", + "▁bl ind", + 
"▁disco vered", + "▁exact ly", + "▁d am", + "ott en", + "▁sor row", + "m y", + "▁c ost", + "fe rence", + "▁empl oy", + "vel op", + "▁c ous", + "▁be ast", + "▁spe c", + "▁opp ort", + "▁e ars", + "▁dro pped", + "▁sub st", + "▁che e", + "▁prot ect", + "il s", + "▁sm iled", + "in a", + "▁res p", + "▁prom ise", + "▁b ag", + "▁h ost", + "ur s", + "▁creat ure", + "▁not ice", + "▁know ing", + "▁head s", + "▁conc er", + "▁se at", + "ish ment", + "▁ind ivid", + "▁exist ence", + "▁determ ined", + "le nd", + "▁st orm", + "ro y", + "our s", + "▁con ce", + "ang ing", + "▁fix ed", + "▁p ress", + "▁maj or", + "o ved", + "▁v es", + "i od", + "▁lear n", + "▁mot ion", + "▁em pt", + "▁lea ves", + "▁bott om", + "▁ar g", + "iet y", + "▁no body", + "▁pro s", + "qu e", + "▁ut ter", + "▁p ick", + "ac ked", + "▁inte llig", + "▁he s", + "▁st ir", + "▁pre vent", + "▁ass ist", + "▁d om", + "▁dis g", + "▁adv ant", + "er able", + "▁v ent", + "um ent", + "▁t ired", + "re ct", + "as hed", + "act ion", + "▁cons idered", + "▁wr ote", + "▁h ouses", + "▁su it", + "▁che er", + "▁cast le", + "▁p ra", + "▁per form", + "anc ing", + "▁cle an", + "ru ct", + "▁st ro", + "▁fre qu", + "▁draw ing", + "▁l uck", + "▁ha bit", + "id ge", + "e ll", + "▁on es", + "▁no ble", + "▁sp lend", + "▁hon or", + "z en", + "▁pa id", + "▁spe ech", + "▁est ab", + "▁u r", + "ist r", + "▁individ ual", + "in ite", + "▁v all", + "▁bird s", + "ro du", + "▁d ar", + "▁all ow", + "▁conf ess", + "▁imp ress", + "▁prop ert", + "▁j ane", + "▁s ong", + "▁var ious", + "▁nar row", + "▁mo der", + "▁belie ved", + "ay s", + "▁ext ra", + "▁p ure", + "ar ily", + "▁per iod", + "▁shad ow", + "▁some wh", + "▁m al", + "▁c ott", + "▁ext reme", + "▁jud ge", + "▁vill age", + "▁ro yal", + "▁somewh at", + "▁l ower", + "▁ha m", + "▁ag ree", + "▁remem bered", + "▁ast on", + "ent h", + "▁decl ared", + "p an", + "▁tr ain", + "▁part s", + "▁col onel", + "am ber", + "▁break fast", + "▁sure ly", + "▁s in", + "ay ed", + "▁sc ene", + "g o", + "▁great est", + "▁influ 
ence", + "▁c ustom", + "it ary", + "▁anim al", + "▁sa ke", + "▁mo d", + "▁sold iers", + "in y", + "▁an cient", + "▁dra wn", + "▁ev idently", + "▁way s", + "▁look s", + "▁rev ol", + "at or", + "ant ed", + "▁ref lect", + "▁pict ure", + "▁like ly", + "▁sh r", + "▁law s", + "▁hold ing", + "▁diffic ulty", + "▁in j", + "▁me l", + "▁cou rage", + "n es", + "▁m ort", + "▁tr oub", + "▁bur st", + "▁ang ry", + "▁pr oud", + "gg ed", + "▁spo ken", + "is ion", + "▁des ert", + "pt ion", + "▁com b", + "▁app arent", + "r ing", + "▁wat ched", + "n a", + "▁e ast", + "▁sh op", + "▁ag re", + "▁priv ate", + "est y", + "▁j ul", + "▁fin ished", + "▁anx ious", + "ot ion", + "▁fif teen", + "▁soci al", + "u nder", + "▁dis m", + "▁tou ch", + "▁w ine", + "▁att ack", + "▁ide as", + "▁geor ge", + "a f", + "re r", + "oo se", + "▁sp ace", + "▁sc r", + "▁ins ide", + "▁gentle men", + "▁civ il", + "i ently", + "▁form ed", + "▁f ol", + "▁go es", + "▁you' ve", + "▁th in", + "▁sur f", + "▁serv ant", + "▁b al", + "▁co ver", + "▁our selves", + "▁fall en", + "▁hen ry", + "▁l ot", + "i um", + "▁ad vent", + "▁car riage", + "▁bab y", + "▁ele ct", + "▁to ng", + "▁app re", + "▁every body", + "ud ed", + "▁comm un", + "▁in e", + "it ive", + "▁wa ited", + "c ise", + "▁gr ou", + "he t", + "▁v ain", + "▁imp ro", + "▁fav or", + "er ial", + "▁spe ed", + "▁wind ows", + "▁care fully", + "▁i ce", + "▁no ise", + "▁her o", + "▁j im", + "▁will iam", + "▁pe cul", + "▁prom ised", + "▁walk ing", + "▁forg otten", + "▁oblig ed", + "▁earn est", + "▁m ain", + "▁l ose", + "▁gl ance", + "▁ves sel", + "▁gr ad", + "▁th ro", + "▁bo d", + "▁should er", + "▁met h", + "▁anim als", + "▁not iced", + "ab les", + "▁pecul iar", + "▁f ier", + "▁p ot", + "▁quiet ly", + "▁c up", + "▁ser ious", + "▁tre mb", + "▁gener ally", + "▁americ an", + "▁sym p", + "r al", + "▁d on", + "▁fr ance", + "ict ion", + "▁propert y", + "▁should ers", + "▁str anger", + "▁s an", + "▁c ow", + "▁what 's", + "▁d ust", + "▁affect ion", + "▁hands ome", + "▁hig her", + "i 
ant", + "nd ay", + "▁we l", + "▁po et", + "▁sl a", + "▁dist inct", + "▁m am", + "▁p ier", + "ac ing", + "ag ue", + "▁gr own", + "u ly", + "▁d '", + "▁ch amber", + "▁des ce", + "▁mur m", + "st em", + "▁person al", + "▁f ancy", + "▁of fered", + "os ite", + "ons ie", + "▁bu ilt", + "▁ed ge", + "▁whis pered", + "▁sk in", + "▁pie ces", + "it ated", + "c her", + "os ity", + "▁p it", + "▁cont ro", + "▁f aces", + "▁sp ent", + "▁inter rupt", + "h ow", + "is ters", + "▁but ter", + "▁de velop", + "▁un k", + "h ip", + "▁he at", + "▁fo nd", + "▁co at", + "▁tou ched", + "▁h ol", + "ing u", + "▁p i", + "▁r ace", + "▁j ump", + "▁surpr ised", + "ot ed", + "▁de fe", + "en ced", + "▁was n't", + "▁we ar", + "and on", + "▁f an", + "ac her", + "▁ar ch", + "▁ed uc", + "▁bra ve", + "at hered", + "▁e ld", + "▁we alth", + "▁sy stem", + "▁ger man", + "▁fal se", + "w ood", + "▁d are", + "ak ed", + "▁cous in", + "▁f er", + "ke y", + "▁l in", + "▁inte llect", + "▁prep ared", + "▁fing ers", + "▁sur r", + "▁mount ains", + "i pp", + "▁opport unity", + "a ff", + "▁b are", + "▁d or", + "▁int rodu", + "▁co llect", + "▁love ly", + "▁r ag", + "▁cr own", + "▁mat ters", + "▁compan ion", + "▁we ather", + "▁al ar", + "▁inn oc", + "▁r is", + "▁m ix", + "▁l ake", + "▁st ore", + "▁un h", + "▁mean ing", + "▁mem ory", + "o ver", + "▁b and", + "le ep", + "▁find ing", + "e e", + "▁char ge", + "▁gr at", + "▁att ract", + "▁gr ay", + "▁quar ter", + "▁av o", + "▁great ly", + "▁m ach", + "▁in h", + "▁as leep", + "▁par is", + "▁d av", + "▁al to", + "▁off er", + "▁opp osite", + "oun ced", + "er ve", + "▁bre ast", + "n own", + "▁read ing", + "▁alto gether", + "▁wr iting", + "pect ed", + "▁deg ree", + "c ing", + "n ight", + "▁ex ec", + "fort un", + "▁st at", + "▁feel ings", + "▁h ath", + "▁c ook", + "▁r ail", + "▁hon our", + "d ing", + "▁f ate", + "▁p or", + "▁fr ank", + "▁meet ing", + "▁r ough", + "▁al ive", + "▁h ide", + "it es", + "il ar", + "▁bl ow", + "▁cr uel", + "ra ph", + "▁hur t", + "▁l oss", + "▁thr own", + "▁ca 
used", + "▁we 'll", + "▁ser ve", + "▁du ke", + "▁b ent", + "▁un ited", + "▁see k", + "▁king dom", + "▁situ ation", + "▁empt y", + "n ers", + "▁d ue", + "▁li ked", + "▁sw ift", + "▁open ing", + "▁serv ants", + "c hen", + "ou ra", + "▁g h", + "▁sus pic", + "▁fre ed", + "oint ed", + "▁surf ace", + "c il", + "▁quest ions", + "▁ ess", + "▁cur ious", + "▁const it", + "▁accom pan", + "▁christ ian", + "▁f ill", + "are st", + "▁satisf ied", + "r on", + "▁s ides", + "▁p ity", + "▁re ve", + "▁equ al", + "▁he ight", + "▁or dered", + "os op", + "▁gre y", + "▁list ened", + "p et", + "▁re jo", + "▁cap t", + "ib ility", + "o b", + "▁m art", + "▁happ en", + "▁hur ried", + "▁doll ars", + "▁langu age", + "▁an ge", + "▁your s", + "▁supp osed", + "▁laugh ing", + "▁sett led", + "▁ro de", + "▁per m", + "▁dist ingu", + "▁hur ry", + "▁dest roy", + "▁tal ked", + "▁lift ed", + "oc r", + "▁squ are", + "▁val ue", + "▁tast e", + "▁v ast", + "▁king 's", + "▁r ul", + "▁r oof", + "▁tell ing", + "▁stud y", + "▁o w", + "▁p an", + "▁b as", + "▁r ising", + "▁suffic ient", + "▁for ced", + "▁r ise", + "▁at tend", + "▁phil osop", + "▁no se", + "▁six ty", + "he st", + "▁p in", + "▁e gg", + "▁am b", + "▁fa ult", + "b ur", + "▁st ation", + "▁dist ur", + "▁reg ular", + "ill e", + "▁p ack", + "▁spe cial", + "▁hon est", + "▁build ing", + "▁se ason", + "▁sh ape", + "▁pr ide", + "▁sm iling", + "li ke", + "▁ord ers", + "y n", + "▁wood s", + "▁accom pl", + "c on", + "▁s am", + "▁us ually", + "▁wat ching", + "▁sac ri", + "er ved", + "▁pass age", + "▁mat erial", + "▁vall ey", + "y r", + "▁st airs", + "▁li bert", + "▁fright ened", + "▁remar ked", + "▁t it", + "▁w ed", + "▁mist ress", + "▁direct ly", + "▁suff er", + "▁glo om", + "▁l ines", + "▁st ock", + "▁just ice", + "▁d iam", + "est ed", + "▁gr owing", + "▁does n't", + "▁g athered", + "▁ord inary", + "u ce", + "▁e ur", + "▁un f", + "▁kit chen", + "▁th reat", + "▁de pend", + "▁wee ks", + "▁desp air", + "▁meth od", + "▁se ized", + "▁disc uss", + "▁ex er", + "if y", + 
"▁fl ower", + "▁ign or", + "e er", + "ad es", + "▁de b", + "ep ing", + "▁a le", + "▁y o", + "ch ief", + "▁supp er", + "i k", + "▁bo ld", + "▁put ting", + "▁ne arer", + "us es", + "▁one 's", + "▁b le", + "▁y ork", + "▁end e", + "▁aff airs", + "▁sold ier", + "▁contr ary", + "▁mo ving", + "▁stre ets", + "▁b ir", + "r ance", + "hen s", + "▁c it", + "ic ated", + "▁cat ch", + "▁imag ine", + "ed s", + "▁mar ch", + "▁se arch", + "ar a", + "▁re ceive", + "im ate", + "▁m onsie", + "▁tw ice", + "▁pap a", + "▁monsie ur", + "▁re ck", + "m in", + "u de", + "▁pro cess", + "▁ho le", + "a ly", + "l in", + "▁c ro", + "▁fav our", + "▁d ign", + "▁work ing", + "▁har m", + "▁eur ope", + "ant ic", + "▁pro ved", + "oc ked", + "▁pro ve", + "▁cl er", + "▁lo d", + "cept ion", + "▁pull ed", + "▁ar th", + "▁author ity", + "▁ha ven", + "▁j er", + "▁un s", + "▁move ment", + "ust ed", + "▁eng aged", + "▁brother s", + "▁advant age", + "l ished", + "o le", + "▁arth ur", + "▁a ut", + "▁st ones", + "▁far m", + "▁diffe rence", + "▁f art", + "▁as ide", + "▁m as", + "▁obser v", + "▁hen ce", + "▁possess ion", + "▁hill s", + "▁fort un", + "ul s", + "ail s", + "▁inst ance", + "▁she 's", + "▁o l", + "▁ho ly", + "▁fle w", + "k y", + "▁col or", + "▁r ate", + "▁do ors", + "▁bus y", + "se t", + "▁add ress", + "▁famil iar", + "▁we ight", + "▁aw are", + "▁play ed", + "▁symp ath", + "ll s", + "▁sole mn", + "▁l iter", + "▁t est", + "▁em per", + "▁ind ian", + "▁dist ant", + "▁interest ing", + "▁b ull", + "▁thor ough", + "▁w ore", + "▁wor ked", + "▁expl ained", + "▁excell ent", + "▁splend id", + "▁tong ue", + "▁d i", + "▁p ard", + "▁n amed", + "▁sh ame", + "▁fr anc", + "▁spe ct", + "▁moment s", + "b ers", + "▁w il", + "▁my ster", + "▁se ated", + "▁inst antly", + "▁sim ilar", + "▁ende av", + "▁me asure", + "▁natur ally", + "nd s", + "▁su f", + "▁am ount", + "▁im per", + "▁dog s", + "it able", + "▁br it", + "▁necess ity", + "r id", + "ul ous", + "▁conf idence", + "d en", + "▁p arent", + "▁w id", + "▁v ir", + "▁never 
the", + "▁agre ed", + "▁neverthe less", + "un ch", + "▁hear ing", + "▁t akes", + "▁a ug", + "▁un ivers", + "en ance", + "▁un w", + "▁ear l", + "▁keep ing", + "▁dri ve", + "▁produ ced", + "▁a ud", + "on 's", + "▁n ames", + "ag n", + "▁disappe ared", + "▁thr ow", + "▁pres ident", + "▁god s", + "▁mag ic", + "▁repres ent", + "▁unk nown", + "p or", + "▁ter ror", + "▁haven 't", + "as c", + "▁supp ort", + "▁smo ke", + "▁w icked", + "k er", + "▁wor ks", + "▁art ic", + "▁d ull", + "▁yes ter", + "▁fall ing", + "▁worth y", + "▁libert y", + "ul ation", + "▁des ign", + "▁want s", + "▁ev idence", + "▁compan ions", + "▁spir its", + "▁co ast", + "▁might y", + "▁particular ly", + "▁wit ness", + "▁disco ver", + "▁s ought", + "▁sp an", + "' ve", + "▁r are", + "▁offic ers", + "l v", + "z y", + "▁yester day", + "ve y", + "c ent", + "▁p owers", + "▁y ield", + "▁c ool", + "▁or gan", + "▁am az", + "▁point ed", + "f ord", + "▁cl aim", + "▁cont ent", + "▁poss ibly", + "▁ter ms", + "▁tri um", + "▁offic er", + "▁pers u", + "▁ce ased", + "▁dro ve", + "▁occur red", + "▁g ree", + "▁li es", + "▁other wise", + "▁emper or", + "▁h om", + "▁st ars", + "▁kne es", + "▁trium ph", + "ru ction", + "▁pa used", + "om s", + "▁requ ired", + "▁fail ed", + "▁unh app", + "▁diam ond", + "▁r at", + "▁al i", + "▁d ouble", + "▁form s", + "▁gi ves", + "▁fing er", + "ra ce", + "▁p air", + "al ous", + "ill a", + "▁bo b", + "▁el iz", + "▁tra vel", + "▁carry ing", + "▁g le", + "il es", + "▁te eth", + "es h", + "▁sh own", + "▁fr uit", + "▁wat ers", + "▁ent ertain", + "▁heart s", + "um n", + "▁lab or", + "in 't", + "▁p ill", + "▁en er", + "so ci", + "▁exam ple", + "▁u pper", + "▁fore ign", + "▁mor al", + "▁soft ly", + "ro se", + "▁hu ge", + "▁char les", + "▁pri est", + "▁exc it", + "▁f et", + "▁mother 's", + "▁possess ed", + "▁c ases", + "▁rep ort", + "▁mil k", + "▁aff air", + "▁princi ple", + "▁inh ab", + "▁freed om", + "▁pr oof", + "▁inte nded", + "▁satisf action", + "▁shout ed", + "is c", + "▁pl at", + "▁b ask", + "ent 
al", + "▁grou p", + "▁fart her", + "as m", + "▁un fortun", + "▁unt o", + "▁sing ing", + "▁arr ange", + "▁relig ion", + "▁b er", + "▁rock s", + "▁sevent een", + "▁d er", + "▁j ames", + "▁bu y", + "▁succeed ed", + "▁room s", + "▁lead ing", + "▁maj esty", + "▁event s", + "▁d ance", + "▁p aint", + "▁g ently", + "ac le", + "▁te le", + "▁pard on", + "us ing", + "▁dro p", + "f ather", + "▁in vent", + "▁ke y", + "▁mention ed", + "▁sevent y", + "▁r os", + "▁suff ering", + "▁rec ord", + "▁cab in", + "ro ad", + "▁dis s", + "iv al", + "▁demand ed", + "▁excit ement", + "▁as soci", + "▁pro gress", + "ang ers", + "▁cur i", + "▁americ a", + "▁ru le", + "▁b or", + "▁v ig", + "less ly", + "▁clear ly", + "▁b ore", + "▁she ep", + "▁reg ret", + "▁neighb our", + "b ly", + "i ance", + "▁inst inct", + "▁adv ice", + "▁aw ful", + "▁s en", + "▁f ully", + "▁g ather", + "▁pap ers", + "▁h idden", + "▁che st", + "▁bir th", + "h y", + "p ap", + "▁h ither", + "▁st uff", + "▁imp at", + "▁call ing", + "▁four th", + "▁dread ful", + "▁p os", + "▁g rief", + "▁br ill", + "▁power ful", + "▁present ed", + "▁fair y", + "▁expl ain", + "▁sho ot", + "▁prison er", + "▁jo ined", + "▁aff ord", + "m ond", + "at tered", + "▁ ing", + "im ents", + "▁she l", + "▁pre fer", + "▁consider able", + "▁ob ey", + "▁vo ices", + "▁inter v", + "▁interest ed", + "▁vir g", + "▁c red", + "▁c ard", + "▁e p", + "▁need ed", + "▁p ounds", + "▁con qu", + "▁cle ver", + "▁adv anced", + "▁c ord", + "ig hed", + "▁under t", + "▁resol ved", + "▁w ag", + "ist ic", + "▁pa ul", + "▁exc ited", + "▁cond itions", + "▁pict ures", + "ac ious", + "▁sh ining", + "▁su nday", + "▁ser ved", + "▁ste am", + "▁pol ice", + "▁spr ang", + "s ie", + "or a", + "es e", + "▁j es", + "▁no dd", + "▁sal t", + "▁field s", + "▁c art", + "▁ind ians", + "▁fier ce", + "d le", + "▁r ide", + "▁des ired", + "▁ed ward", + "▁import ance", + "▁inform ation", + "t ure", + "▁h osp", + "▁me mb", + "▁per ceived", + "▁y ard", + "▁cr it", + "tern al", + "▁t ask", + "▁fo ld", + "r 
ant", + "▁soon er", + "▁mer ch", + "▁absolute ly", + "▁cit iz", + "▁suf fered", + "▁t ight", + "▁d ur", + "▁is s", + "ill y", + "▁lo g", + "▁complete ly", + "h old", + "▁r ad", + "▁sh are", + "▁will ing", + "▁dev il", + "▁ship s", + "▁imag ination", + "▁super ior", + "c om", + "am s", + "▁any body", + "▁en v", + "▁app l", + "▁dra g", + "▁da wn", + "asp ed", + "▁occup ied", + "▁curi osity", + "i est", + "▁s igh", + "▁fo x", + "as ant", + "▁my st", + "▁ste ad", + "et t", + "▁cou ple", + "▁ty pe", + "▁extra ord", + "▁apparent ly", + "▁wel come", + "▁da ily", + "▁moder n", + "i ot", + "▁a in't", + "▁d ying", + "ll en", + "▁fe at", + "▁acc ident", + "▁count enance", + "▁ab andon", + "ort ion", + "▁lo ck", + "▁cr ime", + "p ir", + "▁m ult", + "▁al as", + "▁ref used", + "▁h ate", + "▁d w", + "▁when ever", + "▁than ks", + "▁sl ave", + "▁regard ed", + "▁suggest ed", + "ul f", + "▁act ually", + "g ment", + "▁s ize", + "re g", + "▁c ult", + "▁k at", + "▁bod ies", + "h us", + "▁b ay", + "▁tr uly", + "▁fl esh", + "ish op", + "▁sm ith", + "▁bet r", + "w ith", + "▁w et", + "▁rapid ly", + "g ers", + "▁o dd", + "as ons", + "et te", + "▁cl ub", + "ab el", + "▁hor ror", + "▁m ile", + "▁fl ight", + "▁cross ed", + "▁profess or", + "▁o ce", + "▁wor st", + "iz ation", + "▁rus hed", + "▁s cience", + "▁b rief", + "▁ste pped", + "▁mid st", + "h a", + "▁s our", + "▁m aint", + "▁br ain", + "▁cott age", + "▁exp ressed", + "▁equ ally", + "▁educ ation", + "▁aug ust", + "▁b uck", + "▁n ay", + "id s", + "▁tem pt", + "▁inqu ir", + "▁fool ish", + "▁t aught", + "▁c op", + "▁d un", + "▁p icked", + "▁el sie", + "▁land s", + "▁dri ven", + "▁polit ical", + "m as", + "▁de ck", + "▁res ist", + "▁inst r", + "▁b on", + "▁k en", + "ip s", + "▁hot el", + "▁danger ous", + "i ally", + "n ow", + "▁do zen", + "▁tr ade", + "▁point s", + "▁nin et", + "ab ility", + "▁cr im", + "▁rel ations", + "▁inter p", + "▁bar b", + "▁delight ed", + "▁memb ers", + "▁s isters", + "▁st y", + "▁an ger", + "▁belie f", + "▁ask ing", + 
"▁me at", + "▁dis pl", + "▁rel ief", + "ific ation", + "▁hunt ing", + "▁ale x", + "ar ies", + "▁ob st", + "▁beh old", + "▁mist ake", + "▁inqu ired", + "▁remark able", + "▁orig in", + "c ked", + "▁n erv", + "ack s", + "ver t", + "ro p", + "▁care ful", + "▁w ounded", + "ad ing", + "▁ce re", + "▁enem ies", + "▁grad ually", + "▁interrupt ed", + "▁f is", + "▁st up", + "▁se vere", + "▁ke en", + "▁six teen", + "k ins", + "res p", + "▁wor n", + "▁fl our", + "▁sy lv", + "▁contro l", + "k in", + "▁l one", + "as ing", + "▁n ap", + "▁ass ert", + "▁dep th", + "▁kind ly", + "▁mur der", + "ac ity", + "▁ele ven", + "▁inv ol", + "▁d' art", + "▁w ings", + "▁o ak", + "▁e t", + "▁beg un", + "▁dream s", + "wh ile", + "▁more over", + "▁exp ed", + "▁inde pend", + "▁bur ied", + "▁appro ached", + "agn an", + "▁d'art agnan", + "▁se x", + "▁sa ved", + "▁har ry", + "▁phys ical", + "▁spec ies", + "c er", + "o e", + "▁gl ory", + "▁creat ures", + "▁news pap", + "▁s ang", + "▁pl enty", + "▁use ful", + "▁sho es", + "▁hop ed", + "▁frequ ently", + "▁sa f", + "▁dist r", + "▁princi p", + "▁p u", + "y 's", + "au nt", + "▁lo ver", + "▁fam ous", + "▁reco llect", + "▁n ur", + "▁gr im", + "▁ind if", + "▁char ming", + "▁a im", + "▁loo se", + "▁conscious ness", + "▁mam ma", + "▁ent hus", + "▁sle pt", + "▁smo oth", + "▁fight ing", + "▁hy p", + "▁enthus i", + "▁d ig", + "al ing", + "▁st age", + "▁any one", + "▁thr ust", + "▁des per", + "▁t ar", + "▁l amp", + "st one", + "▁st ern", + "▁ev ident", + "▁mean while", + "▁forg ive", + "▁accept ed", + "▁oce an", + "▁to t", + "▁they 're", + "▁wo ndered", + "▁play ing", + "▁det ect", + "▁ha le", + "▁kn ife", + "ail ed", + "▁close ly", + "▁me as", + "▁proceed ed", + "▁mess age", + "▁m our", + "▁f ac", + "▁un ion", + "ustom ed", + "he m", + "am ing", + "▁ex ceed", + "▁fe ather", + "▁pre cious", + "▁cent ury", + "▁une x", + "▁p ark", + "ic ation", + "▁every where", + "▁mind s", + "▁extraord inary", + "▁a rose", + "▁ent rance", + "▁cap ital", + "▁rec all", + "▁burn ing", + 
"▁magn ific", + "o es", + "or ious", + "st and", + "▁as semb", + "▁pl ant", + "▁neighb or", + "▁l est", + "um ents", + "▁coll e", + "▁virt ue", + "▁be w", + "▁for b", + "▁ret reat", + "▁cap able", + "▁ass ured", + "▁const ant", + "▁govern or", + "▁incre ased", + "▁h orn", + "▁rem oved", + "▁fact s", + "▁abs ence", + "▁expl an", + "▁a ck", + "▁some body", + "▁aw a", + "▁adm it", + "▁cor rect", + "▁forg ot", + "▁je alous", + "▁kiss ed", + "▁pop ular", + "▁h ut", + "▁u g", + "pe lled", + "▁gr ant", + "▁friend ship", + "▁ind ign", + "▁sympath y", + "i able", + "er ous", + "▁th om", + "▁al ice", + "▁le vel", + "▁object s", + "▁p ressed", + "▁sh a", + "ro om", + "▁qu al", + "▁beg ged", + "▁em p", + "▁h ind", + "▁hig hest", + "▁cloud s", + "▁gh ost", + "▁ack now", + "ous ed", + "▁stri ke", + "▁wis hes", + "▁becom es", + "▁tremb ling", + "▁no b", + "▁kind ness", + "▁accord ingly", + "▁thro at", + "r ation", + "▁f are", + "▁we 're", + "▁st retched", + "▁fr ag", + "▁whe el", + "▁qu eer", + "▁grand father", + "f or", + "▁ch oose", + "▁hel en", + "▁eight y", + "▁l y", + "▁mis erable", + "▁cont empt", + "ign ed", + "▁mil itary", + "▁rus s", + "▁bask et", + "▁a head", + "oo ps", + "ive red", + "▁list ening", + "▁fr o", + "▁lar ger", + "▁div ine", + "i ber", + "▁st ories", + "anc hes", + "us hing", + "iz ing", + "▁tre asure", + "▁exc use", + "▁innoc ent", + "▁a id", + "▁rem ind", + "▁sla ves", + "r it", + "st airs", + "▁re ward", + "og raph", + "▁man age", + "▁dis h", + "▁through out", + "▁wa ves", + "▁jud gment", + "▁arri val", + "▁cho ice", + "▁unhapp y", + "ast ic", + "▁bl ank", + "▁adv ance", + "▁inform ed", + "▁acquaint ance", + "▁impress ion", + "▁myster ious", + "b b", + "▁a ra", + "▁not es", + "▁had n't", + "▁se ll", + "▁com r", + "▁im pl", + "▁ind ust", + "▁end ed", + "▁light s", + "▁nur se", + "▁s out", + "▁b ought", + "▁f red", + "▁mar ked", + "▁sc ream", + "me nd", + "▁une as", + "▁delic ate", + "▁we ary", + "est ic", + "▁prom pt", + "▁exper i", + "▁hung ry", + "▁fly 
ing", + "▁p ow", + "▁br idge", + "▁jo in", + "▁vis ible", + "▁understand ing", + "▁cry ing", + "▁avo id", + "▁t is", + "▁st iff", + "ac hes", + "▁rest r", + "▁sound s", + "▁b owed", + "▁c aut", + "▁good s", + "▁dav id", + "▁un able", + "▁you' d", + "ham ed", + "▁b os", + "er al", + "▁as hamed", + "▁some where", + "▁inf inite", + "ock s", + "▁dign ity", + "▁g ay", + "▁v ic", + "▁am id", + "▁ho llow", + "▁em otion", + "▁adm itted", + "▁parent s", + "▁w ra", + "▁h int", + "▁tem ple", + "▁comfort able", + "▁intellig ence", + "or ous", + "▁be aring", + "▁her s", + "ab eth", + "▁rem ains", + "▁cont em", + "▁set tle", + "▁imm ense", + "f fe", + "p her", + "▁c her", + "ld om", + "▁we ap", + "ul ated", + "▁light ed", + "gy pt", + "▁advent ure", + "▁thorough ly", + "▁e gypt", + "il st", + "ang es", + "▁ob t", + "▁friend ly", + "▁reck on", + "▁stup id", + "▁f ed", + "▁r ome", + "▁me al", + "▁int ention", + "▁return ing", + "▁conv in", + "▁c oo", + "le ction", + "▁as h", + "ac hel", + "▁ro pe", + "▁pr ice", + "▁pro ject", + "el t", + "row s", + "▁sec ure", + "▁esc aped", + "▁hop es", + "▁eliz abeth", + "▁saf ety", + "▁w ound", + "▁su p", + "▁un us", + "ons cious", + "▁hor ri", + "▁min ister", + "▁o x", + "ll a", + "ens ive", + "▁help ed", + "▁plain ly", + "▁se ldom", + "▁think s", + "▁fellow s", + "▁m ood", + "▁p ushed", + "▁exh ib", + "ing ing", + "▁th under", + "au d", + "ian a", + "▁fair ly", + "▁eld er", + "▁egg s", + "ir m", + "▁maid en", + "m other", + "▁appe ars", + "▁chee ks", + "▁w on", + "▁e ase", + "▁re du", + "▁sk ill", + "▁ext ent", + "▁pract ice", + "▁relig ious", + "▁becom ing", + "▁virg in", + "▁feat ures", + "▁t ied", + "▁when ce", + "▁some how", + "▁gre et", + "▁faith ful", + "▁concer ned", + "▁the at", + "▁b ishop", + "▁p ink", + "▁eager ly", + "re es", + "▁e ating", + "▁was te", + "▁r ank", + "▁fe m", + "▁br ide", + "▁un l", + "ott ed", + "cei ving", + "▁tri b", + "▁orig inal", + "▁concer ning", + "▁ha b", + "▁acc ustomed", + "▁pat ient", + "▁rec om", + 
"▁ce ll", + "oint ment", + "▁arr anged", + "v ille", + "it ure", + "▁who lly", + "▁old er", + "▁col our", + "▁prov ided", + "▁at e", + "▁part ly", + "▁mon t", + "olog y", + "▁pros pect", + "▁cere mon", + "▁ ze", + "▁l aughter", + "▁fe e", + "▁br anches", + "▁fl ed", + "r ight", + "▁wh ilst", + "▁sl ipped", + "▁viol ent", + "▁inhab it", + "▁s ons", + "▁eng age", + "▁unc om", + "▁deep ly", + "▁subst ance", + "▁t ale", + "▁t iny", + "▁d an", + "▁g a", + "▁be e", + "▁y ards", + "ick s", + "▁hast ily", + "he ld", + "▁w es", + "▁v ague", + "▁am use", + "▁mu d", + "▁wo lf", + "▁h ans", + "ill ing", + "▁supp ly", + "▁sil k", + "▁const antly", + "▁christ mas", + "▁mill ion", + "▁whis per", + "▁m ental", + "▁was hing", + "ver se", + "▁cl oth", + "▁bar on", + "▁cor resp", + "▁nodd ed", + "▁corresp ond", + "k a", + "▁he ll", + "▁g ain", + "▁r ust", + "▁ob tain", + "▁unc onscious", + "▁strugg le", + "▁estab lished", + "▁law y", + "ol s", + "▁sign s", + "▁ut tered", + "▁rom an", + "▁constit ution", + "p es", + "▁c ave", + "▁sp are", + "▁qu ant", + "▁im age", + "▁mer ry", + "▁treat ed", + "▁effort s", + "▁lone ly", + "r ated", + "▁n ut", + "▁gl anced", + "▁port ion", + "it or", + "▁re semb", + "▁with d", + "▁me ad", + "▁fe ast", + "▁pr im", + "▁cl iff", + "▁em er", + "▁prop ortion", + "▁consider ation", + "▁hast e", + "▁gaz e", + "▁sav age", + "▁c rew", + "▁to wer", + "▁l ack", + "▁cons cience", + "▁mer cy", + "▁exh a", + "▁cons ent", + "at ors", + "ur d", + "▁out l", + "▁cl o", + "▁ad op", + "▁among st", + "▁h anging", + "▁circ le", + "▁prep ar", + "▁brill iant", + "f l", + "▁g ained", + "▁r ow", + "▁tr oops", + "▁rep ro", + "▁m ing", + "ou l", + "▁d ared", + "▁l ion", + "▁jo e", + "▁wind s", + "▁bring ing", + "▁anx iety", + "▁bill y", + "▁consequ ence", + "f ice", + "p se", + "▁f ought", + "▁p red", + "▁sc ra", + "▁gl im", + "▁vict ory", + "p ed", + "▁r ab", + "▁sc ot", + "▁ob v", + "▁sh ock", + "ch an", + "▁kn ock", + "our se", + "▁hand ed", + "▁ind ul", + "▁pat ience", + 
"▁sout her", + "▁j ose", + "▁fe ver", + "▁ro lled", + "ict ed", + "▁set ting", + "▁profess ion", + "▁sylv ia", + "▁h un", + "ut ions", + "▁fe ared", + "▁br and", + "▁bo ots", + "▁fore head", + "▁princi ples", + "▁s ink", + "▁r ig", + "av al", + "▁pur ch", + "▁gaz ed", + "▁employ ed", + "▁murm ured", + "m ore", + "▁s ar", + "as hing", + "ur al", + "ac les", + "▁tr ad", + "▁act ive", + "▁bene f", + "▁bott le", + "▁r age", + "▁inv est", + "▁lu x", + "▁s ank", + "▁h ang", + "▁be ard", + "ent ial", + "▁lo ving", + "▁nat ive", + "▁inst ruct", + "▁wa ist", + "▁rel ation", + "▁disco very", + "▁mel an", + "▁nerv ous", + "▁obt ained", + "▁p ig", + "▁se ar", + "▁fl ag", + "▁tra il", + "▁distingu ished", + "▁st ared", + "▁mis ery", + "▁pr int", + "▁gu il", + "▁jump ed", + "▁sw im", + "▁appro aching", + "▁suspic ion", + "▁i v", + "▁man aged", + "ak er", + "▁te ach", + "▁mat ch", + "▁guil ty", + "▁w retched", + "▁r um", + "▁comp ar", + "▁the ory", + "▁s her", + "▁b ree", + "▁k ings", + "▁sh one", + "ather ine", + "▁thr one", + "▁show ing", + "aw s", + "▁rob in", + "▁emb ar", + "ut ation", + "▁woman 's", + "▁add ressed", + "▁prot est", + "▁admir ation", + "▁troub led", + "▁ug ly", + "o om", + "er ves", + "▁fl ung", + "▁sub s", + "▁rel ie", + "▁thousand s", + "n ce", + "▁o d", + "▁cur rent", + "▁wood en", + "▁sacri fice", + "ur ity", + "ci p", + "▁pe ar", + "▁far mer", + "▁need s", + "▁cond em", + "▁mem ber", + "▁b ade", + "▁d ancing", + "▁re asons", + "▁cons ult", + "▁sw all", + "▁shad ows", + "▁ange l", + "▁ninet een", + "▁sty le", + "f ield", + "▁l an", + "▁man if", + "▁ro bert", + "▁gr ate", + "▁eng ine", + "▁wis dom", + "▁jes us", + "▁con vent", + "▁pre ced", + "▁interest s", + "▁tri al", + "b or", + "i ven", + "▁n est", + "▁ex ch", + "▁vo y", + "▁ill ust", + "▁wor ship", + "▁ad am", + "▁ph r", + "▁princip al", + "▁h it", + "▁spe nd", + "▁stand s", + "▁resp ons", + "▁a y", + "▁ha w", + "▁wh ist", + "▁ar rest", + "▁kind s", + "▁requ ire", + "▁descri bed", + "▁l it", + "▁pre 
cise", + "▁prop osed", + "▁produ ce", + "▁utter ly", + "ul se", + "▁no vel", + "▁bl ame", + "▁cred it", + "▁p ause", + "os en", + "▁house hold", + "▁arm ed", + "▁follow s", + "up on", + "▁appro ach", + "▁nin ety", + "▁p ir", + "▁fl ore", + "iv ity", + "▁ref use", + "▁sens ible", + "cho ly", + "▁nation al", + "▁g rie", + "▁re ven", + "▁let 's", + "▁delight ful", + "▁extreme ly", + "▁melan choly", + "u ing", + "▁en orm", + "cl es", + "▁slight ly", + "▁sac red", + "▁recogn ized", + "▁myst ery", + "▁g ri", + "▁comp re", + "▁dist ress", + "▁war ri", + "▁use less", + "▁tri f", + "▁mount ed", + "▁phil ip", + "▁ener gy", + "▁explan ation", + "▁c as", + "at ory", + "▁p our", + "▁r ic", + "▁ch osen", + "▁every one", + "umb led", + "▁a pr", + "▁c am", + "▁pro c", + "▁res umed", + "▁appre ci", + "▁alex and", + "▁a ven", + "▁w ing", + "▁int ense", + "▁high ly", + "▁lu cy", + "▁sol id", + "▁depart ure", + "▁agree able", + "▁exer cise", + "a pped", + "▁w ard", + "▁b ud", + "▁d well", + "ic ate", + "▁de ce", + "▁te acher", + "te nding", + "▁ma x", + "▁requ est", + "▁unex pected", + "▁jose ph", + "c ol", + "▁le ap", + "▁vict im", + "▁s ighed", + "▁for ces", + "ch ie", + "▁fe ed", + "▁sp ort", + "▁dri ft", + "▁wed ding", + "▁brit ish", + "se c", + "▁att itude", + "▁vis ion", + "▁pi pe", + "▁to w", + "▁ha lt", + "▁man ners", + "▁te nd", + "▁fl ood", + "▁comm ission", + "▁gu ide", + "▁obser ve", + "▁conc ern", + "▁rus h", + "▁affect ed", + "f all", + "▁st ret", + "▁co ach", + "▁po ison", + "▁direct ed", + "▁med ic", + "▁g est", + "▁e cho", + "▁young er", + "▁conf usion", + "▁contin ue", + "▁par li", + "▁abs or", + "▁cent re", + "con om", + "▁horri ble", + "r ison", + "▁b ol", + "▁b ath", + "▁g own", + "▁by e", + "▁al oud", + "▁supp l", + "▁prof ound", + "▁er r", + "▁cheer ful", + "w orth", + "▁sent ence", + "▁mist aken", + "▁tor n", + "▁fig ures", + "▁accompan ied", + "▁c atherine", + "▁e conom", + "▁at m", + "▁sh aking", + "um ber", + "▁coun cil", + "l ot", + "▁as ce", + "il ities", 
+ "▁sp ar", + "▁end s", + "▁stra w", + "▁knight s", + "▁atm osp", + "▁sh ade", + "▁br ow", + "▁sp ark", + "▁rest ed", + "▁sent iment", + "▁reco vered", + "▁subject s", + "▁dut ies", + "▁comp osed", + "▁sw ept", + "▁real ity", + "▁sing ular", + "▁trans p", + "▁loc ked", + "▁lou is", + "▁assist ance", + "▁w ake", + "re m", + "▁so vere", + "▁un p", + "▁lo ves", + "▁abs urd", + "▁soul s", + "▁immedi ate", + "▁rid ing", + "▁connect ion", + "▁chee k", + "▁magnific ent", + "▁e re", + "▁su gar", + "▁pl ans", + "▁pr ud", + "▁dis e", + "▁ad j", + "▁lean ing", + "▁surr ounded", + "▁we 've", + "▁or n", + "▁ro ll", + "▁pro ble", + "▁str ict", + "▁aw ake", + "▁pra ise", + "▁convin ced", + "▁re le", + "▁fr ame", + "▁bre aking", + "▁cur tain", + "▁stay ed", + "▁div ided", + "▁cra w", + "▁incl ined", + "▁prev ious", + "a ult", + "om en", + "▁st air", + "▁se es", + "▁pr on", + "bo ard", + "▁comple x", + "▁pray er", + "▁pier re", + "▁unfortun ate", + "g s", + "▁gen ius", + "▁incre ase", + "▁suffic iently", + "▁ban ks", + "▁revol ution", + "▁souther n", + "k i", + "o ke", + "▁a ust", + "ed y", + "▁l ing", + "▁count ess", + "▁sleep ing", + "▁dev oted", + "▁ut most", + "▁mark et", + "▁bos om", + "▁b ark", + "▁c ath", + "al t", + "ch ar", + "▁cl ock", + "▁hand ker", + "▁adm in", + "▁sens es", + "▁id ent", + "▁mid night", + "▁connect ed", + "▁perm itted", + "▁h id", + "▁f il", + "▁f aced", + "▁g ift", + "▁ch at", + "▁br id", + "▁nor ther", + "▁hor iz", + "▁colle ge", + "▁handker chief", + "is ions", + "▁re be", + "▁pol ic", + "▁ann ounced", + "oun ce", + "▁n ons", + "▁n urs", + "al es", + "▁fle et", + "▁rag ged", + "▁co ffe", + "▁part ies", + "▁del ay", + "▁sound ed", + "▁c ities", + "▁was h", + "▁app ointed", + "▁night s", + "▁inst it", + "▁god 's", + "▁stri king", + "▁gun s", + "▁aston ishment", + "▁merch ant", + "▁parli ament", + "n al", + "▁a x", + "at ched", + "▁p il", + "▁p age", + "if orm", + "▁pl ate", + "▁thir st", + "▁neg ro", + "▁ru in", + "▁inhabit ants", + "w in", + "ar f", + 
"▁r ib", + "▁add ition", + "▁arg ument", + "b our", + "▁t ad", + "▁sc en", + "▁gu ests", + "▁wonder ing", + "▁acquaint ed", + "▁int ent", + "pl ess", + "▁destroy ed", + "▁coffe e", + "in ent", + "le br", + "▁re nder", + "▁so b", + "▁de mon", + "▁des ir", + "ud ing", + "▁get s", + "▁ass ure", + "▁ra ise", + "▁shar ply", + "▁priv ile", + "▁alar m", + "▁mach ine", + "f ied", + "▁cont ract", + "▁del iber", + "▁dr own", + "▁after ward", + "▁gu est", + "▁concl usion", + "▁ris k", + "▁ignor ant", + "b ury", + "k ind", + "▁p ian", + "an 's", + "ur ies", + "▁so il", + "▁ref er", + "▁command ed", + "▁pract ical", + "▁to ss", + "▁of fe", + "▁be held", + "▁ar ist", + "▁quar ters", + "▁deg rees", + "▁fis her", + "▁nons ense", + "▁m c", + "is p", + "▁me chan", + "ke ep", + "▁doubt less", + "▁viol ence", + "▁neg lect", + "▁fol k", + "l iness", + "▁b ul", + "▁e aster", + "▁lo ft", + "▁cont ained", + "▁ref lection", + "▁ce lebr", + "▁lea f", + "▁concl uded", + "▁distr ict", + "i ation", + "r s", + "▁s cient", + "▁he 'd", + "▁sc orn", + "▁cr ack", + "▁ste ep", + "▁mut tered", + "▁estab lish", + "▁dar ling", + "▁and rew", + "▁ch im", + "qu is", + "▁qu ality", + "▁po lly", + "▁che ck", + "▁cra ft", + "▁trave ll", + "▁univers al", + "in ate", + "▁c ig", + "at ives", + "om p", + "ut en", + "▁j ac", + "▁jo b", + "▁sub m", + "▁read er", + "▁le is", + "▁em ph", + "▁surr ound", + "o x", + "p ent", + "it ate", + "▁ex tended", + "▁le v", + "▁over t", + "▁ret ired", + "▁pu zz", + "u able", + "▁li br", + "▁ch in", + "▁sp l", + "▁real ized", + "▁ca uses", + "▁pun ishment", + "▁phys ic", + "▁leis ure", + "c an", + "▁w ave", + "▁sh ake", + "▁char m", + "▁belong ed", + "m ber", + "▁b ones", + "▁g as", + "▁r ange", + "▁pre c", + "▁sm ell", + "▁may be", + "▁inv ited", + "▁troub les", + "▁t ables", + "an ch", + "ic ip", + "▁j une", + "▁ab o", + "▁ag es", + "▁any where", + "ff in", + "▁dr unk", + "▁proper ly", + "▁loc al", + "▁impro ve", + "▁atmosp here", + "▁d ir", + "▁he 'll", + "▁re b", + "▁r ang", 
+ "▁comp ass", + "▁lie uten", + "▁lean ed", + "▁firm ly", + "▁n ations", + "▁ha y", + "▁we pt", + "▁r al", + "▁con ven", + "▁un iform", + "▁jul ia", + "e em", + "r ass", + "▁tr ack", + "▁comm er", + "▁bus hes", + "▁obs c", + "▁sort s", + "▁difficult ies", + "▁intellect ual", + "▁introdu ced", + "m ith", + "▁t ro", + "id ay", + "▁re ndered", + "▁r out", + "ad d", + "▁pl un", + "▁thr owing", + "▁hum ble", + "▁pol ite", + "▁num erous", + "▁move ments", + "▁success ful", + "▁cand le", + "▁separ ate", + "▁protect ion", + "▁thom as", + "▁enorm ous", + "▁un b", + "▁rep ub", + "▁sun sh", + "▁desce nded", + "▁unus ual", + "i ved", + "▁bl az", + "▁show s", + "▁sim pl", + "▁cat tle", + "▁cre pt", + "▁aston ished", + "▁desert ed", + "▁l ap", + "ar se", + "▁ne arest", + "ud es", + "▁ent ering", + "▁ide al", + "stand ing", + "nd ers", + "▁so re", + "ain e", + "▁cl os", + "▁our s", + "▁where ver", + "▁ter m", + "▁vis ited", + "▁cal cul", + "d s", + "▁b ase", + "▁g ates", + "▁st amp", + "▁li ber", + "▁offic ial", + "▁e rect", + "▁al t", + "el ia", + "▁har mon", + "▁pain ful", + "▁burn ed", + "▁repub lic", + "u er", + "▁l ately", + "▁it al", + "am m", + "▁te ar", + "▁act ions", + "▁fin al", + "▁start led", + "▁sens ation", + "▁fat al", + "ol ic", + "▁fl ash", + "▁app et", + "▁strong er", + "▁num bers", + "▁grat itude", + "▁fem ale", + "▁wes tern", + "l est" + ] + } +} \ No newline at end of file diff --git a/out/checkpoint-18000/tokenizer_config.json b/out/checkpoint-18000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0073e6415da746fc5c44a52e02785cb94510efa4 --- /dev/null +++ b/out/checkpoint-18000/tokenizer_config.json @@ -0,0 +1,9253 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|audio:0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|audio:1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "2": { + "content": "<|audio:2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "<|audio:3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "<|audio:4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "5": { + "content": "<|audio:5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "6": { + "content": "<|audio:6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "7": { + "content": "<|audio:7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "8": { + "content": "<|audio:8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "9": { + "content": "<|audio:9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "10": { + "content": "<|audio:10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "11": { + "content": "<|audio:11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "12": { + "content": "<|audio:12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "13": { + "content": "<|audio:13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "14": { + "content": "<|audio:14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "15": { + "content": "<|audio:15|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "16": { + "content": "<|audio:16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "17": { + "content": "<|audio:17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "18": { + "content": "<|audio:18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "19": { + "content": "<|audio:19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "20": { + "content": "<|audio:20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21": { + "content": "<|audio:21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "22": { + "content": "<|audio:22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "23": { + "content": "<|audio:23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "24": { + "content": "<|audio:24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "25": { + "content": "<|audio:25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "26": { + "content": "<|audio:26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "27": { + "content": "<|audio:27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "28": { + "content": "<|audio:28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "29": { + "content": 
"<|audio:29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "30": { + "content": "<|audio:30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "31": { + "content": "<|audio:31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32": { + "content": "<|audio:32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "33": { + "content": "<|audio:33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "34": { + "content": "<|audio:34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "35": { + "content": "<|audio:35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "36": { + "content": "<|audio:36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "37": { + "content": "<|audio:37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "38": { + "content": "<|audio:38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "39": { + "content": "<|audio:39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "40": { + "content": "<|audio:40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "41": { + "content": "<|audio:41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "42": { + "content": "<|audio:42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "43": { + "content": "<|audio:43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "44": { + "content": "<|audio:44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "45": { + "content": "<|audio:45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "46": { + "content": "<|audio:46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "47": { + "content": "<|audio:47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "48": { + "content": "<|audio:48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "49": { + "content": "<|audio:49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "50": { + "content": "<|audio:50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "51": { + "content": "<|audio:51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "52": { + "content": "<|audio:52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "53": { + "content": "<|audio:53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "54": { + "content": "<|audio:54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "55": { + "content": "<|audio:55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "56": { + "content": "<|audio:56|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "57": { + "content": "<|audio:57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "58": { + "content": "<|audio:58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "59": { + "content": "<|audio:59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "60": { + "content": "<|audio:60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "61": { + "content": "<|audio:61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "62": { + "content": "<|audio:62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "63": { + "content": "<|audio:63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "64": { + "content": "<|audio:64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "65": { + "content": "<|audio:65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "66": { + "content": "<|audio:66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "67": { + "content": "<|audio:67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "68": { + "content": "<|audio:68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "69": { + "content": "<|audio:69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "70": { + "content": "<|audio:70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "71": { + "content": "<|audio:71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "72": { + "content": "<|audio:72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "73": { + "content": "<|audio:73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "74": { + "content": "<|audio:74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "75": { + "content": "<|audio:75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "76": { + "content": "<|audio:76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "77": { + "content": "<|audio:77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "78": { + "content": "<|audio:78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "79": { + "content": "<|audio:79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "80": { + "content": "<|audio:80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "81": { + "content": "<|audio:81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "82": { + "content": "<|audio:82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "83": { + "content": "<|audio:83|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "84": { + "content": "<|audio:84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "85": { + "content": "<|audio:85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "86": { + "content": "<|audio:86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "87": { + "content": "<|audio:87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "88": { + "content": "<|audio:88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "89": { + "content": "<|audio:89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "90": { + "content": "<|audio:90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "91": { + "content": "<|audio:91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "92": { + "content": "<|audio:92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "93": { + "content": "<|audio:93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "94": { + "content": "<|audio:94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "95": { + "content": "<|audio:95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "96": { + "content": "<|audio:96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"97": { + "content": "<|audio:97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "98": { + "content": "<|audio:98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "99": { + "content": "<|audio:99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100": { + "content": "<|audio:100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101": { + "content": "<|audio:101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "102": { + "content": "<|audio:102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "103": { + "content": "<|audio:103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "104": { + "content": "<|audio:104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "105": { + "content": "<|audio:105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "106": { + "content": "<|audio:106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "<|audio:107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "<|audio:108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "109": { + "content": "<|audio:109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "110": { + "content": "<|audio:110|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "111": { + "content": "<|audio:111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "112": { + "content": "<|audio:112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "113": { + "content": "<|audio:113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "114": { + "content": "<|audio:114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "115": { + "content": "<|audio:115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "116": { + "content": "<|audio:116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "117": { + "content": "<|audio:117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "118": { + "content": "<|audio:118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "119": { + "content": "<|audio:119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "120": { + "content": "<|audio:120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "121": { + "content": "<|audio:121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "122": { + "content": "<|audio:122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "123": { + "content": "<|audio:123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "124": { + "content": "<|audio:124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "125": { + "content": "<|audio:125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "126": { + "content": "<|audio:126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "127": { + "content": "<|audio:127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128": { + "content": "<|audio:128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "129": { + "content": "<|audio:129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "130": { + "content": "<|audio:130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "131": { + "content": "<|audio:131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "132": { + "content": "<|audio:132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "133": { + "content": "<|audio:133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "134": { + "content": "<|audio:134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "135": { + "content": "<|audio:135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "136": { + "content": "<|audio:136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "137": { + "content": 
"<|audio:137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "138": { + "content": "<|audio:138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "139": { + "content": "<|audio:139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "140": { + "content": "<|audio:140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "141": { + "content": "<|audio:141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "142": { + "content": "<|audio:142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "143": { + "content": "<|audio:143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "144": { + "content": "<|audio:144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "145": { + "content": "<|audio:145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "146": { + "content": "<|audio:146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "147": { + "content": "<|audio:147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "148": { + "content": "<|audio:148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "149": { + "content": "<|audio:149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "150": { + "content": "<|audio:150|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "151": { + "content": "<|audio:151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "152": { + "content": "<|audio:152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "153": { + "content": "<|audio:153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "154": { + "content": "<|audio:154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "155": { + "content": "<|audio:155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "156": { + "content": "<|audio:156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "157": { + "content": "<|audio:157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "158": { + "content": "<|audio:158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "159": { + "content": "<|audio:159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "160": { + "content": "<|audio:160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "161": { + "content": "<|audio:161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "162": { + "content": "<|audio:162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "163": { + "content": "<|audio:163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "164": { + "content": "<|audio:164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "165": { + "content": "<|audio:165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "166": { + "content": "<|audio:166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "167": { + "content": "<|audio:167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "168": { + "content": "<|audio:168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "169": { + "content": "<|audio:169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "170": { + "content": "<|audio:170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "171": { + "content": "<|audio:171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "172": { + "content": "<|audio:172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "173": { + "content": "<|audio:173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "174": { + "content": "<|audio:174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "175": { + "content": "<|audio:175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "176": { + "content": "<|audio:176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "177": { + "content": "<|audio:177|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "178": { + "content": "<|audio:178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "179": { + "content": "<|audio:179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "180": { + "content": "<|audio:180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "181": { + "content": "<|audio:181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "182": { + "content": "<|audio:182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "183": { + "content": "<|audio:183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "184": { + "content": "<|audio:184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "185": { + "content": "<|audio:185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "186": { + "content": "<|audio:186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "187": { + "content": "<|audio:187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "188": { + "content": "<|audio:188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "189": { + "content": "<|audio:189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "190": { + "content": "<|audio:190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "191": { + "content": "<|audio:191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "192": { + "content": "<|audio:192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "193": { + "content": "<|audio:193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "194": { + "content": "<|audio:194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "195": { + "content": "<|audio:195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "196": { + "content": "<|audio:196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "197": { + "content": "<|audio:197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "198": { + "content": "<|audio:198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "199": { + "content": "<|audio:199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200": { + "content": "<|audio:200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "201": { + "content": "<|audio:201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "202": { + "content": "<|audio:202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "203": { + "content": "<|audio:203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "204": { + "content": 
"<|audio:204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "205": { + "content": "<|audio:205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "206": { + "content": "<|audio:206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "207": { + "content": "<|audio:207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "208": { + "content": "<|audio:208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "209": { + "content": "<|audio:209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "210": { + "content": "<|audio:210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "211": { + "content": "<|audio:211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "212": { + "content": "<|audio:212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "213": { + "content": "<|audio:213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "214": { + "content": "<|audio:214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "215": { + "content": "<|audio:215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "216": { + "content": "<|audio:216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "217": { + "content": "<|audio:217|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "218": { + "content": "<|audio:218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "219": { + "content": "<|audio:219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "220": { + "content": "<|audio:220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "221": { + "content": "<|audio:221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "222": { + "content": "<|audio:222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "223": { + "content": "<|audio:223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "224": { + "content": "<|audio:224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "225": { + "content": "<|audio:225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "226": { + "content": "<|audio:226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "227": { + "content": "<|audio:227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "228": { + "content": "<|audio:228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "229": { + "content": "<|audio:229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "230": { + "content": "<|audio:230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "231": { + "content": "<|audio:231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "232": { + "content": "<|audio:232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "233": { + "content": "<|audio:233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "234": { + "content": "<|audio:234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "235": { + "content": "<|audio:235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "236": { + "content": "<|audio:236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "237": { + "content": "<|audio:237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "238": { + "content": "<|audio:238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "239": { + "content": "<|audio:239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "240": { + "content": "<|audio:240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "241": { + "content": "<|audio:241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "242": { + "content": "<|audio:242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "243": { + "content": "<|audio:243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "244": { + "content": "<|audio:244|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "245": { + "content": "<|audio:245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "246": { + "content": "<|audio:246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "247": { + "content": "<|audio:247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "248": { + "content": "<|audio:248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "249": { + "content": "<|audio:249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250": { + "content": "<|audio:250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "251": { + "content": "<|audio:251|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "252": { + "content": "<|audio:252|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "253": { + "content": "<|audio:253|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "254": { + "content": "<|audio:254|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255": { + "content": "<|audio:255|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256": { + "content": "<|audio:256|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "257": { + "content": "<|audio:257|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "258": { + "content": "<|audio:258|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "259": { + "content": "<|audio:259|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "260": { + "content": "<|audio:260|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "261": { + "content": "<|audio:261|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "262": { + "content": "<|audio:262|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "263": { + "content": "<|audio:263|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "264": { + "content": "<|audio:264|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "265": { + "content": "<|audio:265|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "266": { + "content": "<|audio:266|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "267": { + "content": "<|audio:267|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "268": { + "content": "<|audio:268|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "269": { + "content": "<|audio:269|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "270": { + "content": "<|audio:270|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "271": { + "content": 
"<|audio:271|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "272": { + "content": "<|audio:272|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "273": { + "content": "<|audio:273|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "274": { + "content": "<|audio:274|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "275": { + "content": "<|audio:275|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "276": { + "content": "<|audio:276|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "277": { + "content": "<|audio:277|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "278": { + "content": "<|audio:278|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "279": { + "content": "<|audio:279|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "280": { + "content": "<|audio:280|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "281": { + "content": "<|audio:281|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "282": { + "content": "<|audio:282|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "283": { + "content": "<|audio:283|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "284": { + "content": "<|audio:284|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "285": { + "content": "<|audio:285|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "286": { + "content": "<|audio:286|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "287": { + "content": "<|audio:287|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "288": { + "content": "<|audio:288|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "289": { + "content": "<|audio:289|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "290": { + "content": "<|audio:290|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "291": { + "content": "<|audio:291|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "292": { + "content": "<|audio:292|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "293": { + "content": "<|audio:293|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "294": { + "content": "<|audio:294|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "295": { + "content": "<|audio:295|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "296": { + "content": "<|audio:296|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "297": { + "content": "<|audio:297|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "298": { + "content": "<|audio:298|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "299": { + "content": "<|audio:299|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "300": { + "content": "<|audio:300|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "301": { + "content": "<|audio:301|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "302": { + "content": "<|audio:302|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "303": { + "content": "<|audio:303|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "304": { + "content": "<|audio:304|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "305": { + "content": "<|audio:305|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "306": { + "content": "<|audio:306|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "307": { + "content": "<|audio:307|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "308": { + "content": "<|audio:308|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "309": { + "content": "<|audio:309|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "310": { + "content": "<|audio:310|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "311": { + "content": "<|audio:311|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "312": { + "content": "<|audio:312|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "313": { + "content": "<|audio:313|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "314": { + "content": "<|audio:314|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "315": { + "content": "<|audio:315|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "316": { + "content": "<|audio:316|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "317": { + "content": "<|audio:317|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "318": { + "content": "<|audio:318|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "319": { + "content": "<|audio:319|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "320": { + "content": "<|audio:320|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "321": { + "content": "<|audio:321|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "322": { + "content": "<|audio:322|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "323": { + "content": "<|audio:323|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "324": { + "content": "<|audio:324|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "325": { + "content": "<|audio:325|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "326": { + "content": "<|audio:326|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "327": { + "content": "<|audio:327|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "328": { + "content": "<|audio:328|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "329": { + "content": "<|audio:329|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "330": { + "content": "<|audio:330|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "331": { + "content": "<|audio:331|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "332": { + "content": "<|audio:332|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "333": { + "content": "<|audio:333|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "334": { + "content": "<|audio:334|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "335": { + "content": "<|audio:335|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "336": { + "content": "<|audio:336|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "337": { + "content": "<|audio:337|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "338": { + "content": 
"<|audio:338|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "339": { + "content": "<|audio:339|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "340": { + "content": "<|audio:340|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "341": { + "content": "<|audio:341|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "342": { + "content": "<|audio:342|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "343": { + "content": "<|audio:343|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "344": { + "content": "<|audio:344|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "345": { + "content": "<|audio:345|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "346": { + "content": "<|audio:346|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "347": { + "content": "<|audio:347|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "348": { + "content": "<|audio:348|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "349": { + "content": "<|audio:349|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "350": { + "content": "<|audio:350|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "351": { + "content": "<|audio:351|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "352": { + "content": "<|audio:352|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "353": { + "content": "<|audio:353|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "354": { + "content": "<|audio:354|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "355": { + "content": "<|audio:355|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "356": { + "content": "<|audio:356|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "357": { + "content": "<|audio:357|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "358": { + "content": "<|audio:358|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "359": { + "content": "<|audio:359|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "360": { + "content": "<|audio:360|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "361": { + "content": "<|audio:361|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "362": { + "content": "<|audio:362|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "363": { + "content": "<|audio:363|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "364": { + "content": "<|audio:364|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "365": { + "content": "<|audio:365|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "366": { + "content": "<|audio:366|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "367": { + "content": "<|audio:367|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "368": { + "content": "<|audio:368|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "369": { + "content": "<|audio:369|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "370": { + "content": "<|audio:370|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "371": { + "content": "<|audio:371|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "372": { + "content": "<|audio:372|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "373": { + "content": "<|audio:373|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "374": { + "content": "<|audio:374|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "375": { + "content": "<|audio:375|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "376": { + "content": "<|audio:376|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "377": { + "content": "<|audio:377|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "378": { + "content": "<|audio:378|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "379": { + "content": "<|audio:379|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "380": { + "content": "<|audio:380|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "381": { + "content": "<|audio:381|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "382": { + "content": "<|audio:382|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "383": { + "content": "<|audio:383|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "384": { + "content": "<|audio:384|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "385": { + "content": "<|audio:385|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "386": { + "content": "<|audio:386|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "387": { + "content": "<|audio:387|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "388": { + "content": "<|audio:388|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "389": { + "content": "<|audio:389|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "390": { + "content": "<|audio:390|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "391": { + "content": "<|audio:391|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "392": { + "content": "<|audio:392|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "393": { + "content": "<|audio:393|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "394": { + "content": "<|audio:394|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "395": { + "content": "<|audio:395|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "396": { + "content": "<|audio:396|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "397": { + "content": "<|audio:397|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "398": { + "content": "<|audio:398|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "399": { + "content": "<|audio:399|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "400": { + "content": "<|audio:400|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "401": { + "content": "<|audio:401|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "402": { + "content": "<|audio:402|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "403": { + "content": "<|audio:403|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "404": { + "content": "<|audio:404|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "405": { + "content": 
"<|audio:405|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "406": { + "content": "<|audio:406|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "407": { + "content": "<|audio:407|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "408": { + "content": "<|audio:408|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "409": { + "content": "<|audio:409|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "410": { + "content": "<|audio:410|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "411": { + "content": "<|audio:411|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "412": { + "content": "<|audio:412|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "413": { + "content": "<|audio:413|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "414": { + "content": "<|audio:414|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "415": { + "content": "<|audio:415|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "416": { + "content": "<|audio:416|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "417": { + "content": "<|audio:417|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "418": { + "content": "<|audio:418|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "419": { + "content": "<|audio:419|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "420": { + "content": "<|audio:420|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "421": { + "content": "<|audio:421|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "422": { + "content": "<|audio:422|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "423": { + "content": "<|audio:423|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "424": { + "content": "<|audio:424|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "425": { + "content": "<|audio:425|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "426": { + "content": "<|audio:426|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "427": { + "content": "<|audio:427|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "428": { + "content": "<|audio:428|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "429": { + "content": "<|audio:429|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "430": { + "content": "<|audio:430|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "431": { + "content": "<|audio:431|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "432": { + "content": "<|audio:432|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "433": { + "content": "<|audio:433|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "434": { + "content": "<|audio:434|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "435": { + "content": "<|audio:435|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "436": { + "content": "<|audio:436|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "437": { + "content": "<|audio:437|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "438": { + "content": "<|audio:438|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "439": { + "content": "<|audio:439|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "440": { + "content": "<|audio:440|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "441": { + "content": "<|audio:441|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "442": { + "content": "<|audio:442|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "443": { + "content": "<|audio:443|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "444": { + "content": "<|audio:444|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "445": { + "content": "<|audio:445|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "446": { + "content": "<|audio:446|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "447": { + "content": "<|audio:447|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "448": { + "content": "<|audio:448|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "449": { + "content": "<|audio:449|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "450": { + "content": "<|audio:450|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "451": { + "content": "<|audio:451|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "452": { + "content": "<|audio:452|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "453": { + "content": "<|audio:453|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "454": { + "content": "<|audio:454|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "455": { + "content": "<|audio:455|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "456": { + "content": "<|audio:456|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "457": { + "content": "<|audio:457|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "458": { + "content": "<|audio:458|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "459": { + "content": "<|audio:459|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "460": { + "content": "<|audio:460|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "461": { + "content": "<|audio:461|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "462": { + "content": "<|audio:462|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "463": { + "content": "<|audio:463|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "464": { + "content": "<|audio:464|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "465": { + "content": "<|audio:465|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "466": { + "content": "<|audio:466|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "467": { + "content": "<|audio:467|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "468": { + "content": "<|audio:468|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "469": { + "content": "<|audio:469|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "470": { + "content": "<|audio:470|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "471": { + "content": "<|audio:471|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "472": { + "content": 
"<|audio:472|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "473": { + "content": "<|audio:473|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "474": { + "content": "<|audio:474|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "475": { + "content": "<|audio:475|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "476": { + "content": "<|audio:476|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "477": { + "content": "<|audio:477|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "478": { + "content": "<|audio:478|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "479": { + "content": "<|audio:479|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "480": { + "content": "<|audio:480|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "481": { + "content": "<|audio:481|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "482": { + "content": "<|audio:482|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "483": { + "content": "<|audio:483|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "484": { + "content": "<|audio:484|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "485": { + "content": "<|audio:485|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "486": { + "content": "<|audio:486|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "487": { + "content": "<|audio:487|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "488": { + "content": "<|audio:488|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "489": { + "content": "<|audio:489|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "490": { + "content": "<|audio:490|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "491": { + "content": "<|audio:491|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "492": { + "content": "<|audio:492|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "493": { + "content": "<|audio:493|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "494": { + "content": "<|audio:494|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "495": { + "content": "<|audio:495|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "496": { + "content": "<|audio:496|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "497": { + "content": "<|audio:497|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "498": { + "content": "<|audio:498|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "499": { + "content": "<|audio:499|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "500": { + "content": "<|audio:500|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "501": { + "content": "<|audio:501|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "502": { + "content": "<|audio:502|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "503": { + "content": "<|audio:503|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "504": { + "content": "<|audio:504|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "505": { + "content": "<|audio:505|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "506": { + "content": "<|audio:506|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "507": { + "content": "<|audio:507|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "508": { + "content": "<|audio:508|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "509": { + "content": "<|audio:509|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "510": { + "content": "<|audio:510|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "511": { + "content": "<|audio:511|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "512": { + "content": "<|audio:512|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "513": { + "content": "<|audio:513|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "514": { + "content": "<|audio:514|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "515": { + "content": "<|audio:515|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "516": { + "content": "<|audio:516|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "517": { + "content": "<|audio:517|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "518": { + "content": "<|audio:518|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "519": { + "content": "<|audio:519|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "520": { + "content": "<|audio:520|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "521": { + "content": "<|audio:521|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "522": { + "content": "<|audio:522|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "523": { + "content": "<|audio:523|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "524": { + "content": "<|audio:524|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "525": { + "content": "<|audio:525|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "526": { + "content": "<|audio:526|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "527": { + "content": "<|audio:527|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "528": { + "content": "<|audio:528|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "529": { + "content": "<|audio:529|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "530": { + "content": "<|audio:530|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "531": { + "content": "<|audio:531|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "532": { + "content": "<|audio:532|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "533": { + "content": "<|audio:533|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "534": { + "content": "<|audio:534|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "535": { + "content": "<|audio:535|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "536": { + "content": "<|audio:536|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "537": { + "content": "<|audio:537|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "538": { + "content": "<|audio:538|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "539": { + "content": 
"<|audio:539|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "540": { + "content": "<|audio:540|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "541": { + "content": "<|audio:541|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "542": { + "content": "<|audio:542|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "543": { + "content": "<|audio:543|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "544": { + "content": "<|audio:544|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "545": { + "content": "<|audio:545|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "546": { + "content": "<|audio:546|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "547": { + "content": "<|audio:547|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "548": { + "content": "<|audio:548|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "549": { + "content": "<|audio:549|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "550": { + "content": "<|audio:550|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "551": { + "content": "<|audio:551|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "552": { + "content": "<|audio:552|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "553": { + "content": "<|audio:553|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "554": { + "content": "<|audio:554|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "555": { + "content": "<|audio:555|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "556": { + "content": "<|audio:556|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "557": { + "content": "<|audio:557|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "558": { + "content": "<|audio:558|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "559": { + "content": "<|audio:559|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "560": { + "content": "<|audio:560|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "561": { + "content": "<|audio:561|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "562": { + "content": "<|audio:562|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "563": { + "content": "<|audio:563|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "564": { + "content": "<|audio:564|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "565": { + "content": "<|audio:565|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "566": { + "content": "<|audio:566|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "567": { + "content": "<|audio:567|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "568": { + "content": "<|audio:568|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "569": { + "content": "<|audio:569|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "570": { + "content": "<|audio:570|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "571": { + "content": "<|audio:571|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "572": { + "content": "<|audio:572|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "573": { + "content": "<|audio:573|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "574": { + "content": "<|audio:574|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "575": { + "content": "<|audio:575|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "576": { + "content": "<|audio:576|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "577": { + "content": "<|audio:577|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "578": { + "content": "<|audio:578|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "579": { + "content": "<|audio:579|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "580": { + "content": "<|audio:580|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "581": { + "content": "<|audio:581|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "582": { + "content": "<|audio:582|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "583": { + "content": "<|audio:583|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "584": { + "content": "<|audio:584|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "585": { + "content": "<|audio:585|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "586": { + "content": "<|audio:586|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "587": { + "content": "<|audio:587|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "588": { + "content": "<|audio:588|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "589": { + "content": "<|audio:589|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "590": { + "content": "<|audio:590|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "591": { + "content": "<|audio:591|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "592": { + "content": "<|audio:592|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "593": { + "content": "<|audio:593|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "594": { + "content": "<|audio:594|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "595": { + "content": "<|audio:595|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "596": { + "content": "<|audio:596|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "597": { + "content": "<|audio:597|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "598": { + "content": "<|audio:598|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "599": { + "content": "<|audio:599|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "600": { + "content": "<|audio:600|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "601": { + "content": "<|audio:601|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "602": { + "content": "<|audio:602|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "603": { + "content": "<|audio:603|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "604": { + "content": "<|audio:604|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "605": { + "content": "<|audio:605|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "606": { + "content": 
"<|audio:606|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "607": { + "content": "<|audio:607|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "608": { + "content": "<|audio:608|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "609": { + "content": "<|audio:609|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "610": { + "content": "<|audio:610|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "611": { + "content": "<|audio:611|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "612": { + "content": "<|audio:612|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "613": { + "content": "<|audio:613|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "614": { + "content": "<|audio:614|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "615": { + "content": "<|audio:615|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "616": { + "content": "<|audio:616|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "617": { + "content": "<|audio:617|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "618": { + "content": "<|audio:618|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "619": { + "content": "<|audio:619|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "620": { + "content": "<|audio:620|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "621": { + "content": "<|audio:621|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "622": { + "content": "<|audio:622|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "623": { + "content": "<|audio:623|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "624": { + "content": "<|audio:624|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "625": { + "content": "<|audio:625|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "626": { + "content": "<|audio:626|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "627": { + "content": "<|audio:627|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "628": { + "content": "<|audio:628|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "629": { + "content": "<|audio:629|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "630": { + "content": "<|audio:630|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "631": { + "content": "<|audio:631|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "632": { + "content": "<|audio:632|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "633": { + "content": "<|audio:633|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "634": { + "content": "<|audio:634|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "635": { + "content": "<|audio:635|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "636": { + "content": "<|audio:636|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "637": { + "content": "<|audio:637|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "638": { + "content": "<|audio:638|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "639": { + "content": "<|audio:639|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "640": { + "content": "<|audio:640|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "641": { + "content": "<|audio:641|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "642": { + "content": "<|audio:642|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "643": { + "content": "<|audio:643|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "644": { + "content": "<|audio:644|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "645": { + "content": "<|audio:645|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "646": { + "content": "<|audio:646|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "647": { + "content": "<|audio:647|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "648": { + "content": "<|audio:648|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "649": { + "content": "<|audio:649|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "650": { + "content": "<|audio:650|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "651": { + "content": "<|audio:651|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "652": { + "content": "<|audio:652|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "653": { + "content": "<|audio:653|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "654": { + "content": "<|audio:654|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "655": { + "content": "<|audio:655|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "656": { + "content": "<|audio:656|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "657": { + "content": "<|audio:657|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "658": { + "content": "<|audio:658|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "659": { + "content": "<|audio:659|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "660": { + "content": "<|audio:660|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "661": { + "content": "<|audio:661|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "662": { + "content": "<|audio:662|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "663": { + "content": "<|audio:663|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "664": { + "content": "<|audio:664|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "665": { + "content": "<|audio:665|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "666": { + "content": "<|audio:666|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "667": { + "content": "<|audio:667|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "668": { + "content": "<|audio:668|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "669": { + "content": "<|audio:669|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "670": { + "content": "<|audio:670|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "671": { + "content": "<|audio:671|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "672": { + "content": "<|audio:672|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "673": { + "content": 
"<|audio:673|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "674": { + "content": "<|audio:674|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "675": { + "content": "<|audio:675|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "676": { + "content": "<|audio:676|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "677": { + "content": "<|audio:677|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "678": { + "content": "<|audio:678|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "679": { + "content": "<|audio:679|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "680": { + "content": "<|audio:680|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "681": { + "content": "<|audio:681|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "682": { + "content": "<|audio:682|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "683": { + "content": "<|audio:683|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "684": { + "content": "<|audio:684|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "685": { + "content": "<|audio:685|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "686": { + "content": "<|audio:686|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "687": { + "content": "<|audio:687|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "688": { + "content": "<|audio:688|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "689": { + "content": "<|audio:689|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "690": { + "content": "<|audio:690|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "691": { + "content": "<|audio:691|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "692": { + "content": "<|audio:692|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "693": { + "content": "<|audio:693|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "694": { + "content": "<|audio:694|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "695": { + "content": "<|audio:695|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "696": { + "content": "<|audio:696|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "697": { + "content": "<|audio:697|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "698": { + "content": "<|audio:698|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "699": { + "content": "<|audio:699|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "700": { + "content": "<|audio:700|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "701": { + "content": "<|audio:701|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "702": { + "content": "<|audio:702|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "703": { + "content": "<|audio:703|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "704": { + "content": "<|audio:704|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "705": { + "content": "<|audio:705|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "706": { + "content": "<|audio:706|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "707": { + "content": "<|audio:707|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "708": { + "content": "<|audio:708|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "709": { + "content": "<|audio:709|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "710": { + "content": "<|audio:710|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "711": { + "content": "<|audio:711|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "712": { + "content": "<|audio:712|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "713": { + "content": "<|audio:713|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "714": { + "content": "<|audio:714|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "715": { + "content": "<|audio:715|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "716": { + "content": "<|audio:716|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "717": { + "content": "<|audio:717|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "718": { + "content": "<|audio:718|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "719": { + "content": "<|audio:719|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "720": { + "content": "<|audio:720|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "721": { + "content": "<|audio:721|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "722": { + "content": "<|audio:722|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "723": { + "content": "<|audio:723|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "724": { + "content": "<|audio:724|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "725": { + "content": "<|audio:725|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "726": { + "content": "<|audio:726|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "727": { + "content": "<|audio:727|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "728": { + "content": "<|audio:728|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "729": { + "content": "<|audio:729|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "730": { + "content": "<|audio:730|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "731": { + "content": "<|audio:731|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "732": { + "content": "<|audio:732|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "733": { + "content": "<|audio:733|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "734": { + "content": "<|audio:734|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "735": { + "content": "<|audio:735|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "736": { + "content": "<|audio:736|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "737": { + "content": "<|audio:737|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "738": { + "content": "<|audio:738|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "739": { + "content": "<|audio:739|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "740": { + "content": 
"<|audio:740|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "741": { + "content": "<|audio:741|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "742": { + "content": "<|audio:742|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "743": { + "content": "<|audio:743|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "744": { + "content": "<|audio:744|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "745": { + "content": "<|audio:745|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "746": { + "content": "<|audio:746|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "747": { + "content": "<|audio:747|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "748": { + "content": "<|audio:748|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "749": { + "content": "<|audio:749|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "750": { + "content": "<|audio:750|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "751": { + "content": "<|audio:751|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "752": { + "content": "<|audio:752|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "753": { + "content": "<|audio:753|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "754": { + "content": "<|audio:754|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "755": { + "content": "<|audio:755|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "756": { + "content": "<|audio:756|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "757": { + "content": "<|audio:757|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "758": { + "content": "<|audio:758|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "759": { + "content": "<|audio:759|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "760": { + "content": "<|audio:760|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "761": { + "content": "<|audio:761|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "762": { + "content": "<|audio:762|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "763": { + "content": "<|audio:763|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "764": { + "content": "<|audio:764|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "765": { + "content": "<|audio:765|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "766": { + "content": "<|audio:766|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "767": { + "content": "<|audio:767|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "768": { + "content": "<|audio:768|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "769": { + "content": "<|audio:769|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "770": { + "content": "<|audio:770|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "771": { + "content": "<|audio:771|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "772": { + "content": "<|audio:772|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "773": { + "content": "<|audio:773|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "774": { + "content": "<|audio:774|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "775": { + "content": "<|audio:775|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "776": { + "content": "<|audio:776|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "777": { + "content": "<|audio:777|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "778": { + "content": "<|audio:778|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "779": { + "content": "<|audio:779|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "780": { + "content": "<|audio:780|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "781": { + "content": "<|audio:781|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "782": { + "content": "<|audio:782|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "783": { + "content": "<|audio:783|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "784": { + "content": "<|audio:784|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "785": { + "content": "<|audio:785|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "786": { + "content": "<|audio:786|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "787": { + "content": "<|audio:787|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "788": { + "content": "<|audio:788|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "789": { + "content": "<|audio:789|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "790": { + "content": "<|audio:790|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "791": { + "content": "<|audio:791|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "792": { + "content": "<|audio:792|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "793": { + "content": "<|audio:793|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "794": { + "content": "<|audio:794|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "795": { + "content": "<|audio:795|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "796": { + "content": "<|audio:796|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "797": { + "content": "<|audio:797|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "798": { + "content": "<|audio:798|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "799": { + "content": "<|audio:799|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "800": { + "content": "<|audio:800|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "801": { + "content": "<|audio:801|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "802": { + "content": "<|audio:802|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "803": { + "content": "<|audio:803|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "804": { + "content": "<|audio:804|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "805": { + "content": "<|audio:805|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "806": { + "content": "<|audio:806|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "807": { + "content": 
"<|audio:807|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "808": { + "content": "<|audio:808|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "809": { + "content": "<|audio:809|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "810": { + "content": "<|audio:810|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "811": { + "content": "<|audio:811|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "812": { + "content": "<|audio:812|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "813": { + "content": "<|audio:813|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "814": { + "content": "<|audio:814|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "815": { + "content": "<|audio:815|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "816": { + "content": "<|audio:816|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "817": { + "content": "<|audio:817|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "818": { + "content": "<|audio:818|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "819": { + "content": "<|audio:819|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "820": { + "content": "<|audio:820|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "821": { + "content": "<|audio:821|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "822": { + "content": "<|audio:822|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "823": { + "content": "<|audio:823|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "824": { + "content": "<|audio:824|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "825": { + "content": "<|audio:825|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "826": { + "content": "<|audio:826|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "827": { + "content": "<|audio:827|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "828": { + "content": "<|audio:828|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "829": { + "content": "<|audio:829|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "830": { + "content": "<|audio:830|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "831": { + "content": "<|audio:831|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "832": { + "content": "<|audio:832|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "833": { + "content": "<|audio:833|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "834": { + "content": "<|audio:834|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "835": { + "content": "<|audio:835|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "836": { + "content": "<|audio:836|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "837": { + "content": "<|audio:837|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "838": { + "content": "<|audio:838|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "839": { + "content": "<|audio:839|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "840": { + "content": "<|audio:840|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "841": { + "content": "<|audio:841|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "842": { + "content": "<|audio:842|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "843": { + "content": "<|audio:843|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "844": { + "content": "<|audio:844|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "845": { + "content": "<|audio:845|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "846": { + "content": "<|audio:846|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "847": { + "content": "<|audio:847|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "848": { + "content": "<|audio:848|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "849": { + "content": "<|audio:849|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "850": { + "content": "<|audio:850|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "851": { + "content": "<|audio:851|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "852": { + "content": "<|audio:852|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "853": { + "content": "<|audio:853|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "854": { + "content": "<|audio:854|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "855": { + "content": "<|audio:855|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "856": { + "content": "<|audio:856|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "857": { + "content": "<|audio:857|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "858": { + "content": "<|audio:858|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "859": { + "content": "<|audio:859|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "860": { + "content": "<|audio:860|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "861": { + "content": "<|audio:861|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "862": { + "content": "<|audio:862|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "863": { + "content": "<|audio:863|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "864": { + "content": "<|audio:864|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "865": { + "content": "<|audio:865|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "866": { + "content": "<|audio:866|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "867": { + "content": "<|audio:867|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "868": { + "content": "<|audio:868|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "869": { + "content": "<|audio:869|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "870": { + "content": "<|audio:870|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "871": { + "content": "<|audio:871|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "872": { + "content": "<|audio:872|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "873": { + "content": "<|audio:873|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "874": { + "content": 
"<|audio:874|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "875": { + "content": "<|audio:875|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "876": { + "content": "<|audio:876|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "877": { + "content": "<|audio:877|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "878": { + "content": "<|audio:878|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "879": { + "content": "<|audio:879|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "880": { + "content": "<|audio:880|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "881": { + "content": "<|audio:881|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "882": { + "content": "<|audio:882|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "883": { + "content": "<|audio:883|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "884": { + "content": "<|audio:884|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "885": { + "content": "<|audio:885|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "886": { + "content": "<|audio:886|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "887": { + "content": "<|audio:887|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "888": { + "content": "<|audio:888|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "889": { + "content": "<|audio:889|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "890": { + "content": "<|audio:890|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "891": { + "content": "<|audio:891|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "892": { + "content": "<|audio:892|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "893": { + "content": "<|audio:893|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "894": { + "content": "<|audio:894|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "895": { + "content": "<|audio:895|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "896": { + "content": "<|audio:896|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "897": { + "content": "<|audio:897|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "898": { + "content": "<|audio:898|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "899": { + "content": "<|audio:899|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "900": { + "content": "<|audio:900|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "901": { + "content": "<|audio:901|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "902": { + "content": "<|audio:902|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "903": { + "content": "<|audio:903|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "904": { + "content": "<|audio:904|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "905": { + "content": "<|audio:905|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "906": { + "content": "<|audio:906|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "907": { + "content": "<|audio:907|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "908": { + "content": "<|audio:908|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "909": { + "content": "<|audio:909|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "910": { + "content": "<|audio:910|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "911": { + "content": "<|audio:911|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "912": { + "content": "<|audio:912|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "913": { + "content": "<|audio:913|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "914": { + "content": "<|audio:914|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "915": { + "content": "<|audio:915|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "916": { + "content": "<|audio:916|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "917": { + "content": "<|audio:917|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "918": { + "content": "<|audio:918|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "919": { + "content": "<|audio:919|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "920": { + "content": "<|audio:920|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "921": { + "content": "<|audio:921|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "922": { + "content": "<|audio:922|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "923": { + "content": "<|audio:923|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "924": { + "content": "<|audio:924|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "925": { + "content": "<|audio:925|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "926": { + "content": "<|audio:926|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "927": { + "content": "<|audio:927|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "928": { + "content": "<|audio:928|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "929": { + "content": "<|audio:929|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "930": { + "content": "<|audio:930|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "931": { + "content": "<|audio:931|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "932": { + "content": "<|audio:932|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "933": { + "content": "<|audio:933|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "934": { + "content": "<|audio:934|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "935": { + "content": "<|audio:935|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "936": { + "content": "<|audio:936|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "937": { + "content": "<|audio:937|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "938": { + "content": "<|audio:938|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "939": { + "content": "<|audio:939|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "940": { + "content": "<|audio:940|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "941": { + "content": 
"<|audio:941|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "942": { + "content": "<|audio:942|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "943": { + "content": "<|audio:943|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "944": { + "content": "<|audio:944|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "945": { + "content": "<|audio:945|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "946": { + "content": "<|audio:946|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "947": { + "content": "<|audio:947|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "948": { + "content": "<|audio:948|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "949": { + "content": "<|audio:949|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "950": { + "content": "<|audio:950|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "951": { + "content": "<|audio:951|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "952": { + "content": "<|audio:952|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "953": { + "content": "<|audio:953|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "954": { + "content": "<|audio:954|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "955": { + "content": "<|audio:955|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "956": { + "content": "<|audio:956|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "957": { + "content": "<|audio:957|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "958": { + "content": "<|audio:958|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "959": { + "content": "<|audio:959|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "960": { + "content": "<|audio:960|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "961": { + "content": "<|audio:961|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "962": { + "content": "<|audio:962|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "963": { + "content": "<|audio:963|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "964": { + "content": "<|audio:964|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "965": { + "content": "<|audio:965|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "966": { + "content": "<|audio:966|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "967": { + "content": "<|audio:967|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "968": { + "content": "<|audio:968|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "969": { + "content": "<|audio:969|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "970": { + "content": "<|audio:970|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "971": { + "content": "<|audio:971|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "972": { + "content": "<|audio:972|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "973": { + "content": "<|audio:973|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "974": { + "content": "<|audio:974|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "975": { + "content": "<|audio:975|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "976": { + "content": "<|audio:976|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "977": { + "content": "<|audio:977|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "978": { + "content": "<|audio:978|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "979": { + "content": "<|audio:979|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "980": { + "content": "<|audio:980|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "981": { + "content": "<|audio:981|>", + "lstrip": false, 
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "982": { + "content": "<|audio:982|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "983": { + "content": "<|audio:983|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "984": { + "content": "<|audio:984|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "985": { + "content": "<|audio:985|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "986": { + "content": "<|audio:986|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "987": { + "content": "<|audio:987|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "988": { + "content": "<|audio:988|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "989": { + "content": "<|audio:989|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "990": { + "content": "<|audio:990|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "991": { + "content": "<|audio:991|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "992": { + "content": "<|audio:992|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "993": { + "content": "<|audio:993|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "994": { + "content": "<|audio:994|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": 
false, + "special": true + }, + "995": { + "content": "<|audio:995|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "996": { + "content": "<|audio:996|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "997": { + "content": "<|audio:997|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "998": { + "content": "<|audio:998|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "999": { + "content": "<|audio:999|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1000": { + "content": "<|audio:1000|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1001": { + "content": "<|audio:1001|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1002": { + "content": "<|audio:1002|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1003": { + "content": "<|audio:1003|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1004": { + "content": "<|audio:1004|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1005": { + "content": "<|audio:1005|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1006": { + "content": "<|audio:1006|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1007": { + "content": "<|audio:1007|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1008": { + 
"content": "<|audio:1008|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1009": { + "content": "<|audio:1009|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1010": { + "content": "<|audio:1010|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1011": { + "content": "<|audio:1011|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1012": { + "content": "<|audio:1012|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1013": { + "content": "<|audio:1013|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1014": { + "content": "<|audio:1014|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1015": { + "content": "<|audio:1015|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1016": { + "content": "<|audio:1016|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1017": { + "content": "<|audio:1017|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1018": { + "content": "<|audio:1018|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1019": { + "content": "<|audio:1019|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1020": { + "content": "<|audio:1020|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1021": { + "content": "<|audio:1021|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1022": { + "content": "<|audio:1022|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1023": { + "content": "<|audio:1023|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1024": { + "content": "<|startoftranscript|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1025": { + "content": "<|endoftranscript|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1026": { + "content": "<|padding|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "clean_up_tokenization_spaces": true, + "model_max_length": 1877, + "pad_token": "<|padding|>", + "special_tokens": [ + "<|audio:0|>", + "<|audio:1|>", + "<|audio:2|>", + "<|audio:3|>", + "<|audio:4|>", + "<|audio:5|>", + "<|audio:6|>", + "<|audio:7|>", + "<|audio:8|>", + "<|audio:9|>", + "<|audio:10|>", + "<|audio:11|>", + "<|audio:12|>", + "<|audio:13|>", + "<|audio:14|>", + "<|audio:15|>", + "<|audio:16|>", + "<|audio:17|>", + "<|audio:18|>", + "<|audio:19|>", + "<|audio:20|>", + "<|audio:21|>", + "<|audio:22|>", + "<|audio:23|>", + "<|audio:24|>", + "<|audio:25|>", + "<|audio:26|>", + "<|audio:27|>", + "<|audio:28|>", + "<|audio:29|>", + "<|audio:30|>", + "<|audio:31|>", + "<|audio:32|>", + "<|audio:33|>", + "<|audio:34|>", + "<|audio:35|>", + "<|audio:36|>", + "<|audio:37|>", + "<|audio:38|>", + "<|audio:39|>", + "<|audio:40|>", + "<|audio:41|>", + "<|audio:42|>", + "<|audio:43|>", + "<|audio:44|>", + "<|audio:45|>", + "<|audio:46|>", + "<|audio:47|>", + "<|audio:48|>", + "<|audio:49|>", + "<|audio:50|>", + "<|audio:51|>", + "<|audio:52|>", + "<|audio:53|>", + "<|audio:54|>", + "<|audio:55|>", + 
"<|audio:56|>", + "<|audio:57|>", + "<|audio:58|>", + "<|audio:59|>", + "<|audio:60|>", + "<|audio:61|>", + "<|audio:62|>", + "<|audio:63|>", + "<|audio:64|>", + "<|audio:65|>", + "<|audio:66|>", + "<|audio:67|>", + "<|audio:68|>", + "<|audio:69|>", + "<|audio:70|>", + "<|audio:71|>", + "<|audio:72|>", + "<|audio:73|>", + "<|audio:74|>", + "<|audio:75|>", + "<|audio:76|>", + "<|audio:77|>", + "<|audio:78|>", + "<|audio:79|>", + "<|audio:80|>", + "<|audio:81|>", + "<|audio:82|>", + "<|audio:83|>", + "<|audio:84|>", + "<|audio:85|>", + "<|audio:86|>", + "<|audio:87|>", + "<|audio:88|>", + "<|audio:89|>", + "<|audio:90|>", + "<|audio:91|>", + "<|audio:92|>", + "<|audio:93|>", + "<|audio:94|>", + "<|audio:95|>", + "<|audio:96|>", + "<|audio:97|>", + "<|audio:98|>", + "<|audio:99|>", + "<|audio:100|>", + "<|audio:101|>", + "<|audio:102|>", + "<|audio:103|>", + "<|audio:104|>", + "<|audio:105|>", + "<|audio:106|>", + "<|audio:107|>", + "<|audio:108|>", + "<|audio:109|>", + "<|audio:110|>", + "<|audio:111|>", + "<|audio:112|>", + "<|audio:113|>", + "<|audio:114|>", + "<|audio:115|>", + "<|audio:116|>", + "<|audio:117|>", + "<|audio:118|>", + "<|audio:119|>", + "<|audio:120|>", + "<|audio:121|>", + "<|audio:122|>", + "<|audio:123|>", + "<|audio:124|>", + "<|audio:125|>", + "<|audio:126|>", + "<|audio:127|>", + "<|audio:128|>", + "<|audio:129|>", + "<|audio:130|>", + "<|audio:131|>", + "<|audio:132|>", + "<|audio:133|>", + "<|audio:134|>", + "<|audio:135|>", + "<|audio:136|>", + "<|audio:137|>", + "<|audio:138|>", + "<|audio:139|>", + "<|audio:140|>", + "<|audio:141|>", + "<|audio:142|>", + "<|audio:143|>", + "<|audio:144|>", + "<|audio:145|>", + "<|audio:146|>", + "<|audio:147|>", + "<|audio:148|>", + "<|audio:149|>", + "<|audio:150|>", + "<|audio:151|>", + "<|audio:152|>", + "<|audio:153|>", + "<|audio:154|>", + "<|audio:155|>", + "<|audio:156|>", + "<|audio:157|>", + "<|audio:158|>", + "<|audio:159|>", + "<|audio:160|>", + "<|audio:161|>", + "<|audio:162|>", + 
"<|audio:163|>", + "<|audio:164|>", + "<|audio:165|>", + "<|audio:166|>", + "<|audio:167|>", + "<|audio:168|>", + "<|audio:169|>", + "<|audio:170|>", + "<|audio:171|>", + "<|audio:172|>", + "<|audio:173|>", + "<|audio:174|>", + "<|audio:175|>", + "<|audio:176|>", + "<|audio:177|>", + "<|audio:178|>", + "<|audio:179|>", + "<|audio:180|>", + "<|audio:181|>", + "<|audio:182|>", + "<|audio:183|>", + "<|audio:184|>", + "<|audio:185|>", + "<|audio:186|>", + "<|audio:187|>", + "<|audio:188|>", + "<|audio:189|>", + "<|audio:190|>", + "<|audio:191|>", + "<|audio:192|>", + "<|audio:193|>", + "<|audio:194|>", + "<|audio:195|>", + "<|audio:196|>", + "<|audio:197|>", + "<|audio:198|>", + "<|audio:199|>", + "<|audio:200|>", + "<|audio:201|>", + "<|audio:202|>", + "<|audio:203|>", + "<|audio:204|>", + "<|audio:205|>", + "<|audio:206|>", + "<|audio:207|>", + "<|audio:208|>", + "<|audio:209|>", + "<|audio:210|>", + "<|audio:211|>", + "<|audio:212|>", + "<|audio:213|>", + "<|audio:214|>", + "<|audio:215|>", + "<|audio:216|>", + "<|audio:217|>", + "<|audio:218|>", + "<|audio:219|>", + "<|audio:220|>", + "<|audio:221|>", + "<|audio:222|>", + "<|audio:223|>", + "<|audio:224|>", + "<|audio:225|>", + "<|audio:226|>", + "<|audio:227|>", + "<|audio:228|>", + "<|audio:229|>", + "<|audio:230|>", + "<|audio:231|>", + "<|audio:232|>", + "<|audio:233|>", + "<|audio:234|>", + "<|audio:235|>", + "<|audio:236|>", + "<|audio:237|>", + "<|audio:238|>", + "<|audio:239|>", + "<|audio:240|>", + "<|audio:241|>", + "<|audio:242|>", + "<|audio:243|>", + "<|audio:244|>", + "<|audio:245|>", + "<|audio:246|>", + "<|audio:247|>", + "<|audio:248|>", + "<|audio:249|>", + "<|audio:250|>", + "<|audio:251|>", + "<|audio:252|>", + "<|audio:253|>", + "<|audio:254|>", + "<|audio:255|>", + "<|audio:256|>", + "<|audio:257|>", + "<|audio:258|>", + "<|audio:259|>", + "<|audio:260|>", + "<|audio:261|>", + "<|audio:262|>", + "<|audio:263|>", + "<|audio:264|>", + "<|audio:265|>", + "<|audio:266|>", + "<|audio:267|>", + 
"<|audio:268|>", + "<|audio:269|>", + "<|audio:270|>", + "<|audio:271|>", + "<|audio:272|>", + "<|audio:273|>", + "<|audio:274|>", + "<|audio:275|>", + "<|audio:276|>", + "<|audio:277|>", + "<|audio:278|>", + "<|audio:279|>", + "<|audio:280|>", + "<|audio:281|>", + "<|audio:282|>", + "<|audio:283|>", + "<|audio:284|>", + "<|audio:285|>", + "<|audio:286|>", + "<|audio:287|>", + "<|audio:288|>", + "<|audio:289|>", + "<|audio:290|>", + "<|audio:291|>", + "<|audio:292|>", + "<|audio:293|>", + "<|audio:294|>", + "<|audio:295|>", + "<|audio:296|>", + "<|audio:297|>", + "<|audio:298|>", + "<|audio:299|>", + "<|audio:300|>", + "<|audio:301|>", + "<|audio:302|>", + "<|audio:303|>", + "<|audio:304|>", + "<|audio:305|>", + "<|audio:306|>", + "<|audio:307|>", + "<|audio:308|>", + "<|audio:309|>", + "<|audio:310|>", + "<|audio:311|>", + "<|audio:312|>", + "<|audio:313|>", + "<|audio:314|>", + "<|audio:315|>", + "<|audio:316|>", + "<|audio:317|>", + "<|audio:318|>", + "<|audio:319|>", + "<|audio:320|>", + "<|audio:321|>", + "<|audio:322|>", + "<|audio:323|>", + "<|audio:324|>", + "<|audio:325|>", + "<|audio:326|>", + "<|audio:327|>", + "<|audio:328|>", + "<|audio:329|>", + "<|audio:330|>", + "<|audio:331|>", + "<|audio:332|>", + "<|audio:333|>", + "<|audio:334|>", + "<|audio:335|>", + "<|audio:336|>", + "<|audio:337|>", + "<|audio:338|>", + "<|audio:339|>", + "<|audio:340|>", + "<|audio:341|>", + "<|audio:342|>", + "<|audio:343|>", + "<|audio:344|>", + "<|audio:345|>", + "<|audio:346|>", + "<|audio:347|>", + "<|audio:348|>", + "<|audio:349|>", + "<|audio:350|>", + "<|audio:351|>", + "<|audio:352|>", + "<|audio:353|>", + "<|audio:354|>", + "<|audio:355|>", + "<|audio:356|>", + "<|audio:357|>", + "<|audio:358|>", + "<|audio:359|>", + "<|audio:360|>", + "<|audio:361|>", + "<|audio:362|>", + "<|audio:363|>", + "<|audio:364|>", + "<|audio:365|>", + "<|audio:366|>", + "<|audio:367|>", + "<|audio:368|>", + "<|audio:369|>", + "<|audio:370|>", + "<|audio:371|>", + "<|audio:372|>", + 
"<|audio:373|>", + "<|audio:374|>", + "<|audio:375|>", + "<|audio:376|>", + "<|audio:377|>", + "<|audio:378|>", + "<|audio:379|>", + "<|audio:380|>", + "<|audio:381|>", + "<|audio:382|>", + "<|audio:383|>", + "<|audio:384|>", + "<|audio:385|>", + "<|audio:386|>", + "<|audio:387|>", + "<|audio:388|>", + "<|audio:389|>", + "<|audio:390|>", + "<|audio:391|>", + "<|audio:392|>", + "<|audio:393|>", + "<|audio:394|>", + "<|audio:395|>", + "<|audio:396|>", + "<|audio:397|>", + "<|audio:398|>", + "<|audio:399|>", + "<|audio:400|>", + "<|audio:401|>", + "<|audio:402|>", + "<|audio:403|>", + "<|audio:404|>", + "<|audio:405|>", + "<|audio:406|>", + "<|audio:407|>", + "<|audio:408|>", + "<|audio:409|>", + "<|audio:410|>", + "<|audio:411|>", + "<|audio:412|>", + "<|audio:413|>", + "<|audio:414|>", + "<|audio:415|>", + "<|audio:416|>", + "<|audio:417|>", + "<|audio:418|>", + "<|audio:419|>", + "<|audio:420|>", + "<|audio:421|>", + "<|audio:422|>", + "<|audio:423|>", + "<|audio:424|>", + "<|audio:425|>", + "<|audio:426|>", + "<|audio:427|>", + "<|audio:428|>", + "<|audio:429|>", + "<|audio:430|>", + "<|audio:431|>", + "<|audio:432|>", + "<|audio:433|>", + "<|audio:434|>", + "<|audio:435|>", + "<|audio:436|>", + "<|audio:437|>", + "<|audio:438|>", + "<|audio:439|>", + "<|audio:440|>", + "<|audio:441|>", + "<|audio:442|>", + "<|audio:443|>", + "<|audio:444|>", + "<|audio:445|>", + "<|audio:446|>", + "<|audio:447|>", + "<|audio:448|>", + "<|audio:449|>", + "<|audio:450|>", + "<|audio:451|>", + "<|audio:452|>", + "<|audio:453|>", + "<|audio:454|>", + "<|audio:455|>", + "<|audio:456|>", + "<|audio:457|>", + "<|audio:458|>", + "<|audio:459|>", + "<|audio:460|>", + "<|audio:461|>", + "<|audio:462|>", + "<|audio:463|>", + "<|audio:464|>", + "<|audio:465|>", + "<|audio:466|>", + "<|audio:467|>", + "<|audio:468|>", + "<|audio:469|>", + "<|audio:470|>", + "<|audio:471|>", + "<|audio:472|>", + "<|audio:473|>", + "<|audio:474|>", + "<|audio:475|>", + "<|audio:476|>", + "<|audio:477|>", + 
"<|audio:478|>", + "<|audio:479|>", + "<|audio:480|>", + "<|audio:481|>", + "<|audio:482|>", + "<|audio:483|>", + "<|audio:484|>", + "<|audio:485|>", + "<|audio:486|>", + "<|audio:487|>", + "<|audio:488|>", + "<|audio:489|>", + "<|audio:490|>", + "<|audio:491|>", + "<|audio:492|>", + "<|audio:493|>", + "<|audio:494|>", + "<|audio:495|>", + "<|audio:496|>", + "<|audio:497|>", + "<|audio:498|>", + "<|audio:499|>", + "<|audio:500|>", + "<|audio:501|>", + "<|audio:502|>", + "<|audio:503|>", + "<|audio:504|>", + "<|audio:505|>", + "<|audio:506|>", + "<|audio:507|>", + "<|audio:508|>", + "<|audio:509|>", + "<|audio:510|>", + "<|audio:511|>", + "<|audio:512|>", + "<|audio:513|>", + "<|audio:514|>", + "<|audio:515|>", + "<|audio:516|>", + "<|audio:517|>", + "<|audio:518|>", + "<|audio:519|>", + "<|audio:520|>", + "<|audio:521|>", + "<|audio:522|>", + "<|audio:523|>", + "<|audio:524|>", + "<|audio:525|>", + "<|audio:526|>", + "<|audio:527|>", + "<|audio:528|>", + "<|audio:529|>", + "<|audio:530|>", + "<|audio:531|>", + "<|audio:532|>", + "<|audio:533|>", + "<|audio:534|>", + "<|audio:535|>", + "<|audio:536|>", + "<|audio:537|>", + "<|audio:538|>", + "<|audio:539|>", + "<|audio:540|>", + "<|audio:541|>", + "<|audio:542|>", + "<|audio:543|>", + "<|audio:544|>", + "<|audio:545|>", + "<|audio:546|>", + "<|audio:547|>", + "<|audio:548|>", + "<|audio:549|>", + "<|audio:550|>", + "<|audio:551|>", + "<|audio:552|>", + "<|audio:553|>", + "<|audio:554|>", + "<|audio:555|>", + "<|audio:556|>", + "<|audio:557|>", + "<|audio:558|>", + "<|audio:559|>", + "<|audio:560|>", + "<|audio:561|>", + "<|audio:562|>", + "<|audio:563|>", + "<|audio:564|>", + "<|audio:565|>", + "<|audio:566|>", + "<|audio:567|>", + "<|audio:568|>", + "<|audio:569|>", + "<|audio:570|>", + "<|audio:571|>", + "<|audio:572|>", + "<|audio:573|>", + "<|audio:574|>", + "<|audio:575|>", + "<|audio:576|>", + "<|audio:577|>", + "<|audio:578|>", + "<|audio:579|>", + "<|audio:580|>", + "<|audio:581|>", + "<|audio:582|>", + 
"<|audio:583|>", + "<|audio:584|>", + "<|audio:585|>", + "<|audio:586|>", + "<|audio:587|>", + "<|audio:588|>", + "<|audio:589|>", + "<|audio:590|>", + "<|audio:591|>", + "<|audio:592|>", + "<|audio:593|>", + "<|audio:594|>", + "<|audio:595|>", + "<|audio:596|>", + "<|audio:597|>", + "<|audio:598|>", + "<|audio:599|>", + "<|audio:600|>", + "<|audio:601|>", + "<|audio:602|>", + "<|audio:603|>", + "<|audio:604|>", + "<|audio:605|>", + "<|audio:606|>", + "<|audio:607|>", + "<|audio:608|>", + "<|audio:609|>", + "<|audio:610|>", + "<|audio:611|>", + "<|audio:612|>", + "<|audio:613|>", + "<|audio:614|>", + "<|audio:615|>", + "<|audio:616|>", + "<|audio:617|>", + "<|audio:618|>", + "<|audio:619|>", + "<|audio:620|>", + "<|audio:621|>", + "<|audio:622|>", + "<|audio:623|>", + "<|audio:624|>", + "<|audio:625|>", + "<|audio:626|>", + "<|audio:627|>", + "<|audio:628|>", + "<|audio:629|>", + "<|audio:630|>", + "<|audio:631|>", + "<|audio:632|>", + "<|audio:633|>", + "<|audio:634|>", + "<|audio:635|>", + "<|audio:636|>", + "<|audio:637|>", + "<|audio:638|>", + "<|audio:639|>", + "<|audio:640|>", + "<|audio:641|>", + "<|audio:642|>", + "<|audio:643|>", + "<|audio:644|>", + "<|audio:645|>", + "<|audio:646|>", + "<|audio:647|>", + "<|audio:648|>", + "<|audio:649|>", + "<|audio:650|>", + "<|audio:651|>", + "<|audio:652|>", + "<|audio:653|>", + "<|audio:654|>", + "<|audio:655|>", + "<|audio:656|>", + "<|audio:657|>", + "<|audio:658|>", + "<|audio:659|>", + "<|audio:660|>", + "<|audio:661|>", + "<|audio:662|>", + "<|audio:663|>", + "<|audio:664|>", + "<|audio:665|>", + "<|audio:666|>", + "<|audio:667|>", + "<|audio:668|>", + "<|audio:669|>", + "<|audio:670|>", + "<|audio:671|>", + "<|audio:672|>", + "<|audio:673|>", + "<|audio:674|>", + "<|audio:675|>", + "<|audio:676|>", + "<|audio:677|>", + "<|audio:678|>", + "<|audio:679|>", + "<|audio:680|>", + "<|audio:681|>", + "<|audio:682|>", + "<|audio:683|>", + "<|audio:684|>", + "<|audio:685|>", + "<|audio:686|>", + "<|audio:687|>", + 
"<|audio:688|>", + "<|audio:689|>", + "<|audio:690|>", + "<|audio:691|>", + "<|audio:692|>", + "<|audio:693|>", + "<|audio:694|>", + "<|audio:695|>", + "<|audio:696|>", + "<|audio:697|>", + "<|audio:698|>", + "<|audio:699|>", + "<|audio:700|>", + "<|audio:701|>", + "<|audio:702|>", + "<|audio:703|>", + "<|audio:704|>", + "<|audio:705|>", + "<|audio:706|>", + "<|audio:707|>", + "<|audio:708|>", + "<|audio:709|>", + "<|audio:710|>", + "<|audio:711|>", + "<|audio:712|>", + "<|audio:713|>", + "<|audio:714|>", + "<|audio:715|>", + "<|audio:716|>", + "<|audio:717|>", + "<|audio:718|>", + "<|audio:719|>", + "<|audio:720|>", + "<|audio:721|>", + "<|audio:722|>", + "<|audio:723|>", + "<|audio:724|>", + "<|audio:725|>", + "<|audio:726|>", + "<|audio:727|>", + "<|audio:728|>", + "<|audio:729|>", + "<|audio:730|>", + "<|audio:731|>", + "<|audio:732|>", + "<|audio:733|>", + "<|audio:734|>", + "<|audio:735|>", + "<|audio:736|>", + "<|audio:737|>", + "<|audio:738|>", + "<|audio:739|>", + "<|audio:740|>", + "<|audio:741|>", + "<|audio:742|>", + "<|audio:743|>", + "<|audio:744|>", + "<|audio:745|>", + "<|audio:746|>", + "<|audio:747|>", + "<|audio:748|>", + "<|audio:749|>", + "<|audio:750|>", + "<|audio:751|>", + "<|audio:752|>", + "<|audio:753|>", + "<|audio:754|>", + "<|audio:755|>", + "<|audio:756|>", + "<|audio:757|>", + "<|audio:758|>", + "<|audio:759|>", + "<|audio:760|>", + "<|audio:761|>", + "<|audio:762|>", + "<|audio:763|>", + "<|audio:764|>", + "<|audio:765|>", + "<|audio:766|>", + "<|audio:767|>", + "<|audio:768|>", + "<|audio:769|>", + "<|audio:770|>", + "<|audio:771|>", + "<|audio:772|>", + "<|audio:773|>", + "<|audio:774|>", + "<|audio:775|>", + "<|audio:776|>", + "<|audio:777|>", + "<|audio:778|>", + "<|audio:779|>", + "<|audio:780|>", + "<|audio:781|>", + "<|audio:782|>", + "<|audio:783|>", + "<|audio:784|>", + "<|audio:785|>", + "<|audio:786|>", + "<|audio:787|>", + "<|audio:788|>", + "<|audio:789|>", + "<|audio:790|>", + "<|audio:791|>", + "<|audio:792|>", + 
"<|audio:793|>", + "<|audio:794|>", + "<|audio:795|>", + "<|audio:796|>", + "<|audio:797|>", + "<|audio:798|>", + "<|audio:799|>", + "<|audio:800|>", + "<|audio:801|>", + "<|audio:802|>", + "<|audio:803|>", + "<|audio:804|>", + "<|audio:805|>", + "<|audio:806|>", + "<|audio:807|>", + "<|audio:808|>", + "<|audio:809|>", + "<|audio:810|>", + "<|audio:811|>", + "<|audio:812|>", + "<|audio:813|>", + "<|audio:814|>", + "<|audio:815|>", + "<|audio:816|>", + "<|audio:817|>", + "<|audio:818|>", + "<|audio:819|>", + "<|audio:820|>", + "<|audio:821|>", + "<|audio:822|>", + "<|audio:823|>", + "<|audio:824|>", + "<|audio:825|>", + "<|audio:826|>", + "<|audio:827|>", + "<|audio:828|>", + "<|audio:829|>", + "<|audio:830|>", + "<|audio:831|>", + "<|audio:832|>", + "<|audio:833|>", + "<|audio:834|>", + "<|audio:835|>", + "<|audio:836|>", + "<|audio:837|>", + "<|audio:838|>", + "<|audio:839|>", + "<|audio:840|>", + "<|audio:841|>", + "<|audio:842|>", + "<|audio:843|>", + "<|audio:844|>", + "<|audio:845|>", + "<|audio:846|>", + "<|audio:847|>", + "<|audio:848|>", + "<|audio:849|>", + "<|audio:850|>", + "<|audio:851|>", + "<|audio:852|>", + "<|audio:853|>", + "<|audio:854|>", + "<|audio:855|>", + "<|audio:856|>", + "<|audio:857|>", + "<|audio:858|>", + "<|audio:859|>", + "<|audio:860|>", + "<|audio:861|>", + "<|audio:862|>", + "<|audio:863|>", + "<|audio:864|>", + "<|audio:865|>", + "<|audio:866|>", + "<|audio:867|>", + "<|audio:868|>", + "<|audio:869|>", + "<|audio:870|>", + "<|audio:871|>", + "<|audio:872|>", + "<|audio:873|>", + "<|audio:874|>", + "<|audio:875|>", + "<|audio:876|>", + "<|audio:877|>", + "<|audio:878|>", + "<|audio:879|>", + "<|audio:880|>", + "<|audio:881|>", + "<|audio:882|>", + "<|audio:883|>", + "<|audio:884|>", + "<|audio:885|>", + "<|audio:886|>", + "<|audio:887|>", + "<|audio:888|>", + "<|audio:889|>", + "<|audio:890|>", + "<|audio:891|>", + "<|audio:892|>", + "<|audio:893|>", + "<|audio:894|>", + "<|audio:895|>", + "<|audio:896|>", + "<|audio:897|>", + 
"<|audio:898|>", + "<|audio:899|>", + "<|audio:900|>", + "<|audio:901|>", + "<|audio:902|>", + "<|audio:903|>", + "<|audio:904|>", + "<|audio:905|>", + "<|audio:906|>", + "<|audio:907|>", + "<|audio:908|>", + "<|audio:909|>", + "<|audio:910|>", + "<|audio:911|>", + "<|audio:912|>", + "<|audio:913|>", + "<|audio:914|>", + "<|audio:915|>", + "<|audio:916|>", + "<|audio:917|>", + "<|audio:918|>", + "<|audio:919|>", + "<|audio:920|>", + "<|audio:921|>", + "<|audio:922|>", + "<|audio:923|>", + "<|audio:924|>", + "<|audio:925|>", + "<|audio:926|>", + "<|audio:927|>", + "<|audio:928|>", + "<|audio:929|>", + "<|audio:930|>", + "<|audio:931|>", + "<|audio:932|>", + "<|audio:933|>", + "<|audio:934|>", + "<|audio:935|>", + "<|audio:936|>", + "<|audio:937|>", + "<|audio:938|>", + "<|audio:939|>", + "<|audio:940|>", + "<|audio:941|>", + "<|audio:942|>", + "<|audio:943|>", + "<|audio:944|>", + "<|audio:945|>", + "<|audio:946|>", + "<|audio:947|>", + "<|audio:948|>", + "<|audio:949|>", + "<|audio:950|>", + "<|audio:951|>", + "<|audio:952|>", + "<|audio:953|>", + "<|audio:954|>", + "<|audio:955|>", + "<|audio:956|>", + "<|audio:957|>", + "<|audio:958|>", + "<|audio:959|>", + "<|audio:960|>", + "<|audio:961|>", + "<|audio:962|>", + "<|audio:963|>", + "<|audio:964|>", + "<|audio:965|>", + "<|audio:966|>", + "<|audio:967|>", + "<|audio:968|>", + "<|audio:969|>", + "<|audio:970|>", + "<|audio:971|>", + "<|audio:972|>", + "<|audio:973|>", + "<|audio:974|>", + "<|audio:975|>", + "<|audio:976|>", + "<|audio:977|>", + "<|audio:978|>", + "<|audio:979|>", + "<|audio:980|>", + "<|audio:981|>", + "<|audio:982|>", + "<|audio:983|>", + "<|audio:984|>", + "<|audio:985|>", + "<|audio:986|>", + "<|audio:987|>", + "<|audio:988|>", + "<|audio:989|>", + "<|audio:990|>", + "<|audio:991|>", + "<|audio:992|>", + "<|audio:993|>", + "<|audio:994|>", + "<|audio:995|>", + "<|audio:996|>", + "<|audio:997|>", + "<|audio:998|>", + "<|audio:999|>", + "<|audio:1000|>", + "<|audio:1001|>", + "<|audio:1002|>", + 
"<|audio:1003|>", + "<|audio:1004|>", + "<|audio:1005|>", + "<|audio:1006|>", + "<|audio:1007|>", + "<|audio:1008|>", + "<|audio:1009|>", + "<|audio:1010|>", + "<|audio:1011|>", + "<|audio:1012|>", + "<|audio:1013|>", + "<|audio:1014|>", + "<|audio:1015|>", + "<|audio:1016|>", + "<|audio:1017|>", + "<|audio:1018|>", + "<|audio:1019|>", + "<|audio:1020|>", + "<|audio:1021|>", + "<|audio:1022|>", + "<|audio:1023|>", + "<|startoftranscript|>", + "<|endoftranscript|>", + "<|padding|>" + ], + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/out/checkpoint-18000/trainer_state.json b/out/checkpoint-18000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f723074c7464d97ac1a0753b76dbecd16a5e3249 --- /dev/null +++ b/out/checkpoint-18000/trainer_state.json @@ -0,0 +1,126177 @@ +{ + "best_metric": 2.3678998947143555, + "best_model_checkpoint": "./out/checkpoint-18000", + "epoch": 1.4526672584940683, + "eval_steps": 1000, + "global_step": 18000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 8.07037365830038e-05, + "grad_norm": 0.8911969065666199, + "learning_rate": 2.0000000000000003e-06, + "loss": 2.6759, + "step": 1 + }, + { + "epoch": 0.0001614074731660076, + "grad_norm": 0.8724873661994934, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7001, + "step": 2 + }, + { + "epoch": 0.00024211120974901139, + "grad_norm": 0.9050428867340088, + "learning_rate": 6e-06, + "loss": 2.6291, + "step": 3 + }, + { + "epoch": 0.0003228149463320152, + "grad_norm": 0.9249712824821472, + "learning_rate": 8.000000000000001e-06, + "loss": 2.7174, + "step": 4 + }, + { + "epoch": 0.000403518682915019, + "grad_norm": 0.9102846384048462, + "learning_rate": 1e-05, + "loss": 2.6831, + "step": 5 + }, + { + "epoch": 0.00048422241949802277, + "grad_norm": 0.9129141569137573, + "learning_rate": 1.2e-05, + "loss": 2.684, + "step": 6 + }, + { + "epoch": 
0.0005649261560810266, + "grad_norm": 0.8648065328598022, + "learning_rate": 1.4000000000000001e-05, + "loss": 2.6488, + "step": 7 + }, + { + "epoch": 0.0006456298926640304, + "grad_norm": 0.8677545785903931, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.7143, + "step": 8 + }, + { + "epoch": 0.0007263336292470342, + "grad_norm": 0.919029712677002, + "learning_rate": 1.8e-05, + "loss": 2.631, + "step": 9 + }, + { + "epoch": 0.000807037365830038, + "grad_norm": 0.9289683103561401, + "learning_rate": 2e-05, + "loss": 2.6564, + "step": 10 + }, + { + "epoch": 0.0008877411024130417, + "grad_norm": 0.8810267448425293, + "learning_rate": 2.2000000000000003e-05, + "loss": 2.6395, + "step": 11 + }, + { + "epoch": 0.0009684448389960455, + "grad_norm": 0.8185754418373108, + "learning_rate": 2.4e-05, + "loss": 2.6871, + "step": 12 + }, + { + "epoch": 0.0010491485755790492, + "grad_norm": 0.9476913213729858, + "learning_rate": 2.6000000000000002e-05, + "loss": 2.7011, + "step": 13 + }, + { + "epoch": 0.0011298523121620531, + "grad_norm": 0.9616057872772217, + "learning_rate": 2.8000000000000003e-05, + "loss": 2.7373, + "step": 14 + }, + { + "epoch": 0.0012105560487450568, + "grad_norm": 0.9429686665534973, + "learning_rate": 3e-05, + "loss": 2.7556, + "step": 15 + }, + { + "epoch": 0.0012912597853280607, + "grad_norm": 1.0331422090530396, + "learning_rate": 3.2000000000000005e-05, + "loss": 2.7756, + "step": 16 + }, + { + "epoch": 0.0013719635219110644, + "grad_norm": 0.906057596206665, + "learning_rate": 3.4000000000000007e-05, + "loss": 2.7053, + "step": 17 + }, + { + "epoch": 0.0014526672584940683, + "grad_norm": 0.8677626252174377, + "learning_rate": 3.6e-05, + "loss": 2.7012, + "step": 18 + }, + { + "epoch": 0.001533370995077072, + "grad_norm": 0.9378079175949097, + "learning_rate": 3.8e-05, + "loss": 2.6786, + "step": 19 + }, + { + "epoch": 0.001614074731660076, + "grad_norm": 1.0333882570266724, + "learning_rate": 4e-05, + "loss": 2.689, + "step": 20 + }, + { + 
"epoch": 0.0016947784682430796, + "grad_norm": 0.9435378909111023, + "learning_rate": 4.2e-05, + "loss": 2.7084, + "step": 21 + }, + { + "epoch": 0.0017754822048260835, + "grad_norm": 0.9530225396156311, + "learning_rate": 4.4000000000000006e-05, + "loss": 2.7039, + "step": 22 + }, + { + "epoch": 0.0018561859414090872, + "grad_norm": 1.0154749155044556, + "learning_rate": 4.600000000000001e-05, + "loss": 2.6623, + "step": 23 + }, + { + "epoch": 0.001936889677992091, + "grad_norm": 1.0341671705245972, + "learning_rate": 4.8e-05, + "loss": 2.7072, + "step": 24 + }, + { + "epoch": 0.002017593414575095, + "grad_norm": 0.9185739159584045, + "learning_rate": 5e-05, + "loss": 2.6595, + "step": 25 + }, + { + "epoch": 0.0020982971511580985, + "grad_norm": 1.060390591621399, + "learning_rate": 5.2000000000000004e-05, + "loss": 2.7045, + "step": 26 + }, + { + "epoch": 0.0021790008877411024, + "grad_norm": 0.9720118641853333, + "learning_rate": 5.4000000000000005e-05, + "loss": 2.6513, + "step": 27 + }, + { + "epoch": 0.0022597046243241063, + "grad_norm": 0.9426784515380859, + "learning_rate": 5.6000000000000006e-05, + "loss": 2.6541, + "step": 28 + }, + { + "epoch": 0.00234040836090711, + "grad_norm": 0.9736170768737793, + "learning_rate": 5.8e-05, + "loss": 2.7324, + "step": 29 + }, + { + "epoch": 0.0024211120974901136, + "grad_norm": 0.9831354022026062, + "learning_rate": 6e-05, + "loss": 2.6651, + "step": 30 + }, + { + "epoch": 0.0025018158340731175, + "grad_norm": 1.0222605466842651, + "learning_rate": 6.2e-05, + "loss": 2.7375, + "step": 31 + }, + { + "epoch": 0.0025825195706561214, + "grad_norm": 0.9182235598564148, + "learning_rate": 6.400000000000001e-05, + "loss": 2.7142, + "step": 32 + }, + { + "epoch": 0.0026632233072391254, + "grad_norm": 1.0200958251953125, + "learning_rate": 6.6e-05, + "loss": 2.6785, + "step": 33 + }, + { + "epoch": 0.002743927043822129, + "grad_norm": 1.0153381824493408, + "learning_rate": 6.800000000000001e-05, + "loss": 2.6737, + "step": 34 
+ }, + { + "epoch": 0.0028246307804051327, + "grad_norm": 0.8998087644577026, + "learning_rate": 7e-05, + "loss": 2.7594, + "step": 35 + }, + { + "epoch": 0.0029053345169881366, + "grad_norm": 0.9005621671676636, + "learning_rate": 7.2e-05, + "loss": 2.713, + "step": 36 + }, + { + "epoch": 0.0029860382535711405, + "grad_norm": 1.0165663957595825, + "learning_rate": 7.4e-05, + "loss": 2.7197, + "step": 37 + }, + { + "epoch": 0.003066741990154144, + "grad_norm": 1.0011894702911377, + "learning_rate": 7.6e-05, + "loss": 2.6315, + "step": 38 + }, + { + "epoch": 0.003147445726737148, + "grad_norm": 1.141209602355957, + "learning_rate": 7.800000000000001e-05, + "loss": 2.7249, + "step": 39 + }, + { + "epoch": 0.003228149463320152, + "grad_norm": 0.9114719033241272, + "learning_rate": 8e-05, + "loss": 2.7039, + "step": 40 + }, + { + "epoch": 0.0033088531999031557, + "grad_norm": 1.0193392038345337, + "learning_rate": 8.2e-05, + "loss": 2.6501, + "step": 41 + }, + { + "epoch": 0.003389556936486159, + "grad_norm": 0.9458270072937012, + "learning_rate": 8.4e-05, + "loss": 2.725, + "step": 42 + }, + { + "epoch": 0.003470260673069163, + "grad_norm": 0.9667492508888245, + "learning_rate": 8.6e-05, + "loss": 2.7232, + "step": 43 + }, + { + "epoch": 0.003550964409652167, + "grad_norm": 0.9987972378730774, + "learning_rate": 8.800000000000001e-05, + "loss": 2.6554, + "step": 44 + }, + { + "epoch": 0.003631668146235171, + "grad_norm": 1.0166393518447876, + "learning_rate": 9e-05, + "loss": 2.7291, + "step": 45 + }, + { + "epoch": 0.0037123718828181744, + "grad_norm": 0.9557009935379028, + "learning_rate": 9.200000000000001e-05, + "loss": 2.7194, + "step": 46 + }, + { + "epoch": 0.0037930756194011783, + "grad_norm": 0.9575492143630981, + "learning_rate": 9.4e-05, + "loss": 2.6671, + "step": 47 + }, + { + "epoch": 0.003873779355984182, + "grad_norm": 0.9614555239677429, + "learning_rate": 9.6e-05, + "loss": 2.6865, + "step": 48 + }, + { + "epoch": 0.003954483092567186, + "grad_norm": 
0.9245515465736389, + "learning_rate": 9.8e-05, + "loss": 2.7821, + "step": 49 + }, + { + "epoch": 0.00403518682915019, + "grad_norm": 0.9756044745445251, + "learning_rate": 0.0001, + "loss": 2.7608, + "step": 50 + }, + { + "epoch": 0.0041158905657331935, + "grad_norm": 0.95787513256073, + "learning_rate": 0.00010200000000000001, + "loss": 2.6458, + "step": 51 + }, + { + "epoch": 0.004196594302316197, + "grad_norm": 1.0102490186691284, + "learning_rate": 0.00010400000000000001, + "loss": 2.7835, + "step": 52 + }, + { + "epoch": 0.004277298038899201, + "grad_norm": 0.9676176309585571, + "learning_rate": 0.00010600000000000002, + "loss": 2.702, + "step": 53 + }, + { + "epoch": 0.004358001775482205, + "grad_norm": 0.9724096655845642, + "learning_rate": 0.00010800000000000001, + "loss": 2.714, + "step": 54 + }, + { + "epoch": 0.004438705512065208, + "grad_norm": 0.9482994675636292, + "learning_rate": 0.00011000000000000002, + "loss": 2.8069, + "step": 55 + }, + { + "epoch": 0.0045194092486482125, + "grad_norm": 0.9886480569839478, + "learning_rate": 0.00011200000000000001, + "loss": 2.7468, + "step": 56 + }, + { + "epoch": 0.004600112985231216, + "grad_norm": 0.9696247577667236, + "learning_rate": 0.00011399999999999999, + "loss": 2.7486, + "step": 57 + }, + { + "epoch": 0.00468081672181422, + "grad_norm": 1.0638912916183472, + "learning_rate": 0.000116, + "loss": 2.7747, + "step": 58 + }, + { + "epoch": 0.004761520458397224, + "grad_norm": 1.016483187675476, + "learning_rate": 0.000118, + "loss": 2.6925, + "step": 59 + }, + { + "epoch": 0.004842224194980227, + "grad_norm": 1.0298779010772705, + "learning_rate": 0.00012, + "loss": 2.7487, + "step": 60 + }, + { + "epoch": 0.004922927931563232, + "grad_norm": 1.1082268953323364, + "learning_rate": 0.000122, + "loss": 2.7697, + "step": 61 + }, + { + "epoch": 0.005003631668146235, + "grad_norm": 0.9202101826667786, + "learning_rate": 0.000124, + "loss": 2.7429, + "step": 62 + }, + { + "epoch": 0.0050843354047292386, + 
"grad_norm": 1.0140503644943237, + "learning_rate": 0.000126, + "loss": 2.7492, + "step": 63 + }, + { + "epoch": 0.005165039141312243, + "grad_norm": 1.0689163208007812, + "learning_rate": 0.00012800000000000002, + "loss": 2.7353, + "step": 64 + }, + { + "epoch": 0.005245742877895246, + "grad_norm": 0.9947141408920288, + "learning_rate": 0.00013000000000000002, + "loss": 2.7385, + "step": 65 + }, + { + "epoch": 0.005326446614478251, + "grad_norm": 1.2034410238265991, + "learning_rate": 0.000132, + "loss": 2.7632, + "step": 66 + }, + { + "epoch": 0.005407150351061254, + "grad_norm": 0.9450412392616272, + "learning_rate": 0.000134, + "loss": 2.7547, + "step": 67 + }, + { + "epoch": 0.005487854087644258, + "grad_norm": 1.1818269491195679, + "learning_rate": 0.00013600000000000003, + "loss": 2.7663, + "step": 68 + }, + { + "epoch": 0.005568557824227262, + "grad_norm": 1.003347396850586, + "learning_rate": 0.000138, + "loss": 2.7299, + "step": 69 + }, + { + "epoch": 0.0056492615608102655, + "grad_norm": 1.0105760097503662, + "learning_rate": 0.00014, + "loss": 2.7261, + "step": 70 + }, + { + "epoch": 0.005729965297393269, + "grad_norm": 0.9459090232849121, + "learning_rate": 0.000142, + "loss": 2.7237, + "step": 71 + }, + { + "epoch": 0.005810669033976273, + "grad_norm": 0.9716219305992126, + "learning_rate": 0.000144, + "loss": 2.8175, + "step": 72 + }, + { + "epoch": 0.005891372770559277, + "grad_norm": 0.9968419075012207, + "learning_rate": 0.000146, + "loss": 2.7828, + "step": 73 + }, + { + "epoch": 0.005972076507142281, + "grad_norm": 1.099680781364441, + "learning_rate": 0.000148, + "loss": 2.7111, + "step": 74 + }, + { + "epoch": 0.0060527802437252845, + "grad_norm": 1.004846453666687, + "learning_rate": 0.00015000000000000001, + "loss": 2.7508, + "step": 75 + }, + { + "epoch": 0.006133483980308288, + "grad_norm": 1.0568128824234009, + "learning_rate": 0.000152, + "loss": 2.7341, + "step": 76 + }, + { + "epoch": 0.006214187716891292, + "grad_norm": 
0.9871000051498413, + "learning_rate": 0.000154, + "loss": 2.7831, + "step": 77 + }, + { + "epoch": 0.006294891453474296, + "grad_norm": 1.005947232246399, + "learning_rate": 0.00015600000000000002, + "loss": 2.6798, + "step": 78 + }, + { + "epoch": 0.006375595190057299, + "grad_norm": 0.9984713792800903, + "learning_rate": 0.00015800000000000002, + "loss": 2.8126, + "step": 79 + }, + { + "epoch": 0.006456298926640304, + "grad_norm": 0.9805751442909241, + "learning_rate": 0.00016, + "loss": 2.7826, + "step": 80 + }, + { + "epoch": 0.006537002663223307, + "grad_norm": 1.02998685836792, + "learning_rate": 0.000162, + "loss": 2.7636, + "step": 81 + }, + { + "epoch": 0.006617706399806311, + "grad_norm": 1.0790135860443115, + "learning_rate": 0.000164, + "loss": 2.7809, + "step": 82 + }, + { + "epoch": 0.006698410136389315, + "grad_norm": 1.1058307886123657, + "learning_rate": 0.000166, + "loss": 2.787, + "step": 83 + }, + { + "epoch": 0.006779113872972318, + "grad_norm": 1.0199624300003052, + "learning_rate": 0.000168, + "loss": 2.7171, + "step": 84 + }, + { + "epoch": 0.006859817609555323, + "grad_norm": 1.006494402885437, + "learning_rate": 0.00017, + "loss": 2.7791, + "step": 85 + }, + { + "epoch": 0.006940521346138326, + "grad_norm": 0.9672449827194214, + "learning_rate": 0.000172, + "loss": 2.6929, + "step": 86 + }, + { + "epoch": 0.00702122508272133, + "grad_norm": 0.9747781157493591, + "learning_rate": 0.000174, + "loss": 2.7676, + "step": 87 + }, + { + "epoch": 0.007101928819304334, + "grad_norm": 0.9193839430809021, + "learning_rate": 0.00017600000000000002, + "loss": 2.7124, + "step": 88 + }, + { + "epoch": 0.0071826325558873375, + "grad_norm": 1.078499436378479, + "learning_rate": 0.00017800000000000002, + "loss": 2.8018, + "step": 89 + }, + { + "epoch": 0.007263336292470342, + "grad_norm": 1.070957899093628, + "learning_rate": 0.00018, + "loss": 2.7889, + "step": 90 + }, + { + "epoch": 0.007344040029053345, + "grad_norm": 1.160942554473877, + 
"learning_rate": 0.000182, + "loss": 2.8026, + "step": 91 + }, + { + "epoch": 0.007424743765636349, + "grad_norm": 0.9988501071929932, + "learning_rate": 0.00018400000000000003, + "loss": 2.7746, + "step": 92 + }, + { + "epoch": 0.007505447502219353, + "grad_norm": 1.0882319211959839, + "learning_rate": 0.00018600000000000002, + "loss": 2.8105, + "step": 93 + }, + { + "epoch": 0.0075861512388023565, + "grad_norm": 1.1882357597351074, + "learning_rate": 0.000188, + "loss": 2.8294, + "step": 94 + }, + { + "epoch": 0.00766685497538536, + "grad_norm": 1.0761829614639282, + "learning_rate": 0.00019, + "loss": 2.7846, + "step": 95 + }, + { + "epoch": 0.007747558711968364, + "grad_norm": 1.0665982961654663, + "learning_rate": 0.000192, + "loss": 2.8542, + "step": 96 + }, + { + "epoch": 0.007828262448551369, + "grad_norm": 1.206127405166626, + "learning_rate": 0.000194, + "loss": 2.7711, + "step": 97 + }, + { + "epoch": 0.007908966185134371, + "grad_norm": 1.095150113105774, + "learning_rate": 0.000196, + "loss": 2.732, + "step": 98 + }, + { + "epoch": 0.007989669921717376, + "grad_norm": 1.118348240852356, + "learning_rate": 0.00019800000000000002, + "loss": 2.7736, + "step": 99 + }, + { + "epoch": 0.00807037365830038, + "grad_norm": 1.0646461248397827, + "learning_rate": 0.0002, + "loss": 2.8584, + "step": 100 + }, + { + "epoch": 0.008151077394883383, + "grad_norm": 1.0387661457061768, + "learning_rate": 0.0001999999987538693, + "loss": 2.7961, + "step": 101 + }, + { + "epoch": 0.008231781131466387, + "grad_norm": 1.1905474662780762, + "learning_rate": 0.00019999999501547723, + "loss": 2.8615, + "step": 102 + }, + { + "epoch": 0.008312484868049391, + "grad_norm": 0.9630722999572754, + "learning_rate": 0.0001999999887848239, + "loss": 2.8076, + "step": 103 + }, + { + "epoch": 0.008393188604632394, + "grad_norm": 1.1034537553787231, + "learning_rate": 0.00019999998006190942, + "loss": 2.8402, + "step": 104 + }, + { + "epoch": 0.008473892341215398, + "grad_norm": 
1.0679295063018799, + "learning_rate": 0.00019999996884673403, + "loss": 2.7948, + "step": 105 + }, + { + "epoch": 0.008554596077798403, + "grad_norm": 1.0108860731124878, + "learning_rate": 0.00019999995513929802, + "loss": 2.7996, + "step": 106 + }, + { + "epoch": 0.008635299814381405, + "grad_norm": 1.3762084245681763, + "learning_rate": 0.0001999999389396017, + "loss": 2.8023, + "step": 107 + }, + { + "epoch": 0.00871600355096441, + "grad_norm": 1.1320533752441406, + "learning_rate": 0.00019999992024764555, + "loss": 2.793, + "step": 108 + }, + { + "epoch": 0.008796707287547414, + "grad_norm": 1.1752389669418335, + "learning_rate": 0.00019999989906342998, + "loss": 2.8274, + "step": 109 + }, + { + "epoch": 0.008877411024130416, + "grad_norm": 1.2734956741333008, + "learning_rate": 0.00019999987538695552, + "loss": 2.8017, + "step": 110 + }, + { + "epoch": 0.00895811476071342, + "grad_norm": 1.3703055381774902, + "learning_rate": 0.00019999984921822273, + "loss": 2.8699, + "step": 111 + }, + { + "epoch": 0.009038818497296425, + "grad_norm": 1.0079127550125122, + "learning_rate": 0.0001999998205572323, + "loss": 2.8845, + "step": 112 + }, + { + "epoch": 0.00911952223387943, + "grad_norm": 1.28025484085083, + "learning_rate": 0.000199999789403985, + "loss": 2.8636, + "step": 113 + }, + { + "epoch": 0.009200225970462432, + "grad_norm": 1.1057093143463135, + "learning_rate": 0.00019999975575848148, + "loss": 2.8484, + "step": 114 + }, + { + "epoch": 0.009280929707045436, + "grad_norm": 1.0874677896499634, + "learning_rate": 0.00019999971962072265, + "loss": 2.7314, + "step": 115 + }, + { + "epoch": 0.00936163344362844, + "grad_norm": 1.0909658670425415, + "learning_rate": 0.00019999968099070943, + "loss": 2.7827, + "step": 116 + }, + { + "epoch": 0.009442337180211443, + "grad_norm": 1.0881624221801758, + "learning_rate": 0.00019999963986844273, + "loss": 2.827, + "step": 117 + }, + { + "epoch": 0.009523040916794448, + "grad_norm": 1.2498180866241455, + 
"learning_rate": 0.00019999959625392362, + "loss": 2.8695, + "step": 118 + }, + { + "epoch": 0.009603744653377452, + "grad_norm": 1.1344549655914307, + "learning_rate": 0.00019999955014715317, + "loss": 2.8079, + "step": 119 + }, + { + "epoch": 0.009684448389960455, + "grad_norm": 1.032563328742981, + "learning_rate": 0.00019999950154813253, + "loss": 2.7787, + "step": 120 + }, + { + "epoch": 0.009765152126543459, + "grad_norm": 0.9630110263824463, + "learning_rate": 0.0001999994504568629, + "loss": 2.8103, + "step": 121 + }, + { + "epoch": 0.009845855863126463, + "grad_norm": 1.0418641567230225, + "learning_rate": 0.0001999993968733456, + "loss": 2.8679, + "step": 122 + }, + { + "epoch": 0.009926559599709466, + "grad_norm": 0.9797310829162598, + "learning_rate": 0.00019999934079758188, + "loss": 2.7792, + "step": 123 + }, + { + "epoch": 0.01000726333629247, + "grad_norm": 1.0494028329849243, + "learning_rate": 0.00019999928222957323, + "loss": 2.8007, + "step": 124 + }, + { + "epoch": 0.010087967072875475, + "grad_norm": 1.1570640802383423, + "learning_rate": 0.00019999922116932105, + "loss": 2.8331, + "step": 125 + }, + { + "epoch": 0.010168670809458477, + "grad_norm": 1.2753098011016846, + "learning_rate": 0.00019999915761682684, + "loss": 2.8533, + "step": 126 + }, + { + "epoch": 0.010249374546041481, + "grad_norm": 0.9804013967514038, + "learning_rate": 0.00019999909157209227, + "loss": 2.841, + "step": 127 + }, + { + "epoch": 0.010330078282624486, + "grad_norm": 1.320839285850525, + "learning_rate": 0.00019999902303511892, + "loss": 2.8738, + "step": 128 + }, + { + "epoch": 0.01041078201920749, + "grad_norm": 1.1105059385299683, + "learning_rate": 0.0001999989520059085, + "loss": 2.8458, + "step": 129 + }, + { + "epoch": 0.010491485755790493, + "grad_norm": 1.2869762182235718, + "learning_rate": 0.0001999988784844628, + "loss": 2.7951, + "step": 130 + }, + { + "epoch": 0.010572189492373497, + "grad_norm": 1.1609153747558594, + "learning_rate": 
0.00019999880247078368, + "loss": 2.8147, + "step": 131 + }, + { + "epoch": 0.010652893228956501, + "grad_norm": 1.066728115081787, + "learning_rate": 0.00019999872396487297, + "loss": 2.863, + "step": 132 + }, + { + "epoch": 0.010733596965539504, + "grad_norm": 1.2868720293045044, + "learning_rate": 0.0001999986429667327, + "loss": 2.7765, + "step": 133 + }, + { + "epoch": 0.010814300702122508, + "grad_norm": 1.0064955949783325, + "learning_rate": 0.00019999855947636485, + "loss": 2.7834, + "step": 134 + }, + { + "epoch": 0.010895004438705513, + "grad_norm": 1.146589756011963, + "learning_rate": 0.00019999847349377143, + "loss": 2.7966, + "step": 135 + }, + { + "epoch": 0.010975708175288515, + "grad_norm": 0.9831073880195618, + "learning_rate": 0.0001999983850189547, + "loss": 2.8877, + "step": 136 + }, + { + "epoch": 0.01105641191187152, + "grad_norm": 1.1690322160720825, + "learning_rate": 0.0001999982940519168, + "loss": 2.8514, + "step": 137 + }, + { + "epoch": 0.011137115648454524, + "grad_norm": 1.0014944076538086, + "learning_rate": 0.00019999820059266003, + "loss": 2.7846, + "step": 138 + }, + { + "epoch": 0.011217819385037527, + "grad_norm": 0.9581566452980042, + "learning_rate": 0.0001999981046411867, + "loss": 2.7907, + "step": 139 + }, + { + "epoch": 0.011298523121620531, + "grad_norm": 1.1300675868988037, + "learning_rate": 0.00019999800619749922, + "loss": 2.8099, + "step": 140 + }, + { + "epoch": 0.011379226858203535, + "grad_norm": 0.9845526814460754, + "learning_rate": 0.0001999979052616, + "loss": 2.8607, + "step": 141 + }, + { + "epoch": 0.011459930594786538, + "grad_norm": 1.0781387090682983, + "learning_rate": 0.0001999978018334916, + "loss": 2.831, + "step": 142 + }, + { + "epoch": 0.011540634331369542, + "grad_norm": 1.1142648458480835, + "learning_rate": 0.00019999769591317658, + "loss": 2.9194, + "step": 143 + }, + { + "epoch": 0.011621338067952547, + "grad_norm": 0.9972650408744812, + "learning_rate": 0.00019999758750065757, + "loss": 
2.8253, + "step": 144 + }, + { + "epoch": 0.01170204180453555, + "grad_norm": 1.040738582611084, + "learning_rate": 0.0001999974765959373, + "loss": 2.7378, + "step": 145 + }, + { + "epoch": 0.011782745541118553, + "grad_norm": 0.9824327826499939, + "learning_rate": 0.00019999736319901848, + "loss": 2.8263, + "step": 146 + }, + { + "epoch": 0.011863449277701558, + "grad_norm": 1.0531679391860962, + "learning_rate": 0.00019999724730990402, + "loss": 2.7975, + "step": 147 + }, + { + "epoch": 0.011944153014284562, + "grad_norm": 1.0699561834335327, + "learning_rate": 0.0001999971289285967, + "loss": 2.8199, + "step": 148 + }, + { + "epoch": 0.012024856750867565, + "grad_norm": 1.0203633308410645, + "learning_rate": 0.0001999970080550996, + "loss": 2.8479, + "step": 149 + }, + { + "epoch": 0.012105560487450569, + "grad_norm": 1.035589575767517, + "learning_rate": 0.00019999688468941564, + "loss": 2.8263, + "step": 150 + }, + { + "epoch": 0.012186264224033573, + "grad_norm": 0.9706670641899109, + "learning_rate": 0.00019999675883154792, + "loss": 2.8324, + "step": 151 + }, + { + "epoch": 0.012266967960616576, + "grad_norm": 1.1565446853637695, + "learning_rate": 0.00019999663048149958, + "loss": 2.8098, + "step": 152 + }, + { + "epoch": 0.01234767169719958, + "grad_norm": 1.025796890258789, + "learning_rate": 0.0001999964996392738, + "loss": 2.7906, + "step": 153 + }, + { + "epoch": 0.012428375433782585, + "grad_norm": 1.117438554763794, + "learning_rate": 0.00019999636630487386, + "loss": 2.8276, + "step": 154 + }, + { + "epoch": 0.012509079170365587, + "grad_norm": 1.025159478187561, + "learning_rate": 0.00019999623047830308, + "loss": 2.8089, + "step": 155 + }, + { + "epoch": 0.012589782906948592, + "grad_norm": 1.007582664489746, + "learning_rate": 0.00019999609215956487, + "loss": 2.8147, + "step": 156 + }, + { + "epoch": 0.012670486643531596, + "grad_norm": 1.0504885911941528, + "learning_rate": 0.0001999959513486626, + "loss": 2.8329, + "step": 157 + }, + { + 
"epoch": 0.012751190380114599, + "grad_norm": 0.918382465839386, + "learning_rate": 0.00019999580804559987, + "loss": 2.878, + "step": 158 + }, + { + "epoch": 0.012831894116697603, + "grad_norm": 0.9397236704826355, + "learning_rate": 0.0001999956622503802, + "loss": 2.8254, + "step": 159 + }, + { + "epoch": 0.012912597853280607, + "grad_norm": 0.9985697269439697, + "learning_rate": 0.00019999551396300723, + "loss": 2.8417, + "step": 160 + }, + { + "epoch": 0.01299330158986361, + "grad_norm": 0.9866878390312195, + "learning_rate": 0.00019999536318348465, + "loss": 2.7524, + "step": 161 + }, + { + "epoch": 0.013074005326446614, + "grad_norm": 1.0707440376281738, + "learning_rate": 0.00019999520991181627, + "loss": 2.8171, + "step": 162 + }, + { + "epoch": 0.013154709063029619, + "grad_norm": 0.9359755516052246, + "learning_rate": 0.00019999505414800583, + "loss": 2.8463, + "step": 163 + }, + { + "epoch": 0.013235412799612623, + "grad_norm": 1.056647777557373, + "learning_rate": 0.00019999489589205726, + "loss": 2.8602, + "step": 164 + }, + { + "epoch": 0.013316116536195625, + "grad_norm": 0.975370466709137, + "learning_rate": 0.0001999947351439745, + "loss": 2.8292, + "step": 165 + }, + { + "epoch": 0.01339682027277863, + "grad_norm": 0.9241237044334412, + "learning_rate": 0.00019999457190376157, + "loss": 2.7827, + "step": 166 + }, + { + "epoch": 0.013477524009361634, + "grad_norm": 0.9478302001953125, + "learning_rate": 0.00019999440617142247, + "loss": 2.7708, + "step": 167 + }, + { + "epoch": 0.013558227745944637, + "grad_norm": 0.9804863333702087, + "learning_rate": 0.00019999423794696142, + "loss": 2.7696, + "step": 168 + }, + { + "epoch": 0.013638931482527641, + "grad_norm": 0.9764013886451721, + "learning_rate": 0.00019999406723038255, + "loss": 2.8521, + "step": 169 + }, + { + "epoch": 0.013719635219110645, + "grad_norm": 1.026532769203186, + "learning_rate": 0.00019999389402169016, + "loss": 2.8507, + "step": 170 + }, + { + "epoch": 0.013800338955693648, + 
"grad_norm": 0.9983204007148743, + "learning_rate": 0.00019999371832088854, + "loss": 2.8761, + "step": 171 + }, + { + "epoch": 0.013881042692276652, + "grad_norm": 0.9914593696594238, + "learning_rate": 0.00019999354012798206, + "loss": 2.8723, + "step": 172 + }, + { + "epoch": 0.013961746428859657, + "grad_norm": 1.066962718963623, + "learning_rate": 0.00019999335944297517, + "loss": 2.8635, + "step": 173 + }, + { + "epoch": 0.01404245016544266, + "grad_norm": 1.0848973989486694, + "learning_rate": 0.0001999931762658724, + "loss": 2.8645, + "step": 174 + }, + { + "epoch": 0.014123153902025664, + "grad_norm": 1.0245702266693115, + "learning_rate": 0.0001999929905966783, + "loss": 2.8463, + "step": 175 + }, + { + "epoch": 0.014203857638608668, + "grad_norm": 1.2363669872283936, + "learning_rate": 0.00019999280243539747, + "loss": 2.8345, + "step": 176 + }, + { + "epoch": 0.01428456137519167, + "grad_norm": 1.0224756002426147, + "learning_rate": 0.0001999926117820346, + "loss": 2.8309, + "step": 177 + }, + { + "epoch": 0.014365265111774675, + "grad_norm": 1.0882402658462524, + "learning_rate": 0.0001999924186365945, + "loss": 2.8619, + "step": 178 + }, + { + "epoch": 0.01444596884835768, + "grad_norm": 1.0384254455566406, + "learning_rate": 0.00019999222299908192, + "loss": 2.8477, + "step": 179 + }, + { + "epoch": 0.014526672584940684, + "grad_norm": 0.9662587642669678, + "learning_rate": 0.00019999202486950177, + "loss": 2.8087, + "step": 180 + }, + { + "epoch": 0.014607376321523686, + "grad_norm": 0.9086892604827881, + "learning_rate": 0.000199991824247859, + "loss": 2.7688, + "step": 181 + }, + { + "epoch": 0.01468808005810669, + "grad_norm": 1.004185676574707, + "learning_rate": 0.00019999162113415854, + "loss": 2.8237, + "step": 182 + }, + { + "epoch": 0.014768783794689695, + "grad_norm": 0.997965395450592, + "learning_rate": 0.00019999141552840552, + "loss": 2.8228, + "step": 183 + }, + { + "epoch": 0.014849487531272697, + "grad_norm": 0.9844975471496582, + 
"learning_rate": 0.00019999120743060503, + "loss": 2.8582, + "step": 184 + }, + { + "epoch": 0.014930191267855702, + "grad_norm": 1.0531272888183594, + "learning_rate": 0.00019999099684076232, + "loss": 2.8571, + "step": 185 + }, + { + "epoch": 0.015010895004438706, + "grad_norm": 1.1178920269012451, + "learning_rate": 0.00019999078375888257, + "loss": 2.85, + "step": 186 + }, + { + "epoch": 0.015091598741021709, + "grad_norm": 1.0773903131484985, + "learning_rate": 0.0001999905681849711, + "loss": 2.826, + "step": 187 + }, + { + "epoch": 0.015172302477604713, + "grad_norm": 1.1573486328125, + "learning_rate": 0.00019999035011903325, + "loss": 2.8866, + "step": 188 + }, + { + "epoch": 0.015253006214187717, + "grad_norm": 1.0401980876922607, + "learning_rate": 0.00019999012956107456, + "loss": 2.788, + "step": 189 + }, + { + "epoch": 0.01533370995077072, + "grad_norm": 1.0150686502456665, + "learning_rate": 0.00019998990651110045, + "loss": 2.8542, + "step": 190 + }, + { + "epoch": 0.015414413687353724, + "grad_norm": 1.1902797222137451, + "learning_rate": 0.0001999896809691165, + "loss": 2.9209, + "step": 191 + }, + { + "epoch": 0.015495117423936729, + "grad_norm": 1.0177555084228516, + "learning_rate": 0.0001999894529351283, + "loss": 2.7852, + "step": 192 + }, + { + "epoch": 0.015575821160519731, + "grad_norm": 1.062322974205017, + "learning_rate": 0.00019998922240914159, + "loss": 2.8328, + "step": 193 + }, + { + "epoch": 0.015656524897102737, + "grad_norm": 1.0937334299087524, + "learning_rate": 0.00019998898939116205, + "loss": 2.8069, + "step": 194 + }, + { + "epoch": 0.015737228633685738, + "grad_norm": 0.9553198218345642, + "learning_rate": 0.00019998875388119554, + "loss": 2.8402, + "step": 195 + }, + { + "epoch": 0.015817932370268743, + "grad_norm": 1.1802356243133545, + "learning_rate": 0.0001999885158792479, + "loss": 2.945, + "step": 196 + }, + { + "epoch": 0.015898636106851747, + "grad_norm": 1.160346269607544, + "learning_rate": 
0.0001999882753853251, + "loss": 2.8341, + "step": 197 + }, + { + "epoch": 0.01597933984343475, + "grad_norm": 1.0379278659820557, + "learning_rate": 0.00019998803239943305, + "loss": 2.898, + "step": 198 + }, + { + "epoch": 0.016060043580017756, + "grad_norm": 1.2022395133972168, + "learning_rate": 0.00019998778692157792, + "loss": 2.8302, + "step": 199 + }, + { + "epoch": 0.01614074731660076, + "grad_norm": 1.057017207145691, + "learning_rate": 0.00019998753895176575, + "loss": 2.8474, + "step": 200 + }, + { + "epoch": 0.01622145105318376, + "grad_norm": 0.9299072027206421, + "learning_rate": 0.00019998728849000271, + "loss": 2.8266, + "step": 201 + }, + { + "epoch": 0.016302154789766765, + "grad_norm": 1.0296592712402344, + "learning_rate": 0.00019998703553629512, + "loss": 2.8106, + "step": 202 + }, + { + "epoch": 0.01638285852634977, + "grad_norm": 0.9641671180725098, + "learning_rate": 0.0001999867800906492, + "loss": 2.8089, + "step": 203 + }, + { + "epoch": 0.016463562262932774, + "grad_norm": 0.9951125383377075, + "learning_rate": 0.00019998652215307136, + "loss": 2.813, + "step": 204 + }, + { + "epoch": 0.016544265999515778, + "grad_norm": 1.0089969635009766, + "learning_rate": 0.00019998626172356804, + "loss": 2.8021, + "step": 205 + }, + { + "epoch": 0.016624969736098782, + "grad_norm": 0.9916231632232666, + "learning_rate": 0.00019998599880214566, + "loss": 2.8455, + "step": 206 + }, + { + "epoch": 0.016705673472681787, + "grad_norm": 0.9612492322921753, + "learning_rate": 0.00019998573338881088, + "loss": 2.8653, + "step": 207 + }, + { + "epoch": 0.016786377209264788, + "grad_norm": 0.984578013420105, + "learning_rate": 0.00019998546548357022, + "loss": 2.8359, + "step": 208 + }, + { + "epoch": 0.016867080945847792, + "grad_norm": 0.9457565546035767, + "learning_rate": 0.0001999851950864304, + "loss": 2.8507, + "step": 209 + }, + { + "epoch": 0.016947784682430796, + "grad_norm": 1.0219026803970337, + "learning_rate": 0.00019998492219739817, + "loss": 
2.8326, + "step": 210 + }, + { + "epoch": 0.0170284884190138, + "grad_norm": 0.971570611000061, + "learning_rate": 0.00019998464681648032, + "loss": 2.8079, + "step": 211 + }, + { + "epoch": 0.017109192155596805, + "grad_norm": 0.9731320738792419, + "learning_rate": 0.00019998436894368368, + "loss": 2.8536, + "step": 212 + }, + { + "epoch": 0.01718989589217981, + "grad_norm": 1.0519105195999146, + "learning_rate": 0.00019998408857901525, + "loss": 2.8589, + "step": 213 + }, + { + "epoch": 0.01727059962876281, + "grad_norm": 0.9725883603096008, + "learning_rate": 0.00019998380572248194, + "loss": 2.7937, + "step": 214 + }, + { + "epoch": 0.017351303365345815, + "grad_norm": 1.0397064685821533, + "learning_rate": 0.00019998352037409084, + "loss": 2.9145, + "step": 215 + }, + { + "epoch": 0.01743200710192882, + "grad_norm": 0.9094852209091187, + "learning_rate": 0.00019998323253384904, + "loss": 2.7692, + "step": 216 + }, + { + "epoch": 0.017512710838511823, + "grad_norm": 0.941646158695221, + "learning_rate": 0.00019998294220176374, + "loss": 2.7975, + "step": 217 + }, + { + "epoch": 0.017593414575094828, + "grad_norm": 0.9939892888069153, + "learning_rate": 0.00019998264937784216, + "loss": 2.8421, + "step": 218 + }, + { + "epoch": 0.017674118311677832, + "grad_norm": 0.8985795378684998, + "learning_rate": 0.0001999823540620916, + "loss": 2.8146, + "step": 219 + }, + { + "epoch": 0.017754822048260833, + "grad_norm": 1.0436078310012817, + "learning_rate": 0.00019998205625451943, + "loss": 2.8416, + "step": 220 + }, + { + "epoch": 0.017835525784843837, + "grad_norm": 0.9941675066947937, + "learning_rate": 0.00019998175595513305, + "loss": 2.8723, + "step": 221 + }, + { + "epoch": 0.01791622952142684, + "grad_norm": 0.9203903675079346, + "learning_rate": 0.00019998145316393995, + "loss": 2.7791, + "step": 222 + }, + { + "epoch": 0.017996933258009846, + "grad_norm": 0.9325969815254211, + "learning_rate": 0.00019998114788094768, + "loss": 2.8664, + "step": 223 + }, + { + 
"epoch": 0.01807763699459285, + "grad_norm": 0.9483599662780762, + "learning_rate": 0.00019998084010616388, + "loss": 2.7782, + "step": 224 + }, + { + "epoch": 0.018158340731175854, + "grad_norm": 0.9555078744888306, + "learning_rate": 0.00019998052983959615, + "loss": 2.7771, + "step": 225 + }, + { + "epoch": 0.01823904446775886, + "grad_norm": 0.9452421069145203, + "learning_rate": 0.00019998021708125233, + "loss": 2.8878, + "step": 226 + }, + { + "epoch": 0.01831974820434186, + "grad_norm": 0.9784894585609436, + "learning_rate": 0.00019997990183114007, + "loss": 2.8382, + "step": 227 + }, + { + "epoch": 0.018400451940924864, + "grad_norm": 1.0844931602478027, + "learning_rate": 0.00019997958408926735, + "loss": 2.8015, + "step": 228 + }, + { + "epoch": 0.01848115567750787, + "grad_norm": 1.0416710376739502, + "learning_rate": 0.00019997926385564207, + "loss": 2.8364, + "step": 229 + }, + { + "epoch": 0.018561859414090873, + "grad_norm": 0.9213813543319702, + "learning_rate": 0.00019997894113027215, + "loss": 2.8489, + "step": 230 + }, + { + "epoch": 0.018642563150673877, + "grad_norm": 1.0186388492584229, + "learning_rate": 0.00019997861591316567, + "loss": 2.914, + "step": 231 + }, + { + "epoch": 0.01872326688725688, + "grad_norm": 1.0032236576080322, + "learning_rate": 0.00019997828820433072, + "loss": 2.8733, + "step": 232 + }, + { + "epoch": 0.018803970623839882, + "grad_norm": 0.9783569574356079, + "learning_rate": 0.0001999779580037755, + "loss": 2.851, + "step": 233 + }, + { + "epoch": 0.018884674360422887, + "grad_norm": 0.8471441268920898, + "learning_rate": 0.00019997762531150825, + "loss": 2.7923, + "step": 234 + }, + { + "epoch": 0.01896537809700589, + "grad_norm": 0.8912937641143799, + "learning_rate": 0.00019997729012753717, + "loss": 2.8725, + "step": 235 + }, + { + "epoch": 0.019046081833588895, + "grad_norm": 1.2453325986862183, + "learning_rate": 0.00019997695245187075, + "loss": 2.9292, + "step": 236 + }, + { + "epoch": 0.0191267855701719, + 
"grad_norm": 0.8870908617973328, + "learning_rate": 0.0001999766122845173, + "loss": 2.8008, + "step": 237 + }, + { + "epoch": 0.019207489306754904, + "grad_norm": 1.0679768323898315, + "learning_rate": 0.0001999762696254853, + "loss": 2.8919, + "step": 238 + }, + { + "epoch": 0.01928819304333791, + "grad_norm": 0.9769917130470276, + "learning_rate": 0.00019997592447478337, + "loss": 2.7937, + "step": 239 + }, + { + "epoch": 0.01936889677992091, + "grad_norm": 1.066183090209961, + "learning_rate": 0.00019997557683242004, + "loss": 2.8375, + "step": 240 + }, + { + "epoch": 0.019449600516503913, + "grad_norm": 0.9834103584289551, + "learning_rate": 0.000199975226698404, + "loss": 2.8577, + "step": 241 + }, + { + "epoch": 0.019530304253086918, + "grad_norm": 1.102211833000183, + "learning_rate": 0.00019997487407274396, + "loss": 2.8466, + "step": 242 + }, + { + "epoch": 0.019611007989669922, + "grad_norm": 0.9936226606369019, + "learning_rate": 0.00019997451895544872, + "loss": 2.7729, + "step": 243 + }, + { + "epoch": 0.019691711726252926, + "grad_norm": 1.0995992422103882, + "learning_rate": 0.00019997416134652713, + "loss": 2.8425, + "step": 244 + }, + { + "epoch": 0.01977241546283593, + "grad_norm": 0.94181889295578, + "learning_rate": 0.00019997380124598814, + "loss": 2.8495, + "step": 245 + }, + { + "epoch": 0.01985311919941893, + "grad_norm": 0.9791487455368042, + "learning_rate": 0.00019997343865384067, + "loss": 2.8919, + "step": 246 + }, + { + "epoch": 0.019933822936001936, + "grad_norm": 0.9173399209976196, + "learning_rate": 0.00019997307357009375, + "loss": 2.8593, + "step": 247 + }, + { + "epoch": 0.02001452667258494, + "grad_norm": 0.9675281047821045, + "learning_rate": 0.00019997270599475653, + "loss": 2.8226, + "step": 248 + }, + { + "epoch": 0.020095230409167945, + "grad_norm": 0.8928244113922119, + "learning_rate": 0.00019997233592783812, + "loss": 2.8296, + "step": 249 + }, + { + "epoch": 0.02017593414575095, + "grad_norm": 0.928601861000061, + 
"learning_rate": 0.0001999719633693478, + "loss": 2.8399, + "step": 250 + }, + { + "epoch": 0.020256637882333953, + "grad_norm": 0.9378123879432678, + "learning_rate": 0.00019997158831929482, + "loss": 2.8711, + "step": 251 + }, + { + "epoch": 0.020337341618916954, + "grad_norm": 0.9041047692298889, + "learning_rate": 0.00019997121077768853, + "loss": 2.8338, + "step": 252 + }, + { + "epoch": 0.02041804535549996, + "grad_norm": 0.9673274755477905, + "learning_rate": 0.00019997083074453832, + "loss": 2.8556, + "step": 253 + }, + { + "epoch": 0.020498749092082963, + "grad_norm": 0.9204083681106567, + "learning_rate": 0.0001999704482198537, + "loss": 2.7954, + "step": 254 + }, + { + "epoch": 0.020579452828665967, + "grad_norm": 0.9267606735229492, + "learning_rate": 0.00019997006320364417, + "loss": 2.8656, + "step": 255 + }, + { + "epoch": 0.02066015656524897, + "grad_norm": 0.9562919735908508, + "learning_rate": 0.00019996967569591936, + "loss": 2.8406, + "step": 256 + }, + { + "epoch": 0.020740860301831976, + "grad_norm": 0.9065950512886047, + "learning_rate": 0.0001999692856966889, + "loss": 2.7856, + "step": 257 + }, + { + "epoch": 0.02082156403841498, + "grad_norm": 0.9136463403701782, + "learning_rate": 0.0001999688932059625, + "loss": 2.8083, + "step": 258 + }, + { + "epoch": 0.02090226777499798, + "grad_norm": 0.9785570502281189, + "learning_rate": 0.00019996849822374998, + "loss": 2.7984, + "step": 259 + }, + { + "epoch": 0.020982971511580985, + "grad_norm": 0.9549168348312378, + "learning_rate": 0.00019996810075006117, + "loss": 2.8048, + "step": 260 + }, + { + "epoch": 0.02106367524816399, + "grad_norm": 0.8923975825309753, + "learning_rate": 0.00019996770078490594, + "loss": 2.8559, + "step": 261 + }, + { + "epoch": 0.021144378984746994, + "grad_norm": 0.9516206383705139, + "learning_rate": 0.0001999672983282943, + "loss": 2.9171, + "step": 262 + }, + { + "epoch": 0.02122508272133, + "grad_norm": 0.9101666808128357, + "learning_rate": 
0.0001999668933802363, + "loss": 2.8746, + "step": 263 + }, + { + "epoch": 0.021305786457913003, + "grad_norm": 0.9081267714500427, + "learning_rate": 0.00019996648594074195, + "loss": 2.8637, + "step": 264 + }, + { + "epoch": 0.021386490194496004, + "grad_norm": 1.0048178434371948, + "learning_rate": 0.0001999660760098215, + "loss": 2.8783, + "step": 265 + }, + { + "epoch": 0.021467193931079008, + "grad_norm": 0.9625924229621887, + "learning_rate": 0.0001999656635874851, + "loss": 2.8226, + "step": 266 + }, + { + "epoch": 0.021547897667662012, + "grad_norm": 0.9911805391311646, + "learning_rate": 0.00019996524867374306, + "loss": 2.8135, + "step": 267 + }, + { + "epoch": 0.021628601404245017, + "grad_norm": 0.8920134902000427, + "learning_rate": 0.00019996483126860572, + "loss": 2.7934, + "step": 268 + }, + { + "epoch": 0.02170930514082802, + "grad_norm": 1.0806514024734497, + "learning_rate": 0.00019996441137208346, + "loss": 2.8435, + "step": 269 + }, + { + "epoch": 0.021790008877411025, + "grad_norm": 0.9426547884941101, + "learning_rate": 0.00019996398898418675, + "loss": 2.7919, + "step": 270 + }, + { + "epoch": 0.021870712613994026, + "grad_norm": 0.9893020987510681, + "learning_rate": 0.00019996356410492615, + "loss": 2.8616, + "step": 271 + }, + { + "epoch": 0.02195141635057703, + "grad_norm": 1.0196046829223633, + "learning_rate": 0.00019996313673431218, + "loss": 2.8101, + "step": 272 + }, + { + "epoch": 0.022032120087160035, + "grad_norm": 0.9556699991226196, + "learning_rate": 0.00019996270687235558, + "loss": 2.8669, + "step": 273 + }, + { + "epoch": 0.02211282382374304, + "grad_norm": 0.8985902667045593, + "learning_rate": 0.00019996227451906702, + "loss": 2.8078, + "step": 274 + }, + { + "epoch": 0.022193527560326044, + "grad_norm": 1.0198246240615845, + "learning_rate": 0.00019996183967445726, + "loss": 2.8314, + "step": 275 + }, + { + "epoch": 0.022274231296909048, + "grad_norm": 0.9360179901123047, + "learning_rate": 0.00019996140233853715, + 
"loss": 2.7969, + "step": 276 + }, + { + "epoch": 0.022354935033492052, + "grad_norm": 1.0250160694122314, + "learning_rate": 0.00019996096251131759, + "loss": 2.7897, + "step": 277 + }, + { + "epoch": 0.022435638770075053, + "grad_norm": 0.934582531452179, + "learning_rate": 0.00019996052019280954, + "loss": 2.8667, + "step": 278 + }, + { + "epoch": 0.022516342506658057, + "grad_norm": 0.9394461512565613, + "learning_rate": 0.00019996007538302407, + "loss": 2.7681, + "step": 279 + }, + { + "epoch": 0.022597046243241062, + "grad_norm": 0.9468861222267151, + "learning_rate": 0.00019995962808197216, + "loss": 2.7709, + "step": 280 + }, + { + "epoch": 0.022677749979824066, + "grad_norm": 0.9798515439033508, + "learning_rate": 0.00019995917828966506, + "loss": 2.8274, + "step": 281 + }, + { + "epoch": 0.02275845371640707, + "grad_norm": 1.0403941869735718, + "learning_rate": 0.00019995872600611395, + "loss": 2.8897, + "step": 282 + }, + { + "epoch": 0.022839157452990075, + "grad_norm": 0.9795030951499939, + "learning_rate": 0.00019995827123133006, + "loss": 2.8792, + "step": 283 + }, + { + "epoch": 0.022919861189573076, + "grad_norm": 0.9162538647651672, + "learning_rate": 0.00019995781396532479, + "loss": 2.8339, + "step": 284 + }, + { + "epoch": 0.02300056492615608, + "grad_norm": 1.0864707231521606, + "learning_rate": 0.00019995735420810947, + "loss": 2.8599, + "step": 285 + }, + { + "epoch": 0.023081268662739084, + "grad_norm": 0.9181776642799377, + "learning_rate": 0.0001999568919596956, + "loss": 2.8736, + "step": 286 + }, + { + "epoch": 0.02316197239932209, + "grad_norm": 0.8880531191825867, + "learning_rate": 0.00019995642722009472, + "loss": 2.8215, + "step": 287 + }, + { + "epoch": 0.023242676135905093, + "grad_norm": 0.9287240505218506, + "learning_rate": 0.00019995595998931835, + "loss": 2.844, + "step": 288 + }, + { + "epoch": 0.023323379872488097, + "grad_norm": 0.886894941329956, + "learning_rate": 0.0001999554902673782, + "loss": 2.8319, + "step": 289 + 
}, + { + "epoch": 0.0234040836090711, + "grad_norm": 0.9564458131790161, + "learning_rate": 0.0001999550180542859, + "loss": 2.8126, + "step": 290 + }, + { + "epoch": 0.023484787345654103, + "grad_norm": 0.8745970726013184, + "learning_rate": 0.00019995454335005334, + "loss": 2.8344, + "step": 291 + }, + { + "epoch": 0.023565491082237107, + "grad_norm": 1.0343137979507446, + "learning_rate": 0.00019995406615469217, + "loss": 2.8498, + "step": 292 + }, + { + "epoch": 0.02364619481882011, + "grad_norm": 0.9951575994491577, + "learning_rate": 0.0001999535864682145, + "loss": 2.8655, + "step": 293 + }, + { + "epoch": 0.023726898555403116, + "grad_norm": 0.8457592725753784, + "learning_rate": 0.0001999531042906321, + "loss": 2.8189, + "step": 294 + }, + { + "epoch": 0.02380760229198612, + "grad_norm": 0.9126954674720764, + "learning_rate": 0.00019995261962195708, + "loss": 2.8272, + "step": 295 + }, + { + "epoch": 0.023888306028569124, + "grad_norm": 1.0171937942504883, + "learning_rate": 0.0001999521324622015, + "loss": 2.869, + "step": 296 + }, + { + "epoch": 0.023969009765152125, + "grad_norm": 0.9887226223945618, + "learning_rate": 0.00019995164281137753, + "loss": 2.7643, + "step": 297 + }, + { + "epoch": 0.02404971350173513, + "grad_norm": 1.4240798950195312, + "learning_rate": 0.00019995115066949733, + "loss": 2.8332, + "step": 298 + }, + { + "epoch": 0.024130417238318134, + "grad_norm": 0.9856921434402466, + "learning_rate": 0.00019995065603657316, + "loss": 2.8283, + "step": 299 + }, + { + "epoch": 0.024211120974901138, + "grad_norm": 0.997164785861969, + "learning_rate": 0.0001999501589126174, + "loss": 2.9164, + "step": 300 + }, + { + "epoch": 0.024291824711484142, + "grad_norm": 1.6480412483215332, + "learning_rate": 0.00019994965929764238, + "loss": 2.8941, + "step": 301 + }, + { + "epoch": 0.024372528448067147, + "grad_norm": 1.1590758562088013, + "learning_rate": 0.0001999491571916606, + "loss": 2.8127, + "step": 302 + }, + { + "epoch": 
0.024453232184650148, + "grad_norm": 1.1228376626968384, + "learning_rate": 0.00019994865259468454, + "loss": 2.8439, + "step": 303 + }, + { + "epoch": 0.024533935921233152, + "grad_norm": 1.0426349639892578, + "learning_rate": 0.0001999481455067268, + "loss": 2.8671, + "step": 304 + }, + { + "epoch": 0.024614639657816156, + "grad_norm": 1.0911917686462402, + "learning_rate": 0.00019994763592779996, + "loss": 2.8297, + "step": 305 + }, + { + "epoch": 0.02469534339439916, + "grad_norm": 1.0493195056915283, + "learning_rate": 0.00019994712385791683, + "loss": 2.7996, + "step": 306 + }, + { + "epoch": 0.024776047130982165, + "grad_norm": 0.9275023341178894, + "learning_rate": 0.00019994660929709008, + "loss": 2.7949, + "step": 307 + }, + { + "epoch": 0.02485675086756517, + "grad_norm": 1.1074799299240112, + "learning_rate": 0.00019994609224533255, + "loss": 2.8364, + "step": 308 + }, + { + "epoch": 0.024937454604148174, + "grad_norm": 0.9189429879188538, + "learning_rate": 0.00019994557270265717, + "loss": 2.8293, + "step": 309 + }, + { + "epoch": 0.025018158340731175, + "grad_norm": 0.9577780961990356, + "learning_rate": 0.00019994505066907683, + "loss": 2.8295, + "step": 310 + }, + { + "epoch": 0.02509886207731418, + "grad_norm": 1.0707277059555054, + "learning_rate": 0.0001999445261446046, + "loss": 2.795, + "step": 311 + }, + { + "epoch": 0.025179565813897183, + "grad_norm": 0.9211257696151733, + "learning_rate": 0.0001999439991292535, + "loss": 2.8355, + "step": 312 + }, + { + "epoch": 0.025260269550480188, + "grad_norm": 0.987779438495636, + "learning_rate": 0.00019994346962303667, + "loss": 2.8175, + "step": 313 + }, + { + "epoch": 0.025340973287063192, + "grad_norm": 0.9317128658294678, + "learning_rate": 0.00019994293762596734, + "loss": 2.8205, + "step": 314 + }, + { + "epoch": 0.025421677023646196, + "grad_norm": 0.8989154100418091, + "learning_rate": 0.00019994240313805873, + "loss": 2.8257, + "step": 315 + }, + { + "epoch": 0.025502380760229197, + 
"grad_norm": 0.8391042351722717, + "learning_rate": 0.00019994186615932423, + "loss": 2.8105, + "step": 316 + }, + { + "epoch": 0.0255830844968122, + "grad_norm": 0.8908089995384216, + "learning_rate": 0.00019994132668977715, + "loss": 2.7894, + "step": 317 + }, + { + "epoch": 0.025663788233395206, + "grad_norm": 0.8666881322860718, + "learning_rate": 0.00019994078472943097, + "loss": 2.7934, + "step": 318 + }, + { + "epoch": 0.02574449196997821, + "grad_norm": 0.8834616541862488, + "learning_rate": 0.00019994024027829914, + "loss": 2.8166, + "step": 319 + }, + { + "epoch": 0.025825195706561214, + "grad_norm": 0.9831370115280151, + "learning_rate": 0.00019993969333639532, + "loss": 2.889, + "step": 320 + }, + { + "epoch": 0.02590589944314422, + "grad_norm": 0.9171644449234009, + "learning_rate": 0.00019993914390373308, + "loss": 2.8582, + "step": 321 + }, + { + "epoch": 0.02598660317972722, + "grad_norm": 0.9624861478805542, + "learning_rate": 0.00019993859198032615, + "loss": 2.8574, + "step": 322 + }, + { + "epoch": 0.026067306916310224, + "grad_norm": 0.8826586008071899, + "learning_rate": 0.00019993803756618826, + "loss": 2.8544, + "step": 323 + }, + { + "epoch": 0.02614801065289323, + "grad_norm": 0.9286447763442993, + "learning_rate": 0.0001999374806613332, + "loss": 2.7937, + "step": 324 + }, + { + "epoch": 0.026228714389476233, + "grad_norm": 0.9901685118675232, + "learning_rate": 0.00019993692126577493, + "loss": 2.7654, + "step": 325 + }, + { + "epoch": 0.026309418126059237, + "grad_norm": 0.9624341130256653, + "learning_rate": 0.00019993635937952734, + "loss": 2.8804, + "step": 326 + }, + { + "epoch": 0.02639012186264224, + "grad_norm": 0.8867596387863159, + "learning_rate": 0.0001999357950026044, + "loss": 2.8254, + "step": 327 + }, + { + "epoch": 0.026470825599225246, + "grad_norm": 0.9243817925453186, + "learning_rate": 0.00019993522813502022, + "loss": 2.8177, + "step": 328 + }, + { + "epoch": 0.026551529335808247, + "grad_norm": 0.9322247505187988, 
+ "learning_rate": 0.00019993465877678895, + "loss": 2.9023, + "step": 329 + }, + { + "epoch": 0.02663223307239125, + "grad_norm": 0.8768174648284912, + "learning_rate": 0.00019993408692792474, + "loss": 2.8184, + "step": 330 + }, + { + "epoch": 0.026712936808974255, + "grad_norm": 0.9436870813369751, + "learning_rate": 0.00019993351258844184, + "loss": 2.8319, + "step": 331 + }, + { + "epoch": 0.02679364054555726, + "grad_norm": 0.9970327019691467, + "learning_rate": 0.0001999329357583546, + "loss": 2.7946, + "step": 332 + }, + { + "epoch": 0.026874344282140264, + "grad_norm": 0.9100088477134705, + "learning_rate": 0.00019993235643767736, + "loss": 2.782, + "step": 333 + }, + { + "epoch": 0.02695504801872327, + "grad_norm": 0.9693402051925659, + "learning_rate": 0.00019993177462642456, + "loss": 2.8182, + "step": 334 + }, + { + "epoch": 0.02703575175530627, + "grad_norm": 0.8761965036392212, + "learning_rate": 0.00019993119032461073, + "loss": 2.8058, + "step": 335 + }, + { + "epoch": 0.027116455491889273, + "grad_norm": 1.0699270963668823, + "learning_rate": 0.00019993060353225043, + "loss": 2.9211, + "step": 336 + }, + { + "epoch": 0.027197159228472278, + "grad_norm": 1.0094172954559326, + "learning_rate": 0.00019993001424935822, + "loss": 2.8837, + "step": 337 + }, + { + "epoch": 0.027277862965055282, + "grad_norm": 0.9683573842048645, + "learning_rate": 0.00019992942247594887, + "loss": 2.8523, + "step": 338 + }, + { + "epoch": 0.027358566701638286, + "grad_norm": 1.3243813514709473, + "learning_rate": 0.00019992882821203708, + "loss": 2.7891, + "step": 339 + }, + { + "epoch": 0.02743927043822129, + "grad_norm": 1.0227056741714478, + "learning_rate": 0.0001999282314576377, + "loss": 2.8396, + "step": 340 + }, + { + "epoch": 0.027519974174804295, + "grad_norm": 1.03257417678833, + "learning_rate": 0.00019992763221276556, + "loss": 2.824, + "step": 341 + }, + { + "epoch": 0.027600677911387296, + "grad_norm": 0.86456698179245, + "learning_rate": 
0.00019992703047743562, + "loss": 2.8006, + "step": 342 + }, + { + "epoch": 0.0276813816479703, + "grad_norm": 0.965339720249176, + "learning_rate": 0.00019992642625166286, + "loss": 2.8658, + "step": 343 + }, + { + "epoch": 0.027762085384553305, + "grad_norm": 1.0028942823410034, + "learning_rate": 0.00019992581953546236, + "loss": 2.8311, + "step": 344 + }, + { + "epoch": 0.02784278912113631, + "grad_norm": 0.984307050704956, + "learning_rate": 0.0001999252103288492, + "loss": 2.8748, + "step": 345 + }, + { + "epoch": 0.027923492857719313, + "grad_norm": 0.9405032396316528, + "learning_rate": 0.00019992459863183858, + "loss": 2.8371, + "step": 346 + }, + { + "epoch": 0.028004196594302318, + "grad_norm": 0.9867002367973328, + "learning_rate": 0.0001999239844444458, + "loss": 2.7914, + "step": 347 + }, + { + "epoch": 0.02808490033088532, + "grad_norm": 0.9224951267242432, + "learning_rate": 0.00019992336776668613, + "loss": 2.7986, + "step": 348 + }, + { + "epoch": 0.028165604067468323, + "grad_norm": 1.002838134765625, + "learning_rate": 0.0001999227485985749, + "loss": 2.8207, + "step": 349 + }, + { + "epoch": 0.028246307804051327, + "grad_norm": 0.8922045826911926, + "learning_rate": 0.00019992212694012757, + "loss": 2.8264, + "step": 350 + }, + { + "epoch": 0.02832701154063433, + "grad_norm": 1.0860323905944824, + "learning_rate": 0.00019992150279135964, + "loss": 2.8778, + "step": 351 + }, + { + "epoch": 0.028407715277217336, + "grad_norm": 1.0995604991912842, + "learning_rate": 0.0001999208761522867, + "loss": 2.8599, + "step": 352 + }, + { + "epoch": 0.02848841901380034, + "grad_norm": 0.8741658926010132, + "learning_rate": 0.0001999202470229243, + "loss": 2.7757, + "step": 353 + }, + { + "epoch": 0.02856912275038334, + "grad_norm": 0.9142587184906006, + "learning_rate": 0.00019991961540328815, + "loss": 2.8235, + "step": 354 + }, + { + "epoch": 0.028649826486966345, + "grad_norm": 1.0000953674316406, + "learning_rate": 0.000199918981293394, + "loss": 2.8, + 
"step": 355 + }, + { + "epoch": 0.02873053022354935, + "grad_norm": 0.9416046738624573, + "learning_rate": 0.00019991834469325763, + "loss": 2.7941, + "step": 356 + }, + { + "epoch": 0.028811233960132354, + "grad_norm": 0.9135935306549072, + "learning_rate": 0.00019991770560289496, + "loss": 2.8315, + "step": 357 + }, + { + "epoch": 0.02889193769671536, + "grad_norm": 0.8867244124412537, + "learning_rate": 0.00019991706402232184, + "loss": 2.8649, + "step": 358 + }, + { + "epoch": 0.028972641433298363, + "grad_norm": 0.9360243678092957, + "learning_rate": 0.00019991641995155431, + "loss": 2.7556, + "step": 359 + }, + { + "epoch": 0.029053345169881367, + "grad_norm": 0.8903766870498657, + "learning_rate": 0.00019991577339060842, + "loss": 2.8379, + "step": 360 + }, + { + "epoch": 0.029134048906464368, + "grad_norm": 1.0178784132003784, + "learning_rate": 0.00019991512433950023, + "loss": 2.8045, + "step": 361 + }, + { + "epoch": 0.029214752643047372, + "grad_norm": 0.9318631887435913, + "learning_rate": 0.000199914472798246, + "loss": 2.823, + "step": 362 + }, + { + "epoch": 0.029295456379630377, + "grad_norm": 0.9384647011756897, + "learning_rate": 0.00019991381876686195, + "loss": 2.9379, + "step": 363 + }, + { + "epoch": 0.02937616011621338, + "grad_norm": 0.9318633675575256, + "learning_rate": 0.00019991316224536433, + "loss": 2.8222, + "step": 364 + }, + { + "epoch": 0.029456863852796385, + "grad_norm": 0.8653938174247742, + "learning_rate": 0.00019991250323376952, + "loss": 2.8447, + "step": 365 + }, + { + "epoch": 0.02953756758937939, + "grad_norm": 0.8997991681098938, + "learning_rate": 0.00019991184173209398, + "loss": 2.8523, + "step": 366 + }, + { + "epoch": 0.02961827132596239, + "grad_norm": 0.8587092161178589, + "learning_rate": 0.00019991117774035416, + "loss": 2.8141, + "step": 367 + }, + { + "epoch": 0.029698975062545395, + "grad_norm": 0.8740741014480591, + "learning_rate": 0.00019991051125856663, + "loss": 2.7487, + "step": 368 + }, + { + "epoch": 
0.0297796787991284, + "grad_norm": 0.9099416732788086, + "learning_rate": 0.00019990984228674798, + "loss": 2.834, + "step": 369 + }, + { + "epoch": 0.029860382535711404, + "grad_norm": 0.8675365447998047, + "learning_rate": 0.0001999091708249149, + "loss": 2.8259, + "step": 370 + }, + { + "epoch": 0.029941086272294408, + "grad_norm": 1.0141092538833618, + "learning_rate": 0.00019990849687308412, + "loss": 2.8369, + "step": 371 + }, + { + "epoch": 0.030021790008877412, + "grad_norm": 0.849155604839325, + "learning_rate": 0.00019990782043127243, + "loss": 2.7505, + "step": 372 + }, + { + "epoch": 0.030102493745460413, + "grad_norm": 1.073754072189331, + "learning_rate": 0.0001999071414994967, + "loss": 2.8939, + "step": 373 + }, + { + "epoch": 0.030183197482043417, + "grad_norm": 0.8615279197692871, + "learning_rate": 0.00019990646007777383, + "loss": 2.7662, + "step": 374 + }, + { + "epoch": 0.030263901218626422, + "grad_norm": 0.8803398609161377, + "learning_rate": 0.0001999057761661208, + "loss": 2.7992, + "step": 375 + }, + { + "epoch": 0.030344604955209426, + "grad_norm": 0.8901834487915039, + "learning_rate": 0.00019990508976455473, + "loss": 2.8222, + "step": 376 + }, + { + "epoch": 0.03042530869179243, + "grad_norm": 0.9443284869194031, + "learning_rate": 0.00019990440087309263, + "loss": 2.8326, + "step": 377 + }, + { + "epoch": 0.030506012428375435, + "grad_norm": 0.9122868180274963, + "learning_rate": 0.0001999037094917517, + "loss": 2.7653, + "step": 378 + }, + { + "epoch": 0.03058671616495844, + "grad_norm": 0.8764635920524597, + "learning_rate": 0.0001999030156205492, + "loss": 2.7813, + "step": 379 + }, + { + "epoch": 0.03066741990154144, + "grad_norm": 0.8466865420341492, + "learning_rate": 0.0001999023192595024, + "loss": 2.8338, + "step": 380 + }, + { + "epoch": 0.030748123638124444, + "grad_norm": 0.8833961486816406, + "learning_rate": 0.00019990162040862863, + "loss": 2.78, + "step": 381 + }, + { + "epoch": 0.03082882737470745, + "grad_norm": 
1.0298357009887695, + "learning_rate": 0.00019990091906794537, + "loss": 2.8059, + "step": 382 + }, + { + "epoch": 0.030909531111290453, + "grad_norm": 0.8651318550109863, + "learning_rate": 0.00019990021523747005, + "loss": 2.8608, + "step": 383 + }, + { + "epoch": 0.030990234847873457, + "grad_norm": 1.0262864828109741, + "learning_rate": 0.0001998995089172202, + "loss": 2.8226, + "step": 384 + }, + { + "epoch": 0.03107093858445646, + "grad_norm": 0.9266276955604553, + "learning_rate": 0.00019989880010721348, + "loss": 2.9414, + "step": 385 + }, + { + "epoch": 0.031151642321039463, + "grad_norm": 0.8762117028236389, + "learning_rate": 0.00019989808880746749, + "loss": 2.8023, + "step": 386 + }, + { + "epoch": 0.031232346057622467, + "grad_norm": 0.8531816601753235, + "learning_rate": 0.00019989737501800004, + "loss": 2.777, + "step": 387 + }, + { + "epoch": 0.031313049794205475, + "grad_norm": 0.8999545574188232, + "learning_rate": 0.0001998966587388288, + "loss": 2.8656, + "step": 388 + }, + { + "epoch": 0.03139375353078847, + "grad_norm": 0.932248055934906, + "learning_rate": 0.00019989593996997177, + "loss": 2.8212, + "step": 389 + }, + { + "epoch": 0.031474457267371476, + "grad_norm": 0.9059134125709534, + "learning_rate": 0.00019989521871144672, + "loss": 2.7945, + "step": 390 + }, + { + "epoch": 0.03155516100395448, + "grad_norm": 0.9323028922080994, + "learning_rate": 0.00019989449496327172, + "loss": 2.8338, + "step": 391 + }, + { + "epoch": 0.031635864740537485, + "grad_norm": 0.9141251444816589, + "learning_rate": 0.0001998937687254648, + "loss": 2.7935, + "step": 392 + }, + { + "epoch": 0.03171656847712049, + "grad_norm": 1.0026880502700806, + "learning_rate": 0.000199893039998044, + "loss": 2.8811, + "step": 393 + }, + { + "epoch": 0.031797272213703494, + "grad_norm": 1.0178622007369995, + "learning_rate": 0.00019989230878102756, + "loss": 2.9003, + "step": 394 + }, + { + "epoch": 0.0318779759502865, + "grad_norm": 0.9111912846565247, + 
"learning_rate": 0.00019989157507443363, + "loss": 2.8399, + "step": 395 + }, + { + "epoch": 0.0319586796868695, + "grad_norm": 1.054563283920288, + "learning_rate": 0.00019989083887828052, + "loss": 2.9088, + "step": 396 + }, + { + "epoch": 0.03203938342345251, + "grad_norm": 0.9459816217422485, + "learning_rate": 0.00019989010019258663, + "loss": 2.805, + "step": 397 + }, + { + "epoch": 0.03212008716003551, + "grad_norm": 1.0139873027801514, + "learning_rate": 0.00019988935901737033, + "loss": 2.8452, + "step": 398 + }, + { + "epoch": 0.032200790896618516, + "grad_norm": 0.986325204372406, + "learning_rate": 0.00019988861535265006, + "loss": 2.8311, + "step": 399 + }, + { + "epoch": 0.03228149463320152, + "grad_norm": 0.9565223455429077, + "learning_rate": 0.00019988786919844436, + "loss": 2.7766, + "step": 400 + }, + { + "epoch": 0.032362198369784524, + "grad_norm": 0.8901559710502625, + "learning_rate": 0.0001998871205547719, + "loss": 2.7966, + "step": 401 + }, + { + "epoch": 0.03244290210636752, + "grad_norm": 1.0959528684616089, + "learning_rate": 0.00019988636942165123, + "loss": 2.8377, + "step": 402 + }, + { + "epoch": 0.032523605842950526, + "grad_norm": 1.0768988132476807, + "learning_rate": 0.00019988561579910118, + "loss": 2.8267, + "step": 403 + }, + { + "epoch": 0.03260430957953353, + "grad_norm": 0.9563855528831482, + "learning_rate": 0.00019988485968714048, + "loss": 2.8459, + "step": 404 + }, + { + "epoch": 0.032685013316116535, + "grad_norm": 0.930927038192749, + "learning_rate": 0.00019988410108578796, + "loss": 2.8053, + "step": 405 + }, + { + "epoch": 0.03276571705269954, + "grad_norm": 1.0658363103866577, + "learning_rate": 0.00019988333999506255, + "loss": 2.8512, + "step": 406 + }, + { + "epoch": 0.03284642078928254, + "grad_norm": 0.9258090257644653, + "learning_rate": 0.0001998825764149832, + "loss": 2.8541, + "step": 407 + }, + { + "epoch": 0.03292712452586555, + "grad_norm": 1.18158757686615, + "learning_rate": 0.00019988181034556895, 
+ "loss": 2.8838, + "step": 408 + }, + { + "epoch": 0.03300782826244855, + "grad_norm": 0.9506754875183105, + "learning_rate": 0.00019988104178683891, + "loss": 2.7733, + "step": 409 + }, + { + "epoch": 0.033088531999031556, + "grad_norm": 0.9559460282325745, + "learning_rate": 0.0001998802707388122, + "loss": 2.9259, + "step": 410 + }, + { + "epoch": 0.03316923573561456, + "grad_norm": 0.9322298765182495, + "learning_rate": 0.00019987949720150808, + "loss": 2.8318, + "step": 411 + }, + { + "epoch": 0.033249939472197565, + "grad_norm": 0.9226691722869873, + "learning_rate": 0.00019987872117494576, + "loss": 2.9063, + "step": 412 + }, + { + "epoch": 0.03333064320878057, + "grad_norm": 1.0543674230575562, + "learning_rate": 0.00019987794265914464, + "loss": 2.7877, + "step": 413 + }, + { + "epoch": 0.033411346945363574, + "grad_norm": 0.989986002445221, + "learning_rate": 0.00019987716165412408, + "loss": 2.8354, + "step": 414 + }, + { + "epoch": 0.03349205068194657, + "grad_norm": 0.8703451752662659, + "learning_rate": 0.0001998763781599036, + "loss": 2.8127, + "step": 415 + }, + { + "epoch": 0.033572754418529575, + "grad_norm": 0.974943220615387, + "learning_rate": 0.0001998755921765027, + "loss": 2.9272, + "step": 416 + }, + { + "epoch": 0.03365345815511258, + "grad_norm": 0.8714169859886169, + "learning_rate": 0.000199874803703941, + "loss": 2.8027, + "step": 417 + }, + { + "epoch": 0.033734161891695584, + "grad_norm": 0.9251161217689514, + "learning_rate": 0.00019987401274223804, + "loss": 2.8186, + "step": 418 + }, + { + "epoch": 0.03381486562827859, + "grad_norm": 0.9657236933708191, + "learning_rate": 0.00019987321929141366, + "loss": 2.8297, + "step": 419 + }, + { + "epoch": 0.03389556936486159, + "grad_norm": 0.9022002816200256, + "learning_rate": 0.00019987242335148757, + "loss": 2.881, + "step": 420 + }, + { + "epoch": 0.0339762731014446, + "grad_norm": 0.9479621052742004, + "learning_rate": 0.0001998716249224796, + "loss": 2.8288, + "step": 421 + }, + { 
+ "epoch": 0.0340569768380276, + "grad_norm": 0.9458955526351929, + "learning_rate": 0.00019987082400440968, + "loss": 2.8861, + "step": 422 + }, + { + "epoch": 0.034137680574610606, + "grad_norm": 0.9444572329521179, + "learning_rate": 0.0001998700205972978, + "loss": 2.8877, + "step": 423 + }, + { + "epoch": 0.03421838431119361, + "grad_norm": 0.9263925552368164, + "learning_rate": 0.00019986921470116392, + "loss": 2.8028, + "step": 424 + }, + { + "epoch": 0.034299088047776614, + "grad_norm": 1.0690566301345825, + "learning_rate": 0.00019986840631602812, + "loss": 2.882, + "step": 425 + }, + { + "epoch": 0.03437979178435962, + "grad_norm": 0.8999007940292358, + "learning_rate": 0.0001998675954419106, + "loss": 2.8179, + "step": 426 + }, + { + "epoch": 0.03446049552094262, + "grad_norm": 0.894395112991333, + "learning_rate": 0.00019986678207883153, + "loss": 2.814, + "step": 427 + }, + { + "epoch": 0.03454119925752562, + "grad_norm": 0.8621550798416138, + "learning_rate": 0.00019986596622681123, + "loss": 2.7584, + "step": 428 + }, + { + "epoch": 0.034621902994108625, + "grad_norm": 0.9452527165412903, + "learning_rate": 0.00019986514788587, + "loss": 2.8949, + "step": 429 + }, + { + "epoch": 0.03470260673069163, + "grad_norm": 0.8973272442817688, + "learning_rate": 0.0001998643270560282, + "loss": 2.868, + "step": 430 + }, + { + "epoch": 0.034783310467274633, + "grad_norm": 0.9887418150901794, + "learning_rate": 0.00019986350373730634, + "loss": 2.8009, + "step": 431 + }, + { + "epoch": 0.03486401420385764, + "grad_norm": 0.9449994564056396, + "learning_rate": 0.0001998626779297249, + "loss": 2.8305, + "step": 432 + }, + { + "epoch": 0.03494471794044064, + "grad_norm": 1.052871823310852, + "learning_rate": 0.0001998618496333045, + "loss": 2.8136, + "step": 433 + }, + { + "epoch": 0.035025421677023647, + "grad_norm": 0.9600724577903748, + "learning_rate": 0.00019986101884806576, + "loss": 2.7857, + "step": 434 + }, + { + "epoch": 0.03510612541360665, + 
"grad_norm": 0.874043345451355, + "learning_rate": 0.00019986018557402942, + "loss": 2.8524, + "step": 435 + }, + { + "epoch": 0.035186829150189655, + "grad_norm": 0.9810616374015808, + "learning_rate": 0.0001998593498112162, + "loss": 2.7506, + "step": 436 + }, + { + "epoch": 0.03526753288677266, + "grad_norm": 0.9163016080856323, + "learning_rate": 0.00019985851155964693, + "loss": 2.798, + "step": 437 + }, + { + "epoch": 0.035348236623355664, + "grad_norm": 1.0688380002975464, + "learning_rate": 0.00019985767081934252, + "loss": 2.8916, + "step": 438 + }, + { + "epoch": 0.03542894035993867, + "grad_norm": 0.925020158290863, + "learning_rate": 0.00019985682759032393, + "loss": 2.8017, + "step": 439 + }, + { + "epoch": 0.035509644096521666, + "grad_norm": 0.9429430961608887, + "learning_rate": 0.0001998559818726122, + "loss": 2.837, + "step": 440 + }, + { + "epoch": 0.03559034783310467, + "grad_norm": 0.9135627150535583, + "learning_rate": 0.00019985513366622832, + "loss": 2.8423, + "step": 441 + }, + { + "epoch": 0.035671051569687674, + "grad_norm": 0.9218924045562744, + "learning_rate": 0.00019985428297119353, + "loss": 2.854, + "step": 442 + }, + { + "epoch": 0.03575175530627068, + "grad_norm": 0.9307878613471985, + "learning_rate": 0.00019985342978752897, + "loss": 2.8591, + "step": 443 + }, + { + "epoch": 0.03583245904285368, + "grad_norm": 0.935394287109375, + "learning_rate": 0.00019985257411525592, + "loss": 2.8388, + "step": 444 + }, + { + "epoch": 0.03591316277943669, + "grad_norm": 0.890959620475769, + "learning_rate": 0.0001998517159543957, + "loss": 2.78, + "step": 445 + }, + { + "epoch": 0.03599386651601969, + "grad_norm": 1.110924482345581, + "learning_rate": 0.0001998508553049697, + "loss": 2.8117, + "step": 446 + }, + { + "epoch": 0.036074570252602696, + "grad_norm": 0.8774176239967346, + "learning_rate": 0.0001998499921669994, + "loss": 2.8368, + "step": 447 + }, + { + "epoch": 0.0361552739891857, + "grad_norm": 0.9766948819160461, + 
"learning_rate": 0.00019984912654050625, + "loss": 2.764, + "step": 448 + }, + { + "epoch": 0.036235977725768705, + "grad_norm": 1.1439398527145386, + "learning_rate": 0.00019984825842551187, + "loss": 2.84, + "step": 449 + }, + { + "epoch": 0.03631668146235171, + "grad_norm": 0.8995118737220764, + "learning_rate": 0.0001998473878220379, + "loss": 2.834, + "step": 450 + }, + { + "epoch": 0.03639738519893471, + "grad_norm": 0.9810060858726501, + "learning_rate": 0.000199846514730106, + "loss": 2.9338, + "step": 451 + }, + { + "epoch": 0.03647808893551772, + "grad_norm": 1.0862053632736206, + "learning_rate": 0.00019984563914973795, + "loss": 2.837, + "step": 452 + }, + { + "epoch": 0.036558792672100715, + "grad_norm": 0.9456702470779419, + "learning_rate": 0.0001998447610809556, + "loss": 2.7664, + "step": 453 + }, + { + "epoch": 0.03663949640868372, + "grad_norm": 1.0714432001113892, + "learning_rate": 0.0001998438805237808, + "loss": 2.8339, + "step": 454 + }, + { + "epoch": 0.036720200145266724, + "grad_norm": 0.89134281873703, + "learning_rate": 0.00019984299747823547, + "loss": 2.7818, + "step": 455 + }, + { + "epoch": 0.03680090388184973, + "grad_norm": 0.869742214679718, + "learning_rate": 0.0001998421119443417, + "loss": 2.7916, + "step": 456 + }, + { + "epoch": 0.03688160761843273, + "grad_norm": 0.9307265281677246, + "learning_rate": 0.00019984122392212149, + "loss": 2.8485, + "step": 457 + }, + { + "epoch": 0.03696231135501574, + "grad_norm": 0.900215744972229, + "learning_rate": 0.00019984033341159698, + "loss": 2.8536, + "step": 458 + }, + { + "epoch": 0.03704301509159874, + "grad_norm": 0.8679699897766113, + "learning_rate": 0.00019983944041279038, + "loss": 2.8344, + "step": 459 + }, + { + "epoch": 0.037123718828181745, + "grad_norm": 0.9540488719940186, + "learning_rate": 0.00019983854492572394, + "loss": 2.873, + "step": 460 + }, + { + "epoch": 0.03720442256476475, + "grad_norm": 0.8697962760925293, + "learning_rate": 0.00019983764695042, + "loss": 
2.8122, + "step": 461 + }, + { + "epoch": 0.037285126301347754, + "grad_norm": 0.9534483551979065, + "learning_rate": 0.0001998367464869009, + "loss": 2.8842, + "step": 462 + }, + { + "epoch": 0.03736583003793076, + "grad_norm": 0.8402275443077087, + "learning_rate": 0.00019983584353518911, + "loss": 2.8135, + "step": 463 + }, + { + "epoch": 0.03744653377451376, + "grad_norm": 0.8226146697998047, + "learning_rate": 0.0001998349380953071, + "loss": 2.8036, + "step": 464 + }, + { + "epoch": 0.03752723751109677, + "grad_norm": 0.9292199611663818, + "learning_rate": 0.0001998340301672775, + "loss": 2.7887, + "step": 465 + }, + { + "epoch": 0.037607941247679764, + "grad_norm": 0.9035555124282837, + "learning_rate": 0.0001998331197511229, + "loss": 2.7851, + "step": 466 + }, + { + "epoch": 0.03768864498426277, + "grad_norm": 0.9411706328392029, + "learning_rate": 0.00019983220684686596, + "loss": 2.7782, + "step": 467 + }, + { + "epoch": 0.03776934872084577, + "grad_norm": 0.9867696166038513, + "learning_rate": 0.0001998312914545295, + "loss": 2.8125, + "step": 468 + }, + { + "epoch": 0.03785005245742878, + "grad_norm": 0.9683675169944763, + "learning_rate": 0.00019983037357413624, + "loss": 2.8325, + "step": 469 + }, + { + "epoch": 0.03793075619401178, + "grad_norm": 0.963941752910614, + "learning_rate": 0.00019982945320570913, + "loss": 2.8281, + "step": 470 + }, + { + "epoch": 0.038011459930594786, + "grad_norm": 0.9812459349632263, + "learning_rate": 0.0001998285303492711, + "loss": 2.765, + "step": 471 + }, + { + "epoch": 0.03809216366717779, + "grad_norm": 0.9681405425071716, + "learning_rate": 0.00019982760500484516, + "loss": 2.8882, + "step": 472 + }, + { + "epoch": 0.038172867403760795, + "grad_norm": 0.8983948826789856, + "learning_rate": 0.00019982667717245432, + "loss": 2.8182, + "step": 473 + }, + { + "epoch": 0.0382535711403438, + "grad_norm": 0.9875261783599854, + "learning_rate": 0.00019982574685212178, + "loss": 2.8072, + "step": 474 + }, + { + "epoch": 
0.038334274876926804, + "grad_norm": 0.8889442086219788, + "learning_rate": 0.00019982481404387064, + "loss": 2.8635, + "step": 475 + }, + { + "epoch": 0.03841497861350981, + "grad_norm": 0.8904242515563965, + "learning_rate": 0.00019982387874772418, + "loss": 2.829, + "step": 476 + }, + { + "epoch": 0.03849568235009281, + "grad_norm": 1.0182000398635864, + "learning_rate": 0.00019982294096370574, + "loss": 2.8552, + "step": 477 + }, + { + "epoch": 0.03857638608667582, + "grad_norm": 0.9867151975631714, + "learning_rate": 0.00019982200069183867, + "loss": 2.8201, + "step": 478 + }, + { + "epoch": 0.038657089823258814, + "grad_norm": 0.9785345196723938, + "learning_rate": 0.0001998210579321464, + "loss": 2.8652, + "step": 479 + }, + { + "epoch": 0.03873779355984182, + "grad_norm": 0.9696915149688721, + "learning_rate": 0.00019982011268465243, + "loss": 2.8276, + "step": 480 + }, + { + "epoch": 0.03881849729642482, + "grad_norm": 0.9257470965385437, + "learning_rate": 0.00019981916494938033, + "loss": 2.8321, + "step": 481 + }, + { + "epoch": 0.03889920103300783, + "grad_norm": 0.9394895434379578, + "learning_rate": 0.00019981821472635369, + "loss": 2.8747, + "step": 482 + }, + { + "epoch": 0.03897990476959083, + "grad_norm": 0.9888504147529602, + "learning_rate": 0.00019981726201559626, + "loss": 2.8201, + "step": 483 + }, + { + "epoch": 0.039060608506173836, + "grad_norm": 0.8957003951072693, + "learning_rate": 0.0001998163068171317, + "loss": 2.8255, + "step": 484 + }, + { + "epoch": 0.03914131224275684, + "grad_norm": 0.9792008996009827, + "learning_rate": 0.00019981534913098383, + "loss": 2.7985, + "step": 485 + }, + { + "epoch": 0.039222015979339844, + "grad_norm": 0.8689060211181641, + "learning_rate": 0.00019981438895717656, + "loss": 2.7945, + "step": 486 + }, + { + "epoch": 0.03930271971592285, + "grad_norm": 0.9932593703269958, + "learning_rate": 0.0001998134262957338, + "loss": 2.9041, + "step": 487 + }, + { + "epoch": 0.03938342345250585, + "grad_norm": 
0.8496069312095642, + "learning_rate": 0.00019981246114667955, + "loss": 2.8433, + "step": 488 + }, + { + "epoch": 0.03946412718908886, + "grad_norm": 0.8484126925468445, + "learning_rate": 0.00019981149351003786, + "loss": 2.7872, + "step": 489 + }, + { + "epoch": 0.03954483092567186, + "grad_norm": 0.9208858013153076, + "learning_rate": 0.00019981052338583283, + "loss": 2.7776, + "step": 490 + }, + { + "epoch": 0.03962553466225486, + "grad_norm": 0.9305418729782104, + "learning_rate": 0.00019980955077408865, + "loss": 2.7851, + "step": 491 + }, + { + "epoch": 0.03970623839883786, + "grad_norm": 0.9803212881088257, + "learning_rate": 0.00019980857567482955, + "loss": 2.8469, + "step": 492 + }, + { + "epoch": 0.03978694213542087, + "grad_norm": 0.9165790677070618, + "learning_rate": 0.00019980759808807985, + "loss": 2.8513, + "step": 493 + }, + { + "epoch": 0.03986764587200387, + "grad_norm": 0.9153794050216675, + "learning_rate": 0.00019980661801386393, + "loss": 2.8322, + "step": 494 + }, + { + "epoch": 0.039948349608586876, + "grad_norm": 0.89347904920578, + "learning_rate": 0.00019980563545220616, + "loss": 2.8316, + "step": 495 + }, + { + "epoch": 0.04002905334516988, + "grad_norm": 0.9882236123085022, + "learning_rate": 0.00019980465040313105, + "loss": 2.7471, + "step": 496 + }, + { + "epoch": 0.040109757081752885, + "grad_norm": 0.9391099810600281, + "learning_rate": 0.00019980366286666322, + "loss": 2.8182, + "step": 497 + }, + { + "epoch": 0.04019046081833589, + "grad_norm": 1.0155293941497803, + "learning_rate": 0.00019980267284282717, + "loss": 2.8721, + "step": 498 + }, + { + "epoch": 0.040271164554918894, + "grad_norm": 0.9952930212020874, + "learning_rate": 0.00019980168033164765, + "loss": 2.8538, + "step": 499 + }, + { + "epoch": 0.0403518682915019, + "grad_norm": 0.8385666608810425, + "learning_rate": 0.00019980068533314934, + "loss": 2.8242, + "step": 500 + }, + { + "epoch": 0.0404325720280849, + "grad_norm": 0.8747559785842896, + 
"learning_rate": 0.0001997996878473571, + "loss": 2.7908, + "step": 501 + }, + { + "epoch": 0.04051327576466791, + "grad_norm": 0.9267926216125488, + "learning_rate": 0.00019979868787429575, + "loss": 2.8359, + "step": 502 + }, + { + "epoch": 0.04059397950125091, + "grad_norm": 0.8194155693054199, + "learning_rate": 0.00019979768541399022, + "loss": 2.8161, + "step": 503 + }, + { + "epoch": 0.04067468323783391, + "grad_norm": 0.8923258185386658, + "learning_rate": 0.00019979668046646548, + "loss": 2.7547, + "step": 504 + }, + { + "epoch": 0.04075538697441691, + "grad_norm": 0.8965646028518677, + "learning_rate": 0.00019979567303174663, + "loss": 2.8432, + "step": 505 + }, + { + "epoch": 0.04083609071099992, + "grad_norm": 0.814481794834137, + "learning_rate": 0.0001997946631098587, + "loss": 2.8327, + "step": 506 + }, + { + "epoch": 0.04091679444758292, + "grad_norm": 0.8806928396224976, + "learning_rate": 0.00019979365070082694, + "loss": 2.8573, + "step": 507 + }, + { + "epoch": 0.040997498184165926, + "grad_norm": 0.8546919822692871, + "learning_rate": 0.00019979263580467653, + "loss": 2.8618, + "step": 508 + }, + { + "epoch": 0.04107820192074893, + "grad_norm": 0.8557277321815491, + "learning_rate": 0.00019979161842143274, + "loss": 2.8454, + "step": 509 + }, + { + "epoch": 0.041158905657331935, + "grad_norm": 0.9153180122375488, + "learning_rate": 0.00019979059855112098, + "loss": 2.8027, + "step": 510 + }, + { + "epoch": 0.04123960939391494, + "grad_norm": 0.8616741895675659, + "learning_rate": 0.00019978957619376666, + "loss": 2.7628, + "step": 511 + }, + { + "epoch": 0.04132031313049794, + "grad_norm": 0.8777137398719788, + "learning_rate": 0.00019978855134939524, + "loss": 2.8443, + "step": 512 + }, + { + "epoch": 0.04140101686708095, + "grad_norm": 0.852100133895874, + "learning_rate": 0.0001997875240180323, + "loss": 2.8125, + "step": 513 + }, + { + "epoch": 0.04148172060366395, + "grad_norm": 0.8470742702484131, + "learning_rate": 
0.00019978649419970338, + "loss": 2.8139, + "step": 514 + }, + { + "epoch": 0.041562424340246956, + "grad_norm": 0.8890305161476135, + "learning_rate": 0.0001997854618944342, + "loss": 2.8633, + "step": 515 + }, + { + "epoch": 0.04164312807682996, + "grad_norm": 0.8893599510192871, + "learning_rate": 0.00019978442710225043, + "loss": 2.8066, + "step": 516 + }, + { + "epoch": 0.04172383181341296, + "grad_norm": 0.9093891382217407, + "learning_rate": 0.00019978338982317792, + "loss": 2.8026, + "step": 517 + }, + { + "epoch": 0.04180453554999596, + "grad_norm": 0.9775434136390686, + "learning_rate": 0.00019978235005724252, + "loss": 2.849, + "step": 518 + }, + { + "epoch": 0.04188523928657897, + "grad_norm": 1.0014091730117798, + "learning_rate": 0.00019978130780447012, + "loss": 2.8572, + "step": 519 + }, + { + "epoch": 0.04196594302316197, + "grad_norm": 0.8487632870674133, + "learning_rate": 0.00019978026306488668, + "loss": 2.7611, + "step": 520 + }, + { + "epoch": 0.042046646759744975, + "grad_norm": 0.86592698097229, + "learning_rate": 0.00019977921583851825, + "loss": 2.7616, + "step": 521 + }, + { + "epoch": 0.04212735049632798, + "grad_norm": 1.0285916328430176, + "learning_rate": 0.00019977816612539093, + "loss": 2.8049, + "step": 522 + }, + { + "epoch": 0.042208054232910984, + "grad_norm": 0.9716495871543884, + "learning_rate": 0.00019977711392553092, + "loss": 2.8459, + "step": 523 + }, + { + "epoch": 0.04228875796949399, + "grad_norm": 0.8842264413833618, + "learning_rate": 0.0001997760592389644, + "loss": 2.7934, + "step": 524 + }, + { + "epoch": 0.04236946170607699, + "grad_norm": 0.8839964866638184, + "learning_rate": 0.00019977500206571765, + "loss": 2.8135, + "step": 525 + }, + { + "epoch": 0.04245016544266, + "grad_norm": 0.870331346988678, + "learning_rate": 0.00019977394240581705, + "loss": 2.8684, + "step": 526 + }, + { + "epoch": 0.042530869179243, + "grad_norm": 0.8844720125198364, + "learning_rate": 0.000199772880259289, + "loss": 2.7867, + 
"step": 527 + }, + { + "epoch": 0.042611572915826006, + "grad_norm": 0.9353455901145935, + "learning_rate": 0.00019977181562615994, + "loss": 2.8051, + "step": 528 + }, + { + "epoch": 0.04269227665240901, + "grad_norm": 0.9530816078186035, + "learning_rate": 0.00019977074850645646, + "loss": 2.7915, + "step": 529 + }, + { + "epoch": 0.04277298038899201, + "grad_norm": 0.8984190821647644, + "learning_rate": 0.00019976967890020507, + "loss": 2.7957, + "step": 530 + }, + { + "epoch": 0.04285368412557501, + "grad_norm": 0.9146613478660583, + "learning_rate": 0.00019976860680743252, + "loss": 2.9053, + "step": 531 + }, + { + "epoch": 0.042934387862158016, + "grad_norm": 0.9228026866912842, + "learning_rate": 0.0001997675322281655, + "loss": 2.8578, + "step": 532 + }, + { + "epoch": 0.04301509159874102, + "grad_norm": 0.8266343474388123, + "learning_rate": 0.0001997664551624308, + "loss": 2.7393, + "step": 533 + }, + { + "epoch": 0.043095795335324025, + "grad_norm": 0.9197628498077393, + "learning_rate": 0.0001997653756102552, + "loss": 2.8828, + "step": 534 + }, + { + "epoch": 0.04317649907190703, + "grad_norm": 0.9145991802215576, + "learning_rate": 0.00019976429357166566, + "loss": 2.7767, + "step": 535 + }, + { + "epoch": 0.04325720280849003, + "grad_norm": 0.9123281240463257, + "learning_rate": 0.00019976320904668913, + "loss": 2.7993, + "step": 536 + }, + { + "epoch": 0.04333790654507304, + "grad_norm": 0.8597636818885803, + "learning_rate": 0.00019976212203535266, + "loss": 2.8148, + "step": 537 + }, + { + "epoch": 0.04341861028165604, + "grad_norm": 0.8963296413421631, + "learning_rate": 0.00019976103253768334, + "loss": 2.7722, + "step": 538 + }, + { + "epoch": 0.043499314018239046, + "grad_norm": 0.9480688571929932, + "learning_rate": 0.0001997599405537083, + "loss": 2.8038, + "step": 539 + }, + { + "epoch": 0.04358001775482205, + "grad_norm": 0.8115736842155457, + "learning_rate": 0.00019975884608345476, + "loss": 2.8069, + "step": 540 + }, + { + "epoch": 
0.043660721491405055, + "grad_norm": 0.9642506837844849, + "learning_rate": 0.00019975774912695, + "loss": 2.8703, + "step": 541 + }, + { + "epoch": 0.04374142522798805, + "grad_norm": 0.9638697504997253, + "learning_rate": 0.0001997566496842214, + "loss": 2.8223, + "step": 542 + }, + { + "epoch": 0.04382212896457106, + "grad_norm": 0.9478490352630615, + "learning_rate": 0.00019975554775529628, + "loss": 2.8164, + "step": 543 + }, + { + "epoch": 0.04390283270115406, + "grad_norm": 1.1771583557128906, + "learning_rate": 0.00019975444334020215, + "loss": 2.7969, + "step": 544 + }, + { + "epoch": 0.043983536437737066, + "grad_norm": 0.9597339034080505, + "learning_rate": 0.00019975333643896655, + "loss": 2.8025, + "step": 545 + }, + { + "epoch": 0.04406424017432007, + "grad_norm": 0.981595516204834, + "learning_rate": 0.00019975222705161704, + "loss": 2.7994, + "step": 546 + }, + { + "epoch": 0.044144943910903074, + "grad_norm": 0.9581133723258972, + "learning_rate": 0.00019975111517818127, + "loss": 2.802, + "step": 547 + }, + { + "epoch": 0.04422564764748608, + "grad_norm": 0.8643878698348999, + "learning_rate": 0.00019975000081868697, + "loss": 2.7958, + "step": 548 + }, + { + "epoch": 0.04430635138406908, + "grad_norm": 1.2188652753829956, + "learning_rate": 0.0001997488839731619, + "loss": 2.8786, + "step": 549 + }, + { + "epoch": 0.04438705512065209, + "grad_norm": 0.9138071537017822, + "learning_rate": 0.00019974776464163387, + "loss": 2.809, + "step": 550 + }, + { + "epoch": 0.04446775885723509, + "grad_norm": 0.9604587554931641, + "learning_rate": 0.00019974664282413083, + "loss": 2.8009, + "step": 551 + }, + { + "epoch": 0.044548462593818096, + "grad_norm": 1.0271116495132446, + "learning_rate": 0.00019974551852068072, + "loss": 2.8689, + "step": 552 + }, + { + "epoch": 0.0446291663304011, + "grad_norm": 0.9330877065658569, + "learning_rate": 0.00019974439173131155, + "loss": 2.7613, + "step": 553 + }, + { + "epoch": 0.044709870066984105, + "grad_norm": 
0.9549325108528137, + "learning_rate": 0.00019974326245605136, + "loss": 2.8314, + "step": 554 + }, + { + "epoch": 0.0447905738035671, + "grad_norm": 0.8928439021110535, + "learning_rate": 0.00019974213069492836, + "loss": 2.8097, + "step": 555 + }, + { + "epoch": 0.044871277540150106, + "grad_norm": 0.8705076575279236, + "learning_rate": 0.00019974099644797075, + "loss": 2.8112, + "step": 556 + }, + { + "epoch": 0.04495198127673311, + "grad_norm": 0.988345742225647, + "learning_rate": 0.00019973985971520676, + "loss": 2.7648, + "step": 557 + }, + { + "epoch": 0.045032685013316115, + "grad_norm": 0.9161957502365112, + "learning_rate": 0.00019973872049666475, + "loss": 2.8691, + "step": 558 + }, + { + "epoch": 0.04511338874989912, + "grad_norm": 0.8404076099395752, + "learning_rate": 0.00019973757879237312, + "loss": 2.7708, + "step": 559 + }, + { + "epoch": 0.045194092486482124, + "grad_norm": 1.05247962474823, + "learning_rate": 0.0001997364346023603, + "loss": 2.8638, + "step": 560 + }, + { + "epoch": 0.04527479622306513, + "grad_norm": 0.9235066175460815, + "learning_rate": 0.00019973528792665483, + "loss": 2.7876, + "step": 561 + }, + { + "epoch": 0.04535549995964813, + "grad_norm": 1.220075249671936, + "learning_rate": 0.00019973413876528526, + "loss": 2.8563, + "step": 562 + }, + { + "epoch": 0.04543620369623114, + "grad_norm": 0.9098384976387024, + "learning_rate": 0.00019973298711828025, + "loss": 2.8427, + "step": 563 + }, + { + "epoch": 0.04551690743281414, + "grad_norm": 0.8792217969894409, + "learning_rate": 0.00019973183298566848, + "loss": 2.8673, + "step": 564 + }, + { + "epoch": 0.045597611169397145, + "grad_norm": 0.9895235896110535, + "learning_rate": 0.00019973067636747875, + "loss": 2.8262, + "step": 565 + }, + { + "epoch": 0.04567831490598015, + "grad_norm": 0.9191479086875916, + "learning_rate": 0.00019972951726373984, + "loss": 2.8005, + "step": 566 + }, + { + "epoch": 0.045759018642563154, + "grad_norm": 0.9631491899490356, + 
"learning_rate": 0.0001997283556744807, + "loss": 2.8438, + "step": 567 + }, + { + "epoch": 0.04583972237914615, + "grad_norm": 0.8302746415138245, + "learning_rate": 0.00019972719159973024, + "loss": 2.8221, + "step": 568 + }, + { + "epoch": 0.045920426115729156, + "grad_norm": 0.8238534927368164, + "learning_rate": 0.00019972602503951748, + "loss": 2.7674, + "step": 569 + }, + { + "epoch": 0.04600112985231216, + "grad_norm": 0.9675811529159546, + "learning_rate": 0.00019972485599387146, + "loss": 2.8457, + "step": 570 + }, + { + "epoch": 0.046081833588895164, + "grad_norm": 0.8663914203643799, + "learning_rate": 0.00019972368446282134, + "loss": 2.7851, + "step": 571 + }, + { + "epoch": 0.04616253732547817, + "grad_norm": 0.9904592633247375, + "learning_rate": 0.00019972251044639636, + "loss": 2.8792, + "step": 572 + }, + { + "epoch": 0.04624324106206117, + "grad_norm": 0.907600462436676, + "learning_rate": 0.0001997213339446257, + "loss": 2.7991, + "step": 573 + }, + { + "epoch": 0.04632394479864418, + "grad_norm": 0.871362566947937, + "learning_rate": 0.00019972015495753876, + "loss": 2.7959, + "step": 574 + }, + { + "epoch": 0.04640464853522718, + "grad_norm": 0.9664937853813171, + "learning_rate": 0.00019971897348516486, + "loss": 2.7847, + "step": 575 + }, + { + "epoch": 0.046485352271810186, + "grad_norm": 1.0670619010925293, + "learning_rate": 0.0001997177895275335, + "loss": 2.8864, + "step": 576 + }, + { + "epoch": 0.04656605600839319, + "grad_norm": 0.9281025528907776, + "learning_rate": 0.00019971660308467414, + "loss": 2.8568, + "step": 577 + }, + { + "epoch": 0.046646759744976195, + "grad_norm": 0.8964822888374329, + "learning_rate": 0.00019971541415661639, + "loss": 2.7246, + "step": 578 + }, + { + "epoch": 0.0467274634815592, + "grad_norm": 0.8921917676925659, + "learning_rate": 0.00019971422274338985, + "loss": 2.8513, + "step": 579 + }, + { + "epoch": 0.0468081672181422, + "grad_norm": 0.9550159573554993, + "learning_rate": 0.0001997130288450242, 
+ "loss": 2.7615, + "step": 580 + }, + { + "epoch": 0.0468888709547252, + "grad_norm": 0.9330170154571533, + "learning_rate": 0.00019971183246154925, + "loss": 2.9017, + "step": 581 + }, + { + "epoch": 0.046969574691308205, + "grad_norm": 0.9125271439552307, + "learning_rate": 0.00019971063359299477, + "loss": 2.8263, + "step": 582 + }, + { + "epoch": 0.04705027842789121, + "grad_norm": 1.0005927085876465, + "learning_rate": 0.00019970943223939066, + "loss": 2.8371, + "step": 583 + }, + { + "epoch": 0.047130982164474214, + "grad_norm": 1.0333613157272339, + "learning_rate": 0.00019970822840076685, + "loss": 2.8275, + "step": 584 + }, + { + "epoch": 0.04721168590105722, + "grad_norm": 0.8684708476066589, + "learning_rate": 0.00019970702207715334, + "loss": 2.8343, + "step": 585 + }, + { + "epoch": 0.04729238963764022, + "grad_norm": 1.1112761497497559, + "learning_rate": 0.00019970581326858025, + "loss": 2.9012, + "step": 586 + }, + { + "epoch": 0.04737309337422323, + "grad_norm": 1.0187962055206299, + "learning_rate": 0.00019970460197507763, + "loss": 2.8423, + "step": 587 + }, + { + "epoch": 0.04745379711080623, + "grad_norm": 0.9802024960517883, + "learning_rate": 0.00019970338819667567, + "loss": 2.867, + "step": 588 + }, + { + "epoch": 0.047534500847389236, + "grad_norm": 0.9825551509857178, + "learning_rate": 0.00019970217193340467, + "loss": 2.8359, + "step": 589 + }, + { + "epoch": 0.04761520458397224, + "grad_norm": 1.1399210691452026, + "learning_rate": 0.00019970095318529494, + "loss": 2.8356, + "step": 590 + }, + { + "epoch": 0.047695908320555244, + "grad_norm": 1.0373995304107666, + "learning_rate": 0.00019969973195237684, + "loss": 2.8005, + "step": 591 + }, + { + "epoch": 0.04777661205713825, + "grad_norm": 1.133596420288086, + "learning_rate": 0.00019969850823468077, + "loss": 2.8778, + "step": 592 + }, + { + "epoch": 0.047857315793721246, + "grad_norm": 1.0187327861785889, + "learning_rate": 0.00019969728203223728, + "loss": 2.8291, + "step": 593 + 
}, + { + "epoch": 0.04793801953030425, + "grad_norm": 1.0588128566741943, + "learning_rate": 0.00019969605334507688, + "loss": 2.9396, + "step": 594 + }, + { + "epoch": 0.048018723266887255, + "grad_norm": 0.8783230781555176, + "learning_rate": 0.00019969482217323026, + "loss": 2.8076, + "step": 595 + }, + { + "epoch": 0.04809942700347026, + "grad_norm": 1.0500195026397705, + "learning_rate": 0.00019969358851672805, + "loss": 2.9099, + "step": 596 + }, + { + "epoch": 0.04818013074005326, + "grad_norm": 0.9523593187332153, + "learning_rate": 0.000199692352375601, + "loss": 2.7448, + "step": 597 + }, + { + "epoch": 0.04826083447663627, + "grad_norm": 1.0008500814437866, + "learning_rate": 0.00019969111374987995, + "loss": 2.8212, + "step": 598 + }, + { + "epoch": 0.04834153821321927, + "grad_norm": 0.8992626070976257, + "learning_rate": 0.00019968987263959575, + "loss": 2.8698, + "step": 599 + }, + { + "epoch": 0.048422241949802276, + "grad_norm": 0.9914852380752563, + "learning_rate": 0.00019968862904477935, + "loss": 2.8221, + "step": 600 + }, + { + "epoch": 0.04850294568638528, + "grad_norm": 0.9633241295814514, + "learning_rate": 0.00019968738296546168, + "loss": 2.8835, + "step": 601 + }, + { + "epoch": 0.048583649422968285, + "grad_norm": 1.055831789970398, + "learning_rate": 0.00019968613440167387, + "loss": 2.8781, + "step": 602 + }, + { + "epoch": 0.04866435315955129, + "grad_norm": 0.913856029510498, + "learning_rate": 0.000199684883353447, + "loss": 2.7863, + "step": 603 + }, + { + "epoch": 0.048745056896134294, + "grad_norm": 0.8429243564605713, + "learning_rate": 0.00019968362982081226, + "loss": 2.7753, + "step": 604 + }, + { + "epoch": 0.0488257606327173, + "grad_norm": 0.9324761629104614, + "learning_rate": 0.0001996823738038009, + "loss": 2.8058, + "step": 605 + }, + { + "epoch": 0.048906464369300295, + "grad_norm": 1.0004981756210327, + "learning_rate": 0.0001996811153024442, + "loss": 2.8537, + "step": 606 + }, + { + "epoch": 0.0489871681058833, + 
"grad_norm": 0.9438043236732483, + "learning_rate": 0.00019967985431677354, + "loss": 2.8828, + "step": 607 + }, + { + "epoch": 0.049067871842466304, + "grad_norm": 0.9359340071678162, + "learning_rate": 0.00019967859084682034, + "loss": 2.8149, + "step": 608 + }, + { + "epoch": 0.04914857557904931, + "grad_norm": 1.0400227308273315, + "learning_rate": 0.00019967732489261609, + "loss": 2.8489, + "step": 609 + }, + { + "epoch": 0.04922927931563231, + "grad_norm": 0.8978031277656555, + "learning_rate": 0.00019967605645419237, + "loss": 2.8599, + "step": 610 + }, + { + "epoch": 0.04930998305221532, + "grad_norm": 0.9982689619064331, + "learning_rate": 0.00019967478553158073, + "loss": 2.9024, + "step": 611 + }, + { + "epoch": 0.04939068678879832, + "grad_norm": 1.0695222616195679, + "learning_rate": 0.00019967351212481292, + "loss": 2.8483, + "step": 612 + }, + { + "epoch": 0.049471390525381326, + "grad_norm": 1.0615525245666504, + "learning_rate": 0.0001996722362339206, + "loss": 2.806, + "step": 613 + }, + { + "epoch": 0.04955209426196433, + "grad_norm": 0.9624890089035034, + "learning_rate": 0.0001996709578589356, + "loss": 2.8641, + "step": 614 + }, + { + "epoch": 0.049632797998547334, + "grad_norm": 0.9156595468521118, + "learning_rate": 0.00019966967699988985, + "loss": 2.7991, + "step": 615 + }, + { + "epoch": 0.04971350173513034, + "grad_norm": 0.8687645196914673, + "learning_rate": 0.00019966839365681517, + "loss": 2.774, + "step": 616 + }, + { + "epoch": 0.04979420547171334, + "grad_norm": 0.9175437688827515, + "learning_rate": 0.00019966710782974359, + "loss": 2.8064, + "step": 617 + }, + { + "epoch": 0.04987490920829635, + "grad_norm": 0.8897463083267212, + "learning_rate": 0.00019966581951870715, + "loss": 2.8487, + "step": 618 + }, + { + "epoch": 0.049955612944879345, + "grad_norm": 0.8908397555351257, + "learning_rate": 0.00019966452872373795, + "loss": 2.8523, + "step": 619 + }, + { + "epoch": 0.05003631668146235, + "grad_norm": 0.95484858751297, + 
"learning_rate": 0.00019966323544486818, + "loss": 2.8471, + "step": 620 + }, + { + "epoch": 0.050117020418045354, + "grad_norm": 0.9995831251144409, + "learning_rate": 0.00019966193968213008, + "loss": 2.8341, + "step": 621 + }, + { + "epoch": 0.05019772415462836, + "grad_norm": 0.8731706142425537, + "learning_rate": 0.00019966064143555587, + "loss": 2.8491, + "step": 622 + }, + { + "epoch": 0.05027842789121136, + "grad_norm": 0.9213298559188843, + "learning_rate": 0.000199659340705178, + "loss": 2.8256, + "step": 623 + }, + { + "epoch": 0.050359131627794367, + "grad_norm": 0.9565179347991943, + "learning_rate": 0.00019965803749102885, + "loss": 2.8177, + "step": 624 + }, + { + "epoch": 0.05043983536437737, + "grad_norm": 1.0076881647109985, + "learning_rate": 0.00019965673179314086, + "loss": 2.7812, + "step": 625 + }, + { + "epoch": 0.050520539100960375, + "grad_norm": 0.989647388458252, + "learning_rate": 0.00019965542361154666, + "loss": 2.9226, + "step": 626 + }, + { + "epoch": 0.05060124283754338, + "grad_norm": 0.9671580791473389, + "learning_rate": 0.00019965411294627878, + "loss": 2.8204, + "step": 627 + }, + { + "epoch": 0.050681946574126384, + "grad_norm": 0.9275986552238464, + "learning_rate": 0.00019965279979736989, + "loss": 2.8481, + "step": 628 + }, + { + "epoch": 0.05076265031070939, + "grad_norm": 0.9949543476104736, + "learning_rate": 0.00019965148416485273, + "loss": 2.8606, + "step": 629 + }, + { + "epoch": 0.05084335404729239, + "grad_norm": 0.9506482481956482, + "learning_rate": 0.0001996501660487601, + "loss": 2.8088, + "step": 630 + }, + { + "epoch": 0.0509240577838754, + "grad_norm": 0.9147887229919434, + "learning_rate": 0.00019964884544912488, + "loss": 2.7997, + "step": 631 + }, + { + "epoch": 0.051004761520458394, + "grad_norm": 0.8964840769767761, + "learning_rate": 0.00019964752236597993, + "loss": 2.8342, + "step": 632 + }, + { + "epoch": 0.0510854652570414, + "grad_norm": 0.931811511516571, + "learning_rate": 
0.00019964619679935824, + "loss": 2.8229, + "step": 633 + }, + { + "epoch": 0.0511661689936244, + "grad_norm": 0.8634423017501831, + "learning_rate": 0.00019964486874929282, + "loss": 2.803, + "step": 634 + }, + { + "epoch": 0.05124687273020741, + "grad_norm": 0.892223596572876, + "learning_rate": 0.00019964353821581683, + "loss": 2.802, + "step": 635 + }, + { + "epoch": 0.05132757646679041, + "grad_norm": 0.8373630046844482, + "learning_rate": 0.00019964220519896338, + "loss": 2.7693, + "step": 636 + }, + { + "epoch": 0.051408280203373416, + "grad_norm": 0.8729730248451233, + "learning_rate": 0.0001996408696987657, + "loss": 2.8467, + "step": 637 + }, + { + "epoch": 0.05148898393995642, + "grad_norm": 0.8994413614273071, + "learning_rate": 0.0001996395317152571, + "loss": 2.8837, + "step": 638 + }, + { + "epoch": 0.051569687676539425, + "grad_norm": 0.9146113395690918, + "learning_rate": 0.0001996381912484709, + "loss": 2.8189, + "step": 639 + }, + { + "epoch": 0.05165039141312243, + "grad_norm": 0.9330562353134155, + "learning_rate": 0.00019963684829844052, + "loss": 2.7873, + "step": 640 + }, + { + "epoch": 0.05173109514970543, + "grad_norm": 0.9076224565505981, + "learning_rate": 0.00019963550286519944, + "loss": 2.802, + "step": 641 + }, + { + "epoch": 0.05181179888628844, + "grad_norm": 0.9580704569816589, + "learning_rate": 0.00019963415494878115, + "loss": 2.8173, + "step": 642 + }, + { + "epoch": 0.05189250262287144, + "grad_norm": 0.9291248917579651, + "learning_rate": 0.00019963280454921928, + "loss": 2.7866, + "step": 643 + }, + { + "epoch": 0.05197320635945444, + "grad_norm": 0.9815296530723572, + "learning_rate": 0.0001996314516665475, + "loss": 2.7903, + "step": 644 + }, + { + "epoch": 0.052053910096037444, + "grad_norm": 0.9461820721626282, + "learning_rate": 0.00019963009630079949, + "loss": 2.7854, + "step": 645 + }, + { + "epoch": 0.05213461383262045, + "grad_norm": 0.9660771489143372, + "learning_rate": 0.00019962873845200908, + "loss": 2.9187, 
+ "step": 646 + }, + { + "epoch": 0.05221531756920345, + "grad_norm": 0.8987802863121033, + "learning_rate": 0.00019962737812021002, + "loss": 2.8854, + "step": 647 + }, + { + "epoch": 0.05229602130578646, + "grad_norm": 0.9810429215431213, + "learning_rate": 0.0001996260153054363, + "loss": 2.8974, + "step": 648 + }, + { + "epoch": 0.05237672504236946, + "grad_norm": 0.8185738325119019, + "learning_rate": 0.00019962465000772183, + "loss": 2.797, + "step": 649 + }, + { + "epoch": 0.052457428778952465, + "grad_norm": 0.8976237773895264, + "learning_rate": 0.0001996232822271007, + "loss": 2.8557, + "step": 650 + }, + { + "epoch": 0.05253813251553547, + "grad_norm": 0.8591496348381042, + "learning_rate": 0.0001996219119636069, + "loss": 2.8521, + "step": 651 + }, + { + "epoch": 0.052618836252118474, + "grad_norm": 0.8907031416893005, + "learning_rate": 0.00019962053921727472, + "loss": 2.8117, + "step": 652 + }, + { + "epoch": 0.05269953998870148, + "grad_norm": 0.9034241437911987, + "learning_rate": 0.00019961916398813823, + "loss": 2.741, + "step": 653 + }, + { + "epoch": 0.05278024372528448, + "grad_norm": 0.8284802436828613, + "learning_rate": 0.00019961778627623176, + "loss": 2.776, + "step": 654 + }, + { + "epoch": 0.05286094746186749, + "grad_norm": 0.8459529876708984, + "learning_rate": 0.00019961640608158967, + "loss": 2.8027, + "step": 655 + }, + { + "epoch": 0.05294165119845049, + "grad_norm": 0.9720042943954468, + "learning_rate": 0.00019961502340424636, + "loss": 2.9086, + "step": 656 + }, + { + "epoch": 0.05302235493503349, + "grad_norm": 0.8581427335739136, + "learning_rate": 0.00019961363824423626, + "loss": 2.8347, + "step": 657 + }, + { + "epoch": 0.05310305867161649, + "grad_norm": 0.9545331597328186, + "learning_rate": 0.00019961225060159386, + "loss": 2.828, + "step": 658 + }, + { + "epoch": 0.0531837624081995, + "grad_norm": 1.0303562879562378, + "learning_rate": 0.00019961086047635385, + "loss": 2.8461, + "step": 659 + }, + { + "epoch": 
0.0532644661447825, + "grad_norm": 0.86605304479599, + "learning_rate": 0.0001996094678685508, + "loss": 2.8355, + "step": 660 + }, + { + "epoch": 0.053345169881365506, + "grad_norm": 0.8146334886550903, + "learning_rate": 0.0001996080727782194, + "loss": 2.8638, + "step": 661 + }, + { + "epoch": 0.05342587361794851, + "grad_norm": 0.9434560537338257, + "learning_rate": 0.00019960667520539446, + "loss": 2.8196, + "step": 662 + }, + { + "epoch": 0.053506577354531515, + "grad_norm": 0.9362602829933167, + "learning_rate": 0.00019960527515011084, + "loss": 2.8452, + "step": 663 + }, + { + "epoch": 0.05358728109111452, + "grad_norm": 0.828713059425354, + "learning_rate": 0.00019960387261240334, + "loss": 2.8079, + "step": 664 + }, + { + "epoch": 0.053667984827697524, + "grad_norm": 0.8610214591026306, + "learning_rate": 0.00019960246759230697, + "loss": 2.8197, + "step": 665 + }, + { + "epoch": 0.05374868856428053, + "grad_norm": 0.8913124799728394, + "learning_rate": 0.00019960106008985674, + "loss": 2.8392, + "step": 666 + }, + { + "epoch": 0.05382939230086353, + "grad_norm": 0.8109759092330933, + "learning_rate": 0.00019959965010508778, + "loss": 2.7961, + "step": 667 + }, + { + "epoch": 0.05391009603744654, + "grad_norm": 0.8714832663536072, + "learning_rate": 0.00019959823763803514, + "loss": 2.7984, + "step": 668 + }, + { + "epoch": 0.05399079977402954, + "grad_norm": 0.9008125066757202, + "learning_rate": 0.00019959682268873408, + "loss": 2.8319, + "step": 669 + }, + { + "epoch": 0.05407150351061254, + "grad_norm": 0.8718584775924683, + "learning_rate": 0.00019959540525721985, + "loss": 2.7973, + "step": 670 + }, + { + "epoch": 0.05415220724719554, + "grad_norm": 0.8666327595710754, + "learning_rate": 0.00019959398534352774, + "loss": 2.8296, + "step": 671 + }, + { + "epoch": 0.05423291098377855, + "grad_norm": 0.9755229949951172, + "learning_rate": 0.00019959256294769322, + "loss": 2.8358, + "step": 672 + }, + { + "epoch": 0.05431361472036155, + "grad_norm": 
1.193708062171936, + "learning_rate": 0.0001995911380697517, + "loss": 2.7672, + "step": 673 + }, + { + "epoch": 0.054394318456944556, + "grad_norm": 0.9104088544845581, + "learning_rate": 0.00019958971070973866, + "loss": 2.8389, + "step": 674 + }, + { + "epoch": 0.05447502219352756, + "grad_norm": 0.9266251921653748, + "learning_rate": 0.0001995882808676897, + "loss": 2.8226, + "step": 675 + }, + { + "epoch": 0.054555725930110564, + "grad_norm": 1.1161282062530518, + "learning_rate": 0.00019958684854364046, + "loss": 2.8236, + "step": 676 + }, + { + "epoch": 0.05463642966669357, + "grad_norm": 0.9200586080551147, + "learning_rate": 0.00019958541373762666, + "loss": 2.8074, + "step": 677 + }, + { + "epoch": 0.05471713340327657, + "grad_norm": 1.0372560024261475, + "learning_rate": 0.000199583976449684, + "loss": 2.815, + "step": 678 + }, + { + "epoch": 0.05479783713985958, + "grad_norm": 0.8822301030158997, + "learning_rate": 0.0001995825366798483, + "loss": 2.7985, + "step": 679 + }, + { + "epoch": 0.05487854087644258, + "grad_norm": 0.9226076006889343, + "learning_rate": 0.00019958109442815553, + "loss": 2.7649, + "step": 680 + }, + { + "epoch": 0.054959244613025586, + "grad_norm": 0.8769479990005493, + "learning_rate": 0.00019957964969464156, + "loss": 2.8483, + "step": 681 + }, + { + "epoch": 0.05503994834960859, + "grad_norm": 0.8601027727127075, + "learning_rate": 0.0001995782024793424, + "loss": 2.8072, + "step": 682 + }, + { + "epoch": 0.05512065208619159, + "grad_norm": 0.9684911370277405, + "learning_rate": 0.00019957675278229416, + "loss": 2.8693, + "step": 683 + }, + { + "epoch": 0.05520135582277459, + "grad_norm": 0.9119890928268433, + "learning_rate": 0.00019957530060353294, + "loss": 2.853, + "step": 684 + }, + { + "epoch": 0.055282059559357596, + "grad_norm": 0.9588247537612915, + "learning_rate": 0.0001995738459430949, + "loss": 2.8435, + "step": 685 + }, + { + "epoch": 0.0553627632959406, + "grad_norm": 0.8317441940307617, + "learning_rate": 
0.00019957238880101636, + "loss": 2.8208, + "step": 686 + }, + { + "epoch": 0.055443467032523605, + "grad_norm": 0.92695152759552, + "learning_rate": 0.00019957092917733361, + "loss": 2.8378, + "step": 687 + }, + { + "epoch": 0.05552417076910661, + "grad_norm": 0.8908315300941467, + "learning_rate": 0.00019956946707208305, + "loss": 2.8041, + "step": 688 + }, + { + "epoch": 0.055604874505689614, + "grad_norm": 0.9787055253982544, + "learning_rate": 0.00019956800248530107, + "loss": 2.8604, + "step": 689 + }, + { + "epoch": 0.05568557824227262, + "grad_norm": 0.8707631826400757, + "learning_rate": 0.00019956653541702415, + "loss": 2.7763, + "step": 690 + }, + { + "epoch": 0.05576628197885562, + "grad_norm": 1.0059715509414673, + "learning_rate": 0.00019956506586728896, + "loss": 2.8267, + "step": 691 + }, + { + "epoch": 0.05584698571543863, + "grad_norm": 0.88490891456604, + "learning_rate": 0.00019956359383613203, + "loss": 2.8278, + "step": 692 + }, + { + "epoch": 0.05592768945202163, + "grad_norm": 0.9527923464775085, + "learning_rate": 0.00019956211932359007, + "loss": 2.8251, + "step": 693 + }, + { + "epoch": 0.056008393188604635, + "grad_norm": 0.9612617492675781, + "learning_rate": 0.00019956064232969987, + "loss": 2.8148, + "step": 694 + }, + { + "epoch": 0.05608909692518763, + "grad_norm": 0.9261285066604614, + "learning_rate": 0.0001995591628544982, + "loss": 2.8176, + "step": 695 + }, + { + "epoch": 0.05616980066177064, + "grad_norm": 0.9766250252723694, + "learning_rate": 0.0001995576808980219, + "loss": 2.7968, + "step": 696 + }, + { + "epoch": 0.05625050439835364, + "grad_norm": 0.9287495017051697, + "learning_rate": 0.00019955619646030802, + "loss": 2.7679, + "step": 697 + }, + { + "epoch": 0.056331208134936646, + "grad_norm": 0.9182924032211304, + "learning_rate": 0.00019955470954139345, + "loss": 2.8295, + "step": 698 + }, + { + "epoch": 0.05641191187151965, + "grad_norm": 0.8650663495063782, + "learning_rate": 0.00019955322014131524, + "loss": 
2.7928, + "step": 699 + }, + { + "epoch": 0.056492615608102655, + "grad_norm": 0.9543934464454651, + "learning_rate": 0.00019955172826011062, + "loss": 2.8049, + "step": 700 + }, + { + "epoch": 0.05657331934468566, + "grad_norm": 0.9060636162757874, + "learning_rate": 0.00019955023389781664, + "loss": 2.871, + "step": 701 + }, + { + "epoch": 0.05665402308126866, + "grad_norm": 0.9824137091636658, + "learning_rate": 0.00019954873705447065, + "loss": 2.816, + "step": 702 + }, + { + "epoch": 0.05673472681785167, + "grad_norm": 0.8831053972244263, + "learning_rate": 0.00019954723773010988, + "loss": 2.8207, + "step": 703 + }, + { + "epoch": 0.05681543055443467, + "grad_norm": 0.9603390693664551, + "learning_rate": 0.00019954573592477173, + "loss": 2.831, + "step": 704 + }, + { + "epoch": 0.056896134291017676, + "grad_norm": 0.911556601524353, + "learning_rate": 0.00019954423163849364, + "loss": 2.7679, + "step": 705 + }, + { + "epoch": 0.05697683802760068, + "grad_norm": 0.8558745384216309, + "learning_rate": 0.00019954272487131305, + "loss": 2.7934, + "step": 706 + }, + { + "epoch": 0.057057541764183685, + "grad_norm": 1.0175282955169678, + "learning_rate": 0.00019954121562326758, + "loss": 2.905, + "step": 707 + }, + { + "epoch": 0.05713824550076668, + "grad_norm": 0.9480875730514526, + "learning_rate": 0.00019953970389439483, + "loss": 2.85, + "step": 708 + }, + { + "epoch": 0.05721894923734969, + "grad_norm": 0.9271003603935242, + "learning_rate": 0.0001995381896847324, + "loss": 2.8237, + "step": 709 + }, + { + "epoch": 0.05729965297393269, + "grad_norm": 0.8439653515815735, + "learning_rate": 0.00019953667299431815, + "loss": 2.821, + "step": 710 + }, + { + "epoch": 0.057380356710515695, + "grad_norm": 0.9750552177429199, + "learning_rate": 0.0001995351538231898, + "loss": 2.8613, + "step": 711 + }, + { + "epoch": 0.0574610604470987, + "grad_norm": 0.9409266710281372, + "learning_rate": 0.0001995336321713852, + "loss": 2.7876, + "step": 712 + }, + { + "epoch": 
0.057541764183681704, + "grad_norm": 0.811138927936554, + "learning_rate": 0.00019953210803894233, + "loss": 2.7957, + "step": 713 + }, + { + "epoch": 0.05762246792026471, + "grad_norm": 0.9504825472831726, + "learning_rate": 0.00019953058142589916, + "loss": 2.8536, + "step": 714 + }, + { + "epoch": 0.05770317165684771, + "grad_norm": 0.8183554410934448, + "learning_rate": 0.00019952905233229368, + "loss": 2.7697, + "step": 715 + }, + { + "epoch": 0.05778387539343072, + "grad_norm": 1.1146113872528076, + "learning_rate": 0.0001995275207581641, + "loss": 2.8629, + "step": 716 + }, + { + "epoch": 0.05786457913001372, + "grad_norm": 0.8797986507415771, + "learning_rate": 0.00019952598670354852, + "loss": 2.7962, + "step": 717 + }, + { + "epoch": 0.057945282866596726, + "grad_norm": 0.8771101832389832, + "learning_rate": 0.00019952445016848517, + "loss": 2.8323, + "step": 718 + }, + { + "epoch": 0.05802598660317973, + "grad_norm": 0.9003355503082275, + "learning_rate": 0.00019952291115301235, + "loss": 2.777, + "step": 719 + }, + { + "epoch": 0.058106690339762734, + "grad_norm": 0.846125602722168, + "learning_rate": 0.00019952136965716846, + "loss": 2.7875, + "step": 720 + }, + { + "epoch": 0.05818739407634573, + "grad_norm": 0.908833920955658, + "learning_rate": 0.00019951982568099187, + "loss": 2.7975, + "step": 721 + }, + { + "epoch": 0.058268097812928736, + "grad_norm": 0.8616230487823486, + "learning_rate": 0.00019951827922452106, + "loss": 2.7486, + "step": 722 + }, + { + "epoch": 0.05834880154951174, + "grad_norm": 0.8791850805282593, + "learning_rate": 0.00019951673028779462, + "loss": 2.8301, + "step": 723 + }, + { + "epoch": 0.058429505286094745, + "grad_norm": 0.9437321424484253, + "learning_rate": 0.00019951517887085112, + "loss": 2.7956, + "step": 724 + }, + { + "epoch": 0.05851020902267775, + "grad_norm": 0.9263394474983215, + "learning_rate": 0.00019951362497372922, + "loss": 2.867, + "step": 725 + }, + { + "epoch": 0.05859091275926075, + "grad_norm": 
0.9442462921142578, + "learning_rate": 0.00019951206859646764, + "loss": 2.8447, + "step": 726 + }, + { + "epoch": 0.05867161649584376, + "grad_norm": 0.9286711812019348, + "learning_rate": 0.0001995105097391052, + "loss": 2.7588, + "step": 727 + }, + { + "epoch": 0.05875232023242676, + "grad_norm": 0.9338774085044861, + "learning_rate": 0.00019950894840168072, + "loss": 2.7394, + "step": 728 + }, + { + "epoch": 0.058833023969009766, + "grad_norm": 0.8880760073661804, + "learning_rate": 0.00019950738458423314, + "loss": 2.7949, + "step": 729 + }, + { + "epoch": 0.05891372770559277, + "grad_norm": 1.0091183185577393, + "learning_rate": 0.00019950581828680143, + "loss": 2.8633, + "step": 730 + }, + { + "epoch": 0.058994431442175775, + "grad_norm": 0.8657729625701904, + "learning_rate": 0.0001995042495094246, + "loss": 2.8649, + "step": 731 + }, + { + "epoch": 0.05907513517875878, + "grad_norm": 1.0084047317504883, + "learning_rate": 0.00019950267825214176, + "loss": 2.8422, + "step": 732 + }, + { + "epoch": 0.059155838915341784, + "grad_norm": 0.9096506237983704, + "learning_rate": 0.00019950110451499208, + "loss": 2.7908, + "step": 733 + }, + { + "epoch": 0.05923654265192478, + "grad_norm": 1.1338937282562256, + "learning_rate": 0.0001994995282980148, + "loss": 2.8093, + "step": 734 + }, + { + "epoch": 0.059317246388507786, + "grad_norm": 0.8813811540603638, + "learning_rate": 0.00019949794960124915, + "loss": 2.8866, + "step": 735 + }, + { + "epoch": 0.05939795012509079, + "grad_norm": 0.8457592129707336, + "learning_rate": 0.00019949636842473453, + "loss": 2.7744, + "step": 736 + }, + { + "epoch": 0.059478653861673794, + "grad_norm": 0.8731856346130371, + "learning_rate": 0.0001994947847685103, + "loss": 2.7822, + "step": 737 + }, + { + "epoch": 0.0595593575982568, + "grad_norm": 0.8915185332298279, + "learning_rate": 0.00019949319863261597, + "loss": 2.773, + "step": 738 + }, + { + "epoch": 0.0596400613348398, + "grad_norm": 0.9478987455368042, + "learning_rate": 
0.00019949161001709106, + "loss": 2.8462, + "step": 739 + }, + { + "epoch": 0.05972076507142281, + "grad_norm": 0.8903716206550598, + "learning_rate": 0.00019949001892197515, + "loss": 2.7741, + "step": 740 + }, + { + "epoch": 0.05980146880800581, + "grad_norm": 0.8870117664337158, + "learning_rate": 0.00019948842534730786, + "loss": 2.8255, + "step": 741 + }, + { + "epoch": 0.059882172544588816, + "grad_norm": 1.0766080617904663, + "learning_rate": 0.00019948682929312898, + "loss": 2.8865, + "step": 742 + }, + { + "epoch": 0.05996287628117182, + "grad_norm": 0.846447229385376, + "learning_rate": 0.00019948523075947824, + "loss": 2.8441, + "step": 743 + }, + { + "epoch": 0.060043580017754825, + "grad_norm": 0.9847991466522217, + "learning_rate": 0.00019948362974639552, + "loss": 2.8099, + "step": 744 + }, + { + "epoch": 0.06012428375433783, + "grad_norm": 0.9170514941215515, + "learning_rate": 0.00019948202625392068, + "loss": 2.8797, + "step": 745 + }, + { + "epoch": 0.060204987490920826, + "grad_norm": 0.8564898371696472, + "learning_rate": 0.0001994804202820937, + "loss": 2.7993, + "step": 746 + }, + { + "epoch": 0.06028569122750383, + "grad_norm": 0.8527392148971558, + "learning_rate": 0.00019947881183095457, + "loss": 2.7816, + "step": 747 + }, + { + "epoch": 0.060366394964086835, + "grad_norm": 0.9170876145362854, + "learning_rate": 0.00019947720090054342, + "loss": 2.8031, + "step": 748 + }, + { + "epoch": 0.06044709870066984, + "grad_norm": 0.8891414403915405, + "learning_rate": 0.0001994755874909004, + "loss": 2.8072, + "step": 749 + }, + { + "epoch": 0.060527802437252844, + "grad_norm": 0.8853670358657837, + "learning_rate": 0.0001994739716020657, + "loss": 2.8857, + "step": 750 + }, + { + "epoch": 0.06060850617383585, + "grad_norm": 0.9011211395263672, + "learning_rate": 0.0001994723532340796, + "loss": 2.8519, + "step": 751 + }, + { + "epoch": 0.06068920991041885, + "grad_norm": 0.8843330144882202, + "learning_rate": 0.00019947073238698243, + "loss": 
2.7882, + "step": 752 + }, + { + "epoch": 0.06076991364700186, + "grad_norm": 0.8712944984436035, + "learning_rate": 0.00019946910906081463, + "loss": 2.791, + "step": 753 + }, + { + "epoch": 0.06085061738358486, + "grad_norm": 0.8296090364456177, + "learning_rate": 0.00019946748325561656, + "loss": 2.8073, + "step": 754 + }, + { + "epoch": 0.060931321120167865, + "grad_norm": 0.9239117503166199, + "learning_rate": 0.00019946585497142885, + "loss": 2.8209, + "step": 755 + }, + { + "epoch": 0.06101202485675087, + "grad_norm": 0.8885170221328735, + "learning_rate": 0.000199464224208292, + "loss": 2.8391, + "step": 756 + }, + { + "epoch": 0.061092728593333874, + "grad_norm": 0.933720588684082, + "learning_rate": 0.0001994625909662467, + "loss": 2.7635, + "step": 757 + }, + { + "epoch": 0.06117343232991688, + "grad_norm": 0.9751253724098206, + "learning_rate": 0.00019946095524533362, + "loss": 2.7933, + "step": 758 + }, + { + "epoch": 0.061254136066499876, + "grad_norm": 0.9469670057296753, + "learning_rate": 0.00019945931704559353, + "loss": 2.7652, + "step": 759 + }, + { + "epoch": 0.06133483980308288, + "grad_norm": 0.8559684157371521, + "learning_rate": 0.00019945767636706728, + "loss": 2.8258, + "step": 760 + }, + { + "epoch": 0.061415543539665884, + "grad_norm": 1.021478295326233, + "learning_rate": 0.00019945603320979574, + "loss": 2.8047, + "step": 761 + }, + { + "epoch": 0.06149624727624889, + "grad_norm": 0.8421681523323059, + "learning_rate": 0.00019945438757381986, + "loss": 2.8233, + "step": 762 + }, + { + "epoch": 0.06157695101283189, + "grad_norm": 0.900654137134552, + "learning_rate": 0.0001994527394591807, + "loss": 2.7591, + "step": 763 + }, + { + "epoch": 0.0616576547494149, + "grad_norm": 0.878300666809082, + "learning_rate": 0.0001994510888659193, + "loss": 2.715, + "step": 764 + }, + { + "epoch": 0.0617383584859979, + "grad_norm": 0.9170855283737183, + "learning_rate": 0.00019944943579407678, + "loss": 2.8604, + "step": 765 + }, + { + "epoch": 
0.061819062222580906, + "grad_norm": 0.8532859683036804, + "learning_rate": 0.00019944778024369434, + "loss": 2.8124, + "step": 766 + }, + { + "epoch": 0.06189976595916391, + "grad_norm": 0.8549049496650696, + "learning_rate": 0.00019944612221481332, + "loss": 2.8066, + "step": 767 + }, + { + "epoch": 0.061980469695746915, + "grad_norm": 0.9602857828140259, + "learning_rate": 0.00019944446170747492, + "loss": 2.8424, + "step": 768 + }, + { + "epoch": 0.06206117343232992, + "grad_norm": 0.910953164100647, + "learning_rate": 0.0001994427987217206, + "loss": 2.8093, + "step": 769 + }, + { + "epoch": 0.06214187716891292, + "grad_norm": 0.8536386489868164, + "learning_rate": 0.0001994411332575918, + "loss": 2.802, + "step": 770 + }, + { + "epoch": 0.06222258090549593, + "grad_norm": 0.9166232347488403, + "learning_rate": 0.00019943946531513, + "loss": 2.783, + "step": 771 + }, + { + "epoch": 0.062303284642078925, + "grad_norm": 0.9954056739807129, + "learning_rate": 0.00019943779489437678, + "loss": 2.8198, + "step": 772 + }, + { + "epoch": 0.06238398837866193, + "grad_norm": 0.8527171015739441, + "learning_rate": 0.0001994361219953738, + "loss": 2.8159, + "step": 773 + }, + { + "epoch": 0.062464692115244934, + "grad_norm": 0.8951592445373535, + "learning_rate": 0.00019943444661816274, + "loss": 2.7969, + "step": 774 + }, + { + "epoch": 0.06254539585182795, + "grad_norm": 0.9348207116127014, + "learning_rate": 0.00019943276876278532, + "loss": 2.8403, + "step": 775 + }, + { + "epoch": 0.06262609958841095, + "grad_norm": 0.866318941116333, + "learning_rate": 0.00019943108842928342, + "loss": 2.7886, + "step": 776 + }, + { + "epoch": 0.06270680332499395, + "grad_norm": 0.8571285605430603, + "learning_rate": 0.00019942940561769884, + "loss": 2.771, + "step": 777 + }, + { + "epoch": 0.06278750706157694, + "grad_norm": 0.8384295105934143, + "learning_rate": 0.00019942772032807357, + "loss": 2.7885, + "step": 778 + }, + { + "epoch": 0.06286821079815995, + "grad_norm": 
0.9934808611869812, + "learning_rate": 0.00019942603256044961, + "loss": 2.8399, + "step": 779 + }, + { + "epoch": 0.06294891453474295, + "grad_norm": 0.8275915384292603, + "learning_rate": 0.00019942434231486902, + "loss": 2.8983, + "step": 780 + }, + { + "epoch": 0.06302961827132596, + "grad_norm": 0.9073596000671387, + "learning_rate": 0.0001994226495913739, + "loss": 2.7886, + "step": 781 + }, + { + "epoch": 0.06311032200790896, + "grad_norm": 0.9091461300849915, + "learning_rate": 0.00019942095439000646, + "loss": 2.814, + "step": 782 + }, + { + "epoch": 0.06319102574449197, + "grad_norm": 0.9356934428215027, + "learning_rate": 0.000199419256710809, + "loss": 2.8238, + "step": 783 + }, + { + "epoch": 0.06327172948107497, + "grad_norm": 0.883514940738678, + "learning_rate": 0.00019941755655382374, + "loss": 2.7912, + "step": 784 + }, + { + "epoch": 0.06335243321765797, + "grad_norm": 0.8770506381988525, + "learning_rate": 0.00019941585391909308, + "loss": 2.7774, + "step": 785 + }, + { + "epoch": 0.06343313695424098, + "grad_norm": 0.8891726136207581, + "learning_rate": 0.00019941414880665948, + "loss": 2.7975, + "step": 786 + }, + { + "epoch": 0.06351384069082398, + "grad_norm": 0.9280585050582886, + "learning_rate": 0.00019941244121656545, + "loss": 2.9468, + "step": 787 + }, + { + "epoch": 0.06359454442740699, + "grad_norm": 0.8545510768890381, + "learning_rate": 0.00019941073114885347, + "loss": 2.8165, + "step": 788 + }, + { + "epoch": 0.06367524816398999, + "grad_norm": 0.8631312847137451, + "learning_rate": 0.0001994090186035662, + "loss": 2.7955, + "step": 789 + }, + { + "epoch": 0.063755951900573, + "grad_norm": 0.8883851170539856, + "learning_rate": 0.00019940730358074634, + "loss": 2.7828, + "step": 790 + }, + { + "epoch": 0.063836655637156, + "grad_norm": 0.8421074748039246, + "learning_rate": 0.00019940558608043664, + "loss": 2.7999, + "step": 791 + }, + { + "epoch": 0.063917359373739, + "grad_norm": 0.918134868144989, + "learning_rate": 
0.0001994038661026799, + "loss": 2.7888, + "step": 792 + }, + { + "epoch": 0.06399806311032201, + "grad_norm": 0.8513637781143188, + "learning_rate": 0.00019940214364751896, + "loss": 2.7719, + "step": 793 + }, + { + "epoch": 0.06407876684690501, + "grad_norm": 0.9181898236274719, + "learning_rate": 0.00019940041871499675, + "loss": 2.8345, + "step": 794 + }, + { + "epoch": 0.06415947058348802, + "grad_norm": 0.8129134774208069, + "learning_rate": 0.00019939869130515626, + "loss": 2.7316, + "step": 795 + }, + { + "epoch": 0.06424017432007102, + "grad_norm": 0.8782191872596741, + "learning_rate": 0.00019939696141804057, + "loss": 2.7852, + "step": 796 + }, + { + "epoch": 0.06432087805665403, + "grad_norm": 0.9064851403236389, + "learning_rate": 0.00019939522905369276, + "loss": 2.8105, + "step": 797 + }, + { + "epoch": 0.06440158179323703, + "grad_norm": 0.9888454675674438, + "learning_rate": 0.00019939349421215603, + "loss": 2.8496, + "step": 798 + }, + { + "epoch": 0.06448228552982004, + "grad_norm": 0.8717427253723145, + "learning_rate": 0.0001993917568934736, + "loss": 2.8227, + "step": 799 + }, + { + "epoch": 0.06456298926640304, + "grad_norm": 0.922980010509491, + "learning_rate": 0.0001993900170976888, + "loss": 2.8571, + "step": 800 + }, + { + "epoch": 0.06464369300298604, + "grad_norm": 0.8311850428581238, + "learning_rate": 0.00019938827482484492, + "loss": 2.7905, + "step": 801 + }, + { + "epoch": 0.06472439673956905, + "grad_norm": 0.9274900555610657, + "learning_rate": 0.0001993865300749855, + "loss": 2.8526, + "step": 802 + }, + { + "epoch": 0.06480510047615205, + "grad_norm": 0.9072165489196777, + "learning_rate": 0.00019938478284815388, + "loss": 2.8384, + "step": 803 + }, + { + "epoch": 0.06488580421273504, + "grad_norm": 0.854099452495575, + "learning_rate": 0.0001993830331443937, + "loss": 2.8459, + "step": 804 + }, + { + "epoch": 0.06496650794931805, + "grad_norm": 0.824126660823822, + "learning_rate": 0.00019938128096374854, + "loss": 2.7845, + 
"step": 805 + }, + { + "epoch": 0.06504721168590105, + "grad_norm": 0.8570442795753479, + "learning_rate": 0.0001993795263062621, + "loss": 2.8446, + "step": 806 + }, + { + "epoch": 0.06512791542248406, + "grad_norm": 0.8998628854751587, + "learning_rate": 0.00019937776917197805, + "loss": 2.8604, + "step": 807 + }, + { + "epoch": 0.06520861915906706, + "grad_norm": 0.9189189076423645, + "learning_rate": 0.00019937600956094023, + "loss": 2.7866, + "step": 808 + }, + { + "epoch": 0.06528932289565006, + "grad_norm": 0.9471604824066162, + "learning_rate": 0.00019937424747319248, + "loss": 2.7619, + "step": 809 + }, + { + "epoch": 0.06537002663223307, + "grad_norm": 0.8507755994796753, + "learning_rate": 0.00019937248290877874, + "loss": 2.8259, + "step": 810 + }, + { + "epoch": 0.06545073036881607, + "grad_norm": 0.8800963759422302, + "learning_rate": 0.00019937071586774292, + "loss": 2.827, + "step": 811 + }, + { + "epoch": 0.06553143410539908, + "grad_norm": 0.8851124048233032, + "learning_rate": 0.00019936894635012915, + "loss": 2.793, + "step": 812 + }, + { + "epoch": 0.06561213784198208, + "grad_norm": 0.88127601146698, + "learning_rate": 0.00019936717435598144, + "loss": 2.8885, + "step": 813 + }, + { + "epoch": 0.06569284157856509, + "grad_norm": 0.9115073084831238, + "learning_rate": 0.000199365399885344, + "loss": 2.8278, + "step": 814 + }, + { + "epoch": 0.06577354531514809, + "grad_norm": 0.8722662925720215, + "learning_rate": 0.00019936362293826107, + "loss": 2.8125, + "step": 815 + }, + { + "epoch": 0.0658542490517311, + "grad_norm": 0.8332365155220032, + "learning_rate": 0.0001993618435147769, + "loss": 2.7682, + "step": 816 + }, + { + "epoch": 0.0659349527883141, + "grad_norm": 0.9524003863334656, + "learning_rate": 0.0001993600616149359, + "loss": 2.8166, + "step": 817 + }, + { + "epoch": 0.0660156565248971, + "grad_norm": 0.8402767181396484, + "learning_rate": 0.0001993582772387824, + "loss": 2.8192, + "step": 818 + }, + { + "epoch": 
0.06609636026148011, + "grad_norm": 0.8589913249015808, + "learning_rate": 0.0001993564903863609, + "loss": 2.7785, + "step": 819 + }, + { + "epoch": 0.06617706399806311, + "grad_norm": 1.034550428390503, + "learning_rate": 0.00019935470105771598, + "loss": 2.8407, + "step": 820 + }, + { + "epoch": 0.06625776773464612, + "grad_norm": 0.856490969657898, + "learning_rate": 0.0001993529092528921, + "loss": 2.794, + "step": 821 + }, + { + "epoch": 0.06633847147122912, + "grad_norm": 0.897498369216919, + "learning_rate": 0.0001993511149719341, + "loss": 2.7959, + "step": 822 + }, + { + "epoch": 0.06641917520781213, + "grad_norm": 0.8495277166366577, + "learning_rate": 0.00019934931821488658, + "loss": 2.783, + "step": 823 + }, + { + "epoch": 0.06649987894439513, + "grad_norm": 0.8362239599227905, + "learning_rate": 0.00019934751898179436, + "loss": 2.8628, + "step": 824 + }, + { + "epoch": 0.06658058268097813, + "grad_norm": 0.8702061176300049, + "learning_rate": 0.00019934571727270225, + "loss": 2.7878, + "step": 825 + }, + { + "epoch": 0.06666128641756114, + "grad_norm": 0.8341560363769531, + "learning_rate": 0.0001993439130876552, + "loss": 2.7345, + "step": 826 + }, + { + "epoch": 0.06674199015414414, + "grad_norm": 0.880181074142456, + "learning_rate": 0.00019934210642669813, + "loss": 2.7789, + "step": 827 + }, + { + "epoch": 0.06682269389072715, + "grad_norm": 0.9088126420974731, + "learning_rate": 0.00019934029728987607, + "loss": 2.7893, + "step": 828 + }, + { + "epoch": 0.06690339762731014, + "grad_norm": 0.8087106347084045, + "learning_rate": 0.00019933848567723416, + "loss": 2.7967, + "step": 829 + }, + { + "epoch": 0.06698410136389314, + "grad_norm": 0.8970876336097717, + "learning_rate": 0.00019933667158881745, + "loss": 2.8837, + "step": 830 + }, + { + "epoch": 0.06706480510047615, + "grad_norm": 0.9344804883003235, + "learning_rate": 0.00019933485502467128, + "loss": 2.7754, + "step": 831 + }, + { + "epoch": 0.06714550883705915, + "grad_norm": 
0.8119301795959473, + "learning_rate": 0.00019933303598484084, + "loss": 2.7919, + "step": 832 + }, + { + "epoch": 0.06722621257364216, + "grad_norm": 0.9370681047439575, + "learning_rate": 0.00019933121446937148, + "loss": 2.8011, + "step": 833 + }, + { + "epoch": 0.06730691631022516, + "grad_norm": 0.8358973264694214, + "learning_rate": 0.00019932939047830858, + "loss": 2.8339, + "step": 834 + }, + { + "epoch": 0.06738762004680816, + "grad_norm": 0.8565972447395325, + "learning_rate": 0.00019932756401169765, + "loss": 2.8269, + "step": 835 + }, + { + "epoch": 0.06746832378339117, + "grad_norm": 0.8405514359474182, + "learning_rate": 0.00019932573506958417, + "loss": 2.7621, + "step": 836 + }, + { + "epoch": 0.06754902751997417, + "grad_norm": 0.8217617869377136, + "learning_rate": 0.00019932390365201373, + "loss": 2.8363, + "step": 837 + }, + { + "epoch": 0.06762973125655718, + "grad_norm": 0.9121438264846802, + "learning_rate": 0.00019932206975903198, + "loss": 2.8033, + "step": 838 + }, + { + "epoch": 0.06771043499314018, + "grad_norm": 0.9113054871559143, + "learning_rate": 0.00019932023339068464, + "loss": 2.8696, + "step": 839 + }, + { + "epoch": 0.06779113872972319, + "grad_norm": 0.8638293743133545, + "learning_rate": 0.00019931839454701743, + "loss": 2.8008, + "step": 840 + }, + { + "epoch": 0.06787184246630619, + "grad_norm": 0.862932562828064, + "learning_rate": 0.0001993165532280762, + "loss": 2.8092, + "step": 841 + }, + { + "epoch": 0.0679525462028892, + "grad_norm": 0.9089607000350952, + "learning_rate": 0.00019931470943390685, + "loss": 2.8921, + "step": 842 + }, + { + "epoch": 0.0680332499394722, + "grad_norm": 0.9233555793762207, + "learning_rate": 0.00019931286316455537, + "loss": 2.9025, + "step": 843 + }, + { + "epoch": 0.0681139536760552, + "grad_norm": 0.9403017163276672, + "learning_rate": 0.0001993110144200677, + "loss": 2.7875, + "step": 844 + }, + { + "epoch": 0.06819465741263821, + "grad_norm": 0.9194290637969971, + "learning_rate": 
0.00019930916320048996, + "loss": 2.8254, + "step": 845 + }, + { + "epoch": 0.06827536114922121, + "grad_norm": 0.8238688111305237, + "learning_rate": 0.00019930730950586828, + "loss": 2.82, + "step": 846 + }, + { + "epoch": 0.06835606488580422, + "grad_norm": 0.8560660481452942, + "learning_rate": 0.00019930545333624885, + "loss": 2.8516, + "step": 847 + }, + { + "epoch": 0.06843676862238722, + "grad_norm": 0.9127222895622253, + "learning_rate": 0.0001993035946916779, + "loss": 2.7674, + "step": 848 + }, + { + "epoch": 0.06851747235897022, + "grad_norm": 0.8679420948028564, + "learning_rate": 0.00019930173357220182, + "loss": 2.777, + "step": 849 + }, + { + "epoch": 0.06859817609555323, + "grad_norm": 0.9686945676803589, + "learning_rate": 0.00019929986997786699, + "loss": 2.7841, + "step": 850 + }, + { + "epoch": 0.06867887983213623, + "grad_norm": 0.8366333246231079, + "learning_rate": 0.00019929800390871977, + "loss": 2.7993, + "step": 851 + }, + { + "epoch": 0.06875958356871924, + "grad_norm": 0.8374585509300232, + "learning_rate": 0.00019929613536480675, + "loss": 2.7545, + "step": 852 + }, + { + "epoch": 0.06884028730530224, + "grad_norm": 0.9843763709068298, + "learning_rate": 0.00019929426434617451, + "loss": 2.8118, + "step": 853 + }, + { + "epoch": 0.06892099104188525, + "grad_norm": 0.8093454241752625, + "learning_rate": 0.0001992923908528696, + "loss": 2.7301, + "step": 854 + }, + { + "epoch": 0.06900169477846824, + "grad_norm": 0.8374418020248413, + "learning_rate": 0.00019929051488493877, + "loss": 2.7745, + "step": 855 + }, + { + "epoch": 0.06908239851505124, + "grad_norm": 0.869965136051178, + "learning_rate": 0.00019928863644242875, + "loss": 2.7637, + "step": 856 + }, + { + "epoch": 0.06916310225163425, + "grad_norm": 0.9280590415000916, + "learning_rate": 0.00019928675552538638, + "loss": 2.7792, + "step": 857 + }, + { + "epoch": 0.06924380598821725, + "grad_norm": 0.8624193668365479, + "learning_rate": 0.00019928487213385852, + "loss": 2.7755, 
+ "step": 858 + }, + { + "epoch": 0.06932450972480025, + "grad_norm": 0.8379972577095032, + "learning_rate": 0.00019928298626789212, + "loss": 2.8563, + "step": 859 + }, + { + "epoch": 0.06940521346138326, + "grad_norm": 0.9272914528846741, + "learning_rate": 0.00019928109792753418, + "loss": 2.836, + "step": 860 + }, + { + "epoch": 0.06948591719796626, + "grad_norm": 0.9239040613174438, + "learning_rate": 0.00019927920711283175, + "loss": 2.7999, + "step": 861 + }, + { + "epoch": 0.06956662093454927, + "grad_norm": 0.9125113487243652, + "learning_rate": 0.00019927731382383195, + "loss": 2.8494, + "step": 862 + }, + { + "epoch": 0.06964732467113227, + "grad_norm": 0.8782855868339539, + "learning_rate": 0.00019927541806058198, + "loss": 2.767, + "step": 863 + }, + { + "epoch": 0.06972802840771528, + "grad_norm": 0.8815447092056274, + "learning_rate": 0.00019927351982312907, + "loss": 2.7877, + "step": 864 + }, + { + "epoch": 0.06980873214429828, + "grad_norm": 0.8555476069450378, + "learning_rate": 0.00019927161911152056, + "loss": 2.8057, + "step": 865 + }, + { + "epoch": 0.06988943588088128, + "grad_norm": 0.8562924265861511, + "learning_rate": 0.00019926971592580382, + "loss": 2.8049, + "step": 866 + }, + { + "epoch": 0.06997013961746429, + "grad_norm": 0.846503734588623, + "learning_rate": 0.00019926781026602625, + "loss": 2.8545, + "step": 867 + }, + { + "epoch": 0.07005084335404729, + "grad_norm": 0.8439623713493347, + "learning_rate": 0.00019926590213223535, + "loss": 2.7451, + "step": 868 + }, + { + "epoch": 0.0701315470906303, + "grad_norm": 0.8471730351448059, + "learning_rate": 0.00019926399152447868, + "loss": 2.7879, + "step": 869 + }, + { + "epoch": 0.0702122508272133, + "grad_norm": 0.8721400499343872, + "learning_rate": 0.00019926207844280387, + "loss": 2.8594, + "step": 870 + }, + { + "epoch": 0.0702929545637963, + "grad_norm": 0.8110925555229187, + "learning_rate": 0.0001992601628872586, + "loss": 2.7789, + "step": 871 + }, + { + "epoch": 
0.07037365830037931, + "grad_norm": 0.9593119025230408, + "learning_rate": 0.0001992582448578906, + "loss": 2.8792, + "step": 872 + }, + { + "epoch": 0.07045436203696231, + "grad_norm": 0.8553354144096375, + "learning_rate": 0.00019925632435474765, + "loss": 2.8056, + "step": 873 + }, + { + "epoch": 0.07053506577354532, + "grad_norm": 0.8062612414360046, + "learning_rate": 0.00019925440137787768, + "loss": 2.7762, + "step": 874 + }, + { + "epoch": 0.07061576951012832, + "grad_norm": 0.8264921307563782, + "learning_rate": 0.00019925247592732858, + "loss": 2.8435, + "step": 875 + }, + { + "epoch": 0.07069647324671133, + "grad_norm": 0.7770401835441589, + "learning_rate": 0.00019925054800314828, + "loss": 2.7846, + "step": 876 + }, + { + "epoch": 0.07077717698329433, + "grad_norm": 0.8426765203475952, + "learning_rate": 0.0001992486176053849, + "loss": 2.782, + "step": 877 + }, + { + "epoch": 0.07085788071987734, + "grad_norm": 0.855330228805542, + "learning_rate": 0.00019924668473408655, + "loss": 2.8051, + "step": 878 + }, + { + "epoch": 0.07093858445646034, + "grad_norm": 0.8762049674987793, + "learning_rate": 0.00019924474938930135, + "loss": 2.7634, + "step": 879 + }, + { + "epoch": 0.07101928819304333, + "grad_norm": 0.9226812124252319, + "learning_rate": 0.0001992428115710776, + "loss": 2.8342, + "step": 880 + }, + { + "epoch": 0.07109999192962634, + "grad_norm": 0.9031660556793213, + "learning_rate": 0.00019924087127946353, + "loss": 2.7953, + "step": 881 + }, + { + "epoch": 0.07118069566620934, + "grad_norm": 1.0151792764663696, + "learning_rate": 0.00019923892851450757, + "loss": 2.8225, + "step": 882 + }, + { + "epoch": 0.07126139940279234, + "grad_norm": 0.9805678725242615, + "learning_rate": 0.00019923698327625806, + "loss": 2.7727, + "step": 883 + }, + { + "epoch": 0.07134210313937535, + "grad_norm": 0.8831729888916016, + "learning_rate": 0.00019923503556476356, + "loss": 2.7682, + "step": 884 + }, + { + "epoch": 0.07142280687595835, + "grad_norm": 
1.0311404466629028, + "learning_rate": 0.00019923308538007253, + "loss": 2.8422, + "step": 885 + }, + { + "epoch": 0.07150351061254136, + "grad_norm": 0.8143388628959656, + "learning_rate": 0.0001992311327222336, + "loss": 2.7876, + "step": 886 + }, + { + "epoch": 0.07158421434912436, + "grad_norm": 0.877017617225647, + "learning_rate": 0.00019922917759129552, + "loss": 2.7486, + "step": 887 + }, + { + "epoch": 0.07166491808570737, + "grad_norm": 0.930646538734436, + "learning_rate": 0.0001992272199873069, + "loss": 2.8022, + "step": 888 + }, + { + "epoch": 0.07174562182229037, + "grad_norm": 0.934753954410553, + "learning_rate": 0.00019922525991031655, + "loss": 2.8485, + "step": 889 + }, + { + "epoch": 0.07182632555887337, + "grad_norm": 0.9564220905303955, + "learning_rate": 0.00019922329736037339, + "loss": 2.761, + "step": 890 + }, + { + "epoch": 0.07190702929545638, + "grad_norm": 0.9457311630249023, + "learning_rate": 0.00019922133233752626, + "loss": 2.8279, + "step": 891 + }, + { + "epoch": 0.07198773303203938, + "grad_norm": 0.9385658502578735, + "learning_rate": 0.0001992193648418242, + "loss": 2.8222, + "step": 892 + }, + { + "epoch": 0.07206843676862239, + "grad_norm": 1.0157524347305298, + "learning_rate": 0.00019921739487331616, + "loss": 2.9166, + "step": 893 + }, + { + "epoch": 0.07214914050520539, + "grad_norm": 0.9143860340118408, + "learning_rate": 0.00019921542243205132, + "loss": 2.8139, + "step": 894 + }, + { + "epoch": 0.0722298442417884, + "grad_norm": 0.8769320249557495, + "learning_rate": 0.00019921344751807878, + "loss": 2.8023, + "step": 895 + }, + { + "epoch": 0.0723105479783714, + "grad_norm": 0.9647517204284668, + "learning_rate": 0.0001992114701314478, + "loss": 2.8872, + "step": 896 + }, + { + "epoch": 0.0723912517149544, + "grad_norm": 1.025978446006775, + "learning_rate": 0.00019920949027220762, + "loss": 2.837, + "step": 897 + }, + { + "epoch": 0.07247195545153741, + "grad_norm": 0.8848521113395691, + "learning_rate": 
0.0001992075079404076, + "loss": 2.7498, + "step": 898 + }, + { + "epoch": 0.07255265918812041, + "grad_norm": 0.9395595788955688, + "learning_rate": 0.0001992055231360972, + "loss": 2.8752, + "step": 899 + }, + { + "epoch": 0.07263336292470342, + "grad_norm": 0.8711572885513306, + "learning_rate": 0.00019920353585932578, + "loss": 2.8608, + "step": 900 + }, + { + "epoch": 0.07271406666128642, + "grad_norm": 0.8606846332550049, + "learning_rate": 0.00019920154611014295, + "loss": 2.829, + "step": 901 + }, + { + "epoch": 0.07279477039786943, + "grad_norm": 0.859354555606842, + "learning_rate": 0.0001991995538885983, + "loss": 2.8102, + "step": 902 + }, + { + "epoch": 0.07287547413445243, + "grad_norm": 0.9063243865966797, + "learning_rate": 0.00019919755919474143, + "loss": 2.8509, + "step": 903 + }, + { + "epoch": 0.07295617787103544, + "grad_norm": 0.8321940898895264, + "learning_rate": 0.00019919556202862207, + "loss": 2.796, + "step": 904 + }, + { + "epoch": 0.07303688160761844, + "grad_norm": 0.8875191807746887, + "learning_rate": 0.00019919356239029003, + "loss": 2.8672, + "step": 905 + }, + { + "epoch": 0.07311758534420143, + "grad_norm": 0.9028071165084839, + "learning_rate": 0.0001991915602797951, + "loss": 2.8926, + "step": 906 + }, + { + "epoch": 0.07319828908078443, + "grad_norm": 0.9449291825294495, + "learning_rate": 0.0001991895556971872, + "loss": 2.8159, + "step": 907 + }, + { + "epoch": 0.07327899281736744, + "grad_norm": 0.871576189994812, + "learning_rate": 0.0001991875486425163, + "loss": 2.8162, + "step": 908 + }, + { + "epoch": 0.07335969655395044, + "grad_norm": 0.818423330783844, + "learning_rate": 0.0001991855391158324, + "loss": 2.8882, + "step": 909 + }, + { + "epoch": 0.07344040029053345, + "grad_norm": 0.8802343606948853, + "learning_rate": 0.0001991835271171856, + "loss": 2.8245, + "step": 910 + }, + { + "epoch": 0.07352110402711645, + "grad_norm": 0.916023313999176, + "learning_rate": 0.000199181512646626, + "loss": 2.8966, + "step": 
911 + }, + { + "epoch": 0.07360180776369946, + "grad_norm": 1.0663317441940308, + "learning_rate": 0.0001991794957042039, + "loss": 2.7736, + "step": 912 + }, + { + "epoch": 0.07368251150028246, + "grad_norm": 0.9212445616722107, + "learning_rate": 0.00019917747628996947, + "loss": 2.7924, + "step": 913 + }, + { + "epoch": 0.07376321523686546, + "grad_norm": 0.9785256385803223, + "learning_rate": 0.00019917545440397308, + "loss": 2.8021, + "step": 914 + }, + { + "epoch": 0.07384391897344847, + "grad_norm": 0.8510444760322571, + "learning_rate": 0.00019917343004626514, + "loss": 2.7991, + "step": 915 + }, + { + "epoch": 0.07392462271003147, + "grad_norm": 0.8967106342315674, + "learning_rate": 0.0001991714032168961, + "loss": 2.8838, + "step": 916 + }, + { + "epoch": 0.07400532644661448, + "grad_norm": 0.8940563797950745, + "learning_rate": 0.0001991693739159164, + "loss": 2.8124, + "step": 917 + }, + { + "epoch": 0.07408603018319748, + "grad_norm": 0.9270479679107666, + "learning_rate": 0.0001991673421433767, + "loss": 2.7627, + "step": 918 + }, + { + "epoch": 0.07416673391978049, + "grad_norm": 0.905805230140686, + "learning_rate": 0.0001991653078993276, + "loss": 2.781, + "step": 919 + }, + { + "epoch": 0.07424743765636349, + "grad_norm": 0.9295129179954529, + "learning_rate": 0.00019916327118381982, + "loss": 2.8332, + "step": 920 + }, + { + "epoch": 0.0743281413929465, + "grad_norm": 0.863331139087677, + "learning_rate": 0.00019916123199690408, + "loss": 2.8489, + "step": 921 + }, + { + "epoch": 0.0744088451295295, + "grad_norm": 0.9966896772384644, + "learning_rate": 0.00019915919033863127, + "loss": 2.9107, + "step": 922 + }, + { + "epoch": 0.0744895488661125, + "grad_norm": 0.8921390771865845, + "learning_rate": 0.00019915714620905218, + "loss": 2.7668, + "step": 923 + }, + { + "epoch": 0.07457025260269551, + "grad_norm": 0.9378434419631958, + "learning_rate": 0.00019915509960821782, + "loss": 2.8305, + "step": 924 + }, + { + "epoch": 0.07465095633927851, + 
"grad_norm": 1.0351817607879639, + "learning_rate": 0.0001991530505361792, + "loss": 2.9412, + "step": 925 + }, + { + "epoch": 0.07473166007586152, + "grad_norm": 0.7995476722717285, + "learning_rate": 0.0001991509989929874, + "loss": 2.7872, + "step": 926 + }, + { + "epoch": 0.07481236381244452, + "grad_norm": 0.858830988407135, + "learning_rate": 0.0001991489449786935, + "loss": 2.7775, + "step": 927 + }, + { + "epoch": 0.07489306754902753, + "grad_norm": 1.1254682540893555, + "learning_rate": 0.00019914688849334867, + "loss": 2.7913, + "step": 928 + }, + { + "epoch": 0.07497377128561053, + "grad_norm": 0.9475330710411072, + "learning_rate": 0.00019914482953700428, + "loss": 2.7945, + "step": 929 + }, + { + "epoch": 0.07505447502219353, + "grad_norm": 0.8427290916442871, + "learning_rate": 0.00019914276810971152, + "loss": 2.8297, + "step": 930 + }, + { + "epoch": 0.07513517875877652, + "grad_norm": 0.9308956265449524, + "learning_rate": 0.00019914070421152183, + "loss": 2.8534, + "step": 931 + }, + { + "epoch": 0.07521588249535953, + "grad_norm": 0.9264787435531616, + "learning_rate": 0.00019913863784248664, + "loss": 2.7959, + "step": 932 + }, + { + "epoch": 0.07529658623194253, + "grad_norm": 0.8432087302207947, + "learning_rate": 0.00019913656900265742, + "loss": 2.8479, + "step": 933 + }, + { + "epoch": 0.07537728996852554, + "grad_norm": 0.8237274885177612, + "learning_rate": 0.0001991344976920858, + "loss": 2.782, + "step": 934 + }, + { + "epoch": 0.07545799370510854, + "grad_norm": 0.8143243789672852, + "learning_rate": 0.0001991324239108233, + "loss": 2.7567, + "step": 935 + }, + { + "epoch": 0.07553869744169155, + "grad_norm": 0.8824434280395508, + "learning_rate": 0.0001991303476589217, + "loss": 2.7971, + "step": 936 + }, + { + "epoch": 0.07561940117827455, + "grad_norm": 0.8202407360076904, + "learning_rate": 0.00019912826893643272, + "loss": 2.7825, + "step": 937 + }, + { + "epoch": 0.07570010491485755, + "grad_norm": 0.8001337647438049, + 
"learning_rate": 0.00019912618774340813, + "loss": 2.8294, + "step": 938 + }, + { + "epoch": 0.07578080865144056, + "grad_norm": 0.8875572085380554, + "learning_rate": 0.00019912410407989982, + "loss": 2.8013, + "step": 939 + }, + { + "epoch": 0.07586151238802356, + "grad_norm": 0.8676280379295349, + "learning_rate": 0.0001991220179459597, + "loss": 2.767, + "step": 940 + }, + { + "epoch": 0.07594221612460657, + "grad_norm": 0.9767136573791504, + "learning_rate": 0.00019911992934163982, + "loss": 2.8315, + "step": 941 + }, + { + "epoch": 0.07602291986118957, + "grad_norm": 0.8690733909606934, + "learning_rate": 0.0001991178382669922, + "loss": 2.8042, + "step": 942 + }, + { + "epoch": 0.07610362359777258, + "grad_norm": 0.862978458404541, + "learning_rate": 0.00019911574472206893, + "loss": 2.8243, + "step": 943 + }, + { + "epoch": 0.07618432733435558, + "grad_norm": 0.9116127490997314, + "learning_rate": 0.00019911364870692225, + "loss": 2.7377, + "step": 944 + }, + { + "epoch": 0.07626503107093859, + "grad_norm": 0.8765420317649841, + "learning_rate": 0.00019911155022160433, + "loss": 2.7673, + "step": 945 + }, + { + "epoch": 0.07634573480752159, + "grad_norm": 0.8229342699050903, + "learning_rate": 0.0001991094492661675, + "loss": 2.7749, + "step": 946 + }, + { + "epoch": 0.0764264385441046, + "grad_norm": 0.8340098261833191, + "learning_rate": 0.00019910734584066412, + "loss": 2.7871, + "step": 947 + }, + { + "epoch": 0.0765071422806876, + "grad_norm": 0.8116940259933472, + "learning_rate": 0.0001991052399451466, + "loss": 2.8202, + "step": 948 + }, + { + "epoch": 0.0765878460172706, + "grad_norm": 0.8730412721633911, + "learning_rate": 0.00019910313157966747, + "loss": 2.8661, + "step": 949 + }, + { + "epoch": 0.07666854975385361, + "grad_norm": 0.8272213339805603, + "learning_rate": 0.0001991010207442792, + "loss": 2.8352, + "step": 950 + }, + { + "epoch": 0.07674925349043661, + "grad_norm": 0.8586944937705994, + "learning_rate": 0.0001990989074390345, + 
"loss": 2.8018, + "step": 951 + }, + { + "epoch": 0.07682995722701962, + "grad_norm": 0.81830894947052, + "learning_rate": 0.00019909679166398592, + "loss": 2.8154, + "step": 952 + }, + { + "epoch": 0.07691066096360262, + "grad_norm": 0.8158484101295471, + "learning_rate": 0.00019909467341918627, + "loss": 2.7618, + "step": 953 + }, + { + "epoch": 0.07699136470018562, + "grad_norm": 0.816834032535553, + "learning_rate": 0.00019909255270468833, + "loss": 2.8125, + "step": 954 + }, + { + "epoch": 0.07707206843676863, + "grad_norm": 0.944790780544281, + "learning_rate": 0.00019909042952054496, + "loss": 2.8054, + "step": 955 + }, + { + "epoch": 0.07715277217335163, + "grad_norm": 0.9281302690505981, + "learning_rate": 0.00019908830386680904, + "loss": 2.8724, + "step": 956 + }, + { + "epoch": 0.07723347590993462, + "grad_norm": 0.8850300908088684, + "learning_rate": 0.00019908617574353356, + "loss": 2.7906, + "step": 957 + }, + { + "epoch": 0.07731417964651763, + "grad_norm": 0.8997938632965088, + "learning_rate": 0.00019908404515077158, + "loss": 2.7814, + "step": 958 + }, + { + "epoch": 0.07739488338310063, + "grad_norm": 0.8814194798469543, + "learning_rate": 0.0001990819120885762, + "loss": 2.7423, + "step": 959 + }, + { + "epoch": 0.07747558711968364, + "grad_norm": 0.8759928345680237, + "learning_rate": 0.00019907977655700054, + "loss": 2.7803, + "step": 960 + }, + { + "epoch": 0.07755629085626664, + "grad_norm": 0.8439476490020752, + "learning_rate": 0.00019907763855609787, + "loss": 2.8277, + "step": 961 + }, + { + "epoch": 0.07763699459284965, + "grad_norm": 0.8745121955871582, + "learning_rate": 0.00019907549808592144, + "loss": 2.8152, + "step": 962 + }, + { + "epoch": 0.07771769832943265, + "grad_norm": 1.0439598560333252, + "learning_rate": 0.00019907335514652465, + "loss": 2.7882, + "step": 963 + }, + { + "epoch": 0.07779840206601565, + "grad_norm": 0.9516503810882568, + "learning_rate": 0.00019907120973796082, + "loss": 2.8555, + "step": 964 + }, + { + 
"epoch": 0.07787910580259866, + "grad_norm": 0.928717315196991, + "learning_rate": 0.0001990690618602835, + "loss": 2.8214, + "step": 965 + }, + { + "epoch": 0.07795980953918166, + "grad_norm": 0.7923071384429932, + "learning_rate": 0.00019906691151354617, + "loss": 2.8153, + "step": 966 + }, + { + "epoch": 0.07804051327576467, + "grad_norm": 0.8783324956893921, + "learning_rate": 0.00019906475869780246, + "loss": 2.7691, + "step": 967 + }, + { + "epoch": 0.07812121701234767, + "grad_norm": 0.8974801301956177, + "learning_rate": 0.000199062603413106, + "loss": 2.8156, + "step": 968 + }, + { + "epoch": 0.07820192074893068, + "grad_norm": 0.9304391741752625, + "learning_rate": 0.00019906044565951052, + "loss": 2.8489, + "step": 969 + }, + { + "epoch": 0.07828262448551368, + "grad_norm": 0.8351098895072937, + "learning_rate": 0.00019905828543706976, + "loss": 2.7744, + "step": 970 + }, + { + "epoch": 0.07836332822209668, + "grad_norm": 0.8634265065193176, + "learning_rate": 0.0001990561227458376, + "loss": 2.8193, + "step": 971 + }, + { + "epoch": 0.07844403195867969, + "grad_norm": 0.8969653248786926, + "learning_rate": 0.00019905395758586792, + "loss": 2.7548, + "step": 972 + }, + { + "epoch": 0.07852473569526269, + "grad_norm": 0.8964852094650269, + "learning_rate": 0.0001990517899572147, + "loss": 2.8037, + "step": 973 + }, + { + "epoch": 0.0786054394318457, + "grad_norm": 0.8567596077919006, + "learning_rate": 0.00019904961985993196, + "loss": 2.7942, + "step": 974 + }, + { + "epoch": 0.0786861431684287, + "grad_norm": 0.8275273442268372, + "learning_rate": 0.00019904744729407374, + "loss": 2.8359, + "step": 975 + }, + { + "epoch": 0.0787668469050117, + "grad_norm": 0.9458810091018677, + "learning_rate": 0.00019904527225969424, + "loss": 2.8354, + "step": 976 + }, + { + "epoch": 0.07884755064159471, + "grad_norm": 0.8690593838691711, + "learning_rate": 0.00019904309475684767, + "loss": 2.7894, + "step": 977 + }, + { + "epoch": 0.07892825437817771, + "grad_norm": 
0.810279130935669, + "learning_rate": 0.00019904091478558823, + "loss": 2.7939, + "step": 978 + }, + { + "epoch": 0.07900895811476072, + "grad_norm": 0.8779012560844421, + "learning_rate": 0.0001990387323459703, + "loss": 2.7551, + "step": 979 + }, + { + "epoch": 0.07908966185134372, + "grad_norm": 0.7936381101608276, + "learning_rate": 0.00019903654743804833, + "loss": 2.814, + "step": 980 + }, + { + "epoch": 0.07917036558792673, + "grad_norm": 0.9567989110946655, + "learning_rate": 0.00019903436006187667, + "loss": 2.7715, + "step": 981 + }, + { + "epoch": 0.07925106932450972, + "grad_norm": 0.9250255823135376, + "learning_rate": 0.00019903217021750987, + "loss": 2.8967, + "step": 982 + }, + { + "epoch": 0.07933177306109272, + "grad_norm": 0.8342804312705994, + "learning_rate": 0.00019902997790500256, + "loss": 2.7728, + "step": 983 + }, + { + "epoch": 0.07941247679767573, + "grad_norm": 0.8321473598480225, + "learning_rate": 0.00019902778312440932, + "loss": 2.8479, + "step": 984 + }, + { + "epoch": 0.07949318053425873, + "grad_norm": 0.894727885723114, + "learning_rate": 0.00019902558587578484, + "loss": 2.8211, + "step": 985 + }, + { + "epoch": 0.07957388427084174, + "grad_norm": 0.8093457221984863, + "learning_rate": 0.0001990233861591839, + "loss": 2.7481, + "step": 986 + }, + { + "epoch": 0.07965458800742474, + "grad_norm": 0.8626284599304199, + "learning_rate": 0.00019902118397466132, + "loss": 2.8368, + "step": 987 + }, + { + "epoch": 0.07973529174400774, + "grad_norm": 0.799648642539978, + "learning_rate": 0.00019901897932227204, + "loss": 2.8713, + "step": 988 + }, + { + "epoch": 0.07981599548059075, + "grad_norm": 0.9658265709877014, + "learning_rate": 0.00019901677220207092, + "loss": 2.7284, + "step": 989 + }, + { + "epoch": 0.07989669921717375, + "grad_norm": 0.877299427986145, + "learning_rate": 0.00019901456261411303, + "loss": 2.7916, + "step": 990 + }, + { + "epoch": 0.07997740295375676, + "grad_norm": 0.926450252532959, + "learning_rate": 
0.00019901235055845337, + "loss": 2.8207, + "step": 991 + }, + { + "epoch": 0.08005810669033976, + "grad_norm": 0.8858455419540405, + "learning_rate": 0.00019901013603514716, + "loss": 2.795, + "step": 992 + }, + { + "epoch": 0.08013881042692277, + "grad_norm": 0.8619922995567322, + "learning_rate": 0.0001990079190442495, + "loss": 2.8163, + "step": 993 + }, + { + "epoch": 0.08021951416350577, + "grad_norm": 0.859200656414032, + "learning_rate": 0.00019900569958581572, + "loss": 2.7715, + "step": 994 + }, + { + "epoch": 0.08030021790008877, + "grad_norm": 0.8346282839775085, + "learning_rate": 0.0001990034776599011, + "loss": 2.8312, + "step": 995 + }, + { + "epoch": 0.08038092163667178, + "grad_norm": 0.9188725352287292, + "learning_rate": 0.00019900125326656102, + "loss": 2.799, + "step": 996 + }, + { + "epoch": 0.08046162537325478, + "grad_norm": 0.8548648953437805, + "learning_rate": 0.00019899902640585092, + "loss": 2.7778, + "step": 997 + }, + { + "epoch": 0.08054232910983779, + "grad_norm": 0.8883183002471924, + "learning_rate": 0.00019899679707782624, + "loss": 2.809, + "step": 998 + }, + { + "epoch": 0.08062303284642079, + "grad_norm": 0.8915852308273315, + "learning_rate": 0.00019899456528254267, + "loss": 2.8309, + "step": 999 + }, + { + "epoch": 0.0807037365830038, + "grad_norm": 0.8092094659805298, + "learning_rate": 0.00019899233102005573, + "loss": 2.7753, + "step": 1000 + }, + { + "epoch": 0.0807037365830038, + "eval_loss": 2.7104671001434326, + "eval_runtime": 773.7354, + "eval_samples_per_second": 3.386, + "eval_steps_per_second": 0.565, + "step": 1000 + }, + { + "epoch": 0.0807844403195868, + "grad_norm": 0.8744900226593018, + "learning_rate": 0.00019899009429042114, + "loss": 2.7948, + "step": 1001 + }, + { + "epoch": 0.0808651440561698, + "grad_norm": 0.8749974370002747, + "learning_rate": 0.0001989878550936946, + "loss": 2.7609, + "step": 1002 + }, + { + "epoch": 0.08094584779275281, + "grad_norm": 0.8622820377349854, + "learning_rate": 
0.000198985613429932, + "loss": 2.8023, + "step": 1003 + }, + { + "epoch": 0.08102655152933581, + "grad_norm": 0.9404367208480835, + "learning_rate": 0.00019898336929918915, + "loss": 2.7992, + "step": 1004 + }, + { + "epoch": 0.08110725526591882, + "grad_norm": 0.8846708536148071, + "learning_rate": 0.000198981122701522, + "loss": 2.8084, + "step": 1005 + }, + { + "epoch": 0.08118795900250182, + "grad_norm": 0.8105908036231995, + "learning_rate": 0.0001989788736369865, + "loss": 2.8504, + "step": 1006 + }, + { + "epoch": 0.08126866273908483, + "grad_norm": 1.0107187032699585, + "learning_rate": 0.0001989766221056388, + "loss": 2.7935, + "step": 1007 + }, + { + "epoch": 0.08134936647566782, + "grad_norm": 0.7825451493263245, + "learning_rate": 0.0001989743681075349, + "loss": 2.8024, + "step": 1008 + }, + { + "epoch": 0.08143007021225082, + "grad_norm": 0.8478613495826721, + "learning_rate": 0.000198972111642731, + "loss": 2.8645, + "step": 1009 + }, + { + "epoch": 0.08151077394883383, + "grad_norm": 0.8432144522666931, + "learning_rate": 0.0001989698527112834, + "loss": 2.8469, + "step": 1010 + }, + { + "epoch": 0.08159147768541683, + "grad_norm": 0.8147936463356018, + "learning_rate": 0.00019896759131324835, + "loss": 2.7799, + "step": 1011 + }, + { + "epoch": 0.08167218142199983, + "grad_norm": 0.8446993827819824, + "learning_rate": 0.00019896532744868224, + "loss": 2.7685, + "step": 1012 + }, + { + "epoch": 0.08175288515858284, + "grad_norm": 0.7635807394981384, + "learning_rate": 0.00019896306111764146, + "loss": 2.7823, + "step": 1013 + }, + { + "epoch": 0.08183358889516584, + "grad_norm": 0.8272855877876282, + "learning_rate": 0.00019896079232018253, + "loss": 2.7877, + "step": 1014 + }, + { + "epoch": 0.08191429263174885, + "grad_norm": 0.8079700469970703, + "learning_rate": 0.00019895852105636193, + "loss": 2.7849, + "step": 1015 + }, + { + "epoch": 0.08199499636833185, + "grad_norm": 0.8518063426017761, + "learning_rate": 0.0001989562473262363, + "loss": 
2.8622, + "step": 1016 + }, + { + "epoch": 0.08207570010491486, + "grad_norm": 0.8646622896194458, + "learning_rate": 0.00019895397112986235, + "loss": 2.8224, + "step": 1017 + }, + { + "epoch": 0.08215640384149786, + "grad_norm": 0.8764398097991943, + "learning_rate": 0.00019895169246729672, + "loss": 2.938, + "step": 1018 + }, + { + "epoch": 0.08223710757808086, + "grad_norm": 0.8304057717323303, + "learning_rate": 0.0001989494113385963, + "loss": 2.7586, + "step": 1019 + }, + { + "epoch": 0.08231781131466387, + "grad_norm": 0.8569272756576538, + "learning_rate": 0.00019894712774381787, + "loss": 2.7803, + "step": 1020 + }, + { + "epoch": 0.08239851505124687, + "grad_norm": 0.8788578510284424, + "learning_rate": 0.00019894484168301836, + "loss": 2.8138, + "step": 1021 + }, + { + "epoch": 0.08247921878782988, + "grad_norm": 0.9113569855690002, + "learning_rate": 0.0001989425531562548, + "loss": 2.8023, + "step": 1022 + }, + { + "epoch": 0.08255992252441288, + "grad_norm": 0.8630590438842773, + "learning_rate": 0.00019894026216358413, + "loss": 2.791, + "step": 1023 + }, + { + "epoch": 0.08264062626099589, + "grad_norm": 0.8691157698631287, + "learning_rate": 0.00019893796870506348, + "loss": 2.811, + "step": 1024 + }, + { + "epoch": 0.08272132999757889, + "grad_norm": 0.9078284502029419, + "learning_rate": 0.00019893567278075007, + "loss": 2.8282, + "step": 1025 + }, + { + "epoch": 0.0828020337341619, + "grad_norm": 0.867511510848999, + "learning_rate": 0.00019893337439070105, + "loss": 2.7862, + "step": 1026 + }, + { + "epoch": 0.0828827374707449, + "grad_norm": 0.8016698360443115, + "learning_rate": 0.00019893107353497372, + "loss": 2.8083, + "step": 1027 + }, + { + "epoch": 0.0829634412073279, + "grad_norm": 0.8583545684814453, + "learning_rate": 0.00019892877021362543, + "loss": 2.8041, + "step": 1028 + }, + { + "epoch": 0.08304414494391091, + "grad_norm": 0.8302493691444397, + "learning_rate": 0.0001989264644267136, + "loss": 2.7866, + "step": 1029 + }, + { + 
"epoch": 0.08312484868049391, + "grad_norm": 0.9628411531448364, + "learning_rate": 0.00019892415617429567, + "loss": 2.8187, + "step": 1030 + }, + { + "epoch": 0.08320555241707692, + "grad_norm": 0.874840259552002, + "learning_rate": 0.0001989218454564292, + "loss": 2.7475, + "step": 1031 + }, + { + "epoch": 0.08328625615365992, + "grad_norm": 0.8641294836997986, + "learning_rate": 0.0001989195322731717, + "loss": 2.7795, + "step": 1032 + }, + { + "epoch": 0.08336695989024291, + "grad_norm": 0.8219757080078125, + "learning_rate": 0.0001989172166245809, + "loss": 2.7683, + "step": 1033 + }, + { + "epoch": 0.08344766362682592, + "grad_norm": 0.7905694246292114, + "learning_rate": 0.00019891489851071455, + "loss": 2.7668, + "step": 1034 + }, + { + "epoch": 0.08352836736340892, + "grad_norm": 0.8180816173553467, + "learning_rate": 0.0001989125779316303, + "loss": 2.7661, + "step": 1035 + }, + { + "epoch": 0.08360907109999192, + "grad_norm": 0.8337293267250061, + "learning_rate": 0.00019891025488738605, + "loss": 2.7823, + "step": 1036 + }, + { + "epoch": 0.08368977483657493, + "grad_norm": 0.9673140048980713, + "learning_rate": 0.00019890792937803973, + "loss": 2.8164, + "step": 1037 + }, + { + "epoch": 0.08377047857315793, + "grad_norm": 0.8810501098632812, + "learning_rate": 0.00019890560140364922, + "loss": 2.7904, + "step": 1038 + }, + { + "epoch": 0.08385118230974094, + "grad_norm": 0.9507614374160767, + "learning_rate": 0.0001989032709642726, + "loss": 2.7928, + "step": 1039 + }, + { + "epoch": 0.08393188604632394, + "grad_norm": 0.953738808631897, + "learning_rate": 0.00019890093805996793, + "loss": 2.7922, + "step": 1040 + }, + { + "epoch": 0.08401258978290695, + "grad_norm": 0.8079931139945984, + "learning_rate": 0.00019889860269079336, + "loss": 2.7909, + "step": 1041 + }, + { + "epoch": 0.08409329351948995, + "grad_norm": 1.0330647230148315, + "learning_rate": 0.0001988962648568071, + "loss": 2.7526, + "step": 1042 + }, + { + "epoch": 0.08417399725607295, + 
"grad_norm": 0.8988988399505615, + "learning_rate": 0.00019889392455806738, + "loss": 2.7471, + "step": 1043 + }, + { + "epoch": 0.08425470099265596, + "grad_norm": 0.7986348271369934, + "learning_rate": 0.00019889158179463255, + "loss": 2.7208, + "step": 1044 + }, + { + "epoch": 0.08433540472923896, + "grad_norm": 0.9231631755828857, + "learning_rate": 0.000198889236566561, + "loss": 2.7953, + "step": 1045 + }, + { + "epoch": 0.08441610846582197, + "grad_norm": 0.8438155055046082, + "learning_rate": 0.00019888688887391117, + "loss": 2.8006, + "step": 1046 + }, + { + "epoch": 0.08449681220240497, + "grad_norm": 0.8915219306945801, + "learning_rate": 0.0001988845387167416, + "loss": 2.8184, + "step": 1047 + }, + { + "epoch": 0.08457751593898798, + "grad_norm": 0.924401581287384, + "learning_rate": 0.0001988821860951108, + "loss": 2.8411, + "step": 1048 + }, + { + "epoch": 0.08465821967557098, + "grad_norm": 0.8144630193710327, + "learning_rate": 0.00019887983100907745, + "loss": 2.8258, + "step": 1049 + }, + { + "epoch": 0.08473892341215399, + "grad_norm": 0.9974459409713745, + "learning_rate": 0.00019887747345870028, + "loss": 2.7567, + "step": 1050 + }, + { + "epoch": 0.08481962714873699, + "grad_norm": 0.944526195526123, + "learning_rate": 0.00019887511344403796, + "loss": 2.8657, + "step": 1051 + }, + { + "epoch": 0.08490033088532, + "grad_norm": 0.8204831480979919, + "learning_rate": 0.00019887275096514936, + "loss": 2.8054, + "step": 1052 + }, + { + "epoch": 0.084981034621903, + "grad_norm": 0.8855900168418884, + "learning_rate": 0.00019887038602209336, + "loss": 2.8019, + "step": 1053 + }, + { + "epoch": 0.085061738358486, + "grad_norm": 0.9025108814239502, + "learning_rate": 0.0001988680186149289, + "loss": 2.7934, + "step": 1054 + }, + { + "epoch": 0.08514244209506901, + "grad_norm": 0.8486441373825073, + "learning_rate": 0.00019886564874371494, + "loss": 2.809, + "step": 1055 + }, + { + "epoch": 0.08522314583165201, + "grad_norm": 0.778364896774292, + 
"learning_rate": 0.00019886327640851058, + "loss": 2.7783, + "step": 1056 + }, + { + "epoch": 0.08530384956823502, + "grad_norm": 0.8515299558639526, + "learning_rate": 0.00019886090160937497, + "loss": 2.8122, + "step": 1057 + }, + { + "epoch": 0.08538455330481802, + "grad_norm": 0.8466131091117859, + "learning_rate": 0.00019885852434636724, + "loss": 2.7798, + "step": 1058 + }, + { + "epoch": 0.08546525704140101, + "grad_norm": 0.8856541514396667, + "learning_rate": 0.00019885614461954667, + "loss": 2.8033, + "step": 1059 + }, + { + "epoch": 0.08554596077798401, + "grad_norm": 0.8853924870491028, + "learning_rate": 0.00019885376242897258, + "loss": 2.8368, + "step": 1060 + }, + { + "epoch": 0.08562666451456702, + "grad_norm": 0.7858660221099854, + "learning_rate": 0.0001988513777747043, + "loss": 2.7806, + "step": 1061 + }, + { + "epoch": 0.08570736825115002, + "grad_norm": 0.8601513504981995, + "learning_rate": 0.0001988489906568013, + "loss": 2.8434, + "step": 1062 + }, + { + "epoch": 0.08578807198773303, + "grad_norm": 0.9126001596450806, + "learning_rate": 0.00019884660107532306, + "loss": 2.8469, + "step": 1063 + }, + { + "epoch": 0.08586877572431603, + "grad_norm": 0.9016061425209045, + "learning_rate": 0.00019884420903032912, + "loss": 2.7907, + "step": 1064 + }, + { + "epoch": 0.08594947946089904, + "grad_norm": 0.9134494066238403, + "learning_rate": 0.00019884181452187915, + "loss": 2.8426, + "step": 1065 + }, + { + "epoch": 0.08603018319748204, + "grad_norm": 0.8891138434410095, + "learning_rate": 0.00019883941755003272, + "loss": 2.8092, + "step": 1066 + }, + { + "epoch": 0.08611088693406505, + "grad_norm": 0.822884202003479, + "learning_rate": 0.0001988370181148497, + "loss": 2.8454, + "step": 1067 + }, + { + "epoch": 0.08619159067064805, + "grad_norm": 0.8341901898384094, + "learning_rate": 0.0001988346162163898, + "loss": 2.8027, + "step": 1068 + }, + { + "epoch": 0.08627229440723105, + "grad_norm": 0.8653229475021362, + "learning_rate": 
0.00019883221185471291, + "loss": 2.7487, + "step": 1069 + }, + { + "epoch": 0.08635299814381406, + "grad_norm": 0.8065966367721558, + "learning_rate": 0.00019882980502987894, + "loss": 2.7847, + "step": 1070 + }, + { + "epoch": 0.08643370188039706, + "grad_norm": 0.9106903076171875, + "learning_rate": 0.0001988273957419479, + "loss": 2.7962, + "step": 1071 + }, + { + "epoch": 0.08651440561698007, + "grad_norm": 0.953815221786499, + "learning_rate": 0.0001988249839909798, + "loss": 2.8168, + "step": 1072 + }, + { + "epoch": 0.08659510935356307, + "grad_norm": 0.8642842173576355, + "learning_rate": 0.00019882256977703477, + "loss": 2.8205, + "step": 1073 + }, + { + "epoch": 0.08667581309014608, + "grad_norm": 0.8500350117683411, + "learning_rate": 0.000198820153100173, + "loss": 2.8798, + "step": 1074 + }, + { + "epoch": 0.08675651682672908, + "grad_norm": 0.9212989807128906, + "learning_rate": 0.00019881773396045467, + "loss": 2.8088, + "step": 1075 + }, + { + "epoch": 0.08683722056331208, + "grad_norm": 0.8897970914840698, + "learning_rate": 0.0001988153123579401, + "loss": 2.7983, + "step": 1076 + }, + { + "epoch": 0.08691792429989509, + "grad_norm": 0.7942636609077454, + "learning_rate": 0.00019881288829268968, + "loss": 2.7711, + "step": 1077 + }, + { + "epoch": 0.08699862803647809, + "grad_norm": 0.8286700248718262, + "learning_rate": 0.00019881046176476374, + "loss": 2.7995, + "step": 1078 + }, + { + "epoch": 0.0870793317730611, + "grad_norm": 0.9436343908309937, + "learning_rate": 0.00019880803277422281, + "loss": 2.8399, + "step": 1079 + }, + { + "epoch": 0.0871600355096441, + "grad_norm": 0.9592518210411072, + "learning_rate": 0.00019880560132112742, + "loss": 2.7888, + "step": 1080 + }, + { + "epoch": 0.0872407392462271, + "grad_norm": 0.8956589698791504, + "learning_rate": 0.00019880316740553816, + "loss": 2.7635, + "step": 1081 + }, + { + "epoch": 0.08732144298281011, + "grad_norm": 1.055312156677246, + "learning_rate": 0.00019880073102751574, + "loss": 
2.7778, + "step": 1082 + }, + { + "epoch": 0.08740214671939311, + "grad_norm": 0.783273458480835, + "learning_rate": 0.00019879829218712075, + "loss": 2.735, + "step": 1083 + }, + { + "epoch": 0.0874828504559761, + "grad_norm": 0.8315421938896179, + "learning_rate": 0.00019879585088441413, + "loss": 2.7973, + "step": 1084 + }, + { + "epoch": 0.08756355419255911, + "grad_norm": 0.9550945162773132, + "learning_rate": 0.00019879340711945662, + "loss": 2.8083, + "step": 1085 + }, + { + "epoch": 0.08764425792914211, + "grad_norm": 0.9579277634620667, + "learning_rate": 0.00019879096089230915, + "loss": 2.7411, + "step": 1086 + }, + { + "epoch": 0.08772496166572512, + "grad_norm": 0.8602219223976135, + "learning_rate": 0.0001987885122030327, + "loss": 2.7461, + "step": 1087 + }, + { + "epoch": 0.08780566540230812, + "grad_norm": 0.9749068021774292, + "learning_rate": 0.00019878606105168829, + "loss": 2.7701, + "step": 1088 + }, + { + "epoch": 0.08788636913889113, + "grad_norm": 0.8128982186317444, + "learning_rate": 0.00019878360743833703, + "loss": 2.7949, + "step": 1089 + }, + { + "epoch": 0.08796707287547413, + "grad_norm": 0.9177080988883972, + "learning_rate": 0.00019878115136304003, + "loss": 2.7471, + "step": 1090 + }, + { + "epoch": 0.08804777661205714, + "grad_norm": 0.9052132368087769, + "learning_rate": 0.0001987786928258585, + "loss": 2.8356, + "step": 1091 + }, + { + "epoch": 0.08812848034864014, + "grad_norm": 0.8972994089126587, + "learning_rate": 0.00019877623182685378, + "loss": 2.8304, + "step": 1092 + }, + { + "epoch": 0.08820918408522314, + "grad_norm": 0.861251950263977, + "learning_rate": 0.0001987737683660871, + "loss": 2.8436, + "step": 1093 + }, + { + "epoch": 0.08828988782180615, + "grad_norm": 0.9139869809150696, + "learning_rate": 0.00019877130244361996, + "loss": 2.7583, + "step": 1094 + }, + { + "epoch": 0.08837059155838915, + "grad_norm": 0.8441170454025269, + "learning_rate": 0.00019876883405951377, + "loss": 2.7508, + "step": 1095 + }, + 
{ + "epoch": 0.08845129529497216, + "grad_norm": 0.8624769449234009, + "learning_rate": 0.00019876636321383004, + "loss": 2.8003, + "step": 1096 + }, + { + "epoch": 0.08853199903155516, + "grad_norm": 0.9033877849578857, + "learning_rate": 0.00019876388990663037, + "loss": 2.7934, + "step": 1097 + }, + { + "epoch": 0.08861270276813817, + "grad_norm": 0.9492632746696472, + "learning_rate": 0.0001987614141379764, + "loss": 2.7852, + "step": 1098 + }, + { + "epoch": 0.08869340650472117, + "grad_norm": 0.9004682302474976, + "learning_rate": 0.00019875893590792982, + "loss": 2.7518, + "step": 1099 + }, + { + "epoch": 0.08877411024130417, + "grad_norm": 0.8352272510528564, + "learning_rate": 0.0001987564552165524, + "loss": 2.8035, + "step": 1100 + }, + { + "epoch": 0.08885481397788718, + "grad_norm": 0.8488562107086182, + "learning_rate": 0.00019875397206390593, + "loss": 2.7672, + "step": 1101 + }, + { + "epoch": 0.08893551771447018, + "grad_norm": 0.9450985193252563, + "learning_rate": 0.00019875148645005238, + "loss": 2.7558, + "step": 1102 + }, + { + "epoch": 0.08901622145105319, + "grad_norm": 0.9203561544418335, + "learning_rate": 0.0001987489983750536, + "loss": 2.7983, + "step": 1103 + }, + { + "epoch": 0.08909692518763619, + "grad_norm": 0.8761897087097168, + "learning_rate": 0.0001987465078389717, + "loss": 2.7536, + "step": 1104 + }, + { + "epoch": 0.0891776289242192, + "grad_norm": 0.9064637422561646, + "learning_rate": 0.00019874401484186867, + "loss": 2.8104, + "step": 1105 + }, + { + "epoch": 0.0892583326608022, + "grad_norm": 0.8394999504089355, + "learning_rate": 0.00019874151938380666, + "loss": 2.7459, + "step": 1106 + }, + { + "epoch": 0.0893390363973852, + "grad_norm": 0.8782099485397339, + "learning_rate": 0.00019873902146484785, + "loss": 2.8675, + "step": 1107 + }, + { + "epoch": 0.08941974013396821, + "grad_norm": 0.8564850091934204, + "learning_rate": 0.00019873652108505458, + "loss": 2.8561, + "step": 1108 + }, + { + "epoch": 
0.08950044387055121, + "grad_norm": 0.8343809843063354, + "learning_rate": 0.0001987340182444891, + "loss": 2.8406, + "step": 1109 + }, + { + "epoch": 0.0895811476071342, + "grad_norm": 1.096273422241211, + "learning_rate": 0.00019873151294321376, + "loss": 2.8264, + "step": 1110 + }, + { + "epoch": 0.08966185134371721, + "grad_norm": 0.8654618263244629, + "learning_rate": 0.00019872900518129103, + "loss": 2.7956, + "step": 1111 + }, + { + "epoch": 0.08974255508030021, + "grad_norm": 0.8868138194084167, + "learning_rate": 0.00019872649495878344, + "loss": 2.8028, + "step": 1112 + }, + { + "epoch": 0.08982325881688322, + "grad_norm": 0.8139104843139648, + "learning_rate": 0.00019872398227575348, + "loss": 2.7502, + "step": 1113 + }, + { + "epoch": 0.08990396255346622, + "grad_norm": 0.8277762532234192, + "learning_rate": 0.00019872146713226384, + "loss": 2.7913, + "step": 1114 + }, + { + "epoch": 0.08998466629004923, + "grad_norm": 0.8470397591590881, + "learning_rate": 0.00019871894952837717, + "loss": 2.7982, + "step": 1115 + }, + { + "epoch": 0.09006537002663223, + "grad_norm": 0.8424760103225708, + "learning_rate": 0.00019871642946415625, + "loss": 2.8067, + "step": 1116 + }, + { + "epoch": 0.09014607376321523, + "grad_norm": 0.8253894448280334, + "learning_rate": 0.00019871390693966382, + "loss": 2.8339, + "step": 1117 + }, + { + "epoch": 0.09022677749979824, + "grad_norm": 0.8120691776275635, + "learning_rate": 0.00019871138195496282, + "loss": 2.7938, + "step": 1118 + }, + { + "epoch": 0.09030748123638124, + "grad_norm": 0.920189619064331, + "learning_rate": 0.00019870885451011617, + "loss": 2.8083, + "step": 1119 + }, + { + "epoch": 0.09038818497296425, + "grad_norm": 0.8990969657897949, + "learning_rate": 0.0001987063246051868, + "loss": 2.7481, + "step": 1120 + }, + { + "epoch": 0.09046888870954725, + "grad_norm": 0.8280801773071289, + "learning_rate": 0.0001987037922402378, + "loss": 2.8536, + "step": 1121 + }, + { + "epoch": 0.09054959244613026, + 
"grad_norm": 0.8510503768920898, + "learning_rate": 0.0001987012574153323, + "loss": 2.758, + "step": 1122 + }, + { + "epoch": 0.09063029618271326, + "grad_norm": 0.9103946685791016, + "learning_rate": 0.00019869872013053344, + "loss": 2.7594, + "step": 1123 + }, + { + "epoch": 0.09071099991929626, + "grad_norm": 0.804916262626648, + "learning_rate": 0.00019869618038590448, + "loss": 2.7489, + "step": 1124 + }, + { + "epoch": 0.09079170365587927, + "grad_norm": 0.7542802095413208, + "learning_rate": 0.00019869363818150867, + "loss": 2.76, + "step": 1125 + }, + { + "epoch": 0.09087240739246227, + "grad_norm": 0.7725108861923218, + "learning_rate": 0.00019869109351740947, + "loss": 2.8124, + "step": 1126 + }, + { + "epoch": 0.09095311112904528, + "grad_norm": 0.8533692955970764, + "learning_rate": 0.0001986885463936702, + "loss": 2.8499, + "step": 1127 + }, + { + "epoch": 0.09103381486562828, + "grad_norm": 0.8351541757583618, + "learning_rate": 0.0001986859968103544, + "loss": 2.8075, + "step": 1128 + }, + { + "epoch": 0.09111451860221129, + "grad_norm": 0.8780044913291931, + "learning_rate": 0.0001986834447675256, + "loss": 2.7587, + "step": 1129 + }, + { + "epoch": 0.09119522233879429, + "grad_norm": 0.9587519764900208, + "learning_rate": 0.00019868089026524736, + "loss": 2.8069, + "step": 1130 + }, + { + "epoch": 0.0912759260753773, + "grad_norm": 0.8285651206970215, + "learning_rate": 0.00019867833330358342, + "loss": 2.8209, + "step": 1131 + }, + { + "epoch": 0.0913566298119603, + "grad_norm": 0.8589211106300354, + "learning_rate": 0.00019867577388259745, + "loss": 2.8144, + "step": 1132 + }, + { + "epoch": 0.0914373335485433, + "grad_norm": 0.8740364909172058, + "learning_rate": 0.00019867321200235324, + "loss": 2.858, + "step": 1133 + }, + { + "epoch": 0.09151803728512631, + "grad_norm": 0.8368108868598938, + "learning_rate": 0.00019867064766291467, + "loss": 2.7997, + "step": 1134 + }, + { + "epoch": 0.0915987410217093, + "grad_norm": 0.8243690133094788, + 
"learning_rate": 0.00019866808086434564, + "loss": 2.7925, + "step": 1135 + }, + { + "epoch": 0.0916794447582923, + "grad_norm": 0.8296996355056763, + "learning_rate": 0.0001986655116067101, + "loss": 2.7953, + "step": 1136 + }, + { + "epoch": 0.09176014849487531, + "grad_norm": 0.9255942702293396, + "learning_rate": 0.0001986629398900721, + "loss": 2.844, + "step": 1137 + }, + { + "epoch": 0.09184085223145831, + "grad_norm": 0.7498174905776978, + "learning_rate": 0.00019866036571449574, + "loss": 2.7372, + "step": 1138 + }, + { + "epoch": 0.09192155596804132, + "grad_norm": 0.8170139193534851, + "learning_rate": 0.00019865778908004513, + "loss": 2.7656, + "step": 1139 + }, + { + "epoch": 0.09200225970462432, + "grad_norm": 0.8858106732368469, + "learning_rate": 0.00019865520998678458, + "loss": 2.7657, + "step": 1140 + }, + { + "epoch": 0.09208296344120732, + "grad_norm": 0.8789847493171692, + "learning_rate": 0.00019865262843477826, + "loss": 2.8419, + "step": 1141 + }, + { + "epoch": 0.09216366717779033, + "grad_norm": 0.8433314561843872, + "learning_rate": 0.00019865004442409058, + "loss": 2.7981, + "step": 1142 + }, + { + "epoch": 0.09224437091437333, + "grad_norm": 0.8822595477104187, + "learning_rate": 0.0001986474579547859, + "loss": 2.8368, + "step": 1143 + }, + { + "epoch": 0.09232507465095634, + "grad_norm": 0.9067013263702393, + "learning_rate": 0.00019864486902692872, + "loss": 2.7807, + "step": 1144 + }, + { + "epoch": 0.09240577838753934, + "grad_norm": 0.9551558494567871, + "learning_rate": 0.00019864227764058355, + "loss": 2.7617, + "step": 1145 + }, + { + "epoch": 0.09248648212412235, + "grad_norm": 0.8337206244468689, + "learning_rate": 0.00019863968379581494, + "loss": 2.8289, + "step": 1146 + }, + { + "epoch": 0.09256718586070535, + "grad_norm": 0.952702522277832, + "learning_rate": 0.0001986370874926876, + "loss": 2.8508, + "step": 1147 + }, + { + "epoch": 0.09264788959728835, + "grad_norm": 0.8586699366569519, + "learning_rate": 
0.00019863448873126615, + "loss": 2.8784, + "step": 1148 + }, + { + "epoch": 0.09272859333387136, + "grad_norm": 0.7625309228897095, + "learning_rate": 0.00019863188751161544, + "loss": 2.7936, + "step": 1149 + }, + { + "epoch": 0.09280929707045436, + "grad_norm": 0.8912700414657593, + "learning_rate": 0.0001986292838338003, + "loss": 2.8745, + "step": 1150 + }, + { + "epoch": 0.09289000080703737, + "grad_norm": 0.8618904948234558, + "learning_rate": 0.00019862667769788553, + "loss": 2.8086, + "step": 1151 + }, + { + "epoch": 0.09297070454362037, + "grad_norm": 1.0013352632522583, + "learning_rate": 0.00019862406910393617, + "loss": 2.8211, + "step": 1152 + }, + { + "epoch": 0.09305140828020338, + "grad_norm": 0.7922475337982178, + "learning_rate": 0.0001986214580520172, + "loss": 2.7668, + "step": 1153 + }, + { + "epoch": 0.09313211201678638, + "grad_norm": 0.9490330815315247, + "learning_rate": 0.00019861884454219365, + "loss": 2.7571, + "step": 1154 + }, + { + "epoch": 0.09321281575336939, + "grad_norm": 0.8780270218849182, + "learning_rate": 0.00019861622857453076, + "loss": 2.7598, + "step": 1155 + }, + { + "epoch": 0.09329351948995239, + "grad_norm": 0.9220066070556641, + "learning_rate": 0.00019861361014909365, + "loss": 2.7609, + "step": 1156 + }, + { + "epoch": 0.0933742232265354, + "grad_norm": 0.8299020528793335, + "learning_rate": 0.0001986109892659476, + "loss": 2.8655, + "step": 1157 + }, + { + "epoch": 0.0934549269631184, + "grad_norm": 0.9700348377227783, + "learning_rate": 0.0001986083659251579, + "loss": 2.8597, + "step": 1158 + }, + { + "epoch": 0.0935356306997014, + "grad_norm": 0.8820784687995911, + "learning_rate": 0.00019860574012679001, + "loss": 2.8776, + "step": 1159 + }, + { + "epoch": 0.0936163344362844, + "grad_norm": 0.8134172558784485, + "learning_rate": 0.0001986031118709093, + "loss": 2.8163, + "step": 1160 + }, + { + "epoch": 0.0936970381728674, + "grad_norm": 0.885974109172821, + "learning_rate": 0.00019860048115758123, + "loss": 
2.752, + "step": 1161 + }, + { + "epoch": 0.0937777419094504, + "grad_norm": 0.9650186896324158, + "learning_rate": 0.0001985978479868715, + "loss": 2.7587, + "step": 1162 + }, + { + "epoch": 0.0938584456460334, + "grad_norm": 0.8550445437431335, + "learning_rate": 0.00019859521235884563, + "loss": 2.7887, + "step": 1163 + }, + { + "epoch": 0.09393914938261641, + "grad_norm": 0.9686560034751892, + "learning_rate": 0.00019859257427356933, + "loss": 2.7974, + "step": 1164 + }, + { + "epoch": 0.09401985311919941, + "grad_norm": 0.9185387492179871, + "learning_rate": 0.00019858993373110837, + "loss": 2.7933, + "step": 1165 + }, + { + "epoch": 0.09410055685578242, + "grad_norm": 0.9549610018730164, + "learning_rate": 0.00019858729073152852, + "loss": 2.7698, + "step": 1166 + }, + { + "epoch": 0.09418126059236542, + "grad_norm": 1.0523492097854614, + "learning_rate": 0.0001985846452748957, + "loss": 2.7215, + "step": 1167 + }, + { + "epoch": 0.09426196432894843, + "grad_norm": 0.8551118969917297, + "learning_rate": 0.00019858199736127582, + "loss": 2.805, + "step": 1168 + }, + { + "epoch": 0.09434266806553143, + "grad_norm": 1.021374225616455, + "learning_rate": 0.0001985793469907349, + "loss": 2.794, + "step": 1169 + }, + { + "epoch": 0.09442337180211444, + "grad_norm": 0.8745501041412354, + "learning_rate": 0.0001985766941633389, + "loss": 2.7793, + "step": 1170 + }, + { + "epoch": 0.09450407553869744, + "grad_norm": 0.7426434755325317, + "learning_rate": 0.00019857403887915402, + "loss": 2.7808, + "step": 1171 + }, + { + "epoch": 0.09458477927528045, + "grad_norm": 0.9183726906776428, + "learning_rate": 0.0001985713811382464, + "loss": 2.8001, + "step": 1172 + }, + { + "epoch": 0.09466548301186345, + "grad_norm": 0.8136709928512573, + "learning_rate": 0.00019856872094068233, + "loss": 2.7394, + "step": 1173 + }, + { + "epoch": 0.09474618674844645, + "grad_norm": 0.9399348497390747, + "learning_rate": 0.00019856605828652807, + "loss": 2.7733, + "step": 1174 + }, + { + 
"epoch": 0.09482689048502946, + "grad_norm": 0.8233176469802856, + "learning_rate": 0.00019856339317584997, + "loss": 2.7672, + "step": 1175 + }, + { + "epoch": 0.09490759422161246, + "grad_norm": 0.9157048463821411, + "learning_rate": 0.00019856072560871447, + "loss": 2.7992, + "step": 1176 + }, + { + "epoch": 0.09498829795819547, + "grad_norm": 0.8729545474052429, + "learning_rate": 0.00019855805558518803, + "loss": 2.749, + "step": 1177 + }, + { + "epoch": 0.09506900169477847, + "grad_norm": 0.8592300415039062, + "learning_rate": 0.00019855538310533722, + "loss": 2.7257, + "step": 1178 + }, + { + "epoch": 0.09514970543136148, + "grad_norm": 0.8470803499221802, + "learning_rate": 0.00019855270816922867, + "loss": 2.7479, + "step": 1179 + }, + { + "epoch": 0.09523040916794448, + "grad_norm": 0.8538667559623718, + "learning_rate": 0.00019855003077692897, + "loss": 2.7576, + "step": 1180 + }, + { + "epoch": 0.09531111290452748, + "grad_norm": 0.8890984654426575, + "learning_rate": 0.0001985473509285049, + "loss": 2.7961, + "step": 1181 + }, + { + "epoch": 0.09539181664111049, + "grad_norm": 0.7769411206245422, + "learning_rate": 0.00019854466862402324, + "loss": 2.8087, + "step": 1182 + }, + { + "epoch": 0.09547252037769349, + "grad_norm": 0.8892520666122437, + "learning_rate": 0.00019854198386355085, + "loss": 2.7935, + "step": 1183 + }, + { + "epoch": 0.0955532241142765, + "grad_norm": 0.8675585389137268, + "learning_rate": 0.00019853929664715464, + "loss": 2.833, + "step": 1184 + }, + { + "epoch": 0.0956339278508595, + "grad_norm": 0.8053853511810303, + "learning_rate": 0.00019853660697490154, + "loss": 2.8002, + "step": 1185 + }, + { + "epoch": 0.09571463158744249, + "grad_norm": 0.9237198829650879, + "learning_rate": 0.00019853391484685865, + "loss": 2.8281, + "step": 1186 + }, + { + "epoch": 0.0957953353240255, + "grad_norm": 0.8432926535606384, + "learning_rate": 0.000198531220263093, + "loss": 2.8131, + "step": 1187 + }, + { + "epoch": 0.0958760390606085, + 
"grad_norm": 0.796380341053009, + "learning_rate": 0.0001985285232236718, + "loss": 2.753, + "step": 1188 + }, + { + "epoch": 0.0959567427971915, + "grad_norm": 0.9183037281036377, + "learning_rate": 0.00019852582372866225, + "loss": 2.7625, + "step": 1189 + }, + { + "epoch": 0.09603744653377451, + "grad_norm": 0.8194435238838196, + "learning_rate": 0.0001985231217781316, + "loss": 2.7906, + "step": 1190 + }, + { + "epoch": 0.09611815027035751, + "grad_norm": 0.8430871367454529, + "learning_rate": 0.00019852041737214725, + "loss": 2.8457, + "step": 1191 + }, + { + "epoch": 0.09619885400694052, + "grad_norm": 1.0237345695495605, + "learning_rate": 0.0001985177105107765, + "loss": 2.789, + "step": 1192 + }, + { + "epoch": 0.09627955774352352, + "grad_norm": 0.8721581101417542, + "learning_rate": 0.00019851500119408692, + "loss": 2.7187, + "step": 1193 + }, + { + "epoch": 0.09636026148010653, + "grad_norm": 0.8089142441749573, + "learning_rate": 0.00019851228942214603, + "loss": 2.7544, + "step": 1194 + }, + { + "epoch": 0.09644096521668953, + "grad_norm": 1.1076842546463013, + "learning_rate": 0.0001985095751950213, + "loss": 2.7859, + "step": 1195 + }, + { + "epoch": 0.09652166895327254, + "grad_norm": 0.84585040807724, + "learning_rate": 0.0001985068585127805, + "loss": 2.8005, + "step": 1196 + }, + { + "epoch": 0.09660237268985554, + "grad_norm": 0.8231167197227478, + "learning_rate": 0.00019850413937549127, + "loss": 2.8561, + "step": 1197 + }, + { + "epoch": 0.09668307642643854, + "grad_norm": 1.0028103590011597, + "learning_rate": 0.00019850141778322136, + "loss": 2.8049, + "step": 1198 + }, + { + "epoch": 0.09676378016302155, + "grad_norm": 0.8575148582458496, + "learning_rate": 0.0001984986937360387, + "loss": 2.7723, + "step": 1199 + }, + { + "epoch": 0.09684448389960455, + "grad_norm": 0.8567116260528564, + "learning_rate": 0.00019849596723401107, + "loss": 2.7418, + "step": 1200 + }, + { + "epoch": 0.09692518763618756, + "grad_norm": 1.1159218549728394, + 
"learning_rate": 0.00019849323827720645, + "loss": 2.8352, + "step": 1201 + }, + { + "epoch": 0.09700589137277056, + "grad_norm": 0.849656879901886, + "learning_rate": 0.0001984905068656929, + "loss": 2.7875, + "step": 1202 + }, + { + "epoch": 0.09708659510935357, + "grad_norm": 0.8479150533676147, + "learning_rate": 0.00019848777299953847, + "loss": 2.7828, + "step": 1203 + }, + { + "epoch": 0.09716729884593657, + "grad_norm": 0.9143954515457153, + "learning_rate": 0.00019848503667881125, + "loss": 2.7978, + "step": 1204 + }, + { + "epoch": 0.09724800258251957, + "grad_norm": 0.8162297010421753, + "learning_rate": 0.0001984822979035795, + "loss": 2.7621, + "step": 1205 + }, + { + "epoch": 0.09732870631910258, + "grad_norm": 0.8625509142875671, + "learning_rate": 0.00019847955667391144, + "loss": 2.7484, + "step": 1206 + }, + { + "epoch": 0.09740941005568558, + "grad_norm": 0.8485168218612671, + "learning_rate": 0.00019847681298987543, + "loss": 2.7599, + "step": 1207 + }, + { + "epoch": 0.09749011379226859, + "grad_norm": 0.8962678909301758, + "learning_rate": 0.00019847406685153976, + "loss": 2.7753, + "step": 1208 + }, + { + "epoch": 0.09757081752885159, + "grad_norm": 0.8890791535377502, + "learning_rate": 0.00019847131825897297, + "loss": 2.7635, + "step": 1209 + }, + { + "epoch": 0.0976515212654346, + "grad_norm": 0.8461710810661316, + "learning_rate": 0.00019846856721224355, + "loss": 2.796, + "step": 1210 + }, + { + "epoch": 0.0977322250020176, + "grad_norm": 0.912738025188446, + "learning_rate": 0.00019846581371141996, + "loss": 2.7889, + "step": 1211 + }, + { + "epoch": 0.09781292873860059, + "grad_norm": 0.8530749082565308, + "learning_rate": 0.00019846305775657097, + "loss": 2.8298, + "step": 1212 + }, + { + "epoch": 0.0978936324751836, + "grad_norm": 0.8890148401260376, + "learning_rate": 0.00019846029934776516, + "loss": 2.7491, + "step": 1213 + }, + { + "epoch": 0.0979743362117666, + "grad_norm": 0.8936887979507446, + "learning_rate": 
0.0001984575384850713, + "loss": 2.7759, + "step": 1214 + }, + { + "epoch": 0.0980550399483496, + "grad_norm": 0.7811321020126343, + "learning_rate": 0.00019845477516855823, + "loss": 2.8126, + "step": 1215 + }, + { + "epoch": 0.09813574368493261, + "grad_norm": 0.8751768469810486, + "learning_rate": 0.00019845200939829484, + "loss": 2.792, + "step": 1216 + }, + { + "epoch": 0.09821644742151561, + "grad_norm": 0.8749501705169678, + "learning_rate": 0.00019844924117434998, + "loss": 2.7818, + "step": 1217 + }, + { + "epoch": 0.09829715115809862, + "grad_norm": 0.8130955100059509, + "learning_rate": 0.0001984464704967927, + "loss": 2.8581, + "step": 1218 + }, + { + "epoch": 0.09837785489468162, + "grad_norm": 0.8158220648765564, + "learning_rate": 0.00019844369736569196, + "loss": 2.7704, + "step": 1219 + }, + { + "epoch": 0.09845855863126463, + "grad_norm": 0.9351849555969238, + "learning_rate": 0.00019844092178111702, + "loss": 2.7857, + "step": 1220 + }, + { + "epoch": 0.09853926236784763, + "grad_norm": 0.8373914957046509, + "learning_rate": 0.00019843814374313697, + "loss": 2.8217, + "step": 1221 + }, + { + "epoch": 0.09861996610443063, + "grad_norm": 0.8919960856437683, + "learning_rate": 0.00019843536325182104, + "loss": 2.7914, + "step": 1222 + }, + { + "epoch": 0.09870066984101364, + "grad_norm": 0.9994316697120667, + "learning_rate": 0.00019843258030723858, + "loss": 2.7981, + "step": 1223 + }, + { + "epoch": 0.09878137357759664, + "grad_norm": 0.8144915699958801, + "learning_rate": 0.0001984297949094589, + "loss": 2.811, + "step": 1224 + }, + { + "epoch": 0.09886207731417965, + "grad_norm": 0.8957876563072205, + "learning_rate": 0.0001984270070585514, + "loss": 2.7752, + "step": 1225 + }, + { + "epoch": 0.09894278105076265, + "grad_norm": 0.9426520466804504, + "learning_rate": 0.0001984242167545856, + "loss": 2.8139, + "step": 1226 + }, + { + "epoch": 0.09902348478734566, + "grad_norm": 0.888769268989563, + "learning_rate": 0.00019842142399763106, + 
"loss": 2.8305, + "step": 1227 + }, + { + "epoch": 0.09910418852392866, + "grad_norm": 0.9497748613357544, + "learning_rate": 0.00019841862878775736, + "loss": 2.748, + "step": 1228 + }, + { + "epoch": 0.09918489226051166, + "grad_norm": 0.8715065717697144, + "learning_rate": 0.00019841583112503416, + "loss": 2.7794, + "step": 1229 + }, + { + "epoch": 0.09926559599709467, + "grad_norm": 0.875599205493927, + "learning_rate": 0.00019841303100953116, + "loss": 2.8016, + "step": 1230 + }, + { + "epoch": 0.09934629973367767, + "grad_norm": 0.8631919622421265, + "learning_rate": 0.0001984102284413182, + "loss": 2.8239, + "step": 1231 + }, + { + "epoch": 0.09942700347026068, + "grad_norm": 0.9028074741363525, + "learning_rate": 0.0001984074234204651, + "loss": 2.8372, + "step": 1232 + }, + { + "epoch": 0.09950770720684368, + "grad_norm": 0.890933096408844, + "learning_rate": 0.00019840461594704175, + "loss": 2.799, + "step": 1233 + }, + { + "epoch": 0.09958841094342669, + "grad_norm": 0.9626480340957642, + "learning_rate": 0.00019840180602111816, + "loss": 2.8207, + "step": 1234 + }, + { + "epoch": 0.09966911468000969, + "grad_norm": 0.798394501209259, + "learning_rate": 0.00019839899364276433, + "loss": 2.7784, + "step": 1235 + }, + { + "epoch": 0.0997498184165927, + "grad_norm": 0.8246447443962097, + "learning_rate": 0.00019839617881205036, + "loss": 2.8193, + "step": 1236 + }, + { + "epoch": 0.09983052215317569, + "grad_norm": 0.8315989375114441, + "learning_rate": 0.0001983933615290464, + "loss": 2.8036, + "step": 1237 + }, + { + "epoch": 0.09991122588975869, + "grad_norm": 0.8889075517654419, + "learning_rate": 0.00019839054179382267, + "loss": 2.7606, + "step": 1238 + }, + { + "epoch": 0.0999919296263417, + "grad_norm": 0.7558645009994507, + "learning_rate": 0.00019838771960644942, + "loss": 2.7666, + "step": 1239 + }, + { + "epoch": 0.1000726333629247, + "grad_norm": 0.8876601457595825, + "learning_rate": 0.00019838489496699704, + "loss": 2.8778, + "step": 1240 + 
}, + { + "epoch": 0.1001533370995077, + "grad_norm": 0.8609516620635986, + "learning_rate": 0.00019838206787553588, + "loss": 2.8189, + "step": 1241 + }, + { + "epoch": 0.10023404083609071, + "grad_norm": 0.8521148562431335, + "learning_rate": 0.00019837923833213644, + "loss": 2.8159, + "step": 1242 + }, + { + "epoch": 0.10031474457267371, + "grad_norm": 0.9155359268188477, + "learning_rate": 0.0001983764063368692, + "loss": 2.8351, + "step": 1243 + }, + { + "epoch": 0.10039544830925672, + "grad_norm": 0.8595378398895264, + "learning_rate": 0.00019837357188980475, + "loss": 2.8447, + "step": 1244 + }, + { + "epoch": 0.10047615204583972, + "grad_norm": 0.900244951248169, + "learning_rate": 0.00019837073499101373, + "loss": 2.8646, + "step": 1245 + }, + { + "epoch": 0.10055685578242272, + "grad_norm": 0.8404260277748108, + "learning_rate": 0.00019836789564056689, + "loss": 2.7824, + "step": 1246 + }, + { + "epoch": 0.10063755951900573, + "grad_norm": 0.8776196241378784, + "learning_rate": 0.0001983650538385349, + "loss": 2.8045, + "step": 1247 + }, + { + "epoch": 0.10071826325558873, + "grad_norm": 0.8889327049255371, + "learning_rate": 0.00019836220958498868, + "loss": 2.7967, + "step": 1248 + }, + { + "epoch": 0.10079896699217174, + "grad_norm": 0.8905191421508789, + "learning_rate": 0.00019835936287999906, + "loss": 2.8167, + "step": 1249 + }, + { + "epoch": 0.10087967072875474, + "grad_norm": 0.839970052242279, + "learning_rate": 0.000198356513723637, + "loss": 2.8643, + "step": 1250 + }, + { + "epoch": 0.10096037446533775, + "grad_norm": 0.7989531755447388, + "learning_rate": 0.00019835366211597353, + "loss": 2.8493, + "step": 1251 + }, + { + "epoch": 0.10104107820192075, + "grad_norm": 0.7960095405578613, + "learning_rate": 0.0001983508080570797, + "loss": 2.7377, + "step": 1252 + }, + { + "epoch": 0.10112178193850375, + "grad_norm": 0.7989903092384338, + "learning_rate": 0.00019834795154702661, + "loss": 2.7409, + "step": 1253 + }, + { + "epoch": 
0.10120248567508676, + "grad_norm": 0.8557813167572021, + "learning_rate": 0.0001983450925858855, + "loss": 2.7945, + "step": 1254 + }, + { + "epoch": 0.10128318941166976, + "grad_norm": 0.948357880115509, + "learning_rate": 0.0001983422311737276, + "loss": 2.826, + "step": 1255 + }, + { + "epoch": 0.10136389314825277, + "grad_norm": 0.8356020450592041, + "learning_rate": 0.00019833936731062423, + "loss": 2.8157, + "step": 1256 + }, + { + "epoch": 0.10144459688483577, + "grad_norm": 0.8199872970581055, + "learning_rate": 0.00019833650099664678, + "loss": 2.7273, + "step": 1257 + }, + { + "epoch": 0.10152530062141878, + "grad_norm": 0.8178466558456421, + "learning_rate": 0.00019833363223186669, + "loss": 2.7513, + "step": 1258 + }, + { + "epoch": 0.10160600435800178, + "grad_norm": 0.8165889978408813, + "learning_rate": 0.00019833076101635538, + "loss": 2.7689, + "step": 1259 + }, + { + "epoch": 0.10168670809458479, + "grad_norm": 0.8240275979042053, + "learning_rate": 0.0001983278873501845, + "loss": 2.7477, + "step": 1260 + }, + { + "epoch": 0.10176741183116779, + "grad_norm": 0.8470584750175476, + "learning_rate": 0.00019832501123342563, + "loss": 2.7414, + "step": 1261 + }, + { + "epoch": 0.1018481155677508, + "grad_norm": 0.819063663482666, + "learning_rate": 0.00019832213266615046, + "loss": 2.7335, + "step": 1262 + }, + { + "epoch": 0.10192881930433378, + "grad_norm": 0.8045673370361328, + "learning_rate": 0.00019831925164843071, + "loss": 2.8141, + "step": 1263 + }, + { + "epoch": 0.10200952304091679, + "grad_norm": 0.7827214598655701, + "learning_rate": 0.00019831636818033824, + "loss": 2.7549, + "step": 1264 + }, + { + "epoch": 0.10209022677749979, + "grad_norm": 0.9596436619758606, + "learning_rate": 0.00019831348226194485, + "loss": 2.7327, + "step": 1265 + }, + { + "epoch": 0.1021709305140828, + "grad_norm": 0.826909601688385, + "learning_rate": 0.0001983105938933225, + "loss": 2.7166, + "step": 1266 + }, + { + "epoch": 0.1022516342506658, + 
"grad_norm": 0.8060985207557678, + "learning_rate": 0.00019830770307454313, + "loss": 2.7514, + "step": 1267 + }, + { + "epoch": 0.1023323379872488, + "grad_norm": 0.8257390856742859, + "learning_rate": 0.00019830480980567887, + "loss": 2.77, + "step": 1268 + }, + { + "epoch": 0.10241304172383181, + "grad_norm": 0.844406008720398, + "learning_rate": 0.00019830191408680173, + "loss": 2.8548, + "step": 1269 + }, + { + "epoch": 0.10249374546041481, + "grad_norm": 0.84171462059021, + "learning_rate": 0.00019829901591798398, + "loss": 2.7404, + "step": 1270 + }, + { + "epoch": 0.10257444919699782, + "grad_norm": 0.8084118962287903, + "learning_rate": 0.00019829611529929774, + "loss": 2.8078, + "step": 1271 + }, + { + "epoch": 0.10265515293358082, + "grad_norm": 0.8273561000823975, + "learning_rate": 0.00019829321223081538, + "loss": 2.787, + "step": 1272 + }, + { + "epoch": 0.10273585667016383, + "grad_norm": 0.799098551273346, + "learning_rate": 0.00019829030671260925, + "loss": 2.7563, + "step": 1273 + }, + { + "epoch": 0.10281656040674683, + "grad_norm": 0.885866105556488, + "learning_rate": 0.00019828739874475172, + "loss": 2.7313, + "step": 1274 + }, + { + "epoch": 0.10289726414332984, + "grad_norm": 0.7702760696411133, + "learning_rate": 0.00019828448832731529, + "loss": 2.7919, + "step": 1275 + }, + { + "epoch": 0.10297796787991284, + "grad_norm": 0.7577444911003113, + "learning_rate": 0.0001982815754603725, + "loss": 2.7149, + "step": 1276 + }, + { + "epoch": 0.10305867161649584, + "grad_norm": 0.8439713716506958, + "learning_rate": 0.00019827866014399592, + "loss": 2.7881, + "step": 1277 + }, + { + "epoch": 0.10313937535307885, + "grad_norm": 0.8504937291145325, + "learning_rate": 0.00019827574237825827, + "loss": 2.7611, + "step": 1278 + }, + { + "epoch": 0.10322007908966185, + "grad_norm": 0.7775665521621704, + "learning_rate": 0.00019827282216323218, + "loss": 2.7312, + "step": 1279 + }, + { + "epoch": 0.10330078282624486, + "grad_norm": 0.8671591281890869, 
+ "learning_rate": 0.00019826989949899048, + "loss": 2.836, + "step": 1280 + }, + { + "epoch": 0.10338148656282786, + "grad_norm": 0.9308713674545288, + "learning_rate": 0.00019826697438560603, + "loss": 2.7494, + "step": 1281 + }, + { + "epoch": 0.10346219029941087, + "grad_norm": 0.9145268797874451, + "learning_rate": 0.0001982640468231517, + "loss": 2.8054, + "step": 1282 + }, + { + "epoch": 0.10354289403599387, + "grad_norm": 0.8150805234909058, + "learning_rate": 0.00019826111681170043, + "loss": 2.7879, + "step": 1283 + }, + { + "epoch": 0.10362359777257688, + "grad_norm": 0.8576685786247253, + "learning_rate": 0.00019825818435132531, + "loss": 2.8184, + "step": 1284 + }, + { + "epoch": 0.10370430150915988, + "grad_norm": 0.8838599920272827, + "learning_rate": 0.00019825524944209937, + "loss": 2.7838, + "step": 1285 + }, + { + "epoch": 0.10378500524574288, + "grad_norm": 0.9119304418563843, + "learning_rate": 0.00019825231208409576, + "loss": 2.8392, + "step": 1286 + }, + { + "epoch": 0.10386570898232589, + "grad_norm": 0.8112398982048035, + "learning_rate": 0.00019824937227738771, + "loss": 2.7844, + "step": 1287 + }, + { + "epoch": 0.10394641271890888, + "grad_norm": 0.8714308738708496, + "learning_rate": 0.00019824643002204847, + "loss": 2.7765, + "step": 1288 + }, + { + "epoch": 0.10402711645549188, + "grad_norm": 0.8733358979225159, + "learning_rate": 0.00019824348531815138, + "loss": 2.771, + "step": 1289 + }, + { + "epoch": 0.10410782019207489, + "grad_norm": 0.8218281269073486, + "learning_rate": 0.00019824053816576981, + "loss": 2.8099, + "step": 1290 + }, + { + "epoch": 0.10418852392865789, + "grad_norm": 0.8647308945655823, + "learning_rate": 0.00019823758856497725, + "loss": 2.7738, + "step": 1291 + }, + { + "epoch": 0.1042692276652409, + "grad_norm": 0.8358582854270935, + "learning_rate": 0.00019823463651584718, + "loss": 2.8021, + "step": 1292 + }, + { + "epoch": 0.1043499314018239, + "grad_norm": 0.7943673133850098, + "learning_rate": 
0.00019823168201845318, + "loss": 2.8293, + "step": 1293 + }, + { + "epoch": 0.1044306351384069, + "grad_norm": 0.8501425981521606, + "learning_rate": 0.0001982287250728689, + "loss": 2.7701, + "step": 1294 + }, + { + "epoch": 0.10451133887498991, + "grad_norm": 0.8503665328025818, + "learning_rate": 0.00019822576567916797, + "loss": 2.7881, + "step": 1295 + }, + { + "epoch": 0.10459204261157291, + "grad_norm": 0.9687628149986267, + "learning_rate": 0.0001982228038374242, + "loss": 2.7623, + "step": 1296 + }, + { + "epoch": 0.10467274634815592, + "grad_norm": 0.8034376502037048, + "learning_rate": 0.00019821983954771146, + "loss": 2.8072, + "step": 1297 + }, + { + "epoch": 0.10475345008473892, + "grad_norm": 0.817135214805603, + "learning_rate": 0.00019821687281010352, + "loss": 2.7572, + "step": 1298 + }, + { + "epoch": 0.10483415382132193, + "grad_norm": 0.7961457371711731, + "learning_rate": 0.0001982139036246744, + "loss": 2.8405, + "step": 1299 + }, + { + "epoch": 0.10491485755790493, + "grad_norm": 0.7572407722473145, + "learning_rate": 0.00019821093199149804, + "loss": 2.7495, + "step": 1300 + }, + { + "epoch": 0.10499556129448794, + "grad_norm": 0.7990664839744568, + "learning_rate": 0.00019820795791064856, + "loss": 2.7567, + "step": 1301 + }, + { + "epoch": 0.10507626503107094, + "grad_norm": 0.8197236061096191, + "learning_rate": 0.0001982049813822, + "loss": 2.7807, + "step": 1302 + }, + { + "epoch": 0.10515696876765394, + "grad_norm": 0.9491304159164429, + "learning_rate": 0.00019820200240622664, + "loss": 2.8531, + "step": 1303 + }, + { + "epoch": 0.10523767250423695, + "grad_norm": 0.8143845200538635, + "learning_rate": 0.00019819902098280268, + "loss": 2.7542, + "step": 1304 + }, + { + "epoch": 0.10531837624081995, + "grad_norm": 0.9055941104888916, + "learning_rate": 0.0001981960371120024, + "loss": 2.863, + "step": 1305 + }, + { + "epoch": 0.10539907997740296, + "grad_norm": 0.7804721593856812, + "learning_rate": 0.0001981930507939002, + "loss": 
2.8213, + "step": 1306 + }, + { + "epoch": 0.10547978371398596, + "grad_norm": 0.8375318050384521, + "learning_rate": 0.00019819006202857046, + "loss": 2.8222, + "step": 1307 + }, + { + "epoch": 0.10556048745056897, + "grad_norm": 0.9145569801330566, + "learning_rate": 0.00019818707081608773, + "loss": 2.805, + "step": 1308 + }, + { + "epoch": 0.10564119118715197, + "grad_norm": 0.7899324893951416, + "learning_rate": 0.00019818407715652654, + "loss": 2.8246, + "step": 1309 + }, + { + "epoch": 0.10572189492373497, + "grad_norm": 0.7843480110168457, + "learning_rate": 0.0001981810810499615, + "loss": 2.7909, + "step": 1310 + }, + { + "epoch": 0.10580259866031798, + "grad_norm": 0.8071008920669556, + "learning_rate": 0.00019817808249646723, + "loss": 2.7434, + "step": 1311 + }, + { + "epoch": 0.10588330239690098, + "grad_norm": 0.8682011961936951, + "learning_rate": 0.0001981750814961185, + "loss": 2.8387, + "step": 1312 + }, + { + "epoch": 0.10596400613348399, + "grad_norm": 0.7501091361045837, + "learning_rate": 0.0001981720780489902, + "loss": 2.7633, + "step": 1313 + }, + { + "epoch": 0.10604470987006698, + "grad_norm": 0.9259567856788635, + "learning_rate": 0.000198169072155157, + "loss": 2.8309, + "step": 1314 + }, + { + "epoch": 0.10612541360664998, + "grad_norm": 0.8018674254417419, + "learning_rate": 0.00019816606381469393, + "loss": 2.8647, + "step": 1315 + }, + { + "epoch": 0.10620611734323299, + "grad_norm": 0.8218088746070862, + "learning_rate": 0.00019816305302767595, + "loss": 2.823, + "step": 1316 + }, + { + "epoch": 0.10628682107981599, + "grad_norm": 0.812125027179718, + "learning_rate": 0.00019816003979417808, + "loss": 2.7216, + "step": 1317 + }, + { + "epoch": 0.106367524816399, + "grad_norm": 0.787407636642456, + "learning_rate": 0.0001981570241142754, + "loss": 2.7639, + "step": 1318 + }, + { + "epoch": 0.106448228552982, + "grad_norm": 0.7982528805732727, + "learning_rate": 0.00019815400598804312, + "loss": 2.8597, + "step": 1319 + }, + { + 
"epoch": 0.106528932289565, + "grad_norm": 0.8490404486656189, + "learning_rate": 0.00019815098541555646, + "loss": 2.7947, + "step": 1320 + }, + { + "epoch": 0.10660963602614801, + "grad_norm": 0.8743172883987427, + "learning_rate": 0.00019814796239689064, + "loss": 2.8674, + "step": 1321 + }, + { + "epoch": 0.10669033976273101, + "grad_norm": 0.8338125348091125, + "learning_rate": 0.00019814493693212106, + "loss": 2.781, + "step": 1322 + }, + { + "epoch": 0.10677104349931402, + "grad_norm": 0.871516764163971, + "learning_rate": 0.00019814190902132307, + "loss": 2.8742, + "step": 1323 + }, + { + "epoch": 0.10685174723589702, + "grad_norm": 0.8935555815696716, + "learning_rate": 0.00019813887866457216, + "loss": 2.7991, + "step": 1324 + }, + { + "epoch": 0.10693245097248003, + "grad_norm": 0.840067446231842, + "learning_rate": 0.00019813584586194388, + "loss": 2.7922, + "step": 1325 + }, + { + "epoch": 0.10701315470906303, + "grad_norm": 0.7919262647628784, + "learning_rate": 0.0001981328106135138, + "loss": 2.7912, + "step": 1326 + }, + { + "epoch": 0.10709385844564603, + "grad_norm": 0.7974550127983093, + "learning_rate": 0.00019812977291935752, + "loss": 2.8497, + "step": 1327 + }, + { + "epoch": 0.10717456218222904, + "grad_norm": 0.9126157164573669, + "learning_rate": 0.00019812673277955082, + "loss": 2.7698, + "step": 1328 + }, + { + "epoch": 0.10725526591881204, + "grad_norm": 0.8329752683639526, + "learning_rate": 0.0001981236901941694, + "loss": 2.8366, + "step": 1329 + }, + { + "epoch": 0.10733596965539505, + "grad_norm": 0.8313524127006531, + "learning_rate": 0.00019812064516328915, + "loss": 2.6863, + "step": 1330 + }, + { + "epoch": 0.10741667339197805, + "grad_norm": 0.8917783498764038, + "learning_rate": 0.0001981175976869859, + "loss": 2.7817, + "step": 1331 + }, + { + "epoch": 0.10749737712856106, + "grad_norm": 0.8370450735092163, + "learning_rate": 0.00019811454776533566, + "loss": 2.837, + "step": 1332 + }, + { + "epoch": 0.10757808086514406, + 
"grad_norm": 0.8415676355361938, + "learning_rate": 0.00019811149539841443, + "loss": 2.7399, + "step": 1333 + }, + { + "epoch": 0.10765878460172706, + "grad_norm": 0.8576632142066956, + "learning_rate": 0.00019810844058629825, + "loss": 2.7747, + "step": 1334 + }, + { + "epoch": 0.10773948833831007, + "grad_norm": 0.8943549394607544, + "learning_rate": 0.00019810538332906328, + "loss": 2.7368, + "step": 1335 + }, + { + "epoch": 0.10782019207489307, + "grad_norm": 0.8878718018531799, + "learning_rate": 0.00019810232362678568, + "loss": 2.7907, + "step": 1336 + }, + { + "epoch": 0.10790089581147608, + "grad_norm": 0.8131409287452698, + "learning_rate": 0.00019809926147954174, + "loss": 2.7782, + "step": 1337 + }, + { + "epoch": 0.10798159954805908, + "grad_norm": 0.8733747005462646, + "learning_rate": 0.0001980961968874078, + "loss": 2.8552, + "step": 1338 + }, + { + "epoch": 0.10806230328464207, + "grad_norm": 0.8997320532798767, + "learning_rate": 0.0001980931298504602, + "loss": 2.8452, + "step": 1339 + }, + { + "epoch": 0.10814300702122508, + "grad_norm": 0.8400282263755798, + "learning_rate": 0.00019809006036877538, + "loss": 2.786, + "step": 1340 + }, + { + "epoch": 0.10822371075780808, + "grad_norm": 0.8173925280570984, + "learning_rate": 0.00019808698844242983, + "loss": 2.8363, + "step": 1341 + }, + { + "epoch": 0.10830441449439109, + "grad_norm": 0.872278094291687, + "learning_rate": 0.00019808391407150015, + "loss": 2.7789, + "step": 1342 + }, + { + "epoch": 0.10838511823097409, + "grad_norm": 0.8939952254295349, + "learning_rate": 0.00019808083725606293, + "loss": 2.7453, + "step": 1343 + }, + { + "epoch": 0.1084658219675571, + "grad_norm": 0.8351218104362488, + "learning_rate": 0.00019807775799619484, + "loss": 2.8004, + "step": 1344 + }, + { + "epoch": 0.1085465257041401, + "grad_norm": 0.8381102681159973, + "learning_rate": 0.00019807467629197266, + "loss": 2.8155, + "step": 1345 + }, + { + "epoch": 0.1086272294407231, + "grad_norm": 
0.869458019733429, + "learning_rate": 0.00019807159214347317, + "loss": 2.8219, + "step": 1346 + }, + { + "epoch": 0.10870793317730611, + "grad_norm": 0.8251017928123474, + "learning_rate": 0.00019806850555077326, + "loss": 2.7978, + "step": 1347 + }, + { + "epoch": 0.10878863691388911, + "grad_norm": 0.8056492209434509, + "learning_rate": 0.0001980654165139498, + "loss": 2.7994, + "step": 1348 + }, + { + "epoch": 0.10886934065047212, + "grad_norm": 0.9566174745559692, + "learning_rate": 0.00019806232503307984, + "loss": 2.794, + "step": 1349 + }, + { + "epoch": 0.10895004438705512, + "grad_norm": 0.7891408801078796, + "learning_rate": 0.0001980592311082404, + "loss": 2.7134, + "step": 1350 + }, + { + "epoch": 0.10903074812363812, + "grad_norm": 0.8894741535186768, + "learning_rate": 0.00019805613473950862, + "loss": 2.7829, + "step": 1351 + }, + { + "epoch": 0.10911145186022113, + "grad_norm": 0.893086850643158, + "learning_rate": 0.0001980530359269616, + "loss": 2.7475, + "step": 1352 + }, + { + "epoch": 0.10919215559680413, + "grad_norm": 0.8758537173271179, + "learning_rate": 0.00019804993467067666, + "loss": 2.8715, + "step": 1353 + }, + { + "epoch": 0.10927285933338714, + "grad_norm": 0.9304648041725159, + "learning_rate": 0.00019804683097073098, + "loss": 2.8051, + "step": 1354 + }, + { + "epoch": 0.10935356306997014, + "grad_norm": 0.8465876579284668, + "learning_rate": 0.00019804372482720202, + "loss": 2.7879, + "step": 1355 + }, + { + "epoch": 0.10943426680655315, + "grad_norm": 0.8485612273216248, + "learning_rate": 0.00019804061624016713, + "loss": 2.7783, + "step": 1356 + }, + { + "epoch": 0.10951497054313615, + "grad_norm": 0.835630476474762, + "learning_rate": 0.0001980375052097038, + "loss": 2.8116, + "step": 1357 + }, + { + "epoch": 0.10959567427971915, + "grad_norm": 0.8404836058616638, + "learning_rate": 0.00019803439173588956, + "loss": 2.8257, + "step": 1358 + }, + { + "epoch": 0.10967637801630216, + "grad_norm": 0.8048505783081055, + 
"learning_rate": 0.00019803127581880206, + "loss": 2.7762, + "step": 1359 + }, + { + "epoch": 0.10975708175288516, + "grad_norm": 0.8481776118278503, + "learning_rate": 0.00019802815745851885, + "loss": 2.8243, + "step": 1360 + }, + { + "epoch": 0.10983778548946817, + "grad_norm": 0.8565996885299683, + "learning_rate": 0.00019802503665511775, + "loss": 2.7958, + "step": 1361 + }, + { + "epoch": 0.10991848922605117, + "grad_norm": 0.8867515921592712, + "learning_rate": 0.0001980219134086765, + "loss": 2.7973, + "step": 1362 + }, + { + "epoch": 0.10999919296263418, + "grad_norm": 0.8459765911102295, + "learning_rate": 0.0001980187877192729, + "loss": 2.848, + "step": 1363 + }, + { + "epoch": 0.11007989669921718, + "grad_norm": 0.7929832339286804, + "learning_rate": 0.0001980156595869849, + "loss": 2.8583, + "step": 1364 + }, + { + "epoch": 0.11016060043580017, + "grad_norm": 0.8475651741027832, + "learning_rate": 0.00019801252901189043, + "loss": 2.8436, + "step": 1365 + }, + { + "epoch": 0.11024130417238318, + "grad_norm": 0.8545576333999634, + "learning_rate": 0.00019800939599406755, + "loss": 2.7457, + "step": 1366 + }, + { + "epoch": 0.11032200790896618, + "grad_norm": 1.0093715190887451, + "learning_rate": 0.00019800626053359435, + "loss": 2.8198, + "step": 1367 + }, + { + "epoch": 0.11040271164554918, + "grad_norm": 0.8728145956993103, + "learning_rate": 0.0001980031226305489, + "loss": 2.7794, + "step": 1368 + }, + { + "epoch": 0.11048341538213219, + "grad_norm": 0.8538581728935242, + "learning_rate": 0.00019799998228500946, + "loss": 2.8018, + "step": 1369 + }, + { + "epoch": 0.11056411911871519, + "grad_norm": 0.9452785849571228, + "learning_rate": 0.00019799683949705432, + "loss": 2.8173, + "step": 1370 + }, + { + "epoch": 0.1106448228552982, + "grad_norm": 0.806508481502533, + "learning_rate": 0.00019799369426676174, + "loss": 2.8192, + "step": 1371 + }, + { + "epoch": 0.1107255265918812, + "grad_norm": 0.8952856063842773, + "learning_rate": 
0.00019799054659421018, + "loss": 2.8072, + "step": 1372 + }, + { + "epoch": 0.1108062303284642, + "grad_norm": 0.8863561749458313, + "learning_rate": 0.00019798739647947802, + "loss": 2.7836, + "step": 1373 + }, + { + "epoch": 0.11088693406504721, + "grad_norm": 0.8544357419013977, + "learning_rate": 0.00019798424392264378, + "loss": 2.7714, + "step": 1374 + }, + { + "epoch": 0.11096763780163021, + "grad_norm": 0.807546854019165, + "learning_rate": 0.00019798108892378607, + "loss": 2.7635, + "step": 1375 + }, + { + "epoch": 0.11104834153821322, + "grad_norm": 0.8198233246803284, + "learning_rate": 0.0001979779314829835, + "loss": 2.8253, + "step": 1376 + }, + { + "epoch": 0.11112904527479622, + "grad_norm": 0.9268671870231628, + "learning_rate": 0.00019797477160031477, + "loss": 2.8007, + "step": 1377 + }, + { + "epoch": 0.11120974901137923, + "grad_norm": 0.8547680974006653, + "learning_rate": 0.0001979716092758586, + "loss": 2.7749, + "step": 1378 + }, + { + "epoch": 0.11129045274796223, + "grad_norm": 0.8052394390106201, + "learning_rate": 0.00019796844450969384, + "loss": 2.763, + "step": 1379 + }, + { + "epoch": 0.11137115648454524, + "grad_norm": 0.8291144371032715, + "learning_rate": 0.00019796527730189936, + "loss": 2.8053, + "step": 1380 + }, + { + "epoch": 0.11145186022112824, + "grad_norm": 0.8114006519317627, + "learning_rate": 0.00019796210765255404, + "loss": 2.8047, + "step": 1381 + }, + { + "epoch": 0.11153256395771124, + "grad_norm": 0.9326293468475342, + "learning_rate": 0.00019795893556173697, + "loss": 2.8199, + "step": 1382 + }, + { + "epoch": 0.11161326769429425, + "grad_norm": 0.7702555656433105, + "learning_rate": 0.00019795576102952714, + "loss": 2.7909, + "step": 1383 + }, + { + "epoch": 0.11169397143087725, + "grad_norm": 0.8115492463111877, + "learning_rate": 0.0001979525840560037, + "loss": 2.748, + "step": 1384 + }, + { + "epoch": 0.11177467516746026, + "grad_norm": 0.8926187753677368, + "learning_rate": 0.0001979494046412458, + 
"loss": 2.7791, + "step": 1385 + }, + { + "epoch": 0.11185537890404326, + "grad_norm": 0.8549754023551941, + "learning_rate": 0.0001979462227853327, + "loss": 2.7989, + "step": 1386 + }, + { + "epoch": 0.11193608264062627, + "grad_norm": 0.8625262975692749, + "learning_rate": 0.0001979430384883437, + "loss": 2.7202, + "step": 1387 + }, + { + "epoch": 0.11201678637720927, + "grad_norm": 0.8134698867797852, + "learning_rate": 0.00019793985175035813, + "loss": 2.8008, + "step": 1388 + }, + { + "epoch": 0.11209749011379228, + "grad_norm": 0.8546617031097412, + "learning_rate": 0.00019793666257145547, + "loss": 2.8076, + "step": 1389 + }, + { + "epoch": 0.11217819385037527, + "grad_norm": 0.8003748059272766, + "learning_rate": 0.00019793347095171514, + "loss": 2.826, + "step": 1390 + }, + { + "epoch": 0.11225889758695827, + "grad_norm": 0.8116614818572998, + "learning_rate": 0.00019793027689121674, + "loss": 2.7096, + "step": 1391 + }, + { + "epoch": 0.11233960132354127, + "grad_norm": 0.7785829901695251, + "learning_rate": 0.00019792708039003984, + "loss": 2.748, + "step": 1392 + }, + { + "epoch": 0.11242030506012428, + "grad_norm": 0.7999277710914612, + "learning_rate": 0.0001979238814482641, + "loss": 2.7671, + "step": 1393 + }, + { + "epoch": 0.11250100879670728, + "grad_norm": 0.8862190842628479, + "learning_rate": 0.00019792068006596925, + "loss": 2.8484, + "step": 1394 + }, + { + "epoch": 0.11258171253329029, + "grad_norm": 0.8747627139091492, + "learning_rate": 0.00019791747624323512, + "loss": 2.7477, + "step": 1395 + }, + { + "epoch": 0.11266241626987329, + "grad_norm": 0.8280831575393677, + "learning_rate": 0.0001979142699801415, + "loss": 2.87, + "step": 1396 + }, + { + "epoch": 0.1127431200064563, + "grad_norm": 0.8069074153900146, + "learning_rate": 0.00019791106127676832, + "loss": 2.7724, + "step": 1397 + }, + { + "epoch": 0.1128238237430393, + "grad_norm": 0.8253301382064819, + "learning_rate": 0.00019790785013319557, + "loss": 2.7351, + "step": 1398 + 
}, + { + "epoch": 0.1129045274796223, + "grad_norm": 0.8298853635787964, + "learning_rate": 0.00019790463654950323, + "loss": 2.7709, + "step": 1399 + }, + { + "epoch": 0.11298523121620531, + "grad_norm": 0.7796407341957092, + "learning_rate": 0.0001979014205257715, + "loss": 2.7766, + "step": 1400 + }, + { + "epoch": 0.11306593495278831, + "grad_norm": 0.8922166228294373, + "learning_rate": 0.00019789820206208037, + "loss": 2.8473, + "step": 1401 + }, + { + "epoch": 0.11314663868937132, + "grad_norm": 0.7763219475746155, + "learning_rate": 0.00019789498115851015, + "loss": 2.8629, + "step": 1402 + }, + { + "epoch": 0.11322734242595432, + "grad_norm": 0.8679928779602051, + "learning_rate": 0.0001978917578151411, + "loss": 2.8017, + "step": 1403 + }, + { + "epoch": 0.11330804616253733, + "grad_norm": 0.8491933941841125, + "learning_rate": 0.00019788853203205357, + "loss": 2.7156, + "step": 1404 + }, + { + "epoch": 0.11338874989912033, + "grad_norm": 0.8271194696426392, + "learning_rate": 0.00019788530380932792, + "loss": 2.7892, + "step": 1405 + }, + { + "epoch": 0.11346945363570334, + "grad_norm": 0.9224163293838501, + "learning_rate": 0.00019788207314704463, + "loss": 2.7824, + "step": 1406 + }, + { + "epoch": 0.11355015737228634, + "grad_norm": 0.7662777900695801, + "learning_rate": 0.00019787884004528422, + "loss": 2.7364, + "step": 1407 + }, + { + "epoch": 0.11363086110886934, + "grad_norm": 0.8750362396240234, + "learning_rate": 0.00019787560450412728, + "loss": 2.7546, + "step": 1408 + }, + { + "epoch": 0.11371156484545235, + "grad_norm": 0.9158821105957031, + "learning_rate": 0.0001978723665236544, + "loss": 2.8304, + "step": 1409 + }, + { + "epoch": 0.11379226858203535, + "grad_norm": 0.8291050791740417, + "learning_rate": 0.0001978691261039463, + "loss": 2.758, + "step": 1410 + }, + { + "epoch": 0.11387297231861836, + "grad_norm": 0.801886796951294, + "learning_rate": 0.00019786588324508374, + "loss": 2.7805, + "step": 1411 + }, + { + "epoch": 
0.11395367605520136, + "grad_norm": 0.8140222430229187, + "learning_rate": 0.00019786263794714757, + "loss": 2.8155, + "step": 1412 + }, + { + "epoch": 0.11403437979178437, + "grad_norm": 0.7747580409049988, + "learning_rate": 0.00019785939021021865, + "loss": 2.778, + "step": 1413 + }, + { + "epoch": 0.11411508352836737, + "grad_norm": 0.8954138159751892, + "learning_rate": 0.0001978561400343779, + "loss": 2.7756, + "step": 1414 + }, + { + "epoch": 0.11419578726495037, + "grad_norm": 0.9038921594619751, + "learning_rate": 0.00019785288741970634, + "loss": 2.7181, + "step": 1415 + }, + { + "epoch": 0.11427649100153336, + "grad_norm": 0.8284393548965454, + "learning_rate": 0.000197849632366285, + "loss": 2.7467, + "step": 1416 + }, + { + "epoch": 0.11435719473811637, + "grad_norm": 0.8996441960334778, + "learning_rate": 0.00019784637487419514, + "loss": 2.7918, + "step": 1417 + }, + { + "epoch": 0.11443789847469937, + "grad_norm": 0.9868448376655579, + "learning_rate": 0.00019784311494351777, + "loss": 2.7687, + "step": 1418 + }, + { + "epoch": 0.11451860221128238, + "grad_norm": 0.8491402864456177, + "learning_rate": 0.0001978398525743342, + "loss": 2.8492, + "step": 1419 + }, + { + "epoch": 0.11459930594786538, + "grad_norm": 1.06125807762146, + "learning_rate": 0.0001978365877667258, + "loss": 2.8041, + "step": 1420 + }, + { + "epoch": 0.11468000968444839, + "grad_norm": 0.8194011449813843, + "learning_rate": 0.00019783332052077386, + "loss": 2.7109, + "step": 1421 + }, + { + "epoch": 0.11476071342103139, + "grad_norm": 0.972620964050293, + "learning_rate": 0.00019783005083655984, + "loss": 2.8107, + "step": 1422 + }, + { + "epoch": 0.1148414171576144, + "grad_norm": 0.925410270690918, + "learning_rate": 0.0001978267787141652, + "loss": 2.7603, + "step": 1423 + }, + { + "epoch": 0.1149221208941974, + "grad_norm": 0.920156717300415, + "learning_rate": 0.00019782350415367152, + "loss": 2.7644, + "step": 1424 + }, + { + "epoch": 0.1150028246307804, + "grad_norm": 
0.8617576360702515, + "learning_rate": 0.00019782022715516043, + "loss": 2.769, + "step": 1425 + }, + { + "epoch": 0.11508352836736341, + "grad_norm": 1.0987342596054077, + "learning_rate": 0.00019781694771871356, + "loss": 2.8224, + "step": 1426 + }, + { + "epoch": 0.11516423210394641, + "grad_norm": 0.8418076634407043, + "learning_rate": 0.00019781366584441264, + "loss": 2.7947, + "step": 1427 + }, + { + "epoch": 0.11524493584052942, + "grad_norm": 0.8010901808738708, + "learning_rate": 0.0001978103815323395, + "loss": 2.733, + "step": 1428 + }, + { + "epoch": 0.11532563957711242, + "grad_norm": 0.8649042844772339, + "learning_rate": 0.00019780709478257598, + "loss": 2.7681, + "step": 1429 + }, + { + "epoch": 0.11540634331369543, + "grad_norm": 0.7728127837181091, + "learning_rate": 0.00019780380559520397, + "loss": 2.7795, + "step": 1430 + }, + { + "epoch": 0.11548704705027843, + "grad_norm": 0.7770940065383911, + "learning_rate": 0.00019780051397030545, + "loss": 2.743, + "step": 1431 + }, + { + "epoch": 0.11556775078686143, + "grad_norm": 0.8341890573501587, + "learning_rate": 0.0001977972199079625, + "loss": 2.8047, + "step": 1432 + }, + { + "epoch": 0.11564845452344444, + "grad_norm": 0.7894187569618225, + "learning_rate": 0.00019779392340825717, + "loss": 2.7757, + "step": 1433 + }, + { + "epoch": 0.11572915826002744, + "grad_norm": 0.8002873063087463, + "learning_rate": 0.00019779062447127164, + "loss": 2.7816, + "step": 1434 + }, + { + "epoch": 0.11580986199661045, + "grad_norm": 0.8256075978279114, + "learning_rate": 0.0001977873230970881, + "loss": 2.7839, + "step": 1435 + }, + { + "epoch": 0.11589056573319345, + "grad_norm": 0.8695322871208191, + "learning_rate": 0.0001977840192857889, + "loss": 2.746, + "step": 1436 + }, + { + "epoch": 0.11597126946977646, + "grad_norm": 0.767425537109375, + "learning_rate": 0.00019778071303745628, + "loss": 2.797, + "step": 1437 + }, + { + "epoch": 0.11605197320635946, + "grad_norm": 0.8263241052627563, + 
"learning_rate": 0.0001977774043521727, + "loss": 2.7702, + "step": 1438 + }, + { + "epoch": 0.11613267694294246, + "grad_norm": 0.8108638525009155, + "learning_rate": 0.0001977740932300206, + "loss": 2.6981, + "step": 1439 + }, + { + "epoch": 0.11621338067952547, + "grad_norm": 0.7945007681846619, + "learning_rate": 0.00019777077967108255, + "loss": 2.7357, + "step": 1440 + }, + { + "epoch": 0.11629408441610846, + "grad_norm": 0.8480326533317566, + "learning_rate": 0.00019776746367544107, + "loss": 2.8563, + "step": 1441 + }, + { + "epoch": 0.11637478815269146, + "grad_norm": 0.8202071785926819, + "learning_rate": 0.00019776414524317882, + "loss": 2.7955, + "step": 1442 + }, + { + "epoch": 0.11645549188927447, + "grad_norm": 0.8202874660491943, + "learning_rate": 0.00019776082437437852, + "loss": 2.765, + "step": 1443 + }, + { + "epoch": 0.11653619562585747, + "grad_norm": 0.8053051829338074, + "learning_rate": 0.00019775750106912294, + "loss": 2.6866, + "step": 1444 + }, + { + "epoch": 0.11661689936244048, + "grad_norm": 0.831968367099762, + "learning_rate": 0.00019775417532749486, + "loss": 2.7022, + "step": 1445 + }, + { + "epoch": 0.11669760309902348, + "grad_norm": 0.8903129696846008, + "learning_rate": 0.00019775084714957725, + "loss": 2.7308, + "step": 1446 + }, + { + "epoch": 0.11677830683560649, + "grad_norm": 0.8178622722625732, + "learning_rate": 0.000197747516535453, + "loss": 2.7446, + "step": 1447 + }, + { + "epoch": 0.11685901057218949, + "grad_norm": 0.8270576596260071, + "learning_rate": 0.00019774418348520508, + "loss": 2.7716, + "step": 1448 + }, + { + "epoch": 0.1169397143087725, + "grad_norm": 0.7965807914733887, + "learning_rate": 0.00019774084799891662, + "loss": 2.7305, + "step": 1449 + }, + { + "epoch": 0.1170204180453555, + "grad_norm": 0.8499472737312317, + "learning_rate": 0.00019773751007667073, + "loss": 2.7584, + "step": 1450 + }, + { + "epoch": 0.1171011217819385, + "grad_norm": 0.8961663842201233, + "learning_rate": 
0.0001977341697185506, + "loss": 2.7729, + "step": 1451 + }, + { + "epoch": 0.1171818255185215, + "grad_norm": 1.0203527212142944, + "learning_rate": 0.0001977308269246395, + "loss": 2.727, + "step": 1452 + }, + { + "epoch": 0.11726252925510451, + "grad_norm": 0.953289806842804, + "learning_rate": 0.0001977274816950207, + "loss": 2.8158, + "step": 1453 + }, + { + "epoch": 0.11734323299168752, + "grad_norm": 1.0064597129821777, + "learning_rate": 0.0001977241340297776, + "loss": 2.8743, + "step": 1454 + }, + { + "epoch": 0.11742393672827052, + "grad_norm": 0.8541988730430603, + "learning_rate": 0.00019772078392899363, + "loss": 2.8532, + "step": 1455 + }, + { + "epoch": 0.11750464046485352, + "grad_norm": 0.8351433873176575, + "learning_rate": 0.00019771743139275228, + "loss": 2.7749, + "step": 1456 + }, + { + "epoch": 0.11758534420143653, + "grad_norm": 0.9555812478065491, + "learning_rate": 0.00019771407642113712, + "loss": 2.7408, + "step": 1457 + }, + { + "epoch": 0.11766604793801953, + "grad_norm": 0.7943894267082214, + "learning_rate": 0.0001977107190142317, + "loss": 2.7265, + "step": 1458 + }, + { + "epoch": 0.11774675167460254, + "grad_norm": 0.8636460900306702, + "learning_rate": 0.0001977073591721198, + "loss": 2.8178, + "step": 1459 + }, + { + "epoch": 0.11782745541118554, + "grad_norm": 0.8673834800720215, + "learning_rate": 0.00019770399689488506, + "loss": 2.7928, + "step": 1460 + }, + { + "epoch": 0.11790815914776855, + "grad_norm": 0.9463722705841064, + "learning_rate": 0.00019770063218261133, + "loss": 2.7448, + "step": 1461 + }, + { + "epoch": 0.11798886288435155, + "grad_norm": 0.8429726362228394, + "learning_rate": 0.00019769726503538246, + "loss": 2.7564, + "step": 1462 + }, + { + "epoch": 0.11806956662093455, + "grad_norm": 0.9412201642990112, + "learning_rate": 0.00019769389545328236, + "loss": 2.793, + "step": 1463 + }, + { + "epoch": 0.11815027035751756, + "grad_norm": 0.9112111926078796, + "learning_rate": 0.000197690523436395, + "loss": 
2.7787, + "step": 1464 + }, + { + "epoch": 0.11823097409410056, + "grad_norm": 0.8417023420333862, + "learning_rate": 0.00019768714898480444, + "loss": 2.7654, + "step": 1465 + }, + { + "epoch": 0.11831167783068357, + "grad_norm": 0.8275290727615356, + "learning_rate": 0.00019768377209859476, + "loss": 2.7914, + "step": 1466 + }, + { + "epoch": 0.11839238156726656, + "grad_norm": 0.8113142848014832, + "learning_rate": 0.00019768039277785017, + "loss": 2.7516, + "step": 1467 + }, + { + "epoch": 0.11847308530384956, + "grad_norm": 0.8655288219451904, + "learning_rate": 0.0001976770110226548, + "loss": 2.8158, + "step": 1468 + }, + { + "epoch": 0.11855378904043257, + "grad_norm": 0.8063547611236572, + "learning_rate": 0.000197673626833093, + "loss": 2.7624, + "step": 1469 + }, + { + "epoch": 0.11863449277701557, + "grad_norm": 0.843772292137146, + "learning_rate": 0.00019767024020924908, + "loss": 2.86, + "step": 1470 + }, + { + "epoch": 0.11871519651359858, + "grad_norm": 0.7942481637001038, + "learning_rate": 0.0001976668511512075, + "loss": 2.758, + "step": 1471 + }, + { + "epoch": 0.11879590025018158, + "grad_norm": 0.841275155544281, + "learning_rate": 0.00019766345965905268, + "loss": 2.8014, + "step": 1472 + }, + { + "epoch": 0.11887660398676458, + "grad_norm": 0.8003600835800171, + "learning_rate": 0.00019766006573286915, + "loss": 2.7829, + "step": 1473 + }, + { + "epoch": 0.11895730772334759, + "grad_norm": 0.8437239527702332, + "learning_rate": 0.00019765666937274147, + "loss": 2.7706, + "step": 1474 + }, + { + "epoch": 0.11903801145993059, + "grad_norm": 0.8118240833282471, + "learning_rate": 0.00019765327057875433, + "loss": 2.8185, + "step": 1475 + }, + { + "epoch": 0.1191187151965136, + "grad_norm": 0.8051649928092957, + "learning_rate": 0.00019764986935099244, + "loss": 2.7676, + "step": 1476 + }, + { + "epoch": 0.1191994189330966, + "grad_norm": 0.7786862850189209, + "learning_rate": 0.00019764646568954053, + "loss": 2.8069, + "step": 1477 + }, + { + 
"epoch": 0.1192801226696796, + "grad_norm": 0.8199592232704163, + "learning_rate": 0.0001976430595944834, + "loss": 2.7718, + "step": 1478 + }, + { + "epoch": 0.11936082640626261, + "grad_norm": 0.8696652054786682, + "learning_rate": 0.00019763965106590604, + "loss": 2.7682, + "step": 1479 + }, + { + "epoch": 0.11944153014284561, + "grad_norm": 0.7993931174278259, + "learning_rate": 0.00019763624010389334, + "loss": 2.7607, + "step": 1480 + }, + { + "epoch": 0.11952223387942862, + "grad_norm": 0.8107055425643921, + "learning_rate": 0.0001976328267085303, + "loss": 2.7885, + "step": 1481 + }, + { + "epoch": 0.11960293761601162, + "grad_norm": 0.8189423084259033, + "learning_rate": 0.000197629410879902, + "loss": 2.7332, + "step": 1482 + }, + { + "epoch": 0.11968364135259463, + "grad_norm": 0.9134814143180847, + "learning_rate": 0.0001976259926180936, + "loss": 2.7691, + "step": 1483 + }, + { + "epoch": 0.11976434508917763, + "grad_norm": 0.8642883896827698, + "learning_rate": 0.00019762257192319023, + "loss": 2.7876, + "step": 1484 + }, + { + "epoch": 0.11984504882576064, + "grad_norm": 0.7411352396011353, + "learning_rate": 0.0001976191487952772, + "loss": 2.7577, + "step": 1485 + }, + { + "epoch": 0.11992575256234364, + "grad_norm": 0.7741669416427612, + "learning_rate": 0.00019761572323443978, + "loss": 2.8005, + "step": 1486 + }, + { + "epoch": 0.12000645629892664, + "grad_norm": 0.8195405602455139, + "learning_rate": 0.0001976122952407634, + "loss": 2.7421, + "step": 1487 + }, + { + "epoch": 0.12008716003550965, + "grad_norm": 0.8355886936187744, + "learning_rate": 0.00019760886481433345, + "loss": 2.8156, + "step": 1488 + }, + { + "epoch": 0.12016786377209265, + "grad_norm": 0.8321093916893005, + "learning_rate": 0.00019760543195523542, + "loss": 2.7261, + "step": 1489 + }, + { + "epoch": 0.12024856750867566, + "grad_norm": 0.7792446613311768, + "learning_rate": 0.0001976019966635549, + "loss": 2.7319, + "step": 1490 + }, + { + "epoch": 0.12032927124525866, + 
"grad_norm": 0.770535409450531, + "learning_rate": 0.00019759855893937748, + "loss": 2.7727, + "step": 1491 + }, + { + "epoch": 0.12040997498184165, + "grad_norm": 0.8168532252311707, + "learning_rate": 0.00019759511878278887, + "loss": 2.7763, + "step": 1492 + }, + { + "epoch": 0.12049067871842466, + "grad_norm": 0.8395755290985107, + "learning_rate": 0.00019759167619387476, + "loss": 2.8382, + "step": 1493 + }, + { + "epoch": 0.12057138245500766, + "grad_norm": 0.8682762384414673, + "learning_rate": 0.00019758823117272097, + "loss": 2.8056, + "step": 1494 + }, + { + "epoch": 0.12065208619159067, + "grad_norm": 0.815192699432373, + "learning_rate": 0.00019758478371941337, + "loss": 2.7602, + "step": 1495 + }, + { + "epoch": 0.12073278992817367, + "grad_norm": 0.7919273376464844, + "learning_rate": 0.00019758133383403786, + "loss": 2.7989, + "step": 1496 + }, + { + "epoch": 0.12081349366475667, + "grad_norm": 1.004387378692627, + "learning_rate": 0.00019757788151668045, + "loss": 2.7765, + "step": 1497 + }, + { + "epoch": 0.12089419740133968, + "grad_norm": 1.0032062530517578, + "learning_rate": 0.00019757442676742715, + "loss": 2.7751, + "step": 1498 + }, + { + "epoch": 0.12097490113792268, + "grad_norm": 0.8797723054885864, + "learning_rate": 0.00019757096958636407, + "loss": 2.7798, + "step": 1499 + }, + { + "epoch": 0.12105560487450569, + "grad_norm": 0.9239820241928101, + "learning_rate": 0.0001975675099735774, + "loss": 2.7976, + "step": 1500 + }, + { + "epoch": 0.12113630861108869, + "grad_norm": 0.9903601408004761, + "learning_rate": 0.00019756404792915328, + "loss": 2.7891, + "step": 1501 + }, + { + "epoch": 0.1212170123476717, + "grad_norm": 0.8402895331382751, + "learning_rate": 0.0001975605834531781, + "loss": 2.8037, + "step": 1502 + }, + { + "epoch": 0.1212977160842547, + "grad_norm": 0.8986102342605591, + "learning_rate": 0.00019755711654573813, + "loss": 2.8375, + "step": 1503 + }, + { + "epoch": 0.1213784198208377, + "grad_norm": 
0.8795471787452698, + "learning_rate": 0.0001975536472069198, + "loss": 2.7916, + "step": 1504 + }, + { + "epoch": 0.12145912355742071, + "grad_norm": 0.866278350353241, + "learning_rate": 0.00019755017543680962, + "loss": 2.7884, + "step": 1505 + }, + { + "epoch": 0.12153982729400371, + "grad_norm": 0.7877952456474304, + "learning_rate": 0.00019754670123549398, + "loss": 2.7659, + "step": 1506 + }, + { + "epoch": 0.12162053103058672, + "grad_norm": 0.857155978679657, + "learning_rate": 0.00019754322460305962, + "loss": 2.8029, + "step": 1507 + }, + { + "epoch": 0.12170123476716972, + "grad_norm": 0.8323284387588501, + "learning_rate": 0.00019753974553959314, + "loss": 2.7764, + "step": 1508 + }, + { + "epoch": 0.12178193850375273, + "grad_norm": 0.8557485938072205, + "learning_rate": 0.00019753626404518117, + "loss": 2.7448, + "step": 1509 + }, + { + "epoch": 0.12186264224033573, + "grad_norm": 0.8026818037033081, + "learning_rate": 0.00019753278011991058, + "loss": 2.7323, + "step": 1510 + }, + { + "epoch": 0.12194334597691874, + "grad_norm": 0.8578904271125793, + "learning_rate": 0.00019752929376386816, + "loss": 2.759, + "step": 1511 + }, + { + "epoch": 0.12202404971350174, + "grad_norm": 0.8617175221443176, + "learning_rate": 0.00019752580497714076, + "loss": 2.7641, + "step": 1512 + }, + { + "epoch": 0.12210475345008474, + "grad_norm": 0.8261943459510803, + "learning_rate": 0.00019752231375981538, + "loss": 2.7554, + "step": 1513 + }, + { + "epoch": 0.12218545718666775, + "grad_norm": 0.9984099268913269, + "learning_rate": 0.00019751882011197902, + "loss": 2.763, + "step": 1514 + }, + { + "epoch": 0.12226616092325075, + "grad_norm": 0.8014064431190491, + "learning_rate": 0.00019751532403371874, + "loss": 2.8083, + "step": 1515 + }, + { + "epoch": 0.12234686465983376, + "grad_norm": 0.9276653528213501, + "learning_rate": 0.0001975118255251217, + "loss": 2.8055, + "step": 1516 + }, + { + "epoch": 0.12242756839641676, + "grad_norm": 0.9365193843841553, + 
"learning_rate": 0.00019750832458627503, + "loss": 2.7397, + "step": 1517 + }, + { + "epoch": 0.12250827213299975, + "grad_norm": 0.8952646851539612, + "learning_rate": 0.00019750482121726605, + "loss": 2.8305, + "step": 1518 + }, + { + "epoch": 0.12258897586958276, + "grad_norm": 0.8395531177520752, + "learning_rate": 0.00019750131541818204, + "loss": 2.7852, + "step": 1519 + }, + { + "epoch": 0.12266967960616576, + "grad_norm": 0.8123572468757629, + "learning_rate": 0.0001974978071891104, + "loss": 2.831, + "step": 1520 + }, + { + "epoch": 0.12275038334274876, + "grad_norm": 0.8716141581535339, + "learning_rate": 0.00019749429653013851, + "loss": 2.8012, + "step": 1521 + }, + { + "epoch": 0.12283108707933177, + "grad_norm": 0.7848379611968994, + "learning_rate": 0.0001974907834413539, + "loss": 2.7812, + "step": 1522 + }, + { + "epoch": 0.12291179081591477, + "grad_norm": 0.834072470664978, + "learning_rate": 0.00019748726792284414, + "loss": 2.7442, + "step": 1523 + }, + { + "epoch": 0.12299249455249778, + "grad_norm": 0.8377225399017334, + "learning_rate": 0.0001974837499746968, + "loss": 2.7967, + "step": 1524 + }, + { + "epoch": 0.12307319828908078, + "grad_norm": 0.8809494376182556, + "learning_rate": 0.0001974802295969996, + "loss": 2.8042, + "step": 1525 + }, + { + "epoch": 0.12315390202566379, + "grad_norm": 0.8504741787910461, + "learning_rate": 0.00019747670678984028, + "loss": 2.7909, + "step": 1526 + }, + { + "epoch": 0.12323460576224679, + "grad_norm": 0.9444355368614197, + "learning_rate": 0.00019747318155330663, + "loss": 2.8567, + "step": 1527 + }, + { + "epoch": 0.1233153094988298, + "grad_norm": 0.859166145324707, + "learning_rate": 0.00019746965388748645, + "loss": 2.8305, + "step": 1528 + }, + { + "epoch": 0.1233960132354128, + "grad_norm": 0.8431086540222168, + "learning_rate": 0.00019746612379246777, + "loss": 2.7799, + "step": 1529 + }, + { + "epoch": 0.1234767169719958, + "grad_norm": 0.8872438669204712, + "learning_rate": 
0.00019746259126833846, + "loss": 2.8413, + "step": 1530 + }, + { + "epoch": 0.12355742070857881, + "grad_norm": 0.8698925375938416, + "learning_rate": 0.0001974590563151866, + "loss": 2.8446, + "step": 1531 + }, + { + "epoch": 0.12363812444516181, + "grad_norm": 0.8926429152488708, + "learning_rate": 0.0001974555189331003, + "loss": 2.7859, + "step": 1532 + }, + { + "epoch": 0.12371882818174482, + "grad_norm": 0.8089048862457275, + "learning_rate": 0.00019745197912216775, + "loss": 2.7985, + "step": 1533 + }, + { + "epoch": 0.12379953191832782, + "grad_norm": 0.8180400729179382, + "learning_rate": 0.0001974484368824771, + "loss": 2.7587, + "step": 1534 + }, + { + "epoch": 0.12388023565491083, + "grad_norm": 0.9584212303161621, + "learning_rate": 0.00019744489221411668, + "loss": 2.766, + "step": 1535 + }, + { + "epoch": 0.12396093939149383, + "grad_norm": 0.8425920009613037, + "learning_rate": 0.00019744134511717485, + "loss": 2.8125, + "step": 1536 + }, + { + "epoch": 0.12404164312807683, + "grad_norm": 0.9109299182891846, + "learning_rate": 0.00019743779559173996, + "loss": 2.8613, + "step": 1537 + }, + { + "epoch": 0.12412234686465984, + "grad_norm": 0.8840214610099792, + "learning_rate": 0.0001974342436379005, + "loss": 2.7603, + "step": 1538 + }, + { + "epoch": 0.12420305060124284, + "grad_norm": 0.8128962516784668, + "learning_rate": 0.00019743068925574502, + "loss": 2.7593, + "step": 1539 + }, + { + "epoch": 0.12428375433782585, + "grad_norm": 0.8150052428245544, + "learning_rate": 0.00019742713244536204, + "loss": 2.8099, + "step": 1540 + }, + { + "epoch": 0.12436445807440885, + "grad_norm": 0.8442968130111694, + "learning_rate": 0.00019742357320684027, + "loss": 2.7746, + "step": 1541 + }, + { + "epoch": 0.12444516181099186, + "grad_norm": 0.9347402453422546, + "learning_rate": 0.00019742001154026838, + "loss": 2.8247, + "step": 1542 + }, + { + "epoch": 0.12452586554757485, + "grad_norm": 0.8305966854095459, + "learning_rate": 0.00019741644744573512, + 
"loss": 2.7398, + "step": 1543 + }, + { + "epoch": 0.12460656928415785, + "grad_norm": 0.8811129927635193, + "learning_rate": 0.00019741288092332935, + "loss": 2.8014, + "step": 1544 + }, + { + "epoch": 0.12468727302074085, + "grad_norm": 1.0287303924560547, + "learning_rate": 0.00019740931197313996, + "loss": 2.8449, + "step": 1545 + }, + { + "epoch": 0.12476797675732386, + "grad_norm": 0.8499771356582642, + "learning_rate": 0.00019740574059525588, + "loss": 2.7845, + "step": 1546 + }, + { + "epoch": 0.12484868049390686, + "grad_norm": 0.8110969066619873, + "learning_rate": 0.00019740216678976614, + "loss": 2.7565, + "step": 1547 + }, + { + "epoch": 0.12492938423048987, + "grad_norm": 0.8530771136283875, + "learning_rate": 0.00019739859055675977, + "loss": 2.8098, + "step": 1548 + }, + { + "epoch": 0.12501008796707289, + "grad_norm": 0.8483901619911194, + "learning_rate": 0.00019739501189632591, + "loss": 2.812, + "step": 1549 + }, + { + "epoch": 0.1250907917036559, + "grad_norm": 0.7894467711448669, + "learning_rate": 0.00019739143080855378, + "loss": 2.8576, + "step": 1550 + }, + { + "epoch": 0.1251714954402389, + "grad_norm": 0.8270247578620911, + "learning_rate": 0.0001973878472935326, + "loss": 2.7613, + "step": 1551 + }, + { + "epoch": 0.1252521991768219, + "grad_norm": 0.8496212959289551, + "learning_rate": 0.00019738426135135174, + "loss": 2.8375, + "step": 1552 + }, + { + "epoch": 0.1253329029134049, + "grad_norm": 0.8465524911880493, + "learning_rate": 0.00019738067298210045, + "loss": 2.8023, + "step": 1553 + }, + { + "epoch": 0.1254136066499879, + "grad_norm": 0.7843824028968811, + "learning_rate": 0.00019737708218586826, + "loss": 2.7424, + "step": 1554 + }, + { + "epoch": 0.1254943103865709, + "grad_norm": 0.8310040235519409, + "learning_rate": 0.00019737348896274462, + "loss": 2.7608, + "step": 1555 + }, + { + "epoch": 0.1255750141231539, + "grad_norm": 0.7895017266273499, + "learning_rate": 0.00019736989331281914, + "loss": 2.7549, + "step": 1556 + 
}, + { + "epoch": 0.1256557178597369, + "grad_norm": 0.8140431642532349, + "learning_rate": 0.00019736629523618138, + "loss": 2.802, + "step": 1557 + }, + { + "epoch": 0.1257364215963199, + "grad_norm": 0.8026889562606812, + "learning_rate": 0.000197362694732921, + "loss": 2.7758, + "step": 1558 + }, + { + "epoch": 0.1258171253329029, + "grad_norm": 0.8018048405647278, + "learning_rate": 0.0001973590918031278, + "loss": 2.7729, + "step": 1559 + }, + { + "epoch": 0.1258978290694859, + "grad_norm": 0.8394612073898315, + "learning_rate": 0.00019735548644689147, + "loss": 2.7692, + "step": 1560 + }, + { + "epoch": 0.1259785328060689, + "grad_norm": 0.819804310798645, + "learning_rate": 0.00019735187866430198, + "loss": 2.6933, + "step": 1561 + }, + { + "epoch": 0.12605923654265191, + "grad_norm": 0.8094257116317749, + "learning_rate": 0.0001973482684554492, + "loss": 2.7722, + "step": 1562 + }, + { + "epoch": 0.12613994027923492, + "grad_norm": 0.8647315502166748, + "learning_rate": 0.00019734465582042305, + "loss": 2.787, + "step": 1563 + }, + { + "epoch": 0.12622064401581792, + "grad_norm": 0.8439335823059082, + "learning_rate": 0.00019734104075931367, + "loss": 2.8, + "step": 1564 + }, + { + "epoch": 0.12630134775240093, + "grad_norm": 0.852480947971344, + "learning_rate": 0.00019733742327221105, + "loss": 2.8656, + "step": 1565 + }, + { + "epoch": 0.12638205148898393, + "grad_norm": 0.813846230506897, + "learning_rate": 0.00019733380335920542, + "loss": 2.7733, + "step": 1566 + }, + { + "epoch": 0.12646275522556694, + "grad_norm": 0.7860896587371826, + "learning_rate": 0.00019733018102038698, + "loss": 2.8201, + "step": 1567 + }, + { + "epoch": 0.12654345896214994, + "grad_norm": 0.7857748866081238, + "learning_rate": 0.00019732655625584602, + "loss": 2.8726, + "step": 1568 + }, + { + "epoch": 0.12662416269873294, + "grad_norm": 0.8152899146080017, + "learning_rate": 0.00019732292906567286, + "loss": 2.7738, + "step": 1569 + }, + { + "epoch": 0.12670486643531595, + 
"grad_norm": 0.8281696438789368, + "learning_rate": 0.00019731929944995788, + "loss": 2.7966, + "step": 1570 + }, + { + "epoch": 0.12678557017189895, + "grad_norm": 0.8070773482322693, + "learning_rate": 0.00019731566740879158, + "loss": 2.6988, + "step": 1571 + }, + { + "epoch": 0.12686627390848196, + "grad_norm": 0.7859680652618408, + "learning_rate": 0.00019731203294226445, + "loss": 2.7241, + "step": 1572 + }, + { + "epoch": 0.12694697764506496, + "grad_norm": 0.7753982543945312, + "learning_rate": 0.0001973083960504671, + "loss": 2.7621, + "step": 1573 + }, + { + "epoch": 0.12702768138164797, + "grad_norm": 0.8063471913337708, + "learning_rate": 0.00019730475673349014, + "loss": 2.7298, + "step": 1574 + }, + { + "epoch": 0.12710838511823097, + "grad_norm": 0.7943962812423706, + "learning_rate": 0.0001973011149914243, + "loss": 2.7714, + "step": 1575 + }, + { + "epoch": 0.12718908885481398, + "grad_norm": 0.8297483325004578, + "learning_rate": 0.00019729747082436033, + "loss": 2.7743, + "step": 1576 + }, + { + "epoch": 0.12726979259139698, + "grad_norm": 0.8728111386299133, + "learning_rate": 0.000197293824232389, + "loss": 2.8251, + "step": 1577 + }, + { + "epoch": 0.12735049632797998, + "grad_norm": 0.8762480020523071, + "learning_rate": 0.00019729017521560128, + "loss": 2.8036, + "step": 1578 + }, + { + "epoch": 0.127431200064563, + "grad_norm": 0.9266185164451599, + "learning_rate": 0.00019728652377408806, + "loss": 2.7335, + "step": 1579 + }, + { + "epoch": 0.127511903801146, + "grad_norm": 0.9289839267730713, + "learning_rate": 0.00019728286990794037, + "loss": 2.7715, + "step": 1580 + }, + { + "epoch": 0.127592607537729, + "grad_norm": 0.8811823725700378, + "learning_rate": 0.0001972792136172493, + "loss": 2.7389, + "step": 1581 + }, + { + "epoch": 0.127673311274312, + "grad_norm": 0.8174294233322144, + "learning_rate": 0.00019727555490210588, + "loss": 2.7483, + "step": 1582 + }, + { + "epoch": 0.127754015010895, + "grad_norm": 0.8254107236862183, + 
"learning_rate": 0.00019727189376260137, + "loss": 2.7897, + "step": 1583 + }, + { + "epoch": 0.127834718747478, + "grad_norm": 0.8478763699531555, + "learning_rate": 0.000197268230198827, + "loss": 2.7394, + "step": 1584 + }, + { + "epoch": 0.12791542248406101, + "grad_norm": 0.8356192111968994, + "learning_rate": 0.00019726456421087404, + "loss": 2.7518, + "step": 1585 + }, + { + "epoch": 0.12799612622064402, + "grad_norm": 0.8523107767105103, + "learning_rate": 0.00019726089579883392, + "loss": 2.7893, + "step": 1586 + }, + { + "epoch": 0.12807682995722702, + "grad_norm": 0.9048579931259155, + "learning_rate": 0.00019725722496279804, + "loss": 2.7488, + "step": 1587 + }, + { + "epoch": 0.12815753369381003, + "grad_norm": 0.8242251873016357, + "learning_rate": 0.00019725355170285787, + "loss": 2.7544, + "step": 1588 + }, + { + "epoch": 0.12823823743039303, + "grad_norm": 0.8343983888626099, + "learning_rate": 0.00019724987601910497, + "loss": 2.7317, + "step": 1589 + }, + { + "epoch": 0.12831894116697604, + "grad_norm": 0.8084509372711182, + "learning_rate": 0.00019724619791163095, + "loss": 2.7822, + "step": 1590 + }, + { + "epoch": 0.12839964490355904, + "grad_norm": 0.8397380113601685, + "learning_rate": 0.00019724251738052745, + "loss": 2.8188, + "step": 1591 + }, + { + "epoch": 0.12848034864014204, + "grad_norm": 0.8558558821678162, + "learning_rate": 0.00019723883442588624, + "loss": 2.7623, + "step": 1592 + }, + { + "epoch": 0.12856105237672505, + "grad_norm": 0.7602639198303223, + "learning_rate": 0.0001972351490477991, + "loss": 2.7932, + "step": 1593 + }, + { + "epoch": 0.12864175611330805, + "grad_norm": 0.8379851579666138, + "learning_rate": 0.00019723146124635786, + "loss": 2.8296, + "step": 1594 + }, + { + "epoch": 0.12872245984989106, + "grad_norm": 0.8454548716545105, + "learning_rate": 0.00019722777102165444, + "loss": 2.8192, + "step": 1595 + }, + { + "epoch": 0.12880316358647406, + "grad_norm": 0.8344082832336426, + "learning_rate": 
0.0001972240783737808, + "loss": 2.7628, + "step": 1596 + }, + { + "epoch": 0.12888386732305707, + "grad_norm": 0.809093713760376, + "learning_rate": 0.000197220383302829, + "loss": 2.8055, + "step": 1597 + }, + { + "epoch": 0.12896457105964007, + "grad_norm": 0.7909694910049438, + "learning_rate": 0.0001972166858088911, + "loss": 2.7292, + "step": 1598 + }, + { + "epoch": 0.12904527479622308, + "grad_norm": 0.8350280523300171, + "learning_rate": 0.00019721298589205928, + "loss": 2.7671, + "step": 1599 + }, + { + "epoch": 0.12912597853280608, + "grad_norm": 0.7857616543769836, + "learning_rate": 0.00019720928355242568, + "loss": 2.729, + "step": 1600 + }, + { + "epoch": 0.12920668226938908, + "grad_norm": 0.7899746298789978, + "learning_rate": 0.0001972055787900827, + "loss": 2.8023, + "step": 1601 + }, + { + "epoch": 0.1292873860059721, + "grad_norm": 0.8604246377944946, + "learning_rate": 0.00019720187160512256, + "loss": 2.749, + "step": 1602 + }, + { + "epoch": 0.1293680897425551, + "grad_norm": 0.8517864942550659, + "learning_rate": 0.0001971981619976377, + "loss": 2.7203, + "step": 1603 + }, + { + "epoch": 0.1294487934791381, + "grad_norm": 0.8860471248626709, + "learning_rate": 0.00019719444996772056, + "loss": 2.7372, + "step": 1604 + }, + { + "epoch": 0.1295294972157211, + "grad_norm": 0.8355888724327087, + "learning_rate": 0.00019719073551546367, + "loss": 2.7284, + "step": 1605 + }, + { + "epoch": 0.1296102009523041, + "grad_norm": 0.7998479604721069, + "learning_rate": 0.00019718701864095955, + "loss": 2.7726, + "step": 1606 + }, + { + "epoch": 0.12969090468888708, + "grad_norm": 0.8564549088478088, + "learning_rate": 0.00019718329934430092, + "loss": 2.7334, + "step": 1607 + }, + { + "epoch": 0.1297716084254701, + "grad_norm": 0.8594443798065186, + "learning_rate": 0.00019717957762558044, + "loss": 2.7865, + "step": 1608 + }, + { + "epoch": 0.1298523121620531, + "grad_norm": 0.804553210735321, + "learning_rate": 0.00019717585348489082, + "loss": 
2.8094, + "step": 1609 + }, + { + "epoch": 0.1299330158986361, + "grad_norm": 0.7892553806304932, + "learning_rate": 0.0001971721269223249, + "loss": 2.7969, + "step": 1610 + }, + { + "epoch": 0.1300137196352191, + "grad_norm": 0.8703331351280212, + "learning_rate": 0.0001971683979379756, + "loss": 2.8192, + "step": 1611 + }, + { + "epoch": 0.1300944233718021, + "grad_norm": 0.8176589012145996, + "learning_rate": 0.00019716466653193582, + "loss": 2.7902, + "step": 1612 + }, + { + "epoch": 0.1301751271083851, + "grad_norm": 0.8305137157440186, + "learning_rate": 0.00019716093270429855, + "loss": 2.8202, + "step": 1613 + }, + { + "epoch": 0.1302558308449681, + "grad_norm": 0.8261505365371704, + "learning_rate": 0.00019715719645515688, + "loss": 2.7905, + "step": 1614 + }, + { + "epoch": 0.13033653458155112, + "grad_norm": 0.9465535879135132, + "learning_rate": 0.00019715345778460389, + "loss": 2.7965, + "step": 1615 + }, + { + "epoch": 0.13041723831813412, + "grad_norm": 0.8847100138664246, + "learning_rate": 0.00019714971669273275, + "loss": 2.8177, + "step": 1616 + }, + { + "epoch": 0.13049794205471713, + "grad_norm": 0.9768328666687012, + "learning_rate": 0.0001971459731796367, + "loss": 2.7668, + "step": 1617 + }, + { + "epoch": 0.13057864579130013, + "grad_norm": 0.7498586177825928, + "learning_rate": 0.0001971422272454091, + "loss": 2.761, + "step": 1618 + }, + { + "epoch": 0.13065934952788313, + "grad_norm": 1.0455373525619507, + "learning_rate": 0.00019713847889014325, + "loss": 2.7652, + "step": 1619 + }, + { + "epoch": 0.13074005326446614, + "grad_norm": 0.8484631180763245, + "learning_rate": 0.00019713472811393258, + "loss": 2.7858, + "step": 1620 + }, + { + "epoch": 0.13082075700104914, + "grad_norm": 0.8190686702728271, + "learning_rate": 0.00019713097491687057, + "loss": 2.7217, + "step": 1621 + }, + { + "epoch": 0.13090146073763215, + "grad_norm": 0.8866000175476074, + "learning_rate": 0.00019712721929905077, + "loss": 2.7868, + "step": 1622 + }, + { + 
"epoch": 0.13098216447421515, + "grad_norm": 0.8026713132858276, + "learning_rate": 0.00019712346126056677, + "loss": 2.7276, + "step": 1623 + }, + { + "epoch": 0.13106286821079816, + "grad_norm": 0.8306462168693542, + "learning_rate": 0.00019711970080151225, + "loss": 2.7747, + "step": 1624 + }, + { + "epoch": 0.13114357194738116, + "grad_norm": 0.8276618123054504, + "learning_rate": 0.0001971159379219809, + "loss": 2.7146, + "step": 1625 + }, + { + "epoch": 0.13122427568396416, + "grad_norm": 0.9749011993408203, + "learning_rate": 0.00019711217262206648, + "loss": 2.8731, + "step": 1626 + }, + { + "epoch": 0.13130497942054717, + "grad_norm": 0.828484058380127, + "learning_rate": 0.00019710840490186292, + "loss": 2.803, + "step": 1627 + }, + { + "epoch": 0.13138568315713017, + "grad_norm": 0.8095957636833191, + "learning_rate": 0.00019710463476146402, + "loss": 2.7751, + "step": 1628 + }, + { + "epoch": 0.13146638689371318, + "grad_norm": 0.8731853365898132, + "learning_rate": 0.0001971008622009638, + "loss": 2.8274, + "step": 1629 + }, + { + "epoch": 0.13154709063029618, + "grad_norm": 0.8180200457572937, + "learning_rate": 0.00019709708722045628, + "loss": 2.813, + "step": 1630 + }, + { + "epoch": 0.13162779436687919, + "grad_norm": 0.7740067839622498, + "learning_rate": 0.00019709330982003553, + "loss": 2.7319, + "step": 1631 + }, + { + "epoch": 0.1317084981034622, + "grad_norm": 0.8439326882362366, + "learning_rate": 0.0001970895299997957, + "loss": 2.8182, + "step": 1632 + }, + { + "epoch": 0.1317892018400452, + "grad_norm": 0.8254802823066711, + "learning_rate": 0.000197085747759831, + "loss": 2.7874, + "step": 1633 + }, + { + "epoch": 0.1318699055766282, + "grad_norm": 0.8128175139427185, + "learning_rate": 0.00019708196310023562, + "loss": 2.8125, + "step": 1634 + }, + { + "epoch": 0.1319506093132112, + "grad_norm": 0.8664820790290833, + "learning_rate": 0.00019707817602110402, + "loss": 2.8446, + "step": 1635 + }, + { + "epoch": 0.1320313130497942, + 
"grad_norm": 0.8101332783699036, + "learning_rate": 0.00019707438652253044, + "loss": 2.8027, + "step": 1636 + }, + { + "epoch": 0.1321120167863772, + "grad_norm": 0.8296725153923035, + "learning_rate": 0.00019707059460460945, + "loss": 2.7677, + "step": 1637 + }, + { + "epoch": 0.13219272052296022, + "grad_norm": 0.7321150898933411, + "learning_rate": 0.0001970668002674355, + "loss": 2.6991, + "step": 1638 + }, + { + "epoch": 0.13227342425954322, + "grad_norm": 0.8321375250816345, + "learning_rate": 0.0001970630035111031, + "loss": 2.6948, + "step": 1639 + }, + { + "epoch": 0.13235412799612623, + "grad_norm": 0.7622714042663574, + "learning_rate": 0.00019705920433570694, + "loss": 2.6957, + "step": 1640 + }, + { + "epoch": 0.13243483173270923, + "grad_norm": 0.8413416147232056, + "learning_rate": 0.00019705540274134173, + "loss": 2.7277, + "step": 1641 + }, + { + "epoch": 0.13251553546929223, + "grad_norm": 0.8798941373825073, + "learning_rate": 0.00019705159872810218, + "loss": 2.7699, + "step": 1642 + }, + { + "epoch": 0.13259623920587524, + "grad_norm": 0.788287341594696, + "learning_rate": 0.00019704779229608304, + "loss": 2.7933, + "step": 1643 + }, + { + "epoch": 0.13267694294245824, + "grad_norm": 0.8547430634498596, + "learning_rate": 0.00019704398344537927, + "loss": 2.7706, + "step": 1644 + }, + { + "epoch": 0.13275764667904125, + "grad_norm": 0.8474008440971375, + "learning_rate": 0.00019704017217608575, + "loss": 2.8005, + "step": 1645 + }, + { + "epoch": 0.13283835041562425, + "grad_norm": 0.8636945486068726, + "learning_rate": 0.00019703635848829747, + "loss": 2.8241, + "step": 1646 + }, + { + "epoch": 0.13291905415220726, + "grad_norm": 0.8158168792724609, + "learning_rate": 0.00019703254238210947, + "loss": 2.7576, + "step": 1647 + }, + { + "epoch": 0.13299975788879026, + "grad_norm": 0.8420887589454651, + "learning_rate": 0.0001970287238576169, + "loss": 2.7677, + "step": 1648 + }, + { + "epoch": 0.13308046162537326, + "grad_norm": 
0.7910059690475464, + "learning_rate": 0.00019702490291491486, + "loss": 2.7807, + "step": 1649 + }, + { + "epoch": 0.13316116536195627, + "grad_norm": 0.8308143615722656, + "learning_rate": 0.00019702107955409863, + "loss": 2.7698, + "step": 1650 + }, + { + "epoch": 0.13324186909853927, + "grad_norm": 0.8215764760971069, + "learning_rate": 0.00019701725377526349, + "loss": 2.8263, + "step": 1651 + }, + { + "epoch": 0.13332257283512228, + "grad_norm": 0.8780504465103149, + "learning_rate": 0.00019701342557850476, + "loss": 2.8032, + "step": 1652 + }, + { + "epoch": 0.13340327657170528, + "grad_norm": 0.8125136494636536, + "learning_rate": 0.0001970095949639179, + "loss": 2.8317, + "step": 1653 + }, + { + "epoch": 0.13348398030828829, + "grad_norm": 0.8170902132987976, + "learning_rate": 0.00019700576193159831, + "loss": 2.7528, + "step": 1654 + }, + { + "epoch": 0.1335646840448713, + "grad_norm": 0.8318637013435364, + "learning_rate": 0.00019700192648164157, + "loss": 2.7963, + "step": 1655 + }, + { + "epoch": 0.1336453877814543, + "grad_norm": 0.8445270657539368, + "learning_rate": 0.00019699808861414327, + "loss": 2.772, + "step": 1656 + }, + { + "epoch": 0.1337260915180373, + "grad_norm": 0.7908959984779358, + "learning_rate": 0.00019699424832919906, + "loss": 2.7528, + "step": 1657 + }, + { + "epoch": 0.13380679525462028, + "grad_norm": 0.8153900504112244, + "learning_rate": 0.00019699040562690462, + "loss": 2.7643, + "step": 1658 + }, + { + "epoch": 0.13388749899120328, + "grad_norm": 0.86302250623703, + "learning_rate": 0.0001969865605073557, + "loss": 2.8037, + "step": 1659 + }, + { + "epoch": 0.13396820272778628, + "grad_norm": 0.8373419046401978, + "learning_rate": 0.0001969827129706482, + "loss": 2.7647, + "step": 1660 + }, + { + "epoch": 0.1340489064643693, + "grad_norm": 0.8166481852531433, + "learning_rate": 0.00019697886301687798, + "loss": 2.8333, + "step": 1661 + }, + { + "epoch": 0.1341296102009523, + "grad_norm": 0.7807812094688416, + 
"learning_rate": 0.00019697501064614098, + "loss": 2.7495, + "step": 1662 + }, + { + "epoch": 0.1342103139375353, + "grad_norm": 0.8375338315963745, + "learning_rate": 0.00019697115585853324, + "loss": 2.7518, + "step": 1663 + }, + { + "epoch": 0.1342910176741183, + "grad_norm": 0.7392182350158691, + "learning_rate": 0.00019696729865415077, + "loss": 2.758, + "step": 1664 + }, + { + "epoch": 0.1343717214107013, + "grad_norm": 0.8041971921920776, + "learning_rate": 0.00019696343903308978, + "loss": 2.7485, + "step": 1665 + }, + { + "epoch": 0.1344524251472843, + "grad_norm": 0.789310097694397, + "learning_rate": 0.00019695957699544643, + "loss": 2.8179, + "step": 1666 + }, + { + "epoch": 0.13453312888386731, + "grad_norm": 0.7643609642982483, + "learning_rate": 0.00019695571254131693, + "loss": 2.7791, + "step": 1667 + }, + { + "epoch": 0.13461383262045032, + "grad_norm": 0.8284661769866943, + "learning_rate": 0.00019695184567079766, + "loss": 2.717, + "step": 1668 + }, + { + "epoch": 0.13469453635703332, + "grad_norm": 0.7620903253555298, + "learning_rate": 0.00019694797638398494, + "loss": 2.7808, + "step": 1669 + }, + { + "epoch": 0.13477524009361633, + "grad_norm": 0.9123913645744324, + "learning_rate": 0.00019694410468097524, + "loss": 2.7648, + "step": 1670 + }, + { + "epoch": 0.13485594383019933, + "grad_norm": 0.735518217086792, + "learning_rate": 0.000196940230561865, + "loss": 2.7653, + "step": 1671 + }, + { + "epoch": 0.13493664756678234, + "grad_norm": 0.8363413214683533, + "learning_rate": 0.00019693635402675085, + "loss": 2.766, + "step": 1672 + }, + { + "epoch": 0.13501735130336534, + "grad_norm": 0.8206491470336914, + "learning_rate": 0.00019693247507572936, + "loss": 2.7829, + "step": 1673 + }, + { + "epoch": 0.13509805503994834, + "grad_norm": 0.7726099491119385, + "learning_rate": 0.0001969285937088972, + "loss": 2.7381, + "step": 1674 + }, + { + "epoch": 0.13517875877653135, + "grad_norm": 0.8970316052436829, + "learning_rate": 
0.0001969247099263511, + "loss": 2.7836, + "step": 1675 + }, + { + "epoch": 0.13525946251311435, + "grad_norm": 0.7966172099113464, + "learning_rate": 0.00019692082372818788, + "loss": 2.7135, + "step": 1676 + }, + { + "epoch": 0.13534016624969736, + "grad_norm": 0.8583024740219116, + "learning_rate": 0.00019691693511450438, + "loss": 2.7908, + "step": 1677 + }, + { + "epoch": 0.13542086998628036, + "grad_norm": 0.9430457353591919, + "learning_rate": 0.0001969130440853975, + "loss": 2.7311, + "step": 1678 + }, + { + "epoch": 0.13550157372286337, + "grad_norm": 0.8066009879112244, + "learning_rate": 0.00019690915064096424, + "loss": 2.7039, + "step": 1679 + }, + { + "epoch": 0.13558227745944637, + "grad_norm": 1.0169655084609985, + "learning_rate": 0.0001969052547813016, + "loss": 2.7832, + "step": 1680 + }, + { + "epoch": 0.13566298119602938, + "grad_norm": 0.8606080412864685, + "learning_rate": 0.00019690135650650672, + "loss": 2.751, + "step": 1681 + }, + { + "epoch": 0.13574368493261238, + "grad_norm": 0.8625333905220032, + "learning_rate": 0.00019689745581667674, + "loss": 2.761, + "step": 1682 + }, + { + "epoch": 0.13582438866919538, + "grad_norm": 0.9304285645484924, + "learning_rate": 0.00019689355271190886, + "loss": 2.7566, + "step": 1683 + }, + { + "epoch": 0.1359050924057784, + "grad_norm": 0.793397068977356, + "learning_rate": 0.00019688964719230035, + "loss": 2.7648, + "step": 1684 + }, + { + "epoch": 0.1359857961423614, + "grad_norm": 0.8496749401092529, + "learning_rate": 0.00019688573925794858, + "loss": 2.7461, + "step": 1685 + }, + { + "epoch": 0.1360664998789444, + "grad_norm": 0.7807914018630981, + "learning_rate": 0.0001968818289089509, + "loss": 2.8266, + "step": 1686 + }, + { + "epoch": 0.1361472036155274, + "grad_norm": 0.8186607956886292, + "learning_rate": 0.0001968779161454048, + "loss": 2.8447, + "step": 1687 + }, + { + "epoch": 0.1362279073521104, + "grad_norm": 0.8007118701934814, + "learning_rate": 0.0001968740009674078, + "loss": 
2.7888, + "step": 1688 + }, + { + "epoch": 0.1363086110886934, + "grad_norm": 0.8735570311546326, + "learning_rate": 0.00019687008337505749, + "loss": 2.7152, + "step": 1689 + }, + { + "epoch": 0.13638931482527641, + "grad_norm": 0.8546476364135742, + "learning_rate": 0.00019686616336845144, + "loss": 2.8113, + "step": 1690 + }, + { + "epoch": 0.13647001856185942, + "grad_norm": 0.9156736135482788, + "learning_rate": 0.0001968622409476874, + "loss": 2.7561, + "step": 1691 + }, + { + "epoch": 0.13655072229844242, + "grad_norm": 0.8091925382614136, + "learning_rate": 0.0001968583161128631, + "loss": 2.7384, + "step": 1692 + }, + { + "epoch": 0.13663142603502543, + "grad_norm": 0.7871039509773254, + "learning_rate": 0.0001968543888640764, + "loss": 2.7138, + "step": 1693 + }, + { + "epoch": 0.13671212977160843, + "grad_norm": 0.9537062048912048, + "learning_rate": 0.00019685045920142516, + "loss": 2.7726, + "step": 1694 + }, + { + "epoch": 0.13679283350819144, + "grad_norm": 0.8663280010223389, + "learning_rate": 0.00019684652712500728, + "loss": 2.7509, + "step": 1695 + }, + { + "epoch": 0.13687353724477444, + "grad_norm": 0.8717214465141296, + "learning_rate": 0.0001968425926349208, + "loss": 2.791, + "step": 1696 + }, + { + "epoch": 0.13695424098135744, + "grad_norm": 0.8942584991455078, + "learning_rate": 0.00019683865573126374, + "loss": 2.77, + "step": 1697 + }, + { + "epoch": 0.13703494471794045, + "grad_norm": 0.8243421316146851, + "learning_rate": 0.00019683471641413424, + "loss": 2.8063, + "step": 1698 + }, + { + "epoch": 0.13711564845452345, + "grad_norm": 0.8618699908256531, + "learning_rate": 0.0001968307746836305, + "loss": 2.6872, + "step": 1699 + }, + { + "epoch": 0.13719635219110646, + "grad_norm": 0.7931695580482483, + "learning_rate": 0.00019682683053985072, + "loss": 2.7495, + "step": 1700 + }, + { + "epoch": 0.13727705592768946, + "grad_norm": 0.7549482583999634, + "learning_rate": 0.00019682288398289324, + "loss": 2.7543, + "step": 1701 + }, + { 
+ "epoch": 0.13735775966427247, + "grad_norm": 0.7953789234161377, + "learning_rate": 0.00019681893501285636, + "loss": 2.6895, + "step": 1702 + }, + { + "epoch": 0.13743846340085547, + "grad_norm": 0.7916574478149414, + "learning_rate": 0.00019681498362983857, + "loss": 2.819, + "step": 1703 + }, + { + "epoch": 0.13751916713743847, + "grad_norm": 0.7986735105514526, + "learning_rate": 0.0001968110298339383, + "loss": 2.8062, + "step": 1704 + }, + { + "epoch": 0.13759987087402148, + "grad_norm": 0.8601658940315247, + "learning_rate": 0.00019680707362525407, + "loss": 2.7625, + "step": 1705 + }, + { + "epoch": 0.13768057461060448, + "grad_norm": 0.8888362050056458, + "learning_rate": 0.00019680311500388454, + "loss": 2.7747, + "step": 1706 + }, + { + "epoch": 0.1377612783471875, + "grad_norm": 0.7762896418571472, + "learning_rate": 0.00019679915396992833, + "loss": 2.7959, + "step": 1707 + }, + { + "epoch": 0.1378419820837705, + "grad_norm": 0.8942253589630127, + "learning_rate": 0.00019679519052348416, + "loss": 2.7717, + "step": 1708 + }, + { + "epoch": 0.13792268582035347, + "grad_norm": 0.8388909697532654, + "learning_rate": 0.00019679122466465082, + "loss": 2.7448, + "step": 1709 + }, + { + "epoch": 0.13800338955693647, + "grad_norm": 0.8826024532318115, + "learning_rate": 0.00019678725639352712, + "loss": 2.7307, + "step": 1710 + }, + { + "epoch": 0.13808409329351948, + "grad_norm": 0.8972313404083252, + "learning_rate": 0.00019678328571021204, + "loss": 2.7619, + "step": 1711 + }, + { + "epoch": 0.13816479703010248, + "grad_norm": 0.9373044371604919, + "learning_rate": 0.00019677931261480444, + "loss": 2.7664, + "step": 1712 + }, + { + "epoch": 0.1382455007666855, + "grad_norm": 0.8060994148254395, + "learning_rate": 0.00019677533710740343, + "loss": 2.7707, + "step": 1713 + }, + { + "epoch": 0.1383262045032685, + "grad_norm": 0.8324100971221924, + "learning_rate": 0.000196771359188108, + "loss": 2.8249, + "step": 1714 + }, + { + "epoch": 0.1384069082398515, 
+ "grad_norm": 0.879176676273346, + "learning_rate": 0.00019676737885701738, + "loss": 2.7767, + "step": 1715 + }, + { + "epoch": 0.1384876119764345, + "grad_norm": 0.8823966979980469, + "learning_rate": 0.0001967633961142307, + "loss": 2.791, + "step": 1716 + }, + { + "epoch": 0.1385683157130175, + "grad_norm": 0.8176039457321167, + "learning_rate": 0.00019675941095984728, + "loss": 2.8225, + "step": 1717 + }, + { + "epoch": 0.1386490194496005, + "grad_norm": 0.8005076050758362, + "learning_rate": 0.00019675542339396635, + "loss": 2.8175, + "step": 1718 + }, + { + "epoch": 0.1387297231861835, + "grad_norm": 0.800854504108429, + "learning_rate": 0.0001967514334166874, + "loss": 2.8226, + "step": 1719 + }, + { + "epoch": 0.13881042692276652, + "grad_norm": 0.7941261529922485, + "learning_rate": 0.00019674744102810978, + "loss": 2.7488, + "step": 1720 + }, + { + "epoch": 0.13889113065934952, + "grad_norm": 0.7955947518348694, + "learning_rate": 0.00019674344622833302, + "loss": 2.7749, + "step": 1721 + }, + { + "epoch": 0.13897183439593253, + "grad_norm": 0.8353856205940247, + "learning_rate": 0.00019673944901745674, + "loss": 2.7982, + "step": 1722 + }, + { + "epoch": 0.13905253813251553, + "grad_norm": 0.8711503744125366, + "learning_rate": 0.00019673544939558047, + "loss": 2.8007, + "step": 1723 + }, + { + "epoch": 0.13913324186909853, + "grad_norm": 0.8525274991989136, + "learning_rate": 0.00019673144736280396, + "loss": 2.7423, + "step": 1724 + }, + { + "epoch": 0.13921394560568154, + "grad_norm": 0.8143991231918335, + "learning_rate": 0.0001967274429192269, + "loss": 2.7752, + "step": 1725 + }, + { + "epoch": 0.13929464934226454, + "grad_norm": 0.8508228063583374, + "learning_rate": 0.00019672343606494912, + "loss": 2.7422, + "step": 1726 + }, + { + "epoch": 0.13937535307884755, + "grad_norm": 0.8320932984352112, + "learning_rate": 0.0001967194268000705, + "loss": 2.7598, + "step": 1727 + }, + { + "epoch": 0.13945605681543055, + "grad_norm": 0.8233908414840698, 
+ "learning_rate": 0.00019671541512469092, + "loss": 2.7834, + "step": 1728 + }, + { + "epoch": 0.13953676055201356, + "grad_norm": 0.8097162246704102, + "learning_rate": 0.00019671140103891038, + "loss": 2.7856, + "step": 1729 + }, + { + "epoch": 0.13961746428859656, + "grad_norm": 0.9043141007423401, + "learning_rate": 0.0001967073845428289, + "loss": 2.8047, + "step": 1730 + }, + { + "epoch": 0.13969816802517956, + "grad_norm": 0.9118517637252808, + "learning_rate": 0.00019670336563654662, + "loss": 2.789, + "step": 1731 + }, + { + "epoch": 0.13977887176176257, + "grad_norm": 0.8016074895858765, + "learning_rate": 0.00019669934432016368, + "loss": 2.7506, + "step": 1732 + }, + { + "epoch": 0.13985957549834557, + "grad_norm": 0.8376848697662354, + "learning_rate": 0.0001966953205937803, + "loss": 2.7832, + "step": 1733 + }, + { + "epoch": 0.13994027923492858, + "grad_norm": 0.8511834144592285, + "learning_rate": 0.0001966912944574968, + "loss": 2.7564, + "step": 1734 + }, + { + "epoch": 0.14002098297151158, + "grad_norm": 0.7796351909637451, + "learning_rate": 0.00019668726591141344, + "loss": 2.7489, + "step": 1735 + }, + { + "epoch": 0.14010168670809459, + "grad_norm": 0.8204767107963562, + "learning_rate": 0.00019668323495563068, + "loss": 2.7634, + "step": 1736 + }, + { + "epoch": 0.1401823904446776, + "grad_norm": 0.9049975872039795, + "learning_rate": 0.000196679201590249, + "loss": 2.7863, + "step": 1737 + }, + { + "epoch": 0.1402630941812606, + "grad_norm": 0.7473673224449158, + "learning_rate": 0.0001966751658153689, + "loss": 2.7557, + "step": 1738 + }, + { + "epoch": 0.1403437979178436, + "grad_norm": 0.7765525579452515, + "learning_rate": 0.0001966711276310909, + "loss": 2.7865, + "step": 1739 + }, + { + "epoch": 0.1404245016544266, + "grad_norm": 0.8766517043113708, + "learning_rate": 0.00019666708703751576, + "loss": 2.7873, + "step": 1740 + }, + { + "epoch": 0.1405052053910096, + "grad_norm": 0.8351505994796753, + "learning_rate": 
0.00019666304403474408, + "loss": 2.7355, + "step": 1741 + }, + { + "epoch": 0.1405859091275926, + "grad_norm": 0.7612324953079224, + "learning_rate": 0.00019665899862287667, + "loss": 2.7608, + "step": 1742 + }, + { + "epoch": 0.14066661286417562, + "grad_norm": 0.894249439239502, + "learning_rate": 0.00019665495080201434, + "loss": 2.7469, + "step": 1743 + }, + { + "epoch": 0.14074731660075862, + "grad_norm": 0.8528907895088196, + "learning_rate": 0.00019665090057225803, + "loss": 2.773, + "step": 1744 + }, + { + "epoch": 0.14082802033734163, + "grad_norm": 0.7718498706817627, + "learning_rate": 0.00019664684793370855, + "loss": 2.8045, + "step": 1745 + }, + { + "epoch": 0.14090872407392463, + "grad_norm": 0.8013718128204346, + "learning_rate": 0.00019664279288646706, + "loss": 2.7665, + "step": 1746 + }, + { + "epoch": 0.14098942781050763, + "grad_norm": 0.828803539276123, + "learning_rate": 0.00019663873543063448, + "loss": 2.7846, + "step": 1747 + }, + { + "epoch": 0.14107013154709064, + "grad_norm": 0.8349393606185913, + "learning_rate": 0.00019663467556631204, + "loss": 2.7405, + "step": 1748 + }, + { + "epoch": 0.14115083528367364, + "grad_norm": 0.8273345232009888, + "learning_rate": 0.00019663061329360085, + "loss": 2.7578, + "step": 1749 + }, + { + "epoch": 0.14123153902025665, + "grad_norm": 0.7989444136619568, + "learning_rate": 0.0001966265486126022, + "loss": 2.739, + "step": 1750 + }, + { + "epoch": 0.14131224275683965, + "grad_norm": 0.8690519332885742, + "learning_rate": 0.00019662248152341736, + "loss": 2.7566, + "step": 1751 + }, + { + "epoch": 0.14139294649342266, + "grad_norm": 0.8453623056411743, + "learning_rate": 0.0001966184120261477, + "loss": 2.8572, + "step": 1752 + }, + { + "epoch": 0.14147365023000566, + "grad_norm": 0.8396254777908325, + "learning_rate": 0.00019661434012089468, + "loss": 2.786, + "step": 1753 + }, + { + "epoch": 0.14155435396658866, + "grad_norm": 0.7643738389015198, + "learning_rate": 0.00019661026580775973, + 
"loss": 2.8193, + "step": 1754 + }, + { + "epoch": 0.14163505770317167, + "grad_norm": 0.8124154806137085, + "learning_rate": 0.00019660618908684443, + "loss": 2.7754, + "step": 1755 + }, + { + "epoch": 0.14171576143975467, + "grad_norm": 0.8620683550834656, + "learning_rate": 0.00019660210995825036, + "loss": 2.7827, + "step": 1756 + }, + { + "epoch": 0.14179646517633768, + "grad_norm": 0.8241196274757385, + "learning_rate": 0.0001965980284220792, + "loss": 2.7573, + "step": 1757 + }, + { + "epoch": 0.14187716891292068, + "grad_norm": 0.8264089822769165, + "learning_rate": 0.00019659394447843262, + "loss": 2.8214, + "step": 1758 + }, + { + "epoch": 0.14195787264950369, + "grad_norm": 0.9129722118377686, + "learning_rate": 0.00019658985812741247, + "loss": 2.7962, + "step": 1759 + }, + { + "epoch": 0.14203857638608666, + "grad_norm": 0.7976365089416504, + "learning_rate": 0.00019658576936912057, + "loss": 2.7534, + "step": 1760 + }, + { + "epoch": 0.14211928012266967, + "grad_norm": 0.7587228417396545, + "learning_rate": 0.00019658167820365882, + "loss": 2.7083, + "step": 1761 + }, + { + "epoch": 0.14219998385925267, + "grad_norm": 0.757882833480835, + "learning_rate": 0.00019657758463112918, + "loss": 2.7135, + "step": 1762 + }, + { + "epoch": 0.14228068759583568, + "grad_norm": 0.8541501760482788, + "learning_rate": 0.00019657348865163369, + "loss": 2.7833, + "step": 1763 + }, + { + "epoch": 0.14236139133241868, + "grad_norm": 0.7708966135978699, + "learning_rate": 0.00019656939026527442, + "loss": 2.7128, + "step": 1764 + }, + { + "epoch": 0.14244209506900168, + "grad_norm": 0.8733000159263611, + "learning_rate": 0.00019656528947215347, + "loss": 2.7597, + "step": 1765 + }, + { + "epoch": 0.1425227988055847, + "grad_norm": 0.7913360595703125, + "learning_rate": 0.0001965611862723731, + "loss": 2.7681, + "step": 1766 + }, + { + "epoch": 0.1426035025421677, + "grad_norm": 0.8692380785942078, + "learning_rate": 0.00019655708066603555, + "loss": 2.7587, + "step": 
1767 + }, + { + "epoch": 0.1426842062787507, + "grad_norm": 0.8231006860733032, + "learning_rate": 0.00019655297265324317, + "loss": 2.772, + "step": 1768 + }, + { + "epoch": 0.1427649100153337, + "grad_norm": 0.7373722791671753, + "learning_rate": 0.0001965488622340983, + "loss": 2.7875, + "step": 1769 + }, + { + "epoch": 0.1428456137519167, + "grad_norm": 0.8614751696586609, + "learning_rate": 0.0001965447494087034, + "loss": 2.7962, + "step": 1770 + }, + { + "epoch": 0.1429263174884997, + "grad_norm": 0.8336494565010071, + "learning_rate": 0.000196540634177161, + "loss": 2.7072, + "step": 1771 + }, + { + "epoch": 0.14300702122508271, + "grad_norm": 0.844292163848877, + "learning_rate": 0.00019653651653957362, + "loss": 2.8043, + "step": 1772 + }, + { + "epoch": 0.14308772496166572, + "grad_norm": 0.7366824150085449, + "learning_rate": 0.0001965323964960439, + "loss": 2.7296, + "step": 1773 + }, + { + "epoch": 0.14316842869824872, + "grad_norm": 0.75767982006073, + "learning_rate": 0.0001965282740466745, + "loss": 2.7946, + "step": 1774 + }, + { + "epoch": 0.14324913243483173, + "grad_norm": 0.8361382484436035, + "learning_rate": 0.00019652414919156823, + "loss": 2.7232, + "step": 1775 + }, + { + "epoch": 0.14332983617141473, + "grad_norm": 0.8473719358444214, + "learning_rate": 0.0001965200219308278, + "loss": 2.774, + "step": 1776 + }, + { + "epoch": 0.14341053990799774, + "grad_norm": 0.7446423172950745, + "learning_rate": 0.00019651589226455613, + "loss": 2.7439, + "step": 1777 + }, + { + "epoch": 0.14349124364458074, + "grad_norm": 0.8332851529121399, + "learning_rate": 0.00019651176019285616, + "loss": 2.7891, + "step": 1778 + }, + { + "epoch": 0.14357194738116374, + "grad_norm": 0.885313868522644, + "learning_rate": 0.0001965076257158308, + "loss": 2.7677, + "step": 1779 + }, + { + "epoch": 0.14365265111774675, + "grad_norm": 0.8506965637207031, + "learning_rate": 0.00019650348883358315, + "loss": 2.8112, + "step": 1780 + }, + { + "epoch": 
0.14373335485432975, + "grad_norm": 0.8415799736976624, + "learning_rate": 0.0001964993495462163, + "loss": 2.8242, + "step": 1781 + }, + { + "epoch": 0.14381405859091276, + "grad_norm": 0.8501513004302979, + "learning_rate": 0.00019649520785383338, + "loss": 2.8352, + "step": 1782 + }, + { + "epoch": 0.14389476232749576, + "grad_norm": 0.7839778065681458, + "learning_rate": 0.00019649106375653767, + "loss": 2.7194, + "step": 1783 + }, + { + "epoch": 0.14397546606407877, + "grad_norm": 0.8013346195220947, + "learning_rate": 0.00019648691725443243, + "loss": 2.7665, + "step": 1784 + }, + { + "epoch": 0.14405616980066177, + "grad_norm": 1.0338317155838013, + "learning_rate": 0.00019648276834762095, + "loss": 2.8599, + "step": 1785 + }, + { + "epoch": 0.14413687353724478, + "grad_norm": 0.898417592048645, + "learning_rate": 0.0001964786170362067, + "loss": 2.7192, + "step": 1786 + }, + { + "epoch": 0.14421757727382778, + "grad_norm": 0.8876320123672485, + "learning_rate": 0.00019647446332029313, + "loss": 2.7722, + "step": 1787 + }, + { + "epoch": 0.14429828101041078, + "grad_norm": 0.819461464881897, + "learning_rate": 0.00019647030719998373, + "loss": 2.7698, + "step": 1788 + }, + { + "epoch": 0.1443789847469938, + "grad_norm": 0.848380446434021, + "learning_rate": 0.0001964661486753821, + "loss": 2.7894, + "step": 1789 + }, + { + "epoch": 0.1444596884835768, + "grad_norm": 0.8343753814697266, + "learning_rate": 0.0001964619877465919, + "loss": 2.699, + "step": 1790 + }, + { + "epoch": 0.1445403922201598, + "grad_norm": 0.8718340396881104, + "learning_rate": 0.0001964578244137168, + "loss": 2.7313, + "step": 1791 + }, + { + "epoch": 0.1446210959567428, + "grad_norm": 0.866122841835022, + "learning_rate": 0.00019645365867686056, + "loss": 2.7112, + "step": 1792 + }, + { + "epoch": 0.1447017996933258, + "grad_norm": 0.8351789712905884, + "learning_rate": 0.000196449490536127, + "loss": 2.7765, + "step": 1793 + }, + { + "epoch": 0.1447825034299088, + "grad_norm": 
0.8628408312797546, + "learning_rate": 0.00019644531999162004, + "loss": 2.7375, + "step": 1794 + }, + { + "epoch": 0.14486320716649181, + "grad_norm": 0.8414484858512878, + "learning_rate": 0.00019644114704344358, + "loss": 2.7502, + "step": 1795 + }, + { + "epoch": 0.14494391090307482, + "grad_norm": 0.9092586636543274, + "learning_rate": 0.00019643697169170166, + "loss": 2.7714, + "step": 1796 + }, + { + "epoch": 0.14502461463965782, + "grad_norm": 0.8458060622215271, + "learning_rate": 0.0001964327939364983, + "loss": 2.8376, + "step": 1797 + }, + { + "epoch": 0.14510531837624083, + "grad_norm": 0.8150759935379028, + "learning_rate": 0.00019642861377793764, + "loss": 2.7147, + "step": 1798 + }, + { + "epoch": 0.14518602211282383, + "grad_norm": 0.9008790850639343, + "learning_rate": 0.00019642443121612387, + "loss": 2.7786, + "step": 1799 + }, + { + "epoch": 0.14526672584940684, + "grad_norm": 0.848671555519104, + "learning_rate": 0.00019642024625116117, + "loss": 2.7813, + "step": 1800 + }, + { + "epoch": 0.14534742958598984, + "grad_norm": 0.8035007119178772, + "learning_rate": 0.00019641605888315393, + "loss": 2.7988, + "step": 1801 + }, + { + "epoch": 0.14542813332257284, + "grad_norm": 0.8210242390632629, + "learning_rate": 0.00019641186911220645, + "loss": 2.8451, + "step": 1802 + }, + { + "epoch": 0.14550883705915585, + "grad_norm": 0.8852066397666931, + "learning_rate": 0.00019640767693842318, + "loss": 2.7492, + "step": 1803 + }, + { + "epoch": 0.14558954079573885, + "grad_norm": 0.8421196937561035, + "learning_rate": 0.0001964034823619086, + "loss": 2.759, + "step": 1804 + }, + { + "epoch": 0.14567024453232186, + "grad_norm": 0.8166298866271973, + "learning_rate": 0.00019639928538276724, + "loss": 2.7942, + "step": 1805 + }, + { + "epoch": 0.14575094826890486, + "grad_norm": 0.8502809405326843, + "learning_rate": 0.00019639508600110368, + "loss": 2.7829, + "step": 1806 + }, + { + "epoch": 0.14583165200548787, + "grad_norm": 0.8371078372001648, + 
"learning_rate": 0.0001963908842170226, + "loss": 2.7168, + "step": 1807 + }, + { + "epoch": 0.14591235574207087, + "grad_norm": 0.8148230910301208, + "learning_rate": 0.0001963866800306287, + "loss": 2.7706, + "step": 1808 + }, + { + "epoch": 0.14599305947865387, + "grad_norm": 0.8984564542770386, + "learning_rate": 0.0001963824734420268, + "loss": 2.7761, + "step": 1809 + }, + { + "epoch": 0.14607376321523688, + "grad_norm": 0.9357183575630188, + "learning_rate": 0.00019637826445132172, + "loss": 2.7738, + "step": 1810 + }, + { + "epoch": 0.14615446695181986, + "grad_norm": 0.8545449376106262, + "learning_rate": 0.00019637405305861834, + "loss": 2.772, + "step": 1811 + }, + { + "epoch": 0.14623517068840286, + "grad_norm": 1.1674948930740356, + "learning_rate": 0.00019636983926402165, + "loss": 2.8988, + "step": 1812 + }, + { + "epoch": 0.14631587442498586, + "grad_norm": 0.7875451445579529, + "learning_rate": 0.00019636562306763665, + "loss": 2.7053, + "step": 1813 + }, + { + "epoch": 0.14639657816156887, + "grad_norm": 0.8980962038040161, + "learning_rate": 0.0001963614044695684, + "loss": 2.7731, + "step": 1814 + }, + { + "epoch": 0.14647728189815187, + "grad_norm": 0.8403381705284119, + "learning_rate": 0.00019635718346992207, + "loss": 2.8555, + "step": 1815 + }, + { + "epoch": 0.14655798563473488, + "grad_norm": 0.8736433982849121, + "learning_rate": 0.00019635296006880284, + "loss": 2.7918, + "step": 1816 + }, + { + "epoch": 0.14663868937131788, + "grad_norm": 0.8604151606559753, + "learning_rate": 0.000196348734266316, + "loss": 2.7493, + "step": 1817 + }, + { + "epoch": 0.1467193931079009, + "grad_norm": 0.8329424262046814, + "learning_rate": 0.00019634450606256681, + "loss": 2.7348, + "step": 1818 + }, + { + "epoch": 0.1468000968444839, + "grad_norm": 0.9835913181304932, + "learning_rate": 0.0001963402754576607, + "loss": 2.7651, + "step": 1819 + }, + { + "epoch": 0.1468808005810669, + "grad_norm": 0.7968378067016602, + "learning_rate": 
0.0001963360424517031, + "loss": 2.7672, + "step": 1820 + }, + { + "epoch": 0.1469615043176499, + "grad_norm": 0.8012512922286987, + "learning_rate": 0.00019633180704479948, + "loss": 2.8022, + "step": 1821 + }, + { + "epoch": 0.1470422080542329, + "grad_norm": 0.7656376957893372, + "learning_rate": 0.0001963275692370554, + "loss": 2.7561, + "step": 1822 + }, + { + "epoch": 0.1471229117908159, + "grad_norm": 0.8030453324317932, + "learning_rate": 0.00019632332902857656, + "loss": 2.8048, + "step": 1823 + }, + { + "epoch": 0.1472036155273989, + "grad_norm": 0.8050903677940369, + "learning_rate": 0.0001963190864194685, + "loss": 2.7846, + "step": 1824 + }, + { + "epoch": 0.14728431926398192, + "grad_norm": 0.8001886606216431, + "learning_rate": 0.00019631484140983705, + "loss": 2.7382, + "step": 1825 + }, + { + "epoch": 0.14736502300056492, + "grad_norm": 0.8589862585067749, + "learning_rate": 0.00019631059399978796, + "loss": 2.8376, + "step": 1826 + }, + { + "epoch": 0.14744572673714793, + "grad_norm": 0.86325603723526, + "learning_rate": 0.00019630634418942714, + "loss": 2.7643, + "step": 1827 + }, + { + "epoch": 0.14752643047373093, + "grad_norm": 0.7893280386924744, + "learning_rate": 0.00019630209197886046, + "loss": 2.713, + "step": 1828 + }, + { + "epoch": 0.14760713421031393, + "grad_norm": 0.8890528082847595, + "learning_rate": 0.00019629783736819394, + "loss": 2.7435, + "step": 1829 + }, + { + "epoch": 0.14768783794689694, + "grad_norm": 0.794924795627594, + "learning_rate": 0.00019629358035753357, + "loss": 2.7703, + "step": 1830 + }, + { + "epoch": 0.14776854168347994, + "grad_norm": 0.7712973952293396, + "learning_rate": 0.00019628932094698545, + "loss": 2.7487, + "step": 1831 + }, + { + "epoch": 0.14784924542006295, + "grad_norm": 0.7810670137405396, + "learning_rate": 0.00019628505913665576, + "loss": 2.7687, + "step": 1832 + }, + { + "epoch": 0.14792994915664595, + "grad_norm": 0.8331059813499451, + "learning_rate": 0.0001962807949266507, + "loss": 
2.7166, + "step": 1833 + }, + { + "epoch": 0.14801065289322896, + "grad_norm": 0.8983452916145325, + "learning_rate": 0.00019627652831707656, + "loss": 2.8096, + "step": 1834 + }, + { + "epoch": 0.14809135662981196, + "grad_norm": 0.8387179374694824, + "learning_rate": 0.00019627225930803963, + "loss": 2.8252, + "step": 1835 + }, + { + "epoch": 0.14817206036639496, + "grad_norm": 0.8619294762611389, + "learning_rate": 0.0001962679878996464, + "loss": 2.7623, + "step": 1836 + }, + { + "epoch": 0.14825276410297797, + "grad_norm": 0.8195026516914368, + "learning_rate": 0.0001962637140920032, + "loss": 2.7295, + "step": 1837 + }, + { + "epoch": 0.14833346783956097, + "grad_norm": 0.806216835975647, + "learning_rate": 0.00019625943788521664, + "loss": 2.7184, + "step": 1838 + }, + { + "epoch": 0.14841417157614398, + "grad_norm": 0.7758379578590393, + "learning_rate": 0.00019625515927939327, + "loss": 2.7675, + "step": 1839 + }, + { + "epoch": 0.14849487531272698, + "grad_norm": 0.7617168426513672, + "learning_rate": 0.0001962508782746397, + "loss": 2.8041, + "step": 1840 + }, + { + "epoch": 0.14857557904930999, + "grad_norm": 0.9630066156387329, + "learning_rate": 0.00019624659487106264, + "loss": 2.814, + "step": 1841 + }, + { + "epoch": 0.148656282785893, + "grad_norm": 0.7656112313270569, + "learning_rate": 0.00019624230906876888, + "loss": 2.7564, + "step": 1842 + }, + { + "epoch": 0.148736986522476, + "grad_norm": 0.9394779801368713, + "learning_rate": 0.0001962380208678652, + "loss": 2.7958, + "step": 1843 + }, + { + "epoch": 0.148817690259059, + "grad_norm": 0.7647004127502441, + "learning_rate": 0.00019623373026845842, + "loss": 2.72, + "step": 1844 + }, + { + "epoch": 0.148898393995642, + "grad_norm": 0.809079647064209, + "learning_rate": 0.00019622943727065555, + "loss": 2.7732, + "step": 1845 + }, + { + "epoch": 0.148979097732225, + "grad_norm": 0.8241337537765503, + "learning_rate": 0.00019622514187456357, + "loss": 2.759, + "step": 1846 + }, + { + "epoch": 
0.149059801468808, + "grad_norm": 0.8979619145393372, + "learning_rate": 0.00019622084408028948, + "loss": 2.8307, + "step": 1847 + }, + { + "epoch": 0.14914050520539102, + "grad_norm": 0.8058865666389465, + "learning_rate": 0.00019621654388794047, + "loss": 2.807, + "step": 1848 + }, + { + "epoch": 0.14922120894197402, + "grad_norm": 0.81967693567276, + "learning_rate": 0.00019621224129762364, + "loss": 2.7762, + "step": 1849 + }, + { + "epoch": 0.14930191267855702, + "grad_norm": 0.7385755777359009, + "learning_rate": 0.0001962079363094463, + "loss": 2.7854, + "step": 1850 + }, + { + "epoch": 0.14938261641514003, + "grad_norm": 0.8585657477378845, + "learning_rate": 0.00019620362892351566, + "loss": 2.7781, + "step": 1851 + }, + { + "epoch": 0.14946332015172303, + "grad_norm": 0.8328986763954163, + "learning_rate": 0.00019619931913993912, + "loss": 2.8245, + "step": 1852 + }, + { + "epoch": 0.14954402388830604, + "grad_norm": 0.749727189540863, + "learning_rate": 0.0001961950069588241, + "loss": 2.8049, + "step": 1853 + }, + { + "epoch": 0.14962472762488904, + "grad_norm": 0.7886502742767334, + "learning_rate": 0.00019619069238027803, + "loss": 2.7521, + "step": 1854 + }, + { + "epoch": 0.14970543136147205, + "grad_norm": 0.816137433052063, + "learning_rate": 0.00019618637540440848, + "loss": 2.8383, + "step": 1855 + }, + { + "epoch": 0.14978613509805505, + "grad_norm": 0.80442214012146, + "learning_rate": 0.000196182056031323, + "loss": 2.7227, + "step": 1856 + }, + { + "epoch": 0.14986683883463806, + "grad_norm": 0.7605221271514893, + "learning_rate": 0.00019617773426112924, + "loss": 2.7494, + "step": 1857 + }, + { + "epoch": 0.14994754257122106, + "grad_norm": 0.8745137453079224, + "learning_rate": 0.00019617341009393497, + "loss": 2.6978, + "step": 1858 + }, + { + "epoch": 0.15002824630780406, + "grad_norm": 0.8151741623878479, + "learning_rate": 0.00019616908352984789, + "loss": 2.7817, + "step": 1859 + }, + { + "epoch": 0.15010895004438707, + "grad_norm": 
0.773876428604126, + "learning_rate": 0.0001961647545689759, + "loss": 2.812, + "step": 1860 + }, + { + "epoch": 0.15018965378097007, + "grad_norm": 0.8216966390609741, + "learning_rate": 0.00019616042321142683, + "loss": 2.8181, + "step": 1861 + }, + { + "epoch": 0.15027035751755305, + "grad_norm": 0.8097409605979919, + "learning_rate": 0.00019615608945730862, + "loss": 2.8336, + "step": 1862 + }, + { + "epoch": 0.15035106125413605, + "grad_norm": 0.8085697293281555, + "learning_rate": 0.00019615175330672932, + "loss": 2.8176, + "step": 1863 + }, + { + "epoch": 0.15043176499071906, + "grad_norm": 0.7658133506774902, + "learning_rate": 0.00019614741475979701, + "loss": 2.7543, + "step": 1864 + }, + { + "epoch": 0.15051246872730206, + "grad_norm": 0.7193909883499146, + "learning_rate": 0.00019614307381661978, + "loss": 2.7475, + "step": 1865 + }, + { + "epoch": 0.15059317246388507, + "grad_norm": 0.835608959197998, + "learning_rate": 0.0001961387304773058, + "loss": 2.8017, + "step": 1866 + }, + { + "epoch": 0.15067387620046807, + "grad_norm": 0.7898489832878113, + "learning_rate": 0.0001961343847419634, + "loss": 2.7613, + "step": 1867 + }, + { + "epoch": 0.15075457993705108, + "grad_norm": 0.8031982183456421, + "learning_rate": 0.0001961300366107008, + "loss": 2.7442, + "step": 1868 + }, + { + "epoch": 0.15083528367363408, + "grad_norm": 0.8427363634109497, + "learning_rate": 0.00019612568608362642, + "loss": 2.8095, + "step": 1869 + }, + { + "epoch": 0.15091598741021708, + "grad_norm": 0.8282802700996399, + "learning_rate": 0.00019612133316084863, + "loss": 2.7216, + "step": 1870 + }, + { + "epoch": 0.1509966911468001, + "grad_norm": 0.7799758911132812, + "learning_rate": 0.000196116977842476, + "loss": 2.793, + "step": 1871 + }, + { + "epoch": 0.1510773948833831, + "grad_norm": 0.8151525259017944, + "learning_rate": 0.00019611262012861702, + "loss": 2.7641, + "step": 1872 + }, + { + "epoch": 0.1511580986199661, + "grad_norm": 0.7926812767982483, + 
"learning_rate": 0.0001961082600193803, + "loss": 2.7523, + "step": 1873 + }, + { + "epoch": 0.1512388023565491, + "grad_norm": 0.8737135529518127, + "learning_rate": 0.0001961038975148745, + "loss": 2.7965, + "step": 1874 + }, + { + "epoch": 0.1513195060931321, + "grad_norm": 0.7948090434074402, + "learning_rate": 0.00019609953261520837, + "loss": 2.7737, + "step": 1875 + }, + { + "epoch": 0.1514002098297151, + "grad_norm": 0.8161277770996094, + "learning_rate": 0.0001960951653204907, + "loss": 2.7423, + "step": 1876 + }, + { + "epoch": 0.15148091356629811, + "grad_norm": 0.8904973864555359, + "learning_rate": 0.00019609079563083026, + "loss": 2.7066, + "step": 1877 + }, + { + "epoch": 0.15156161730288112, + "grad_norm": 0.8107061982154846, + "learning_rate": 0.00019608642354633604, + "loss": 2.7939, + "step": 1878 + }, + { + "epoch": 0.15164232103946412, + "grad_norm": 0.8410987854003906, + "learning_rate": 0.00019608204906711694, + "loss": 2.7521, + "step": 1879 + }, + { + "epoch": 0.15172302477604713, + "grad_norm": 0.8336483836174011, + "learning_rate": 0.0001960776721932821, + "loss": 2.7613, + "step": 1880 + }, + { + "epoch": 0.15180372851263013, + "grad_norm": 0.730549156665802, + "learning_rate": 0.00019607329292494044, + "loss": 2.8019, + "step": 1881 + }, + { + "epoch": 0.15188443224921314, + "grad_norm": 0.7543070912361145, + "learning_rate": 0.0001960689112622012, + "loss": 2.6907, + "step": 1882 + }, + { + "epoch": 0.15196513598579614, + "grad_norm": 0.848414421081543, + "learning_rate": 0.00019606452720517359, + "loss": 2.7278, + "step": 1883 + }, + { + "epoch": 0.15204583972237914, + "grad_norm": 0.8331718444824219, + "learning_rate": 0.00019606014075396682, + "loss": 2.6994, + "step": 1884 + }, + { + "epoch": 0.15212654345896215, + "grad_norm": 0.9192764759063721, + "learning_rate": 0.00019605575190869025, + "loss": 2.7095, + "step": 1885 + }, + { + "epoch": 0.15220724719554515, + "grad_norm": 0.8377116322517395, + "learning_rate": 
0.00019605136066945324, + "loss": 2.7925, + "step": 1886 + }, + { + "epoch": 0.15228795093212816, + "grad_norm": 0.7302869558334351, + "learning_rate": 0.00019604696703636525, + "loss": 2.7286, + "step": 1887 + }, + { + "epoch": 0.15236865466871116, + "grad_norm": 0.7972438335418701, + "learning_rate": 0.00019604257100953577, + "loss": 2.7732, + "step": 1888 + }, + { + "epoch": 0.15244935840529417, + "grad_norm": 1.0350826978683472, + "learning_rate": 0.00019603817258907435, + "loss": 2.8211, + "step": 1889 + }, + { + "epoch": 0.15253006214187717, + "grad_norm": 0.782755970954895, + "learning_rate": 0.00019603377177509067, + "loss": 2.8489, + "step": 1890 + }, + { + "epoch": 0.15261076587846018, + "grad_norm": 0.9072603583335876, + "learning_rate": 0.0001960293685676943, + "loss": 2.7764, + "step": 1891 + }, + { + "epoch": 0.15269146961504318, + "grad_norm": 0.7878704071044922, + "learning_rate": 0.0001960249629669951, + "loss": 2.7494, + "step": 1892 + }, + { + "epoch": 0.15277217335162618, + "grad_norm": 0.8770418167114258, + "learning_rate": 0.00019602055497310278, + "loss": 2.7318, + "step": 1893 + }, + { + "epoch": 0.1528528770882092, + "grad_norm": 0.8004975914955139, + "learning_rate": 0.00019601614458612723, + "loss": 2.7272, + "step": 1894 + }, + { + "epoch": 0.1529335808247922, + "grad_norm": 0.8511070013046265, + "learning_rate": 0.00019601173180617835, + "loss": 2.7876, + "step": 1895 + }, + { + "epoch": 0.1530142845613752, + "grad_norm": 0.7946128845214844, + "learning_rate": 0.00019600731663336617, + "loss": 2.7435, + "step": 1896 + }, + { + "epoch": 0.1530949882979582, + "grad_norm": 0.8155317902565002, + "learning_rate": 0.00019600289906780067, + "loss": 2.7642, + "step": 1897 + }, + { + "epoch": 0.1531756920345412, + "grad_norm": 0.8086098432540894, + "learning_rate": 0.000195998479109592, + "loss": 2.7358, + "step": 1898 + }, + { + "epoch": 0.1532563957711242, + "grad_norm": 0.8698278665542603, + "learning_rate": 0.00019599405675885026, + "loss": 
2.725, + "step": 1899 + }, + { + "epoch": 0.15333709950770721, + "grad_norm": 0.8756006360054016, + "learning_rate": 0.00019598963201568573, + "loss": 2.7209, + "step": 1900 + }, + { + "epoch": 0.15341780324429022, + "grad_norm": 0.7984628081321716, + "learning_rate": 0.0001959852048802086, + "loss": 2.7685, + "step": 1901 + }, + { + "epoch": 0.15349850698087322, + "grad_norm": 0.8244056105613708, + "learning_rate": 0.0001959807753525293, + "loss": 2.7692, + "step": 1902 + }, + { + "epoch": 0.15357921071745623, + "grad_norm": 0.8577731251716614, + "learning_rate": 0.00019597634343275814, + "loss": 2.7571, + "step": 1903 + }, + { + "epoch": 0.15365991445403923, + "grad_norm": 0.8410975933074951, + "learning_rate": 0.00019597190912100566, + "loss": 2.7862, + "step": 1904 + }, + { + "epoch": 0.15374061819062224, + "grad_norm": 0.9094158411026001, + "learning_rate": 0.0001959674724173823, + "loss": 2.7655, + "step": 1905 + }, + { + "epoch": 0.15382132192720524, + "grad_norm": 0.8375208973884583, + "learning_rate": 0.00019596303332199868, + "loss": 2.8129, + "step": 1906 + }, + { + "epoch": 0.15390202566378824, + "grad_norm": 0.8335977792739868, + "learning_rate": 0.00019595859183496543, + "loss": 2.7835, + "step": 1907 + }, + { + "epoch": 0.15398272940037125, + "grad_norm": 0.7973531484603882, + "learning_rate": 0.0001959541479563932, + "loss": 2.7785, + "step": 1908 + }, + { + "epoch": 0.15406343313695425, + "grad_norm": 0.7808824181556702, + "learning_rate": 0.0001959497016863928, + "loss": 2.7862, + "step": 1909 + }, + { + "epoch": 0.15414413687353726, + "grad_norm": 0.853824257850647, + "learning_rate": 0.00019594525302507504, + "loss": 2.6721, + "step": 1910 + }, + { + "epoch": 0.15422484061012026, + "grad_norm": 0.8589324355125427, + "learning_rate": 0.00019594080197255073, + "loss": 2.7948, + "step": 1911 + }, + { + "epoch": 0.15430554434670327, + "grad_norm": 0.7951898574829102, + "learning_rate": 0.00019593634852893086, + "loss": 2.7903, + "step": 1912 + }, + 
{ + "epoch": 0.15438624808328624, + "grad_norm": 0.8333349227905273, + "learning_rate": 0.0001959318926943264, + "loss": 2.8073, + "step": 1913 + }, + { + "epoch": 0.15446695181986925, + "grad_norm": 0.8552380800247192, + "learning_rate": 0.0001959274344688484, + "loss": 2.8199, + "step": 1914 + }, + { + "epoch": 0.15454765555645225, + "grad_norm": 0.8356214165687561, + "learning_rate": 0.000195922973852608, + "loss": 2.7985, + "step": 1915 + }, + { + "epoch": 0.15462835929303526, + "grad_norm": 0.7167248725891113, + "learning_rate": 0.00019591851084571634, + "loss": 2.6802, + "step": 1916 + }, + { + "epoch": 0.15470906302961826, + "grad_norm": 0.7980726361274719, + "learning_rate": 0.00019591404544828464, + "loss": 2.692, + "step": 1917 + }, + { + "epoch": 0.15478976676620126, + "grad_norm": 0.7766004800796509, + "learning_rate": 0.00019590957766042424, + "loss": 2.7219, + "step": 1918 + }, + { + "epoch": 0.15487047050278427, + "grad_norm": 0.828852653503418, + "learning_rate": 0.0001959051074822464, + "loss": 2.7369, + "step": 1919 + }, + { + "epoch": 0.15495117423936727, + "grad_norm": 0.7818129062652588, + "learning_rate": 0.0001959006349138626, + "loss": 2.7778, + "step": 1920 + }, + { + "epoch": 0.15503187797595028, + "grad_norm": 0.8428593873977661, + "learning_rate": 0.00019589615995538432, + "loss": 2.8257, + "step": 1921 + }, + { + "epoch": 0.15511258171253328, + "grad_norm": 0.8756616115570068, + "learning_rate": 0.00019589168260692307, + "loss": 2.7692, + "step": 1922 + }, + { + "epoch": 0.15519328544911629, + "grad_norm": 0.7802519202232361, + "learning_rate": 0.0001958872028685904, + "loss": 2.7811, + "step": 1923 + }, + { + "epoch": 0.1552739891856993, + "grad_norm": 0.7787032723426819, + "learning_rate": 0.00019588272074049797, + "loss": 2.7546, + "step": 1924 + }, + { + "epoch": 0.1553546929222823, + "grad_norm": 0.848479151725769, + "learning_rate": 0.0001958782362227575, + "loss": 2.7759, + "step": 1925 + }, + { + "epoch": 0.1554353966588653, + 
"grad_norm": 0.8331353664398193, + "learning_rate": 0.00019587374931548076, + "loss": 2.7881, + "step": 1926 + }, + { + "epoch": 0.1555161003954483, + "grad_norm": 0.8646424412727356, + "learning_rate": 0.00019586926001877958, + "loss": 2.8059, + "step": 1927 + }, + { + "epoch": 0.1555968041320313, + "grad_norm": 0.912253737449646, + "learning_rate": 0.00019586476833276584, + "loss": 2.7446, + "step": 1928 + }, + { + "epoch": 0.1556775078686143, + "grad_norm": 0.9256471395492554, + "learning_rate": 0.00019586027425755147, + "loss": 2.8, + "step": 1929 + }, + { + "epoch": 0.15575821160519732, + "grad_norm": 1.0984607934951782, + "learning_rate": 0.0001958557777932485, + "loss": 2.7759, + "step": 1930 + }, + { + "epoch": 0.15583891534178032, + "grad_norm": 0.8736081123352051, + "learning_rate": 0.00019585127893996895, + "loss": 2.7464, + "step": 1931 + }, + { + "epoch": 0.15591961907836333, + "grad_norm": 0.932538628578186, + "learning_rate": 0.00019584677769782498, + "loss": 2.7874, + "step": 1932 + }, + { + "epoch": 0.15600032281494633, + "grad_norm": 0.9742087125778198, + "learning_rate": 0.0001958422740669288, + "loss": 2.7727, + "step": 1933 + }, + { + "epoch": 0.15608102655152933, + "grad_norm": 0.8975874781608582, + "learning_rate": 0.00019583776804739256, + "loss": 2.7812, + "step": 1934 + }, + { + "epoch": 0.15616173028811234, + "grad_norm": 0.9380232691764832, + "learning_rate": 0.00019583325963932864, + "loss": 2.7284, + "step": 1935 + }, + { + "epoch": 0.15624243402469534, + "grad_norm": 0.8332872986793518, + "learning_rate": 0.00019582874884284938, + "loss": 2.7792, + "step": 1936 + }, + { + "epoch": 0.15632313776127835, + "grad_norm": 1.0017194747924805, + "learning_rate": 0.0001958242356580672, + "loss": 2.7187, + "step": 1937 + }, + { + "epoch": 0.15640384149786135, + "grad_norm": 0.9433515667915344, + "learning_rate": 0.0001958197200850946, + "loss": 2.8394, + "step": 1938 + }, + { + "epoch": 0.15648454523444436, + "grad_norm": 0.8781030178070068, + 
"learning_rate": 0.00019581520212404407, + "loss": 2.7667, + "step": 1939 + }, + { + "epoch": 0.15656524897102736, + "grad_norm": 0.895656168460846, + "learning_rate": 0.00019581068177502826, + "loss": 2.799, + "step": 1940 + }, + { + "epoch": 0.15664595270761036, + "grad_norm": 0.8336960673332214, + "learning_rate": 0.0001958061590381598, + "loss": 2.8152, + "step": 1941 + }, + { + "epoch": 0.15672665644419337, + "grad_norm": 0.9184536337852478, + "learning_rate": 0.00019580163391355143, + "loss": 2.7746, + "step": 1942 + }, + { + "epoch": 0.15680736018077637, + "grad_norm": 0.8564908504486084, + "learning_rate": 0.00019579710640131587, + "loss": 2.7674, + "step": 1943 + }, + { + "epoch": 0.15688806391735938, + "grad_norm": 0.7491608262062073, + "learning_rate": 0.00019579257650156605, + "loss": 2.7665, + "step": 1944 + }, + { + "epoch": 0.15696876765394238, + "grad_norm": 0.9165031313896179, + "learning_rate": 0.00019578804421441478, + "loss": 2.7343, + "step": 1945 + }, + { + "epoch": 0.15704947139052539, + "grad_norm": 0.8413978815078735, + "learning_rate": 0.00019578350953997512, + "loss": 2.7503, + "step": 1946 + }, + { + "epoch": 0.1571301751271084, + "grad_norm": 0.7820419073104858, + "learning_rate": 0.00019577897247835993, + "loss": 2.7535, + "step": 1947 + }, + { + "epoch": 0.1572108788636914, + "grad_norm": 0.8134996294975281, + "learning_rate": 0.00019577443302968246, + "loss": 2.7504, + "step": 1948 + }, + { + "epoch": 0.1572915826002744, + "grad_norm": 0.8201301097869873, + "learning_rate": 0.00019576989119405574, + "loss": 2.6927, + "step": 1949 + }, + { + "epoch": 0.1573722863368574, + "grad_norm": 0.8343217372894287, + "learning_rate": 0.00019576534697159296, + "loss": 2.7742, + "step": 1950 + }, + { + "epoch": 0.1574529900734404, + "grad_norm": 0.8161751627922058, + "learning_rate": 0.0001957608003624074, + "loss": 2.8236, + "step": 1951 + }, + { + "epoch": 0.1575336938100234, + "grad_norm": 0.8626808524131775, + "learning_rate": 
0.00019575625136661242, + "loss": 2.7305, + "step": 1952 + }, + { + "epoch": 0.15761439754660642, + "grad_norm": 0.8238986730575562, + "learning_rate": 0.0001957516999843213, + "loss": 2.7641, + "step": 1953 + }, + { + "epoch": 0.15769510128318942, + "grad_norm": 0.7806095480918884, + "learning_rate": 0.00019574714621564755, + "loss": 2.7155, + "step": 1954 + }, + { + "epoch": 0.15777580501977242, + "grad_norm": 0.8137761950492859, + "learning_rate": 0.0001957425900607046, + "loss": 2.7529, + "step": 1955 + }, + { + "epoch": 0.15785650875635543, + "grad_norm": 0.8383988738059998, + "learning_rate": 0.00019573803151960606, + "loss": 2.7726, + "step": 1956 + }, + { + "epoch": 0.15793721249293843, + "grad_norm": 0.8734413385391235, + "learning_rate": 0.00019573347059246549, + "loss": 2.8563, + "step": 1957 + }, + { + "epoch": 0.15801791622952144, + "grad_norm": 0.8018438816070557, + "learning_rate": 0.0001957289072793966, + "loss": 2.8031, + "step": 1958 + }, + { + "epoch": 0.15809861996610444, + "grad_norm": 0.8175764083862305, + "learning_rate": 0.0001957243415805131, + "loss": 2.7824, + "step": 1959 + }, + { + "epoch": 0.15817932370268745, + "grad_norm": 0.7642164826393127, + "learning_rate": 0.00019571977349592878, + "loss": 2.7666, + "step": 1960 + }, + { + "epoch": 0.15826002743927045, + "grad_norm": 0.7584841847419739, + "learning_rate": 0.0001957152030257575, + "loss": 2.7211, + "step": 1961 + }, + { + "epoch": 0.15834073117585346, + "grad_norm": 0.822610080242157, + "learning_rate": 0.00019571063017011312, + "loss": 2.7025, + "step": 1962 + }, + { + "epoch": 0.15842143491243646, + "grad_norm": 0.7553817629814148, + "learning_rate": 0.00019570605492910968, + "loss": 2.8122, + "step": 1963 + }, + { + "epoch": 0.15850213864901944, + "grad_norm": 0.7224497199058533, + "learning_rate": 0.0001957014773028612, + "loss": 2.7613, + "step": 1964 + }, + { + "epoch": 0.15858284238560244, + "grad_norm": 0.8563623428344727, + "learning_rate": 0.00019569689729148168, + 
"loss": 2.8005, + "step": 1965 + }, + { + "epoch": 0.15866354612218544, + "grad_norm": 0.7665508389472961, + "learning_rate": 0.00019569231489508537, + "loss": 2.7387, + "step": 1966 + }, + { + "epoch": 0.15874424985876845, + "grad_norm": 0.7788479328155518, + "learning_rate": 0.0001956877301137864, + "loss": 2.7229, + "step": 1967 + }, + { + "epoch": 0.15882495359535145, + "grad_norm": 0.7326748371124268, + "learning_rate": 0.00019568314294769908, + "loss": 2.7728, + "step": 1968 + }, + { + "epoch": 0.15890565733193446, + "grad_norm": 0.790492594242096, + "learning_rate": 0.00019567855339693772, + "loss": 2.7809, + "step": 1969 + }, + { + "epoch": 0.15898636106851746, + "grad_norm": 0.8026898503303528, + "learning_rate": 0.0001956739614616167, + "loss": 2.7267, + "step": 1970 + }, + { + "epoch": 0.15906706480510047, + "grad_norm": 0.7963770627975464, + "learning_rate": 0.00019566936714185046, + "loss": 2.7161, + "step": 1971 + }, + { + "epoch": 0.15914776854168347, + "grad_norm": 0.7708200216293335, + "learning_rate": 0.00019566477043775354, + "loss": 2.7223, + "step": 1972 + }, + { + "epoch": 0.15922847227826648, + "grad_norm": 0.8036624789237976, + "learning_rate": 0.00019566017134944042, + "loss": 2.7644, + "step": 1973 + }, + { + "epoch": 0.15930917601484948, + "grad_norm": 0.8221341967582703, + "learning_rate": 0.00019565556987702581, + "loss": 2.7629, + "step": 1974 + }, + { + "epoch": 0.15938987975143248, + "grad_norm": 0.7685462832450867, + "learning_rate": 0.00019565096602062435, + "loss": 2.8016, + "step": 1975 + }, + { + "epoch": 0.1594705834880155, + "grad_norm": 0.8173574209213257, + "learning_rate": 0.00019564635978035075, + "loss": 2.761, + "step": 1976 + }, + { + "epoch": 0.1595512872245985, + "grad_norm": 0.7567519545555115, + "learning_rate": 0.00019564175115631988, + "loss": 2.7794, + "step": 1977 + }, + { + "epoch": 0.1596319909611815, + "grad_norm": 0.8754587173461914, + "learning_rate": 0.00019563714014864654, + "loss": 2.7769, + "step": 1978 
+ }, + { + "epoch": 0.1597126946977645, + "grad_norm": 0.753871738910675, + "learning_rate": 0.00019563252675744569, + "loss": 2.7489, + "step": 1979 + }, + { + "epoch": 0.1597933984343475, + "grad_norm": 0.777103841304779, + "learning_rate": 0.00019562791098283225, + "loss": 2.7667, + "step": 1980 + }, + { + "epoch": 0.1598741021709305, + "grad_norm": 0.8227293491363525, + "learning_rate": 0.00019562329282492131, + "loss": 2.7904, + "step": 1981 + }, + { + "epoch": 0.15995480590751351, + "grad_norm": 0.7595541477203369, + "learning_rate": 0.00019561867228382797, + "loss": 2.7654, + "step": 1982 + }, + { + "epoch": 0.16003550964409652, + "grad_norm": 0.8330550789833069, + "learning_rate": 0.00019561404935966733, + "loss": 2.7533, + "step": 1983 + }, + { + "epoch": 0.16011621338067952, + "grad_norm": 0.8213297128677368, + "learning_rate": 0.0001956094240525547, + "loss": 2.8103, + "step": 1984 + }, + { + "epoch": 0.16019691711726253, + "grad_norm": 0.8046056628227234, + "learning_rate": 0.00019560479636260527, + "loss": 2.7666, + "step": 1985 + }, + { + "epoch": 0.16027762085384553, + "grad_norm": 0.7886037230491638, + "learning_rate": 0.0001956001662899344, + "loss": 2.7066, + "step": 1986 + }, + { + "epoch": 0.16035832459042854, + "grad_norm": 0.8300043940544128, + "learning_rate": 0.00019559553383465748, + "loss": 2.7617, + "step": 1987 + }, + { + "epoch": 0.16043902832701154, + "grad_norm": 0.7963815331459045, + "learning_rate": 0.00019559089899688994, + "loss": 2.6891, + "step": 1988 + }, + { + "epoch": 0.16051973206359454, + "grad_norm": 0.7794002294540405, + "learning_rate": 0.00019558626177674734, + "loss": 2.8012, + "step": 1989 + }, + { + "epoch": 0.16060043580017755, + "grad_norm": 0.8345863819122314, + "learning_rate": 0.00019558162217434526, + "loss": 2.7715, + "step": 1990 + }, + { + "epoch": 0.16068113953676055, + "grad_norm": 0.8883393406867981, + "learning_rate": 0.00019557698018979927, + "loss": 2.7863, + "step": 1991 + }, + { + "epoch": 
0.16076184327334356, + "grad_norm": 0.8069450259208679, + "learning_rate": 0.0001955723358232251, + "loss": 2.759, + "step": 1992 + }, + { + "epoch": 0.16084254700992656, + "grad_norm": 0.9014191031455994, + "learning_rate": 0.00019556768907473852, + "loss": 2.711, + "step": 1993 + }, + { + "epoch": 0.16092325074650957, + "grad_norm": 0.8429470658302307, + "learning_rate": 0.0001955630399444553, + "loss": 2.6936, + "step": 1994 + }, + { + "epoch": 0.16100395448309257, + "grad_norm": 0.7859500050544739, + "learning_rate": 0.00019555838843249128, + "loss": 2.7343, + "step": 1995 + }, + { + "epoch": 0.16108465821967557, + "grad_norm": 0.8068249821662903, + "learning_rate": 0.00019555373453896245, + "loss": 2.7492, + "step": 1996 + }, + { + "epoch": 0.16116536195625858, + "grad_norm": 0.8194023370742798, + "learning_rate": 0.00019554907826398478, + "loss": 2.7265, + "step": 1997 + }, + { + "epoch": 0.16124606569284158, + "grad_norm": 0.8139404654502869, + "learning_rate": 0.00019554441960767434, + "loss": 2.7311, + "step": 1998 + }, + { + "epoch": 0.1613267694294246, + "grad_norm": 0.8210673928260803, + "learning_rate": 0.00019553975857014718, + "loss": 2.7095, + "step": 1999 + }, + { + "epoch": 0.1614074731660076, + "grad_norm": 0.8615561723709106, + "learning_rate": 0.0001955350951515195, + "loss": 2.7458, + "step": 2000 + }, + { + "epoch": 0.1614074731660076, + "eval_loss": 2.6739437580108643, + "eval_runtime": 813.8274, + "eval_samples_per_second": 3.219, + "eval_steps_per_second": 0.537, + "step": 2000 + }, + { + "epoch": 0.1614881769025906, + "grad_norm": 0.8945594429969788, + "learning_rate": 0.0001955304293519075, + "loss": 2.776, + "step": 2001 + }, + { + "epoch": 0.1615688806391736, + "grad_norm": 0.7943438291549683, + "learning_rate": 0.00019552576117142748, + "loss": 2.7484, + "step": 2002 + }, + { + "epoch": 0.1616495843757566, + "grad_norm": 0.8264374136924744, + "learning_rate": 0.00019552109061019582, + "loss": 2.7725, + "step": 2003 + }, + { + "epoch": 
0.1617302881123396, + "grad_norm": 0.7591681480407715, + "learning_rate": 0.00019551641766832887, + "loss": 2.7217, + "step": 2004 + }, + { + "epoch": 0.16181099184892261, + "grad_norm": 0.8275293707847595, + "learning_rate": 0.0001955117423459431, + "loss": 2.7279, + "step": 2005 + }, + { + "epoch": 0.16189169558550562, + "grad_norm": 0.8109650611877441, + "learning_rate": 0.00019550706464315504, + "loss": 2.8111, + "step": 2006 + }, + { + "epoch": 0.16197239932208862, + "grad_norm": 0.8710397481918335, + "learning_rate": 0.00019550238456008127, + "loss": 2.7166, + "step": 2007 + }, + { + "epoch": 0.16205310305867163, + "grad_norm": 0.8569270968437195, + "learning_rate": 0.00019549770209683845, + "loss": 2.7739, + "step": 2008 + }, + { + "epoch": 0.16213380679525463, + "grad_norm": 0.7927817702293396, + "learning_rate": 0.00019549301725354325, + "loss": 2.7154, + "step": 2009 + }, + { + "epoch": 0.16221451053183764, + "grad_norm": 0.7576590776443481, + "learning_rate": 0.00019548833003031244, + "loss": 2.7276, + "step": 2010 + }, + { + "epoch": 0.16229521426842064, + "grad_norm": 0.8092780709266663, + "learning_rate": 0.00019548364042726283, + "loss": 2.7494, + "step": 2011 + }, + { + "epoch": 0.16237591800500364, + "grad_norm": 0.7643424868583679, + "learning_rate": 0.0001954789484445113, + "loss": 2.7877, + "step": 2012 + }, + { + "epoch": 0.16245662174158665, + "grad_norm": 0.8235166072845459, + "learning_rate": 0.0001954742540821748, + "loss": 2.7884, + "step": 2013 + }, + { + "epoch": 0.16253732547816965, + "grad_norm": 0.9297853708267212, + "learning_rate": 0.00019546955734037034, + "loss": 2.765, + "step": 2014 + }, + { + "epoch": 0.16261802921475263, + "grad_norm": 0.7778275609016418, + "learning_rate": 0.0001954648582192149, + "loss": 2.7178, + "step": 2015 + }, + { + "epoch": 0.16269873295133563, + "grad_norm": 0.8767017126083374, + "learning_rate": 0.00019546015671882566, + "loss": 2.8254, + "step": 2016 + }, + { + "epoch": 0.16277943668791864, + 
"grad_norm": 0.7870603203773499, + "learning_rate": 0.0001954554528393198, + "loss": 2.797, + "step": 2017 + }, + { + "epoch": 0.16286014042450164, + "grad_norm": 0.8112391233444214, + "learning_rate": 0.00019545074658081454, + "loss": 2.8562, + "step": 2018 + }, + { + "epoch": 0.16294084416108465, + "grad_norm": 0.8216677308082581, + "learning_rate": 0.00019544603794342713, + "loss": 2.7894, + "step": 2019 + }, + { + "epoch": 0.16302154789766765, + "grad_norm": 0.8445515632629395, + "learning_rate": 0.00019544132692727497, + "loss": 2.8618, + "step": 2020 + }, + { + "epoch": 0.16310225163425066, + "grad_norm": 0.8275444507598877, + "learning_rate": 0.00019543661353247548, + "loss": 2.8087, + "step": 2021 + }, + { + "epoch": 0.16318295537083366, + "grad_norm": 0.8142833709716797, + "learning_rate": 0.00019543189775914608, + "loss": 2.8075, + "step": 2022 + }, + { + "epoch": 0.16326365910741666, + "grad_norm": 0.8182976245880127, + "learning_rate": 0.0001954271796074043, + "loss": 2.8312, + "step": 2023 + }, + { + "epoch": 0.16334436284399967, + "grad_norm": 0.7629228234291077, + "learning_rate": 0.0001954224590773678, + "loss": 2.7191, + "step": 2024 + }, + { + "epoch": 0.16342506658058267, + "grad_norm": 0.8630000948905945, + "learning_rate": 0.00019541773616915418, + "loss": 2.8013, + "step": 2025 + }, + { + "epoch": 0.16350577031716568, + "grad_norm": 0.8917906880378723, + "learning_rate": 0.00019541301088288115, + "loss": 2.7573, + "step": 2026 + }, + { + "epoch": 0.16358647405374868, + "grad_norm": 0.8641694188117981, + "learning_rate": 0.00019540828321866648, + "loss": 2.7509, + "step": 2027 + }, + { + "epoch": 0.16366717779033169, + "grad_norm": 0.7687639594078064, + "learning_rate": 0.00019540355317662798, + "loss": 2.7266, + "step": 2028 + }, + { + "epoch": 0.1637478815269147, + "grad_norm": 0.7870400547981262, + "learning_rate": 0.00019539882075688355, + "loss": 2.8217, + "step": 2029 + }, + { + "epoch": 0.1638285852634977, + "grad_norm": 
0.9373054504394531, + "learning_rate": 0.0001953940859595511, + "loss": 2.7562, + "step": 2030 + }, + { + "epoch": 0.1639092890000807, + "grad_norm": 0.7941255569458008, + "learning_rate": 0.00019538934878474872, + "loss": 2.7553, + "step": 2031 + }, + { + "epoch": 0.1639899927366637, + "grad_norm": 0.735977053642273, + "learning_rate": 0.00019538460923259438, + "loss": 2.7058, + "step": 2032 + }, + { + "epoch": 0.1640706964732467, + "grad_norm": 0.7812782526016235, + "learning_rate": 0.00019537986730320625, + "loss": 2.7885, + "step": 2033 + }, + { + "epoch": 0.1641514002098297, + "grad_norm": 1.1534128189086914, + "learning_rate": 0.0001953751229967025, + "loss": 2.7139, + "step": 2034 + }, + { + "epoch": 0.16423210394641272, + "grad_norm": 0.9139814972877502, + "learning_rate": 0.00019537037631320135, + "loss": 2.7869, + "step": 2035 + }, + { + "epoch": 0.16431280768299572, + "grad_norm": 0.8330421447753906, + "learning_rate": 0.00019536562725282116, + "loss": 2.7491, + "step": 2036 + }, + { + "epoch": 0.16439351141957873, + "grad_norm": 0.9040594696998596, + "learning_rate": 0.00019536087581568026, + "loss": 2.7637, + "step": 2037 + }, + { + "epoch": 0.16447421515616173, + "grad_norm": 0.9158666729927063, + "learning_rate": 0.00019535612200189705, + "loss": 2.7709, + "step": 2038 + }, + { + "epoch": 0.16455491889274473, + "grad_norm": 0.8668088912963867, + "learning_rate": 0.00019535136581158997, + "loss": 2.7994, + "step": 2039 + }, + { + "epoch": 0.16463562262932774, + "grad_norm": 0.9179345369338989, + "learning_rate": 0.00019534660724487764, + "loss": 2.747, + "step": 2040 + }, + { + "epoch": 0.16471632636591074, + "grad_norm": 0.9690881967544556, + "learning_rate": 0.00019534184630187862, + "loss": 2.742, + "step": 2041 + }, + { + "epoch": 0.16479703010249375, + "grad_norm": 0.8478729724884033, + "learning_rate": 0.00019533708298271157, + "loss": 2.7824, + "step": 2042 + }, + { + "epoch": 0.16487773383907675, + "grad_norm": 0.8286584615707397, + 
"learning_rate": 0.00019533231728749518, + "loss": 2.7263, + "step": 2043 + }, + { + "epoch": 0.16495843757565976, + "grad_norm": 0.8095324039459229, + "learning_rate": 0.00019532754921634826, + "loss": 2.7845, + "step": 2044 + }, + { + "epoch": 0.16503914131224276, + "grad_norm": 0.9552872776985168, + "learning_rate": 0.0001953227787693896, + "loss": 2.7676, + "step": 2045 + }, + { + "epoch": 0.16511984504882576, + "grad_norm": 1.021515130996704, + "learning_rate": 0.00019531800594673815, + "loss": 2.784, + "step": 2046 + }, + { + "epoch": 0.16520054878540877, + "grad_norm": 0.7847293019294739, + "learning_rate": 0.00019531323074851276, + "loss": 2.7319, + "step": 2047 + }, + { + "epoch": 0.16528125252199177, + "grad_norm": 0.7803899049758911, + "learning_rate": 0.0001953084531748326, + "loss": 2.8321, + "step": 2048 + }, + { + "epoch": 0.16536195625857478, + "grad_norm": 0.8687692880630493, + "learning_rate": 0.0001953036732258166, + "loss": 2.763, + "step": 2049 + }, + { + "epoch": 0.16544265999515778, + "grad_norm": 0.8212031126022339, + "learning_rate": 0.00019529889090158392, + "loss": 2.7262, + "step": 2050 + }, + { + "epoch": 0.16552336373174079, + "grad_norm": 0.8460689187049866, + "learning_rate": 0.0001952941062022538, + "loss": 2.8018, + "step": 2051 + }, + { + "epoch": 0.1656040674683238, + "grad_norm": 0.9189361929893494, + "learning_rate": 0.00019528931912794547, + "loss": 2.8079, + "step": 2052 + }, + { + "epoch": 0.1656847712049068, + "grad_norm": 0.9529987573623657, + "learning_rate": 0.00019528452967877816, + "loss": 2.8015, + "step": 2053 + }, + { + "epoch": 0.1657654749414898, + "grad_norm": 0.8468493223190308, + "learning_rate": 0.00019527973785487133, + "loss": 2.8013, + "step": 2054 + }, + { + "epoch": 0.1658461786780728, + "grad_norm": 0.8150945901870728, + "learning_rate": 0.00019527494365634436, + "loss": 2.7975, + "step": 2055 + }, + { + "epoch": 0.1659268824146558, + "grad_norm": 0.814942479133606, + "learning_rate": 
0.00019527014708331674, + "loss": 2.7503, + "step": 2056 + }, + { + "epoch": 0.1660075861512388, + "grad_norm": 0.7841517329216003, + "learning_rate": 0.000195265348135908, + "loss": 2.7921, + "step": 2057 + }, + { + "epoch": 0.16608828988782182, + "grad_norm": 0.7603738903999329, + "learning_rate": 0.0001952605468142378, + "loss": 2.7658, + "step": 2058 + }, + { + "epoch": 0.16616899362440482, + "grad_norm": 0.8460882902145386, + "learning_rate": 0.00019525574311842574, + "loss": 2.7644, + "step": 2059 + }, + { + "epoch": 0.16624969736098782, + "grad_norm": 0.8633555173873901, + "learning_rate": 0.00019525093704859156, + "loss": 2.7956, + "step": 2060 + }, + { + "epoch": 0.16633040109757083, + "grad_norm": 0.7700977325439453, + "learning_rate": 0.00019524612860485503, + "loss": 2.7103, + "step": 2061 + }, + { + "epoch": 0.16641110483415383, + "grad_norm": 0.888770580291748, + "learning_rate": 0.00019524131778733602, + "loss": 2.7325, + "step": 2062 + }, + { + "epoch": 0.16649180857073684, + "grad_norm": 0.8338149189949036, + "learning_rate": 0.00019523650459615438, + "loss": 2.7533, + "step": 2063 + }, + { + "epoch": 0.16657251230731984, + "grad_norm": 0.7723987698554993, + "learning_rate": 0.0001952316890314301, + "loss": 2.7316, + "step": 2064 + }, + { + "epoch": 0.16665321604390285, + "grad_norm": 0.8952934145927429, + "learning_rate": 0.0001952268710932832, + "loss": 2.7825, + "step": 2065 + }, + { + "epoch": 0.16673391978048582, + "grad_norm": 0.8201496601104736, + "learning_rate": 0.00019522205078183378, + "loss": 2.7162, + "step": 2066 + }, + { + "epoch": 0.16681462351706883, + "grad_norm": 0.7733781337738037, + "learning_rate": 0.00019521722809720188, + "loss": 2.7834, + "step": 2067 + }, + { + "epoch": 0.16689532725365183, + "grad_norm": 0.8285118937492371, + "learning_rate": 0.0001952124030395078, + "loss": 2.8475, + "step": 2068 + }, + { + "epoch": 0.16697603099023484, + "grad_norm": 0.84097820520401, + "learning_rate": 0.00019520757560887174, + "loss": 
2.784, + "step": 2069 + }, + { + "epoch": 0.16705673472681784, + "grad_norm": 0.7336563467979431, + "learning_rate": 0.000195202745805414, + "loss": 2.7663, + "step": 2070 + }, + { + "epoch": 0.16713743846340084, + "grad_norm": 0.8359388113021851, + "learning_rate": 0.000195197913629255, + "loss": 2.7931, + "step": 2071 + }, + { + "epoch": 0.16721814219998385, + "grad_norm": 0.8272559642791748, + "learning_rate": 0.0001951930790805151, + "loss": 2.8578, + "step": 2072 + }, + { + "epoch": 0.16729884593656685, + "grad_norm": 0.7970743179321289, + "learning_rate": 0.00019518824215931487, + "loss": 2.8148, + "step": 2073 + }, + { + "epoch": 0.16737954967314986, + "grad_norm": 0.856200098991394, + "learning_rate": 0.00019518340286577482, + "loss": 2.8067, + "step": 2074 + }, + { + "epoch": 0.16746025340973286, + "grad_norm": 0.7581893801689148, + "learning_rate": 0.00019517856120001556, + "loss": 2.7339, + "step": 2075 + }, + { + "epoch": 0.16754095714631587, + "grad_norm": 0.8488386869430542, + "learning_rate": 0.00019517371716215774, + "loss": 2.7332, + "step": 2076 + }, + { + "epoch": 0.16762166088289887, + "grad_norm": 0.7488275170326233, + "learning_rate": 0.00019516887075232212, + "loss": 2.7734, + "step": 2077 + }, + { + "epoch": 0.16770236461948188, + "grad_norm": 0.9173932075500488, + "learning_rate": 0.00019516402197062945, + "loss": 2.7792, + "step": 2078 + }, + { + "epoch": 0.16778306835606488, + "grad_norm": 0.8200702667236328, + "learning_rate": 0.0001951591708172006, + "loss": 2.8046, + "step": 2079 + }, + { + "epoch": 0.16786377209264788, + "grad_norm": 0.8270781636238098, + "learning_rate": 0.00019515431729215642, + "loss": 2.7467, + "step": 2080 + }, + { + "epoch": 0.1679444758292309, + "grad_norm": 0.8660609722137451, + "learning_rate": 0.00019514946139561799, + "loss": 2.8169, + "step": 2081 + }, + { + "epoch": 0.1680251795658139, + "grad_norm": 0.78753262758255, + "learning_rate": 0.0001951446031277062, + "loss": 2.7388, + "step": 2082 + }, + { + 
"epoch": 0.1681058833023969, + "grad_norm": 0.791593074798584, + "learning_rate": 0.00019513974248854224, + "loss": 2.8776, + "step": 2083 + }, + { + "epoch": 0.1681865870389799, + "grad_norm": 0.7883535623550415, + "learning_rate": 0.0001951348794782472, + "loss": 2.78, + "step": 2084 + }, + { + "epoch": 0.1682672907755629, + "grad_norm": 0.7877013087272644, + "learning_rate": 0.00019513001409694224, + "loss": 2.7559, + "step": 2085 + }, + { + "epoch": 0.1683479945121459, + "grad_norm": 0.8838450908660889, + "learning_rate": 0.00019512514634474864, + "loss": 2.7489, + "step": 2086 + }, + { + "epoch": 0.16842869824872891, + "grad_norm": 0.7751588821411133, + "learning_rate": 0.00019512027622178775, + "loss": 2.6832, + "step": 2087 + }, + { + "epoch": 0.16850940198531192, + "grad_norm": 0.90345299243927, + "learning_rate": 0.00019511540372818095, + "loss": 2.8189, + "step": 2088 + }, + { + "epoch": 0.16859010572189492, + "grad_norm": 0.7820938229560852, + "learning_rate": 0.00019511052886404966, + "loss": 2.7655, + "step": 2089 + }, + { + "epoch": 0.16867080945847793, + "grad_norm": 0.8250375986099243, + "learning_rate": 0.00019510565162951537, + "loss": 2.7866, + "step": 2090 + }, + { + "epoch": 0.16875151319506093, + "grad_norm": 0.8063845634460449, + "learning_rate": 0.00019510077202469962, + "loss": 2.7774, + "step": 2091 + }, + { + "epoch": 0.16883221693164394, + "grad_norm": 0.7627965807914734, + "learning_rate": 0.00019509589004972403, + "loss": 2.7201, + "step": 2092 + }, + { + "epoch": 0.16891292066822694, + "grad_norm": 0.8392470479011536, + "learning_rate": 0.00019509100570471027, + "loss": 2.7613, + "step": 2093 + }, + { + "epoch": 0.16899362440480994, + "grad_norm": 0.7807552814483643, + "learning_rate": 0.0001950861189897801, + "loss": 2.7451, + "step": 2094 + }, + { + "epoch": 0.16907432814139295, + "grad_norm": 0.7829259634017944, + "learning_rate": 0.00019508122990505528, + "loss": 2.7128, + "step": 2095 + }, + { + "epoch": 0.16915503187797595, + 
"grad_norm": 0.7793046832084656, + "learning_rate": 0.00019507633845065766, + "loss": 2.7849, + "step": 2096 + }, + { + "epoch": 0.16923573561455896, + "grad_norm": 0.869752824306488, + "learning_rate": 0.00019507144462670915, + "loss": 2.7882, + "step": 2097 + }, + { + "epoch": 0.16931643935114196, + "grad_norm": 0.7550783753395081, + "learning_rate": 0.00019506654843333174, + "loss": 2.7211, + "step": 2098 + }, + { + "epoch": 0.16939714308772497, + "grad_norm": 0.8364891409873962, + "learning_rate": 0.0001950616498706474, + "loss": 2.7171, + "step": 2099 + }, + { + "epoch": 0.16947784682430797, + "grad_norm": 0.8026537299156189, + "learning_rate": 0.0001950567489387783, + "loss": 2.8362, + "step": 2100 + }, + { + "epoch": 0.16955855056089097, + "grad_norm": 0.8073398470878601, + "learning_rate": 0.00019505184563784652, + "loss": 2.7635, + "step": 2101 + }, + { + "epoch": 0.16963925429747398, + "grad_norm": 0.8168368935585022, + "learning_rate": 0.00019504693996797424, + "loss": 2.7553, + "step": 2102 + }, + { + "epoch": 0.16971995803405698, + "grad_norm": 0.7933681011199951, + "learning_rate": 0.0001950420319292838, + "loss": 2.7887, + "step": 2103 + }, + { + "epoch": 0.16980066177064, + "grad_norm": 0.8326540589332581, + "learning_rate": 0.00019503712152189748, + "loss": 2.7844, + "step": 2104 + }, + { + "epoch": 0.169881365507223, + "grad_norm": 0.8357202410697937, + "learning_rate": 0.00019503220874593765, + "loss": 2.7744, + "step": 2105 + }, + { + "epoch": 0.169962069243806, + "grad_norm": 0.8541022539138794, + "learning_rate": 0.00019502729360152676, + "loss": 2.7867, + "step": 2106 + }, + { + "epoch": 0.170042772980389, + "grad_norm": 0.8338841795921326, + "learning_rate": 0.0001950223760887873, + "loss": 2.7208, + "step": 2107 + }, + { + "epoch": 0.170123476716972, + "grad_norm": 0.8824255466461182, + "learning_rate": 0.00019501745620784187, + "loss": 2.7658, + "step": 2108 + }, + { + "epoch": 0.170204180453555, + "grad_norm": 0.7710463404655457, + 
"learning_rate": 0.00019501253395881306, + "loss": 2.7167, + "step": 2109 + }, + { + "epoch": 0.17028488419013801, + "grad_norm": 0.7740076184272766, + "learning_rate": 0.0001950076093418235, + "loss": 2.7251, + "step": 2110 + }, + { + "epoch": 0.17036558792672102, + "grad_norm": 0.8258434534072876, + "learning_rate": 0.00019500268235699597, + "loss": 2.7533, + "step": 2111 + }, + { + "epoch": 0.17044629166330402, + "grad_norm": 0.8347997069358826, + "learning_rate": 0.00019499775300445326, + "loss": 2.7372, + "step": 2112 + }, + { + "epoch": 0.17052699539988703, + "grad_norm": 0.8246529698371887, + "learning_rate": 0.00019499282128431823, + "loss": 2.7458, + "step": 2113 + }, + { + "epoch": 0.17060769913647003, + "grad_norm": 0.8510704040527344, + "learning_rate": 0.00019498788719671378, + "loss": 2.8144, + "step": 2114 + }, + { + "epoch": 0.17068840287305304, + "grad_norm": 0.7793454527854919, + "learning_rate": 0.00019498295074176286, + "loss": 2.7927, + "step": 2115 + }, + { + "epoch": 0.17076910660963604, + "grad_norm": 0.7888665199279785, + "learning_rate": 0.00019497801191958853, + "loss": 2.7156, + "step": 2116 + }, + { + "epoch": 0.17084981034621902, + "grad_norm": 0.8502812385559082, + "learning_rate": 0.00019497307073031386, + "loss": 2.7906, + "step": 2117 + }, + { + "epoch": 0.17093051408280202, + "grad_norm": 0.8376502990722656, + "learning_rate": 0.00019496812717406203, + "loss": 2.7354, + "step": 2118 + }, + { + "epoch": 0.17101121781938503, + "grad_norm": 0.7974401116371155, + "learning_rate": 0.0001949631812509562, + "loss": 2.7755, + "step": 2119 + }, + { + "epoch": 0.17109192155596803, + "grad_norm": 0.7760190963745117, + "learning_rate": 0.00019495823296111965, + "loss": 2.7694, + "step": 2120 + }, + { + "epoch": 0.17117262529255103, + "grad_norm": 0.7721701860427856, + "learning_rate": 0.00019495328230467575, + "loss": 2.7474, + "step": 2121 + }, + { + "epoch": 0.17125332902913404, + "grad_norm": 0.7360577583312988, + "learning_rate": 
0.0001949483292817478, + "loss": 2.8044, + "step": 2122 + }, + { + "epoch": 0.17133403276571704, + "grad_norm": 0.7536107301712036, + "learning_rate": 0.0001949433738924593, + "loss": 2.8165, + "step": 2123 + }, + { + "epoch": 0.17141473650230005, + "grad_norm": 0.7668276429176331, + "learning_rate": 0.00019493841613693375, + "loss": 2.7964, + "step": 2124 + }, + { + "epoch": 0.17149544023888305, + "grad_norm": 0.8323161602020264, + "learning_rate": 0.0001949334560152947, + "loss": 2.7395, + "step": 2125 + }, + { + "epoch": 0.17157614397546606, + "grad_norm": 0.8132179975509644, + "learning_rate": 0.00019492849352766576, + "loss": 2.7511, + "step": 2126 + }, + { + "epoch": 0.17165684771204906, + "grad_norm": 0.7806998491287231, + "learning_rate": 0.0001949235286741706, + "loss": 2.7649, + "step": 2127 + }, + { + "epoch": 0.17173755144863206, + "grad_norm": 0.8315939903259277, + "learning_rate": 0.00019491856145493298, + "loss": 2.7742, + "step": 2128 + }, + { + "epoch": 0.17181825518521507, + "grad_norm": 0.8368063569068909, + "learning_rate": 0.00019491359187007672, + "loss": 2.7667, + "step": 2129 + }, + { + "epoch": 0.17189895892179807, + "grad_norm": 0.9183431267738342, + "learning_rate": 0.0001949086199197256, + "loss": 2.7444, + "step": 2130 + }, + { + "epoch": 0.17197966265838108, + "grad_norm": 0.7824065089225769, + "learning_rate": 0.0001949036456040036, + "loss": 2.7455, + "step": 2131 + }, + { + "epoch": 0.17206036639496408, + "grad_norm": 0.777974009513855, + "learning_rate": 0.00019489866892303468, + "loss": 2.7466, + "step": 2132 + }, + { + "epoch": 0.17214107013154709, + "grad_norm": 0.8068816065788269, + "learning_rate": 0.00019489368987694286, + "loss": 2.7081, + "step": 2133 + }, + { + "epoch": 0.1722217738681301, + "grad_norm": 0.8757622838020325, + "learning_rate": 0.00019488870846585222, + "loss": 2.8005, + "step": 2134 + }, + { + "epoch": 0.1723024776047131, + "grad_norm": 0.7967162728309631, + "learning_rate": 0.00019488372468988693, + 
"loss": 2.7737, + "step": 2135 + }, + { + "epoch": 0.1723831813412961, + "grad_norm": 0.7700283527374268, + "learning_rate": 0.00019487873854917117, + "loss": 2.7431, + "step": 2136 + }, + { + "epoch": 0.1724638850778791, + "grad_norm": 0.8259130716323853, + "learning_rate": 0.00019487375004382927, + "loss": 2.7635, + "step": 2137 + }, + { + "epoch": 0.1725445888144621, + "grad_norm": 0.8253815770149231, + "learning_rate": 0.0001948687591739855, + "loss": 2.7046, + "step": 2138 + }, + { + "epoch": 0.1726252925510451, + "grad_norm": 0.8087987303733826, + "learning_rate": 0.00019486376593976426, + "loss": 2.7728, + "step": 2139 + }, + { + "epoch": 0.17270599628762812, + "grad_norm": 0.8437588214874268, + "learning_rate": 0.00019485877034128998, + "loss": 2.7606, + "step": 2140 + }, + { + "epoch": 0.17278670002421112, + "grad_norm": 0.8416075110435486, + "learning_rate": 0.00019485377237868723, + "loss": 2.7396, + "step": 2141 + }, + { + "epoch": 0.17286740376079412, + "grad_norm": 0.784275472164154, + "learning_rate": 0.00019484877205208046, + "loss": 2.766, + "step": 2142 + }, + { + "epoch": 0.17294810749737713, + "grad_norm": 0.8082472681999207, + "learning_rate": 0.0001948437693615944, + "loss": 2.8, + "step": 2143 + }, + { + "epoch": 0.17302881123396013, + "grad_norm": 0.8904329538345337, + "learning_rate": 0.00019483876430735365, + "loss": 2.6579, + "step": 2144 + }, + { + "epoch": 0.17310951497054314, + "grad_norm": 0.7864851355552673, + "learning_rate": 0.000194833756889483, + "loss": 2.8231, + "step": 2145 + }, + { + "epoch": 0.17319021870712614, + "grad_norm": 0.7445049285888672, + "learning_rate": 0.00019482874710810723, + "loss": 2.7498, + "step": 2146 + }, + { + "epoch": 0.17327092244370915, + "grad_norm": 0.8266116380691528, + "learning_rate": 0.00019482373496335117, + "loss": 2.7152, + "step": 2147 + }, + { + "epoch": 0.17335162618029215, + "grad_norm": 0.7712300419807434, + "learning_rate": 0.0001948187204553398, + "loss": 2.7751, + "step": 2148 + }, + 
{ + "epoch": 0.17343232991687516, + "grad_norm": 0.7472708225250244, + "learning_rate": 0.00019481370358419807, + "loss": 2.7397, + "step": 2149 + }, + { + "epoch": 0.17351303365345816, + "grad_norm": 0.763454020023346, + "learning_rate": 0.00019480868435005095, + "loss": 2.7174, + "step": 2150 + }, + { + "epoch": 0.17359373739004116, + "grad_norm": 0.8187674283981323, + "learning_rate": 0.00019480366275302362, + "loss": 2.8424, + "step": 2151 + }, + { + "epoch": 0.17367444112662417, + "grad_norm": 0.8183228373527527, + "learning_rate": 0.0001947986387932412, + "loss": 2.7351, + "step": 2152 + }, + { + "epoch": 0.17375514486320717, + "grad_norm": 0.807231605052948, + "learning_rate": 0.00019479361247082884, + "loss": 2.8054, + "step": 2153 + }, + { + "epoch": 0.17383584859979018, + "grad_norm": 0.8383626341819763, + "learning_rate": 0.00019478858378591194, + "loss": 2.7181, + "step": 2154 + }, + { + "epoch": 0.17391655233637318, + "grad_norm": 0.8330298662185669, + "learning_rate": 0.0001947835527386157, + "loss": 2.748, + "step": 2155 + }, + { + "epoch": 0.17399725607295619, + "grad_norm": 0.8433073163032532, + "learning_rate": 0.0001947785193290656, + "loss": 2.8115, + "step": 2156 + }, + { + "epoch": 0.1740779598095392, + "grad_norm": 0.8873384594917297, + "learning_rate": 0.000194773483557387, + "loss": 2.8288, + "step": 2157 + }, + { + "epoch": 0.1741586635461222, + "grad_norm": 0.8399423360824585, + "learning_rate": 0.00019476844542370546, + "loss": 2.7514, + "step": 2158 + }, + { + "epoch": 0.1742393672827052, + "grad_norm": 0.7808830738067627, + "learning_rate": 0.00019476340492814655, + "loss": 2.7003, + "step": 2159 + }, + { + "epoch": 0.1743200710192882, + "grad_norm": 0.8268750905990601, + "learning_rate": 0.00019475836207083589, + "loss": 2.7961, + "step": 2160 + }, + { + "epoch": 0.1744007747558712, + "grad_norm": 0.9144260883331299, + "learning_rate": 0.0001947533168518991, + "loss": 2.769, + "step": 2161 + }, + { + "epoch": 0.1744814784924542, + 
"grad_norm": 0.8409113883972168, + "learning_rate": 0.000194748269271462, + "loss": 2.8004, + "step": 2162 + }, + { + "epoch": 0.17456218222903722, + "grad_norm": 0.8747037649154663, + "learning_rate": 0.00019474321932965035, + "loss": 2.7602, + "step": 2163 + }, + { + "epoch": 0.17464288596562022, + "grad_norm": 0.8582575917243958, + "learning_rate": 0.00019473816702659, + "loss": 2.7292, + "step": 2164 + }, + { + "epoch": 0.17472358970220322, + "grad_norm": 0.7402843832969666, + "learning_rate": 0.0001947331123624069, + "loss": 2.7287, + "step": 2165 + }, + { + "epoch": 0.17480429343878623, + "grad_norm": 0.8019410967826843, + "learning_rate": 0.000194728055337227, + "loss": 2.7451, + "step": 2166 + }, + { + "epoch": 0.17488499717536923, + "grad_norm": 0.9137046337127686, + "learning_rate": 0.0001947229959511763, + "loss": 2.808, + "step": 2167 + }, + { + "epoch": 0.1749657009119522, + "grad_norm": 0.7539177536964417, + "learning_rate": 0.000194717934204381, + "loss": 2.7031, + "step": 2168 + }, + { + "epoch": 0.17504640464853521, + "grad_norm": 0.8611089587211609, + "learning_rate": 0.00019471287009696715, + "loss": 2.8751, + "step": 2169 + }, + { + "epoch": 0.17512710838511822, + "grad_norm": 0.906134843826294, + "learning_rate": 0.000194707803629061, + "loss": 2.9163, + "step": 2170 + }, + { + "epoch": 0.17520781212170122, + "grad_norm": 0.8066667318344116, + "learning_rate": 0.00019470273480078879, + "loss": 2.7549, + "step": 2171 + }, + { + "epoch": 0.17528851585828423, + "grad_norm": 0.7962325215339661, + "learning_rate": 0.00019469766361227692, + "loss": 2.7964, + "step": 2172 + }, + { + "epoch": 0.17536921959486723, + "grad_norm": 0.7802287340164185, + "learning_rate": 0.0001946925900636517, + "loss": 2.7022, + "step": 2173 + }, + { + "epoch": 0.17544992333145024, + "grad_norm": 0.783478319644928, + "learning_rate": 0.0001946875141550396, + "loss": 2.7798, + "step": 2174 + }, + { + "epoch": 0.17553062706803324, + "grad_norm": 0.8006815314292908, + 
"learning_rate": 0.00019468243588656713, + "loss": 2.7345, + "step": 2175 + }, + { + "epoch": 0.17561133080461624, + "grad_norm": 0.7566428184509277, + "learning_rate": 0.00019467735525836085, + "loss": 2.7822, + "step": 2176 + }, + { + "epoch": 0.17569203454119925, + "grad_norm": 0.772282600402832, + "learning_rate": 0.0001946722722705474, + "loss": 2.7346, + "step": 2177 + }, + { + "epoch": 0.17577273827778225, + "grad_norm": 0.7808345556259155, + "learning_rate": 0.00019466718692325347, + "loss": 2.755, + "step": 2178 + }, + { + "epoch": 0.17585344201436526, + "grad_norm": 0.8150362372398376, + "learning_rate": 0.00019466209921660576, + "loss": 2.7691, + "step": 2179 + }, + { + "epoch": 0.17593414575094826, + "grad_norm": 0.7952939867973328, + "learning_rate": 0.0001946570091507311, + "loss": 2.8175, + "step": 2180 + }, + { + "epoch": 0.17601484948753127, + "grad_norm": 0.8211334347724915, + "learning_rate": 0.00019465191672575634, + "loss": 2.7561, + "step": 2181 + }, + { + "epoch": 0.17609555322411427, + "grad_norm": 0.7726178765296936, + "learning_rate": 0.00019464682194180838, + "loss": 2.7435, + "step": 2182 + }, + { + "epoch": 0.17617625696069728, + "grad_norm": 0.7614372372627258, + "learning_rate": 0.00019464172479901422, + "loss": 2.7301, + "step": 2183 + }, + { + "epoch": 0.17625696069728028, + "grad_norm": 0.7818898558616638, + "learning_rate": 0.00019463662529750083, + "loss": 2.6964, + "step": 2184 + }, + { + "epoch": 0.17633766443386328, + "grad_norm": 0.7849796414375305, + "learning_rate": 0.0001946315234373954, + "loss": 2.7431, + "step": 2185 + }, + { + "epoch": 0.1764183681704463, + "grad_norm": 0.7939459085464478, + "learning_rate": 0.00019462641921882506, + "loss": 2.7126, + "step": 2186 + }, + { + "epoch": 0.1764990719070293, + "grad_norm": 0.8391629457473755, + "learning_rate": 0.00019462131264191696, + "loss": 2.8394, + "step": 2187 + }, + { + "epoch": 0.1765797756436123, + "grad_norm": 0.7548067569732666, + "learning_rate": 
0.0001946162037067984, + "loss": 2.7315, + "step": 2188 + }, + { + "epoch": 0.1766604793801953, + "grad_norm": 0.8278634548187256, + "learning_rate": 0.00019461109241359674, + "loss": 2.8298, + "step": 2189 + }, + { + "epoch": 0.1767411831167783, + "grad_norm": 0.8275949954986572, + "learning_rate": 0.00019460597876243933, + "loss": 2.8072, + "step": 2190 + }, + { + "epoch": 0.1768218868533613, + "grad_norm": 0.7720363140106201, + "learning_rate": 0.00019460086275345363, + "loss": 2.7478, + "step": 2191 + }, + { + "epoch": 0.17690259058994431, + "grad_norm": 0.7795925140380859, + "learning_rate": 0.00019459574438676714, + "loss": 2.7633, + "step": 2192 + }, + { + "epoch": 0.17698329432652732, + "grad_norm": 0.7722043991088867, + "learning_rate": 0.00019459062366250743, + "loss": 2.8001, + "step": 2193 + }, + { + "epoch": 0.17706399806311032, + "grad_norm": 0.8560587763786316, + "learning_rate": 0.00019458550058080212, + "loss": 2.7494, + "step": 2194 + }, + { + "epoch": 0.17714470179969333, + "grad_norm": 0.7473754286766052, + "learning_rate": 0.00019458037514177886, + "loss": 2.7112, + "step": 2195 + }, + { + "epoch": 0.17722540553627633, + "grad_norm": 0.7625827789306641, + "learning_rate": 0.00019457524734556542, + "loss": 2.7496, + "step": 2196 + }, + { + "epoch": 0.17730610927285934, + "grad_norm": 0.7809351682662964, + "learning_rate": 0.00019457011719228962, + "loss": 2.7764, + "step": 2197 + }, + { + "epoch": 0.17738681300944234, + "grad_norm": 0.7846190333366394, + "learning_rate": 0.00019456498468207927, + "loss": 2.7189, + "step": 2198 + }, + { + "epoch": 0.17746751674602534, + "grad_norm": 0.7919551134109497, + "learning_rate": 0.0001945598498150623, + "loss": 2.7798, + "step": 2199 + }, + { + "epoch": 0.17754822048260835, + "grad_norm": 0.796183705329895, + "learning_rate": 0.0001945547125913667, + "loss": 2.7498, + "step": 2200 + }, + { + "epoch": 0.17762892421919135, + "grad_norm": 0.791668176651001, + "learning_rate": 0.0001945495730111205, + 
"loss": 2.7638, + "step": 2201 + }, + { + "epoch": 0.17770962795577436, + "grad_norm": 0.8303191661834717, + "learning_rate": 0.0001945444310744518, + "loss": 2.8079, + "step": 2202 + }, + { + "epoch": 0.17779033169235736, + "grad_norm": 0.8245917558670044, + "learning_rate": 0.00019453928678148872, + "loss": 2.7222, + "step": 2203 + }, + { + "epoch": 0.17787103542894037, + "grad_norm": 0.793456494808197, + "learning_rate": 0.0001945341401323595, + "loss": 2.8532, + "step": 2204 + }, + { + "epoch": 0.17795173916552337, + "grad_norm": 0.7574856877326965, + "learning_rate": 0.00019452899112719235, + "loss": 2.7361, + "step": 2205 + }, + { + "epoch": 0.17803244290210637, + "grad_norm": 0.7748556733131409, + "learning_rate": 0.0001945238397661157, + "loss": 2.7423, + "step": 2206 + }, + { + "epoch": 0.17811314663868938, + "grad_norm": 0.8973588347434998, + "learning_rate": 0.00019451868604925782, + "loss": 2.7604, + "step": 2207 + }, + { + "epoch": 0.17819385037527238, + "grad_norm": 0.7613589763641357, + "learning_rate": 0.00019451352997674722, + "loss": 2.7168, + "step": 2208 + }, + { + "epoch": 0.1782745541118554, + "grad_norm": 0.8152763247489929, + "learning_rate": 0.00019450837154871243, + "loss": 2.7904, + "step": 2209 + }, + { + "epoch": 0.1783552578484384, + "grad_norm": 0.8115083575248718, + "learning_rate": 0.00019450321076528194, + "loss": 2.7595, + "step": 2210 + }, + { + "epoch": 0.1784359615850214, + "grad_norm": 0.772665798664093, + "learning_rate": 0.00019449804762658438, + "loss": 2.7125, + "step": 2211 + }, + { + "epoch": 0.1785166653216044, + "grad_norm": 0.8002723455429077, + "learning_rate": 0.0001944928821327485, + "loss": 2.8121, + "step": 2212 + }, + { + "epoch": 0.1785973690581874, + "grad_norm": 0.8354858160018921, + "learning_rate": 0.00019448771428390296, + "loss": 2.8662, + "step": 2213 + }, + { + "epoch": 0.1786780727947704, + "grad_norm": 0.7799130082130432, + "learning_rate": 0.0001944825440801766, + "loss": 2.7247, + "step": 2214 + }, 
+ { + "epoch": 0.1787587765313534, + "grad_norm": 0.810265302658081, + "learning_rate": 0.00019447737152169828, + "loss": 2.7095, + "step": 2215 + }, + { + "epoch": 0.17883948026793642, + "grad_norm": 0.8305599093437195, + "learning_rate": 0.00019447219660859687, + "loss": 2.7448, + "step": 2216 + }, + { + "epoch": 0.17892018400451942, + "grad_norm": 0.7899554371833801, + "learning_rate": 0.00019446701934100138, + "loss": 2.7295, + "step": 2217 + }, + { + "epoch": 0.17900088774110243, + "grad_norm": 0.7675672173500061, + "learning_rate": 0.00019446183971904082, + "loss": 2.7236, + "step": 2218 + }, + { + "epoch": 0.1790815914776854, + "grad_norm": 0.8717279434204102, + "learning_rate": 0.0001944566577428443, + "loss": 2.8044, + "step": 2219 + }, + { + "epoch": 0.1791622952142684, + "grad_norm": 0.8151431679725647, + "learning_rate": 0.00019445147341254094, + "loss": 2.7753, + "step": 2220 + }, + { + "epoch": 0.1792429989508514, + "grad_norm": 0.8481619358062744, + "learning_rate": 0.00019444628672825998, + "loss": 2.7954, + "step": 2221 + }, + { + "epoch": 0.17932370268743442, + "grad_norm": 0.8133199214935303, + "learning_rate": 0.00019444109769013065, + "loss": 2.7235, + "step": 2222 + }, + { + "epoch": 0.17940440642401742, + "grad_norm": 0.8250097036361694, + "learning_rate": 0.00019443590629828232, + "loss": 2.8352, + "step": 2223 + }, + { + "epoch": 0.17948511016060043, + "grad_norm": 0.8279787302017212, + "learning_rate": 0.00019443071255284433, + "loss": 2.7513, + "step": 2224 + }, + { + "epoch": 0.17956581389718343, + "grad_norm": 0.7781538963317871, + "learning_rate": 0.00019442551645394612, + "loss": 2.7239, + "step": 2225 + }, + { + "epoch": 0.17964651763376643, + "grad_norm": 0.7718615531921387, + "learning_rate": 0.00019442031800171727, + "loss": 2.7387, + "step": 2226 + }, + { + "epoch": 0.17972722137034944, + "grad_norm": 0.7704512476921082, + "learning_rate": 0.00019441511719628724, + "loss": 2.792, + "step": 2227 + }, + { + "epoch": 
0.17980792510693244, + "grad_norm": 0.8290835618972778, + "learning_rate": 0.00019440991403778566, + "loss": 2.7745, + "step": 2228 + }, + { + "epoch": 0.17988862884351545, + "grad_norm": 0.8408392667770386, + "learning_rate": 0.00019440470852634227, + "loss": 2.7688, + "step": 2229 + }, + { + "epoch": 0.17996933258009845, + "grad_norm": 0.8503465056419373, + "learning_rate": 0.00019439950066208676, + "loss": 2.6747, + "step": 2230 + }, + { + "epoch": 0.18005003631668146, + "grad_norm": 0.8213364481925964, + "learning_rate": 0.0001943942904451489, + "loss": 2.7212, + "step": 2231 + }, + { + "epoch": 0.18013074005326446, + "grad_norm": 0.8511209487915039, + "learning_rate": 0.0001943890778756586, + "loss": 2.701, + "step": 2232 + }, + { + "epoch": 0.18021144378984746, + "grad_norm": 0.8034417033195496, + "learning_rate": 0.00019438386295374577, + "loss": 2.7029, + "step": 2233 + }, + { + "epoch": 0.18029214752643047, + "grad_norm": 0.7603715658187866, + "learning_rate": 0.0001943786456795403, + "loss": 2.7201, + "step": 2234 + }, + { + "epoch": 0.18037285126301347, + "grad_norm": 0.9210647940635681, + "learning_rate": 0.0001943734260531723, + "loss": 2.7847, + "step": 2235 + }, + { + "epoch": 0.18045355499959648, + "grad_norm": 0.7429665923118591, + "learning_rate": 0.00019436820407477186, + "loss": 2.7493, + "step": 2236 + }, + { + "epoch": 0.18053425873617948, + "grad_norm": 0.8290510773658752, + "learning_rate": 0.00019436297974446905, + "loss": 2.7711, + "step": 2237 + }, + { + "epoch": 0.18061496247276249, + "grad_norm": 0.7593570947647095, + "learning_rate": 0.0001943577530623941, + "loss": 2.7539, + "step": 2238 + }, + { + "epoch": 0.1806956662093455, + "grad_norm": 0.8222225308418274, + "learning_rate": 0.00019435252402867734, + "loss": 2.7703, + "step": 2239 + }, + { + "epoch": 0.1807763699459285, + "grad_norm": 0.8280842900276184, + "learning_rate": 0.00019434729264344898, + "loss": 2.7966, + "step": 2240 + }, + { + "epoch": 0.1808570736825115, + 
"grad_norm": 0.8258495926856995, + "learning_rate": 0.00019434205890683952, + "loss": 2.759, + "step": 2241 + }, + { + "epoch": 0.1809377774190945, + "grad_norm": 0.8294420838356018, + "learning_rate": 0.00019433682281897932, + "loss": 2.6996, + "step": 2242 + }, + { + "epoch": 0.1810184811556775, + "grad_norm": 0.8258811235427856, + "learning_rate": 0.0001943315843799989, + "loss": 2.774, + "step": 2243 + }, + { + "epoch": 0.1810991848922605, + "grad_norm": 0.8035838007926941, + "learning_rate": 0.0001943263435900288, + "loss": 2.7806, + "step": 2244 + }, + { + "epoch": 0.18117988862884352, + "grad_norm": 0.7900332808494568, + "learning_rate": 0.00019432110044919964, + "loss": 2.7462, + "step": 2245 + }, + { + "epoch": 0.18126059236542652, + "grad_norm": 0.8126730918884277, + "learning_rate": 0.00019431585495764212, + "loss": 2.6913, + "step": 2246 + }, + { + "epoch": 0.18134129610200952, + "grad_norm": 0.8411321043968201, + "learning_rate": 0.00019431060711548695, + "loss": 2.7503, + "step": 2247 + }, + { + "epoch": 0.18142199983859253, + "grad_norm": 0.7712867856025696, + "learning_rate": 0.0001943053569228649, + "loss": 2.7703, + "step": 2248 + }, + { + "epoch": 0.18150270357517553, + "grad_norm": 0.9093566536903381, + "learning_rate": 0.00019430010437990688, + "loss": 2.7838, + "step": 2249 + }, + { + "epoch": 0.18158340731175854, + "grad_norm": 0.8184913396835327, + "learning_rate": 0.00019429484948674372, + "loss": 2.8167, + "step": 2250 + }, + { + "epoch": 0.18166411104834154, + "grad_norm": 0.7215915322303772, + "learning_rate": 0.00019428959224350643, + "loss": 2.739, + "step": 2251 + }, + { + "epoch": 0.18174481478492455, + "grad_norm": 0.7842726111412048, + "learning_rate": 0.000194284332650326, + "loss": 2.8547, + "step": 2252 + }, + { + "epoch": 0.18182551852150755, + "grad_norm": 0.7758263349533081, + "learning_rate": 0.00019427907070733357, + "loss": 2.7746, + "step": 2253 + }, + { + "epoch": 0.18190622225809056, + "grad_norm": 0.7710500359535217, + 
"learning_rate": 0.00019427380641466027, + "loss": 2.7415, + "step": 2254 + }, + { + "epoch": 0.18198692599467356, + "grad_norm": 0.8233851194381714, + "learning_rate": 0.00019426853977243724, + "loss": 2.7471, + "step": 2255 + }, + { + "epoch": 0.18206762973125656, + "grad_norm": 0.7856284379959106, + "learning_rate": 0.00019426327078079578, + "loss": 2.6892, + "step": 2256 + }, + { + "epoch": 0.18214833346783957, + "grad_norm": 0.7978290915489197, + "learning_rate": 0.00019425799943986722, + "loss": 2.7346, + "step": 2257 + }, + { + "epoch": 0.18222903720442257, + "grad_norm": 0.8339362740516663, + "learning_rate": 0.00019425272574978293, + "loss": 2.7403, + "step": 2258 + }, + { + "epoch": 0.18230974094100558, + "grad_norm": 0.8035171031951904, + "learning_rate": 0.0001942474497106743, + "loss": 2.7444, + "step": 2259 + }, + { + "epoch": 0.18239044467758858, + "grad_norm": 0.7950475811958313, + "learning_rate": 0.0001942421713226729, + "loss": 2.7218, + "step": 2260 + }, + { + "epoch": 0.18247114841417159, + "grad_norm": 0.8439741730690002, + "learning_rate": 0.00019423689058591022, + "loss": 2.7498, + "step": 2261 + }, + { + "epoch": 0.1825518521507546, + "grad_norm": 0.8585919737815857, + "learning_rate": 0.00019423160750051789, + "loss": 2.7459, + "step": 2262 + }, + { + "epoch": 0.1826325558873376, + "grad_norm": 0.857276201248169, + "learning_rate": 0.00019422632206662755, + "loss": 2.8404, + "step": 2263 + }, + { + "epoch": 0.1827132596239206, + "grad_norm": 0.7692707777023315, + "learning_rate": 0.000194221034284371, + "loss": 2.8069, + "step": 2264 + }, + { + "epoch": 0.1827939633605036, + "grad_norm": 0.9107782244682312, + "learning_rate": 0.00019421574415387998, + "loss": 2.7554, + "step": 2265 + }, + { + "epoch": 0.1828746670970866, + "grad_norm": 0.763300895690918, + "learning_rate": 0.00019421045167528628, + "loss": 2.8031, + "step": 2266 + }, + { + "epoch": 0.1829553708336696, + "grad_norm": 0.8625530004501343, + "learning_rate": 
0.0001942051568487219, + "loss": 2.7622, + "step": 2267 + }, + { + "epoch": 0.18303607457025262, + "grad_norm": 0.8483080863952637, + "learning_rate": 0.00019419985967431875, + "loss": 2.7726, + "step": 2268 + }, + { + "epoch": 0.18311677830683562, + "grad_norm": 0.8295309543609619, + "learning_rate": 0.00019419456015220884, + "loss": 2.7676, + "step": 2269 + }, + { + "epoch": 0.1831974820434186, + "grad_norm": 0.812976062297821, + "learning_rate": 0.0001941892582825243, + "loss": 2.745, + "step": 2270 + }, + { + "epoch": 0.1832781857800016, + "grad_norm": 0.799846351146698, + "learning_rate": 0.00019418395406539717, + "loss": 2.7474, + "step": 2271 + }, + { + "epoch": 0.1833588895165846, + "grad_norm": 0.7825174331665039, + "learning_rate": 0.00019417864750095976, + "loss": 2.7982, + "step": 2272 + }, + { + "epoch": 0.1834395932531676, + "grad_norm": 0.8331060409545898, + "learning_rate": 0.00019417333858934424, + "loss": 2.7279, + "step": 2273 + }, + { + "epoch": 0.18352029698975061, + "grad_norm": 0.8579809665679932, + "learning_rate": 0.00019416802733068295, + "loss": 2.7425, + "step": 2274 + }, + { + "epoch": 0.18360100072633362, + "grad_norm": 0.8643589019775391, + "learning_rate": 0.0001941627137251083, + "loss": 2.7369, + "step": 2275 + }, + { + "epoch": 0.18368170446291662, + "grad_norm": 0.9086846113204956, + "learning_rate": 0.00019415739777275265, + "loss": 2.7681, + "step": 2276 + }, + { + "epoch": 0.18376240819949963, + "grad_norm": 0.8442896604537964, + "learning_rate": 0.00019415207947374853, + "loss": 2.7733, + "step": 2277 + }, + { + "epoch": 0.18384311193608263, + "grad_norm": 0.7858592867851257, + "learning_rate": 0.00019414675882822846, + "loss": 2.7726, + "step": 2278 + }, + { + "epoch": 0.18392381567266564, + "grad_norm": 0.8191118240356445, + "learning_rate": 0.00019414143583632503, + "loss": 2.8142, + "step": 2279 + }, + { + "epoch": 0.18400451940924864, + "grad_norm": 0.8093815445899963, + "learning_rate": 0.00019413611049817097, + "loss": 
2.7068, + "step": 2280 + }, + { + "epoch": 0.18408522314583164, + "grad_norm": 0.80247563123703, + "learning_rate": 0.00019413078281389895, + "loss": 2.7459, + "step": 2281 + }, + { + "epoch": 0.18416592688241465, + "grad_norm": 0.8200877904891968, + "learning_rate": 0.00019412545278364176, + "loss": 2.6963, + "step": 2282 + }, + { + "epoch": 0.18424663061899765, + "grad_norm": 0.870662271976471, + "learning_rate": 0.00019412012040753224, + "loss": 2.8636, + "step": 2283 + }, + { + "epoch": 0.18432733435558066, + "grad_norm": 0.7626601457595825, + "learning_rate": 0.00019411478568570332, + "loss": 2.8082, + "step": 2284 + }, + { + "epoch": 0.18440803809216366, + "grad_norm": 0.7492787837982178, + "learning_rate": 0.00019410944861828787, + "loss": 2.7231, + "step": 2285 + }, + { + "epoch": 0.18448874182874667, + "grad_norm": 0.8172419667243958, + "learning_rate": 0.000194104109205419, + "loss": 2.7054, + "step": 2286 + }, + { + "epoch": 0.18456944556532967, + "grad_norm": 0.7749670147895813, + "learning_rate": 0.0001940987674472297, + "loss": 2.6907, + "step": 2287 + }, + { + "epoch": 0.18465014930191267, + "grad_norm": 0.8855465054512024, + "learning_rate": 0.00019409342334385316, + "loss": 2.7439, + "step": 2288 + }, + { + "epoch": 0.18473085303849568, + "grad_norm": 0.8066419363021851, + "learning_rate": 0.00019408807689542257, + "loss": 2.7126, + "step": 2289 + }, + { + "epoch": 0.18481155677507868, + "grad_norm": 0.7759004235267639, + "learning_rate": 0.00019408272810207114, + "loss": 2.7207, + "step": 2290 + }, + { + "epoch": 0.1848922605116617, + "grad_norm": 0.8593513369560242, + "learning_rate": 0.00019407737696393215, + "loss": 2.7375, + "step": 2291 + }, + { + "epoch": 0.1849729642482447, + "grad_norm": 0.8154759407043457, + "learning_rate": 0.00019407202348113904, + "loss": 2.7608, + "step": 2292 + }, + { + "epoch": 0.1850536679848277, + "grad_norm": 0.7912892699241638, + "learning_rate": 0.0001940666676538252, + "loss": 2.7886, + "step": 2293 + }, + { + 
"epoch": 0.1851343717214107, + "grad_norm": 0.9184576272964478, + "learning_rate": 0.0001940613094821241, + "loss": 2.7867, + "step": 2294 + }, + { + "epoch": 0.1852150754579937, + "grad_norm": 0.8114588856697083, + "learning_rate": 0.0001940559489661693, + "loss": 2.8105, + "step": 2295 + }, + { + "epoch": 0.1852957791945767, + "grad_norm": 0.7681595683097839, + "learning_rate": 0.00019405058610609438, + "loss": 2.7707, + "step": 2296 + }, + { + "epoch": 0.18537648293115971, + "grad_norm": 0.7719643712043762, + "learning_rate": 0.000194045220902033, + "loss": 2.6767, + "step": 2297 + }, + { + "epoch": 0.18545718666774272, + "grad_norm": 0.7602487206459045, + "learning_rate": 0.00019403985335411888, + "loss": 2.7698, + "step": 2298 + }, + { + "epoch": 0.18553789040432572, + "grad_norm": 0.8044554591178894, + "learning_rate": 0.00019403448346248578, + "loss": 2.7578, + "step": 2299 + }, + { + "epoch": 0.18561859414090873, + "grad_norm": 0.7830328345298767, + "learning_rate": 0.00019402911122726757, + "loss": 2.7113, + "step": 2300 + }, + { + "epoch": 0.18569929787749173, + "grad_norm": 0.7793100476264954, + "learning_rate": 0.0001940237366485981, + "loss": 2.7388, + "step": 2301 + }, + { + "epoch": 0.18578000161407474, + "grad_norm": 0.9127374887466431, + "learning_rate": 0.00019401835972661133, + "loss": 2.7459, + "step": 2302 + }, + { + "epoch": 0.18586070535065774, + "grad_norm": 0.8007177114486694, + "learning_rate": 0.00019401298046144128, + "loss": 2.776, + "step": 2303 + }, + { + "epoch": 0.18594140908724074, + "grad_norm": 0.7384614944458008, + "learning_rate": 0.000194007598853222, + "loss": 2.6819, + "step": 2304 + }, + { + "epoch": 0.18602211282382375, + "grad_norm": 0.798909068107605, + "learning_rate": 0.0001940022149020876, + "loss": 2.7218, + "step": 2305 + }, + { + "epoch": 0.18610281656040675, + "grad_norm": 0.8388963341712952, + "learning_rate": 0.0001939968286081723, + "loss": 2.8248, + "step": 2306 + }, + { + "epoch": 0.18618352029698976, + 
"grad_norm": 0.8411754369735718, + "learning_rate": 0.0001939914399716103, + "loss": 2.7575, + "step": 2307 + }, + { + "epoch": 0.18626422403357276, + "grad_norm": 0.7936103343963623, + "learning_rate": 0.00019398604899253594, + "loss": 2.7488, + "step": 2308 + }, + { + "epoch": 0.18634492777015577, + "grad_norm": 0.7913734912872314, + "learning_rate": 0.00019398065567108357, + "loss": 2.7963, + "step": 2309 + }, + { + "epoch": 0.18642563150673877, + "grad_norm": 0.8341575860977173, + "learning_rate": 0.00019397526000738754, + "loss": 2.7698, + "step": 2310 + }, + { + "epoch": 0.18650633524332177, + "grad_norm": 0.8323128819465637, + "learning_rate": 0.00019396986200158244, + "loss": 2.7218, + "step": 2311 + }, + { + "epoch": 0.18658703897990478, + "grad_norm": 0.748073160648346, + "learning_rate": 0.0001939644616538027, + "loss": 2.7798, + "step": 2312 + }, + { + "epoch": 0.18666774271648778, + "grad_norm": 0.8166958689689636, + "learning_rate": 0.00019395905896418296, + "loss": 2.661, + "step": 2313 + }, + { + "epoch": 0.1867484464530708, + "grad_norm": 0.796791672706604, + "learning_rate": 0.00019395365393285786, + "loss": 2.7297, + "step": 2314 + }, + { + "epoch": 0.1868291501896538, + "grad_norm": 0.7851170897483826, + "learning_rate": 0.0001939482465599621, + "loss": 2.7798, + "step": 2315 + }, + { + "epoch": 0.1869098539262368, + "grad_norm": 0.7545836567878723, + "learning_rate": 0.00019394283684563045, + "loss": 2.7327, + "step": 2316 + }, + { + "epoch": 0.1869905576628198, + "grad_norm": 0.8100360631942749, + "learning_rate": 0.00019393742478999776, + "loss": 2.7901, + "step": 2317 + }, + { + "epoch": 0.1870712613994028, + "grad_norm": 0.7874314785003662, + "learning_rate": 0.00019393201039319887, + "loss": 2.7597, + "step": 2318 + }, + { + "epoch": 0.1871519651359858, + "grad_norm": 0.7698730826377869, + "learning_rate": 0.00019392659365536876, + "loss": 2.7327, + "step": 2319 + }, + { + "epoch": 0.1872326688725688, + "grad_norm": 0.7417994141578674, + 
"learning_rate": 0.0001939211745766424, + "loss": 2.7413, + "step": 2320 + }, + { + "epoch": 0.1873133726091518, + "grad_norm": 0.7823258638381958, + "learning_rate": 0.00019391575315715485, + "loss": 2.7577, + "step": 2321 + }, + { + "epoch": 0.1873940763457348, + "grad_norm": 0.82382732629776, + "learning_rate": 0.00019391032939704124, + "loss": 2.7769, + "step": 2322 + }, + { + "epoch": 0.1874747800823178, + "grad_norm": 0.8405026197433472, + "learning_rate": 0.0001939049032964367, + "loss": 2.8402, + "step": 2323 + }, + { + "epoch": 0.1875554838189008, + "grad_norm": 0.8307906985282898, + "learning_rate": 0.00019389947485547654, + "loss": 2.7642, + "step": 2324 + }, + { + "epoch": 0.1876361875554838, + "grad_norm": 0.8618248701095581, + "learning_rate": 0.000193894044074296, + "loss": 2.7853, + "step": 2325 + }, + { + "epoch": 0.1877168912920668, + "grad_norm": 0.8040831685066223, + "learning_rate": 0.00019388861095303046, + "loss": 2.7467, + "step": 2326 + }, + { + "epoch": 0.18779759502864982, + "grad_norm": 0.7723637223243713, + "learning_rate": 0.0001938831754918153, + "loss": 2.7222, + "step": 2327 + }, + { + "epoch": 0.18787829876523282, + "grad_norm": 0.8189084529876709, + "learning_rate": 0.000193877737690786, + "loss": 2.7857, + "step": 2328 + }, + { + "epoch": 0.18795900250181583, + "grad_norm": 0.8335791826248169, + "learning_rate": 0.00019387229755007805, + "loss": 2.6997, + "step": 2329 + }, + { + "epoch": 0.18803970623839883, + "grad_norm": 0.7732782959938049, + "learning_rate": 0.00019386685506982707, + "loss": 2.7155, + "step": 2330 + }, + { + "epoch": 0.18812040997498183, + "grad_norm": 0.8262906670570374, + "learning_rate": 0.0001938614102501687, + "loss": 2.7638, + "step": 2331 + }, + { + "epoch": 0.18820111371156484, + "grad_norm": 0.7969058156013489, + "learning_rate": 0.00019385596309123862, + "loss": 2.7363, + "step": 2332 + }, + { + "epoch": 0.18828181744814784, + "grad_norm": 0.7834853529930115, + "learning_rate": 0.0001938505135931726, 
+ "loss": 2.7205, + "step": 2333 + }, + { + "epoch": 0.18836252118473085, + "grad_norm": 0.748481810092926, + "learning_rate": 0.00019384506175610647, + "loss": 2.7759, + "step": 2334 + }, + { + "epoch": 0.18844322492131385, + "grad_norm": 0.8137786984443665, + "learning_rate": 0.00019383960758017604, + "loss": 2.828, + "step": 2335 + }, + { + "epoch": 0.18852392865789686, + "grad_norm": 0.8065745234489441, + "learning_rate": 0.00019383415106551734, + "loss": 2.7408, + "step": 2336 + }, + { + "epoch": 0.18860463239447986, + "grad_norm": 0.768643856048584, + "learning_rate": 0.0001938286922122663, + "loss": 2.6503, + "step": 2337 + }, + { + "epoch": 0.18868533613106286, + "grad_norm": 0.7677921652793884, + "learning_rate": 0.00019382323102055897, + "loss": 2.7088, + "step": 2338 + }, + { + "epoch": 0.18876603986764587, + "grad_norm": 0.7648717164993286, + "learning_rate": 0.0001938177674905315, + "loss": 2.7015, + "step": 2339 + }, + { + "epoch": 0.18884674360422887, + "grad_norm": 0.7517116665840149, + "learning_rate": 0.00019381230162231997, + "loss": 2.7095, + "step": 2340 + }, + { + "epoch": 0.18892744734081188, + "grad_norm": 0.8147841691970825, + "learning_rate": 0.00019380683341606067, + "loss": 2.8563, + "step": 2341 + }, + { + "epoch": 0.18900815107739488, + "grad_norm": 0.7849822640419006, + "learning_rate": 0.00019380136287188988, + "loss": 2.7432, + "step": 2342 + }, + { + "epoch": 0.18908885481397789, + "grad_norm": 0.813811719417572, + "learning_rate": 0.0001937958899899439, + "loss": 2.7419, + "step": 2343 + }, + { + "epoch": 0.1891695585505609, + "grad_norm": 0.8142707943916321, + "learning_rate": 0.00019379041477035923, + "loss": 2.7658, + "step": 2344 + }, + { + "epoch": 0.1892502622871439, + "grad_norm": 0.7594506740570068, + "learning_rate": 0.00019378493721327217, + "loss": 2.7298, + "step": 2345 + }, + { + "epoch": 0.1893309660237269, + "grad_norm": 0.8374232053756714, + "learning_rate": 0.00019377945731881936, + "loss": 2.8112, + "step": 2346 
+ }, + { + "epoch": 0.1894116697603099, + "grad_norm": 0.783608615398407, + "learning_rate": 0.00019377397508713734, + "loss": 2.8168, + "step": 2347 + }, + { + "epoch": 0.1894923734968929, + "grad_norm": 0.720214307308197, + "learning_rate": 0.0001937684905183627, + "loss": 2.7516, + "step": 2348 + }, + { + "epoch": 0.1895730772334759, + "grad_norm": 0.7939600944519043, + "learning_rate": 0.0001937630036126322, + "loss": 2.7609, + "step": 2349 + }, + { + "epoch": 0.18965378097005892, + "grad_norm": 0.787315309047699, + "learning_rate": 0.00019375751437008252, + "loss": 2.758, + "step": 2350 + }, + { + "epoch": 0.18973448470664192, + "grad_norm": 0.7862411141395569, + "learning_rate": 0.00019375202279085053, + "loss": 2.6866, + "step": 2351 + }, + { + "epoch": 0.18981518844322492, + "grad_norm": 0.8651136159896851, + "learning_rate": 0.000193746528875073, + "loss": 2.7488, + "step": 2352 + }, + { + "epoch": 0.18989589217980793, + "grad_norm": 0.8150602579116821, + "learning_rate": 0.00019374103262288696, + "loss": 2.7417, + "step": 2353 + }, + { + "epoch": 0.18997659591639093, + "grad_norm": 0.9053540229797363, + "learning_rate": 0.00019373553403442934, + "loss": 2.7587, + "step": 2354 + }, + { + "epoch": 0.19005729965297394, + "grad_norm": 0.8775703310966492, + "learning_rate": 0.0001937300331098372, + "loss": 2.733, + "step": 2355 + }, + { + "epoch": 0.19013800338955694, + "grad_norm": 0.7714357972145081, + "learning_rate": 0.0001937245298492476, + "loss": 2.7595, + "step": 2356 + }, + { + "epoch": 0.19021870712613995, + "grad_norm": 0.8648017048835754, + "learning_rate": 0.0001937190242527977, + "loss": 2.7944, + "step": 2357 + }, + { + "epoch": 0.19029941086272295, + "grad_norm": 0.9367388486862183, + "learning_rate": 0.00019371351632062477, + "loss": 2.7902, + "step": 2358 + }, + { + "epoch": 0.19038011459930596, + "grad_norm": 0.8116368651390076, + "learning_rate": 0.00019370800605286604, + "loss": 2.7291, + "step": 2359 + }, + { + "epoch": 
0.19046081833588896, + "grad_norm": 0.7892753481864929, + "learning_rate": 0.00019370249344965882, + "loss": 2.8192, + "step": 2360 + }, + { + "epoch": 0.19054152207247196, + "grad_norm": 0.8109372854232788, + "learning_rate": 0.00019369697851114056, + "loss": 2.6982, + "step": 2361 + }, + { + "epoch": 0.19062222580905497, + "grad_norm": 0.8756314516067505, + "learning_rate": 0.00019369146123744864, + "loss": 2.744, + "step": 2362 + }, + { + "epoch": 0.19070292954563797, + "grad_norm": 0.7400399446487427, + "learning_rate": 0.00019368594162872058, + "loss": 2.7328, + "step": 2363 + }, + { + "epoch": 0.19078363328222098, + "grad_norm": 0.8223158717155457, + "learning_rate": 0.000193680419685094, + "loss": 2.7614, + "step": 2364 + }, + { + "epoch": 0.19086433701880398, + "grad_norm": 0.7350139617919922, + "learning_rate": 0.00019367489540670645, + "loss": 2.7074, + "step": 2365 + }, + { + "epoch": 0.19094504075538699, + "grad_norm": 0.7915631532669067, + "learning_rate": 0.00019366936879369563, + "loss": 2.7835, + "step": 2366 + }, + { + "epoch": 0.19102574449197, + "grad_norm": 0.7765628099441528, + "learning_rate": 0.00019366383984619932, + "loss": 2.765, + "step": 2367 + }, + { + "epoch": 0.191106448228553, + "grad_norm": 0.8127059936523438, + "learning_rate": 0.00019365830856435525, + "loss": 2.7753, + "step": 2368 + }, + { + "epoch": 0.191187151965136, + "grad_norm": 0.8652897477149963, + "learning_rate": 0.0001936527749483013, + "loss": 2.7137, + "step": 2369 + }, + { + "epoch": 0.191267855701719, + "grad_norm": 0.8086774945259094, + "learning_rate": 0.00019364723899817541, + "loss": 2.7209, + "step": 2370 + }, + { + "epoch": 0.191348559438302, + "grad_norm": 0.7965098023414612, + "learning_rate": 0.00019364170071411554, + "loss": 2.786, + "step": 2371 + }, + { + "epoch": 0.19142926317488498, + "grad_norm": 0.7954064607620239, + "learning_rate": 0.00019363616009625967, + "loss": 2.7508, + "step": 2372 + }, + { + "epoch": 0.191509966911468, + "grad_norm": 
0.7835928201675415, + "learning_rate": 0.00019363061714474595, + "loss": 2.7423, + "step": 2373 + }, + { + "epoch": 0.191590670648051, + "grad_norm": 0.8720580339431763, + "learning_rate": 0.0001936250718597125, + "loss": 2.7877, + "step": 2374 + }, + { + "epoch": 0.191671374384634, + "grad_norm": 0.836066484451294, + "learning_rate": 0.00019361952424129747, + "loss": 2.8456, + "step": 2375 + }, + { + "epoch": 0.191752078121217, + "grad_norm": 0.793666660785675, + "learning_rate": 0.00019361397428963923, + "loss": 2.786, + "step": 2376 + }, + { + "epoch": 0.1918327818578, + "grad_norm": 0.8573217391967773, + "learning_rate": 0.000193608422004876, + "loss": 2.7569, + "step": 2377 + }, + { + "epoch": 0.191913485594383, + "grad_norm": 0.81243896484375, + "learning_rate": 0.00019360286738714623, + "loss": 2.771, + "step": 2378 + }, + { + "epoch": 0.19199418933096601, + "grad_norm": 0.7449626326560974, + "learning_rate": 0.00019359731043658832, + "loss": 2.7479, + "step": 2379 + }, + { + "epoch": 0.19207489306754902, + "grad_norm": 0.8124165534973145, + "learning_rate": 0.00019359175115334076, + "loss": 2.7602, + "step": 2380 + }, + { + "epoch": 0.19215559680413202, + "grad_norm": 0.7786986827850342, + "learning_rate": 0.00019358618953754211, + "loss": 2.6926, + "step": 2381 + }, + { + "epoch": 0.19223630054071503, + "grad_norm": 0.7987258434295654, + "learning_rate": 0.000193580625589331, + "loss": 2.7573, + "step": 2382 + }, + { + "epoch": 0.19231700427729803, + "grad_norm": 0.8236463665962219, + "learning_rate": 0.00019357505930884606, + "loss": 2.6755, + "step": 2383 + }, + { + "epoch": 0.19239770801388104, + "grad_norm": 0.8285779356956482, + "learning_rate": 0.00019356949069622602, + "loss": 2.7658, + "step": 2384 + }, + { + "epoch": 0.19247841175046404, + "grad_norm": 0.7823960781097412, + "learning_rate": 0.0001935639197516097, + "loss": 2.7404, + "step": 2385 + }, + { + "epoch": 0.19255911548704704, + "grad_norm": 0.968638002872467, + "learning_rate": 
0.00019355834647513591, + "loss": 2.7836, + "step": 2386 + }, + { + "epoch": 0.19263981922363005, + "grad_norm": 0.8170328736305237, + "learning_rate": 0.00019355277086694357, + "loss": 2.7816, + "step": 2387 + }, + { + "epoch": 0.19272052296021305, + "grad_norm": 0.8342583179473877, + "learning_rate": 0.00019354719292717163, + "loss": 2.8204, + "step": 2388 + }, + { + "epoch": 0.19280122669679606, + "grad_norm": 0.8160435557365417, + "learning_rate": 0.0001935416126559591, + "loss": 2.6938, + "step": 2389 + }, + { + "epoch": 0.19288193043337906, + "grad_norm": 0.7888174653053284, + "learning_rate": 0.00019353603005344504, + "loss": 2.6804, + "step": 2390 + }, + { + "epoch": 0.19296263416996207, + "grad_norm": 0.8389205932617188, + "learning_rate": 0.00019353044511976865, + "loss": 2.7571, + "step": 2391 + }, + { + "epoch": 0.19304333790654507, + "grad_norm": 0.7920562028884888, + "learning_rate": 0.00019352485785506906, + "loss": 2.7174, + "step": 2392 + }, + { + "epoch": 0.19312404164312807, + "grad_norm": 0.7853459715843201, + "learning_rate": 0.00019351926825948555, + "loss": 2.7626, + "step": 2393 + }, + { + "epoch": 0.19320474537971108, + "grad_norm": 0.9109459519386292, + "learning_rate": 0.0001935136763331574, + "loss": 2.7568, + "step": 2394 + }, + { + "epoch": 0.19328544911629408, + "grad_norm": 0.7983853816986084, + "learning_rate": 0.00019350808207622397, + "loss": 2.7412, + "step": 2395 + }, + { + "epoch": 0.1933661528528771, + "grad_norm": 0.7416854500770569, + "learning_rate": 0.00019350248548882472, + "loss": 2.7335, + "step": 2396 + }, + { + "epoch": 0.1934468565894601, + "grad_norm": 0.7305171489715576, + "learning_rate": 0.0001934968865710991, + "loss": 2.7295, + "step": 2397 + }, + { + "epoch": 0.1935275603260431, + "grad_norm": 0.7717033624649048, + "learning_rate": 0.0001934912853231867, + "loss": 2.7568, + "step": 2398 + }, + { + "epoch": 0.1936082640626261, + "grad_norm": 0.7833831906318665, + "learning_rate": 0.00019348568174522705, + 
"loss": 2.736, + "step": 2399 + }, + { + "epoch": 0.1936889677992091, + "grad_norm": 0.872831404209137, + "learning_rate": 0.00019348007583735983, + "loss": 2.7719, + "step": 2400 + }, + { + "epoch": 0.1937696715357921, + "grad_norm": 0.8389193415641785, + "learning_rate": 0.0001934744675997248, + "loss": 2.7572, + "step": 2401 + }, + { + "epoch": 0.19385037527237511, + "grad_norm": 0.8442249298095703, + "learning_rate": 0.00019346885703246165, + "loss": 2.8117, + "step": 2402 + }, + { + "epoch": 0.19393107900895812, + "grad_norm": 0.8451170325279236, + "learning_rate": 0.00019346324413571027, + "loss": 2.7216, + "step": 2403 + }, + { + "epoch": 0.19401178274554112, + "grad_norm": 0.898529052734375, + "learning_rate": 0.00019345762890961052, + "loss": 2.8119, + "step": 2404 + }, + { + "epoch": 0.19409248648212413, + "grad_norm": 0.8302313685417175, + "learning_rate": 0.00019345201135430236, + "loss": 2.76, + "step": 2405 + }, + { + "epoch": 0.19417319021870713, + "grad_norm": 0.8975207209587097, + "learning_rate": 0.00019344639146992582, + "loss": 2.8043, + "step": 2406 + }, + { + "epoch": 0.19425389395529014, + "grad_norm": 0.8972581028938293, + "learning_rate": 0.0001934407692566209, + "loss": 2.7487, + "step": 2407 + }, + { + "epoch": 0.19433459769187314, + "grad_norm": 0.8311447501182556, + "learning_rate": 0.00019343514471452776, + "loss": 2.7653, + "step": 2408 + }, + { + "epoch": 0.19441530142845614, + "grad_norm": 0.8336243033409119, + "learning_rate": 0.0001934295178437866, + "loss": 2.753, + "step": 2409 + }, + { + "epoch": 0.19449600516503915, + "grad_norm": 0.8339207172393799, + "learning_rate": 0.0001934238886445376, + "loss": 2.7643, + "step": 2410 + }, + { + "epoch": 0.19457670890162215, + "grad_norm": 0.906074583530426, + "learning_rate": 0.0001934182571169211, + "loss": 2.7777, + "step": 2411 + }, + { + "epoch": 0.19465741263820516, + "grad_norm": 0.8759943246841431, + "learning_rate": 0.00019341262326107742, + "loss": 2.77, + "step": 2412 + }, + { 
+ "epoch": 0.19473811637478816, + "grad_norm": 0.8399369716644287, + "learning_rate": 0.00019340698707714699, + "loss": 2.752, + "step": 2413 + }, + { + "epoch": 0.19481882011137117, + "grad_norm": 0.8551808595657349, + "learning_rate": 0.00019340134856527026, + "loss": 2.6727, + "step": 2414 + }, + { + "epoch": 0.19489952384795417, + "grad_norm": 0.7660732865333557, + "learning_rate": 0.00019339570772558778, + "loss": 2.7491, + "step": 2415 + }, + { + "epoch": 0.19498022758453717, + "grad_norm": 0.8257685303688049, + "learning_rate": 0.00019339006455824015, + "loss": 2.7584, + "step": 2416 + }, + { + "epoch": 0.19506093132112018, + "grad_norm": 0.797275960445404, + "learning_rate": 0.00019338441906336794, + "loss": 2.7051, + "step": 2417 + }, + { + "epoch": 0.19514163505770318, + "grad_norm": 0.8311913013458252, + "learning_rate": 0.00019337877124111193, + "loss": 2.8084, + "step": 2418 + }, + { + "epoch": 0.1952223387942862, + "grad_norm": 0.7995893359184265, + "learning_rate": 0.0001933731210916128, + "loss": 2.7556, + "step": 2419 + }, + { + "epoch": 0.1953030425308692, + "grad_norm": 0.792850136756897, + "learning_rate": 0.00019336746861501147, + "loss": 2.7289, + "step": 2420 + }, + { + "epoch": 0.1953837462674522, + "grad_norm": 0.8058848977088928, + "learning_rate": 0.00019336181381144873, + "loss": 2.7394, + "step": 2421 + }, + { + "epoch": 0.1954644500040352, + "grad_norm": 0.8267124891281128, + "learning_rate": 0.00019335615668106555, + "loss": 2.771, + "step": 2422 + }, + { + "epoch": 0.19554515374061818, + "grad_norm": 0.7641060948371887, + "learning_rate": 0.00019335049722400292, + "loss": 2.7311, + "step": 2423 + }, + { + "epoch": 0.19562585747720118, + "grad_norm": 0.8023245930671692, + "learning_rate": 0.00019334483544040186, + "loss": 2.7658, + "step": 2424 + }, + { + "epoch": 0.19570656121378419, + "grad_norm": 0.8341927528381348, + "learning_rate": 0.00019333917133040348, + "loss": 2.7476, + "step": 2425 + }, + { + "epoch": 0.1957872649503672, + 
"grad_norm": 0.7985726594924927, + "learning_rate": 0.000193333504894149, + "loss": 2.7362, + "step": 2426 + }, + { + "epoch": 0.1958679686869502, + "grad_norm": 0.7267594933509827, + "learning_rate": 0.0001933278361317796, + "loss": 2.6875, + "step": 2427 + }, + { + "epoch": 0.1959486724235332, + "grad_norm": 0.8292990326881409, + "learning_rate": 0.00019332216504343652, + "loss": 2.7619, + "step": 2428 + }, + { + "epoch": 0.1960293761601162, + "grad_norm": 0.7549588680267334, + "learning_rate": 0.00019331649162926116, + "loss": 2.7385, + "step": 2429 + }, + { + "epoch": 0.1961100798966992, + "grad_norm": 0.7688446640968323, + "learning_rate": 0.0001933108158893949, + "loss": 2.7544, + "step": 2430 + }, + { + "epoch": 0.1961907836332822, + "grad_norm": 0.8168436884880066, + "learning_rate": 0.00019330513782397918, + "loss": 2.8013, + "step": 2431 + }, + { + "epoch": 0.19627148736986522, + "grad_norm": 0.8405759334564209, + "learning_rate": 0.00019329945743315556, + "loss": 2.7299, + "step": 2432 + }, + { + "epoch": 0.19635219110644822, + "grad_norm": 0.79430091381073, + "learning_rate": 0.00019329377471706554, + "loss": 2.7293, + "step": 2433 + }, + { + "epoch": 0.19643289484303122, + "grad_norm": 0.8428656458854675, + "learning_rate": 0.0001932880896758508, + "loss": 2.8211, + "step": 2434 + }, + { + "epoch": 0.19651359857961423, + "grad_norm": 0.7883139252662659, + "learning_rate": 0.00019328240230965298, + "loss": 2.6943, + "step": 2435 + }, + { + "epoch": 0.19659430231619723, + "grad_norm": 0.7539335489273071, + "learning_rate": 0.00019327671261861387, + "loss": 2.6926, + "step": 2436 + }, + { + "epoch": 0.19667500605278024, + "grad_norm": 0.9986057281494141, + "learning_rate": 0.00019327102060287524, + "loss": 2.7851, + "step": 2437 + }, + { + "epoch": 0.19675570978936324, + "grad_norm": 0.7716113924980164, + "learning_rate": 0.000193265326262579, + "loss": 2.752, + "step": 2438 + }, + { + "epoch": 0.19683641352594625, + "grad_norm": 0.9134296774864197, + 
"learning_rate": 0.000193259629597867, + "loss": 2.7698, + "step": 2439 + }, + { + "epoch": 0.19691711726252925, + "grad_norm": 0.7966345548629761, + "learning_rate": 0.00019325393060888124, + "loss": 2.7839, + "step": 2440 + }, + { + "epoch": 0.19699782099911226, + "grad_norm": 0.8051251173019409, + "learning_rate": 0.0001932482292957638, + "loss": 2.7322, + "step": 2441 + }, + { + "epoch": 0.19707852473569526, + "grad_norm": 0.843169629573822, + "learning_rate": 0.0001932425256586567, + "loss": 2.8263, + "step": 2442 + }, + { + "epoch": 0.19715922847227826, + "grad_norm": 0.7552370429039001, + "learning_rate": 0.00019323681969770213, + "loss": 2.7342, + "step": 2443 + }, + { + "epoch": 0.19723993220886127, + "grad_norm": 0.844473123550415, + "learning_rate": 0.0001932311114130423, + "loss": 2.776, + "step": 2444 + }, + { + "epoch": 0.19732063594544427, + "grad_norm": 0.8002473711967468, + "learning_rate": 0.00019322540080481945, + "loss": 2.7382, + "step": 2445 + }, + { + "epoch": 0.19740133968202728, + "grad_norm": 0.8564329147338867, + "learning_rate": 0.00019321968787317594, + "loss": 2.7592, + "step": 2446 + }, + { + "epoch": 0.19748204341861028, + "grad_norm": 0.7853825688362122, + "learning_rate": 0.00019321397261825408, + "loss": 2.7101, + "step": 2447 + }, + { + "epoch": 0.19756274715519329, + "grad_norm": 0.8482939004898071, + "learning_rate": 0.0001932082550401964, + "loss": 2.7891, + "step": 2448 + }, + { + "epoch": 0.1976434508917763, + "grad_norm": 0.8361770510673523, + "learning_rate": 0.00019320253513914536, + "loss": 2.7341, + "step": 2449 + }, + { + "epoch": 0.1977241546283593, + "grad_norm": 0.7814618945121765, + "learning_rate": 0.0001931968129152435, + "loss": 2.771, + "step": 2450 + }, + { + "epoch": 0.1978048583649423, + "grad_norm": 0.7588146924972534, + "learning_rate": 0.00019319108836863343, + "loss": 2.7577, + "step": 2451 + }, + { + "epoch": 0.1978855621015253, + "grad_norm": 0.9184895157814026, + "learning_rate": 
0.00019318536149945785, + "loss": 2.7711, + "step": 2452 + }, + { + "epoch": 0.1979662658381083, + "grad_norm": 0.8454298973083496, + "learning_rate": 0.00019317963230785947, + "loss": 2.7748, + "step": 2453 + }, + { + "epoch": 0.1980469695746913, + "grad_norm": 0.7662420868873596, + "learning_rate": 0.0001931739007939811, + "loss": 2.7704, + "step": 2454 + }, + { + "epoch": 0.19812767331127432, + "grad_norm": 0.837888777256012, + "learning_rate": 0.0001931681669579655, + "loss": 2.7613, + "step": 2455 + }, + { + "epoch": 0.19820837704785732, + "grad_norm": 0.7835226058959961, + "learning_rate": 0.0001931624307999557, + "loss": 2.6888, + "step": 2456 + }, + { + "epoch": 0.19828908078444032, + "grad_norm": 0.8491464257240295, + "learning_rate": 0.00019315669232009456, + "loss": 2.7521, + "step": 2457 + }, + { + "epoch": 0.19836978452102333, + "grad_norm": 0.7590088248252869, + "learning_rate": 0.00019315095151852516, + "loss": 2.7441, + "step": 2458 + }, + { + "epoch": 0.19845048825760633, + "grad_norm": 0.9316127300262451, + "learning_rate": 0.00019314520839539052, + "loss": 2.786, + "step": 2459 + }, + { + "epoch": 0.19853119199418934, + "grad_norm": 0.7819615006446838, + "learning_rate": 0.0001931394629508338, + "loss": 2.7003, + "step": 2460 + }, + { + "epoch": 0.19861189573077234, + "grad_norm": 0.7675932049751282, + "learning_rate": 0.0001931337151849982, + "loss": 2.7065, + "step": 2461 + }, + { + "epoch": 0.19869259946735535, + "grad_norm": 0.7797678112983704, + "learning_rate": 0.000193127965098027, + "loss": 2.7605, + "step": 2462 + }, + { + "epoch": 0.19877330320393835, + "grad_norm": 0.789544403553009, + "learning_rate": 0.00019312221269006345, + "loss": 2.7913, + "step": 2463 + }, + { + "epoch": 0.19885400694052136, + "grad_norm": 0.9594957232475281, + "learning_rate": 0.00019311645796125094, + "loss": 2.785, + "step": 2464 + }, + { + "epoch": 0.19893471067710436, + "grad_norm": 0.8154739141464233, + "learning_rate": 0.00019311070091173287, + "loss": 
2.6716, + "step": 2465 + }, + { + "epoch": 0.19901541441368736, + "grad_norm": 0.9042142629623413, + "learning_rate": 0.00019310494154165274, + "loss": 2.734, + "step": 2466 + }, + { + "epoch": 0.19909611815027037, + "grad_norm": 0.7803483605384827, + "learning_rate": 0.0001930991798511541, + "loss": 2.7052, + "step": 2467 + }, + { + "epoch": 0.19917682188685337, + "grad_norm": 0.7917614579200745, + "learning_rate": 0.00019309341584038055, + "loss": 2.728, + "step": 2468 + }, + { + "epoch": 0.19925752562343638, + "grad_norm": 0.8295063376426697, + "learning_rate": 0.00019308764950947568, + "loss": 2.7496, + "step": 2469 + }, + { + "epoch": 0.19933822936001938, + "grad_norm": 0.790831983089447, + "learning_rate": 0.0001930818808585833, + "loss": 2.7356, + "step": 2470 + }, + { + "epoch": 0.19941893309660239, + "grad_norm": 0.8527843952178955, + "learning_rate": 0.0001930761098878471, + "loss": 2.718, + "step": 2471 + }, + { + "epoch": 0.1994996368331854, + "grad_norm": 0.8518494367599487, + "learning_rate": 0.00019307033659741096, + "loss": 2.7189, + "step": 2472 + }, + { + "epoch": 0.1995803405697684, + "grad_norm": 0.8027220368385315, + "learning_rate": 0.00019306456098741872, + "loss": 2.7272, + "step": 2473 + }, + { + "epoch": 0.19966104430635137, + "grad_norm": 0.7516468167304993, + "learning_rate": 0.00019305878305801434, + "loss": 2.798, + "step": 2474 + }, + { + "epoch": 0.19974174804293438, + "grad_norm": 0.7676397562026978, + "learning_rate": 0.00019305300280934187, + "loss": 2.8076, + "step": 2475 + }, + { + "epoch": 0.19982245177951738, + "grad_norm": 0.8237762451171875, + "learning_rate": 0.00019304722024154528, + "loss": 2.6998, + "step": 2476 + }, + { + "epoch": 0.19990315551610038, + "grad_norm": 0.8397759199142456, + "learning_rate": 0.0001930414353547688, + "loss": 2.806, + "step": 2477 + }, + { + "epoch": 0.1999838592526834, + "grad_norm": 0.8911117911338806, + "learning_rate": 0.00019303564814915645, + "loss": 2.7566, + "step": 2478 + }, + { + 
"epoch": 0.2000645629892664, + "grad_norm": 0.765404999256134, + "learning_rate": 0.00019302985862485264, + "loss": 2.7363, + "step": 2479 + }, + { + "epoch": 0.2001452667258494, + "grad_norm": 0.7898589372634888, + "learning_rate": 0.0001930240667820015, + "loss": 2.7007, + "step": 2480 + }, + { + "epoch": 0.2002259704624324, + "grad_norm": 0.7581521272659302, + "learning_rate": 0.0001930182726207475, + "loss": 2.7508, + "step": 2481 + }, + { + "epoch": 0.2003066741990154, + "grad_norm": 0.8179795742034912, + "learning_rate": 0.00019301247614123495, + "loss": 2.7327, + "step": 2482 + }, + { + "epoch": 0.2003873779355984, + "grad_norm": 0.8103611469268799, + "learning_rate": 0.00019300667734360838, + "loss": 2.7869, + "step": 2483 + }, + { + "epoch": 0.20046808167218141, + "grad_norm": 0.7368054389953613, + "learning_rate": 0.0001930008762280123, + "loss": 2.73, + "step": 2484 + }, + { + "epoch": 0.20054878540876442, + "grad_norm": 0.7679662108421326, + "learning_rate": 0.00019299507279459127, + "loss": 2.7905, + "step": 2485 + }, + { + "epoch": 0.20062948914534742, + "grad_norm": 0.7783839702606201, + "learning_rate": 0.0001929892670434899, + "loss": 2.6816, + "step": 2486 + }, + { + "epoch": 0.20071019288193043, + "grad_norm": 0.7575809359550476, + "learning_rate": 0.00019298345897485298, + "loss": 2.7351, + "step": 2487 + }, + { + "epoch": 0.20079089661851343, + "grad_norm": 0.7674959301948547, + "learning_rate": 0.00019297764858882514, + "loss": 2.7682, + "step": 2488 + }, + { + "epoch": 0.20087160035509644, + "grad_norm": 0.7972592115402222, + "learning_rate": 0.00019297183588555127, + "loss": 2.782, + "step": 2489 + }, + { + "epoch": 0.20095230409167944, + "grad_norm": 0.8417105674743652, + "learning_rate": 0.00019296602086517624, + "loss": 2.8173, + "step": 2490 + }, + { + "epoch": 0.20103300782826244, + "grad_norm": 0.7194239497184753, + "learning_rate": 0.00019296020352784496, + "loss": 2.7735, + "step": 2491 + }, + { + "epoch": 0.20111371156484545, + 
"grad_norm": 0.801895022392273, + "learning_rate": 0.00019295438387370237, + "loss": 2.7018, + "step": 2492 + }, + { + "epoch": 0.20119441530142845, + "grad_norm": 0.900943398475647, + "learning_rate": 0.0001929485619028936, + "loss": 2.77, + "step": 2493 + }, + { + "epoch": 0.20127511903801146, + "grad_norm": 0.7882106304168701, + "learning_rate": 0.00019294273761556366, + "loss": 2.7195, + "step": 2494 + }, + { + "epoch": 0.20135582277459446, + "grad_norm": 0.7471950054168701, + "learning_rate": 0.00019293691101185775, + "loss": 2.7346, + "step": 2495 + }, + { + "epoch": 0.20143652651117747, + "grad_norm": 0.7498352527618408, + "learning_rate": 0.00019293108209192104, + "loss": 2.7255, + "step": 2496 + }, + { + "epoch": 0.20151723024776047, + "grad_norm": 0.8233164548873901, + "learning_rate": 0.0001929252508558989, + "loss": 2.8253, + "step": 2497 + }, + { + "epoch": 0.20159793398434347, + "grad_norm": 0.7533289790153503, + "learning_rate": 0.00019291941730393658, + "loss": 2.7487, + "step": 2498 + }, + { + "epoch": 0.20167863772092648, + "grad_norm": 0.7372691035270691, + "learning_rate": 0.0001929135814361795, + "loss": 2.6799, + "step": 2499 + }, + { + "epoch": 0.20175934145750948, + "grad_norm": 0.7760890126228333, + "learning_rate": 0.00019290774325277305, + "loss": 2.8366, + "step": 2500 + }, + { + "epoch": 0.2018400451940925, + "grad_norm": 0.7653746008872986, + "learning_rate": 0.0001929019027538628, + "loss": 2.7413, + "step": 2501 + }, + { + "epoch": 0.2019207489306755, + "grad_norm": 0.7364951372146606, + "learning_rate": 0.0001928960599395943, + "loss": 2.7405, + "step": 2502 + }, + { + "epoch": 0.2020014526672585, + "grad_norm": 0.8317872285842896, + "learning_rate": 0.00019289021481011314, + "loss": 2.7186, + "step": 2503 + }, + { + "epoch": 0.2020821564038415, + "grad_norm": 0.8325691223144531, + "learning_rate": 0.00019288436736556502, + "loss": 2.7305, + "step": 2504 + }, + { + "epoch": 0.2021628601404245, + "grad_norm": 0.7674683332443237, + 
"learning_rate": 0.00019287851760609566, + "loss": 2.7171, + "step": 2505 + }, + { + "epoch": 0.2022435638770075, + "grad_norm": 0.8043155074119568, + "learning_rate": 0.00019287266553185084, + "loss": 2.7425, + "step": 2506 + }, + { + "epoch": 0.2023242676135905, + "grad_norm": 0.8522058725357056, + "learning_rate": 0.00019286681114297642, + "loss": 2.7764, + "step": 2507 + }, + { + "epoch": 0.20240497135017352, + "grad_norm": 0.7700086236000061, + "learning_rate": 0.00019286095443961832, + "loss": 2.7499, + "step": 2508 + }, + { + "epoch": 0.20248567508675652, + "grad_norm": 0.8078013062477112, + "learning_rate": 0.0001928550954219225, + "loss": 2.7863, + "step": 2509 + }, + { + "epoch": 0.20256637882333953, + "grad_norm": 0.7431712746620178, + "learning_rate": 0.00019284923409003496, + "loss": 2.8296, + "step": 2510 + }, + { + "epoch": 0.20264708255992253, + "grad_norm": 0.753754734992981, + "learning_rate": 0.00019284337044410182, + "loss": 2.722, + "step": 2511 + }, + { + "epoch": 0.20272778629650554, + "grad_norm": 0.8117631077766418, + "learning_rate": 0.00019283750448426918, + "loss": 2.7718, + "step": 2512 + }, + { + "epoch": 0.20280849003308854, + "grad_norm": 0.9149020910263062, + "learning_rate": 0.00019283163621068325, + "loss": 2.7416, + "step": 2513 + }, + { + "epoch": 0.20288919376967154, + "grad_norm": 0.8240262866020203, + "learning_rate": 0.0001928257656234903, + "loss": 2.811, + "step": 2514 + }, + { + "epoch": 0.20296989750625455, + "grad_norm": 0.7394035458564758, + "learning_rate": 0.00019281989272283657, + "loss": 2.7345, + "step": 2515 + }, + { + "epoch": 0.20305060124283755, + "grad_norm": 0.7827345132827759, + "learning_rate": 0.00019281401750886854, + "loss": 2.7955, + "step": 2516 + }, + { + "epoch": 0.20313130497942056, + "grad_norm": 0.7482333183288574, + "learning_rate": 0.00019280813998173252, + "loss": 2.6963, + "step": 2517 + }, + { + "epoch": 0.20321200871600356, + "grad_norm": 0.8187180757522583, + "learning_rate": 
0.00019280226014157509, + "loss": 2.7413, + "step": 2518 + }, + { + "epoch": 0.20329271245258657, + "grad_norm": 0.7708666920661926, + "learning_rate": 0.00019279637798854274, + "loss": 2.7636, + "step": 2519 + }, + { + "epoch": 0.20337341618916957, + "grad_norm": 0.7414180040359497, + "learning_rate": 0.00019279049352278208, + "loss": 2.7321, + "step": 2520 + }, + { + "epoch": 0.20345411992575257, + "grad_norm": 0.8172248601913452, + "learning_rate": 0.00019278460674443975, + "loss": 2.8026, + "step": 2521 + }, + { + "epoch": 0.20353482366233558, + "grad_norm": 0.7463089227676392, + "learning_rate": 0.0001927787176536625, + "loss": 2.74, + "step": 2522 + }, + { + "epoch": 0.20361552739891858, + "grad_norm": 0.7684210538864136, + "learning_rate": 0.00019277282625059704, + "loss": 2.782, + "step": 2523 + }, + { + "epoch": 0.2036962311355016, + "grad_norm": 0.9246797561645508, + "learning_rate": 0.00019276693253539027, + "loss": 2.8546, + "step": 2524 + }, + { + "epoch": 0.20377693487208456, + "grad_norm": 0.753753125667572, + "learning_rate": 0.00019276103650818906, + "loss": 2.7422, + "step": 2525 + }, + { + "epoch": 0.20385763860866757, + "grad_norm": 0.7461897134780884, + "learning_rate": 0.00019275513816914032, + "loss": 2.7575, + "step": 2526 + }, + { + "epoch": 0.20393834234525057, + "grad_norm": 0.7555257081985474, + "learning_rate": 0.00019274923751839106, + "loss": 2.7423, + "step": 2527 + }, + { + "epoch": 0.20401904608183358, + "grad_norm": 0.7628511786460876, + "learning_rate": 0.00019274333455608837, + "loss": 2.7386, + "step": 2528 + }, + { + "epoch": 0.20409974981841658, + "grad_norm": 0.7529371976852417, + "learning_rate": 0.00019273742928237937, + "loss": 2.6852, + "step": 2529 + }, + { + "epoch": 0.20418045355499959, + "grad_norm": 0.7466779351234436, + "learning_rate": 0.00019273152169741118, + "loss": 2.6996, + "step": 2530 + }, + { + "epoch": 0.2042611572915826, + "grad_norm": 0.7916153073310852, + "learning_rate": 0.0001927256118013311, + 
"loss": 2.7644, + "step": 2531 + }, + { + "epoch": 0.2043418610281656, + "grad_norm": 0.7662972211837769, + "learning_rate": 0.00019271969959428636, + "loss": 2.7497, + "step": 2532 + }, + { + "epoch": 0.2044225647647486, + "grad_norm": 0.8244680166244507, + "learning_rate": 0.00019271378507642432, + "loss": 2.7598, + "step": 2533 + }, + { + "epoch": 0.2045032685013316, + "grad_norm": 0.7721532583236694, + "learning_rate": 0.00019270786824789244, + "loss": 2.7303, + "step": 2534 + }, + { + "epoch": 0.2045839722379146, + "grad_norm": 0.7598209381103516, + "learning_rate": 0.0001927019491088381, + "loss": 2.734, + "step": 2535 + }, + { + "epoch": 0.2046646759744976, + "grad_norm": 0.7778685092926025, + "learning_rate": 0.00019269602765940887, + "loss": 2.7113, + "step": 2536 + }, + { + "epoch": 0.20474537971108062, + "grad_norm": 0.7447141408920288, + "learning_rate": 0.00019269010389975235, + "loss": 2.7205, + "step": 2537 + }, + { + "epoch": 0.20482608344766362, + "grad_norm": 0.8066664338111877, + "learning_rate": 0.00019268417783001613, + "loss": 2.7637, + "step": 2538 + }, + { + "epoch": 0.20490678718424662, + "grad_norm": 0.7055318355560303, + "learning_rate": 0.00019267824945034794, + "loss": 2.6936, + "step": 2539 + }, + { + "epoch": 0.20498749092082963, + "grad_norm": 0.832647979259491, + "learning_rate": 0.0001926723187608955, + "loss": 2.7423, + "step": 2540 + }, + { + "epoch": 0.20506819465741263, + "grad_norm": 0.7316983938217163, + "learning_rate": 0.0001926663857618066, + "loss": 2.7136, + "step": 2541 + }, + { + "epoch": 0.20514889839399564, + "grad_norm": 0.8115554451942444, + "learning_rate": 0.00019266045045322915, + "loss": 2.6964, + "step": 2542 + }, + { + "epoch": 0.20522960213057864, + "grad_norm": 0.802573025226593, + "learning_rate": 0.00019265451283531108, + "loss": 2.7989, + "step": 2543 + }, + { + "epoch": 0.20531030586716165, + "grad_norm": 0.7073348164558411, + "learning_rate": 0.00019264857290820033, + "loss": 2.7399, + "step": 2544 + 
}, + { + "epoch": 0.20539100960374465, + "grad_norm": 0.7749258279800415, + "learning_rate": 0.00019264263067204495, + "loss": 2.7321, + "step": 2545 + }, + { + "epoch": 0.20547171334032766, + "grad_norm": 0.7473557591438293, + "learning_rate": 0.00019263668612699305, + "loss": 2.7774, + "step": 2546 + }, + { + "epoch": 0.20555241707691066, + "grad_norm": 0.8073423504829407, + "learning_rate": 0.0001926307392731928, + "loss": 2.7429, + "step": 2547 + }, + { + "epoch": 0.20563312081349366, + "grad_norm": 0.9106586575508118, + "learning_rate": 0.00019262479011079235, + "loss": 2.7972, + "step": 2548 + }, + { + "epoch": 0.20571382455007667, + "grad_norm": 0.7975970506668091, + "learning_rate": 0.00019261883863994002, + "loss": 2.7561, + "step": 2549 + }, + { + "epoch": 0.20579452828665967, + "grad_norm": 0.8967030048370361, + "learning_rate": 0.00019261288486078414, + "loss": 2.7368, + "step": 2550 + }, + { + "epoch": 0.20587523202324268, + "grad_norm": 0.7157345414161682, + "learning_rate": 0.00019260692877347304, + "loss": 2.7329, + "step": 2551 + }, + { + "epoch": 0.20595593575982568, + "grad_norm": 0.8758620619773865, + "learning_rate": 0.00019260097037815524, + "loss": 2.7522, + "step": 2552 + }, + { + "epoch": 0.20603663949640869, + "grad_norm": 0.7948124408721924, + "learning_rate": 0.00019259500967497916, + "loss": 2.7675, + "step": 2553 + }, + { + "epoch": 0.2061173432329917, + "grad_norm": 0.8233941197395325, + "learning_rate": 0.00019258904666409344, + "loss": 2.7728, + "step": 2554 + }, + { + "epoch": 0.2061980469695747, + "grad_norm": 0.8084299564361572, + "learning_rate": 0.0001925830813456466, + "loss": 2.7728, + "step": 2555 + }, + { + "epoch": 0.2062787507061577, + "grad_norm": 0.8004557490348816, + "learning_rate": 0.00019257711371978737, + "loss": 2.7783, + "step": 2556 + }, + { + "epoch": 0.2063594544427407, + "grad_norm": 0.7999755144119263, + "learning_rate": 0.0001925711437866645, + "loss": 2.7632, + "step": 2557 + }, + { + "epoch": 
0.2064401581793237, + "grad_norm": 0.7317264080047607, + "learning_rate": 0.0001925651715464267, + "loss": 2.7101, + "step": 2558 + }, + { + "epoch": 0.2065208619159067, + "grad_norm": 0.7906385660171509, + "learning_rate": 0.00019255919699922287, + "loss": 2.7258, + "step": 2559 + }, + { + "epoch": 0.20660156565248972, + "grad_norm": 0.7932917475700378, + "learning_rate": 0.0001925532201452019, + "loss": 2.7714, + "step": 2560 + }, + { + "epoch": 0.20668226938907272, + "grad_norm": 0.8039286732673645, + "learning_rate": 0.00019254724098451275, + "loss": 2.7469, + "step": 2561 + }, + { + "epoch": 0.20676297312565572, + "grad_norm": 0.79400634765625, + "learning_rate": 0.00019254125951730444, + "loss": 2.7499, + "step": 2562 + }, + { + "epoch": 0.20684367686223873, + "grad_norm": 0.8072263598442078, + "learning_rate": 0.00019253527574372603, + "loss": 2.7805, + "step": 2563 + }, + { + "epoch": 0.20692438059882173, + "grad_norm": 0.7117579579353333, + "learning_rate": 0.00019252928966392667, + "loss": 2.7321, + "step": 2564 + }, + { + "epoch": 0.20700508433540474, + "grad_norm": 0.7080324292182922, + "learning_rate": 0.00019252330127805554, + "loss": 2.7225, + "step": 2565 + }, + { + "epoch": 0.20708578807198774, + "grad_norm": 0.7276670336723328, + "learning_rate": 0.00019251731058626186, + "loss": 2.7592, + "step": 2566 + }, + { + "epoch": 0.20716649180857075, + "grad_norm": 0.8030811548233032, + "learning_rate": 0.00019251131758869495, + "loss": 2.7184, + "step": 2567 + }, + { + "epoch": 0.20724719554515375, + "grad_norm": 0.7808283567428589, + "learning_rate": 0.0001925053222855042, + "loss": 2.7504, + "step": 2568 + }, + { + "epoch": 0.20732789928173675, + "grad_norm": 0.783225953578949, + "learning_rate": 0.00019249932467683902, + "loss": 2.7125, + "step": 2569 + }, + { + "epoch": 0.20740860301831976, + "grad_norm": 0.7440134286880493, + "learning_rate": 0.00019249332476284887, + "loss": 2.7938, + "step": 2570 + }, + { + "epoch": 0.20748930675490276, + 
"grad_norm": 0.8729553818702698, + "learning_rate": 0.00019248732254368328, + "loss": 2.8338, + "step": 2571 + }, + { + "epoch": 0.20757001049148577, + "grad_norm": 0.8170497417449951, + "learning_rate": 0.0001924813180194918, + "loss": 2.7254, + "step": 2572 + }, + { + "epoch": 0.20765071422806877, + "grad_norm": 0.733220100402832, + "learning_rate": 0.00019247531119042418, + "loss": 2.6401, + "step": 2573 + }, + { + "epoch": 0.20773141796465178, + "grad_norm": 0.7247937917709351, + "learning_rate": 0.00019246930205663008, + "loss": 2.736, + "step": 2574 + }, + { + "epoch": 0.20781212170123478, + "grad_norm": 0.7880212068557739, + "learning_rate": 0.00019246329061825925, + "loss": 2.7173, + "step": 2575 + }, + { + "epoch": 0.20789282543781776, + "grad_norm": 0.820808470249176, + "learning_rate": 0.00019245727687546149, + "loss": 2.7331, + "step": 2576 + }, + { + "epoch": 0.20797352917440076, + "grad_norm": 0.8605412840843201, + "learning_rate": 0.00019245126082838673, + "loss": 2.761, + "step": 2577 + }, + { + "epoch": 0.20805423291098377, + "grad_norm": 0.763506293296814, + "learning_rate": 0.00019244524247718486, + "loss": 2.7053, + "step": 2578 + }, + { + "epoch": 0.20813493664756677, + "grad_norm": 0.8428114652633667, + "learning_rate": 0.00019243922182200592, + "loss": 2.724, + "step": 2579 + }, + { + "epoch": 0.20821564038414977, + "grad_norm": 0.821986734867096, + "learning_rate": 0.0001924331988629999, + "loss": 2.7615, + "step": 2580 + }, + { + "epoch": 0.20829634412073278, + "grad_norm": 0.8177430629730225, + "learning_rate": 0.00019242717360031693, + "loss": 2.7012, + "step": 2581 + }, + { + "epoch": 0.20837704785731578, + "grad_norm": 0.7584180235862732, + "learning_rate": 0.00019242114603410724, + "loss": 2.7372, + "step": 2582 + }, + { + "epoch": 0.2084577515938988, + "grad_norm": 0.9384645223617554, + "learning_rate": 0.00019241511616452096, + "loss": 2.695, + "step": 2583 + }, + { + "epoch": 0.2085384553304818, + "grad_norm": 0.8518964648246765, + 
"learning_rate": 0.00019240908399170844, + "loss": 2.8216, + "step": 2584 + }, + { + "epoch": 0.2086191590670648, + "grad_norm": 0.9082949161529541, + "learning_rate": 0.00019240304951581995, + "loss": 2.777, + "step": 2585 + }, + { + "epoch": 0.2086998628036478, + "grad_norm": 0.7906371355056763, + "learning_rate": 0.00019239701273700597, + "loss": 2.7083, + "step": 2586 + }, + { + "epoch": 0.2087805665402308, + "grad_norm": 0.7711954712867737, + "learning_rate": 0.00019239097365541686, + "loss": 2.6907, + "step": 2587 + }, + { + "epoch": 0.2088612702768138, + "grad_norm": 0.8155506253242493, + "learning_rate": 0.0001923849322712032, + "loss": 2.7602, + "step": 2588 + }, + { + "epoch": 0.20894197401339681, + "grad_norm": 0.8843441009521484, + "learning_rate": 0.0001923788885845155, + "loss": 2.7525, + "step": 2589 + }, + { + "epoch": 0.20902267774997982, + "grad_norm": 0.7336379289627075, + "learning_rate": 0.00019237284259550444, + "loss": 2.731, + "step": 2590 + }, + { + "epoch": 0.20910338148656282, + "grad_norm": 0.8261263370513916, + "learning_rate": 0.00019236679430432066, + "loss": 2.6493, + "step": 2591 + }, + { + "epoch": 0.20918408522314583, + "grad_norm": 0.7716216444969177, + "learning_rate": 0.00019236074371111497, + "loss": 2.7775, + "step": 2592 + }, + { + "epoch": 0.20926478895972883, + "grad_norm": 0.8390100598335266, + "learning_rate": 0.00019235469081603808, + "loss": 2.7532, + "step": 2593 + }, + { + "epoch": 0.20934549269631184, + "grad_norm": 0.8388446569442749, + "learning_rate": 0.00019234863561924087, + "loss": 2.8171, + "step": 2594 + }, + { + "epoch": 0.20942619643289484, + "grad_norm": 0.8003209829330444, + "learning_rate": 0.00019234257812087425, + "loss": 2.7385, + "step": 2595 + }, + { + "epoch": 0.20950690016947784, + "grad_norm": 0.8008458018302917, + "learning_rate": 0.00019233651832108918, + "loss": 2.7366, + "step": 2596 + }, + { + "epoch": 0.20958760390606085, + "grad_norm": 0.7701897025108337, + "learning_rate": 
0.00019233045622003676, + "loss": 2.69, + "step": 2597 + }, + { + "epoch": 0.20966830764264385, + "grad_norm": 0.8106730580329895, + "learning_rate": 0.00019232439181786796, + "loss": 2.6911, + "step": 2598 + }, + { + "epoch": 0.20974901137922686, + "grad_norm": 0.9580766558647156, + "learning_rate": 0.00019231832511473401, + "loss": 2.7663, + "step": 2599 + }, + { + "epoch": 0.20982971511580986, + "grad_norm": 0.7851876616477966, + "learning_rate": 0.0001923122561107861, + "loss": 2.7632, + "step": 2600 + }, + { + "epoch": 0.20991041885239287, + "grad_norm": 0.8160942196846008, + "learning_rate": 0.0001923061848061754, + "loss": 2.8533, + "step": 2601 + }, + { + "epoch": 0.20999112258897587, + "grad_norm": 0.8540663719177246, + "learning_rate": 0.00019230011120105334, + "loss": 2.7083, + "step": 2602 + }, + { + "epoch": 0.21007182632555887, + "grad_norm": 0.8273833394050598, + "learning_rate": 0.0001922940352955712, + "loss": 2.7916, + "step": 2603 + }, + { + "epoch": 0.21015253006214188, + "grad_norm": 0.8394255638122559, + "learning_rate": 0.00019228795708988046, + "loss": 2.8561, + "step": 2604 + }, + { + "epoch": 0.21023323379872488, + "grad_norm": 0.8291410803794861, + "learning_rate": 0.00019228187658413258, + "loss": 2.7462, + "step": 2605 + }, + { + "epoch": 0.2103139375353079, + "grad_norm": 0.7984235286712646, + "learning_rate": 0.00019227579377847912, + "loss": 2.7459, + "step": 2606 + }, + { + "epoch": 0.2103946412718909, + "grad_norm": 0.8343340158462524, + "learning_rate": 0.00019226970867307163, + "loss": 2.6963, + "step": 2607 + }, + { + "epoch": 0.2104753450084739, + "grad_norm": 0.6982808709144592, + "learning_rate": 0.00019226362126806184, + "loss": 2.7333, + "step": 2608 + }, + { + "epoch": 0.2105560487450569, + "grad_norm": 0.8039572834968567, + "learning_rate": 0.0001922575315636014, + "loss": 2.7253, + "step": 2609 + }, + { + "epoch": 0.2106367524816399, + "grad_norm": 0.8708705902099609, + "learning_rate": 0.00019225143955984214, + "loss": 
2.7555, + "step": 2610 + }, + { + "epoch": 0.2107174562182229, + "grad_norm": 0.8773347735404968, + "learning_rate": 0.00019224534525693585, + "loss": 2.7598, + "step": 2611 + }, + { + "epoch": 0.2107981599548059, + "grad_norm": 0.8151054978370667, + "learning_rate": 0.0001922392486550344, + "loss": 2.7398, + "step": 2612 + }, + { + "epoch": 0.21087886369138892, + "grad_norm": 0.7922329306602478, + "learning_rate": 0.0001922331497542898, + "loss": 2.7296, + "step": 2613 + }, + { + "epoch": 0.21095956742797192, + "grad_norm": 0.7536506652832031, + "learning_rate": 0.00019222704855485396, + "loss": 2.7897, + "step": 2614 + }, + { + "epoch": 0.21104027116455493, + "grad_norm": 0.7539274096488953, + "learning_rate": 0.000192220945056879, + "loss": 2.7809, + "step": 2615 + }, + { + "epoch": 0.21112097490113793, + "grad_norm": 0.7737646698951721, + "learning_rate": 0.00019221483926051705, + "loss": 2.7195, + "step": 2616 + }, + { + "epoch": 0.21120167863772094, + "grad_norm": 0.7421913743019104, + "learning_rate": 0.00019220873116592024, + "loss": 2.6817, + "step": 2617 + }, + { + "epoch": 0.21128238237430394, + "grad_norm": 0.7872927784919739, + "learning_rate": 0.0001922026207732408, + "loss": 2.7379, + "step": 2618 + }, + { + "epoch": 0.21136308611088694, + "grad_norm": 0.7950671315193176, + "learning_rate": 0.00019219650808263104, + "loss": 2.7135, + "step": 2619 + }, + { + "epoch": 0.21144378984746995, + "grad_norm": 0.7711792588233948, + "learning_rate": 0.0001921903930942433, + "loss": 2.7021, + "step": 2620 + }, + { + "epoch": 0.21152449358405295, + "grad_norm": 0.9030743837356567, + "learning_rate": 0.00019218427580822996, + "loss": 2.8083, + "step": 2621 + }, + { + "epoch": 0.21160519732063596, + "grad_norm": 0.8191907405853271, + "learning_rate": 0.0001921781562247435, + "loss": 2.6998, + "step": 2622 + }, + { + "epoch": 0.21168590105721896, + "grad_norm": 0.7883538603782654, + "learning_rate": 0.00019217203434393644, + "loss": 2.7573, + "step": 2623 + }, + { 
+ "epoch": 0.21176660479380197, + "grad_norm": 0.7565868496894836, + "learning_rate": 0.00019216591016596134, + "loss": 2.7725, + "step": 2624 + }, + { + "epoch": 0.21184730853038497, + "grad_norm": 0.8579828143119812, + "learning_rate": 0.00019215978369097086, + "loss": 2.7529, + "step": 2625 + }, + { + "epoch": 0.21192801226696797, + "grad_norm": 0.7835422158241272, + "learning_rate": 0.0001921536549191176, + "loss": 2.6926, + "step": 2626 + }, + { + "epoch": 0.21200871600355095, + "grad_norm": 0.8041907548904419, + "learning_rate": 0.00019214752385055442, + "loss": 2.7541, + "step": 2627 + }, + { + "epoch": 0.21208941974013396, + "grad_norm": 0.7754014730453491, + "learning_rate": 0.00019214139048543406, + "loss": 2.6807, + "step": 2628 + }, + { + "epoch": 0.21217012347671696, + "grad_norm": 0.8222344517707825, + "learning_rate": 0.00019213525482390936, + "loss": 2.7339, + "step": 2629 + }, + { + "epoch": 0.21225082721329996, + "grad_norm": 0.8083673715591431, + "learning_rate": 0.0001921291168661333, + "loss": 2.739, + "step": 2630 + }, + { + "epoch": 0.21233153094988297, + "grad_norm": 0.8039100766181946, + "learning_rate": 0.0001921229766122588, + "loss": 2.7372, + "step": 2631 + }, + { + "epoch": 0.21241223468646597, + "grad_norm": 0.7513072490692139, + "learning_rate": 0.00019211683406243892, + "loss": 2.7284, + "step": 2632 + }, + { + "epoch": 0.21249293842304898, + "grad_norm": 0.7653890252113342, + "learning_rate": 0.00019211068921682673, + "loss": 2.6911, + "step": 2633 + }, + { + "epoch": 0.21257364215963198, + "grad_norm": 0.7210217714309692, + "learning_rate": 0.00019210454207557542, + "loss": 2.6989, + "step": 2634 + }, + { + "epoch": 0.21265434589621499, + "grad_norm": 0.7389202117919922, + "learning_rate": 0.00019209839263883814, + "loss": 2.7016, + "step": 2635 + }, + { + "epoch": 0.212735049632798, + "grad_norm": 0.8069031238555908, + "learning_rate": 0.00019209224090676813, + "loss": 2.8213, + "step": 2636 + }, + { + "epoch": 0.212815753369381, 
+ "grad_norm": 0.8019161224365234, + "learning_rate": 0.00019208608687951877, + "loss": 2.7413, + "step": 2637 + }, + { + "epoch": 0.212896457105964, + "grad_norm": 0.775572657585144, + "learning_rate": 0.00019207993055724343, + "loss": 2.7016, + "step": 2638 + }, + { + "epoch": 0.212977160842547, + "grad_norm": 0.7482941746711731, + "learning_rate": 0.0001920737719400955, + "loss": 2.7991, + "step": 2639 + }, + { + "epoch": 0.21305786457913, + "grad_norm": 0.8467636704444885, + "learning_rate": 0.0001920676110282285, + "loss": 2.7401, + "step": 2640 + }, + { + "epoch": 0.213138568315713, + "grad_norm": 0.8726305365562439, + "learning_rate": 0.00019206144782179597, + "loss": 2.7599, + "step": 2641 + }, + { + "epoch": 0.21321927205229602, + "grad_norm": 0.740527868270874, + "learning_rate": 0.00019205528232095148, + "loss": 2.7326, + "step": 2642 + }, + { + "epoch": 0.21329997578887902, + "grad_norm": 0.7932354211807251, + "learning_rate": 0.00019204911452584873, + "loss": 2.7873, + "step": 2643 + }, + { + "epoch": 0.21338067952546202, + "grad_norm": 0.7994125485420227, + "learning_rate": 0.00019204294443664143, + "loss": 2.7305, + "step": 2644 + }, + { + "epoch": 0.21346138326204503, + "grad_norm": 0.880557656288147, + "learning_rate": 0.00019203677205348338, + "loss": 2.7295, + "step": 2645 + }, + { + "epoch": 0.21354208699862803, + "grad_norm": 0.8269557952880859, + "learning_rate": 0.00019203059737652836, + "loss": 2.765, + "step": 2646 + }, + { + "epoch": 0.21362279073521104, + "grad_norm": 0.8732784986495972, + "learning_rate": 0.00019202442040593026, + "loss": 2.6742, + "step": 2647 + }, + { + "epoch": 0.21370349447179404, + "grad_norm": 0.7921704649925232, + "learning_rate": 0.0001920182411418431, + "loss": 2.7144, + "step": 2648 + }, + { + "epoch": 0.21378419820837705, + "grad_norm": 0.8097628355026245, + "learning_rate": 0.00019201205958442082, + "loss": 2.7513, + "step": 2649 + }, + { + "epoch": 0.21386490194496005, + "grad_norm": 0.8230542540550232, + 
"learning_rate": 0.00019200587573381744, + "loss": 2.7648, + "step": 2650 + }, + { + "epoch": 0.21394560568154306, + "grad_norm": 0.7719153761863708, + "learning_rate": 0.0001919996895901872, + "loss": 2.7637, + "step": 2651 + }, + { + "epoch": 0.21402630941812606, + "grad_norm": 0.9022669792175293, + "learning_rate": 0.00019199350115368415, + "loss": 2.7707, + "step": 2652 + }, + { + "epoch": 0.21410701315470906, + "grad_norm": 0.8111257553100586, + "learning_rate": 0.00019198731042446263, + "loss": 2.7423, + "step": 2653 + }, + { + "epoch": 0.21418771689129207, + "grad_norm": 0.7534981966018677, + "learning_rate": 0.00019198111740267683, + "loss": 2.7474, + "step": 2654 + }, + { + "epoch": 0.21426842062787507, + "grad_norm": 0.761411190032959, + "learning_rate": 0.00019197492208848117, + "loss": 2.7541, + "step": 2655 + }, + { + "epoch": 0.21434912436445808, + "grad_norm": 0.8076324462890625, + "learning_rate": 0.00019196872448203002, + "loss": 2.7198, + "step": 2656 + }, + { + "epoch": 0.21442982810104108, + "grad_norm": 0.7987746000289917, + "learning_rate": 0.00019196252458347784, + "loss": 2.7164, + "step": 2657 + }, + { + "epoch": 0.21451053183762409, + "grad_norm": 0.7581545114517212, + "learning_rate": 0.0001919563223929792, + "loss": 2.6837, + "step": 2658 + }, + { + "epoch": 0.2145912355742071, + "grad_norm": 0.8773601055145264, + "learning_rate": 0.00019195011791068857, + "loss": 2.8248, + "step": 2659 + }, + { + "epoch": 0.2146719393107901, + "grad_norm": 0.7027503252029419, + "learning_rate": 0.00019194391113676066, + "loss": 2.6726, + "step": 2660 + }, + { + "epoch": 0.2147526430473731, + "grad_norm": 0.8650866746902466, + "learning_rate": 0.00019193770207135015, + "loss": 2.7348, + "step": 2661 + }, + { + "epoch": 0.2148333467839561, + "grad_norm": 0.8521862030029297, + "learning_rate": 0.0001919314907146118, + "loss": 2.7409, + "step": 2662 + }, + { + "epoch": 0.2149140505205391, + "grad_norm": 0.8098535537719727, + "learning_rate": 
0.00019192527706670033, + "loss": 2.7615, + "step": 2663 + }, + { + "epoch": 0.2149947542571221, + "grad_norm": 0.7396193146705627, + "learning_rate": 0.0001919190611277707, + "loss": 2.7191, + "step": 2664 + }, + { + "epoch": 0.21507545799370512, + "grad_norm": 0.8245799541473389, + "learning_rate": 0.00019191284289797776, + "loss": 2.7429, + "step": 2665 + }, + { + "epoch": 0.21515616173028812, + "grad_norm": 0.791646420955658, + "learning_rate": 0.00019190662237747656, + "loss": 2.7197, + "step": 2666 + }, + { + "epoch": 0.21523686546687112, + "grad_norm": 0.7850802540779114, + "learning_rate": 0.00019190039956642205, + "loss": 2.7353, + "step": 2667 + }, + { + "epoch": 0.21531756920345413, + "grad_norm": 0.7657971978187561, + "learning_rate": 0.00019189417446496937, + "loss": 2.7083, + "step": 2668 + }, + { + "epoch": 0.21539827294003713, + "grad_norm": 0.7704403400421143, + "learning_rate": 0.00019188794707327363, + "loss": 2.7813, + "step": 2669 + }, + { + "epoch": 0.21547897667662014, + "grad_norm": 0.7345917224884033, + "learning_rate": 0.00019188171739149005, + "loss": 2.7098, + "step": 2670 + }, + { + "epoch": 0.21555968041320314, + "grad_norm": 0.728831946849823, + "learning_rate": 0.00019187548541977392, + "loss": 2.6745, + "step": 2671 + }, + { + "epoch": 0.21564038414978615, + "grad_norm": 0.8079627156257629, + "learning_rate": 0.0001918692511582805, + "loss": 2.6427, + "step": 2672 + }, + { + "epoch": 0.21572108788636915, + "grad_norm": 0.766808032989502, + "learning_rate": 0.0001918630146071652, + "loss": 2.6956, + "step": 2673 + }, + { + "epoch": 0.21580179162295215, + "grad_norm": 0.7555391192436218, + "learning_rate": 0.00019185677576658345, + "loss": 2.6499, + "step": 2674 + }, + { + "epoch": 0.21588249535953516, + "grad_norm": 0.7740229964256287, + "learning_rate": 0.00019185053463669074, + "loss": 2.7685, + "step": 2675 + }, + { + "epoch": 0.21596319909611816, + "grad_norm": 0.8272803425788879, + "learning_rate": 0.00019184429121764257, + 
"loss": 2.7272, + "step": 2676 + }, + { + "epoch": 0.21604390283270117, + "grad_norm": 0.870625376701355, + "learning_rate": 0.00019183804550959463, + "loss": 2.7509, + "step": 2677 + }, + { + "epoch": 0.21612460656928414, + "grad_norm": 0.8021238446235657, + "learning_rate": 0.0001918317975127025, + "loss": 2.7058, + "step": 2678 + }, + { + "epoch": 0.21620531030586715, + "grad_norm": 0.729918897151947, + "learning_rate": 0.00019182554722712192, + "loss": 2.6145, + "step": 2679 + }, + { + "epoch": 0.21628601404245015, + "grad_norm": 0.7658380270004272, + "learning_rate": 0.00019181929465300867, + "loss": 2.712, + "step": 2680 + }, + { + "epoch": 0.21636671777903316, + "grad_norm": 0.7702174186706543, + "learning_rate": 0.00019181303979051858, + "loss": 2.8257, + "step": 2681 + }, + { + "epoch": 0.21644742151561616, + "grad_norm": 0.7782231569290161, + "learning_rate": 0.00019180678263980755, + "loss": 2.8226, + "step": 2682 + }, + { + "epoch": 0.21652812525219917, + "grad_norm": 0.7448495626449585, + "learning_rate": 0.0001918005232010315, + "loss": 2.7877, + "step": 2683 + }, + { + "epoch": 0.21660882898878217, + "grad_norm": 0.7273527979850769, + "learning_rate": 0.00019179426147434647, + "loss": 2.7169, + "step": 2684 + }, + { + "epoch": 0.21668953272536517, + "grad_norm": 0.7730992436408997, + "learning_rate": 0.00019178799745990846, + "loss": 2.717, + "step": 2685 + }, + { + "epoch": 0.21677023646194818, + "grad_norm": 0.7709231376647949, + "learning_rate": 0.0001917817311578736, + "loss": 2.7676, + "step": 2686 + }, + { + "epoch": 0.21685094019853118, + "grad_norm": 0.7825181484222412, + "learning_rate": 0.00019177546256839812, + "loss": 2.7473, + "step": 2687 + }, + { + "epoch": 0.2169316439351142, + "grad_norm": 0.8133581280708313, + "learning_rate": 0.0001917691916916382, + "loss": 2.7242, + "step": 2688 + }, + { + "epoch": 0.2170123476716972, + "grad_norm": 0.7833015322685242, + "learning_rate": 0.00019176291852775011, + "loss": 2.8128, + "step": 2689 + 
}, + { + "epoch": 0.2170930514082802, + "grad_norm": 0.7423487305641174, + "learning_rate": 0.00019175664307689028, + "loss": 2.6999, + "step": 2690 + }, + { + "epoch": 0.2171737551448632, + "grad_norm": 0.7881289124488831, + "learning_rate": 0.000191750365339215, + "loss": 2.7349, + "step": 2691 + }, + { + "epoch": 0.2172544588814462, + "grad_norm": 0.8316197395324707, + "learning_rate": 0.00019174408531488077, + "loss": 2.7654, + "step": 2692 + }, + { + "epoch": 0.2173351626180292, + "grad_norm": 0.7589917778968811, + "learning_rate": 0.00019173780300404413, + "loss": 2.6815, + "step": 2693 + }, + { + "epoch": 0.21741586635461221, + "grad_norm": 0.7752439975738525, + "learning_rate": 0.00019173151840686163, + "loss": 2.7804, + "step": 2694 + }, + { + "epoch": 0.21749657009119522, + "grad_norm": 0.8156552910804749, + "learning_rate": 0.0001917252315234899, + "loss": 2.7325, + "step": 2695 + }, + { + "epoch": 0.21757727382777822, + "grad_norm": 0.8886982798576355, + "learning_rate": 0.00019171894235408564, + "loss": 2.7257, + "step": 2696 + }, + { + "epoch": 0.21765797756436123, + "grad_norm": 0.8270704746246338, + "learning_rate": 0.00019171265089880558, + "loss": 2.7357, + "step": 2697 + }, + { + "epoch": 0.21773868130094423, + "grad_norm": 0.807700514793396, + "learning_rate": 0.00019170635715780651, + "loss": 2.7488, + "step": 2698 + }, + { + "epoch": 0.21781938503752724, + "grad_norm": 0.8195288181304932, + "learning_rate": 0.00019170006113124533, + "loss": 2.7048, + "step": 2699 + }, + { + "epoch": 0.21790008877411024, + "grad_norm": 0.817097008228302, + "learning_rate": 0.00019169376281927888, + "loss": 2.7148, + "step": 2700 + }, + { + "epoch": 0.21798079251069324, + "grad_norm": 0.8415588140487671, + "learning_rate": 0.0001916874622220642, + "loss": 2.7376, + "step": 2701 + }, + { + "epoch": 0.21806149624727625, + "grad_norm": 0.8004198670387268, + "learning_rate": 0.00019168115933975826, + "loss": 2.7145, + "step": 2702 + }, + { + "epoch": 
0.21814219998385925, + "grad_norm": 0.8167368769645691, + "learning_rate": 0.0001916748541725182, + "loss": 2.6923, + "step": 2703 + }, + { + "epoch": 0.21822290372044226, + "grad_norm": 0.8877980709075928, + "learning_rate": 0.0001916685467205011, + "loss": 2.8232, + "step": 2704 + }, + { + "epoch": 0.21830360745702526, + "grad_norm": 0.7835622429847717, + "learning_rate": 0.00019166223698386422, + "loss": 2.7797, + "step": 2705 + }, + { + "epoch": 0.21838431119360827, + "grad_norm": 0.8023552894592285, + "learning_rate": 0.00019165592496276477, + "loss": 2.6697, + "step": 2706 + }, + { + "epoch": 0.21846501493019127, + "grad_norm": 0.8549069166183472, + "learning_rate": 0.00019164961065736008, + "loss": 2.729, + "step": 2707 + }, + { + "epoch": 0.21854571866677427, + "grad_norm": 0.8561950325965881, + "learning_rate": 0.00019164329406780753, + "loss": 2.772, + "step": 2708 + }, + { + "epoch": 0.21862642240335728, + "grad_norm": 0.6979276537895203, + "learning_rate": 0.00019163697519426453, + "loss": 2.7195, + "step": 2709 + }, + { + "epoch": 0.21870712613994028, + "grad_norm": 0.7659175395965576, + "learning_rate": 0.00019163065403688856, + "loss": 2.7742, + "step": 2710 + }, + { + "epoch": 0.2187878298765233, + "grad_norm": 0.8621466755867004, + "learning_rate": 0.00019162433059583718, + "loss": 2.721, + "step": 2711 + }, + { + "epoch": 0.2188685336131063, + "grad_norm": 0.8086833357810974, + "learning_rate": 0.00019161800487126795, + "loss": 2.7356, + "step": 2712 + }, + { + "epoch": 0.2189492373496893, + "grad_norm": 0.816215455532074, + "learning_rate": 0.00019161167686333855, + "loss": 2.7159, + "step": 2713 + }, + { + "epoch": 0.2190299410862723, + "grad_norm": 0.9180822968482971, + "learning_rate": 0.0001916053465722067, + "loss": 2.7162, + "step": 2714 + }, + { + "epoch": 0.2191106448228553, + "grad_norm": 0.7547199130058289, + "learning_rate": 0.00019159901399803014, + "loss": 2.7338, + "step": 2715 + }, + { + "epoch": 0.2191913485594383, + "grad_norm": 
0.7380769848823547, + "learning_rate": 0.00019159267914096675, + "loss": 2.7149, + "step": 2716 + }, + { + "epoch": 0.2192720522960213, + "grad_norm": 0.7242285013198853, + "learning_rate": 0.00019158634200117433, + "loss": 2.724, + "step": 2717 + }, + { + "epoch": 0.21935275603260432, + "grad_norm": 0.8400316834449768, + "learning_rate": 0.00019158000257881087, + "loss": 2.7528, + "step": 2718 + }, + { + "epoch": 0.21943345976918732, + "grad_norm": 0.8437172770500183, + "learning_rate": 0.00019157366087403435, + "loss": 2.7872, + "step": 2719 + }, + { + "epoch": 0.21951416350577033, + "grad_norm": 0.7428301572799683, + "learning_rate": 0.00019156731688700282, + "loss": 2.6831, + "step": 2720 + }, + { + "epoch": 0.21959486724235333, + "grad_norm": 0.7589641213417053, + "learning_rate": 0.00019156097061787445, + "loss": 2.7105, + "step": 2721 + }, + { + "epoch": 0.21967557097893634, + "grad_norm": 0.7607305645942688, + "learning_rate": 0.00019155462206680727, + "loss": 2.7913, + "step": 2722 + }, + { + "epoch": 0.21975627471551934, + "grad_norm": 0.7455689311027527, + "learning_rate": 0.00019154827123395963, + "loss": 2.6321, + "step": 2723 + }, + { + "epoch": 0.21983697845210234, + "grad_norm": 0.7860318422317505, + "learning_rate": 0.00019154191811948974, + "loss": 2.7907, + "step": 2724 + }, + { + "epoch": 0.21991768218868535, + "grad_norm": 0.8101385235786438, + "learning_rate": 0.00019153556272355596, + "loss": 2.7682, + "step": 2725 + }, + { + "epoch": 0.21999838592526835, + "grad_norm": 0.7437283396720886, + "learning_rate": 0.00019152920504631667, + "loss": 2.7271, + "step": 2726 + }, + { + "epoch": 0.22007908966185136, + "grad_norm": 0.7390851974487305, + "learning_rate": 0.00019152284508793034, + "loss": 2.7492, + "step": 2727 + }, + { + "epoch": 0.22015979339843436, + "grad_norm": 0.9074966311454773, + "learning_rate": 0.0001915164828485555, + "loss": 2.8076, + "step": 2728 + }, + { + "epoch": 0.22024049713501734, + "grad_norm": 0.7644218802452087, + 
"learning_rate": 0.00019151011832835063, + "loss": 2.7238, + "step": 2729 + }, + { + "epoch": 0.22032120087160034, + "grad_norm": 0.823567807674408, + "learning_rate": 0.0001915037515274744, + "loss": 2.7701, + "step": 2730 + }, + { + "epoch": 0.22040190460818335, + "grad_norm": 0.7601858377456665, + "learning_rate": 0.00019149738244608552, + "loss": 2.6981, + "step": 2731 + }, + { + "epoch": 0.22048260834476635, + "grad_norm": 0.8242961764335632, + "learning_rate": 0.00019149101108434269, + "loss": 2.6916, + "step": 2732 + }, + { + "epoch": 0.22056331208134936, + "grad_norm": 0.7970656156539917, + "learning_rate": 0.0001914846374424047, + "loss": 2.7858, + "step": 2733 + }, + { + "epoch": 0.22064401581793236, + "grad_norm": 0.7844050526618958, + "learning_rate": 0.0001914782615204304, + "loss": 2.6782, + "step": 2734 + }, + { + "epoch": 0.22072471955451536, + "grad_norm": 0.7965044975280762, + "learning_rate": 0.00019147188331857868, + "loss": 2.7563, + "step": 2735 + }, + { + "epoch": 0.22080542329109837, + "grad_norm": 0.8189071416854858, + "learning_rate": 0.00019146550283700856, + "loss": 2.7587, + "step": 2736 + }, + { + "epoch": 0.22088612702768137, + "grad_norm": 0.7610960602760315, + "learning_rate": 0.00019145912007587898, + "loss": 2.663, + "step": 2737 + }, + { + "epoch": 0.22096683076426438, + "grad_norm": 0.7642313838005066, + "learning_rate": 0.00019145273503534907, + "loss": 2.78, + "step": 2738 + }, + { + "epoch": 0.22104753450084738, + "grad_norm": 0.7699539065361023, + "learning_rate": 0.0001914463477155779, + "loss": 2.7429, + "step": 2739 + }, + { + "epoch": 0.22112823823743039, + "grad_norm": 0.7674413919448853, + "learning_rate": 0.00019143995811672477, + "loss": 2.7048, + "step": 2740 + }, + { + "epoch": 0.2212089419740134, + "grad_norm": 0.7871866226196289, + "learning_rate": 0.00019143356623894882, + "loss": 2.7769, + "step": 2741 + }, + { + "epoch": 0.2212896457105964, + "grad_norm": 0.8453468680381775, + "learning_rate": 
0.00019142717208240937, + "loss": 2.7677, + "step": 2742 + }, + { + "epoch": 0.2213703494471794, + "grad_norm": 0.8050780892372131, + "learning_rate": 0.00019142077564726582, + "loss": 2.7809, + "step": 2743 + }, + { + "epoch": 0.2214510531837624, + "grad_norm": 0.811287522315979, + "learning_rate": 0.0001914143769336776, + "loss": 2.7201, + "step": 2744 + }, + { + "epoch": 0.2215317569203454, + "grad_norm": 0.823106050491333, + "learning_rate": 0.00019140797594180412, + "loss": 2.7371, + "step": 2745 + }, + { + "epoch": 0.2216124606569284, + "grad_norm": 0.778126060962677, + "learning_rate": 0.0001914015726718049, + "loss": 2.6925, + "step": 2746 + }, + { + "epoch": 0.22169316439351142, + "grad_norm": 0.8240278959274292, + "learning_rate": 0.0001913951671238396, + "loss": 2.7227, + "step": 2747 + }, + { + "epoch": 0.22177386813009442, + "grad_norm": 0.8061805963516235, + "learning_rate": 0.0001913887592980678, + "loss": 2.7092, + "step": 2748 + }, + { + "epoch": 0.22185457186667742, + "grad_norm": 0.9111800789833069, + "learning_rate": 0.00019138234919464925, + "loss": 2.7364, + "step": 2749 + }, + { + "epoch": 0.22193527560326043, + "grad_norm": 0.8154863715171814, + "learning_rate": 0.0001913759368137437, + "loss": 2.6983, + "step": 2750 + }, + { + "epoch": 0.22201597933984343, + "grad_norm": 0.8547734022140503, + "learning_rate": 0.0001913695221555109, + "loss": 2.7016, + "step": 2751 + }, + { + "epoch": 0.22209668307642644, + "grad_norm": 0.7488531470298767, + "learning_rate": 0.00019136310522011079, + "loss": 2.6641, + "step": 2752 + }, + { + "epoch": 0.22217738681300944, + "grad_norm": 0.9118027091026306, + "learning_rate": 0.00019135668600770326, + "loss": 2.6965, + "step": 2753 + }, + { + "epoch": 0.22225809054959245, + "grad_norm": 0.7629117369651794, + "learning_rate": 0.00019135026451844834, + "loss": 2.7836, + "step": 2754 + }, + { + "epoch": 0.22233879428617545, + "grad_norm": 0.8081222176551819, + "learning_rate": 0.000191343840752506, + "loss": 
2.7339, + "step": 2755 + }, + { + "epoch": 0.22241949802275846, + "grad_norm": 0.9143899083137512, + "learning_rate": 0.00019133741471003636, + "loss": 2.7051, + "step": 2756 + }, + { + "epoch": 0.22250020175934146, + "grad_norm": 0.8096790909767151, + "learning_rate": 0.00019133098639119962, + "loss": 2.6884, + "step": 2757 + }, + { + "epoch": 0.22258090549592446, + "grad_norm": 0.7959297895431519, + "learning_rate": 0.00019132455579615597, + "loss": 2.7127, + "step": 2758 + }, + { + "epoch": 0.22266160923250747, + "grad_norm": 0.7111356854438782, + "learning_rate": 0.00019131812292506563, + "loss": 2.7418, + "step": 2759 + }, + { + "epoch": 0.22274231296909047, + "grad_norm": 0.7584012150764465, + "learning_rate": 0.00019131168777808898, + "loss": 2.6705, + "step": 2760 + }, + { + "epoch": 0.22282301670567348, + "grad_norm": 0.7646663784980774, + "learning_rate": 0.0001913052503553864, + "loss": 2.7166, + "step": 2761 + }, + { + "epoch": 0.22290372044225648, + "grad_norm": 0.7643954157829285, + "learning_rate": 0.00019129881065711827, + "loss": 2.7967, + "step": 2762 + }, + { + "epoch": 0.22298442417883949, + "grad_norm": 0.7591429948806763, + "learning_rate": 0.0001912923686834451, + "loss": 2.6611, + "step": 2763 + }, + { + "epoch": 0.2230651279154225, + "grad_norm": 0.7182386517524719, + "learning_rate": 0.00019128592443452749, + "loss": 2.6808, + "step": 2764 + }, + { + "epoch": 0.2231458316520055, + "grad_norm": 0.7689648270606995, + "learning_rate": 0.00019127947791052602, + "loss": 2.7288, + "step": 2765 + }, + { + "epoch": 0.2232265353885885, + "grad_norm": 0.7851321697235107, + "learning_rate": 0.00019127302911160136, + "loss": 2.7227, + "step": 2766 + }, + { + "epoch": 0.2233072391251715, + "grad_norm": 0.8419411182403564, + "learning_rate": 0.00019126657803791424, + "loss": 2.7397, + "step": 2767 + }, + { + "epoch": 0.2233879428617545, + "grad_norm": 0.7657596468925476, + "learning_rate": 0.0001912601246896254, + "loss": 2.7223, + "step": 2768 + }, + { 
+ "epoch": 0.2234686465983375, + "grad_norm": 0.8033619523048401, + "learning_rate": 0.00019125366906689567, + "loss": 2.7256, + "step": 2769 + }, + { + "epoch": 0.22354935033492052, + "grad_norm": 0.7784682512283325, + "learning_rate": 0.00019124721116988601, + "loss": 2.7692, + "step": 2770 + }, + { + "epoch": 0.22363005407150352, + "grad_norm": 0.7842707633972168, + "learning_rate": 0.00019124075099875731, + "loss": 2.7707, + "step": 2771 + }, + { + "epoch": 0.22371075780808652, + "grad_norm": 0.7864845395088196, + "learning_rate": 0.0001912342885536706, + "loss": 2.6912, + "step": 2772 + }, + { + "epoch": 0.22379146154466953, + "grad_norm": 0.8544312715530396, + "learning_rate": 0.0001912278238347869, + "loss": 2.8345, + "step": 2773 + }, + { + "epoch": 0.22387216528125253, + "grad_norm": 0.7210882306098938, + "learning_rate": 0.0001912213568422674, + "loss": 2.6933, + "step": 2774 + }, + { + "epoch": 0.22395286901783554, + "grad_norm": 0.8877022862434387, + "learning_rate": 0.00019121488757627318, + "loss": 2.7583, + "step": 2775 + }, + { + "epoch": 0.22403357275441854, + "grad_norm": 0.902886688709259, + "learning_rate": 0.00019120841603696554, + "loss": 2.8, + "step": 2776 + }, + { + "epoch": 0.22411427649100155, + "grad_norm": 0.771294355392456, + "learning_rate": 0.0001912019422245058, + "loss": 2.7712, + "step": 2777 + }, + { + "epoch": 0.22419498022758455, + "grad_norm": 0.7973463535308838, + "learning_rate": 0.0001911954661390552, + "loss": 2.7368, + "step": 2778 + }, + { + "epoch": 0.22427568396416755, + "grad_norm": 0.776836633682251, + "learning_rate": 0.00019118898778077524, + "loss": 2.7126, + "step": 2779 + }, + { + "epoch": 0.22435638770075053, + "grad_norm": 0.8286641240119934, + "learning_rate": 0.00019118250714982731, + "loss": 2.7148, + "step": 2780 + }, + { + "epoch": 0.22443709143733354, + "grad_norm": 0.7848700284957886, + "learning_rate": 0.00019117602424637294, + "loss": 2.7284, + "step": 2781 + }, + { + "epoch": 0.22451779517391654, + 
"grad_norm": 0.7658216953277588, + "learning_rate": 0.0001911695390705737, + "loss": 2.7186, + "step": 2782 + }, + { + "epoch": 0.22459849891049954, + "grad_norm": 0.7596792578697205, + "learning_rate": 0.00019116305162259124, + "loss": 2.6854, + "step": 2783 + }, + { + "epoch": 0.22467920264708255, + "grad_norm": 0.7901157140731812, + "learning_rate": 0.00019115656190258726, + "loss": 2.7347, + "step": 2784 + }, + { + "epoch": 0.22475990638366555, + "grad_norm": 0.7499287128448486, + "learning_rate": 0.00019115006991072346, + "loss": 2.7219, + "step": 2785 + }, + { + "epoch": 0.22484061012024856, + "grad_norm": 0.7427374124526978, + "learning_rate": 0.00019114357564716162, + "loss": 2.7147, + "step": 2786 + }, + { + "epoch": 0.22492131385683156, + "grad_norm": 0.8305855393409729, + "learning_rate": 0.00019113707911206363, + "loss": 2.7587, + "step": 2787 + }, + { + "epoch": 0.22500201759341457, + "grad_norm": 0.8266459703445435, + "learning_rate": 0.00019113058030559142, + "loss": 2.7275, + "step": 2788 + }, + { + "epoch": 0.22508272132999757, + "grad_norm": 0.7338323593139648, + "learning_rate": 0.0001911240792279069, + "loss": 2.762, + "step": 2789 + }, + { + "epoch": 0.22516342506658057, + "grad_norm": 0.7653434872627258, + "learning_rate": 0.00019111757587917216, + "loss": 2.6715, + "step": 2790 + }, + { + "epoch": 0.22524412880316358, + "grad_norm": 0.76301509141922, + "learning_rate": 0.00019111107025954923, + "loss": 2.698, + "step": 2791 + }, + { + "epoch": 0.22532483253974658, + "grad_norm": 0.7810547947883606, + "learning_rate": 0.00019110456236920024, + "loss": 2.7295, + "step": 2792 + }, + { + "epoch": 0.2254055362763296, + "grad_norm": 0.7885214686393738, + "learning_rate": 0.00019109805220828742, + "loss": 2.7724, + "step": 2793 + }, + { + "epoch": 0.2254862400129126, + "grad_norm": 0.8087031841278076, + "learning_rate": 0.00019109153977697301, + "loss": 2.7888, + "step": 2794 + }, + { + "epoch": 0.2255669437494956, + "grad_norm": 0.795101523399353, 
+ "learning_rate": 0.00019108502507541933, + "loss": 2.6815, + "step": 2795 + }, + { + "epoch": 0.2256476474860786, + "grad_norm": 0.8337482213973999, + "learning_rate": 0.0001910785081037887, + "loss": 2.8192, + "step": 2796 + }, + { + "epoch": 0.2257283512226616, + "grad_norm": 0.8357288241386414, + "learning_rate": 0.00019107198886224357, + "loss": 2.7867, + "step": 2797 + }, + { + "epoch": 0.2258090549592446, + "grad_norm": 0.80678391456604, + "learning_rate": 0.00019106546735094644, + "loss": 2.7313, + "step": 2798 + }, + { + "epoch": 0.2258897586958276, + "grad_norm": 0.7481401562690735, + "learning_rate": 0.00019105894357005979, + "loss": 2.7073, + "step": 2799 + }, + { + "epoch": 0.22597046243241062, + "grad_norm": 0.8025074005126953, + "learning_rate": 0.00019105241751974622, + "loss": 2.6922, + "step": 2800 + }, + { + "epoch": 0.22605116616899362, + "grad_norm": 0.7308986186981201, + "learning_rate": 0.00019104588920016842, + "loss": 2.7511, + "step": 2801 + }, + { + "epoch": 0.22613186990557663, + "grad_norm": 0.7727689146995544, + "learning_rate": 0.00019103935861148905, + "loss": 2.707, + "step": 2802 + }, + { + "epoch": 0.22621257364215963, + "grad_norm": 0.8611076474189758, + "learning_rate": 0.0001910328257538709, + "loss": 2.8494, + "step": 2803 + }, + { + "epoch": 0.22629327737874264, + "grad_norm": 0.8487605452537537, + "learning_rate": 0.00019102629062747677, + "loss": 2.7698, + "step": 2804 + }, + { + "epoch": 0.22637398111532564, + "grad_norm": 0.7495502233505249, + "learning_rate": 0.00019101975323246952, + "loss": 2.7091, + "step": 2805 + }, + { + "epoch": 0.22645468485190864, + "grad_norm": 0.7334234118461609, + "learning_rate": 0.0001910132135690121, + "loss": 2.7375, + "step": 2806 + }, + { + "epoch": 0.22653538858849165, + "grad_norm": 0.879912257194519, + "learning_rate": 0.00019100667163726747, + "loss": 2.7278, + "step": 2807 + }, + { + "epoch": 0.22661609232507465, + "grad_norm": 0.8087306618690491, + "learning_rate": 
0.0001910001274373987, + "loss": 2.8065, + "step": 2808 + }, + { + "epoch": 0.22669679606165766, + "grad_norm": 0.7548169493675232, + "learning_rate": 0.00019099358096956887, + "loss": 2.7235, + "step": 2809 + }, + { + "epoch": 0.22677749979824066, + "grad_norm": 0.7505785822868347, + "learning_rate": 0.00019098703223394118, + "loss": 2.6633, + "step": 2810 + }, + { + "epoch": 0.22685820353482367, + "grad_norm": 0.829075813293457, + "learning_rate": 0.00019098048123067875, + "loss": 2.7389, + "step": 2811 + }, + { + "epoch": 0.22693890727140667, + "grad_norm": 0.7731673121452332, + "learning_rate": 0.00019097392795994493, + "loss": 2.7639, + "step": 2812 + }, + { + "epoch": 0.22701961100798967, + "grad_norm": 0.7389004826545715, + "learning_rate": 0.00019096737242190303, + "loss": 2.717, + "step": 2813 + }, + { + "epoch": 0.22710031474457268, + "grad_norm": 0.7520460486412048, + "learning_rate": 0.0001909608146167164, + "loss": 2.7203, + "step": 2814 + }, + { + "epoch": 0.22718101848115568, + "grad_norm": 0.7272354364395142, + "learning_rate": 0.00019095425454454849, + "loss": 2.7306, + "step": 2815 + }, + { + "epoch": 0.2272617222177387, + "grad_norm": 0.7593528032302856, + "learning_rate": 0.00019094769220556282, + "loss": 2.7565, + "step": 2816 + }, + { + "epoch": 0.2273424259543217, + "grad_norm": 0.7312695384025574, + "learning_rate": 0.0001909411275999229, + "loss": 2.744, + "step": 2817 + }, + { + "epoch": 0.2274231296909047, + "grad_norm": 0.7483308911323547, + "learning_rate": 0.00019093456072779238, + "loss": 2.7938, + "step": 2818 + }, + { + "epoch": 0.2275038334274877, + "grad_norm": 0.8515620231628418, + "learning_rate": 0.00019092799158933486, + "loss": 2.7392, + "step": 2819 + }, + { + "epoch": 0.2275845371640707, + "grad_norm": 0.7119776606559753, + "learning_rate": 0.00019092142018471415, + "loss": 2.6985, + "step": 2820 + }, + { + "epoch": 0.2276652409006537, + "grad_norm": 0.7549445033073425, + "learning_rate": 0.00019091484651409394, + "loss": 
2.7621, + "step": 2821 + }, + { + "epoch": 0.2277459446372367, + "grad_norm": 0.8728097081184387, + "learning_rate": 0.00019090827057763814, + "loss": 2.8321, + "step": 2822 + }, + { + "epoch": 0.22782664837381972, + "grad_norm": 0.755043089389801, + "learning_rate": 0.00019090169237551057, + "loss": 2.7341, + "step": 2823 + }, + { + "epoch": 0.22790735211040272, + "grad_norm": 0.7949401140213013, + "learning_rate": 0.00019089511190787523, + "loss": 2.7646, + "step": 2824 + }, + { + "epoch": 0.22798805584698573, + "grad_norm": 0.8027622103691101, + "learning_rate": 0.00019088852917489607, + "loss": 2.7606, + "step": 2825 + }, + { + "epoch": 0.22806875958356873, + "grad_norm": 0.8609418869018555, + "learning_rate": 0.0001908819441767372, + "loss": 2.7433, + "step": 2826 + }, + { + "epoch": 0.22814946332015174, + "grad_norm": 0.8021805882453918, + "learning_rate": 0.00019087535691356271, + "loss": 2.7723, + "step": 2827 + }, + { + "epoch": 0.22823016705673474, + "grad_norm": 0.8104252219200134, + "learning_rate": 0.00019086876738553675, + "loss": 2.7229, + "step": 2828 + }, + { + "epoch": 0.22831087079331774, + "grad_norm": 0.8714433908462524, + "learning_rate": 0.00019086217559282362, + "loss": 2.75, + "step": 2829 + }, + { + "epoch": 0.22839157452990075, + "grad_norm": 0.7598714828491211, + "learning_rate": 0.0001908555815355875, + "loss": 2.6979, + "step": 2830 + }, + { + "epoch": 0.22847227826648372, + "grad_norm": 0.859708309173584, + "learning_rate": 0.00019084898521399283, + "loss": 2.7863, + "step": 2831 + }, + { + "epoch": 0.22855298200306673, + "grad_norm": 0.7798011302947998, + "learning_rate": 0.00019084238662820397, + "loss": 2.7623, + "step": 2832 + }, + { + "epoch": 0.22863368573964973, + "grad_norm": 0.7869576811790466, + "learning_rate": 0.00019083578577838535, + "loss": 2.7341, + "step": 2833 + }, + { + "epoch": 0.22871438947623274, + "grad_norm": 0.7486738562583923, + "learning_rate": 0.0001908291826647015, + "loss": 2.7615, + "step": 2834 + }, + { 
+ "epoch": 0.22879509321281574, + "grad_norm": 0.8270190954208374, + "learning_rate": 0.00019082257728731704, + "loss": 2.7515, + "step": 2835 + }, + { + "epoch": 0.22887579694939875, + "grad_norm": 0.9060254693031311, + "learning_rate": 0.00019081596964639648, + "loss": 2.874, + "step": 2836 + }, + { + "epoch": 0.22895650068598175, + "grad_norm": 0.7802320122718811, + "learning_rate": 0.00019080935974210458, + "loss": 2.7224, + "step": 2837 + }, + { + "epoch": 0.22903720442256476, + "grad_norm": 0.9513018131256104, + "learning_rate": 0.00019080274757460607, + "loss": 2.7168, + "step": 2838 + }, + { + "epoch": 0.22911790815914776, + "grad_norm": 0.7139711976051331, + "learning_rate": 0.0001907961331440657, + "loss": 2.676, + "step": 2839 + }, + { + "epoch": 0.22919861189573076, + "grad_norm": 0.8635632395744324, + "learning_rate": 0.00019078951645064838, + "loss": 2.6979, + "step": 2840 + }, + { + "epoch": 0.22927931563231377, + "grad_norm": 0.8823218941688538, + "learning_rate": 0.000190782897494519, + "loss": 2.7345, + "step": 2841 + }, + { + "epoch": 0.22936001936889677, + "grad_norm": 0.8139359354972839, + "learning_rate": 0.00019077627627584246, + "loss": 2.6988, + "step": 2842 + }, + { + "epoch": 0.22944072310547978, + "grad_norm": 0.8935994505882263, + "learning_rate": 0.00019076965279478383, + "loss": 2.7706, + "step": 2843 + }, + { + "epoch": 0.22952142684206278, + "grad_norm": 0.8362705111503601, + "learning_rate": 0.00019076302705150816, + "loss": 2.7593, + "step": 2844 + }, + { + "epoch": 0.22960213057864579, + "grad_norm": 0.7534157633781433, + "learning_rate": 0.00019075639904618066, + "loss": 2.7501, + "step": 2845 + }, + { + "epoch": 0.2296828343152288, + "grad_norm": 0.8826640248298645, + "learning_rate": 0.00019074976877896642, + "loss": 2.7758, + "step": 2846 + }, + { + "epoch": 0.2297635380518118, + "grad_norm": 0.8395571112632751, + "learning_rate": 0.0001907431362500307, + "loss": 2.7625, + "step": 2847 + }, + { + "epoch": 0.2298442417883948, 
+ "grad_norm": 0.7927684783935547, + "learning_rate": 0.00019073650145953885, + "loss": 2.7392, + "step": 2848 + }, + { + "epoch": 0.2299249455249778, + "grad_norm": 0.823208749294281, + "learning_rate": 0.00019072986440765618, + "loss": 2.7259, + "step": 2849 + }, + { + "epoch": 0.2300056492615608, + "grad_norm": 0.889416515827179, + "learning_rate": 0.00019072322509454815, + "loss": 2.7539, + "step": 2850 + }, + { + "epoch": 0.2300863529981438, + "grad_norm": 0.7957748770713806, + "learning_rate": 0.0001907165835203802, + "loss": 2.7756, + "step": 2851 + }, + { + "epoch": 0.23016705673472682, + "grad_norm": 0.7924029231071472, + "learning_rate": 0.00019070993968531782, + "loss": 2.7439, + "step": 2852 + }, + { + "epoch": 0.23024776047130982, + "grad_norm": 0.7811052799224854, + "learning_rate": 0.0001907032935895266, + "loss": 2.7479, + "step": 2853 + }, + { + "epoch": 0.23032846420789282, + "grad_norm": 0.7973877191543579, + "learning_rate": 0.00019069664523317225, + "loss": 2.7502, + "step": 2854 + }, + { + "epoch": 0.23040916794447583, + "grad_norm": 0.7524267435073853, + "learning_rate": 0.0001906899946164204, + "loss": 2.75, + "step": 2855 + }, + { + "epoch": 0.23048987168105883, + "grad_norm": 0.7594791054725647, + "learning_rate": 0.00019068334173943683, + "loss": 2.6534, + "step": 2856 + }, + { + "epoch": 0.23057057541764184, + "grad_norm": 0.7253785729408264, + "learning_rate": 0.00019067668660238733, + "loss": 2.7246, + "step": 2857 + }, + { + "epoch": 0.23065127915422484, + "grad_norm": 0.788737416267395, + "learning_rate": 0.00019067002920543775, + "loss": 2.757, + "step": 2858 + }, + { + "epoch": 0.23073198289080785, + "grad_norm": 0.7577618956565857, + "learning_rate": 0.00019066336954875403, + "loss": 2.674, + "step": 2859 + }, + { + "epoch": 0.23081268662739085, + "grad_norm": 0.7682929635047913, + "learning_rate": 0.0001906567076325022, + "loss": 2.8193, + "step": 2860 + }, + { + "epoch": 0.23089339036397385, + "grad_norm": 0.7742112874984741, + 
"learning_rate": 0.00019065004345684817, + "loss": 2.6969, + "step": 2861 + }, + { + "epoch": 0.23097409410055686, + "grad_norm": 0.7981678247451782, + "learning_rate": 0.00019064337702195814, + "loss": 2.7681, + "step": 2862 + }, + { + "epoch": 0.23105479783713986, + "grad_norm": 0.7608500123023987, + "learning_rate": 0.00019063670832799817, + "loss": 2.7459, + "step": 2863 + }, + { + "epoch": 0.23113550157372287, + "grad_norm": 0.7563463449478149, + "learning_rate": 0.00019063003737513455, + "loss": 2.7678, + "step": 2864 + }, + { + "epoch": 0.23121620531030587, + "grad_norm": 0.7915034890174866, + "learning_rate": 0.00019062336416353343, + "loss": 2.7577, + "step": 2865 + }, + { + "epoch": 0.23129690904688888, + "grad_norm": 0.7229592204093933, + "learning_rate": 0.00019061668869336122, + "loss": 2.7308, + "step": 2866 + }, + { + "epoch": 0.23137761278347188, + "grad_norm": 0.7910905480384827, + "learning_rate": 0.00019061001096478425, + "loss": 2.7571, + "step": 2867 + }, + { + "epoch": 0.23145831652005489, + "grad_norm": 0.8474656939506531, + "learning_rate": 0.00019060333097796895, + "loss": 2.7011, + "step": 2868 + }, + { + "epoch": 0.2315390202566379, + "grad_norm": 0.8005419373512268, + "learning_rate": 0.00019059664873308178, + "loss": 2.7441, + "step": 2869 + }, + { + "epoch": 0.2316197239932209, + "grad_norm": 0.7728021740913391, + "learning_rate": 0.00019058996423028935, + "loss": 2.7753, + "step": 2870 + }, + { + "epoch": 0.2317004277298039, + "grad_norm": 0.7338094115257263, + "learning_rate": 0.00019058327746975816, + "loss": 2.7009, + "step": 2871 + }, + { + "epoch": 0.2317811314663869, + "grad_norm": 0.7746245265007019, + "learning_rate": 0.00019057658845165494, + "loss": 2.6938, + "step": 2872 + }, + { + "epoch": 0.2318618352029699, + "grad_norm": 0.7474356293678284, + "learning_rate": 0.00019056989717614636, + "loss": 2.7161, + "step": 2873 + }, + { + "epoch": 0.2319425389395529, + "grad_norm": 0.9540585279464722, + "learning_rate": 
0.00019056320364339917, + "loss": 2.7753, + "step": 2874 + }, + { + "epoch": 0.23202324267613592, + "grad_norm": 0.799726665019989, + "learning_rate": 0.00019055650785358024, + "loss": 2.7301, + "step": 2875 + }, + { + "epoch": 0.23210394641271892, + "grad_norm": 0.8087828159332275, + "learning_rate": 0.0001905498098068564, + "loss": 2.7305, + "step": 2876 + }, + { + "epoch": 0.23218465014930192, + "grad_norm": 0.8177600502967834, + "learning_rate": 0.00019054310950339457, + "loss": 2.7462, + "step": 2877 + }, + { + "epoch": 0.23226535388588493, + "grad_norm": 0.7106238603591919, + "learning_rate": 0.00019053640694336181, + "loss": 2.7183, + "step": 2878 + }, + { + "epoch": 0.23234605762246793, + "grad_norm": 0.884185791015625, + "learning_rate": 0.00019052970212692514, + "loss": 2.7549, + "step": 2879 + }, + { + "epoch": 0.23242676135905094, + "grad_norm": 0.7532132267951965, + "learning_rate": 0.00019052299505425163, + "loss": 2.7524, + "step": 2880 + }, + { + "epoch": 0.23250746509563394, + "grad_norm": 0.7295021414756775, + "learning_rate": 0.00019051628572550842, + "loss": 2.6928, + "step": 2881 + }, + { + "epoch": 0.23258816883221692, + "grad_norm": 0.8475896716117859, + "learning_rate": 0.00019050957414086278, + "loss": 2.7138, + "step": 2882 + }, + { + "epoch": 0.23266887256879992, + "grad_norm": 0.7219378352165222, + "learning_rate": 0.00019050286030048198, + "loss": 2.7034, + "step": 2883 + }, + { + "epoch": 0.23274957630538293, + "grad_norm": 0.8410176634788513, + "learning_rate": 0.0001904961442045333, + "loss": 2.7413, + "step": 2884 + }, + { + "epoch": 0.23283028004196593, + "grad_norm": 0.7792301177978516, + "learning_rate": 0.00019048942585318414, + "loss": 2.6771, + "step": 2885 + }, + { + "epoch": 0.23291098377854894, + "grad_norm": 0.7457073926925659, + "learning_rate": 0.00019048270524660196, + "loss": 2.7325, + "step": 2886 + }, + { + "epoch": 0.23299168751513194, + "grad_norm": 0.8258858323097229, + "learning_rate": 0.00019047598238495424, + 
"loss": 2.7434, + "step": 2887 + }, + { + "epoch": 0.23307239125171494, + "grad_norm": 0.8188657164573669, + "learning_rate": 0.00019046925726840853, + "loss": 2.732, + "step": 2888 + }, + { + "epoch": 0.23315309498829795, + "grad_norm": 0.8084142208099365, + "learning_rate": 0.00019046252989713246, + "loss": 2.7537, + "step": 2889 + }, + { + "epoch": 0.23323379872488095, + "grad_norm": 0.75553297996521, + "learning_rate": 0.00019045580027129364, + "loss": 2.6685, + "step": 2890 + }, + { + "epoch": 0.23331450246146396, + "grad_norm": 0.8145995736122131, + "learning_rate": 0.00019044906839105986, + "loss": 2.7654, + "step": 2891 + }, + { + "epoch": 0.23339520619804696, + "grad_norm": 0.8433949947357178, + "learning_rate": 0.0001904423342565988, + "loss": 2.7713, + "step": 2892 + }, + { + "epoch": 0.23347590993462997, + "grad_norm": 0.7826054096221924, + "learning_rate": 0.0001904355978680784, + "loss": 2.7108, + "step": 2893 + }, + { + "epoch": 0.23355661367121297, + "grad_norm": 0.7281686663627625, + "learning_rate": 0.0001904288592256665, + "loss": 2.7606, + "step": 2894 + }, + { + "epoch": 0.23363731740779597, + "grad_norm": 0.8282813429832458, + "learning_rate": 0.00019042211832953103, + "loss": 2.6662, + "step": 2895 + }, + { + "epoch": 0.23371802114437898, + "grad_norm": 0.8227263689041138, + "learning_rate": 0.00019041537517984, + "loss": 2.7493, + "step": 2896 + }, + { + "epoch": 0.23379872488096198, + "grad_norm": 0.839350700378418, + "learning_rate": 0.0001904086297767615, + "loss": 2.7258, + "step": 2897 + }, + { + "epoch": 0.233879428617545, + "grad_norm": 0.713231086730957, + "learning_rate": 0.00019040188212046357, + "loss": 2.6722, + "step": 2898 + }, + { + "epoch": 0.233960132354128, + "grad_norm": 0.8314552903175354, + "learning_rate": 0.00019039513221111447, + "loss": 2.8509, + "step": 2899 + }, + { + "epoch": 0.234040836090711, + "grad_norm": 0.8885688781738281, + "learning_rate": 0.0001903883800488824, + "loss": 2.7608, + "step": 2900 + }, + { + 
"epoch": 0.234121539827294, + "grad_norm": 0.755308210849762, + "learning_rate": 0.00019038162563393555, + "loss": 2.7065, + "step": 2901 + }, + { + "epoch": 0.234202243563877, + "grad_norm": 0.7436641454696655, + "learning_rate": 0.00019037486896644236, + "loss": 2.6865, + "step": 2902 + }, + { + "epoch": 0.23428294730046, + "grad_norm": 0.7861987948417664, + "learning_rate": 0.0001903681100465712, + "loss": 2.7238, + "step": 2903 + }, + { + "epoch": 0.234363651037043, + "grad_norm": 0.7481045126914978, + "learning_rate": 0.0001903613488744905, + "loss": 2.7038, + "step": 2904 + }, + { + "epoch": 0.23444435477362602, + "grad_norm": 0.790765106678009, + "learning_rate": 0.0001903545854503688, + "loss": 2.6865, + "step": 2905 + }, + { + "epoch": 0.23452505851020902, + "grad_norm": 0.8594793677330017, + "learning_rate": 0.0001903478197743746, + "loss": 2.7324, + "step": 2906 + }, + { + "epoch": 0.23460576224679203, + "grad_norm": 0.7504310011863708, + "learning_rate": 0.00019034105184667662, + "loss": 2.6535, + "step": 2907 + }, + { + "epoch": 0.23468646598337503, + "grad_norm": 0.7824578881263733, + "learning_rate": 0.00019033428166744342, + "loss": 2.7113, + "step": 2908 + }, + { + "epoch": 0.23476716971995804, + "grad_norm": 0.7766899466514587, + "learning_rate": 0.0001903275092368438, + "loss": 2.6907, + "step": 2909 + }, + { + "epoch": 0.23484787345654104, + "grad_norm": 0.8082600235939026, + "learning_rate": 0.00019032073455504657, + "loss": 2.6781, + "step": 2910 + }, + { + "epoch": 0.23492857719312404, + "grad_norm": 0.7790517210960388, + "learning_rate": 0.0001903139576222205, + "loss": 2.7277, + "step": 2911 + }, + { + "epoch": 0.23500928092970705, + "grad_norm": 0.7449578046798706, + "learning_rate": 0.00019030717843853453, + "loss": 2.7078, + "step": 2912 + }, + { + "epoch": 0.23508998466629005, + "grad_norm": 0.7931632399559021, + "learning_rate": 0.0001903003970041576, + "loss": 2.7165, + "step": 2913 + }, + { + "epoch": 0.23517068840287306, + 
"grad_norm": 0.7970653176307678, + "learning_rate": 0.00019029361331925873, + "loss": 2.7993, + "step": 2914 + }, + { + "epoch": 0.23525139213945606, + "grad_norm": 0.8497335314750671, + "learning_rate": 0.00019028682738400697, + "loss": 2.7564, + "step": 2915 + }, + { + "epoch": 0.23533209587603907, + "grad_norm": 0.7840128540992737, + "learning_rate": 0.0001902800391985715, + "loss": 2.7546, + "step": 2916 + }, + { + "epoch": 0.23541279961262207, + "grad_norm": 0.8237372636795044, + "learning_rate": 0.00019027324876312146, + "loss": 2.7507, + "step": 2917 + }, + { + "epoch": 0.23549350334920507, + "grad_norm": 0.8445321917533875, + "learning_rate": 0.00019026645607782603, + "loss": 2.7287, + "step": 2918 + }, + { + "epoch": 0.23557420708578808, + "grad_norm": 0.8380417227745056, + "learning_rate": 0.0001902596611428546, + "loss": 2.7778, + "step": 2919 + }, + { + "epoch": 0.23565491082237108, + "grad_norm": 0.7989064455032349, + "learning_rate": 0.00019025286395837646, + "loss": 2.7254, + "step": 2920 + }, + { + "epoch": 0.2357356145589541, + "grad_norm": 0.8223496079444885, + "learning_rate": 0.00019024606452456102, + "loss": 2.7028, + "step": 2921 + }, + { + "epoch": 0.2358163182955371, + "grad_norm": 0.8090229630470276, + "learning_rate": 0.00019023926284157775, + "loss": 2.6911, + "step": 2922 + }, + { + "epoch": 0.2358970220321201, + "grad_norm": 0.7556560635566711, + "learning_rate": 0.00019023245890959615, + "loss": 2.7183, + "step": 2923 + }, + { + "epoch": 0.2359777257687031, + "grad_norm": 0.7907983660697937, + "learning_rate": 0.00019022565272878582, + "loss": 2.6805, + "step": 2924 + }, + { + "epoch": 0.2360584295052861, + "grad_norm": 0.9404142498970032, + "learning_rate": 0.0001902188442993164, + "loss": 2.8081, + "step": 2925 + }, + { + "epoch": 0.2361391332418691, + "grad_norm": 0.8349069952964783, + "learning_rate": 0.0001902120336213575, + "loss": 2.8329, + "step": 2926 + }, + { + "epoch": 0.2362198369784521, + "grad_norm": 0.8557522892951965, + 
"learning_rate": 0.00019020522069507892, + "loss": 2.704, + "step": 2927 + }, + { + "epoch": 0.23630054071503512, + "grad_norm": 0.7557278275489807, + "learning_rate": 0.00019019840552065044, + "loss": 2.7071, + "step": 2928 + }, + { + "epoch": 0.23638124445161812, + "grad_norm": 0.8810723423957825, + "learning_rate": 0.00019019158809824193, + "loss": 2.7535, + "step": 2929 + }, + { + "epoch": 0.23646194818820113, + "grad_norm": 0.7845562100410461, + "learning_rate": 0.00019018476842802326, + "loss": 2.7254, + "step": 2930 + }, + { + "epoch": 0.23654265192478413, + "grad_norm": 0.7566044926643372, + "learning_rate": 0.00019017794651016444, + "loss": 2.7295, + "step": 2931 + }, + { + "epoch": 0.23662335566136714, + "grad_norm": 0.8083382248878479, + "learning_rate": 0.00019017112234483545, + "loss": 2.7305, + "step": 2932 + }, + { + "epoch": 0.2367040593979501, + "grad_norm": 0.7924187183380127, + "learning_rate": 0.00019016429593220638, + "loss": 2.7659, + "step": 2933 + }, + { + "epoch": 0.23678476313453312, + "grad_norm": 0.8400307297706604, + "learning_rate": 0.00019015746727244737, + "loss": 2.7293, + "step": 2934 + }, + { + "epoch": 0.23686546687111612, + "grad_norm": 0.6931199431419373, + "learning_rate": 0.0001901506363657286, + "loss": 2.7189, + "step": 2935 + }, + { + "epoch": 0.23694617060769912, + "grad_norm": 0.8263585567474365, + "learning_rate": 0.0001901438032122203, + "loss": 2.7368, + "step": 2936 + }, + { + "epoch": 0.23702687434428213, + "grad_norm": 0.8001893162727356, + "learning_rate": 0.0001901369678120928, + "loss": 2.7793, + "step": 2937 + }, + { + "epoch": 0.23710757808086513, + "grad_norm": 0.7724235653877258, + "learning_rate": 0.00019013013016551644, + "loss": 2.717, + "step": 2938 + }, + { + "epoch": 0.23718828181744814, + "grad_norm": 0.7617147564888, + "learning_rate": 0.00019012329027266164, + "loss": 2.7275, + "step": 2939 + }, + { + "epoch": 0.23726898555403114, + "grad_norm": 0.80738765001297, + "learning_rate": 
0.00019011644813369884, + "loss": 2.7444, + "step": 2940 + }, + { + "epoch": 0.23734968929061415, + "grad_norm": 0.7885528802871704, + "learning_rate": 0.00019010960374879861, + "loss": 2.7377, + "step": 2941 + }, + { + "epoch": 0.23743039302719715, + "grad_norm": 0.720268964767456, + "learning_rate": 0.00019010275711813147, + "loss": 2.6897, + "step": 2942 + }, + { + "epoch": 0.23751109676378016, + "grad_norm": 0.7532111406326294, + "learning_rate": 0.00019009590824186815, + "loss": 2.8117, + "step": 2943 + }, + { + "epoch": 0.23759180050036316, + "grad_norm": 0.780777633190155, + "learning_rate": 0.00019008905712017925, + "loss": 2.7565, + "step": 2944 + }, + { + "epoch": 0.23767250423694616, + "grad_norm": 0.8721919059753418, + "learning_rate": 0.00019008220375323553, + "loss": 2.801, + "step": 2945 + }, + { + "epoch": 0.23775320797352917, + "grad_norm": 0.8258914947509766, + "learning_rate": 0.00019007534814120786, + "loss": 2.7696, + "step": 2946 + }, + { + "epoch": 0.23783391171011217, + "grad_norm": 0.7292730808258057, + "learning_rate": 0.00019006849028426704, + "loss": 2.7512, + "step": 2947 + }, + { + "epoch": 0.23791461544669518, + "grad_norm": 0.7789164185523987, + "learning_rate": 0.00019006163018258398, + "loss": 2.7489, + "step": 2948 + }, + { + "epoch": 0.23799531918327818, + "grad_norm": 0.8049725294113159, + "learning_rate": 0.00019005476783632967, + "loss": 2.672, + "step": 2949 + }, + { + "epoch": 0.23807602291986119, + "grad_norm": 0.7440119981765747, + "learning_rate": 0.00019004790324567519, + "loss": 2.7208, + "step": 2950 + }, + { + "epoch": 0.2381567266564442, + "grad_norm": 0.7695925235748291, + "learning_rate": 0.00019004103641079154, + "loss": 2.7816, + "step": 2951 + }, + { + "epoch": 0.2382374303930272, + "grad_norm": 0.7623234391212463, + "learning_rate": 0.00019003416733184988, + "loss": 2.7034, + "step": 2952 + }, + { + "epoch": 0.2383181341296102, + "grad_norm": 0.8136502504348755, + "learning_rate": 0.00019002729600902141, + 
"loss": 2.7638, + "step": 2953 + }, + { + "epoch": 0.2383988378661932, + "grad_norm": 0.7813066840171814, + "learning_rate": 0.00019002042244247743, + "loss": 2.7606, + "step": 2954 + }, + { + "epoch": 0.2384795416027762, + "grad_norm": 0.7863059043884277, + "learning_rate": 0.0001900135466323892, + "loss": 2.7219, + "step": 2955 + }, + { + "epoch": 0.2385602453393592, + "grad_norm": 0.8712359070777893, + "learning_rate": 0.00019000666857892806, + "loss": 2.7485, + "step": 2956 + }, + { + "epoch": 0.23864094907594222, + "grad_norm": 0.8130611777305603, + "learning_rate": 0.00018999978828226547, + "loss": 2.7195, + "step": 2957 + }, + { + "epoch": 0.23872165281252522, + "grad_norm": 0.759503960609436, + "learning_rate": 0.00018999290574257292, + "loss": 2.6856, + "step": 2958 + }, + { + "epoch": 0.23880235654910822, + "grad_norm": 0.7490882277488708, + "learning_rate": 0.0001899860209600219, + "loss": 2.7587, + "step": 2959 + }, + { + "epoch": 0.23888306028569123, + "grad_norm": 0.8111297488212585, + "learning_rate": 0.000189979133934784, + "loss": 2.7688, + "step": 2960 + }, + { + "epoch": 0.23896376402227423, + "grad_norm": 0.844894289970398, + "learning_rate": 0.0001899722446670309, + "loss": 2.7706, + "step": 2961 + }, + { + "epoch": 0.23904446775885724, + "grad_norm": 0.7875459790229797, + "learning_rate": 0.00018996535315693423, + "loss": 2.7535, + "step": 2962 + }, + { + "epoch": 0.23912517149544024, + "grad_norm": 0.7768518328666687, + "learning_rate": 0.0001899584594046658, + "loss": 2.7268, + "step": 2963 + }, + { + "epoch": 0.23920587523202325, + "grad_norm": 0.8645716309547424, + "learning_rate": 0.00018995156341039744, + "loss": 2.7856, + "step": 2964 + }, + { + "epoch": 0.23928657896860625, + "grad_norm": 0.7816600799560547, + "learning_rate": 0.00018994466517430097, + "loss": 2.757, + "step": 2965 + }, + { + "epoch": 0.23936728270518925, + "grad_norm": 0.7967644333839417, + "learning_rate": 0.00018993776469654832, + "loss": 2.7021, + "step": 2966 + }, 
+ { + "epoch": 0.23944798644177226, + "grad_norm": 0.800589919090271, + "learning_rate": 0.00018993086197731146, + "loss": 2.6838, + "step": 2967 + }, + { + "epoch": 0.23952869017835526, + "grad_norm": 0.7658529281616211, + "learning_rate": 0.00018992395701676246, + "loss": 2.6992, + "step": 2968 + }, + { + "epoch": 0.23960939391493827, + "grad_norm": 0.848456621170044, + "learning_rate": 0.00018991704981507338, + "loss": 2.7249, + "step": 2969 + }, + { + "epoch": 0.23969009765152127, + "grad_norm": 0.7365427017211914, + "learning_rate": 0.00018991014037241638, + "loss": 2.7044, + "step": 2970 + }, + { + "epoch": 0.23977080138810428, + "grad_norm": 0.8026351928710938, + "learning_rate": 0.00018990322868896365, + "loss": 2.7409, + "step": 2971 + }, + { + "epoch": 0.23985150512468728, + "grad_norm": 0.788646936416626, + "learning_rate": 0.00018989631476488744, + "loss": 2.7331, + "step": 2972 + }, + { + "epoch": 0.23993220886127029, + "grad_norm": 0.8388644456863403, + "learning_rate": 0.00018988939860036007, + "loss": 2.7478, + "step": 2973 + }, + { + "epoch": 0.2400129125978533, + "grad_norm": 0.7479026913642883, + "learning_rate": 0.00018988248019555394, + "loss": 2.7248, + "step": 2974 + }, + { + "epoch": 0.2400936163344363, + "grad_norm": 0.7313364744186401, + "learning_rate": 0.00018987555955064144, + "loss": 2.7323, + "step": 2975 + }, + { + "epoch": 0.2401743200710193, + "grad_norm": 0.7858260273933411, + "learning_rate": 0.00018986863666579505, + "loss": 2.6845, + "step": 2976 + }, + { + "epoch": 0.2402550238076023, + "grad_norm": 0.8090949654579163, + "learning_rate": 0.00018986171154118732, + "loss": 2.8094, + "step": 2977 + }, + { + "epoch": 0.2403357275441853, + "grad_norm": 0.7917135953903198, + "learning_rate": 0.00018985478417699085, + "loss": 2.7106, + "step": 2978 + }, + { + "epoch": 0.2404164312807683, + "grad_norm": 0.8192126154899597, + "learning_rate": 0.00018984785457337825, + "loss": 2.7729, + "step": 2979 + }, + { + "epoch": 
0.24049713501735132, + "grad_norm": 0.797922670841217, + "learning_rate": 0.00018984092273052226, + "loss": 2.7747, + "step": 2980 + }, + { + "epoch": 0.24057783875393432, + "grad_norm": 0.9050948023796082, + "learning_rate": 0.00018983398864859564, + "loss": 2.7453, + "step": 2981 + }, + { + "epoch": 0.24065854249051732, + "grad_norm": 0.7827617526054382, + "learning_rate": 0.0001898270523277712, + "loss": 2.7371, + "step": 2982 + }, + { + "epoch": 0.24073924622710033, + "grad_norm": 0.7530156373977661, + "learning_rate": 0.0001898201137682218, + "loss": 2.7397, + "step": 2983 + }, + { + "epoch": 0.2408199499636833, + "grad_norm": 0.7989545464515686, + "learning_rate": 0.00018981317297012034, + "loss": 2.7532, + "step": 2984 + }, + { + "epoch": 0.2409006537002663, + "grad_norm": 0.7501168847084045, + "learning_rate": 0.00018980622993363988, + "loss": 2.7395, + "step": 2985 + }, + { + "epoch": 0.2409813574368493, + "grad_norm": 0.8073468208312988, + "learning_rate": 0.0001897992846589534, + "loss": 2.7673, + "step": 2986 + }, + { + "epoch": 0.24106206117343232, + "grad_norm": 0.9155512452125549, + "learning_rate": 0.00018979233714623401, + "loss": 2.6608, + "step": 2987 + }, + { + "epoch": 0.24114276491001532, + "grad_norm": 0.7461311221122742, + "learning_rate": 0.00018978538739565485, + "loss": 2.7657, + "step": 2988 + }, + { + "epoch": 0.24122346864659833, + "grad_norm": 0.8011443018913269, + "learning_rate": 0.00018977843540738914, + "loss": 2.7363, + "step": 2989 + }, + { + "epoch": 0.24130417238318133, + "grad_norm": 0.7602998614311218, + "learning_rate": 0.0001897714811816101, + "loss": 2.7285, + "step": 2990 + }, + { + "epoch": 0.24138487611976434, + "grad_norm": 0.8283531069755554, + "learning_rate": 0.00018976452471849116, + "loss": 2.7614, + "step": 2991 + }, + { + "epoch": 0.24146557985634734, + "grad_norm": 0.7358889579772949, + "learning_rate": 0.00018975756601820556, + "loss": 2.7429, + "step": 2992 + }, + { + "epoch": 0.24154628359293034, + 
"grad_norm": 0.7749240398406982, + "learning_rate": 0.0001897506050809268, + "loss": 2.6884, + "step": 2993 + }, + { + "epoch": 0.24162698732951335, + "grad_norm": 0.7529963254928589, + "learning_rate": 0.00018974364190682837, + "loss": 2.7619, + "step": 2994 + }, + { + "epoch": 0.24170769106609635, + "grad_norm": 0.7946054935455322, + "learning_rate": 0.00018973667649608376, + "loss": 2.7403, + "step": 2995 + }, + { + "epoch": 0.24178839480267936, + "grad_norm": 0.735870897769928, + "learning_rate": 0.0001897297088488666, + "loss": 2.7158, + "step": 2996 + }, + { + "epoch": 0.24186909853926236, + "grad_norm": 0.8409188985824585, + "learning_rate": 0.00018972273896535055, + "loss": 2.768, + "step": 2997 + }, + { + "epoch": 0.24194980227584537, + "grad_norm": 0.8351938724517822, + "learning_rate": 0.0001897157668457093, + "loss": 2.7548, + "step": 2998 + }, + { + "epoch": 0.24203050601242837, + "grad_norm": 0.8339046239852905, + "learning_rate": 0.00018970879249011663, + "loss": 2.7842, + "step": 2999 + }, + { + "epoch": 0.24211120974901137, + "grad_norm": 0.8092730641365051, + "learning_rate": 0.00018970181589874637, + "loss": 2.7141, + "step": 3000 + }, + { + "epoch": 0.24211120974901137, + "eval_loss": 2.643277406692505, + "eval_runtime": 784.7512, + "eval_samples_per_second": 3.339, + "eval_steps_per_second": 0.557, + "step": 3000 + }, + { + "epoch": 0.24219191348559438, + "grad_norm": 0.8014447093009949, + "learning_rate": 0.00018969483707177235, + "loss": 2.7341, + "step": 3001 + }, + { + "epoch": 0.24227261722217738, + "grad_norm": 0.744153618812561, + "learning_rate": 0.00018968785600936855, + "loss": 2.678, + "step": 3002 + }, + { + "epoch": 0.2423533209587604, + "grad_norm": 0.7264240384101868, + "learning_rate": 0.0001896808727117089, + "loss": 2.7321, + "step": 3003 + }, + { + "epoch": 0.2424340246953434, + "grad_norm": 0.8214067220687866, + "learning_rate": 0.00018967388717896748, + "loss": 2.7311, + "step": 3004 + }, + { + "epoch": 0.2425147284319264, 
+ "grad_norm": 0.7871330976486206, + "learning_rate": 0.00018966689941131838, + "loss": 2.7184, + "step": 3005 + }, + { + "epoch": 0.2425954321685094, + "grad_norm": 0.7301360964775085, + "learning_rate": 0.00018965990940893575, + "loss": 2.7039, + "step": 3006 + }, + { + "epoch": 0.2426761359050924, + "grad_norm": 0.8290385603904724, + "learning_rate": 0.00018965291717199382, + "loss": 2.7848, + "step": 3007 + }, + { + "epoch": 0.2427568396416754, + "grad_norm": 0.7465909123420715, + "learning_rate": 0.00018964592270066683, + "loss": 2.7271, + "step": 3008 + }, + { + "epoch": 0.2428375433782584, + "grad_norm": 0.7992933988571167, + "learning_rate": 0.00018963892599512913, + "loss": 2.7749, + "step": 3009 + }, + { + "epoch": 0.24291824711484142, + "grad_norm": 0.7879100441932678, + "learning_rate": 0.00018963192705555507, + "loss": 2.6844, + "step": 3010 + }, + { + "epoch": 0.24299895085142442, + "grad_norm": 0.7895401120185852, + "learning_rate": 0.00018962492588211905, + "loss": 2.725, + "step": 3011 + }, + { + "epoch": 0.24307965458800743, + "grad_norm": 0.7699374556541443, + "learning_rate": 0.00018961792247499564, + "loss": 2.7408, + "step": 3012 + }, + { + "epoch": 0.24316035832459043, + "grad_norm": 0.828372597694397, + "learning_rate": 0.0001896109168343593, + "loss": 2.7527, + "step": 3013 + }, + { + "epoch": 0.24324106206117344, + "grad_norm": 0.7611951231956482, + "learning_rate": 0.0001896039089603847, + "loss": 2.7294, + "step": 3014 + }, + { + "epoch": 0.24332176579775644, + "grad_norm": 0.8214892148971558, + "learning_rate": 0.00018959689885324646, + "loss": 2.6931, + "step": 3015 + }, + { + "epoch": 0.24340246953433944, + "grad_norm": 0.7472538352012634, + "learning_rate": 0.00018958988651311928, + "loss": 2.7316, + "step": 3016 + }, + { + "epoch": 0.24348317327092245, + "grad_norm": 0.7574933171272278, + "learning_rate": 0.00018958287194017795, + "loss": 2.7764, + "step": 3017 + }, + { + "epoch": 0.24356387700750545, + "grad_norm": 
0.739152729511261, + "learning_rate": 0.00018957585513459723, + "loss": 2.7949, + "step": 3018 + }, + { + "epoch": 0.24364458074408846, + "grad_norm": 0.824097752571106, + "learning_rate": 0.00018956883609655208, + "loss": 2.6612, + "step": 3019 + }, + { + "epoch": 0.24372528448067146, + "grad_norm": 0.7891144156455994, + "learning_rate": 0.00018956181482621744, + "loss": 2.7139, + "step": 3020 + }, + { + "epoch": 0.24380598821725447, + "grad_norm": 0.7364415526390076, + "learning_rate": 0.0001895547913237682, + "loss": 2.6984, + "step": 3021 + }, + { + "epoch": 0.24388669195383747, + "grad_norm": 0.7631362080574036, + "learning_rate": 0.0001895477655893795, + "loss": 2.7015, + "step": 3022 + }, + { + "epoch": 0.24396739569042047, + "grad_norm": 0.780541181564331, + "learning_rate": 0.00018954073762322637, + "loss": 2.7716, + "step": 3023 + }, + { + "epoch": 0.24404809942700348, + "grad_norm": 0.7877349853515625, + "learning_rate": 0.00018953370742548403, + "loss": 2.6654, + "step": 3024 + }, + { + "epoch": 0.24412880316358648, + "grad_norm": 0.7786216139793396, + "learning_rate": 0.00018952667499632763, + "loss": 2.7491, + "step": 3025 + }, + { + "epoch": 0.2442095069001695, + "grad_norm": 0.8207663893699646, + "learning_rate": 0.00018951964033593247, + "loss": 2.7212, + "step": 3026 + }, + { + "epoch": 0.2442902106367525, + "grad_norm": 0.8271831274032593, + "learning_rate": 0.00018951260344447386, + "loss": 2.7456, + "step": 3027 + }, + { + "epoch": 0.2443709143733355, + "grad_norm": 0.7610505819320679, + "learning_rate": 0.00018950556432212722, + "loss": 2.7472, + "step": 3028 + }, + { + "epoch": 0.2444516181099185, + "grad_norm": 0.7521701455116272, + "learning_rate": 0.00018949852296906792, + "loss": 2.7263, + "step": 3029 + }, + { + "epoch": 0.2445323218465015, + "grad_norm": 0.7518337965011597, + "learning_rate": 0.00018949147938547144, + "loss": 2.7069, + "step": 3030 + }, + { + "epoch": 0.2446130255830845, + "grad_norm": 0.7823107838630676, + 
"learning_rate": 0.00018948443357151343, + "loss": 2.7858, + "step": 3031 + }, + { + "epoch": 0.2446937293196675, + "grad_norm": 0.733132004737854, + "learning_rate": 0.00018947738552736938, + "loss": 2.7194, + "step": 3032 + }, + { + "epoch": 0.24477443305625052, + "grad_norm": 0.7756488919258118, + "learning_rate": 0.00018947033525321501, + "loss": 2.7299, + "step": 3033 + }, + { + "epoch": 0.24485513679283352, + "grad_norm": 0.7971112728118896, + "learning_rate": 0.00018946328274922598, + "loss": 2.7474, + "step": 3034 + }, + { + "epoch": 0.2449358405294165, + "grad_norm": 0.7871260643005371, + "learning_rate": 0.0001894562280155781, + "loss": 2.6994, + "step": 3035 + }, + { + "epoch": 0.2450165442659995, + "grad_norm": 0.7431116104125977, + "learning_rate": 0.00018944917105244717, + "loss": 2.6834, + "step": 3036 + }, + { + "epoch": 0.2450972480025825, + "grad_norm": 0.7372273206710815, + "learning_rate": 0.00018944211186000906, + "loss": 2.6988, + "step": 3037 + }, + { + "epoch": 0.2451779517391655, + "grad_norm": 0.8161508440971375, + "learning_rate": 0.00018943505043843975, + "loss": 2.7595, + "step": 3038 + }, + { + "epoch": 0.24525865547574852, + "grad_norm": 0.8062586784362793, + "learning_rate": 0.00018942798678791518, + "loss": 2.6893, + "step": 3039 + }, + { + "epoch": 0.24533935921233152, + "grad_norm": 0.824023425579071, + "learning_rate": 0.0001894209209086114, + "loss": 2.7188, + "step": 3040 + }, + { + "epoch": 0.24542006294891452, + "grad_norm": 0.740466833114624, + "learning_rate": 0.00018941385280070455, + "loss": 2.674, + "step": 3041 + }, + { + "epoch": 0.24550076668549753, + "grad_norm": 0.8543577194213867, + "learning_rate": 0.00018940678246437073, + "loss": 2.7423, + "step": 3042 + }, + { + "epoch": 0.24558147042208053, + "grad_norm": 0.7059324979782104, + "learning_rate": 0.0001893997098997862, + "loss": 2.6669, + "step": 3043 + }, + { + "epoch": 0.24566217415866354, + "grad_norm": 0.7739956974983215, + "learning_rate": 
0.00018939263510712721, + "loss": 2.7118, + "step": 3044 + }, + { + "epoch": 0.24574287789524654, + "grad_norm": 0.7701205611228943, + "learning_rate": 0.00018938555808657007, + "loss": 2.7653, + "step": 3045 + }, + { + "epoch": 0.24582358163182955, + "grad_norm": 0.7243000864982605, + "learning_rate": 0.00018937847883829115, + "loss": 2.6789, + "step": 3046 + }, + { + "epoch": 0.24590428536841255, + "grad_norm": 0.7645598649978638, + "learning_rate": 0.00018937139736246693, + "loss": 2.7108, + "step": 3047 + }, + { + "epoch": 0.24598498910499556, + "grad_norm": 0.7544745802879333, + "learning_rate": 0.00018936431365927385, + "loss": 2.6958, + "step": 3048 + }, + { + "epoch": 0.24606569284157856, + "grad_norm": 0.709282398223877, + "learning_rate": 0.00018935722772888848, + "loss": 2.6728, + "step": 3049 + }, + { + "epoch": 0.24614639657816156, + "grad_norm": 0.7524243593215942, + "learning_rate": 0.00018935013957148742, + "loss": 2.7283, + "step": 3050 + }, + { + "epoch": 0.24622710031474457, + "grad_norm": 0.7959655523300171, + "learning_rate": 0.0001893430491872473, + "loss": 2.7384, + "step": 3051 + }, + { + "epoch": 0.24630780405132757, + "grad_norm": 0.7252553105354309, + "learning_rate": 0.00018933595657634486, + "loss": 2.7226, + "step": 3052 + }, + { + "epoch": 0.24638850778791058, + "grad_norm": 0.7387316226959229, + "learning_rate": 0.00018932886173895686, + "loss": 2.7546, + "step": 3053 + }, + { + "epoch": 0.24646921152449358, + "grad_norm": 0.804856538772583, + "learning_rate": 0.0001893217646752601, + "loss": 2.7321, + "step": 3054 + }, + { + "epoch": 0.24654991526107659, + "grad_norm": 0.6929069757461548, + "learning_rate": 0.0001893146653854315, + "loss": 2.6735, + "step": 3055 + }, + { + "epoch": 0.2466306189976596, + "grad_norm": 0.7076159715652466, + "learning_rate": 0.00018930756386964794, + "loss": 2.7368, + "step": 3056 + }, + { + "epoch": 0.2467113227342426, + "grad_norm": 0.7522851228713989, + "learning_rate": 0.00018930046012808648, + 
"loss": 2.7448, + "step": 3057 + }, + { + "epoch": 0.2467920264708256, + "grad_norm": 0.8347200155258179, + "learning_rate": 0.00018929335416092408, + "loss": 2.6837, + "step": 3058 + }, + { + "epoch": 0.2468727302074086, + "grad_norm": 0.737503707408905, + "learning_rate": 0.00018928624596833786, + "loss": 2.693, + "step": 3059 + }, + { + "epoch": 0.2469534339439916, + "grad_norm": 0.7836787104606628, + "learning_rate": 0.00018927913555050503, + "loss": 2.7335, + "step": 3060 + }, + { + "epoch": 0.2470341376805746, + "grad_norm": 0.7823840975761414, + "learning_rate": 0.00018927202290760278, + "loss": 2.6736, + "step": 3061 + }, + { + "epoch": 0.24711484141715762, + "grad_norm": 0.7894529700279236, + "learning_rate": 0.00018926490803980833, + "loss": 2.7112, + "step": 3062 + }, + { + "epoch": 0.24719554515374062, + "grad_norm": 0.8289024829864502, + "learning_rate": 0.000189257790947299, + "loss": 2.7667, + "step": 3063 + }, + { + "epoch": 0.24727624889032362, + "grad_norm": 0.70560222864151, + "learning_rate": 0.00018925067163025227, + "loss": 2.6946, + "step": 3064 + }, + { + "epoch": 0.24735695262690663, + "grad_norm": 0.6954196095466614, + "learning_rate": 0.00018924355008884548, + "loss": 2.7237, + "step": 3065 + }, + { + "epoch": 0.24743765636348963, + "grad_norm": 0.7975121736526489, + "learning_rate": 0.0001892364263232561, + "loss": 2.6392, + "step": 3066 + }, + { + "epoch": 0.24751836010007264, + "grad_norm": 0.777350902557373, + "learning_rate": 0.00018922930033366174, + "loss": 2.7284, + "step": 3067 + }, + { + "epoch": 0.24759906383665564, + "grad_norm": 0.738240659236908, + "learning_rate": 0.00018922217212023995, + "loss": 2.6884, + "step": 3068 + }, + { + "epoch": 0.24767976757323865, + "grad_norm": 0.8077268600463867, + "learning_rate": 0.0001892150416831684, + "loss": 2.7205, + "step": 3069 + }, + { + "epoch": 0.24776047130982165, + "grad_norm": 0.8108188509941101, + "learning_rate": 0.00018920790902262483, + "loss": 2.7592, + "step": 3070 + }, + 
{ + "epoch": 0.24784117504640465, + "grad_norm": 0.7842642664909363, + "learning_rate": 0.00018920077413878695, + "loss": 2.7474, + "step": 3071 + }, + { + "epoch": 0.24792187878298766, + "grad_norm": 0.7644543051719666, + "learning_rate": 0.0001891936370318326, + "loss": 2.7179, + "step": 3072 + }, + { + "epoch": 0.24800258251957066, + "grad_norm": 0.7761854529380798, + "learning_rate": 0.00018918649770193965, + "loss": 2.71, + "step": 3073 + }, + { + "epoch": 0.24808328625615367, + "grad_norm": 0.7724074125289917, + "learning_rate": 0.00018917935614928607, + "loss": 2.7359, + "step": 3074 + }, + { + "epoch": 0.24816398999273667, + "grad_norm": 0.7360609173774719, + "learning_rate": 0.0001891722123740498, + "loss": 2.7342, + "step": 3075 + }, + { + "epoch": 0.24824469372931968, + "grad_norm": 0.757561206817627, + "learning_rate": 0.00018916506637640894, + "loss": 2.7647, + "step": 3076 + }, + { + "epoch": 0.24832539746590268, + "grad_norm": 0.7180947065353394, + "learning_rate": 0.00018915791815654148, + "loss": 2.6771, + "step": 3077 + }, + { + "epoch": 0.24840610120248569, + "grad_norm": 0.7219653129577637, + "learning_rate": 0.0001891507677146257, + "loss": 2.7772, + "step": 3078 + }, + { + "epoch": 0.2484868049390687, + "grad_norm": 0.749113917350769, + "learning_rate": 0.0001891436150508397, + "loss": 2.6996, + "step": 3079 + }, + { + "epoch": 0.2485675086756517, + "grad_norm": 0.766180157661438, + "learning_rate": 0.00018913646016536183, + "loss": 2.7896, + "step": 3080 + }, + { + "epoch": 0.2486482124122347, + "grad_norm": 0.7672411799430847, + "learning_rate": 0.00018912930305837032, + "loss": 2.7307, + "step": 3081 + }, + { + "epoch": 0.2487289161488177, + "grad_norm": 0.7639018297195435, + "learning_rate": 0.00018912214373004364, + "loss": 2.6569, + "step": 3082 + }, + { + "epoch": 0.2488096198854007, + "grad_norm": 0.8935483694076538, + "learning_rate": 0.00018911498218056013, + "loss": 2.6897, + "step": 3083 + }, + { + "epoch": 0.2488903236219837, + 
"grad_norm": 0.8506368398666382, + "learning_rate": 0.00018910781841009836, + "loss": 2.778, + "step": 3084 + }, + { + "epoch": 0.24897102735856672, + "grad_norm": 0.8026999235153198, + "learning_rate": 0.0001891006524188368, + "loss": 2.7799, + "step": 3085 + }, + { + "epoch": 0.2490517310951497, + "grad_norm": 0.784637987613678, + "learning_rate": 0.00018909348420695406, + "loss": 2.673, + "step": 3086 + }, + { + "epoch": 0.2491324348317327, + "grad_norm": 0.8949337601661682, + "learning_rate": 0.00018908631377462882, + "loss": 2.7726, + "step": 3087 + }, + { + "epoch": 0.2492131385683157, + "grad_norm": 0.73841792345047, + "learning_rate": 0.00018907914112203974, + "loss": 2.7403, + "step": 3088 + }, + { + "epoch": 0.2492938423048987, + "grad_norm": 0.7305924296379089, + "learning_rate": 0.00018907196624936564, + "loss": 2.6713, + "step": 3089 + }, + { + "epoch": 0.2493745460414817, + "grad_norm": 0.7707394361495972, + "learning_rate": 0.0001890647891567853, + "loss": 2.7306, + "step": 3090 + }, + { + "epoch": 0.2494552497780647, + "grad_norm": 0.8691473603248596, + "learning_rate": 0.00018905760984447759, + "loss": 2.6775, + "step": 3091 + }, + { + "epoch": 0.24953595351464772, + "grad_norm": 0.7466028332710266, + "learning_rate": 0.00018905042831262144, + "loss": 2.7196, + "step": 3092 + }, + { + "epoch": 0.24961665725123072, + "grad_norm": 0.7785150408744812, + "learning_rate": 0.0001890432445613958, + "loss": 2.7099, + "step": 3093 + }, + { + "epoch": 0.24969736098781373, + "grad_norm": 0.7775028347969055, + "learning_rate": 0.0001890360585909798, + "loss": 2.698, + "step": 3094 + }, + { + "epoch": 0.24977806472439673, + "grad_norm": 0.829257071018219, + "learning_rate": 0.00018902887040155245, + "loss": 2.711, + "step": 3095 + }, + { + "epoch": 0.24985876846097974, + "grad_norm": 0.8492234945297241, + "learning_rate": 0.00018902167999329295, + "loss": 2.7164, + "step": 3096 + }, + { + "epoch": 0.24993947219756274, + "grad_norm": 0.7332174777984619, + 
"learning_rate": 0.00018901448736638045, + "loss": 2.6925, + "step": 3097 + }, + { + "epoch": 0.25002017593414577, + "grad_norm": 0.7494251728057861, + "learning_rate": 0.00018900729252099426, + "loss": 2.6899, + "step": 3098 + }, + { + "epoch": 0.25010087967072875, + "grad_norm": 0.7760747075080872, + "learning_rate": 0.00018900009545731367, + "loss": 2.6626, + "step": 3099 + }, + { + "epoch": 0.2501815834073118, + "grad_norm": 0.7270001173019409, + "learning_rate": 0.00018899289617551804, + "loss": 2.7338, + "step": 3100 + }, + { + "epoch": 0.25026228714389476, + "grad_norm": 0.7832693457603455, + "learning_rate": 0.0001889856946757868, + "loss": 2.6668, + "step": 3101 + }, + { + "epoch": 0.2503429908804778, + "grad_norm": 0.8833239674568176, + "learning_rate": 0.00018897849095829945, + "loss": 2.7219, + "step": 3102 + }, + { + "epoch": 0.25042369461706077, + "grad_norm": 0.8144814372062683, + "learning_rate": 0.0001889712850232355, + "loss": 2.724, + "step": 3103 + }, + { + "epoch": 0.2505043983536438, + "grad_norm": 0.9466180801391602, + "learning_rate": 0.0001889640768707746, + "loss": 2.7499, + "step": 3104 + }, + { + "epoch": 0.2505851020902268, + "grad_norm": 0.926292359828949, + "learning_rate": 0.00018895686650109632, + "loss": 2.7391, + "step": 3105 + }, + { + "epoch": 0.2506658058268098, + "grad_norm": 0.8214002251625061, + "learning_rate": 0.00018894965391438038, + "loss": 2.7546, + "step": 3106 + }, + { + "epoch": 0.2507465095633928, + "grad_norm": 0.9021030068397522, + "learning_rate": 0.00018894243911080655, + "loss": 2.7188, + "step": 3107 + }, + { + "epoch": 0.2508272132999758, + "grad_norm": 0.778366208076477, + "learning_rate": 0.00018893522209055465, + "loss": 2.7852, + "step": 3108 + }, + { + "epoch": 0.2509079170365588, + "grad_norm": 0.8780209422111511, + "learning_rate": 0.00018892800285380456, + "loss": 2.7344, + "step": 3109 + }, + { + "epoch": 0.2509886207731418, + "grad_norm": 0.7581839561462402, + "learning_rate": 
0.00018892078140073614, + "loss": 2.6697, + "step": 3110 + }, + { + "epoch": 0.2510693245097248, + "grad_norm": 0.7818635702133179, + "learning_rate": 0.00018891355773152944, + "loss": 2.6969, + "step": 3111 + }, + { + "epoch": 0.2511500282463078, + "grad_norm": 0.7528424859046936, + "learning_rate": 0.0001889063318463644, + "loss": 2.7359, + "step": 3112 + }, + { + "epoch": 0.2512307319828908, + "grad_norm": 0.8274288773536682, + "learning_rate": 0.0001888991037454212, + "loss": 2.7124, + "step": 3113 + }, + { + "epoch": 0.2513114357194738, + "grad_norm": 0.7186813354492188, + "learning_rate": 0.00018889187342888, + "loss": 2.7037, + "step": 3114 + }, + { + "epoch": 0.2513921394560568, + "grad_norm": 0.7458071112632751, + "learning_rate": 0.00018888464089692088, + "loss": 2.7178, + "step": 3115 + }, + { + "epoch": 0.2514728431926398, + "grad_norm": 0.7814257740974426, + "learning_rate": 0.00018887740614972418, + "loss": 2.7554, + "step": 3116 + }, + { + "epoch": 0.2515535469292228, + "grad_norm": 0.7706831097602844, + "learning_rate": 0.0001888701691874702, + "loss": 2.7441, + "step": 3117 + }, + { + "epoch": 0.2516342506658058, + "grad_norm": 0.8177775740623474, + "learning_rate": 0.0001888629300103393, + "loss": 2.7257, + "step": 3118 + }, + { + "epoch": 0.25171495440238884, + "grad_norm": 0.791097104549408, + "learning_rate": 0.00018885568861851188, + "loss": 2.6937, + "step": 3119 + }, + { + "epoch": 0.2517956581389718, + "grad_norm": 0.7521430850028992, + "learning_rate": 0.00018884844501216845, + "loss": 2.7723, + "step": 3120 + }, + { + "epoch": 0.25187636187555484, + "grad_norm": 0.8119359016418457, + "learning_rate": 0.00018884119919148948, + "loss": 2.7573, + "step": 3121 + }, + { + "epoch": 0.2519570656121378, + "grad_norm": 0.7579830288887024, + "learning_rate": 0.00018883395115665562, + "loss": 2.6943, + "step": 3122 + }, + { + "epoch": 0.25203776934872085, + "grad_norm": 0.7718791365623474, + "learning_rate": 0.00018882670090784748, + "loss": 2.6911, 
+ "step": 3123 + }, + { + "epoch": 0.25211847308530383, + "grad_norm": 0.7718087434768677, + "learning_rate": 0.00018881944844524576, + "loss": 2.7505, + "step": 3124 + }, + { + "epoch": 0.25219917682188686, + "grad_norm": 0.7696875333786011, + "learning_rate": 0.0001888121937690312, + "loss": 2.7272, + "step": 3125 + }, + { + "epoch": 0.25227988055846984, + "grad_norm": 0.8082131743431091, + "learning_rate": 0.00018880493687938464, + "loss": 2.6677, + "step": 3126 + }, + { + "epoch": 0.25236058429505287, + "grad_norm": 0.857224702835083, + "learning_rate": 0.00018879767777648686, + "loss": 2.7237, + "step": 3127 + }, + { + "epoch": 0.25244128803163585, + "grad_norm": 0.8135749697685242, + "learning_rate": 0.00018879041646051886, + "loss": 2.7298, + "step": 3128 + }, + { + "epoch": 0.2525219917682189, + "grad_norm": 0.7772457003593445, + "learning_rate": 0.0001887831529316616, + "loss": 2.7723, + "step": 3129 + }, + { + "epoch": 0.25260269550480186, + "grad_norm": 0.795555055141449, + "learning_rate": 0.00018877588719009607, + "loss": 2.7207, + "step": 3130 + }, + { + "epoch": 0.2526833992413849, + "grad_norm": 0.7677939534187317, + "learning_rate": 0.00018876861923600337, + "loss": 2.6649, + "step": 3131 + }, + { + "epoch": 0.25276410297796786, + "grad_norm": 0.7706151008605957, + "learning_rate": 0.00018876134906956464, + "loss": 2.7154, + "step": 3132 + }, + { + "epoch": 0.2528448067145509, + "grad_norm": 0.8230584859848022, + "learning_rate": 0.00018875407669096105, + "loss": 2.7871, + "step": 3133 + }, + { + "epoch": 0.2529255104511339, + "grad_norm": 0.7037158608436584, + "learning_rate": 0.0001887468021003739, + "loss": 2.669, + "step": 3134 + }, + { + "epoch": 0.2530062141877169, + "grad_norm": 0.8485400080680847, + "learning_rate": 0.00018873952529798441, + "loss": 2.7517, + "step": 3135 + }, + { + "epoch": 0.2530869179242999, + "grad_norm": 0.7803399562835693, + "learning_rate": 0.000188732246283974, + "loss": 2.6987, + "step": 3136 + }, + { + "epoch": 
0.2531676216608829, + "grad_norm": 0.7884016633033752, + "learning_rate": 0.0001887249650585241, + "loss": 2.7348, + "step": 3137 + }, + { + "epoch": 0.2532483253974659, + "grad_norm": 0.7794530987739563, + "learning_rate": 0.0001887176816218161, + "loss": 2.6934, + "step": 3138 + }, + { + "epoch": 0.2533290291340489, + "grad_norm": 0.7905173301696777, + "learning_rate": 0.00018871039597403156, + "loss": 2.714, + "step": 3139 + }, + { + "epoch": 0.2534097328706319, + "grad_norm": 0.7857949137687683, + "learning_rate": 0.0001887031081153521, + "loss": 2.7591, + "step": 3140 + }, + { + "epoch": 0.25349043660721493, + "grad_norm": 0.8602419495582581, + "learning_rate": 0.00018869581804595927, + "loss": 2.7819, + "step": 3141 + }, + { + "epoch": 0.2535711403437979, + "grad_norm": 0.7845202088356018, + "learning_rate": 0.00018868852576603483, + "loss": 2.6796, + "step": 3142 + }, + { + "epoch": 0.25365184408038094, + "grad_norm": 0.7600612640380859, + "learning_rate": 0.00018868123127576048, + "loss": 2.6785, + "step": 3143 + }, + { + "epoch": 0.2537325478169639, + "grad_norm": 0.7731521725654602, + "learning_rate": 0.000188673934575318, + "loss": 2.7435, + "step": 3144 + }, + { + "epoch": 0.25381325155354695, + "grad_norm": 0.8214225172996521, + "learning_rate": 0.0001886666356648893, + "loss": 2.7264, + "step": 3145 + }, + { + "epoch": 0.2538939552901299, + "grad_norm": 0.7623010277748108, + "learning_rate": 0.00018865933454465628, + "loss": 2.73, + "step": 3146 + }, + { + "epoch": 0.25397465902671296, + "grad_norm": 0.7864633798599243, + "learning_rate": 0.00018865203121480088, + "loss": 2.7654, + "step": 3147 + }, + { + "epoch": 0.25405536276329593, + "grad_norm": 0.7654051780700684, + "learning_rate": 0.0001886447256755051, + "loss": 2.7171, + "step": 3148 + }, + { + "epoch": 0.25413606649987897, + "grad_norm": 0.8045486211776733, + "learning_rate": 0.0001886374179269511, + "loss": 2.7385, + "step": 3149 + }, + { + "epoch": 0.25421677023646194, + "grad_norm": 
0.8504971861839294, + "learning_rate": 0.0001886301079693209, + "loss": 2.6719, + "step": 3150 + }, + { + "epoch": 0.254297473973045, + "grad_norm": 0.771538496017456, + "learning_rate": 0.0001886227958027967, + "loss": 2.6707, + "step": 3151 + }, + { + "epoch": 0.25437817770962795, + "grad_norm": 0.8472220301628113, + "learning_rate": 0.0001886154814275608, + "loss": 2.7201, + "step": 3152 + }, + { + "epoch": 0.254458881446211, + "grad_norm": 0.7639158368110657, + "learning_rate": 0.00018860816484379545, + "loss": 2.76, + "step": 3153 + }, + { + "epoch": 0.25453958518279396, + "grad_norm": 0.8042064905166626, + "learning_rate": 0.000188600846051683, + "loss": 2.6862, + "step": 3154 + }, + { + "epoch": 0.254620288919377, + "grad_norm": 0.7481087446212769, + "learning_rate": 0.0001885935250514059, + "loss": 2.7394, + "step": 3155 + }, + { + "epoch": 0.25470099265595997, + "grad_norm": 0.7826097011566162, + "learning_rate": 0.00018858620184314653, + "loss": 2.596, + "step": 3156 + }, + { + "epoch": 0.254781696392543, + "grad_norm": 0.7477610111236572, + "learning_rate": 0.00018857887642708743, + "loss": 2.7385, + "step": 3157 + }, + { + "epoch": 0.254862400129126, + "grad_norm": 0.7347466945648193, + "learning_rate": 0.00018857154880341122, + "loss": 2.722, + "step": 3158 + }, + { + "epoch": 0.254943103865709, + "grad_norm": 0.7853806018829346, + "learning_rate": 0.00018856421897230048, + "loss": 2.7675, + "step": 3159 + }, + { + "epoch": 0.255023807602292, + "grad_norm": 0.7497034072875977, + "learning_rate": 0.0001885568869339379, + "loss": 2.6882, + "step": 3160 + }, + { + "epoch": 0.255104511338875, + "grad_norm": 0.7932263612747192, + "learning_rate": 0.0001885495526885062, + "loss": 2.7938, + "step": 3161 + }, + { + "epoch": 0.255185215075458, + "grad_norm": 0.7776823043823242, + "learning_rate": 0.00018854221623618815, + "loss": 2.6955, + "step": 3162 + }, + { + "epoch": 0.25526591881204097, + "grad_norm": 0.7564878463745117, + "learning_rate": 
0.00018853487757716666, + "loss": 2.7644, + "step": 3163 + }, + { + "epoch": 0.255346622548624, + "grad_norm": 0.836270809173584, + "learning_rate": 0.00018852753671162454, + "loss": 2.7119, + "step": 3164 + }, + { + "epoch": 0.255427326285207, + "grad_norm": 0.7540388703346252, + "learning_rate": 0.00018852019363974485, + "loss": 2.797, + "step": 3165 + }, + { + "epoch": 0.25550803002179, + "grad_norm": 0.7943860292434692, + "learning_rate": 0.0001885128483617105, + "loss": 2.7973, + "step": 3166 + }, + { + "epoch": 0.255588733758373, + "grad_norm": 0.7743831276893616, + "learning_rate": 0.00018850550087770463, + "loss": 2.7403, + "step": 3167 + }, + { + "epoch": 0.255669437494956, + "grad_norm": 0.7593801021575928, + "learning_rate": 0.00018849815118791028, + "loss": 2.7203, + "step": 3168 + }, + { + "epoch": 0.255750141231539, + "grad_norm": 0.7663586139678955, + "learning_rate": 0.00018849079929251068, + "loss": 2.7481, + "step": 3169 + }, + { + "epoch": 0.25583084496812203, + "grad_norm": 0.7218170166015625, + "learning_rate": 0.00018848344519168905, + "loss": 2.6698, + "step": 3170 + }, + { + "epoch": 0.255911548704705, + "grad_norm": 0.8374441266059875, + "learning_rate": 0.00018847608888562868, + "loss": 2.8121, + "step": 3171 + }, + { + "epoch": 0.25599225244128804, + "grad_norm": 0.7488373517990112, + "learning_rate": 0.00018846873037451286, + "loss": 2.6871, + "step": 3172 + }, + { + "epoch": 0.256072956177871, + "grad_norm": 0.7513325810432434, + "learning_rate": 0.00018846136965852505, + "loss": 2.6924, + "step": 3173 + }, + { + "epoch": 0.25615365991445405, + "grad_norm": 0.7467690706253052, + "learning_rate": 0.00018845400673784865, + "loss": 2.714, + "step": 3174 + }, + { + "epoch": 0.256234363651037, + "grad_norm": 0.7717954516410828, + "learning_rate": 0.0001884466416126672, + "loss": 2.6679, + "step": 3175 + }, + { + "epoch": 0.25631506738762005, + "grad_norm": 0.7086547613143921, + "learning_rate": 0.0001884392742831642, + "loss": 2.7046, + 
"step": 3176 + }, + { + "epoch": 0.25639577112420303, + "grad_norm": 0.7024885416030884, + "learning_rate": 0.00018843190474952337, + "loss": 2.6724, + "step": 3177 + }, + { + "epoch": 0.25647647486078606, + "grad_norm": 0.8376390933990479, + "learning_rate": 0.00018842453301192827, + "loss": 2.7818, + "step": 3178 + }, + { + "epoch": 0.25655717859736904, + "grad_norm": 0.8190221190452576, + "learning_rate": 0.00018841715907056265, + "loss": 2.7455, + "step": 3179 + }, + { + "epoch": 0.25663788233395207, + "grad_norm": 0.8029047846794128, + "learning_rate": 0.0001884097829256103, + "loss": 2.7102, + "step": 3180 + }, + { + "epoch": 0.25671858607053505, + "grad_norm": 0.7467923760414124, + "learning_rate": 0.00018840240457725508, + "loss": 2.7051, + "step": 3181 + }, + { + "epoch": 0.2567992898071181, + "grad_norm": 0.7850394248962402, + "learning_rate": 0.00018839502402568086, + "loss": 2.6826, + "step": 3182 + }, + { + "epoch": 0.25687999354370106, + "grad_norm": 0.7144927978515625, + "learning_rate": 0.00018838764127107155, + "loss": 2.6694, + "step": 3183 + }, + { + "epoch": 0.2569606972802841, + "grad_norm": 0.7580311894416809, + "learning_rate": 0.0001883802563136112, + "loss": 2.7191, + "step": 3184 + }, + { + "epoch": 0.25704140101686707, + "grad_norm": 0.7366482615470886, + "learning_rate": 0.0001883728691534838, + "loss": 2.7175, + "step": 3185 + }, + { + "epoch": 0.2571221047534501, + "grad_norm": 0.6961715817451477, + "learning_rate": 0.0001883654797908735, + "loss": 2.7705, + "step": 3186 + }, + { + "epoch": 0.2572028084900331, + "grad_norm": 0.7473716735839844, + "learning_rate": 0.00018835808822596445, + "loss": 2.707, + "step": 3187 + }, + { + "epoch": 0.2572835122266161, + "grad_norm": 0.8376151919364929, + "learning_rate": 0.00018835069445894087, + "loss": 2.7424, + "step": 3188 + }, + { + "epoch": 0.2573642159631991, + "grad_norm": 0.7950237393379211, + "learning_rate": 0.00018834329848998706, + "loss": 2.7593, + "step": 3189 + }, + { + "epoch": 
0.2574449196997821, + "grad_norm": 0.7637122869491577, + "learning_rate": 0.0001883359003192873, + "loss": 2.6708, + "step": 3190 + }, + { + "epoch": 0.2575256234363651, + "grad_norm": 0.709516704082489, + "learning_rate": 0.00018832849994702597, + "loss": 2.6988, + "step": 3191 + }, + { + "epoch": 0.2576063271729481, + "grad_norm": 0.7465435266494751, + "learning_rate": 0.00018832109737338757, + "loss": 2.7183, + "step": 3192 + }, + { + "epoch": 0.2576870309095311, + "grad_norm": 0.7619186043739319, + "learning_rate": 0.00018831369259855653, + "loss": 2.6833, + "step": 3193 + }, + { + "epoch": 0.25776773464611413, + "grad_norm": 0.7501961588859558, + "learning_rate": 0.0001883062856227174, + "loss": 2.725, + "step": 3194 + }, + { + "epoch": 0.2578484383826971, + "grad_norm": 0.7720133066177368, + "learning_rate": 0.00018829887644605483, + "loss": 2.7988, + "step": 3195 + }, + { + "epoch": 0.25792914211928014, + "grad_norm": 0.7253942489624023, + "learning_rate": 0.00018829146506875344, + "loss": 2.6999, + "step": 3196 + }, + { + "epoch": 0.2580098458558631, + "grad_norm": 0.7759599685668945, + "learning_rate": 0.00018828405149099792, + "loss": 2.6831, + "step": 3197 + }, + { + "epoch": 0.25809054959244615, + "grad_norm": 0.7250547409057617, + "learning_rate": 0.0001882766357129731, + "loss": 2.6742, + "step": 3198 + }, + { + "epoch": 0.2581712533290291, + "grad_norm": 0.7565183043479919, + "learning_rate": 0.00018826921773486372, + "loss": 2.6777, + "step": 3199 + }, + { + "epoch": 0.25825195706561216, + "grad_norm": 0.7183675169944763, + "learning_rate": 0.0001882617975568547, + "loss": 2.6743, + "step": 3200 + }, + { + "epoch": 0.25833266080219514, + "grad_norm": 0.7021663784980774, + "learning_rate": 0.00018825437517913098, + "loss": 2.727, + "step": 3201 + }, + { + "epoch": 0.25841336453877817, + "grad_norm": 0.7406932711601257, + "learning_rate": 0.00018824695060187753, + "loss": 2.7448, + "step": 3202 + }, + { + "epoch": 0.25849406827536114, + "grad_norm": 
0.7766773104667664, + "learning_rate": 0.0001882395238252794, + "loss": 2.69, + "step": 3203 + }, + { + "epoch": 0.2585747720119442, + "grad_norm": 0.7483372688293457, + "learning_rate": 0.00018823209484952164, + "loss": 2.6611, + "step": 3204 + }, + { + "epoch": 0.25865547574852715, + "grad_norm": 0.781831681728363, + "learning_rate": 0.0001882246636747895, + "loss": 2.7292, + "step": 3205 + }, + { + "epoch": 0.2587361794851102, + "grad_norm": 0.7188203930854797, + "learning_rate": 0.00018821723030126806, + "loss": 2.718, + "step": 3206 + }, + { + "epoch": 0.25881688322169316, + "grad_norm": 0.7332054972648621, + "learning_rate": 0.00018820979472914263, + "loss": 2.6492, + "step": 3207 + }, + { + "epoch": 0.2588975869582762, + "grad_norm": 0.7044041156768799, + "learning_rate": 0.00018820235695859858, + "loss": 2.7047, + "step": 3208 + }, + { + "epoch": 0.25897829069485917, + "grad_norm": 0.8651862740516663, + "learning_rate": 0.00018819491698982121, + "loss": 2.6301, + "step": 3209 + }, + { + "epoch": 0.2590589944314422, + "grad_norm": 0.8118106126785278, + "learning_rate": 0.00018818747482299598, + "loss": 2.6522, + "step": 3210 + }, + { + "epoch": 0.2591396981680252, + "grad_norm": 0.7239218354225159, + "learning_rate": 0.00018818003045830832, + "loss": 2.7058, + "step": 3211 + }, + { + "epoch": 0.2592204019046082, + "grad_norm": 0.8557687997817993, + "learning_rate": 0.00018817258389594382, + "loss": 2.7125, + "step": 3212 + }, + { + "epoch": 0.2593011056411912, + "grad_norm": 0.7685148119926453, + "learning_rate": 0.00018816513513608801, + "loss": 2.7516, + "step": 3213 + }, + { + "epoch": 0.25938180937777416, + "grad_norm": 0.7497698664665222, + "learning_rate": 0.00018815768417892664, + "loss": 2.6536, + "step": 3214 + }, + { + "epoch": 0.2594625131143572, + "grad_norm": 0.7041923403739929, + "learning_rate": 0.0001881502310246453, + "loss": 2.7031, + "step": 3215 + }, + { + "epoch": 0.2595432168509402, + "grad_norm": 0.7815428376197815, + "learning_rate": 
0.00018814277567342976, + "loss": 2.7291, + "step": 3216 + }, + { + "epoch": 0.2596239205875232, + "grad_norm": 0.7285065650939941, + "learning_rate": 0.00018813531812546583, + "loss": 2.7712, + "step": 3217 + }, + { + "epoch": 0.2597046243241062, + "grad_norm": 0.7606547474861145, + "learning_rate": 0.0001881278583809394, + "loss": 2.6714, + "step": 3218 + }, + { + "epoch": 0.2597853280606892, + "grad_norm": 0.7166680097579956, + "learning_rate": 0.00018812039644003638, + "loss": 2.7147, + "step": 3219 + }, + { + "epoch": 0.2598660317972722, + "grad_norm": 0.8977978229522705, + "learning_rate": 0.0001881129323029427, + "loss": 2.7743, + "step": 3220 + }, + { + "epoch": 0.2599467355338552, + "grad_norm": 0.7447277307510376, + "learning_rate": 0.00018810546596984446, + "loss": 2.7049, + "step": 3221 + }, + { + "epoch": 0.2600274392704382, + "grad_norm": 0.7343515157699585, + "learning_rate": 0.00018809799744092768, + "loss": 2.6999, + "step": 3222 + }, + { + "epoch": 0.26010814300702123, + "grad_norm": 0.7303341627120972, + "learning_rate": 0.00018809052671637852, + "loss": 2.7222, + "step": 3223 + }, + { + "epoch": 0.2601888467436042, + "grad_norm": 0.7412950396537781, + "learning_rate": 0.00018808305379638314, + "loss": 2.6957, + "step": 3224 + }, + { + "epoch": 0.26026955048018724, + "grad_norm": 0.7495343089103699, + "learning_rate": 0.00018807557868112781, + "loss": 2.7123, + "step": 3225 + }, + { + "epoch": 0.2603502542167702, + "grad_norm": 0.8137524724006653, + "learning_rate": 0.00018806810137079886, + "loss": 2.7191, + "step": 3226 + }, + { + "epoch": 0.26043095795335325, + "grad_norm": 0.786374568939209, + "learning_rate": 0.0001880606218655826, + "loss": 2.7237, + "step": 3227 + }, + { + "epoch": 0.2605116616899362, + "grad_norm": 0.9969484806060791, + "learning_rate": 0.00018805314016566543, + "loss": 2.7603, + "step": 3228 + }, + { + "epoch": 0.26059236542651926, + "grad_norm": 0.8132432103157043, + "learning_rate": 0.00018804565627123386, + "loss": 
2.6807, + "step": 3229 + }, + { + "epoch": 0.26067306916310223, + "grad_norm": 0.7604904174804688, + "learning_rate": 0.00018803817018247436, + "loss": 2.7105, + "step": 3230 + }, + { + "epoch": 0.26075377289968527, + "grad_norm": 0.743505597114563, + "learning_rate": 0.00018803068189957354, + "loss": 2.7152, + "step": 3231 + }, + { + "epoch": 0.26083447663626824, + "grad_norm": 0.7780006527900696, + "learning_rate": 0.000188023191422718, + "loss": 2.7043, + "step": 3232 + }, + { + "epoch": 0.2609151803728513, + "grad_norm": 0.7683089375495911, + "learning_rate": 0.00018801569875209447, + "loss": 2.7033, + "step": 3233 + }, + { + "epoch": 0.26099588410943425, + "grad_norm": 0.7540118098258972, + "learning_rate": 0.0001880082038878896, + "loss": 2.7121, + "step": 3234 + }, + { + "epoch": 0.2610765878460173, + "grad_norm": 0.7509592771530151, + "learning_rate": 0.00018800070683029025, + "loss": 2.6575, + "step": 3235 + }, + { + "epoch": 0.26115729158260026, + "grad_norm": 0.8015461564064026, + "learning_rate": 0.00018799320757948327, + "loss": 2.6956, + "step": 3236 + }, + { + "epoch": 0.2612379953191833, + "grad_norm": 0.7586383819580078, + "learning_rate": 0.00018798570613565553, + "loss": 2.6719, + "step": 3237 + }, + { + "epoch": 0.26131869905576627, + "grad_norm": 0.7833155989646912, + "learning_rate": 0.000187978202498994, + "loss": 2.7317, + "step": 3238 + }, + { + "epoch": 0.2613994027923493, + "grad_norm": 0.7976018786430359, + "learning_rate": 0.00018797069666968565, + "loss": 2.7514, + "step": 3239 + }, + { + "epoch": 0.2614801065289323, + "grad_norm": 0.8388968706130981, + "learning_rate": 0.00018796318864791763, + "loss": 2.6845, + "step": 3240 + }, + { + "epoch": 0.2615608102655153, + "grad_norm": 0.8082842230796814, + "learning_rate": 0.00018795567843387701, + "loss": 2.7204, + "step": 3241 + }, + { + "epoch": 0.2616415140020983, + "grad_norm": 0.7514800429344177, + "learning_rate": 0.00018794816602775094, + "loss": 2.7117, + "step": 3242 + }, + { + 
"epoch": 0.2617222177386813, + "grad_norm": 0.8676564693450928, + "learning_rate": 0.00018794065142972664, + "loss": 2.6596, + "step": 3243 + }, + { + "epoch": 0.2618029214752643, + "grad_norm": 0.7449865341186523, + "learning_rate": 0.0001879331346399915, + "loss": 2.7089, + "step": 3244 + }, + { + "epoch": 0.2618836252118473, + "grad_norm": 0.8020811676979065, + "learning_rate": 0.00018792561565873274, + "loss": 2.7293, + "step": 3245 + }, + { + "epoch": 0.2619643289484303, + "grad_norm": 0.7961642146110535, + "learning_rate": 0.00018791809448613783, + "loss": 2.7269, + "step": 3246 + }, + { + "epoch": 0.26204503268501333, + "grad_norm": 0.7842351198196411, + "learning_rate": 0.00018791057112239415, + "loss": 2.6773, + "step": 3247 + }, + { + "epoch": 0.2621257364215963, + "grad_norm": 0.7494246959686279, + "learning_rate": 0.00018790304556768925, + "loss": 2.7317, + "step": 3248 + }, + { + "epoch": 0.26220644015817934, + "grad_norm": 0.7822836637496948, + "learning_rate": 0.0001878955178222107, + "loss": 2.6834, + "step": 3249 + }, + { + "epoch": 0.2622871438947623, + "grad_norm": 0.8432494401931763, + "learning_rate": 0.00018788798788614607, + "loss": 2.7048, + "step": 3250 + }, + { + "epoch": 0.26236784763134535, + "grad_norm": 0.9599446058273315, + "learning_rate": 0.000187880455759683, + "loss": 2.7793, + "step": 3251 + }, + { + "epoch": 0.26244855136792833, + "grad_norm": 0.8097226023674011, + "learning_rate": 0.00018787292144300928, + "loss": 2.7177, + "step": 3252 + }, + { + "epoch": 0.26252925510451136, + "grad_norm": 0.8423499464988708, + "learning_rate": 0.00018786538493631265, + "loss": 2.7265, + "step": 3253 + }, + { + "epoch": 0.26260995884109434, + "grad_norm": 0.7388847470283508, + "learning_rate": 0.00018785784623978095, + "loss": 2.6778, + "step": 3254 + }, + { + "epoch": 0.26269066257767737, + "grad_norm": 0.766368567943573, + "learning_rate": 0.0001878503053536021, + "loss": 2.654, + "step": 3255 + }, + { + "epoch": 0.26277136631426035, + 
"grad_norm": 0.8181266188621521, + "learning_rate": 0.00018784276227796394, + "loss": 2.7568, + "step": 3256 + }, + { + "epoch": 0.2628520700508434, + "grad_norm": 0.8235312104225159, + "learning_rate": 0.00018783521701305452, + "loss": 2.7317, + "step": 3257 + }, + { + "epoch": 0.26293277378742635, + "grad_norm": 0.7103183269500732, + "learning_rate": 0.00018782766955906195, + "loss": 2.6919, + "step": 3258 + }, + { + "epoch": 0.2630134775240094, + "grad_norm": 0.7202538251876831, + "learning_rate": 0.0001878201199161742, + "loss": 2.7179, + "step": 3259 + }, + { + "epoch": 0.26309418126059236, + "grad_norm": 0.8402286171913147, + "learning_rate": 0.00018781256808457952, + "loss": 2.7789, + "step": 3260 + }, + { + "epoch": 0.2631748849971754, + "grad_norm": 0.8136829137802124, + "learning_rate": 0.00018780501406446613, + "loss": 2.6872, + "step": 3261 + }, + { + "epoch": 0.26325558873375837, + "grad_norm": 0.8017000555992126, + "learning_rate": 0.00018779745785602224, + "loss": 2.7527, + "step": 3262 + }, + { + "epoch": 0.2633362924703414, + "grad_norm": 0.7880774140357971, + "learning_rate": 0.00018778989945943619, + "loss": 2.7348, + "step": 3263 + }, + { + "epoch": 0.2634169962069244, + "grad_norm": 0.7402438521385193, + "learning_rate": 0.00018778233887489635, + "loss": 2.6946, + "step": 3264 + }, + { + "epoch": 0.26349769994350736, + "grad_norm": 0.7450907230377197, + "learning_rate": 0.0001877747761025912, + "loss": 2.7502, + "step": 3265 + }, + { + "epoch": 0.2635784036800904, + "grad_norm": 0.7504056692123413, + "learning_rate": 0.00018776721114270917, + "loss": 2.832, + "step": 3266 + }, + { + "epoch": 0.26365910741667337, + "grad_norm": 0.7710226774215698, + "learning_rate": 0.00018775964399543878, + "loss": 2.6895, + "step": 3267 + }, + { + "epoch": 0.2637398111532564, + "grad_norm": 0.769927978515625, + "learning_rate": 0.00018775207466096867, + "loss": 2.6801, + "step": 3268 + }, + { + "epoch": 0.2638205148898394, + "grad_norm": 0.7210869193077087, + 
"learning_rate": 0.0001877445031394875, + "loss": 2.6966, + "step": 3269 + }, + { + "epoch": 0.2639012186264224, + "grad_norm": 0.7731119990348816, + "learning_rate": 0.00018773692943118393, + "loss": 2.6965, + "step": 3270 + }, + { + "epoch": 0.2639819223630054, + "grad_norm": 0.7539728283882141, + "learning_rate": 0.00018772935353624672, + "loss": 2.753, + "step": 3271 + }, + { + "epoch": 0.2640626260995884, + "grad_norm": 0.7993821501731873, + "learning_rate": 0.00018772177545486472, + "loss": 2.7177, + "step": 3272 + }, + { + "epoch": 0.2641433298361714, + "grad_norm": 0.7880005240440369, + "learning_rate": 0.00018771419518722672, + "loss": 2.6854, + "step": 3273 + }, + { + "epoch": 0.2642240335727544, + "grad_norm": 0.8079188466072083, + "learning_rate": 0.0001877066127335217, + "loss": 2.734, + "step": 3274 + }, + { + "epoch": 0.2643047373093374, + "grad_norm": 0.8241428732872009, + "learning_rate": 0.00018769902809393865, + "loss": 2.7156, + "step": 3275 + }, + { + "epoch": 0.26438544104592043, + "grad_norm": 0.8007158041000366, + "learning_rate": 0.00018769144126866657, + "loss": 2.693, + "step": 3276 + }, + { + "epoch": 0.2644661447825034, + "grad_norm": 0.8360451459884644, + "learning_rate": 0.00018768385225789456, + "loss": 2.6919, + "step": 3277 + }, + { + "epoch": 0.26454684851908644, + "grad_norm": 0.7596627473831177, + "learning_rate": 0.00018767626106181172, + "loss": 2.7861, + "step": 3278 + }, + { + "epoch": 0.2646275522556694, + "grad_norm": 0.7469248175621033, + "learning_rate": 0.00018766866768060727, + "loss": 2.7305, + "step": 3279 + }, + { + "epoch": 0.26470825599225245, + "grad_norm": 0.7103936076164246, + "learning_rate": 0.00018766107211447045, + "loss": 2.6456, + "step": 3280 + }, + { + "epoch": 0.2647889597288354, + "grad_norm": 0.7595266103744507, + "learning_rate": 0.00018765347436359056, + "loss": 2.7235, + "step": 3281 + }, + { + "epoch": 0.26486966346541846, + "grad_norm": 0.786648154258728, + "learning_rate": 
0.00018764587442815698, + "loss": 2.7182, + "step": 3282 + }, + { + "epoch": 0.26495036720200144, + "grad_norm": 0.7152618169784546, + "learning_rate": 0.00018763827230835908, + "loss": 2.6842, + "step": 3283 + }, + { + "epoch": 0.26503107093858447, + "grad_norm": 0.89169842004776, + "learning_rate": 0.00018763066800438636, + "loss": 2.7661, + "step": 3284 + }, + { + "epoch": 0.26511177467516744, + "grad_norm": 0.8148171305656433, + "learning_rate": 0.00018762306151642833, + "loss": 2.7264, + "step": 3285 + }, + { + "epoch": 0.2651924784117505, + "grad_norm": 0.8070533871650696, + "learning_rate": 0.00018761545284467454, + "loss": 2.7425, + "step": 3286 + }, + { + "epoch": 0.26527318214833345, + "grad_norm": 0.8536118268966675, + "learning_rate": 0.00018760784198931465, + "loss": 2.702, + "step": 3287 + }, + { + "epoch": 0.2653538858849165, + "grad_norm": 0.7422329783439636, + "learning_rate": 0.00018760022895053833, + "loss": 2.6913, + "step": 3288 + }, + { + "epoch": 0.26543458962149946, + "grad_norm": 0.7415527105331421, + "learning_rate": 0.0001875926137285353, + "loss": 2.6472, + "step": 3289 + }, + { + "epoch": 0.2655152933580825, + "grad_norm": 0.8432031273841858, + "learning_rate": 0.00018758499632349538, + "loss": 2.7506, + "step": 3290 + }, + { + "epoch": 0.26559599709466547, + "grad_norm": 0.8113259077072144, + "learning_rate": 0.0001875773767356084, + "loss": 2.6866, + "step": 3291 + }, + { + "epoch": 0.2656767008312485, + "grad_norm": 0.7898122668266296, + "learning_rate": 0.00018756975496506424, + "loss": 2.6516, + "step": 3292 + }, + { + "epoch": 0.2657574045678315, + "grad_norm": 0.7627275586128235, + "learning_rate": 0.0001875621310120529, + "loss": 2.7065, + "step": 3293 + }, + { + "epoch": 0.2658381083044145, + "grad_norm": 0.8227291107177734, + "learning_rate": 0.00018755450487676435, + "loss": 2.7614, + "step": 3294 + }, + { + "epoch": 0.2659188120409975, + "grad_norm": 0.8162109851837158, + "learning_rate": 0.00018754687655938868, + "loss": 
2.7924, + "step": 3295 + }, + { + "epoch": 0.2659995157775805, + "grad_norm": 0.7231846451759338, + "learning_rate": 0.00018753924606011602, + "loss": 2.7505, + "step": 3296 + }, + { + "epoch": 0.2660802195141635, + "grad_norm": 0.8635944724082947, + "learning_rate": 0.00018753161337913647, + "loss": 2.7505, + "step": 3297 + }, + { + "epoch": 0.26616092325074653, + "grad_norm": 0.8131890892982483, + "learning_rate": 0.00018752397851664031, + "loss": 2.7872, + "step": 3298 + }, + { + "epoch": 0.2662416269873295, + "grad_norm": 0.7336695790290833, + "learning_rate": 0.00018751634147281786, + "loss": 2.7517, + "step": 3299 + }, + { + "epoch": 0.26632233072391254, + "grad_norm": 0.7541754841804504, + "learning_rate": 0.00018750870224785939, + "loss": 2.7807, + "step": 3300 + }, + { + "epoch": 0.2664030344604955, + "grad_norm": 0.9347110390663147, + "learning_rate": 0.0001875010608419553, + "loss": 2.6954, + "step": 3301 + }, + { + "epoch": 0.26648373819707855, + "grad_norm": 0.7591213583946228, + "learning_rate": 0.00018749341725529604, + "loss": 2.7019, + "step": 3302 + }, + { + "epoch": 0.2665644419336615, + "grad_norm": 0.811527669429779, + "learning_rate": 0.00018748577148807211, + "loss": 2.7123, + "step": 3303 + }, + { + "epoch": 0.26664514567024455, + "grad_norm": 0.7419980764389038, + "learning_rate": 0.00018747812354047408, + "loss": 2.7383, + "step": 3304 + }, + { + "epoch": 0.26672584940682753, + "grad_norm": 0.7801192402839661, + "learning_rate": 0.00018747047341269256, + "loss": 2.7245, + "step": 3305 + }, + { + "epoch": 0.26680655314341056, + "grad_norm": 0.7392756938934326, + "learning_rate": 0.00018746282110491816, + "loss": 2.6992, + "step": 3306 + }, + { + "epoch": 0.26688725687999354, + "grad_norm": 0.7085927724838257, + "learning_rate": 0.00018745516661734161, + "loss": 2.739, + "step": 3307 + }, + { + "epoch": 0.26696796061657657, + "grad_norm": 0.7218676209449768, + "learning_rate": 0.00018744750995015373, + "loss": 2.7091, + "step": 3308 + }, + { 
+ "epoch": 0.26704866435315955, + "grad_norm": 0.847872257232666, + "learning_rate": 0.0001874398511035453, + "loss": 2.699, + "step": 3309 + }, + { + "epoch": 0.2671293680897426, + "grad_norm": 0.8280770778656006, + "learning_rate": 0.00018743219007770723, + "loss": 2.763, + "step": 3310 + }, + { + "epoch": 0.26721007182632556, + "grad_norm": 0.7271165251731873, + "learning_rate": 0.0001874245268728304, + "loss": 2.7219, + "step": 3311 + }, + { + "epoch": 0.2672907755629086, + "grad_norm": 0.7342363595962524, + "learning_rate": 0.00018741686148910586, + "loss": 2.6765, + "step": 3312 + }, + { + "epoch": 0.26737147929949157, + "grad_norm": 0.7260174751281738, + "learning_rate": 0.0001874091939267246, + "loss": 2.7003, + "step": 3313 + }, + { + "epoch": 0.2674521830360746, + "grad_norm": 0.742494523525238, + "learning_rate": 0.00018740152418587775, + "loss": 2.7371, + "step": 3314 + }, + { + "epoch": 0.2675328867726576, + "grad_norm": 0.7238131165504456, + "learning_rate": 0.00018739385226675646, + "loss": 2.7486, + "step": 3315 + }, + { + "epoch": 0.26761359050924055, + "grad_norm": 0.7329363226890564, + "learning_rate": 0.0001873861781695519, + "loss": 2.6414, + "step": 3316 + }, + { + "epoch": 0.2676942942458236, + "grad_norm": 0.7078117728233337, + "learning_rate": 0.00018737850189445534, + "loss": 2.7271, + "step": 3317 + }, + { + "epoch": 0.26777499798240656, + "grad_norm": 0.7945309281349182, + "learning_rate": 0.00018737082344165814, + "loss": 2.7323, + "step": 3318 + }, + { + "epoch": 0.2678557017189896, + "grad_norm": 0.7510890364646912, + "learning_rate": 0.0001873631428113516, + "loss": 2.6563, + "step": 3319 + }, + { + "epoch": 0.26793640545557257, + "grad_norm": 0.7790820002555847, + "learning_rate": 0.0001873554600037272, + "loss": 2.7445, + "step": 3320 + }, + { + "epoch": 0.2680171091921556, + "grad_norm": 0.7689393162727356, + "learning_rate": 0.00018734777501897636, + "loss": 2.669, + "step": 3321 + }, + { + "epoch": 0.2680978129287386, + 
"grad_norm": 0.8227118253707886, + "learning_rate": 0.00018734008785729065, + "loss": 2.7279, + "step": 3322 + }, + { + "epoch": 0.2681785166653216, + "grad_norm": 0.7551290392875671, + "learning_rate": 0.00018733239851886162, + "loss": 2.6864, + "step": 3323 + }, + { + "epoch": 0.2682592204019046, + "grad_norm": 0.8572004437446594, + "learning_rate": 0.00018732470700388097, + "loss": 2.8159, + "step": 3324 + }, + { + "epoch": 0.2683399241384876, + "grad_norm": 0.7509044408798218, + "learning_rate": 0.00018731701331254033, + "loss": 2.7698, + "step": 3325 + }, + { + "epoch": 0.2684206278750706, + "grad_norm": 0.8474129438400269, + "learning_rate": 0.00018730931744503148, + "loss": 2.6745, + "step": 3326 + }, + { + "epoch": 0.2685013316116536, + "grad_norm": 0.8310953378677368, + "learning_rate": 0.00018730161940154618, + "loss": 2.712, + "step": 3327 + }, + { + "epoch": 0.2685820353482366, + "grad_norm": 0.8820717334747314, + "learning_rate": 0.00018729391918227632, + "loss": 2.7776, + "step": 3328 + }, + { + "epoch": 0.26866273908481964, + "grad_norm": 0.8827663064002991, + "learning_rate": 0.00018728621678741384, + "loss": 2.7115, + "step": 3329 + }, + { + "epoch": 0.2687434428214026, + "grad_norm": 0.7896323800086975, + "learning_rate": 0.00018727851221715064, + "loss": 2.6799, + "step": 3330 + }, + { + "epoch": 0.26882414655798564, + "grad_norm": 0.7775614261627197, + "learning_rate": 0.0001872708054716788, + "loss": 2.7021, + "step": 3331 + }, + { + "epoch": 0.2689048502945686, + "grad_norm": 0.8150187134742737, + "learning_rate": 0.0001872630965511903, + "loss": 2.679, + "step": 3332 + }, + { + "epoch": 0.26898555403115165, + "grad_norm": 0.7821844220161438, + "learning_rate": 0.00018725538545587736, + "loss": 2.7067, + "step": 3333 + }, + { + "epoch": 0.26906625776773463, + "grad_norm": 0.8390234112739563, + "learning_rate": 0.00018724767218593216, + "loss": 2.7133, + "step": 3334 + }, + { + "epoch": 0.26914696150431766, + "grad_norm": 0.8150694370269775, + 
"learning_rate": 0.00018723995674154687, + "loss": 2.7022, + "step": 3335 + }, + { + "epoch": 0.26922766524090064, + "grad_norm": 0.7473872900009155, + "learning_rate": 0.0001872322391229138, + "loss": 2.7268, + "step": 3336 + }, + { + "epoch": 0.26930836897748367, + "grad_norm": 0.7591951489448547, + "learning_rate": 0.0001872245193302253, + "loss": 2.7516, + "step": 3337 + }, + { + "epoch": 0.26938907271406665, + "grad_norm": 0.7914662957191467, + "learning_rate": 0.00018721679736367382, + "loss": 2.6613, + "step": 3338 + }, + { + "epoch": 0.2694697764506497, + "grad_norm": 0.7823428511619568, + "learning_rate": 0.00018720907322345172, + "loss": 2.6661, + "step": 3339 + }, + { + "epoch": 0.26955048018723266, + "grad_norm": 0.8428264260292053, + "learning_rate": 0.00018720134690975156, + "loss": 2.672, + "step": 3340 + }, + { + "epoch": 0.2696311839238157, + "grad_norm": 0.71320641040802, + "learning_rate": 0.00018719361842276587, + "loss": 2.7326, + "step": 3341 + }, + { + "epoch": 0.26971188766039866, + "grad_norm": 0.7972821593284607, + "learning_rate": 0.00018718588776268731, + "loss": 2.7182, + "step": 3342 + }, + { + "epoch": 0.2697925913969817, + "grad_norm": 0.7924500107765198, + "learning_rate": 0.0001871781549297085, + "loss": 2.7308, + "step": 3343 + }, + { + "epoch": 0.2698732951335647, + "grad_norm": 0.7668356895446777, + "learning_rate": 0.0001871704199240222, + "loss": 2.678, + "step": 3344 + }, + { + "epoch": 0.2699539988701477, + "grad_norm": 0.866973876953125, + "learning_rate": 0.00018716268274582114, + "loss": 2.7802, + "step": 3345 + }, + { + "epoch": 0.2700347026067307, + "grad_norm": 0.7709557414054871, + "learning_rate": 0.0001871549433952982, + "loss": 2.7418, + "step": 3346 + }, + { + "epoch": 0.2701154063433137, + "grad_norm": 0.7707573771476746, + "learning_rate": 0.00018714720187264626, + "loss": 2.7486, + "step": 3347 + }, + { + "epoch": 0.2701961100798967, + "grad_norm": 0.8007768392562866, + "learning_rate": 0.00018713945817805822, 
+ "loss": 2.7106, + "step": 3348 + }, + { + "epoch": 0.2702768138164797, + "grad_norm": 0.7239583134651184, + "learning_rate": 0.0001871317123117271, + "loss": 2.7209, + "step": 3349 + }, + { + "epoch": 0.2703575175530627, + "grad_norm": 0.775104820728302, + "learning_rate": 0.00018712396427384594, + "loss": 2.6503, + "step": 3350 + }, + { + "epoch": 0.27043822128964573, + "grad_norm": 0.7492741346359253, + "learning_rate": 0.0001871162140646079, + "loss": 2.699, + "step": 3351 + }, + { + "epoch": 0.2705189250262287, + "grad_norm": 0.7550846338272095, + "learning_rate": 0.00018710846168420604, + "loss": 2.7458, + "step": 3352 + }, + { + "epoch": 0.27059962876281174, + "grad_norm": 0.807996928691864, + "learning_rate": 0.0001871007071328336, + "loss": 2.7604, + "step": 3353 + }, + { + "epoch": 0.2706803324993947, + "grad_norm": 0.7381845116615295, + "learning_rate": 0.00018709295041068386, + "loss": 2.6833, + "step": 3354 + }, + { + "epoch": 0.27076103623597775, + "grad_norm": 0.7542420625686646, + "learning_rate": 0.00018708519151795016, + "loss": 2.6462, + "step": 3355 + }, + { + "epoch": 0.2708417399725607, + "grad_norm": 0.7675846219062805, + "learning_rate": 0.00018707743045482582, + "loss": 2.7068, + "step": 3356 + }, + { + "epoch": 0.27092244370914376, + "grad_norm": 0.7437357902526855, + "learning_rate": 0.0001870696672215043, + "loss": 2.73, + "step": 3357 + }, + { + "epoch": 0.27100314744572673, + "grad_norm": 0.7880852222442627, + "learning_rate": 0.00018706190181817903, + "loss": 2.759, + "step": 3358 + }, + { + "epoch": 0.27108385118230977, + "grad_norm": 0.7403178811073303, + "learning_rate": 0.00018705413424504363, + "loss": 2.7538, + "step": 3359 + }, + { + "epoch": 0.27116455491889274, + "grad_norm": 0.7601225972175598, + "learning_rate": 0.00018704636450229164, + "loss": 2.7331, + "step": 3360 + }, + { + "epoch": 0.2712452586554758, + "grad_norm": 0.7810701727867126, + "learning_rate": 0.0001870385925901167, + "loss": 2.7736, + "step": 3361 + }, + 
{ + "epoch": 0.27132596239205875, + "grad_norm": 0.8934530019760132, + "learning_rate": 0.0001870308185087125, + "loss": 2.7214, + "step": 3362 + }, + { + "epoch": 0.2714066661286418, + "grad_norm": 0.7468441128730774, + "learning_rate": 0.0001870230422582728, + "loss": 2.6957, + "step": 3363 + }, + { + "epoch": 0.27148736986522476, + "grad_norm": 0.7643293142318726, + "learning_rate": 0.00018701526383899144, + "loss": 2.6773, + "step": 3364 + }, + { + "epoch": 0.2715680736018078, + "grad_norm": 0.7602033615112305, + "learning_rate": 0.0001870074832510622, + "loss": 2.7095, + "step": 3365 + }, + { + "epoch": 0.27164877733839077, + "grad_norm": 0.772065281867981, + "learning_rate": 0.00018699970049467908, + "loss": 2.6753, + "step": 3366 + }, + { + "epoch": 0.27172948107497374, + "grad_norm": 0.7718359231948853, + "learning_rate": 0.00018699191557003598, + "loss": 2.6857, + "step": 3367 + }, + { + "epoch": 0.2718101848115568, + "grad_norm": 0.8207093477249146, + "learning_rate": 0.00018698412847732693, + "loss": 2.7549, + "step": 3368 + }, + { + "epoch": 0.27189088854813975, + "grad_norm": 0.7393590807914734, + "learning_rate": 0.00018697633921674605, + "loss": 2.6884, + "step": 3369 + }, + { + "epoch": 0.2719715922847228, + "grad_norm": 0.7955869436264038, + "learning_rate": 0.0001869685477884874, + "loss": 2.708, + "step": 3370 + }, + { + "epoch": 0.27205229602130576, + "grad_norm": 0.7392188906669617, + "learning_rate": 0.00018696075419274527, + "loss": 2.717, + "step": 3371 + }, + { + "epoch": 0.2721329997578888, + "grad_norm": 0.800204873085022, + "learning_rate": 0.00018695295842971376, + "loss": 2.7184, + "step": 3372 + }, + { + "epoch": 0.27221370349447177, + "grad_norm": 0.8195740580558777, + "learning_rate": 0.00018694516049958725, + "loss": 2.6865, + "step": 3373 + }, + { + "epoch": 0.2722944072310548, + "grad_norm": 0.8617578148841858, + "learning_rate": 0.00018693736040256007, + "loss": 2.7098, + "step": 3374 + }, + { + "epoch": 0.2723751109676378, + 
"grad_norm": 0.8184413909912109, + "learning_rate": 0.00018692955813882662, + "loss": 2.7449, + "step": 3375 + }, + { + "epoch": 0.2724558147042208, + "grad_norm": 0.990275502204895, + "learning_rate": 0.00018692175370858133, + "loss": 2.7891, + "step": 3376 + }, + { + "epoch": 0.2725365184408038, + "grad_norm": 0.7857810854911804, + "learning_rate": 0.0001869139471120187, + "loss": 2.6884, + "step": 3377 + }, + { + "epoch": 0.2726172221773868, + "grad_norm": 0.8040915131568909, + "learning_rate": 0.00018690613834933335, + "loss": 2.7047, + "step": 3378 + }, + { + "epoch": 0.2726979259139698, + "grad_norm": 0.7512348294258118, + "learning_rate": 0.00018689832742071983, + "loss": 2.6898, + "step": 3379 + }, + { + "epoch": 0.27277862965055283, + "grad_norm": 0.6781859397888184, + "learning_rate": 0.00018689051432637288, + "loss": 2.6396, + "step": 3380 + }, + { + "epoch": 0.2728593333871358, + "grad_norm": 0.7858247756958008, + "learning_rate": 0.00018688269906648716, + "loss": 2.6785, + "step": 3381 + }, + { + "epoch": 0.27294003712371884, + "grad_norm": 0.7342140674591064, + "learning_rate": 0.00018687488164125744, + "loss": 2.6778, + "step": 3382 + }, + { + "epoch": 0.2730207408603018, + "grad_norm": 0.8113372921943665, + "learning_rate": 0.00018686706205087858, + "loss": 2.6982, + "step": 3383 + }, + { + "epoch": 0.27310144459688485, + "grad_norm": 0.7904205918312073, + "learning_rate": 0.0001868592402955455, + "loss": 2.7891, + "step": 3384 + }, + { + "epoch": 0.2731821483334678, + "grad_norm": 0.7274135947227478, + "learning_rate": 0.00018685141637545308, + "loss": 2.6908, + "step": 3385 + }, + { + "epoch": 0.27326285207005085, + "grad_norm": 0.7675744295120239, + "learning_rate": 0.0001868435902907963, + "loss": 2.6987, + "step": 3386 + }, + { + "epoch": 0.27334355580663383, + "grad_norm": 0.8085030913352966, + "learning_rate": 0.00018683576204177026, + "loss": 2.7798, + "step": 3387 + }, + { + "epoch": 0.27342425954321686, + "grad_norm": 0.7498135566711426, + 
"learning_rate": 0.00018682793162857006, + "loss": 2.7216, + "step": 3388 + }, + { + "epoch": 0.27350496327979984, + "grad_norm": 0.900741696357727, + "learning_rate": 0.0001868200990513908, + "loss": 2.6871, + "step": 3389 + }, + { + "epoch": 0.27358566701638287, + "grad_norm": 0.7948571443557739, + "learning_rate": 0.00018681226431042772, + "loss": 2.6985, + "step": 3390 + }, + { + "epoch": 0.27366637075296585, + "grad_norm": 0.8739100098609924, + "learning_rate": 0.00018680442740587612, + "loss": 2.6922, + "step": 3391 + }, + { + "epoch": 0.2737470744895489, + "grad_norm": 0.730084240436554, + "learning_rate": 0.00018679658833793125, + "loss": 2.7029, + "step": 3392 + }, + { + "epoch": 0.27382777822613186, + "grad_norm": 0.7560603022575378, + "learning_rate": 0.00018678874710678853, + "loss": 2.7429, + "step": 3393 + }, + { + "epoch": 0.2739084819627149, + "grad_norm": 0.8331460356712341, + "learning_rate": 0.00018678090371264334, + "loss": 2.7157, + "step": 3394 + }, + { + "epoch": 0.27398918569929787, + "grad_norm": 0.8070168495178223, + "learning_rate": 0.00018677305815569122, + "loss": 2.7629, + "step": 3395 + }, + { + "epoch": 0.2740698894358809, + "grad_norm": 0.7922534346580505, + "learning_rate": 0.00018676521043612762, + "loss": 2.7159, + "step": 3396 + }, + { + "epoch": 0.2741505931724639, + "grad_norm": 0.7838901281356812, + "learning_rate": 0.0001867573605541482, + "loss": 2.6721, + "step": 3397 + }, + { + "epoch": 0.2742312969090469, + "grad_norm": 0.8912512063980103, + "learning_rate": 0.00018674950850994856, + "loss": 2.7243, + "step": 3398 + }, + { + "epoch": 0.2743120006456299, + "grad_norm": 0.7205448150634766, + "learning_rate": 0.0001867416543037244, + "loss": 2.7152, + "step": 3399 + }, + { + "epoch": 0.2743927043822129, + "grad_norm": 0.6992877721786499, + "learning_rate": 0.00018673379793567146, + "loss": 2.7183, + "step": 3400 + }, + { + "epoch": 0.2744734081187959, + "grad_norm": 0.8009448051452637, + "learning_rate": 
0.00018672593940598556, + "loss": 2.715, + "step": 3401 + }, + { + "epoch": 0.2745541118553789, + "grad_norm": 0.7812647819519043, + "learning_rate": 0.0001867180787148626, + "loss": 2.7579, + "step": 3402 + }, + { + "epoch": 0.2746348155919619, + "grad_norm": 0.7300555109977722, + "learning_rate": 0.00018671021586249835, + "loss": 2.694, + "step": 3403 + }, + { + "epoch": 0.27471551932854493, + "grad_norm": 0.8082736134529114, + "learning_rate": 0.00018670235084908887, + "loss": 2.768, + "step": 3404 + }, + { + "epoch": 0.2747962230651279, + "grad_norm": 0.7729581594467163, + "learning_rate": 0.0001866944836748302, + "loss": 2.7256, + "step": 3405 + }, + { + "epoch": 0.27487692680171094, + "grad_norm": 0.8113458752632141, + "learning_rate": 0.00018668661433991835, + "loss": 2.6692, + "step": 3406 + }, + { + "epoch": 0.2749576305382939, + "grad_norm": 0.7757337689399719, + "learning_rate": 0.00018667874284454948, + "loss": 2.6769, + "step": 3407 + }, + { + "epoch": 0.27503833427487695, + "grad_norm": 0.7896093726158142, + "learning_rate": 0.00018667086918891976, + "loss": 2.7118, + "step": 3408 + }, + { + "epoch": 0.2751190380114599, + "grad_norm": 0.7764071822166443, + "learning_rate": 0.00018666299337322543, + "loss": 2.7284, + "step": 3409 + }, + { + "epoch": 0.27519974174804296, + "grad_norm": 0.794815182685852, + "learning_rate": 0.00018665511539766273, + "loss": 2.7232, + "step": 3410 + }, + { + "epoch": 0.27528044548462594, + "grad_norm": 0.8134122490882874, + "learning_rate": 0.0001866472352624281, + "loss": 2.7023, + "step": 3411 + }, + { + "epoch": 0.27536114922120897, + "grad_norm": 0.7654025554656982, + "learning_rate": 0.00018663935296771782, + "loss": 2.7002, + "step": 3412 + }, + { + "epoch": 0.27544185295779194, + "grad_norm": 0.6930806636810303, + "learning_rate": 0.0001866314685137284, + "loss": 2.6764, + "step": 3413 + }, + { + "epoch": 0.275522556694375, + "grad_norm": 0.7535184621810913, + "learning_rate": 0.00018662358190065631, + "loss": 
2.6657, + "step": 3414 + }, + { + "epoch": 0.27560326043095795, + "grad_norm": 0.7775620818138123, + "learning_rate": 0.00018661569312869816, + "loss": 2.6931, + "step": 3415 + }, + { + "epoch": 0.275683964167541, + "grad_norm": 0.7209072113037109, + "learning_rate": 0.00018660780219805048, + "loss": 2.7293, + "step": 3416 + }, + { + "epoch": 0.27576466790412396, + "grad_norm": 0.7182055711746216, + "learning_rate": 0.00018659990910891, + "loss": 2.6561, + "step": 3417 + }, + { + "epoch": 0.27584537164070694, + "grad_norm": 0.7130969166755676, + "learning_rate": 0.00018659201386147338, + "loss": 2.7156, + "step": 3418 + }, + { + "epoch": 0.27592607537728997, + "grad_norm": 0.7296265959739685, + "learning_rate": 0.00018658411645593745, + "loss": 2.6894, + "step": 3419 + }, + { + "epoch": 0.27600677911387295, + "grad_norm": 0.7707972526550293, + "learning_rate": 0.000186576216892499, + "loss": 2.7528, + "step": 3420 + }, + { + "epoch": 0.276087482850456, + "grad_norm": 0.6945170164108276, + "learning_rate": 0.0001865683151713549, + "loss": 2.6762, + "step": 3421 + }, + { + "epoch": 0.27616818658703896, + "grad_norm": 0.7664114236831665, + "learning_rate": 0.0001865604112927021, + "loss": 2.7212, + "step": 3422 + }, + { + "epoch": 0.276248890323622, + "grad_norm": 0.6950399875640869, + "learning_rate": 0.0001865525052567376, + "loss": 2.7035, + "step": 3423 + }, + { + "epoch": 0.27632959406020496, + "grad_norm": 0.7307506799697876, + "learning_rate": 0.00018654459706365838, + "loss": 2.7296, + "step": 3424 + }, + { + "epoch": 0.276410297796788, + "grad_norm": 0.720912516117096, + "learning_rate": 0.0001865366867136616, + "loss": 2.6884, + "step": 3425 + }, + { + "epoch": 0.276491001533371, + "grad_norm": 0.7581072449684143, + "learning_rate": 0.00018652877420694436, + "loss": 2.705, + "step": 3426 + }, + { + "epoch": 0.276571705269954, + "grad_norm": 0.7473136186599731, + "learning_rate": 0.0001865208595437039, + "loss": 2.7316, + "step": 3427 + }, + { + "epoch": 
0.276652409006537, + "grad_norm": 0.7272855639457703, + "learning_rate": 0.00018651294272413745, + "loss": 2.6834, + "step": 3428 + }, + { + "epoch": 0.27673311274312, + "grad_norm": 0.7046366930007935, + "learning_rate": 0.0001865050237484423, + "loss": 2.6491, + "step": 3429 + }, + { + "epoch": 0.276813816479703, + "grad_norm": 0.7521376609802246, + "learning_rate": 0.00018649710261681586, + "loss": 2.708, + "step": 3430 + }, + { + "epoch": 0.276894520216286, + "grad_norm": 0.7372453808784485, + "learning_rate": 0.0001864891793294555, + "loss": 2.682, + "step": 3431 + }, + { + "epoch": 0.276975223952869, + "grad_norm": 0.7381749749183655, + "learning_rate": 0.0001864812538865587, + "loss": 2.7526, + "step": 3432 + }, + { + "epoch": 0.27705592768945203, + "grad_norm": 0.7891514301300049, + "learning_rate": 0.00018647332628832298, + "loss": 2.6904, + "step": 3433 + }, + { + "epoch": 0.277136631426035, + "grad_norm": 0.7942724823951721, + "learning_rate": 0.00018646539653494596, + "loss": 2.7873, + "step": 3434 + }, + { + "epoch": 0.27721733516261804, + "grad_norm": 0.7365398406982422, + "learning_rate": 0.0001864574646266252, + "loss": 2.6684, + "step": 3435 + }, + { + "epoch": 0.277298038899201, + "grad_norm": 0.7802249193191528, + "learning_rate": 0.00018644953056355846, + "loss": 2.7152, + "step": 3436 + }, + { + "epoch": 0.27737874263578405, + "grad_norm": 0.7801448106765747, + "learning_rate": 0.0001864415943459434, + "loss": 2.7034, + "step": 3437 + }, + { + "epoch": 0.277459446372367, + "grad_norm": 0.7722738981246948, + "learning_rate": 0.00018643365597397786, + "loss": 2.7135, + "step": 3438 + }, + { + "epoch": 0.27754015010895006, + "grad_norm": 0.7847445011138916, + "learning_rate": 0.00018642571544785967, + "loss": 2.6999, + "step": 3439 + }, + { + "epoch": 0.27762085384553303, + "grad_norm": 0.7226125597953796, + "learning_rate": 0.00018641777276778675, + "loss": 2.7613, + "step": 3440 + }, + { + "epoch": 0.27770155758211607, + "grad_norm": 
0.713188111782074, + "learning_rate": 0.000186409827933957, + "loss": 2.6953, + "step": 3441 + }, + { + "epoch": 0.27778226131869904, + "grad_norm": 0.7308298349380493, + "learning_rate": 0.0001864018809465685, + "loss": 2.7045, + "step": 3442 + }, + { + "epoch": 0.2778629650552821, + "grad_norm": 0.7606719732284546, + "learning_rate": 0.00018639393180581925, + "loss": 2.7883, + "step": 3443 + }, + { + "epoch": 0.27794366879186505, + "grad_norm": 0.7583296895027161, + "learning_rate": 0.00018638598051190738, + "loss": 2.6734, + "step": 3444 + }, + { + "epoch": 0.2780243725284481, + "grad_norm": 0.7147012948989868, + "learning_rate": 0.00018637802706503108, + "loss": 2.7223, + "step": 3445 + }, + { + "epoch": 0.27810507626503106, + "grad_norm": 0.7812997102737427, + "learning_rate": 0.00018637007146538853, + "loss": 2.7277, + "step": 3446 + }, + { + "epoch": 0.2781857800016141, + "grad_norm": 0.7460772395133972, + "learning_rate": 0.000186362113713178, + "loss": 2.6875, + "step": 3447 + }, + { + "epoch": 0.27826648373819707, + "grad_norm": 0.7359143495559692, + "learning_rate": 0.0001863541538085979, + "loss": 2.7122, + "step": 3448 + }, + { + "epoch": 0.2783471874747801, + "grad_norm": 0.7122978568077087, + "learning_rate": 0.00018634619175184655, + "loss": 2.6381, + "step": 3449 + }, + { + "epoch": 0.2784278912113631, + "grad_norm": 0.6965885758399963, + "learning_rate": 0.00018633822754312234, + "loss": 2.6957, + "step": 3450 + }, + { + "epoch": 0.2785085949479461, + "grad_norm": 0.7737082242965698, + "learning_rate": 0.00018633026118262385, + "loss": 2.7579, + "step": 3451 + }, + { + "epoch": 0.2785892986845291, + "grad_norm": 0.6925420165061951, + "learning_rate": 0.00018632229267054958, + "loss": 2.6226, + "step": 3452 + }, + { + "epoch": 0.2786700024211121, + "grad_norm": 0.7496356964111328, + "learning_rate": 0.0001863143220070981, + "loss": 2.7059, + "step": 3453 + }, + { + "epoch": 0.2787507061576951, + "grad_norm": 0.7066817283630371, + "learning_rate": 
0.0001863063491924681, + "loss": 2.681, + "step": 3454 + }, + { + "epoch": 0.2788314098942781, + "grad_norm": 0.8143237829208374, + "learning_rate": 0.0001862983742268583, + "loss": 2.6698, + "step": 3455 + }, + { + "epoch": 0.2789121136308611, + "grad_norm": 0.7518483996391296, + "learning_rate": 0.00018629039711046737, + "loss": 2.7041, + "step": 3456 + }, + { + "epoch": 0.27899281736744413, + "grad_norm": 0.8756366968154907, + "learning_rate": 0.00018628241784349422, + "loss": 2.7547, + "step": 3457 + }, + { + "epoch": 0.2790735211040271, + "grad_norm": 0.8709446787834167, + "learning_rate": 0.0001862744364261377, + "loss": 2.7068, + "step": 3458 + }, + { + "epoch": 0.27915422484061014, + "grad_norm": 0.8121913075447083, + "learning_rate": 0.00018626645285859666, + "loss": 2.673, + "step": 3459 + }, + { + "epoch": 0.2792349285771931, + "grad_norm": 0.7685909271240234, + "learning_rate": 0.00018625846714107012, + "loss": 2.7389, + "step": 3460 + }, + { + "epoch": 0.27931563231377615, + "grad_norm": 0.7098073363304138, + "learning_rate": 0.0001862504792737571, + "loss": 2.6942, + "step": 3461 + }, + { + "epoch": 0.27939633605035913, + "grad_norm": 0.7718049883842468, + "learning_rate": 0.00018624248925685666, + "loss": 2.7359, + "step": 3462 + }, + { + "epoch": 0.27947703978694216, + "grad_norm": 0.7912909984588623, + "learning_rate": 0.00018623449709056797, + "loss": 2.6658, + "step": 3463 + }, + { + "epoch": 0.27955774352352514, + "grad_norm": 0.7255454659461975, + "learning_rate": 0.0001862265027750902, + "loss": 2.771, + "step": 3464 + }, + { + "epoch": 0.27963844726010817, + "grad_norm": 0.7542218565940857, + "learning_rate": 0.00018621850631062254, + "loss": 2.6741, + "step": 3465 + }, + { + "epoch": 0.27971915099669115, + "grad_norm": 0.8386052846908569, + "learning_rate": 0.00018621050769736437, + "loss": 2.67, + "step": 3466 + }, + { + "epoch": 0.2797998547332742, + "grad_norm": 0.8563781976699829, + "learning_rate": 0.00018620250693551495, + "loss": 
2.7461, + "step": 3467 + }, + { + "epoch": 0.27988055846985715, + "grad_norm": 0.7490699291229248, + "learning_rate": 0.00018619450402527376, + "loss": 2.6863, + "step": 3468 + }, + { + "epoch": 0.27996126220644013, + "grad_norm": 0.8008999824523926, + "learning_rate": 0.00018618649896684017, + "loss": 2.7769, + "step": 3469 + }, + { + "epoch": 0.28004196594302316, + "grad_norm": 0.7678235769271851, + "learning_rate": 0.00018617849176041378, + "loss": 2.7237, + "step": 3470 + }, + { + "epoch": 0.28012266967960614, + "grad_norm": 0.8774877786636353, + "learning_rate": 0.00018617048240619408, + "loss": 2.7502, + "step": 3471 + }, + { + "epoch": 0.28020337341618917, + "grad_norm": 0.8150283098220825, + "learning_rate": 0.00018616247090438073, + "loss": 2.6941, + "step": 3472 + }, + { + "epoch": 0.28028407715277215, + "grad_norm": 0.7330089807510376, + "learning_rate": 0.00018615445725517332, + "loss": 2.7002, + "step": 3473 + }, + { + "epoch": 0.2803647808893552, + "grad_norm": 0.748275101184845, + "learning_rate": 0.00018614644145877168, + "loss": 2.6996, + "step": 3474 + }, + { + "epoch": 0.28044548462593816, + "grad_norm": 0.7718296647071838, + "learning_rate": 0.0001861384235153755, + "loss": 2.7333, + "step": 3475 + }, + { + "epoch": 0.2805261883625212, + "grad_norm": 0.7751123309135437, + "learning_rate": 0.00018613040342518465, + "loss": 2.7362, + "step": 3476 + }, + { + "epoch": 0.28060689209910417, + "grad_norm": 0.70979243516922, + "learning_rate": 0.000186122381188399, + "loss": 2.6651, + "step": 3477 + }, + { + "epoch": 0.2806875958356872, + "grad_norm": 0.9607138633728027, + "learning_rate": 0.00018611435680521848, + "loss": 2.7779, + "step": 3478 + }, + { + "epoch": 0.2807682995722702, + "grad_norm": 0.709671676158905, + "learning_rate": 0.0001861063302758431, + "loss": 2.6994, + "step": 3479 + }, + { + "epoch": 0.2808490033088532, + "grad_norm": 0.8765757083892822, + "learning_rate": 0.00018609830160047283, + "loss": 2.7107, + "step": 3480 + }, + { + 
"epoch": 0.2809297070454362, + "grad_norm": 0.7996764183044434, + "learning_rate": 0.0001860902707793079, + "loss": 2.7921, + "step": 3481 + }, + { + "epoch": 0.2810104107820192, + "grad_norm": 0.7094513177871704, + "learning_rate": 0.0001860822378125483, + "loss": 2.7211, + "step": 3482 + }, + { + "epoch": 0.2810911145186022, + "grad_norm": 0.8068607449531555, + "learning_rate": 0.0001860742027003944, + "loss": 2.675, + "step": 3483 + }, + { + "epoch": 0.2811718182551852, + "grad_norm": 0.7737938165664673, + "learning_rate": 0.00018606616544304628, + "loss": 2.7538, + "step": 3484 + }, + { + "epoch": 0.2812525219917682, + "grad_norm": 0.7979975342750549, + "learning_rate": 0.0001860581260407044, + "loss": 2.7894, + "step": 3485 + }, + { + "epoch": 0.28133322572835123, + "grad_norm": 0.7671655416488647, + "learning_rate": 0.00018605008449356904, + "loss": 2.7097, + "step": 3486 + }, + { + "epoch": 0.2814139294649342, + "grad_norm": 0.7284159064292908, + "learning_rate": 0.00018604204080184062, + "loss": 2.7447, + "step": 3487 + }, + { + "epoch": 0.28149463320151724, + "grad_norm": 0.7425351142883301, + "learning_rate": 0.00018603399496571968, + "loss": 2.7302, + "step": 3488 + }, + { + "epoch": 0.2815753369381002, + "grad_norm": 0.7709810733795166, + "learning_rate": 0.00018602594698540663, + "loss": 2.6979, + "step": 3489 + }, + { + "epoch": 0.28165604067468325, + "grad_norm": 0.744628369808197, + "learning_rate": 0.00018601789686110214, + "loss": 2.7279, + "step": 3490 + }, + { + "epoch": 0.2817367444112662, + "grad_norm": 0.7679976224899292, + "learning_rate": 0.00018600984459300678, + "loss": 2.6862, + "step": 3491 + }, + { + "epoch": 0.28181744814784926, + "grad_norm": 0.7923497557640076, + "learning_rate": 0.0001860017901813213, + "loss": 2.6975, + "step": 3492 + }, + { + "epoch": 0.28189815188443224, + "grad_norm": 0.7896692156791687, + "learning_rate": 0.00018599373362624636, + "loss": 2.7052, + "step": 3493 + }, + { + "epoch": 0.28197885562101527, + 
"grad_norm": 0.7913276553153992, + "learning_rate": 0.00018598567492798284, + "loss": 2.7233, + "step": 3494 + }, + { + "epoch": 0.28205955935759824, + "grad_norm": 0.7385257482528687, + "learning_rate": 0.00018597761408673146, + "loss": 2.7616, + "step": 3495 + }, + { + "epoch": 0.2821402630941813, + "grad_norm": 0.7181909084320068, + "learning_rate": 0.00018596955110269323, + "loss": 2.718, + "step": 3496 + }, + { + "epoch": 0.28222096683076425, + "grad_norm": 0.8313151597976685, + "learning_rate": 0.00018596148597606907, + "loss": 2.6775, + "step": 3497 + }, + { + "epoch": 0.2823016705673473, + "grad_norm": 0.7235481142997742, + "learning_rate": 0.00018595341870705995, + "loss": 2.7085, + "step": 3498 + }, + { + "epoch": 0.28238237430393026, + "grad_norm": 0.7092145085334778, + "learning_rate": 0.00018594534929586697, + "loss": 2.7167, + "step": 3499 + }, + { + "epoch": 0.2824630780405133, + "grad_norm": 0.7929207682609558, + "learning_rate": 0.0001859372777426912, + "loss": 2.663, + "step": 3500 + }, + { + "epoch": 0.28254378177709627, + "grad_norm": 0.7488871216773987, + "learning_rate": 0.00018592920404773383, + "loss": 2.7911, + "step": 3501 + }, + { + "epoch": 0.2826244855136793, + "grad_norm": 0.8230419158935547, + "learning_rate": 0.0001859211282111961, + "loss": 2.754, + "step": 3502 + }, + { + "epoch": 0.2827051892502623, + "grad_norm": 0.731971025466919, + "learning_rate": 0.00018591305023327924, + "loss": 2.7142, + "step": 3503 + }, + { + "epoch": 0.2827858929868453, + "grad_norm": 0.8159881234169006, + "learning_rate": 0.00018590497011418457, + "loss": 2.7046, + "step": 3504 + }, + { + "epoch": 0.2828665967234283, + "grad_norm": 0.750266432762146, + "learning_rate": 0.0001858968878541135, + "loss": 2.6951, + "step": 3505 + }, + { + "epoch": 0.2829473004600113, + "grad_norm": 0.7750049233436584, + "learning_rate": 0.00018588880345326748, + "loss": 2.6958, + "step": 3506 + }, + { + "epoch": 0.2830280041965943, + "grad_norm": 0.8559218049049377, + 
"learning_rate": 0.00018588071691184795, + "loss": 2.7205, + "step": 3507 + }, + { + "epoch": 0.28310870793317733, + "grad_norm": 0.7334830164909363, + "learning_rate": 0.00018587262823005642, + "loss": 2.7134, + "step": 3508 + }, + { + "epoch": 0.2831894116697603, + "grad_norm": 0.8749497532844543, + "learning_rate": 0.00018586453740809456, + "loss": 2.6811, + "step": 3509 + }, + { + "epoch": 0.28327011540634334, + "grad_norm": 0.8800753355026245, + "learning_rate": 0.00018585644444616396, + "loss": 2.7427, + "step": 3510 + }, + { + "epoch": 0.2833508191429263, + "grad_norm": 0.8666185736656189, + "learning_rate": 0.00018584834934446632, + "loss": 2.6828, + "step": 3511 + }, + { + "epoch": 0.28343152287950935, + "grad_norm": 0.7451635003089905, + "learning_rate": 0.00018584025210320343, + "loss": 2.6784, + "step": 3512 + }, + { + "epoch": 0.2835122266160923, + "grad_norm": 0.8512656688690186, + "learning_rate": 0.00018583215272257708, + "loss": 2.7762, + "step": 3513 + }, + { + "epoch": 0.28359293035267535, + "grad_norm": 0.9298297166824341, + "learning_rate": 0.00018582405120278907, + "loss": 2.7714, + "step": 3514 + }, + { + "epoch": 0.28367363408925833, + "grad_norm": 0.7968065738677979, + "learning_rate": 0.0001858159475440414, + "loss": 2.7286, + "step": 3515 + }, + { + "epoch": 0.28375433782584136, + "grad_norm": 0.7381564378738403, + "learning_rate": 0.00018580784174653596, + "loss": 2.6697, + "step": 3516 + }, + { + "epoch": 0.28383504156242434, + "grad_norm": 0.8199222683906555, + "learning_rate": 0.00018579973381047481, + "loss": 2.7463, + "step": 3517 + }, + { + "epoch": 0.28391574529900737, + "grad_norm": 0.8022071123123169, + "learning_rate": 0.00018579162373606002, + "loss": 2.6898, + "step": 3518 + }, + { + "epoch": 0.28399644903559035, + "grad_norm": 0.7899700999259949, + "learning_rate": 0.0001857835115234937, + "loss": 2.7074, + "step": 3519 + }, + { + "epoch": 0.2840771527721733, + "grad_norm": 0.7237183451652527, + "learning_rate": 
0.00018577539717297805, + "loss": 2.6699, + "step": 3520 + }, + { + "epoch": 0.28415785650875636, + "grad_norm": 0.7627314329147339, + "learning_rate": 0.00018576728068471526, + "loss": 2.7745, + "step": 3521 + }, + { + "epoch": 0.28423856024533933, + "grad_norm": 0.7301654815673828, + "learning_rate": 0.00018575916205890766, + "loss": 2.7191, + "step": 3522 + }, + { + "epoch": 0.28431926398192237, + "grad_norm": 0.7441647052764893, + "learning_rate": 0.00018575104129575753, + "loss": 2.7529, + "step": 3523 + }, + { + "epoch": 0.28439996771850534, + "grad_norm": 0.7715914249420166, + "learning_rate": 0.0001857429183954673, + "loss": 2.6893, + "step": 3524 + }, + { + "epoch": 0.2844806714550884, + "grad_norm": 0.7464057207107544, + "learning_rate": 0.00018573479335823944, + "loss": 2.7169, + "step": 3525 + }, + { + "epoch": 0.28456137519167135, + "grad_norm": 0.753198504447937, + "learning_rate": 0.00018572666618427638, + "loss": 2.7144, + "step": 3526 + }, + { + "epoch": 0.2846420789282544, + "grad_norm": 0.7681953310966492, + "learning_rate": 0.00018571853687378073, + "loss": 2.709, + "step": 3527 + }, + { + "epoch": 0.28472278266483736, + "grad_norm": 0.7591876983642578, + "learning_rate": 0.0001857104054269551, + "loss": 2.7519, + "step": 3528 + }, + { + "epoch": 0.2848034864014204, + "grad_norm": 0.7417709827423096, + "learning_rate": 0.00018570227184400205, + "loss": 2.6756, + "step": 3529 + }, + { + "epoch": 0.28488419013800337, + "grad_norm": 0.7641329169273376, + "learning_rate": 0.0001856941361251244, + "loss": 2.6614, + "step": 3530 + }, + { + "epoch": 0.2849648938745864, + "grad_norm": 0.7813490033149719, + "learning_rate": 0.0001856859982705249, + "loss": 2.7145, + "step": 3531 + }, + { + "epoch": 0.2850455976111694, + "grad_norm": 0.7777202129364014, + "learning_rate": 0.00018567785828040628, + "loss": 2.7015, + "step": 3532 + }, + { + "epoch": 0.2851263013477524, + "grad_norm": 0.7647144794464111, + "learning_rate": 0.0001856697161549715, + "loss": 
2.7311, + "step": 3533 + }, + { + "epoch": 0.2852070050843354, + "grad_norm": 0.7477256655693054, + "learning_rate": 0.00018566157189442342, + "loss": 2.6832, + "step": 3534 + }, + { + "epoch": 0.2852877088209184, + "grad_norm": 0.7037049531936646, + "learning_rate": 0.00018565342549896506, + "loss": 2.6942, + "step": 3535 + }, + { + "epoch": 0.2853684125575014, + "grad_norm": 0.7309197783470154, + "learning_rate": 0.00018564527696879945, + "loss": 2.6797, + "step": 3536 + }, + { + "epoch": 0.2854491162940844, + "grad_norm": 0.798075795173645, + "learning_rate": 0.00018563712630412967, + "loss": 2.6926, + "step": 3537 + }, + { + "epoch": 0.2855298200306674, + "grad_norm": 0.7831682562828064, + "learning_rate": 0.0001856289735051588, + "loss": 2.7537, + "step": 3538 + }, + { + "epoch": 0.28561052376725043, + "grad_norm": 0.7983096241950989, + "learning_rate": 0.0001856208185720901, + "loss": 2.7037, + "step": 3539 + }, + { + "epoch": 0.2856912275038334, + "grad_norm": 0.7250573635101318, + "learning_rate": 0.00018561266150512678, + "loss": 2.7282, + "step": 3540 + }, + { + "epoch": 0.28577193124041644, + "grad_norm": 0.7800211906433105, + "learning_rate": 0.00018560450230447218, + "loss": 2.6541, + "step": 3541 + }, + { + "epoch": 0.2858526349769994, + "grad_norm": 0.7624209523200989, + "learning_rate": 0.00018559634097032953, + "loss": 2.7041, + "step": 3542 + }, + { + "epoch": 0.28593333871358245, + "grad_norm": 0.7212036848068237, + "learning_rate": 0.0001855881775029024, + "loss": 2.7287, + "step": 3543 + }, + { + "epoch": 0.28601404245016543, + "grad_norm": 0.7774164080619812, + "learning_rate": 0.00018558001190239408, + "loss": 2.6515, + "step": 3544 + }, + { + "epoch": 0.28609474618674846, + "grad_norm": 0.7169588208198547, + "learning_rate": 0.0001855718441690082, + "loss": 2.7111, + "step": 3545 + }, + { + "epoch": 0.28617544992333144, + "grad_norm": 0.7473909258842468, + "learning_rate": 0.00018556367430294827, + "loss": 2.7405, + "step": 3546 + }, + { + 
"epoch": 0.28625615365991447, + "grad_norm": 0.7213929295539856, + "learning_rate": 0.0001855555023044179, + "loss": 2.7336, + "step": 3547 + }, + { + "epoch": 0.28633685739649745, + "grad_norm": 0.701816201210022, + "learning_rate": 0.00018554732817362078, + "loss": 2.721, + "step": 3548 + }, + { + "epoch": 0.2864175611330805, + "grad_norm": 0.8158134818077087, + "learning_rate": 0.00018553915191076064, + "loss": 2.6979, + "step": 3549 + }, + { + "epoch": 0.28649826486966345, + "grad_norm": 0.7303084135055542, + "learning_rate": 0.00018553097351604118, + "loss": 2.6734, + "step": 3550 + }, + { + "epoch": 0.2865789686062465, + "grad_norm": 0.8140435814857483, + "learning_rate": 0.00018552279298966634, + "loss": 2.6832, + "step": 3551 + }, + { + "epoch": 0.28665967234282946, + "grad_norm": 0.7024678587913513, + "learning_rate": 0.00018551461033183988, + "loss": 2.7118, + "step": 3552 + }, + { + "epoch": 0.2867403760794125, + "grad_norm": 0.7277806401252747, + "learning_rate": 0.00018550642554276582, + "loss": 2.6362, + "step": 3553 + }, + { + "epoch": 0.28682107981599547, + "grad_norm": 0.8376575112342834, + "learning_rate": 0.00018549823862264812, + "loss": 2.744, + "step": 3554 + }, + { + "epoch": 0.2869017835525785, + "grad_norm": 0.712195098400116, + "learning_rate": 0.00018549004957169082, + "loss": 2.6715, + "step": 3555 + }, + { + "epoch": 0.2869824872891615, + "grad_norm": 0.7511523962020874, + "learning_rate": 0.00018548185839009805, + "loss": 2.7655, + "step": 3556 + }, + { + "epoch": 0.2870631910257445, + "grad_norm": 0.7397211790084839, + "learning_rate": 0.00018547366507807388, + "loss": 2.6813, + "step": 3557 + }, + { + "epoch": 0.2871438947623275, + "grad_norm": 0.6926341652870178, + "learning_rate": 0.00018546546963582253, + "loss": 2.6477, + "step": 3558 + }, + { + "epoch": 0.2872245984989105, + "grad_norm": 0.7776244878768921, + "learning_rate": 0.00018545727206354827, + "loss": 2.6979, + "step": 3559 + }, + { + "epoch": 0.2873053022354935, + 
"grad_norm": 0.7639400959014893, + "learning_rate": 0.00018544907236145542, + "loss": 2.6913, + "step": 3560 + }, + { + "epoch": 0.28738600597207653, + "grad_norm": 0.7738329768180847, + "learning_rate": 0.0001854408705297483, + "loss": 2.7231, + "step": 3561 + }, + { + "epoch": 0.2874667097086595, + "grad_norm": 0.7182422876358032, + "learning_rate": 0.00018543266656863137, + "loss": 2.718, + "step": 3562 + }, + { + "epoch": 0.28754741344524254, + "grad_norm": 0.7257261276245117, + "learning_rate": 0.00018542446047830903, + "loss": 2.7354, + "step": 3563 + }, + { + "epoch": 0.2876281171818255, + "grad_norm": 0.7761391997337341, + "learning_rate": 0.00018541625225898588, + "loss": 2.705, + "step": 3564 + }, + { + "epoch": 0.28770882091840855, + "grad_norm": 0.9272314310073853, + "learning_rate": 0.0001854080419108664, + "loss": 2.7278, + "step": 3565 + }, + { + "epoch": 0.2877895246549915, + "grad_norm": 0.7622589468955994, + "learning_rate": 0.00018539982943415527, + "loss": 2.7224, + "step": 3566 + }, + { + "epoch": 0.28787022839157456, + "grad_norm": 0.725349485874176, + "learning_rate": 0.0001853916148290572, + "loss": 2.6782, + "step": 3567 + }, + { + "epoch": 0.28795093212815753, + "grad_norm": 0.776242733001709, + "learning_rate": 0.0001853833980957768, + "loss": 2.6467, + "step": 3568 + }, + { + "epoch": 0.28803163586474057, + "grad_norm": 0.8461112976074219, + "learning_rate": 0.00018537517923451896, + "loss": 2.6763, + "step": 3569 + }, + { + "epoch": 0.28811233960132354, + "grad_norm": 0.8161221742630005, + "learning_rate": 0.00018536695824548848, + "loss": 2.7057, + "step": 3570 + }, + { + "epoch": 0.2881930433379065, + "grad_norm": 0.7404211759567261, + "learning_rate": 0.00018535873512889024, + "loss": 2.7083, + "step": 3571 + }, + { + "epoch": 0.28827374707448955, + "grad_norm": 0.831042468547821, + "learning_rate": 0.00018535050988492918, + "loss": 2.6121, + "step": 3572 + }, + { + "epoch": 0.2883544508110725, + "grad_norm": 0.7286352515220642, + 
"learning_rate": 0.00018534228251381035, + "loss": 2.7165, + "step": 3573 + }, + { + "epoch": 0.28843515454765556, + "grad_norm": 0.7951883673667908, + "learning_rate": 0.00018533405301573872, + "loss": 2.6794, + "step": 3574 + }, + { + "epoch": 0.28851585828423854, + "grad_norm": 0.7431079149246216, + "learning_rate": 0.00018532582139091944, + "loss": 2.6758, + "step": 3575 + }, + { + "epoch": 0.28859656202082157, + "grad_norm": 0.7408809065818787, + "learning_rate": 0.0001853175876395576, + "loss": 2.6901, + "step": 3576 + }, + { + "epoch": 0.28867726575740454, + "grad_norm": 0.7428708672523499, + "learning_rate": 0.00018530935176185848, + "loss": 2.6679, + "step": 3577 + }, + { + "epoch": 0.2887579694939876, + "grad_norm": 0.7670302987098694, + "learning_rate": 0.00018530111375802735, + "loss": 2.7306, + "step": 3578 + }, + { + "epoch": 0.28883867323057055, + "grad_norm": 0.7582474946975708, + "learning_rate": 0.00018529287362826943, + "loss": 2.7715, + "step": 3579 + }, + { + "epoch": 0.2889193769671536, + "grad_norm": 0.750973105430603, + "learning_rate": 0.0001852846313727902, + "loss": 2.7147, + "step": 3580 + }, + { + "epoch": 0.28900008070373656, + "grad_norm": 0.771854043006897, + "learning_rate": 0.00018527638699179498, + "loss": 2.6874, + "step": 3581 + }, + { + "epoch": 0.2890807844403196, + "grad_norm": 0.785469651222229, + "learning_rate": 0.00018526814048548928, + "loss": 2.6858, + "step": 3582 + }, + { + "epoch": 0.28916148817690257, + "grad_norm": 0.7601101398468018, + "learning_rate": 0.00018525989185407864, + "loss": 2.6927, + "step": 3583 + }, + { + "epoch": 0.2892421919134856, + "grad_norm": 0.7313411831855774, + "learning_rate": 0.00018525164109776861, + "loss": 2.6813, + "step": 3584 + }, + { + "epoch": 0.2893228956500686, + "grad_norm": 0.7471718192100525, + "learning_rate": 0.00018524338821676483, + "loss": 2.6791, + "step": 3585 + }, + { + "epoch": 0.2894035993866516, + "grad_norm": 0.7615204453468323, + "learning_rate": 
0.00018523513321127302, + "loss": 2.7767, + "step": 3586 + }, + { + "epoch": 0.2894843031232346, + "grad_norm": 0.766793966293335, + "learning_rate": 0.00018522687608149886, + "loss": 2.664, + "step": 3587 + }, + { + "epoch": 0.2895650068598176, + "grad_norm": 0.7897932529449463, + "learning_rate": 0.00018521861682764816, + "loss": 2.7148, + "step": 3588 + }, + { + "epoch": 0.2896457105964006, + "grad_norm": 0.7366818785667419, + "learning_rate": 0.00018521035544992679, + "loss": 2.69, + "step": 3589 + }, + { + "epoch": 0.28972641433298363, + "grad_norm": 0.7503829598426819, + "learning_rate": 0.00018520209194854058, + "loss": 2.7141, + "step": 3590 + }, + { + "epoch": 0.2898071180695666, + "grad_norm": 0.8064351081848145, + "learning_rate": 0.00018519382632369556, + "loss": 2.6738, + "step": 3591 + }, + { + "epoch": 0.28988782180614964, + "grad_norm": 0.7364048361778259, + "learning_rate": 0.00018518555857559768, + "loss": 2.6731, + "step": 3592 + }, + { + "epoch": 0.2899685255427326, + "grad_norm": 0.7065430283546448, + "learning_rate": 0.00018517728870445297, + "loss": 2.7314, + "step": 3593 + }, + { + "epoch": 0.29004922927931565, + "grad_norm": 0.8233428001403809, + "learning_rate": 0.0001851690167104676, + "loss": 2.727, + "step": 3594 + }, + { + "epoch": 0.2901299330158986, + "grad_norm": 0.7563758492469788, + "learning_rate": 0.00018516074259384768, + "loss": 2.665, + "step": 3595 + }, + { + "epoch": 0.29021063675248165, + "grad_norm": 0.7451249361038208, + "learning_rate": 0.00018515246635479943, + "loss": 2.7686, + "step": 3596 + }, + { + "epoch": 0.29029134048906463, + "grad_norm": 0.7374305725097656, + "learning_rate": 0.00018514418799352918, + "loss": 2.6466, + "step": 3597 + }, + { + "epoch": 0.29037204422564766, + "grad_norm": 0.7596983909606934, + "learning_rate": 0.00018513590751024315, + "loss": 2.6763, + "step": 3598 + }, + { + "epoch": 0.29045274796223064, + "grad_norm": 0.7808190584182739, + "learning_rate": 0.0001851276249051478, + "loss": 
2.7362, + "step": 3599 + }, + { + "epoch": 0.29053345169881367, + "grad_norm": 0.765785276889801, + "learning_rate": 0.00018511934017844948, + "loss": 2.7049, + "step": 3600 + }, + { + "epoch": 0.29061415543539665, + "grad_norm": 0.7503563165664673, + "learning_rate": 0.0001851110533303547, + "loss": 2.6262, + "step": 3601 + }, + { + "epoch": 0.2906948591719797, + "grad_norm": 0.7287782430648804, + "learning_rate": 0.00018510276436107, + "loss": 2.7076, + "step": 3602 + }, + { + "epoch": 0.29077556290856266, + "grad_norm": 0.7748721837997437, + "learning_rate": 0.00018509447327080193, + "loss": 2.6945, + "step": 3603 + }, + { + "epoch": 0.2908562666451457, + "grad_norm": 0.7482423186302185, + "learning_rate": 0.00018508618005975714, + "loss": 2.7326, + "step": 3604 + }, + { + "epoch": 0.29093697038172867, + "grad_norm": 0.7708765864372253, + "learning_rate": 0.00018507788472814238, + "loss": 2.7602, + "step": 3605 + }, + { + "epoch": 0.2910176741183117, + "grad_norm": 0.7308060526847839, + "learning_rate": 0.0001850695872761643, + "loss": 2.6735, + "step": 3606 + }, + { + "epoch": 0.2910983778548947, + "grad_norm": 0.7512951493263245, + "learning_rate": 0.00018506128770402972, + "loss": 2.6877, + "step": 3607 + }, + { + "epoch": 0.2911790815914777, + "grad_norm": 0.6806616187095642, + "learning_rate": 0.00018505298601194552, + "loss": 2.6689, + "step": 3608 + }, + { + "epoch": 0.2912597853280607, + "grad_norm": 0.7825661301612854, + "learning_rate": 0.00018504468220011857, + "loss": 2.7108, + "step": 3609 + }, + { + "epoch": 0.2913404890646437, + "grad_norm": 0.8243381977081299, + "learning_rate": 0.00018503637626875584, + "loss": 2.6789, + "step": 3610 + }, + { + "epoch": 0.2914211928012267, + "grad_norm": 0.745012640953064, + "learning_rate": 0.00018502806821806429, + "loss": 2.7658, + "step": 3611 + }, + { + "epoch": 0.2915018965378097, + "grad_norm": 0.7091341018676758, + "learning_rate": 0.00018501975804825104, + "loss": 2.7046, + "step": 3612 + }, + { + 
"epoch": 0.2915826002743927, + "grad_norm": 0.729026734828949, + "learning_rate": 0.0001850114457595232, + "loss": 2.6692, + "step": 3613 + }, + { + "epoch": 0.29166330401097573, + "grad_norm": 0.8098071813583374, + "learning_rate": 0.00018500313135208786, + "loss": 2.712, + "step": 3614 + }, + { + "epoch": 0.2917440077475587, + "grad_norm": 0.7387483716011047, + "learning_rate": 0.0001849948148261523, + "loss": 2.6705, + "step": 3615 + }, + { + "epoch": 0.29182471148414174, + "grad_norm": 0.7904576659202576, + "learning_rate": 0.0001849864961819238, + "loss": 2.5969, + "step": 3616 + }, + { + "epoch": 0.2919054152207247, + "grad_norm": 0.7560681700706482, + "learning_rate": 0.00018497817541960964, + "loss": 2.6971, + "step": 3617 + }, + { + "epoch": 0.29198611895730775, + "grad_norm": 0.8488430976867676, + "learning_rate": 0.00018496985253941723, + "loss": 2.7367, + "step": 3618 + }, + { + "epoch": 0.2920668226938907, + "grad_norm": 0.7641268372535706, + "learning_rate": 0.00018496152754155399, + "loss": 2.6948, + "step": 3619 + }, + { + "epoch": 0.29214752643047376, + "grad_norm": 0.7219721674919128, + "learning_rate": 0.00018495320042622736, + "loss": 2.7225, + "step": 3620 + }, + { + "epoch": 0.29222823016705674, + "grad_norm": 0.7583872675895691, + "learning_rate": 0.00018494487119364493, + "loss": 2.7335, + "step": 3621 + }, + { + "epoch": 0.2923089339036397, + "grad_norm": 0.7771418690681458, + "learning_rate": 0.00018493653984401424, + "loss": 2.6712, + "step": 3622 + }, + { + "epoch": 0.29238963764022274, + "grad_norm": 0.7537891268730164, + "learning_rate": 0.00018492820637754296, + "loss": 2.7282, + "step": 3623 + }, + { + "epoch": 0.2924703413768057, + "grad_norm": 0.7334226965904236, + "learning_rate": 0.00018491987079443875, + "loss": 2.7072, + "step": 3624 + }, + { + "epoch": 0.29255104511338875, + "grad_norm": 0.7768076658248901, + "learning_rate": 0.00018491153309490942, + "loss": 2.7176, + "step": 3625 + }, + { + "epoch": 0.29263174884997173, + 
"grad_norm": 0.6831281185150146, + "learning_rate": 0.0001849031932791627, + "loss": 2.6982, + "step": 3626 + }, + { + "epoch": 0.29271245258655476, + "grad_norm": 0.7150557637214661, + "learning_rate": 0.00018489485134740648, + "loss": 2.7325, + "step": 3627 + }, + { + "epoch": 0.29279315632313774, + "grad_norm": 0.782667338848114, + "learning_rate": 0.00018488650729984863, + "loss": 2.7146, + "step": 3628 + }, + { + "epoch": 0.29287386005972077, + "grad_norm": 0.7718524932861328, + "learning_rate": 0.0001848781611366971, + "loss": 2.746, + "step": 3629 + }, + { + "epoch": 0.29295456379630375, + "grad_norm": 0.7066439390182495, + "learning_rate": 0.00018486981285815998, + "loss": 2.7497, + "step": 3630 + }, + { + "epoch": 0.2930352675328868, + "grad_norm": 0.7705665826797485, + "learning_rate": 0.00018486146246444522, + "loss": 2.6448, + "step": 3631 + }, + { + "epoch": 0.29311597126946976, + "grad_norm": 0.7334863543510437, + "learning_rate": 0.000184853109955761, + "loss": 2.6931, + "step": 3632 + }, + { + "epoch": 0.2931966750060528, + "grad_norm": 0.7903133630752563, + "learning_rate": 0.0001848447553323155, + "loss": 2.6954, + "step": 3633 + }, + { + "epoch": 0.29327737874263576, + "grad_norm": 0.6821191310882568, + "learning_rate": 0.00018483639859431689, + "loss": 2.6165, + "step": 3634 + }, + { + "epoch": 0.2933580824792188, + "grad_norm": 0.7187811136245728, + "learning_rate": 0.00018482803974197344, + "loss": 2.6387, + "step": 3635 + }, + { + "epoch": 0.2934387862158018, + "grad_norm": 0.7429843544960022, + "learning_rate": 0.00018481967877549354, + "loss": 2.6848, + "step": 3636 + }, + { + "epoch": 0.2935194899523848, + "grad_norm": 0.7431524395942688, + "learning_rate": 0.0001848113156950855, + "loss": 2.7044, + "step": 3637 + }, + { + "epoch": 0.2936001936889678, + "grad_norm": 0.7008687853813171, + "learning_rate": 0.00018480295050095778, + "loss": 2.6922, + "step": 3638 + }, + { + "epoch": 0.2936808974255508, + "grad_norm": 0.7106652855873108, + 
"learning_rate": 0.00018479458319331884, + "loss": 2.6845, + "step": 3639 + }, + { + "epoch": 0.2937616011621338, + "grad_norm": 0.7288951873779297, + "learning_rate": 0.00018478621377237723, + "loss": 2.7017, + "step": 3640 + }, + { + "epoch": 0.2938423048987168, + "grad_norm": 0.7228607535362244, + "learning_rate": 0.00018477784223834155, + "loss": 2.7449, + "step": 3641 + }, + { + "epoch": 0.2939230086352998, + "grad_norm": 0.7180825471878052, + "learning_rate": 0.00018476946859142043, + "loss": 2.7291, + "step": 3642 + }, + { + "epoch": 0.29400371237188283, + "grad_norm": 0.7854947447776794, + "learning_rate": 0.00018476109283182258, + "loss": 2.7619, + "step": 3643 + }, + { + "epoch": 0.2940844161084658, + "grad_norm": 0.7871318459510803, + "learning_rate": 0.00018475271495975673, + "loss": 2.6695, + "step": 3644 + }, + { + "epoch": 0.29416511984504884, + "grad_norm": 0.7813127636909485, + "learning_rate": 0.00018474433497543165, + "loss": 2.735, + "step": 3645 + }, + { + "epoch": 0.2942458235816318, + "grad_norm": 0.7835291028022766, + "learning_rate": 0.00018473595287905623, + "loss": 2.7336, + "step": 3646 + }, + { + "epoch": 0.29432652731821485, + "grad_norm": 0.6970148682594299, + "learning_rate": 0.00018472756867083935, + "loss": 2.6912, + "step": 3647 + }, + { + "epoch": 0.2944072310547978, + "grad_norm": 0.7968462109565735, + "learning_rate": 0.00018471918235098998, + "loss": 2.6889, + "step": 3648 + }, + { + "epoch": 0.29448793479138086, + "grad_norm": 0.7011313438415527, + "learning_rate": 0.00018471079391971714, + "loss": 2.6989, + "step": 3649 + }, + { + "epoch": 0.29456863852796383, + "grad_norm": 0.8047335743904114, + "learning_rate": 0.00018470240337722991, + "loss": 2.6827, + "step": 3650 + }, + { + "epoch": 0.29464934226454687, + "grad_norm": 0.7446332573890686, + "learning_rate": 0.00018469401072373733, + "loss": 2.7089, + "step": 3651 + }, + { + "epoch": 0.29473004600112984, + "grad_norm": 0.7610359191894531, + "learning_rate": 
0.00018468561595944862, + "loss": 2.6766, + "step": 3652 + }, + { + "epoch": 0.2948107497377129, + "grad_norm": 0.7705755233764648, + "learning_rate": 0.000184677219084573, + "loss": 2.7445, + "step": 3653 + }, + { + "epoch": 0.29489145347429585, + "grad_norm": 0.7466446757316589, + "learning_rate": 0.00018466882009931973, + "loss": 2.726, + "step": 3654 + }, + { + "epoch": 0.2949721572108789, + "grad_norm": 0.7912059426307678, + "learning_rate": 0.00018466041900389813, + "loss": 2.6865, + "step": 3655 + }, + { + "epoch": 0.29505286094746186, + "grad_norm": 0.722588837146759, + "learning_rate": 0.00018465201579851757, + "loss": 2.7039, + "step": 3656 + }, + { + "epoch": 0.2951335646840449, + "grad_norm": 0.739311933517456, + "learning_rate": 0.00018464361048338752, + "loss": 2.6991, + "step": 3657 + }, + { + "epoch": 0.29521426842062787, + "grad_norm": 0.7784128785133362, + "learning_rate": 0.00018463520305871743, + "loss": 2.753, + "step": 3658 + }, + { + "epoch": 0.2952949721572109, + "grad_norm": 0.8261777758598328, + "learning_rate": 0.00018462679352471682, + "loss": 2.7257, + "step": 3659 + }, + { + "epoch": 0.2953756758937939, + "grad_norm": 0.7510927319526672, + "learning_rate": 0.0001846183818815953, + "loss": 2.6981, + "step": 3660 + }, + { + "epoch": 0.2954563796303769, + "grad_norm": 0.7403035163879395, + "learning_rate": 0.00018460996812956254, + "loss": 2.744, + "step": 3661 + }, + { + "epoch": 0.2955370833669599, + "grad_norm": 0.7927733063697815, + "learning_rate": 0.00018460155226882817, + "loss": 2.6304, + "step": 3662 + }, + { + "epoch": 0.2956177871035429, + "grad_norm": 0.7923495769500732, + "learning_rate": 0.000184593134299602, + "loss": 2.7882, + "step": 3663 + }, + { + "epoch": 0.2956984908401259, + "grad_norm": 0.7639210224151611, + "learning_rate": 0.00018458471422209377, + "loss": 2.7171, + "step": 3664 + }, + { + "epoch": 0.2957791945767089, + "grad_norm": 0.736652672290802, + "learning_rate": 0.00018457629203651337, + "loss": 2.7479, + 
"step": 3665 + }, + { + "epoch": 0.2958598983132919, + "grad_norm": 0.7718610763549805, + "learning_rate": 0.00018456786774307066, + "loss": 2.7135, + "step": 3666 + }, + { + "epoch": 0.29594060204987493, + "grad_norm": 0.7711780071258545, + "learning_rate": 0.00018455944134197565, + "loss": 2.6867, + "step": 3667 + }, + { + "epoch": 0.2960213057864579, + "grad_norm": 0.7202491760253906, + "learning_rate": 0.0001845510128334383, + "loss": 2.6657, + "step": 3668 + }, + { + "epoch": 0.29610200952304094, + "grad_norm": 0.8155657649040222, + "learning_rate": 0.00018454258221766869, + "loss": 2.7342, + "step": 3669 + }, + { + "epoch": 0.2961827132596239, + "grad_norm": 0.7972069382667542, + "learning_rate": 0.00018453414949487696, + "loss": 2.7351, + "step": 3670 + }, + { + "epoch": 0.29626341699620695, + "grad_norm": 0.8645625710487366, + "learning_rate": 0.00018452571466527325, + "loss": 2.6778, + "step": 3671 + }, + { + "epoch": 0.29634412073278993, + "grad_norm": 0.7410334944725037, + "learning_rate": 0.00018451727772906775, + "loss": 2.7228, + "step": 3672 + }, + { + "epoch": 0.2964248244693729, + "grad_norm": 0.7845733165740967, + "learning_rate": 0.0001845088386864708, + "loss": 2.7068, + "step": 3673 + }, + { + "epoch": 0.29650552820595594, + "grad_norm": 0.7709881067276001, + "learning_rate": 0.00018450039753769266, + "loss": 2.676, + "step": 3674 + }, + { + "epoch": 0.2965862319425389, + "grad_norm": 0.7214749455451965, + "learning_rate": 0.00018449195428294371, + "loss": 2.6488, + "step": 3675 + }, + { + "epoch": 0.29666693567912195, + "grad_norm": 0.7467561960220337, + "learning_rate": 0.00018448350892243443, + "loss": 2.7262, + "step": 3676 + }, + { + "epoch": 0.2967476394157049, + "grad_norm": 0.8412678241729736, + "learning_rate": 0.00018447506145637522, + "loss": 2.7898, + "step": 3677 + }, + { + "epoch": 0.29682834315228795, + "grad_norm": 0.7130109071731567, + "learning_rate": 0.00018446661188497668, + "loss": 2.7344, + "step": 3678 + }, + { + "epoch": 
0.29690904688887093, + "grad_norm": 0.7807374000549316, + "learning_rate": 0.00018445816020844937, + "loss": 2.7198, + "step": 3679 + }, + { + "epoch": 0.29698975062545396, + "grad_norm": 0.8497760891914368, + "learning_rate": 0.00018444970642700394, + "loss": 2.7479, + "step": 3680 + }, + { + "epoch": 0.29707045436203694, + "grad_norm": 0.6827178001403809, + "learning_rate": 0.0001844412505408511, + "loss": 2.727, + "step": 3681 + }, + { + "epoch": 0.29715115809861997, + "grad_norm": 0.8063304424285889, + "learning_rate": 0.00018443279255020152, + "loss": 2.7896, + "step": 3682 + }, + { + "epoch": 0.29723186183520295, + "grad_norm": 0.7759353518486023, + "learning_rate": 0.00018442433245526604, + "loss": 2.7014, + "step": 3683 + }, + { + "epoch": 0.297312565571786, + "grad_norm": 0.7380958199501038, + "learning_rate": 0.00018441587025625554, + "loss": 2.6665, + "step": 3684 + }, + { + "epoch": 0.29739326930836896, + "grad_norm": 0.7623556852340698, + "learning_rate": 0.00018440740595338087, + "loss": 2.6955, + "step": 3685 + }, + { + "epoch": 0.297473973044952, + "grad_norm": 0.8204537630081177, + "learning_rate": 0.000184398939546853, + "loss": 2.6854, + "step": 3686 + }, + { + "epoch": 0.29755467678153497, + "grad_norm": 0.7346726655960083, + "learning_rate": 0.00018439047103688293, + "loss": 2.6664, + "step": 3687 + }, + { + "epoch": 0.297635380518118, + "grad_norm": 0.777860701084137, + "learning_rate": 0.00018438200042368173, + "loss": 2.6423, + "step": 3688 + }, + { + "epoch": 0.297716084254701, + "grad_norm": 0.7331553101539612, + "learning_rate": 0.00018437352770746054, + "loss": 2.6137, + "step": 3689 + }, + { + "epoch": 0.297796787991284, + "grad_norm": 0.7634466290473938, + "learning_rate": 0.00018436505288843043, + "loss": 2.7266, + "step": 3690 + }, + { + "epoch": 0.297877491727867, + "grad_norm": 0.8151016235351562, + "learning_rate": 0.00018435657596680268, + "loss": 2.7373, + "step": 3691 + }, + { + "epoch": 0.29795819546445, + "grad_norm": 
0.7806773781776428, + "learning_rate": 0.00018434809694278857, + "loss": 2.7011, + "step": 3692 + }, + { + "epoch": 0.298038899201033, + "grad_norm": 0.7575243711471558, + "learning_rate": 0.00018433961581659935, + "loss": 2.6601, + "step": 3693 + }, + { + "epoch": 0.298119602937616, + "grad_norm": 0.7527276873588562, + "learning_rate": 0.00018433113258844647, + "loss": 2.6864, + "step": 3694 + }, + { + "epoch": 0.298200306674199, + "grad_norm": 0.8024318218231201, + "learning_rate": 0.0001843226472585413, + "loss": 2.728, + "step": 3695 + }, + { + "epoch": 0.29828101041078203, + "grad_norm": 0.7549982666969299, + "learning_rate": 0.0001843141598270954, + "loss": 2.6834, + "step": 3696 + }, + { + "epoch": 0.298361714147365, + "grad_norm": 0.7699971199035645, + "learning_rate": 0.0001843056702943202, + "loss": 2.7209, + "step": 3697 + }, + { + "epoch": 0.29844241788394804, + "grad_norm": 0.823842465877533, + "learning_rate": 0.0001842971786604273, + "loss": 2.6924, + "step": 3698 + }, + { + "epoch": 0.298523121620531, + "grad_norm": 0.7645791172981262, + "learning_rate": 0.00018428868492562837, + "loss": 2.6821, + "step": 3699 + }, + { + "epoch": 0.29860382535711405, + "grad_norm": 0.7530989050865173, + "learning_rate": 0.00018428018909013506, + "loss": 2.7592, + "step": 3700 + }, + { + "epoch": 0.298684529093697, + "grad_norm": 0.7958168387413025, + "learning_rate": 0.00018427169115415914, + "loss": 2.6925, + "step": 3701 + }, + { + "epoch": 0.29876523283028006, + "grad_norm": 0.7777522802352905, + "learning_rate": 0.00018426319111791242, + "loss": 2.6757, + "step": 3702 + }, + { + "epoch": 0.29884593656686304, + "grad_norm": 0.7418079972267151, + "learning_rate": 0.00018425468898160667, + "loss": 2.6445, + "step": 3703 + }, + { + "epoch": 0.29892664030344607, + "grad_norm": 0.7591132521629333, + "learning_rate": 0.00018424618474545382, + "loss": 2.7157, + "step": 3704 + }, + { + "epoch": 0.29900734404002904, + "grad_norm": 0.7591627836227417, + "learning_rate": 
0.00018423767840966586, + "loss": 2.6691, + "step": 3705 + }, + { + "epoch": 0.2990880477766121, + "grad_norm": 0.7934779524803162, + "learning_rate": 0.00018422916997445476, + "loss": 2.7262, + "step": 3706 + }, + { + "epoch": 0.29916875151319505, + "grad_norm": 0.7964254021644592, + "learning_rate": 0.00018422065944003252, + "loss": 2.6196, + "step": 3707 + }, + { + "epoch": 0.2992494552497781, + "grad_norm": 0.7448374032974243, + "learning_rate": 0.0001842121468066113, + "loss": 2.6732, + "step": 3708 + }, + { + "epoch": 0.29933015898636106, + "grad_norm": 0.7813000679016113, + "learning_rate": 0.00018420363207440329, + "loss": 2.6978, + "step": 3709 + }, + { + "epoch": 0.2994108627229441, + "grad_norm": 0.7760851979255676, + "learning_rate": 0.00018419511524362064, + "loss": 2.7466, + "step": 3710 + }, + { + "epoch": 0.29949156645952707, + "grad_norm": 0.7786797881126404, + "learning_rate": 0.00018418659631447564, + "loss": 2.7044, + "step": 3711 + }, + { + "epoch": 0.2995722701961101, + "grad_norm": 0.7860158085823059, + "learning_rate": 0.00018417807528718055, + "loss": 2.6587, + "step": 3712 + }, + { + "epoch": 0.2996529739326931, + "grad_norm": 0.8327339291572571, + "learning_rate": 0.0001841695521619478, + "loss": 2.7112, + "step": 3713 + }, + { + "epoch": 0.2997336776692761, + "grad_norm": 0.7535735368728638, + "learning_rate": 0.00018416102693898982, + "loss": 2.726, + "step": 3714 + }, + { + "epoch": 0.2998143814058591, + "grad_norm": 0.7781090140342712, + "learning_rate": 0.000184152499618519, + "loss": 2.7238, + "step": 3715 + }, + { + "epoch": 0.2998950851424421, + "grad_norm": 0.7700545191764832, + "learning_rate": 0.00018414397020074795, + "loss": 2.7081, + "step": 3716 + }, + { + "epoch": 0.2999757888790251, + "grad_norm": 0.7578303217887878, + "learning_rate": 0.0001841354386858892, + "loss": 2.6591, + "step": 3717 + }, + { + "epoch": 0.30005649261560813, + "grad_norm": 0.7506501078605652, + "learning_rate": 0.00018412690507415538, + "loss": 
2.6551, + "step": 3718 + }, + { + "epoch": 0.3001371963521911, + "grad_norm": 0.7869547009468079, + "learning_rate": 0.00018411836936575918, + "loss": 2.7169, + "step": 3719 + }, + { + "epoch": 0.30021790008877414, + "grad_norm": 0.7547428607940674, + "learning_rate": 0.00018410983156091332, + "loss": 2.7498, + "step": 3720 + }, + { + "epoch": 0.3002986038253571, + "grad_norm": 0.7829383015632629, + "learning_rate": 0.0001841012916598306, + "loss": 2.6885, + "step": 3721 + }, + { + "epoch": 0.30037930756194015, + "grad_norm": 0.8469082117080688, + "learning_rate": 0.00018409274966272386, + "loss": 2.7594, + "step": 3722 + }, + { + "epoch": 0.3004600112985231, + "grad_norm": 0.7690171599388123, + "learning_rate": 0.00018408420556980596, + "loss": 2.7892, + "step": 3723 + }, + { + "epoch": 0.3005407150351061, + "grad_norm": 0.7295899987220764, + "learning_rate": 0.00018407565938128987, + "loss": 2.7023, + "step": 3724 + }, + { + "epoch": 0.30062141877168913, + "grad_norm": 0.7249528169631958, + "learning_rate": 0.00018406711109738856, + "loss": 2.7135, + "step": 3725 + }, + { + "epoch": 0.3007021225082721, + "grad_norm": 0.7237234711647034, + "learning_rate": 0.0001840585607183151, + "loss": 2.6117, + "step": 3726 + }, + { + "epoch": 0.30078282624485514, + "grad_norm": 0.7426557540893555, + "learning_rate": 0.00018405000824428256, + "loss": 2.7202, + "step": 3727 + }, + { + "epoch": 0.3008635299814381, + "grad_norm": 0.7572938799858093, + "learning_rate": 0.00018404145367550414, + "loss": 2.7373, + "step": 3728 + }, + { + "epoch": 0.30094423371802115, + "grad_norm": 0.7198675274848938, + "learning_rate": 0.00018403289701219295, + "loss": 2.6675, + "step": 3729 + }, + { + "epoch": 0.3010249374546041, + "grad_norm": 0.722532331943512, + "learning_rate": 0.00018402433825456235, + "loss": 2.6933, + "step": 3730 + }, + { + "epoch": 0.30110564119118716, + "grad_norm": 0.7621530890464783, + "learning_rate": 0.0001840157774028256, + "loss": 2.6951, + "step": 3731 + }, + { + 
"epoch": 0.30118634492777013, + "grad_norm": 0.7435615062713623, + "learning_rate": 0.00018400721445719604, + "loss": 2.7323, + "step": 3732 + }, + { + "epoch": 0.30126704866435317, + "grad_norm": 0.7233619689941406, + "learning_rate": 0.00018399864941788708, + "loss": 2.6789, + "step": 3733 + }, + { + "epoch": 0.30134775240093614, + "grad_norm": 0.7421496510505676, + "learning_rate": 0.00018399008228511224, + "loss": 2.72, + "step": 3734 + }, + { + "epoch": 0.3014284561375192, + "grad_norm": 0.7250909805297852, + "learning_rate": 0.000183981513059085, + "loss": 2.6717, + "step": 3735 + }, + { + "epoch": 0.30150915987410215, + "grad_norm": 0.7642899751663208, + "learning_rate": 0.0001839729417400189, + "loss": 2.6823, + "step": 3736 + }, + { + "epoch": 0.3015898636106852, + "grad_norm": 0.7434508204460144, + "learning_rate": 0.00018396436832812758, + "loss": 2.6441, + "step": 3737 + }, + { + "epoch": 0.30167056734726816, + "grad_norm": 0.7163311839103699, + "learning_rate": 0.00018395579282362473, + "loss": 2.6736, + "step": 3738 + }, + { + "epoch": 0.3017512710838512, + "grad_norm": 0.6936792731285095, + "learning_rate": 0.00018394721522672404, + "loss": 2.6792, + "step": 3739 + }, + { + "epoch": 0.30183197482043417, + "grad_norm": 0.7791975736618042, + "learning_rate": 0.0001839386355376393, + "loss": 2.653, + "step": 3740 + }, + { + "epoch": 0.3019126785570172, + "grad_norm": 0.7902694940567017, + "learning_rate": 0.00018393005375658437, + "loss": 2.7448, + "step": 3741 + }, + { + "epoch": 0.3019933822936002, + "grad_norm": 0.7405624389648438, + "learning_rate": 0.0001839214698837731, + "loss": 2.6977, + "step": 3742 + }, + { + "epoch": 0.3020740860301832, + "grad_norm": 0.8033632040023804, + "learning_rate": 0.00018391288391941943, + "loss": 2.7468, + "step": 3743 + }, + { + "epoch": 0.3021547897667662, + "grad_norm": 0.8148884177207947, + "learning_rate": 0.00018390429586373735, + "loss": 2.6992, + "step": 3744 + }, + { + "epoch": 0.3022354935033492, + 
"grad_norm": 0.7633625268936157, + "learning_rate": 0.00018389570571694089, + "loss": 2.6604, + "step": 3745 + }, + { + "epoch": 0.3023161972399322, + "grad_norm": 0.8687180876731873, + "learning_rate": 0.00018388711347924413, + "loss": 2.6808, + "step": 3746 + }, + { + "epoch": 0.3023969009765152, + "grad_norm": 0.6974104046821594, + "learning_rate": 0.0001838785191508612, + "loss": 2.7613, + "step": 3747 + }, + { + "epoch": 0.3024776047130982, + "grad_norm": 0.7919288873672485, + "learning_rate": 0.00018386992273200633, + "loss": 2.664, + "step": 3748 + }, + { + "epoch": 0.30255830844968123, + "grad_norm": 0.7708829045295715, + "learning_rate": 0.00018386132422289374, + "loss": 2.7703, + "step": 3749 + }, + { + "epoch": 0.3026390121862642, + "grad_norm": 0.7099813222885132, + "learning_rate": 0.00018385272362373775, + "loss": 2.6485, + "step": 3750 + }, + { + "epoch": 0.30271971592284724, + "grad_norm": 0.7629622220993042, + "learning_rate": 0.0001838441209347527, + "loss": 2.7339, + "step": 3751 + }, + { + "epoch": 0.3028004196594302, + "grad_norm": 0.727275550365448, + "learning_rate": 0.00018383551615615295, + "loss": 2.7194, + "step": 3752 + }, + { + "epoch": 0.30288112339601325, + "grad_norm": 0.7158832550048828, + "learning_rate": 0.00018382690928815302, + "loss": 2.6698, + "step": 3753 + }, + { + "epoch": 0.30296182713259623, + "grad_norm": 0.8075565099716187, + "learning_rate": 0.00018381830033096735, + "loss": 2.7198, + "step": 3754 + }, + { + "epoch": 0.30304253086917926, + "grad_norm": 0.7949094176292419, + "learning_rate": 0.00018380968928481057, + "loss": 2.7048, + "step": 3755 + }, + { + "epoch": 0.30312323460576224, + "grad_norm": 0.7009503841400146, + "learning_rate": 0.00018380107614989724, + "loss": 2.709, + "step": 3756 + }, + { + "epoch": 0.30320393834234527, + "grad_norm": 0.668574869632721, + "learning_rate": 0.00018379246092644204, + "loss": 2.6515, + "step": 3757 + }, + { + "epoch": 0.30328464207892825, + "grad_norm": 0.7470806241035461, + 
"learning_rate": 0.00018378384361465968, + "loss": 2.7577, + "step": 3758 + }, + { + "epoch": 0.3033653458155113, + "grad_norm": 0.7529913783073425, + "learning_rate": 0.0001837752242147649, + "loss": 2.7189, + "step": 3759 + }, + { + "epoch": 0.30344604955209425, + "grad_norm": 0.7373302578926086, + "learning_rate": 0.00018376660272697258, + "loss": 2.7197, + "step": 3760 + }, + { + "epoch": 0.3035267532886773, + "grad_norm": 0.7650466561317444, + "learning_rate": 0.0001837579791514975, + "loss": 2.6613, + "step": 3761 + }, + { + "epoch": 0.30360745702526026, + "grad_norm": 0.775209903717041, + "learning_rate": 0.00018374935348855468, + "loss": 2.6454, + "step": 3762 + }, + { + "epoch": 0.3036881607618433, + "grad_norm": 0.7049290537834167, + "learning_rate": 0.00018374072573835903, + "loss": 2.6663, + "step": 3763 + }, + { + "epoch": 0.30376886449842627, + "grad_norm": 0.7060630917549133, + "learning_rate": 0.0001837320959011256, + "loss": 2.6908, + "step": 3764 + }, + { + "epoch": 0.3038495682350093, + "grad_norm": 0.7561464905738831, + "learning_rate": 0.00018372346397706944, + "loss": 2.673, + "step": 3765 + }, + { + "epoch": 0.3039302719715923, + "grad_norm": 0.7293568849563599, + "learning_rate": 0.0001837148299664057, + "loss": 2.6431, + "step": 3766 + }, + { + "epoch": 0.3040109757081753, + "grad_norm": 0.8460379838943481, + "learning_rate": 0.00018370619386934962, + "loss": 2.7493, + "step": 3767 + }, + { + "epoch": 0.3040916794447583, + "grad_norm": 0.8136082291603088, + "learning_rate": 0.00018369755568611632, + "loss": 2.7298, + "step": 3768 + }, + { + "epoch": 0.3041723831813413, + "grad_norm": 0.6916636824607849, + "learning_rate": 0.00018368891541692116, + "loss": 2.7173, + "step": 3769 + }, + { + "epoch": 0.3042530869179243, + "grad_norm": 0.7547643780708313, + "learning_rate": 0.0001836802730619795, + "loss": 2.6343, + "step": 3770 + }, + { + "epoch": 0.30433379065450733, + "grad_norm": 0.7439205050468445, + "learning_rate": 
0.00018367162862150665, + "loss": 2.6627, + "step": 3771 + }, + { + "epoch": 0.3044144943910903, + "grad_norm": 0.7781087756156921, + "learning_rate": 0.0001836629820957181, + "loss": 2.7223, + "step": 3772 + }, + { + "epoch": 0.30449519812767334, + "grad_norm": 0.7876880764961243, + "learning_rate": 0.00018365433348482935, + "loss": 2.7139, + "step": 3773 + }, + { + "epoch": 0.3045759018642563, + "grad_norm": 0.7571346163749695, + "learning_rate": 0.00018364568278905595, + "loss": 2.6939, + "step": 3774 + }, + { + "epoch": 0.3046566056008393, + "grad_norm": 0.9011813402175903, + "learning_rate": 0.00018363703000861346, + "loss": 2.7516, + "step": 3775 + }, + { + "epoch": 0.3047373093374223, + "grad_norm": 0.7809761762619019, + "learning_rate": 0.00018362837514371755, + "loss": 2.7587, + "step": 3776 + }, + { + "epoch": 0.3048180130740053, + "grad_norm": 0.7486867308616638, + "learning_rate": 0.00018361971819458393, + "loss": 2.6617, + "step": 3777 + }, + { + "epoch": 0.30489871681058833, + "grad_norm": 0.7434267401695251, + "learning_rate": 0.00018361105916142836, + "loss": 2.7328, + "step": 3778 + }, + { + "epoch": 0.3049794205471713, + "grad_norm": 0.7895822525024414, + "learning_rate": 0.0001836023980444666, + "loss": 2.7038, + "step": 3779 + }, + { + "epoch": 0.30506012428375434, + "grad_norm": 0.7329267263412476, + "learning_rate": 0.00018359373484391458, + "loss": 2.6533, + "step": 3780 + }, + { + "epoch": 0.3051408280203373, + "grad_norm": 0.7578477263450623, + "learning_rate": 0.00018358506955998817, + "loss": 2.723, + "step": 3781 + }, + { + "epoch": 0.30522153175692035, + "grad_norm": 0.7174215316772461, + "learning_rate": 0.0001835764021929033, + "loss": 2.7665, + "step": 3782 + }, + { + "epoch": 0.3053022354935033, + "grad_norm": 0.7261673808097839, + "learning_rate": 0.00018356773274287605, + "loss": 2.7239, + "step": 3783 + }, + { + "epoch": 0.30538293923008636, + "grad_norm": 0.7550768852233887, + "learning_rate": 0.00018355906121012244, + "loss": 
2.6952, + "step": 3784 + }, + { + "epoch": 0.30546364296666934, + "grad_norm": 0.7805373668670654, + "learning_rate": 0.0001835503875948586, + "loss": 2.6453, + "step": 3785 + }, + { + "epoch": 0.30554434670325237, + "grad_norm": 0.7753674983978271, + "learning_rate": 0.0001835417118973007, + "loss": 2.7188, + "step": 3786 + }, + { + "epoch": 0.30562505043983534, + "grad_norm": 0.719774603843689, + "learning_rate": 0.00018353303411766496, + "loss": 2.69, + "step": 3787 + }, + { + "epoch": 0.3057057541764184, + "grad_norm": 0.786780059337616, + "learning_rate": 0.00018352435425616763, + "loss": 2.7015, + "step": 3788 + }, + { + "epoch": 0.30578645791300135, + "grad_norm": 0.7481613159179688, + "learning_rate": 0.00018351567231302508, + "loss": 2.6267, + "step": 3789 + }, + { + "epoch": 0.3058671616495844, + "grad_norm": 0.8138384222984314, + "learning_rate": 0.00018350698828845365, + "loss": 2.7301, + "step": 3790 + }, + { + "epoch": 0.30594786538616736, + "grad_norm": 0.7911081314086914, + "learning_rate": 0.00018349830218266982, + "loss": 2.6661, + "step": 3791 + }, + { + "epoch": 0.3060285691227504, + "grad_norm": 0.763179361820221, + "learning_rate": 0.00018348961399588997, + "loss": 2.6509, + "step": 3792 + }, + { + "epoch": 0.30610927285933337, + "grad_norm": 0.8214982748031616, + "learning_rate": 0.00018348092372833072, + "loss": 2.6951, + "step": 3793 + }, + { + "epoch": 0.3061899765959164, + "grad_norm": 0.7271003127098083, + "learning_rate": 0.00018347223138020865, + "loss": 2.7227, + "step": 3794 + }, + { + "epoch": 0.3062706803324994, + "grad_norm": 0.7727730870246887, + "learning_rate": 0.00018346353695174037, + "loss": 2.721, + "step": 3795 + }, + { + "epoch": 0.3063513840690824, + "grad_norm": 0.844895601272583, + "learning_rate": 0.00018345484044314257, + "loss": 2.6757, + "step": 3796 + }, + { + "epoch": 0.3064320878056654, + "grad_norm": 0.7409898638725281, + "learning_rate": 0.00018344614185463197, + "loss": 2.6798, + "step": 3797 + }, + { + 
"epoch": 0.3065127915422484, + "grad_norm": 0.8284425139427185, + "learning_rate": 0.00018343744118642542, + "loss": 2.7573, + "step": 3798 + }, + { + "epoch": 0.3065934952788314, + "grad_norm": 0.7535427808761597, + "learning_rate": 0.00018342873843873973, + "loss": 2.7026, + "step": 3799 + }, + { + "epoch": 0.30667419901541443, + "grad_norm": 0.8013898730278015, + "learning_rate": 0.00018342003361179176, + "loss": 2.7331, + "step": 3800 + }, + { + "epoch": 0.3067549027519974, + "grad_norm": 0.7458386421203613, + "learning_rate": 0.0001834113267057985, + "loss": 2.6976, + "step": 3801 + }, + { + "epoch": 0.30683560648858044, + "grad_norm": 0.8333673477172852, + "learning_rate": 0.00018340261772097695, + "loss": 2.7064, + "step": 3802 + }, + { + "epoch": 0.3069163102251634, + "grad_norm": 0.7273485064506531, + "learning_rate": 0.00018339390665754414, + "loss": 2.6619, + "step": 3803 + }, + { + "epoch": 0.30699701396174645, + "grad_norm": 0.8199014067649841, + "learning_rate": 0.0001833851935157172, + "loss": 2.654, + "step": 3804 + }, + { + "epoch": 0.3070777176983294, + "grad_norm": 0.780197024345398, + "learning_rate": 0.00018337647829571324, + "loss": 2.6814, + "step": 3805 + }, + { + "epoch": 0.30715842143491245, + "grad_norm": 0.7214049100875854, + "learning_rate": 0.0001833677609977495, + "loss": 2.709, + "step": 3806 + }, + { + "epoch": 0.30723912517149543, + "grad_norm": 0.7680457830429077, + "learning_rate": 0.00018335904162204326, + "loss": 2.6628, + "step": 3807 + }, + { + "epoch": 0.30731982890807846, + "grad_norm": 0.760728120803833, + "learning_rate": 0.00018335032016881178, + "loss": 2.7005, + "step": 3808 + }, + { + "epoch": 0.30740053264466144, + "grad_norm": 0.7631687521934509, + "learning_rate": 0.00018334159663827243, + "loss": 2.7012, + "step": 3809 + }, + { + "epoch": 0.30748123638124447, + "grad_norm": 0.7515785694122314, + "learning_rate": 0.00018333287103064266, + "loss": 2.7062, + "step": 3810 + }, + { + "epoch": 0.30756194011782745, + 
"grad_norm": 0.804500162601471, + "learning_rate": 0.00018332414334613987, + "loss": 2.7888, + "step": 3811 + }, + { + "epoch": 0.3076426438544105, + "grad_norm": 0.7551451325416565, + "learning_rate": 0.00018331541358498164, + "loss": 2.6345, + "step": 3812 + }, + { + "epoch": 0.30772334759099346, + "grad_norm": 0.7342958450317383, + "learning_rate": 0.0001833066817473855, + "loss": 2.6601, + "step": 3813 + }, + { + "epoch": 0.3078040513275765, + "grad_norm": 0.8059296607971191, + "learning_rate": 0.0001832979478335691, + "loss": 2.7694, + "step": 3814 + }, + { + "epoch": 0.30788475506415947, + "grad_norm": 0.7037352919578552, + "learning_rate": 0.0001832892118437501, + "loss": 2.6788, + "step": 3815 + }, + { + "epoch": 0.3079654588007425, + "grad_norm": 0.759509801864624, + "learning_rate": 0.0001832804737781462, + "loss": 2.7115, + "step": 3816 + }, + { + "epoch": 0.3080461625373255, + "grad_norm": 0.7911720871925354, + "learning_rate": 0.00018327173363697524, + "loss": 2.6676, + "step": 3817 + }, + { + "epoch": 0.3081268662739085, + "grad_norm": 0.7592991590499878, + "learning_rate": 0.00018326299142045496, + "loss": 2.7245, + "step": 3818 + }, + { + "epoch": 0.3082075700104915, + "grad_norm": 0.7620227932929993, + "learning_rate": 0.00018325424712880333, + "loss": 2.7224, + "step": 3819 + }, + { + "epoch": 0.3082882737470745, + "grad_norm": 0.7834638953208923, + "learning_rate": 0.0001832455007622382, + "loss": 2.7469, + "step": 3820 + }, + { + "epoch": 0.3083689774836575, + "grad_norm": 0.7765992879867554, + "learning_rate": 0.00018323675232097757, + "loss": 2.7193, + "step": 3821 + }, + { + "epoch": 0.3084496812202405, + "grad_norm": 0.7334728837013245, + "learning_rate": 0.00018322800180523949, + "loss": 2.667, + "step": 3822 + }, + { + "epoch": 0.3085303849568235, + "grad_norm": 0.7674607634544373, + "learning_rate": 0.00018321924921524207, + "loss": 2.6479, + "step": 3823 + }, + { + "epoch": 0.30861108869340653, + "grad_norm": 0.7616469860076904, + 
"learning_rate": 0.0001832104945512034, + "loss": 2.6535, + "step": 3824 + }, + { + "epoch": 0.3086917924299895, + "grad_norm": 0.7693164944648743, + "learning_rate": 0.00018320173781334172, + "loss": 2.7616, + "step": 3825 + }, + { + "epoch": 0.3087724961665725, + "grad_norm": 0.7099221348762512, + "learning_rate": 0.0001831929790018752, + "loss": 2.6729, + "step": 3826 + }, + { + "epoch": 0.3088531999031555, + "grad_norm": 0.7389346957206726, + "learning_rate": 0.00018318421811702222, + "loss": 2.6396, + "step": 3827 + }, + { + "epoch": 0.3089339036397385, + "grad_norm": 0.8302628397941589, + "learning_rate": 0.00018317545515900106, + "loss": 2.6786, + "step": 3828 + }, + { + "epoch": 0.3090146073763215, + "grad_norm": 0.7441998720169067, + "learning_rate": 0.00018316669012803015, + "loss": 2.6769, + "step": 3829 + }, + { + "epoch": 0.3090953111129045, + "grad_norm": 0.8454675674438477, + "learning_rate": 0.00018315792302432788, + "loss": 2.7275, + "step": 3830 + }, + { + "epoch": 0.30917601484948753, + "grad_norm": 0.8129739761352539, + "learning_rate": 0.00018314915384811282, + "loss": 2.7603, + "step": 3831 + }, + { + "epoch": 0.3092567185860705, + "grad_norm": 0.7525617480278015, + "learning_rate": 0.00018314038259960349, + "loss": 2.7156, + "step": 3832 + }, + { + "epoch": 0.30933742232265354, + "grad_norm": 0.7319022417068481, + "learning_rate": 0.0001831316092790185, + "loss": 2.676, + "step": 3833 + }, + { + "epoch": 0.3094181260592365, + "grad_norm": 0.7767768502235413, + "learning_rate": 0.00018312283388657646, + "loss": 2.7022, + "step": 3834 + }, + { + "epoch": 0.30949882979581955, + "grad_norm": 0.709293007850647, + "learning_rate": 0.00018311405642249616, + "loss": 2.6241, + "step": 3835 + }, + { + "epoch": 0.30957953353240253, + "grad_norm": 0.715360701084137, + "learning_rate": 0.0001831052768869963, + "loss": 2.6777, + "step": 3836 + }, + { + "epoch": 0.30966023726898556, + "grad_norm": 0.7361319065093994, + "learning_rate": 
0.0001830964952802957, + "loss": 2.6539, + "step": 3837 + }, + { + "epoch": 0.30974094100556854, + "grad_norm": 0.7243087291717529, + "learning_rate": 0.0001830877116026132, + "loss": 2.7506, + "step": 3838 + }, + { + "epoch": 0.30982164474215157, + "grad_norm": 0.7361106872558594, + "learning_rate": 0.00018307892585416776, + "loss": 2.697, + "step": 3839 + }, + { + "epoch": 0.30990234847873455, + "grad_norm": 0.7541893720626831, + "learning_rate": 0.00018307013803517833, + "loss": 2.694, + "step": 3840 + }, + { + "epoch": 0.3099830522153176, + "grad_norm": 0.7235575914382935, + "learning_rate": 0.00018306134814586388, + "loss": 2.6711, + "step": 3841 + }, + { + "epoch": 0.31006375595190055, + "grad_norm": 0.7868196368217468, + "learning_rate": 0.00018305255618644354, + "loss": 2.7177, + "step": 3842 + }, + { + "epoch": 0.3101444596884836, + "grad_norm": 0.8074443340301514, + "learning_rate": 0.00018304376215713637, + "loss": 2.7293, + "step": 3843 + }, + { + "epoch": 0.31022516342506656, + "grad_norm": 0.6993385553359985, + "learning_rate": 0.00018303496605816158, + "loss": 2.6942, + "step": 3844 + }, + { + "epoch": 0.3103058671616496, + "grad_norm": 0.7272824645042419, + "learning_rate": 0.00018302616788973839, + "loss": 2.7093, + "step": 3845 + }, + { + "epoch": 0.31038657089823257, + "grad_norm": 0.7496963143348694, + "learning_rate": 0.00018301736765208605, + "loss": 2.7096, + "step": 3846 + }, + { + "epoch": 0.3104672746348156, + "grad_norm": 0.7407644987106323, + "learning_rate": 0.00018300856534542387, + "loss": 2.6956, + "step": 3847 + }, + { + "epoch": 0.3105479783713986, + "grad_norm": 0.742382287979126, + "learning_rate": 0.00018299976096997132, + "loss": 2.6744, + "step": 3848 + }, + { + "epoch": 0.3106286821079816, + "grad_norm": 0.7314567565917969, + "learning_rate": 0.0001829909545259477, + "loss": 2.7544, + "step": 3849 + }, + { + "epoch": 0.3107093858445646, + "grad_norm": 0.7550896406173706, + "learning_rate": 0.0001829821460135726, + "loss": 
2.714, + "step": 3850 + }, + { + "epoch": 0.3107900895811476, + "grad_norm": 0.7496031522750854, + "learning_rate": 0.00018297333543306548, + "loss": 2.6718, + "step": 3851 + }, + { + "epoch": 0.3108707933177306, + "grad_norm": 0.7600073218345642, + "learning_rate": 0.00018296452278464596, + "loss": 2.7141, + "step": 3852 + }, + { + "epoch": 0.31095149705431363, + "grad_norm": 0.7242388129234314, + "learning_rate": 0.00018295570806853366, + "loss": 2.7407, + "step": 3853 + }, + { + "epoch": 0.3110322007908966, + "grad_norm": 0.723874568939209, + "learning_rate": 0.00018294689128494824, + "loss": 2.7253, + "step": 3854 + }, + { + "epoch": 0.31111290452747964, + "grad_norm": 0.7902834415435791, + "learning_rate": 0.00018293807243410947, + "loss": 2.7118, + "step": 3855 + }, + { + "epoch": 0.3111936082640626, + "grad_norm": 0.7676794528961182, + "learning_rate": 0.00018292925151623717, + "loss": 2.684, + "step": 3856 + }, + { + "epoch": 0.31127431200064565, + "grad_norm": 0.767431378364563, + "learning_rate": 0.0001829204285315511, + "loss": 2.6936, + "step": 3857 + }, + { + "epoch": 0.3113550157372286, + "grad_norm": 0.7802234888076782, + "learning_rate": 0.00018291160348027122, + "loss": 2.7181, + "step": 3858 + }, + { + "epoch": 0.31143571947381166, + "grad_norm": 0.7823610305786133, + "learning_rate": 0.00018290277636261743, + "loss": 2.7014, + "step": 3859 + }, + { + "epoch": 0.31151642321039463, + "grad_norm": 0.8199869394302368, + "learning_rate": 0.00018289394717880978, + "loss": 2.73, + "step": 3860 + }, + { + "epoch": 0.31159712694697766, + "grad_norm": 0.7725761532783508, + "learning_rate": 0.00018288511592906822, + "loss": 2.6978, + "step": 3861 + }, + { + "epoch": 0.31167783068356064, + "grad_norm": 0.752034068107605, + "learning_rate": 0.00018287628261361296, + "loss": 2.6635, + "step": 3862 + }, + { + "epoch": 0.3117585344201437, + "grad_norm": 0.7961714267730713, + "learning_rate": 0.0001828674472326641, + "loss": 2.7047, + "step": 3863 + }, + { + 
"epoch": 0.31183923815672665, + "grad_norm": 0.7413069605827332, + "learning_rate": 0.00018285860978644182, + "loss": 2.6872, + "step": 3864 + }, + { + "epoch": 0.3119199418933097, + "grad_norm": 0.8943146467208862, + "learning_rate": 0.00018284977027516636, + "loss": 2.7611, + "step": 3865 + }, + { + "epoch": 0.31200064562989266, + "grad_norm": 0.7663856744766235, + "learning_rate": 0.0001828409286990581, + "loss": 2.7541, + "step": 3866 + }, + { + "epoch": 0.3120813493664757, + "grad_norm": 0.7557348608970642, + "learning_rate": 0.00018283208505833731, + "loss": 2.6633, + "step": 3867 + }, + { + "epoch": 0.31216205310305867, + "grad_norm": 0.7690094113349915, + "learning_rate": 0.00018282323935322445, + "loss": 2.7117, + "step": 3868 + }, + { + "epoch": 0.3122427568396417, + "grad_norm": 0.8059033751487732, + "learning_rate": 0.00018281439158393997, + "loss": 2.6743, + "step": 3869 + }, + { + "epoch": 0.3123234605762247, + "grad_norm": 0.7877150774002075, + "learning_rate": 0.00018280554175070438, + "loss": 2.6546, + "step": 3870 + }, + { + "epoch": 0.3124041643128077, + "grad_norm": 0.799670934677124, + "learning_rate": 0.0001827966898537382, + "loss": 2.7184, + "step": 3871 + }, + { + "epoch": 0.3124848680493907, + "grad_norm": 0.8353915214538574, + "learning_rate": 0.0001827878358932621, + "loss": 2.7235, + "step": 3872 + }, + { + "epoch": 0.3125655717859737, + "grad_norm": 0.7954776883125305, + "learning_rate": 0.00018277897986949672, + "loss": 2.5992, + "step": 3873 + }, + { + "epoch": 0.3126462755225567, + "grad_norm": 0.7959856986999512, + "learning_rate": 0.00018277012178266277, + "loss": 2.6877, + "step": 3874 + }, + { + "epoch": 0.3127269792591397, + "grad_norm": 0.8220208883285522, + "learning_rate": 0.00018276126163298102, + "loss": 2.6891, + "step": 3875 + }, + { + "epoch": 0.3128076829957227, + "grad_norm": 0.7827965021133423, + "learning_rate": 0.0001827523994206723, + "loss": 2.7271, + "step": 3876 + }, + { + "epoch": 0.3128883867323057, + 
"grad_norm": 0.764369010925293, + "learning_rate": 0.00018274353514595746, + "loss": 2.6661, + "step": 3877 + }, + { + "epoch": 0.3129690904688887, + "grad_norm": 0.7440944314002991, + "learning_rate": 0.00018273466880905744, + "loss": 2.6621, + "step": 3878 + }, + { + "epoch": 0.3130497942054717, + "grad_norm": 0.8544813394546509, + "learning_rate": 0.00018272580041019319, + "loss": 2.7168, + "step": 3879 + }, + { + "epoch": 0.3131304979420547, + "grad_norm": 0.7232592701911926, + "learning_rate": 0.00018271692994958577, + "loss": 2.6666, + "step": 3880 + }, + { + "epoch": 0.3132112016786377, + "grad_norm": 0.750525712966919, + "learning_rate": 0.00018270805742745617, + "loss": 2.6984, + "step": 3881 + }, + { + "epoch": 0.31329190541522073, + "grad_norm": 0.8195550441741943, + "learning_rate": 0.00018269918284402565, + "loss": 2.7183, + "step": 3882 + }, + { + "epoch": 0.3133726091518037, + "grad_norm": 0.7695632576942444, + "learning_rate": 0.0001826903061995153, + "loss": 2.7092, + "step": 3883 + }, + { + "epoch": 0.31345331288838674, + "grad_norm": 0.7631582617759705, + "learning_rate": 0.0001826814274941463, + "loss": 2.7061, + "step": 3884 + }, + { + "epoch": 0.3135340166249697, + "grad_norm": 0.8318471908569336, + "learning_rate": 0.0001826725467281401, + "loss": 2.694, + "step": 3885 + }, + { + "epoch": 0.31361472036155275, + "grad_norm": 0.7313492298126221, + "learning_rate": 0.00018266366390171784, + "loss": 2.6729, + "step": 3886 + }, + { + "epoch": 0.3136954240981357, + "grad_norm": 0.7508631944656372, + "learning_rate": 0.00018265477901510105, + "loss": 2.731, + "step": 3887 + }, + { + "epoch": 0.31377612783471875, + "grad_norm": 0.8106402158737183, + "learning_rate": 0.00018264589206851107, + "loss": 2.7113, + "step": 3888 + }, + { + "epoch": 0.31385683157130173, + "grad_norm": 0.771542489528656, + "learning_rate": 0.00018263700306216945, + "loss": 2.644, + "step": 3889 + }, + { + "epoch": 0.31393753530788476, + "grad_norm": 0.812441885471344, + 
"learning_rate": 0.00018262811199629768, + "loss": 2.6889, + "step": 3890 + }, + { + "epoch": 0.31401823904446774, + "grad_norm": 0.8231199979782104, + "learning_rate": 0.00018261921887111738, + "loss": 2.6466, + "step": 3891 + }, + { + "epoch": 0.31409894278105077, + "grad_norm": 0.7492454051971436, + "learning_rate": 0.00018261032368685012, + "loss": 2.6693, + "step": 3892 + }, + { + "epoch": 0.31417964651763375, + "grad_norm": 0.7651814222335815, + "learning_rate": 0.00018260142644371772, + "loss": 2.6569, + "step": 3893 + }, + { + "epoch": 0.3142603502542168, + "grad_norm": 0.7504465579986572, + "learning_rate": 0.0001825925271419418, + "loss": 2.684, + "step": 3894 + }, + { + "epoch": 0.31434105399079976, + "grad_norm": 0.749650239944458, + "learning_rate": 0.00018258362578174424, + "loss": 2.6482, + "step": 3895 + }, + { + "epoch": 0.3144217577273828, + "grad_norm": 0.8445256352424622, + "learning_rate": 0.00018257472236334686, + "loss": 2.727, + "step": 3896 + }, + { + "epoch": 0.31450246146396577, + "grad_norm": 0.7628257870674133, + "learning_rate": 0.0001825658168869715, + "loss": 2.7314, + "step": 3897 + }, + { + "epoch": 0.3145831652005488, + "grad_norm": 0.7738446593284607, + "learning_rate": 0.00018255690935284019, + "loss": 2.7478, + "step": 3898 + }, + { + "epoch": 0.3146638689371318, + "grad_norm": 0.7578958868980408, + "learning_rate": 0.00018254799976117486, + "loss": 2.6922, + "step": 3899 + }, + { + "epoch": 0.3147445726737148, + "grad_norm": 0.8367362022399902, + "learning_rate": 0.00018253908811219764, + "loss": 2.7347, + "step": 3900 + }, + { + "epoch": 0.3148252764102978, + "grad_norm": 0.7530354857444763, + "learning_rate": 0.00018253017440613057, + "loss": 2.7151, + "step": 3901 + }, + { + "epoch": 0.3149059801468808, + "grad_norm": 0.7168053388595581, + "learning_rate": 0.00018252125864319578, + "loss": 2.7072, + "step": 3902 + }, + { + "epoch": 0.3149866838834638, + "grad_norm": 0.7480056285858154, + "learning_rate": 
0.00018251234082361555, + "loss": 2.6489, + "step": 3903 + }, + { + "epoch": 0.3150673876200468, + "grad_norm": 0.8563880324363708, + "learning_rate": 0.0001825034209476121, + "loss": 2.7384, + "step": 3904 + }, + { + "epoch": 0.3151480913566298, + "grad_norm": 0.7959346771240234, + "learning_rate": 0.0001824944990154077, + "loss": 2.631, + "step": 3905 + }, + { + "epoch": 0.31522879509321283, + "grad_norm": 0.7385980486869812, + "learning_rate": 0.00018248557502722476, + "loss": 2.7394, + "step": 3906 + }, + { + "epoch": 0.3153094988297958, + "grad_norm": 0.7682650685310364, + "learning_rate": 0.00018247664898328567, + "loss": 2.7327, + "step": 3907 + }, + { + "epoch": 0.31539020256637884, + "grad_norm": 0.7720316648483276, + "learning_rate": 0.0001824677208838129, + "loss": 2.6442, + "step": 3908 + }, + { + "epoch": 0.3154709063029618, + "grad_norm": 0.7927379608154297, + "learning_rate": 0.00018245879072902895, + "loss": 2.7738, + "step": 3909 + }, + { + "epoch": 0.31555161003954485, + "grad_norm": 0.7506012916564941, + "learning_rate": 0.00018244985851915637, + "loss": 2.6825, + "step": 3910 + }, + { + "epoch": 0.3156323137761278, + "grad_norm": 0.6996353268623352, + "learning_rate": 0.00018244092425441781, + "loss": 2.6783, + "step": 3911 + }, + { + "epoch": 0.31571301751271086, + "grad_norm": 0.8039344549179077, + "learning_rate": 0.00018243198793503588, + "loss": 2.7628, + "step": 3912 + }, + { + "epoch": 0.31579372124929384, + "grad_norm": 0.7890963554382324, + "learning_rate": 0.0001824230495612334, + "loss": 2.7512, + "step": 3913 + }, + { + "epoch": 0.31587442498587687, + "grad_norm": 0.7470870614051819, + "learning_rate": 0.00018241410913323301, + "loss": 2.7058, + "step": 3914 + }, + { + "epoch": 0.31595512872245984, + "grad_norm": 0.7056336402893066, + "learning_rate": 0.0001824051666512576, + "loss": 2.6091, + "step": 3915 + }, + { + "epoch": 0.3160358324590429, + "grad_norm": 0.7818490862846375, + "learning_rate": 0.00018239622211553002, + "loss": 
2.7509, + "step": 3916 + }, + { + "epoch": 0.31611653619562585, + "grad_norm": 0.7590607404708862, + "learning_rate": 0.0001823872755262732, + "loss": 2.7238, + "step": 3917 + }, + { + "epoch": 0.3161972399322089, + "grad_norm": 0.7157841920852661, + "learning_rate": 0.00018237832688371014, + "loss": 2.6639, + "step": 3918 + }, + { + "epoch": 0.31627794366879186, + "grad_norm": 0.7515804171562195, + "learning_rate": 0.00018236937618806382, + "loss": 2.6973, + "step": 3919 + }, + { + "epoch": 0.3163586474053749, + "grad_norm": 0.6691949963569641, + "learning_rate": 0.00018236042343955733, + "loss": 2.727, + "step": 3920 + }, + { + "epoch": 0.31643935114195787, + "grad_norm": 0.8122327327728271, + "learning_rate": 0.0001823514686384138, + "loss": 2.7513, + "step": 3921 + }, + { + "epoch": 0.3165200548785409, + "grad_norm": 0.7813653349876404, + "learning_rate": 0.0001823425117848564, + "loss": 2.7037, + "step": 3922 + }, + { + "epoch": 0.3166007586151239, + "grad_norm": 0.6869354844093323, + "learning_rate": 0.00018233355287910834, + "loss": 2.693, + "step": 3923 + }, + { + "epoch": 0.3166814623517069, + "grad_norm": 0.7773037552833557, + "learning_rate": 0.00018232459192139296, + "loss": 2.687, + "step": 3924 + }, + { + "epoch": 0.3167621660882899, + "grad_norm": 0.7644256949424744, + "learning_rate": 0.00018231562891193352, + "loss": 2.6753, + "step": 3925 + }, + { + "epoch": 0.3168428698248729, + "grad_norm": 0.8427005410194397, + "learning_rate": 0.00018230666385095343, + "loss": 2.6641, + "step": 3926 + }, + { + "epoch": 0.3169235735614559, + "grad_norm": 0.7194599509239197, + "learning_rate": 0.0001822976967386761, + "loss": 2.7091, + "step": 3927 + }, + { + "epoch": 0.3170042772980389, + "grad_norm": 0.7710655331611633, + "learning_rate": 0.00018228872757532512, + "loss": 2.6938, + "step": 3928 + }, + { + "epoch": 0.3170849810346219, + "grad_norm": 0.8003759980201721, + "learning_rate": 0.0001822797563611239, + "loss": 2.7019, + "step": 3929 + }, + { + 
"epoch": 0.3171656847712049, + "grad_norm": 0.7960470914840698, + "learning_rate": 0.00018227078309629606, + "loss": 2.661, + "step": 3930 + }, + { + "epoch": 0.3172463885077879, + "grad_norm": 0.7731126546859741, + "learning_rate": 0.00018226180778106526, + "loss": 2.7023, + "step": 3931 + }, + { + "epoch": 0.3173270922443709, + "grad_norm": 0.7561383843421936, + "learning_rate": 0.00018225283041565515, + "loss": 2.6768, + "step": 3932 + }, + { + "epoch": 0.3174077959809539, + "grad_norm": 0.7578409910202026, + "learning_rate": 0.0001822438510002895, + "loss": 2.7145, + "step": 3933 + }, + { + "epoch": 0.3174884997175369, + "grad_norm": 0.7901952862739563, + "learning_rate": 0.00018223486953519214, + "loss": 2.7121, + "step": 3934 + }, + { + "epoch": 0.31756920345411993, + "grad_norm": 0.82305908203125, + "learning_rate": 0.0001822258860205868, + "loss": 2.7553, + "step": 3935 + }, + { + "epoch": 0.3176499071907029, + "grad_norm": 0.748055636882782, + "learning_rate": 0.0001822169004566975, + "loss": 2.7236, + "step": 3936 + }, + { + "epoch": 0.31773061092728594, + "grad_norm": 0.7981358766555786, + "learning_rate": 0.0001822079128437481, + "loss": 2.7444, + "step": 3937 + }, + { + "epoch": 0.3178113146638689, + "grad_norm": 0.7938945889472961, + "learning_rate": 0.0001821989231819626, + "loss": 2.7512, + "step": 3938 + }, + { + "epoch": 0.31789201840045195, + "grad_norm": 0.7250397205352783, + "learning_rate": 0.0001821899314715651, + "loss": 2.6843, + "step": 3939 + }, + { + "epoch": 0.3179727221370349, + "grad_norm": 0.8844723701477051, + "learning_rate": 0.00018218093771277965, + "loss": 2.6295, + "step": 3940 + }, + { + "epoch": 0.31805342587361796, + "grad_norm": 0.7545698881149292, + "learning_rate": 0.0001821719419058304, + "loss": 2.7478, + "step": 3941 + }, + { + "epoch": 0.31813412961020093, + "grad_norm": 0.7254738807678223, + "learning_rate": 0.00018216294405094157, + "loss": 2.665, + "step": 3942 + }, + { + "epoch": 0.31821483334678397, + 
"grad_norm": 0.7664754390716553, + "learning_rate": 0.00018215394414833737, + "loss": 2.7431, + "step": 3943 + }, + { + "epoch": 0.31829553708336694, + "grad_norm": 0.8250303864479065, + "learning_rate": 0.00018214494219824217, + "loss": 2.6957, + "step": 3944 + }, + { + "epoch": 0.31837624081995, + "grad_norm": 0.7425532341003418, + "learning_rate": 0.00018213593820088026, + "loss": 2.666, + "step": 3945 + }, + { + "epoch": 0.31845694455653295, + "grad_norm": 0.6943121552467346, + "learning_rate": 0.00018212693215647604, + "loss": 2.716, + "step": 3946 + }, + { + "epoch": 0.318537648293116, + "grad_norm": 0.732829213142395, + "learning_rate": 0.00018211792406525403, + "loss": 2.6557, + "step": 3947 + }, + { + "epoch": 0.31861835202969896, + "grad_norm": 0.7666537165641785, + "learning_rate": 0.00018210891392743866, + "loss": 2.7275, + "step": 3948 + }, + { + "epoch": 0.318699055766282, + "grad_norm": 0.7652621865272522, + "learning_rate": 0.00018209990174325455, + "loss": 2.6372, + "step": 3949 + }, + { + "epoch": 0.31877975950286497, + "grad_norm": 0.7416055202484131, + "learning_rate": 0.00018209088751292626, + "loss": 2.6688, + "step": 3950 + }, + { + "epoch": 0.318860463239448, + "grad_norm": 0.7504609227180481, + "learning_rate": 0.00018208187123667848, + "loss": 2.6912, + "step": 3951 + }, + { + "epoch": 0.318941166976031, + "grad_norm": 0.7308809757232666, + "learning_rate": 0.00018207285291473588, + "loss": 2.7272, + "step": 3952 + }, + { + "epoch": 0.319021870712614, + "grad_norm": 0.8031618595123291, + "learning_rate": 0.00018206383254732326, + "loss": 2.7354, + "step": 3953 + }, + { + "epoch": 0.319102574449197, + "grad_norm": 0.81386798620224, + "learning_rate": 0.00018205481013466542, + "loss": 2.676, + "step": 3954 + }, + { + "epoch": 0.31918327818578, + "grad_norm": 0.7845911383628845, + "learning_rate": 0.0001820457856769872, + "loss": 2.7094, + "step": 3955 + }, + { + "epoch": 0.319263981922363, + "grad_norm": 0.7189298272132874, + 
"learning_rate": 0.00018203675917451357, + "loss": 2.6764, + "step": 3956 + }, + { + "epoch": 0.319344685658946, + "grad_norm": 0.8253228664398193, + "learning_rate": 0.00018202773062746944, + "loss": 2.6805, + "step": 3957 + }, + { + "epoch": 0.319425389395529, + "grad_norm": 0.7965289950370789, + "learning_rate": 0.0001820187000360798, + "loss": 2.7148, + "step": 3958 + }, + { + "epoch": 0.31950609313211203, + "grad_norm": 0.7505398988723755, + "learning_rate": 0.0001820096674005698, + "loss": 2.6732, + "step": 3959 + }, + { + "epoch": 0.319586796868695, + "grad_norm": 0.7554877400398254, + "learning_rate": 0.0001820006327211645, + "loss": 2.7467, + "step": 3960 + }, + { + "epoch": 0.31966750060527804, + "grad_norm": 0.7836194038391113, + "learning_rate": 0.00018199159599808907, + "loss": 2.7252, + "step": 3961 + }, + { + "epoch": 0.319748204341861, + "grad_norm": 0.7967261672019958, + "learning_rate": 0.00018198255723156877, + "loss": 2.6814, + "step": 3962 + }, + { + "epoch": 0.31982890807844405, + "grad_norm": 0.7411713600158691, + "learning_rate": 0.00018197351642182882, + "loss": 2.6928, + "step": 3963 + }, + { + "epoch": 0.31990961181502703, + "grad_norm": 0.6961422562599182, + "learning_rate": 0.00018196447356909454, + "loss": 2.6651, + "step": 3964 + }, + { + "epoch": 0.31999031555161006, + "grad_norm": 0.7245771884918213, + "learning_rate": 0.00018195542867359134, + "loss": 2.6726, + "step": 3965 + }, + { + "epoch": 0.32007101928819304, + "grad_norm": 0.784654974937439, + "learning_rate": 0.00018194638173554462, + "loss": 2.6829, + "step": 3966 + }, + { + "epoch": 0.32015172302477607, + "grad_norm": 0.7373329997062683, + "learning_rate": 0.00018193733275517985, + "loss": 2.6481, + "step": 3967 + }, + { + "epoch": 0.32023242676135905, + "grad_norm": 0.7878682613372803, + "learning_rate": 0.00018192828173272258, + "loss": 2.6701, + "step": 3968 + }, + { + "epoch": 0.3203131304979421, + "grad_norm": 0.759676992893219, + "learning_rate": 
0.00018191922866839835, + "loss": 2.7218, + "step": 3969 + }, + { + "epoch": 0.32039383423452505, + "grad_norm": 0.7923088669776917, + "learning_rate": 0.00018191017356243282, + "loss": 2.6841, + "step": 3970 + }, + { + "epoch": 0.3204745379711081, + "grad_norm": 0.7084882855415344, + "learning_rate": 0.00018190111641505164, + "loss": 2.7167, + "step": 3971 + }, + { + "epoch": 0.32055524170769106, + "grad_norm": 0.7166235446929932, + "learning_rate": 0.00018189205722648054, + "loss": 2.6647, + "step": 3972 + }, + { + "epoch": 0.3206359454442741, + "grad_norm": 0.7997722029685974, + "learning_rate": 0.0001818829959969453, + "loss": 2.7199, + "step": 3973 + }, + { + "epoch": 0.32071664918085707, + "grad_norm": 0.8309516310691833, + "learning_rate": 0.0001818739327266718, + "loss": 2.8006, + "step": 3974 + }, + { + "epoch": 0.3207973529174401, + "grad_norm": 0.7164002656936646, + "learning_rate": 0.00018186486741588582, + "loss": 2.6258, + "step": 3975 + }, + { + "epoch": 0.3208780566540231, + "grad_norm": 0.7715865969657898, + "learning_rate": 0.0001818558000648134, + "loss": 2.7034, + "step": 3976 + }, + { + "epoch": 0.3209587603906061, + "grad_norm": 0.7806593775749207, + "learning_rate": 0.0001818467306736804, + "loss": 2.6758, + "step": 3977 + }, + { + "epoch": 0.3210394641271891, + "grad_norm": 0.8026594519615173, + "learning_rate": 0.00018183765924271298, + "loss": 2.6976, + "step": 3978 + }, + { + "epoch": 0.32112016786377207, + "grad_norm": 0.7971245050430298, + "learning_rate": 0.00018182858577213716, + "loss": 2.7312, + "step": 3979 + }, + { + "epoch": 0.3212008716003551, + "grad_norm": 0.7347297072410583, + "learning_rate": 0.00018181951026217908, + "loss": 2.6664, + "step": 3980 + }, + { + "epoch": 0.3212815753369381, + "grad_norm": 0.7929779291152954, + "learning_rate": 0.0001818104327130649, + "loss": 2.6603, + "step": 3981 + }, + { + "epoch": 0.3213622790735211, + "grad_norm": 0.7465224862098694, + "learning_rate": 0.00018180135312502089, + "loss": 
2.6566, + "step": 3982 + }, + { + "epoch": 0.3214429828101041, + "grad_norm": 0.7114695906639099, + "learning_rate": 0.00018179227149827334, + "loss": 2.6492, + "step": 3983 + }, + { + "epoch": 0.3215236865466871, + "grad_norm": 0.7179337739944458, + "learning_rate": 0.00018178318783304857, + "loss": 2.6778, + "step": 3984 + }, + { + "epoch": 0.3216043902832701, + "grad_norm": 0.7182629704475403, + "learning_rate": 0.000181774102129573, + "loss": 2.7057, + "step": 3985 + }, + { + "epoch": 0.3216850940198531, + "grad_norm": 0.7383119463920593, + "learning_rate": 0.000181765014388073, + "loss": 2.6633, + "step": 3986 + }, + { + "epoch": 0.3217657977564361, + "grad_norm": 0.7340527176856995, + "learning_rate": 0.00018175592460877512, + "loss": 2.6838, + "step": 3987 + }, + { + "epoch": 0.32184650149301913, + "grad_norm": 0.7934359312057495, + "learning_rate": 0.00018174683279190593, + "loss": 2.6795, + "step": 3988 + }, + { + "epoch": 0.3219272052296021, + "grad_norm": 0.6960840821266174, + "learning_rate": 0.00018173773893769192, + "loss": 2.6669, + "step": 3989 + }, + { + "epoch": 0.32200790896618514, + "grad_norm": 0.7513574361801147, + "learning_rate": 0.00018172864304635985, + "loss": 2.6744, + "step": 3990 + }, + { + "epoch": 0.3220886127027681, + "grad_norm": 0.7516636848449707, + "learning_rate": 0.00018171954511813629, + "loss": 2.6652, + "step": 3991 + }, + { + "epoch": 0.32216931643935115, + "grad_norm": 0.7817716002464294, + "learning_rate": 0.00018171044515324808, + "loss": 2.6671, + "step": 3992 + }, + { + "epoch": 0.3222500201759341, + "grad_norm": 0.6859925389289856, + "learning_rate": 0.000181701343151922, + "loss": 2.6984, + "step": 3993 + }, + { + "epoch": 0.32233072391251716, + "grad_norm": 0.7669627666473389, + "learning_rate": 0.00018169223911438485, + "loss": 2.7102, + "step": 3994 + }, + { + "epoch": 0.32241142764910014, + "grad_norm": 0.784724235534668, + "learning_rate": 0.00018168313304086357, + "loss": 2.7413, + "step": 3995 + }, + { + 
"epoch": 0.32249213138568317, + "grad_norm": 0.7341497540473938, + "learning_rate": 0.00018167402493158509, + "loss": 2.706, + "step": 3996 + }, + { + "epoch": 0.32257283512226614, + "grad_norm": 0.7975730299949646, + "learning_rate": 0.00018166491478677641, + "loss": 2.6896, + "step": 3997 + }, + { + "epoch": 0.3226535388588492, + "grad_norm": 0.8138537406921387, + "learning_rate": 0.00018165580260666458, + "loss": 2.6986, + "step": 3998 + }, + { + "epoch": 0.32273424259543215, + "grad_norm": 0.6734997034072876, + "learning_rate": 0.0001816466883914767, + "loss": 2.6686, + "step": 3999 + }, + { + "epoch": 0.3228149463320152, + "grad_norm": 0.7742779850959778, + "learning_rate": 0.00018163757214143992, + "loss": 2.7222, + "step": 4000 + }, + { + "epoch": 0.3228149463320152, + "eval_loss": 2.615234375, + "eval_runtime": 783.0394, + "eval_samples_per_second": 3.346, + "eval_steps_per_second": 0.558, + "step": 4000 + }, + { + "epoch": 0.32289565006859816, + "grad_norm": 0.7654715180397034, + "learning_rate": 0.00018162845385678145, + "loss": 2.7016, + "step": 4001 + }, + { + "epoch": 0.3229763538051812, + "grad_norm": 0.8698763251304626, + "learning_rate": 0.0001816193335377285, + "loss": 2.6709, + "step": 4002 + }, + { + "epoch": 0.32305705754176417, + "grad_norm": 0.758056640625, + "learning_rate": 0.00018161021118450843, + "loss": 2.7277, + "step": 4003 + }, + { + "epoch": 0.3231377612783472, + "grad_norm": 0.7462654113769531, + "learning_rate": 0.00018160108679734856, + "loss": 2.623, + "step": 4004 + }, + { + "epoch": 0.3232184650149302, + "grad_norm": 0.7274953722953796, + "learning_rate": 0.00018159196037647628, + "loss": 2.6875, + "step": 4005 + }, + { + "epoch": 0.3232991687515132, + "grad_norm": 0.7737346887588501, + "learning_rate": 0.0001815828319221191, + "loss": 2.6967, + "step": 4006 + }, + { + "epoch": 0.3233798724880962, + "grad_norm": 0.7793172001838684, + "learning_rate": 0.00018157370143450448, + "loss": 2.724, + "step": 4007 + }, + { + "epoch": 
0.3234605762246792, + "grad_norm": 0.7791805863380432, + "learning_rate": 0.00018156456891385995, + "loss": 2.6653, + "step": 4008 + }, + { + "epoch": 0.3235412799612622, + "grad_norm": 0.7225624918937683, + "learning_rate": 0.0001815554343604132, + "loss": 2.745, + "step": 4009 + }, + { + "epoch": 0.32362198369784523, + "grad_norm": 0.6958494782447815, + "learning_rate": 0.0001815462977743918, + "loss": 2.6856, + "step": 4010 + }, + { + "epoch": 0.3237026874344282, + "grad_norm": 0.7572030425071716, + "learning_rate": 0.0001815371591560235, + "loss": 2.7053, + "step": 4011 + }, + { + "epoch": 0.32378339117101124, + "grad_norm": 0.7133952975273132, + "learning_rate": 0.00018152801850553605, + "loss": 2.6984, + "step": 4012 + }, + { + "epoch": 0.3238640949075942, + "grad_norm": 0.7598705291748047, + "learning_rate": 0.00018151887582315728, + "loss": 2.6632, + "step": 4013 + }, + { + "epoch": 0.32394479864417725, + "grad_norm": 0.7670698165893555, + "learning_rate": 0.00018150973110911503, + "loss": 2.7035, + "step": 4014 + }, + { + "epoch": 0.3240255023807602, + "grad_norm": 0.7547060251235962, + "learning_rate": 0.00018150058436363723, + "loss": 2.6531, + "step": 4015 + }, + { + "epoch": 0.32410620611734325, + "grad_norm": 0.7943035364151001, + "learning_rate": 0.00018149143558695178, + "loss": 2.766, + "step": 4016 + }, + { + "epoch": 0.32418690985392623, + "grad_norm": 0.864356517791748, + "learning_rate": 0.00018148228477928675, + "loss": 2.7134, + "step": 4017 + }, + { + "epoch": 0.32426761359050926, + "grad_norm": 0.7773902416229248, + "learning_rate": 0.00018147313194087018, + "loss": 2.6948, + "step": 4018 + }, + { + "epoch": 0.32434831732709224, + "grad_norm": 0.839131772518158, + "learning_rate": 0.0001814639770719302, + "loss": 2.7393, + "step": 4019 + }, + { + "epoch": 0.32442902106367527, + "grad_norm": 0.807837963104248, + "learning_rate": 0.00018145482017269498, + "loss": 2.7835, + "step": 4020 + }, + { + "epoch": 0.32450972480025825, + "grad_norm": 
0.7133228182792664, + "learning_rate": 0.00018144566124339272, + "loss": 2.6859, + "step": 4021 + }, + { + "epoch": 0.3245904285368413, + "grad_norm": 0.8450621962547302, + "learning_rate": 0.00018143650028425162, + "loss": 2.7548, + "step": 4022 + }, + { + "epoch": 0.32467113227342426, + "grad_norm": 0.8594980835914612, + "learning_rate": 0.00018142733729550013, + "loss": 2.6636, + "step": 4023 + }, + { + "epoch": 0.3247518360100073, + "grad_norm": 0.7134621739387512, + "learning_rate": 0.0001814181722773665, + "loss": 2.6501, + "step": 4024 + }, + { + "epoch": 0.32483253974659027, + "grad_norm": 0.8630430698394775, + "learning_rate": 0.0001814090052300792, + "loss": 2.6994, + "step": 4025 + }, + { + "epoch": 0.3249132434831733, + "grad_norm": 0.7044873237609863, + "learning_rate": 0.00018139983615386666, + "loss": 2.6603, + "step": 4026 + }, + { + "epoch": 0.3249939472197563, + "grad_norm": 0.6896052360534668, + "learning_rate": 0.00018139066504895744, + "loss": 2.6649, + "step": 4027 + }, + { + "epoch": 0.3250746509563393, + "grad_norm": 0.802855372428894, + "learning_rate": 0.00018138149191558012, + "loss": 2.7067, + "step": 4028 + }, + { + "epoch": 0.3251553546929223, + "grad_norm": 0.7555437088012695, + "learning_rate": 0.00018137231675396324, + "loss": 2.6471, + "step": 4029 + }, + { + "epoch": 0.32523605842950526, + "grad_norm": 0.6846967339515686, + "learning_rate": 0.00018136313956433552, + "loss": 2.6774, + "step": 4030 + }, + { + "epoch": 0.3253167621660883, + "grad_norm": 0.7435858249664307, + "learning_rate": 0.0001813539603469257, + "loss": 2.7135, + "step": 4031 + }, + { + "epoch": 0.32539746590267127, + "grad_norm": 0.7669098377227783, + "learning_rate": 0.00018134477910196253, + "loss": 2.7014, + "step": 4032 + }, + { + "epoch": 0.3254781696392543, + "grad_norm": 0.7797521352767944, + "learning_rate": 0.00018133559582967482, + "loss": 2.7229, + "step": 4033 + }, + { + "epoch": 0.3255588733758373, + "grad_norm": 0.7377886176109314, + 
"learning_rate": 0.00018132641053029142, + "loss": 2.7196, + "step": 4034 + }, + { + "epoch": 0.3256395771124203, + "grad_norm": 0.7387986779212952, + "learning_rate": 0.0001813172232040413, + "loss": 2.687, + "step": 4035 + }, + { + "epoch": 0.3257202808490033, + "grad_norm": 0.7276624441146851, + "learning_rate": 0.0001813080338511534, + "loss": 2.6954, + "step": 4036 + }, + { + "epoch": 0.3258009845855863, + "grad_norm": 0.7929670214653015, + "learning_rate": 0.00018129884247185683, + "loss": 2.7431, + "step": 4037 + }, + { + "epoch": 0.3258816883221693, + "grad_norm": 0.7896441221237183, + "learning_rate": 0.0001812896490663805, + "loss": 2.6823, + "step": 4038 + }, + { + "epoch": 0.3259623920587523, + "grad_norm": 0.8642957210540771, + "learning_rate": 0.00018128045363495368, + "loss": 2.7334, + "step": 4039 + }, + { + "epoch": 0.3260430957953353, + "grad_norm": 0.7156081795692444, + "learning_rate": 0.00018127125617780542, + "loss": 2.6886, + "step": 4040 + }, + { + "epoch": 0.32612379953191833, + "grad_norm": 0.8260853290557861, + "learning_rate": 0.00018126205669516507, + "loss": 2.6802, + "step": 4041 + }, + { + "epoch": 0.3262045032685013, + "grad_norm": 0.6853542327880859, + "learning_rate": 0.00018125285518726182, + "loss": 2.6392, + "step": 4042 + }, + { + "epoch": 0.32628520700508434, + "grad_norm": 0.7574017643928528, + "learning_rate": 0.00018124365165432505, + "loss": 2.7412, + "step": 4043 + }, + { + "epoch": 0.3263659107416673, + "grad_norm": 0.8656191825866699, + "learning_rate": 0.00018123444609658408, + "loss": 2.6903, + "step": 4044 + }, + { + "epoch": 0.32644661447825035, + "grad_norm": 0.7443257570266724, + "learning_rate": 0.00018122523851426837, + "loss": 2.682, + "step": 4045 + }, + { + "epoch": 0.32652731821483333, + "grad_norm": 0.7222229242324829, + "learning_rate": 0.0001812160289076074, + "loss": 2.6196, + "step": 4046 + }, + { + "epoch": 0.32660802195141636, + "grad_norm": 0.8531985878944397, + "learning_rate": 
0.00018120681727683066, + "loss": 2.6777, + "step": 4047 + }, + { + "epoch": 0.32668872568799934, + "grad_norm": 0.7380290627479553, + "learning_rate": 0.0001811976036221678, + "loss": 2.6847, + "step": 4048 + }, + { + "epoch": 0.32676942942458237, + "grad_norm": 0.7250707149505615, + "learning_rate": 0.00018118838794384837, + "loss": 2.6846, + "step": 4049 + }, + { + "epoch": 0.32685013316116535, + "grad_norm": 0.763504147529602, + "learning_rate": 0.00018117917024210208, + "loss": 2.69, + "step": 4050 + }, + { + "epoch": 0.3269308368977484, + "grad_norm": 0.7740737795829773, + "learning_rate": 0.00018116995051715867, + "loss": 2.6945, + "step": 4051 + }, + { + "epoch": 0.32701154063433135, + "grad_norm": 0.7777624726295471, + "learning_rate": 0.00018116072876924792, + "loss": 2.6918, + "step": 4052 + }, + { + "epoch": 0.3270922443709144, + "grad_norm": 0.7957910895347595, + "learning_rate": 0.0001811515049985997, + "loss": 2.7237, + "step": 4053 + }, + { + "epoch": 0.32717294810749736, + "grad_norm": 0.7828991413116455, + "learning_rate": 0.00018114227920544375, + "loss": 2.7008, + "step": 4054 + }, + { + "epoch": 0.3272536518440804, + "grad_norm": 0.6695161461830139, + "learning_rate": 0.00018113305139001016, + "loss": 2.7311, + "step": 4055 + }, + { + "epoch": 0.32733435558066337, + "grad_norm": 0.7693436145782471, + "learning_rate": 0.00018112382155252883, + "loss": 2.7102, + "step": 4056 + }, + { + "epoch": 0.3274150593172464, + "grad_norm": 0.7520042657852173, + "learning_rate": 0.0001811145896932298, + "loss": 2.6455, + "step": 4057 + }, + { + "epoch": 0.3274957630538294, + "grad_norm": 0.786834716796875, + "learning_rate": 0.00018110535581234317, + "loss": 2.6965, + "step": 4058 + }, + { + "epoch": 0.3275764667904124, + "grad_norm": 0.742001473903656, + "learning_rate": 0.00018109611991009905, + "loss": 2.7341, + "step": 4059 + }, + { + "epoch": 0.3276571705269954, + "grad_norm": 0.813522219657898, + "learning_rate": 0.00018108688198672766, + "loss": 
2.8116, + "step": 4060 + }, + { + "epoch": 0.3277378742635784, + "grad_norm": 0.7611314058303833, + "learning_rate": 0.00018107764204245916, + "loss": 2.6741, + "step": 4061 + }, + { + "epoch": 0.3278185780001614, + "grad_norm": 0.7285993695259094, + "learning_rate": 0.00018106840007752392, + "loss": 2.671, + "step": 4062 + }, + { + "epoch": 0.32789928173674443, + "grad_norm": 0.773151695728302, + "learning_rate": 0.0001810591560921522, + "loss": 2.7106, + "step": 4063 + }, + { + "epoch": 0.3279799854733274, + "grad_norm": 0.7448920011520386, + "learning_rate": 0.00018104991008657445, + "loss": 2.7176, + "step": 4064 + }, + { + "epoch": 0.32806068920991044, + "grad_norm": 0.7088467478752136, + "learning_rate": 0.0001810406620610211, + "loss": 2.7085, + "step": 4065 + }, + { + "epoch": 0.3281413929464934, + "grad_norm": 0.7507789731025696, + "learning_rate": 0.00018103141201572255, + "loss": 2.7361, + "step": 4066 + }, + { + "epoch": 0.32822209668307645, + "grad_norm": 0.7065643072128296, + "learning_rate": 0.00018102215995090943, + "loss": 2.6573, + "step": 4067 + }, + { + "epoch": 0.3283028004196594, + "grad_norm": 0.6888713836669922, + "learning_rate": 0.0001810129058668123, + "loss": 2.6699, + "step": 4068 + }, + { + "epoch": 0.32838350415624246, + "grad_norm": 0.736347496509552, + "learning_rate": 0.00018100364976366174, + "loss": 2.7089, + "step": 4069 + }, + { + "epoch": 0.32846420789282543, + "grad_norm": 0.6854562759399414, + "learning_rate": 0.0001809943916416885, + "loss": 2.7051, + "step": 4070 + }, + { + "epoch": 0.32854491162940846, + "grad_norm": 0.7481048107147217, + "learning_rate": 0.0001809851315011233, + "loss": 2.7428, + "step": 4071 + }, + { + "epoch": 0.32862561536599144, + "grad_norm": 0.7600961923599243, + "learning_rate": 0.0001809758693421969, + "loss": 2.7153, + "step": 4072 + }, + { + "epoch": 0.3287063191025745, + "grad_norm": 0.7545063495635986, + "learning_rate": 0.00018096660516514024, + "loss": 2.6736, + "step": 4073 + }, + { + 
"epoch": 0.32878702283915745, + "grad_norm": 0.7967175841331482, + "learning_rate": 0.0001809573389701841, + "loss": 2.6711, + "step": 4074 + }, + { + "epoch": 0.3288677265757405, + "grad_norm": 0.7115446925163269, + "learning_rate": 0.00018094807075755943, + "loss": 2.6761, + "step": 4075 + }, + { + "epoch": 0.32894843031232346, + "grad_norm": 0.8230876326560974, + "learning_rate": 0.00018093880052749725, + "loss": 2.6749, + "step": 4076 + }, + { + "epoch": 0.3290291340489065, + "grad_norm": 0.8549706935882568, + "learning_rate": 0.00018092952828022856, + "loss": 2.7084, + "step": 4077 + }, + { + "epoch": 0.32910983778548947, + "grad_norm": 0.7379534244537354, + "learning_rate": 0.00018092025401598448, + "loss": 2.7241, + "step": 4078 + }, + { + "epoch": 0.3291905415220725, + "grad_norm": 0.7659998536109924, + "learning_rate": 0.00018091097773499616, + "loss": 2.7108, + "step": 4079 + }, + { + "epoch": 0.3292712452586555, + "grad_norm": 0.8074536323547363, + "learning_rate": 0.00018090169943749476, + "loss": 2.676, + "step": 4080 + }, + { + "epoch": 0.32935194899523845, + "grad_norm": 0.7588536143302917, + "learning_rate": 0.00018089241912371153, + "loss": 2.639, + "step": 4081 + }, + { + "epoch": 0.3294326527318215, + "grad_norm": 0.7510811686515808, + "learning_rate": 0.00018088313679387775, + "loss": 2.6722, + "step": 4082 + }, + { + "epoch": 0.32951335646840446, + "grad_norm": 0.7538900971412659, + "learning_rate": 0.0001808738524482248, + "loss": 2.6917, + "step": 4083 + }, + { + "epoch": 0.3295940602049875, + "grad_norm": 0.8071155548095703, + "learning_rate": 0.00018086456608698402, + "loss": 2.6964, + "step": 4084 + }, + { + "epoch": 0.32967476394157047, + "grad_norm": 0.7778098583221436, + "learning_rate": 0.00018085527771038686, + "loss": 2.7301, + "step": 4085 + }, + { + "epoch": 0.3297554676781535, + "grad_norm": 0.7717564702033997, + "learning_rate": 0.00018084598731866485, + "loss": 2.7484, + "step": 4086 + }, + { + "epoch": 0.3298361714147365, + 
"grad_norm": 0.7361736297607422, + "learning_rate": 0.00018083669491204948, + "loss": 2.6299, + "step": 4087 + }, + { + "epoch": 0.3299168751513195, + "grad_norm": 0.736681342124939, + "learning_rate": 0.00018082740049077238, + "loss": 2.7521, + "step": 4088 + }, + { + "epoch": 0.3299975788879025, + "grad_norm": 0.8011857867240906, + "learning_rate": 0.00018081810405506517, + "loss": 2.724, + "step": 4089 + }, + { + "epoch": 0.3300782826244855, + "grad_norm": 0.7741932272911072, + "learning_rate": 0.00018080880560515956, + "loss": 2.6766, + "step": 4090 + }, + { + "epoch": 0.3301589863610685, + "grad_norm": 0.7321778535842896, + "learning_rate": 0.00018079950514128724, + "loss": 2.6614, + "step": 4091 + }, + { + "epoch": 0.33023969009765153, + "grad_norm": 0.7916514277458191, + "learning_rate": 0.00018079020266368006, + "loss": 2.7177, + "step": 4092 + }, + { + "epoch": 0.3303203938342345, + "grad_norm": 0.7961388826370239, + "learning_rate": 0.00018078089817256986, + "loss": 2.6671, + "step": 4093 + }, + { + "epoch": 0.33040109757081754, + "grad_norm": 0.7167038321495056, + "learning_rate": 0.0001807715916681885, + "loss": 2.6989, + "step": 4094 + }, + { + "epoch": 0.3304818013074005, + "grad_norm": 0.6924864649772644, + "learning_rate": 0.00018076228315076794, + "loss": 2.6484, + "step": 4095 + }, + { + "epoch": 0.33056250504398355, + "grad_norm": 0.777881383895874, + "learning_rate": 0.00018075297262054013, + "loss": 2.6498, + "step": 4096 + }, + { + "epoch": 0.3306432087805665, + "grad_norm": 0.7878376841545105, + "learning_rate": 0.0001807436600777372, + "loss": 2.7745, + "step": 4097 + }, + { + "epoch": 0.33072391251714955, + "grad_norm": 0.8418465256690979, + "learning_rate": 0.0001807343455225912, + "loss": 2.7195, + "step": 4098 + }, + { + "epoch": 0.33080461625373253, + "grad_norm": 0.7780830264091492, + "learning_rate": 0.00018072502895533424, + "loss": 2.6652, + "step": 4099 + }, + { + "epoch": 0.33088531999031556, + "grad_norm": 0.7102445960044861, + 
"learning_rate": 0.00018071571037619853, + "loss": 2.6618, + "step": 4100 + }, + { + "epoch": 0.33096602372689854, + "grad_norm": 0.7028098106384277, + "learning_rate": 0.00018070638978541633, + "loss": 2.7114, + "step": 4101 + }, + { + "epoch": 0.33104672746348157, + "grad_norm": 0.7529525756835938, + "learning_rate": 0.00018069706718321996, + "loss": 2.7231, + "step": 4102 + }, + { + "epoch": 0.33112743120006455, + "grad_norm": 0.7404564023017883, + "learning_rate": 0.0001806877425698417, + "loss": 2.6564, + "step": 4103 + }, + { + "epoch": 0.3312081349366476, + "grad_norm": 0.7725130319595337, + "learning_rate": 0.00018067841594551401, + "loss": 2.677, + "step": 4104 + }, + { + "epoch": 0.33128883867323056, + "grad_norm": 0.7616425156593323, + "learning_rate": 0.00018066908731046927, + "loss": 2.6586, + "step": 4105 + }, + { + "epoch": 0.3313695424098136, + "grad_norm": 0.7318183779716492, + "learning_rate": 0.00018065975666494002, + "loss": 2.6624, + "step": 4106 + }, + { + "epoch": 0.33145024614639657, + "grad_norm": 0.7012802958488464, + "learning_rate": 0.00018065042400915878, + "loss": 2.6663, + "step": 4107 + }, + { + "epoch": 0.3315309498829796, + "grad_norm": 0.815226674079895, + "learning_rate": 0.00018064108934335814, + "loss": 2.7248, + "step": 4108 + }, + { + "epoch": 0.3316116536195626, + "grad_norm": 0.68972247838974, + "learning_rate": 0.00018063175266777077, + "loss": 2.6961, + "step": 4109 + }, + { + "epoch": 0.3316923573561456, + "grad_norm": 0.7563794255256653, + "learning_rate": 0.00018062241398262937, + "loss": 2.6526, + "step": 4110 + }, + { + "epoch": 0.3317730610927286, + "grad_norm": 0.7878836989402771, + "learning_rate": 0.00018061307328816662, + "loss": 2.7316, + "step": 4111 + }, + { + "epoch": 0.3318537648293116, + "grad_norm": 0.7189129590988159, + "learning_rate": 0.00018060373058461537, + "loss": 2.6577, + "step": 4112 + }, + { + "epoch": 0.3319344685658946, + "grad_norm": 0.7517561912536621, + "learning_rate": 
0.00018059438587220847, + "loss": 2.668, + "step": 4113 + }, + { + "epoch": 0.3320151723024776, + "grad_norm": 0.7602595686912537, + "learning_rate": 0.00018058503915117878, + "loss": 2.6741, + "step": 4114 + }, + { + "epoch": 0.3320958760390606, + "grad_norm": 0.7702187299728394, + "learning_rate": 0.00018057569042175927, + "loss": 2.7082, + "step": 4115 + }, + { + "epoch": 0.33217657977564363, + "grad_norm": 0.7289660573005676, + "learning_rate": 0.00018056633968418294, + "loss": 2.6728, + "step": 4116 + }, + { + "epoch": 0.3322572835122266, + "grad_norm": 0.6936683654785156, + "learning_rate": 0.0001805569869386828, + "loss": 2.6735, + "step": 4117 + }, + { + "epoch": 0.33233798724880964, + "grad_norm": 0.7128138542175293, + "learning_rate": 0.000180547632185492, + "loss": 2.646, + "step": 4118 + }, + { + "epoch": 0.3324186909853926, + "grad_norm": 0.7234248518943787, + "learning_rate": 0.00018053827542484363, + "loss": 2.6497, + "step": 4119 + }, + { + "epoch": 0.33249939472197565, + "grad_norm": 0.7084202170372009, + "learning_rate": 0.0001805289166569709, + "loss": 2.6328, + "step": 4120 + }, + { + "epoch": 0.3325800984585586, + "grad_norm": 0.8068051934242249, + "learning_rate": 0.00018051955588210708, + "loss": 2.6576, + "step": 4121 + }, + { + "epoch": 0.33266080219514166, + "grad_norm": 0.787680447101593, + "learning_rate": 0.00018051019310048544, + "loss": 2.7091, + "step": 4122 + }, + { + "epoch": 0.33274150593172463, + "grad_norm": 0.698946475982666, + "learning_rate": 0.00018050082831233931, + "loss": 2.6657, + "step": 4123 + }, + { + "epoch": 0.33282220966830767, + "grad_norm": 0.7946122288703918, + "learning_rate": 0.00018049146151790215, + "loss": 2.6981, + "step": 4124 + }, + { + "epoch": 0.33290291340489064, + "grad_norm": 0.8025123476982117, + "learning_rate": 0.00018048209271740736, + "loss": 2.6878, + "step": 4125 + }, + { + "epoch": 0.3329836171414737, + "grad_norm": 0.7493376135826111, + "learning_rate": 0.0001804727219110884, + "loss": 
2.6556, + "step": 4126 + }, + { + "epoch": 0.33306432087805665, + "grad_norm": 0.7143186926841736, + "learning_rate": 0.00018046334909917886, + "loss": 2.6879, + "step": 4127 + }, + { + "epoch": 0.3331450246146397, + "grad_norm": 0.7375641465187073, + "learning_rate": 0.00018045397428191235, + "loss": 2.6817, + "step": 4128 + }, + { + "epoch": 0.33322572835122266, + "grad_norm": 0.7201291918754578, + "learning_rate": 0.00018044459745952248, + "loss": 2.6765, + "step": 4129 + }, + { + "epoch": 0.3333064320878057, + "grad_norm": 0.7924519777297974, + "learning_rate": 0.00018043521863224296, + "loss": 2.7748, + "step": 4130 + }, + { + "epoch": 0.33338713582438867, + "grad_norm": 0.7773354053497314, + "learning_rate": 0.00018042583780030752, + "loss": 2.6839, + "step": 4131 + }, + { + "epoch": 0.33346783956097165, + "grad_norm": 0.7527397274971008, + "learning_rate": 0.00018041645496394998, + "loss": 2.6749, + "step": 4132 + }, + { + "epoch": 0.3335485432975547, + "grad_norm": 0.7329208254814148, + "learning_rate": 0.00018040707012340418, + "loss": 2.7535, + "step": 4133 + }, + { + "epoch": 0.33362924703413765, + "grad_norm": 0.7637773752212524, + "learning_rate": 0.00018039768327890397, + "loss": 2.632, + "step": 4134 + }, + { + "epoch": 0.3337099507707207, + "grad_norm": 0.823623776435852, + "learning_rate": 0.00018038829443068333, + "loss": 2.7122, + "step": 4135 + }, + { + "epoch": 0.33379065450730366, + "grad_norm": 0.8040826916694641, + "learning_rate": 0.00018037890357897632, + "loss": 2.7197, + "step": 4136 + }, + { + "epoch": 0.3338713582438867, + "grad_norm": 0.7483998537063599, + "learning_rate": 0.00018036951072401686, + "loss": 2.6535, + "step": 4137 + }, + { + "epoch": 0.33395206198046967, + "grad_norm": 0.8141106367111206, + "learning_rate": 0.00018036011586603914, + "loss": 2.7127, + "step": 4138 + }, + { + "epoch": 0.3340327657170527, + "grad_norm": 0.7226041555404663, + "learning_rate": 0.00018035071900527724, + "loss": 2.6846, + "step": 4139 + }, + { 
+ "epoch": 0.3341134694536357, + "grad_norm": 0.7624794840812683, + "learning_rate": 0.00018034132014196541, + "loss": 2.6725, + "step": 4140 + }, + { + "epoch": 0.3341941731902187, + "grad_norm": 0.7299962043762207, + "learning_rate": 0.00018033191927633785, + "loss": 2.6728, + "step": 4141 + }, + { + "epoch": 0.3342748769268017, + "grad_norm": 0.7920462489128113, + "learning_rate": 0.0001803225164086289, + "loss": 2.6544, + "step": 4142 + }, + { + "epoch": 0.3343555806633847, + "grad_norm": 0.7469778656959534, + "learning_rate": 0.00018031311153907282, + "loss": 2.7356, + "step": 4143 + }, + { + "epoch": 0.3344362843999677, + "grad_norm": 0.8831696510314941, + "learning_rate": 0.0001803037046679041, + "loss": 2.6584, + "step": 4144 + }, + { + "epoch": 0.33451698813655073, + "grad_norm": 0.8047679662704468, + "learning_rate": 0.00018029429579535715, + "loss": 2.6213, + "step": 4145 + }, + { + "epoch": 0.3345976918731337, + "grad_norm": 0.7109517455101013, + "learning_rate": 0.00018028488492166645, + "loss": 2.6622, + "step": 4146 + }, + { + "epoch": 0.33467839560971674, + "grad_norm": 0.7240141034126282, + "learning_rate": 0.0001802754720470665, + "loss": 2.6794, + "step": 4147 + }, + { + "epoch": 0.3347590993462997, + "grad_norm": 0.7292990684509277, + "learning_rate": 0.000180266057171792, + "loss": 2.6079, + "step": 4148 + }, + { + "epoch": 0.33483980308288275, + "grad_norm": 0.8055328130722046, + "learning_rate": 0.00018025664029607756, + "loss": 2.7044, + "step": 4149 + }, + { + "epoch": 0.3349205068194657, + "grad_norm": 0.8348979949951172, + "learning_rate": 0.00018024722142015781, + "loss": 2.6757, + "step": 4150 + }, + { + "epoch": 0.33500121055604876, + "grad_norm": 0.7797044515609741, + "learning_rate": 0.00018023780054426754, + "loss": 2.7125, + "step": 4151 + }, + { + "epoch": 0.33508191429263173, + "grad_norm": 0.802442729473114, + "learning_rate": 0.00018022837766864153, + "loss": 2.7121, + "step": 4152 + }, + { + "epoch": 0.33516261802921476, + 
"grad_norm": 0.7248829007148743, + "learning_rate": 0.00018021895279351463, + "loss": 2.7344, + "step": 4153 + }, + { + "epoch": 0.33524332176579774, + "grad_norm": 0.7458582520484924, + "learning_rate": 0.00018020952591912175, + "loss": 2.665, + "step": 4154 + }, + { + "epoch": 0.3353240255023808, + "grad_norm": 0.8153703808784485, + "learning_rate": 0.0001802000970456978, + "loss": 2.7416, + "step": 4155 + }, + { + "epoch": 0.33540472923896375, + "grad_norm": 0.7583708763122559, + "learning_rate": 0.00018019066617347779, + "loss": 2.7002, + "step": 4156 + }, + { + "epoch": 0.3354854329755468, + "grad_norm": 0.7522469162940979, + "learning_rate": 0.00018018123330269678, + "loss": 2.7196, + "step": 4157 + }, + { + "epoch": 0.33556613671212976, + "grad_norm": 0.7386923432350159, + "learning_rate": 0.00018017179843358983, + "loss": 2.6947, + "step": 4158 + }, + { + "epoch": 0.3356468404487128, + "grad_norm": 0.7366231083869934, + "learning_rate": 0.00018016236156639205, + "loss": 2.7377, + "step": 4159 + }, + { + "epoch": 0.33572754418529577, + "grad_norm": 0.7727232575416565, + "learning_rate": 0.00018015292270133872, + "loss": 2.7566, + "step": 4160 + }, + { + "epoch": 0.3358082479218788, + "grad_norm": 0.6781843304634094, + "learning_rate": 0.000180143481838665, + "loss": 2.6796, + "step": 4161 + }, + { + "epoch": 0.3358889516584618, + "grad_norm": 0.7036039233207703, + "learning_rate": 0.00018013403897860624, + "loss": 2.7012, + "step": 4162 + }, + { + "epoch": 0.3359696553950448, + "grad_norm": 0.8252625465393066, + "learning_rate": 0.00018012459412139776, + "loss": 2.6613, + "step": 4163 + }, + { + "epoch": 0.3360503591316278, + "grad_norm": 0.6924486756324768, + "learning_rate": 0.00018011514726727493, + "loss": 2.6425, + "step": 4164 + }, + { + "epoch": 0.3361310628682108, + "grad_norm": 0.7735962271690369, + "learning_rate": 0.0001801056984164732, + "loss": 2.7235, + "step": 4165 + }, + { + "epoch": 0.3362117666047938, + "grad_norm": 0.7439951300621033, + 
"learning_rate": 0.0001800962475692281, + "loss": 2.7428, + "step": 4166 + }, + { + "epoch": 0.3362924703413768, + "grad_norm": 0.6830539107322693, + "learning_rate": 0.0001800867947257751, + "loss": 2.5907, + "step": 4167 + }, + { + "epoch": 0.3363731740779598, + "grad_norm": 0.8355144262313843, + "learning_rate": 0.00018007733988634986, + "loss": 2.6978, + "step": 4168 + }, + { + "epoch": 0.33645387781454283, + "grad_norm": 0.6880978941917419, + "learning_rate": 0.00018006788305118798, + "loss": 2.6934, + "step": 4169 + }, + { + "epoch": 0.3365345815511258, + "grad_norm": 0.762709379196167, + "learning_rate": 0.0001800584242205251, + "loss": 2.684, + "step": 4170 + }, + { + "epoch": 0.33661528528770884, + "grad_norm": 0.7543070912361145, + "learning_rate": 0.0001800489633945971, + "loss": 2.6857, + "step": 4171 + }, + { + "epoch": 0.3366959890242918, + "grad_norm": 0.787651777267456, + "learning_rate": 0.00018003950057363964, + "loss": 2.6979, + "step": 4172 + }, + { + "epoch": 0.33677669276087485, + "grad_norm": 0.7831481099128723, + "learning_rate": 0.00018003003575788856, + "loss": 2.7158, + "step": 4173 + }, + { + "epoch": 0.33685739649745783, + "grad_norm": 0.844904363155365, + "learning_rate": 0.00018002056894757986, + "loss": 2.6459, + "step": 4174 + }, + { + "epoch": 0.33693810023404086, + "grad_norm": 0.7529420852661133, + "learning_rate": 0.00018001110014294937, + "loss": 2.685, + "step": 4175 + }, + { + "epoch": 0.33701880397062384, + "grad_norm": 0.776719868183136, + "learning_rate": 0.0001800016293442331, + "loss": 2.6353, + "step": 4176 + }, + { + "epoch": 0.33709950770720687, + "grad_norm": 0.7988671660423279, + "learning_rate": 0.00017999215655166716, + "loss": 2.7241, + "step": 4177 + }, + { + "epoch": 0.33718021144378985, + "grad_norm": 0.7190617918968201, + "learning_rate": 0.00017998268176548752, + "loss": 2.7278, + "step": 4178 + }, + { + "epoch": 0.3372609151803729, + "grad_norm": 0.8337060809135437, + "learning_rate": 0.0001799732049859304, 
+ "loss": 2.7059, + "step": 4179 + }, + { + "epoch": 0.33734161891695585, + "grad_norm": 0.7547435164451599, + "learning_rate": 0.0001799637262132319, + "loss": 2.7782, + "step": 4180 + }, + { + "epoch": 0.3374223226535389, + "grad_norm": 0.8067883253097534, + "learning_rate": 0.0001799542454476284, + "loss": 2.7978, + "step": 4181 + }, + { + "epoch": 0.33750302639012186, + "grad_norm": 0.7451581358909607, + "learning_rate": 0.00017994476268935609, + "loss": 2.6931, + "step": 4182 + }, + { + "epoch": 0.33758373012670484, + "grad_norm": 0.7521898746490479, + "learning_rate": 0.00017993527793865125, + "loss": 2.6939, + "step": 4183 + }, + { + "epoch": 0.33766443386328787, + "grad_norm": 0.7608996033668518, + "learning_rate": 0.0001799257911957504, + "loss": 2.715, + "step": 4184 + }, + { + "epoch": 0.33774513759987085, + "grad_norm": 0.7459948658943176, + "learning_rate": 0.00017991630246088987, + "loss": 2.6951, + "step": 4185 + }, + { + "epoch": 0.3378258413364539, + "grad_norm": 0.7549717426300049, + "learning_rate": 0.00017990681173430618, + "loss": 2.7353, + "step": 4186 + }, + { + "epoch": 0.33790654507303686, + "grad_norm": 0.7234344482421875, + "learning_rate": 0.0001798973190162359, + "loss": 2.6491, + "step": 4187 + }, + { + "epoch": 0.3379872488096199, + "grad_norm": 0.7652330994606018, + "learning_rate": 0.00017988782430691553, + "loss": 2.765, + "step": 4188 + }, + { + "epoch": 0.33806795254620287, + "grad_norm": 0.742953360080719, + "learning_rate": 0.00017987832760658177, + "loss": 2.7079, + "step": 4189 + }, + { + "epoch": 0.3381486562827859, + "grad_norm": 0.7440767288208008, + "learning_rate": 0.00017986882891547125, + "loss": 2.6751, + "step": 4190 + }, + { + "epoch": 0.3382293600193689, + "grad_norm": 0.7141925096511841, + "learning_rate": 0.00017985932823382078, + "loss": 2.6249, + "step": 4191 + }, + { + "epoch": 0.3383100637559519, + "grad_norm": 0.7200489044189453, + "learning_rate": 0.00017984982556186707, + "loss": 2.6811, + "step": 4192 + 
}, + { + "epoch": 0.3383907674925349, + "grad_norm": 0.7677409648895264, + "learning_rate": 0.00017984032089984696, + "loss": 2.6641, + "step": 4193 + }, + { + "epoch": 0.3384714712291179, + "grad_norm": 0.7386545538902283, + "learning_rate": 0.00017983081424799741, + "loss": 2.6504, + "step": 4194 + }, + { + "epoch": 0.3385521749657009, + "grad_norm": 0.7528583407402039, + "learning_rate": 0.00017982130560655526, + "loss": 2.6422, + "step": 4195 + }, + { + "epoch": 0.3386328787022839, + "grad_norm": 0.7339407801628113, + "learning_rate": 0.0001798117949757575, + "loss": 2.7047, + "step": 4196 + }, + { + "epoch": 0.3387135824388669, + "grad_norm": 0.7655882239341736, + "learning_rate": 0.00017980228235584117, + "loss": 2.7644, + "step": 4197 + }, + { + "epoch": 0.33879428617544993, + "grad_norm": 0.7602109909057617, + "learning_rate": 0.00017979276774704342, + "loss": 2.697, + "step": 4198 + }, + { + "epoch": 0.3388749899120329, + "grad_norm": 0.7188911437988281, + "learning_rate": 0.00017978325114960126, + "loss": 2.7147, + "step": 4199 + }, + { + "epoch": 0.33895569364861594, + "grad_norm": 0.7672597765922546, + "learning_rate": 0.00017977373256375194, + "loss": 2.6558, + "step": 4200 + }, + { + "epoch": 0.3390363973851989, + "grad_norm": 0.784187912940979, + "learning_rate": 0.0001797642119897327, + "loss": 2.7005, + "step": 4201 + }, + { + "epoch": 0.33911710112178195, + "grad_norm": 0.7359703779220581, + "learning_rate": 0.00017975468942778075, + "loss": 2.6578, + "step": 4202 + }, + { + "epoch": 0.3391978048583649, + "grad_norm": 0.7776080965995789, + "learning_rate": 0.00017974516487813345, + "loss": 2.6747, + "step": 4203 + }, + { + "epoch": 0.33927850859494796, + "grad_norm": 0.6934135556221008, + "learning_rate": 0.00017973563834102824, + "loss": 2.6335, + "step": 4204 + }, + { + "epoch": 0.33935921233153094, + "grad_norm": 0.7715818881988525, + "learning_rate": 0.00017972610981670245, + "loss": 2.6062, + "step": 4205 + }, + { + "epoch": 
0.33943991606811397, + "grad_norm": 0.7466367483139038, + "learning_rate": 0.0001797165793053936, + "loss": 2.7243, + "step": 4206 + }, + { + "epoch": 0.33952061980469694, + "grad_norm": 0.7485085129737854, + "learning_rate": 0.00017970704680733926, + "loss": 2.6603, + "step": 4207 + }, + { + "epoch": 0.33960132354128, + "grad_norm": 0.7365782856941223, + "learning_rate": 0.0001796975123227769, + "loss": 2.7179, + "step": 4208 + }, + { + "epoch": 0.33968202727786295, + "grad_norm": 0.8405506014823914, + "learning_rate": 0.00017968797585194422, + "loss": 2.7413, + "step": 4209 + }, + { + "epoch": 0.339762731014446, + "grad_norm": 0.8227888941764832, + "learning_rate": 0.00017967843739507888, + "loss": 2.6814, + "step": 4210 + }, + { + "epoch": 0.33984343475102896, + "grad_norm": 0.8247283697128296, + "learning_rate": 0.0001796688969524186, + "loss": 2.6802, + "step": 4211 + }, + { + "epoch": 0.339924138487612, + "grad_norm": 0.7639476656913757, + "learning_rate": 0.00017965935452420116, + "loss": 2.7422, + "step": 4212 + }, + { + "epoch": 0.34000484222419497, + "grad_norm": 0.7846776247024536, + "learning_rate": 0.00017964981011066436, + "loss": 2.7443, + "step": 4213 + }, + { + "epoch": 0.340085545960778, + "grad_norm": 0.7593334913253784, + "learning_rate": 0.00017964026371204608, + "loss": 2.7179, + "step": 4214 + }, + { + "epoch": 0.340166249697361, + "grad_norm": 0.7878177165985107, + "learning_rate": 0.00017963071532858425, + "loss": 2.7118, + "step": 4215 + }, + { + "epoch": 0.340246953433944, + "grad_norm": 0.7728220224380493, + "learning_rate": 0.00017962116496051685, + "loss": 2.6646, + "step": 4216 + }, + { + "epoch": 0.340327657170527, + "grad_norm": 0.8419308066368103, + "learning_rate": 0.00017961161260808187, + "loss": 2.7829, + "step": 4217 + }, + { + "epoch": 0.34040836090711, + "grad_norm": 0.7066153883934021, + "learning_rate": 0.0001796020582715174, + "loss": 2.6498, + "step": 4218 + }, + { + "epoch": 0.340489064643693, + "grad_norm": 
0.7976264953613281, + "learning_rate": 0.00017959250195106156, + "loss": 2.7496, + "step": 4219 + }, + { + "epoch": 0.34056976838027603, + "grad_norm": 0.736595630645752, + "learning_rate": 0.0001795829436469525, + "loss": 2.6497, + "step": 4220 + }, + { + "epoch": 0.340650472116859, + "grad_norm": 0.818550705909729, + "learning_rate": 0.0001795733833594285, + "loss": 2.6793, + "step": 4221 + }, + { + "epoch": 0.34073117585344204, + "grad_norm": 0.7712778449058533, + "learning_rate": 0.00017956382108872773, + "loss": 2.6215, + "step": 4222 + }, + { + "epoch": 0.340811879590025, + "grad_norm": 0.746306300163269, + "learning_rate": 0.00017955425683508858, + "loss": 2.7372, + "step": 4223 + }, + { + "epoch": 0.34089258332660805, + "grad_norm": 0.7269306778907776, + "learning_rate": 0.00017954469059874937, + "loss": 2.6438, + "step": 4224 + }, + { + "epoch": 0.340973287063191, + "grad_norm": 0.7426211833953857, + "learning_rate": 0.00017953512237994855, + "loss": 2.6539, + "step": 4225 + }, + { + "epoch": 0.34105399079977405, + "grad_norm": 0.7269948124885559, + "learning_rate": 0.0001795255521789246, + "loss": 2.6833, + "step": 4226 + }, + { + "epoch": 0.34113469453635703, + "grad_norm": 0.7279343605041504, + "learning_rate": 0.00017951597999591598, + "loss": 2.7011, + "step": 4227 + }, + { + "epoch": 0.34121539827294006, + "grad_norm": 0.7554663419723511, + "learning_rate": 0.0001795064058311613, + "loss": 2.7036, + "step": 4228 + }, + { + "epoch": 0.34129610200952304, + "grad_norm": 0.7516502141952515, + "learning_rate": 0.00017949682968489912, + "loss": 2.6699, + "step": 4229 + }, + { + "epoch": 0.34137680574610607, + "grad_norm": 0.7931745052337646, + "learning_rate": 0.00017948725155736818, + "loss": 2.6655, + "step": 4230 + }, + { + "epoch": 0.34145750948268905, + "grad_norm": 0.6981344223022461, + "learning_rate": 0.0001794776714488071, + "loss": 2.6987, + "step": 4231 + }, + { + "epoch": 0.3415382132192721, + "grad_norm": 0.7513911724090576, + "learning_rate": 
0.00017946808935945474, + "loss": 2.6985, + "step": 4232 + }, + { + "epoch": 0.34161891695585506, + "grad_norm": 0.7373185753822327, + "learning_rate": 0.00017945850528954983, + "loss": 2.7269, + "step": 4233 + }, + { + "epoch": 0.34169962069243803, + "grad_norm": 0.6990259289741516, + "learning_rate": 0.0001794489192393313, + "loss": 2.6763, + "step": 4234 + }, + { + "epoch": 0.34178032442902107, + "grad_norm": 0.7661817669868469, + "learning_rate": 0.00017943933120903797, + "loss": 2.7057, + "step": 4235 + }, + { + "epoch": 0.34186102816560404, + "grad_norm": 0.7570027112960815, + "learning_rate": 0.0001794297411989089, + "loss": 2.7358, + "step": 4236 + }, + { + "epoch": 0.3419417319021871, + "grad_norm": 0.7751824855804443, + "learning_rate": 0.000179420149209183, + "loss": 2.6771, + "step": 4237 + }, + { + "epoch": 0.34202243563877005, + "grad_norm": 0.8028360605239868, + "learning_rate": 0.0001794105552400994, + "loss": 2.6399, + "step": 4238 + }, + { + "epoch": 0.3421031393753531, + "grad_norm": 0.7398171424865723, + "learning_rate": 0.00017940095929189716, + "loss": 2.6532, + "step": 4239 + }, + { + "epoch": 0.34218384311193606, + "grad_norm": 0.8300225138664246, + "learning_rate": 0.0001793913613648155, + "loss": 2.6798, + "step": 4240 + }, + { + "epoch": 0.3422645468485191, + "grad_norm": 0.7501145005226135, + "learning_rate": 0.00017938176145909356, + "loss": 2.7132, + "step": 4241 + }, + { + "epoch": 0.34234525058510207, + "grad_norm": 0.7178483605384827, + "learning_rate": 0.00017937215957497063, + "loss": 2.7172, + "step": 4242 + }, + { + "epoch": 0.3424259543216851, + "grad_norm": 0.7207306027412415, + "learning_rate": 0.00017936255571268599, + "loss": 2.629, + "step": 4243 + }, + { + "epoch": 0.3425066580582681, + "grad_norm": 0.7339839935302734, + "learning_rate": 0.00017935294987247899, + "loss": 2.6262, + "step": 4244 + }, + { + "epoch": 0.3425873617948511, + "grad_norm": 0.6977292895317078, + "learning_rate": 0.00017934334205458907, + "loss": 
2.6949, + "step": 4245 + }, + { + "epoch": 0.3426680655314341, + "grad_norm": 0.7368096113204956, + "learning_rate": 0.00017933373225925564, + "loss": 2.681, + "step": 4246 + }, + { + "epoch": 0.3427487692680171, + "grad_norm": 0.7234459519386292, + "learning_rate": 0.00017932412048671825, + "loss": 2.6891, + "step": 4247 + }, + { + "epoch": 0.3428294730046001, + "grad_norm": 0.7659995555877686, + "learning_rate": 0.00017931450673721642, + "loss": 2.7394, + "step": 4248 + }, + { + "epoch": 0.3429101767411831, + "grad_norm": 0.7799893617630005, + "learning_rate": 0.00017930489101098974, + "loss": 2.7707, + "step": 4249 + }, + { + "epoch": 0.3429908804777661, + "grad_norm": 0.7063946723937988, + "learning_rate": 0.00017929527330827786, + "loss": 2.6573, + "step": 4250 + }, + { + "epoch": 0.34307158421434913, + "grad_norm": 0.7090561389923096, + "learning_rate": 0.0001792856536293205, + "loss": 2.7095, + "step": 4251 + }, + { + "epoch": 0.3431522879509321, + "grad_norm": 0.8020029067993164, + "learning_rate": 0.0001792760319743574, + "loss": 2.6905, + "step": 4252 + }, + { + "epoch": 0.34323299168751514, + "grad_norm": 0.7221484780311584, + "learning_rate": 0.00017926640834362836, + "loss": 2.6853, + "step": 4253 + }, + { + "epoch": 0.3433136954240981, + "grad_norm": 0.7102623581886292, + "learning_rate": 0.00017925678273737324, + "loss": 2.6821, + "step": 4254 + }, + { + "epoch": 0.34339439916068115, + "grad_norm": 0.7702807784080505, + "learning_rate": 0.00017924715515583187, + "loss": 2.6986, + "step": 4255 + }, + { + "epoch": 0.34347510289726413, + "grad_norm": 0.7938152551651001, + "learning_rate": 0.00017923752559924425, + "loss": 2.7162, + "step": 4256 + }, + { + "epoch": 0.34355580663384716, + "grad_norm": 0.7340937852859497, + "learning_rate": 0.00017922789406785036, + "loss": 2.6904, + "step": 4257 + }, + { + "epoch": 0.34363651037043014, + "grad_norm": 0.7010839581489563, + "learning_rate": 0.00017921826056189026, + "loss": 2.6969, + "step": 4258 + }, + { + 
"epoch": 0.34371721410701317, + "grad_norm": 0.758178174495697, + "learning_rate": 0.00017920862508160403, + "loss": 2.6391, + "step": 4259 + }, + { + "epoch": 0.34379791784359615, + "grad_norm": 0.7861726880073547, + "learning_rate": 0.0001791989876272318, + "loss": 2.7088, + "step": 4260 + }, + { + "epoch": 0.3438786215801792, + "grad_norm": 0.6764364242553711, + "learning_rate": 0.00017918934819901377, + "loss": 2.6221, + "step": 4261 + }, + { + "epoch": 0.34395932531676215, + "grad_norm": 0.76728355884552, + "learning_rate": 0.00017917970679719018, + "loss": 2.6854, + "step": 4262 + }, + { + "epoch": 0.3440400290533452, + "grad_norm": 0.7161166071891785, + "learning_rate": 0.00017917006342200133, + "loss": 2.7048, + "step": 4263 + }, + { + "epoch": 0.34412073278992816, + "grad_norm": 0.7182073593139648, + "learning_rate": 0.00017916041807368753, + "loss": 2.7559, + "step": 4264 + }, + { + "epoch": 0.3442014365265112, + "grad_norm": 0.832258403301239, + "learning_rate": 0.0001791507707524892, + "loss": 2.6743, + "step": 4265 + }, + { + "epoch": 0.34428214026309417, + "grad_norm": 0.7048495411872864, + "learning_rate": 0.00017914112145864675, + "loss": 2.693, + "step": 4266 + }, + { + "epoch": 0.3443628439996772, + "grad_norm": 0.7475518584251404, + "learning_rate": 0.00017913147019240068, + "loss": 2.6881, + "step": 4267 + }, + { + "epoch": 0.3444435477362602, + "grad_norm": 0.72830730676651, + "learning_rate": 0.00017912181695399154, + "loss": 2.659, + "step": 4268 + }, + { + "epoch": 0.3445242514728432, + "grad_norm": 0.7183662056922913, + "learning_rate": 0.00017911216174365988, + "loss": 2.6611, + "step": 4269 + }, + { + "epoch": 0.3446049552094262, + "grad_norm": 0.7487103343009949, + "learning_rate": 0.0001791025045616463, + "loss": 2.6518, + "step": 4270 + }, + { + "epoch": 0.3446856589460092, + "grad_norm": 0.7733812928199768, + "learning_rate": 0.0001790928454081916, + "loss": 2.6359, + "step": 4271 + }, + { + "epoch": 0.3447663626825922, + "grad_norm": 
0.7774991393089294, + "learning_rate": 0.00017908318428353642, + "loss": 2.6654, + "step": 4272 + }, + { + "epoch": 0.34484706641917523, + "grad_norm": 0.6882895827293396, + "learning_rate": 0.00017907352118792157, + "loss": 2.686, + "step": 4273 + }, + { + "epoch": 0.3449277701557582, + "grad_norm": 0.7571535110473633, + "learning_rate": 0.00017906385612158785, + "loss": 2.7108, + "step": 4274 + }, + { + "epoch": 0.34500847389234124, + "grad_norm": 0.7324517369270325, + "learning_rate": 0.00017905418908477615, + "loss": 2.6663, + "step": 4275 + }, + { + "epoch": 0.3450891776289242, + "grad_norm": 0.7476221919059753, + "learning_rate": 0.00017904452007772744, + "loss": 2.7202, + "step": 4276 + }, + { + "epoch": 0.34516988136550725, + "grad_norm": 0.7648386359214783, + "learning_rate": 0.00017903484910068268, + "loss": 2.6759, + "step": 4277 + }, + { + "epoch": 0.3452505851020902, + "grad_norm": 0.7375434637069702, + "learning_rate": 0.00017902517615388282, + "loss": 2.6603, + "step": 4278 + }, + { + "epoch": 0.34533128883867326, + "grad_norm": 0.7248519062995911, + "learning_rate": 0.00017901550123756906, + "loss": 2.7147, + "step": 4279 + }, + { + "epoch": 0.34541199257525623, + "grad_norm": 0.7264916896820068, + "learning_rate": 0.0001790058243519824, + "loss": 2.6992, + "step": 4280 + }, + { + "epoch": 0.34549269631183926, + "grad_norm": 0.8370026350021362, + "learning_rate": 0.0001789961454973641, + "loss": 2.7114, + "step": 4281 + }, + { + "epoch": 0.34557340004842224, + "grad_norm": 0.72071373462677, + "learning_rate": 0.00017898646467395538, + "loss": 2.6957, + "step": 4282 + }, + { + "epoch": 0.3456541037850053, + "grad_norm": 0.7355397343635559, + "learning_rate": 0.0001789767818819975, + "loss": 2.6744, + "step": 4283 + }, + { + "epoch": 0.34573480752158825, + "grad_norm": 0.734756588935852, + "learning_rate": 0.00017896709712173173, + "loss": 2.726, + "step": 4284 + }, + { + "epoch": 0.3458155112581712, + "grad_norm": 0.7890543341636658, + 
"learning_rate": 0.00017895741039339945, + "loss": 2.6726, + "step": 4285 + }, + { + "epoch": 0.34589621499475426, + "grad_norm": 0.7768735885620117, + "learning_rate": 0.00017894772169724216, + "loss": 2.7617, + "step": 4286 + }, + { + "epoch": 0.34597691873133724, + "grad_norm": 0.7306547164916992, + "learning_rate": 0.00017893803103350125, + "loss": 2.6253, + "step": 4287 + }, + { + "epoch": 0.34605762246792027, + "grad_norm": 0.767066478729248, + "learning_rate": 0.00017892833840241828, + "loss": 2.6522, + "step": 4288 + }, + { + "epoch": 0.34613832620450324, + "grad_norm": 0.7018097639083862, + "learning_rate": 0.00017891864380423477, + "loss": 2.7111, + "step": 4289 + }, + { + "epoch": 0.3462190299410863, + "grad_norm": 0.7305615544319153, + "learning_rate": 0.00017890894723919236, + "loss": 2.6924, + "step": 4290 + }, + { + "epoch": 0.34629973367766925, + "grad_norm": 0.7588002681732178, + "learning_rate": 0.00017889924870753275, + "loss": 2.6952, + "step": 4291 + }, + { + "epoch": 0.3463804374142523, + "grad_norm": 0.7162861824035645, + "learning_rate": 0.0001788895482094976, + "loss": 2.6239, + "step": 4292 + }, + { + "epoch": 0.34646114115083526, + "grad_norm": 0.7494024634361267, + "learning_rate": 0.00017887984574532868, + "loss": 2.6763, + "step": 4293 + }, + { + "epoch": 0.3465418448874183, + "grad_norm": 0.7100037336349487, + "learning_rate": 0.0001788701413152678, + "loss": 2.6378, + "step": 4294 + }, + { + "epoch": 0.34662254862400127, + "grad_norm": 0.7316900491714478, + "learning_rate": 0.00017886043491955684, + "loss": 2.7001, + "step": 4295 + }, + { + "epoch": 0.3467032523605843, + "grad_norm": 0.8467028737068176, + "learning_rate": 0.00017885072655843772, + "loss": 2.7536, + "step": 4296 + }, + { + "epoch": 0.3467839560971673, + "grad_norm": 0.7248796820640564, + "learning_rate": 0.00017884101623215237, + "loss": 2.6956, + "step": 4297 + }, + { + "epoch": 0.3468646598337503, + "grad_norm": 0.7183107137680054, + "learning_rate": 
0.0001788313039409428, + "loss": 2.743, + "step": 4298 + }, + { + "epoch": 0.3469453635703333, + "grad_norm": 0.6835163831710815, + "learning_rate": 0.00017882158968505105, + "loss": 2.7016, + "step": 4299 + }, + { + "epoch": 0.3470260673069163, + "grad_norm": 0.7973365783691406, + "learning_rate": 0.00017881187346471925, + "loss": 2.6927, + "step": 4300 + }, + { + "epoch": 0.3471067710434993, + "grad_norm": 0.700040876865387, + "learning_rate": 0.00017880215528018954, + "loss": 2.6961, + "step": 4301 + }, + { + "epoch": 0.34718747478008233, + "grad_norm": 0.8180583119392395, + "learning_rate": 0.00017879243513170415, + "loss": 2.642, + "step": 4302 + }, + { + "epoch": 0.3472681785166653, + "grad_norm": 0.7134599685668945, + "learning_rate": 0.0001787827130195053, + "loss": 2.6901, + "step": 4303 + }, + { + "epoch": 0.34734888225324834, + "grad_norm": 0.767998218536377, + "learning_rate": 0.0001787729889438353, + "loss": 2.6472, + "step": 4304 + }, + { + "epoch": 0.3474295859898313, + "grad_norm": 0.7260780930519104, + "learning_rate": 0.0001787632629049365, + "loss": 2.6791, + "step": 4305 + }, + { + "epoch": 0.34751028972641435, + "grad_norm": 0.6918236613273621, + "learning_rate": 0.00017875353490305132, + "loss": 2.6596, + "step": 4306 + }, + { + "epoch": 0.3475909934629973, + "grad_norm": 0.7734197974205017, + "learning_rate": 0.00017874380493842216, + "loss": 2.6402, + "step": 4307 + }, + { + "epoch": 0.34767169719958035, + "grad_norm": 0.7051037549972534, + "learning_rate": 0.00017873407301129154, + "loss": 2.7517, + "step": 4308 + }, + { + "epoch": 0.34775240093616333, + "grad_norm": 0.7026919722557068, + "learning_rate": 0.00017872433912190203, + "loss": 2.7058, + "step": 4309 + }, + { + "epoch": 0.34783310467274636, + "grad_norm": 0.7248546481132507, + "learning_rate": 0.00017871460327049618, + "loss": 2.666, + "step": 4310 + }, + { + "epoch": 0.34791380840932934, + "grad_norm": 0.7348842620849609, + "learning_rate": 0.0001787048654573167, + "loss": 
2.7712, + "step": 4311 + }, + { + "epoch": 0.34799451214591237, + "grad_norm": 0.7923693656921387, + "learning_rate": 0.00017869512568260618, + "loss": 2.6469, + "step": 4312 + }, + { + "epoch": 0.34807521588249535, + "grad_norm": 0.7604066729545593, + "learning_rate": 0.00017868538394660743, + "loss": 2.7152, + "step": 4313 + }, + { + "epoch": 0.3481559196190784, + "grad_norm": 0.6811137795448303, + "learning_rate": 0.00017867564024956324, + "loss": 2.715, + "step": 4314 + }, + { + "epoch": 0.34823662335566136, + "grad_norm": 0.7292799353599548, + "learning_rate": 0.00017866589459171643, + "loss": 2.6374, + "step": 4315 + }, + { + "epoch": 0.3483173270922444, + "grad_norm": 0.6961250901222229, + "learning_rate": 0.0001786561469733099, + "loss": 2.6592, + "step": 4316 + }, + { + "epoch": 0.34839803082882737, + "grad_norm": 0.7447086572647095, + "learning_rate": 0.00017864639739458658, + "loss": 2.6965, + "step": 4317 + }, + { + "epoch": 0.3484787345654104, + "grad_norm": 0.7107378244400024, + "learning_rate": 0.00017863664585578942, + "loss": 2.7057, + "step": 4318 + }, + { + "epoch": 0.3485594383019934, + "grad_norm": 0.7372235655784607, + "learning_rate": 0.00017862689235716153, + "loss": 2.6289, + "step": 4319 + }, + { + "epoch": 0.3486401420385764, + "grad_norm": 0.7360481023788452, + "learning_rate": 0.00017861713689894593, + "loss": 2.7208, + "step": 4320 + }, + { + "epoch": 0.3487208457751594, + "grad_norm": 0.7378106713294983, + "learning_rate": 0.00017860737948138575, + "loss": 2.6836, + "step": 4321 + }, + { + "epoch": 0.3488015495117424, + "grad_norm": 0.7110548615455627, + "learning_rate": 0.00017859762010472423, + "loss": 2.6941, + "step": 4322 + }, + { + "epoch": 0.3488822532483254, + "grad_norm": 0.7419706583023071, + "learning_rate": 0.00017858785876920455, + "loss": 2.6591, + "step": 4323 + }, + { + "epoch": 0.3489629569849084, + "grad_norm": 0.7759542465209961, + "learning_rate": 0.00017857809547506997, + "loss": 2.6966, + "step": 4324 + }, + { + 
"epoch": 0.3490436607214914, + "grad_norm": 0.7894207239151001, + "learning_rate": 0.0001785683302225639, + "loss": 2.7298, + "step": 4325 + }, + { + "epoch": 0.34912436445807443, + "grad_norm": 0.7342399954795837, + "learning_rate": 0.0001785585630119296, + "loss": 2.6998, + "step": 4326 + }, + { + "epoch": 0.3492050681946574, + "grad_norm": 0.8684173822402954, + "learning_rate": 0.0001785487938434106, + "loss": 2.7179, + "step": 4327 + }, + { + "epoch": 0.34928577193124044, + "grad_norm": 0.7557523846626282, + "learning_rate": 0.00017853902271725033, + "loss": 2.7081, + "step": 4328 + }, + { + "epoch": 0.3493664756678234, + "grad_norm": 0.7910173535346985, + "learning_rate": 0.0001785292496336923, + "loss": 2.718, + "step": 4329 + }, + { + "epoch": 0.34944717940440645, + "grad_norm": 0.7878917455673218, + "learning_rate": 0.00017851947459298007, + "loss": 2.674, + "step": 4330 + }, + { + "epoch": 0.3495278831409894, + "grad_norm": 0.7290656566619873, + "learning_rate": 0.0001785096975953573, + "loss": 2.6962, + "step": 4331 + }, + { + "epoch": 0.34960858687757246, + "grad_norm": 0.8465737104415894, + "learning_rate": 0.00017849991864106763, + "loss": 2.6793, + "step": 4332 + }, + { + "epoch": 0.34968929061415543, + "grad_norm": 0.7183132171630859, + "learning_rate": 0.0001784901377303548, + "loss": 2.6902, + "step": 4333 + }, + { + "epoch": 0.34976999435073847, + "grad_norm": 0.7535461783409119, + "learning_rate": 0.00017848035486346255, + "loss": 2.7153, + "step": 4334 + }, + { + "epoch": 0.34985069808732144, + "grad_norm": 0.778734028339386, + "learning_rate": 0.0001784705700406347, + "loss": 2.6316, + "step": 4335 + }, + { + "epoch": 0.3499314018239044, + "grad_norm": 0.6937401294708252, + "learning_rate": 0.00017846078326211516, + "loss": 2.6902, + "step": 4336 + }, + { + "epoch": 0.35001210556048745, + "grad_norm": 0.7450751066207886, + "learning_rate": 0.00017845099452814774, + "loss": 2.6898, + "step": 4337 + }, + { + "epoch": 0.35009280929707043, + 
"grad_norm": 0.7535614967346191, + "learning_rate": 0.0001784412038389765, + "loss": 2.6969, + "step": 4338 + }, + { + "epoch": 0.35017351303365346, + "grad_norm": 0.6971385478973389, + "learning_rate": 0.00017843141119484543, + "loss": 2.6517, + "step": 4339 + }, + { + "epoch": 0.35025421677023644, + "grad_norm": 0.7233202457427979, + "learning_rate": 0.00017842161659599858, + "loss": 2.7332, + "step": 4340 + }, + { + "epoch": 0.35033492050681947, + "grad_norm": 0.7870340347290039, + "learning_rate": 0.00017841182004268, + "loss": 2.6485, + "step": 4341 + }, + { + "epoch": 0.35041562424340245, + "grad_norm": 0.7387053966522217, + "learning_rate": 0.0001784020215351339, + "loss": 2.6945, + "step": 4342 + }, + { + "epoch": 0.3504963279799855, + "grad_norm": 0.8357887268066406, + "learning_rate": 0.00017839222107360453, + "loss": 2.703, + "step": 4343 + }, + { + "epoch": 0.35057703171656845, + "grad_norm": 0.7197332978248596, + "learning_rate": 0.000178382418658336, + "loss": 2.6649, + "step": 4344 + }, + { + "epoch": 0.3506577354531515, + "grad_norm": 0.7416980862617493, + "learning_rate": 0.0001783726142895728, + "loss": 2.7393, + "step": 4345 + }, + { + "epoch": 0.35073843918973446, + "grad_norm": 0.6807832717895508, + "learning_rate": 0.00017836280796755912, + "loss": 2.6619, + "step": 4346 + }, + { + "epoch": 0.3508191429263175, + "grad_norm": 0.6858795285224915, + "learning_rate": 0.00017835299969253945, + "loss": 2.6266, + "step": 4347 + }, + { + "epoch": 0.35089984666290047, + "grad_norm": 0.8432363867759705, + "learning_rate": 0.0001783431894647582, + "loss": 2.6534, + "step": 4348 + }, + { + "epoch": 0.3509805503994835, + "grad_norm": 0.7240749001502991, + "learning_rate": 0.0001783333772844599, + "loss": 2.6851, + "step": 4349 + }, + { + "epoch": 0.3510612541360665, + "grad_norm": 0.7814531326293945, + "learning_rate": 0.00017832356315188906, + "loss": 2.7085, + "step": 4350 + }, + { + "epoch": 0.3511419578726495, + "grad_norm": 0.6989716291427612, + 
"learning_rate": 0.00017831374706729026, + "loss": 2.6674, + "step": 4351 + }, + { + "epoch": 0.3512226616092325, + "grad_norm": 0.7118446230888367, + "learning_rate": 0.0001783039290309082, + "loss": 2.6837, + "step": 4352 + }, + { + "epoch": 0.3513033653458155, + "grad_norm": 0.7641892433166504, + "learning_rate": 0.00017829410904298754, + "loss": 2.6415, + "step": 4353 + }, + { + "epoch": 0.3513840690823985, + "grad_norm": 0.6975794434547424, + "learning_rate": 0.000178284287103773, + "loss": 2.6679, + "step": 4354 + }, + { + "epoch": 0.35146477281898153, + "grad_norm": 0.7192546725273132, + "learning_rate": 0.00017827446321350943, + "loss": 2.6539, + "step": 4355 + }, + { + "epoch": 0.3515454765555645, + "grad_norm": 0.8749549388885498, + "learning_rate": 0.00017826463737244155, + "loss": 2.7254, + "step": 4356 + }, + { + "epoch": 0.35162618029214754, + "grad_norm": 0.8509732484817505, + "learning_rate": 0.0001782548095808144, + "loss": 2.7679, + "step": 4357 + }, + { + "epoch": 0.3517068840287305, + "grad_norm": 0.7647901773452759, + "learning_rate": 0.00017824497983887278, + "loss": 2.7049, + "step": 4358 + }, + { + "epoch": 0.35178758776531355, + "grad_norm": 0.7551973462104797, + "learning_rate": 0.00017823514814686178, + "loss": 2.7086, + "step": 4359 + }, + { + "epoch": 0.3518682915018965, + "grad_norm": 0.730140209197998, + "learning_rate": 0.00017822531450502633, + "loss": 2.6334, + "step": 4360 + }, + { + "epoch": 0.35194899523847956, + "grad_norm": 0.8210160136222839, + "learning_rate": 0.00017821547891361158, + "loss": 2.7248, + "step": 4361 + }, + { + "epoch": 0.35202969897506253, + "grad_norm": 0.761972963809967, + "learning_rate": 0.00017820564137286264, + "loss": 2.6502, + "step": 4362 + }, + { + "epoch": 0.35211040271164556, + "grad_norm": 0.7564061284065247, + "learning_rate": 0.00017819580188302466, + "loss": 2.6795, + "step": 4363 + }, + { + "epoch": 0.35219110644822854, + "grad_norm": 0.7382947206497192, + "learning_rate": 
0.00017818596044434293, + "loss": 2.6754, + "step": 4364 + }, + { + "epoch": 0.3522718101848116, + "grad_norm": 0.737194836139679, + "learning_rate": 0.00017817611705706266, + "loss": 2.7098, + "step": 4365 + }, + { + "epoch": 0.35235251392139455, + "grad_norm": 0.7183281779289246, + "learning_rate": 0.0001781662717214292, + "loss": 2.6528, + "step": 4366 + }, + { + "epoch": 0.3524332176579776, + "grad_norm": 0.7785990238189697, + "learning_rate": 0.00017815642443768794, + "loss": 2.6419, + "step": 4367 + }, + { + "epoch": 0.35251392139456056, + "grad_norm": 0.7114452719688416, + "learning_rate": 0.00017814657520608427, + "loss": 2.7088, + "step": 4368 + }, + { + "epoch": 0.3525946251311436, + "grad_norm": 0.746969997882843, + "learning_rate": 0.00017813672402686365, + "loss": 2.7199, + "step": 4369 + }, + { + "epoch": 0.35267532886772657, + "grad_norm": 0.7700605988502502, + "learning_rate": 0.00017812687090027165, + "loss": 2.6713, + "step": 4370 + }, + { + "epoch": 0.3527560326043096, + "grad_norm": 0.7733504772186279, + "learning_rate": 0.0001781170158265538, + "loss": 2.6916, + "step": 4371 + }, + { + "epoch": 0.3528367363408926, + "grad_norm": 0.7769689559936523, + "learning_rate": 0.00017810715880595566, + "loss": 2.7787, + "step": 4372 + }, + { + "epoch": 0.3529174400774756, + "grad_norm": 0.7538996934890747, + "learning_rate": 0.000178097299838723, + "loss": 2.6964, + "step": 4373 + }, + { + "epoch": 0.3529981438140586, + "grad_norm": 0.7777890563011169, + "learning_rate": 0.00017808743892510146, + "loss": 2.6882, + "step": 4374 + }, + { + "epoch": 0.3530788475506416, + "grad_norm": 0.8331751823425293, + "learning_rate": 0.00017807757606533683, + "loss": 2.7113, + "step": 4375 + }, + { + "epoch": 0.3531595512872246, + "grad_norm": 0.8039207458496094, + "learning_rate": 0.00017806771125967492, + "loss": 2.6694, + "step": 4376 + }, + { + "epoch": 0.3532402550238076, + "grad_norm": 0.7727575898170471, + "learning_rate": 0.00017805784450836154, + "loss": 
2.6639, + "step": 4377 + }, + { + "epoch": 0.3533209587603906, + "grad_norm": 0.8247967958450317, + "learning_rate": 0.00017804797581164264, + "loss": 2.6539, + "step": 4378 + }, + { + "epoch": 0.35340166249697363, + "grad_norm": 0.7574009299278259, + "learning_rate": 0.0001780381051697642, + "loss": 2.7163, + "step": 4379 + }, + { + "epoch": 0.3534823662335566, + "grad_norm": 0.7304368615150452, + "learning_rate": 0.0001780282325829721, + "loss": 2.5759, + "step": 4380 + }, + { + "epoch": 0.35356306997013964, + "grad_norm": 0.7133963704109192, + "learning_rate": 0.00017801835805151257, + "loss": 2.7008, + "step": 4381 + }, + { + "epoch": 0.3536437737067226, + "grad_norm": 0.7525407075881958, + "learning_rate": 0.00017800848157563157, + "loss": 2.6785, + "step": 4382 + }, + { + "epoch": 0.35372447744330565, + "grad_norm": 0.7306779623031616, + "learning_rate": 0.00017799860315557528, + "loss": 2.6454, + "step": 4383 + }, + { + "epoch": 0.35380518117988863, + "grad_norm": 0.6657043695449829, + "learning_rate": 0.00017798872279158994, + "loss": 2.708, + "step": 4384 + }, + { + "epoch": 0.35388588491647166, + "grad_norm": 0.7655978202819824, + "learning_rate": 0.00017797884048392177, + "loss": 2.727, + "step": 4385 + }, + { + "epoch": 0.35396658865305464, + "grad_norm": 0.6802939176559448, + "learning_rate": 0.00017796895623281702, + "loss": 2.659, + "step": 4386 + }, + { + "epoch": 0.3540472923896376, + "grad_norm": 0.7191160917282104, + "learning_rate": 0.00017795907003852207, + "loss": 2.6335, + "step": 4387 + }, + { + "epoch": 0.35412799612622065, + "grad_norm": 0.7771886587142944, + "learning_rate": 0.00017794918190128337, + "loss": 2.6658, + "step": 4388 + }, + { + "epoch": 0.3542086998628036, + "grad_norm": 0.7133512496948242, + "learning_rate": 0.00017793929182134723, + "loss": 2.6701, + "step": 4389 + }, + { + "epoch": 0.35428940359938665, + "grad_norm": 0.7795221209526062, + "learning_rate": 0.00017792939979896022, + "loss": 2.6932, + "step": 4390 + }, + { + 
"epoch": 0.35437010733596963, + "grad_norm": 0.726767897605896, + "learning_rate": 0.00017791950583436887, + "loss": 2.676, + "step": 4391 + }, + { + "epoch": 0.35445081107255266, + "grad_norm": 0.7447288632392883, + "learning_rate": 0.00017790960992781972, + "loss": 2.7195, + "step": 4392 + }, + { + "epoch": 0.35453151480913564, + "grad_norm": 0.8053649663925171, + "learning_rate": 0.0001778997120795595, + "loss": 2.6851, + "step": 4393 + }, + { + "epoch": 0.35461221854571867, + "grad_norm": 0.7258884906768799, + "learning_rate": 0.00017788981228983474, + "loss": 2.6819, + "step": 4394 + }, + { + "epoch": 0.35469292228230165, + "grad_norm": 0.7279395461082458, + "learning_rate": 0.0001778799105588923, + "loss": 2.6954, + "step": 4395 + }, + { + "epoch": 0.3547736260188847, + "grad_norm": 0.7372962236404419, + "learning_rate": 0.0001778700068869789, + "loss": 2.7049, + "step": 4396 + }, + { + "epoch": 0.35485432975546766, + "grad_norm": 0.712003767490387, + "learning_rate": 0.00017786010127434135, + "loss": 2.7413, + "step": 4397 + }, + { + "epoch": 0.3549350334920507, + "grad_norm": 0.7487424612045288, + "learning_rate": 0.0001778501937212266, + "loss": 2.7231, + "step": 4398 + }, + { + "epoch": 0.35501573722863367, + "grad_norm": 0.73053377866745, + "learning_rate": 0.00017784028422788146, + "loss": 2.7029, + "step": 4399 + }, + { + "epoch": 0.3550964409652167, + "grad_norm": 0.697062611579895, + "learning_rate": 0.00017783037279455298, + "loss": 2.7139, + "step": 4400 + }, + { + "epoch": 0.3551771447017997, + "grad_norm": 0.7750880718231201, + "learning_rate": 0.00017782045942148819, + "loss": 2.6601, + "step": 4401 + }, + { + "epoch": 0.3552578484383827, + "grad_norm": 0.7124977111816406, + "learning_rate": 0.00017781054410893413, + "loss": 2.6119, + "step": 4402 + }, + { + "epoch": 0.3553385521749657, + "grad_norm": 0.7773111462593079, + "learning_rate": 0.00017780062685713785, + "loss": 2.7181, + "step": 4403 + }, + { + "epoch": 0.3554192559115487, + 
"grad_norm": 0.7282142639160156, + "learning_rate": 0.00017779070766634663, + "loss": 2.7141, + "step": 4404 + }, + { + "epoch": 0.3554999596481317, + "grad_norm": 0.8578598499298096, + "learning_rate": 0.0001777807865368076, + "loss": 2.7628, + "step": 4405 + }, + { + "epoch": 0.3555806633847147, + "grad_norm": 0.7126399874687195, + "learning_rate": 0.00017777086346876809, + "loss": 2.6914, + "step": 4406 + }, + { + "epoch": 0.3556613671212977, + "grad_norm": 0.8026365637779236, + "learning_rate": 0.00017776093846247533, + "loss": 2.7059, + "step": 4407 + }, + { + "epoch": 0.35574207085788073, + "grad_norm": 0.7839884161949158, + "learning_rate": 0.0001777510115181767, + "loss": 2.7265, + "step": 4408 + }, + { + "epoch": 0.3558227745944637, + "grad_norm": 0.7498767971992493, + "learning_rate": 0.00017774108263611966, + "loss": 2.7201, + "step": 4409 + }, + { + "epoch": 0.35590347833104674, + "grad_norm": 0.6996301412582397, + "learning_rate": 0.0001777311518165516, + "loss": 2.6271, + "step": 4410 + }, + { + "epoch": 0.3559841820676297, + "grad_norm": 0.7721461057662964, + "learning_rate": 0.00017772121905972003, + "loss": 2.6739, + "step": 4411 + }, + { + "epoch": 0.35606488580421275, + "grad_norm": 0.8018803000450134, + "learning_rate": 0.00017771128436587256, + "loss": 2.7092, + "step": 4412 + }, + { + "epoch": 0.3561455895407957, + "grad_norm": 0.7185639142990112, + "learning_rate": 0.0001777013477352567, + "loss": 2.6996, + "step": 4413 + }, + { + "epoch": 0.35622629327737876, + "grad_norm": 0.7218519449234009, + "learning_rate": 0.0001776914091681202, + "loss": 2.6555, + "step": 4414 + }, + { + "epoch": 0.35630699701396173, + "grad_norm": 0.7234479188919067, + "learning_rate": 0.00017768146866471062, + "loss": 2.6762, + "step": 4415 + }, + { + "epoch": 0.35638770075054477, + "grad_norm": 0.6723350286483765, + "learning_rate": 0.00017767152622527582, + "loss": 2.6272, + "step": 4416 + }, + { + "epoch": 0.35646840448712774, + "grad_norm": 0.7281947731971741, + 
"learning_rate": 0.00017766158185006356, + "loss": 2.7216, + "step": 4417 + }, + { + "epoch": 0.3565491082237108, + "grad_norm": 0.8350874781608582, + "learning_rate": 0.00017765163553932166, + "loss": 2.6619, + "step": 4418 + }, + { + "epoch": 0.35662981196029375, + "grad_norm": 0.7454007267951965, + "learning_rate": 0.00017764168729329801, + "loss": 2.6623, + "step": 4419 + }, + { + "epoch": 0.3567105156968768, + "grad_norm": 0.7419041395187378, + "learning_rate": 0.00017763173711224058, + "loss": 2.6773, + "step": 4420 + }, + { + "epoch": 0.35679121943345976, + "grad_norm": 0.7965987920761108, + "learning_rate": 0.0001776217849963973, + "loss": 2.6426, + "step": 4421 + }, + { + "epoch": 0.3568719231700428, + "grad_norm": 0.7093302607536316, + "learning_rate": 0.00017761183094601622, + "loss": 2.6745, + "step": 4422 + }, + { + "epoch": 0.35695262690662577, + "grad_norm": 0.7937216758728027, + "learning_rate": 0.00017760187496134548, + "loss": 2.7275, + "step": 4423 + }, + { + "epoch": 0.3570333306432088, + "grad_norm": 0.9185259938240051, + "learning_rate": 0.00017759191704263313, + "loss": 2.7055, + "step": 4424 + }, + { + "epoch": 0.3571140343797918, + "grad_norm": 0.7365124821662903, + "learning_rate": 0.00017758195719012743, + "loss": 2.6504, + "step": 4425 + }, + { + "epoch": 0.3571947381163748, + "grad_norm": 0.6992416977882385, + "learning_rate": 0.0001775719954040765, + "loss": 2.6684, + "step": 4426 + }, + { + "epoch": 0.3572754418529578, + "grad_norm": 0.7742372751235962, + "learning_rate": 0.00017756203168472866, + "loss": 2.6877, + "step": 4427 + }, + { + "epoch": 0.3573561455895408, + "grad_norm": 0.7448472380638123, + "learning_rate": 0.0001775520660323323, + "loss": 2.7027, + "step": 4428 + }, + { + "epoch": 0.3574368493261238, + "grad_norm": 0.7201915979385376, + "learning_rate": 0.00017754209844713569, + "loss": 2.7046, + "step": 4429 + }, + { + "epoch": 0.3575175530627068, + "grad_norm": 0.6675081253051758, + "learning_rate": 
0.0001775321289293873, + "loss": 2.6503, + "step": 4430 + }, + { + "epoch": 0.3575982567992898, + "grad_norm": 0.7252706289291382, + "learning_rate": 0.0001775221574793356, + "loss": 2.6053, + "step": 4431 + }, + { + "epoch": 0.35767896053587284, + "grad_norm": 0.7134702801704407, + "learning_rate": 0.00017751218409722906, + "loss": 2.6857, + "step": 4432 + }, + { + "epoch": 0.3577596642724558, + "grad_norm": 0.7074102163314819, + "learning_rate": 0.0001775022087833163, + "loss": 2.6871, + "step": 4433 + }, + { + "epoch": 0.35784036800903885, + "grad_norm": 0.693520724773407, + "learning_rate": 0.00017749223153784588, + "loss": 2.6629, + "step": 4434 + }, + { + "epoch": 0.3579210717456218, + "grad_norm": 0.6933221817016602, + "learning_rate": 0.0001774822523610665, + "loss": 2.6793, + "step": 4435 + }, + { + "epoch": 0.35800177548220485, + "grad_norm": 0.75307297706604, + "learning_rate": 0.00017747227125322685, + "loss": 2.7012, + "step": 4436 + }, + { + "epoch": 0.35808247921878783, + "grad_norm": 0.7732915282249451, + "learning_rate": 0.0001774622882145757, + "loss": 2.6908, + "step": 4437 + }, + { + "epoch": 0.3581631829553708, + "grad_norm": 0.7067054510116577, + "learning_rate": 0.0001774523032453618, + "loss": 2.7494, + "step": 4438 + }, + { + "epoch": 0.35824388669195384, + "grad_norm": 0.7412838935852051, + "learning_rate": 0.00017744231634583406, + "loss": 2.6734, + "step": 4439 + }, + { + "epoch": 0.3583245904285368, + "grad_norm": 0.7663930654525757, + "learning_rate": 0.00017743232751624136, + "loss": 2.6952, + "step": 4440 + }, + { + "epoch": 0.35840529416511985, + "grad_norm": 0.70650714635849, + "learning_rate": 0.00017742233675683268, + "loss": 2.6806, + "step": 4441 + }, + { + "epoch": 0.3584859979017028, + "grad_norm": 0.698310375213623, + "learning_rate": 0.00017741234406785692, + "loss": 2.6471, + "step": 4442 + }, + { + "epoch": 0.35856670163828586, + "grad_norm": 0.7274026274681091, + "learning_rate": 0.00017740234944956323, + "loss": 2.6688, 
+ "step": 4443 + }, + { + "epoch": 0.35864740537486883, + "grad_norm": 0.6944074034690857, + "learning_rate": 0.00017739235290220067, + "loss": 2.6954, + "step": 4444 + }, + { + "epoch": 0.35872810911145186, + "grad_norm": 0.841995358467102, + "learning_rate": 0.00017738235442601834, + "loss": 2.7169, + "step": 4445 + }, + { + "epoch": 0.35880881284803484, + "grad_norm": 0.74863201379776, + "learning_rate": 0.00017737235402126545, + "loss": 2.6534, + "step": 4446 + }, + { + "epoch": 0.3588895165846179, + "grad_norm": 0.7260422110557556, + "learning_rate": 0.00017736235168819126, + "loss": 2.6266, + "step": 4447 + }, + { + "epoch": 0.35897022032120085, + "grad_norm": 0.7450951337814331, + "learning_rate": 0.00017735234742704504, + "loss": 2.7328, + "step": 4448 + }, + { + "epoch": 0.3590509240577839, + "grad_norm": 0.6942493319511414, + "learning_rate": 0.00017734234123807614, + "loss": 2.7219, + "step": 4449 + }, + { + "epoch": 0.35913162779436686, + "grad_norm": 0.7676761746406555, + "learning_rate": 0.00017733233312153393, + "loss": 2.6594, + "step": 4450 + }, + { + "epoch": 0.3592123315309499, + "grad_norm": 0.7446104288101196, + "learning_rate": 0.00017732232307766778, + "loss": 2.6877, + "step": 4451 + }, + { + "epoch": 0.35929303526753287, + "grad_norm": 0.7551130056381226, + "learning_rate": 0.00017731231110672727, + "loss": 2.672, + "step": 4452 + }, + { + "epoch": 0.3593737390041159, + "grad_norm": 0.6876464486122131, + "learning_rate": 0.00017730229720896182, + "loss": 2.6658, + "step": 4453 + }, + { + "epoch": 0.3594544427406989, + "grad_norm": 0.6992844343185425, + "learning_rate": 0.00017729228138462107, + "loss": 2.6805, + "step": 4454 + }, + { + "epoch": 0.3595351464772819, + "grad_norm": 0.8437497615814209, + "learning_rate": 0.00017728226363395466, + "loss": 2.6884, + "step": 4455 + }, + { + "epoch": 0.3596158502138649, + "grad_norm": 0.7669322490692139, + "learning_rate": 0.00017727224395721217, + "loss": 2.6432, + "step": 4456 + }, + { + "epoch": 
0.3596965539504479, + "grad_norm": 0.7613428831100464, + "learning_rate": 0.0001772622223546434, + "loss": 2.6124, + "step": 4457 + }, + { + "epoch": 0.3597772576870309, + "grad_norm": 0.719932496547699, + "learning_rate": 0.00017725219882649807, + "loss": 2.6623, + "step": 4458 + }, + { + "epoch": 0.3598579614236139, + "grad_norm": 0.7650800347328186, + "learning_rate": 0.000177242173373026, + "loss": 2.7551, + "step": 4459 + }, + { + "epoch": 0.3599386651601969, + "grad_norm": 0.7423754930496216, + "learning_rate": 0.0001772321459944771, + "loss": 2.7375, + "step": 4460 + }, + { + "epoch": 0.36001936889677993, + "grad_norm": 0.7602835297584534, + "learning_rate": 0.0001772221166911012, + "loss": 2.7086, + "step": 4461 + }, + { + "epoch": 0.3601000726333629, + "grad_norm": 0.7246943712234497, + "learning_rate": 0.00017721208546314827, + "loss": 2.7068, + "step": 4462 + }, + { + "epoch": 0.36018077636994594, + "grad_norm": 0.715965211391449, + "learning_rate": 0.00017720205231086837, + "loss": 2.689, + "step": 4463 + }, + { + "epoch": 0.3602614801065289, + "grad_norm": 0.7696218490600586, + "learning_rate": 0.00017719201723451151, + "loss": 2.611, + "step": 4464 + }, + { + "epoch": 0.36034218384311195, + "grad_norm": 0.7599236369132996, + "learning_rate": 0.00017718198023432779, + "loss": 2.6504, + "step": 4465 + }, + { + "epoch": 0.36042288757969493, + "grad_norm": 0.7674956321716309, + "learning_rate": 0.0001771719413105674, + "loss": 2.7559, + "step": 4466 + }, + { + "epoch": 0.36050359131627796, + "grad_norm": 0.7263289093971252, + "learning_rate": 0.00017716190046348045, + "loss": 2.6822, + "step": 4467 + }, + { + "epoch": 0.36058429505286094, + "grad_norm": 0.7564195990562439, + "learning_rate": 0.0001771518576933173, + "loss": 2.7319, + "step": 4468 + }, + { + "epoch": 0.36066499878944397, + "grad_norm": 0.7291253805160522, + "learning_rate": 0.00017714181300032813, + "loss": 2.704, + "step": 4469 + }, + { + "epoch": 0.36074570252602695, + "grad_norm": 
0.7354169487953186, + "learning_rate": 0.00017713176638476332, + "loss": 2.6344, + "step": 4470 + }, + { + "epoch": 0.36082640626261, + "grad_norm": 0.7104110717773438, + "learning_rate": 0.0001771217178468733, + "loss": 2.665, + "step": 4471 + }, + { + "epoch": 0.36090710999919295, + "grad_norm": 0.6913934350013733, + "learning_rate": 0.00017711166738690847, + "loss": 2.6674, + "step": 4472 + }, + { + "epoch": 0.360987813735776, + "grad_norm": 0.7999634742736816, + "learning_rate": 0.0001771016150051193, + "loss": 2.6847, + "step": 4473 + }, + { + "epoch": 0.36106851747235896, + "grad_norm": 0.7878915667533875, + "learning_rate": 0.00017709156070175634, + "loss": 2.7125, + "step": 4474 + }, + { + "epoch": 0.361149221208942, + "grad_norm": 0.7145688533782959, + "learning_rate": 0.00017708150447707017, + "loss": 2.6863, + "step": 4475 + }, + { + "epoch": 0.36122992494552497, + "grad_norm": 0.7518604397773743, + "learning_rate": 0.00017707144633131143, + "loss": 2.6616, + "step": 4476 + }, + { + "epoch": 0.361310628682108, + "grad_norm": 0.735634982585907, + "learning_rate": 0.0001770613862647308, + "loss": 2.6315, + "step": 4477 + }, + { + "epoch": 0.361391332418691, + "grad_norm": 0.7925180196762085, + "learning_rate": 0.00017705132427757895, + "loss": 2.6951, + "step": 4478 + }, + { + "epoch": 0.361472036155274, + "grad_norm": 0.6949547529220581, + "learning_rate": 0.00017704126037010667, + "loss": 2.6934, + "step": 4479 + }, + { + "epoch": 0.361552739891857, + "grad_norm": 0.7233577966690063, + "learning_rate": 0.00017703119454256483, + "loss": 2.6773, + "step": 4480 + }, + { + "epoch": 0.36163344362844, + "grad_norm": 0.7303269505500793, + "learning_rate": 0.00017702112679520424, + "loss": 2.6351, + "step": 4481 + }, + { + "epoch": 0.361714147365023, + "grad_norm": 0.7620660066604614, + "learning_rate": 0.00017701105712827583, + "loss": 2.6748, + "step": 4482 + }, + { + "epoch": 0.36179485110160603, + "grad_norm": 0.7744965553283691, + "learning_rate": 
0.00017700098554203057, + "loss": 2.7013, + "step": 4483 + }, + { + "epoch": 0.361875554838189, + "grad_norm": 0.8017357587814331, + "learning_rate": 0.00017699091203671947, + "loss": 2.7273, + "step": 4484 + }, + { + "epoch": 0.36195625857477204, + "grad_norm": 0.8014432191848755, + "learning_rate": 0.0001769808366125936, + "loss": 2.6864, + "step": 4485 + }, + { + "epoch": 0.362036962311355, + "grad_norm": 0.6914888620376587, + "learning_rate": 0.00017697075926990406, + "loss": 2.6851, + "step": 4486 + }, + { + "epoch": 0.36211766604793805, + "grad_norm": 0.7472698092460632, + "learning_rate": 0.00017696068000890196, + "loss": 2.695, + "step": 4487 + }, + { + "epoch": 0.362198369784521, + "grad_norm": 0.7506285309791565, + "learning_rate": 0.00017695059882983855, + "loss": 2.7055, + "step": 4488 + }, + { + "epoch": 0.362279073521104, + "grad_norm": 0.7501141428947449, + "learning_rate": 0.00017694051573296507, + "loss": 2.7109, + "step": 4489 + }, + { + "epoch": 0.36235977725768703, + "grad_norm": 0.6654670834541321, + "learning_rate": 0.00017693043071853284, + "loss": 2.6165, + "step": 4490 + }, + { + "epoch": 0.36244048099427, + "grad_norm": 0.7894664406776428, + "learning_rate": 0.00017692034378679315, + "loss": 2.7274, + "step": 4491 + }, + { + "epoch": 0.36252118473085304, + "grad_norm": 0.7206711173057556, + "learning_rate": 0.00017691025493799743, + "loss": 2.7047, + "step": 4492 + }, + { + "epoch": 0.362601888467436, + "grad_norm": 0.7656282186508179, + "learning_rate": 0.00017690016417239708, + "loss": 2.696, + "step": 4493 + }, + { + "epoch": 0.36268259220401905, + "grad_norm": 0.7357437610626221, + "learning_rate": 0.00017689007149024362, + "loss": 2.7279, + "step": 4494 + }, + { + "epoch": 0.362763295940602, + "grad_norm": 0.7262146472930908, + "learning_rate": 0.00017687997689178864, + "loss": 2.6964, + "step": 4495 + }, + { + "epoch": 0.36284399967718506, + "grad_norm": 0.7839891910552979, + "learning_rate": 0.00017686988037728365, + "loss": 2.651, 
+ "step": 4496 + }, + { + "epoch": 0.36292470341376803, + "grad_norm": 0.7150306105613708, + "learning_rate": 0.00017685978194698028, + "loss": 2.6481, + "step": 4497 + }, + { + "epoch": 0.36300540715035107, + "grad_norm": 0.7144685387611389, + "learning_rate": 0.00017684968160113025, + "loss": 2.7169, + "step": 4498 + }, + { + "epoch": 0.36308611088693404, + "grad_norm": 0.7593061327934265, + "learning_rate": 0.00017683957933998525, + "loss": 2.7543, + "step": 4499 + }, + { + "epoch": 0.3631668146235171, + "grad_norm": 0.7301446199417114, + "learning_rate": 0.00017682947516379707, + "loss": 2.6806, + "step": 4500 + }, + { + "epoch": 0.36324751836010005, + "grad_norm": 0.7314243316650391, + "learning_rate": 0.00017681936907281757, + "loss": 2.7227, + "step": 4501 + }, + { + "epoch": 0.3633282220966831, + "grad_norm": 0.7695817351341248, + "learning_rate": 0.00017680926106729852, + "loss": 2.7229, + "step": 4502 + }, + { + "epoch": 0.36340892583326606, + "grad_norm": 0.6885762810707092, + "learning_rate": 0.00017679915114749198, + "loss": 2.7246, + "step": 4503 + }, + { + "epoch": 0.3634896295698491, + "grad_norm": 0.6893608570098877, + "learning_rate": 0.0001767890393136498, + "loss": 2.6572, + "step": 4504 + }, + { + "epoch": 0.36357033330643207, + "grad_norm": 0.7011978626251221, + "learning_rate": 0.00017677892556602402, + "loss": 2.6775, + "step": 4505 + }, + { + "epoch": 0.3636510370430151, + "grad_norm": 0.6693406105041504, + "learning_rate": 0.00017676880990486672, + "loss": 2.6183, + "step": 4506 + }, + { + "epoch": 0.3637317407795981, + "grad_norm": 0.7023048996925354, + "learning_rate": 0.00017675869233043002, + "loss": 2.6772, + "step": 4507 + }, + { + "epoch": 0.3638124445161811, + "grad_norm": 0.6903806328773499, + "learning_rate": 0.00017674857284296605, + "loss": 2.6486, + "step": 4508 + }, + { + "epoch": 0.3638931482527641, + "grad_norm": 0.6799258589744568, + "learning_rate": 0.000176738451442727, + "loss": 2.6305, + "step": 4509 + }, + { + 
"epoch": 0.3639738519893471, + "grad_norm": 0.7935682535171509, + "learning_rate": 0.00017672832812996517, + "loss": 2.7365, + "step": 4510 + }, + { + "epoch": 0.3640545557259301, + "grad_norm": 0.7593684196472168, + "learning_rate": 0.00017671820290493284, + "loss": 2.7029, + "step": 4511 + }, + { + "epoch": 0.36413525946251313, + "grad_norm": 0.7185288667678833, + "learning_rate": 0.00017670807576788234, + "loss": 2.6646, + "step": 4512 + }, + { + "epoch": 0.3642159631990961, + "grad_norm": 0.7260291576385498, + "learning_rate": 0.00017669794671906606, + "loss": 2.6615, + "step": 4513 + }, + { + "epoch": 0.36429666693567914, + "grad_norm": 0.6933417916297913, + "learning_rate": 0.00017668781575873646, + "loss": 2.6678, + "step": 4514 + }, + { + "epoch": 0.3643773706722621, + "grad_norm": 0.7657343149185181, + "learning_rate": 0.00017667768288714603, + "loss": 2.7155, + "step": 4515 + }, + { + "epoch": 0.36445807440884515, + "grad_norm": 0.7326949834823608, + "learning_rate": 0.0001766675481045473, + "loss": 2.732, + "step": 4516 + }, + { + "epoch": 0.3645387781454281, + "grad_norm": 0.7370324730873108, + "learning_rate": 0.0001766574114111929, + "loss": 2.6124, + "step": 4517 + }, + { + "epoch": 0.36461948188201115, + "grad_norm": 0.7280072569847107, + "learning_rate": 0.00017664727280733536, + "loss": 2.6793, + "step": 4518 + }, + { + "epoch": 0.36470018561859413, + "grad_norm": 0.7174237370491028, + "learning_rate": 0.00017663713229322748, + "loss": 2.629, + "step": 4519 + }, + { + "epoch": 0.36478088935517716, + "grad_norm": 0.6660771369934082, + "learning_rate": 0.0001766269898691219, + "loss": 2.6862, + "step": 4520 + }, + { + "epoch": 0.36486159309176014, + "grad_norm": 0.7024446725845337, + "learning_rate": 0.00017661684553527143, + "loss": 2.6602, + "step": 4521 + }, + { + "epoch": 0.36494229682834317, + "grad_norm": 0.7419618964195251, + "learning_rate": 0.0001766066992919289, + "loss": 2.6904, + "step": 4522 + }, + { + "epoch": 0.36502300056492615, + 
"grad_norm": 0.7425804138183594, + "learning_rate": 0.00017659655113934716, + "loss": 2.7312, + "step": 4523 + }, + { + "epoch": 0.3651037043015092, + "grad_norm": 0.7117013931274414, + "learning_rate": 0.00017658640107777915, + "loss": 2.6411, + "step": 4524 + }, + { + "epoch": 0.36518440803809216, + "grad_norm": 0.719613254070282, + "learning_rate": 0.00017657624910747782, + "loss": 2.6799, + "step": 4525 + }, + { + "epoch": 0.3652651117746752, + "grad_norm": 0.7654159665107727, + "learning_rate": 0.0001765660952286962, + "loss": 2.6675, + "step": 4526 + }, + { + "epoch": 0.36534581551125817, + "grad_norm": 0.7111814022064209, + "learning_rate": 0.00017655593944168734, + "loss": 2.6717, + "step": 4527 + }, + { + "epoch": 0.3654265192478412, + "grad_norm": 0.7494712471961975, + "learning_rate": 0.00017654578174670436, + "loss": 2.7181, + "step": 4528 + }, + { + "epoch": 0.3655072229844242, + "grad_norm": 0.8062291145324707, + "learning_rate": 0.0001765356221440004, + "loss": 2.6563, + "step": 4529 + }, + { + "epoch": 0.3655879267210072, + "grad_norm": 0.7923303842544556, + "learning_rate": 0.00017652546063382866, + "loss": 2.6295, + "step": 4530 + }, + { + "epoch": 0.3656686304575902, + "grad_norm": 0.7417340278625488, + "learning_rate": 0.00017651529721644238, + "loss": 2.6727, + "step": 4531 + }, + { + "epoch": 0.3657493341941732, + "grad_norm": 0.7326166033744812, + "learning_rate": 0.0001765051318920949, + "loss": 2.702, + "step": 4532 + }, + { + "epoch": 0.3658300379307562, + "grad_norm": 0.8133745193481445, + "learning_rate": 0.00017649496466103957, + "loss": 2.7157, + "step": 4533 + }, + { + "epoch": 0.3659107416673392, + "grad_norm": 0.710502564907074, + "learning_rate": 0.00017648479552352973, + "loss": 2.6668, + "step": 4534 + }, + { + "epoch": 0.3659914454039222, + "grad_norm": 0.6947012543678284, + "learning_rate": 0.00017647462447981885, + "loss": 2.6865, + "step": 4535 + }, + { + "epoch": 0.36607214914050523, + "grad_norm": 0.8432720899581909, + 
"learning_rate": 0.0001764644515301604, + "loss": 2.6226, + "step": 4536 + }, + { + "epoch": 0.3661528528770882, + "grad_norm": 0.7321269512176514, + "learning_rate": 0.00017645427667480802, + "loss": 2.662, + "step": 4537 + }, + { + "epoch": 0.36623355661367124, + "grad_norm": 0.8099743723869324, + "learning_rate": 0.00017644409991401515, + "loss": 2.6853, + "step": 4538 + }, + { + "epoch": 0.3663142603502542, + "grad_norm": 0.6885355114936829, + "learning_rate": 0.0001764339212480355, + "loss": 2.6672, + "step": 4539 + }, + { + "epoch": 0.3663949640868372, + "grad_norm": 0.911396324634552, + "learning_rate": 0.00017642374067712276, + "loss": 2.5778, + "step": 4540 + }, + { + "epoch": 0.3664756678234202, + "grad_norm": 0.7461941838264465, + "learning_rate": 0.0001764135582015306, + "loss": 2.6629, + "step": 4541 + }, + { + "epoch": 0.3665563715600032, + "grad_norm": 0.772741436958313, + "learning_rate": 0.0001764033738215128, + "loss": 2.725, + "step": 4542 + }, + { + "epoch": 0.36663707529658623, + "grad_norm": 0.7256152629852295, + "learning_rate": 0.0001763931875373232, + "loss": 2.6439, + "step": 4543 + }, + { + "epoch": 0.3667177790331692, + "grad_norm": 0.8089167475700378, + "learning_rate": 0.0001763829993492157, + "loss": 2.5972, + "step": 4544 + }, + { + "epoch": 0.36679848276975224, + "grad_norm": 0.7115232944488525, + "learning_rate": 0.0001763728092574442, + "loss": 2.633, + "step": 4545 + }, + { + "epoch": 0.3668791865063352, + "grad_norm": 0.7189347147941589, + "learning_rate": 0.00017636261726226266, + "loss": 2.619, + "step": 4546 + }, + { + "epoch": 0.36695989024291825, + "grad_norm": 0.7667742967605591, + "learning_rate": 0.00017635242336392506, + "loss": 2.667, + "step": 4547 + }, + { + "epoch": 0.36704059397950123, + "grad_norm": 0.7982457876205444, + "learning_rate": 0.00017634222756268545, + "loss": 2.6667, + "step": 4548 + }, + { + "epoch": 0.36712129771608426, + "grad_norm": 0.7465574145317078, + "learning_rate": 0.00017633202985879804, + 
"loss": 2.6436, + "step": 4549 + }, + { + "epoch": 0.36720200145266724, + "grad_norm": 0.7297804951667786, + "learning_rate": 0.00017632183025251686, + "loss": 2.6464, + "step": 4550 + }, + { + "epoch": 0.36728270518925027, + "grad_norm": 0.6885054111480713, + "learning_rate": 0.0001763116287440962, + "loss": 2.6742, + "step": 4551 + }, + { + "epoch": 0.36736340892583325, + "grad_norm": 0.7341574430465698, + "learning_rate": 0.00017630142533379023, + "loss": 2.6688, + "step": 4552 + }, + { + "epoch": 0.3674441126624163, + "grad_norm": 0.8565430045127869, + "learning_rate": 0.0001762912200218533, + "loss": 2.6889, + "step": 4553 + }, + { + "epoch": 0.36752481639899925, + "grad_norm": 0.7509489059448242, + "learning_rate": 0.00017628101280853974, + "loss": 2.6177, + "step": 4554 + }, + { + "epoch": 0.3676055201355823, + "grad_norm": 0.8128334879875183, + "learning_rate": 0.00017627080369410396, + "loss": 2.7301, + "step": 4555 + }, + { + "epoch": 0.36768622387216526, + "grad_norm": 0.7511637210845947, + "learning_rate": 0.00017626059267880035, + "loss": 2.7327, + "step": 4556 + }, + { + "epoch": 0.3677669276087483, + "grad_norm": 0.8350822925567627, + "learning_rate": 0.00017625037976288347, + "loss": 2.6073, + "step": 4557 + }, + { + "epoch": 0.36784763134533127, + "grad_norm": 0.7743313312530518, + "learning_rate": 0.00017624016494660776, + "loss": 2.7055, + "step": 4558 + }, + { + "epoch": 0.3679283350819143, + "grad_norm": 0.8196439146995544, + "learning_rate": 0.00017622994823022787, + "loss": 2.6565, + "step": 4559 + }, + { + "epoch": 0.3680090388184973, + "grad_norm": 0.7223393321037292, + "learning_rate": 0.00017621972961399837, + "loss": 2.68, + "step": 4560 + }, + { + "epoch": 0.3680897425550803, + "grad_norm": 0.7215418219566345, + "learning_rate": 0.000176209509098174, + "loss": 2.6627, + "step": 4561 + }, + { + "epoch": 0.3681704462916633, + "grad_norm": 0.8050473928451538, + "learning_rate": 0.00017619928668300946, + "loss": 2.5802, + "step": 4562 + }, 
+ { + "epoch": 0.3682511500282463, + "grad_norm": 0.7452750205993652, + "learning_rate": 0.00017618906236875948, + "loss": 2.6524, + "step": 4563 + }, + { + "epoch": 0.3683318537648293, + "grad_norm": 0.7950742244720459, + "learning_rate": 0.00017617883615567888, + "loss": 2.6371, + "step": 4564 + }, + { + "epoch": 0.36841255750141233, + "grad_norm": 0.7185397744178772, + "learning_rate": 0.00017616860804402261, + "loss": 2.6531, + "step": 4565 + }, + { + "epoch": 0.3684932612379953, + "grad_norm": 0.7480553388595581, + "learning_rate": 0.0001761583780340455, + "loss": 2.6727, + "step": 4566 + }, + { + "epoch": 0.36857396497457834, + "grad_norm": 0.7740724086761475, + "learning_rate": 0.00017614814612600251, + "loss": 2.6095, + "step": 4567 + }, + { + "epoch": 0.3686546687111613, + "grad_norm": 0.9159810543060303, + "learning_rate": 0.00017613791232014866, + "loss": 2.7039, + "step": 4568 + }, + { + "epoch": 0.36873537244774435, + "grad_norm": 0.7478305697441101, + "learning_rate": 0.00017612767661673905, + "loss": 2.6307, + "step": 4569 + }, + { + "epoch": 0.3688160761843273, + "grad_norm": 0.9154726266860962, + "learning_rate": 0.00017611743901602874, + "loss": 2.675, + "step": 4570 + }, + { + "epoch": 0.36889677992091036, + "grad_norm": 0.7903287410736084, + "learning_rate": 0.0001761071995182728, + "loss": 2.6938, + "step": 4571 + }, + { + "epoch": 0.36897748365749333, + "grad_norm": 0.7919119596481323, + "learning_rate": 0.0001760969581237266, + "loss": 2.7092, + "step": 4572 + }, + { + "epoch": 0.36905818739407636, + "grad_norm": 0.8052253723144531, + "learning_rate": 0.00017608671483264522, + "loss": 2.6914, + "step": 4573 + }, + { + "epoch": 0.36913889113065934, + "grad_norm": 0.7660435438156128, + "learning_rate": 0.00017607646964528403, + "loss": 2.674, + "step": 4574 + }, + { + "epoch": 0.3692195948672424, + "grad_norm": 0.8554383516311646, + "learning_rate": 0.00017606622256189836, + "loss": 2.6792, + "step": 4575 + }, + { + "epoch": 
0.36930029860382535, + "grad_norm": 0.7719140648841858, + "learning_rate": 0.00017605597358274358, + "loss": 2.6836, + "step": 4576 + }, + { + "epoch": 0.3693810023404084, + "grad_norm": 0.733068585395813, + "learning_rate": 0.00017604572270807513, + "loss": 2.6496, + "step": 4577 + }, + { + "epoch": 0.36946170607699136, + "grad_norm": 0.7622445225715637, + "learning_rate": 0.00017603546993814849, + "loss": 2.7097, + "step": 4578 + }, + { + "epoch": 0.3695424098135744, + "grad_norm": 0.7326679825782776, + "learning_rate": 0.00017602521527321913, + "loss": 2.6786, + "step": 4579 + }, + { + "epoch": 0.36962311355015737, + "grad_norm": 0.7579432129859924, + "learning_rate": 0.00017601495871354272, + "loss": 2.6618, + "step": 4580 + }, + { + "epoch": 0.3697038172867404, + "grad_norm": 0.8812715411186218, + "learning_rate": 0.00017600470025937485, + "loss": 2.6942, + "step": 4581 + }, + { + "epoch": 0.3697845210233234, + "grad_norm": 0.7230449318885803, + "learning_rate": 0.00017599443991097116, + "loss": 2.6374, + "step": 4582 + }, + { + "epoch": 0.3698652247599064, + "grad_norm": 0.8347739577293396, + "learning_rate": 0.00017598417766858735, + "loss": 2.6653, + "step": 4583 + }, + { + "epoch": 0.3699459284964894, + "grad_norm": 0.7826598882675171, + "learning_rate": 0.0001759739135324792, + "loss": 2.6342, + "step": 4584 + }, + { + "epoch": 0.3700266322330724, + "grad_norm": 0.749060332775116, + "learning_rate": 0.00017596364750290254, + "loss": 2.7256, + "step": 4585 + }, + { + "epoch": 0.3701073359696554, + "grad_norm": 0.7470815181732178, + "learning_rate": 0.00017595337958011323, + "loss": 2.6485, + "step": 4586 + }, + { + "epoch": 0.3701880397062384, + "grad_norm": 0.7251530289649963, + "learning_rate": 0.00017594310976436716, + "loss": 2.6613, + "step": 4587 + }, + { + "epoch": 0.3702687434428214, + "grad_norm": 0.7143718004226685, + "learning_rate": 0.00017593283805592027, + "loss": 2.6101, + "step": 4588 + }, + { + "epoch": 0.37034944717940443, + "grad_norm": 
0.7378203272819519, + "learning_rate": 0.00017592256445502855, + "loss": 2.6735, + "step": 4589 + }, + { + "epoch": 0.3704301509159874, + "grad_norm": 0.7193629741668701, + "learning_rate": 0.00017591228896194808, + "loss": 2.719, + "step": 4590 + }, + { + "epoch": 0.3705108546525704, + "grad_norm": 0.7377258539199829, + "learning_rate": 0.00017590201157693494, + "loss": 2.6789, + "step": 4591 + }, + { + "epoch": 0.3705915583891534, + "grad_norm": 0.7468351721763611, + "learning_rate": 0.00017589173230024522, + "loss": 2.6389, + "step": 4592 + }, + { + "epoch": 0.3706722621257364, + "grad_norm": 0.7612246870994568, + "learning_rate": 0.0001758814511321352, + "loss": 2.7045, + "step": 4593 + }, + { + "epoch": 0.37075296586231943, + "grad_norm": 0.7603838443756104, + "learning_rate": 0.00017587116807286102, + "loss": 2.7323, + "step": 4594 + }, + { + "epoch": 0.3708336695989024, + "grad_norm": 0.7436477541923523, + "learning_rate": 0.000175860883122679, + "loss": 2.7331, + "step": 4595 + }, + { + "epoch": 0.37091437333548544, + "grad_norm": 0.7004369497299194, + "learning_rate": 0.0001758505962818455, + "loss": 2.6418, + "step": 4596 + }, + { + "epoch": 0.3709950770720684, + "grad_norm": 0.711980938911438, + "learning_rate": 0.00017584030755061683, + "loss": 2.6184, + "step": 4597 + }, + { + "epoch": 0.37107578080865145, + "grad_norm": 0.6999367475509644, + "learning_rate": 0.0001758300169292495, + "loss": 2.6584, + "step": 4598 + }, + { + "epoch": 0.3711564845452344, + "grad_norm": 0.6755785942077637, + "learning_rate": 0.0001758197244179999, + "loss": 2.664, + "step": 4599 + }, + { + "epoch": 0.37123718828181745, + "grad_norm": 0.7174055576324463, + "learning_rate": 0.00017580943001712455, + "loss": 2.6821, + "step": 4600 + }, + { + "epoch": 0.37131789201840043, + "grad_norm": 0.8218933343887329, + "learning_rate": 0.00017579913372688005, + "loss": 2.6355, + "step": 4601 + }, + { + "epoch": 0.37139859575498346, + "grad_norm": 0.7417960166931152, + "learning_rate": 
0.000175788835547523, + "loss": 2.7226, + "step": 4602 + }, + { + "epoch": 0.37147929949156644, + "grad_norm": 0.824421763420105, + "learning_rate": 0.00017577853547931006, + "loss": 2.6526, + "step": 4603 + }, + { + "epoch": 0.37156000322814947, + "grad_norm": 0.7391949892044067, + "learning_rate": 0.00017576823352249794, + "loss": 2.6702, + "step": 4604 + }, + { + "epoch": 0.37164070696473245, + "grad_norm": 0.7890247106552124, + "learning_rate": 0.00017575792967734337, + "loss": 2.7281, + "step": 4605 + }, + { + "epoch": 0.3717214107013155, + "grad_norm": 0.785527765750885, + "learning_rate": 0.00017574762394410317, + "loss": 2.6728, + "step": 4606 + }, + { + "epoch": 0.37180211443789846, + "grad_norm": 0.7195863127708435, + "learning_rate": 0.00017573731632303415, + "loss": 2.6329, + "step": 4607 + }, + { + "epoch": 0.3718828181744815, + "grad_norm": 0.7896780371665955, + "learning_rate": 0.0001757270068143932, + "loss": 2.6776, + "step": 4608 + }, + { + "epoch": 0.37196352191106447, + "grad_norm": 0.7568275332450867, + "learning_rate": 0.00017571669541843735, + "loss": 2.6668, + "step": 4609 + }, + { + "epoch": 0.3720442256476475, + "grad_norm": 0.7923939228057861, + "learning_rate": 0.00017570638213542348, + "loss": 2.7033, + "step": 4610 + }, + { + "epoch": 0.3721249293842305, + "grad_norm": 0.7586569786071777, + "learning_rate": 0.00017569606696560868, + "loss": 2.7286, + "step": 4611 + }, + { + "epoch": 0.3722056331208135, + "grad_norm": 0.8222009539604187, + "learning_rate": 0.00017568574990925004, + "loss": 2.6448, + "step": 4612 + }, + { + "epoch": 0.3722863368573965, + "grad_norm": 0.7144019603729248, + "learning_rate": 0.00017567543096660466, + "loss": 2.6671, + "step": 4613 + }, + { + "epoch": 0.3723670405939795, + "grad_norm": 0.7602240443229675, + "learning_rate": 0.00017566511013792973, + "loss": 2.6492, + "step": 4614 + }, + { + "epoch": 0.3724477443305625, + "grad_norm": 0.7949689626693726, + "learning_rate": 0.00017565478742348245, + "loss": 
2.7002, + "step": 4615 + }, + { + "epoch": 0.3725284480671455, + "grad_norm": 0.6922519207000732, + "learning_rate": 0.00017564446282352012, + "loss": 2.6917, + "step": 4616 + }, + { + "epoch": 0.3726091518037285, + "grad_norm": 0.7382915616035461, + "learning_rate": 0.0001756341363383, + "loss": 2.6375, + "step": 4617 + }, + { + "epoch": 0.37268985554031153, + "grad_norm": 0.7511888742446899, + "learning_rate": 0.00017562380796807956, + "loss": 2.6823, + "step": 4618 + }, + { + "epoch": 0.3727705592768945, + "grad_norm": 0.7273457646369934, + "learning_rate": 0.00017561347771311608, + "loss": 2.6124, + "step": 4619 + }, + { + "epoch": 0.37285126301347754, + "grad_norm": 0.689440131187439, + "learning_rate": 0.0001756031455736671, + "loss": 2.6931, + "step": 4620 + }, + { + "epoch": 0.3729319667500605, + "grad_norm": 0.7755659222602844, + "learning_rate": 0.00017559281154999013, + "loss": 2.6273, + "step": 4621 + }, + { + "epoch": 0.37301267048664355, + "grad_norm": 0.6940193176269531, + "learning_rate": 0.00017558247564234265, + "loss": 2.641, + "step": 4622 + }, + { + "epoch": 0.3730933742232265, + "grad_norm": 0.7387529015541077, + "learning_rate": 0.00017557213785098232, + "loss": 2.7229, + "step": 4623 + }, + { + "epoch": 0.37317407795980956, + "grad_norm": 0.6807727217674255, + "learning_rate": 0.00017556179817616678, + "loss": 2.6469, + "step": 4624 + }, + { + "epoch": 0.37325478169639253, + "grad_norm": 0.7203819751739502, + "learning_rate": 0.0001755514566181537, + "loss": 2.6239, + "step": 4625 + }, + { + "epoch": 0.37333548543297557, + "grad_norm": 0.9345876574516296, + "learning_rate": 0.0001755411131772008, + "loss": 2.7154, + "step": 4626 + }, + { + "epoch": 0.37341618916955854, + "grad_norm": 0.6787357330322266, + "learning_rate": 0.00017553076785356594, + "loss": 2.6374, + "step": 4627 + }, + { + "epoch": 0.3734968929061416, + "grad_norm": 0.7153670191764832, + "learning_rate": 0.0001755204206475069, + "loss": 2.6734, + "step": 4628 + }, + { + 
"epoch": 0.37357759664272455, + "grad_norm": 0.736464262008667, + "learning_rate": 0.00017551007155928154, + "loss": 2.7241, + "step": 4629 + }, + { + "epoch": 0.3736583003793076, + "grad_norm": 0.7134939432144165, + "learning_rate": 0.0001754997205891478, + "loss": 2.682, + "step": 4630 + }, + { + "epoch": 0.37373900411589056, + "grad_norm": 0.7071199417114258, + "learning_rate": 0.0001754893677373637, + "loss": 2.7361, + "step": 4631 + }, + { + "epoch": 0.3738197078524736, + "grad_norm": 0.7040621638298035, + "learning_rate": 0.00017547901300418722, + "loss": 2.7031, + "step": 4632 + }, + { + "epoch": 0.37390041158905657, + "grad_norm": 0.7179287075996399, + "learning_rate": 0.00017546865638987642, + "loss": 2.6755, + "step": 4633 + }, + { + "epoch": 0.3739811153256396, + "grad_norm": 0.7579259276390076, + "learning_rate": 0.00017545829789468944, + "loss": 2.6514, + "step": 4634 + }, + { + "epoch": 0.3740618190622226, + "grad_norm": 0.7825835347175598, + "learning_rate": 0.0001754479375188844, + "loss": 2.6876, + "step": 4635 + }, + { + "epoch": 0.3741425227988056, + "grad_norm": 0.7913421988487244, + "learning_rate": 0.00017543757526271956, + "loss": 2.7153, + "step": 4636 + }, + { + "epoch": 0.3742232265353886, + "grad_norm": 0.7766042947769165, + "learning_rate": 0.00017542721112645313, + "loss": 2.645, + "step": 4637 + }, + { + "epoch": 0.3743039302719716, + "grad_norm": 0.7363953590393066, + "learning_rate": 0.00017541684511034343, + "loss": 2.6376, + "step": 4638 + }, + { + "epoch": 0.3743846340085546, + "grad_norm": 0.6928617358207703, + "learning_rate": 0.00017540647721464881, + "loss": 2.6882, + "step": 4639 + }, + { + "epoch": 0.3744653377451376, + "grad_norm": 0.7832257747650146, + "learning_rate": 0.0001753961074396277, + "loss": 2.7305, + "step": 4640 + }, + { + "epoch": 0.3745460414817206, + "grad_norm": 0.7180350422859192, + "learning_rate": 0.00017538573578553844, + "loss": 2.6783, + "step": 4641 + }, + { + "epoch": 0.3746267452183036, + 
"grad_norm": 0.718209981918335, + "learning_rate": 0.00017537536225263964, + "loss": 2.6961, + "step": 4642 + }, + { + "epoch": 0.3747074489548866, + "grad_norm": 0.7056655287742615, + "learning_rate": 0.00017536498684118975, + "loss": 2.7096, + "step": 4643 + }, + { + "epoch": 0.3747881526914696, + "grad_norm": 0.8004828691482544, + "learning_rate": 0.0001753546095514474, + "loss": 2.7168, + "step": 4644 + }, + { + "epoch": 0.3748688564280526, + "grad_norm": 0.7630821466445923, + "learning_rate": 0.0001753442303836712, + "loss": 2.7091, + "step": 4645 + }, + { + "epoch": 0.3749495601646356, + "grad_norm": 0.7539668083190918, + "learning_rate": 0.0001753338493381198, + "loss": 2.651, + "step": 4646 + }, + { + "epoch": 0.37503026390121863, + "grad_norm": 0.7243319749832153, + "learning_rate": 0.000175323466415052, + "loss": 2.6765, + "step": 4647 + }, + { + "epoch": 0.3751109676378016, + "grad_norm": 0.8906281590461731, + "learning_rate": 0.00017531308161472647, + "loss": 2.5938, + "step": 4648 + }, + { + "epoch": 0.37519167137438464, + "grad_norm": 0.787966251373291, + "learning_rate": 0.0001753026949374021, + "loss": 2.6011, + "step": 4649 + }, + { + "epoch": 0.3752723751109676, + "grad_norm": 0.7763915061950684, + "learning_rate": 0.00017529230638333772, + "loss": 2.7197, + "step": 4650 + }, + { + "epoch": 0.37535307884755065, + "grad_norm": 0.7717103362083435, + "learning_rate": 0.00017528191595279224, + "loss": 2.6605, + "step": 4651 + }, + { + "epoch": 0.3754337825841336, + "grad_norm": 0.7340055108070374, + "learning_rate": 0.00017527152364602464, + "loss": 2.6856, + "step": 4652 + }, + { + "epoch": 0.37551448632071666, + "grad_norm": 0.7805169820785522, + "learning_rate": 0.0001752611294632939, + "loss": 2.7088, + "step": 4653 + }, + { + "epoch": 0.37559519005729963, + "grad_norm": 0.7894891500473022, + "learning_rate": 0.00017525073340485912, + "loss": 2.6691, + "step": 4654 + }, + { + "epoch": 0.37567589379388266, + "grad_norm": 0.7627872824668884, + 
"learning_rate": 0.0001752403354709793, + "loss": 2.6536, + "step": 4655 + }, + { + "epoch": 0.37575659753046564, + "grad_norm": 0.8097225427627563, + "learning_rate": 0.00017522993566191367, + "loss": 2.7108, + "step": 4656 + }, + { + "epoch": 0.3758373012670487, + "grad_norm": 0.834449827671051, + "learning_rate": 0.00017521953397792137, + "loss": 2.7565, + "step": 4657 + }, + { + "epoch": 0.37591800500363165, + "grad_norm": 0.7924147844314575, + "learning_rate": 0.00017520913041926166, + "loss": 2.7101, + "step": 4658 + }, + { + "epoch": 0.3759987087402147, + "grad_norm": 0.7407249808311462, + "learning_rate": 0.00017519872498619385, + "loss": 2.6501, + "step": 4659 + }, + { + "epoch": 0.37607941247679766, + "grad_norm": 0.7251791954040527, + "learning_rate": 0.0001751883176789772, + "loss": 2.6786, + "step": 4660 + }, + { + "epoch": 0.3761601162133807, + "grad_norm": 0.7120431661605835, + "learning_rate": 0.00017517790849787116, + "loss": 2.7244, + "step": 4661 + }, + { + "epoch": 0.37624081994996367, + "grad_norm": 0.724836528301239, + "learning_rate": 0.00017516749744313513, + "loss": 2.7099, + "step": 4662 + }, + { + "epoch": 0.3763215236865467, + "grad_norm": 0.7788939476013184, + "learning_rate": 0.00017515708451502855, + "loss": 2.6206, + "step": 4663 + }, + { + "epoch": 0.3764022274231297, + "grad_norm": 0.7518914341926575, + "learning_rate": 0.00017514666971381099, + "loss": 2.7505, + "step": 4664 + }, + { + "epoch": 0.3764829311597127, + "grad_norm": 0.8004730939865112, + "learning_rate": 0.00017513625303974194, + "loss": 2.6119, + "step": 4665 + }, + { + "epoch": 0.3765636348962957, + "grad_norm": 0.7661109566688538, + "learning_rate": 0.00017512583449308107, + "loss": 2.724, + "step": 4666 + }, + { + "epoch": 0.3766443386328787, + "grad_norm": 0.7669692635536194, + "learning_rate": 0.00017511541407408805, + "loss": 2.7109, + "step": 4667 + }, + { + "epoch": 0.3767250423694617, + "grad_norm": 0.738608181476593, + "learning_rate": 
0.00017510499178302253, + "loss": 2.6642, + "step": 4668 + }, + { + "epoch": 0.3768057461060447, + "grad_norm": 0.7194661498069763, + "learning_rate": 0.00017509456762014432, + "loss": 2.6906, + "step": 4669 + }, + { + "epoch": 0.3768864498426277, + "grad_norm": 0.7025040984153748, + "learning_rate": 0.00017508414158571314, + "loss": 2.6596, + "step": 4670 + }, + { + "epoch": 0.37696715357921073, + "grad_norm": 0.7756575345993042, + "learning_rate": 0.00017507371367998892, + "loss": 2.7114, + "step": 4671 + }, + { + "epoch": 0.3770478573157937, + "grad_norm": 0.834966778755188, + "learning_rate": 0.00017506328390323148, + "loss": 2.7554, + "step": 4672 + }, + { + "epoch": 0.37712856105237674, + "grad_norm": 0.6997280120849609, + "learning_rate": 0.0001750528522557008, + "loss": 2.6285, + "step": 4673 + }, + { + "epoch": 0.3772092647889597, + "grad_norm": 0.7101716995239258, + "learning_rate": 0.0001750424187376569, + "loss": 2.6465, + "step": 4674 + }, + { + "epoch": 0.37728996852554275, + "grad_norm": 0.6577222347259521, + "learning_rate": 0.0001750319833493597, + "loss": 2.6372, + "step": 4675 + }, + { + "epoch": 0.37737067226212573, + "grad_norm": 0.7402529120445251, + "learning_rate": 0.00017502154609106937, + "loss": 2.6464, + "step": 4676 + }, + { + "epoch": 0.37745137599870876, + "grad_norm": 0.6858490705490112, + "learning_rate": 0.00017501110696304596, + "loss": 2.6141, + "step": 4677 + }, + { + "epoch": 0.37753207973529174, + "grad_norm": 0.729468822479248, + "learning_rate": 0.0001750006659655497, + "loss": 2.6671, + "step": 4678 + }, + { + "epoch": 0.37761278347187477, + "grad_norm": 0.7197559475898743, + "learning_rate": 0.0001749902230988408, + "loss": 2.6462, + "step": 4679 + }, + { + "epoch": 0.37769348720845775, + "grad_norm": 0.7171144485473633, + "learning_rate": 0.00017497977836317957, + "loss": 2.6427, + "step": 4680 + }, + { + "epoch": 0.3777741909450408, + "grad_norm": 0.7423805594444275, + "learning_rate": 0.00017496933175882617, + "loss": 
2.662, + "step": 4681 + }, + { + "epoch": 0.37785489468162375, + "grad_norm": 0.7498061060905457, + "learning_rate": 0.0001749588832860411, + "loss": 2.6243, + "step": 4682 + }, + { + "epoch": 0.3779355984182068, + "grad_norm": 0.7706165909767151, + "learning_rate": 0.0001749484329450847, + "loss": 2.6928, + "step": 4683 + }, + { + "epoch": 0.37801630215478976, + "grad_norm": 0.723363995552063, + "learning_rate": 0.00017493798073621745, + "loss": 2.6787, + "step": 4684 + }, + { + "epoch": 0.3780970058913728, + "grad_norm": 0.7444875836372375, + "learning_rate": 0.00017492752665969983, + "loss": 2.6789, + "step": 4685 + }, + { + "epoch": 0.37817770962795577, + "grad_norm": 0.6946491599082947, + "learning_rate": 0.00017491707071579237, + "loss": 2.6761, + "step": 4686 + }, + { + "epoch": 0.3782584133645388, + "grad_norm": 0.7171412706375122, + "learning_rate": 0.00017490661290475568, + "loss": 2.6788, + "step": 4687 + }, + { + "epoch": 0.3783391171011218, + "grad_norm": 0.7503272891044617, + "learning_rate": 0.00017489615322685038, + "loss": 2.7057, + "step": 4688 + }, + { + "epoch": 0.3784198208377048, + "grad_norm": 0.7458747625350952, + "learning_rate": 0.00017488569168233714, + "loss": 2.6857, + "step": 4689 + }, + { + "epoch": 0.3785005245742878, + "grad_norm": 0.7030516266822815, + "learning_rate": 0.0001748752282714768, + "loss": 2.6522, + "step": 4690 + }, + { + "epoch": 0.3785812283108708, + "grad_norm": 0.7717545628547668, + "learning_rate": 0.00017486476299452994, + "loss": 2.6527, + "step": 4691 + }, + { + "epoch": 0.3786619320474538, + "grad_norm": 0.6788322925567627, + "learning_rate": 0.0001748542958517575, + "loss": 2.6362, + "step": 4692 + }, + { + "epoch": 0.3787426357840368, + "grad_norm": 0.8518630266189575, + "learning_rate": 0.0001748438268434204, + "loss": 2.6812, + "step": 4693 + }, + { + "epoch": 0.3788233395206198, + "grad_norm": 0.7167141437530518, + "learning_rate": 0.00017483335596977945, + "loss": 2.6414, + "step": 4694 + }, + { + 
"epoch": 0.3789040432572028, + "grad_norm": 0.7748053073883057, + "learning_rate": 0.00017482288323109567, + "loss": 2.7291, + "step": 4695 + }, + { + "epoch": 0.3789847469937858, + "grad_norm": 0.7203041911125183, + "learning_rate": 0.00017481240862763002, + "loss": 2.6957, + "step": 4696 + }, + { + "epoch": 0.3790654507303688, + "grad_norm": 0.7973119020462036, + "learning_rate": 0.00017480193215964362, + "loss": 2.7456, + "step": 4697 + }, + { + "epoch": 0.3791461544669518, + "grad_norm": 0.7851223945617676, + "learning_rate": 0.00017479145382739755, + "loss": 2.6525, + "step": 4698 + }, + { + "epoch": 0.3792268582035348, + "grad_norm": 0.7012068629264832, + "learning_rate": 0.0001747809736311529, + "loss": 2.6662, + "step": 4699 + }, + { + "epoch": 0.37930756194011783, + "grad_norm": 0.7266128659248352, + "learning_rate": 0.00017477049157117093, + "loss": 2.5853, + "step": 4700 + }, + { + "epoch": 0.3793882656767008, + "grad_norm": 0.7264416217803955, + "learning_rate": 0.00017476000764771285, + "loss": 2.6972, + "step": 4701 + }, + { + "epoch": 0.37946896941328384, + "grad_norm": 0.797709047794342, + "learning_rate": 0.00017474952186103995, + "loss": 2.6997, + "step": 4702 + }, + { + "epoch": 0.3795496731498668, + "grad_norm": 0.7552568912506104, + "learning_rate": 0.00017473903421141358, + "loss": 2.7178, + "step": 4703 + }, + { + "epoch": 0.37963037688644985, + "grad_norm": 0.7611108422279358, + "learning_rate": 0.0001747285446990951, + "loss": 2.6997, + "step": 4704 + }, + { + "epoch": 0.3797110806230328, + "grad_norm": 0.8081753253936768, + "learning_rate": 0.00017471805332434595, + "loss": 2.7242, + "step": 4705 + }, + { + "epoch": 0.37979178435961586, + "grad_norm": 0.728301465511322, + "learning_rate": 0.0001747075600874276, + "loss": 2.5885, + "step": 4706 + }, + { + "epoch": 0.37987248809619883, + "grad_norm": 0.7548539638519287, + "learning_rate": 0.00017469706498860155, + "loss": 2.7038, + "step": 4707 + }, + { + "epoch": 0.37995319183278187, + 
"grad_norm": 0.7054354548454285, + "learning_rate": 0.00017468656802812938, + "loss": 2.6566, + "step": 4708 + }, + { + "epoch": 0.38003389556936484, + "grad_norm": 0.7231585383415222, + "learning_rate": 0.0001746760692062727, + "loss": 2.6564, + "step": 4709 + }, + { + "epoch": 0.3801145993059479, + "grad_norm": 0.6931934952735901, + "learning_rate": 0.00017466556852329318, + "loss": 2.6403, + "step": 4710 + }, + { + "epoch": 0.38019530304253085, + "grad_norm": 0.7882393598556519, + "learning_rate": 0.00017465506597945255, + "loss": 2.6337, + "step": 4711 + }, + { + "epoch": 0.3802760067791139, + "grad_norm": 0.7015109658241272, + "learning_rate": 0.0001746445615750125, + "loss": 2.6742, + "step": 4712 + }, + { + "epoch": 0.38035671051569686, + "grad_norm": 0.7653505802154541, + "learning_rate": 0.0001746340553102348, + "loss": 2.6742, + "step": 4713 + }, + { + "epoch": 0.3804374142522799, + "grad_norm": 0.7166270613670349, + "learning_rate": 0.0001746235471853814, + "loss": 2.5995, + "step": 4714 + }, + { + "epoch": 0.38051811798886287, + "grad_norm": 0.7612236738204956, + "learning_rate": 0.0001746130372007141, + "loss": 2.7595, + "step": 4715 + }, + { + "epoch": 0.3805988217254459, + "grad_norm": 0.6783852577209473, + "learning_rate": 0.00017460252535649493, + "loss": 2.6156, + "step": 4716 + }, + { + "epoch": 0.3806795254620289, + "grad_norm": 0.7495827078819275, + "learning_rate": 0.00017459201165298578, + "loss": 2.6847, + "step": 4717 + }, + { + "epoch": 0.3807602291986119, + "grad_norm": 0.814798891544342, + "learning_rate": 0.0001745814960904487, + "loss": 2.6211, + "step": 4718 + }, + { + "epoch": 0.3808409329351949, + "grad_norm": 0.7541367411613464, + "learning_rate": 0.0001745709786691458, + "loss": 2.6214, + "step": 4719 + }, + { + "epoch": 0.3809216366717779, + "grad_norm": 0.7065702676773071, + "learning_rate": 0.00017456045938933921, + "loss": 2.6699, + "step": 4720 + }, + { + "epoch": 0.3810023404083609, + "grad_norm": 0.751960813999176, + 
"learning_rate": 0.000174549938251291, + "loss": 2.6085, + "step": 4721 + }, + { + "epoch": 0.3810830441449439, + "grad_norm": 0.72068190574646, + "learning_rate": 0.00017453941525526353, + "loss": 2.6201, + "step": 4722 + }, + { + "epoch": 0.3811637478815269, + "grad_norm": 0.7201167941093445, + "learning_rate": 0.00017452889040151892, + "loss": 2.6775, + "step": 4723 + }, + { + "epoch": 0.38124445161810994, + "grad_norm": 0.7904958128929138, + "learning_rate": 0.00017451836369031956, + "loss": 2.7217, + "step": 4724 + }, + { + "epoch": 0.3813251553546929, + "grad_norm": 0.7096366882324219, + "learning_rate": 0.0001745078351219278, + "loss": 2.7004, + "step": 4725 + }, + { + "epoch": 0.38140585909127594, + "grad_norm": 0.6812441945075989, + "learning_rate": 0.00017449730469660602, + "loss": 2.6555, + "step": 4726 + }, + { + "epoch": 0.3814865628278589, + "grad_norm": 0.8037428855895996, + "learning_rate": 0.00017448677241461665, + "loss": 2.7094, + "step": 4727 + }, + { + "epoch": 0.38156726656444195, + "grad_norm": 0.7282679677009583, + "learning_rate": 0.00017447623827622223, + "loss": 2.6699, + "step": 4728 + }, + { + "epoch": 0.38164797030102493, + "grad_norm": 0.745705783367157, + "learning_rate": 0.00017446570228168523, + "loss": 2.6098, + "step": 4729 + }, + { + "epoch": 0.38172867403760796, + "grad_norm": 0.7098714113235474, + "learning_rate": 0.00017445516443126828, + "loss": 2.6628, + "step": 4730 + }, + { + "epoch": 0.38180937777419094, + "grad_norm": 0.7376620769500732, + "learning_rate": 0.00017444462472523405, + "loss": 2.7086, + "step": 4731 + }, + { + "epoch": 0.38189008151077397, + "grad_norm": 0.717800498008728, + "learning_rate": 0.00017443408316384512, + "loss": 2.6582, + "step": 4732 + }, + { + "epoch": 0.38197078524735695, + "grad_norm": 0.7061530947685242, + "learning_rate": 0.00017442353974736428, + "loss": 2.6817, + "step": 4733 + }, + { + "epoch": 0.38205148898394, + "grad_norm": 0.744667112827301, + "learning_rate": 
0.0001744129944760543, + "loss": 2.6649, + "step": 4734 + }, + { + "epoch": 0.38213219272052296, + "grad_norm": 0.7302529215812683, + "learning_rate": 0.00017440244735017797, + "loss": 2.7313, + "step": 4735 + }, + { + "epoch": 0.382212896457106, + "grad_norm": 0.6845258474349976, + "learning_rate": 0.00017439189836999816, + "loss": 2.637, + "step": 4736 + }, + { + "epoch": 0.38229360019368896, + "grad_norm": 0.7060490250587463, + "learning_rate": 0.0001743813475357778, + "loss": 2.6674, + "step": 4737 + }, + { + "epoch": 0.382374303930272, + "grad_norm": 0.7146841287612915, + "learning_rate": 0.00017437079484777977, + "loss": 2.6607, + "step": 4738 + }, + { + "epoch": 0.382455007666855, + "grad_norm": 0.7107662558555603, + "learning_rate": 0.00017436024030626719, + "loss": 2.6777, + "step": 4739 + }, + { + "epoch": 0.382535711403438, + "grad_norm": 0.7356777191162109, + "learning_rate": 0.00017434968391150303, + "loss": 2.5801, + "step": 4740 + }, + { + "epoch": 0.382616415140021, + "grad_norm": 0.6839054226875305, + "learning_rate": 0.00017433912566375037, + "loss": 2.6319, + "step": 4741 + }, + { + "epoch": 0.382697118876604, + "grad_norm": 0.7049627900123596, + "learning_rate": 0.00017432856556327236, + "loss": 2.741, + "step": 4742 + }, + { + "epoch": 0.382777822613187, + "grad_norm": 0.7926551103591919, + "learning_rate": 0.00017431800361033224, + "loss": 2.64, + "step": 4743 + }, + { + "epoch": 0.38285852634976997, + "grad_norm": 0.734272301197052, + "learning_rate": 0.0001743074398051932, + "loss": 2.6575, + "step": 4744 + }, + { + "epoch": 0.382939230086353, + "grad_norm": 0.6959543824195862, + "learning_rate": 0.00017429687414811847, + "loss": 2.664, + "step": 4745 + }, + { + "epoch": 0.383019933822936, + "grad_norm": 0.7258255481719971, + "learning_rate": 0.00017428630663937148, + "loss": 2.6597, + "step": 4746 + }, + { + "epoch": 0.383100637559519, + "grad_norm": 0.8067473769187927, + "learning_rate": 0.0001742757372792155, + "loss": 2.6798, + "step": 
4747 + }, + { + "epoch": 0.383181341296102, + "grad_norm": 0.7000626921653748, + "learning_rate": 0.000174265166067914, + "loss": 2.6561, + "step": 4748 + }, + { + "epoch": 0.383262045032685, + "grad_norm": 0.818914532661438, + "learning_rate": 0.00017425459300573045, + "loss": 2.6491, + "step": 4749 + }, + { + "epoch": 0.383342748769268, + "grad_norm": 0.7060543298721313, + "learning_rate": 0.00017424401809292833, + "loss": 2.6825, + "step": 4750 + }, + { + "epoch": 0.383423452505851, + "grad_norm": 0.893488883972168, + "learning_rate": 0.0001742334413297712, + "loss": 2.7201, + "step": 4751 + }, + { + "epoch": 0.383504156242434, + "grad_norm": 0.8131078481674194, + "learning_rate": 0.00017422286271652265, + "loss": 2.7828, + "step": 4752 + }, + { + "epoch": 0.38358485997901703, + "grad_norm": 0.7735587954521179, + "learning_rate": 0.00017421228225344634, + "loss": 2.6489, + "step": 4753 + }, + { + "epoch": 0.3836655637156, + "grad_norm": 0.713800311088562, + "learning_rate": 0.000174201699940806, + "loss": 2.6686, + "step": 4754 + }, + { + "epoch": 0.38374626745218304, + "grad_norm": 0.8246580362319946, + "learning_rate": 0.00017419111577886528, + "loss": 2.6771, + "step": 4755 + }, + { + "epoch": 0.383826971188766, + "grad_norm": 0.694542646408081, + "learning_rate": 0.00017418052976788805, + "loss": 2.6632, + "step": 4756 + }, + { + "epoch": 0.38390767492534905, + "grad_norm": 0.7200453281402588, + "learning_rate": 0.0001741699419081381, + "loss": 2.6386, + "step": 4757 + }, + { + "epoch": 0.38398837866193203, + "grad_norm": 0.7002073526382446, + "learning_rate": 0.00017415935219987933, + "loss": 2.6399, + "step": 4758 + }, + { + "epoch": 0.38406908239851506, + "grad_norm": 0.7056967616081238, + "learning_rate": 0.00017414876064337565, + "loss": 2.7048, + "step": 4759 + }, + { + "epoch": 0.38414978613509804, + "grad_norm": 0.7406448721885681, + "learning_rate": 0.000174138167238891, + "loss": 2.6256, + "step": 4760 + }, + { + "epoch": 0.38423048987168107, + 
"grad_norm": 0.7280529737472534, + "learning_rate": 0.00017412757198668945, + "loss": 2.6393, + "step": 4761 + }, + { + "epoch": 0.38431119360826405, + "grad_norm": 0.7626908421516418, + "learning_rate": 0.00017411697488703502, + "loss": 2.6717, + "step": 4762 + }, + { + "epoch": 0.3843918973448471, + "grad_norm": 0.716345489025116, + "learning_rate": 0.00017410637594019184, + "loss": 2.6457, + "step": 4763 + }, + { + "epoch": 0.38447260108143005, + "grad_norm": 0.8825077414512634, + "learning_rate": 0.00017409577514642405, + "loss": 2.7042, + "step": 4764 + }, + { + "epoch": 0.3845533048180131, + "grad_norm": 0.7301186919212341, + "learning_rate": 0.00017408517250599585, + "loss": 2.7065, + "step": 4765 + }, + { + "epoch": 0.38463400855459606, + "grad_norm": 0.8235788345336914, + "learning_rate": 0.0001740745680191715, + "loss": 2.6315, + "step": 4766 + }, + { + "epoch": 0.3847147122911791, + "grad_norm": 0.7355515956878662, + "learning_rate": 0.00017406396168621527, + "loss": 2.6939, + "step": 4767 + }, + { + "epoch": 0.38479541602776207, + "grad_norm": 0.6781682372093201, + "learning_rate": 0.0001740533535073915, + "loss": 2.6071, + "step": 4768 + }, + { + "epoch": 0.3848761197643451, + "grad_norm": 0.801191508769989, + "learning_rate": 0.0001740427434829646, + "loss": 2.6635, + "step": 4769 + }, + { + "epoch": 0.3849568235009281, + "grad_norm": 0.759682297706604, + "learning_rate": 0.00017403213161319903, + "loss": 2.6823, + "step": 4770 + }, + { + "epoch": 0.3850375272375111, + "grad_norm": 0.806498110294342, + "learning_rate": 0.00017402151789835916, + "loss": 2.7111, + "step": 4771 + }, + { + "epoch": 0.3851182309740941, + "grad_norm": 0.7677996158599854, + "learning_rate": 0.00017401090233870958, + "loss": 2.6701, + "step": 4772 + }, + { + "epoch": 0.3851989347106771, + "grad_norm": 0.7449933290481567, + "learning_rate": 0.00017400028493451487, + "loss": 2.7037, + "step": 4773 + }, + { + "epoch": 0.3852796384472601, + "grad_norm": 0.7506107091903687, + 
"learning_rate": 0.0001739896656860396, + "loss": 2.6587, + "step": 4774 + }, + { + "epoch": 0.38536034218384313, + "grad_norm": 0.8781036734580994, + "learning_rate": 0.00017397904459354844, + "loss": 2.7634, + "step": 4775 + }, + { + "epoch": 0.3854410459204261, + "grad_norm": 0.7067514657974243, + "learning_rate": 0.0001739684216573061, + "loss": 2.638, + "step": 4776 + }, + { + "epoch": 0.38552174965700914, + "grad_norm": 0.7742886543273926, + "learning_rate": 0.00017395779687757735, + "loss": 2.7043, + "step": 4777 + }, + { + "epoch": 0.3856024533935921, + "grad_norm": 0.7348291277885437, + "learning_rate": 0.00017394717025462697, + "loss": 2.7404, + "step": 4778 + }, + { + "epoch": 0.38568315713017515, + "grad_norm": 0.7449346780776978, + "learning_rate": 0.00017393654178871984, + "loss": 2.631, + "step": 4779 + }, + { + "epoch": 0.3857638608667581, + "grad_norm": 0.7191200256347656, + "learning_rate": 0.00017392591148012078, + "loss": 2.6776, + "step": 4780 + }, + { + "epoch": 0.38584456460334116, + "grad_norm": 0.7055533528327942, + "learning_rate": 0.00017391527932909476, + "loss": 2.6219, + "step": 4781 + }, + { + "epoch": 0.38592526833992413, + "grad_norm": 0.73755943775177, + "learning_rate": 0.0001739046453359068, + "loss": 2.6692, + "step": 4782 + }, + { + "epoch": 0.38600597207650716, + "grad_norm": 0.7469369769096375, + "learning_rate": 0.00017389400950082185, + "loss": 2.6572, + "step": 4783 + }, + { + "epoch": 0.38608667581309014, + "grad_norm": 0.7552534341812134, + "learning_rate": 0.00017388337182410504, + "loss": 2.6853, + "step": 4784 + }, + { + "epoch": 0.3861673795496732, + "grad_norm": 0.7453532814979553, + "learning_rate": 0.00017387273230602145, + "loss": 2.6601, + "step": 4785 + }, + { + "epoch": 0.38624808328625615, + "grad_norm": 0.7259301543235779, + "learning_rate": 0.0001738620909468363, + "loss": 2.6997, + "step": 4786 + }, + { + "epoch": 0.3863287870228392, + "grad_norm": 0.6970019936561584, + "learning_rate": 
0.00017385144774681476, + "loss": 2.7497, + "step": 4787 + }, + { + "epoch": 0.38640949075942216, + "grad_norm": 0.7172032594680786, + "learning_rate": 0.00017384080270622208, + "loss": 2.7182, + "step": 4788 + }, + { + "epoch": 0.3864901944960052, + "grad_norm": 0.7184371948242188, + "learning_rate": 0.00017383015582532357, + "loss": 2.6358, + "step": 4789 + }, + { + "epoch": 0.38657089823258817, + "grad_norm": 0.7302096486091614, + "learning_rate": 0.00017381950710438458, + "loss": 2.6066, + "step": 4790 + }, + { + "epoch": 0.3866516019691712, + "grad_norm": 0.7043540477752686, + "learning_rate": 0.00017380885654367053, + "loss": 2.699, + "step": 4791 + }, + { + "epoch": 0.3867323057057542, + "grad_norm": 0.6919732689857483, + "learning_rate": 0.0001737982041434468, + "loss": 2.6025, + "step": 4792 + }, + { + "epoch": 0.3868130094423372, + "grad_norm": 0.7277705669403076, + "learning_rate": 0.00017378754990397894, + "loss": 2.6764, + "step": 4793 + }, + { + "epoch": 0.3868937131789202, + "grad_norm": 0.7546190619468689, + "learning_rate": 0.00017377689382553247, + "loss": 2.5865, + "step": 4794 + }, + { + "epoch": 0.38697441691550316, + "grad_norm": 0.7636401653289795, + "learning_rate": 0.00017376623590837294, + "loss": 2.6488, + "step": 4795 + }, + { + "epoch": 0.3870551206520862, + "grad_norm": 0.6945658922195435, + "learning_rate": 0.00017375557615276595, + "loss": 2.6739, + "step": 4796 + }, + { + "epoch": 0.38713582438866917, + "grad_norm": 0.7503637075424194, + "learning_rate": 0.00017374491455897722, + "loss": 2.6854, + "step": 4797 + }, + { + "epoch": 0.3872165281252522, + "grad_norm": 0.7457373142242432, + "learning_rate": 0.00017373425112727247, + "loss": 2.6659, + "step": 4798 + }, + { + "epoch": 0.3872972318618352, + "grad_norm": 0.7742534875869751, + "learning_rate": 0.0001737235858579174, + "loss": 2.6461, + "step": 4799 + }, + { + "epoch": 0.3873779355984182, + "grad_norm": 0.7397909760475159, + "learning_rate": 0.0001737129187511779, + "loss": 
2.6779, + "step": 4800 + }, + { + "epoch": 0.3874586393350012, + "grad_norm": 0.7922031879425049, + "learning_rate": 0.00017370224980731974, + "loss": 2.6417, + "step": 4801 + }, + { + "epoch": 0.3875393430715842, + "grad_norm": 0.8503968715667725, + "learning_rate": 0.00017369157902660887, + "loss": 2.7063, + "step": 4802 + }, + { + "epoch": 0.3876200468081672, + "grad_norm": 0.7143701314926147, + "learning_rate": 0.00017368090640931125, + "loss": 2.6152, + "step": 4803 + }, + { + "epoch": 0.38770075054475023, + "grad_norm": 0.8016753196716309, + "learning_rate": 0.0001736702319556928, + "loss": 2.6005, + "step": 4804 + }, + { + "epoch": 0.3877814542813332, + "grad_norm": 0.7329538464546204, + "learning_rate": 0.00017365955566601962, + "loss": 2.6027, + "step": 4805 + }, + { + "epoch": 0.38786215801791624, + "grad_norm": 0.7005148530006409, + "learning_rate": 0.00017364887754055773, + "loss": 2.6585, + "step": 4806 + }, + { + "epoch": 0.3879428617544992, + "grad_norm": 0.7092769145965576, + "learning_rate": 0.00017363819757957333, + "loss": 2.6763, + "step": 4807 + }, + { + "epoch": 0.38802356549108225, + "grad_norm": 0.7475202679634094, + "learning_rate": 0.0001736275157833325, + "loss": 2.5969, + "step": 4808 + }, + { + "epoch": 0.3881042692276652, + "grad_norm": 0.822496235370636, + "learning_rate": 0.0001736168321521016, + "loss": 2.6758, + "step": 4809 + }, + { + "epoch": 0.38818497296424825, + "grad_norm": 0.7756842374801636, + "learning_rate": 0.0001736061466861467, + "loss": 2.6676, + "step": 4810 + }, + { + "epoch": 0.38826567670083123, + "grad_norm": 0.7192497849464417, + "learning_rate": 0.00017359545938573428, + "loss": 2.7045, + "step": 4811 + }, + { + "epoch": 0.38834638043741426, + "grad_norm": 0.7064149379730225, + "learning_rate": 0.00017358477025113063, + "loss": 2.6169, + "step": 4812 + }, + { + "epoch": 0.38842708417399724, + "grad_norm": 0.7297258973121643, + "learning_rate": 0.00017357407928260215, + "loss": 2.612, + "step": 4813 + }, + { + 
"epoch": 0.38850778791058027, + "grad_norm": 0.7011935114860535, + "learning_rate": 0.00017356338648041528, + "loss": 2.6507, + "step": 4814 + }, + { + "epoch": 0.38858849164716325, + "grad_norm": 0.7647256255149841, + "learning_rate": 0.00017355269184483651, + "loss": 2.6838, + "step": 4815 + }, + { + "epoch": 0.3886691953837463, + "grad_norm": 0.690182089805603, + "learning_rate": 0.0001735419953761324, + "loss": 2.6996, + "step": 4816 + }, + { + "epoch": 0.38874989912032926, + "grad_norm": 0.7142173647880554, + "learning_rate": 0.00017353129707456955, + "loss": 2.6705, + "step": 4817 + }, + { + "epoch": 0.3888306028569123, + "grad_norm": 0.801369309425354, + "learning_rate": 0.00017352059694041456, + "loss": 2.7002, + "step": 4818 + }, + { + "epoch": 0.38891130659349527, + "grad_norm": 0.7021649479866028, + "learning_rate": 0.0001735098949739341, + "loss": 2.7042, + "step": 4819 + }, + { + "epoch": 0.3889920103300783, + "grad_norm": 0.6802586317062378, + "learning_rate": 0.00017349919117539488, + "loss": 2.7186, + "step": 4820 + }, + { + "epoch": 0.3890727140666613, + "grad_norm": 0.7723212838172913, + "learning_rate": 0.0001734884855450637, + "loss": 2.608, + "step": 4821 + }, + { + "epoch": 0.3891534178032443, + "grad_norm": 0.7037193179130554, + "learning_rate": 0.00017347777808320735, + "loss": 2.6198, + "step": 4822 + }, + { + "epoch": 0.3892341215398273, + "grad_norm": 0.7172731161117554, + "learning_rate": 0.00017346706879009272, + "loss": 2.7037, + "step": 4823 + }, + { + "epoch": 0.3893148252764103, + "grad_norm": 0.7421539425849915, + "learning_rate": 0.00017345635766598667, + "loss": 2.6619, + "step": 4824 + }, + { + "epoch": 0.3893955290129933, + "grad_norm": 0.7587071061134338, + "learning_rate": 0.0001734456447111562, + "loss": 2.6229, + "step": 4825 + }, + { + "epoch": 0.3894762327495763, + "grad_norm": 0.6981459259986877, + "learning_rate": 0.00017343492992586822, + "loss": 2.5927, + "step": 4826 + }, + { + "epoch": 0.3895569364861593, + 
"grad_norm": 0.7628491520881653, + "learning_rate": 0.00017342421331038987, + "loss": 2.7047, + "step": 4827 + }, + { + "epoch": 0.38963764022274233, + "grad_norm": 0.8005064129829407, + "learning_rate": 0.00017341349486498818, + "loss": 2.6918, + "step": 4828 + }, + { + "epoch": 0.3897183439593253, + "grad_norm": 0.7756431102752686, + "learning_rate": 0.0001734027745899303, + "loss": 2.6621, + "step": 4829 + }, + { + "epoch": 0.38979904769590834, + "grad_norm": 0.7317833304405212, + "learning_rate": 0.00017339205248548338, + "loss": 2.7134, + "step": 4830 + }, + { + "epoch": 0.3898797514324913, + "grad_norm": 0.7293959259986877, + "learning_rate": 0.0001733813285519147, + "loss": 2.6865, + "step": 4831 + }, + { + "epoch": 0.38996045516907435, + "grad_norm": 0.7120299935340881, + "learning_rate": 0.00017337060278949147, + "loss": 2.6915, + "step": 4832 + }, + { + "epoch": 0.3900411589056573, + "grad_norm": 0.7255397439002991, + "learning_rate": 0.00017335987519848103, + "loss": 2.6671, + "step": 4833 + }, + { + "epoch": 0.39012186264224036, + "grad_norm": 0.7849408388137817, + "learning_rate": 0.0001733491457791507, + "loss": 2.6301, + "step": 4834 + }, + { + "epoch": 0.39020256637882333, + "grad_norm": 0.6998472809791565, + "learning_rate": 0.00017333841453176797, + "loss": 2.6587, + "step": 4835 + }, + { + "epoch": 0.39028327011540637, + "grad_norm": 0.7530023455619812, + "learning_rate": 0.00017332768145660024, + "loss": 2.7011, + "step": 4836 + }, + { + "epoch": 0.39036397385198934, + "grad_norm": 0.7251207828521729, + "learning_rate": 0.00017331694655391497, + "loss": 2.6416, + "step": 4837 + }, + { + "epoch": 0.3904446775885724, + "grad_norm": 0.7016854882240295, + "learning_rate": 0.00017330620982397975, + "loss": 2.7224, + "step": 4838 + }, + { + "epoch": 0.39052538132515535, + "grad_norm": 0.7253310084342957, + "learning_rate": 0.00017329547126706217, + "loss": 2.6747, + "step": 4839 + }, + { + "epoch": 0.3906060850617384, + "grad_norm": 
0.7114601731300354, + "learning_rate": 0.00017328473088342987, + "loss": 2.6654, + "step": 4840 + }, + { + "epoch": 0.39068678879832136, + "grad_norm": 0.7773289680480957, + "learning_rate": 0.00017327398867335048, + "loss": 2.6625, + "step": 4841 + }, + { + "epoch": 0.3907674925349044, + "grad_norm": 0.7541868686676025, + "learning_rate": 0.00017326324463709175, + "loss": 2.667, + "step": 4842 + }, + { + "epoch": 0.39084819627148737, + "grad_norm": 0.8095890283584595, + "learning_rate": 0.00017325249877492147, + "loss": 2.706, + "step": 4843 + }, + { + "epoch": 0.3909289000080704, + "grad_norm": 0.7019474506378174, + "learning_rate": 0.00017324175108710742, + "loss": 2.6125, + "step": 4844 + }, + { + "epoch": 0.3910096037446534, + "grad_norm": 0.7055396437644958, + "learning_rate": 0.00017323100157391746, + "loss": 2.6373, + "step": 4845 + }, + { + "epoch": 0.39109030748123635, + "grad_norm": 0.7332476377487183, + "learning_rate": 0.00017322025023561955, + "loss": 2.6559, + "step": 4846 + }, + { + "epoch": 0.3911710112178194, + "grad_norm": 0.7740387916564941, + "learning_rate": 0.00017320949707248158, + "loss": 2.7341, + "step": 4847 + }, + { + "epoch": 0.39125171495440236, + "grad_norm": 0.7371044754981995, + "learning_rate": 0.0001731987420847716, + "loss": 2.7318, + "step": 4848 + }, + { + "epoch": 0.3913324186909854, + "grad_norm": 0.7897786498069763, + "learning_rate": 0.00017318798527275758, + "loss": 2.6759, + "step": 4849 + }, + { + "epoch": 0.39141312242756837, + "grad_norm": 0.7149896621704102, + "learning_rate": 0.0001731772266367077, + "loss": 2.7097, + "step": 4850 + }, + { + "epoch": 0.3914938261641514, + "grad_norm": 0.7824358344078064, + "learning_rate": 0.00017316646617689002, + "loss": 2.6376, + "step": 4851 + }, + { + "epoch": 0.3915745299007344, + "grad_norm": 0.7704496383666992, + "learning_rate": 0.00017315570389357272, + "loss": 2.6539, + "step": 4852 + }, + { + "epoch": 0.3916552336373174, + "grad_norm": 0.7489706873893738, + 
"learning_rate": 0.00017314493978702407, + "loss": 2.6716, + "step": 4853 + }, + { + "epoch": 0.3917359373739004, + "grad_norm": 0.7368690967559814, + "learning_rate": 0.00017313417385751234, + "loss": 2.7171, + "step": 4854 + }, + { + "epoch": 0.3918166411104834, + "grad_norm": 0.7215858697891235, + "learning_rate": 0.00017312340610530579, + "loss": 2.6306, + "step": 4855 + }, + { + "epoch": 0.3918973448470664, + "grad_norm": 0.7622217535972595, + "learning_rate": 0.00017311263653067285, + "loss": 2.6089, + "step": 4856 + }, + { + "epoch": 0.39197804858364943, + "grad_norm": 0.7317889332771301, + "learning_rate": 0.00017310186513388185, + "loss": 2.6831, + "step": 4857 + }, + { + "epoch": 0.3920587523202324, + "grad_norm": 0.894185483455658, + "learning_rate": 0.0001730910919152013, + "loss": 2.684, + "step": 4858 + }, + { + "epoch": 0.39213945605681544, + "grad_norm": 0.7313157916069031, + "learning_rate": 0.00017308031687489968, + "loss": 2.6465, + "step": 4859 + }, + { + "epoch": 0.3922201597933984, + "grad_norm": 0.7765825390815735, + "learning_rate": 0.00017306954001324552, + "loss": 2.6526, + "step": 4860 + }, + { + "epoch": 0.39230086352998145, + "grad_norm": 0.7171424031257629, + "learning_rate": 0.00017305876133050742, + "loss": 2.6212, + "step": 4861 + }, + { + "epoch": 0.3923815672665644, + "grad_norm": 0.7215112447738647, + "learning_rate": 0.000173047980826954, + "loss": 2.6329, + "step": 4862 + }, + { + "epoch": 0.39246227100314746, + "grad_norm": 0.7393578886985779, + "learning_rate": 0.00017303719850285396, + "loss": 2.7264, + "step": 4863 + }, + { + "epoch": 0.39254297473973043, + "grad_norm": 0.7620136737823486, + "learning_rate": 0.00017302641435847603, + "loss": 2.6686, + "step": 4864 + }, + { + "epoch": 0.39262367847631346, + "grad_norm": 0.7290963530540466, + "learning_rate": 0.00017301562839408893, + "loss": 2.578, + "step": 4865 + }, + { + "epoch": 0.39270438221289644, + "grad_norm": 0.6978541612625122, + "learning_rate": 
0.00017300484060996153, + "loss": 2.6783, + "step": 4866 + }, + { + "epoch": 0.3927850859494795, + "grad_norm": 0.7212007641792297, + "learning_rate": 0.00017299405100636264, + "loss": 2.6282, + "step": 4867 + }, + { + "epoch": 0.39286578968606245, + "grad_norm": 0.757324755191803, + "learning_rate": 0.0001729832595835612, + "loss": 2.6933, + "step": 4868 + }, + { + "epoch": 0.3929464934226455, + "grad_norm": 0.7052869200706482, + "learning_rate": 0.00017297246634182618, + "loss": 2.7152, + "step": 4869 + }, + { + "epoch": 0.39302719715922846, + "grad_norm": 0.7326259016990662, + "learning_rate": 0.0001729616712814265, + "loss": 2.6792, + "step": 4870 + }, + { + "epoch": 0.3931079008958115, + "grad_norm": 0.7540302276611328, + "learning_rate": 0.00017295087440263128, + "loss": 2.6621, + "step": 4871 + }, + { + "epoch": 0.39318860463239447, + "grad_norm": 0.765454888343811, + "learning_rate": 0.00017294007570570956, + "loss": 2.7049, + "step": 4872 + }, + { + "epoch": 0.3932693083689775, + "grad_norm": 0.7303065061569214, + "learning_rate": 0.0001729292751909305, + "loss": 2.6867, + "step": 4873 + }, + { + "epoch": 0.3933500121055605, + "grad_norm": 0.7049854397773743, + "learning_rate": 0.00017291847285856325, + "loss": 2.7052, + "step": 4874 + }, + { + "epoch": 0.3934307158421435, + "grad_norm": 0.7199053764343262, + "learning_rate": 0.00017290766870887704, + "loss": 2.7195, + "step": 4875 + }, + { + "epoch": 0.3935114195787265, + "grad_norm": 0.7536180019378662, + "learning_rate": 0.00017289686274214118, + "loss": 2.6861, + "step": 4876 + }, + { + "epoch": 0.3935921233153095, + "grad_norm": 0.7295238971710205, + "learning_rate": 0.00017288605495862492, + "loss": 2.6684, + "step": 4877 + }, + { + "epoch": 0.3936728270518925, + "grad_norm": 0.7575719952583313, + "learning_rate": 0.00017287524535859763, + "loss": 2.6439, + "step": 4878 + }, + { + "epoch": 0.3937535307884755, + "grad_norm": 0.678909182548523, + "learning_rate": 0.00017286443394232874, + "loss": 
2.6562, + "step": 4879 + }, + { + "epoch": 0.3938342345250585, + "grad_norm": 0.6908892393112183, + "learning_rate": 0.00017285362071008768, + "loss": 2.6364, + "step": 4880 + }, + { + "epoch": 0.39391493826164153, + "grad_norm": 0.7414079904556274, + "learning_rate": 0.00017284280566214397, + "loss": 2.5872, + "step": 4881 + }, + { + "epoch": 0.3939956419982245, + "grad_norm": 0.6824749112129211, + "learning_rate": 0.0001728319887987671, + "loss": 2.641, + "step": 4882 + }, + { + "epoch": 0.39407634573480754, + "grad_norm": 0.6908513903617859, + "learning_rate": 0.0001728211701202267, + "loss": 2.6977, + "step": 4883 + }, + { + "epoch": 0.3941570494713905, + "grad_norm": 0.7214735746383667, + "learning_rate": 0.0001728103496267924, + "loss": 2.5826, + "step": 4884 + }, + { + "epoch": 0.39423775320797355, + "grad_norm": 0.812781572341919, + "learning_rate": 0.00017279952731873385, + "loss": 2.6806, + "step": 4885 + }, + { + "epoch": 0.39431845694455653, + "grad_norm": 0.7610746026039124, + "learning_rate": 0.00017278870319632078, + "loss": 2.6046, + "step": 4886 + }, + { + "epoch": 0.39439916068113956, + "grad_norm": 0.7151652574539185, + "learning_rate": 0.00017277787725982293, + "loss": 2.6543, + "step": 4887 + }, + { + "epoch": 0.39447986441772254, + "grad_norm": 0.7293612360954285, + "learning_rate": 0.00017276704950951017, + "loss": 2.6384, + "step": 4888 + }, + { + "epoch": 0.39456056815430557, + "grad_norm": 0.8138254284858704, + "learning_rate": 0.00017275621994565233, + "loss": 2.7208, + "step": 4889 + }, + { + "epoch": 0.39464127189088855, + "grad_norm": 0.7557196021080017, + "learning_rate": 0.00017274538856851924, + "loss": 2.6571, + "step": 4890 + }, + { + "epoch": 0.3947219756274716, + "grad_norm": 0.7297266721725464, + "learning_rate": 0.00017273455537838097, + "loss": 2.6222, + "step": 4891 + }, + { + "epoch": 0.39480267936405455, + "grad_norm": 0.7838431596755981, + "learning_rate": 0.00017272372037550743, + "loss": 2.782, + "step": 4892 + }, + { + 
"epoch": 0.3948833831006376, + "grad_norm": 0.7799673676490784, + "learning_rate": 0.00017271288356016866, + "loss": 2.6658, + "step": 4893 + }, + { + "epoch": 0.39496408683722056, + "grad_norm": 0.8495545387268066, + "learning_rate": 0.0001727020449326348, + "loss": 2.6552, + "step": 4894 + }, + { + "epoch": 0.3950447905738036, + "grad_norm": 0.7317770719528198, + "learning_rate": 0.00017269120449317588, + "loss": 2.6616, + "step": 4895 + }, + { + "epoch": 0.39512549431038657, + "grad_norm": 0.7518885731697083, + "learning_rate": 0.00017268036224206217, + "loss": 2.6864, + "step": 4896 + }, + { + "epoch": 0.39520619804696955, + "grad_norm": 0.83487468957901, + "learning_rate": 0.00017266951817956382, + "loss": 2.7535, + "step": 4897 + }, + { + "epoch": 0.3952869017835526, + "grad_norm": 0.7440658211708069, + "learning_rate": 0.00017265867230595113, + "loss": 2.6584, + "step": 4898 + }, + { + "epoch": 0.39536760552013556, + "grad_norm": 0.7060485482215881, + "learning_rate": 0.00017264782462149438, + "loss": 2.6892, + "step": 4899 + }, + { + "epoch": 0.3954483092567186, + "grad_norm": 0.8410428166389465, + "learning_rate": 0.00017263697512646394, + "loss": 2.6425, + "step": 4900 + }, + { + "epoch": 0.39552901299330157, + "grad_norm": 0.757046639919281, + "learning_rate": 0.0001726261238211302, + "loss": 2.6159, + "step": 4901 + }, + { + "epoch": 0.3956097167298846, + "grad_norm": 0.7288908958435059, + "learning_rate": 0.00017261527070576365, + "loss": 2.6753, + "step": 4902 + }, + { + "epoch": 0.3956904204664676, + "grad_norm": 0.8194541335105896, + "learning_rate": 0.0001726044157806347, + "loss": 2.6673, + "step": 4903 + }, + { + "epoch": 0.3957711242030506, + "grad_norm": 0.7957740426063538, + "learning_rate": 0.00017259355904601393, + "loss": 2.6662, + "step": 4904 + }, + { + "epoch": 0.3958518279396336, + "grad_norm": 0.8790122270584106, + "learning_rate": 0.0001725827005021719, + "loss": 2.7513, + "step": 4905 + }, + { + "epoch": 0.3959325316762166, + 
"grad_norm": 0.7674984335899353, + "learning_rate": 0.00017257184014937924, + "loss": 2.6375, + "step": 4906 + }, + { + "epoch": 0.3960132354127996, + "grad_norm": 0.7250992655754089, + "learning_rate": 0.00017256097798790663, + "loss": 2.63, + "step": 4907 + }, + { + "epoch": 0.3960939391493826, + "grad_norm": 0.8578312397003174, + "learning_rate": 0.00017255011401802475, + "loss": 2.702, + "step": 4908 + }, + { + "epoch": 0.3961746428859656, + "grad_norm": 0.7365253567695618, + "learning_rate": 0.00017253924824000438, + "loss": 2.6156, + "step": 4909 + }, + { + "epoch": 0.39625534662254863, + "grad_norm": 0.7148925065994263, + "learning_rate": 0.00017252838065411633, + "loss": 2.6658, + "step": 4910 + }, + { + "epoch": 0.3963360503591316, + "grad_norm": 0.7517829537391663, + "learning_rate": 0.00017251751126063148, + "loss": 2.6347, + "step": 4911 + }, + { + "epoch": 0.39641675409571464, + "grad_norm": 0.7880864143371582, + "learning_rate": 0.00017250664005982066, + "loss": 2.7045, + "step": 4912 + }, + { + "epoch": 0.3964974578322976, + "grad_norm": 0.7460693120956421, + "learning_rate": 0.00017249576705195482, + "loss": 2.6976, + "step": 4913 + }, + { + "epoch": 0.39657816156888065, + "grad_norm": 0.7179895043373108, + "learning_rate": 0.00017248489223730496, + "loss": 2.6366, + "step": 4914 + }, + { + "epoch": 0.3966588653054636, + "grad_norm": 0.7737421989440918, + "learning_rate": 0.00017247401561614213, + "loss": 2.7116, + "step": 4915 + }, + { + "epoch": 0.39673956904204666, + "grad_norm": 0.8561483025550842, + "learning_rate": 0.0001724631371887374, + "loss": 2.6591, + "step": 4916 + }, + { + "epoch": 0.39682027277862963, + "grad_norm": 0.7616356611251831, + "learning_rate": 0.00017245225695536182, + "loss": 2.6436, + "step": 4917 + }, + { + "epoch": 0.39690097651521267, + "grad_norm": 0.7754645943641663, + "learning_rate": 0.0001724413749162866, + "loss": 2.6699, + "step": 4918 + }, + { + "epoch": 0.39698168025179564, + "grad_norm": 0.800165593624115, + 
"learning_rate": 0.000172430491071783, + "loss": 2.7155, + "step": 4919 + }, + { + "epoch": 0.3970623839883787, + "grad_norm": 0.8448799848556519, + "learning_rate": 0.00017241960542212223, + "loss": 2.6991, + "step": 4920 + }, + { + "epoch": 0.39714308772496165, + "grad_norm": 0.7106496095657349, + "learning_rate": 0.00017240871796757556, + "loss": 2.628, + "step": 4921 + }, + { + "epoch": 0.3972237914615447, + "grad_norm": 0.7332959175109863, + "learning_rate": 0.00017239782870841436, + "loss": 2.6159, + "step": 4922 + }, + { + "epoch": 0.39730449519812766, + "grad_norm": 0.7573551535606384, + "learning_rate": 0.00017238693764491002, + "loss": 2.67, + "step": 4923 + }, + { + "epoch": 0.3973851989347107, + "grad_norm": 0.7833136320114136, + "learning_rate": 0.00017237604477733399, + "loss": 2.7276, + "step": 4924 + }, + { + "epoch": 0.39746590267129367, + "grad_norm": 0.7233073711395264, + "learning_rate": 0.00017236515010595773, + "loss": 2.6654, + "step": 4925 + }, + { + "epoch": 0.3975466064078767, + "grad_norm": 0.7920324206352234, + "learning_rate": 0.00017235425363105273, + "loss": 2.7611, + "step": 4926 + }, + { + "epoch": 0.3976273101444597, + "grad_norm": 0.7096883058547974, + "learning_rate": 0.00017234335535289063, + "loss": 2.687, + "step": 4927 + }, + { + "epoch": 0.3977080138810427, + "grad_norm": 0.7231960296630859, + "learning_rate": 0.000172332455271743, + "loss": 2.6441, + "step": 4928 + }, + { + "epoch": 0.3977887176176257, + "grad_norm": 0.7852105498313904, + "learning_rate": 0.00017232155338788146, + "loss": 2.5948, + "step": 4929 + }, + { + "epoch": 0.3978694213542087, + "grad_norm": 0.788789689540863, + "learning_rate": 0.0001723106497015778, + "loss": 2.6797, + "step": 4930 + }, + { + "epoch": 0.3979501250907917, + "grad_norm": 0.7082793116569519, + "learning_rate": 0.00017229974421310377, + "loss": 2.6787, + "step": 4931 + }, + { + "epoch": 0.3980308288273747, + "grad_norm": 0.8157992362976074, + "learning_rate": 0.00017228883692273106, + 
"loss": 2.6367, + "step": 4932 + }, + { + "epoch": 0.3981115325639577, + "grad_norm": 0.7576673030853271, + "learning_rate": 0.00017227792783073157, + "loss": 2.6826, + "step": 4933 + }, + { + "epoch": 0.39819223630054074, + "grad_norm": 0.7225388884544373, + "learning_rate": 0.00017226701693737718, + "loss": 2.668, + "step": 4934 + }, + { + "epoch": 0.3982729400371237, + "grad_norm": 0.7029562592506409, + "learning_rate": 0.00017225610424293985, + "loss": 2.6613, + "step": 4935 + }, + { + "epoch": 0.39835364377370674, + "grad_norm": 0.73081374168396, + "learning_rate": 0.0001722451897476915, + "loss": 2.6378, + "step": 4936 + }, + { + "epoch": 0.3984343475102897, + "grad_norm": 0.744008481502533, + "learning_rate": 0.0001722342734519042, + "loss": 2.6501, + "step": 4937 + }, + { + "epoch": 0.39851505124687275, + "grad_norm": 0.7482618093490601, + "learning_rate": 0.00017222335535584996, + "loss": 2.7287, + "step": 4938 + }, + { + "epoch": 0.39859575498345573, + "grad_norm": 0.6487892866134644, + "learning_rate": 0.00017221243545980093, + "loss": 2.6417, + "step": 4939 + }, + { + "epoch": 0.39867645872003876, + "grad_norm": 0.7894789576530457, + "learning_rate": 0.00017220151376402923, + "loss": 2.7431, + "step": 4940 + }, + { + "epoch": 0.39875716245662174, + "grad_norm": 0.8232294321060181, + "learning_rate": 0.00017219059026880708, + "loss": 2.6824, + "step": 4941 + }, + { + "epoch": 0.39883786619320477, + "grad_norm": 0.6844691634178162, + "learning_rate": 0.00017217966497440668, + "loss": 2.6294, + "step": 4942 + }, + { + "epoch": 0.39891856992978775, + "grad_norm": 0.7245259881019592, + "learning_rate": 0.00017216873788110037, + "loss": 2.6815, + "step": 4943 + }, + { + "epoch": 0.3989992736663708, + "grad_norm": 0.7197226881980896, + "learning_rate": 0.00017215780898916045, + "loss": 2.725, + "step": 4944 + }, + { + "epoch": 0.39907997740295376, + "grad_norm": 0.8391285538673401, + "learning_rate": 0.00017214687829885934, + "loss": 2.6724, + "step": 4945 + 
}, + { + "epoch": 0.3991606811395368, + "grad_norm": 0.7357564568519592, + "learning_rate": 0.00017213594581046938, + "loss": 2.7052, + "step": 4946 + }, + { + "epoch": 0.39924138487611976, + "grad_norm": 0.7611483931541443, + "learning_rate": 0.00017212501152426312, + "loss": 2.7214, + "step": 4947 + }, + { + "epoch": 0.39932208861270274, + "grad_norm": 0.7314950227737427, + "learning_rate": 0.00017211407544051306, + "loss": 2.6594, + "step": 4948 + }, + { + "epoch": 0.3994027923492858, + "grad_norm": 0.774131178855896, + "learning_rate": 0.00017210313755949169, + "loss": 2.6812, + "step": 4949 + }, + { + "epoch": 0.39948349608586875, + "grad_norm": 0.707003116607666, + "learning_rate": 0.00017209219788147167, + "loss": 2.7334, + "step": 4950 + }, + { + "epoch": 0.3995641998224518, + "grad_norm": 0.8179643154144287, + "learning_rate": 0.0001720812564067256, + "loss": 2.6554, + "step": 4951 + }, + { + "epoch": 0.39964490355903476, + "grad_norm": 0.6572005152702332, + "learning_rate": 0.00017207031313552621, + "loss": 2.6423, + "step": 4952 + }, + { + "epoch": 0.3997256072956178, + "grad_norm": 0.7663072943687439, + "learning_rate": 0.00017205936806814623, + "loss": 2.689, + "step": 4953 + }, + { + "epoch": 0.39980631103220077, + "grad_norm": 0.7351107001304626, + "learning_rate": 0.00017204842120485846, + "loss": 2.631, + "step": 4954 + }, + { + "epoch": 0.3998870147687838, + "grad_norm": 0.7754253149032593, + "learning_rate": 0.00017203747254593564, + "loss": 2.6371, + "step": 4955 + }, + { + "epoch": 0.3999677185053668, + "grad_norm": 0.7471042275428772, + "learning_rate": 0.00017202652209165074, + "loss": 2.6542, + "step": 4956 + }, + { + "epoch": 0.4000484222419498, + "grad_norm": 0.7357343435287476, + "learning_rate": 0.00017201556984227664, + "loss": 2.6226, + "step": 4957 + }, + { + "epoch": 0.4001291259785328, + "grad_norm": 0.8096252679824829, + "learning_rate": 0.00017200461579808626, + "loss": 2.6458, + "step": 4958 + }, + { + "epoch": 
0.4002098297151158, + "grad_norm": 0.7622970938682556, + "learning_rate": 0.0001719936599593526, + "loss": 2.7129, + "step": 4959 + }, + { + "epoch": 0.4002905334516988, + "grad_norm": 0.7374953627586365, + "learning_rate": 0.00017198270232634882, + "loss": 2.696, + "step": 4960 + }, + { + "epoch": 0.4003712371882818, + "grad_norm": 0.7897924184799194, + "learning_rate": 0.00017197174289934787, + "loss": 2.7508, + "step": 4961 + }, + { + "epoch": 0.4004519409248648, + "grad_norm": 0.7047984004020691, + "learning_rate": 0.00017196078167862298, + "loss": 2.6733, + "step": 4962 + }, + { + "epoch": 0.40053264466144783, + "grad_norm": 0.7866294980049133, + "learning_rate": 0.0001719498186644473, + "loss": 2.694, + "step": 4963 + }, + { + "epoch": 0.4006133483980308, + "grad_norm": 0.739923894405365, + "learning_rate": 0.00017193885385709409, + "loss": 2.7125, + "step": 4964 + }, + { + "epoch": 0.40069405213461384, + "grad_norm": 0.7506374716758728, + "learning_rate": 0.00017192788725683652, + "loss": 2.627, + "step": 4965 + }, + { + "epoch": 0.4007747558711968, + "grad_norm": 0.6591607928276062, + "learning_rate": 0.00017191691886394802, + "loss": 2.6723, + "step": 4966 + }, + { + "epoch": 0.40085545960777985, + "grad_norm": 0.7748788595199585, + "learning_rate": 0.00017190594867870192, + "loss": 2.6486, + "step": 4967 + }, + { + "epoch": 0.40093616334436283, + "grad_norm": 0.7518232464790344, + "learning_rate": 0.0001718949767013716, + "loss": 2.6879, + "step": 4968 + }, + { + "epoch": 0.40101686708094586, + "grad_norm": 0.7360039949417114, + "learning_rate": 0.00017188400293223052, + "loss": 2.6506, + "step": 4969 + }, + { + "epoch": 0.40109757081752884, + "grad_norm": 0.7217130064964294, + "learning_rate": 0.0001718730273715522, + "loss": 2.6263, + "step": 4970 + }, + { + "epoch": 0.40117827455411187, + "grad_norm": 0.7246078252792358, + "learning_rate": 0.00017186205001961015, + "loss": 2.6222, + "step": 4971 + }, + { + "epoch": 0.40125897829069485, + "grad_norm": 
0.7566879391670227, + "learning_rate": 0.00017185107087667794, + "loss": 2.7003, + "step": 4972 + }, + { + "epoch": 0.4013396820272779, + "grad_norm": 0.7881271243095398, + "learning_rate": 0.00017184008994302924, + "loss": 2.6463, + "step": 4973 + }, + { + "epoch": 0.40142038576386085, + "grad_norm": 0.7307420372962952, + "learning_rate": 0.00017182910721893775, + "loss": 2.667, + "step": 4974 + }, + { + "epoch": 0.4015010895004439, + "grad_norm": 0.7088132500648499, + "learning_rate": 0.00017181812270467708, + "loss": 2.6073, + "step": 4975 + }, + { + "epoch": 0.40158179323702686, + "grad_norm": 0.7839647531509399, + "learning_rate": 0.0001718071364005211, + "loss": 2.6594, + "step": 4976 + }, + { + "epoch": 0.4016624969736099, + "grad_norm": 0.7472013235092163, + "learning_rate": 0.00017179614830674353, + "loss": 2.737, + "step": 4977 + }, + { + "epoch": 0.40174320071019287, + "grad_norm": 0.7241616249084473, + "learning_rate": 0.0001717851584236183, + "loss": 2.6615, + "step": 4978 + }, + { + "epoch": 0.4018239044467759, + "grad_norm": 0.7918941378593445, + "learning_rate": 0.00017177416675141929, + "loss": 2.6774, + "step": 4979 + }, + { + "epoch": 0.4019046081833589, + "grad_norm": 0.801003098487854, + "learning_rate": 0.00017176317329042039, + "loss": 2.6749, + "step": 4980 + }, + { + "epoch": 0.4019853119199419, + "grad_norm": 0.7556802034378052, + "learning_rate": 0.00017175217804089564, + "loss": 2.6197, + "step": 4981 + }, + { + "epoch": 0.4020660156565249, + "grad_norm": 0.7539604902267456, + "learning_rate": 0.00017174118100311904, + "loss": 2.6222, + "step": 4982 + }, + { + "epoch": 0.4021467193931079, + "grad_norm": 0.741436243057251, + "learning_rate": 0.0001717301821773647, + "loss": 2.6471, + "step": 4983 + }, + { + "epoch": 0.4022274231296909, + "grad_norm": 0.7449339628219604, + "learning_rate": 0.0001717191815639067, + "loss": 2.6448, + "step": 4984 + }, + { + "epoch": 0.40230812686627393, + "grad_norm": 0.7771497964859009, + "learning_rate": 
0.0001717081791630192, + "loss": 2.673, + "step": 4985 + }, + { + "epoch": 0.4023888306028569, + "grad_norm": 0.6916669607162476, + "learning_rate": 0.00017169717497497646, + "loss": 2.6025, + "step": 4986 + }, + { + "epoch": 0.40246953433943994, + "grad_norm": 0.7373276948928833, + "learning_rate": 0.0001716861690000527, + "loss": 2.6783, + "step": 4987 + }, + { + "epoch": 0.4025502380760229, + "grad_norm": 0.7756158709526062, + "learning_rate": 0.0001716751612385222, + "loss": 2.7296, + "step": 4988 + }, + { + "epoch": 0.40263094181260595, + "grad_norm": 0.7725681066513062, + "learning_rate": 0.00017166415169065933, + "loss": 2.7169, + "step": 4989 + }, + { + "epoch": 0.4027116455491889, + "grad_norm": 0.7165024280548096, + "learning_rate": 0.00017165314035673846, + "loss": 2.677, + "step": 4990 + }, + { + "epoch": 0.40279234928577196, + "grad_norm": 0.8888981938362122, + "learning_rate": 0.00017164212723703404, + "loss": 2.7694, + "step": 4991 + }, + { + "epoch": 0.40287305302235493, + "grad_norm": 0.7439224720001221, + "learning_rate": 0.00017163111233182052, + "loss": 2.674, + "step": 4992 + }, + { + "epoch": 0.40295375675893796, + "grad_norm": 0.6948431730270386, + "learning_rate": 0.00017162009564137244, + "loss": 2.6595, + "step": 4993 + }, + { + "epoch": 0.40303446049552094, + "grad_norm": 0.7274380922317505, + "learning_rate": 0.00017160907716596438, + "loss": 2.649, + "step": 4994 + }, + { + "epoch": 0.403115164232104, + "grad_norm": 0.7127148509025574, + "learning_rate": 0.0001715980569058709, + "loss": 2.6883, + "step": 4995 + }, + { + "epoch": 0.40319586796868695, + "grad_norm": 0.7129155993461609, + "learning_rate": 0.00017158703486136668, + "loss": 2.6516, + "step": 4996 + }, + { + "epoch": 0.40327657170527, + "grad_norm": 0.7848126292228699, + "learning_rate": 0.00017157601103272646, + "loss": 2.6778, + "step": 4997 + }, + { + "epoch": 0.40335727544185296, + "grad_norm": 0.752268373966217, + "learning_rate": 0.0001715649854202249, + "loss": 2.7228, 
+ "step": 4998 + }, + { + "epoch": 0.40343797917843593, + "grad_norm": 0.7750338912010193, + "learning_rate": 0.00017155395802413684, + "loss": 2.6338, + "step": 4999 + }, + { + "epoch": 0.40351868291501897, + "grad_norm": 0.7165457010269165, + "learning_rate": 0.00017154292884473713, + "loss": 2.6195, + "step": 5000 + }, + { + "epoch": 0.40351868291501897, + "eval_loss": 2.585501194000244, + "eval_runtime": 901.8519, + "eval_samples_per_second": 2.905, + "eval_steps_per_second": 0.485, + "step": 5000 + }, + { + "epoch": 0.40359938665160194, + "grad_norm": 0.8118943572044373, + "learning_rate": 0.00017153189788230062, + "loss": 2.6649, + "step": 5001 + }, + { + "epoch": 0.403680090388185, + "grad_norm": 0.722984790802002, + "learning_rate": 0.00017152086513710221, + "loss": 2.6929, + "step": 5002 + }, + { + "epoch": 0.40376079412476795, + "grad_norm": 0.700690507888794, + "learning_rate": 0.00017150983060941686, + "loss": 2.6368, + "step": 5003 + }, + { + "epoch": 0.403841497861351, + "grad_norm": 0.7331504225730896, + "learning_rate": 0.00017149879429951965, + "loss": 2.6826, + "step": 5004 + }, + { + "epoch": 0.40392220159793396, + "grad_norm": 0.7312643527984619, + "learning_rate": 0.00017148775620768553, + "loss": 2.6279, + "step": 5005 + }, + { + "epoch": 0.404002905334517, + "grad_norm": 0.7488462924957275, + "learning_rate": 0.00017147671633418972, + "loss": 2.6711, + "step": 5006 + }, + { + "epoch": 0.40408360907109997, + "grad_norm": 0.8620340824127197, + "learning_rate": 0.00017146567467930725, + "loss": 2.6637, + "step": 5007 + }, + { + "epoch": 0.404164312807683, + "grad_norm": 0.683907151222229, + "learning_rate": 0.00017145463124331335, + "loss": 2.6331, + "step": 5008 + }, + { + "epoch": 0.404245016544266, + "grad_norm": 0.7389389276504517, + "learning_rate": 0.0001714435860264833, + "loss": 2.7232, + "step": 5009 + }, + { + "epoch": 0.404325720280849, + "grad_norm": 0.7456515431404114, + "learning_rate": 0.00017143253902909228, + "loss": 2.6363, + 
"step": 5010 + }, + { + "epoch": 0.404406424017432, + "grad_norm": 0.7044962644577026, + "learning_rate": 0.0001714214902514157, + "loss": 2.6672, + "step": 5011 + }, + { + "epoch": 0.404487127754015, + "grad_norm": 0.7410328984260559, + "learning_rate": 0.00017141043969372887, + "loss": 2.6059, + "step": 5012 + }, + { + "epoch": 0.404567831490598, + "grad_norm": 0.6697140336036682, + "learning_rate": 0.00017139938735630722, + "loss": 2.7151, + "step": 5013 + }, + { + "epoch": 0.404648535227181, + "grad_norm": 0.746675431728363, + "learning_rate": 0.00017138833323942617, + "loss": 2.6792, + "step": 5014 + }, + { + "epoch": 0.404729238963764, + "grad_norm": 0.7724997401237488, + "learning_rate": 0.00017137727734336129, + "loss": 2.6234, + "step": 5015 + }, + { + "epoch": 0.40480994270034704, + "grad_norm": 0.8014429211616516, + "learning_rate": 0.00017136621966838805, + "loss": 2.6795, + "step": 5016 + }, + { + "epoch": 0.40489064643693, + "grad_norm": 0.6900430917739868, + "learning_rate": 0.00017135516021478205, + "loss": 2.7127, + "step": 5017 + }, + { + "epoch": 0.40497135017351304, + "grad_norm": 0.6648666858673096, + "learning_rate": 0.00017134409898281896, + "loss": 2.6564, + "step": 5018 + }, + { + "epoch": 0.405052053910096, + "grad_norm": 0.7054181098937988, + "learning_rate": 0.00017133303597277442, + "loss": 2.6652, + "step": 5019 + }, + { + "epoch": 0.40513275764667905, + "grad_norm": 0.6847733855247498, + "learning_rate": 0.00017132197118492414, + "loss": 2.6997, + "step": 5020 + }, + { + "epoch": 0.40521346138326203, + "grad_norm": 0.7047749757766724, + "learning_rate": 0.00017131090461954392, + "loss": 2.6752, + "step": 5021 + }, + { + "epoch": 0.40529416511984506, + "grad_norm": 0.7549976706504822, + "learning_rate": 0.00017129983627690957, + "loss": 2.6736, + "step": 5022 + }, + { + "epoch": 0.40537486885642804, + "grad_norm": 0.7436367273330688, + "learning_rate": 0.00017128876615729686, + "loss": 2.7189, + "step": 5023 + }, + { + "epoch": 
0.40545557259301107, + "grad_norm": 0.6515071988105774, + "learning_rate": 0.00017127769426098177, + "loss": 2.6422, + "step": 5024 + }, + { + "epoch": 0.40553627632959405, + "grad_norm": 0.6960858702659607, + "learning_rate": 0.00017126662058824024, + "loss": 2.6619, + "step": 5025 + }, + { + "epoch": 0.4056169800661771, + "grad_norm": 0.8075968623161316, + "learning_rate": 0.0001712555451393482, + "loss": 2.6678, + "step": 5026 + }, + { + "epoch": 0.40569768380276006, + "grad_norm": 0.6864624619483948, + "learning_rate": 0.00017124446791458176, + "loss": 2.6331, + "step": 5027 + }, + { + "epoch": 0.4057783875393431, + "grad_norm": 0.7218763828277588, + "learning_rate": 0.0001712333889142169, + "loss": 2.6316, + "step": 5028 + }, + { + "epoch": 0.40585909127592606, + "grad_norm": 0.7024715542793274, + "learning_rate": 0.0001712223081385298, + "loss": 2.623, + "step": 5029 + }, + { + "epoch": 0.4059397950125091, + "grad_norm": 0.6681575775146484, + "learning_rate": 0.0001712112255877966, + "loss": 2.6786, + "step": 5030 + }, + { + "epoch": 0.4060204987490921, + "grad_norm": 0.7249817848205566, + "learning_rate": 0.0001712001412622935, + "loss": 2.6179, + "step": 5031 + }, + { + "epoch": 0.4061012024856751, + "grad_norm": 0.7178316116333008, + "learning_rate": 0.00017118905516229677, + "loss": 2.696, + "step": 5032 + }, + { + "epoch": 0.4061819062222581, + "grad_norm": 0.7838767766952515, + "learning_rate": 0.0001711779672880827, + "loss": 2.6881, + "step": 5033 + }, + { + "epoch": 0.4062626099588411, + "grad_norm": 0.799937903881073, + "learning_rate": 0.0001711668776399276, + "loss": 2.7587, + "step": 5034 + }, + { + "epoch": 0.4063433136954241, + "grad_norm": 0.7622246146202087, + "learning_rate": 0.0001711557862181079, + "loss": 2.6621, + "step": 5035 + }, + { + "epoch": 0.4064240174320071, + "grad_norm": 0.7158814072608948, + "learning_rate": 0.00017114469302290003, + "loss": 2.6421, + "step": 5036 + }, + { + "epoch": 0.4065047211685901, + "grad_norm": 
0.7913404107093811, + "learning_rate": 0.0001711335980545804, + "loss": 2.6323, + "step": 5037 + }, + { + "epoch": 0.40658542490517313, + "grad_norm": 0.718325138092041, + "learning_rate": 0.00017112250131342556, + "loss": 2.6171, + "step": 5038 + }, + { + "epoch": 0.4066661286417561, + "grad_norm": 0.7793646454811096, + "learning_rate": 0.0001711114027997121, + "loss": 2.7494, + "step": 5039 + }, + { + "epoch": 0.40674683237833914, + "grad_norm": 0.7774816155433655, + "learning_rate": 0.00017110030251371656, + "loss": 2.5534, + "step": 5040 + }, + { + "epoch": 0.4068275361149221, + "grad_norm": 0.8547549247741699, + "learning_rate": 0.00017108920045571564, + "loss": 2.7155, + "step": 5041 + }, + { + "epoch": 0.40690823985150515, + "grad_norm": 0.7685851454734802, + "learning_rate": 0.000171078096625986, + "loss": 2.6109, + "step": 5042 + }, + { + "epoch": 0.4069889435880881, + "grad_norm": 0.7953611016273499, + "learning_rate": 0.00017106699102480445, + "loss": 2.7034, + "step": 5043 + }, + { + "epoch": 0.40706964732467116, + "grad_norm": 0.7550730109214783, + "learning_rate": 0.00017105588365244764, + "loss": 2.7026, + "step": 5044 + }, + { + "epoch": 0.40715035106125413, + "grad_norm": 0.7036548256874084, + "learning_rate": 0.0001710447745091925, + "loss": 2.6246, + "step": 5045 + }, + { + "epoch": 0.40723105479783717, + "grad_norm": 0.7154512405395508, + "learning_rate": 0.00017103366359531586, + "loss": 2.6592, + "step": 5046 + }, + { + "epoch": 0.40731175853442014, + "grad_norm": 0.7773932218551636, + "learning_rate": 0.00017102255091109463, + "loss": 2.6458, + "step": 5047 + }, + { + "epoch": 0.4073924622710032, + "grad_norm": 0.7458996176719666, + "learning_rate": 0.0001710114364568058, + "loss": 2.643, + "step": 5048 + }, + { + "epoch": 0.40747316600758615, + "grad_norm": 0.7465376257896423, + "learning_rate": 0.00017100032023272633, + "loss": 2.6677, + "step": 5049 + }, + { + "epoch": 0.40755386974416913, + "grad_norm": 0.7340850830078125, + 
"learning_rate": 0.0001709892022391333, + "loss": 2.6372, + "step": 5050 + }, + { + "epoch": 0.40763457348075216, + "grad_norm": 0.7189164757728577, + "learning_rate": 0.00017097808247630377, + "loss": 2.6524, + "step": 5051 + }, + { + "epoch": 0.40771527721733514, + "grad_norm": 0.6954184174537659, + "learning_rate": 0.0001709669609445149, + "loss": 2.7383, + "step": 5052 + }, + { + "epoch": 0.40779598095391817, + "grad_norm": 0.736409604549408, + "learning_rate": 0.00017095583764404384, + "loss": 2.6424, + "step": 5053 + }, + { + "epoch": 0.40787668469050115, + "grad_norm": 0.6773545742034912, + "learning_rate": 0.0001709447125751678, + "loss": 2.6557, + "step": 5054 + }, + { + "epoch": 0.4079573884270842, + "grad_norm": 0.718748927116394, + "learning_rate": 0.00017093358573816412, + "loss": 2.6884, + "step": 5055 + }, + { + "epoch": 0.40803809216366715, + "grad_norm": 0.8276848793029785, + "learning_rate": 0.00017092245713331002, + "loss": 2.6642, + "step": 5056 + }, + { + "epoch": 0.4081187959002502, + "grad_norm": 0.7694761157035828, + "learning_rate": 0.00017091132676088294, + "loss": 2.644, + "step": 5057 + }, + { + "epoch": 0.40819949963683316, + "grad_norm": 0.766724705696106, + "learning_rate": 0.0001709001946211602, + "loss": 2.6918, + "step": 5058 + }, + { + "epoch": 0.4082802033734162, + "grad_norm": 0.7067074775695801, + "learning_rate": 0.00017088906071441927, + "loss": 2.7228, + "step": 5059 + }, + { + "epoch": 0.40836090710999917, + "grad_norm": 0.7216899991035461, + "learning_rate": 0.00017087792504093767, + "loss": 2.7068, + "step": 5060 + }, + { + "epoch": 0.4084416108465822, + "grad_norm": 0.6728984713554382, + "learning_rate": 0.00017086678760099287, + "loss": 2.686, + "step": 5061 + }, + { + "epoch": 0.4085223145831652, + "grad_norm": 0.7546882033348083, + "learning_rate": 0.0001708556483948625, + "loss": 2.6907, + "step": 5062 + }, + { + "epoch": 0.4086030183197482, + "grad_norm": 0.7471179962158203, + "learning_rate": 
0.00017084450742282416, + "loss": 2.6857, + "step": 5063 + }, + { + "epoch": 0.4086837220563312, + "grad_norm": 0.7879743576049805, + "learning_rate": 0.00017083336468515548, + "loss": 2.7224, + "step": 5064 + }, + { + "epoch": 0.4087644257929142, + "grad_norm": 0.691343367099762, + "learning_rate": 0.00017082222018213422, + "loss": 2.6561, + "step": 5065 + }, + { + "epoch": 0.4088451295294972, + "grad_norm": 0.7497386336326599, + "learning_rate": 0.00017081107391403805, + "loss": 2.6317, + "step": 5066 + }, + { + "epoch": 0.40892583326608023, + "grad_norm": 0.6846269965171814, + "learning_rate": 0.00017079992588114485, + "loss": 2.6522, + "step": 5067 + }, + { + "epoch": 0.4090065370026632, + "grad_norm": 0.7312905192375183, + "learning_rate": 0.0001707887760837324, + "loss": 2.588, + "step": 5068 + }, + { + "epoch": 0.40908724073924624, + "grad_norm": 0.6966867446899414, + "learning_rate": 0.00017077762452207866, + "loss": 2.6316, + "step": 5069 + }, + { + "epoch": 0.4091679444758292, + "grad_norm": 0.6882073283195496, + "learning_rate": 0.00017076647119646147, + "loss": 2.6977, + "step": 5070 + }, + { + "epoch": 0.40924864821241225, + "grad_norm": 0.7392483949661255, + "learning_rate": 0.00017075531610715884, + "loss": 2.6768, + "step": 5071 + }, + { + "epoch": 0.4093293519489952, + "grad_norm": 0.7311073541641235, + "learning_rate": 0.00017074415925444876, + "loss": 2.6628, + "step": 5072 + }, + { + "epoch": 0.40941005568557826, + "grad_norm": 0.6769934296607971, + "learning_rate": 0.00017073300063860934, + "loss": 2.6438, + "step": 5073 + }, + { + "epoch": 0.40949075942216123, + "grad_norm": 0.736456573009491, + "learning_rate": 0.00017072184025991862, + "loss": 2.6151, + "step": 5074 + }, + { + "epoch": 0.40957146315874426, + "grad_norm": 0.7026283740997314, + "learning_rate": 0.00017071067811865476, + "loss": 2.6726, + "step": 5075 + }, + { + "epoch": 0.40965216689532724, + "grad_norm": 0.6825234293937683, + "learning_rate": 0.00017069951421509597, + "loss": 
2.6795, + "step": 5076 + }, + { + "epoch": 0.4097328706319103, + "grad_norm": 0.7243828773498535, + "learning_rate": 0.0001706883485495205, + "loss": 2.687, + "step": 5077 + }, + { + "epoch": 0.40981357436849325, + "grad_norm": 0.7300469875335693, + "learning_rate": 0.00017067718112220658, + "loss": 2.6268, + "step": 5078 + }, + { + "epoch": 0.4098942781050763, + "grad_norm": 0.698095440864563, + "learning_rate": 0.00017066601193343255, + "loss": 2.6461, + "step": 5079 + }, + { + "epoch": 0.40997498184165926, + "grad_norm": 0.7318777441978455, + "learning_rate": 0.00017065484098347677, + "loss": 2.6817, + "step": 5080 + }, + { + "epoch": 0.4100556855782423, + "grad_norm": 0.7681582570075989, + "learning_rate": 0.00017064366827261772, + "loss": 2.7309, + "step": 5081 + }, + { + "epoch": 0.41013638931482527, + "grad_norm": 0.7690179944038391, + "learning_rate": 0.0001706324938011337, + "loss": 2.6292, + "step": 5082 + }, + { + "epoch": 0.4102170930514083, + "grad_norm": 0.6745284199714661, + "learning_rate": 0.00017062131756930338, + "loss": 2.7133, + "step": 5083 + }, + { + "epoch": 0.4102977967879913, + "grad_norm": 0.7524279952049255, + "learning_rate": 0.00017061013957740518, + "loss": 2.6237, + "step": 5084 + }, + { + "epoch": 0.4103785005245743, + "grad_norm": 0.7813692092895508, + "learning_rate": 0.00017059895982571773, + "loss": 2.6953, + "step": 5085 + }, + { + "epoch": 0.4104592042611573, + "grad_norm": 0.7128829956054688, + "learning_rate": 0.00017058777831451967, + "loss": 2.6771, + "step": 5086 + }, + { + "epoch": 0.4105399079977403, + "grad_norm": 0.7249834537506104, + "learning_rate": 0.00017057659504408963, + "loss": 2.6376, + "step": 5087 + }, + { + "epoch": 0.4106206117343233, + "grad_norm": 0.7742593288421631, + "learning_rate": 0.00017056541001470637, + "loss": 2.6227, + "step": 5088 + }, + { + "epoch": 0.4107013154709063, + "grad_norm": 0.6994228959083557, + "learning_rate": 0.00017055422322664863, + "loss": 2.6573, + "step": 5089 + }, + { + 
"epoch": 0.4107820192074893, + "grad_norm": 0.7144249081611633, + "learning_rate": 0.00017054303468019518, + "loss": 2.6602, + "step": 5090 + }, + { + "epoch": 0.41086272294407233, + "grad_norm": 0.7695099711418152, + "learning_rate": 0.00017053184437562497, + "loss": 2.6516, + "step": 5091 + }, + { + "epoch": 0.4109434266806553, + "grad_norm": 0.7610031962394714, + "learning_rate": 0.00017052065231321678, + "loss": 2.6963, + "step": 5092 + }, + { + "epoch": 0.41102413041723834, + "grad_norm": 0.7117859721183777, + "learning_rate": 0.0001705094584932496, + "loss": 2.6954, + "step": 5093 + }, + { + "epoch": 0.4111048341538213, + "grad_norm": 0.7891486287117004, + "learning_rate": 0.00017049826291600244, + "loss": 2.7265, + "step": 5094 + }, + { + "epoch": 0.41118553789040435, + "grad_norm": 0.7347370386123657, + "learning_rate": 0.00017048706558175423, + "loss": 2.658, + "step": 5095 + }, + { + "epoch": 0.41126624162698733, + "grad_norm": 0.7541289925575256, + "learning_rate": 0.00017047586649078414, + "loss": 2.6596, + "step": 5096 + }, + { + "epoch": 0.41134694536357036, + "grad_norm": 0.7471255660057068, + "learning_rate": 0.00017046466564337118, + "loss": 2.7008, + "step": 5097 + }, + { + "epoch": 0.41142764910015334, + "grad_norm": 0.7566937208175659, + "learning_rate": 0.00017045346303979457, + "loss": 2.7006, + "step": 5098 + }, + { + "epoch": 0.41150835283673637, + "grad_norm": 0.6991304159164429, + "learning_rate": 0.00017044225868033353, + "loss": 2.6846, + "step": 5099 + }, + { + "epoch": 0.41158905657331935, + "grad_norm": 0.7286314368247986, + "learning_rate": 0.00017043105256526724, + "loss": 2.6219, + "step": 5100 + }, + { + "epoch": 0.4116697603099023, + "grad_norm": 0.6953727006912231, + "learning_rate": 0.000170419844694875, + "loss": 2.6093, + "step": 5101 + }, + { + "epoch": 0.41175046404648535, + "grad_norm": 0.6942756772041321, + "learning_rate": 0.00017040863506943615, + "loss": 2.6399, + "step": 5102 + }, + { + "epoch": 0.41183116778306833, + 
"grad_norm": 0.7513531446456909, + "learning_rate": 0.00017039742368923005, + "loss": 2.6187, + "step": 5103 + }, + { + "epoch": 0.41191187151965136, + "grad_norm": 0.7530633211135864, + "learning_rate": 0.00017038621055453617, + "loss": 2.6124, + "step": 5104 + }, + { + "epoch": 0.41199257525623434, + "grad_norm": 0.7487555146217346, + "learning_rate": 0.00017037499566563392, + "loss": 2.6331, + "step": 5105 + }, + { + "epoch": 0.41207327899281737, + "grad_norm": 0.7641858458518982, + "learning_rate": 0.00017036377902280282, + "loss": 2.6875, + "step": 5106 + }, + { + "epoch": 0.41215398272940035, + "grad_norm": 0.6962767839431763, + "learning_rate": 0.0001703525606263224, + "loss": 2.6538, + "step": 5107 + }, + { + "epoch": 0.4122346864659834, + "grad_norm": 0.8183409571647644, + "learning_rate": 0.0001703413404764723, + "loss": 2.6204, + "step": 5108 + }, + { + "epoch": 0.41231539020256636, + "grad_norm": 0.7029808759689331, + "learning_rate": 0.00017033011857353207, + "loss": 2.6369, + "step": 5109 + }, + { + "epoch": 0.4123960939391494, + "grad_norm": 0.7171663045883179, + "learning_rate": 0.00017031889491778149, + "loss": 2.6211, + "step": 5110 + }, + { + "epoch": 0.41247679767573237, + "grad_norm": 0.7456090450286865, + "learning_rate": 0.0001703076695095002, + "loss": 2.6574, + "step": 5111 + }, + { + "epoch": 0.4125575014123154, + "grad_norm": 0.7468575239181519, + "learning_rate": 0.000170296442348968, + "loss": 2.598, + "step": 5112 + }, + { + "epoch": 0.4126382051488984, + "grad_norm": 0.7106603384017944, + "learning_rate": 0.0001702852134364647, + "loss": 2.6577, + "step": 5113 + }, + { + "epoch": 0.4127189088854814, + "grad_norm": 0.7788330912590027, + "learning_rate": 0.00017027398277227017, + "loss": 2.6797, + "step": 5114 + }, + { + "epoch": 0.4127996126220644, + "grad_norm": 0.7794120907783508, + "learning_rate": 0.00017026275035666427, + "loss": 2.5834, + "step": 5115 + }, + { + "epoch": 0.4128803163586474, + "grad_norm": 0.7270684838294983, + 
"learning_rate": 0.00017025151618992702, + "loss": 2.7153, + "step": 5116 + }, + { + "epoch": 0.4129610200952304, + "grad_norm": 0.8169006109237671, + "learning_rate": 0.00017024028027233827, + "loss": 2.6786, + "step": 5117 + }, + { + "epoch": 0.4130417238318134, + "grad_norm": 0.8053112626075745, + "learning_rate": 0.00017022904260417815, + "loss": 2.6456, + "step": 5118 + }, + { + "epoch": 0.4131224275683964, + "grad_norm": 0.7646365165710449, + "learning_rate": 0.0001702178031857267, + "loss": 2.6784, + "step": 5119 + }, + { + "epoch": 0.41320313130497943, + "grad_norm": 0.7878902554512024, + "learning_rate": 0.00017020656201726406, + "loss": 2.66, + "step": 5120 + }, + { + "epoch": 0.4132838350415624, + "grad_norm": 0.8602383732795715, + "learning_rate": 0.00017019531909907037, + "loss": 2.7018, + "step": 5121 + }, + { + "epoch": 0.41336453877814544, + "grad_norm": 0.801092267036438, + "learning_rate": 0.00017018407443142585, + "loss": 2.7728, + "step": 5122 + }, + { + "epoch": 0.4134452425147284, + "grad_norm": 0.7372604012489319, + "learning_rate": 0.00017017282801461074, + "loss": 2.6588, + "step": 5123 + }, + { + "epoch": 0.41352594625131145, + "grad_norm": 0.7553830146789551, + "learning_rate": 0.0001701615798489053, + "loss": 2.6844, + "step": 5124 + }, + { + "epoch": 0.4136066499878944, + "grad_norm": 0.7699872255325317, + "learning_rate": 0.0001701503299345899, + "loss": 2.6523, + "step": 5125 + }, + { + "epoch": 0.41368735372447746, + "grad_norm": 0.7087047696113586, + "learning_rate": 0.0001701390782719449, + "loss": 2.6785, + "step": 5126 + }, + { + "epoch": 0.41376805746106043, + "grad_norm": 0.7835792303085327, + "learning_rate": 0.0001701278248612507, + "loss": 2.7064, + "step": 5127 + }, + { + "epoch": 0.41384876119764347, + "grad_norm": 0.7833154201507568, + "learning_rate": 0.0001701165697027878, + "loss": 2.6552, + "step": 5128 + }, + { + "epoch": 0.41392946493422644, + "grad_norm": 0.8240615725517273, + "learning_rate": 
0.0001701053127968367, + "loss": 2.7074, + "step": 5129 + }, + { + "epoch": 0.4140101686708095, + "grad_norm": 0.7612149119377136, + "learning_rate": 0.0001700940541436779, + "loss": 2.7484, + "step": 5130 + }, + { + "epoch": 0.41409087240739245, + "grad_norm": 0.7795391082763672, + "learning_rate": 0.00017008279374359212, + "loss": 2.6022, + "step": 5131 + }, + { + "epoch": 0.4141715761439755, + "grad_norm": 0.7714587450027466, + "learning_rate": 0.00017007153159685992, + "loss": 2.6529, + "step": 5132 + }, + { + "epoch": 0.41425227988055846, + "grad_norm": 0.7821317911148071, + "learning_rate": 0.00017006026770376194, + "loss": 2.6356, + "step": 5133 + }, + { + "epoch": 0.4143329836171415, + "grad_norm": 0.7300596833229065, + "learning_rate": 0.00017004900206457897, + "loss": 2.6552, + "step": 5134 + }, + { + "epoch": 0.41441368735372447, + "grad_norm": 0.780505359172821, + "learning_rate": 0.00017003773467959174, + "loss": 2.675, + "step": 5135 + }, + { + "epoch": 0.4144943910903075, + "grad_norm": 0.7107391357421875, + "learning_rate": 0.00017002646554908107, + "loss": 2.7096, + "step": 5136 + }, + { + "epoch": 0.4145750948268905, + "grad_norm": 0.7358834743499756, + "learning_rate": 0.0001700151946733279, + "loss": 2.6619, + "step": 5137 + }, + { + "epoch": 0.4146557985634735, + "grad_norm": 0.7573859095573425, + "learning_rate": 0.00017000392205261298, + "loss": 2.6234, + "step": 5138 + }, + { + "epoch": 0.4147365023000565, + "grad_norm": 0.7032024264335632, + "learning_rate": 0.00016999264768721738, + "loss": 2.6096, + "step": 5139 + }, + { + "epoch": 0.4148172060366395, + "grad_norm": 0.743813693523407, + "learning_rate": 0.00016998137157742203, + "loss": 2.6782, + "step": 5140 + }, + { + "epoch": 0.4148979097732225, + "grad_norm": 0.8861347436904907, + "learning_rate": 0.00016997009372350793, + "loss": 2.6645, + "step": 5141 + }, + { + "epoch": 0.4149786135098055, + "grad_norm": 0.7598684430122375, + "learning_rate": 0.00016995881412575623, + "loss": 
2.649, + "step": 5142 + }, + { + "epoch": 0.4150593172463885, + "grad_norm": 0.7535565495491028, + "learning_rate": 0.00016994753278444798, + "loss": 2.6449, + "step": 5143 + }, + { + "epoch": 0.41514002098297154, + "grad_norm": 0.7073138356208801, + "learning_rate": 0.0001699362496998644, + "loss": 2.6253, + "step": 5144 + }, + { + "epoch": 0.4152207247195545, + "grad_norm": 0.7161526679992676, + "learning_rate": 0.00016992496487228662, + "loss": 2.6623, + "step": 5145 + }, + { + "epoch": 0.41530142845613754, + "grad_norm": 0.8284714818000793, + "learning_rate": 0.00016991367830199595, + "loss": 2.7363, + "step": 5146 + }, + { + "epoch": 0.4153821321927205, + "grad_norm": 0.7127673625946045, + "learning_rate": 0.0001699023899892737, + "loss": 2.6274, + "step": 5147 + }, + { + "epoch": 0.41546283592930355, + "grad_norm": 0.7496370673179626, + "learning_rate": 0.00016989109993440112, + "loss": 2.6364, + "step": 5148 + }, + { + "epoch": 0.41554353966588653, + "grad_norm": 0.7616143822669983, + "learning_rate": 0.00016987980813765963, + "loss": 2.7225, + "step": 5149 + }, + { + "epoch": 0.41562424340246956, + "grad_norm": 0.6935909986495972, + "learning_rate": 0.00016986851459933067, + "loss": 2.6109, + "step": 5150 + }, + { + "epoch": 0.41570494713905254, + "grad_norm": 0.721023678779602, + "learning_rate": 0.00016985721931969566, + "loss": 2.6993, + "step": 5151 + }, + { + "epoch": 0.4157856508756355, + "grad_norm": 0.8216699361801147, + "learning_rate": 0.00016984592229903617, + "loss": 2.6512, + "step": 5152 + }, + { + "epoch": 0.41586635461221855, + "grad_norm": 0.7425234913825989, + "learning_rate": 0.00016983462353763372, + "loss": 2.5903, + "step": 5153 + }, + { + "epoch": 0.4159470583488015, + "grad_norm": 0.7292542457580566, + "learning_rate": 0.00016982332303576986, + "loss": 2.692, + "step": 5154 + }, + { + "epoch": 0.41602776208538456, + "grad_norm": 0.7466831803321838, + "learning_rate": 0.0001698120207937263, + "loss": 2.7145, + "step": 5155 + }, + { + 
"epoch": 0.41610846582196753, + "grad_norm": 0.7271949648857117, + "learning_rate": 0.00016980071681178471, + "loss": 2.655, + "step": 5156 + }, + { + "epoch": 0.41618916955855056, + "grad_norm": 0.7505547404289246, + "learning_rate": 0.00016978941109022677, + "loss": 2.7167, + "step": 5157 + }, + { + "epoch": 0.41626987329513354, + "grad_norm": 0.7307172417640686, + "learning_rate": 0.00016977810362933427, + "loss": 2.6735, + "step": 5158 + }, + { + "epoch": 0.4163505770317166, + "grad_norm": 0.7839170098304749, + "learning_rate": 0.00016976679442938904, + "loss": 2.6818, + "step": 5159 + }, + { + "epoch": 0.41643128076829955, + "grad_norm": 0.7131803631782532, + "learning_rate": 0.00016975548349067293, + "loss": 2.6921, + "step": 5160 + }, + { + "epoch": 0.4165119845048826, + "grad_norm": 0.8129798173904419, + "learning_rate": 0.0001697441708134678, + "loss": 2.6682, + "step": 5161 + }, + { + "epoch": 0.41659268824146556, + "grad_norm": 0.7634746432304382, + "learning_rate": 0.00016973285639805563, + "loss": 2.6684, + "step": 5162 + }, + { + "epoch": 0.4166733919780486, + "grad_norm": 0.7367348074913025, + "learning_rate": 0.0001697215402447184, + "loss": 2.6424, + "step": 5163 + }, + { + "epoch": 0.41675409571463157, + "grad_norm": 0.7235338687896729, + "learning_rate": 0.00016971022235373815, + "loss": 2.6817, + "step": 5164 + }, + { + "epoch": 0.4168347994512146, + "grad_norm": 0.7764291763305664, + "learning_rate": 0.0001696989027253969, + "loss": 2.6477, + "step": 5165 + }, + { + "epoch": 0.4169155031877976, + "grad_norm": 0.8207562565803528, + "learning_rate": 0.00016968758135997683, + "loss": 2.6408, + "step": 5166 + }, + { + "epoch": 0.4169962069243806, + "grad_norm": 0.7291484475135803, + "learning_rate": 0.00016967625825776005, + "loss": 2.6233, + "step": 5167 + }, + { + "epoch": 0.4170769106609636, + "grad_norm": 0.7060603499412537, + "learning_rate": 0.0001696649334190288, + "loss": 2.6204, + "step": 5168 + }, + { + "epoch": 0.4171576143975466, + 
"grad_norm": 0.7058241963386536, + "learning_rate": 0.00016965360684406528, + "loss": 2.6212, + "step": 5169 + }, + { + "epoch": 0.4172383181341296, + "grad_norm": 0.8248410224914551, + "learning_rate": 0.00016964227853315177, + "loss": 2.6688, + "step": 5170 + }, + { + "epoch": 0.4173190218707126, + "grad_norm": 0.7287606596946716, + "learning_rate": 0.0001696309484865707, + "loss": 2.6201, + "step": 5171 + }, + { + "epoch": 0.4173997256072956, + "grad_norm": 0.7214288115501404, + "learning_rate": 0.00016961961670460433, + "loss": 2.682, + "step": 5172 + }, + { + "epoch": 0.41748042934387863, + "grad_norm": 0.7133594155311584, + "learning_rate": 0.00016960828318753516, + "loss": 2.7167, + "step": 5173 + }, + { + "epoch": 0.4175611330804616, + "grad_norm": 0.6935842633247375, + "learning_rate": 0.00016959694793564558, + "loss": 2.6134, + "step": 5174 + }, + { + "epoch": 0.41764183681704464, + "grad_norm": 0.6863382458686829, + "learning_rate": 0.00016958561094921815, + "loss": 2.6396, + "step": 5175 + }, + { + "epoch": 0.4177225405536276, + "grad_norm": 0.7659433484077454, + "learning_rate": 0.0001695742722285354, + "loss": 2.6926, + "step": 5176 + }, + { + "epoch": 0.41780324429021065, + "grad_norm": 0.6997129917144775, + "learning_rate": 0.00016956293177387992, + "loss": 2.6983, + "step": 5177 + }, + { + "epoch": 0.41788394802679363, + "grad_norm": 0.6784526705741882, + "learning_rate": 0.00016955158958553433, + "loss": 2.6961, + "step": 5178 + }, + { + "epoch": 0.41796465176337666, + "grad_norm": 0.8227884769439697, + "learning_rate": 0.00016954024566378132, + "loss": 2.7008, + "step": 5179 + }, + { + "epoch": 0.41804535549995964, + "grad_norm": 0.7733054757118225, + "learning_rate": 0.0001695289000089036, + "loss": 2.6615, + "step": 5180 + }, + { + "epoch": 0.41812605923654267, + "grad_norm": 0.7077545523643494, + "learning_rate": 0.00016951755262118394, + "loss": 2.6388, + "step": 5181 + }, + { + "epoch": 0.41820676297312565, + "grad_norm": 0.7962050437927246, 
+ "learning_rate": 0.00016950620350090513, + "loss": 2.7063, + "step": 5182 + }, + { + "epoch": 0.4182874667097087, + "grad_norm": 0.6950554847717285, + "learning_rate": 0.00016949485264835005, + "loss": 2.7076, + "step": 5183 + }, + { + "epoch": 0.41836817044629165, + "grad_norm": 0.8546960949897766, + "learning_rate": 0.00016948350006380162, + "loss": 2.6533, + "step": 5184 + }, + { + "epoch": 0.4184488741828747, + "grad_norm": 0.7469324469566345, + "learning_rate": 0.00016947214574754272, + "loss": 2.5884, + "step": 5185 + }, + { + "epoch": 0.41852957791945766, + "grad_norm": 0.7125554084777832, + "learning_rate": 0.0001694607896998563, + "loss": 2.6448, + "step": 5186 + }, + { + "epoch": 0.4186102816560407, + "grad_norm": 0.6998329758644104, + "learning_rate": 0.00016944943192102549, + "loss": 2.5569, + "step": 5187 + }, + { + "epoch": 0.41869098539262367, + "grad_norm": 0.9046749472618103, + "learning_rate": 0.00016943807241133328, + "loss": 2.7701, + "step": 5188 + }, + { + "epoch": 0.4187716891292067, + "grad_norm": 0.7842074036598206, + "learning_rate": 0.00016942671117106274, + "loss": 2.7124, + "step": 5189 + }, + { + "epoch": 0.4188523928657897, + "grad_norm": 0.7625874280929565, + "learning_rate": 0.00016941534820049713, + "loss": 2.6626, + "step": 5190 + }, + { + "epoch": 0.4189330966023727, + "grad_norm": 0.7006461024284363, + "learning_rate": 0.00016940398349991957, + "loss": 2.6283, + "step": 5191 + }, + { + "epoch": 0.4190138003389557, + "grad_norm": 0.7081875205039978, + "learning_rate": 0.00016939261706961332, + "loss": 2.69, + "step": 5192 + }, + { + "epoch": 0.4190945040755387, + "grad_norm": 0.7554503083229065, + "learning_rate": 0.00016938124890986166, + "loss": 2.641, + "step": 5193 + }, + { + "epoch": 0.4191752078121217, + "grad_norm": 0.7478535175323486, + "learning_rate": 0.0001693698790209479, + "loss": 2.7035, + "step": 5194 + }, + { + "epoch": 0.41925591154870473, + "grad_norm": 0.7323064208030701, + "learning_rate": 
0.00016935850740315545, + "loss": 2.6713, + "step": 5195 + }, + { + "epoch": 0.4193366152852877, + "grad_norm": 0.8011505007743835, + "learning_rate": 0.00016934713405676764, + "loss": 2.6413, + "step": 5196 + }, + { + "epoch": 0.41941731902187074, + "grad_norm": 0.768851637840271, + "learning_rate": 0.00016933575898206804, + "loss": 2.6147, + "step": 5197 + }, + { + "epoch": 0.4194980227584537, + "grad_norm": 0.7255160808563232, + "learning_rate": 0.00016932438217934006, + "loss": 2.6093, + "step": 5198 + }, + { + "epoch": 0.41957872649503675, + "grad_norm": 0.7431769967079163, + "learning_rate": 0.00016931300364886722, + "loss": 2.6658, + "step": 5199 + }, + { + "epoch": 0.4196594302316197, + "grad_norm": 0.7532122731208801, + "learning_rate": 0.00016930162339093318, + "loss": 2.6371, + "step": 5200 + }, + { + "epoch": 0.41974013396820276, + "grad_norm": 0.7253943681716919, + "learning_rate": 0.00016929024140582152, + "loss": 2.6365, + "step": 5201 + }, + { + "epoch": 0.41982083770478573, + "grad_norm": 0.7323265075683594, + "learning_rate": 0.00016927885769381593, + "loss": 2.7096, + "step": 5202 + }, + { + "epoch": 0.4199015414413687, + "grad_norm": 0.7340009808540344, + "learning_rate": 0.00016926747225520008, + "loss": 2.6983, + "step": 5203 + }, + { + "epoch": 0.41998224517795174, + "grad_norm": 0.838706374168396, + "learning_rate": 0.00016925608509025776, + "loss": 2.7098, + "step": 5204 + }, + { + "epoch": 0.4200629489145347, + "grad_norm": 0.7320838570594788, + "learning_rate": 0.0001692446961992728, + "loss": 2.6767, + "step": 5205 + }, + { + "epoch": 0.42014365265111775, + "grad_norm": 0.7275335192680359, + "learning_rate": 0.00016923330558252898, + "loss": 2.6754, + "step": 5206 + }, + { + "epoch": 0.4202243563877007, + "grad_norm": 0.7572353482246399, + "learning_rate": 0.00016922191324031017, + "loss": 2.7076, + "step": 5207 + }, + { + "epoch": 0.42030506012428376, + "grad_norm": 0.7991098165512085, + "learning_rate": 0.0001692105191729004, + "loss": 
2.7281, + "step": 5208 + }, + { + "epoch": 0.42038576386086673, + "grad_norm": 0.70769202709198, + "learning_rate": 0.00016919912338058356, + "loss": 2.684, + "step": 5209 + }, + { + "epoch": 0.42046646759744977, + "grad_norm": 0.6895349621772766, + "learning_rate": 0.0001691877258636436, + "loss": 2.6723, + "step": 5210 + }, + { + "epoch": 0.42054717133403274, + "grad_norm": 0.7368944883346558, + "learning_rate": 0.00016917632662236476, + "loss": 2.601, + "step": 5211 + }, + { + "epoch": 0.4206278750706158, + "grad_norm": 0.7122060060501099, + "learning_rate": 0.00016916492565703097, + "loss": 2.703, + "step": 5212 + }, + { + "epoch": 0.42070857880719875, + "grad_norm": 0.735251784324646, + "learning_rate": 0.00016915352296792646, + "loss": 2.7715, + "step": 5213 + }, + { + "epoch": 0.4207892825437818, + "grad_norm": 0.7686039805412292, + "learning_rate": 0.00016914211855533536, + "loss": 2.6935, + "step": 5214 + }, + { + "epoch": 0.42086998628036476, + "grad_norm": 0.8457472920417786, + "learning_rate": 0.00016913071241954195, + "loss": 2.6535, + "step": 5215 + }, + { + "epoch": 0.4209506900169478, + "grad_norm": 0.6913465261459351, + "learning_rate": 0.00016911930456083046, + "loss": 2.6453, + "step": 5216 + }, + { + "epoch": 0.42103139375353077, + "grad_norm": 0.6939878463745117, + "learning_rate": 0.00016910789497948524, + "loss": 2.6483, + "step": 5217 + }, + { + "epoch": 0.4211120974901138, + "grad_norm": 0.7240888476371765, + "learning_rate": 0.00016909648367579062, + "loss": 2.6649, + "step": 5218 + }, + { + "epoch": 0.4211928012266968, + "grad_norm": 0.7570972442626953, + "learning_rate": 0.00016908507065003102, + "loss": 2.6633, + "step": 5219 + }, + { + "epoch": 0.4212735049632798, + "grad_norm": 0.72161465883255, + "learning_rate": 0.00016907365590249082, + "loss": 2.6999, + "step": 5220 + }, + { + "epoch": 0.4213542086998628, + "grad_norm": 0.7818038463592529, + "learning_rate": 0.00016906223943345458, + "loss": 2.6478, + "step": 5221 + }, + { + 
"epoch": 0.4214349124364458, + "grad_norm": 0.7292464971542358, + "learning_rate": 0.00016905082124320684, + "loss": 2.6725, + "step": 5222 + }, + { + "epoch": 0.4215156161730288, + "grad_norm": 0.7612937092781067, + "learning_rate": 0.0001690394013320321, + "loss": 2.6474, + "step": 5223 + }, + { + "epoch": 0.4215963199096118, + "grad_norm": 0.7325131297111511, + "learning_rate": 0.000169027979700215, + "loss": 2.6525, + "step": 5224 + }, + { + "epoch": 0.4216770236461948, + "grad_norm": 0.7736644148826599, + "learning_rate": 0.00016901655634804022, + "loss": 2.662, + "step": 5225 + }, + { + "epoch": 0.42175772738277784, + "grad_norm": 0.758522629737854, + "learning_rate": 0.00016900513127579244, + "loss": 2.6558, + "step": 5226 + }, + { + "epoch": 0.4218384311193608, + "grad_norm": 0.7559491991996765, + "learning_rate": 0.00016899370448375642, + "loss": 2.7361, + "step": 5227 + }, + { + "epoch": 0.42191913485594384, + "grad_norm": 0.7791146039962769, + "learning_rate": 0.00016898227597221692, + "loss": 2.6739, + "step": 5228 + }, + { + "epoch": 0.4219998385925268, + "grad_norm": 0.7280717492103577, + "learning_rate": 0.00016897084574145878, + "loss": 2.6316, + "step": 5229 + }, + { + "epoch": 0.42208054232910985, + "grad_norm": 0.7455596327781677, + "learning_rate": 0.0001689594137917669, + "loss": 2.7244, + "step": 5230 + }, + { + "epoch": 0.42216124606569283, + "grad_norm": 0.7965813875198364, + "learning_rate": 0.00016894798012342613, + "loss": 2.6757, + "step": 5231 + }, + { + "epoch": 0.42224194980227586, + "grad_norm": 0.6740596294403076, + "learning_rate": 0.00016893654473672148, + "loss": 2.631, + "step": 5232 + }, + { + "epoch": 0.42232265353885884, + "grad_norm": 0.695105254650116, + "learning_rate": 0.00016892510763193795, + "loss": 2.6563, + "step": 5233 + }, + { + "epoch": 0.42240335727544187, + "grad_norm": 0.7623865008354187, + "learning_rate": 0.00016891366880936051, + "loss": 2.6738, + "step": 5234 + }, + { + "epoch": 0.42248406101202485, + 
"grad_norm": 0.7545912265777588, + "learning_rate": 0.00016890222826927435, + "loss": 2.6949, + "step": 5235 + }, + { + "epoch": 0.4225647647486079, + "grad_norm": 0.7280749678611755, + "learning_rate": 0.00016889078601196452, + "loss": 2.6571, + "step": 5236 + }, + { + "epoch": 0.42264546848519086, + "grad_norm": 0.6624523401260376, + "learning_rate": 0.00016887934203771625, + "loss": 2.6854, + "step": 5237 + }, + { + "epoch": 0.4227261722217739, + "grad_norm": 0.7835487127304077, + "learning_rate": 0.0001688678963468147, + "loss": 2.6437, + "step": 5238 + }, + { + "epoch": 0.42280687595835686, + "grad_norm": 0.7384940981864929, + "learning_rate": 0.00016885644893954518, + "loss": 2.6584, + "step": 5239 + }, + { + "epoch": 0.4228875796949399, + "grad_norm": 0.8227531313896179, + "learning_rate": 0.00016884499981619292, + "loss": 2.673, + "step": 5240 + }, + { + "epoch": 0.4229682834315229, + "grad_norm": 0.7442220449447632, + "learning_rate": 0.00016883354897704334, + "loss": 2.6729, + "step": 5241 + }, + { + "epoch": 0.4230489871681059, + "grad_norm": 0.7182636857032776, + "learning_rate": 0.00016882209642238175, + "loss": 2.6833, + "step": 5242 + }, + { + "epoch": 0.4231296909046889, + "grad_norm": 0.7061870098114014, + "learning_rate": 0.00016881064215249362, + "loss": 2.6696, + "step": 5243 + }, + { + "epoch": 0.4232103946412719, + "grad_norm": 0.6792885065078735, + "learning_rate": 0.00016879918616766445, + "loss": 2.6805, + "step": 5244 + }, + { + "epoch": 0.4232910983778549, + "grad_norm": 0.7439807057380676, + "learning_rate": 0.00016878772846817968, + "loss": 2.6522, + "step": 5245 + }, + { + "epoch": 0.4233718021144379, + "grad_norm": 0.7078969478607178, + "learning_rate": 0.00016877626905432492, + "loss": 2.6549, + "step": 5246 + }, + { + "epoch": 0.4234525058510209, + "grad_norm": 0.7103868126869202, + "learning_rate": 0.00016876480792638577, + "loss": 2.6812, + "step": 5247 + }, + { + "epoch": 0.42353320958760393, + "grad_norm": 0.7224452495574951, + 
"learning_rate": 0.00016875334508464782, + "loss": 2.6657, + "step": 5248 + }, + { + "epoch": 0.4236139133241869, + "grad_norm": 0.6885106563568115, + "learning_rate": 0.00016874188052939682, + "loss": 2.6421, + "step": 5249 + }, + { + "epoch": 0.42369461706076994, + "grad_norm": 0.6736720204353333, + "learning_rate": 0.00016873041426091845, + "loss": 2.6717, + "step": 5250 + }, + { + "epoch": 0.4237753207973529, + "grad_norm": 0.7597963809967041, + "learning_rate": 0.00016871894627949846, + "loss": 2.6231, + "step": 5251 + }, + { + "epoch": 0.42385602453393595, + "grad_norm": 0.8295687437057495, + "learning_rate": 0.00016870747658542275, + "loss": 2.6631, + "step": 5252 + }, + { + "epoch": 0.4239367282705189, + "grad_norm": 0.6750548481941223, + "learning_rate": 0.0001686960051789771, + "loss": 2.6997, + "step": 5253 + }, + { + "epoch": 0.4240174320071019, + "grad_norm": 0.7229160666465759, + "learning_rate": 0.0001686845320604474, + "loss": 2.6525, + "step": 5254 + }, + { + "epoch": 0.42409813574368493, + "grad_norm": 0.8318623900413513, + "learning_rate": 0.00016867305723011967, + "loss": 2.7774, + "step": 5255 + }, + { + "epoch": 0.4241788394802679, + "grad_norm": 0.8391026854515076, + "learning_rate": 0.00016866158068827979, + "loss": 2.6712, + "step": 5256 + }, + { + "epoch": 0.42425954321685094, + "grad_norm": 0.691146969795227, + "learning_rate": 0.00016865010243521388, + "loss": 2.6459, + "step": 5257 + }, + { + "epoch": 0.4243402469534339, + "grad_norm": 0.7223602533340454, + "learning_rate": 0.00016863862247120794, + "loss": 2.6675, + "step": 5258 + }, + { + "epoch": 0.42442095069001695, + "grad_norm": 0.8400631546974182, + "learning_rate": 0.0001686271407965481, + "loss": 2.6978, + "step": 5259 + }, + { + "epoch": 0.42450165442659993, + "grad_norm": 0.737684965133667, + "learning_rate": 0.0001686156574115205, + "loss": 2.6992, + "step": 5260 + }, + { + "epoch": 0.42458235816318296, + "grad_norm": 0.7511717677116394, + "learning_rate": 
0.0001686041723164114, + "loss": 2.6947, + "step": 5261 + }, + { + "epoch": 0.42466306189976594, + "grad_norm": 0.7434492707252502, + "learning_rate": 0.00016859268551150698, + "loss": 2.7353, + "step": 5262 + }, + { + "epoch": 0.42474376563634897, + "grad_norm": 0.746609628200531, + "learning_rate": 0.00016858119699709353, + "loss": 2.7519, + "step": 5263 + }, + { + "epoch": 0.42482446937293195, + "grad_norm": 0.7709949612617493, + "learning_rate": 0.0001685697067734574, + "loss": 2.7018, + "step": 5264 + }, + { + "epoch": 0.424905173109515, + "grad_norm": 0.7496309876441956, + "learning_rate": 0.00016855821484088488, + "loss": 2.6761, + "step": 5265 + }, + { + "epoch": 0.42498587684609795, + "grad_norm": 0.7071252465248108, + "learning_rate": 0.00016854672119966243, + "loss": 2.6762, + "step": 5266 + }, + { + "epoch": 0.425066580582681, + "grad_norm": 0.7991356253623962, + "learning_rate": 0.00016853522585007658, + "loss": 2.6134, + "step": 5267 + }, + { + "epoch": 0.42514728431926396, + "grad_norm": 0.8194605708122253, + "learning_rate": 0.0001685237287924137, + "loss": 2.6601, + "step": 5268 + }, + { + "epoch": 0.425227988055847, + "grad_norm": 0.7451688051223755, + "learning_rate": 0.00016851223002696037, + "loss": 2.6631, + "step": 5269 + }, + { + "epoch": 0.42530869179242997, + "grad_norm": 0.7220263481140137, + "learning_rate": 0.0001685007295540032, + "loss": 2.6631, + "step": 5270 + }, + { + "epoch": 0.425389395529013, + "grad_norm": 0.7268854975700378, + "learning_rate": 0.00016848922737382874, + "loss": 2.6752, + "step": 5271 + }, + { + "epoch": 0.425470099265596, + "grad_norm": 0.8841642141342163, + "learning_rate": 0.00016847772348672378, + "loss": 2.7153, + "step": 5272 + }, + { + "epoch": 0.425550803002179, + "grad_norm": 0.7725942134857178, + "learning_rate": 0.00016846621789297489, + "loss": 2.6726, + "step": 5273 + }, + { + "epoch": 0.425631506738762, + "grad_norm": 0.7179448008537292, + "learning_rate": 0.00016845471059286887, + "loss": 2.6659, 
+ "step": 5274 + }, + { + "epoch": 0.425712210475345, + "grad_norm": 0.7630325555801392, + "learning_rate": 0.00016844320158669257, + "loss": 2.7133, + "step": 5275 + }, + { + "epoch": 0.425792914211928, + "grad_norm": 0.7349739670753479, + "learning_rate": 0.00016843169087473272, + "loss": 2.6397, + "step": 5276 + }, + { + "epoch": 0.42587361794851103, + "grad_norm": 0.7670298218727112, + "learning_rate": 0.00016842017845727626, + "loss": 2.6485, + "step": 5277 + }, + { + "epoch": 0.425954321685094, + "grad_norm": 0.692095160484314, + "learning_rate": 0.00016840866433461013, + "loss": 2.6058, + "step": 5278 + }, + { + "epoch": 0.42603502542167704, + "grad_norm": 0.6888624429702759, + "learning_rate": 0.00016839714850702125, + "loss": 2.5757, + "step": 5279 + }, + { + "epoch": 0.42611572915826, + "grad_norm": 0.6816484332084656, + "learning_rate": 0.00016838563097479664, + "loss": 2.6656, + "step": 5280 + }, + { + "epoch": 0.42619643289484305, + "grad_norm": 0.7778486609458923, + "learning_rate": 0.00016837411173822333, + "loss": 2.6738, + "step": 5281 + }, + { + "epoch": 0.426277136631426, + "grad_norm": 0.73436439037323, + "learning_rate": 0.00016836259079758845, + "loss": 2.6346, + "step": 5282 + }, + { + "epoch": 0.42635784036800906, + "grad_norm": 0.673528254032135, + "learning_rate": 0.00016835106815317908, + "loss": 2.6636, + "step": 5283 + }, + { + "epoch": 0.42643854410459203, + "grad_norm": 0.6892737150192261, + "learning_rate": 0.00016833954380528242, + "loss": 2.6723, + "step": 5284 + }, + { + "epoch": 0.42651924784117506, + "grad_norm": 0.7404607534408569, + "learning_rate": 0.00016832801775418571, + "loss": 2.6751, + "step": 5285 + }, + { + "epoch": 0.42659995157775804, + "grad_norm": 0.7040587663650513, + "learning_rate": 0.00016831649000017618, + "loss": 2.6079, + "step": 5286 + }, + { + "epoch": 0.4266806553143411, + "grad_norm": 0.7295164465904236, + "learning_rate": 0.00016830496054354112, + "loss": 2.5928, + "step": 5287 + }, + { + "epoch": 
0.42676135905092405, + "grad_norm": 0.7269962430000305, + "learning_rate": 0.00016829342938456788, + "loss": 2.6648, + "step": 5288 + }, + { + "epoch": 0.4268420627875071, + "grad_norm": 0.7296550273895264, + "learning_rate": 0.0001682818965235439, + "loss": 2.6814, + "step": 5289 + }, + { + "epoch": 0.42692276652409006, + "grad_norm": 0.8376085758209229, + "learning_rate": 0.00016827036196075655, + "loss": 2.702, + "step": 5290 + }, + { + "epoch": 0.4270034702606731, + "grad_norm": 0.7461032271385193, + "learning_rate": 0.00016825882569649332, + "loss": 2.6959, + "step": 5291 + }, + { + "epoch": 0.42708417399725607, + "grad_norm": 0.7218661308288574, + "learning_rate": 0.00016824728773104171, + "loss": 2.7182, + "step": 5292 + }, + { + "epoch": 0.4271648777338391, + "grad_norm": 0.7012860774993896, + "learning_rate": 0.00016823574806468933, + "loss": 2.6989, + "step": 5293 + }, + { + "epoch": 0.4272455814704221, + "grad_norm": 0.7039482593536377, + "learning_rate": 0.0001682242066977237, + "loss": 2.6153, + "step": 5294 + }, + { + "epoch": 0.4273262852070051, + "grad_norm": 0.8783851861953735, + "learning_rate": 0.0001682126636304325, + "loss": 2.7174, + "step": 5295 + }, + { + "epoch": 0.4274069889435881, + "grad_norm": 0.7266566157341003, + "learning_rate": 0.00016820111886310343, + "loss": 2.6571, + "step": 5296 + }, + { + "epoch": 0.4274876926801711, + "grad_norm": 0.7512212991714478, + "learning_rate": 0.0001681895723960242, + "loss": 2.6802, + "step": 5297 + }, + { + "epoch": 0.4275683964167541, + "grad_norm": 0.7786974310874939, + "learning_rate": 0.00016817802422948254, + "loss": 2.6514, + "step": 5298 + }, + { + "epoch": 0.4276491001533371, + "grad_norm": 0.7454531788825989, + "learning_rate": 0.00016816647436376634, + "loss": 2.6508, + "step": 5299 + }, + { + "epoch": 0.4277298038899201, + "grad_norm": 0.7542992830276489, + "learning_rate": 0.0001681549227991634, + "loss": 2.6455, + "step": 5300 + }, + { + "epoch": 0.42781050762650313, + "grad_norm": 
0.7405722141265869, + "learning_rate": 0.0001681433695359616, + "loss": 2.6505, + "step": 5301 + }, + { + "epoch": 0.4278912113630861, + "grad_norm": 0.7120002508163452, + "learning_rate": 0.00016813181457444896, + "loss": 2.6652, + "step": 5302 + }, + { + "epoch": 0.42797191509966914, + "grad_norm": 0.7645997405052185, + "learning_rate": 0.00016812025791491334, + "loss": 2.6456, + "step": 5303 + }, + { + "epoch": 0.4280526188362521, + "grad_norm": 0.7214465141296387, + "learning_rate": 0.00016810869955764286, + "loss": 2.6261, + "step": 5304 + }, + { + "epoch": 0.4281333225728351, + "grad_norm": 0.7653367519378662, + "learning_rate": 0.00016809713950292551, + "loss": 2.7295, + "step": 5305 + }, + { + "epoch": 0.4282140263094181, + "grad_norm": 0.6798970103263855, + "learning_rate": 0.0001680855777510495, + "loss": 2.6549, + "step": 5306 + }, + { + "epoch": 0.4282947300460011, + "grad_norm": 0.7693684101104736, + "learning_rate": 0.00016807401430230288, + "loss": 2.7001, + "step": 5307 + }, + { + "epoch": 0.42837543378258414, + "grad_norm": 0.6962063312530518, + "learning_rate": 0.00016806244915697384, + "loss": 2.6582, + "step": 5308 + }, + { + "epoch": 0.4284561375191671, + "grad_norm": 0.7526959776878357, + "learning_rate": 0.00016805088231535068, + "loss": 2.7204, + "step": 5309 + }, + { + "epoch": 0.42853684125575014, + "grad_norm": 0.7403820753097534, + "learning_rate": 0.0001680393137777217, + "loss": 2.6505, + "step": 5310 + }, + { + "epoch": 0.4286175449923331, + "grad_norm": 0.7056909799575806, + "learning_rate": 0.00016802774354437506, + "loss": 2.5981, + "step": 5311 + }, + { + "epoch": 0.42869824872891615, + "grad_norm": 0.6756439805030823, + "learning_rate": 0.0001680161716155993, + "loss": 2.6845, + "step": 5312 + }, + { + "epoch": 0.42877895246549913, + "grad_norm": 0.7634297013282776, + "learning_rate": 0.0001680045979916827, + "loss": 2.6399, + "step": 5313 + }, + { + "epoch": 0.42885965620208216, + "grad_norm": 0.6793022751808167, + 
"learning_rate": 0.0001679930226729138, + "loss": 2.6808, + "step": 5314 + }, + { + "epoch": 0.42894035993866514, + "grad_norm": 0.7692369222640991, + "learning_rate": 0.00016798144565958103, + "loss": 2.673, + "step": 5315 + }, + { + "epoch": 0.42902106367524817, + "grad_norm": 0.668798565864563, + "learning_rate": 0.00016796986695197293, + "loss": 2.6465, + "step": 5316 + }, + { + "epoch": 0.42910176741183115, + "grad_norm": 0.719160795211792, + "learning_rate": 0.00016795828655037805, + "loss": 2.5876, + "step": 5317 + }, + { + "epoch": 0.4291824711484142, + "grad_norm": 0.7352864742279053, + "learning_rate": 0.000167946704455085, + "loss": 2.625, + "step": 5318 + }, + { + "epoch": 0.42926317488499716, + "grad_norm": 0.7103392481803894, + "learning_rate": 0.00016793512066638254, + "loss": 2.602, + "step": 5319 + }, + { + "epoch": 0.4293438786215802, + "grad_norm": 0.7005727291107178, + "learning_rate": 0.0001679235351845592, + "loss": 2.6723, + "step": 5320 + }, + { + "epoch": 0.42942458235816316, + "grad_norm": 0.7686243653297424, + "learning_rate": 0.00016791194800990387, + "loss": 2.693, + "step": 5321 + }, + { + "epoch": 0.4295052860947462, + "grad_norm": 0.7026933431625366, + "learning_rate": 0.00016790035914270526, + "loss": 2.6334, + "step": 5322 + }, + { + "epoch": 0.4295859898313292, + "grad_norm": 0.748938262462616, + "learning_rate": 0.0001678887685832522, + "loss": 2.6757, + "step": 5323 + }, + { + "epoch": 0.4296666935679122, + "grad_norm": 0.7753568887710571, + "learning_rate": 0.00016787717633183355, + "loss": 2.6782, + "step": 5324 + }, + { + "epoch": 0.4297473973044952, + "grad_norm": 0.7605767846107483, + "learning_rate": 0.00016786558238873823, + "loss": 2.6822, + "step": 5325 + }, + { + "epoch": 0.4298281010410782, + "grad_norm": 0.7516531348228455, + "learning_rate": 0.00016785398675425524, + "loss": 2.6802, + "step": 5326 + }, + { + "epoch": 0.4299088047776612, + "grad_norm": 0.7551677227020264, + "learning_rate": 0.0001678423894286735, + 
"loss": 2.6509, + "step": 5327 + }, + { + "epoch": 0.4299895085142442, + "grad_norm": 0.765364944934845, + "learning_rate": 0.00016783079041228206, + "loss": 2.6552, + "step": 5328 + }, + { + "epoch": 0.4300702122508272, + "grad_norm": 0.7016649842262268, + "learning_rate": 0.00016781918970537002, + "loss": 2.6861, + "step": 5329 + }, + { + "epoch": 0.43015091598741023, + "grad_norm": 0.7266311645507812, + "learning_rate": 0.0001678075873082265, + "loss": 2.7064, + "step": 5330 + }, + { + "epoch": 0.4302316197239932, + "grad_norm": 0.7414532899856567, + "learning_rate": 0.00016779598322114064, + "loss": 2.6273, + "step": 5331 + }, + { + "epoch": 0.43031232346057624, + "grad_norm": 0.7032443881034851, + "learning_rate": 0.00016778437744440167, + "loss": 2.6577, + "step": 5332 + }, + { + "epoch": 0.4303930271971592, + "grad_norm": 0.7150338888168335, + "learning_rate": 0.00016777276997829882, + "loss": 2.6586, + "step": 5333 + }, + { + "epoch": 0.43047373093374225, + "grad_norm": 0.6893971562385559, + "learning_rate": 0.0001677611608231214, + "loss": 2.6713, + "step": 5334 + }, + { + "epoch": 0.4305544346703252, + "grad_norm": 0.861935555934906, + "learning_rate": 0.00016774954997915867, + "loss": 2.7037, + "step": 5335 + }, + { + "epoch": 0.43063513840690826, + "grad_norm": 0.7140138745307922, + "learning_rate": 0.00016773793744670012, + "loss": 2.6684, + "step": 5336 + }, + { + "epoch": 0.43071584214349123, + "grad_norm": 0.7245929837226868, + "learning_rate": 0.00016772632322603506, + "loss": 2.6349, + "step": 5337 + }, + { + "epoch": 0.43079654588007427, + "grad_norm": 0.7216203808784485, + "learning_rate": 0.000167714707317453, + "loss": 2.6338, + "step": 5338 + }, + { + "epoch": 0.43087724961665724, + "grad_norm": 0.7076452374458313, + "learning_rate": 0.00016770308972124343, + "loss": 2.6614, + "step": 5339 + }, + { + "epoch": 0.4309579533532403, + "grad_norm": 0.7392035722732544, + "learning_rate": 0.00016769147043769586, + "loss": 2.6697, + "step": 5340 + }, 
+ { + "epoch": 0.43103865708982325, + "grad_norm": 0.7235357761383057, + "learning_rate": 0.00016767984946709994, + "loss": 2.6664, + "step": 5341 + }, + { + "epoch": 0.4311193608264063, + "grad_norm": 0.6985526084899902, + "learning_rate": 0.00016766822680974524, + "loss": 2.6157, + "step": 5342 + }, + { + "epoch": 0.43120006456298926, + "grad_norm": 0.769963264465332, + "learning_rate": 0.0001676566024659214, + "loss": 2.6096, + "step": 5343 + }, + { + "epoch": 0.4312807682995723, + "grad_norm": 0.7504093050956726, + "learning_rate": 0.00016764497643591823, + "loss": 2.5795, + "step": 5344 + }, + { + "epoch": 0.43136147203615527, + "grad_norm": 0.7193379402160645, + "learning_rate": 0.0001676333487200254, + "loss": 2.6158, + "step": 5345 + }, + { + "epoch": 0.4314421757727383, + "grad_norm": 0.777357280254364, + "learning_rate": 0.00016762171931853273, + "loss": 2.6388, + "step": 5346 + }, + { + "epoch": 0.4315228795093213, + "grad_norm": 0.8590179085731506, + "learning_rate": 0.00016761008823173003, + "loss": 2.6597, + "step": 5347 + }, + { + "epoch": 0.4316035832459043, + "grad_norm": 0.7040170431137085, + "learning_rate": 0.0001675984554599072, + "loss": 2.6447, + "step": 5348 + }, + { + "epoch": 0.4316842869824873, + "grad_norm": 0.7682301998138428, + "learning_rate": 0.00016758682100335417, + "loss": 2.6738, + "step": 5349 + }, + { + "epoch": 0.4317649907190703, + "grad_norm": 0.8342414498329163, + "learning_rate": 0.00016757518486236087, + "loss": 2.7058, + "step": 5350 + }, + { + "epoch": 0.4318456944556533, + "grad_norm": 0.7410600781440735, + "learning_rate": 0.00016756354703721736, + "loss": 2.6597, + "step": 5351 + }, + { + "epoch": 0.4319263981922363, + "grad_norm": 0.7633174061775208, + "learning_rate": 0.00016755190752821363, + "loss": 2.6461, + "step": 5352 + }, + { + "epoch": 0.4320071019288193, + "grad_norm": 0.7855150103569031, + "learning_rate": 0.00016754026633563973, + "loss": 2.6556, + "step": 5353 + }, + { + "epoch": 0.43208780566540234, + 
"grad_norm": 0.7197602391242981, + "learning_rate": 0.00016752862345978587, + "loss": 2.6511, + "step": 5354 + }, + { + "epoch": 0.4321685094019853, + "grad_norm": 0.7748876810073853, + "learning_rate": 0.00016751697890094223, + "loss": 2.7, + "step": 5355 + }, + { + "epoch": 0.4322492131385683, + "grad_norm": 0.7457308173179626, + "learning_rate": 0.00016750533265939895, + "loss": 2.6934, + "step": 5356 + }, + { + "epoch": 0.4323299168751513, + "grad_norm": 0.8003394603729248, + "learning_rate": 0.00016749368473544633, + "loss": 2.6273, + "step": 5357 + }, + { + "epoch": 0.4324106206117343, + "grad_norm": 0.7163615822792053, + "learning_rate": 0.00016748203512937464, + "loss": 2.6605, + "step": 5358 + }, + { + "epoch": 0.43249132434831733, + "grad_norm": 0.6859120726585388, + "learning_rate": 0.00016747038384147422, + "loss": 2.6748, + "step": 5359 + }, + { + "epoch": 0.4325720280849003, + "grad_norm": 0.7169440984725952, + "learning_rate": 0.0001674587308720355, + "loss": 2.6674, + "step": 5360 + }, + { + "epoch": 0.43265273182148334, + "grad_norm": 0.7762351036071777, + "learning_rate": 0.00016744707622134888, + "loss": 2.6673, + "step": 5361 + }, + { + "epoch": 0.4327334355580663, + "grad_norm": 0.7169542908668518, + "learning_rate": 0.0001674354198897048, + "loss": 2.7341, + "step": 5362 + }, + { + "epoch": 0.43281413929464935, + "grad_norm": 0.7903403043746948, + "learning_rate": 0.00016742376187739376, + "loss": 2.6019, + "step": 5363 + }, + { + "epoch": 0.4328948430312323, + "grad_norm": 0.8395403027534485, + "learning_rate": 0.00016741210218470634, + "loss": 2.6519, + "step": 5364 + }, + { + "epoch": 0.43297554676781536, + "grad_norm": 0.7521546483039856, + "learning_rate": 0.0001674004408119331, + "loss": 2.6067, + "step": 5365 + }, + { + "epoch": 0.43305625050439833, + "grad_norm": 0.7186779975891113, + "learning_rate": 0.0001673887777593647, + "loss": 2.6435, + "step": 5366 + }, + { + "epoch": 0.43313695424098136, + "grad_norm": 0.7362968921661377, + 
"learning_rate": 0.0001673771130272918, + "loss": 2.6031, + "step": 5367 + }, + { + "epoch": 0.43321765797756434, + "grad_norm": 0.8033537864685059, + "learning_rate": 0.0001673654466160051, + "loss": 2.7234, + "step": 5368 + }, + { + "epoch": 0.4332983617141474, + "grad_norm": 0.7109711766242981, + "learning_rate": 0.0001673537785257954, + "loss": 2.6621, + "step": 5369 + }, + { + "epoch": 0.43337906545073035, + "grad_norm": 0.7499226927757263, + "learning_rate": 0.0001673421087569535, + "loss": 2.706, + "step": 5370 + }, + { + "epoch": 0.4334597691873134, + "grad_norm": 0.7192875146865845, + "learning_rate": 0.00016733043730977017, + "loss": 2.6053, + "step": 5371 + }, + { + "epoch": 0.43354047292389636, + "grad_norm": 0.6939374208450317, + "learning_rate": 0.00016731876418453636, + "loss": 2.6621, + "step": 5372 + }, + { + "epoch": 0.4336211766604794, + "grad_norm": 0.720741331577301, + "learning_rate": 0.00016730708938154297, + "loss": 2.6358, + "step": 5373 + }, + { + "epoch": 0.43370188039706237, + "grad_norm": 0.6979780793190002, + "learning_rate": 0.00016729541290108095, + "loss": 2.6162, + "step": 5374 + }, + { + "epoch": 0.4337825841336454, + "grad_norm": 0.8014200925827026, + "learning_rate": 0.00016728373474344136, + "loss": 2.6255, + "step": 5375 + }, + { + "epoch": 0.4338632878702284, + "grad_norm": 0.7780057787895203, + "learning_rate": 0.0001672720549089152, + "loss": 2.6257, + "step": 5376 + }, + { + "epoch": 0.4339439916068114, + "grad_norm": 0.7111102938652039, + "learning_rate": 0.00016726037339779358, + "loss": 2.6384, + "step": 5377 + }, + { + "epoch": 0.4340246953433944, + "grad_norm": 0.7077106833457947, + "learning_rate": 0.00016724869021036764, + "loss": 2.6293, + "step": 5378 + }, + { + "epoch": 0.4341053990799774, + "grad_norm": 0.8328250646591187, + "learning_rate": 0.00016723700534692853, + "loss": 2.6186, + "step": 5379 + }, + { + "epoch": 0.4341861028165604, + "grad_norm": 0.6942149996757507, + "learning_rate": 
0.00016722531880776752, + "loss": 2.6032, + "step": 5380 + }, + { + "epoch": 0.4342668065531434, + "grad_norm": 0.7180305123329163, + "learning_rate": 0.00016721363059317583, + "loss": 2.6166, + "step": 5381 + }, + { + "epoch": 0.4343475102897264, + "grad_norm": 0.8093443512916565, + "learning_rate": 0.00016720194070344476, + "loss": 2.6596, + "step": 5382 + }, + { + "epoch": 0.43442821402630943, + "grad_norm": 0.7337743043899536, + "learning_rate": 0.00016719024913886568, + "loss": 2.6137, + "step": 5383 + }, + { + "epoch": 0.4345089177628924, + "grad_norm": 0.7590384483337402, + "learning_rate": 0.00016717855589972993, + "loss": 2.6541, + "step": 5384 + }, + { + "epoch": 0.43458962149947544, + "grad_norm": 0.6945257186889648, + "learning_rate": 0.00016716686098632898, + "loss": 2.686, + "step": 5385 + }, + { + "epoch": 0.4346703252360584, + "grad_norm": 0.7175764441490173, + "learning_rate": 0.00016715516439895424, + "loss": 2.6081, + "step": 5386 + }, + { + "epoch": 0.43475102897264145, + "grad_norm": 0.7287259697914124, + "learning_rate": 0.00016714346613789732, + "loss": 2.6462, + "step": 5387 + }, + { + "epoch": 0.43483173270922443, + "grad_norm": 0.6864096522331238, + "learning_rate": 0.00016713176620344964, + "loss": 2.7104, + "step": 5388 + }, + { + "epoch": 0.43491243644580746, + "grad_norm": 0.6554383039474487, + "learning_rate": 0.00016712006459590289, + "loss": 2.6153, + "step": 5389 + }, + { + "epoch": 0.43499314018239044, + "grad_norm": 0.6415165662765503, + "learning_rate": 0.00016710836131554867, + "loss": 2.6198, + "step": 5390 + }, + { + "epoch": 0.43507384391897347, + "grad_norm": 0.6998475193977356, + "learning_rate": 0.00016709665636267869, + "loss": 2.6774, + "step": 5391 + }, + { + "epoch": 0.43515454765555645, + "grad_norm": 0.7437679171562195, + "learning_rate": 0.00016708494973758465, + "loss": 2.6176, + "step": 5392 + }, + { + "epoch": 0.4352352513921395, + "grad_norm": 0.6898311376571655, + "learning_rate": 0.00016707324144055825, + 
"loss": 2.6194, + "step": 5393 + }, + { + "epoch": 0.43531595512872245, + "grad_norm": 0.7536425590515137, + "learning_rate": 0.00016706153147189138, + "loss": 2.672, + "step": 5394 + }, + { + "epoch": 0.4353966588653055, + "grad_norm": 0.7576118111610413, + "learning_rate": 0.00016704981983187581, + "loss": 2.6473, + "step": 5395 + }, + { + "epoch": 0.43547736260188846, + "grad_norm": 0.7452495098114014, + "learning_rate": 0.00016703810652080349, + "loss": 2.6487, + "step": 5396 + }, + { + "epoch": 0.4355580663384715, + "grad_norm": 0.7817744612693787, + "learning_rate": 0.0001670263915389663, + "loss": 2.61, + "step": 5397 + }, + { + "epoch": 0.43563877007505447, + "grad_norm": 0.7195492386817932, + "learning_rate": 0.00016701467488665624, + "loss": 2.6745, + "step": 5398 + }, + { + "epoch": 0.4357194738116375, + "grad_norm": 0.7703930735588074, + "learning_rate": 0.0001670029565641653, + "loss": 2.7196, + "step": 5399 + }, + { + "epoch": 0.4358001775482205, + "grad_norm": 0.6859520673751831, + "learning_rate": 0.00016699123657178553, + "loss": 2.6317, + "step": 5400 + }, + { + "epoch": 0.4358808812848035, + "grad_norm": 0.7380268573760986, + "learning_rate": 0.00016697951490980903, + "loss": 2.6008, + "step": 5401 + }, + { + "epoch": 0.4359615850213865, + "grad_norm": 0.7903439402580261, + "learning_rate": 0.00016696779157852792, + "loss": 2.6411, + "step": 5402 + }, + { + "epoch": 0.4360422887579695, + "grad_norm": 0.7022606134414673, + "learning_rate": 0.0001669560665782344, + "loss": 2.6153, + "step": 5403 + }, + { + "epoch": 0.4361229924945525, + "grad_norm": 0.8196203112602234, + "learning_rate": 0.00016694433990922068, + "loss": 2.6128, + "step": 5404 + }, + { + "epoch": 0.43620369623113553, + "grad_norm": 0.7342696189880371, + "learning_rate": 0.000166932611571779, + "loss": 2.6802, + "step": 5405 + }, + { + "epoch": 0.4362843999677185, + "grad_norm": 0.7475131154060364, + "learning_rate": 0.0001669208815662017, + "loss": 2.6106, + "step": 5406 + }, + { + 
"epoch": 0.4363651037043015, + "grad_norm": 0.7067655324935913, + "learning_rate": 0.00016690914989278107, + "loss": 2.6362, + "step": 5407 + }, + { + "epoch": 0.4364458074408845, + "grad_norm": 0.7550163865089417, + "learning_rate": 0.00016689741655180956, + "loss": 2.6256, + "step": 5408 + }, + { + "epoch": 0.4365265111774675, + "grad_norm": 0.7341828346252441, + "learning_rate": 0.00016688568154357952, + "loss": 2.6912, + "step": 5409 + }, + { + "epoch": 0.4366072149140505, + "grad_norm": 0.7501869201660156, + "learning_rate": 0.00016687394486838349, + "loss": 2.7122, + "step": 5410 + }, + { + "epoch": 0.4366879186506335, + "grad_norm": 0.7041562795639038, + "learning_rate": 0.00016686220652651392, + "loss": 2.6755, + "step": 5411 + }, + { + "epoch": 0.43676862238721653, + "grad_norm": 0.7218217253684998, + "learning_rate": 0.00016685046651826338, + "loss": 2.693, + "step": 5412 + }, + { + "epoch": 0.4368493261237995, + "grad_norm": 0.6880577206611633, + "learning_rate": 0.00016683872484392448, + "loss": 2.638, + "step": 5413 + }, + { + "epoch": 0.43693002986038254, + "grad_norm": 0.6864475607872009, + "learning_rate": 0.0001668269815037898, + "loss": 2.6497, + "step": 5414 + }, + { + "epoch": 0.4370107335969655, + "grad_norm": 0.7326167821884155, + "learning_rate": 0.00016681523649815212, + "loss": 2.6858, + "step": 5415 + }, + { + "epoch": 0.43709143733354855, + "grad_norm": 0.6773428320884705, + "learning_rate": 0.00016680348982730405, + "loss": 2.6489, + "step": 5416 + }, + { + "epoch": 0.4371721410701315, + "grad_norm": 0.7117835283279419, + "learning_rate": 0.00016679174149153837, + "loss": 2.6607, + "step": 5417 + }, + { + "epoch": 0.43725284480671456, + "grad_norm": 0.7268334031105042, + "learning_rate": 0.00016677999149114793, + "loss": 2.703, + "step": 5418 + }, + { + "epoch": 0.43733354854329753, + "grad_norm": 0.7672972679138184, + "learning_rate": 0.00016676823982642554, + "loss": 2.5803, + "step": 5419 + }, + { + "epoch": 0.43741425227988057, + 
"grad_norm": 0.6966733932495117, + "learning_rate": 0.00016675648649766407, + "loss": 2.6149, + "step": 5420 + }, + { + "epoch": 0.43749495601646354, + "grad_norm": 0.752896249294281, + "learning_rate": 0.00016674473150515644, + "loss": 2.7108, + "step": 5421 + }, + { + "epoch": 0.4375756597530466, + "grad_norm": 0.7094796895980835, + "learning_rate": 0.00016673297484919565, + "loss": 2.6989, + "step": 5422 + }, + { + "epoch": 0.43765636348962955, + "grad_norm": 0.7631612420082092, + "learning_rate": 0.00016672121653007465, + "loss": 2.6673, + "step": 5423 + }, + { + "epoch": 0.4377370672262126, + "grad_norm": 0.7083843946456909, + "learning_rate": 0.00016670945654808655, + "loss": 2.6529, + "step": 5424 + }, + { + "epoch": 0.43781777096279556, + "grad_norm": 0.7291569709777832, + "learning_rate": 0.0001666976949035244, + "loss": 2.633, + "step": 5425 + }, + { + "epoch": 0.4378984746993786, + "grad_norm": 0.8351448774337769, + "learning_rate": 0.00016668593159668138, + "loss": 2.5993, + "step": 5426 + }, + { + "epoch": 0.43797917843596157, + "grad_norm": 0.7339642643928528, + "learning_rate": 0.00016667416662785058, + "loss": 2.6486, + "step": 5427 + }, + { + "epoch": 0.4380598821725446, + "grad_norm": 0.7257512211799622, + "learning_rate": 0.00016666239999732526, + "loss": 2.6453, + "step": 5428 + }, + { + "epoch": 0.4381405859091276, + "grad_norm": 0.7282476425170898, + "learning_rate": 0.00016665063170539872, + "loss": 2.6654, + "step": 5429 + }, + { + "epoch": 0.4382212896457106, + "grad_norm": 0.726685643196106, + "learning_rate": 0.00016663886175236417, + "loss": 2.65, + "step": 5430 + }, + { + "epoch": 0.4383019933822936, + "grad_norm": 0.7478880286216736, + "learning_rate": 0.000166627090138515, + "loss": 2.623, + "step": 5431 + }, + { + "epoch": 0.4383826971188766, + "grad_norm": 0.7624948024749756, + "learning_rate": 0.00016661531686414457, + "loss": 2.6438, + "step": 5432 + }, + { + "epoch": 0.4384634008554596, + "grad_norm": 0.8098936676979065, + 
"learning_rate": 0.00016660354192954633, + "loss": 2.6226, + "step": 5433 + }, + { + "epoch": 0.4385441045920426, + "grad_norm": 0.7305725812911987, + "learning_rate": 0.0001665917653350137, + "loss": 2.6425, + "step": 5434 + }, + { + "epoch": 0.4386248083286256, + "grad_norm": 0.7064421772956848, + "learning_rate": 0.00016657998708084027, + "loss": 2.6069, + "step": 5435 + }, + { + "epoch": 0.43870551206520864, + "grad_norm": 0.8279524445533752, + "learning_rate": 0.00016656820716731945, + "loss": 2.6609, + "step": 5436 + }, + { + "epoch": 0.4387862158017916, + "grad_norm": 0.742659866809845, + "learning_rate": 0.00016655642559474488, + "loss": 2.64, + "step": 5437 + }, + { + "epoch": 0.43886691953837464, + "grad_norm": 0.757780909538269, + "learning_rate": 0.00016654464236341026, + "loss": 2.6546, + "step": 5438 + }, + { + "epoch": 0.4389476232749576, + "grad_norm": 0.7439742684364319, + "learning_rate": 0.00016653285747360918, + "loss": 2.6717, + "step": 5439 + }, + { + "epoch": 0.43902832701154065, + "grad_norm": 0.7529581189155579, + "learning_rate": 0.0001665210709256354, + "loss": 2.6204, + "step": 5440 + }, + { + "epoch": 0.43910903074812363, + "grad_norm": 0.7224153876304626, + "learning_rate": 0.00016650928271978258, + "loss": 2.6417, + "step": 5441 + }, + { + "epoch": 0.43918973448470666, + "grad_norm": 0.6792185306549072, + "learning_rate": 0.00016649749285634462, + "loss": 2.6382, + "step": 5442 + }, + { + "epoch": 0.43927043822128964, + "grad_norm": 0.6887058019638062, + "learning_rate": 0.00016648570133561533, + "loss": 2.6302, + "step": 5443 + }, + { + "epoch": 0.43935114195787267, + "grad_norm": 0.7373671531677246, + "learning_rate": 0.00016647390815788853, + "loss": 2.625, + "step": 5444 + }, + { + "epoch": 0.43943184569445565, + "grad_norm": 0.7595719695091248, + "learning_rate": 0.0001664621133234582, + "loss": 2.6444, + "step": 5445 + }, + { + "epoch": 0.4395125494310387, + "grad_norm": 0.7331473231315613, + "learning_rate": 
0.00016645031683261825, + "loss": 2.6308, + "step": 5446 + }, + { + "epoch": 0.43959325316762166, + "grad_norm": 0.7724922895431519, + "learning_rate": 0.0001664385186856627, + "loss": 2.6646, + "step": 5447 + }, + { + "epoch": 0.4396739569042047, + "grad_norm": 0.6960163712501526, + "learning_rate": 0.00016642671888288563, + "loss": 2.6196, + "step": 5448 + }, + { + "epoch": 0.43975466064078766, + "grad_norm": 0.6769189834594727, + "learning_rate": 0.00016641491742458103, + "loss": 2.6558, + "step": 5449 + }, + { + "epoch": 0.4398353643773707, + "grad_norm": 0.7435783743858337, + "learning_rate": 0.0001664031143110431, + "loss": 2.6717, + "step": 5450 + }, + { + "epoch": 0.4399160681139537, + "grad_norm": 0.7234118580818176, + "learning_rate": 0.00016639130954256603, + "loss": 2.6549, + "step": 5451 + }, + { + "epoch": 0.4399967718505367, + "grad_norm": 0.720825731754303, + "learning_rate": 0.00016637950311944392, + "loss": 2.6098, + "step": 5452 + }, + { + "epoch": 0.4400774755871197, + "grad_norm": 0.6977505087852478, + "learning_rate": 0.0001663676950419711, + "loss": 2.6351, + "step": 5453 + }, + { + "epoch": 0.4401581793237027, + "grad_norm": 0.6959076523780823, + "learning_rate": 0.00016635588531044185, + "loss": 2.6918, + "step": 5454 + }, + { + "epoch": 0.4402388830602857, + "grad_norm": 0.7022189497947693, + "learning_rate": 0.00016634407392515044, + "loss": 2.6218, + "step": 5455 + }, + { + "epoch": 0.4403195867968687, + "grad_norm": 0.7147775292396545, + "learning_rate": 0.0001663322608863913, + "loss": 2.6966, + "step": 5456 + }, + { + "epoch": 0.4404002905334517, + "grad_norm": 0.7592755556106567, + "learning_rate": 0.00016632044619445882, + "loss": 2.6326, + "step": 5457 + }, + { + "epoch": 0.4404809942700347, + "grad_norm": 0.6914302110671997, + "learning_rate": 0.00016630862984964745, + "loss": 2.603, + "step": 5458 + }, + { + "epoch": 0.4405616980066177, + "grad_norm": 0.7735368609428406, + "learning_rate": 0.0001662968118522517, + "loss": 2.6666, 
+ "step": 5459 + }, + { + "epoch": 0.4406424017432007, + "grad_norm": 0.7175899744033813, + "learning_rate": 0.00016628499220256612, + "loss": 2.666, + "step": 5460 + }, + { + "epoch": 0.4407231054797837, + "grad_norm": 0.6735796332359314, + "learning_rate": 0.00016627317090088523, + "loss": 2.6451, + "step": 5461 + }, + { + "epoch": 0.4408038092163667, + "grad_norm": 0.72022545337677, + "learning_rate": 0.0001662613479475037, + "loss": 2.6295, + "step": 5462 + }, + { + "epoch": 0.4408845129529497, + "grad_norm": 0.7084751725196838, + "learning_rate": 0.00016624952334271616, + "loss": 2.6633, + "step": 5463 + }, + { + "epoch": 0.4409652166895327, + "grad_norm": 0.7399250864982605, + "learning_rate": 0.00016623769708681735, + "loss": 2.6076, + "step": 5464 + }, + { + "epoch": 0.44104592042611573, + "grad_norm": 0.6904892325401306, + "learning_rate": 0.00016622586918010193, + "loss": 2.6799, + "step": 5465 + }, + { + "epoch": 0.4411266241626987, + "grad_norm": 0.7419006824493408, + "learning_rate": 0.00016621403962286478, + "loss": 2.65, + "step": 5466 + }, + { + "epoch": 0.44120732789928174, + "grad_norm": 0.7201282978057861, + "learning_rate": 0.00016620220841540064, + "loss": 2.6769, + "step": 5467 + }, + { + "epoch": 0.4412880316358647, + "grad_norm": 0.7223218679428101, + "learning_rate": 0.00016619037555800443, + "loss": 2.6342, + "step": 5468 + }, + { + "epoch": 0.44136873537244775, + "grad_norm": 0.7517585754394531, + "learning_rate": 0.00016617854105097104, + "loss": 2.6103, + "step": 5469 + }, + { + "epoch": 0.44144943910903073, + "grad_norm": 0.6765139698982239, + "learning_rate": 0.0001661667048945954, + "loss": 2.624, + "step": 5470 + }, + { + "epoch": 0.44153014284561376, + "grad_norm": 0.7197677493095398, + "learning_rate": 0.00016615486708917255, + "loss": 2.5786, + "step": 5471 + }, + { + "epoch": 0.44161084658219674, + "grad_norm": 0.7196774482727051, + "learning_rate": 0.00016614302763499742, + "loss": 2.6147, + "step": 5472 + }, + { + "epoch": 
0.44169155031877977, + "grad_norm": 0.7210293412208557, + "learning_rate": 0.00016613118653236518, + "loss": 2.6526, + "step": 5473 + }, + { + "epoch": 0.44177225405536275, + "grad_norm": 0.6870129108428955, + "learning_rate": 0.00016611934378157092, + "loss": 2.665, + "step": 5474 + }, + { + "epoch": 0.4418529577919458, + "grad_norm": 0.6925365328788757, + "learning_rate": 0.00016610749938290975, + "loss": 2.5734, + "step": 5475 + }, + { + "epoch": 0.44193366152852875, + "grad_norm": 0.7399131655693054, + "learning_rate": 0.0001660956533366769, + "loss": 2.6935, + "step": 5476 + }, + { + "epoch": 0.4420143652651118, + "grad_norm": 0.7348966002464294, + "learning_rate": 0.00016608380564316758, + "loss": 2.6788, + "step": 5477 + }, + { + "epoch": 0.44209506900169476, + "grad_norm": 0.7597334980964661, + "learning_rate": 0.00016607195630267708, + "loss": 2.6732, + "step": 5478 + }, + { + "epoch": 0.4421757727382778, + "grad_norm": 0.6847043037414551, + "learning_rate": 0.00016606010531550072, + "loss": 2.6475, + "step": 5479 + }, + { + "epoch": 0.44225647647486077, + "grad_norm": 0.7065151929855347, + "learning_rate": 0.00016604825268193388, + "loss": 2.6674, + "step": 5480 + }, + { + "epoch": 0.4423371802114438, + "grad_norm": 0.7102208137512207, + "learning_rate": 0.0001660363984022719, + "loss": 2.6723, + "step": 5481 + }, + { + "epoch": 0.4424178839480268, + "grad_norm": 0.6912767887115479, + "learning_rate": 0.00016602454247681024, + "loss": 2.628, + "step": 5482 + }, + { + "epoch": 0.4424985876846098, + "grad_norm": 0.7265123128890991, + "learning_rate": 0.0001660126849058444, + "loss": 2.5935, + "step": 5483 + }, + { + "epoch": 0.4425792914211928, + "grad_norm": 0.8177923560142517, + "learning_rate": 0.0001660008256896699, + "loss": 2.6402, + "step": 5484 + }, + { + "epoch": 0.4426599951577758, + "grad_norm": 0.7196556925773621, + "learning_rate": 0.00016598896482858231, + "loss": 2.6939, + "step": 5485 + }, + { + "epoch": 0.4427406988943588, + "grad_norm": 
0.7459850907325745, + "learning_rate": 0.0001659771023228772, + "loss": 2.6343, + "step": 5486 + }, + { + "epoch": 0.44282140263094183, + "grad_norm": 0.7399095892906189, + "learning_rate": 0.00016596523817285024, + "loss": 2.6139, + "step": 5487 + }, + { + "epoch": 0.4429021063675248, + "grad_norm": 0.7517558336257935, + "learning_rate": 0.0001659533723787971, + "loss": 2.6609, + "step": 5488 + }, + { + "epoch": 0.44298281010410784, + "grad_norm": 0.7073537707328796, + "learning_rate": 0.00016594150494101355, + "loss": 2.6326, + "step": 5489 + }, + { + "epoch": 0.4430635138406908, + "grad_norm": 0.7414752244949341, + "learning_rate": 0.0001659296358597953, + "loss": 2.6759, + "step": 5490 + }, + { + "epoch": 0.44314421757727385, + "grad_norm": 0.7636380195617676, + "learning_rate": 0.0001659177651354382, + "loss": 2.5743, + "step": 5491 + }, + { + "epoch": 0.4432249213138568, + "grad_norm": 0.6839539408683777, + "learning_rate": 0.00016590589276823804, + "loss": 2.631, + "step": 5492 + }, + { + "epoch": 0.44330562505043986, + "grad_norm": 0.8057516813278198, + "learning_rate": 0.0001658940187584908, + "loss": 2.6916, + "step": 5493 + }, + { + "epoch": 0.44338632878702283, + "grad_norm": 0.7479767799377441, + "learning_rate": 0.00016588214310649232, + "loss": 2.6811, + "step": 5494 + }, + { + "epoch": 0.44346703252360586, + "grad_norm": 0.7854729294776917, + "learning_rate": 0.00016587026581253866, + "loss": 2.6746, + "step": 5495 + }, + { + "epoch": 0.44354773626018884, + "grad_norm": 0.7782836556434631, + "learning_rate": 0.00016585838687692577, + "loss": 2.61, + "step": 5496 + }, + { + "epoch": 0.4436284399967719, + "grad_norm": 0.7047034502029419, + "learning_rate": 0.00016584650629994968, + "loss": 2.6573, + "step": 5497 + }, + { + "epoch": 0.44370914373335485, + "grad_norm": 0.7398735880851746, + "learning_rate": 0.0001658346240819066, + "loss": 2.6338, + "step": 5498 + }, + { + "epoch": 0.4437898474699379, + "grad_norm": 0.7243468165397644, + 
"learning_rate": 0.00016582274022309258, + "loss": 2.5898, + "step": 5499 + }, + { + "epoch": 0.44387055120652086, + "grad_norm": 0.7415906190872192, + "learning_rate": 0.00016581085472380376, + "loss": 2.5893, + "step": 5500 + }, + { + "epoch": 0.4439512549431039, + "grad_norm": 0.6935107707977295, + "learning_rate": 0.00016579896758433645, + "loss": 2.6704, + "step": 5501 + }, + { + "epoch": 0.44403195867968687, + "grad_norm": 0.7188034653663635, + "learning_rate": 0.00016578707880498685, + "loss": 2.643, + "step": 5502 + }, + { + "epoch": 0.4441126624162699, + "grad_norm": 0.6697022914886475, + "learning_rate": 0.0001657751883860513, + "loss": 2.6313, + "step": 5503 + }, + { + "epoch": 0.4441933661528529, + "grad_norm": 0.760154664516449, + "learning_rate": 0.00016576329632782613, + "loss": 2.6604, + "step": 5504 + }, + { + "epoch": 0.4442740698894359, + "grad_norm": 0.6883447170257568, + "learning_rate": 0.00016575140263060765, + "loss": 2.64, + "step": 5505 + }, + { + "epoch": 0.4443547736260189, + "grad_norm": 0.8628804683685303, + "learning_rate": 0.0001657395072946924, + "loss": 2.6651, + "step": 5506 + }, + { + "epoch": 0.4444354773626019, + "grad_norm": 0.7125170230865479, + "learning_rate": 0.0001657276103203768, + "loss": 2.7132, + "step": 5507 + }, + { + "epoch": 0.4445161810991849, + "grad_norm": 0.6965304613113403, + "learning_rate": 0.00016571571170795725, + "loss": 2.7109, + "step": 5508 + }, + { + "epoch": 0.44459688483576787, + "grad_norm": 0.720327615737915, + "learning_rate": 0.00016570381145773042, + "loss": 2.6323, + "step": 5509 + }, + { + "epoch": 0.4446775885723509, + "grad_norm": 0.7097898125648499, + "learning_rate": 0.00016569190956999287, + "loss": 2.6461, + "step": 5510 + }, + { + "epoch": 0.4447582923089339, + "grad_norm": 0.7142884731292725, + "learning_rate": 0.0001656800060450412, + "loss": 2.6894, + "step": 5511 + }, + { + "epoch": 0.4448389960455169, + "grad_norm": 0.6992002725601196, + "learning_rate": 0.0001656681008831721, + 
"loss": 2.6116, + "step": 5512 + }, + { + "epoch": 0.4449196997820999, + "grad_norm": 0.763841450214386, + "learning_rate": 0.00016565619408468227, + "loss": 2.6441, + "step": 5513 + }, + { + "epoch": 0.4450004035186829, + "grad_norm": 0.6958404183387756, + "learning_rate": 0.00016564428564986848, + "loss": 2.5751, + "step": 5514 + }, + { + "epoch": 0.4450811072552659, + "grad_norm": 0.8804046511650085, + "learning_rate": 0.00016563237557902744, + "loss": 2.6353, + "step": 5515 + }, + { + "epoch": 0.4451618109918489, + "grad_norm": 0.744864821434021, + "learning_rate": 0.00016562046387245608, + "loss": 2.6887, + "step": 5516 + }, + { + "epoch": 0.4452425147284319, + "grad_norm": 0.7627978920936584, + "learning_rate": 0.0001656085505304512, + "loss": 2.6347, + "step": 5517 + }, + { + "epoch": 0.44532321846501494, + "grad_norm": 0.7728918194770813, + "learning_rate": 0.00016559663555330975, + "loss": 2.6344, + "step": 5518 + }, + { + "epoch": 0.4454039222015979, + "grad_norm": 0.7853842377662659, + "learning_rate": 0.00016558471894132865, + "loss": 2.7239, + "step": 5519 + }, + { + "epoch": 0.44548462593818094, + "grad_norm": 0.7981860041618347, + "learning_rate": 0.00016557280069480495, + "loss": 2.66, + "step": 5520 + }, + { + "epoch": 0.4455653296747639, + "grad_norm": 0.7555295825004578, + "learning_rate": 0.0001655608808140356, + "loss": 2.6636, + "step": 5521 + }, + { + "epoch": 0.44564603341134695, + "grad_norm": 0.6893854141235352, + "learning_rate": 0.00016554895929931778, + "loss": 2.5999, + "step": 5522 + }, + { + "epoch": 0.44572673714792993, + "grad_norm": 0.7740506529808044, + "learning_rate": 0.0001655370361509485, + "loss": 2.6308, + "step": 5523 + }, + { + "epoch": 0.44580744088451296, + "grad_norm": 0.6956021785736084, + "learning_rate": 0.00016552511136922498, + "loss": 2.6376, + "step": 5524 + }, + { + "epoch": 0.44588814462109594, + "grad_norm": 0.7408841252326965, + "learning_rate": 0.00016551318495444445, + "loss": 2.6644, + "step": 5525 + }, + 
{ + "epoch": 0.44596884835767897, + "grad_norm": 0.7715663313865662, + "learning_rate": 0.000165501256906904, + "loss": 2.6791, + "step": 5526 + }, + { + "epoch": 0.44604955209426195, + "grad_norm": 0.6880629062652588, + "learning_rate": 0.0001654893272269011, + "loss": 2.7209, + "step": 5527 + }, + { + "epoch": 0.446130255830845, + "grad_norm": 0.6765853762626648, + "learning_rate": 0.0001654773959147329, + "loss": 2.6548, + "step": 5528 + }, + { + "epoch": 0.44621095956742796, + "grad_norm": 0.739248514175415, + "learning_rate": 0.00016546546297069688, + "loss": 2.69, + "step": 5529 + }, + { + "epoch": 0.446291663304011, + "grad_norm": 0.7655714750289917, + "learning_rate": 0.00016545352839509038, + "loss": 2.6238, + "step": 5530 + }, + { + "epoch": 0.44637236704059396, + "grad_norm": 0.706068217754364, + "learning_rate": 0.00016544159218821088, + "loss": 2.6528, + "step": 5531 + }, + { + "epoch": 0.446453070777177, + "grad_norm": 0.7411316633224487, + "learning_rate": 0.00016542965435035578, + "loss": 2.7034, + "step": 5532 + }, + { + "epoch": 0.44653377451376, + "grad_norm": 0.6550690531730652, + "learning_rate": 0.0001654177148818227, + "loss": 2.6388, + "step": 5533 + }, + { + "epoch": 0.446614478250343, + "grad_norm": 0.7151147127151489, + "learning_rate": 0.00016540577378290915, + "loss": 2.7382, + "step": 5534 + }, + { + "epoch": 0.446695181986926, + "grad_norm": 0.7343939542770386, + "learning_rate": 0.00016539383105391276, + "loss": 2.6316, + "step": 5535 + }, + { + "epoch": 0.446775885723509, + "grad_norm": 0.702036440372467, + "learning_rate": 0.00016538188669513115, + "loss": 2.6465, + "step": 5536 + }, + { + "epoch": 0.446856589460092, + "grad_norm": 0.7212840914726257, + "learning_rate": 0.00016536994070686197, + "loss": 2.6471, + "step": 5537 + }, + { + "epoch": 0.446937293196675, + "grad_norm": 0.7345479130744934, + "learning_rate": 0.00016535799308940304, + "loss": 2.6746, + "step": 5538 + }, + { + "epoch": 0.447017996933258, + "grad_norm": 
0.7447341084480286, + "learning_rate": 0.00016534604384305207, + "loss": 2.6487, + "step": 5539 + }, + { + "epoch": 0.44709870066984103, + "grad_norm": 0.6865687370300293, + "learning_rate": 0.00016533409296810687, + "loss": 2.6202, + "step": 5540 + }, + { + "epoch": 0.447179404406424, + "grad_norm": 0.8210769891738892, + "learning_rate": 0.0001653221404648653, + "loss": 2.7155, + "step": 5541 + }, + { + "epoch": 0.44726010814300704, + "grad_norm": 0.7768925428390503, + "learning_rate": 0.0001653101863336252, + "loss": 2.6011, + "step": 5542 + }, + { + "epoch": 0.44734081187959, + "grad_norm": 0.7160049080848694, + "learning_rate": 0.00016529823057468456, + "loss": 2.6541, + "step": 5543 + }, + { + "epoch": 0.44742151561617305, + "grad_norm": 0.7386900782585144, + "learning_rate": 0.00016528627318834134, + "loss": 2.6586, + "step": 5544 + }, + { + "epoch": 0.447502219352756, + "grad_norm": 0.7415460348129272, + "learning_rate": 0.0001652743141748935, + "loss": 2.7032, + "step": 5545 + }, + { + "epoch": 0.44758292308933906, + "grad_norm": 0.8483054637908936, + "learning_rate": 0.00016526235353463912, + "loss": 2.6145, + "step": 5546 + }, + { + "epoch": 0.44766362682592203, + "grad_norm": 0.7428778409957886, + "learning_rate": 0.00016525039126787629, + "loss": 2.7005, + "step": 5547 + }, + { + "epoch": 0.44774433056250507, + "grad_norm": 0.7214285731315613, + "learning_rate": 0.00016523842737490316, + "loss": 2.6267, + "step": 5548 + }, + { + "epoch": 0.44782503429908804, + "grad_norm": 0.6753950715065002, + "learning_rate": 0.0001652264618560179, + "loss": 2.6732, + "step": 5549 + }, + { + "epoch": 0.4479057380356711, + "grad_norm": 0.6969403028488159, + "learning_rate": 0.00016521449471151867, + "loss": 2.6218, + "step": 5550 + }, + { + "epoch": 0.44798644177225405, + "grad_norm": 0.7562664151191711, + "learning_rate": 0.00016520252594170377, + "loss": 2.69, + "step": 5551 + }, + { + "epoch": 0.4480671455088371, + "grad_norm": 0.6831937432289124, + "learning_rate": 
0.0001651905555468715, + "loss": 2.709, + "step": 5552 + }, + { + "epoch": 0.44814784924542006, + "grad_norm": 0.6753427386283875, + "learning_rate": 0.00016517858352732017, + "loss": 2.5852, + "step": 5553 + }, + { + "epoch": 0.4482285529820031, + "grad_norm": 0.7573871612548828, + "learning_rate": 0.00016516660988334815, + "loss": 2.6187, + "step": 5554 + }, + { + "epoch": 0.44830925671858607, + "grad_norm": 0.6424254775047302, + "learning_rate": 0.00016515463461525383, + "loss": 2.6411, + "step": 5555 + }, + { + "epoch": 0.4483899604551691, + "grad_norm": 0.7460073232650757, + "learning_rate": 0.0001651426577233358, + "loss": 2.6239, + "step": 5556 + }, + { + "epoch": 0.4484706641917521, + "grad_norm": 0.6980866193771362, + "learning_rate": 0.0001651306792078924, + "loss": 2.605, + "step": 5557 + }, + { + "epoch": 0.4485513679283351, + "grad_norm": 0.7376009225845337, + "learning_rate": 0.00016511869906922217, + "loss": 2.7114, + "step": 5558 + }, + { + "epoch": 0.4486320716649181, + "grad_norm": 0.7227364778518677, + "learning_rate": 0.0001651067173076238, + "loss": 2.6212, + "step": 5559 + }, + { + "epoch": 0.44871277540150106, + "grad_norm": 0.8989635705947876, + "learning_rate": 0.00016509473392339584, + "loss": 2.671, + "step": 5560 + }, + { + "epoch": 0.4487934791380841, + "grad_norm": 0.7273553609848022, + "learning_rate": 0.0001650827489168369, + "loss": 2.6556, + "step": 5561 + }, + { + "epoch": 0.44887418287466707, + "grad_norm": 0.839439868927002, + "learning_rate": 0.00016507076228824578, + "loss": 2.6959, + "step": 5562 + }, + { + "epoch": 0.4489548866112501, + "grad_norm": 0.6912770867347717, + "learning_rate": 0.00016505877403792115, + "loss": 2.6709, + "step": 5563 + }, + { + "epoch": 0.4490355903478331, + "grad_norm": 0.7850949168205261, + "learning_rate": 0.00016504678416616182, + "loss": 2.7257, + "step": 5564 + }, + { + "epoch": 0.4491162940844161, + "grad_norm": 0.7768355011940002, + "learning_rate": 0.0001650347926732666, + "loss": 2.5939, 
+ "step": 5565 + }, + { + "epoch": 0.4491969978209991, + "grad_norm": 0.6518398523330688, + "learning_rate": 0.0001650227995595343, + "loss": 2.6589, + "step": 5566 + }, + { + "epoch": 0.4492777015575821, + "grad_norm": 0.6855975389480591, + "learning_rate": 0.0001650108048252639, + "loss": 2.6372, + "step": 5567 + }, + { + "epoch": 0.4493584052941651, + "grad_norm": 0.7176938056945801, + "learning_rate": 0.0001649988084707543, + "loss": 2.6506, + "step": 5568 + }, + { + "epoch": 0.44943910903074813, + "grad_norm": 0.735335648059845, + "learning_rate": 0.00016498681049630448, + "loss": 2.608, + "step": 5569 + }, + { + "epoch": 0.4495198127673311, + "grad_norm": 0.6862306594848633, + "learning_rate": 0.00016497481090221346, + "loss": 2.5982, + "step": 5570 + }, + { + "epoch": 0.44960051650391414, + "grad_norm": 0.7213380336761475, + "learning_rate": 0.0001649628096887803, + "loss": 2.6457, + "step": 5571 + }, + { + "epoch": 0.4496812202404971, + "grad_norm": 0.7118985652923584, + "learning_rate": 0.0001649508068563041, + "loss": 2.6321, + "step": 5572 + }, + { + "epoch": 0.44976192397708015, + "grad_norm": 0.7663396596908569, + "learning_rate": 0.00016493880240508405, + "loss": 2.5865, + "step": 5573 + }, + { + "epoch": 0.4498426277136631, + "grad_norm": 0.6854543089866638, + "learning_rate": 0.00016492679633541926, + "loss": 2.6536, + "step": 5574 + }, + { + "epoch": 0.44992333145024616, + "grad_norm": 0.7071701884269714, + "learning_rate": 0.000164914788647609, + "loss": 2.6149, + "step": 5575 + }, + { + "epoch": 0.45000403518682913, + "grad_norm": 0.7610478401184082, + "learning_rate": 0.00016490277934195252, + "loss": 2.6326, + "step": 5576 + }, + { + "epoch": 0.45008473892341216, + "grad_norm": 0.7117596864700317, + "learning_rate": 0.0001648907684187491, + "loss": 2.6938, + "step": 5577 + }, + { + "epoch": 0.45016544265999514, + "grad_norm": 0.6980494856834412, + "learning_rate": 0.00016487875587829813, + "loss": 2.6798, + "step": 5578 + }, + { + "epoch": 
0.4502461463965782, + "grad_norm": 0.7957972288131714, + "learning_rate": 0.00016486674172089898, + "loss": 2.6029, + "step": 5579 + }, + { + "epoch": 0.45032685013316115, + "grad_norm": 0.7258082032203674, + "learning_rate": 0.00016485472594685103, + "loss": 2.6785, + "step": 5580 + }, + { + "epoch": 0.4504075538697442, + "grad_norm": 0.7402041554450989, + "learning_rate": 0.0001648427085564538, + "loss": 2.6263, + "step": 5581 + }, + { + "epoch": 0.45048825760632716, + "grad_norm": 0.6943814158439636, + "learning_rate": 0.00016483068955000673, + "loss": 2.6761, + "step": 5582 + }, + { + "epoch": 0.4505689613429102, + "grad_norm": 0.8021644353866577, + "learning_rate": 0.00016481866892780947, + "loss": 2.6376, + "step": 5583 + }, + { + "epoch": 0.45064966507949317, + "grad_norm": 0.7748533487319946, + "learning_rate": 0.0001648066466901615, + "loss": 2.7465, + "step": 5584 + }, + { + "epoch": 0.4507303688160762, + "grad_norm": 0.7432222366333008, + "learning_rate": 0.00016479462283736248, + "loss": 2.6368, + "step": 5585 + }, + { + "epoch": 0.4508110725526592, + "grad_norm": 0.7835286259651184, + "learning_rate": 0.00016478259736971214, + "loss": 2.6449, + "step": 5586 + }, + { + "epoch": 0.4508917762892422, + "grad_norm": 0.7372995018959045, + "learning_rate": 0.00016477057028751007, + "loss": 2.6091, + "step": 5587 + }, + { + "epoch": 0.4509724800258252, + "grad_norm": 0.8230665326118469, + "learning_rate": 0.0001647585415910561, + "loss": 2.6345, + "step": 5588 + }, + { + "epoch": 0.4510531837624082, + "grad_norm": 0.7490825057029724, + "learning_rate": 0.00016474651128065002, + "loss": 2.5996, + "step": 5589 + }, + { + "epoch": 0.4511338874989912, + "grad_norm": 0.7950569987297058, + "learning_rate": 0.00016473447935659157, + "loss": 2.7109, + "step": 5590 + }, + { + "epoch": 0.4512145912355742, + "grad_norm": 0.7648342251777649, + "learning_rate": 0.00016472244581918074, + "loss": 2.6268, + "step": 5591 + }, + { + "epoch": 0.4512952949721572, + "grad_norm": 
0.726828396320343, + "learning_rate": 0.00016471041066871733, + "loss": 2.5959, + "step": 5592 + }, + { + "epoch": 0.45137599870874023, + "grad_norm": 0.7855841517448425, + "learning_rate": 0.00016469837390550133, + "loss": 2.6671, + "step": 5593 + }, + { + "epoch": 0.4514567024453232, + "grad_norm": 0.6858882904052734, + "learning_rate": 0.00016468633552983275, + "loss": 2.6003, + "step": 5594 + }, + { + "epoch": 0.45153740618190624, + "grad_norm": 0.710926353931427, + "learning_rate": 0.0001646742955420116, + "loss": 2.6049, + "step": 5595 + }, + { + "epoch": 0.4516181099184892, + "grad_norm": 0.8359978199005127, + "learning_rate": 0.0001646622539423379, + "loss": 2.6636, + "step": 5596 + }, + { + "epoch": 0.45169881365507225, + "grad_norm": 0.7628041505813599, + "learning_rate": 0.00016465021073111186, + "loss": 2.6586, + "step": 5597 + }, + { + "epoch": 0.4517795173916552, + "grad_norm": 0.7723419666290283, + "learning_rate": 0.00016463816590863356, + "loss": 2.6213, + "step": 5598 + }, + { + "epoch": 0.45186022112823826, + "grad_norm": 0.7210986018180847, + "learning_rate": 0.0001646261194752032, + "loss": 2.6674, + "step": 5599 + }, + { + "epoch": 0.45194092486482124, + "grad_norm": 0.7665949463844299, + "learning_rate": 0.00016461407143112097, + "loss": 2.68, + "step": 5600 + }, + { + "epoch": 0.45202162860140427, + "grad_norm": 0.7225117087364197, + "learning_rate": 0.00016460202177668722, + "loss": 2.6473, + "step": 5601 + }, + { + "epoch": 0.45210233233798724, + "grad_norm": 0.6831738948822021, + "learning_rate": 0.0001645899705122022, + "loss": 2.6863, + "step": 5602 + }, + { + "epoch": 0.4521830360745703, + "grad_norm": 0.7006321549415588, + "learning_rate": 0.00016457791763796627, + "loss": 2.6242, + "step": 5603 + }, + { + "epoch": 0.45226373981115325, + "grad_norm": 0.7245663404464722, + "learning_rate": 0.00016456586315427983, + "loss": 2.6201, + "step": 5604 + }, + { + "epoch": 0.4523444435477363, + "grad_norm": 0.7444287538528442, + 
"learning_rate": 0.00016455380706144332, + "loss": 2.6684, + "step": 5605 + }, + { + "epoch": 0.45242514728431926, + "grad_norm": 0.6562673449516296, + "learning_rate": 0.00016454174935975714, + "loss": 2.5912, + "step": 5606 + }, + { + "epoch": 0.4525058510209023, + "grad_norm": 0.6494336724281311, + "learning_rate": 0.0001645296900495219, + "loss": 2.6245, + "step": 5607 + }, + { + "epoch": 0.45258655475748527, + "grad_norm": 0.6968161463737488, + "learning_rate": 0.0001645176291310381, + "loss": 2.6494, + "step": 5608 + }, + { + "epoch": 0.4526672584940683, + "grad_norm": 0.7351142764091492, + "learning_rate": 0.00016450556660460632, + "loss": 2.574, + "step": 5609 + }, + { + "epoch": 0.4527479622306513, + "grad_norm": 0.7522323131561279, + "learning_rate": 0.0001644935024705272, + "loss": 2.6512, + "step": 5610 + }, + { + "epoch": 0.45282866596723426, + "grad_norm": 0.6744225025177002, + "learning_rate": 0.0001644814367291014, + "loss": 2.6288, + "step": 5611 + }, + { + "epoch": 0.4529093697038173, + "grad_norm": 0.6933234333992004, + "learning_rate": 0.00016446936938062967, + "loss": 2.6076, + "step": 5612 + }, + { + "epoch": 0.45299007344040026, + "grad_norm": 0.7101204991340637, + "learning_rate": 0.00016445730042541272, + "loss": 2.6322, + "step": 5613 + }, + { + "epoch": 0.4530707771769833, + "grad_norm": 0.7647581696510315, + "learning_rate": 0.00016444522986375134, + "loss": 2.7021, + "step": 5614 + }, + { + "epoch": 0.4531514809135663, + "grad_norm": 0.7028820514678955, + "learning_rate": 0.00016443315769594635, + "loss": 2.6171, + "step": 5615 + }, + { + "epoch": 0.4532321846501493, + "grad_norm": 0.6933851838111877, + "learning_rate": 0.00016442108392229868, + "loss": 2.6119, + "step": 5616 + }, + { + "epoch": 0.4533128883867323, + "grad_norm": 0.7218462824821472, + "learning_rate": 0.0001644090085431092, + "loss": 2.6661, + "step": 5617 + }, + { + "epoch": 0.4533935921233153, + "grad_norm": 0.7390525341033936, + "learning_rate": 
0.00016439693155867883, + "loss": 2.7084, + "step": 5618 + }, + { + "epoch": 0.4534742958598983, + "grad_norm": 0.734136164188385, + "learning_rate": 0.0001643848529693086, + "loss": 2.6896, + "step": 5619 + }, + { + "epoch": 0.4535549995964813, + "grad_norm": 0.8082060813903809, + "learning_rate": 0.00016437277277529954, + "loss": 2.5828, + "step": 5620 + }, + { + "epoch": 0.4536357033330643, + "grad_norm": 0.695988655090332, + "learning_rate": 0.0001643606909769527, + "loss": 2.6383, + "step": 5621 + }, + { + "epoch": 0.45371640706964733, + "grad_norm": 0.7415786385536194, + "learning_rate": 0.00016434860757456922, + "loss": 2.6388, + "step": 5622 + }, + { + "epoch": 0.4537971108062303, + "grad_norm": 0.7378649115562439, + "learning_rate": 0.0001643365225684502, + "loss": 2.6534, + "step": 5623 + }, + { + "epoch": 0.45387781454281334, + "grad_norm": 0.7686129808425903, + "learning_rate": 0.0001643244359588969, + "loss": 2.6637, + "step": 5624 + }, + { + "epoch": 0.4539585182793963, + "grad_norm": 0.7305558323860168, + "learning_rate": 0.00016431234774621047, + "loss": 2.6525, + "step": 5625 + }, + { + "epoch": 0.45403922201597935, + "grad_norm": 0.7994235157966614, + "learning_rate": 0.00016430025793069225, + "loss": 2.6316, + "step": 5626 + }, + { + "epoch": 0.4541199257525623, + "grad_norm": 0.6945801377296448, + "learning_rate": 0.0001642881665126435, + "loss": 2.6367, + "step": 5627 + }, + { + "epoch": 0.45420062948914536, + "grad_norm": 0.6855447292327881, + "learning_rate": 0.00016427607349236558, + "loss": 2.6317, + "step": 5628 + }, + { + "epoch": 0.45428133322572833, + "grad_norm": 0.6961888670921326, + "learning_rate": 0.00016426397887015992, + "loss": 2.6477, + "step": 5629 + }, + { + "epoch": 0.45436203696231137, + "grad_norm": 0.7531994581222534, + "learning_rate": 0.0001642518826463279, + "loss": 2.7219, + "step": 5630 + }, + { + "epoch": 0.45444274069889434, + "grad_norm": 0.7442335486412048, + "learning_rate": 0.00016423978482117102, + "loss": 
2.706, + "step": 5631 + }, + { + "epoch": 0.4545234444354774, + "grad_norm": 0.7075700759887695, + "learning_rate": 0.00016422768539499076, + "loss": 2.6481, + "step": 5632 + }, + { + "epoch": 0.45460414817206035, + "grad_norm": 0.7831876873970032, + "learning_rate": 0.0001642155843680887, + "loss": 2.616, + "step": 5633 + }, + { + "epoch": 0.4546848519086434, + "grad_norm": 0.7514604926109314, + "learning_rate": 0.00016420348174076642, + "loss": 2.6282, + "step": 5634 + }, + { + "epoch": 0.45476555564522636, + "grad_norm": 0.7136685252189636, + "learning_rate": 0.0001641913775133255, + "loss": 2.6764, + "step": 5635 + }, + { + "epoch": 0.4548462593818094, + "grad_norm": 0.7406740784645081, + "learning_rate": 0.00016417927168606771, + "loss": 2.6126, + "step": 5636 + }, + { + "epoch": 0.45492696311839237, + "grad_norm": 0.7257869839668274, + "learning_rate": 0.0001641671642592947, + "loss": 2.6035, + "step": 5637 + }, + { + "epoch": 0.4550076668549754, + "grad_norm": 0.8378798961639404, + "learning_rate": 0.00016415505523330822, + "loss": 2.6657, + "step": 5638 + }, + { + "epoch": 0.4550883705915584, + "grad_norm": 0.7218836545944214, + "learning_rate": 0.00016414294460841003, + "loss": 2.6209, + "step": 5639 + }, + { + "epoch": 0.4551690743281414, + "grad_norm": 0.7792766690254211, + "learning_rate": 0.00016413083238490204, + "loss": 2.7208, + "step": 5640 + }, + { + "epoch": 0.4552497780647244, + "grad_norm": 0.7800823450088501, + "learning_rate": 0.000164118718563086, + "loss": 2.6351, + "step": 5641 + }, + { + "epoch": 0.4553304818013074, + "grad_norm": 0.7593275904655457, + "learning_rate": 0.00016410660314326395, + "loss": 2.7025, + "step": 5642 + }, + { + "epoch": 0.4554111855378904, + "grad_norm": 0.7561587691307068, + "learning_rate": 0.00016409448612573772, + "loss": 2.6188, + "step": 5643 + }, + { + "epoch": 0.4554918892744734, + "grad_norm": 0.7674516439437866, + "learning_rate": 0.00016408236751080937, + "loss": 2.629, + "step": 5644 + }, + { + 
"epoch": 0.4555725930110564, + "grad_norm": 0.7112495303153992, + "learning_rate": 0.00016407024729878095, + "loss": 2.6261, + "step": 5645 + }, + { + "epoch": 0.45565329674763944, + "grad_norm": 0.6861695647239685, + "learning_rate": 0.00016405812548995444, + "loss": 2.6984, + "step": 5646 + }, + { + "epoch": 0.4557340004842224, + "grad_norm": 0.7711648941040039, + "learning_rate": 0.000164046002084632, + "loss": 2.6839, + "step": 5647 + }, + { + "epoch": 0.45581470422080544, + "grad_norm": 0.6862967014312744, + "learning_rate": 0.00016403387708311578, + "loss": 2.5964, + "step": 5648 + }, + { + "epoch": 0.4558954079573884, + "grad_norm": 0.707374632358551, + "learning_rate": 0.00016402175048570793, + "loss": 2.6191, + "step": 5649 + }, + { + "epoch": 0.45597611169397145, + "grad_norm": 0.7980892658233643, + "learning_rate": 0.00016400962229271072, + "loss": 2.6288, + "step": 5650 + }, + { + "epoch": 0.45605681543055443, + "grad_norm": 0.686187744140625, + "learning_rate": 0.0001639974925044264, + "loss": 2.6277, + "step": 5651 + }, + { + "epoch": 0.45613751916713746, + "grad_norm": 0.6970425844192505, + "learning_rate": 0.0001639853611211573, + "loss": 2.5726, + "step": 5652 + }, + { + "epoch": 0.45621822290372044, + "grad_norm": 0.701500415802002, + "learning_rate": 0.00016397322814320573, + "loss": 2.6275, + "step": 5653 + }, + { + "epoch": 0.45629892664030347, + "grad_norm": 0.8432207107543945, + "learning_rate": 0.00016396109357087407, + "loss": 2.6185, + "step": 5654 + }, + { + "epoch": 0.45637963037688645, + "grad_norm": 0.7049770951271057, + "learning_rate": 0.00016394895740446476, + "loss": 2.674, + "step": 5655 + }, + { + "epoch": 0.4564603341134695, + "grad_norm": 0.7068646550178528, + "learning_rate": 0.00016393681964428026, + "loss": 2.6072, + "step": 5656 + }, + { + "epoch": 0.45654103785005246, + "grad_norm": 0.7698760032653809, + "learning_rate": 0.00016392468029062312, + "loss": 2.6547, + "step": 5657 + }, + { + "epoch": 0.4566217415866355, + 
"grad_norm": 0.7381031513214111, + "learning_rate": 0.00016391253934379583, + "loss": 2.6125, + "step": 5658 + }, + { + "epoch": 0.45670244532321846, + "grad_norm": 0.7367781400680542, + "learning_rate": 0.00016390039680410097, + "loss": 2.6763, + "step": 5659 + }, + { + "epoch": 0.4567831490598015, + "grad_norm": 0.7416272759437561, + "learning_rate": 0.00016388825267184121, + "loss": 2.7059, + "step": 5660 + }, + { + "epoch": 0.4568638527963845, + "grad_norm": 0.6933416724205017, + "learning_rate": 0.0001638761069473192, + "loss": 2.6028, + "step": 5661 + }, + { + "epoch": 0.45694455653296745, + "grad_norm": 0.7311314940452576, + "learning_rate": 0.00016386395963083756, + "loss": 2.6266, + "step": 5662 + }, + { + "epoch": 0.4570252602695505, + "grad_norm": 0.7172734141349792, + "learning_rate": 0.00016385181072269917, + "loss": 2.6754, + "step": 5663 + }, + { + "epoch": 0.45710596400613346, + "grad_norm": 0.7286428213119507, + "learning_rate": 0.00016383966022320671, + "loss": 2.6637, + "step": 5664 + }, + { + "epoch": 0.4571866677427165, + "grad_norm": 0.7296474575996399, + "learning_rate": 0.00016382750813266308, + "loss": 2.6655, + "step": 5665 + }, + { + "epoch": 0.45726737147929947, + "grad_norm": 0.6929224133491516, + "learning_rate": 0.00016381535445137105, + "loss": 2.6376, + "step": 5666 + }, + { + "epoch": 0.4573480752158825, + "grad_norm": 0.7012765407562256, + "learning_rate": 0.0001638031991796336, + "loss": 2.6222, + "step": 5667 + }, + { + "epoch": 0.4574287789524655, + "grad_norm": 0.7360745668411255, + "learning_rate": 0.00016379104231775368, + "loss": 2.6304, + "step": 5668 + }, + { + "epoch": 0.4575094826890485, + "grad_norm": 0.7276801466941833, + "learning_rate": 0.00016377888386603419, + "loss": 2.7046, + "step": 5669 + }, + { + "epoch": 0.4575901864256315, + "grad_norm": 0.688432514667511, + "learning_rate": 0.0001637667238247782, + "loss": 2.6598, + "step": 5670 + }, + { + "epoch": 0.4576708901622145, + "grad_norm": 0.6874414682388306, + 
"learning_rate": 0.00016375456219428877, + "loss": 2.7, + "step": 5671 + }, + { + "epoch": 0.4577515938987975, + "grad_norm": 0.711091160774231, + "learning_rate": 0.000163742398974869, + "loss": 2.6063, + "step": 5672 + }, + { + "epoch": 0.4578322976353805, + "grad_norm": 0.7131791710853577, + "learning_rate": 0.000163730234166822, + "loss": 2.5948, + "step": 5673 + }, + { + "epoch": 0.4579130013719635, + "grad_norm": 0.7166630625724792, + "learning_rate": 0.000163718067770451, + "loss": 2.6488, + "step": 5674 + }, + { + "epoch": 0.45799370510854653, + "grad_norm": 0.7285952568054199, + "learning_rate": 0.00016370589978605916, + "loss": 2.6445, + "step": 5675 + }, + { + "epoch": 0.4580744088451295, + "grad_norm": 0.728050172328949, + "learning_rate": 0.0001636937302139498, + "loss": 2.5425, + "step": 5676 + }, + { + "epoch": 0.45815511258171254, + "grad_norm": 0.7196047902107239, + "learning_rate": 0.00016368155905442615, + "loss": 2.7426, + "step": 5677 + }, + { + "epoch": 0.4582358163182955, + "grad_norm": 0.6844602823257446, + "learning_rate": 0.0001636693863077916, + "loss": 2.6157, + "step": 5678 + }, + { + "epoch": 0.45831652005487855, + "grad_norm": 0.7375781536102295, + "learning_rate": 0.0001636572119743495, + "loss": 2.7069, + "step": 5679 + }, + { + "epoch": 0.4583972237914615, + "grad_norm": 0.7667750120162964, + "learning_rate": 0.0001636450360544033, + "loss": 2.6589, + "step": 5680 + }, + { + "epoch": 0.45847792752804456, + "grad_norm": 0.6569861173629761, + "learning_rate": 0.00016363285854825642, + "loss": 2.6197, + "step": 5681 + }, + { + "epoch": 0.45855863126462754, + "grad_norm": 0.7177335023880005, + "learning_rate": 0.00016362067945621239, + "loss": 2.6104, + "step": 5682 + }, + { + "epoch": 0.45863933500121057, + "grad_norm": 0.7260481715202332, + "learning_rate": 0.00016360849877857469, + "loss": 2.6435, + "step": 5683 + }, + { + "epoch": 0.45872003873779355, + "grad_norm": 0.7083989381790161, + "learning_rate": 0.00016359631651564693, + 
"loss": 2.6366, + "step": 5684 + }, + { + "epoch": 0.4588007424743766, + "grad_norm": 0.6417020559310913, + "learning_rate": 0.00016358413266773271, + "loss": 2.6311, + "step": 5685 + }, + { + "epoch": 0.45888144621095955, + "grad_norm": 0.737856924533844, + "learning_rate": 0.0001635719472351357, + "loss": 2.6647, + "step": 5686 + }, + { + "epoch": 0.4589621499475426, + "grad_norm": 0.6774190068244934, + "learning_rate": 0.0001635597602181596, + "loss": 2.6366, + "step": 5687 + }, + { + "epoch": 0.45904285368412556, + "grad_norm": 0.6480480432510376, + "learning_rate": 0.0001635475716171081, + "loss": 2.6501, + "step": 5688 + }, + { + "epoch": 0.4591235574207086, + "grad_norm": 0.7886860370635986, + "learning_rate": 0.0001635353814322851, + "loss": 2.7239, + "step": 5689 + }, + { + "epoch": 0.45920426115729157, + "grad_norm": 0.7579021453857422, + "learning_rate": 0.0001635231896639942, + "loss": 2.6155, + "step": 5690 + }, + { + "epoch": 0.4592849648938746, + "grad_norm": 0.6853809356689453, + "learning_rate": 0.0001635109963125394, + "loss": 2.5933, + "step": 5691 + }, + { + "epoch": 0.4593656686304576, + "grad_norm": 0.661342978477478, + "learning_rate": 0.00016349880137822456, + "loss": 2.6277, + "step": 5692 + }, + { + "epoch": 0.4594463723670406, + "grad_norm": 0.6795682311058044, + "learning_rate": 0.0001634866048613536, + "loss": 2.6221, + "step": 5693 + }, + { + "epoch": 0.4595270761036236, + "grad_norm": 0.7375383377075195, + "learning_rate": 0.00016347440676223047, + "loss": 2.6082, + "step": 5694 + }, + { + "epoch": 0.4596077798402066, + "grad_norm": 0.7565153241157532, + "learning_rate": 0.0001634622070811592, + "loss": 2.6615, + "step": 5695 + }, + { + "epoch": 0.4596884835767896, + "grad_norm": 0.6869745254516602, + "learning_rate": 0.00016345000581844386, + "loss": 2.6172, + "step": 5696 + }, + { + "epoch": 0.45976918731337263, + "grad_norm": 0.7192853689193726, + "learning_rate": 0.0001634378029743885, + "loss": 2.6324, + "step": 5697 + }, + { + 
"epoch": 0.4598498910499556, + "grad_norm": 0.6919218301773071, + "learning_rate": 0.00016342559854929726, + "loss": 2.5965, + "step": 5698 + }, + { + "epoch": 0.45993059478653864, + "grad_norm": 0.6715282797813416, + "learning_rate": 0.00016341339254347432, + "loss": 2.6225, + "step": 5699 + }, + { + "epoch": 0.4600112985231216, + "grad_norm": 0.6768380999565125, + "learning_rate": 0.00016340118495722388, + "loss": 2.6376, + "step": 5700 + }, + { + "epoch": 0.46009200225970465, + "grad_norm": 0.6898325681686401, + "learning_rate": 0.00016338897579085018, + "loss": 2.667, + "step": 5701 + }, + { + "epoch": 0.4601727059962876, + "grad_norm": 0.7171810865402222, + "learning_rate": 0.00016337676504465747, + "loss": 2.678, + "step": 5702 + }, + { + "epoch": 0.46025340973287066, + "grad_norm": 0.7050724029541016, + "learning_rate": 0.00016336455271895016, + "loss": 2.619, + "step": 5703 + }, + { + "epoch": 0.46033411346945363, + "grad_norm": 0.8287240862846375, + "learning_rate": 0.00016335233881403248, + "loss": 2.71, + "step": 5704 + }, + { + "epoch": 0.46041481720603666, + "grad_norm": 0.6880568861961365, + "learning_rate": 0.000163340123330209, + "loss": 2.6516, + "step": 5705 + }, + { + "epoch": 0.46049552094261964, + "grad_norm": 0.7222896218299866, + "learning_rate": 0.00016332790626778402, + "loss": 2.5899, + "step": 5706 + }, + { + "epoch": 0.4605762246792027, + "grad_norm": 0.7707448601722717, + "learning_rate": 0.00016331568762706207, + "loss": 2.6116, + "step": 5707 + }, + { + "epoch": 0.46065692841578565, + "grad_norm": 0.7780653834342957, + "learning_rate": 0.0001633034674083477, + "loss": 2.6072, + "step": 5708 + }, + { + "epoch": 0.4607376321523687, + "grad_norm": 0.7551524639129639, + "learning_rate": 0.00016329124561194545, + "loss": 2.548, + "step": 5709 + }, + { + "epoch": 0.46081833588895166, + "grad_norm": 0.9312284588813782, + "learning_rate": 0.0001632790222381599, + "loss": 2.6557, + "step": 5710 + }, + { + "epoch": 0.4608990396255347, + 
"grad_norm": 0.7404753565788269, + "learning_rate": 0.0001632667972872957, + "loss": 2.6889, + "step": 5711 + }, + { + "epoch": 0.46097974336211767, + "grad_norm": 0.7423726916313171, + "learning_rate": 0.00016325457075965752, + "loss": 2.6265, + "step": 5712 + }, + { + "epoch": 0.46106044709870064, + "grad_norm": 1.0683187246322632, + "learning_rate": 0.0001632423426555501, + "loss": 2.6827, + "step": 5713 + }, + { + "epoch": 0.4611411508352837, + "grad_norm": 0.7204160094261169, + "learning_rate": 0.0001632301129752782, + "loss": 2.702, + "step": 5714 + }, + { + "epoch": 0.46122185457186665, + "grad_norm": 0.7591153383255005, + "learning_rate": 0.0001632178817191466, + "loss": 2.6031, + "step": 5715 + }, + { + "epoch": 0.4613025583084497, + "grad_norm": 0.8147456645965576, + "learning_rate": 0.00016320564888746013, + "loss": 2.6117, + "step": 5716 + }, + { + "epoch": 0.46138326204503266, + "grad_norm": 0.7880246639251709, + "learning_rate": 0.00016319341448052364, + "loss": 2.5896, + "step": 5717 + }, + { + "epoch": 0.4614639657816157, + "grad_norm": 0.6875137686729431, + "learning_rate": 0.00016318117849864206, + "loss": 2.6258, + "step": 5718 + }, + { + "epoch": 0.46154466951819867, + "grad_norm": 0.7197960615158081, + "learning_rate": 0.00016316894094212044, + "loss": 2.6656, + "step": 5719 + }, + { + "epoch": 0.4616253732547817, + "grad_norm": 0.7049540281295776, + "learning_rate": 0.0001631567018112636, + "loss": 2.6698, + "step": 5720 + }, + { + "epoch": 0.4617060769913647, + "grad_norm": 0.7128825783729553, + "learning_rate": 0.00016314446110637668, + "loss": 2.6552, + "step": 5721 + }, + { + "epoch": 0.4617867807279477, + "grad_norm": 0.7956201434135437, + "learning_rate": 0.00016313221882776477, + "loss": 2.6747, + "step": 5722 + }, + { + "epoch": 0.4618674844645307, + "grad_norm": 0.7598347663879395, + "learning_rate": 0.0001631199749757329, + "loss": 2.6187, + "step": 5723 + }, + { + "epoch": 0.4619481882011137, + "grad_norm": 0.6587582230567932, + 
"learning_rate": 0.00016310772955058627, + "loss": 2.596, + "step": 5724 + }, + { + "epoch": 0.4620288919376967, + "grad_norm": 0.700136125087738, + "learning_rate": 0.00016309548255263003, + "loss": 2.6527, + "step": 5725 + }, + { + "epoch": 0.4621095956742797, + "grad_norm": 0.7246582508087158, + "learning_rate": 0.00016308323398216945, + "loss": 2.6577, + "step": 5726 + }, + { + "epoch": 0.4621902994108627, + "grad_norm": 0.6951557993888855, + "learning_rate": 0.00016307098383950977, + "loss": 2.5816, + "step": 5727 + }, + { + "epoch": 0.46227100314744574, + "grad_norm": 0.7109191417694092, + "learning_rate": 0.0001630587321249563, + "loss": 2.6586, + "step": 5728 + }, + { + "epoch": 0.4623517068840287, + "grad_norm": 0.7357863783836365, + "learning_rate": 0.0001630464788388144, + "loss": 2.691, + "step": 5729 + }, + { + "epoch": 0.46243241062061174, + "grad_norm": 0.7916350960731506, + "learning_rate": 0.00016303422398138945, + "loss": 2.6584, + "step": 5730 + }, + { + "epoch": 0.4625131143571947, + "grad_norm": 0.6543231010437012, + "learning_rate": 0.00016302196755298685, + "loss": 2.6482, + "step": 5731 + }, + { + "epoch": 0.46259381809377775, + "grad_norm": 0.6978787183761597, + "learning_rate": 0.00016300970955391208, + "loss": 2.5956, + "step": 5732 + }, + { + "epoch": 0.46267452183036073, + "grad_norm": 0.7301886677742004, + "learning_rate": 0.00016299744998447065, + "loss": 2.6178, + "step": 5733 + }, + { + "epoch": 0.46275522556694376, + "grad_norm": 0.7381030321121216, + "learning_rate": 0.00016298518884496808, + "loss": 2.6712, + "step": 5734 + }, + { + "epoch": 0.46283592930352674, + "grad_norm": 0.7769027948379517, + "learning_rate": 0.00016297292613570995, + "loss": 2.6082, + "step": 5735 + }, + { + "epoch": 0.46291663304010977, + "grad_norm": 0.7698354721069336, + "learning_rate": 0.0001629606618570019, + "loss": 2.6543, + "step": 5736 + }, + { + "epoch": 0.46299733677669275, + "grad_norm": 0.7001554369926453, + "learning_rate": 
0.00016294839600914957, + "loss": 2.6174, + "step": 5737 + }, + { + "epoch": 0.4630780405132758, + "grad_norm": 0.7589300274848938, + "learning_rate": 0.00016293612859245868, + "loss": 2.6338, + "step": 5738 + }, + { + "epoch": 0.46315874424985876, + "grad_norm": 0.7083945274353027, + "learning_rate": 0.00016292385960723493, + "loss": 2.6793, + "step": 5739 + }, + { + "epoch": 0.4632394479864418, + "grad_norm": 0.739439845085144, + "learning_rate": 0.00016291158905378412, + "loss": 2.7335, + "step": 5740 + }, + { + "epoch": 0.46332015172302476, + "grad_norm": 0.6868166923522949, + "learning_rate": 0.00016289931693241205, + "loss": 2.6139, + "step": 5741 + }, + { + "epoch": 0.4634008554596078, + "grad_norm": 0.7385871410369873, + "learning_rate": 0.0001628870432434246, + "loss": 2.6783, + "step": 5742 + }, + { + "epoch": 0.4634815591961908, + "grad_norm": 0.7227835655212402, + "learning_rate": 0.00016287476798712764, + "loss": 2.6732, + "step": 5743 + }, + { + "epoch": 0.4635622629327738, + "grad_norm": 0.6662411689758301, + "learning_rate": 0.00016286249116382709, + "loss": 2.6645, + "step": 5744 + }, + { + "epoch": 0.4636429666693568, + "grad_norm": 0.8110263347625732, + "learning_rate": 0.00016285021277382894, + "loss": 2.6448, + "step": 5745 + }, + { + "epoch": 0.4637236704059398, + "grad_norm": 0.7419269680976868, + "learning_rate": 0.0001628379328174392, + "loss": 2.7286, + "step": 5746 + }, + { + "epoch": 0.4638043741425228, + "grad_norm": 0.6518125534057617, + "learning_rate": 0.0001628256512949639, + "loss": 2.6545, + "step": 5747 + }, + { + "epoch": 0.4638850778791058, + "grad_norm": 0.6816060543060303, + "learning_rate": 0.00016281336820670917, + "loss": 2.6167, + "step": 5748 + }, + { + "epoch": 0.4639657816156888, + "grad_norm": 0.6537362337112427, + "learning_rate": 0.0001628010835529811, + "loss": 2.6522, + "step": 5749 + }, + { + "epoch": 0.46404648535227183, + "grad_norm": 0.6720992922782898, + "learning_rate": 0.00016278879733408585, + "loss": 
2.6028, + "step": 5750 + }, + { + "epoch": 0.4641271890888548, + "grad_norm": 0.6778908371925354, + "learning_rate": 0.00016277650955032967, + "loss": 2.5591, + "step": 5751 + }, + { + "epoch": 0.46420789282543784, + "grad_norm": 0.6908471584320068, + "learning_rate": 0.0001627642202020187, + "loss": 2.6574, + "step": 5752 + }, + { + "epoch": 0.4642885965620208, + "grad_norm": 0.7034298181533813, + "learning_rate": 0.00016275192928945936, + "loss": 2.657, + "step": 5753 + }, + { + "epoch": 0.46436930029860385, + "grad_norm": 0.7245952486991882, + "learning_rate": 0.0001627396368129579, + "loss": 2.6572, + "step": 5754 + }, + { + "epoch": 0.4644500040351868, + "grad_norm": 0.6764482855796814, + "learning_rate": 0.0001627273427728207, + "loss": 2.6576, + "step": 5755 + }, + { + "epoch": 0.46453070777176986, + "grad_norm": 0.7074379920959473, + "learning_rate": 0.0001627150471693541, + "loss": 2.614, + "step": 5756 + }, + { + "epoch": 0.46461141150835283, + "grad_norm": 0.7292052507400513, + "learning_rate": 0.0001627027500028646, + "loss": 2.673, + "step": 5757 + }, + { + "epoch": 0.46469211524493587, + "grad_norm": 0.7554025650024414, + "learning_rate": 0.0001626904512736587, + "loss": 2.5919, + "step": 5758 + }, + { + "epoch": 0.46477281898151884, + "grad_norm": 0.6829606890678406, + "learning_rate": 0.00016267815098204284, + "loss": 2.7206, + "step": 5759 + }, + { + "epoch": 0.4648535227181019, + "grad_norm": 0.7201548218727112, + "learning_rate": 0.00016266584912832363, + "loss": 2.6651, + "step": 5760 + }, + { + "epoch": 0.46493422645468485, + "grad_norm": 0.6889227628707886, + "learning_rate": 0.00016265354571280764, + "loss": 2.6776, + "step": 5761 + }, + { + "epoch": 0.4650149301912679, + "grad_norm": 0.7286190986633301, + "learning_rate": 0.00016264124073580156, + "loss": 2.591, + "step": 5762 + }, + { + "epoch": 0.46509563392785086, + "grad_norm": 0.7222036123275757, + "learning_rate": 0.00016262893419761196, + "loss": 2.6422, + "step": 5763 + }, + { + 
"epoch": 0.46517633766443384, + "grad_norm": 0.6822768449783325, + "learning_rate": 0.00016261662609854562, + "loss": 2.6126, + "step": 5764 + }, + { + "epoch": 0.46525704140101687, + "grad_norm": 0.7263356447219849, + "learning_rate": 0.00016260431643890929, + "loss": 2.6304, + "step": 5765 + }, + { + "epoch": 0.46533774513759985, + "grad_norm": 0.7152180075645447, + "learning_rate": 0.00016259200521900972, + "loss": 2.6489, + "step": 5766 + }, + { + "epoch": 0.4654184488741829, + "grad_norm": 0.6988116502761841, + "learning_rate": 0.00016257969243915378, + "loss": 2.6151, + "step": 5767 + }, + { + "epoch": 0.46549915261076585, + "grad_norm": 0.7131790518760681, + "learning_rate": 0.00016256737809964831, + "loss": 2.6284, + "step": 5768 + }, + { + "epoch": 0.4655798563473489, + "grad_norm": 0.674196183681488, + "learning_rate": 0.00016255506220080025, + "loss": 2.5815, + "step": 5769 + }, + { + "epoch": 0.46566056008393186, + "grad_norm": 0.7166198492050171, + "learning_rate": 0.0001625427447429165, + "loss": 2.6594, + "step": 5770 + }, + { + "epoch": 0.4657412638205149, + "grad_norm": 0.6997127532958984, + "learning_rate": 0.00016253042572630407, + "loss": 2.6502, + "step": 5771 + }, + { + "epoch": 0.46582196755709787, + "grad_norm": 0.7761591076850891, + "learning_rate": 0.00016251810515126994, + "loss": 2.624, + "step": 5772 + }, + { + "epoch": 0.4659026712936809, + "grad_norm": 0.7038728594779968, + "learning_rate": 0.00016250578301812125, + "loss": 2.6096, + "step": 5773 + }, + { + "epoch": 0.4659833750302639, + "grad_norm": 0.7080080509185791, + "learning_rate": 0.00016249345932716505, + "loss": 2.6196, + "step": 5774 + }, + { + "epoch": 0.4660640787668469, + "grad_norm": 0.7461444735527039, + "learning_rate": 0.00016248113407870847, + "loss": 2.65, + "step": 5775 + }, + { + "epoch": 0.4661447825034299, + "grad_norm": 0.7914463877677917, + "learning_rate": 0.00016246880727305868, + "loss": 2.6539, + "step": 5776 + }, + { + "epoch": 0.4662254862400129, + 
"grad_norm": 0.7067776918411255, + "learning_rate": 0.00016245647891052295, + "loss": 2.72, + "step": 5777 + }, + { + "epoch": 0.4663061899765959, + "grad_norm": 0.7190818190574646, + "learning_rate": 0.00016244414899140852, + "loss": 2.7029, + "step": 5778 + }, + { + "epoch": 0.46638689371317893, + "grad_norm": 0.6740003824234009, + "learning_rate": 0.00016243181751602261, + "loss": 2.6404, + "step": 5779 + }, + { + "epoch": 0.4664675974497619, + "grad_norm": 0.7942661643028259, + "learning_rate": 0.00016241948448467267, + "loss": 2.6333, + "step": 5780 + }, + { + "epoch": 0.46654830118634494, + "grad_norm": 0.6415690183639526, + "learning_rate": 0.00016240714989766597, + "loss": 2.6354, + "step": 5781 + }, + { + "epoch": 0.4666290049229279, + "grad_norm": 0.7287769913673401, + "learning_rate": 0.00016239481375530997, + "loss": 2.6721, + "step": 5782 + }, + { + "epoch": 0.46670970865951095, + "grad_norm": 0.8197699189186096, + "learning_rate": 0.00016238247605791212, + "loss": 2.7577, + "step": 5783 + }, + { + "epoch": 0.4667904123960939, + "grad_norm": 0.8182012438774109, + "learning_rate": 0.0001623701368057799, + "loss": 2.6475, + "step": 5784 + }, + { + "epoch": 0.46687111613267696, + "grad_norm": 0.6974665522575378, + "learning_rate": 0.00016235779599922082, + "loss": 2.5897, + "step": 5785 + }, + { + "epoch": 0.46695181986925993, + "grad_norm": 0.7156379222869873, + "learning_rate": 0.00016234545363854247, + "loss": 2.5981, + "step": 5786 + }, + { + "epoch": 0.46703252360584296, + "grad_norm": 0.6875364780426025, + "learning_rate": 0.0001623331097240524, + "loss": 2.6333, + "step": 5787 + }, + { + "epoch": 0.46711322734242594, + "grad_norm": 0.7222917675971985, + "learning_rate": 0.00016232076425605835, + "loss": 2.5865, + "step": 5788 + }, + { + "epoch": 0.467193931079009, + "grad_norm": 0.7224915027618408, + "learning_rate": 0.00016230841723486792, + "loss": 2.667, + "step": 5789 + }, + { + "epoch": 0.46727463481559195, + "grad_norm": 0.7125402688980103, + 
"learning_rate": 0.00016229606866078887, + "loss": 2.6548, + "step": 5790 + }, + { + "epoch": 0.467355338552175, + "grad_norm": 0.6866132616996765, + "learning_rate": 0.00016228371853412894, + "loss": 2.6381, + "step": 5791 + }, + { + "epoch": 0.46743604228875796, + "grad_norm": 0.7573552131652832, + "learning_rate": 0.00016227136685519593, + "loss": 2.6766, + "step": 5792 + }, + { + "epoch": 0.467516746025341, + "grad_norm": 0.7565932273864746, + "learning_rate": 0.00016225901362429767, + "loss": 2.5965, + "step": 5793 + }, + { + "epoch": 0.46759744976192397, + "grad_norm": 0.7279250621795654, + "learning_rate": 0.00016224665884174207, + "loss": 2.6599, + "step": 5794 + }, + { + "epoch": 0.467678153498507, + "grad_norm": 0.7501276731491089, + "learning_rate": 0.000162234302507837, + "loss": 2.636, + "step": 5795 + }, + { + "epoch": 0.46775885723509, + "grad_norm": 0.7823930978775024, + "learning_rate": 0.00016222194462289042, + "loss": 2.6277, + "step": 5796 + }, + { + "epoch": 0.467839560971673, + "grad_norm": 0.7168415784835815, + "learning_rate": 0.00016220958518721034, + "loss": 2.6868, + "step": 5797 + }, + { + "epoch": 0.467920264708256, + "grad_norm": 0.7468454241752625, + "learning_rate": 0.00016219722420110478, + "loss": 2.7209, + "step": 5798 + }, + { + "epoch": 0.468000968444839, + "grad_norm": 0.6915228962898254, + "learning_rate": 0.0001621848616648818, + "loss": 2.6356, + "step": 5799 + }, + { + "epoch": 0.468081672181422, + "grad_norm": 0.7731573581695557, + "learning_rate": 0.00016217249757884955, + "loss": 2.6396, + "step": 5800 + }, + { + "epoch": 0.468162375918005, + "grad_norm": 0.6579388380050659, + "learning_rate": 0.0001621601319433161, + "loss": 2.6077, + "step": 5801 + }, + { + "epoch": 0.468243079654588, + "grad_norm": 0.7136246562004089, + "learning_rate": 0.00016214776475858967, + "loss": 2.6602, + "step": 5802 + }, + { + "epoch": 0.46832378339117103, + "grad_norm": 0.6929461359977722, + "learning_rate": 0.0001621353960249785, + "loss": 
2.6851, + "step": 5803 + }, + { + "epoch": 0.468404487127754, + "grad_norm": 0.8001779913902283, + "learning_rate": 0.00016212302574279087, + "loss": 2.6577, + "step": 5804 + }, + { + "epoch": 0.46848519086433704, + "grad_norm": 0.7637671828269958, + "learning_rate": 0.00016211065391233498, + "loss": 2.6923, + "step": 5805 + }, + { + "epoch": 0.46856589460092, + "grad_norm": 0.6879906058311462, + "learning_rate": 0.0001620982805339193, + "loss": 2.6555, + "step": 5806 + }, + { + "epoch": 0.46864659833750305, + "grad_norm": 0.7731223702430725, + "learning_rate": 0.0001620859056078521, + "loss": 2.6301, + "step": 5807 + }, + { + "epoch": 0.468727302074086, + "grad_norm": 0.7351491451263428, + "learning_rate": 0.00016207352913444185, + "loss": 2.6154, + "step": 5808 + }, + { + "epoch": 0.46880800581066906, + "grad_norm": 0.716314435005188, + "learning_rate": 0.000162061151113997, + "loss": 2.6294, + "step": 5809 + }, + { + "epoch": 0.46888870954725204, + "grad_norm": 0.6974702477455139, + "learning_rate": 0.00016204877154682605, + "loss": 2.6046, + "step": 5810 + }, + { + "epoch": 0.46896941328383507, + "grad_norm": 0.7456035614013672, + "learning_rate": 0.00016203639043323745, + "loss": 2.6308, + "step": 5811 + }, + { + "epoch": 0.46905011702041804, + "grad_norm": 0.7198047637939453, + "learning_rate": 0.0001620240077735399, + "loss": 2.6303, + "step": 5812 + }, + { + "epoch": 0.4691308207570011, + "grad_norm": 0.7098269462585449, + "learning_rate": 0.00016201162356804192, + "loss": 2.6352, + "step": 5813 + }, + { + "epoch": 0.46921152449358405, + "grad_norm": 0.7060410976409912, + "learning_rate": 0.0001619992378170522, + "loss": 2.6489, + "step": 5814 + }, + { + "epoch": 0.46929222823016703, + "grad_norm": 0.7126092314720154, + "learning_rate": 0.0001619868505208794, + "loss": 2.66, + "step": 5815 + }, + { + "epoch": 0.46937293196675006, + "grad_norm": 0.7391123175621033, + "learning_rate": 0.00016197446167983223, + "loss": 2.6066, + "step": 5816 + }, + { + 
"epoch": 0.46945363570333304, + "grad_norm": 0.7282211780548096, + "learning_rate": 0.0001619620712942195, + "loss": 2.6422, + "step": 5817 + }, + { + "epoch": 0.46953433943991607, + "grad_norm": 0.7581801414489746, + "learning_rate": 0.00016194967936434998, + "loss": 2.702, + "step": 5818 + }, + { + "epoch": 0.46961504317649905, + "grad_norm": 0.6649011373519897, + "learning_rate": 0.00016193728589053248, + "loss": 2.6235, + "step": 5819 + }, + { + "epoch": 0.4696957469130821, + "grad_norm": 0.720312237739563, + "learning_rate": 0.00016192489087307592, + "loss": 2.5961, + "step": 5820 + }, + { + "epoch": 0.46977645064966506, + "grad_norm": 0.72076016664505, + "learning_rate": 0.0001619124943122892, + "loss": 2.6793, + "step": 5821 + }, + { + "epoch": 0.4698571543862481, + "grad_norm": 0.6695740818977356, + "learning_rate": 0.0001619000962084813, + "loss": 2.6325, + "step": 5822 + }, + { + "epoch": 0.46993785812283106, + "grad_norm": 0.7678804993629456, + "learning_rate": 0.0001618876965619612, + "loss": 2.7473, + "step": 5823 + }, + { + "epoch": 0.4700185618594141, + "grad_norm": 0.782349169254303, + "learning_rate": 0.00016187529537303792, + "loss": 2.6139, + "step": 5824 + }, + { + "epoch": 0.4700992655959971, + "grad_norm": 0.6906631588935852, + "learning_rate": 0.00016186289264202052, + "loss": 2.6529, + "step": 5825 + }, + { + "epoch": 0.4701799693325801, + "grad_norm": 0.732947051525116, + "learning_rate": 0.00016185048836921814, + "loss": 2.6416, + "step": 5826 + }, + { + "epoch": 0.4702606730691631, + "grad_norm": 0.8306718468666077, + "learning_rate": 0.0001618380825549399, + "loss": 2.6566, + "step": 5827 + }, + { + "epoch": 0.4703413768057461, + "grad_norm": 0.725764811038971, + "learning_rate": 0.00016182567519949502, + "loss": 2.6664, + "step": 5828 + }, + { + "epoch": 0.4704220805423291, + "grad_norm": 0.7301872372627258, + "learning_rate": 0.00016181326630319268, + "loss": 2.6666, + "step": 5829 + }, + { + "epoch": 0.4705027842789121, + "grad_norm": 
0.7297122478485107, + "learning_rate": 0.00016180085586634216, + "loss": 2.6415, + "step": 5830 + }, + { + "epoch": 0.4705834880154951, + "grad_norm": 0.7445664405822754, + "learning_rate": 0.00016178844388925278, + "loss": 2.6112, + "step": 5831 + }, + { + "epoch": 0.47066419175207813, + "grad_norm": 0.7787267565727234, + "learning_rate": 0.00016177603037223384, + "loss": 2.6452, + "step": 5832 + }, + { + "epoch": 0.4707448954886611, + "grad_norm": 0.7386903762817383, + "learning_rate": 0.00016176361531559474, + "loss": 2.6919, + "step": 5833 + }, + { + "epoch": 0.47082559922524414, + "grad_norm": 0.7991776466369629, + "learning_rate": 0.0001617511987196449, + "loss": 2.6728, + "step": 5834 + }, + { + "epoch": 0.4709063029618271, + "grad_norm": 0.7196263670921326, + "learning_rate": 0.00016173878058469375, + "loss": 2.6008, + "step": 5835 + }, + { + "epoch": 0.47098700669841015, + "grad_norm": 0.6773477792739868, + "learning_rate": 0.00016172636091105086, + "loss": 2.6184, + "step": 5836 + }, + { + "epoch": 0.4710677104349931, + "grad_norm": 0.7238345742225647, + "learning_rate": 0.00016171393969902567, + "loss": 2.6221, + "step": 5837 + }, + { + "epoch": 0.47114841417157616, + "grad_norm": 0.702104926109314, + "learning_rate": 0.00016170151694892777, + "loss": 2.5909, + "step": 5838 + }, + { + "epoch": 0.47122911790815913, + "grad_norm": 0.7571590542793274, + "learning_rate": 0.00016168909266106677, + "loss": 2.6044, + "step": 5839 + }, + { + "epoch": 0.47130982164474217, + "grad_norm": 0.7408227324485779, + "learning_rate": 0.00016167666683575234, + "loss": 2.5771, + "step": 5840 + }, + { + "epoch": 0.47139052538132514, + "grad_norm": 0.6760764122009277, + "learning_rate": 0.00016166423947329414, + "loss": 2.6202, + "step": 5841 + }, + { + "epoch": 0.4714712291179082, + "grad_norm": 0.7085632681846619, + "learning_rate": 0.00016165181057400192, + "loss": 2.5887, + "step": 5842 + }, + { + "epoch": 0.47155193285449115, + "grad_norm": 0.7298943400382996, + 
"learning_rate": 0.00016163938013818538, + "loss": 2.609, + "step": 5843 + }, + { + "epoch": 0.4716326365910742, + "grad_norm": 0.7591157555580139, + "learning_rate": 0.0001616269481661544, + "loss": 2.6582, + "step": 5844 + }, + { + "epoch": 0.47171334032765716, + "grad_norm": 0.6727088093757629, + "learning_rate": 0.00016161451465821877, + "loss": 2.6289, + "step": 5845 + }, + { + "epoch": 0.4717940440642402, + "grad_norm": 0.6782706379890442, + "learning_rate": 0.00016160207961468835, + "loss": 2.6875, + "step": 5846 + }, + { + "epoch": 0.47187474780082317, + "grad_norm": 0.6839444041252136, + "learning_rate": 0.00016158964303587313, + "loss": 2.5687, + "step": 5847 + }, + { + "epoch": 0.4719554515374062, + "grad_norm": 0.7565997838973999, + "learning_rate": 0.00016157720492208295, + "loss": 2.6855, + "step": 5848 + }, + { + "epoch": 0.4720361552739892, + "grad_norm": 0.7286611199378967, + "learning_rate": 0.0001615647652736279, + "loss": 2.5906, + "step": 5849 + }, + { + "epoch": 0.4721168590105722, + "grad_norm": 0.7503396272659302, + "learning_rate": 0.00016155232409081793, + "loss": 2.6419, + "step": 5850 + }, + { + "epoch": 0.4721975627471552, + "grad_norm": 0.6924198865890503, + "learning_rate": 0.00016153988137396317, + "loss": 2.661, + "step": 5851 + }, + { + "epoch": 0.4722782664837382, + "grad_norm": 0.7731672525405884, + "learning_rate": 0.0001615274371233737, + "loss": 2.6993, + "step": 5852 + }, + { + "epoch": 0.4723589702203212, + "grad_norm": 0.7422799468040466, + "learning_rate": 0.00016151499133935964, + "loss": 2.6134, + "step": 5853 + }, + { + "epoch": 0.4724396739569042, + "grad_norm": 0.6924546957015991, + "learning_rate": 0.0001615025440222312, + "loss": 2.672, + "step": 5854 + }, + { + "epoch": 0.4725203776934872, + "grad_norm": 0.7205976843833923, + "learning_rate": 0.00016149009517229862, + "loss": 2.6722, + "step": 5855 + }, + { + "epoch": 0.47260108143007024, + "grad_norm": 0.6898519992828369, + "learning_rate": 0.0001614776447898721, 
+ "loss": 2.6474, + "step": 5856 + }, + { + "epoch": 0.4726817851666532, + "grad_norm": 0.7512481212615967, + "learning_rate": 0.00016146519287526197, + "loss": 2.7413, + "step": 5857 + }, + { + "epoch": 0.47276248890323624, + "grad_norm": 0.6734220385551453, + "learning_rate": 0.0001614527394287786, + "loss": 2.6114, + "step": 5858 + }, + { + "epoch": 0.4728431926398192, + "grad_norm": 0.6745339632034302, + "learning_rate": 0.00016144028445073228, + "loss": 2.6039, + "step": 5859 + }, + { + "epoch": 0.47292389637640225, + "grad_norm": 0.7463086843490601, + "learning_rate": 0.0001614278279414335, + "loss": 2.6109, + "step": 5860 + }, + { + "epoch": 0.47300460011298523, + "grad_norm": 0.7203261256217957, + "learning_rate": 0.00016141536990119264, + "loss": 2.651, + "step": 5861 + }, + { + "epoch": 0.47308530384956826, + "grad_norm": 0.7718746066093445, + "learning_rate": 0.00016140291033032024, + "loss": 2.6953, + "step": 5862 + }, + { + "epoch": 0.47316600758615124, + "grad_norm": 0.7854858040809631, + "learning_rate": 0.0001613904492291268, + "loss": 2.5941, + "step": 5863 + }, + { + "epoch": 0.47324671132273427, + "grad_norm": 0.7218664288520813, + "learning_rate": 0.0001613779865979229, + "loss": 2.6447, + "step": 5864 + }, + { + "epoch": 0.47332741505931725, + "grad_norm": 0.7479045987129211, + "learning_rate": 0.0001613655224370191, + "loss": 2.6662, + "step": 5865 + }, + { + "epoch": 0.4734081187959002, + "grad_norm": 0.7335021495819092, + "learning_rate": 0.00016135305674672612, + "loss": 2.6283, + "step": 5866 + }, + { + "epoch": 0.47348882253248326, + "grad_norm": 0.7650331258773804, + "learning_rate": 0.00016134058952735453, + "loss": 2.7168, + "step": 5867 + }, + { + "epoch": 0.47356952626906623, + "grad_norm": 0.733383297920227, + "learning_rate": 0.00016132812077921513, + "loss": 2.6352, + "step": 5868 + }, + { + "epoch": 0.47365023000564926, + "grad_norm": 1.3944146633148193, + "learning_rate": 0.00016131565050261866, + "loss": 2.7518, + "step": 5869 
+ }, + { + "epoch": 0.47373093374223224, + "grad_norm": 0.746112585067749, + "learning_rate": 0.0001613031786978759, + "loss": 2.6253, + "step": 5870 + }, + { + "epoch": 0.4738116374788153, + "grad_norm": 0.9859737753868103, + "learning_rate": 0.00016129070536529766, + "loss": 2.6682, + "step": 5871 + }, + { + "epoch": 0.47389234121539825, + "grad_norm": 0.7358877062797546, + "learning_rate": 0.00016127823050519484, + "loss": 2.6712, + "step": 5872 + }, + { + "epoch": 0.4739730449519813, + "grad_norm": 0.7379923462867737, + "learning_rate": 0.0001612657541178783, + "loss": 2.6268, + "step": 5873 + }, + { + "epoch": 0.47405374868856426, + "grad_norm": 0.7671005725860596, + "learning_rate": 0.00016125327620365907, + "loss": 2.6127, + "step": 5874 + }, + { + "epoch": 0.4741344524251473, + "grad_norm": 0.8007156252861023, + "learning_rate": 0.00016124079676284805, + "loss": 2.6173, + "step": 5875 + }, + { + "epoch": 0.47421515616173027, + "grad_norm": 0.7930500507354736, + "learning_rate": 0.00016122831579575627, + "loss": 2.589, + "step": 5876 + }, + { + "epoch": 0.4742958598983133, + "grad_norm": 0.788006603717804, + "learning_rate": 0.00016121583330269484, + "loss": 2.6731, + "step": 5877 + }, + { + "epoch": 0.4743765636348963, + "grad_norm": 0.742148220539093, + "learning_rate": 0.00016120334928397483, + "loss": 2.674, + "step": 5878 + }, + { + "epoch": 0.4744572673714793, + "grad_norm": 0.6823038458824158, + "learning_rate": 0.00016119086373990736, + "loss": 2.6153, + "step": 5879 + }, + { + "epoch": 0.4745379711080623, + "grad_norm": 0.7542331218719482, + "learning_rate": 0.00016117837667080356, + "loss": 2.6739, + "step": 5880 + }, + { + "epoch": 0.4746186748446453, + "grad_norm": 0.8163543343544006, + "learning_rate": 0.00016116588807697476, + "loss": 2.6558, + "step": 5881 + }, + { + "epoch": 0.4746993785812283, + "grad_norm": 0.7528213858604431, + "learning_rate": 0.0001611533979587321, + "loss": 2.6243, + "step": 5882 + }, + { + "epoch": 0.4747800823178113, 
+ "grad_norm": 0.7476626038551331, + "learning_rate": 0.00016114090631638695, + "loss": 2.5984, + "step": 5883 + }, + { + "epoch": 0.4748607860543943, + "grad_norm": 0.7436621785163879, + "learning_rate": 0.00016112841315025055, + "loss": 2.6118, + "step": 5884 + }, + { + "epoch": 0.47494148979097733, + "grad_norm": 0.8024004101753235, + "learning_rate": 0.0001611159184606343, + "loss": 2.6926, + "step": 5885 + }, + { + "epoch": 0.4750221935275603, + "grad_norm": 0.7475626468658447, + "learning_rate": 0.00016110342224784962, + "loss": 2.6175, + "step": 5886 + }, + { + "epoch": 0.47510289726414334, + "grad_norm": 0.7900637984275818, + "learning_rate": 0.00016109092451220796, + "loss": 2.6503, + "step": 5887 + }, + { + "epoch": 0.4751836010007263, + "grad_norm": 0.6988356113433838, + "learning_rate": 0.00016107842525402074, + "loss": 2.6494, + "step": 5888 + }, + { + "epoch": 0.47526430473730935, + "grad_norm": 1.0214186906814575, + "learning_rate": 0.00016106592447359948, + "loss": 2.6476, + "step": 5889 + }, + { + "epoch": 0.4753450084738923, + "grad_norm": 0.741527795791626, + "learning_rate": 0.00016105342217125578, + "loss": 2.6054, + "step": 5890 + }, + { + "epoch": 0.47542571221047536, + "grad_norm": 0.7196603417396545, + "learning_rate": 0.0001610409183473012, + "loss": 2.6146, + "step": 5891 + }, + { + "epoch": 0.47550641594705834, + "grad_norm": 0.8130923509597778, + "learning_rate": 0.00016102841300204737, + "loss": 2.6505, + "step": 5892 + }, + { + "epoch": 0.47558711968364137, + "grad_norm": 0.7929537892341614, + "learning_rate": 0.00016101590613580596, + "loss": 2.6725, + "step": 5893 + }, + { + "epoch": 0.47566782342022434, + "grad_norm": 0.7149303555488586, + "learning_rate": 0.00016100339774888865, + "loss": 2.6272, + "step": 5894 + }, + { + "epoch": 0.4757485271568074, + "grad_norm": 0.7242792248725891, + "learning_rate": 0.00016099088784160724, + "loss": 2.5948, + "step": 5895 + }, + { + "epoch": 0.47582923089339035, + "grad_norm": 
0.7571540474891663, + "learning_rate": 0.00016097837641427346, + "loss": 2.689, + "step": 5896 + }, + { + "epoch": 0.4759099346299734, + "grad_norm": 0.7402021288871765, + "learning_rate": 0.00016096586346719916, + "loss": 2.7035, + "step": 5897 + }, + { + "epoch": 0.47599063836655636, + "grad_norm": 0.7195574045181274, + "learning_rate": 0.00016095334900069613, + "loss": 2.5862, + "step": 5898 + }, + { + "epoch": 0.4760713421031394, + "grad_norm": 0.7677412033081055, + "learning_rate": 0.00016094083301507634, + "loss": 2.6715, + "step": 5899 + }, + { + "epoch": 0.47615204583972237, + "grad_norm": 0.7131708860397339, + "learning_rate": 0.0001609283155106517, + "loss": 2.6555, + "step": 5900 + }, + { + "epoch": 0.4762327495763054, + "grad_norm": 0.6774055361747742, + "learning_rate": 0.00016091579648773414, + "loss": 2.621, + "step": 5901 + }, + { + "epoch": 0.4763134533128884, + "grad_norm": 0.6873257160186768, + "learning_rate": 0.00016090327594663571, + "loss": 2.6719, + "step": 5902 + }, + { + "epoch": 0.4763941570494714, + "grad_norm": 0.8004229068756104, + "learning_rate": 0.00016089075388766845, + "loss": 2.6926, + "step": 5903 + }, + { + "epoch": 0.4764748607860544, + "grad_norm": 0.7196173667907715, + "learning_rate": 0.00016087823031114438, + "loss": 2.6032, + "step": 5904 + }, + { + "epoch": 0.4765555645226374, + "grad_norm": 0.7665518522262573, + "learning_rate": 0.00016086570521737573, + "loss": 2.6359, + "step": 5905 + }, + { + "epoch": 0.4766362682592204, + "grad_norm": 0.7240240573883057, + "learning_rate": 0.0001608531786066746, + "loss": 2.6489, + "step": 5906 + }, + { + "epoch": 0.47671697199580343, + "grad_norm": 0.7603839039802551, + "learning_rate": 0.00016084065047935317, + "loss": 2.6064, + "step": 5907 + }, + { + "epoch": 0.4767976757323864, + "grad_norm": 0.7394058704376221, + "learning_rate": 0.0001608281208357237, + "loss": 2.6643, + "step": 5908 + }, + { + "epoch": 0.47687837946896944, + "grad_norm": 0.7183148860931396, + 
"learning_rate": 0.00016081558967609845, + "loss": 2.56, + "step": 5909 + }, + { + "epoch": 0.4769590832055524, + "grad_norm": 0.7181926965713501, + "learning_rate": 0.00016080305700078972, + "loss": 2.6665, + "step": 5910 + }, + { + "epoch": 0.47703978694213545, + "grad_norm": 0.7634081840515137, + "learning_rate": 0.00016079052281010988, + "loss": 2.7076, + "step": 5911 + }, + { + "epoch": 0.4771204906787184, + "grad_norm": 0.7928739190101624, + "learning_rate": 0.0001607779871043713, + "loss": 2.6512, + "step": 5912 + }, + { + "epoch": 0.47720119441530146, + "grad_norm": 0.7192893028259277, + "learning_rate": 0.00016076544988388643, + "loss": 2.6453, + "step": 5913 + }, + { + "epoch": 0.47728189815188443, + "grad_norm": 0.7171720862388611, + "learning_rate": 0.00016075291114896767, + "loss": 2.6501, + "step": 5914 + }, + { + "epoch": 0.47736260188846746, + "grad_norm": 0.6787160038948059, + "learning_rate": 0.00016074037089992756, + "loss": 2.6566, + "step": 5915 + }, + { + "epoch": 0.47744330562505044, + "grad_norm": 0.8118634819984436, + "learning_rate": 0.00016072782913707868, + "loss": 2.6635, + "step": 5916 + }, + { + "epoch": 0.4775240093616334, + "grad_norm": 0.7188509702682495, + "learning_rate": 0.0001607152858607335, + "loss": 2.6899, + "step": 5917 + }, + { + "epoch": 0.47760471309821645, + "grad_norm": 0.6742647290229797, + "learning_rate": 0.00016070274107120468, + "loss": 2.6221, + "step": 5918 + }, + { + "epoch": 0.4776854168347994, + "grad_norm": 0.7274083495140076, + "learning_rate": 0.00016069019476880488, + "loss": 2.6588, + "step": 5919 + }, + { + "epoch": 0.47776612057138246, + "grad_norm": 0.6984386444091797, + "learning_rate": 0.00016067764695384682, + "loss": 2.6376, + "step": 5920 + }, + { + "epoch": 0.47784682430796543, + "grad_norm": 0.7260883450508118, + "learning_rate": 0.00016066509762664315, + "loss": 2.6623, + "step": 5921 + }, + { + "epoch": 0.47792752804454847, + "grad_norm": 0.7540579438209534, + "learning_rate": 
0.00016065254678750666, + "loss": 2.695, + "step": 5922 + }, + { + "epoch": 0.47800823178113144, + "grad_norm": 0.7032651305198669, + "learning_rate": 0.00016063999443675017, + "loss": 2.6791, + "step": 5923 + }, + { + "epoch": 0.4780889355177145, + "grad_norm": 0.682842493057251, + "learning_rate": 0.0001606274405746865, + "loss": 2.6198, + "step": 5924 + }, + { + "epoch": 0.47816963925429745, + "grad_norm": 0.6843859553337097, + "learning_rate": 0.00016061488520162853, + "loss": 2.6432, + "step": 5925 + }, + { + "epoch": 0.4782503429908805, + "grad_norm": 0.652119517326355, + "learning_rate": 0.00016060232831788918, + "loss": 2.6461, + "step": 5926 + }, + { + "epoch": 0.47833104672746346, + "grad_norm": 0.6986887454986572, + "learning_rate": 0.0001605897699237814, + "loss": 2.5885, + "step": 5927 + }, + { + "epoch": 0.4784117504640465, + "grad_norm": 0.7156725525856018, + "learning_rate": 0.00016057721001961817, + "loss": 2.6526, + "step": 5928 + }, + { + "epoch": 0.47849245420062947, + "grad_norm": 0.7367579936981201, + "learning_rate": 0.0001605646486057125, + "loss": 2.5842, + "step": 5929 + }, + { + "epoch": 0.4785731579372125, + "grad_norm": 0.7059770822525024, + "learning_rate": 0.00016055208568237746, + "loss": 2.617, + "step": 5930 + }, + { + "epoch": 0.4786538616737955, + "grad_norm": 0.7225117087364197, + "learning_rate": 0.00016053952124992619, + "loss": 2.6499, + "step": 5931 + }, + { + "epoch": 0.4787345654103785, + "grad_norm": 0.7027475237846375, + "learning_rate": 0.00016052695530867177, + "loss": 2.5934, + "step": 5932 + }, + { + "epoch": 0.4788152691469615, + "grad_norm": 0.7031852602958679, + "learning_rate": 0.00016051438785892743, + "loss": 2.5947, + "step": 5933 + }, + { + "epoch": 0.4788959728835445, + "grad_norm": 0.6731768846511841, + "learning_rate": 0.00016050181890100635, + "loss": 2.6811, + "step": 5934 + }, + { + "epoch": 0.4789766766201275, + "grad_norm": 0.7120038866996765, + "learning_rate": 0.0001604892484352218, + "loss": 
2.6625, + "step": 5935 + }, + { + "epoch": 0.4790573803567105, + "grad_norm": 0.6895150542259216, + "learning_rate": 0.00016047667646188702, + "loss": 2.6784, + "step": 5936 + }, + { + "epoch": 0.4791380840932935, + "grad_norm": 0.7080708742141724, + "learning_rate": 0.0001604641029813154, + "loss": 2.6491, + "step": 5937 + }, + { + "epoch": 0.47921878782987654, + "grad_norm": 0.6522819399833679, + "learning_rate": 0.00016045152799382025, + "loss": 2.6113, + "step": 5938 + }, + { + "epoch": 0.4792994915664595, + "grad_norm": 0.6988112926483154, + "learning_rate": 0.00016043895149971506, + "loss": 2.6892, + "step": 5939 + }, + { + "epoch": 0.47938019530304254, + "grad_norm": 0.7545368671417236, + "learning_rate": 0.00016042637349931318, + "loss": 2.6872, + "step": 5940 + }, + { + "epoch": 0.4794608990396255, + "grad_norm": 0.7083707451820374, + "learning_rate": 0.0001604137939929281, + "loss": 2.6726, + "step": 5941 + }, + { + "epoch": 0.47954160277620855, + "grad_norm": 0.8198027014732361, + "learning_rate": 0.00016040121298087337, + "loss": 2.647, + "step": 5942 + }, + { + "epoch": 0.47962230651279153, + "grad_norm": 0.7296201586723328, + "learning_rate": 0.00016038863046346252, + "loss": 2.7122, + "step": 5943 + }, + { + "epoch": 0.47970301024937456, + "grad_norm": 0.7262474298477173, + "learning_rate": 0.00016037604644100913, + "loss": 2.6903, + "step": 5944 + }, + { + "epoch": 0.47978371398595754, + "grad_norm": 0.8010182976722717, + "learning_rate": 0.00016036346091382686, + "loss": 2.6942, + "step": 5945 + }, + { + "epoch": 0.47986441772254057, + "grad_norm": 0.7227098345756531, + "learning_rate": 0.00016035087388222932, + "loss": 2.6661, + "step": 5946 + }, + { + "epoch": 0.47994512145912355, + "grad_norm": 0.7374662756919861, + "learning_rate": 0.00016033828534653028, + "loss": 2.6233, + "step": 5947 + }, + { + "epoch": 0.4800258251957066, + "grad_norm": 0.7139650583267212, + "learning_rate": 0.00016032569530704342, + "loss": 2.5859, + "step": 5948 + }, + { 
+ "epoch": 0.48010652893228956, + "grad_norm": 0.7067660689353943, + "learning_rate": 0.00016031310376408254, + "loss": 2.6677, + "step": 5949 + }, + { + "epoch": 0.4801872326688726, + "grad_norm": 0.694715142250061, + "learning_rate": 0.00016030051071796146, + "loss": 2.6415, + "step": 5950 + }, + { + "epoch": 0.48026793640545556, + "grad_norm": 0.728918194770813, + "learning_rate": 0.00016028791616899403, + "loss": 2.6274, + "step": 5951 + }, + { + "epoch": 0.4803486401420386, + "grad_norm": 0.699846088886261, + "learning_rate": 0.00016027532011749412, + "loss": 2.6613, + "step": 5952 + }, + { + "epoch": 0.4804293438786216, + "grad_norm": 0.7177432179450989, + "learning_rate": 0.0001602627225637757, + "loss": 2.6107, + "step": 5953 + }, + { + "epoch": 0.4805100476152046, + "grad_norm": 0.7502370476722717, + "learning_rate": 0.00016025012350815267, + "loss": 2.6534, + "step": 5954 + }, + { + "epoch": 0.4805907513517876, + "grad_norm": 0.7730218172073364, + "learning_rate": 0.0001602375229509391, + "loss": 2.7037, + "step": 5955 + }, + { + "epoch": 0.4806714550883706, + "grad_norm": 0.7046666145324707, + "learning_rate": 0.00016022492089244898, + "loss": 2.6336, + "step": 5956 + }, + { + "epoch": 0.4807521588249536, + "grad_norm": 0.7991104125976562, + "learning_rate": 0.0001602123173329964, + "loss": 2.7024, + "step": 5957 + }, + { + "epoch": 0.4808328625615366, + "grad_norm": 0.7056288123130798, + "learning_rate": 0.00016019971227289548, + "loss": 2.6088, + "step": 5958 + }, + { + "epoch": 0.4809135662981196, + "grad_norm": 0.7277925610542297, + "learning_rate": 0.00016018710571246038, + "loss": 2.6245, + "step": 5959 + }, + { + "epoch": 0.48099427003470263, + "grad_norm": 0.7545790672302246, + "learning_rate": 0.00016017449765200526, + "loss": 2.6076, + "step": 5960 + }, + { + "epoch": 0.4810749737712856, + "grad_norm": 0.7106321454048157, + "learning_rate": 0.00016016188809184434, + "loss": 2.5561, + "step": 5961 + }, + { + "epoch": 0.48115567750786864, + 
"grad_norm": 0.7464704513549805, + "learning_rate": 0.0001601492770322919, + "loss": 2.6336, + "step": 5962 + }, + { + "epoch": 0.4812363812444516, + "grad_norm": 0.7531768083572388, + "learning_rate": 0.00016013666447366228, + "loss": 2.6236, + "step": 5963 + }, + { + "epoch": 0.48131708498103465, + "grad_norm": 0.7412876486778259, + "learning_rate": 0.00016012405041626978, + "loss": 2.6309, + "step": 5964 + }, + { + "epoch": 0.4813977887176176, + "grad_norm": 0.7030940055847168, + "learning_rate": 0.00016011143486042878, + "loss": 2.6252, + "step": 5965 + }, + { + "epoch": 0.48147849245420066, + "grad_norm": 0.7932302951812744, + "learning_rate": 0.00016009881780645367, + "loss": 2.6797, + "step": 5966 + }, + { + "epoch": 0.48155919619078363, + "grad_norm": 0.7366262078285217, + "learning_rate": 0.00016008619925465893, + "loss": 2.6616, + "step": 5967 + }, + { + "epoch": 0.4816398999273666, + "grad_norm": 0.6938421130180359, + "learning_rate": 0.00016007357920535902, + "loss": 2.6888, + "step": 5968 + }, + { + "epoch": 0.48172060366394964, + "grad_norm": 0.7560005784034729, + "learning_rate": 0.00016006095765886853, + "loss": 2.6044, + "step": 5969 + }, + { + "epoch": 0.4818013074005326, + "grad_norm": 0.7330430150032043, + "learning_rate": 0.0001600483346155019, + "loss": 2.7023, + "step": 5970 + }, + { + "epoch": 0.48188201113711565, + "grad_norm": 0.7257955074310303, + "learning_rate": 0.00016003571007557388, + "loss": 2.6763, + "step": 5971 + }, + { + "epoch": 0.4819627148736986, + "grad_norm": 0.704187273979187, + "learning_rate": 0.000160023084039399, + "loss": 2.6229, + "step": 5972 + }, + { + "epoch": 0.48204341861028166, + "grad_norm": 0.7014813423156738, + "learning_rate": 0.00016001045650729196, + "loss": 2.6207, + "step": 5973 + }, + { + "epoch": 0.48212412234686464, + "grad_norm": 0.8039405941963196, + "learning_rate": 0.00015999782747956747, + "loss": 2.6198, + "step": 5974 + }, + { + "epoch": 0.48220482608344767, + "grad_norm": 0.7114945650100708, 
+ "learning_rate": 0.0001599851969565403, + "loss": 2.6154, + "step": 5975 + }, + { + "epoch": 0.48228552982003065, + "grad_norm": 0.7603329420089722, + "learning_rate": 0.00015997256493852517, + "loss": 2.6217, + "step": 5976 + }, + { + "epoch": 0.4823662335566137, + "grad_norm": 0.7773346900939941, + "learning_rate": 0.000159959931425837, + "loss": 2.7054, + "step": 5977 + }, + { + "epoch": 0.48244693729319665, + "grad_norm": 0.8022029399871826, + "learning_rate": 0.0001599472964187906, + "loss": 2.6844, + "step": 5978 + }, + { + "epoch": 0.4825276410297797, + "grad_norm": 0.7384541630744934, + "learning_rate": 0.00015993465991770087, + "loss": 2.6516, + "step": 5979 + }, + { + "epoch": 0.48260834476636266, + "grad_norm": 0.6993509531021118, + "learning_rate": 0.00015992202192288273, + "loss": 2.6837, + "step": 5980 + }, + { + "epoch": 0.4826890485029457, + "grad_norm": 0.7430509328842163, + "learning_rate": 0.00015990938243465116, + "loss": 2.6717, + "step": 5981 + }, + { + "epoch": 0.48276975223952867, + "grad_norm": 0.7544847726821899, + "learning_rate": 0.0001598967414533212, + "loss": 2.6573, + "step": 5982 + }, + { + "epoch": 0.4828504559761117, + "grad_norm": 0.736955463886261, + "learning_rate": 0.00015988409897920786, + "loss": 2.6865, + "step": 5983 + }, + { + "epoch": 0.4829311597126947, + "grad_norm": 0.7771684527397156, + "learning_rate": 0.00015987145501262622, + "loss": 2.6173, + "step": 5984 + }, + { + "epoch": 0.4830118634492777, + "grad_norm": 0.7504391670227051, + "learning_rate": 0.00015985880955389143, + "loss": 2.6218, + "step": 5985 + }, + { + "epoch": 0.4830925671858607, + "grad_norm": 0.7025442123413086, + "learning_rate": 0.00015984616260331861, + "loss": 2.6107, + "step": 5986 + }, + { + "epoch": 0.4831732709224437, + "grad_norm": 0.6906485557556152, + "learning_rate": 0.000159833514161223, + "loss": 2.633, + "step": 5987 + }, + { + "epoch": 0.4832539746590267, + "grad_norm": 0.7771004438400269, + "learning_rate": 
0.00015982086422791983, + "loss": 2.5956, + "step": 5988 + }, + { + "epoch": 0.48333467839560973, + "grad_norm": 0.6927372813224792, + "learning_rate": 0.00015980821280372432, + "loss": 2.5984, + "step": 5989 + }, + { + "epoch": 0.4834153821321927, + "grad_norm": 0.7196357846260071, + "learning_rate": 0.00015979555988895184, + "loss": 2.6386, + "step": 5990 + }, + { + "epoch": 0.48349608586877574, + "grad_norm": 0.7601087689399719, + "learning_rate": 0.0001597829054839177, + "loss": 2.6707, + "step": 5991 + }, + { + "epoch": 0.4835767896053587, + "grad_norm": 0.7783588767051697, + "learning_rate": 0.00015977024958893722, + "loss": 2.5815, + "step": 5992 + }, + { + "epoch": 0.48365749334194175, + "grad_norm": 0.7651833891868591, + "learning_rate": 0.00015975759220432592, + "loss": 2.6235, + "step": 5993 + }, + { + "epoch": 0.4837381970785247, + "grad_norm": 0.7158511877059937, + "learning_rate": 0.0001597449333303992, + "loss": 2.6813, + "step": 5994 + }, + { + "epoch": 0.48381890081510776, + "grad_norm": 0.7411341667175293, + "learning_rate": 0.0001597322729674726, + "loss": 2.7231, + "step": 5995 + }, + { + "epoch": 0.48389960455169073, + "grad_norm": 0.7168158292770386, + "learning_rate": 0.0001597196111158616, + "loss": 2.6408, + "step": 5996 + }, + { + "epoch": 0.48398030828827376, + "grad_norm": 0.7603393793106079, + "learning_rate": 0.00015970694777588175, + "loss": 2.7821, + "step": 5997 + }, + { + "epoch": 0.48406101202485674, + "grad_norm": 0.7298564910888672, + "learning_rate": 0.0001596942829478487, + "loss": 2.6828, + "step": 5998 + }, + { + "epoch": 0.4841417157614398, + "grad_norm": 0.7850572466850281, + "learning_rate": 0.0001596816166320781, + "loss": 2.6191, + "step": 5999 + }, + { + "epoch": 0.48422241949802275, + "grad_norm": 0.7697601914405823, + "learning_rate": 0.00015966894882888562, + "loss": 2.6768, + "step": 6000 + }, + { + "epoch": 0.48422241949802275, + "eval_loss": 2.5610127449035645, + "eval_runtime": 760.0481, + 
"eval_samples_per_second": 3.447, + "eval_steps_per_second": 0.575, + "step": 6000 + }, + { + "epoch": 0.4843031232346058, + "grad_norm": 0.7212432026863098, + "learning_rate": 0.00015965627953858693, + "loss": 2.5967, + "step": 6001 + }, + { + "epoch": 0.48438382697118876, + "grad_norm": 0.7629631757736206, + "learning_rate": 0.0001596436087614978, + "loss": 2.7005, + "step": 6002 + }, + { + "epoch": 0.4844645307077718, + "grad_norm": 0.7154754400253296, + "learning_rate": 0.00015963093649793404, + "loss": 2.6909, + "step": 6003 + }, + { + "epoch": 0.48454523444435477, + "grad_norm": 0.7365279793739319, + "learning_rate": 0.00015961826274821147, + "loss": 2.6268, + "step": 6004 + }, + { + "epoch": 0.4846259381809378, + "grad_norm": 0.8114632964134216, + "learning_rate": 0.00015960558751264596, + "loss": 2.6647, + "step": 6005 + }, + { + "epoch": 0.4847066419175208, + "grad_norm": 0.7411556243896484, + "learning_rate": 0.00015959291079155338, + "loss": 2.6378, + "step": 6006 + }, + { + "epoch": 0.4847873456541038, + "grad_norm": 0.7137390375137329, + "learning_rate": 0.00015958023258524968, + "loss": 2.6454, + "step": 6007 + }, + { + "epoch": 0.4848680493906868, + "grad_norm": 0.7477054595947266, + "learning_rate": 0.00015956755289405088, + "loss": 2.6463, + "step": 6008 + }, + { + "epoch": 0.4849487531272698, + "grad_norm": 0.7198071479797363, + "learning_rate": 0.0001595548717182729, + "loss": 2.6537, + "step": 6009 + }, + { + "epoch": 0.4850294568638528, + "grad_norm": 0.6697781085968018, + "learning_rate": 0.00015954218905823186, + "loss": 2.7018, + "step": 6010 + }, + { + "epoch": 0.4851101606004358, + "grad_norm": 0.7577201724052429, + "learning_rate": 0.00015952950491424382, + "loss": 2.6531, + "step": 6011 + }, + { + "epoch": 0.4851908643370188, + "grad_norm": 0.6852774024009705, + "learning_rate": 0.0001595168192866249, + "loss": 2.5819, + "step": 6012 + }, + { + "epoch": 0.48527156807360183, + "grad_norm": 0.7116097807884216, + "learning_rate": 
0.0001595041321756913, + "loss": 2.5691, + "step": 6013 + }, + { + "epoch": 0.4853522718101848, + "grad_norm": 0.7478477954864502, + "learning_rate": 0.00015949144358175916, + "loss": 2.6658, + "step": 6014 + }, + { + "epoch": 0.48543297554676784, + "grad_norm": 0.816969633102417, + "learning_rate": 0.0001594787535051447, + "loss": 2.6709, + "step": 6015 + }, + { + "epoch": 0.4855136792833508, + "grad_norm": 0.6953164339065552, + "learning_rate": 0.00015946606194616427, + "loss": 2.6139, + "step": 6016 + }, + { + "epoch": 0.48559438301993385, + "grad_norm": 0.6698834300041199, + "learning_rate": 0.0001594533689051341, + "loss": 2.574, + "step": 6017 + }, + { + "epoch": 0.4856750867565168, + "grad_norm": 0.7686784267425537, + "learning_rate": 0.0001594406743823706, + "loss": 2.6271, + "step": 6018 + }, + { + "epoch": 0.4857557904930998, + "grad_norm": 0.7713280916213989, + "learning_rate": 0.00015942797837819009, + "loss": 2.6682, + "step": 6019 + }, + { + "epoch": 0.48583649422968284, + "grad_norm": 0.8102596998214722, + "learning_rate": 0.00015941528089290902, + "loss": 2.6771, + "step": 6020 + }, + { + "epoch": 0.4859171979662658, + "grad_norm": 0.7140331864356995, + "learning_rate": 0.00015940258192684382, + "loss": 2.6267, + "step": 6021 + }, + { + "epoch": 0.48599790170284884, + "grad_norm": 0.7057615518569946, + "learning_rate": 0.000159389881480311, + "loss": 2.6011, + "step": 6022 + }, + { + "epoch": 0.4860786054394318, + "grad_norm": 0.7106850147247314, + "learning_rate": 0.0001593771795536271, + "loss": 2.6681, + "step": 6023 + }, + { + "epoch": 0.48615930917601485, + "grad_norm": 0.7618210315704346, + "learning_rate": 0.00015936447614710867, + "loss": 2.6545, + "step": 6024 + }, + { + "epoch": 0.48624001291259783, + "grad_norm": 0.7577608227729797, + "learning_rate": 0.00015935177126107233, + "loss": 2.6479, + "step": 6025 + }, + { + "epoch": 0.48632071664918086, + "grad_norm": 0.758745551109314, + "learning_rate": 0.00015933906489583468, + "loss": 
2.7057, + "step": 6026 + }, + { + "epoch": 0.48640142038576384, + "grad_norm": 0.785906970500946, + "learning_rate": 0.00015932635705171241, + "loss": 2.7081, + "step": 6027 + }, + { + "epoch": 0.48648212412234687, + "grad_norm": 0.6744558215141296, + "learning_rate": 0.00015931364772902228, + "loss": 2.6438, + "step": 6028 + }, + { + "epoch": 0.48656282785892985, + "grad_norm": 0.7451377511024475, + "learning_rate": 0.00015930093692808099, + "loss": 2.6509, + "step": 6029 + }, + { + "epoch": 0.4866435315955129, + "grad_norm": 0.6590149402618408, + "learning_rate": 0.0001592882246492053, + "loss": 2.5683, + "step": 6030 + }, + { + "epoch": 0.48672423533209586, + "grad_norm": 0.7433840036392212, + "learning_rate": 0.0001592755108927121, + "loss": 2.6647, + "step": 6031 + }, + { + "epoch": 0.4868049390686789, + "grad_norm": 0.876806378364563, + "learning_rate": 0.00015926279565891822, + "loss": 2.6482, + "step": 6032 + }, + { + "epoch": 0.48688564280526186, + "grad_norm": 0.7495005130767822, + "learning_rate": 0.00015925007894814058, + "loss": 2.6346, + "step": 6033 + }, + { + "epoch": 0.4869663465418449, + "grad_norm": 0.7005730271339417, + "learning_rate": 0.00015923736076069604, + "loss": 2.6241, + "step": 6034 + }, + { + "epoch": 0.4870470502784279, + "grad_norm": 0.664098858833313, + "learning_rate": 0.00015922464109690166, + "loss": 2.6281, + "step": 6035 + }, + { + "epoch": 0.4871277540150109, + "grad_norm": 0.7482514977455139, + "learning_rate": 0.00015921191995707442, + "loss": 2.5764, + "step": 6036 + }, + { + "epoch": 0.4872084577515939, + "grad_norm": 0.7450351715087891, + "learning_rate": 0.0001591991973415313, + "loss": 2.6433, + "step": 6037 + }, + { + "epoch": 0.4872891614881769, + "grad_norm": 0.6738519072532654, + "learning_rate": 0.00015918647325058948, + "loss": 2.6688, + "step": 6038 + }, + { + "epoch": 0.4873698652247599, + "grad_norm": 0.7999960780143738, + "learning_rate": 0.000159173747684566, + "loss": 2.6309, + "step": 6039 + }, + { + 
"epoch": 0.4874505689613429, + "grad_norm": 0.7249687910079956, + "learning_rate": 0.00015916102064377806, + "loss": 2.5808, + "step": 6040 + }, + { + "epoch": 0.4875312726979259, + "grad_norm": 0.7014601826667786, + "learning_rate": 0.00015914829212854286, + "loss": 2.6646, + "step": 6041 + }, + { + "epoch": 0.48761197643450893, + "grad_norm": 0.7091174721717834, + "learning_rate": 0.00015913556213917757, + "loss": 2.6576, + "step": 6042 + }, + { + "epoch": 0.4876926801710919, + "grad_norm": 0.6949019432067871, + "learning_rate": 0.00015912283067599952, + "loss": 2.5883, + "step": 6043 + }, + { + "epoch": 0.48777338390767494, + "grad_norm": 0.6990448236465454, + "learning_rate": 0.00015911009773932598, + "loss": 2.6413, + "step": 6044 + }, + { + "epoch": 0.4878540876442579, + "grad_norm": 0.7106831073760986, + "learning_rate": 0.00015909736332947425, + "loss": 2.6122, + "step": 6045 + }, + { + "epoch": 0.48793479138084095, + "grad_norm": 0.7052395343780518, + "learning_rate": 0.00015908462744676177, + "loss": 2.572, + "step": 6046 + }, + { + "epoch": 0.4880154951174239, + "grad_norm": 0.7250158190727234, + "learning_rate": 0.00015907189009150592, + "loss": 2.6582, + "step": 6047 + }, + { + "epoch": 0.48809619885400696, + "grad_norm": 0.7213590145111084, + "learning_rate": 0.00015905915126402414, + "loss": 2.7025, + "step": 6048 + }, + { + "epoch": 0.48817690259058993, + "grad_norm": 0.7136254906654358, + "learning_rate": 0.00015904641096463394, + "loss": 2.6823, + "step": 6049 + }, + { + "epoch": 0.48825760632717297, + "grad_norm": 0.7163361310958862, + "learning_rate": 0.00015903366919365282, + "loss": 2.6642, + "step": 6050 + }, + { + "epoch": 0.48833831006375594, + "grad_norm": 0.6842724680900574, + "learning_rate": 0.00015902092595139838, + "loss": 2.6599, + "step": 6051 + }, + { + "epoch": 0.488419013800339, + "grad_norm": 0.7426519393920898, + "learning_rate": 0.0001590081812381882, + "loss": 2.6271, + "step": 6052 + }, + { + "epoch": 0.48849971753692195, + 
"grad_norm": 0.7415586709976196, + "learning_rate": 0.00015899543505433985, + "loss": 2.6105, + "step": 6053 + }, + { + "epoch": 0.488580421273505, + "grad_norm": 0.7286739945411682, + "learning_rate": 0.00015898268740017105, + "loss": 2.6304, + "step": 6054 + }, + { + "epoch": 0.48866112501008796, + "grad_norm": 0.6898483633995056, + "learning_rate": 0.00015896993827599947, + "loss": 2.6237, + "step": 6055 + }, + { + "epoch": 0.488741828746671, + "grad_norm": 0.7020056247711182, + "learning_rate": 0.00015895718768214293, + "loss": 2.6166, + "step": 6056 + }, + { + "epoch": 0.48882253248325397, + "grad_norm": 0.7145286798477173, + "learning_rate": 0.00015894443561891914, + "loss": 2.6729, + "step": 6057 + }, + { + "epoch": 0.488903236219837, + "grad_norm": 0.6888289451599121, + "learning_rate": 0.00015893168208664594, + "loss": 2.6154, + "step": 6058 + }, + { + "epoch": 0.48898393995642, + "grad_norm": 0.6929970383644104, + "learning_rate": 0.00015891892708564116, + "loss": 2.6748, + "step": 6059 + }, + { + "epoch": 0.489064643693003, + "grad_norm": 0.679853618144989, + "learning_rate": 0.0001589061706162227, + "loss": 2.605, + "step": 6060 + }, + { + "epoch": 0.489145347429586, + "grad_norm": 0.71812504529953, + "learning_rate": 0.0001588934126787085, + "loss": 2.7249, + "step": 6061 + }, + { + "epoch": 0.489226051166169, + "grad_norm": 0.7083466053009033, + "learning_rate": 0.00015888065327341648, + "loss": 2.5986, + "step": 6062 + }, + { + "epoch": 0.489306754902752, + "grad_norm": 0.7476792931556702, + "learning_rate": 0.00015886789240066466, + "loss": 2.5942, + "step": 6063 + }, + { + "epoch": 0.489387458639335, + "grad_norm": 0.7197855114936829, + "learning_rate": 0.00015885513006077114, + "loss": 2.6198, + "step": 6064 + }, + { + "epoch": 0.489468162375918, + "grad_norm": 0.6678233742713928, + "learning_rate": 0.00015884236625405385, + "loss": 2.5793, + "step": 6065 + }, + { + "epoch": 0.48954886611250104, + "grad_norm": 0.7371037602424622, + 
"learning_rate": 0.00015882960098083105, + "loss": 2.6231, + "step": 6066 + }, + { + "epoch": 0.489629569849084, + "grad_norm": 0.7087417244911194, + "learning_rate": 0.00015881683424142078, + "loss": 2.6483, + "step": 6067 + }, + { + "epoch": 0.48971027358566704, + "grad_norm": 0.7300292253494263, + "learning_rate": 0.00015880406603614126, + "loss": 2.6778, + "step": 6068 + }, + { + "epoch": 0.48979097732225, + "grad_norm": 0.8347866535186768, + "learning_rate": 0.0001587912963653107, + "loss": 2.554, + "step": 6069 + }, + { + "epoch": 0.489871681058833, + "grad_norm": 0.7717794179916382, + "learning_rate": 0.00015877852522924732, + "loss": 2.6904, + "step": 6070 + }, + { + "epoch": 0.48995238479541603, + "grad_norm": 0.6960952281951904, + "learning_rate": 0.00015876575262826944, + "loss": 2.6059, + "step": 6071 + }, + { + "epoch": 0.490033088531999, + "grad_norm": 0.7316592931747437, + "learning_rate": 0.00015875297856269543, + "loss": 2.6685, + "step": 6072 + }, + { + "epoch": 0.49011379226858204, + "grad_norm": 0.6775457859039307, + "learning_rate": 0.00015874020303284362, + "loss": 2.6232, + "step": 6073 + }, + { + "epoch": 0.490194496005165, + "grad_norm": 0.7741925120353699, + "learning_rate": 0.00015872742603903237, + "loss": 2.6767, + "step": 6074 + }, + { + "epoch": 0.49027519974174805, + "grad_norm": 0.857490599155426, + "learning_rate": 0.00015871464758158017, + "loss": 2.6649, + "step": 6075 + }, + { + "epoch": 0.490355903478331, + "grad_norm": 0.7474274039268494, + "learning_rate": 0.00015870186766080545, + "loss": 2.6926, + "step": 6076 + }, + { + "epoch": 0.49043660721491406, + "grad_norm": 0.7266567945480347, + "learning_rate": 0.00015868908627702675, + "loss": 2.5919, + "step": 6077 + }, + { + "epoch": 0.49051731095149703, + "grad_norm": 0.7247830629348755, + "learning_rate": 0.0001586763034305626, + "loss": 2.6158, + "step": 6078 + }, + { + "epoch": 0.49059801468808006, + "grad_norm": 0.7654951214790344, + "learning_rate": 0.00015866351912173157, 
+ "loss": 2.7236, + "step": 6079 + }, + { + "epoch": 0.49067871842466304, + "grad_norm": 0.732431948184967, + "learning_rate": 0.00015865073335085236, + "loss": 2.6349, + "step": 6080 + }, + { + "epoch": 0.4907594221612461, + "grad_norm": 0.7240673303604126, + "learning_rate": 0.0001586379461182435, + "loss": 2.6282, + "step": 6081 + }, + { + "epoch": 0.49084012589782905, + "grad_norm": 0.767473042011261, + "learning_rate": 0.00015862515742422374, + "loss": 2.6939, + "step": 6082 + }, + { + "epoch": 0.4909208296344121, + "grad_norm": 0.6977359056472778, + "learning_rate": 0.00015861236726911183, + "loss": 2.6591, + "step": 6083 + }, + { + "epoch": 0.49100153337099506, + "grad_norm": 0.7676639556884766, + "learning_rate": 0.00015859957565322655, + "loss": 2.6189, + "step": 6084 + }, + { + "epoch": 0.4910822371075781, + "grad_norm": 0.7157976031303406, + "learning_rate": 0.0001585867825768866, + "loss": 2.644, + "step": 6085 + }, + { + "epoch": 0.49116294084416107, + "grad_norm": 0.7080803513526917, + "learning_rate": 0.0001585739880404109, + "loss": 2.6099, + "step": 6086 + }, + { + "epoch": 0.4912436445807441, + "grad_norm": 0.7109760046005249, + "learning_rate": 0.0001585611920441183, + "loss": 2.7087, + "step": 6087 + }, + { + "epoch": 0.4913243483173271, + "grad_norm": 0.7274255156517029, + "learning_rate": 0.00015854839458832772, + "loss": 2.6394, + "step": 6088 + }, + { + "epoch": 0.4914050520539101, + "grad_norm": 0.7407883405685425, + "learning_rate": 0.00015853559567335812, + "loss": 2.6729, + "step": 6089 + }, + { + "epoch": 0.4914857557904931, + "grad_norm": 0.6879885196685791, + "learning_rate": 0.00015852279529952843, + "loss": 2.5971, + "step": 6090 + }, + { + "epoch": 0.4915664595270761, + "grad_norm": 0.7678415179252625, + "learning_rate": 0.00015850999346715772, + "loss": 2.6606, + "step": 6091 + }, + { + "epoch": 0.4916471632636591, + "grad_norm": 0.7108608484268188, + "learning_rate": 0.00015849719017656504, + "loss": 2.6494, + "step": 6092 + }, + 
{ + "epoch": 0.4917278670002421, + "grad_norm": 0.7238833904266357, + "learning_rate": 0.00015848438542806945, + "loss": 2.6742, + "step": 6093 + }, + { + "epoch": 0.4918085707368251, + "grad_norm": 0.7316902279853821, + "learning_rate": 0.0001584715792219901, + "loss": 2.6757, + "step": 6094 + }, + { + "epoch": 0.49188927447340813, + "grad_norm": 0.7339446544647217, + "learning_rate": 0.00015845877155864612, + "loss": 2.607, + "step": 6095 + }, + { + "epoch": 0.4919699782099911, + "grad_norm": 0.6931337714195251, + "learning_rate": 0.0001584459624383568, + "loss": 2.6203, + "step": 6096 + }, + { + "epoch": 0.49205068194657414, + "grad_norm": 0.734229326248169, + "learning_rate": 0.00015843315186144126, + "loss": 2.646, + "step": 6097 + }, + { + "epoch": 0.4921313856831571, + "grad_norm": 0.7764919400215149, + "learning_rate": 0.00015842033982821883, + "loss": 2.6698, + "step": 6098 + }, + { + "epoch": 0.49221208941974015, + "grad_norm": 0.7707986235618591, + "learning_rate": 0.00015840752633900887, + "loss": 2.6995, + "step": 6099 + }, + { + "epoch": 0.4922927931563231, + "grad_norm": 0.7321949601173401, + "learning_rate": 0.00015839471139413066, + "loss": 2.6517, + "step": 6100 + }, + { + "epoch": 0.49237349689290616, + "grad_norm": 0.7087488770484924, + "learning_rate": 0.00015838189499390353, + "loss": 2.6153, + "step": 6101 + }, + { + "epoch": 0.49245420062948914, + "grad_norm": 0.7300730347633362, + "learning_rate": 0.00015836907713864706, + "loss": 2.5868, + "step": 6102 + }, + { + "epoch": 0.49253490436607217, + "grad_norm": 0.8476536273956299, + "learning_rate": 0.00015835625782868054, + "loss": 2.7158, + "step": 6103 + }, + { + "epoch": 0.49261560810265514, + "grad_norm": 0.8062012791633606, + "learning_rate": 0.0001583434370643236, + "loss": 2.6896, + "step": 6104 + }, + { + "epoch": 0.4926963118392382, + "grad_norm": 0.7336686849594116, + "learning_rate": 0.00015833061484589562, + "loss": 2.6416, + "step": 6105 + }, + { + "epoch": 0.49277701557582115, + 
"grad_norm": 0.6976929306983948, + "learning_rate": 0.00015831779117371627, + "loss": 2.6279, + "step": 6106 + }, + { + "epoch": 0.4928577193124042, + "grad_norm": 0.7262609601020813, + "learning_rate": 0.00015830496604810513, + "loss": 2.6144, + "step": 6107 + }, + { + "epoch": 0.49293842304898716, + "grad_norm": 0.7274572253227234, + "learning_rate": 0.00015829213946938183, + "loss": 2.7409, + "step": 6108 + }, + { + "epoch": 0.4930191267855702, + "grad_norm": 0.7438454031944275, + "learning_rate": 0.000158279311437866, + "loss": 2.5928, + "step": 6109 + }, + { + "epoch": 0.49309983052215317, + "grad_norm": 0.6885421872138977, + "learning_rate": 0.00015826648195387742, + "loss": 2.6659, + "step": 6110 + }, + { + "epoch": 0.4931805342587362, + "grad_norm": 0.6781450510025024, + "learning_rate": 0.0001582536510177358, + "loss": 2.6068, + "step": 6111 + }, + { + "epoch": 0.4932612379953192, + "grad_norm": 0.7618128657341003, + "learning_rate": 0.0001582408186297609, + "loss": 2.6705, + "step": 6112 + }, + { + "epoch": 0.4933419417319022, + "grad_norm": 0.7011203765869141, + "learning_rate": 0.00015822798479027256, + "loss": 2.596, + "step": 6113 + }, + { + "epoch": 0.4934226454684852, + "grad_norm": 0.7727806568145752, + "learning_rate": 0.00015821514949959065, + "loss": 2.6458, + "step": 6114 + }, + { + "epoch": 0.4935033492050682, + "grad_norm": 0.7318129539489746, + "learning_rate": 0.00015820231275803502, + "loss": 2.6009, + "step": 6115 + }, + { + "epoch": 0.4935840529416512, + "grad_norm": 0.6836227178573608, + "learning_rate": 0.00015818947456592563, + "loss": 2.6311, + "step": 6116 + }, + { + "epoch": 0.49366475667823423, + "grad_norm": 0.7657275199890137, + "learning_rate": 0.0001581766349235824, + "loss": 2.6079, + "step": 6117 + }, + { + "epoch": 0.4937454604148172, + "grad_norm": 0.74736487865448, + "learning_rate": 0.0001581637938313254, + "loss": 2.6752, + "step": 6118 + }, + { + "epoch": 0.49382616415140024, + "grad_norm": 0.716708242893219, + 
"learning_rate": 0.00015815095128947454, + "loss": 2.5896, + "step": 6119 + }, + { + "epoch": 0.4939068678879832, + "grad_norm": 0.740727424621582, + "learning_rate": 0.00015813810729835002, + "loss": 2.6528, + "step": 6120 + }, + { + "epoch": 0.4939875716245662, + "grad_norm": 0.6746687293052673, + "learning_rate": 0.0001581252618582719, + "loss": 2.6438, + "step": 6121 + }, + { + "epoch": 0.4940682753611492, + "grad_norm": 0.7547900080680847, + "learning_rate": 0.00015811241496956028, + "loss": 2.631, + "step": 6122 + }, + { + "epoch": 0.4941489790977322, + "grad_norm": 0.7500903606414795, + "learning_rate": 0.0001580995666325354, + "loss": 2.7039, + "step": 6123 + }, + { + "epoch": 0.49422968283431523, + "grad_norm": 0.7692849636077881, + "learning_rate": 0.00015808671684751743, + "loss": 2.5922, + "step": 6124 + }, + { + "epoch": 0.4943103865708982, + "grad_norm": 0.6964236497879028, + "learning_rate": 0.00015807386561482662, + "loss": 2.6239, + "step": 6125 + }, + { + "epoch": 0.49439109030748124, + "grad_norm": 0.7094165086746216, + "learning_rate": 0.0001580610129347833, + "loss": 2.6239, + "step": 6126 + }, + { + "epoch": 0.4944717940440642, + "grad_norm": 0.7579131126403809, + "learning_rate": 0.00015804815880770775, + "loss": 2.6654, + "step": 6127 + }, + { + "epoch": 0.49455249778064725, + "grad_norm": 0.7687693238258362, + "learning_rate": 0.00015803530323392034, + "loss": 2.6557, + "step": 6128 + }, + { + "epoch": 0.4946332015172302, + "grad_norm": 0.6913540363311768, + "learning_rate": 0.0001580224462137415, + "loss": 2.6299, + "step": 6129 + }, + { + "epoch": 0.49471390525381326, + "grad_norm": 0.7574129700660706, + "learning_rate": 0.0001580095877474916, + "loss": 2.6327, + "step": 6130 + }, + { + "epoch": 0.49479460899039623, + "grad_norm": 0.6834598183631897, + "learning_rate": 0.0001579967278354911, + "loss": 2.6402, + "step": 6131 + }, + { + "epoch": 0.49487531272697927, + "grad_norm": 0.7872750163078308, + "learning_rate": 
0.00015798386647806057, + "loss": 2.6647, + "step": 6132 + }, + { + "epoch": 0.49495601646356224, + "grad_norm": 0.705211341381073, + "learning_rate": 0.00015797100367552055, + "loss": 2.6288, + "step": 6133 + }, + { + "epoch": 0.4950367202001453, + "grad_norm": 0.7302640080451965, + "learning_rate": 0.00015795813942819155, + "loss": 2.6683, + "step": 6134 + }, + { + "epoch": 0.49511742393672825, + "grad_norm": 0.7522360682487488, + "learning_rate": 0.0001579452737363942, + "loss": 2.5885, + "step": 6135 + }, + { + "epoch": 0.4951981276733113, + "grad_norm": 0.657376229763031, + "learning_rate": 0.0001579324066004492, + "loss": 2.5775, + "step": 6136 + }, + { + "epoch": 0.49527883140989426, + "grad_norm": 0.7539556622505188, + "learning_rate": 0.00015791953802067715, + "loss": 2.6236, + "step": 6137 + }, + { + "epoch": 0.4953595351464773, + "grad_norm": 0.7090374827384949, + "learning_rate": 0.00015790666799739883, + "loss": 2.5845, + "step": 6138 + }, + { + "epoch": 0.49544023888306027, + "grad_norm": 0.6883948445320129, + "learning_rate": 0.00015789379653093497, + "loss": 2.6621, + "step": 6139 + }, + { + "epoch": 0.4955209426196433, + "grad_norm": 0.7466424107551575, + "learning_rate": 0.00015788092362160633, + "loss": 2.6289, + "step": 6140 + }, + { + "epoch": 0.4956016463562263, + "grad_norm": 0.7424437403678894, + "learning_rate": 0.00015786804926973383, + "loss": 2.6405, + "step": 6141 + }, + { + "epoch": 0.4956823500928093, + "grad_norm": 0.7227851748466492, + "learning_rate": 0.00015785517347563822, + "loss": 2.6537, + "step": 6142 + }, + { + "epoch": 0.4957630538293923, + "grad_norm": 0.7548653483390808, + "learning_rate": 0.00015784229623964048, + "loss": 2.7377, + "step": 6143 + }, + { + "epoch": 0.4958437575659753, + "grad_norm": 0.7086976170539856, + "learning_rate": 0.00015782941756206152, + "loss": 2.6194, + "step": 6144 + }, + { + "epoch": 0.4959244613025583, + "grad_norm": 0.6605533957481384, + "learning_rate": 0.0001578165374432223, + "loss": 
2.6265, + "step": 6145 + }, + { + "epoch": 0.4960051650391413, + "grad_norm": 0.7187899947166443, + "learning_rate": 0.00015780365588344384, + "loss": 2.5639, + "step": 6146 + }, + { + "epoch": 0.4960858687757243, + "grad_norm": 0.7014074921607971, + "learning_rate": 0.00015779077288304716, + "loss": 2.6011, + "step": 6147 + }, + { + "epoch": 0.49616657251230734, + "grad_norm": 0.7463840842247009, + "learning_rate": 0.00015777788844235335, + "loss": 2.6059, + "step": 6148 + }, + { + "epoch": 0.4962472762488903, + "grad_norm": 0.8022417426109314, + "learning_rate": 0.00015776500256168356, + "loss": 2.6011, + "step": 6149 + }, + { + "epoch": 0.49632797998547334, + "grad_norm": 0.7140083909034729, + "learning_rate": 0.0001577521152413589, + "loss": 2.6891, + "step": 6150 + }, + { + "epoch": 0.4964086837220563, + "grad_norm": 0.7266198992729187, + "learning_rate": 0.00015773922648170053, + "loss": 2.6561, + "step": 6151 + }, + { + "epoch": 0.49648938745863935, + "grad_norm": 0.7241406440734863, + "learning_rate": 0.0001577263362830297, + "loss": 2.6835, + "step": 6152 + }, + { + "epoch": 0.49657009119522233, + "grad_norm": 0.7422344088554382, + "learning_rate": 0.0001577134446456677, + "loss": 2.6039, + "step": 6153 + }, + { + "epoch": 0.49665079493180536, + "grad_norm": 0.8764764666557312, + "learning_rate": 0.0001577005515699358, + "loss": 2.68, + "step": 6154 + }, + { + "epoch": 0.49673149866838834, + "grad_norm": 0.7224323749542236, + "learning_rate": 0.0001576876570561553, + "loss": 2.5824, + "step": 6155 + }, + { + "epoch": 0.49681220240497137, + "grad_norm": 0.7601075172424316, + "learning_rate": 0.00015767476110464758, + "loss": 2.7124, + "step": 6156 + }, + { + "epoch": 0.49689290614155435, + "grad_norm": 0.7425428628921509, + "learning_rate": 0.0001576618637157341, + "loss": 2.5913, + "step": 6157 + }, + { + "epoch": 0.4969736098781374, + "grad_norm": 0.721969723701477, + "learning_rate": 0.0001576489648897362, + "loss": 2.6482, + "step": 6158 + }, + { + 
"epoch": 0.49705431361472036, + "grad_norm": 0.8142126798629761, + "learning_rate": 0.00015763606462697544, + "loss": 2.6231, + "step": 6159 + }, + { + "epoch": 0.4971350173513034, + "grad_norm": 0.6636359691619873, + "learning_rate": 0.00015762316292777326, + "loss": 2.6388, + "step": 6160 + }, + { + "epoch": 0.49721572108788636, + "grad_norm": 0.7093132734298706, + "learning_rate": 0.00015761025979245123, + "loss": 2.6562, + "step": 6161 + }, + { + "epoch": 0.4972964248244694, + "grad_norm": 0.7130851745605469, + "learning_rate": 0.00015759735522133094, + "loss": 2.6856, + "step": 6162 + }, + { + "epoch": 0.4973771285610524, + "grad_norm": 0.7303292155265808, + "learning_rate": 0.000157584449214734, + "loss": 2.6077, + "step": 6163 + }, + { + "epoch": 0.4974578322976354, + "grad_norm": 0.6742258071899414, + "learning_rate": 0.00015757154177298204, + "loss": 2.6644, + "step": 6164 + }, + { + "epoch": 0.4975385360342184, + "grad_norm": 0.6882894039154053, + "learning_rate": 0.00015755863289639677, + "loss": 2.6462, + "step": 6165 + }, + { + "epoch": 0.4976192397708014, + "grad_norm": 0.7882276773452759, + "learning_rate": 0.00015754572258529993, + "loss": 2.6509, + "step": 6166 + }, + { + "epoch": 0.4976999435073844, + "grad_norm": 0.7163859009742737, + "learning_rate": 0.00015753281084001324, + "loss": 2.627, + "step": 6167 + }, + { + "epoch": 0.4977806472439674, + "grad_norm": 0.7194411158561707, + "learning_rate": 0.0001575198976608585, + "loss": 2.6798, + "step": 6168 + }, + { + "epoch": 0.4978613509805504, + "grad_norm": 0.7233198881149292, + "learning_rate": 0.0001575069830481576, + "loss": 2.6616, + "step": 6169 + }, + { + "epoch": 0.49794205471713343, + "grad_norm": 0.7246997952461243, + "learning_rate": 0.00015749406700223231, + "loss": 2.6262, + "step": 6170 + }, + { + "epoch": 0.4980227584537164, + "grad_norm": 0.7509368658065796, + "learning_rate": 0.00015748114952340457, + "loss": 2.6148, + "step": 6171 + }, + { + "epoch": 0.4981034621902994, + 
"grad_norm": 0.7079075574874878, + "learning_rate": 0.00015746823061199637, + "loss": 2.6712, + "step": 6172 + }, + { + "epoch": 0.4981841659268824, + "grad_norm": 0.6821560859680176, + "learning_rate": 0.0001574553102683296, + "loss": 2.6253, + "step": 6173 + }, + { + "epoch": 0.4982648696634654, + "grad_norm": 0.7623000741004944, + "learning_rate": 0.00015744238849272634, + "loss": 2.6252, + "step": 6174 + }, + { + "epoch": 0.4983455734000484, + "grad_norm": 0.709434449672699, + "learning_rate": 0.00015742946528550858, + "loss": 2.555, + "step": 6175 + }, + { + "epoch": 0.4984262771366314, + "grad_norm": 0.7277799844741821, + "learning_rate": 0.00015741654064699846, + "loss": 2.6551, + "step": 6176 + }, + { + "epoch": 0.49850698087321443, + "grad_norm": 0.7208690643310547, + "learning_rate": 0.00015740361457751802, + "loss": 2.6747, + "step": 6177 + }, + { + "epoch": 0.4985876846097974, + "grad_norm": 0.8458136916160583, + "learning_rate": 0.00015739068707738946, + "loss": 2.6551, + "step": 6178 + }, + { + "epoch": 0.49866838834638044, + "grad_norm": 0.7718539834022522, + "learning_rate": 0.00015737775814693498, + "loss": 2.6246, + "step": 6179 + }, + { + "epoch": 0.4987490920829634, + "grad_norm": 0.6982735395431519, + "learning_rate": 0.00015736482778647674, + "loss": 2.5726, + "step": 6180 + }, + { + "epoch": 0.49882979581954645, + "grad_norm": 0.6759411692619324, + "learning_rate": 0.00015735189599633707, + "loss": 2.6603, + "step": 6181 + }, + { + "epoch": 0.4989104995561294, + "grad_norm": 0.7016656994819641, + "learning_rate": 0.0001573389627768382, + "loss": 2.6045, + "step": 6182 + }, + { + "epoch": 0.49899120329271246, + "grad_norm": 0.7170618176460266, + "learning_rate": 0.00015732602812830253, + "loss": 2.6419, + "step": 6183 + }, + { + "epoch": 0.49907190702929544, + "grad_norm": 0.6963300704956055, + "learning_rate": 0.00015731309205105237, + "loss": 2.6377, + "step": 6184 + }, + { + "epoch": 0.49915261076587847, + "grad_norm": 0.7437995672225952, + 
"learning_rate": 0.00015730015454541014, + "loss": 2.7013, + "step": 6185 + }, + { + "epoch": 0.49923331450246144, + "grad_norm": 0.6846518516540527, + "learning_rate": 0.00015728721561169827, + "loss": 2.5526, + "step": 6186 + }, + { + "epoch": 0.4993140182390445, + "grad_norm": 0.7343618273735046, + "learning_rate": 0.00015727427525023924, + "loss": 2.6567, + "step": 6187 + }, + { + "epoch": 0.49939472197562745, + "grad_norm": 0.6947566270828247, + "learning_rate": 0.00015726133346135554, + "loss": 2.6642, + "step": 6188 + }, + { + "epoch": 0.4994754257122105, + "grad_norm": 0.7402610778808594, + "learning_rate": 0.00015724839024536976, + "loss": 2.6964, + "step": 6189 + }, + { + "epoch": 0.49955612944879346, + "grad_norm": 0.7318306565284729, + "learning_rate": 0.00015723544560260444, + "loss": 2.5864, + "step": 6190 + }, + { + "epoch": 0.4996368331853765, + "grad_norm": 0.752216100692749, + "learning_rate": 0.00015722249953338215, + "loss": 2.6357, + "step": 6191 + }, + { + "epoch": 0.49971753692195947, + "grad_norm": 0.70283442735672, + "learning_rate": 0.00015720955203802565, + "loss": 2.5892, + "step": 6192 + }, + { + "epoch": 0.4997982406585425, + "grad_norm": 0.7457823753356934, + "learning_rate": 0.00015719660311685755, + "loss": 2.6663, + "step": 6193 + }, + { + "epoch": 0.4998789443951255, + "grad_norm": 0.7296229600906372, + "learning_rate": 0.00015718365277020058, + "loss": 2.6238, + "step": 6194 + }, + { + "epoch": 0.4999596481317085, + "grad_norm": 0.6963346004486084, + "learning_rate": 0.0001571707009983775, + "loss": 2.6303, + "step": 6195 + }, + { + "epoch": 0.5000403518682915, + "grad_norm": 0.7074694633483887, + "learning_rate": 0.0001571577478017111, + "loss": 2.6077, + "step": 6196 + }, + { + "epoch": 0.5001210556048745, + "grad_norm": 0.7826260328292847, + "learning_rate": 0.00015714479318052423, + "loss": 2.6668, + "step": 6197 + }, + { + "epoch": 0.5002017593414575, + "grad_norm": 0.6908758282661438, + "learning_rate": 
0.00015713183713513974, + "loss": 2.6195, + "step": 6198 + }, + { + "epoch": 0.5002824630780405, + "grad_norm": 0.7571602463722229, + "learning_rate": 0.0001571188796658805, + "loss": 2.6546, + "step": 6199 + }, + { + "epoch": 0.5003631668146236, + "grad_norm": 0.7359431385993958, + "learning_rate": 0.0001571059207730695, + "loss": 2.5792, + "step": 6200 + }, + { + "epoch": 0.5004438705512065, + "grad_norm": 0.6886340379714966, + "learning_rate": 0.00015709296045702967, + "loss": 2.6099, + "step": 6201 + }, + { + "epoch": 0.5005245742877895, + "grad_norm": 0.6900473833084106, + "learning_rate": 0.000157079998718084, + "loss": 2.6461, + "step": 6202 + }, + { + "epoch": 0.5006052780243725, + "grad_norm": 0.66212397813797, + "learning_rate": 0.00015706703555655555, + "loss": 2.6178, + "step": 6203 + }, + { + "epoch": 0.5006859817609556, + "grad_norm": 0.7666565179824829, + "learning_rate": 0.00015705407097276744, + "loss": 2.7097, + "step": 6204 + }, + { + "epoch": 0.5007666854975386, + "grad_norm": 0.7294591069221497, + "learning_rate": 0.0001570411049670427, + "loss": 2.5995, + "step": 6205 + }, + { + "epoch": 0.5008473892341215, + "grad_norm": 0.7279765009880066, + "learning_rate": 0.00015702813753970453, + "loss": 2.5554, + "step": 6206 + }, + { + "epoch": 0.5009280929707045, + "grad_norm": 0.7174742817878723, + "learning_rate": 0.0001570151686910761, + "loss": 2.6523, + "step": 6207 + }, + { + "epoch": 0.5010087967072876, + "grad_norm": 0.67017662525177, + "learning_rate": 0.00015700219842148063, + "loss": 2.5613, + "step": 6208 + }, + { + "epoch": 0.5010895004438706, + "grad_norm": 0.7000258564949036, + "learning_rate": 0.00015698922673124138, + "loss": 2.5658, + "step": 6209 + }, + { + "epoch": 0.5011702041804535, + "grad_norm": 0.6894544363021851, + "learning_rate": 0.00015697625362068164, + "loss": 2.6925, + "step": 6210 + }, + { + "epoch": 0.5012509079170365, + "grad_norm": 0.6742957234382629, + "learning_rate": 0.00015696327909012466, + "loss": 2.6429, + 
"step": 6211 + }, + { + "epoch": 0.5013316116536196, + "grad_norm": 0.7039656639099121, + "learning_rate": 0.0001569503031398939, + "loss": 2.6313, + "step": 6212 + }, + { + "epoch": 0.5014123153902026, + "grad_norm": 0.720003604888916, + "learning_rate": 0.00015693732577031272, + "loss": 2.6207, + "step": 6213 + }, + { + "epoch": 0.5014930191267856, + "grad_norm": 0.8611499071121216, + "learning_rate": 0.00015692434698170456, + "loss": 2.6855, + "step": 6214 + }, + { + "epoch": 0.5015737228633685, + "grad_norm": 0.6664702296257019, + "learning_rate": 0.00015691136677439284, + "loss": 2.6174, + "step": 6215 + }, + { + "epoch": 0.5016544265999516, + "grad_norm": 0.7258509993553162, + "learning_rate": 0.00015689838514870111, + "loss": 2.6558, + "step": 6216 + }, + { + "epoch": 0.5017351303365346, + "grad_norm": 0.6972211599349976, + "learning_rate": 0.0001568854021049529, + "loss": 2.5913, + "step": 6217 + }, + { + "epoch": 0.5018158340731176, + "grad_norm": 0.7927280068397522, + "learning_rate": 0.00015687241764347177, + "loss": 2.6466, + "step": 6218 + }, + { + "epoch": 0.5018965378097006, + "grad_norm": 0.7044646143913269, + "learning_rate": 0.00015685943176458128, + "loss": 2.6195, + "step": 6219 + }, + { + "epoch": 0.5019772415462836, + "grad_norm": 0.6935598254203796, + "learning_rate": 0.00015684644446860516, + "loss": 2.6486, + "step": 6220 + }, + { + "epoch": 0.5020579452828666, + "grad_norm": 0.7965792417526245, + "learning_rate": 0.00015683345575586704, + "loss": 2.6265, + "step": 6221 + }, + { + "epoch": 0.5021386490194496, + "grad_norm": 0.727053701877594, + "learning_rate": 0.00015682046562669064, + "loss": 2.6714, + "step": 6222 + }, + { + "epoch": 0.5022193527560326, + "grad_norm": 0.7919184565544128, + "learning_rate": 0.0001568074740813997, + "loss": 2.7115, + "step": 6223 + }, + { + "epoch": 0.5023000564926156, + "grad_norm": 0.7724714279174805, + "learning_rate": 0.00015679448112031801, + "loss": 2.6636, + "step": 6224 + }, + { + "epoch": 
0.5023807602291986, + "grad_norm": 0.6893701553344727, + "learning_rate": 0.0001567814867437694, + "loss": 2.6562, + "step": 6225 + }, + { + "epoch": 0.5024614639657816, + "grad_norm": 0.7089633345603943, + "learning_rate": 0.00015676849095207769, + "loss": 2.6125, + "step": 6226 + }, + { + "epoch": 0.5025421677023646, + "grad_norm": 0.7620012760162354, + "learning_rate": 0.00015675549374556682, + "loss": 2.6935, + "step": 6227 + }, + { + "epoch": 0.5026228714389476, + "grad_norm": 0.7293741703033447, + "learning_rate": 0.00015674249512456065, + "loss": 2.66, + "step": 6228 + }, + { + "epoch": 0.5027035751755307, + "grad_norm": 0.7366519570350647, + "learning_rate": 0.00015672949508938318, + "loss": 2.5968, + "step": 6229 + }, + { + "epoch": 0.5027842789121136, + "grad_norm": 0.6646310091018677, + "learning_rate": 0.00015671649364035846, + "loss": 2.5751, + "step": 6230 + }, + { + "epoch": 0.5028649826486966, + "grad_norm": 0.6682632565498352, + "learning_rate": 0.00015670349077781038, + "loss": 2.5902, + "step": 6231 + }, + { + "epoch": 0.5029456863852796, + "grad_norm": 0.7327528595924377, + "learning_rate": 0.00015669048650206313, + "loss": 2.6487, + "step": 6232 + }, + { + "epoch": 0.5030263901218627, + "grad_norm": 0.7114281058311462, + "learning_rate": 0.00015667748081344074, + "loss": 2.5779, + "step": 6233 + }, + { + "epoch": 0.5031070938584457, + "grad_norm": 0.7908105850219727, + "learning_rate": 0.00015666447371226737, + "loss": 2.6099, + "step": 6234 + }, + { + "epoch": 0.5031877975950286, + "grad_norm": 0.7823575139045715, + "learning_rate": 0.00015665146519886725, + "loss": 2.6339, + "step": 6235 + }, + { + "epoch": 0.5032685013316116, + "grad_norm": 0.7404836416244507, + "learning_rate": 0.00015663845527356447, + "loss": 2.6035, + "step": 6236 + }, + { + "epoch": 0.5033492050681947, + "grad_norm": 0.7448995113372803, + "learning_rate": 0.00015662544393668334, + "loss": 2.6566, + "step": 6237 + }, + { + "epoch": 0.5034299088047777, + "grad_norm": 
0.7209747433662415, + "learning_rate": 0.00015661243118854815, + "loss": 2.682, + "step": 6238 + }, + { + "epoch": 0.5035106125413606, + "grad_norm": 0.691759467124939, + "learning_rate": 0.00015659941702948315, + "loss": 2.6435, + "step": 6239 + }, + { + "epoch": 0.5035913162779436, + "grad_norm": 0.7646063566207886, + "learning_rate": 0.00015658640145981275, + "loss": 2.591, + "step": 6240 + }, + { + "epoch": 0.5036720200145267, + "grad_norm": 0.8319387435913086, + "learning_rate": 0.00015657338447986133, + "loss": 2.5937, + "step": 6241 + }, + { + "epoch": 0.5037527237511097, + "grad_norm": 0.729193389415741, + "learning_rate": 0.00015656036608995323, + "loss": 2.651, + "step": 6242 + }, + { + "epoch": 0.5038334274876927, + "grad_norm": 0.720098614692688, + "learning_rate": 0.000156547346290413, + "loss": 2.681, + "step": 6243 + }, + { + "epoch": 0.5039141312242756, + "grad_norm": 0.7172541618347168, + "learning_rate": 0.00015653432508156508, + "loss": 2.5906, + "step": 6244 + }, + { + "epoch": 0.5039948349608587, + "grad_norm": 0.7352481484413147, + "learning_rate": 0.00015652130246373398, + "loss": 2.6376, + "step": 6245 + }, + { + "epoch": 0.5040755386974417, + "grad_norm": 0.6664925813674927, + "learning_rate": 0.0001565082784372443, + "loss": 2.706, + "step": 6246 + }, + { + "epoch": 0.5041562424340247, + "grad_norm": 0.7292987704277039, + "learning_rate": 0.0001564952530024206, + "loss": 2.6149, + "step": 6247 + }, + { + "epoch": 0.5042369461706077, + "grad_norm": 0.6904531121253967, + "learning_rate": 0.00015648222615958747, + "loss": 2.579, + "step": 6248 + }, + { + "epoch": 0.5043176499071907, + "grad_norm": 0.7385311722755432, + "learning_rate": 0.00015646919790906965, + "loss": 2.6137, + "step": 6249 + }, + { + "epoch": 0.5043983536437737, + "grad_norm": 0.7869507074356079, + "learning_rate": 0.0001564561682511918, + "loss": 2.6831, + "step": 6250 + }, + { + "epoch": 0.5044790573803567, + "grad_norm": 0.723680317401886, + "learning_rate": 
0.00015644313718627867, + "loss": 2.6083, + "step": 6251 + }, + { + "epoch": 0.5045597611169397, + "grad_norm": 0.7029969692230225, + "learning_rate": 0.00015643010471465502, + "loss": 2.6462, + "step": 6252 + }, + { + "epoch": 0.5046404648535228, + "grad_norm": 0.818975031375885, + "learning_rate": 0.00015641707083664566, + "loss": 2.6393, + "step": 6253 + }, + { + "epoch": 0.5047211685901057, + "grad_norm": 0.7237667441368103, + "learning_rate": 0.0001564040355525754, + "loss": 2.5995, + "step": 6254 + }, + { + "epoch": 0.5048018723266887, + "grad_norm": 0.8613824248313904, + "learning_rate": 0.00015639099886276912, + "loss": 2.748, + "step": 6255 + }, + { + "epoch": 0.5048825760632717, + "grad_norm": 0.6802194118499756, + "learning_rate": 0.00015637796076755178, + "loss": 2.6393, + "step": 6256 + }, + { + "epoch": 0.5049632797998548, + "grad_norm": 0.7816255688667297, + "learning_rate": 0.00015636492126724823, + "loss": 2.6218, + "step": 6257 + }, + { + "epoch": 0.5050439835364378, + "grad_norm": 0.7443990707397461, + "learning_rate": 0.00015635188036218356, + "loss": 2.6181, + "step": 6258 + }, + { + "epoch": 0.5051246872730207, + "grad_norm": 0.7869458794593811, + "learning_rate": 0.0001563388380526827, + "loss": 2.6641, + "step": 6259 + }, + { + "epoch": 0.5052053910096037, + "grad_norm": 0.7423158288002014, + "learning_rate": 0.00015632579433907072, + "loss": 2.5849, + "step": 6260 + }, + { + "epoch": 0.5052860947461868, + "grad_norm": 0.7888280153274536, + "learning_rate": 0.00015631274922167272, + "loss": 2.7095, + "step": 6261 + }, + { + "epoch": 0.5053667984827698, + "grad_norm": 0.7053405046463013, + "learning_rate": 0.0001562997027008138, + "loss": 2.5747, + "step": 6262 + }, + { + "epoch": 0.5054475022193528, + "grad_norm": 0.7930825352668762, + "learning_rate": 0.0001562866547768191, + "loss": 2.6359, + "step": 6263 + }, + { + "epoch": 0.5055282059559357, + "grad_norm": 0.7431469559669495, + "learning_rate": 0.0001562736054500139, + "loss": 2.6167, + 
"step": 6264 + }, + { + "epoch": 0.5056089096925188, + "grad_norm": 0.8395694494247437, + "learning_rate": 0.00015626055472072324, + "loss": 2.7217, + "step": 6265 + }, + { + "epoch": 0.5056896134291018, + "grad_norm": 0.7318898439407349, + "learning_rate": 0.0001562475025892726, + "loss": 2.6866, + "step": 6266 + }, + { + "epoch": 0.5057703171656848, + "grad_norm": 0.7487025856971741, + "learning_rate": 0.0001562344490559871, + "loss": 2.7206, + "step": 6267 + }, + { + "epoch": 0.5058510209022677, + "grad_norm": 0.8187269568443298, + "learning_rate": 0.00015622139412119212, + "loss": 2.658, + "step": 6268 + }, + { + "epoch": 0.5059317246388508, + "grad_norm": 0.6714495420455933, + "learning_rate": 0.00015620833778521307, + "loss": 2.6182, + "step": 6269 + }, + { + "epoch": 0.5060124283754338, + "grad_norm": 0.7556246519088745, + "learning_rate": 0.00015619528004837528, + "loss": 2.6502, + "step": 6270 + }, + { + "epoch": 0.5060931321120168, + "grad_norm": 0.6989960074424744, + "learning_rate": 0.00015618222091100424, + "loss": 2.6031, + "step": 6271 + }, + { + "epoch": 0.5061738358485998, + "grad_norm": 0.7002139091491699, + "learning_rate": 0.0001561691603734254, + "loss": 2.6563, + "step": 6272 + }, + { + "epoch": 0.5062545395851827, + "grad_norm": 0.7064816355705261, + "learning_rate": 0.00015615609843596423, + "loss": 2.6482, + "step": 6273 + }, + { + "epoch": 0.5063352433217658, + "grad_norm": 0.6971433162689209, + "learning_rate": 0.00015614303509894634, + "loss": 2.6522, + "step": 6274 + }, + { + "epoch": 0.5064159470583488, + "grad_norm": 0.6982942223548889, + "learning_rate": 0.0001561299703626972, + "loss": 2.6477, + "step": 6275 + }, + { + "epoch": 0.5064966507949318, + "grad_norm": 0.7219811081886292, + "learning_rate": 0.0001561169042275425, + "loss": 2.6514, + "step": 6276 + }, + { + "epoch": 0.5065773545315148, + "grad_norm": 0.7391932010650635, + "learning_rate": 0.00015610383669380787, + "loss": 2.698, + "step": 6277 + }, + { + "epoch": 
0.5066580582680978, + "grad_norm": 0.7852853536605835, + "learning_rate": 0.00015609076776181894, + "loss": 2.6281, + "step": 6278 + }, + { + "epoch": 0.5067387620046808, + "grad_norm": 0.7435647249221802, + "learning_rate": 0.00015607769743190147, + "loss": 2.6403, + "step": 6279 + }, + { + "epoch": 0.5068194657412638, + "grad_norm": 0.7300949096679688, + "learning_rate": 0.00015606462570438119, + "loss": 2.6125, + "step": 6280 + }, + { + "epoch": 0.5069001694778468, + "grad_norm": 0.7081549167633057, + "learning_rate": 0.00015605155257958388, + "loss": 2.6192, + "step": 6281 + }, + { + "epoch": 0.5069808732144299, + "grad_norm": 0.709020733833313, + "learning_rate": 0.00015603847805783537, + "loss": 2.6745, + "step": 6282 + }, + { + "epoch": 0.5070615769510128, + "grad_norm": 0.691684901714325, + "learning_rate": 0.0001560254021394615, + "loss": 2.5638, + "step": 6283 + }, + { + "epoch": 0.5071422806875958, + "grad_norm": 0.8338537812232971, + "learning_rate": 0.00015601232482478813, + "loss": 2.5835, + "step": 6284 + }, + { + "epoch": 0.5072229844241788, + "grad_norm": 0.659436047077179, + "learning_rate": 0.00015599924611414126, + "loss": 2.601, + "step": 6285 + }, + { + "epoch": 0.5073036881607619, + "grad_norm": 0.72590172290802, + "learning_rate": 0.00015598616600784676, + "loss": 2.602, + "step": 6286 + }, + { + "epoch": 0.5073843918973449, + "grad_norm": 0.6704443693161011, + "learning_rate": 0.00015597308450623066, + "loss": 2.5703, + "step": 6287 + }, + { + "epoch": 0.5074650956339278, + "grad_norm": 0.7298632264137268, + "learning_rate": 0.00015596000160961898, + "loss": 2.6859, + "step": 6288 + }, + { + "epoch": 0.5075457993705108, + "grad_norm": 0.6900345087051392, + "learning_rate": 0.00015594691731833776, + "loss": 2.6264, + "step": 6289 + }, + { + "epoch": 0.5076265031070939, + "grad_norm": 0.6705992221832275, + "learning_rate": 0.0001559338316327131, + "loss": 2.6135, + "step": 6290 + }, + { + "epoch": 0.5077072068436769, + "grad_norm": 
0.691545307636261, + "learning_rate": 0.0001559207445530712, + "loss": 2.6538, + "step": 6291 + }, + { + "epoch": 0.5077879105802598, + "grad_norm": 0.6579985618591309, + "learning_rate": 0.00015590765607973811, + "loss": 2.6224, + "step": 6292 + }, + { + "epoch": 0.5078686143168428, + "grad_norm": 0.6938790678977966, + "learning_rate": 0.00015589456621304014, + "loss": 2.5932, + "step": 6293 + }, + { + "epoch": 0.5079493180534259, + "grad_norm": 0.7421671748161316, + "learning_rate": 0.00015588147495330346, + "loss": 2.7098, + "step": 6294 + }, + { + "epoch": 0.5080300217900089, + "grad_norm": 0.7076674699783325, + "learning_rate": 0.0001558683823008543, + "loss": 2.664, + "step": 6295 + }, + { + "epoch": 0.5081107255265919, + "grad_norm": 0.6829726696014404, + "learning_rate": 0.00015585528825601906, + "loss": 2.6029, + "step": 6296 + }, + { + "epoch": 0.5081914292631748, + "grad_norm": 0.6968080401420593, + "learning_rate": 0.000155842192819124, + "loss": 2.6256, + "step": 6297 + }, + { + "epoch": 0.5082721329997579, + "grad_norm": 0.7453410625457764, + "learning_rate": 0.00015582909599049554, + "loss": 2.6577, + "step": 6298 + }, + { + "epoch": 0.5083528367363409, + "grad_norm": 0.6603519916534424, + "learning_rate": 0.00015581599777046007, + "loss": 2.6066, + "step": 6299 + }, + { + "epoch": 0.5084335404729239, + "grad_norm": 0.7096173763275146, + "learning_rate": 0.00015580289815934401, + "loss": 2.5488, + "step": 6300 + }, + { + "epoch": 0.5085142442095069, + "grad_norm": 0.799298107624054, + "learning_rate": 0.0001557897971574739, + "loss": 2.6021, + "step": 6301 + }, + { + "epoch": 0.50859494794609, + "grad_norm": 0.6820314526557922, + "learning_rate": 0.00015577669476517618, + "loss": 2.6276, + "step": 6302 + }, + { + "epoch": 0.5086756516826729, + "grad_norm": 0.7119347453117371, + "learning_rate": 0.00015576359098277742, + "loss": 2.6627, + "step": 6303 + }, + { + "epoch": 0.5087563554192559, + "grad_norm": 0.7638720273971558, + "learning_rate": 
0.00015575048581060422, + "loss": 2.6824, + "step": 6304 + }, + { + "epoch": 0.5088370591558389, + "grad_norm": 0.7360339164733887, + "learning_rate": 0.00015573737924898316, + "loss": 2.5805, + "step": 6305 + }, + { + "epoch": 0.508917762892422, + "grad_norm": 0.7220984697341919, + "learning_rate": 0.00015572427129824091, + "loss": 2.6374, + "step": 6306 + }, + { + "epoch": 0.5089984666290049, + "grad_norm": 0.670964777469635, + "learning_rate": 0.00015571116195870418, + "loss": 2.6371, + "step": 6307 + }, + { + "epoch": 0.5090791703655879, + "grad_norm": 0.7826075553894043, + "learning_rate": 0.00015569805123069968, + "loss": 2.7666, + "step": 6308 + }, + { + "epoch": 0.5091598741021709, + "grad_norm": 0.7691593766212463, + "learning_rate": 0.00015568493911455412, + "loss": 2.6242, + "step": 6309 + }, + { + "epoch": 0.509240577838754, + "grad_norm": 0.714500367641449, + "learning_rate": 0.0001556718256105943, + "loss": 2.6551, + "step": 6310 + }, + { + "epoch": 0.509321281575337, + "grad_norm": 0.7634009718894958, + "learning_rate": 0.00015565871071914706, + "loss": 2.7069, + "step": 6311 + }, + { + "epoch": 0.5094019853119199, + "grad_norm": 0.7134168148040771, + "learning_rate": 0.00015564559444053926, + "loss": 2.5816, + "step": 6312 + }, + { + "epoch": 0.5094826890485029, + "grad_norm": 0.6548121571540833, + "learning_rate": 0.0001556324767750978, + "loss": 2.6192, + "step": 6313 + }, + { + "epoch": 0.509563392785086, + "grad_norm": 0.7244428992271423, + "learning_rate": 0.0001556193577231496, + "loss": 2.6072, + "step": 6314 + }, + { + "epoch": 0.509644096521669, + "grad_norm": 0.6976662278175354, + "learning_rate": 0.0001556062372850216, + "loss": 2.6148, + "step": 6315 + }, + { + "epoch": 0.509724800258252, + "grad_norm": 0.772726833820343, + "learning_rate": 0.00015559311546104083, + "loss": 2.6458, + "step": 6316 + }, + { + "epoch": 0.5098055039948349, + "grad_norm": 0.7976188659667969, + "learning_rate": 0.00015557999225153428, + "loss": 2.6772, + 
"step": 6317 + }, + { + "epoch": 0.509886207731418, + "grad_norm": 0.6458039283752441, + "learning_rate": 0.00015556686765682903, + "loss": 2.6143, + "step": 6318 + }, + { + "epoch": 0.509966911468001, + "grad_norm": 0.7295405268669128, + "learning_rate": 0.0001555537416772522, + "loss": 2.5919, + "step": 6319 + }, + { + "epoch": 0.510047615204584, + "grad_norm": 0.657978355884552, + "learning_rate": 0.00015554061431313093, + "loss": 2.6245, + "step": 6320 + }, + { + "epoch": 0.510128318941167, + "grad_norm": 0.6726922392845154, + "learning_rate": 0.00015552748556479232, + "loss": 2.6207, + "step": 6321 + }, + { + "epoch": 0.51020902267775, + "grad_norm": 0.7954673767089844, + "learning_rate": 0.00015551435543256363, + "loss": 2.7177, + "step": 6322 + }, + { + "epoch": 0.510289726414333, + "grad_norm": 0.7186735272407532, + "learning_rate": 0.00015550122391677211, + "loss": 2.5953, + "step": 6323 + }, + { + "epoch": 0.510370430150916, + "grad_norm": 0.7835420966148376, + "learning_rate": 0.00015548809101774498, + "loss": 2.7039, + "step": 6324 + }, + { + "epoch": 0.510451133887499, + "grad_norm": 0.6966592073440552, + "learning_rate": 0.00015547495673580962, + "loss": 2.6287, + "step": 6325 + }, + { + "epoch": 0.5105318376240819, + "grad_norm": 0.6676180362701416, + "learning_rate": 0.00015546182107129328, + "loss": 2.638, + "step": 6326 + }, + { + "epoch": 0.510612541360665, + "grad_norm": 0.7285657525062561, + "learning_rate": 0.0001554486840245234, + "loss": 2.6661, + "step": 6327 + }, + { + "epoch": 0.510693245097248, + "grad_norm": 0.6453657150268555, + "learning_rate": 0.00015543554559582735, + "loss": 2.715, + "step": 6328 + }, + { + "epoch": 0.510773948833831, + "grad_norm": 0.7364684343338013, + "learning_rate": 0.0001554224057855326, + "loss": 2.6475, + "step": 6329 + }, + { + "epoch": 0.510854652570414, + "grad_norm": 0.670894980430603, + "learning_rate": 0.00015540926459396665, + "loss": 2.6091, + "step": 6330 + }, + { + "epoch": 0.510935356306997, + 
"grad_norm": 0.6750168204307556, + "learning_rate": 0.00015539612202145696, + "loss": 2.6473, + "step": 6331 + }, + { + "epoch": 0.51101606004358, + "grad_norm": 0.6552454233169556, + "learning_rate": 0.0001553829780683311, + "loss": 2.6158, + "step": 6332 + }, + { + "epoch": 0.511096763780163, + "grad_norm": 0.7387828230857849, + "learning_rate": 0.00015536983273491668, + "loss": 2.6219, + "step": 6333 + }, + { + "epoch": 0.511177467516746, + "grad_norm": 0.6993975639343262, + "learning_rate": 0.00015535668602154127, + "loss": 2.6446, + "step": 6334 + }, + { + "epoch": 0.5112581712533291, + "grad_norm": 0.6491217613220215, + "learning_rate": 0.00015534353792853254, + "loss": 2.6404, + "step": 6335 + }, + { + "epoch": 0.511338874989912, + "grad_norm": 0.7165521383285522, + "learning_rate": 0.0001553303884562182, + "loss": 2.6339, + "step": 6336 + }, + { + "epoch": 0.511419578726495, + "grad_norm": 0.7363756895065308, + "learning_rate": 0.0001553172376049259, + "loss": 2.6411, + "step": 6337 + }, + { + "epoch": 0.511500282463078, + "grad_norm": 0.7148438096046448, + "learning_rate": 0.00015530408537498347, + "loss": 2.5617, + "step": 6338 + }, + { + "epoch": 0.5115809861996611, + "grad_norm": 0.7140451669692993, + "learning_rate": 0.00015529093176671864, + "loss": 2.5898, + "step": 6339 + }, + { + "epoch": 0.5116616899362441, + "grad_norm": 0.7799252271652222, + "learning_rate": 0.00015527777678045926, + "loss": 2.6176, + "step": 6340 + }, + { + "epoch": 0.511742393672827, + "grad_norm": 0.7292928099632263, + "learning_rate": 0.00015526462041653323, + "loss": 2.6722, + "step": 6341 + }, + { + "epoch": 0.51182309740941, + "grad_norm": 0.6986904740333557, + "learning_rate": 0.00015525146267526837, + "loss": 2.6154, + "step": 6342 + }, + { + "epoch": 0.5119038011459931, + "grad_norm": 0.7239612936973572, + "learning_rate": 0.00015523830355699262, + "loss": 2.5664, + "step": 6343 + }, + { + "epoch": 0.5119845048825761, + "grad_norm": 0.6805121898651123, + 
"learning_rate": 0.00015522514306203395, + "loss": 2.6204, + "step": 6344 + }, + { + "epoch": 0.512065208619159, + "grad_norm": 0.7036689519882202, + "learning_rate": 0.00015521198119072035, + "loss": 2.6211, + "step": 6345 + }, + { + "epoch": 0.512145912355742, + "grad_norm": 0.7155849933624268, + "learning_rate": 0.00015519881794337988, + "loss": 2.6074, + "step": 6346 + }, + { + "epoch": 0.5122266160923251, + "grad_norm": 0.7183938026428223, + "learning_rate": 0.00015518565332034057, + "loss": 2.6148, + "step": 6347 + }, + { + "epoch": 0.5123073198289081, + "grad_norm": 0.7053570747375488, + "learning_rate": 0.0001551724873219305, + "loss": 2.6476, + "step": 6348 + }, + { + "epoch": 0.5123880235654911, + "grad_norm": 0.714846670627594, + "learning_rate": 0.00015515931994847785, + "loss": 2.5728, + "step": 6349 + }, + { + "epoch": 0.512468727302074, + "grad_norm": 0.7504729628562927, + "learning_rate": 0.00015514615120031076, + "loss": 2.6415, + "step": 6350 + }, + { + "epoch": 0.5125494310386571, + "grad_norm": 0.6940335035324097, + "learning_rate": 0.0001551329810777574, + "loss": 2.6115, + "step": 6351 + }, + { + "epoch": 0.5126301347752401, + "grad_norm": 0.7166119813919067, + "learning_rate": 0.00015511980958114608, + "loss": 2.6284, + "step": 6352 + }, + { + "epoch": 0.5127108385118231, + "grad_norm": 0.7787839770317078, + "learning_rate": 0.00015510663671080497, + "loss": 2.6385, + "step": 6353 + }, + { + "epoch": 0.5127915422484061, + "grad_norm": 0.7298412322998047, + "learning_rate": 0.00015509346246706245, + "loss": 2.629, + "step": 6354 + }, + { + "epoch": 0.5128722459849892, + "grad_norm": 0.7918897271156311, + "learning_rate": 0.00015508028685024683, + "loss": 2.6777, + "step": 6355 + }, + { + "epoch": 0.5129529497215721, + "grad_norm": 0.6867843866348267, + "learning_rate": 0.00015506710986068646, + "loss": 2.6101, + "step": 6356 + }, + { + "epoch": 0.5130336534581551, + "grad_norm": 0.716468870639801, + "learning_rate": 0.00015505393149870978, + 
"loss": 2.6558, + "step": 6357 + }, + { + "epoch": 0.5131143571947381, + "grad_norm": 0.6704092621803284, + "learning_rate": 0.0001550407517646452, + "loss": 2.6128, + "step": 6358 + }, + { + "epoch": 0.5131950609313212, + "grad_norm": 0.820716381072998, + "learning_rate": 0.00015502757065882124, + "loss": 2.6052, + "step": 6359 + }, + { + "epoch": 0.5132757646679041, + "grad_norm": 0.7328094840049744, + "learning_rate": 0.00015501438818156635, + "loss": 2.6399, + "step": 6360 + }, + { + "epoch": 0.5133564684044871, + "grad_norm": 0.6602808833122253, + "learning_rate": 0.00015500120433320911, + "loss": 2.5509, + "step": 6361 + }, + { + "epoch": 0.5134371721410701, + "grad_norm": 0.7013166546821594, + "learning_rate": 0.00015498801911407805, + "loss": 2.6439, + "step": 6362 + }, + { + "epoch": 0.5135178758776532, + "grad_norm": 0.7415499091148376, + "learning_rate": 0.00015497483252450186, + "loss": 2.575, + "step": 6363 + }, + { + "epoch": 0.5135985796142362, + "grad_norm": 0.7262336015701294, + "learning_rate": 0.00015496164456480912, + "loss": 2.6815, + "step": 6364 + }, + { + "epoch": 0.5136792833508191, + "grad_norm": 0.7353699803352356, + "learning_rate": 0.0001549484552353285, + "loss": 2.6172, + "step": 6365 + }, + { + "epoch": 0.5137599870874021, + "grad_norm": 0.7005086541175842, + "learning_rate": 0.00015493526453638879, + "loss": 2.5945, + "step": 6366 + }, + { + "epoch": 0.5138406908239852, + "grad_norm": 0.7469770908355713, + "learning_rate": 0.00015492207246831864, + "loss": 2.6797, + "step": 6367 + }, + { + "epoch": 0.5139213945605682, + "grad_norm": 0.6768934726715088, + "learning_rate": 0.00015490887903144693, + "loss": 2.6369, + "step": 6368 + }, + { + "epoch": 0.5140020982971512, + "grad_norm": 0.7625820636749268, + "learning_rate": 0.00015489568422610237, + "loss": 2.6182, + "step": 6369 + }, + { + "epoch": 0.5140828020337341, + "grad_norm": 0.749351978302002, + "learning_rate": 0.00015488248805261388, + "loss": 2.6066, + "step": 6370 + }, + { + 
"epoch": 0.5141635057703172, + "grad_norm": 0.8369480967521667, + "learning_rate": 0.00015486929051131032, + "loss": 2.7627, + "step": 6371 + }, + { + "epoch": 0.5142442095069002, + "grad_norm": 0.6482037305831909, + "learning_rate": 0.0001548560916025206, + "loss": 2.609, + "step": 6372 + }, + { + "epoch": 0.5143249132434832, + "grad_norm": 0.6801851391792297, + "learning_rate": 0.0001548428913265737, + "loss": 2.5878, + "step": 6373 + }, + { + "epoch": 0.5144056169800661, + "grad_norm": 0.744926929473877, + "learning_rate": 0.0001548296896837986, + "loss": 2.6569, + "step": 6374 + }, + { + "epoch": 0.5144863207166491, + "grad_norm": 0.6862614750862122, + "learning_rate": 0.00015481648667452425, + "loss": 2.5626, + "step": 6375 + }, + { + "epoch": 0.5145670244532322, + "grad_norm": 0.7186449766159058, + "learning_rate": 0.0001548032822990798, + "loss": 2.6783, + "step": 6376 + }, + { + "epoch": 0.5146477281898152, + "grad_norm": 0.699715256690979, + "learning_rate": 0.0001547900765577943, + "loss": 2.6709, + "step": 6377 + }, + { + "epoch": 0.5147284319263982, + "grad_norm": 0.7272205352783203, + "learning_rate": 0.00015477686945099687, + "loss": 2.6076, + "step": 6378 + }, + { + "epoch": 0.5148091356629811, + "grad_norm": 0.7667459845542908, + "learning_rate": 0.00015476366097901667, + "loss": 2.6541, + "step": 6379 + }, + { + "epoch": 0.5148898393995642, + "grad_norm": 0.6538121700286865, + "learning_rate": 0.00015475045114218285, + "loss": 2.5806, + "step": 6380 + }, + { + "epoch": 0.5149705431361472, + "grad_norm": 0.7388994097709656, + "learning_rate": 0.00015473723994082473, + "loss": 2.6293, + "step": 6381 + }, + { + "epoch": 0.5150512468727302, + "grad_norm": 0.7044215202331543, + "learning_rate": 0.00015472402737527142, + "loss": 2.5755, + "step": 6382 + }, + { + "epoch": 0.5151319506093132, + "grad_norm": 0.6807994246482849, + "learning_rate": 0.00015471081344585236, + "loss": 2.6493, + "step": 6383 + }, + { + "epoch": 0.5152126543458962, + "grad_norm": 
0.676278293132782, + "learning_rate": 0.00015469759815289681, + "loss": 2.6319, + "step": 6384 + }, + { + "epoch": 0.5152933580824792, + "grad_norm": 0.7515453696250916, + "learning_rate": 0.00015468438149673412, + "loss": 2.6415, + "step": 6385 + }, + { + "epoch": 0.5153740618190622, + "grad_norm": 0.8694239854812622, + "learning_rate": 0.0001546711634776937, + "loss": 2.5818, + "step": 6386 + }, + { + "epoch": 0.5154547655556452, + "grad_norm": 0.717090368270874, + "learning_rate": 0.000154657944096105, + "loss": 2.7132, + "step": 6387 + }, + { + "epoch": 0.5155354692922283, + "grad_norm": 0.7098804116249084, + "learning_rate": 0.00015464472335229742, + "loss": 2.564, + "step": 6388 + }, + { + "epoch": 0.5156161730288112, + "grad_norm": 0.6879690289497375, + "learning_rate": 0.0001546315012466005, + "loss": 2.6094, + "step": 6389 + }, + { + "epoch": 0.5156968767653942, + "grad_norm": 0.7110763788223267, + "learning_rate": 0.00015461827777934377, + "loss": 2.5982, + "step": 6390 + }, + { + "epoch": 0.5157775805019772, + "grad_norm": 0.7168039679527283, + "learning_rate": 0.00015460505295085677, + "loss": 2.5451, + "step": 6391 + }, + { + "epoch": 0.5158582842385603, + "grad_norm": 0.7059877514839172, + "learning_rate": 0.00015459182676146914, + "loss": 2.6655, + "step": 6392 + }, + { + "epoch": 0.5159389879751433, + "grad_norm": 0.7278143763542175, + "learning_rate": 0.00015457859921151043, + "loss": 2.6587, + "step": 6393 + }, + { + "epoch": 0.5160196917117262, + "grad_norm": 0.7301023602485657, + "learning_rate": 0.0001545653703013104, + "loss": 2.7672, + "step": 6394 + }, + { + "epoch": 0.5161003954483092, + "grad_norm": 0.6933302283287048, + "learning_rate": 0.0001545521400311987, + "loss": 2.5924, + "step": 6395 + }, + { + "epoch": 0.5161810991848923, + "grad_norm": 0.7074775099754333, + "learning_rate": 0.00015453890840150508, + "loss": 2.6663, + "step": 6396 + }, + { + "epoch": 0.5162618029214753, + "grad_norm": 0.7069801092147827, + "learning_rate": 
0.00015452567541255924, + "loss": 2.6791, + "step": 6397 + }, + { + "epoch": 0.5163425066580583, + "grad_norm": 0.6586462259292603, + "learning_rate": 0.00015451244106469108, + "loss": 2.6368, + "step": 6398 + }, + { + "epoch": 0.5164232103946412, + "grad_norm": 0.6862531900405884, + "learning_rate": 0.00015449920535823042, + "loss": 2.7099, + "step": 6399 + }, + { + "epoch": 0.5165039141312243, + "grad_norm": 0.7177795767784119, + "learning_rate": 0.00015448596829350706, + "loss": 2.5921, + "step": 6400 + }, + { + "epoch": 0.5165846178678073, + "grad_norm": 0.6936569213867188, + "learning_rate": 0.00015447272987085094, + "loss": 2.5739, + "step": 6401 + }, + { + "epoch": 0.5166653216043903, + "grad_norm": 0.7394363284111023, + "learning_rate": 0.00015445949009059202, + "loss": 2.5941, + "step": 6402 + }, + { + "epoch": 0.5167460253409732, + "grad_norm": 0.6713366508483887, + "learning_rate": 0.00015444624895306027, + "loss": 2.574, + "step": 6403 + }, + { + "epoch": 0.5168267290775563, + "grad_norm": 0.679128885269165, + "learning_rate": 0.0001544330064585856, + "loss": 2.6422, + "step": 6404 + }, + { + "epoch": 0.5169074328141393, + "grad_norm": 0.6803367137908936, + "learning_rate": 0.0001544197626074982, + "loss": 2.6503, + "step": 6405 + }, + { + "epoch": 0.5169881365507223, + "grad_norm": 0.8009794354438782, + "learning_rate": 0.000154406517400128, + "loss": 2.6434, + "step": 6406 + }, + { + "epoch": 0.5170688402873053, + "grad_norm": 0.7292529344558716, + "learning_rate": 0.00015439327083680517, + "loss": 2.6333, + "step": 6407 + }, + { + "epoch": 0.5171495440238884, + "grad_norm": 0.67046719789505, + "learning_rate": 0.00015438002291785988, + "loss": 2.5791, + "step": 6408 + }, + { + "epoch": 0.5172302477604713, + "grad_norm": 0.755501925945282, + "learning_rate": 0.00015436677364362225, + "loss": 2.5558, + "step": 6409 + }, + { + "epoch": 0.5173109514970543, + "grad_norm": 0.6957115530967712, + "learning_rate": 0.0001543535230144225, + "loss": 2.5839, + 
"step": 6410 + }, + { + "epoch": 0.5173916552336373, + "grad_norm": 0.6629074215888977, + "learning_rate": 0.0001543402710305909, + "loss": 2.6529, + "step": 6411 + }, + { + "epoch": 0.5174723589702204, + "grad_norm": 0.6647019386291504, + "learning_rate": 0.00015432701769245766, + "loss": 2.589, + "step": 6412 + }, + { + "epoch": 0.5175530627068033, + "grad_norm": 0.6472512483596802, + "learning_rate": 0.00015431376300035316, + "loss": 2.6184, + "step": 6413 + }, + { + "epoch": 0.5176337664433863, + "grad_norm": 0.6900136470794678, + "learning_rate": 0.0001543005069546077, + "loss": 2.7029, + "step": 6414 + }, + { + "epoch": 0.5177144701799693, + "grad_norm": 0.7702177166938782, + "learning_rate": 0.00015428724955555165, + "loss": 2.6189, + "step": 6415 + }, + { + "epoch": 0.5177951739165524, + "grad_norm": 0.641655445098877, + "learning_rate": 0.00015427399080351545, + "loss": 2.6486, + "step": 6416 + }, + { + "epoch": 0.5178758776531354, + "grad_norm": 0.6826485991477966, + "learning_rate": 0.00015426073069882952, + "loss": 2.6105, + "step": 6417 + }, + { + "epoch": 0.5179565813897183, + "grad_norm": 0.749812662601471, + "learning_rate": 0.00015424746924182434, + "loss": 2.5644, + "step": 6418 + }, + { + "epoch": 0.5180372851263013, + "grad_norm": 0.6737890243530273, + "learning_rate": 0.0001542342064328304, + "loss": 2.686, + "step": 6419 + }, + { + "epoch": 0.5181179888628844, + "grad_norm": 0.7131822109222412, + "learning_rate": 0.0001542209422721783, + "loss": 2.697, + "step": 6420 + }, + { + "epoch": 0.5181986925994674, + "grad_norm": 0.7543746829032898, + "learning_rate": 0.0001542076767601986, + "loss": 2.6349, + "step": 6421 + }, + { + "epoch": 0.5182793963360504, + "grad_norm": 0.7589309215545654, + "learning_rate": 0.00015419440989722184, + "loss": 2.63, + "step": 6422 + }, + { + "epoch": 0.5183601000726333, + "grad_norm": 0.7036365866661072, + "learning_rate": 0.00015418114168357872, + "loss": 2.605, + "step": 6423 + }, + { + "epoch": 
0.5184408038092164, + "grad_norm": 0.733161985874176, + "learning_rate": 0.00015416787211959998, + "loss": 2.6708, + "step": 6424 + }, + { + "epoch": 0.5185215075457994, + "grad_norm": 0.6928101181983948, + "learning_rate": 0.00015415460120561623, + "loss": 2.6549, + "step": 6425 + }, + { + "epoch": 0.5186022112823824, + "grad_norm": 0.6557250022888184, + "learning_rate": 0.00015414132894195825, + "loss": 2.6185, + "step": 6426 + }, + { + "epoch": 0.5186829150189654, + "grad_norm": 0.7236297726631165, + "learning_rate": 0.00015412805532895684, + "loss": 2.6185, + "step": 6427 + }, + { + "epoch": 0.5187636187555483, + "grad_norm": 0.7194060683250427, + "learning_rate": 0.0001541147803669428, + "loss": 2.6123, + "step": 6428 + }, + { + "epoch": 0.5188443224921314, + "grad_norm": 0.7077342867851257, + "learning_rate": 0.00015410150405624696, + "loss": 2.6628, + "step": 6429 + }, + { + "epoch": 0.5189250262287144, + "grad_norm": 0.7036150693893433, + "learning_rate": 0.00015408822639720023, + "loss": 2.5966, + "step": 6430 + }, + { + "epoch": 0.5190057299652974, + "grad_norm": 0.7047349810600281, + "learning_rate": 0.00015407494739013352, + "loss": 2.6626, + "step": 6431 + }, + { + "epoch": 0.5190864337018803, + "grad_norm": 0.7537584900856018, + "learning_rate": 0.00015406166703537777, + "loss": 2.6452, + "step": 6432 + }, + { + "epoch": 0.5191671374384634, + "grad_norm": 0.7944707870483398, + "learning_rate": 0.00015404838533326394, + "loss": 2.6834, + "step": 6433 + }, + { + "epoch": 0.5192478411750464, + "grad_norm": 0.8602458238601685, + "learning_rate": 0.00015403510228412305, + "loss": 2.6238, + "step": 6434 + }, + { + "epoch": 0.5193285449116294, + "grad_norm": 0.7181896567344666, + "learning_rate": 0.0001540218178882862, + "loss": 2.652, + "step": 6435 + }, + { + "epoch": 0.5194092486482124, + "grad_norm": 0.7470960021018982, + "learning_rate": 0.0001540085321460844, + "loss": 2.6703, + "step": 6436 + }, + { + "epoch": 0.5194899523847955, + "grad_norm": 
0.8249944448471069, + "learning_rate": 0.00015399524505784883, + "loss": 2.5945, + "step": 6437 + }, + { + "epoch": 0.5195706561213784, + "grad_norm": 0.7332444190979004, + "learning_rate": 0.00015398195662391057, + "loss": 2.6472, + "step": 6438 + }, + { + "epoch": 0.5196513598579614, + "grad_norm": 0.7727739810943604, + "learning_rate": 0.0001539686668446009, + "loss": 2.6276, + "step": 6439 + }, + { + "epoch": 0.5197320635945444, + "grad_norm": 0.7161617279052734, + "learning_rate": 0.00015395537572025094, + "loss": 2.624, + "step": 6440 + }, + { + "epoch": 0.5198127673311275, + "grad_norm": 0.7657529711723328, + "learning_rate": 0.00015394208325119198, + "loss": 2.6604, + "step": 6441 + }, + { + "epoch": 0.5198934710677104, + "grad_norm": 0.732904314994812, + "learning_rate": 0.00015392878943775527, + "loss": 2.6334, + "step": 6442 + }, + { + "epoch": 0.5199741748042934, + "grad_norm": 0.7058991193771362, + "learning_rate": 0.0001539154942802722, + "loss": 2.5936, + "step": 6443 + }, + { + "epoch": 0.5200548785408764, + "grad_norm": 0.7328821420669556, + "learning_rate": 0.00015390219777907405, + "loss": 2.5969, + "step": 6444 + }, + { + "epoch": 0.5201355822774595, + "grad_norm": 0.7899969220161438, + "learning_rate": 0.00015388889993449224, + "loss": 2.5856, + "step": 6445 + }, + { + "epoch": 0.5202162860140425, + "grad_norm": 0.6963860392570496, + "learning_rate": 0.00015387560074685817, + "loss": 2.6139, + "step": 6446 + }, + { + "epoch": 0.5202969897506254, + "grad_norm": 0.812053918838501, + "learning_rate": 0.00015386230021650327, + "loss": 2.716, + "step": 6447 + }, + { + "epoch": 0.5203776934872084, + "grad_norm": 0.766781210899353, + "learning_rate": 0.0001538489983437591, + "loss": 2.6509, + "step": 6448 + }, + { + "epoch": 0.5204583972237915, + "grad_norm": 0.6877299547195435, + "learning_rate": 0.00015383569512895712, + "loss": 2.6076, + "step": 6449 + }, + { + "epoch": 0.5205391009603745, + "grad_norm": 0.7009176015853882, + "learning_rate": 
0.00015382239057242888, + "loss": 2.608, + "step": 6450 + }, + { + "epoch": 0.5206198046969575, + "grad_norm": 0.7187578678131104, + "learning_rate": 0.000153809084674506, + "loss": 2.5946, + "step": 6451 + }, + { + "epoch": 0.5207005084335404, + "grad_norm": 0.7242687344551086, + "learning_rate": 0.00015379577743552001, + "loss": 2.6752, + "step": 6452 + }, + { + "epoch": 0.5207812121701235, + "grad_norm": 0.7668174505233765, + "learning_rate": 0.00015378246885580266, + "loss": 2.6694, + "step": 6453 + }, + { + "epoch": 0.5208619159067065, + "grad_norm": 0.7676039338111877, + "learning_rate": 0.00015376915893568557, + "loss": 2.6379, + "step": 6454 + }, + { + "epoch": 0.5209426196432895, + "grad_norm": 0.7394412159919739, + "learning_rate": 0.00015375584767550053, + "loss": 2.6046, + "step": 6455 + }, + { + "epoch": 0.5210233233798724, + "grad_norm": 0.7246636748313904, + "learning_rate": 0.00015374253507557923, + "loss": 2.592, + "step": 6456 + }, + { + "epoch": 0.5211040271164555, + "grad_norm": 0.7121255993843079, + "learning_rate": 0.00015372922113625345, + "loss": 2.634, + "step": 6457 + }, + { + "epoch": 0.5211847308530385, + "grad_norm": 0.7378345131874084, + "learning_rate": 0.00015371590585785505, + "loss": 2.5753, + "step": 6458 + }, + { + "epoch": 0.5212654345896215, + "grad_norm": 0.6682030558586121, + "learning_rate": 0.00015370258924071587, + "loss": 2.6305, + "step": 6459 + }, + { + "epoch": 0.5213461383262045, + "grad_norm": 0.7164177894592285, + "learning_rate": 0.00015368927128516776, + "loss": 2.7188, + "step": 6460 + }, + { + "epoch": 0.5214268420627876, + "grad_norm": 0.7341115474700928, + "learning_rate": 0.00015367595199154273, + "loss": 2.6204, + "step": 6461 + }, + { + "epoch": 0.5215075457993705, + "grad_norm": 0.6781840920448303, + "learning_rate": 0.00015366263136017258, + "loss": 2.6104, + "step": 6462 + }, + { + "epoch": 0.5215882495359535, + "grad_norm": 0.7029077410697937, + "learning_rate": 0.0001536493093913894, + "loss": 2.6055, 
+ "step": 6463 + }, + { + "epoch": 0.5216689532725365, + "grad_norm": 0.6958553194999695, + "learning_rate": 0.00015363598608552522, + "loss": 2.5991, + "step": 6464 + }, + { + "epoch": 0.5217496570091196, + "grad_norm": 0.6919750571250916, + "learning_rate": 0.00015362266144291207, + "loss": 2.6022, + "step": 6465 + }, + { + "epoch": 0.5218303607457025, + "grad_norm": 0.6980622410774231, + "learning_rate": 0.000153609335463882, + "loss": 2.6289, + "step": 6466 + }, + { + "epoch": 0.5219110644822855, + "grad_norm": 0.7468248009681702, + "learning_rate": 0.00015359600814876715, + "loss": 2.6327, + "step": 6467 + }, + { + "epoch": 0.5219917682188685, + "grad_norm": 0.7183729410171509, + "learning_rate": 0.00015358267949789966, + "loss": 2.6389, + "step": 6468 + }, + { + "epoch": 0.5220724719554516, + "grad_norm": 0.6558868885040283, + "learning_rate": 0.00015356934951161178, + "loss": 2.6261, + "step": 6469 + }, + { + "epoch": 0.5221531756920346, + "grad_norm": 0.8000216484069824, + "learning_rate": 0.00015355601819023562, + "loss": 2.6908, + "step": 6470 + }, + { + "epoch": 0.5222338794286175, + "grad_norm": 0.775056004524231, + "learning_rate": 0.00015354268553410355, + "loss": 2.6763, + "step": 6471 + }, + { + "epoch": 0.5223145831652005, + "grad_norm": 0.7345123291015625, + "learning_rate": 0.00015352935154354776, + "loss": 2.582, + "step": 6472 + }, + { + "epoch": 0.5223952869017836, + "grad_norm": 0.731311023235321, + "learning_rate": 0.0001535160162189006, + "loss": 2.6519, + "step": 6473 + }, + { + "epoch": 0.5224759906383666, + "grad_norm": 0.6481007933616638, + "learning_rate": 0.00015350267956049443, + "loss": 2.5695, + "step": 6474 + }, + { + "epoch": 0.5225566943749496, + "grad_norm": 0.7698814868927002, + "learning_rate": 0.00015348934156866163, + "loss": 2.5732, + "step": 6475 + }, + { + "epoch": 0.5226373981115325, + "grad_norm": 0.7404680848121643, + "learning_rate": 0.00015347600224373462, + "loss": 2.5826, + "step": 6476 + }, + { + "epoch": 
0.5227181018481155, + "grad_norm": 0.6965613961219788, + "learning_rate": 0.00015346266158604584, + "loss": 2.6069, + "step": 6477 + }, + { + "epoch": 0.5227988055846986, + "grad_norm": 0.6611152291297913, + "learning_rate": 0.00015344931959592777, + "loss": 2.4937, + "step": 6478 + }, + { + "epoch": 0.5228795093212816, + "grad_norm": 0.7418150305747986, + "learning_rate": 0.00015343597627371296, + "loss": 2.5747, + "step": 6479 + }, + { + "epoch": 0.5229602130578646, + "grad_norm": 0.6847610473632812, + "learning_rate": 0.00015342263161973393, + "loss": 2.5906, + "step": 6480 + }, + { + "epoch": 0.5230409167944475, + "grad_norm": 0.7054881453514099, + "learning_rate": 0.00015340928563432326, + "loss": 2.5914, + "step": 6481 + }, + { + "epoch": 0.5231216205310306, + "grad_norm": 0.6918888092041016, + "learning_rate": 0.0001533959383178136, + "loss": 2.6412, + "step": 6482 + }, + { + "epoch": 0.5232023242676136, + "grad_norm": 0.7232856154441833, + "learning_rate": 0.00015338258967053755, + "loss": 2.6364, + "step": 6483 + }, + { + "epoch": 0.5232830280041966, + "grad_norm": 0.7345031499862671, + "learning_rate": 0.00015336923969282786, + "loss": 2.6649, + "step": 6484 + }, + { + "epoch": 0.5233637317407795, + "grad_norm": 0.7644383907318115, + "learning_rate": 0.0001533558883850172, + "loss": 2.6949, + "step": 6485 + }, + { + "epoch": 0.5234444354773626, + "grad_norm": 0.6532372832298279, + "learning_rate": 0.0001533425357474383, + "loss": 2.5915, + "step": 6486 + }, + { + "epoch": 0.5235251392139456, + "grad_norm": 0.7089118361473083, + "learning_rate": 0.000153329181780424, + "loss": 2.6446, + "step": 6487 + }, + { + "epoch": 0.5236058429505286, + "grad_norm": 0.6966068148612976, + "learning_rate": 0.00015331582648430705, + "loss": 2.6764, + "step": 6488 + }, + { + "epoch": 0.5236865466871116, + "grad_norm": 0.7130835056304932, + "learning_rate": 0.00015330246985942035, + "loss": 2.6279, + "step": 6489 + }, + { + "epoch": 0.5237672504236947, + "grad_norm": 
0.729727029800415, + "learning_rate": 0.00015328911190609678, + "loss": 2.612, + "step": 6490 + }, + { + "epoch": 0.5238479541602776, + "grad_norm": 0.6804213523864746, + "learning_rate": 0.0001532757526246692, + "loss": 2.6113, + "step": 6491 + }, + { + "epoch": 0.5239286578968606, + "grad_norm": 0.7324437499046326, + "learning_rate": 0.0001532623920154707, + "loss": 2.6054, + "step": 6492 + }, + { + "epoch": 0.5240093616334436, + "grad_norm": 0.6166699528694153, + "learning_rate": 0.00015324903007883406, + "loss": 2.5822, + "step": 6493 + }, + { + "epoch": 0.5240900653700267, + "grad_norm": 0.7339944839477539, + "learning_rate": 0.00015323566681509242, + "loss": 2.6204, + "step": 6494 + }, + { + "epoch": 0.5241707691066096, + "grad_norm": 0.7267727255821228, + "learning_rate": 0.00015322230222457886, + "loss": 2.6094, + "step": 6495 + }, + { + "epoch": 0.5242514728431926, + "grad_norm": 0.6417120695114136, + "learning_rate": 0.00015320893630762635, + "loss": 2.6044, + "step": 6496 + }, + { + "epoch": 0.5243321765797756, + "grad_norm": 0.7092922329902649, + "learning_rate": 0.00015319556906456808, + "loss": 2.6428, + "step": 6497 + }, + { + "epoch": 0.5244128803163587, + "grad_norm": 0.7482922673225403, + "learning_rate": 0.00015318220049573714, + "loss": 2.6025, + "step": 6498 + }, + { + "epoch": 0.5244935840529417, + "grad_norm": 0.691925048828125, + "learning_rate": 0.00015316883060146675, + "loss": 2.6308, + "step": 6499 + }, + { + "epoch": 0.5245742877895246, + "grad_norm": 0.7084488272666931, + "learning_rate": 0.00015315545938209015, + "loss": 2.6535, + "step": 6500 + }, + { + "epoch": 0.5246549915261076, + "grad_norm": 0.7182802557945251, + "learning_rate": 0.00015314208683794056, + "loss": 2.6045, + "step": 6501 + }, + { + "epoch": 0.5247356952626907, + "grad_norm": 0.7043096423149109, + "learning_rate": 0.00015312871296935122, + "loss": 2.6465, + "step": 6502 + }, + { + "epoch": 0.5248163989992737, + "grad_norm": 0.7679466009140015, + "learning_rate": 
0.00015311533777665547, + "loss": 2.6624, + "step": 6503 + }, + { + "epoch": 0.5248971027358567, + "grad_norm": 0.6825870275497437, + "learning_rate": 0.00015310196126018668, + "loss": 2.5548, + "step": 6504 + }, + { + "epoch": 0.5249778064724396, + "grad_norm": 0.7364058494567871, + "learning_rate": 0.00015308858342027816, + "loss": 2.6495, + "step": 6505 + }, + { + "epoch": 0.5250585102090227, + "grad_norm": 0.7333239316940308, + "learning_rate": 0.00015307520425726341, + "loss": 2.5835, + "step": 6506 + }, + { + "epoch": 0.5251392139456057, + "grad_norm": 0.7479620575904846, + "learning_rate": 0.00015306182377147583, + "loss": 2.6065, + "step": 6507 + }, + { + "epoch": 0.5252199176821887, + "grad_norm": 0.7347591519355774, + "learning_rate": 0.00015304844196324888, + "loss": 2.6624, + "step": 6508 + }, + { + "epoch": 0.5253006214187717, + "grad_norm": 0.6879193782806396, + "learning_rate": 0.0001530350588329161, + "loss": 2.6598, + "step": 6509 + }, + { + "epoch": 0.5253813251553547, + "grad_norm": 0.7841597199440002, + "learning_rate": 0.000153021674380811, + "loss": 2.53, + "step": 6510 + }, + { + "epoch": 0.5254620288919377, + "grad_norm": 0.7916845679283142, + "learning_rate": 0.0001530082886072672, + "loss": 2.6995, + "step": 6511 + }, + { + "epoch": 0.5255427326285207, + "grad_norm": 0.7066318988800049, + "learning_rate": 0.0001529949015126183, + "loss": 2.58, + "step": 6512 + }, + { + "epoch": 0.5256234363651037, + "grad_norm": 0.6871134638786316, + "learning_rate": 0.00015298151309719787, + "loss": 2.6095, + "step": 6513 + }, + { + "epoch": 0.5257041401016868, + "grad_norm": 0.7479702830314636, + "learning_rate": 0.00015296812336133963, + "loss": 2.608, + "step": 6514 + }, + { + "epoch": 0.5257848438382697, + "grad_norm": 0.6772119402885437, + "learning_rate": 0.00015295473230537735, + "loss": 2.5679, + "step": 6515 + }, + { + "epoch": 0.5258655475748527, + "grad_norm": 0.7365416884422302, + "learning_rate": 0.0001529413399296447, + "loss": 2.6722, + 
"step": 6516 + }, + { + "epoch": 0.5259462513114357, + "grad_norm": 0.7538040280342102, + "learning_rate": 0.00015292794623447545, + "loss": 2.5562, + "step": 6517 + }, + { + "epoch": 0.5260269550480188, + "grad_norm": 0.7471820712089539, + "learning_rate": 0.00015291455122020344, + "loss": 2.7079, + "step": 6518 + }, + { + "epoch": 0.5261076587846018, + "grad_norm": 0.7605932354927063, + "learning_rate": 0.00015290115488716247, + "loss": 2.6696, + "step": 6519 + }, + { + "epoch": 0.5261883625211847, + "grad_norm": 0.7081854939460754, + "learning_rate": 0.00015288775723568647, + "loss": 2.6502, + "step": 6520 + }, + { + "epoch": 0.5262690662577677, + "grad_norm": 0.7236372828483582, + "learning_rate": 0.0001528743582661093, + "loss": 2.662, + "step": 6521 + }, + { + "epoch": 0.5263497699943508, + "grad_norm": 0.6710047721862793, + "learning_rate": 0.0001528609579787649, + "loss": 2.5947, + "step": 6522 + }, + { + "epoch": 0.5264304737309338, + "grad_norm": 0.709381103515625, + "learning_rate": 0.00015284755637398726, + "loss": 2.5922, + "step": 6523 + }, + { + "epoch": 0.5265111774675167, + "grad_norm": 0.7029775381088257, + "learning_rate": 0.00015283415345211033, + "loss": 2.6777, + "step": 6524 + }, + { + "epoch": 0.5265918812040997, + "grad_norm": 0.7250857949256897, + "learning_rate": 0.00015282074921346825, + "loss": 2.6027, + "step": 6525 + }, + { + "epoch": 0.5266725849406828, + "grad_norm": 0.7192760705947876, + "learning_rate": 0.00015280734365839498, + "loss": 2.6544, + "step": 6526 + }, + { + "epoch": 0.5267532886772658, + "grad_norm": 0.693583071231842, + "learning_rate": 0.0001527939367872247, + "loss": 2.6302, + "step": 6527 + }, + { + "epoch": 0.5268339924138488, + "grad_norm": 0.7031428217887878, + "learning_rate": 0.00015278052860029145, + "loss": 2.6944, + "step": 6528 + }, + { + "epoch": 0.5269146961504317, + "grad_norm": 0.6986895799636841, + "learning_rate": 0.00015276711909792949, + "loss": 2.6595, + "step": 6529 + }, + { + "epoch": 
0.5269953998870147, + "grad_norm": 0.7375979423522949, + "learning_rate": 0.000152753708280473, + "loss": 2.6839, + "step": 6530 + }, + { + "epoch": 0.5270761036235978, + "grad_norm": 0.7126755714416504, + "learning_rate": 0.0001527402961482562, + "loss": 2.5597, + "step": 6531 + }, + { + "epoch": 0.5271568073601808, + "grad_norm": 0.6631070971488953, + "learning_rate": 0.00015272688270161338, + "loss": 2.5566, + "step": 6532 + }, + { + "epoch": 0.5272375110967638, + "grad_norm": 0.6896609663963318, + "learning_rate": 0.00015271346794087874, + "loss": 2.5801, + "step": 6533 + }, + { + "epoch": 0.5273182148333467, + "grad_norm": 0.7437502145767212, + "learning_rate": 0.00015270005186638673, + "loss": 2.6572, + "step": 6534 + }, + { + "epoch": 0.5273989185699298, + "grad_norm": 0.7013052701950073, + "learning_rate": 0.00015268663447847166, + "loss": 2.621, + "step": 6535 + }, + { + "epoch": 0.5274796223065128, + "grad_norm": 0.7161773443222046, + "learning_rate": 0.00015267321577746795, + "loss": 2.5989, + "step": 6536 + }, + { + "epoch": 0.5275603260430958, + "grad_norm": 0.7654534578323364, + "learning_rate": 0.00015265979576371, + "loss": 2.6338, + "step": 6537 + }, + { + "epoch": 0.5276410297796787, + "grad_norm": 0.694646954536438, + "learning_rate": 0.0001526463744375323, + "loss": 2.6036, + "step": 6538 + }, + { + "epoch": 0.5277217335162618, + "grad_norm": 0.6594679355621338, + "learning_rate": 0.0001526329517992693, + "loss": 2.6256, + "step": 6539 + }, + { + "epoch": 0.5278024372528448, + "grad_norm": 0.6424389481544495, + "learning_rate": 0.00015261952784925557, + "loss": 2.6389, + "step": 6540 + }, + { + "epoch": 0.5278831409894278, + "grad_norm": 0.7465235590934753, + "learning_rate": 0.0001526061025878257, + "loss": 2.5449, + "step": 6541 + }, + { + "epoch": 0.5279638447260108, + "grad_norm": 0.6900132298469543, + "learning_rate": 0.0001525926760153142, + "loss": 2.5597, + "step": 6542 + }, + { + "epoch": 0.5280445484625939, + "grad_norm": 
0.7505282163619995, + "learning_rate": 0.00015257924813205572, + "loss": 2.6526, + "step": 6543 + }, + { + "epoch": 0.5281252521991768, + "grad_norm": 0.72642582654953, + "learning_rate": 0.00015256581893838495, + "loss": 2.6593, + "step": 6544 + }, + { + "epoch": 0.5282059559357598, + "grad_norm": 0.6901132464408875, + "learning_rate": 0.00015255238843463656, + "loss": 2.6726, + "step": 6545 + }, + { + "epoch": 0.5282866596723428, + "grad_norm": 0.7741395831108093, + "learning_rate": 0.0001525389566211453, + "loss": 2.5929, + "step": 6546 + }, + { + "epoch": 0.5283673634089259, + "grad_norm": 0.7282403111457825, + "learning_rate": 0.00015252552349824585, + "loss": 2.5696, + "step": 6547 + }, + { + "epoch": 0.5284480671455088, + "grad_norm": 0.7421764731407166, + "learning_rate": 0.0001525120890662731, + "loss": 2.5593, + "step": 6548 + }, + { + "epoch": 0.5285287708820918, + "grad_norm": 0.6830468773841858, + "learning_rate": 0.00015249865332556182, + "loss": 2.6396, + "step": 6549 + }, + { + "epoch": 0.5286094746186748, + "grad_norm": 0.6758440732955933, + "learning_rate": 0.00015248521627644684, + "loss": 2.5375, + "step": 6550 + }, + { + "epoch": 0.5286901783552579, + "grad_norm": 0.6897253394126892, + "learning_rate": 0.00015247177791926308, + "loss": 2.6148, + "step": 6551 + }, + { + "epoch": 0.5287708820918409, + "grad_norm": 0.6391426920890808, + "learning_rate": 0.00015245833825434547, + "loss": 2.5563, + "step": 6552 + }, + { + "epoch": 0.5288515858284238, + "grad_norm": 0.7213610410690308, + "learning_rate": 0.00015244489728202893, + "loss": 2.6158, + "step": 6553 + }, + { + "epoch": 0.5289322895650068, + "grad_norm": 0.6678160429000854, + "learning_rate": 0.00015243145500264845, + "loss": 2.6177, + "step": 6554 + }, + { + "epoch": 0.5290129933015899, + "grad_norm": 0.7041724324226379, + "learning_rate": 0.00015241801141653905, + "loss": 2.6504, + "step": 6555 + }, + { + "epoch": 0.5290936970381729, + "grad_norm": 0.6551648378372192, + "learning_rate": 
0.0001524045665240358, + "loss": 2.577, + "step": 6556 + }, + { + "epoch": 0.5291744007747559, + "grad_norm": 0.7190412878990173, + "learning_rate": 0.00015239112032547377, + "loss": 2.596, + "step": 6557 + }, + { + "epoch": 0.5292551045113388, + "grad_norm": 0.6936302781105042, + "learning_rate": 0.00015237767282118807, + "loss": 2.6551, + "step": 6558 + }, + { + "epoch": 0.5293358082479219, + "grad_norm": 0.6901839971542358, + "learning_rate": 0.0001523642240115138, + "loss": 2.6263, + "step": 6559 + }, + { + "epoch": 0.5294165119845049, + "grad_norm": 0.6905068159103394, + "learning_rate": 0.00015235077389678624, + "loss": 2.6323, + "step": 6560 + }, + { + "epoch": 0.5294972157210879, + "grad_norm": 0.7495188117027283, + "learning_rate": 0.00015233732247734057, + "loss": 2.6243, + "step": 6561 + }, + { + "epoch": 0.5295779194576709, + "grad_norm": 0.6758708357810974, + "learning_rate": 0.00015232386975351197, + "loss": 2.6184, + "step": 6562 + }, + { + "epoch": 0.5296586231942539, + "grad_norm": 0.6443266868591309, + "learning_rate": 0.00015231041572563573, + "loss": 2.6543, + "step": 6563 + }, + { + "epoch": 0.5297393269308369, + "grad_norm": 0.7384275794029236, + "learning_rate": 0.00015229696039404723, + "loss": 2.6117, + "step": 6564 + }, + { + "epoch": 0.5298200306674199, + "grad_norm": 0.6873897314071655, + "learning_rate": 0.00015228350375908178, + "loss": 2.5689, + "step": 6565 + }, + { + "epoch": 0.5299007344040029, + "grad_norm": 0.6715645790100098, + "learning_rate": 0.00015227004582107472, + "loss": 2.5943, + "step": 6566 + }, + { + "epoch": 0.529981438140586, + "grad_norm": 0.6814208030700684, + "learning_rate": 0.00015225658658036151, + "loss": 2.5562, + "step": 6567 + }, + { + "epoch": 0.5300621418771689, + "grad_norm": 0.6942310929298401, + "learning_rate": 0.00015224312603727755, + "loss": 2.5902, + "step": 6568 + }, + { + "epoch": 0.5301428456137519, + "grad_norm": 0.6856299042701721, + "learning_rate": 0.0001522296641921583, + "loss": 2.6115, 
+ "step": 6569 + }, + { + "epoch": 0.5302235493503349, + "grad_norm": 0.870833694934845, + "learning_rate": 0.0001522162010453393, + "loss": 2.7492, + "step": 6570 + }, + { + "epoch": 0.530304253086918, + "grad_norm": 0.6796989440917969, + "learning_rate": 0.0001522027365971561, + "loss": 2.6957, + "step": 6571 + }, + { + "epoch": 0.530384956823501, + "grad_norm": 0.7043026685714722, + "learning_rate": 0.00015218927084794423, + "loss": 2.604, + "step": 6572 + }, + { + "epoch": 0.5304656605600839, + "grad_norm": 0.7533933520317078, + "learning_rate": 0.00015217580379803933, + "loss": 2.6271, + "step": 6573 + }, + { + "epoch": 0.5305463642966669, + "grad_norm": 0.7526697516441345, + "learning_rate": 0.000152162335447777, + "loss": 2.553, + "step": 6574 + }, + { + "epoch": 0.53062706803325, + "grad_norm": 0.6942071318626404, + "learning_rate": 0.00015214886579749284, + "loss": 2.7206, + "step": 6575 + }, + { + "epoch": 0.530707771769833, + "grad_norm": 0.7133236527442932, + "learning_rate": 0.00015213539484752273, + "loss": 2.6545, + "step": 6576 + }, + { + "epoch": 0.530788475506416, + "grad_norm": 0.7229849696159363, + "learning_rate": 0.00015212192259820222, + "loss": 2.6647, + "step": 6577 + }, + { + "epoch": 0.5308691792429989, + "grad_norm": 0.7142449617385864, + "learning_rate": 0.0001521084490498672, + "loss": 2.5777, + "step": 6578 + }, + { + "epoch": 0.5309498829795819, + "grad_norm": 0.6950247287750244, + "learning_rate": 0.00015209497420285342, + "loss": 2.6159, + "step": 6579 + }, + { + "epoch": 0.531030586716165, + "grad_norm": 0.7492622137069702, + "learning_rate": 0.00015208149805749668, + "loss": 2.6927, + "step": 6580 + }, + { + "epoch": 0.531111290452748, + "grad_norm": 0.7618215084075928, + "learning_rate": 0.00015206802061413287, + "loss": 2.5831, + "step": 6581 + }, + { + "epoch": 0.5311919941893309, + "grad_norm": 0.7448660731315613, + "learning_rate": 0.0001520545418730979, + "loss": 2.6123, + "step": 6582 + }, + { + "epoch": 
0.5312726979259139, + "grad_norm": 0.7450618147850037, + "learning_rate": 0.00015204106183472766, + "loss": 2.5768, + "step": 6583 + }, + { + "epoch": 0.531353401662497, + "grad_norm": 0.7426019310951233, + "learning_rate": 0.0001520275804993581, + "loss": 2.603, + "step": 6584 + }, + { + "epoch": 0.53143410539908, + "grad_norm": 0.7503333687782288, + "learning_rate": 0.00015201409786732526, + "loss": 2.6159, + "step": 6585 + }, + { + "epoch": 0.531514809135663, + "grad_norm": 0.6944373846054077, + "learning_rate": 0.00015200061393896513, + "loss": 2.5201, + "step": 6586 + }, + { + "epoch": 0.5315955128722459, + "grad_norm": 0.6958110332489014, + "learning_rate": 0.00015198712871461375, + "loss": 2.5592, + "step": 6587 + }, + { + "epoch": 0.531676216608829, + "grad_norm": 0.7838244438171387, + "learning_rate": 0.00015197364219460727, + "loss": 2.6663, + "step": 6588 + }, + { + "epoch": 0.531756920345412, + "grad_norm": 0.754338800907135, + "learning_rate": 0.00015196015437928174, + "loss": 2.6183, + "step": 6589 + }, + { + "epoch": 0.531837624081995, + "grad_norm": 0.7394337058067322, + "learning_rate": 0.00015194666526897332, + "loss": 2.5622, + "step": 6590 + }, + { + "epoch": 0.531918327818578, + "grad_norm": 0.7352069020271301, + "learning_rate": 0.00015193317486401824, + "loss": 2.6173, + "step": 6591 + }, + { + "epoch": 0.531999031555161, + "grad_norm": 0.6318944096565247, + "learning_rate": 0.00015191968316475267, + "loss": 2.6159, + "step": 6592 + }, + { + "epoch": 0.532079735291744, + "grad_norm": 0.7071281671524048, + "learning_rate": 0.00015190619017151291, + "loss": 2.633, + "step": 6593 + }, + { + "epoch": 0.532160439028327, + "grad_norm": 0.7762585282325745, + "learning_rate": 0.00015189269588463517, + "loss": 2.6445, + "step": 6594 + }, + { + "epoch": 0.53224114276491, + "grad_norm": 0.7979930639266968, + "learning_rate": 0.0001518792003044558, + "loss": 2.5825, + "step": 6595 + }, + { + "epoch": 0.5323218465014931, + "grad_norm": 0.7355580925941467, 
+ "learning_rate": 0.00015186570343131114, + "loss": 2.6197, + "step": 6596 + }, + { + "epoch": 0.532402550238076, + "grad_norm": 0.7286938428878784, + "learning_rate": 0.0001518522052655376, + "loss": 2.6385, + "step": 6597 + }, + { + "epoch": 0.532483253974659, + "grad_norm": 0.689143180847168, + "learning_rate": 0.00015183870580747156, + "loss": 2.6593, + "step": 6598 + }, + { + "epoch": 0.532563957711242, + "grad_norm": 0.714746356010437, + "learning_rate": 0.00015182520505744945, + "loss": 2.6059, + "step": 6599 + }, + { + "epoch": 0.5326446614478251, + "grad_norm": 0.8055040240287781, + "learning_rate": 0.00015181170301580777, + "loss": 2.6983, + "step": 6600 + }, + { + "epoch": 0.532725365184408, + "grad_norm": 0.7104170918464661, + "learning_rate": 0.00015179819968288297, + "loss": 2.6578, + "step": 6601 + }, + { + "epoch": 0.532806068920991, + "grad_norm": 0.7175524830818176, + "learning_rate": 0.0001517846950590117, + "loss": 2.6263, + "step": 6602 + }, + { + "epoch": 0.532886772657574, + "grad_norm": 0.6755492091178894, + "learning_rate": 0.00015177118914453042, + "loss": 2.5752, + "step": 6603 + }, + { + "epoch": 0.5329674763941571, + "grad_norm": 0.7020289897918701, + "learning_rate": 0.00015175768193977578, + "loss": 2.6186, + "step": 6604 + }, + { + "epoch": 0.5330481801307401, + "grad_norm": 0.7550958395004272, + "learning_rate": 0.0001517441734450844, + "loss": 2.628, + "step": 6605 + }, + { + "epoch": 0.533128883867323, + "grad_norm": 0.6697603464126587, + "learning_rate": 0.00015173066366079297, + "loss": 2.6433, + "step": 6606 + }, + { + "epoch": 0.533209587603906, + "grad_norm": 0.715372622013092, + "learning_rate": 0.0001517171525872382, + "loss": 2.6022, + "step": 6607 + }, + { + "epoch": 0.5332902913404891, + "grad_norm": 0.7081933617591858, + "learning_rate": 0.00015170364022475675, + "loss": 2.675, + "step": 6608 + }, + { + "epoch": 0.5333709950770721, + "grad_norm": 0.7074152231216431, + "learning_rate": 0.00015169012657368546, + "loss": 
2.6637, + "step": 6609 + }, + { + "epoch": 0.5334516988136551, + "grad_norm": 0.6692848801612854, + "learning_rate": 0.00015167661163436108, + "loss": 2.5855, + "step": 6610 + }, + { + "epoch": 0.533532402550238, + "grad_norm": 0.7307556867599487, + "learning_rate": 0.00015166309540712048, + "loss": 2.6105, + "step": 6611 + }, + { + "epoch": 0.5336131062868211, + "grad_norm": 0.7026669383049011, + "learning_rate": 0.00015164957789230048, + "loss": 2.6656, + "step": 6612 + }, + { + "epoch": 0.5336938100234041, + "grad_norm": 0.6579706072807312, + "learning_rate": 0.000151636059090238, + "loss": 2.6456, + "step": 6613 + }, + { + "epoch": 0.5337745137599871, + "grad_norm": 0.6854498386383057, + "learning_rate": 0.00015162253900126993, + "loss": 2.5969, + "step": 6614 + }, + { + "epoch": 0.5338552174965701, + "grad_norm": 0.7542434334754944, + "learning_rate": 0.00015160901762573323, + "loss": 2.6333, + "step": 6615 + }, + { + "epoch": 0.5339359212331531, + "grad_norm": 0.6795105934143066, + "learning_rate": 0.0001515954949639649, + "loss": 2.6268, + "step": 6616 + }, + { + "epoch": 0.5340166249697361, + "grad_norm": 0.6395254135131836, + "learning_rate": 0.000151581971016302, + "loss": 2.5684, + "step": 6617 + }, + { + "epoch": 0.5340973287063191, + "grad_norm": 0.7069850564002991, + "learning_rate": 0.00015156844578308155, + "loss": 2.64, + "step": 6618 + }, + { + "epoch": 0.5341780324429021, + "grad_norm": 0.6779203414916992, + "learning_rate": 0.0001515549192646406, + "loss": 2.6255, + "step": 6619 + }, + { + "epoch": 0.5342587361794852, + "grad_norm": 0.6403560638427734, + "learning_rate": 0.00015154139146131632, + "loss": 2.611, + "step": 6620 + }, + { + "epoch": 0.5343394399160681, + "grad_norm": 0.7532669901847839, + "learning_rate": 0.00015152786237344583, + "loss": 2.5641, + "step": 6621 + }, + { + "epoch": 0.5344201436526511, + "grad_norm": 0.6827573776245117, + "learning_rate": 0.00015151433200136629, + "loss": 2.6096, + "step": 6622 + }, + { + "epoch": 
0.5345008473892341, + "grad_norm": 0.6691904067993164, + "learning_rate": 0.000151500800345415, + "loss": 2.6602, + "step": 6623 + }, + { + "epoch": 0.5345815511258172, + "grad_norm": 0.7288634777069092, + "learning_rate": 0.00015148726740592906, + "loss": 2.6468, + "step": 6624 + }, + { + "epoch": 0.5346622548624002, + "grad_norm": 0.7087839245796204, + "learning_rate": 0.00015147373318324586, + "loss": 2.5795, + "step": 6625 + }, + { + "epoch": 0.5347429585989831, + "grad_norm": 0.6618373394012451, + "learning_rate": 0.00015146019767770267, + "loss": 2.638, + "step": 6626 + }, + { + "epoch": 0.5348236623355661, + "grad_norm": 0.7384989857673645, + "learning_rate": 0.00015144666088963684, + "loss": 2.6104, + "step": 6627 + }, + { + "epoch": 0.5349043660721492, + "grad_norm": 0.6662275195121765, + "learning_rate": 0.00015143312281938576, + "loss": 2.6174, + "step": 6628 + }, + { + "epoch": 0.5349850698087322, + "grad_norm": 0.6617184281349182, + "learning_rate": 0.0001514195834672868, + "loss": 2.6154, + "step": 6629 + }, + { + "epoch": 0.5350657735453151, + "grad_norm": 0.7173622846603394, + "learning_rate": 0.0001514060428336774, + "loss": 2.5741, + "step": 6630 + }, + { + "epoch": 0.5351464772818981, + "grad_norm": 0.7773584127426147, + "learning_rate": 0.00015139250091889502, + "loss": 2.6333, + "step": 6631 + }, + { + "epoch": 0.5352271810184811, + "grad_norm": 0.7255204916000366, + "learning_rate": 0.0001513789577232772, + "loss": 2.5459, + "step": 6632 + }, + { + "epoch": 0.5353078847550642, + "grad_norm": 0.7308403849601746, + "learning_rate": 0.00015136541324716144, + "loss": 2.5934, + "step": 6633 + }, + { + "epoch": 0.5353885884916472, + "grad_norm": 0.699367880821228, + "learning_rate": 0.0001513518674908853, + "loss": 2.6797, + "step": 6634 + }, + { + "epoch": 0.5354692922282301, + "grad_norm": 0.7236449718475342, + "learning_rate": 0.0001513383204547864, + "loss": 2.6289, + "step": 6635 + }, + { + "epoch": 0.5355499959648131, + "grad_norm": 
0.6860557794570923, + "learning_rate": 0.00015132477213920234, + "loss": 2.6736, + "step": 6636 + }, + { + "epoch": 0.5356306997013962, + "grad_norm": 0.6724153161048889, + "learning_rate": 0.00015131122254447084, + "loss": 2.5581, + "step": 6637 + }, + { + "epoch": 0.5357114034379792, + "grad_norm": 0.6818630695343018, + "learning_rate": 0.00015129767167092949, + "loss": 2.5979, + "step": 6638 + }, + { + "epoch": 0.5357921071745622, + "grad_norm": 0.6956631541252136, + "learning_rate": 0.00015128411951891607, + "loss": 2.6116, + "step": 6639 + }, + { + "epoch": 0.5358728109111451, + "grad_norm": 0.6698076128959656, + "learning_rate": 0.00015127056608876837, + "loss": 2.65, + "step": 6640 + }, + { + "epoch": 0.5359535146477282, + "grad_norm": 0.7763264179229736, + "learning_rate": 0.00015125701138082415, + "loss": 2.6164, + "step": 6641 + }, + { + "epoch": 0.5360342183843112, + "grad_norm": 0.7148340940475464, + "learning_rate": 0.00015124345539542118, + "loss": 2.6467, + "step": 6642 + }, + { + "epoch": 0.5361149221208942, + "grad_norm": 0.7350041270256042, + "learning_rate": 0.00015122989813289733, + "loss": 2.6477, + "step": 6643 + }, + { + "epoch": 0.5361956258574772, + "grad_norm": 0.6993441581726074, + "learning_rate": 0.00015121633959359055, + "loss": 2.7526, + "step": 6644 + }, + { + "epoch": 0.5362763295940602, + "grad_norm": 0.6828470826148987, + "learning_rate": 0.00015120277977783873, + "loss": 2.6439, + "step": 6645 + }, + { + "epoch": 0.5363570333306432, + "grad_norm": 0.7076796889305115, + "learning_rate": 0.0001511892186859797, + "loss": 2.6375, + "step": 6646 + }, + { + "epoch": 0.5364377370672262, + "grad_norm": 0.6830769777297974, + "learning_rate": 0.0001511756563183516, + "loss": 2.6052, + "step": 6647 + }, + { + "epoch": 0.5365184408038092, + "grad_norm": 0.6482179760932922, + "learning_rate": 0.00015116209267529237, + "loss": 2.6251, + "step": 6648 + }, + { + "epoch": 0.5365991445403923, + "grad_norm": 0.6687620878219604, + "learning_rate": 
0.00015114852775714, + "loss": 2.659, + "step": 6649 + }, + { + "epoch": 0.5366798482769752, + "grad_norm": 0.734108030796051, + "learning_rate": 0.0001511349615642327, + "loss": 2.6542, + "step": 6650 + }, + { + "epoch": 0.5367605520135582, + "grad_norm": 0.7092111706733704, + "learning_rate": 0.00015112139409690842, + "loss": 2.6228, + "step": 6651 + }, + { + "epoch": 0.5368412557501412, + "grad_norm": 0.6544996500015259, + "learning_rate": 0.0001511078253555054, + "loss": 2.5661, + "step": 6652 + }, + { + "epoch": 0.5369219594867243, + "grad_norm": 0.7012531161308289, + "learning_rate": 0.00015109425534036176, + "loss": 2.6447, + "step": 6653 + }, + { + "epoch": 0.5370026632233073, + "grad_norm": 0.6813335418701172, + "learning_rate": 0.0001510806840518157, + "loss": 2.5723, + "step": 6654 + }, + { + "epoch": 0.5370833669598902, + "grad_norm": 0.6711288094520569, + "learning_rate": 0.0001510671114902055, + "loss": 2.6096, + "step": 6655 + }, + { + "epoch": 0.5371640706964732, + "grad_norm": 0.721866250038147, + "learning_rate": 0.00015105353765586935, + "loss": 2.6167, + "step": 6656 + }, + { + "epoch": 0.5372447744330563, + "grad_norm": 0.8140639066696167, + "learning_rate": 0.00015103996254914562, + "loss": 2.5768, + "step": 6657 + }, + { + "epoch": 0.5373254781696393, + "grad_norm": 0.6859177947044373, + "learning_rate": 0.0001510263861703726, + "loss": 2.5638, + "step": 6658 + }, + { + "epoch": 0.5374061819062222, + "grad_norm": 0.7254204154014587, + "learning_rate": 0.00015101280851988864, + "loss": 2.5855, + "step": 6659 + }, + { + "epoch": 0.5374868856428052, + "grad_norm": 0.7181829810142517, + "learning_rate": 0.00015099922959803218, + "loss": 2.5358, + "step": 6660 + }, + { + "epoch": 0.5375675893793883, + "grad_norm": 0.7092663645744324, + "learning_rate": 0.00015098564940514155, + "loss": 2.679, + "step": 6661 + }, + { + "epoch": 0.5376482931159713, + "grad_norm": 0.7126225233078003, + "learning_rate": 0.00015097206794155527, + "loss": 2.6167, + 
"step": 6662 + }, + { + "epoch": 0.5377289968525543, + "grad_norm": 0.7469925880432129, + "learning_rate": 0.00015095848520761186, + "loss": 2.5906, + "step": 6663 + }, + { + "epoch": 0.5378097005891372, + "grad_norm": 0.6911186575889587, + "learning_rate": 0.00015094490120364973, + "loss": 2.6488, + "step": 6664 + }, + { + "epoch": 0.5378904043257203, + "grad_norm": 0.6579635143280029, + "learning_rate": 0.00015093131593000753, + "loss": 2.5894, + "step": 6665 + }, + { + "epoch": 0.5379711080623033, + "grad_norm": 0.7107242345809937, + "learning_rate": 0.00015091772938702377, + "loss": 2.6568, + "step": 6666 + }, + { + "epoch": 0.5380518117988863, + "grad_norm": 0.6845428943634033, + "learning_rate": 0.00015090414157503714, + "loss": 2.5697, + "step": 6667 + }, + { + "epoch": 0.5381325155354693, + "grad_norm": 0.6713212132453918, + "learning_rate": 0.00015089055249438622, + "loss": 2.5747, + "step": 6668 + }, + { + "epoch": 0.5382132192720523, + "grad_norm": 0.7091513276100159, + "learning_rate": 0.0001508769621454097, + "loss": 2.6765, + "step": 6669 + }, + { + "epoch": 0.5382939230086353, + "grad_norm": 0.7403436899185181, + "learning_rate": 0.00015086337052844627, + "loss": 2.6841, + "step": 6670 + }, + { + "epoch": 0.5383746267452183, + "grad_norm": 0.6745626330375671, + "learning_rate": 0.0001508497776438347, + "loss": 2.6436, + "step": 6671 + }, + { + "epoch": 0.5384553304818013, + "grad_norm": 0.7491294145584106, + "learning_rate": 0.00015083618349191372, + "loss": 2.6376, + "step": 6672 + }, + { + "epoch": 0.5385360342183844, + "grad_norm": 0.719761848449707, + "learning_rate": 0.00015082258807302222, + "loss": 2.5885, + "step": 6673 + }, + { + "epoch": 0.5386167379549673, + "grad_norm": 0.7302667498588562, + "learning_rate": 0.00015080899138749895, + "loss": 2.7019, + "step": 6674 + }, + { + "epoch": 0.5386974416915503, + "grad_norm": 0.7640584111213684, + "learning_rate": 0.0001507953934356828, + "loss": 2.6404, + "step": 6675 + }, + { + "epoch": 
0.5387781454281333, + "grad_norm": 0.699515700340271, + "learning_rate": 0.0001507817942179127, + "loss": 2.6407, + "step": 6676 + }, + { + "epoch": 0.5388588491647164, + "grad_norm": 0.7305224537849426, + "learning_rate": 0.00015076819373452746, + "loss": 2.5994, + "step": 6677 + }, + { + "epoch": 0.5389395529012994, + "grad_norm": 0.7125952243804932, + "learning_rate": 0.00015075459198586616, + "loss": 2.6472, + "step": 6678 + }, + { + "epoch": 0.5390202566378823, + "grad_norm": 0.7077293395996094, + "learning_rate": 0.00015074098897226778, + "loss": 2.6168, + "step": 6679 + }, + { + "epoch": 0.5391009603744653, + "grad_norm": 0.6713843941688538, + "learning_rate": 0.00015072738469407127, + "loss": 2.5736, + "step": 6680 + }, + { + "epoch": 0.5391816641110483, + "grad_norm": 0.7101294994354248, + "learning_rate": 0.00015071377915161578, + "loss": 2.6994, + "step": 6681 + }, + { + "epoch": 0.5392623678476314, + "grad_norm": 0.7132740020751953, + "learning_rate": 0.00015070017234524032, + "loss": 2.586, + "step": 6682 + }, + { + "epoch": 0.5393430715842144, + "grad_norm": 0.7043401598930359, + "learning_rate": 0.00015068656427528402, + "loss": 2.6025, + "step": 6683 + }, + { + "epoch": 0.5394237753207973, + "grad_norm": 0.6831551194190979, + "learning_rate": 0.00015067295494208607, + "loss": 2.6183, + "step": 6684 + }, + { + "epoch": 0.5395044790573803, + "grad_norm": 0.7066370844841003, + "learning_rate": 0.0001506593443459856, + "loss": 2.6467, + "step": 6685 + }, + { + "epoch": 0.5395851827939634, + "grad_norm": 0.7908033132553101, + "learning_rate": 0.0001506457324873219, + "loss": 2.6929, + "step": 6686 + }, + { + "epoch": 0.5396658865305464, + "grad_norm": 0.7186774611473083, + "learning_rate": 0.00015063211936643407, + "loss": 2.5841, + "step": 6687 + }, + { + "epoch": 0.5397465902671293, + "grad_norm": 0.6634512543678284, + "learning_rate": 0.0001506185049836615, + "loss": 2.5517, + "step": 6688 + }, + { + "epoch": 0.5398272940037123, + "grad_norm": 
0.734406590461731, + "learning_rate": 0.00015060488933934353, + "loss": 2.6317, + "step": 6689 + }, + { + "epoch": 0.5399079977402954, + "grad_norm": 0.7754772305488586, + "learning_rate": 0.00015059127243381937, + "loss": 2.6885, + "step": 6690 + }, + { + "epoch": 0.5399887014768784, + "grad_norm": 0.7636603713035583, + "learning_rate": 0.00015057765426742848, + "loss": 2.5767, + "step": 6691 + }, + { + "epoch": 0.5400694052134614, + "grad_norm": 0.6621577143669128, + "learning_rate": 0.00015056403484051017, + "loss": 2.5905, + "step": 6692 + }, + { + "epoch": 0.5401501089500443, + "grad_norm": 0.7605881094932556, + "learning_rate": 0.00015055041415340404, + "loss": 2.6166, + "step": 6693 + }, + { + "epoch": 0.5402308126866274, + "grad_norm": 0.7603485584259033, + "learning_rate": 0.0001505367922064494, + "loss": 2.6123, + "step": 6694 + }, + { + "epoch": 0.5403115164232104, + "grad_norm": 0.7021469473838806, + "learning_rate": 0.0001505231689999858, + "loss": 2.6754, + "step": 6695 + }, + { + "epoch": 0.5403922201597934, + "grad_norm": 0.7291955947875977, + "learning_rate": 0.00015050954453435273, + "loss": 2.6393, + "step": 6696 + }, + { + "epoch": 0.5404729238963764, + "grad_norm": 0.6658700704574585, + "learning_rate": 0.00015049591880988977, + "loss": 2.5888, + "step": 6697 + }, + { + "epoch": 0.5405536276329594, + "grad_norm": 0.7080146074295044, + "learning_rate": 0.00015048229182693657, + "loss": 2.6318, + "step": 6698 + }, + { + "epoch": 0.5406343313695424, + "grad_norm": 0.7440849542617798, + "learning_rate": 0.00015046866358583267, + "loss": 2.596, + "step": 6699 + }, + { + "epoch": 0.5407150351061254, + "grad_norm": 0.886578381061554, + "learning_rate": 0.00015045503408691775, + "loss": 2.6479, + "step": 6700 + }, + { + "epoch": 0.5407957388427084, + "grad_norm": 0.7221408486366272, + "learning_rate": 0.00015044140333053148, + "loss": 2.625, + "step": 6701 + }, + { + "epoch": 0.5408764425792915, + "grad_norm": 0.7193209528923035, + "learning_rate": 
0.0001504277713170136, + "loss": 2.6044, + "step": 6702 + }, + { + "epoch": 0.5409571463158744, + "grad_norm": 0.7139819860458374, + "learning_rate": 0.00015041413804670384, + "loss": 2.5572, + "step": 6703 + }, + { + "epoch": 0.5410378500524574, + "grad_norm": 0.728875994682312, + "learning_rate": 0.00015040050351994196, + "loss": 2.6373, + "step": 6704 + }, + { + "epoch": 0.5411185537890404, + "grad_norm": 0.6794858574867249, + "learning_rate": 0.0001503868677370678, + "loss": 2.6265, + "step": 6705 + }, + { + "epoch": 0.5411992575256235, + "grad_norm": 0.6874774098396301, + "learning_rate": 0.00015037323069842117, + "loss": 2.6146, + "step": 6706 + }, + { + "epoch": 0.5412799612622065, + "grad_norm": 0.7064409255981445, + "learning_rate": 0.00015035959240434197, + "loss": 2.6126, + "step": 6707 + }, + { + "epoch": 0.5413606649987894, + "grad_norm": 0.7212977409362793, + "learning_rate": 0.00015034595285517006, + "loss": 2.6836, + "step": 6708 + }, + { + "epoch": 0.5414413687353724, + "grad_norm": 0.7826492190361023, + "learning_rate": 0.0001503323120512454, + "loss": 2.6648, + "step": 6709 + }, + { + "epoch": 0.5415220724719555, + "grad_norm": 0.7228415608406067, + "learning_rate": 0.000150318669992908, + "loss": 2.5734, + "step": 6710 + }, + { + "epoch": 0.5416027762085385, + "grad_norm": 0.6929590702056885, + "learning_rate": 0.00015030502668049778, + "loss": 2.6023, + "step": 6711 + }, + { + "epoch": 0.5416834799451214, + "grad_norm": 0.679990291595459, + "learning_rate": 0.0001502913821143548, + "loss": 2.5867, + "step": 6712 + }, + { + "epoch": 0.5417641836817044, + "grad_norm": 0.7324180603027344, + "learning_rate": 0.00015027773629481907, + "loss": 2.5722, + "step": 6713 + }, + { + "epoch": 0.5418448874182875, + "grad_norm": 0.686826765537262, + "learning_rate": 0.00015026408922223078, + "loss": 2.6138, + "step": 6714 + }, + { + "epoch": 0.5419255911548705, + "grad_norm": 0.7045193314552307, + "learning_rate": 0.00015025044089693, + "loss": 2.619, + 
"step": 6715 + }, + { + "epoch": 0.5420062948914535, + "grad_norm": 0.6839936375617981, + "learning_rate": 0.00015023679131925683, + "loss": 2.5778, + "step": 6716 + }, + { + "epoch": 0.5420869986280364, + "grad_norm": 0.7613961696624756, + "learning_rate": 0.00015022314048955153, + "loss": 2.6262, + "step": 6717 + }, + { + "epoch": 0.5421677023646195, + "grad_norm": 0.7867478728294373, + "learning_rate": 0.00015020948840815428, + "loss": 2.6576, + "step": 6718 + }, + { + "epoch": 0.5422484061012025, + "grad_norm": 0.7371038794517517, + "learning_rate": 0.0001501958350754053, + "loss": 2.6495, + "step": 6719 + }, + { + "epoch": 0.5423291098377855, + "grad_norm": 0.7146512269973755, + "learning_rate": 0.00015018218049164494, + "loss": 2.6514, + "step": 6720 + }, + { + "epoch": 0.5424098135743685, + "grad_norm": 0.7507650256156921, + "learning_rate": 0.00015016852465721346, + "loss": 2.6509, + "step": 6721 + }, + { + "epoch": 0.5424905173109515, + "grad_norm": 0.6786547303199768, + "learning_rate": 0.0001501548675724512, + "loss": 2.5983, + "step": 6722 + }, + { + "epoch": 0.5425712210475345, + "grad_norm": 0.7077932357788086, + "learning_rate": 0.0001501412092376985, + "loss": 2.622, + "step": 6723 + }, + { + "epoch": 0.5426519247841175, + "grad_norm": 0.7191271781921387, + "learning_rate": 0.00015012754965329584, + "loss": 2.6632, + "step": 6724 + }, + { + "epoch": 0.5427326285207005, + "grad_norm": 0.6785906553268433, + "learning_rate": 0.00015011388881958356, + "loss": 2.6312, + "step": 6725 + }, + { + "epoch": 0.5428133322572836, + "grad_norm": 0.6880263090133667, + "learning_rate": 0.00015010022673690222, + "loss": 2.5951, + "step": 6726 + }, + { + "epoch": 0.5428940359938665, + "grad_norm": 0.7769095301628113, + "learning_rate": 0.0001500865634055923, + "loss": 2.5503, + "step": 6727 + }, + { + "epoch": 0.5429747397304495, + "grad_norm": 0.6847476959228516, + "learning_rate": 0.0001500728988259942, + "loss": 2.6824, + "step": 6728 + }, + { + "epoch": 
0.5430554434670325, + "grad_norm": 0.6829310059547424, + "learning_rate": 0.00015005923299844863, + "loss": 2.5683, + "step": 6729 + }, + { + "epoch": 0.5431361472036156, + "grad_norm": 0.7436082363128662, + "learning_rate": 0.0001500455659232961, + "loss": 2.6165, + "step": 6730 + }, + { + "epoch": 0.5432168509401986, + "grad_norm": 0.7876375913619995, + "learning_rate": 0.00015003189760087724, + "loss": 2.6203, + "step": 6731 + }, + { + "epoch": 0.5432975546767815, + "grad_norm": 0.6869253516197205, + "learning_rate": 0.0001500182280315327, + "loss": 2.6136, + "step": 6732 + }, + { + "epoch": 0.5433782584133645, + "grad_norm": 0.7179432511329651, + "learning_rate": 0.00015000455721560316, + "loss": 2.6049, + "step": 6733 + }, + { + "epoch": 0.5434589621499475, + "grad_norm": 0.7286917567253113, + "learning_rate": 0.00014999088515342939, + "loss": 2.5704, + "step": 6734 + }, + { + "epoch": 0.5435396658865306, + "grad_norm": 0.6841779351234436, + "learning_rate": 0.00014997721184535206, + "loss": 2.6095, + "step": 6735 + }, + { + "epoch": 0.5436203696231136, + "grad_norm": 0.7661791443824768, + "learning_rate": 0.00014996353729171196, + "loss": 2.6193, + "step": 6736 + }, + { + "epoch": 0.5437010733596965, + "grad_norm": 0.7365885376930237, + "learning_rate": 0.0001499498614928499, + "loss": 2.586, + "step": 6737 + }, + { + "epoch": 0.5437817770962795, + "grad_norm": 0.7423815131187439, + "learning_rate": 0.00014993618444910674, + "loss": 2.6199, + "step": 6738 + }, + { + "epoch": 0.5438624808328626, + "grad_norm": 0.7667781114578247, + "learning_rate": 0.0001499225061608233, + "loss": 2.6584, + "step": 6739 + }, + { + "epoch": 0.5439431845694456, + "grad_norm": 0.7148830890655518, + "learning_rate": 0.00014990882662834057, + "loss": 2.7172, + "step": 6740 + }, + { + "epoch": 0.5440238883060285, + "grad_norm": 0.7206205725669861, + "learning_rate": 0.00014989514585199936, + "loss": 2.5682, + "step": 6741 + }, + { + "epoch": 0.5441045920426115, + "grad_norm": 
0.7306448221206665, + "learning_rate": 0.0001498814638321407, + "loss": 2.6724, + "step": 6742 + }, + { + "epoch": 0.5441852957791946, + "grad_norm": 0.7058824896812439, + "learning_rate": 0.00014986778056910556, + "loss": 2.6573, + "step": 6743 + }, + { + "epoch": 0.5442659995157776, + "grad_norm": 0.770588755607605, + "learning_rate": 0.000149854096063235, + "loss": 2.658, + "step": 6744 + }, + { + "epoch": 0.5443467032523606, + "grad_norm": 0.8283931612968445, + "learning_rate": 0.00014984041031487001, + "loss": 2.6624, + "step": 6745 + }, + { + "epoch": 0.5444274069889435, + "grad_norm": 0.6814693808555603, + "learning_rate": 0.00014982672332435176, + "loss": 2.5835, + "step": 6746 + }, + { + "epoch": 0.5445081107255266, + "grad_norm": 0.7059363722801208, + "learning_rate": 0.00014981303509202127, + "loss": 2.5977, + "step": 6747 + }, + { + "epoch": 0.5445888144621096, + "grad_norm": 0.6678106188774109, + "learning_rate": 0.00014979934561821975, + "loss": 2.6479, + "step": 6748 + }, + { + "epoch": 0.5446695181986926, + "grad_norm": 0.8167592883110046, + "learning_rate": 0.00014978565490328835, + "loss": 2.6529, + "step": 6749 + }, + { + "epoch": 0.5447502219352756, + "grad_norm": 0.807209849357605, + "learning_rate": 0.00014977196294756832, + "loss": 2.6546, + "step": 6750 + }, + { + "epoch": 0.5448309256718586, + "grad_norm": 0.7099517583847046, + "learning_rate": 0.00014975826975140085, + "loss": 2.6178, + "step": 6751 + }, + { + "epoch": 0.5449116294084416, + "grad_norm": 0.7900758981704712, + "learning_rate": 0.0001497445753151272, + "loss": 2.586, + "step": 6752 + }, + { + "epoch": 0.5449923331450246, + "grad_norm": 0.6826134920120239, + "learning_rate": 0.00014973087963908875, + "loss": 2.5914, + "step": 6753 + }, + { + "epoch": 0.5450730368816076, + "grad_norm": 0.7383863925933838, + "learning_rate": 0.0001497171827236268, + "loss": 2.6357, + "step": 6754 + }, + { + "epoch": 0.5451537406181907, + "grad_norm": 0.7208051085472107, + "learning_rate": 
0.0001497034845690826, + "loss": 2.5435, + "step": 6755 + }, + { + "epoch": 0.5452344443547736, + "grad_norm": 0.680794894695282, + "learning_rate": 0.00014968978517579772, + "loss": 2.5691, + "step": 6756 + }, + { + "epoch": 0.5453151480913566, + "grad_norm": 0.680759847164154, + "learning_rate": 0.00014967608454411347, + "loss": 2.5761, + "step": 6757 + }, + { + "epoch": 0.5453958518279396, + "grad_norm": 0.719634473323822, + "learning_rate": 0.00014966238267437134, + "loss": 2.637, + "step": 6758 + }, + { + "epoch": 0.5454765555645227, + "grad_norm": 0.777302086353302, + "learning_rate": 0.0001496486795669128, + "loss": 2.6457, + "step": 6759 + }, + { + "epoch": 0.5455572593011057, + "grad_norm": 0.6875059604644775, + "learning_rate": 0.0001496349752220794, + "loss": 2.6116, + "step": 6760 + }, + { + "epoch": 0.5456379630376886, + "grad_norm": 0.6884258985519409, + "learning_rate": 0.0001496212696402127, + "loss": 2.5863, + "step": 6761 + }, + { + "epoch": 0.5457186667742716, + "grad_norm": 0.6667922139167786, + "learning_rate": 0.00014960756282165422, + "loss": 2.5892, + "step": 6762 + }, + { + "epoch": 0.5457993705108547, + "grad_norm": 0.6712725162506104, + "learning_rate": 0.00014959385476674559, + "loss": 2.5478, + "step": 6763 + }, + { + "epoch": 0.5458800742474377, + "grad_norm": 0.6803874969482422, + "learning_rate": 0.00014958014547582845, + "loss": 2.5785, + "step": 6764 + }, + { + "epoch": 0.5459607779840207, + "grad_norm": 0.6975811123847961, + "learning_rate": 0.0001495664349492445, + "loss": 2.5765, + "step": 6765 + }, + { + "epoch": 0.5460414817206036, + "grad_norm": 0.7676273584365845, + "learning_rate": 0.00014955272318733544, + "loss": 2.634, + "step": 6766 + }, + { + "epoch": 0.5461221854571867, + "grad_norm": 0.7044547200202942, + "learning_rate": 0.000149539010190443, + "loss": 2.646, + "step": 6767 + }, + { + "epoch": 0.5462028891937697, + "grad_norm": 0.7453166842460632, + "learning_rate": 0.00014952529595890887, + "loss": 2.6137, + 
"step": 6768 + }, + { + "epoch": 0.5462835929303527, + "grad_norm": 0.7281681299209595, + "learning_rate": 0.00014951158049307493, + "loss": 2.6558, + "step": 6769 + }, + { + "epoch": 0.5463642966669356, + "grad_norm": 0.7131047248840332, + "learning_rate": 0.00014949786379328298, + "loss": 2.6441, + "step": 6770 + }, + { + "epoch": 0.5464450004035187, + "grad_norm": 0.7072219848632812, + "learning_rate": 0.00014948414585987487, + "loss": 2.5861, + "step": 6771 + }, + { + "epoch": 0.5465257041401017, + "grad_norm": 0.7270335555076599, + "learning_rate": 0.00014947042669319252, + "loss": 2.6703, + "step": 6772 + }, + { + "epoch": 0.5466064078766847, + "grad_norm": 0.7314150929450989, + "learning_rate": 0.0001494567062935778, + "loss": 2.6101, + "step": 6773 + }, + { + "epoch": 0.5466871116132677, + "grad_norm": 0.8168460130691528, + "learning_rate": 0.00014944298466137266, + "loss": 2.662, + "step": 6774 + }, + { + "epoch": 0.5467678153498507, + "grad_norm": 0.7338390350341797, + "learning_rate": 0.00014942926179691913, + "loss": 2.6481, + "step": 6775 + }, + { + "epoch": 0.5468485190864337, + "grad_norm": 0.7065639495849609, + "learning_rate": 0.00014941553770055917, + "loss": 2.6192, + "step": 6776 + }, + { + "epoch": 0.5469292228230167, + "grad_norm": 0.7675396203994751, + "learning_rate": 0.00014940181237263483, + "loss": 2.5828, + "step": 6777 + }, + { + "epoch": 0.5470099265595997, + "grad_norm": 0.7085692286491394, + "learning_rate": 0.0001493880858134882, + "loss": 2.5815, + "step": 6778 + }, + { + "epoch": 0.5470906302961828, + "grad_norm": 0.757591187953949, + "learning_rate": 0.00014937435802346135, + "loss": 2.691, + "step": 6779 + }, + { + "epoch": 0.5471713340327657, + "grad_norm": 0.7299168705940247, + "learning_rate": 0.00014936062900289647, + "loss": 2.6246, + "step": 6780 + }, + { + "epoch": 0.5472520377693487, + "grad_norm": 0.693692684173584, + "learning_rate": 0.00014934689875213564, + "loss": 2.6149, + "step": 6781 + }, + { + "epoch": 
0.5473327415059317, + "grad_norm": 0.733657956123352, + "learning_rate": 0.00014933316727152113, + "loss": 2.582, + "step": 6782 + }, + { + "epoch": 0.5474134452425147, + "grad_norm": 0.6881953477859497, + "learning_rate": 0.00014931943456139514, + "loss": 2.6023, + "step": 6783 + }, + { + "epoch": 0.5474941489790978, + "grad_norm": 0.7102411985397339, + "learning_rate": 0.00014930570062209988, + "loss": 2.6296, + "step": 6784 + }, + { + "epoch": 0.5475748527156807, + "grad_norm": 0.7263364791870117, + "learning_rate": 0.00014929196545397771, + "loss": 2.6414, + "step": 6785 + }, + { + "epoch": 0.5476555564522637, + "grad_norm": 0.7239066958427429, + "learning_rate": 0.00014927822905737092, + "loss": 2.6174, + "step": 6786 + }, + { + "epoch": 0.5477362601888467, + "grad_norm": 0.6909911632537842, + "learning_rate": 0.0001492644914326218, + "loss": 2.6036, + "step": 6787 + }, + { + "epoch": 0.5478169639254298, + "grad_norm": 0.719693124294281, + "learning_rate": 0.00014925075258007283, + "loss": 2.6507, + "step": 6788 + }, + { + "epoch": 0.5478976676620128, + "grad_norm": 0.7722225785255432, + "learning_rate": 0.0001492370125000663, + "loss": 2.6268, + "step": 6789 + }, + { + "epoch": 0.5479783713985957, + "grad_norm": 0.7456568479537964, + "learning_rate": 0.00014922327119294476, + "loss": 2.6426, + "step": 6790 + }, + { + "epoch": 0.5480590751351787, + "grad_norm": 0.7430242300033569, + "learning_rate": 0.00014920952865905062, + "loss": 2.6632, + "step": 6791 + }, + { + "epoch": 0.5481397788717618, + "grad_norm": 0.7363260388374329, + "learning_rate": 0.0001491957848987264, + "loss": 2.6021, + "step": 6792 + }, + { + "epoch": 0.5482204826083448, + "grad_norm": 0.6903972029685974, + "learning_rate": 0.00014918203991231462, + "loss": 2.6086, + "step": 6793 + }, + { + "epoch": 0.5483011863449277, + "grad_norm": 0.6765161752700806, + "learning_rate": 0.00014916829370015781, + "loss": 2.5806, + "step": 6794 + }, + { + "epoch": 0.5483818900815107, + "grad_norm": 
0.7533403635025024, + "learning_rate": 0.0001491545462625986, + "loss": 2.6351, + "step": 6795 + }, + { + "epoch": 0.5484625938180938, + "grad_norm": 0.6841829419136047, + "learning_rate": 0.00014914079759997963, + "loss": 2.606, + "step": 6796 + }, + { + "epoch": 0.5485432975546768, + "grad_norm": 0.7671411037445068, + "learning_rate": 0.00014912704771264353, + "loss": 2.6645, + "step": 6797 + }, + { + "epoch": 0.5486240012912598, + "grad_norm": 0.7218797206878662, + "learning_rate": 0.00014911329660093295, + "loss": 2.6302, + "step": 6798 + }, + { + "epoch": 0.5487047050278427, + "grad_norm": 0.7269994020462036, + "learning_rate": 0.00014909954426519067, + "loss": 2.6261, + "step": 6799 + }, + { + "epoch": 0.5487854087644258, + "grad_norm": 0.765353262424469, + "learning_rate": 0.00014908579070575936, + "loss": 2.5787, + "step": 6800 + }, + { + "epoch": 0.5488661125010088, + "grad_norm": 0.6503065228462219, + "learning_rate": 0.00014907203592298189, + "loss": 2.6404, + "step": 6801 + }, + { + "epoch": 0.5489468162375918, + "grad_norm": 0.6869633197784424, + "learning_rate": 0.00014905827991720097, + "loss": 2.6463, + "step": 6802 + }, + { + "epoch": 0.5490275199741748, + "grad_norm": 0.7221426963806152, + "learning_rate": 0.00014904452268875947, + "loss": 2.6686, + "step": 6803 + }, + { + "epoch": 0.5491082237107578, + "grad_norm": 0.6781399250030518, + "learning_rate": 0.00014903076423800028, + "loss": 2.6274, + "step": 6804 + }, + { + "epoch": 0.5491889274473408, + "grad_norm": 0.7451084852218628, + "learning_rate": 0.00014901700456526626, + "loss": 2.6449, + "step": 6805 + }, + { + "epoch": 0.5492696311839238, + "grad_norm": 0.7159574627876282, + "learning_rate": 0.0001490032436709004, + "loss": 2.6664, + "step": 6806 + }, + { + "epoch": 0.5493503349205068, + "grad_norm": 0.724039614200592, + "learning_rate": 0.00014898948155524558, + "loss": 2.5816, + "step": 6807 + }, + { + "epoch": 0.5494310386570899, + "grad_norm": 0.7194633483886719, + "learning_rate": 
0.0001489757182186448, + "loss": 2.5625, + "step": 6808 + }, + { + "epoch": 0.5495117423936728, + "grad_norm": 0.704133927822113, + "learning_rate": 0.0001489619536614411, + "loss": 2.6295, + "step": 6809 + }, + { + "epoch": 0.5495924461302558, + "grad_norm": 0.6717158555984497, + "learning_rate": 0.00014894818788397757, + "loss": 2.6168, + "step": 6810 + }, + { + "epoch": 0.5496731498668388, + "grad_norm": 0.7096573710441589, + "learning_rate": 0.0001489344208865972, + "loss": 2.6316, + "step": 6811 + }, + { + "epoch": 0.5497538536034219, + "grad_norm": 0.6383458375930786, + "learning_rate": 0.00014892065266964316, + "loss": 2.5577, + "step": 6812 + }, + { + "epoch": 0.5498345573400049, + "grad_norm": 0.7606377601623535, + "learning_rate": 0.0001489068832334586, + "loss": 2.7078, + "step": 6813 + }, + { + "epoch": 0.5499152610765878, + "grad_norm": 0.649162232875824, + "learning_rate": 0.00014889311257838665, + "loss": 2.6023, + "step": 6814 + }, + { + "epoch": 0.5499959648131708, + "grad_norm": 0.6445025205612183, + "learning_rate": 0.00014887934070477053, + "loss": 2.6, + "step": 6815 + }, + { + "epoch": 0.5500766685497539, + "grad_norm": 0.6873729825019836, + "learning_rate": 0.00014886556761295342, + "loss": 2.6398, + "step": 6816 + }, + { + "epoch": 0.5501573722863369, + "grad_norm": 0.7814947366714478, + "learning_rate": 0.0001488517933032787, + "loss": 2.5803, + "step": 6817 + }, + { + "epoch": 0.5502380760229199, + "grad_norm": 0.7140909433364868, + "learning_rate": 0.00014883801777608953, + "loss": 2.6051, + "step": 6818 + }, + { + "epoch": 0.5503187797595028, + "grad_norm": 0.7326326370239258, + "learning_rate": 0.00014882424103172936, + "loss": 2.6123, + "step": 6819 + }, + { + "epoch": 0.5503994834960859, + "grad_norm": 0.7093667387962341, + "learning_rate": 0.00014881046307054142, + "loss": 2.6527, + "step": 6820 + }, + { + "epoch": 0.5504801872326689, + "grad_norm": 0.6877567768096924, + "learning_rate": 0.00014879668389286915, + "loss": 2.6086, + 
"step": 6821 + }, + { + "epoch": 0.5505608909692519, + "grad_norm": 0.7095615863800049, + "learning_rate": 0.000148782903499056, + "loss": 2.6469, + "step": 6822 + }, + { + "epoch": 0.5506415947058348, + "grad_norm": 0.6931191086769104, + "learning_rate": 0.00014876912188944535, + "loss": 2.6842, + "step": 6823 + }, + { + "epoch": 0.5507222984424179, + "grad_norm": 0.7016414403915405, + "learning_rate": 0.00014875533906438072, + "loss": 2.5753, + "step": 6824 + }, + { + "epoch": 0.5508030021790009, + "grad_norm": 0.6813814640045166, + "learning_rate": 0.00014874155502420558, + "loss": 2.5739, + "step": 6825 + }, + { + "epoch": 0.5508837059155839, + "grad_norm": 0.7068608403205872, + "learning_rate": 0.00014872776976926347, + "loss": 2.6325, + "step": 6826 + }, + { + "epoch": 0.5509644096521669, + "grad_norm": 0.6978127360343933, + "learning_rate": 0.00014871398329989796, + "loss": 2.5614, + "step": 6827 + }, + { + "epoch": 0.55104511338875, + "grad_norm": 0.6923051476478577, + "learning_rate": 0.00014870019561645265, + "loss": 2.6075, + "step": 6828 + }, + { + "epoch": 0.5511258171253329, + "grad_norm": 0.6708533763885498, + "learning_rate": 0.00014868640671927117, + "loss": 2.5883, + "step": 6829 + }, + { + "epoch": 0.5512065208619159, + "grad_norm": 0.7679650783538818, + "learning_rate": 0.00014867261660869713, + "loss": 2.6105, + "step": 6830 + }, + { + "epoch": 0.5512872245984989, + "grad_norm": 0.7080917358398438, + "learning_rate": 0.0001486588252850743, + "loss": 2.5855, + "step": 6831 + }, + { + "epoch": 0.551367928335082, + "grad_norm": 0.7218755483627319, + "learning_rate": 0.00014864503274874635, + "loss": 2.5872, + "step": 6832 + }, + { + "epoch": 0.551448632071665, + "grad_norm": 0.689038872718811, + "learning_rate": 0.000148631239000057, + "loss": 2.5902, + "step": 6833 + }, + { + "epoch": 0.5515293358082479, + "grad_norm": 0.6810954213142395, + "learning_rate": 0.00014861744403935005, + "loss": 2.5938, + "step": 6834 + }, + { + "epoch": 
0.5516100395448309, + "grad_norm": 0.7509457468986511, + "learning_rate": 0.00014860364786696933, + "loss": 2.593, + "step": 6835 + }, + { + "epoch": 0.5516907432814139, + "grad_norm": 0.739536702632904, + "learning_rate": 0.00014858985048325863, + "loss": 2.6668, + "step": 6836 + }, + { + "epoch": 0.551771447017997, + "grad_norm": 0.661829948425293, + "learning_rate": 0.00014857605188856184, + "loss": 2.6407, + "step": 6837 + }, + { + "epoch": 0.5518521507545799, + "grad_norm": 0.6869735717773438, + "learning_rate": 0.00014856225208322287, + "loss": 2.535, + "step": 6838 + }, + { + "epoch": 0.5519328544911629, + "grad_norm": 0.6724792122840881, + "learning_rate": 0.00014854845106758563, + "loss": 2.5629, + "step": 6839 + }, + { + "epoch": 0.5520135582277459, + "grad_norm": 0.7066503763198853, + "learning_rate": 0.00014853464884199407, + "loss": 2.6002, + "step": 6840 + }, + { + "epoch": 0.552094261964329, + "grad_norm": 0.7354215979576111, + "learning_rate": 0.0001485208454067922, + "loss": 2.6032, + "step": 6841 + }, + { + "epoch": 0.552174965700912, + "grad_norm": 0.8124571442604065, + "learning_rate": 0.00014850704076232405, + "loss": 2.5884, + "step": 6842 + }, + { + "epoch": 0.5522556694374949, + "grad_norm": 0.6941336393356323, + "learning_rate": 0.00014849323490893364, + "loss": 2.6461, + "step": 6843 + }, + { + "epoch": 0.5523363731740779, + "grad_norm": 0.6848790049552917, + "learning_rate": 0.00014847942784696505, + "loss": 2.6098, + "step": 6844 + }, + { + "epoch": 0.552417076910661, + "grad_norm": 0.6688000559806824, + "learning_rate": 0.00014846561957676237, + "loss": 2.6115, + "step": 6845 + }, + { + "epoch": 0.552497780647244, + "grad_norm": 0.6647306084632874, + "learning_rate": 0.00014845181009866975, + "loss": 2.597, + "step": 6846 + }, + { + "epoch": 0.552578484383827, + "grad_norm": 0.7277785539627075, + "learning_rate": 0.0001484379994130314, + "loss": 2.6223, + "step": 6847 + }, + { + "epoch": 0.5526591881204099, + "grad_norm": 
0.6623761057853699, + "learning_rate": 0.00014842418752019146, + "loss": 2.5657, + "step": 6848 + }, + { + "epoch": 0.552739891856993, + "grad_norm": 0.7207754254341125, + "learning_rate": 0.00014841037442049423, + "loss": 2.5711, + "step": 6849 + }, + { + "epoch": 0.552820595593576, + "grad_norm": 0.6963560581207275, + "learning_rate": 0.00014839656011428389, + "loss": 2.6078, + "step": 6850 + }, + { + "epoch": 0.552901299330159, + "grad_norm": 0.6875078678131104, + "learning_rate": 0.00014838274460190475, + "loss": 2.6109, + "step": 6851 + }, + { + "epoch": 0.552982003066742, + "grad_norm": 0.7049943804740906, + "learning_rate": 0.00014836892788370118, + "loss": 2.5755, + "step": 6852 + }, + { + "epoch": 0.553062706803325, + "grad_norm": 0.6941191554069519, + "learning_rate": 0.00014835510996001744, + "loss": 2.6694, + "step": 6853 + }, + { + "epoch": 0.553143410539908, + "grad_norm": 0.7589484453201294, + "learning_rate": 0.000148341290831198, + "loss": 2.5677, + "step": 6854 + }, + { + "epoch": 0.553224114276491, + "grad_norm": 0.6594784259796143, + "learning_rate": 0.00014832747049758723, + "loss": 2.6209, + "step": 6855 + }, + { + "epoch": 0.553304818013074, + "grad_norm": 0.726598858833313, + "learning_rate": 0.00014831364895952952, + "loss": 2.6492, + "step": 6856 + }, + { + "epoch": 0.553385521749657, + "grad_norm": 0.6668030023574829, + "learning_rate": 0.0001482998262173694, + "loss": 2.6057, + "step": 6857 + }, + { + "epoch": 0.55346622548624, + "grad_norm": 0.7698997855186462, + "learning_rate": 0.0001482860022714514, + "loss": 2.6215, + "step": 6858 + }, + { + "epoch": 0.553546929222823, + "grad_norm": 0.6805251836776733, + "learning_rate": 0.00014827217712211997, + "loss": 2.5855, + "step": 6859 + }, + { + "epoch": 0.553627632959406, + "grad_norm": 0.8481020331382751, + "learning_rate": 0.00014825835076971968, + "loss": 2.6218, + "step": 6860 + }, + { + "epoch": 0.5537083366959891, + "grad_norm": 0.6801722645759583, + "learning_rate": 
0.00014824452321459517, + "loss": 2.5998, + "step": 6861 + }, + { + "epoch": 0.553789040432572, + "grad_norm": 0.7174597978591919, + "learning_rate": 0.00014823069445709104, + "loss": 2.5782, + "step": 6862 + }, + { + "epoch": 0.553869744169155, + "grad_norm": 0.7607117891311646, + "learning_rate": 0.0001482168644975519, + "loss": 2.6492, + "step": 6863 + }, + { + "epoch": 0.553950447905738, + "grad_norm": 0.7554265856742859, + "learning_rate": 0.00014820303333632246, + "loss": 2.6511, + "step": 6864 + }, + { + "epoch": 0.5540311516423211, + "grad_norm": 0.7520260214805603, + "learning_rate": 0.00014818920097374745, + "loss": 2.6258, + "step": 6865 + }, + { + "epoch": 0.5541118553789041, + "grad_norm": 0.7897995114326477, + "learning_rate": 0.00014817536741017152, + "loss": 2.6153, + "step": 6866 + }, + { + "epoch": 0.554192559115487, + "grad_norm": 0.7444615960121155, + "learning_rate": 0.00014816153264593957, + "loss": 2.5892, + "step": 6867 + }, + { + "epoch": 0.55427326285207, + "grad_norm": 0.6593222618103027, + "learning_rate": 0.0001481476966813963, + "loss": 2.6048, + "step": 6868 + }, + { + "epoch": 0.5543539665886531, + "grad_norm": 0.7517102360725403, + "learning_rate": 0.0001481338595168866, + "loss": 2.6496, + "step": 6869 + }, + { + "epoch": 0.5544346703252361, + "grad_norm": 0.7314056754112244, + "learning_rate": 0.00014812002115275529, + "loss": 2.6009, + "step": 6870 + }, + { + "epoch": 0.554515374061819, + "grad_norm": 0.6718037724494934, + "learning_rate": 0.00014810618158934722, + "loss": 2.6279, + "step": 6871 + }, + { + "epoch": 0.554596077798402, + "grad_norm": 0.6853529810905457, + "learning_rate": 0.00014809234082700735, + "loss": 2.6562, + "step": 6872 + }, + { + "epoch": 0.5546767815349851, + "grad_norm": 0.713599443435669, + "learning_rate": 0.0001480784988660807, + "loss": 2.5783, + "step": 6873 + }, + { + "epoch": 0.5547574852715681, + "grad_norm": 0.6820243000984192, + "learning_rate": 0.00014806465570691213, + "loss": 2.5753, + 
"step": 6874 + }, + { + "epoch": 0.5548381890081511, + "grad_norm": 0.6999152302742004, + "learning_rate": 0.00014805081134984673, + "loss": 2.5839, + "step": 6875 + }, + { + "epoch": 0.554918892744734, + "grad_norm": 0.7145923376083374, + "learning_rate": 0.00014803696579522948, + "loss": 2.6153, + "step": 6876 + }, + { + "epoch": 0.5549995964813171, + "grad_norm": 0.7569223046302795, + "learning_rate": 0.00014802311904340548, + "loss": 2.5879, + "step": 6877 + }, + { + "epoch": 0.5550803002179001, + "grad_norm": 0.6977131962776184, + "learning_rate": 0.00014800927109471983, + "loss": 2.6587, + "step": 6878 + }, + { + "epoch": 0.5551610039544831, + "grad_norm": 0.6693562865257263, + "learning_rate": 0.00014799542194951764, + "loss": 2.6271, + "step": 6879 + }, + { + "epoch": 0.5552417076910661, + "grad_norm": 0.6937456130981445, + "learning_rate": 0.00014798157160814406, + "loss": 2.6213, + "step": 6880 + }, + { + "epoch": 0.5553224114276492, + "grad_norm": 0.761538565158844, + "learning_rate": 0.0001479677200709443, + "loss": 2.6053, + "step": 6881 + }, + { + "epoch": 0.5554031151642321, + "grad_norm": 0.707457959651947, + "learning_rate": 0.00014795386733826356, + "loss": 2.5763, + "step": 6882 + }, + { + "epoch": 0.5554838189008151, + "grad_norm": 0.7323198318481445, + "learning_rate": 0.0001479400134104471, + "loss": 2.6899, + "step": 6883 + }, + { + "epoch": 0.5555645226373981, + "grad_norm": 0.7181541323661804, + "learning_rate": 0.0001479261582878402, + "loss": 2.5743, + "step": 6884 + }, + { + "epoch": 0.5556452263739811, + "grad_norm": 0.7683241367340088, + "learning_rate": 0.00014791230197078813, + "loss": 2.5295, + "step": 6885 + }, + { + "epoch": 0.5557259301105641, + "grad_norm": 0.7248150706291199, + "learning_rate": 0.00014789844445963626, + "loss": 2.6131, + "step": 6886 + }, + { + "epoch": 0.5558066338471471, + "grad_norm": 0.6868402361869812, + "learning_rate": 0.00014788458575472997, + "loss": 2.6182, + "step": 6887 + }, + { + "epoch": 
0.5558873375837301, + "grad_norm": 0.6995798945426941, + "learning_rate": 0.0001478707258564146, + "loss": 2.5969, + "step": 6888 + }, + { + "epoch": 0.5559680413203131, + "grad_norm": 0.6912558078765869, + "learning_rate": 0.00014785686476503565, + "loss": 2.6264, + "step": 6889 + }, + { + "epoch": 0.5560487450568962, + "grad_norm": 0.7485123872756958, + "learning_rate": 0.00014784300248093848, + "loss": 2.6036, + "step": 6890 + }, + { + "epoch": 0.5561294487934791, + "grad_norm": 0.7150819897651672, + "learning_rate": 0.00014782913900446864, + "loss": 2.5807, + "step": 6891 + }, + { + "epoch": 0.5562101525300621, + "grad_norm": 0.6715224385261536, + "learning_rate": 0.00014781527433597167, + "loss": 2.6164, + "step": 6892 + }, + { + "epoch": 0.5562908562666451, + "grad_norm": 0.6951256394386292, + "learning_rate": 0.000147801408475793, + "loss": 2.6106, + "step": 6893 + }, + { + "epoch": 0.5563715600032282, + "grad_norm": 0.7296997904777527, + "learning_rate": 0.00014778754142427832, + "loss": 2.6182, + "step": 6894 + }, + { + "epoch": 0.5564522637398112, + "grad_norm": 0.7484713196754456, + "learning_rate": 0.0001477736731817732, + "loss": 2.6384, + "step": 6895 + }, + { + "epoch": 0.5565329674763941, + "grad_norm": 0.6967526078224182, + "learning_rate": 0.00014775980374862326, + "loss": 2.5889, + "step": 6896 + }, + { + "epoch": 0.5566136712129771, + "grad_norm": 0.7004885077476501, + "learning_rate": 0.00014774593312517415, + "loss": 2.6549, + "step": 6897 + }, + { + "epoch": 0.5566943749495602, + "grad_norm": 0.7069302201271057, + "learning_rate": 0.00014773206131177158, + "loss": 2.6408, + "step": 6898 + }, + { + "epoch": 0.5567750786861432, + "grad_norm": 0.7048566341400146, + "learning_rate": 0.00014771818830876127, + "loss": 2.5909, + "step": 6899 + }, + { + "epoch": 0.5568557824227262, + "grad_norm": 0.7386630773544312, + "learning_rate": 0.00014770431411648897, + "loss": 2.6402, + "step": 6900 + }, + { + "epoch": 0.5569364861593091, + "grad_norm": 
0.7244876027107239, + "learning_rate": 0.00014769043873530047, + "loss": 2.5548, + "step": 6901 + }, + { + "epoch": 0.5570171898958922, + "grad_norm": 0.6820651888847351, + "learning_rate": 0.00014767656216554156, + "loss": 2.682, + "step": 6902 + }, + { + "epoch": 0.5570978936324752, + "grad_norm": 0.7281784415245056, + "learning_rate": 0.00014766268440755812, + "loss": 2.622, + "step": 6903 + }, + { + "epoch": 0.5571785973690582, + "grad_norm": 0.6525030136108398, + "learning_rate": 0.00014764880546169594, + "loss": 2.5809, + "step": 6904 + }, + { + "epoch": 0.5572593011056411, + "grad_norm": 0.6735210418701172, + "learning_rate": 0.00014763492532830102, + "loss": 2.6645, + "step": 6905 + }, + { + "epoch": 0.5573400048422242, + "grad_norm": 0.674700140953064, + "learning_rate": 0.00014762104400771922, + "loss": 2.6466, + "step": 6906 + }, + { + "epoch": 0.5574207085788072, + "grad_norm": 0.7570134401321411, + "learning_rate": 0.00014760716150029652, + "loss": 2.57, + "step": 6907 + }, + { + "epoch": 0.5575014123153902, + "grad_norm": 0.6532449722290039, + "learning_rate": 0.00014759327780637893, + "loss": 2.6207, + "step": 6908 + }, + { + "epoch": 0.5575821160519732, + "grad_norm": 0.7697737812995911, + "learning_rate": 0.00014757939292631242, + "loss": 2.5846, + "step": 6909 + }, + { + "epoch": 0.5576628197885563, + "grad_norm": 0.6750194430351257, + "learning_rate": 0.00014756550686044308, + "loss": 2.6421, + "step": 6910 + }, + { + "epoch": 0.5577435235251392, + "grad_norm": 0.7357683777809143, + "learning_rate": 0.00014755161960911697, + "loss": 2.6173, + "step": 6911 + }, + { + "epoch": 0.5578242272617222, + "grad_norm": 0.6812090277671814, + "learning_rate": 0.0001475377311726802, + "loss": 2.5556, + "step": 6912 + }, + { + "epoch": 0.5579049309983052, + "grad_norm": 0.7633040547370911, + "learning_rate": 0.00014752384155147888, + "loss": 2.6505, + "step": 6913 + }, + { + "epoch": 0.5579856347348883, + "grad_norm": 0.7426417469978333, + "learning_rate": 
0.00014750995074585922, + "loss": 2.5575, + "step": 6914 + }, + { + "epoch": 0.5580663384714712, + "grad_norm": 0.6926711201667786, + "learning_rate": 0.00014749605875616744, + "loss": 2.5751, + "step": 6915 + }, + { + "epoch": 0.5581470422080542, + "grad_norm": 0.70630943775177, + "learning_rate": 0.00014748216558274966, + "loss": 2.6228, + "step": 6916 + }, + { + "epoch": 0.5582277459446372, + "grad_norm": 0.7183346748352051, + "learning_rate": 0.0001474682712259522, + "loss": 2.5704, + "step": 6917 + }, + { + "epoch": 0.5583084496812203, + "grad_norm": 0.7622792720794678, + "learning_rate": 0.00014745437568612136, + "loss": 2.6031, + "step": 6918 + }, + { + "epoch": 0.5583891534178033, + "grad_norm": 0.6967802047729492, + "learning_rate": 0.00014744047896360344, + "loss": 2.6031, + "step": 6919 + }, + { + "epoch": 0.5584698571543862, + "grad_norm": 0.7827191948890686, + "learning_rate": 0.00014742658105874475, + "loss": 2.5427, + "step": 6920 + }, + { + "epoch": 0.5585505608909692, + "grad_norm": 0.6865705847740173, + "learning_rate": 0.0001474126819718917, + "loss": 2.6514, + "step": 6921 + }, + { + "epoch": 0.5586312646275523, + "grad_norm": 0.7181665897369385, + "learning_rate": 0.0001473987817033906, + "loss": 2.613, + "step": 6922 + }, + { + "epoch": 0.5587119683641353, + "grad_norm": 0.7198463082313538, + "learning_rate": 0.00014738488025358806, + "loss": 2.6423, + "step": 6923 + }, + { + "epoch": 0.5587926721007183, + "grad_norm": 0.773078441619873, + "learning_rate": 0.00014737097762283042, + "loss": 2.5946, + "step": 6924 + }, + { + "epoch": 0.5588733758373012, + "grad_norm": 0.7732799649238586, + "learning_rate": 0.00014735707381146416, + "loss": 2.6778, + "step": 6925 + }, + { + "epoch": 0.5589540795738843, + "grad_norm": 0.7639997601509094, + "learning_rate": 0.00014734316881983585, + "loss": 2.6064, + "step": 6926 + }, + { + "epoch": 0.5590347833104673, + "grad_norm": 0.7912085652351379, + "learning_rate": 0.00014732926264829198, + "loss": 2.5765, + 
"step": 6927 + }, + { + "epoch": 0.5591154870470503, + "grad_norm": 0.7460121512413025, + "learning_rate": 0.0001473153552971792, + "loss": 2.6724, + "step": 6928 + }, + { + "epoch": 0.5591961907836333, + "grad_norm": 0.6853603720664978, + "learning_rate": 0.00014730144676684408, + "loss": 2.5846, + "step": 6929 + }, + { + "epoch": 0.5592768945202163, + "grad_norm": 0.7368159294128418, + "learning_rate": 0.00014728753705763324, + "loss": 2.6626, + "step": 6930 + }, + { + "epoch": 0.5593575982567993, + "grad_norm": 0.6888907551765442, + "learning_rate": 0.0001472736261698934, + "loss": 2.6169, + "step": 6931 + }, + { + "epoch": 0.5594383019933823, + "grad_norm": 0.6978163719177246, + "learning_rate": 0.0001472597141039712, + "loss": 2.6367, + "step": 6932 + }, + { + "epoch": 0.5595190057299653, + "grad_norm": 0.7829774618148804, + "learning_rate": 0.00014724580086021335, + "loss": 2.5983, + "step": 6933 + }, + { + "epoch": 0.5595997094665484, + "grad_norm": 0.7872018218040466, + "learning_rate": 0.0001472318864389667, + "loss": 2.5418, + "step": 6934 + }, + { + "epoch": 0.5596804132031313, + "grad_norm": 0.6994973421096802, + "learning_rate": 0.00014721797084057793, + "loss": 2.6062, + "step": 6935 + }, + { + "epoch": 0.5597611169397143, + "grad_norm": 0.7281144857406616, + "learning_rate": 0.00014720405406539394, + "loss": 2.573, + "step": 6936 + }, + { + "epoch": 0.5598418206762973, + "grad_norm": 0.713513970375061, + "learning_rate": 0.0001471901361137615, + "loss": 2.6589, + "step": 6937 + }, + { + "epoch": 0.5599225244128803, + "grad_norm": 0.7752750515937805, + "learning_rate": 0.00014717621698602754, + "loss": 2.6478, + "step": 6938 + }, + { + "epoch": 0.5600032281494634, + "grad_norm": 0.6876000165939331, + "learning_rate": 0.00014716229668253889, + "loss": 2.6092, + "step": 6939 + }, + { + "epoch": 0.5600839318860463, + "grad_norm": 0.6371028423309326, + "learning_rate": 0.00014714837520364256, + "loss": 2.606, + "step": 6940 + }, + { + "epoch": 
0.5601646356226293, + "grad_norm": 0.6488915085792542, + "learning_rate": 0.00014713445254968546, + "loss": 2.5769, + "step": 6941 + }, + { + "epoch": 0.5602453393592123, + "grad_norm": 0.7286413908004761, + "learning_rate": 0.00014712052872101458, + "loss": 2.6267, + "step": 6942 + }, + { + "epoch": 0.5603260430957954, + "grad_norm": 0.6863759160041809, + "learning_rate": 0.00014710660371797696, + "loss": 2.641, + "step": 6943 + }, + { + "epoch": 0.5604067468323783, + "grad_norm": 0.706900417804718, + "learning_rate": 0.00014709267754091964, + "loss": 2.6344, + "step": 6944 + }, + { + "epoch": 0.5604874505689613, + "grad_norm": 0.6462892293930054, + "learning_rate": 0.0001470787501901897, + "loss": 2.5561, + "step": 6945 + }, + { + "epoch": 0.5605681543055443, + "grad_norm": 0.7342472076416016, + "learning_rate": 0.00014706482166613425, + "loss": 2.583, + "step": 6946 + }, + { + "epoch": 0.5606488580421274, + "grad_norm": 0.7132803797721863, + "learning_rate": 0.00014705089196910038, + "loss": 2.558, + "step": 6947 + }, + { + "epoch": 0.5607295617787104, + "grad_norm": 0.7709125876426697, + "learning_rate": 0.00014703696109943533, + "loss": 2.6165, + "step": 6948 + }, + { + "epoch": 0.5608102655152933, + "grad_norm": 0.7108885645866394, + "learning_rate": 0.00014702302905748619, + "loss": 2.5788, + "step": 6949 + }, + { + "epoch": 0.5608909692518763, + "grad_norm": 0.7295591235160828, + "learning_rate": 0.0001470090958436003, + "loss": 2.6526, + "step": 6950 + }, + { + "epoch": 0.5609716729884594, + "grad_norm": 0.7235364317893982, + "learning_rate": 0.00014699516145812486, + "loss": 2.604, + "step": 6951 + }, + { + "epoch": 0.5610523767250424, + "grad_norm": 0.6723269820213318, + "learning_rate": 0.00014698122590140714, + "loss": 2.5838, + "step": 6952 + }, + { + "epoch": 0.5611330804616254, + "grad_norm": 0.7022266983985901, + "learning_rate": 0.00014696728917379447, + "loss": 2.6086, + "step": 6953 + }, + { + "epoch": 0.5612137841982083, + "grad_norm": 
0.6923824548721313, + "learning_rate": 0.00014695335127563414, + "loss": 2.6678, + "step": 6954 + }, + { + "epoch": 0.5612944879347914, + "grad_norm": 0.6909339427947998, + "learning_rate": 0.0001469394122072736, + "loss": 2.6397, + "step": 6955 + }, + { + "epoch": 0.5613751916713744, + "grad_norm": 0.710299015045166, + "learning_rate": 0.00014692547196906022, + "loss": 2.5973, + "step": 6956 + }, + { + "epoch": 0.5614558954079574, + "grad_norm": 0.7141178250312805, + "learning_rate": 0.00014691153056134136, + "loss": 2.6111, + "step": 6957 + }, + { + "epoch": 0.5615365991445403, + "grad_norm": 0.6994750499725342, + "learning_rate": 0.00014689758798446456, + "loss": 2.6498, + "step": 6958 + }, + { + "epoch": 0.5616173028811234, + "grad_norm": 0.6951611638069153, + "learning_rate": 0.00014688364423877726, + "loss": 2.6208, + "step": 6959 + }, + { + "epoch": 0.5616980066177064, + "grad_norm": 0.6610642075538635, + "learning_rate": 0.000146869699324627, + "loss": 2.5725, + "step": 6960 + }, + { + "epoch": 0.5617787103542894, + "grad_norm": 0.6771267056465149, + "learning_rate": 0.00014685575324236135, + "loss": 2.6336, + "step": 6961 + }, + { + "epoch": 0.5618594140908724, + "grad_norm": 0.7431008815765381, + "learning_rate": 0.0001468418059923278, + "loss": 2.6782, + "step": 6962 + }, + { + "epoch": 0.5619401178274555, + "grad_norm": 0.7399705648422241, + "learning_rate": 0.000146827857574874, + "loss": 2.6212, + "step": 6963 + }, + { + "epoch": 0.5620208215640384, + "grad_norm": 0.7237067222595215, + "learning_rate": 0.00014681390799034763, + "loss": 2.6261, + "step": 6964 + }, + { + "epoch": 0.5621015253006214, + "grad_norm": 0.7033257484436035, + "learning_rate": 0.00014679995723909623, + "loss": 2.6912, + "step": 6965 + }, + { + "epoch": 0.5621822290372044, + "grad_norm": 0.6953759789466858, + "learning_rate": 0.00014678600532146762, + "loss": 2.6022, + "step": 6966 + }, + { + "epoch": 0.5622629327737875, + "grad_norm": 0.8338057994842529, + "learning_rate": 
0.0001467720522378094, + "loss": 2.595, + "step": 6967 + }, + { + "epoch": 0.5623436365103704, + "grad_norm": 0.6506100296974182, + "learning_rate": 0.00014675809798846942, + "loss": 2.6033, + "step": 6968 + }, + { + "epoch": 0.5624243402469534, + "grad_norm": 0.7122468948364258, + "learning_rate": 0.0001467441425737954, + "loss": 2.56, + "step": 6969 + }, + { + "epoch": 0.5625050439835364, + "grad_norm": 0.7012680172920227, + "learning_rate": 0.00014673018599413516, + "loss": 2.6052, + "step": 6970 + }, + { + "epoch": 0.5625857477201195, + "grad_norm": 0.668187141418457, + "learning_rate": 0.00014671622824983653, + "loss": 2.6675, + "step": 6971 + }, + { + "epoch": 0.5626664514567025, + "grad_norm": 0.7259203791618347, + "learning_rate": 0.00014670226934124738, + "loss": 2.5977, + "step": 6972 + }, + { + "epoch": 0.5627471551932854, + "grad_norm": 0.6705875396728516, + "learning_rate": 0.00014668830926871555, + "loss": 2.649, + "step": 6973 + }, + { + "epoch": 0.5628278589298684, + "grad_norm": 0.682731568813324, + "learning_rate": 0.00014667434803258906, + "loss": 2.6084, + "step": 6974 + }, + { + "epoch": 0.5629085626664515, + "grad_norm": 0.7061700224876404, + "learning_rate": 0.00014666038563321577, + "loss": 2.6256, + "step": 6975 + }, + { + "epoch": 0.5629892664030345, + "grad_norm": 0.6839977502822876, + "learning_rate": 0.00014664642207094374, + "loss": 2.6342, + "step": 6976 + }, + { + "epoch": 0.5630699701396175, + "grad_norm": 0.7376503348350525, + "learning_rate": 0.00014663245734612094, + "loss": 2.6001, + "step": 6977 + }, + { + "epoch": 0.5631506738762004, + "grad_norm": 0.6901546716690063, + "learning_rate": 0.0001466184914590954, + "loss": 2.6715, + "step": 6978 + }, + { + "epoch": 0.5632313776127835, + "grad_norm": 0.816223680973053, + "learning_rate": 0.00014660452441021512, + "loss": 2.6407, + "step": 6979 + }, + { + "epoch": 0.5633120813493665, + "grad_norm": 0.6904644966125488, + "learning_rate": 0.00014659055619982835, + "loss": 2.5543, + 
"step": 6980 + }, + { + "epoch": 0.5633927850859495, + "grad_norm": 0.6784235239028931, + "learning_rate": 0.0001465765868282831, + "loss": 2.6184, + "step": 6981 + }, + { + "epoch": 0.5634734888225325, + "grad_norm": 0.7689006328582764, + "learning_rate": 0.00014656261629592755, + "loss": 2.644, + "step": 6982 + }, + { + "epoch": 0.5635541925591155, + "grad_norm": 0.7608775496482849, + "learning_rate": 0.0001465486446031099, + "loss": 2.5952, + "step": 6983 + }, + { + "epoch": 0.5636348962956985, + "grad_norm": 0.7266525626182556, + "learning_rate": 0.00014653467175017833, + "loss": 2.6479, + "step": 6984 + }, + { + "epoch": 0.5637156000322815, + "grad_norm": 0.6907477974891663, + "learning_rate": 0.00014652069773748113, + "loss": 2.5825, + "step": 6985 + }, + { + "epoch": 0.5637963037688645, + "grad_norm": 0.7790403366088867, + "learning_rate": 0.00014650672256536648, + "loss": 2.5948, + "step": 6986 + }, + { + "epoch": 0.5638770075054474, + "grad_norm": 0.7072858214378357, + "learning_rate": 0.00014649274623418278, + "loss": 2.6017, + "step": 6987 + }, + { + "epoch": 0.5639577112420305, + "grad_norm": 0.7140414118766785, + "learning_rate": 0.0001464787687442783, + "loss": 2.5709, + "step": 6988 + }, + { + "epoch": 0.5640384149786135, + "grad_norm": 0.857783317565918, + "learning_rate": 0.00014646479009600139, + "loss": 2.7049, + "step": 6989 + }, + { + "epoch": 0.5641191187151965, + "grad_norm": 0.7599344253540039, + "learning_rate": 0.00014645081028970047, + "loss": 2.6369, + "step": 6990 + }, + { + "epoch": 0.5641998224517795, + "grad_norm": 0.7286150455474854, + "learning_rate": 0.00014643682932572393, + "loss": 2.6238, + "step": 6991 + }, + { + "epoch": 0.5642805261883626, + "grad_norm": 0.7095075249671936, + "learning_rate": 0.0001464228472044202, + "loss": 2.5924, + "step": 6992 + }, + { + "epoch": 0.5643612299249455, + "grad_norm": 0.7583668828010559, + "learning_rate": 0.0001464088639261378, + "loss": 2.6098, + "step": 6993 + }, + { + "epoch": 
0.5644419336615285, + "grad_norm": 0.7393970489501953, + "learning_rate": 0.00014639487949122515, + "loss": 2.6036, + "step": 6994 + }, + { + "epoch": 0.5645226373981115, + "grad_norm": 0.6789388656616211, + "learning_rate": 0.00014638089390003086, + "loss": 2.642, + "step": 6995 + }, + { + "epoch": 0.5646033411346946, + "grad_norm": 0.8021289706230164, + "learning_rate": 0.00014636690715290346, + "loss": 2.6851, + "step": 6996 + }, + { + "epoch": 0.5646840448712775, + "grad_norm": 0.6931039094924927, + "learning_rate": 0.00014635291925019152, + "loss": 2.6358, + "step": 6997 + }, + { + "epoch": 0.5647647486078605, + "grad_norm": 0.7356590032577515, + "learning_rate": 0.00014633893019224366, + "loss": 2.5661, + "step": 6998 + }, + { + "epoch": 0.5648454523444435, + "grad_norm": 0.6777941584587097, + "learning_rate": 0.0001463249399794085, + "loss": 2.5578, + "step": 6999 + }, + { + "epoch": 0.5649261560810266, + "grad_norm": 0.7163615822792053, + "learning_rate": 0.0001463109486120348, + "loss": 2.5582, + "step": 7000 + }, + { + "epoch": 0.5649261560810266, + "eval_loss": 2.5298855304718018, + "eval_runtime": 757.774, + "eval_samples_per_second": 3.457, + "eval_steps_per_second": 0.577, + "step": 7000 + }, + { + "epoch": 0.5650068598176096, + "grad_norm": 0.7175148129463196, + "learning_rate": 0.0001462969560904712, + "loss": 2.568, + "step": 7001 + }, + { + "epoch": 0.5650875635541925, + "grad_norm": 0.6998937129974365, + "learning_rate": 0.00014628296241506636, + "loss": 2.6347, + "step": 7002 + }, + { + "epoch": 0.5651682672907755, + "grad_norm": 0.8140312433242798, + "learning_rate": 0.00014626896758616916, + "loss": 2.6566, + "step": 7003 + }, + { + "epoch": 0.5652489710273586, + "grad_norm": 0.7218164205551147, + "learning_rate": 0.00014625497160412833, + "loss": 2.5693, + "step": 7004 + }, + { + "epoch": 0.5653296747639416, + "grad_norm": 0.6974074244499207, + "learning_rate": 0.0001462409744692927, + "loss": 2.6084, + "step": 7005 + }, + { + "epoch": 
0.5654103785005246, + "grad_norm": 0.7475053071975708, + "learning_rate": 0.00014622697618201113, + "loss": 2.6534, + "step": 7006 + }, + { + "epoch": 0.5654910822371075, + "grad_norm": 0.6768492460250854, + "learning_rate": 0.00014621297674263247, + "loss": 2.585, + "step": 7007 + }, + { + "epoch": 0.5655717859736906, + "grad_norm": 0.7023029923439026, + "learning_rate": 0.0001461989761515056, + "loss": 2.6219, + "step": 7008 + }, + { + "epoch": 0.5656524897102736, + "grad_norm": 0.7248445749282837, + "learning_rate": 0.0001461849744089795, + "loss": 2.6382, + "step": 7009 + }, + { + "epoch": 0.5657331934468566, + "grad_norm": 0.6961148381233215, + "learning_rate": 0.00014617097151540308, + "loss": 2.7184, + "step": 7010 + }, + { + "epoch": 0.5658138971834396, + "grad_norm": 0.6649057269096375, + "learning_rate": 0.0001461569674711254, + "loss": 2.6059, + "step": 7011 + }, + { + "epoch": 0.5658946009200226, + "grad_norm": 0.7451788783073425, + "learning_rate": 0.00014614296227649542, + "loss": 2.5697, + "step": 7012 + }, + { + "epoch": 0.5659753046566056, + "grad_norm": 0.6880216598510742, + "learning_rate": 0.0001461289559318622, + "loss": 2.5785, + "step": 7013 + }, + { + "epoch": 0.5660560083931886, + "grad_norm": 0.7505971789360046, + "learning_rate": 0.00014611494843757482, + "loss": 2.5479, + "step": 7014 + }, + { + "epoch": 0.5661367121297716, + "grad_norm": 0.745914876461029, + "learning_rate": 0.00014610093979398235, + "loss": 2.6367, + "step": 7015 + }, + { + "epoch": 0.5662174158663547, + "grad_norm": 0.6758660674095154, + "learning_rate": 0.000146086930001434, + "loss": 2.5673, + "step": 7016 + }, + { + "epoch": 0.5662981196029376, + "grad_norm": 0.7114273309707642, + "learning_rate": 0.00014607291906027886, + "loss": 2.6188, + "step": 7017 + }, + { + "epoch": 0.5663788233395206, + "grad_norm": 0.6791165471076965, + "learning_rate": 0.00014605890697086613, + "loss": 2.6197, + "step": 7018 + }, + { + "epoch": 0.5664595270761036, + "grad_norm": 
0.6948217153549194, + "learning_rate": 0.00014604489373354503, + "loss": 2.5996, + "step": 7019 + }, + { + "epoch": 0.5665402308126867, + "grad_norm": 0.6993576884269714, + "learning_rate": 0.00014603087934866483, + "loss": 2.565, + "step": 7020 + }, + { + "epoch": 0.5666209345492697, + "grad_norm": 0.6936905384063721, + "learning_rate": 0.0001460168638165748, + "loss": 2.6524, + "step": 7021 + }, + { + "epoch": 0.5667016382858526, + "grad_norm": 0.6810741424560547, + "learning_rate": 0.00014600284713762424, + "loss": 2.6519, + "step": 7022 + }, + { + "epoch": 0.5667823420224356, + "grad_norm": 0.7540227770805359, + "learning_rate": 0.00014598882931216245, + "loss": 2.659, + "step": 7023 + }, + { + "epoch": 0.5668630457590187, + "grad_norm": 0.6520613431930542, + "learning_rate": 0.0001459748103405388, + "loss": 2.5341, + "step": 7024 + }, + { + "epoch": 0.5669437494956017, + "grad_norm": 0.7159109711647034, + "learning_rate": 0.00014596079022310277, + "loss": 2.6548, + "step": 7025 + }, + { + "epoch": 0.5670244532321846, + "grad_norm": 0.803284227848053, + "learning_rate": 0.00014594676896020366, + "loss": 2.705, + "step": 7026 + }, + { + "epoch": 0.5671051569687676, + "grad_norm": 0.7069976925849915, + "learning_rate": 0.00014593274655219095, + "loss": 2.5733, + "step": 7027 + }, + { + "epoch": 0.5671858607053507, + "grad_norm": 0.7085167169570923, + "learning_rate": 0.00014591872299941417, + "loss": 2.6247, + "step": 7028 + }, + { + "epoch": 0.5672665644419337, + "grad_norm": 0.6748499274253845, + "learning_rate": 0.00014590469830222272, + "loss": 2.6446, + "step": 7029 + }, + { + "epoch": 0.5673472681785167, + "grad_norm": 0.6885821223258972, + "learning_rate": 0.00014589067246096623, + "loss": 2.5879, + "step": 7030 + }, + { + "epoch": 0.5674279719150996, + "grad_norm": 0.7220324277877808, + "learning_rate": 0.0001458766454759942, + "loss": 2.6249, + "step": 7031 + }, + { + "epoch": 0.5675086756516827, + "grad_norm": 0.6712783575057983, + "learning_rate": 
0.00014586261734765628, + "loss": 2.5971, + "step": 7032 + }, + { + "epoch": 0.5675893793882657, + "grad_norm": 0.6582161784172058, + "learning_rate": 0.00014584858807630203, + "loss": 2.6224, + "step": 7033 + }, + { + "epoch": 0.5676700831248487, + "grad_norm": 0.6699219346046448, + "learning_rate": 0.0001458345576622811, + "loss": 2.5926, + "step": 7034 + }, + { + "epoch": 0.5677507868614317, + "grad_norm": 0.6508033871650696, + "learning_rate": 0.0001458205261059432, + "loss": 2.6311, + "step": 7035 + }, + { + "epoch": 0.5678314905980147, + "grad_norm": 0.7551338076591492, + "learning_rate": 0.00014580649340763802, + "loss": 2.5729, + "step": 7036 + }, + { + "epoch": 0.5679121943345977, + "grad_norm": 0.6875829100608826, + "learning_rate": 0.00014579245956771527, + "loss": 2.6253, + "step": 7037 + }, + { + "epoch": 0.5679928980711807, + "grad_norm": 0.698204517364502, + "learning_rate": 0.00014577842458652474, + "loss": 2.6218, + "step": 7038 + }, + { + "epoch": 0.5680736018077637, + "grad_norm": 0.8258630037307739, + "learning_rate": 0.00014576438846441615, + "loss": 2.6307, + "step": 7039 + }, + { + "epoch": 0.5681543055443466, + "grad_norm": 0.753105878829956, + "learning_rate": 0.00014575035120173942, + "loss": 2.5664, + "step": 7040 + }, + { + "epoch": 0.5682350092809297, + "grad_norm": 0.6999726295471191, + "learning_rate": 0.00014573631279884435, + "loss": 2.6857, + "step": 7041 + }, + { + "epoch": 0.5683157130175127, + "grad_norm": 0.6484847068786621, + "learning_rate": 0.00014572227325608078, + "loss": 2.6068, + "step": 7042 + }, + { + "epoch": 0.5683964167540957, + "grad_norm": 0.7098011374473572, + "learning_rate": 0.00014570823257379866, + "loss": 2.6591, + "step": 7043 + }, + { + "epoch": 0.5684771204906787, + "grad_norm": 0.8304192423820496, + "learning_rate": 0.0001456941907523479, + "loss": 2.6582, + "step": 7044 + }, + { + "epoch": 0.5685578242272618, + "grad_norm": 0.763214111328125, + "learning_rate": 0.00014568014779207844, + "loss": 2.6605, 
+ "step": 7045 + }, + { + "epoch": 0.5686385279638447, + "grad_norm": 0.6805880665779114, + "learning_rate": 0.00014566610369334032, + "loss": 2.6362, + "step": 7046 + }, + { + "epoch": 0.5687192317004277, + "grad_norm": 0.6753434538841248, + "learning_rate": 0.00014565205845648352, + "loss": 2.6352, + "step": 7047 + }, + { + "epoch": 0.5687999354370107, + "grad_norm": 0.7065438032150269, + "learning_rate": 0.00014563801208185807, + "loss": 2.5975, + "step": 7048 + }, + { + "epoch": 0.5688806391735938, + "grad_norm": 0.6863527894020081, + "learning_rate": 0.00014562396456981407, + "loss": 2.576, + "step": 7049 + }, + { + "epoch": 0.5689613429101767, + "grad_norm": 0.7344440817832947, + "learning_rate": 0.00014560991592070158, + "loss": 2.5933, + "step": 7050 + }, + { + "epoch": 0.5690420466467597, + "grad_norm": 0.699992835521698, + "learning_rate": 0.00014559586613487082, + "loss": 2.6161, + "step": 7051 + }, + { + "epoch": 0.5691227503833427, + "grad_norm": 0.7287258505821228, + "learning_rate": 0.00014558181521267185, + "loss": 2.665, + "step": 7052 + }, + { + "epoch": 0.5692034541199258, + "grad_norm": 0.7304692268371582, + "learning_rate": 0.0001455677631544549, + "loss": 2.5696, + "step": 7053 + }, + { + "epoch": 0.5692841578565088, + "grad_norm": 0.6556086540222168, + "learning_rate": 0.00014555370996057016, + "loss": 2.6405, + "step": 7054 + }, + { + "epoch": 0.5693648615930917, + "grad_norm": 0.6796221137046814, + "learning_rate": 0.0001455396556313679, + "loss": 2.6475, + "step": 7055 + }, + { + "epoch": 0.5694455653296747, + "grad_norm": 0.7067505717277527, + "learning_rate": 0.00014552560016719838, + "loss": 2.6344, + "step": 7056 + }, + { + "epoch": 0.5695262690662578, + "grad_norm": 0.7108997106552124, + "learning_rate": 0.00014551154356841193, + "loss": 2.6543, + "step": 7057 + }, + { + "epoch": 0.5696069728028408, + "grad_norm": 0.7296212911605835, + "learning_rate": 0.0001454974858353588, + "loss": 2.6152, + "step": 7058 + }, + { + "epoch": 
0.5696876765394238, + "grad_norm": 0.7329154014587402, + "learning_rate": 0.00014548342696838943, + "loss": 2.6338, + "step": 7059 + }, + { + "epoch": 0.5697683802760067, + "grad_norm": 0.6880258321762085, + "learning_rate": 0.00014546936696785412, + "loss": 2.5834, + "step": 7060 + }, + { + "epoch": 0.5698490840125898, + "grad_norm": 0.7140741348266602, + "learning_rate": 0.00014545530583410336, + "loss": 2.6361, + "step": 7061 + }, + { + "epoch": 0.5699297877491728, + "grad_norm": 0.6419476866722107, + "learning_rate": 0.00014544124356748755, + "loss": 2.4982, + "step": 7062 + }, + { + "epoch": 0.5700104914857558, + "grad_norm": 0.6934036612510681, + "learning_rate": 0.00014542718016835718, + "loss": 2.5748, + "step": 7063 + }, + { + "epoch": 0.5700911952223388, + "grad_norm": 0.721663236618042, + "learning_rate": 0.0001454131156370627, + "loss": 2.5419, + "step": 7064 + }, + { + "epoch": 0.5701718989589218, + "grad_norm": 0.734062671661377, + "learning_rate": 0.00014539904997395468, + "loss": 2.6288, + "step": 7065 + }, + { + "epoch": 0.5702526026955048, + "grad_norm": 0.7927694320678711, + "learning_rate": 0.00014538498317938367, + "loss": 2.6331, + "step": 7066 + }, + { + "epoch": 0.5703333064320878, + "grad_norm": 0.715929388999939, + "learning_rate": 0.00014537091525370025, + "loss": 2.6333, + "step": 7067 + }, + { + "epoch": 0.5704140101686708, + "grad_norm": 0.772230327129364, + "learning_rate": 0.00014535684619725498, + "loss": 2.6019, + "step": 7068 + }, + { + "epoch": 0.5704947139052539, + "grad_norm": 0.7277318239212036, + "learning_rate": 0.0001453427760103986, + "loss": 2.6062, + "step": 7069 + }, + { + "epoch": 0.5705754176418368, + "grad_norm": 0.6708227396011353, + "learning_rate": 0.00014532870469348164, + "loss": 2.6613, + "step": 7070 + }, + { + "epoch": 0.5706561213784198, + "grad_norm": 0.7507323622703552, + "learning_rate": 0.0001453146322468549, + "loss": 2.6456, + "step": 7071 + }, + { + "epoch": 0.5707368251150028, + "grad_norm": 
0.6864063739776611, + "learning_rate": 0.00014530055867086912, + "loss": 2.6361, + "step": 7072 + }, + { + "epoch": 0.5708175288515859, + "grad_norm": 0.6805310249328613, + "learning_rate": 0.00014528648396587498, + "loss": 2.6088, + "step": 7073 + }, + { + "epoch": 0.5708982325881689, + "grad_norm": 0.7946523427963257, + "learning_rate": 0.00014527240813222325, + "loss": 2.6533, + "step": 7074 + }, + { + "epoch": 0.5709789363247518, + "grad_norm": 0.6814306974411011, + "learning_rate": 0.00014525833117026474, + "loss": 2.6478, + "step": 7075 + }, + { + "epoch": 0.5710596400613348, + "grad_norm": 0.749664843082428, + "learning_rate": 0.00014524425308035034, + "loss": 2.6296, + "step": 7076 + }, + { + "epoch": 0.5711403437979179, + "grad_norm": 0.6774656772613525, + "learning_rate": 0.00014523017386283091, + "loss": 2.5867, + "step": 7077 + }, + { + "epoch": 0.5712210475345009, + "grad_norm": 0.7331634163856506, + "learning_rate": 0.00014521609351805733, + "loss": 2.6484, + "step": 7078 + }, + { + "epoch": 0.5713017512710838, + "grad_norm": 0.7076910734176636, + "learning_rate": 0.00014520201204638045, + "loss": 2.6464, + "step": 7079 + }, + { + "epoch": 0.5713824550076668, + "grad_norm": 0.74099200963974, + "learning_rate": 0.00014518792944815127, + "loss": 2.6304, + "step": 7080 + }, + { + "epoch": 0.5714631587442499, + "grad_norm": 0.6673823595046997, + "learning_rate": 0.00014517384572372078, + "loss": 2.5903, + "step": 7081 + }, + { + "epoch": 0.5715438624808329, + "grad_norm": 0.6872609257698059, + "learning_rate": 0.00014515976087343997, + "loss": 2.6189, + "step": 7082 + }, + { + "epoch": 0.5716245662174159, + "grad_norm": 0.7363224625587463, + "learning_rate": 0.0001451456748976599, + "loss": 2.5845, + "step": 7083 + }, + { + "epoch": 0.5717052699539988, + "grad_norm": 0.7672157287597656, + "learning_rate": 0.00014513158779673157, + "loss": 2.6331, + "step": 7084 + }, + { + "epoch": 0.5717859736905819, + "grad_norm": 0.661195695400238, + "learning_rate": 
0.00014511749957100612, + "loss": 2.5827, + "step": 7085 + }, + { + "epoch": 0.5718666774271649, + "grad_norm": 0.8034788370132446, + "learning_rate": 0.0001451034102208346, + "loss": 2.6209, + "step": 7086 + }, + { + "epoch": 0.5719473811637479, + "grad_norm": 0.7318302392959595, + "learning_rate": 0.00014508931974656822, + "loss": 2.5898, + "step": 7087 + }, + { + "epoch": 0.5720280849003309, + "grad_norm": 0.7334744930267334, + "learning_rate": 0.00014507522814855814, + "loss": 2.5893, + "step": 7088 + }, + { + "epoch": 0.5721087886369138, + "grad_norm": 0.783051609992981, + "learning_rate": 0.00014506113542715553, + "loss": 2.6284, + "step": 7089 + }, + { + "epoch": 0.5721894923734969, + "grad_norm": 0.7319497466087341, + "learning_rate": 0.00014504704158271165, + "loss": 2.5705, + "step": 7090 + }, + { + "epoch": 0.5722701961100799, + "grad_norm": 0.7886925935745239, + "learning_rate": 0.00014503294661557772, + "loss": 2.641, + "step": 7091 + }, + { + "epoch": 0.5723508998466629, + "grad_norm": 0.6882795691490173, + "learning_rate": 0.00014501885052610502, + "loss": 2.5714, + "step": 7092 + }, + { + "epoch": 0.5724316035832459, + "grad_norm": 0.7089235186576843, + "learning_rate": 0.00014500475331464494, + "loss": 2.6073, + "step": 7093 + }, + { + "epoch": 0.5725123073198289, + "grad_norm": 0.7261029481887817, + "learning_rate": 0.00014499065498154874, + "loss": 2.5595, + "step": 7094 + }, + { + "epoch": 0.5725930110564119, + "grad_norm": 0.7625105977058411, + "learning_rate": 0.0001449765555271678, + "loss": 2.5978, + "step": 7095 + }, + { + "epoch": 0.5726737147929949, + "grad_norm": 0.7853986024856567, + "learning_rate": 0.00014496245495185353, + "loss": 2.6378, + "step": 7096 + }, + { + "epoch": 0.5727544185295779, + "grad_norm": 0.8070923686027527, + "learning_rate": 0.00014494835325595736, + "loss": 2.7062, + "step": 7097 + }, + { + "epoch": 0.572835122266161, + "grad_norm": 0.7074965834617615, + "learning_rate": 0.00014493425043983073, + "loss": 2.5177, 
+ "step": 7098 + }, + { + "epoch": 0.5729158260027439, + "grad_norm": 0.6890520453453064, + "learning_rate": 0.00014492014650382512, + "loss": 2.6058, + "step": 7099 + }, + { + "epoch": 0.5729965297393269, + "grad_norm": 0.6979860067367554, + "learning_rate": 0.00014490604144829202, + "loss": 2.5274, + "step": 7100 + }, + { + "epoch": 0.5730772334759099, + "grad_norm": 0.7972229719161987, + "learning_rate": 0.000144891935273583, + "loss": 2.6369, + "step": 7101 + }, + { + "epoch": 0.573157937212493, + "grad_norm": 0.6994345188140869, + "learning_rate": 0.0001448778279800496, + "loss": 2.5975, + "step": 7102 + }, + { + "epoch": 0.573238640949076, + "grad_norm": 0.7943929433822632, + "learning_rate": 0.0001448637195680434, + "loss": 2.6317, + "step": 7103 + }, + { + "epoch": 0.5733193446856589, + "grad_norm": 0.6975306272506714, + "learning_rate": 0.00014484961003791605, + "loss": 2.6264, + "step": 7104 + }, + { + "epoch": 0.5734000484222419, + "grad_norm": 0.6889060735702515, + "learning_rate": 0.00014483549939001917, + "loss": 2.5974, + "step": 7105 + }, + { + "epoch": 0.573480752158825, + "grad_norm": 0.7372777462005615, + "learning_rate": 0.00014482138762470444, + "loss": 2.5851, + "step": 7106 + }, + { + "epoch": 0.573561455895408, + "grad_norm": 0.7045157551765442, + "learning_rate": 0.00014480727474232362, + "loss": 2.6451, + "step": 7107 + }, + { + "epoch": 0.5736421596319909, + "grad_norm": 0.6974517107009888, + "learning_rate": 0.00014479316074322832, + "loss": 2.6796, + "step": 7108 + }, + { + "epoch": 0.5737228633685739, + "grad_norm": 0.7328097224235535, + "learning_rate": 0.00014477904562777038, + "loss": 2.5923, + "step": 7109 + }, + { + "epoch": 0.573803567105157, + "grad_norm": 0.7288877964019775, + "learning_rate": 0.0001447649293963016, + "loss": 2.6012, + "step": 7110 + }, + { + "epoch": 0.57388427084174, + "grad_norm": 0.7054389119148254, + "learning_rate": 0.00014475081204917372, + "loss": 2.6666, + "step": 7111 + }, + { + "epoch": 
0.573964974578323, + "grad_norm": 0.7447949647903442, + "learning_rate": 0.00014473669358673865, + "loss": 2.6093, + "step": 7112 + }, + { + "epoch": 0.5740456783149059, + "grad_norm": 0.6431592106819153, + "learning_rate": 0.0001447225740093482, + "loss": 2.6242, + "step": 7113 + }, + { + "epoch": 0.574126382051489, + "grad_norm": 0.7096747756004333, + "learning_rate": 0.00014470845331735434, + "loss": 2.6297, + "step": 7114 + }, + { + "epoch": 0.574207085788072, + "grad_norm": 0.6918880939483643, + "learning_rate": 0.00014469433151110894, + "loss": 2.5849, + "step": 7115 + }, + { + "epoch": 0.574287789524655, + "grad_norm": 0.6617783308029175, + "learning_rate": 0.00014468020859096395, + "loss": 2.5972, + "step": 7116 + }, + { + "epoch": 0.574368493261238, + "grad_norm": 0.6525121927261353, + "learning_rate": 0.0001446660845572714, + "loss": 2.5888, + "step": 7117 + }, + { + "epoch": 0.574449196997821, + "grad_norm": 0.7024720907211304, + "learning_rate": 0.00014465195941038326, + "loss": 2.6135, + "step": 7118 + }, + { + "epoch": 0.574529900734404, + "grad_norm": 0.7660520672798157, + "learning_rate": 0.00014463783315065153, + "loss": 2.5837, + "step": 7119 + }, + { + "epoch": 0.574610604470987, + "grad_norm": 0.8206443190574646, + "learning_rate": 0.00014462370577842838, + "loss": 2.6749, + "step": 7120 + }, + { + "epoch": 0.57469130820757, + "grad_norm": 0.7176216840744019, + "learning_rate": 0.00014460957729406577, + "loss": 2.5814, + "step": 7121 + }, + { + "epoch": 0.5747720119441531, + "grad_norm": 0.7867588400840759, + "learning_rate": 0.0001445954476979159, + "loss": 2.5697, + "step": 7122 + }, + { + "epoch": 0.574852715680736, + "grad_norm": 0.7150471806526184, + "learning_rate": 0.0001445813169903309, + "loss": 2.5689, + "step": 7123 + }, + { + "epoch": 0.574933419417319, + "grad_norm": 0.7082479596138, + "learning_rate": 0.00014456718517166296, + "loss": 2.6081, + "step": 7124 + }, + { + "epoch": 0.575014123153902, + "grad_norm": 0.7207253575325012, + 
"learning_rate": 0.00014455305224226426, + "loss": 2.6573, + "step": 7125 + }, + { + "epoch": 0.5750948268904851, + "grad_norm": 0.7451751232147217, + "learning_rate": 0.00014453891820248704, + "loss": 2.6057, + "step": 7126 + }, + { + "epoch": 0.575175530627068, + "grad_norm": 0.7030230164527893, + "learning_rate": 0.0001445247830526835, + "loss": 2.6122, + "step": 7127 + }, + { + "epoch": 0.575256234363651, + "grad_norm": 0.7233754396438599, + "learning_rate": 0.00014451064679320605, + "loss": 2.5937, + "step": 7128 + }, + { + "epoch": 0.575336938100234, + "grad_norm": 0.6943942904472351, + "learning_rate": 0.0001444965094244069, + "loss": 2.6327, + "step": 7129 + }, + { + "epoch": 0.5754176418368171, + "grad_norm": 0.682056725025177, + "learning_rate": 0.00014448237094663843, + "loss": 2.6212, + "step": 7130 + }, + { + "epoch": 0.5754983455734001, + "grad_norm": 0.7424136400222778, + "learning_rate": 0.00014446823136025298, + "loss": 2.6031, + "step": 7131 + }, + { + "epoch": 0.575579049309983, + "grad_norm": 0.7464002370834351, + "learning_rate": 0.00014445409066560298, + "loss": 2.6363, + "step": 7132 + }, + { + "epoch": 0.575659753046566, + "grad_norm": 0.7137650847434998, + "learning_rate": 0.00014443994886304085, + "loss": 2.5343, + "step": 7133 + }, + { + "epoch": 0.5757404567831491, + "grad_norm": 0.6744158864021301, + "learning_rate": 0.00014442580595291901, + "loss": 2.6463, + "step": 7134 + }, + { + "epoch": 0.5758211605197321, + "grad_norm": 0.6947084069252014, + "learning_rate": 0.00014441166193558991, + "loss": 2.6074, + "step": 7135 + }, + { + "epoch": 0.5759018642563151, + "grad_norm": 0.6981585621833801, + "learning_rate": 0.00014439751681140616, + "loss": 2.6257, + "step": 7136 + }, + { + "epoch": 0.575982567992898, + "grad_norm": 0.6800102591514587, + "learning_rate": 0.00014438337058072023, + "loss": 2.6447, + "step": 7137 + }, + { + "epoch": 0.5760632717294811, + "grad_norm": 0.6952316164970398, + "learning_rate": 0.00014436922324388465, + 
"loss": 2.5739, + "step": 7138 + }, + { + "epoch": 0.5761439754660641, + "grad_norm": 0.709170937538147, + "learning_rate": 0.0001443550748012521, + "loss": 2.5918, + "step": 7139 + }, + { + "epoch": 0.5762246792026471, + "grad_norm": 0.7677363157272339, + "learning_rate": 0.00014434092525317512, + "loss": 2.6322, + "step": 7140 + }, + { + "epoch": 0.5763053829392301, + "grad_norm": 0.6730263233184814, + "learning_rate": 0.00014432677460000636, + "loss": 2.6764, + "step": 7141 + }, + { + "epoch": 0.576386086675813, + "grad_norm": 0.6782239675521851, + "learning_rate": 0.0001443126228420985, + "loss": 2.5208, + "step": 7142 + }, + { + "epoch": 0.5764667904123961, + "grad_norm": 0.7737600207328796, + "learning_rate": 0.00014429846997980424, + "loss": 2.6964, + "step": 7143 + }, + { + "epoch": 0.5765474941489791, + "grad_norm": 0.7456403374671936, + "learning_rate": 0.00014428431601347635, + "loss": 2.6163, + "step": 7144 + }, + { + "epoch": 0.5766281978855621, + "grad_norm": 0.7824606895446777, + "learning_rate": 0.00014427016094346754, + "loss": 2.6499, + "step": 7145 + }, + { + "epoch": 0.576708901622145, + "grad_norm": 0.7233635187149048, + "learning_rate": 0.00014425600477013055, + "loss": 2.6064, + "step": 7146 + }, + { + "epoch": 0.5767896053587281, + "grad_norm": 0.7008275389671326, + "learning_rate": 0.00014424184749381824, + "loss": 2.5585, + "step": 7147 + }, + { + "epoch": 0.5768703090953111, + "grad_norm": 0.6817710995674133, + "learning_rate": 0.00014422768911488346, + "loss": 2.6215, + "step": 7148 + }, + { + "epoch": 0.5769510128318941, + "grad_norm": 0.6860779523849487, + "learning_rate": 0.00014421352963367906, + "loss": 2.5877, + "step": 7149 + }, + { + "epoch": 0.5770317165684771, + "grad_norm": 0.732865035533905, + "learning_rate": 0.00014419936905055793, + "loss": 2.5704, + "step": 7150 + }, + { + "epoch": 0.5771124203050602, + "grad_norm": 0.6992458701133728, + "learning_rate": 0.00014418520736587297, + "loss": 2.6654, + "step": 7151 + }, + { + 
"epoch": 0.5771931240416431, + "grad_norm": 0.6865053176879883, + "learning_rate": 0.00014417104457997715, + "loss": 2.6389, + "step": 7152 + }, + { + "epoch": 0.5772738277782261, + "grad_norm": 0.7652727365493774, + "learning_rate": 0.00014415688069322345, + "loss": 2.6478, + "step": 7153 + }, + { + "epoch": 0.5773545315148091, + "grad_norm": 0.708692193031311, + "learning_rate": 0.0001441427157059648, + "loss": 2.6065, + "step": 7154 + }, + { + "epoch": 0.5774352352513922, + "grad_norm": 0.7549232244491577, + "learning_rate": 0.00014412854961855435, + "loss": 2.6484, + "step": 7155 + }, + { + "epoch": 0.5775159389879752, + "grad_norm": 0.6410655975341797, + "learning_rate": 0.00014411438243134506, + "loss": 2.6061, + "step": 7156 + }, + { + "epoch": 0.5775966427245581, + "grad_norm": 0.7711724042892456, + "learning_rate": 0.00014410021414469005, + "loss": 2.628, + "step": 7157 + }, + { + "epoch": 0.5776773464611411, + "grad_norm": 0.6723695993423462, + "learning_rate": 0.0001440860447589424, + "loss": 2.6214, + "step": 7158 + }, + { + "epoch": 0.5777580501977242, + "grad_norm": 0.7359206676483154, + "learning_rate": 0.0001440718742744553, + "loss": 2.6157, + "step": 7159 + }, + { + "epoch": 0.5778387539343072, + "grad_norm": 0.7320525050163269, + "learning_rate": 0.0001440577026915819, + "loss": 2.6081, + "step": 7160 + }, + { + "epoch": 0.5779194576708901, + "grad_norm": 0.7728561162948608, + "learning_rate": 0.00014404353001067535, + "loss": 2.5989, + "step": 7161 + }, + { + "epoch": 0.5780001614074731, + "grad_norm": 0.7380329370498657, + "learning_rate": 0.0001440293562320889, + "loss": 2.6337, + "step": 7162 + }, + { + "epoch": 0.5780808651440562, + "grad_norm": 0.667789876461029, + "learning_rate": 0.00014401518135617581, + "loss": 2.6324, + "step": 7163 + }, + { + "epoch": 0.5781615688806392, + "grad_norm": 0.6907219886779785, + "learning_rate": 0.00014400100538328935, + "loss": 2.5897, + "step": 7164 + }, + { + "epoch": 0.5782422726172222, + "grad_norm": 
0.9051530957221985, + "learning_rate": 0.00014398682831378283, + "loss": 2.6895, + "step": 7165 + }, + { + "epoch": 0.5783229763538051, + "grad_norm": 0.7189533114433289, + "learning_rate": 0.00014397265014800956, + "loss": 2.5948, + "step": 7166 + }, + { + "epoch": 0.5784036800903882, + "grad_norm": 0.7003059983253479, + "learning_rate": 0.00014395847088632285, + "loss": 2.5814, + "step": 7167 + }, + { + "epoch": 0.5784843838269712, + "grad_norm": 0.8083534240722656, + "learning_rate": 0.0001439442905290762, + "loss": 2.6131, + "step": 7168 + }, + { + "epoch": 0.5785650875635542, + "grad_norm": 0.7068585157394409, + "learning_rate": 0.0001439301090766229, + "loss": 2.6027, + "step": 7169 + }, + { + "epoch": 0.5786457913001372, + "grad_norm": 0.7010494470596313, + "learning_rate": 0.00014391592652931653, + "loss": 2.5296, + "step": 7170 + }, + { + "epoch": 0.5787264950367202, + "grad_norm": 0.7577467560768127, + "learning_rate": 0.00014390174288751045, + "loss": 2.6347, + "step": 7171 + }, + { + "epoch": 0.5788071987733032, + "grad_norm": 0.643799364566803, + "learning_rate": 0.00014388755815155813, + "loss": 2.6152, + "step": 7172 + }, + { + "epoch": 0.5788879025098862, + "grad_norm": 0.740352988243103, + "learning_rate": 0.00014387337232181315, + "loss": 2.6123, + "step": 7173 + }, + { + "epoch": 0.5789686062464692, + "grad_norm": 0.7309309840202332, + "learning_rate": 0.00014385918539862907, + "loss": 2.6072, + "step": 7174 + }, + { + "epoch": 0.5790493099830523, + "grad_norm": 0.7237016558647156, + "learning_rate": 0.00014384499738235941, + "loss": 2.6375, + "step": 7175 + }, + { + "epoch": 0.5791300137196352, + "grad_norm": 0.6600970029830933, + "learning_rate": 0.00014383080827335784, + "loss": 2.5285, + "step": 7176 + }, + { + "epoch": 0.5792107174562182, + "grad_norm": 0.6822233200073242, + "learning_rate": 0.00014381661807197794, + "loss": 2.5497, + "step": 7177 + }, + { + "epoch": 0.5792914211928012, + "grad_norm": 0.6990383863449097, + "learning_rate": 
0.00014380242677857337, + "loss": 2.6283, + "step": 7178 + }, + { + "epoch": 0.5793721249293843, + "grad_norm": 0.64422208070755, + "learning_rate": 0.00014378823439349783, + "loss": 2.5762, + "step": 7179 + }, + { + "epoch": 0.5794528286659673, + "grad_norm": 0.63804692029953, + "learning_rate": 0.00014377404091710501, + "loss": 2.5523, + "step": 7180 + }, + { + "epoch": 0.5795335324025502, + "grad_norm": 0.6978863477706909, + "learning_rate": 0.0001437598463497487, + "loss": 2.5089, + "step": 7181 + }, + { + "epoch": 0.5796142361391332, + "grad_norm": 0.7091087698936462, + "learning_rate": 0.00014374565069178257, + "loss": 2.7005, + "step": 7182 + }, + { + "epoch": 0.5796949398757163, + "grad_norm": 0.683659553527832, + "learning_rate": 0.00014373145394356053, + "loss": 2.5988, + "step": 7183 + }, + { + "epoch": 0.5797756436122993, + "grad_norm": 0.7352960705757141, + "learning_rate": 0.00014371725610543633, + "loss": 2.5671, + "step": 7184 + }, + { + "epoch": 0.5798563473488823, + "grad_norm": 0.6951913237571716, + "learning_rate": 0.00014370305717776382, + "loss": 2.5917, + "step": 7185 + }, + { + "epoch": 0.5799370510854652, + "grad_norm": 0.6644465923309326, + "learning_rate": 0.0001436888571608969, + "loss": 2.5954, + "step": 7186 + }, + { + "epoch": 0.5800177548220483, + "grad_norm": 0.7406458258628845, + "learning_rate": 0.00014367465605518942, + "loss": 2.6369, + "step": 7187 + }, + { + "epoch": 0.5800984585586313, + "grad_norm": 0.6724697351455688, + "learning_rate": 0.00014366045386099535, + "loss": 2.6227, + "step": 7188 + }, + { + "epoch": 0.5801791622952143, + "grad_norm": 0.6804977059364319, + "learning_rate": 0.00014364625057866867, + "loss": 2.6445, + "step": 7189 + }, + { + "epoch": 0.5802598660317972, + "grad_norm": 0.7020019888877869, + "learning_rate": 0.00014363204620856335, + "loss": 2.6733, + "step": 7190 + }, + { + "epoch": 0.5803405697683802, + "grad_norm": 0.6458491086959839, + "learning_rate": 0.00014361784075103332, + "loss": 2.572, + 
"step": 7191 + }, + { + "epoch": 0.5804212735049633, + "grad_norm": 0.7078056335449219, + "learning_rate": 0.00014360363420643272, + "loss": 2.7032, + "step": 7192 + }, + { + "epoch": 0.5805019772415463, + "grad_norm": 0.6367471814155579, + "learning_rate": 0.00014358942657511557, + "loss": 2.5369, + "step": 7193 + }, + { + "epoch": 0.5805826809781293, + "grad_norm": 0.7311955094337463, + "learning_rate": 0.00014357521785743596, + "loss": 2.6513, + "step": 7194 + }, + { + "epoch": 0.5806633847147122, + "grad_norm": 0.6957442164421082, + "learning_rate": 0.00014356100805374805, + "loss": 2.6512, + "step": 7195 + }, + { + "epoch": 0.5807440884512953, + "grad_norm": 0.7026693224906921, + "learning_rate": 0.0001435467971644059, + "loss": 2.6049, + "step": 7196 + }, + { + "epoch": 0.5808247921878783, + "grad_norm": 0.7337697744369507, + "learning_rate": 0.00014353258518976376, + "loss": 2.5516, + "step": 7197 + }, + { + "epoch": 0.5809054959244613, + "grad_norm": 0.6891856789588928, + "learning_rate": 0.00014351837213017577, + "loss": 2.5894, + "step": 7198 + }, + { + "epoch": 0.5809861996610443, + "grad_norm": 0.6710659265518188, + "learning_rate": 0.0001435041579859962, + "loss": 2.596, + "step": 7199 + }, + { + "epoch": 0.5810669033976273, + "grad_norm": 0.7637245059013367, + "learning_rate": 0.00014348994275757931, + "loss": 2.6278, + "step": 7200 + }, + { + "epoch": 0.5811476071342103, + "grad_norm": 0.7558664679527283, + "learning_rate": 0.00014347572644527934, + "loss": 2.6917, + "step": 7201 + }, + { + "epoch": 0.5812283108707933, + "grad_norm": 0.7254986763000488, + "learning_rate": 0.00014346150904945065, + "loss": 2.6161, + "step": 7202 + }, + { + "epoch": 0.5813090146073763, + "grad_norm": 0.7177211046218872, + "learning_rate": 0.00014344729057044753, + "loss": 2.555, + "step": 7203 + }, + { + "epoch": 0.5813897183439594, + "grad_norm": 0.6408729553222656, + "learning_rate": 0.00014343307100862432, + "loss": 2.6071, + "step": 7204 + }, + { + "epoch": 
0.5814704220805423, + "grad_norm": 0.7399997711181641, + "learning_rate": 0.0001434188503643355, + "loss": 2.6013, + "step": 7205 + }, + { + "epoch": 0.5815511258171253, + "grad_norm": 0.7796236276626587, + "learning_rate": 0.00014340462863793543, + "loss": 2.603, + "step": 7206 + }, + { + "epoch": 0.5816318295537083, + "grad_norm": 0.7420137524604797, + "learning_rate": 0.00014339040582977855, + "loss": 2.5858, + "step": 7207 + }, + { + "epoch": 0.5817125332902914, + "grad_norm": 0.738042414188385, + "learning_rate": 0.00014337618194021928, + "loss": 2.592, + "step": 7208 + }, + { + "epoch": 0.5817932370268744, + "grad_norm": 0.6910614371299744, + "learning_rate": 0.00014336195696961222, + "loss": 2.6448, + "step": 7209 + }, + { + "epoch": 0.5818739407634573, + "grad_norm": 0.7838915586471558, + "learning_rate": 0.00014334773091831185, + "loss": 2.6257, + "step": 7210 + }, + { + "epoch": 0.5819546445000403, + "grad_norm": 0.7362141013145447, + "learning_rate": 0.0001433335037866727, + "loss": 2.6505, + "step": 7211 + }, + { + "epoch": 0.5820353482366234, + "grad_norm": 0.6892269253730774, + "learning_rate": 0.00014331927557504934, + "loss": 2.6518, + "step": 7212 + }, + { + "epoch": 0.5821160519732064, + "grad_norm": 0.7444556951522827, + "learning_rate": 0.0001433050462837964, + "loss": 2.6785, + "step": 7213 + }, + { + "epoch": 0.5821967557097893, + "grad_norm": 0.6948450207710266, + "learning_rate": 0.00014329081591326853, + "loss": 2.5753, + "step": 7214 + }, + { + "epoch": 0.5822774594463723, + "grad_norm": 0.713741660118103, + "learning_rate": 0.00014327658446382032, + "loss": 2.6425, + "step": 7215 + }, + { + "epoch": 0.5823581631829554, + "grad_norm": 0.7352245450019836, + "learning_rate": 0.00014326235193580657, + "loss": 2.6859, + "step": 7216 + }, + { + "epoch": 0.5824388669195384, + "grad_norm": 0.7151867151260376, + "learning_rate": 0.00014324811832958187, + "loss": 2.6106, + "step": 7217 + }, + { + "epoch": 0.5825195706561214, + "grad_norm": 
0.7003469467163086, + "learning_rate": 0.000143233883645501, + "loss": 2.618, + "step": 7218 + }, + { + "epoch": 0.5826002743927043, + "grad_norm": 0.7139034867286682, + "learning_rate": 0.00014321964788391878, + "loss": 2.5772, + "step": 7219 + }, + { + "epoch": 0.5826809781292874, + "grad_norm": 0.6368305683135986, + "learning_rate": 0.00014320541104518992, + "loss": 2.5259, + "step": 7220 + }, + { + "epoch": 0.5827616818658704, + "grad_norm": 0.6921548247337341, + "learning_rate": 0.0001431911731296693, + "loss": 2.6403, + "step": 7221 + }, + { + "epoch": 0.5828423856024534, + "grad_norm": 0.6995570659637451, + "learning_rate": 0.00014317693413771175, + "loss": 2.6172, + "step": 7222 + }, + { + "epoch": 0.5829230893390364, + "grad_norm": 0.7557246088981628, + "learning_rate": 0.0001431626940696721, + "loss": 2.6347, + "step": 7223 + }, + { + "epoch": 0.5830037930756194, + "grad_norm": 0.6912205219268799, + "learning_rate": 0.00014314845292590528, + "loss": 2.5958, + "step": 7224 + }, + { + "epoch": 0.5830844968122024, + "grad_norm": 0.6896184682846069, + "learning_rate": 0.00014313421070676625, + "loss": 2.569, + "step": 7225 + }, + { + "epoch": 0.5831652005487854, + "grad_norm": 0.6900814771652222, + "learning_rate": 0.00014311996741260994, + "loss": 2.5466, + "step": 7226 + }, + { + "epoch": 0.5832459042853684, + "grad_norm": 0.7319771647453308, + "learning_rate": 0.00014310572304379132, + "loss": 2.6181, + "step": 7227 + }, + { + "epoch": 0.5833266080219515, + "grad_norm": 0.728138267993927, + "learning_rate": 0.0001430914776006654, + "loss": 2.6644, + "step": 7228 + }, + { + "epoch": 0.5834073117585344, + "grad_norm": 0.7361802458763123, + "learning_rate": 0.0001430772310835872, + "loss": 2.6079, + "step": 7229 + }, + { + "epoch": 0.5834880154951174, + "grad_norm": 0.6893376708030701, + "learning_rate": 0.00014306298349291182, + "loss": 2.5615, + "step": 7230 + }, + { + "epoch": 0.5835687192317004, + "grad_norm": 0.6661401987075806, + "learning_rate": 
0.00014304873482899431, + "loss": 2.6028, + "step": 7231 + }, + { + "epoch": 0.5836494229682835, + "grad_norm": 0.6571504473686218, + "learning_rate": 0.0001430344850921898, + "loss": 2.5553, + "step": 7232 + }, + { + "epoch": 0.5837301267048665, + "grad_norm": 0.6878423690795898, + "learning_rate": 0.00014302023428285342, + "loss": 2.5336, + "step": 7233 + }, + { + "epoch": 0.5838108304414494, + "grad_norm": 0.768117368221283, + "learning_rate": 0.00014300598240134035, + "loss": 2.6036, + "step": 7234 + }, + { + "epoch": 0.5838915341780324, + "grad_norm": 0.6876625418663025, + "learning_rate": 0.0001429917294480058, + "loss": 2.6314, + "step": 7235 + }, + { + "epoch": 0.5839722379146155, + "grad_norm": 0.7146790027618408, + "learning_rate": 0.00014297747542320495, + "loss": 2.6029, + "step": 7236 + }, + { + "epoch": 0.5840529416511985, + "grad_norm": 0.7032392024993896, + "learning_rate": 0.00014296322032729308, + "loss": 2.6163, + "step": 7237 + }, + { + "epoch": 0.5841336453877815, + "grad_norm": 0.7323551177978516, + "learning_rate": 0.00014294896416062544, + "loss": 2.6706, + "step": 7238 + }, + { + "epoch": 0.5842143491243644, + "grad_norm": 0.7647258639335632, + "learning_rate": 0.00014293470692355734, + "loss": 2.6744, + "step": 7239 + }, + { + "epoch": 0.5842950528609475, + "grad_norm": 0.6824506521224976, + "learning_rate": 0.00014292044861644414, + "loss": 2.579, + "step": 7240 + }, + { + "epoch": 0.5843757565975305, + "grad_norm": 0.7553619742393494, + "learning_rate": 0.00014290618923964115, + "loss": 2.6196, + "step": 7241 + }, + { + "epoch": 0.5844564603341135, + "grad_norm": 0.6872109770774841, + "learning_rate": 0.00014289192879350375, + "loss": 2.555, + "step": 7242 + }, + { + "epoch": 0.5845371640706964, + "grad_norm": 0.664658784866333, + "learning_rate": 0.00014287766727838735, + "loss": 2.5781, + "step": 7243 + }, + { + "epoch": 0.5846178678072794, + "grad_norm": 0.6709543466567993, + "learning_rate": 0.00014286340469464744, + "loss": 2.6022, 
+ "step": 7244 + }, + { + "epoch": 0.5846985715438625, + "grad_norm": 0.7236210107803345, + "learning_rate": 0.00014284914104263941, + "loss": 2.5609, + "step": 7245 + }, + { + "epoch": 0.5847792752804455, + "grad_norm": 0.6751740574836731, + "learning_rate": 0.0001428348763227188, + "loss": 2.5792, + "step": 7246 + }, + { + "epoch": 0.5848599790170285, + "grad_norm": 0.6684607267379761, + "learning_rate": 0.0001428206105352411, + "loss": 2.5705, + "step": 7247 + }, + { + "epoch": 0.5849406827536114, + "grad_norm": 0.6876732707023621, + "learning_rate": 0.00014280634368056186, + "loss": 2.6576, + "step": 7248 + }, + { + "epoch": 0.5850213864901945, + "grad_norm": 0.758637547492981, + "learning_rate": 0.0001427920757590366, + "loss": 2.6215, + "step": 7249 + }, + { + "epoch": 0.5851020902267775, + "grad_norm": 0.6839025020599365, + "learning_rate": 0.00014277780677102097, + "loss": 2.5898, + "step": 7250 + }, + { + "epoch": 0.5851827939633605, + "grad_norm": 0.6912671327590942, + "learning_rate": 0.00014276353671687056, + "loss": 2.5879, + "step": 7251 + }, + { + "epoch": 0.5852634976999435, + "grad_norm": 0.6727048754692078, + "learning_rate": 0.00014274926559694107, + "loss": 2.5501, + "step": 7252 + }, + { + "epoch": 0.5853442014365265, + "grad_norm": 0.7031945586204529, + "learning_rate": 0.00014273499341158812, + "loss": 2.625, + "step": 7253 + }, + { + "epoch": 0.5854249051731095, + "grad_norm": 0.6886943578720093, + "learning_rate": 0.0001427207201611674, + "loss": 2.6141, + "step": 7254 + }, + { + "epoch": 0.5855056089096925, + "grad_norm": 0.7906915545463562, + "learning_rate": 0.00014270644584603466, + "loss": 2.7189, + "step": 7255 + }, + { + "epoch": 0.5855863126462755, + "grad_norm": 0.6873704195022583, + "learning_rate": 0.00014269217046654567, + "loss": 2.6031, + "step": 7256 + }, + { + "epoch": 0.5856670163828586, + "grad_norm": 0.6655381321907043, + "learning_rate": 0.00014267789402305618, + "loss": 2.5747, + "step": 7257 + }, + { + "epoch": 
0.5857477201194415, + "grad_norm": 0.6655673384666443, + "learning_rate": 0.00014266361651592204, + "loss": 2.625, + "step": 7258 + }, + { + "epoch": 0.5858284238560245, + "grad_norm": 0.6752866506576538, + "learning_rate": 0.00014264933794549901, + "loss": 2.5914, + "step": 7259 + }, + { + "epoch": 0.5859091275926075, + "grad_norm": 0.6680975556373596, + "learning_rate": 0.00014263505831214302, + "loss": 2.5572, + "step": 7260 + }, + { + "epoch": 0.5859898313291906, + "grad_norm": 0.6873607039451599, + "learning_rate": 0.00014262077761620994, + "loss": 2.6696, + "step": 7261 + }, + { + "epoch": 0.5860705350657736, + "grad_norm": 0.6745384335517883, + "learning_rate": 0.00014260649585805566, + "loss": 2.5738, + "step": 7262 + }, + { + "epoch": 0.5861512388023565, + "grad_norm": 0.6524637937545776, + "learning_rate": 0.0001425922130380361, + "loss": 2.6209, + "step": 7263 + }, + { + "epoch": 0.5862319425389395, + "grad_norm": 0.6729850172996521, + "learning_rate": 0.00014257792915650728, + "loss": 2.652, + "step": 7264 + }, + { + "epoch": 0.5863126462755226, + "grad_norm": 0.6713503003120422, + "learning_rate": 0.00014256364421382514, + "loss": 2.5658, + "step": 7265 + }, + { + "epoch": 0.5863933500121056, + "grad_norm": 0.6835616827011108, + "learning_rate": 0.00014254935821034575, + "loss": 2.5535, + "step": 7266 + }, + { + "epoch": 0.5864740537486886, + "grad_norm": 0.7425376176834106, + "learning_rate": 0.00014253507114642515, + "loss": 2.6369, + "step": 7267 + }, + { + "epoch": 0.5865547574852715, + "grad_norm": 0.6788069605827332, + "learning_rate": 0.00014252078302241932, + "loss": 2.601, + "step": 7268 + }, + { + "epoch": 0.5866354612218546, + "grad_norm": 0.6828538179397583, + "learning_rate": 0.0001425064938386845, + "loss": 2.5861, + "step": 7269 + }, + { + "epoch": 0.5867161649584376, + "grad_norm": 0.6763372421264648, + "learning_rate": 0.0001424922035955767, + "loss": 2.6035, + "step": 7270 + }, + { + "epoch": 0.5867968686950206, + "grad_norm": 
0.6517930626869202, + "learning_rate": 0.0001424779122934521, + "loss": 2.5564, + "step": 7271 + }, + { + "epoch": 0.5868775724316035, + "grad_norm": 0.6633113622665405, + "learning_rate": 0.00014246361993266692, + "loss": 2.6163, + "step": 7272 + }, + { + "epoch": 0.5869582761681866, + "grad_norm": 0.684822678565979, + "learning_rate": 0.00014244932651357733, + "loss": 2.6057, + "step": 7273 + }, + { + "epoch": 0.5870389799047696, + "grad_norm": 0.7679704427719116, + "learning_rate": 0.00014243503203653952, + "loss": 2.6522, + "step": 7274 + }, + { + "epoch": 0.5871196836413526, + "grad_norm": 0.6834188103675842, + "learning_rate": 0.00014242073650190984, + "loss": 2.652, + "step": 7275 + }, + { + "epoch": 0.5872003873779356, + "grad_norm": 0.6903846859931946, + "learning_rate": 0.00014240643991004449, + "loss": 2.5894, + "step": 7276 + }, + { + "epoch": 0.5872810911145186, + "grad_norm": 0.7060866951942444, + "learning_rate": 0.0001423921422612998, + "loss": 2.5994, + "step": 7277 + }, + { + "epoch": 0.5873617948511016, + "grad_norm": 0.6646741628646851, + "learning_rate": 0.0001423778435560321, + "loss": 2.6432, + "step": 7278 + }, + { + "epoch": 0.5874424985876846, + "grad_norm": 0.6930218935012817, + "learning_rate": 0.0001423635437945978, + "loss": 2.6233, + "step": 7279 + }, + { + "epoch": 0.5875232023242676, + "grad_norm": 0.6914143562316895, + "learning_rate": 0.00014234924297735322, + "loss": 2.6143, + "step": 7280 + }, + { + "epoch": 0.5876039060608507, + "grad_norm": 0.7351366281509399, + "learning_rate": 0.0001423349411046548, + "loss": 2.6323, + "step": 7281 + }, + { + "epoch": 0.5876846097974336, + "grad_norm": 0.6813770532608032, + "learning_rate": 0.000142320638176859, + "loss": 2.5964, + "step": 7282 + }, + { + "epoch": 0.5877653135340166, + "grad_norm": 0.7049702405929565, + "learning_rate": 0.00014230633419432226, + "loss": 2.6284, + "step": 7283 + }, + { + "epoch": 0.5878460172705996, + "grad_norm": 0.7140446901321411, + "learning_rate": 
0.00014229202915740107, + "loss": 2.6113, + "step": 7284 + }, + { + "epoch": 0.5879267210071827, + "grad_norm": 0.696588933467865, + "learning_rate": 0.00014227772306645196, + "loss": 2.6384, + "step": 7285 + }, + { + "epoch": 0.5880074247437657, + "grad_norm": 0.6800615787506104, + "learning_rate": 0.0001422634159218315, + "loss": 2.5743, + "step": 7286 + }, + { + "epoch": 0.5880881284803486, + "grad_norm": 0.7586596608161926, + "learning_rate": 0.00014224910772389624, + "loss": 2.6504, + "step": 7287 + }, + { + "epoch": 0.5881688322169316, + "grad_norm": 0.73286372423172, + "learning_rate": 0.00014223479847300278, + "loss": 2.6026, + "step": 7288 + }, + { + "epoch": 0.5882495359535147, + "grad_norm": 0.6808766722679138, + "learning_rate": 0.00014222048816950772, + "loss": 2.5822, + "step": 7289 + }, + { + "epoch": 0.5883302396900977, + "grad_norm": 0.7424919009208679, + "learning_rate": 0.0001422061768137677, + "loss": 2.6474, + "step": 7290 + }, + { + "epoch": 0.5884109434266807, + "grad_norm": 0.658183753490448, + "learning_rate": 0.00014219186440613948, + "loss": 2.6051, + "step": 7291 + }, + { + "epoch": 0.5884916471632636, + "grad_norm": 0.6693006157875061, + "learning_rate": 0.0001421775509469797, + "loss": 2.5774, + "step": 7292 + }, + { + "epoch": 0.5885723508998466, + "grad_norm": 0.7298646569252014, + "learning_rate": 0.00014216323643664508, + "loss": 2.5688, + "step": 7293 + }, + { + "epoch": 0.5886530546364297, + "grad_norm": 0.6665881276130676, + "learning_rate": 0.00014214892087549238, + "loss": 2.608, + "step": 7294 + }, + { + "epoch": 0.5887337583730127, + "grad_norm": 0.7220060229301453, + "learning_rate": 0.00014213460426387841, + "loss": 2.6078, + "step": 7295 + }, + { + "epoch": 0.5888144621095956, + "grad_norm": 0.6693970561027527, + "learning_rate": 0.00014212028660215997, + "loss": 2.597, + "step": 7296 + }, + { + "epoch": 0.5888951658461786, + "grad_norm": 0.682331919670105, + "learning_rate": 0.00014210596789069387, + "loss": 2.5752, + 
"step": 7297 + }, + { + "epoch": 0.5889758695827617, + "grad_norm": 0.7586890459060669, + "learning_rate": 0.000142091648129837, + "loss": 2.6878, + "step": 7298 + }, + { + "epoch": 0.5890565733193447, + "grad_norm": 0.6740901470184326, + "learning_rate": 0.00014207732731994624, + "loss": 2.6083, + "step": 7299 + }, + { + "epoch": 0.5891372770559277, + "grad_norm": 0.6959021091461182, + "learning_rate": 0.00014206300546137842, + "loss": 2.5765, + "step": 7300 + }, + { + "epoch": 0.5892179807925106, + "grad_norm": 0.7446078658103943, + "learning_rate": 0.0001420486825544906, + "loss": 2.662, + "step": 7301 + }, + { + "epoch": 0.5892986845290937, + "grad_norm": 0.7418847680091858, + "learning_rate": 0.0001420343585996397, + "loss": 2.6606, + "step": 7302 + }, + { + "epoch": 0.5893793882656767, + "grad_norm": 0.7185709476470947, + "learning_rate": 0.00014202003359718273, + "loss": 2.563, + "step": 7303 + }, + { + "epoch": 0.5894600920022597, + "grad_norm": 0.6960515379905701, + "learning_rate": 0.00014200570754747664, + "loss": 2.6182, + "step": 7304 + }, + { + "epoch": 0.5895407957388427, + "grad_norm": 0.6589705348014832, + "learning_rate": 0.00014199138045087849, + "loss": 2.6714, + "step": 7305 + }, + { + "epoch": 0.5896214994754257, + "grad_norm": 0.7027507424354553, + "learning_rate": 0.00014197705230774543, + "loss": 2.6145, + "step": 7306 + }, + { + "epoch": 0.5897022032120087, + "grad_norm": 0.6761246919631958, + "learning_rate": 0.00014196272311843447, + "loss": 2.5688, + "step": 7307 + }, + { + "epoch": 0.5897829069485917, + "grad_norm": 0.6618059277534485, + "learning_rate": 0.00014194839288330277, + "loss": 2.6194, + "step": 7308 + }, + { + "epoch": 0.5898636106851747, + "grad_norm": 0.7182614803314209, + "learning_rate": 0.00014193406160270747, + "loss": 2.5452, + "step": 7309 + }, + { + "epoch": 0.5899443144217578, + "grad_norm": 0.6830565333366394, + "learning_rate": 0.0001419197292770057, + "loss": 2.5728, + "step": 7310 + }, + { + "epoch": 
0.5900250181583407, + "grad_norm": 0.6744499802589417, + "learning_rate": 0.00014190539590655475, + "loss": 2.5736, + "step": 7311 + }, + { + "epoch": 0.5901057218949237, + "grad_norm": 0.7177874445915222, + "learning_rate": 0.00014189106149171176, + "loss": 2.6271, + "step": 7312 + }, + { + "epoch": 0.5901864256315067, + "grad_norm": 0.6770105361938477, + "learning_rate": 0.000141876726032834, + "loss": 2.5924, + "step": 7313 + }, + { + "epoch": 0.5902671293680898, + "grad_norm": 0.7295818328857422, + "learning_rate": 0.0001418623895302788, + "loss": 2.644, + "step": 7314 + }, + { + "epoch": 0.5903478331046728, + "grad_norm": 0.7244859933853149, + "learning_rate": 0.00014184805198440338, + "loss": 2.5892, + "step": 7315 + }, + { + "epoch": 0.5904285368412557, + "grad_norm": 0.7067728638648987, + "learning_rate": 0.00014183371339556512, + "loss": 2.5985, + "step": 7316 + }, + { + "epoch": 0.5905092405778387, + "grad_norm": 0.6732490062713623, + "learning_rate": 0.0001418193737641214, + "loss": 2.5771, + "step": 7317 + }, + { + "epoch": 0.5905899443144218, + "grad_norm": 0.7087544202804565, + "learning_rate": 0.00014180503309042957, + "loss": 2.6373, + "step": 7318 + }, + { + "epoch": 0.5906706480510048, + "grad_norm": 0.772174596786499, + "learning_rate": 0.00014179069137484703, + "loss": 2.6262, + "step": 7319 + }, + { + "epoch": 0.5907513517875878, + "grad_norm": 0.6855718493461609, + "learning_rate": 0.00014177634861773118, + "loss": 2.6268, + "step": 7320 + }, + { + "epoch": 0.5908320555241707, + "grad_norm": 0.7168720364570618, + "learning_rate": 0.00014176200481943953, + "loss": 2.5892, + "step": 7321 + }, + { + "epoch": 0.5909127592607538, + "grad_norm": 0.7126333713531494, + "learning_rate": 0.0001417476599803296, + "loss": 2.6079, + "step": 7322 + }, + { + "epoch": 0.5909934629973368, + "grad_norm": 0.7451913952827454, + "learning_rate": 0.0001417333141007588, + "loss": 2.635, + "step": 7323 + }, + { + "epoch": 0.5910741667339198, + "grad_norm": 
0.7405436038970947, + "learning_rate": 0.00014171896718108475, + "loss": 2.6014, + "step": 7324 + }, + { + "epoch": 0.5911548704705027, + "grad_norm": 0.7583999037742615, + "learning_rate": 0.00014170461922166498, + "loss": 2.6815, + "step": 7325 + }, + { + "epoch": 0.5912355742070858, + "grad_norm": 0.6653509140014648, + "learning_rate": 0.00014169027022285706, + "loss": 2.6153, + "step": 7326 + }, + { + "epoch": 0.5913162779436688, + "grad_norm": 0.7145548462867737, + "learning_rate": 0.00014167592018501864, + "loss": 2.6022, + "step": 7327 + }, + { + "epoch": 0.5913969816802518, + "grad_norm": 0.6996089816093445, + "learning_rate": 0.00014166156910850737, + "loss": 2.6586, + "step": 7328 + }, + { + "epoch": 0.5914776854168348, + "grad_norm": 0.735653281211853, + "learning_rate": 0.0001416472169936809, + "loss": 2.6084, + "step": 7329 + }, + { + "epoch": 0.5915583891534179, + "grad_norm": 0.695036768913269, + "learning_rate": 0.00014163286384089686, + "loss": 2.5058, + "step": 7330 + }, + { + "epoch": 0.5916390928900008, + "grad_norm": 0.9014756679534912, + "learning_rate": 0.00014161850965051307, + "loss": 2.5991, + "step": 7331 + }, + { + "epoch": 0.5917197966265838, + "grad_norm": 0.7079846858978271, + "learning_rate": 0.0001416041544228872, + "loss": 2.6067, + "step": 7332 + }, + { + "epoch": 0.5918005003631668, + "grad_norm": 0.7681204080581665, + "learning_rate": 0.00014158979815837705, + "loss": 2.5414, + "step": 7333 + }, + { + "epoch": 0.5918812040997499, + "grad_norm": 0.6501670479774475, + "learning_rate": 0.00014157544085734042, + "loss": 2.617, + "step": 7334 + }, + { + "epoch": 0.5919619078363328, + "grad_norm": 0.7573496103286743, + "learning_rate": 0.00014156108252013513, + "loss": 2.6341, + "step": 7335 + }, + { + "epoch": 0.5920426115729158, + "grad_norm": 0.6865558624267578, + "learning_rate": 0.00014154672314711903, + "loss": 2.6229, + "step": 7336 + }, + { + "epoch": 0.5921233153094988, + "grad_norm": 0.6859166622161865, + "learning_rate": 
0.00014153236273864995, + "loss": 2.6149, + "step": 7337 + }, + { + "epoch": 0.5922040190460819, + "grad_norm": 0.7603647112846375, + "learning_rate": 0.00014151800129508585, + "loss": 2.5645, + "step": 7338 + }, + { + "epoch": 0.5922847227826649, + "grad_norm": 0.6740217208862305, + "learning_rate": 0.00014150363881678464, + "loss": 2.5883, + "step": 7339 + }, + { + "epoch": 0.5923654265192478, + "grad_norm": 0.6412263512611389, + "learning_rate": 0.00014148927530410426, + "loss": 2.576, + "step": 7340 + }, + { + "epoch": 0.5924461302558308, + "grad_norm": 0.669834315776825, + "learning_rate": 0.00014147491075740265, + "loss": 2.542, + "step": 7341 + }, + { + "epoch": 0.5925268339924139, + "grad_norm": 0.720024049282074, + "learning_rate": 0.00014146054517703786, + "loss": 2.6491, + "step": 7342 + }, + { + "epoch": 0.5926075377289969, + "grad_norm": 0.7191612720489502, + "learning_rate": 0.00014144617856336794, + "loss": 2.5933, + "step": 7343 + }, + { + "epoch": 0.5926882414655799, + "grad_norm": 0.7012050747871399, + "learning_rate": 0.00014143181091675087, + "loss": 2.5253, + "step": 7344 + }, + { + "epoch": 0.5927689452021628, + "grad_norm": 0.7825081944465637, + "learning_rate": 0.00014141744223754478, + "loss": 2.6225, + "step": 7345 + }, + { + "epoch": 0.5928496489387458, + "grad_norm": 0.6699295043945312, + "learning_rate": 0.00014140307252610775, + "loss": 2.5893, + "step": 7346 + }, + { + "epoch": 0.5929303526753289, + "grad_norm": 0.6668846011161804, + "learning_rate": 0.00014138870178279794, + "loss": 2.5944, + "step": 7347 + }, + { + "epoch": 0.5930110564119119, + "grad_norm": 0.7681072950363159, + "learning_rate": 0.0001413743300079735, + "loss": 2.5715, + "step": 7348 + }, + { + "epoch": 0.5930917601484949, + "grad_norm": 0.653075635433197, + "learning_rate": 0.00014135995720199258, + "loss": 2.5924, + "step": 7349 + }, + { + "epoch": 0.5931724638850778, + "grad_norm": 0.6807504892349243, + "learning_rate": 0.00014134558336521342, + "loss": 2.5395, 
+ "step": 7350 + }, + { + "epoch": 0.5932531676216609, + "grad_norm": 0.681175708770752, + "learning_rate": 0.00014133120849799423, + "loss": 2.5401, + "step": 7351 + }, + { + "epoch": 0.5933338713582439, + "grad_norm": 0.7159900665283203, + "learning_rate": 0.0001413168326006933, + "loss": 2.5684, + "step": 7352 + }, + { + "epoch": 0.5934145750948269, + "grad_norm": 0.6517181992530823, + "learning_rate": 0.00014130245567366888, + "loss": 2.5887, + "step": 7353 + }, + { + "epoch": 0.5934952788314098, + "grad_norm": 0.6982731223106384, + "learning_rate": 0.00014128807771727936, + "loss": 2.5707, + "step": 7354 + }, + { + "epoch": 0.5935759825679929, + "grad_norm": 0.7003650069236755, + "learning_rate": 0.00014127369873188296, + "loss": 2.6415, + "step": 7355 + }, + { + "epoch": 0.5936566863045759, + "grad_norm": 0.7408339977264404, + "learning_rate": 0.0001412593187178381, + "loss": 2.5655, + "step": 7356 + }, + { + "epoch": 0.5937373900411589, + "grad_norm": 0.717218279838562, + "learning_rate": 0.00014124493767550317, + "loss": 2.586, + "step": 7357 + }, + { + "epoch": 0.5938180937777419, + "grad_norm": 0.6723458766937256, + "learning_rate": 0.00014123055560523657, + "loss": 2.593, + "step": 7358 + }, + { + "epoch": 0.593898797514325, + "grad_norm": 0.6861262321472168, + "learning_rate": 0.00014121617250739677, + "loss": 2.612, + "step": 7359 + }, + { + "epoch": 0.5939795012509079, + "grad_norm": 0.6811453104019165, + "learning_rate": 0.00014120178838234222, + "loss": 2.5708, + "step": 7360 + }, + { + "epoch": 0.5940602049874909, + "grad_norm": 0.6249656677246094, + "learning_rate": 0.00014118740323043136, + "loss": 2.5604, + "step": 7361 + }, + { + "epoch": 0.5941409087240739, + "grad_norm": 0.7671588659286499, + "learning_rate": 0.00014117301705202274, + "loss": 2.547, + "step": 7362 + }, + { + "epoch": 0.594221612460657, + "grad_norm": 0.6856057643890381, + "learning_rate": 0.00014115862984747496, + "loss": 2.6108, + "step": 7363 + }, + { + "epoch": 
0.5943023161972399, + "grad_norm": 0.692331850528717, + "learning_rate": 0.0001411442416171465, + "loss": 2.6347, + "step": 7364 + }, + { + "epoch": 0.5943830199338229, + "grad_norm": 0.7256516814231873, + "learning_rate": 0.000141129852361396, + "loss": 2.6098, + "step": 7365 + }, + { + "epoch": 0.5944637236704059, + "grad_norm": 0.7522590160369873, + "learning_rate": 0.00014111546208058203, + "loss": 2.5688, + "step": 7366 + }, + { + "epoch": 0.594544427406989, + "grad_norm": 0.6915806531906128, + "learning_rate": 0.0001411010707750633, + "loss": 2.5899, + "step": 7367 + }, + { + "epoch": 0.594625131143572, + "grad_norm": 0.7355465292930603, + "learning_rate": 0.00014108667844519844, + "loss": 2.5212, + "step": 7368 + }, + { + "epoch": 0.5947058348801549, + "grad_norm": 0.731002926826477, + "learning_rate": 0.00014107228509134615, + "loss": 2.6369, + "step": 7369 + }, + { + "epoch": 0.5947865386167379, + "grad_norm": 0.6764423251152039, + "learning_rate": 0.0001410578907138652, + "loss": 2.6012, + "step": 7370 + }, + { + "epoch": 0.594867242353321, + "grad_norm": 0.7466071844100952, + "learning_rate": 0.0001410434953131142, + "loss": 2.5822, + "step": 7371 + }, + { + "epoch": 0.594947946089904, + "grad_norm": 0.7276137471199036, + "learning_rate": 0.00014102909888945205, + "loss": 2.6055, + "step": 7372 + }, + { + "epoch": 0.595028649826487, + "grad_norm": 0.7411746978759766, + "learning_rate": 0.00014101470144323752, + "loss": 2.6489, + "step": 7373 + }, + { + "epoch": 0.5951093535630699, + "grad_norm": 0.7511908411979675, + "learning_rate": 0.0001410003029748294, + "loss": 2.6268, + "step": 7374 + }, + { + "epoch": 0.595190057299653, + "grad_norm": 0.6623562574386597, + "learning_rate": 0.0001409859034845866, + "loss": 2.58, + "step": 7375 + }, + { + "epoch": 0.595270761036236, + "grad_norm": 0.6948572397232056, + "learning_rate": 0.00014097150297286785, + "loss": 2.5811, + "step": 7376 + }, + { + "epoch": 0.595351464772819, + "grad_norm": 0.6836786270141602, + 
"learning_rate": 0.0001409571014400322, + "loss": 2.5861, + "step": 7377 + }, + { + "epoch": 0.595432168509402, + "grad_norm": 0.6644341945648193, + "learning_rate": 0.00014094269888643854, + "loss": 2.6339, + "step": 7378 + }, + { + "epoch": 0.595512872245985, + "grad_norm": 0.6434289813041687, + "learning_rate": 0.0001409282953124458, + "loss": 2.4897, + "step": 7379 + }, + { + "epoch": 0.595593575982568, + "grad_norm": 0.6745082139968872, + "learning_rate": 0.0001409138907184129, + "loss": 2.522, + "step": 7380 + }, + { + "epoch": 0.595674279719151, + "grad_norm": 0.725321352481842, + "learning_rate": 0.0001408994851046989, + "loss": 2.5711, + "step": 7381 + }, + { + "epoch": 0.595754983455734, + "grad_norm": 0.7485500574111938, + "learning_rate": 0.00014088507847166283, + "loss": 2.6095, + "step": 7382 + }, + { + "epoch": 0.595835687192317, + "grad_norm": 0.721125602722168, + "learning_rate": 0.00014087067081966376, + "loss": 2.6762, + "step": 7383 + }, + { + "epoch": 0.5959163909289, + "grad_norm": 0.7099901437759399, + "learning_rate": 0.00014085626214906073, + "loss": 2.5667, + "step": 7384 + }, + { + "epoch": 0.595997094665483, + "grad_norm": 0.6889060139656067, + "learning_rate": 0.00014084185246021283, + "loss": 2.6723, + "step": 7385 + }, + { + "epoch": 0.596077798402066, + "grad_norm": 0.735698938369751, + "learning_rate": 0.00014082744175347923, + "loss": 2.6434, + "step": 7386 + }, + { + "epoch": 0.5961585021386491, + "grad_norm": 0.7603070735931396, + "learning_rate": 0.00014081303002921902, + "loss": 2.665, + "step": 7387 + }, + { + "epoch": 0.596239205875232, + "grad_norm": 0.6786355376243591, + "learning_rate": 0.00014079861728779141, + "loss": 2.5842, + "step": 7388 + }, + { + "epoch": 0.596319909611815, + "grad_norm": 0.6693331003189087, + "learning_rate": 0.00014078420352955565, + "loss": 2.6211, + "step": 7389 + }, + { + "epoch": 0.596400613348398, + "grad_norm": 0.74013751745224, + "learning_rate": 0.0001407697887548709, + "loss": 2.5886, + 
"step": 7390 + }, + { + "epoch": 0.5964813170849811, + "grad_norm": 0.739507257938385, + "learning_rate": 0.00014075537296409646, + "loss": 2.607, + "step": 7391 + }, + { + "epoch": 0.5965620208215641, + "grad_norm": 0.7121848464012146, + "learning_rate": 0.00014074095615759156, + "loss": 2.6052, + "step": 7392 + }, + { + "epoch": 0.596642724558147, + "grad_norm": 0.7526760697364807, + "learning_rate": 0.00014072653833571556, + "loss": 2.6051, + "step": 7393 + }, + { + "epoch": 0.59672342829473, + "grad_norm": 0.7867496609687805, + "learning_rate": 0.00014071211949882777, + "loss": 2.6228, + "step": 7394 + }, + { + "epoch": 0.596804132031313, + "grad_norm": 0.7527757883071899, + "learning_rate": 0.00014069769964728752, + "loss": 2.6793, + "step": 7395 + }, + { + "epoch": 0.5968848357678961, + "grad_norm": 0.7096899747848511, + "learning_rate": 0.00014068327878145423, + "loss": 2.5207, + "step": 7396 + }, + { + "epoch": 0.5969655395044791, + "grad_norm": 0.6863983869552612, + "learning_rate": 0.00014066885690168726, + "loss": 2.7059, + "step": 7397 + }, + { + "epoch": 0.597046243241062, + "grad_norm": 0.7782251834869385, + "learning_rate": 0.0001406544340083461, + "loss": 2.6232, + "step": 7398 + }, + { + "epoch": 0.597126946977645, + "grad_norm": 0.6944136619567871, + "learning_rate": 0.00014064001010179013, + "loss": 2.6134, + "step": 7399 + }, + { + "epoch": 0.5972076507142281, + "grad_norm": 0.7629704475402832, + "learning_rate": 0.00014062558518237892, + "loss": 2.5358, + "step": 7400 + }, + { + "epoch": 0.5972883544508111, + "grad_norm": 0.6922330260276794, + "learning_rate": 0.0001406111592504719, + "loss": 2.5457, + "step": 7401 + }, + { + "epoch": 0.597369058187394, + "grad_norm": 0.6992952227592468, + "learning_rate": 0.00014059673230642865, + "loss": 2.6241, + "step": 7402 + }, + { + "epoch": 0.597449761923977, + "grad_norm": 0.6587642431259155, + "learning_rate": 0.0001405823043506087, + "loss": 2.5867, + "step": 7403 + }, + { + "epoch": 
0.5975304656605601, + "grad_norm": 0.6993013024330139, + "learning_rate": 0.00014056787538337164, + "loss": 2.6194, + "step": 7404 + }, + { + "epoch": 0.5976111693971431, + "grad_norm": 0.7605414986610413, + "learning_rate": 0.0001405534454050771, + "loss": 2.607, + "step": 7405 + }, + { + "epoch": 0.5976918731337261, + "grad_norm": 0.6624562740325928, + "learning_rate": 0.00014053901441608466, + "loss": 2.5962, + "step": 7406 + }, + { + "epoch": 0.597772576870309, + "grad_norm": 0.7432621717453003, + "learning_rate": 0.000140524582416754, + "loss": 2.6434, + "step": 7407 + }, + { + "epoch": 0.5978532806068921, + "grad_norm": 0.7184053659439087, + "learning_rate": 0.00014051014940744488, + "loss": 2.6139, + "step": 7408 + }, + { + "epoch": 0.5979339843434751, + "grad_norm": 0.7567455768585205, + "learning_rate": 0.00014049571538851687, + "loss": 2.5788, + "step": 7409 + }, + { + "epoch": 0.5980146880800581, + "grad_norm": 0.6759883761405945, + "learning_rate": 0.00014048128036032984, + "loss": 2.5584, + "step": 7410 + }, + { + "epoch": 0.5980953918166411, + "grad_norm": 0.7607424855232239, + "learning_rate": 0.00014046684432324343, + "loss": 2.5675, + "step": 7411 + }, + { + "epoch": 0.5981760955532242, + "grad_norm": 0.7134036421775818, + "learning_rate": 0.00014045240727761748, + "loss": 2.6805, + "step": 7412 + }, + { + "epoch": 0.5982567992898071, + "grad_norm": 0.6996984481811523, + "learning_rate": 0.00014043796922381184, + "loss": 2.5874, + "step": 7413 + }, + { + "epoch": 0.5983375030263901, + "grad_norm": 0.7098252177238464, + "learning_rate": 0.00014042353016218627, + "loss": 2.5895, + "step": 7414 + }, + { + "epoch": 0.5984182067629731, + "grad_norm": 0.7160520553588867, + "learning_rate": 0.00014040909009310068, + "loss": 2.6042, + "step": 7415 + }, + { + "epoch": 0.5984989104995562, + "grad_norm": 0.6727281212806702, + "learning_rate": 0.00014039464901691493, + "loss": 2.5356, + "step": 7416 + }, + { + "epoch": 0.5985796142361391, + "grad_norm": 
0.7052881717681885, + "learning_rate": 0.00014038020693398891, + "loss": 2.6093, + "step": 7417 + }, + { + "epoch": 0.5986603179727221, + "grad_norm": 0.7151781916618347, + "learning_rate": 0.00014036576384468262, + "loss": 2.5776, + "step": 7418 + }, + { + "epoch": 0.5987410217093051, + "grad_norm": 0.7376574873924255, + "learning_rate": 0.0001403513197493559, + "loss": 2.6246, + "step": 7419 + }, + { + "epoch": 0.5988217254458882, + "grad_norm": 0.6882135272026062, + "learning_rate": 0.00014033687464836892, + "loss": 2.6028, + "step": 7420 + }, + { + "epoch": 0.5989024291824712, + "grad_norm": 0.6603999137878418, + "learning_rate": 0.00014032242854208153, + "loss": 2.5897, + "step": 7421 + }, + { + "epoch": 0.5989831329190541, + "grad_norm": 0.7001559734344482, + "learning_rate": 0.0001403079814308538, + "loss": 2.6033, + "step": 7422 + }, + { + "epoch": 0.5990638366556371, + "grad_norm": 0.7184363603591919, + "learning_rate": 0.00014029353331504582, + "loss": 2.7464, + "step": 7423 + }, + { + "epoch": 0.5991445403922202, + "grad_norm": 0.6794769167900085, + "learning_rate": 0.00014027908419501767, + "loss": 2.569, + "step": 7424 + }, + { + "epoch": 0.5992252441288032, + "grad_norm": 0.6846041083335876, + "learning_rate": 0.00014026463407112942, + "loss": 2.5995, + "step": 7425 + }, + { + "epoch": 0.5993059478653862, + "grad_norm": 0.6539658308029175, + "learning_rate": 0.00014025018294374129, + "loss": 2.5749, + "step": 7426 + }, + { + "epoch": 0.5993866516019691, + "grad_norm": 0.6572301983833313, + "learning_rate": 0.00014023573081321336, + "loss": 2.5312, + "step": 7427 + }, + { + "epoch": 0.5994673553385522, + "grad_norm": 0.7010765671730042, + "learning_rate": 0.00014022127767990581, + "loss": 2.5088, + "step": 7428 + }, + { + "epoch": 0.5995480590751352, + "grad_norm": 0.7193396091461182, + "learning_rate": 0.0001402068235441789, + "loss": 2.6193, + "step": 7429 + }, + { + "epoch": 0.5996287628117182, + "grad_norm": 0.6928533315658569, + "learning_rate": 
0.00014019236840639288, + "loss": 2.6149, + "step": 7430 + }, + { + "epoch": 0.5997094665483012, + "grad_norm": 0.743658185005188, + "learning_rate": 0.00014017791226690794, + "loss": 2.5466, + "step": 7431 + }, + { + "epoch": 0.5997901702848842, + "grad_norm": 0.752082347869873, + "learning_rate": 0.0001401634551260844, + "loss": 2.6605, + "step": 7432 + }, + { + "epoch": 0.5998708740214672, + "grad_norm": 0.7280415296554565, + "learning_rate": 0.00014014899698428255, + "loss": 2.6128, + "step": 7433 + }, + { + "epoch": 0.5999515777580502, + "grad_norm": 0.7037710547447205, + "learning_rate": 0.0001401345378418628, + "loss": 2.6157, + "step": 7434 + }, + { + "epoch": 0.6000322814946332, + "grad_norm": 0.6984395980834961, + "learning_rate": 0.00014012007769918542, + "loss": 2.5579, + "step": 7435 + }, + { + "epoch": 0.6001129852312163, + "grad_norm": 0.6853601336479187, + "learning_rate": 0.00014010561655661085, + "loss": 2.6316, + "step": 7436 + }, + { + "epoch": 0.6001936889677992, + "grad_norm": 0.7551750540733337, + "learning_rate": 0.00014009115441449948, + "loss": 2.6671, + "step": 7437 + }, + { + "epoch": 0.6002743927043822, + "grad_norm": 0.7680155038833618, + "learning_rate": 0.0001400766912732117, + "loss": 2.6301, + "step": 7438 + }, + { + "epoch": 0.6003550964409652, + "grad_norm": 0.6757175922393799, + "learning_rate": 0.00014006222713310807, + "loss": 2.5584, + "step": 7439 + }, + { + "epoch": 0.6004358001775483, + "grad_norm": 0.6636163592338562, + "learning_rate": 0.00014004776199454897, + "loss": 2.5437, + "step": 7440 + }, + { + "epoch": 0.6005165039141312, + "grad_norm": 0.7317774891853333, + "learning_rate": 0.00014003329585789498, + "loss": 2.594, + "step": 7441 + }, + { + "epoch": 0.6005972076507142, + "grad_norm": 0.6903451681137085, + "learning_rate": 0.0001400188287235066, + "loss": 2.6175, + "step": 7442 + }, + { + "epoch": 0.6006779113872972, + "grad_norm": 0.7137858867645264, + "learning_rate": 0.00014000436059174437, + "loss": 2.6411, + 
"step": 7443 + }, + { + "epoch": 0.6007586151238803, + "grad_norm": 0.7124149203300476, + "learning_rate": 0.00013998989146296893, + "loss": 2.6562, + "step": 7444 + }, + { + "epoch": 0.6008393188604633, + "grad_norm": 0.7518175840377808, + "learning_rate": 0.00013997542133754087, + "loss": 2.6213, + "step": 7445 + }, + { + "epoch": 0.6009200225970462, + "grad_norm": 0.6843053698539734, + "learning_rate": 0.0001399609502158208, + "loss": 2.6099, + "step": 7446 + }, + { + "epoch": 0.6010007263336292, + "grad_norm": 0.6668025255203247, + "learning_rate": 0.0001399464780981694, + "loss": 2.609, + "step": 7447 + }, + { + "epoch": 0.6010814300702122, + "grad_norm": 0.6849119067192078, + "learning_rate": 0.00013993200498494735, + "loss": 2.6097, + "step": 7448 + }, + { + "epoch": 0.6011621338067953, + "grad_norm": 0.7767381072044373, + "learning_rate": 0.0001399175308765153, + "loss": 2.6351, + "step": 7449 + }, + { + "epoch": 0.6012428375433783, + "grad_norm": 0.6630256772041321, + "learning_rate": 0.0001399030557732341, + "loss": 2.5924, + "step": 7450 + }, + { + "epoch": 0.6013235412799612, + "grad_norm": 0.6918755769729614, + "learning_rate": 0.00013988857967546444, + "loss": 2.6205, + "step": 7451 + }, + { + "epoch": 0.6014042450165442, + "grad_norm": 0.7179181575775146, + "learning_rate": 0.00013987410258356708, + "loss": 2.5971, + "step": 7452 + }, + { + "epoch": 0.6014849487531273, + "grad_norm": 0.7233672738075256, + "learning_rate": 0.00013985962449790284, + "loss": 2.595, + "step": 7453 + }, + { + "epoch": 0.6015656524897103, + "grad_norm": 0.6861593127250671, + "learning_rate": 0.0001398451454188326, + "loss": 2.6127, + "step": 7454 + }, + { + "epoch": 0.6016463562262933, + "grad_norm": 0.6818981170654297, + "learning_rate": 0.00013983066534671714, + "loss": 2.5923, + "step": 7455 + }, + { + "epoch": 0.6017270599628762, + "grad_norm": 0.700036346912384, + "learning_rate": 0.0001398161842819174, + "loss": 2.5474, + "step": 7456 + }, + { + "epoch": 
0.6018077636994593, + "grad_norm": 0.6884824633598328, + "learning_rate": 0.00013980170222479426, + "loss": 2.6041, + "step": 7457 + }, + { + "epoch": 0.6018884674360423, + "grad_norm": 0.6745120286941528, + "learning_rate": 0.00013978721917570866, + "loss": 2.6638, + "step": 7458 + }, + { + "epoch": 0.6019691711726253, + "grad_norm": 0.6886256337165833, + "learning_rate": 0.00013977273513502157, + "loss": 2.5733, + "step": 7459 + }, + { + "epoch": 0.6020498749092082, + "grad_norm": 0.7220930457115173, + "learning_rate": 0.00013975825010309394, + "loss": 2.5739, + "step": 7460 + }, + { + "epoch": 0.6021305786457913, + "grad_norm": 0.7281780242919922, + "learning_rate": 0.0001397437640802868, + "loss": 2.5646, + "step": 7461 + }, + { + "epoch": 0.6022112823823743, + "grad_norm": 0.7316896915435791, + "learning_rate": 0.00013972927706696115, + "loss": 2.6532, + "step": 7462 + }, + { + "epoch": 0.6022919861189573, + "grad_norm": 0.6288646459579468, + "learning_rate": 0.00013971478906347806, + "loss": 2.5753, + "step": 7463 + }, + { + "epoch": 0.6023726898555403, + "grad_norm": 0.7110145688056946, + "learning_rate": 0.00013970030007019862, + "loss": 2.6421, + "step": 7464 + }, + { + "epoch": 0.6024533935921234, + "grad_norm": 0.7437754273414612, + "learning_rate": 0.00013968581008748393, + "loss": 2.585, + "step": 7465 + }, + { + "epoch": 0.6025340973287063, + "grad_norm": 0.6839718222618103, + "learning_rate": 0.00013967131911569514, + "loss": 2.6249, + "step": 7466 + }, + { + "epoch": 0.6026148010652893, + "grad_norm": 0.7358397841453552, + "learning_rate": 0.00013965682715519332, + "loss": 2.597, + "step": 7467 + }, + { + "epoch": 0.6026955048018723, + "grad_norm": 0.673651397228241, + "learning_rate": 0.00013964233420633973, + "loss": 2.6111, + "step": 7468 + }, + { + "epoch": 0.6027762085384554, + "grad_norm": 0.7390083074569702, + "learning_rate": 0.00013962784026949553, + "loss": 2.6131, + "step": 7469 + }, + { + "epoch": 0.6028569122750383, + "grad_norm": 
0.6902220249176025, + "learning_rate": 0.00013961334534502197, + "loss": 2.6116, + "step": 7470 + }, + { + "epoch": 0.6029376160116213, + "grad_norm": 0.6946651935577393, + "learning_rate": 0.00013959884943328033, + "loss": 2.6307, + "step": 7471 + }, + { + "epoch": 0.6030183197482043, + "grad_norm": 0.7277294993400574, + "learning_rate": 0.00013958435253463183, + "loss": 2.6065, + "step": 7472 + }, + { + "epoch": 0.6030990234847874, + "grad_norm": 0.743833601474762, + "learning_rate": 0.00013956985464943776, + "loss": 2.6644, + "step": 7473 + }, + { + "epoch": 0.6031797272213704, + "grad_norm": 0.6480288505554199, + "learning_rate": 0.0001395553557780595, + "loss": 2.5386, + "step": 7474 + }, + { + "epoch": 0.6032604309579533, + "grad_norm": 0.799443781375885, + "learning_rate": 0.00013954085592085834, + "loss": 2.5653, + "step": 7475 + }, + { + "epoch": 0.6033411346945363, + "grad_norm": 0.6790705323219299, + "learning_rate": 0.00013952635507819575, + "loss": 2.6229, + "step": 7476 + }, + { + "epoch": 0.6034218384311194, + "grad_norm": 0.6871588826179504, + "learning_rate": 0.00013951185325043302, + "loss": 2.6514, + "step": 7477 + }, + { + "epoch": 0.6035025421677024, + "grad_norm": 0.7236921787261963, + "learning_rate": 0.00013949735043793164, + "loss": 2.5931, + "step": 7478 + }, + { + "epoch": 0.6035832459042854, + "grad_norm": 0.6888518929481506, + "learning_rate": 0.00013948284664105305, + "loss": 2.6408, + "step": 7479 + }, + { + "epoch": 0.6036639496408683, + "grad_norm": 0.7292625904083252, + "learning_rate": 0.00013946834186015868, + "loss": 2.5829, + "step": 7480 + }, + { + "epoch": 0.6037446533774514, + "grad_norm": 0.6755293607711792, + "learning_rate": 0.00013945383609561009, + "loss": 2.5917, + "step": 7481 + }, + { + "epoch": 0.6038253571140344, + "grad_norm": 0.6808032989501953, + "learning_rate": 0.00013943932934776877, + "loss": 2.6103, + "step": 7482 + }, + { + "epoch": 0.6039060608506174, + "grad_norm": 0.747173547744751, + "learning_rate": 
0.00013942482161699625, + "loss": 2.624, + "step": 7483 + }, + { + "epoch": 0.6039867645872004, + "grad_norm": 0.7265594005584717, + "learning_rate": 0.00013941031290365413, + "loss": 2.5672, + "step": 7484 + }, + { + "epoch": 0.6040674683237834, + "grad_norm": 0.6434060335159302, + "learning_rate": 0.000139395803208104, + "loss": 2.5885, + "step": 7485 + }, + { + "epoch": 0.6041481720603664, + "grad_norm": 0.7148730754852295, + "learning_rate": 0.00013938129253070747, + "loss": 2.6466, + "step": 7486 + }, + { + "epoch": 0.6042288757969494, + "grad_norm": 0.7724708318710327, + "learning_rate": 0.00013936678087182616, + "loss": 2.6364, + "step": 7487 + }, + { + "epoch": 0.6043095795335324, + "grad_norm": 0.6886702179908752, + "learning_rate": 0.0001393522682318218, + "loss": 2.5844, + "step": 7488 + }, + { + "epoch": 0.6043902832701155, + "grad_norm": 0.6501082181930542, + "learning_rate": 0.00013933775461105603, + "loss": 2.5767, + "step": 7489 + }, + { + "epoch": 0.6044709870066984, + "grad_norm": 0.7333959341049194, + "learning_rate": 0.00013932324000989058, + "loss": 2.5735, + "step": 7490 + }, + { + "epoch": 0.6045516907432814, + "grad_norm": 0.7057361602783203, + "learning_rate": 0.00013930872442868722, + "loss": 2.627, + "step": 7491 + }, + { + "epoch": 0.6046323944798644, + "grad_norm": 0.705078661441803, + "learning_rate": 0.00013929420786780767, + "loss": 2.6012, + "step": 7492 + }, + { + "epoch": 0.6047130982164475, + "grad_norm": 0.7192156314849854, + "learning_rate": 0.00013927969032761378, + "loss": 2.5594, + "step": 7493 + }, + { + "epoch": 0.6047938019530305, + "grad_norm": 0.703116774559021, + "learning_rate": 0.00013926517180846726, + "loss": 2.6099, + "step": 7494 + }, + { + "epoch": 0.6048745056896134, + "grad_norm": 0.6970264315605164, + "learning_rate": 0.00013925065231073006, + "loss": 2.5832, + "step": 7495 + }, + { + "epoch": 0.6049552094261964, + "grad_norm": 0.7308031320571899, + "learning_rate": 0.00013923613183476402, + "loss": 2.586, + 
"step": 7496 + }, + { + "epoch": 0.6050359131627794, + "grad_norm": 0.7212777137756348, + "learning_rate": 0.00013922161038093097, + "loss": 2.6374, + "step": 7497 + }, + { + "epoch": 0.6051166168993625, + "grad_norm": 0.6644641757011414, + "learning_rate": 0.0001392070879495929, + "loss": 2.5226, + "step": 7498 + }, + { + "epoch": 0.6051973206359454, + "grad_norm": 0.6683016419410706, + "learning_rate": 0.0001391925645411117, + "loss": 2.5279, + "step": 7499 + }, + { + "epoch": 0.6052780243725284, + "grad_norm": 0.7341439127922058, + "learning_rate": 0.00013917804015584932, + "loss": 2.5995, + "step": 7500 + }, + { + "epoch": 0.6053587281091114, + "grad_norm": 0.753942608833313, + "learning_rate": 0.0001391635147941678, + "loss": 2.5706, + "step": 7501 + }, + { + "epoch": 0.6054394318456945, + "grad_norm": 0.7541958093643188, + "learning_rate": 0.00013914898845642908, + "loss": 2.6365, + "step": 7502 + }, + { + "epoch": 0.6055201355822775, + "grad_norm": 0.6583349108695984, + "learning_rate": 0.00013913446114299528, + "loss": 2.534, + "step": 7503 + }, + { + "epoch": 0.6056008393188604, + "grad_norm": 0.6545756459236145, + "learning_rate": 0.00013911993285422835, + "loss": 2.5443, + "step": 7504 + }, + { + "epoch": 0.6056815430554434, + "grad_norm": 0.8290210366249084, + "learning_rate": 0.00013910540359049045, + "loss": 2.6196, + "step": 7505 + }, + { + "epoch": 0.6057622467920265, + "grad_norm": 0.7032577395439148, + "learning_rate": 0.0001390908733521437, + "loss": 2.6575, + "step": 7506 + }, + { + "epoch": 0.6058429505286095, + "grad_norm": 0.7018071413040161, + "learning_rate": 0.0001390763421395502, + "loss": 2.6272, + "step": 7507 + }, + { + "epoch": 0.6059236542651925, + "grad_norm": 0.6288552284240723, + "learning_rate": 0.00013906180995307206, + "loss": 2.5295, + "step": 7508 + }, + { + "epoch": 0.6060043580017754, + "grad_norm": 0.7013774514198303, + "learning_rate": 0.00013904727679307153, + "loss": 2.5669, + "step": 7509 + }, + { + "epoch": 
0.6060850617383585, + "grad_norm": 0.6811630129814148, + "learning_rate": 0.00013903274265991082, + "loss": 2.5827, + "step": 7510 + }, + { + "epoch": 0.6061657654749415, + "grad_norm": 0.6690269112586975, + "learning_rate": 0.0001390182075539521, + "loss": 2.5947, + "step": 7511 + }, + { + "epoch": 0.6062464692115245, + "grad_norm": 0.6946289539337158, + "learning_rate": 0.00013900367147555768, + "loss": 2.59, + "step": 7512 + }, + { + "epoch": 0.6063271729481075, + "grad_norm": 0.7302843332290649, + "learning_rate": 0.0001389891344250898, + "loss": 2.5994, + "step": 7513 + }, + { + "epoch": 0.6064078766846905, + "grad_norm": 0.7462306022644043, + "learning_rate": 0.00013897459640291074, + "loss": 2.5983, + "step": 7514 + }, + { + "epoch": 0.6064885804212735, + "grad_norm": 0.6948123574256897, + "learning_rate": 0.0001389600574093829, + "loss": 2.5737, + "step": 7515 + }, + { + "epoch": 0.6065692841578565, + "grad_norm": 0.6897372007369995, + "learning_rate": 0.00013894551744486857, + "loss": 2.607, + "step": 7516 + }, + { + "epoch": 0.6066499878944395, + "grad_norm": 0.6808069348335266, + "learning_rate": 0.00013893097650973015, + "loss": 2.5712, + "step": 7517 + }, + { + "epoch": 0.6067306916310226, + "grad_norm": 0.7000731229782104, + "learning_rate": 0.00013891643460433, + "loss": 2.5654, + "step": 7518 + }, + { + "epoch": 0.6068113953676055, + "grad_norm": 0.7197545766830444, + "learning_rate": 0.0001389018917290306, + "loss": 2.5705, + "step": 7519 + }, + { + "epoch": 0.6068920991041885, + "grad_norm": 0.7001069188117981, + "learning_rate": 0.00013888734788419433, + "loss": 2.5934, + "step": 7520 + }, + { + "epoch": 0.6069728028407715, + "grad_norm": 0.7480459213256836, + "learning_rate": 0.00013887280307018377, + "loss": 2.5211, + "step": 7521 + }, + { + "epoch": 0.6070535065773546, + "grad_norm": 0.6913945078849792, + "learning_rate": 0.00013885825728736132, + "loss": 2.6013, + "step": 7522 + }, + { + "epoch": 0.6071342103139376, + "grad_norm": 
0.6527336239814758, + "learning_rate": 0.00013884371053608948, + "loss": 2.5901, + "step": 7523 + }, + { + "epoch": 0.6072149140505205, + "grad_norm": 0.6897335052490234, + "learning_rate": 0.00013882916281673086, + "loss": 2.5389, + "step": 7524 + }, + { + "epoch": 0.6072956177871035, + "grad_norm": 0.7159501910209656, + "learning_rate": 0.00013881461412964798, + "loss": 2.5399, + "step": 7525 + }, + { + "epoch": 0.6073763215236866, + "grad_norm": 0.6744364500045776, + "learning_rate": 0.00013880006447520346, + "loss": 2.5658, + "step": 7526 + }, + { + "epoch": 0.6074570252602696, + "grad_norm": 0.819950520992279, + "learning_rate": 0.00013878551385375994, + "loss": 2.6143, + "step": 7527 + }, + { + "epoch": 0.6075377289968525, + "grad_norm": 0.744293212890625, + "learning_rate": 0.00013877096226568, + "loss": 2.6565, + "step": 7528 + }, + { + "epoch": 0.6076184327334355, + "grad_norm": 0.7121254205703735, + "learning_rate": 0.00013875640971132636, + "loss": 2.6151, + "step": 7529 + }, + { + "epoch": 0.6076991364700186, + "grad_norm": 0.7616204023361206, + "learning_rate": 0.00013874185619106163, + "loss": 2.6395, + "step": 7530 + }, + { + "epoch": 0.6077798402066016, + "grad_norm": 0.7481076121330261, + "learning_rate": 0.0001387273017052486, + "loss": 2.597, + "step": 7531 + }, + { + "epoch": 0.6078605439431846, + "grad_norm": 0.6660816073417664, + "learning_rate": 0.00013871274625425, + "loss": 2.5696, + "step": 7532 + }, + { + "epoch": 0.6079412476797675, + "grad_norm": 0.7491411566734314, + "learning_rate": 0.00013869818983842854, + "loss": 2.552, + "step": 7533 + }, + { + "epoch": 0.6080219514163506, + "grad_norm": 0.7130792140960693, + "learning_rate": 0.00013868363245814704, + "loss": 2.5959, + "step": 7534 + }, + { + "epoch": 0.6081026551529336, + "grad_norm": 0.7157341241836548, + "learning_rate": 0.00013866907411376827, + "loss": 2.5598, + "step": 7535 + }, + { + "epoch": 0.6081833588895166, + "grad_norm": 0.7750656008720398, + "learning_rate": 
0.00013865451480565513, + "loss": 2.6217, + "step": 7536 + }, + { + "epoch": 0.6082640626260996, + "grad_norm": 0.6915080547332764, + "learning_rate": 0.00013863995453417043, + "loss": 2.6211, + "step": 7537 + }, + { + "epoch": 0.6083447663626826, + "grad_norm": 0.7245940566062927, + "learning_rate": 0.00013862539329967706, + "loss": 2.5619, + "step": 7538 + }, + { + "epoch": 0.6084254700992656, + "grad_norm": 0.8884119391441345, + "learning_rate": 0.0001386108311025379, + "loss": 2.6349, + "step": 7539 + }, + { + "epoch": 0.6085061738358486, + "grad_norm": 0.7889477610588074, + "learning_rate": 0.0001385962679431159, + "loss": 2.6169, + "step": 7540 + }, + { + "epoch": 0.6085868775724316, + "grad_norm": 0.7187505960464478, + "learning_rate": 0.00013858170382177403, + "loss": 2.5582, + "step": 7541 + }, + { + "epoch": 0.6086675813090147, + "grad_norm": 0.7502198219299316, + "learning_rate": 0.00013856713873887526, + "loss": 2.5418, + "step": 7542 + }, + { + "epoch": 0.6087482850455976, + "grad_norm": 0.797704815864563, + "learning_rate": 0.00013855257269478256, + "loss": 2.5764, + "step": 7543 + }, + { + "epoch": 0.6088289887821806, + "grad_norm": 0.7651431560516357, + "learning_rate": 0.00013853800568985896, + "loss": 2.5995, + "step": 7544 + }, + { + "epoch": 0.6089096925187636, + "grad_norm": 0.7048482298851013, + "learning_rate": 0.00013852343772446753, + "loss": 2.5656, + "step": 7545 + }, + { + "epoch": 0.6089903962553467, + "grad_norm": 0.7252251505851746, + "learning_rate": 0.00013850886879897135, + "loss": 2.6509, + "step": 7546 + }, + { + "epoch": 0.6090710999919297, + "grad_norm": 0.7220067381858826, + "learning_rate": 0.00013849429891373344, + "loss": 2.5558, + "step": 7547 + }, + { + "epoch": 0.6091518037285126, + "grad_norm": 0.7672600746154785, + "learning_rate": 0.000138479728069117, + "loss": 2.5682, + "step": 7548 + }, + { + "epoch": 0.6092325074650956, + "grad_norm": 0.7753601670265198, + "learning_rate": 0.0001384651562654852, + "loss": 2.6459, 
+ "step": 7549 + }, + { + "epoch": 0.6093132112016786, + "grad_norm": 0.7346559166908264, + "learning_rate": 0.00013845058350320108, + "loss": 2.5988, + "step": 7550 + }, + { + "epoch": 0.6093939149382617, + "grad_norm": 0.7386072874069214, + "learning_rate": 0.00013843600978262797, + "loss": 2.6366, + "step": 7551 + }, + { + "epoch": 0.6094746186748446, + "grad_norm": 0.7114188075065613, + "learning_rate": 0.00013842143510412898, + "loss": 2.5515, + "step": 7552 + }, + { + "epoch": 0.6095553224114276, + "grad_norm": 0.6836373209953308, + "learning_rate": 0.00013840685946806742, + "loss": 2.6301, + "step": 7553 + }, + { + "epoch": 0.6096360261480106, + "grad_norm": 0.7548927068710327, + "learning_rate": 0.00013839228287480652, + "loss": 2.6508, + "step": 7554 + }, + { + "epoch": 0.6097167298845937, + "grad_norm": 0.6931679248809814, + "learning_rate": 0.00013837770532470957, + "loss": 2.5535, + "step": 7555 + }, + { + "epoch": 0.6097974336211767, + "grad_norm": 0.7621145248413086, + "learning_rate": 0.00013836312681813988, + "loss": 2.6831, + "step": 7556 + }, + { + "epoch": 0.6098781373577596, + "grad_norm": 0.6735427975654602, + "learning_rate": 0.00013834854735546079, + "loss": 2.5338, + "step": 7557 + }, + { + "epoch": 0.6099588410943426, + "grad_norm": 0.7157600522041321, + "learning_rate": 0.00013833396693703565, + "loss": 2.5713, + "step": 7558 + }, + { + "epoch": 0.6100395448309257, + "grad_norm": 0.718032956123352, + "learning_rate": 0.00013831938556322789, + "loss": 2.5625, + "step": 7559 + }, + { + "epoch": 0.6101202485675087, + "grad_norm": 0.7290309071540833, + "learning_rate": 0.0001383048032344008, + "loss": 2.5956, + "step": 7560 + }, + { + "epoch": 0.6102009523040917, + "grad_norm": 0.675470769405365, + "learning_rate": 0.00013829021995091792, + "loss": 2.6053, + "step": 7561 + }, + { + "epoch": 0.6102816560406746, + "grad_norm": 0.7348767518997192, + "learning_rate": 0.00013827563571314268, + "loss": 2.6174, + "step": 7562 + }, + { + "epoch": 
0.6103623597772577, + "grad_norm": 0.64495849609375, + "learning_rate": 0.00013826105052143852, + "loss": 2.5923, + "step": 7563 + }, + { + "epoch": 0.6104430635138407, + "grad_norm": 0.7379264235496521, + "learning_rate": 0.000138246464376169, + "loss": 2.6438, + "step": 7564 + }, + { + "epoch": 0.6105237672504237, + "grad_norm": 0.7802134156227112, + "learning_rate": 0.00013823187727769756, + "loss": 2.5884, + "step": 7565 + }, + { + "epoch": 0.6106044709870067, + "grad_norm": 0.6907222867012024, + "learning_rate": 0.00013821728922638782, + "loss": 2.596, + "step": 7566 + }, + { + "epoch": 0.6106851747235897, + "grad_norm": 0.6924182176589966, + "learning_rate": 0.00013820270022260335, + "loss": 2.5631, + "step": 7567 + }, + { + "epoch": 0.6107658784601727, + "grad_norm": 0.729258120059967, + "learning_rate": 0.0001381881102667077, + "loss": 2.5761, + "step": 7568 + }, + { + "epoch": 0.6108465821967557, + "grad_norm": 0.7141425013542175, + "learning_rate": 0.00013817351935906455, + "loss": 2.6214, + "step": 7569 + }, + { + "epoch": 0.6109272859333387, + "grad_norm": 0.7564505338668823, + "learning_rate": 0.00013815892750003748, + "loss": 2.6338, + "step": 7570 + }, + { + "epoch": 0.6110079896699218, + "grad_norm": 0.674705982208252, + "learning_rate": 0.00013814433468999022, + "loss": 2.5604, + "step": 7571 + }, + { + "epoch": 0.6110886934065047, + "grad_norm": 0.6956657767295837, + "learning_rate": 0.00013812974092928642, + "loss": 2.5805, + "step": 7572 + }, + { + "epoch": 0.6111693971430877, + "grad_norm": 0.7393823862075806, + "learning_rate": 0.0001381151462182898, + "loss": 2.6312, + "step": 7573 + }, + { + "epoch": 0.6112501008796707, + "grad_norm": 0.7048184275627136, + "learning_rate": 0.00013810055055736407, + "loss": 2.5948, + "step": 7574 + }, + { + "epoch": 0.6113308046162538, + "grad_norm": 0.748798668384552, + "learning_rate": 0.0001380859539468731, + "loss": 2.5815, + "step": 7575 + }, + { + "epoch": 0.6114115083528368, + "grad_norm": 
0.7146531343460083, + "learning_rate": 0.00013807135638718048, + "loss": 2.5803, + "step": 7576 + }, + { + "epoch": 0.6114922120894197, + "grad_norm": 0.6883770823478699, + "learning_rate": 0.00013805675787865025, + "loss": 2.6005, + "step": 7577 + }, + { + "epoch": 0.6115729158260027, + "grad_norm": 0.7808375358581543, + "learning_rate": 0.0001380421584216461, + "loss": 2.6539, + "step": 7578 + }, + { + "epoch": 0.6116536195625858, + "grad_norm": 0.6919417977333069, + "learning_rate": 0.00013802755801653192, + "loss": 2.5812, + "step": 7579 + }, + { + "epoch": 0.6117343232991688, + "grad_norm": 0.6651085615158081, + "learning_rate": 0.0001380129566636716, + "loss": 2.5952, + "step": 7580 + }, + { + "epoch": 0.6118150270357517, + "grad_norm": 0.7806586623191833, + "learning_rate": 0.00013799835436342897, + "loss": 2.6509, + "step": 7581 + }, + { + "epoch": 0.6118957307723347, + "grad_norm": 0.6522969007492065, + "learning_rate": 0.0001379837511161681, + "loss": 2.606, + "step": 7582 + }, + { + "epoch": 0.6119764345089178, + "grad_norm": 0.7566540837287903, + "learning_rate": 0.0001379691469222528, + "loss": 2.6625, + "step": 7583 + }, + { + "epoch": 0.6120571382455008, + "grad_norm": 0.7126421928405762, + "learning_rate": 0.00013795454178204715, + "loss": 2.6396, + "step": 7584 + }, + { + "epoch": 0.6121378419820838, + "grad_norm": 0.6534276008605957, + "learning_rate": 0.0001379399356959151, + "loss": 2.5841, + "step": 7585 + }, + { + "epoch": 0.6122185457186667, + "grad_norm": 0.7663385272026062, + "learning_rate": 0.00013792532866422065, + "loss": 2.6685, + "step": 7586 + }, + { + "epoch": 0.6122992494552498, + "grad_norm": 0.6971656084060669, + "learning_rate": 0.0001379107206873279, + "loss": 2.6036, + "step": 7587 + }, + { + "epoch": 0.6123799531918328, + "grad_norm": 0.6807122230529785, + "learning_rate": 0.00013789611176560088, + "loss": 2.6499, + "step": 7588 + }, + { + "epoch": 0.6124606569284158, + "grad_norm": 0.6712431311607361, + "learning_rate": 
0.0001378815018994037, + "loss": 2.6725, + "step": 7589 + }, + { + "epoch": 0.6125413606649988, + "grad_norm": 0.6986604928970337, + "learning_rate": 0.00013786689108910045, + "loss": 2.6159, + "step": 7590 + }, + { + "epoch": 0.6126220644015818, + "grad_norm": 0.7004108428955078, + "learning_rate": 0.0001378522793350553, + "loss": 2.5743, + "step": 7591 + }, + { + "epoch": 0.6127027681381648, + "grad_norm": 0.6782098412513733, + "learning_rate": 0.00013783766663763239, + "loss": 2.5776, + "step": 7592 + }, + { + "epoch": 0.6127834718747478, + "grad_norm": 0.6697036027908325, + "learning_rate": 0.00013782305299719593, + "loss": 2.6195, + "step": 7593 + }, + { + "epoch": 0.6128641756113308, + "grad_norm": 0.6894395351409912, + "learning_rate": 0.00013780843841411014, + "loss": 2.662, + "step": 7594 + }, + { + "epoch": 0.6129448793479139, + "grad_norm": 0.6775636672973633, + "learning_rate": 0.00013779382288873918, + "loss": 2.6083, + "step": 7595 + }, + { + "epoch": 0.6130255830844968, + "grad_norm": 0.7143577337265015, + "learning_rate": 0.00013777920642144738, + "loss": 2.581, + "step": 7596 + }, + { + "epoch": 0.6131062868210798, + "grad_norm": 0.6143797636032104, + "learning_rate": 0.00013776458901259905, + "loss": 2.541, + "step": 7597 + }, + { + "epoch": 0.6131869905576628, + "grad_norm": 0.7003727555274963, + "learning_rate": 0.00013774997066255839, + "loss": 2.5748, + "step": 7598 + }, + { + "epoch": 0.6132676942942458, + "grad_norm": 0.6796504259109497, + "learning_rate": 0.0001377353513716898, + "loss": 2.596, + "step": 7599 + }, + { + "epoch": 0.6133483980308289, + "grad_norm": 0.7011274695396423, + "learning_rate": 0.00013772073114035762, + "loss": 2.5318, + "step": 7600 + }, + { + "epoch": 0.6134291017674118, + "grad_norm": 0.6584382057189941, + "learning_rate": 0.0001377061099689262, + "loss": 2.5793, + "step": 7601 + }, + { + "epoch": 0.6135098055039948, + "grad_norm": 0.6586211919784546, + "learning_rate": 0.00013769148785775995, + "loss": 2.5969, + 
"step": 7602 + }, + { + "epoch": 0.6135905092405778, + "grad_norm": 0.7187132835388184, + "learning_rate": 0.0001376768648072233, + "loss": 2.6407, + "step": 7603 + }, + { + "epoch": 0.6136712129771609, + "grad_norm": 0.7394679188728333, + "learning_rate": 0.00013766224081768072, + "loss": 2.5959, + "step": 7604 + }, + { + "epoch": 0.6137519167137439, + "grad_norm": 0.6802375912666321, + "learning_rate": 0.00013764761588949665, + "loss": 2.5956, + "step": 7605 + }, + { + "epoch": 0.6138326204503268, + "grad_norm": 0.6949049234390259, + "learning_rate": 0.00013763299002303553, + "loss": 2.556, + "step": 7606 + }, + { + "epoch": 0.6139133241869098, + "grad_norm": 0.7406589388847351, + "learning_rate": 0.00013761836321866196, + "loss": 2.5495, + "step": 7607 + }, + { + "epoch": 0.6139940279234929, + "grad_norm": 0.742499053478241, + "learning_rate": 0.0001376037354767404, + "loss": 2.589, + "step": 7608 + }, + { + "epoch": 0.6140747316600759, + "grad_norm": 0.7669157385826111, + "learning_rate": 0.00013758910679763551, + "loss": 2.576, + "step": 7609 + }, + { + "epoch": 0.6141554353966588, + "grad_norm": 0.6506752967834473, + "learning_rate": 0.00013757447718171182, + "loss": 2.5792, + "step": 7610 + }, + { + "epoch": 0.6142361391332418, + "grad_norm": 0.698514461517334, + "learning_rate": 0.00013755984662933393, + "loss": 2.5809, + "step": 7611 + }, + { + "epoch": 0.6143168428698249, + "grad_norm": 0.6541082262992859, + "learning_rate": 0.00013754521514086645, + "loss": 2.5755, + "step": 7612 + }, + { + "epoch": 0.6143975466064079, + "grad_norm": 0.6619362235069275, + "learning_rate": 0.0001375305827166741, + "loss": 2.5886, + "step": 7613 + }, + { + "epoch": 0.6144782503429909, + "grad_norm": 0.7205569744110107, + "learning_rate": 0.00013751594935712148, + "loss": 2.6293, + "step": 7614 + }, + { + "epoch": 0.6145589540795738, + "grad_norm": 0.7382494211196899, + "learning_rate": 0.00013750131506257339, + "loss": 2.6977, + "step": 7615 + }, + { + "epoch": 
0.6146396578161569, + "grad_norm": 0.7492627501487732, + "learning_rate": 0.00013748667983339444, + "loss": 2.6492, + "step": 7616 + }, + { + "epoch": 0.6147203615527399, + "grad_norm": 0.6627328991889954, + "learning_rate": 0.00013747204366994947, + "loss": 2.5458, + "step": 7617 + }, + { + "epoch": 0.6148010652893229, + "grad_norm": 0.7039626836776733, + "learning_rate": 0.00013745740657260323, + "loss": 2.6578, + "step": 7618 + }, + { + "epoch": 0.6148817690259059, + "grad_norm": 0.6999295353889465, + "learning_rate": 0.00013744276854172046, + "loss": 2.6189, + "step": 7619 + }, + { + "epoch": 0.6149624727624889, + "grad_norm": 0.7604365348815918, + "learning_rate": 0.00013742812957766607, + "loss": 2.5344, + "step": 7620 + }, + { + "epoch": 0.6150431764990719, + "grad_norm": 0.6860831379890442, + "learning_rate": 0.0001374134896808048, + "loss": 2.6309, + "step": 7621 + }, + { + "epoch": 0.6151238802356549, + "grad_norm": 0.6628854274749756, + "learning_rate": 0.0001373988488515016, + "loss": 2.6339, + "step": 7622 + }, + { + "epoch": 0.6152045839722379, + "grad_norm": 0.7112562656402588, + "learning_rate": 0.00013738420709012134, + "loss": 2.6064, + "step": 7623 + }, + { + "epoch": 0.615285287708821, + "grad_norm": 0.7068392634391785, + "learning_rate": 0.0001373695643970289, + "loss": 2.624, + "step": 7624 + }, + { + "epoch": 0.6153659914454039, + "grad_norm": 0.6534786224365234, + "learning_rate": 0.00013735492077258924, + "loss": 2.5582, + "step": 7625 + }, + { + "epoch": 0.6154466951819869, + "grad_norm": 0.7433418035507202, + "learning_rate": 0.00013734027621716729, + "loss": 2.5803, + "step": 7626 + }, + { + "epoch": 0.6155273989185699, + "grad_norm": 0.7172532081604004, + "learning_rate": 0.00013732563073112804, + "loss": 2.5906, + "step": 7627 + }, + { + "epoch": 0.615608102655153, + "grad_norm": 0.6712297201156616, + "learning_rate": 0.00013731098431483653, + "loss": 2.5597, + "step": 7628 + }, + { + "epoch": 0.615688806391736, + "grad_norm": 
0.7079061269760132, + "learning_rate": 0.00013729633696865775, + "loss": 2.5538, + "step": 7629 + }, + { + "epoch": 0.6157695101283189, + "grad_norm": 0.6968971490859985, + "learning_rate": 0.00013728168869295678, + "loss": 2.6429, + "step": 7630 + }, + { + "epoch": 0.6158502138649019, + "grad_norm": 0.7123236060142517, + "learning_rate": 0.00013726703948809864, + "loss": 2.5607, + "step": 7631 + }, + { + "epoch": 0.615930917601485, + "grad_norm": 0.6441208124160767, + "learning_rate": 0.00013725238935444843, + "loss": 2.6176, + "step": 7632 + }, + { + "epoch": 0.616011621338068, + "grad_norm": 0.7145917415618896, + "learning_rate": 0.00013723773829237137, + "loss": 2.5698, + "step": 7633 + }, + { + "epoch": 0.616092325074651, + "grad_norm": 0.6397334337234497, + "learning_rate": 0.00013722308630223252, + "loss": 2.596, + "step": 7634 + }, + { + "epoch": 0.6161730288112339, + "grad_norm": 0.6372843980789185, + "learning_rate": 0.00013720843338439702, + "loss": 2.5679, + "step": 7635 + }, + { + "epoch": 0.616253732547817, + "grad_norm": 0.707842230796814, + "learning_rate": 0.00013719377953923012, + "loss": 2.6296, + "step": 7636 + }, + { + "epoch": 0.6163344362844, + "grad_norm": 0.6629409193992615, + "learning_rate": 0.000137179124767097, + "loss": 2.542, + "step": 7637 + }, + { + "epoch": 0.616415140020983, + "grad_norm": 0.753646194934845, + "learning_rate": 0.00013716446906836288, + "loss": 2.5741, + "step": 7638 + }, + { + "epoch": 0.6164958437575659, + "grad_norm": 0.6409948468208313, + "learning_rate": 0.0001371498124433931, + "loss": 2.6723, + "step": 7639 + }, + { + "epoch": 0.616576547494149, + "grad_norm": 0.6489264965057373, + "learning_rate": 0.0001371351548925528, + "loss": 2.5806, + "step": 7640 + }, + { + "epoch": 0.616657251230732, + "grad_norm": 0.6857934594154358, + "learning_rate": 0.00013712049641620745, + "loss": 2.6406, + "step": 7641 + }, + { + "epoch": 0.616737954967315, + "grad_norm": 0.6754183769226074, + "learning_rate": 
0.00013710583701472226, + "loss": 2.5576, + "step": 7642 + }, + { + "epoch": 0.616818658703898, + "grad_norm": 0.7083800435066223, + "learning_rate": 0.0001370911766884626, + "loss": 2.5747, + "step": 7643 + }, + { + "epoch": 0.616899362440481, + "grad_norm": 0.7281948924064636, + "learning_rate": 0.0001370765154377939, + "loss": 2.5627, + "step": 7644 + }, + { + "epoch": 0.616980066177064, + "grad_norm": 0.655414342880249, + "learning_rate": 0.00013706185326308148, + "loss": 2.5897, + "step": 7645 + }, + { + "epoch": 0.617060769913647, + "grad_norm": 0.6771859526634216, + "learning_rate": 0.0001370471901646908, + "loss": 2.5761, + "step": 7646 + }, + { + "epoch": 0.61714147365023, + "grad_norm": 0.6813557147979736, + "learning_rate": 0.00013703252614298732, + "loss": 2.5807, + "step": 7647 + }, + { + "epoch": 0.6172221773868131, + "grad_norm": 0.6948046684265137, + "learning_rate": 0.00013701786119833646, + "loss": 2.586, + "step": 7648 + }, + { + "epoch": 0.617302881123396, + "grad_norm": 0.643455982208252, + "learning_rate": 0.00013700319533110377, + "loss": 2.592, + "step": 7649 + }, + { + "epoch": 0.617383584859979, + "grad_norm": 0.7292457818984985, + "learning_rate": 0.0001369885285416547, + "loss": 2.6396, + "step": 7650 + }, + { + "epoch": 0.617464288596562, + "grad_norm": 0.642902672290802, + "learning_rate": 0.00013697386083035478, + "loss": 2.6115, + "step": 7651 + }, + { + "epoch": 0.617544992333145, + "grad_norm": 0.6536445021629333, + "learning_rate": 0.00013695919219756966, + "loss": 2.5406, + "step": 7652 + }, + { + "epoch": 0.6176256960697281, + "grad_norm": 0.6643723249435425, + "learning_rate": 0.0001369445226436648, + "loss": 2.6188, + "step": 7653 + }, + { + "epoch": 0.617706399806311, + "grad_norm": 0.6481621265411377, + "learning_rate": 0.00013692985216900592, + "loss": 2.5489, + "step": 7654 + }, + { + "epoch": 0.617787103542894, + "grad_norm": 0.6828036904335022, + "learning_rate": 0.00013691518077395856, + "loss": 2.5114, + "step": 7655 + 
}, + { + "epoch": 0.617867807279477, + "grad_norm": 0.6802895665168762, + "learning_rate": 0.00013690050845888838, + "loss": 2.5973, + "step": 7656 + }, + { + "epoch": 0.6179485110160601, + "grad_norm": 0.6980829238891602, + "learning_rate": 0.00013688583522416107, + "loss": 2.6032, + "step": 7657 + }, + { + "epoch": 0.618029214752643, + "grad_norm": 0.7157626748085022, + "learning_rate": 0.00013687116107014236, + "loss": 2.5552, + "step": 7658 + }, + { + "epoch": 0.618109918489226, + "grad_norm": 0.69700688123703, + "learning_rate": 0.00013685648599719792, + "loss": 2.5988, + "step": 7659 + }, + { + "epoch": 0.618190622225809, + "grad_norm": 0.6859539151191711, + "learning_rate": 0.0001368418100056935, + "loss": 2.6268, + "step": 7660 + }, + { + "epoch": 0.6182713259623921, + "grad_norm": 0.6812828183174133, + "learning_rate": 0.00013682713309599487, + "loss": 2.6002, + "step": 7661 + }, + { + "epoch": 0.6183520296989751, + "grad_norm": 0.6461766362190247, + "learning_rate": 0.00013681245526846783, + "loss": 2.6064, + "step": 7662 + }, + { + "epoch": 0.618432733435558, + "grad_norm": 0.7198306322097778, + "learning_rate": 0.00013679777652347814, + "loss": 2.6012, + "step": 7663 + }, + { + "epoch": 0.618513437172141, + "grad_norm": 0.7367191910743713, + "learning_rate": 0.00013678309686139168, + "loss": 2.6661, + "step": 7664 + }, + { + "epoch": 0.6185941409087241, + "grad_norm": 0.6975768804550171, + "learning_rate": 0.0001367684162825743, + "loss": 2.6394, + "step": 7665 + }, + { + "epoch": 0.6186748446453071, + "grad_norm": 0.7545140385627747, + "learning_rate": 0.0001367537347873919, + "loss": 2.624, + "step": 7666 + }, + { + "epoch": 0.6187555483818901, + "grad_norm": 0.6683520674705505, + "learning_rate": 0.0001367390523762103, + "loss": 2.6345, + "step": 7667 + }, + { + "epoch": 0.618836252118473, + "grad_norm": 0.6964975595474243, + "learning_rate": 0.00013672436904939552, + "loss": 2.591, + "step": 7668 + }, + { + "epoch": 0.6189169558550561, + 
"grad_norm": 0.7033975124359131, + "learning_rate": 0.00013670968480731344, + "loss": 2.566, + "step": 7669 + }, + { + "epoch": 0.6189976595916391, + "grad_norm": 0.706136167049408, + "learning_rate": 0.00013669499965033007, + "loss": 2.6073, + "step": 7670 + }, + { + "epoch": 0.6190783633282221, + "grad_norm": 0.7146300673484802, + "learning_rate": 0.0001366803135788114, + "loss": 2.6602, + "step": 7671 + }, + { + "epoch": 0.6191590670648051, + "grad_norm": 0.7603063583374023, + "learning_rate": 0.00013666562659312342, + "loss": 2.5286, + "step": 7672 + }, + { + "epoch": 0.6192397708013881, + "grad_norm": 0.744955837726593, + "learning_rate": 0.00013665093869363217, + "loss": 2.5678, + "step": 7673 + }, + { + "epoch": 0.6193204745379711, + "grad_norm": 0.7548620104789734, + "learning_rate": 0.00013663624988070373, + "loss": 2.6081, + "step": 7674 + }, + { + "epoch": 0.6194011782745541, + "grad_norm": 0.7367276549339294, + "learning_rate": 0.0001366215601547042, + "loss": 2.5559, + "step": 7675 + }, + { + "epoch": 0.6194818820111371, + "grad_norm": 0.7243839502334595, + "learning_rate": 0.00013660686951599962, + "loss": 2.5545, + "step": 7676 + }, + { + "epoch": 0.6195625857477202, + "grad_norm": 0.7595756649971008, + "learning_rate": 0.00013659217796495616, + "loss": 2.6547, + "step": 7677 + }, + { + "epoch": 0.6196432894843031, + "grad_norm": 0.7566717863082886, + "learning_rate": 0.00013657748550193998, + "loss": 2.6521, + "step": 7678 + }, + { + "epoch": 0.6197239932208861, + "grad_norm": 0.8441942930221558, + "learning_rate": 0.00013656279212731728, + "loss": 2.6325, + "step": 7679 + }, + { + "epoch": 0.6198046969574691, + "grad_norm": 0.7481170296669006, + "learning_rate": 0.00013654809784145418, + "loss": 2.6037, + "step": 7680 + }, + { + "epoch": 0.6198854006940522, + "grad_norm": 0.6626241207122803, + "learning_rate": 0.00013653340264471695, + "loss": 2.6028, + "step": 7681 + }, + { + "epoch": 0.6199661044306352, + "grad_norm": 0.7658020853996277, + 
"learning_rate": 0.00013651870653747186, + "loss": 2.5553, + "step": 7682 + }, + { + "epoch": 0.6200468081672181, + "grad_norm": 0.8218126893043518, + "learning_rate": 0.0001365040095200851, + "loss": 2.5661, + "step": 7683 + }, + { + "epoch": 0.6201275119038011, + "grad_norm": 0.6481068134307861, + "learning_rate": 0.00013648931159292304, + "loss": 2.5675, + "step": 7684 + }, + { + "epoch": 0.6202082156403842, + "grad_norm": 0.7529950141906738, + "learning_rate": 0.0001364746127563519, + "loss": 2.6137, + "step": 7685 + }, + { + "epoch": 0.6202889193769672, + "grad_norm": 0.7133232355117798, + "learning_rate": 0.00013645991301073816, + "loss": 2.6004, + "step": 7686 + }, + { + "epoch": 0.6203696231135502, + "grad_norm": 0.7809340953826904, + "learning_rate": 0.000136445212356448, + "loss": 2.6317, + "step": 7687 + }, + { + "epoch": 0.6204503268501331, + "grad_norm": 0.7106895446777344, + "learning_rate": 0.00013643051079384789, + "loss": 2.6086, + "step": 7688 + }, + { + "epoch": 0.6205310305867162, + "grad_norm": 0.6960744261741638, + "learning_rate": 0.00013641580832330423, + "loss": 2.5554, + "step": 7689 + }, + { + "epoch": 0.6206117343232992, + "grad_norm": 0.7078820466995239, + "learning_rate": 0.00013640110494518343, + "loss": 2.5902, + "step": 7690 + }, + { + "epoch": 0.6206924380598822, + "grad_norm": 0.7150746583938599, + "learning_rate": 0.00013638640065985195, + "loss": 2.5947, + "step": 7691 + }, + { + "epoch": 0.6207731417964651, + "grad_norm": 0.7507869601249695, + "learning_rate": 0.00013637169546767625, + "loss": 2.559, + "step": 7692 + }, + { + "epoch": 0.6208538455330482, + "grad_norm": 0.7453179359436035, + "learning_rate": 0.00013635698936902282, + "loss": 2.5612, + "step": 7693 + }, + { + "epoch": 0.6209345492696312, + "grad_norm": 0.7174177765846252, + "learning_rate": 0.00013634228236425816, + "loss": 2.6221, + "step": 7694 + }, + { + "epoch": 0.6210152530062142, + "grad_norm": 0.7394092679023743, + "learning_rate": 0.00013632757445374884, 
+ "loss": 2.6045, + "step": 7695 + }, + { + "epoch": 0.6210959567427972, + "grad_norm": 0.7346367239952087, + "learning_rate": 0.0001363128656378614, + "loss": 2.677, + "step": 7696 + }, + { + "epoch": 0.6211766604793802, + "grad_norm": 0.6697696447372437, + "learning_rate": 0.00013629815591696245, + "loss": 2.5741, + "step": 7697 + }, + { + "epoch": 0.6212573642159632, + "grad_norm": 0.6993793845176697, + "learning_rate": 0.00013628344529141852, + "loss": 2.5206, + "step": 7698 + }, + { + "epoch": 0.6213380679525462, + "grad_norm": 0.6946697235107422, + "learning_rate": 0.00013626873376159631, + "loss": 2.6046, + "step": 7699 + }, + { + "epoch": 0.6214187716891292, + "grad_norm": 0.7641928195953369, + "learning_rate": 0.00013625402132786248, + "loss": 2.5459, + "step": 7700 + }, + { + "epoch": 0.6214994754257122, + "grad_norm": 0.6513504981994629, + "learning_rate": 0.00013623930799058363, + "loss": 2.6137, + "step": 7701 + }, + { + "epoch": 0.6215801791622952, + "grad_norm": 0.6745209097862244, + "learning_rate": 0.00013622459375012651, + "loss": 2.5285, + "step": 7702 + }, + { + "epoch": 0.6216608828988782, + "grad_norm": 0.7162348628044128, + "learning_rate": 0.0001362098786068578, + "loss": 2.6224, + "step": 7703 + }, + { + "epoch": 0.6217415866354612, + "grad_norm": 0.7387436032295227, + "learning_rate": 0.00013619516256114427, + "loss": 2.6216, + "step": 7704 + }, + { + "epoch": 0.6218222903720442, + "grad_norm": 0.764955461025238, + "learning_rate": 0.00013618044561335268, + "loss": 2.612, + "step": 7705 + }, + { + "epoch": 0.6219029941086273, + "grad_norm": 0.6492719054222107, + "learning_rate": 0.00013616572776384983, + "loss": 2.5532, + "step": 7706 + }, + { + "epoch": 0.6219836978452102, + "grad_norm": 0.6870293617248535, + "learning_rate": 0.0001361510090130025, + "loss": 2.5705, + "step": 7707 + }, + { + "epoch": 0.6220644015817932, + "grad_norm": 0.6899540424346924, + "learning_rate": 0.0001361362893611775, + "loss": 2.5768, + "step": 7708 + }, + { + 
"epoch": 0.6221451053183762, + "grad_norm": 0.658941924571991, + "learning_rate": 0.0001361215688087417, + "loss": 2.5664, + "step": 7709 + }, + { + "epoch": 0.6222258090549593, + "grad_norm": 0.6875531673431396, + "learning_rate": 0.000136106847356062, + "loss": 2.6128, + "step": 7710 + }, + { + "epoch": 0.6223065127915423, + "grad_norm": 0.657073974609375, + "learning_rate": 0.0001360921250035053, + "loss": 2.6449, + "step": 7711 + }, + { + "epoch": 0.6223872165281252, + "grad_norm": 0.7051201462745667, + "learning_rate": 0.00013607740175143848, + "loss": 2.5925, + "step": 7712 + }, + { + "epoch": 0.6224679202647082, + "grad_norm": 0.702877938747406, + "learning_rate": 0.0001360626776002285, + "loss": 2.5338, + "step": 7713 + }, + { + "epoch": 0.6225486240012913, + "grad_norm": 0.650935709476471, + "learning_rate": 0.00013604795255024233, + "loss": 2.5799, + "step": 7714 + }, + { + "epoch": 0.6226293277378743, + "grad_norm": 0.7035139203071594, + "learning_rate": 0.00013603322660184694, + "loss": 2.5476, + "step": 7715 + }, + { + "epoch": 0.6227100314744572, + "grad_norm": 0.6549977660179138, + "learning_rate": 0.0001360184997554094, + "loss": 2.6117, + "step": 7716 + }, + { + "epoch": 0.6227907352110402, + "grad_norm": 0.6882792115211487, + "learning_rate": 0.00013600377201129662, + "loss": 2.53, + "step": 7717 + }, + { + "epoch": 0.6228714389476233, + "grad_norm": 0.7390840649604797, + "learning_rate": 0.0001359890433698758, + "loss": 2.6345, + "step": 7718 + }, + { + "epoch": 0.6229521426842063, + "grad_norm": 0.7577612400054932, + "learning_rate": 0.00013597431383151386, + "loss": 2.6386, + "step": 7719 + }, + { + "epoch": 0.6230328464207893, + "grad_norm": 0.6818724870681763, + "learning_rate": 0.00013595958339657804, + "loss": 2.5806, + "step": 7720 + }, + { + "epoch": 0.6231135501573722, + "grad_norm": 0.6954349279403687, + "learning_rate": 0.0001359448520654354, + "loss": 2.5913, + "step": 7721 + }, + { + "epoch": 0.6231942538939553, + "grad_norm": 
0.7976544499397278, + "learning_rate": 0.00013593011983845308, + "loss": 2.5686, + "step": 7722 + }, + { + "epoch": 0.6232749576305383, + "grad_norm": 0.7362754940986633, + "learning_rate": 0.00013591538671599824, + "loss": 2.5596, + "step": 7723 + }, + { + "epoch": 0.6233556613671213, + "grad_norm": 0.6842390298843384, + "learning_rate": 0.00013590065269843805, + "loss": 2.5793, + "step": 7724 + }, + { + "epoch": 0.6234363651037043, + "grad_norm": 0.6816275715827942, + "learning_rate": 0.0001358859177861398, + "loss": 2.5948, + "step": 7725 + }, + { + "epoch": 0.6235170688402873, + "grad_norm": 0.6892915964126587, + "learning_rate": 0.00013587118197947066, + "loss": 2.6287, + "step": 7726 + }, + { + "epoch": 0.6235977725768703, + "grad_norm": 0.6851752996444702, + "learning_rate": 0.00013585644527879792, + "loss": 2.5781, + "step": 7727 + }, + { + "epoch": 0.6236784763134533, + "grad_norm": 0.7022164463996887, + "learning_rate": 0.00013584170768448877, + "loss": 2.5856, + "step": 7728 + }, + { + "epoch": 0.6237591800500363, + "grad_norm": 0.6752299070358276, + "learning_rate": 0.0001358269691969106, + "loss": 2.6042, + "step": 7729 + }, + { + "epoch": 0.6238398837866194, + "grad_norm": 0.6861466765403748, + "learning_rate": 0.00013581222981643074, + "loss": 2.5887, + "step": 7730 + }, + { + "epoch": 0.6239205875232023, + "grad_norm": 0.7147940397262573, + "learning_rate": 0.00013579748954341647, + "loss": 2.5796, + "step": 7731 + }, + { + "epoch": 0.6240012912597853, + "grad_norm": 0.6704726219177246, + "learning_rate": 0.0001357827483782352, + "loss": 2.6027, + "step": 7732 + }, + { + "epoch": 0.6240819949963683, + "grad_norm": 0.6984317898750305, + "learning_rate": 0.0001357680063212543, + "loss": 2.635, + "step": 7733 + }, + { + "epoch": 0.6241626987329514, + "grad_norm": 0.6205787658691406, + "learning_rate": 0.00013575326337284115, + "loss": 2.5715, + "step": 7734 + }, + { + "epoch": 0.6242434024695344, + "grad_norm": 0.7214726805686951, + "learning_rate": 
0.00013573851953336326, + "loss": 2.5605, + "step": 7735 + }, + { + "epoch": 0.6243241062061173, + "grad_norm": 0.6716169714927673, + "learning_rate": 0.000135723774803188, + "loss": 2.6766, + "step": 7736 + }, + { + "epoch": 0.6244048099427003, + "grad_norm": 0.6446832418441772, + "learning_rate": 0.00013570902918268293, + "loss": 2.5629, + "step": 7737 + }, + { + "epoch": 0.6244855136792834, + "grad_norm": 0.6721374988555908, + "learning_rate": 0.0001356942826722155, + "loss": 2.6093, + "step": 7738 + }, + { + "epoch": 0.6245662174158664, + "grad_norm": 0.7430365681648254, + "learning_rate": 0.0001356795352721532, + "loss": 2.5966, + "step": 7739 + }, + { + "epoch": 0.6246469211524494, + "grad_norm": 0.6787518858909607, + "learning_rate": 0.00013566478698286366, + "loss": 2.5519, + "step": 7740 + }, + { + "epoch": 0.6247276248890323, + "grad_norm": 0.6340047121047974, + "learning_rate": 0.0001356500378047144, + "loss": 2.5181, + "step": 7741 + }, + { + "epoch": 0.6248083286256154, + "grad_norm": 0.7559040188789368, + "learning_rate": 0.000135635287738073, + "loss": 2.6068, + "step": 7742 + }, + { + "epoch": 0.6248890323621984, + "grad_norm": 0.6819902062416077, + "learning_rate": 0.00013562053678330707, + "loss": 2.5754, + "step": 7743 + }, + { + "epoch": 0.6249697360987814, + "grad_norm": 0.6463500261306763, + "learning_rate": 0.00013560578494078423, + "loss": 2.5915, + "step": 7744 + }, + { + "epoch": 0.6250504398353643, + "grad_norm": 0.7510617971420288, + "learning_rate": 0.0001355910322108722, + "loss": 2.5738, + "step": 7745 + }, + { + "epoch": 0.6251311435719474, + "grad_norm": 0.75312739610672, + "learning_rate": 0.00013557627859393855, + "loss": 2.5938, + "step": 7746 + }, + { + "epoch": 0.6252118473085304, + "grad_norm": 0.7784396409988403, + "learning_rate": 0.0001355615240903511, + "loss": 2.6634, + "step": 7747 + }, + { + "epoch": 0.6252925510451134, + "grad_norm": 0.7174746990203857, + "learning_rate": 0.00013554676870047752, + "loss": 2.5973, + 
"step": 7748 + }, + { + "epoch": 0.6253732547816964, + "grad_norm": 0.6854952573776245, + "learning_rate": 0.0001355320124246855, + "loss": 2.5397, + "step": 7749 + }, + { + "epoch": 0.6254539585182795, + "grad_norm": 0.6584961414337158, + "learning_rate": 0.00013551725526334284, + "loss": 2.5574, + "step": 7750 + }, + { + "epoch": 0.6255346622548624, + "grad_norm": 0.7067389488220215, + "learning_rate": 0.00013550249721681738, + "loss": 2.5524, + "step": 7751 + }, + { + "epoch": 0.6256153659914454, + "grad_norm": 0.6923872232437134, + "learning_rate": 0.00013548773828547686, + "loss": 2.5651, + "step": 7752 + }, + { + "epoch": 0.6256960697280284, + "grad_norm": 0.6612355709075928, + "learning_rate": 0.00013547297846968915, + "loss": 2.6075, + "step": 7753 + }, + { + "epoch": 0.6257767734646114, + "grad_norm": 0.6762828826904297, + "learning_rate": 0.00013545821776982206, + "loss": 2.6136, + "step": 7754 + }, + { + "epoch": 0.6258574772011944, + "grad_norm": 0.6940783858299255, + "learning_rate": 0.0001354434561862435, + "loss": 2.5566, + "step": 7755 + }, + { + "epoch": 0.6259381809377774, + "grad_norm": 0.7874250411987305, + "learning_rate": 0.0001354286937193214, + "loss": 2.6732, + "step": 7756 + }, + { + "epoch": 0.6260188846743604, + "grad_norm": 0.6974111795425415, + "learning_rate": 0.0001354139303694236, + "loss": 2.5455, + "step": 7757 + }, + { + "epoch": 0.6260995884109434, + "grad_norm": 0.6710802316665649, + "learning_rate": 0.0001353991661369181, + "loss": 2.5608, + "step": 7758 + }, + { + "epoch": 0.6261802921475265, + "grad_norm": 0.681635320186615, + "learning_rate": 0.00013538440102217286, + "loss": 2.6107, + "step": 7759 + }, + { + "epoch": 0.6262609958841094, + "grad_norm": 0.7229577898979187, + "learning_rate": 0.0001353696350255558, + "loss": 2.5936, + "step": 7760 + }, + { + "epoch": 0.6263416996206924, + "grad_norm": 0.6909681558609009, + "learning_rate": 0.00013535486814743504, + "loss": 2.5521, + "step": 7761 + }, + { + "epoch": 
0.6264224033572754, + "grad_norm": 0.7003746032714844, + "learning_rate": 0.0001353401003881785, + "loss": 2.5606, + "step": 7762 + }, + { + "epoch": 0.6265031070938585, + "grad_norm": 0.6883233785629272, + "learning_rate": 0.0001353253317481543, + "loss": 2.5971, + "step": 7763 + }, + { + "epoch": 0.6265838108304415, + "grad_norm": 0.7382355332374573, + "learning_rate": 0.0001353105622277305, + "loss": 2.5449, + "step": 7764 + }, + { + "epoch": 0.6266645145670244, + "grad_norm": 0.7090556621551514, + "learning_rate": 0.00013529579182727515, + "loss": 2.5988, + "step": 7765 + }, + { + "epoch": 0.6267452183036074, + "grad_norm": 0.6842581629753113, + "learning_rate": 0.00013528102054715643, + "loss": 2.6214, + "step": 7766 + }, + { + "epoch": 0.6268259220401905, + "grad_norm": 0.6969670653343201, + "learning_rate": 0.00013526624838774246, + "loss": 2.5443, + "step": 7767 + }, + { + "epoch": 0.6269066257767735, + "grad_norm": 0.7244827151298523, + "learning_rate": 0.00013525147534940138, + "loss": 2.5967, + "step": 7768 + }, + { + "epoch": 0.6269873295133565, + "grad_norm": 0.7022162675857544, + "learning_rate": 0.0001352367014325014, + "loss": 2.599, + "step": 7769 + }, + { + "epoch": 0.6270680332499394, + "grad_norm": 0.7065250873565674, + "learning_rate": 0.00013522192663741067, + "loss": 2.6105, + "step": 7770 + }, + { + "epoch": 0.6271487369865225, + "grad_norm": 0.6690711975097656, + "learning_rate": 0.0001352071509644975, + "loss": 2.55, + "step": 7771 + }, + { + "epoch": 0.6272294407231055, + "grad_norm": 0.6405982971191406, + "learning_rate": 0.00013519237441413011, + "loss": 2.6078, + "step": 7772 + }, + { + "epoch": 0.6273101444596885, + "grad_norm": 0.7340127229690552, + "learning_rate": 0.00013517759698667672, + "loss": 2.6244, + "step": 7773 + }, + { + "epoch": 0.6273908481962714, + "grad_norm": 0.6609435677528381, + "learning_rate": 0.00013516281868250566, + "loss": 2.5746, + "step": 7774 + }, + { + "epoch": 0.6274715519328545, + "grad_norm": 
0.6681997179985046, + "learning_rate": 0.00013514803950198523, + "loss": 2.6181, + "step": 7775 + }, + { + "epoch": 0.6275522556694375, + "grad_norm": 0.7120032906532288, + "learning_rate": 0.0001351332594454838, + "loss": 2.6018, + "step": 7776 + }, + { + "epoch": 0.6276329594060205, + "grad_norm": 0.6618601679801941, + "learning_rate": 0.0001351184785133697, + "loss": 2.5342, + "step": 7777 + }, + { + "epoch": 0.6277136631426035, + "grad_norm": 0.7250192165374756, + "learning_rate": 0.00013510369670601132, + "loss": 2.5795, + "step": 7778 + }, + { + "epoch": 0.6277943668791865, + "grad_norm": 0.7918543219566345, + "learning_rate": 0.00013508891402377708, + "loss": 2.6544, + "step": 7779 + }, + { + "epoch": 0.6278750706157695, + "grad_norm": 0.678895890712738, + "learning_rate": 0.00013507413046703534, + "loss": 2.5937, + "step": 7780 + }, + { + "epoch": 0.6279557743523525, + "grad_norm": 0.7336576581001282, + "learning_rate": 0.00013505934603615457, + "loss": 2.598, + "step": 7781 + }, + { + "epoch": 0.6280364780889355, + "grad_norm": 0.6891419291496277, + "learning_rate": 0.00013504456073150332, + "loss": 2.5063, + "step": 7782 + }, + { + "epoch": 0.6281171818255186, + "grad_norm": 0.7949386835098267, + "learning_rate": 0.00013502977455344997, + "loss": 2.5703, + "step": 7783 + }, + { + "epoch": 0.6281978855621015, + "grad_norm": 0.7917985320091248, + "learning_rate": 0.00013501498750236306, + "loss": 2.639, + "step": 7784 + }, + { + "epoch": 0.6282785892986845, + "grad_norm": 0.7387086749076843, + "learning_rate": 0.00013500019957861113, + "loss": 2.5864, + "step": 7785 + }, + { + "epoch": 0.6283592930352675, + "grad_norm": 0.7189435958862305, + "learning_rate": 0.00013498541078256273, + "loss": 2.5627, + "step": 7786 + }, + { + "epoch": 0.6284399967718506, + "grad_norm": 0.6709900498390198, + "learning_rate": 0.00013497062111458646, + "loss": 2.5973, + "step": 7787 + }, + { + "epoch": 0.6285207005084336, + "grad_norm": 0.6925386190414429, + "learning_rate": 
0.0001349558305750509, + "loss": 2.615, + "step": 7788 + }, + { + "epoch": 0.6286014042450165, + "grad_norm": 0.7191932201385498, + "learning_rate": 0.00013494103916432466, + "loss": 2.576, + "step": 7789 + }, + { + "epoch": 0.6286821079815995, + "grad_norm": 0.6798804402351379, + "learning_rate": 0.00013492624688277638, + "loss": 2.5661, + "step": 7790 + }, + { + "epoch": 0.6287628117181826, + "grad_norm": 0.6514562964439392, + "learning_rate": 0.00013491145373077475, + "loss": 2.6135, + "step": 7791 + }, + { + "epoch": 0.6288435154547656, + "grad_norm": 0.7345223426818848, + "learning_rate": 0.00013489665970868838, + "loss": 2.6015, + "step": 7792 + }, + { + "epoch": 0.6289242191913486, + "grad_norm": 0.7102675437927246, + "learning_rate": 0.0001348818648168861, + "loss": 2.5545, + "step": 7793 + }, + { + "epoch": 0.6290049229279315, + "grad_norm": 0.7151654362678528, + "learning_rate": 0.0001348670690557365, + "loss": 2.6464, + "step": 7794 + }, + { + "epoch": 0.6290856266645146, + "grad_norm": 0.7344057559967041, + "learning_rate": 0.00013485227242560844, + "loss": 2.6777, + "step": 7795 + }, + { + "epoch": 0.6291663304010976, + "grad_norm": 0.6622766852378845, + "learning_rate": 0.00013483747492687065, + "loss": 2.5713, + "step": 7796 + }, + { + "epoch": 0.6292470341376806, + "grad_norm": 0.6899346709251404, + "learning_rate": 0.0001348226765598919, + "loss": 2.5188, + "step": 7797 + }, + { + "epoch": 0.6293277378742635, + "grad_norm": 0.6711421012878418, + "learning_rate": 0.000134807877325041, + "loss": 2.5603, + "step": 7798 + }, + { + "epoch": 0.6294084416108466, + "grad_norm": 0.6973204016685486, + "learning_rate": 0.00013479307722268687, + "loss": 2.6621, + "step": 7799 + }, + { + "epoch": 0.6294891453474296, + "grad_norm": 0.7782350778579712, + "learning_rate": 0.00013477827625319824, + "loss": 2.5929, + "step": 7800 + }, + { + "epoch": 0.6295698490840126, + "grad_norm": 0.8703733682632446, + "learning_rate": 0.0001347634744169441, + "loss": 2.6884, + 
"step": 7801 + }, + { + "epoch": 0.6296505528205956, + "grad_norm": 0.7196036577224731, + "learning_rate": 0.00013474867171429326, + "loss": 2.6002, + "step": 7802 + }, + { + "epoch": 0.6297312565571785, + "grad_norm": 0.7224054932594299, + "learning_rate": 0.00013473386814561475, + "loss": 2.6007, + "step": 7803 + }, + { + "epoch": 0.6298119602937616, + "grad_norm": 0.7615752816200256, + "learning_rate": 0.00013471906371127743, + "loss": 2.6459, + "step": 7804 + }, + { + "epoch": 0.6298926640303446, + "grad_norm": 0.7189914584159851, + "learning_rate": 0.00013470425841165024, + "loss": 2.5692, + "step": 7805 + }, + { + "epoch": 0.6299733677669276, + "grad_norm": 0.7101845741271973, + "learning_rate": 0.00013468945224710225, + "loss": 2.5776, + "step": 7806 + }, + { + "epoch": 0.6300540715035106, + "grad_norm": 0.6860305666923523, + "learning_rate": 0.00013467464521800244, + "loss": 2.5567, + "step": 7807 + }, + { + "epoch": 0.6301347752400936, + "grad_norm": 0.7003797292709351, + "learning_rate": 0.0001346598373247198, + "loss": 2.6444, + "step": 7808 + }, + { + "epoch": 0.6302154789766766, + "grad_norm": 0.6341832876205444, + "learning_rate": 0.00013464502856762344, + "loss": 2.5475, + "step": 7809 + }, + { + "epoch": 0.6302961827132596, + "grad_norm": 0.6255922317504883, + "learning_rate": 0.00013463021894708242, + "loss": 2.5875, + "step": 7810 + }, + { + "epoch": 0.6303768864498426, + "grad_norm": 0.7136420607566833, + "learning_rate": 0.00013461540846346575, + "loss": 2.5708, + "step": 7811 + }, + { + "epoch": 0.6304575901864257, + "grad_norm": 0.7164542078971863, + "learning_rate": 0.00013460059711714267, + "loss": 2.4975, + "step": 7812 + }, + { + "epoch": 0.6305382939230086, + "grad_norm": 0.7667872905731201, + "learning_rate": 0.00013458578490848226, + "loss": 2.6124, + "step": 7813 + }, + { + "epoch": 0.6306189976595916, + "grad_norm": 0.6631812453269958, + "learning_rate": 0.0001345709718378537, + "loss": 2.5318, + "step": 7814 + }, + { + "epoch": 
0.6306997013961746, + "grad_norm": 0.696864664554596, + "learning_rate": 0.0001345561579056261, + "loss": 2.6171, + "step": 7815 + }, + { + "epoch": 0.6307804051327577, + "grad_norm": 0.7368598580360413, + "learning_rate": 0.00013454134311216873, + "loss": 2.5734, + "step": 7816 + }, + { + "epoch": 0.6308611088693407, + "grad_norm": 0.7279712557792664, + "learning_rate": 0.00013452652745785083, + "loss": 2.6231, + "step": 7817 + }, + { + "epoch": 0.6309418126059236, + "grad_norm": 0.8070993423461914, + "learning_rate": 0.00013451171094304158, + "loss": 2.5486, + "step": 7818 + }, + { + "epoch": 0.6310225163425066, + "grad_norm": 0.7522621750831604, + "learning_rate": 0.0001344968935681103, + "loss": 2.5576, + "step": 7819 + }, + { + "epoch": 0.6311032200790897, + "grad_norm": 0.8185423612594604, + "learning_rate": 0.00013448207533342624, + "loss": 2.6068, + "step": 7820 + }, + { + "epoch": 0.6311839238156727, + "grad_norm": 0.7542584538459778, + "learning_rate": 0.0001344672562393587, + "loss": 2.643, + "step": 7821 + }, + { + "epoch": 0.6312646275522557, + "grad_norm": 0.7892276644706726, + "learning_rate": 0.00013445243628627712, + "loss": 2.6211, + "step": 7822 + }, + { + "epoch": 0.6313453312888386, + "grad_norm": 0.7216602563858032, + "learning_rate": 0.00013443761547455072, + "loss": 2.5725, + "step": 7823 + }, + { + "epoch": 0.6314260350254217, + "grad_norm": 0.6750743985176086, + "learning_rate": 0.0001344227938045489, + "loss": 2.5319, + "step": 7824 + }, + { + "epoch": 0.6315067387620047, + "grad_norm": 0.6711540222167969, + "learning_rate": 0.0001344079712766411, + "loss": 2.5957, + "step": 7825 + }, + { + "epoch": 0.6315874424985877, + "grad_norm": 0.6923524737358093, + "learning_rate": 0.00013439314789119667, + "loss": 2.6084, + "step": 7826 + }, + { + "epoch": 0.6316681462351706, + "grad_norm": 0.6859166026115417, + "learning_rate": 0.00013437832364858517, + "loss": 2.5608, + "step": 7827 + }, + { + "epoch": 0.6317488499717537, + "grad_norm": 
0.7340966463088989, + "learning_rate": 0.0001343634985491759, + "loss": 2.531, + "step": 7828 + }, + { + "epoch": 0.6318295537083367, + "grad_norm": 0.7374520301818848, + "learning_rate": 0.00013434867259333848, + "loss": 2.5972, + "step": 7829 + }, + { + "epoch": 0.6319102574449197, + "grad_norm": 0.7252814769744873, + "learning_rate": 0.00013433384578144232, + "loss": 2.5874, + "step": 7830 + }, + { + "epoch": 0.6319909611815027, + "grad_norm": 0.7000489830970764, + "learning_rate": 0.000134319018113857, + "loss": 2.6137, + "step": 7831 + }, + { + "epoch": 0.6320716649180858, + "grad_norm": 0.805981457233429, + "learning_rate": 0.00013430418959095198, + "loss": 2.5581, + "step": 7832 + }, + { + "epoch": 0.6321523686546687, + "grad_norm": 0.7459721565246582, + "learning_rate": 0.00013428936021309693, + "loss": 2.5284, + "step": 7833 + }, + { + "epoch": 0.6322330723912517, + "grad_norm": 0.749794065952301, + "learning_rate": 0.00013427452998066136, + "loss": 2.5927, + "step": 7834 + }, + { + "epoch": 0.6323137761278347, + "grad_norm": 0.6925346255302429, + "learning_rate": 0.00013425969889401494, + "loss": 2.5703, + "step": 7835 + }, + { + "epoch": 0.6323944798644178, + "grad_norm": 0.6647117137908936, + "learning_rate": 0.00013424486695352728, + "loss": 2.5649, + "step": 7836 + }, + { + "epoch": 0.6324751836010007, + "grad_norm": 0.7358147501945496, + "learning_rate": 0.00013423003415956796, + "loss": 2.6122, + "step": 7837 + }, + { + "epoch": 0.6325558873375837, + "grad_norm": 0.7798088788986206, + "learning_rate": 0.00013421520051250675, + "loss": 2.5805, + "step": 7838 + }, + { + "epoch": 0.6326365910741667, + "grad_norm": 0.7108271718025208, + "learning_rate": 0.00013420036601271334, + "loss": 2.5457, + "step": 7839 + }, + { + "epoch": 0.6327172948107498, + "grad_norm": 0.7108528017997742, + "learning_rate": 0.00013418553066055734, + "loss": 2.6313, + "step": 7840 + }, + { + "epoch": 0.6327979985473328, + "grad_norm": 0.7325249910354614, + "learning_rate": 
0.00013417069445640858, + "loss": 2.5598, + "step": 7841 + }, + { + "epoch": 0.6328787022839157, + "grad_norm": 0.6861844062805176, + "learning_rate": 0.0001341558574006368, + "loss": 2.5899, + "step": 7842 + }, + { + "epoch": 0.6329594060204987, + "grad_norm": 0.7576130628585815, + "learning_rate": 0.00013414101949361175, + "loss": 2.6077, + "step": 7843 + }, + { + "epoch": 0.6330401097570818, + "grad_norm": 0.7756128907203674, + "learning_rate": 0.0001341261807357033, + "loss": 2.6111, + "step": 7844 + }, + { + "epoch": 0.6331208134936648, + "grad_norm": 0.7131127715110779, + "learning_rate": 0.00013411134112728114, + "loss": 2.5227, + "step": 7845 + }, + { + "epoch": 0.6332015172302478, + "grad_norm": 0.6517898440361023, + "learning_rate": 0.00013409650066871525, + "loss": 2.5825, + "step": 7846 + }, + { + "epoch": 0.6332822209668307, + "grad_norm": 0.8452722430229187, + "learning_rate": 0.0001340816593603754, + "loss": 2.6037, + "step": 7847 + }, + { + "epoch": 0.6333629247034138, + "grad_norm": 0.7421110272407532, + "learning_rate": 0.00013406681720263153, + "loss": 2.5684, + "step": 7848 + }, + { + "epoch": 0.6334436284399968, + "grad_norm": 0.695139467716217, + "learning_rate": 0.0001340519741958535, + "loss": 2.5648, + "step": 7849 + }, + { + "epoch": 0.6335243321765798, + "grad_norm": 0.7780016660690308, + "learning_rate": 0.0001340371303404113, + "loss": 2.6849, + "step": 7850 + }, + { + "epoch": 0.6336050359131628, + "grad_norm": 0.7276864051818848, + "learning_rate": 0.00013402228563667482, + "loss": 2.6198, + "step": 7851 + }, + { + "epoch": 0.6336857396497458, + "grad_norm": 0.7566827535629272, + "learning_rate": 0.00013400744008501404, + "loss": 2.5803, + "step": 7852 + }, + { + "epoch": 0.6337664433863288, + "grad_norm": 0.7933458089828491, + "learning_rate": 0.00013399259368579894, + "loss": 2.6029, + "step": 7853 + }, + { + "epoch": 0.6338471471229118, + "grad_norm": 0.6849822402000427, + "learning_rate": 0.00013397774643939957, + "loss": 2.5454, 
+ "step": 7854 + }, + { + "epoch": 0.6339278508594948, + "grad_norm": 0.7054651379585266, + "learning_rate": 0.00013396289834618594, + "loss": 2.5905, + "step": 7855 + }, + { + "epoch": 0.6340085545960777, + "grad_norm": 0.7036863565444946, + "learning_rate": 0.00013394804940652813, + "loss": 2.6342, + "step": 7856 + }, + { + "epoch": 0.6340892583326608, + "grad_norm": 0.7101735472679138, + "learning_rate": 0.00013393319962079614, + "loss": 2.6402, + "step": 7857 + }, + { + "epoch": 0.6341699620692438, + "grad_norm": 0.7053956389427185, + "learning_rate": 0.0001339183489893601, + "loss": 2.5841, + "step": 7858 + }, + { + "epoch": 0.6342506658058268, + "grad_norm": 0.7734887003898621, + "learning_rate": 0.0001339034975125902, + "loss": 2.652, + "step": 7859 + }, + { + "epoch": 0.6343313695424098, + "grad_norm": 0.6714119911193848, + "learning_rate": 0.0001338886451908565, + "loss": 2.5927, + "step": 7860 + }, + { + "epoch": 0.6344120732789928, + "grad_norm": 0.6580910682678223, + "learning_rate": 0.00013387379202452917, + "loss": 2.6114, + "step": 7861 + }, + { + "epoch": 0.6344927770155758, + "grad_norm": 0.6810200214385986, + "learning_rate": 0.00013385893801397836, + "loss": 2.5616, + "step": 7862 + }, + { + "epoch": 0.6345734807521588, + "grad_norm": 0.6989572048187256, + "learning_rate": 0.00013384408315957432, + "loss": 2.5954, + "step": 7863 + }, + { + "epoch": 0.6346541844887418, + "grad_norm": 0.7033671736717224, + "learning_rate": 0.00013382922746168728, + "loss": 2.6015, + "step": 7864 + }, + { + "epoch": 0.6347348882253249, + "grad_norm": 0.6873033046722412, + "learning_rate": 0.0001338143709206875, + "loss": 2.562, + "step": 7865 + }, + { + "epoch": 0.6348155919619078, + "grad_norm": 0.7361463904380798, + "learning_rate": 0.00013379951353694513, + "loss": 2.6175, + "step": 7866 + }, + { + "epoch": 0.6348962956984908, + "grad_norm": 0.7623226046562195, + "learning_rate": 0.00013378465531083055, + "loss": 2.7342, + "step": 7867 + }, + { + "epoch": 
0.6349769994350738, + "grad_norm": 0.7427035570144653, + "learning_rate": 0.0001337697962427141, + "loss": 2.5468, + "step": 7868 + }, + { + "epoch": 0.6350577031716569, + "grad_norm": 0.6865772008895874, + "learning_rate": 0.00013375493633296598, + "loss": 2.6112, + "step": 7869 + }, + { + "epoch": 0.6351384069082399, + "grad_norm": 0.663567304611206, + "learning_rate": 0.00013374007558195666, + "loss": 2.5896, + "step": 7870 + }, + { + "epoch": 0.6352191106448228, + "grad_norm": 0.6804360151290894, + "learning_rate": 0.00013372521399005643, + "loss": 2.58, + "step": 7871 + }, + { + "epoch": 0.6352998143814058, + "grad_norm": 0.6755216121673584, + "learning_rate": 0.0001337103515576357, + "loss": 2.5593, + "step": 7872 + }, + { + "epoch": 0.6353805181179889, + "grad_norm": 0.8148807883262634, + "learning_rate": 0.00013369548828506491, + "loss": 2.6473, + "step": 7873 + }, + { + "epoch": 0.6354612218545719, + "grad_norm": 0.713009774684906, + "learning_rate": 0.00013368062417271447, + "loss": 2.6002, + "step": 7874 + }, + { + "epoch": 0.6355419255911549, + "grad_norm": 0.6390172839164734, + "learning_rate": 0.00013366575922095484, + "loss": 2.5794, + "step": 7875 + }, + { + "epoch": 0.6356226293277378, + "grad_norm": 0.7228195667266846, + "learning_rate": 0.00013365089343015649, + "loss": 2.6051, + "step": 7876 + }, + { + "epoch": 0.6357033330643209, + "grad_norm": 0.7563474178314209, + "learning_rate": 0.00013363602680068986, + "loss": 2.6308, + "step": 7877 + }, + { + "epoch": 0.6357840368009039, + "grad_norm": 0.7366798520088196, + "learning_rate": 0.00013362115933292557, + "loss": 2.5589, + "step": 7878 + }, + { + "epoch": 0.6358647405374869, + "grad_norm": 0.7137070894241333, + "learning_rate": 0.00013360629102723409, + "loss": 2.6428, + "step": 7879 + }, + { + "epoch": 0.6359454442740698, + "grad_norm": 0.6799132823944092, + "learning_rate": 0.000133591421883986, + "loss": 2.5549, + "step": 7880 + }, + { + "epoch": 0.6360261480106529, + "grad_norm": 
0.7031344771385193, + "learning_rate": 0.00013357655190355188, + "loss": 2.6298, + "step": 7881 + }, + { + "epoch": 0.6361068517472359, + "grad_norm": 0.7441670298576355, + "learning_rate": 0.00013356168108630227, + "loss": 2.5844, + "step": 7882 + }, + { + "epoch": 0.6361875554838189, + "grad_norm": 0.7281978726387024, + "learning_rate": 0.00013354680943260784, + "loss": 2.5773, + "step": 7883 + }, + { + "epoch": 0.6362682592204019, + "grad_norm": 0.6969650983810425, + "learning_rate": 0.00013353193694283928, + "loss": 2.6156, + "step": 7884 + }, + { + "epoch": 0.636348962956985, + "grad_norm": 0.6668435335159302, + "learning_rate": 0.00013351706361736714, + "loss": 2.6328, + "step": 7885 + }, + { + "epoch": 0.6364296666935679, + "grad_norm": 0.6909573078155518, + "learning_rate": 0.0001335021894565622, + "loss": 2.5772, + "step": 7886 + }, + { + "epoch": 0.6365103704301509, + "grad_norm": 0.6740022897720337, + "learning_rate": 0.0001334873144607951, + "loss": 2.6435, + "step": 7887 + }, + { + "epoch": 0.6365910741667339, + "grad_norm": 0.7203185558319092, + "learning_rate": 0.0001334724386304366, + "loss": 2.5401, + "step": 7888 + }, + { + "epoch": 0.636671777903317, + "grad_norm": 0.7343020439147949, + "learning_rate": 0.0001334575619658574, + "loss": 2.5811, + "step": 7889 + }, + { + "epoch": 0.6367524816399, + "grad_norm": 0.6941348314285278, + "learning_rate": 0.00013344268446742835, + "loss": 2.6267, + "step": 7890 + }, + { + "epoch": 0.6368331853764829, + "grad_norm": 0.6983792185783386, + "learning_rate": 0.00013342780613552016, + "loss": 2.533, + "step": 7891 + }, + { + "epoch": 0.6369138891130659, + "grad_norm": 0.7093533277511597, + "learning_rate": 0.00013341292697050365, + "loss": 2.6616, + "step": 7892 + }, + { + "epoch": 0.636994592849649, + "grad_norm": 0.7377648949623108, + "learning_rate": 0.00013339804697274965, + "loss": 2.6032, + "step": 7893 + }, + { + "epoch": 0.637075296586232, + "grad_norm": 0.6669821739196777, + "learning_rate": 
0.00013338316614262903, + "loss": 2.6082, + "step": 7894 + }, + { + "epoch": 0.6371560003228149, + "grad_norm": 0.6665576100349426, + "learning_rate": 0.00013336828448051263, + "loss": 2.6114, + "step": 7895 + }, + { + "epoch": 0.6372367040593979, + "grad_norm": 0.6893584132194519, + "learning_rate": 0.0001333534019867714, + "loss": 2.5886, + "step": 7896 + }, + { + "epoch": 0.637317407795981, + "grad_norm": 0.7651494741439819, + "learning_rate": 0.00013333851866177617, + "loss": 2.5622, + "step": 7897 + }, + { + "epoch": 0.637398111532564, + "grad_norm": 0.8124055862426758, + "learning_rate": 0.00013332363450589788, + "loss": 2.6036, + "step": 7898 + }, + { + "epoch": 0.637478815269147, + "grad_norm": 0.7394436597824097, + "learning_rate": 0.00013330874951950755, + "loss": 2.6214, + "step": 7899 + }, + { + "epoch": 0.6375595190057299, + "grad_norm": 0.6279659867286682, + "learning_rate": 0.00013329386370297615, + "loss": 2.5652, + "step": 7900 + }, + { + "epoch": 0.637640222742313, + "grad_norm": 0.7289649248123169, + "learning_rate": 0.00013327897705667455, + "loss": 2.5628, + "step": 7901 + }, + { + "epoch": 0.637720926478896, + "grad_norm": 0.7267701625823975, + "learning_rate": 0.0001332640895809739, + "loss": 2.5475, + "step": 7902 + }, + { + "epoch": 0.637801630215479, + "grad_norm": 0.7470490336418152, + "learning_rate": 0.00013324920127624515, + "loss": 2.5054, + "step": 7903 + }, + { + "epoch": 0.637882333952062, + "grad_norm": 0.6963294148445129, + "learning_rate": 0.00013323431214285944, + "loss": 2.5992, + "step": 7904 + }, + { + "epoch": 0.6379630376886449, + "grad_norm": 0.6993808746337891, + "learning_rate": 0.00013321942218118778, + "loss": 2.6044, + "step": 7905 + }, + { + "epoch": 0.638043741425228, + "grad_norm": 0.6620917916297913, + "learning_rate": 0.00013320453139160126, + "loss": 2.5278, + "step": 7906 + }, + { + "epoch": 0.638124445161811, + "grad_norm": 0.6535444855690002, + "learning_rate": 0.00013318963977447106, + "loss": 2.6069, + 
"step": 7907 + }, + { + "epoch": 0.638205148898394, + "grad_norm": 0.6913008689880371, + "learning_rate": 0.00013317474733016824, + "loss": 2.5271, + "step": 7908 + }, + { + "epoch": 0.638285852634977, + "grad_norm": 0.6760269403457642, + "learning_rate": 0.000133159854059064, + "loss": 2.7029, + "step": 7909 + }, + { + "epoch": 0.63836655637156, + "grad_norm": 0.7026536464691162, + "learning_rate": 0.0001331449599615295, + "loss": 2.592, + "step": 7910 + }, + { + "epoch": 0.638447260108143, + "grad_norm": 0.7935923933982849, + "learning_rate": 0.000133130065037936, + "loss": 2.5674, + "step": 7911 + }, + { + "epoch": 0.638527963844726, + "grad_norm": 0.694675087928772, + "learning_rate": 0.00013311516928865466, + "loss": 2.6727, + "step": 7912 + }, + { + "epoch": 0.638608667581309, + "grad_norm": 0.7378186583518982, + "learning_rate": 0.00013310027271405672, + "loss": 2.5691, + "step": 7913 + }, + { + "epoch": 0.638689371317892, + "grad_norm": 0.7684193849563599, + "learning_rate": 0.00013308537531451345, + "loss": 2.5796, + "step": 7914 + }, + { + "epoch": 0.638770075054475, + "grad_norm": 0.6881510019302368, + "learning_rate": 0.00013307047709039619, + "loss": 2.6, + "step": 7915 + }, + { + "epoch": 0.638850778791058, + "grad_norm": 0.7341364026069641, + "learning_rate": 0.00013305557804207618, + "loss": 2.622, + "step": 7916 + }, + { + "epoch": 0.638931482527641, + "grad_norm": 0.7620663642883301, + "learning_rate": 0.00013304067816992474, + "loss": 2.5571, + "step": 7917 + }, + { + "epoch": 0.6390121862642241, + "grad_norm": 0.6929789781570435, + "learning_rate": 0.00013302577747431322, + "loss": 2.6204, + "step": 7918 + }, + { + "epoch": 0.639092890000807, + "grad_norm": 0.6942943334579468, + "learning_rate": 0.000133010875955613, + "loss": 2.6737, + "step": 7919 + }, + { + "epoch": 0.63917359373739, + "grad_norm": 0.69537752866745, + "learning_rate": 0.0001329959736141955, + "loss": 2.6105, + "step": 7920 + }, + { + "epoch": 0.639254297473973, + "grad_norm": 
0.6690821051597595, + "learning_rate": 0.00013298107045043203, + "loss": 2.6279, + "step": 7921 + }, + { + "epoch": 0.6393350012105561, + "grad_norm": 0.7748103141784668, + "learning_rate": 0.00013296616646469412, + "loss": 2.6307, + "step": 7922 + }, + { + "epoch": 0.6394157049471391, + "grad_norm": 0.7509558200836182, + "learning_rate": 0.00013295126165735311, + "loss": 2.6388, + "step": 7923 + }, + { + "epoch": 0.639496408683722, + "grad_norm": 0.7641764283180237, + "learning_rate": 0.0001329363560287806, + "loss": 2.5819, + "step": 7924 + }, + { + "epoch": 0.639577112420305, + "grad_norm": 0.6912327408790588, + "learning_rate": 0.00013292144957934794, + "loss": 2.5588, + "step": 7925 + }, + { + "epoch": 0.6396578161568881, + "grad_norm": 0.7568803429603577, + "learning_rate": 0.0001329065423094267, + "loss": 2.5627, + "step": 7926 + }, + { + "epoch": 0.6397385198934711, + "grad_norm": 0.7272306084632874, + "learning_rate": 0.00013289163421938843, + "loss": 2.6101, + "step": 7927 + }, + { + "epoch": 0.6398192236300541, + "grad_norm": 0.6965963840484619, + "learning_rate": 0.00013287672530960465, + "loss": 2.5967, + "step": 7928 + }, + { + "epoch": 0.639899927366637, + "grad_norm": 0.7729843854904175, + "learning_rate": 0.00013286181558044694, + "loss": 2.6222, + "step": 7929 + }, + { + "epoch": 0.6399806311032201, + "grad_norm": 0.6876606941223145, + "learning_rate": 0.00013284690503228687, + "loss": 2.6162, + "step": 7930 + }, + { + "epoch": 0.6400613348398031, + "grad_norm": 0.7555204629898071, + "learning_rate": 0.0001328319936654961, + "loss": 2.588, + "step": 7931 + }, + { + "epoch": 0.6401420385763861, + "grad_norm": 0.7324720621109009, + "learning_rate": 0.0001328170814804462, + "loss": 2.6111, + "step": 7932 + }, + { + "epoch": 0.640222742312969, + "grad_norm": 0.6802392601966858, + "learning_rate": 0.0001328021684775088, + "loss": 2.5955, + "step": 7933 + }, + { + "epoch": 0.6403034460495521, + "grad_norm": 0.7564330697059631, + "learning_rate": 
0.00013278725465705568, + "loss": 2.5355, + "step": 7934 + }, + { + "epoch": 0.6403841497861351, + "grad_norm": 0.6916235089302063, + "learning_rate": 0.00013277234001945844, + "loss": 2.6037, + "step": 7935 + }, + { + "epoch": 0.6404648535227181, + "grad_norm": 0.688819169998169, + "learning_rate": 0.00013275742456508885, + "loss": 2.5626, + "step": 7936 + }, + { + "epoch": 0.6405455572593011, + "grad_norm": 0.6647922992706299, + "learning_rate": 0.0001327425082943186, + "loss": 2.6166, + "step": 7937 + }, + { + "epoch": 0.6406262609958842, + "grad_norm": 0.6792626976966858, + "learning_rate": 0.00013272759120751943, + "loss": 2.6206, + "step": 7938 + }, + { + "epoch": 0.6407069647324671, + "grad_norm": 0.6482827663421631, + "learning_rate": 0.00013271267330506312, + "loss": 2.5558, + "step": 7939 + }, + { + "epoch": 0.6407876684690501, + "grad_norm": 0.6628372073173523, + "learning_rate": 0.0001326977545873215, + "loss": 2.5904, + "step": 7940 + }, + { + "epoch": 0.6408683722056331, + "grad_norm": 0.7168916463851929, + "learning_rate": 0.00013268283505466635, + "loss": 2.5189, + "step": 7941 + }, + { + "epoch": 0.6409490759422162, + "grad_norm": 0.6691678762435913, + "learning_rate": 0.00013266791470746957, + "loss": 2.608, + "step": 7942 + }, + { + "epoch": 0.6410297796787991, + "grad_norm": 0.6850359439849854, + "learning_rate": 0.00013265299354610292, + "loss": 2.5929, + "step": 7943 + }, + { + "epoch": 0.6411104834153821, + "grad_norm": 0.6807669401168823, + "learning_rate": 0.0001326380715709383, + "loss": 2.6016, + "step": 7944 + }, + { + "epoch": 0.6411911871519651, + "grad_norm": 0.6450446844100952, + "learning_rate": 0.00013262314878234767, + "loss": 2.6129, + "step": 7945 + }, + { + "epoch": 0.6412718908885482, + "grad_norm": 0.679115355014801, + "learning_rate": 0.00013260822518070285, + "loss": 2.6049, + "step": 7946 + }, + { + "epoch": 0.6413525946251312, + "grad_norm": 0.7082008123397827, + "learning_rate": 0.00013259330076637583, + "loss": 2.5673, 
+ "step": 7947 + }, + { + "epoch": 0.6414332983617141, + "grad_norm": 0.7357851266860962, + "learning_rate": 0.00013257837553973855, + "loss": 2.6118, + "step": 7948 + }, + { + "epoch": 0.6415140020982971, + "grad_norm": 0.687035083770752, + "learning_rate": 0.000132563449501163, + "loss": 2.5359, + "step": 7949 + }, + { + "epoch": 0.6415947058348802, + "grad_norm": 0.6950698494911194, + "learning_rate": 0.00013254852265102117, + "loss": 2.5527, + "step": 7950 + }, + { + "epoch": 0.6416754095714632, + "grad_norm": 0.6878959536552429, + "learning_rate": 0.00013253359498968507, + "loss": 2.611, + "step": 7951 + }, + { + "epoch": 0.6417561133080462, + "grad_norm": 0.7224605083465576, + "learning_rate": 0.00013251866651752675, + "loss": 2.5459, + "step": 7952 + }, + { + "epoch": 0.6418368170446291, + "grad_norm": 0.7299731969833374, + "learning_rate": 0.00013250373723491826, + "loss": 2.5651, + "step": 7953 + }, + { + "epoch": 0.6419175207812122, + "grad_norm": 0.7663037776947021, + "learning_rate": 0.00013248880714223163, + "loss": 2.6073, + "step": 7954 + }, + { + "epoch": 0.6419982245177952, + "grad_norm": 0.6532007455825806, + "learning_rate": 0.00013247387623983902, + "loss": 2.6087, + "step": 7955 + }, + { + "epoch": 0.6420789282543782, + "grad_norm": 0.7520449757575989, + "learning_rate": 0.00013245894452811255, + "loss": 2.5998, + "step": 7956 + }, + { + "epoch": 0.6421596319909612, + "grad_norm": 0.7196050882339478, + "learning_rate": 0.0001324440120074243, + "loss": 2.6448, + "step": 7957 + }, + { + "epoch": 0.6422403357275441, + "grad_norm": 0.7093806862831116, + "learning_rate": 0.0001324290786781465, + "loss": 2.5935, + "step": 7958 + }, + { + "epoch": 0.6423210394641272, + "grad_norm": 0.695541501045227, + "learning_rate": 0.00013241414454065125, + "loss": 2.5872, + "step": 7959 + }, + { + "epoch": 0.6424017432007102, + "grad_norm": 0.6763006448745728, + "learning_rate": 0.0001323992095953108, + "loss": 2.572, + "step": 7960 + }, + { + "epoch": 
0.6424824469372932, + "grad_norm": 0.6403522491455078, + "learning_rate": 0.00013238427384249738, + "loss": 2.6137, + "step": 7961 + }, + { + "epoch": 0.6425631506738761, + "grad_norm": 0.6647571325302124, + "learning_rate": 0.00013236933728258315, + "loss": 2.5904, + "step": 7962 + }, + { + "epoch": 0.6426438544104592, + "grad_norm": 0.6931071877479553, + "learning_rate": 0.0001323543999159405, + "loss": 2.6085, + "step": 7963 + }, + { + "epoch": 0.6427245581470422, + "grad_norm": 0.6899439096450806, + "learning_rate": 0.00013233946174294155, + "loss": 2.5555, + "step": 7964 + }, + { + "epoch": 0.6428052618836252, + "grad_norm": 0.6564984321594238, + "learning_rate": 0.0001323245227639587, + "loss": 2.576, + "step": 7965 + }, + { + "epoch": 0.6428859656202082, + "grad_norm": 0.7427607774734497, + "learning_rate": 0.00013230958297936427, + "loss": 2.6178, + "step": 7966 + }, + { + "epoch": 0.6429666693567913, + "grad_norm": 0.6884508728981018, + "learning_rate": 0.00013229464238953054, + "loss": 2.6519, + "step": 7967 + }, + { + "epoch": 0.6430473730933742, + "grad_norm": 0.692442774772644, + "learning_rate": 0.00013227970099482993, + "loss": 2.5784, + "step": 7968 + }, + { + "epoch": 0.6431280768299572, + "grad_norm": 0.6637876629829407, + "learning_rate": 0.00013226475879563477, + "loss": 2.5785, + "step": 7969 + }, + { + "epoch": 0.6432087805665402, + "grad_norm": 0.6844972372055054, + "learning_rate": 0.0001322498157923175, + "loss": 2.5745, + "step": 7970 + }, + { + "epoch": 0.6432894843031233, + "grad_norm": 0.7259756922721863, + "learning_rate": 0.0001322348719852505, + "loss": 2.5696, + "step": 7971 + }, + { + "epoch": 0.6433701880397062, + "grad_norm": 0.6719023585319519, + "learning_rate": 0.00013221992737480625, + "loss": 2.6049, + "step": 7972 + }, + { + "epoch": 0.6434508917762892, + "grad_norm": 0.7160155773162842, + "learning_rate": 0.00013220498196135717, + "loss": 2.572, + "step": 7973 + }, + { + "epoch": 0.6435315955128722, + "grad_norm": 
0.6920225620269775, + "learning_rate": 0.00013219003574527576, + "loss": 2.6576, + "step": 7974 + }, + { + "epoch": 0.6436122992494553, + "grad_norm": 0.698518693447113, + "learning_rate": 0.0001321750887269345, + "loss": 2.6074, + "step": 7975 + }, + { + "epoch": 0.6436930029860383, + "grad_norm": 0.7607932090759277, + "learning_rate": 0.00013216014090670594, + "loss": 2.6173, + "step": 7976 + }, + { + "epoch": 0.6437737067226212, + "grad_norm": 0.8130847811698914, + "learning_rate": 0.0001321451922849626, + "loss": 2.6023, + "step": 7977 + }, + { + "epoch": 0.6438544104592042, + "grad_norm": 0.676675021648407, + "learning_rate": 0.00013213024286207702, + "loss": 2.6174, + "step": 7978 + }, + { + "epoch": 0.6439351141957873, + "grad_norm": 0.7018851041793823, + "learning_rate": 0.00013211529263842183, + "loss": 2.5713, + "step": 7979 + }, + { + "epoch": 0.6440158179323703, + "grad_norm": 0.796097457408905, + "learning_rate": 0.00013210034161436954, + "loss": 2.5937, + "step": 7980 + }, + { + "epoch": 0.6440965216689533, + "grad_norm": 0.7118527293205261, + "learning_rate": 0.0001320853897902929, + "loss": 2.5721, + "step": 7981 + }, + { + "epoch": 0.6441772254055362, + "grad_norm": 0.7282249331474304, + "learning_rate": 0.00013207043716656445, + "loss": 2.5975, + "step": 7982 + }, + { + "epoch": 0.6442579291421193, + "grad_norm": 0.6710900664329529, + "learning_rate": 0.00013205548374355686, + "loss": 2.5809, + "step": 7983 + }, + { + "epoch": 0.6443386328787023, + "grad_norm": 0.7045658230781555, + "learning_rate": 0.00013204052952164278, + "loss": 2.5715, + "step": 7984 + }, + { + "epoch": 0.6444193366152853, + "grad_norm": 0.719507098197937, + "learning_rate": 0.00013202557450119504, + "loss": 2.5948, + "step": 7985 + }, + { + "epoch": 0.6445000403518683, + "grad_norm": 0.7603922486305237, + "learning_rate": 0.0001320106186825862, + "loss": 2.6176, + "step": 7986 + }, + { + "epoch": 0.6445807440884513, + "grad_norm": 0.7057444453239441, + "learning_rate": 
0.0001319956620661891, + "loss": 2.5905, + "step": 7987 + }, + { + "epoch": 0.6446614478250343, + "grad_norm": 0.7884874939918518, + "learning_rate": 0.00013198070465237645, + "loss": 2.5892, + "step": 7988 + }, + { + "epoch": 0.6447421515616173, + "grad_norm": 0.6932834386825562, + "learning_rate": 0.00013196574644152103, + "loss": 2.6032, + "step": 7989 + }, + { + "epoch": 0.6448228552982003, + "grad_norm": 0.7361180186271667, + "learning_rate": 0.00013195078743399568, + "loss": 2.5877, + "step": 7990 + }, + { + "epoch": 0.6449035590347834, + "grad_norm": 0.6843615174293518, + "learning_rate": 0.00013193582763017315, + "loss": 2.5804, + "step": 7991 + }, + { + "epoch": 0.6449842627713663, + "grad_norm": 0.7592078447341919, + "learning_rate": 0.00013192086703042635, + "loss": 2.6464, + "step": 7992 + }, + { + "epoch": 0.6450649665079493, + "grad_norm": 0.7362154126167297, + "learning_rate": 0.0001319059056351281, + "loss": 2.6154, + "step": 7993 + }, + { + "epoch": 0.6451456702445323, + "grad_norm": 0.6721758246421814, + "learning_rate": 0.00013189094344465125, + "loss": 2.5735, + "step": 7994 + }, + { + "epoch": 0.6452263739811154, + "grad_norm": 0.6221550107002258, + "learning_rate": 0.00013187598045936874, + "loss": 2.5612, + "step": 7995 + }, + { + "epoch": 0.6453070777176984, + "grad_norm": 0.7225528359413147, + "learning_rate": 0.00013186101667965344, + "loss": 2.6263, + "step": 7996 + }, + { + "epoch": 0.6453877814542813, + "grad_norm": 0.7599418759346008, + "learning_rate": 0.00013184605210587837, + "loss": 2.5814, + "step": 7997 + }, + { + "epoch": 0.6454684851908643, + "grad_norm": 0.6778777837753296, + "learning_rate": 0.00013183108673841642, + "loss": 2.6158, + "step": 7998 + }, + { + "epoch": 0.6455491889274474, + "grad_norm": 0.6860963106155396, + "learning_rate": 0.00013181612057764058, + "loss": 2.6207, + "step": 7999 + }, + { + "epoch": 0.6456298926640304, + "grad_norm": 0.6615182757377625, + "learning_rate": 0.00013180115362392382, + "loss": 
2.5571, + "step": 8000 + }, + { + "epoch": 0.6456298926640304, + "eval_loss": 2.5128066539764404, + "eval_runtime": 754.3655, + "eval_samples_per_second": 3.473, + "eval_steps_per_second": 0.579, + "step": 8000 + }, + { + "epoch": 0.6457105964006133, + "grad_norm": 0.688169538974762, + "learning_rate": 0.0001317861858776392, + "loss": 2.6513, + "step": 8001 + }, + { + "epoch": 0.6457913001371963, + "grad_norm": 0.6726182103157043, + "learning_rate": 0.00013177121733915975, + "loss": 2.5909, + "step": 8002 + }, + { + "epoch": 0.6458720038737794, + "grad_norm": 0.7348085641860962, + "learning_rate": 0.00013175624800885853, + "loss": 2.577, + "step": 8003 + }, + { + "epoch": 0.6459527076103624, + "grad_norm": 0.677435040473938, + "learning_rate": 0.00013174127788710856, + "loss": 2.5056, + "step": 8004 + }, + { + "epoch": 0.6460334113469454, + "grad_norm": 0.6864951848983765, + "learning_rate": 0.000131726306974283, + "loss": 2.5733, + "step": 8005 + }, + { + "epoch": 0.6461141150835283, + "grad_norm": 0.7070075869560242, + "learning_rate": 0.0001317113352707549, + "loss": 2.5359, + "step": 8006 + }, + { + "epoch": 0.6461948188201113, + "grad_norm": 0.7065049409866333, + "learning_rate": 0.00013169636277689746, + "loss": 2.6261, + "step": 8007 + }, + { + "epoch": 0.6462755225566944, + "grad_norm": 0.6691577434539795, + "learning_rate": 0.0001316813894930838, + "loss": 2.6015, + "step": 8008 + }, + { + "epoch": 0.6463562262932774, + "grad_norm": 0.6754019260406494, + "learning_rate": 0.0001316664154196871, + "loss": 2.5954, + "step": 8009 + }, + { + "epoch": 0.6464369300298604, + "grad_norm": 0.6172776818275452, + "learning_rate": 0.00013165144055708055, + "loss": 2.5599, + "step": 8010 + }, + { + "epoch": 0.6465176337664433, + "grad_norm": 0.6778094172477722, + "learning_rate": 0.00013163646490563737, + "loss": 2.5407, + "step": 8011 + }, + { + "epoch": 0.6465983375030264, + "grad_norm": 0.7363924980163574, + "learning_rate": 0.00013162148846573076, + "loss": 2.6075, 
+ "step": 8012 + }, + { + "epoch": 0.6466790412396094, + "grad_norm": 0.6662711501121521, + "learning_rate": 0.00013160651123773404, + "loss": 2.5611, + "step": 8013 + }, + { + "epoch": 0.6467597449761924, + "grad_norm": 0.699670135974884, + "learning_rate": 0.00013159153322202043, + "loss": 2.5612, + "step": 8014 + }, + { + "epoch": 0.6468404487127754, + "grad_norm": 0.7382899522781372, + "learning_rate": 0.0001315765544189632, + "loss": 2.6017, + "step": 8015 + }, + { + "epoch": 0.6469211524493584, + "grad_norm": 0.7624868154525757, + "learning_rate": 0.0001315615748289357, + "loss": 2.6174, + "step": 8016 + }, + { + "epoch": 0.6470018561859414, + "grad_norm": 0.704622745513916, + "learning_rate": 0.00013154659445231129, + "loss": 2.5367, + "step": 8017 + }, + { + "epoch": 0.6470825599225244, + "grad_norm": 0.7117413878440857, + "learning_rate": 0.00013153161328946324, + "loss": 2.5958, + "step": 8018 + }, + { + "epoch": 0.6471632636591074, + "grad_norm": 0.6825408339500427, + "learning_rate": 0.00013151663134076497, + "loss": 2.5118, + "step": 8019 + }, + { + "epoch": 0.6472439673956905, + "grad_norm": 0.6732384562492371, + "learning_rate": 0.00013150164860658986, + "loss": 2.6312, + "step": 8020 + }, + { + "epoch": 0.6473246711322734, + "grad_norm": 0.712812602519989, + "learning_rate": 0.00013148666508731134, + "loss": 2.576, + "step": 8021 + }, + { + "epoch": 0.6474053748688564, + "grad_norm": 0.8128857612609863, + "learning_rate": 0.0001314716807833028, + "loss": 2.5333, + "step": 8022 + }, + { + "epoch": 0.6474860786054394, + "grad_norm": 0.7817162275314331, + "learning_rate": 0.00013145669569493773, + "loss": 2.6835, + "step": 8023 + }, + { + "epoch": 0.6475667823420225, + "grad_norm": 0.7164301872253418, + "learning_rate": 0.00013144170982258956, + "loss": 2.5573, + "step": 8024 + }, + { + "epoch": 0.6476474860786054, + "grad_norm": 0.67625892162323, + "learning_rate": 0.00013142672316663177, + "loss": 2.5976, + "step": 8025 + }, + { + "epoch": 
0.6477281898151884, + "grad_norm": 0.6919494867324829, + "learning_rate": 0.0001314117357274379, + "loss": 2.6179, + "step": 8026 + }, + { + "epoch": 0.6478088935517714, + "grad_norm": 0.6787464618682861, + "learning_rate": 0.0001313967475053815, + "loss": 2.5405, + "step": 8027 + }, + { + "epoch": 0.6478895972883545, + "grad_norm": 0.6305621862411499, + "learning_rate": 0.00013138175850083605, + "loss": 2.6016, + "step": 8028 + }, + { + "epoch": 0.6479703010249375, + "grad_norm": 0.7456182837486267, + "learning_rate": 0.00013136676871417516, + "loss": 2.6091, + "step": 8029 + }, + { + "epoch": 0.6480510047615204, + "grad_norm": 0.7047890424728394, + "learning_rate": 0.00013135177814577238, + "loss": 2.6108, + "step": 8030 + }, + { + "epoch": 0.6481317084981034, + "grad_norm": 0.7509389519691467, + "learning_rate": 0.00013133678679600133, + "loss": 2.6396, + "step": 8031 + }, + { + "epoch": 0.6482124122346865, + "grad_norm": 0.63836270570755, + "learning_rate": 0.00013132179466523566, + "loss": 2.5759, + "step": 8032 + }, + { + "epoch": 0.6482931159712695, + "grad_norm": 0.6994885206222534, + "learning_rate": 0.000131306801753849, + "loss": 2.61, + "step": 8033 + }, + { + "epoch": 0.6483738197078525, + "grad_norm": 0.6762083768844604, + "learning_rate": 0.00013129180806221497, + "loss": 2.5431, + "step": 8034 + }, + { + "epoch": 0.6484545234444354, + "grad_norm": 0.6890944242477417, + "learning_rate": 0.0001312768135907073, + "loss": 2.5922, + "step": 8035 + }, + { + "epoch": 0.6485352271810185, + "grad_norm": 0.7409473061561584, + "learning_rate": 0.0001312618183396997, + "loss": 2.6132, + "step": 8036 + }, + { + "epoch": 0.6486159309176015, + "grad_norm": 0.6660643815994263, + "learning_rate": 0.00013124682230956585, + "loss": 2.5816, + "step": 8037 + }, + { + "epoch": 0.6486966346541845, + "grad_norm": 0.714235246181488, + "learning_rate": 0.0001312318255006795, + "loss": 2.5613, + "step": 8038 + }, + { + "epoch": 0.6487773383907675, + "grad_norm": 
0.6568472385406494, + "learning_rate": 0.00013121682791341442, + "loss": 2.6382, + "step": 8039 + }, + { + "epoch": 0.6488580421273505, + "grad_norm": 0.6874251961708069, + "learning_rate": 0.00013120182954814438, + "loss": 2.593, + "step": 8040 + }, + { + "epoch": 0.6489387458639335, + "grad_norm": 0.7620158791542053, + "learning_rate": 0.0001311868304052432, + "loss": 2.589, + "step": 8041 + }, + { + "epoch": 0.6490194496005165, + "grad_norm": 0.6755926609039307, + "learning_rate": 0.00013117183048508467, + "loss": 2.5876, + "step": 8042 + }, + { + "epoch": 0.6491001533370995, + "grad_norm": 0.6952808499336243, + "learning_rate": 0.00013115682978804264, + "loss": 2.5909, + "step": 8043 + }, + { + "epoch": 0.6491808570736826, + "grad_norm": 0.6599535346031189, + "learning_rate": 0.00013114182831449098, + "loss": 2.6031, + "step": 8044 + }, + { + "epoch": 0.6492615608102655, + "grad_norm": 0.7816598415374756, + "learning_rate": 0.00013112682606480355, + "loss": 2.5633, + "step": 8045 + }, + { + "epoch": 0.6493422645468485, + "grad_norm": 0.7188639640808105, + "learning_rate": 0.00013111182303935425, + "loss": 2.6292, + "step": 8046 + }, + { + "epoch": 0.6494229682834315, + "grad_norm": 0.7131505608558655, + "learning_rate": 0.00013109681923851698, + "loss": 2.5729, + "step": 8047 + }, + { + "epoch": 0.6495036720200146, + "grad_norm": 0.7466408014297485, + "learning_rate": 0.00013108181466266568, + "loss": 2.5742, + "step": 8048 + }, + { + "epoch": 0.6495843757565976, + "grad_norm": 0.6707943677902222, + "learning_rate": 0.00013106680931217437, + "loss": 2.5506, + "step": 8049 + }, + { + "epoch": 0.6496650794931805, + "grad_norm": 0.6913424730300903, + "learning_rate": 0.0001310518031874169, + "loss": 2.5639, + "step": 8050 + }, + { + "epoch": 0.6497457832297635, + "grad_norm": 0.8261755704879761, + "learning_rate": 0.00013103679628876733, + "loss": 2.601, + "step": 8051 + }, + { + "epoch": 0.6498264869663466, + "grad_norm": 0.7410566806793213, + "learning_rate": 
0.0001310217886165997, + "loss": 2.5326, + "step": 8052 + }, + { + "epoch": 0.6499071907029296, + "grad_norm": 0.7032365202903748, + "learning_rate": 0.00013100678017128798, + "loss": 2.5907, + "step": 8053 + }, + { + "epoch": 0.6499878944395125, + "grad_norm": 0.7074568271636963, + "learning_rate": 0.00013099177095320626, + "loss": 2.6193, + "step": 8054 + }, + { + "epoch": 0.6500685981760955, + "grad_norm": 0.7754546999931335, + "learning_rate": 0.00013097676096272855, + "loss": 2.5832, + "step": 8055 + }, + { + "epoch": 0.6501493019126786, + "grad_norm": 0.7475717663764954, + "learning_rate": 0.00013096175020022903, + "loss": 2.6233, + "step": 8056 + }, + { + "epoch": 0.6502300056492616, + "grad_norm": 0.7863949537277222, + "learning_rate": 0.00013094673866608173, + "loss": 2.5745, + "step": 8057 + }, + { + "epoch": 0.6503107093858446, + "grad_norm": 0.69294673204422, + "learning_rate": 0.0001309317263606608, + "loss": 2.5982, + "step": 8058 + }, + { + "epoch": 0.6503914131224275, + "grad_norm": 0.7096135020256042, + "learning_rate": 0.00013091671328434046, + "loss": 2.5944, + "step": 8059 + }, + { + "epoch": 0.6504721168590105, + "grad_norm": 0.7001097202301025, + "learning_rate": 0.00013090169943749476, + "loss": 2.5435, + "step": 8060 + }, + { + "epoch": 0.6505528205955936, + "grad_norm": 0.7522539496421814, + "learning_rate": 0.00013088668482049792, + "loss": 2.5843, + "step": 8061 + }, + { + "epoch": 0.6506335243321766, + "grad_norm": 0.6675420999526978, + "learning_rate": 0.00013087166943372418, + "loss": 2.5623, + "step": 8062 + }, + { + "epoch": 0.6507142280687596, + "grad_norm": 0.7779181599617004, + "learning_rate": 0.00013085665327754772, + "loss": 2.6087, + "step": 8063 + }, + { + "epoch": 0.6507949318053425, + "grad_norm": 0.7385239005088806, + "learning_rate": 0.00013084163635234284, + "loss": 2.5725, + "step": 8064 + }, + { + "epoch": 0.6508756355419256, + "grad_norm": 0.6966612339019775, + "learning_rate": 0.00013082661865848375, + "loss": 
2.5745, + "step": 8065 + }, + { + "epoch": 0.6509563392785086, + "grad_norm": 0.7098337411880493, + "learning_rate": 0.00013081160019634468, + "loss": 2.5461, + "step": 8066 + }, + { + "epoch": 0.6510370430150916, + "grad_norm": 0.6514503359794617, + "learning_rate": 0.00013079658096630002, + "loss": 2.5869, + "step": 8067 + }, + { + "epoch": 0.6511177467516746, + "grad_norm": 0.680422306060791, + "learning_rate": 0.0001307815609687241, + "loss": 2.6316, + "step": 8068 + }, + { + "epoch": 0.6511984504882576, + "grad_norm": 0.6892665028572083, + "learning_rate": 0.00013076654020399117, + "loss": 2.5862, + "step": 8069 + }, + { + "epoch": 0.6512791542248406, + "grad_norm": 0.7605568170547485, + "learning_rate": 0.00013075151867247568, + "loss": 2.5342, + "step": 8070 + }, + { + "epoch": 0.6513598579614236, + "grad_norm": 0.7571204900741577, + "learning_rate": 0.00013073649637455192, + "loss": 2.5762, + "step": 8071 + }, + { + "epoch": 0.6514405616980066, + "grad_norm": 0.6910812258720398, + "learning_rate": 0.00013072147331059431, + "loss": 2.6635, + "step": 8072 + }, + { + "epoch": 0.6515212654345897, + "grad_norm": 0.765559196472168, + "learning_rate": 0.00013070644948097733, + "loss": 2.5885, + "step": 8073 + }, + { + "epoch": 0.6516019691711726, + "grad_norm": 0.7533665299415588, + "learning_rate": 0.00013069142488607532, + "loss": 2.6545, + "step": 8074 + }, + { + "epoch": 0.6516826729077556, + "grad_norm": 0.685089647769928, + "learning_rate": 0.0001306763995262628, + "loss": 2.5955, + "step": 8075 + }, + { + "epoch": 0.6517633766443386, + "grad_norm": 0.7280653715133667, + "learning_rate": 0.00013066137340191422, + "loss": 2.5548, + "step": 8076 + }, + { + "epoch": 0.6518440803809217, + "grad_norm": 0.6881482601165771, + "learning_rate": 0.00013064634651340404, + "loss": 2.6143, + "step": 8077 + }, + { + "epoch": 0.6519247841175047, + "grad_norm": 0.6878265142440796, + "learning_rate": 0.0001306313188611068, + "loss": 2.5681, + "step": 8078 + }, + { + "epoch": 
0.6520054878540876, + "grad_norm": 0.685238242149353, + "learning_rate": 0.00013061629044539702, + "loss": 2.5517, + "step": 8079 + }, + { + "epoch": 0.6520861915906706, + "grad_norm": 0.6689820885658264, + "learning_rate": 0.00013060126126664928, + "loss": 2.6201, + "step": 8080 + }, + { + "epoch": 0.6521668953272537, + "grad_norm": 0.7128999829292297, + "learning_rate": 0.00013058623132523807, + "loss": 2.5829, + "step": 8081 + }, + { + "epoch": 0.6522475990638367, + "grad_norm": 0.6835216879844666, + "learning_rate": 0.00013057120062153805, + "loss": 2.6312, + "step": 8082 + }, + { + "epoch": 0.6523283028004196, + "grad_norm": 0.7140012383460999, + "learning_rate": 0.00013055616915592382, + "loss": 2.6148, + "step": 8083 + }, + { + "epoch": 0.6524090065370026, + "grad_norm": 0.7378252148628235, + "learning_rate": 0.00013054113692876994, + "loss": 2.5805, + "step": 8084 + }, + { + "epoch": 0.6524897102735857, + "grad_norm": 0.7569258213043213, + "learning_rate": 0.0001305261039404511, + "loss": 2.6088, + "step": 8085 + }, + { + "epoch": 0.6525704140101687, + "grad_norm": 0.6909007430076599, + "learning_rate": 0.00013051107019134195, + "loss": 2.5285, + "step": 8086 + }, + { + "epoch": 0.6526511177467517, + "grad_norm": 0.6785587072372437, + "learning_rate": 0.0001304960356818172, + "loss": 2.5527, + "step": 8087 + }, + { + "epoch": 0.6527318214833346, + "grad_norm": 0.7058801054954529, + "learning_rate": 0.0001304810004122515, + "loss": 2.6789, + "step": 8088 + }, + { + "epoch": 0.6528125252199177, + "grad_norm": 0.6920512318611145, + "learning_rate": 0.0001304659643830196, + "loss": 2.5748, + "step": 8089 + }, + { + "epoch": 0.6528932289565007, + "grad_norm": 0.6829244494438171, + "learning_rate": 0.00013045092759449625, + "loss": 2.5389, + "step": 8090 + }, + { + "epoch": 0.6529739326930837, + "grad_norm": 0.6942421793937683, + "learning_rate": 0.00013043589004705614, + "loss": 2.5851, + "step": 8091 + }, + { + "epoch": 0.6530546364296667, + "grad_norm": 
0.6473072171211243, + "learning_rate": 0.0001304208517410741, + "loss": 2.56, + "step": 8092 + }, + { + "epoch": 0.6531353401662497, + "grad_norm": 0.6692056655883789, + "learning_rate": 0.00013040581267692494, + "loss": 2.5977, + "step": 8093 + }, + { + "epoch": 0.6532160439028327, + "grad_norm": 0.6918915510177612, + "learning_rate": 0.00013039077285498344, + "loss": 2.551, + "step": 8094 + }, + { + "epoch": 0.6532967476394157, + "grad_norm": 0.7432852387428284, + "learning_rate": 0.00013037573227562443, + "loss": 2.5537, + "step": 8095 + }, + { + "epoch": 0.6533774513759987, + "grad_norm": 0.6737081408500671, + "learning_rate": 0.0001303606909392228, + "loss": 2.5947, + "step": 8096 + }, + { + "epoch": 0.6534581551125818, + "grad_norm": 0.6810599565505981, + "learning_rate": 0.0001303456488461533, + "loss": 2.5704, + "step": 8097 + }, + { + "epoch": 0.6535388588491647, + "grad_norm": 0.675240159034729, + "learning_rate": 0.00013033060599679098, + "loss": 2.591, + "step": 8098 + }, + { + "epoch": 0.6536195625857477, + "grad_norm": 0.6888695359230042, + "learning_rate": 0.00013031556239151066, + "loss": 2.5403, + "step": 8099 + }, + { + "epoch": 0.6537002663223307, + "grad_norm": 0.7154796719551086, + "learning_rate": 0.00013030051803068727, + "loss": 2.5654, + "step": 8100 + }, + { + "epoch": 0.6537809700589138, + "grad_norm": 0.6655243635177612, + "learning_rate": 0.0001302854729146958, + "loss": 2.5867, + "step": 8101 + }, + { + "epoch": 0.6538616737954968, + "grad_norm": 0.7070788145065308, + "learning_rate": 0.00013027042704391115, + "loss": 2.5593, + "step": 8102 + }, + { + "epoch": 0.6539423775320797, + "grad_norm": 0.7071834206581116, + "learning_rate": 0.0001302553804187083, + "loss": 2.536, + "step": 8103 + }, + { + "epoch": 0.6540230812686627, + "grad_norm": 0.7086542248725891, + "learning_rate": 0.00013024033303946233, + "loss": 2.5644, + "step": 8104 + }, + { + "epoch": 0.6541037850052458, + "grad_norm": 0.6714556813240051, + "learning_rate": 
0.00013022528490654818, + "loss": 2.5167, + "step": 8105 + }, + { + "epoch": 0.6541844887418288, + "grad_norm": 0.6905114054679871, + "learning_rate": 0.00013021023602034095, + "loss": 2.5227, + "step": 8106 + }, + { + "epoch": 0.6542651924784118, + "grad_norm": 0.7050586342811584, + "learning_rate": 0.00013019518638121563, + "loss": 2.5725, + "step": 8107 + }, + { + "epoch": 0.6543458962149947, + "grad_norm": 0.6940500736236572, + "learning_rate": 0.00013018013598954737, + "loss": 2.5912, + "step": 8108 + }, + { + "epoch": 0.6544265999515777, + "grad_norm": 0.7136965990066528, + "learning_rate": 0.00013016508484571122, + "loss": 2.6101, + "step": 8109 + }, + { + "epoch": 0.6545073036881608, + "grad_norm": 0.7205774188041687, + "learning_rate": 0.0001301500329500823, + "loss": 2.5869, + "step": 8110 + }, + { + "epoch": 0.6545880074247438, + "grad_norm": 0.6831154823303223, + "learning_rate": 0.00013013498030303575, + "loss": 2.5309, + "step": 8111 + }, + { + "epoch": 0.6546687111613267, + "grad_norm": 0.6778538823127747, + "learning_rate": 0.0001301199269049467, + "loss": 2.6297, + "step": 8112 + }, + { + "epoch": 0.6547494148979097, + "grad_norm": 0.705055832862854, + "learning_rate": 0.00013010487275619034, + "loss": 2.6188, + "step": 8113 + }, + { + "epoch": 0.6548301186344928, + "grad_norm": 0.6927980780601501, + "learning_rate": 0.00013008981785714188, + "loss": 2.5744, + "step": 8114 + }, + { + "epoch": 0.6549108223710758, + "grad_norm": 0.7070884108543396, + "learning_rate": 0.0001300747622081765, + "loss": 2.618, + "step": 8115 + }, + { + "epoch": 0.6549915261076588, + "grad_norm": 0.723479688167572, + "learning_rate": 0.0001300597058096694, + "loss": 2.5928, + "step": 8116 + }, + { + "epoch": 0.6550722298442417, + "grad_norm": 0.6689562201499939, + "learning_rate": 0.00013004464866199587, + "loss": 2.5592, + "step": 8117 + }, + { + "epoch": 0.6551529335808248, + "grad_norm": 0.6685079336166382, + "learning_rate": 0.00013002959076553115, + "loss": 2.558, + 
"step": 8118 + }, + { + "epoch": 0.6552336373174078, + "grad_norm": 0.678105890750885, + "learning_rate": 0.00013001453212065057, + "loss": 2.6176, + "step": 8119 + }, + { + "epoch": 0.6553143410539908, + "grad_norm": 0.7355597019195557, + "learning_rate": 0.00012999947272772933, + "loss": 2.6293, + "step": 8120 + }, + { + "epoch": 0.6553950447905738, + "grad_norm": 0.735862672328949, + "learning_rate": 0.00012998441258714284, + "loss": 2.635, + "step": 8121 + }, + { + "epoch": 0.6554757485271568, + "grad_norm": 0.6766025424003601, + "learning_rate": 0.0001299693516992664, + "loss": 2.5829, + "step": 8122 + }, + { + "epoch": 0.6555564522637398, + "grad_norm": 0.6701885461807251, + "learning_rate": 0.00012995429006447542, + "loss": 2.5996, + "step": 8123 + }, + { + "epoch": 0.6556371560003228, + "grad_norm": 0.6814082264900208, + "learning_rate": 0.00012993922768314518, + "loss": 2.5906, + "step": 8124 + }, + { + "epoch": 0.6557178597369058, + "grad_norm": 0.7104958295822144, + "learning_rate": 0.00012992416455565113, + "loss": 2.6708, + "step": 8125 + }, + { + "epoch": 0.6557985634734889, + "grad_norm": 0.6451221108436584, + "learning_rate": 0.0001299091006823687, + "loss": 2.5512, + "step": 8126 + }, + { + "epoch": 0.6558792672100718, + "grad_norm": 0.6736068725585938, + "learning_rate": 0.0001298940360636733, + "loss": 2.5839, + "step": 8127 + }, + { + "epoch": 0.6559599709466548, + "grad_norm": 0.6873149871826172, + "learning_rate": 0.00012987897069994031, + "loss": 2.5804, + "step": 8128 + }, + { + "epoch": 0.6560406746832378, + "grad_norm": 0.6937728524208069, + "learning_rate": 0.00012986390459154533, + "loss": 2.5648, + "step": 8129 + }, + { + "epoch": 0.6561213784198209, + "grad_norm": 0.7109464406967163, + "learning_rate": 0.00012984883773886377, + "loss": 2.6132, + "step": 8130 + }, + { + "epoch": 0.6562020821564039, + "grad_norm": 0.7134159803390503, + "learning_rate": 0.00012983377014227115, + "loss": 2.6029, + "step": 8131 + }, + { + "epoch": 
0.6562827858929868, + "grad_norm": 0.6788110733032227, + "learning_rate": 0.000129818701802143, + "loss": 2.6344, + "step": 8132 + }, + { + "epoch": 0.6563634896295698, + "grad_norm": 0.6798231601715088, + "learning_rate": 0.00012980363271885483, + "loss": 2.5758, + "step": 8133 + }, + { + "epoch": 0.6564441933661529, + "grad_norm": 0.6586930155754089, + "learning_rate": 0.00012978856289278226, + "loss": 2.5918, + "step": 8134 + }, + { + "epoch": 0.6565248971027359, + "grad_norm": 0.6614218950271606, + "learning_rate": 0.0001297734923243008, + "loss": 2.5777, + "step": 8135 + }, + { + "epoch": 0.6566056008393188, + "grad_norm": 0.6874340176582336, + "learning_rate": 0.0001297584210137861, + "loss": 2.5528, + "step": 8136 + }, + { + "epoch": 0.6566863045759018, + "grad_norm": 0.6972174048423767, + "learning_rate": 0.00012974334896161376, + "loss": 2.6551, + "step": 8137 + }, + { + "epoch": 0.6567670083124849, + "grad_norm": 0.7414106726646423, + "learning_rate": 0.0001297282761681594, + "loss": 2.5719, + "step": 8138 + }, + { + "epoch": 0.6568477120490679, + "grad_norm": 0.6678279042243958, + "learning_rate": 0.00012971320263379868, + "loss": 2.555, + "step": 8139 + }, + { + "epoch": 0.6569284157856509, + "grad_norm": 0.692149817943573, + "learning_rate": 0.0001296981283589073, + "loss": 2.5991, + "step": 8140 + }, + { + "epoch": 0.6570091195222338, + "grad_norm": 0.6937025189399719, + "learning_rate": 0.00012968305334386094, + "loss": 2.5635, + "step": 8141 + }, + { + "epoch": 0.6570898232588169, + "grad_norm": 0.6250358819961548, + "learning_rate": 0.00012966797758903528, + "loss": 2.55, + "step": 8142 + }, + { + "epoch": 0.6571705269953999, + "grad_norm": 0.7388221025466919, + "learning_rate": 0.00012965290109480607, + "loss": 2.5307, + "step": 8143 + }, + { + "epoch": 0.6572512307319829, + "grad_norm": 0.7165891528129578, + "learning_rate": 0.00012963782386154904, + "loss": 2.5482, + "step": 8144 + }, + { + "epoch": 0.6573319344685659, + "grad_norm": 
0.7605282068252563, + "learning_rate": 0.00012962274588963996, + "loss": 2.5839, + "step": 8145 + }, + { + "epoch": 0.657412638205149, + "grad_norm": 0.7259613275527954, + "learning_rate": 0.00012960766717945465, + "loss": 2.5612, + "step": 8146 + }, + { + "epoch": 0.6574933419417319, + "grad_norm": 0.7301480770111084, + "learning_rate": 0.00012959258773136885, + "loss": 2.5365, + "step": 8147 + }, + { + "epoch": 0.6575740456783149, + "grad_norm": 0.6800966262817383, + "learning_rate": 0.0001295775075457584, + "loss": 2.5663, + "step": 8148 + }, + { + "epoch": 0.6576547494148979, + "grad_norm": 0.6968960165977478, + "learning_rate": 0.0001295624266229992, + "loss": 2.5626, + "step": 8149 + }, + { + "epoch": 0.657735453151481, + "grad_norm": 0.9044952392578125, + "learning_rate": 0.00012954734496346704, + "loss": 2.6479, + "step": 8150 + }, + { + "epoch": 0.6578161568880639, + "grad_norm": 0.6955156922340393, + "learning_rate": 0.00012953226256753777, + "loss": 2.5879, + "step": 8151 + }, + { + "epoch": 0.6578968606246469, + "grad_norm": 0.6535033583641052, + "learning_rate": 0.00012951717943558735, + "loss": 2.5372, + "step": 8152 + }, + { + "epoch": 0.6579775643612299, + "grad_norm": 0.720730721950531, + "learning_rate": 0.0001295020955679916, + "loss": 2.5813, + "step": 8153 + }, + { + "epoch": 0.658058268097813, + "grad_norm": 0.7190384268760681, + "learning_rate": 0.00012948701096512655, + "loss": 2.5923, + "step": 8154 + }, + { + "epoch": 0.658138971834396, + "grad_norm": 0.6624464988708496, + "learning_rate": 0.0001294719256273681, + "loss": 2.5548, + "step": 8155 + }, + { + "epoch": 0.6582196755709789, + "grad_norm": 0.7839831709861755, + "learning_rate": 0.00012945683955509224, + "loss": 2.531, + "step": 8156 + }, + { + "epoch": 0.6583003793075619, + "grad_norm": 0.694970965385437, + "learning_rate": 0.00012944175274867497, + "loss": 2.4693, + "step": 8157 + }, + { + "epoch": 0.658381083044145, + "grad_norm": 0.7409366965293884, + "learning_rate": 
0.0001294266652084922, + "loss": 2.5706, + "step": 8158 + }, + { + "epoch": 0.658461786780728, + "grad_norm": 0.7502163052558899, + "learning_rate": 0.00012941157693492002, + "loss": 2.6137, + "step": 8159 + }, + { + "epoch": 0.658542490517311, + "grad_norm": 0.6627129912376404, + "learning_rate": 0.00012939648792833447, + "loss": 2.5781, + "step": 8160 + }, + { + "epoch": 0.6586231942538939, + "grad_norm": 0.6775660514831543, + "learning_rate": 0.00012938139818911157, + "loss": 2.5441, + "step": 8161 + }, + { + "epoch": 0.6587038979904769, + "grad_norm": 0.7150553464889526, + "learning_rate": 0.00012936630771762748, + "loss": 2.5763, + "step": 8162 + }, + { + "epoch": 0.65878460172706, + "grad_norm": 0.7461466193199158, + "learning_rate": 0.0001293512165142582, + "loss": 2.54, + "step": 8163 + }, + { + "epoch": 0.658865305463643, + "grad_norm": 0.7635199427604675, + "learning_rate": 0.00012933612457937988, + "loss": 2.5763, + "step": 8164 + }, + { + "epoch": 0.658946009200226, + "grad_norm": 0.7360543608665466, + "learning_rate": 0.00012932103191336865, + "loss": 2.5968, + "step": 8165 + }, + { + "epoch": 0.6590267129368089, + "grad_norm": 0.6482167840003967, + "learning_rate": 0.0001293059385166007, + "loss": 2.5704, + "step": 8166 + }, + { + "epoch": 0.659107416673392, + "grad_norm": 0.7024737596511841, + "learning_rate": 0.00012929084438945208, + "loss": 2.6221, + "step": 8167 + }, + { + "epoch": 0.659188120409975, + "grad_norm": 0.7192068696022034, + "learning_rate": 0.0001292757495322991, + "loss": 2.5574, + "step": 8168 + }, + { + "epoch": 0.659268824146558, + "grad_norm": 0.6900508403778076, + "learning_rate": 0.0001292606539455179, + "loss": 2.5969, + "step": 8169 + }, + { + "epoch": 0.6593495278831409, + "grad_norm": 0.7522475719451904, + "learning_rate": 0.00012924555762948474, + "loss": 2.592, + "step": 8170 + }, + { + "epoch": 0.659430231619724, + "grad_norm": 0.6610947251319885, + "learning_rate": 0.00012923046058457583, + "loss": 2.5404, + "step": 
8171 + }, + { + "epoch": 0.659510935356307, + "grad_norm": 0.667628288269043, + "learning_rate": 0.00012921536281116738, + "loss": 2.5551, + "step": 8172 + }, + { + "epoch": 0.65959163909289, + "grad_norm": 0.7119980454444885, + "learning_rate": 0.00012920026430963578, + "loss": 2.6002, + "step": 8173 + }, + { + "epoch": 0.659672342829473, + "grad_norm": 0.712166428565979, + "learning_rate": 0.00012918516508035724, + "loss": 2.626, + "step": 8174 + }, + { + "epoch": 0.659753046566056, + "grad_norm": 0.6993290185928345, + "learning_rate": 0.0001291700651237081, + "loss": 2.6311, + "step": 8175 + }, + { + "epoch": 0.659833750302639, + "grad_norm": 0.6889405250549316, + "learning_rate": 0.0001291549644400647, + "loss": 2.6483, + "step": 8176 + }, + { + "epoch": 0.659914454039222, + "grad_norm": 0.7120937705039978, + "learning_rate": 0.00012913986302980334, + "loss": 2.5489, + "step": 8177 + }, + { + "epoch": 0.659995157775805, + "grad_norm": 0.7112947106361389, + "learning_rate": 0.00012912476089330043, + "loss": 2.6393, + "step": 8178 + }, + { + "epoch": 0.6600758615123881, + "grad_norm": 0.710342526435852, + "learning_rate": 0.00012910965803093237, + "loss": 2.5897, + "step": 8179 + }, + { + "epoch": 0.660156565248971, + "grad_norm": 0.6506931185722351, + "learning_rate": 0.0001290945544430755, + "loss": 2.6429, + "step": 8180 + }, + { + "epoch": 0.660237268985554, + "grad_norm": 0.7147021293640137, + "learning_rate": 0.00012907945013010633, + "loss": 2.5521, + "step": 8181 + }, + { + "epoch": 0.660317972722137, + "grad_norm": 0.6802387833595276, + "learning_rate": 0.0001290643450924012, + "loss": 2.581, + "step": 8182 + }, + { + "epoch": 0.6603986764587201, + "grad_norm": 0.7599670886993408, + "learning_rate": 0.00012904923933033664, + "loss": 2.5532, + "step": 8183 + }, + { + "epoch": 0.6604793801953031, + "grad_norm": 0.7105657458305359, + "learning_rate": 0.0001290341328442891, + "loss": 2.5744, + "step": 8184 + }, + { + "epoch": 0.660560083931886, + 
"grad_norm": 0.6786425113677979, + "learning_rate": 0.00012901902563463506, + "loss": 2.5326, + "step": 8185 + }, + { + "epoch": 0.660640787668469, + "grad_norm": 0.7305583357810974, + "learning_rate": 0.00012900391770175106, + "loss": 2.6103, + "step": 8186 + }, + { + "epoch": 0.6607214914050521, + "grad_norm": 0.6578992605209351, + "learning_rate": 0.00012898880904601363, + "loss": 2.5833, + "step": 8187 + }, + { + "epoch": 0.6608021951416351, + "grad_norm": 0.6498856544494629, + "learning_rate": 0.00012897369966779926, + "loss": 2.6333, + "step": 8188 + }, + { + "epoch": 0.660882898878218, + "grad_norm": 0.7065569162368774, + "learning_rate": 0.00012895858956748458, + "loss": 2.5326, + "step": 8189 + }, + { + "epoch": 0.660963602614801, + "grad_norm": 0.7676446437835693, + "learning_rate": 0.00012894347874544613, + "loss": 2.6233, + "step": 8190 + }, + { + "epoch": 0.6610443063513841, + "grad_norm": 0.6794395446777344, + "learning_rate": 0.00012892836720206056, + "loss": 2.5426, + "step": 8191 + }, + { + "epoch": 0.6611250100879671, + "grad_norm": 0.7448986768722534, + "learning_rate": 0.00012891325493770444, + "loss": 2.5832, + "step": 8192 + }, + { + "epoch": 0.6612057138245501, + "grad_norm": 0.7789760231971741, + "learning_rate": 0.0001288981419527544, + "loss": 2.6393, + "step": 8193 + }, + { + "epoch": 0.661286417561133, + "grad_norm": 0.7425827980041504, + "learning_rate": 0.00012888302824758718, + "loss": 2.6159, + "step": 8194 + }, + { + "epoch": 0.6613671212977161, + "grad_norm": 0.6677481532096863, + "learning_rate": 0.00012886791382257936, + "loss": 2.5399, + "step": 8195 + }, + { + "epoch": 0.6614478250342991, + "grad_norm": 0.698397159576416, + "learning_rate": 0.0001288527986781077, + "loss": 2.5443, + "step": 8196 + }, + { + "epoch": 0.6615285287708821, + "grad_norm": 0.6862680315971375, + "learning_rate": 0.00012883768281454885, + "loss": 2.5843, + "step": 8197 + }, + { + "epoch": 0.6616092325074651, + "grad_norm": 0.7421948313713074, + 
"learning_rate": 0.00012882256623227955, + "loss": 2.5885, + "step": 8198 + }, + { + "epoch": 0.6616899362440481, + "grad_norm": 0.7453073859214783, + "learning_rate": 0.00012880744893167654, + "loss": 2.5821, + "step": 8199 + }, + { + "epoch": 0.6617706399806311, + "grad_norm": 0.668218195438385, + "learning_rate": 0.00012879233091311667, + "loss": 2.5941, + "step": 8200 + }, + { + "epoch": 0.6618513437172141, + "grad_norm": 0.6864587664604187, + "learning_rate": 0.00012877721217697657, + "loss": 2.5321, + "step": 8201 + }, + { + "epoch": 0.6619320474537971, + "grad_norm": 0.6521022319793701, + "learning_rate": 0.00012876209272363317, + "loss": 2.5945, + "step": 8202 + }, + { + "epoch": 0.6620127511903802, + "grad_norm": 0.7564631104469299, + "learning_rate": 0.00012874697255346325, + "loss": 2.5901, + "step": 8203 + }, + { + "epoch": 0.6620934549269631, + "grad_norm": 0.731991171836853, + "learning_rate": 0.00012873185166684356, + "loss": 2.649, + "step": 8204 + }, + { + "epoch": 0.6621741586635461, + "grad_norm": 0.6804815530776978, + "learning_rate": 0.00012871673006415108, + "loss": 2.5417, + "step": 8205 + }, + { + "epoch": 0.6622548624001291, + "grad_norm": 0.6862792372703552, + "learning_rate": 0.0001287016077457626, + "loss": 2.6118, + "step": 8206 + }, + { + "epoch": 0.6623355661367122, + "grad_norm": 0.7013735175132751, + "learning_rate": 0.00012868648471205503, + "loss": 2.6296, + "step": 8207 + }, + { + "epoch": 0.6624162698732952, + "grad_norm": 0.7284584045410156, + "learning_rate": 0.00012867136096340529, + "loss": 2.6547, + "step": 8208 + }, + { + "epoch": 0.6624969736098781, + "grad_norm": 0.714546799659729, + "learning_rate": 0.00012865623650019025, + "loss": 2.5955, + "step": 8209 + }, + { + "epoch": 0.6625776773464611, + "grad_norm": 0.7645453214645386, + "learning_rate": 0.0001286411113227869, + "loss": 2.6132, + "step": 8210 + }, + { + "epoch": 0.6626583810830441, + "grad_norm": 0.6615093946456909, + "learning_rate": 0.0001286259854315722, + 
"loss": 2.5701, + "step": 8211 + }, + { + "epoch": 0.6627390848196272, + "grad_norm": 0.6565523147583008, + "learning_rate": 0.0001286108588269231, + "loss": 2.57, + "step": 8212 + }, + { + "epoch": 0.6628197885562102, + "grad_norm": 0.7173478007316589, + "learning_rate": 0.00012859573150921666, + "loss": 2.589, + "step": 8213 + }, + { + "epoch": 0.6629004922927931, + "grad_norm": 0.7069580554962158, + "learning_rate": 0.00012858060347882975, + "loss": 2.6146, + "step": 8214 + }, + { + "epoch": 0.6629811960293761, + "grad_norm": 0.7004678249359131, + "learning_rate": 0.00012856547473613953, + "loss": 2.5735, + "step": 8215 + }, + { + "epoch": 0.6630618997659592, + "grad_norm": 0.6589130163192749, + "learning_rate": 0.00012855034528152305, + "loss": 2.5731, + "step": 8216 + }, + { + "epoch": 0.6631426035025422, + "grad_norm": 0.7223117351531982, + "learning_rate": 0.0001285352151153573, + "loss": 2.5262, + "step": 8217 + }, + { + "epoch": 0.6632233072391251, + "grad_norm": 0.7045131325721741, + "learning_rate": 0.0001285200842380194, + "loss": 2.5789, + "step": 8218 + }, + { + "epoch": 0.6633040109757081, + "grad_norm": 0.7002174854278564, + "learning_rate": 0.00012850495264988645, + "loss": 2.6386, + "step": 8219 + }, + { + "epoch": 0.6633847147122912, + "grad_norm": 0.6844584941864014, + "learning_rate": 0.00012848982035133555, + "loss": 2.5394, + "step": 8220 + }, + { + "epoch": 0.6634654184488742, + "grad_norm": 0.7154871821403503, + "learning_rate": 0.00012847468734274387, + "loss": 2.5927, + "step": 8221 + }, + { + "epoch": 0.6635461221854572, + "grad_norm": 0.6856776475906372, + "learning_rate": 0.00012845955362448855, + "loss": 2.5694, + "step": 8222 + }, + { + "epoch": 0.6636268259220401, + "grad_norm": 0.7069089412689209, + "learning_rate": 0.00012844441919694676, + "loss": 2.5856, + "step": 8223 + }, + { + "epoch": 0.6637075296586232, + "grad_norm": 0.7084143161773682, + "learning_rate": 0.00012842928406049567, + "loss": 2.6301, + "step": 8224 + }, + { + 
"epoch": 0.6637882333952062, + "grad_norm": 0.6790862679481506, + "learning_rate": 0.00012841414821551252, + "loss": 2.5586, + "step": 8225 + }, + { + "epoch": 0.6638689371317892, + "grad_norm": 0.6537249684333801, + "learning_rate": 0.00012839901166237453, + "loss": 2.5652, + "step": 8226 + }, + { + "epoch": 0.6639496408683722, + "grad_norm": 0.6670125126838684, + "learning_rate": 0.00012838387440145893, + "loss": 2.5438, + "step": 8227 + }, + { + "epoch": 0.6640303446049552, + "grad_norm": 0.7202955484390259, + "learning_rate": 0.00012836873643314297, + "loss": 2.5632, + "step": 8228 + }, + { + "epoch": 0.6641110483415382, + "grad_norm": 0.6844765543937683, + "learning_rate": 0.00012835359775780394, + "loss": 2.5595, + "step": 8229 + }, + { + "epoch": 0.6641917520781212, + "grad_norm": 0.6557698249816895, + "learning_rate": 0.00012833845837581916, + "loss": 2.5998, + "step": 8230 + }, + { + "epoch": 0.6642724558147042, + "grad_norm": 0.6741784811019897, + "learning_rate": 0.0001283233182875659, + "loss": 2.5591, + "step": 8231 + }, + { + "epoch": 0.6643531595512873, + "grad_norm": 0.6926484704017639, + "learning_rate": 0.00012830817749342154, + "loss": 2.5557, + "step": 8232 + }, + { + "epoch": 0.6644338632878702, + "grad_norm": 0.6866984367370605, + "learning_rate": 0.00012829303599376336, + "loss": 2.5646, + "step": 8233 + }, + { + "epoch": 0.6645145670244532, + "grad_norm": 0.6772707104682922, + "learning_rate": 0.0001282778937889688, + "loss": 2.6028, + "step": 8234 + }, + { + "epoch": 0.6645952707610362, + "grad_norm": 0.693236768245697, + "learning_rate": 0.00012826275087941518, + "loss": 2.611, + "step": 8235 + }, + { + "epoch": 0.6646759744976193, + "grad_norm": 0.7181996703147888, + "learning_rate": 0.00012824760726547993, + "loss": 2.6081, + "step": 8236 + }, + { + "epoch": 0.6647566782342023, + "grad_norm": 0.6845484375953674, + "learning_rate": 0.00012823246294754048, + "loss": 2.5544, + "step": 8237 + }, + { + "epoch": 0.6648373819707852, + 
"grad_norm": 0.7106444239616394, + "learning_rate": 0.00012821731792597425, + "loss": 2.552, + "step": 8238 + }, + { + "epoch": 0.6649180857073682, + "grad_norm": 0.6930601000785828, + "learning_rate": 0.0001282021722011587, + "loss": 2.5401, + "step": 8239 + }, + { + "epoch": 0.6649987894439513, + "grad_norm": 0.6658228039741516, + "learning_rate": 0.00012818702577347129, + "loss": 2.6287, + "step": 8240 + }, + { + "epoch": 0.6650794931805343, + "grad_norm": 0.6919803619384766, + "learning_rate": 0.0001281718786432895, + "loss": 2.6142, + "step": 8241 + }, + { + "epoch": 0.6651601969171173, + "grad_norm": 0.6675698757171631, + "learning_rate": 0.00012815673081099086, + "loss": 2.5325, + "step": 8242 + }, + { + "epoch": 0.6652409006537002, + "grad_norm": 0.6669798493385315, + "learning_rate": 0.0001281415822769529, + "loss": 2.5355, + "step": 8243 + }, + { + "epoch": 0.6653216043902833, + "grad_norm": 0.6449857950210571, + "learning_rate": 0.00012812643304155316, + "loss": 2.5968, + "step": 8244 + }, + { + "epoch": 0.6654023081268663, + "grad_norm": 0.6972789168357849, + "learning_rate": 0.00012811128310516914, + "loss": 2.6133, + "step": 8245 + }, + { + "epoch": 0.6654830118634493, + "grad_norm": 0.7179878354072571, + "learning_rate": 0.0001280961324681785, + "loss": 2.5793, + "step": 8246 + }, + { + "epoch": 0.6655637156000322, + "grad_norm": 0.6736378073692322, + "learning_rate": 0.0001280809811309588, + "loss": 2.5543, + "step": 8247 + }, + { + "epoch": 0.6656444193366153, + "grad_norm": 0.7376420497894287, + "learning_rate": 0.00012806582909388763, + "loss": 2.5501, + "step": 8248 + }, + { + "epoch": 0.6657251230731983, + "grad_norm": 0.7163094878196716, + "learning_rate": 0.00012805067635734263, + "loss": 2.5538, + "step": 8249 + }, + { + "epoch": 0.6658058268097813, + "grad_norm": 0.7699353694915771, + "learning_rate": 0.00012803552292170144, + "loss": 2.5925, + "step": 8250 + }, + { + "epoch": 0.6658865305463643, + "grad_norm": 0.6504995822906494, + 
"learning_rate": 0.00012802036878734177, + "loss": 2.5944, + "step": 8251 + }, + { + "epoch": 0.6659672342829474, + "grad_norm": 0.7150379419326782, + "learning_rate": 0.0001280052139546412, + "loss": 2.5959, + "step": 8252 + }, + { + "epoch": 0.6660479380195303, + "grad_norm": 0.7562555074691772, + "learning_rate": 0.00012799005842397757, + "loss": 2.6041, + "step": 8253 + }, + { + "epoch": 0.6661286417561133, + "grad_norm": 0.7242838740348816, + "learning_rate": 0.00012797490219572846, + "loss": 2.6152, + "step": 8254 + }, + { + "epoch": 0.6662093454926963, + "grad_norm": 0.7062848210334778, + "learning_rate": 0.00012795974527027168, + "loss": 2.596, + "step": 8255 + }, + { + "epoch": 0.6662900492292794, + "grad_norm": 0.8179726004600525, + "learning_rate": 0.00012794458764798497, + "loss": 2.5792, + "step": 8256 + }, + { + "epoch": 0.6663707529658623, + "grad_norm": 0.692166268825531, + "learning_rate": 0.00012792942932924608, + "loss": 2.6025, + "step": 8257 + }, + { + "epoch": 0.6664514567024453, + "grad_norm": 0.6540334224700928, + "learning_rate": 0.0001279142703144328, + "loss": 2.5119, + "step": 8258 + }, + { + "epoch": 0.6665321604390283, + "grad_norm": 0.7087461352348328, + "learning_rate": 0.00012789911060392294, + "loss": 2.5808, + "step": 8259 + }, + { + "epoch": 0.6666128641756114, + "grad_norm": 0.6897622346878052, + "learning_rate": 0.0001278839501980943, + "loss": 2.5811, + "step": 8260 + }, + { + "epoch": 0.6666935679121944, + "grad_norm": 0.6653634905815125, + "learning_rate": 0.00012786878909732473, + "loss": 2.5498, + "step": 8261 + }, + { + "epoch": 0.6667742716487773, + "grad_norm": 0.6541483402252197, + "learning_rate": 0.0001278536273019921, + "loss": 2.605, + "step": 8262 + }, + { + "epoch": 0.6668549753853603, + "grad_norm": 0.6748146414756775, + "learning_rate": 0.00012783846481247428, + "loss": 2.5571, + "step": 8263 + }, + { + "epoch": 0.6669356791219433, + "grad_norm": 0.7258282899856567, + "learning_rate": 0.00012782330162914915, + 
"loss": 2.5562, + "step": 8264 + }, + { + "epoch": 0.6670163828585264, + "grad_norm": 0.6963080167770386, + "learning_rate": 0.00012780813775239457, + "loss": 2.6467, + "step": 8265 + }, + { + "epoch": 0.6670970865951094, + "grad_norm": 0.6627718806266785, + "learning_rate": 0.00012779297318258855, + "loss": 2.5369, + "step": 8266 + }, + { + "epoch": 0.6671777903316923, + "grad_norm": 0.7026168704032898, + "learning_rate": 0.00012777780792010897, + "loss": 2.5639, + "step": 8267 + }, + { + "epoch": 0.6672584940682753, + "grad_norm": 0.6969077587127686, + "learning_rate": 0.0001277626419653338, + "loss": 2.517, + "step": 8268 + }, + { + "epoch": 0.6673391978048584, + "grad_norm": 0.6918485760688782, + "learning_rate": 0.00012774747531864102, + "loss": 2.6388, + "step": 8269 + }, + { + "epoch": 0.6674199015414414, + "grad_norm": 0.6661256551742554, + "learning_rate": 0.00012773230798040862, + "loss": 2.5477, + "step": 8270 + }, + { + "epoch": 0.6675006052780244, + "grad_norm": 0.6778402328491211, + "learning_rate": 0.0001277171399510146, + "loss": 2.6032, + "step": 8271 + }, + { + "epoch": 0.6675813090146073, + "grad_norm": 0.6464864611625671, + "learning_rate": 0.00012770197123083702, + "loss": 2.5396, + "step": 8272 + }, + { + "epoch": 0.6676620127511904, + "grad_norm": 0.7154508233070374, + "learning_rate": 0.0001276868018202539, + "loss": 2.6163, + "step": 8273 + }, + { + "epoch": 0.6677427164877734, + "grad_norm": 0.6849631071090698, + "learning_rate": 0.0001276716317196433, + "loss": 2.549, + "step": 8274 + }, + { + "epoch": 0.6678234202243564, + "grad_norm": 0.6696017980575562, + "learning_rate": 0.00012765646092938334, + "loss": 2.5046, + "step": 8275 + }, + { + "epoch": 0.6679041239609393, + "grad_norm": 0.668153703212738, + "learning_rate": 0.00012764128944985203, + "loss": 2.5422, + "step": 8276 + }, + { + "epoch": 0.6679848276975224, + "grad_norm": 0.6600282192230225, + "learning_rate": 0.00012762611728142756, + "loss": 2.6117, + "step": 8277 + }, + { + 
"epoch": 0.6680655314341054, + "grad_norm": 0.6691608428955078, + "learning_rate": 0.000127610944424488, + "loss": 2.5761, + "step": 8278 + }, + { + "epoch": 0.6681462351706884, + "grad_norm": 0.695142924785614, + "learning_rate": 0.00012759577087941156, + "loss": 2.6123, + "step": 8279 + }, + { + "epoch": 0.6682269389072714, + "grad_norm": 0.6846559643745422, + "learning_rate": 0.00012758059664657635, + "loss": 2.5882, + "step": 8280 + }, + { + "epoch": 0.6683076426438544, + "grad_norm": 0.7616459131240845, + "learning_rate": 0.0001275654217263606, + "loss": 2.5559, + "step": 8281 + }, + { + "epoch": 0.6683883463804374, + "grad_norm": 0.6995570063591003, + "learning_rate": 0.00012755024611914246, + "loss": 2.5336, + "step": 8282 + }, + { + "epoch": 0.6684690501170204, + "grad_norm": 0.7199691534042358, + "learning_rate": 0.0001275350698253002, + "loss": 2.6618, + "step": 8283 + }, + { + "epoch": 0.6685497538536034, + "grad_norm": 0.6938748955726624, + "learning_rate": 0.000127519892845212, + "loss": 2.574, + "step": 8284 + }, + { + "epoch": 0.6686304575901865, + "grad_norm": 0.6827714443206787, + "learning_rate": 0.00012750471517925614, + "loss": 2.5647, + "step": 8285 + }, + { + "epoch": 0.6687111613267694, + "grad_norm": 0.6684606671333313, + "learning_rate": 0.00012748953682781083, + "loss": 2.528, + "step": 8286 + }, + { + "epoch": 0.6687918650633524, + "grad_norm": 0.6842156052589417, + "learning_rate": 0.00012747435779125448, + "loss": 2.5521, + "step": 8287 + }, + { + "epoch": 0.6688725687999354, + "grad_norm": 0.7440506219863892, + "learning_rate": 0.0001274591780699653, + "loss": 2.5646, + "step": 8288 + }, + { + "epoch": 0.6689532725365185, + "grad_norm": 0.769922137260437, + "learning_rate": 0.0001274439976643216, + "loss": 2.6104, + "step": 8289 + }, + { + "epoch": 0.6690339762731015, + "grad_norm": 0.7793089747428894, + "learning_rate": 0.00012742881657470175, + "loss": 2.6348, + "step": 8290 + }, + { + "epoch": 0.6691146800096844, + "grad_norm": 
0.695060133934021, + "learning_rate": 0.0001274136348014841, + "loss": 2.5797, + "step": 8291 + }, + { + "epoch": 0.6691953837462674, + "grad_norm": 0.7089917659759521, + "learning_rate": 0.00012739845234504697, + "loss": 2.5431, + "step": 8292 + }, + { + "epoch": 0.6692760874828505, + "grad_norm": 0.7542717456817627, + "learning_rate": 0.00012738326920576885, + "loss": 2.6172, + "step": 8293 + }, + { + "epoch": 0.6693567912194335, + "grad_norm": 0.6947969794273376, + "learning_rate": 0.00012736808538402802, + "loss": 2.6026, + "step": 8294 + }, + { + "epoch": 0.6694374949560165, + "grad_norm": 0.6696321368217468, + "learning_rate": 0.00012735290088020302, + "loss": 2.5592, + "step": 8295 + }, + { + "epoch": 0.6695181986925994, + "grad_norm": 0.7001518607139587, + "learning_rate": 0.0001273377156946722, + "loss": 2.5994, + "step": 8296 + }, + { + "epoch": 0.6695989024291825, + "grad_norm": 0.6708101630210876, + "learning_rate": 0.000127322529827814, + "loss": 2.6392, + "step": 8297 + }, + { + "epoch": 0.6696796061657655, + "grad_norm": 0.6282601952552795, + "learning_rate": 0.000127307343280007, + "loss": 2.5762, + "step": 8298 + }, + { + "epoch": 0.6697603099023485, + "grad_norm": 0.6879595518112183, + "learning_rate": 0.0001272921560516296, + "loss": 2.5507, + "step": 8299 + }, + { + "epoch": 0.6698410136389314, + "grad_norm": 0.6108266115188599, + "learning_rate": 0.00012727696814306033, + "loss": 2.5865, + "step": 8300 + }, + { + "epoch": 0.6699217173755145, + "grad_norm": 0.6763970851898193, + "learning_rate": 0.0001272617795546777, + "loss": 2.6439, + "step": 8301 + }, + { + "epoch": 0.6700024211120975, + "grad_norm": 0.6997560858726501, + "learning_rate": 0.00012724659028686027, + "loss": 2.5291, + "step": 8302 + }, + { + "epoch": 0.6700831248486805, + "grad_norm": 0.675714910030365, + "learning_rate": 0.0001272314003399866, + "loss": 2.5452, + "step": 8303 + }, + { + "epoch": 0.6701638285852635, + "grad_norm": 0.6847789883613586, + "learning_rate": 
0.00012721620971443525, + "loss": 2.6111, + "step": 8304 + }, + { + "epoch": 0.6702445323218466, + "grad_norm": 0.7283920645713806, + "learning_rate": 0.0001272010184105848, + "loss": 2.6322, + "step": 8305 + }, + { + "epoch": 0.6703252360584295, + "grad_norm": 0.7551796436309814, + "learning_rate": 0.00012718582642881382, + "loss": 2.5728, + "step": 8306 + }, + { + "epoch": 0.6704059397950125, + "grad_norm": 0.694526195526123, + "learning_rate": 0.00012717063376950104, + "loss": 2.6241, + "step": 8307 + }, + { + "epoch": 0.6704866435315955, + "grad_norm": 0.6956443190574646, + "learning_rate": 0.00012715544043302504, + "loss": 2.5531, + "step": 8308 + }, + { + "epoch": 0.6705673472681786, + "grad_norm": 0.7649452686309814, + "learning_rate": 0.00012714024641976446, + "loss": 2.5462, + "step": 8309 + }, + { + "epoch": 0.6706480510047615, + "grad_norm": 0.7711065411567688, + "learning_rate": 0.00012712505173009797, + "loss": 2.5878, + "step": 8310 + }, + { + "epoch": 0.6707287547413445, + "grad_norm": 0.68077552318573, + "learning_rate": 0.00012710985636440434, + "loss": 2.5668, + "step": 8311 + }, + { + "epoch": 0.6708094584779275, + "grad_norm": 0.7181024551391602, + "learning_rate": 0.0001270946603230622, + "loss": 2.6104, + "step": 8312 + }, + { + "epoch": 0.6708901622145105, + "grad_norm": 0.7136553525924683, + "learning_rate": 0.0001270794636064503, + "loss": 2.5282, + "step": 8313 + }, + { + "epoch": 0.6709708659510936, + "grad_norm": 0.880094587802887, + "learning_rate": 0.00012706426621494736, + "loss": 2.5837, + "step": 8314 + }, + { + "epoch": 0.6710515696876765, + "grad_norm": 0.7438541054725647, + "learning_rate": 0.00012704906814893217, + "loss": 2.5577, + "step": 8315 + }, + { + "epoch": 0.6711322734242595, + "grad_norm": 0.8197470903396606, + "learning_rate": 0.00012703386940878352, + "loss": 2.569, + "step": 8316 + }, + { + "epoch": 0.6712129771608425, + "grad_norm": 0.7728317975997925, + "learning_rate": 0.00012701866999488014, + "loss": 2.6407, + 
"step": 8317 + }, + { + "epoch": 0.6712936808974256, + "grad_norm": 0.7594823837280273, + "learning_rate": 0.0001270034699076009, + "loss": 2.5789, + "step": 8318 + }, + { + "epoch": 0.6713743846340086, + "grad_norm": 0.7502284646034241, + "learning_rate": 0.0001269882691473246, + "loss": 2.6068, + "step": 8319 + }, + { + "epoch": 0.6714550883705915, + "grad_norm": 0.7355664372444153, + "learning_rate": 0.0001269730677144301, + "loss": 2.6055, + "step": 8320 + }, + { + "epoch": 0.6715357921071745, + "grad_norm": 0.7218407392501831, + "learning_rate": 0.0001269578656092962, + "loss": 2.5953, + "step": 8321 + }, + { + "epoch": 0.6716164958437576, + "grad_norm": 0.6932538747787476, + "learning_rate": 0.00012694266283230185, + "loss": 2.5795, + "step": 8322 + }, + { + "epoch": 0.6716971995803406, + "grad_norm": 0.7337260246276855, + "learning_rate": 0.00012692745938382591, + "loss": 2.5606, + "step": 8323 + }, + { + "epoch": 0.6717779033169236, + "grad_norm": 0.6959026455879211, + "learning_rate": 0.00012691225526424731, + "loss": 2.5688, + "step": 8324 + }, + { + "epoch": 0.6718586070535065, + "grad_norm": 0.7352995872497559, + "learning_rate": 0.00012689705047394493, + "loss": 2.6308, + "step": 8325 + }, + { + "epoch": 0.6719393107900896, + "grad_norm": 0.7023616433143616, + "learning_rate": 0.00012688184501329777, + "loss": 2.6462, + "step": 8326 + }, + { + "epoch": 0.6720200145266726, + "grad_norm": 0.6581354737281799, + "learning_rate": 0.00012686663888268474, + "loss": 2.5997, + "step": 8327 + }, + { + "epoch": 0.6721007182632556, + "grad_norm": 0.6332606077194214, + "learning_rate": 0.00012685143208248484, + "loss": 2.6348, + "step": 8328 + }, + { + "epoch": 0.6721814219998385, + "grad_norm": 0.6826457977294922, + "learning_rate": 0.00012683622461307707, + "loss": 2.5092, + "step": 8329 + }, + { + "epoch": 0.6722621257364216, + "grad_norm": 0.7641614079475403, + "learning_rate": 0.00012682101647484042, + "loss": 2.7098, + "step": 8330 + }, + { + "epoch": 
0.6723428294730046, + "grad_norm": 0.7153630256652832, + "learning_rate": 0.00012680580766815394, + "loss": 2.5647, + "step": 8331 + }, + { + "epoch": 0.6724235332095876, + "grad_norm": 0.6746379137039185, + "learning_rate": 0.00012679059819339664, + "loss": 2.6187, + "step": 8332 + }, + { + "epoch": 0.6725042369461706, + "grad_norm": 0.6748883128166199, + "learning_rate": 0.00012677538805094764, + "loss": 2.6045, + "step": 8333 + }, + { + "epoch": 0.6725849406827537, + "grad_norm": 0.7366370558738708, + "learning_rate": 0.00012676017724118596, + "loss": 2.5789, + "step": 8334 + }, + { + "epoch": 0.6726656444193366, + "grad_norm": 0.7381749153137207, + "learning_rate": 0.00012674496576449074, + "loss": 2.5958, + "step": 8335 + }, + { + "epoch": 0.6727463481559196, + "grad_norm": 0.7109243869781494, + "learning_rate": 0.00012672975362124103, + "loss": 2.5874, + "step": 8336 + }, + { + "epoch": 0.6728270518925026, + "grad_norm": 0.6904270052909851, + "learning_rate": 0.00012671454081181595, + "loss": 2.5891, + "step": 8337 + }, + { + "epoch": 0.6729077556290857, + "grad_norm": 0.6809365749359131, + "learning_rate": 0.00012669932733659476, + "loss": 2.5904, + "step": 8338 + }, + { + "epoch": 0.6729884593656686, + "grad_norm": 0.7527552843093872, + "learning_rate": 0.00012668411319595647, + "loss": 2.5602, + "step": 8339 + }, + { + "epoch": 0.6730691631022516, + "grad_norm": 0.6746577620506287, + "learning_rate": 0.00012666889839028038, + "loss": 2.5468, + "step": 8340 + }, + { + "epoch": 0.6731498668388346, + "grad_norm": 0.6904895305633545, + "learning_rate": 0.00012665368291994562, + "loss": 2.623, + "step": 8341 + }, + { + "epoch": 0.6732305705754177, + "grad_norm": 0.6495908498764038, + "learning_rate": 0.00012663846678533135, + "loss": 2.5843, + "step": 8342 + }, + { + "epoch": 0.6733112743120007, + "grad_norm": 0.6782342195510864, + "learning_rate": 0.00012662324998681692, + "loss": 2.6141, + "step": 8343 + }, + { + "epoch": 0.6733919780485836, + "grad_norm": 
0.7090504765510559, + "learning_rate": 0.0001266080325247815, + "loss": 2.6654, + "step": 8344 + }, + { + "epoch": 0.6734726817851666, + "grad_norm": 0.7085515856742859, + "learning_rate": 0.00012659281439960434, + "loss": 2.5394, + "step": 8345 + }, + { + "epoch": 0.6735533855217497, + "grad_norm": 0.6813806295394897, + "learning_rate": 0.00012657759561166473, + "loss": 2.6522, + "step": 8346 + }, + { + "epoch": 0.6736340892583327, + "grad_norm": 0.726378858089447, + "learning_rate": 0.00012656237616134197, + "loss": 2.5922, + "step": 8347 + }, + { + "epoch": 0.6737147929949157, + "grad_norm": 0.6323714256286621, + "learning_rate": 0.00012654715604901534, + "loss": 2.4938, + "step": 8348 + }, + { + "epoch": 0.6737954967314986, + "grad_norm": 0.6925889253616333, + "learning_rate": 0.0001265319352750642, + "loss": 2.635, + "step": 8349 + }, + { + "epoch": 0.6738762004680817, + "grad_norm": 0.6676003932952881, + "learning_rate": 0.00012651671383986788, + "loss": 2.558, + "step": 8350 + }, + { + "epoch": 0.6739569042046647, + "grad_norm": 0.7464616298675537, + "learning_rate": 0.00012650149174380575, + "loss": 2.5777, + "step": 8351 + }, + { + "epoch": 0.6740376079412477, + "grad_norm": 0.6611667275428772, + "learning_rate": 0.00012648626898725715, + "loss": 2.5779, + "step": 8352 + }, + { + "epoch": 0.6741183116778307, + "grad_norm": 0.7391866445541382, + "learning_rate": 0.00012647104557060148, + "loss": 2.5624, + "step": 8353 + }, + { + "epoch": 0.6741990154144137, + "grad_norm": 0.7107826471328735, + "learning_rate": 0.00012645582149421817, + "loss": 2.5744, + "step": 8354 + }, + { + "epoch": 0.6742797191509967, + "grad_norm": 0.7385339736938477, + "learning_rate": 0.00012644059675848666, + "loss": 2.5752, + "step": 8355 + }, + { + "epoch": 0.6743604228875797, + "grad_norm": 0.6887345314025879, + "learning_rate": 0.00012642537136378634, + "loss": 2.5794, + "step": 8356 + }, + { + "epoch": 0.6744411266241627, + "grad_norm": 0.6934933662414551, + "learning_rate": 
0.00012641014531049666, + "loss": 2.5361, + "step": 8357 + }, + { + "epoch": 0.6745218303607458, + "grad_norm": 0.7437291741371155, + "learning_rate": 0.00012639491859899716, + "loss": 2.5741, + "step": 8358 + }, + { + "epoch": 0.6746025340973287, + "grad_norm": 0.7088494896888733, + "learning_rate": 0.00012637969122966729, + "loss": 2.6449, + "step": 8359 + }, + { + "epoch": 0.6746832378339117, + "grad_norm": 0.7496390342712402, + "learning_rate": 0.00012636446320288654, + "loss": 2.6109, + "step": 8360 + }, + { + "epoch": 0.6747639415704947, + "grad_norm": 0.6949843764305115, + "learning_rate": 0.00012634923451903447, + "loss": 2.5769, + "step": 8361 + }, + { + "epoch": 0.6748446453070778, + "grad_norm": 0.7192673087120056, + "learning_rate": 0.00012633400517849056, + "loss": 2.6053, + "step": 8362 + }, + { + "epoch": 0.6749253490436607, + "grad_norm": 0.7003379464149475, + "learning_rate": 0.00012631877518163442, + "loss": 2.5745, + "step": 8363 + }, + { + "epoch": 0.6750060527802437, + "grad_norm": 0.7499879002571106, + "learning_rate": 0.00012630354452884563, + "loss": 2.6077, + "step": 8364 + }, + { + "epoch": 0.6750867565168267, + "grad_norm": 0.7047405242919922, + "learning_rate": 0.00012628831322050377, + "loss": 2.5955, + "step": 8365 + }, + { + "epoch": 0.6751674602534097, + "grad_norm": 0.7463203072547913, + "learning_rate": 0.00012627308125698838, + "loss": 2.5421, + "step": 8366 + }, + { + "epoch": 0.6752481639899928, + "grad_norm": 0.7377086877822876, + "learning_rate": 0.00012625784863867914, + "loss": 2.5804, + "step": 8367 + }, + { + "epoch": 0.6753288677265757, + "grad_norm": 0.7136400938034058, + "learning_rate": 0.00012624261536595566, + "loss": 2.5673, + "step": 8368 + }, + { + "epoch": 0.6754095714631587, + "grad_norm": 0.6923615336418152, + "learning_rate": 0.0001262273814391976, + "loss": 2.5832, + "step": 8369 + }, + { + "epoch": 0.6754902751997417, + "grad_norm": 0.7495028972625732, + "learning_rate": 0.00012621214685878469, + "loss": 
2.5943, + "step": 8370 + }, + { + "epoch": 0.6755709789363248, + "grad_norm": 0.6751434206962585, + "learning_rate": 0.0001261969116250965, + "loss": 2.5495, + "step": 8371 + }, + { + "epoch": 0.6756516826729078, + "grad_norm": 0.7055973410606384, + "learning_rate": 0.00012618167573851284, + "loss": 2.5651, + "step": 8372 + }, + { + "epoch": 0.6757323864094907, + "grad_norm": 0.7479640245437622, + "learning_rate": 0.00012616643919941337, + "loss": 2.653, + "step": 8373 + }, + { + "epoch": 0.6758130901460737, + "grad_norm": 0.7075015902519226, + "learning_rate": 0.00012615120200817778, + "loss": 2.5787, + "step": 8374 + }, + { + "epoch": 0.6758937938826568, + "grad_norm": 0.7513934969902039, + "learning_rate": 0.00012613596416518593, + "loss": 2.6099, + "step": 8375 + }, + { + "epoch": 0.6759744976192398, + "grad_norm": 0.6742326021194458, + "learning_rate": 0.00012612072567081754, + "loss": 2.5335, + "step": 8376 + }, + { + "epoch": 0.6760552013558228, + "grad_norm": 0.7271459698677063, + "learning_rate": 0.00012610548652545239, + "loss": 2.6082, + "step": 8377 + }, + { + "epoch": 0.6761359050924057, + "grad_norm": 0.7481515407562256, + "learning_rate": 0.00012609024672947022, + "loss": 2.5805, + "step": 8378 + }, + { + "epoch": 0.6762166088289888, + "grad_norm": 0.7484803199768066, + "learning_rate": 0.00012607500628325093, + "loss": 2.6099, + "step": 8379 + }, + { + "epoch": 0.6762973125655718, + "grad_norm": 0.7462390661239624, + "learning_rate": 0.00012605976518717435, + "loss": 2.6054, + "step": 8380 + }, + { + "epoch": 0.6763780163021548, + "grad_norm": 0.7014410495758057, + "learning_rate": 0.00012604452344162028, + "loss": 2.5614, + "step": 8381 + }, + { + "epoch": 0.6764587200387377, + "grad_norm": 0.6902963519096375, + "learning_rate": 0.0001260292810469686, + "loss": 2.5813, + "step": 8382 + }, + { + "epoch": 0.6765394237753208, + "grad_norm": 0.6646186113357544, + "learning_rate": 0.00012601403800359919, + "loss": 2.545, + "step": 8383 + }, + { + 
"epoch": 0.6766201275119038, + "grad_norm": 0.7067462801933289, + "learning_rate": 0.00012599879431189197, + "loss": 2.6195, + "step": 8384 + }, + { + "epoch": 0.6767008312484868, + "grad_norm": 0.7263965010643005, + "learning_rate": 0.0001259835499722268, + "loss": 2.5929, + "step": 8385 + }, + { + "epoch": 0.6767815349850698, + "grad_norm": 0.6672000885009766, + "learning_rate": 0.0001259683049849837, + "loss": 2.5561, + "step": 8386 + }, + { + "epoch": 0.6768622387216529, + "grad_norm": 0.6543236374855042, + "learning_rate": 0.0001259530593505425, + "loss": 2.6256, + "step": 8387 + }, + { + "epoch": 0.6769429424582358, + "grad_norm": 0.6532339453697205, + "learning_rate": 0.00012593781306928324, + "loss": 2.5074, + "step": 8388 + }, + { + "epoch": 0.6770236461948188, + "grad_norm": 0.7442833185195923, + "learning_rate": 0.00012592256614158591, + "loss": 2.6124, + "step": 8389 + }, + { + "epoch": 0.6771043499314018, + "grad_norm": 0.786685585975647, + "learning_rate": 0.00012590731856783043, + "loss": 2.6077, + "step": 8390 + }, + { + "epoch": 0.6771850536679849, + "grad_norm": 0.7952337265014648, + "learning_rate": 0.00012589207034839687, + "loss": 2.5894, + "step": 8391 + }, + { + "epoch": 0.6772657574045678, + "grad_norm": 0.7847954034805298, + "learning_rate": 0.00012587682148366524, + "loss": 2.4934, + "step": 8392 + }, + { + "epoch": 0.6773464611411508, + "grad_norm": 0.6769007444381714, + "learning_rate": 0.00012586157197401552, + "loss": 2.5695, + "step": 8393 + }, + { + "epoch": 0.6774271648777338, + "grad_norm": 0.6583757996559143, + "learning_rate": 0.00012584632181982788, + "loss": 2.5866, + "step": 8394 + }, + { + "epoch": 0.6775078686143169, + "grad_norm": 0.7375823855400085, + "learning_rate": 0.0001258310710214823, + "loss": 2.5141, + "step": 8395 + }, + { + "epoch": 0.6775885723508999, + "grad_norm": 0.6901078224182129, + "learning_rate": 0.00012581581957935896, + "loss": 2.5732, + "step": 8396 + }, + { + "epoch": 0.6776692760874828, + 
"grad_norm": 0.687152624130249, + "learning_rate": 0.0001258005674938379, + "loss": 2.5916, + "step": 8397 + }, + { + "epoch": 0.6777499798240658, + "grad_norm": 0.7198586463928223, + "learning_rate": 0.00012578531476529917, + "loss": 2.5626, + "step": 8398 + }, + { + "epoch": 0.6778306835606489, + "grad_norm": 0.7417474985122681, + "learning_rate": 0.00012577006139412309, + "loss": 2.5486, + "step": 8399 + }, + { + "epoch": 0.6779113872972319, + "grad_norm": 0.6588087677955627, + "learning_rate": 0.0001257548073806897, + "loss": 2.6123, + "step": 8400 + }, + { + "epoch": 0.6779920910338149, + "grad_norm": 0.7211382389068604, + "learning_rate": 0.00012573955272537915, + "loss": 2.6402, + "step": 8401 + }, + { + "epoch": 0.6780727947703978, + "grad_norm": 0.7196084856987, + "learning_rate": 0.00012572429742857167, + "loss": 2.51, + "step": 8402 + }, + { + "epoch": 0.6781534985069809, + "grad_norm": 0.6399394273757935, + "learning_rate": 0.00012570904149064748, + "loss": 2.5309, + "step": 8403 + }, + { + "epoch": 0.6782342022435639, + "grad_norm": 0.6969572305679321, + "learning_rate": 0.00012569378491198674, + "loss": 2.5829, + "step": 8404 + }, + { + "epoch": 0.6783149059801469, + "grad_norm": 0.8005492091178894, + "learning_rate": 0.00012567852769296975, + "loss": 2.6277, + "step": 8405 + }, + { + "epoch": 0.6783956097167299, + "grad_norm": 0.6786207556724548, + "learning_rate": 0.0001256632698339767, + "loss": 2.5839, + "step": 8406 + }, + { + "epoch": 0.6784763134533129, + "grad_norm": 0.7047130465507507, + "learning_rate": 0.0001256480113353879, + "loss": 2.533, + "step": 8407 + }, + { + "epoch": 0.6785570171898959, + "grad_norm": 0.7640479803085327, + "learning_rate": 0.0001256327521975836, + "loss": 2.5855, + "step": 8408 + }, + { + "epoch": 0.6786377209264789, + "grad_norm": 0.728111207485199, + "learning_rate": 0.00012561749242094412, + "loss": 2.6184, + "step": 8409 + }, + { + "epoch": 0.6787184246630619, + "grad_norm": 0.7842772603034973, + 
"learning_rate": 0.00012560223200584975, + "loss": 2.5915, + "step": 8410 + }, + { + "epoch": 0.678799128399645, + "grad_norm": 0.7129092812538147, + "learning_rate": 0.00012558697095268085, + "loss": 2.6526, + "step": 8411 + }, + { + "epoch": 0.6788798321362279, + "grad_norm": 0.751103401184082, + "learning_rate": 0.00012557170926181773, + "loss": 2.605, + "step": 8412 + }, + { + "epoch": 0.6789605358728109, + "grad_norm": 0.6850594878196716, + "learning_rate": 0.0001255564469336408, + "loss": 2.6047, + "step": 8413 + }, + { + "epoch": 0.6790412396093939, + "grad_norm": 0.703037679195404, + "learning_rate": 0.00012554118396853036, + "loss": 2.653, + "step": 8414 + }, + { + "epoch": 0.6791219433459769, + "grad_norm": 0.8097915053367615, + "learning_rate": 0.0001255259203668669, + "loss": 2.5937, + "step": 8415 + }, + { + "epoch": 0.67920264708256, + "grad_norm": 0.700351357460022, + "learning_rate": 0.00012551065612903076, + "loss": 2.6089, + "step": 8416 + }, + { + "epoch": 0.6792833508191429, + "grad_norm": 0.6760888695716858, + "learning_rate": 0.00012549539125540236, + "loss": 2.547, + "step": 8417 + }, + { + "epoch": 0.6793640545557259, + "grad_norm": 0.6751723289489746, + "learning_rate": 0.0001254801257463622, + "loss": 2.625, + "step": 8418 + }, + { + "epoch": 0.6794447582923089, + "grad_norm": 0.6928921937942505, + "learning_rate": 0.00012546485960229065, + "loss": 2.5671, + "step": 8419 + }, + { + "epoch": 0.679525462028892, + "grad_norm": 0.6541565656661987, + "learning_rate": 0.0001254495928235683, + "loss": 2.5837, + "step": 8420 + }, + { + "epoch": 0.679606165765475, + "grad_norm": 0.6228676438331604, + "learning_rate": 0.00012543432541057555, + "loss": 2.5798, + "step": 8421 + }, + { + "epoch": 0.6796868695020579, + "grad_norm": 0.7620853185653687, + "learning_rate": 0.0001254190573636929, + "loss": 2.5885, + "step": 8422 + }, + { + "epoch": 0.6797675732386409, + "grad_norm": 0.7425604462623596, + "learning_rate": 0.0001254037886833009, + "loss": 
2.6124, + "step": 8423 + }, + { + "epoch": 0.679848276975224, + "grad_norm": 0.7150974273681641, + "learning_rate": 0.0001253885193697801, + "loss": 2.5423, + "step": 8424 + }, + { + "epoch": 0.679928980711807, + "grad_norm": 0.672649621963501, + "learning_rate": 0.000125373249423511, + "loss": 2.5563, + "step": 8425 + }, + { + "epoch": 0.6800096844483899, + "grad_norm": 0.6913620829582214, + "learning_rate": 0.00012535797884487425, + "loss": 2.5261, + "step": 8426 + }, + { + "epoch": 0.6800903881849729, + "grad_norm": 0.712123692035675, + "learning_rate": 0.00012534270763425034, + "loss": 2.5958, + "step": 8427 + }, + { + "epoch": 0.680171091921556, + "grad_norm": 0.7593061327934265, + "learning_rate": 0.00012532743579201993, + "loss": 2.6036, + "step": 8428 + }, + { + "epoch": 0.680251795658139, + "grad_norm": 0.7108714580535889, + "learning_rate": 0.0001253121633185636, + "loss": 2.6004, + "step": 8429 + }, + { + "epoch": 0.680332499394722, + "grad_norm": 0.7142449021339417, + "learning_rate": 0.00012529689021426198, + "loss": 2.588, + "step": 8430 + }, + { + "epoch": 0.6804132031313049, + "grad_norm": 0.7579841017723083, + "learning_rate": 0.00012528161647949574, + "loss": 2.5927, + "step": 8431 + }, + { + "epoch": 0.680493906867888, + "grad_norm": 0.6522083878517151, + "learning_rate": 0.00012526634211464555, + "loss": 2.5619, + "step": 8432 + }, + { + "epoch": 0.680574610604471, + "grad_norm": 0.7681782245635986, + "learning_rate": 0.00012525106712009203, + "loss": 2.6065, + "step": 8433 + }, + { + "epoch": 0.680655314341054, + "grad_norm": 0.6900169253349304, + "learning_rate": 0.00012523579149621594, + "loss": 2.5507, + "step": 8434 + }, + { + "epoch": 0.680736018077637, + "grad_norm": 0.6907666325569153, + "learning_rate": 0.00012522051524339794, + "loss": 2.5213, + "step": 8435 + }, + { + "epoch": 0.68081672181422, + "grad_norm": 0.7202023267745972, + "learning_rate": 0.0001252052383620188, + "loss": 2.6367, + "step": 8436 + }, + { + "epoch": 
0.680897425550803, + "grad_norm": 0.7893621325492859, + "learning_rate": 0.00012518996085245925, + "loss": 2.6066, + "step": 8437 + }, + { + "epoch": 0.680978129287386, + "grad_norm": 0.7693532109260559, + "learning_rate": 0.00012517468271509998, + "loss": 2.5346, + "step": 8438 + }, + { + "epoch": 0.681058833023969, + "grad_norm": 0.7976840734481812, + "learning_rate": 0.0001251594039503218, + "loss": 2.5991, + "step": 8439 + }, + { + "epoch": 0.6811395367605521, + "grad_norm": 0.7671225666999817, + "learning_rate": 0.00012514412455850554, + "loss": 2.5959, + "step": 8440 + }, + { + "epoch": 0.681220240497135, + "grad_norm": 0.7143450975418091, + "learning_rate": 0.00012512884454003194, + "loss": 2.5828, + "step": 8441 + }, + { + "epoch": 0.681300944233718, + "grad_norm": 0.6821861863136292, + "learning_rate": 0.00012511356389528192, + "loss": 2.5908, + "step": 8442 + }, + { + "epoch": 0.681381647970301, + "grad_norm": 0.7279960513114929, + "learning_rate": 0.00012509828262463615, + "loss": 2.578, + "step": 8443 + }, + { + "epoch": 0.6814623517068841, + "grad_norm": 0.6503065824508667, + "learning_rate": 0.0001250830007284756, + "loss": 2.525, + "step": 8444 + }, + { + "epoch": 0.681543055443467, + "grad_norm": 0.7276029586791992, + "learning_rate": 0.00012506771820718112, + "loss": 2.584, + "step": 8445 + }, + { + "epoch": 0.68162375918005, + "grad_norm": 0.7635578513145447, + "learning_rate": 0.00012505243506113356, + "loss": 2.627, + "step": 8446 + }, + { + "epoch": 0.681704462916633, + "grad_norm": 0.7086981534957886, + "learning_rate": 0.00012503715129071386, + "loss": 2.6164, + "step": 8447 + }, + { + "epoch": 0.6817851666532161, + "grad_norm": 0.7144165635108948, + "learning_rate": 0.00012502186689630285, + "loss": 2.5642, + "step": 8448 + }, + { + "epoch": 0.6818658703897991, + "grad_norm": 0.8135093450546265, + "learning_rate": 0.00012500658187828155, + "loss": 2.6161, + "step": 8449 + }, + { + "epoch": 0.681946574126382, + "grad_norm": 
0.7223377227783203, + "learning_rate": 0.00012499129623703086, + "loss": 2.6192, + "step": 8450 + }, + { + "epoch": 0.682027277862965, + "grad_norm": 0.7189127206802368, + "learning_rate": 0.00012497600997293172, + "loss": 2.6086, + "step": 8451 + }, + { + "epoch": 0.6821079815995481, + "grad_norm": 0.6742144823074341, + "learning_rate": 0.00012496072308636514, + "loss": 2.5747, + "step": 8452 + }, + { + "epoch": 0.6821886853361311, + "grad_norm": 0.7432419657707214, + "learning_rate": 0.0001249454355777121, + "loss": 2.5687, + "step": 8453 + }, + { + "epoch": 0.6822693890727141, + "grad_norm": 0.6140317320823669, + "learning_rate": 0.00012493014744735357, + "loss": 2.5371, + "step": 8454 + }, + { + "epoch": 0.682350092809297, + "grad_norm": 0.7215768098831177, + "learning_rate": 0.0001249148586956706, + "loss": 2.6806, + "step": 8455 + }, + { + "epoch": 0.6824307965458801, + "grad_norm": 0.7485790252685547, + "learning_rate": 0.0001248995693230442, + "loss": 2.575, + "step": 8456 + }, + { + "epoch": 0.6825115002824631, + "grad_norm": 0.744349479675293, + "learning_rate": 0.00012488427932985552, + "loss": 2.5961, + "step": 8457 + }, + { + "epoch": 0.6825922040190461, + "grad_norm": 0.6784959435462952, + "learning_rate": 0.0001248689887164855, + "loss": 2.5501, + "step": 8458 + }, + { + "epoch": 0.682672907755629, + "grad_norm": 0.6664010286331177, + "learning_rate": 0.0001248536974833153, + "loss": 2.5741, + "step": 8459 + }, + { + "epoch": 0.6827536114922121, + "grad_norm": 0.7185953259468079, + "learning_rate": 0.00012483840563072592, + "loss": 2.5875, + "step": 8460 + }, + { + "epoch": 0.6828343152287951, + "grad_norm": 0.6553035378456116, + "learning_rate": 0.00012482311315909864, + "loss": 2.5321, + "step": 8461 + }, + { + "epoch": 0.6829150189653781, + "grad_norm": 0.6713398694992065, + "learning_rate": 0.00012480782006881442, + "loss": 2.6207, + "step": 8462 + }, + { + "epoch": 0.6829957227019611, + "grad_norm": 0.6733734607696533, + "learning_rate": 
0.00012479252636025452, + "loss": 2.5746, + "step": 8463 + }, + { + "epoch": 0.6830764264385442, + "grad_norm": 0.7257994413375854, + "learning_rate": 0.00012477723203380004, + "loss": 2.5837, + "step": 8464 + }, + { + "epoch": 0.6831571301751271, + "grad_norm": 0.716242253780365, + "learning_rate": 0.00012476193708983214, + "loss": 2.5611, + "step": 8465 + }, + { + "epoch": 0.6832378339117101, + "grad_norm": 0.6797829866409302, + "learning_rate": 0.0001247466415287321, + "loss": 2.5763, + "step": 8466 + }, + { + "epoch": 0.6833185376482931, + "grad_norm": 0.679931640625, + "learning_rate": 0.000124731345350881, + "loss": 2.606, + "step": 8467 + }, + { + "epoch": 0.6833992413848761, + "grad_norm": 0.6767866611480713, + "learning_rate": 0.00012471604855666016, + "loss": 2.5682, + "step": 8468 + }, + { + "epoch": 0.6834799451214592, + "grad_norm": 0.7297048568725586, + "learning_rate": 0.00012470075114645078, + "loss": 2.5527, + "step": 8469 + }, + { + "epoch": 0.6835606488580421, + "grad_norm": 0.6882644295692444, + "learning_rate": 0.0001246854531206341, + "loss": 2.5712, + "step": 8470 + }, + { + "epoch": 0.6836413525946251, + "grad_norm": 0.7129159569740295, + "learning_rate": 0.00012467015447959143, + "loss": 2.5627, + "step": 8471 + }, + { + "epoch": 0.6837220563312081, + "grad_norm": 0.6671481728553772, + "learning_rate": 0.000124654855223704, + "loss": 2.6226, + "step": 8472 + }, + { + "epoch": 0.6838027600677912, + "grad_norm": 0.7096946835517883, + "learning_rate": 0.00012463955535335313, + "loss": 2.5373, + "step": 8473 + }, + { + "epoch": 0.6838834638043741, + "grad_norm": 0.6781395077705383, + "learning_rate": 0.00012462425486892012, + "loss": 2.5607, + "step": 8474 + }, + { + "epoch": 0.6839641675409571, + "grad_norm": 0.6777891516685486, + "learning_rate": 0.00012460895377078632, + "loss": 2.5991, + "step": 8475 + }, + { + "epoch": 0.6840448712775401, + "grad_norm": 0.7175275087356567, + "learning_rate": 0.00012459365205933306, + "loss": 2.6006, + 
"step": 8476 + }, + { + "epoch": 0.6841255750141232, + "grad_norm": 0.6832807660102844, + "learning_rate": 0.00012457834973494174, + "loss": 2.5757, + "step": 8477 + }, + { + "epoch": 0.6842062787507062, + "grad_norm": 0.7002938985824585, + "learning_rate": 0.00012456304679799366, + "loss": 2.554, + "step": 8478 + }, + { + "epoch": 0.6842869824872891, + "grad_norm": 0.7236241698265076, + "learning_rate": 0.00012454774324887027, + "loss": 2.6054, + "step": 8479 + }, + { + "epoch": 0.6843676862238721, + "grad_norm": 0.7327216267585754, + "learning_rate": 0.00012453243908795288, + "loss": 2.6101, + "step": 8480 + }, + { + "epoch": 0.6844483899604552, + "grad_norm": 0.7414156794548035, + "learning_rate": 0.00012451713431562306, + "loss": 2.5505, + "step": 8481 + }, + { + "epoch": 0.6845290936970382, + "grad_norm": 0.697795569896698, + "learning_rate": 0.00012450182893226214, + "loss": 2.539, + "step": 8482 + }, + { + "epoch": 0.6846097974336212, + "grad_norm": 0.7053593397140503, + "learning_rate": 0.00012448652293825158, + "loss": 2.6045, + "step": 8483 + }, + { + "epoch": 0.6846905011702041, + "grad_norm": 0.6710856556892395, + "learning_rate": 0.00012447121633397287, + "loss": 2.554, + "step": 8484 + }, + { + "epoch": 0.6847712049067872, + "grad_norm": 0.754454493522644, + "learning_rate": 0.0001244559091198075, + "loss": 2.5523, + "step": 8485 + }, + { + "epoch": 0.6848519086433702, + "grad_norm": 0.6468656659126282, + "learning_rate": 0.0001244406012961369, + "loss": 2.5931, + "step": 8486 + }, + { + "epoch": 0.6849326123799532, + "grad_norm": 0.7169063091278076, + "learning_rate": 0.00012442529286334266, + "loss": 2.5743, + "step": 8487 + }, + { + "epoch": 0.6850133161165362, + "grad_norm": 0.6737040877342224, + "learning_rate": 0.00012440998382180627, + "loss": 2.5734, + "step": 8488 + }, + { + "epoch": 0.6850940198531192, + "grad_norm": 0.7026428580284119, + "learning_rate": 0.0001243946741719093, + "loss": 2.4994, + "step": 8489 + }, + { + "epoch": 
0.6851747235897022, + "grad_norm": 0.7378512024879456, + "learning_rate": 0.00012437936391403322, + "loss": 2.5611, + "step": 8490 + }, + { + "epoch": 0.6852554273262852, + "grad_norm": 0.7379863262176514, + "learning_rate": 0.0001243640530485597, + "loss": 2.538, + "step": 8491 + }, + { + "epoch": 0.6853361310628682, + "grad_norm": 0.68398118019104, + "learning_rate": 0.00012434874157587027, + "loss": 2.5593, + "step": 8492 + }, + { + "epoch": 0.6854168347994513, + "grad_norm": 0.6780444383621216, + "learning_rate": 0.0001243334294963466, + "loss": 2.5068, + "step": 8493 + }, + { + "epoch": 0.6854975385360342, + "grad_norm": 0.7425427436828613, + "learning_rate": 0.0001243181168103702, + "loss": 2.6607, + "step": 8494 + }, + { + "epoch": 0.6855782422726172, + "grad_norm": 0.7563300132751465, + "learning_rate": 0.0001243028035183228, + "loss": 2.5915, + "step": 8495 + }, + { + "epoch": 0.6856589460092002, + "grad_norm": 0.6746618151664734, + "learning_rate": 0.000124287489620586, + "loss": 2.5399, + "step": 8496 + }, + { + "epoch": 0.6857396497457833, + "grad_norm": 0.7100487947463989, + "learning_rate": 0.00012427217511754146, + "loss": 2.5927, + "step": 8497 + }, + { + "epoch": 0.6858203534823663, + "grad_norm": 0.6487080454826355, + "learning_rate": 0.00012425686000957088, + "loss": 2.5582, + "step": 8498 + }, + { + "epoch": 0.6859010572189492, + "grad_norm": 0.6577199697494507, + "learning_rate": 0.00012424154429705592, + "loss": 2.5589, + "step": 8499 + }, + { + "epoch": 0.6859817609555322, + "grad_norm": 0.6748726963996887, + "learning_rate": 0.00012422622798037832, + "loss": 2.5651, + "step": 8500 + }, + { + "epoch": 0.6860624646921153, + "grad_norm": 0.7159377336502075, + "learning_rate": 0.0001242109110599198, + "loss": 2.569, + "step": 8501 + }, + { + "epoch": 0.6861431684286983, + "grad_norm": 0.6772934198379517, + "learning_rate": 0.00012419559353606208, + "loss": 2.5533, + "step": 8502 + }, + { + "epoch": 0.6862238721652812, + "grad_norm": 
0.6776062846183777, + "learning_rate": 0.00012418027540918693, + "loss": 2.5704, + "step": 8503 + }, + { + "epoch": 0.6863045759018642, + "grad_norm": 0.7009913921356201, + "learning_rate": 0.00012416495667967608, + "loss": 2.5928, + "step": 8504 + }, + { + "epoch": 0.6863852796384473, + "grad_norm": 0.607571005821228, + "learning_rate": 0.00012414963734791137, + "loss": 2.5459, + "step": 8505 + }, + { + "epoch": 0.6864659833750303, + "grad_norm": 0.6798292398452759, + "learning_rate": 0.00012413431741427458, + "loss": 2.6585, + "step": 8506 + }, + { + "epoch": 0.6865466871116133, + "grad_norm": 0.7892771363258362, + "learning_rate": 0.00012411899687914747, + "loss": 2.5781, + "step": 8507 + }, + { + "epoch": 0.6866273908481962, + "grad_norm": 0.6683816909790039, + "learning_rate": 0.00012410367574291199, + "loss": 2.5598, + "step": 8508 + }, + { + "epoch": 0.6867080945847793, + "grad_norm": 0.7591805458068848, + "learning_rate": 0.00012408835400594983, + "loss": 2.6478, + "step": 8509 + }, + { + "epoch": 0.6867887983213623, + "grad_norm": 0.6896353960037231, + "learning_rate": 0.00012407303166864293, + "loss": 2.5418, + "step": 8510 + }, + { + "epoch": 0.6868695020579453, + "grad_norm": 0.6657233834266663, + "learning_rate": 0.00012405770873137316, + "loss": 2.5753, + "step": 8511 + }, + { + "epoch": 0.6869502057945283, + "grad_norm": 0.6775455474853516, + "learning_rate": 0.00012404238519452237, + "loss": 2.4902, + "step": 8512 + }, + { + "epoch": 0.6870309095311113, + "grad_norm": 0.6572847962379456, + "learning_rate": 0.00012402706105847254, + "loss": 2.6189, + "step": 8513 + }, + { + "epoch": 0.6871116132676943, + "grad_norm": 0.7159940004348755, + "learning_rate": 0.00012401173632360557, + "loss": 2.5928, + "step": 8514 + }, + { + "epoch": 0.6871923170042773, + "grad_norm": 0.7178850173950195, + "learning_rate": 0.0001239964109903033, + "loss": 2.5342, + "step": 8515 + }, + { + "epoch": 0.6872730207408603, + "grad_norm": 0.6761649250984192, + "learning_rate": 
0.00012398108505894774, + "loss": 2.5716, + "step": 8516 + }, + { + "epoch": 0.6873537244774433, + "grad_norm": 0.6831200122833252, + "learning_rate": 0.0001239657585299209, + "loss": 2.5506, + "step": 8517 + }, + { + "epoch": 0.6874344282140263, + "grad_norm": 0.7064316868782043, + "learning_rate": 0.00012395043140360468, + "loss": 2.541, + "step": 8518 + }, + { + "epoch": 0.6875151319506093, + "grad_norm": 0.7269963026046753, + "learning_rate": 0.00012393510368038113, + "loss": 2.541, + "step": 8519 + }, + { + "epoch": 0.6875958356871923, + "grad_norm": 0.6651471257209778, + "learning_rate": 0.00012391977536063218, + "loss": 2.5476, + "step": 8520 + }, + { + "epoch": 0.6876765394237753, + "grad_norm": 0.7649257779121399, + "learning_rate": 0.00012390444644473994, + "loss": 2.601, + "step": 8521 + }, + { + "epoch": 0.6877572431603584, + "grad_norm": 0.6637376546859741, + "learning_rate": 0.0001238891169330864, + "loss": 2.5582, + "step": 8522 + }, + { + "epoch": 0.6878379468969413, + "grad_norm": 0.6609189510345459, + "learning_rate": 0.0001238737868260536, + "loss": 2.5795, + "step": 8523 + }, + { + "epoch": 0.6879186506335243, + "grad_norm": 0.657494843006134, + "learning_rate": 0.00012385845612402363, + "loss": 2.6005, + "step": 8524 + }, + { + "epoch": 0.6879993543701073, + "grad_norm": 0.6780641674995422, + "learning_rate": 0.00012384312482737858, + "loss": 2.514, + "step": 8525 + }, + { + "epoch": 0.6880800581066904, + "grad_norm": 0.7310795187950134, + "learning_rate": 0.00012382779293650052, + "loss": 2.5707, + "step": 8526 + }, + { + "epoch": 0.6881607618432733, + "grad_norm": 0.6722557544708252, + "learning_rate": 0.0001238124604517716, + "loss": 2.5897, + "step": 8527 + }, + { + "epoch": 0.6882414655798563, + "grad_norm": 0.6502346992492676, + "learning_rate": 0.0001237971273735739, + "loss": 2.5554, + "step": 8528 + }, + { + "epoch": 0.6883221693164393, + "grad_norm": 0.6993897557258606, + "learning_rate": 0.0001237817937022896, + "loss": 2.6328, + 
"step": 8529 + }, + { + "epoch": 0.6884028730530224, + "grad_norm": 0.7069644331932068, + "learning_rate": 0.00012376645943830083, + "loss": 2.5957, + "step": 8530 + }, + { + "epoch": 0.6884835767896054, + "grad_norm": 0.7193333506584167, + "learning_rate": 0.00012375112458198973, + "loss": 2.6505, + "step": 8531 + }, + { + "epoch": 0.6885642805261883, + "grad_norm": 0.6821088194847107, + "learning_rate": 0.00012373578913373853, + "loss": 2.6129, + "step": 8532 + }, + { + "epoch": 0.6886449842627713, + "grad_norm": 0.6499428749084473, + "learning_rate": 0.00012372045309392947, + "loss": 2.6053, + "step": 8533 + }, + { + "epoch": 0.6887256879993544, + "grad_norm": 0.7469449639320374, + "learning_rate": 0.00012370511646294464, + "loss": 2.6423, + "step": 8534 + }, + { + "epoch": 0.6888063917359374, + "grad_norm": 0.7326325178146362, + "learning_rate": 0.00012368977924116637, + "loss": 2.5708, + "step": 8535 + }, + { + "epoch": 0.6888870954725204, + "grad_norm": 0.7459580302238464, + "learning_rate": 0.00012367444142897686, + "loss": 2.544, + "step": 8536 + }, + { + "epoch": 0.6889677992091033, + "grad_norm": 0.7198929786682129, + "learning_rate": 0.00012365910302675843, + "loss": 2.6295, + "step": 8537 + }, + { + "epoch": 0.6890485029456864, + "grad_norm": 0.8139802813529968, + "learning_rate": 0.0001236437640348933, + "loss": 2.549, + "step": 8538 + }, + { + "epoch": 0.6891292066822694, + "grad_norm": 0.6497162580490112, + "learning_rate": 0.00012362842445376372, + "loss": 2.5849, + "step": 8539 + }, + { + "epoch": 0.6892099104188524, + "grad_norm": 0.7378165125846863, + "learning_rate": 0.00012361308428375208, + "loss": 2.606, + "step": 8540 + }, + { + "epoch": 0.6892906141554354, + "grad_norm": 0.6807567477226257, + "learning_rate": 0.00012359774352524062, + "loss": 2.5892, + "step": 8541 + }, + { + "epoch": 0.6893713178920184, + "grad_norm": 0.6639370918273926, + "learning_rate": 0.0001235824021786117, + "loss": 2.5249, + "step": 8542 + }, + { + "epoch": 
0.6894520216286014, + "grad_norm": 0.7140880823135376, + "learning_rate": 0.00012356706024424773, + "loss": 2.5877, + "step": 8543 + }, + { + "epoch": 0.6895327253651844, + "grad_norm": 0.7079257965087891, + "learning_rate": 0.00012355171772253097, + "loss": 2.6011, + "step": 8544 + }, + { + "epoch": 0.6896134291017674, + "grad_norm": 0.7150856852531433, + "learning_rate": 0.00012353637461384387, + "loss": 2.549, + "step": 8545 + }, + { + "epoch": 0.6896941328383505, + "grad_norm": 0.6896397471427917, + "learning_rate": 0.00012352103091856876, + "loss": 2.5452, + "step": 8546 + }, + { + "epoch": 0.6897748365749334, + "grad_norm": 0.696964681148529, + "learning_rate": 0.00012350568663708808, + "loss": 2.5075, + "step": 8547 + }, + { + "epoch": 0.6898555403115164, + "grad_norm": 0.6926069855690002, + "learning_rate": 0.00012349034176978427, + "loss": 2.5905, + "step": 8548 + }, + { + "epoch": 0.6899362440480994, + "grad_norm": 0.6949423551559448, + "learning_rate": 0.00012347499631703968, + "loss": 2.5284, + "step": 8549 + }, + { + "epoch": 0.6900169477846825, + "grad_norm": 0.6480536460876465, + "learning_rate": 0.0001234596502792369, + "loss": 2.5713, + "step": 8550 + }, + { + "epoch": 0.6900976515212655, + "grad_norm": 0.6990019679069519, + "learning_rate": 0.00012344430365675825, + "loss": 2.5826, + "step": 8551 + }, + { + "epoch": 0.6901783552578484, + "grad_norm": 0.7063903212547302, + "learning_rate": 0.00012342895644998627, + "loss": 2.5271, + "step": 8552 + }, + { + "epoch": 0.6902590589944314, + "grad_norm": 0.7037132978439331, + "learning_rate": 0.0001234136086593035, + "loss": 2.5855, + "step": 8553 + }, + { + "epoch": 0.6903397627310145, + "grad_norm": 0.679701030254364, + "learning_rate": 0.00012339826028509235, + "loss": 2.5577, + "step": 8554 + }, + { + "epoch": 0.6904204664675975, + "grad_norm": 0.7088965773582458, + "learning_rate": 0.0001233829113277354, + "loss": 2.5767, + "step": 8555 + }, + { + "epoch": 0.6905011702041804, + "grad_norm": 
0.7115551829338074, + "learning_rate": 0.00012336756178761517, + "loss": 2.5651, + "step": 8556 + }, + { + "epoch": 0.6905818739407634, + "grad_norm": 0.6778836250305176, + "learning_rate": 0.00012335221166511425, + "loss": 2.6388, + "step": 8557 + }, + { + "epoch": 0.6906625776773465, + "grad_norm": 0.6358879804611206, + "learning_rate": 0.00012333686096061515, + "loss": 2.5493, + "step": 8558 + }, + { + "epoch": 0.6907432814139295, + "grad_norm": 0.688197135925293, + "learning_rate": 0.00012332150967450046, + "loss": 2.5707, + "step": 8559 + }, + { + "epoch": 0.6908239851505125, + "grad_norm": 0.6931524872779846, + "learning_rate": 0.0001233061578071528, + "loss": 2.5561, + "step": 8560 + }, + { + "epoch": 0.6909046888870954, + "grad_norm": 0.6684975624084473, + "learning_rate": 0.00012329080535895478, + "loss": 2.6442, + "step": 8561 + }, + { + "epoch": 0.6909853926236785, + "grad_norm": 0.6865811347961426, + "learning_rate": 0.00012327545233028898, + "loss": 2.564, + "step": 8562 + }, + { + "epoch": 0.6910660963602615, + "grad_norm": 0.6999006867408752, + "learning_rate": 0.0001232600987215381, + "loss": 2.5607, + "step": 8563 + }, + { + "epoch": 0.6911468000968445, + "grad_norm": 0.6734526753425598, + "learning_rate": 0.0001232447445330847, + "loss": 2.5261, + "step": 8564 + }, + { + "epoch": 0.6912275038334275, + "grad_norm": 0.7447343468666077, + "learning_rate": 0.00012322938976531153, + "loss": 2.5359, + "step": 8565 + }, + { + "epoch": 0.6913082075700105, + "grad_norm": 0.6498517394065857, + "learning_rate": 0.00012321403441860126, + "loss": 2.5345, + "step": 8566 + }, + { + "epoch": 0.6913889113065935, + "grad_norm": 0.692933976650238, + "learning_rate": 0.00012319867849333658, + "loss": 2.6293, + "step": 8567 + }, + { + "epoch": 0.6914696150431765, + "grad_norm": 0.728430449962616, + "learning_rate": 0.00012318332198990015, + "loss": 2.618, + "step": 8568 + }, + { + "epoch": 0.6915503187797595, + "grad_norm": 0.7029061913490295, + "learning_rate": 
0.00012316796490867478, + "loss": 2.6151, + "step": 8569 + }, + { + "epoch": 0.6916310225163425, + "grad_norm": 0.6692330241203308, + "learning_rate": 0.00012315260725004313, + "loss": 2.5511, + "step": 8570 + }, + { + "epoch": 0.6917117262529255, + "grad_norm": 0.6811983585357666, + "learning_rate": 0.000123137249014388, + "loss": 2.6337, + "step": 8571 + }, + { + "epoch": 0.6917924299895085, + "grad_norm": 0.7387441992759705, + "learning_rate": 0.00012312189020209212, + "loss": 2.5679, + "step": 8572 + }, + { + "epoch": 0.6918731337260915, + "grad_norm": 0.7180185914039612, + "learning_rate": 0.0001231065308135383, + "loss": 2.639, + "step": 8573 + }, + { + "epoch": 0.6919538374626745, + "grad_norm": 0.6997829079627991, + "learning_rate": 0.00012309117084910936, + "loss": 2.5392, + "step": 8574 + }, + { + "epoch": 0.6920345411992576, + "grad_norm": 0.7004552483558655, + "learning_rate": 0.00012307581030918807, + "loss": 2.6033, + "step": 8575 + }, + { + "epoch": 0.6921152449358405, + "grad_norm": 0.7183418273925781, + "learning_rate": 0.00012306044919415724, + "loss": 2.6302, + "step": 8576 + }, + { + "epoch": 0.6921959486724235, + "grad_norm": 0.6645712852478027, + "learning_rate": 0.00012304508750439976, + "loss": 2.5401, + "step": 8577 + }, + { + "epoch": 0.6922766524090065, + "grad_norm": 0.6455898284912109, + "learning_rate": 0.00012302972524029848, + "loss": 2.5084, + "step": 8578 + }, + { + "epoch": 0.6923573561455896, + "grad_norm": 0.6933849453926086, + "learning_rate": 0.00012301436240223622, + "loss": 2.5734, + "step": 8579 + }, + { + "epoch": 0.6924380598821726, + "grad_norm": 0.7967655658721924, + "learning_rate": 0.00012299899899059587, + "loss": 2.5721, + "step": 8580 + }, + { + "epoch": 0.6925187636187555, + "grad_norm": 0.706730306148529, + "learning_rate": 0.0001229836350057604, + "loss": 2.6216, + "step": 8581 + }, + { + "epoch": 0.6925994673553385, + "grad_norm": 0.7021105885505676, + "learning_rate": 0.0001229682704481126, + "loss": 2.4877, + 
"step": 8582 + }, + { + "epoch": 0.6926801710919216, + "grad_norm": 0.7197253108024597, + "learning_rate": 0.00012295290531803553, + "loss": 2.6124, + "step": 8583 + }, + { + "epoch": 0.6927608748285046, + "grad_norm": 0.7559605836868286, + "learning_rate": 0.00012293753961591198, + "loss": 2.6391, + "step": 8584 + }, + { + "epoch": 0.6928415785650875, + "grad_norm": 0.7074676752090454, + "learning_rate": 0.00012292217334212505, + "loss": 2.5949, + "step": 8585 + }, + { + "epoch": 0.6929222823016705, + "grad_norm": 0.6843528747558594, + "learning_rate": 0.00012290680649705763, + "loss": 2.4981, + "step": 8586 + }, + { + "epoch": 0.6930029860382536, + "grad_norm": 0.6853117942810059, + "learning_rate": 0.00012289143908109266, + "loss": 2.6352, + "step": 8587 + }, + { + "epoch": 0.6930836897748366, + "grad_norm": 0.6545630097389221, + "learning_rate": 0.00012287607109461325, + "loss": 2.5344, + "step": 8588 + }, + { + "epoch": 0.6931643935114196, + "grad_norm": 0.7377945184707642, + "learning_rate": 0.00012286070253800233, + "loss": 2.5895, + "step": 8589 + }, + { + "epoch": 0.6932450972480025, + "grad_norm": 0.6919971108436584, + "learning_rate": 0.00012284533341164295, + "loss": 2.5825, + "step": 8590 + }, + { + "epoch": 0.6933258009845856, + "grad_norm": 0.6911910176277161, + "learning_rate": 0.00012282996371591816, + "loss": 2.6008, + "step": 8591 + }, + { + "epoch": 0.6934065047211686, + "grad_norm": 0.7486373782157898, + "learning_rate": 0.00012281459345121095, + "loss": 2.6056, + "step": 8592 + }, + { + "epoch": 0.6934872084577516, + "grad_norm": 0.6829040050506592, + "learning_rate": 0.00012279922261790443, + "loss": 2.5161, + "step": 8593 + }, + { + "epoch": 0.6935679121943346, + "grad_norm": 0.7410104870796204, + "learning_rate": 0.00012278385121638173, + "loss": 2.6114, + "step": 8594 + }, + { + "epoch": 0.6936486159309176, + "grad_norm": 0.7355940937995911, + "learning_rate": 0.00012276847924702587, + "loss": 2.6371, + "step": 8595 + }, + { + "epoch": 
0.6937293196675006, + "grad_norm": 0.650641679763794, + "learning_rate": 0.00012275310671022003, + "loss": 2.5568, + "step": 8596 + }, + { + "epoch": 0.6938100234040836, + "grad_norm": 0.661573052406311, + "learning_rate": 0.00012273773360634726, + "loss": 2.5828, + "step": 8597 + }, + { + "epoch": 0.6938907271406666, + "grad_norm": 0.6848435401916504, + "learning_rate": 0.00012272235993579072, + "loss": 2.5226, + "step": 8598 + }, + { + "epoch": 0.6939714308772497, + "grad_norm": 0.7015430927276611, + "learning_rate": 0.0001227069856989336, + "loss": 2.6156, + "step": 8599 + }, + { + "epoch": 0.6940521346138326, + "grad_norm": 0.7058628797531128, + "learning_rate": 0.000122691610896159, + "loss": 2.6007, + "step": 8600 + }, + { + "epoch": 0.6941328383504156, + "grad_norm": 0.6589432954788208, + "learning_rate": 0.0001226762355278502, + "loss": 2.5551, + "step": 8601 + }, + { + "epoch": 0.6942135420869986, + "grad_norm": 0.6875284910202026, + "learning_rate": 0.0001226608595943903, + "loss": 2.5537, + "step": 8602 + }, + { + "epoch": 0.6942942458235817, + "grad_norm": 0.7178356051445007, + "learning_rate": 0.00012264548309616252, + "loss": 2.655, + "step": 8603 + }, + { + "epoch": 0.6943749495601647, + "grad_norm": 0.7327077388763428, + "learning_rate": 0.00012263010603355017, + "loss": 2.5574, + "step": 8604 + }, + { + "epoch": 0.6944556532967476, + "grad_norm": 0.6318337917327881, + "learning_rate": 0.0001226147284069364, + "loss": 2.577, + "step": 8605 + }, + { + "epoch": 0.6945363570333306, + "grad_norm": 0.674872875213623, + "learning_rate": 0.00012259935021670444, + "loss": 2.6225, + "step": 8606 + }, + { + "epoch": 0.6946170607699137, + "grad_norm": 0.6554198861122131, + "learning_rate": 0.0001225839714632376, + "loss": 2.5951, + "step": 8607 + }, + { + "epoch": 0.6946977645064967, + "grad_norm": 0.7086453437805176, + "learning_rate": 0.00012256859214691918, + "loss": 2.622, + "step": 8608 + }, + { + "epoch": 0.6947784682430796, + "grad_norm": 
0.6609488129615784, + "learning_rate": 0.00012255321226813245, + "loss": 2.5623, + "step": 8609 + }, + { + "epoch": 0.6948591719796626, + "grad_norm": 0.7504609823226929, + "learning_rate": 0.00012253783182726075, + "loss": 2.5264, + "step": 8610 + }, + { + "epoch": 0.6949398757162457, + "grad_norm": 0.6702934503555298, + "learning_rate": 0.00012252245082468733, + "loss": 2.5877, + "step": 8611 + }, + { + "epoch": 0.6950205794528287, + "grad_norm": 0.7116326689720154, + "learning_rate": 0.00012250706926079553, + "loss": 2.5629, + "step": 8612 + }, + { + "epoch": 0.6951012831894117, + "grad_norm": 0.7495368719100952, + "learning_rate": 0.00012249168713596875, + "loss": 2.5731, + "step": 8613 + }, + { + "epoch": 0.6951819869259946, + "grad_norm": 0.7434844970703125, + "learning_rate": 0.0001224763044505904, + "loss": 2.6008, + "step": 8614 + }, + { + "epoch": 0.6952626906625777, + "grad_norm": 0.719667375087738, + "learning_rate": 0.00012246092120504371, + "loss": 2.6051, + "step": 8615 + }, + { + "epoch": 0.6953433943991607, + "grad_norm": 0.7189086079597473, + "learning_rate": 0.00012244553739971216, + "loss": 2.5662, + "step": 8616 + }, + { + "epoch": 0.6954240981357437, + "grad_norm": 0.7222673892974854, + "learning_rate": 0.00012243015303497917, + "loss": 2.609, + "step": 8617 + }, + { + "epoch": 0.6955048018723267, + "grad_norm": 0.7323142290115356, + "learning_rate": 0.00012241476811122813, + "loss": 2.5458, + "step": 8618 + }, + { + "epoch": 0.6955855056089096, + "grad_norm": 0.7374032735824585, + "learning_rate": 0.00012239938262884246, + "loss": 2.6147, + "step": 8619 + }, + { + "epoch": 0.6956662093454927, + "grad_norm": 0.6707843542098999, + "learning_rate": 0.00012238399658820562, + "loss": 2.6462, + "step": 8620 + }, + { + "epoch": 0.6957469130820757, + "grad_norm": 0.7603243589401245, + "learning_rate": 0.0001223686099897011, + "loss": 2.6295, + "step": 8621 + }, + { + "epoch": 0.6958276168186587, + "grad_norm": 0.6966906785964966, + "learning_rate": 
0.00012235322283371232, + "loss": 2.545, + "step": 8622 + }, + { + "epoch": 0.6959083205552417, + "grad_norm": 0.6757891774177551, + "learning_rate": 0.0001223378351206228, + "loss": 2.5548, + "step": 8623 + }, + { + "epoch": 0.6959890242918247, + "grad_norm": 0.6901456713676453, + "learning_rate": 0.00012232244685081605, + "loss": 2.5734, + "step": 8624 + }, + { + "epoch": 0.6960697280284077, + "grad_norm": 0.6942903995513916, + "learning_rate": 0.00012230705802467558, + "loss": 2.5495, + "step": 8625 + }, + { + "epoch": 0.6961504317649907, + "grad_norm": 0.6774815320968628, + "learning_rate": 0.0001222916686425849, + "loss": 2.5076, + "step": 8626 + }, + { + "epoch": 0.6962311355015737, + "grad_norm": 0.8037571310997009, + "learning_rate": 0.00012227627870492754, + "loss": 2.6737, + "step": 8627 + }, + { + "epoch": 0.6963118392381568, + "grad_norm": 0.7027560472488403, + "learning_rate": 0.0001222608882120871, + "loss": 2.5401, + "step": 8628 + }, + { + "epoch": 0.6963925429747397, + "grad_norm": 0.6651299595832825, + "learning_rate": 0.00012224549716444714, + "loss": 2.5835, + "step": 8629 + }, + { + "epoch": 0.6964732467113227, + "grad_norm": 0.7082433104515076, + "learning_rate": 0.00012223010556239124, + "loss": 2.5622, + "step": 8630 + }, + { + "epoch": 0.6965539504479057, + "grad_norm": 0.7993464469909668, + "learning_rate": 0.00012221471340630305, + "loss": 2.655, + "step": 8631 + }, + { + "epoch": 0.6966346541844888, + "grad_norm": 0.7375298142433167, + "learning_rate": 0.00012219932069656606, + "loss": 2.598, + "step": 8632 + }, + { + "epoch": 0.6967153579210718, + "grad_norm": 0.6915456652641296, + "learning_rate": 0.00012218392743356397, + "loss": 2.5649, + "step": 8633 + }, + { + "epoch": 0.6967960616576547, + "grad_norm": 0.679256021976471, + "learning_rate": 0.00012216853361768045, + "loss": 2.545, + "step": 8634 + }, + { + "epoch": 0.6968767653942377, + "grad_norm": 0.7234694361686707, + "learning_rate": 0.0001221531392492991, + "loss": 2.5863, + 
"step": 8635 + }, + { + "epoch": 0.6969574691308208, + "grad_norm": 0.7053319811820984, + "learning_rate": 0.00012213774432880364, + "loss": 2.5829, + "step": 8636 + }, + { + "epoch": 0.6970381728674038, + "grad_norm": 0.7584449648857117, + "learning_rate": 0.00012212234885657772, + "loss": 2.5855, + "step": 8637 + }, + { + "epoch": 0.6971188766039867, + "grad_norm": 0.7098579406738281, + "learning_rate": 0.00012210695283300501, + "loss": 2.6057, + "step": 8638 + }, + { + "epoch": 0.6971995803405697, + "grad_norm": 0.7350205779075623, + "learning_rate": 0.00012209155625846928, + "loss": 2.546, + "step": 8639 + }, + { + "epoch": 0.6972802840771528, + "grad_norm": 0.6842331290245056, + "learning_rate": 0.0001220761591333542, + "loss": 2.5602, + "step": 8640 + }, + { + "epoch": 0.6973609878137358, + "grad_norm": 0.6731252074241638, + "learning_rate": 0.00012206076145804354, + "loss": 2.4676, + "step": 8641 + }, + { + "epoch": 0.6974416915503188, + "grad_norm": 0.7271167635917664, + "learning_rate": 0.00012204536323292104, + "loss": 2.5605, + "step": 8642 + }, + { + "epoch": 0.6975223952869017, + "grad_norm": 0.6860780715942383, + "learning_rate": 0.00012202996445837043, + "loss": 2.5041, + "step": 8643 + }, + { + "epoch": 0.6976030990234848, + "grad_norm": 0.7134578824043274, + "learning_rate": 0.00012201456513477554, + "loss": 2.614, + "step": 8644 + }, + { + "epoch": 0.6976838027600678, + "grad_norm": 0.6995248198509216, + "learning_rate": 0.00012199916526252014, + "loss": 2.5087, + "step": 8645 + }, + { + "epoch": 0.6977645064966508, + "grad_norm": 0.7280197143554688, + "learning_rate": 0.00012198376484198803, + "loss": 2.5723, + "step": 8646 + }, + { + "epoch": 0.6978452102332338, + "grad_norm": 0.6898967623710632, + "learning_rate": 0.00012196836387356306, + "loss": 2.6073, + "step": 8647 + }, + { + "epoch": 0.6979259139698168, + "grad_norm": 0.6670758128166199, + "learning_rate": 0.00012195296235762901, + "loss": 2.5276, + "step": 8648 + }, + { + "epoch": 
0.6980066177063998, + "grad_norm": 0.6862780451774597, + "learning_rate": 0.00012193756029456973, + "loss": 2.5363, + "step": 8649 + }, + { + "epoch": 0.6980873214429828, + "grad_norm": 0.6568876504898071, + "learning_rate": 0.00012192215768476916, + "loss": 2.5828, + "step": 8650 + }, + { + "epoch": 0.6981680251795658, + "grad_norm": 0.7237746119499207, + "learning_rate": 0.00012190675452861107, + "loss": 2.6076, + "step": 8651 + }, + { + "epoch": 0.6982487289161489, + "grad_norm": 0.6831536293029785, + "learning_rate": 0.00012189135082647943, + "loss": 2.5199, + "step": 8652 + }, + { + "epoch": 0.6983294326527318, + "grad_norm": 0.6767029166221619, + "learning_rate": 0.00012187594657875805, + "loss": 2.5859, + "step": 8653 + }, + { + "epoch": 0.6984101363893148, + "grad_norm": 0.6977167129516602, + "learning_rate": 0.00012186054178583092, + "loss": 2.5831, + "step": 8654 + }, + { + "epoch": 0.6984908401258978, + "grad_norm": 0.6369525194168091, + "learning_rate": 0.00012184513644808197, + "loss": 2.5839, + "step": 8655 + }, + { + "epoch": 0.6985715438624809, + "grad_norm": 0.6814634203910828, + "learning_rate": 0.00012182973056589508, + "loss": 2.5493, + "step": 8656 + }, + { + "epoch": 0.6986522475990639, + "grad_norm": 0.6895000338554382, + "learning_rate": 0.00012181432413965428, + "loss": 2.5616, + "step": 8657 + }, + { + "epoch": 0.6987329513356468, + "grad_norm": 0.6689717769622803, + "learning_rate": 0.00012179891716974345, + "loss": 2.5481, + "step": 8658 + }, + { + "epoch": 0.6988136550722298, + "grad_norm": 0.6945160031318665, + "learning_rate": 0.00012178350965654666, + "loss": 2.5781, + "step": 8659 + }, + { + "epoch": 0.6988943588088129, + "grad_norm": 0.7226110696792603, + "learning_rate": 0.00012176810160044785, + "loss": 2.5767, + "step": 8660 + }, + { + "epoch": 0.6989750625453959, + "grad_norm": 0.6810569167137146, + "learning_rate": 0.00012175269300183105, + "loss": 2.5184, + "step": 8661 + }, + { + "epoch": 0.6990557662819789, + "grad_norm": 
0.727281928062439, + "learning_rate": 0.0001217372838610803, + "loss": 2.5972, + "step": 8662 + }, + { + "epoch": 0.6991364700185618, + "grad_norm": 0.7111573219299316, + "learning_rate": 0.00012172187417857959, + "loss": 2.6445, + "step": 8663 + }, + { + "epoch": 0.6992171737551449, + "grad_norm": 0.6808965802192688, + "learning_rate": 0.00012170646395471296, + "loss": 2.5191, + "step": 8664 + }, + { + "epoch": 0.6992978774917279, + "grad_norm": 0.7063688635826111, + "learning_rate": 0.00012169105318986455, + "loss": 2.6021, + "step": 8665 + }, + { + "epoch": 0.6993785812283109, + "grad_norm": 0.6522886753082275, + "learning_rate": 0.0001216756418844184, + "loss": 2.5697, + "step": 8666 + }, + { + "epoch": 0.6994592849648938, + "grad_norm": 0.6706095337867737, + "learning_rate": 0.00012166023003875859, + "loss": 2.5706, + "step": 8667 + }, + { + "epoch": 0.6995399887014769, + "grad_norm": 0.6744416356086731, + "learning_rate": 0.00012164481765326923, + "loss": 2.5713, + "step": 8668 + }, + { + "epoch": 0.6996206924380599, + "grad_norm": 0.7385411858558655, + "learning_rate": 0.0001216294047283344, + "loss": 2.5543, + "step": 8669 + }, + { + "epoch": 0.6997013961746429, + "grad_norm": 0.7286678552627563, + "learning_rate": 0.0001216139912643383, + "loss": 2.588, + "step": 8670 + }, + { + "epoch": 0.6997820999112259, + "grad_norm": 0.7065937519073486, + "learning_rate": 0.00012159857726166503, + "loss": 2.5475, + "step": 8671 + }, + { + "epoch": 0.6998628036478088, + "grad_norm": 0.6609788537025452, + "learning_rate": 0.00012158316272069874, + "loss": 2.5664, + "step": 8672 + }, + { + "epoch": 0.6999435073843919, + "grad_norm": 0.7360579371452332, + "learning_rate": 0.00012156774764182364, + "loss": 2.5822, + "step": 8673 + }, + { + "epoch": 0.7000242111209749, + "grad_norm": 0.6265058517456055, + "learning_rate": 0.00012155233202542384, + "loss": 2.5849, + "step": 8674 + }, + { + "epoch": 0.7001049148575579, + "grad_norm": 0.646976888179779, + "learning_rate": 
0.00012153691587188363, + "loss": 2.5839, + "step": 8675 + }, + { + "epoch": 0.7001856185941409, + "grad_norm": 0.6634985208511353, + "learning_rate": 0.0001215214991815872, + "loss": 2.5434, + "step": 8676 + }, + { + "epoch": 0.700266322330724, + "grad_norm": 0.6757560968399048, + "learning_rate": 0.00012150608195491871, + "loss": 2.6186, + "step": 8677 + }, + { + "epoch": 0.7003470260673069, + "grad_norm": 0.7077112197875977, + "learning_rate": 0.00012149066419226247, + "loss": 2.5757, + "step": 8678 + }, + { + "epoch": 0.7004277298038899, + "grad_norm": 0.698226273059845, + "learning_rate": 0.00012147524589400268, + "loss": 2.5307, + "step": 8679 + }, + { + "epoch": 0.7005084335404729, + "grad_norm": 0.6782405376434326, + "learning_rate": 0.00012145982706052361, + "loss": 2.5582, + "step": 8680 + }, + { + "epoch": 0.700589137277056, + "grad_norm": 0.6832882165908813, + "learning_rate": 0.0001214444076922096, + "loss": 2.574, + "step": 8681 + }, + { + "epoch": 0.7006698410136389, + "grad_norm": 0.7182612419128418, + "learning_rate": 0.00012142898778944485, + "loss": 2.6457, + "step": 8682 + }, + { + "epoch": 0.7007505447502219, + "grad_norm": 0.7043644785881042, + "learning_rate": 0.00012141356735261373, + "loss": 2.5244, + "step": 8683 + }, + { + "epoch": 0.7008312484868049, + "grad_norm": 0.6942669749259949, + "learning_rate": 0.00012139814638210054, + "loss": 2.5507, + "step": 8684 + }, + { + "epoch": 0.700911952223388, + "grad_norm": 0.8412066102027893, + "learning_rate": 0.00012138272487828959, + "loss": 2.6025, + "step": 8685 + }, + { + "epoch": 0.700992655959971, + "grad_norm": 0.6906788945198059, + "learning_rate": 0.00012136730284156525, + "loss": 2.5259, + "step": 8686 + }, + { + "epoch": 0.7010733596965539, + "grad_norm": 0.7258631587028503, + "learning_rate": 0.00012135188027231188, + "loss": 2.6311, + "step": 8687 + }, + { + "epoch": 0.7011540634331369, + "grad_norm": 0.6294744610786438, + "learning_rate": 0.00012133645717091382, + "loss": 2.5969, + 
"step": 8688 + }, + { + "epoch": 0.70123476716972, + "grad_norm": 0.6994131207466125, + "learning_rate": 0.00012132103353775548, + "loss": 2.5954, + "step": 8689 + }, + { + "epoch": 0.701315470906303, + "grad_norm": 0.671441912651062, + "learning_rate": 0.00012130560937322124, + "loss": 2.5628, + "step": 8690 + }, + { + "epoch": 0.701396174642886, + "grad_norm": 0.6915482878684998, + "learning_rate": 0.00012129018467769555, + "loss": 2.5173, + "step": 8691 + }, + { + "epoch": 0.7014768783794689, + "grad_norm": 0.6810318231582642, + "learning_rate": 0.00012127475945156279, + "loss": 2.6186, + "step": 8692 + }, + { + "epoch": 0.701557582116052, + "grad_norm": 0.7931910157203674, + "learning_rate": 0.00012125933369520741, + "loss": 2.6243, + "step": 8693 + }, + { + "epoch": 0.701638285852635, + "grad_norm": 0.6843162178993225, + "learning_rate": 0.00012124390740901386, + "loss": 2.6072, + "step": 8694 + }, + { + "epoch": 0.701718989589218, + "grad_norm": 0.672115683555603, + "learning_rate": 0.0001212284805933666, + "loss": 2.6027, + "step": 8695 + }, + { + "epoch": 0.7017996933258009, + "grad_norm": 0.65242600440979, + "learning_rate": 0.00012121305324865014, + "loss": 2.5128, + "step": 8696 + }, + { + "epoch": 0.701880397062384, + "grad_norm": 0.7253173589706421, + "learning_rate": 0.00012119762537524893, + "loss": 2.5776, + "step": 8697 + }, + { + "epoch": 0.701961100798967, + "grad_norm": 0.6536431312561035, + "learning_rate": 0.00012118219697354745, + "loss": 2.5656, + "step": 8698 + }, + { + "epoch": 0.70204180453555, + "grad_norm": 0.7121500372886658, + "learning_rate": 0.00012116676804393028, + "loss": 2.5878, + "step": 8699 + }, + { + "epoch": 0.702122508272133, + "grad_norm": 0.676449716091156, + "learning_rate": 0.00012115133858678191, + "loss": 2.6624, + "step": 8700 + }, + { + "epoch": 0.702203212008716, + "grad_norm": 0.7230382561683655, + "learning_rate": 0.0001211359086024869, + "loss": 2.5461, + "step": 8701 + }, + { + "epoch": 0.702283915745299, + 
"grad_norm": 0.6679937839508057, + "learning_rate": 0.00012112047809142979, + "loss": 2.5568, + "step": 8702 + }, + { + "epoch": 0.702364619481882, + "grad_norm": 0.6627704501152039, + "learning_rate": 0.0001211050470539952, + "loss": 2.4819, + "step": 8703 + }, + { + "epoch": 0.702445323218465, + "grad_norm": 0.6680646538734436, + "learning_rate": 0.0001210896154905676, + "loss": 2.5722, + "step": 8704 + }, + { + "epoch": 0.7025260269550481, + "grad_norm": 0.7406336665153503, + "learning_rate": 0.00012107418340153167, + "loss": 2.5722, + "step": 8705 + }, + { + "epoch": 0.702606730691631, + "grad_norm": 0.6634557247161865, + "learning_rate": 0.00012105875078727203, + "loss": 2.5747, + "step": 8706 + }, + { + "epoch": 0.702687434428214, + "grad_norm": 0.6521568894386292, + "learning_rate": 0.00012104331764817325, + "loss": 2.555, + "step": 8707 + }, + { + "epoch": 0.702768138164797, + "grad_norm": 0.677606463432312, + "learning_rate": 0.00012102788398461999, + "loss": 2.5544, + "step": 8708 + }, + { + "epoch": 0.7028488419013801, + "grad_norm": 0.6593700051307678, + "learning_rate": 0.0001210124497969969, + "loss": 2.5252, + "step": 8709 + }, + { + "epoch": 0.7029295456379631, + "grad_norm": 0.686903715133667, + "learning_rate": 0.00012099701508568863, + "loss": 2.6513, + "step": 8710 + }, + { + "epoch": 0.703010249374546, + "grad_norm": 0.6395620107650757, + "learning_rate": 0.00012098157985107987, + "loss": 2.5169, + "step": 8711 + }, + { + "epoch": 0.703090953111129, + "grad_norm": 0.7387555837631226, + "learning_rate": 0.00012096614409355526, + "loss": 2.5741, + "step": 8712 + }, + { + "epoch": 0.7031716568477121, + "grad_norm": 0.665900707244873, + "learning_rate": 0.00012095070781349957, + "loss": 2.5068, + "step": 8713 + }, + { + "epoch": 0.7032523605842951, + "grad_norm": 0.6983458399772644, + "learning_rate": 0.00012093527101129745, + "loss": 2.5028, + "step": 8714 + }, + { + "epoch": 0.703333064320878, + "grad_norm": 0.6250826120376587, + "learning_rate": 
0.00012091983368733366, + "loss": 2.5765, + "step": 8715 + }, + { + "epoch": 0.703413768057461, + "grad_norm": 0.7031501531600952, + "learning_rate": 0.00012090439584199294, + "loss": 2.5885, + "step": 8716 + }, + { + "epoch": 0.7034944717940441, + "grad_norm": 0.7140926122665405, + "learning_rate": 0.00012088895747566002, + "loss": 2.6278, + "step": 8717 + }, + { + "epoch": 0.7035751755306271, + "grad_norm": 0.6753602027893066, + "learning_rate": 0.00012087351858871969, + "loss": 2.5664, + "step": 8718 + }, + { + "epoch": 0.7036558792672101, + "grad_norm": 0.7150039076805115, + "learning_rate": 0.0001208580791815567, + "loss": 2.6739, + "step": 8719 + }, + { + "epoch": 0.703736583003793, + "grad_norm": 0.7120389342308044, + "learning_rate": 0.00012084263925455583, + "loss": 2.565, + "step": 8720 + }, + { + "epoch": 0.703817286740376, + "grad_norm": 0.7775784134864807, + "learning_rate": 0.00012082719880810194, + "loss": 2.5861, + "step": 8721 + }, + { + "epoch": 0.7038979904769591, + "grad_norm": 0.6704322695732117, + "learning_rate": 0.0001208117578425798, + "loss": 2.5957, + "step": 8722 + }, + { + "epoch": 0.7039786942135421, + "grad_norm": 0.6761276721954346, + "learning_rate": 0.00012079631635837426, + "loss": 2.5472, + "step": 8723 + }, + { + "epoch": 0.7040593979501251, + "grad_norm": 0.7639868855476379, + "learning_rate": 0.00012078087435587016, + "loss": 2.6053, + "step": 8724 + }, + { + "epoch": 0.704140101686708, + "grad_norm": 0.7490074038505554, + "learning_rate": 0.0001207654318354523, + "loss": 2.5517, + "step": 8725 + }, + { + "epoch": 0.7042208054232911, + "grad_norm": 0.7068852782249451, + "learning_rate": 0.00012074998879750566, + "loss": 2.5357, + "step": 8726 + }, + { + "epoch": 0.7043015091598741, + "grad_norm": 0.7273775935173035, + "learning_rate": 0.00012073454524241503, + "loss": 2.6028, + "step": 8727 + }, + { + "epoch": 0.7043822128964571, + "grad_norm": 0.7146363258361816, + "learning_rate": 0.00012071910117056533, + "loss": 2.5982, + 
"step": 8728 + }, + { + "epoch": 0.7044629166330401, + "grad_norm": 0.7631390690803528, + "learning_rate": 0.00012070365658234149, + "loss": 2.6021, + "step": 8729 + }, + { + "epoch": 0.7045436203696231, + "grad_norm": 0.7065283060073853, + "learning_rate": 0.00012068821147812839, + "loss": 2.5538, + "step": 8730 + }, + { + "epoch": 0.7046243241062061, + "grad_norm": 0.7914319634437561, + "learning_rate": 0.00012067276585831097, + "loss": 2.5617, + "step": 8731 + }, + { + "epoch": 0.7047050278427891, + "grad_norm": 0.7036565542221069, + "learning_rate": 0.0001206573197232742, + "loss": 2.5354, + "step": 8732 + }, + { + "epoch": 0.7047857315793721, + "grad_norm": 0.657116711139679, + "learning_rate": 0.00012064187307340303, + "loss": 2.5084, + "step": 8733 + }, + { + "epoch": 0.7048664353159552, + "grad_norm": 0.7246817946434021, + "learning_rate": 0.00012062642590908242, + "loss": 2.5737, + "step": 8734 + }, + { + "epoch": 0.7049471390525381, + "grad_norm": 0.6895857453346252, + "learning_rate": 0.00012061097823069736, + "loss": 2.5792, + "step": 8735 + }, + { + "epoch": 0.7050278427891211, + "grad_norm": 0.7654988169670105, + "learning_rate": 0.00012059553003863282, + "loss": 2.5302, + "step": 8736 + }, + { + "epoch": 0.7051085465257041, + "grad_norm": 0.7611668109893799, + "learning_rate": 0.00012058008133327387, + "loss": 2.6073, + "step": 8737 + }, + { + "epoch": 0.7051892502622872, + "grad_norm": 0.728729784488678, + "learning_rate": 0.00012056463211500546, + "loss": 2.5714, + "step": 8738 + }, + { + "epoch": 0.7052699539988702, + "grad_norm": 0.7251634001731873, + "learning_rate": 0.00012054918238421271, + "loss": 2.627, + "step": 8739 + }, + { + "epoch": 0.7053506577354531, + "grad_norm": 0.827745795249939, + "learning_rate": 0.00012053373214128056, + "loss": 2.6303, + "step": 8740 + }, + { + "epoch": 0.7054313614720361, + "grad_norm": 0.6837510466575623, + "learning_rate": 0.00012051828138659416, + "loss": 2.5837, + "step": 8741 + }, + { + "epoch": 
0.7055120652086192, + "grad_norm": 0.6763553619384766, + "learning_rate": 0.00012050283012053856, + "loss": 2.575, + "step": 8742 + }, + { + "epoch": 0.7055927689452022, + "grad_norm": 0.6779605150222778, + "learning_rate": 0.00012048737834349886, + "loss": 2.588, + "step": 8743 + }, + { + "epoch": 0.7056734726817852, + "grad_norm": 0.7207251191139221, + "learning_rate": 0.00012047192605586008, + "loss": 2.6182, + "step": 8744 + }, + { + "epoch": 0.7057541764183681, + "grad_norm": 0.6681165099143982, + "learning_rate": 0.00012045647325800742, + "loss": 2.5595, + "step": 8745 + }, + { + "epoch": 0.7058348801549512, + "grad_norm": 0.7520970702171326, + "learning_rate": 0.00012044101995032594, + "loss": 2.6306, + "step": 8746 + }, + { + "epoch": 0.7059155838915342, + "grad_norm": 0.7148429155349731, + "learning_rate": 0.00012042556613320087, + "loss": 2.5749, + "step": 8747 + }, + { + "epoch": 0.7059962876281172, + "grad_norm": 0.619369626045227, + "learning_rate": 0.00012041011180701729, + "loss": 2.5382, + "step": 8748 + }, + { + "epoch": 0.7060769913647001, + "grad_norm": 0.7450816035270691, + "learning_rate": 0.00012039465697216032, + "loss": 2.5547, + "step": 8749 + }, + { + "epoch": 0.7061576951012832, + "grad_norm": 0.7324537634849548, + "learning_rate": 0.00012037920162901521, + "loss": 2.5756, + "step": 8750 + }, + { + "epoch": 0.7062383988378662, + "grad_norm": 0.7881754636764526, + "learning_rate": 0.00012036374577796715, + "loss": 2.6376, + "step": 8751 + }, + { + "epoch": 0.7063191025744492, + "grad_norm": 0.7095965147018433, + "learning_rate": 0.00012034828941940128, + "loss": 2.5454, + "step": 8752 + }, + { + "epoch": 0.7063998063110322, + "grad_norm": 0.7142949104309082, + "learning_rate": 0.00012033283255370287, + "loss": 2.5738, + "step": 8753 + }, + { + "epoch": 0.7064805100476153, + "grad_norm": 0.6592378616333008, + "learning_rate": 0.0001203173751812571, + "loss": 2.5473, + "step": 8754 + }, + { + "epoch": 0.7065612137841982, + "grad_norm": 
0.6964332461357117, + "learning_rate": 0.00012030191730244926, + "loss": 2.5829, + "step": 8755 + }, + { + "epoch": 0.7066419175207812, + "grad_norm": 0.707539975643158, + "learning_rate": 0.00012028645891766455, + "loss": 2.5652, + "step": 8756 + }, + { + "epoch": 0.7067226212573642, + "grad_norm": 0.6991387009620667, + "learning_rate": 0.00012027100002728824, + "loss": 2.5874, + "step": 8757 + }, + { + "epoch": 0.7068033249939473, + "grad_norm": 0.665746808052063, + "learning_rate": 0.00012025554063170566, + "loss": 2.5163, + "step": 8758 + }, + { + "epoch": 0.7068840287305302, + "grad_norm": 0.696130096912384, + "learning_rate": 0.00012024008073130204, + "loss": 2.5748, + "step": 8759 + }, + { + "epoch": 0.7069647324671132, + "grad_norm": 0.698885440826416, + "learning_rate": 0.00012022462032646269, + "loss": 2.5561, + "step": 8760 + }, + { + "epoch": 0.7070454362036962, + "grad_norm": 0.7052211761474609, + "learning_rate": 0.00012020915941757292, + "loss": 2.5979, + "step": 8761 + }, + { + "epoch": 0.7071261399402793, + "grad_norm": 0.7370811104774475, + "learning_rate": 0.00012019369800501808, + "loss": 2.5623, + "step": 8762 + }, + { + "epoch": 0.7072068436768623, + "grad_norm": 0.6699148416519165, + "learning_rate": 0.00012017823608918352, + "loss": 2.5816, + "step": 8763 + }, + { + "epoch": 0.7072875474134452, + "grad_norm": 0.6712930798530579, + "learning_rate": 0.00012016277367045457, + "loss": 2.5495, + "step": 8764 + }, + { + "epoch": 0.7073682511500282, + "grad_norm": 0.7238204479217529, + "learning_rate": 0.00012014731074921659, + "loss": 2.5936, + "step": 8765 + }, + { + "epoch": 0.7074489548866113, + "grad_norm": 0.7303668856620789, + "learning_rate": 0.00012013184732585494, + "loss": 2.6366, + "step": 8766 + }, + { + "epoch": 0.7075296586231943, + "grad_norm": 0.6883132457733154, + "learning_rate": 0.00012011638340075505, + "loss": 2.534, + "step": 8767 + }, + { + "epoch": 0.7076103623597773, + "grad_norm": 0.7057133316993713, + "learning_rate": 
0.00012010091897430229, + "loss": 2.6035, + "step": 8768 + }, + { + "epoch": 0.7076910660963602, + "grad_norm": 0.7069352269172668, + "learning_rate": 0.0001200854540468821, + "loss": 2.5047, + "step": 8769 + }, + { + "epoch": 0.7077717698329433, + "grad_norm": 0.7192478775978088, + "learning_rate": 0.00012006998861887985, + "loss": 2.5698, + "step": 8770 + }, + { + "epoch": 0.7078524735695263, + "grad_norm": 0.6992887854576111, + "learning_rate": 0.00012005452269068107, + "loss": 2.5631, + "step": 8771 + }, + { + "epoch": 0.7079331773061093, + "grad_norm": 0.676154613494873, + "learning_rate": 0.00012003905626267114, + "loss": 2.5255, + "step": 8772 + }, + { + "epoch": 0.7080138810426923, + "grad_norm": 0.672269880771637, + "learning_rate": 0.00012002358933523555, + "loss": 2.5766, + "step": 8773 + }, + { + "epoch": 0.7080945847792752, + "grad_norm": 0.7334566712379456, + "learning_rate": 0.00012000812190875976, + "loss": 2.6068, + "step": 8774 + }, + { + "epoch": 0.7081752885158583, + "grad_norm": 0.6599388122558594, + "learning_rate": 0.00011999265398362931, + "loss": 2.6032, + "step": 8775 + }, + { + "epoch": 0.7082559922524413, + "grad_norm": 0.7158498167991638, + "learning_rate": 0.00011997718556022958, + "loss": 2.599, + "step": 8776 + }, + { + "epoch": 0.7083366959890243, + "grad_norm": 0.7470360994338989, + "learning_rate": 0.00011996171663894624, + "loss": 2.58, + "step": 8777 + }, + { + "epoch": 0.7084173997256072, + "grad_norm": 0.6251266002655029, + "learning_rate": 0.00011994624722016472, + "loss": 2.5996, + "step": 8778 + }, + { + "epoch": 0.7084981034621903, + "grad_norm": 0.6649689078330994, + "learning_rate": 0.00011993077730427058, + "loss": 2.6025, + "step": 8779 + }, + { + "epoch": 0.7085788071987733, + "grad_norm": 0.7554693818092346, + "learning_rate": 0.00011991530689164939, + "loss": 2.6207, + "step": 8780 + }, + { + "epoch": 0.7086595109353563, + "grad_norm": 0.7941430807113647, + "learning_rate": 0.00011989983598268661, + "loss": 2.584, + 
"step": 8781 + }, + { + "epoch": 0.7087402146719393, + "grad_norm": 0.7257998585700989, + "learning_rate": 0.00011988436457776799, + "loss": 2.6152, + "step": 8782 + }, + { + "epoch": 0.7088209184085223, + "grad_norm": 0.716354489326477, + "learning_rate": 0.00011986889267727899, + "loss": 2.585, + "step": 8783 + }, + { + "epoch": 0.7089016221451053, + "grad_norm": 0.7094400525093079, + "learning_rate": 0.00011985342028160525, + "loss": 2.5759, + "step": 8784 + }, + { + "epoch": 0.7089823258816883, + "grad_norm": 0.7211421728134155, + "learning_rate": 0.0001198379473911324, + "loss": 2.5645, + "step": 8785 + }, + { + "epoch": 0.7090630296182713, + "grad_norm": 0.7166693806648254, + "learning_rate": 0.000119822474006246, + "loss": 2.5357, + "step": 8786 + }, + { + "epoch": 0.7091437333548544, + "grad_norm": 0.6702254414558411, + "learning_rate": 0.00011980700012733175, + "loss": 2.5353, + "step": 8787 + }, + { + "epoch": 0.7092244370914373, + "grad_norm": 0.6784049868583679, + "learning_rate": 0.0001197915257547753, + "loss": 2.4942, + "step": 8788 + }, + { + "epoch": 0.7093051408280203, + "grad_norm": 0.6914299726486206, + "learning_rate": 0.00011977605088896226, + "loss": 2.5682, + "step": 8789 + }, + { + "epoch": 0.7093858445646033, + "grad_norm": 0.7324358820915222, + "learning_rate": 0.00011976057553027837, + "loss": 2.564, + "step": 8790 + }, + { + "epoch": 0.7094665483011864, + "grad_norm": 0.6927928924560547, + "learning_rate": 0.00011974509967910927, + "loss": 2.5728, + "step": 8791 + }, + { + "epoch": 0.7095472520377694, + "grad_norm": 0.6795603036880493, + "learning_rate": 0.00011972962333584066, + "loss": 2.588, + "step": 8792 + }, + { + "epoch": 0.7096279557743523, + "grad_norm": 0.7132226228713989, + "learning_rate": 0.00011971414650085828, + "loss": 2.5759, + "step": 8793 + }, + { + "epoch": 0.7097086595109353, + "grad_norm": 0.737195611000061, + "learning_rate": 0.00011969866917454782, + "loss": 2.5721, + "step": 8794 + }, + { + "epoch": 
0.7097893632475184, + "grad_norm": 0.6776021718978882, + "learning_rate": 0.00011968319135729507, + "loss": 2.5794, + "step": 8795 + }, + { + "epoch": 0.7098700669841014, + "grad_norm": 0.7113735675811768, + "learning_rate": 0.0001196677130494857, + "loss": 2.5595, + "step": 8796 + }, + { + "epoch": 0.7099507707206844, + "grad_norm": 0.6277747750282288, + "learning_rate": 0.0001196522342515055, + "loss": 2.5003, + "step": 8797 + }, + { + "epoch": 0.7100314744572673, + "grad_norm": 0.6982879042625427, + "learning_rate": 0.00011963675496374028, + "loss": 2.542, + "step": 8798 + }, + { + "epoch": 0.7101121781938504, + "grad_norm": 0.7019705176353455, + "learning_rate": 0.00011962127518657578, + "loss": 2.5723, + "step": 8799 + }, + { + "epoch": 0.7101928819304334, + "grad_norm": 0.6831088662147522, + "learning_rate": 0.00011960579492039783, + "loss": 2.5676, + "step": 8800 + }, + { + "epoch": 0.7102735856670164, + "grad_norm": 0.6744031310081482, + "learning_rate": 0.0001195903141655922, + "loss": 2.58, + "step": 8801 + }, + { + "epoch": 0.7103542894035993, + "grad_norm": 0.6873177289962769, + "learning_rate": 0.00011957483292254473, + "loss": 2.6289, + "step": 8802 + }, + { + "epoch": 0.7104349931401824, + "grad_norm": 0.6340685486793518, + "learning_rate": 0.00011955935119164125, + "loss": 2.5688, + "step": 8803 + }, + { + "epoch": 0.7105156968767654, + "grad_norm": 0.7147708535194397, + "learning_rate": 0.00011954386897326764, + "loss": 2.5471, + "step": 8804 + }, + { + "epoch": 0.7105964006133484, + "grad_norm": 0.699605405330658, + "learning_rate": 0.00011952838626780971, + "loss": 2.6122, + "step": 8805 + }, + { + "epoch": 0.7106771043499314, + "grad_norm": 0.6685385704040527, + "learning_rate": 0.00011951290307565335, + "loss": 2.5423, + "step": 8806 + }, + { + "epoch": 0.7107578080865145, + "grad_norm": 0.6884726881980896, + "learning_rate": 0.00011949741939718439, + "loss": 2.5243, + "step": 8807 + }, + { + "epoch": 0.7108385118230974, + "grad_norm": 
0.6991142630577087, + "learning_rate": 0.00011948193523278884, + "loss": 2.6271, + "step": 8808 + }, + { + "epoch": 0.7109192155596804, + "grad_norm": 0.6964353919029236, + "learning_rate": 0.00011946645058285253, + "loss": 2.6296, + "step": 8809 + }, + { + "epoch": 0.7109999192962634, + "grad_norm": 0.7592040300369263, + "learning_rate": 0.00011945096544776136, + "loss": 2.6601, + "step": 8810 + }, + { + "epoch": 0.7110806230328465, + "grad_norm": 0.7146934866905212, + "learning_rate": 0.00011943547982790131, + "loss": 2.54, + "step": 8811 + }, + { + "epoch": 0.7111613267694294, + "grad_norm": 0.6991123557090759, + "learning_rate": 0.00011941999372365827, + "loss": 2.5978, + "step": 8812 + }, + { + "epoch": 0.7112420305060124, + "grad_norm": 0.6835920810699463, + "learning_rate": 0.00011940450713541822, + "loss": 2.6096, + "step": 8813 + }, + { + "epoch": 0.7113227342425954, + "grad_norm": 0.6913917660713196, + "learning_rate": 0.00011938902006356716, + "loss": 2.5624, + "step": 8814 + }, + { + "epoch": 0.7114034379791785, + "grad_norm": 0.6620622873306274, + "learning_rate": 0.00011937353250849102, + "loss": 2.6211, + "step": 8815 + }, + { + "epoch": 0.7114841417157615, + "grad_norm": 0.6738792061805725, + "learning_rate": 0.00011935804447057581, + "loss": 2.5889, + "step": 8816 + }, + { + "epoch": 0.7115648454523444, + "grad_norm": 0.7101936936378479, + "learning_rate": 0.00011934255595020751, + "loss": 2.5846, + "step": 8817 + }, + { + "epoch": 0.7116455491889274, + "grad_norm": 0.6843911409378052, + "learning_rate": 0.00011932706694777216, + "loss": 2.5757, + "step": 8818 + }, + { + "epoch": 0.7117262529255105, + "grad_norm": 0.7217971086502075, + "learning_rate": 0.0001193115774636558, + "loss": 2.6174, + "step": 8819 + }, + { + "epoch": 0.7118069566620935, + "grad_norm": 0.6706245541572571, + "learning_rate": 0.00011929608749824445, + "loss": 2.5893, + "step": 8820 + }, + { + "epoch": 0.7118876603986765, + "grad_norm": 0.7057672739028931, + "learning_rate": 
0.00011928059705192413, + "loss": 2.5426, + "step": 8821 + }, + { + "epoch": 0.7119683641352594, + "grad_norm": 0.7354697585105896, + "learning_rate": 0.00011926510612508095, + "loss": 2.5741, + "step": 8822 + }, + { + "epoch": 0.7120490678718424, + "grad_norm": 0.6618186235427856, + "learning_rate": 0.00011924961471810096, + "loss": 2.6007, + "step": 8823 + }, + { + "epoch": 0.7121297716084255, + "grad_norm": 0.6733995676040649, + "learning_rate": 0.00011923412283137028, + "loss": 2.5739, + "step": 8824 + }, + { + "epoch": 0.7122104753450085, + "grad_norm": 0.7324833869934082, + "learning_rate": 0.00011921863046527497, + "loss": 2.5461, + "step": 8825 + }, + { + "epoch": 0.7122911790815915, + "grad_norm": 0.6753048896789551, + "learning_rate": 0.00011920313762020113, + "loss": 2.5066, + "step": 8826 + }, + { + "epoch": 0.7123718828181744, + "grad_norm": 0.7861250638961792, + "learning_rate": 0.00011918764429653489, + "loss": 2.5229, + "step": 8827 + }, + { + "epoch": 0.7124525865547575, + "grad_norm": 0.7037342190742493, + "learning_rate": 0.00011917215049466244, + "loss": 2.5443, + "step": 8828 + }, + { + "epoch": 0.7125332902913405, + "grad_norm": 0.7112773060798645, + "learning_rate": 0.00011915665621496985, + "loss": 2.5656, + "step": 8829 + }, + { + "epoch": 0.7126139940279235, + "grad_norm": 0.6384316682815552, + "learning_rate": 0.00011914116145784333, + "loss": 2.5526, + "step": 8830 + }, + { + "epoch": 0.7126946977645064, + "grad_norm": 0.6673600077629089, + "learning_rate": 0.000119125666223669, + "loss": 2.5868, + "step": 8831 + }, + { + "epoch": 0.7127754015010895, + "grad_norm": 0.6927722692489624, + "learning_rate": 0.0001191101705128331, + "loss": 2.6237, + "step": 8832 + }, + { + "epoch": 0.7128561052376725, + "grad_norm": 0.7410106658935547, + "learning_rate": 0.00011909467432572182, + "loss": 2.5652, + "step": 8833 + }, + { + "epoch": 0.7129368089742555, + "grad_norm": 0.6780139803886414, + "learning_rate": 0.0001190791776627213, + "loss": 
2.5343, + "step": 8834 + }, + { + "epoch": 0.7130175127108385, + "grad_norm": 0.7147949934005737, + "learning_rate": 0.00011906368052421781, + "loss": 2.5368, + "step": 8835 + }, + { + "epoch": 0.7130982164474216, + "grad_norm": 0.7092324495315552, + "learning_rate": 0.00011904818291059759, + "loss": 2.538, + "step": 8836 + }, + { + "epoch": 0.7131789201840045, + "grad_norm": 0.761763870716095, + "learning_rate": 0.00011903268482224684, + "loss": 2.5984, + "step": 8837 + }, + { + "epoch": 0.7132596239205875, + "grad_norm": 0.7011365294456482, + "learning_rate": 0.00011901718625955182, + "loss": 2.5383, + "step": 8838 + }, + { + "epoch": 0.7133403276571705, + "grad_norm": 0.7982703447341919, + "learning_rate": 0.00011900168722289882, + "loss": 2.5714, + "step": 8839 + }, + { + "epoch": 0.7134210313937536, + "grad_norm": 0.6788253784179688, + "learning_rate": 0.00011898618771267412, + "loss": 2.5675, + "step": 8840 + }, + { + "epoch": 0.7135017351303365, + "grad_norm": 0.6245018243789673, + "learning_rate": 0.00011897068772926397, + "loss": 2.5497, + "step": 8841 + }, + { + "epoch": 0.7135824388669195, + "grad_norm": 0.732109785079956, + "learning_rate": 0.0001189551872730547, + "loss": 2.5043, + "step": 8842 + }, + { + "epoch": 0.7136631426035025, + "grad_norm": 0.7640885710716248, + "learning_rate": 0.0001189396863444326, + "loss": 2.5974, + "step": 8843 + }, + { + "epoch": 0.7137438463400856, + "grad_norm": 0.6806808710098267, + "learning_rate": 0.00011892418494378403, + "loss": 2.5911, + "step": 8844 + }, + { + "epoch": 0.7138245500766686, + "grad_norm": 0.6730000376701355, + "learning_rate": 0.00011890868307149528, + "loss": 2.5405, + "step": 8845 + }, + { + "epoch": 0.7139052538132515, + "grad_norm": 0.6881929636001587, + "learning_rate": 0.00011889318072795275, + "loss": 2.6083, + "step": 8846 + }, + { + "epoch": 0.7139859575498345, + "grad_norm": 0.7079598307609558, + "learning_rate": 0.00011887767791354275, + "loss": 2.5743, + "step": 8847 + }, + { + 
"epoch": 0.7140666612864176, + "grad_norm": 0.6760475635528564, + "learning_rate": 0.00011886217462865166, + "loss": 2.5925, + "step": 8848 + }, + { + "epoch": 0.7141473650230006, + "grad_norm": 0.6851043701171875, + "learning_rate": 0.00011884667087366587, + "loss": 2.5839, + "step": 8849 + }, + { + "epoch": 0.7142280687595836, + "grad_norm": 0.6805267930030823, + "learning_rate": 0.00011883116664897178, + "loss": 2.562, + "step": 8850 + }, + { + "epoch": 0.7143087724961665, + "grad_norm": 0.6720704436302185, + "learning_rate": 0.00011881566195495581, + "loss": 2.5381, + "step": 8851 + }, + { + "epoch": 0.7143894762327496, + "grad_norm": 0.718166172504425, + "learning_rate": 0.00011880015679200436, + "loss": 2.5912, + "step": 8852 + }, + { + "epoch": 0.7144701799693326, + "grad_norm": 0.6643497943878174, + "learning_rate": 0.00011878465116050383, + "loss": 2.5122, + "step": 8853 + }, + { + "epoch": 0.7145508837059156, + "grad_norm": 0.705186665058136, + "learning_rate": 0.00011876914506084074, + "loss": 2.617, + "step": 8854 + }, + { + "epoch": 0.7146315874424986, + "grad_norm": 0.6417848467826843, + "learning_rate": 0.00011875363849340144, + "loss": 2.5552, + "step": 8855 + }, + { + "epoch": 0.7147122911790816, + "grad_norm": 0.6861358880996704, + "learning_rate": 0.00011873813145857249, + "loss": 2.6324, + "step": 8856 + }, + { + "epoch": 0.7147929949156646, + "grad_norm": 0.7134111523628235, + "learning_rate": 0.00011872262395674027, + "loss": 2.5892, + "step": 8857 + }, + { + "epoch": 0.7148736986522476, + "grad_norm": 0.7177506685256958, + "learning_rate": 0.00011870711598829135, + "loss": 2.5677, + "step": 8858 + }, + { + "epoch": 0.7149544023888306, + "grad_norm": 0.6435763835906982, + "learning_rate": 0.00011869160755361219, + "loss": 2.5452, + "step": 8859 + }, + { + "epoch": 0.7150351061254137, + "grad_norm": 0.6443132758140564, + "learning_rate": 0.00011867609865308935, + "loss": 2.5566, + "step": 8860 + }, + { + "epoch": 0.7151158098619966, + 
"grad_norm": 0.7132347822189331, + "learning_rate": 0.00011866058928710925, + "loss": 2.565, + "step": 8861 + }, + { + "epoch": 0.7151965135985796, + "grad_norm": 0.7803207039833069, + "learning_rate": 0.00011864507945605854, + "loss": 2.556, + "step": 8862 + }, + { + "epoch": 0.7152772173351626, + "grad_norm": 0.7277950644493103, + "learning_rate": 0.00011862956916032367, + "loss": 2.5623, + "step": 8863 + }, + { + "epoch": 0.7153579210717457, + "grad_norm": 0.6812277436256409, + "learning_rate": 0.00011861405840029125, + "loss": 2.6146, + "step": 8864 + }, + { + "epoch": 0.7154386248083286, + "grad_norm": 0.7170509099960327, + "learning_rate": 0.00011859854717634786, + "loss": 2.52, + "step": 8865 + }, + { + "epoch": 0.7155193285449116, + "grad_norm": 0.7282906174659729, + "learning_rate": 0.00011858303548888004, + "loss": 2.5605, + "step": 8866 + }, + { + "epoch": 0.7156000322814946, + "grad_norm": 0.7290246486663818, + "learning_rate": 0.00011856752333827439, + "loss": 2.6292, + "step": 8867 + }, + { + "epoch": 0.7156807360180777, + "grad_norm": 0.6870024800300598, + "learning_rate": 0.00011855201072491752, + "loss": 2.6396, + "step": 8868 + }, + { + "epoch": 0.7157614397546607, + "grad_norm": 0.7336156964302063, + "learning_rate": 0.00011853649764919605, + "loss": 2.6356, + "step": 8869 + }, + { + "epoch": 0.7158421434912436, + "grad_norm": 0.7181294560432434, + "learning_rate": 0.00011852098411149661, + "loss": 2.5163, + "step": 8870 + }, + { + "epoch": 0.7159228472278266, + "grad_norm": 0.7355513572692871, + "learning_rate": 0.00011850547011220583, + "loss": 2.5485, + "step": 8871 + }, + { + "epoch": 0.7160035509644097, + "grad_norm": 0.7005351185798645, + "learning_rate": 0.00011848995565171038, + "loss": 2.5187, + "step": 8872 + }, + { + "epoch": 0.7160842547009927, + "grad_norm": 0.6550194025039673, + "learning_rate": 0.00011847444073039686, + "loss": 2.5174, + "step": 8873 + }, + { + "epoch": 0.7161649584375757, + "grad_norm": 0.6568251252174377, + 
"learning_rate": 0.00011845892534865202, + "loss": 2.5128, + "step": 8874 + }, + { + "epoch": 0.7162456621741586, + "grad_norm": 0.6359419226646423, + "learning_rate": 0.0001184434095068625, + "loss": 2.5967, + "step": 8875 + }, + { + "epoch": 0.7163263659107416, + "grad_norm": 0.6730023622512817, + "learning_rate": 0.00011842789320541504, + "loss": 2.5243, + "step": 8876 + }, + { + "epoch": 0.7164070696473247, + "grad_norm": 0.6750187277793884, + "learning_rate": 0.00011841237644469625, + "loss": 2.602, + "step": 8877 + }, + { + "epoch": 0.7164877733839077, + "grad_norm": 0.7039143443107605, + "learning_rate": 0.00011839685922509291, + "loss": 2.5345, + "step": 8878 + }, + { + "epoch": 0.7165684771204907, + "grad_norm": 0.6602306962013245, + "learning_rate": 0.00011838134154699177, + "loss": 2.5995, + "step": 8879 + }, + { + "epoch": 0.7166491808570736, + "grad_norm": 0.6744598150253296, + "learning_rate": 0.00011836582341077955, + "loss": 2.6005, + "step": 8880 + }, + { + "epoch": 0.7167298845936567, + "grad_norm": 0.7136051058769226, + "learning_rate": 0.00011835030481684302, + "loss": 2.5424, + "step": 8881 + }, + { + "epoch": 0.7168105883302397, + "grad_norm": 0.7085986137390137, + "learning_rate": 0.00011833478576556889, + "loss": 2.5912, + "step": 8882 + }, + { + "epoch": 0.7168912920668227, + "grad_norm": 0.7635689377784729, + "learning_rate": 0.00011831926625734398, + "loss": 2.5836, + "step": 8883 + }, + { + "epoch": 0.7169719958034056, + "grad_norm": 0.6543256640434265, + "learning_rate": 0.00011830374629255508, + "loss": 2.5442, + "step": 8884 + }, + { + "epoch": 0.7170526995399887, + "grad_norm": 0.663840115070343, + "learning_rate": 0.00011828822587158896, + "loss": 2.5529, + "step": 8885 + }, + { + "epoch": 0.7171334032765717, + "grad_norm": 0.6868027448654175, + "learning_rate": 0.00011827270499483247, + "loss": 2.6678, + "step": 8886 + }, + { + "epoch": 0.7172141070131547, + "grad_norm": 0.649172842502594, + "learning_rate": 0.00011825718366267238, 
+ "loss": 2.57, + "step": 8887 + }, + { + "epoch": 0.7172948107497377, + "grad_norm": 0.6818440556526184, + "learning_rate": 0.00011824166187549554, + "loss": 2.5602, + "step": 8888 + }, + { + "epoch": 0.7173755144863208, + "grad_norm": 0.7222314476966858, + "learning_rate": 0.00011822613963368885, + "loss": 2.5526, + "step": 8889 + }, + { + "epoch": 0.7174562182229037, + "grad_norm": 0.7309598922729492, + "learning_rate": 0.00011821061693763909, + "loss": 2.5515, + "step": 8890 + }, + { + "epoch": 0.7175369219594867, + "grad_norm": 0.6935746669769287, + "learning_rate": 0.00011819509378773314, + "loss": 2.5506, + "step": 8891 + }, + { + "epoch": 0.7176176256960697, + "grad_norm": 0.6754423975944519, + "learning_rate": 0.00011817957018435792, + "loss": 2.5621, + "step": 8892 + }, + { + "epoch": 0.7176983294326528, + "grad_norm": 0.7087355852127075, + "learning_rate": 0.00011816404612790026, + "loss": 2.5708, + "step": 8893 + }, + { + "epoch": 0.7177790331692357, + "grad_norm": 0.726820707321167, + "learning_rate": 0.0001181485216187471, + "loss": 2.5741, + "step": 8894 + }, + { + "epoch": 0.7178597369058187, + "grad_norm": 0.6539922952651978, + "learning_rate": 0.00011813299665728532, + "loss": 2.613, + "step": 8895 + }, + { + "epoch": 0.7179404406424017, + "grad_norm": 0.7008066773414612, + "learning_rate": 0.00011811747124390189, + "loss": 2.6029, + "step": 8896 + }, + { + "epoch": 0.7180211443789848, + "grad_norm": 0.6900522708892822, + "learning_rate": 0.00011810194537898374, + "loss": 2.5716, + "step": 8897 + }, + { + "epoch": 0.7181018481155678, + "grad_norm": 0.675345242023468, + "learning_rate": 0.00011808641906291776, + "loss": 2.5742, + "step": 8898 + }, + { + "epoch": 0.7181825518521507, + "grad_norm": 0.6697559356689453, + "learning_rate": 0.00011807089229609092, + "loss": 2.5717, + "step": 8899 + }, + { + "epoch": 0.7182632555887337, + "grad_norm": 0.6874344944953918, + "learning_rate": 0.00011805536507889021, + "loss": 2.5394, + "step": 8900 + }, + { 
+ "epoch": 0.7183439593253168, + "grad_norm": 0.6675494313240051, + "learning_rate": 0.00011803983741170263, + "loss": 2.5655, + "step": 8901 + }, + { + "epoch": 0.7184246630618998, + "grad_norm": 0.6937244534492493, + "learning_rate": 0.00011802430929491517, + "loss": 2.5676, + "step": 8902 + }, + { + "epoch": 0.7185053667984828, + "grad_norm": 0.7591496109962463, + "learning_rate": 0.00011800878072891474, + "loss": 2.5849, + "step": 8903 + }, + { + "epoch": 0.7185860705350657, + "grad_norm": 0.6503129005432129, + "learning_rate": 0.00011799325171408846, + "loss": 2.5416, + "step": 8904 + }, + { + "epoch": 0.7186667742716488, + "grad_norm": 0.6450222134590149, + "learning_rate": 0.00011797772225082333, + "loss": 2.5395, + "step": 8905 + }, + { + "epoch": 0.7187474780082318, + "grad_norm": 0.7317619919776917, + "learning_rate": 0.00011796219233950632, + "loss": 2.609, + "step": 8906 + }, + { + "epoch": 0.7188281817448148, + "grad_norm": 0.7585787773132324, + "learning_rate": 0.00011794666198052455, + "loss": 2.5556, + "step": 8907 + }, + { + "epoch": 0.7189088854813978, + "grad_norm": 0.6718214750289917, + "learning_rate": 0.00011793113117426505, + "loss": 2.5914, + "step": 8908 + }, + { + "epoch": 0.7189895892179808, + "grad_norm": 0.6459314823150635, + "learning_rate": 0.00011791559992111487, + "loss": 2.5956, + "step": 8909 + }, + { + "epoch": 0.7190702929545638, + "grad_norm": 0.6592775583267212, + "learning_rate": 0.00011790006822146113, + "loss": 2.5568, + "step": 8910 + }, + { + "epoch": 0.7191509966911468, + "grad_norm": 0.7277452349662781, + "learning_rate": 0.0001178845360756909, + "loss": 2.5989, + "step": 8911 + }, + { + "epoch": 0.7192317004277298, + "grad_norm": 0.7020131945610046, + "learning_rate": 0.00011786900348419128, + "loss": 2.645, + "step": 8912 + }, + { + "epoch": 0.7193124041643129, + "grad_norm": 0.6746636629104614, + "learning_rate": 0.00011785347044734938, + "loss": 2.5173, + "step": 8913 + }, + { + "epoch": 0.7193931079008958, + 
"grad_norm": 0.6782798171043396, + "learning_rate": 0.0001178379369655523, + "loss": 2.6007, + "step": 8914 + }, + { + "epoch": 0.7194738116374788, + "grad_norm": 0.705498218536377, + "learning_rate": 0.00011782240303918724, + "loss": 2.5408, + "step": 8915 + }, + { + "epoch": 0.7195545153740618, + "grad_norm": 0.675532341003418, + "learning_rate": 0.00011780686866864128, + "loss": 2.5188, + "step": 8916 + }, + { + "epoch": 0.7196352191106449, + "grad_norm": 0.6552390456199646, + "learning_rate": 0.00011779133385430161, + "loss": 2.5409, + "step": 8917 + }, + { + "epoch": 0.7197159228472279, + "grad_norm": 0.6589654088020325, + "learning_rate": 0.00011777579859655544, + "loss": 2.5447, + "step": 8918 + }, + { + "epoch": 0.7197966265838108, + "grad_norm": 0.7548382878303528, + "learning_rate": 0.00011776026289578985, + "loss": 2.5239, + "step": 8919 + }, + { + "epoch": 0.7198773303203938, + "grad_norm": 0.697325587272644, + "learning_rate": 0.00011774472675239207, + "loss": 2.5887, + "step": 8920 + }, + { + "epoch": 0.7199580340569769, + "grad_norm": 0.734462320804596, + "learning_rate": 0.00011772919016674934, + "loss": 2.5847, + "step": 8921 + }, + { + "epoch": 0.7200387377935599, + "grad_norm": 0.6736955642700195, + "learning_rate": 0.00011771365313924886, + "loss": 2.558, + "step": 8922 + }, + { + "epoch": 0.7201194415301428, + "grad_norm": 0.7157856822013855, + "learning_rate": 0.00011769811567027784, + "loss": 2.6199, + "step": 8923 + }, + { + "epoch": 0.7202001452667258, + "grad_norm": 0.7045830488204956, + "learning_rate": 0.0001176825777602235, + "loss": 2.576, + "step": 8924 + }, + { + "epoch": 0.7202808490033088, + "grad_norm": 0.6875419020652771, + "learning_rate": 0.00011766703940947308, + "loss": 2.6045, + "step": 8925 + }, + { + "epoch": 0.7203615527398919, + "grad_norm": 0.7313494086265564, + "learning_rate": 0.00011765150061841387, + "loss": 2.5388, + "step": 8926 + }, + { + "epoch": 0.7204422564764749, + "grad_norm": 0.7223608493804932, + 
"learning_rate": 0.00011763596138743313, + "loss": 2.5466, + "step": 8927 + }, + { + "epoch": 0.7205229602130578, + "grad_norm": 0.7289614081382751, + "learning_rate": 0.00011762042171691816, + "loss": 2.5862, + "step": 8928 + }, + { + "epoch": 0.7206036639496408, + "grad_norm": 0.7098878026008606, + "learning_rate": 0.00011760488160725617, + "loss": 2.5497, + "step": 8929 + }, + { + "epoch": 0.7206843676862239, + "grad_norm": 0.7096838355064392, + "learning_rate": 0.00011758934105883452, + "loss": 2.558, + "step": 8930 + }, + { + "epoch": 0.7207650714228069, + "grad_norm": 0.7334743738174438, + "learning_rate": 0.00011757380007204055, + "loss": 2.5966, + "step": 8931 + }, + { + "epoch": 0.7208457751593899, + "grad_norm": 0.7192476391792297, + "learning_rate": 0.00011755825864726149, + "loss": 2.5307, + "step": 8932 + }, + { + "epoch": 0.7209264788959728, + "grad_norm": 0.7329632043838501, + "learning_rate": 0.00011754271678488478, + "loss": 2.6453, + "step": 8933 + }, + { + "epoch": 0.7210071826325559, + "grad_norm": 0.6827974915504456, + "learning_rate": 0.00011752717448529766, + "loss": 2.5507, + "step": 8934 + }, + { + "epoch": 0.7210878863691389, + "grad_norm": 0.8292449116706848, + "learning_rate": 0.00011751163174888756, + "loss": 2.6178, + "step": 8935 + }, + { + "epoch": 0.7211685901057219, + "grad_norm": 0.6504058837890625, + "learning_rate": 0.00011749608857604183, + "loss": 2.574, + "step": 8936 + }, + { + "epoch": 0.7212492938423049, + "grad_norm": 0.6567742824554443, + "learning_rate": 0.00011748054496714785, + "loss": 2.45, + "step": 8937 + }, + { + "epoch": 0.7213299975788879, + "grad_norm": 0.6699101328849792, + "learning_rate": 0.00011746500092259296, + "loss": 2.5827, + "step": 8938 + }, + { + "epoch": 0.7214107013154709, + "grad_norm": 0.7664934992790222, + "learning_rate": 0.0001174494564427646, + "loss": 2.5246, + "step": 8939 + }, + { + "epoch": 0.7214914050520539, + "grad_norm": 0.7276309132575989, + "learning_rate": 0.00011743391152805017, 
+ "loss": 2.6096, + "step": 8940 + }, + { + "epoch": 0.7215721087886369, + "grad_norm": 0.7248005867004395, + "learning_rate": 0.0001174183661788371, + "loss": 2.6362, + "step": 8941 + }, + { + "epoch": 0.72165281252522, + "grad_norm": 0.7773801684379578, + "learning_rate": 0.00011740282039551282, + "loss": 2.547, + "step": 8942 + }, + { + "epoch": 0.7217335162618029, + "grad_norm": 0.7346466779708862, + "learning_rate": 0.00011738727417846476, + "loss": 2.5635, + "step": 8943 + }, + { + "epoch": 0.7218142199983859, + "grad_norm": 0.7042707800865173, + "learning_rate": 0.0001173717275280804, + "loss": 2.5593, + "step": 8944 + }, + { + "epoch": 0.7218949237349689, + "grad_norm": 0.6894899010658264, + "learning_rate": 0.00011735618044474712, + "loss": 2.5272, + "step": 8945 + }, + { + "epoch": 0.721975627471552, + "grad_norm": 0.6643744111061096, + "learning_rate": 0.00011734063292885249, + "loss": 2.6001, + "step": 8946 + }, + { + "epoch": 0.722056331208135, + "grad_norm": 0.7543076276779175, + "learning_rate": 0.00011732508498078396, + "loss": 2.558, + "step": 8947 + }, + { + "epoch": 0.7221370349447179, + "grad_norm": 0.7065596580505371, + "learning_rate": 0.00011730953660092903, + "loss": 2.6255, + "step": 8948 + }, + { + "epoch": 0.7222177386813009, + "grad_norm": 0.6968158483505249, + "learning_rate": 0.0001172939877896752, + "loss": 2.5277, + "step": 8949 + }, + { + "epoch": 0.722298442417884, + "grad_norm": 0.6918557286262512, + "learning_rate": 0.00011727843854740996, + "loss": 2.5456, + "step": 8950 + }, + { + "epoch": 0.722379146154467, + "grad_norm": 0.7262142300605774, + "learning_rate": 0.00011726288887452088, + "loss": 2.5345, + "step": 8951 + }, + { + "epoch": 0.7224598498910499, + "grad_norm": 0.7423329949378967, + "learning_rate": 0.00011724733877139548, + "loss": 2.6335, + "step": 8952 + }, + { + "epoch": 0.7225405536276329, + "grad_norm": 0.7734495997428894, + "learning_rate": 0.00011723178823842136, + "loss": 2.5951, + "step": 8953 + }, + { + 
"epoch": 0.722621257364216, + "grad_norm": 0.6792804598808289, + "learning_rate": 0.00011721623727598597, + "loss": 2.5927, + "step": 8954 + }, + { + "epoch": 0.722701961100799, + "grad_norm": 0.7971853017807007, + "learning_rate": 0.00011720068588447697, + "loss": 2.5451, + "step": 8955 + }, + { + "epoch": 0.722782664837382, + "grad_norm": 0.7264395356178284, + "learning_rate": 0.00011718513406428189, + "loss": 2.5769, + "step": 8956 + }, + { + "epoch": 0.7228633685739649, + "grad_norm": 0.6536725759506226, + "learning_rate": 0.0001171695818157884, + "loss": 2.6285, + "step": 8957 + }, + { + "epoch": 0.722944072310548, + "grad_norm": 0.6676235198974609, + "learning_rate": 0.000117154029139384, + "loss": 2.5896, + "step": 8958 + }, + { + "epoch": 0.723024776047131, + "grad_norm": 0.7104088664054871, + "learning_rate": 0.00011713847603545636, + "loss": 2.5606, + "step": 8959 + }, + { + "epoch": 0.723105479783714, + "grad_norm": 0.6646785140037537, + "learning_rate": 0.0001171229225043931, + "loss": 2.5617, + "step": 8960 + }, + { + "epoch": 0.723186183520297, + "grad_norm": 0.7148672342300415, + "learning_rate": 0.00011710736854658186, + "loss": 2.5855, + "step": 8961 + }, + { + "epoch": 0.72326688725688, + "grad_norm": 0.6864955425262451, + "learning_rate": 0.00011709181416241028, + "loss": 2.6098, + "step": 8962 + }, + { + "epoch": 0.723347590993463, + "grad_norm": 0.7049087285995483, + "learning_rate": 0.00011707625935226602, + "loss": 2.506, + "step": 8963 + }, + { + "epoch": 0.723428294730046, + "grad_norm": 0.6419759392738342, + "learning_rate": 0.00011706070411653672, + "loss": 2.5485, + "step": 8964 + }, + { + "epoch": 0.723508998466629, + "grad_norm": 0.6879174709320068, + "learning_rate": 0.00011704514845561007, + "loss": 2.5373, + "step": 8965 + }, + { + "epoch": 0.7235897022032121, + "grad_norm": 0.6473780274391174, + "learning_rate": 0.00011702959236987378, + "loss": 2.5479, + "step": 8966 + }, + { + "epoch": 0.723670405939795, + "grad_norm": 
0.6924241185188293, + "learning_rate": 0.00011701403585971553, + "loss": 2.5679, + "step": 8967 + }, + { + "epoch": 0.723751109676378, + "grad_norm": 0.7452483773231506, + "learning_rate": 0.00011699847892552305, + "loss": 2.5043, + "step": 8968 + }, + { + "epoch": 0.723831813412961, + "grad_norm": 0.7517218589782715, + "learning_rate": 0.00011698292156768402, + "loss": 2.5554, + "step": 8969 + }, + { + "epoch": 0.7239125171495441, + "grad_norm": 0.6492432355880737, + "learning_rate": 0.00011696736378658618, + "loss": 2.6091, + "step": 8970 + }, + { + "epoch": 0.723993220886127, + "grad_norm": 0.740093469619751, + "learning_rate": 0.0001169518055826173, + "loss": 2.5629, + "step": 8971 + }, + { + "epoch": 0.72407392462271, + "grad_norm": 0.7186923027038574, + "learning_rate": 0.00011693624695616509, + "loss": 2.5537, + "step": 8972 + }, + { + "epoch": 0.724154628359293, + "grad_norm": 0.7066059112548828, + "learning_rate": 0.00011692068790761737, + "loss": 2.5115, + "step": 8973 + }, + { + "epoch": 0.7242353320958761, + "grad_norm": 0.7031805515289307, + "learning_rate": 0.00011690512843736185, + "loss": 2.596, + "step": 8974 + }, + { + "epoch": 0.7243160358324591, + "grad_norm": 0.7308956384658813, + "learning_rate": 0.00011688956854578635, + "loss": 2.6311, + "step": 8975 + }, + { + "epoch": 0.724396739569042, + "grad_norm": 0.6926052570343018, + "learning_rate": 0.00011687400823327863, + "loss": 2.5659, + "step": 8976 + }, + { + "epoch": 0.724477443305625, + "grad_norm": 0.69638991355896, + "learning_rate": 0.00011685844750022654, + "loss": 2.4792, + "step": 8977 + }, + { + "epoch": 0.724558147042208, + "grad_norm": 0.6858355402946472, + "learning_rate": 0.00011684288634701785, + "loss": 2.5707, + "step": 8978 + }, + { + "epoch": 0.7246388507787911, + "grad_norm": 0.6673639416694641, + "learning_rate": 0.00011682732477404044, + "loss": 2.5627, + "step": 8979 + }, + { + "epoch": 0.7247195545153741, + "grad_norm": 0.7174322605133057, + "learning_rate": 
0.00011681176278168206, + "loss": 2.5801, + "step": 8980 + }, + { + "epoch": 0.724800258251957, + "grad_norm": 0.6840930581092834, + "learning_rate": 0.00011679620037033064, + "loss": 2.4994, + "step": 8981 + }, + { + "epoch": 0.72488096198854, + "grad_norm": 0.7179884910583496, + "learning_rate": 0.00011678063754037399, + "loss": 2.6408, + "step": 8982 + }, + { + "epoch": 0.7249616657251231, + "grad_norm": 0.6564825773239136, + "learning_rate": 0.00011676507429219998, + "loss": 2.5412, + "step": 8983 + }, + { + "epoch": 0.7250423694617061, + "grad_norm": 0.7020624876022339, + "learning_rate": 0.00011674951062619652, + "loss": 2.5778, + "step": 8984 + }, + { + "epoch": 0.7251230731982891, + "grad_norm": 0.8061255812644958, + "learning_rate": 0.00011673394654275145, + "loss": 2.5581, + "step": 8985 + }, + { + "epoch": 0.725203776934872, + "grad_norm": 0.7653982043266296, + "learning_rate": 0.00011671838204225267, + "loss": 2.5324, + "step": 8986 + }, + { + "epoch": 0.7252844806714551, + "grad_norm": 0.7168377041816711, + "learning_rate": 0.00011670281712508816, + "loss": 2.6357, + "step": 8987 + }, + { + "epoch": 0.7253651844080381, + "grad_norm": 0.6860470771789551, + "learning_rate": 0.00011668725179164575, + "loss": 2.5367, + "step": 8988 + }, + { + "epoch": 0.7254458881446211, + "grad_norm": 0.7175878286361694, + "learning_rate": 0.00011667168604231342, + "loss": 2.549, + "step": 8989 + }, + { + "epoch": 0.725526591881204, + "grad_norm": 0.7124783992767334, + "learning_rate": 0.00011665611987747907, + "loss": 2.5566, + "step": 8990 + }, + { + "epoch": 0.7256072956177871, + "grad_norm": 0.6575417518615723, + "learning_rate": 0.00011664055329753067, + "loss": 2.5455, + "step": 8991 + }, + { + "epoch": 0.7256879993543701, + "grad_norm": 0.6576877236366272, + "learning_rate": 0.00011662498630285623, + "loss": 2.5596, + "step": 8992 + }, + { + "epoch": 0.7257687030909531, + "grad_norm": 0.7235110402107239, + "learning_rate": 0.00011660941889384365, + "loss": 2.6199, 
+ "step": 8993 + }, + { + "epoch": 0.7258494068275361, + "grad_norm": 0.6623982787132263, + "learning_rate": 0.00011659385107088092, + "loss": 2.5642, + "step": 8994 + }, + { + "epoch": 0.7259301105641192, + "grad_norm": 0.7113857865333557, + "learning_rate": 0.00011657828283435605, + "loss": 2.5631, + "step": 8995 + }, + { + "epoch": 0.7260108143007021, + "grad_norm": 0.7076124548912048, + "learning_rate": 0.00011656271418465702, + "loss": 2.5141, + "step": 8996 + }, + { + "epoch": 0.7260915180372851, + "grad_norm": 0.7534562349319458, + "learning_rate": 0.00011654714512217188, + "loss": 2.5896, + "step": 8997 + }, + { + "epoch": 0.7261722217738681, + "grad_norm": 0.7393170595169067, + "learning_rate": 0.00011653157564728865, + "loss": 2.5848, + "step": 8998 + }, + { + "epoch": 0.7262529255104512, + "grad_norm": 0.6829591989517212, + "learning_rate": 0.0001165160057603953, + "loss": 2.5439, + "step": 8999 + }, + { + "epoch": 0.7263336292470342, + "grad_norm": 0.6527189016342163, + "learning_rate": 0.00011650043546187995, + "loss": 2.5655, + "step": 9000 + }, + { + "epoch": 0.7263336292470342, + "eval_loss": 2.487652063369751, + "eval_runtime": 845.9129, + "eval_samples_per_second": 3.097, + "eval_steps_per_second": 0.517, + "step": 9000 + }, + { + "epoch": 0.7264143329836171, + "grad_norm": 0.6545615196228027, + "learning_rate": 0.00011648486475213058, + "loss": 2.5366, + "step": 9001 + }, + { + "epoch": 0.7264950367202001, + "grad_norm": 0.6854971647262573, + "learning_rate": 0.00011646929363153529, + "loss": 2.5832, + "step": 9002 + }, + { + "epoch": 0.7265757404567832, + "grad_norm": 0.7745552062988281, + "learning_rate": 0.00011645372210048218, + "loss": 2.5854, + "step": 9003 + }, + { + "epoch": 0.7266564441933662, + "grad_norm": 0.7159156203269958, + "learning_rate": 0.00011643815015935928, + "loss": 2.614, + "step": 9004 + }, + { + "epoch": 0.7267371479299491, + "grad_norm": 0.700074315071106, + "learning_rate": 0.00011642257780855475, + "loss": 2.6124, + 
"step": 9005 + }, + { + "epoch": 0.7268178516665321, + "grad_norm": 0.7367869019508362, + "learning_rate": 0.0001164070050484566, + "loss": 2.5512, + "step": 9006 + }, + { + "epoch": 0.7268985554031152, + "grad_norm": 0.6623905897140503, + "learning_rate": 0.00011639143187945301, + "loss": 2.5724, + "step": 9007 + }, + { + "epoch": 0.7269792591396982, + "grad_norm": 0.7111610770225525, + "learning_rate": 0.0001163758583019321, + "loss": 2.547, + "step": 9008 + }, + { + "epoch": 0.7270599628762812, + "grad_norm": 0.6860959529876709, + "learning_rate": 0.00011636028431628199, + "loss": 2.532, + "step": 9009 + }, + { + "epoch": 0.7271406666128641, + "grad_norm": 0.7606309056282043, + "learning_rate": 0.00011634470992289084, + "loss": 2.5214, + "step": 9010 + }, + { + "epoch": 0.7272213703494472, + "grad_norm": 0.6440508365631104, + "learning_rate": 0.00011632913512214677, + "loss": 2.5554, + "step": 9011 + }, + { + "epoch": 0.7273020740860302, + "grad_norm": 0.6770462393760681, + "learning_rate": 0.00011631355991443796, + "loss": 2.5877, + "step": 9012 + }, + { + "epoch": 0.7273827778226132, + "grad_norm": 0.6419155597686768, + "learning_rate": 0.00011629798430015262, + "loss": 2.5337, + "step": 9013 + }, + { + "epoch": 0.7274634815591962, + "grad_norm": 0.6782121658325195, + "learning_rate": 0.00011628240827967891, + "loss": 2.5152, + "step": 9014 + }, + { + "epoch": 0.7275441852957792, + "grad_norm": 0.6972285509109497, + "learning_rate": 0.00011626683185340501, + "loss": 2.5628, + "step": 9015 + }, + { + "epoch": 0.7276248890323622, + "grad_norm": 0.6823342442512512, + "learning_rate": 0.00011625125502171914, + "loss": 2.5977, + "step": 9016 + }, + { + "epoch": 0.7277055927689452, + "grad_norm": 0.723311722278595, + "learning_rate": 0.0001162356777850095, + "loss": 2.5772, + "step": 9017 + }, + { + "epoch": 0.7277862965055282, + "grad_norm": 0.7395427227020264, + "learning_rate": 0.00011622010014366435, + "loss": 2.6068, + "step": 9018 + }, + { + "epoch": 
0.7278670002421113, + "grad_norm": 0.6970974206924438, + "learning_rate": 0.00011620452209807192, + "loss": 2.5577, + "step": 9019 + }, + { + "epoch": 0.7279477039786942, + "grad_norm": 0.6921418309211731, + "learning_rate": 0.0001161889436486204, + "loss": 2.5476, + "step": 9020 + }, + { + "epoch": 0.7280284077152772, + "grad_norm": 0.7243841886520386, + "learning_rate": 0.0001161733647956981, + "loss": 2.579, + "step": 9021 + }, + { + "epoch": 0.7281091114518602, + "grad_norm": 0.7240262627601624, + "learning_rate": 0.0001161577855396933, + "loss": 2.5959, + "step": 9022 + }, + { + "epoch": 0.7281898151884433, + "grad_norm": 0.7215476632118225, + "learning_rate": 0.0001161422058809942, + "loss": 2.5979, + "step": 9023 + }, + { + "epoch": 0.7282705189250263, + "grad_norm": 0.7109708786010742, + "learning_rate": 0.00011612662581998917, + "loss": 2.5912, + "step": 9024 + }, + { + "epoch": 0.7283512226616092, + "grad_norm": 0.6814073920249939, + "learning_rate": 0.00011611104535706645, + "loss": 2.5742, + "step": 9025 + }, + { + "epoch": 0.7284319263981922, + "grad_norm": 0.6788144707679749, + "learning_rate": 0.0001160954644926144, + "loss": 2.5656, + "step": 9026 + }, + { + "epoch": 0.7285126301347752, + "grad_norm": 0.7312989830970764, + "learning_rate": 0.00011607988322702126, + "loss": 2.5877, + "step": 9027 + }, + { + "epoch": 0.7285933338713583, + "grad_norm": 0.6725338697433472, + "learning_rate": 0.0001160643015606754, + "loss": 2.5261, + "step": 9028 + }, + { + "epoch": 0.7286740376079412, + "grad_norm": 0.7439326047897339, + "learning_rate": 0.00011604871949396516, + "loss": 2.603, + "step": 9029 + }, + { + "epoch": 0.7287547413445242, + "grad_norm": 0.7091783285140991, + "learning_rate": 0.00011603313702727889, + "loss": 2.5227, + "step": 9030 + }, + { + "epoch": 0.7288354450811072, + "grad_norm": 0.7474398016929626, + "learning_rate": 0.00011601755416100492, + "loss": 2.616, + "step": 9031 + }, + { + "epoch": 0.7289161488176903, + "grad_norm": 
0.6904098987579346, + "learning_rate": 0.00011600197089553162, + "loss": 2.556, + "step": 9032 + }, + { + "epoch": 0.7289968525542733, + "grad_norm": 0.7305783033370972, + "learning_rate": 0.00011598638723124739, + "loss": 2.5633, + "step": 9033 + }, + { + "epoch": 0.7290775562908562, + "grad_norm": 0.6626651883125305, + "learning_rate": 0.00011597080316854062, + "loss": 2.5862, + "step": 9034 + }, + { + "epoch": 0.7291582600274392, + "grad_norm": 0.683102548122406, + "learning_rate": 0.00011595521870779968, + "loss": 2.5629, + "step": 9035 + }, + { + "epoch": 0.7292389637640223, + "grad_norm": 0.7486757636070251, + "learning_rate": 0.00011593963384941295, + "loss": 2.5831, + "step": 9036 + }, + { + "epoch": 0.7293196675006053, + "grad_norm": 0.8059591054916382, + "learning_rate": 0.00011592404859376888, + "loss": 2.6414, + "step": 9037 + }, + { + "epoch": 0.7294003712371883, + "grad_norm": 0.8371721506118774, + "learning_rate": 0.00011590846294125594, + "loss": 2.643, + "step": 9038 + }, + { + "epoch": 0.7294810749737712, + "grad_norm": 0.7216931581497192, + "learning_rate": 0.00011589287689226246, + "loss": 2.6, + "step": 9039 + }, + { + "epoch": 0.7295617787103543, + "grad_norm": 0.6940354704856873, + "learning_rate": 0.00011587729044717701, + "loss": 2.546, + "step": 9040 + }, + { + "epoch": 0.7296424824469373, + "grad_norm": 0.6888829469680786, + "learning_rate": 0.00011586170360638792, + "loss": 2.5878, + "step": 9041 + }, + { + "epoch": 0.7297231861835203, + "grad_norm": 0.6863886117935181, + "learning_rate": 0.00011584611637028373, + "loss": 2.5389, + "step": 9042 + }, + { + "epoch": 0.7298038899201033, + "grad_norm": 0.6670756936073303, + "learning_rate": 0.00011583052873925294, + "loss": 2.5465, + "step": 9043 + }, + { + "epoch": 0.7298845936566863, + "grad_norm": 0.7441220879554749, + "learning_rate": 0.00011581494071368392, + "loss": 2.5679, + "step": 9044 + }, + { + "epoch": 0.7299652973932693, + "grad_norm": 0.7135717272758484, + "learning_rate": 
0.0001157993522939653, + "loss": 2.5341, + "step": 9045 + }, + { + "epoch": 0.7300460011298523, + "grad_norm": 0.6837992072105408, + "learning_rate": 0.00011578376348048547, + "loss": 2.5233, + "step": 9046 + }, + { + "epoch": 0.7301267048664353, + "grad_norm": 0.706666886806488, + "learning_rate": 0.00011576817427363302, + "loss": 2.6109, + "step": 9047 + }, + { + "epoch": 0.7302074086030184, + "grad_norm": 0.6856269240379333, + "learning_rate": 0.00011575258467379646, + "loss": 2.5651, + "step": 9048 + }, + { + "epoch": 0.7302881123396013, + "grad_norm": 0.6931480169296265, + "learning_rate": 0.00011573699468136427, + "loss": 2.6031, + "step": 9049 + }, + { + "epoch": 0.7303688160761843, + "grad_norm": 0.6558480858802795, + "learning_rate": 0.00011572140429672508, + "loss": 2.5661, + "step": 9050 + }, + { + "epoch": 0.7304495198127673, + "grad_norm": 0.6468425393104553, + "learning_rate": 0.00011570581352026742, + "loss": 2.5171, + "step": 9051 + }, + { + "epoch": 0.7305302235493504, + "grad_norm": 0.7204702496528625, + "learning_rate": 0.00011569022235237974, + "loss": 2.5861, + "step": 9052 + }, + { + "epoch": 0.7306109272859334, + "grad_norm": 0.7536416053771973, + "learning_rate": 0.00011567463079345078, + "loss": 2.633, + "step": 9053 + }, + { + "epoch": 0.7306916310225163, + "grad_norm": 0.6597960591316223, + "learning_rate": 0.00011565903884386904, + "loss": 2.5327, + "step": 9054 + }, + { + "epoch": 0.7307723347590993, + "grad_norm": 0.689153254032135, + "learning_rate": 0.0001156434465040231, + "loss": 2.5397, + "step": 9055 + }, + { + "epoch": 0.7308530384956824, + "grad_norm": 0.7664844393730164, + "learning_rate": 0.00011562785377430159, + "loss": 2.4852, + "step": 9056 + }, + { + "epoch": 0.7309337422322654, + "grad_norm": 0.7122881412506104, + "learning_rate": 0.0001156122606550931, + "loss": 2.5401, + "step": 9057 + }, + { + "epoch": 0.7310144459688483, + "grad_norm": 0.6937551498413086, + "learning_rate": 0.00011559666714678627, + "loss": 2.5705, 
+ "step": 9058 + }, + { + "epoch": 0.7310951497054313, + "grad_norm": 0.6504047513008118, + "learning_rate": 0.00011558107324976974, + "loss": 2.5638, + "step": 9059 + }, + { + "epoch": 0.7311758534420144, + "grad_norm": 0.7759538888931274, + "learning_rate": 0.0001155654789644321, + "loss": 2.5864, + "step": 9060 + }, + { + "epoch": 0.7312565571785974, + "grad_norm": 0.719859778881073, + "learning_rate": 0.00011554988429116207, + "loss": 2.519, + "step": 9061 + }, + { + "epoch": 0.7313372609151804, + "grad_norm": 0.7159178853034973, + "learning_rate": 0.00011553428923034826, + "loss": 2.5301, + "step": 9062 + }, + { + "epoch": 0.7314179646517633, + "grad_norm": 0.6584001183509827, + "learning_rate": 0.00011551869378237934, + "loss": 2.4716, + "step": 9063 + }, + { + "epoch": 0.7314986683883464, + "grad_norm": 0.6548463702201843, + "learning_rate": 0.00011550309794764405, + "loss": 2.5637, + "step": 9064 + }, + { + "epoch": 0.7315793721249294, + "grad_norm": 0.73887699842453, + "learning_rate": 0.000115487501726531, + "loss": 2.5813, + "step": 9065 + }, + { + "epoch": 0.7316600758615124, + "grad_norm": 0.7856181859970093, + "learning_rate": 0.00011547190511942893, + "loss": 2.592, + "step": 9066 + }, + { + "epoch": 0.7317407795980954, + "grad_norm": 0.7040740847587585, + "learning_rate": 0.00011545630812672654, + "loss": 2.5324, + "step": 9067 + }, + { + "epoch": 0.7318214833346784, + "grad_norm": 0.7316064238548279, + "learning_rate": 0.00011544071074881253, + "loss": 2.5487, + "step": 9068 + }, + { + "epoch": 0.7319021870712614, + "grad_norm": 0.7020413279533386, + "learning_rate": 0.00011542511298607568, + "loss": 2.5179, + "step": 9069 + }, + { + "epoch": 0.7319828908078444, + "grad_norm": 0.672605574131012, + "learning_rate": 0.00011540951483890468, + "loss": 2.5367, + "step": 9070 + }, + { + "epoch": 0.7320635945444274, + "grad_norm": 0.7668856382369995, + "learning_rate": 0.00011539391630768828, + "loss": 2.6089, + "step": 9071 + }, + { + "epoch": 
0.7321442982810105, + "grad_norm": 0.6641809940338135, + "learning_rate": 0.00011537831739281524, + "loss": 2.5411, + "step": 9072 + }, + { + "epoch": 0.7322250020175934, + "grad_norm": 0.7142000198364258, + "learning_rate": 0.00011536271809467434, + "loss": 2.5469, + "step": 9073 + }, + { + "epoch": 0.7323057057541764, + "grad_norm": 0.7266140580177307, + "learning_rate": 0.00011534711841365435, + "loss": 2.5565, + "step": 9074 + }, + { + "epoch": 0.7323864094907594, + "grad_norm": 0.6763899326324463, + "learning_rate": 0.00011533151835014407, + "loss": 2.551, + "step": 9075 + }, + { + "epoch": 0.7324671132273425, + "grad_norm": 0.6517418026924133, + "learning_rate": 0.00011531591790453224, + "loss": 2.5415, + "step": 9076 + }, + { + "epoch": 0.7325478169639255, + "grad_norm": 0.6602214574813843, + "learning_rate": 0.00011530031707720772, + "loss": 2.593, + "step": 9077 + }, + { + "epoch": 0.7326285207005084, + "grad_norm": 0.7448844313621521, + "learning_rate": 0.00011528471586855931, + "loss": 2.5598, + "step": 9078 + }, + { + "epoch": 0.7327092244370914, + "grad_norm": 0.7197073698043823, + "learning_rate": 0.00011526911427897579, + "loss": 2.5128, + "step": 9079 + }, + { + "epoch": 0.7327899281736744, + "grad_norm": 0.7245968580245972, + "learning_rate": 0.00011525351230884606, + "loss": 2.5016, + "step": 9080 + }, + { + "epoch": 0.7328706319102575, + "grad_norm": 0.6715837717056274, + "learning_rate": 0.00011523790995855892, + "loss": 2.5469, + "step": 9081 + }, + { + "epoch": 0.7329513356468405, + "grad_norm": 0.7143638730049133, + "learning_rate": 0.00011522230722850325, + "loss": 2.5164, + "step": 9082 + }, + { + "epoch": 0.7330320393834234, + "grad_norm": 0.6809647083282471, + "learning_rate": 0.00011520670411906787, + "loss": 2.6071, + "step": 9083 + }, + { + "epoch": 0.7331127431200064, + "grad_norm": 0.7160956859588623, + "learning_rate": 0.00011519110063064167, + "loss": 2.5346, + "step": 9084 + }, + { + "epoch": 0.7331934468565895, + "grad_norm": 
0.6814724802970886, + "learning_rate": 0.00011517549676361357, + "loss": 2.5499, + "step": 9085 + }, + { + "epoch": 0.7332741505931725, + "grad_norm": 0.6914821267127991, + "learning_rate": 0.00011515989251837239, + "loss": 2.5386, + "step": 9086 + }, + { + "epoch": 0.7333548543297554, + "grad_norm": 0.7292554378509521, + "learning_rate": 0.00011514428789530705, + "loss": 2.5642, + "step": 9087 + }, + { + "epoch": 0.7334355580663384, + "grad_norm": 0.6894826292991638, + "learning_rate": 0.00011512868289480647, + "loss": 2.6131, + "step": 9088 + }, + { + "epoch": 0.7335162618029215, + "grad_norm": 0.658770740032196, + "learning_rate": 0.00011511307751725957, + "loss": 2.5594, + "step": 9089 + }, + { + "epoch": 0.7335969655395045, + "grad_norm": 0.7508681416511536, + "learning_rate": 0.0001150974717630553, + "loss": 2.595, + "step": 9090 + }, + { + "epoch": 0.7336776692760875, + "grad_norm": 0.69661545753479, + "learning_rate": 0.00011508186563258256, + "loss": 2.5803, + "step": 9091 + }, + { + "epoch": 0.7337583730126704, + "grad_norm": 0.7277412414550781, + "learning_rate": 0.00011506625912623028, + "loss": 2.5456, + "step": 9092 + }, + { + "epoch": 0.7338390767492535, + "grad_norm": 0.658329963684082, + "learning_rate": 0.00011505065224438745, + "loss": 2.5177, + "step": 9093 + }, + { + "epoch": 0.7339197804858365, + "grad_norm": 0.7277211546897888, + "learning_rate": 0.00011503504498744302, + "loss": 2.553, + "step": 9094 + }, + { + "epoch": 0.7340004842224195, + "grad_norm": 0.7240201830863953, + "learning_rate": 0.00011501943735578598, + "loss": 2.5851, + "step": 9095 + }, + { + "epoch": 0.7340811879590025, + "grad_norm": 0.6565662026405334, + "learning_rate": 0.00011500382934980529, + "loss": 2.5865, + "step": 9096 + }, + { + "epoch": 0.7341618916955855, + "grad_norm": 0.658268392086029, + "learning_rate": 0.00011498822096988995, + "loss": 2.5402, + "step": 9097 + }, + { + "epoch": 0.7342425954321685, + "grad_norm": 0.7305087447166443, + "learning_rate": 
0.00011497261221642894, + "loss": 2.5483, + "step": 9098 + }, + { + "epoch": 0.7343232991687515, + "grad_norm": 0.7271504402160645, + "learning_rate": 0.00011495700308981134, + "loss": 2.5303, + "step": 9099 + }, + { + "epoch": 0.7344040029053345, + "grad_norm": 0.70429527759552, + "learning_rate": 0.0001149413935904261, + "loss": 2.5878, + "step": 9100 + }, + { + "epoch": 0.7344847066419176, + "grad_norm": 0.7168769836425781, + "learning_rate": 0.00011492578371866229, + "loss": 2.6017, + "step": 9101 + }, + { + "epoch": 0.7345654103785005, + "grad_norm": 0.7131996154785156, + "learning_rate": 0.00011491017347490891, + "loss": 2.5439, + "step": 9102 + }, + { + "epoch": 0.7346461141150835, + "grad_norm": 0.660321056842804, + "learning_rate": 0.00011489456285955504, + "loss": 2.5236, + "step": 9103 + }, + { + "epoch": 0.7347268178516665, + "grad_norm": 0.6742995977401733, + "learning_rate": 0.00011487895187298977, + "loss": 2.5375, + "step": 9104 + }, + { + "epoch": 0.7348075215882496, + "grad_norm": 0.6380610466003418, + "learning_rate": 0.00011486334051560206, + "loss": 2.5173, + "step": 9105 + }, + { + "epoch": 0.7348882253248326, + "grad_norm": 0.6948198080062866, + "learning_rate": 0.0001148477287877811, + "loss": 2.5247, + "step": 9106 + }, + { + "epoch": 0.7349689290614155, + "grad_norm": 0.7088696360588074, + "learning_rate": 0.00011483211668991591, + "loss": 2.587, + "step": 9107 + }, + { + "epoch": 0.7350496327979985, + "grad_norm": 0.6278921961784363, + "learning_rate": 0.00011481650422239556, + "loss": 2.5652, + "step": 9108 + }, + { + "epoch": 0.7351303365345816, + "grad_norm": 0.6901956796646118, + "learning_rate": 0.00011480089138560926, + "loss": 2.5964, + "step": 9109 + }, + { + "epoch": 0.7352110402711646, + "grad_norm": 0.7264819145202637, + "learning_rate": 0.00011478527817994604, + "loss": 2.5437, + "step": 9110 + }, + { + "epoch": 0.7352917440077475, + "grad_norm": 0.6940708756446838, + "learning_rate": 0.00011476966460579501, + "loss": 2.5761, 
+ "step": 9111 + }, + { + "epoch": 0.7353724477443305, + "grad_norm": 0.689588189125061, + "learning_rate": 0.00011475405066354536, + "loss": 2.5457, + "step": 9112 + }, + { + "epoch": 0.7354531514809136, + "grad_norm": 0.6938436031341553, + "learning_rate": 0.00011473843635358618, + "loss": 2.6026, + "step": 9113 + }, + { + "epoch": 0.7355338552174966, + "grad_norm": 0.7122177481651306, + "learning_rate": 0.00011472282167630663, + "loss": 2.5701, + "step": 9114 + }, + { + "epoch": 0.7356145589540796, + "grad_norm": 0.6667213439941406, + "learning_rate": 0.00011470720663209591, + "loss": 2.5944, + "step": 9115 + }, + { + "epoch": 0.7356952626906625, + "grad_norm": 0.705910861492157, + "learning_rate": 0.00011469159122134314, + "loss": 2.6183, + "step": 9116 + }, + { + "epoch": 0.7357759664272456, + "grad_norm": 0.709937572479248, + "learning_rate": 0.00011467597544443751, + "loss": 2.5153, + "step": 9117 + }, + { + "epoch": 0.7358566701638286, + "grad_norm": 0.6870958805084229, + "learning_rate": 0.00011466035930176822, + "loss": 2.5334, + "step": 9118 + }, + { + "epoch": 0.7359373739004116, + "grad_norm": 0.7274392247200012, + "learning_rate": 0.00011464474279372443, + "loss": 2.5336, + "step": 9119 + }, + { + "epoch": 0.7360180776369946, + "grad_norm": 0.6360952258110046, + "learning_rate": 0.0001146291259206954, + "loss": 2.5604, + "step": 9120 + }, + { + "epoch": 0.7360987813735776, + "grad_norm": 0.7990559935569763, + "learning_rate": 0.00011461350868307028, + "loss": 2.624, + "step": 9121 + }, + { + "epoch": 0.7361794851101606, + "grad_norm": 0.6670079827308655, + "learning_rate": 0.00011459789108123835, + "loss": 2.5761, + "step": 9122 + }, + { + "epoch": 0.7362601888467436, + "grad_norm": 0.6994437575340271, + "learning_rate": 0.00011458227311558877, + "loss": 2.5679, + "step": 9123 + }, + { + "epoch": 0.7363408925833266, + "grad_norm": 0.7428358197212219, + "learning_rate": 0.00011456665478651087, + "loss": 2.5874, + "step": 9124 + }, + { + "epoch": 
0.7364215963199097, + "grad_norm": 0.7079486846923828, + "learning_rate": 0.00011455103609439387, + "loss": 2.5999, + "step": 9125 + }, + { + "epoch": 0.7365023000564926, + "grad_norm": 0.646244466304779, + "learning_rate": 0.00011453541703962695, + "loss": 2.5053, + "step": 9126 + }, + { + "epoch": 0.7365830037930756, + "grad_norm": 0.6671318411827087, + "learning_rate": 0.0001145197976225995, + "loss": 2.5277, + "step": 9127 + }, + { + "epoch": 0.7366637075296586, + "grad_norm": 0.7060399055480957, + "learning_rate": 0.00011450417784370072, + "loss": 2.6092, + "step": 9128 + }, + { + "epoch": 0.7367444112662416, + "grad_norm": 0.741547703742981, + "learning_rate": 0.00011448855770331989, + "loss": 2.6121, + "step": 9129 + }, + { + "epoch": 0.7368251150028247, + "grad_norm": 0.710267961025238, + "learning_rate": 0.00011447293720184636, + "loss": 2.5141, + "step": 9130 + }, + { + "epoch": 0.7369058187394076, + "grad_norm": 0.6914308071136475, + "learning_rate": 0.0001144573163396694, + "loss": 2.5489, + "step": 9131 + }, + { + "epoch": 0.7369865224759906, + "grad_norm": 0.7051414847373962, + "learning_rate": 0.0001144416951171783, + "loss": 2.5925, + "step": 9132 + }, + { + "epoch": 0.7370672262125736, + "grad_norm": 0.6765387058258057, + "learning_rate": 0.00011442607353476245, + "loss": 2.5864, + "step": 9133 + }, + { + "epoch": 0.7371479299491567, + "grad_norm": 0.706672191619873, + "learning_rate": 0.00011441045159281108, + "loss": 2.4823, + "step": 9134 + }, + { + "epoch": 0.7372286336857397, + "grad_norm": 0.7534066438674927, + "learning_rate": 0.00011439482929171362, + "loss": 2.5728, + "step": 9135 + }, + { + "epoch": 0.7373093374223226, + "grad_norm": 0.6628777384757996, + "learning_rate": 0.00011437920663185939, + "loss": 2.5538, + "step": 9136 + }, + { + "epoch": 0.7373900411589056, + "grad_norm": 0.6575733423233032, + "learning_rate": 0.00011436358361363773, + "loss": 2.4802, + "step": 9137 + }, + { + "epoch": 0.7374707448954887, + "grad_norm": 
0.7629329562187195, + "learning_rate": 0.00011434796023743803, + "loss": 2.6169, + "step": 9138 + }, + { + "epoch": 0.7375514486320717, + "grad_norm": 0.7148225903511047, + "learning_rate": 0.00011433233650364965, + "loss": 2.6335, + "step": 9139 + }, + { + "epoch": 0.7376321523686546, + "grad_norm": 0.705210268497467, + "learning_rate": 0.00011431671241266198, + "loss": 2.6261, + "step": 9140 + }, + { + "epoch": 0.7377128561052376, + "grad_norm": 0.7137441635131836, + "learning_rate": 0.00011430108796486441, + "loss": 2.5021, + "step": 9141 + }, + { + "epoch": 0.7377935598418207, + "grad_norm": 0.6979854702949524, + "learning_rate": 0.00011428546316064635, + "loss": 2.5436, + "step": 9142 + }, + { + "epoch": 0.7378742635784037, + "grad_norm": 0.6568784713745117, + "learning_rate": 0.00011426983800039721, + "loss": 2.5882, + "step": 9143 + }, + { + "epoch": 0.7379549673149867, + "grad_norm": 0.666606605052948, + "learning_rate": 0.00011425421248450638, + "loss": 2.5472, + "step": 9144 + }, + { + "epoch": 0.7380356710515696, + "grad_norm": 0.7240840792655945, + "learning_rate": 0.00011423858661336333, + "loss": 2.6057, + "step": 9145 + }, + { + "epoch": 0.7381163747881527, + "grad_norm": 0.7342149615287781, + "learning_rate": 0.0001142229603873575, + "loss": 2.508, + "step": 9146 + }, + { + "epoch": 0.7381970785247357, + "grad_norm": 0.7089941501617432, + "learning_rate": 0.0001142073338068783, + "loss": 2.6115, + "step": 9147 + }, + { + "epoch": 0.7382777822613187, + "grad_norm": 0.6883555054664612, + "learning_rate": 0.00011419170687231519, + "loss": 2.5254, + "step": 9148 + }, + { + "epoch": 0.7383584859979017, + "grad_norm": 0.6819528937339783, + "learning_rate": 0.00011417607958405765, + "loss": 2.5498, + "step": 9149 + }, + { + "epoch": 0.7384391897344847, + "grad_norm": 0.7348979711532593, + "learning_rate": 0.00011416045194249516, + "loss": 2.5547, + "step": 9150 + }, + { + "epoch": 0.7385198934710677, + "grad_norm": 0.6733320355415344, + "learning_rate": 
0.00011414482394801719, + "loss": 2.5985, + "step": 9151 + }, + { + "epoch": 0.7386005972076507, + "grad_norm": 0.714771032333374, + "learning_rate": 0.00011412919560101327, + "loss": 2.571, + "step": 9152 + }, + { + "epoch": 0.7386813009442337, + "grad_norm": 0.7010024189949036, + "learning_rate": 0.0001141135669018728, + "loss": 2.5755, + "step": 9153 + }, + { + "epoch": 0.7387620046808168, + "grad_norm": 0.7014826536178589, + "learning_rate": 0.00011409793785098536, + "loss": 2.6033, + "step": 9154 + }, + { + "epoch": 0.7388427084173997, + "grad_norm": 0.7286051511764526, + "learning_rate": 0.0001140823084487405, + "loss": 2.515, + "step": 9155 + }, + { + "epoch": 0.7389234121539827, + "grad_norm": 0.669365406036377, + "learning_rate": 0.00011406667869552768, + "loss": 2.506, + "step": 9156 + }, + { + "epoch": 0.7390041158905657, + "grad_norm": 0.6886852979660034, + "learning_rate": 0.00011405104859173645, + "loss": 2.6123, + "step": 9157 + }, + { + "epoch": 0.7390848196271488, + "grad_norm": 0.6344162225723267, + "learning_rate": 0.00011403541813775635, + "loss": 2.5483, + "step": 9158 + }, + { + "epoch": 0.7391655233637318, + "grad_norm": 0.7043579816818237, + "learning_rate": 0.00011401978733397694, + "loss": 2.5545, + "step": 9159 + }, + { + "epoch": 0.7392462271003147, + "grad_norm": 0.7960262298583984, + "learning_rate": 0.00011400415618078781, + "loss": 2.5666, + "step": 9160 + }, + { + "epoch": 0.7393269308368977, + "grad_norm": 0.6771546006202698, + "learning_rate": 0.00011398852467857848, + "loss": 2.6016, + "step": 9161 + }, + { + "epoch": 0.7394076345734808, + "grad_norm": 0.6522069573402405, + "learning_rate": 0.00011397289282773855, + "loss": 2.5493, + "step": 9162 + }, + { + "epoch": 0.7394883383100638, + "grad_norm": 0.6804657578468323, + "learning_rate": 0.00011395726062865762, + "loss": 2.5856, + "step": 9163 + }, + { + "epoch": 0.7395690420466468, + "grad_norm": 0.7562841176986694, + "learning_rate": 0.00011394162808172526, + "loss": 2.557, + 
"step": 9164 + }, + { + "epoch": 0.7396497457832297, + "grad_norm": 0.6464113593101501, + "learning_rate": 0.00011392599518733107, + "loss": 2.5292, + "step": 9165 + }, + { + "epoch": 0.7397304495198128, + "grad_norm": 0.7469549775123596, + "learning_rate": 0.00011391036194586466, + "loss": 2.6168, + "step": 9166 + }, + { + "epoch": 0.7398111532563958, + "grad_norm": 0.7095946669578552, + "learning_rate": 0.00011389472835771572, + "loss": 2.5468, + "step": 9167 + }, + { + "epoch": 0.7398918569929788, + "grad_norm": 0.7376375794410706, + "learning_rate": 0.00011387909442327382, + "loss": 2.5576, + "step": 9168 + }, + { + "epoch": 0.7399725607295617, + "grad_norm": 0.736727774143219, + "learning_rate": 0.00011386346014292859, + "loss": 2.6034, + "step": 9169 + }, + { + "epoch": 0.7400532644661448, + "grad_norm": 0.7026904821395874, + "learning_rate": 0.00011384782551706967, + "loss": 2.5848, + "step": 9170 + }, + { + "epoch": 0.7401339682027278, + "grad_norm": 0.6894888877868652, + "learning_rate": 0.00011383219054608678, + "loss": 2.5475, + "step": 9171 + }, + { + "epoch": 0.7402146719393108, + "grad_norm": 0.6754137277603149, + "learning_rate": 0.00011381655523036954, + "loss": 2.5124, + "step": 9172 + }, + { + "epoch": 0.7402953756758938, + "grad_norm": 0.7935643196105957, + "learning_rate": 0.00011380091957030762, + "loss": 2.5898, + "step": 9173 + }, + { + "epoch": 0.7403760794124769, + "grad_norm": 0.7017118334770203, + "learning_rate": 0.0001137852835662907, + "loss": 2.6139, + "step": 9174 + }, + { + "epoch": 0.7404567831490598, + "grad_norm": 0.7246189117431641, + "learning_rate": 0.00011376964721870847, + "loss": 2.4627, + "step": 9175 + }, + { + "epoch": 0.7405374868856428, + "grad_norm": 0.6835598349571228, + "learning_rate": 0.00011375401052795064, + "loss": 2.5707, + "step": 9176 + }, + { + "epoch": 0.7406181906222258, + "grad_norm": 0.6439787745475769, + "learning_rate": 0.00011373837349440693, + "loss": 2.5161, + "step": 9177 + }, + { + "epoch": 
0.7406988943588089, + "grad_norm": 0.7249091267585754, + "learning_rate": 0.00011372273611846704, + "loss": 2.5054, + "step": 9178 + }, + { + "epoch": 0.7407795980953918, + "grad_norm": 0.7653267979621887, + "learning_rate": 0.0001137070984005207, + "loss": 2.6016, + "step": 9179 + }, + { + "epoch": 0.7408603018319748, + "grad_norm": 0.7195165157318115, + "learning_rate": 0.0001136914603409576, + "loss": 2.5931, + "step": 9180 + }, + { + "epoch": 0.7409410055685578, + "grad_norm": 0.7093746662139893, + "learning_rate": 0.00011367582194016756, + "loss": 2.5567, + "step": 9181 + }, + { + "epoch": 0.7410217093051408, + "grad_norm": 0.6868107318878174, + "learning_rate": 0.00011366018319854026, + "loss": 2.5769, + "step": 9182 + }, + { + "epoch": 0.7411024130417239, + "grad_norm": 0.6870261430740356, + "learning_rate": 0.00011364454411646552, + "loss": 2.5418, + "step": 9183 + }, + { + "epoch": 0.7411831167783068, + "grad_norm": 0.7034662365913391, + "learning_rate": 0.00011362890469433306, + "loss": 2.5798, + "step": 9184 + }, + { + "epoch": 0.7412638205148898, + "grad_norm": 0.7200794816017151, + "learning_rate": 0.00011361326493253264, + "loss": 2.5523, + "step": 9185 + }, + { + "epoch": 0.7413445242514728, + "grad_norm": 0.7034540772438049, + "learning_rate": 0.0001135976248314541, + "loss": 2.5107, + "step": 9186 + }, + { + "epoch": 0.7414252279880559, + "grad_norm": 0.7155053019523621, + "learning_rate": 0.00011358198439148721, + "loss": 2.5804, + "step": 9187 + }, + { + "epoch": 0.7415059317246389, + "grad_norm": 0.6965398788452148, + "learning_rate": 0.00011356634361302175, + "loss": 2.5532, + "step": 9188 + }, + { + "epoch": 0.7415866354612218, + "grad_norm": 0.65416419506073, + "learning_rate": 0.00011355070249644755, + "loss": 2.5411, + "step": 9189 + }, + { + "epoch": 0.7416673391978048, + "grad_norm": 0.6798486709594727, + "learning_rate": 0.0001135350610421544, + "loss": 2.4957, + "step": 9190 + }, + { + "epoch": 0.7417480429343879, + "grad_norm": 
0.6839874386787415, + "learning_rate": 0.00011351941925053218, + "loss": 2.5745, + "step": 9191 + }, + { + "epoch": 0.7418287466709709, + "grad_norm": 0.7374398708343506, + "learning_rate": 0.00011350377712197068, + "loss": 2.4923, + "step": 9192 + }, + { + "epoch": 0.7419094504075538, + "grad_norm": 0.7517396807670593, + "learning_rate": 0.00011348813465685974, + "loss": 2.538, + "step": 9193 + }, + { + "epoch": 0.7419901541441368, + "grad_norm": 0.6670863628387451, + "learning_rate": 0.00011347249185558926, + "loss": 2.5442, + "step": 9194 + }, + { + "epoch": 0.7420708578807199, + "grad_norm": 0.6508080363273621, + "learning_rate": 0.00011345684871854905, + "loss": 2.6665, + "step": 9195 + }, + { + "epoch": 0.7421515616173029, + "grad_norm": 0.6935258507728577, + "learning_rate": 0.00011344120524612898, + "loss": 2.5388, + "step": 9196 + }, + { + "epoch": 0.7422322653538859, + "grad_norm": 0.696067750453949, + "learning_rate": 0.00011342556143871897, + "loss": 2.574, + "step": 9197 + }, + { + "epoch": 0.7423129690904688, + "grad_norm": 0.7486966252326965, + "learning_rate": 0.00011340991729670882, + "loss": 2.5924, + "step": 9198 + }, + { + "epoch": 0.7423936728270519, + "grad_norm": 0.676407516002655, + "learning_rate": 0.00011339427282048854, + "loss": 2.5907, + "step": 9199 + }, + { + "epoch": 0.7424743765636349, + "grad_norm": 0.7241318225860596, + "learning_rate": 0.00011337862801044792, + "loss": 2.5685, + "step": 9200 + }, + { + "epoch": 0.7425550803002179, + "grad_norm": 0.7012883424758911, + "learning_rate": 0.00011336298286697692, + "loss": 2.56, + "step": 9201 + }, + { + "epoch": 0.7426357840368009, + "grad_norm": 0.7313060164451599, + "learning_rate": 0.0001133473373904655, + "loss": 2.632, + "step": 9202 + }, + { + "epoch": 0.742716487773384, + "grad_norm": 0.6829206943511963, + "learning_rate": 0.00011333169158130353, + "loss": 2.5006, + "step": 9203 + }, + { + "epoch": 0.7427971915099669, + "grad_norm": 0.7324578166007996, + "learning_rate": 
0.00011331604543988093, + "loss": 2.5004, + "step": 9204 + }, + { + "epoch": 0.7428778952465499, + "grad_norm": 0.6761097311973572, + "learning_rate": 0.00011330039896658766, + "loss": 2.5516, + "step": 9205 + }, + { + "epoch": 0.7429585989831329, + "grad_norm": 0.6909754276275635, + "learning_rate": 0.00011328475216181369, + "loss": 2.5273, + "step": 9206 + }, + { + "epoch": 0.743039302719716, + "grad_norm": 0.6420674324035645, + "learning_rate": 0.00011326910502594899, + "loss": 2.5507, + "step": 9207 + }, + { + "epoch": 0.7431200064562989, + "grad_norm": 0.6442455053329468, + "learning_rate": 0.0001132534575593835, + "loss": 2.542, + "step": 9208 + }, + { + "epoch": 0.7432007101928819, + "grad_norm": 0.7053101658821106, + "learning_rate": 0.0001132378097625072, + "loss": 2.5116, + "step": 9209 + }, + { + "epoch": 0.7432814139294649, + "grad_norm": 0.7570765614509583, + "learning_rate": 0.00011322216163571007, + "loss": 2.5576, + "step": 9210 + }, + { + "epoch": 0.743362117666048, + "grad_norm": 0.6937675476074219, + "learning_rate": 0.00011320651317938214, + "loss": 2.6212, + "step": 9211 + }, + { + "epoch": 0.743442821402631, + "grad_norm": 0.6741313934326172, + "learning_rate": 0.00011319086439391333, + "loss": 2.5723, + "step": 9212 + }, + { + "epoch": 0.7435235251392139, + "grad_norm": 0.711358904838562, + "learning_rate": 0.00011317521527969374, + "loss": 2.5713, + "step": 9213 + }, + { + "epoch": 0.7436042288757969, + "grad_norm": 0.7443268895149231, + "learning_rate": 0.00011315956583711331, + "loss": 2.5301, + "step": 9214 + }, + { + "epoch": 0.74368493261238, + "grad_norm": 0.7001742720603943, + "learning_rate": 0.00011314391606656212, + "loss": 2.5545, + "step": 9215 + }, + { + "epoch": 0.743765636348963, + "grad_norm": 0.7294990420341492, + "learning_rate": 0.00011312826596843019, + "loss": 2.5897, + "step": 9216 + }, + { + "epoch": 0.743846340085546, + "grad_norm": 0.706924319267273, + "learning_rate": 0.00011311261554310753, + "loss": 2.6477, + 
"step": 9217 + }, + { + "epoch": 0.7439270438221289, + "grad_norm": 0.7065039277076721, + "learning_rate": 0.00011309696479098423, + "loss": 2.5326, + "step": 9218 + }, + { + "epoch": 0.744007747558712, + "grad_norm": 0.6502599716186523, + "learning_rate": 0.00011308131371245037, + "loss": 2.5833, + "step": 9219 + }, + { + "epoch": 0.744088451295295, + "grad_norm": 0.7135158181190491, + "learning_rate": 0.00011306566230789592, + "loss": 2.5686, + "step": 9220 + }, + { + "epoch": 0.744169155031878, + "grad_norm": 0.7239195108413696, + "learning_rate": 0.00011305001057771101, + "loss": 2.6303, + "step": 9221 + }, + { + "epoch": 0.744249858768461, + "grad_norm": 0.6442604660987854, + "learning_rate": 0.00011303435852228574, + "loss": 2.5495, + "step": 9222 + }, + { + "epoch": 0.744330562505044, + "grad_norm": 0.6700316071510315, + "learning_rate": 0.0001130187061420102, + "loss": 2.5575, + "step": 9223 + }, + { + "epoch": 0.744411266241627, + "grad_norm": 0.7532816529273987, + "learning_rate": 0.00011300305343727446, + "loss": 2.5174, + "step": 9224 + }, + { + "epoch": 0.74449196997821, + "grad_norm": 0.7614738941192627, + "learning_rate": 0.00011298740040846862, + "loss": 2.5995, + "step": 9225 + }, + { + "epoch": 0.744572673714793, + "grad_norm": 0.6781208515167236, + "learning_rate": 0.00011297174705598283, + "loss": 2.5225, + "step": 9226 + }, + { + "epoch": 0.744653377451376, + "grad_norm": 0.680525541305542, + "learning_rate": 0.0001129560933802072, + "loss": 2.5844, + "step": 9227 + }, + { + "epoch": 0.744734081187959, + "grad_norm": 0.7196657657623291, + "learning_rate": 0.00011294043938153185, + "loss": 2.564, + "step": 9228 + }, + { + "epoch": 0.744814784924542, + "grad_norm": 0.6997412443161011, + "learning_rate": 0.00011292478506034694, + "loss": 2.6486, + "step": 9229 + }, + { + "epoch": 0.744895488661125, + "grad_norm": 0.7438939809799194, + "learning_rate": 0.00011290913041704256, + "loss": 2.5667, + "step": 9230 + }, + { + "epoch": 0.744976192397708, + 
"grad_norm": 0.7391374707221985, + "learning_rate": 0.00011289347545200892, + "loss": 2.5974, + "step": 9231 + }, + { + "epoch": 0.745056896134291, + "grad_norm": 0.7845481634140015, + "learning_rate": 0.0001128778201656362, + "loss": 2.5168, + "step": 9232 + }, + { + "epoch": 0.745137599870874, + "grad_norm": 0.728712797164917, + "learning_rate": 0.00011286216455831449, + "loss": 2.5241, + "step": 9233 + }, + { + "epoch": 0.745218303607457, + "grad_norm": 0.7310191988945007, + "learning_rate": 0.00011284650863043407, + "loss": 2.5777, + "step": 9234 + }, + { + "epoch": 0.74529900734404, + "grad_norm": 0.6661474704742432, + "learning_rate": 0.00011283085238238503, + "loss": 2.5471, + "step": 9235 + }, + { + "epoch": 0.7453797110806231, + "grad_norm": 0.7697983384132385, + "learning_rate": 0.00011281519581455761, + "loss": 2.587, + "step": 9236 + }, + { + "epoch": 0.745460414817206, + "grad_norm": 0.7336567640304565, + "learning_rate": 0.00011279953892734203, + "loss": 2.5756, + "step": 9237 + }, + { + "epoch": 0.745541118553789, + "grad_norm": 0.6192059516906738, + "learning_rate": 0.00011278388172112848, + "loss": 2.5038, + "step": 9238 + }, + { + "epoch": 0.745621822290372, + "grad_norm": 0.7180300354957581, + "learning_rate": 0.00011276822419630719, + "loss": 2.5469, + "step": 9239 + }, + { + "epoch": 0.7457025260269551, + "grad_norm": 0.7583367824554443, + "learning_rate": 0.00011275256635326837, + "loss": 2.6274, + "step": 9240 + }, + { + "epoch": 0.7457832297635381, + "grad_norm": 0.6848096251487732, + "learning_rate": 0.00011273690819240221, + "loss": 2.5117, + "step": 9241 + }, + { + "epoch": 0.745863933500121, + "grad_norm": 0.6830503344535828, + "learning_rate": 0.00011272124971409907, + "loss": 2.5114, + "step": 9242 + }, + { + "epoch": 0.745944637236704, + "grad_norm": 0.780240535736084, + "learning_rate": 0.0001127055909187491, + "loss": 2.6432, + "step": 9243 + }, + { + "epoch": 0.7460253409732871, + "grad_norm": 0.7421274185180664, + "learning_rate": 
0.00011268993180674261, + "loss": 2.5723, + "step": 9244 + }, + { + "epoch": 0.7461060447098701, + "grad_norm": 0.6695685386657715, + "learning_rate": 0.00011267427237846986, + "loss": 2.5335, + "step": 9245 + }, + { + "epoch": 0.746186748446453, + "grad_norm": 0.8390316963195801, + "learning_rate": 0.00011265861263432104, + "loss": 2.5125, + "step": 9246 + }, + { + "epoch": 0.746267452183036, + "grad_norm": 0.7030535936355591, + "learning_rate": 0.00011264295257468658, + "loss": 2.5986, + "step": 9247 + }, + { + "epoch": 0.7463481559196191, + "grad_norm": 0.6754253506660461, + "learning_rate": 0.00011262729219995669, + "loss": 2.5067, + "step": 9248 + }, + { + "epoch": 0.7464288596562021, + "grad_norm": 0.6809592843055725, + "learning_rate": 0.00011261163151052163, + "loss": 2.5359, + "step": 9249 + }, + { + "epoch": 0.7465095633927851, + "grad_norm": 0.6546878218650818, + "learning_rate": 0.00011259597050677178, + "loss": 2.5357, + "step": 9250 + }, + { + "epoch": 0.746590267129368, + "grad_norm": 0.6514731645584106, + "learning_rate": 0.00011258030918909739, + "loss": 2.5591, + "step": 9251 + }, + { + "epoch": 0.7466709708659511, + "grad_norm": 0.6981258392333984, + "learning_rate": 0.0001125646475578888, + "loss": 2.6171, + "step": 9252 + }, + { + "epoch": 0.7467516746025341, + "grad_norm": 0.6763784885406494, + "learning_rate": 0.00011254898561353639, + "loss": 2.5455, + "step": 9253 + }, + { + "epoch": 0.7468323783391171, + "grad_norm": 0.6241726279258728, + "learning_rate": 0.00011253332335643043, + "loss": 2.6073, + "step": 9254 + }, + { + "epoch": 0.7469130820757001, + "grad_norm": 0.6810312271118164, + "learning_rate": 0.00011251766078696132, + "loss": 2.5285, + "step": 9255 + }, + { + "epoch": 0.7469937858122832, + "grad_norm": 0.6603971123695374, + "learning_rate": 0.00011250199790551934, + "loss": 2.5985, + "step": 9256 + }, + { + "epoch": 0.7470744895488661, + "grad_norm": 0.69618159532547, + "learning_rate": 0.0001124863347124949, + "loss": 2.5728, + 
"step": 9257 + }, + { + "epoch": 0.7471551932854491, + "grad_norm": 0.6878889203071594, + "learning_rate": 0.00011247067120827837, + "loss": 2.5459, + "step": 9258 + }, + { + "epoch": 0.7472358970220321, + "grad_norm": 0.6613149046897888, + "learning_rate": 0.00011245500739326011, + "loss": 2.6559, + "step": 9259 + }, + { + "epoch": 0.7473166007586152, + "grad_norm": 0.6397448778152466, + "learning_rate": 0.00011243934326783053, + "loss": 2.5712, + "step": 9260 + }, + { + "epoch": 0.7473973044951981, + "grad_norm": 0.6804259419441223, + "learning_rate": 0.00011242367883237996, + "loss": 2.6143, + "step": 9261 + }, + { + "epoch": 0.7474780082317811, + "grad_norm": 0.8029066324234009, + "learning_rate": 0.00011240801408729884, + "loss": 2.5702, + "step": 9262 + }, + { + "epoch": 0.7475587119683641, + "grad_norm": 0.7086285948753357, + "learning_rate": 0.00011239234903297761, + "loss": 2.6113, + "step": 9263 + }, + { + "epoch": 0.7476394157049472, + "grad_norm": 0.6980452537536621, + "learning_rate": 0.00011237668366980665, + "loss": 2.6355, + "step": 9264 + }, + { + "epoch": 0.7477201194415302, + "grad_norm": 0.6906906962394714, + "learning_rate": 0.00011236101799817636, + "loss": 2.5605, + "step": 9265 + }, + { + "epoch": 0.7478008231781131, + "grad_norm": 0.7412894368171692, + "learning_rate": 0.00011234535201847716, + "loss": 2.6073, + "step": 9266 + }, + { + "epoch": 0.7478815269146961, + "grad_norm": 0.6949330568313599, + "learning_rate": 0.00011232968573109955, + "loss": 2.5623, + "step": 9267 + }, + { + "epoch": 0.7479622306512792, + "grad_norm": 0.6916515827178955, + "learning_rate": 0.00011231401913643393, + "loss": 2.5348, + "step": 9268 + }, + { + "epoch": 0.7480429343878622, + "grad_norm": 0.7576180696487427, + "learning_rate": 0.0001122983522348708, + "loss": 2.5968, + "step": 9269 + }, + { + "epoch": 0.7481236381244452, + "grad_norm": 0.6734197735786438, + "learning_rate": 0.00011228268502680052, + "loss": 2.5185, + "step": 9270 + }, + { + "epoch": 
0.7482043418610281, + "grad_norm": 0.6952544450759888, + "learning_rate": 0.00011226701751261367, + "loss": 2.57, + "step": 9271 + }, + { + "epoch": 0.7482850455976112, + "grad_norm": 0.6504654884338379, + "learning_rate": 0.00011225134969270068, + "loss": 2.5677, + "step": 9272 + }, + { + "epoch": 0.7483657493341942, + "grad_norm": 0.6843643188476562, + "learning_rate": 0.00011223568156745198, + "loss": 2.5686, + "step": 9273 + }, + { + "epoch": 0.7484464530707772, + "grad_norm": 0.6786371469497681, + "learning_rate": 0.00011222001313725816, + "loss": 2.5024, + "step": 9274 + }, + { + "epoch": 0.7485271568073602, + "grad_norm": 0.6431117057800293, + "learning_rate": 0.00011220434440250967, + "loss": 2.5206, + "step": 9275 + }, + { + "epoch": 0.7486078605439432, + "grad_norm": 0.699547290802002, + "learning_rate": 0.000112188675363597, + "loss": 2.5974, + "step": 9276 + }, + { + "epoch": 0.7486885642805262, + "grad_norm": 0.6870436072349548, + "learning_rate": 0.00011217300602091067, + "loss": 2.5303, + "step": 9277 + }, + { + "epoch": 0.7487692680171092, + "grad_norm": 0.7032173871994019, + "learning_rate": 0.0001121573363748412, + "loss": 2.5045, + "step": 9278 + }, + { + "epoch": 0.7488499717536922, + "grad_norm": 0.6890417337417603, + "learning_rate": 0.00011214166642577917, + "loss": 2.5945, + "step": 9279 + }, + { + "epoch": 0.7489306754902753, + "grad_norm": 0.7257806062698364, + "learning_rate": 0.00011212599617411506, + "loss": 2.6013, + "step": 9280 + }, + { + "epoch": 0.7490113792268582, + "grad_norm": 0.722561240196228, + "learning_rate": 0.0001121103256202394, + "loss": 2.5809, + "step": 9281 + }, + { + "epoch": 0.7490920829634412, + "grad_norm": 0.7360994219779968, + "learning_rate": 0.00011209465476454277, + "loss": 2.5036, + "step": 9282 + }, + { + "epoch": 0.7491727867000242, + "grad_norm": 0.6561676263809204, + "learning_rate": 0.00011207898360741574, + "loss": 2.5302, + "step": 9283 + }, + { + "epoch": 0.7492534904366072, + "grad_norm": 
0.7454147338867188, + "learning_rate": 0.00011206331214924887, + "loss": 2.5511, + "step": 9284 + }, + { + "epoch": 0.7493341941731902, + "grad_norm": 0.7085482478141785, + "learning_rate": 0.00011204764039043275, + "loss": 2.5743, + "step": 9285 + }, + { + "epoch": 0.7494148979097732, + "grad_norm": 0.691872775554657, + "learning_rate": 0.0001120319683313579, + "loss": 2.5414, + "step": 9286 + }, + { + "epoch": 0.7494956016463562, + "grad_norm": 0.6661050915718079, + "learning_rate": 0.00011201629597241496, + "loss": 2.5418, + "step": 9287 + }, + { + "epoch": 0.7495763053829392, + "grad_norm": 0.7440990805625916, + "learning_rate": 0.00011200062331399452, + "loss": 2.5543, + "step": 9288 + }, + { + "epoch": 0.7496570091195223, + "grad_norm": 0.6655303835868835, + "learning_rate": 0.00011198495035648715, + "loss": 2.5629, + "step": 9289 + }, + { + "epoch": 0.7497377128561052, + "grad_norm": 0.7550996541976929, + "learning_rate": 0.00011196927710028353, + "loss": 2.5376, + "step": 9290 + }, + { + "epoch": 0.7498184165926882, + "grad_norm": 0.692915678024292, + "learning_rate": 0.00011195360354577422, + "loss": 2.4661, + "step": 9291 + }, + { + "epoch": 0.7498991203292712, + "grad_norm": 0.7572253346443176, + "learning_rate": 0.00011193792969334985, + "loss": 2.5641, + "step": 9292 + }, + { + "epoch": 0.7499798240658543, + "grad_norm": 0.6550531387329102, + "learning_rate": 0.00011192225554340107, + "loss": 2.5591, + "step": 9293 + }, + { + "epoch": 0.7500605278024373, + "grad_norm": 0.677130401134491, + "learning_rate": 0.0001119065810963185, + "loss": 2.5859, + "step": 9294 + }, + { + "epoch": 0.7501412315390202, + "grad_norm": 0.680673360824585, + "learning_rate": 0.00011189090635249287, + "loss": 2.5343, + "step": 9295 + }, + { + "epoch": 0.7502219352756032, + "grad_norm": 0.7574957609176636, + "learning_rate": 0.00011187523131231472, + "loss": 2.5966, + "step": 9296 + }, + { + "epoch": 0.7503026390121863, + "grad_norm": 0.7099971175193787, + "learning_rate": 
0.00011185955597617474, + "loss": 2.5547, + "step": 9297 + }, + { + "epoch": 0.7503833427487693, + "grad_norm": 0.7153162956237793, + "learning_rate": 0.00011184388034446367, + "loss": 2.5986, + "step": 9298 + }, + { + "epoch": 0.7504640464853523, + "grad_norm": 0.7154852747917175, + "learning_rate": 0.00011182820441757212, + "loss": 2.5214, + "step": 9299 + }, + { + "epoch": 0.7505447502219352, + "grad_norm": 0.6899208426475525, + "learning_rate": 0.00011181252819589081, + "loss": 2.5026, + "step": 9300 + }, + { + "epoch": 0.7506254539585183, + "grad_norm": 0.6719048023223877, + "learning_rate": 0.00011179685167981041, + "loss": 2.5915, + "step": 9301 + }, + { + "epoch": 0.7507061576951013, + "grad_norm": 0.6664413213729858, + "learning_rate": 0.00011178117486972164, + "loss": 2.5479, + "step": 9302 + }, + { + "epoch": 0.7507868614316843, + "grad_norm": 0.7433286905288696, + "learning_rate": 0.00011176549776601517, + "loss": 2.5941, + "step": 9303 + }, + { + "epoch": 0.7508675651682672, + "grad_norm": 0.7868518233299255, + "learning_rate": 0.00011174982036908177, + "loss": 2.5537, + "step": 9304 + }, + { + "epoch": 0.7509482689048503, + "grad_norm": 0.7037336826324463, + "learning_rate": 0.0001117341426793121, + "loss": 2.568, + "step": 9305 + }, + { + "epoch": 0.7510289726414333, + "grad_norm": 0.6630405783653259, + "learning_rate": 0.00011171846469709697, + "loss": 2.4906, + "step": 9306 + }, + { + "epoch": 0.7511096763780163, + "grad_norm": 0.7398669719696045, + "learning_rate": 0.00011170278642282701, + "loss": 2.574, + "step": 9307 + }, + { + "epoch": 0.7511903801145993, + "grad_norm": 0.7557641267776489, + "learning_rate": 0.00011168710785689304, + "loss": 2.5237, + "step": 9308 + }, + { + "epoch": 0.7512710838511824, + "grad_norm": 0.6883708238601685, + "learning_rate": 0.00011167142899968581, + "loss": 2.5643, + "step": 9309 + }, + { + "epoch": 0.7513517875877653, + "grad_norm": 0.6623669862747192, + "learning_rate": 0.00011165574985159606, + "loss": 
2.5319, + "step": 9310 + }, + { + "epoch": 0.7514324913243483, + "grad_norm": 0.6938778758049011, + "learning_rate": 0.00011164007041301454, + "loss": 2.5083, + "step": 9311 + }, + { + "epoch": 0.7515131950609313, + "grad_norm": 0.718534529209137, + "learning_rate": 0.00011162439068433204, + "loss": 2.4791, + "step": 9312 + }, + { + "epoch": 0.7515938987975144, + "grad_norm": 0.672113299369812, + "learning_rate": 0.00011160871066593934, + "loss": 2.5264, + "step": 9313 + }, + { + "epoch": 0.7516746025340973, + "grad_norm": 0.6854343414306641, + "learning_rate": 0.00011159303035822723, + "loss": 2.5734, + "step": 9314 + }, + { + "epoch": 0.7517553062706803, + "grad_norm": 0.6494589447975159, + "learning_rate": 0.0001115773497615865, + "loss": 2.5564, + "step": 9315 + }, + { + "epoch": 0.7518360100072633, + "grad_norm": 0.7219608426094055, + "learning_rate": 0.00011156166887640793, + "loss": 2.6049, + "step": 9316 + }, + { + "epoch": 0.7519167137438464, + "grad_norm": 0.6892502903938293, + "learning_rate": 0.00011154598770308236, + "loss": 2.5333, + "step": 9317 + }, + { + "epoch": 0.7519974174804294, + "grad_norm": 0.6670175790786743, + "learning_rate": 0.0001115303062420006, + "loss": 2.5882, + "step": 9318 + }, + { + "epoch": 0.7520781212170123, + "grad_norm": 0.7367776036262512, + "learning_rate": 0.00011151462449355347, + "loss": 2.5634, + "step": 9319 + }, + { + "epoch": 0.7521588249535953, + "grad_norm": 0.6971952319145203, + "learning_rate": 0.00011149894245813182, + "loss": 2.5323, + "step": 9320 + }, + { + "epoch": 0.7522395286901784, + "grad_norm": 0.6555755734443665, + "learning_rate": 0.00011148326013612642, + "loss": 2.5597, + "step": 9321 + }, + { + "epoch": 0.7523202324267614, + "grad_norm": 0.7004384994506836, + "learning_rate": 0.00011146757752792819, + "loss": 2.4761, + "step": 9322 + }, + { + "epoch": 0.7524009361633444, + "grad_norm": 0.7151978015899658, + "learning_rate": 0.00011145189463392791, + "loss": 2.5825, + "step": 9323 + }, + { + 
"epoch": 0.7524816398999273, + "grad_norm": 0.7176918387413025, + "learning_rate": 0.00011143621145451653, + "loss": 2.6112, + "step": 9324 + }, + { + "epoch": 0.7525623436365104, + "grad_norm": 0.7156146168708801, + "learning_rate": 0.00011142052799008487, + "loss": 2.5293, + "step": 9325 + }, + { + "epoch": 0.7526430473730934, + "grad_norm": 0.7360113263130188, + "learning_rate": 0.00011140484424102375, + "loss": 2.5703, + "step": 9326 + }, + { + "epoch": 0.7527237511096764, + "grad_norm": 0.65630042552948, + "learning_rate": 0.00011138916020772414, + "loss": 2.5224, + "step": 9327 + }, + { + "epoch": 0.7528044548462594, + "grad_norm": 0.7088161110877991, + "learning_rate": 0.00011137347589057687, + "loss": 2.6673, + "step": 9328 + }, + { + "epoch": 0.7528851585828424, + "grad_norm": 0.7335243821144104, + "learning_rate": 0.00011135779128997283, + "loss": 2.5693, + "step": 9329 + }, + { + "epoch": 0.7529658623194254, + "grad_norm": 0.7166211605072021, + "learning_rate": 0.00011134210640630298, + "loss": 2.5612, + "step": 9330 + }, + { + "epoch": 0.7530465660560084, + "grad_norm": 0.7324960231781006, + "learning_rate": 0.00011132642123995816, + "loss": 2.5682, + "step": 9331 + }, + { + "epoch": 0.7531272697925914, + "grad_norm": 0.7133917808532715, + "learning_rate": 0.00011131073579132936, + "loss": 2.6131, + "step": 9332 + }, + { + "epoch": 0.7532079735291743, + "grad_norm": 0.678741455078125, + "learning_rate": 0.0001112950500608074, + "loss": 2.6109, + "step": 9333 + }, + { + "epoch": 0.7532886772657574, + "grad_norm": 0.7000784277915955, + "learning_rate": 0.0001112793640487833, + "loss": 2.5087, + "step": 9334 + }, + { + "epoch": 0.7533693810023404, + "grad_norm": 0.719976544380188, + "learning_rate": 0.00011126367775564795, + "loss": 2.4665, + "step": 9335 + }, + { + "epoch": 0.7534500847389234, + "grad_norm": 0.7127155065536499, + "learning_rate": 0.00011124799118179232, + "loss": 2.5254, + "step": 9336 + }, + { + "epoch": 0.7535307884755064, + 
"grad_norm": 0.6306474804878235, + "learning_rate": 0.00011123230432760734, + "loss": 2.5487, + "step": 9337 + }, + { + "epoch": 0.7536114922120895, + "grad_norm": 0.667019784450531, + "learning_rate": 0.00011121661719348397, + "loss": 2.5576, + "step": 9338 + }, + { + "epoch": 0.7536921959486724, + "grad_norm": 0.6869673132896423, + "learning_rate": 0.00011120092977981318, + "loss": 2.544, + "step": 9339 + }, + { + "epoch": 0.7537728996852554, + "grad_norm": 0.6688670516014099, + "learning_rate": 0.00011118524208698596, + "loss": 2.6017, + "step": 9340 + }, + { + "epoch": 0.7538536034218384, + "grad_norm": 0.6717860102653503, + "learning_rate": 0.00011116955411539325, + "loss": 2.5571, + "step": 9341 + }, + { + "epoch": 0.7539343071584215, + "grad_norm": 0.7113999724388123, + "learning_rate": 0.00011115386586542604, + "loss": 2.5684, + "step": 9342 + }, + { + "epoch": 0.7540150108950044, + "grad_norm": 0.6687907576560974, + "learning_rate": 0.00011113817733747536, + "loss": 2.548, + "step": 9343 + }, + { + "epoch": 0.7540957146315874, + "grad_norm": 0.6828920841217041, + "learning_rate": 0.00011112248853193219, + "loss": 2.5544, + "step": 9344 + }, + { + "epoch": 0.7541764183681704, + "grad_norm": 0.6793262362480164, + "learning_rate": 0.00011110679944918749, + "loss": 2.4655, + "step": 9345 + }, + { + "epoch": 0.7542571221047535, + "grad_norm": 0.6812230348587036, + "learning_rate": 0.00011109111008963235, + "loss": 2.5473, + "step": 9346 + }, + { + "epoch": 0.7543378258413365, + "grad_norm": 0.6838300824165344, + "learning_rate": 0.00011107542045365775, + "loss": 2.5248, + "step": 9347 + }, + { + "epoch": 0.7544185295779194, + "grad_norm": 0.7101932764053345, + "learning_rate": 0.0001110597305416547, + "loss": 2.5235, + "step": 9348 + }, + { + "epoch": 0.7544992333145024, + "grad_norm": 0.7136144042015076, + "learning_rate": 0.0001110440403540143, + "loss": 2.5592, + "step": 9349 + }, + { + "epoch": 0.7545799370510855, + "grad_norm": 0.6673154234886169, + 
"learning_rate": 0.00011102834989112751, + "loss": 2.4962, + "step": 9350 + }, + { + "epoch": 0.7546606407876685, + "grad_norm": 0.6849049925804138, + "learning_rate": 0.00011101265915338544, + "loss": 2.5793, + "step": 9351 + }, + { + "epoch": 0.7547413445242515, + "grad_norm": 0.7239733338356018, + "learning_rate": 0.0001109969681411791, + "loss": 2.5556, + "step": 9352 + }, + { + "epoch": 0.7548220482608344, + "grad_norm": 0.6738215684890747, + "learning_rate": 0.00011098127685489955, + "loss": 2.6181, + "step": 9353 + }, + { + "epoch": 0.7549027519974175, + "grad_norm": 0.6212114095687866, + "learning_rate": 0.00011096558529493787, + "loss": 2.5509, + "step": 9354 + }, + { + "epoch": 0.7549834557340005, + "grad_norm": 0.6801952123641968, + "learning_rate": 0.00011094989346168517, + "loss": 2.6454, + "step": 9355 + }, + { + "epoch": 0.7550641594705835, + "grad_norm": 0.6605944037437439, + "learning_rate": 0.0001109342013555325, + "loss": 2.5218, + "step": 9356 + }, + { + "epoch": 0.7551448632071665, + "grad_norm": 0.6486438512802124, + "learning_rate": 0.00011091850897687096, + "loss": 2.5431, + "step": 9357 + }, + { + "epoch": 0.7552255669437495, + "grad_norm": 0.6701794266700745, + "learning_rate": 0.0001109028163260916, + "loss": 2.563, + "step": 9358 + }, + { + "epoch": 0.7553062706803325, + "grad_norm": 0.6486446261405945, + "learning_rate": 0.00011088712340358555, + "loss": 2.5147, + "step": 9359 + }, + { + "epoch": 0.7553869744169155, + "grad_norm": 0.695197582244873, + "learning_rate": 0.00011087143020974396, + "loss": 2.5707, + "step": 9360 + }, + { + "epoch": 0.7554676781534985, + "grad_norm": 0.6910821199417114, + "learning_rate": 0.00011085573674495791, + "loss": 2.5797, + "step": 9361 + }, + { + "epoch": 0.7555483818900816, + "grad_norm": 0.7084208726882935, + "learning_rate": 0.00011084004300961852, + "loss": 2.5362, + "step": 9362 + }, + { + "epoch": 0.7556290856266645, + "grad_norm": 0.6750916242599487, + "learning_rate": 0.00011082434900411691, 
+ "loss": 2.5554, + "step": 9363 + }, + { + "epoch": 0.7557097893632475, + "grad_norm": 0.6711466908454895, + "learning_rate": 0.0001108086547288442, + "loss": 2.5577, + "step": 9364 + }, + { + "epoch": 0.7557904930998305, + "grad_norm": 0.7267118096351624, + "learning_rate": 0.00011079296018419163, + "loss": 2.5422, + "step": 9365 + }, + { + "epoch": 0.7558711968364136, + "grad_norm": 0.692730188369751, + "learning_rate": 0.00011077726537055021, + "loss": 2.5281, + "step": 9366 + }, + { + "epoch": 0.7559519005729965, + "grad_norm": 0.7071926593780518, + "learning_rate": 0.00011076157028831122, + "loss": 2.5273, + "step": 9367 + }, + { + "epoch": 0.7560326043095795, + "grad_norm": 0.7662521600723267, + "learning_rate": 0.00011074587493786574, + "loss": 2.5433, + "step": 9368 + }, + { + "epoch": 0.7561133080461625, + "grad_norm": 0.7173436880111694, + "learning_rate": 0.00011073017931960496, + "loss": 2.579, + "step": 9369 + }, + { + "epoch": 0.7561940117827456, + "grad_norm": 0.6401154398918152, + "learning_rate": 0.00011071448343392008, + "loss": 2.5189, + "step": 9370 + }, + { + "epoch": 0.7562747155193286, + "grad_norm": 0.6510714292526245, + "learning_rate": 0.00011069878728120224, + "loss": 2.5682, + "step": 9371 + }, + { + "epoch": 0.7563554192559115, + "grad_norm": 0.7189988493919373, + "learning_rate": 0.00011068309086184269, + "loss": 2.5247, + "step": 9372 + }, + { + "epoch": 0.7564361229924945, + "grad_norm": 0.678753137588501, + "learning_rate": 0.00011066739417623258, + "loss": 2.5083, + "step": 9373 + }, + { + "epoch": 0.7565168267290776, + "grad_norm": 0.6903115510940552, + "learning_rate": 0.0001106516972247631, + "loss": 2.5658, + "step": 9374 + }, + { + "epoch": 0.7565975304656606, + "grad_norm": 0.6772382855415344, + "learning_rate": 0.0001106360000078255, + "loss": 2.5445, + "step": 9375 + }, + { + "epoch": 0.7566782342022436, + "grad_norm": 0.6655055284500122, + "learning_rate": 0.00011062030252581097, + "loss": 2.5186, + "step": 9376 + }, + { 
+ "epoch": 0.7567589379388265, + "grad_norm": 0.7173851728439331, + "learning_rate": 0.00011060460477911074, + "loss": 2.5297, + "step": 9377 + }, + { + "epoch": 0.7568396416754096, + "grad_norm": 0.6891282200813293, + "learning_rate": 0.00011058890676811606, + "loss": 2.5706, + "step": 9378 + }, + { + "epoch": 0.7569203454119926, + "grad_norm": 0.7053082585334778, + "learning_rate": 0.0001105732084932181, + "loss": 2.5475, + "step": 9379 + }, + { + "epoch": 0.7570010491485756, + "grad_norm": 0.7503373622894287, + "learning_rate": 0.00011055750995480818, + "loss": 2.6438, + "step": 9380 + }, + { + "epoch": 0.7570817528851586, + "grad_norm": 0.6703453660011292, + "learning_rate": 0.0001105418111532775, + "loss": 2.5485, + "step": 9381 + }, + { + "epoch": 0.7571624566217416, + "grad_norm": 0.6651757955551147, + "learning_rate": 0.00011052611208901733, + "loss": 2.6079, + "step": 9382 + }, + { + "epoch": 0.7572431603583246, + "grad_norm": 0.6738902926445007, + "learning_rate": 0.00011051041276241895, + "loss": 2.5279, + "step": 9383 + }, + { + "epoch": 0.7573238640949076, + "grad_norm": 0.6803816556930542, + "learning_rate": 0.00011049471317387357, + "loss": 2.5972, + "step": 9384 + }, + { + "epoch": 0.7574045678314906, + "grad_norm": 0.7127584218978882, + "learning_rate": 0.00011047901332377253, + "loss": 2.5275, + "step": 9385 + }, + { + "epoch": 0.7574852715680735, + "grad_norm": 0.7655676007270813, + "learning_rate": 0.00011046331321250711, + "loss": 2.6491, + "step": 9386 + }, + { + "epoch": 0.7575659753046566, + "grad_norm": 0.7005762457847595, + "learning_rate": 0.00011044761284046854, + "loss": 2.5266, + "step": 9387 + }, + { + "epoch": 0.7576466790412396, + "grad_norm": 0.701931357383728, + "learning_rate": 0.00011043191220804817, + "loss": 2.5556, + "step": 9388 + }, + { + "epoch": 0.7577273827778226, + "grad_norm": 0.6888757944107056, + "learning_rate": 0.00011041621131563724, + "loss": 2.5654, + "step": 9389 + }, + { + "epoch": 0.7578080865144056, + 
"grad_norm": 0.7119149565696716, + "learning_rate": 0.00011040051016362711, + "loss": 2.5925, + "step": 9390 + }, + { + "epoch": 0.7578887902509887, + "grad_norm": 0.7378301024436951, + "learning_rate": 0.00011038480875240911, + "loss": 2.5604, + "step": 9391 + }, + { + "epoch": 0.7579694939875716, + "grad_norm": 0.7221272587776184, + "learning_rate": 0.00011036910708237449, + "loss": 2.5293, + "step": 9392 + }, + { + "epoch": 0.7580501977241546, + "grad_norm": 0.6895891427993774, + "learning_rate": 0.00011035340515391465, + "loss": 2.5177, + "step": 9393 + }, + { + "epoch": 0.7581309014607376, + "grad_norm": 0.6812298893928528, + "learning_rate": 0.00011033770296742086, + "loss": 2.6345, + "step": 9394 + }, + { + "epoch": 0.7582116051973207, + "grad_norm": 0.6733750700950623, + "learning_rate": 0.00011032200052328449, + "loss": 2.5548, + "step": 9395 + }, + { + "epoch": 0.7582923089339036, + "grad_norm": 0.7667728066444397, + "learning_rate": 0.00011030629782189692, + "loss": 2.5858, + "step": 9396 + }, + { + "epoch": 0.7583730126704866, + "grad_norm": 0.6809018850326538, + "learning_rate": 0.00011029059486364946, + "loss": 2.6028, + "step": 9397 + }, + { + "epoch": 0.7584537164070696, + "grad_norm": 0.6817305684089661, + "learning_rate": 0.00011027489164893345, + "loss": 2.5594, + "step": 9398 + }, + { + "epoch": 0.7585344201436527, + "grad_norm": 0.6936343908309937, + "learning_rate": 0.00011025918817814027, + "loss": 2.4997, + "step": 9399 + }, + { + "epoch": 0.7586151238802357, + "grad_norm": 0.7046801447868347, + "learning_rate": 0.00011024348445166133, + "loss": 2.5199, + "step": 9400 + }, + { + "epoch": 0.7586958276168186, + "grad_norm": 0.7247316241264343, + "learning_rate": 0.00011022778046988798, + "loss": 2.5233, + "step": 9401 + }, + { + "epoch": 0.7587765313534016, + "grad_norm": 0.675652265548706, + "learning_rate": 0.00011021207623321162, + "loss": 2.5213, + "step": 9402 + }, + { + "epoch": 0.7588572350899847, + "grad_norm": 0.6866120100021362, + 
"learning_rate": 0.0001101963717420236, + "loss": 2.6026, + "step": 9403 + }, + { + "epoch": 0.7589379388265677, + "grad_norm": 0.7168806791305542, + "learning_rate": 0.00011018066699671534, + "loss": 2.5707, + "step": 9404 + }, + { + "epoch": 0.7590186425631507, + "grad_norm": 0.6858265995979309, + "learning_rate": 0.00011016496199767825, + "loss": 2.5313, + "step": 9405 + }, + { + "epoch": 0.7590993462997336, + "grad_norm": 0.7064315676689148, + "learning_rate": 0.00011014925674530375, + "loss": 2.5362, + "step": 9406 + }, + { + "epoch": 0.7591800500363167, + "grad_norm": 0.658385694026947, + "learning_rate": 0.00011013355123998324, + "loss": 2.5773, + "step": 9407 + }, + { + "epoch": 0.7592607537728997, + "grad_norm": 0.7112493515014648, + "learning_rate": 0.00011011784548210813, + "loss": 2.589, + "step": 9408 + }, + { + "epoch": 0.7593414575094827, + "grad_norm": 0.6835871934890747, + "learning_rate": 0.00011010213947206986, + "loss": 2.5952, + "step": 9409 + }, + { + "epoch": 0.7594221612460657, + "grad_norm": 0.6920506358146667, + "learning_rate": 0.00011008643321025989, + "loss": 2.5433, + "step": 9410 + }, + { + "epoch": 0.7595028649826487, + "grad_norm": 0.7239150404930115, + "learning_rate": 0.00011007072669706962, + "loss": 2.5291, + "step": 9411 + }, + { + "epoch": 0.7595835687192317, + "grad_norm": 0.644568145275116, + "learning_rate": 0.00011005501993289052, + "loss": 2.5324, + "step": 9412 + }, + { + "epoch": 0.7596642724558147, + "grad_norm": 0.6604863405227661, + "learning_rate": 0.00011003931291811405, + "loss": 2.561, + "step": 9413 + }, + { + "epoch": 0.7597449761923977, + "grad_norm": 0.7056753635406494, + "learning_rate": 0.00011002360565313164, + "loss": 2.6537, + "step": 9414 + }, + { + "epoch": 0.7598256799289808, + "grad_norm": 0.6712720394134521, + "learning_rate": 0.00011000789813833476, + "loss": 2.5222, + "step": 9415 + }, + { + "epoch": 0.7599063836655637, + "grad_norm": 0.6829253435134888, + "learning_rate": 0.00010999219037411492, 
+ "loss": 2.5156, + "step": 9416 + }, + { + "epoch": 0.7599870874021467, + "grad_norm": 0.7386518120765686, + "learning_rate": 0.00010997648236086359, + "loss": 2.5378, + "step": 9417 + }, + { + "epoch": 0.7600677911387297, + "grad_norm": 0.6711105108261108, + "learning_rate": 0.00010996077409897223, + "loss": 2.4985, + "step": 9418 + }, + { + "epoch": 0.7601484948753128, + "grad_norm": 0.6936883926391602, + "learning_rate": 0.00010994506558883233, + "loss": 2.4912, + "step": 9419 + }, + { + "epoch": 0.7602291986118958, + "grad_norm": 0.6927978992462158, + "learning_rate": 0.00010992935683083541, + "loss": 2.5526, + "step": 9420 + }, + { + "epoch": 0.7603099023484787, + "grad_norm": 0.7661495804786682, + "learning_rate": 0.00010991364782537297, + "loss": 2.5778, + "step": 9421 + }, + { + "epoch": 0.7603906060850617, + "grad_norm": 0.7092108726501465, + "learning_rate": 0.0001098979385728365, + "loss": 2.6557, + "step": 9422 + }, + { + "epoch": 0.7604713098216448, + "grad_norm": 0.696666419506073, + "learning_rate": 0.00010988222907361754, + "loss": 2.4897, + "step": 9423 + }, + { + "epoch": 0.7605520135582278, + "grad_norm": 0.6836280822753906, + "learning_rate": 0.00010986651932810756, + "loss": 2.5146, + "step": 9424 + }, + { + "epoch": 0.7606327172948107, + "grad_norm": 0.7269579768180847, + "learning_rate": 0.00010985080933669815, + "loss": 2.5314, + "step": 9425 + }, + { + "epoch": 0.7607134210313937, + "grad_norm": 0.6862092018127441, + "learning_rate": 0.00010983509909978085, + "loss": 2.5415, + "step": 9426 + }, + { + "epoch": 0.7607941247679768, + "grad_norm": 0.7068747878074646, + "learning_rate": 0.00010981938861774713, + "loss": 2.5919, + "step": 9427 + }, + { + "epoch": 0.7608748285045598, + "grad_norm": 0.699999213218689, + "learning_rate": 0.0001098036778909886, + "loss": 2.5175, + "step": 9428 + }, + { + "epoch": 0.7609555322411428, + "grad_norm": 0.6642772555351257, + "learning_rate": 0.0001097879669198968, + "loss": 2.5721, + "step": 9429 + }, + { 
+ "epoch": 0.7610362359777257, + "grad_norm": 0.7100533843040466, + "learning_rate": 0.00010977225570486323, + "loss": 2.5189, + "step": 9430 + }, + { + "epoch": 0.7611169397143088, + "grad_norm": 0.7289063930511475, + "learning_rate": 0.00010975654424627955, + "loss": 2.6139, + "step": 9431 + }, + { + "epoch": 0.7611976434508918, + "grad_norm": 0.7289659380912781, + "learning_rate": 0.00010974083254453726, + "loss": 2.5201, + "step": 9432 + }, + { + "epoch": 0.7612783471874748, + "grad_norm": 0.7389557957649231, + "learning_rate": 0.000109725120600028, + "loss": 2.559, + "step": 9433 + }, + { + "epoch": 0.7613590509240578, + "grad_norm": 0.7021538615226746, + "learning_rate": 0.00010970940841314327, + "loss": 2.6353, + "step": 9434 + }, + { + "epoch": 0.7614397546606407, + "grad_norm": 0.6614113450050354, + "learning_rate": 0.0001096936959842747, + "loss": 2.54, + "step": 9435 + }, + { + "epoch": 0.7615204583972238, + "grad_norm": 0.6905426979064941, + "learning_rate": 0.00010967798331381392, + "loss": 2.5845, + "step": 9436 + }, + { + "epoch": 0.7616011621338068, + "grad_norm": 0.8183904886245728, + "learning_rate": 0.00010966227040215247, + "loss": 2.5255, + "step": 9437 + }, + { + "epoch": 0.7616818658703898, + "grad_norm": 0.7404630780220032, + "learning_rate": 0.00010964655724968199, + "loss": 2.5726, + "step": 9438 + }, + { + "epoch": 0.7617625696069728, + "grad_norm": 0.657127320766449, + "learning_rate": 0.0001096308438567941, + "loss": 2.6233, + "step": 9439 + }, + { + "epoch": 0.7618432733435558, + "grad_norm": 0.7417906522750854, + "learning_rate": 0.00010961513022388039, + "loss": 2.6361, + "step": 9440 + }, + { + "epoch": 0.7619239770801388, + "grad_norm": 0.6930029988288879, + "learning_rate": 0.00010959941635133249, + "loss": 2.5164, + "step": 9441 + }, + { + "epoch": 0.7620046808167218, + "grad_norm": 0.6897261738777161, + "learning_rate": 0.00010958370223954207, + "loss": 2.5626, + "step": 9442 + }, + { + "epoch": 0.7620853845533048, + 
"grad_norm": 0.6737398505210876, + "learning_rate": 0.00010956798788890072, + "loss": 2.5342, + "step": 9443 + }, + { + "epoch": 0.7621660882898879, + "grad_norm": 0.6550001502037048, + "learning_rate": 0.0001095522732998001, + "loss": 2.5604, + "step": 9444 + }, + { + "epoch": 0.7622467920264708, + "grad_norm": 0.7184637784957886, + "learning_rate": 0.00010953655847263187, + "loss": 2.6006, + "step": 9445 + }, + { + "epoch": 0.7623274957630538, + "grad_norm": 0.6188609600067139, + "learning_rate": 0.00010952084340778766, + "loss": 2.4875, + "step": 9446 + }, + { + "epoch": 0.7624081994996368, + "grad_norm": 0.6550862789154053, + "learning_rate": 0.00010950512810565917, + "loss": 2.5794, + "step": 9447 + }, + { + "epoch": 0.7624889032362199, + "grad_norm": 0.6659231781959534, + "learning_rate": 0.000109489412566638, + "loss": 2.5137, + "step": 9448 + }, + { + "epoch": 0.7625696069728028, + "grad_norm": 0.749376118183136, + "learning_rate": 0.00010947369679111592, + "loss": 2.5923, + "step": 9449 + }, + { + "epoch": 0.7626503107093858, + "grad_norm": 0.6597894430160522, + "learning_rate": 0.0001094579807794845, + "loss": 2.5677, + "step": 9450 + }, + { + "epoch": 0.7627310144459688, + "grad_norm": 0.7194519639015198, + "learning_rate": 0.00010944226453213548, + "loss": 2.5754, + "step": 9451 + }, + { + "epoch": 0.7628117181825519, + "grad_norm": 0.6734583377838135, + "learning_rate": 0.00010942654804946057, + "loss": 2.535, + "step": 9452 + }, + { + "epoch": 0.7628924219191349, + "grad_norm": 0.7171904444694519, + "learning_rate": 0.00010941083133185146, + "loss": 2.5431, + "step": 9453 + }, + { + "epoch": 0.7629731256557178, + "grad_norm": 0.6760339736938477, + "learning_rate": 0.00010939511437969978, + "loss": 2.5163, + "step": 9454 + }, + { + "epoch": 0.7630538293923008, + "grad_norm": 0.6720966696739197, + "learning_rate": 0.00010937939719339731, + "loss": 2.5621, + "step": 9455 + }, + { + "epoch": 0.7631345331288839, + "grad_norm": 0.6374503970146179, + 
"learning_rate": 0.00010936367977333574, + "loss": 2.5007, + "step": 9456 + }, + { + "epoch": 0.7632152368654669, + "grad_norm": 0.6407146453857422, + "learning_rate": 0.00010934796211990684, + "loss": 2.5724, + "step": 9457 + }, + { + "epoch": 0.7632959406020499, + "grad_norm": 0.6685383319854736, + "learning_rate": 0.00010933224423350225, + "loss": 2.501, + "step": 9458 + }, + { + "epoch": 0.7633766443386328, + "grad_norm": 0.664806604385376, + "learning_rate": 0.00010931652611451373, + "loss": 2.6174, + "step": 9459 + }, + { + "epoch": 0.7634573480752159, + "grad_norm": 0.6383369565010071, + "learning_rate": 0.00010930080776333303, + "loss": 2.557, + "step": 9460 + }, + { + "epoch": 0.7635380518117989, + "grad_norm": 0.6747864484786987, + "learning_rate": 0.0001092850891803519, + "loss": 2.5406, + "step": 9461 + }, + { + "epoch": 0.7636187555483819, + "grad_norm": 0.7312811613082886, + "learning_rate": 0.00010926937036596205, + "loss": 2.5903, + "step": 9462 + }, + { + "epoch": 0.7636994592849649, + "grad_norm": 0.645847737789154, + "learning_rate": 0.00010925365132055529, + "loss": 2.5254, + "step": 9463 + }, + { + "epoch": 0.7637801630215479, + "grad_norm": 0.6466063857078552, + "learning_rate": 0.00010923793204452335, + "loss": 2.5322, + "step": 9464 + }, + { + "epoch": 0.7638608667581309, + "grad_norm": 0.6450574994087219, + "learning_rate": 0.000109222212538258, + "loss": 2.522, + "step": 9465 + }, + { + "epoch": 0.7639415704947139, + "grad_norm": 0.6491848826408386, + "learning_rate": 0.00010920649280215096, + "loss": 2.5545, + "step": 9466 + }, + { + "epoch": 0.7640222742312969, + "grad_norm": 0.6888336539268494, + "learning_rate": 0.0001091907728365941, + "loss": 2.5217, + "step": 9467 + }, + { + "epoch": 0.76410297796788, + "grad_norm": 0.702557384967804, + "learning_rate": 0.00010917505264197914, + "loss": 2.5351, + "step": 9468 + }, + { + "epoch": 0.7641836817044629, + "grad_norm": 0.6552408933639526, + "learning_rate": 0.0001091593322186979, + 
"loss": 2.5115, + "step": 9469 + }, + { + "epoch": 0.7642643854410459, + "grad_norm": 0.7514002919197083, + "learning_rate": 0.00010914361156714212, + "loss": 2.5196, + "step": 9470 + }, + { + "epoch": 0.7643450891776289, + "grad_norm": 0.6692500710487366, + "learning_rate": 0.00010912789068770366, + "loss": 2.5639, + "step": 9471 + }, + { + "epoch": 0.764425792914212, + "grad_norm": 0.6567397117614746, + "learning_rate": 0.0001091121695807743, + "loss": 2.5027, + "step": 9472 + }, + { + "epoch": 0.764506496650795, + "grad_norm": 0.6876057982444763, + "learning_rate": 0.00010909644824674587, + "loss": 2.519, + "step": 9473 + }, + { + "epoch": 0.7645872003873779, + "grad_norm": 0.747949481010437, + "learning_rate": 0.00010908072668601017, + "loss": 2.5604, + "step": 9474 + }, + { + "epoch": 0.7646679041239609, + "grad_norm": 0.6371368169784546, + "learning_rate": 0.000109065004898959, + "loss": 2.5853, + "step": 9475 + }, + { + "epoch": 0.764748607860544, + "grad_norm": 0.6472185254096985, + "learning_rate": 0.00010904928288598422, + "loss": 2.5662, + "step": 9476 + }, + { + "epoch": 0.764829311597127, + "grad_norm": 0.7009313702583313, + "learning_rate": 0.00010903356064747765, + "loss": 2.5244, + "step": 9477 + }, + { + "epoch": 0.76491001533371, + "grad_norm": 0.7405661940574646, + "learning_rate": 0.00010901783818383116, + "loss": 2.4963, + "step": 9478 + }, + { + "epoch": 0.7649907190702929, + "grad_norm": 0.7693421840667725, + "learning_rate": 0.00010900211549543658, + "loss": 2.6018, + "step": 9479 + }, + { + "epoch": 0.765071422806876, + "grad_norm": 0.6965410709381104, + "learning_rate": 0.00010898639258268571, + "loss": 2.627, + "step": 9480 + }, + { + "epoch": 0.765152126543459, + "grad_norm": 0.7167130708694458, + "learning_rate": 0.00010897066944597046, + "loss": 2.5298, + "step": 9481 + }, + { + "epoch": 0.765232830280042, + "grad_norm": 0.7159689664840698, + "learning_rate": 0.00010895494608568268, + "loss": 2.5179, + "step": 9482 + }, + { + "epoch": 
0.7653135340166249, + "grad_norm": 0.7329332232475281, + "learning_rate": 0.00010893922250221423, + "loss": 2.6498, + "step": 9483 + }, + { + "epoch": 0.765394237753208, + "grad_norm": 0.6912567019462585, + "learning_rate": 0.000108923498695957, + "loss": 2.5679, + "step": 9484 + }, + { + "epoch": 0.765474941489791, + "grad_norm": 0.7030324935913086, + "learning_rate": 0.00010890777466730285, + "loss": 2.5678, + "step": 9485 + }, + { + "epoch": 0.765555645226374, + "grad_norm": 0.7238864898681641, + "learning_rate": 0.00010889205041664365, + "loss": 2.5525, + "step": 9486 + }, + { + "epoch": 0.765636348962957, + "grad_norm": 0.6623672842979431, + "learning_rate": 0.00010887632594437134, + "loss": 2.4857, + "step": 9487 + }, + { + "epoch": 0.7657170526995399, + "grad_norm": 0.726645827293396, + "learning_rate": 0.00010886060125087776, + "loss": 2.5405, + "step": 9488 + }, + { + "epoch": 0.765797756436123, + "grad_norm": 0.6624459624290466, + "learning_rate": 0.00010884487633655487, + "loss": 2.5538, + "step": 9489 + }, + { + "epoch": 0.765878460172706, + "grad_norm": 0.7198002934455872, + "learning_rate": 0.00010882915120179453, + "loss": 2.5808, + "step": 9490 + }, + { + "epoch": 0.765959163909289, + "grad_norm": 0.7545582056045532, + "learning_rate": 0.00010881342584698862, + "loss": 2.6059, + "step": 9491 + }, + { + "epoch": 0.766039867645872, + "grad_norm": 0.6748257279396057, + "learning_rate": 0.00010879770027252915, + "loss": 2.5203, + "step": 9492 + }, + { + "epoch": 0.766120571382455, + "grad_norm": 0.7376208901405334, + "learning_rate": 0.00010878197447880796, + "loss": 2.5255, + "step": 9493 + }, + { + "epoch": 0.766201275119038, + "grad_norm": 0.7589401006698608, + "learning_rate": 0.00010876624846621704, + "loss": 2.6304, + "step": 9494 + }, + { + "epoch": 0.766281978855621, + "grad_norm": 0.6963146924972534, + "learning_rate": 0.00010875052223514827, + "loss": 2.5547, + "step": 9495 + }, + { + "epoch": 0.766362682592204, + "grad_norm": 
0.6660788059234619, + "learning_rate": 0.00010873479578599361, + "loss": 2.5922, + "step": 9496 + }, + { + "epoch": 0.7664433863287871, + "grad_norm": 0.7506482005119324, + "learning_rate": 0.00010871906911914502, + "loss": 2.5383, + "step": 9497 + }, + { + "epoch": 0.76652409006537, + "grad_norm": 0.7514285445213318, + "learning_rate": 0.00010870334223499443, + "loss": 2.5551, + "step": 9498 + }, + { + "epoch": 0.766604793801953, + "grad_norm": 0.6461809873580933, + "learning_rate": 0.00010868761513393379, + "loss": 2.5367, + "step": 9499 + }, + { + "epoch": 0.766685497538536, + "grad_norm": 0.6328238844871521, + "learning_rate": 0.00010867188781635512, + "loss": 2.5505, + "step": 9500 + }, + { + "epoch": 0.7667662012751191, + "grad_norm": 0.7090224027633667, + "learning_rate": 0.00010865616028265027, + "loss": 2.5921, + "step": 9501 + }, + { + "epoch": 0.766846905011702, + "grad_norm": 0.6404605507850647, + "learning_rate": 0.0001086404325332113, + "loss": 2.5357, + "step": 9502 + }, + { + "epoch": 0.766927608748285, + "grad_norm": 0.652477502822876, + "learning_rate": 0.00010862470456843016, + "loss": 2.5277, + "step": 9503 + }, + { + "epoch": 0.767008312484868, + "grad_norm": 0.7045448422431946, + "learning_rate": 0.00010860897638869887, + "loss": 2.5712, + "step": 9504 + }, + { + "epoch": 0.7670890162214511, + "grad_norm": 0.7024295926094055, + "learning_rate": 0.00010859324799440936, + "loss": 2.5976, + "step": 9505 + }, + { + "epoch": 0.7671697199580341, + "grad_norm": 0.7165585160255432, + "learning_rate": 0.00010857751938595364, + "loss": 2.5378, + "step": 9506 + }, + { + "epoch": 0.767250423694617, + "grad_norm": 0.7037522196769714, + "learning_rate": 0.0001085617905637237, + "loss": 2.554, + "step": 9507 + }, + { + "epoch": 0.7673311274312, + "grad_norm": 0.738210916519165, + "learning_rate": 0.00010854606152811163, + "loss": 2.5102, + "step": 9508 + }, + { + "epoch": 0.7674118311677831, + "grad_norm": 0.7500020861625671, + "learning_rate": 
0.0001085303322795093, + "loss": 2.5908, + "step": 9509 + }, + { + "epoch": 0.7674925349043661, + "grad_norm": 0.7669610977172852, + "learning_rate": 0.00010851460281830883, + "loss": 2.5119, + "step": 9510 + }, + { + "epoch": 0.7675732386409491, + "grad_norm": 0.6619212031364441, + "learning_rate": 0.00010849887314490217, + "loss": 2.5622, + "step": 9511 + }, + { + "epoch": 0.767653942377532, + "grad_norm": 0.7142546772956848, + "learning_rate": 0.00010848314325968136, + "loss": 2.596, + "step": 9512 + }, + { + "epoch": 0.7677346461141151, + "grad_norm": 0.7365403175354004, + "learning_rate": 0.0001084674131630385, + "loss": 2.5695, + "step": 9513 + }, + { + "epoch": 0.7678153498506981, + "grad_norm": 0.7843711972236633, + "learning_rate": 0.00010845168285536555, + "loss": 2.5707, + "step": 9514 + }, + { + "epoch": 0.7678960535872811, + "grad_norm": 0.6391385197639465, + "learning_rate": 0.00010843595233705454, + "loss": 2.5523, + "step": 9515 + }, + { + "epoch": 0.7679767573238641, + "grad_norm": 0.6955631971359253, + "learning_rate": 0.00010842022160849758, + "loss": 2.5072, + "step": 9516 + }, + { + "epoch": 0.7680574610604471, + "grad_norm": 0.7291388511657715, + "learning_rate": 0.00010840449067008665, + "loss": 2.5786, + "step": 9517 + }, + { + "epoch": 0.7681381647970301, + "grad_norm": 0.7988889813423157, + "learning_rate": 0.00010838875952221387, + "loss": 2.5622, + "step": 9518 + }, + { + "epoch": 0.7682188685336131, + "grad_norm": 0.726271390914917, + "learning_rate": 0.00010837302816527129, + "loss": 2.5479, + "step": 9519 + }, + { + "epoch": 0.7682995722701961, + "grad_norm": 0.7305205464363098, + "learning_rate": 0.00010835729659965095, + "loss": 2.5946, + "step": 9520 + }, + { + "epoch": 0.7683802760067792, + "grad_norm": 0.7843366265296936, + "learning_rate": 0.00010834156482574493, + "loss": 2.5212, + "step": 9521 + }, + { + "epoch": 0.7684609797433621, + "grad_norm": 0.6988845467567444, + "learning_rate": 0.00010832583284394529, + "loss": 2.5174, 
+ "step": 9522 + }, + { + "epoch": 0.7685416834799451, + "grad_norm": 0.7088077068328857, + "learning_rate": 0.00010831010065464414, + "loss": 2.5253, + "step": 9523 + }, + { + "epoch": 0.7686223872165281, + "grad_norm": 0.7447031140327454, + "learning_rate": 0.00010829436825823358, + "loss": 2.6045, + "step": 9524 + }, + { + "epoch": 0.7687030909531112, + "grad_norm": 0.6865237951278687, + "learning_rate": 0.00010827863565510566, + "loss": 2.558, + "step": 9525 + }, + { + "epoch": 0.7687837946896942, + "grad_norm": 0.7748900651931763, + "learning_rate": 0.0001082629028456525, + "loss": 2.5694, + "step": 9526 + }, + { + "epoch": 0.7688644984262771, + "grad_norm": 0.7031759023666382, + "learning_rate": 0.00010824716983026622, + "loss": 2.5171, + "step": 9527 + }, + { + "epoch": 0.7689452021628601, + "grad_norm": 0.7627702355384827, + "learning_rate": 0.00010823143660933888, + "loss": 2.5715, + "step": 9528 + }, + { + "epoch": 0.7690259058994432, + "grad_norm": 0.707815945148468, + "learning_rate": 0.00010821570318326264, + "loss": 2.5281, + "step": 9529 + }, + { + "epoch": 0.7691066096360262, + "grad_norm": 0.6833841800689697, + "learning_rate": 0.00010819996955242962, + "loss": 2.5702, + "step": 9530 + }, + { + "epoch": 0.7691873133726091, + "grad_norm": 0.7029415369033813, + "learning_rate": 0.00010818423571723189, + "loss": 2.5331, + "step": 9531 + }, + { + "epoch": 0.7692680171091921, + "grad_norm": 0.6442921161651611, + "learning_rate": 0.00010816850167806161, + "loss": 2.5423, + "step": 9532 + }, + { + "epoch": 0.7693487208457752, + "grad_norm": 0.7259004712104797, + "learning_rate": 0.00010815276743531093, + "loss": 2.6014, + "step": 9533 + }, + { + "epoch": 0.7694294245823582, + "grad_norm": 0.6483473777770996, + "learning_rate": 0.00010813703298937199, + "loss": 2.5268, + "step": 9534 + }, + { + "epoch": 0.7695101283189412, + "grad_norm": 0.6805520057678223, + "learning_rate": 0.00010812129834063691, + "loss": 2.5536, + "step": 9535 + }, + { + "epoch": 
0.7695908320555241, + "grad_norm": 0.7120587825775146, + "learning_rate": 0.00010810556348949783, + "loss": 2.518, + "step": 9536 + }, + { + "epoch": 0.7696715357921071, + "grad_norm": 0.7280872464179993, + "learning_rate": 0.00010808982843634692, + "loss": 2.5525, + "step": 9537 + }, + { + "epoch": 0.7697522395286902, + "grad_norm": 0.68332439661026, + "learning_rate": 0.00010807409318157636, + "loss": 2.6318, + "step": 9538 + }, + { + "epoch": 0.7698329432652732, + "grad_norm": 0.655352771282196, + "learning_rate": 0.00010805835772557826, + "loss": 2.5781, + "step": 9539 + }, + { + "epoch": 0.7699136470018562, + "grad_norm": 0.7675400972366333, + "learning_rate": 0.00010804262206874484, + "loss": 2.5542, + "step": 9540 + }, + { + "epoch": 0.7699943507384391, + "grad_norm": 0.6676837205886841, + "learning_rate": 0.00010802688621146826, + "loss": 2.5411, + "step": 9541 + }, + { + "epoch": 0.7700750544750222, + "grad_norm": 0.7378436326980591, + "learning_rate": 0.00010801115015414067, + "loss": 2.5416, + "step": 9542 + }, + { + "epoch": 0.7701557582116052, + "grad_norm": 0.7330371141433716, + "learning_rate": 0.0001079954138971543, + "loss": 2.5154, + "step": 9543 + }, + { + "epoch": 0.7702364619481882, + "grad_norm": 0.6792974472045898, + "learning_rate": 0.00010797967744090131, + "loss": 2.5328, + "step": 9544 + }, + { + "epoch": 0.7703171656847712, + "grad_norm": 0.7129618525505066, + "learning_rate": 0.00010796394078577392, + "loss": 2.5688, + "step": 9545 + }, + { + "epoch": 0.7703978694213542, + "grad_norm": 0.6900608539581299, + "learning_rate": 0.00010794820393216429, + "loss": 2.5659, + "step": 9546 + }, + { + "epoch": 0.7704785731579372, + "grad_norm": 0.6798564195632935, + "learning_rate": 0.00010793246688046464, + "loss": 2.5746, + "step": 9547 + }, + { + "epoch": 0.7705592768945202, + "grad_norm": 0.7132395505905151, + "learning_rate": 0.00010791672963106715, + "loss": 2.6277, + "step": 9548 + }, + { + "epoch": 0.7706399806311032, + "grad_norm": 
0.6762476563453674, + "learning_rate": 0.0001079009921843641, + "loss": 2.5265, + "step": 9549 + }, + { + "epoch": 0.7707206843676863, + "grad_norm": 0.7223351001739502, + "learning_rate": 0.00010788525454074765, + "loss": 2.6255, + "step": 9550 + }, + { + "epoch": 0.7708013881042692, + "grad_norm": 0.7383624315261841, + "learning_rate": 0.00010786951670061008, + "loss": 2.5744, + "step": 9551 + }, + { + "epoch": 0.7708820918408522, + "grad_norm": 0.6677328944206238, + "learning_rate": 0.00010785377866434355, + "loss": 2.5594, + "step": 9552 + }, + { + "epoch": 0.7709627955774352, + "grad_norm": 0.6572195887565613, + "learning_rate": 0.00010783804043234032, + "loss": 2.5582, + "step": 9553 + }, + { + "epoch": 0.7710434993140183, + "grad_norm": 0.6837800741195679, + "learning_rate": 0.00010782230200499265, + "loss": 2.5311, + "step": 9554 + }, + { + "epoch": 0.7711242030506013, + "grad_norm": 0.7232153415679932, + "learning_rate": 0.00010780656338269277, + "loss": 2.5074, + "step": 9555 + }, + { + "epoch": 0.7712049067871842, + "grad_norm": 0.6722296476364136, + "learning_rate": 0.00010779082456583291, + "loss": 2.551, + "step": 9556 + }, + { + "epoch": 0.7712856105237672, + "grad_norm": 0.6461100578308105, + "learning_rate": 0.00010777508555480535, + "loss": 2.5723, + "step": 9557 + }, + { + "epoch": 0.7713663142603503, + "grad_norm": 0.6573290824890137, + "learning_rate": 0.0001077593463500023, + "loss": 2.4967, + "step": 9558 + }, + { + "epoch": 0.7714470179969333, + "grad_norm": 0.7184738516807556, + "learning_rate": 0.0001077436069518161, + "loss": 2.6703, + "step": 9559 + }, + { + "epoch": 0.7715277217335162, + "grad_norm": 0.7226557731628418, + "learning_rate": 0.00010772786736063895, + "loss": 2.6118, + "step": 9560 + }, + { + "epoch": 0.7716084254700992, + "grad_norm": 0.6800956130027771, + "learning_rate": 0.00010771212757686318, + "loss": 2.578, + "step": 9561 + }, + { + "epoch": 0.7716891292066823, + "grad_norm": 0.6657535433769226, + "learning_rate": 
0.00010769638760088099, + "loss": 2.5291, + "step": 9562 + }, + { + "epoch": 0.7717698329432653, + "grad_norm": 0.620527982711792, + "learning_rate": 0.00010768064743308471, + "loss": 2.5518, + "step": 9563 + }, + { + "epoch": 0.7718505366798483, + "grad_norm": 0.693760097026825, + "learning_rate": 0.00010766490707386663, + "loss": 2.52, + "step": 9564 + }, + { + "epoch": 0.7719312404164312, + "grad_norm": 0.6674148440361023, + "learning_rate": 0.000107649166523619, + "loss": 2.5197, + "step": 9565 + }, + { + "epoch": 0.7720119441530143, + "grad_norm": 0.6844033598899841, + "learning_rate": 0.00010763342578273419, + "loss": 2.5842, + "step": 9566 + }, + { + "epoch": 0.7720926478895973, + "grad_norm": 0.6891880035400391, + "learning_rate": 0.00010761768485160442, + "loss": 2.5349, + "step": 9567 + }, + { + "epoch": 0.7721733516261803, + "grad_norm": 0.7157394289970398, + "learning_rate": 0.00010760194373062204, + "loss": 2.5762, + "step": 9568 + }, + { + "epoch": 0.7722540553627633, + "grad_norm": 0.7522526383399963, + "learning_rate": 0.00010758620242017936, + "loss": 2.5348, + "step": 9569 + }, + { + "epoch": 0.7723347590993463, + "grad_norm": 0.6817746162414551, + "learning_rate": 0.00010757046092066869, + "loss": 2.5836, + "step": 9570 + }, + { + "epoch": 0.7724154628359293, + "grad_norm": 0.7274518013000488, + "learning_rate": 0.00010755471923248232, + "loss": 2.5276, + "step": 9571 + }, + { + "epoch": 0.7724961665725123, + "grad_norm": 0.6735557913780212, + "learning_rate": 0.00010753897735601264, + "loss": 2.6116, + "step": 9572 + }, + { + "epoch": 0.7725768703090953, + "grad_norm": 0.6626406908035278, + "learning_rate": 0.00010752323529165186, + "loss": 2.5778, + "step": 9573 + }, + { + "epoch": 0.7726575740456784, + "grad_norm": 0.6627367734909058, + "learning_rate": 0.00010750749303979246, + "loss": 2.5839, + "step": 9574 + }, + { + "epoch": 0.7727382777822613, + "grad_norm": 0.6658251881599426, + "learning_rate": 0.0001074917506008267, + "loss": 2.5233, + 
"step": 9575 + }, + { + "epoch": 0.7728189815188443, + "grad_norm": 0.6969848871231079, + "learning_rate": 0.00010747600797514692, + "loss": 2.5169, + "step": 9576 + }, + { + "epoch": 0.7728996852554273, + "grad_norm": 0.7313554883003235, + "learning_rate": 0.00010746026516314549, + "loss": 2.5528, + "step": 9577 + }, + { + "epoch": 0.7729803889920104, + "grad_norm": 0.6467077136039734, + "learning_rate": 0.00010744452216521472, + "loss": 2.5158, + "step": 9578 + }, + { + "epoch": 0.7730610927285934, + "grad_norm": 0.6808056235313416, + "learning_rate": 0.00010742877898174702, + "loss": 2.5346, + "step": 9579 + }, + { + "epoch": 0.7731417964651763, + "grad_norm": 0.7537400722503662, + "learning_rate": 0.00010741303561313474, + "loss": 2.5621, + "step": 9580 + }, + { + "epoch": 0.7732225002017593, + "grad_norm": 0.6715610027313232, + "learning_rate": 0.00010739729205977021, + "loss": 2.5384, + "step": 9581 + }, + { + "epoch": 0.7733032039383424, + "grad_norm": 0.7129234075546265, + "learning_rate": 0.00010738154832204586, + "loss": 2.5639, + "step": 9582 + }, + { + "epoch": 0.7733839076749254, + "grad_norm": 0.7156025171279907, + "learning_rate": 0.00010736580440035397, + "loss": 2.5427, + "step": 9583 + }, + { + "epoch": 0.7734646114115084, + "grad_norm": 0.7394191026687622, + "learning_rate": 0.00010735006029508703, + "loss": 2.5809, + "step": 9584 + }, + { + "epoch": 0.7735453151480913, + "grad_norm": 0.7117684483528137, + "learning_rate": 0.00010733431600663737, + "loss": 2.5807, + "step": 9585 + }, + { + "epoch": 0.7736260188846744, + "grad_norm": 0.6622862219810486, + "learning_rate": 0.00010731857153539737, + "loss": 2.5277, + "step": 9586 + }, + { + "epoch": 0.7737067226212574, + "grad_norm": 0.7744547128677368, + "learning_rate": 0.00010730282688175943, + "loss": 2.6119, + "step": 9587 + }, + { + "epoch": 0.7737874263578404, + "grad_norm": 0.6804926991462708, + "learning_rate": 0.00010728708204611597, + "loss": 2.534, + "step": 9588 + }, + { + "epoch": 
0.7738681300944233, + "grad_norm": 0.7115367650985718, + "learning_rate": 0.00010727133702885937, + "loss": 2.542, + "step": 9589 + }, + { + "epoch": 0.7739488338310063, + "grad_norm": 0.7623847723007202, + "learning_rate": 0.00010725559183038205, + "loss": 2.587, + "step": 9590 + }, + { + "epoch": 0.7740295375675894, + "grad_norm": 0.6612982153892517, + "learning_rate": 0.00010723984645107641, + "loss": 2.5257, + "step": 9591 + }, + { + "epoch": 0.7741102413041724, + "grad_norm": 0.7553900480270386, + "learning_rate": 0.00010722410089133488, + "loss": 2.6311, + "step": 9592 + }, + { + "epoch": 0.7741909450407554, + "grad_norm": 0.7541414499282837, + "learning_rate": 0.00010720835515154983, + "loss": 2.5978, + "step": 9593 + }, + { + "epoch": 0.7742716487773383, + "grad_norm": 0.6690947413444519, + "learning_rate": 0.00010719260923211376, + "loss": 2.568, + "step": 9594 + }, + { + "epoch": 0.7743523525139214, + "grad_norm": 0.7282151579856873, + "learning_rate": 0.00010717686313341909, + "loss": 2.5375, + "step": 9595 + }, + { + "epoch": 0.7744330562505044, + "grad_norm": 0.6862902045249939, + "learning_rate": 0.00010716111685585821, + "loss": 2.5503, + "step": 9596 + }, + { + "epoch": 0.7745137599870874, + "grad_norm": 0.7076265811920166, + "learning_rate": 0.00010714537039982357, + "loss": 2.4766, + "step": 9597 + }, + { + "epoch": 0.7745944637236704, + "grad_norm": 0.7063891887664795, + "learning_rate": 0.00010712962376570761, + "loss": 2.5822, + "step": 9598 + }, + { + "epoch": 0.7746751674602534, + "grad_norm": 0.6975609064102173, + "learning_rate": 0.00010711387695390282, + "loss": 2.597, + "step": 9599 + }, + { + "epoch": 0.7747558711968364, + "grad_norm": 0.6790002584457397, + "learning_rate": 0.0001070981299648016, + "loss": 2.5705, + "step": 9600 + }, + { + "epoch": 0.7748365749334194, + "grad_norm": 0.6493679881095886, + "learning_rate": 0.00010708238279879643, + "loss": 2.49, + "step": 9601 + }, + { + "epoch": 0.7749172786700024, + "grad_norm": 
0.6741142868995667, + "learning_rate": 0.00010706663545627977, + "loss": 2.6008, + "step": 9602 + }, + { + "epoch": 0.7749979824065855, + "grad_norm": 0.6753309965133667, + "learning_rate": 0.00010705088793764408, + "loss": 2.536, + "step": 9603 + }, + { + "epoch": 0.7750786861431684, + "grad_norm": 0.6879377365112305, + "learning_rate": 0.00010703514024328183, + "loss": 2.5884, + "step": 9604 + }, + { + "epoch": 0.7751593898797514, + "grad_norm": 0.6535949110984802, + "learning_rate": 0.00010701939237358549, + "loss": 2.5489, + "step": 9605 + }, + { + "epoch": 0.7752400936163344, + "grad_norm": 0.7308230400085449, + "learning_rate": 0.00010700364432894756, + "loss": 2.5679, + "step": 9606 + }, + { + "epoch": 0.7753207973529175, + "grad_norm": 0.7016584277153015, + "learning_rate": 0.00010698789610976052, + "loss": 2.5678, + "step": 9607 + }, + { + "epoch": 0.7754015010895005, + "grad_norm": 0.7181541323661804, + "learning_rate": 0.00010697214771641682, + "loss": 2.5004, + "step": 9608 + }, + { + "epoch": 0.7754822048260834, + "grad_norm": 0.6414844989776611, + "learning_rate": 0.00010695639914930895, + "loss": 2.4896, + "step": 9609 + }, + { + "epoch": 0.7755629085626664, + "grad_norm": 0.7288017868995667, + "learning_rate": 0.00010694065040882943, + "loss": 2.5945, + "step": 9610 + }, + { + "epoch": 0.7756436122992495, + "grad_norm": 0.6808066368103027, + "learning_rate": 0.00010692490149537079, + "loss": 2.5973, + "step": 9611 + }, + { + "epoch": 0.7757243160358325, + "grad_norm": 0.7924454212188721, + "learning_rate": 0.00010690915240932553, + "loss": 2.5448, + "step": 9612 + }, + { + "epoch": 0.7758050197724154, + "grad_norm": 0.6466094851493835, + "learning_rate": 0.00010689340315108606, + "loss": 2.5065, + "step": 9613 + }, + { + "epoch": 0.7758857235089984, + "grad_norm": 0.6775460243225098, + "learning_rate": 0.00010687765372104502, + "loss": 2.5238, + "step": 9614 + }, + { + "epoch": 0.7759664272455815, + "grad_norm": 0.6901230812072754, + 
"learning_rate": 0.00010686190411959484, + "loss": 2.5109, + "step": 9615 + }, + { + "epoch": 0.7760471309821645, + "grad_norm": 0.7032039165496826, + "learning_rate": 0.00010684615434712808, + "loss": 2.6094, + "step": 9616 + }, + { + "epoch": 0.7761278347187475, + "grad_norm": 0.7008969187736511, + "learning_rate": 0.00010683040440403727, + "loss": 2.5758, + "step": 9617 + }, + { + "epoch": 0.7762085384553304, + "grad_norm": 0.6909677386283875, + "learning_rate": 0.00010681465429071491, + "loss": 2.5373, + "step": 9618 + }, + { + "epoch": 0.7762892421919135, + "grad_norm": 0.699030339717865, + "learning_rate": 0.00010679890400755355, + "loss": 2.577, + "step": 9619 + }, + { + "epoch": 0.7763699459284965, + "grad_norm": 0.7012344598770142, + "learning_rate": 0.00010678315355494575, + "loss": 2.5205, + "step": 9620 + }, + { + "epoch": 0.7764506496650795, + "grad_norm": 0.7693915367126465, + "learning_rate": 0.000106767402933284, + "loss": 2.5947, + "step": 9621 + }, + { + "epoch": 0.7765313534016625, + "grad_norm": 0.7635772228240967, + "learning_rate": 0.00010675165214296093, + "loss": 2.6221, + "step": 9622 + }, + { + "epoch": 0.7766120571382455, + "grad_norm": 0.701411783695221, + "learning_rate": 0.000106735901184369, + "loss": 2.5236, + "step": 9623 + }, + { + "epoch": 0.7766927608748285, + "grad_norm": 0.7283998727798462, + "learning_rate": 0.00010672015005790079, + "loss": 2.5581, + "step": 9624 + }, + { + "epoch": 0.7767734646114115, + "grad_norm": 0.7069897055625916, + "learning_rate": 0.0001067043987639489, + "loss": 2.5541, + "step": 9625 + }, + { + "epoch": 0.7768541683479945, + "grad_norm": 0.7419753074645996, + "learning_rate": 0.00010668864730290586, + "loss": 2.5992, + "step": 9626 + }, + { + "epoch": 0.7769348720845776, + "grad_norm": 0.6651501059532166, + "learning_rate": 0.00010667289567516426, + "loss": 2.546, + "step": 9627 + }, + { + "epoch": 0.7770155758211605, + "grad_norm": 0.7265670895576477, + "learning_rate": 0.00010665714388111665, + 
"loss": 2.611, + "step": 9628 + }, + { + "epoch": 0.7770962795577435, + "grad_norm": 0.6520028114318848, + "learning_rate": 0.00010664139192115559, + "loss": 2.5433, + "step": 9629 + }, + { + "epoch": 0.7771769832943265, + "grad_norm": 0.6990057826042175, + "learning_rate": 0.0001066256397956737, + "loss": 2.5325, + "step": 9630 + }, + { + "epoch": 0.7772576870309096, + "grad_norm": 0.7353312373161316, + "learning_rate": 0.00010660988750506355, + "loss": 2.4707, + "step": 9631 + }, + { + "epoch": 0.7773383907674926, + "grad_norm": 0.6810272932052612, + "learning_rate": 0.00010659413504971774, + "loss": 2.5618, + "step": 9632 + }, + { + "epoch": 0.7774190945040755, + "grad_norm": 0.6480081081390381, + "learning_rate": 0.00010657838243002883, + "loss": 2.4543, + "step": 9633 + }, + { + "epoch": 0.7774997982406585, + "grad_norm": 0.6617380976676941, + "learning_rate": 0.00010656262964638942, + "loss": 2.5628, + "step": 9634 + }, + { + "epoch": 0.7775805019772416, + "grad_norm": 0.6761382222175598, + "learning_rate": 0.00010654687669919212, + "loss": 2.5433, + "step": 9635 + }, + { + "epoch": 0.7776612057138246, + "grad_norm": 0.6733867526054382, + "learning_rate": 0.00010653112358882957, + "loss": 2.5282, + "step": 9636 + }, + { + "epoch": 0.7777419094504076, + "grad_norm": 0.6854631304740906, + "learning_rate": 0.00010651537031569433, + "loss": 2.5997, + "step": 9637 + }, + { + "epoch": 0.7778226131869905, + "grad_norm": 0.7451226115226746, + "learning_rate": 0.00010649961688017904, + "loss": 2.5058, + "step": 9638 + }, + { + "epoch": 0.7779033169235735, + "grad_norm": 0.6744229197502136, + "learning_rate": 0.0001064838632826763, + "loss": 2.5962, + "step": 9639 + }, + { + "epoch": 0.7779840206601566, + "grad_norm": 0.7568119764328003, + "learning_rate": 0.00010646810952357873, + "loss": 2.5896, + "step": 9640 + }, + { + "epoch": 0.7780647243967396, + "grad_norm": 0.6860085725784302, + "learning_rate": 0.00010645235560327899, + "loss": 2.5675, + "step": 9641 + }, + { 
+ "epoch": 0.7781454281333225, + "grad_norm": 0.6491742134094238, + "learning_rate": 0.00010643660152216965, + "loss": 2.5374, + "step": 9642 + }, + { + "epoch": 0.7782261318699055, + "grad_norm": 0.6664023399353027, + "learning_rate": 0.0001064208472806434, + "loss": 2.4679, + "step": 9643 + }, + { + "epoch": 0.7783068356064886, + "grad_norm": 0.6595140099525452, + "learning_rate": 0.00010640509287909284, + "loss": 2.5045, + "step": 9644 + }, + { + "epoch": 0.7783875393430716, + "grad_norm": 0.6788576245307922, + "learning_rate": 0.0001063893383179106, + "loss": 2.5706, + "step": 9645 + }, + { + "epoch": 0.7784682430796546, + "grad_norm": 0.6741334199905396, + "learning_rate": 0.00010637358359748939, + "loss": 2.5763, + "step": 9646 + }, + { + "epoch": 0.7785489468162375, + "grad_norm": 0.6837517023086548, + "learning_rate": 0.0001063578287182218, + "loss": 2.5484, + "step": 9647 + }, + { + "epoch": 0.7786296505528206, + "grad_norm": 0.6604229211807251, + "learning_rate": 0.00010634207368050048, + "loss": 2.5465, + "step": 9648 + }, + { + "epoch": 0.7787103542894036, + "grad_norm": 0.6528951525688171, + "learning_rate": 0.00010632631848471813, + "loss": 2.5409, + "step": 9649 + }, + { + "epoch": 0.7787910580259866, + "grad_norm": 0.6615377068519592, + "learning_rate": 0.00010631056313126734, + "loss": 2.5545, + "step": 9650 + }, + { + "epoch": 0.7788717617625696, + "grad_norm": 0.666033923625946, + "learning_rate": 0.00010629480762054089, + "loss": 2.5341, + "step": 9651 + }, + { + "epoch": 0.7789524654991526, + "grad_norm": 0.7022622227668762, + "learning_rate": 0.00010627905195293135, + "loss": 2.5206, + "step": 9652 + }, + { + "epoch": 0.7790331692357356, + "grad_norm": 0.7175850868225098, + "learning_rate": 0.00010626329612883141, + "loss": 2.5912, + "step": 9653 + }, + { + "epoch": 0.7791138729723186, + "grad_norm": 0.6592069268226624, + "learning_rate": 0.00010624754014863379, + "loss": 2.5076, + "step": 9654 + }, + { + "epoch": 0.7791945767089016, + 
"grad_norm": 0.645893931388855, + "learning_rate": 0.0001062317840127311, + "loss": 2.5124, + "step": 9655 + }, + { + "epoch": 0.7792752804454847, + "grad_norm": 0.6638232469558716, + "learning_rate": 0.00010621602772151607, + "loss": 2.5182, + "step": 9656 + }, + { + "epoch": 0.7793559841820676, + "grad_norm": 0.6718387603759766, + "learning_rate": 0.0001062002712753814, + "loss": 2.4773, + "step": 9657 + }, + { + "epoch": 0.7794366879186506, + "grad_norm": 0.6402876377105713, + "learning_rate": 0.00010618451467471972, + "loss": 2.5557, + "step": 9658 + }, + { + "epoch": 0.7795173916552336, + "grad_norm": 0.6898398399353027, + "learning_rate": 0.00010616875791992382, + "loss": 2.5557, + "step": 9659 + }, + { + "epoch": 0.7795980953918167, + "grad_norm": 0.6718475222587585, + "learning_rate": 0.00010615300101138633, + "loss": 2.5335, + "step": 9660 + }, + { + "epoch": 0.7796787991283997, + "grad_norm": 0.6436911225318909, + "learning_rate": 0.00010613724394949995, + "loss": 2.5214, + "step": 9661 + }, + { + "epoch": 0.7797595028649826, + "grad_norm": 0.7554156184196472, + "learning_rate": 0.00010612148673465743, + "loss": 2.5526, + "step": 9662 + }, + { + "epoch": 0.7798402066015656, + "grad_norm": 0.6728504300117493, + "learning_rate": 0.00010610572936725147, + "loss": 2.5935, + "step": 9663 + }, + { + "epoch": 0.7799209103381487, + "grad_norm": 0.6793323159217834, + "learning_rate": 0.00010608997184767476, + "loss": 2.5515, + "step": 9664 + }, + { + "epoch": 0.7800016140747317, + "grad_norm": 0.7242898941040039, + "learning_rate": 0.00010607421417631999, + "loss": 2.5332, + "step": 9665 + }, + { + "epoch": 0.7800823178113147, + "grad_norm": 0.6719244718551636, + "learning_rate": 0.00010605845635357996, + "loss": 2.5191, + "step": 9666 + }, + { + "epoch": 0.7801630215478976, + "grad_norm": 0.6836631894111633, + "learning_rate": 0.00010604269837984737, + "loss": 2.6489, + "step": 9667 + }, + { + "epoch": 0.7802437252844807, + "grad_norm": 0.6833824515342712, + 
"learning_rate": 0.00010602694025551496, + "loss": 2.4906, + "step": 9668 + }, + { + "epoch": 0.7803244290210637, + "grad_norm": 0.7449159026145935, + "learning_rate": 0.0001060111819809754, + "loss": 2.5301, + "step": 9669 + }, + { + "epoch": 0.7804051327576467, + "grad_norm": 0.7149158120155334, + "learning_rate": 0.00010599542355662149, + "loss": 2.5097, + "step": 9670 + }, + { + "epoch": 0.7804858364942296, + "grad_norm": 0.6616973876953125, + "learning_rate": 0.00010597966498284595, + "loss": 2.5928, + "step": 9671 + }, + { + "epoch": 0.7805665402308127, + "grad_norm": 0.6556531190872192, + "learning_rate": 0.00010596390626004154, + "loss": 2.5543, + "step": 9672 + }, + { + "epoch": 0.7806472439673957, + "grad_norm": 0.6585283875465393, + "learning_rate": 0.000105948147388601, + "loss": 2.5244, + "step": 9673 + }, + { + "epoch": 0.7807279477039787, + "grad_norm": 0.6484133005142212, + "learning_rate": 0.00010593238836891704, + "loss": 2.4996, + "step": 9674 + }, + { + "epoch": 0.7808086514405617, + "grad_norm": 0.6681119799613953, + "learning_rate": 0.00010591662920138248, + "loss": 2.5322, + "step": 9675 + }, + { + "epoch": 0.7808893551771448, + "grad_norm": 0.709403395652771, + "learning_rate": 0.00010590086988639005, + "loss": 2.5554, + "step": 9676 + }, + { + "epoch": 0.7809700589137277, + "grad_norm": 0.6734669804573059, + "learning_rate": 0.00010588511042433251, + "loss": 2.5452, + "step": 9677 + }, + { + "epoch": 0.7810507626503107, + "grad_norm": 0.6800141930580139, + "learning_rate": 0.00010586935081560268, + "loss": 2.5154, + "step": 9678 + }, + { + "epoch": 0.7811314663868937, + "grad_norm": 0.7757244110107422, + "learning_rate": 0.00010585359106059326, + "loss": 2.5935, + "step": 9679 + }, + { + "epoch": 0.7812121701234768, + "grad_norm": 0.7288491725921631, + "learning_rate": 0.00010583783115969699, + "loss": 2.5276, + "step": 9680 + }, + { + "epoch": 0.7812928738600597, + "grad_norm": 0.6785164475440979, + "learning_rate": 0.00010582207111330678, 
+ "loss": 2.5907, + "step": 9681 + }, + { + "epoch": 0.7813735775966427, + "grad_norm": 0.6651367545127869, + "learning_rate": 0.0001058063109218153, + "loss": 2.545, + "step": 9682 + }, + { + "epoch": 0.7814542813332257, + "grad_norm": 0.6657043695449829, + "learning_rate": 0.0001057905505856154, + "loss": 2.5548, + "step": 9683 + }, + { + "epoch": 0.7815349850698088, + "grad_norm": 0.6486692428588867, + "learning_rate": 0.00010577479010509986, + "loss": 2.5589, + "step": 9684 + }, + { + "epoch": 0.7816156888063918, + "grad_norm": 0.700749397277832, + "learning_rate": 0.0001057590294806614, + "loss": 2.6008, + "step": 9685 + }, + { + "epoch": 0.7816963925429747, + "grad_norm": 0.647051215171814, + "learning_rate": 0.00010574326871269289, + "loss": 2.4894, + "step": 9686 + }, + { + "epoch": 0.7817770962795577, + "grad_norm": 0.6932066679000854, + "learning_rate": 0.00010572750780158713, + "loss": 2.5256, + "step": 9687 + }, + { + "epoch": 0.7818578000161408, + "grad_norm": 0.6330733895301819, + "learning_rate": 0.00010571174674773689, + "loss": 2.5242, + "step": 9688 + }, + { + "epoch": 0.7819385037527238, + "grad_norm": 0.6476379036903381, + "learning_rate": 0.00010569598555153499, + "loss": 2.552, + "step": 9689 + }, + { + "epoch": 0.7820192074893068, + "grad_norm": 0.661204993724823, + "learning_rate": 0.00010568022421337424, + "loss": 2.4869, + "step": 9690 + }, + { + "epoch": 0.7820999112258897, + "grad_norm": 0.6663263440132141, + "learning_rate": 0.00010566446273364746, + "loss": 2.5134, + "step": 9691 + }, + { + "epoch": 0.7821806149624727, + "grad_norm": 0.6982834339141846, + "learning_rate": 0.00010564870111274748, + "loss": 2.5755, + "step": 9692 + }, + { + "epoch": 0.7822613186990558, + "grad_norm": 0.6266167759895325, + "learning_rate": 0.00010563293935106706, + "loss": 2.5413, + "step": 9693 + }, + { + "epoch": 0.7823420224356388, + "grad_norm": 0.6484279632568359, + "learning_rate": 0.0001056171774489991, + "loss": 2.5579, + "step": 9694 + }, + { + 
"epoch": 0.7824227261722217, + "grad_norm": 0.674933910369873, + "learning_rate": 0.00010560141540693638, + "loss": 2.5364, + "step": 9695 + }, + { + "epoch": 0.7825034299088047, + "grad_norm": 0.7961840033531189, + "learning_rate": 0.00010558565322527174, + "loss": 2.5143, + "step": 9696 + }, + { + "epoch": 0.7825841336453878, + "grad_norm": 0.697158694267273, + "learning_rate": 0.00010556989090439804, + "loss": 2.5341, + "step": 9697 + }, + { + "epoch": 0.7826648373819708, + "grad_norm": 0.6912708282470703, + "learning_rate": 0.00010555412844470806, + "loss": 2.5331, + "step": 9698 + }, + { + "epoch": 0.7827455411185538, + "grad_norm": 0.7078350186347961, + "learning_rate": 0.00010553836584659474, + "loss": 2.5752, + "step": 9699 + }, + { + "epoch": 0.7828262448551367, + "grad_norm": 0.6421065926551819, + "learning_rate": 0.00010552260311045082, + "loss": 2.5393, + "step": 9700 + }, + { + "epoch": 0.7829069485917198, + "grad_norm": 0.644120454788208, + "learning_rate": 0.00010550684023666918, + "loss": 2.5062, + "step": 9701 + }, + { + "epoch": 0.7829876523283028, + "grad_norm": 0.7038589715957642, + "learning_rate": 0.00010549107722564275, + "loss": 2.6074, + "step": 9702 + }, + { + "epoch": 0.7830683560648858, + "grad_norm": 0.6692953109741211, + "learning_rate": 0.00010547531407776427, + "loss": 2.5801, + "step": 9703 + }, + { + "epoch": 0.7831490598014688, + "grad_norm": 0.7059200406074524, + "learning_rate": 0.00010545955079342669, + "loss": 2.5579, + "step": 9704 + }, + { + "epoch": 0.7832297635380518, + "grad_norm": 0.7126718759536743, + "learning_rate": 0.0001054437873730228, + "loss": 2.5764, + "step": 9705 + }, + { + "epoch": 0.7833104672746348, + "grad_norm": 0.696784257888794, + "learning_rate": 0.0001054280238169455, + "loss": 2.5256, + "step": 9706 + }, + { + "epoch": 0.7833911710112178, + "grad_norm": 0.7473082542419434, + "learning_rate": 0.00010541226012558767, + "loss": 2.5983, + "step": 9707 + }, + { + "epoch": 0.7834718747478008, + 
"grad_norm": 0.6598967909812927, + "learning_rate": 0.00010539649629934219, + "loss": 2.5267, + "step": 9708 + }, + { + "epoch": 0.7835525784843839, + "grad_norm": 0.7168934345245361, + "learning_rate": 0.00010538073233860188, + "loss": 2.5278, + "step": 9709 + }, + { + "epoch": 0.7836332822209668, + "grad_norm": 0.6848951578140259, + "learning_rate": 0.00010536496824375968, + "loss": 2.5267, + "step": 9710 + }, + { + "epoch": 0.7837139859575498, + "grad_norm": 0.7276272773742676, + "learning_rate": 0.0001053492040152084, + "loss": 2.5706, + "step": 9711 + }, + { + "epoch": 0.7837946896941328, + "grad_norm": 0.6929399371147156, + "learning_rate": 0.00010533343965334101, + "loss": 2.5184, + "step": 9712 + }, + { + "epoch": 0.7838753934307159, + "grad_norm": 0.7497181296348572, + "learning_rate": 0.00010531767515855037, + "loss": 2.5626, + "step": 9713 + }, + { + "epoch": 0.7839560971672989, + "grad_norm": 0.6536200046539307, + "learning_rate": 0.00010530191053122935, + "loss": 2.5909, + "step": 9714 + }, + { + "epoch": 0.7840368009038818, + "grad_norm": 0.6750395894050598, + "learning_rate": 0.00010528614577177087, + "loss": 2.5119, + "step": 9715 + }, + { + "epoch": 0.7841175046404648, + "grad_norm": 0.6284878849983215, + "learning_rate": 0.00010527038088056782, + "loss": 2.5417, + "step": 9716 + }, + { + "epoch": 0.7841982083770479, + "grad_norm": 0.6529444456100464, + "learning_rate": 0.00010525461585801308, + "loss": 2.5865, + "step": 9717 + }, + { + "epoch": 0.7842789121136309, + "grad_norm": 0.7332968711853027, + "learning_rate": 0.00010523885070449959, + "loss": 2.561, + "step": 9718 + }, + { + "epoch": 0.7843596158502139, + "grad_norm": 0.7054178714752197, + "learning_rate": 0.00010522308542042025, + "loss": 2.623, + "step": 9719 + }, + { + "epoch": 0.7844403195867968, + "grad_norm": 0.6837820410728455, + "learning_rate": 0.00010520732000616798, + "loss": 2.5586, + "step": 9720 + }, + { + "epoch": 0.7845210233233799, + "grad_norm": 0.7339439392089844, + 
"learning_rate": 0.00010519155446213565, + "loss": 2.5374, + "step": 9721 + }, + { + "epoch": 0.7846017270599629, + "grad_norm": 0.7625028491020203, + "learning_rate": 0.00010517578878871624, + "loss": 2.5663, + "step": 9722 + }, + { + "epoch": 0.7846824307965459, + "grad_norm": 0.6749752759933472, + "learning_rate": 0.00010516002298630263, + "loss": 2.5744, + "step": 9723 + }, + { + "epoch": 0.7847631345331288, + "grad_norm": 0.6702882647514343, + "learning_rate": 0.00010514425705528776, + "loss": 2.6247, + "step": 9724 + }, + { + "epoch": 0.7848438382697119, + "grad_norm": 0.6641737222671509, + "learning_rate": 0.00010512849099606457, + "loss": 2.5792, + "step": 9725 + }, + { + "epoch": 0.7849245420062949, + "grad_norm": 0.7522993683815002, + "learning_rate": 0.00010511272480902597, + "loss": 2.5941, + "step": 9726 + }, + { + "epoch": 0.7850052457428779, + "grad_norm": 0.7507709860801697, + "learning_rate": 0.00010509695849456487, + "loss": 2.5312, + "step": 9727 + }, + { + "epoch": 0.7850859494794609, + "grad_norm": 0.7101978063583374, + "learning_rate": 0.0001050811920530743, + "loss": 2.5833, + "step": 9728 + }, + { + "epoch": 0.785166653216044, + "grad_norm": 0.6814672946929932, + "learning_rate": 0.0001050654254849471, + "loss": 2.5466, + "step": 9729 + }, + { + "epoch": 0.7852473569526269, + "grad_norm": 0.7250106930732727, + "learning_rate": 0.0001050496587905763, + "loss": 2.5144, + "step": 9730 + }, + { + "epoch": 0.7853280606892099, + "grad_norm": 0.7125658392906189, + "learning_rate": 0.00010503389197035474, + "loss": 2.5384, + "step": 9731 + }, + { + "epoch": 0.7854087644257929, + "grad_norm": 0.7076827883720398, + "learning_rate": 0.00010501812502467547, + "loss": 2.4879, + "step": 9732 + }, + { + "epoch": 0.785489468162376, + "grad_norm": 0.632216215133667, + "learning_rate": 0.00010500235795393141, + "loss": 2.5678, + "step": 9733 + }, + { + "epoch": 0.785570171898959, + "grad_norm": 0.7376949191093445, + "learning_rate": 0.00010498659075851551, + 
"loss": 2.5024, + "step": 9734 + }, + { + "epoch": 0.7856508756355419, + "grad_norm": 0.6730546951293945, + "learning_rate": 0.00010497082343882072, + "loss": 2.5001, + "step": 9735 + }, + { + "epoch": 0.7857315793721249, + "grad_norm": 0.6958187818527222, + "learning_rate": 0.00010495505599524002, + "loss": 2.538, + "step": 9736 + }, + { + "epoch": 0.785812283108708, + "grad_norm": 0.6882508397102356, + "learning_rate": 0.00010493928842816638, + "loss": 2.5247, + "step": 9737 + }, + { + "epoch": 0.785892986845291, + "grad_norm": 0.711086630821228, + "learning_rate": 0.00010492352073799276, + "loss": 2.5721, + "step": 9738 + }, + { + "epoch": 0.7859736905818739, + "grad_norm": 0.7217094898223877, + "learning_rate": 0.00010490775292511214, + "loss": 2.5827, + "step": 9739 + }, + { + "epoch": 0.7860543943184569, + "grad_norm": 0.6812087893486023, + "learning_rate": 0.0001048919849899175, + "loss": 2.532, + "step": 9740 + }, + { + "epoch": 0.7861350980550399, + "grad_norm": 0.7449110150337219, + "learning_rate": 0.00010487621693280176, + "loss": 2.5611, + "step": 9741 + }, + { + "epoch": 0.786215801791623, + "grad_norm": 0.7297104001045227, + "learning_rate": 0.00010486044875415797, + "loss": 2.5173, + "step": 9742 + }, + { + "epoch": 0.786296505528206, + "grad_norm": 0.6741474270820618, + "learning_rate": 0.0001048446804543791, + "loss": 2.5451, + "step": 9743 + }, + { + "epoch": 0.7863772092647889, + "grad_norm": 0.6450859308242798, + "learning_rate": 0.00010482891203385812, + "loss": 2.551, + "step": 9744 + }, + { + "epoch": 0.7864579130013719, + "grad_norm": 0.6867123246192932, + "learning_rate": 0.00010481314349298805, + "loss": 2.4875, + "step": 9745 + }, + { + "epoch": 0.786538616737955, + "grad_norm": 0.6951552629470825, + "learning_rate": 0.00010479737483216183, + "loss": 2.6253, + "step": 9746 + }, + { + "epoch": 0.786619320474538, + "grad_norm": 0.6786869764328003, + "learning_rate": 0.0001047816060517725, + "loss": 2.5551, + "step": 9747 + }, + { + 
"epoch": 0.786700024211121, + "grad_norm": 0.698957622051239, + "learning_rate": 0.00010476583715221306, + "loss": 2.5554, + "step": 9748 + }, + { + "epoch": 0.7867807279477039, + "grad_norm": 0.6407502889633179, + "learning_rate": 0.00010475006813387648, + "loss": 2.5112, + "step": 9749 + }, + { + "epoch": 0.786861431684287, + "grad_norm": 0.660418689250946, + "learning_rate": 0.00010473429899715581, + "loss": 2.5557, + "step": 9750 + }, + { + "epoch": 0.78694213542087, + "grad_norm": 0.71445631980896, + "learning_rate": 0.00010471852974244403, + "loss": 2.5169, + "step": 9751 + }, + { + "epoch": 0.787022839157453, + "grad_norm": 0.6620494723320007, + "learning_rate": 0.00010470276037013414, + "loss": 2.5517, + "step": 9752 + }, + { + "epoch": 0.787103542894036, + "grad_norm": 0.6921235918998718, + "learning_rate": 0.00010468699088061917, + "loss": 2.5246, + "step": 9753 + }, + { + "epoch": 0.787184246630619, + "grad_norm": 0.6617140769958496, + "learning_rate": 0.00010467122127429214, + "loss": 2.4941, + "step": 9754 + }, + { + "epoch": 0.787264950367202, + "grad_norm": 0.6549816727638245, + "learning_rate": 0.00010465545155154608, + "loss": 2.5189, + "step": 9755 + }, + { + "epoch": 0.787345654103785, + "grad_norm": 0.7030060887336731, + "learning_rate": 0.00010463968171277396, + "loss": 2.5058, + "step": 9756 + }, + { + "epoch": 0.787426357840368, + "grad_norm": 0.7294049859046936, + "learning_rate": 0.00010462391175836886, + "loss": 2.5166, + "step": 9757 + }, + { + "epoch": 0.787507061576951, + "grad_norm": 0.6407562494277954, + "learning_rate": 0.00010460814168872382, + "loss": 2.5391, + "step": 9758 + }, + { + "epoch": 0.787587765313534, + "grad_norm": 0.8024646639823914, + "learning_rate": 0.0001045923715042318, + "loss": 2.7034, + "step": 9759 + }, + { + "epoch": 0.787668469050117, + "grad_norm": 0.7160943150520325, + "learning_rate": 0.00010457660120528592, + "loss": 2.6016, + "step": 9760 + }, + { + "epoch": 0.7877491727867, + "grad_norm": 
0.6987707018852234, + "learning_rate": 0.00010456083079227916, + "loss": 2.5428, + "step": 9761 + }, + { + "epoch": 0.7878298765232831, + "grad_norm": 0.7235369086265564, + "learning_rate": 0.00010454506026560453, + "loss": 2.517, + "step": 9762 + }, + { + "epoch": 0.787910580259866, + "grad_norm": 0.6827502846717834, + "learning_rate": 0.00010452928962565518, + "loss": 2.5777, + "step": 9763 + }, + { + "epoch": 0.787991283996449, + "grad_norm": 0.71755450963974, + "learning_rate": 0.00010451351887282408, + "loss": 2.6004, + "step": 9764 + }, + { + "epoch": 0.788071987733032, + "grad_norm": 0.6988046765327454, + "learning_rate": 0.00010449774800750427, + "loss": 2.6116, + "step": 9765 + }, + { + "epoch": 0.7881526914696151, + "grad_norm": 0.6959548592567444, + "learning_rate": 0.00010448197703008884, + "loss": 2.5856, + "step": 9766 + }, + { + "epoch": 0.7882333952061981, + "grad_norm": 0.687042772769928, + "learning_rate": 0.00010446620594097079, + "loss": 2.5167, + "step": 9767 + }, + { + "epoch": 0.788314098942781, + "grad_norm": 0.6950173377990723, + "learning_rate": 0.00010445043474054325, + "loss": 2.5157, + "step": 9768 + }, + { + "epoch": 0.788394802679364, + "grad_norm": 0.680768609046936, + "learning_rate": 0.00010443466342919926, + "loss": 2.6177, + "step": 9769 + }, + { + "epoch": 0.7884755064159471, + "grad_norm": 0.7790142893791199, + "learning_rate": 0.00010441889200733181, + "loss": 2.5761, + "step": 9770 + }, + { + "epoch": 0.7885562101525301, + "grad_norm": 0.6207798719406128, + "learning_rate": 0.00010440312047533406, + "loss": 2.5305, + "step": 9771 + }, + { + "epoch": 0.7886369138891131, + "grad_norm": 0.7143635749816895, + "learning_rate": 0.00010438734883359903, + "loss": 2.5922, + "step": 9772 + }, + { + "epoch": 0.788717617625696, + "grad_norm": 0.7234248518943787, + "learning_rate": 0.00010437157708251977, + "loss": 2.6051, + "step": 9773 + }, + { + "epoch": 0.7887983213622791, + "grad_norm": 0.6602753400802612, + "learning_rate": 
0.00010435580522248942, + "loss": 2.6002, + "step": 9774 + }, + { + "epoch": 0.7888790250988621, + "grad_norm": 0.6929246783256531, + "learning_rate": 0.00010434003325390101, + "loss": 2.5798, + "step": 9775 + }, + { + "epoch": 0.7889597288354451, + "grad_norm": 0.7355811595916748, + "learning_rate": 0.00010432426117714762, + "loss": 2.5859, + "step": 9776 + }, + { + "epoch": 0.789040432572028, + "grad_norm": 0.7009611129760742, + "learning_rate": 0.00010430848899262233, + "loss": 2.5535, + "step": 9777 + }, + { + "epoch": 0.7891211363086111, + "grad_norm": 0.6699070930480957, + "learning_rate": 0.00010429271670071823, + "loss": 2.5687, + "step": 9778 + }, + { + "epoch": 0.7892018400451941, + "grad_norm": 0.6632630228996277, + "learning_rate": 0.00010427694430182844, + "loss": 2.5359, + "step": 9779 + }, + { + "epoch": 0.7892825437817771, + "grad_norm": 0.7256911993026733, + "learning_rate": 0.000104261171796346, + "loss": 2.5432, + "step": 9780 + }, + { + "epoch": 0.7893632475183601, + "grad_norm": 0.6654312610626221, + "learning_rate": 0.000104245399184664, + "loss": 2.5432, + "step": 9781 + }, + { + "epoch": 0.7894439512549432, + "grad_norm": 0.6808900237083435, + "learning_rate": 0.00010422962646717557, + "loss": 2.4951, + "step": 9782 + }, + { + "epoch": 0.7895246549915261, + "grad_norm": 0.6655945181846619, + "learning_rate": 0.00010421385364427378, + "loss": 2.5152, + "step": 9783 + }, + { + "epoch": 0.7896053587281091, + "grad_norm": 0.8399274349212646, + "learning_rate": 0.00010419808071635178, + "loss": 2.5688, + "step": 9784 + }, + { + "epoch": 0.7896860624646921, + "grad_norm": 0.6412226557731628, + "learning_rate": 0.00010418230768380262, + "loss": 2.5527, + "step": 9785 + }, + { + "epoch": 0.7897667662012752, + "grad_norm": 0.6505058407783508, + "learning_rate": 0.0001041665345470194, + "loss": 2.5768, + "step": 9786 + }, + { + "epoch": 0.7898474699378581, + "grad_norm": 0.6297653317451477, + "learning_rate": 0.00010415076130639526, + "loss": 2.5372, 
+ "step": 9787 + }, + { + "epoch": 0.7899281736744411, + "grad_norm": 0.6524460315704346, + "learning_rate": 0.00010413498796232331, + "loss": 2.5047, + "step": 9788 + }, + { + "epoch": 0.7900088774110241, + "grad_norm": 0.6637924313545227, + "learning_rate": 0.00010411921451519662, + "loss": 2.508, + "step": 9789 + }, + { + "epoch": 0.7900895811476072, + "grad_norm": 0.6423435211181641, + "learning_rate": 0.00010410344096540836, + "loss": 2.4597, + "step": 9790 + }, + { + "epoch": 0.7901702848841902, + "grad_norm": 0.6361977458000183, + "learning_rate": 0.00010408766731335163, + "loss": 2.5921, + "step": 9791 + }, + { + "epoch": 0.7902509886207731, + "grad_norm": 0.6792182922363281, + "learning_rate": 0.00010407189355941953, + "loss": 2.5543, + "step": 9792 + }, + { + "epoch": 0.7903316923573561, + "grad_norm": 0.6998419761657715, + "learning_rate": 0.00010405611970400519, + "loss": 2.5333, + "step": 9793 + }, + { + "epoch": 0.7904123960939391, + "grad_norm": 0.6730015873908997, + "learning_rate": 0.00010404034574750174, + "loss": 2.596, + "step": 9794 + }, + { + "epoch": 0.7904930998305222, + "grad_norm": 0.7120258808135986, + "learning_rate": 0.00010402457169030235, + "loss": 2.5314, + "step": 9795 + }, + { + "epoch": 0.7905738035671052, + "grad_norm": 0.6553651690483093, + "learning_rate": 0.0001040087975328001, + "loss": 2.4973, + "step": 9796 + }, + { + "epoch": 0.7906545073036881, + "grad_norm": 0.6506681442260742, + "learning_rate": 0.00010399302327538812, + "loss": 2.588, + "step": 9797 + }, + { + "epoch": 0.7907352110402711, + "grad_norm": 0.6737257242202759, + "learning_rate": 0.00010397724891845957, + "loss": 2.5454, + "step": 9798 + }, + { + "epoch": 0.7908159147768542, + "grad_norm": 0.670120894908905, + "learning_rate": 0.00010396147446240756, + "loss": 2.4926, + "step": 9799 + }, + { + "epoch": 0.7908966185134372, + "grad_norm": 0.7028468251228333, + "learning_rate": 0.00010394569990762529, + "loss": 2.5727, + "step": 9800 + }, + { + "epoch": 
0.7909773222500202, + "grad_norm": 0.7084455490112305, + "learning_rate": 0.00010392992525450584, + "loss": 2.547, + "step": 9801 + }, + { + "epoch": 0.7910580259866031, + "grad_norm": 0.732694685459137, + "learning_rate": 0.0001039141505034424, + "loss": 2.5871, + "step": 9802 + }, + { + "epoch": 0.7911387297231862, + "grad_norm": 0.7214515209197998, + "learning_rate": 0.00010389837565482807, + "loss": 2.5672, + "step": 9803 + }, + { + "epoch": 0.7912194334597692, + "grad_norm": 0.6495330333709717, + "learning_rate": 0.00010388260070905604, + "loss": 2.5266, + "step": 9804 + }, + { + "epoch": 0.7913001371963522, + "grad_norm": 0.6930941343307495, + "learning_rate": 0.00010386682566651945, + "loss": 2.5734, + "step": 9805 + }, + { + "epoch": 0.7913808409329351, + "grad_norm": 0.714214563369751, + "learning_rate": 0.00010385105052761148, + "loss": 2.4987, + "step": 9806 + }, + { + "epoch": 0.7914615446695182, + "grad_norm": 0.7525388598442078, + "learning_rate": 0.00010383527529272523, + "loss": 2.5427, + "step": 9807 + }, + { + "epoch": 0.7915422484061012, + "grad_norm": 0.6088642477989197, + "learning_rate": 0.00010381949996225389, + "loss": 2.5018, + "step": 9808 + }, + { + "epoch": 0.7916229521426842, + "grad_norm": 0.6797540187835693, + "learning_rate": 0.00010380372453659066, + "loss": 2.5235, + "step": 9809 + }, + { + "epoch": 0.7917036558792672, + "grad_norm": 0.6754054427146912, + "learning_rate": 0.00010378794901612865, + "loss": 2.5343, + "step": 9810 + }, + { + "epoch": 0.7917843596158503, + "grad_norm": 0.7375015020370483, + "learning_rate": 0.00010377217340126106, + "loss": 2.6101, + "step": 9811 + }, + { + "epoch": 0.7918650633524332, + "grad_norm": 0.6487904191017151, + "learning_rate": 0.00010375639769238103, + "loss": 2.5408, + "step": 9812 + }, + { + "epoch": 0.7919457670890162, + "grad_norm": 0.7280275821685791, + "learning_rate": 0.00010374062188988176, + "loss": 2.5503, + "step": 9813 + }, + { + "epoch": 0.7920264708255992, + "grad_norm": 
0.6944922208786011, + "learning_rate": 0.00010372484599415644, + "loss": 2.5815, + "step": 9814 + }, + { + "epoch": 0.7921071745621823, + "grad_norm": 0.6970139741897583, + "learning_rate": 0.00010370907000559818, + "loss": 2.546, + "step": 9815 + }, + { + "epoch": 0.7921878782987652, + "grad_norm": 0.7338151335716248, + "learning_rate": 0.00010369329392460023, + "loss": 2.5449, + "step": 9816 + }, + { + "epoch": 0.7922685820353482, + "grad_norm": 0.7763465642929077, + "learning_rate": 0.00010367751775155574, + "loss": 2.5331, + "step": 9817 + }, + { + "epoch": 0.7923492857719312, + "grad_norm": 0.6892645955085754, + "learning_rate": 0.00010366174148685786, + "loss": 2.5617, + "step": 9818 + }, + { + "epoch": 0.7924299895085143, + "grad_norm": 0.7388250231742859, + "learning_rate": 0.00010364596513089984, + "loss": 2.5236, + "step": 9819 + }, + { + "epoch": 0.7925106932450973, + "grad_norm": 0.7035132646560669, + "learning_rate": 0.00010363018868407482, + "loss": 2.5711, + "step": 9820 + }, + { + "epoch": 0.7925913969816802, + "grad_norm": 0.7087043523788452, + "learning_rate": 0.00010361441214677603, + "loss": 2.5416, + "step": 9821 + }, + { + "epoch": 0.7926721007182632, + "grad_norm": 0.7173168063163757, + "learning_rate": 0.00010359863551939664, + "loss": 2.529, + "step": 9822 + }, + { + "epoch": 0.7927528044548463, + "grad_norm": 0.7007408738136292, + "learning_rate": 0.00010358285880232983, + "loss": 2.5287, + "step": 9823 + }, + { + "epoch": 0.7928335081914293, + "grad_norm": 0.7731965780258179, + "learning_rate": 0.0001035670819959688, + "loss": 2.5913, + "step": 9824 + }, + { + "epoch": 0.7929142119280123, + "grad_norm": 0.6625120639801025, + "learning_rate": 0.00010355130510070681, + "loss": 2.5815, + "step": 9825 + }, + { + "epoch": 0.7929949156645952, + "grad_norm": 0.6628395318984985, + "learning_rate": 0.00010353552811693699, + "loss": 2.512, + "step": 9826 + }, + { + "epoch": 0.7930756194011783, + "grad_norm": 0.6565915942192078, + "learning_rate": 
0.00010351975104505256, + "loss": 2.54, + "step": 9827 + }, + { + "epoch": 0.7931563231377613, + "grad_norm": 0.6581636667251587, + "learning_rate": 0.00010350397388544672, + "loss": 2.5462, + "step": 9828 + }, + { + "epoch": 0.7932370268743443, + "grad_norm": 0.705668568611145, + "learning_rate": 0.0001034881966385127, + "loss": 2.5241, + "step": 9829 + }, + { + "epoch": 0.7933177306109273, + "grad_norm": 0.7047126293182373, + "learning_rate": 0.00010347241930464373, + "loss": 2.5275, + "step": 9830 + }, + { + "epoch": 0.7933984343475103, + "grad_norm": 0.6285849213600159, + "learning_rate": 0.00010345664188423296, + "loss": 2.518, + "step": 9831 + }, + { + "epoch": 0.7934791380840933, + "grad_norm": 0.697542130947113, + "learning_rate": 0.00010344086437767366, + "loss": 2.5219, + "step": 9832 + }, + { + "epoch": 0.7935598418206763, + "grad_norm": 0.6349283456802368, + "learning_rate": 0.00010342508678535903, + "loss": 2.5277, + "step": 9833 + }, + { + "epoch": 0.7936405455572593, + "grad_norm": 0.7084335088729858, + "learning_rate": 0.00010340930910768225, + "loss": 2.476, + "step": 9834 + }, + { + "epoch": 0.7937212492938424, + "grad_norm": 0.6714156866073608, + "learning_rate": 0.00010339353134503662, + "loss": 2.556, + "step": 9835 + }, + { + "epoch": 0.7938019530304253, + "grad_norm": 0.6687895059585571, + "learning_rate": 0.00010337775349781527, + "loss": 2.5756, + "step": 9836 + }, + { + "epoch": 0.7938826567670083, + "grad_norm": 0.669784665107727, + "learning_rate": 0.00010336197556641152, + "loss": 2.5545, + "step": 9837 + }, + { + "epoch": 0.7939633605035913, + "grad_norm": 0.6738600134849548, + "learning_rate": 0.0001033461975512185, + "loss": 2.5807, + "step": 9838 + }, + { + "epoch": 0.7940440642401744, + "grad_norm": 0.691443681716919, + "learning_rate": 0.00010333041945262953, + "loss": 2.5279, + "step": 9839 + }, + { + "epoch": 0.7941247679767574, + "grad_norm": 0.6283861398696899, + "learning_rate": 0.0001033146412710378, + "loss": 2.5355, + 
"step": 9840 + }, + { + "epoch": 0.7942054717133403, + "grad_norm": 0.6491204500198364, + "learning_rate": 0.00010329886300683655, + "loss": 2.5431, + "step": 9841 + }, + { + "epoch": 0.7942861754499233, + "grad_norm": 0.6673988103866577, + "learning_rate": 0.00010328308466041898, + "loss": 2.5845, + "step": 9842 + }, + { + "epoch": 0.7943668791865063, + "grad_norm": 0.6669130325317383, + "learning_rate": 0.00010326730623217837, + "loss": 2.5348, + "step": 9843 + }, + { + "epoch": 0.7944475829230894, + "grad_norm": 0.7003189921379089, + "learning_rate": 0.00010325152772250795, + "loss": 2.5779, + "step": 9844 + }, + { + "epoch": 0.7945282866596723, + "grad_norm": 0.6602177619934082, + "learning_rate": 0.00010323574913180097, + "loss": 2.5527, + "step": 9845 + }, + { + "epoch": 0.7946089903962553, + "grad_norm": 0.7053726315498352, + "learning_rate": 0.00010321997046045066, + "loss": 2.566, + "step": 9846 + }, + { + "epoch": 0.7946896941328383, + "grad_norm": 0.7428076863288879, + "learning_rate": 0.00010320419170885025, + "loss": 2.5348, + "step": 9847 + }, + { + "epoch": 0.7947703978694214, + "grad_norm": 0.7029163837432861, + "learning_rate": 0.00010318841287739303, + "loss": 2.5387, + "step": 9848 + }, + { + "epoch": 0.7948511016060044, + "grad_norm": 0.6159133911132812, + "learning_rate": 0.00010317263396647221, + "loss": 2.5408, + "step": 9849 + }, + { + "epoch": 0.7949318053425873, + "grad_norm": 0.6748857498168945, + "learning_rate": 0.00010315685497648106, + "loss": 2.5299, + "step": 9850 + }, + { + "epoch": 0.7950125090791703, + "grad_norm": 0.6281898021697998, + "learning_rate": 0.00010314107590781284, + "loss": 2.5202, + "step": 9851 + }, + { + "epoch": 0.7950932128157534, + "grad_norm": 0.6602163910865784, + "learning_rate": 0.00010312529676086078, + "loss": 2.5119, + "step": 9852 + }, + { + "epoch": 0.7951739165523364, + "grad_norm": 0.6665403246879578, + "learning_rate": 0.00010310951753601818, + "loss": 2.5913, + "step": 9853 + }, + { + "epoch": 
0.7952546202889194, + "grad_norm": 0.6705873012542725, + "learning_rate": 0.00010309373823367827, + "loss": 2.6039, + "step": 9854 + }, + { + "epoch": 0.7953353240255023, + "grad_norm": 0.6571313738822937, + "learning_rate": 0.0001030779588542343, + "loss": 2.5629, + "step": 9855 + }, + { + "epoch": 0.7954160277620854, + "grad_norm": 0.6597230434417725, + "learning_rate": 0.00010306217939807956, + "loss": 2.5569, + "step": 9856 + }, + { + "epoch": 0.7954967314986684, + "grad_norm": 0.7098817229270935, + "learning_rate": 0.00010304639986560733, + "loss": 2.4736, + "step": 9857 + }, + { + "epoch": 0.7955774352352514, + "grad_norm": 0.628663957118988, + "learning_rate": 0.00010303062025721082, + "loss": 2.5241, + "step": 9858 + }, + { + "epoch": 0.7956581389718343, + "grad_norm": 0.630843460559845, + "learning_rate": 0.00010301484057328333, + "loss": 2.5604, + "step": 9859 + }, + { + "epoch": 0.7957388427084174, + "grad_norm": 0.7457596659660339, + "learning_rate": 0.00010299906081421813, + "loss": 2.5675, + "step": 9860 + }, + { + "epoch": 0.7958195464450004, + "grad_norm": 0.6566091775894165, + "learning_rate": 0.00010298328098040851, + "loss": 2.4918, + "step": 9861 + }, + { + "epoch": 0.7959002501815834, + "grad_norm": 0.657357931137085, + "learning_rate": 0.00010296750107224773, + "loss": 2.5268, + "step": 9862 + }, + { + "epoch": 0.7959809539181664, + "grad_norm": 0.7021927833557129, + "learning_rate": 0.00010295172109012905, + "loss": 2.528, + "step": 9863 + }, + { + "epoch": 0.7960616576547495, + "grad_norm": 0.662053108215332, + "learning_rate": 0.00010293594103444578, + "loss": 2.5483, + "step": 9864 + }, + { + "epoch": 0.7961423613913324, + "grad_norm": 0.776407778263092, + "learning_rate": 0.00010292016090559118, + "loss": 2.6089, + "step": 9865 + }, + { + "epoch": 0.7962230651279154, + "grad_norm": 0.6499512791633606, + "learning_rate": 0.00010290438070395854, + "loss": 2.5609, + "step": 9866 + }, + { + "epoch": 0.7963037688644984, + "grad_norm": 
0.6802246570587158, + "learning_rate": 0.00010288860042994113, + "loss": 2.5217, + "step": 9867 + }, + { + "epoch": 0.7963844726010815, + "grad_norm": 0.6371235847473145, + "learning_rate": 0.00010287282008393224, + "loss": 2.4783, + "step": 9868 + }, + { + "epoch": 0.7964651763376644, + "grad_norm": 0.7070169448852539, + "learning_rate": 0.00010285703966632518, + "loss": 2.5006, + "step": 9869 + }, + { + "epoch": 0.7965458800742474, + "grad_norm": 0.657738208770752, + "learning_rate": 0.00010284125917751323, + "loss": 2.551, + "step": 9870 + }, + { + "epoch": 0.7966265838108304, + "grad_norm": 0.7936853170394897, + "learning_rate": 0.00010282547861788964, + "loss": 2.574, + "step": 9871 + }, + { + "epoch": 0.7967072875474135, + "grad_norm": 0.675715982913971, + "learning_rate": 0.00010280969798784779, + "loss": 2.5288, + "step": 9872 + }, + { + "epoch": 0.7967879912839965, + "grad_norm": 0.6980394124984741, + "learning_rate": 0.00010279391728778092, + "loss": 2.5437, + "step": 9873 + }, + { + "epoch": 0.7968686950205794, + "grad_norm": 0.6580469608306885, + "learning_rate": 0.00010277813651808226, + "loss": 2.5574, + "step": 9874 + }, + { + "epoch": 0.7969493987571624, + "grad_norm": 0.6960238218307495, + "learning_rate": 0.00010276235567914522, + "loss": 2.5477, + "step": 9875 + }, + { + "epoch": 0.7970301024937455, + "grad_norm": 0.704140841960907, + "learning_rate": 0.00010274657477136304, + "loss": 2.5099, + "step": 9876 + }, + { + "epoch": 0.7971108062303285, + "grad_norm": 0.7238990068435669, + "learning_rate": 0.00010273079379512906, + "loss": 2.6182, + "step": 9877 + }, + { + "epoch": 0.7971915099669115, + "grad_norm": 0.6527700424194336, + "learning_rate": 0.00010271501275083657, + "loss": 2.5148, + "step": 9878 + }, + { + "epoch": 0.7972722137034944, + "grad_norm": 0.6665365695953369, + "learning_rate": 0.00010269923163887884, + "loss": 2.5624, + "step": 9879 + }, + { + "epoch": 0.7973529174400775, + "grad_norm": 0.7304019927978516, + "learning_rate": 
0.0001026834504596492, + "loss": 2.5537, + "step": 9880 + }, + { + "epoch": 0.7974336211766605, + "grad_norm": 0.6645877957344055, + "learning_rate": 0.00010266766921354099, + "loss": 2.5381, + "step": 9881 + }, + { + "epoch": 0.7975143249132435, + "grad_norm": 0.6817314624786377, + "learning_rate": 0.00010265188790094744, + "loss": 2.5399, + "step": 9882 + }, + { + "epoch": 0.7975950286498265, + "grad_norm": 0.7477232217788696, + "learning_rate": 0.00010263610652226194, + "loss": 2.6461, + "step": 9883 + }, + { + "epoch": 0.7976757323864095, + "grad_norm": 0.7087170481681824, + "learning_rate": 0.00010262032507787777, + "loss": 2.5469, + "step": 9884 + }, + { + "epoch": 0.7977564361229925, + "grad_norm": 0.7093435525894165, + "learning_rate": 0.00010260454356818825, + "loss": 2.5606, + "step": 9885 + }, + { + "epoch": 0.7978371398595755, + "grad_norm": 0.6662636399269104, + "learning_rate": 0.00010258876199358672, + "loss": 2.5415, + "step": 9886 + }, + { + "epoch": 0.7979178435961585, + "grad_norm": 0.6829736232757568, + "learning_rate": 0.00010257298035446644, + "loss": 2.5618, + "step": 9887 + }, + { + "epoch": 0.7979985473327416, + "grad_norm": 0.6872264742851257, + "learning_rate": 0.00010255719865122077, + "loss": 2.5629, + "step": 9888 + }, + { + "epoch": 0.7980792510693245, + "grad_norm": 0.6988633871078491, + "learning_rate": 0.00010254141688424303, + "loss": 2.5191, + "step": 9889 + }, + { + "epoch": 0.7981599548059075, + "grad_norm": 0.6787285804748535, + "learning_rate": 0.00010252563505392654, + "loss": 2.5003, + "step": 9890 + }, + { + "epoch": 0.7982406585424905, + "grad_norm": 0.6703466773033142, + "learning_rate": 0.00010250985316066461, + "loss": 2.5442, + "step": 9891 + }, + { + "epoch": 0.7983213622790736, + "grad_norm": 0.6463642120361328, + "learning_rate": 0.0001024940712048506, + "loss": 2.5236, + "step": 9892 + }, + { + "epoch": 0.7984020660156566, + "grad_norm": 0.6835207939147949, + "learning_rate": 0.0001024782891868778, + "loss": 
2.5094, + "step": 9893 + }, + { + "epoch": 0.7984827697522395, + "grad_norm": 0.6621001958847046, + "learning_rate": 0.00010246250710713956, + "loss": 2.5456, + "step": 9894 + }, + { + "epoch": 0.7985634734888225, + "grad_norm": 0.6675469875335693, + "learning_rate": 0.0001024467249660292, + "loss": 2.5312, + "step": 9895 + }, + { + "epoch": 0.7986441772254055, + "grad_norm": 0.7357796430587769, + "learning_rate": 0.00010243094276394007, + "loss": 2.5374, + "step": 9896 + }, + { + "epoch": 0.7987248809619886, + "grad_norm": 0.7005879878997803, + "learning_rate": 0.00010241516050126549, + "loss": 2.5667, + "step": 9897 + }, + { + "epoch": 0.7988055846985715, + "grad_norm": 0.669870913028717, + "learning_rate": 0.0001023993781783988, + "loss": 2.533, + "step": 9898 + }, + { + "epoch": 0.7988862884351545, + "grad_norm": 0.7584091424942017, + "learning_rate": 0.00010238359579573333, + "loss": 2.5995, + "step": 9899 + }, + { + "epoch": 0.7989669921717375, + "grad_norm": 0.6931570172309875, + "learning_rate": 0.00010236781335366239, + "loss": 2.5506, + "step": 9900 + }, + { + "epoch": 0.7990476959083206, + "grad_norm": 0.6810948848724365, + "learning_rate": 0.0001023520308525794, + "loss": 2.5048, + "step": 9901 + }, + { + "epoch": 0.7991283996449036, + "grad_norm": 0.6857194900512695, + "learning_rate": 0.00010233624829287765, + "loss": 2.5559, + "step": 9902 + }, + { + "epoch": 0.7992091033814865, + "grad_norm": 0.6685707569122314, + "learning_rate": 0.00010232046567495046, + "loss": 2.5661, + "step": 9903 + }, + { + "epoch": 0.7992898071180695, + "grad_norm": 0.6626694202423096, + "learning_rate": 0.00010230468299919121, + "loss": 2.6293, + "step": 9904 + }, + { + "epoch": 0.7993705108546526, + "grad_norm": 0.6407302021980286, + "learning_rate": 0.00010228890026599323, + "loss": 2.5552, + "step": 9905 + }, + { + "epoch": 0.7994512145912356, + "grad_norm": 0.762235701084137, + "learning_rate": 0.00010227311747574986, + "loss": 2.4904, + "step": 9906 + }, + { + "epoch": 
0.7995319183278186, + "grad_norm": 0.703507661819458, + "learning_rate": 0.0001022573346288545, + "loss": 2.5684, + "step": 9907 + }, + { + "epoch": 0.7996126220644015, + "grad_norm": 0.82541823387146, + "learning_rate": 0.00010224155172570043, + "loss": 2.521, + "step": 9908 + }, + { + "epoch": 0.7996933258009846, + "grad_norm": 0.6836804747581482, + "learning_rate": 0.00010222576876668104, + "loss": 2.5364, + "step": 9909 + }, + { + "epoch": 0.7997740295375676, + "grad_norm": 0.7388977408409119, + "learning_rate": 0.00010220998575218966, + "loss": 2.5724, + "step": 9910 + }, + { + "epoch": 0.7998547332741506, + "grad_norm": 0.7380896806716919, + "learning_rate": 0.00010219420268261966, + "loss": 2.5918, + "step": 9911 + }, + { + "epoch": 0.7999354370107336, + "grad_norm": 0.7303522825241089, + "learning_rate": 0.00010217841955836442, + "loss": 2.5432, + "step": 9912 + }, + { + "epoch": 0.8000161407473166, + "grad_norm": 0.6859301924705505, + "learning_rate": 0.00010216263637981727, + "loss": 2.5734, + "step": 9913 + }, + { + "epoch": 0.8000968444838996, + "grad_norm": 0.731910228729248, + "learning_rate": 0.00010214685314737154, + "loss": 2.5227, + "step": 9914 + }, + { + "epoch": 0.8001775482204826, + "grad_norm": 0.7105006575584412, + "learning_rate": 0.00010213106986142062, + "loss": 2.5335, + "step": 9915 + }, + { + "epoch": 0.8002582519570656, + "grad_norm": 0.7337056994438171, + "learning_rate": 0.00010211528652235786, + "loss": 2.6204, + "step": 9916 + }, + { + "epoch": 0.8003389556936487, + "grad_norm": 0.7350614666938782, + "learning_rate": 0.00010209950313057668, + "loss": 2.5264, + "step": 9917 + }, + { + "epoch": 0.8004196594302316, + "grad_norm": 0.6411921977996826, + "learning_rate": 0.00010208371968647036, + "loss": 2.4642, + "step": 9918 + }, + { + "epoch": 0.8005003631668146, + "grad_norm": 0.7601611018180847, + "learning_rate": 0.00010206793619043229, + "loss": 2.6249, + "step": 9919 + }, + { + "epoch": 0.8005810669033976, + "grad_norm": 
0.7086012363433838, + "learning_rate": 0.00010205215264285585, + "loss": 2.5508, + "step": 9920 + }, + { + "epoch": 0.8006617706399807, + "grad_norm": 0.7267128825187683, + "learning_rate": 0.00010203636904413443, + "loss": 2.5109, + "step": 9921 + }, + { + "epoch": 0.8007424743765637, + "grad_norm": 0.7606067657470703, + "learning_rate": 0.00010202058539466132, + "loss": 2.5172, + "step": 9922 + }, + { + "epoch": 0.8008231781131466, + "grad_norm": 0.7610498666763306, + "learning_rate": 0.00010200480169483, + "loss": 2.5085, + "step": 9923 + }, + { + "epoch": 0.8009038818497296, + "grad_norm": 0.7604225873947144, + "learning_rate": 0.00010198901794503373, + "loss": 2.5615, + "step": 9924 + }, + { + "epoch": 0.8009845855863127, + "grad_norm": 0.739532470703125, + "learning_rate": 0.00010197323414566596, + "loss": 2.5574, + "step": 9925 + }, + { + "epoch": 0.8010652893228957, + "grad_norm": 0.6913303136825562, + "learning_rate": 0.00010195745029712003, + "loss": 2.5403, + "step": 9926 + }, + { + "epoch": 0.8011459930594786, + "grad_norm": 0.6963592767715454, + "learning_rate": 0.0001019416663997893, + "loss": 2.5615, + "step": 9927 + }, + { + "epoch": 0.8012266967960616, + "grad_norm": 0.681481122970581, + "learning_rate": 0.0001019258824540672, + "loss": 2.5125, + "step": 9928 + }, + { + "epoch": 0.8013074005326447, + "grad_norm": 0.7192744016647339, + "learning_rate": 0.00010191009846034709, + "loss": 2.5952, + "step": 9929 + }, + { + "epoch": 0.8013881042692277, + "grad_norm": 0.7030046582221985, + "learning_rate": 0.00010189431441902228, + "loss": 2.5445, + "step": 9930 + }, + { + "epoch": 0.8014688080058107, + "grad_norm": 0.6180598139762878, + "learning_rate": 0.00010187853033048622, + "loss": 2.4902, + "step": 9931 + }, + { + "epoch": 0.8015495117423936, + "grad_norm": 0.7479971051216125, + "learning_rate": 0.0001018627461951323, + "loss": 2.5703, + "step": 9932 + }, + { + "epoch": 0.8016302154789767, + "grad_norm": 0.7339857220649719, + "learning_rate": 
0.00010184696201335387, + "loss": 2.5744, + "step": 9933 + }, + { + "epoch": 0.8017109192155597, + "grad_norm": 0.6741397380828857, + "learning_rate": 0.00010183117778554432, + "loss": 2.5777, + "step": 9934 + }, + { + "epoch": 0.8017916229521427, + "grad_norm": 0.6731706857681274, + "learning_rate": 0.00010181539351209699, + "loss": 2.5438, + "step": 9935 + }, + { + "epoch": 0.8018723266887257, + "grad_norm": 0.6929418444633484, + "learning_rate": 0.00010179960919340535, + "loss": 2.5308, + "step": 9936 + }, + { + "epoch": 0.8019530304253087, + "grad_norm": 0.7383175492286682, + "learning_rate": 0.00010178382482986271, + "loss": 2.5623, + "step": 9937 + }, + { + "epoch": 0.8020337341618917, + "grad_norm": 0.6872193217277527, + "learning_rate": 0.00010176804042186252, + "loss": 2.5271, + "step": 9938 + }, + { + "epoch": 0.8021144378984747, + "grad_norm": 0.7354295253753662, + "learning_rate": 0.00010175225596979816, + "loss": 2.5122, + "step": 9939 + }, + { + "epoch": 0.8021951416350577, + "grad_norm": 0.7589237689971924, + "learning_rate": 0.00010173647147406297, + "loss": 2.5529, + "step": 9940 + }, + { + "epoch": 0.8022758453716408, + "grad_norm": 0.6998353004455566, + "learning_rate": 0.00010172068693505037, + "loss": 2.4683, + "step": 9941 + }, + { + "epoch": 0.8023565491082237, + "grad_norm": 0.6816055178642273, + "learning_rate": 0.00010170490235315377, + "loss": 2.567, + "step": 9942 + }, + { + "epoch": 0.8024372528448067, + "grad_norm": 0.7188318371772766, + "learning_rate": 0.00010168911772876652, + "loss": 2.5631, + "step": 9943 + }, + { + "epoch": 0.8025179565813897, + "grad_norm": 0.6925922632217407, + "learning_rate": 0.00010167333306228209, + "loss": 2.4872, + "step": 9944 + }, + { + "epoch": 0.8025986603179727, + "grad_norm": 0.7081493735313416, + "learning_rate": 0.00010165754835409377, + "loss": 2.5482, + "step": 9945 + }, + { + "epoch": 0.8026793640545558, + "grad_norm": 0.6838935613632202, + "learning_rate": 0.00010164176360459505, + "loss": 
2.541, + "step": 9946 + }, + { + "epoch": 0.8027600677911387, + "grad_norm": 0.6959214210510254, + "learning_rate": 0.00010162597881417928, + "loss": 2.4574, + "step": 9947 + }, + { + "epoch": 0.8028407715277217, + "grad_norm": 0.693004310131073, + "learning_rate": 0.00010161019398323986, + "loss": 2.5553, + "step": 9948 + }, + { + "epoch": 0.8029214752643047, + "grad_norm": 0.6683690547943115, + "learning_rate": 0.00010159440911217022, + "loss": 2.5501, + "step": 9949 + }, + { + "epoch": 0.8030021790008878, + "grad_norm": 0.6797001361846924, + "learning_rate": 0.0001015786242013637, + "loss": 2.5731, + "step": 9950 + }, + { + "epoch": 0.8030828827374707, + "grad_norm": 0.6621012091636658, + "learning_rate": 0.00010156283925121375, + "loss": 2.5278, + "step": 9951 + }, + { + "epoch": 0.8031635864740537, + "grad_norm": 0.7024650573730469, + "learning_rate": 0.00010154705426211377, + "loss": 2.5939, + "step": 9952 + }, + { + "epoch": 0.8032442902106367, + "grad_norm": 0.6756548285484314, + "learning_rate": 0.00010153126923445714, + "loss": 2.5797, + "step": 9953 + }, + { + "epoch": 0.8033249939472198, + "grad_norm": 0.6560662984848022, + "learning_rate": 0.00010151548416863732, + "loss": 2.5358, + "step": 9954 + }, + { + "epoch": 0.8034056976838028, + "grad_norm": 0.7172456979751587, + "learning_rate": 0.00010149969906504766, + "loss": 2.5054, + "step": 9955 + }, + { + "epoch": 0.8034864014203857, + "grad_norm": 0.6379461288452148, + "learning_rate": 0.00010148391392408152, + "loss": 2.5341, + "step": 9956 + }, + { + "epoch": 0.8035671051569687, + "grad_norm": 0.6553892493247986, + "learning_rate": 0.00010146812874613243, + "loss": 2.5618, + "step": 9957 + }, + { + "epoch": 0.8036478088935518, + "grad_norm": 0.6940072178840637, + "learning_rate": 0.00010145234353159372, + "loss": 2.5686, + "step": 9958 + }, + { + "epoch": 0.8037285126301348, + "grad_norm": 0.6641896963119507, + "learning_rate": 0.00010143655828085878, + "loss": 2.5188, + "step": 9959 + }, + { + 
"epoch": 0.8038092163667178, + "grad_norm": 0.6622887253761292, + "learning_rate": 0.00010142077299432111, + "loss": 2.54, + "step": 9960 + }, + { + "epoch": 0.8038899201033007, + "grad_norm": 0.7216808795928955, + "learning_rate": 0.000101404987672374, + "loss": 2.5775, + "step": 9961 + }, + { + "epoch": 0.8039706238398838, + "grad_norm": 0.6544952988624573, + "learning_rate": 0.00010138920231541095, + "loss": 2.6066, + "step": 9962 + }, + { + "epoch": 0.8040513275764668, + "grad_norm": 0.6869354248046875, + "learning_rate": 0.00010137341692382539, + "loss": 2.5157, + "step": 9963 + }, + { + "epoch": 0.8041320313130498, + "grad_norm": 0.6731898784637451, + "learning_rate": 0.00010135763149801063, + "loss": 2.4369, + "step": 9964 + }, + { + "epoch": 0.8042127350496328, + "grad_norm": 0.6943373084068298, + "learning_rate": 0.00010134184603836017, + "loss": 2.5529, + "step": 9965 + }, + { + "epoch": 0.8042934387862158, + "grad_norm": 0.729928195476532, + "learning_rate": 0.00010132606054526739, + "loss": 2.5814, + "step": 9966 + }, + { + "epoch": 0.8043741425227988, + "grad_norm": 0.6491130590438843, + "learning_rate": 0.00010131027501912571, + "loss": 2.5246, + "step": 9967 + }, + { + "epoch": 0.8044548462593818, + "grad_norm": 0.747756838798523, + "learning_rate": 0.00010129448946032857, + "loss": 2.513, + "step": 9968 + }, + { + "epoch": 0.8045355499959648, + "grad_norm": 0.6449645757675171, + "learning_rate": 0.00010127870386926935, + "loss": 2.5232, + "step": 9969 + }, + { + "epoch": 0.8046162537325479, + "grad_norm": 0.6425037980079651, + "learning_rate": 0.0001012629182463415, + "loss": 2.5065, + "step": 9970 + }, + { + "epoch": 0.8046969574691308, + "grad_norm": 0.7340624332427979, + "learning_rate": 0.00010124713259193843, + "loss": 2.5325, + "step": 9971 + }, + { + "epoch": 0.8047776612057138, + "grad_norm": 0.7308940291404724, + "learning_rate": 0.00010123134690645352, + "loss": 2.5717, + "step": 9972 + }, + { + "epoch": 0.8048583649422968, + "grad_norm": 
0.7128338813781738, + "learning_rate": 0.00010121556119028028, + "loss": 2.5548, + "step": 9973 + }, + { + "epoch": 0.8049390686788799, + "grad_norm": 0.7027677893638611, + "learning_rate": 0.00010119977544381207, + "loss": 2.5311, + "step": 9974 + }, + { + "epoch": 0.8050197724154629, + "grad_norm": 0.7022054195404053, + "learning_rate": 0.00010118398966744229, + "loss": 2.5177, + "step": 9975 + }, + { + "epoch": 0.8051004761520458, + "grad_norm": 0.7382696270942688, + "learning_rate": 0.00010116820386156441, + "loss": 2.532, + "step": 9976 + }, + { + "epoch": 0.8051811798886288, + "grad_norm": 0.6968613862991333, + "learning_rate": 0.00010115241802657181, + "loss": 2.536, + "step": 9977 + }, + { + "epoch": 0.8052618836252119, + "grad_norm": 0.8277899026870728, + "learning_rate": 0.00010113663216285798, + "loss": 2.5963, + "step": 9978 + }, + { + "epoch": 0.8053425873617949, + "grad_norm": 0.677707314491272, + "learning_rate": 0.00010112084627081629, + "loss": 2.5041, + "step": 9979 + }, + { + "epoch": 0.8054232910983778, + "grad_norm": 0.6943314075469971, + "learning_rate": 0.00010110506035084017, + "loss": 2.4776, + "step": 9980 + }, + { + "epoch": 0.8055039948349608, + "grad_norm": 0.6948177218437195, + "learning_rate": 0.00010108927440332306, + "loss": 2.5306, + "step": 9981 + }, + { + "epoch": 0.8055846985715439, + "grad_norm": 0.6873918771743774, + "learning_rate": 0.0001010734884286584, + "loss": 2.5783, + "step": 9982 + }, + { + "epoch": 0.8056654023081269, + "grad_norm": 0.6370649933815002, + "learning_rate": 0.00010105770242723958, + "loss": 2.5584, + "step": 9983 + }, + { + "epoch": 0.8057461060447099, + "grad_norm": 0.7594422698020935, + "learning_rate": 0.00010104191639946008, + "loss": 2.543, + "step": 9984 + }, + { + "epoch": 0.8058268097812928, + "grad_norm": 0.697380542755127, + "learning_rate": 0.00010102613034571327, + "loss": 2.5295, + "step": 9985 + }, + { + "epoch": 0.8059075135178759, + "grad_norm": 0.6597251892089844, + "learning_rate": 
0.00010101034426639264, + "loss": 2.5917, + "step": 9986 + }, + { + "epoch": 0.8059882172544589, + "grad_norm": 0.6583479046821594, + "learning_rate": 0.00010099455816189156, + "loss": 2.6206, + "step": 9987 + }, + { + "epoch": 0.8060689209910419, + "grad_norm": 0.6603943705558777, + "learning_rate": 0.00010097877203260349, + "loss": 2.5223, + "step": 9988 + }, + { + "epoch": 0.8061496247276249, + "grad_norm": 0.716454267501831, + "learning_rate": 0.00010096298587892188, + "loss": 2.5572, + "step": 9989 + }, + { + "epoch": 0.806230328464208, + "grad_norm": 0.6511488556861877, + "learning_rate": 0.00010094719970124016, + "loss": 2.5815, + "step": 9990 + }, + { + "epoch": 0.8063110322007909, + "grad_norm": 0.6969261169433594, + "learning_rate": 0.00010093141349995173, + "loss": 2.5902, + "step": 9991 + }, + { + "epoch": 0.8063917359373739, + "grad_norm": 0.7012695074081421, + "learning_rate": 0.00010091562727545001, + "loss": 2.5134, + "step": 9992 + }, + { + "epoch": 0.8064724396739569, + "grad_norm": 0.6368406414985657, + "learning_rate": 0.00010089984102812848, + "loss": 2.568, + "step": 9993 + }, + { + "epoch": 0.80655314341054, + "grad_norm": 0.6552153825759888, + "learning_rate": 0.00010088405475838059, + "loss": 2.5101, + "step": 9994 + }, + { + "epoch": 0.8066338471471229, + "grad_norm": 0.6949633359909058, + "learning_rate": 0.00010086826846659974, + "loss": 2.5427, + "step": 9995 + }, + { + "epoch": 0.8067145508837059, + "grad_norm": 0.6593093872070312, + "learning_rate": 0.00010085248215317935, + "loss": 2.5551, + "step": 9996 + }, + { + "epoch": 0.8067952546202889, + "grad_norm": 0.6963745355606079, + "learning_rate": 0.00010083669581851287, + "loss": 2.4956, + "step": 9997 + }, + { + "epoch": 0.8068759583568719, + "grad_norm": 0.7093523144721985, + "learning_rate": 0.00010082090946299377, + "loss": 2.5876, + "step": 9998 + }, + { + "epoch": 0.806956662093455, + "grad_norm": 0.6796671152114868, + "learning_rate": 0.00010080512308701544, + "loss": 2.5302, 
+ "step": 9999 + }, + { + "epoch": 0.8070373658300379, + "grad_norm": 0.7170542478561401, + "learning_rate": 0.00010078933669097135, + "loss": 2.5886, + "step": 10000 + }, + { + "epoch": 0.8070373658300379, + "eval_loss": 2.4734926223754883, + "eval_runtime": 788.2594, + "eval_samples_per_second": 3.324, + "eval_steps_per_second": 0.554, + "step": 10000 + }, + { + "epoch": 0.8071180695666209, + "grad_norm": 0.6566126346588135, + "learning_rate": 0.0001007735502752549, + "loss": 2.4441, + "step": 10001 + }, + { + "epoch": 0.8071987733032039, + "grad_norm": 0.6739515662193298, + "learning_rate": 0.00010075776384025957, + "loss": 2.5767, + "step": 10002 + }, + { + "epoch": 0.807279477039787, + "grad_norm": 0.6334208846092224, + "learning_rate": 0.00010074197738637881, + "loss": 2.5321, + "step": 10003 + }, + { + "epoch": 0.80736018077637, + "grad_norm": 0.6764520406723022, + "learning_rate": 0.000100726190914006, + "loss": 2.5144, + "step": 10004 + }, + { + "epoch": 0.8074408845129529, + "grad_norm": 0.7090082764625549, + "learning_rate": 0.00010071040442353464, + "loss": 2.5626, + "step": 10005 + }, + { + "epoch": 0.8075215882495359, + "grad_norm": 0.6915304064750671, + "learning_rate": 0.00010069461791535814, + "loss": 2.5261, + "step": 10006 + }, + { + "epoch": 0.807602291986119, + "grad_norm": 0.6685747504234314, + "learning_rate": 0.00010067883138986991, + "loss": 2.492, + "step": 10007 + }, + { + "epoch": 0.807682995722702, + "grad_norm": 0.7179074883460999, + "learning_rate": 0.00010066304484746347, + "loss": 2.4601, + "step": 10008 + }, + { + "epoch": 0.807763699459285, + "grad_norm": 0.7032761573791504, + "learning_rate": 0.00010064725828853219, + "loss": 2.578, + "step": 10009 + }, + { + "epoch": 0.8078444031958679, + "grad_norm": 0.710322916507721, + "learning_rate": 0.00010063147171346959, + "loss": 2.5514, + "step": 10010 + }, + { + "epoch": 0.807925106932451, + "grad_norm": 0.6552841067314148, + "learning_rate": 0.00010061568512266903, + "loss": 2.5474, 
+ "step": 10011 + }, + { + "epoch": 0.808005810669034, + "grad_norm": 0.6862452626228333, + "learning_rate": 0.00010059989851652398, + "loss": 2.5772, + "step": 10012 + }, + { + "epoch": 0.808086514405617, + "grad_norm": 0.7123851180076599, + "learning_rate": 0.00010058411189542788, + "loss": 2.4936, + "step": 10013 + }, + { + "epoch": 0.8081672181421999, + "grad_norm": 0.6889944672584534, + "learning_rate": 0.00010056832525977422, + "loss": 2.5041, + "step": 10014 + }, + { + "epoch": 0.808247921878783, + "grad_norm": 0.6986924409866333, + "learning_rate": 0.0001005525386099564, + "loss": 2.5591, + "step": 10015 + }, + { + "epoch": 0.808328625615366, + "grad_norm": 0.6935306787490845, + "learning_rate": 0.00010053675194636787, + "loss": 2.5423, + "step": 10016 + }, + { + "epoch": 0.808409329351949, + "grad_norm": 0.6751969456672668, + "learning_rate": 0.00010052096526940207, + "loss": 2.5666, + "step": 10017 + }, + { + "epoch": 0.808490033088532, + "grad_norm": 0.676909327507019, + "learning_rate": 0.00010050517857945243, + "loss": 2.5394, + "step": 10018 + }, + { + "epoch": 0.808570736825115, + "grad_norm": 0.7439377307891846, + "learning_rate": 0.00010048939187691246, + "loss": 2.5011, + "step": 10019 + }, + { + "epoch": 0.808651440561698, + "grad_norm": 0.6594791412353516, + "learning_rate": 0.00010047360516217554, + "loss": 2.5159, + "step": 10020 + }, + { + "epoch": 0.808732144298281, + "grad_norm": 0.7013304233551025, + "learning_rate": 0.00010045781843563517, + "loss": 2.5439, + "step": 10021 + }, + { + "epoch": 0.808812848034864, + "grad_norm": 0.7537491917610168, + "learning_rate": 0.00010044203169768476, + "loss": 2.5837, + "step": 10022 + }, + { + "epoch": 0.8088935517714471, + "grad_norm": 0.7273866534233093, + "learning_rate": 0.00010042624494871773, + "loss": 2.5546, + "step": 10023 + }, + { + "epoch": 0.80897425550803, + "grad_norm": 0.6716369986534119, + "learning_rate": 0.0001004104581891276, + "loss": 2.5264, + "step": 10024 + }, + { + "epoch": 
0.809054959244613, + "grad_norm": 0.7544769644737244, + "learning_rate": 0.00010039467141930777, + "loss": 2.5502, + "step": 10025 + }, + { + "epoch": 0.809135662981196, + "grad_norm": 0.8713179230690002, + "learning_rate": 0.0001003788846396517, + "loss": 2.5178, + "step": 10026 + }, + { + "epoch": 0.8092163667177791, + "grad_norm": 0.6704887747764587, + "learning_rate": 0.00010036309785055283, + "loss": 2.5136, + "step": 10027 + }, + { + "epoch": 0.809297070454362, + "grad_norm": 0.7308552861213684, + "learning_rate": 0.00010034731105240458, + "loss": 2.4781, + "step": 10028 + }, + { + "epoch": 0.809377774190945, + "grad_norm": 0.7214144468307495, + "learning_rate": 0.00010033152424560049, + "loss": 2.5946, + "step": 10029 + }, + { + "epoch": 0.809458477927528, + "grad_norm": 0.6946821808815002, + "learning_rate": 0.00010031573743053393, + "loss": 2.4937, + "step": 10030 + }, + { + "epoch": 0.8095391816641111, + "grad_norm": 0.7348416447639465, + "learning_rate": 0.00010029995060759833, + "loss": 2.5959, + "step": 10031 + }, + { + "epoch": 0.8096198854006941, + "grad_norm": 0.7482579350471497, + "learning_rate": 0.00010028416377718721, + "loss": 2.6, + "step": 10032 + }, + { + "epoch": 0.809700589137277, + "grad_norm": 0.7114939093589783, + "learning_rate": 0.00010026837693969397, + "loss": 2.5376, + "step": 10033 + }, + { + "epoch": 0.80978129287386, + "grad_norm": 0.6559228897094727, + "learning_rate": 0.00010025259009551209, + "loss": 2.4961, + "step": 10034 + }, + { + "epoch": 0.8098619966104431, + "grad_norm": 0.7494906187057495, + "learning_rate": 0.00010023680324503501, + "loss": 2.5723, + "step": 10035 + }, + { + "epoch": 0.8099427003470261, + "grad_norm": 0.7207093834877014, + "learning_rate": 0.00010022101638865618, + "loss": 2.5523, + "step": 10036 + }, + { + "epoch": 0.8100234040836091, + "grad_norm": 0.6730504035949707, + "learning_rate": 0.00010020522952676903, + "loss": 2.5135, + "step": 10037 + }, + { + "epoch": 0.810104107820192, + "grad_norm": 
0.6805168390274048, + "learning_rate": 0.000100189442659767, + "loss": 2.5598, + "step": 10038 + }, + { + "epoch": 0.8101848115567751, + "grad_norm": 0.6639137268066406, + "learning_rate": 0.00010017365578804358, + "loss": 2.5152, + "step": 10039 + }, + { + "epoch": 0.8102655152933581, + "grad_norm": 0.6604194641113281, + "learning_rate": 0.00010015786891199221, + "loss": 2.5302, + "step": 10040 + }, + { + "epoch": 0.8103462190299411, + "grad_norm": 0.7664934992790222, + "learning_rate": 0.00010014208203200634, + "loss": 2.5437, + "step": 10041 + }, + { + "epoch": 0.8104269227665241, + "grad_norm": 0.7404079437255859, + "learning_rate": 0.00010012629514847942, + "loss": 2.6559, + "step": 10042 + }, + { + "epoch": 0.8105076265031071, + "grad_norm": 0.694006085395813, + "learning_rate": 0.00010011050826180488, + "loss": 2.5571, + "step": 10043 + }, + { + "epoch": 0.8105883302396901, + "grad_norm": 0.7007058262825012, + "learning_rate": 0.00010009472137237616, + "loss": 2.5639, + "step": 10044 + }, + { + "epoch": 0.8106690339762731, + "grad_norm": 0.7331913113594055, + "learning_rate": 0.00010007893448058678, + "loss": 2.5499, + "step": 10045 + }, + { + "epoch": 0.8107497377128561, + "grad_norm": 0.7636487483978271, + "learning_rate": 0.00010006314758683015, + "loss": 2.6068, + "step": 10046 + }, + { + "epoch": 0.810830441449439, + "grad_norm": 0.6505223512649536, + "learning_rate": 0.0001000473606914997, + "loss": 2.5313, + "step": 10047 + }, + { + "epoch": 0.8109111451860221, + "grad_norm": 0.6425966620445251, + "learning_rate": 0.00010003157379498886, + "loss": 2.5998, + "step": 10048 + }, + { + "epoch": 0.8109918489226051, + "grad_norm": 0.7163281440734863, + "learning_rate": 0.00010001578689769116, + "loss": 2.5493, + "step": 10049 + }, + { + "epoch": 0.8110725526591881, + "grad_norm": 0.7345306873321533, + "learning_rate": 0.0001, + "loss": 2.5609, + "step": 10050 + }, + { + "epoch": 0.8111532563957711, + "grad_norm": 0.6808427572250366, + "learning_rate": 
9.998421310230884e-05, + "loss": 2.4823, + "step": 10051 + }, + { + "epoch": 0.8112339601323542, + "grad_norm": 0.7456082105636597, + "learning_rate": 9.996842620501115e-05, + "loss": 2.4782, + "step": 10052 + }, + { + "epoch": 0.8113146638689371, + "grad_norm": 0.7061728239059448, + "learning_rate": 9.995263930850034e-05, + "loss": 2.4906, + "step": 10053 + }, + { + "epoch": 0.8113953676055201, + "grad_norm": 0.691663920879364, + "learning_rate": 9.993685241316986e-05, + "loss": 2.5842, + "step": 10054 + }, + { + "epoch": 0.8114760713421031, + "grad_norm": 0.6899400353431702, + "learning_rate": 9.992106551941325e-05, + "loss": 2.5628, + "step": 10055 + }, + { + "epoch": 0.8115567750786862, + "grad_norm": 0.6909289360046387, + "learning_rate": 9.990527862762385e-05, + "loss": 2.5173, + "step": 10056 + }, + { + "epoch": 0.8116374788152692, + "grad_norm": 0.6507968306541443, + "learning_rate": 9.988949173819514e-05, + "loss": 2.5763, + "step": 10057 + }, + { + "epoch": 0.8117181825518521, + "grad_norm": 0.6972371339797974, + "learning_rate": 9.98737048515206e-05, + "loss": 2.604, + "step": 10058 + }, + { + "epoch": 0.8117988862884351, + "grad_norm": 0.6500107049942017, + "learning_rate": 9.985791796799368e-05, + "loss": 2.509, + "step": 10059 + }, + { + "epoch": 0.8118795900250182, + "grad_norm": 0.704501211643219, + "learning_rate": 9.98421310880078e-05, + "loss": 2.5773, + "step": 10060 + }, + { + "epoch": 0.8119602937616012, + "grad_norm": 0.7037203311920166, + "learning_rate": 9.982634421195641e-05, + "loss": 2.5968, + "step": 10061 + }, + { + "epoch": 0.8120409974981841, + "grad_norm": 0.7161232829093933, + "learning_rate": 9.981055734023304e-05, + "loss": 2.5373, + "step": 10062 + }, + { + "epoch": 0.8121217012347671, + "grad_norm": 0.6602928638458252, + "learning_rate": 9.979477047323099e-05, + "loss": 2.5851, + "step": 10063 + }, + { + "epoch": 0.8122024049713502, + "grad_norm": 0.6685947775840759, + "learning_rate": 9.977898361134383e-05, + "loss": 2.5543, + 
"step": 10064 + }, + { + "epoch": 0.8122831087079332, + "grad_norm": 0.6772760152816772, + "learning_rate": 9.976319675496502e-05, + "loss": 2.5355, + "step": 10065 + }, + { + "epoch": 0.8123638124445162, + "grad_norm": 0.6140885949134827, + "learning_rate": 9.974740990448792e-05, + "loss": 2.489, + "step": 10066 + }, + { + "epoch": 0.8124445161810991, + "grad_norm": 0.6597142219543457, + "learning_rate": 9.973162306030604e-05, + "loss": 2.5619, + "step": 10067 + }, + { + "epoch": 0.8125252199176822, + "grad_norm": 0.6768592000007629, + "learning_rate": 9.971583622281281e-05, + "loss": 2.5107, + "step": 10068 + }, + { + "epoch": 0.8126059236542652, + "grad_norm": 0.682296633720398, + "learning_rate": 9.970004939240168e-05, + "loss": 2.5003, + "step": 10069 + }, + { + "epoch": 0.8126866273908482, + "grad_norm": 0.7356325387954712, + "learning_rate": 9.96842625694661e-05, + "loss": 2.5864, + "step": 10070 + }, + { + "epoch": 0.8127673311274312, + "grad_norm": 0.6818091869354248, + "learning_rate": 9.966847575439956e-05, + "loss": 2.5375, + "step": 10071 + }, + { + "epoch": 0.8128480348640142, + "grad_norm": 0.6954368352890015, + "learning_rate": 9.965268894759543e-05, + "loss": 2.5314, + "step": 10072 + }, + { + "epoch": 0.8129287386005972, + "grad_norm": 0.6759306192398071, + "learning_rate": 9.963690214944721e-05, + "loss": 2.5881, + "step": 10073 + }, + { + "epoch": 0.8130094423371802, + "grad_norm": 0.6546545624732971, + "learning_rate": 9.962111536034832e-05, + "loss": 2.5264, + "step": 10074 + }, + { + "epoch": 0.8130901460737632, + "grad_norm": 0.6709586977958679, + "learning_rate": 9.960532858069226e-05, + "loss": 2.5906, + "step": 10075 + }, + { + "epoch": 0.8131708498103463, + "grad_norm": 0.7310851812362671, + "learning_rate": 9.958954181087241e-05, + "loss": 2.5134, + "step": 10076 + }, + { + "epoch": 0.8132515535469292, + "grad_norm": 0.6793027520179749, + "learning_rate": 9.957375505128227e-05, + "loss": 2.5387, + "step": 10077 + }, + { + "epoch": 
0.8133322572835122, + "grad_norm": 0.6965875029563904, + "learning_rate": 9.955796830231528e-05, + "loss": 2.5649, + "step": 10078 + }, + { + "epoch": 0.8134129610200952, + "grad_norm": 0.6597574353218079, + "learning_rate": 9.954218156436485e-05, + "loss": 2.5281, + "step": 10079 + }, + { + "epoch": 0.8134936647566783, + "grad_norm": 0.7911555171012878, + "learning_rate": 9.952639483782445e-05, + "loss": 2.535, + "step": 10080 + }, + { + "epoch": 0.8135743684932613, + "grad_norm": 0.7405688762664795, + "learning_rate": 9.951060812308757e-05, + "loss": 2.5303, + "step": 10081 + }, + { + "epoch": 0.8136550722298442, + "grad_norm": 0.6961480379104614, + "learning_rate": 9.949482142054758e-05, + "loss": 2.4959, + "step": 10082 + }, + { + "epoch": 0.8137357759664272, + "grad_norm": 0.6761718392372131, + "learning_rate": 9.947903473059797e-05, + "loss": 2.5591, + "step": 10083 + }, + { + "epoch": 0.8138164797030103, + "grad_norm": 0.7383104562759399, + "learning_rate": 9.946324805363218e-05, + "loss": 2.5848, + "step": 10084 + }, + { + "epoch": 0.8138971834395933, + "grad_norm": 0.6495873928070068, + "learning_rate": 9.944746139004364e-05, + "loss": 2.4972, + "step": 10085 + }, + { + "epoch": 0.8139778871761763, + "grad_norm": 0.7247152328491211, + "learning_rate": 9.94316747402258e-05, + "loss": 2.5361, + "step": 10086 + }, + { + "epoch": 0.8140585909127592, + "grad_norm": 0.6965751051902771, + "learning_rate": 9.941588810457215e-05, + "loss": 2.4997, + "step": 10087 + }, + { + "epoch": 0.8141392946493423, + "grad_norm": 0.7138223648071289, + "learning_rate": 9.940010148347603e-05, + "loss": 2.5226, + "step": 10088 + }, + { + "epoch": 0.8142199983859253, + "grad_norm": 0.6571210622787476, + "learning_rate": 9.938431487733099e-05, + "loss": 2.5388, + "step": 10089 + }, + { + "epoch": 0.8143007021225083, + "grad_norm": 0.6721277832984924, + "learning_rate": 9.936852828653042e-05, + "loss": 2.5219, + "step": 10090 + }, + { + "epoch": 0.8143814058590912, + "grad_norm": 
0.647520124912262, + "learning_rate": 9.935274171146782e-05, + "loss": 2.6199, + "step": 10091 + }, + { + "epoch": 0.8144621095956743, + "grad_norm": 0.6892204284667969, + "learning_rate": 9.933695515253654e-05, + "loss": 2.5132, + "step": 10092 + }, + { + "epoch": 0.8145428133322573, + "grad_norm": 0.6979050636291504, + "learning_rate": 9.932116861013008e-05, + "loss": 2.5148, + "step": 10093 + }, + { + "epoch": 0.8146235170688403, + "grad_norm": 0.6682664752006531, + "learning_rate": 9.930538208464189e-05, + "loss": 2.5795, + "step": 10094 + }, + { + "epoch": 0.8147042208054233, + "grad_norm": 0.734121561050415, + "learning_rate": 9.928959557646537e-05, + "loss": 2.5469, + "step": 10095 + }, + { + "epoch": 0.8147849245420064, + "grad_norm": 0.6669620275497437, + "learning_rate": 9.9273809085994e-05, + "loss": 2.5277, + "step": 10096 + }, + { + "epoch": 0.8148656282785893, + "grad_norm": 0.6750600934028625, + "learning_rate": 9.925802261362124e-05, + "loss": 2.5869, + "step": 10097 + }, + { + "epoch": 0.8149463320151723, + "grad_norm": 0.6813061237335205, + "learning_rate": 9.924223615974044e-05, + "loss": 2.585, + "step": 10098 + }, + { + "epoch": 0.8150270357517553, + "grad_norm": 0.6775497794151306, + "learning_rate": 9.92264497247451e-05, + "loss": 2.5353, + "step": 10099 + }, + { + "epoch": 0.8151077394883383, + "grad_norm": 0.6877530813217163, + "learning_rate": 9.92106633090287e-05, + "loss": 2.5349, + "step": 10100 + }, + { + "epoch": 0.8151884432249213, + "grad_norm": 0.6984169483184814, + "learning_rate": 9.91948769129846e-05, + "loss": 2.5986, + "step": 10101 + }, + { + "epoch": 0.8152691469615043, + "grad_norm": 0.7144806981086731, + "learning_rate": 9.917909053700626e-05, + "loss": 2.5797, + "step": 10102 + }, + { + "epoch": 0.8153498506980873, + "grad_norm": 0.6494203209877014, + "learning_rate": 9.916330418148715e-05, + "loss": 2.5035, + "step": 10103 + }, + { + "epoch": 0.8154305544346703, + "grad_norm": 0.6669752597808838, + "learning_rate": 
9.914751784682069e-05, + "loss": 2.5489, + "step": 10104 + }, + { + "epoch": 0.8155112581712534, + "grad_norm": 0.6557981371879578, + "learning_rate": 9.913173153340029e-05, + "loss": 2.5266, + "step": 10105 + }, + { + "epoch": 0.8155919619078363, + "grad_norm": 0.6633948087692261, + "learning_rate": 9.911594524161941e-05, + "loss": 2.5263, + "step": 10106 + }, + { + "epoch": 0.8156726656444193, + "grad_norm": 0.7191522717475891, + "learning_rate": 9.910015897187154e-05, + "loss": 2.5625, + "step": 10107 + }, + { + "epoch": 0.8157533693810023, + "grad_norm": 0.7089062929153442, + "learning_rate": 9.908437272455001e-05, + "loss": 2.5644, + "step": 10108 + }, + { + "epoch": 0.8158340731175854, + "grad_norm": 0.7662761211395264, + "learning_rate": 9.906858650004831e-05, + "loss": 2.5875, + "step": 10109 + }, + { + "epoch": 0.8159147768541684, + "grad_norm": 0.6658861041069031, + "learning_rate": 9.905280029875988e-05, + "loss": 2.5818, + "step": 10110 + }, + { + "epoch": 0.8159954805907513, + "grad_norm": 0.7229514718055725, + "learning_rate": 9.903701412107815e-05, + "loss": 2.5421, + "step": 10111 + }, + { + "epoch": 0.8160761843273343, + "grad_norm": 0.7295149564743042, + "learning_rate": 9.902122796739652e-05, + "loss": 2.5298, + "step": 10112 + }, + { + "epoch": 0.8161568880639174, + "grad_norm": 0.6805420517921448, + "learning_rate": 9.900544183810849e-05, + "loss": 2.6693, + "step": 10113 + }, + { + "epoch": 0.8162375918005004, + "grad_norm": 0.6560602188110352, + "learning_rate": 9.898965573360738e-05, + "loss": 2.5445, + "step": 10114 + }, + { + "epoch": 0.8163182955370833, + "grad_norm": 0.690396785736084, + "learning_rate": 9.897386965428674e-05, + "loss": 2.5281, + "step": 10115 + }, + { + "epoch": 0.8163989992736663, + "grad_norm": 0.6905054450035095, + "learning_rate": 9.895808360053998e-05, + "loss": 2.5406, + "step": 10116 + }, + { + "epoch": 0.8164797030102494, + "grad_norm": 0.6905301213264465, + "learning_rate": 9.894229757276045e-05, + "loss": 
2.5458, + "step": 10117 + }, + { + "epoch": 0.8165604067468324, + "grad_norm": 0.6827620267868042, + "learning_rate": 9.892651157134162e-05, + "loss": 2.4403, + "step": 10118 + }, + { + "epoch": 0.8166411104834154, + "grad_norm": 0.7614343166351318, + "learning_rate": 9.891072559667697e-05, + "loss": 2.6369, + "step": 10119 + }, + { + "epoch": 0.8167218142199983, + "grad_norm": 0.6913704872131348, + "learning_rate": 9.889493964915985e-05, + "loss": 2.5914, + "step": 10120 + }, + { + "epoch": 0.8168025179565814, + "grad_norm": 0.7026088237762451, + "learning_rate": 9.887915372918372e-05, + "loss": 2.5139, + "step": 10121 + }, + { + "epoch": 0.8168832216931644, + "grad_norm": 0.7064465284347534, + "learning_rate": 9.886336783714203e-05, + "loss": 2.549, + "step": 10122 + }, + { + "epoch": 0.8169639254297474, + "grad_norm": 0.7345553040504456, + "learning_rate": 9.884758197342821e-05, + "loss": 2.5887, + "step": 10123 + }, + { + "epoch": 0.8170446291663304, + "grad_norm": 0.6916251182556152, + "learning_rate": 9.883179613843563e-05, + "loss": 2.5659, + "step": 10124 + }, + { + "epoch": 0.8171253329029134, + "grad_norm": 0.6428200602531433, + "learning_rate": 9.881601033255771e-05, + "loss": 2.5379, + "step": 10125 + }, + { + "epoch": 0.8172060366394964, + "grad_norm": 0.7433571815490723, + "learning_rate": 9.880022455618796e-05, + "loss": 2.5751, + "step": 10126 + }, + { + "epoch": 0.8172867403760794, + "grad_norm": 0.733256995677948, + "learning_rate": 9.878443880971974e-05, + "loss": 2.4971, + "step": 10127 + }, + { + "epoch": 0.8173674441126624, + "grad_norm": 0.708289384841919, + "learning_rate": 9.876865309354646e-05, + "loss": 2.635, + "step": 10128 + }, + { + "epoch": 0.8174481478492455, + "grad_norm": 0.6877188682556152, + "learning_rate": 9.87528674080616e-05, + "loss": 2.5827, + "step": 10129 + }, + { + "epoch": 0.8175288515858284, + "grad_norm": 0.7108712792396545, + "learning_rate": 9.873708175365852e-05, + "loss": 2.5643, + "step": 10130 + }, + { + 
"epoch": 0.8176095553224114, + "grad_norm": 0.7435629367828369, + "learning_rate": 9.872129613073065e-05, + "loss": 2.5267, + "step": 10131 + }, + { + "epoch": 0.8176902590589944, + "grad_norm": 0.669913113117218, + "learning_rate": 9.870551053967148e-05, + "loss": 2.5684, + "step": 10132 + }, + { + "epoch": 0.8177709627955775, + "grad_norm": 0.6981424689292908, + "learning_rate": 9.868972498087431e-05, + "loss": 2.592, + "step": 10133 + }, + { + "epoch": 0.8178516665321605, + "grad_norm": 0.6661834716796875, + "learning_rate": 9.867393945473263e-05, + "loss": 2.5082, + "step": 10134 + }, + { + "epoch": 0.8179323702687434, + "grad_norm": 0.6611261367797852, + "learning_rate": 9.865815396163987e-05, + "loss": 2.556, + "step": 10135 + }, + { + "epoch": 0.8180130740053264, + "grad_norm": 0.6732283234596252, + "learning_rate": 9.86423685019894e-05, + "loss": 2.5668, + "step": 10136 + }, + { + "epoch": 0.8180937777419095, + "grad_norm": 0.6768637299537659, + "learning_rate": 9.862658307617465e-05, + "loss": 2.5467, + "step": 10137 + }, + { + "epoch": 0.8181744814784925, + "grad_norm": 0.6943596601486206, + "learning_rate": 9.861079768458904e-05, + "loss": 2.5989, + "step": 10138 + }, + { + "epoch": 0.8182551852150755, + "grad_norm": 0.7369638681411743, + "learning_rate": 9.859501232762601e-05, + "loss": 2.5189, + "step": 10139 + }, + { + "epoch": 0.8183358889516584, + "grad_norm": 0.7443112730979919, + "learning_rate": 9.857922700567892e-05, + "loss": 2.5979, + "step": 10140 + }, + { + "epoch": 0.8184165926882415, + "grad_norm": 0.6726163029670715, + "learning_rate": 9.85634417191412e-05, + "loss": 2.5451, + "step": 10141 + }, + { + "epoch": 0.8184972964248245, + "grad_norm": 0.720492422580719, + "learning_rate": 9.854765646840632e-05, + "loss": 2.6116, + "step": 10142 + }, + { + "epoch": 0.8185780001614075, + "grad_norm": 0.6998233795166016, + "learning_rate": 9.85318712538676e-05, + "loss": 2.556, + "step": 10143 + }, + { + "epoch": 0.8186587038979904, + "grad_norm": 
0.7580110430717468, + "learning_rate": 9.851608607591848e-05, + "loss": 2.5222, + "step": 10144 + }, + { + "epoch": 0.8187394076345735, + "grad_norm": 0.6893007755279541, + "learning_rate": 9.85003009349524e-05, + "loss": 2.4639, + "step": 10145 + }, + { + "epoch": 0.8188201113711565, + "grad_norm": 0.6448441743850708, + "learning_rate": 9.84845158313627e-05, + "loss": 2.5249, + "step": 10146 + }, + { + "epoch": 0.8189008151077395, + "grad_norm": 0.7591872215270996, + "learning_rate": 9.846873076554285e-05, + "loss": 2.5173, + "step": 10147 + }, + { + "epoch": 0.8189815188443225, + "grad_norm": 0.6994685530662537, + "learning_rate": 9.845294573788626e-05, + "loss": 2.5181, + "step": 10148 + }, + { + "epoch": 0.8190622225809054, + "grad_norm": 0.6822378635406494, + "learning_rate": 9.843716074878628e-05, + "loss": 2.5109, + "step": 10149 + }, + { + "epoch": 0.8191429263174885, + "grad_norm": 0.6730359792709351, + "learning_rate": 9.842137579863632e-05, + "loss": 2.5402, + "step": 10150 + }, + { + "epoch": 0.8192236300540715, + "grad_norm": 0.6280627846717834, + "learning_rate": 9.840559088782984e-05, + "loss": 2.4806, + "step": 10151 + }, + { + "epoch": 0.8193043337906545, + "grad_norm": 0.6887876391410828, + "learning_rate": 9.838980601676017e-05, + "loss": 2.5498, + "step": 10152 + }, + { + "epoch": 0.8193850375272375, + "grad_norm": 0.7823790907859802, + "learning_rate": 9.837402118582075e-05, + "loss": 2.467, + "step": 10153 + }, + { + "epoch": 0.8194657412638205, + "grad_norm": 0.8109384179115295, + "learning_rate": 9.835823639540496e-05, + "loss": 2.5898, + "step": 10154 + }, + { + "epoch": 0.8195464450004035, + "grad_norm": 0.6883066892623901, + "learning_rate": 9.834245164590624e-05, + "loss": 2.5589, + "step": 10155 + }, + { + "epoch": 0.8196271487369865, + "grad_norm": 0.7291175723075867, + "learning_rate": 9.832666693771794e-05, + "loss": 2.5317, + "step": 10156 + }, + { + "epoch": 0.8197078524735695, + "grad_norm": 0.6819449663162231, + "learning_rate": 
9.831088227123346e-05, + "loss": 2.5513, + "step": 10157 + }, + { + "epoch": 0.8197885562101526, + "grad_norm": 0.7038870453834534, + "learning_rate": 9.829509764684626e-05, + "loss": 2.5301, + "step": 10158 + }, + { + "epoch": 0.8198692599467355, + "grad_norm": 0.7483033537864685, + "learning_rate": 9.827931306494965e-05, + "loss": 2.5273, + "step": 10159 + }, + { + "epoch": 0.8199499636833185, + "grad_norm": 0.6998303532600403, + "learning_rate": 9.826352852593705e-05, + "loss": 2.5083, + "step": 10160 + }, + { + "epoch": 0.8200306674199015, + "grad_norm": 0.6865512728691101, + "learning_rate": 9.824774403020188e-05, + "loss": 2.5693, + "step": 10161 + }, + { + "epoch": 0.8201113711564846, + "grad_norm": 0.8144257068634033, + "learning_rate": 9.823195957813749e-05, + "loss": 2.6052, + "step": 10162 + }, + { + "epoch": 0.8201920748930676, + "grad_norm": 0.6920810341835022, + "learning_rate": 9.821617517013729e-05, + "loss": 2.5467, + "step": 10163 + }, + { + "epoch": 0.8202727786296505, + "grad_norm": 0.7538061141967773, + "learning_rate": 9.820039080659469e-05, + "loss": 2.5933, + "step": 10164 + }, + { + "epoch": 0.8203534823662335, + "grad_norm": 0.6744310259819031, + "learning_rate": 9.818460648790302e-05, + "loss": 2.5633, + "step": 10165 + }, + { + "epoch": 0.8204341861028166, + "grad_norm": 0.6943854689598083, + "learning_rate": 9.816882221445571e-05, + "loss": 2.5868, + "step": 10166 + }, + { + "epoch": 0.8205148898393996, + "grad_norm": 0.6486902832984924, + "learning_rate": 9.815303798664614e-05, + "loss": 2.4983, + "step": 10167 + }, + { + "epoch": 0.8205955935759826, + "grad_norm": 0.6699065566062927, + "learning_rate": 9.813725380486773e-05, + "loss": 2.563, + "step": 10168 + }, + { + "epoch": 0.8206762973125655, + "grad_norm": 0.6547110080718994, + "learning_rate": 9.812146966951379e-05, + "loss": 2.5404, + "step": 10169 + }, + { + "epoch": 0.8207570010491486, + "grad_norm": 0.692592203617096, + "learning_rate": 9.810568558097774e-05, + "loss": 
2.5625, + "step": 10170 + }, + { + "epoch": 0.8208377047857316, + "grad_norm": 0.6696702837944031, + "learning_rate": 9.808990153965296e-05, + "loss": 2.5866, + "step": 10171 + }, + { + "epoch": 0.8209184085223146, + "grad_norm": 0.6425998210906982, + "learning_rate": 9.807411754593282e-05, + "loss": 2.5487, + "step": 10172 + }, + { + "epoch": 0.8209991122588975, + "grad_norm": 0.6849769949913025, + "learning_rate": 9.805833360021069e-05, + "loss": 2.5772, + "step": 10173 + }, + { + "epoch": 0.8210798159954806, + "grad_norm": 0.7451414465904236, + "learning_rate": 9.804254970288001e-05, + "loss": 2.5089, + "step": 10174 + }, + { + "epoch": 0.8211605197320636, + "grad_norm": 0.7134390473365784, + "learning_rate": 9.802676585433408e-05, + "loss": 2.541, + "step": 10175 + }, + { + "epoch": 0.8212412234686466, + "grad_norm": 0.7490564584732056, + "learning_rate": 9.801098205496627e-05, + "loss": 2.5299, + "step": 10176 + }, + { + "epoch": 0.8213219272052296, + "grad_norm": 0.6614408493041992, + "learning_rate": 9.799519830517005e-05, + "loss": 2.5252, + "step": 10177 + }, + { + "epoch": 0.8214026309418127, + "grad_norm": 0.761049211025238, + "learning_rate": 9.797941460533869e-05, + "loss": 2.5153, + "step": 10178 + }, + { + "epoch": 0.8214833346783956, + "grad_norm": 0.6352702379226685, + "learning_rate": 9.796363095586561e-05, + "loss": 2.5407, + "step": 10179 + }, + { + "epoch": 0.8215640384149786, + "grad_norm": 0.684212863445282, + "learning_rate": 9.794784735714417e-05, + "loss": 2.5425, + "step": 10180 + }, + { + "epoch": 0.8216447421515616, + "grad_norm": 0.652987539768219, + "learning_rate": 9.793206380956772e-05, + "loss": 2.5542, + "step": 10181 + }, + { + "epoch": 0.8217254458881447, + "grad_norm": 0.6912897229194641, + "learning_rate": 9.791628031352966e-05, + "loss": 2.5041, + "step": 10182 + }, + { + "epoch": 0.8218061496247276, + "grad_norm": 0.7025408744812012, + "learning_rate": 9.790049686942333e-05, + "loss": 2.5296, + "step": 10183 + }, + { + 
"epoch": 0.8218868533613106, + "grad_norm": 0.7580777406692505, + "learning_rate": 9.788471347764215e-05, + "loss": 2.578, + "step": 10184 + }, + { + "epoch": 0.8219675570978936, + "grad_norm": 0.7044378519058228, + "learning_rate": 9.78689301385794e-05, + "loss": 2.5093, + "step": 10185 + }, + { + "epoch": 0.8220482608344767, + "grad_norm": 0.7339754700660706, + "learning_rate": 9.785314685262849e-05, + "loss": 2.5202, + "step": 10186 + }, + { + "epoch": 0.8221289645710597, + "grad_norm": 0.6872244477272034, + "learning_rate": 9.783736362018277e-05, + "loss": 2.541, + "step": 10187 + }, + { + "epoch": 0.8222096683076426, + "grad_norm": 0.7052434682846069, + "learning_rate": 9.78215804416356e-05, + "loss": 2.4968, + "step": 10188 + }, + { + "epoch": 0.8222903720442256, + "grad_norm": 0.6739610433578491, + "learning_rate": 9.780579731738033e-05, + "loss": 2.5137, + "step": 10189 + }, + { + "epoch": 0.8223710757808087, + "grad_norm": 0.6842939853668213, + "learning_rate": 9.779001424781035e-05, + "loss": 2.5329, + "step": 10190 + }, + { + "epoch": 0.8224517795173917, + "grad_norm": 0.7057977914810181, + "learning_rate": 9.777423123331898e-05, + "loss": 2.5657, + "step": 10191 + }, + { + "epoch": 0.8225324832539747, + "grad_norm": 0.6748424172401428, + "learning_rate": 9.775844827429958e-05, + "loss": 2.6104, + "step": 10192 + }, + { + "epoch": 0.8226131869905576, + "grad_norm": 0.6492514610290527, + "learning_rate": 9.774266537114555e-05, + "loss": 2.58, + "step": 10193 + }, + { + "epoch": 0.8226938907271407, + "grad_norm": 0.6987641453742981, + "learning_rate": 9.772688252425016e-05, + "loss": 2.5301, + "step": 10194 + }, + { + "epoch": 0.8227745944637237, + "grad_norm": 0.710921585559845, + "learning_rate": 9.771109973400679e-05, + "loss": 2.6245, + "step": 10195 + }, + { + "epoch": 0.8228552982003067, + "grad_norm": 0.6673738360404968, + "learning_rate": 9.769531700080883e-05, + "loss": 2.5205, + "step": 10196 + }, + { + "epoch": 0.8229360019368896, + "grad_norm": 
0.6705252528190613, + "learning_rate": 9.767953432504958e-05, + "loss": 2.4932, + "step": 10197 + }, + { + "epoch": 0.8230167056734727, + "grad_norm": 0.6587076783180237, + "learning_rate": 9.766375170712237e-05, + "loss": 2.5085, + "step": 10198 + }, + { + "epoch": 0.8230974094100557, + "grad_norm": 0.7285338640213013, + "learning_rate": 9.764796914742061e-05, + "loss": 2.5481, + "step": 10199 + }, + { + "epoch": 0.8231781131466387, + "grad_norm": 0.6971831321716309, + "learning_rate": 9.763218664633763e-05, + "loss": 2.6092, + "step": 10200 + }, + { + "epoch": 0.8232588168832217, + "grad_norm": 0.6940265893936157, + "learning_rate": 9.761640420426669e-05, + "loss": 2.5325, + "step": 10201 + }, + { + "epoch": 0.8233395206198046, + "grad_norm": 0.6612978577613831, + "learning_rate": 9.76006218216012e-05, + "loss": 2.5532, + "step": 10202 + }, + { + "epoch": 0.8234202243563877, + "grad_norm": 0.6707638502120972, + "learning_rate": 9.758483949873453e-05, + "loss": 2.512, + "step": 10203 + }, + { + "epoch": 0.8235009280929707, + "grad_norm": 0.6636764407157898, + "learning_rate": 9.756905723605994e-05, + "loss": 2.5446, + "step": 10204 + }, + { + "epoch": 0.8235816318295537, + "grad_norm": 0.6996643543243408, + "learning_rate": 9.755327503397081e-05, + "loss": 2.5504, + "step": 10205 + }, + { + "epoch": 0.8236623355661367, + "grad_norm": 0.604487955570221, + "learning_rate": 9.753749289286046e-05, + "loss": 2.4767, + "step": 10206 + }, + { + "epoch": 0.8237430393027197, + "grad_norm": 0.6484553217887878, + "learning_rate": 9.752171081312222e-05, + "loss": 2.5522, + "step": 10207 + }, + { + "epoch": 0.8238237430393027, + "grad_norm": 0.6890987753868103, + "learning_rate": 9.75059287951494e-05, + "loss": 2.5545, + "step": 10208 + }, + { + "epoch": 0.8239044467758857, + "grad_norm": 0.6786034107208252, + "learning_rate": 9.749014683933541e-05, + "loss": 2.591, + "step": 10209 + }, + { + "epoch": 0.8239851505124687, + "grad_norm": 0.751192033290863, + "learning_rate": 
9.747436494607349e-05, + "loss": 2.5335, + "step": 10210 + }, + { + "epoch": 0.8240658542490518, + "grad_norm": 0.6611589789390564, + "learning_rate": 9.7458583115757e-05, + "loss": 2.5104, + "step": 10211 + }, + { + "epoch": 0.8241465579856347, + "grad_norm": 0.6602892875671387, + "learning_rate": 9.744280134877926e-05, + "loss": 2.5319, + "step": 10212 + }, + { + "epoch": 0.8242272617222177, + "grad_norm": 0.6856467127799988, + "learning_rate": 9.742701964553359e-05, + "loss": 2.5418, + "step": 10213 + }, + { + "epoch": 0.8243079654588007, + "grad_norm": 0.6810153126716614, + "learning_rate": 9.741123800641332e-05, + "loss": 2.5691, + "step": 10214 + }, + { + "epoch": 0.8243886691953838, + "grad_norm": 0.7044229507446289, + "learning_rate": 9.739545643181175e-05, + "loss": 2.5911, + "step": 10215 + }, + { + "epoch": 0.8244693729319668, + "grad_norm": 0.6689271330833435, + "learning_rate": 9.737967492212225e-05, + "loss": 2.5374, + "step": 10216 + }, + { + "epoch": 0.8245500766685497, + "grad_norm": 0.6558904051780701, + "learning_rate": 9.736389347773807e-05, + "loss": 2.5118, + "step": 10217 + }, + { + "epoch": 0.8246307804051327, + "grad_norm": 0.6900291442871094, + "learning_rate": 9.734811209905255e-05, + "loss": 2.515, + "step": 10218 + }, + { + "epoch": 0.8247114841417158, + "grad_norm": 0.7129492163658142, + "learning_rate": 9.733233078645907e-05, + "loss": 2.5191, + "step": 10219 + }, + { + "epoch": 0.8247921878782988, + "grad_norm": 0.7031866908073425, + "learning_rate": 9.731654954035082e-05, + "loss": 2.5616, + "step": 10220 + }, + { + "epoch": 0.8248728916148818, + "grad_norm": 0.6418820023536682, + "learning_rate": 9.730076836112118e-05, + "loss": 2.537, + "step": 10221 + }, + { + "epoch": 0.8249535953514647, + "grad_norm": 0.6731035113334656, + "learning_rate": 9.728498724916347e-05, + "loss": 2.5483, + "step": 10222 + }, + { + "epoch": 0.8250342990880478, + "grad_norm": 0.6941342353820801, + "learning_rate": 9.726920620487096e-05, + "loss": 2.5314, 
+ "step": 10223 + }, + { + "epoch": 0.8251150028246308, + "grad_norm": 0.6808927059173584, + "learning_rate": 9.725342522863696e-05, + "loss": 2.5521, + "step": 10224 + }, + { + "epoch": 0.8251957065612138, + "grad_norm": 0.6873155832290649, + "learning_rate": 9.723764432085481e-05, + "loss": 2.5205, + "step": 10225 + }, + { + "epoch": 0.8252764102977967, + "grad_norm": 0.8590287566184998, + "learning_rate": 9.722186348191776e-05, + "loss": 2.5378, + "step": 10226 + }, + { + "epoch": 0.8253571140343798, + "grad_norm": 0.691523015499115, + "learning_rate": 9.720608271221912e-05, + "loss": 2.5062, + "step": 10227 + }, + { + "epoch": 0.8254378177709628, + "grad_norm": 0.6695523262023926, + "learning_rate": 9.719030201215226e-05, + "loss": 2.5164, + "step": 10228 + }, + { + "epoch": 0.8255185215075458, + "grad_norm": 0.745516300201416, + "learning_rate": 9.717452138211037e-05, + "loss": 2.5207, + "step": 10229 + }, + { + "epoch": 0.8255992252441288, + "grad_norm": 0.6628115773200989, + "learning_rate": 9.715874082248679e-05, + "loss": 2.5293, + "step": 10230 + }, + { + "epoch": 0.8256799289807119, + "grad_norm": 0.6531884074211121, + "learning_rate": 9.714296033367482e-05, + "loss": 2.4812, + "step": 10231 + }, + { + "epoch": 0.8257606327172948, + "grad_norm": 0.7444833517074585, + "learning_rate": 9.712717991606777e-05, + "loss": 2.5422, + "step": 10232 + }, + { + "epoch": 0.8258413364538778, + "grad_norm": 0.7013139128684998, + "learning_rate": 9.711139957005888e-05, + "loss": 2.5117, + "step": 10233 + }, + { + "epoch": 0.8259220401904608, + "grad_norm": 0.6588132977485657, + "learning_rate": 9.709561929604147e-05, + "loss": 2.5257, + "step": 10234 + }, + { + "epoch": 0.8260027439270439, + "grad_norm": 0.7538537383079529, + "learning_rate": 9.707983909440886e-05, + "loss": 2.5225, + "step": 10235 + }, + { + "epoch": 0.8260834476636268, + "grad_norm": Infinity, + "learning_rate": 9.707983909440886e-05, + "loss": 2.5532, + "step": 10236 + }, + { + "epoch": 
0.8261641514002098, + "grad_norm": 0.7414929270744324, + "learning_rate": 9.706405896555425e-05, + "loss": 2.5653, + "step": 10237 + }, + { + "epoch": 0.8262448551367928, + "grad_norm": 0.757057785987854, + "learning_rate": 9.704827890987097e-05, + "loss": 2.5732, + "step": 10238 + }, + { + "epoch": 0.8263255588733759, + "grad_norm": 0.730721652507782, + "learning_rate": 9.703249892775232e-05, + "loss": 2.5317, + "step": 10239 + }, + { + "epoch": 0.8264062626099589, + "grad_norm": 0.6943208575248718, + "learning_rate": 9.701671901959151e-05, + "loss": 2.5849, + "step": 10240 + }, + { + "epoch": 0.8264869663465418, + "grad_norm": 0.7111102938652039, + "learning_rate": 9.700093918578188e-05, + "loss": 2.5007, + "step": 10241 + }, + { + "epoch": 0.8265676700831248, + "grad_norm": 0.7240251302719116, + "learning_rate": 9.69851594267167e-05, + "loss": 2.5002, + "step": 10242 + }, + { + "epoch": 0.8266483738197079, + "grad_norm": 0.6624411344528198, + "learning_rate": 9.696937974278922e-05, + "loss": 2.5175, + "step": 10243 + }, + { + "epoch": 0.8267290775562909, + "grad_norm": 0.6972576975822449, + "learning_rate": 9.695360013439269e-05, + "loss": 2.5285, + "step": 10244 + }, + { + "epoch": 0.8268097812928739, + "grad_norm": 0.684446394443512, + "learning_rate": 9.693782060192046e-05, + "loss": 2.57, + "step": 10245 + }, + { + "epoch": 0.8268904850294568, + "grad_norm": 0.6920011639595032, + "learning_rate": 9.692204114576573e-05, + "loss": 2.5042, + "step": 10246 + }, + { + "epoch": 0.8269711887660399, + "grad_norm": 0.7526013851165771, + "learning_rate": 9.690626176632176e-05, + "loss": 2.5878, + "step": 10247 + }, + { + "epoch": 0.8270518925026229, + "grad_norm": 0.6936177611351013, + "learning_rate": 9.689048246398184e-05, + "loss": 2.5572, + "step": 10248 + }, + { + "epoch": 0.8271325962392059, + "grad_norm": 0.672168493270874, + "learning_rate": 9.687470323913922e-05, + "loss": 2.5127, + "step": 10249 + }, + { + "epoch": 0.8272132999757889, + "grad_norm": 
0.6847899556159973, + "learning_rate": 9.685892409218717e-05, + "loss": 2.5443, + "step": 10250 + }, + { + "epoch": 0.8272940037123718, + "grad_norm": 0.6877103447914124, + "learning_rate": 9.684314502351894e-05, + "loss": 2.4924, + "step": 10251 + }, + { + "epoch": 0.8273747074489549, + "grad_norm": 0.6894243359565735, + "learning_rate": 9.682736603352783e-05, + "loss": 2.5107, + "step": 10252 + }, + { + "epoch": 0.8274554111855379, + "grad_norm": 0.7318278551101685, + "learning_rate": 9.681158712260698e-05, + "loss": 2.5276, + "step": 10253 + }, + { + "epoch": 0.8275361149221209, + "grad_norm": 0.6949039101600647, + "learning_rate": 9.679580829114975e-05, + "loss": 2.5128, + "step": 10254 + }, + { + "epoch": 0.8276168186587038, + "grad_norm": 0.6523800492286682, + "learning_rate": 9.678002953954939e-05, + "loss": 2.5584, + "step": 10255 + }, + { + "epoch": 0.8276975223952869, + "grad_norm": 0.6914480328559875, + "learning_rate": 9.676425086819905e-05, + "loss": 2.5597, + "step": 10256 + }, + { + "epoch": 0.8277782261318699, + "grad_norm": 0.7107869982719421, + "learning_rate": 9.674847227749206e-05, + "loss": 2.5009, + "step": 10257 + }, + { + "epoch": 0.8278589298684529, + "grad_norm": 0.7066758275032043, + "learning_rate": 9.673269376782166e-05, + "loss": 2.4599, + "step": 10258 + }, + { + "epoch": 0.8279396336050359, + "grad_norm": 0.7147037982940674, + "learning_rate": 9.671691533958104e-05, + "loss": 2.4478, + "step": 10259 + }, + { + "epoch": 0.828020337341619, + "grad_norm": 0.666265606880188, + "learning_rate": 9.670113699316347e-05, + "loss": 2.5652, + "step": 10260 + }, + { + "epoch": 0.8281010410782019, + "grad_norm": 0.7026315927505493, + "learning_rate": 9.668535872896225e-05, + "loss": 2.5397, + "step": 10261 + }, + { + "epoch": 0.8281817448147849, + "grad_norm": 0.6611438393592834, + "learning_rate": 9.66695805473705e-05, + "loss": 2.5628, + "step": 10262 + }, + { + "epoch": 0.8282624485513679, + "grad_norm": 0.7211201190948486, + "learning_rate": 
9.66538024487815e-05, + "loss": 2.5551, + "step": 10263 + }, + { + "epoch": 0.828343152287951, + "grad_norm": 0.7224553227424622, + "learning_rate": 9.663802443358849e-05, + "loss": 2.5329, + "step": 10264 + }, + { + "epoch": 0.8284238560245339, + "grad_norm": 0.6805843710899353, + "learning_rate": 9.662224650218474e-05, + "loss": 2.5744, + "step": 10265 + }, + { + "epoch": 0.8285045597611169, + "grad_norm": 0.7101335525512695, + "learning_rate": 9.66064686549634e-05, + "loss": 2.5281, + "step": 10266 + }, + { + "epoch": 0.8285852634976999, + "grad_norm": 0.7208443284034729, + "learning_rate": 9.659069089231774e-05, + "loss": 2.5326, + "step": 10267 + }, + { + "epoch": 0.828665967234283, + "grad_norm": 0.747894287109375, + "learning_rate": 9.6574913214641e-05, + "loss": 2.4909, + "step": 10268 + }, + { + "epoch": 0.828746670970866, + "grad_norm": 0.6618027091026306, + "learning_rate": 9.655913562232635e-05, + "loss": 2.6091, + "step": 10269 + }, + { + "epoch": 0.8288273747074489, + "grad_norm": 0.7101535201072693, + "learning_rate": 9.654335811576704e-05, + "loss": 2.5194, + "step": 10270 + }, + { + "epoch": 0.8289080784440319, + "grad_norm": 0.727763831615448, + "learning_rate": 9.652758069535631e-05, + "loss": 2.5767, + "step": 10271 + }, + { + "epoch": 0.828988782180615, + "grad_norm": 0.6936737895011902, + "learning_rate": 9.65118033614873e-05, + "loss": 2.498, + "step": 10272 + }, + { + "epoch": 0.829069485917198, + "grad_norm": 0.699462354183197, + "learning_rate": 9.64960261145533e-05, + "loss": 2.5033, + "step": 10273 + }, + { + "epoch": 0.829150189653781, + "grad_norm": 0.7024868726730347, + "learning_rate": 9.648024895494749e-05, + "loss": 2.5937, + "step": 10274 + }, + { + "epoch": 0.8292308933903639, + "grad_norm": 0.7028421759605408, + "learning_rate": 9.646447188306305e-05, + "loss": 2.5528, + "step": 10275 + }, + { + "epoch": 0.829311597126947, + "grad_norm": 0.7216476202011108, + "learning_rate": 9.644869489929321e-05, + "loss": 2.5298, + "step": 
10276 + }, + { + "epoch": 0.82939230086353, + "grad_norm": 0.6815251111984253, + "learning_rate": 9.643291800403123e-05, + "loss": 2.5138, + "step": 10277 + }, + { + "epoch": 0.829473004600113, + "grad_norm": 0.6961970925331116, + "learning_rate": 9.64171411976702e-05, + "loss": 2.5441, + "step": 10278 + }, + { + "epoch": 0.829553708336696, + "grad_norm": 0.7317311763763428, + "learning_rate": 9.640136448060337e-05, + "loss": 2.5885, + "step": 10279 + }, + { + "epoch": 0.829634412073279, + "grad_norm": 0.729086697101593, + "learning_rate": 9.638558785322396e-05, + "loss": 2.475, + "step": 10280 + }, + { + "epoch": 0.829715115809862, + "grad_norm": 0.7790165543556213, + "learning_rate": 9.636981131592521e-05, + "loss": 2.5538, + "step": 10281 + }, + { + "epoch": 0.829795819546445, + "grad_norm": 0.7066864967346191, + "learning_rate": 9.635403486910018e-05, + "loss": 2.5916, + "step": 10282 + }, + { + "epoch": 0.829876523283028, + "grad_norm": 0.7070252299308777, + "learning_rate": 9.633825851314215e-05, + "loss": 2.5879, + "step": 10283 + }, + { + "epoch": 0.829957227019611, + "grad_norm": 0.7604004740715027, + "learning_rate": 9.63224822484443e-05, + "loss": 2.5298, + "step": 10284 + }, + { + "epoch": 0.830037930756194, + "grad_norm": 0.7548386454582214, + "learning_rate": 9.63067060753998e-05, + "loss": 2.5313, + "step": 10285 + }, + { + "epoch": 0.830118634492777, + "grad_norm": 0.7241540551185608, + "learning_rate": 9.629092999440183e-05, + "loss": 2.5498, + "step": 10286 + }, + { + "epoch": 0.83019933822936, + "grad_norm": 0.6748291850090027, + "learning_rate": 9.627515400584361e-05, + "loss": 2.523, + "step": 10287 + }, + { + "epoch": 0.8302800419659431, + "grad_norm": 0.6624683141708374, + "learning_rate": 9.625937811011826e-05, + "loss": 2.568, + "step": 10288 + }, + { + "epoch": 0.830360745702526, + "grad_norm": 0.6681114435195923, + "learning_rate": 9.624360230761899e-05, + "loss": 2.5255, + "step": 10289 + }, + { + "epoch": 0.830441449439109, + 
"grad_norm": 0.6895325183868408, + "learning_rate": 9.622782659873899e-05, + "loss": 2.5275, + "step": 10290 + }, + { + "epoch": 0.830522153175692, + "grad_norm": 0.7257826924324036, + "learning_rate": 9.621205098387137e-05, + "loss": 2.5102, + "step": 10291 + }, + { + "epoch": 0.8306028569122751, + "grad_norm": 0.6567066311836243, + "learning_rate": 9.619627546340935e-05, + "loss": 2.5721, + "step": 10292 + }, + { + "epoch": 0.8306835606488581, + "grad_norm": 0.6571428179740906, + "learning_rate": 9.61805000377461e-05, + "loss": 2.5014, + "step": 10293 + }, + { + "epoch": 0.830764264385441, + "grad_norm": 0.7807042598724365, + "learning_rate": 9.61647247072748e-05, + "loss": 2.632, + "step": 10294 + }, + { + "epoch": 0.830844968122024, + "grad_norm": 0.6688913702964783, + "learning_rate": 9.614894947238854e-05, + "loss": 2.5457, + "step": 10295 + }, + { + "epoch": 0.8309256718586071, + "grad_norm": 0.7769338488578796, + "learning_rate": 9.613317433348055e-05, + "loss": 2.4775, + "step": 10296 + }, + { + "epoch": 0.8310063755951901, + "grad_norm": 0.7089162468910217, + "learning_rate": 9.611739929094399e-05, + "loss": 2.4887, + "step": 10297 + }, + { + "epoch": 0.8310870793317731, + "grad_norm": 0.6901174783706665, + "learning_rate": 9.610162434517196e-05, + "loss": 2.6127, + "step": 10298 + }, + { + "epoch": 0.831167783068356, + "grad_norm": 0.6862173676490784, + "learning_rate": 9.608584949655764e-05, + "loss": 2.5432, + "step": 10299 + }, + { + "epoch": 0.8312484868049391, + "grad_norm": 0.6789367198944092, + "learning_rate": 9.607007474549418e-05, + "loss": 2.5135, + "step": 10300 + }, + { + "epoch": 0.8313291905415221, + "grad_norm": 0.6548805832862854, + "learning_rate": 9.605430009237474e-05, + "loss": 2.5466, + "step": 10301 + }, + { + "epoch": 0.8314098942781051, + "grad_norm": 0.6873800158500671, + "learning_rate": 9.603852553759244e-05, + "loss": 2.4954, + "step": 10302 + }, + { + "epoch": 0.831490598014688, + "grad_norm": 0.6816138029098511, + 
"learning_rate": 9.602275108154046e-05, + "loss": 2.5556, + "step": 10303 + }, + { + "epoch": 0.831571301751271, + "grad_norm": 0.6890314221382141, + "learning_rate": 9.600697672461189e-05, + "loss": 2.5253, + "step": 10304 + }, + { + "epoch": 0.8316520054878541, + "grad_norm": 0.6217427849769592, + "learning_rate": 9.599120246719992e-05, + "loss": 2.53, + "step": 10305 + }, + { + "epoch": 0.8317327092244371, + "grad_norm": 0.6638299226760864, + "learning_rate": 9.59754283096977e-05, + "loss": 2.5323, + "step": 10306 + }, + { + "epoch": 0.8318134129610201, + "grad_norm": 0.6834245920181274, + "learning_rate": 9.595965425249828e-05, + "loss": 2.5339, + "step": 10307 + }, + { + "epoch": 0.831894116697603, + "grad_norm": 0.8013476729393005, + "learning_rate": 9.594388029599484e-05, + "loss": 2.4925, + "step": 10308 + }, + { + "epoch": 0.8319748204341861, + "grad_norm": 0.7677187323570251, + "learning_rate": 9.592810644058049e-05, + "loss": 2.5717, + "step": 10309 + }, + { + "epoch": 0.8320555241707691, + "grad_norm": 0.6558046340942383, + "learning_rate": 9.591233268664841e-05, + "loss": 2.5631, + "step": 10310 + }, + { + "epoch": 0.8321362279073521, + "grad_norm": 0.6648481488227844, + "learning_rate": 9.589655903459165e-05, + "loss": 2.5232, + "step": 10311 + }, + { + "epoch": 0.8322169316439351, + "grad_norm": 0.6907756328582764, + "learning_rate": 9.588078548480338e-05, + "loss": 2.4804, + "step": 10312 + }, + { + "epoch": 0.8322976353805182, + "grad_norm": 0.6924928426742554, + "learning_rate": 9.586501203767675e-05, + "loss": 2.4648, + "step": 10313 + }, + { + "epoch": 0.8323783391171011, + "grad_norm": 0.7654799222946167, + "learning_rate": 9.584923869360477e-05, + "loss": 2.6184, + "step": 10314 + }, + { + "epoch": 0.8324590428536841, + "grad_norm": 0.7056179046630859, + "learning_rate": 9.58334654529806e-05, + "loss": 2.5862, + "step": 10315 + }, + { + "epoch": 0.8325397465902671, + "grad_norm": 0.7245064973831177, + "learning_rate": 9.581769231619743e-05, + 
"loss": 2.4866, + "step": 10316 + }, + { + "epoch": 0.8326204503268502, + "grad_norm": 0.6782355308532715, + "learning_rate": 9.580191928364824e-05, + "loss": 2.5519, + "step": 10317 + }, + { + "epoch": 0.8327011540634331, + "grad_norm": 0.6910805106163025, + "learning_rate": 9.578614635572621e-05, + "loss": 2.542, + "step": 10318 + }, + { + "epoch": 0.8327818578000161, + "grad_norm": 0.6858026385307312, + "learning_rate": 9.577037353282444e-05, + "loss": 2.5601, + "step": 10319 + }, + { + "epoch": 0.8328625615365991, + "grad_norm": 0.6886423230171204, + "learning_rate": 9.5754600815336e-05, + "loss": 2.5817, + "step": 10320 + }, + { + "epoch": 0.8329432652731822, + "grad_norm": 0.7585750818252563, + "learning_rate": 9.573882820365402e-05, + "loss": 2.5153, + "step": 10321 + }, + { + "epoch": 0.8330239690097652, + "grad_norm": 0.7004472613334656, + "learning_rate": 9.57230556981716e-05, + "loss": 2.5456, + "step": 10322 + }, + { + "epoch": 0.8331046727463481, + "grad_norm": 0.6530508399009705, + "learning_rate": 9.570728329928179e-05, + "loss": 2.5453, + "step": 10323 + }, + { + "epoch": 0.8331853764829311, + "grad_norm": 0.6767956614494324, + "learning_rate": 9.569151100737769e-05, + "loss": 2.5311, + "step": 10324 + }, + { + "epoch": 0.8332660802195142, + "grad_norm": 0.6835905909538269, + "learning_rate": 9.56757388228524e-05, + "loss": 2.5417, + "step": 10325 + }, + { + "epoch": 0.8333467839560972, + "grad_norm": 0.6582748889923096, + "learning_rate": 9.565996674609901e-05, + "loss": 2.5144, + "step": 10326 + }, + { + "epoch": 0.8334274876926802, + "grad_norm": 0.6815205216407776, + "learning_rate": 9.56441947775106e-05, + "loss": 2.5272, + "step": 10327 + }, + { + "epoch": 0.8335081914292631, + "grad_norm": 0.6810150146484375, + "learning_rate": 9.562842291748022e-05, + "loss": 2.5475, + "step": 10328 + }, + { + "epoch": 0.8335888951658462, + "grad_norm": 0.7220990657806396, + "learning_rate": 9.5612651166401e-05, + "loss": 2.54, + "step": 10329 + }, + { + 
"epoch": 0.8336695989024292, + "grad_norm": 0.6840164065361023, + "learning_rate": 9.559687952466596e-05, + "loss": 2.5987, + "step": 10330 + }, + { + "epoch": 0.8337503026390122, + "grad_norm": 0.7085031867027283, + "learning_rate": 9.558110799266819e-05, + "loss": 2.5674, + "step": 10331 + }, + { + "epoch": 0.8338310063755952, + "grad_norm": 0.6658117175102234, + "learning_rate": 9.55653365708008e-05, + "loss": 2.5793, + "step": 10332 + }, + { + "epoch": 0.8339117101121782, + "grad_norm": 0.782648503780365, + "learning_rate": 9.554956525945677e-05, + "loss": 2.5463, + "step": 10333 + }, + { + "epoch": 0.8339924138487612, + "grad_norm": 0.6999937891960144, + "learning_rate": 9.553379405902922e-05, + "loss": 2.5961, + "step": 10334 + }, + { + "epoch": 0.8340731175853442, + "grad_norm": 0.6681220531463623, + "learning_rate": 9.55180229699112e-05, + "loss": 2.6055, + "step": 10335 + }, + { + "epoch": 0.8341538213219272, + "grad_norm": 0.7127133011817932, + "learning_rate": 9.550225199249577e-05, + "loss": 2.5571, + "step": 10336 + }, + { + "epoch": 0.8342345250585103, + "grad_norm": 0.6939001679420471, + "learning_rate": 9.548648112717596e-05, + "loss": 2.5653, + "step": 10337 + }, + { + "epoch": 0.8343152287950932, + "grad_norm": 0.7483924031257629, + "learning_rate": 9.547071037434487e-05, + "loss": 2.5316, + "step": 10338 + }, + { + "epoch": 0.8343959325316762, + "grad_norm": 0.7975850105285645, + "learning_rate": 9.545493973439548e-05, + "loss": 2.6039, + "step": 10339 + }, + { + "epoch": 0.8344766362682592, + "grad_norm": 0.6893026232719421, + "learning_rate": 9.543916920772087e-05, + "loss": 2.5797, + "step": 10340 + }, + { + "epoch": 0.8345573400048423, + "grad_norm": 0.752869188785553, + "learning_rate": 9.542339879471409e-05, + "loss": 2.5677, + "step": 10341 + }, + { + "epoch": 0.8346380437414253, + "grad_norm": 0.7336339354515076, + "learning_rate": 9.540762849576822e-05, + "loss": 2.5212, + "step": 10342 + }, + { + "epoch": 0.8347187474780082, + 
"grad_norm": 0.7742713689804077, + "learning_rate": 9.539185831127621e-05, + "loss": 2.5599, + "step": 10343 + }, + { + "epoch": 0.8347994512145912, + "grad_norm": 0.7205352783203125, + "learning_rate": 9.537608824163114e-05, + "loss": 2.5591, + "step": 10344 + }, + { + "epoch": 0.8348801549511743, + "grad_norm": 0.7794787287712097, + "learning_rate": 9.536031828722605e-05, + "loss": 2.5858, + "step": 10345 + }, + { + "epoch": 0.8349608586877573, + "grad_norm": 0.7129528522491455, + "learning_rate": 9.534454844845396e-05, + "loss": 2.5591, + "step": 10346 + }, + { + "epoch": 0.8350415624243402, + "grad_norm": 0.731038510799408, + "learning_rate": 9.532877872570787e-05, + "loss": 2.5774, + "step": 10347 + }, + { + "epoch": 0.8351222661609232, + "grad_norm": 0.7706510424613953, + "learning_rate": 9.531300911938087e-05, + "loss": 2.6102, + "step": 10348 + }, + { + "epoch": 0.8352029698975063, + "grad_norm": 0.6890363097190857, + "learning_rate": 9.52972396298659e-05, + "loss": 2.5393, + "step": 10349 + }, + { + "epoch": 0.8352836736340893, + "grad_norm": 0.6792402863502502, + "learning_rate": 9.528147025755601e-05, + "loss": 2.5607, + "step": 10350 + }, + { + "epoch": 0.8353643773706723, + "grad_norm": 0.7097377777099609, + "learning_rate": 9.526570100284422e-05, + "loss": 2.5681, + "step": 10351 + }, + { + "epoch": 0.8354450811072552, + "grad_norm": 0.7530940771102905, + "learning_rate": 9.524993186612353e-05, + "loss": 2.5405, + "step": 10352 + }, + { + "epoch": 0.8355257848438382, + "grad_norm": 0.714080810546875, + "learning_rate": 9.523416284778696e-05, + "loss": 2.5365, + "step": 10353 + }, + { + "epoch": 0.8356064885804213, + "grad_norm": 0.6745832562446594, + "learning_rate": 9.521839394822752e-05, + "loss": 2.5553, + "step": 10354 + }, + { + "epoch": 0.8356871923170043, + "grad_norm": 0.7163450121879578, + "learning_rate": 9.52026251678382e-05, + "loss": 2.5074, + "step": 10355 + }, + { + "epoch": 0.8357678960535873, + "grad_norm": 0.6876534223556519, + 
"learning_rate": 9.518685650701197e-05, + "loss": 2.5652, + "step": 10356 + }, + { + "epoch": 0.8358485997901702, + "grad_norm": 0.6424533128738403, + "learning_rate": 9.517108796614187e-05, + "loss": 2.4823, + "step": 10357 + }, + { + "epoch": 0.8359293035267533, + "grad_norm": 0.646802544593811, + "learning_rate": 9.515531954562094e-05, + "loss": 2.5602, + "step": 10358 + }, + { + "epoch": 0.8360100072633363, + "grad_norm": 0.7266993522644043, + "learning_rate": 9.513955124584205e-05, + "loss": 2.5384, + "step": 10359 + }, + { + "epoch": 0.8360907109999193, + "grad_norm": 0.7358742356300354, + "learning_rate": 9.512378306719826e-05, + "loss": 2.5798, + "step": 10360 + }, + { + "epoch": 0.8361714147365022, + "grad_norm": 0.7191498279571533, + "learning_rate": 9.510801501008256e-05, + "loss": 2.5229, + "step": 10361 + }, + { + "epoch": 0.8362521184730853, + "grad_norm": 0.7058876156806946, + "learning_rate": 9.509224707488788e-05, + "loss": 2.5146, + "step": 10362 + }, + { + "epoch": 0.8363328222096683, + "grad_norm": 0.7348346710205078, + "learning_rate": 9.507647926200725e-05, + "loss": 2.5878, + "step": 10363 + }, + { + "epoch": 0.8364135259462513, + "grad_norm": 0.7464115619659424, + "learning_rate": 9.506071157183366e-05, + "loss": 2.6056, + "step": 10364 + }, + { + "epoch": 0.8364942296828343, + "grad_norm": 0.7077332139015198, + "learning_rate": 9.504494400476e-05, + "loss": 2.5161, + "step": 10365 + }, + { + "epoch": 0.8365749334194174, + "grad_norm": 0.7381827235221863, + "learning_rate": 9.502917656117928e-05, + "loss": 2.519, + "step": 10366 + }, + { + "epoch": 0.8366556371560003, + "grad_norm": 0.743180513381958, + "learning_rate": 9.501340924148452e-05, + "loss": 2.6149, + "step": 10367 + }, + { + "epoch": 0.8367363408925833, + "grad_norm": 0.6496078372001648, + "learning_rate": 9.499764204606863e-05, + "loss": 2.4969, + "step": 10368 + }, + { + "epoch": 0.8368170446291663, + "grad_norm": 0.6796541810035706, + "learning_rate": 9.498187497532454e-05, + 
"loss": 2.5304, + "step": 10369 + }, + { + "epoch": 0.8368977483657494, + "grad_norm": 0.6555948853492737, + "learning_rate": 9.496610802964529e-05, + "loss": 2.6029, + "step": 10370 + }, + { + "epoch": 0.8369784521023323, + "grad_norm": 0.6990405321121216, + "learning_rate": 9.495034120942374e-05, + "loss": 2.5286, + "step": 10371 + }, + { + "epoch": 0.8370591558389153, + "grad_norm": 0.7417613863945007, + "learning_rate": 9.49345745150529e-05, + "loss": 2.5301, + "step": 10372 + }, + { + "epoch": 0.8371398595754983, + "grad_norm": 0.6809872388839722, + "learning_rate": 9.49188079469257e-05, + "loss": 2.5075, + "step": 10373 + }, + { + "epoch": 0.8372205633120814, + "grad_norm": 0.6537099480628967, + "learning_rate": 9.490304150543514e-05, + "loss": 2.5515, + "step": 10374 + }, + { + "epoch": 0.8373012670486644, + "grad_norm": 0.6660431027412415, + "learning_rate": 9.488727519097407e-05, + "loss": 2.549, + "step": 10375 + }, + { + "epoch": 0.8373819707852473, + "grad_norm": 0.7257838249206543, + "learning_rate": 9.487150900393546e-05, + "loss": 2.546, + "step": 10376 + }, + { + "epoch": 0.8374626745218303, + "grad_norm": 0.742085874080658, + "learning_rate": 9.485574294471226e-05, + "loss": 2.5302, + "step": 10377 + }, + { + "epoch": 0.8375433782584134, + "grad_norm": 0.659934401512146, + "learning_rate": 9.48399770136974e-05, + "loss": 2.5553, + "step": 10378 + }, + { + "epoch": 0.8376240819949964, + "grad_norm": 0.7219613790512085, + "learning_rate": 9.482421121128377e-05, + "loss": 2.6186, + "step": 10379 + }, + { + "epoch": 0.8377047857315794, + "grad_norm": 0.706444263458252, + "learning_rate": 9.480844553786436e-05, + "loss": 2.5082, + "step": 10380 + }, + { + "epoch": 0.8377854894681623, + "grad_norm": 0.7527014017105103, + "learning_rate": 9.479267999383204e-05, + "loss": 2.5625, + "step": 10381 + }, + { + "epoch": 0.8378661932047454, + "grad_norm": 0.7488746643066406, + "learning_rate": 9.477691457957976e-05, + "loss": 2.528, + "step": 10382 + }, + { + 
"epoch": 0.8379468969413284, + "grad_norm": 0.7394229173660278, + "learning_rate": 9.476114929550045e-05, + "loss": 2.5387, + "step": 10383 + }, + { + "epoch": 0.8380276006779114, + "grad_norm": 0.7490981817245483, + "learning_rate": 9.474538414198695e-05, + "loss": 2.548, + "step": 10384 + }, + { + "epoch": 0.8381083044144944, + "grad_norm": 0.7203173041343689, + "learning_rate": 9.472961911943222e-05, + "loss": 2.5547, + "step": 10385 + }, + { + "epoch": 0.8381890081510774, + "grad_norm": 0.6929850578308105, + "learning_rate": 9.471385422822917e-05, + "loss": 2.4831, + "step": 10386 + }, + { + "epoch": 0.8382697118876604, + "grad_norm": 0.6303263902664185, + "learning_rate": 9.469808946877067e-05, + "loss": 2.4569, + "step": 10387 + }, + { + "epoch": 0.8383504156242434, + "grad_norm": 0.6986981630325317, + "learning_rate": 9.468232484144964e-05, + "loss": 2.5278, + "step": 10388 + }, + { + "epoch": 0.8384311193608264, + "grad_norm": 0.6910964846611023, + "learning_rate": 9.466656034665898e-05, + "loss": 2.5657, + "step": 10389 + }, + { + "epoch": 0.8385118230974095, + "grad_norm": 0.6571134924888611, + "learning_rate": 9.465079598479163e-05, + "loss": 2.6017, + "step": 10390 + }, + { + "epoch": 0.8385925268339924, + "grad_norm": 0.7117733359336853, + "learning_rate": 9.463503175624034e-05, + "loss": 2.56, + "step": 10391 + }, + { + "epoch": 0.8386732305705754, + "grad_norm": 0.7052998542785645, + "learning_rate": 9.461926766139813e-05, + "loss": 2.4998, + "step": 10392 + }, + { + "epoch": 0.8387539343071584, + "grad_norm": 0.7306597232818604, + "learning_rate": 9.460350370065786e-05, + "loss": 2.5292, + "step": 10393 + }, + { + "epoch": 0.8388346380437415, + "grad_norm": 0.681069552898407, + "learning_rate": 9.458773987441235e-05, + "loss": 2.5469, + "step": 10394 + }, + { + "epoch": 0.8389153417803245, + "grad_norm": 0.6681767702102661, + "learning_rate": 9.45719761830545e-05, + "loss": 2.5476, + "step": 10395 + }, + { + "epoch": 0.8389960455169074, + 
"grad_norm": 0.6759339570999146, + "learning_rate": 9.455621262697723e-05, + "loss": 2.4806, + "step": 10396 + }, + { + "epoch": 0.8390767492534904, + "grad_norm": 0.695829451084137, + "learning_rate": 9.454044920657333e-05, + "loss": 2.5255, + "step": 10397 + }, + { + "epoch": 0.8391574529900735, + "grad_norm": 0.686568558216095, + "learning_rate": 9.452468592223572e-05, + "loss": 2.5655, + "step": 10398 + }, + { + "epoch": 0.8392381567266565, + "grad_norm": 0.6529035568237305, + "learning_rate": 9.45089227743573e-05, + "loss": 2.5026, + "step": 10399 + }, + { + "epoch": 0.8393188604632394, + "grad_norm": 0.6809061765670776, + "learning_rate": 9.449315976333082e-05, + "loss": 2.5549, + "step": 10400 + }, + { + "epoch": 0.8393995641998224, + "grad_norm": 0.6920269727706909, + "learning_rate": 9.447739688954919e-05, + "loss": 2.517, + "step": 10401 + }, + { + "epoch": 0.8394802679364055, + "grad_norm": 0.6626712083816528, + "learning_rate": 9.446163415340526e-05, + "loss": 2.605, + "step": 10402 + }, + { + "epoch": 0.8395609716729885, + "grad_norm": 0.6912916898727417, + "learning_rate": 9.444587155529195e-05, + "loss": 2.588, + "step": 10403 + }, + { + "epoch": 0.8396416754095715, + "grad_norm": 0.6771352291107178, + "learning_rate": 9.443010909560198e-05, + "loss": 2.5148, + "step": 10404 + }, + { + "epoch": 0.8397223791461544, + "grad_norm": 0.7015509009361267, + "learning_rate": 9.441434677472827e-05, + "loss": 2.5425, + "step": 10405 + }, + { + "epoch": 0.8398030828827374, + "grad_norm": 0.6789976358413696, + "learning_rate": 9.439858459306364e-05, + "loss": 2.598, + "step": 10406 + }, + { + "epoch": 0.8398837866193205, + "grad_norm": 0.674391508102417, + "learning_rate": 9.438282255100091e-05, + "loss": 2.5581, + "step": 10407 + }, + { + "epoch": 0.8399644903559035, + "grad_norm": 0.6944772005081177, + "learning_rate": 9.436706064893294e-05, + "loss": 2.5591, + "step": 10408 + }, + { + "epoch": 0.8400451940924865, + "grad_norm": 0.6750832200050354, + 
"learning_rate": 9.435129888725259e-05, + "loss": 2.533, + "step": 10409 + }, + { + "epoch": 0.8401258978290694, + "grad_norm": 0.6927465200424194, + "learning_rate": 9.433553726635257e-05, + "loss": 2.536, + "step": 10410 + }, + { + "epoch": 0.8402066015656525, + "grad_norm": 0.6399651765823364, + "learning_rate": 9.431977578662578e-05, + "loss": 2.5123, + "step": 10411 + }, + { + "epoch": 0.8402873053022355, + "grad_norm": 0.7588143944740295, + "learning_rate": 9.430401444846505e-05, + "loss": 2.6133, + "step": 10412 + }, + { + "epoch": 0.8403680090388185, + "grad_norm": 0.8010972738265991, + "learning_rate": 9.428825325226313e-05, + "loss": 2.5407, + "step": 10413 + }, + { + "epoch": 0.8404487127754015, + "grad_norm": 0.6847307085990906, + "learning_rate": 9.427249219841288e-05, + "loss": 2.5912, + "step": 10414 + }, + { + "epoch": 0.8405294165119845, + "grad_norm": 0.7005963325500488, + "learning_rate": 9.425673128730716e-05, + "loss": 2.5059, + "step": 10415 + }, + { + "epoch": 0.8406101202485675, + "grad_norm": 0.7383962273597717, + "learning_rate": 9.424097051933862e-05, + "loss": 2.5157, + "step": 10416 + }, + { + "epoch": 0.8406908239851505, + "grad_norm": 0.7078843712806702, + "learning_rate": 9.422520989490018e-05, + "loss": 2.6093, + "step": 10417 + }, + { + "epoch": 0.8407715277217335, + "grad_norm": 0.7449501752853394, + "learning_rate": 9.42094494143846e-05, + "loss": 2.594, + "step": 10418 + }, + { + "epoch": 0.8408522314583166, + "grad_norm": 0.6823872923851013, + "learning_rate": 9.419368907818473e-05, + "loss": 2.5653, + "step": 10419 + }, + { + "epoch": 0.8409329351948995, + "grad_norm": 0.7403056025505066, + "learning_rate": 9.417792888669325e-05, + "loss": 2.5296, + "step": 10420 + }, + { + "epoch": 0.8410136389314825, + "grad_norm": 0.6858980655670166, + "learning_rate": 9.4162168840303e-05, + "loss": 2.5401, + "step": 10421 + }, + { + "epoch": 0.8410943426680655, + "grad_norm": 0.692348837852478, + "learning_rate": 9.41464089394068e-05, + 
"loss": 2.4797, + "step": 10422 + }, + { + "epoch": 0.8411750464046486, + "grad_norm": 0.6939836144447327, + "learning_rate": 9.413064918439736e-05, + "loss": 2.505, + "step": 10423 + }, + { + "epoch": 0.8412557501412316, + "grad_norm": 0.7334314584732056, + "learning_rate": 9.411488957566748e-05, + "loss": 2.5792, + "step": 10424 + }, + { + "epoch": 0.8413364538778145, + "grad_norm": 0.6977920532226562, + "learning_rate": 9.409913011360999e-05, + "loss": 2.5204, + "step": 10425 + }, + { + "epoch": 0.8414171576143975, + "grad_norm": 0.7121822834014893, + "learning_rate": 9.408337079861756e-05, + "loss": 2.571, + "step": 10426 + }, + { + "epoch": 0.8414978613509806, + "grad_norm": 0.761476993560791, + "learning_rate": 9.406761163108297e-05, + "loss": 2.5845, + "step": 10427 + }, + { + "epoch": 0.8415785650875636, + "grad_norm": 0.7160221934318542, + "learning_rate": 9.405185261139906e-05, + "loss": 2.5331, + "step": 10428 + }, + { + "epoch": 0.8416592688241465, + "grad_norm": 0.6828827857971191, + "learning_rate": 9.40360937399585e-05, + "loss": 2.5596, + "step": 10429 + }, + { + "epoch": 0.8417399725607295, + "grad_norm": 0.756473183631897, + "learning_rate": 9.402033501715406e-05, + "loss": 2.6107, + "step": 10430 + }, + { + "epoch": 0.8418206762973126, + "grad_norm": 0.7486895322799683, + "learning_rate": 9.400457644337853e-05, + "loss": 2.5388, + "step": 10431 + }, + { + "epoch": 0.8419013800338956, + "grad_norm": 0.7759146690368652, + "learning_rate": 9.398881801902461e-05, + "loss": 2.5559, + "step": 10432 + }, + { + "epoch": 0.8419820837704786, + "grad_norm": 0.71756911277771, + "learning_rate": 9.397305974448506e-05, + "loss": 2.6109, + "step": 10433 + }, + { + "epoch": 0.8420627875070615, + "grad_norm": 0.7741644382476807, + "learning_rate": 9.395730162015261e-05, + "loss": 2.5664, + "step": 10434 + }, + { + "epoch": 0.8421434912436446, + "grad_norm": 0.7155938744544983, + "learning_rate": 9.394154364642006e-05, + "loss": 2.5693, + "step": 10435 + }, + { + 
"epoch": 0.8422241949802276, + "grad_norm": 0.6862725019454956, + "learning_rate": 9.392578582368002e-05, + "loss": 2.4942, + "step": 10436 + }, + { + "epoch": 0.8423048987168106, + "grad_norm": 0.6698417067527771, + "learning_rate": 9.391002815232528e-05, + "loss": 2.5258, + "step": 10437 + }, + { + "epoch": 0.8423856024533936, + "grad_norm": 0.7756468057632446, + "learning_rate": 9.389427063274858e-05, + "loss": 2.5008, + "step": 10438 + }, + { + "epoch": 0.8424663061899766, + "grad_norm": 0.6579857468605042, + "learning_rate": 9.387851326534259e-05, + "loss": 2.5335, + "step": 10439 + }, + { + "epoch": 0.8425470099265596, + "grad_norm": 0.7673436403274536, + "learning_rate": 9.386275605050006e-05, + "loss": 2.5646, + "step": 10440 + }, + { + "epoch": 0.8426277136631426, + "grad_norm": 0.7377188205718994, + "learning_rate": 9.384699898861372e-05, + "loss": 2.568, + "step": 10441 + }, + { + "epoch": 0.8427084173997256, + "grad_norm": 0.6502123475074768, + "learning_rate": 9.38312420800762e-05, + "loss": 2.6091, + "step": 10442 + }, + { + "epoch": 0.8427891211363087, + "grad_norm": 0.729852020740509, + "learning_rate": 9.381548532528026e-05, + "loss": 2.4873, + "step": 10443 + }, + { + "epoch": 0.8428698248728916, + "grad_norm": 0.7419102191925049, + "learning_rate": 9.379972872461865e-05, + "loss": 2.4966, + "step": 10444 + }, + { + "epoch": 0.8429505286094746, + "grad_norm": 0.6921093463897705, + "learning_rate": 9.378397227848395e-05, + "loss": 2.4895, + "step": 10445 + }, + { + "epoch": 0.8430312323460576, + "grad_norm": 0.7697325944900513, + "learning_rate": 9.376821598726892e-05, + "loss": 2.5779, + "step": 10446 + }, + { + "epoch": 0.8431119360826407, + "grad_norm": 0.6441029906272888, + "learning_rate": 9.375245985136626e-05, + "loss": 2.4909, + "step": 10447 + }, + { + "epoch": 0.8431926398192237, + "grad_norm": 0.6962057948112488, + "learning_rate": 9.373670387116861e-05, + "loss": 2.5602, + "step": 10448 + }, + { + "epoch": 0.8432733435558066, + 
"grad_norm": 0.7030641436576843, + "learning_rate": 9.372094804706867e-05, + "loss": 2.5641, + "step": 10449 + }, + { + "epoch": 0.8433540472923896, + "grad_norm": 0.6969063878059387, + "learning_rate": 9.370519237945912e-05, + "loss": 2.5555, + "step": 10450 + }, + { + "epoch": 0.8434347510289727, + "grad_norm": 0.7169879674911499, + "learning_rate": 9.368943686873267e-05, + "loss": 2.5258, + "step": 10451 + }, + { + "epoch": 0.8435154547655557, + "grad_norm": 0.7198735475540161, + "learning_rate": 9.36736815152819e-05, + "loss": 2.5192, + "step": 10452 + }, + { + "epoch": 0.8435961585021386, + "grad_norm": 0.6613535284996033, + "learning_rate": 9.365792631949951e-05, + "loss": 2.5596, + "step": 10453 + }, + { + "epoch": 0.8436768622387216, + "grad_norm": 0.6377065777778625, + "learning_rate": 9.364217128177824e-05, + "loss": 2.5518, + "step": 10454 + }, + { + "epoch": 0.8437575659753046, + "grad_norm": 0.6670635938644409, + "learning_rate": 9.362641640251063e-05, + "loss": 2.4793, + "step": 10455 + }, + { + "epoch": 0.8438382697118877, + "grad_norm": 0.6556122899055481, + "learning_rate": 9.361066168208939e-05, + "loss": 2.5492, + "step": 10456 + }, + { + "epoch": 0.8439189734484707, + "grad_norm": 0.7262280583381653, + "learning_rate": 9.35949071209072e-05, + "loss": 2.6059, + "step": 10457 + }, + { + "epoch": 0.8439996771850536, + "grad_norm": 0.702953040599823, + "learning_rate": 9.357915271935662e-05, + "loss": 2.5445, + "step": 10458 + }, + { + "epoch": 0.8440803809216366, + "grad_norm": 0.6619930267333984, + "learning_rate": 9.356339847783036e-05, + "loss": 2.5688, + "step": 10459 + }, + { + "epoch": 0.8441610846582197, + "grad_norm": 0.7038032412528992, + "learning_rate": 9.354764439672106e-05, + "loss": 2.5195, + "step": 10460 + }, + { + "epoch": 0.8442417883948027, + "grad_norm": 0.6615132689476013, + "learning_rate": 9.353189047642129e-05, + "loss": 2.5176, + "step": 10461 + }, + { + "epoch": 0.8443224921313857, + "grad_norm": 0.6524826288223267, + 
"learning_rate": 9.351613671732372e-05, + "loss": 2.4294, + "step": 10462 + }, + { + "epoch": 0.8444031958679686, + "grad_norm": 0.6526279449462891, + "learning_rate": 9.350038311982099e-05, + "loss": 2.595, + "step": 10463 + }, + { + "epoch": 0.8444838996045517, + "grad_norm": 0.6610859632492065, + "learning_rate": 9.348462968430569e-05, + "loss": 2.5311, + "step": 10464 + }, + { + "epoch": 0.8445646033411347, + "grad_norm": 0.6835470795631409, + "learning_rate": 9.346887641117045e-05, + "loss": 2.5694, + "step": 10465 + }, + { + "epoch": 0.8446453070777177, + "grad_norm": 0.6768551468849182, + "learning_rate": 9.345312330080787e-05, + "loss": 2.6082, + "step": 10466 + }, + { + "epoch": 0.8447260108143007, + "grad_norm": 0.6368672847747803, + "learning_rate": 9.343737035361059e-05, + "loss": 2.5221, + "step": 10467 + }, + { + "epoch": 0.8448067145508837, + "grad_norm": 0.6952844858169556, + "learning_rate": 9.34216175699712e-05, + "loss": 2.5003, + "step": 10468 + }, + { + "epoch": 0.8448874182874667, + "grad_norm": 0.6663931012153625, + "learning_rate": 9.340586495028227e-05, + "loss": 2.5469, + "step": 10469 + }, + { + "epoch": 0.8449681220240497, + "grad_norm": 0.6840688586235046, + "learning_rate": 9.339011249493647e-05, + "loss": 2.5499, + "step": 10470 + }, + { + "epoch": 0.8450488257606327, + "grad_norm": 0.6832869052886963, + "learning_rate": 9.337436020432632e-05, + "loss": 2.5492, + "step": 10471 + }, + { + "epoch": 0.8451295294972158, + "grad_norm": 0.7444044947624207, + "learning_rate": 9.335860807884442e-05, + "loss": 2.5791, + "step": 10472 + }, + { + "epoch": 0.8452102332337987, + "grad_norm": 0.6821839809417725, + "learning_rate": 9.334285611888339e-05, + "loss": 2.4772, + "step": 10473 + }, + { + "epoch": 0.8452909369703817, + "grad_norm": 0.6209141612052917, + "learning_rate": 9.332710432483577e-05, + "loss": 2.5656, + "step": 10474 + }, + { + "epoch": 0.8453716407069647, + "grad_norm": 0.6531212329864502, + "learning_rate": 
9.331135269709415e-05, + "loss": 2.5285, + "step": 10475 + }, + { + "epoch": 0.8454523444435478, + "grad_norm": 0.6418079137802124, + "learning_rate": 9.329560123605115e-05, + "loss": 2.5503, + "step": 10476 + }, + { + "epoch": 0.8455330481801308, + "grad_norm": 0.6636360287666321, + "learning_rate": 9.327984994209924e-05, + "loss": 2.528, + "step": 10477 + }, + { + "epoch": 0.8456137519167137, + "grad_norm": 0.6196488738059998, + "learning_rate": 9.326409881563102e-05, + "loss": 2.4907, + "step": 10478 + }, + { + "epoch": 0.8456944556532967, + "grad_norm": 0.6339137554168701, + "learning_rate": 9.324834785703913e-05, + "loss": 2.4672, + "step": 10479 + }, + { + "epoch": 0.8457751593898798, + "grad_norm": 0.6803932189941406, + "learning_rate": 9.323259706671602e-05, + "loss": 2.5538, + "step": 10480 + }, + { + "epoch": 0.8458558631264628, + "grad_norm": 0.6815275549888611, + "learning_rate": 9.321684644505429e-05, + "loss": 2.5291, + "step": 10481 + }, + { + "epoch": 0.8459365668630457, + "grad_norm": 0.6497374773025513, + "learning_rate": 9.320109599244646e-05, + "loss": 2.5499, + "step": 10482 + }, + { + "epoch": 0.8460172705996287, + "grad_norm": 0.7966926097869873, + "learning_rate": 9.318534570928512e-05, + "loss": 2.523, + "step": 10483 + }, + { + "epoch": 0.8460979743362118, + "grad_norm": 0.6532156467437744, + "learning_rate": 9.316959559596276e-05, + "loss": 2.5138, + "step": 10484 + }, + { + "epoch": 0.8461786780727948, + "grad_norm": 0.7292522192001343, + "learning_rate": 9.315384565287193e-05, + "loss": 2.5413, + "step": 10485 + }, + { + "epoch": 0.8462593818093778, + "grad_norm": 0.7610795497894287, + "learning_rate": 9.313809588040519e-05, + "loss": 2.5071, + "step": 10486 + }, + { + "epoch": 0.8463400855459607, + "grad_norm": 0.7038258910179138, + "learning_rate": 9.312234627895502e-05, + "loss": 2.5568, + "step": 10487 + }, + { + "epoch": 0.8464207892825438, + "grad_norm": 0.7136046290397644, + "learning_rate": 9.310659684891395e-05, + "loss": 
2.5372, + "step": 10488 + }, + { + "epoch": 0.8465014930191268, + "grad_norm": 0.7512896060943604, + "learning_rate": 9.309084759067452e-05, + "loss": 2.5821, + "step": 10489 + }, + { + "epoch": 0.8465821967557098, + "grad_norm": 0.7436400651931763, + "learning_rate": 9.307509850462922e-05, + "loss": 2.5489, + "step": 10490 + }, + { + "epoch": 0.8466629004922928, + "grad_norm": 0.6858603954315186, + "learning_rate": 9.305934959117056e-05, + "loss": 2.5622, + "step": 10491 + }, + { + "epoch": 0.8467436042288758, + "grad_norm": 0.707185685634613, + "learning_rate": 9.304360085069107e-05, + "loss": 2.5275, + "step": 10492 + }, + { + "epoch": 0.8468243079654588, + "grad_norm": 0.7207933068275452, + "learning_rate": 9.302785228358322e-05, + "loss": 2.5877, + "step": 10493 + }, + { + "epoch": 0.8469050117020418, + "grad_norm": 0.6470080614089966, + "learning_rate": 9.30121038902395e-05, + "loss": 2.5117, + "step": 10494 + }, + { + "epoch": 0.8469857154386248, + "grad_norm": 0.75248783826828, + "learning_rate": 9.299635567105247e-05, + "loss": 2.5259, + "step": 10495 + }, + { + "epoch": 0.8470664191752079, + "grad_norm": 0.7150708436965942, + "learning_rate": 9.298060762641452e-05, + "loss": 2.551, + "step": 10496 + }, + { + "epoch": 0.8471471229117908, + "grad_norm": 0.6865069270133972, + "learning_rate": 9.296485975671818e-05, + "loss": 2.5184, + "step": 10497 + }, + { + "epoch": 0.8472278266483738, + "grad_norm": 0.7188237309455872, + "learning_rate": 9.294911206235593e-05, + "loss": 2.5207, + "step": 10498 + }, + { + "epoch": 0.8473085303849568, + "grad_norm": 0.6907880902290344, + "learning_rate": 9.293336454372026e-05, + "loss": 2.5544, + "step": 10499 + }, + { + "epoch": 0.8473892341215399, + "grad_norm": 0.7626079320907593, + "learning_rate": 9.291761720120358e-05, + "loss": 2.5741, + "step": 10500 + }, + { + "epoch": 0.8474699378581229, + "grad_norm": 0.6731963753700256, + "learning_rate": 9.29018700351984e-05, + "loss": 2.5433, + "step": 10501 + }, + { + 
"epoch": 0.8475506415947058, + "grad_norm": 0.7256288528442383, + "learning_rate": 9.288612304609723e-05, + "loss": 2.5131, + "step": 10502 + }, + { + "epoch": 0.8476313453312888, + "grad_norm": 0.7129119634628296, + "learning_rate": 9.287037623429242e-05, + "loss": 2.5054, + "step": 10503 + }, + { + "epoch": 0.8477120490678719, + "grad_norm": 0.6711156964302063, + "learning_rate": 9.285462960017644e-05, + "loss": 2.5671, + "step": 10504 + }, + { + "epoch": 0.8477927528044549, + "grad_norm": 0.7268081903457642, + "learning_rate": 9.283888314414184e-05, + "loss": 2.5627, + "step": 10505 + }, + { + "epoch": 0.8478734565410379, + "grad_norm": 0.8635050058364868, + "learning_rate": 9.282313686658094e-05, + "loss": 2.517, + "step": 10506 + }, + { + "epoch": 0.8479541602776208, + "grad_norm": 0.7077138423919678, + "learning_rate": 9.280739076788624e-05, + "loss": 2.5551, + "step": 10507 + }, + { + "epoch": 0.8480348640142038, + "grad_norm": 0.6312204599380493, + "learning_rate": 9.279164484845018e-05, + "loss": 2.5329, + "step": 10508 + }, + { + "epoch": 0.8481155677507869, + "grad_norm": 0.6749829649925232, + "learning_rate": 9.277589910866516e-05, + "loss": 2.5092, + "step": 10509 + }, + { + "epoch": 0.8481962714873699, + "grad_norm": 0.753391683101654, + "learning_rate": 9.27601535489236e-05, + "loss": 2.6244, + "step": 10510 + }, + { + "epoch": 0.8482769752239528, + "grad_norm": 0.7230119109153748, + "learning_rate": 9.2744408169618e-05, + "loss": 2.5021, + "step": 10511 + }, + { + "epoch": 0.8483576789605358, + "grad_norm": 0.6759157776832581, + "learning_rate": 9.272866297114067e-05, + "loss": 2.5399, + "step": 10512 + }, + { + "epoch": 0.8484383826971189, + "grad_norm": 0.7049473524093628, + "learning_rate": 9.271291795388406e-05, + "loss": 2.5024, + "step": 10513 + }, + { + "epoch": 0.8485190864337019, + "grad_norm": 0.6579850912094116, + "learning_rate": 9.269717311824058e-05, + "loss": 2.5019, + "step": 10514 + }, + { + "epoch": 0.8485997901702849, + 
"grad_norm": 0.7091391086578369, + "learning_rate": 9.268142846460265e-05, + "loss": 2.5785, + "step": 10515 + }, + { + "epoch": 0.8486804939068678, + "grad_norm": 0.6612898707389832, + "learning_rate": 9.266568399336266e-05, + "loss": 2.5046, + "step": 10516 + }, + { + "epoch": 0.8487611976434509, + "grad_norm": 0.6348623633384705, + "learning_rate": 9.264993970491298e-05, + "loss": 2.543, + "step": 10517 + }, + { + "epoch": 0.8488419013800339, + "grad_norm": 0.688360869884491, + "learning_rate": 9.263419559964604e-05, + "loss": 2.5294, + "step": 10518 + }, + { + "epoch": 0.8489226051166169, + "grad_norm": 0.6483190059661865, + "learning_rate": 9.261845167795418e-05, + "loss": 2.5623, + "step": 10519 + }, + { + "epoch": 0.8490033088531999, + "grad_norm": 0.689379096031189, + "learning_rate": 9.26027079402298e-05, + "loss": 2.4871, + "step": 10520 + }, + { + "epoch": 0.8490840125897829, + "grad_norm": 0.6627655625343323, + "learning_rate": 9.25869643868653e-05, + "loss": 2.5353, + "step": 10521 + }, + { + "epoch": 0.8491647163263659, + "grad_norm": 0.6701192259788513, + "learning_rate": 9.2571221018253e-05, + "loss": 2.5003, + "step": 10522 + }, + { + "epoch": 0.8492454200629489, + "grad_norm": 0.7413944005966187, + "learning_rate": 9.255547783478529e-05, + "loss": 2.5473, + "step": 10523 + }, + { + "epoch": 0.8493261237995319, + "grad_norm": 0.6490365266799927, + "learning_rate": 9.253973483685455e-05, + "loss": 2.5168, + "step": 10524 + }, + { + "epoch": 0.849406827536115, + "grad_norm": 0.7303688526153564, + "learning_rate": 9.25239920248531e-05, + "loss": 2.5953, + "step": 10525 + }, + { + "epoch": 0.8494875312726979, + "grad_norm": 0.7132991552352905, + "learning_rate": 9.250824939917331e-05, + "loss": 2.475, + "step": 10526 + }, + { + "epoch": 0.8495682350092809, + "grad_norm": 0.6935676336288452, + "learning_rate": 9.249250696020753e-05, + "loss": 2.5212, + "step": 10527 + }, + { + "epoch": 0.8496489387458639, + "grad_norm": 0.732961118221283, + 
"learning_rate": 9.247676470834814e-05, + "loss": 2.5848, + "step": 10528 + }, + { + "epoch": 0.849729642482447, + "grad_norm": 0.6899160146713257, + "learning_rate": 9.246102264398739e-05, + "loss": 2.4551, + "step": 10529 + }, + { + "epoch": 0.84981034621903, + "grad_norm": 0.6941123604774475, + "learning_rate": 9.244528076751766e-05, + "loss": 2.5441, + "step": 10530 + }, + { + "epoch": 0.8498910499556129, + "grad_norm": 0.7351016998291016, + "learning_rate": 9.242953907933134e-05, + "loss": 2.6519, + "step": 10531 + }, + { + "epoch": 0.8499717536921959, + "grad_norm": 0.7156691551208496, + "learning_rate": 9.241379757982065e-05, + "loss": 2.573, + "step": 10532 + }, + { + "epoch": 0.850052457428779, + "grad_norm": 0.7137688994407654, + "learning_rate": 9.239805626937797e-05, + "loss": 2.5688, + "step": 10533 + }, + { + "epoch": 0.850133161165362, + "grad_norm": 0.7018687129020691, + "learning_rate": 9.238231514839559e-05, + "loss": 2.5725, + "step": 10534 + }, + { + "epoch": 0.850213864901945, + "grad_norm": 0.6723659634590149, + "learning_rate": 9.236657421726583e-05, + "loss": 2.5661, + "step": 10535 + }, + { + "epoch": 0.8502945686385279, + "grad_norm": 0.7105850577354431, + "learning_rate": 9.235083347638098e-05, + "loss": 2.5676, + "step": 10536 + }, + { + "epoch": 0.850375272375111, + "grad_norm": 0.682601809501648, + "learning_rate": 9.233509292613341e-05, + "loss": 2.5489, + "step": 10537 + }, + { + "epoch": 0.850455976111694, + "grad_norm": 0.6703988313674927, + "learning_rate": 9.231935256691531e-05, + "loss": 2.5349, + "step": 10538 + }, + { + "epoch": 0.850536679848277, + "grad_norm": 0.6430882215499878, + "learning_rate": 9.230361239911903e-05, + "loss": 2.4959, + "step": 10539 + }, + { + "epoch": 0.8506173835848599, + "grad_norm": 0.7164519429206848, + "learning_rate": 9.228787242313687e-05, + "loss": 2.4999, + "step": 10540 + }, + { + "epoch": 0.850698087321443, + "grad_norm": 0.7463028430938721, + "learning_rate": 9.227213263936107e-05, + 
"loss": 2.545, + "step": 10541 + }, + { + "epoch": 0.850778791058026, + "grad_norm": 0.650577187538147, + "learning_rate": 9.22563930481839e-05, + "loss": 2.5707, + "step": 10542 + }, + { + "epoch": 0.850859494794609, + "grad_norm": 0.6808211207389832, + "learning_rate": 9.224065364999768e-05, + "loss": 2.5236, + "step": 10543 + }, + { + "epoch": 0.850940198531192, + "grad_norm": 0.6947758793830872, + "learning_rate": 9.222491444519467e-05, + "loss": 2.555, + "step": 10544 + }, + { + "epoch": 0.851020902267775, + "grad_norm": 0.6805624961853027, + "learning_rate": 9.22091754341671e-05, + "loss": 2.517, + "step": 10545 + }, + { + "epoch": 0.851101606004358, + "grad_norm": 0.6645655035972595, + "learning_rate": 9.219343661730724e-05, + "loss": 2.5237, + "step": 10546 + }, + { + "epoch": 0.851182309740941, + "grad_norm": 0.6912586092948914, + "learning_rate": 9.217769799500738e-05, + "loss": 2.5345, + "step": 10547 + }, + { + "epoch": 0.851263013477524, + "grad_norm": 0.6713781356811523, + "learning_rate": 9.21619595676597e-05, + "loss": 2.56, + "step": 10548 + }, + { + "epoch": 0.8513437172141071, + "grad_norm": 0.7031502723693848, + "learning_rate": 9.214622133565648e-05, + "loss": 2.4885, + "step": 10549 + }, + { + "epoch": 0.85142442095069, + "grad_norm": 0.6616455316543579, + "learning_rate": 9.213048329938997e-05, + "loss": 2.5101, + "step": 10550 + }, + { + "epoch": 0.851505124687273, + "grad_norm": 0.711077094078064, + "learning_rate": 9.211474545925236e-05, + "loss": 2.6264, + "step": 10551 + }, + { + "epoch": 0.851585828423856, + "grad_norm": 0.7534502744674683, + "learning_rate": 9.209900781563592e-05, + "loss": 2.5417, + "step": 10552 + }, + { + "epoch": 0.8516665321604391, + "grad_norm": 0.7405222058296204, + "learning_rate": 9.208327036893288e-05, + "loss": 2.546, + "step": 10553 + }, + { + "epoch": 0.8517472358970221, + "grad_norm": 0.7014057040214539, + "learning_rate": 9.20675331195354e-05, + "loss": 2.5211, + "step": 10554 + }, + { + "epoch": 
0.851827939633605, + "grad_norm": 0.6984074115753174, + "learning_rate": 9.205179606783573e-05, + "loss": 2.5181, + "step": 10555 + }, + { + "epoch": 0.851908643370188, + "grad_norm": 0.7312670350074768, + "learning_rate": 9.203605921422613e-05, + "loss": 2.5345, + "step": 10556 + }, + { + "epoch": 0.851989347106771, + "grad_norm": 0.6861104369163513, + "learning_rate": 9.202032255909871e-05, + "loss": 2.5426, + "step": 10557 + }, + { + "epoch": 0.8520700508433541, + "grad_norm": 0.6989030838012695, + "learning_rate": 9.200458610284571e-05, + "loss": 2.5221, + "step": 10558 + }, + { + "epoch": 0.852150754579937, + "grad_norm": 0.6645115613937378, + "learning_rate": 9.198884984585932e-05, + "loss": 2.4755, + "step": 10559 + }, + { + "epoch": 0.85223145831652, + "grad_norm": 0.6577785015106201, + "learning_rate": 9.197311378853176e-05, + "loss": 2.5491, + "step": 10560 + }, + { + "epoch": 0.852312162053103, + "grad_norm": 0.7311568856239319, + "learning_rate": 9.195737793125517e-05, + "loss": 2.5653, + "step": 10561 + }, + { + "epoch": 0.8523928657896861, + "grad_norm": 0.6469970345497131, + "learning_rate": 9.194164227442174e-05, + "loss": 2.5384, + "step": 10562 + }, + { + "epoch": 0.8524735695262691, + "grad_norm": 0.6562933325767517, + "learning_rate": 9.19259068184237e-05, + "loss": 2.5644, + "step": 10563 + }, + { + "epoch": 0.852554273262852, + "grad_norm": 0.7740273475646973, + "learning_rate": 9.19101715636531e-05, + "loss": 2.5868, + "step": 10564 + }, + { + "epoch": 0.852634976999435, + "grad_norm": 0.6461195349693298, + "learning_rate": 9.18944365105022e-05, + "loss": 2.4862, + "step": 10565 + }, + { + "epoch": 0.8527156807360181, + "grad_norm": 0.7230537533760071, + "learning_rate": 9.187870165936313e-05, + "loss": 2.5125, + "step": 10566 + }, + { + "epoch": 0.8527963844726011, + "grad_norm": 0.6858233213424683, + "learning_rate": 9.186296701062805e-05, + "loss": 2.5463, + "step": 10567 + }, + { + "epoch": 0.8528770882091841, + "grad_norm": 
0.717407763004303, + "learning_rate": 9.184723256468908e-05, + "loss": 2.5399, + "step": 10568 + }, + { + "epoch": 0.852957791945767, + "grad_norm": 0.7537745237350464, + "learning_rate": 9.18314983219384e-05, + "loss": 2.5164, + "step": 10569 + }, + { + "epoch": 0.8530384956823501, + "grad_norm": 0.7068665027618408, + "learning_rate": 9.181576428276814e-05, + "loss": 2.5747, + "step": 10570 + }, + { + "epoch": 0.8531191994189331, + "grad_norm": 0.8013456463813782, + "learning_rate": 9.18000304475704e-05, + "loss": 2.5401, + "step": 10571 + }, + { + "epoch": 0.8531999031555161, + "grad_norm": 0.6458969712257385, + "learning_rate": 9.178429681673741e-05, + "loss": 2.4781, + "step": 10572 + }, + { + "epoch": 0.8532806068920991, + "grad_norm": 0.7235112190246582, + "learning_rate": 9.176856339066114e-05, + "loss": 2.5753, + "step": 10573 + }, + { + "epoch": 0.8533613106286821, + "grad_norm": 0.6815706491470337, + "learning_rate": 9.175283016973382e-05, + "loss": 2.5526, + "step": 10574 + }, + { + "epoch": 0.8534420143652651, + "grad_norm": 0.739747166633606, + "learning_rate": 9.173709715434751e-05, + "loss": 2.5631, + "step": 10575 + }, + { + "epoch": 0.8535227181018481, + "grad_norm": 0.7325060963630676, + "learning_rate": 9.172136434489437e-05, + "loss": 2.4925, + "step": 10576 + }, + { + "epoch": 0.8536034218384311, + "grad_norm": 0.6505454182624817, + "learning_rate": 9.170563174176645e-05, + "loss": 2.5423, + "step": 10577 + }, + { + "epoch": 0.8536841255750142, + "grad_norm": 0.7267098426818848, + "learning_rate": 9.168989934535586e-05, + "loss": 2.5687, + "step": 10578 + }, + { + "epoch": 0.8537648293115971, + "grad_norm": 0.7264497876167297, + "learning_rate": 9.167416715605476e-05, + "loss": 2.5165, + "step": 10579 + }, + { + "epoch": 0.8538455330481801, + "grad_norm": 0.7473852634429932, + "learning_rate": 9.165843517425509e-05, + "loss": 2.5837, + "step": 10580 + }, + { + "epoch": 0.8539262367847631, + "grad_norm": 0.7249133586883545, + "learning_rate": 
9.164270340034906e-05, + "loss": 2.5805, + "step": 10581 + }, + { + "epoch": 0.8540069405213462, + "grad_norm": 0.7463760375976562, + "learning_rate": 9.162697183472875e-05, + "loss": 2.5067, + "step": 10582 + }, + { + "epoch": 0.8540876442579292, + "grad_norm": 0.7125511169433594, + "learning_rate": 9.161124047778614e-05, + "loss": 2.5093, + "step": 10583 + }, + { + "epoch": 0.8541683479945121, + "grad_norm": 0.7247455716133118, + "learning_rate": 9.159550932991335e-05, + "loss": 2.5356, + "step": 10584 + }, + { + "epoch": 0.8542490517310951, + "grad_norm": 0.7593860030174255, + "learning_rate": 9.157977839150246e-05, + "loss": 2.5477, + "step": 10585 + }, + { + "epoch": 0.8543297554676782, + "grad_norm": 0.6758295297622681, + "learning_rate": 9.156404766294547e-05, + "loss": 2.4748, + "step": 10586 + }, + { + "epoch": 0.8544104592042612, + "grad_norm": 0.7114073634147644, + "learning_rate": 9.154831714463447e-05, + "loss": 2.5479, + "step": 10587 + }, + { + "epoch": 0.8544911629408442, + "grad_norm": 0.6881263256072998, + "learning_rate": 9.153258683696156e-05, + "loss": 2.5471, + "step": 10588 + }, + { + "epoch": 0.8545718666774271, + "grad_norm": 0.6509317755699158, + "learning_rate": 9.151685674031866e-05, + "loss": 2.5239, + "step": 10589 + }, + { + "epoch": 0.8546525704140102, + "grad_norm": 0.7754644751548767, + "learning_rate": 9.150112685509787e-05, + "loss": 2.5572, + "step": 10590 + }, + { + "epoch": 0.8547332741505932, + "grad_norm": 0.707080602645874, + "learning_rate": 9.148539718169118e-05, + "loss": 2.5572, + "step": 10591 + }, + { + "epoch": 0.8548139778871762, + "grad_norm": 0.6996685266494751, + "learning_rate": 9.146966772049073e-05, + "loss": 2.4968, + "step": 10592 + }, + { + "epoch": 0.8548946816237591, + "grad_norm": 0.6830589771270752, + "learning_rate": 9.145393847188841e-05, + "loss": 2.5795, + "step": 10593 + }, + { + "epoch": 0.8549753853603422, + "grad_norm": 0.7507784366607666, + "learning_rate": 9.143820943627628e-05, + "loss": 
2.6135, + "step": 10594 + }, + { + "epoch": 0.8550560890969252, + "grad_norm": 0.673218309879303, + "learning_rate": 9.142248061404638e-05, + "loss": 2.5875, + "step": 10595 + }, + { + "epoch": 0.8551367928335082, + "grad_norm": 0.6861804723739624, + "learning_rate": 9.140675200559065e-05, + "loss": 2.5892, + "step": 10596 + }, + { + "epoch": 0.8552174965700912, + "grad_norm": 0.6928709149360657, + "learning_rate": 9.139102361130114e-05, + "loss": 2.5303, + "step": 10597 + }, + { + "epoch": 0.8552982003066743, + "grad_norm": 0.6958343386650085, + "learning_rate": 9.137529543156986e-05, + "loss": 2.5567, + "step": 10598 + }, + { + "epoch": 0.8553789040432572, + "grad_norm": 0.703845739364624, + "learning_rate": 9.135956746678873e-05, + "loss": 2.5215, + "step": 10599 + }, + { + "epoch": 0.8554596077798402, + "grad_norm": 0.7108649015426636, + "learning_rate": 9.134383971734975e-05, + "loss": 2.5687, + "step": 10600 + }, + { + "epoch": 0.8555403115164232, + "grad_norm": 0.7249850034713745, + "learning_rate": 9.132811218364495e-05, + "loss": 2.565, + "step": 10601 + }, + { + "epoch": 0.8556210152530063, + "grad_norm": 0.7060014009475708, + "learning_rate": 9.131238486606623e-05, + "loss": 2.5366, + "step": 10602 + }, + { + "epoch": 0.8557017189895892, + "grad_norm": 0.6915088891983032, + "learning_rate": 9.129665776500559e-05, + "loss": 2.527, + "step": 10603 + }, + { + "epoch": 0.8557824227261722, + "grad_norm": 0.7226938605308533, + "learning_rate": 9.128093088085503e-05, + "loss": 2.5999, + "step": 10604 + }, + { + "epoch": 0.8558631264627552, + "grad_norm": 0.6802428364753723, + "learning_rate": 9.126520421400641e-05, + "loss": 2.4788, + "step": 10605 + }, + { + "epoch": 0.8559438301993383, + "grad_norm": 0.7855350375175476, + "learning_rate": 9.124947776485175e-05, + "loss": 2.5349, + "step": 10606 + }, + { + "epoch": 0.8560245339359213, + "grad_norm": 0.6758337020874023, + "learning_rate": 9.123375153378296e-05, + "loss": 2.5874, + "step": 10607 + }, + { + 
"epoch": 0.8561052376725042, + "grad_norm": 0.675061821937561, + "learning_rate": 9.121802552119206e-05, + "loss": 2.5343, + "step": 10608 + }, + { + "epoch": 0.8561859414090872, + "grad_norm": 0.7044726014137268, + "learning_rate": 9.120229972747087e-05, + "loss": 2.5361, + "step": 10609 + }, + { + "epoch": 0.8562666451456702, + "grad_norm": 0.6324402689933777, + "learning_rate": 9.118657415301137e-05, + "loss": 2.5039, + "step": 10610 + }, + { + "epoch": 0.8563473488822533, + "grad_norm": 0.6621509790420532, + "learning_rate": 9.11708487982055e-05, + "loss": 2.5346, + "step": 10611 + }, + { + "epoch": 0.8564280526188363, + "grad_norm": 0.6709887981414795, + "learning_rate": 9.115512366344516e-05, + "loss": 2.5409, + "step": 10612 + }, + { + "epoch": 0.8565087563554192, + "grad_norm": 0.7237712740898132, + "learning_rate": 9.113939874912223e-05, + "loss": 2.5051, + "step": 10613 + }, + { + "epoch": 0.8565894600920022, + "grad_norm": 0.6646109223365784, + "learning_rate": 9.11236740556287e-05, + "loss": 2.5866, + "step": 10614 + }, + { + "epoch": 0.8566701638285853, + "grad_norm": 0.7131930589675903, + "learning_rate": 9.110794958335637e-05, + "loss": 2.5472, + "step": 10615 + }, + { + "epoch": 0.8567508675651683, + "grad_norm": 0.6662428975105286, + "learning_rate": 9.109222533269715e-05, + "loss": 2.4863, + "step": 10616 + }, + { + "epoch": 0.8568315713017512, + "grad_norm": 0.6527226567268372, + "learning_rate": 9.107650130404304e-05, + "loss": 2.5594, + "step": 10617 + }, + { + "epoch": 0.8569122750383342, + "grad_norm": 0.6639060378074646, + "learning_rate": 9.106077749778578e-05, + "loss": 2.5519, + "step": 10618 + }, + { + "epoch": 0.8569929787749173, + "grad_norm": 0.7088096737861633, + "learning_rate": 9.104505391431734e-05, + "loss": 2.5404, + "step": 10619 + }, + { + "epoch": 0.8570736825115003, + "grad_norm": 0.7155873775482178, + "learning_rate": 9.102933055402957e-05, + "loss": 2.5636, + "step": 10620 + }, + { + "epoch": 0.8571543862480833, + 
"grad_norm": 0.6522316932678223, + "learning_rate": 9.101360741731431e-05, + "loss": 2.5216, + "step": 10621 + }, + { + "epoch": 0.8572350899846662, + "grad_norm": 0.6515649557113647, + "learning_rate": 9.099788450456345e-05, + "loss": 2.5804, + "step": 10622 + }, + { + "epoch": 0.8573157937212493, + "grad_norm": 0.6791853904724121, + "learning_rate": 9.098216181616883e-05, + "loss": 2.5353, + "step": 10623 + }, + { + "epoch": 0.8573964974578323, + "grad_norm": 0.6946877241134644, + "learning_rate": 9.096643935252236e-05, + "loss": 2.5492, + "step": 10624 + }, + { + "epoch": 0.8574772011944153, + "grad_norm": 0.7235898375511169, + "learning_rate": 9.095071711401581e-05, + "loss": 2.5178, + "step": 10625 + }, + { + "epoch": 0.8575579049309983, + "grad_norm": 0.6740610003471375, + "learning_rate": 9.093499510104102e-05, + "loss": 2.5699, + "step": 10626 + }, + { + "epoch": 0.8576386086675813, + "grad_norm": 0.7441792488098145, + "learning_rate": 9.091927331398988e-05, + "loss": 2.579, + "step": 10627 + }, + { + "epoch": 0.8577193124041643, + "grad_norm": 0.6986937522888184, + "learning_rate": 9.090355175325416e-05, + "loss": 2.5556, + "step": 10628 + }, + { + "epoch": 0.8578000161407473, + "grad_norm": 0.6960151791572571, + "learning_rate": 9.08878304192257e-05, + "loss": 2.5448, + "step": 10629 + }, + { + "epoch": 0.8578807198773303, + "grad_norm": 0.6376819014549255, + "learning_rate": 9.087210931229636e-05, + "loss": 2.4636, + "step": 10630 + }, + { + "epoch": 0.8579614236139134, + "grad_norm": 0.752473771572113, + "learning_rate": 9.08563884328579e-05, + "loss": 2.5451, + "step": 10631 + }, + { + "epoch": 0.8580421273504963, + "grad_norm": 0.6879361867904663, + "learning_rate": 9.084066778130213e-05, + "loss": 2.5365, + "step": 10632 + }, + { + "epoch": 0.8581228310870793, + "grad_norm": 0.6630483865737915, + "learning_rate": 9.082494735802091e-05, + "loss": 2.5085, + "step": 10633 + }, + { + "epoch": 0.8582035348236623, + "grad_norm": 0.689602792263031, + 
"learning_rate": 9.080922716340594e-05, + "loss": 2.5087, + "step": 10634 + }, + { + "epoch": 0.8582842385602454, + "grad_norm": 0.7333599925041199, + "learning_rate": 9.079350719784905e-05, + "loss": 2.5476, + "step": 10635 + }, + { + "epoch": 0.8583649422968284, + "grad_norm": 0.6895802021026611, + "learning_rate": 9.077778746174204e-05, + "loss": 2.5099, + "step": 10636 + }, + { + "epoch": 0.8584456460334113, + "grad_norm": 0.7202162146568298, + "learning_rate": 9.076206795547668e-05, + "loss": 2.5197, + "step": 10637 + }, + { + "epoch": 0.8585263497699943, + "grad_norm": 0.6454200148582458, + "learning_rate": 9.074634867944472e-05, + "loss": 2.5303, + "step": 10638 + }, + { + "epoch": 0.8586070535065774, + "grad_norm": 0.6842506527900696, + "learning_rate": 9.073062963403795e-05, + "loss": 2.5051, + "step": 10639 + }, + { + "epoch": 0.8586877572431604, + "grad_norm": 0.6979129314422607, + "learning_rate": 9.071491081964815e-05, + "loss": 2.5209, + "step": 10640 + }, + { + "epoch": 0.8587684609797434, + "grad_norm": 0.6851540803909302, + "learning_rate": 9.0699192236667e-05, + "loss": 2.5003, + "step": 10641 + }, + { + "epoch": 0.8588491647163263, + "grad_norm": 0.7528585195541382, + "learning_rate": 9.068347388548627e-05, + "loss": 2.5524, + "step": 10642 + }, + { + "epoch": 0.8589298684529094, + "grad_norm": 0.6297397613525391, + "learning_rate": 9.06677557664978e-05, + "loss": 2.5412, + "step": 10643 + }, + { + "epoch": 0.8590105721894924, + "grad_norm": 0.7034026980400085, + "learning_rate": 9.06520378800932e-05, + "loss": 2.4958, + "step": 10644 + }, + { + "epoch": 0.8590912759260754, + "grad_norm": 0.690258800983429, + "learning_rate": 9.063632022666425e-05, + "loss": 2.4894, + "step": 10645 + }, + { + "epoch": 0.8591719796626583, + "grad_norm": 0.6449949145317078, + "learning_rate": 9.06206028066027e-05, + "loss": 2.507, + "step": 10646 + }, + { + "epoch": 0.8592526833992414, + "grad_norm": 0.6328588724136353, + "learning_rate": 9.060488562030023e-05, + 
"loss": 2.5503, + "step": 10647 + }, + { + "epoch": 0.8593333871358244, + "grad_norm": 0.6570547819137573, + "learning_rate": 9.058916866814858e-05, + "loss": 2.4993, + "step": 10648 + }, + { + "epoch": 0.8594140908724074, + "grad_norm": 0.7689602375030518, + "learning_rate": 9.057345195053945e-05, + "loss": 2.5498, + "step": 10649 + }, + { + "epoch": 0.8594947946089904, + "grad_norm": 0.6727081537246704, + "learning_rate": 9.055773546786454e-05, + "loss": 2.5172, + "step": 10650 + }, + { + "epoch": 0.8595754983455735, + "grad_norm": 0.694722056388855, + "learning_rate": 9.054201922051552e-05, + "loss": 2.5485, + "step": 10651 + }, + { + "epoch": 0.8596562020821564, + "grad_norm": 0.6638815999031067, + "learning_rate": 9.052630320888411e-05, + "loss": 2.5134, + "step": 10652 + }, + { + "epoch": 0.8597369058187394, + "grad_norm": 0.6600833535194397, + "learning_rate": 9.0510587433362e-05, + "loss": 2.5206, + "step": 10653 + }, + { + "epoch": 0.8598176095553224, + "grad_norm": 0.7193894386291504, + "learning_rate": 9.049487189434084e-05, + "loss": 2.5485, + "step": 10654 + }, + { + "epoch": 0.8598983132919055, + "grad_norm": 0.6651753187179565, + "learning_rate": 9.047915659221233e-05, + "loss": 2.5703, + "step": 10655 + }, + { + "epoch": 0.8599790170284884, + "grad_norm": 0.7346364855766296, + "learning_rate": 9.046344152736815e-05, + "loss": 2.5301, + "step": 10656 + }, + { + "epoch": 0.8600597207650714, + "grad_norm": 0.6681811809539795, + "learning_rate": 9.04477267001999e-05, + "loss": 2.5124, + "step": 10657 + }, + { + "epoch": 0.8601404245016544, + "grad_norm": 0.6928461790084839, + "learning_rate": 9.043201211109929e-05, + "loss": 2.5153, + "step": 10658 + }, + { + "epoch": 0.8602211282382374, + "grad_norm": 0.6957700252532959, + "learning_rate": 9.041629776045797e-05, + "loss": 2.4697, + "step": 10659 + }, + { + "epoch": 0.8603018319748205, + "grad_norm": 0.6361939311027527, + "learning_rate": 9.040058364866752e-05, + "loss": 2.5162, + "step": 10660 + }, + { 
+ "epoch": 0.8603825357114034, + "grad_norm": 0.6827390193939209, + "learning_rate": 9.038486977611964e-05, + "loss": 2.4856, + "step": 10661 + }, + { + "epoch": 0.8604632394479864, + "grad_norm": 0.6638801097869873, + "learning_rate": 9.036915614320595e-05, + "loss": 2.5224, + "step": 10662 + }, + { + "epoch": 0.8605439431845694, + "grad_norm": 0.7249652743339539, + "learning_rate": 9.035344275031802e-05, + "loss": 2.5461, + "step": 10663 + }, + { + "epoch": 0.8606246469211525, + "grad_norm": 0.6693316102027893, + "learning_rate": 9.033772959784754e-05, + "loss": 2.5676, + "step": 10664 + }, + { + "epoch": 0.8607053506577355, + "grad_norm": 0.6787340641021729, + "learning_rate": 9.032201668618614e-05, + "loss": 2.5374, + "step": 10665 + }, + { + "epoch": 0.8607860543943184, + "grad_norm": 0.6581670641899109, + "learning_rate": 9.030630401572533e-05, + "loss": 2.5052, + "step": 10666 + }, + { + "epoch": 0.8608667581309014, + "grad_norm": 0.6975873112678528, + "learning_rate": 9.029059158685675e-05, + "loss": 2.4823, + "step": 10667 + }, + { + "epoch": 0.8609474618674845, + "grad_norm": 0.6632521748542786, + "learning_rate": 9.027487939997201e-05, + "loss": 2.5992, + "step": 10668 + }, + { + "epoch": 0.8610281656040675, + "grad_norm": 0.6793977618217468, + "learning_rate": 9.025916745546276e-05, + "loss": 2.5308, + "step": 10669 + }, + { + "epoch": 0.8611088693406505, + "grad_norm": 0.6499481797218323, + "learning_rate": 9.024345575372046e-05, + "loss": 2.4964, + "step": 10670 + }, + { + "epoch": 0.8611895730772334, + "grad_norm": 0.6858868598937988, + "learning_rate": 9.022774429513677e-05, + "loss": 2.5388, + "step": 10671 + }, + { + "epoch": 0.8612702768138165, + "grad_norm": 0.7586160898208618, + "learning_rate": 9.021203308010324e-05, + "loss": 2.5166, + "step": 10672 + }, + { + "epoch": 0.8613509805503995, + "grad_norm": 0.7179701328277588, + "learning_rate": 9.019632210901141e-05, + "loss": 2.5501, + "step": 10673 + }, + { + "epoch": 0.8614316842869825, + 
"grad_norm": 0.6830369830131531, + "learning_rate": 9.018061138225287e-05, + "loss": 2.4956, + "step": 10674 + }, + { + "epoch": 0.8615123880235654, + "grad_norm": 0.6710512042045593, + "learning_rate": 9.01649009002192e-05, + "loss": 2.5722, + "step": 10675 + }, + { + "epoch": 0.8615930917601485, + "grad_norm": 0.640011727809906, + "learning_rate": 9.014919066330186e-05, + "loss": 2.5197, + "step": 10676 + }, + { + "epoch": 0.8616737954967315, + "grad_norm": 0.6803860664367676, + "learning_rate": 9.013348067189245e-05, + "loss": 2.4794, + "step": 10677 + }, + { + "epoch": 0.8617544992333145, + "grad_norm": 0.6734865307807922, + "learning_rate": 9.011777092638251e-05, + "loss": 2.5831, + "step": 10678 + }, + { + "epoch": 0.8618352029698975, + "grad_norm": 0.6525718569755554, + "learning_rate": 9.010206142716353e-05, + "loss": 2.4925, + "step": 10679 + }, + { + "epoch": 0.8619159067064806, + "grad_norm": 0.6886672377586365, + "learning_rate": 9.008635217462706e-05, + "loss": 2.491, + "step": 10680 + }, + { + "epoch": 0.8619966104430635, + "grad_norm": 0.6397131085395813, + "learning_rate": 9.007064316916461e-05, + "loss": 2.4684, + "step": 10681 + }, + { + "epoch": 0.8620773141796465, + "grad_norm": 0.6308462023735046, + "learning_rate": 9.005493441116768e-05, + "loss": 2.504, + "step": 10682 + }, + { + "epoch": 0.8621580179162295, + "grad_norm": 0.7223808169364929, + "learning_rate": 9.003922590102778e-05, + "loss": 2.5342, + "step": 10683 + }, + { + "epoch": 0.8622387216528126, + "grad_norm": 0.687515914440155, + "learning_rate": 9.002351763913642e-05, + "loss": 2.4822, + "step": 10684 + }, + { + "epoch": 0.8623194253893955, + "grad_norm": 0.6888468265533447, + "learning_rate": 9.00078096258851e-05, + "loss": 2.5497, + "step": 10685 + }, + { + "epoch": 0.8624001291259785, + "grad_norm": 0.7429301738739014, + "learning_rate": 8.999210186166525e-05, + "loss": 2.624, + "step": 10686 + }, + { + "epoch": 0.8624808328625615, + "grad_norm": 0.6901945471763611, + 
"learning_rate": 8.997639434686839e-05, + "loss": 2.5268, + "step": 10687 + }, + { + "epoch": 0.8625615365991446, + "grad_norm": 0.7396681308746338, + "learning_rate": 8.9960687081886e-05, + "loss": 2.5427, + "step": 10688 + }, + { + "epoch": 0.8626422403357276, + "grad_norm": 0.6825531125068665, + "learning_rate": 8.99449800671095e-05, + "loss": 2.5722, + "step": 10689 + }, + { + "epoch": 0.8627229440723105, + "grad_norm": 0.6719860434532166, + "learning_rate": 8.992927330293039e-05, + "loss": 2.4939, + "step": 10690 + }, + { + "epoch": 0.8628036478088935, + "grad_norm": 0.644567608833313, + "learning_rate": 8.991356678974017e-05, + "loss": 2.5495, + "step": 10691 + }, + { + "epoch": 0.8628843515454766, + "grad_norm": 0.7066643834114075, + "learning_rate": 8.989786052793015e-05, + "loss": 2.5508, + "step": 10692 + }, + { + "epoch": 0.8629650552820596, + "grad_norm": 0.6697196364402771, + "learning_rate": 8.988215451789187e-05, + "loss": 2.5231, + "step": 10693 + }, + { + "epoch": 0.8630457590186426, + "grad_norm": 0.7143658399581909, + "learning_rate": 8.986644876001681e-05, + "loss": 2.5368, + "step": 10694 + }, + { + "epoch": 0.8631264627552255, + "grad_norm": 0.7597684264183044, + "learning_rate": 8.985074325469628e-05, + "loss": 2.5983, + "step": 10695 + }, + { + "epoch": 0.8632071664918086, + "grad_norm": 0.7418014407157898, + "learning_rate": 8.983503800232176e-05, + "loss": 2.5736, + "step": 10696 + }, + { + "epoch": 0.8632878702283916, + "grad_norm": 0.654435932636261, + "learning_rate": 8.981933300328468e-05, + "loss": 2.5389, + "step": 10697 + }, + { + "epoch": 0.8633685739649746, + "grad_norm": 0.658203661441803, + "learning_rate": 8.980362825797643e-05, + "loss": 2.5204, + "step": 10698 + }, + { + "epoch": 0.8634492777015575, + "grad_norm": 0.7132784724235535, + "learning_rate": 8.97879237667884e-05, + "loss": 2.4982, + "step": 10699 + }, + { + "epoch": 0.8635299814381406, + "grad_norm": 0.6901868581771851, + "learning_rate": 8.9772219530112e-05, + 
"loss": 2.5599, + "step": 10700 + }, + { + "epoch": 0.8636106851747236, + "grad_norm": 0.6241179704666138, + "learning_rate": 8.975651554833869e-05, + "loss": 2.5185, + "step": 10701 + }, + { + "epoch": 0.8636913889113066, + "grad_norm": 0.693692147731781, + "learning_rate": 8.974081182185974e-05, + "loss": 2.506, + "step": 10702 + }, + { + "epoch": 0.8637720926478896, + "grad_norm": 0.6699246168136597, + "learning_rate": 8.972510835106658e-05, + "loss": 2.557, + "step": 10703 + }, + { + "epoch": 0.8638527963844727, + "grad_norm": 0.7339062094688416, + "learning_rate": 8.970940513635059e-05, + "loss": 2.5614, + "step": 10704 + }, + { + "epoch": 0.8639335001210556, + "grad_norm": 0.7558815479278564, + "learning_rate": 8.969370217810311e-05, + "loss": 2.5949, + "step": 10705 + }, + { + "epoch": 0.8640142038576386, + "grad_norm": 0.6992602348327637, + "learning_rate": 8.96779994767155e-05, + "loss": 2.4755, + "step": 10706 + }, + { + "epoch": 0.8640949075942216, + "grad_norm": 0.6836397647857666, + "learning_rate": 8.966229703257915e-05, + "loss": 2.5172, + "step": 10707 + }, + { + "epoch": 0.8641756113308047, + "grad_norm": 0.7054563760757446, + "learning_rate": 8.964659484608537e-05, + "loss": 2.5186, + "step": 10708 + }, + { + "epoch": 0.8642563150673876, + "grad_norm": 0.7096611261367798, + "learning_rate": 8.963089291762551e-05, + "loss": 2.5157, + "step": 10709 + }, + { + "epoch": 0.8643370188039706, + "grad_norm": 0.657465934753418, + "learning_rate": 8.961519124759094e-05, + "loss": 2.5332, + "step": 10710 + }, + { + "epoch": 0.8644177225405536, + "grad_norm": 0.7490121126174927, + "learning_rate": 8.959948983637291e-05, + "loss": 2.512, + "step": 10711 + }, + { + "epoch": 0.8644984262771366, + "grad_norm": 0.7074166536331177, + "learning_rate": 8.958378868436279e-05, + "loss": 2.4745, + "step": 10712 + }, + { + "epoch": 0.8645791300137197, + "grad_norm": 0.7496227025985718, + "learning_rate": 8.956808779195188e-05, + "loss": 2.5533, + "step": 10713 + }, + { + 
"epoch": 0.8646598337503026, + "grad_norm": 0.6624657511711121, + "learning_rate": 8.95523871595315e-05, + "loss": 2.5346, + "step": 10714 + }, + { + "epoch": 0.8647405374868856, + "grad_norm": 0.6829125881195068, + "learning_rate": 8.953668678749292e-05, + "loss": 2.558, + "step": 10715 + }, + { + "epoch": 0.8648212412234686, + "grad_norm": 0.6954498887062073, + "learning_rate": 8.952098667622745e-05, + "loss": 2.5617, + "step": 10716 + }, + { + "epoch": 0.8649019449600517, + "grad_norm": 0.6722636818885803, + "learning_rate": 8.950528682612645e-05, + "loss": 2.5565, + "step": 10717 + }, + { + "epoch": 0.8649826486966347, + "grad_norm": 0.6793767213821411, + "learning_rate": 8.948958723758107e-05, + "loss": 2.5803, + "step": 10718 + }, + { + "epoch": 0.8650633524332176, + "grad_norm": 0.7159373760223389, + "learning_rate": 8.947388791098266e-05, + "loss": 2.5465, + "step": 10719 + }, + { + "epoch": 0.8651440561698006, + "grad_norm": 0.6823835372924805, + "learning_rate": 8.945818884672253e-05, + "loss": 2.5079, + "step": 10720 + }, + { + "epoch": 0.8652247599063837, + "grad_norm": 0.7521452903747559, + "learning_rate": 8.944249004519185e-05, + "loss": 2.5628, + "step": 10721 + }, + { + "epoch": 0.8653054636429667, + "grad_norm": 0.6774886846542358, + "learning_rate": 8.94267915067819e-05, + "loss": 2.6042, + "step": 10722 + }, + { + "epoch": 0.8653861673795497, + "grad_norm": 0.6915935277938843, + "learning_rate": 8.941109323188398e-05, + "loss": 2.5563, + "step": 10723 + }, + { + "epoch": 0.8654668711161326, + "grad_norm": 0.6609061360359192, + "learning_rate": 8.939539522088927e-05, + "loss": 2.5083, + "step": 10724 + }, + { + "epoch": 0.8655475748527157, + "grad_norm": 0.6457223892211914, + "learning_rate": 8.937969747418903e-05, + "loss": 2.573, + "step": 10725 + }, + { + "epoch": 0.8656282785892987, + "grad_norm": 0.6960360407829285, + "learning_rate": 8.936399999217455e-05, + "loss": 2.516, + "step": 10726 + }, + { + "epoch": 0.8657089823258817, + 
"grad_norm": 0.7269721627235413, + "learning_rate": 8.934830277523693e-05, + "loss": 2.5932, + "step": 10727 + }, + { + "epoch": 0.8657896860624646, + "grad_norm": 0.7057532668113708, + "learning_rate": 8.933260582376745e-05, + "loss": 2.5022, + "step": 10728 + }, + { + "epoch": 0.8658703897990477, + "grad_norm": 0.6698749661445618, + "learning_rate": 8.931690913815735e-05, + "loss": 2.5357, + "step": 10729 + }, + { + "epoch": 0.8659510935356307, + "grad_norm": 0.6616599559783936, + "learning_rate": 8.930121271879777e-05, + "loss": 2.4776, + "step": 10730 + }, + { + "epoch": 0.8660317972722137, + "grad_norm": 0.7457093000411987, + "learning_rate": 8.928551656607993e-05, + "loss": 2.5799, + "step": 10731 + }, + { + "epoch": 0.8661125010087967, + "grad_norm": 0.7199469804763794, + "learning_rate": 8.926982068039505e-05, + "loss": 2.5278, + "step": 10732 + }, + { + "epoch": 0.8661932047453798, + "grad_norm": 0.7579182386398315, + "learning_rate": 8.925412506213428e-05, + "loss": 2.5227, + "step": 10733 + }, + { + "epoch": 0.8662739084819627, + "grad_norm": 0.687455952167511, + "learning_rate": 8.92384297116888e-05, + "loss": 2.5099, + "step": 10734 + }, + { + "epoch": 0.8663546122185457, + "grad_norm": 0.7616521120071411, + "learning_rate": 8.922273462944978e-05, + "loss": 2.598, + "step": 10735 + }, + { + "epoch": 0.8664353159551287, + "grad_norm": 0.6730697751045227, + "learning_rate": 8.920703981580842e-05, + "loss": 2.5517, + "step": 10736 + }, + { + "epoch": 0.8665160196917118, + "grad_norm": 0.6769895553588867, + "learning_rate": 8.91913452711558e-05, + "loss": 2.5535, + "step": 10737 + }, + { + "epoch": 0.8665967234282947, + "grad_norm": 0.6284549832344055, + "learning_rate": 8.917565099588312e-05, + "loss": 2.4597, + "step": 10738 + }, + { + "epoch": 0.8666774271648777, + "grad_norm": 0.6900805830955505, + "learning_rate": 8.915995699038152e-05, + "loss": 2.5236, + "step": 10739 + }, + { + "epoch": 0.8667581309014607, + "grad_norm": 0.6842896938323975, + 
"learning_rate": 8.914426325504211e-05, + "loss": 2.5199, + "step": 10740 + }, + { + "epoch": 0.8668388346380438, + "grad_norm": 0.6637243628501892, + "learning_rate": 8.912856979025604e-05, + "loss": 2.5368, + "step": 10741 + }, + { + "epoch": 0.8669195383746268, + "grad_norm": 0.7474464178085327, + "learning_rate": 8.911287659641449e-05, + "loss": 2.4902, + "step": 10742 + }, + { + "epoch": 0.8670002421112097, + "grad_norm": 0.6977849006652832, + "learning_rate": 8.909718367390843e-05, + "loss": 2.5034, + "step": 10743 + }, + { + "epoch": 0.8670809458477927, + "grad_norm": 0.6968807578086853, + "learning_rate": 8.908149102312907e-05, + "loss": 2.5396, + "step": 10744 + }, + { + "epoch": 0.8671616495843758, + "grad_norm": 0.6656209230422974, + "learning_rate": 8.906579864446755e-05, + "loss": 2.5702, + "step": 10745 + }, + { + "epoch": 0.8672423533209588, + "grad_norm": 0.7079079151153564, + "learning_rate": 8.905010653831486e-05, + "loss": 2.5344, + "step": 10746 + }, + { + "epoch": 0.8673230570575418, + "grad_norm": 0.7423387765884399, + "learning_rate": 8.903441470506214e-05, + "loss": 2.5635, + "step": 10747 + }, + { + "epoch": 0.8674037607941247, + "grad_norm": 0.6607224941253662, + "learning_rate": 8.901872314510046e-05, + "loss": 2.54, + "step": 10748 + }, + { + "epoch": 0.8674844645307078, + "grad_norm": 0.6646947860717773, + "learning_rate": 8.900303185882095e-05, + "loss": 2.4661, + "step": 10749 + }, + { + "epoch": 0.8675651682672908, + "grad_norm": 0.6943496465682983, + "learning_rate": 8.89873408466146e-05, + "loss": 2.5213, + "step": 10750 + }, + { + "epoch": 0.8676458720038738, + "grad_norm": 0.7048123478889465, + "learning_rate": 8.89716501088725e-05, + "loss": 2.5529, + "step": 10751 + }, + { + "epoch": 0.8677265757404568, + "grad_norm": 0.654617428779602, + "learning_rate": 8.895595964598574e-05, + "loss": 2.5535, + "step": 10752 + }, + { + "epoch": 0.8678072794770398, + "grad_norm": 0.672063410282135, + "learning_rate": 8.894026945834531e-05, + 
"loss": 2.5279, + "step": 10753 + }, + { + "epoch": 0.8678879832136228, + "grad_norm": 0.7134148478507996, + "learning_rate": 8.892457954634225e-05, + "loss": 2.5403, + "step": 10754 + }, + { + "epoch": 0.8679686869502058, + "grad_norm": 0.6457598805427551, + "learning_rate": 8.890888991036768e-05, + "loss": 2.515, + "step": 10755 + }, + { + "epoch": 0.8680493906867888, + "grad_norm": 0.6725220084190369, + "learning_rate": 8.889320055081252e-05, + "loss": 2.4829, + "step": 10756 + }, + { + "epoch": 0.8681300944233719, + "grad_norm": 0.6425862312316895, + "learning_rate": 8.887751146806785e-05, + "loss": 2.4965, + "step": 10757 + }, + { + "epoch": 0.8682107981599548, + "grad_norm": 0.6654682755470276, + "learning_rate": 8.886182266252468e-05, + "loss": 2.48, + "step": 10758 + }, + { + "epoch": 0.8682915018965378, + "grad_norm": 0.7102493643760681, + "learning_rate": 8.884613413457398e-05, + "loss": 2.5415, + "step": 10759 + }, + { + "epoch": 0.8683722056331208, + "grad_norm": 0.6996567249298096, + "learning_rate": 8.883044588460677e-05, + "loss": 2.542, + "step": 10760 + }, + { + "epoch": 0.8684529093697038, + "grad_norm": 0.7011905312538147, + "learning_rate": 8.881475791301405e-05, + "loss": 2.5391, + "step": 10761 + }, + { + "epoch": 0.8685336131062869, + "grad_norm": 0.6508356928825378, + "learning_rate": 8.879907022018686e-05, + "loss": 2.4892, + "step": 10762 + }, + { + "epoch": 0.8686143168428698, + "grad_norm": 0.7104009985923767, + "learning_rate": 8.878338280651605e-05, + "loss": 2.5152, + "step": 10763 + }, + { + "epoch": 0.8686950205794528, + "grad_norm": 0.6501138210296631, + "learning_rate": 8.876769567239268e-05, + "loss": 2.5767, + "step": 10764 + }, + { + "epoch": 0.8687757243160358, + "grad_norm": 0.6463173031806946, + "learning_rate": 8.875200881820771e-05, + "loss": 2.4758, + "step": 10765 + }, + { + "epoch": 0.8688564280526189, + "grad_norm": 0.6494991779327393, + "learning_rate": 8.873632224435206e-05, + "loss": 2.5364, + "step": 10766 + }, + { 
+ "epoch": 0.8689371317892018, + "grad_norm": 0.6926043033599854, + "learning_rate": 8.872063595121671e-05, + "loss": 2.5288, + "step": 10767 + }, + { + "epoch": 0.8690178355257848, + "grad_norm": 0.7076035737991333, + "learning_rate": 8.870494993919261e-05, + "loss": 2.5118, + "step": 10768 + }, + { + "epoch": 0.8690985392623678, + "grad_norm": 0.6456892490386963, + "learning_rate": 8.868926420867068e-05, + "loss": 2.4957, + "step": 10769 + }, + { + "epoch": 0.8691792429989509, + "grad_norm": 0.6585200428962708, + "learning_rate": 8.867357876004183e-05, + "loss": 2.5049, + "step": 10770 + }, + { + "epoch": 0.8692599467355339, + "grad_norm": 0.6893252730369568, + "learning_rate": 8.865789359369706e-05, + "loss": 2.4808, + "step": 10771 + }, + { + "epoch": 0.8693406504721168, + "grad_norm": 0.6700639724731445, + "learning_rate": 8.864220871002719e-05, + "loss": 2.5475, + "step": 10772 + }, + { + "epoch": 0.8694213542086998, + "grad_norm": 0.6551913619041443, + "learning_rate": 8.862652410942315e-05, + "loss": 2.5063, + "step": 10773 + }, + { + "epoch": 0.8695020579452829, + "grad_norm": 0.6870427131652832, + "learning_rate": 8.86108397922759e-05, + "loss": 2.5785, + "step": 10774 + }, + { + "epoch": 0.8695827616818659, + "grad_norm": 0.6489934325218201, + "learning_rate": 8.859515575897626e-05, + "loss": 2.5584, + "step": 10775 + }, + { + "epoch": 0.8696634654184489, + "grad_norm": 0.6726663112640381, + "learning_rate": 8.857947200991517e-05, + "loss": 2.5707, + "step": 10776 + }, + { + "epoch": 0.8697441691550318, + "grad_norm": 0.7696183323860168, + "learning_rate": 8.856378854548347e-05, + "loss": 2.501, + "step": 10777 + }, + { + "epoch": 0.8698248728916149, + "grad_norm": 0.7002642154693604, + "learning_rate": 8.854810536607212e-05, + "loss": 2.5792, + "step": 10778 + }, + { + "epoch": 0.8699055766281979, + "grad_norm": 0.6429435610771179, + "learning_rate": 8.853242247207185e-05, + "loss": 2.5463, + "step": 10779 + }, + { + "epoch": 0.8699862803647809, + 
"grad_norm": 0.7006216645240784, + "learning_rate": 8.851673986387358e-05, + "loss": 2.5698, + "step": 10780 + }, + { + "epoch": 0.8700669841013638, + "grad_norm": 0.7053292989730835, + "learning_rate": 8.850105754186824e-05, + "loss": 2.5468, + "step": 10781 + }, + { + "epoch": 0.8701476878379469, + "grad_norm": 0.6592122912406921, + "learning_rate": 8.848537550644654e-05, + "loss": 2.5271, + "step": 10782 + }, + { + "epoch": 0.8702283915745299, + "grad_norm": 0.679132342338562, + "learning_rate": 8.846969375799941e-05, + "loss": 2.5281, + "step": 10783 + }, + { + "epoch": 0.8703090953111129, + "grad_norm": 0.6868568062782288, + "learning_rate": 8.845401229691765e-05, + "loss": 2.5415, + "step": 10784 + }, + { + "epoch": 0.8703897990476959, + "grad_norm": 0.7060674428939819, + "learning_rate": 8.843833112359208e-05, + "loss": 2.5649, + "step": 10785 + }, + { + "epoch": 0.870470502784279, + "grad_norm": 0.6663981676101685, + "learning_rate": 8.842265023841352e-05, + "loss": 2.5055, + "step": 10786 + }, + { + "epoch": 0.8705512065208619, + "grad_norm": 0.7095218896865845, + "learning_rate": 8.840696964177282e-05, + "loss": 2.5442, + "step": 10787 + }, + { + "epoch": 0.8706319102574449, + "grad_norm": 0.6884104013442993, + "learning_rate": 8.839128933406069e-05, + "loss": 2.5285, + "step": 10788 + }, + { + "epoch": 0.8707126139940279, + "grad_norm": 0.6427462697029114, + "learning_rate": 8.837560931566798e-05, + "loss": 2.5197, + "step": 10789 + }, + { + "epoch": 0.870793317730611, + "grad_norm": 0.6870493292808533, + "learning_rate": 8.835992958698548e-05, + "loss": 2.4937, + "step": 10790 + }, + { + "epoch": 0.870874021467194, + "grad_norm": 0.7006319761276245, + "learning_rate": 8.834425014840398e-05, + "loss": 2.5148, + "step": 10791 + }, + { + "epoch": 0.8709547252037769, + "grad_norm": 0.690601646900177, + "learning_rate": 8.83285710003142e-05, + "loss": 2.5454, + "step": 10792 + }, + { + "epoch": 0.8710354289403599, + "grad_norm": 0.7205955982208252, + 
"learning_rate": 8.831289214310695e-05, + "loss": 2.5221, + "step": 10793 + }, + { + "epoch": 0.871116132676943, + "grad_norm": 0.7134295105934143, + "learning_rate": 8.8297213577173e-05, + "loss": 2.5626, + "step": 10794 + }, + { + "epoch": 0.871196836413526, + "grad_norm": 0.6560496091842651, + "learning_rate": 8.828153530290307e-05, + "loss": 2.5408, + "step": 10795 + }, + { + "epoch": 0.8712775401501089, + "grad_norm": 0.7055882215499878, + "learning_rate": 8.82658573206879e-05, + "loss": 2.5173, + "step": 10796 + }, + { + "epoch": 0.8713582438866919, + "grad_norm": 0.6751883029937744, + "learning_rate": 8.825017963091827e-05, + "loss": 2.5378, + "step": 10797 + }, + { + "epoch": 0.871438947623275, + "grad_norm": 0.6794824600219727, + "learning_rate": 8.823450223398485e-05, + "loss": 2.592, + "step": 10798 + }, + { + "epoch": 0.871519651359858, + "grad_norm": 0.675729513168335, + "learning_rate": 8.821882513027838e-05, + "loss": 2.5253, + "step": 10799 + }, + { + "epoch": 0.871600355096441, + "grad_norm": 0.7185894250869751, + "learning_rate": 8.820314832018962e-05, + "loss": 2.5073, + "step": 10800 + }, + { + "epoch": 0.8716810588330239, + "grad_norm": 0.6605187654495239, + "learning_rate": 8.818747180410921e-05, + "loss": 2.5141, + "step": 10801 + }, + { + "epoch": 0.871761762569607, + "grad_norm": 0.6955205798149109, + "learning_rate": 8.817179558242788e-05, + "loss": 2.5313, + "step": 10802 + }, + { + "epoch": 0.87184246630619, + "grad_norm": 0.6307928562164307, + "learning_rate": 8.815611965553638e-05, + "loss": 2.4975, + "step": 10803 + }, + { + "epoch": 0.871923170042773, + "grad_norm": 0.7283728122711182, + "learning_rate": 8.814044402382527e-05, + "loss": 2.4623, + "step": 10804 + }, + { + "epoch": 0.872003873779356, + "grad_norm": 0.7019702792167664, + "learning_rate": 8.81247686876853e-05, + "loss": 2.4755, + "step": 10805 + }, + { + "epoch": 0.872084577515939, + "grad_norm": 0.6769137382507324, + "learning_rate": 8.81090936475072e-05, + "loss": 
2.59, + "step": 10806 + }, + { + "epoch": 0.872165281252522, + "grad_norm": 0.6185588836669922, + "learning_rate": 8.80934189036815e-05, + "loss": 2.5308, + "step": 10807 + }, + { + "epoch": 0.872245984989105, + "grad_norm": 0.7127000689506531, + "learning_rate": 8.807774445659894e-05, + "loss": 2.5301, + "step": 10808 + }, + { + "epoch": 0.872326688725688, + "grad_norm": 0.7039114236831665, + "learning_rate": 8.806207030665016e-05, + "loss": 2.5176, + "step": 10809 + }, + { + "epoch": 0.8724073924622711, + "grad_norm": 0.6763370633125305, + "learning_rate": 8.804639645422582e-05, + "loss": 2.5324, + "step": 10810 + }, + { + "epoch": 0.872488096198854, + "grad_norm": 0.7546409368515015, + "learning_rate": 8.803072289971648e-05, + "loss": 2.5446, + "step": 10811 + }, + { + "epoch": 0.872568799935437, + "grad_norm": 0.6916004419326782, + "learning_rate": 8.801504964351284e-05, + "loss": 2.5056, + "step": 10812 + }, + { + "epoch": 0.87264950367202, + "grad_norm": 0.7108416557312012, + "learning_rate": 8.799937668600552e-05, + "loss": 2.5966, + "step": 10813 + }, + { + "epoch": 0.872730207408603, + "grad_norm": 0.7146576046943665, + "learning_rate": 8.798370402758506e-05, + "loss": 2.5152, + "step": 10814 + }, + { + "epoch": 0.872810911145186, + "grad_norm": 0.6708142757415771, + "learning_rate": 8.796803166864211e-05, + "loss": 2.5248, + "step": 10815 + }, + { + "epoch": 0.872891614881769, + "grad_norm": 0.6687600612640381, + "learning_rate": 8.795235960956729e-05, + "loss": 2.4451, + "step": 10816 + }, + { + "epoch": 0.872972318618352, + "grad_norm": 0.724012553691864, + "learning_rate": 8.793668785075114e-05, + "loss": 2.4816, + "step": 10817 + }, + { + "epoch": 0.873053022354935, + "grad_norm": 0.6938769221305847, + "learning_rate": 8.792101639258426e-05, + "loss": 2.5435, + "step": 10818 + }, + { + "epoch": 0.8731337260915181, + "grad_norm": 0.7066235542297363, + "learning_rate": 8.790534523545724e-05, + "loss": 2.5167, + "step": 10819 + }, + { + "epoch": 
0.873214429828101, + "grad_norm": 0.7129037380218506, + "learning_rate": 8.788967437976062e-05, + "loss": 2.5079, + "step": 10820 + }, + { + "epoch": 0.873295133564684, + "grad_norm": 0.6949728727340698, + "learning_rate": 8.787400382588497e-05, + "loss": 2.5564, + "step": 10821 + }, + { + "epoch": 0.873375837301267, + "grad_norm": 0.7924233675003052, + "learning_rate": 8.785833357422088e-05, + "loss": 2.5748, + "step": 10822 + }, + { + "epoch": 0.8734565410378501, + "grad_norm": 0.7486331462860107, + "learning_rate": 8.784266362515882e-05, + "loss": 2.565, + "step": 10823 + }, + { + "epoch": 0.8735372447744331, + "grad_norm": 0.7036460638046265, + "learning_rate": 8.782699397908935e-05, + "loss": 2.5101, + "step": 10824 + }, + { + "epoch": 0.873617948511016, + "grad_norm": 0.6691471338272095, + "learning_rate": 8.781132463640302e-05, + "loss": 2.5262, + "step": 10825 + }, + { + "epoch": 0.873698652247599, + "grad_norm": 0.6836682558059692, + "learning_rate": 8.779565559749037e-05, + "loss": 2.5651, + "step": 10826 + }, + { + "epoch": 0.8737793559841821, + "grad_norm": 0.6634507775306702, + "learning_rate": 8.777998686274185e-05, + "loss": 2.5383, + "step": 10827 + }, + { + "epoch": 0.8738600597207651, + "grad_norm": 0.6903105974197388, + "learning_rate": 8.7764318432548e-05, + "loss": 2.5659, + "step": 10828 + }, + { + "epoch": 0.8739407634573481, + "grad_norm": 0.737859308719635, + "learning_rate": 8.774865030729937e-05, + "loss": 2.5859, + "step": 10829 + }, + { + "epoch": 0.874021467193931, + "grad_norm": 0.696843683719635, + "learning_rate": 8.773298248738633e-05, + "loss": 2.5244, + "step": 10830 + }, + { + "epoch": 0.8741021709305141, + "grad_norm": 0.7342235445976257, + "learning_rate": 8.771731497319946e-05, + "loss": 2.5073, + "step": 10831 + }, + { + "epoch": 0.8741828746670971, + "grad_norm": 0.6676939725875854, + "learning_rate": 8.770164776512926e-05, + "loss": 2.5408, + "step": 10832 + }, + { + "epoch": 0.8742635784036801, + "grad_norm": 
0.6957886219024658, + "learning_rate": 8.768598086356608e-05, + "loss": 2.5566, + "step": 10833 + }, + { + "epoch": 0.874344282140263, + "grad_norm": 0.6938990950584412, + "learning_rate": 8.767031426890046e-05, + "loss": 2.517, + "step": 10834 + }, + { + "epoch": 0.8744249858768461, + "grad_norm": 0.8387169241905212, + "learning_rate": 8.765464798152286e-05, + "loss": 2.5507, + "step": 10835 + }, + { + "epoch": 0.8745056896134291, + "grad_norm": 0.6396276354789734, + "learning_rate": 8.763898200182368e-05, + "loss": 2.5063, + "step": 10836 + }, + { + "epoch": 0.8745863933500121, + "grad_norm": 0.7122719883918762, + "learning_rate": 8.762331633019339e-05, + "loss": 2.5816, + "step": 10837 + }, + { + "epoch": 0.8746670970865951, + "grad_norm": 0.6807141304016113, + "learning_rate": 8.760765096702244e-05, + "loss": 2.6004, + "step": 10838 + }, + { + "epoch": 0.8747478008231782, + "grad_norm": 0.6764848232269287, + "learning_rate": 8.759198591270117e-05, + "loss": 2.5303, + "step": 10839 + }, + { + "epoch": 0.8748285045597611, + "grad_norm": 0.718515932559967, + "learning_rate": 8.757632116762006e-05, + "loss": 2.5088, + "step": 10840 + }, + { + "epoch": 0.8749092082963441, + "grad_norm": 0.7084362506866455, + "learning_rate": 8.75606567321695e-05, + "loss": 2.5496, + "step": 10841 + }, + { + "epoch": 0.8749899120329271, + "grad_norm": 0.7191734910011292, + "learning_rate": 8.754499260673991e-05, + "loss": 2.5525, + "step": 10842 + }, + { + "epoch": 0.8750706157695102, + "grad_norm": 0.7167977094650269, + "learning_rate": 8.752932879172164e-05, + "loss": 2.5479, + "step": 10843 + }, + { + "epoch": 0.8751513195060932, + "grad_norm": 0.6994979381561279, + "learning_rate": 8.751366528750511e-05, + "loss": 2.4942, + "step": 10844 + }, + { + "epoch": 0.8752320232426761, + "grad_norm": 0.7192725539207458, + "learning_rate": 8.749800209448068e-05, + "loss": 2.5233, + "step": 10845 + }, + { + "epoch": 0.8753127269792591, + "grad_norm": 0.7728807330131531, + "learning_rate": 
8.748233921303871e-05, + "loss": 2.5698, + "step": 10846 + }, + { + "epoch": 0.8753934307158422, + "grad_norm": 0.7305434942245483, + "learning_rate": 8.746667664356956e-05, + "loss": 2.5096, + "step": 10847 + }, + { + "epoch": 0.8754741344524252, + "grad_norm": 0.7117629051208496, + "learning_rate": 8.745101438646365e-05, + "loss": 2.5272, + "step": 10848 + }, + { + "epoch": 0.8755548381890081, + "grad_norm": 0.7180361151695251, + "learning_rate": 8.743535244211121e-05, + "loss": 2.4718, + "step": 10849 + }, + { + "epoch": 0.8756355419255911, + "grad_norm": 0.6419457793235779, + "learning_rate": 8.741969081090263e-05, + "loss": 2.5407, + "step": 10850 + }, + { + "epoch": 0.8757162456621742, + "grad_norm": 0.7928328514099121, + "learning_rate": 8.740402949322827e-05, + "loss": 2.488, + "step": 10851 + }, + { + "epoch": 0.8757969493987572, + "grad_norm": 0.7449139952659607, + "learning_rate": 8.738836848947839e-05, + "loss": 2.5943, + "step": 10852 + }, + { + "epoch": 0.8758776531353402, + "grad_norm": 0.7919576168060303, + "learning_rate": 8.737270780004334e-05, + "loss": 2.5556, + "step": 10853 + }, + { + "epoch": 0.8759583568719231, + "grad_norm": 0.6867526769638062, + "learning_rate": 8.735704742531346e-05, + "loss": 2.5395, + "step": 10854 + }, + { + "epoch": 0.8760390606085062, + "grad_norm": 0.7195394039154053, + "learning_rate": 8.734138736567896e-05, + "loss": 2.4404, + "step": 10855 + }, + { + "epoch": 0.8761197643450892, + "grad_norm": 0.68385910987854, + "learning_rate": 8.732572762153016e-05, + "loss": 2.502, + "step": 10856 + }, + { + "epoch": 0.8762004680816722, + "grad_norm": 0.6957393884658813, + "learning_rate": 8.731006819325739e-05, + "loss": 2.5788, + "step": 10857 + }, + { + "epoch": 0.8762811718182552, + "grad_norm": 0.6973037123680115, + "learning_rate": 8.729440908125092e-05, + "loss": 2.4927, + "step": 10858 + }, + { + "epoch": 0.8763618755548382, + "grad_norm": 0.6535985469818115, + "learning_rate": 8.727875028590095e-05, + "loss": 2.596, 
+ "step": 10859 + }, + { + "epoch": 0.8764425792914212, + "grad_norm": 0.7447848320007324, + "learning_rate": 8.726309180759777e-05, + "loss": 2.5825, + "step": 10860 + }, + { + "epoch": 0.8765232830280042, + "grad_norm": 0.7155942320823669, + "learning_rate": 8.724743364673168e-05, + "loss": 2.5105, + "step": 10861 + }, + { + "epoch": 0.8766039867645872, + "grad_norm": 0.6664694547653198, + "learning_rate": 8.723177580369285e-05, + "loss": 2.5244, + "step": 10862 + }, + { + "epoch": 0.8766846905011701, + "grad_norm": 0.7437852025032043, + "learning_rate": 8.721611827887153e-05, + "loss": 2.534, + "step": 10863 + }, + { + "epoch": 0.8767653942377532, + "grad_norm": 0.6752577424049377, + "learning_rate": 8.7200461072658e-05, + "loss": 2.5025, + "step": 10864 + }, + { + "epoch": 0.8768460979743362, + "grad_norm": 0.7420764565467834, + "learning_rate": 8.718480418544241e-05, + "loss": 2.5261, + "step": 10865 + }, + { + "epoch": 0.8769268017109192, + "grad_norm": 0.669384777545929, + "learning_rate": 8.7169147617615e-05, + "loss": 2.5258, + "step": 10866 + }, + { + "epoch": 0.8770075054475022, + "grad_norm": 0.6649587750434875, + "learning_rate": 8.715349136956599e-05, + "loss": 2.5308, + "step": 10867 + }, + { + "epoch": 0.8770882091840853, + "grad_norm": 0.728922426700592, + "learning_rate": 8.713783544168552e-05, + "loss": 2.5251, + "step": 10868 + }, + { + "epoch": 0.8771689129206682, + "grad_norm": 0.6957671642303467, + "learning_rate": 8.712217983436384e-05, + "loss": 2.5818, + "step": 10869 + }, + { + "epoch": 0.8772496166572512, + "grad_norm": 0.6796830892562866, + "learning_rate": 8.710652454799108e-05, + "loss": 2.5122, + "step": 10870 + }, + { + "epoch": 0.8773303203938342, + "grad_norm": 0.7230980396270752, + "learning_rate": 8.709086958295746e-05, + "loss": 2.5836, + "step": 10871 + }, + { + "epoch": 0.8774110241304173, + "grad_norm": 0.6992264986038208, + "learning_rate": 8.707521493965309e-05, + "loss": 2.5907, + "step": 10872 + }, + { + "epoch": 
0.8774917278670002, + "grad_norm": 0.7066535353660583, + "learning_rate": 8.705956061846816e-05, + "loss": 2.5508, + "step": 10873 + }, + { + "epoch": 0.8775724316035832, + "grad_norm": 0.6559327244758606, + "learning_rate": 8.704390661979283e-05, + "loss": 2.611, + "step": 10874 + }, + { + "epoch": 0.8776531353401662, + "grad_norm": 0.6673287749290466, + "learning_rate": 8.70282529440172e-05, + "loss": 2.5778, + "step": 10875 + }, + { + "epoch": 0.8777338390767493, + "grad_norm": 0.6715971231460571, + "learning_rate": 8.701259959153139e-05, + "loss": 2.5342, + "step": 10876 + }, + { + "epoch": 0.8778145428133323, + "grad_norm": 0.7456488609313965, + "learning_rate": 8.699694656272557e-05, + "loss": 2.5365, + "step": 10877 + }, + { + "epoch": 0.8778952465499152, + "grad_norm": 0.6658159494400024, + "learning_rate": 8.698129385798983e-05, + "loss": 2.4387, + "step": 10878 + }, + { + "epoch": 0.8779759502864982, + "grad_norm": 0.6653816103935242, + "learning_rate": 8.696564147771427e-05, + "loss": 2.5791, + "step": 10879 + }, + { + "epoch": 0.8780566540230813, + "grad_norm": 0.6763200163841248, + "learning_rate": 8.694998942228902e-05, + "loss": 2.5356, + "step": 10880 + }, + { + "epoch": 0.8781373577596643, + "grad_norm": 0.6534504890441895, + "learning_rate": 8.69343376921041e-05, + "loss": 2.5358, + "step": 10881 + }, + { + "epoch": 0.8782180614962473, + "grad_norm": 0.6341667771339417, + "learning_rate": 8.691868628754967e-05, + "loss": 2.4927, + "step": 10882 + }, + { + "epoch": 0.8782987652328302, + "grad_norm": 0.6215559244155884, + "learning_rate": 8.690303520901579e-05, + "loss": 2.4312, + "step": 10883 + }, + { + "epoch": 0.8783794689694133, + "grad_norm": 0.6705841422080994, + "learning_rate": 8.688738445689248e-05, + "loss": 2.4778, + "step": 10884 + }, + { + "epoch": 0.8784601727059963, + "grad_norm": 0.680275559425354, + "learning_rate": 8.687173403156982e-05, + "loss": 2.5577, + "step": 10885 + }, + { + "epoch": 0.8785408764425793, + "grad_norm": 
0.6918728351593018, + "learning_rate": 8.685608393343789e-05, + "loss": 2.5212, + "step": 10886 + }, + { + "epoch": 0.8786215801791623, + "grad_norm": 0.623636782169342, + "learning_rate": 8.68404341628867e-05, + "loss": 2.5131, + "step": 10887 + }, + { + "epoch": 0.8787022839157453, + "grad_norm": 0.7200562357902527, + "learning_rate": 8.682478472030628e-05, + "loss": 2.5517, + "step": 10888 + }, + { + "epoch": 0.8787829876523283, + "grad_norm": 0.6902644634246826, + "learning_rate": 8.680913560608666e-05, + "loss": 2.511, + "step": 10889 + }, + { + "epoch": 0.8788636913889113, + "grad_norm": 0.6855802536010742, + "learning_rate": 8.679348682061792e-05, + "loss": 2.5169, + "step": 10890 + }, + { + "epoch": 0.8789443951254943, + "grad_norm": 0.7229284048080444, + "learning_rate": 8.677783836428995e-05, + "loss": 2.5634, + "step": 10891 + }, + { + "epoch": 0.8790250988620774, + "grad_norm": 0.6350376605987549, + "learning_rate": 8.676219023749281e-05, + "loss": 2.443, + "step": 10892 + }, + { + "epoch": 0.8791058025986603, + "grad_norm": 0.6884307265281677, + "learning_rate": 8.674654244061653e-05, + "loss": 2.524, + "step": 10893 + }, + { + "epoch": 0.8791865063352433, + "grad_norm": 0.6571067571640015, + "learning_rate": 8.673089497405102e-05, + "loss": 2.5322, + "step": 10894 + }, + { + "epoch": 0.8792672100718263, + "grad_norm": 0.7078021764755249, + "learning_rate": 8.67152478381863e-05, + "loss": 2.5317, + "step": 10895 + }, + { + "epoch": 0.8793479138084094, + "grad_norm": 0.6809059381484985, + "learning_rate": 8.669960103341236e-05, + "loss": 2.5767, + "step": 10896 + }, + { + "epoch": 0.8794286175449924, + "grad_norm": 0.7399441003799438, + "learning_rate": 8.66839545601191e-05, + "loss": 2.5194, + "step": 10897 + }, + { + "epoch": 0.8795093212815753, + "grad_norm": 0.6762270927429199, + "learning_rate": 8.66683084186965e-05, + "loss": 2.5306, + "step": 10898 + }, + { + "epoch": 0.8795900250181583, + "grad_norm": 0.7394620776176453, + "learning_rate": 
8.665266260953455e-05, + "loss": 2.4516, + "step": 10899 + }, + { + "epoch": 0.8796707287547414, + "grad_norm": 0.6775416135787964, + "learning_rate": 8.663701713302309e-05, + "loss": 2.5574, + "step": 10900 + }, + { + "epoch": 0.8797514324913244, + "grad_norm": 0.7630520462989807, + "learning_rate": 8.66213719895521e-05, + "loss": 2.5516, + "step": 10901 + }, + { + "epoch": 0.8798321362279073, + "grad_norm": 0.6555768847465515, + "learning_rate": 8.660572717951149e-05, + "loss": 2.5267, + "step": 10902 + }, + { + "epoch": 0.8799128399644903, + "grad_norm": 0.6899500489234924, + "learning_rate": 8.659008270329119e-05, + "loss": 2.4938, + "step": 10903 + }, + { + "epoch": 0.8799935437010734, + "grad_norm": 0.6939221024513245, + "learning_rate": 8.657443856128107e-05, + "loss": 2.5358, + "step": 10904 + }, + { + "epoch": 0.8800742474376564, + "grad_norm": 0.6454630494117737, + "learning_rate": 8.655879475387102e-05, + "loss": 2.5528, + "step": 10905 + }, + { + "epoch": 0.8801549511742394, + "grad_norm": 0.7142425775527954, + "learning_rate": 8.654315128145099e-05, + "loss": 2.5668, + "step": 10906 + }, + { + "epoch": 0.8802356549108223, + "grad_norm": 0.7512764930725098, + "learning_rate": 8.652750814441075e-05, + "loss": 2.5224, + "step": 10907 + }, + { + "epoch": 0.8803163586474054, + "grad_norm": 0.6599575877189636, + "learning_rate": 8.651186534314026e-05, + "loss": 2.5363, + "step": 10908 + }, + { + "epoch": 0.8803970623839884, + "grad_norm": 0.6787410974502563, + "learning_rate": 8.649622287802935e-05, + "loss": 2.4587, + "step": 10909 + }, + { + "epoch": 0.8804777661205714, + "grad_norm": 0.7124783396720886, + "learning_rate": 8.648058074946786e-05, + "loss": 2.5842, + "step": 10910 + }, + { + "epoch": 0.8805584698571544, + "grad_norm": 0.6698839664459229, + "learning_rate": 8.646493895784562e-05, + "loss": 2.513, + "step": 10911 + }, + { + "epoch": 0.8806391735937374, + "grad_norm": 0.6660044193267822, + "learning_rate": 8.644929750355249e-05, + "loss": 
2.4996, + "step": 10912 + }, + { + "epoch": 0.8807198773303204, + "grad_norm": 0.7060455083847046, + "learning_rate": 8.643365638697828e-05, + "loss": 2.5497, + "step": 10913 + }, + { + "epoch": 0.8808005810669034, + "grad_norm": 0.6835277676582336, + "learning_rate": 8.641801560851281e-05, + "loss": 2.5198, + "step": 10914 + }, + { + "epoch": 0.8808812848034864, + "grad_norm": 0.6994042992591858, + "learning_rate": 8.640237516854595e-05, + "loss": 2.5692, + "step": 10915 + }, + { + "epoch": 0.8809619885400694, + "grad_norm": 0.6583377718925476, + "learning_rate": 8.63867350674674e-05, + "loss": 2.5025, + "step": 10916 + }, + { + "epoch": 0.8810426922766524, + "grad_norm": 0.6882332563400269, + "learning_rate": 8.637109530566698e-05, + "loss": 2.5343, + "step": 10917 + }, + { + "epoch": 0.8811233960132354, + "grad_norm": 0.6329876184463501, + "learning_rate": 8.635545588353449e-05, + "loss": 2.5335, + "step": 10918 + }, + { + "epoch": 0.8812040997498184, + "grad_norm": 0.713196337223053, + "learning_rate": 8.633981680145975e-05, + "loss": 2.4814, + "step": 10919 + }, + { + "epoch": 0.8812848034864014, + "grad_norm": 0.7388820648193359, + "learning_rate": 8.632417805983246e-05, + "loss": 2.4927, + "step": 10920 + }, + { + "epoch": 0.8813655072229845, + "grad_norm": 0.7316160798072815, + "learning_rate": 8.63085396590424e-05, + "loss": 2.508, + "step": 10921 + }, + { + "epoch": 0.8814462109595674, + "grad_norm": 0.6690139174461365, + "learning_rate": 8.629290159947934e-05, + "loss": 2.5719, + "step": 10922 + }, + { + "epoch": 0.8815269146961504, + "grad_norm": 0.6369553208351135, + "learning_rate": 8.627726388153297e-05, + "loss": 2.5277, + "step": 10923 + }, + { + "epoch": 0.8816076184327334, + "grad_norm": 0.6870365738868713, + "learning_rate": 8.626162650559306e-05, + "loss": 2.4731, + "step": 10924 + }, + { + "epoch": 0.8816883221693165, + "grad_norm": 0.6890872716903687, + "learning_rate": 8.624598947204938e-05, + "loss": 2.5417, + "step": 10925 + }, + { + 
"epoch": 0.8817690259058995, + "grad_norm": 0.6548230051994324, + "learning_rate": 8.623035278129156e-05, + "loss": 2.4888, + "step": 10926 + }, + { + "epoch": 0.8818497296424824, + "grad_norm": 0.6835262775421143, + "learning_rate": 8.621471643370933e-05, + "loss": 2.531, + "step": 10927 + }, + { + "epoch": 0.8819304333790654, + "grad_norm": 0.6910626292228699, + "learning_rate": 8.619908042969243e-05, + "loss": 2.4864, + "step": 10928 + }, + { + "epoch": 0.8820111371156485, + "grad_norm": 0.6727725267410278, + "learning_rate": 8.618344476963049e-05, + "loss": 2.5063, + "step": 10929 + }, + { + "epoch": 0.8820918408522315, + "grad_norm": 0.7285245656967163, + "learning_rate": 8.616780945391323e-05, + "loss": 2.5036, + "step": 10930 + }, + { + "epoch": 0.8821725445888144, + "grad_norm": 0.6561840176582336, + "learning_rate": 8.615217448293035e-05, + "loss": 2.5152, + "step": 10931 + }, + { + "epoch": 0.8822532483253974, + "grad_norm": 0.6524627208709717, + "learning_rate": 8.613653985707144e-05, + "loss": 2.4827, + "step": 10932 + }, + { + "epoch": 0.8823339520619805, + "grad_norm": 0.6815671920776367, + "learning_rate": 8.612090557672619e-05, + "loss": 2.5385, + "step": 10933 + }, + { + "epoch": 0.8824146557985635, + "grad_norm": 0.7479865550994873, + "learning_rate": 8.610527164228429e-05, + "loss": 2.5311, + "step": 10934 + }, + { + "epoch": 0.8824953595351465, + "grad_norm": 0.699504554271698, + "learning_rate": 8.608963805413535e-05, + "loss": 2.5332, + "step": 10935 + }, + { + "epoch": 0.8825760632717294, + "grad_norm": 0.7081198692321777, + "learning_rate": 8.607400481266896e-05, + "loss": 2.5636, + "step": 10936 + }, + { + "epoch": 0.8826567670083125, + "grad_norm": 0.7020730972290039, + "learning_rate": 8.605837191827478e-05, + "loss": 2.498, + "step": 10937 + }, + { + "epoch": 0.8827374707448955, + "grad_norm": 0.8004096150398254, + "learning_rate": 8.604273937134242e-05, + "loss": 2.5352, + "step": 10938 + }, + { + "epoch": 0.8828181744814785, + 
"grad_norm": 0.6399645209312439, + "learning_rate": 8.602710717226147e-05, + "loss": 2.5673, + "step": 10939 + }, + { + "epoch": 0.8828988782180615, + "grad_norm": 0.683195173740387, + "learning_rate": 8.601147532142153e-05, + "loss": 2.4812, + "step": 10940 + }, + { + "epoch": 0.8829795819546445, + "grad_norm": 0.7783642411231995, + "learning_rate": 8.599584381921224e-05, + "loss": 2.4812, + "step": 10941 + }, + { + "epoch": 0.8830602856912275, + "grad_norm": 0.7107423543930054, + "learning_rate": 8.598021266602308e-05, + "loss": 2.5527, + "step": 10942 + }, + { + "epoch": 0.8831409894278105, + "grad_norm": 0.6419345140457153, + "learning_rate": 8.596458186224365e-05, + "loss": 2.5642, + "step": 10943 + }, + { + "epoch": 0.8832216931643935, + "grad_norm": 0.6897309422492981, + "learning_rate": 8.59489514082636e-05, + "loss": 2.5743, + "step": 10944 + }, + { + "epoch": 0.8833023969009766, + "grad_norm": 0.6901495456695557, + "learning_rate": 8.593332130447236e-05, + "loss": 2.5139, + "step": 10945 + }, + { + "epoch": 0.8833831006375595, + "grad_norm": 0.6865388751029968, + "learning_rate": 8.591769155125953e-05, + "loss": 2.5281, + "step": 10946 + }, + { + "epoch": 0.8834638043741425, + "grad_norm": 0.7070403099060059, + "learning_rate": 8.590206214901465e-05, + "loss": 2.4648, + "step": 10947 + }, + { + "epoch": 0.8835445081107255, + "grad_norm": 0.6846395134925842, + "learning_rate": 8.588643309812721e-05, + "loss": 2.4792, + "step": 10948 + }, + { + "epoch": 0.8836252118473086, + "grad_norm": 0.6875495314598083, + "learning_rate": 8.587080439898675e-05, + "loss": 2.5126, + "step": 10949 + }, + { + "epoch": 0.8837059155838916, + "grad_norm": 0.670098066329956, + "learning_rate": 8.58551760519828e-05, + "loss": 2.4922, + "step": 10950 + }, + { + "epoch": 0.8837866193204745, + "grad_norm": 0.6675527691841125, + "learning_rate": 8.583954805750487e-05, + "loss": 2.499, + "step": 10951 + }, + { + "epoch": 0.8838673230570575, + "grad_norm": 0.6694127321243286, + 
"learning_rate": 8.582392041594236e-05, + "loss": 2.5286, + "step": 10952 + }, + { + "epoch": 0.8839480267936406, + "grad_norm": 0.7291092872619629, + "learning_rate": 8.580829312768482e-05, + "loss": 2.5705, + "step": 10953 + }, + { + "epoch": 0.8840287305302236, + "grad_norm": 0.709904670715332, + "learning_rate": 8.579266619312174e-05, + "loss": 2.5238, + "step": 10954 + }, + { + "epoch": 0.8841094342668065, + "grad_norm": 0.7037622332572937, + "learning_rate": 8.577703961264254e-05, + "loss": 2.5491, + "step": 10955 + }, + { + "epoch": 0.8841901380033895, + "grad_norm": 0.7553049325942993, + "learning_rate": 8.576141338663668e-05, + "loss": 2.5643, + "step": 10956 + }, + { + "epoch": 0.8842708417399726, + "grad_norm": 0.7177377343177795, + "learning_rate": 8.574578751549364e-05, + "loss": 2.49, + "step": 10957 + }, + { + "epoch": 0.8843515454765556, + "grad_norm": 0.682668149471283, + "learning_rate": 8.573016199960283e-05, + "loss": 2.5221, + "step": 10958 + }, + { + "epoch": 0.8844322492131386, + "grad_norm": 0.7508956789970398, + "learning_rate": 8.571453683935366e-05, + "loss": 2.5766, + "step": 10959 + }, + { + "epoch": 0.8845129529497215, + "grad_norm": 0.6495946645736694, + "learning_rate": 8.569891203513562e-05, + "loss": 2.534, + "step": 10960 + }, + { + "epoch": 0.8845936566863046, + "grad_norm": 0.7362824082374573, + "learning_rate": 8.568328758733806e-05, + "loss": 2.4614, + "step": 10961 + }, + { + "epoch": 0.8846743604228876, + "grad_norm": 0.6571496725082397, + "learning_rate": 8.566766349635037e-05, + "loss": 2.4393, + "step": 10962 + }, + { + "epoch": 0.8847550641594706, + "grad_norm": 0.7088329195976257, + "learning_rate": 8.5652039762562e-05, + "loss": 2.5476, + "step": 10963 + }, + { + "epoch": 0.8848357678960536, + "grad_norm": 0.6414440274238586, + "learning_rate": 8.56364163863623e-05, + "loss": 2.4668, + "step": 10964 + }, + { + "epoch": 0.8849164716326365, + "grad_norm": 0.7333478331565857, + "learning_rate": 8.562079336814063e-05, + 
"loss": 2.5151, + "step": 10965 + }, + { + "epoch": 0.8849971753692196, + "grad_norm": 0.638038694858551, + "learning_rate": 8.560517070828638e-05, + "loss": 2.5063, + "step": 10966 + }, + { + "epoch": 0.8850778791058026, + "grad_norm": 0.638921320438385, + "learning_rate": 8.558954840718896e-05, + "loss": 2.4769, + "step": 10967 + }, + { + "epoch": 0.8851585828423856, + "grad_norm": 0.6923465728759766, + "learning_rate": 8.557392646523759e-05, + "loss": 2.5388, + "step": 10968 + }, + { + "epoch": 0.8852392865789686, + "grad_norm": 0.7095212936401367, + "learning_rate": 8.555830488282169e-05, + "loss": 2.4955, + "step": 10969 + }, + { + "epoch": 0.8853199903155516, + "grad_norm": 0.689908504486084, + "learning_rate": 8.554268366033065e-05, + "loss": 2.4998, + "step": 10970 + }, + { + "epoch": 0.8854006940521346, + "grad_norm": 0.6551975011825562, + "learning_rate": 8.552706279815366e-05, + "loss": 2.4965, + "step": 10971 + }, + { + "epoch": 0.8854813977887176, + "grad_norm": 0.7239118218421936, + "learning_rate": 8.551144229668012e-05, + "loss": 2.5785, + "step": 10972 + }, + { + "epoch": 0.8855621015253006, + "grad_norm": 0.6743230819702148, + "learning_rate": 8.549582215629932e-05, + "loss": 2.5146, + "step": 10973 + }, + { + "epoch": 0.8856428052618837, + "grad_norm": 0.6991584300994873, + "learning_rate": 8.548020237740052e-05, + "loss": 2.5524, + "step": 10974 + }, + { + "epoch": 0.8857235089984666, + "grad_norm": 0.6605305075645447, + "learning_rate": 8.546458296037304e-05, + "loss": 2.5505, + "step": 10975 + }, + { + "epoch": 0.8858042127350496, + "grad_norm": 0.7011568546295166, + "learning_rate": 8.54489639056062e-05, + "loss": 2.4381, + "step": 10976 + }, + { + "epoch": 0.8858849164716326, + "grad_norm": 0.7015339136123657, + "learning_rate": 8.543334521348916e-05, + "loss": 2.5432, + "step": 10977 + }, + { + "epoch": 0.8859656202082157, + "grad_norm": 0.6892278790473938, + "learning_rate": 8.541772688441124e-05, + "loss": 2.5286, + "step": 10978 + }, + { 
+ "epoch": 0.8860463239447987, + "grad_norm": 0.6680187582969666, + "learning_rate": 8.540210891876168e-05, + "loss": 2.439, + "step": 10979 + }, + { + "epoch": 0.8861270276813816, + "grad_norm": 0.7043240666389465, + "learning_rate": 8.538649131692975e-05, + "loss": 2.5558, + "step": 10980 + }, + { + "epoch": 0.8862077314179646, + "grad_norm": 0.6940229535102844, + "learning_rate": 8.537087407930463e-05, + "loss": 2.5219, + "step": 10981 + }, + { + "epoch": 0.8862884351545477, + "grad_norm": 0.6571553945541382, + "learning_rate": 8.535525720627558e-05, + "loss": 2.5054, + "step": 10982 + }, + { + "epoch": 0.8863691388911307, + "grad_norm": 0.6846656203269958, + "learning_rate": 8.533964069823182e-05, + "loss": 2.497, + "step": 10983 + }, + { + "epoch": 0.8864498426277136, + "grad_norm": 0.6838627457618713, + "learning_rate": 8.53240245555625e-05, + "loss": 2.5495, + "step": 10984 + }, + { + "epoch": 0.8865305463642966, + "grad_norm": 0.6825091242790222, + "learning_rate": 8.530840877865687e-05, + "loss": 2.5656, + "step": 10985 + }, + { + "epoch": 0.8866112501008797, + "grad_norm": 0.7368674278259277, + "learning_rate": 8.529279336790414e-05, + "loss": 2.5378, + "step": 10986 + }, + { + "epoch": 0.8866919538374627, + "grad_norm": 0.7333693504333496, + "learning_rate": 8.527717832369338e-05, + "loss": 2.506, + "step": 10987 + }, + { + "epoch": 0.8867726575740457, + "grad_norm": 0.6623306274414062, + "learning_rate": 8.526156364641384e-05, + "loss": 2.4824, + "step": 10988 + }, + { + "epoch": 0.8868533613106286, + "grad_norm": 0.6863973140716553, + "learning_rate": 8.524594933645468e-05, + "loss": 2.536, + "step": 10989 + }, + { + "epoch": 0.8869340650472117, + "grad_norm": 0.6805100440979004, + "learning_rate": 8.523033539420501e-05, + "loss": 2.4954, + "step": 10990 + }, + { + "epoch": 0.8870147687837947, + "grad_norm": 0.6672216653823853, + "learning_rate": 8.521472182005399e-05, + "loss": 2.4893, + "step": 10991 + }, + { + "epoch": 0.8870954725203777, + 
"grad_norm": 0.7310158610343933, + "learning_rate": 8.519910861439079e-05, + "loss": 2.5317, + "step": 10992 + }, + { + "epoch": 0.8871761762569607, + "grad_norm": 0.6820743083953857, + "learning_rate": 8.518349577760445e-05, + "loss": 2.4482, + "step": 10993 + }, + { + "epoch": 0.8872568799935437, + "grad_norm": 0.6660269498825073, + "learning_rate": 8.516788331008411e-05, + "loss": 2.5353, + "step": 10994 + }, + { + "epoch": 0.8873375837301267, + "grad_norm": 0.676243007183075, + "learning_rate": 8.51522712122189e-05, + "loss": 2.531, + "step": 10995 + }, + { + "epoch": 0.8874182874667097, + "grad_norm": 0.6677152514457703, + "learning_rate": 8.513665948439796e-05, + "loss": 2.4732, + "step": 10996 + }, + { + "epoch": 0.8874989912032927, + "grad_norm": 0.7341045141220093, + "learning_rate": 8.512104812701027e-05, + "loss": 2.5668, + "step": 10997 + }, + { + "epoch": 0.8875796949398758, + "grad_norm": 0.6475326418876648, + "learning_rate": 8.510543714044496e-05, + "loss": 2.5026, + "step": 10998 + }, + { + "epoch": 0.8876603986764587, + "grad_norm": 0.7335529923439026, + "learning_rate": 8.50898265250911e-05, + "loss": 2.4946, + "step": 10999 + }, + { + "epoch": 0.8877411024130417, + "grad_norm": 0.760108232498169, + "learning_rate": 8.507421628133772e-05, + "loss": 2.5697, + "step": 11000 + }, + { + "epoch": 0.8877411024130417, + "eval_loss": 2.450413465499878, + "eval_runtime": 975.281, + "eval_samples_per_second": 2.686, + "eval_steps_per_second": 0.448, + "step": 11000 + }, + { + "epoch": 0.8878218061496247, + "grad_norm": 0.6420160531997681, + "learning_rate": 8.505860640957391e-05, + "loss": 2.5842, + "step": 11001 + }, + { + "epoch": 0.8879025098862078, + "grad_norm": 0.6625204086303711, + "learning_rate": 8.50429969101887e-05, + "loss": 2.4771, + "step": 11002 + }, + { + "epoch": 0.8879832136227908, + "grad_norm": 0.7430149912834167, + "learning_rate": 8.502738778357107e-05, + "loss": 2.5509, + "step": 11003 + }, + { + "epoch": 0.8880639173593737, + 
"grad_norm": 0.663624107837677, + "learning_rate": 8.501177903011008e-05, + "loss": 2.504, + "step": 11004 + }, + { + "epoch": 0.8881446210959567, + "grad_norm": 0.6638087630271912, + "learning_rate": 8.499617065019476e-05, + "loss": 2.492, + "step": 11005 + }, + { + "epoch": 0.8882253248325398, + "grad_norm": 0.7321780323982239, + "learning_rate": 8.498056264421406e-05, + "loss": 2.5808, + "step": 11006 + }, + { + "epoch": 0.8883060285691228, + "grad_norm": 0.7108619809150696, + "learning_rate": 8.4964955012557e-05, + "loss": 2.6185, + "step": 11007 + }, + { + "epoch": 0.8883867323057058, + "grad_norm": 0.6745856404304504, + "learning_rate": 8.494934775561258e-05, + "loss": 2.576, + "step": 11008 + }, + { + "epoch": 0.8884674360422887, + "grad_norm": 0.8002225756645203, + "learning_rate": 8.493374087376976e-05, + "loss": 2.5598, + "step": 11009 + }, + { + "epoch": 0.8885481397788718, + "grad_norm": 0.6848840713500977, + "learning_rate": 8.491813436741746e-05, + "loss": 2.5218, + "step": 11010 + }, + { + "epoch": 0.8886288435154548, + "grad_norm": 0.6464105248451233, + "learning_rate": 8.490252823694471e-05, + "loss": 2.5503, + "step": 11011 + }, + { + "epoch": 0.8887095472520378, + "grad_norm": 0.7165790796279907, + "learning_rate": 8.488692248274045e-05, + "loss": 2.5104, + "step": 11012 + }, + { + "epoch": 0.8887902509886207, + "grad_norm": 0.6832898259162903, + "learning_rate": 8.487131710519355e-05, + "loss": 2.5379, + "step": 11013 + }, + { + "epoch": 0.8888709547252038, + "grad_norm": 0.6992432475090027, + "learning_rate": 8.485571210469296e-05, + "loss": 2.5388, + "step": 11014 + }, + { + "epoch": 0.8889516584617868, + "grad_norm": 0.6410119533538818, + "learning_rate": 8.484010748162765e-05, + "loss": 2.5237, + "step": 11015 + }, + { + "epoch": 0.8890323621983698, + "grad_norm": 0.716248095035553, + "learning_rate": 8.482450323638647e-05, + "loss": 2.4977, + "step": 11016 + }, + { + "epoch": 0.8891130659349528, + "grad_norm": 0.6620567440986633, + 
"learning_rate": 8.480889936935833e-05, + "loss": 2.5088, + "step": 11017 + }, + { + "epoch": 0.8891937696715357, + "grad_norm": 0.7311015129089355, + "learning_rate": 8.479329588093217e-05, + "loss": 2.5547, + "step": 11018 + }, + { + "epoch": 0.8892744734081188, + "grad_norm": 0.757203996181488, + "learning_rate": 8.477769277149676e-05, + "loss": 2.5681, + "step": 11019 + }, + { + "epoch": 0.8893551771447018, + "grad_norm": 0.6941282153129578, + "learning_rate": 8.476209004144107e-05, + "loss": 2.5078, + "step": 11020 + }, + { + "epoch": 0.8894358808812848, + "grad_norm": 0.6381667256355286, + "learning_rate": 8.474648769115396e-05, + "loss": 2.5371, + "step": 11021 + }, + { + "epoch": 0.8895165846178678, + "grad_norm": 0.7978621125221252, + "learning_rate": 8.473088572102422e-05, + "loss": 2.5384, + "step": 11022 + }, + { + "epoch": 0.8895972883544508, + "grad_norm": 0.7229189872741699, + "learning_rate": 8.471528413144072e-05, + "loss": 2.5469, + "step": 11023 + }, + { + "epoch": 0.8896779920910338, + "grad_norm": 0.705545961856842, + "learning_rate": 8.469968292279231e-05, + "loss": 2.5281, + "step": 11024 + }, + { + "epoch": 0.8897586958276168, + "grad_norm": 0.7259972095489502, + "learning_rate": 8.468408209546777e-05, + "loss": 2.5485, + "step": 11025 + }, + { + "epoch": 0.8898393995641998, + "grad_norm": 0.6859608888626099, + "learning_rate": 8.466848164985594e-05, + "loss": 2.5548, + "step": 11026 + }, + { + "epoch": 0.8899201033007829, + "grad_norm": 0.7036644816398621, + "learning_rate": 8.465288158634565e-05, + "loss": 2.5159, + "step": 11027 + }, + { + "epoch": 0.8900008070373658, + "grad_norm": 0.6899380087852478, + "learning_rate": 8.463728190532569e-05, + "loss": 2.5037, + "step": 11028 + }, + { + "epoch": 0.8900815107739488, + "grad_norm": 0.7428410649299622, + "learning_rate": 8.462168260718477e-05, + "loss": 2.5074, + "step": 11029 + }, + { + "epoch": 0.8901622145105318, + "grad_norm": 0.6724158525466919, + "learning_rate": 
8.460608369231173e-05, + "loss": 2.5544, + "step": 11030 + }, + { + "epoch": 0.8902429182471149, + "grad_norm": 0.6516450643539429, + "learning_rate": 8.459048516109535e-05, + "loss": 2.5152, + "step": 11031 + }, + { + "epoch": 0.8903236219836979, + "grad_norm": 0.7013405561447144, + "learning_rate": 8.457488701392434e-05, + "loss": 2.5116, + "step": 11032 + }, + { + "epoch": 0.8904043257202808, + "grad_norm": 0.7207479476928711, + "learning_rate": 8.455928925118747e-05, + "loss": 2.6041, + "step": 11033 + }, + { + "epoch": 0.8904850294568638, + "grad_norm": 0.69600510597229, + "learning_rate": 8.454369187327348e-05, + "loss": 2.5794, + "step": 11034 + }, + { + "epoch": 0.8905657331934469, + "grad_norm": 0.6831288933753967, + "learning_rate": 8.452809488057108e-05, + "loss": 2.4682, + "step": 11035 + }, + { + "epoch": 0.8906464369300299, + "grad_norm": 0.6978991627693176, + "learning_rate": 8.451249827346901e-05, + "loss": 2.4862, + "step": 11036 + }, + { + "epoch": 0.8907271406666128, + "grad_norm": 0.6772337555885315, + "learning_rate": 8.4496902052356e-05, + "loss": 2.5357, + "step": 11037 + }, + { + "epoch": 0.8908078444031958, + "grad_norm": 0.6735778450965881, + "learning_rate": 8.448130621762067e-05, + "loss": 2.5115, + "step": 11038 + }, + { + "epoch": 0.8908885481397789, + "grad_norm": 0.6695345044136047, + "learning_rate": 8.446571076965177e-05, + "loss": 2.5083, + "step": 11039 + }, + { + "epoch": 0.8909692518763619, + "grad_norm": 0.685343325138092, + "learning_rate": 8.445011570883796e-05, + "loss": 2.5221, + "step": 11040 + }, + { + "epoch": 0.8910499556129449, + "grad_norm": 0.7030319571495056, + "learning_rate": 8.443452103556792e-05, + "loss": 2.5708, + "step": 11041 + }, + { + "epoch": 0.8911306593495278, + "grad_norm": 0.6910343766212463, + "learning_rate": 8.441892675023029e-05, + "loss": 2.5373, + "step": 11042 + }, + { + "epoch": 0.8912113630861109, + "grad_norm": 0.7207868099212646, + "learning_rate": 8.440333285321374e-05, + "loss": 2.5862, 
+ "step": 11043 + }, + { + "epoch": 0.8912920668226939, + "grad_norm": 0.6780788898468018, + "learning_rate": 8.438773934490692e-05, + "loss": 2.562, + "step": 11044 + }, + { + "epoch": 0.8913727705592769, + "grad_norm": 0.7010074257850647, + "learning_rate": 8.437214622569842e-05, + "loss": 2.4556, + "step": 11045 + }, + { + "epoch": 0.8914534742958599, + "grad_norm": 0.6763667464256287, + "learning_rate": 8.435655349597689e-05, + "loss": 2.5402, + "step": 11046 + }, + { + "epoch": 0.891534178032443, + "grad_norm": 0.6870944499969482, + "learning_rate": 8.4340961156131e-05, + "loss": 2.5307, + "step": 11047 + }, + { + "epoch": 0.8916148817690259, + "grad_norm": 0.7835623025894165, + "learning_rate": 8.432536920654923e-05, + "loss": 2.4974, + "step": 11048 + }, + { + "epoch": 0.8916955855056089, + "grad_norm": 0.7551318407058716, + "learning_rate": 8.430977764762024e-05, + "loss": 2.5206, + "step": 11049 + }, + { + "epoch": 0.8917762892421919, + "grad_norm": 0.6486842632293701, + "learning_rate": 8.429418647973265e-05, + "loss": 2.4909, + "step": 11050 + }, + { + "epoch": 0.891856992978775, + "grad_norm": 0.6894064545631409, + "learning_rate": 8.427859570327494e-05, + "loss": 2.5846, + "step": 11051 + }, + { + "epoch": 0.8919376967153579, + "grad_norm": 0.7597395181655884, + "learning_rate": 8.426300531863571e-05, + "loss": 2.5259, + "step": 11052 + }, + { + "epoch": 0.8920184004519409, + "grad_norm": 0.6784652471542358, + "learning_rate": 8.42474153262036e-05, + "loss": 2.5048, + "step": 11053 + }, + { + "epoch": 0.8920991041885239, + "grad_norm": 0.7703847885131836, + "learning_rate": 8.4231825726367e-05, + "loss": 2.4962, + "step": 11054 + }, + { + "epoch": 0.892179807925107, + "grad_norm": 0.6646561026573181, + "learning_rate": 8.421623651951454e-05, + "loss": 2.491, + "step": 11055 + }, + { + "epoch": 0.89226051166169, + "grad_norm": 0.6901054978370667, + "learning_rate": 8.420064770603475e-05, + "loss": 2.515, + "step": 11056 + }, + { + "epoch": 
0.8923412153982729, + "grad_norm": 0.6789328455924988, + "learning_rate": 8.41850592863161e-05, + "loss": 2.5481, + "step": 11057 + }, + { + "epoch": 0.8924219191348559, + "grad_norm": 0.6211017370223999, + "learning_rate": 8.41694712607471e-05, + "loss": 2.51, + "step": 11058 + }, + { + "epoch": 0.892502622871439, + "grad_norm": 0.6482260823249817, + "learning_rate": 8.415388362971626e-05, + "loss": 2.5418, + "step": 11059 + }, + { + "epoch": 0.892583326608022, + "grad_norm": 0.7627651691436768, + "learning_rate": 8.413829639361209e-05, + "loss": 2.5033, + "step": 11060 + }, + { + "epoch": 0.892664030344605, + "grad_norm": 0.6560852527618408, + "learning_rate": 8.412270955282302e-05, + "loss": 2.5442, + "step": 11061 + }, + { + "epoch": 0.8927447340811879, + "grad_norm": 0.7479087114334106, + "learning_rate": 8.410712310773752e-05, + "loss": 2.5189, + "step": 11062 + }, + { + "epoch": 0.892825437817771, + "grad_norm": 0.6970879435539246, + "learning_rate": 8.409153705874411e-05, + "loss": 2.5418, + "step": 11063 + }, + { + "epoch": 0.892906141554354, + "grad_norm": 0.6514548659324646, + "learning_rate": 8.407595140623113e-05, + "loss": 2.5277, + "step": 11064 + }, + { + "epoch": 0.892986845290937, + "grad_norm": 0.6745554804801941, + "learning_rate": 8.406036615058707e-05, + "loss": 2.5085, + "step": 11065 + }, + { + "epoch": 0.89306754902752, + "grad_norm": 0.7510363459587097, + "learning_rate": 8.404478129220037e-05, + "loss": 2.4941, + "step": 11066 + }, + { + "epoch": 0.8931482527641029, + "grad_norm": 0.6531470417976379, + "learning_rate": 8.402919683145941e-05, + "loss": 2.5363, + "step": 11067 + }, + { + "epoch": 0.893228956500686, + "grad_norm": 0.6861493587493896, + "learning_rate": 8.401361276875262e-05, + "loss": 2.6369, + "step": 11068 + }, + { + "epoch": 0.893309660237269, + "grad_norm": 0.6029497981071472, + "learning_rate": 8.39980291044684e-05, + "loss": 2.4953, + "step": 11069 + }, + { + "epoch": 0.893390363973852, + "grad_norm": 
0.6831715106964111, + "learning_rate": 8.39824458389951e-05, + "loss": 2.5074, + "step": 11070 + }, + { + "epoch": 0.8934710677104349, + "grad_norm": 0.7076299786567688, + "learning_rate": 8.396686297272112e-05, + "loss": 2.5934, + "step": 11071 + }, + { + "epoch": 0.893551771447018, + "grad_norm": 0.6941438913345337, + "learning_rate": 8.395128050603487e-05, + "loss": 2.5338, + "step": 11072 + }, + { + "epoch": 0.893632475183601, + "grad_norm": 0.6867249011993408, + "learning_rate": 8.393569843932463e-05, + "loss": 2.5311, + "step": 11073 + }, + { + "epoch": 0.893713178920184, + "grad_norm": 0.623991847038269, + "learning_rate": 8.392011677297877e-05, + "loss": 2.5133, + "step": 11074 + }, + { + "epoch": 0.893793882656767, + "grad_norm": 0.6808422803878784, + "learning_rate": 8.390453550738564e-05, + "loss": 2.5398, + "step": 11075 + }, + { + "epoch": 0.89387458639335, + "grad_norm": 0.7136701345443726, + "learning_rate": 8.388895464293357e-05, + "loss": 2.5415, + "step": 11076 + }, + { + "epoch": 0.893955290129933, + "grad_norm": 0.6814287304878235, + "learning_rate": 8.387337418001084e-05, + "loss": 2.4782, + "step": 11077 + }, + { + "epoch": 0.894035993866516, + "grad_norm": 0.8101940155029297, + "learning_rate": 8.385779411900579e-05, + "loss": 2.5292, + "step": 11078 + }, + { + "epoch": 0.894116697603099, + "grad_norm": 0.7106796503067017, + "learning_rate": 8.384221446030676e-05, + "loss": 2.5819, + "step": 11079 + }, + { + "epoch": 0.8941974013396821, + "grad_norm": 0.7840015292167664, + "learning_rate": 8.382663520430191e-05, + "loss": 2.5243, + "step": 11080 + }, + { + "epoch": 0.894278105076265, + "grad_norm": 0.7037288546562195, + "learning_rate": 8.381105635137959e-05, + "loss": 2.5606, + "step": 11081 + }, + { + "epoch": 0.894358808812848, + "grad_norm": 0.671558678150177, + "learning_rate": 8.379547790192812e-05, + "loss": 2.4923, + "step": 11082 + }, + { + "epoch": 0.894439512549431, + "grad_norm": 0.6789675951004028, + "learning_rate": 
8.377989985633567e-05, + "loss": 2.5281, + "step": 11083 + }, + { + "epoch": 0.8945202162860141, + "grad_norm": 0.6777840852737427, + "learning_rate": 8.37643222149905e-05, + "loss": 2.5159, + "step": 11084 + }, + { + "epoch": 0.8946009200225971, + "grad_norm": 0.6920693516731262, + "learning_rate": 8.374874497828089e-05, + "loss": 2.4952, + "step": 11085 + }, + { + "epoch": 0.89468162375918, + "grad_norm": 0.7394022941589355, + "learning_rate": 8.373316814659502e-05, + "loss": 2.5035, + "step": 11086 + }, + { + "epoch": 0.894762327495763, + "grad_norm": 0.625960648059845, + "learning_rate": 8.37175917203211e-05, + "loss": 2.5324, + "step": 11087 + }, + { + "epoch": 0.8948430312323461, + "grad_norm": 0.6848758459091187, + "learning_rate": 8.370201569984742e-05, + "loss": 2.5312, + "step": 11088 + }, + { + "epoch": 0.8949237349689291, + "grad_norm": 0.7207037210464478, + "learning_rate": 8.368644008556205e-05, + "loss": 2.5807, + "step": 11089 + }, + { + "epoch": 0.895004438705512, + "grad_norm": 0.7582261562347412, + "learning_rate": 8.367086487785326e-05, + "loss": 2.532, + "step": 11090 + }, + { + "epoch": 0.895085142442095, + "grad_norm": 0.6916806101799011, + "learning_rate": 8.36552900771092e-05, + "loss": 2.4772, + "step": 11091 + }, + { + "epoch": 0.8951658461786781, + "grad_norm": 0.6457386016845703, + "learning_rate": 8.363971568371805e-05, + "loss": 2.4952, + "step": 11092 + }, + { + "epoch": 0.8952465499152611, + "grad_norm": 0.7006754279136658, + "learning_rate": 8.362414169806792e-05, + "loss": 2.5818, + "step": 11093 + }, + { + "epoch": 0.8953272536518441, + "grad_norm": 0.6939932703971863, + "learning_rate": 8.3608568120547e-05, + "loss": 2.5411, + "step": 11094 + }, + { + "epoch": 0.895407957388427, + "grad_norm": 0.6314546465873718, + "learning_rate": 8.359299495154343e-05, + "loss": 2.5408, + "step": 11095 + }, + { + "epoch": 0.8954886611250101, + "grad_norm": 0.7202826738357544, + "learning_rate": 8.357742219144529e-05, + "loss": 2.4925, + 
"step": 11096 + }, + { + "epoch": 0.8955693648615931, + "grad_norm": 0.6475295424461365, + "learning_rate": 8.356184984064071e-05, + "loss": 2.5023, + "step": 11097 + }, + { + "epoch": 0.8956500685981761, + "grad_norm": 0.6161238551139832, + "learning_rate": 8.354627789951785e-05, + "loss": 2.5053, + "step": 11098 + }, + { + "epoch": 0.8957307723347591, + "grad_norm": 0.6919825077056885, + "learning_rate": 8.353070636846472e-05, + "loss": 2.5387, + "step": 11099 + }, + { + "epoch": 0.8958114760713421, + "grad_norm": 0.6374878883361816, + "learning_rate": 8.351513524786944e-05, + "loss": 2.5526, + "step": 11100 + }, + { + "epoch": 0.8958921798079251, + "grad_norm": 0.7041093707084656, + "learning_rate": 8.349956453812009e-05, + "loss": 2.5282, + "step": 11101 + }, + { + "epoch": 0.8959728835445081, + "grad_norm": 0.7252324819564819, + "learning_rate": 8.348399423960471e-05, + "loss": 2.5723, + "step": 11102 + }, + { + "epoch": 0.8960535872810911, + "grad_norm": 0.681682825088501, + "learning_rate": 8.346842435271137e-05, + "loss": 2.5284, + "step": 11103 + }, + { + "epoch": 0.8961342910176742, + "grad_norm": 0.7293850183486938, + "learning_rate": 8.34528548778281e-05, + "loss": 2.5014, + "step": 11104 + }, + { + "epoch": 0.8962149947542571, + "grad_norm": 0.7057846188545227, + "learning_rate": 8.343728581534299e-05, + "loss": 2.5502, + "step": 11105 + }, + { + "epoch": 0.8962956984908401, + "grad_norm": 0.6740830540657043, + "learning_rate": 8.342171716564398e-05, + "loss": 2.5205, + "step": 11106 + }, + { + "epoch": 0.8963764022274231, + "grad_norm": 0.6917470097541809, + "learning_rate": 8.340614892911907e-05, + "loss": 2.5216, + "step": 11107 + }, + { + "epoch": 0.8964571059640062, + "grad_norm": 0.7495635151863098, + "learning_rate": 8.339058110615638e-05, + "loss": 2.5509, + "step": 11108 + }, + { + "epoch": 0.8965378097005892, + "grad_norm": 0.6687765717506409, + "learning_rate": 8.33750136971438e-05, + "loss": 2.5286, + "step": 11109 + }, + { + "epoch": 
0.8966185134371721, + "grad_norm": 0.6901381015777588, + "learning_rate": 8.335944670246931e-05, + "loss": 2.5545, + "step": 11110 + }, + { + "epoch": 0.8966992171737551, + "grad_norm": 0.6645506024360657, + "learning_rate": 8.334388012252094e-05, + "loss": 2.4883, + "step": 11111 + }, + { + "epoch": 0.8967799209103382, + "grad_norm": 0.6427997350692749, + "learning_rate": 8.332831395768662e-05, + "loss": 2.5103, + "step": 11112 + }, + { + "epoch": 0.8968606246469212, + "grad_norm": 0.7224035263061523, + "learning_rate": 8.331274820835425e-05, + "loss": 2.5086, + "step": 11113 + }, + { + "epoch": 0.8969413283835042, + "grad_norm": 0.6918233036994934, + "learning_rate": 8.329718287491188e-05, + "loss": 2.5222, + "step": 11114 + }, + { + "epoch": 0.8970220321200871, + "grad_norm": 0.735583484172821, + "learning_rate": 8.328161795774734e-05, + "loss": 2.5277, + "step": 11115 + }, + { + "epoch": 0.8971027358566702, + "grad_norm": 0.6624864339828491, + "learning_rate": 8.326605345724857e-05, + "loss": 2.532, + "step": 11116 + }, + { + "epoch": 0.8971834395932532, + "grad_norm": 0.6227770447731018, + "learning_rate": 8.325048937380352e-05, + "loss": 2.5386, + "step": 11117 + }, + { + "epoch": 0.8972641433298362, + "grad_norm": 0.6483022570610046, + "learning_rate": 8.323492570780004e-05, + "loss": 2.4958, + "step": 11118 + }, + { + "epoch": 0.8973448470664191, + "grad_norm": 0.7072618007659912, + "learning_rate": 8.321936245962602e-05, + "loss": 2.4931, + "step": 11119 + }, + { + "epoch": 0.8974255508030021, + "grad_norm": 0.6848764419555664, + "learning_rate": 8.320379962966937e-05, + "loss": 2.4549, + "step": 11120 + }, + { + "epoch": 0.8975062545395852, + "grad_norm": 0.6819620132446289, + "learning_rate": 8.318823721831795e-05, + "loss": 2.5156, + "step": 11121 + }, + { + "epoch": 0.8975869582761682, + "grad_norm": 0.6834476590156555, + "learning_rate": 8.31726752259596e-05, + "loss": 2.507, + "step": 11122 + }, + { + "epoch": 0.8976676620127512, + "grad_norm": 
0.6785772442817688, + "learning_rate": 8.315711365298214e-05, + "loss": 2.5086, + "step": 11123 + }, + { + "epoch": 0.8977483657493341, + "grad_norm": 0.6303566098213196, + "learning_rate": 8.314155249977351e-05, + "loss": 2.5087, + "step": 11124 + }, + { + "epoch": 0.8978290694859172, + "grad_norm": 0.6544361710548401, + "learning_rate": 8.31259917667214e-05, + "loss": 2.505, + "step": 11125 + }, + { + "epoch": 0.8979097732225002, + "grad_norm": 0.8135818243026733, + "learning_rate": 8.311043145421369e-05, + "loss": 2.5139, + "step": 11126 + }, + { + "epoch": 0.8979904769590832, + "grad_norm": 0.6744341254234314, + "learning_rate": 8.309487156263818e-05, + "loss": 2.4797, + "step": 11127 + }, + { + "epoch": 0.8980711806956662, + "grad_norm": 0.6138790845870972, + "learning_rate": 8.307931209238267e-05, + "loss": 2.5334, + "step": 11128 + }, + { + "epoch": 0.8981518844322492, + "grad_norm": 0.702434241771698, + "learning_rate": 8.306375304383492e-05, + "loss": 2.5343, + "step": 11129 + }, + { + "epoch": 0.8982325881688322, + "grad_norm": 0.6787155270576477, + "learning_rate": 8.304819441738275e-05, + "loss": 2.507, + "step": 11130 + }, + { + "epoch": 0.8983132919054152, + "grad_norm": 0.6963719129562378, + "learning_rate": 8.303263621341386e-05, + "loss": 2.5238, + "step": 11131 + }, + { + "epoch": 0.8983939956419982, + "grad_norm": 0.6623271107673645, + "learning_rate": 8.3017078432316e-05, + "loss": 2.5206, + "step": 11132 + }, + { + "epoch": 0.8984746993785813, + "grad_norm": 0.777222752571106, + "learning_rate": 8.300152107447701e-05, + "loss": 2.5004, + "step": 11133 + }, + { + "epoch": 0.8985554031151642, + "grad_norm": 0.6788455247879028, + "learning_rate": 8.29859641402845e-05, + "loss": 2.5735, + "step": 11134 + }, + { + "epoch": 0.8986361068517472, + "grad_norm": 0.6595063209533691, + "learning_rate": 8.297040763012624e-05, + "loss": 2.4988, + "step": 11135 + }, + { + "epoch": 0.8987168105883302, + "grad_norm": 0.7105697989463806, + "learning_rate": 
8.295485154438994e-05, + "loss": 2.5531, + "step": 11136 + }, + { + "epoch": 0.8987975143249133, + "grad_norm": 0.6884949803352356, + "learning_rate": 8.29392958834633e-05, + "loss": 2.5158, + "step": 11137 + }, + { + "epoch": 0.8988782180614963, + "grad_norm": 0.7178345322608948, + "learning_rate": 8.2923740647734e-05, + "loss": 2.5836, + "step": 11138 + }, + { + "epoch": 0.8989589217980792, + "grad_norm": 0.7000541687011719, + "learning_rate": 8.290818583758973e-05, + "loss": 2.5345, + "step": 11139 + }, + { + "epoch": 0.8990396255346622, + "grad_norm": 0.6808128952980042, + "learning_rate": 8.289263145341816e-05, + "loss": 2.5227, + "step": 11140 + }, + { + "epoch": 0.8991203292712453, + "grad_norm": 0.7047473788261414, + "learning_rate": 8.287707749560691e-05, + "loss": 2.477, + "step": 11141 + }, + { + "epoch": 0.8992010330078283, + "grad_norm": 0.6654812693595886, + "learning_rate": 8.286152396454365e-05, + "loss": 2.4575, + "step": 11142 + }, + { + "epoch": 0.8992817367444113, + "grad_norm": 0.6690360307693481, + "learning_rate": 8.284597086061603e-05, + "loss": 2.4755, + "step": 11143 + }, + { + "epoch": 0.8993624404809942, + "grad_norm": 0.7270147204399109, + "learning_rate": 8.283041818421164e-05, + "loss": 2.5893, + "step": 11144 + }, + { + "epoch": 0.8994431442175773, + "grad_norm": 0.5977498888969421, + "learning_rate": 8.28148659357181e-05, + "loss": 2.5108, + "step": 11145 + }, + { + "epoch": 0.8995238479541603, + "grad_norm": 0.694593071937561, + "learning_rate": 8.279931411552307e-05, + "loss": 2.5036, + "step": 11146 + }, + { + "epoch": 0.8996045516907433, + "grad_norm": 0.7395440936088562, + "learning_rate": 8.278376272401404e-05, + "loss": 2.5244, + "step": 11147 + }, + { + "epoch": 0.8996852554273262, + "grad_norm": 0.6483517289161682, + "learning_rate": 8.276821176157867e-05, + "loss": 2.5619, + "step": 11148 + }, + { + "epoch": 0.8997659591639093, + "grad_norm": 0.6996768116950989, + "learning_rate": 8.275266122860454e-05, + "loss": 2.5275, + 
"step": 11149 + }, + { + "epoch": 0.8998466629004923, + "grad_norm": 0.661122739315033, + "learning_rate": 8.273711112547914e-05, + "loss": 2.5053, + "step": 11150 + }, + { + "epoch": 0.8999273666370753, + "grad_norm": 0.6919111609458923, + "learning_rate": 8.272156145259006e-05, + "loss": 2.578, + "step": 11151 + }, + { + "epoch": 0.9000080703736583, + "grad_norm": 0.6680958867073059, + "learning_rate": 8.270601221032482e-05, + "loss": 2.4942, + "step": 11152 + }, + { + "epoch": 0.9000887741102414, + "grad_norm": 0.6782989501953125, + "learning_rate": 8.269046339907101e-05, + "loss": 2.5461, + "step": 11153 + }, + { + "epoch": 0.9001694778468243, + "grad_norm": 0.743468165397644, + "learning_rate": 8.267491501921605e-05, + "loss": 2.629, + "step": 11154 + }, + { + "epoch": 0.9002501815834073, + "grad_norm": 0.709562361240387, + "learning_rate": 8.265936707114751e-05, + "loss": 2.566, + "step": 11155 + }, + { + "epoch": 0.9003308853199903, + "grad_norm": 0.7075676918029785, + "learning_rate": 8.264381955525291e-05, + "loss": 2.5409, + "step": 11156 + }, + { + "epoch": 0.9004115890565734, + "grad_norm": 0.7021335959434509, + "learning_rate": 8.262827247191963e-05, + "loss": 2.5606, + "step": 11157 + }, + { + "epoch": 0.9004922927931563, + "grad_norm": 0.6507331132888794, + "learning_rate": 8.261272582153524e-05, + "loss": 2.5557, + "step": 11158 + }, + { + "epoch": 0.9005729965297393, + "grad_norm": 0.7182760238647461, + "learning_rate": 8.25971796044872e-05, + "loss": 2.5567, + "step": 11159 + }, + { + "epoch": 0.9006537002663223, + "grad_norm": 0.6632338762283325, + "learning_rate": 8.258163382116291e-05, + "loss": 2.5081, + "step": 11160 + }, + { + "epoch": 0.9007344040029054, + "grad_norm": 0.6889928579330444, + "learning_rate": 8.256608847194983e-05, + "loss": 2.5034, + "step": 11161 + }, + { + "epoch": 0.9008151077394884, + "grad_norm": 0.6374824047088623, + "learning_rate": 8.255054355723542e-05, + "loss": 2.4826, + "step": 11162 + }, + { + "epoch": 
0.9008958114760713, + "grad_norm": 0.7100771069526672, + "learning_rate": 8.253499907740706e-05, + "loss": 2.4666, + "step": 11163 + }, + { + "epoch": 0.9009765152126543, + "grad_norm": 0.8141123652458191, + "learning_rate": 8.251945503285218e-05, + "loss": 2.5339, + "step": 11164 + }, + { + "epoch": 0.9010572189492374, + "grad_norm": 0.6621670722961426, + "learning_rate": 8.250391142395822e-05, + "loss": 2.4805, + "step": 11165 + }, + { + "epoch": 0.9011379226858204, + "grad_norm": 0.6624772548675537, + "learning_rate": 8.248836825111245e-05, + "loss": 2.5148, + "step": 11166 + }, + { + "epoch": 0.9012186264224034, + "grad_norm": 0.6783565282821655, + "learning_rate": 8.247282551470235e-05, + "loss": 2.4481, + "step": 11167 + }, + { + "epoch": 0.9012993301589863, + "grad_norm": 0.700089156627655, + "learning_rate": 8.245728321511525e-05, + "loss": 2.5649, + "step": 11168 + }, + { + "epoch": 0.9013800338955693, + "grad_norm": 0.6765339970588684, + "learning_rate": 8.244174135273852e-05, + "loss": 2.5221, + "step": 11169 + }, + { + "epoch": 0.9014607376321524, + "grad_norm": 0.6896056532859802, + "learning_rate": 8.242619992795948e-05, + "loss": 2.4742, + "step": 11170 + }, + { + "epoch": 0.9015414413687354, + "grad_norm": 0.7134374976158142, + "learning_rate": 8.241065894116547e-05, + "loss": 2.5231, + "step": 11171 + }, + { + "epoch": 0.9016221451053184, + "grad_norm": 0.6939442753791809, + "learning_rate": 8.239511839274385e-05, + "loss": 2.5159, + "step": 11172 + }, + { + "epoch": 0.9017028488419013, + "grad_norm": 0.6780345439910889, + "learning_rate": 8.237957828308187e-05, + "loss": 2.5474, + "step": 11173 + }, + { + "epoch": 0.9017835525784844, + "grad_norm": 0.6532382965087891, + "learning_rate": 8.236403861256687e-05, + "loss": 2.4982, + "step": 11174 + }, + { + "epoch": 0.9018642563150674, + "grad_norm": 0.6918137073516846, + "learning_rate": 8.234849938158615e-05, + "loss": 2.4657, + "step": 11175 + }, + { + "epoch": 0.9019449600516504, + "grad_norm": 
0.6838762164115906, + "learning_rate": 8.233296059052695e-05, + "loss": 2.5405, + "step": 11176 + }, + { + "epoch": 0.9020256637882333, + "grad_norm": 0.7560290098190308, + "learning_rate": 8.231742223977653e-05, + "loss": 2.5379, + "step": 11177 + }, + { + "epoch": 0.9021063675248164, + "grad_norm": 0.6673319339752197, + "learning_rate": 8.230188432972221e-05, + "loss": 2.4669, + "step": 11178 + }, + { + "epoch": 0.9021870712613994, + "grad_norm": 0.7486294507980347, + "learning_rate": 8.228634686075116e-05, + "loss": 2.526, + "step": 11179 + }, + { + "epoch": 0.9022677749979824, + "grad_norm": 0.7012811303138733, + "learning_rate": 8.227080983325067e-05, + "loss": 2.5544, + "step": 11180 + }, + { + "epoch": 0.9023484787345654, + "grad_norm": 0.6807447075843811, + "learning_rate": 8.225527324760796e-05, + "loss": 2.5139, + "step": 11181 + }, + { + "epoch": 0.9024291824711484, + "grad_norm": 0.7594932317733765, + "learning_rate": 8.223973710421018e-05, + "loss": 2.539, + "step": 11182 + }, + { + "epoch": 0.9025098862077314, + "grad_norm": 0.6764204502105713, + "learning_rate": 8.22242014034446e-05, + "loss": 2.6128, + "step": 11183 + }, + { + "epoch": 0.9025905899443144, + "grad_norm": 0.6499967575073242, + "learning_rate": 8.220866614569837e-05, + "loss": 2.5459, + "step": 11184 + }, + { + "epoch": 0.9026712936808974, + "grad_norm": 0.673076331615448, + "learning_rate": 8.219313133135876e-05, + "loss": 2.5852, + "step": 11185 + }, + { + "epoch": 0.9027519974174805, + "grad_norm": 0.784854531288147, + "learning_rate": 8.21775969608128e-05, + "loss": 2.5586, + "step": 11186 + }, + { + "epoch": 0.9028327011540634, + "grad_norm": 0.658963680267334, + "learning_rate": 8.216206303444771e-05, + "loss": 2.4376, + "step": 11187 + }, + { + "epoch": 0.9029134048906464, + "grad_norm": 0.6456249356269836, + "learning_rate": 8.214652955265067e-05, + "loss": 2.5166, + "step": 11188 + }, + { + "epoch": 0.9029941086272294, + "grad_norm": 0.6940007209777832, + "learning_rate": 
8.213099651580874e-05, + "loss": 2.4992, + "step": 11189 + }, + { + "epoch": 0.9030748123638125, + "grad_norm": 0.6661425828933716, + "learning_rate": 8.211546392430911e-05, + "loss": 2.5177, + "step": 11190 + }, + { + "epoch": 0.9031555161003955, + "grad_norm": 0.647834300994873, + "learning_rate": 8.20999317785389e-05, + "loss": 2.4666, + "step": 11191 + }, + { + "epoch": 0.9032362198369784, + "grad_norm": 0.7673383355140686, + "learning_rate": 8.208440007888515e-05, + "loss": 2.4852, + "step": 11192 + }, + { + "epoch": 0.9033169235735614, + "grad_norm": 0.7033390998840332, + "learning_rate": 8.206886882573498e-05, + "loss": 2.5549, + "step": 11193 + }, + { + "epoch": 0.9033976273101445, + "grad_norm": 0.6871141195297241, + "learning_rate": 8.205333801947548e-05, + "loss": 2.4585, + "step": 11194 + }, + { + "epoch": 0.9034783310467275, + "grad_norm": 0.7201984524726868, + "learning_rate": 8.20378076604937e-05, + "loss": 2.5271, + "step": 11195 + }, + { + "epoch": 0.9035590347833105, + "grad_norm": 0.704060971736908, + "learning_rate": 8.202227774917671e-05, + "loss": 2.4915, + "step": 11196 + }, + { + "epoch": 0.9036397385198934, + "grad_norm": 0.6833879947662354, + "learning_rate": 8.200674828591156e-05, + "loss": 2.4496, + "step": 11197 + }, + { + "epoch": 0.9037204422564765, + "grad_norm": 0.6564866304397583, + "learning_rate": 8.199121927108527e-05, + "loss": 2.4818, + "step": 11198 + }, + { + "epoch": 0.9038011459930595, + "grad_norm": 0.6970151662826538, + "learning_rate": 8.197569070508486e-05, + "loss": 2.5812, + "step": 11199 + }, + { + "epoch": 0.9038818497296425, + "grad_norm": 0.7147194743156433, + "learning_rate": 8.196016258829737e-05, + "loss": 2.5543, + "step": 11200 + }, + { + "epoch": 0.9039625534662254, + "grad_norm": 0.6357648968696594, + "learning_rate": 8.194463492110981e-05, + "loss": 2.5254, + "step": 11201 + }, + { + "epoch": 0.9040432572028085, + "grad_norm": 0.7113756537437439, + "learning_rate": 8.19291077039091e-05, + "loss": 2.5179, 
+ "step": 11202 + }, + { + "epoch": 0.9041239609393915, + "grad_norm": 0.7252987623214722, + "learning_rate": 8.191358093708228e-05, + "loss": 2.5658, + "step": 11203 + }, + { + "epoch": 0.9042046646759745, + "grad_norm": 0.7095803618431091, + "learning_rate": 8.189805462101631e-05, + "loss": 2.583, + "step": 11204 + }, + { + "epoch": 0.9042853684125575, + "grad_norm": 0.7447760105133057, + "learning_rate": 8.188252875609812e-05, + "loss": 2.5608, + "step": 11205 + }, + { + "epoch": 0.9043660721491406, + "grad_norm": 0.6578439474105835, + "learning_rate": 8.186700334271468e-05, + "loss": 2.508, + "step": 11206 + }, + { + "epoch": 0.9044467758857235, + "grad_norm": 0.6776832938194275, + "learning_rate": 8.185147838125296e-05, + "loss": 2.6188, + "step": 11207 + }, + { + "epoch": 0.9045274796223065, + "grad_norm": 0.6559253931045532, + "learning_rate": 8.183595387209976e-05, + "loss": 2.5307, + "step": 11208 + }, + { + "epoch": 0.9046081833588895, + "grad_norm": 0.7078405022621155, + "learning_rate": 8.18204298156421e-05, + "loss": 2.5545, + "step": 11209 + }, + { + "epoch": 0.9046888870954726, + "grad_norm": 0.6790273189544678, + "learning_rate": 8.18049062122669e-05, + "loss": 2.4963, + "step": 11210 + }, + { + "epoch": 0.9047695908320555, + "grad_norm": 0.6888250708580017, + "learning_rate": 8.178938306236095e-05, + "loss": 2.5108, + "step": 11211 + }, + { + "epoch": 0.9048502945686385, + "grad_norm": 0.6438474059104919, + "learning_rate": 8.177386036631119e-05, + "loss": 2.4976, + "step": 11212 + }, + { + "epoch": 0.9049309983052215, + "grad_norm": 0.6786646842956543, + "learning_rate": 8.175833812450445e-05, + "loss": 2.4584, + "step": 11213 + }, + { + "epoch": 0.9050117020418046, + "grad_norm": 0.6480324268341064, + "learning_rate": 8.174281633732764e-05, + "loss": 2.5021, + "step": 11214 + }, + { + "epoch": 0.9050924057783876, + "grad_norm": 0.7232171893119812, + "learning_rate": 8.172729500516756e-05, + "loss": 2.4742, + "step": 11215 + }, + { + "epoch": 
0.9051731095149705, + "grad_norm": 0.7048845291137695, + "learning_rate": 8.171177412841105e-05, + "loss": 2.518, + "step": 11216 + }, + { + "epoch": 0.9052538132515535, + "grad_norm": 0.6363180875778198, + "learning_rate": 8.169625370744496e-05, + "loss": 2.5154, + "step": 11217 + }, + { + "epoch": 0.9053345169881366, + "grad_norm": 0.7176045179367065, + "learning_rate": 8.168073374265605e-05, + "loss": 2.5182, + "step": 11218 + }, + { + "epoch": 0.9054152207247196, + "grad_norm": 0.7011643052101135, + "learning_rate": 8.166521423443112e-05, + "loss": 2.5615, + "step": 11219 + }, + { + "epoch": 0.9054959244613026, + "grad_norm": 0.6853327751159668, + "learning_rate": 8.164969518315704e-05, + "loss": 2.5057, + "step": 11220 + }, + { + "epoch": 0.9055766281978855, + "grad_norm": 0.6972528696060181, + "learning_rate": 8.163417658922049e-05, + "loss": 2.4949, + "step": 11221 + }, + { + "epoch": 0.9056573319344685, + "grad_norm": 0.6780978441238403, + "learning_rate": 8.161865845300824e-05, + "loss": 2.5601, + "step": 11222 + }, + { + "epoch": 0.9057380356710516, + "grad_norm": 0.6454098224639893, + "learning_rate": 8.160314077490711e-05, + "loss": 2.4203, + "step": 11223 + }, + { + "epoch": 0.9058187394076346, + "grad_norm": 0.7300907969474792, + "learning_rate": 8.158762355530378e-05, + "loss": 2.4818, + "step": 11224 + }, + { + "epoch": 0.9058994431442176, + "grad_norm": 0.682475745677948, + "learning_rate": 8.1572106794585e-05, + "loss": 2.4852, + "step": 11225 + }, + { + "epoch": 0.9059801468808005, + "grad_norm": 0.6666192412376404, + "learning_rate": 8.155659049313754e-05, + "loss": 2.5642, + "step": 11226 + }, + { + "epoch": 0.9060608506173836, + "grad_norm": 0.6873177886009216, + "learning_rate": 8.154107465134801e-05, + "loss": 2.5163, + "step": 11227 + }, + { + "epoch": 0.9061415543539666, + "grad_norm": 0.6704845428466797, + "learning_rate": 8.152555926960315e-05, + "loss": 2.5481, + "step": 11228 + }, + { + "epoch": 0.9062222580905496, + "grad_norm": 
0.6340618133544922, + "learning_rate": 8.151004434828963e-05, + "loss": 2.4701, + "step": 11229 + }, + { + "epoch": 0.9063029618271325, + "grad_norm": 0.7886226177215576, + "learning_rate": 8.14945298877942e-05, + "loss": 2.5322, + "step": 11230 + }, + { + "epoch": 0.9063836655637156, + "grad_norm": 0.7086018919944763, + "learning_rate": 8.14790158885034e-05, + "loss": 2.4909, + "step": 11231 + }, + { + "epoch": 0.9064643693002986, + "grad_norm": 0.6791329979896545, + "learning_rate": 8.146350235080396e-05, + "loss": 2.4438, + "step": 11232 + }, + { + "epoch": 0.9065450730368816, + "grad_norm": 0.7070720791816711, + "learning_rate": 8.14479892750825e-05, + "loss": 2.528, + "step": 11233 + }, + { + "epoch": 0.9066257767734646, + "grad_norm": 0.6551348567008972, + "learning_rate": 8.143247666172564e-05, + "loss": 2.4747, + "step": 11234 + }, + { + "epoch": 0.9067064805100477, + "grad_norm": 0.6691645979881287, + "learning_rate": 8.141696451111997e-05, + "loss": 2.5038, + "step": 11235 + }, + { + "epoch": 0.9067871842466306, + "grad_norm": 0.6814864277839661, + "learning_rate": 8.14014528236522e-05, + "loss": 2.5737, + "step": 11236 + }, + { + "epoch": 0.9068678879832136, + "grad_norm": 0.7442377209663391, + "learning_rate": 8.138594159970877e-05, + "loss": 2.5839, + "step": 11237 + }, + { + "epoch": 0.9069485917197966, + "grad_norm": 0.6861338019371033, + "learning_rate": 8.137043083967634e-05, + "loss": 2.567, + "step": 11238 + }, + { + "epoch": 0.9070292954563797, + "grad_norm": 0.7056479454040527, + "learning_rate": 8.135492054394151e-05, + "loss": 2.5297, + "step": 11239 + }, + { + "epoch": 0.9071099991929626, + "grad_norm": 0.7166962623596191, + "learning_rate": 8.133941071289076e-05, + "loss": 2.4834, + "step": 11240 + }, + { + "epoch": 0.9071907029295456, + "grad_norm": 0.6285616159439087, + "learning_rate": 8.132390134691068e-05, + "loss": 2.5066, + "step": 11241 + }, + { + "epoch": 0.9072714066661286, + "grad_norm": 0.681915283203125, + "learning_rate": 
8.130839244638783e-05, + "loss": 2.5387, + "step": 11242 + }, + { + "epoch": 0.9073521104027117, + "grad_norm": 0.6876898407936096, + "learning_rate": 8.129288401170866e-05, + "loss": 2.4465, + "step": 11243 + }, + { + "epoch": 0.9074328141392947, + "grad_norm": 0.657132625579834, + "learning_rate": 8.127737604325975e-05, + "loss": 2.499, + "step": 11244 + }, + { + "epoch": 0.9075135178758776, + "grad_norm": 0.6678825616836548, + "learning_rate": 8.126186854142752e-05, + "loss": 2.4872, + "step": 11245 + }, + { + "epoch": 0.9075942216124606, + "grad_norm": 0.7296879291534424, + "learning_rate": 8.124636150659858e-05, + "loss": 2.4783, + "step": 11246 + }, + { + "epoch": 0.9076749253490437, + "grad_norm": 0.7087056040763855, + "learning_rate": 8.12308549391593e-05, + "loss": 2.507, + "step": 11247 + }, + { + "epoch": 0.9077556290856267, + "grad_norm": 0.7099738121032715, + "learning_rate": 8.121534883949616e-05, + "loss": 2.5317, + "step": 11248 + }, + { + "epoch": 0.9078363328222097, + "grad_norm": 0.6421170830726624, + "learning_rate": 8.119984320799566e-05, + "loss": 2.5291, + "step": 11249 + }, + { + "epoch": 0.9079170365587926, + "grad_norm": 0.6835018396377563, + "learning_rate": 8.11843380450442e-05, + "loss": 2.5523, + "step": 11250 + }, + { + "epoch": 0.9079977402953757, + "grad_norm": 0.6638229489326477, + "learning_rate": 8.11688333510282e-05, + "loss": 2.5128, + "step": 11251 + }, + { + "epoch": 0.9080784440319587, + "grad_norm": 0.6783459186553955, + "learning_rate": 8.115332912633415e-05, + "loss": 2.5485, + "step": 11252 + }, + { + "epoch": 0.9081591477685417, + "grad_norm": 0.65911865234375, + "learning_rate": 8.113782537134838e-05, + "loss": 2.5408, + "step": 11253 + }, + { + "epoch": 0.9082398515051247, + "grad_norm": 0.6844244003295898, + "learning_rate": 8.112232208645729e-05, + "loss": 2.6067, + "step": 11254 + }, + { + "epoch": 0.9083205552417077, + "grad_norm": 0.6896870136260986, + "learning_rate": 8.110681927204729e-05, + "loss": 2.5444, + 
"step": 11255 + }, + { + "epoch": 0.9084012589782907, + "grad_norm": 0.6693820953369141, + "learning_rate": 8.109131692850473e-05, + "loss": 2.5118, + "step": 11256 + }, + { + "epoch": 0.9084819627148737, + "grad_norm": 0.6401854753494263, + "learning_rate": 8.107581505621599e-05, + "loss": 2.4811, + "step": 11257 + }, + { + "epoch": 0.9085626664514567, + "grad_norm": 0.6861663460731506, + "learning_rate": 8.106031365556743e-05, + "loss": 2.4633, + "step": 11258 + }, + { + "epoch": 0.9086433701880398, + "grad_norm": 0.6631655097007751, + "learning_rate": 8.104481272694533e-05, + "loss": 2.5748, + "step": 11259 + }, + { + "epoch": 0.9087240739246227, + "grad_norm": 0.6499454975128174, + "learning_rate": 8.102931227073604e-05, + "loss": 2.5573, + "step": 11260 + }, + { + "epoch": 0.9088047776612057, + "grad_norm": 0.7214524149894714, + "learning_rate": 8.10138122873259e-05, + "loss": 2.4905, + "step": 11261 + }, + { + "epoch": 0.9088854813977887, + "grad_norm": 0.6481152176856995, + "learning_rate": 8.099831277710122e-05, + "loss": 2.5073, + "step": 11262 + }, + { + "epoch": 0.9089661851343718, + "grad_norm": 0.6666486859321594, + "learning_rate": 8.09828137404482e-05, + "loss": 2.5379, + "step": 11263 + }, + { + "epoch": 0.9090468888709548, + "grad_norm": 0.7186474800109863, + "learning_rate": 8.096731517775319e-05, + "loss": 2.5164, + "step": 11264 + }, + { + "epoch": 0.9091275926075377, + "grad_norm": 0.6838653087615967, + "learning_rate": 8.095181708940245e-05, + "loss": 2.49, + "step": 11265 + }, + { + "epoch": 0.9092082963441207, + "grad_norm": 0.7740866541862488, + "learning_rate": 8.093631947578221e-05, + "loss": 2.5487, + "step": 11266 + }, + { + "epoch": 0.9092890000807038, + "grad_norm": 0.7198607325553894, + "learning_rate": 8.092082233727871e-05, + "loss": 2.4477, + "step": 11267 + }, + { + "epoch": 0.9093697038172868, + "grad_norm": 0.6454673409461975, + "learning_rate": 8.090532567427825e-05, + "loss": 2.523, + "step": 11268 + }, + { + "epoch": 
0.9094504075538697, + "grad_norm": 0.6169581413269043, + "learning_rate": 8.088982948716692e-05, + "loss": 2.4924, + "step": 11269 + }, + { + "epoch": 0.9095311112904527, + "grad_norm": 0.7034861445426941, + "learning_rate": 8.0874333776331e-05, + "loss": 2.4756, + "step": 11270 + }, + { + "epoch": 0.9096118150270357, + "grad_norm": 0.7231355309486389, + "learning_rate": 8.085883854215671e-05, + "loss": 2.4963, + "step": 11271 + }, + { + "epoch": 0.9096925187636188, + "grad_norm": 0.6597892045974731, + "learning_rate": 8.084334378503017e-05, + "loss": 2.5617, + "step": 11272 + }, + { + "epoch": 0.9097732225002018, + "grad_norm": 0.7257365584373474, + "learning_rate": 8.082784950533759e-05, + "loss": 2.5293, + "step": 11273 + }, + { + "epoch": 0.9098539262367847, + "grad_norm": 0.7305313944816589, + "learning_rate": 8.081235570346512e-05, + "loss": 2.5355, + "step": 11274 + }, + { + "epoch": 0.9099346299733677, + "grad_norm": 0.6814435720443726, + "learning_rate": 8.07968623797989e-05, + "loss": 2.4842, + "step": 11275 + }, + { + "epoch": 0.9100153337099508, + "grad_norm": 0.7342902421951294, + "learning_rate": 8.078136953472506e-05, + "loss": 2.4817, + "step": 11276 + }, + { + "epoch": 0.9100960374465338, + "grad_norm": 0.6456516981124878, + "learning_rate": 8.076587716862973e-05, + "loss": 2.5119, + "step": 11277 + }, + { + "epoch": 0.9101767411831168, + "grad_norm": 0.7268881797790527, + "learning_rate": 8.075038528189906e-05, + "loss": 2.4614, + "step": 11278 + }, + { + "epoch": 0.9102574449196997, + "grad_norm": 0.6901549696922302, + "learning_rate": 8.073489387491906e-05, + "loss": 2.5411, + "step": 11279 + }, + { + "epoch": 0.9103381486562828, + "grad_norm": 0.6850160956382751, + "learning_rate": 8.071940294807588e-05, + "loss": 2.5078, + "step": 11280 + }, + { + "epoch": 0.9104188523928658, + "grad_norm": 0.6550731658935547, + "learning_rate": 8.070391250175558e-05, + "loss": 2.5502, + "step": 11281 + }, + { + "epoch": 0.9104995561294488, + "grad_norm": 
0.7524412274360657, + "learning_rate": 8.068842253634421e-05, + "loss": 2.4699, + "step": 11282 + }, + { + "epoch": 0.9105802598660317, + "grad_norm": 0.6659243702888489, + "learning_rate": 8.067293305222784e-05, + "loss": 2.557, + "step": 11283 + }, + { + "epoch": 0.9106609636026148, + "grad_norm": 0.67015540599823, + "learning_rate": 8.065744404979251e-05, + "loss": 2.5929, + "step": 11284 + }, + { + "epoch": 0.9107416673391978, + "grad_norm": 0.7139000296592712, + "learning_rate": 8.064195552942422e-05, + "loss": 2.5262, + "step": 11285 + }, + { + "epoch": 0.9108223710757808, + "grad_norm": 0.6918016672134399, + "learning_rate": 8.062646749150899e-05, + "loss": 2.5161, + "step": 11286 + }, + { + "epoch": 0.9109030748123638, + "grad_norm": 0.7395541667938232, + "learning_rate": 8.061097993643289e-05, + "loss": 2.5351, + "step": 11287 + }, + { + "epoch": 0.9109837785489469, + "grad_norm": 0.6794499158859253, + "learning_rate": 8.05954928645818e-05, + "loss": 2.4617, + "step": 11288 + }, + { + "epoch": 0.9110644822855298, + "grad_norm": 0.6906577348709106, + "learning_rate": 8.058000627634176e-05, + "loss": 2.5701, + "step": 11289 + }, + { + "epoch": 0.9111451860221128, + "grad_norm": 0.6954079866409302, + "learning_rate": 8.056452017209874e-05, + "loss": 2.5137, + "step": 11290 + }, + { + "epoch": 0.9112258897586958, + "grad_norm": 0.7381381988525391, + "learning_rate": 8.054903455223866e-05, + "loss": 2.6666, + "step": 11291 + }, + { + "epoch": 0.9113065934952789, + "grad_norm": 0.6731518507003784, + "learning_rate": 8.053354941714749e-05, + "loss": 2.5173, + "step": 11292 + }, + { + "epoch": 0.9113872972318618, + "grad_norm": 0.6976885795593262, + "learning_rate": 8.051806476721116e-05, + "loss": 2.5089, + "step": 11293 + }, + { + "epoch": 0.9114680009684448, + "grad_norm": 0.6401965618133545, + "learning_rate": 8.050258060281562e-05, + "loss": 2.5295, + "step": 11294 + }, + { + "epoch": 0.9115487047050278, + "grad_norm": 0.7409671545028687, + "learning_rate": 
8.048709692434667e-05, + "loss": 2.5074, + "step": 11295 + }, + { + "epoch": 0.9116294084416109, + "grad_norm": 0.6028234958648682, + "learning_rate": 8.04716137321903e-05, + "loss": 2.5437, + "step": 11296 + }, + { + "epoch": 0.9117101121781939, + "grad_norm": 0.727643609046936, + "learning_rate": 8.04561310267324e-05, + "loss": 2.5272, + "step": 11297 + }, + { + "epoch": 0.9117908159147768, + "grad_norm": 0.6912926435470581, + "learning_rate": 8.044064880835876e-05, + "loss": 2.5166, + "step": 11298 + }, + { + "epoch": 0.9118715196513598, + "grad_norm": 0.6971367001533508, + "learning_rate": 8.042516707745528e-05, + "loss": 2.5421, + "step": 11299 + }, + { + "epoch": 0.9119522233879429, + "grad_norm": 0.6722451448440552, + "learning_rate": 8.040968583440783e-05, + "loss": 2.5088, + "step": 11300 + }, + { + "epoch": 0.9120329271245259, + "grad_norm": 0.6469144225120544, + "learning_rate": 8.03942050796022e-05, + "loss": 2.4921, + "step": 11301 + }, + { + "epoch": 0.9121136308611089, + "grad_norm": 0.6709008812904358, + "learning_rate": 8.037872481342423e-05, + "loss": 2.4553, + "step": 11302 + }, + { + "epoch": 0.9121943345976918, + "grad_norm": 0.6540920734405518, + "learning_rate": 8.036324503625977e-05, + "loss": 2.489, + "step": 11303 + }, + { + "epoch": 0.9122750383342749, + "grad_norm": 0.6589755415916443, + "learning_rate": 8.034776574849453e-05, + "loss": 2.5195, + "step": 11304 + }, + { + "epoch": 0.9123557420708579, + "grad_norm": 0.676943838596344, + "learning_rate": 8.033228695051434e-05, + "loss": 2.4877, + "step": 11305 + }, + { + "epoch": 0.9124364458074409, + "grad_norm": 0.6509177088737488, + "learning_rate": 8.031680864270498e-05, + "loss": 2.5229, + "step": 11306 + }, + { + "epoch": 0.9125171495440239, + "grad_norm": 0.7480820417404175, + "learning_rate": 8.030133082545219e-05, + "loss": 2.5016, + "step": 11307 + }, + { + "epoch": 0.9125978532806069, + "grad_norm": 0.7130550742149353, + "learning_rate": 8.028585349914174e-05, + "loss": 2.5251, + 
"step": 11308 + }, + { + "epoch": 0.9126785570171899, + "grad_norm": 0.6959688067436218, + "learning_rate": 8.027037666415934e-05, + "loss": 2.4776, + "step": 11309 + }, + { + "epoch": 0.9127592607537729, + "grad_norm": 0.7540854215621948, + "learning_rate": 8.025490032089076e-05, + "loss": 2.5097, + "step": 11310 + }, + { + "epoch": 0.9128399644903559, + "grad_norm": 0.6921199560165405, + "learning_rate": 8.023942446972165e-05, + "loss": 2.5354, + "step": 11311 + }, + { + "epoch": 0.912920668226939, + "grad_norm": 0.649824857711792, + "learning_rate": 8.022394911103774e-05, + "loss": 2.5398, + "step": 11312 + }, + { + "epoch": 0.9130013719635219, + "grad_norm": 0.6951068639755249, + "learning_rate": 8.020847424522474e-05, + "loss": 2.5302, + "step": 11313 + }, + { + "epoch": 0.9130820757001049, + "grad_norm": 0.6906851530075073, + "learning_rate": 8.019299987266827e-05, + "loss": 2.581, + "step": 11314 + }, + { + "epoch": 0.9131627794366879, + "grad_norm": 0.6758459210395813, + "learning_rate": 8.0177525993754e-05, + "loss": 2.5208, + "step": 11315 + }, + { + "epoch": 0.913243483173271, + "grad_norm": 0.6915175318717957, + "learning_rate": 8.016205260886766e-05, + "loss": 2.5386, + "step": 11316 + }, + { + "epoch": 0.913324186909854, + "grad_norm": 0.7083550691604614, + "learning_rate": 8.014657971839476e-05, + "loss": 2.4895, + "step": 11317 + }, + { + "epoch": 0.9134048906464369, + "grad_norm": 0.7052562832832336, + "learning_rate": 8.013110732272102e-05, + "loss": 2.4896, + "step": 11318 + }, + { + "epoch": 0.9134855943830199, + "grad_norm": 0.7811834216117859, + "learning_rate": 8.011563542223206e-05, + "loss": 2.5082, + "step": 11319 + }, + { + "epoch": 0.913566298119603, + "grad_norm": 0.6207153797149658, + "learning_rate": 8.01001640173134e-05, + "loss": 2.4967, + "step": 11320 + }, + { + "epoch": 0.913647001856186, + "grad_norm": 0.7637950778007507, + "learning_rate": 8.008469310835065e-05, + "loss": 2.4907, + "step": 11321 + }, + { + "epoch": 
0.913727705592769, + "grad_norm": 0.7263950705528259, + "learning_rate": 8.006922269572947e-05, + "loss": 2.5259, + "step": 11322 + }, + { + "epoch": 0.9138084093293519, + "grad_norm": 0.6965721845626831, + "learning_rate": 8.005375277983531e-05, + "loss": 2.5648, + "step": 11323 + }, + { + "epoch": 0.9138891130659349, + "grad_norm": 0.7146127223968506, + "learning_rate": 8.003828336105377e-05, + "loss": 2.53, + "step": 11324 + }, + { + "epoch": 0.913969816802518, + "grad_norm": 0.7083697319030762, + "learning_rate": 8.00228144397704e-05, + "loss": 2.4923, + "step": 11325 + }, + { + "epoch": 0.914050520539101, + "grad_norm": 0.7259312868118286, + "learning_rate": 8.000734601637074e-05, + "loss": 2.5303, + "step": 11326 + }, + { + "epoch": 0.9141312242756839, + "grad_norm": 0.7072086930274963, + "learning_rate": 7.999187809124025e-05, + "loss": 2.4662, + "step": 11327 + }, + { + "epoch": 0.9142119280122669, + "grad_norm": 0.7216035723686218, + "learning_rate": 7.997641066476445e-05, + "loss": 2.5069, + "step": 11328 + }, + { + "epoch": 0.91429263174885, + "grad_norm": 0.6925712823867798, + "learning_rate": 7.99609437373289e-05, + "loss": 2.5107, + "step": 11329 + }, + { + "epoch": 0.914373335485433, + "grad_norm": 0.6672701835632324, + "learning_rate": 7.994547730931896e-05, + "loss": 2.5248, + "step": 11330 + }, + { + "epoch": 0.914454039222016, + "grad_norm": 0.8058515787124634, + "learning_rate": 7.993001138112016e-05, + "loss": 2.4427, + "step": 11331 + }, + { + "epoch": 0.9145347429585989, + "grad_norm": 0.6942592859268188, + "learning_rate": 7.991454595311795e-05, + "loss": 2.6163, + "step": 11332 + }, + { + "epoch": 0.914615446695182, + "grad_norm": 0.7051894068717957, + "learning_rate": 7.989908102569774e-05, + "loss": 2.5327, + "step": 11333 + }, + { + "epoch": 0.914696150431765, + "grad_norm": 0.6824771761894226, + "learning_rate": 7.988361659924496e-05, + "loss": 2.4843, + "step": 11334 + }, + { + "epoch": 0.914776854168348, + "grad_norm": 
0.6756488084793091, + "learning_rate": 7.98681526741451e-05, + "loss": 2.5215, + "step": 11335 + }, + { + "epoch": 0.914857557904931, + "grad_norm": 0.6988239288330078, + "learning_rate": 7.985268925078344e-05, + "loss": 2.5153, + "step": 11336 + }, + { + "epoch": 0.914938261641514, + "grad_norm": 0.6446006298065186, + "learning_rate": 7.983722632954544e-05, + "loss": 2.5081, + "step": 11337 + }, + { + "epoch": 0.915018965378097, + "grad_norm": 0.6828100681304932, + "learning_rate": 7.982176391081649e-05, + "loss": 2.5607, + "step": 11338 + }, + { + "epoch": 0.91509966911468, + "grad_norm": 0.659721851348877, + "learning_rate": 7.980630199498193e-05, + "loss": 2.531, + "step": 11339 + }, + { + "epoch": 0.915180372851263, + "grad_norm": 0.6298564076423645, + "learning_rate": 7.979084058242709e-05, + "loss": 2.513, + "step": 11340 + }, + { + "epoch": 0.9152610765878461, + "grad_norm": 0.664299726486206, + "learning_rate": 7.977537967353735e-05, + "loss": 2.5533, + "step": 11341 + }, + { + "epoch": 0.915341780324429, + "grad_norm": 0.7035108804702759, + "learning_rate": 7.975991926869801e-05, + "loss": 2.4868, + "step": 11342 + }, + { + "epoch": 0.915422484061012, + "grad_norm": 0.7428407073020935, + "learning_rate": 7.974445936829438e-05, + "loss": 2.5694, + "step": 11343 + }, + { + "epoch": 0.915503187797595, + "grad_norm": 0.6845505237579346, + "learning_rate": 7.972899997271176e-05, + "loss": 2.5092, + "step": 11344 + }, + { + "epoch": 0.9155838915341781, + "grad_norm": 0.7135340571403503, + "learning_rate": 7.971354108233551e-05, + "loss": 2.5157, + "step": 11345 + }, + { + "epoch": 0.915664595270761, + "grad_norm": 0.7032433152198792, + "learning_rate": 7.969808269755077e-05, + "loss": 2.5292, + "step": 11346 + }, + { + "epoch": 0.915745299007344, + "grad_norm": 0.6874690651893616, + "learning_rate": 7.96826248187429e-05, + "loss": 2.5312, + "step": 11347 + }, + { + "epoch": 0.915826002743927, + "grad_norm": 0.6497030258178711, + "learning_rate": 
7.966716744629718e-05, + "loss": 2.505, + "step": 11348 + }, + { + "epoch": 0.9159067064805101, + "grad_norm": 0.6618520021438599, + "learning_rate": 7.965171058059874e-05, + "loss": 2.5287, + "step": 11349 + }, + { + "epoch": 0.9159874102170931, + "grad_norm": 0.6737041473388672, + "learning_rate": 7.963625422203288e-05, + "loss": 2.5494, + "step": 11350 + }, + { + "epoch": 0.916068113953676, + "grad_norm": 0.705646276473999, + "learning_rate": 7.96207983709848e-05, + "loss": 2.5402, + "step": 11351 + }, + { + "epoch": 0.916148817690259, + "grad_norm": 0.6852068901062012, + "learning_rate": 7.96053430278397e-05, + "loss": 2.51, + "step": 11352 + }, + { + "epoch": 0.9162295214268421, + "grad_norm": 0.7166822552680969, + "learning_rate": 7.958988819298274e-05, + "loss": 2.576, + "step": 11353 + }, + { + "epoch": 0.9163102251634251, + "grad_norm": 0.6349207162857056, + "learning_rate": 7.957443386679913e-05, + "loss": 2.5219, + "step": 11354 + }, + { + "epoch": 0.9163909289000081, + "grad_norm": 0.6504647135734558, + "learning_rate": 7.955898004967406e-05, + "loss": 2.4593, + "step": 11355 + }, + { + "epoch": 0.916471632636591, + "grad_norm": 0.7313871383666992, + "learning_rate": 7.95435267419926e-05, + "loss": 2.5616, + "step": 11356 + }, + { + "epoch": 0.9165523363731741, + "grad_norm": 0.6948587894439697, + "learning_rate": 7.95280739441399e-05, + "loss": 2.4608, + "step": 11357 + }, + { + "epoch": 0.9166330401097571, + "grad_norm": 0.6130328178405762, + "learning_rate": 7.95126216565012e-05, + "loss": 2.5563, + "step": 11358 + }, + { + "epoch": 0.9167137438463401, + "grad_norm": 0.7149228453636169, + "learning_rate": 7.949716987946145e-05, + "loss": 2.5664, + "step": 11359 + }, + { + "epoch": 0.916794447582923, + "grad_norm": 0.7452285289764404, + "learning_rate": 7.948171861340584e-05, + "loss": 2.525, + "step": 11360 + }, + { + "epoch": 0.9168751513195061, + "grad_norm": 0.6840611100196838, + "learning_rate": 7.946626785871945e-05, + "loss": 2.537, + "step": 
11361 + }, + { + "epoch": 0.9169558550560891, + "grad_norm": 0.7269708514213562, + "learning_rate": 7.945081761578732e-05, + "loss": 2.5227, + "step": 11362 + }, + { + "epoch": 0.9170365587926721, + "grad_norm": 0.6521697044372559, + "learning_rate": 7.943536788499452e-05, + "loss": 2.54, + "step": 11363 + }, + { + "epoch": 0.9171172625292551, + "grad_norm": 0.6516863107681274, + "learning_rate": 7.941991866672618e-05, + "loss": 2.4788, + "step": 11364 + }, + { + "epoch": 0.9171979662658382, + "grad_norm": 0.7673580050468445, + "learning_rate": 7.94044699613672e-05, + "loss": 2.4678, + "step": 11365 + }, + { + "epoch": 0.9172786700024211, + "grad_norm": 0.6666994690895081, + "learning_rate": 7.938902176930268e-05, + "loss": 2.5251, + "step": 11366 + }, + { + "epoch": 0.9173593737390041, + "grad_norm": 0.7261863946914673, + "learning_rate": 7.937357409091761e-05, + "loss": 2.4977, + "step": 11367 + }, + { + "epoch": 0.9174400774755871, + "grad_norm": 0.6920679807662964, + "learning_rate": 7.9358126926597e-05, + "loss": 2.5367, + "step": 11368 + }, + { + "epoch": 0.9175207812121702, + "grad_norm": 0.6715712547302246, + "learning_rate": 7.93426802767258e-05, + "loss": 2.4898, + "step": 11369 + }, + { + "epoch": 0.9176014849487532, + "grad_norm": 0.7014333605766296, + "learning_rate": 7.932723414168904e-05, + "loss": 2.4507, + "step": 11370 + }, + { + "epoch": 0.9176821886853361, + "grad_norm": 0.6755761504173279, + "learning_rate": 7.931178852187163e-05, + "loss": 2.5895, + "step": 11371 + }, + { + "epoch": 0.9177628924219191, + "grad_norm": 0.6846731305122375, + "learning_rate": 7.929634341765852e-05, + "loss": 2.5002, + "step": 11372 + }, + { + "epoch": 0.9178435961585021, + "grad_norm": 0.6422831416130066, + "learning_rate": 7.928089882943466e-05, + "loss": 2.5326, + "step": 11373 + }, + { + "epoch": 0.9179242998950852, + "grad_norm": 0.7256442308425903, + "learning_rate": 7.9265454757585e-05, + "loss": 2.5706, + "step": 11374 + }, + { + "epoch": 
0.9180050036316681, + "grad_norm": 0.6514387130737305, + "learning_rate": 7.925001120249436e-05, + "loss": 2.5349, + "step": 11375 + }, + { + "epoch": 0.9180857073682511, + "grad_norm": 0.7596457600593567, + "learning_rate": 7.923456816454768e-05, + "loss": 2.4767, + "step": 11376 + }, + { + "epoch": 0.9181664111048341, + "grad_norm": 0.673283040523529, + "learning_rate": 7.921912564412988e-05, + "loss": 2.5156, + "step": 11377 + }, + { + "epoch": 0.9182471148414172, + "grad_norm": 0.6964103579521179, + "learning_rate": 7.920368364162575e-05, + "loss": 2.5293, + "step": 11378 + }, + { + "epoch": 0.9183278185780002, + "grad_norm": 0.6765062212944031, + "learning_rate": 7.91882421574202e-05, + "loss": 2.5757, + "step": 11379 + }, + { + "epoch": 0.9184085223145831, + "grad_norm": 0.7039035558700562, + "learning_rate": 7.917280119189811e-05, + "loss": 2.513, + "step": 11380 + }, + { + "epoch": 0.9184892260511661, + "grad_norm": 0.6523976922035217, + "learning_rate": 7.915736074544419e-05, + "loss": 2.4712, + "step": 11381 + }, + { + "epoch": 0.9185699297877492, + "grad_norm": 0.7159552574157715, + "learning_rate": 7.914192081844334e-05, + "loss": 2.4713, + "step": 11382 + }, + { + "epoch": 0.9186506335243322, + "grad_norm": 0.7071694731712341, + "learning_rate": 7.912648141128036e-05, + "loss": 2.5367, + "step": 11383 + }, + { + "epoch": 0.9187313372609152, + "grad_norm": 0.6675183773040771, + "learning_rate": 7.911104252434e-05, + "loss": 2.5372, + "step": 11384 + }, + { + "epoch": 0.9188120409974981, + "grad_norm": 0.7293995022773743, + "learning_rate": 7.909560415800707e-05, + "loss": 2.5469, + "step": 11385 + }, + { + "epoch": 0.9188927447340812, + "grad_norm": 0.6774035096168518, + "learning_rate": 7.908016631266635e-05, + "loss": 2.5655, + "step": 11386 + }, + { + "epoch": 0.9189734484706642, + "grad_norm": 0.7068144083023071, + "learning_rate": 7.906472898870256e-05, + "loss": 2.5265, + "step": 11387 + }, + { + "epoch": 0.9190541522072472, + "grad_norm": 
0.6756324172019958, + "learning_rate": 7.904929218650044e-05, + "loss": 2.4966, + "step": 11388 + }, + { + "epoch": 0.9191348559438302, + "grad_norm": 0.6964625120162964, + "learning_rate": 7.903385590644473e-05, + "loss": 2.5646, + "step": 11389 + }, + { + "epoch": 0.9192155596804132, + "grad_norm": 0.6760976314544678, + "learning_rate": 7.901842014892018e-05, + "loss": 2.5159, + "step": 11390 + }, + { + "epoch": 0.9192962634169962, + "grad_norm": 0.6648714542388916, + "learning_rate": 7.900298491431139e-05, + "loss": 2.5715, + "step": 11391 + }, + { + "epoch": 0.9193769671535792, + "grad_norm": 0.7492914199829102, + "learning_rate": 7.898755020300312e-05, + "loss": 2.5226, + "step": 11392 + }, + { + "epoch": 0.9194576708901622, + "grad_norm": 0.7041164040565491, + "learning_rate": 7.897211601538004e-05, + "loss": 2.5809, + "step": 11393 + }, + { + "epoch": 0.9195383746267453, + "grad_norm": 0.6746383309364319, + "learning_rate": 7.895668235182677e-05, + "loss": 2.5369, + "step": 11394 + }, + { + "epoch": 0.9196190783633282, + "grad_norm": 0.6486156582832336, + "learning_rate": 7.894124921272798e-05, + "loss": 2.5406, + "step": 11395 + }, + { + "epoch": 0.9196997820999112, + "grad_norm": 0.6828807592391968, + "learning_rate": 7.892581659846834e-05, + "loss": 2.5241, + "step": 11396 + }, + { + "epoch": 0.9197804858364942, + "grad_norm": 0.694970428943634, + "learning_rate": 7.891038450943242e-05, + "loss": 2.4402, + "step": 11397 + }, + { + "epoch": 0.9198611895730773, + "grad_norm": 0.7187039852142334, + "learning_rate": 7.889495294600484e-05, + "loss": 2.5052, + "step": 11398 + }, + { + "epoch": 0.9199418933096603, + "grad_norm": 0.6919832825660706, + "learning_rate": 7.887952190857024e-05, + "loss": 2.5078, + "step": 11399 + }, + { + "epoch": 0.9200225970462432, + "grad_norm": 0.7129504084587097, + "learning_rate": 7.886409139751313e-05, + "loss": 2.5047, + "step": 11400 + }, + { + "epoch": 0.9201033007828262, + "grad_norm": 0.6755272746086121, + 
"learning_rate": 7.88486614132181e-05, + "loss": 2.4821, + "step": 11401 + }, + { + "epoch": 0.9201840045194093, + "grad_norm": 0.7253937125205994, + "learning_rate": 7.883323195606973e-05, + "loss": 2.5062, + "step": 11402 + }, + { + "epoch": 0.9202647082559923, + "grad_norm": 0.7057155966758728, + "learning_rate": 7.881780302645257e-05, + "loss": 2.5475, + "step": 11403 + }, + { + "epoch": 0.9203454119925752, + "grad_norm": 0.713869571685791, + "learning_rate": 7.880237462475111e-05, + "loss": 2.5335, + "step": 11404 + }, + { + "epoch": 0.9204261157291582, + "grad_norm": 0.769648551940918, + "learning_rate": 7.878694675134987e-05, + "loss": 2.4944, + "step": 11405 + }, + { + "epoch": 0.9205068194657413, + "grad_norm": 0.6444964408874512, + "learning_rate": 7.877151940663343e-05, + "loss": 2.5755, + "step": 11406 + }, + { + "epoch": 0.9205875232023243, + "grad_norm": 0.6811819672584534, + "learning_rate": 7.875609259098618e-05, + "loss": 2.5475, + "step": 11407 + }, + { + "epoch": 0.9206682269389073, + "grad_norm": 0.6959417462348938, + "learning_rate": 7.874066630479259e-05, + "loss": 2.5095, + "step": 11408 + }, + { + "epoch": 0.9207489306754902, + "grad_norm": 0.6721363067626953, + "learning_rate": 7.872524054843724e-05, + "loss": 2.5166, + "step": 11409 + }, + { + "epoch": 0.9208296344120733, + "grad_norm": 0.713122546672821, + "learning_rate": 7.870981532230447e-05, + "loss": 2.5084, + "step": 11410 + }, + { + "epoch": 0.9209103381486563, + "grad_norm": 0.7059469819068909, + "learning_rate": 7.869439062677876e-05, + "loss": 2.437, + "step": 11411 + }, + { + "epoch": 0.9209910418852393, + "grad_norm": 0.6808314323425293, + "learning_rate": 7.867896646224454e-05, + "loss": 2.5658, + "step": 11412 + }, + { + "epoch": 0.9210717456218223, + "grad_norm": 0.7060894966125488, + "learning_rate": 7.86635428290862e-05, + "loss": 2.515, + "step": 11413 + }, + { + "epoch": 0.9211524493584053, + "grad_norm": 0.7538465857505798, + "learning_rate": 7.864811972768813e-05, + 
"loss": 2.4448, + "step": 11414 + }, + { + "epoch": 0.9212331530949883, + "grad_norm": 0.6824522018432617, + "learning_rate": 7.863269715843478e-05, + "loss": 2.503, + "step": 11415 + }, + { + "epoch": 0.9213138568315713, + "grad_norm": 0.7068174481391907, + "learning_rate": 7.861727512171044e-05, + "loss": 2.5198, + "step": 11416 + }, + { + "epoch": 0.9213945605681543, + "grad_norm": 0.6742961406707764, + "learning_rate": 7.860185361789948e-05, + "loss": 2.5167, + "step": 11417 + }, + { + "epoch": 0.9214752643047374, + "grad_norm": 0.7643383741378784, + "learning_rate": 7.858643264738628e-05, + "loss": 2.5508, + "step": 11418 + }, + { + "epoch": 0.9215559680413203, + "grad_norm": 0.6737802028656006, + "learning_rate": 7.857101221055518e-05, + "loss": 2.589, + "step": 11419 + }, + { + "epoch": 0.9216366717779033, + "grad_norm": 0.668214738368988, + "learning_rate": 7.855559230779043e-05, + "loss": 2.4747, + "step": 11420 + }, + { + "epoch": 0.9217173755144863, + "grad_norm": 0.6933084726333618, + "learning_rate": 7.854017293947638e-05, + "loss": 2.5171, + "step": 11421 + }, + { + "epoch": 0.9217980792510694, + "grad_norm": 0.6320228576660156, + "learning_rate": 7.852475410599736e-05, + "loss": 2.5213, + "step": 11422 + }, + { + "epoch": 0.9218787829876524, + "grad_norm": 0.6578245759010315, + "learning_rate": 7.850933580773756e-05, + "loss": 2.5085, + "step": 11423 + }, + { + "epoch": 0.9219594867242353, + "grad_norm": 0.6741796135902405, + "learning_rate": 7.849391804508129e-05, + "loss": 2.5294, + "step": 11424 + }, + { + "epoch": 0.9220401904608183, + "grad_norm": 0.6875781416893005, + "learning_rate": 7.847850081841285e-05, + "loss": 2.5034, + "step": 11425 + }, + { + "epoch": 0.9221208941974013, + "grad_norm": 0.6515244245529175, + "learning_rate": 7.846308412811638e-05, + "loss": 2.4707, + "step": 11426 + }, + { + "epoch": 0.9222015979339844, + "grad_norm": 0.7326812148094177, + "learning_rate": 7.844766797457615e-05, + "loss": 2.5049, + "step": 11427 + }, + 
{ + "epoch": 0.9222823016705674, + "grad_norm": 0.7539918422698975, + "learning_rate": 7.84322523581764e-05, + "loss": 2.4726, + "step": 11428 + }, + { + "epoch": 0.9223630054071503, + "grad_norm": 0.745468020439148, + "learning_rate": 7.841683727930129e-05, + "loss": 2.5003, + "step": 11429 + }, + { + "epoch": 0.9224437091437333, + "grad_norm": 0.726362943649292, + "learning_rate": 7.840142273833499e-05, + "loss": 2.5056, + "step": 11430 + }, + { + "epoch": 0.9225244128803164, + "grad_norm": 0.7275403738021851, + "learning_rate": 7.838600873566175e-05, + "loss": 2.5188, + "step": 11431 + }, + { + "epoch": 0.9226051166168994, + "grad_norm": 0.6908789873123169, + "learning_rate": 7.837059527166563e-05, + "loss": 2.5349, + "step": 11432 + }, + { + "epoch": 0.9226858203534823, + "grad_norm": 0.7220396399497986, + "learning_rate": 7.835518234673079e-05, + "loss": 2.4863, + "step": 11433 + }, + { + "epoch": 0.9227665240900653, + "grad_norm": 0.6516178846359253, + "learning_rate": 7.833976996124142e-05, + "loss": 2.556, + "step": 11434 + }, + { + "epoch": 0.9228472278266484, + "grad_norm": 0.6958726644515991, + "learning_rate": 7.832435811558163e-05, + "loss": 2.5286, + "step": 11435 + }, + { + "epoch": 0.9229279315632314, + "grad_norm": 0.7734121680259705, + "learning_rate": 7.830894681013546e-05, + "loss": 2.5087, + "step": 11436 + }, + { + "epoch": 0.9230086352998144, + "grad_norm": 0.709064245223999, + "learning_rate": 7.829353604528703e-05, + "loss": 2.4817, + "step": 11437 + }, + { + "epoch": 0.9230893390363973, + "grad_norm": 0.7224971652030945, + "learning_rate": 7.827812582142045e-05, + "loss": 2.5179, + "step": 11438 + }, + { + "epoch": 0.9231700427729804, + "grad_norm": 0.7139936685562134, + "learning_rate": 7.826271613891973e-05, + "loss": 2.537, + "step": 11439 + }, + { + "epoch": 0.9232507465095634, + "grad_norm": 0.671138346195221, + "learning_rate": 7.824730699816896e-05, + "loss": 2.4865, + "step": 11440 + }, + { + "epoch": 0.9233314502461464, + 
"grad_norm": 0.6547425389289856, + "learning_rate": 7.823189839955218e-05, + "loss": 2.509, + "step": 11441 + }, + { + "epoch": 0.9234121539827294, + "grad_norm": 0.719765305519104, + "learning_rate": 7.821649034345338e-05, + "loss": 2.591, + "step": 11442 + }, + { + "epoch": 0.9234928577193124, + "grad_norm": 0.7128504514694214, + "learning_rate": 7.820108283025656e-05, + "loss": 2.541, + "step": 11443 + }, + { + "epoch": 0.9235735614558954, + "grad_norm": 0.7711538672447205, + "learning_rate": 7.818567586034577e-05, + "loss": 2.5388, + "step": 11444 + }, + { + "epoch": 0.9236542651924784, + "grad_norm": 0.7151121497154236, + "learning_rate": 7.817026943410494e-05, + "loss": 2.5539, + "step": 11445 + }, + { + "epoch": 0.9237349689290614, + "grad_norm": 0.7009569406509399, + "learning_rate": 7.815486355191805e-05, + "loss": 2.4793, + "step": 11446 + }, + { + "epoch": 0.9238156726656445, + "grad_norm": 0.7251109480857849, + "learning_rate": 7.813945821416909e-05, + "loss": 2.5406, + "step": 11447 + }, + { + "epoch": 0.9238963764022274, + "grad_norm": 0.6907934546470642, + "learning_rate": 7.812405342124196e-05, + "loss": 2.5069, + "step": 11448 + }, + { + "epoch": 0.9239770801388104, + "grad_norm": 0.699207067489624, + "learning_rate": 7.810864917352061e-05, + "loss": 2.4844, + "step": 11449 + }, + { + "epoch": 0.9240577838753934, + "grad_norm": 0.718386173248291, + "learning_rate": 7.809324547138893e-05, + "loss": 2.5666, + "step": 11450 + }, + { + "epoch": 0.9241384876119765, + "grad_norm": 0.6420444846153259, + "learning_rate": 7.807784231523089e-05, + "loss": 2.506, + "step": 11451 + }, + { + "epoch": 0.9242191913485595, + "grad_norm": 0.6777252554893494, + "learning_rate": 7.806243970543028e-05, + "loss": 2.487, + "step": 11452 + }, + { + "epoch": 0.9242998950851424, + "grad_norm": 0.6907702684402466, + "learning_rate": 7.804703764237102e-05, + "loss": 2.5284, + "step": 11453 + }, + { + "epoch": 0.9243805988217254, + "grad_norm": 0.6383422613143921, + 
"learning_rate": 7.803163612643698e-05, + "loss": 2.4704, + "step": 11454 + }, + { + "epoch": 0.9244613025583085, + "grad_norm": 0.6879577040672302, + "learning_rate": 7.801623515801198e-05, + "loss": 2.5103, + "step": 11455 + }, + { + "epoch": 0.9245420062948915, + "grad_norm": 0.6856719851493835, + "learning_rate": 7.800083473747986e-05, + "loss": 2.5086, + "step": 11456 + }, + { + "epoch": 0.9246227100314744, + "grad_norm": 0.7463707327842712, + "learning_rate": 7.79854348652245e-05, + "loss": 2.5456, + "step": 11457 + }, + { + "epoch": 0.9247034137680574, + "grad_norm": 0.7352643013000488, + "learning_rate": 7.79700355416296e-05, + "loss": 2.5335, + "step": 11458 + }, + { + "epoch": 0.9247841175046405, + "grad_norm": 0.7525908350944519, + "learning_rate": 7.795463676707897e-05, + "loss": 2.5855, + "step": 11459 + }, + { + "epoch": 0.9248648212412235, + "grad_norm": 0.7323870658874512, + "learning_rate": 7.79392385419565e-05, + "loss": 2.5471, + "step": 11460 + }, + { + "epoch": 0.9249455249778065, + "grad_norm": 0.7443860769271851, + "learning_rate": 7.792384086664582e-05, + "loss": 2.5449, + "step": 11461 + }, + { + "epoch": 0.9250262287143894, + "grad_norm": 0.6928641200065613, + "learning_rate": 7.790844374153073e-05, + "loss": 2.505, + "step": 11462 + }, + { + "epoch": 0.9251069324509725, + "grad_norm": 0.6491222381591797, + "learning_rate": 7.789304716699498e-05, + "loss": 2.5447, + "step": 11463 + }, + { + "epoch": 0.9251876361875555, + "grad_norm": 0.7351166009902954, + "learning_rate": 7.78776511434223e-05, + "loss": 2.524, + "step": 11464 + }, + { + "epoch": 0.9252683399241385, + "grad_norm": 0.6680036783218384, + "learning_rate": 7.786225567119637e-05, + "loss": 2.5019, + "step": 11465 + }, + { + "epoch": 0.9253490436607215, + "grad_norm": 0.7070801258087158, + "learning_rate": 7.784686075070089e-05, + "loss": 2.5052, + "step": 11466 + }, + { + "epoch": 0.9254297473973045, + "grad_norm": 0.7095211148262024, + "learning_rate": 7.783146638231957e-05, + 
"loss": 2.4998, + "step": 11467 + }, + { + "epoch": 0.9255104511338875, + "grad_norm": 0.6725812554359436, + "learning_rate": 7.781607256643604e-05, + "loss": 2.4909, + "step": 11468 + }, + { + "epoch": 0.9255911548704705, + "grad_norm": 0.684177577495575, + "learning_rate": 7.780067930343396e-05, + "loss": 2.5636, + "step": 11469 + }, + { + "epoch": 0.9256718586070535, + "grad_norm": 0.703419029712677, + "learning_rate": 7.778528659369702e-05, + "loss": 2.4295, + "step": 11470 + }, + { + "epoch": 0.9257525623436366, + "grad_norm": 0.6850195527076721, + "learning_rate": 7.776989443760877e-05, + "loss": 2.5143, + "step": 11471 + }, + { + "epoch": 0.9258332660802195, + "grad_norm": 0.7322348952293396, + "learning_rate": 7.775450283555286e-05, + "loss": 2.5616, + "step": 11472 + }, + { + "epoch": 0.9259139698168025, + "grad_norm": 0.6924510598182678, + "learning_rate": 7.77391117879129e-05, + "loss": 2.4796, + "step": 11473 + }, + { + "epoch": 0.9259946735533855, + "grad_norm": 0.7006441354751587, + "learning_rate": 7.772372129507249e-05, + "loss": 2.5142, + "step": 11474 + }, + { + "epoch": 0.9260753772899685, + "grad_norm": 0.6379218697547913, + "learning_rate": 7.770833135741513e-05, + "loss": 2.5366, + "step": 11475 + }, + { + "epoch": 0.9261560810265516, + "grad_norm": 0.676163375377655, + "learning_rate": 7.769294197532448e-05, + "loss": 2.4936, + "step": 11476 + }, + { + "epoch": 0.9262367847631345, + "grad_norm": 0.6964210271835327, + "learning_rate": 7.767755314918399e-05, + "loss": 2.429, + "step": 11477 + }, + { + "epoch": 0.9263174884997175, + "grad_norm": 0.7017048597335815, + "learning_rate": 7.766216487937722e-05, + "loss": 2.5488, + "step": 11478 + }, + { + "epoch": 0.9263981922363005, + "grad_norm": 0.6742509603500366, + "learning_rate": 7.76467771662877e-05, + "loss": 2.5121, + "step": 11479 + }, + { + "epoch": 0.9264788959728836, + "grad_norm": 0.6751403212547302, + "learning_rate": 7.763139001029893e-05, + "loss": 2.5897, + "step": 11480 + }, + { + 
"epoch": 0.9265595997094666, + "grad_norm": 0.6639657616615295, + "learning_rate": 7.761600341179439e-05, + "loss": 2.5015, + "step": 11481 + }, + { + "epoch": 0.9266403034460495, + "grad_norm": 0.6332827210426331, + "learning_rate": 7.760061737115756e-05, + "loss": 2.5518, + "step": 11482 + }, + { + "epoch": 0.9267210071826325, + "grad_norm": 0.6751062870025635, + "learning_rate": 7.758523188877192e-05, + "loss": 2.4252, + "step": 11483 + }, + { + "epoch": 0.9268017109192156, + "grad_norm": 0.6763231754302979, + "learning_rate": 7.756984696502084e-05, + "loss": 2.5683, + "step": 11484 + }, + { + "epoch": 0.9268824146557986, + "grad_norm": 0.6480380296707153, + "learning_rate": 7.755446260028784e-05, + "loss": 2.558, + "step": 11485 + }, + { + "epoch": 0.9269631183923815, + "grad_norm": 0.6925072073936462, + "learning_rate": 7.753907879495634e-05, + "loss": 2.5374, + "step": 11486 + }, + { + "epoch": 0.9270438221289645, + "grad_norm": 0.6771834492683411, + "learning_rate": 7.752369554940966e-05, + "loss": 2.5652, + "step": 11487 + }, + { + "epoch": 0.9271245258655476, + "grad_norm": 0.6747026443481445, + "learning_rate": 7.750831286403124e-05, + "loss": 2.5076, + "step": 11488 + }, + { + "epoch": 0.9272052296021306, + "grad_norm": 0.6727211475372314, + "learning_rate": 7.749293073920448e-05, + "loss": 2.4774, + "step": 11489 + }, + { + "epoch": 0.9272859333387136, + "grad_norm": 0.6334055066108704, + "learning_rate": 7.747754917531272e-05, + "loss": 2.5245, + "step": 11490 + }, + { + "epoch": 0.9273666370752965, + "grad_norm": 0.740700900554657, + "learning_rate": 7.746216817273928e-05, + "loss": 2.5485, + "step": 11491 + }, + { + "epoch": 0.9274473408118796, + "grad_norm": 0.6500691771507263, + "learning_rate": 7.744678773186757e-05, + "loss": 2.5277, + "step": 11492 + }, + { + "epoch": 0.9275280445484626, + "grad_norm": 0.6592985987663269, + "learning_rate": 7.743140785308084e-05, + "loss": 2.5304, + "step": 11493 + }, + { + "epoch": 0.9276087482850456, + 
"grad_norm": 0.6980452537536621, + "learning_rate": 7.741602853676241e-05, + "loss": 2.544, + "step": 11494 + }, + { + "epoch": 0.9276894520216286, + "grad_norm": 0.643190860748291, + "learning_rate": 7.740064978329555e-05, + "loss": 2.5167, + "step": 11495 + }, + { + "epoch": 0.9277701557582116, + "grad_norm": 0.6789804100990295, + "learning_rate": 7.738527159306366e-05, + "loss": 2.5117, + "step": 11496 + }, + { + "epoch": 0.9278508594947946, + "grad_norm": 0.7109663486480713, + "learning_rate": 7.736989396644987e-05, + "loss": 2.5294, + "step": 11497 + }, + { + "epoch": 0.9279315632313776, + "grad_norm": 0.6752706170082092, + "learning_rate": 7.735451690383746e-05, + "loss": 2.4851, + "step": 11498 + }, + { + "epoch": 0.9280122669679606, + "grad_norm": 0.6947829723358154, + "learning_rate": 7.733914040560972e-05, + "loss": 2.5792, + "step": 11499 + }, + { + "epoch": 0.9280929707045437, + "grad_norm": 0.6701157689094543, + "learning_rate": 7.732376447214981e-05, + "loss": 2.4884, + "step": 11500 + }, + { + "epoch": 0.9281736744411266, + "grad_norm": 0.64533531665802, + "learning_rate": 7.730838910384097e-05, + "loss": 2.4644, + "step": 11501 + }, + { + "epoch": 0.9282543781777096, + "grad_norm": 0.6664395332336426, + "learning_rate": 7.729301430106644e-05, + "loss": 2.5286, + "step": 11502 + }, + { + "epoch": 0.9283350819142926, + "grad_norm": 0.6982395648956299, + "learning_rate": 7.72776400642093e-05, + "loss": 2.5092, + "step": 11503 + }, + { + "epoch": 0.9284157856508757, + "grad_norm": 0.6656171679496765, + "learning_rate": 7.726226639365278e-05, + "loss": 2.4945, + "step": 11504 + }, + { + "epoch": 0.9284964893874587, + "grad_norm": 0.6213308572769165, + "learning_rate": 7.724689328978001e-05, + "loss": 2.5042, + "step": 11505 + }, + { + "epoch": 0.9285771931240416, + "grad_norm": 0.6855599880218506, + "learning_rate": 7.723152075297414e-05, + "loss": 2.5207, + "step": 11506 + }, + { + "epoch": 0.9286578968606246, + "grad_norm": 0.7724171280860901, + 
"learning_rate": 7.721614878361828e-05, + "loss": 2.4842, + "step": 11507 + }, + { + "epoch": 0.9287386005972077, + "grad_norm": 0.708634614944458, + "learning_rate": 7.720077738209559e-05, + "loss": 2.58, + "step": 11508 + }, + { + "epoch": 0.9288193043337907, + "grad_norm": 0.6766082644462585, + "learning_rate": 7.718540654878907e-05, + "loss": 2.492, + "step": 11509 + }, + { + "epoch": 0.9289000080703737, + "grad_norm": 0.6856982707977295, + "learning_rate": 7.717003628408187e-05, + "loss": 2.5186, + "step": 11510 + }, + { + "epoch": 0.9289807118069566, + "grad_norm": 0.680647611618042, + "learning_rate": 7.715466658835705e-05, + "loss": 2.5305, + "step": 11511 + }, + { + "epoch": 0.9290614155435397, + "grad_norm": 0.7174721360206604, + "learning_rate": 7.713929746199771e-05, + "loss": 2.4498, + "step": 11512 + }, + { + "epoch": 0.9291421192801227, + "grad_norm": 0.6507031321525574, + "learning_rate": 7.712392890538676e-05, + "loss": 2.5334, + "step": 11513 + }, + { + "epoch": 0.9292228230167057, + "grad_norm": 0.7545748353004456, + "learning_rate": 7.710856091890732e-05, + "loss": 2.505, + "step": 11514 + }, + { + "epoch": 0.9293035267532886, + "grad_norm": 0.6978560090065002, + "learning_rate": 7.709319350294242e-05, + "loss": 2.5243, + "step": 11515 + }, + { + "epoch": 0.9293842304898717, + "grad_norm": 0.6620199084281921, + "learning_rate": 7.707782665787497e-05, + "loss": 2.5114, + "step": 11516 + }, + { + "epoch": 0.9294649342264547, + "grad_norm": 0.7160476446151733, + "learning_rate": 7.7062460384088e-05, + "loss": 2.5322, + "step": 11517 + }, + { + "epoch": 0.9295456379630377, + "grad_norm": 0.6637005805969238, + "learning_rate": 7.704709468196454e-05, + "loss": 2.456, + "step": 11518 + }, + { + "epoch": 0.9296263416996207, + "grad_norm": 0.6668851375579834, + "learning_rate": 7.703172955188742e-05, + "loss": 2.5251, + "step": 11519 + }, + { + "epoch": 0.9297070454362037, + "grad_norm": 0.6840329170227051, + "learning_rate": 7.701636499423965e-05, + 
"loss": 2.5068, + "step": 11520 + }, + { + "epoch": 0.9297877491727867, + "grad_norm": 0.695122241973877, + "learning_rate": 7.700100100940415e-05, + "loss": 2.4822, + "step": 11521 + }, + { + "epoch": 0.9298684529093697, + "grad_norm": 0.6784923672676086, + "learning_rate": 7.698563759776382e-05, + "loss": 2.4978, + "step": 11522 + }, + { + "epoch": 0.9299491566459527, + "grad_norm": 0.6949357986450195, + "learning_rate": 7.697027475970154e-05, + "loss": 2.5392, + "step": 11523 + }, + { + "epoch": 0.9300298603825358, + "grad_norm": 0.7128093242645264, + "learning_rate": 7.695491249560025e-05, + "loss": 2.455, + "step": 11524 + }, + { + "epoch": 0.9301105641191187, + "grad_norm": 0.6534962058067322, + "learning_rate": 7.693955080584277e-05, + "loss": 2.5272, + "step": 11525 + }, + { + "epoch": 0.9301912678557017, + "grad_norm": 0.6893511414527893, + "learning_rate": 7.692418969081194e-05, + "loss": 2.5366, + "step": 11526 + }, + { + "epoch": 0.9302719715922847, + "grad_norm": 0.6335335373878479, + "learning_rate": 7.690882915089064e-05, + "loss": 2.5781, + "step": 11527 + }, + { + "epoch": 0.9303526753288677, + "grad_norm": 0.7264769077301025, + "learning_rate": 7.689346918646172e-05, + "loss": 2.5322, + "step": 11528 + }, + { + "epoch": 0.9304333790654508, + "grad_norm": 0.7156329154968262, + "learning_rate": 7.68781097979079e-05, + "loss": 2.5558, + "step": 11529 + }, + { + "epoch": 0.9305140828020337, + "grad_norm": 0.6914563775062561, + "learning_rate": 7.686275098561203e-05, + "loss": 2.5058, + "step": 11530 + }, + { + "epoch": 0.9305947865386167, + "grad_norm": 0.6939939260482788, + "learning_rate": 7.684739274995691e-05, + "loss": 2.4764, + "step": 11531 + }, + { + "epoch": 0.9306754902751997, + "grad_norm": 0.7103014588356018, + "learning_rate": 7.683203509132526e-05, + "loss": 2.5062, + "step": 11532 + }, + { + "epoch": 0.9307561940117828, + "grad_norm": 0.6558870077133179, + "learning_rate": 7.681667801009985e-05, + "loss": 2.4869, + "step": 11533 + }, + 
{ + "epoch": 0.9308368977483658, + "grad_norm": 0.7280104160308838, + "learning_rate": 7.680132150666348e-05, + "loss": 2.566, + "step": 11534 + }, + { + "epoch": 0.9309176014849487, + "grad_norm": 0.6814180612564087, + "learning_rate": 7.678596558139875e-05, + "loss": 2.4926, + "step": 11535 + }, + { + "epoch": 0.9309983052215317, + "grad_norm": 0.6916589736938477, + "learning_rate": 7.677061023468846e-05, + "loss": 2.5189, + "step": 11536 + }, + { + "epoch": 0.9310790089581148, + "grad_norm": 0.6527554988861084, + "learning_rate": 7.675525546691533e-05, + "loss": 2.4969, + "step": 11537 + }, + { + "epoch": 0.9311597126946978, + "grad_norm": 0.6458954811096191, + "learning_rate": 7.673990127846196e-05, + "loss": 2.5159, + "step": 11538 + }, + { + "epoch": 0.9312404164312807, + "grad_norm": 0.6704902052879333, + "learning_rate": 7.672454766971105e-05, + "loss": 2.49, + "step": 11539 + }, + { + "epoch": 0.9313211201678637, + "grad_norm": 0.6599698066711426, + "learning_rate": 7.670919464104527e-05, + "loss": 2.4872, + "step": 11540 + }, + { + "epoch": 0.9314018239044468, + "grad_norm": 0.7638888955116272, + "learning_rate": 7.669384219284722e-05, + "loss": 2.5228, + "step": 11541 + }, + { + "epoch": 0.9314825276410298, + "grad_norm": 0.6911981105804443, + "learning_rate": 7.667849032549954e-05, + "loss": 2.4675, + "step": 11542 + }, + { + "epoch": 0.9315632313776128, + "grad_norm": 0.6414669156074524, + "learning_rate": 7.666313903938486e-05, + "loss": 2.5137, + "step": 11543 + }, + { + "epoch": 0.9316439351141957, + "grad_norm": 0.7552139759063721, + "learning_rate": 7.66477883348858e-05, + "loss": 2.5778, + "step": 11544 + }, + { + "epoch": 0.9317246388507788, + "grad_norm": 0.6738760471343994, + "learning_rate": 7.663243821238484e-05, + "loss": 2.5326, + "step": 11545 + }, + { + "epoch": 0.9318053425873618, + "grad_norm": 0.7406899333000183, + "learning_rate": 7.661708867226459e-05, + "loss": 2.4608, + "step": 11546 + }, + { + "epoch": 0.9318860463239448, + 
"grad_norm": 0.7261415719985962, + "learning_rate": 7.660173971490769e-05, + "loss": 2.5684, + "step": 11547 + }, + { + "epoch": 0.9319667500605278, + "grad_norm": 0.636542797088623, + "learning_rate": 7.658639134069654e-05, + "loss": 2.5159, + "step": 11548 + }, + { + "epoch": 0.9320474537971108, + "grad_norm": 0.7730209231376648, + "learning_rate": 7.657104355001373e-05, + "loss": 2.487, + "step": 11549 + }, + { + "epoch": 0.9321281575336938, + "grad_norm": 0.6553641557693481, + "learning_rate": 7.655569634324178e-05, + "loss": 2.5105, + "step": 11550 + }, + { + "epoch": 0.9322088612702768, + "grad_norm": 0.7008326649665833, + "learning_rate": 7.654034972076314e-05, + "loss": 2.492, + "step": 11551 + }, + { + "epoch": 0.9322895650068598, + "grad_norm": 0.7074279189109802, + "learning_rate": 7.65250036829603e-05, + "loss": 2.5221, + "step": 11552 + }, + { + "epoch": 0.9323702687434429, + "grad_norm": 0.7235530018806458, + "learning_rate": 7.650965823021578e-05, + "loss": 2.5285, + "step": 11553 + }, + { + "epoch": 0.9324509724800258, + "grad_norm": 0.7601436376571655, + "learning_rate": 7.649431336291194e-05, + "loss": 2.5071, + "step": 11554 + }, + { + "epoch": 0.9325316762166088, + "grad_norm": 0.6446424126625061, + "learning_rate": 7.647896908143127e-05, + "loss": 2.5032, + "step": 11555 + }, + { + "epoch": 0.9326123799531918, + "grad_norm": 0.7032139897346497, + "learning_rate": 7.646362538615614e-05, + "loss": 2.6096, + "step": 11556 + }, + { + "epoch": 0.9326930836897749, + "grad_norm": 0.6727899312973022, + "learning_rate": 7.644828227746904e-05, + "loss": 2.5041, + "step": 11557 + }, + { + "epoch": 0.9327737874263579, + "grad_norm": 0.6817529201507568, + "learning_rate": 7.643293975575229e-05, + "loss": 2.4474, + "step": 11558 + }, + { + "epoch": 0.9328544911629408, + "grad_norm": 0.6374444365501404, + "learning_rate": 7.641759782138827e-05, + "loss": 2.5204, + "step": 11559 + }, + { + "epoch": 0.9329351948995238, + "grad_norm": 0.6889457702636719, + 
"learning_rate": 7.640225647475939e-05, + "loss": 2.6344, + "step": 11560 + }, + { + "epoch": 0.9330158986361069, + "grad_norm": 0.6657958626747131, + "learning_rate": 7.638691571624794e-05, + "loss": 2.4672, + "step": 11561 + }, + { + "epoch": 0.9330966023726899, + "grad_norm": 0.6425464749336243, + "learning_rate": 7.637157554623627e-05, + "loss": 2.4756, + "step": 11562 + }, + { + "epoch": 0.9331773061092729, + "grad_norm": 0.7193450927734375, + "learning_rate": 7.635623596510675e-05, + "loss": 2.4969, + "step": 11563 + }, + { + "epoch": 0.9332580098458558, + "grad_norm": 0.6595252156257629, + "learning_rate": 7.634089697324159e-05, + "loss": 2.4647, + "step": 11564 + }, + { + "epoch": 0.9333387135824389, + "grad_norm": 0.6505268812179565, + "learning_rate": 7.632555857102312e-05, + "loss": 2.5059, + "step": 11565 + }, + { + "epoch": 0.9334194173190219, + "grad_norm": 0.6877838969230652, + "learning_rate": 7.631022075883365e-05, + "loss": 2.4855, + "step": 11566 + }, + { + "epoch": 0.9335001210556049, + "grad_norm": 0.6376198530197144, + "learning_rate": 7.629488353705538e-05, + "loss": 2.5024, + "step": 11567 + }, + { + "epoch": 0.9335808247921878, + "grad_norm": 0.6807642579078674, + "learning_rate": 7.627954690607058e-05, + "loss": 2.4954, + "step": 11568 + }, + { + "epoch": 0.9336615285287709, + "grad_norm": 0.6785219311714172, + "learning_rate": 7.62642108662615e-05, + "loss": 2.4854, + "step": 11569 + }, + { + "epoch": 0.9337422322653539, + "grad_norm": 0.8159591555595398, + "learning_rate": 7.624887541801032e-05, + "loss": 2.524, + "step": 11570 + }, + { + "epoch": 0.9338229360019369, + "grad_norm": 0.6912592053413391, + "learning_rate": 7.62335405616992e-05, + "loss": 2.5111, + "step": 11571 + }, + { + "epoch": 0.9339036397385199, + "grad_norm": 0.6772454977035522, + "learning_rate": 7.621820629771041e-05, + "loss": 2.5603, + "step": 11572 + }, + { + "epoch": 0.933984343475103, + "grad_norm": 0.6720221638679504, + "learning_rate": 7.620287262642613e-05, 
+ "loss": 2.5016, + "step": 11573 + }, + { + "epoch": 0.9340650472116859, + "grad_norm": 0.651935338973999, + "learning_rate": 7.618753954822841e-05, + "loss": 2.445, + "step": 11574 + }, + { + "epoch": 0.9341457509482689, + "grad_norm": 0.6731166839599609, + "learning_rate": 7.617220706349947e-05, + "loss": 2.4703, + "step": 11575 + }, + { + "epoch": 0.9342264546848519, + "grad_norm": 0.6283879280090332, + "learning_rate": 7.615687517262143e-05, + "loss": 2.5232, + "step": 11576 + }, + { + "epoch": 0.9343071584214349, + "grad_norm": 0.7193455696105957, + "learning_rate": 7.614154387597638e-05, + "loss": 2.5268, + "step": 11577 + }, + { + "epoch": 0.934387862158018, + "grad_norm": 0.6992828845977783, + "learning_rate": 7.61262131739464e-05, + "loss": 2.5834, + "step": 11578 + }, + { + "epoch": 0.9344685658946009, + "grad_norm": 0.6501220464706421, + "learning_rate": 7.611088306691365e-05, + "loss": 2.5146, + "step": 11579 + }, + { + "epoch": 0.9345492696311839, + "grad_norm": 0.7246220111846924, + "learning_rate": 7.60955535552601e-05, + "loss": 2.5665, + "step": 11580 + }, + { + "epoch": 0.9346299733677669, + "grad_norm": 0.7190428376197815, + "learning_rate": 7.608022463936783e-05, + "loss": 2.5061, + "step": 11581 + }, + { + "epoch": 0.93471067710435, + "grad_norm": 0.7144324779510498, + "learning_rate": 7.606489631961893e-05, + "loss": 2.4982, + "step": 11582 + }, + { + "epoch": 0.9347913808409329, + "grad_norm": 0.7144657373428345, + "learning_rate": 7.604956859639535e-05, + "loss": 2.5506, + "step": 11583 + }, + { + "epoch": 0.9348720845775159, + "grad_norm": 0.6596626043319702, + "learning_rate": 7.603424147007913e-05, + "loss": 2.4911, + "step": 11584 + }, + { + "epoch": 0.9349527883140989, + "grad_norm": 0.7090883851051331, + "learning_rate": 7.601891494105227e-05, + "loss": 2.5087, + "step": 11585 + }, + { + "epoch": 0.935033492050682, + "grad_norm": 0.6679760217666626, + "learning_rate": 7.600358900969671e-05, + "loss": 2.497, + "step": 11586 + }, + { + 
"epoch": 0.935114195787265, + "grad_norm": 0.6795344948768616, + "learning_rate": 7.598826367639447e-05, + "loss": 2.4839, + "step": 11587 + }, + { + "epoch": 0.9351948995238479, + "grad_norm": 0.6378790736198425, + "learning_rate": 7.597293894152744e-05, + "loss": 2.4656, + "step": 11588 + }, + { + "epoch": 0.9352756032604309, + "grad_norm": 0.6646658182144165, + "learning_rate": 7.595761480547762e-05, + "loss": 2.4739, + "step": 11589 + }, + { + "epoch": 0.935356306997014, + "grad_norm": 0.6662073731422424, + "learning_rate": 7.594229126862687e-05, + "loss": 2.4872, + "step": 11590 + }, + { + "epoch": 0.935437010733597, + "grad_norm": 0.6698113679885864, + "learning_rate": 7.592696833135708e-05, + "loss": 2.4964, + "step": 11591 + }, + { + "epoch": 0.93551771447018, + "grad_norm": 0.6520004272460938, + "learning_rate": 7.59116459940502e-05, + "loss": 2.5616, + "step": 11592 + }, + { + "epoch": 0.9355984182067629, + "grad_norm": 0.6675869226455688, + "learning_rate": 7.589632425708806e-05, + "loss": 2.4854, + "step": 11593 + }, + { + "epoch": 0.935679121943346, + "grad_norm": 0.6914103031158447, + "learning_rate": 7.588100312085251e-05, + "loss": 2.5252, + "step": 11594 + }, + { + "epoch": 0.935759825679929, + "grad_norm": 0.7283286452293396, + "learning_rate": 7.586568258572546e-05, + "loss": 2.543, + "step": 11595 + }, + { + "epoch": 0.935840529416512, + "grad_norm": 0.6881958246231079, + "learning_rate": 7.585036265208864e-05, + "loss": 2.4499, + "step": 11596 + }, + { + "epoch": 0.935921233153095, + "grad_norm": 0.7733677625656128, + "learning_rate": 7.58350433203239e-05, + "loss": 2.5595, + "step": 11597 + }, + { + "epoch": 0.936001936889678, + "grad_norm": 0.672711968421936, + "learning_rate": 7.58197245908131e-05, + "loss": 2.4757, + "step": 11598 + }, + { + "epoch": 0.936082640626261, + "grad_norm": 0.691780686378479, + "learning_rate": 7.580440646393794e-05, + "loss": 2.5134, + "step": 11599 + }, + { + "epoch": 0.936163344362844, + "grad_norm": 
0.6935102343559265, + "learning_rate": 7.578908894008021e-05, + "loss": 2.5128, + "step": 11600 + }, + { + "epoch": 0.936244048099427, + "grad_norm": 0.7005696892738342, + "learning_rate": 7.57737720196217e-05, + "loss": 2.5338, + "step": 11601 + }, + { + "epoch": 0.93632475183601, + "grad_norm": 0.6729815602302551, + "learning_rate": 7.575845570294409e-05, + "loss": 2.5373, + "step": 11602 + }, + { + "epoch": 0.936405455572593, + "grad_norm": 0.6694760918617249, + "learning_rate": 7.574313999042913e-05, + "loss": 2.5165, + "step": 11603 + }, + { + "epoch": 0.936486159309176, + "grad_norm": 0.6425337791442871, + "learning_rate": 7.572782488245854e-05, + "loss": 2.5102, + "step": 11604 + }, + { + "epoch": 0.936566863045759, + "grad_norm": 0.6613046526908875, + "learning_rate": 7.571251037941405e-05, + "loss": 2.5108, + "step": 11605 + }, + { + "epoch": 0.9366475667823421, + "grad_norm": 0.7396309971809387, + "learning_rate": 7.569719648167723e-05, + "loss": 2.5261, + "step": 11606 + }, + { + "epoch": 0.936728270518925, + "grad_norm": 0.6783239245414734, + "learning_rate": 7.568188318962981e-05, + "loss": 2.5725, + "step": 11607 + }, + { + "epoch": 0.936808974255508, + "grad_norm": 0.7591684460639954, + "learning_rate": 7.566657050365345e-05, + "loss": 2.5085, + "step": 11608 + }, + { + "epoch": 0.936889677992091, + "grad_norm": 0.6805615425109863, + "learning_rate": 7.565125842412974e-05, + "loss": 2.5598, + "step": 11609 + }, + { + "epoch": 0.9369703817286741, + "grad_norm": 0.680203378200531, + "learning_rate": 7.563594695144032e-05, + "loss": 2.5072, + "step": 11610 + }, + { + "epoch": 0.9370510854652571, + "grad_norm": 0.7035777568817139, + "learning_rate": 7.56206360859668e-05, + "loss": 2.4882, + "step": 11611 + }, + { + "epoch": 0.93713178920184, + "grad_norm": 0.7457048892974854, + "learning_rate": 7.560532582809075e-05, + "loss": 2.4975, + "step": 11612 + }, + { + "epoch": 0.937212492938423, + "grad_norm": 0.702055037021637, + "learning_rate": 
7.559001617819374e-05, + "loss": 2.5522, + "step": 11613 + }, + { + "epoch": 0.9372931966750061, + "grad_norm": 0.7618527412414551, + "learning_rate": 7.557470713665738e-05, + "loss": 2.5503, + "step": 11614 + }, + { + "epoch": 0.9373739004115891, + "grad_norm": 0.8611559867858887, + "learning_rate": 7.555939870386312e-05, + "loss": 2.4866, + "step": 11615 + }, + { + "epoch": 0.937454604148172, + "grad_norm": 0.7285227179527283, + "learning_rate": 7.554409088019254e-05, + "loss": 2.4855, + "step": 11616 + }, + { + "epoch": 0.937535307884755, + "grad_norm": 0.7512121796607971, + "learning_rate": 7.552878366602716e-05, + "loss": 2.5496, + "step": 11617 + }, + { + "epoch": 0.9376160116213381, + "grad_norm": 0.7353625297546387, + "learning_rate": 7.551347706174844e-05, + "loss": 2.5754, + "step": 11618 + }, + { + "epoch": 0.9376967153579211, + "grad_norm": 0.7131205797195435, + "learning_rate": 7.549817106773788e-05, + "loss": 2.4927, + "step": 11619 + }, + { + "epoch": 0.9377774190945041, + "grad_norm": 0.6562477946281433, + "learning_rate": 7.548286568437695e-05, + "loss": 2.5247, + "step": 11620 + }, + { + "epoch": 0.937858122831087, + "grad_norm": 0.7094948887825012, + "learning_rate": 7.546756091204713e-05, + "loss": 2.5084, + "step": 11621 + }, + { + "epoch": 0.9379388265676701, + "grad_norm": 0.6890475153923035, + "learning_rate": 7.545225675112977e-05, + "loss": 2.5178, + "step": 11622 + }, + { + "epoch": 0.9380195303042531, + "grad_norm": 0.6801474094390869, + "learning_rate": 7.543695320200634e-05, + "loss": 2.5457, + "step": 11623 + }, + { + "epoch": 0.9381002340408361, + "grad_norm": 0.7093712687492371, + "learning_rate": 7.54216502650583e-05, + "loss": 2.6122, + "step": 11624 + }, + { + "epoch": 0.9381809377774191, + "grad_norm": 0.7246927618980408, + "learning_rate": 7.540634794066695e-05, + "loss": 2.5251, + "step": 11625 + }, + { + "epoch": 0.9382616415140022, + "grad_norm": 0.7358111143112183, + "learning_rate": 7.539104622921368e-05, + "loss": 2.5444, 
+ "step": 11626 + }, + { + "epoch": 0.9383423452505851, + "grad_norm": 0.6915993690490723, + "learning_rate": 7.53757451310799e-05, + "loss": 2.448, + "step": 11627 + }, + { + "epoch": 0.9384230489871681, + "grad_norm": 0.6864039301872253, + "learning_rate": 7.536044464664689e-05, + "loss": 2.5267, + "step": 11628 + }, + { + "epoch": 0.9385037527237511, + "grad_norm": 0.664799690246582, + "learning_rate": 7.534514477629602e-05, + "loss": 2.5602, + "step": 11629 + }, + { + "epoch": 0.9385844564603341, + "grad_norm": 0.6770062446594238, + "learning_rate": 7.532984552040862e-05, + "loss": 2.5034, + "step": 11630 + }, + { + "epoch": 0.9386651601969171, + "grad_norm": 0.6961095929145813, + "learning_rate": 7.531454687936592e-05, + "loss": 2.4523, + "step": 11631 + }, + { + "epoch": 0.9387458639335001, + "grad_norm": 0.6776804327964783, + "learning_rate": 7.529924885354924e-05, + "loss": 2.5526, + "step": 11632 + }, + { + "epoch": 0.9388265676700831, + "grad_norm": 0.785796582698822, + "learning_rate": 7.528395144333988e-05, + "loss": 2.5256, + "step": 11633 + }, + { + "epoch": 0.9389072714066661, + "grad_norm": 0.7016655206680298, + "learning_rate": 7.526865464911902e-05, + "loss": 2.4781, + "step": 11634 + }, + { + "epoch": 0.9389879751432492, + "grad_norm": 0.7027767300605774, + "learning_rate": 7.525335847126795e-05, + "loss": 2.5287, + "step": 11635 + }, + { + "epoch": 0.9390686788798321, + "grad_norm": 0.710624098777771, + "learning_rate": 7.523806291016787e-05, + "loss": 2.5486, + "step": 11636 + }, + { + "epoch": 0.9391493826164151, + "grad_norm": 0.7029656767845154, + "learning_rate": 7.52227679662e-05, + "loss": 2.5244, + "step": 11637 + }, + { + "epoch": 0.9392300863529981, + "grad_norm": 0.7417333722114563, + "learning_rate": 7.520747363974551e-05, + "loss": 2.5561, + "step": 11638 + }, + { + "epoch": 0.9393107900895812, + "grad_norm": 0.6595067381858826, + "learning_rate": 7.519217993118559e-05, + "loss": 2.617, + "step": 11639 + }, + { + "epoch": 
0.9393914938261642, + "grad_norm": 0.6808187365531921, + "learning_rate": 7.517688684090141e-05, + "loss": 2.5279, + "step": 11640 + }, + { + "epoch": 0.9394721975627471, + "grad_norm": 0.6618706583976746, + "learning_rate": 7.516159436927408e-05, + "loss": 2.4976, + "step": 11641 + }, + { + "epoch": 0.9395529012993301, + "grad_norm": 0.6979385018348694, + "learning_rate": 7.514630251668475e-05, + "loss": 2.4542, + "step": 11642 + }, + { + "epoch": 0.9396336050359132, + "grad_norm": 0.6380844116210938, + "learning_rate": 7.513101128351454e-05, + "loss": 2.48, + "step": 11643 + }, + { + "epoch": 0.9397143087724962, + "grad_norm": 0.6390014290809631, + "learning_rate": 7.511572067014452e-05, + "loss": 2.5111, + "step": 11644 + }, + { + "epoch": 0.9397950125090792, + "grad_norm": 0.7592498064041138, + "learning_rate": 7.510043067695578e-05, + "loss": 2.5161, + "step": 11645 + }, + { + "epoch": 0.9398757162456621, + "grad_norm": 0.6269322037696838, + "learning_rate": 7.508514130432945e-05, + "loss": 2.491, + "step": 11646 + }, + { + "epoch": 0.9399564199822452, + "grad_norm": 0.6372053623199463, + "learning_rate": 7.506985255264646e-05, + "loss": 2.4826, + "step": 11647 + }, + { + "epoch": 0.9400371237188282, + "grad_norm": 0.6962460875511169, + "learning_rate": 7.505456442228794e-05, + "loss": 2.5605, + "step": 11648 + }, + { + "epoch": 0.9401178274554112, + "grad_norm": 0.7931656241416931, + "learning_rate": 7.503927691363491e-05, + "loss": 2.4909, + "step": 11649 + }, + { + "epoch": 0.9401985311919941, + "grad_norm": 0.688792884349823, + "learning_rate": 7.502399002706832e-05, + "loss": 2.4888, + "step": 11650 + }, + { + "epoch": 0.9402792349285772, + "grad_norm": 0.6683691143989563, + "learning_rate": 7.500870376296918e-05, + "loss": 2.5233, + "step": 11651 + }, + { + "epoch": 0.9403599386651602, + "grad_norm": 0.6537527441978455, + "learning_rate": 7.499341812171846e-05, + "loss": 2.5061, + "step": 11652 + }, + { + "epoch": 0.9404406424017432, + "grad_norm": 
0.6657658219337463, + "learning_rate": 7.497813310369717e-05, + "loss": 2.4844, + "step": 11653 + }, + { + "epoch": 0.9405213461383262, + "grad_norm": 0.6865110993385315, + "learning_rate": 7.496284870928618e-05, + "loss": 2.4986, + "step": 11654 + }, + { + "epoch": 0.9406020498749093, + "grad_norm": 0.6724923849105835, + "learning_rate": 7.494756493886644e-05, + "loss": 2.4818, + "step": 11655 + }, + { + "epoch": 0.9406827536114922, + "grad_norm": 0.6478626728057861, + "learning_rate": 7.493228179281892e-05, + "loss": 2.5321, + "step": 11656 + }, + { + "epoch": 0.9407634573480752, + "grad_norm": 0.6474425792694092, + "learning_rate": 7.491699927152443e-05, + "loss": 2.5276, + "step": 11657 + }, + { + "epoch": 0.9408441610846582, + "grad_norm": 0.6736220717430115, + "learning_rate": 7.490171737536387e-05, + "loss": 2.4734, + "step": 11658 + }, + { + "epoch": 0.9409248648212413, + "grad_norm": 0.6714746952056885, + "learning_rate": 7.488643610471815e-05, + "loss": 2.5754, + "step": 11659 + }, + { + "epoch": 0.9410055685578242, + "grad_norm": 0.6714532375335693, + "learning_rate": 7.487115545996805e-05, + "loss": 2.4855, + "step": 11660 + }, + { + "epoch": 0.9410862722944072, + "grad_norm": 0.7601683139801025, + "learning_rate": 7.485587544149447e-05, + "loss": 2.4887, + "step": 11661 + }, + { + "epoch": 0.9411669760309902, + "grad_norm": 0.7655646204948425, + "learning_rate": 7.484059604967821e-05, + "loss": 2.4904, + "step": 11662 + }, + { + "epoch": 0.9412476797675733, + "grad_norm": 0.6841822862625122, + "learning_rate": 7.482531728490006e-05, + "loss": 2.5272, + "step": 11663 + }, + { + "epoch": 0.9413283835041563, + "grad_norm": 0.7683621048927307, + "learning_rate": 7.481003914754078e-05, + "loss": 2.5218, + "step": 11664 + }, + { + "epoch": 0.9414090872407392, + "grad_norm": 0.6597647070884705, + "learning_rate": 7.479476163798124e-05, + "loss": 2.4925, + "step": 11665 + }, + { + "epoch": 0.9414897909773222, + "grad_norm": 0.6573941111564636, + 
"learning_rate": 7.477948475660208e-05, + "loss": 2.4854, + "step": 11666 + }, + { + "epoch": 0.9415704947139053, + "grad_norm": 0.6639125943183899, + "learning_rate": 7.476420850378407e-05, + "loss": 2.5207, + "step": 11667 + }, + { + "epoch": 0.9416511984504883, + "grad_norm": 0.6770366430282593, + "learning_rate": 7.474893287990796e-05, + "loss": 2.5167, + "step": 11668 + }, + { + "epoch": 0.9417319021870713, + "grad_norm": 0.6908389925956726, + "learning_rate": 7.473365788535447e-05, + "loss": 2.4606, + "step": 11669 + }, + { + "epoch": 0.9418126059236542, + "grad_norm": 0.6625069975852966, + "learning_rate": 7.471838352050427e-05, + "loss": 2.5344, + "step": 11670 + }, + { + "epoch": 0.9418933096602373, + "grad_norm": 0.6690869331359863, + "learning_rate": 7.470310978573803e-05, + "loss": 2.4507, + "step": 11671 + }, + { + "epoch": 0.9419740133968203, + "grad_norm": 0.6741886734962463, + "learning_rate": 7.468783668143645e-05, + "loss": 2.5755, + "step": 11672 + }, + { + "epoch": 0.9420547171334033, + "grad_norm": 0.6876424551010132, + "learning_rate": 7.467256420798009e-05, + "loss": 2.483, + "step": 11673 + }, + { + "epoch": 0.9421354208699863, + "grad_norm": 0.7044318318367004, + "learning_rate": 7.465729236574965e-05, + "loss": 2.5025, + "step": 11674 + }, + { + "epoch": 0.9422161246065693, + "grad_norm": 0.6608660817146301, + "learning_rate": 7.46420211551258e-05, + "loss": 2.5253, + "step": 11675 + }, + { + "epoch": 0.9422968283431523, + "grad_norm": 0.6944260001182556, + "learning_rate": 7.4626750576489e-05, + "loss": 2.5002, + "step": 11676 + }, + { + "epoch": 0.9423775320797353, + "grad_norm": 0.7304964065551758, + "learning_rate": 7.46114806302199e-05, + "loss": 2.5501, + "step": 11677 + }, + { + "epoch": 0.9424582358163183, + "grad_norm": 0.688525378704071, + "learning_rate": 7.459621131669911e-05, + "loss": 2.5291, + "step": 11678 + }, + { + "epoch": 0.9425389395529012, + "grad_norm": 0.7388432025909424, + "learning_rate": 7.45809426363071e-05, + 
"loss": 2.5391, + "step": 11679 + }, + { + "epoch": 0.9426196432894843, + "grad_norm": 0.6777819991111755, + "learning_rate": 7.456567458942447e-05, + "loss": 2.5425, + "step": 11680 + }, + { + "epoch": 0.9427003470260673, + "grad_norm": 0.7208845615386963, + "learning_rate": 7.455040717643169e-05, + "loss": 2.5306, + "step": 11681 + }, + { + "epoch": 0.9427810507626503, + "grad_norm": 0.745384693145752, + "learning_rate": 7.453514039770934e-05, + "loss": 2.4695, + "step": 11682 + }, + { + "epoch": 0.9428617544992333, + "grad_norm": 0.7088115215301514, + "learning_rate": 7.451987425363782e-05, + "loss": 2.5413, + "step": 11683 + }, + { + "epoch": 0.9429424582358163, + "grad_norm": 0.7287998795509338, + "learning_rate": 7.450460874459762e-05, + "loss": 2.5773, + "step": 11684 + }, + { + "epoch": 0.9430231619723993, + "grad_norm": 0.6897092461585999, + "learning_rate": 7.448934387096928e-05, + "loss": 2.5255, + "step": 11685 + }, + { + "epoch": 0.9431038657089823, + "grad_norm": 0.6227227449417114, + "learning_rate": 7.447407963313313e-05, + "loss": 2.5027, + "step": 11686 + }, + { + "epoch": 0.9431845694455653, + "grad_norm": 0.6954305768013, + "learning_rate": 7.445881603146964e-05, + "loss": 2.5477, + "step": 11687 + }, + { + "epoch": 0.9432652731821484, + "grad_norm": 0.7860052585601807, + "learning_rate": 7.444355306635924e-05, + "loss": 2.469, + "step": 11688 + }, + { + "epoch": 0.9433459769187313, + "grad_norm": 0.6851965188980103, + "learning_rate": 7.442829073818227e-05, + "loss": 2.4997, + "step": 11689 + }, + { + "epoch": 0.9434266806553143, + "grad_norm": 0.7011744379997253, + "learning_rate": 7.441302904731916e-05, + "loss": 2.5399, + "step": 11690 + }, + { + "epoch": 0.9435073843918973, + "grad_norm": 0.703167200088501, + "learning_rate": 7.439776799415028e-05, + "loss": 2.5323, + "step": 11691 + }, + { + "epoch": 0.9435880881284804, + "grad_norm": 0.6747310161590576, + "learning_rate": 7.438250757905591e-05, + "loss": 2.5406, + "step": 11692 + }, + { + 
"epoch": 0.9436687918650634, + "grad_norm": 0.8631153106689453, + "learning_rate": 7.436724780241642e-05, + "loss": 2.5215, + "step": 11693 + }, + { + "epoch": 0.9437494956016463, + "grad_norm": 0.6919798254966736, + "learning_rate": 7.435198866461214e-05, + "loss": 2.4654, + "step": 11694 + }, + { + "epoch": 0.9438301993382293, + "grad_norm": 0.6747070550918579, + "learning_rate": 7.433673016602332e-05, + "loss": 2.5186, + "step": 11695 + }, + { + "epoch": 0.9439109030748124, + "grad_norm": 0.7368776798248291, + "learning_rate": 7.432147230703026e-05, + "loss": 2.5365, + "step": 11696 + }, + { + "epoch": 0.9439916068113954, + "grad_norm": 0.7443639636039734, + "learning_rate": 7.430621508801325e-05, + "loss": 2.4966, + "step": 11697 + }, + { + "epoch": 0.9440723105479784, + "grad_norm": 0.7371395230293274, + "learning_rate": 7.429095850935255e-05, + "loss": 2.4638, + "step": 11698 + }, + { + "epoch": 0.9441530142845613, + "grad_norm": 0.6917321681976318, + "learning_rate": 7.427570257142832e-05, + "loss": 2.5341, + "step": 11699 + }, + { + "epoch": 0.9442337180211444, + "grad_norm": 0.7704101800918579, + "learning_rate": 7.426044727462085e-05, + "loss": 2.5144, + "step": 11700 + }, + { + "epoch": 0.9443144217577274, + "grad_norm": 0.692197859287262, + "learning_rate": 7.424519261931036e-05, + "loss": 2.5293, + "step": 11701 + }, + { + "epoch": 0.9443951254943104, + "grad_norm": 0.7305885553359985, + "learning_rate": 7.422993860587695e-05, + "loss": 2.5236, + "step": 11702 + }, + { + "epoch": 0.9444758292308933, + "grad_norm": 0.6955052018165588, + "learning_rate": 7.421468523470081e-05, + "loss": 2.4765, + "step": 11703 + }, + { + "epoch": 0.9445565329674764, + "grad_norm": 0.7394432425498962, + "learning_rate": 7.419943250616216e-05, + "loss": 2.5053, + "step": 11704 + }, + { + "epoch": 0.9446372367040594, + "grad_norm": 0.679044246673584, + "learning_rate": 7.418418042064108e-05, + "loss": 2.5413, + "step": 11705 + }, + { + "epoch": 0.9447179404406424, + 
"grad_norm": 0.7153440117835999, + "learning_rate": 7.41689289785177e-05, + "loss": 2.4938, + "step": 11706 + }, + { + "epoch": 0.9447986441772254, + "grad_norm": 0.697068452835083, + "learning_rate": 7.415367818017217e-05, + "loss": 2.5157, + "step": 11707 + }, + { + "epoch": 0.9448793479138085, + "grad_norm": 0.664616048336029, + "learning_rate": 7.41384280259845e-05, + "loss": 2.4859, + "step": 11708 + }, + { + "epoch": 0.9449600516503914, + "grad_norm": 0.7275365591049194, + "learning_rate": 7.412317851633479e-05, + "loss": 2.523, + "step": 11709 + }, + { + "epoch": 0.9450407553869744, + "grad_norm": 0.7408944368362427, + "learning_rate": 7.410792965160318e-05, + "loss": 2.4994, + "step": 11710 + }, + { + "epoch": 0.9451214591235574, + "grad_norm": 0.7222678065299988, + "learning_rate": 7.40926814321696e-05, + "loss": 2.5084, + "step": 11711 + }, + { + "epoch": 0.9452021628601405, + "grad_norm": 0.7242292761802673, + "learning_rate": 7.407743385841412e-05, + "loss": 2.5165, + "step": 11712 + }, + { + "epoch": 0.9452828665967234, + "grad_norm": 0.6634014844894409, + "learning_rate": 7.406218693071677e-05, + "loss": 2.4947, + "step": 11713 + }, + { + "epoch": 0.9453635703333064, + "grad_norm": 0.8126605153083801, + "learning_rate": 7.404694064945751e-05, + "loss": 2.5553, + "step": 11714 + }, + { + "epoch": 0.9454442740698894, + "grad_norm": 0.679344654083252, + "learning_rate": 7.403169501501632e-05, + "loss": 2.5475, + "step": 11715 + }, + { + "epoch": 0.9455249778064725, + "grad_norm": 0.7584314346313477, + "learning_rate": 7.401645002777318e-05, + "loss": 2.5498, + "step": 11716 + }, + { + "epoch": 0.9456056815430555, + "grad_norm": 0.7191590666770935, + "learning_rate": 7.400120568810806e-05, + "loss": 2.5161, + "step": 11717 + }, + { + "epoch": 0.9456863852796384, + "grad_norm": 0.6738762855529785, + "learning_rate": 7.398596199640084e-05, + "loss": 2.4819, + "step": 11718 + }, + { + "epoch": 0.9457670890162214, + "grad_norm": 0.7305885553359985, + 
"learning_rate": 7.397071895303143e-05, + "loss": 2.4842, + "step": 11719 + }, + { + "epoch": 0.9458477927528045, + "grad_norm": 0.6885291337966919, + "learning_rate": 7.395547655837976e-05, + "loss": 2.5016, + "step": 11720 + }, + { + "epoch": 0.9459284964893875, + "grad_norm": 0.6807307600975037, + "learning_rate": 7.394023481282568e-05, + "loss": 2.4949, + "step": 11721 + }, + { + "epoch": 0.9460092002259705, + "grad_norm": 0.6683849096298218, + "learning_rate": 7.392499371674907e-05, + "loss": 2.4974, + "step": 11722 + }, + { + "epoch": 0.9460899039625534, + "grad_norm": 0.6615697741508484, + "learning_rate": 7.39097532705298e-05, + "loss": 2.4744, + "step": 11723 + }, + { + "epoch": 0.9461706076991365, + "grad_norm": 0.6463690996170044, + "learning_rate": 7.389451347454765e-05, + "loss": 2.478, + "step": 11724 + }, + { + "epoch": 0.9462513114357195, + "grad_norm": 0.6848269701004028, + "learning_rate": 7.387927432918247e-05, + "loss": 2.5491, + "step": 11725 + }, + { + "epoch": 0.9463320151723025, + "grad_norm": 0.7251551747322083, + "learning_rate": 7.386403583481409e-05, + "loss": 2.4936, + "step": 11726 + }, + { + "epoch": 0.9464127189088855, + "grad_norm": 0.6562095284461975, + "learning_rate": 7.384879799182223e-05, + "loss": 2.4895, + "step": 11727 + }, + { + "epoch": 0.9464934226454685, + "grad_norm": 0.6891352534294128, + "learning_rate": 7.383356080058668e-05, + "loss": 2.508, + "step": 11728 + }, + { + "epoch": 0.9465741263820515, + "grad_norm": 0.7220255136489868, + "learning_rate": 7.381832426148719e-05, + "loss": 2.5181, + "step": 11729 + }, + { + "epoch": 0.9466548301186345, + "grad_norm": 0.7213689088821411, + "learning_rate": 7.38030883749035e-05, + "loss": 2.5136, + "step": 11730 + }, + { + "epoch": 0.9467355338552175, + "grad_norm": 0.6711129546165466, + "learning_rate": 7.378785314121535e-05, + "loss": 2.5463, + "step": 11731 + }, + { + "epoch": 0.9468162375918004, + "grad_norm": 0.6380139589309692, + "learning_rate": 7.377261856080239e-05, 
+ "loss": 2.5092, + "step": 11732 + }, + { + "epoch": 0.9468969413283835, + "grad_norm": 0.66046142578125, + "learning_rate": 7.375738463404437e-05, + "loss": 2.5561, + "step": 11733 + }, + { + "epoch": 0.9469776450649665, + "grad_norm": 0.6857354044914246, + "learning_rate": 7.37421513613209e-05, + "loss": 2.5774, + "step": 11734 + }, + { + "epoch": 0.9470583488015495, + "grad_norm": 0.6811589598655701, + "learning_rate": 7.372691874301163e-05, + "loss": 2.4918, + "step": 11735 + }, + { + "epoch": 0.9471390525381325, + "grad_norm": 0.6401017308235168, + "learning_rate": 7.37116867794963e-05, + "loss": 2.4994, + "step": 11736 + }, + { + "epoch": 0.9472197562747156, + "grad_norm": 0.6967078447341919, + "learning_rate": 7.369645547115438e-05, + "loss": 2.5809, + "step": 11737 + }, + { + "epoch": 0.9473004600112985, + "grad_norm": 0.6695219278335571, + "learning_rate": 7.368122481836557e-05, + "loss": 2.4735, + "step": 11738 + }, + { + "epoch": 0.9473811637478815, + "grad_norm": 0.6540528535842896, + "learning_rate": 7.366599482150944e-05, + "loss": 2.4998, + "step": 11739 + }, + { + "epoch": 0.9474618674844645, + "grad_norm": 0.700683057308197, + "learning_rate": 7.365076548096556e-05, + "loss": 2.5258, + "step": 11740 + }, + { + "epoch": 0.9475425712210476, + "grad_norm": 0.7125419974327087, + "learning_rate": 7.363553679711347e-05, + "loss": 2.4653, + "step": 11741 + }, + { + "epoch": 0.9476232749576305, + "grad_norm": 0.7285346984863281, + "learning_rate": 7.362030877033275e-05, + "loss": 2.5523, + "step": 11742 + }, + { + "epoch": 0.9477039786942135, + "grad_norm": 0.7310814261436462, + "learning_rate": 7.360508140100288e-05, + "loss": 2.5027, + "step": 11743 + }, + { + "epoch": 0.9477846824307965, + "grad_norm": 0.746961772441864, + "learning_rate": 7.358985468950335e-05, + "loss": 2.5485, + "step": 11744 + }, + { + "epoch": 0.9478653861673796, + "grad_norm": 0.6880186796188354, + "learning_rate": 7.357462863621369e-05, + "loss": 2.5243, + "step": 11745 + }, + { 
+ "epoch": 0.9479460899039626, + "grad_norm": 0.6406471133232117, + "learning_rate": 7.355940324151339e-05, + "loss": 2.512, + "step": 11746 + }, + { + "epoch": 0.9480267936405455, + "grad_norm": 0.6503005027770996, + "learning_rate": 7.354417850578184e-05, + "loss": 2.5318, + "step": 11747 + }, + { + "epoch": 0.9481074973771285, + "grad_norm": 0.6458879113197327, + "learning_rate": 7.352895442939852e-05, + "loss": 2.5451, + "step": 11748 + }, + { + "epoch": 0.9481882011137116, + "grad_norm": 0.7382936477661133, + "learning_rate": 7.351373101274288e-05, + "loss": 2.5393, + "step": 11749 + }, + { + "epoch": 0.9482689048502946, + "grad_norm": 0.7366087436676025, + "learning_rate": 7.349850825619429e-05, + "loss": 2.5591, + "step": 11750 + }, + { + "epoch": 0.9483496085868776, + "grad_norm": 0.6652588248252869, + "learning_rate": 7.348328616013213e-05, + "loss": 2.5348, + "step": 11751 + }, + { + "epoch": 0.9484303123234605, + "grad_norm": 0.7515435814857483, + "learning_rate": 7.346806472493584e-05, + "loss": 2.5208, + "step": 11752 + }, + { + "epoch": 0.9485110160600436, + "grad_norm": 0.7161263227462769, + "learning_rate": 7.345284395098469e-05, + "loss": 2.5518, + "step": 11753 + }, + { + "epoch": 0.9485917197966266, + "grad_norm": 0.7433953285217285, + "learning_rate": 7.343762383865807e-05, + "loss": 2.5914, + "step": 11754 + }, + { + "epoch": 0.9486724235332096, + "grad_norm": 0.674991250038147, + "learning_rate": 7.342240438833532e-05, + "loss": 2.5566, + "step": 11755 + }, + { + "epoch": 0.9487531272697926, + "grad_norm": 0.7511670589447021, + "learning_rate": 7.34071856003957e-05, + "loss": 2.5253, + "step": 11756 + }, + { + "epoch": 0.9488338310063756, + "grad_norm": 0.6672492623329163, + "learning_rate": 7.339196747521853e-05, + "loss": 2.4887, + "step": 11757 + }, + { + "epoch": 0.9489145347429586, + "grad_norm": 0.6826158761978149, + "learning_rate": 7.337675001318312e-05, + "loss": 2.5072, + "step": 11758 + }, + { + "epoch": 0.9489952384795416, + 
"grad_norm": 0.7189450860023499, + "learning_rate": 7.336153321466867e-05, + "loss": 2.5583, + "step": 11759 + }, + { + "epoch": 0.9490759422161246, + "grad_norm": 0.6923015117645264, + "learning_rate": 7.33463170800544e-05, + "loss": 2.5416, + "step": 11760 + }, + { + "epoch": 0.9491566459527077, + "grad_norm": 0.690060555934906, + "learning_rate": 7.333110160971963e-05, + "loss": 2.4931, + "step": 11761 + }, + { + "epoch": 0.9492373496892906, + "grad_norm": 0.6887977719306946, + "learning_rate": 7.331588680404354e-05, + "loss": 2.4676, + "step": 11762 + }, + { + "epoch": 0.9493180534258736, + "grad_norm": 0.8573753237724304, + "learning_rate": 7.330067266340528e-05, + "loss": 2.5074, + "step": 11763 + }, + { + "epoch": 0.9493987571624566, + "grad_norm": 0.6760974526405334, + "learning_rate": 7.328545918818403e-05, + "loss": 2.5395, + "step": 11764 + }, + { + "epoch": 0.9494794608990397, + "grad_norm": 0.6946160197257996, + "learning_rate": 7.327024637875901e-05, + "loss": 2.535, + "step": 11765 + }, + { + "epoch": 0.9495601646356226, + "grad_norm": 0.6851378679275513, + "learning_rate": 7.32550342355093e-05, + "loss": 2.487, + "step": 11766 + }, + { + "epoch": 0.9496408683722056, + "grad_norm": 0.6480168104171753, + "learning_rate": 7.323982275881404e-05, + "loss": 2.513, + "step": 11767 + }, + { + "epoch": 0.9497215721087886, + "grad_norm": 0.6492218971252441, + "learning_rate": 7.322461194905239e-05, + "loss": 2.4532, + "step": 11768 + }, + { + "epoch": 0.9498022758453717, + "grad_norm": 0.6670051217079163, + "learning_rate": 7.320940180660337e-05, + "loss": 2.5258, + "step": 11769 + }, + { + "epoch": 0.9498829795819547, + "grad_norm": 0.6678066253662109, + "learning_rate": 7.319419233184608e-05, + "loss": 2.5388, + "step": 11770 + }, + { + "epoch": 0.9499636833185376, + "grad_norm": 0.693545937538147, + "learning_rate": 7.31789835251596e-05, + "loss": 2.5304, + "step": 11771 + }, + { + "epoch": 0.9500443870551206, + "grad_norm": 0.680486798286438, + 
"learning_rate": 7.316377538692297e-05, + "loss": 2.5024, + "step": 11772 + }, + { + "epoch": 0.9501250907917037, + "grad_norm": 0.7271847128868103, + "learning_rate": 7.314856791751518e-05, + "loss": 2.5947, + "step": 11773 + }, + { + "epoch": 0.9502057945282867, + "grad_norm": 0.6889839172363281, + "learning_rate": 7.31333611173153e-05, + "loss": 2.5135, + "step": 11774 + }, + { + "epoch": 0.9502864982648697, + "grad_norm": 0.7431777119636536, + "learning_rate": 7.311815498670226e-05, + "loss": 2.5856, + "step": 11775 + }, + { + "epoch": 0.9503672020014526, + "grad_norm": 0.7168101072311401, + "learning_rate": 7.310294952605508e-05, + "loss": 2.4383, + "step": 11776 + }, + { + "epoch": 0.9504479057380357, + "grad_norm": 0.654803454875946, + "learning_rate": 7.308774473575271e-05, + "loss": 2.4908, + "step": 11777 + }, + { + "epoch": 0.9505286094746187, + "grad_norm": 0.6810718774795532, + "learning_rate": 7.307254061617412e-05, + "loss": 2.5073, + "step": 11778 + }, + { + "epoch": 0.9506093132112017, + "grad_norm": 0.637980043888092, + "learning_rate": 7.305733716769817e-05, + "loss": 2.5686, + "step": 11779 + }, + { + "epoch": 0.9506900169477847, + "grad_norm": 0.6549471020698547, + "learning_rate": 7.30421343907038e-05, + "loss": 2.5502, + "step": 11780 + }, + { + "epoch": 0.9507707206843676, + "grad_norm": 0.7087163329124451, + "learning_rate": 7.302693228556994e-05, + "loss": 2.4773, + "step": 11781 + }, + { + "epoch": 0.9508514244209507, + "grad_norm": 0.6230717897415161, + "learning_rate": 7.301173085267541e-05, + "loss": 2.4806, + "step": 11782 + }, + { + "epoch": 0.9509321281575337, + "grad_norm": 0.7145688533782959, + "learning_rate": 7.299653009239911e-05, + "loss": 2.5259, + "step": 11783 + }, + { + "epoch": 0.9510128318941167, + "grad_norm": 0.679100513458252, + "learning_rate": 7.298133000511988e-05, + "loss": 2.5012, + "step": 11784 + }, + { + "epoch": 0.9510935356306996, + "grad_norm": 0.7057691216468811, + "learning_rate": 7.29661305912165e-05, + 
"loss": 2.4826, + "step": 11785 + }, + { + "epoch": 0.9511742393672827, + "grad_norm": 0.65343177318573, + "learning_rate": 7.295093185106782e-05, + "loss": 2.4553, + "step": 11786 + }, + { + "epoch": 0.9512549431038657, + "grad_norm": 0.7948461174964905, + "learning_rate": 7.293573378505268e-05, + "loss": 2.478, + "step": 11787 + }, + { + "epoch": 0.9513356468404487, + "grad_norm": 0.6511468887329102, + "learning_rate": 7.292053639354975e-05, + "loss": 2.4862, + "step": 11788 + }, + { + "epoch": 0.9514163505770317, + "grad_norm": 0.7293919324874878, + "learning_rate": 7.290533967693782e-05, + "loss": 2.5956, + "step": 11789 + }, + { + "epoch": 0.9514970543136148, + "grad_norm": 0.6691277623176575, + "learning_rate": 7.289014363559567e-05, + "loss": 2.5659, + "step": 11790 + }, + { + "epoch": 0.9515777580501977, + "grad_norm": 0.7054625749588013, + "learning_rate": 7.287494826990203e-05, + "loss": 2.5875, + "step": 11791 + }, + { + "epoch": 0.9516584617867807, + "grad_norm": 0.6597220301628113, + "learning_rate": 7.285975358023555e-05, + "loss": 2.5215, + "step": 11792 + }, + { + "epoch": 0.9517391655233637, + "grad_norm": 0.6719489097595215, + "learning_rate": 7.284455956697497e-05, + "loss": 2.4752, + "step": 11793 + }, + { + "epoch": 0.9518198692599468, + "grad_norm": 0.7325637340545654, + "learning_rate": 7.2829366230499e-05, + "loss": 2.5504, + "step": 11794 + }, + { + "epoch": 0.9519005729965297, + "grad_norm": 0.637668788433075, + "learning_rate": 7.281417357118619e-05, + "loss": 2.5105, + "step": 11795 + }, + { + "epoch": 0.9519812767331127, + "grad_norm": 0.7815340161323547, + "learning_rate": 7.279898158941525e-05, + "loss": 2.4998, + "step": 11796 + }, + { + "epoch": 0.9520619804696957, + "grad_norm": 0.6555821299552917, + "learning_rate": 7.278379028556481e-05, + "loss": 2.4326, + "step": 11797 + }, + { + "epoch": 0.9521426842062788, + "grad_norm": 0.7298933863639832, + "learning_rate": 7.276859966001344e-05, + "loss": 2.4779, + "step": 11798 + }, + { + 
"epoch": 0.9522233879428618, + "grad_norm": 0.683455765247345, + "learning_rate": 7.275340971313974e-05, + "loss": 2.4416, + "step": 11799 + }, + { + "epoch": 0.9523040916794447, + "grad_norm": 0.6353151798248291, + "learning_rate": 7.273822044532232e-05, + "loss": 2.4777, + "step": 11800 + }, + { + "epoch": 0.9523847954160277, + "grad_norm": 0.6898894309997559, + "learning_rate": 7.27230318569397e-05, + "loss": 2.5351, + "step": 11801 + }, + { + "epoch": 0.9524654991526108, + "grad_norm": 0.6528690457344055, + "learning_rate": 7.270784394837041e-05, + "loss": 2.5145, + "step": 11802 + }, + { + "epoch": 0.9525462028891938, + "grad_norm": 0.6432619094848633, + "learning_rate": 7.269265671999304e-05, + "loss": 2.5002, + "step": 11803 + }, + { + "epoch": 0.9526269066257768, + "grad_norm": 0.7317861318588257, + "learning_rate": 7.267747017218601e-05, + "loss": 2.5318, + "step": 11804 + }, + { + "epoch": 0.9527076103623597, + "grad_norm": 0.7581185698509216, + "learning_rate": 7.266228430532785e-05, + "loss": 2.5313, + "step": 11805 + }, + { + "epoch": 0.9527883140989428, + "grad_norm": 0.7316486239433289, + "learning_rate": 7.264709911979702e-05, + "loss": 2.5147, + "step": 11806 + }, + { + "epoch": 0.9528690178355258, + "grad_norm": 0.7378978729248047, + "learning_rate": 7.263191461597199e-05, + "loss": 2.5149, + "step": 11807 + }, + { + "epoch": 0.9529497215721088, + "grad_norm": 0.6603738069534302, + "learning_rate": 7.26167307942312e-05, + "loss": 2.4684, + "step": 11808 + }, + { + "epoch": 0.9530304253086918, + "grad_norm": 0.7566502690315247, + "learning_rate": 7.260154765495302e-05, + "loss": 2.5535, + "step": 11809 + }, + { + "epoch": 0.9531111290452748, + "grad_norm": 0.693067729473114, + "learning_rate": 7.258636519851596e-05, + "loss": 2.5103, + "step": 11810 + }, + { + "epoch": 0.9531918327818578, + "grad_norm": 0.7049208283424377, + "learning_rate": 7.257118342529826e-05, + "loss": 2.5482, + "step": 11811 + }, + { + "epoch": 0.9532725365184408, + 
"grad_norm": 0.6986998319625854, + "learning_rate": 7.25560023356784e-05, + "loss": 2.4921, + "step": 11812 + }, + { + "epoch": 0.9533532402550238, + "grad_norm": 0.7079482674598694, + "learning_rate": 7.254082193003476e-05, + "loss": 2.5339, + "step": 11813 + }, + { + "epoch": 0.9534339439916069, + "grad_norm": 0.7283922433853149, + "learning_rate": 7.252564220874553e-05, + "loss": 2.5056, + "step": 11814 + }, + { + "epoch": 0.9535146477281898, + "grad_norm": 0.6965533494949341, + "learning_rate": 7.251046317218914e-05, + "loss": 2.5512, + "step": 11815 + }, + { + "epoch": 0.9535953514647728, + "grad_norm": 0.7367159128189087, + "learning_rate": 7.24952848207439e-05, + "loss": 2.5015, + "step": 11816 + }, + { + "epoch": 0.9536760552013558, + "grad_norm": 0.6959818601608276, + "learning_rate": 7.248010715478802e-05, + "loss": 2.4969, + "step": 11817 + }, + { + "epoch": 0.9537567589379389, + "grad_norm": 0.69304358959198, + "learning_rate": 7.246493017469981e-05, + "loss": 2.5098, + "step": 11818 + }, + { + "epoch": 0.9538374626745219, + "grad_norm": 0.6830596327781677, + "learning_rate": 7.244975388085757e-05, + "loss": 2.5206, + "step": 11819 + }, + { + "epoch": 0.9539181664111048, + "grad_norm": 0.7354303598403931, + "learning_rate": 7.243457827363944e-05, + "loss": 2.5223, + "step": 11820 + }, + { + "epoch": 0.9539988701476878, + "grad_norm": 0.7046182751655579, + "learning_rate": 7.241940335342366e-05, + "loss": 2.4931, + "step": 11821 + }, + { + "epoch": 0.9540795738842709, + "grad_norm": 0.6990540623664856, + "learning_rate": 7.240422912058843e-05, + "loss": 2.4302, + "step": 11822 + }, + { + "epoch": 0.9541602776208539, + "grad_norm": 0.7562115788459778, + "learning_rate": 7.238905557551202e-05, + "loss": 2.5118, + "step": 11823 + }, + { + "epoch": 0.9542409813574368, + "grad_norm": 0.8212862014770508, + "learning_rate": 7.237388271857248e-05, + "loss": 2.5476, + "step": 11824 + }, + { + "epoch": 0.9543216850940198, + "grad_norm": 0.7095397710800171, + 
"learning_rate": 7.235871055014798e-05, + "loss": 2.5073, + "step": 11825 + }, + { + "epoch": 0.9544023888306029, + "grad_norm": 0.7174660563468933, + "learning_rate": 7.23435390706167e-05, + "loss": 2.4553, + "step": 11826 + }, + { + "epoch": 0.9544830925671859, + "grad_norm": 0.7121314406394958, + "learning_rate": 7.23283682803567e-05, + "loss": 2.5164, + "step": 11827 + }, + { + "epoch": 0.9545637963037689, + "grad_norm": 0.7354126572608948, + "learning_rate": 7.231319817974609e-05, + "loss": 2.5413, + "step": 11828 + }, + { + "epoch": 0.9546445000403518, + "grad_norm": 0.7770543694496155, + "learning_rate": 7.2298028769163e-05, + "loss": 2.5244, + "step": 11829 + }, + { + "epoch": 0.9547252037769349, + "grad_norm": 0.6770393252372742, + "learning_rate": 7.228286004898541e-05, + "loss": 2.4707, + "step": 11830 + }, + { + "epoch": 0.9548059075135179, + "grad_norm": 0.6916880011558533, + "learning_rate": 7.22676920195914e-05, + "loss": 2.506, + "step": 11831 + }, + { + "epoch": 0.9548866112501009, + "grad_norm": 0.6299161314964294, + "learning_rate": 7.225252468135901e-05, + "loss": 2.5042, + "step": 11832 + }, + { + "epoch": 0.9549673149866839, + "grad_norm": 0.7081227898597717, + "learning_rate": 7.223735803466623e-05, + "loss": 2.5537, + "step": 11833 + }, + { + "epoch": 0.9550480187232668, + "grad_norm": 0.6600900888442993, + "learning_rate": 7.222219207989104e-05, + "loss": 2.5329, + "step": 11834 + }, + { + "epoch": 0.9551287224598499, + "grad_norm": 0.6715366244316101, + "learning_rate": 7.22070268174115e-05, + "loss": 2.5273, + "step": 11835 + }, + { + "epoch": 0.9552094261964329, + "grad_norm": 0.6655930280685425, + "learning_rate": 7.219186224760543e-05, + "loss": 2.4254, + "step": 11836 + }, + { + "epoch": 0.9552901299330159, + "grad_norm": 0.6925715208053589, + "learning_rate": 7.217669837085088e-05, + "loss": 2.5104, + "step": 11837 + }, + { + "epoch": 0.9553708336695989, + "grad_norm": 0.7132978439331055, + "learning_rate": 7.216153518752571e-05, + 
"loss": 2.5238, + "step": 11838 + }, + { + "epoch": 0.9554515374061819, + "grad_norm": 0.661651611328125, + "learning_rate": 7.214637269800791e-05, + "loss": 2.445, + "step": 11839 + }, + { + "epoch": 0.9555322411427649, + "grad_norm": 0.6635430455207825, + "learning_rate": 7.213121090267528e-05, + "loss": 2.4707, + "step": 11840 + }, + { + "epoch": 0.9556129448793479, + "grad_norm": 0.6303616166114807, + "learning_rate": 7.211604980190571e-05, + "loss": 2.4923, + "step": 11841 + }, + { + "epoch": 0.9556936486159309, + "grad_norm": 0.7027459144592285, + "learning_rate": 7.210088939607708e-05, + "loss": 2.5592, + "step": 11842 + }, + { + "epoch": 0.955774352352514, + "grad_norm": 0.6539996862411499, + "learning_rate": 7.208572968556722e-05, + "loss": 2.5256, + "step": 11843 + }, + { + "epoch": 0.9558550560890969, + "grad_norm": 0.7019872069358826, + "learning_rate": 7.207057067075393e-05, + "loss": 2.488, + "step": 11844 + }, + { + "epoch": 0.9559357598256799, + "grad_norm": 0.6848211288452148, + "learning_rate": 7.205541235201507e-05, + "loss": 2.4883, + "step": 11845 + }, + { + "epoch": 0.9560164635622629, + "grad_norm": 0.7806351184844971, + "learning_rate": 7.204025472972834e-05, + "loss": 2.5563, + "step": 11846 + }, + { + "epoch": 0.956097167298846, + "grad_norm": 0.7327724695205688, + "learning_rate": 7.202509780427156e-05, + "loss": 2.5275, + "step": 11847 + }, + { + "epoch": 0.956177871035429, + "grad_norm": 0.6805681586265564, + "learning_rate": 7.200994157602248e-05, + "loss": 2.4723, + "step": 11848 + }, + { + "epoch": 0.9562585747720119, + "grad_norm": 0.7053409814834595, + "learning_rate": 7.19947860453588e-05, + "loss": 2.4471, + "step": 11849 + }, + { + "epoch": 0.9563392785085949, + "grad_norm": 0.6783127188682556, + "learning_rate": 7.197963121265826e-05, + "loss": 2.4586, + "step": 11850 + }, + { + "epoch": 0.956419982245178, + "grad_norm": 0.6639916300773621, + "learning_rate": 7.196447707829857e-05, + "loss": 2.4966, + "step": 11851 + }, + { + 
"epoch": 0.956500685981761, + "grad_norm": 0.684066891670227, + "learning_rate": 7.194932364265739e-05, + "loss": 2.5676, + "step": 11852 + }, + { + "epoch": 0.9565813897183439, + "grad_norm": 0.7872990965843201, + "learning_rate": 7.193417090611239e-05, + "loss": 2.5101, + "step": 11853 + }, + { + "epoch": 0.9566620934549269, + "grad_norm": 0.7543401122093201, + "learning_rate": 7.19190188690412e-05, + "loss": 2.5503, + "step": 11854 + }, + { + "epoch": 0.95674279719151, + "grad_norm": 0.6514382362365723, + "learning_rate": 7.190386753182152e-05, + "loss": 2.4902, + "step": 11855 + }, + { + "epoch": 0.956823500928093, + "grad_norm": 0.6867108345031738, + "learning_rate": 7.188871689483087e-05, + "loss": 2.5054, + "step": 11856 + }, + { + "epoch": 0.956904204664676, + "grad_norm": 0.6536040306091309, + "learning_rate": 7.187356695844687e-05, + "loss": 2.5462, + "step": 11857 + }, + { + "epoch": 0.9569849084012589, + "grad_norm": 0.690237820148468, + "learning_rate": 7.185841772304711e-05, + "loss": 2.5673, + "step": 11858 + }, + { + "epoch": 0.957065612137842, + "grad_norm": 0.6699091196060181, + "learning_rate": 7.184326918900915e-05, + "loss": 2.4733, + "step": 11859 + }, + { + "epoch": 0.957146315874425, + "grad_norm": 0.6482241153717041, + "learning_rate": 7.18281213567105e-05, + "loss": 2.4897, + "step": 11860 + }, + { + "epoch": 0.957227019611008, + "grad_norm": 0.686130166053772, + "learning_rate": 7.181297422652874e-05, + "loss": 2.4596, + "step": 11861 + }, + { + "epoch": 0.957307723347591, + "grad_norm": 0.6507205367088318, + "learning_rate": 7.179782779884132e-05, + "loss": 2.5527, + "step": 11862 + }, + { + "epoch": 0.957388427084174, + "grad_norm": 0.6578813195228577, + "learning_rate": 7.178268207402577e-05, + "loss": 2.4975, + "step": 11863 + }, + { + "epoch": 0.957469130820757, + "grad_norm": 0.6931977272033691, + "learning_rate": 7.176753705245956e-05, + "loss": 2.5533, + "step": 11864 + }, + { + "epoch": 0.95754983455734, + "grad_norm": 
0.7306256890296936, + "learning_rate": 7.17523927345201e-05, + "loss": 2.534, + "step": 11865 + }, + { + "epoch": 0.957630538293923, + "grad_norm": 0.6337448358535767, + "learning_rate": 7.173724912058483e-05, + "loss": 2.5015, + "step": 11866 + }, + { + "epoch": 0.9577112420305061, + "grad_norm": 0.6561456322669983, + "learning_rate": 7.172210621103124e-05, + "loss": 2.4946, + "step": 11867 + }, + { + "epoch": 0.957791945767089, + "grad_norm": 0.6341130137443542, + "learning_rate": 7.170696400623666e-05, + "loss": 2.5611, + "step": 11868 + }, + { + "epoch": 0.957872649503672, + "grad_norm": 0.7202804088592529, + "learning_rate": 7.169182250657849e-05, + "loss": 2.5209, + "step": 11869 + }, + { + "epoch": 0.957953353240255, + "grad_norm": 0.6620556712150574, + "learning_rate": 7.167668171243408e-05, + "loss": 2.4895, + "step": 11870 + }, + { + "epoch": 0.9580340569768381, + "grad_norm": 0.6842508912086487, + "learning_rate": 7.166154162418087e-05, + "loss": 2.4417, + "step": 11871 + }, + { + "epoch": 0.958114760713421, + "grad_norm": 0.7539907693862915, + "learning_rate": 7.164640224219608e-05, + "loss": 2.5153, + "step": 11872 + }, + { + "epoch": 0.958195464450004, + "grad_norm": 0.6524286270141602, + "learning_rate": 7.163126356685703e-05, + "loss": 2.509, + "step": 11873 + }, + { + "epoch": 0.958276168186587, + "grad_norm": 0.7022691965103149, + "learning_rate": 7.16161255985411e-05, + "loss": 2.5223, + "step": 11874 + }, + { + "epoch": 0.9583568719231701, + "grad_norm": 0.6659076809883118, + "learning_rate": 7.160098833762549e-05, + "loss": 2.5231, + "step": 11875 + }, + { + "epoch": 0.9584375756597531, + "grad_norm": 0.6756494641304016, + "learning_rate": 7.15858517844875e-05, + "loss": 2.5017, + "step": 11876 + }, + { + "epoch": 0.958518279396336, + "grad_norm": 0.729850709438324, + "learning_rate": 7.157071593950436e-05, + "loss": 2.4583, + "step": 11877 + }, + { + "epoch": 0.958598983132919, + "grad_norm": 0.7155230641365051, + "learning_rate": 
7.155558080305326e-05, + "loss": 2.4753, + "step": 11878 + }, + { + "epoch": 0.9586796868695021, + "grad_norm": 0.6553284525871277, + "learning_rate": 7.154044637551147e-05, + "loss": 2.5093, + "step": 11879 + }, + { + "epoch": 0.9587603906060851, + "grad_norm": 0.6516379117965698, + "learning_rate": 7.152531265725617e-05, + "loss": 2.4996, + "step": 11880 + }, + { + "epoch": 0.9588410943426681, + "grad_norm": 0.6871184706687927, + "learning_rate": 7.151017964866449e-05, + "loss": 2.5322, + "step": 11881 + }, + { + "epoch": 0.958921798079251, + "grad_norm": 0.6998933553695679, + "learning_rate": 7.149504735011358e-05, + "loss": 2.5328, + "step": 11882 + }, + { + "epoch": 0.959002501815834, + "grad_norm": 0.7065120935440063, + "learning_rate": 7.147991576198065e-05, + "loss": 2.5251, + "step": 11883 + }, + { + "epoch": 0.9590832055524171, + "grad_norm": 0.6718337535858154, + "learning_rate": 7.146478488464275e-05, + "loss": 2.5596, + "step": 11884 + }, + { + "epoch": 0.9591639092890001, + "grad_norm": 0.6394883990287781, + "learning_rate": 7.144965471847698e-05, + "loss": 2.5022, + "step": 11885 + }, + { + "epoch": 0.9592446130255831, + "grad_norm": 0.6867207288742065, + "learning_rate": 7.143452526386045e-05, + "loss": 2.4927, + "step": 11886 + }, + { + "epoch": 0.959325316762166, + "grad_norm": 0.6710157990455627, + "learning_rate": 7.141939652117026e-05, + "loss": 2.5127, + "step": 11887 + }, + { + "epoch": 0.9594060204987491, + "grad_norm": 0.6286540627479553, + "learning_rate": 7.14042684907834e-05, + "loss": 2.4966, + "step": 11888 + }, + { + "epoch": 0.9594867242353321, + "grad_norm": 0.7295787334442139, + "learning_rate": 7.13891411730769e-05, + "loss": 2.5127, + "step": 11889 + }, + { + "epoch": 0.9595674279719151, + "grad_norm": 0.646084189414978, + "learning_rate": 7.137401456842784e-05, + "loss": 2.5575, + "step": 11890 + }, + { + "epoch": 0.959648131708498, + "grad_norm": 0.7884495258331299, + "learning_rate": 7.135888867721312e-05, + "loss": 2.4807, + 
"step": 11891 + }, + { + "epoch": 0.9597288354450811, + "grad_norm": 0.638469934463501, + "learning_rate": 7.134376349980977e-05, + "loss": 2.4989, + "step": 11892 + }, + { + "epoch": 0.9598095391816641, + "grad_norm": 0.6802849769592285, + "learning_rate": 7.132863903659476e-05, + "loss": 2.5139, + "step": 11893 + }, + { + "epoch": 0.9598902429182471, + "grad_norm": 0.6657521724700928, + "learning_rate": 7.131351528794499e-05, + "loss": 2.4488, + "step": 11894 + }, + { + "epoch": 0.9599709466548301, + "grad_norm": 0.6537562012672424, + "learning_rate": 7.129839225423741e-05, + "loss": 2.4664, + "step": 11895 + }, + { + "epoch": 0.9600516503914132, + "grad_norm": 0.689637303352356, + "learning_rate": 7.128326993584897e-05, + "loss": 2.582, + "step": 11896 + }, + { + "epoch": 0.9601323541279961, + "grad_norm": 0.6701640486717224, + "learning_rate": 7.126814833315646e-05, + "loss": 2.4963, + "step": 11897 + }, + { + "epoch": 0.9602130578645791, + "grad_norm": 0.7466658353805542, + "learning_rate": 7.125302744653677e-05, + "loss": 2.5015, + "step": 11898 + }, + { + "epoch": 0.9602937616011621, + "grad_norm": 0.6487225294113159, + "learning_rate": 7.123790727636685e-05, + "loss": 2.5393, + "step": 11899 + }, + { + "epoch": 0.9603744653377452, + "grad_norm": 0.7204654216766357, + "learning_rate": 7.122278782302343e-05, + "loss": 2.4668, + "step": 11900 + }, + { + "epoch": 0.9604551690743282, + "grad_norm": 0.6852861046791077, + "learning_rate": 7.120766908688336e-05, + "loss": 2.5893, + "step": 11901 + }, + { + "epoch": 0.9605358728109111, + "grad_norm": 0.6483901739120483, + "learning_rate": 7.119255106832344e-05, + "loss": 2.48, + "step": 11902 + }, + { + "epoch": 0.9606165765474941, + "grad_norm": 0.6670375466346741, + "learning_rate": 7.117743376772049e-05, + "loss": 2.5225, + "step": 11903 + }, + { + "epoch": 0.9606972802840772, + "grad_norm": 0.6805974841117859, + "learning_rate": 7.116231718545118e-05, + "loss": 2.4652, + "step": 11904 + }, + { + "epoch": 
0.9607779840206602, + "grad_norm": 0.6700397729873657, + "learning_rate": 7.114720132189232e-05, + "loss": 2.5115, + "step": 11905 + }, + { + "epoch": 0.9608586877572431, + "grad_norm": 0.7167409062385559, + "learning_rate": 7.113208617742066e-05, + "loss": 2.5062, + "step": 11906 + }, + { + "epoch": 0.9609393914938261, + "grad_norm": 0.7337077856063843, + "learning_rate": 7.111697175241286e-05, + "loss": 2.5768, + "step": 11907 + }, + { + "epoch": 0.9610200952304092, + "grad_norm": 0.6681819558143616, + "learning_rate": 7.110185804724558e-05, + "loss": 2.5058, + "step": 11908 + }, + { + "epoch": 0.9611007989669922, + "grad_norm": 0.7235603332519531, + "learning_rate": 7.10867450622956e-05, + "loss": 2.4606, + "step": 11909 + }, + { + "epoch": 0.9611815027035752, + "grad_norm": 0.6931360363960266, + "learning_rate": 7.107163279793947e-05, + "loss": 2.5129, + "step": 11910 + }, + { + "epoch": 0.9612622064401581, + "grad_norm": 0.7331648468971252, + "learning_rate": 7.105652125455388e-05, + "loss": 2.4916, + "step": 11911 + }, + { + "epoch": 0.9613429101767412, + "grad_norm": 0.6538143754005432, + "learning_rate": 7.104141043251545e-05, + "loss": 2.5184, + "step": 11912 + }, + { + "epoch": 0.9614236139133242, + "grad_norm": 0.7018921375274658, + "learning_rate": 7.102630033220077e-05, + "loss": 2.5446, + "step": 11913 + }, + { + "epoch": 0.9615043176499072, + "grad_norm": 0.7528507709503174, + "learning_rate": 7.10111909539864e-05, + "loss": 2.4404, + "step": 11914 + }, + { + "epoch": 0.9615850213864902, + "grad_norm": 0.7258831858634949, + "learning_rate": 7.099608229824894e-05, + "loss": 2.4758, + "step": 11915 + }, + { + "epoch": 0.9616657251230732, + "grad_norm": 0.6954349875450134, + "learning_rate": 7.098097436536498e-05, + "loss": 2.4894, + "step": 11916 + }, + { + "epoch": 0.9617464288596562, + "grad_norm": 0.691584050655365, + "learning_rate": 7.096586715571092e-05, + "loss": 2.544, + "step": 11917 + }, + { + "epoch": 0.9618271325962392, + "grad_norm": 
0.7107009291648865, + "learning_rate": 7.095076066966337e-05, + "loss": 2.4994, + "step": 11918 + }, + { + "epoch": 0.9619078363328222, + "grad_norm": 0.6492058634757996, + "learning_rate": 7.093565490759881e-05, + "loss": 2.5751, + "step": 11919 + }, + { + "epoch": 0.9619885400694053, + "grad_norm": 0.6817753314971924, + "learning_rate": 7.092054986989371e-05, + "loss": 2.5129, + "step": 11920 + }, + { + "epoch": 0.9620692438059882, + "grad_norm": 0.6991822123527527, + "learning_rate": 7.090544555692448e-05, + "loss": 2.5728, + "step": 11921 + }, + { + "epoch": 0.9621499475425712, + "grad_norm": 0.6627625226974487, + "learning_rate": 7.089034196906768e-05, + "loss": 2.4479, + "step": 11922 + }, + { + "epoch": 0.9622306512791542, + "grad_norm": 0.6889652013778687, + "learning_rate": 7.087523910669957e-05, + "loss": 2.5323, + "step": 11923 + }, + { + "epoch": 0.9623113550157373, + "grad_norm": 0.7863786816596985, + "learning_rate": 7.086013697019667e-05, + "loss": 2.5146, + "step": 11924 + }, + { + "epoch": 0.9623920587523203, + "grad_norm": 0.6885324716567993, + "learning_rate": 7.084503555993536e-05, + "loss": 2.5072, + "step": 11925 + }, + { + "epoch": 0.9624727624889032, + "grad_norm": 0.619239091873169, + "learning_rate": 7.082993487629192e-05, + "loss": 2.4622, + "step": 11926 + }, + { + "epoch": 0.9625534662254862, + "grad_norm": 0.6762447953224182, + "learning_rate": 7.081483491964278e-05, + "loss": 2.5155, + "step": 11927 + }, + { + "epoch": 0.9626341699620693, + "grad_norm": 0.6559715867042542, + "learning_rate": 7.079973569036424e-05, + "loss": 2.4729, + "step": 11928 + }, + { + "epoch": 0.9627148736986523, + "grad_norm": 0.633280873298645, + "learning_rate": 7.078463718883261e-05, + "loss": 2.4715, + "step": 11929 + }, + { + "epoch": 0.9627955774352353, + "grad_norm": 0.7740094065666199, + "learning_rate": 7.07695394154242e-05, + "loss": 2.4871, + "step": 11930 + }, + { + "epoch": 0.9628762811718182, + "grad_norm": 0.7103284597396851, + "learning_rate": 
7.075444237051527e-05, + "loss": 2.5299, + "step": 11931 + }, + { + "epoch": 0.9629569849084013, + "grad_norm": 0.6800934076309204, + "learning_rate": 7.073934605448212e-05, + "loss": 2.5919, + "step": 11932 + }, + { + "epoch": 0.9630376886449843, + "grad_norm": 0.6680917143821716, + "learning_rate": 7.072425046770092e-05, + "loss": 2.4942, + "step": 11933 + }, + { + "epoch": 0.9631183923815673, + "grad_norm": 0.7248062491416931, + "learning_rate": 7.070915561054792e-05, + "loss": 2.4956, + "step": 11934 + }, + { + "epoch": 0.9631990961181502, + "grad_norm": 0.6635782122612, + "learning_rate": 7.069406148339936e-05, + "loss": 2.4658, + "step": 11935 + }, + { + "epoch": 0.9632797998547332, + "grad_norm": 0.6751061081886292, + "learning_rate": 7.067896808663137e-05, + "loss": 2.4912, + "step": 11936 + }, + { + "epoch": 0.9633605035913163, + "grad_norm": 0.7476027607917786, + "learning_rate": 7.066387542062013e-05, + "loss": 2.4858, + "step": 11937 + }, + { + "epoch": 0.9634412073278993, + "grad_norm": 0.6770931482315063, + "learning_rate": 7.064878348574183e-05, + "loss": 2.4574, + "step": 11938 + }, + { + "epoch": 0.9635219110644823, + "grad_norm": 0.7105392813682556, + "learning_rate": 7.063369228237255e-05, + "loss": 2.5523, + "step": 11939 + }, + { + "epoch": 0.9636026148010652, + "grad_norm": 0.6806207299232483, + "learning_rate": 7.061860181088842e-05, + "loss": 2.4992, + "step": 11940 + }, + { + "epoch": 0.9636833185376483, + "grad_norm": 0.7059600353240967, + "learning_rate": 7.060351207166558e-05, + "loss": 2.5778, + "step": 11941 + }, + { + "epoch": 0.9637640222742313, + "grad_norm": 0.6306884288787842, + "learning_rate": 7.058842306508002e-05, + "loss": 2.5389, + "step": 11942 + }, + { + "epoch": 0.9638447260108143, + "grad_norm": 0.6997150778770447, + "learning_rate": 7.057333479150783e-05, + "loss": 2.5077, + "step": 11943 + }, + { + "epoch": 0.9639254297473973, + "grad_norm": 0.7073743343353271, + "learning_rate": 7.05582472513251e-05, + "loss": 2.5274, 
+ "step": 11944 + }, + { + "epoch": 0.9640061334839803, + "grad_norm": 0.6768803596496582, + "learning_rate": 7.054316044490777e-05, + "loss": 2.5155, + "step": 11945 + }, + { + "epoch": 0.9640868372205633, + "grad_norm": 0.6792057752609253, + "learning_rate": 7.052807437263189e-05, + "loss": 2.5509, + "step": 11946 + }, + { + "epoch": 0.9641675409571463, + "grad_norm": 0.6883981823921204, + "learning_rate": 7.051298903487344e-05, + "loss": 2.5176, + "step": 11947 + }, + { + "epoch": 0.9642482446937293, + "grad_norm": 0.6934401392936707, + "learning_rate": 7.049790443200844e-05, + "loss": 2.502, + "step": 11948 + }, + { + "epoch": 0.9643289484303124, + "grad_norm": 0.6882597804069519, + "learning_rate": 7.048282056441269e-05, + "loss": 2.487, + "step": 11949 + }, + { + "epoch": 0.9644096521668953, + "grad_norm": 0.6972896456718445, + "learning_rate": 7.046773743246225e-05, + "loss": 2.5304, + "step": 11950 + }, + { + "epoch": 0.9644903559034783, + "grad_norm": 0.6591988205909729, + "learning_rate": 7.045265503653303e-05, + "loss": 2.4734, + "step": 11951 + }, + { + "epoch": 0.9645710596400613, + "grad_norm": 0.6890063285827637, + "learning_rate": 7.043757337700082e-05, + "loss": 2.5289, + "step": 11952 + }, + { + "epoch": 0.9646517633766444, + "grad_norm": 0.6931065917015076, + "learning_rate": 7.042249245424157e-05, + "loss": 2.484, + "step": 11953 + }, + { + "epoch": 0.9647324671132274, + "grad_norm": 0.6943762898445129, + "learning_rate": 7.040741226863117e-05, + "loss": 2.501, + "step": 11954 + }, + { + "epoch": 0.9648131708498103, + "grad_norm": 0.677154004573822, + "learning_rate": 7.039233282054536e-05, + "loss": 2.4976, + "step": 11955 + }, + { + "epoch": 0.9648938745863933, + "grad_norm": 0.6662883758544922, + "learning_rate": 7.037725411036003e-05, + "loss": 2.4928, + "step": 11956 + }, + { + "epoch": 0.9649745783229764, + "grad_norm": 0.6854663491249084, + "learning_rate": 7.0362176138451e-05, + "loss": 2.4657, + "step": 11957 + }, + { + "epoch": 
0.9650552820595594, + "grad_norm": 0.6703238487243652, + "learning_rate": 7.034709890519397e-05, + "loss": 2.4879, + "step": 11958 + }, + { + "epoch": 0.9651359857961423, + "grad_norm": 0.7023652791976929, + "learning_rate": 7.033202241096474e-05, + "loss": 2.4619, + "step": 11959 + }, + { + "epoch": 0.9652166895327253, + "grad_norm": 0.6950454711914062, + "learning_rate": 7.031694665613911e-05, + "loss": 2.5125, + "step": 11960 + }, + { + "epoch": 0.9652973932693084, + "grad_norm": 0.6740411520004272, + "learning_rate": 7.030187164109272e-05, + "loss": 2.436, + "step": 11961 + }, + { + "epoch": 0.9653780970058914, + "grad_norm": 0.6697152256965637, + "learning_rate": 7.028679736620132e-05, + "loss": 2.5513, + "step": 11962 + }, + { + "epoch": 0.9654588007424744, + "grad_norm": 0.6920599937438965, + "learning_rate": 7.027172383184061e-05, + "loss": 2.5264, + "step": 11963 + }, + { + "epoch": 0.9655395044790573, + "grad_norm": 0.6493465304374695, + "learning_rate": 7.025665103838627e-05, + "loss": 2.4834, + "step": 11964 + }, + { + "epoch": 0.9656202082156404, + "grad_norm": 0.684092104434967, + "learning_rate": 7.02415789862139e-05, + "loss": 2.4662, + "step": 11965 + }, + { + "epoch": 0.9657009119522234, + "grad_norm": 0.7161515355110168, + "learning_rate": 7.022650767569921e-05, + "loss": 2.4648, + "step": 11966 + }, + { + "epoch": 0.9657816156888064, + "grad_norm": 0.6994524002075195, + "learning_rate": 7.021143710721778e-05, + "loss": 2.5186, + "step": 11967 + }, + { + "epoch": 0.9658623194253894, + "grad_norm": 0.7105295062065125, + "learning_rate": 7.019636728114518e-05, + "loss": 2.5132, + "step": 11968 + }, + { + "epoch": 0.9659430231619724, + "grad_norm": 0.7182292938232422, + "learning_rate": 7.018129819785702e-05, + "loss": 2.5469, + "step": 11969 + }, + { + "epoch": 0.9660237268985554, + "grad_norm": 0.7021759152412415, + "learning_rate": 7.016622985772887e-05, + "loss": 2.5477, + "step": 11970 + }, + { + "epoch": 0.9661044306351384, + "grad_norm": 
0.6751413941383362, + "learning_rate": 7.015116226113624e-05, + "loss": 2.5174, + "step": 11971 + }, + { + "epoch": 0.9661851343717214, + "grad_norm": 0.6341918110847473, + "learning_rate": 7.013609540845468e-05, + "loss": 2.4778, + "step": 11972 + }, + { + "epoch": 0.9662658381083045, + "grad_norm": 0.7080956697463989, + "learning_rate": 7.012102930005971e-05, + "loss": 2.5304, + "step": 11973 + }, + { + "epoch": 0.9663465418448874, + "grad_norm": 0.6367003321647644, + "learning_rate": 7.010596393632674e-05, + "loss": 2.4857, + "step": 11974 + }, + { + "epoch": 0.9664272455814704, + "grad_norm": 0.6841328740119934, + "learning_rate": 7.009089931763131e-05, + "loss": 2.5365, + "step": 11975 + }, + { + "epoch": 0.9665079493180534, + "grad_norm": 0.6568236351013184, + "learning_rate": 7.00758354443489e-05, + "loss": 2.5286, + "step": 11976 + }, + { + "epoch": 0.9665886530546365, + "grad_norm": 0.7071812152862549, + "learning_rate": 7.006077231685485e-05, + "loss": 2.458, + "step": 11977 + }, + { + "epoch": 0.9666693567912195, + "grad_norm": 0.6997712850570679, + "learning_rate": 7.004570993552462e-05, + "loss": 2.4571, + "step": 11978 + }, + { + "epoch": 0.9667500605278024, + "grad_norm": 0.6920793056488037, + "learning_rate": 7.003064830073359e-05, + "loss": 2.4172, + "step": 11979 + }, + { + "epoch": 0.9668307642643854, + "grad_norm": 0.6823387742042542, + "learning_rate": 7.001558741285718e-05, + "loss": 2.4895, + "step": 11980 + }, + { + "epoch": 0.9669114680009685, + "grad_norm": 0.7309569716453552, + "learning_rate": 7.000052727227068e-05, + "loss": 2.502, + "step": 11981 + }, + { + "epoch": 0.9669921717375515, + "grad_norm": 0.734708845615387, + "learning_rate": 6.998546787934946e-05, + "loss": 2.4918, + "step": 11982 + }, + { + "epoch": 0.9670728754741345, + "grad_norm": 0.690406084060669, + "learning_rate": 6.997040923446889e-05, + "loss": 2.4994, + "step": 11983 + }, + { + "epoch": 0.9671535792107174, + "grad_norm": 0.7126687169075012, + "learning_rate": 
6.995535133800416e-05, + "loss": 2.4824, + "step": 11984 + }, + { + "epoch": 0.9672342829473004, + "grad_norm": 0.7020599246025085, + "learning_rate": 6.994029419033062e-05, + "loss": 2.4889, + "step": 11985 + }, + { + "epoch": 0.9673149866838835, + "grad_norm": 0.7690796852111816, + "learning_rate": 6.992523779182356e-05, + "loss": 2.4997, + "step": 11986 + }, + { + "epoch": 0.9673956904204665, + "grad_norm": 0.6635778546333313, + "learning_rate": 6.991018214285816e-05, + "loss": 2.4989, + "step": 11987 + }, + { + "epoch": 0.9674763941570494, + "grad_norm": 0.7088577747344971, + "learning_rate": 6.989512724380967e-05, + "loss": 2.549, + "step": 11988 + }, + { + "epoch": 0.9675570978936324, + "grad_norm": 0.6420924663543701, + "learning_rate": 6.988007309505333e-05, + "loss": 2.4585, + "step": 11989 + }, + { + "epoch": 0.9676378016302155, + "grad_norm": 0.7902400493621826, + "learning_rate": 6.986501969696428e-05, + "loss": 2.5009, + "step": 11990 + }, + { + "epoch": 0.9677185053667985, + "grad_norm": 0.700907289981842, + "learning_rate": 6.984996704991773e-05, + "loss": 2.4778, + "step": 11991 + }, + { + "epoch": 0.9677992091033815, + "grad_norm": 0.664378821849823, + "learning_rate": 6.983491515428883e-05, + "loss": 2.5116, + "step": 11992 + }, + { + "epoch": 0.9678799128399644, + "grad_norm": 0.6314663887023926, + "learning_rate": 6.981986401045266e-05, + "loss": 2.4588, + "step": 11993 + }, + { + "epoch": 0.9679606165765475, + "grad_norm": 0.6521078944206238, + "learning_rate": 6.980481361878438e-05, + "loss": 2.5224, + "step": 11994 + }, + { + "epoch": 0.9680413203131305, + "grad_norm": 0.6336014270782471, + "learning_rate": 6.978976397965907e-05, + "loss": 2.4297, + "step": 11995 + }, + { + "epoch": 0.9681220240497135, + "grad_norm": 0.7321500778198242, + "learning_rate": 6.977471509345183e-05, + "loss": 2.5252, + "step": 11996 + }, + { + "epoch": 0.9682027277862965, + "grad_norm": 0.686950147151947, + "learning_rate": 6.97596669605377e-05, + "loss": 2.5188, 
+ "step": 11997 + }, + { + "epoch": 0.9682834315228795, + "grad_norm": 0.729343056678772, + "learning_rate": 6.97446195812917e-05, + "loss": 2.5157, + "step": 11998 + }, + { + "epoch": 0.9683641352594625, + "grad_norm": 0.6447068452835083, + "learning_rate": 6.972957295608889e-05, + "loss": 2.5041, + "step": 11999 + }, + { + "epoch": 0.9684448389960455, + "grad_norm": 0.6847280859947205, + "learning_rate": 6.971452708530423e-05, + "loss": 2.443, + "step": 12000 + }, + { + "epoch": 0.9684448389960455, + "eval_loss": 2.431878089904785, + "eval_runtime": 758.167, + "eval_samples_per_second": 3.456, + "eval_steps_per_second": 0.576, + "step": 12000 + }, + { + "epoch": 0.9685255427326285, + "grad_norm": 0.6440466046333313, + "learning_rate": 6.969948196931272e-05, + "loss": 2.5091, + "step": 12001 + }, + { + "epoch": 0.9686062464692116, + "grad_norm": 0.6570029258728027, + "learning_rate": 6.968443760848937e-05, + "loss": 2.491, + "step": 12002 + }, + { + "epoch": 0.9686869502057945, + "grad_norm": 0.7610877752304077, + "learning_rate": 6.966939400320905e-05, + "loss": 2.4713, + "step": 12003 + }, + { + "epoch": 0.9687676539423775, + "grad_norm": 0.7187781929969788, + "learning_rate": 6.965435115384669e-05, + "loss": 2.4303, + "step": 12004 + }, + { + "epoch": 0.9688483576789605, + "grad_norm": 0.7668420672416687, + "learning_rate": 6.963930906077727e-05, + "loss": 2.5513, + "step": 12005 + }, + { + "epoch": 0.9689290614155436, + "grad_norm": 0.7025619745254517, + "learning_rate": 6.96242677243756e-05, + "loss": 2.4349, + "step": 12006 + }, + { + "epoch": 0.9690097651521266, + "grad_norm": 0.7066935896873474, + "learning_rate": 6.960922714501657e-05, + "loss": 2.5465, + "step": 12007 + }, + { + "epoch": 0.9690904688887095, + "grad_norm": 0.6758970618247986, + "learning_rate": 6.95941873230751e-05, + "loss": 2.4827, + "step": 12008 + }, + { + "epoch": 0.9691711726252925, + "grad_norm": 0.7108862996101379, + "learning_rate": 6.957914825892591e-05, + "loss": 2.5412, + 
"step": 12009 + }, + { + "epoch": 0.9692518763618756, + "grad_norm": 0.660784125328064, + "learning_rate": 6.956410995294389e-05, + "loss": 2.5173, + "step": 12010 + }, + { + "epoch": 0.9693325800984586, + "grad_norm": 0.6966561079025269, + "learning_rate": 6.954907240550377e-05, + "loss": 2.5196, + "step": 12011 + }, + { + "epoch": 0.9694132838350416, + "grad_norm": 0.6889416575431824, + "learning_rate": 6.953403561698042e-05, + "loss": 2.5351, + "step": 12012 + }, + { + "epoch": 0.9694939875716245, + "grad_norm": 0.7578341960906982, + "learning_rate": 6.951899958774852e-05, + "loss": 2.5184, + "step": 12013 + }, + { + "epoch": 0.9695746913082076, + "grad_norm": 0.6735317707061768, + "learning_rate": 6.950396431818282e-05, + "loss": 2.4592, + "step": 12014 + }, + { + "epoch": 0.9696553950447906, + "grad_norm": 0.6903232932090759, + "learning_rate": 6.948892980865806e-05, + "loss": 2.5212, + "step": 12015 + }, + { + "epoch": 0.9697360987813736, + "grad_norm": 0.6477165818214417, + "learning_rate": 6.94738960595489e-05, + "loss": 2.4423, + "step": 12016 + }, + { + "epoch": 0.9698168025179565, + "grad_norm": 0.6778751015663147, + "learning_rate": 6.945886307123007e-05, + "loss": 2.547, + "step": 12017 + }, + { + "epoch": 0.9698975062545396, + "grad_norm": 0.690558135509491, + "learning_rate": 6.944383084407623e-05, + "loss": 2.5081, + "step": 12018 + }, + { + "epoch": 0.9699782099911226, + "grad_norm": 0.7210639119148254, + "learning_rate": 6.942879937846196e-05, + "loss": 2.496, + "step": 12019 + }, + { + "epoch": 0.9700589137277056, + "grad_norm": 0.7182444930076599, + "learning_rate": 6.941376867476194e-05, + "loss": 2.6138, + "step": 12020 + }, + { + "epoch": 0.9701396174642886, + "grad_norm": 0.6929295063018799, + "learning_rate": 6.939873873335077e-05, + "loss": 2.4828, + "step": 12021 + }, + { + "epoch": 0.9702203212008716, + "grad_norm": 0.6919693350791931, + "learning_rate": 6.938370955460298e-05, + "loss": 2.5123, + "step": 12022 + }, + { + "epoch": 
0.9703010249374546, + "grad_norm": 0.6475244164466858, + "learning_rate": 6.93686811388932e-05, + "loss": 2.4992, + "step": 12023 + }, + { + "epoch": 0.9703817286740376, + "grad_norm": 0.6728265881538391, + "learning_rate": 6.935365348659597e-05, + "loss": 2.4486, + "step": 12024 + }, + { + "epoch": 0.9704624324106206, + "grad_norm": 0.6791470646858215, + "learning_rate": 6.933862659808582e-05, + "loss": 2.4657, + "step": 12025 + }, + { + "epoch": 0.9705431361472037, + "grad_norm": 0.7611662745475769, + "learning_rate": 6.932360047373721e-05, + "loss": 2.5243, + "step": 12026 + }, + { + "epoch": 0.9706238398837866, + "grad_norm": 0.6642355918884277, + "learning_rate": 6.930857511392467e-05, + "loss": 2.5308, + "step": 12027 + }, + { + "epoch": 0.9707045436203696, + "grad_norm": 0.7270805239677429, + "learning_rate": 6.92935505190227e-05, + "loss": 2.4708, + "step": 12028 + }, + { + "epoch": 0.9707852473569526, + "grad_norm": 0.6706295013427734, + "learning_rate": 6.927852668940568e-05, + "loss": 2.5136, + "step": 12029 + }, + { + "epoch": 0.9708659510935357, + "grad_norm": 0.6923376321792603, + "learning_rate": 6.92635036254481e-05, + "loss": 2.5238, + "step": 12030 + }, + { + "epoch": 0.9709466548301187, + "grad_norm": 0.7154483199119568, + "learning_rate": 6.924848132752436e-05, + "loss": 2.488, + "step": 12031 + }, + { + "epoch": 0.9710273585667016, + "grad_norm": 0.6675701141357422, + "learning_rate": 6.923345979600884e-05, + "loss": 2.5066, + "step": 12032 + }, + { + "epoch": 0.9711080623032846, + "grad_norm": 0.7282043695449829, + "learning_rate": 6.921843903127592e-05, + "loss": 2.5096, + "step": 12033 + }, + { + "epoch": 0.9711887660398677, + "grad_norm": 0.663526177406311, + "learning_rate": 6.92034190337e-05, + "loss": 2.5276, + "step": 12034 + }, + { + "epoch": 0.9712694697764507, + "grad_norm": 0.7491087913513184, + "learning_rate": 6.918839980365534e-05, + "loss": 2.5044, + "step": 12035 + }, + { + "epoch": 0.9713501735130337, + "grad_norm": 
0.6977766156196594, + "learning_rate": 6.917338134151629e-05, + "loss": 2.6102, + "step": 12036 + }, + { + "epoch": 0.9714308772496166, + "grad_norm": 0.6447446346282959, + "learning_rate": 6.915836364765722e-05, + "loss": 2.5137, + "step": 12037 + }, + { + "epoch": 0.9715115809861996, + "grad_norm": 0.6801442503929138, + "learning_rate": 6.91433467224523e-05, + "loss": 2.5145, + "step": 12038 + }, + { + "epoch": 0.9715922847227827, + "grad_norm": 0.6843627691268921, + "learning_rate": 6.912833056627583e-05, + "loss": 2.6099, + "step": 12039 + }, + { + "epoch": 0.9716729884593657, + "grad_norm": 0.6862856149673462, + "learning_rate": 6.911331517950209e-05, + "loss": 2.5358, + "step": 12040 + }, + { + "epoch": 0.9717536921959486, + "grad_norm": 0.6835047602653503, + "learning_rate": 6.909830056250527e-05, + "loss": 2.5257, + "step": 12041 + }, + { + "epoch": 0.9718343959325316, + "grad_norm": 0.6958080530166626, + "learning_rate": 6.908328671565956e-05, + "loss": 2.5008, + "step": 12042 + }, + { + "epoch": 0.9719150996691147, + "grad_norm": 0.7556219100952148, + "learning_rate": 6.906827363933917e-05, + "loss": 2.5283, + "step": 12043 + }, + { + "epoch": 0.9719958034056977, + "grad_norm": 0.7074917554855347, + "learning_rate": 6.90532613339183e-05, + "loss": 2.4898, + "step": 12044 + }, + { + "epoch": 0.9720765071422807, + "grad_norm": 0.6456350684165955, + "learning_rate": 6.903824979977101e-05, + "loss": 2.4989, + "step": 12045 + }, + { + "epoch": 0.9721572108788636, + "grad_norm": 0.6609941720962524, + "learning_rate": 6.902323903727146e-05, + "loss": 2.4883, + "step": 12046 + }, + { + "epoch": 0.9722379146154467, + "grad_norm": 0.7132936716079712, + "learning_rate": 6.90082290467938e-05, + "loss": 2.4983, + "step": 12047 + }, + { + "epoch": 0.9723186183520297, + "grad_norm": 0.6686434745788574, + "learning_rate": 6.899321982871206e-05, + "loss": 2.4862, + "step": 12048 + }, + { + "epoch": 0.9723993220886127, + "grad_norm": 0.6792194247245789, + "learning_rate": 
6.897821138340033e-05, + "loss": 2.5368, + "step": 12049 + }, + { + "epoch": 0.9724800258251957, + "grad_norm": 0.6829379796981812, + "learning_rate": 6.896320371123268e-05, + "loss": 2.4842, + "step": 12050 + }, + { + "epoch": 0.9725607295617787, + "grad_norm": 0.7459573745727539, + "learning_rate": 6.894819681258312e-05, + "loss": 2.5023, + "step": 12051 + }, + { + "epoch": 0.9726414332983617, + "grad_norm": 0.6700068712234497, + "learning_rate": 6.893319068782566e-05, + "loss": 2.552, + "step": 12052 + }, + { + "epoch": 0.9727221370349447, + "grad_norm": 0.7093638777732849, + "learning_rate": 6.891818533733434e-05, + "loss": 2.445, + "step": 12053 + }, + { + "epoch": 0.9728028407715277, + "grad_norm": 0.703599214553833, + "learning_rate": 6.890318076148304e-05, + "loss": 2.5536, + "step": 12054 + }, + { + "epoch": 0.9728835445081108, + "grad_norm": 0.6214482188224792, + "learning_rate": 6.888817696064578e-05, + "loss": 2.5188, + "step": 12055 + }, + { + "epoch": 0.9729642482446937, + "grad_norm": 0.6893547773361206, + "learning_rate": 6.887317393519645e-05, + "loss": 2.5596, + "step": 12056 + }, + { + "epoch": 0.9730449519812767, + "grad_norm": 0.6282656788825989, + "learning_rate": 6.885817168550903e-05, + "loss": 2.4873, + "step": 12057 + }, + { + "epoch": 0.9731256557178597, + "grad_norm": 0.6979188323020935, + "learning_rate": 6.884317021195737e-05, + "loss": 2.5358, + "step": 12058 + }, + { + "epoch": 0.9732063594544428, + "grad_norm": 0.7925785183906555, + "learning_rate": 6.882816951491533e-05, + "loss": 2.5358, + "step": 12059 + }, + { + "epoch": 0.9732870631910258, + "grad_norm": 0.6449821591377258, + "learning_rate": 6.881316959475684e-05, + "loss": 2.4784, + "step": 12060 + }, + { + "epoch": 0.9733677669276087, + "grad_norm": 0.7013393044471741, + "learning_rate": 6.879817045185565e-05, + "loss": 2.4804, + "step": 12061 + }, + { + "epoch": 0.9734484706641917, + "grad_norm": 0.8338057398796082, + "learning_rate": 6.878317208658559e-05, + "loss": 2.512, 
+ "step": 12062 + }, + { + "epoch": 0.9735291744007748, + "grad_norm": 0.6815133094787598, + "learning_rate": 6.876817449932054e-05, + "loss": 2.467, + "step": 12063 + }, + { + "epoch": 0.9736098781373578, + "grad_norm": 0.659156858921051, + "learning_rate": 6.87531776904342e-05, + "loss": 2.503, + "step": 12064 + }, + { + "epoch": 0.9736905818739408, + "grad_norm": 0.7149603962898254, + "learning_rate": 6.873818166030033e-05, + "loss": 2.5135, + "step": 12065 + }, + { + "epoch": 0.9737712856105237, + "grad_norm": 0.7010510563850403, + "learning_rate": 6.872318640929272e-05, + "loss": 2.5133, + "step": 12066 + }, + { + "epoch": 0.9738519893471068, + "grad_norm": 0.6247616410255432, + "learning_rate": 6.870819193778504e-05, + "loss": 2.5189, + "step": 12067 + }, + { + "epoch": 0.9739326930836898, + "grad_norm": 0.6938940286636353, + "learning_rate": 6.869319824615101e-05, + "loss": 2.5053, + "step": 12068 + }, + { + "epoch": 0.9740133968202728, + "grad_norm": 0.7636895179748535, + "learning_rate": 6.867820533476436e-05, + "loss": 2.4989, + "step": 12069 + }, + { + "epoch": 0.9740941005568557, + "grad_norm": 0.6489234566688538, + "learning_rate": 6.866321320399869e-05, + "loss": 2.4935, + "step": 12070 + }, + { + "epoch": 0.9741748042934388, + "grad_norm": 0.6752095818519592, + "learning_rate": 6.864822185422764e-05, + "loss": 2.4835, + "step": 12071 + }, + { + "epoch": 0.9742555080300218, + "grad_norm": 0.6947118639945984, + "learning_rate": 6.863323128582486e-05, + "loss": 2.504, + "step": 12072 + }, + { + "epoch": 0.9743362117666048, + "grad_norm": 0.6815536618232727, + "learning_rate": 6.861824149916398e-05, + "loss": 2.5369, + "step": 12073 + }, + { + "epoch": 0.9744169155031878, + "grad_norm": 0.6550236344337463, + "learning_rate": 6.860325249461852e-05, + "loss": 2.4753, + "step": 12074 + }, + { + "epoch": 0.9744976192397709, + "grad_norm": 0.6833250522613525, + "learning_rate": 6.858826427256209e-05, + "loss": 2.4687, + "step": 12075 + }, + { + "epoch": 
0.9745783229763538, + "grad_norm": 0.6925075650215149, + "learning_rate": 6.857327683336824e-05, + "loss": 2.5363, + "step": 12076 + }, + { + "epoch": 0.9746590267129368, + "grad_norm": 0.6754821538925171, + "learning_rate": 6.855829017741046e-05, + "loss": 2.4696, + "step": 12077 + }, + { + "epoch": 0.9747397304495198, + "grad_norm": 0.7360671162605286, + "learning_rate": 6.854330430506228e-05, + "loss": 2.5144, + "step": 12078 + }, + { + "epoch": 0.9748204341861029, + "grad_norm": 0.6814733743667603, + "learning_rate": 6.852831921669723e-05, + "loss": 2.5059, + "step": 12079 + }, + { + "epoch": 0.9749011379226858, + "grad_norm": 0.7106744647026062, + "learning_rate": 6.851333491268869e-05, + "loss": 2.453, + "step": 12080 + }, + { + "epoch": 0.9749818416592688, + "grad_norm": 0.6623831987380981, + "learning_rate": 6.849835139341015e-05, + "loss": 2.5244, + "step": 12081 + }, + { + "epoch": 0.9750625453958518, + "grad_norm": 0.6723372936248779, + "learning_rate": 6.848336865923506e-05, + "loss": 2.5159, + "step": 12082 + }, + { + "epoch": 0.9751432491324349, + "grad_norm": 0.7256618142127991, + "learning_rate": 6.84683867105368e-05, + "loss": 2.494, + "step": 12083 + }, + { + "epoch": 0.9752239528690179, + "grad_norm": 0.6881731152534485, + "learning_rate": 6.845340554768874e-05, + "loss": 2.4374, + "step": 12084 + }, + { + "epoch": 0.9753046566056008, + "grad_norm": 0.6759666204452515, + "learning_rate": 6.843842517106434e-05, + "loss": 2.5082, + "step": 12085 + }, + { + "epoch": 0.9753853603421838, + "grad_norm": 0.6983315348625183, + "learning_rate": 6.842344558103684e-05, + "loss": 2.5191, + "step": 12086 + }, + { + "epoch": 0.9754660640787668, + "grad_norm": 0.6805596351623535, + "learning_rate": 6.840846677797959e-05, + "loss": 2.5289, + "step": 12087 + }, + { + "epoch": 0.9755467678153499, + "grad_norm": 0.712942361831665, + "learning_rate": 6.839348876226595e-05, + "loss": 2.5544, + "step": 12088 + }, + { + "epoch": 0.9756274715519329, + "grad_norm": 
0.6931124329566956, + "learning_rate": 6.837851153426924e-05, + "loss": 2.5407, + "step": 12089 + }, + { + "epoch": 0.9757081752885158, + "grad_norm": 0.6939486265182495, + "learning_rate": 6.836353509436264e-05, + "loss": 2.5236, + "step": 12090 + }, + { + "epoch": 0.9757888790250988, + "grad_norm": 0.7434083223342896, + "learning_rate": 6.834855944291944e-05, + "loss": 2.4903, + "step": 12091 + }, + { + "epoch": 0.9758695827616819, + "grad_norm": 0.672177255153656, + "learning_rate": 6.833358458031292e-05, + "loss": 2.4995, + "step": 12092 + }, + { + "epoch": 0.9759502864982649, + "grad_norm": 0.6631280779838562, + "learning_rate": 6.831861050691619e-05, + "loss": 2.4689, + "step": 12093 + }, + { + "epoch": 0.9760309902348479, + "grad_norm": 0.7485793232917786, + "learning_rate": 6.830363722310253e-05, + "loss": 2.5526, + "step": 12094 + }, + { + "epoch": 0.9761116939714308, + "grad_norm": 0.6592193245887756, + "learning_rate": 6.828866472924511e-05, + "loss": 2.4425, + "step": 12095 + }, + { + "epoch": 0.9761923977080139, + "grad_norm": 0.6479860544204712, + "learning_rate": 6.827369302571703e-05, + "loss": 2.4637, + "step": 12096 + }, + { + "epoch": 0.9762731014445969, + "grad_norm": 0.6694966554641724, + "learning_rate": 6.825872211289146e-05, + "loss": 2.5256, + "step": 12097 + }, + { + "epoch": 0.9763538051811799, + "grad_norm": 0.675751805305481, + "learning_rate": 6.82437519911415e-05, + "loss": 2.5021, + "step": 12098 + }, + { + "epoch": 0.9764345089177628, + "grad_norm": 0.7255450487136841, + "learning_rate": 6.822878266084026e-05, + "loss": 2.5275, + "step": 12099 + }, + { + "epoch": 0.9765152126543459, + "grad_norm": 0.7034213542938232, + "learning_rate": 6.821381412236079e-05, + "loss": 2.5432, + "step": 12100 + }, + { + "epoch": 0.9765959163909289, + "grad_norm": 0.6808038949966431, + "learning_rate": 6.819884637607619e-05, + "loss": 2.5044, + "step": 12101 + }, + { + "epoch": 0.9766766201275119, + "grad_norm": 0.6601580381393433, + "learning_rate": 
6.818387942235945e-05, + "loss": 2.4602, + "step": 12102 + }, + { + "epoch": 0.9767573238640949, + "grad_norm": 0.7163928151130676, + "learning_rate": 6.816891326158359e-05, + "loss": 2.4785, + "step": 12103 + }, + { + "epoch": 0.976838027600678, + "grad_norm": 0.6616904735565186, + "learning_rate": 6.815394789412164e-05, + "loss": 2.5081, + "step": 12104 + }, + { + "epoch": 0.9769187313372609, + "grad_norm": 0.6476422548294067, + "learning_rate": 6.813898332034657e-05, + "loss": 2.4624, + "step": 12105 + }, + { + "epoch": 0.9769994350738439, + "grad_norm": 0.6468440890312195, + "learning_rate": 6.812401954063131e-05, + "loss": 2.4948, + "step": 12106 + }, + { + "epoch": 0.9770801388104269, + "grad_norm": 0.6988391876220703, + "learning_rate": 6.810905655534878e-05, + "loss": 2.4958, + "step": 12107 + }, + { + "epoch": 0.97716084254701, + "grad_norm": 0.6777953505516052, + "learning_rate": 6.809409436487196e-05, + "loss": 2.5304, + "step": 12108 + }, + { + "epoch": 0.9772415462835929, + "grad_norm": 0.7115550637245178, + "learning_rate": 6.807913296957368e-05, + "loss": 2.5321, + "step": 12109 + }, + { + "epoch": 0.9773222500201759, + "grad_norm": 0.737823486328125, + "learning_rate": 6.806417236982684e-05, + "loss": 2.5121, + "step": 12110 + }, + { + "epoch": 0.9774029537567589, + "grad_norm": 0.6797437071800232, + "learning_rate": 6.804921256600439e-05, + "loss": 2.4783, + "step": 12111 + }, + { + "epoch": 0.977483657493342, + "grad_norm": 0.7240802645683289, + "learning_rate": 6.803425355847897e-05, + "loss": 2.4949, + "step": 12112 + }, + { + "epoch": 0.977564361229925, + "grad_norm": 0.6433781981468201, + "learning_rate": 6.801929534762357e-05, + "loss": 2.4937, + "step": 12113 + }, + { + "epoch": 0.9776450649665079, + "grad_norm": 0.6935293078422546, + "learning_rate": 6.800433793381095e-05, + "loss": 2.5025, + "step": 12114 + }, + { + "epoch": 0.9777257687030909, + "grad_norm": 0.699780285358429, + "learning_rate": 6.798938131741383e-05, + "loss": 2.5231, + 
"step": 12115 + }, + { + "epoch": 0.977806472439674, + "grad_norm": 0.6414729952812195, + "learning_rate": 6.7974425498805e-05, + "loss": 2.4422, + "step": 12116 + }, + { + "epoch": 0.977887176176257, + "grad_norm": 0.6733608841896057, + "learning_rate": 6.795947047835722e-05, + "loss": 2.4873, + "step": 12117 + }, + { + "epoch": 0.97796787991284, + "grad_norm": 0.6985765099525452, + "learning_rate": 6.794451625644318e-05, + "loss": 2.4994, + "step": 12118 + }, + { + "epoch": 0.9780485836494229, + "grad_norm": 0.6429893374443054, + "learning_rate": 6.792956283343559e-05, + "loss": 2.4968, + "step": 12119 + }, + { + "epoch": 0.978129287386006, + "grad_norm": 0.7129024267196655, + "learning_rate": 6.79146102097071e-05, + "loss": 2.5457, + "step": 12120 + }, + { + "epoch": 0.978209991122589, + "grad_norm": 0.6811943650245667, + "learning_rate": 6.789965838563047e-05, + "loss": 2.5012, + "step": 12121 + }, + { + "epoch": 0.978290694859172, + "grad_norm": 0.7269948720932007, + "learning_rate": 6.788470736157821e-05, + "loss": 2.5124, + "step": 12122 + }, + { + "epoch": 0.978371398595755, + "grad_norm": 0.7396084666252136, + "learning_rate": 6.786975713792299e-05, + "loss": 2.5631, + "step": 12123 + }, + { + "epoch": 0.978452102332338, + "grad_norm": 0.6880094408988953, + "learning_rate": 6.785480771503745e-05, + "loss": 2.5103, + "step": 12124 + }, + { + "epoch": 0.978532806068921, + "grad_norm": 0.737095057964325, + "learning_rate": 6.783985909329409e-05, + "loss": 2.5062, + "step": 12125 + }, + { + "epoch": 0.978613509805504, + "grad_norm": 0.6540948152542114, + "learning_rate": 6.782491127306552e-05, + "loss": 2.5568, + "step": 12126 + }, + { + "epoch": 0.978694213542087, + "grad_norm": 0.669706404209137, + "learning_rate": 6.780996425472427e-05, + "loss": 2.5156, + "step": 12127 + }, + { + "epoch": 0.97877491727867, + "grad_norm": 0.6722843647003174, + "learning_rate": 6.779501803864286e-05, + "loss": 2.4784, + "step": 12128 + }, + { + "epoch": 0.978855621015253, + 
"grad_norm": 0.6545475125312805, + "learning_rate": 6.778007262519377e-05, + "loss": 2.5159, + "step": 12129 + }, + { + "epoch": 0.978936324751836, + "grad_norm": 0.7010136246681213, + "learning_rate": 6.776512801474953e-05, + "loss": 2.5244, + "step": 12130 + }, + { + "epoch": 0.979017028488419, + "grad_norm": 0.6912714242935181, + "learning_rate": 6.775018420768253e-05, + "loss": 2.5223, + "step": 12131 + }, + { + "epoch": 0.9790977322250021, + "grad_norm": 0.6864827275276184, + "learning_rate": 6.773524120436525e-05, + "loss": 2.5027, + "step": 12132 + }, + { + "epoch": 0.979178435961585, + "grad_norm": 0.7586981058120728, + "learning_rate": 6.77202990051701e-05, + "loss": 2.4554, + "step": 12133 + }, + { + "epoch": 0.979259139698168, + "grad_norm": 0.6487839818000793, + "learning_rate": 6.770535761046948e-05, + "loss": 2.5035, + "step": 12134 + }, + { + "epoch": 0.979339843434751, + "grad_norm": 0.7193071246147156, + "learning_rate": 6.769041702063575e-05, + "loss": 2.4669, + "step": 12135 + }, + { + "epoch": 0.9794205471713341, + "grad_norm": 0.7118960618972778, + "learning_rate": 6.76754772360413e-05, + "loss": 2.493, + "step": 12136 + }, + { + "epoch": 0.9795012509079171, + "grad_norm": 0.6617394685745239, + "learning_rate": 6.766053825705847e-05, + "loss": 2.4771, + "step": 12137 + }, + { + "epoch": 0.9795819546445, + "grad_norm": 0.7664859294891357, + "learning_rate": 6.764560008405953e-05, + "loss": 2.5191, + "step": 12138 + }, + { + "epoch": 0.979662658381083, + "grad_norm": 0.708063542842865, + "learning_rate": 6.763066271741682e-05, + "loss": 2.5521, + "step": 12139 + }, + { + "epoch": 0.979743362117666, + "grad_norm": 0.6951049566268921, + "learning_rate": 6.761572615750267e-05, + "loss": 2.4708, + "step": 12140 + }, + { + "epoch": 0.9798240658542491, + "grad_norm": 0.6914932727813721, + "learning_rate": 6.760079040468921e-05, + "loss": 2.5101, + "step": 12141 + }, + { + "epoch": 0.9799047695908321, + "grad_norm": 0.6843075752258301, + 
"learning_rate": 6.758585545934876e-05, + "loss": 2.4932, + "step": 12142 + }, + { + "epoch": 0.979985473327415, + "grad_norm": 0.6567733883857727, + "learning_rate": 6.757092132185354e-05, + "loss": 2.4577, + "step": 12143 + }, + { + "epoch": 0.980066177063998, + "grad_norm": 0.6874415874481201, + "learning_rate": 6.75559879925757e-05, + "loss": 2.4818, + "step": 12144 + }, + { + "epoch": 0.9801468808005811, + "grad_norm": 0.7274627685546875, + "learning_rate": 6.754105547188746e-05, + "loss": 2.523, + "step": 12145 + }, + { + "epoch": 0.9802275845371641, + "grad_norm": 0.6991173028945923, + "learning_rate": 6.7526123760161e-05, + "loss": 2.4864, + "step": 12146 + }, + { + "epoch": 0.980308288273747, + "grad_norm": 0.670078456401825, + "learning_rate": 6.75111928577684e-05, + "loss": 2.4889, + "step": 12147 + }, + { + "epoch": 0.98038899201033, + "grad_norm": 0.6653482913970947, + "learning_rate": 6.749626276508178e-05, + "loss": 2.4652, + "step": 12148 + }, + { + "epoch": 0.9804696957469131, + "grad_norm": 0.7329251766204834, + "learning_rate": 6.748133348247326e-05, + "loss": 2.518, + "step": 12149 + }, + { + "epoch": 0.9805503994834961, + "grad_norm": 0.7792871594429016, + "learning_rate": 6.746640501031495e-05, + "loss": 2.5018, + "step": 12150 + }, + { + "epoch": 0.9806311032200791, + "grad_norm": 0.6962797045707703, + "learning_rate": 6.745147734897883e-05, + "loss": 2.4388, + "step": 12151 + }, + { + "epoch": 0.980711806956662, + "grad_norm": 0.6981272101402283, + "learning_rate": 6.7436550498837e-05, + "loss": 2.4886, + "step": 12152 + }, + { + "epoch": 0.9807925106932451, + "grad_norm": 0.6696565747261047, + "learning_rate": 6.742162446026146e-05, + "loss": 2.5258, + "step": 12153 + }, + { + "epoch": 0.9808732144298281, + "grad_norm": 0.6922139525413513, + "learning_rate": 6.740669923362417e-05, + "loss": 2.493, + "step": 12154 + }, + { + "epoch": 0.9809539181664111, + "grad_norm": 0.6745694875717163, + "learning_rate": 6.739177481929715e-05, + "loss": 
2.5209, + "step": 12155 + }, + { + "epoch": 0.9810346219029941, + "grad_norm": 0.7023215889930725, + "learning_rate": 6.737685121765238e-05, + "loss": 2.4987, + "step": 12156 + }, + { + "epoch": 0.9811153256395772, + "grad_norm": 0.6337805390357971, + "learning_rate": 6.73619284290617e-05, + "loss": 2.4838, + "step": 12157 + }, + { + "epoch": 0.9811960293761601, + "grad_norm": 0.6747817397117615, + "learning_rate": 6.73470064538971e-05, + "loss": 2.4834, + "step": 12158 + }, + { + "epoch": 0.9812767331127431, + "grad_norm": 0.6714580655097961, + "learning_rate": 6.733208529253047e-05, + "loss": 2.4724, + "step": 12159 + }, + { + "epoch": 0.9813574368493261, + "grad_norm": 0.6927861571311951, + "learning_rate": 6.731716494533364e-05, + "loss": 2.495, + "step": 12160 + }, + { + "epoch": 0.9814381405859092, + "grad_norm": 0.6576036214828491, + "learning_rate": 6.73022454126785e-05, + "loss": 2.5415, + "step": 12161 + }, + { + "epoch": 0.9815188443224921, + "grad_norm": 0.6495294570922852, + "learning_rate": 6.728732669493691e-05, + "loss": 2.4889, + "step": 12162 + }, + { + "epoch": 0.9815995480590751, + "grad_norm": 0.6680364012718201, + "learning_rate": 6.72724087924806e-05, + "loss": 2.4733, + "step": 12163 + }, + { + "epoch": 0.9816802517956581, + "grad_norm": 0.6816582083702087, + "learning_rate": 6.725749170568143e-05, + "loss": 2.4688, + "step": 12164 + }, + { + "epoch": 0.9817609555322412, + "grad_norm": 0.6995956897735596, + "learning_rate": 6.724257543491116e-05, + "loss": 2.4962, + "step": 12165 + }, + { + "epoch": 0.9818416592688242, + "grad_norm": 0.6728340983390808, + "learning_rate": 6.722765998054157e-05, + "loss": 2.5218, + "step": 12166 + }, + { + "epoch": 0.9819223630054071, + "grad_norm": 0.6835319995880127, + "learning_rate": 6.721274534294433e-05, + "loss": 2.4845, + "step": 12167 + }, + { + "epoch": 0.9820030667419901, + "grad_norm": 0.6969910264015198, + "learning_rate": 6.719783152249119e-05, + "loss": 2.4983, + "step": 12168 + }, + { + 
"epoch": 0.9820837704785732, + "grad_norm": 0.7327036261558533, + "learning_rate": 6.718291851955383e-05, + "loss": 2.5893, + "step": 12169 + }, + { + "epoch": 0.9821644742151562, + "grad_norm": 0.7092839479446411, + "learning_rate": 6.716800633450393e-05, + "loss": 2.5104, + "step": 12170 + }, + { + "epoch": 0.9822451779517392, + "grad_norm": 0.7384308576583862, + "learning_rate": 6.715309496771311e-05, + "loss": 2.5066, + "step": 12171 + }, + { + "epoch": 0.9823258816883221, + "grad_norm": 0.6744845509529114, + "learning_rate": 6.713818441955308e-05, + "loss": 2.469, + "step": 12172 + }, + { + "epoch": 0.9824065854249052, + "grad_norm": 0.6497980952262878, + "learning_rate": 6.712327469039536e-05, + "loss": 2.4943, + "step": 12173 + }, + { + "epoch": 0.9824872891614882, + "grad_norm": 0.6550357937812805, + "learning_rate": 6.710836578061156e-05, + "loss": 2.5019, + "step": 12174 + }, + { + "epoch": 0.9825679928980712, + "grad_norm": 0.6813549995422363, + "learning_rate": 6.709345769057331e-05, + "loss": 2.4314, + "step": 12175 + }, + { + "epoch": 0.9826486966346542, + "grad_norm": 0.6636531352996826, + "learning_rate": 6.707855042065209e-05, + "loss": 2.5202, + "step": 12176 + }, + { + "epoch": 0.9827294003712372, + "grad_norm": 0.6684894561767578, + "learning_rate": 6.706364397121944e-05, + "loss": 2.4353, + "step": 12177 + }, + { + "epoch": 0.9828101041078202, + "grad_norm": 0.6813677549362183, + "learning_rate": 6.704873834264688e-05, + "loss": 2.4254, + "step": 12178 + }, + { + "epoch": 0.9828908078444032, + "grad_norm": 0.6584975719451904, + "learning_rate": 6.70338335353059e-05, + "loss": 2.5647, + "step": 12179 + }, + { + "epoch": 0.9829715115809862, + "grad_norm": 0.6959114074707031, + "learning_rate": 6.701892954956796e-05, + "loss": 2.5203, + "step": 12180 + }, + { + "epoch": 0.9830522153175693, + "grad_norm": 0.6399044990539551, + "learning_rate": 6.700402638580452e-05, + "loss": 2.4697, + "step": 12181 + }, + { + "epoch": 0.9831329190541522, + 
"grad_norm": 0.6838750839233398, + "learning_rate": 6.698912404438702e-05, + "loss": 2.5261, + "step": 12182 + }, + { + "epoch": 0.9832136227907352, + "grad_norm": 0.6286367177963257, + "learning_rate": 6.697422252568679e-05, + "loss": 2.4264, + "step": 12183 + }, + { + "epoch": 0.9832943265273182, + "grad_norm": 0.901637852191925, + "learning_rate": 6.695932183007528e-05, + "loss": 2.4908, + "step": 12184 + }, + { + "epoch": 0.9833750302639013, + "grad_norm": 0.8361458778381348, + "learning_rate": 6.694442195792386e-05, + "loss": 2.5183, + "step": 12185 + }, + { + "epoch": 0.9834557340004842, + "grad_norm": 0.7033401727676392, + "learning_rate": 6.692952290960384e-05, + "loss": 2.5702, + "step": 12186 + }, + { + "epoch": 0.9835364377370672, + "grad_norm": 0.669486939907074, + "learning_rate": 6.691462468548653e-05, + "loss": 2.5143, + "step": 12187 + }, + { + "epoch": 0.9836171414736502, + "grad_norm": 0.7043797969818115, + "learning_rate": 6.689972728594329e-05, + "loss": 2.5638, + "step": 12188 + }, + { + "epoch": 0.9836978452102332, + "grad_norm": 0.6532511115074158, + "learning_rate": 6.688483071134537e-05, + "loss": 2.5227, + "step": 12189 + }, + { + "epoch": 0.9837785489468163, + "grad_norm": 0.7363922595977783, + "learning_rate": 6.6869934962064e-05, + "loss": 2.4953, + "step": 12190 + }, + { + "epoch": 0.9838592526833992, + "grad_norm": 0.6746651530265808, + "learning_rate": 6.685504003847051e-05, + "loss": 2.5021, + "step": 12191 + }, + { + "epoch": 0.9839399564199822, + "grad_norm": 0.665459930896759, + "learning_rate": 6.684014594093604e-05, + "loss": 2.5126, + "step": 12192 + }, + { + "epoch": 0.9840206601565652, + "grad_norm": 0.6618975400924683, + "learning_rate": 6.682525266983179e-05, + "loss": 2.5046, + "step": 12193 + }, + { + "epoch": 0.9841013638931483, + "grad_norm": 0.6536173224449158, + "learning_rate": 6.6810360225529e-05, + "loss": 2.4222, + "step": 12194 + }, + { + "epoch": 0.9841820676297313, + "grad_norm": 0.6882187724113464, + 
"learning_rate": 6.679546860839876e-05, + "loss": 2.475, + "step": 12195 + }, + { + "epoch": 0.9842627713663142, + "grad_norm": 0.6941187977790833, + "learning_rate": 6.678057781881224e-05, + "loss": 2.5642, + "step": 12196 + }, + { + "epoch": 0.9843434751028972, + "grad_norm": 0.7057064175605774, + "learning_rate": 6.676568785714057e-05, + "loss": 2.4817, + "step": 12197 + }, + { + "epoch": 0.9844241788394803, + "grad_norm": 0.6455948352813721, + "learning_rate": 6.675079872375487e-05, + "loss": 2.5206, + "step": 12198 + }, + { + "epoch": 0.9845048825760633, + "grad_norm": 0.6559014320373535, + "learning_rate": 6.673591041902613e-05, + "loss": 2.4082, + "step": 12199 + }, + { + "epoch": 0.9845855863126463, + "grad_norm": 0.6732046008110046, + "learning_rate": 6.672102294332542e-05, + "loss": 2.5472, + "step": 12200 + }, + { + "epoch": 0.9846662900492292, + "grad_norm": 0.7074914574623108, + "learning_rate": 6.670613629702391e-05, + "loss": 2.5243, + "step": 12201 + }, + { + "epoch": 0.9847469937858123, + "grad_norm": 0.6780694127082825, + "learning_rate": 6.669125048049246e-05, + "loss": 2.494, + "step": 12202 + }, + { + "epoch": 0.9848276975223953, + "grad_norm": 0.6361132264137268, + "learning_rate": 6.66763654941021e-05, + "loss": 2.4764, + "step": 12203 + }, + { + "epoch": 0.9849084012589783, + "grad_norm": 0.752727210521698, + "learning_rate": 6.666148133822387e-05, + "loss": 2.4942, + "step": 12204 + }, + { + "epoch": 0.9849891049955612, + "grad_norm": 0.7282724976539612, + "learning_rate": 6.664659801322863e-05, + "loss": 2.471, + "step": 12205 + }, + { + "epoch": 0.9850698087321443, + "grad_norm": 0.6977601051330566, + "learning_rate": 6.663171551948736e-05, + "loss": 2.4695, + "step": 12206 + }, + { + "epoch": 0.9851505124687273, + "grad_norm": 0.6957824230194092, + "learning_rate": 6.661683385737101e-05, + "loss": 2.5096, + "step": 12207 + }, + { + "epoch": 0.9852312162053103, + "grad_norm": 0.6197221279144287, + "learning_rate": 6.660195302725037e-05, + 
"loss": 2.4199, + "step": 12208 + }, + { + "epoch": 0.9853119199418933, + "grad_norm": 0.747558057308197, + "learning_rate": 6.658707302949638e-05, + "loss": 2.5988, + "step": 12209 + }, + { + "epoch": 0.9853926236784764, + "grad_norm": 0.6593184471130371, + "learning_rate": 6.657219386447989e-05, + "loss": 2.4837, + "step": 12210 + }, + { + "epoch": 0.9854733274150593, + "grad_norm": 0.6795992255210876, + "learning_rate": 6.655731553257169e-05, + "loss": 2.498, + "step": 12211 + }, + { + "epoch": 0.9855540311516423, + "grad_norm": 0.7588422298431396, + "learning_rate": 6.65424380341426e-05, + "loss": 2.444, + "step": 12212 + }, + { + "epoch": 0.9856347348882253, + "grad_norm": 0.7791433930397034, + "learning_rate": 6.652756136956342e-05, + "loss": 2.4893, + "step": 12213 + }, + { + "epoch": 0.9857154386248084, + "grad_norm": 0.6320767998695374, + "learning_rate": 6.651268553920493e-05, + "loss": 2.4831, + "step": 12214 + }, + { + "epoch": 0.9857961423613913, + "grad_norm": 0.6818140745162964, + "learning_rate": 6.649781054343783e-05, + "loss": 2.4316, + "step": 12215 + }, + { + "epoch": 0.9858768460979743, + "grad_norm": 0.7460113763809204, + "learning_rate": 6.648293638263285e-05, + "loss": 2.5335, + "step": 12216 + }, + { + "epoch": 0.9859575498345573, + "grad_norm": 0.714074432849884, + "learning_rate": 6.646806305716079e-05, + "loss": 2.4573, + "step": 12217 + }, + { + "epoch": 0.9860382535711404, + "grad_norm": 0.6815951466560364, + "learning_rate": 6.645319056739217e-05, + "loss": 2.4758, + "step": 12218 + }, + { + "epoch": 0.9861189573077234, + "grad_norm": 0.6842799782752991, + "learning_rate": 6.643831891369775e-05, + "loss": 2.4998, + "step": 12219 + }, + { + "epoch": 0.9861996610443063, + "grad_norm": 0.6725212335586548, + "learning_rate": 6.642344809644818e-05, + "loss": 2.5179, + "step": 12220 + }, + { + "epoch": 0.9862803647808893, + "grad_norm": 0.7859417796134949, + "learning_rate": 6.640857811601402e-05, + "loss": 2.5801, + "step": 12221 + }, + { 
+ "epoch": 0.9863610685174724, + "grad_norm": 0.6438577771186829, + "learning_rate": 6.639370897276591e-05, + "loss": 2.4659, + "step": 12222 + }, + { + "epoch": 0.9864417722540554, + "grad_norm": 0.7036609053611755, + "learning_rate": 6.637884066707447e-05, + "loss": 2.5637, + "step": 12223 + }, + { + "epoch": 0.9865224759906384, + "grad_norm": 0.6756969094276428, + "learning_rate": 6.636397319931016e-05, + "loss": 2.5381, + "step": 12224 + }, + { + "epoch": 0.9866031797272213, + "grad_norm": 0.6907589435577393, + "learning_rate": 6.634910656984354e-05, + "loss": 2.4927, + "step": 12225 + }, + { + "epoch": 0.9866838834638044, + "grad_norm": 0.7347010374069214, + "learning_rate": 6.63342407790452e-05, + "loss": 2.5131, + "step": 12226 + }, + { + "epoch": 0.9867645872003874, + "grad_norm": 0.6835876107215881, + "learning_rate": 6.631937582728555e-05, + "loss": 2.4611, + "step": 12227 + }, + { + "epoch": 0.9868452909369704, + "grad_norm": 0.8199172616004944, + "learning_rate": 6.630451171493511e-05, + "loss": 2.5341, + "step": 12228 + }, + { + "epoch": 0.9869259946735534, + "grad_norm": 0.7537188529968262, + "learning_rate": 6.62896484423643e-05, + "loss": 2.5218, + "step": 12229 + }, + { + "epoch": 0.9870066984101364, + "grad_norm": 0.7254310250282288, + "learning_rate": 6.62747860099436e-05, + "loss": 2.4766, + "step": 12230 + }, + { + "epoch": 0.9870874021467194, + "grad_norm": 0.6852995157241821, + "learning_rate": 6.625992441804338e-05, + "loss": 2.548, + "step": 12231 + }, + { + "epoch": 0.9871681058833024, + "grad_norm": 0.7089388966560364, + "learning_rate": 6.624506366703402e-05, + "loss": 2.5125, + "step": 12232 + }, + { + "epoch": 0.9872488096198854, + "grad_norm": 0.7114216685295105, + "learning_rate": 6.623020375728597e-05, + "loss": 2.5408, + "step": 12233 + }, + { + "epoch": 0.9873295133564685, + "grad_norm": 0.7891978025436401, + "learning_rate": 6.621534468916946e-05, + "loss": 2.5946, + "step": 12234 + }, + { + "epoch": 0.9874102170930514, + 
"grad_norm": 0.671399712562561, + "learning_rate": 6.620048646305488e-05, + "loss": 2.4732, + "step": 12235 + }, + { + "epoch": 0.9874909208296344, + "grad_norm": 0.6712855696678162, + "learning_rate": 6.618562907931256e-05, + "loss": 2.4376, + "step": 12236 + }, + { + "epoch": 0.9875716245662174, + "grad_norm": 0.7183727025985718, + "learning_rate": 6.617077253831272e-05, + "loss": 2.5406, + "step": 12237 + }, + { + "epoch": 0.9876523283028005, + "grad_norm": 0.6857761144638062, + "learning_rate": 6.615591684042568e-05, + "loss": 2.5279, + "step": 12238 + }, + { + "epoch": 0.9877330320393835, + "grad_norm": 0.7268103957176208, + "learning_rate": 6.614106198602165e-05, + "loss": 2.5283, + "step": 12239 + }, + { + "epoch": 0.9878137357759664, + "grad_norm": 0.6703717708587646, + "learning_rate": 6.612620797547087e-05, + "loss": 2.4254, + "step": 12240 + }, + { + "epoch": 0.9878944395125494, + "grad_norm": 0.7110719680786133, + "learning_rate": 6.611135480914352e-05, + "loss": 2.496, + "step": 12241 + }, + { + "epoch": 0.9879751432491324, + "grad_norm": 0.7268263697624207, + "learning_rate": 6.609650248740983e-05, + "loss": 2.5489, + "step": 12242 + }, + { + "epoch": 0.9880558469857155, + "grad_norm": 0.7413432598114014, + "learning_rate": 6.60816510106399e-05, + "loss": 2.4998, + "step": 12243 + }, + { + "epoch": 0.9881365507222984, + "grad_norm": 0.7443360090255737, + "learning_rate": 6.606680037920389e-05, + "loss": 2.5282, + "step": 12244 + }, + { + "epoch": 0.9882172544588814, + "grad_norm": 0.7787832021713257, + "learning_rate": 6.605195059347191e-05, + "loss": 2.5221, + "step": 12245 + }, + { + "epoch": 0.9882979581954644, + "grad_norm": 0.6921473741531372, + "learning_rate": 6.603710165381409e-05, + "loss": 2.5434, + "step": 12246 + }, + { + "epoch": 0.9883786619320475, + "grad_norm": 0.737328827381134, + "learning_rate": 6.602225356060044e-05, + "loss": 2.5222, + "step": 12247 + }, + { + "epoch": 0.9884593656686305, + "grad_norm": 0.698823094367981, + 
"learning_rate": 6.600740631420106e-05, + "loss": 2.528, + "step": 12248 + }, + { + "epoch": 0.9885400694052134, + "grad_norm": 0.6735067963600159, + "learning_rate": 6.599255991498601e-05, + "loss": 2.4942, + "step": 12249 + }, + { + "epoch": 0.9886207731417964, + "grad_norm": 0.659622311592102, + "learning_rate": 6.59777143633252e-05, + "loss": 2.4822, + "step": 12250 + }, + { + "epoch": 0.9887014768783795, + "grad_norm": 0.6973726153373718, + "learning_rate": 6.596286965958872e-05, + "loss": 2.5499, + "step": 12251 + }, + { + "epoch": 0.9887821806149625, + "grad_norm": 0.6771909594535828, + "learning_rate": 6.594802580414651e-05, + "loss": 2.4968, + "step": 12252 + }, + { + "epoch": 0.9888628843515455, + "grad_norm": 0.68080073595047, + "learning_rate": 6.593318279736849e-05, + "loss": 2.5142, + "step": 12253 + }, + { + "epoch": 0.9889435880881284, + "grad_norm": NaN, + "learning_rate": 6.593318279736849e-05, + "loss": 2.466, + "step": 12254 + }, + { + "epoch": 0.9890242918247115, + "grad_norm": 0.6865221858024597, + "learning_rate": 6.591834063962461e-05, + "loss": 2.4894, + "step": 12255 + }, + { + "epoch": 0.9891049955612945, + "grad_norm": 0.7050445079803467, + "learning_rate": 6.590349933128478e-05, + "loss": 2.5733, + "step": 12256 + }, + { + "epoch": 0.9891856992978775, + "grad_norm": 0.6971526741981506, + "learning_rate": 6.588865887271887e-05, + "loss": 2.4997, + "step": 12257 + }, + { + "epoch": 0.9892664030344605, + "grad_norm": 0.6465088725090027, + "learning_rate": 6.587381926429674e-05, + "loss": 2.5155, + "step": 12258 + }, + { + "epoch": 0.9893471067710435, + "grad_norm": 0.6521422266960144, + "learning_rate": 6.585898050638823e-05, + "loss": 2.4803, + "step": 12259 + }, + { + "epoch": 0.9894278105076265, + "grad_norm": 0.6798849105834961, + "learning_rate": 6.584414259936324e-05, + "loss": 2.5301, + "step": 12260 + }, + { + "epoch": 0.9895085142442095, + "grad_norm": 0.6903446912765503, + "learning_rate": 6.582930554359144e-05, + "loss": 2.4662, 
+ "step": 12261 + }, + { + "epoch": 0.9895892179807925, + "grad_norm": 0.7183516621589661, + "learning_rate": 6.581446933944267e-05, + "loss": 2.4711, + "step": 12262 + }, + { + "epoch": 0.9896699217173756, + "grad_norm": 0.702738344669342, + "learning_rate": 6.579963398728671e-05, + "loss": 2.531, + "step": 12263 + }, + { + "epoch": 0.9897506254539585, + "grad_norm": 0.7187048196792603, + "learning_rate": 6.578479948749325e-05, + "loss": 2.4933, + "step": 12264 + }, + { + "epoch": 0.9898313291905415, + "grad_norm": 0.6988784670829773, + "learning_rate": 6.576996584043202e-05, + "loss": 2.5179, + "step": 12265 + }, + { + "epoch": 0.9899120329271245, + "grad_norm": 0.7434641122817993, + "learning_rate": 6.575513304647276e-05, + "loss": 2.5157, + "step": 12266 + }, + { + "epoch": 0.9899927366637076, + "grad_norm": 0.667881429195404, + "learning_rate": 6.574030110598505e-05, + "loss": 2.5152, + "step": 12267 + }, + { + "epoch": 0.9900734404002905, + "grad_norm": 0.6766676902770996, + "learning_rate": 6.572547001933862e-05, + "loss": 2.5041, + "step": 12268 + }, + { + "epoch": 0.9901541441368735, + "grad_norm": 0.6531797051429749, + "learning_rate": 6.571063978690311e-05, + "loss": 2.5457, + "step": 12269 + }, + { + "epoch": 0.9902348478734565, + "grad_norm": 0.6557255983352661, + "learning_rate": 6.569581040904804e-05, + "loss": 2.5253, + "step": 12270 + }, + { + "epoch": 0.9903155516100396, + "grad_norm": 0.6818893551826477, + "learning_rate": 6.568098188614304e-05, + "loss": 2.5031, + "step": 12271 + }, + { + "epoch": 0.9903962553466226, + "grad_norm": 0.6644853949546814, + "learning_rate": 6.56661542185577e-05, + "loss": 2.5285, + "step": 12272 + }, + { + "epoch": 0.9904769590832055, + "grad_norm": 0.6035603284835815, + "learning_rate": 6.565132740666155e-05, + "loss": 2.46, + "step": 12273 + }, + { + "epoch": 0.9905576628197885, + "grad_norm": 0.7061343193054199, + "learning_rate": 6.56365014508241e-05, + "loss": 2.4731, + "step": 12274 + }, + { + "epoch": 
0.9906383665563716, + "grad_norm": 0.6981248259544373, + "learning_rate": 6.562167635141486e-05, + "loss": 2.4518, + "step": 12275 + }, + { + "epoch": 0.9907190702929546, + "grad_norm": 0.6718073487281799, + "learning_rate": 6.560685210880334e-05, + "loss": 2.4919, + "step": 12276 + }, + { + "epoch": 0.9907997740295376, + "grad_norm": 0.7095392942428589, + "learning_rate": 6.559202872335893e-05, + "loss": 2.5284, + "step": 12277 + }, + { + "epoch": 0.9908804777661205, + "grad_norm": 0.7052092552185059, + "learning_rate": 6.557720619545111e-05, + "loss": 2.4781, + "step": 12278 + }, + { + "epoch": 0.9909611815027036, + "grad_norm": 0.653570830821991, + "learning_rate": 6.556238452544934e-05, + "loss": 2.5293, + "step": 12279 + }, + { + "epoch": 0.9910418852392866, + "grad_norm": 0.6705330610275269, + "learning_rate": 6.554756371372293e-05, + "loss": 2.4437, + "step": 12280 + }, + { + "epoch": 0.9911225889758696, + "grad_norm": 0.6494189500808716, + "learning_rate": 6.553274376064127e-05, + "loss": 2.4833, + "step": 12281 + }, + { + "epoch": 0.9912032927124526, + "grad_norm": 0.6497724652290344, + "learning_rate": 6.551792466657378e-05, + "loss": 2.4803, + "step": 12282 + }, + { + "epoch": 0.9912839964490356, + "grad_norm": 0.7740494608879089, + "learning_rate": 6.550310643188972e-05, + "loss": 2.4907, + "step": 12283 + }, + { + "epoch": 0.9913647001856186, + "grad_norm": 0.699562668800354, + "learning_rate": 6.548828905695843e-05, + "loss": 2.4576, + "step": 12284 + }, + { + "epoch": 0.9914454039222016, + "grad_norm": 0.8123162984848022, + "learning_rate": 6.547347254214921e-05, + "loss": 2.5118, + "step": 12285 + }, + { + "epoch": 0.9915261076587846, + "grad_norm": 0.7227715253829956, + "learning_rate": 6.545865688783129e-05, + "loss": 2.4688, + "step": 12286 + }, + { + "epoch": 0.9916068113953677, + "grad_norm": 0.6498493552207947, + "learning_rate": 6.544384209437392e-05, + "loss": 2.477, + "step": 12287 + }, + { + "epoch": 0.9916875151319506, + "grad_norm": 
0.6427823901176453, + "learning_rate": 6.542902816214636e-05, + "loss": 2.4388, + "step": 12288 + }, + { + "epoch": 0.9917682188685336, + "grad_norm": 0.6803679466247559, + "learning_rate": 6.541421509151778e-05, + "loss": 2.5095, + "step": 12289 + }, + { + "epoch": 0.9918489226051166, + "grad_norm": 0.7025790810585022, + "learning_rate": 6.539940288285734e-05, + "loss": 2.4881, + "step": 12290 + }, + { + "epoch": 0.9919296263416996, + "grad_norm": 0.6899270415306091, + "learning_rate": 6.538459153653424e-05, + "loss": 2.486, + "step": 12291 + }, + { + "epoch": 0.9920103300782827, + "grad_norm": 0.7379609942436218, + "learning_rate": 6.536978105291762e-05, + "loss": 2.5368, + "step": 12292 + }, + { + "epoch": 0.9920910338148656, + "grad_norm": 0.7279202342033386, + "learning_rate": 6.535497143237657e-05, + "loss": 2.5275, + "step": 12293 + }, + { + "epoch": 0.9921717375514486, + "grad_norm": 0.6810527443885803, + "learning_rate": 6.53401626752802e-05, + "loss": 2.5053, + "step": 12294 + }, + { + "epoch": 0.9922524412880316, + "grad_norm": 0.6578424572944641, + "learning_rate": 6.532535478199759e-05, + "loss": 2.5334, + "step": 12295 + }, + { + "epoch": 0.9923331450246147, + "grad_norm": 0.6819284558296204, + "learning_rate": 6.531054775289778e-05, + "loss": 2.4879, + "step": 12296 + }, + { + "epoch": 0.9924138487611976, + "grad_norm": 0.6524500846862793, + "learning_rate": 6.529574158834977e-05, + "loss": 2.5349, + "step": 12297 + }, + { + "epoch": 0.9924945524977806, + "grad_norm": 0.6853352785110474, + "learning_rate": 6.528093628872263e-05, + "loss": 2.4217, + "step": 12298 + }, + { + "epoch": 0.9925752562343636, + "grad_norm": 0.6731893420219421, + "learning_rate": 6.526613185438529e-05, + "loss": 2.4739, + "step": 12299 + }, + { + "epoch": 0.9926559599709467, + "grad_norm": 0.6515606641769409, + "learning_rate": 6.525132828570673e-05, + "loss": 2.5348, + "step": 12300 + }, + { + "epoch": 0.9927366637075297, + "grad_norm": 0.6819963455200195, + "learning_rate": 
6.523652558305596e-05, + "loss": 2.5052, + "step": 12301 + }, + { + "epoch": 0.9928173674441126, + "grad_norm": 0.6521475911140442, + "learning_rate": 6.522172374680177e-05, + "loss": 2.5283, + "step": 12302 + }, + { + "epoch": 0.9928980711806956, + "grad_norm": 0.6488186717033386, + "learning_rate": 6.520692277731315e-05, + "loss": 2.4779, + "step": 12303 + }, + { + "epoch": 0.9929787749172787, + "grad_norm": 0.6509760022163391, + "learning_rate": 6.519212267495903e-05, + "loss": 2.5426, + "step": 12304 + }, + { + "epoch": 0.9930594786538617, + "grad_norm": 0.621366560459137, + "learning_rate": 6.517732344010814e-05, + "loss": 2.4804, + "step": 12305 + }, + { + "epoch": 0.9931401823904447, + "grad_norm": 0.6907268166542053, + "learning_rate": 6.516252507312938e-05, + "loss": 2.4883, + "step": 12306 + }, + { + "epoch": 0.9932208861270276, + "grad_norm": 0.7739343643188477, + "learning_rate": 6.514772757439157e-05, + "loss": 2.481, + "step": 12307 + }, + { + "epoch": 0.9933015898636107, + "grad_norm": 0.6794601082801819, + "learning_rate": 6.513293094426352e-05, + "loss": 2.5244, + "step": 12308 + }, + { + "epoch": 0.9933822936001937, + "grad_norm": 0.7189902663230896, + "learning_rate": 6.511813518311394e-05, + "loss": 2.5221, + "step": 12309 + }, + { + "epoch": 0.9934629973367767, + "grad_norm": 0.733318030834198, + "learning_rate": 6.510334029131163e-05, + "loss": 2.521, + "step": 12310 + }, + { + "epoch": 0.9935437010733597, + "grad_norm": 0.7584299445152283, + "learning_rate": 6.508854626922531e-05, + "loss": 2.4962, + "step": 12311 + }, + { + "epoch": 0.9936244048099427, + "grad_norm": 0.6442410349845886, + "learning_rate": 6.507375311722366e-05, + "loss": 2.4775, + "step": 12312 + }, + { + "epoch": 0.9937051085465257, + "grad_norm": 0.6609243154525757, + "learning_rate": 6.505896083567536e-05, + "loss": 2.4706, + "step": 12313 + }, + { + "epoch": 0.9937858122831087, + "grad_norm": 0.6527631878852844, + "learning_rate": 6.504416942494914e-05, + "loss": 2.4612, 
+ "step": 12314 + }, + { + "epoch": 0.9938665160196917, + "grad_norm": 0.6798218488693237, + "learning_rate": 6.502937888541357e-05, + "loss": 2.5502, + "step": 12315 + }, + { + "epoch": 0.9939472197562748, + "grad_norm": 0.6573790907859802, + "learning_rate": 6.501458921743728e-05, + "loss": 2.5598, + "step": 12316 + }, + { + "epoch": 0.9940279234928577, + "grad_norm": 0.6945913434028625, + "learning_rate": 6.49998004213889e-05, + "loss": 2.5323, + "step": 12317 + }, + { + "epoch": 0.9941086272294407, + "grad_norm": 0.7609078288078308, + "learning_rate": 6.498501249763697e-05, + "loss": 2.5211, + "step": 12318 + }, + { + "epoch": 0.9941893309660237, + "grad_norm": 0.6878666281700134, + "learning_rate": 6.497022544655006e-05, + "loss": 2.5366, + "step": 12319 + }, + { + "epoch": 0.9942700347026068, + "grad_norm": 0.6675810813903809, + "learning_rate": 6.495543926849674e-05, + "loss": 2.512, + "step": 12320 + }, + { + "epoch": 0.9943507384391898, + "grad_norm": 0.7285950779914856, + "learning_rate": 6.494065396384544e-05, + "loss": 2.4741, + "step": 12321 + }, + { + "epoch": 0.9944314421757727, + "grad_norm": 0.6287158131599426, + "learning_rate": 6.49258695329647e-05, + "loss": 2.4824, + "step": 12322 + }, + { + "epoch": 0.9945121459123557, + "grad_norm": 0.6506727337837219, + "learning_rate": 6.491108597622296e-05, + "loss": 2.5126, + "step": 12323 + }, + { + "epoch": 0.9945928496489388, + "grad_norm": 0.7679052352905273, + "learning_rate": 6.489630329398869e-05, + "loss": 2.5503, + "step": 12324 + }, + { + "epoch": 0.9946735533855218, + "grad_norm": 0.637184202671051, + "learning_rate": 6.488152148663029e-05, + "loss": 2.5098, + "step": 12325 + }, + { + "epoch": 0.9947542571221047, + "grad_norm": 0.6747186779975891, + "learning_rate": 6.486674055451619e-05, + "loss": 2.5154, + "step": 12326 + }, + { + "epoch": 0.9948349608586877, + "grad_norm": 0.7288245558738708, + "learning_rate": 6.485196049801476e-05, + "loss": 2.5077, + "step": 12327 + }, + { + "epoch": 
0.9949156645952708, + "grad_norm": 0.6914251446723938, + "learning_rate": 6.483718131749435e-05, + "loss": 2.4877, + "step": 12328 + }, + { + "epoch": 0.9949963683318538, + "grad_norm": 0.7224392294883728, + "learning_rate": 6.48224030133233e-05, + "loss": 2.4862, + "step": 12329 + }, + { + "epoch": 0.9950770720684368, + "grad_norm": 0.7365561723709106, + "learning_rate": 6.480762558586995e-05, + "loss": 2.477, + "step": 12330 + }, + { + "epoch": 0.9951577758050197, + "grad_norm": 0.7673236131668091, + "learning_rate": 6.47928490355025e-05, + "loss": 2.5423, + "step": 12331 + }, + { + "epoch": 0.9952384795416028, + "grad_norm": 0.6638002395629883, + "learning_rate": 6.477807336258931e-05, + "loss": 2.5007, + "step": 12332 + }, + { + "epoch": 0.9953191832781858, + "grad_norm": 0.6415974497795105, + "learning_rate": 6.476329856749864e-05, + "loss": 2.4924, + "step": 12333 + }, + { + "epoch": 0.9953998870147688, + "grad_norm": 0.7129398584365845, + "learning_rate": 6.474852465059864e-05, + "loss": 2.5313, + "step": 12334 + }, + { + "epoch": 0.9954805907513518, + "grad_norm": 0.6896344423294067, + "learning_rate": 6.473375161225756e-05, + "loss": 2.5073, + "step": 12335 + }, + { + "epoch": 0.9955612944879348, + "grad_norm": 0.7009317874908447, + "learning_rate": 6.47189794528436e-05, + "loss": 2.574, + "step": 12336 + }, + { + "epoch": 0.9956419982245178, + "grad_norm": 0.6555172801017761, + "learning_rate": 6.470420817272488e-05, + "loss": 2.4769, + "step": 12337 + }, + { + "epoch": 0.9957227019611008, + "grad_norm": 0.7569532990455627, + "learning_rate": 6.468943777226954e-05, + "loss": 2.4691, + "step": 12338 + }, + { + "epoch": 0.9958034056976838, + "grad_norm": 0.68092280626297, + "learning_rate": 6.467466825184569e-05, + "loss": 2.4793, + "step": 12339 + }, + { + "epoch": 0.9958841094342669, + "grad_norm": 0.6977378726005554, + "learning_rate": 6.465989961182152e-05, + "loss": 2.4678, + "step": 12340 + }, + { + "epoch": 0.9959648131708498, + "grad_norm": 
0.6702281832695007, + "learning_rate": 6.4645131852565e-05, + "loss": 2.5398, + "step": 12341 + }, + { + "epoch": 0.9960455169074328, + "grad_norm": 0.7584038972854614, + "learning_rate": 6.46303649744442e-05, + "loss": 2.5355, + "step": 12342 + }, + { + "epoch": 0.9961262206440158, + "grad_norm": 0.6779505610466003, + "learning_rate": 6.461559897782718e-05, + "loss": 2.4828, + "step": 12343 + }, + { + "epoch": 0.9962069243805988, + "grad_norm": 0.6968233585357666, + "learning_rate": 6.460083386308192e-05, + "loss": 2.5108, + "step": 12344 + }, + { + "epoch": 0.9962876281171819, + "grad_norm": 0.7114594578742981, + "learning_rate": 6.45860696305764e-05, + "loss": 2.5236, + "step": 12345 + }, + { + "epoch": 0.9963683318537648, + "grad_norm": 0.6850530505180359, + "learning_rate": 6.457130628067865e-05, + "loss": 2.458, + "step": 12346 + }, + { + "epoch": 0.9964490355903478, + "grad_norm": 0.7135400772094727, + "learning_rate": 6.455654381375651e-05, + "loss": 2.539, + "step": 12347 + }, + { + "epoch": 0.9965297393269308, + "grad_norm": 0.6736366748809814, + "learning_rate": 6.454178223017797e-05, + "loss": 2.4721, + "step": 12348 + }, + { + "epoch": 0.9966104430635139, + "grad_norm": 0.6806206107139587, + "learning_rate": 6.45270215303109e-05, + "loss": 2.5035, + "step": 12349 + }, + { + "epoch": 0.9966911468000968, + "grad_norm": 0.7120711803436279, + "learning_rate": 6.451226171452318e-05, + "loss": 2.5344, + "step": 12350 + }, + { + "epoch": 0.9967718505366798, + "grad_norm": 0.6865986585617065, + "learning_rate": 6.449750278318264e-05, + "loss": 2.4807, + "step": 12351 + }, + { + "epoch": 0.9968525542732628, + "grad_norm": 0.6461294889450073, + "learning_rate": 6.448274473665717e-05, + "loss": 2.4878, + "step": 12352 + }, + { + "epoch": 0.9969332580098459, + "grad_norm": 0.7090638279914856, + "learning_rate": 6.446798757531454e-05, + "loss": 2.4599, + "step": 12353 + }, + { + "epoch": 0.9970139617464289, + "grad_norm": 0.6933324337005615, + "learning_rate": 
6.445323129952252e-05, + "loss": 2.5398, + "step": 12354 + }, + { + "epoch": 0.9970946654830118, + "grad_norm": 0.7018197774887085, + "learning_rate": 6.443847590964888e-05, + "loss": 2.5159, + "step": 12355 + }, + { + "epoch": 0.9971753692195948, + "grad_norm": 0.7292604446411133, + "learning_rate": 6.442372140606145e-05, + "loss": 2.4934, + "step": 12356 + }, + { + "epoch": 0.9972560729561779, + "grad_norm": 0.6686378121376038, + "learning_rate": 6.440896778912783e-05, + "loss": 2.5076, + "step": 12357 + }, + { + "epoch": 0.9973367766927609, + "grad_norm": 0.7194764018058777, + "learning_rate": 6.439421505921576e-05, + "loss": 2.4958, + "step": 12358 + }, + { + "epoch": 0.9974174804293439, + "grad_norm": 0.662467360496521, + "learning_rate": 6.437946321669296e-05, + "loss": 2.5202, + "step": 12359 + }, + { + "epoch": 0.9974981841659268, + "grad_norm": 0.7222515940666199, + "learning_rate": 6.436471226192703e-05, + "loss": 2.5058, + "step": 12360 + }, + { + "epoch": 0.9975788879025099, + "grad_norm": 0.6354855895042419, + "learning_rate": 6.434996219528562e-05, + "loss": 2.4849, + "step": 12361 + }, + { + "epoch": 0.9976595916390929, + "grad_norm": 0.7689539790153503, + "learning_rate": 6.433521301713636e-05, + "loss": 2.4959, + "step": 12362 + }, + { + "epoch": 0.9977402953756759, + "grad_norm": 0.6894338130950928, + "learning_rate": 6.43204647278468e-05, + "loss": 2.5098, + "step": 12363 + }, + { + "epoch": 0.9978209991122589, + "grad_norm": 0.7694165110588074, + "learning_rate": 6.430571732778451e-05, + "loss": 2.513, + "step": 12364 + }, + { + "epoch": 0.9979017028488419, + "grad_norm": 0.6512044668197632, + "learning_rate": 6.42909708173171e-05, + "loss": 2.4785, + "step": 12365 + }, + { + "epoch": 0.9979824065854249, + "grad_norm": 0.6605672836303711, + "learning_rate": 6.427622519681201e-05, + "loss": 2.4804, + "step": 12366 + }, + { + "epoch": 0.9980631103220079, + "grad_norm": 0.7123624086380005, + "learning_rate": 6.426148046663677e-05, + "loss": 2.4854, 
+ "step": 12367 + }, + { + "epoch": 0.9981438140585909, + "grad_norm": 0.662645697593689, + "learning_rate": 6.424673662715886e-05, + "loss": 2.5314, + "step": 12368 + }, + { + "epoch": 0.998224517795174, + "grad_norm": 0.6482149362564087, + "learning_rate": 6.423199367874573e-05, + "loss": 2.4492, + "step": 12369 + }, + { + "epoch": 0.9983052215317569, + "grad_norm": 0.6545752286911011, + "learning_rate": 6.421725162176482e-05, + "loss": 2.5042, + "step": 12370 + }, + { + "epoch": 0.9983859252683399, + "grad_norm": 0.6698874235153198, + "learning_rate": 6.420251045658353e-05, + "loss": 2.4523, + "step": 12371 + }, + { + "epoch": 0.9984666290049229, + "grad_norm": 0.6961477398872375, + "learning_rate": 6.418777018356929e-05, + "loss": 2.556, + "step": 12372 + }, + { + "epoch": 0.998547332741506, + "grad_norm": 0.67090904712677, + "learning_rate": 6.41730308030894e-05, + "loss": 2.5237, + "step": 12373 + }, + { + "epoch": 0.998628036478089, + "grad_norm": 0.6828685402870178, + "learning_rate": 6.415829231551124e-05, + "loss": 2.453, + "step": 12374 + }, + { + "epoch": 0.9987087402146719, + "grad_norm": 0.6699565649032593, + "learning_rate": 6.414355472120213e-05, + "loss": 2.4632, + "step": 12375 + }, + { + "epoch": 0.9987894439512549, + "grad_norm": 0.6918730735778809, + "learning_rate": 6.412881802052936e-05, + "loss": 2.4532, + "step": 12376 + }, + { + "epoch": 0.998870147687838, + "grad_norm": 0.7222442030906677, + "learning_rate": 6.411408221386021e-05, + "loss": 2.5113, + "step": 12377 + }, + { + "epoch": 0.998950851424421, + "grad_norm": 0.7479627132415771, + "learning_rate": 6.409934730156195e-05, + "loss": 2.4857, + "step": 12378 + }, + { + "epoch": 0.999031555161004, + "grad_norm": 0.6552882194519043, + "learning_rate": 6.40846132840018e-05, + "loss": 2.4816, + "step": 12379 + }, + { + "epoch": 0.9991122588975869, + "grad_norm": 0.5990073084831238, + "learning_rate": 6.406988016154694e-05, + "loss": 2.4753, + "step": 12380 + }, + { + "epoch": 
0.99919296263417, + "grad_norm": 0.6671901941299438, + "learning_rate": 6.405514793456465e-05, + "loss": 2.5298, + "step": 12381 + }, + { + "epoch": 0.999273666370753, + "grad_norm": 0.6630427241325378, + "learning_rate": 6.4040416603422e-05, + "loss": 2.485, + "step": 12382 + }, + { + "epoch": 0.999354370107336, + "grad_norm": 0.6873636841773987, + "learning_rate": 6.402568616848614e-05, + "loss": 2.4902, + "step": 12383 + }, + { + "epoch": 0.9994350738439189, + "grad_norm": 0.6912413239479065, + "learning_rate": 6.401095663012424e-05, + "loss": 2.5339, + "step": 12384 + }, + { + "epoch": 0.999515777580502, + "grad_norm": 0.6491912603378296, + "learning_rate": 6.39962279887034e-05, + "loss": 2.5367, + "step": 12385 + }, + { + "epoch": 0.999596481317085, + "grad_norm": 0.6668288111686707, + "learning_rate": 6.398150024459065e-05, + "loss": 2.5294, + "step": 12386 + }, + { + "epoch": 0.999677185053668, + "grad_norm": 0.6603856086730957, + "learning_rate": 6.396677339815306e-05, + "loss": 2.4378, + "step": 12387 + }, + { + "epoch": 0.999757888790251, + "grad_norm": 0.6461218595504761, + "learning_rate": 6.395204744975772e-05, + "loss": 2.4835, + "step": 12388 + }, + { + "epoch": 0.999838592526834, + "grad_norm": 0.6621688604354858, + "learning_rate": 6.39373223997715e-05, + "loss": 2.4834, + "step": 12389 + }, + { + "epoch": 0.999919296263417, + "grad_norm": 0.6758724451065063, + "learning_rate": 6.392259824856153e-05, + "loss": 2.4549, + "step": 12390 + }, + { + "epoch": 1.0, + "grad_norm": 1.1304112672805786, + "learning_rate": 6.390787499649473e-05, + "loss": 2.5547, + "step": 12391 + }, + { + "epoch": 1.000080703736583, + "grad_norm": 0.6919478178024292, + "learning_rate": 6.389315264393801e-05, + "loss": 2.47, + "step": 12392 + }, + { + "epoch": 1.000161407473166, + "grad_norm": 0.6916815638542175, + "learning_rate": 6.38784311912583e-05, + "loss": 2.4636, + "step": 12393 + }, + { + "epoch": 1.000242111209749, + "grad_norm": 0.6627040505409241, + 
"learning_rate": 6.386371063882252e-05, + "loss": 2.5094, + "step": 12394 + }, + { + "epoch": 1.000322814946332, + "grad_norm": 0.6408648490905762, + "learning_rate": 6.384899098699754e-05, + "loss": 2.426, + "step": 12395 + }, + { + "epoch": 1.000403518682915, + "grad_norm": 0.70432448387146, + "learning_rate": 6.38342722361502e-05, + "loss": 2.4861, + "step": 12396 + }, + { + "epoch": 1.000484222419498, + "grad_norm": 0.7115964889526367, + "learning_rate": 6.381955438664735e-05, + "loss": 2.4824, + "step": 12397 + }, + { + "epoch": 1.000564926156081, + "grad_norm": 0.6547040939331055, + "learning_rate": 6.380483743885574e-05, + "loss": 2.488, + "step": 12398 + }, + { + "epoch": 1.000645629892664, + "grad_norm": 0.6916625499725342, + "learning_rate": 6.379012139314223e-05, + "loss": 2.4864, + "step": 12399 + }, + { + "epoch": 1.0007263336292471, + "grad_norm": 0.6311133503913879, + "learning_rate": 6.377540624987352e-05, + "loss": 2.4672, + "step": 12400 + }, + { + "epoch": 1.00080703736583, + "grad_norm": 0.7115580439567566, + "learning_rate": 6.376069200941642e-05, + "loss": 2.4359, + "step": 12401 + }, + { + "epoch": 1.000887741102413, + "grad_norm": 0.6734051704406738, + "learning_rate": 6.374597867213756e-05, + "loss": 2.4896, + "step": 12402 + }, + { + "epoch": 1.000968444838996, + "grad_norm": 0.6910715699195862, + "learning_rate": 6.373126623840368e-05, + "loss": 2.4502, + "step": 12403 + }, + { + "epoch": 1.001049148575579, + "grad_norm": 0.6807514429092407, + "learning_rate": 6.37165547085815e-05, + "loss": 2.4791, + "step": 12404 + }, + { + "epoch": 1.0011298523121621, + "grad_norm": 0.679350733757019, + "learning_rate": 6.370184408303759e-05, + "loss": 2.4758, + "step": 12405 + }, + { + "epoch": 1.001210556048745, + "grad_norm": 0.6516300439834595, + "learning_rate": 6.36871343621386e-05, + "loss": 2.4338, + "step": 12406 + }, + { + "epoch": 1.001291259785328, + "grad_norm": 0.7033620476722717, + "learning_rate": 6.367242554625119e-05, + "loss": 2.429, 
+ "step": 12407 + }, + { + "epoch": 1.0013719635219112, + "grad_norm": 0.6750274896621704, + "learning_rate": 6.365771763574186e-05, + "loss": 2.4283, + "step": 12408 + }, + { + "epoch": 1.001452667258494, + "grad_norm": 0.7188721895217896, + "learning_rate": 6.364301063097722e-05, + "loss": 2.4509, + "step": 12409 + }, + { + "epoch": 1.001533370995077, + "grad_norm": 0.6936308741569519, + "learning_rate": 6.362830453232379e-05, + "loss": 2.4469, + "step": 12410 + }, + { + "epoch": 1.00161407473166, + "grad_norm": 0.673060953617096, + "learning_rate": 6.361359934014808e-05, + "loss": 2.4444, + "step": 12411 + }, + { + "epoch": 1.001694778468243, + "grad_norm": 0.7465113997459412, + "learning_rate": 6.359889505481658e-05, + "loss": 2.4376, + "step": 12412 + }, + { + "epoch": 1.0017754822048262, + "grad_norm": 0.7180366516113281, + "learning_rate": 6.358419167669582e-05, + "loss": 2.4223, + "step": 12413 + }, + { + "epoch": 1.001856185941409, + "grad_norm": 0.6582302451133728, + "learning_rate": 6.356948920615214e-05, + "loss": 2.4723, + "step": 12414 + }, + { + "epoch": 1.001936889677992, + "grad_norm": 0.6452654600143433, + "learning_rate": 6.3554787643552e-05, + "loss": 2.4609, + "step": 12415 + }, + { + "epoch": 1.0020175934145752, + "grad_norm": 0.7170321345329285, + "learning_rate": 6.354008698926185e-05, + "loss": 2.5377, + "step": 12416 + }, + { + "epoch": 1.002098297151158, + "grad_norm": 0.6483680605888367, + "learning_rate": 6.352538724364809e-05, + "loss": 2.4349, + "step": 12417 + }, + { + "epoch": 1.0021790008877411, + "grad_norm": 0.6567494869232178, + "learning_rate": 6.351068840707697e-05, + "loss": 2.4421, + "step": 12418 + }, + { + "epoch": 1.002259704624324, + "grad_norm": 0.7498565912246704, + "learning_rate": 6.349599047991488e-05, + "loss": 2.4212, + "step": 12419 + }, + { + "epoch": 1.002340408360907, + "grad_norm": 0.6894906759262085, + "learning_rate": 6.348129346252816e-05, + "loss": 2.4356, + "step": 12420 + }, + { + "epoch": 
1.0024211120974902, + "grad_norm": 0.657361626625061, + "learning_rate": 6.346659735528304e-05, + "loss": 2.4164, + "step": 12421 + }, + { + "epoch": 1.002501815834073, + "grad_norm": 0.6369211673736572, + "learning_rate": 6.345190215854581e-05, + "loss": 2.4229, + "step": 12422 + }, + { + "epoch": 1.0025825195706561, + "grad_norm": 0.7033721208572388, + "learning_rate": 6.343720787268277e-05, + "loss": 2.5052, + "step": 12423 + }, + { + "epoch": 1.0026632233072392, + "grad_norm": 0.7125518918037415, + "learning_rate": 6.342251449806003e-05, + "loss": 2.514, + "step": 12424 + }, + { + "epoch": 1.002743927043822, + "grad_norm": 0.7355595827102661, + "learning_rate": 6.340782203504385e-05, + "loss": 2.4459, + "step": 12425 + }, + { + "epoch": 1.0028246307804052, + "grad_norm": 0.7244594693183899, + "learning_rate": 6.339313048400042e-05, + "loss": 2.452, + "step": 12426 + }, + { + "epoch": 1.002905334516988, + "grad_norm": 0.7112728357315063, + "learning_rate": 6.337843984529585e-05, + "loss": 2.4951, + "step": 12427 + }, + { + "epoch": 1.0029860382535711, + "grad_norm": 0.7235615849494934, + "learning_rate": 6.336375011929628e-05, + "loss": 2.4697, + "step": 12428 + }, + { + "epoch": 1.0030667419901542, + "grad_norm": 0.653865396976471, + "learning_rate": 6.334906130636784e-05, + "loss": 2.4804, + "step": 12429 + }, + { + "epoch": 1.003147445726737, + "grad_norm": 0.7845149636268616, + "learning_rate": 6.33343734068766e-05, + "loss": 2.5415, + "step": 12430 + }, + { + "epoch": 1.0032281494633202, + "grad_norm": 0.7356342077255249, + "learning_rate": 6.33196864211886e-05, + "loss": 2.5321, + "step": 12431 + }, + { + "epoch": 1.0033088531999033, + "grad_norm": 0.6828265190124512, + "learning_rate": 6.330500034966991e-05, + "loss": 2.3849, + "step": 12432 + }, + { + "epoch": 1.0033895569364861, + "grad_norm": 0.7226579189300537, + "learning_rate": 6.329031519268658e-05, + "loss": 2.512, + "step": 12433 + }, + { + "epoch": 1.0034702606730692, + "grad_norm": 
0.6490235924720764, + "learning_rate": 6.327563095060449e-05, + "loss": 2.487, + "step": 12434 + }, + { + "epoch": 1.003550964409652, + "grad_norm": 0.6889309883117676, + "learning_rate": 6.326094762378969e-05, + "loss": 2.4677, + "step": 12435 + }, + { + "epoch": 1.0036316681462352, + "grad_norm": 0.695854127407074, + "learning_rate": 6.324626521260815e-05, + "loss": 2.4362, + "step": 12436 + }, + { + "epoch": 1.0037123718828183, + "grad_norm": 0.7045256495475769, + "learning_rate": 6.32315837174257e-05, + "loss": 2.4307, + "step": 12437 + }, + { + "epoch": 1.0037930756194011, + "grad_norm": 0.662604570388794, + "learning_rate": 6.321690313860833e-05, + "loss": 2.4271, + "step": 12438 + }, + { + "epoch": 1.0038737793559842, + "grad_norm": 0.7682240009307861, + "learning_rate": 6.320222347652191e-05, + "loss": 2.4617, + "step": 12439 + }, + { + "epoch": 1.0039544830925673, + "grad_norm": 0.6599584817886353, + "learning_rate": 6.318754473153221e-05, + "loss": 2.405, + "step": 12440 + }, + { + "epoch": 1.0040351868291502, + "grad_norm": 0.7423116564750671, + "learning_rate": 6.317286690400515e-05, + "loss": 2.5496, + "step": 12441 + }, + { + "epoch": 1.0041158905657332, + "grad_norm": 0.6928953528404236, + "learning_rate": 6.315818999430654e-05, + "loss": 2.4265, + "step": 12442 + }, + { + "epoch": 1.0041965943023161, + "grad_norm": 0.699990451335907, + "learning_rate": 6.314351400280211e-05, + "loss": 2.4747, + "step": 12443 + }, + { + "epoch": 1.0042772980388992, + "grad_norm": 0.673384964466095, + "learning_rate": 6.312883892985765e-05, + "loss": 2.4891, + "step": 12444 + }, + { + "epoch": 1.0043580017754823, + "grad_norm": 0.6668596863746643, + "learning_rate": 6.311416477583893e-05, + "loss": 2.4312, + "step": 12445 + }, + { + "epoch": 1.0044387055120652, + "grad_norm": 0.6931218504905701, + "learning_rate": 6.309949154111163e-05, + "loss": 2.4907, + "step": 12446 + }, + { + "epoch": 1.0045194092486482, + "grad_norm": 0.687683641910553, + "learning_rate": 
6.308481922604146e-05, + "loss": 2.4302, + "step": 12447 + }, + { + "epoch": 1.004600112985231, + "grad_norm": 0.6887302398681641, + "learning_rate": 6.30701478309941e-05, + "loss": 2.4749, + "step": 12448 + }, + { + "epoch": 1.0046808167218142, + "grad_norm": 0.6713404655456543, + "learning_rate": 6.305547735633522e-05, + "loss": 2.5046, + "step": 12449 + }, + { + "epoch": 1.0047615204583973, + "grad_norm": 0.7147336006164551, + "learning_rate": 6.304080780243038e-05, + "loss": 2.4578, + "step": 12450 + }, + { + "epoch": 1.0048422241949801, + "grad_norm": 0.87425297498703, + "learning_rate": 6.30261391696452e-05, + "loss": 2.4487, + "step": 12451 + }, + { + "epoch": 1.0049229279315632, + "grad_norm": 0.6641440987586975, + "learning_rate": 6.301147145834534e-05, + "loss": 2.4657, + "step": 12452 + }, + { + "epoch": 1.0050036316681463, + "grad_norm": 0.7311998009681702, + "learning_rate": 6.299680466889626e-05, + "loss": 2.4784, + "step": 12453 + }, + { + "epoch": 1.0050843354047292, + "grad_norm": 0.6722697615623474, + "learning_rate": 6.298213880166354e-05, + "loss": 2.4653, + "step": 12454 + }, + { + "epoch": 1.0051650391413123, + "grad_norm": 0.6886328458786011, + "learning_rate": 6.29674738570127e-05, + "loss": 2.3949, + "step": 12455 + }, + { + "epoch": 1.0052457428778951, + "grad_norm": 0.684688925743103, + "learning_rate": 6.295280983530921e-05, + "loss": 2.4334, + "step": 12456 + }, + { + "epoch": 1.0053264466144782, + "grad_norm": 0.7436798214912415, + "learning_rate": 6.293814673691853e-05, + "loss": 2.5316, + "step": 12457 + }, + { + "epoch": 1.0054071503510613, + "grad_norm": 0.7401304244995117, + "learning_rate": 6.292348456220615e-05, + "loss": 2.4556, + "step": 12458 + }, + { + "epoch": 1.0054878540876442, + "grad_norm": 0.7330329418182373, + "learning_rate": 6.290882331153742e-05, + "loss": 2.4321, + "step": 12459 + }, + { + "epoch": 1.0055685578242273, + "grad_norm": 0.8005052208900452, + "learning_rate": 6.289416298527776e-05, + "loss": 2.415, + 
"step": 12460 + }, + { + "epoch": 1.0056492615608104, + "grad_norm": 0.8047310709953308, + "learning_rate": 6.28795035837926e-05, + "loss": 2.4144, + "step": 12461 + }, + { + "epoch": 1.0057299652973932, + "grad_norm": 0.7384032011032104, + "learning_rate": 6.28648451074472e-05, + "loss": 2.5237, + "step": 12462 + }, + { + "epoch": 1.0058106690339763, + "grad_norm": 0.7240314483642578, + "learning_rate": 6.285018755660695e-05, + "loss": 2.4894, + "step": 12463 + }, + { + "epoch": 1.0058913727705592, + "grad_norm": 0.6901080012321472, + "learning_rate": 6.283553093163712e-05, + "loss": 2.4244, + "step": 12464 + }, + { + "epoch": 1.0059720765071423, + "grad_norm": 0.6572268605232239, + "learning_rate": 6.282087523290304e-05, + "loss": 2.456, + "step": 12465 + }, + { + "epoch": 1.0060527802437254, + "grad_norm": 0.7207481861114502, + "learning_rate": 6.28062204607699e-05, + "loss": 2.4153, + "step": 12466 + }, + { + "epoch": 1.0061334839803082, + "grad_norm": 0.6901980042457581, + "learning_rate": 6.279156661560299e-05, + "loss": 2.4776, + "step": 12467 + }, + { + "epoch": 1.0062141877168913, + "grad_norm": 0.7003545761108398, + "learning_rate": 6.277691369776752e-05, + "loss": 2.4206, + "step": 12468 + }, + { + "epoch": 1.0062948914534744, + "grad_norm": 0.6978366374969482, + "learning_rate": 6.276226170762865e-05, + "loss": 2.3866, + "step": 12469 + }, + { + "epoch": 1.0063755951900573, + "grad_norm": 0.6763097643852234, + "learning_rate": 6.274761064555154e-05, + "loss": 2.5439, + "step": 12470 + }, + { + "epoch": 1.0064562989266403, + "grad_norm": 0.7146836519241333, + "learning_rate": 6.273296051190139e-05, + "loss": 2.5486, + "step": 12471 + }, + { + "epoch": 1.0065370026632232, + "grad_norm": 0.7448136806488037, + "learning_rate": 6.271831130704326e-05, + "loss": 2.4539, + "step": 12472 + }, + { + "epoch": 1.0066177063998063, + "grad_norm": 0.6918472051620483, + "learning_rate": 6.270366303134226e-05, + "loss": 2.4756, + "step": 12473 + }, + { + "epoch": 
1.0066984101363894, + "grad_norm": 0.7067514657974243, + "learning_rate": 6.26890156851635e-05, + "loss": 2.4925, + "step": 12474 + }, + { + "epoch": 1.0067791138729723, + "grad_norm": 0.6517517566680908, + "learning_rate": 6.267436926887197e-05, + "loss": 2.4339, + "step": 12475 + }, + { + "epoch": 1.0068598176095553, + "grad_norm": 0.673367977142334, + "learning_rate": 6.265972378283274e-05, + "loss": 2.416, + "step": 12476 + }, + { + "epoch": 1.0069405213461384, + "grad_norm": 0.7190212607383728, + "learning_rate": 6.26450792274108e-05, + "loss": 2.4822, + "step": 12477 + }, + { + "epoch": 1.0070212250827213, + "grad_norm": 0.7568029165267944, + "learning_rate": 6.263043560297112e-05, + "loss": 2.4607, + "step": 12478 + }, + { + "epoch": 1.0071019288193044, + "grad_norm": 0.6860609650611877, + "learning_rate": 6.261579290987866e-05, + "loss": 2.4429, + "step": 12479 + }, + { + "epoch": 1.0071826325558872, + "grad_norm": 0.7066059112548828, + "learning_rate": 6.260115114849839e-05, + "loss": 2.5504, + "step": 12480 + }, + { + "epoch": 1.0072633362924703, + "grad_norm": 0.6857946515083313, + "learning_rate": 6.25865103191952e-05, + "loss": 2.4776, + "step": 12481 + }, + { + "epoch": 1.0073440400290534, + "grad_norm": 0.6879859566688538, + "learning_rate": 6.257187042233396e-05, + "loss": 2.3651, + "step": 12482 + }, + { + "epoch": 1.0074247437656363, + "grad_norm": 0.6900867223739624, + "learning_rate": 6.255723145827954e-05, + "loss": 2.4644, + "step": 12483 + }, + { + "epoch": 1.0075054475022194, + "grad_norm": 0.7144716382026672, + "learning_rate": 6.254259342739683e-05, + "loss": 2.4219, + "step": 12484 + }, + { + "epoch": 1.0075861512388025, + "grad_norm": 0.674619197845459, + "learning_rate": 6.252795633005056e-05, + "loss": 2.5038, + "step": 12485 + }, + { + "epoch": 1.0076668549753853, + "grad_norm": 0.7036965489387512, + "learning_rate": 6.251332016660558e-05, + "loss": 2.4784, + "step": 12486 + }, + { + "epoch": 1.0077475587119684, + "grad_norm": 
0.7046369910240173, + "learning_rate": 6.249868493742668e-05, + "loss": 2.514, + "step": 12487 + }, + { + "epoch": 1.0078282624485513, + "grad_norm": 0.6933087110519409, + "learning_rate": 6.248405064287854e-05, + "loss": 2.4855, + "step": 12488 + }, + { + "epoch": 1.0079089661851344, + "grad_norm": 0.7210546731948853, + "learning_rate": 6.246941728332594e-05, + "loss": 2.5101, + "step": 12489 + }, + { + "epoch": 1.0079896699217175, + "grad_norm": 0.6738288402557373, + "learning_rate": 6.245478485913361e-05, + "loss": 2.4891, + "step": 12490 + }, + { + "epoch": 1.0080703736583003, + "grad_norm": 0.7023273706436157, + "learning_rate": 6.244015337066611e-05, + "loss": 2.4977, + "step": 12491 + }, + { + "epoch": 1.0081510773948834, + "grad_norm": 0.6761355996131897, + "learning_rate": 6.24255228182882e-05, + "loss": 2.4948, + "step": 12492 + }, + { + "epoch": 1.0082317811314665, + "grad_norm": 0.6427976489067078, + "learning_rate": 6.241089320236448e-05, + "loss": 2.466, + "step": 12493 + }, + { + "epoch": 1.0083124848680494, + "grad_norm": 0.6907719969749451, + "learning_rate": 6.23962645232596e-05, + "loss": 2.437, + "step": 12494 + }, + { + "epoch": 1.0083931886046325, + "grad_norm": 0.709032416343689, + "learning_rate": 6.238163678133807e-05, + "loss": 2.4298, + "step": 12495 + }, + { + "epoch": 1.0084738923412153, + "grad_norm": 0.7395734786987305, + "learning_rate": 6.236700997696448e-05, + "loss": 2.4502, + "step": 12496 + }, + { + "epoch": 1.0085545960777984, + "grad_norm": 0.6535435914993286, + "learning_rate": 6.23523841105034e-05, + "loss": 2.4494, + "step": 12497 + }, + { + "epoch": 1.0086352998143815, + "grad_norm": 0.6597761511802673, + "learning_rate": 6.23377591823193e-05, + "loss": 2.4377, + "step": 12498 + }, + { + "epoch": 1.0087160035509644, + "grad_norm": 0.6610515713691711, + "learning_rate": 6.232313519277668e-05, + "loss": 2.4328, + "step": 12499 + }, + { + "epoch": 1.0087967072875474, + "grad_norm": 0.6785424947738647, + "learning_rate": 
6.230851214224009e-05, + "loss": 2.457, + "step": 12500 + }, + { + "epoch": 1.0088774110241303, + "grad_norm": 0.6939748525619507, + "learning_rate": 6.229389003107383e-05, + "loss": 2.383, + "step": 12501 + }, + { + "epoch": 1.0089581147607134, + "grad_norm": 0.7592256665229797, + "learning_rate": 6.22792688596424e-05, + "loss": 2.4665, + "step": 12502 + }, + { + "epoch": 1.0090388184972965, + "grad_norm": 0.6751298308372498, + "learning_rate": 6.226464862831023e-05, + "loss": 2.491, + "step": 12503 + }, + { + "epoch": 1.0091195222338794, + "grad_norm": 0.682771623134613, + "learning_rate": 6.225002933744164e-05, + "loss": 2.4275, + "step": 12504 + }, + { + "epoch": 1.0092002259704624, + "grad_norm": 0.7314651608467102, + "learning_rate": 6.223541098740098e-05, + "loss": 2.4489, + "step": 12505 + }, + { + "epoch": 1.0092809297070455, + "grad_norm": 0.7132120132446289, + "learning_rate": 6.222079357855261e-05, + "loss": 2.4819, + "step": 12506 + }, + { + "epoch": 1.0093616334436284, + "grad_norm": 0.6571424007415771, + "learning_rate": 6.220617711126082e-05, + "loss": 2.455, + "step": 12507 + }, + { + "epoch": 1.0094423371802115, + "grad_norm": 0.7675301432609558, + "learning_rate": 6.21915615858899e-05, + "loss": 2.5282, + "step": 12508 + }, + { + "epoch": 1.0095230409167943, + "grad_norm": 0.6907868385314941, + "learning_rate": 6.217694700280408e-05, + "loss": 2.4639, + "step": 12509 + }, + { + "epoch": 1.0096037446533774, + "grad_norm": 0.7223815321922302, + "learning_rate": 6.216233336236764e-05, + "loss": 2.4682, + "step": 12510 + }, + { + "epoch": 1.0096844483899605, + "grad_norm": 0.7325109839439392, + "learning_rate": 6.214772066494474e-05, + "loss": 2.4591, + "step": 12511 + }, + { + "epoch": 1.0097651521265434, + "grad_norm": 0.6589400768280029, + "learning_rate": 6.213310891089957e-05, + "loss": 2.4883, + "step": 12512 + }, + { + "epoch": 1.0098458558631265, + "grad_norm": 0.6692262291908264, + "learning_rate": 6.211849810059635e-05, + "loss": 2.4635, + 
"step": 12513 + }, + { + "epoch": 1.0099265595997096, + "grad_norm": 0.7352520823478699, + "learning_rate": 6.210388823439914e-05, + "loss": 2.4743, + "step": 12514 + }, + { + "epoch": 1.0100072633362924, + "grad_norm": 0.6631996035575867, + "learning_rate": 6.208927931267212e-05, + "loss": 2.4848, + "step": 12515 + }, + { + "epoch": 1.0100879670728755, + "grad_norm": 0.6985767483711243, + "learning_rate": 6.207467133577937e-05, + "loss": 2.5044, + "step": 12516 + }, + { + "epoch": 1.0101686708094584, + "grad_norm": 0.665635347366333, + "learning_rate": 6.206006430408494e-05, + "loss": 2.4718, + "step": 12517 + }, + { + "epoch": 1.0102493745460415, + "grad_norm": 0.6859133243560791, + "learning_rate": 6.204545821795286e-05, + "loss": 2.4702, + "step": 12518 + }, + { + "epoch": 1.0103300782826246, + "grad_norm": 0.6578841805458069, + "learning_rate": 6.203085307774722e-05, + "loss": 2.4614, + "step": 12519 + }, + { + "epoch": 1.0104107820192074, + "grad_norm": 0.717523455619812, + "learning_rate": 6.201624888383194e-05, + "loss": 2.4412, + "step": 12520 + }, + { + "epoch": 1.0104914857557905, + "grad_norm": 0.7333831787109375, + "learning_rate": 6.200164563657103e-05, + "loss": 2.4157, + "step": 12521 + }, + { + "epoch": 1.0105721894923736, + "grad_norm": 0.6968720555305481, + "learning_rate": 6.198704333632845e-05, + "loss": 2.4556, + "step": 12522 + }, + { + "epoch": 1.0106528932289565, + "grad_norm": 0.6533070802688599, + "learning_rate": 6.19724419834681e-05, + "loss": 2.43, + "step": 12523 + }, + { + "epoch": 1.0107335969655395, + "grad_norm": 0.7341824769973755, + "learning_rate": 6.195784157835391e-05, + "loss": 2.5326, + "step": 12524 + }, + { + "epoch": 1.0108143007021224, + "grad_norm": 0.752912163734436, + "learning_rate": 6.194324212134974e-05, + "loss": 2.4282, + "step": 12525 + }, + { + "epoch": 1.0108950044387055, + "grad_norm": 0.6538611650466919, + "learning_rate": 6.192864361281951e-05, + "loss": 2.4135, + "step": 12526 + }, + { + "epoch": 
1.0109757081752886, + "grad_norm": 0.6931454539299011, + "learning_rate": 6.191404605312695e-05, + "loss": 2.5097, + "step": 12527 + }, + { + "epoch": 1.0110564119118715, + "grad_norm": 0.6317688822746277, + "learning_rate": 6.18994494426359e-05, + "loss": 2.4977, + "step": 12528 + }, + { + "epoch": 1.0111371156484545, + "grad_norm": 0.6793715953826904, + "learning_rate": 6.188485378171024e-05, + "loss": 2.4619, + "step": 12529 + }, + { + "epoch": 1.0112178193850376, + "grad_norm": 0.6696654558181763, + "learning_rate": 6.187025907071361e-05, + "loss": 2.4658, + "step": 12530 + }, + { + "epoch": 1.0112985231216205, + "grad_norm": 0.6788807511329651, + "learning_rate": 6.185566531000979e-05, + "loss": 2.4793, + "step": 12531 + }, + { + "epoch": 1.0113792268582036, + "grad_norm": 0.6933971643447876, + "learning_rate": 6.184107249996253e-05, + "loss": 2.4772, + "step": 12532 + }, + { + "epoch": 1.0114599305947864, + "grad_norm": 0.6866000294685364, + "learning_rate": 6.182648064093546e-05, + "loss": 2.428, + "step": 12533 + }, + { + "epoch": 1.0115406343313695, + "grad_norm": 0.7013841271400452, + "learning_rate": 6.181188973329229e-05, + "loss": 2.5273, + "step": 12534 + }, + { + "epoch": 1.0116213380679526, + "grad_norm": 0.6569108963012695, + "learning_rate": 6.179729977739669e-05, + "loss": 2.4125, + "step": 12535 + }, + { + "epoch": 1.0117020418045355, + "grad_norm": 0.7503486275672913, + "learning_rate": 6.17827107736122e-05, + "loss": 2.4385, + "step": 12536 + }, + { + "epoch": 1.0117827455411186, + "grad_norm": 0.6757314205169678, + "learning_rate": 6.176812272230246e-05, + "loss": 2.4364, + "step": 12537 + }, + { + "epoch": 1.0118634492777017, + "grad_norm": 0.6567254662513733, + "learning_rate": 6.175353562383106e-05, + "loss": 2.4992, + "step": 12538 + }, + { + "epoch": 1.0119441530142845, + "grad_norm": 0.7564988732337952, + "learning_rate": 6.17389494785615e-05, + "loss": 2.4777, + "step": 12539 + }, + { + "epoch": 1.0120248567508676, + "grad_norm": 
0.6972391605377197, + "learning_rate": 6.172436428685735e-05, + "loss": 2.5041, + "step": 12540 + }, + { + "epoch": 1.0121055604874505, + "grad_norm": 0.6861580610275269, + "learning_rate": 6.170978004908209e-05, + "loss": 2.4684, + "step": 12541 + }, + { + "epoch": 1.0121862642240336, + "grad_norm": 0.6621903777122498, + "learning_rate": 6.169519676559921e-05, + "loss": 2.4614, + "step": 12542 + }, + { + "epoch": 1.0122669679606167, + "grad_norm": 0.6879795789718628, + "learning_rate": 6.168061443677215e-05, + "loss": 2.4765, + "step": 12543 + }, + { + "epoch": 1.0123476716971995, + "grad_norm": 0.6361081004142761, + "learning_rate": 6.166603306296434e-05, + "loss": 2.4792, + "step": 12544 + }, + { + "epoch": 1.0124283754337826, + "grad_norm": 0.6660729050636292, + "learning_rate": 6.165145264453924e-05, + "loss": 2.489, + "step": 12545 + }, + { + "epoch": 1.0125090791703655, + "grad_norm": 0.6900594234466553, + "learning_rate": 6.163687318186015e-05, + "loss": 2.4543, + "step": 12546 + }, + { + "epoch": 1.0125897829069486, + "grad_norm": 0.7195869088172913, + "learning_rate": 6.162229467529046e-05, + "loss": 2.4137, + "step": 12547 + }, + { + "epoch": 1.0126704866435317, + "grad_norm": 0.7030326128005981, + "learning_rate": 6.16077171251935e-05, + "loss": 2.4657, + "step": 12548 + }, + { + "epoch": 1.0127511903801145, + "grad_norm": 0.6712052822113037, + "learning_rate": 6.15931405319326e-05, + "loss": 2.4718, + "step": 12549 + }, + { + "epoch": 1.0128318941166976, + "grad_norm": 0.7471029162406921, + "learning_rate": 6.157856489587102e-05, + "loss": 2.4705, + "step": 12550 + }, + { + "epoch": 1.0129125978532807, + "grad_norm": 0.6813762187957764, + "learning_rate": 6.15639902173721e-05, + "loss": 2.4479, + "step": 12551 + }, + { + "epoch": 1.0129933015898636, + "grad_norm": 0.6657249927520752, + "learning_rate": 6.154941649679894e-05, + "loss": 2.4911, + "step": 12552 + }, + { + "epoch": 1.0130740053264466, + "grad_norm": 0.6700132489204407, + "learning_rate": 
6.153484373451483e-05, + "loss": 2.4962, + "step": 12553 + }, + { + "epoch": 1.0131547090630295, + "grad_norm": 0.7058695554733276, + "learning_rate": 6.152027193088302e-05, + "loss": 2.3935, + "step": 12554 + }, + { + "epoch": 1.0132354127996126, + "grad_norm": 0.7390396595001221, + "learning_rate": 6.150570108626658e-05, + "loss": 2.4454, + "step": 12555 + }, + { + "epoch": 1.0133161165361957, + "grad_norm": 0.7251414060592651, + "learning_rate": 6.149113120102869e-05, + "loss": 2.4146, + "step": 12556 + }, + { + "epoch": 1.0133968202727786, + "grad_norm": 0.8262537717819214, + "learning_rate": 6.14765622755325e-05, + "loss": 2.4638, + "step": 12557 + }, + { + "epoch": 1.0134775240093616, + "grad_norm": 0.7184064984321594, + "learning_rate": 6.146199431014106e-05, + "loss": 2.3958, + "step": 12558 + }, + { + "epoch": 1.0135582277459447, + "grad_norm": 0.7544865012168884, + "learning_rate": 6.144742730521746e-05, + "loss": 2.4662, + "step": 12559 + }, + { + "epoch": 1.0136389314825276, + "grad_norm": 0.6866207718849182, + "learning_rate": 6.143286126112475e-05, + "loss": 2.4951, + "step": 12560 + }, + { + "epoch": 1.0137196352191107, + "grad_norm": 0.6566087603569031, + "learning_rate": 6.1418296178226e-05, + "loss": 2.4002, + "step": 12561 + }, + { + "epoch": 1.0138003389556935, + "grad_norm": 0.6999008059501648, + "learning_rate": 6.140373205688411e-05, + "loss": 2.5306, + "step": 12562 + }, + { + "epoch": 1.0138810426922766, + "grad_norm": 0.6682353615760803, + "learning_rate": 6.138916889746212e-05, + "loss": 2.5565, + "step": 12563 + }, + { + "epoch": 1.0139617464288597, + "grad_norm": 0.7443362474441528, + "learning_rate": 6.137460670032298e-05, + "loss": 2.3958, + "step": 12564 + }, + { + "epoch": 1.0140424501654426, + "grad_norm": 0.6542403697967529, + "learning_rate": 6.136004546582958e-05, + "loss": 2.4394, + "step": 12565 + }, + { + "epoch": 1.0141231539020257, + "grad_norm": 0.6524317264556885, + "learning_rate": 6.134548519434488e-05, + "loss": 
2.4979, + "step": 12566 + }, + { + "epoch": 1.0142038576386088, + "grad_norm": 0.6605600118637085, + "learning_rate": 6.133092588623174e-05, + "loss": 2.4827, + "step": 12567 + }, + { + "epoch": 1.0142845613751916, + "grad_norm": 0.7114397883415222, + "learning_rate": 6.1316367541853e-05, + "loss": 2.4799, + "step": 12568 + }, + { + "epoch": 1.0143652651117747, + "grad_norm": 0.6607296466827393, + "learning_rate": 6.130181016157148e-05, + "loss": 2.4991, + "step": 12569 + }, + { + "epoch": 1.0144459688483576, + "grad_norm": 0.6750844717025757, + "learning_rate": 6.128725374575005e-05, + "loss": 2.4451, + "step": 12570 + }, + { + "epoch": 1.0145266725849407, + "grad_norm": 0.6978901624679565, + "learning_rate": 6.127269829475141e-05, + "loss": 2.4608, + "step": 12571 + }, + { + "epoch": 1.0146073763215238, + "grad_norm": 0.676343560218811, + "learning_rate": 6.125814380893838e-05, + "loss": 2.4536, + "step": 12572 + }, + { + "epoch": 1.0146880800581066, + "grad_norm": 0.7082604765892029, + "learning_rate": 6.124359028867368e-05, + "loss": 2.45, + "step": 12573 + }, + { + "epoch": 1.0147687837946897, + "grad_norm": 0.7049853205680847, + "learning_rate": 6.122903773432003e-05, + "loss": 2.4378, + "step": 12574 + }, + { + "epoch": 1.0148494875312728, + "grad_norm": 0.6329593062400818, + "learning_rate": 6.121448614624009e-05, + "loss": 2.4386, + "step": 12575 + }, + { + "epoch": 1.0149301912678557, + "grad_norm": 0.7249468564987183, + "learning_rate": 6.119993552479655e-05, + "loss": 2.5191, + "step": 12576 + }, + { + "epoch": 1.0150108950044388, + "grad_norm": 0.7028193473815918, + "learning_rate": 6.118538587035206e-05, + "loss": 2.4376, + "step": 12577 + }, + { + "epoch": 1.0150915987410216, + "grad_norm": 0.697382926940918, + "learning_rate": 6.117083718326917e-05, + "loss": 2.4797, + "step": 12578 + }, + { + "epoch": 1.0151723024776047, + "grad_norm": 0.7386965155601501, + "learning_rate": 6.115628946391055e-05, + "loss": 2.4512, + "step": 12579 + }, + { + 
"epoch": 1.0152530062141878, + "grad_norm": 0.6614577174186707, + "learning_rate": 6.114174271263875e-05, + "loss": 2.4404, + "step": 12580 + }, + { + "epoch": 1.0153337099507707, + "grad_norm": 0.6927464604377747, + "learning_rate": 6.112719692981627e-05, + "loss": 2.47, + "step": 12581 + }, + { + "epoch": 1.0154144136873537, + "grad_norm": 0.7004262208938599, + "learning_rate": 6.111265211580566e-05, + "loss": 2.4212, + "step": 12582 + }, + { + "epoch": 1.0154951174239368, + "grad_norm": 0.71146559715271, + "learning_rate": 6.109810827096942e-05, + "loss": 2.4431, + "step": 12583 + }, + { + "epoch": 1.0155758211605197, + "grad_norm": 0.6857032775878906, + "learning_rate": 6.108356539567e-05, + "loss": 2.453, + "step": 12584 + }, + { + "epoch": 1.0156565248971028, + "grad_norm": 0.6976168155670166, + "learning_rate": 6.106902349026986e-05, + "loss": 2.4718, + "step": 12585 + }, + { + "epoch": 1.0157372286336857, + "grad_norm": 0.7158414125442505, + "learning_rate": 6.105448255513146e-05, + "loss": 2.425, + "step": 12586 + }, + { + "epoch": 1.0158179323702687, + "grad_norm": 0.6611737608909607, + "learning_rate": 6.103994259061714e-05, + "loss": 2.4563, + "step": 12587 + }, + { + "epoch": 1.0158986361068518, + "grad_norm": 0.7262980937957764, + "learning_rate": 6.102540359708926e-05, + "loss": 2.4538, + "step": 12588 + }, + { + "epoch": 1.0159793398434347, + "grad_norm": 0.7123451828956604, + "learning_rate": 6.10108655749102e-05, + "loss": 2.4677, + "step": 12589 + }, + { + "epoch": 1.0160600435800178, + "grad_norm": 0.7135589122772217, + "learning_rate": 6.099632852444235e-05, + "loss": 2.4312, + "step": 12590 + }, + { + "epoch": 1.0161407473166009, + "grad_norm": 0.6509461998939514, + "learning_rate": 6.09817924460479e-05, + "loss": 2.4716, + "step": 12591 + }, + { + "epoch": 1.0162214510531837, + "grad_norm": 0.8835915923118591, + "learning_rate": 6.096725734008919e-05, + "loss": 2.4817, + "step": 12592 + }, + { + "epoch": 1.0163021547897668, + "grad_norm": 
0.7084136605262756, + "learning_rate": 6.095272320692846e-05, + "loss": 2.483, + "step": 12593 + }, + { + "epoch": 1.0163828585263497, + "grad_norm": 0.6866818070411682, + "learning_rate": 6.0938190046927934e-05, + "loss": 2.4838, + "step": 12594 + }, + { + "epoch": 1.0164635622629328, + "grad_norm": 0.7297510504722595, + "learning_rate": 6.0923657860449824e-05, + "loss": 2.4675, + "step": 12595 + }, + { + "epoch": 1.0165442659995159, + "grad_norm": 0.6735619306564331, + "learning_rate": 6.090912664785633e-05, + "loss": 2.444, + "step": 12596 + }, + { + "epoch": 1.0166249697360987, + "grad_norm": 0.7046451568603516, + "learning_rate": 6.0894596409509565e-05, + "loss": 2.4757, + "step": 12597 + }, + { + "epoch": 1.0167056734726818, + "grad_norm": 0.6646085977554321, + "learning_rate": 6.0880067145771656e-05, + "loss": 2.4772, + "step": 12598 + }, + { + "epoch": 1.0167863772092647, + "grad_norm": 0.7217094302177429, + "learning_rate": 6.086553885700478e-05, + "loss": 2.4589, + "step": 12599 + }, + { + "epoch": 1.0168670809458478, + "grad_norm": 0.647378146648407, + "learning_rate": 6.085101154357093e-05, + "loss": 2.4327, + "step": 12600 + }, + { + "epoch": 1.0169477846824309, + "grad_norm": 0.6907125115394592, + "learning_rate": 6.083648520583223e-05, + "loss": 2.467, + "step": 12601 + }, + { + "epoch": 1.0170284884190137, + "grad_norm": 0.690433919429779, + "learning_rate": 6.0821959844150687e-05, + "loss": 2.488, + "step": 12602 + }, + { + "epoch": 1.0171091921555968, + "grad_norm": 0.6528738737106323, + "learning_rate": 6.080743545888833e-05, + "loss": 2.5028, + "step": 12603 + }, + { + "epoch": 1.01718989589218, + "grad_norm": 0.6962323784828186, + "learning_rate": 6.079291205040711e-05, + "loss": 2.5381, + "step": 12604 + }, + { + "epoch": 1.0172705996287628, + "grad_norm": 0.7386075854301453, + "learning_rate": 6.077838961906902e-05, + "loss": 2.4445, + "step": 12605 + }, + { + "epoch": 1.0173513033653458, + "grad_norm": 0.7382189631462097, + "learning_rate": 
6.0763868165236025e-05, + "loss": 2.4926, + "step": 12606 + }, + { + "epoch": 1.0174320071019287, + "grad_norm": 0.7291865944862366, + "learning_rate": 6.074934768926995e-05, + "loss": 2.4624, + "step": 12607 + }, + { + "epoch": 1.0175127108385118, + "grad_norm": 0.754843533039093, + "learning_rate": 6.073482819153275e-05, + "loss": 2.4291, + "step": 12608 + }, + { + "epoch": 1.017593414575095, + "grad_norm": 0.6827771663665771, + "learning_rate": 6.072030967238628e-05, + "loss": 2.453, + "step": 12609 + }, + { + "epoch": 1.0176741183116778, + "grad_norm": 0.7138541340827942, + "learning_rate": 6.0705792132192355e-05, + "loss": 2.5172, + "step": 12610 + }, + { + "epoch": 1.0177548220482608, + "grad_norm": 0.6539924740791321, + "learning_rate": 6.06912755713128e-05, + "loss": 2.4393, + "step": 12611 + }, + { + "epoch": 1.017835525784844, + "grad_norm": 0.7021273970603943, + "learning_rate": 6.067675999010945e-05, + "loss": 2.4519, + "step": 12612 + }, + { + "epoch": 1.0179162295214268, + "grad_norm": 0.7124225497245789, + "learning_rate": 6.0662245388944004e-05, + "loss": 2.4417, + "step": 12613 + }, + { + "epoch": 1.0179969332580099, + "grad_norm": 0.7214948534965515, + "learning_rate": 6.064773176817823e-05, + "loss": 2.4708, + "step": 12614 + }, + { + "epoch": 1.0180776369945927, + "grad_norm": 0.6738584041595459, + "learning_rate": 6.063321912817386e-05, + "loss": 2.4574, + "step": 12615 + }, + { + "epoch": 1.0181583407311758, + "grad_norm": 0.7215890884399414, + "learning_rate": 6.061870746929257e-05, + "loss": 2.4903, + "step": 12616 + }, + { + "epoch": 1.018239044467759, + "grad_norm": 0.6720155477523804, + "learning_rate": 6.0604196791896016e-05, + "loss": 2.4251, + "step": 12617 + }, + { + "epoch": 1.0183197482043418, + "grad_norm": 0.7046420574188232, + "learning_rate": 6.058968709634587e-05, + "loss": 2.446, + "step": 12618 + }, + { + "epoch": 1.0184004519409249, + "grad_norm": 0.6419540047645569, + "learning_rate": 6.0575178383003764e-05, + "loss": 
2.4052, + "step": 12619 + }, + { + "epoch": 1.018481155677508, + "grad_norm": 0.6948695182800293, + "learning_rate": 6.0560670652231235e-05, + "loss": 2.5068, + "step": 12620 + }, + { + "epoch": 1.0185618594140908, + "grad_norm": 0.7274870276451111, + "learning_rate": 6.05461639043899e-05, + "loss": 2.4705, + "step": 12621 + }, + { + "epoch": 1.018642563150674, + "grad_norm": 0.6809766292572021, + "learning_rate": 6.053165813984134e-05, + "loss": 2.3767, + "step": 12622 + }, + { + "epoch": 1.0187232668872568, + "grad_norm": 0.6197625994682312, + "learning_rate": 6.0517153358946985e-05, + "loss": 2.4639, + "step": 12623 + }, + { + "epoch": 1.0188039706238399, + "grad_norm": 0.6613010764122009, + "learning_rate": 6.050264956206837e-05, + "loss": 2.5155, + "step": 12624 + }, + { + "epoch": 1.018884674360423, + "grad_norm": 0.7335553765296936, + "learning_rate": 6.0488146749567e-05, + "loss": 2.5344, + "step": 12625 + }, + { + "epoch": 1.0189653780970058, + "grad_norm": 0.7175146341323853, + "learning_rate": 6.047364492180428e-05, + "loss": 2.4972, + "step": 12626 + }, + { + "epoch": 1.019046081833589, + "grad_norm": 0.6825357675552368, + "learning_rate": 6.045914407914166e-05, + "loss": 2.4356, + "step": 12627 + }, + { + "epoch": 1.019126785570172, + "grad_norm": 0.6369633078575134, + "learning_rate": 6.044464422194056e-05, + "loss": 2.4692, + "step": 12628 + }, + { + "epoch": 1.0192074893067549, + "grad_norm": 0.7407073378562927, + "learning_rate": 6.0430145350562264e-05, + "loss": 2.4565, + "step": 12629 + }, + { + "epoch": 1.019288193043338, + "grad_norm": 0.6836552619934082, + "learning_rate": 6.041564746536821e-05, + "loss": 2.4357, + "step": 12630 + }, + { + "epoch": 1.0193688967799208, + "grad_norm": 0.6778741478919983, + "learning_rate": 6.040115056671972e-05, + "loss": 2.424, + "step": 12631 + }, + { + "epoch": 1.019449600516504, + "grad_norm": 0.6440724730491638, + "learning_rate": 6.0386654654978035e-05, + "loss": 2.4455, + "step": 12632 + }, + { + "epoch": 
1.019530304253087, + "grad_norm": 0.681376039981842, + "learning_rate": 6.0372159730504476e-05, + "loss": 2.4562, + "step": 12633 + }, + { + "epoch": 1.0196110079896699, + "grad_norm": 0.657462477684021, + "learning_rate": 6.035766579366029e-05, + "loss": 2.4315, + "step": 12634 + }, + { + "epoch": 1.019691711726253, + "grad_norm": 0.6540380716323853, + "learning_rate": 6.0343172844806706e-05, + "loss": 2.4789, + "step": 12635 + }, + { + "epoch": 1.019772415462836, + "grad_norm": 0.711883008480072, + "learning_rate": 6.03286808843049e-05, + "loss": 2.4178, + "step": 12636 + }, + { + "epoch": 1.019853119199419, + "grad_norm": 0.6746736168861389, + "learning_rate": 6.031418991251607e-05, + "loss": 2.4351, + "step": 12637 + }, + { + "epoch": 1.019933822936002, + "grad_norm": 0.677237331867218, + "learning_rate": 6.02996999298014e-05, + "loss": 2.4335, + "step": 12638 + }, + { + "epoch": 1.0200145266725849, + "grad_norm": 0.6950497627258301, + "learning_rate": 6.0285210936521955e-05, + "loss": 2.5178, + "step": 12639 + }, + { + "epoch": 1.020095230409168, + "grad_norm": 0.6349243521690369, + "learning_rate": 6.027072293303885e-05, + "loss": 2.4405, + "step": 12640 + }, + { + "epoch": 1.020175934145751, + "grad_norm": 0.744276762008667, + "learning_rate": 6.0256235919713236e-05, + "loss": 2.5156, + "step": 12641 + }, + { + "epoch": 1.020256637882334, + "grad_norm": 0.7697997689247131, + "learning_rate": 6.0241749896906075e-05, + "loss": 2.4393, + "step": 12642 + }, + { + "epoch": 1.020337341618917, + "grad_norm": 0.7784204483032227, + "learning_rate": 6.022726486497844e-05, + "loss": 2.4565, + "step": 12643 + }, + { + "epoch": 1.0204180453555, + "grad_norm": 0.7434312701225281, + "learning_rate": 6.021278082429136e-05, + "loss": 2.4637, + "step": 12644 + }, + { + "epoch": 1.020498749092083, + "grad_norm": 0.7770118117332458, + "learning_rate": 6.019829777520575e-05, + "loss": 2.4998, + "step": 12645 + }, + { + "epoch": 1.020579452828666, + "grad_norm": 
0.7021752595901489, + "learning_rate": 6.01838157180826e-05, + "loss": 2.4661, + "step": 12646 + }, + { + "epoch": 1.0206601565652489, + "grad_norm": 0.6812437176704407, + "learning_rate": 6.0169334653282895e-05, + "loss": 2.4611, + "step": 12647 + }, + { + "epoch": 1.020740860301832, + "grad_norm": 0.757724940776825, + "learning_rate": 6.0154854581167455e-05, + "loss": 2.4427, + "step": 12648 + }, + { + "epoch": 1.020821564038415, + "grad_norm": 0.7386252880096436, + "learning_rate": 6.014037550209718e-05, + "loss": 2.424, + "step": 12649 + }, + { + "epoch": 1.020902267774998, + "grad_norm": 0.7138059735298157, + "learning_rate": 6.012589741643295e-05, + "loss": 2.4951, + "step": 12650 + }, + { + "epoch": 1.020982971511581, + "grad_norm": 0.714022159576416, + "learning_rate": 6.011142032453561e-05, + "loss": 2.4398, + "step": 12651 + }, + { + "epoch": 1.0210636752481639, + "grad_norm": 0.6961550712585449, + "learning_rate": 6.00969442267659e-05, + "loss": 2.4495, + "step": 12652 + }, + { + "epoch": 1.021144378984747, + "grad_norm": 0.7196643948554993, + "learning_rate": 6.008246912348467e-05, + "loss": 2.4449, + "step": 12653 + }, + { + "epoch": 1.02122508272133, + "grad_norm": 0.6163341999053955, + "learning_rate": 6.006799501505268e-05, + "loss": 2.4108, + "step": 12654 + }, + { + "epoch": 1.021305786457913, + "grad_norm": 0.6657030582427979, + "learning_rate": 6.005352190183061e-05, + "loss": 2.4328, + "step": 12655 + }, + { + "epoch": 1.021386490194496, + "grad_norm": 0.7183353900909424, + "learning_rate": 6.00390497841792e-05, + "loss": 2.4912, + "step": 12656 + }, + { + "epoch": 1.021467193931079, + "grad_norm": 0.6912575364112854, + "learning_rate": 6.002457866245916e-05, + "loss": 2.4597, + "step": 12657 + }, + { + "epoch": 1.021547897667662, + "grad_norm": 0.7395210266113281, + "learning_rate": 6.0010108537031084e-05, + "loss": 2.4823, + "step": 12658 + }, + { + "epoch": 1.021628601404245, + "grad_norm": 0.722618043422699, + "learning_rate": 
5.9995639408255636e-05, + "loss": 2.4924, + "step": 12659 + }, + { + "epoch": 1.021709305140828, + "grad_norm": 0.739009439945221, + "learning_rate": 5.998117127649344e-05, + "loss": 2.4454, + "step": 12660 + }, + { + "epoch": 1.021790008877411, + "grad_norm": 0.7017633318901062, + "learning_rate": 5.996670414210506e-05, + "loss": 2.5058, + "step": 12661 + }, + { + "epoch": 1.021870712613994, + "grad_norm": 0.742664635181427, + "learning_rate": 5.9952238005451046e-05, + "loss": 2.436, + "step": 12662 + }, + { + "epoch": 1.021951416350577, + "grad_norm": 0.6865660548210144, + "learning_rate": 5.9937772866892e-05, + "loss": 2.4364, + "step": 12663 + }, + { + "epoch": 1.02203212008716, + "grad_norm": 0.7376219034194946, + "learning_rate": 5.992330872678833e-05, + "loss": 2.4975, + "step": 12664 + }, + { + "epoch": 1.0221128238237431, + "grad_norm": 0.6496078372001648, + "learning_rate": 5.990884558550054e-05, + "loss": 2.4651, + "step": 12665 + }, + { + "epoch": 1.022193527560326, + "grad_norm": 0.7178322076797485, + "learning_rate": 5.989438344338915e-05, + "loss": 2.5015, + "step": 12666 + }, + { + "epoch": 1.022274231296909, + "grad_norm": 0.7084102034568787, + "learning_rate": 5.987992230081459e-05, + "loss": 2.4741, + "step": 12667 + }, + { + "epoch": 1.022354935033492, + "grad_norm": 0.6634935736656189, + "learning_rate": 5.986546215813722e-05, + "loss": 2.4255, + "step": 12668 + }, + { + "epoch": 1.022435638770075, + "grad_norm": 0.6897543668746948, + "learning_rate": 5.985100301571742e-05, + "loss": 2.4682, + "step": 12669 + }, + { + "epoch": 1.0225163425066581, + "grad_norm": 0.6643948554992676, + "learning_rate": 5.9836544873915614e-05, + "loss": 2.4009, + "step": 12670 + }, + { + "epoch": 1.022597046243241, + "grad_norm": 0.681252658367157, + "learning_rate": 5.982208773309208e-05, + "loss": 2.4542, + "step": 12671 + }, + { + "epoch": 1.022677749979824, + "grad_norm": 0.7608681917190552, + "learning_rate": 5.980763159360714e-05, + "loss": 2.5614, + "step": 
12672 + }, + { + "epoch": 1.0227584537164072, + "grad_norm": 0.6855095028877258, + "learning_rate": 5.979317645582112e-05, + "loss": 2.4505, + "step": 12673 + }, + { + "epoch": 1.02283915745299, + "grad_norm": 0.6846089363098145, + "learning_rate": 5.97787223200942e-05, + "loss": 2.4438, + "step": 12674 + }, + { + "epoch": 1.0229198611895731, + "grad_norm": 0.7198090553283691, + "learning_rate": 5.9764269186786684e-05, + "loss": 2.4469, + "step": 12675 + }, + { + "epoch": 1.023000564926156, + "grad_norm": 0.7120245099067688, + "learning_rate": 5.9749817056258764e-05, + "loss": 2.4626, + "step": 12676 + }, + { + "epoch": 1.023081268662739, + "grad_norm": 0.6839897036552429, + "learning_rate": 5.973536592887059e-05, + "loss": 2.4384, + "step": 12677 + }, + { + "epoch": 1.0231619723993222, + "grad_norm": 0.7053773999214172, + "learning_rate": 5.9720915804982356e-05, + "loss": 2.4554, + "step": 12678 + }, + { + "epoch": 1.023242676135905, + "grad_norm": 0.7114294767379761, + "learning_rate": 5.970646668495421e-05, + "loss": 2.3964, + "step": 12679 + }, + { + "epoch": 1.0233233798724881, + "grad_norm": 0.7001516819000244, + "learning_rate": 5.9692018569146224e-05, + "loss": 2.5216, + "step": 12680 + }, + { + "epoch": 1.0234040836090712, + "grad_norm": 0.6715773940086365, + "learning_rate": 5.96775714579185e-05, + "loss": 2.4595, + "step": 12681 + }, + { + "epoch": 1.023484787345654, + "grad_norm": 0.6856278777122498, + "learning_rate": 5.96631253516311e-05, + "loss": 2.4637, + "step": 12682 + }, + { + "epoch": 1.0235654910822372, + "grad_norm": 0.6785625219345093, + "learning_rate": 5.96486802506441e-05, + "loss": 2.4615, + "step": 12683 + }, + { + "epoch": 1.02364619481882, + "grad_norm": 0.6834213137626648, + "learning_rate": 5.963423615531743e-05, + "loss": 2.4729, + "step": 12684 + }, + { + "epoch": 1.023726898555403, + "grad_norm": 0.6729516386985779, + "learning_rate": 5.961979306601109e-05, + "loss": 2.4013, + "step": 12685 + }, + { + "epoch": 1.0238076022919862, 
+ "grad_norm": 0.6785775423049927, + "learning_rate": 5.960535098308511e-05, + "loss": 2.4825, + "step": 12686 + }, + { + "epoch": 1.023888306028569, + "grad_norm": 0.67277991771698, + "learning_rate": 5.959090990689934e-05, + "loss": 2.4606, + "step": 12687 + }, + { + "epoch": 1.0239690097651521, + "grad_norm": 0.7679588198661804, + "learning_rate": 5.957646983781373e-05, + "loss": 2.5234, + "step": 12688 + }, + { + "epoch": 1.0240497135017352, + "grad_norm": 0.6597407460212708, + "learning_rate": 5.956203077618821e-05, + "loss": 2.4699, + "step": 12689 + }, + { + "epoch": 1.024130417238318, + "grad_norm": 0.6743008494377136, + "learning_rate": 5.9547592722382525e-05, + "loss": 2.4266, + "step": 12690 + }, + { + "epoch": 1.0242111209749012, + "grad_norm": 0.7223396897315979, + "learning_rate": 5.953315567675657e-05, + "loss": 2.5117, + "step": 12691 + }, + { + "epoch": 1.024291824711484, + "grad_norm": 0.6729528307914734, + "learning_rate": 5.951871963967022e-05, + "loss": 2.4586, + "step": 12692 + }, + { + "epoch": 1.0243725284480671, + "grad_norm": 0.6523739695549011, + "learning_rate": 5.950428461148314e-05, + "loss": 2.4408, + "step": 12693 + }, + { + "epoch": 1.0244532321846502, + "grad_norm": 0.6830984950065613, + "learning_rate": 5.9489850592555164e-05, + "loss": 2.4094, + "step": 12694 + }, + { + "epoch": 1.024533935921233, + "grad_norm": 0.6223493814468384, + "learning_rate": 5.9475417583246006e-05, + "loss": 2.4105, + "step": 12695 + }, + { + "epoch": 1.0246146396578162, + "grad_norm": 0.6506635546684265, + "learning_rate": 5.9460985583915374e-05, + "loss": 2.4451, + "step": 12696 + }, + { + "epoch": 1.024695343394399, + "grad_norm": 0.7626760005950928, + "learning_rate": 5.944655459492293e-05, + "loss": 2.4643, + "step": 12697 + }, + { + "epoch": 1.0247760471309821, + "grad_norm": 0.7074631452560425, + "learning_rate": 5.943212461662837e-05, + "loss": 2.4662, + "step": 12698 + }, + { + "epoch": 1.0248567508675652, + "grad_norm": 0.718083918094635, + 
"learning_rate": 5.9417695649391346e-05, + "loss": 2.4686, + "step": 12699 + }, + { + "epoch": 1.024937454604148, + "grad_norm": 0.6850628852844238, + "learning_rate": 5.9403267693571384e-05, + "loss": 2.4542, + "step": 12700 + }, + { + "epoch": 1.0250181583407312, + "grad_norm": 0.6662585735321045, + "learning_rate": 5.938884074952812e-05, + "loss": 2.4676, + "step": 12701 + }, + { + "epoch": 1.0250988620773143, + "grad_norm": 0.6806240677833557, + "learning_rate": 5.9374414817621114e-05, + "loss": 2.4243, + "step": 12702 + }, + { + "epoch": 1.0251795658138971, + "grad_norm": 0.6763548851013184, + "learning_rate": 5.9359989898209876e-05, + "loss": 2.4389, + "step": 12703 + }, + { + "epoch": 1.0252602695504802, + "grad_norm": 0.7390143275260925, + "learning_rate": 5.934556599165393e-05, + "loss": 2.4667, + "step": 12704 + }, + { + "epoch": 1.025340973287063, + "grad_norm": 0.6159299612045288, + "learning_rate": 5.933114309831276e-05, + "loss": 2.3832, + "step": 12705 + }, + { + "epoch": 1.0254216770236462, + "grad_norm": 0.6779586672782898, + "learning_rate": 5.931672121854579e-05, + "loss": 2.4615, + "step": 12706 + }, + { + "epoch": 1.0255023807602293, + "grad_norm": 0.643800675868988, + "learning_rate": 5.930230035271247e-05, + "loss": 2.4725, + "step": 12707 + }, + { + "epoch": 1.0255830844968121, + "grad_norm": 0.6605903506278992, + "learning_rate": 5.928788050117227e-05, + "loss": 2.4332, + "step": 12708 + }, + { + "epoch": 1.0256637882333952, + "grad_norm": 0.7046334743499756, + "learning_rate": 5.927346166428446e-05, + "loss": 2.4445, + "step": 12709 + }, + { + "epoch": 1.0257444919699783, + "grad_norm": 0.6536325216293335, + "learning_rate": 5.925904384240843e-05, + "loss": 2.4168, + "step": 12710 + }, + { + "epoch": 1.0258251957065612, + "grad_norm": 0.6861097812652588, + "learning_rate": 5.9244627035903564e-05, + "loss": 2.512, + "step": 12711 + }, + { + "epoch": 1.0259058994431443, + "grad_norm": 0.6782278418540955, + "learning_rate": 
5.923021124512911e-05, + "loss": 2.4667, + "step": 12712 + }, + { + "epoch": 1.0259866031797271, + "grad_norm": 0.724435031414032, + "learning_rate": 5.921579647044436e-05, + "loss": 2.4828, + "step": 12713 + }, + { + "epoch": 1.0260673069163102, + "grad_norm": 0.6690630316734314, + "learning_rate": 5.9201382712208575e-05, + "loss": 2.4832, + "step": 12714 + }, + { + "epoch": 1.0261480106528933, + "grad_norm": 0.7045348286628723, + "learning_rate": 5.9186969970781015e-05, + "loss": 2.4576, + "step": 12715 + }, + { + "epoch": 1.0262287143894762, + "grad_norm": 0.673321008682251, + "learning_rate": 5.9172558246520796e-05, + "loss": 2.3986, + "step": 12716 + }, + { + "epoch": 1.0263094181260592, + "grad_norm": 0.7184785008430481, + "learning_rate": 5.915814753978717e-05, + "loss": 2.4008, + "step": 12717 + }, + { + "epoch": 1.0263901218626423, + "grad_norm": 0.6971293091773987, + "learning_rate": 5.914373785093931e-05, + "loss": 2.4559, + "step": 12718 + }, + { + "epoch": 1.0264708255992252, + "grad_norm": 0.6941563487052917, + "learning_rate": 5.912932918033626e-05, + "loss": 2.4787, + "step": 12719 + }, + { + "epoch": 1.0265515293358083, + "grad_norm": 0.6276142001152039, + "learning_rate": 5.911492152833715e-05, + "loss": 2.4275, + "step": 12720 + }, + { + "epoch": 1.0266322330723912, + "grad_norm": 0.715928316116333, + "learning_rate": 5.9100514895301106e-05, + "loss": 2.4127, + "step": 12721 + }, + { + "epoch": 1.0267129368089742, + "grad_norm": 0.7004076838493347, + "learning_rate": 5.908610928158713e-05, + "loss": 2.4651, + "step": 12722 + }, + { + "epoch": 1.0267936405455573, + "grad_norm": 0.6761921048164368, + "learning_rate": 5.907170468755425e-05, + "loss": 2.4245, + "step": 12723 + }, + { + "epoch": 1.0268743442821402, + "grad_norm": 0.7246574759483337, + "learning_rate": 5.9057301113561515e-05, + "loss": 2.4489, + "step": 12724 + }, + { + "epoch": 1.0269550480187233, + "grad_norm": 0.7196606397628784, + "learning_rate": 5.904289855996783e-05, + "loss": 
2.4357, + "step": 12725 + }, + { + "epoch": 1.0270357517553064, + "grad_norm": 0.7142692804336548, + "learning_rate": 5.902849702713216e-05, + "loss": 2.4821, + "step": 12726 + }, + { + "epoch": 1.0271164554918892, + "grad_norm": 0.7207832336425781, + "learning_rate": 5.9014096515413454e-05, + "loss": 2.4337, + "step": 12727 + }, + { + "epoch": 1.0271971592284723, + "grad_norm": 0.6865695714950562, + "learning_rate": 5.899969702517063e-05, + "loss": 2.4549, + "step": 12728 + }, + { + "epoch": 1.0272778629650552, + "grad_norm": 0.7136662006378174, + "learning_rate": 5.898529855676249e-05, + "loss": 2.4606, + "step": 12729 + }, + { + "epoch": 1.0273585667016383, + "grad_norm": 0.701885998249054, + "learning_rate": 5.897090111054795e-05, + "loss": 2.4913, + "step": 12730 + }, + { + "epoch": 1.0274392704382214, + "grad_norm": 0.6671354174613953, + "learning_rate": 5.8956504686885805e-05, + "loss": 2.4064, + "step": 12731 + }, + { + "epoch": 1.0275199741748042, + "grad_norm": 0.6720621585845947, + "learning_rate": 5.894210928613484e-05, + "loss": 2.4908, + "step": 12732 + }, + { + "epoch": 1.0276006779113873, + "grad_norm": 0.7530980706214905, + "learning_rate": 5.892771490865383e-05, + "loss": 2.4486, + "step": 12733 + }, + { + "epoch": 1.0276813816479704, + "grad_norm": 0.6771122813224792, + "learning_rate": 5.891332155480158e-05, + "loss": 2.3954, + "step": 12734 + }, + { + "epoch": 1.0277620853845533, + "grad_norm": 0.6779236793518066, + "learning_rate": 5.889892922493671e-05, + "loss": 2.4404, + "step": 12735 + }, + { + "epoch": 1.0278427891211364, + "grad_norm": 0.7593358755111694, + "learning_rate": 5.8884537919417974e-05, + "loss": 2.4997, + "step": 12736 + }, + { + "epoch": 1.0279234928577192, + "grad_norm": 0.672686755657196, + "learning_rate": 5.8870147638604044e-05, + "loss": 2.5394, + "step": 12737 + }, + { + "epoch": 1.0280041965943023, + "grad_norm": 0.6727546453475952, + "learning_rate": 5.885575838285353e-05, + "loss": 2.4554, + "step": 12738 + }, + { + 
"epoch": 1.0280849003308854, + "grad_norm": 0.7092764377593994, + "learning_rate": 5.884137015252507e-05, + "loss": 2.4568, + "step": 12739 + }, + { + "epoch": 1.0281656040674683, + "grad_norm": 0.6988070011138916, + "learning_rate": 5.882698294797728e-05, + "loss": 2.4453, + "step": 12740 + }, + { + "epoch": 1.0282463078040514, + "grad_norm": 0.7578697204589844, + "learning_rate": 5.8812596769568676e-05, + "loss": 2.5648, + "step": 12741 + }, + { + "epoch": 1.0283270115406344, + "grad_norm": 0.6523683667182922, + "learning_rate": 5.879821161765782e-05, + "loss": 2.4088, + "step": 12742 + }, + { + "epoch": 1.0284077152772173, + "grad_norm": 0.6797270178794861, + "learning_rate": 5.878382749260323e-05, + "loss": 2.4465, + "step": 12743 + }, + { + "epoch": 1.0284884190138004, + "grad_norm": 0.6823786497116089, + "learning_rate": 5.876944439476345e-05, + "loss": 2.5053, + "step": 12744 + }, + { + "epoch": 1.0285691227503833, + "grad_norm": 0.6840088367462158, + "learning_rate": 5.875506232449686e-05, + "loss": 2.3771, + "step": 12745 + }, + { + "epoch": 1.0286498264869663, + "grad_norm": 0.6985318064689636, + "learning_rate": 5.8740681282161914e-05, + "loss": 2.4456, + "step": 12746 + }, + { + "epoch": 1.0287305302235494, + "grad_norm": 0.7102388739585876, + "learning_rate": 5.872630126811707e-05, + "loss": 2.4802, + "step": 12747 + }, + { + "epoch": 1.0288112339601323, + "grad_norm": 0.7917937636375427, + "learning_rate": 5.871192228272067e-05, + "loss": 2.4606, + "step": 12748 + }, + { + "epoch": 1.0288919376967154, + "grad_norm": 0.683397114276886, + "learning_rate": 5.86975443263311e-05, + "loss": 2.5011, + "step": 12749 + }, + { + "epoch": 1.0289726414332985, + "grad_norm": 0.7543408870697021, + "learning_rate": 5.8683167399306724e-05, + "loss": 2.4705, + "step": 12750 + }, + { + "epoch": 1.0290533451698813, + "grad_norm": 0.6946283578872681, + "learning_rate": 5.866879150200579e-05, + "loss": 2.4986, + "step": 12751 + }, + { + "epoch": 1.0291340489064644, + 
"grad_norm": 0.6535125374794006, + "learning_rate": 5.8654416634786605e-05, + "loss": 2.4203, + "step": 12752 + }, + { + "epoch": 1.0292147526430473, + "grad_norm": 0.7470195889472961, + "learning_rate": 5.8640042798007455e-05, + "loss": 2.5103, + "step": 12753 + }, + { + "epoch": 1.0292954563796304, + "grad_norm": 0.6782363653182983, + "learning_rate": 5.8625669992026535e-05, + "loss": 2.4087, + "step": 12754 + }, + { + "epoch": 1.0293761601162135, + "grad_norm": 0.7601497173309326, + "learning_rate": 5.861129821720207e-05, + "loss": 2.4752, + "step": 12755 + }, + { + "epoch": 1.0294568638527963, + "grad_norm": 0.6875388026237488, + "learning_rate": 5.859692747389227e-05, + "loss": 2.448, + "step": 12756 + }, + { + "epoch": 1.0295375675893794, + "grad_norm": 0.7153629064559937, + "learning_rate": 5.858255776245525e-05, + "loss": 2.4641, + "step": 12757 + }, + { + "epoch": 1.0296182713259623, + "grad_norm": 0.682954728603363, + "learning_rate": 5.8568189083249145e-05, + "loss": 2.441, + "step": 12758 + }, + { + "epoch": 1.0296989750625454, + "grad_norm": 0.6959100961685181, + "learning_rate": 5.855382143663209e-05, + "loss": 2.4316, + "step": 12759 + }, + { + "epoch": 1.0297796787991285, + "grad_norm": 0.7062023878097534, + "learning_rate": 5.8539454822962167e-05, + "loss": 2.4287, + "step": 12760 + }, + { + "epoch": 1.0298603825357113, + "grad_norm": 0.706523597240448, + "learning_rate": 5.852508924259736e-05, + "loss": 2.4596, + "step": 12761 + }, + { + "epoch": 1.0299410862722944, + "grad_norm": 0.6908385753631592, + "learning_rate": 5.851072469589578e-05, + "loss": 2.4428, + "step": 12762 + }, + { + "epoch": 1.0300217900088775, + "grad_norm": 0.6810726523399353, + "learning_rate": 5.8496361183215386e-05, + "loss": 2.4902, + "step": 12763 + }, + { + "epoch": 1.0301024937454604, + "grad_norm": 0.661613941192627, + "learning_rate": 5.8481998704914156e-05, + "loss": 2.4256, + "step": 12764 + }, + { + "epoch": 1.0301831974820435, + "grad_norm": 0.6633132100105286, + 
"learning_rate": 5.846763726135005e-05, + "loss": 2.4512, + "step": 12765 + }, + { + "epoch": 1.0302639012186263, + "grad_norm": 0.6991820335388184, + "learning_rate": 5.8453276852881025e-05, + "loss": 2.3747, + "step": 12766 + }, + { + "epoch": 1.0303446049552094, + "grad_norm": 0.7392076253890991, + "learning_rate": 5.843891747986487e-05, + "loss": 2.438, + "step": 12767 + }, + { + "epoch": 1.0304253086917925, + "grad_norm": 0.6371724605560303, + "learning_rate": 5.842455914265958e-05, + "loss": 2.4627, + "step": 12768 + }, + { + "epoch": 1.0305060124283754, + "grad_norm": 0.6475048661231995, + "learning_rate": 5.841020184162298e-05, + "loss": 2.4883, + "step": 12769 + }, + { + "epoch": 1.0305867161649584, + "grad_norm": 0.6848995685577393, + "learning_rate": 5.839584557711283e-05, + "loss": 2.4452, + "step": 12770 + }, + { + "epoch": 1.0306674199015415, + "grad_norm": 0.7345505952835083, + "learning_rate": 5.838149034948697e-05, + "loss": 2.5121, + "step": 12771 + }, + { + "epoch": 1.0307481236381244, + "grad_norm": 0.715373158454895, + "learning_rate": 5.836713615910318e-05, + "loss": 2.4549, + "step": 12772 + }, + { + "epoch": 1.0308288273747075, + "grad_norm": 0.7371035814285278, + "learning_rate": 5.8352783006319166e-05, + "loss": 2.4633, + "step": 12773 + }, + { + "epoch": 1.0309095311112904, + "grad_norm": 0.6843077540397644, + "learning_rate": 5.833843089149267e-05, + "loss": 2.4067, + "step": 12774 + }, + { + "epoch": 1.0309902348478734, + "grad_norm": 0.7398965954780579, + "learning_rate": 5.832407981498136e-05, + "loss": 2.5199, + "step": 12775 + }, + { + "epoch": 1.0310709385844565, + "grad_norm": 0.6860283017158508, + "learning_rate": 5.830972977714294e-05, + "loss": 2.4564, + "step": 12776 + }, + { + "epoch": 1.0311516423210394, + "grad_norm": 0.683893084526062, + "learning_rate": 5.829538077833503e-05, + "loss": 2.4635, + "step": 12777 + }, + { + "epoch": 1.0312323460576225, + "grad_norm": 0.6412089467048645, + "learning_rate": 
5.828103281891525e-05, + "loss": 2.4806, + "step": 12778 + }, + { + "epoch": 1.0313130497942056, + "grad_norm": 0.646393895149231, + "learning_rate": 5.826668589924123e-05, + "loss": 2.4674, + "step": 12779 + }, + { + "epoch": 1.0313937535307884, + "grad_norm": 0.6805605292320251, + "learning_rate": 5.825234001967044e-05, + "loss": 2.5145, + "step": 12780 + }, + { + "epoch": 1.0314744572673715, + "grad_norm": 0.681532084941864, + "learning_rate": 5.8237995180560455e-05, + "loss": 2.5041, + "step": 12781 + }, + { + "epoch": 1.0315551610039544, + "grad_norm": 0.6971312165260315, + "learning_rate": 5.8223651382268865e-05, + "loss": 2.5324, + "step": 12782 + }, + { + "epoch": 1.0316358647405375, + "grad_norm": 0.6634463667869568, + "learning_rate": 5.8209308625153026e-05, + "loss": 2.5086, + "step": 12783 + }, + { + "epoch": 1.0317165684771206, + "grad_norm": 0.6752117276191711, + "learning_rate": 5.819496690957047e-05, + "loss": 2.4805, + "step": 12784 + }, + { + "epoch": 1.0317972722137034, + "grad_norm": 0.7242109775543213, + "learning_rate": 5.818062623587861e-05, + "loss": 2.4205, + "step": 12785 + }, + { + "epoch": 1.0318779759502865, + "grad_norm": 0.7338563203811646, + "learning_rate": 5.816628660443486e-05, + "loss": 2.4277, + "step": 12786 + }, + { + "epoch": 1.0319586796868696, + "grad_norm": 0.6764293313026428, + "learning_rate": 5.81519480155966e-05, + "loss": 2.5096, + "step": 12787 + }, + { + "epoch": 1.0320393834234525, + "grad_norm": 0.6757099032402039, + "learning_rate": 5.813761046972124e-05, + "loss": 2.468, + "step": 12788 + }, + { + "epoch": 1.0321200871600356, + "grad_norm": 0.7072502374649048, + "learning_rate": 5.8123273967166017e-05, + "loss": 2.4642, + "step": 12789 + }, + { + "epoch": 1.0322007908966184, + "grad_norm": 0.6470256447792053, + "learning_rate": 5.810893850828827e-05, + "loss": 2.4146, + "step": 12790 + }, + { + "epoch": 1.0322814946332015, + "grad_norm": 0.7403351068496704, + "learning_rate": 5.809460409344527e-05, + "loss": 
2.512, + "step": 12791 + }, + { + "epoch": 1.0323621983697846, + "grad_norm": 0.6711490154266357, + "learning_rate": 5.808027072299432e-05, + "loss": 2.4602, + "step": 12792 + }, + { + "epoch": 1.0324429021063675, + "grad_norm": 0.7920248508453369, + "learning_rate": 5.806593839729258e-05, + "loss": 2.4512, + "step": 12793 + }, + { + "epoch": 1.0325236058429506, + "grad_norm": 0.6442045569419861, + "learning_rate": 5.805160711669725e-05, + "loss": 2.4165, + "step": 12794 + }, + { + "epoch": 1.0326043095795336, + "grad_norm": 0.6681340932846069, + "learning_rate": 5.803727688156553e-05, + "loss": 2.4296, + "step": 12795 + }, + { + "epoch": 1.0326850133161165, + "grad_norm": 0.6653337478637695, + "learning_rate": 5.802294769225457e-05, + "loss": 2.5165, + "step": 12796 + }, + { + "epoch": 1.0327657170526996, + "grad_norm": 0.6444782018661499, + "learning_rate": 5.8008619549121476e-05, + "loss": 2.4266, + "step": 12797 + }, + { + "epoch": 1.0328464207892825, + "grad_norm": 0.6741451621055603, + "learning_rate": 5.7994292452523394e-05, + "loss": 2.4837, + "step": 12798 + }, + { + "epoch": 1.0329271245258655, + "grad_norm": 0.6629341840744019, + "learning_rate": 5.797996640281731e-05, + "loss": 2.4368, + "step": 12799 + }, + { + "epoch": 1.0330078282624486, + "grad_norm": 0.6755850315093994, + "learning_rate": 5.796564140036029e-05, + "loss": 2.4834, + "step": 12800 + }, + { + "epoch": 1.0330885319990315, + "grad_norm": 0.7271782755851746, + "learning_rate": 5.795131744550942e-05, + "loss": 2.5025, + "step": 12801 + }, + { + "epoch": 1.0331692357356146, + "grad_norm": 0.6870545744895935, + "learning_rate": 5.7936994538621605e-05, + "loss": 2.4443, + "step": 12802 + }, + { + "epoch": 1.0332499394721975, + "grad_norm": 0.7231935858726501, + "learning_rate": 5.792267268005382e-05, + "loss": 2.4917, + "step": 12803 + }, + { + "epoch": 1.0333306432087805, + "grad_norm": 0.6905832290649414, + "learning_rate": 5.790835187016307e-05, + "loss": 2.4902, + "step": 12804 + }, + { + 
"epoch": 1.0334113469453636, + "grad_norm": 0.711814284324646, + "learning_rate": 5.789403210930613e-05, + "loss": 2.4579, + "step": 12805 + }, + { + "epoch": 1.0334920506819465, + "grad_norm": 0.6982280015945435, + "learning_rate": 5.787971339784004e-05, + "loss": 2.5275, + "step": 12806 + }, + { + "epoch": 1.0335727544185296, + "grad_norm": 0.6871493458747864, + "learning_rate": 5.7865395736121575e-05, + "loss": 2.4401, + "step": 12807 + }, + { + "epoch": 1.0336534581551127, + "grad_norm": 0.6898353099822998, + "learning_rate": 5.785107912450763e-05, + "loss": 2.4005, + "step": 12808 + }, + { + "epoch": 1.0337341618916955, + "grad_norm": 0.6264411807060242, + "learning_rate": 5.7836763563354946e-05, + "loss": 2.4497, + "step": 12809 + }, + { + "epoch": 1.0338148656282786, + "grad_norm": 0.6997092962265015, + "learning_rate": 5.782244905302032e-05, + "loss": 2.4388, + "step": 12810 + }, + { + "epoch": 1.0338955693648615, + "grad_norm": 0.6834601759910583, + "learning_rate": 5.7808135593860555e-05, + "loss": 2.4298, + "step": 12811 + }, + { + "epoch": 1.0339762731014446, + "grad_norm": 0.664315402507782, + "learning_rate": 5.77938231862323e-05, + "loss": 2.4289, + "step": 12812 + }, + { + "epoch": 1.0340569768380277, + "grad_norm": 0.6660603284835815, + "learning_rate": 5.7779511830492306e-05, + "loss": 2.4772, + "step": 12813 + }, + { + "epoch": 1.0341376805746105, + "grad_norm": 0.6457028388977051, + "learning_rate": 5.776520152699728e-05, + "loss": 2.4408, + "step": 12814 + }, + { + "epoch": 1.0342183843111936, + "grad_norm": 0.7132207155227661, + "learning_rate": 5.7750892276103794e-05, + "loss": 2.4953, + "step": 12815 + }, + { + "epoch": 1.0342990880477767, + "grad_norm": 0.7397382259368896, + "learning_rate": 5.773658407816848e-05, + "loss": 2.4396, + "step": 12816 + }, + { + "epoch": 1.0343797917843596, + "grad_norm": 0.6951746344566345, + "learning_rate": 5.7722276933548034e-05, + "loss": 2.5021, + "step": 12817 + }, + { + "epoch": 1.0344604955209427, + 
"grad_norm": 0.6789736151695251, + "learning_rate": 5.7707970842598935e-05, + "loss": 2.4883, + "step": 12818 + }, + { + "epoch": 1.0345411992575255, + "grad_norm": 0.7231541872024536, + "learning_rate": 5.7693665805677747e-05, + "loss": 2.4761, + "step": 12819 + }, + { + "epoch": 1.0346219029941086, + "grad_norm": 0.685943603515625, + "learning_rate": 5.767936182314104e-05, + "loss": 2.4489, + "step": 12820 + }, + { + "epoch": 1.0347026067306917, + "grad_norm": 0.7081817984580994, + "learning_rate": 5.7665058895345236e-05, + "loss": 2.4329, + "step": 12821 + }, + { + "epoch": 1.0347833104672746, + "grad_norm": 0.6700818538665771, + "learning_rate": 5.7650757022646804e-05, + "loss": 2.4252, + "step": 12822 + }, + { + "epoch": 1.0348640142038577, + "grad_norm": 0.6712214946746826, + "learning_rate": 5.763645620540223e-05, + "loss": 2.419, + "step": 12823 + }, + { + "epoch": 1.0349447179404407, + "grad_norm": 0.6732817888259888, + "learning_rate": 5.762215644396793e-05, + "loss": 2.3928, + "step": 12824 + }, + { + "epoch": 1.0350254216770236, + "grad_norm": 0.6689301133155823, + "learning_rate": 5.760785773870024e-05, + "loss": 2.3981, + "step": 12825 + }, + { + "epoch": 1.0351061254136067, + "grad_norm": 0.6822957992553711, + "learning_rate": 5.759356008995556e-05, + "loss": 2.5265, + "step": 12826 + }, + { + "epoch": 1.0351868291501896, + "grad_norm": 0.7316287755966187, + "learning_rate": 5.7579263498090194e-05, + "loss": 2.4132, + "step": 12827 + }, + { + "epoch": 1.0352675328867726, + "grad_norm": 0.6688703894615173, + "learning_rate": 5.756496796346047e-05, + "loss": 2.4195, + "step": 12828 + }, + { + "epoch": 1.0353482366233557, + "grad_norm": 0.6894570589065552, + "learning_rate": 5.755067348642268e-05, + "loss": 2.4897, + "step": 12829 + }, + { + "epoch": 1.0354289403599386, + "grad_norm": 0.7635753750801086, + "learning_rate": 5.753638006733311e-05, + "loss": 2.4643, + "step": 12830 + }, + { + "epoch": 1.0355096440965217, + "grad_norm": 0.6353672742843628, 
+ "learning_rate": 5.75220877065479e-05, + "loss": 2.4533, + "step": 12831 + }, + { + "epoch": 1.0355903478331048, + "grad_norm": 0.6725208759307861, + "learning_rate": 5.750779640442332e-05, + "loss": 2.4958, + "step": 12832 + }, + { + "epoch": 1.0356710515696876, + "grad_norm": 0.7350767254829407, + "learning_rate": 5.749350616131556e-05, + "loss": 2.4192, + "step": 12833 + }, + { + "epoch": 1.0357517553062707, + "grad_norm": 0.7322222590446472, + "learning_rate": 5.7479216977580695e-05, + "loss": 2.4719, + "step": 12834 + }, + { + "epoch": 1.0358324590428536, + "grad_norm": 0.7233425974845886, + "learning_rate": 5.7464928853574904e-05, + "loss": 2.4707, + "step": 12835 + }, + { + "epoch": 1.0359131627794367, + "grad_norm": 0.7117420434951782, + "learning_rate": 5.745064178965427e-05, + "loss": 2.4463, + "step": 12836 + }, + { + "epoch": 1.0359938665160198, + "grad_norm": 0.7615050077438354, + "learning_rate": 5.743635578617486e-05, + "loss": 2.4256, + "step": 12837 + }, + { + "epoch": 1.0360745702526026, + "grad_norm": 0.7056093215942383, + "learning_rate": 5.7422070843492734e-05, + "loss": 2.4628, + "step": 12838 + }, + { + "epoch": 1.0361552739891857, + "grad_norm": 0.685989499092102, + "learning_rate": 5.740778696196389e-05, + "loss": 2.4271, + "step": 12839 + }, + { + "epoch": 1.0362359777257688, + "grad_norm": 0.7286686301231384, + "learning_rate": 5.739350414194439e-05, + "loss": 2.4984, + "step": 12840 + }, + { + "epoch": 1.0363166814623517, + "grad_norm": 0.6939802765846252, + "learning_rate": 5.737922238379009e-05, + "loss": 2.4601, + "step": 12841 + }, + { + "epoch": 1.0363973851989348, + "grad_norm": 0.7077060341835022, + "learning_rate": 5.736494168785698e-05, + "loss": 2.4264, + "step": 12842 + }, + { + "epoch": 1.0364780889355176, + "grad_norm": 0.667086124420166, + "learning_rate": 5.7350662054501016e-05, + "loss": 2.4733, + "step": 12843 + }, + { + "epoch": 1.0365587926721007, + "grad_norm": 0.6531338691711426, + "learning_rate": 
5.7336383484078004e-05, + "loss": 2.4709, + "step": 12844 + }, + { + "epoch": 1.0366394964086838, + "grad_norm": 0.7141630053520203, + "learning_rate": 5.732210597694383e-05, + "loss": 2.4747, + "step": 12845 + }, + { + "epoch": 1.0367202001452667, + "grad_norm": 0.7186396718025208, + "learning_rate": 5.730782953345435e-05, + "loss": 2.4401, + "step": 12846 + }, + { + "epoch": 1.0368009038818498, + "grad_norm": 0.6709686517715454, + "learning_rate": 5.7293554153965345e-05, + "loss": 2.456, + "step": 12847 + }, + { + "epoch": 1.0368816076184326, + "grad_norm": 0.6867267489433289, + "learning_rate": 5.727927983883261e-05, + "loss": 2.4522, + "step": 12848 + }, + { + "epoch": 1.0369623113550157, + "grad_norm": 0.7016724348068237, + "learning_rate": 5.7265006588411926e-05, + "loss": 2.4348, + "step": 12849 + }, + { + "epoch": 1.0370430150915988, + "grad_norm": 0.6764764785766602, + "learning_rate": 5.725073440305896e-05, + "loss": 2.4241, + "step": 12850 + }, + { + "epoch": 1.0371237188281817, + "grad_norm": 0.6965062618255615, + "learning_rate": 5.7236463283129435e-05, + "loss": 2.4559, + "step": 12851 + }, + { + "epoch": 1.0372044225647647, + "grad_norm": 0.6878135800361633, + "learning_rate": 5.7222193228979037e-05, + "loss": 2.4874, + "step": 12852 + }, + { + "epoch": 1.0372851263013478, + "grad_norm": 0.6576557755470276, + "learning_rate": 5.720792424096344e-05, + "loss": 2.4273, + "step": 12853 + }, + { + "epoch": 1.0373658300379307, + "grad_norm": 0.7463123798370361, + "learning_rate": 5.719365631943818e-05, + "loss": 2.4933, + "step": 12854 + }, + { + "epoch": 1.0374465337745138, + "grad_norm": 0.6920896768569946, + "learning_rate": 5.7179389464758914e-05, + "loss": 2.4799, + "step": 12855 + }, + { + "epoch": 1.0375272375110969, + "grad_norm": 0.7330591082572937, + "learning_rate": 5.71651236772812e-05, + "loss": 2.469, + "step": 12856 + }, + { + "epoch": 1.0376079412476797, + "grad_norm": 0.6766076683998108, + "learning_rate": 5.715085895736057e-05, + "loss": 
2.4787, + "step": 12857 + }, + { + "epoch": 1.0376886449842628, + "grad_norm": 0.724278450012207, + "learning_rate": 5.713659530535255e-05, + "loss": 2.4524, + "step": 12858 + }, + { + "epoch": 1.0377693487208457, + "grad_norm": 0.6816281676292419, + "learning_rate": 5.712233272161265e-05, + "loss": 2.4993, + "step": 12859 + }, + { + "epoch": 1.0378500524574288, + "grad_norm": 0.7186439633369446, + "learning_rate": 5.710807120649626e-05, + "loss": 2.4108, + "step": 12860 + }, + { + "epoch": 1.0379307561940119, + "grad_norm": 0.6616777181625366, + "learning_rate": 5.709381076035887e-05, + "loss": 2.4797, + "step": 12861 + }, + { + "epoch": 1.0380114599305947, + "grad_norm": 0.6956895589828491, + "learning_rate": 5.7079551383555906e-05, + "loss": 2.4017, + "step": 12862 + }, + { + "epoch": 1.0380921636671778, + "grad_norm": 0.6650584936141968, + "learning_rate": 5.706529307644268e-05, + "loss": 2.4808, + "step": 12863 + }, + { + "epoch": 1.0381728674037607, + "grad_norm": 0.6362698674201965, + "learning_rate": 5.705103583937458e-05, + "loss": 2.4077, + "step": 12864 + }, + { + "epoch": 1.0382535711403438, + "grad_norm": 0.6962565183639526, + "learning_rate": 5.703677967270697e-05, + "loss": 2.4715, + "step": 12865 + }, + { + "epoch": 1.0383342748769269, + "grad_norm": 0.6927294135093689, + "learning_rate": 5.702252457679509e-05, + "loss": 2.4983, + "step": 12866 + }, + { + "epoch": 1.0384149786135097, + "grad_norm": 0.7107497453689575, + "learning_rate": 5.70082705519942e-05, + "loss": 2.4198, + "step": 12867 + }, + { + "epoch": 1.0384956823500928, + "grad_norm": 0.6459221243858337, + "learning_rate": 5.6994017598659634e-05, + "loss": 2.4423, + "step": 12868 + }, + { + "epoch": 1.038576386086676, + "grad_norm": 0.705563485622406, + "learning_rate": 5.697976571714658e-05, + "loss": 2.5346, + "step": 12869 + }, + { + "epoch": 1.0386570898232588, + "grad_norm": 0.7424784898757935, + "learning_rate": 5.696551490781021e-05, + "loss": 2.4824, + "step": 12870 + }, + { + 
"epoch": 1.0387377935598419, + "grad_norm": 0.6820988059043884, + "learning_rate": 5.695126517100569e-05, + "loss": 2.4965, + "step": 12871 + }, + { + "epoch": 1.0388184972964247, + "grad_norm": 0.8209595680236816, + "learning_rate": 5.6937016507088225e-05, + "loss": 2.475, + "step": 12872 + }, + { + "epoch": 1.0388992010330078, + "grad_norm": 0.7407695055007935, + "learning_rate": 5.6922768916412815e-05, + "loss": 2.4683, + "step": 12873 + }, + { + "epoch": 1.038979904769591, + "grad_norm": 0.7335677742958069, + "learning_rate": 5.690852239933462e-05, + "loss": 2.4621, + "step": 12874 + }, + { + "epoch": 1.0390606085061738, + "grad_norm": 0.6731325387954712, + "learning_rate": 5.689427695620873e-05, + "loss": 2.4882, + "step": 12875 + }, + { + "epoch": 1.0391413122427569, + "grad_norm": 0.7256175875663757, + "learning_rate": 5.68800325873901e-05, + "loss": 2.4827, + "step": 12876 + }, + { + "epoch": 1.03922201597934, + "grad_norm": 0.711928129196167, + "learning_rate": 5.686578929323377e-05, + "loss": 2.4447, + "step": 12877 + }, + { + "epoch": 1.0393027197159228, + "grad_norm": 0.6445996165275574, + "learning_rate": 5.685154707409473e-05, + "loss": 2.453, + "step": 12878 + }, + { + "epoch": 1.039383423452506, + "grad_norm": 0.6656066179275513, + "learning_rate": 5.6837305930327923e-05, + "loss": 2.4863, + "step": 12879 + }, + { + "epoch": 1.0394641271890888, + "grad_norm": 0.6844663619995117, + "learning_rate": 5.682306586228828e-05, + "loss": 2.4524, + "step": 12880 + }, + { + "epoch": 1.0395448309256718, + "grad_norm": 0.6436383724212646, + "learning_rate": 5.6808826870330746e-05, + "loss": 2.4137, + "step": 12881 + }, + { + "epoch": 1.039625534662255, + "grad_norm": 0.6731196641921997, + "learning_rate": 5.6794588954810104e-05, + "loss": 2.4176, + "step": 12882 + }, + { + "epoch": 1.0397062383988378, + "grad_norm": 0.6994587779045105, + "learning_rate": 5.678035211608125e-05, + "loss": 2.4651, + "step": 12883 + }, + { + "epoch": 1.0397869421354209, + 
"grad_norm": 0.6912599205970764, + "learning_rate": 5.6766116354499e-05, + "loss": 2.3918, + "step": 12884 + }, + { + "epoch": 1.039867645872004, + "grad_norm": 0.7627033591270447, + "learning_rate": 5.6751881670418185e-05, + "loss": 2.4278, + "step": 12885 + }, + { + "epoch": 1.0399483496085868, + "grad_norm": 0.7107213139533997, + "learning_rate": 5.6737648064193485e-05, + "loss": 2.5249, + "step": 12886 + }, + { + "epoch": 1.04002905334517, + "grad_norm": 0.7254211902618408, + "learning_rate": 5.672341553617968e-05, + "loss": 2.4454, + "step": 12887 + }, + { + "epoch": 1.0401097570817528, + "grad_norm": 0.6776205897331238, + "learning_rate": 5.670918408673149e-05, + "loss": 2.4333, + "step": 12888 + }, + { + "epoch": 1.0401904608183359, + "grad_norm": 0.6824465394020081, + "learning_rate": 5.669495371620359e-05, + "loss": 2.427, + "step": 12889 + }, + { + "epoch": 1.040271164554919, + "grad_norm": 0.6633001565933228, + "learning_rate": 5.668072442495066e-05, + "loss": 2.4874, + "step": 12890 + }, + { + "epoch": 1.0403518682915018, + "grad_norm": 0.6655289530754089, + "learning_rate": 5.666649621332735e-05, + "loss": 2.5023, + "step": 12891 + }, + { + "epoch": 1.040432572028085, + "grad_norm": 0.6892853379249573, + "learning_rate": 5.665226908168818e-05, + "loss": 2.4505, + "step": 12892 + }, + { + "epoch": 1.040513275764668, + "grad_norm": 0.7154649496078491, + "learning_rate": 5.6638043030387774e-05, + "loss": 2.4916, + "step": 12893 + }, + { + "epoch": 1.0405939795012509, + "grad_norm": 0.6780592799186707, + "learning_rate": 5.662381805978074e-05, + "loss": 2.4116, + "step": 12894 + }, + { + "epoch": 1.040674683237834, + "grad_norm": 0.6737352013587952, + "learning_rate": 5.66095941702215e-05, + "loss": 2.3903, + "step": 12895 + }, + { + "epoch": 1.0407553869744168, + "grad_norm": 0.7623820304870605, + "learning_rate": 5.659537136206461e-05, + "loss": 2.4334, + "step": 12896 + }, + { + "epoch": 1.040836090711, + "grad_norm": 0.7043081521987915, + 
"learning_rate": 5.65811496356645e-05, + "loss": 2.4403, + "step": 12897 + }, + { + "epoch": 1.040916794447583, + "grad_norm": 0.6704873442649841, + "learning_rate": 5.6566928991375654e-05, + "loss": 2.4416, + "step": 12898 + }, + { + "epoch": 1.0409974981841659, + "grad_norm": 0.6556837558746338, + "learning_rate": 5.6552709429552474e-05, + "loss": 2.4904, + "step": 12899 + }, + { + "epoch": 1.041078201920749, + "grad_norm": 0.6926451325416565, + "learning_rate": 5.653849095054935e-05, + "loss": 2.4889, + "step": 12900 + }, + { + "epoch": 1.041158905657332, + "grad_norm": 0.6407613158226013, + "learning_rate": 5.6524273554720674e-05, + "loss": 2.3951, + "step": 12901 + }, + { + "epoch": 1.041239609393915, + "grad_norm": 0.7812615633010864, + "learning_rate": 5.651005724242071e-05, + "loss": 2.4535, + "step": 12902 + }, + { + "epoch": 1.041320313130498, + "grad_norm": 0.6868990659713745, + "learning_rate": 5.6495842014003796e-05, + "loss": 2.4373, + "step": 12903 + }, + { + "epoch": 1.0414010168670809, + "grad_norm": 0.6467776894569397, + "learning_rate": 5.648162786982427e-05, + "loss": 2.4929, + "step": 12904 + }, + { + "epoch": 1.041481720603664, + "grad_norm": 0.6588063836097717, + "learning_rate": 5.64674148102363e-05, + "loss": 2.4445, + "step": 12905 + }, + { + "epoch": 1.041562424340247, + "grad_norm": 0.6880654096603394, + "learning_rate": 5.6453202835594136e-05, + "loss": 2.4298, + "step": 12906 + }, + { + "epoch": 1.04164312807683, + "grad_norm": 0.7471407055854797, + "learning_rate": 5.6438991946251996e-05, + "loss": 2.4669, + "step": 12907 + }, + { + "epoch": 1.041723831813413, + "grad_norm": 0.7069533467292786, + "learning_rate": 5.6424782142564034e-05, + "loss": 2.4498, + "step": 12908 + }, + { + "epoch": 1.0418045355499959, + "grad_norm": 0.7013602256774902, + "learning_rate": 5.641057342488443e-05, + "loss": 2.4993, + "step": 12909 + }, + { + "epoch": 1.041885239286579, + "grad_norm": 0.6870697736740112, + "learning_rate": 5.6396365793567305e-05, + 
"loss": 2.5338, + "step": 12910 + }, + { + "epoch": 1.041965943023162, + "grad_norm": 0.6569130420684814, + "learning_rate": 5.638215924896669e-05, + "loss": 2.4538, + "step": 12911 + }, + { + "epoch": 1.042046646759745, + "grad_norm": 0.6900331377983093, + "learning_rate": 5.636795379143669e-05, + "loss": 2.4013, + "step": 12912 + }, + { + "epoch": 1.042127350496328, + "grad_norm": 0.6800071001052856, + "learning_rate": 5.635374942133136e-05, + "loss": 2.4733, + "step": 12913 + }, + { + "epoch": 1.042208054232911, + "grad_norm": 0.703601598739624, + "learning_rate": 5.6339546139004663e-05, + "loss": 2.432, + "step": 12914 + }, + { + "epoch": 1.042288757969494, + "grad_norm": 0.6781988739967346, + "learning_rate": 5.6325343944810594e-05, + "loss": 2.4418, + "step": 12915 + }, + { + "epoch": 1.042369461706077, + "grad_norm": 0.7247167825698853, + "learning_rate": 5.6311142839103125e-05, + "loss": 2.5133, + "step": 12916 + }, + { + "epoch": 1.04245016544266, + "grad_norm": 0.7738155126571655, + "learning_rate": 5.629694282223619e-05, + "loss": 2.5137, + "step": 12917 + }, + { + "epoch": 1.042530869179243, + "grad_norm": 0.74723219871521, + "learning_rate": 5.628274389456367e-05, + "loss": 2.3996, + "step": 12918 + }, + { + "epoch": 1.042611572915826, + "grad_norm": 0.7245466709136963, + "learning_rate": 5.6268546056439456e-05, + "loss": 2.4213, + "step": 12919 + }, + { + "epoch": 1.042692276652409, + "grad_norm": 0.6307608485221863, + "learning_rate": 5.625434930821742e-05, + "loss": 2.4195, + "step": 12920 + }, + { + "epoch": 1.042772980388992, + "grad_norm": 0.7138007879257202, + "learning_rate": 5.6240153650251326e-05, + "loss": 2.463, + "step": 12921 + }, + { + "epoch": 1.042853684125575, + "grad_norm": 0.779659628868103, + "learning_rate": 5.622595908289498e-05, + "loss": 2.4898, + "step": 12922 + }, + { + "epoch": 1.042934387862158, + "grad_norm": 0.7144278287887573, + "learning_rate": 5.621176560650221e-05, + "loss": 2.4083, + "step": 12923 + }, + { + "epoch": 
1.043015091598741, + "grad_norm": 0.7724754214286804, + "learning_rate": 5.619757322142667e-05, + "loss": 2.3917, + "step": 12924 + }, + { + "epoch": 1.043095795335324, + "grad_norm": 0.7667245268821716, + "learning_rate": 5.618338192802208e-05, + "loss": 2.4943, + "step": 12925 + }, + { + "epoch": 1.043176499071907, + "grad_norm": 0.6528030037879944, + "learning_rate": 5.616919172664221e-05, + "loss": 2.4323, + "step": 12926 + }, + { + "epoch": 1.04325720280849, + "grad_norm": 0.6790263652801514, + "learning_rate": 5.6155002617640615e-05, + "loss": 2.4304, + "step": 12927 + }, + { + "epoch": 1.043337906545073, + "grad_norm": 0.7554369568824768, + "learning_rate": 5.614081460137097e-05, + "loss": 2.4637, + "step": 12928 + }, + { + "epoch": 1.043418610281656, + "grad_norm": 0.7126293182373047, + "learning_rate": 5.612662767818686e-05, + "loss": 2.4765, + "step": 12929 + }, + { + "epoch": 1.0434993140182391, + "grad_norm": 0.6705749034881592, + "learning_rate": 5.611244184844189e-05, + "loss": 2.4746, + "step": 12930 + }, + { + "epoch": 1.043580017754822, + "grad_norm": 0.6595145463943481, + "learning_rate": 5.609825711248958e-05, + "loss": 2.463, + "step": 12931 + }, + { + "epoch": 1.043660721491405, + "grad_norm": 0.6942049860954285, + "learning_rate": 5.6084073470683476e-05, + "loss": 2.5101, + "step": 12932 + }, + { + "epoch": 1.043741425227988, + "grad_norm": 0.7285810708999634, + "learning_rate": 5.6069890923377087e-05, + "loss": 2.467, + "step": 12933 + }, + { + "epoch": 1.043822128964571, + "grad_norm": 0.7702928185462952, + "learning_rate": 5.605570947092382e-05, + "loss": 2.4998, + "step": 12934 + }, + { + "epoch": 1.0439028327011541, + "grad_norm": 0.6631895899772644, + "learning_rate": 5.604152911367713e-05, + "loss": 2.4277, + "step": 12935 + }, + { + "epoch": 1.043983536437737, + "grad_norm": 0.6447882652282715, + "learning_rate": 5.6027349851990494e-05, + "loss": 2.4868, + "step": 12936 + }, + { + "epoch": 1.04406424017432, + "grad_norm": 
0.695160448551178, + "learning_rate": 5.6013171686217205e-05, + "loss": 2.3917, + "step": 12937 + }, + { + "epoch": 1.0441449439109032, + "grad_norm": 0.6579271554946899, + "learning_rate": 5.5998994616710656e-05, + "loss": 2.4245, + "step": 12938 + }, + { + "epoch": 1.044225647647486, + "grad_norm": 0.7053574323654175, + "learning_rate": 5.598481864382419e-05, + "loss": 2.4809, + "step": 12939 + }, + { + "epoch": 1.0443063513840691, + "grad_norm": 0.7008736729621887, + "learning_rate": 5.5970643767911105e-05, + "loss": 2.4481, + "step": 12940 + }, + { + "epoch": 1.044387055120652, + "grad_norm": 0.6577918529510498, + "learning_rate": 5.5956469989324644e-05, + "loss": 2.4211, + "step": 12941 + }, + { + "epoch": 1.044467758857235, + "grad_norm": 0.6662739515304565, + "learning_rate": 5.594229730841815e-05, + "loss": 2.4607, + "step": 12942 + }, + { + "epoch": 1.0445484625938182, + "grad_norm": 0.6637060046195984, + "learning_rate": 5.592812572554471e-05, + "loss": 2.4388, + "step": 12943 + }, + { + "epoch": 1.044629166330401, + "grad_norm": 0.7282097935676575, + "learning_rate": 5.5913955241057605e-05, + "loss": 2.4536, + "step": 12944 + }, + { + "epoch": 1.0447098700669841, + "grad_norm": 0.6470810174942017, + "learning_rate": 5.589978585530997e-05, + "loss": 2.4032, + "step": 12945 + }, + { + "epoch": 1.0447905738035672, + "grad_norm": 0.6958881616592407, + "learning_rate": 5.588561756865498e-05, + "loss": 2.4577, + "step": 12946 + }, + { + "epoch": 1.04487127754015, + "grad_norm": 0.6999812722206116, + "learning_rate": 5.587145038144569e-05, + "loss": 2.454, + "step": 12947 + }, + { + "epoch": 1.0449519812767332, + "grad_norm": 0.6919988989830017, + "learning_rate": 5.58572842940352e-05, + "loss": 2.4505, + "step": 12948 + }, + { + "epoch": 1.045032685013316, + "grad_norm": 0.6813084483146667, + "learning_rate": 5.584311930677659e-05, + "loss": 2.4873, + "step": 12949 + }, + { + "epoch": 1.0451133887498991, + "grad_norm": 0.6587427854537964, + "learning_rate": 
5.582895542002286e-05, + "loss": 2.4658, + "step": 12950 + }, + { + "epoch": 1.0451940924864822, + "grad_norm": 0.6942041516304016, + "learning_rate": 5.581479263412703e-05, + "loss": 2.47, + "step": 12951 + }, + { + "epoch": 1.045274796223065, + "grad_norm": 0.7330117225646973, + "learning_rate": 5.58006309494421e-05, + "loss": 2.4826, + "step": 12952 + }, + { + "epoch": 1.0453554999596482, + "grad_norm": 0.7197144031524658, + "learning_rate": 5.578647036632096e-05, + "loss": 2.4425, + "step": 12953 + }, + { + "epoch": 1.045436203696231, + "grad_norm": 0.7442573308944702, + "learning_rate": 5.577231088511654e-05, + "loss": 2.4946, + "step": 12954 + }, + { + "epoch": 1.0455169074328141, + "grad_norm": 0.7039753198623657, + "learning_rate": 5.575815250618179e-05, + "loss": 2.4188, + "step": 12955 + }, + { + "epoch": 1.0455976111693972, + "grad_norm": 0.7374606728553772, + "learning_rate": 5.574399522986951e-05, + "loss": 2.3916, + "step": 12956 + }, + { + "epoch": 1.04567831490598, + "grad_norm": 0.6358140707015991, + "learning_rate": 5.572983905653253e-05, + "loss": 2.4502, + "step": 12957 + }, + { + "epoch": 1.0457590186425632, + "grad_norm": 0.712858259677887, + "learning_rate": 5.5715683986523694e-05, + "loss": 2.4746, + "step": 12958 + }, + { + "epoch": 1.0458397223791462, + "grad_norm": 0.6757933497428894, + "learning_rate": 5.5701530020195756e-05, + "loss": 2.4836, + "step": 12959 + }, + { + "epoch": 1.045920426115729, + "grad_norm": 0.7509831786155701, + "learning_rate": 5.568737715790151e-05, + "loss": 2.4061, + "step": 12960 + }, + { + "epoch": 1.0460011298523122, + "grad_norm": 0.7120335102081299, + "learning_rate": 5.5673225399993646e-05, + "loss": 2.4772, + "step": 12961 + }, + { + "epoch": 1.046081833588895, + "grad_norm": 0.7213751673698425, + "learning_rate": 5.5659074746824924e-05, + "loss": 2.4637, + "step": 12962 + }, + { + "epoch": 1.0461625373254781, + "grad_norm": 0.7161290645599365, + "learning_rate": 5.5644925198747934e-05, + "loss": 2.4552, 
+ "step": 12963 + }, + { + "epoch": 1.0462432410620612, + "grad_norm": 0.7303922772407532, + "learning_rate": 5.563077675611534e-05, + "loss": 2.5091, + "step": 12964 + }, + { + "epoch": 1.046323944798644, + "grad_norm": 0.7051636576652527, + "learning_rate": 5.561662941927981e-05, + "loss": 2.3717, + "step": 12965 + }, + { + "epoch": 1.0464046485352272, + "grad_norm": 0.6880733370780945, + "learning_rate": 5.5602483188593866e-05, + "loss": 2.4205, + "step": 12966 + }, + { + "epoch": 1.0464853522718103, + "grad_norm": 0.6942360401153564, + "learning_rate": 5.558833806441008e-05, + "loss": 2.4601, + "step": 12967 + }, + { + "epoch": 1.0465660560083931, + "grad_norm": 0.7264992594718933, + "learning_rate": 5.5574194047081016e-05, + "loss": 2.4612, + "step": 12968 + }, + { + "epoch": 1.0466467597449762, + "grad_norm": 0.7502472996711731, + "learning_rate": 5.5560051136959166e-05, + "loss": 2.4099, + "step": 12969 + }, + { + "epoch": 1.046727463481559, + "grad_norm": 0.691694438457489, + "learning_rate": 5.5545909334397004e-05, + "loss": 2.5071, + "step": 12970 + }, + { + "epoch": 1.0468081672181422, + "grad_norm": 0.7120653986930847, + "learning_rate": 5.5531768639747026e-05, + "loss": 2.4066, + "step": 12971 + }, + { + "epoch": 1.0468888709547253, + "grad_norm": 0.6501363515853882, + "learning_rate": 5.551762905336159e-05, + "loss": 2.4186, + "step": 12972 + }, + { + "epoch": 1.0469695746913081, + "grad_norm": 0.6924965977668762, + "learning_rate": 5.5503490575593095e-05, + "loss": 2.4864, + "step": 12973 + }, + { + "epoch": 1.0470502784278912, + "grad_norm": 0.6772900819778442, + "learning_rate": 5.548935320679398e-05, + "loss": 2.4101, + "step": 12974 + }, + { + "epoch": 1.0471309821644743, + "grad_norm": 0.6950967311859131, + "learning_rate": 5.54752169473165e-05, + "loss": 2.4893, + "step": 12975 + }, + { + "epoch": 1.0472116859010572, + "grad_norm": 0.6663516163825989, + "learning_rate": 5.5461081797512994e-05, + "loss": 2.4136, + "step": 12976 + }, + { + 
"epoch": 1.0472923896376403, + "grad_norm": 0.7337449789047241, + "learning_rate": 5.5446947757735754e-05, + "loss": 2.473, + "step": 12977 + }, + { + "epoch": 1.0473730933742231, + "grad_norm": 0.6808840036392212, + "learning_rate": 5.543281482833709e-05, + "loss": 2.4473, + "step": 12978 + }, + { + "epoch": 1.0474537971108062, + "grad_norm": 0.6472508907318115, + "learning_rate": 5.5418683009669124e-05, + "loss": 2.4077, + "step": 12979 + }, + { + "epoch": 1.0475345008473893, + "grad_norm": 0.6904192566871643, + "learning_rate": 5.540455230208409e-05, + "loss": 2.482, + "step": 12980 + }, + { + "epoch": 1.0476152045839722, + "grad_norm": 0.6781610250473022, + "learning_rate": 5.5390422705934264e-05, + "loss": 2.4458, + "step": 12981 + }, + { + "epoch": 1.0476959083205553, + "grad_norm": 0.7130050659179688, + "learning_rate": 5.5376294221571666e-05, + "loss": 2.5136, + "step": 12982 + }, + { + "epoch": 1.0477766120571383, + "grad_norm": 0.7727184891700745, + "learning_rate": 5.536216684934846e-05, + "loss": 2.5346, + "step": 12983 + }, + { + "epoch": 1.0478573157937212, + "grad_norm": 0.7177208662033081, + "learning_rate": 5.534804058961679e-05, + "loss": 2.4153, + "step": 12984 + }, + { + "epoch": 1.0479380195303043, + "grad_norm": 0.7333023548126221, + "learning_rate": 5.5333915442728634e-05, + "loss": 2.4171, + "step": 12985 + }, + { + "epoch": 1.0480187232668872, + "grad_norm": 0.658423125743866, + "learning_rate": 5.5319791409036046e-05, + "loss": 2.446, + "step": 12986 + }, + { + "epoch": 1.0480994270034703, + "grad_norm": 0.8305184841156006, + "learning_rate": 5.5305668488891114e-05, + "loss": 2.5026, + "step": 12987 + }, + { + "epoch": 1.0481801307400533, + "grad_norm": 0.7083305716514587, + "learning_rate": 5.52915466826457e-05, + "loss": 2.5366, + "step": 12988 + }, + { + "epoch": 1.0482608344766362, + "grad_norm": 0.7924454212188721, + "learning_rate": 5.5277425990651824e-05, + "loss": 2.528, + "step": 12989 + }, + { + "epoch": 1.0483415382132193, + 
"grad_norm": 0.633376955986023, + "learning_rate": 5.5263306413261384e-05, + "loss": 2.4442, + "step": 12990 + }, + { + "epoch": 1.0484222419498024, + "grad_norm": 0.7387240529060364, + "learning_rate": 5.5249187950826295e-05, + "loss": 2.4761, + "step": 12991 + }, + { + "epoch": 1.0485029456863852, + "grad_norm": 0.6796224117279053, + "learning_rate": 5.523507060369843e-05, + "loss": 2.4828, + "step": 12992 + }, + { + "epoch": 1.0485836494229683, + "grad_norm": 0.6925581097602844, + "learning_rate": 5.5220954372229604e-05, + "loss": 2.4861, + "step": 12993 + }, + { + "epoch": 1.0486643531595512, + "grad_norm": 0.6854318380355835, + "learning_rate": 5.5206839256771704e-05, + "loss": 2.473, + "step": 12994 + }, + { + "epoch": 1.0487450568961343, + "grad_norm": 0.706375241279602, + "learning_rate": 5.519272525767643e-05, + "loss": 2.4284, + "step": 12995 + }, + { + "epoch": 1.0488257606327174, + "grad_norm": 0.6917428374290466, + "learning_rate": 5.517861237529556e-05, + "loss": 2.4702, + "step": 12996 + }, + { + "epoch": 1.0489064643693002, + "grad_norm": 0.6903818845748901, + "learning_rate": 5.516450060998086e-05, + "loss": 2.4679, + "step": 12997 + }, + { + "epoch": 1.0489871681058833, + "grad_norm": 0.6403356194496155, + "learning_rate": 5.515038996208398e-05, + "loss": 2.396, + "step": 12998 + }, + { + "epoch": 1.0490678718424662, + "grad_norm": 0.6491792798042297, + "learning_rate": 5.513628043195662e-05, + "loss": 2.4543, + "step": 12999 + }, + { + "epoch": 1.0491485755790493, + "grad_norm": 0.687303900718689, + "learning_rate": 5.512217201995043e-05, + "loss": 2.4716, + "step": 13000 + }, + { + "epoch": 1.0491485755790493, + "eval_loss": 2.4177169799804688, + "eval_runtime": 763.9215, + "eval_samples_per_second": 3.43, + "eval_steps_per_second": 0.572, + "step": 13000 + }, + { + "epoch": 1.0492292793156324, + "grad_norm": 0.7020761370658875, + "learning_rate": 5.510806472641701e-05, + "loss": 2.3591, + "step": 13001 + }, + { + "epoch": 1.0493099830522152, + 
"grad_norm": 0.6978075504302979, + "learning_rate": 5.509395855170798e-05, + "loss": 2.4585, + "step": 13002 + }, + { + "epoch": 1.0493906867887983, + "grad_norm": 0.7327752113342285, + "learning_rate": 5.5079853496174925e-05, + "loss": 2.5265, + "step": 13003 + }, + { + "epoch": 1.0494713905253814, + "grad_norm": 0.7552505135536194, + "learning_rate": 5.50657495601693e-05, + "loss": 2.4821, + "step": 13004 + }, + { + "epoch": 1.0495520942619643, + "grad_norm": 0.7100770473480225, + "learning_rate": 5.5051646744042664e-05, + "loss": 2.4566, + "step": 13005 + }, + { + "epoch": 1.0496327979985474, + "grad_norm": 0.7008209824562073, + "learning_rate": 5.503754504814651e-05, + "loss": 2.4476, + "step": 13006 + }, + { + "epoch": 1.0497135017351304, + "grad_norm": 0.640724241733551, + "learning_rate": 5.502344447283223e-05, + "loss": 2.437, + "step": 13007 + }, + { + "epoch": 1.0497942054717133, + "grad_norm": 0.7064981460571289, + "learning_rate": 5.5009345018451297e-05, + "loss": 2.5129, + "step": 13008 + }, + { + "epoch": 1.0498749092082964, + "grad_norm": 0.6729782223701477, + "learning_rate": 5.49952466853551e-05, + "loss": 2.4867, + "step": 13009 + }, + { + "epoch": 1.0499556129448793, + "grad_norm": 0.7245302200317383, + "learning_rate": 5.4981149473894966e-05, + "loss": 2.4485, + "step": 13010 + }, + { + "epoch": 1.0500363166814624, + "grad_norm": 0.6686248779296875, + "learning_rate": 5.4967053384422294e-05, + "loss": 2.4314, + "step": 13011 + }, + { + "epoch": 1.0501170204180454, + "grad_norm": 0.6790863871574402, + "learning_rate": 5.495295841728836e-05, + "loss": 2.4847, + "step": 13012 + }, + { + "epoch": 1.0501977241546283, + "grad_norm": 0.6516931653022766, + "learning_rate": 5.49388645728445e-05, + "loss": 2.4306, + "step": 13013 + }, + { + "epoch": 1.0502784278912114, + "grad_norm": 0.6967600584030151, + "learning_rate": 5.492477185144189e-05, + "loss": 2.4942, + "step": 13014 + }, + { + "epoch": 1.0503591316277943, + "grad_norm": 0.696246325969696, + 
"learning_rate": 5.491068025343178e-05, + "loss": 2.4647, + "step": 13015 + }, + { + "epoch": 1.0504398353643774, + "grad_norm": 0.6962751150131226, + "learning_rate": 5.489658977916543e-05, + "loss": 2.5095, + "step": 13016 + }, + { + "epoch": 1.0505205391009604, + "grad_norm": 0.6982631087303162, + "learning_rate": 5.488250042899392e-05, + "loss": 2.4327, + "step": 13017 + }, + { + "epoch": 1.0506012428375433, + "grad_norm": 0.6932644844055176, + "learning_rate": 5.486841220326845e-05, + "loss": 2.4777, + "step": 13018 + }, + { + "epoch": 1.0506819465741264, + "grad_norm": 0.6923339366912842, + "learning_rate": 5.485432510234012e-05, + "loss": 2.4321, + "step": 13019 + }, + { + "epoch": 1.0507626503107095, + "grad_norm": 0.7445859313011169, + "learning_rate": 5.4840239126560015e-05, + "loss": 2.4425, + "step": 13020 + }, + { + "epoch": 1.0508433540472923, + "grad_norm": 0.7122324705123901, + "learning_rate": 5.48261542762792e-05, + "loss": 2.4545, + "step": 13021 + }, + { + "epoch": 1.0509240577838754, + "grad_norm": 0.734779417514801, + "learning_rate": 5.4812070551848736e-05, + "loss": 2.4764, + "step": 13022 + }, + { + "epoch": 1.0510047615204583, + "grad_norm": 0.6544109582901001, + "learning_rate": 5.4797987953619566e-05, + "loss": 2.4492, + "step": 13023 + }, + { + "epoch": 1.0510854652570414, + "grad_norm": 0.6366097331047058, + "learning_rate": 5.4783906481942704e-05, + "loss": 2.4695, + "step": 13024 + }, + { + "epoch": 1.0511661689936245, + "grad_norm": 0.6966270804405212, + "learning_rate": 5.476982613716908e-05, + "loss": 2.4505, + "step": 13025 + }, + { + "epoch": 1.0512468727302073, + "grad_norm": 0.7010120153427124, + "learning_rate": 5.4755746919649665e-05, + "loss": 2.4545, + "step": 13026 + }, + { + "epoch": 1.0513275764667904, + "grad_norm": 0.6704719662666321, + "learning_rate": 5.474166882973526e-05, + "loss": 2.3899, + "step": 13027 + }, + { + "epoch": 1.0514082802033735, + "grad_norm": 0.757152259349823, + "learning_rate": 
5.472759186777679e-05, + "loss": 2.5112, + "step": 13028 + }, + { + "epoch": 1.0514889839399564, + "grad_norm": 0.6668868660926819, + "learning_rate": 5.471351603412509e-05, + "loss": 2.4797, + "step": 13029 + }, + { + "epoch": 1.0515696876765395, + "grad_norm": 0.7919496893882751, + "learning_rate": 5.4699441329130887e-05, + "loss": 2.4874, + "step": 13030 + }, + { + "epoch": 1.0516503914131223, + "grad_norm": 0.7595484852790833, + "learning_rate": 5.468536775314506e-05, + "loss": 2.4621, + "step": 13031 + }, + { + "epoch": 1.0517310951497054, + "grad_norm": 0.6575995683670044, + "learning_rate": 5.467129530651835e-05, + "loss": 2.4474, + "step": 13032 + }, + { + "epoch": 1.0518117988862885, + "grad_norm": 0.6817733645439148, + "learning_rate": 5.4657223989601425e-05, + "loss": 2.4329, + "step": 13033 + }, + { + "epoch": 1.0518925026228714, + "grad_norm": 0.722882091999054, + "learning_rate": 5.464315380274501e-05, + "loss": 2.4544, + "step": 13034 + }, + { + "epoch": 1.0519732063594545, + "grad_norm": 0.6957377791404724, + "learning_rate": 5.4629084746299796e-05, + "loss": 2.5669, + "step": 13035 + }, + { + "epoch": 1.0520539100960375, + "grad_norm": 0.6749420166015625, + "learning_rate": 5.461501682061636e-05, + "loss": 2.5053, + "step": 13036 + }, + { + "epoch": 1.0521346138326204, + "grad_norm": 0.8158369064331055, + "learning_rate": 5.4600950026045326e-05, + "loss": 2.429, + "step": 13037 + }, + { + "epoch": 1.0522153175692035, + "grad_norm": 0.6960736513137817, + "learning_rate": 5.458688436293735e-05, + "loss": 2.4731, + "step": 13038 + }, + { + "epoch": 1.0522960213057864, + "grad_norm": 0.6686301231384277, + "learning_rate": 5.457281983164287e-05, + "loss": 2.4495, + "step": 13039 + }, + { + "epoch": 1.0523767250423695, + "grad_norm": 0.6691476106643677, + "learning_rate": 5.455875643251248e-05, + "loss": 2.4329, + "step": 13040 + }, + { + "epoch": 1.0524574287789525, + "grad_norm": 0.7737297415733337, + "learning_rate": 5.454469416589666e-05, + "loss": 
2.4664, + "step": 13041 + }, + { + "epoch": 1.0525381325155354, + "grad_norm": 0.7848188281059265, + "learning_rate": 5.453063303214588e-05, + "loss": 2.4799, + "step": 13042 + }, + { + "epoch": 1.0526188362521185, + "grad_norm": 0.7831119894981384, + "learning_rate": 5.45165730316106e-05, + "loss": 2.5076, + "step": 13043 + }, + { + "epoch": 1.0526995399887016, + "grad_norm": 0.691635012626648, + "learning_rate": 5.4502514164641196e-05, + "loss": 2.4866, + "step": 13044 + }, + { + "epoch": 1.0527802437252844, + "grad_norm": 0.6667110919952393, + "learning_rate": 5.4488456431588106e-05, + "loss": 2.4162, + "step": 13045 + }, + { + "epoch": 1.0528609474618675, + "grad_norm": 0.7201905846595764, + "learning_rate": 5.447439983280163e-05, + "loss": 2.498, + "step": 13046 + }, + { + "epoch": 1.0529416511984504, + "grad_norm": 0.8538106083869934, + "learning_rate": 5.44603443686321e-05, + "loss": 2.4477, + "step": 13047 + }, + { + "epoch": 1.0530223549350335, + "grad_norm": 0.6661962270736694, + "learning_rate": 5.444629003942987e-05, + "loss": 2.5253, + "step": 13048 + }, + { + "epoch": 1.0531030586716166, + "grad_norm": 0.7239834666252136, + "learning_rate": 5.4432236845545146e-05, + "loss": 2.4786, + "step": 13049 + }, + { + "epoch": 1.0531837624081994, + "grad_norm": 0.7328412532806396, + "learning_rate": 5.4418184787328186e-05, + "loss": 2.4841, + "step": 13050 + }, + { + "epoch": 1.0532644661447825, + "grad_norm": 0.6395559310913086, + "learning_rate": 5.440413386512922e-05, + "loss": 2.3544, + "step": 13051 + }, + { + "epoch": 1.0533451698813656, + "grad_norm": 0.6632471084594727, + "learning_rate": 5.43900840792984e-05, + "loss": 2.4753, + "step": 13052 + }, + { + "epoch": 1.0534258736179485, + "grad_norm": 0.7262828350067139, + "learning_rate": 5.4376035430185935e-05, + "loss": 2.4162, + "step": 13053 + }, + { + "epoch": 1.0535065773545316, + "grad_norm": 0.7897952198982239, + "learning_rate": 5.436198791814196e-05, + "loss": 2.4571, + "step": 13054 + }, + { + 
"epoch": 1.0535872810911144, + "grad_norm": 0.7281489372253418, + "learning_rate": 5.434794154351651e-05, + "loss": 2.4531, + "step": 13055 + }, + { + "epoch": 1.0536679848276975, + "grad_norm": 0.7322356700897217, + "learning_rate": 5.4333896306659694e-05, + "loss": 2.4102, + "step": 13056 + }, + { + "epoch": 1.0537486885642806, + "grad_norm": 0.7657945156097412, + "learning_rate": 5.4319852207921554e-05, + "loss": 2.4526, + "step": 13057 + }, + { + "epoch": 1.0538293923008635, + "grad_norm": 0.6732973456382751, + "learning_rate": 5.430580924765214e-05, + "loss": 2.4516, + "step": 13058 + }, + { + "epoch": 1.0539100960374466, + "grad_norm": 0.663398027420044, + "learning_rate": 5.429176742620137e-05, + "loss": 2.4437, + "step": 13059 + }, + { + "epoch": 1.0539907997740294, + "grad_norm": 0.6363258957862854, + "learning_rate": 5.4277726743919244e-05, + "loss": 2.414, + "step": 13060 + }, + { + "epoch": 1.0540715035106125, + "grad_norm": 0.6600647568702698, + "learning_rate": 5.426368720115568e-05, + "loss": 2.4319, + "step": 13061 + }, + { + "epoch": 1.0541522072471956, + "grad_norm": 0.6941983699798584, + "learning_rate": 5.4249648798260574e-05, + "loss": 2.5247, + "step": 13062 + }, + { + "epoch": 1.0542329109837785, + "grad_norm": 0.7419719099998474, + "learning_rate": 5.423561153558383e-05, + "loss": 2.5088, + "step": 13063 + }, + { + "epoch": 1.0543136147203616, + "grad_norm": 0.708073079586029, + "learning_rate": 5.4221575413475326e-05, + "loss": 2.4037, + "step": 13064 + }, + { + "epoch": 1.0543943184569446, + "grad_norm": 0.7081628441810608, + "learning_rate": 5.4207540432284764e-05, + "loss": 2.4556, + "step": 13065 + }, + { + "epoch": 1.0544750221935275, + "grad_norm": 0.7058689594268799, + "learning_rate": 5.419350659236201e-05, + "loss": 2.4244, + "step": 13066 + }, + { + "epoch": 1.0545557259301106, + "grad_norm": 0.6858707070350647, + "learning_rate": 5.417947389405684e-05, + "loss": 2.4431, + "step": 13067 + }, + { + "epoch": 1.0546364296666935, + 
"grad_norm": 0.6769983768463135, + "learning_rate": 5.416544233771893e-05, + "loss": 2.4257, + "step": 13068 + }, + { + "epoch": 1.0547171334032766, + "grad_norm": 0.7128089070320129, + "learning_rate": 5.4151411923698e-05, + "loss": 2.4558, + "step": 13069 + }, + { + "epoch": 1.0547978371398596, + "grad_norm": 0.6419198513031006, + "learning_rate": 5.413738265234374e-05, + "loss": 2.4421, + "step": 13070 + }, + { + "epoch": 1.0548785408764425, + "grad_norm": 0.760848879814148, + "learning_rate": 5.4123354524005784e-05, + "loss": 2.4427, + "step": 13071 + }, + { + "epoch": 1.0549592446130256, + "grad_norm": 0.6749173998832703, + "learning_rate": 5.410932753903377e-05, + "loss": 2.4902, + "step": 13072 + }, + { + "epoch": 1.0550399483496087, + "grad_norm": 0.6908800601959229, + "learning_rate": 5.4095301697777265e-05, + "loss": 2.4219, + "step": 13073 + }, + { + "epoch": 1.0551206520861915, + "grad_norm": 0.6779965758323669, + "learning_rate": 5.408127700058587e-05, + "loss": 2.4533, + "step": 13074 + }, + { + "epoch": 1.0552013558227746, + "grad_norm": 0.6832355260848999, + "learning_rate": 5.406725344780906e-05, + "loss": 2.418, + "step": 13075 + }, + { + "epoch": 1.0552820595593575, + "grad_norm": 0.6766698956489563, + "learning_rate": 5.4053231039796357e-05, + "loss": 2.4493, + "step": 13076 + }, + { + "epoch": 1.0553627632959406, + "grad_norm": 0.7256276607513428, + "learning_rate": 5.4039209776897285e-05, + "loss": 2.4126, + "step": 13077 + }, + { + "epoch": 1.0554434670325237, + "grad_norm": 0.6687275171279907, + "learning_rate": 5.4025189659461196e-05, + "loss": 2.435, + "step": 13078 + }, + { + "epoch": 1.0555241707691065, + "grad_norm": 0.6800444722175598, + "learning_rate": 5.401117068783758e-05, + "loss": 2.4608, + "step": 13079 + }, + { + "epoch": 1.0556048745056896, + "grad_norm": 0.6947116851806641, + "learning_rate": 5.399715286237583e-05, + "loss": 2.4908, + "step": 13080 + }, + { + "epoch": 1.0556855782422727, + "grad_norm": 0.6907915472984314, + 
"learning_rate": 5.398313618342521e-05, + "loss": 2.4805, + "step": 13081 + }, + { + "epoch": 1.0557662819788556, + "grad_norm": 0.7429100275039673, + "learning_rate": 5.396912065133516e-05, + "loss": 2.458, + "step": 13082 + }, + { + "epoch": 1.0558469857154387, + "grad_norm": 0.7186924815177917, + "learning_rate": 5.3955106266454994e-05, + "loss": 2.4924, + "step": 13083 + }, + { + "epoch": 1.0559276894520215, + "grad_norm": 0.7017999887466431, + "learning_rate": 5.394109302913391e-05, + "loss": 2.4103, + "step": 13084 + }, + { + "epoch": 1.0560083931886046, + "grad_norm": 0.7318955659866333, + "learning_rate": 5.392708093972117e-05, + "loss": 2.4424, + "step": 13085 + }, + { + "epoch": 1.0560890969251877, + "grad_norm": 0.6278600692749023, + "learning_rate": 5.391306999856602e-05, + "loss": 2.4433, + "step": 13086 + }, + { + "epoch": 1.0561698006617706, + "grad_norm": 0.6895800232887268, + "learning_rate": 5.389906020601767e-05, + "loss": 2.4275, + "step": 13087 + }, + { + "epoch": 1.0562505043983537, + "grad_norm": 0.7197345495223999, + "learning_rate": 5.388505156242522e-05, + "loss": 2.4309, + "step": 13088 + }, + { + "epoch": 1.0563312081349367, + "grad_norm": 0.636433482170105, + "learning_rate": 5.3871044068137824e-05, + "loss": 2.4258, + "step": 13089 + }, + { + "epoch": 1.0564119118715196, + "grad_norm": 0.6884748339653015, + "learning_rate": 5.3857037723504634e-05, + "loss": 2.4543, + "step": 13090 + }, + { + "epoch": 1.0564926156081027, + "grad_norm": 0.7277036309242249, + "learning_rate": 5.384303252887464e-05, + "loss": 2.4911, + "step": 13091 + }, + { + "epoch": 1.0565733193446856, + "grad_norm": 0.6940809488296509, + "learning_rate": 5.38290284845969e-05, + "loss": 2.4112, + "step": 13092 + }, + { + "epoch": 1.0566540230812687, + "grad_norm": 0.6729177236557007, + "learning_rate": 5.3815025591020526e-05, + "loss": 2.4394, + "step": 13093 + }, + { + "epoch": 1.0567347268178517, + "grad_norm": 0.6941854357719421, + "learning_rate": 
5.3801023848494416e-05, + "loss": 2.4263, + "step": 13094 + }, + { + "epoch": 1.0568154305544346, + "grad_norm": 0.7046812772750854, + "learning_rate": 5.3787023257367554e-05, + "loss": 2.5196, + "step": 13095 + }, + { + "epoch": 1.0568961342910177, + "grad_norm": 0.6896177530288696, + "learning_rate": 5.377302381798891e-05, + "loss": 2.4178, + "step": 13096 + }, + { + "epoch": 1.0569768380276008, + "grad_norm": 0.6693699955940247, + "learning_rate": 5.375902553070731e-05, + "loss": 2.4908, + "step": 13097 + }, + { + "epoch": 1.0570575417641837, + "grad_norm": 0.6751677989959717, + "learning_rate": 5.3745028395871674e-05, + "loss": 2.4222, + "step": 13098 + }, + { + "epoch": 1.0571382455007667, + "grad_norm": 0.7666265368461609, + "learning_rate": 5.373103241383088e-05, + "loss": 2.4965, + "step": 13099 + }, + { + "epoch": 1.0572189492373496, + "grad_norm": 0.8069329857826233, + "learning_rate": 5.3717037584933674e-05, + "loss": 2.4988, + "step": 13100 + }, + { + "epoch": 1.0572996529739327, + "grad_norm": 0.7160749435424805, + "learning_rate": 5.370304390952887e-05, + "loss": 2.4311, + "step": 13101 + }, + { + "epoch": 1.0573803567105158, + "grad_norm": 0.6936448812484741, + "learning_rate": 5.368905138796523e-05, + "loss": 2.4877, + "step": 13102 + }, + { + "epoch": 1.0574610604470986, + "grad_norm": 0.7202793955802917, + "learning_rate": 5.3675060020591494e-05, + "loss": 2.4841, + "step": 13103 + }, + { + "epoch": 1.0575417641836817, + "grad_norm": 0.7750168442726135, + "learning_rate": 5.366106980775636e-05, + "loss": 2.4828, + "step": 13104 + }, + { + "epoch": 1.0576224679202646, + "grad_norm": 0.7079972624778748, + "learning_rate": 5.364708074980849e-05, + "loss": 2.4912, + "step": 13105 + }, + { + "epoch": 1.0577031716568477, + "grad_norm": 0.704066276550293, + "learning_rate": 5.363309284709657e-05, + "loss": 2.4731, + "step": 13106 + }, + { + "epoch": 1.0577838753934308, + "grad_norm": 0.7040490508079529, + "learning_rate": 5.361910609996915e-05, + "loss": 
2.3811, + "step": 13107 + }, + { + "epoch": 1.0578645791300136, + "grad_norm": 0.6669453978538513, + "learning_rate": 5.360512050877484e-05, + "loss": 2.5372, + "step": 13108 + }, + { + "epoch": 1.0579452828665967, + "grad_norm": 0.7197996973991394, + "learning_rate": 5.359113607386226e-05, + "loss": 2.4612, + "step": 13109 + }, + { + "epoch": 1.0580259866031798, + "grad_norm": 0.7192320823669434, + "learning_rate": 5.3577152795579824e-05, + "loss": 2.4636, + "step": 13110 + }, + { + "epoch": 1.0581066903397627, + "grad_norm": 0.6907937526702881, + "learning_rate": 5.35631706742761e-05, + "loss": 2.4791, + "step": 13111 + }, + { + "epoch": 1.0581873940763458, + "grad_norm": 0.687035083770752, + "learning_rate": 5.354918971029954e-05, + "loss": 2.4706, + "step": 13112 + }, + { + "epoch": 1.0582680978129286, + "grad_norm": 0.6666533350944519, + "learning_rate": 5.353520990399861e-05, + "loss": 2.4789, + "step": 13113 + }, + { + "epoch": 1.0583488015495117, + "grad_norm": 0.6261809468269348, + "learning_rate": 5.35212312557217e-05, + "loss": 2.4485, + "step": 13114 + }, + { + "epoch": 1.0584295052860948, + "grad_norm": 0.6740814447402954, + "learning_rate": 5.350725376581725e-05, + "loss": 2.47, + "step": 13115 + }, + { + "epoch": 1.0585102090226777, + "grad_norm": 0.7634154558181763, + "learning_rate": 5.3493277434633526e-05, + "loss": 2.4685, + "step": 13116 + }, + { + "epoch": 1.0585909127592608, + "grad_norm": 0.6674611568450928, + "learning_rate": 5.34793022625189e-05, + "loss": 2.4362, + "step": 13117 + }, + { + "epoch": 1.0586716164958438, + "grad_norm": 0.7584757804870605, + "learning_rate": 5.346532824982167e-05, + "loss": 2.499, + "step": 13118 + }, + { + "epoch": 1.0587523202324267, + "grad_norm": 0.6453456282615662, + "learning_rate": 5.345135539689015e-05, + "loss": 2.4341, + "step": 13119 + }, + { + "epoch": 1.0588330239690098, + "grad_norm": 0.70013427734375, + "learning_rate": 5.343738370407247e-05, + "loss": 2.3448, + "step": 13120 + }, + { + "epoch": 
1.0589137277055927, + "grad_norm": 0.6763362884521484, + "learning_rate": 5.342341317171693e-05, + "loss": 2.4234, + "step": 13121 + }, + { + "epoch": 1.0589944314421758, + "grad_norm": 0.6896576881408691, + "learning_rate": 5.3409443800171664e-05, + "loss": 2.4753, + "step": 13122 + }, + { + "epoch": 1.0590751351787588, + "grad_norm": 0.6984997987747192, + "learning_rate": 5.339547558978486e-05, + "loss": 2.4581, + "step": 13123 + }, + { + "epoch": 1.0591558389153417, + "grad_norm": 0.7276118993759155, + "learning_rate": 5.338150854090462e-05, + "loss": 2.4765, + "step": 13124 + }, + { + "epoch": 1.0592365426519248, + "grad_norm": 0.6943252086639404, + "learning_rate": 5.336754265387911e-05, + "loss": 2.4514, + "step": 13125 + }, + { + "epoch": 1.0593172463885079, + "grad_norm": 0.7070014476776123, + "learning_rate": 5.335357792905628e-05, + "loss": 2.4365, + "step": 13126 + }, + { + "epoch": 1.0593979501250907, + "grad_norm": 0.6887189149856567, + "learning_rate": 5.333961436678422e-05, + "loss": 2.4834, + "step": 13127 + }, + { + "epoch": 1.0594786538616738, + "grad_norm": 0.8150162696838379, + "learning_rate": 5.332565196741098e-05, + "loss": 2.4474, + "step": 13128 + }, + { + "epoch": 1.0595593575982567, + "grad_norm": 0.6681316494941711, + "learning_rate": 5.331169073128447e-05, + "loss": 2.4888, + "step": 13129 + }, + { + "epoch": 1.0596400613348398, + "grad_norm": 0.6696690320968628, + "learning_rate": 5.329773065875267e-05, + "loss": 2.3874, + "step": 13130 + }, + { + "epoch": 1.0597207650714229, + "grad_norm": 0.729807436466217, + "learning_rate": 5.32837717501635e-05, + "loss": 2.4442, + "step": 13131 + }, + { + "epoch": 1.0598014688080057, + "grad_norm": 0.6959047913551331, + "learning_rate": 5.326981400586486e-05, + "loss": 2.4697, + "step": 13132 + }, + { + "epoch": 1.0598821725445888, + "grad_norm": 0.667294442653656, + "learning_rate": 5.3255857426204606e-05, + "loss": 2.3986, + "step": 13133 + }, + { + "epoch": 1.059962876281172, + "grad_norm": 
0.6953842639923096, + "learning_rate": 5.3241902011530566e-05, + "loss": 2.396, + "step": 13134 + }, + { + "epoch": 1.0600435800177548, + "grad_norm": 0.6544597148895264, + "learning_rate": 5.32279477621906e-05, + "loss": 2.426, + "step": 13135 + }, + { + "epoch": 1.0601242837543379, + "grad_norm": 0.708017885684967, + "learning_rate": 5.321399467853241e-05, + "loss": 2.4931, + "step": 13136 + }, + { + "epoch": 1.0602049874909207, + "grad_norm": 0.6669809818267822, + "learning_rate": 5.3200042760903764e-05, + "loss": 2.4354, + "step": 13137 + }, + { + "epoch": 1.0602856912275038, + "grad_norm": 1.0144098997116089, + "learning_rate": 5.3186092009652435e-05, + "loss": 2.4803, + "step": 13138 + }, + { + "epoch": 1.060366394964087, + "grad_norm": 0.7213768362998962, + "learning_rate": 5.317214242512601e-05, + "loss": 2.4318, + "step": 13139 + }, + { + "epoch": 1.0604470987006698, + "grad_norm": 0.6429069638252258, + "learning_rate": 5.315819400767223e-05, + "loss": 2.458, + "step": 13140 + }, + { + "epoch": 1.0605278024372529, + "grad_norm": 0.6480485796928406, + "learning_rate": 5.3144246757638714e-05, + "loss": 2.4586, + "step": 13141 + }, + { + "epoch": 1.060608506173836, + "grad_norm": 0.7037697434425354, + "learning_rate": 5.3130300675373035e-05, + "loss": 2.4698, + "step": 13142 + }, + { + "epoch": 1.0606892099104188, + "grad_norm": 0.7307559251785278, + "learning_rate": 5.3116355761222725e-05, + "loss": 2.4027, + "step": 13143 + }, + { + "epoch": 1.060769913647002, + "grad_norm": 0.6684615612030029, + "learning_rate": 5.310241201553547e-05, + "loss": 2.478, + "step": 13144 + }, + { + "epoch": 1.0608506173835848, + "grad_norm": 0.7018016576766968, + "learning_rate": 5.308846943865866e-05, + "loss": 2.4229, + "step": 13145 + }, + { + "epoch": 1.0609313211201679, + "grad_norm": 0.7538621425628662, + "learning_rate": 5.307452803093982e-05, + "loss": 2.5201, + "step": 13146 + }, + { + "epoch": 1.061012024856751, + "grad_norm": 0.6957963109016418, + "learning_rate": 
5.306058779272645e-05, + "loss": 2.4233, + "step": 13147 + }, + { + "epoch": 1.0610927285933338, + "grad_norm": 0.6280590295791626, + "learning_rate": 5.304664872436588e-05, + "loss": 2.5117, + "step": 13148 + }, + { + "epoch": 1.061173432329917, + "grad_norm": 0.6937280297279358, + "learning_rate": 5.3032710826205564e-05, + "loss": 2.4889, + "step": 13149 + }, + { + "epoch": 1.0612541360664998, + "grad_norm": 0.6750391125679016, + "learning_rate": 5.3018774098592884e-05, + "loss": 2.4472, + "step": 13150 + }, + { + "epoch": 1.0613348398030829, + "grad_norm": 0.6931902766227722, + "learning_rate": 5.300483854187519e-05, + "loss": 2.3883, + "step": 13151 + }, + { + "epoch": 1.061415543539666, + "grad_norm": 0.6982774138450623, + "learning_rate": 5.2990904156399726e-05, + "loss": 2.4688, + "step": 13152 + }, + { + "epoch": 1.0614962472762488, + "grad_norm": 0.6873522996902466, + "learning_rate": 5.297697094251382e-05, + "loss": 2.4818, + "step": 13153 + }, + { + "epoch": 1.061576951012832, + "grad_norm": 0.635377049446106, + "learning_rate": 5.296303890056471e-05, + "loss": 2.3906, + "step": 13154 + }, + { + "epoch": 1.061657654749415, + "grad_norm": 0.6368159651756287, + "learning_rate": 5.294910803089963e-05, + "loss": 2.4714, + "step": 13155 + }, + { + "epoch": 1.0617383584859978, + "grad_norm": 0.7147238254547119, + "learning_rate": 5.293517833386576e-05, + "loss": 2.4746, + "step": 13156 + }, + { + "epoch": 1.061819062222581, + "grad_norm": 0.742189884185791, + "learning_rate": 5.2921249809810326e-05, + "loss": 2.3913, + "step": 13157 + }, + { + "epoch": 1.061899765959164, + "grad_norm": 0.6665734648704529, + "learning_rate": 5.290732245908038e-05, + "loss": 2.4263, + "step": 13158 + }, + { + "epoch": 1.0619804696957469, + "grad_norm": 0.6894757747650146, + "learning_rate": 5.2893396282023055e-05, + "loss": 2.4204, + "step": 13159 + }, + { + "epoch": 1.06206117343233, + "grad_norm": 0.6394561529159546, + "learning_rate": 5.287947127898546e-05, + "loss": 2.4183, 
+ "step": 13160 + }, + { + "epoch": 1.0621418771689128, + "grad_norm": 0.7422548532485962, + "learning_rate": 5.2865547450314576e-05, + "loss": 2.4454, + "step": 13161 + }, + { + "epoch": 1.062222580905496, + "grad_norm": 0.7486133575439453, + "learning_rate": 5.285162479635748e-05, + "loss": 2.4856, + "step": 13162 + }, + { + "epoch": 1.062303284642079, + "grad_norm": 0.6743031144142151, + "learning_rate": 5.283770331746112e-05, + "loss": 2.4318, + "step": 13163 + }, + { + "epoch": 1.0623839883786619, + "grad_norm": 0.6461686491966248, + "learning_rate": 5.282378301397248e-05, + "loss": 2.4133, + "step": 13164 + }, + { + "epoch": 1.062464692115245, + "grad_norm": 0.6745431423187256, + "learning_rate": 5.28098638862385e-05, + "loss": 2.4463, + "step": 13165 + }, + { + "epoch": 1.0625453958518278, + "grad_norm": 0.6646310687065125, + "learning_rate": 5.279594593460606e-05, + "loss": 2.4211, + "step": 13166 + }, + { + "epoch": 1.062626099588411, + "grad_norm": 0.6789249777793884, + "learning_rate": 5.278202915942207e-05, + "loss": 2.4832, + "step": 13167 + }, + { + "epoch": 1.062706803324994, + "grad_norm": 0.7082679867744446, + "learning_rate": 5.2768113561033326e-05, + "loss": 2.4303, + "step": 13168 + }, + { + "epoch": 1.0627875070615769, + "grad_norm": 0.6875587701797485, + "learning_rate": 5.275419913978664e-05, + "loss": 2.4601, + "step": 13169 + }, + { + "epoch": 1.06286821079816, + "grad_norm": 0.6556203961372375, + "learning_rate": 5.274028589602886e-05, + "loss": 2.4359, + "step": 13170 + }, + { + "epoch": 1.062948914534743, + "grad_norm": 0.7280015349388123, + "learning_rate": 5.272637383010666e-05, + "loss": 2.4999, + "step": 13171 + }, + { + "epoch": 1.063029618271326, + "grad_norm": 0.664654016494751, + "learning_rate": 5.271246294236678e-05, + "loss": 2.3951, + "step": 13172 + }, + { + "epoch": 1.063110322007909, + "grad_norm": 0.6941719055175781, + "learning_rate": 5.2698553233155945e-05, + "loss": 2.45, + "step": 13173 + }, + { + "epoch": 
1.0631910257444919, + "grad_norm": 0.7212931513786316, + "learning_rate": 5.268464470282082e-05, + "loss": 2.4615, + "step": 13174 + }, + { + "epoch": 1.063271729481075, + "grad_norm": 0.6877106428146362, + "learning_rate": 5.2670737351708014e-05, + "loss": 2.4495, + "step": 13175 + }, + { + "epoch": 1.063352433217658, + "grad_norm": 0.737718939781189, + "learning_rate": 5.26568311801642e-05, + "loss": 2.4971, + "step": 13176 + }, + { + "epoch": 1.063433136954241, + "grad_norm": 0.6909129619598389, + "learning_rate": 5.264292618853587e-05, + "loss": 2.4889, + "step": 13177 + }, + { + "epoch": 1.063513840690824, + "grad_norm": 0.6750304102897644, + "learning_rate": 5.262902237716961e-05, + "loss": 2.4779, + "step": 13178 + }, + { + "epoch": 1.063594544427407, + "grad_norm": 0.7256019115447998, + "learning_rate": 5.2615119746411954e-05, + "loss": 2.4904, + "step": 13179 + }, + { + "epoch": 1.06367524816399, + "grad_norm": 0.7335983514785767, + "learning_rate": 5.26012182966094e-05, + "loss": 2.4357, + "step": 13180 + }, + { + "epoch": 1.063755951900573, + "grad_norm": 0.6534200310707092, + "learning_rate": 5.258731802810837e-05, + "loss": 2.4213, + "step": 13181 + }, + { + "epoch": 1.063836655637156, + "grad_norm": 0.6899768114089966, + "learning_rate": 5.257341894125529e-05, + "loss": 2.4963, + "step": 13182 + }, + { + "epoch": 1.063917359373739, + "grad_norm": 0.7016159892082214, + "learning_rate": 5.25595210363966e-05, + "loss": 2.4583, + "step": 13183 + }, + { + "epoch": 1.063998063110322, + "grad_norm": 0.6868152022361755, + "learning_rate": 5.2545624313878636e-05, + "loss": 2.4523, + "step": 13184 + }, + { + "epoch": 1.064078766846905, + "grad_norm": 0.7442622184753418, + "learning_rate": 5.2531728774047785e-05, + "loss": 2.425, + "step": 13185 + }, + { + "epoch": 1.064159470583488, + "grad_norm": 0.6900869011878967, + "learning_rate": 5.251783441725037e-05, + "loss": 2.459, + "step": 13186 + }, + { + "epoch": 1.0642401743200711, + "grad_norm": 
0.6910288333892822, + "learning_rate": 5.25039412438326e-05, + "loss": 2.4882, + "step": 13187 + }, + { + "epoch": 1.064320878056654, + "grad_norm": 0.7644359469413757, + "learning_rate": 5.249004925414076e-05, + "loss": 2.4663, + "step": 13188 + }, + { + "epoch": 1.064401581793237, + "grad_norm": 0.6703082919120789, + "learning_rate": 5.247615844852114e-05, + "loss": 2.4309, + "step": 13189 + }, + { + "epoch": 1.06448228552982, + "grad_norm": 0.6449835896492004, + "learning_rate": 5.246226882731983e-05, + "loss": 2.4307, + "step": 13190 + }, + { + "epoch": 1.064562989266403, + "grad_norm": 0.7332713603973389, + "learning_rate": 5.244838039088305e-05, + "loss": 2.3763, + "step": 13191 + }, + { + "epoch": 1.0646436930029861, + "grad_norm": 0.7626641988754272, + "learning_rate": 5.2434493139556974e-05, + "loss": 2.4167, + "step": 13192 + }, + { + "epoch": 1.064724396739569, + "grad_norm": 0.6924002170562744, + "learning_rate": 5.2420607073687614e-05, + "loss": 2.4751, + "step": 13193 + }, + { + "epoch": 1.064805100476152, + "grad_norm": 0.6815003156661987, + "learning_rate": 5.2406722193621074e-05, + "loss": 2.4731, + "step": 13194 + }, + { + "epoch": 1.064885804212735, + "grad_norm": 0.7632609009742737, + "learning_rate": 5.239283849970347e-05, + "loss": 2.4562, + "step": 13195 + }, + { + "epoch": 1.064966507949318, + "grad_norm": 0.7157592177391052, + "learning_rate": 5.23789559922808e-05, + "loss": 2.4507, + "step": 13196 + }, + { + "epoch": 1.065047211685901, + "grad_norm": 0.7035543918609619, + "learning_rate": 5.2365074671699e-05, + "loss": 2.4616, + "step": 13197 + }, + { + "epoch": 1.065127915422484, + "grad_norm": 0.7566644549369812, + "learning_rate": 5.235119453830406e-05, + "loss": 2.4751, + "step": 13198 + }, + { + "epoch": 1.065208619159067, + "grad_norm": 0.7030916213989258, + "learning_rate": 5.233731559244194e-05, + "loss": 2.381, + "step": 13199 + }, + { + "epoch": 1.0652893228956501, + "grad_norm": 0.7663755416870117, + "learning_rate": 
5.232343783445847e-05, + "loss": 2.4822, + "step": 13200 + }, + { + "epoch": 1.065370026632233, + "grad_norm": 0.717767596244812, + "learning_rate": 5.230956126469955e-05, + "loss": 2.4807, + "step": 13201 + }, + { + "epoch": 1.065450730368816, + "grad_norm": 0.6920818090438843, + "learning_rate": 5.229568588351108e-05, + "loss": 2.4643, + "step": 13202 + }, + { + "epoch": 1.0655314341053992, + "grad_norm": 0.6812553405761719, + "learning_rate": 5.228181169123877e-05, + "loss": 2.4443, + "step": 13203 + }, + { + "epoch": 1.065612137841982, + "grad_norm": 0.7241889834403992, + "learning_rate": 5.226793868822846e-05, + "loss": 2.4581, + "step": 13204 + }, + { + "epoch": 1.0656928415785651, + "grad_norm": 0.7254642248153687, + "learning_rate": 5.225406687482588e-05, + "loss": 2.4999, + "step": 13205 + }, + { + "epoch": 1.065773545315148, + "grad_norm": 0.7316950559616089, + "learning_rate": 5.2240196251376764e-05, + "loss": 2.4493, + "step": 13206 + }, + { + "epoch": 1.065854249051731, + "grad_norm": 0.7208307385444641, + "learning_rate": 5.22263268182268e-05, + "loss": 2.5083, + "step": 13207 + }, + { + "epoch": 1.0659349527883142, + "grad_norm": 0.6552214622497559, + "learning_rate": 5.22124585757217e-05, + "loss": 2.4662, + "step": 13208 + }, + { + "epoch": 1.066015656524897, + "grad_norm": 0.7949681878089905, + "learning_rate": 5.219859152420701e-05, + "loss": 2.4584, + "step": 13209 + }, + { + "epoch": 1.0660963602614801, + "grad_norm": 0.7012154459953308, + "learning_rate": 5.2184725664028366e-05, + "loss": 2.4702, + "step": 13210 + }, + { + "epoch": 1.066177063998063, + "grad_norm": 0.7431927919387817, + "learning_rate": 5.217086099553136e-05, + "loss": 2.4422, + "step": 13211 + }, + { + "epoch": 1.066257767734646, + "grad_norm": 0.7235366702079773, + "learning_rate": 5.2156997519061554e-05, + "loss": 2.4173, + "step": 13212 + }, + { + "epoch": 1.0663384714712292, + "grad_norm": 0.7475029826164246, + "learning_rate": 5.214313523496439e-05, + "loss": 2.4924, + 
"step": 13213 + }, + { + "epoch": 1.066419175207812, + "grad_norm": 0.6326786875724792, + "learning_rate": 5.212927414358542e-05, + "loss": 2.4154, + "step": 13214 + }, + { + "epoch": 1.0664998789443951, + "grad_norm": 0.6755837798118591, + "learning_rate": 5.211541424527004e-05, + "loss": 2.4248, + "step": 13215 + }, + { + "epoch": 1.0665805826809782, + "grad_norm": 0.645395040512085, + "learning_rate": 5.210155554036373e-05, + "loss": 2.4078, + "step": 13216 + }, + { + "epoch": 1.066661286417561, + "grad_norm": 0.799913763999939, + "learning_rate": 5.208769802921185e-05, + "loss": 2.5067, + "step": 13217 + }, + { + "epoch": 1.0667419901541442, + "grad_norm": 0.7056344747543335, + "learning_rate": 5.207384171215983e-05, + "loss": 2.4817, + "step": 13218 + }, + { + "epoch": 1.0668226938907273, + "grad_norm": 0.7082187533378601, + "learning_rate": 5.205998658955291e-05, + "loss": 2.4495, + "step": 13219 + }, + { + "epoch": 1.0669033976273101, + "grad_norm": 0.6948464512825012, + "learning_rate": 5.204613266173646e-05, + "loss": 2.4584, + "step": 13220 + }, + { + "epoch": 1.0669841013638932, + "grad_norm": 0.7812542915344238, + "learning_rate": 5.203227992905575e-05, + "loss": 2.4803, + "step": 13221 + }, + { + "epoch": 1.067064805100476, + "grad_norm": 0.6892200708389282, + "learning_rate": 5.201842839185598e-05, + "loss": 2.4424, + "step": 13222 + }, + { + "epoch": 1.0671455088370592, + "grad_norm": 0.6982070803642273, + "learning_rate": 5.20045780504824e-05, + "loss": 2.4654, + "step": 13223 + }, + { + "epoch": 1.0672262125736423, + "grad_norm": 0.6799101233482361, + "learning_rate": 5.1990728905280205e-05, + "loss": 2.4748, + "step": 13224 + }, + { + "epoch": 1.0673069163102251, + "grad_norm": 0.6703687906265259, + "learning_rate": 5.1976880956594544e-05, + "loss": 2.4459, + "step": 13225 + }, + { + "epoch": 1.0673876200468082, + "grad_norm": 0.6821435689926147, + "learning_rate": 5.196303420477053e-05, + "loss": 2.4517, + "step": 13226 + }, + { + "epoch": 
1.067468323783391, + "grad_norm": 0.6369695067405701, + "learning_rate": 5.194918865015328e-05, + "loss": 2.4388, + "step": 13227 + }, + { + "epoch": 1.0675490275199742, + "grad_norm": 0.6465736627578735, + "learning_rate": 5.1935344293087885e-05, + "loss": 2.3839, + "step": 13228 + }, + { + "epoch": 1.0676297312565572, + "grad_norm": 0.6745415329933167, + "learning_rate": 5.192150113391933e-05, + "loss": 2.4676, + "step": 13229 + }, + { + "epoch": 1.0677104349931401, + "grad_norm": 0.7605211138725281, + "learning_rate": 5.190765917299263e-05, + "loss": 2.4764, + "step": 13230 + }, + { + "epoch": 1.0677911387297232, + "grad_norm": 0.7040959596633911, + "learning_rate": 5.1893818410652825e-05, + "loss": 2.4727, + "step": 13231 + }, + { + "epoch": 1.0678718424663063, + "grad_norm": 0.6718928813934326, + "learning_rate": 5.1879978847244785e-05, + "loss": 2.4308, + "step": 13232 + }, + { + "epoch": 1.0679525462028892, + "grad_norm": 0.6788188219070435, + "learning_rate": 5.1866140483113445e-05, + "loss": 2.4278, + "step": 13233 + }, + { + "epoch": 1.0680332499394722, + "grad_norm": 0.7310218811035156, + "learning_rate": 5.185230331860371e-05, + "loss": 2.4585, + "step": 13234 + }, + { + "epoch": 1.068113953676055, + "grad_norm": 0.8092277646064758, + "learning_rate": 5.183846735406044e-05, + "loss": 2.4128, + "step": 13235 + }, + { + "epoch": 1.0681946574126382, + "grad_norm": 0.6469862461090088, + "learning_rate": 5.182463258982846e-05, + "loss": 2.4315, + "step": 13236 + }, + { + "epoch": 1.0682753611492213, + "grad_norm": 0.7948115468025208, + "learning_rate": 5.181079902625261e-05, + "loss": 2.5127, + "step": 13237 + }, + { + "epoch": 1.0683560648858041, + "grad_norm": 0.6988852620124817, + "learning_rate": 5.179696666367757e-05, + "loss": 2.432, + "step": 13238 + }, + { + "epoch": 1.0684367686223872, + "grad_norm": 0.6914555430412292, + "learning_rate": 5.1783135502448124e-05, + "loss": 2.4748, + "step": 13239 + }, + { + "epoch": 1.0685174723589703, + "grad_norm": 
0.7586313486099243, + "learning_rate": 5.176930554290902e-05, + "loss": 2.4522, + "step": 13240 + }, + { + "epoch": 1.0685981760955532, + "grad_norm": 0.6763948798179626, + "learning_rate": 5.175547678540487e-05, + "loss": 2.4477, + "step": 13241 + }, + { + "epoch": 1.0686788798321363, + "grad_norm": 0.7625983357429504, + "learning_rate": 5.1741649230280334e-05, + "loss": 2.4725, + "step": 13242 + }, + { + "epoch": 1.0687595835687191, + "grad_norm": 0.6574710011482239, + "learning_rate": 5.172782287788005e-05, + "loss": 2.4212, + "step": 13243 + }, + { + "epoch": 1.0688402873053022, + "grad_norm": 0.770062267780304, + "learning_rate": 5.1713997728548615e-05, + "loss": 2.5065, + "step": 13244 + }, + { + "epoch": 1.0689209910418853, + "grad_norm": 0.7719037532806396, + "learning_rate": 5.170017378263057e-05, + "loss": 2.5082, + "step": 13245 + }, + { + "epoch": 1.0690016947784682, + "grad_norm": 0.7106119394302368, + "learning_rate": 5.168635104047046e-05, + "loss": 2.4922, + "step": 13246 + }, + { + "epoch": 1.0690823985150513, + "grad_norm": 0.711815595626831, + "learning_rate": 5.167252950241281e-05, + "loss": 2.498, + "step": 13247 + }, + { + "epoch": 1.0691631022516344, + "grad_norm": 0.6926038265228271, + "learning_rate": 5.165870916880201e-05, + "loss": 2.4464, + "step": 13248 + }, + { + "epoch": 1.0692438059882172, + "grad_norm": 0.6959360241889954, + "learning_rate": 5.164489003998254e-05, + "loss": 2.4668, + "step": 13249 + }, + { + "epoch": 1.0693245097248003, + "grad_norm": 0.7165184617042542, + "learning_rate": 5.1631072116298875e-05, + "loss": 2.4198, + "step": 13250 + }, + { + "epoch": 1.0694052134613832, + "grad_norm": 0.7133236527442932, + "learning_rate": 5.161725539809527e-05, + "loss": 2.4691, + "step": 13251 + }, + { + "epoch": 1.0694859171979663, + "grad_norm": 0.7057758569717407, + "learning_rate": 5.160343988571613e-05, + "loss": 2.466, + "step": 13252 + }, + { + "epoch": 1.0695666209345494, + "grad_norm": 0.6808326244354248, + 
"learning_rate": 5.158962557950583e-05, + "loss": 2.4248, + "step": 13253 + }, + { + "epoch": 1.0696473246711322, + "grad_norm": 0.7166025638580322, + "learning_rate": 5.1575812479808563e-05, + "loss": 2.4753, + "step": 13254 + }, + { + "epoch": 1.0697280284077153, + "grad_norm": 0.7395358085632324, + "learning_rate": 5.156200058696863e-05, + "loss": 2.485, + "step": 13255 + }, + { + "epoch": 1.0698087321442982, + "grad_norm": 0.681106686592102, + "learning_rate": 5.154818990133026e-05, + "loss": 2.5077, + "step": 13256 + }, + { + "epoch": 1.0698894358808813, + "grad_norm": 0.7517002820968628, + "learning_rate": 5.153438042323766e-05, + "loss": 2.5093, + "step": 13257 + }, + { + "epoch": 1.0699701396174643, + "grad_norm": 0.6516926288604736, + "learning_rate": 5.152057215303499e-05, + "loss": 2.4416, + "step": 13258 + }, + { + "epoch": 1.0700508433540472, + "grad_norm": 0.6930893063545227, + "learning_rate": 5.150676509106638e-05, + "loss": 2.506, + "step": 13259 + }, + { + "epoch": 1.0701315470906303, + "grad_norm": 0.7737041115760803, + "learning_rate": 5.1492959237675986e-05, + "loss": 2.4355, + "step": 13260 + }, + { + "epoch": 1.0702122508272134, + "grad_norm": 0.7274872660636902, + "learning_rate": 5.14791545932078e-05, + "loss": 2.5552, + "step": 13261 + }, + { + "epoch": 1.0702929545637963, + "grad_norm": 0.7112408876419067, + "learning_rate": 5.146535115800593e-05, + "loss": 2.4041, + "step": 13262 + }, + { + "epoch": 1.0703736583003793, + "grad_norm": 0.6822024583816528, + "learning_rate": 5.1451548932414415e-05, + "loss": 2.4346, + "step": 13263 + }, + { + "epoch": 1.0704543620369624, + "grad_norm": 0.6590598225593567, + "learning_rate": 5.1437747916777165e-05, + "loss": 2.3946, + "step": 13264 + }, + { + "epoch": 1.0705350657735453, + "grad_norm": 0.643014132976532, + "learning_rate": 5.142394811143818e-05, + "loss": 2.4455, + "step": 13265 + }, + { + "epoch": 1.0706157695101284, + "grad_norm": 0.6480194926261902, + "learning_rate": 
5.141014951674139e-05, + "loss": 2.4304, + "step": 13266 + }, + { + "epoch": 1.0706964732467112, + "grad_norm": 0.6933526992797852, + "learning_rate": 5.139635213303069e-05, + "loss": 2.4627, + "step": 13267 + }, + { + "epoch": 1.0707771769832943, + "grad_norm": 0.6832638382911682, + "learning_rate": 5.138255596064995e-05, + "loss": 2.4645, + "step": 13268 + }, + { + "epoch": 1.0708578807198774, + "grad_norm": 0.6579757928848267, + "learning_rate": 5.1368760999943034e-05, + "loss": 2.3928, + "step": 13269 + }, + { + "epoch": 1.0709385844564603, + "grad_norm": 0.6658132672309875, + "learning_rate": 5.1354967251253684e-05, + "loss": 2.4732, + "step": 13270 + }, + { + "epoch": 1.0710192881930434, + "grad_norm": 0.7610828876495361, + "learning_rate": 5.13411747149257e-05, + "loss": 2.4781, + "step": 13271 + }, + { + "epoch": 1.0710999919296262, + "grad_norm": 0.682858943939209, + "learning_rate": 5.1327383391302895e-05, + "loss": 2.4545, + "step": 13272 + }, + { + "epoch": 1.0711806956662093, + "grad_norm": 0.7461360692977905, + "learning_rate": 5.131359328072887e-05, + "loss": 2.4647, + "step": 13273 + }, + { + "epoch": 1.0712613994027924, + "grad_norm": 0.6767961382865906, + "learning_rate": 5.129980438354738e-05, + "loss": 2.4562, + "step": 13274 + }, + { + "epoch": 1.0713421031393753, + "grad_norm": 0.6768184304237366, + "learning_rate": 5.1286016700102066e-05, + "loss": 2.4662, + "step": 13275 + }, + { + "epoch": 1.0714228068759584, + "grad_norm": 0.7022743225097656, + "learning_rate": 5.1272230230736554e-05, + "loss": 2.4321, + "step": 13276 + }, + { + "epoch": 1.0715035106125415, + "grad_norm": 0.725488007068634, + "learning_rate": 5.125844497579444e-05, + "loss": 2.457, + "step": 13277 + }, + { + "epoch": 1.0715842143491243, + "grad_norm": 0.7542931437492371, + "learning_rate": 5.124466093561928e-05, + "loss": 2.4302, + "step": 13278 + }, + { + "epoch": 1.0716649180857074, + "grad_norm": 0.6598316431045532, + "learning_rate": 5.123087811055467e-05, + "loss": 
2.4552, + "step": 13279 + }, + { + "epoch": 1.0717456218222903, + "grad_norm": 0.7533490061759949, + "learning_rate": 5.1217096500944017e-05, + "loss": 2.4778, + "step": 13280 + }, + { + "epoch": 1.0718263255588734, + "grad_norm": 0.6890795826911926, + "learning_rate": 5.1203316107130825e-05, + "loss": 2.4349, + "step": 13281 + }, + { + "epoch": 1.0719070292954564, + "grad_norm": 0.7004082202911377, + "learning_rate": 5.118953692945862e-05, + "loss": 2.4645, + "step": 13282 + }, + { + "epoch": 1.0719877330320393, + "grad_norm": 0.7409259676933289, + "learning_rate": 5.117575896827068e-05, + "loss": 2.4734, + "step": 13283 + }, + { + "epoch": 1.0720684367686224, + "grad_norm": 0.7035481929779053, + "learning_rate": 5.116198222391046e-05, + "loss": 2.5027, + "step": 13284 + }, + { + "epoch": 1.0721491405052055, + "grad_norm": 0.7146698236465454, + "learning_rate": 5.114820669672132e-05, + "loss": 2.4623, + "step": 13285 + }, + { + "epoch": 1.0722298442417884, + "grad_norm": 0.7813882231712341, + "learning_rate": 5.113443238704656e-05, + "loss": 2.4644, + "step": 13286 + }, + { + "epoch": 1.0723105479783714, + "grad_norm": 0.6592430472373962, + "learning_rate": 5.1120659295229486e-05, + "loss": 2.4682, + "step": 13287 + }, + { + "epoch": 1.0723912517149543, + "grad_norm": 0.7047967910766602, + "learning_rate": 5.1106887421613395e-05, + "loss": 2.4368, + "step": 13288 + }, + { + "epoch": 1.0724719554515374, + "grad_norm": 0.700977087020874, + "learning_rate": 5.109311676654143e-05, + "loss": 2.4471, + "step": 13289 + }, + { + "epoch": 1.0725526591881205, + "grad_norm": 0.6821093559265137, + "learning_rate": 5.107934733035684e-05, + "loss": 2.433, + "step": 13290 + }, + { + "epoch": 1.0726333629247033, + "grad_norm": 0.6579930186271667, + "learning_rate": 5.1065579113402794e-05, + "loss": 2.4527, + "step": 13291 + }, + { + "epoch": 1.0727140666612864, + "grad_norm": 0.658514678478241, + "learning_rate": 5.105181211602248e-05, + "loss": 2.4443, + "step": 13292 + }, + { + 
"epoch": 1.0727947703978695, + "grad_norm": 0.6963977217674255, + "learning_rate": 5.103804633855891e-05, + "loss": 2.4699, + "step": 13293 + }, + { + "epoch": 1.0728754741344524, + "grad_norm": 0.6670787334442139, + "learning_rate": 5.102428178135522e-05, + "loss": 2.4672, + "step": 13294 + }, + { + "epoch": 1.0729561778710355, + "grad_norm": 0.6959822773933411, + "learning_rate": 5.1010518444754454e-05, + "loss": 2.4338, + "step": 13295 + }, + { + "epoch": 1.0730368816076183, + "grad_norm": 0.6534817218780518, + "learning_rate": 5.0996756329099614e-05, + "loss": 2.4491, + "step": 13296 + }, + { + "epoch": 1.0731175853442014, + "grad_norm": 0.7265146970748901, + "learning_rate": 5.098299543473371e-05, + "loss": 2.4718, + "step": 13297 + }, + { + "epoch": 1.0731982890807845, + "grad_norm": 0.6554745435714722, + "learning_rate": 5.0969235761999746e-05, + "loss": 2.4286, + "step": 13298 + }, + { + "epoch": 1.0732789928173674, + "grad_norm": 0.7003172039985657, + "learning_rate": 5.095547731124053e-05, + "loss": 2.4182, + "step": 13299 + }, + { + "epoch": 1.0733596965539505, + "grad_norm": 0.6700341105461121, + "learning_rate": 5.094172008279904e-05, + "loss": 2.428, + "step": 13300 + }, + { + "epoch": 1.0734404002905333, + "grad_norm": 0.7290289402008057, + "learning_rate": 5.0927964077018164e-05, + "loss": 2.4324, + "step": 13301 + }, + { + "epoch": 1.0735211040271164, + "grad_norm": 0.6999204158782959, + "learning_rate": 5.0914209294240644e-05, + "loss": 2.5386, + "step": 13302 + }, + { + "epoch": 1.0736018077636995, + "grad_norm": 0.7008000612258911, + "learning_rate": 5.090045573480935e-05, + "loss": 2.5295, + "step": 13303 + }, + { + "epoch": 1.0736825115002824, + "grad_norm": 0.7023071646690369, + "learning_rate": 5.088670339906705e-05, + "loss": 2.4418, + "step": 13304 + }, + { + "epoch": 1.0737632152368655, + "grad_norm": 0.627174437046051, + "learning_rate": 5.0872952287356525e-05, + "loss": 2.3782, + "step": 13305 + }, + { + "epoch": 1.0738439189734486, + 
"grad_norm": 0.6992766857147217, + "learning_rate": 5.0859202400020364e-05, + "loss": 2.4698, + "step": 13306 + }, + { + "epoch": 1.0739246227100314, + "grad_norm": 0.7189817428588867, + "learning_rate": 5.084545373740138e-05, + "loss": 2.5248, + "step": 13307 + }, + { + "epoch": 1.0740053264466145, + "grad_norm": 0.6849164962768555, + "learning_rate": 5.0831706299842216e-05, + "loss": 2.4084, + "step": 13308 + }, + { + "epoch": 1.0740860301831976, + "grad_norm": 0.6985825300216675, + "learning_rate": 5.0817960087685424e-05, + "loss": 2.4893, + "step": 13309 + }, + { + "epoch": 1.0741667339197805, + "grad_norm": 0.6519783139228821, + "learning_rate": 5.080421510127362e-05, + "loss": 2.5144, + "step": 13310 + }, + { + "epoch": 1.0742474376563635, + "grad_norm": 0.6605731248855591, + "learning_rate": 5.079047134094941e-05, + "loss": 2.4487, + "step": 13311 + }, + { + "epoch": 1.0743281413929464, + "grad_norm": 0.7236705422401428, + "learning_rate": 5.077672880705526e-05, + "loss": 2.4578, + "step": 13312 + }, + { + "epoch": 1.0744088451295295, + "grad_norm": 0.7126381397247314, + "learning_rate": 5.07629874999337e-05, + "loss": 2.4528, + "step": 13313 + }, + { + "epoch": 1.0744895488661126, + "grad_norm": 0.7247878313064575, + "learning_rate": 5.0749247419927236e-05, + "loss": 2.563, + "step": 13314 + }, + { + "epoch": 1.0745702526026955, + "grad_norm": 0.728349506855011, + "learning_rate": 5.0735508567378234e-05, + "loss": 2.4229, + "step": 13315 + }, + { + "epoch": 1.0746509563392785, + "grad_norm": 0.6593719124794006, + "learning_rate": 5.072177094262913e-05, + "loss": 2.4853, + "step": 13316 + }, + { + "epoch": 1.0747316600758614, + "grad_norm": 0.6519735455513, + "learning_rate": 5.070803454602231e-05, + "loss": 2.4507, + "step": 13317 + }, + { + "epoch": 1.0748123638124445, + "grad_norm": 0.6660017371177673, + "learning_rate": 5.0694299377900115e-05, + "loss": 2.4286, + "step": 13318 + }, + { + "epoch": 1.0748930675490276, + "grad_norm": 0.7506695985794067, + 
"learning_rate": 5.0680565438604876e-05, + "loss": 2.4841, + "step": 13319 + }, + { + "epoch": 1.0749737712856104, + "grad_norm": 0.6855955719947815, + "learning_rate": 5.0666832728478863e-05, + "loss": 2.3817, + "step": 13320 + }, + { + "epoch": 1.0750544750221935, + "grad_norm": 0.7151634693145752, + "learning_rate": 5.065310124786438e-05, + "loss": 2.3984, + "step": 13321 + }, + { + "epoch": 1.0751351787587766, + "grad_norm": 0.6551649570465088, + "learning_rate": 5.063937099710356e-05, + "loss": 2.4574, + "step": 13322 + }, + { + "epoch": 1.0752158824953595, + "grad_norm": 0.7443479895591736, + "learning_rate": 5.062564197653865e-05, + "loss": 2.52, + "step": 13323 + }, + { + "epoch": 1.0752965862319426, + "grad_norm": 0.7554972767829895, + "learning_rate": 5.061191418651186e-05, + "loss": 2.483, + "step": 13324 + }, + { + "epoch": 1.0753772899685254, + "grad_norm": 0.7661007642745972, + "learning_rate": 5.059818762736521e-05, + "loss": 2.566, + "step": 13325 + }, + { + "epoch": 1.0754579937051085, + "grad_norm": 0.7416480183601379, + "learning_rate": 5.058446229944087e-05, + "loss": 2.465, + "step": 13326 + }, + { + "epoch": 1.0755386974416916, + "grad_norm": 0.6997848749160767, + "learning_rate": 5.057073820308089e-05, + "loss": 2.4936, + "step": 13327 + }, + { + "epoch": 1.0756194011782745, + "grad_norm": 0.7570235133171082, + "learning_rate": 5.0557015338627345e-05, + "loss": 2.519, + "step": 13328 + }, + { + "epoch": 1.0757001049148576, + "grad_norm": 0.7910803556442261, + "learning_rate": 5.0543293706422214e-05, + "loss": 2.4932, + "step": 13329 + }, + { + "epoch": 1.0757808086514407, + "grad_norm": 0.7068312168121338, + "learning_rate": 5.052957330680752e-05, + "loss": 2.4489, + "step": 13330 + }, + { + "epoch": 1.0758615123880235, + "grad_norm": 0.7818215489387512, + "learning_rate": 5.051585414012514e-05, + "loss": 2.4467, + "step": 13331 + }, + { + "epoch": 1.0759422161246066, + "grad_norm": 0.7359446287155151, + "learning_rate": 
5.0502136206717046e-05, + "loss": 2.4348, + "step": 13332 + }, + { + "epoch": 1.0760229198611895, + "grad_norm": 0.694726824760437, + "learning_rate": 5.0488419506925124e-05, + "loss": 2.4554, + "step": 13333 + }, + { + "epoch": 1.0761036235977726, + "grad_norm": 0.6776530742645264, + "learning_rate": 5.047470404109118e-05, + "loss": 2.4206, + "step": 13334 + }, + { + "epoch": 1.0761843273343557, + "grad_norm": 0.6977556943893433, + "learning_rate": 5.0460989809557066e-05, + "loss": 2.4748, + "step": 13335 + }, + { + "epoch": 1.0762650310709385, + "grad_norm": 0.6888061761856079, + "learning_rate": 5.044727681266459e-05, + "loss": 2.4129, + "step": 13336 + }, + { + "epoch": 1.0763457348075216, + "grad_norm": 0.744110643863678, + "learning_rate": 5.043356505075549e-05, + "loss": 2.4815, + "step": 13337 + }, + { + "epoch": 1.0764264385441047, + "grad_norm": 0.6726455688476562, + "learning_rate": 5.041985452417154e-05, + "loss": 2.4299, + "step": 13338 + }, + { + "epoch": 1.0765071422806876, + "grad_norm": 0.6755545735359192, + "learning_rate": 5.040614523325441e-05, + "loss": 2.4188, + "step": 13339 + }, + { + "epoch": 1.0765878460172706, + "grad_norm": 0.7152739763259888, + "learning_rate": 5.039243717834582e-05, + "loss": 2.4366, + "step": 13340 + }, + { + "epoch": 1.0766685497538535, + "grad_norm": 0.7253085374832153, + "learning_rate": 5.037873035978733e-05, + "loss": 2.4681, + "step": 13341 + }, + { + "epoch": 1.0767492534904366, + "grad_norm": 0.6780266165733337, + "learning_rate": 5.03650247779206e-05, + "loss": 2.5163, + "step": 13342 + }, + { + "epoch": 1.0768299572270197, + "grad_norm": 0.7440996170043945, + "learning_rate": 5.035132043308722e-05, + "loss": 2.4831, + "step": 13343 + }, + { + "epoch": 1.0769106609636026, + "grad_norm": 0.6619833111763, + "learning_rate": 5.0337617325628695e-05, + "loss": 2.433, + "step": 13344 + }, + { + "epoch": 1.0769913647001856, + "grad_norm": 0.7518059015274048, + "learning_rate": 5.032391545588656e-05, + "loss": 
2.4241, + "step": 13345 + }, + { + "epoch": 1.0770720684367687, + "grad_norm": 0.6592784523963928, + "learning_rate": 5.031021482420231e-05, + "loss": 2.4902, + "step": 13346 + }, + { + "epoch": 1.0771527721733516, + "grad_norm": 0.7192299365997314, + "learning_rate": 5.029651543091739e-05, + "loss": 2.4445, + "step": 13347 + }, + { + "epoch": 1.0772334759099347, + "grad_norm": 0.7376793622970581, + "learning_rate": 5.028281727637323e-05, + "loss": 2.4532, + "step": 13348 + }, + { + "epoch": 1.0773141796465175, + "grad_norm": 0.7344524264335632, + "learning_rate": 5.026912036091127e-05, + "loss": 2.4193, + "step": 13349 + }, + { + "epoch": 1.0773948833831006, + "grad_norm": 0.7343986630439758, + "learning_rate": 5.0255424684872785e-05, + "loss": 2.4912, + "step": 13350 + }, + { + "epoch": 1.0774755871196837, + "grad_norm": 0.7103631496429443, + "learning_rate": 5.024173024859916e-05, + "loss": 2.4611, + "step": 13351 + }, + { + "epoch": 1.0775562908562666, + "grad_norm": 0.7554094791412354, + "learning_rate": 5.022803705243169e-05, + "loss": 2.4875, + "step": 13352 + }, + { + "epoch": 1.0776369945928497, + "grad_norm": 0.6754978895187378, + "learning_rate": 5.0214345096711655e-05, + "loss": 2.4585, + "step": 13353 + }, + { + "epoch": 1.0777176983294328, + "grad_norm": 0.690747857093811, + "learning_rate": 5.020065438178026e-05, + "loss": 2.4751, + "step": 13354 + }, + { + "epoch": 1.0777984020660156, + "grad_norm": 0.7012028694152832, + "learning_rate": 5.018696490797874e-05, + "loss": 2.4443, + "step": 13355 + }, + { + "epoch": 1.0778791058025987, + "grad_norm": 0.6788459420204163, + "learning_rate": 5.017327667564831e-05, + "loss": 2.4135, + "step": 13356 + }, + { + "epoch": 1.0779598095391816, + "grad_norm": 0.6662794351577759, + "learning_rate": 5.015958968512997e-05, + "loss": 2.3801, + "step": 13357 + }, + { + "epoch": 1.0780405132757647, + "grad_norm": 0.7873939275741577, + "learning_rate": 5.0145903936764994e-05, + "loss": 2.4629, + "step": 13358 + }, + { + 
"epoch": 1.0781212170123478, + "grad_norm": 0.7484980225563049, + "learning_rate": 5.0132219430894455e-05, + "loss": 2.4307, + "step": 13359 + }, + { + "epoch": 1.0782019207489306, + "grad_norm": 0.7559076547622681, + "learning_rate": 5.011853616785932e-05, + "loss": 2.4846, + "step": 13360 + }, + { + "epoch": 1.0782826244855137, + "grad_norm": 0.6822710633277893, + "learning_rate": 5.010485414800066e-05, + "loss": 2.4448, + "step": 13361 + }, + { + "epoch": 1.0783633282220966, + "grad_norm": 0.6665955185890198, + "learning_rate": 5.0091173371659496e-05, + "loss": 2.4562, + "step": 13362 + }, + { + "epoch": 1.0784440319586797, + "grad_norm": 0.6645659804344177, + "learning_rate": 5.0077493839176714e-05, + "loss": 2.4545, + "step": 13363 + }, + { + "epoch": 1.0785247356952627, + "grad_norm": 0.6648181080818176, + "learning_rate": 5.0063815550893276e-05, + "loss": 2.4565, + "step": 13364 + }, + { + "epoch": 1.0786054394318456, + "grad_norm": 0.6679299473762512, + "learning_rate": 5.005013850715014e-05, + "loss": 2.4301, + "step": 13365 + }, + { + "epoch": 1.0786861431684287, + "grad_norm": 0.7116484642028809, + "learning_rate": 5.003646270828808e-05, + "loss": 2.4174, + "step": 13366 + }, + { + "epoch": 1.0787668469050118, + "grad_norm": 0.6850735545158386, + "learning_rate": 5.002278815464798e-05, + "loss": 2.4386, + "step": 13367 + }, + { + "epoch": 1.0788475506415947, + "grad_norm": 0.6613513827323914, + "learning_rate": 5.00091148465706e-05, + "loss": 2.4038, + "step": 13368 + }, + { + "epoch": 1.0789282543781777, + "grad_norm": 0.659635603427887, + "learning_rate": 4.9995442784396827e-05, + "loss": 2.4346, + "step": 13369 + }, + { + "epoch": 1.0790089581147608, + "grad_norm": 0.6775132417678833, + "learning_rate": 4.998177196846731e-05, + "loss": 2.4853, + "step": 13370 + }, + { + "epoch": 1.0790896618513437, + "grad_norm": 0.719860851764679, + "learning_rate": 4.996810239912277e-05, + "loss": 2.4018, + "step": 13371 + }, + { + "epoch": 1.0791703655879268, + 
"grad_norm": 0.7316389083862305, + "learning_rate": 4.9954434076703946e-05, + "loss": 2.424, + "step": 13372 + }, + { + "epoch": 1.0792510693245096, + "grad_norm": 0.6779622435569763, + "learning_rate": 4.99407670015514e-05, + "loss": 2.4743, + "step": 13373 + }, + { + "epoch": 1.0793317730610927, + "grad_norm": 0.7357139587402344, + "learning_rate": 4.992710117400581e-05, + "loss": 2.4385, + "step": 13374 + }, + { + "epoch": 1.0794124767976758, + "grad_norm": 0.671441912651062, + "learning_rate": 4.9913436594407784e-05, + "loss": 2.3988, + "step": 13375 + }, + { + "epoch": 1.0794931805342587, + "grad_norm": 0.7205149531364441, + "learning_rate": 4.9899773263097804e-05, + "loss": 2.4594, + "step": 13376 + }, + { + "epoch": 1.0795738842708418, + "grad_norm": 0.702910840511322, + "learning_rate": 4.988611118041644e-05, + "loss": 2.4831, + "step": 13377 + }, + { + "epoch": 1.0796545880074246, + "grad_norm": 0.6977962255477905, + "learning_rate": 4.987245034670418e-05, + "loss": 2.422, + "step": 13378 + }, + { + "epoch": 1.0797352917440077, + "grad_norm": 0.7106757760047913, + "learning_rate": 4.985879076230149e-05, + "loss": 2.4073, + "step": 13379 + }, + { + "epoch": 1.0798159954805908, + "grad_norm": 0.7046806812286377, + "learning_rate": 4.9845132427548814e-05, + "loss": 2.4065, + "step": 13380 + }, + { + "epoch": 1.0798966992171737, + "grad_norm": 0.7476605772972107, + "learning_rate": 4.9831475342786574e-05, + "loss": 2.4886, + "step": 13381 + }, + { + "epoch": 1.0799774029537568, + "grad_norm": 0.696977972984314, + "learning_rate": 4.981781950835508e-05, + "loss": 2.4732, + "step": 13382 + }, + { + "epoch": 1.0800581066903399, + "grad_norm": 0.6596804857254028, + "learning_rate": 4.98041649245947e-05, + "loss": 2.4497, + "step": 13383 + }, + { + "epoch": 1.0801388104269227, + "grad_norm": 0.7216050028800964, + "learning_rate": 4.979051159184573e-05, + "loss": 2.4745, + "step": 13384 + }, + { + "epoch": 1.0802195141635058, + "grad_norm": 0.6636630296707153, + 
"learning_rate": 4.977685951044852e-05, + "loss": 2.4904, + "step": 13385 + }, + { + "epoch": 1.0803002179000887, + "grad_norm": 0.7030208110809326, + "learning_rate": 4.97632086807432e-05, + "loss": 2.4302, + "step": 13386 + }, + { + "epoch": 1.0803809216366718, + "grad_norm": 0.7158327102661133, + "learning_rate": 4.974955910307004e-05, + "loss": 2.4735, + "step": 13387 + }, + { + "epoch": 1.0804616253732549, + "grad_norm": 0.6736464500427246, + "learning_rate": 4.9735910777769234e-05, + "loss": 2.4334, + "step": 13388 + }, + { + "epoch": 1.0805423291098377, + "grad_norm": 0.6913403272628784, + "learning_rate": 4.972226370518092e-05, + "loss": 2.468, + "step": 13389 + }, + { + "epoch": 1.0806230328464208, + "grad_norm": 0.7006524205207825, + "learning_rate": 4.970861788564522e-05, + "loss": 2.4598, + "step": 13390 + }, + { + "epoch": 1.080703736583004, + "grad_norm": 0.6892947554588318, + "learning_rate": 4.969497331950227e-05, + "loss": 2.4297, + "step": 13391 + }, + { + "epoch": 1.0807844403195868, + "grad_norm": 0.7270283699035645, + "learning_rate": 4.968133000709203e-05, + "loss": 2.5344, + "step": 13392 + }, + { + "epoch": 1.0808651440561698, + "grad_norm": 0.735342264175415, + "learning_rate": 4.9667687948754594e-05, + "loss": 2.4431, + "step": 13393 + }, + { + "epoch": 1.0809458477927527, + "grad_norm": 0.6869279146194458, + "learning_rate": 4.9654047144829974e-05, + "loss": 2.5581, + "step": 13394 + }, + { + "epoch": 1.0810265515293358, + "grad_norm": 0.6975715160369873, + "learning_rate": 4.964040759565808e-05, + "loss": 2.4328, + "step": 13395 + }, + { + "epoch": 1.0811072552659189, + "grad_norm": 0.7312532067298889, + "learning_rate": 4.9626769301578856e-05, + "loss": 2.4686, + "step": 13396 + }, + { + "epoch": 1.0811879590025018, + "grad_norm": 0.7824496626853943, + "learning_rate": 4.9613132262932215e-05, + "loss": 2.4564, + "step": 13397 + }, + { + "epoch": 1.0812686627390848, + "grad_norm": 0.7337941527366638, + "learning_rate": 
4.959949648005805e-05, + "loss": 2.4752, + "step": 13398 + }, + { + "epoch": 1.081349366475668, + "grad_norm": 0.7450836300849915, + "learning_rate": 4.958586195329617e-05, + "loss": 2.4457, + "step": 13399 + }, + { + "epoch": 1.0814300702122508, + "grad_norm": 0.6990504860877991, + "learning_rate": 4.9572228682986385e-05, + "loss": 2.4172, + "step": 13400 + }, + { + "epoch": 1.0815107739488339, + "grad_norm": 0.7293999791145325, + "learning_rate": 4.955859666946853e-05, + "loss": 2.5295, + "step": 13401 + }, + { + "epoch": 1.0815914776854167, + "grad_norm": 0.6872537136077881, + "learning_rate": 4.9544965913082264e-05, + "loss": 2.5029, + "step": 13402 + }, + { + "epoch": 1.0816721814219998, + "grad_norm": 0.6821706891059875, + "learning_rate": 4.953133641416733e-05, + "loss": 2.4738, + "step": 13403 + }, + { + "epoch": 1.081752885158583, + "grad_norm": 0.6811527609825134, + "learning_rate": 4.951770817306346e-05, + "loss": 2.4323, + "step": 13404 + }, + { + "epoch": 1.0818335888951658, + "grad_norm": 0.7138943076133728, + "learning_rate": 4.950408119011023e-05, + "loss": 2.5155, + "step": 13405 + }, + { + "epoch": 1.0819142926317489, + "grad_norm": 0.6777952909469604, + "learning_rate": 4.949045546564729e-05, + "loss": 2.4414, + "step": 13406 + }, + { + "epoch": 1.0819949963683317, + "grad_norm": 0.7065548896789551, + "learning_rate": 4.9476831000014276e-05, + "loss": 2.4913, + "step": 13407 + }, + { + "epoch": 1.0820757001049148, + "grad_norm": 0.7286355495452881, + "learning_rate": 4.9463207793550626e-05, + "loss": 2.4171, + "step": 13408 + }, + { + "epoch": 1.082156403841498, + "grad_norm": 0.6703049540519714, + "learning_rate": 4.944958584659597e-05, + "loss": 2.4387, + "step": 13409 + }, + { + "epoch": 1.0822371075780808, + "grad_norm": 0.6572019457817078, + "learning_rate": 4.943596515948983e-05, + "loss": 2.4324, + "step": 13410 + }, + { + "epoch": 1.0823178113146639, + "grad_norm": 0.6722360849380493, + "learning_rate": 4.942234573257156e-05, + "loss": 
2.4802, + "step": 13411 + }, + { + "epoch": 1.082398515051247, + "grad_norm": 0.7122535109519958, + "learning_rate": 4.9408727566180655e-05, + "loss": 2.4531, + "step": 13412 + }, + { + "epoch": 1.0824792187878298, + "grad_norm": 0.6769903898239136, + "learning_rate": 4.9395110660656505e-05, + "loss": 2.4549, + "step": 13413 + }, + { + "epoch": 1.082559922524413, + "grad_norm": 0.766251266002655, + "learning_rate": 4.938149501633852e-05, + "loss": 2.4416, + "step": 13414 + }, + { + "epoch": 1.082640626260996, + "grad_norm": 0.6677987575531006, + "learning_rate": 4.936788063356596e-05, + "loss": 2.4578, + "step": 13415 + }, + { + "epoch": 1.0827213299975789, + "grad_norm": 0.7461380362510681, + "learning_rate": 4.9354267512678156e-05, + "loss": 2.4776, + "step": 13416 + }, + { + "epoch": 1.082802033734162, + "grad_norm": 0.6681976914405823, + "learning_rate": 4.934065565401443e-05, + "loss": 2.5044, + "step": 13417 + }, + { + "epoch": 1.0828827374707448, + "grad_norm": 0.6809324622154236, + "learning_rate": 4.932704505791397e-05, + "loss": 2.4651, + "step": 13418 + }, + { + "epoch": 1.082963441207328, + "grad_norm": 0.6926563382148743, + "learning_rate": 4.931343572471596e-05, + "loss": 2.4633, + "step": 13419 + }, + { + "epoch": 1.083044144943911, + "grad_norm": 0.6451820135116577, + "learning_rate": 4.929982765475971e-05, + "loss": 2.474, + "step": 13420 + }, + { + "epoch": 1.0831248486804939, + "grad_norm": 0.7088493704795837, + "learning_rate": 4.9286220848384247e-05, + "loss": 2.462, + "step": 13421 + }, + { + "epoch": 1.083205552417077, + "grad_norm": 0.7819172739982605, + "learning_rate": 4.9272615305928725e-05, + "loss": 2.4534, + "step": 13422 + }, + { + "epoch": 1.0832862561536598, + "grad_norm": 0.6579666137695312, + "learning_rate": 4.925901102773227e-05, + "loss": 2.4101, + "step": 13423 + }, + { + "epoch": 1.083366959890243, + "grad_norm": 0.6999555230140686, + "learning_rate": 4.924540801413385e-05, + "loss": 2.4534, + "step": 13424 + }, + { + 
"epoch": 1.083447663626826, + "grad_norm": 0.7034400105476379, + "learning_rate": 4.9231806265472555e-05, + "loss": 2.4741, + "step": 13425 + }, + { + "epoch": 1.0835283673634089, + "grad_norm": 0.6595034599304199, + "learning_rate": 4.921820578208739e-05, + "loss": 2.4011, + "step": 13426 + }, + { + "epoch": 1.083609071099992, + "grad_norm": 0.666419267654419, + "learning_rate": 4.920460656431723e-05, + "loss": 2.4399, + "step": 13427 + }, + { + "epoch": 1.083689774836575, + "grad_norm": 0.7058294415473938, + "learning_rate": 4.919100861250108e-05, + "loss": 2.434, + "step": 13428 + }, + { + "epoch": 1.083770478573158, + "grad_norm": 0.7045806050300598, + "learning_rate": 4.917741192697779e-05, + "loss": 2.4616, + "step": 13429 + }, + { + "epoch": 1.083851182309741, + "grad_norm": 0.6565639972686768, + "learning_rate": 4.916381650808626e-05, + "loss": 2.3864, + "step": 13430 + }, + { + "epoch": 1.0839318860463238, + "grad_norm": 0.6939674615859985, + "learning_rate": 4.9150222356165295e-05, + "loss": 2.4217, + "step": 13431 + }, + { + "epoch": 1.084012589782907, + "grad_norm": 0.7240599989891052, + "learning_rate": 4.913662947155373e-05, + "loss": 2.447, + "step": 13432 + }, + { + "epoch": 1.08409329351949, + "grad_norm": 0.7369012832641602, + "learning_rate": 4.9123037854590336e-05, + "loss": 2.4588, + "step": 13433 + }, + { + "epoch": 1.0841739972560729, + "grad_norm": 0.714269757270813, + "learning_rate": 4.9109447505613803e-05, + "loss": 2.4921, + "step": 13434 + }, + { + "epoch": 1.084254700992656, + "grad_norm": 0.7541659474372864, + "learning_rate": 4.909585842496287e-05, + "loss": 2.4191, + "step": 13435 + }, + { + "epoch": 1.084335404729239, + "grad_norm": 0.7245596051216125, + "learning_rate": 4.9082270612976243e-05, + "loss": 2.4904, + "step": 13436 + }, + { + "epoch": 1.084416108465822, + "grad_norm": 0.7301090359687805, + "learning_rate": 4.90686840699925e-05, + "loss": 2.4461, + "step": 13437 + }, + { + "epoch": 1.084496812202405, + "grad_norm": 
0.7404102683067322, + "learning_rate": 4.905509879635028e-05, + "loss": 2.4826, + "step": 13438 + }, + { + "epoch": 1.0845775159389879, + "grad_norm": 0.7053710222244263, + "learning_rate": 4.9041514792388175e-05, + "loss": 2.4231, + "step": 13439 + }, + { + "epoch": 1.084658219675571, + "grad_norm": 0.6171362400054932, + "learning_rate": 4.9027932058444724e-05, + "loss": 2.4472, + "step": 13440 + }, + { + "epoch": 1.084738923412154, + "grad_norm": 0.7367038130760193, + "learning_rate": 4.901435059485845e-05, + "loss": 2.4847, + "step": 13441 + }, + { + "epoch": 1.084819627148737, + "grad_norm": 0.754828691482544, + "learning_rate": 4.900077040196788e-05, + "loss": 2.4731, + "step": 13442 + }, + { + "epoch": 1.08490033088532, + "grad_norm": 0.7380684018135071, + "learning_rate": 4.8987191480111386e-05, + "loss": 2.4227, + "step": 13443 + }, + { + "epoch": 1.084981034621903, + "grad_norm": 0.6711444854736328, + "learning_rate": 4.897361382962742e-05, + "loss": 2.4744, + "step": 13444 + }, + { + "epoch": 1.085061738358486, + "grad_norm": 0.7709227204322815, + "learning_rate": 4.896003745085438e-05, + "loss": 2.5422, + "step": 13445 + }, + { + "epoch": 1.085142442095069, + "grad_norm": 0.6778519153594971, + "learning_rate": 4.8946462344130675e-05, + "loss": 2.4757, + "step": 13446 + }, + { + "epoch": 1.085223145831652, + "grad_norm": 0.7390698194503784, + "learning_rate": 4.893288850979454e-05, + "loss": 2.4214, + "step": 13447 + }, + { + "epoch": 1.085303849568235, + "grad_norm": 0.6632684469223022, + "learning_rate": 4.891931594818432e-05, + "loss": 2.4689, + "step": 13448 + }, + { + "epoch": 1.085384553304818, + "grad_norm": 0.68693608045578, + "learning_rate": 4.890574465963827e-05, + "loss": 2.4788, + "step": 13449 + }, + { + "epoch": 1.085465257041401, + "grad_norm": 0.6910344362258911, + "learning_rate": 4.8892174644494625e-05, + "loss": 2.4611, + "step": 13450 + }, + { + "epoch": 1.085545960777984, + "grad_norm": 0.6935380101203918, + "learning_rate": 
4.887860590309158e-05, + "loss": 2.4481, + "step": 13451 + }, + { + "epoch": 1.085626664514567, + "grad_norm": 0.7086954712867737, + "learning_rate": 4.886503843576735e-05, + "loss": 2.4583, + "step": 13452 + }, + { + "epoch": 1.08570736825115, + "grad_norm": 0.7447777986526489, + "learning_rate": 4.8851472242859994e-05, + "loss": 2.5035, + "step": 13453 + }, + { + "epoch": 1.085788071987733, + "grad_norm": 0.6896036267280579, + "learning_rate": 4.8837907324707656e-05, + "loss": 2.4622, + "step": 13454 + }, + { + "epoch": 1.085868775724316, + "grad_norm": 0.7261155247688293, + "learning_rate": 4.882434368164843e-05, + "loss": 2.4958, + "step": 13455 + }, + { + "epoch": 1.085949479460899, + "grad_norm": 0.6868197321891785, + "learning_rate": 4.881078131402031e-05, + "loss": 2.4952, + "step": 13456 + }, + { + "epoch": 1.0860301831974821, + "grad_norm": 0.6338867545127869, + "learning_rate": 4.879722022216132e-05, + "loss": 2.4553, + "step": 13457 + }, + { + "epoch": 1.086110886934065, + "grad_norm": 0.7214454412460327, + "learning_rate": 4.878366040640946e-05, + "loss": 2.4433, + "step": 13458 + }, + { + "epoch": 1.086191590670648, + "grad_norm": 0.6871301531791687, + "learning_rate": 4.877010186710266e-05, + "loss": 2.4118, + "step": 13459 + }, + { + "epoch": 1.0862722944072312, + "grad_norm": 0.6845650672912598, + "learning_rate": 4.875654460457883e-05, + "loss": 2.4684, + "step": 13460 + }, + { + "epoch": 1.086352998143814, + "grad_norm": 0.7027513980865479, + "learning_rate": 4.8742988619175865e-05, + "loss": 2.4569, + "step": 13461 + }, + { + "epoch": 1.0864337018803971, + "grad_norm": 0.6428621411323547, + "learning_rate": 4.8729433911231646e-05, + "loss": 2.4211, + "step": 13462 + }, + { + "epoch": 1.08651440561698, + "grad_norm": 0.6921488046646118, + "learning_rate": 4.8715880481083934e-05, + "loss": 2.4668, + "step": 13463 + }, + { + "epoch": 1.086595109353563, + "grad_norm": 0.7001025676727295, + "learning_rate": 4.870232832907051e-05, + "loss": 2.4685, + 
"step": 13464 + }, + { + "epoch": 1.0866758130901462, + "grad_norm": 0.7460644245147705, + "learning_rate": 4.868877745552922e-05, + "loss": 2.3922, + "step": 13465 + }, + { + "epoch": 1.086756516826729, + "grad_norm": 0.7418891191482544, + "learning_rate": 4.867522786079768e-05, + "loss": 2.3777, + "step": 13466 + }, + { + "epoch": 1.0868372205633121, + "grad_norm": 0.6430083513259888, + "learning_rate": 4.8661679545213625e-05, + "loss": 2.4385, + "step": 13467 + }, + { + "epoch": 1.086917924299895, + "grad_norm": 0.6963593363761902, + "learning_rate": 4.864813250911475e-05, + "loss": 2.4083, + "step": 13468 + }, + { + "epoch": 1.086998628036478, + "grad_norm": 0.6796097159385681, + "learning_rate": 4.8634586752838606e-05, + "loss": 2.4984, + "step": 13469 + }, + { + "epoch": 1.0870793317730612, + "grad_norm": 0.6845307946205139, + "learning_rate": 4.862104227672281e-05, + "loss": 2.4168, + "step": 13470 + }, + { + "epoch": 1.087160035509644, + "grad_norm": 0.705348014831543, + "learning_rate": 4.8607499081105e-05, + "loss": 2.4216, + "step": 13471 + }, + { + "epoch": 1.087240739246227, + "grad_norm": 0.6906474828720093, + "learning_rate": 4.8593957166322636e-05, + "loss": 2.4955, + "step": 13472 + }, + { + "epoch": 1.0873214429828102, + "grad_norm": 0.696489691734314, + "learning_rate": 4.858041653271323e-05, + "loss": 2.4186, + "step": 13473 + }, + { + "epoch": 1.087402146719393, + "grad_norm": 0.6997761726379395, + "learning_rate": 4.856687718061429e-05, + "loss": 2.441, + "step": 13474 + }, + { + "epoch": 1.0874828504559761, + "grad_norm": 0.6515649557113647, + "learning_rate": 4.8553339110363184e-05, + "loss": 2.3997, + "step": 13475 + }, + { + "epoch": 1.087563554192559, + "grad_norm": 0.6902725696563721, + "learning_rate": 4.853980232229734e-05, + "loss": 2.4765, + "step": 13476 + }, + { + "epoch": 1.087644257929142, + "grad_norm": 0.6832055449485779, + "learning_rate": 4.852626681675415e-05, + "loss": 2.411, + "step": 13477 + }, + { + "epoch": 
1.0877249616657252, + "grad_norm": 0.668520987033844, + "learning_rate": 4.8512732594070984e-05, + "loss": 2.4742, + "step": 13478 + }, + { + "epoch": 1.087805665402308, + "grad_norm": 0.7019832134246826, + "learning_rate": 4.849919965458507e-05, + "loss": 2.4638, + "step": 13479 + }, + { + "epoch": 1.0878863691388911, + "grad_norm": 0.6986027359962463, + "learning_rate": 4.8485667998633724e-05, + "loss": 2.4866, + "step": 13480 + }, + { + "epoch": 1.0879670728754742, + "grad_norm": 0.659037709236145, + "learning_rate": 4.8472137626554195e-05, + "loss": 2.4821, + "step": 13481 + }, + { + "epoch": 1.088047776612057, + "grad_norm": 0.6506801247596741, + "learning_rate": 4.8458608538683694e-05, + "loss": 2.4686, + "step": 13482 + }, + { + "epoch": 1.0881284803486402, + "grad_norm": 0.7136878967285156, + "learning_rate": 4.844508073535939e-05, + "loss": 2.4523, + "step": 13483 + }, + { + "epoch": 1.088209184085223, + "grad_norm": 0.6663414239883423, + "learning_rate": 4.843155421691848e-05, + "loss": 2.4287, + "step": 13484 + }, + { + "epoch": 1.0882898878218061, + "grad_norm": 0.7192783355712891, + "learning_rate": 4.8418028983698006e-05, + "loss": 2.4433, + "step": 13485 + }, + { + "epoch": 1.0883705915583892, + "grad_norm": 0.6620980501174927, + "learning_rate": 4.8404505036035086e-05, + "loss": 2.4823, + "step": 13486 + }, + { + "epoch": 1.088451295294972, + "grad_norm": 0.6282123327255249, + "learning_rate": 4.83909823742668e-05, + "loss": 2.4641, + "step": 13487 + }, + { + "epoch": 1.0885319990315552, + "grad_norm": 0.6384354829788208, + "learning_rate": 4.837746099873012e-05, + "loss": 2.4234, + "step": 13488 + }, + { + "epoch": 1.0886127027681383, + "grad_norm": 0.6550076603889465, + "learning_rate": 4.836394090976204e-05, + "loss": 2.4743, + "step": 13489 + }, + { + "epoch": 1.0886934065047211, + "grad_norm": 0.6987888216972351, + "learning_rate": 4.8350422107699545e-05, + "loss": 2.4263, + "step": 13490 + }, + { + "epoch": 1.0887741102413042, + "grad_norm": 
0.7012613415718079, + "learning_rate": 4.833690459287953e-05, + "loss": 2.4801, + "step": 13491 + }, + { + "epoch": 1.088854813977887, + "grad_norm": 0.6986923217773438, + "learning_rate": 4.832338836563891e-05, + "loss": 2.426, + "step": 13492 + }, + { + "epoch": 1.0889355177144702, + "grad_norm": 0.6936241984367371, + "learning_rate": 4.830987342631453e-05, + "loss": 2.4361, + "step": 13493 + }, + { + "epoch": 1.0890162214510533, + "grad_norm": 0.6612359881401062, + "learning_rate": 4.8296359775243275e-05, + "loss": 2.4385, + "step": 13494 + }, + { + "epoch": 1.0890969251876361, + "grad_norm": 0.6927692294120789, + "learning_rate": 4.828284741276183e-05, + "loss": 2.4692, + "step": 13495 + }, + { + "epoch": 1.0891776289242192, + "grad_norm": 0.6710225343704224, + "learning_rate": 4.8269336339207036e-05, + "loss": 2.4078, + "step": 13496 + }, + { + "epoch": 1.0892583326608023, + "grad_norm": 0.639076828956604, + "learning_rate": 4.825582655491564e-05, + "loss": 2.4368, + "step": 13497 + }, + { + "epoch": 1.0893390363973852, + "grad_norm": 0.7050483226776123, + "learning_rate": 4.824231806022426e-05, + "loss": 2.4308, + "step": 13498 + }, + { + "epoch": 1.0894197401339683, + "grad_norm": 0.7097769975662231, + "learning_rate": 4.822881085546962e-05, + "loss": 2.4378, + "step": 13499 + }, + { + "epoch": 1.0895004438705511, + "grad_norm": 0.6939458847045898, + "learning_rate": 4.821530494098834e-05, + "loss": 2.4678, + "step": 13500 + }, + { + "epoch": 1.0895811476071342, + "grad_norm": 0.6797441840171814, + "learning_rate": 4.8201800317117016e-05, + "loss": 2.4837, + "step": 13501 + }, + { + "epoch": 1.0896618513437173, + "grad_norm": 0.7451521158218384, + "learning_rate": 4.818829698419225e-05, + "loss": 2.4651, + "step": 13502 + }, + { + "epoch": 1.0897425550803002, + "grad_norm": 0.6749109625816345, + "learning_rate": 4.8174794942550585e-05, + "loss": 2.4569, + "step": 13503 + }, + { + "epoch": 1.0898232588168832, + "grad_norm": 0.6321636438369751, + 
"learning_rate": 4.8161294192528474e-05, + "loss": 2.4049, + "step": 13504 + }, + { + "epoch": 1.0899039625534663, + "grad_norm": 0.7002367377281189, + "learning_rate": 4.8147794734462415e-05, + "loss": 2.4489, + "step": 13505 + }, + { + "epoch": 1.0899846662900492, + "grad_norm": 0.758057713508606, + "learning_rate": 4.813429656868889e-05, + "loss": 2.436, + "step": 13506 + }, + { + "epoch": 1.0900653700266323, + "grad_norm": 0.6665529012680054, + "learning_rate": 4.812079969554424e-05, + "loss": 2.3805, + "step": 13507 + }, + { + "epoch": 1.0901460737632152, + "grad_norm": 0.6962547898292542, + "learning_rate": 4.810730411536487e-05, + "loss": 2.4203, + "step": 13508 + }, + { + "epoch": 1.0902267774997982, + "grad_norm": 0.6860647201538086, + "learning_rate": 4.809380982848712e-05, + "loss": 2.4482, + "step": 13509 + }, + { + "epoch": 1.0903074812363813, + "grad_norm": 0.7045090198516846, + "learning_rate": 4.808031683524733e-05, + "loss": 2.4155, + "step": 13510 + }, + { + "epoch": 1.0903881849729642, + "grad_norm": 0.6609304547309875, + "learning_rate": 4.806682513598176e-05, + "loss": 2.4295, + "step": 13511 + }, + { + "epoch": 1.0904688887095473, + "grad_norm": 0.7647323608398438, + "learning_rate": 4.8053334731026665e-05, + "loss": 2.4704, + "step": 13512 + }, + { + "epoch": 1.0905495924461301, + "grad_norm": 0.677449643611908, + "learning_rate": 4.803984562071829e-05, + "loss": 2.4501, + "step": 13513 + }, + { + "epoch": 1.0906302961827132, + "grad_norm": 0.645866334438324, + "learning_rate": 4.8026357805392754e-05, + "loss": 2.427, + "step": 13514 + }, + { + "epoch": 1.0907109999192963, + "grad_norm": 0.6968488097190857, + "learning_rate": 4.801287128538624e-05, + "loss": 2.3933, + "step": 13515 + }, + { + "epoch": 1.0907917036558792, + "grad_norm": 0.7137444615364075, + "learning_rate": 4.799938606103491e-05, + "loss": 2.4611, + "step": 13516 + }, + { + "epoch": 1.0908724073924623, + "grad_norm": 0.6860007047653198, + "learning_rate": 
4.7985902132674765e-05, + "loss": 2.4252, + "step": 13517 + }, + { + "epoch": 1.0909531111290454, + "grad_norm": 0.726290762424469, + "learning_rate": 4.797241950064192e-05, + "loss": 2.44, + "step": 13518 + }, + { + "epoch": 1.0910338148656282, + "grad_norm": 0.6833362579345703, + "learning_rate": 4.795893816527241e-05, + "loss": 2.4199, + "step": 13519 + }, + { + "epoch": 1.0911145186022113, + "grad_norm": 0.7412242293357849, + "learning_rate": 4.794545812690212e-05, + "loss": 2.5412, + "step": 13520 + }, + { + "epoch": 1.0911952223387944, + "grad_norm": 0.6882274150848389, + "learning_rate": 4.793197938586712e-05, + "loss": 2.473, + "step": 13521 + }, + { + "epoch": 1.0912759260753773, + "grad_norm": 0.7334007024765015, + "learning_rate": 4.791850194250335e-05, + "loss": 2.4357, + "step": 13522 + }, + { + "epoch": 1.0913566298119604, + "grad_norm": 0.6564081311225891, + "learning_rate": 4.790502579714661e-05, + "loss": 2.4425, + "step": 13523 + }, + { + "epoch": 1.0914373335485432, + "grad_norm": 0.7045762538909912, + "learning_rate": 4.78915509501328e-05, + "loss": 2.4929, + "step": 13524 + }, + { + "epoch": 1.0915180372851263, + "grad_norm": 0.7512505650520325, + "learning_rate": 4.787807740179776e-05, + "loss": 2.4187, + "step": 13525 + }, + { + "epoch": 1.0915987410217094, + "grad_norm": 0.6592997908592224, + "learning_rate": 4.786460515247732e-05, + "loss": 2.4344, + "step": 13526 + }, + { + "epoch": 1.0916794447582923, + "grad_norm": 0.6721770763397217, + "learning_rate": 4.785113420250715e-05, + "loss": 2.4415, + "step": 13527 + }, + { + "epoch": 1.0917601484948753, + "grad_norm": 0.7544431686401367, + "learning_rate": 4.783766455222305e-05, + "loss": 2.4831, + "step": 13528 + }, + { + "epoch": 1.0918408522314582, + "grad_norm": 0.7226355671882629, + "learning_rate": 4.782419620196073e-05, + "loss": 2.4807, + "step": 13529 + }, + { + "epoch": 1.0919215559680413, + "grad_norm": 0.6386340260505676, + "learning_rate": 4.78107291520558e-05, + "loss": 2.4062, 
+ "step": 13530 + }, + { + "epoch": 1.0920022597046244, + "grad_norm": 0.6670595407485962, + "learning_rate": 4.7797263402843926e-05, + "loss": 2.4009, + "step": 13531 + }, + { + "epoch": 1.0920829634412073, + "grad_norm": 0.6600756049156189, + "learning_rate": 4.778379895466071e-05, + "loss": 2.4321, + "step": 13532 + }, + { + "epoch": 1.0921636671777903, + "grad_norm": 0.7190701961517334, + "learning_rate": 4.77703358078417e-05, + "loss": 2.4229, + "step": 13533 + }, + { + "epoch": 1.0922443709143734, + "grad_norm": 0.6554828882217407, + "learning_rate": 4.775687396272247e-05, + "loss": 2.442, + "step": 13534 + }, + { + "epoch": 1.0923250746509563, + "grad_norm": 0.6720205545425415, + "learning_rate": 4.774341341963853e-05, + "loss": 2.4994, + "step": 13535 + }, + { + "epoch": 1.0924057783875394, + "grad_norm": 0.7161003947257996, + "learning_rate": 4.7729954178925295e-05, + "loss": 2.4666, + "step": 13536 + }, + { + "epoch": 1.0924864821241222, + "grad_norm": 0.6817156672477722, + "learning_rate": 4.771649624091824e-05, + "loss": 2.4203, + "step": 13537 + }, + { + "epoch": 1.0925671858607053, + "grad_norm": 0.7167035937309265, + "learning_rate": 4.770303960595277e-05, + "loss": 2.4214, + "step": 13538 + }, + { + "epoch": 1.0926478895972884, + "grad_norm": 0.6373945474624634, + "learning_rate": 4.768958427436429e-05, + "loss": 2.485, + "step": 13539 + }, + { + "epoch": 1.0927285933338713, + "grad_norm": 0.7361387014389038, + "learning_rate": 4.767613024648808e-05, + "loss": 2.5192, + "step": 13540 + }, + { + "epoch": 1.0928092970704544, + "grad_norm": 0.7034375667572021, + "learning_rate": 4.766267752265947e-05, + "loss": 2.4324, + "step": 13541 + }, + { + "epoch": 1.0928900008070375, + "grad_norm": 0.7355689406394958, + "learning_rate": 4.7649226103213765e-05, + "loss": 2.5048, + "step": 13542 + }, + { + "epoch": 1.0929707045436203, + "grad_norm": 0.7120445966720581, + "learning_rate": 4.7635775988486176e-05, + "loss": 2.449, + "step": 13543 + }, + { + "epoch": 
1.0930514082802034, + "grad_norm": 0.695888876914978, + "learning_rate": 4.7622327178811935e-05, + "loss": 2.4974, + "step": 13544 + }, + { + "epoch": 1.0931321120167863, + "grad_norm": 0.6953639984130859, + "learning_rate": 4.760887967452625e-05, + "loss": 2.3927, + "step": 13545 + }, + { + "epoch": 1.0932128157533694, + "grad_norm": 0.6457183957099915, + "learning_rate": 4.759543347596421e-05, + "loss": 2.4501, + "step": 13546 + }, + { + "epoch": 1.0932935194899525, + "grad_norm": 0.7259296774864197, + "learning_rate": 4.7581988583460946e-05, + "loss": 2.4896, + "step": 13547 + }, + { + "epoch": 1.0933742232265353, + "grad_norm": 0.6897724270820618, + "learning_rate": 4.7568544997351586e-05, + "loss": 2.4181, + "step": 13548 + }, + { + "epoch": 1.0934549269631184, + "grad_norm": 0.6723688840866089, + "learning_rate": 4.755510271797111e-05, + "loss": 2.5097, + "step": 13549 + }, + { + "epoch": 1.0935356306997015, + "grad_norm": 0.7353307604789734, + "learning_rate": 4.754166174565456e-05, + "loss": 2.4548, + "step": 13550 + }, + { + "epoch": 1.0936163344362844, + "grad_norm": 0.7334069013595581, + "learning_rate": 4.752822208073693e-05, + "loss": 2.5113, + "step": 13551 + }, + { + "epoch": 1.0936970381728675, + "grad_norm": 0.6581420302391052, + "learning_rate": 4.751478372355317e-05, + "loss": 2.4546, + "step": 13552 + }, + { + "epoch": 1.0937777419094503, + "grad_norm": 0.7890802621841431, + "learning_rate": 4.75013466744382e-05, + "loss": 2.4092, + "step": 13553 + }, + { + "epoch": 1.0938584456460334, + "grad_norm": 0.7226595282554626, + "learning_rate": 4.7487910933726895e-05, + "loss": 2.457, + "step": 13554 + }, + { + "epoch": 1.0939391493826165, + "grad_norm": 0.7108014225959778, + "learning_rate": 4.7474476501754165e-05, + "loss": 2.471, + "step": 13555 + }, + { + "epoch": 1.0940198531191994, + "grad_norm": 0.6864863038063049, + "learning_rate": 4.746104337885473e-05, + "loss": 2.4778, + "step": 13556 + }, + { + "epoch": 1.0941005568557824, + "grad_norm": 
0.6890624165534973, + "learning_rate": 4.744761156536345e-05, + "loss": 2.456, + "step": 13557 + }, + { + "epoch": 1.0941812605923653, + "grad_norm": 0.7052781581878662, + "learning_rate": 4.743418106161509e-05, + "loss": 2.4796, + "step": 13558 + }, + { + "epoch": 1.0942619643289484, + "grad_norm": 0.6569164991378784, + "learning_rate": 4.742075186794431e-05, + "loss": 2.469, + "step": 13559 + }, + { + "epoch": 1.0943426680655315, + "grad_norm": 0.7302874326705933, + "learning_rate": 4.7407323984685836e-05, + "loss": 2.4543, + "step": 13560 + }, + { + "epoch": 1.0944233718021144, + "grad_norm": 0.6499345898628235, + "learning_rate": 4.7393897412174335e-05, + "loss": 2.4037, + "step": 13561 + }, + { + "epoch": 1.0945040755386974, + "grad_norm": 0.6643944382667542, + "learning_rate": 4.7380472150744416e-05, + "loss": 2.4067, + "step": 13562 + }, + { + "epoch": 1.0945847792752805, + "grad_norm": 0.7491872906684875, + "learning_rate": 4.736704820073069e-05, + "loss": 2.4277, + "step": 13563 + }, + { + "epoch": 1.0946654830118634, + "grad_norm": 0.7319512367248535, + "learning_rate": 4.735362556246773e-05, + "loss": 2.4588, + "step": 13564 + }, + { + "epoch": 1.0947461867484465, + "grad_norm": 0.7404350638389587, + "learning_rate": 4.734020423629001e-05, + "loss": 2.432, + "step": 13565 + }, + { + "epoch": 1.0948268904850296, + "grad_norm": 0.6462193727493286, + "learning_rate": 4.732678422253206e-05, + "loss": 2.4417, + "step": 13566 + }, + { + "epoch": 1.0949075942216124, + "grad_norm": 0.6711323857307434, + "learning_rate": 4.731336552152836e-05, + "loss": 2.4023, + "step": 13567 + }, + { + "epoch": 1.0949882979581955, + "grad_norm": 0.658261239528656, + "learning_rate": 4.729994813361329e-05, + "loss": 2.4132, + "step": 13568 + }, + { + "epoch": 1.0950690016947784, + "grad_norm": 0.8081904053688049, + "learning_rate": 4.728653205912127e-05, + "loss": 2.4412, + "step": 13569 + }, + { + "epoch": 1.0951497054313615, + "grad_norm": 0.6620786786079407, + 
"learning_rate": 4.727311729838666e-05, + "loss": 2.4357, + "step": 13570 + }, + { + "epoch": 1.0952304091679446, + "grad_norm": 0.7026848793029785, + "learning_rate": 4.725970385174381e-05, + "loss": 2.4159, + "step": 13571 + }, + { + "epoch": 1.0953111129045274, + "grad_norm": 0.7017392516136169, + "learning_rate": 4.7246291719526995e-05, + "loss": 2.4253, + "step": 13572 + }, + { + "epoch": 1.0953918166411105, + "grad_norm": 0.710172712802887, + "learning_rate": 4.7232880902070483e-05, + "loss": 2.4057, + "step": 13573 + }, + { + "epoch": 1.0954725203776934, + "grad_norm": 0.7208876013755798, + "learning_rate": 4.721947139970856e-05, + "loss": 2.4803, + "step": 13574 + }, + { + "epoch": 1.0955532241142765, + "grad_norm": 0.693219006061554, + "learning_rate": 4.720606321277534e-05, + "loss": 2.3611, + "step": 13575 + }, + { + "epoch": 1.0956339278508596, + "grad_norm": 0.737206757068634, + "learning_rate": 4.7192656341605026e-05, + "loss": 2.3873, + "step": 13576 + }, + { + "epoch": 1.0957146315874424, + "grad_norm": 0.6605268120765686, + "learning_rate": 4.717925078653179e-05, + "loss": 2.4155, + "step": 13577 + }, + { + "epoch": 1.0957953353240255, + "grad_norm": 0.7143047451972961, + "learning_rate": 4.716584654788967e-05, + "loss": 2.4526, + "step": 13578 + }, + { + "epoch": 1.0958760390606086, + "grad_norm": 0.6980953216552734, + "learning_rate": 4.715244362601277e-05, + "loss": 2.4422, + "step": 13579 + }, + { + "epoch": 1.0959567427971915, + "grad_norm": 0.6852009892463684, + "learning_rate": 4.713904202123515e-05, + "loss": 2.4599, + "step": 13580 + }, + { + "epoch": 1.0960374465337746, + "grad_norm": 0.7436656355857849, + "learning_rate": 4.712564173389074e-05, + "loss": 2.4441, + "step": 13581 + }, + { + "epoch": 1.0961181502703574, + "grad_norm": 0.7090624570846558, + "learning_rate": 4.711224276431352e-05, + "loss": 2.4741, + "step": 13582 + }, + { + "epoch": 1.0961988540069405, + "grad_norm": 0.6611043810844421, + "learning_rate": 
4.709884511283753e-05, + "loss": 2.4589, + "step": 13583 + }, + { + "epoch": 1.0962795577435236, + "grad_norm": 0.6932426691055298, + "learning_rate": 4.708544877979658e-05, + "loss": 2.4199, + "step": 13584 + }, + { + "epoch": 1.0963602614801065, + "grad_norm": 0.7629422545433044, + "learning_rate": 4.707205376552456e-05, + "loss": 2.4588, + "step": 13585 + }, + { + "epoch": 1.0964409652166895, + "grad_norm": 0.8116739392280579, + "learning_rate": 4.705866007035531e-05, + "loss": 2.472, + "step": 13586 + }, + { + "epoch": 1.0965216689532726, + "grad_norm": 0.6711297631263733, + "learning_rate": 4.704526769462269e-05, + "loss": 2.4086, + "step": 13587 + }, + { + "epoch": 1.0966023726898555, + "grad_norm": 0.716015636920929, + "learning_rate": 4.703187663866037e-05, + "loss": 2.4411, + "step": 13588 + }, + { + "epoch": 1.0966830764264386, + "grad_norm": 0.6982430219650269, + "learning_rate": 4.701848690280215e-05, + "loss": 2.4438, + "step": 13589 + }, + { + "epoch": 1.0967637801630215, + "grad_norm": 0.7183159589767456, + "learning_rate": 4.7005098487381785e-05, + "loss": 2.4464, + "step": 13590 + }, + { + "epoch": 1.0968444838996045, + "grad_norm": 0.6983399391174316, + "learning_rate": 4.699171139273284e-05, + "loss": 2.4354, + "step": 13591 + }, + { + "epoch": 1.0969251876361876, + "grad_norm": 0.7157938480377197, + "learning_rate": 4.697832561918901e-05, + "loss": 2.4393, + "step": 13592 + }, + { + "epoch": 1.0970058913727705, + "grad_norm": 0.6991363763809204, + "learning_rate": 4.696494116708392e-05, + "loss": 2.4723, + "step": 13593 + }, + { + "epoch": 1.0970865951093536, + "grad_norm": 0.6722309589385986, + "learning_rate": 4.695155803675112e-05, + "loss": 2.447, + "step": 13594 + }, + { + "epoch": 1.0971672988459367, + "grad_norm": 0.6492688655853271, + "learning_rate": 4.6938176228524175e-05, + "loss": 2.4213, + "step": 13595 + }, + { + "epoch": 1.0972480025825195, + "grad_norm": 0.6941642165184021, + "learning_rate": 4.6924795742736616e-05, + "loss": 
2.4714, + "step": 13596 + }, + { + "epoch": 1.0973287063191026, + "grad_norm": 0.7506042122840881, + "learning_rate": 4.691141657972185e-05, + "loss": 2.4563, + "step": 13597 + }, + { + "epoch": 1.0974094100556855, + "grad_norm": 0.7032836675643921, + "learning_rate": 4.6898038739813356e-05, + "loss": 2.4824, + "step": 13598 + }, + { + "epoch": 1.0974901137922686, + "grad_norm": 0.6908734440803528, + "learning_rate": 4.6884662223344575e-05, + "loss": 2.4486, + "step": 13599 + }, + { + "epoch": 1.0975708175288517, + "grad_norm": 0.714971661567688, + "learning_rate": 4.687128703064883e-05, + "loss": 2.4372, + "step": 13600 + }, + { + "epoch": 1.0976515212654345, + "grad_norm": 0.6989198327064514, + "learning_rate": 4.6857913162059486e-05, + "loss": 2.395, + "step": 13601 + }, + { + "epoch": 1.0977322250020176, + "grad_norm": 0.7163406014442444, + "learning_rate": 4.684454061790987e-05, + "loss": 2.4868, + "step": 13602 + }, + { + "epoch": 1.0978129287386005, + "grad_norm": 0.6600626707077026, + "learning_rate": 4.6831169398533245e-05, + "loss": 2.5134, + "step": 13603 + }, + { + "epoch": 1.0978936324751836, + "grad_norm": 0.6657080054283142, + "learning_rate": 4.681779950426286e-05, + "loss": 2.4701, + "step": 13604 + }, + { + "epoch": 1.0979743362117667, + "grad_norm": 0.665860116481781, + "learning_rate": 4.680443093543194e-05, + "loss": 2.4593, + "step": 13605 + }, + { + "epoch": 1.0980550399483495, + "grad_norm": 0.7000327110290527, + "learning_rate": 4.679106369237368e-05, + "loss": 2.4523, + "step": 13606 + }, + { + "epoch": 1.0981357436849326, + "grad_norm": 0.6969157457351685, + "learning_rate": 4.677769777542118e-05, + "loss": 2.4935, + "step": 13607 + }, + { + "epoch": 1.0982164474215157, + "grad_norm": 0.6864836812019348, + "learning_rate": 4.676433318490757e-05, + "loss": 2.457, + "step": 13608 + }, + { + "epoch": 1.0982971511580986, + "grad_norm": 0.7331364750862122, + "learning_rate": 4.675096992116598e-05, + "loss": 2.4253, + "step": 13609 + }, + { + 
"epoch": 1.0983778548946816, + "grad_norm": 0.75, + "learning_rate": 4.673760798452936e-05, + "loss": 2.4147, + "step": 13610 + }, + { + "epoch": 1.0984585586312647, + "grad_norm": 0.6589440703392029, + "learning_rate": 4.6724247375330786e-05, + "loss": 2.4718, + "step": 13611 + }, + { + "epoch": 1.0985392623678476, + "grad_norm": 0.7032667994499207, + "learning_rate": 4.671088809390324e-05, + "loss": 2.4724, + "step": 13612 + }, + { + "epoch": 1.0986199661044307, + "grad_norm": 0.7544135451316833, + "learning_rate": 4.6697530140579646e-05, + "loss": 2.4804, + "step": 13613 + }, + { + "epoch": 1.0987006698410136, + "grad_norm": 0.6503081917762756, + "learning_rate": 4.668417351569295e-05, + "loss": 2.3829, + "step": 13614 + }, + { + "epoch": 1.0987813735775966, + "grad_norm": 0.6928786039352417, + "learning_rate": 4.667081821957605e-05, + "loss": 2.5678, + "step": 13615 + }, + { + "epoch": 1.0988620773141797, + "grad_norm": 0.6652864217758179, + "learning_rate": 4.665746425256173e-05, + "loss": 2.4585, + "step": 13616 + }, + { + "epoch": 1.0989427810507626, + "grad_norm": 0.700265109539032, + "learning_rate": 4.664411161498283e-05, + "loss": 2.4785, + "step": 13617 + }, + { + "epoch": 1.0990234847873457, + "grad_norm": 0.7443608045578003, + "learning_rate": 4.663076030717216e-05, + "loss": 2.4869, + "step": 13618 + }, + { + "epoch": 1.0991041885239285, + "grad_norm": 0.7037705779075623, + "learning_rate": 4.6617410329462477e-05, + "loss": 2.4518, + "step": 13619 + }, + { + "epoch": 1.0991848922605116, + "grad_norm": 0.7528365850448608, + "learning_rate": 4.660406168218643e-05, + "loss": 2.4616, + "step": 13620 + }, + { + "epoch": 1.0992655959970947, + "grad_norm": 0.7149221301078796, + "learning_rate": 4.659071436567676e-05, + "loss": 2.4661, + "step": 13621 + }, + { + "epoch": 1.0993462997336776, + "grad_norm": 0.7212862968444824, + "learning_rate": 4.657736838026608e-05, + "loss": 2.4424, + "step": 13622 + }, + { + "epoch": 1.0994270034702607, + "grad_norm": 
0.6934216022491455, + "learning_rate": 4.6564023726287045e-05, + "loss": 2.4633, + "step": 13623 + }, + { + "epoch": 1.0995077072068438, + "grad_norm": 0.7244036793708801, + "learning_rate": 4.655068040407221e-05, + "loss": 2.409, + "step": 13624 + }, + { + "epoch": 1.0995884109434266, + "grad_norm": 0.6911318898200989, + "learning_rate": 4.653733841395419e-05, + "loss": 2.5117, + "step": 13625 + }, + { + "epoch": 1.0996691146800097, + "grad_norm": 0.7579816579818726, + "learning_rate": 4.65239977562654e-05, + "loss": 2.4927, + "step": 13626 + }, + { + "epoch": 1.0997498184165928, + "grad_norm": 0.7699651122093201, + "learning_rate": 4.651065843133837e-05, + "loss": 2.4083, + "step": 13627 + }, + { + "epoch": 1.0998305221531757, + "grad_norm": 0.6669431328773499, + "learning_rate": 4.649732043950561e-05, + "loss": 2.4402, + "step": 13628 + }, + { + "epoch": 1.0999112258897588, + "grad_norm": 0.7134940028190613, + "learning_rate": 4.6483983781099426e-05, + "loss": 2.4275, + "step": 13629 + }, + { + "epoch": 1.0999919296263416, + "grad_norm": 0.7107651233673096, + "learning_rate": 4.647064845645227e-05, + "loss": 2.4654, + "step": 13630 + }, + { + "epoch": 1.1000726333629247, + "grad_norm": 0.7101391553878784, + "learning_rate": 4.645731446589652e-05, + "loss": 2.4357, + "step": 13631 + }, + { + "epoch": 1.1001533370995078, + "grad_norm": 0.7511606216430664, + "learning_rate": 4.6443981809764405e-05, + "loss": 2.5016, + "step": 13632 + }, + { + "epoch": 1.1002340408360907, + "grad_norm": 0.7315953373908997, + "learning_rate": 4.6430650488388226e-05, + "loss": 2.4541, + "step": 13633 + }, + { + "epoch": 1.1003147445726738, + "grad_norm": 0.6701769232749939, + "learning_rate": 4.6417320502100316e-05, + "loss": 2.4071, + "step": 13634 + }, + { + "epoch": 1.1003954483092566, + "grad_norm": 0.7164294123649597, + "learning_rate": 4.6403991851232876e-05, + "loss": 2.478, + "step": 13635 + }, + { + "epoch": 1.1004761520458397, + "grad_norm": 0.7003894448280334, + 
"learning_rate": 4.639066453611802e-05, + "loss": 2.4686, + "step": 13636 + }, + { + "epoch": 1.1005568557824228, + "grad_norm": 0.6855250000953674, + "learning_rate": 4.6377338557087957e-05, + "loss": 2.4531, + "step": 13637 + }, + { + "epoch": 1.1006375595190057, + "grad_norm": 0.6581299901008606, + "learning_rate": 4.6364013914474816e-05, + "loss": 2.4511, + "step": 13638 + }, + { + "epoch": 1.1007182632555887, + "grad_norm": 0.7599080204963684, + "learning_rate": 4.6350690608610604e-05, + "loss": 2.5143, + "step": 13639 + }, + { + "epoch": 1.1007989669921718, + "grad_norm": 0.7029981017112732, + "learning_rate": 4.633736863982744e-05, + "loss": 2.4541, + "step": 13640 + }, + { + "epoch": 1.1008796707287547, + "grad_norm": 0.7378708720207214, + "learning_rate": 4.6324048008457357e-05, + "loss": 2.4319, + "step": 13641 + }, + { + "epoch": 1.1009603744653378, + "grad_norm": 0.7087826728820801, + "learning_rate": 4.631072871483226e-05, + "loss": 2.4148, + "step": 13642 + }, + { + "epoch": 1.1010410782019207, + "grad_norm": 0.7000819444656372, + "learning_rate": 4.629741075928415e-05, + "loss": 2.4692, + "step": 13643 + }, + { + "epoch": 1.1011217819385037, + "grad_norm": 0.7363965511322021, + "learning_rate": 4.628409414214496e-05, + "loss": 2.4584, + "step": 13644 + }, + { + "epoch": 1.1012024856750868, + "grad_norm": 0.6691753268241882, + "learning_rate": 4.627077886374656e-05, + "loss": 2.4356, + "step": 13645 + }, + { + "epoch": 1.1012831894116697, + "grad_norm": 0.6864185929298401, + "learning_rate": 4.625746492442078e-05, + "loss": 2.4713, + "step": 13646 + }, + { + "epoch": 1.1013638931482528, + "grad_norm": 0.714318573474884, + "learning_rate": 4.624415232449947e-05, + "loss": 2.4482, + "step": 13647 + }, + { + "epoch": 1.1014445968848359, + "grad_norm": 0.6383495330810547, + "learning_rate": 4.623084106431444e-05, + "loss": 2.4248, + "step": 13648 + }, + { + "epoch": 1.1015253006214187, + "grad_norm": 0.7014495730400085, + "learning_rate": 
4.6217531144197365e-05, + "loss": 2.4393, + "step": 13649 + }, + { + "epoch": 1.1016060043580018, + "grad_norm": 0.8128634095191956, + "learning_rate": 4.620422256448e-05, + "loss": 2.4741, + "step": 13650 + }, + { + "epoch": 1.1016867080945847, + "grad_norm": 0.7333208322525024, + "learning_rate": 4.619091532549408e-05, + "loss": 2.4288, + "step": 13651 + }, + { + "epoch": 1.1017674118311678, + "grad_norm": 0.7023218274116516, + "learning_rate": 4.617760942757117e-05, + "loss": 2.5025, + "step": 13652 + }, + { + "epoch": 1.1018481155677509, + "grad_norm": 0.6420873403549194, + "learning_rate": 4.616430487104292e-05, + "loss": 2.4165, + "step": 13653 + }, + { + "epoch": 1.1019288193043337, + "grad_norm": 0.6767684817314148, + "learning_rate": 4.615100165624092e-05, + "loss": 2.4642, + "step": 13654 + }, + { + "epoch": 1.1020095230409168, + "grad_norm": 0.7361159920692444, + "learning_rate": 4.613769978349672e-05, + "loss": 2.5343, + "step": 13655 + }, + { + "epoch": 1.1020902267775, + "grad_norm": 0.6642624735832214, + "learning_rate": 4.6124399253141846e-05, + "loss": 2.3769, + "step": 13656 + }, + { + "epoch": 1.1021709305140828, + "grad_norm": 0.6912256479263306, + "learning_rate": 4.611110006550781e-05, + "loss": 2.455, + "step": 13657 + }, + { + "epoch": 1.1022516342506659, + "grad_norm": 0.7419310212135315, + "learning_rate": 4.609780222092599e-05, + "loss": 2.4171, + "step": 13658 + }, + { + "epoch": 1.1023323379872487, + "grad_norm": 0.718953549861908, + "learning_rate": 4.6084505719727835e-05, + "loss": 2.4791, + "step": 13659 + }, + { + "epoch": 1.1024130417238318, + "grad_norm": 0.7904248237609863, + "learning_rate": 4.607121056224477e-05, + "loss": 2.4429, + "step": 13660 + }, + { + "epoch": 1.102493745460415, + "grad_norm": 0.6743534803390503, + "learning_rate": 4.605791674880808e-05, + "loss": 2.4481, + "step": 13661 + }, + { + "epoch": 1.1025744491969978, + "grad_norm": 0.6829143166542053, + "learning_rate": 4.6044624279749106e-05, + "loss": 2.4078, 
+ "step": 13662 + }, + { + "epoch": 1.1026551529335809, + "grad_norm": 0.6803167462348938, + "learning_rate": 4.6031333155399136e-05, + "loss": 2.4509, + "step": 13663 + }, + { + "epoch": 1.1027358566701637, + "grad_norm": 0.7474592328071594, + "learning_rate": 4.601804337608943e-05, + "loss": 2.4563, + "step": 13664 + }, + { + "epoch": 1.1028165604067468, + "grad_norm": 0.6753630042076111, + "learning_rate": 4.6004754942151174e-05, + "loss": 2.4285, + "step": 13665 + }, + { + "epoch": 1.10289726414333, + "grad_norm": 0.7990161180496216, + "learning_rate": 4.599146785391558e-05, + "loss": 2.4907, + "step": 13666 + }, + { + "epoch": 1.1029779678799128, + "grad_norm": 0.8161290287971497, + "learning_rate": 4.597818211171383e-05, + "loss": 2.4599, + "step": 13667 + }, + { + "epoch": 1.1030586716164958, + "grad_norm": 0.6813610792160034, + "learning_rate": 4.596489771587695e-05, + "loss": 2.4484, + "step": 13668 + }, + { + "epoch": 1.103139375353079, + "grad_norm": 0.6598966121673584, + "learning_rate": 4.5951614666736076e-05, + "loss": 2.4326, + "step": 13669 + }, + { + "epoch": 1.1032200790896618, + "grad_norm": 0.7084827423095703, + "learning_rate": 4.593833296462228e-05, + "loss": 2.4188, + "step": 13670 + }, + { + "epoch": 1.1033007828262449, + "grad_norm": 0.6876685619354248, + "learning_rate": 4.59250526098665e-05, + "loss": 2.4482, + "step": 13671 + }, + { + "epoch": 1.103381486562828, + "grad_norm": 0.7292699813842773, + "learning_rate": 4.591177360279978e-05, + "loss": 2.4452, + "step": 13672 + }, + { + "epoch": 1.1034621902994108, + "grad_norm": 0.7057675123214722, + "learning_rate": 4.589849594375304e-05, + "loss": 2.4336, + "step": 13673 + }, + { + "epoch": 1.103542894035994, + "grad_norm": 0.7684180736541748, + "learning_rate": 4.5885219633057196e-05, + "loss": 2.4453, + "step": 13674 + }, + { + "epoch": 1.1036235977725768, + "grad_norm": 0.7107112407684326, + "learning_rate": 4.5871944671043154e-05, + "loss": 2.4116, + "step": 13675 + }, + { + "epoch": 
1.1037043015091599, + "grad_norm": 0.659501314163208, + "learning_rate": 4.585867105804177e-05, + "loss": 2.4907, + "step": 13676 + }, + { + "epoch": 1.103785005245743, + "grad_norm": 0.7553967833518982, + "learning_rate": 4.5845398794383786e-05, + "loss": 2.3982, + "step": 13677 + }, + { + "epoch": 1.1038657089823258, + "grad_norm": 0.6861104965209961, + "learning_rate": 4.583212788040003e-05, + "loss": 2.416, + "step": 13678 + }, + { + "epoch": 1.103946412718909, + "grad_norm": 0.6546811461448669, + "learning_rate": 4.5818858316421254e-05, + "loss": 2.4506, + "step": 13679 + }, + { + "epoch": 1.1040271164554918, + "grad_norm": 0.7012909650802612, + "learning_rate": 4.58055901027782e-05, + "loss": 2.439, + "step": 13680 + }, + { + "epoch": 1.1041078201920749, + "grad_norm": 0.7594780325889587, + "learning_rate": 4.5792323239801446e-05, + "loss": 2.4437, + "step": 13681 + }, + { + "epoch": 1.104188523928658, + "grad_norm": 0.6576492190361023, + "learning_rate": 4.577905772782172e-05, + "loss": 2.443, + "step": 13682 + }, + { + "epoch": 1.1042692276652408, + "grad_norm": 0.6751925349235535, + "learning_rate": 4.576579356716963e-05, + "loss": 2.507, + "step": 13683 + }, + { + "epoch": 1.104349931401824, + "grad_norm": 0.7206710577011108, + "learning_rate": 4.575253075817567e-05, + "loss": 2.4236, + "step": 13684 + }, + { + "epoch": 1.104430635138407, + "grad_norm": 0.7736170291900635, + "learning_rate": 4.5739269301170485e-05, + "loss": 2.4095, + "step": 13685 + }, + { + "epoch": 1.1045113388749899, + "grad_norm": 0.6901736855506897, + "learning_rate": 4.572600919648457e-05, + "loss": 2.4519, + "step": 13686 + }, + { + "epoch": 1.104592042611573, + "grad_norm": 0.7762539982795715, + "learning_rate": 4.571275044444836e-05, + "loss": 2.5018, + "step": 13687 + }, + { + "epoch": 1.1046727463481558, + "grad_norm": 0.7231423854827881, + "learning_rate": 4.569949304539232e-05, + "loss": 2.4553, + "step": 13688 + }, + { + "epoch": 1.104753450084739, + "grad_norm": 
0.7713531255722046, + "learning_rate": 4.568623699964688e-05, + "loss": 2.49, + "step": 13689 + }, + { + "epoch": 1.104834153821322, + "grad_norm": 0.7355079650878906, + "learning_rate": 4.5672982307542354e-05, + "loss": 2.5191, + "step": 13690 + }, + { + "epoch": 1.1049148575579049, + "grad_norm": 0.6916452050209045, + "learning_rate": 4.565972896940913e-05, + "loss": 2.3867, + "step": 13691 + }, + { + "epoch": 1.104995561294488, + "grad_norm": 0.6622549295425415, + "learning_rate": 4.5646476985577544e-05, + "loss": 2.4364, + "step": 13692 + }, + { + "epoch": 1.105076265031071, + "grad_norm": 0.6683297157287598, + "learning_rate": 4.563322635637779e-05, + "loss": 2.43, + "step": 13693 + }, + { + "epoch": 1.105156968767654, + "grad_norm": 0.6857880353927612, + "learning_rate": 4.561997708214015e-05, + "loss": 2.4515, + "step": 13694 + }, + { + "epoch": 1.105237672504237, + "grad_norm": 0.7473817467689514, + "learning_rate": 4.5606729163194807e-05, + "loss": 2.442, + "step": 13695 + }, + { + "epoch": 1.1053183762408199, + "grad_norm": 0.6988846063613892, + "learning_rate": 4.559348259987203e-05, + "loss": 2.3886, + "step": 13696 + }, + { + "epoch": 1.105399079977403, + "grad_norm": 0.6450650691986084, + "learning_rate": 4.5580237392501836e-05, + "loss": 2.4647, + "step": 13697 + }, + { + "epoch": 1.105479783713986, + "grad_norm": 0.7669623494148254, + "learning_rate": 4.556699354141439e-05, + "loss": 2.4362, + "step": 13698 + }, + { + "epoch": 1.105560487450569, + "grad_norm": 0.7019730806350708, + "learning_rate": 4.55537510469398e-05, + "loss": 2.49, + "step": 13699 + }, + { + "epoch": 1.105641191187152, + "grad_norm": 0.6736636757850647, + "learning_rate": 4.5540509909408e-05, + "loss": 2.43, + "step": 13700 + }, + { + "epoch": 1.105721894923735, + "grad_norm": 0.6872034668922424, + "learning_rate": 4.552727012914907e-05, + "loss": 2.4507, + "step": 13701 + }, + { + "epoch": 1.105802598660318, + "grad_norm": 0.6726621985435486, + "learning_rate": 
4.5514031706492986e-05, + "loss": 2.4193, + "step": 13702 + }, + { + "epoch": 1.105883302396901, + "grad_norm": 0.7345453500747681, + "learning_rate": 4.550079464176963e-05, + "loss": 2.4257, + "step": 13703 + }, + { + "epoch": 1.105964006133484, + "grad_norm": 0.6764804124832153, + "learning_rate": 4.548755893530894e-05, + "loss": 2.4656, + "step": 13704 + }, + { + "epoch": 1.106044709870067, + "grad_norm": 0.6915058493614197, + "learning_rate": 4.5474324587440766e-05, + "loss": 2.4148, + "step": 13705 + }, + { + "epoch": 1.10612541360665, + "grad_norm": 0.7960236668586731, + "learning_rate": 4.5461091598494954e-05, + "loss": 2.4148, + "step": 13706 + }, + { + "epoch": 1.106206117343233, + "grad_norm": 0.7058970928192139, + "learning_rate": 4.544785996880131e-05, + "loss": 2.4795, + "step": 13707 + }, + { + "epoch": 1.106286821079816, + "grad_norm": 0.6979549527168274, + "learning_rate": 4.5434629698689634e-05, + "loss": 2.4329, + "step": 13708 + }, + { + "epoch": 1.1063675248163989, + "grad_norm": 0.6805241107940674, + "learning_rate": 4.5421400788489586e-05, + "loss": 2.4303, + "step": 13709 + }, + { + "epoch": 1.106448228552982, + "grad_norm": 0.7566354274749756, + "learning_rate": 4.5408173238530905e-05, + "loss": 2.4769, + "step": 13710 + }, + { + "epoch": 1.106528932289565, + "grad_norm": 0.647773802280426, + "learning_rate": 4.539494704914324e-05, + "loss": 2.4037, + "step": 13711 + }, + { + "epoch": 1.106609636026148, + "grad_norm": 0.7248135209083557, + "learning_rate": 4.538172222065628e-05, + "loss": 2.4366, + "step": 13712 + }, + { + "epoch": 1.106690339762731, + "grad_norm": 0.6861057281494141, + "learning_rate": 4.536849875339953e-05, + "loss": 2.456, + "step": 13713 + }, + { + "epoch": 1.106771043499314, + "grad_norm": 0.7386166453361511, + "learning_rate": 4.5355276647702605e-05, + "loss": 2.4806, + "step": 13714 + }, + { + "epoch": 1.106851747235897, + "grad_norm": 0.664402961730957, + "learning_rate": 4.534205590389503e-05, + "loss": 2.4846, + 
"step": 13715 + }, + { + "epoch": 1.10693245097248, + "grad_norm": 0.8123969435691833, + "learning_rate": 4.5328836522306296e-05, + "loss": 2.4945, + "step": 13716 + }, + { + "epoch": 1.1070131547090631, + "grad_norm": 0.7375624775886536, + "learning_rate": 4.5315618503265865e-05, + "loss": 2.4533, + "step": 13717 + }, + { + "epoch": 1.107093858445646, + "grad_norm": 0.70960932970047, + "learning_rate": 4.53024018471032e-05, + "loss": 2.4351, + "step": 13718 + }, + { + "epoch": 1.107174562182229, + "grad_norm": 0.7170885801315308, + "learning_rate": 4.5289186554147645e-05, + "loss": 2.4654, + "step": 13719 + }, + { + "epoch": 1.107255265918812, + "grad_norm": 0.6986895203590393, + "learning_rate": 4.5275972624728556e-05, + "loss": 2.4079, + "step": 13720 + }, + { + "epoch": 1.107335969655395, + "grad_norm": 0.6948813796043396, + "learning_rate": 4.526276005917532e-05, + "loss": 2.4981, + "step": 13721 + }, + { + "epoch": 1.1074166733919781, + "grad_norm": 0.7719457149505615, + "learning_rate": 4.524954885781717e-05, + "loss": 2.4853, + "step": 13722 + }, + { + "epoch": 1.107497377128561, + "grad_norm": 0.652686357498169, + "learning_rate": 4.5236339020983363e-05, + "loss": 2.3672, + "step": 13723 + }, + { + "epoch": 1.107578080865144, + "grad_norm": 0.7517427802085876, + "learning_rate": 4.5223130549003144e-05, + "loss": 2.3947, + "step": 13724 + }, + { + "epoch": 1.107658784601727, + "grad_norm": 0.6755498647689819, + "learning_rate": 4.5209923442205705e-05, + "loss": 2.4173, + "step": 13725 + }, + { + "epoch": 1.10773948833831, + "grad_norm": 0.6801806688308716, + "learning_rate": 4.519671770092019e-05, + "loss": 2.4366, + "step": 13726 + }, + { + "epoch": 1.1078201920748931, + "grad_norm": 0.6665045619010925, + "learning_rate": 4.5183513325475724e-05, + "loss": 2.4797, + "step": 13727 + }, + { + "epoch": 1.107900895811476, + "grad_norm": 0.7303451299667358, + "learning_rate": 4.517031031620145e-05, + "loss": 2.4487, + "step": 13728 + }, + { + "epoch": 
1.107981599548059, + "grad_norm": 0.7241206765174866, + "learning_rate": 4.515710867342632e-05, + "loss": 2.4632, + "step": 13729 + }, + { + "epoch": 1.1080623032846422, + "grad_norm": 0.738835334777832, + "learning_rate": 4.514390839747941e-05, + "loss": 2.3937, + "step": 13730 + }, + { + "epoch": 1.108143007021225, + "grad_norm": 0.7062843441963196, + "learning_rate": 4.5130709488689726e-05, + "loss": 2.4576, + "step": 13731 + }, + { + "epoch": 1.1082237107578081, + "grad_norm": 0.7074100971221924, + "learning_rate": 4.511751194738616e-05, + "loss": 2.4843, + "step": 13732 + }, + { + "epoch": 1.108304414494391, + "grad_norm": 0.751742959022522, + "learning_rate": 4.510431577389765e-05, + "loss": 2.4607, + "step": 13733 + }, + { + "epoch": 1.108385118230974, + "grad_norm": 0.7370054125785828, + "learning_rate": 4.50911209685531e-05, + "loss": 2.4877, + "step": 13734 + }, + { + "epoch": 1.1084658219675572, + "grad_norm": 0.6410251259803772, + "learning_rate": 4.507792753168135e-05, + "loss": 2.4254, + "step": 13735 + }, + { + "epoch": 1.10854652570414, + "grad_norm": 0.7141317129135132, + "learning_rate": 4.506473546361121e-05, + "loss": 2.4962, + "step": 13736 + }, + { + "epoch": 1.1086272294407231, + "grad_norm": 0.6903412342071533, + "learning_rate": 4.50515447646715e-05, + "loss": 2.4315, + "step": 13737 + }, + { + "epoch": 1.1087079331773062, + "grad_norm": 0.7068564891815186, + "learning_rate": 4.50383554351909e-05, + "loss": 2.5795, + "step": 13738 + }, + { + "epoch": 1.108788636913889, + "grad_norm": 0.6880627274513245, + "learning_rate": 4.5025167475498154e-05, + "loss": 2.4399, + "step": 13739 + }, + { + "epoch": 1.1088693406504722, + "grad_norm": 0.6721192598342896, + "learning_rate": 4.5011980885921965e-05, + "loss": 2.4651, + "step": 13740 + }, + { + "epoch": 1.108950044387055, + "grad_norm": 0.7084259986877441, + "learning_rate": 4.499879566679093e-05, + "loss": 2.4121, + "step": 13741 + }, + { + "epoch": 1.109030748123638, + "grad_norm": 
0.6809335947036743, + "learning_rate": 4.498561181843368e-05, + "loss": 2.4714, + "step": 13742 + }, + { + "epoch": 1.1091114518602212, + "grad_norm": 0.690416693687439, + "learning_rate": 4.497242934117879e-05, + "loss": 2.4744, + "step": 13743 + }, + { + "epoch": 1.109192155596804, + "grad_norm": 0.728522002696991, + "learning_rate": 4.495924823535483e-05, + "loss": 2.4374, + "step": 13744 + }, + { + "epoch": 1.1092728593333872, + "grad_norm": 0.7000796794891357, + "learning_rate": 4.494606850129026e-05, + "loss": 2.4635, + "step": 13745 + }, + { + "epoch": 1.1093535630699702, + "grad_norm": 0.824645459651947, + "learning_rate": 4.493289013931353e-05, + "loss": 2.3724, + "step": 13746 + }, + { + "epoch": 1.109434266806553, + "grad_norm": 0.6561198830604553, + "learning_rate": 4.491971314975321e-05, + "loss": 2.3726, + "step": 13747 + }, + { + "epoch": 1.1095149705431362, + "grad_norm": 0.7067599892616272, + "learning_rate": 4.490653753293757e-05, + "loss": 2.4285, + "step": 13748 + }, + { + "epoch": 1.109595674279719, + "grad_norm": 0.6954898834228516, + "learning_rate": 4.489336328919503e-05, + "loss": 2.4252, + "step": 13749 + }, + { + "epoch": 1.1096763780163021, + "grad_norm": 0.6683667302131653, + "learning_rate": 4.4880190418853974e-05, + "loss": 2.4815, + "step": 13750 + }, + { + "epoch": 1.1097570817528852, + "grad_norm": 0.7554971575737, + "learning_rate": 4.486701892224261e-05, + "loss": 2.5036, + "step": 13751 + }, + { + "epoch": 1.109837785489468, + "grad_norm": 0.7043242454528809, + "learning_rate": 4.485384879968926e-05, + "loss": 2.3757, + "step": 13752 + }, + { + "epoch": 1.1099184892260512, + "grad_norm": 0.8016893863677979, + "learning_rate": 4.4840680051522186e-05, + "loss": 2.4655, + "step": 13753 + }, + { + "epoch": 1.1099991929626343, + "grad_norm": 0.7022131085395813, + "learning_rate": 4.4827512678069515e-05, + "loss": 2.475, + "step": 13754 + }, + { + "epoch": 1.1100798966992171, + "grad_norm": 0.6963247656822205, + "learning_rate": 
4.4814346679659455e-05, + "loss": 2.4866, + "step": 13755 + }, + { + "epoch": 1.1101606004358002, + "grad_norm": 0.6980907917022705, + "learning_rate": 4.4801182056620125e-05, + "loss": 2.4322, + "step": 13756 + }, + { + "epoch": 1.110241304172383, + "grad_norm": 0.68063884973526, + "learning_rate": 4.478801880927964e-05, + "loss": 2.426, + "step": 13757 + }, + { + "epoch": 1.1103220079089662, + "grad_norm": 0.7454195618629456, + "learning_rate": 4.477485693796605e-05, + "loss": 2.5042, + "step": 13758 + }, + { + "epoch": 1.1104027116455493, + "grad_norm": 0.685975193977356, + "learning_rate": 4.476169644300737e-05, + "loss": 2.4874, + "step": 13759 + }, + { + "epoch": 1.1104834153821321, + "grad_norm": 0.7060961723327637, + "learning_rate": 4.4748537324731664e-05, + "loss": 2.4126, + "step": 13760 + }, + { + "epoch": 1.1105641191187152, + "grad_norm": 0.6794416904449463, + "learning_rate": 4.4735379583466795e-05, + "loss": 2.4112, + "step": 13761 + }, + { + "epoch": 1.1106448228552983, + "grad_norm": 0.6854961514472961, + "learning_rate": 4.472222321954073e-05, + "loss": 2.4909, + "step": 13762 + }, + { + "epoch": 1.1107255265918812, + "grad_norm": 0.7660776972770691, + "learning_rate": 4.470906823328139e-05, + "loss": 2.5021, + "step": 13763 + }, + { + "epoch": 1.1108062303284643, + "grad_norm": 0.7027743458747864, + "learning_rate": 4.4695914625016564e-05, + "loss": 2.4375, + "step": 13764 + }, + { + "epoch": 1.1108869340650471, + "grad_norm": 0.6896719336509705, + "learning_rate": 4.468276239507413e-05, + "loss": 2.4574, + "step": 13765 + }, + { + "epoch": 1.1109676378016302, + "grad_norm": 0.685141384601593, + "learning_rate": 4.4669611543781844e-05, + "loss": 2.4311, + "step": 13766 + }, + { + "epoch": 1.1110483415382133, + "grad_norm": 0.7108263373374939, + "learning_rate": 4.465646207146746e-05, + "loss": 2.4565, + "step": 13767 + }, + { + "epoch": 1.1111290452747962, + "grad_norm": 0.63578861951828, + "learning_rate": 4.464331397845873e-05, + "loss": 
2.449, + "step": 13768 + }, + { + "epoch": 1.1112097490113793, + "grad_norm": 0.6917306780815125, + "learning_rate": 4.463016726508335e-05, + "loss": 2.4681, + "step": 13769 + }, + { + "epoch": 1.1112904527479621, + "grad_norm": 0.7328054308891296, + "learning_rate": 4.4617021931668914e-05, + "loss": 2.404, + "step": 13770 + }, + { + "epoch": 1.1113711564845452, + "grad_norm": 0.6501660943031311, + "learning_rate": 4.460387797854305e-05, + "loss": 2.4228, + "step": 13771 + }, + { + "epoch": 1.1114518602211283, + "grad_norm": 0.6656771302223206, + "learning_rate": 4.459073540603336e-05, + "loss": 2.4814, + "step": 13772 + }, + { + "epoch": 1.1115325639577112, + "grad_norm": 0.671017587184906, + "learning_rate": 4.457759421446742e-05, + "loss": 2.4605, + "step": 13773 + }, + { + "epoch": 1.1116132676942942, + "grad_norm": 0.6715343594551086, + "learning_rate": 4.456445440417267e-05, + "loss": 2.424, + "step": 13774 + }, + { + "epoch": 1.1116939714308773, + "grad_norm": 0.7051515579223633, + "learning_rate": 4.4551315975476626e-05, + "loss": 2.4358, + "step": 13775 + }, + { + "epoch": 1.1117746751674602, + "grad_norm": 0.7810437679290771, + "learning_rate": 4.453817892870673e-05, + "loss": 2.4718, + "step": 13776 + }, + { + "epoch": 1.1118553789040433, + "grad_norm": 0.7072561383247375, + "learning_rate": 4.4525043264190405e-05, + "loss": 2.4429, + "step": 13777 + }, + { + "epoch": 1.1119360826406264, + "grad_norm": 0.7949702143669128, + "learning_rate": 4.4511908982255e-05, + "loss": 2.4413, + "step": 13778 + }, + { + "epoch": 1.1120167863772092, + "grad_norm": 0.6716235876083374, + "learning_rate": 4.449877608322792e-05, + "loss": 2.427, + "step": 13779 + }, + { + "epoch": 1.1120974901137923, + "grad_norm": 0.7332563996315002, + "learning_rate": 4.448564456743638e-05, + "loss": 2.4567, + "step": 13780 + }, + { + "epoch": 1.1121781938503752, + "grad_norm": 0.7264607548713684, + "learning_rate": 4.447251443520769e-05, + "loss": 2.4844, + "step": 13781 + }, + { + 
"epoch": 1.1122588975869583, + "grad_norm": 0.7819967865943909, + "learning_rate": 4.4459385686869136e-05, + "loss": 2.5129, + "step": 13782 + }, + { + "epoch": 1.1123396013235414, + "grad_norm": 0.7587651610374451, + "learning_rate": 4.4446258322747824e-05, + "loss": 2.4714, + "step": 13783 + }, + { + "epoch": 1.1124203050601242, + "grad_norm": 0.6392871141433716, + "learning_rate": 4.443313234317099e-05, + "loss": 2.462, + "step": 13784 + }, + { + "epoch": 1.1125010087967073, + "grad_norm": 0.6609585881233215, + "learning_rate": 4.442000774846574e-05, + "loss": 2.4566, + "step": 13785 + }, + { + "epoch": 1.1125817125332902, + "grad_norm": 0.762924075126648, + "learning_rate": 4.440688453895919e-05, + "loss": 2.4613, + "step": 13786 + }, + { + "epoch": 1.1126624162698733, + "grad_norm": 0.7096089124679565, + "learning_rate": 4.4393762714978394e-05, + "loss": 2.4195, + "step": 13787 + }, + { + "epoch": 1.1127431200064564, + "grad_norm": 0.6663284301757812, + "learning_rate": 4.438064227685039e-05, + "loss": 2.422, + "step": 13788 + }, + { + "epoch": 1.1128238237430392, + "grad_norm": 0.6653628945350647, + "learning_rate": 4.436752322490221e-05, + "loss": 2.4477, + "step": 13789 + }, + { + "epoch": 1.1129045274796223, + "grad_norm": 0.6527605056762695, + "learning_rate": 4.435440555946073e-05, + "loss": 2.3874, + "step": 13790 + }, + { + "epoch": 1.1129852312162054, + "grad_norm": 0.6801275014877319, + "learning_rate": 4.4341289280852935e-05, + "loss": 2.4474, + "step": 13791 + }, + { + "epoch": 1.1130659349527883, + "grad_norm": 0.729905366897583, + "learning_rate": 4.432817438940574e-05, + "loss": 2.4711, + "step": 13792 + }, + { + "epoch": 1.1131466386893714, + "grad_norm": 0.7074751853942871, + "learning_rate": 4.431506088544593e-05, + "loss": 2.451, + "step": 13793 + }, + { + "epoch": 1.1132273424259542, + "grad_norm": 0.7241154313087463, + "learning_rate": 4.430194876930035e-05, + "loss": 2.4883, + "step": 13794 + }, + { + "epoch": 1.1133080461625373, + 
"grad_norm": 0.6549142003059387, + "learning_rate": 4.428883804129586e-05, + "loss": 2.4243, + "step": 13795 + }, + { + "epoch": 1.1133887498991204, + "grad_norm": 0.7046780586242676, + "learning_rate": 4.427572870175907e-05, + "loss": 2.4143, + "step": 13796 + }, + { + "epoch": 1.1134694536357033, + "grad_norm": 0.6563952565193176, + "learning_rate": 4.426262075101682e-05, + "loss": 2.416, + "step": 13797 + }, + { + "epoch": 1.1135501573722864, + "grad_norm": 0.7002081871032715, + "learning_rate": 4.4249514189395803e-05, + "loss": 2.3673, + "step": 13798 + }, + { + "epoch": 1.1136308611088694, + "grad_norm": 0.6766571998596191, + "learning_rate": 4.423640901722259e-05, + "loss": 2.4941, + "step": 13799 + }, + { + "epoch": 1.1137115648454523, + "grad_norm": 0.7404381632804871, + "learning_rate": 4.422330523482383e-05, + "loss": 2.4794, + "step": 13800 + }, + { + "epoch": 1.1137922685820354, + "grad_norm": 0.6670998930931091, + "learning_rate": 4.421020284252614e-05, + "loss": 2.5131, + "step": 13801 + }, + { + "epoch": 1.1138729723186183, + "grad_norm": 0.803720235824585, + "learning_rate": 4.4197101840655995e-05, + "loss": 2.4751, + "step": 13802 + }, + { + "epoch": 1.1139536760552013, + "grad_norm": 0.6532074809074402, + "learning_rate": 4.4184002229539947e-05, + "loss": 2.4147, + "step": 13803 + }, + { + "epoch": 1.1140343797917844, + "grad_norm": 0.6548035144805908, + "learning_rate": 4.417090400950447e-05, + "loss": 2.4601, + "step": 13804 + }, + { + "epoch": 1.1141150835283673, + "grad_norm": 0.6971763968467712, + "learning_rate": 4.415780718087603e-05, + "loss": 2.4752, + "step": 13805 + }, + { + "epoch": 1.1141957872649504, + "grad_norm": 0.6624024510383606, + "learning_rate": 4.414471174398098e-05, + "loss": 2.4183, + "step": 13806 + }, + { + "epoch": 1.1142764910015335, + "grad_norm": 0.6571507453918457, + "learning_rate": 4.4131617699145714e-05, + "loss": 2.4747, + "step": 13807 + }, + { + "epoch": 1.1143571947381163, + "grad_norm": 0.7165808081626892, + 
"learning_rate": 4.411852504669658e-05, + "loss": 2.453, + "step": 13808 + }, + { + "epoch": 1.1144378984746994, + "grad_norm": 0.6708057522773743, + "learning_rate": 4.410543378695988e-05, + "loss": 2.4858, + "step": 13809 + }, + { + "epoch": 1.1145186022112823, + "grad_norm": 0.889302134513855, + "learning_rate": 4.409234392026187e-05, + "loss": 2.4333, + "step": 13810 + }, + { + "epoch": 1.1145993059478654, + "grad_norm": 0.7440677881240845, + "learning_rate": 4.407925544692884e-05, + "loss": 2.49, + "step": 13811 + }, + { + "epoch": 1.1146800096844485, + "grad_norm": 0.6688372492790222, + "learning_rate": 4.406616836728691e-05, + "loss": 2.4663, + "step": 13812 + }, + { + "epoch": 1.1147607134210313, + "grad_norm": 0.7108204364776611, + "learning_rate": 4.4053082681662264e-05, + "loss": 2.4843, + "step": 13813 + }, + { + "epoch": 1.1148414171576144, + "grad_norm": 0.7270475029945374, + "learning_rate": 4.4039998390381087e-05, + "loss": 2.4158, + "step": 13814 + }, + { + "epoch": 1.1149221208941973, + "grad_norm": 0.7243396639823914, + "learning_rate": 4.402691549376939e-05, + "loss": 2.3969, + "step": 13815 + }, + { + "epoch": 1.1150028246307804, + "grad_norm": 0.6687803268432617, + "learning_rate": 4.4013833992153285e-05, + "loss": 2.42, + "step": 13816 + }, + { + "epoch": 1.1150835283673635, + "grad_norm": 0.6892626285552979, + "learning_rate": 4.400075388585877e-05, + "loss": 2.4086, + "step": 13817 + }, + { + "epoch": 1.1151642321039463, + "grad_norm": 0.7556231021881104, + "learning_rate": 4.398767517521186e-05, + "loss": 2.4201, + "step": 13818 + }, + { + "epoch": 1.1152449358405294, + "grad_norm": 0.6872838735580444, + "learning_rate": 4.397459786053851e-05, + "loss": 2.4143, + "step": 13819 + }, + { + "epoch": 1.1153256395771125, + "grad_norm": 0.6681817770004272, + "learning_rate": 4.396152194216463e-05, + "loss": 2.4404, + "step": 13820 + }, + { + "epoch": 1.1154063433136954, + "grad_norm": 0.7107201218605042, + "learning_rate": 4.394844742041614e-05, 
+ "loss": 2.4503, + "step": 13821 + }, + { + "epoch": 1.1154870470502785, + "grad_norm": 0.706541121006012, + "learning_rate": 4.3935374295618824e-05, + "loss": 2.5106, + "step": 13822 + }, + { + "epoch": 1.1155677507868615, + "grad_norm": 0.6659905910491943, + "learning_rate": 4.392230256809854e-05, + "loss": 2.3839, + "step": 13823 + }, + { + "epoch": 1.1156484545234444, + "grad_norm": 0.7125810980796814, + "learning_rate": 4.3909232238181095e-05, + "loss": 2.4463, + "step": 13824 + }, + { + "epoch": 1.1157291582600275, + "grad_norm": 0.6581901907920837, + "learning_rate": 4.389616330619217e-05, + "loss": 2.4004, + "step": 13825 + }, + { + "epoch": 1.1158098619966104, + "grad_norm": 0.7660872340202332, + "learning_rate": 4.388309577245752e-05, + "loss": 2.4685, + "step": 13826 + }, + { + "epoch": 1.1158905657331935, + "grad_norm": 0.699526846408844, + "learning_rate": 4.387002963730281e-05, + "loss": 2.4131, + "step": 13827 + }, + { + "epoch": 1.1159712694697765, + "grad_norm": 0.7031015753746033, + "learning_rate": 4.3856964901053685e-05, + "loss": 2.4476, + "step": 13828 + }, + { + "epoch": 1.1160519732063594, + "grad_norm": 0.6876828074455261, + "learning_rate": 4.384390156403575e-05, + "loss": 2.4402, + "step": 13829 + }, + { + "epoch": 1.1161326769429425, + "grad_norm": 0.7188935279846191, + "learning_rate": 4.3830839626574626e-05, + "loss": 2.4473, + "step": 13830 + }, + { + "epoch": 1.1162133806795254, + "grad_norm": 0.6825287938117981, + "learning_rate": 4.381777908899577e-05, + "loss": 2.4757, + "step": 13831 + }, + { + "epoch": 1.1162940844161084, + "grad_norm": 0.718267560005188, + "learning_rate": 4.380471995162472e-05, + "loss": 2.483, + "step": 13832 + }, + { + "epoch": 1.1163747881526915, + "grad_norm": 0.6526767611503601, + "learning_rate": 4.379166221478697e-05, + "loss": 2.4161, + "step": 13833 + }, + { + "epoch": 1.1164554918892744, + "grad_norm": 0.7541480660438538, + "learning_rate": 4.37786058788079e-05, + "loss": 2.4876, + "step": 13834 + 
}, + { + "epoch": 1.1165361956258575, + "grad_norm": 0.7144232988357544, + "learning_rate": 4.376555094401294e-05, + "loss": 2.4153, + "step": 13835 + }, + { + "epoch": 1.1166168993624406, + "grad_norm": 0.7544882297515869, + "learning_rate": 4.3752497410727445e-05, + "loss": 2.4634, + "step": 13836 + }, + { + "epoch": 1.1166976030990234, + "grad_norm": 0.7263267040252686, + "learning_rate": 4.373944527927674e-05, + "loss": 2.5189, + "step": 13837 + }, + { + "epoch": 1.1167783068356065, + "grad_norm": 0.7709252834320068, + "learning_rate": 4.3726394549986135e-05, + "loss": 2.5036, + "step": 13838 + }, + { + "epoch": 1.1168590105721894, + "grad_norm": 0.6849128007888794, + "learning_rate": 4.3713345223180866e-05, + "loss": 2.414, + "step": 13839 + }, + { + "epoch": 1.1169397143087725, + "grad_norm": 0.6807512044906616, + "learning_rate": 4.3700297299186224e-05, + "loss": 2.4924, + "step": 13840 + }, + { + "epoch": 1.1170204180453556, + "grad_norm": 0.6894977688789368, + "learning_rate": 4.3687250778327294e-05, + "loss": 2.4183, + "step": 13841 + }, + { + "epoch": 1.1171011217819384, + "grad_norm": 0.6657617092132568, + "learning_rate": 4.367420566092928e-05, + "loss": 2.448, + "step": 13842 + }, + { + "epoch": 1.1171818255185215, + "grad_norm": 0.7104446291923523, + "learning_rate": 4.366116194731733e-05, + "loss": 2.4862, + "step": 13843 + }, + { + "epoch": 1.1172625292551046, + "grad_norm": 0.7485257387161255, + "learning_rate": 4.3648119637816465e-05, + "loss": 2.4253, + "step": 13844 + }, + { + "epoch": 1.1173432329916875, + "grad_norm": 0.7079899907112122, + "learning_rate": 4.363507873275177e-05, + "loss": 2.4235, + "step": 13845 + }, + { + "epoch": 1.1174239367282706, + "grad_norm": 0.6891573667526245, + "learning_rate": 4.3622039232448274e-05, + "loss": 2.4382, + "step": 13846 + }, + { + "epoch": 1.1175046404648534, + "grad_norm": 0.6886103749275208, + "learning_rate": 4.360900113723086e-05, + "loss": 2.5115, + "step": 13847 + }, + { + "epoch": 
1.1175853442014365, + "grad_norm": 0.7511457800865173, + "learning_rate": 4.35959644474246e-05, + "loss": 2.4071, + "step": 13848 + }, + { + "epoch": 1.1176660479380196, + "grad_norm": 0.6526182293891907, + "learning_rate": 4.358292916335437e-05, + "loss": 2.4242, + "step": 13849 + }, + { + "epoch": 1.1177467516746025, + "grad_norm": 0.7385138273239136, + "learning_rate": 4.356989528534499e-05, + "loss": 2.4459, + "step": 13850 + }, + { + "epoch": 1.1178274554111856, + "grad_norm": 0.6668610572814941, + "learning_rate": 4.355686281372132e-05, + "loss": 2.4188, + "step": 13851 + }, + { + "epoch": 1.1179081591477686, + "grad_norm": 0.6950691342353821, + "learning_rate": 4.354383174880818e-05, + "loss": 2.4339, + "step": 13852 + }, + { + "epoch": 1.1179888628843515, + "grad_norm": 0.7017496824264526, + "learning_rate": 4.3530802090930375e-05, + "loss": 2.4733, + "step": 13853 + }, + { + "epoch": 1.1180695666209346, + "grad_norm": 0.8118221759796143, + "learning_rate": 4.351777384041254e-05, + "loss": 2.4826, + "step": 13854 + }, + { + "epoch": 1.1181502703575175, + "grad_norm": 0.7233164310455322, + "learning_rate": 4.350474699757945e-05, + "loss": 2.4637, + "step": 13855 + }, + { + "epoch": 1.1182309740941005, + "grad_norm": 0.6354575157165527, + "learning_rate": 4.349172156275576e-05, + "loss": 2.4487, + "step": 13856 + }, + { + "epoch": 1.1183116778306836, + "grad_norm": 0.6776937246322632, + "learning_rate": 4.347869753626606e-05, + "loss": 2.4292, + "step": 13857 + }, + { + "epoch": 1.1183923815672665, + "grad_norm": 0.6656864881515503, + "learning_rate": 4.3465674918434953e-05, + "loss": 2.484, + "step": 13858 + }, + { + "epoch": 1.1184730853038496, + "grad_norm": 0.7659650444984436, + "learning_rate": 4.345265370958702e-05, + "loss": 2.4181, + "step": 13859 + }, + { + "epoch": 1.1185537890404325, + "grad_norm": 0.6546063423156738, + "learning_rate": 4.3439633910046764e-05, + "loss": 2.4657, + "step": 13860 + }, + { + "epoch": 1.1186344927770155, + "grad_norm": 
0.6869762539863586, + "learning_rate": 4.342661552013869e-05, + "loss": 2.513, + "step": 13861 + }, + { + "epoch": 1.1187151965135986, + "grad_norm": 0.6633490324020386, + "learning_rate": 4.3413598540187275e-05, + "loss": 2.4716, + "step": 13862 + }, + { + "epoch": 1.1187959002501815, + "grad_norm": 0.7238267660140991, + "learning_rate": 4.340058297051687e-05, + "loss": 2.4353, + "step": 13863 + }, + { + "epoch": 1.1188766039867646, + "grad_norm": 0.67429119348526, + "learning_rate": 4.3387568811451875e-05, + "loss": 2.4808, + "step": 13864 + }, + { + "epoch": 1.1189573077233477, + "grad_norm": 0.6901153326034546, + "learning_rate": 4.33745560633167e-05, + "loss": 2.4785, + "step": 13865 + }, + { + "epoch": 1.1190380114599305, + "grad_norm": 0.7227689027786255, + "learning_rate": 4.336154472643556e-05, + "loss": 2.4414, + "step": 13866 + }, + { + "epoch": 1.1191187151965136, + "grad_norm": 0.713793933391571, + "learning_rate": 4.33485348011328e-05, + "loss": 2.5136, + "step": 13867 + }, + { + "epoch": 1.1191994189330967, + "grad_norm": 0.6495655179023743, + "learning_rate": 4.333552628773263e-05, + "loss": 2.4267, + "step": 13868 + }, + { + "epoch": 1.1192801226696796, + "grad_norm": 0.7265790104866028, + "learning_rate": 4.3322519186559274e-05, + "loss": 2.4406, + "step": 13869 + }, + { + "epoch": 1.1193608264062627, + "grad_norm": 0.6700571179389954, + "learning_rate": 4.330951349793688e-05, + "loss": 2.4457, + "step": 13870 + }, + { + "epoch": 1.1194415301428455, + "grad_norm": 0.7112334966659546, + "learning_rate": 4.3296509222189616e-05, + "loss": 2.4788, + "step": 13871 + }, + { + "epoch": 1.1195222338794286, + "grad_norm": 0.7056662440299988, + "learning_rate": 4.32835063596416e-05, + "loss": 2.5195, + "step": 13872 + }, + { + "epoch": 1.1196029376160117, + "grad_norm": 0.7198836207389832, + "learning_rate": 4.327050491061683e-05, + "loss": 2.4827, + "step": 13873 + }, + { + "epoch": 1.1196836413525946, + "grad_norm": 0.7384079694747925, + "learning_rate": 
4.325750487543936e-05, + "loss": 2.4556, + "step": 13874 + }, + { + "epoch": 1.1197643450891777, + "grad_norm": 0.7315430641174316, + "learning_rate": 4.324450625443324e-05, + "loss": 2.4302, + "step": 13875 + }, + { + "epoch": 1.1198450488257605, + "grad_norm": 0.6692587733268738, + "learning_rate": 4.323150904792234e-05, + "loss": 2.5283, + "step": 13876 + }, + { + "epoch": 1.1199257525623436, + "grad_norm": 0.7407168745994568, + "learning_rate": 4.321851325623063e-05, + "loss": 2.4757, + "step": 13877 + }, + { + "epoch": 1.1200064562989267, + "grad_norm": 0.7387246489524841, + "learning_rate": 4.3205518879682e-05, + "loss": 2.5025, + "step": 13878 + }, + { + "epoch": 1.1200871600355096, + "grad_norm": 0.8058405518531799, + "learning_rate": 4.319252591860031e-05, + "loss": 2.4951, + "step": 13879 + }, + { + "epoch": 1.1201678637720927, + "grad_norm": 0.6964818835258484, + "learning_rate": 4.317953437330936e-05, + "loss": 2.4462, + "step": 13880 + }, + { + "epoch": 1.1202485675086757, + "grad_norm": 0.6904557347297668, + "learning_rate": 4.316654424413294e-05, + "loss": 2.3981, + "step": 13881 + }, + { + "epoch": 1.1203292712452586, + "grad_norm": 0.6555196046829224, + "learning_rate": 4.315355553139485e-05, + "loss": 2.418, + "step": 13882 + }, + { + "epoch": 1.1204099749818417, + "grad_norm": 0.7745094299316406, + "learning_rate": 4.3140568235418724e-05, + "loss": 2.4635, + "step": 13883 + }, + { + "epoch": 1.1204906787184246, + "grad_norm": 0.686676025390625, + "learning_rate": 4.312758235652825e-05, + "loss": 2.4847, + "step": 13884 + }, + { + "epoch": 1.1205713824550076, + "grad_norm": 0.6937002539634705, + "learning_rate": 4.311459789504714e-05, + "loss": 2.4632, + "step": 13885 + }, + { + "epoch": 1.1206520861915907, + "grad_norm": 0.7024590373039246, + "learning_rate": 4.310161485129891e-05, + "loss": 2.4268, + "step": 13886 + }, + { + "epoch": 1.1207327899281736, + "grad_norm": 0.6848484873771667, + "learning_rate": 4.308863322560717e-05, + "loss": 
2.4895, + "step": 13887 + }, + { + "epoch": 1.1208134936647567, + "grad_norm": 0.7071602940559387, + "learning_rate": 4.307565301829546e-05, + "loss": 2.4348, + "step": 13888 + }, + { + "epoch": 1.1208941974013398, + "grad_norm": 0.6868199706077576, + "learning_rate": 4.3062674229687274e-05, + "loss": 2.4613, + "step": 13889 + }, + { + "epoch": 1.1209749011379226, + "grad_norm": 0.7283496260643005, + "learning_rate": 4.304969686010608e-05, + "loss": 2.478, + "step": 13890 + }, + { + "epoch": 1.1210556048745057, + "grad_norm": 0.6907255053520203, + "learning_rate": 4.303672090987535e-05, + "loss": 2.4431, + "step": 13891 + }, + { + "epoch": 1.1211363086110886, + "grad_norm": 0.675089418888092, + "learning_rate": 4.302374637931841e-05, + "loss": 2.4398, + "step": 13892 + }, + { + "epoch": 1.1212170123476717, + "grad_norm": 0.6929863095283508, + "learning_rate": 4.301077326875863e-05, + "loss": 2.3909, + "step": 13893 + }, + { + "epoch": 1.1212977160842548, + "grad_norm": 0.6746132969856262, + "learning_rate": 4.29978015785194e-05, + "loss": 2.4726, + "step": 13894 + }, + { + "epoch": 1.1213784198208376, + "grad_norm": 0.720781147480011, + "learning_rate": 4.298483130892392e-05, + "loss": 2.4445, + "step": 13895 + }, + { + "epoch": 1.1214591235574207, + "grad_norm": 0.6624416708946228, + "learning_rate": 4.297186246029549e-05, + "loss": 2.3868, + "step": 13896 + }, + { + "epoch": 1.1215398272940038, + "grad_norm": 0.7849127054214478, + "learning_rate": 4.295889503295731e-05, + "loss": 2.4479, + "step": 13897 + }, + { + "epoch": 1.1216205310305867, + "grad_norm": 0.6655337810516357, + "learning_rate": 4.294592902723259e-05, + "loss": 2.5093, + "step": 13898 + }, + { + "epoch": 1.1217012347671698, + "grad_norm": 0.7055402398109436, + "learning_rate": 4.293296444344445e-05, + "loss": 2.4385, + "step": 13899 + }, + { + "epoch": 1.1217819385037526, + "grad_norm": 0.7388767600059509, + "learning_rate": 4.2920001281916e-05, + "loss": 2.4863, + "step": 13900 + }, + { + 
"epoch": 1.1218626422403357, + "grad_norm": 0.6915223002433777, + "learning_rate": 4.2907039542970373e-05, + "loss": 2.4218, + "step": 13901 + }, + { + "epoch": 1.1219433459769188, + "grad_norm": 0.7124893665313721, + "learning_rate": 4.289407922693053e-05, + "loss": 2.4514, + "step": 13902 + }, + { + "epoch": 1.1220240497135017, + "grad_norm": 0.6552406549453735, + "learning_rate": 4.28811203341195e-05, + "loss": 2.4558, + "step": 13903 + }, + { + "epoch": 1.1221047534500848, + "grad_norm": 0.6641791462898254, + "learning_rate": 4.286816286486031e-05, + "loss": 2.4277, + "step": 13904 + }, + { + "epoch": 1.1221854571866678, + "grad_norm": 0.677733838558197, + "learning_rate": 4.285520681947579e-05, + "loss": 2.4861, + "step": 13905 + }, + { + "epoch": 1.1222661609232507, + "grad_norm": 0.6572888493537903, + "learning_rate": 4.284225219828891e-05, + "loss": 2.4657, + "step": 13906 + }, + { + "epoch": 1.1223468646598338, + "grad_norm": 0.6923860907554626, + "learning_rate": 4.2829299001622546e-05, + "loss": 2.4857, + "step": 13907 + }, + { + "epoch": 1.1224275683964167, + "grad_norm": 0.6971977949142456, + "learning_rate": 4.281634722979947e-05, + "loss": 2.4434, + "step": 13908 + }, + { + "epoch": 1.1225082721329998, + "grad_norm": 0.6828060746192932, + "learning_rate": 4.2803396883142456e-05, + "loss": 2.4342, + "step": 13909 + }, + { + "epoch": 1.1225889758695828, + "grad_norm": 0.7001270651817322, + "learning_rate": 4.279044796197438e-05, + "loss": 2.5222, + "step": 13910 + }, + { + "epoch": 1.1226696796061657, + "grad_norm": 0.6425578594207764, + "learning_rate": 4.277750046661785e-05, + "loss": 2.42, + "step": 13911 + }, + { + "epoch": 1.1227503833427488, + "grad_norm": 0.6498209834098816, + "learning_rate": 4.2764554397395585e-05, + "loss": 2.4448, + "step": 13912 + }, + { + "epoch": 1.1228310870793319, + "grad_norm": 0.6894031763076782, + "learning_rate": 4.275160975463025e-05, + "loss": 2.4508, + "step": 13913 + }, + { + "epoch": 1.1229117908159147, + 
"grad_norm": 0.7286608219146729, + "learning_rate": 4.273866653864448e-05, + "loss": 2.4557, + "step": 13914 + }, + { + "epoch": 1.1229924945524978, + "grad_norm": 0.753826379776001, + "learning_rate": 4.272572474976079e-05, + "loss": 2.4635, + "step": 13915 + }, + { + "epoch": 1.1230731982890807, + "grad_norm": 0.6715937256813049, + "learning_rate": 4.271278438830174e-05, + "loss": 2.5107, + "step": 13916 + }, + { + "epoch": 1.1231539020256638, + "grad_norm": 0.6833200454711914, + "learning_rate": 4.26998454545899e-05, + "loss": 2.4883, + "step": 13917 + }, + { + "epoch": 1.1232346057622469, + "grad_norm": 0.6763597130775452, + "learning_rate": 4.2686907948947666e-05, + "loss": 2.4178, + "step": 13918 + }, + { + "epoch": 1.1233153094988297, + "grad_norm": 0.7336227297782898, + "learning_rate": 4.26739718716975e-05, + "loss": 2.4542, + "step": 13919 + }, + { + "epoch": 1.1233960132354128, + "grad_norm": 0.6583260297775269, + "learning_rate": 4.2661037223161806e-05, + "loss": 2.3998, + "step": 13920 + }, + { + "epoch": 1.1234767169719957, + "grad_norm": 0.6444356441497803, + "learning_rate": 4.264810400366295e-05, + "loss": 2.4354, + "step": 13921 + }, + { + "epoch": 1.1235574207085788, + "grad_norm": 0.6786002516746521, + "learning_rate": 4.2635172213523255e-05, + "loss": 2.3989, + "step": 13922 + }, + { + "epoch": 1.1236381244451619, + "grad_norm": 0.6838372349739075, + "learning_rate": 4.262224185306507e-05, + "loss": 2.4431, + "step": 13923 + }, + { + "epoch": 1.1237188281817447, + "grad_norm": 0.7516793012619019, + "learning_rate": 4.260931292261056e-05, + "loss": 2.4373, + "step": 13924 + }, + { + "epoch": 1.1237995319183278, + "grad_norm": 0.6860260367393494, + "learning_rate": 4.2596385422481985e-05, + "loss": 2.4457, + "step": 13925 + }, + { + "epoch": 1.123880235654911, + "grad_norm": 0.6556448936462402, + "learning_rate": 4.2583459353001595e-05, + "loss": 2.4165, + "step": 13926 + }, + { + "epoch": 1.1239609393914938, + "grad_norm": 0.729131281375885, + 
"learning_rate": 4.257053471449144e-05, + "loss": 2.4124, + "step": 13927 + }, + { + "epoch": 1.1240416431280769, + "grad_norm": 0.6941910982131958, + "learning_rate": 4.2557611507273684e-05, + "loss": 2.4095, + "step": 13928 + }, + { + "epoch": 1.12412234686466, + "grad_norm": 0.6390536427497864, + "learning_rate": 4.25446897316704e-05, + "loss": 2.4221, + "step": 13929 + }, + { + "epoch": 1.1242030506012428, + "grad_norm": 0.7034881114959717, + "learning_rate": 4.253176938800365e-05, + "loss": 2.4685, + "step": 13930 + }, + { + "epoch": 1.124283754337826, + "grad_norm": 0.6975526809692383, + "learning_rate": 4.251885047659542e-05, + "loss": 2.4771, + "step": 13931 + }, + { + "epoch": 1.1243644580744088, + "grad_norm": 0.7020023465156555, + "learning_rate": 4.2505932997767695e-05, + "loss": 2.4746, + "step": 13932 + }, + { + "epoch": 1.1244451618109919, + "grad_norm": 0.7207093238830566, + "learning_rate": 4.2493016951842444e-05, + "loss": 2.4707, + "step": 13933 + }, + { + "epoch": 1.124525865547575, + "grad_norm": 0.7711251974105835, + "learning_rate": 4.24801023391415e-05, + "loss": 2.5104, + "step": 13934 + }, + { + "epoch": 1.1246065692841578, + "grad_norm": 0.7324040532112122, + "learning_rate": 4.246718915998677e-05, + "loss": 2.4257, + "step": 13935 + }, + { + "epoch": 1.124687273020741, + "grad_norm": 0.6532757878303528, + "learning_rate": 4.2454277414700116e-05, + "loss": 2.3708, + "step": 13936 + }, + { + "epoch": 1.1247679767573238, + "grad_norm": 0.6933012008666992, + "learning_rate": 4.244136710360325e-05, + "loss": 2.4985, + "step": 13937 + }, + { + "epoch": 1.1248486804939068, + "grad_norm": 0.6787589192390442, + "learning_rate": 4.242845822701798e-05, + "loss": 2.402, + "step": 13938 + }, + { + "epoch": 1.12492938423049, + "grad_norm": 0.6567786931991577, + "learning_rate": 4.241555078526602e-05, + "loss": 2.4295, + "step": 13939 + }, + { + "epoch": 1.1250100879670728, + "grad_norm": 0.6962547302246094, + "learning_rate": 4.2402644778669074e-05, + 
"loss": 2.4006, + "step": 13940 + }, + { + "epoch": 1.125090791703656, + "grad_norm": 0.7152721285820007, + "learning_rate": 4.238974020754877e-05, + "loss": 2.4757, + "step": 13941 + }, + { + "epoch": 1.125171495440239, + "grad_norm": 0.6869861483573914, + "learning_rate": 4.237683707222677e-05, + "loss": 2.3877, + "step": 13942 + }, + { + "epoch": 1.1252521991768218, + "grad_norm": 0.6951470971107483, + "learning_rate": 4.236393537302459e-05, + "loss": 2.3755, + "step": 13943 + }, + { + "epoch": 1.125332902913405, + "grad_norm": 0.6997567415237427, + "learning_rate": 4.2351035110263805e-05, + "loss": 2.4731, + "step": 13944 + }, + { + "epoch": 1.125413606649988, + "grad_norm": 0.6765854358673096, + "learning_rate": 4.23381362842659e-05, + "loss": 2.4004, + "step": 13945 + }, + { + "epoch": 1.1254943103865709, + "grad_norm": 0.7046722173690796, + "learning_rate": 4.2325238895352426e-05, + "loss": 2.4379, + "step": 13946 + }, + { + "epoch": 1.125575014123154, + "grad_norm": 0.6862985491752625, + "learning_rate": 4.231234294384472e-05, + "loss": 2.4614, + "step": 13947 + }, + { + "epoch": 1.1256557178597368, + "grad_norm": 0.6637778282165527, + "learning_rate": 4.229944843006422e-05, + "loss": 2.4412, + "step": 13948 + }, + { + "epoch": 1.12573642159632, + "grad_norm": 0.7042228579521179, + "learning_rate": 4.228655535433231e-05, + "loss": 2.4296, + "step": 13949 + }, + { + "epoch": 1.1258171253329028, + "grad_norm": 0.6767764687538147, + "learning_rate": 4.227366371697029e-05, + "loss": 2.409, + "step": 13950 + }, + { + "epoch": 1.1258978290694859, + "grad_norm": 0.6886798143386841, + "learning_rate": 4.226077351829948e-05, + "loss": 2.4786, + "step": 13951 + }, + { + "epoch": 1.125978532806069, + "grad_norm": 0.7723653316497803, + "learning_rate": 4.224788475864115e-05, + "loss": 2.4111, + "step": 13952 + }, + { + "epoch": 1.1260592365426518, + "grad_norm": 0.7614055275917053, + "learning_rate": 4.2234997438316473e-05, + "loss": 2.5055, + "step": 13953 + }, + { + 
"epoch": 1.126139940279235, + "grad_norm": 0.7195241451263428, + "learning_rate": 4.222211155764665e-05, + "loss": 2.411, + "step": 13954 + }, + { + "epoch": 1.126220644015818, + "grad_norm": 0.7130021452903748, + "learning_rate": 4.220922711695288e-05, + "loss": 2.4819, + "step": 13955 + }, + { + "epoch": 1.1263013477524009, + "grad_norm": 0.6972241401672363, + "learning_rate": 4.2196344116556194e-05, + "loss": 2.4611, + "step": 13956 + }, + { + "epoch": 1.126382051488984, + "grad_norm": 0.7023231387138367, + "learning_rate": 4.218346255677772e-05, + "loss": 2.4509, + "step": 13957 + }, + { + "epoch": 1.126462755225567, + "grad_norm": 0.6959301829338074, + "learning_rate": 4.2170582437938534e-05, + "loss": 2.4441, + "step": 13958 + }, + { + "epoch": 1.12654345896215, + "grad_norm": 0.7423149347305298, + "learning_rate": 4.2157703760359555e-05, + "loss": 2.4452, + "step": 13959 + }, + { + "epoch": 1.126624162698733, + "grad_norm": 0.6587820053100586, + "learning_rate": 4.214482652436177e-05, + "loss": 2.3936, + "step": 13960 + }, + { + "epoch": 1.1267048664353159, + "grad_norm": 0.6601768136024475, + "learning_rate": 4.213195073026618e-05, + "loss": 2.453, + "step": 13961 + }, + { + "epoch": 1.126785570171899, + "grad_norm": 0.6986891031265259, + "learning_rate": 4.2119076378393676e-05, + "loss": 2.452, + "step": 13962 + }, + { + "epoch": 1.126866273908482, + "grad_norm": 0.7207025289535522, + "learning_rate": 4.2106203469065055e-05, + "loss": 2.4048, + "step": 13963 + }, + { + "epoch": 1.126946977645065, + "grad_norm": 0.6731177568435669, + "learning_rate": 4.2093332002601184e-05, + "loss": 2.4573, + "step": 13964 + }, + { + "epoch": 1.127027681381648, + "grad_norm": 0.7330070734024048, + "learning_rate": 4.208046197932288e-05, + "loss": 2.4274, + "step": 13965 + }, + { + "epoch": 1.1271083851182309, + "grad_norm": 0.7008770704269409, + "learning_rate": 4.206759339955084e-05, + "loss": 2.4933, + "step": 13966 + }, + { + "epoch": 1.127189088854814, + "grad_norm": 
0.8309584259986877, + "learning_rate": 4.20547262636058e-05, + "loss": 2.3857, + "step": 13967 + }, + { + "epoch": 1.127269792591397, + "grad_norm": 0.6705843210220337, + "learning_rate": 4.204186057180849e-05, + "loss": 2.4303, + "step": 13968 + }, + { + "epoch": 1.12735049632798, + "grad_norm": 0.7526851296424866, + "learning_rate": 4.202899632447949e-05, + "loss": 2.455, + "step": 13969 + }, + { + "epoch": 1.127431200064563, + "grad_norm": 0.6690995097160339, + "learning_rate": 4.201613352193943e-05, + "loss": 2.4398, + "step": 13970 + }, + { + "epoch": 1.127511903801146, + "grad_norm": 0.6946840286254883, + "learning_rate": 4.20032721645089e-05, + "loss": 2.4032, + "step": 13971 + }, + { + "epoch": 1.127592607537729, + "grad_norm": 0.7438863515853882, + "learning_rate": 4.1990412252508426e-05, + "loss": 2.4644, + "step": 13972 + }, + { + "epoch": 1.127673311274312, + "grad_norm": 0.6975359916687012, + "learning_rate": 4.197755378625852e-05, + "loss": 2.3991, + "step": 13973 + }, + { + "epoch": 1.1277540150108951, + "grad_norm": 0.6799279451370239, + "learning_rate": 4.196469676607968e-05, + "loss": 2.4328, + "step": 13974 + }, + { + "epoch": 1.127834718747478, + "grad_norm": 0.7014481425285339, + "learning_rate": 4.1951841192292274e-05, + "loss": 2.5045, + "step": 13975 + }, + { + "epoch": 1.127915422484061, + "grad_norm": 0.7074011564254761, + "learning_rate": 4.1938987065216716e-05, + "loss": 2.4583, + "step": 13976 + }, + { + "epoch": 1.127996126220644, + "grad_norm": 0.7246339917182922, + "learning_rate": 4.192613438517338e-05, + "loss": 2.447, + "step": 13977 + }, + { + "epoch": 1.128076829957227, + "grad_norm": 0.6757462620735168, + "learning_rate": 4.191328315248262e-05, + "loss": 2.4181, + "step": 13978 + }, + { + "epoch": 1.12815753369381, + "grad_norm": 0.6758493185043335, + "learning_rate": 4.1900433367464644e-05, + "loss": 2.4837, + "step": 13979 + }, + { + "epoch": 1.128238237430393, + "grad_norm": 0.6782165765762329, + "learning_rate": 
4.1887585030439736e-05, + "loss": 2.3946, + "step": 13980 + }, + { + "epoch": 1.128318941166976, + "grad_norm": 0.7176415324211121, + "learning_rate": 4.187473814172812e-05, + "loss": 2.4538, + "step": 13981 + }, + { + "epoch": 1.128399644903559, + "grad_norm": 0.6636224985122681, + "learning_rate": 4.186189270164997e-05, + "loss": 2.4493, + "step": 13982 + }, + { + "epoch": 1.128480348640142, + "grad_norm": 0.6613143086433411, + "learning_rate": 4.184904871052544e-05, + "loss": 2.4994, + "step": 13983 + }, + { + "epoch": 1.128561052376725, + "grad_norm": 0.7148364186286926, + "learning_rate": 4.183620616867465e-05, + "loss": 2.4673, + "step": 13984 + }, + { + "epoch": 1.128641756113308, + "grad_norm": 0.6657952070236206, + "learning_rate": 4.1823365076417606e-05, + "loss": 2.3915, + "step": 13985 + }, + { + "epoch": 1.128722459849891, + "grad_norm": 0.7135687470436096, + "learning_rate": 4.181052543407439e-05, + "loss": 2.4961, + "step": 13986 + }, + { + "epoch": 1.1288031635864741, + "grad_norm": 0.7245377898216248, + "learning_rate": 4.179768724196501e-05, + "loss": 2.4519, + "step": 13987 + }, + { + "epoch": 1.128883867323057, + "grad_norm": 0.6832938194274902, + "learning_rate": 4.1784850500409376e-05, + "loss": 2.4471, + "step": 13988 + }, + { + "epoch": 1.12896457105964, + "grad_norm": 0.7303032279014587, + "learning_rate": 4.177201520972746e-05, + "loss": 2.3906, + "step": 13989 + }, + { + "epoch": 1.1290452747962232, + "grad_norm": 0.698581874370575, + "learning_rate": 4.175918137023911e-05, + "loss": 2.4667, + "step": 13990 + }, + { + "epoch": 1.129125978532806, + "grad_norm": 0.69133061170578, + "learning_rate": 4.174634898226422e-05, + "loss": 2.4285, + "step": 13991 + }, + { + "epoch": 1.1292066822693891, + "grad_norm": 0.7029501795768738, + "learning_rate": 4.1733518046122576e-05, + "loss": 2.4839, + "step": 13992 + }, + { + "epoch": 1.129287386005972, + "grad_norm": 0.7566521167755127, + "learning_rate": 4.172068856213398e-05, + "loss": 2.5019, + 
"step": 13993 + }, + { + "epoch": 1.129368089742555, + "grad_norm": 0.697998046875, + "learning_rate": 4.1707860530618204e-05, + "loss": 2.4305, + "step": 13994 + }, + { + "epoch": 1.1294487934791382, + "grad_norm": 0.674194872379303, + "learning_rate": 4.169503395189489e-05, + "loss": 2.4361, + "step": 13995 + }, + { + "epoch": 1.129529497215721, + "grad_norm": 0.6936436891555786, + "learning_rate": 4.168220882628373e-05, + "loss": 2.518, + "step": 13996 + }, + { + "epoch": 1.1296102009523041, + "grad_norm": 0.6831670999526978, + "learning_rate": 4.166938515410442e-05, + "loss": 2.4197, + "step": 13997 + }, + { + "epoch": 1.129690904688887, + "grad_norm": 0.7323662638664246, + "learning_rate": 4.165656293567647e-05, + "loss": 2.4555, + "step": 13998 + }, + { + "epoch": 1.12977160842547, + "grad_norm": 0.7699782848358154, + "learning_rate": 4.164374217131948e-05, + "loss": 2.4456, + "step": 13999 + }, + { + "epoch": 1.1298523121620532, + "grad_norm": 0.7009051442146301, + "learning_rate": 4.163092286135297e-05, + "loss": 2.4429, + "step": 14000 + }, + { + "epoch": 1.1298523121620532, + "eval_loss": 2.4034411907196045, + "eval_runtime": 771.1158, + "eval_samples_per_second": 3.398, + "eval_steps_per_second": 0.567, + "step": 14000 + }, + { + "epoch": 1.129933015898636, + "grad_norm": 0.674665093421936, + "learning_rate": 4.1618105006096456e-05, + "loss": 2.4127, + "step": 14001 + }, + { + "epoch": 1.1300137196352191, + "grad_norm": 0.7332403659820557, + "learning_rate": 4.1605288605869365e-05, + "loss": 2.4854, + "step": 14002 + }, + { + "epoch": 1.1300944233718022, + "grad_norm": 0.70233553647995, + "learning_rate": 4.159247366099117e-05, + "loss": 2.4433, + "step": 14003 + }, + { + "epoch": 1.130175127108385, + "grad_norm": 0.6259445548057556, + "learning_rate": 4.157966017178118e-05, + "loss": 2.3605, + "step": 14004 + }, + { + "epoch": 1.1302558308449682, + "grad_norm": 0.717408299446106, + "learning_rate": 4.1566848138558755e-05, + "loss": 2.4378, + "step": 
14005 + }, + { + "epoch": 1.130336534581551, + "grad_norm": 0.6973297595977783, + "learning_rate": 4.155403756164323e-05, + "loss": 2.4363, + "step": 14006 + }, + { + "epoch": 1.1304172383181341, + "grad_norm": 0.7204940915107727, + "learning_rate": 4.154122844135391e-05, + "loss": 2.4814, + "step": 14007 + }, + { + "epoch": 1.1304979420547172, + "grad_norm": 0.8976696133613586, + "learning_rate": 4.1528420778009935e-05, + "loss": 2.4654, + "step": 14008 + }, + { + "epoch": 1.1305786457913, + "grad_norm": 0.7270354628562927, + "learning_rate": 4.151561457193057e-05, + "loss": 2.4088, + "step": 14009 + }, + { + "epoch": 1.1306593495278832, + "grad_norm": 0.7200367450714111, + "learning_rate": 4.1502809823434985e-05, + "loss": 2.4412, + "step": 14010 + }, + { + "epoch": 1.130740053264466, + "grad_norm": 0.7593986392021179, + "learning_rate": 4.149000653284227e-05, + "loss": 2.5058, + "step": 14011 + }, + { + "epoch": 1.1308207570010491, + "grad_norm": 0.7322795987129211, + "learning_rate": 4.147720470047155e-05, + "loss": 2.4899, + "step": 14012 + }, + { + "epoch": 1.1309014607376322, + "grad_norm": 0.6649030447006226, + "learning_rate": 4.1464404326641905e-05, + "loss": 2.4358, + "step": 14013 + }, + { + "epoch": 1.130982164474215, + "grad_norm": 0.7258814573287964, + "learning_rate": 4.145160541167228e-05, + "loss": 2.4732, + "step": 14014 + }, + { + "epoch": 1.1310628682107982, + "grad_norm": 0.7414976358413696, + "learning_rate": 4.1438807955881695e-05, + "loss": 2.4157, + "step": 14015 + }, + { + "epoch": 1.1311435719473812, + "grad_norm": 0.6813236474990845, + "learning_rate": 4.142601195958914e-05, + "loss": 2.3966, + "step": 14016 + }, + { + "epoch": 1.131224275683964, + "grad_norm": 0.6715923547744751, + "learning_rate": 4.141321742311344e-05, + "loss": 2.4358, + "step": 14017 + }, + { + "epoch": 1.1313049794205472, + "grad_norm": 0.7174912691116333, + "learning_rate": 4.14004243467735e-05, + "loss": 2.4838, + "step": 14018 + }, + { + "epoch": 
1.1313856831571303, + "grad_norm": 0.6945109963417053, + "learning_rate": 4.138763273088821e-05, + "loss": 2.4674, + "step": 14019 + }, + { + "epoch": 1.1314663868937131, + "grad_norm": 0.6759494543075562, + "learning_rate": 4.137484257577629e-05, + "loss": 2.4659, + "step": 14020 + }, + { + "epoch": 1.1315470906302962, + "grad_norm": 0.7077876925468445, + "learning_rate": 4.1362053881756534e-05, + "loss": 2.4731, + "step": 14021 + }, + { + "epoch": 1.131627794366879, + "grad_norm": 0.6769500970840454, + "learning_rate": 4.1349266649147654e-05, + "loss": 2.3606, + "step": 14022 + }, + { + "epoch": 1.1317084981034622, + "grad_norm": 0.7104208469390869, + "learning_rate": 4.1336480878268424e-05, + "loss": 2.4626, + "step": 14023 + }, + { + "epoch": 1.1317892018400453, + "grad_norm": 0.7102686762809753, + "learning_rate": 4.132369656943741e-05, + "loss": 2.4545, + "step": 14024 + }, + { + "epoch": 1.1318699055766281, + "grad_norm": 0.7773897647857666, + "learning_rate": 4.1310913722973256e-05, + "loss": 2.5107, + "step": 14025 + }, + { + "epoch": 1.1319506093132112, + "grad_norm": 0.6427130103111267, + "learning_rate": 4.1298132339194585e-05, + "loss": 2.4349, + "step": 14026 + }, + { + "epoch": 1.132031313049794, + "grad_norm": 0.6725162863731384, + "learning_rate": 4.128535241841987e-05, + "loss": 2.4566, + "step": 14027 + }, + { + "epoch": 1.1321120167863772, + "grad_norm": 0.7182251214981079, + "learning_rate": 4.127257396096764e-05, + "loss": 2.4472, + "step": 14028 + }, + { + "epoch": 1.1321927205229603, + "grad_norm": 0.6712302565574646, + "learning_rate": 4.1259796967156426e-05, + "loss": 2.4326, + "step": 14029 + }, + { + "epoch": 1.1322734242595431, + "grad_norm": 0.7726041078567505, + "learning_rate": 4.124702143730459e-05, + "loss": 2.4994, + "step": 14030 + }, + { + "epoch": 1.1323541279961262, + "grad_norm": 0.651899516582489, + "learning_rate": 4.123424737173056e-05, + "loss": 2.4244, + "step": 14031 + }, + { + "epoch": 1.1324348317327093, + 
"grad_norm": 0.6646261215209961, + "learning_rate": 4.12214747707527e-05, + "loss": 2.5027, + "step": 14032 + }, + { + "epoch": 1.1325155354692922, + "grad_norm": 0.729098916053772, + "learning_rate": 4.120870363468933e-05, + "loss": 2.5117, + "step": 14033 + }, + { + "epoch": 1.1325962392058753, + "grad_norm": 0.7056638598442078, + "learning_rate": 4.119593396385876e-05, + "loss": 2.4279, + "step": 14034 + }, + { + "epoch": 1.1326769429424584, + "grad_norm": 0.7051844000816345, + "learning_rate": 4.1183165758579255e-05, + "loss": 2.3844, + "step": 14035 + }, + { + "epoch": 1.1327576466790412, + "grad_norm": 0.6954311728477478, + "learning_rate": 4.1170399019168984e-05, + "loss": 2.4041, + "step": 14036 + }, + { + "epoch": 1.1328383504156243, + "grad_norm": 0.650044858455658, + "learning_rate": 4.1157633745946135e-05, + "loss": 2.4397, + "step": 14037 + }, + { + "epoch": 1.1329190541522072, + "grad_norm": 0.6974380016326904, + "learning_rate": 4.114486993922888e-05, + "loss": 2.4391, + "step": 14038 + }, + { + "epoch": 1.1329997578887903, + "grad_norm": 0.7252807021141052, + "learning_rate": 4.113210759933536e-05, + "loss": 2.4471, + "step": 14039 + }, + { + "epoch": 1.1330804616253733, + "grad_norm": 0.7001414895057678, + "learning_rate": 4.111934672658354e-05, + "loss": 2.402, + "step": 14040 + }, + { + "epoch": 1.1331611653619562, + "grad_norm": 0.7420533895492554, + "learning_rate": 4.110658732129153e-05, + "loss": 2.4987, + "step": 14041 + }, + { + "epoch": 1.1332418690985393, + "grad_norm": 0.6850644946098328, + "learning_rate": 4.1093829383777315e-05, + "loss": 2.4355, + "step": 14042 + }, + { + "epoch": 1.1333225728351222, + "grad_norm": 0.6905977725982666, + "learning_rate": 4.108107291435885e-05, + "loss": 2.4818, + "step": 14043 + }, + { + "epoch": 1.1334032765717053, + "grad_norm": 0.6555112600326538, + "learning_rate": 4.106831791335407e-05, + "loss": 2.425, + "step": 14044 + }, + { + "epoch": 1.1334839803082883, + "grad_norm": 0.6570355892181396, + 
"learning_rate": 4.105556438108089e-05, + "loss": 2.4232, + "step": 14045 + }, + { + "epoch": 1.1335646840448712, + "grad_norm": 0.7910747528076172, + "learning_rate": 4.104281231785708e-05, + "loss": 2.484, + "step": 14046 + }, + { + "epoch": 1.1336453877814543, + "grad_norm": 0.6581952571868896, + "learning_rate": 4.103006172400052e-05, + "loss": 2.4102, + "step": 14047 + }, + { + "epoch": 1.1337260915180374, + "grad_norm": 0.6834773421287537, + "learning_rate": 4.1017312599828994e-05, + "loss": 2.4602, + "step": 14048 + }, + { + "epoch": 1.1338067952546202, + "grad_norm": 0.7588350772857666, + "learning_rate": 4.1004564945660195e-05, + "loss": 2.5059, + "step": 14049 + }, + { + "epoch": 1.1338874989912033, + "grad_norm": 0.6604699492454529, + "learning_rate": 4.099181876181185e-05, + "loss": 2.4403, + "step": 14050 + }, + { + "epoch": 1.1339682027277862, + "grad_norm": 0.6957669258117676, + "learning_rate": 4.097907404860163e-05, + "loss": 2.4218, + "step": 14051 + }, + { + "epoch": 1.1340489064643693, + "grad_norm": 0.7091849446296692, + "learning_rate": 4.0966330806347166e-05, + "loss": 2.4396, + "step": 14052 + }, + { + "epoch": 1.1341296102009524, + "grad_norm": 0.6637482047080994, + "learning_rate": 4.095358903536605e-05, + "loss": 2.4514, + "step": 14053 + }, + { + "epoch": 1.1342103139375352, + "grad_norm": 0.7485960125923157, + "learning_rate": 4.0940848735975846e-05, + "loss": 2.4401, + "step": 14054 + }, + { + "epoch": 1.1342910176741183, + "grad_norm": 0.6509774327278137, + "learning_rate": 4.092810990849411e-05, + "loss": 2.4575, + "step": 14055 + }, + { + "epoch": 1.1343717214107012, + "grad_norm": 0.7151626348495483, + "learning_rate": 4.091537255323825e-05, + "loss": 2.45, + "step": 14056 + }, + { + "epoch": 1.1344524251472843, + "grad_norm": 0.7536267042160034, + "learning_rate": 4.0902636670525764e-05, + "loss": 2.497, + "step": 14057 + }, + { + "epoch": 1.1345331288838674, + "grad_norm": 0.7779545783996582, + "learning_rate": 
4.0889902260674086e-05, + "loss": 2.412, + "step": 14058 + }, + { + "epoch": 1.1346138326204502, + "grad_norm": 0.7211748957633972, + "learning_rate": 4.087716932400052e-05, + "loss": 2.4727, + "step": 14059 + }, + { + "epoch": 1.1346945363570333, + "grad_norm": 0.6710701584815979, + "learning_rate": 4.086443786082245e-05, + "loss": 2.4318, + "step": 14060 + }, + { + "epoch": 1.1347752400936164, + "grad_norm": 0.7072857022285461, + "learning_rate": 4.085170787145717e-05, + "loss": 2.4672, + "step": 14061 + }, + { + "epoch": 1.1348559438301993, + "grad_norm": 0.6475152969360352, + "learning_rate": 4.083897935622194e-05, + "loss": 2.4104, + "step": 14062 + }, + { + "epoch": 1.1349366475667824, + "grad_norm": 0.7408067584037781, + "learning_rate": 4.0826252315433986e-05, + "loss": 2.4129, + "step": 14063 + }, + { + "epoch": 1.1350173513033655, + "grad_norm": 0.732540488243103, + "learning_rate": 4.081352674941056e-05, + "loss": 2.4209, + "step": 14064 + }, + { + "epoch": 1.1350980550399483, + "grad_norm": 0.6933332681655884, + "learning_rate": 4.080080265846872e-05, + "loss": 2.3797, + "step": 14065 + }, + { + "epoch": 1.1351787587765314, + "grad_norm": 0.6507896780967712, + "learning_rate": 4.078808004292561e-05, + "loss": 2.4372, + "step": 14066 + }, + { + "epoch": 1.1352594625131143, + "grad_norm": 0.729292094707489, + "learning_rate": 4.0775358903098384e-05, + "loss": 2.5513, + "step": 14067 + }, + { + "epoch": 1.1353401662496974, + "grad_norm": 0.692757248878479, + "learning_rate": 4.076263923930398e-05, + "loss": 2.4228, + "step": 14068 + }, + { + "epoch": 1.1354208699862804, + "grad_norm": 0.7028260231018066, + "learning_rate": 4.074992105185946e-05, + "loss": 2.4478, + "step": 14069 + }, + { + "epoch": 1.1355015737228633, + "grad_norm": 0.65067058801651, + "learning_rate": 4.073720434108179e-05, + "loss": 2.3729, + "step": 14070 + }, + { + "epoch": 1.1355822774594464, + "grad_norm": 0.6884061098098755, + "learning_rate": 4.0724489107287933e-05, + "loss": 
2.3693, + "step": 14071 + }, + { + "epoch": 1.1356629811960293, + "grad_norm": 0.70686936378479, + "learning_rate": 4.071177535079472e-05, + "loss": 2.4989, + "step": 14072 + }, + { + "epoch": 1.1357436849326124, + "grad_norm": 0.6792482733726501, + "learning_rate": 4.0699063071919016e-05, + "loss": 2.393, + "step": 14073 + }, + { + "epoch": 1.1358243886691954, + "grad_norm": 0.7231085896492004, + "learning_rate": 4.0686352270977745e-05, + "loss": 2.4597, + "step": 14074 + }, + { + "epoch": 1.1359050924057783, + "grad_norm": 0.8024532198905945, + "learning_rate": 4.067364294828758e-05, + "loss": 2.4409, + "step": 14075 + }, + { + "epoch": 1.1359857961423614, + "grad_norm": 0.6761424541473389, + "learning_rate": 4.066093510416532e-05, + "loss": 2.4598, + "step": 14076 + }, + { + "epoch": 1.1360664998789445, + "grad_norm": 0.7075559496879578, + "learning_rate": 4.064822873892771e-05, + "loss": 2.4649, + "step": 14077 + }, + { + "epoch": 1.1361472036155273, + "grad_norm": 0.6292272806167603, + "learning_rate": 4.063552385289134e-05, + "loss": 2.445, + "step": 14078 + }, + { + "epoch": 1.1362279073521104, + "grad_norm": 0.6435273885726929, + "learning_rate": 4.06228204463729e-05, + "loss": 2.4105, + "step": 14079 + }, + { + "epoch": 1.1363086110886935, + "grad_norm": 0.7135637402534485, + "learning_rate": 4.061011851968903e-05, + "loss": 2.3907, + "step": 14080 + }, + { + "epoch": 1.1363893148252764, + "grad_norm": 0.7424013614654541, + "learning_rate": 4.059741807315621e-05, + "loss": 2.4405, + "step": 14081 + }, + { + "epoch": 1.1364700185618595, + "grad_norm": 0.6649916768074036, + "learning_rate": 4.0584719107091016e-05, + "loss": 2.4314, + "step": 14082 + }, + { + "epoch": 1.1365507222984423, + "grad_norm": 0.6700563430786133, + "learning_rate": 4.0572021621809944e-05, + "loss": 2.4093, + "step": 14083 + }, + { + "epoch": 1.1366314260350254, + "grad_norm": 0.6740709543228149, + "learning_rate": 4.055932561762942e-05, + "loss": 2.4301, + "step": 14084 + }, + { + 
"epoch": 1.1367121297716085, + "grad_norm": 0.7039555907249451, + "learning_rate": 4.0546631094865895e-05, + "loss": 2.4427, + "step": 14085 + }, + { + "epoch": 1.1367928335081914, + "grad_norm": 0.7461164593696594, + "learning_rate": 4.053393805383573e-05, + "loss": 2.3865, + "step": 14086 + }, + { + "epoch": 1.1368735372447745, + "grad_norm": 0.6808290481567383, + "learning_rate": 4.0521246494855316e-05, + "loss": 2.3738, + "step": 14087 + }, + { + "epoch": 1.1369542409813573, + "grad_norm": 0.6942760944366455, + "learning_rate": 4.0508556418240875e-05, + "loss": 2.4351, + "step": 14088 + }, + { + "epoch": 1.1370349447179404, + "grad_norm": 0.7615510821342468, + "learning_rate": 4.049586782430872e-05, + "loss": 2.3968, + "step": 14089 + }, + { + "epoch": 1.1371156484545235, + "grad_norm": 0.7240662574768066, + "learning_rate": 4.048318071337512e-05, + "loss": 2.4046, + "step": 14090 + }, + { + "epoch": 1.1371963521911064, + "grad_norm": 0.7286471128463745, + "learning_rate": 4.047049508575621e-05, + "loss": 2.4039, + "step": 14091 + }, + { + "epoch": 1.1372770559276895, + "grad_norm": 0.7031459212303162, + "learning_rate": 4.045781094176816e-05, + "loss": 2.4494, + "step": 14092 + }, + { + "epoch": 1.1373577596642725, + "grad_norm": 0.7116301655769348, + "learning_rate": 4.0445128281727116e-05, + "loss": 2.3991, + "step": 14093 + }, + { + "epoch": 1.1374384634008554, + "grad_norm": 0.6719788312911987, + "learning_rate": 4.043244710594914e-05, + "loss": 2.4823, + "step": 14094 + }, + { + "epoch": 1.1375191671374385, + "grad_norm": 0.6770508885383606, + "learning_rate": 4.041976741475031e-05, + "loss": 2.4362, + "step": 14095 + }, + { + "epoch": 1.1375998708740216, + "grad_norm": 0.6808609962463379, + "learning_rate": 4.040708920844666e-05, + "loss": 2.435, + "step": 14096 + }, + { + "epoch": 1.1376805746106045, + "grad_norm": 0.7445514798164368, + "learning_rate": 4.0394412487354074e-05, + "loss": 2.4749, + "step": 14097 + }, + { + "epoch": 1.1377612783471875, + 
"grad_norm": 0.7024775743484497, + "learning_rate": 4.038173725178854e-05, + "loss": 2.4354, + "step": 14098 + }, + { + "epoch": 1.1378419820837704, + "grad_norm": 0.6925685405731201, + "learning_rate": 4.0369063502066e-05, + "loss": 2.4462, + "step": 14099 + }, + { + "epoch": 1.1379226858203535, + "grad_norm": 0.6970539689064026, + "learning_rate": 4.035639123850223e-05, + "loss": 2.3842, + "step": 14100 + }, + { + "epoch": 1.1380033895569364, + "grad_norm": 0.6571836471557617, + "learning_rate": 4.0343720461413107e-05, + "loss": 2.4213, + "step": 14101 + }, + { + "epoch": 1.1380840932935194, + "grad_norm": 0.7264918684959412, + "learning_rate": 4.033105117111441e-05, + "loss": 2.4697, + "step": 14102 + }, + { + "epoch": 1.1381647970301025, + "grad_norm": 0.6929560899734497, + "learning_rate": 4.03183833679219e-05, + "loss": 2.461, + "step": 14103 + }, + { + "epoch": 1.1382455007666854, + "grad_norm": 0.6533559560775757, + "learning_rate": 4.030571705215128e-05, + "loss": 2.4336, + "step": 14104 + }, + { + "epoch": 1.1383262045032685, + "grad_norm": 0.7372364401817322, + "learning_rate": 4.0293052224118234e-05, + "loss": 2.4396, + "step": 14105 + }, + { + "epoch": 1.1384069082398516, + "grad_norm": 0.6736310720443726, + "learning_rate": 4.028038888413844e-05, + "loss": 2.4123, + "step": 14106 + }, + { + "epoch": 1.1384876119764344, + "grad_norm": 0.6898338794708252, + "learning_rate": 4.026772703252742e-05, + "loss": 2.431, + "step": 14107 + }, + { + "epoch": 1.1385683157130175, + "grad_norm": 0.7933369278907776, + "learning_rate": 4.02550666696008e-05, + "loss": 2.4669, + "step": 14108 + }, + { + "epoch": 1.1386490194496006, + "grad_norm": 0.7218122482299805, + "learning_rate": 4.024240779567412e-05, + "loss": 2.3761, + "step": 14109 + }, + { + "epoch": 1.1387297231861835, + "grad_norm": 0.7018248438835144, + "learning_rate": 4.022975041106281e-05, + "loss": 2.4011, + "step": 14110 + }, + { + "epoch": 1.1388104269227666, + "grad_norm": 0.6709668040275574, + 
"learning_rate": 4.0217094516082364e-05, + "loss": 2.426, + "step": 14111 + }, + { + "epoch": 1.1388911306593494, + "grad_norm": 0.7241504192352295, + "learning_rate": 4.0204440111048195e-05, + "loss": 2.4085, + "step": 14112 + }, + { + "epoch": 1.1389718343959325, + "grad_norm": 0.731347382068634, + "learning_rate": 4.0191787196275675e-05, + "loss": 2.502, + "step": 14113 + }, + { + "epoch": 1.1390525381325156, + "grad_norm": 0.6630167365074158, + "learning_rate": 4.0179135772080166e-05, + "loss": 2.3999, + "step": 14114 + }, + { + "epoch": 1.1391332418690985, + "grad_norm": 0.7094748616218567, + "learning_rate": 4.016648583877698e-05, + "loss": 2.4666, + "step": 14115 + }, + { + "epoch": 1.1392139456056816, + "grad_norm": 0.7262436151504517, + "learning_rate": 4.0153837396681395e-05, + "loss": 2.4369, + "step": 14116 + }, + { + "epoch": 1.1392946493422644, + "grad_norm": 0.6796039938926697, + "learning_rate": 4.014119044610859e-05, + "loss": 2.4607, + "step": 14117 + }, + { + "epoch": 1.1393753530788475, + "grad_norm": 0.6690036058425903, + "learning_rate": 4.0128544987373785e-05, + "loss": 2.4145, + "step": 14118 + }, + { + "epoch": 1.1394560568154306, + "grad_norm": 0.6987181305885315, + "learning_rate": 4.011590102079219e-05, + "loss": 2.4294, + "step": 14119 + }, + { + "epoch": 1.1395367605520135, + "grad_norm": 0.6756789684295654, + "learning_rate": 4.0103258546678836e-05, + "loss": 2.396, + "step": 14120 + }, + { + "epoch": 1.1396174642885966, + "grad_norm": 0.7027772068977356, + "learning_rate": 4.009061756534885e-05, + "loss": 2.3971, + "step": 14121 + }, + { + "epoch": 1.1396981680251796, + "grad_norm": 0.6872174143791199, + "learning_rate": 4.007797807711732e-05, + "loss": 2.4297, + "step": 14122 + }, + { + "epoch": 1.1397788717617625, + "grad_norm": 0.7213007211685181, + "learning_rate": 4.006534008229914e-05, + "loss": 2.4792, + "step": 14123 + }, + { + "epoch": 1.1398595754983456, + "grad_norm": 0.6771649122238159, + "learning_rate": 
4.0052703581209395e-05, + "loss": 2.4397, + "step": 14124 + }, + { + "epoch": 1.1399402792349287, + "grad_norm": 0.6577184796333313, + "learning_rate": 4.0040068574163013e-05, + "loss": 2.4113, + "step": 14125 + }, + { + "epoch": 1.1400209829715116, + "grad_norm": 0.7493160367012024, + "learning_rate": 4.002743506147483e-05, + "loss": 2.4454, + "step": 14126 + }, + { + "epoch": 1.1401016867080946, + "grad_norm": 0.6820357441902161, + "learning_rate": 4.0014803043459726e-05, + "loss": 2.4126, + "step": 14127 + }, + { + "epoch": 1.1401823904446775, + "grad_norm": 0.7177188992500305, + "learning_rate": 4.000217252043258e-05, + "loss": 2.4355, + "step": 14128 + }, + { + "epoch": 1.1402630941812606, + "grad_norm": 0.654371440410614, + "learning_rate": 3.998954349270808e-05, + "loss": 2.4932, + "step": 14129 + }, + { + "epoch": 1.1403437979178437, + "grad_norm": 0.7029837965965271, + "learning_rate": 3.997691596060104e-05, + "loss": 2.4341, + "step": 14130 + }, + { + "epoch": 1.1404245016544265, + "grad_norm": 0.7971171140670776, + "learning_rate": 3.996428992442615e-05, + "loss": 2.4466, + "step": 14131 + }, + { + "epoch": 1.1405052053910096, + "grad_norm": 0.6941849589347839, + "learning_rate": 3.9951665384498114e-05, + "loss": 2.4861, + "step": 14132 + }, + { + "epoch": 1.1405859091275925, + "grad_norm": 0.6657733917236328, + "learning_rate": 3.993904234113153e-05, + "loss": 2.4266, + "step": 14133 + }, + { + "epoch": 1.1406666128641756, + "grad_norm": 0.6780329346656799, + "learning_rate": 3.9926420794641e-05, + "loss": 2.458, + "step": 14134 + }, + { + "epoch": 1.1407473166007587, + "grad_norm": 0.7070702910423279, + "learning_rate": 3.991380074534109e-05, + "loss": 2.368, + "step": 14135 + }, + { + "epoch": 1.1408280203373415, + "grad_norm": 0.7186575531959534, + "learning_rate": 3.990118219354635e-05, + "loss": 2.4611, + "step": 14136 + }, + { + "epoch": 1.1409087240739246, + "grad_norm": 0.7171763777732849, + "learning_rate": 3.988856513957123e-05, + "loss": 
2.4315, + "step": 14137 + }, + { + "epoch": 1.1409894278105077, + "grad_norm": 0.7090228796005249, + "learning_rate": 3.987594958373025e-05, + "loss": 2.4668, + "step": 14138 + }, + { + "epoch": 1.1410701315470906, + "grad_norm": 0.6523951888084412, + "learning_rate": 3.986333552633773e-05, + "loss": 2.4392, + "step": 14139 + }, + { + "epoch": 1.1411508352836737, + "grad_norm": 0.706000804901123, + "learning_rate": 3.98507229677081e-05, + "loss": 2.4382, + "step": 14140 + }, + { + "epoch": 1.1412315390202568, + "grad_norm": 0.6537537574768066, + "learning_rate": 3.983811190815571e-05, + "loss": 2.456, + "step": 14141 + }, + { + "epoch": 1.1413122427568396, + "grad_norm": 0.7509549856185913, + "learning_rate": 3.982550234799479e-05, + "loss": 2.4744, + "step": 14142 + }, + { + "epoch": 1.1413929464934227, + "grad_norm": 0.7188650965690613, + "learning_rate": 3.981289428753967e-05, + "loss": 2.4632, + "step": 14143 + }, + { + "epoch": 1.1414736502300056, + "grad_norm": 0.7563674449920654, + "learning_rate": 3.9800287727104544e-05, + "loss": 2.5063, + "step": 14144 + }, + { + "epoch": 1.1415543539665887, + "grad_norm": 0.8374128341674805, + "learning_rate": 3.978768266700361e-05, + "loss": 2.4942, + "step": 14145 + }, + { + "epoch": 1.1416350577031718, + "grad_norm": 0.7020177841186523, + "learning_rate": 3.9775079107551027e-05, + "loss": 2.4404, + "step": 14146 + }, + { + "epoch": 1.1417157614397546, + "grad_norm": 0.7326170802116394, + "learning_rate": 3.9762477049060895e-05, + "loss": 2.4127, + "step": 14147 + }, + { + "epoch": 1.1417964651763377, + "grad_norm": 0.6661173105239868, + "learning_rate": 3.974987649184734e-05, + "loss": 2.4649, + "step": 14148 + }, + { + "epoch": 1.1418771689129206, + "grad_norm": 0.7186033129692078, + "learning_rate": 3.973727743622432e-05, + "loss": 2.4275, + "step": 14149 + }, + { + "epoch": 1.1419578726495037, + "grad_norm": 0.7193881869316101, + "learning_rate": 3.972467988250588e-05, + "loss": 2.4997, + "step": 14150 + }, + { + 
"epoch": 1.1420385763860867, + "grad_norm": 0.7139542102813721, + "learning_rate": 3.971208383100601e-05, + "loss": 2.4211, + "step": 14151 + }, + { + "epoch": 1.1421192801226696, + "grad_norm": 0.6840166449546814, + "learning_rate": 3.969948928203856e-05, + "loss": 2.4504, + "step": 14152 + }, + { + "epoch": 1.1421999838592527, + "grad_norm": 0.8261072039604187, + "learning_rate": 3.968689623591747e-05, + "loss": 2.4901, + "step": 14153 + }, + { + "epoch": 1.1422806875958358, + "grad_norm": 0.7636086940765381, + "learning_rate": 3.96743046929566e-05, + "loss": 2.4202, + "step": 14154 + }, + { + "epoch": 1.1423613913324187, + "grad_norm": 0.7477976679801941, + "learning_rate": 3.966171465346973e-05, + "loss": 2.492, + "step": 14155 + }, + { + "epoch": 1.1424420950690017, + "grad_norm": 0.7516389489173889, + "learning_rate": 3.9649126117770665e-05, + "loss": 2.4512, + "step": 14156 + }, + { + "epoch": 1.1425227988055846, + "grad_norm": 0.6987521648406982, + "learning_rate": 3.9636539086173174e-05, + "loss": 2.4005, + "step": 14157 + }, + { + "epoch": 1.1426035025421677, + "grad_norm": 0.7242532968521118, + "learning_rate": 3.962395355899088e-05, + "loss": 2.4414, + "step": 14158 + }, + { + "epoch": 1.1426842062787508, + "grad_norm": 0.6616180539131165, + "learning_rate": 3.961136953653749e-05, + "loss": 2.4442, + "step": 14159 + }, + { + "epoch": 1.1427649100153336, + "grad_norm": 0.7165415287017822, + "learning_rate": 3.959878701912667e-05, + "loss": 2.4658, + "step": 14160 + }, + { + "epoch": 1.1428456137519167, + "grad_norm": 0.6619318127632141, + "learning_rate": 3.9586206007071926e-05, + "loss": 2.3803, + "step": 14161 + }, + { + "epoch": 1.1429263174884996, + "grad_norm": 0.6654838919639587, + "learning_rate": 3.957362650068684e-05, + "loss": 2.4584, + "step": 14162 + }, + { + "epoch": 1.1430070212250827, + "grad_norm": 0.6947140097618103, + "learning_rate": 3.956104850028496e-05, + "loss": 2.4236, + "step": 14163 + }, + { + "epoch": 1.1430877249616658, + 
"grad_norm": 0.6510412096977234, + "learning_rate": 3.954847200617973e-05, + "loss": 2.3589, + "step": 14164 + }, + { + "epoch": 1.1431684286982486, + "grad_norm": 0.7550667524337769, + "learning_rate": 3.95358970186846e-05, + "loss": 2.419, + "step": 14165 + }, + { + "epoch": 1.1432491324348317, + "grad_norm": 0.7898361682891846, + "learning_rate": 3.9523323538112975e-05, + "loss": 2.4549, + "step": 14166 + }, + { + "epoch": 1.1433298361714148, + "grad_norm": 0.7162390947341919, + "learning_rate": 3.9510751564778246e-05, + "loss": 2.4493, + "step": 14167 + }, + { + "epoch": 1.1434105399079977, + "grad_norm": 0.8251990079879761, + "learning_rate": 3.949818109899367e-05, + "loss": 2.4474, + "step": 14168 + }, + { + "epoch": 1.1434912436445808, + "grad_norm": 0.6739209890365601, + "learning_rate": 3.948561214107258e-05, + "loss": 2.4564, + "step": 14169 + }, + { + "epoch": 1.1435719473811639, + "grad_norm": 0.6606340408325195, + "learning_rate": 3.9473044691328254e-05, + "loss": 2.3838, + "step": 14170 + }, + { + "epoch": 1.1436526511177467, + "grad_norm": 0.7297452092170715, + "learning_rate": 3.946047875007384e-05, + "loss": 2.4673, + "step": 14171 + }, + { + "epoch": 1.1437333548543298, + "grad_norm": 0.7382420301437378, + "learning_rate": 3.9447914317622546e-05, + "loss": 2.4279, + "step": 14172 + }, + { + "epoch": 1.1438140585909127, + "grad_norm": 0.6947354674339294, + "learning_rate": 3.9435351394287546e-05, + "loss": 2.4553, + "step": 14173 + }, + { + "epoch": 1.1438947623274958, + "grad_norm": 0.670369565486908, + "learning_rate": 3.942278998038183e-05, + "loss": 2.4285, + "step": 14174 + }, + { + "epoch": 1.1439754660640788, + "grad_norm": 0.7097954154014587, + "learning_rate": 3.941023007621859e-05, + "loss": 2.477, + "step": 14175 + }, + { + "epoch": 1.1440561698006617, + "grad_norm": 0.6490213871002197, + "learning_rate": 3.9397671682110826e-05, + "loss": 2.3943, + "step": 14176 + }, + { + "epoch": 1.1441368735372448, + "grad_norm": 0.6505936980247498, + 
"learning_rate": 3.938511479837147e-05, + "loss": 2.4188, + "step": 14177 + }, + { + "epoch": 1.1442175772738277, + "grad_norm": 0.6696773767471313, + "learning_rate": 3.9372559425313496e-05, + "loss": 2.4377, + "step": 14178 + }, + { + "epoch": 1.1442982810104108, + "grad_norm": 0.6747034192085266, + "learning_rate": 3.936000556324982e-05, + "loss": 2.4111, + "step": 14179 + }, + { + "epoch": 1.1443789847469938, + "grad_norm": 0.7766546607017517, + "learning_rate": 3.934745321249336e-05, + "loss": 2.3873, + "step": 14180 + }, + { + "epoch": 1.1444596884835767, + "grad_norm": 0.7608100175857544, + "learning_rate": 3.933490237335688e-05, + "loss": 2.4567, + "step": 14181 + }, + { + "epoch": 1.1445403922201598, + "grad_norm": 0.7724356055259705, + "learning_rate": 3.9322353046153205e-05, + "loss": 2.4729, + "step": 14182 + }, + { + "epoch": 1.1446210959567429, + "grad_norm": 0.6908414363861084, + "learning_rate": 3.930980523119515e-05, + "loss": 2.41, + "step": 14183 + }, + { + "epoch": 1.1447017996933257, + "grad_norm": 0.7209733128547668, + "learning_rate": 3.9297258928795356e-05, + "loss": 2.4629, + "step": 14184 + }, + { + "epoch": 1.1447825034299088, + "grad_norm": 0.7116519212722778, + "learning_rate": 3.928471413926651e-05, + "loss": 2.5081, + "step": 14185 + }, + { + "epoch": 1.144863207166492, + "grad_norm": 0.6704578995704651, + "learning_rate": 3.9272170862921365e-05, + "loss": 2.494, + "step": 14186 + }, + { + "epoch": 1.1449439109030748, + "grad_norm": 0.6914607882499695, + "learning_rate": 3.9259629100072435e-05, + "loss": 2.3979, + "step": 14187 + }, + { + "epoch": 1.1450246146396579, + "grad_norm": 0.7413245439529419, + "learning_rate": 3.924708885103233e-05, + "loss": 2.4534, + "step": 14188 + }, + { + "epoch": 1.1451053183762407, + "grad_norm": 0.7411661744117737, + "learning_rate": 3.923455011611362e-05, + "loss": 2.4191, + "step": 14189 + }, + { + "epoch": 1.1451860221128238, + "grad_norm": 0.6581972241401672, + "learning_rate": 
3.9222012895628716e-05, + "loss": 2.4494, + "step": 14190 + }, + { + "epoch": 1.145266725849407, + "grad_norm": 0.6628647446632385, + "learning_rate": 3.920947718989013e-05, + "loss": 2.4483, + "step": 14191 + }, + { + "epoch": 1.1453474295859898, + "grad_norm": 0.7068151831626892, + "learning_rate": 3.9196942999210316e-05, + "loss": 2.4549, + "step": 14192 + }, + { + "epoch": 1.1454281333225729, + "grad_norm": 0.6727713942527771, + "learning_rate": 3.918441032390159e-05, + "loss": 2.4261, + "step": 14193 + }, + { + "epoch": 1.1455088370591557, + "grad_norm": 0.6680718660354614, + "learning_rate": 3.9171879164276334e-05, + "loss": 2.4705, + "step": 14194 + }, + { + "epoch": 1.1455895407957388, + "grad_norm": 0.710096538066864, + "learning_rate": 3.915934952064685e-05, + "loss": 2.474, + "step": 14195 + }, + { + "epoch": 1.145670244532322, + "grad_norm": 0.6927496790885925, + "learning_rate": 3.9146821393325414e-05, + "loss": 2.3979, + "step": 14196 + }, + { + "epoch": 1.1457509482689048, + "grad_norm": 0.6887550354003906, + "learning_rate": 3.913429478262427e-05, + "loss": 2.4588, + "step": 14197 + }, + { + "epoch": 1.1458316520054879, + "grad_norm": 0.6847062706947327, + "learning_rate": 3.912176968885559e-05, + "loss": 2.4602, + "step": 14198 + }, + { + "epoch": 1.145912355742071, + "grad_norm": 0.6832349300384521, + "learning_rate": 3.91092461123316e-05, + "loss": 2.4672, + "step": 14199 + }, + { + "epoch": 1.1459930594786538, + "grad_norm": 0.6789066791534424, + "learning_rate": 3.909672405336432e-05, + "loss": 2.5029, + "step": 14200 + }, + { + "epoch": 1.146073763215237, + "grad_norm": 0.6953951120376587, + "learning_rate": 3.9084203512265885e-05, + "loss": 2.4223, + "step": 14201 + }, + { + "epoch": 1.1461544669518198, + "grad_norm": 0.6629688739776611, + "learning_rate": 3.907168448934836e-05, + "loss": 2.4028, + "step": 14202 + }, + { + "epoch": 1.1462351706884029, + "grad_norm": 0.6661216020584106, + "learning_rate": 3.90591669849237e-05, + "loss": 
2.4668, + "step": 14203 + }, + { + "epoch": 1.146315874424986, + "grad_norm": 0.6814442276954651, + "learning_rate": 3.9046650999303894e-05, + "loss": 2.4273, + "step": 14204 + }, + { + "epoch": 1.1463965781615688, + "grad_norm": 0.6678626537322998, + "learning_rate": 3.903413653280088e-05, + "loss": 2.444, + "step": 14205 + }, + { + "epoch": 1.146477281898152, + "grad_norm": 0.6703703999519348, + "learning_rate": 3.902162358572655e-05, + "loss": 2.4273, + "step": 14206 + }, + { + "epoch": 1.1465579856347348, + "grad_norm": 0.7052578926086426, + "learning_rate": 3.900911215839276e-05, + "loss": 2.4397, + "step": 14207 + }, + { + "epoch": 1.1466386893713179, + "grad_norm": 0.6792036294937134, + "learning_rate": 3.899660225111136e-05, + "loss": 2.439, + "step": 14208 + }, + { + "epoch": 1.146719393107901, + "grad_norm": 0.6995401978492737, + "learning_rate": 3.898409386419407e-05, + "loss": 2.5002, + "step": 14209 + }, + { + "epoch": 1.1468000968444838, + "grad_norm": 0.6527338027954102, + "learning_rate": 3.897158699795265e-05, + "loss": 2.4523, + "step": 14210 + }, + { + "epoch": 1.146880800581067, + "grad_norm": 0.7509400248527527, + "learning_rate": 3.8959081652698814e-05, + "loss": 2.4193, + "step": 14211 + }, + { + "epoch": 1.14696150431765, + "grad_norm": 0.6985350251197815, + "learning_rate": 3.894657782874426e-05, + "loss": 2.4251, + "step": 14212 + }, + { + "epoch": 1.1470422080542328, + "grad_norm": 0.6831483840942383, + "learning_rate": 3.893407552640055e-05, + "loss": 2.4172, + "step": 14213 + }, + { + "epoch": 1.147122911790816, + "grad_norm": 0.7281469702720642, + "learning_rate": 3.892157474597929e-05, + "loss": 2.4451, + "step": 14214 + }, + { + "epoch": 1.147203615527399, + "grad_norm": 0.7326027750968933, + "learning_rate": 3.8909075487792066e-05, + "loss": 2.3926, + "step": 14215 + }, + { + "epoch": 1.1472843192639819, + "grad_norm": 0.7030496597290039, + "learning_rate": 3.889657775215036e-05, + "loss": 2.435, + "step": 14216 + }, + { + "epoch": 
1.147365023000565, + "grad_norm": 0.6915596127510071, + "learning_rate": 3.888408153936568e-05, + "loss": 2.4622, + "step": 14217 + }, + { + "epoch": 1.1474457267371478, + "grad_norm": 0.678600013256073, + "learning_rate": 3.8871586849749474e-05, + "loss": 2.4264, + "step": 14218 + }, + { + "epoch": 1.147526430473731, + "grad_norm": 0.7487786412239075, + "learning_rate": 3.885909368361308e-05, + "loss": 2.4038, + "step": 14219 + }, + { + "epoch": 1.147607134210314, + "grad_norm": 0.6658064723014832, + "learning_rate": 3.8846602041267886e-05, + "loss": 2.4079, + "step": 14220 + }, + { + "epoch": 1.1476878379468969, + "grad_norm": 0.6985111832618713, + "learning_rate": 3.883411192302527e-05, + "loss": 2.481, + "step": 14221 + }, + { + "epoch": 1.14776854168348, + "grad_norm": 0.7056208848953247, + "learning_rate": 3.8821623329196445e-05, + "loss": 2.4409, + "step": 14222 + }, + { + "epoch": 1.1478492454200628, + "grad_norm": 0.7107830047607422, + "learning_rate": 3.880913626009268e-05, + "loss": 2.4578, + "step": 14223 + }, + { + "epoch": 1.147929949156646, + "grad_norm": 0.6678555607795715, + "learning_rate": 3.87966507160252e-05, + "loss": 2.4548, + "step": 14224 + }, + { + "epoch": 1.148010652893229, + "grad_norm": 0.6699830293655396, + "learning_rate": 3.8784166697305157e-05, + "loss": 2.3763, + "step": 14225 + }, + { + "epoch": 1.1480913566298119, + "grad_norm": 0.7695464491844177, + "learning_rate": 3.8771684204243716e-05, + "loss": 2.4774, + "step": 14226 + }, + { + "epoch": 1.148172060366395, + "grad_norm": 0.7801330089569092, + "learning_rate": 3.8759203237151954e-05, + "loss": 2.4598, + "step": 14227 + }, + { + "epoch": 1.148252764102978, + "grad_norm": 0.7029622793197632, + "learning_rate": 3.8746723796340955e-05, + "loss": 2.3901, + "step": 14228 + }, + { + "epoch": 1.148333467839561, + "grad_norm": 0.7472359538078308, + "learning_rate": 3.873424588212169e-05, + "loss": 2.4724, + "step": 14229 + }, + { + "epoch": 1.148414171576144, + "grad_norm": 
0.6621725559234619, + "learning_rate": 3.872176949480517e-05, + "loss": 2.4523, + "step": 14230 + }, + { + "epoch": 1.148494875312727, + "grad_norm": 0.722658634185791, + "learning_rate": 3.8709294634702376e-05, + "loss": 2.4032, + "step": 14231 + }, + { + "epoch": 1.14857557904931, + "grad_norm": 0.7743202447891235, + "learning_rate": 3.869682130212413e-05, + "loss": 2.4373, + "step": 14232 + }, + { + "epoch": 1.148656282785893, + "grad_norm": 0.6906178593635559, + "learning_rate": 3.868434949738136e-05, + "loss": 2.4765, + "step": 14233 + }, + { + "epoch": 1.148736986522476, + "grad_norm": 0.6708275675773621, + "learning_rate": 3.86718792207849e-05, + "loss": 2.4263, + "step": 14234 + }, + { + "epoch": 1.148817690259059, + "grad_norm": 0.6992776989936829, + "learning_rate": 3.8659410472645494e-05, + "loss": 2.378, + "step": 14235 + }, + { + "epoch": 1.148898393995642, + "grad_norm": 0.7229011058807373, + "learning_rate": 3.864694325327389e-05, + "loss": 2.4075, + "step": 14236 + }, + { + "epoch": 1.148979097732225, + "grad_norm": 0.6622509956359863, + "learning_rate": 3.863447756298091e-05, + "loss": 2.3954, + "step": 14237 + }, + { + "epoch": 1.149059801468808, + "grad_norm": 0.7233534455299377, + "learning_rate": 3.862201340207712e-05, + "loss": 2.4506, + "step": 14238 + }, + { + "epoch": 1.149140505205391, + "grad_norm": 0.716869056224823, + "learning_rate": 3.860955077087321e-05, + "loss": 2.4304, + "step": 14239 + }, + { + "epoch": 1.149221208941974, + "grad_norm": 0.6550257205963135, + "learning_rate": 3.8597089669679766e-05, + "loss": 2.4261, + "step": 14240 + }, + { + "epoch": 1.149301912678557, + "grad_norm": 0.6981741786003113, + "learning_rate": 3.858463009880738e-05, + "loss": 2.4115, + "step": 14241 + }, + { + "epoch": 1.14938261641514, + "grad_norm": 0.6792196035385132, + "learning_rate": 3.8572172058566534e-05, + "loss": 2.4195, + "step": 14242 + }, + { + "epoch": 1.149463320151723, + "grad_norm": 0.7278807163238525, + "learning_rate": 
3.855971554926773e-05, + "loss": 2.418, + "step": 14243 + }, + { + "epoch": 1.1495440238883061, + "grad_norm": 0.6451076865196228, + "learning_rate": 3.8547260571221456e-05, + "loss": 2.4591, + "step": 14244 + }, + { + "epoch": 1.149624727624889, + "grad_norm": 0.7052451968193054, + "learning_rate": 3.853480712473805e-05, + "loss": 2.4023, + "step": 14245 + }, + { + "epoch": 1.149705431361472, + "grad_norm": 0.7016182541847229, + "learning_rate": 3.852235521012793e-05, + "loss": 2.4959, + "step": 14246 + }, + { + "epoch": 1.1497861350980552, + "grad_norm": 0.7287492156028748, + "learning_rate": 3.850990482770141e-05, + "loss": 2.3884, + "step": 14247 + }, + { + "epoch": 1.149866838834638, + "grad_norm": 0.6648508310317993, + "learning_rate": 3.84974559777688e-05, + "loss": 2.4632, + "step": 14248 + }, + { + "epoch": 1.1499475425712211, + "grad_norm": 0.7387828230857849, + "learning_rate": 3.848500866064036e-05, + "loss": 2.4053, + "step": 14249 + }, + { + "epoch": 1.150028246307804, + "grad_norm": 0.7230356931686401, + "learning_rate": 3.847256287662635e-05, + "loss": 2.5128, + "step": 14250 + }, + { + "epoch": 1.150108950044387, + "grad_norm": 0.7209547162055969, + "learning_rate": 3.846011862603686e-05, + "loss": 2.4626, + "step": 14251 + }, + { + "epoch": 1.1501896537809702, + "grad_norm": 0.7177916765213013, + "learning_rate": 3.844767590918209e-05, + "loss": 2.4469, + "step": 14252 + }, + { + "epoch": 1.150270357517553, + "grad_norm": 0.7850151658058167, + "learning_rate": 3.843523472637216e-05, + "loss": 2.4731, + "step": 14253 + }, + { + "epoch": 1.150351061254136, + "grad_norm": 0.7051519155502319, + "learning_rate": 3.8422795077917084e-05, + "loss": 2.3696, + "step": 14254 + }, + { + "epoch": 1.150431764990719, + "grad_norm": 0.7434025406837463, + "learning_rate": 3.841035696412692e-05, + "loss": 2.444, + "step": 14255 + }, + { + "epoch": 1.150512468727302, + "grad_norm": 0.7404719591140747, + "learning_rate": 3.839792038531166e-05, + "loss": 2.4415, + 
"step": 14256 + }, + { + "epoch": 1.1505931724638851, + "grad_norm": 0.6883764266967773, + "learning_rate": 3.838548534178125e-05, + "loss": 2.4887, + "step": 14257 + }, + { + "epoch": 1.150673876200468, + "grad_norm": 0.6697155237197876, + "learning_rate": 3.83730518338456e-05, + "loss": 2.3721, + "step": 14258 + }, + { + "epoch": 1.150754579937051, + "grad_norm": 0.68825763463974, + "learning_rate": 3.836061986181459e-05, + "loss": 2.4712, + "step": 14259 + }, + { + "epoch": 1.1508352836736342, + "grad_norm": 0.6810611486434937, + "learning_rate": 3.8348189425998114e-05, + "loss": 2.3995, + "step": 14260 + }, + { + "epoch": 1.150915987410217, + "grad_norm": 0.6718329787254333, + "learning_rate": 3.8335760526705866e-05, + "loss": 2.4068, + "step": 14261 + }, + { + "epoch": 1.1509966911468001, + "grad_norm": 0.694618284702301, + "learning_rate": 3.832333316424767e-05, + "loss": 2.458, + "step": 14262 + }, + { + "epoch": 1.151077394883383, + "grad_norm": 0.6824250817298889, + "learning_rate": 3.8310907338933266e-05, + "loss": 2.4623, + "step": 14263 + }, + { + "epoch": 1.151158098619966, + "grad_norm": 0.6875178217887878, + "learning_rate": 3.8298483051072264e-05, + "loss": 2.4827, + "step": 14264 + }, + { + "epoch": 1.1512388023565492, + "grad_norm": 0.7868281602859497, + "learning_rate": 3.828606030097437e-05, + "loss": 2.4638, + "step": 14265 + }, + { + "epoch": 1.151319506093132, + "grad_norm": 0.7003639936447144, + "learning_rate": 3.8273639088949165e-05, + "loss": 2.4885, + "step": 14266 + }, + { + "epoch": 1.1514002098297151, + "grad_norm": 0.6965197920799255, + "learning_rate": 3.826121941530623e-05, + "loss": 2.3983, + "step": 14267 + }, + { + "epoch": 1.151480913566298, + "grad_norm": 0.7241101264953613, + "learning_rate": 3.824880128035509e-05, + "loss": 2.4598, + "step": 14268 + }, + { + "epoch": 1.151561617302881, + "grad_norm": 0.700764536857605, + "learning_rate": 3.823638468440528e-05, + "loss": 2.3627, + "step": 14269 + }, + { + "epoch": 
1.1516423210394642, + "grad_norm": 0.6889846324920654, + "learning_rate": 3.822396962776619e-05, + "loss": 2.4442, + "step": 14270 + }, + { + "epoch": 1.151723024776047, + "grad_norm": 0.6660009026527405, + "learning_rate": 3.8211556110747245e-05, + "loss": 2.403, + "step": 14271 + }, + { + "epoch": 1.1518037285126301, + "grad_norm": 0.6537240743637085, + "learning_rate": 3.819914413365785e-05, + "loss": 2.4358, + "step": 14272 + }, + { + "epoch": 1.1518844322492132, + "grad_norm": 0.6852741837501526, + "learning_rate": 3.818673369680735e-05, + "loss": 2.4272, + "step": 14273 + }, + { + "epoch": 1.151965135985796, + "grad_norm": 0.701874852180481, + "learning_rate": 3.817432480050501e-05, + "loss": 2.4419, + "step": 14274 + }, + { + "epoch": 1.1520458397223792, + "grad_norm": 0.7089500427246094, + "learning_rate": 3.816191744506011e-05, + "loss": 2.4537, + "step": 14275 + }, + { + "epoch": 1.1521265434589623, + "grad_norm": 0.698564887046814, + "learning_rate": 3.8149511630781866e-05, + "loss": 2.3991, + "step": 14276 + }, + { + "epoch": 1.1522072471955451, + "grad_norm": 0.6940335035324097, + "learning_rate": 3.813710735797947e-05, + "loss": 2.5022, + "step": 14277 + }, + { + "epoch": 1.1522879509321282, + "grad_norm": 0.6916826367378235, + "learning_rate": 3.812470462696208e-05, + "loss": 2.4449, + "step": 14278 + }, + { + "epoch": 1.152368654668711, + "grad_norm": 0.7115256190299988, + "learning_rate": 3.811230343803882e-05, + "loss": 2.4371, + "step": 14279 + }, + { + "epoch": 1.1524493584052942, + "grad_norm": 0.6857369542121887, + "learning_rate": 3.80999037915187e-05, + "loss": 2.4426, + "step": 14280 + }, + { + "epoch": 1.1525300621418773, + "grad_norm": 0.7605363130569458, + "learning_rate": 3.808750568771079e-05, + "loss": 2.4999, + "step": 14281 + }, + { + "epoch": 1.1526107658784601, + "grad_norm": 0.6604358553886414, + "learning_rate": 3.8075109126924115e-05, + "loss": 2.419, + "step": 14282 + }, + { + "epoch": 1.1526914696150432, + "grad_norm": 
0.6945412755012512, + "learning_rate": 3.806271410946756e-05, + "loss": 2.4555, + "step": 14283 + }, + { + "epoch": 1.152772173351626, + "grad_norm": 0.7205908894538879, + "learning_rate": 3.805032063565007e-05, + "loss": 2.4745, + "step": 14284 + }, + { + "epoch": 1.1528528770882092, + "grad_norm": 0.7198025584220886, + "learning_rate": 3.8037928705780554e-05, + "loss": 2.4358, + "step": 14285 + }, + { + "epoch": 1.1529335808247922, + "grad_norm": 0.7231044769287109, + "learning_rate": 3.802553832016781e-05, + "loss": 2.4713, + "step": 14286 + }, + { + "epoch": 1.1530142845613751, + "grad_norm": 0.6878815293312073, + "learning_rate": 3.80131494791206e-05, + "loss": 2.4479, + "step": 14287 + }, + { + "epoch": 1.1530949882979582, + "grad_norm": 0.6930533647537231, + "learning_rate": 3.800076218294779e-05, + "loss": 2.3912, + "step": 14288 + }, + { + "epoch": 1.1531756920345413, + "grad_norm": 0.703521192073822, + "learning_rate": 3.798837643195808e-05, + "loss": 2.451, + "step": 14289 + }, + { + "epoch": 1.1532563957711242, + "grad_norm": 0.7099746465682983, + "learning_rate": 3.79759922264601e-05, + "loss": 2.4957, + "step": 14290 + }, + { + "epoch": 1.1533370995077072, + "grad_norm": 0.7268218398094177, + "learning_rate": 3.7963609566762527e-05, + "loss": 2.4242, + "step": 14291 + }, + { + "epoch": 1.1534178032442903, + "grad_norm": 0.7465239763259888, + "learning_rate": 3.7951228453174004e-05, + "loss": 2.3867, + "step": 14292 + }, + { + "epoch": 1.1534985069808732, + "grad_norm": 0.704584002494812, + "learning_rate": 3.793884888600302e-05, + "loss": 2.5009, + "step": 14293 + }, + { + "epoch": 1.1535792107174563, + "grad_norm": 0.7057262063026428, + "learning_rate": 3.792647086555816e-05, + "loss": 2.4381, + "step": 14294 + }, + { + "epoch": 1.1536599144540391, + "grad_norm": 0.7045955061912537, + "learning_rate": 3.791409439214794e-05, + "loss": 2.4456, + "step": 14295 + }, + { + "epoch": 1.1537406181906222, + "grad_norm": 0.705476701259613, + "learning_rate": 
3.790171946608074e-05, + "loss": 2.466, + "step": 14296 + }, + { + "epoch": 1.1538213219272053, + "grad_norm": 0.7128286957740784, + "learning_rate": 3.788934608766503e-05, + "loss": 2.4891, + "step": 14297 + }, + { + "epoch": 1.1539020256637882, + "grad_norm": 0.678144633769989, + "learning_rate": 3.787697425720918e-05, + "loss": 2.4453, + "step": 14298 + }, + { + "epoch": 1.1539827294003713, + "grad_norm": 0.754216730594635, + "learning_rate": 3.786460397502151e-05, + "loss": 2.4331, + "step": 14299 + }, + { + "epoch": 1.1540634331369541, + "grad_norm": 0.6881092190742493, + "learning_rate": 3.7852235241410325e-05, + "loss": 2.3692, + "step": 14300 + }, + { + "epoch": 1.1541441368735372, + "grad_norm": 0.7498507499694824, + "learning_rate": 3.783986805668395e-05, + "loss": 2.4556, + "step": 14301 + }, + { + "epoch": 1.1542248406101203, + "grad_norm": 0.6312216520309448, + "learning_rate": 3.7827502421150496e-05, + "loss": 2.4727, + "step": 14302 + }, + { + "epoch": 1.1543055443467032, + "grad_norm": 0.7156404256820679, + "learning_rate": 3.781513833511822e-05, + "loss": 2.4003, + "step": 14303 + }, + { + "epoch": 1.1543862480832863, + "grad_norm": 0.6589376926422119, + "learning_rate": 3.7802775798895226e-05, + "loss": 2.4461, + "step": 14304 + }, + { + "epoch": 1.1544669518198694, + "grad_norm": 0.7259865999221802, + "learning_rate": 3.77904148127897e-05, + "loss": 2.4021, + "step": 14305 + }, + { + "epoch": 1.1545476555564522, + "grad_norm": 0.7248456478118896, + "learning_rate": 3.777805537710961e-05, + "loss": 2.4784, + "step": 14306 + }, + { + "epoch": 1.1546283592930353, + "grad_norm": 0.7085593342781067, + "learning_rate": 3.7765697492163034e-05, + "loss": 2.4394, + "step": 14307 + }, + { + "epoch": 1.1547090630296182, + "grad_norm": 0.7394313216209412, + "learning_rate": 3.775334115825796e-05, + "loss": 2.5055, + "step": 14308 + }, + { + "epoch": 1.1547897667662013, + "grad_norm": 0.7231999039649963, + "learning_rate": 3.7740986375702336e-05, + "loss": 
2.4551, + "step": 14309 + }, + { + "epoch": 1.1548704705027844, + "grad_norm": 0.6875953078269958, + "learning_rate": 3.7728633144804084e-05, + "loss": 2.4641, + "step": 14310 + }, + { + "epoch": 1.1549511742393672, + "grad_norm": 0.7477203607559204, + "learning_rate": 3.7716281465871094e-05, + "loss": 2.4929, + "step": 14311 + }, + { + "epoch": 1.1550318779759503, + "grad_norm": 0.6653971076011658, + "learning_rate": 3.770393133921115e-05, + "loss": 2.4819, + "step": 14312 + }, + { + "epoch": 1.1551125817125332, + "grad_norm": 0.7267318964004517, + "learning_rate": 3.769158276513209e-05, + "loss": 2.4568, + "step": 14313 + }, + { + "epoch": 1.1551932854491163, + "grad_norm": 0.6675654053688049, + "learning_rate": 3.76792357439417e-05, + "loss": 2.4789, + "step": 14314 + }, + { + "epoch": 1.1552739891856993, + "grad_norm": 0.6847487688064575, + "learning_rate": 3.7666890275947616e-05, + "loss": 2.4034, + "step": 14315 + }, + { + "epoch": 1.1553546929222822, + "grad_norm": 0.811553418636322, + "learning_rate": 3.765454636145758e-05, + "loss": 2.5051, + "step": 14316 + }, + { + "epoch": 1.1554353966588653, + "grad_norm": 0.690026581287384, + "learning_rate": 3.7642204000779204e-05, + "loss": 2.4477, + "step": 14317 + }, + { + "epoch": 1.1555161003954484, + "grad_norm": 0.695810079574585, + "learning_rate": 3.762986319422013e-05, + "loss": 2.4516, + "step": 14318 + }, + { + "epoch": 1.1555968041320313, + "grad_norm": 0.6869217753410339, + "learning_rate": 3.7617523942087886e-05, + "loss": 2.3802, + "step": 14319 + }, + { + "epoch": 1.1556775078686143, + "grad_norm": 0.7109078764915466, + "learning_rate": 3.7605186244690016e-05, + "loss": 2.4306, + "step": 14320 + }, + { + "epoch": 1.1557582116051974, + "grad_norm": 0.7385044693946838, + "learning_rate": 3.759285010233404e-05, + "loss": 2.4288, + "step": 14321 + }, + { + "epoch": 1.1558389153417803, + "grad_norm": 0.6775605082511902, + "learning_rate": 3.7580515515327355e-05, + "loss": 2.4155, + "step": 14322 + }, + { 
+ "epoch": 1.1559196190783634, + "grad_norm": 0.7325694561004639, + "learning_rate": 3.7568182483977375e-05, + "loss": 2.5035, + "step": 14323 + }, + { + "epoch": 1.1560003228149462, + "grad_norm": 0.6896799206733704, + "learning_rate": 3.7555851008591526e-05, + "loss": 2.4739, + "step": 14324 + }, + { + "epoch": 1.1560810265515293, + "grad_norm": 0.7086506485939026, + "learning_rate": 3.7543521089477065e-05, + "loss": 2.4815, + "step": 14325 + }, + { + "epoch": 1.1561617302881124, + "grad_norm": 0.6886687874794006, + "learning_rate": 3.753119272694132e-05, + "loss": 2.4261, + "step": 14326 + }, + { + "epoch": 1.1562424340246953, + "grad_norm": 0.675136148929596, + "learning_rate": 3.751886592129155e-05, + "loss": 2.3946, + "step": 14327 + }, + { + "epoch": 1.1563231377612784, + "grad_norm": 0.706729531288147, + "learning_rate": 3.7506540672834964e-05, + "loss": 2.4199, + "step": 14328 + }, + { + "epoch": 1.1564038414978612, + "grad_norm": 0.6790904998779297, + "learning_rate": 3.749421698187875e-05, + "loss": 2.4419, + "step": 14329 + }, + { + "epoch": 1.1564845452344443, + "grad_norm": 0.6688171029090881, + "learning_rate": 3.748189484873007e-05, + "loss": 2.4516, + "step": 14330 + }, + { + "epoch": 1.1565652489710274, + "grad_norm": 0.6782420873641968, + "learning_rate": 3.746957427369596e-05, + "loss": 2.4586, + "step": 14331 + }, + { + "epoch": 1.1566459527076103, + "grad_norm": 0.7633399367332458, + "learning_rate": 3.7457255257083514e-05, + "loss": 2.3776, + "step": 14332 + }, + { + "epoch": 1.1567266564441934, + "grad_norm": 0.680000364780426, + "learning_rate": 3.744493779919976e-05, + "loss": 2.4978, + "step": 14333 + }, + { + "epoch": 1.1568073601807765, + "grad_norm": 0.6993350386619568, + "learning_rate": 3.743262190035171e-05, + "loss": 2.3974, + "step": 14334 + }, + { + "epoch": 1.1568880639173593, + "grad_norm": 0.7316375374794006, + "learning_rate": 3.7420307560846234e-05, + "loss": 2.4423, + "step": 14335 + }, + { + "epoch": 1.1569687676539424, + 
"grad_norm": 0.7384842038154602, + "learning_rate": 3.7407994780990285e-05, + "loss": 2.4604, + "step": 14336 + }, + { + "epoch": 1.1570494713905255, + "grad_norm": 0.6980708837509155, + "learning_rate": 3.739568356109072e-05, + "loss": 2.4408, + "step": 14337 + }, + { + "epoch": 1.1571301751271084, + "grad_norm": 0.6510182619094849, + "learning_rate": 3.738337390145438e-05, + "loss": 2.4076, + "step": 14338 + }, + { + "epoch": 1.1572108788636915, + "grad_norm": 0.7458614706993103, + "learning_rate": 3.737106580238804e-05, + "loss": 2.4976, + "step": 14339 + }, + { + "epoch": 1.1572915826002743, + "grad_norm": 0.6663469672203064, + "learning_rate": 3.735875926419849e-05, + "loss": 2.4414, + "step": 14340 + }, + { + "epoch": 1.1573722863368574, + "grad_norm": 0.6611858606338501, + "learning_rate": 3.7346454287192355e-05, + "loss": 2.3783, + "step": 14341 + }, + { + "epoch": 1.1574529900734405, + "grad_norm": 0.6605291366577148, + "learning_rate": 3.7334150871676364e-05, + "loss": 2.4291, + "step": 14342 + }, + { + "epoch": 1.1575336938100234, + "grad_norm": 0.6879985928535461, + "learning_rate": 3.7321849017957186e-05, + "loss": 2.4229, + "step": 14343 + }, + { + "epoch": 1.1576143975466064, + "grad_norm": 0.7466493844985962, + "learning_rate": 3.7309548726341334e-05, + "loss": 2.4278, + "step": 14344 + }, + { + "epoch": 1.1576951012831893, + "grad_norm": 0.7476457357406616, + "learning_rate": 3.72972499971354e-05, + "loss": 2.4944, + "step": 14345 + }, + { + "epoch": 1.1577758050197724, + "grad_norm": 0.6339364647865295, + "learning_rate": 3.728495283064594e-05, + "loss": 2.3753, + "step": 14346 + }, + { + "epoch": 1.1578565087563555, + "grad_norm": 0.6885230541229248, + "learning_rate": 3.7272657227179355e-05, + "loss": 2.4519, + "step": 14347 + }, + { + "epoch": 1.1579372124929384, + "grad_norm": 0.7561741471290588, + "learning_rate": 3.7260363187042126e-05, + "loss": 2.4808, + "step": 14348 + }, + { + "epoch": 1.1580179162295214, + "grad_norm": 
0.8007705211639404, + "learning_rate": 3.724807071054062e-05, + "loss": 2.4649, + "step": 14349 + }, + { + "epoch": 1.1580986199661045, + "grad_norm": 0.6920937895774841, + "learning_rate": 3.72357797979813e-05, + "loss": 2.4145, + "step": 14350 + }, + { + "epoch": 1.1581793237026874, + "grad_norm": 0.7310675978660583, + "learning_rate": 3.7223490449670364e-05, + "loss": 2.4475, + "step": 14351 + }, + { + "epoch": 1.1582600274392705, + "grad_norm": 0.6600463390350342, + "learning_rate": 3.7211202665914155e-05, + "loss": 2.3938, + "step": 14352 + }, + { + "epoch": 1.1583407311758536, + "grad_norm": 0.690258800983429, + "learning_rate": 3.719891644701894e-05, + "loss": 2.3944, + "step": 14353 + }, + { + "epoch": 1.1584214349124364, + "grad_norm": 0.7075135111808777, + "learning_rate": 3.718663179329085e-05, + "loss": 2.3931, + "step": 14354 + }, + { + "epoch": 1.1585021386490195, + "grad_norm": 0.7416332960128784, + "learning_rate": 3.71743487050361e-05, + "loss": 2.4566, + "step": 14355 + }, + { + "epoch": 1.1585828423856024, + "grad_norm": 0.7459710836410522, + "learning_rate": 3.7162067182560846e-05, + "loss": 2.4232, + "step": 14356 + }, + { + "epoch": 1.1586635461221855, + "grad_norm": 0.7265400886535645, + "learning_rate": 3.71497872261711e-05, + "loss": 2.4798, + "step": 14357 + }, + { + "epoch": 1.1587442498587683, + "grad_norm": 0.7142636775970459, + "learning_rate": 3.713750883617294e-05, + "loss": 2.4576, + "step": 14358 + }, + { + "epoch": 1.1588249535953514, + "grad_norm": 0.7279871702194214, + "learning_rate": 3.712523201287239e-05, + "loss": 2.439, + "step": 14359 + }, + { + "epoch": 1.1589056573319345, + "grad_norm": 0.7151274681091309, + "learning_rate": 3.7112956756575414e-05, + "loss": 2.4684, + "step": 14360 + }, + { + "epoch": 1.1589863610685174, + "grad_norm": 0.7142657041549683, + "learning_rate": 3.7100683067587946e-05, + "loss": 2.4582, + "step": 14361 + }, + { + "epoch": 1.1590670648051005, + "grad_norm": 0.7716035842895508, + 
"learning_rate": 3.7088410946215914e-05, + "loss": 2.5038, + "step": 14362 + }, + { + "epoch": 1.1591477685416836, + "grad_norm": 0.7232338190078735, + "learning_rate": 3.707614039276509e-05, + "loss": 2.4558, + "step": 14363 + }, + { + "epoch": 1.1592284722782664, + "grad_norm": 0.7388719916343689, + "learning_rate": 3.706387140754134e-05, + "loss": 2.4535, + "step": 14364 + }, + { + "epoch": 1.1593091760148495, + "grad_norm": 0.7022652626037598, + "learning_rate": 3.7051603990850425e-05, + "loss": 2.4479, + "step": 14365 + }, + { + "epoch": 1.1593898797514326, + "grad_norm": 0.7861798405647278, + "learning_rate": 3.703933814299813e-05, + "loss": 2.4219, + "step": 14366 + }, + { + "epoch": 1.1594705834880155, + "grad_norm": 0.6928723454475403, + "learning_rate": 3.7027073864290074e-05, + "loss": 2.4401, + "step": 14367 + }, + { + "epoch": 1.1595512872245985, + "grad_norm": 0.6312821507453918, + "learning_rate": 3.701481115503194e-05, + "loss": 2.3975, + "step": 14368 + }, + { + "epoch": 1.1596319909611814, + "grad_norm": 0.7008257508277893, + "learning_rate": 3.700255001552937e-05, + "loss": 2.4988, + "step": 14369 + }, + { + "epoch": 1.1597126946977645, + "grad_norm": 0.6664693355560303, + "learning_rate": 3.699029044608792e-05, + "loss": 2.4123, + "step": 14370 + }, + { + "epoch": 1.1597933984343476, + "grad_norm": 0.6613842844963074, + "learning_rate": 3.6978032447013145e-05, + "loss": 2.4802, + "step": 14371 + }, + { + "epoch": 1.1598741021709305, + "grad_norm": 0.707788348197937, + "learning_rate": 3.696577601861057e-05, + "loss": 2.4432, + "step": 14372 + }, + { + "epoch": 1.1599548059075135, + "grad_norm": 0.6547604203224182, + "learning_rate": 3.695352116118561e-05, + "loss": 2.412, + "step": 14373 + }, + { + "epoch": 1.1600355096440964, + "grad_norm": 0.7238109707832336, + "learning_rate": 3.69412678750437e-05, + "loss": 2.4858, + "step": 14374 + }, + { + "epoch": 1.1601162133806795, + "grad_norm": 0.8156580328941345, + "learning_rate": 
3.692901616049026e-05, + "loss": 2.4063, + "step": 14375 + }, + { + "epoch": 1.1601969171172626, + "grad_norm": 0.7035481333732605, + "learning_rate": 3.6916766017830585e-05, + "loss": 2.4586, + "step": 14376 + }, + { + "epoch": 1.1602776208538454, + "grad_norm": 0.7523401379585266, + "learning_rate": 3.690451744736999e-05, + "loss": 2.4262, + "step": 14377 + }, + { + "epoch": 1.1603583245904285, + "grad_norm": 0.6740732192993164, + "learning_rate": 3.689227044941376e-05, + "loss": 2.5215, + "step": 14378 + }, + { + "epoch": 1.1604390283270116, + "grad_norm": 0.6502695083618164, + "learning_rate": 3.6880025024267115e-05, + "loss": 2.4292, + "step": 14379 + }, + { + "epoch": 1.1605197320635945, + "grad_norm": 0.7000409364700317, + "learning_rate": 3.686778117223524e-05, + "loss": 2.4323, + "step": 14380 + }, + { + "epoch": 1.1606004358001776, + "grad_norm": 0.7415478229522705, + "learning_rate": 3.68555388936233e-05, + "loss": 2.4515, + "step": 14381 + }, + { + "epoch": 1.1606811395367607, + "grad_norm": 0.6890547871589661, + "learning_rate": 3.684329818873641e-05, + "loss": 2.4115, + "step": 14382 + }, + { + "epoch": 1.1607618432733435, + "grad_norm": 0.8238685727119446, + "learning_rate": 3.68310590578796e-05, + "loss": 2.4666, + "step": 14383 + }, + { + "epoch": 1.1608425470099266, + "grad_norm": 0.8098889589309692, + "learning_rate": 3.681882150135791e-05, + "loss": 2.4667, + "step": 14384 + }, + { + "epoch": 1.1609232507465095, + "grad_norm": 0.6932713985443115, + "learning_rate": 3.680658551947639e-05, + "loss": 2.4574, + "step": 14385 + }, + { + "epoch": 1.1610039544830926, + "grad_norm": 0.7062943577766418, + "learning_rate": 3.6794351112539915e-05, + "loss": 2.4408, + "step": 14386 + }, + { + "epoch": 1.1610846582196757, + "grad_norm": 0.7859255075454712, + "learning_rate": 3.678211828085343e-05, + "loss": 2.3946, + "step": 14387 + }, + { + "epoch": 1.1611653619562585, + "grad_norm": 0.674609899520874, + "learning_rate": 3.676988702472181e-05, + "loss": 
2.4456, + "step": 14388 + }, + { + "epoch": 1.1612460656928416, + "grad_norm": 0.7068402171134949, + "learning_rate": 3.675765734444989e-05, + "loss": 2.4393, + "step": 14389 + }, + { + "epoch": 1.1613267694294245, + "grad_norm": 0.7276526689529419, + "learning_rate": 3.674542924034246e-05, + "loss": 2.456, + "step": 14390 + }, + { + "epoch": 1.1614074731660076, + "grad_norm": 0.7670585513114929, + "learning_rate": 3.673320271270433e-05, + "loss": 2.3774, + "step": 14391 + }, + { + "epoch": 1.1614881769025907, + "grad_norm": 0.702173113822937, + "learning_rate": 3.672097776184013e-05, + "loss": 2.3974, + "step": 14392 + }, + { + "epoch": 1.1615688806391735, + "grad_norm": 0.6922066807746887, + "learning_rate": 3.670875438805457e-05, + "loss": 2.4035, + "step": 14393 + }, + { + "epoch": 1.1616495843757566, + "grad_norm": 0.6675707697868347, + "learning_rate": 3.6696532591652335e-05, + "loss": 2.4369, + "step": 14394 + }, + { + "epoch": 1.1617302881123397, + "grad_norm": 0.6939712762832642, + "learning_rate": 3.668431237293796e-05, + "loss": 2.4265, + "step": 14395 + }, + { + "epoch": 1.1618109918489226, + "grad_norm": 0.719510018825531, + "learning_rate": 3.667209373221602e-05, + "loss": 2.4686, + "step": 14396 + }, + { + "epoch": 1.1618916955855056, + "grad_norm": 0.7167489528656006, + "learning_rate": 3.665987666979104e-05, + "loss": 2.5077, + "step": 14397 + }, + { + "epoch": 1.1619723993220887, + "grad_norm": 0.6539514064788818, + "learning_rate": 3.664766118596754e-05, + "loss": 2.4476, + "step": 14398 + }, + { + "epoch": 1.1620531030586716, + "grad_norm": 0.6926440596580505, + "learning_rate": 3.6635447281049876e-05, + "loss": 2.4336, + "step": 14399 + }, + { + "epoch": 1.1621338067952547, + "grad_norm": 0.7124993205070496, + "learning_rate": 3.662323495534252e-05, + "loss": 2.3938, + "step": 14400 + }, + { + "epoch": 1.1622145105318376, + "grad_norm": 0.7073954939842224, + "learning_rate": 3.661102420914986e-05, + "loss": 2.4232, + "step": 14401 + }, + { + 
"epoch": 1.1622952142684206, + "grad_norm": 0.7491076588630676, + "learning_rate": 3.659881504277613e-05, + "loss": 2.5047, + "step": 14402 + }, + { + "epoch": 1.1623759180050037, + "grad_norm": 0.6698675155639648, + "learning_rate": 3.658660745652568e-05, + "loss": 2.4164, + "step": 14403 + }, + { + "epoch": 1.1624566217415866, + "grad_norm": 0.6576815843582153, + "learning_rate": 3.657440145070276e-05, + "loss": 2.4368, + "step": 14404 + }, + { + "epoch": 1.1625373254781697, + "grad_norm": 0.8236953020095825, + "learning_rate": 3.6562197025611524e-05, + "loss": 2.5041, + "step": 14405 + }, + { + "epoch": 1.1626180292147525, + "grad_norm": 0.7391532063484192, + "learning_rate": 3.6549994181556157e-05, + "loss": 2.4556, + "step": 14406 + }, + { + "epoch": 1.1626987329513356, + "grad_norm": 0.6529936790466309, + "learning_rate": 3.653779291884084e-05, + "loss": 2.4559, + "step": 14407 + }, + { + "epoch": 1.1627794366879187, + "grad_norm": 0.7101796269416809, + "learning_rate": 3.652559323776957e-05, + "loss": 2.3937, + "step": 14408 + }, + { + "epoch": 1.1628601404245016, + "grad_norm": 0.6890308260917664, + "learning_rate": 3.651339513864645e-05, + "loss": 2.4694, + "step": 14409 + }, + { + "epoch": 1.1629408441610847, + "grad_norm": 0.6919918060302734, + "learning_rate": 3.650119862177548e-05, + "loss": 2.4793, + "step": 14410 + }, + { + "epoch": 1.1630215478976678, + "grad_norm": 0.6553575992584229, + "learning_rate": 3.6489003687460624e-05, + "loss": 2.454, + "step": 14411 + }, + { + "epoch": 1.1631022516342506, + "grad_norm": 0.7095460891723633, + "learning_rate": 3.6476810336005804e-05, + "loss": 2.4672, + "step": 14412 + }, + { + "epoch": 1.1631829553708337, + "grad_norm": 0.738301694393158, + "learning_rate": 3.6464618567714935e-05, + "loss": 2.4369, + "step": 14413 + }, + { + "epoch": 1.1632636591074166, + "grad_norm": 0.7574542760848999, + "learning_rate": 3.645242838289189e-05, + "loss": 2.4981, + "step": 14414 + }, + { + "epoch": 1.1633443628439997, + 
"grad_norm": 0.6780585646629333, + "learning_rate": 3.64402397818404e-05, + "loss": 2.4811, + "step": 14415 + }, + { + "epoch": 1.1634250665805828, + "grad_norm": 0.7050060629844666, + "learning_rate": 3.6428052764864287e-05, + "loss": 2.4607, + "step": 14416 + }, + { + "epoch": 1.1635057703171656, + "grad_norm": 0.6946923136711121, + "learning_rate": 3.6415867332267316e-05, + "loss": 2.4482, + "step": 14417 + }, + { + "epoch": 1.1635864740537487, + "grad_norm": 0.7202015519142151, + "learning_rate": 3.64036834843531e-05, + "loss": 2.4764, + "step": 14418 + }, + { + "epoch": 1.1636671777903316, + "grad_norm": 0.7845996618270874, + "learning_rate": 3.639150122142534e-05, + "loss": 2.4926, + "step": 14419 + }, + { + "epoch": 1.1637478815269147, + "grad_norm": 0.6924630403518677, + "learning_rate": 3.6379320543787645e-05, + "loss": 2.4664, + "step": 14420 + }, + { + "epoch": 1.1638285852634978, + "grad_norm": 0.7225920557975769, + "learning_rate": 3.636714145174358e-05, + "loss": 2.4638, + "step": 14421 + }, + { + "epoch": 1.1639092890000806, + "grad_norm": 0.6587103605270386, + "learning_rate": 3.63549639455967e-05, + "loss": 2.3629, + "step": 14422 + }, + { + "epoch": 1.1639899927366637, + "grad_norm": 0.7537658214569092, + "learning_rate": 3.634278802565051e-05, + "loss": 2.4971, + "step": 14423 + }, + { + "epoch": 1.1640706964732468, + "grad_norm": 0.6881381273269653, + "learning_rate": 3.633061369220841e-05, + "loss": 2.3737, + "step": 14424 + }, + { + "epoch": 1.1641514002098297, + "grad_norm": 0.693779468536377, + "learning_rate": 3.6318440945573864e-05, + "loss": 2.4346, + "step": 14425 + }, + { + "epoch": 1.1642321039464127, + "grad_norm": 0.777563750743866, + "learning_rate": 3.6306269786050265e-05, + "loss": 2.4288, + "step": 14426 + }, + { + "epoch": 1.1643128076829958, + "grad_norm": 0.6786738634109497, + "learning_rate": 3.629410021394087e-05, + "loss": 2.4094, + "step": 14427 + }, + { + "epoch": 1.1643935114195787, + "grad_norm": 0.7478442788124084, + 
"learning_rate": 3.628193222954904e-05, + "loss": 2.4163, + "step": 14428 + }, + { + "epoch": 1.1644742151561618, + "grad_norm": 0.6530766487121582, + "learning_rate": 3.626976583317803e-05, + "loss": 2.4328, + "step": 14429 + }, + { + "epoch": 1.1645549188927447, + "grad_norm": 0.6665371060371399, + "learning_rate": 3.6257601025131026e-05, + "loss": 2.4006, + "step": 14430 + }, + { + "epoch": 1.1646356226293277, + "grad_norm": 0.7184741497039795, + "learning_rate": 3.624543780571125e-05, + "loss": 2.462, + "step": 14431 + }, + { + "epoch": 1.1647163263659108, + "grad_norm": 0.7039462327957153, + "learning_rate": 3.6233276175221794e-05, + "loss": 2.4321, + "step": 14432 + }, + { + "epoch": 1.1647970301024937, + "grad_norm": 0.7039144039154053, + "learning_rate": 3.622111613396584e-05, + "loss": 2.4399, + "step": 14433 + }, + { + "epoch": 1.1648777338390768, + "grad_norm": 0.6690253615379333, + "learning_rate": 3.620895768224635e-05, + "loss": 2.3976, + "step": 14434 + }, + { + "epoch": 1.1649584375756596, + "grad_norm": 0.7048032879829407, + "learning_rate": 3.6196800820366384e-05, + "loss": 2.4848, + "step": 14435 + }, + { + "epoch": 1.1650391413122427, + "grad_norm": 0.668971836566925, + "learning_rate": 3.618464554862896e-05, + "loss": 2.4614, + "step": 14436 + }, + { + "epoch": 1.1651198450488258, + "grad_norm": 0.704858660697937, + "learning_rate": 3.617249186733695e-05, + "loss": 2.3962, + "step": 14437 + }, + { + "epoch": 1.1652005487854087, + "grad_norm": 0.692435085773468, + "learning_rate": 3.6160339776793296e-05, + "loss": 2.4059, + "step": 14438 + }, + { + "epoch": 1.1652812525219918, + "grad_norm": 0.6774182319641113, + "learning_rate": 3.614818927730085e-05, + "loss": 2.4975, + "step": 14439 + }, + { + "epoch": 1.1653619562585749, + "grad_norm": 0.6507411003112793, + "learning_rate": 3.613604036916243e-05, + "loss": 2.5029, + "step": 14440 + }, + { + "epoch": 1.1654426599951577, + "grad_norm": 0.7223206162452698, + "learning_rate": 
3.612389305268084e-05, + "loss": 2.4599, + "step": 14441 + }, + { + "epoch": 1.1655233637317408, + "grad_norm": 0.6523364186286926, + "learning_rate": 3.611174732815883e-05, + "loss": 2.4521, + "step": 14442 + }, + { + "epoch": 1.165604067468324, + "grad_norm": 0.6668452024459839, + "learning_rate": 3.6099603195899046e-05, + "loss": 2.4082, + "step": 14443 + }, + { + "epoch": 1.1656847712049068, + "grad_norm": 0.6878299117088318, + "learning_rate": 3.60874606562042e-05, + "loss": 2.4144, + "step": 14444 + }, + { + "epoch": 1.1657654749414899, + "grad_norm": 0.6662277579307556, + "learning_rate": 3.6075319709376895e-05, + "loss": 2.438, + "step": 14445 + }, + { + "epoch": 1.1658461786780727, + "grad_norm": 0.721422553062439, + "learning_rate": 3.606318035571976e-05, + "loss": 2.4414, + "step": 14446 + }, + { + "epoch": 1.1659268824146558, + "grad_norm": 0.6739782691001892, + "learning_rate": 3.6051042595535264e-05, + "loss": 2.4093, + "step": 14447 + }, + { + "epoch": 1.166007586151239, + "grad_norm": 0.6890884637832642, + "learning_rate": 3.603890642912596e-05, + "loss": 2.4385, + "step": 14448 + }, + { + "epoch": 1.1660882898878218, + "grad_norm": 0.6503998637199402, + "learning_rate": 3.602677185679433e-05, + "loss": 2.4498, + "step": 14449 + }, + { + "epoch": 1.1661689936244048, + "grad_norm": 0.6748046875, + "learning_rate": 3.601463887884271e-05, + "loss": 2.3739, + "step": 14450 + }, + { + "epoch": 1.1662496973609877, + "grad_norm": 0.6843422651290894, + "learning_rate": 3.600250749557358e-05, + "loss": 2.4323, + "step": 14451 + }, + { + "epoch": 1.1663304010975708, + "grad_norm": 0.7061208486557007, + "learning_rate": 3.599037770728929e-05, + "loss": 2.4611, + "step": 14452 + }, + { + "epoch": 1.166411104834154, + "grad_norm": 0.6614537239074707, + "learning_rate": 3.597824951429208e-05, + "loss": 2.4656, + "step": 14453 + }, + { + "epoch": 1.1664918085707368, + "grad_norm": 0.6620328426361084, + "learning_rate": 3.596612291688424e-05, + "loss": 2.415, + 
"step": 14454 + }, + { + "epoch": 1.1665725123073198, + "grad_norm": 0.6936565041542053, + "learning_rate": 3.595399791536804e-05, + "loss": 2.4655, + "step": 14455 + }, + { + "epoch": 1.166653216043903, + "grad_norm": 0.6766063570976257, + "learning_rate": 3.594187451004559e-05, + "loss": 2.4628, + "step": 14456 + }, + { + "epoch": 1.1667339197804858, + "grad_norm": 0.6588734984397888, + "learning_rate": 3.592975270121909e-05, + "loss": 2.4503, + "step": 14457 + }, + { + "epoch": 1.1668146235170689, + "grad_norm": 0.7290894985198975, + "learning_rate": 3.591763248919062e-05, + "loss": 2.5075, + "step": 14458 + }, + { + "epoch": 1.1668953272536517, + "grad_norm": 0.6952784657478333, + "learning_rate": 3.590551387426231e-05, + "loss": 2.4258, + "step": 14459 + }, + { + "epoch": 1.1669760309902348, + "grad_norm": 0.6737042665481567, + "learning_rate": 3.5893396856736096e-05, + "loss": 2.4459, + "step": 14460 + }, + { + "epoch": 1.167056734726818, + "grad_norm": 0.6616976857185364, + "learning_rate": 3.588128143691397e-05, + "loss": 2.4726, + "step": 14461 + }, + { + "epoch": 1.1671374384634008, + "grad_norm": 0.7017171382904053, + "learning_rate": 3.5869167615098e-05, + "loss": 2.375, + "step": 14462 + }, + { + "epoch": 1.1672181421999839, + "grad_norm": 0.7153809666633606, + "learning_rate": 3.585705539158997e-05, + "loss": 2.4271, + "step": 14463 + }, + { + "epoch": 1.1672988459365667, + "grad_norm": 0.749196469783783, + "learning_rate": 3.584494476669179e-05, + "loss": 2.4713, + "step": 14464 + }, + { + "epoch": 1.1673795496731498, + "grad_norm": 0.6593676209449768, + "learning_rate": 3.583283574070533e-05, + "loss": 2.4276, + "step": 14465 + }, + { + "epoch": 1.167460253409733, + "grad_norm": 0.6949084401130676, + "learning_rate": 3.5820728313932295e-05, + "loss": 2.4128, + "step": 14466 + }, + { + "epoch": 1.1675409571463158, + "grad_norm": 0.6795482039451599, + "learning_rate": 3.5808622486674484e-05, + "loss": 2.485, + "step": 14467 + }, + { + "epoch": 
1.1676216608828989, + "grad_norm": 0.6763483881950378, + "learning_rate": 3.5796518259233625e-05, + "loss": 2.4063, + "step": 14468 + }, + { + "epoch": 1.167702364619482, + "grad_norm": 0.665687620639801, + "learning_rate": 3.578441563191133e-05, + "loss": 2.437, + "step": 14469 + }, + { + "epoch": 1.1677830683560648, + "grad_norm": 0.6338435411453247, + "learning_rate": 3.577231460500926e-05, + "loss": 2.3747, + "step": 14470 + }, + { + "epoch": 1.167863772092648, + "grad_norm": 0.7031865119934082, + "learning_rate": 3.5760215178829e-05, + "loss": 2.3952, + "step": 14471 + }, + { + "epoch": 1.167944475829231, + "grad_norm": 0.7544599771499634, + "learning_rate": 3.5748117353672106e-05, + "loss": 2.3941, + "step": 14472 + }, + { + "epoch": 1.1680251795658139, + "grad_norm": 0.7271532416343689, + "learning_rate": 3.5736021129840083e-05, + "loss": 2.4371, + "step": 14473 + }, + { + "epoch": 1.168105883302397, + "grad_norm": 0.709048867225647, + "learning_rate": 3.572392650763441e-05, + "loss": 2.482, + "step": 14474 + }, + { + "epoch": 1.1681865870389798, + "grad_norm": 0.6894589066505432, + "learning_rate": 3.571183348735653e-05, + "loss": 2.4347, + "step": 14475 + }, + { + "epoch": 1.168267290775563, + "grad_norm": 0.6680620908737183, + "learning_rate": 3.5699742069307774e-05, + "loss": 2.3995, + "step": 14476 + }, + { + "epoch": 1.168347994512146, + "grad_norm": 0.701669454574585, + "learning_rate": 3.568765225378954e-05, + "loss": 2.4045, + "step": 14477 + }, + { + "epoch": 1.1684286982487289, + "grad_norm": 0.7102392911911011, + "learning_rate": 3.567556404110315e-05, + "loss": 2.4695, + "step": 14478 + }, + { + "epoch": 1.168509401985312, + "grad_norm": 0.6820430755615234, + "learning_rate": 3.566347743154982e-05, + "loss": 2.4155, + "step": 14479 + }, + { + "epoch": 1.1685901057218948, + "grad_norm": 0.6611022353172302, + "learning_rate": 3.565139242543081e-05, + "loss": 2.3992, + "step": 14480 + }, + { + "epoch": 1.168670809458478, + "grad_norm": 
0.6844382882118225, + "learning_rate": 3.5639309023047306e-05, + "loss": 2.4345, + "step": 14481 + }, + { + "epoch": 1.168751513195061, + "grad_norm": 0.7557988166809082, + "learning_rate": 3.5627227224700464e-05, + "loss": 2.4454, + "step": 14482 + }, + { + "epoch": 1.1688322169316439, + "grad_norm": 0.6652555465698242, + "learning_rate": 3.5615147030691384e-05, + "loss": 2.3749, + "step": 14483 + }, + { + "epoch": 1.168912920668227, + "grad_norm": 0.6912989020347595, + "learning_rate": 3.56030684413212e-05, + "loss": 2.4737, + "step": 14484 + }, + { + "epoch": 1.16899362440481, + "grad_norm": 0.735103964805603, + "learning_rate": 3.559099145689083e-05, + "loss": 2.4098, + "step": 14485 + }, + { + "epoch": 1.169074328141393, + "grad_norm": 0.6873028874397278, + "learning_rate": 3.557891607770133e-05, + "loss": 2.4247, + "step": 14486 + }, + { + "epoch": 1.169155031877976, + "grad_norm": 0.7364680171012878, + "learning_rate": 3.556684230405367e-05, + "loss": 2.4314, + "step": 14487 + }, + { + "epoch": 1.169235735614559, + "grad_norm": 0.679122269153595, + "learning_rate": 3.55547701362487e-05, + "loss": 2.4196, + "step": 14488 + }, + { + "epoch": 1.169316439351142, + "grad_norm": 0.6783872246742249, + "learning_rate": 3.554269957458731e-05, + "loss": 2.4212, + "step": 14489 + }, + { + "epoch": 1.169397143087725, + "grad_norm": 0.7434942126274109, + "learning_rate": 3.553063061937034e-05, + "loss": 2.4139, + "step": 14490 + }, + { + "epoch": 1.1694778468243079, + "grad_norm": 0.6799852252006531, + "learning_rate": 3.55185632708986e-05, + "loss": 2.4252, + "step": 14491 + }, + { + "epoch": 1.169558550560891, + "grad_norm": 0.7040107250213623, + "learning_rate": 3.5506497529472795e-05, + "loss": 2.3937, + "step": 14492 + }, + { + "epoch": 1.169639254297474, + "grad_norm": 0.7350315451622009, + "learning_rate": 3.549443339539368e-05, + "loss": 2.4063, + "step": 14493 + }, + { + "epoch": 1.169719958034057, + "grad_norm": 0.694521963596344, + "learning_rate": 
3.548237086896192e-05, + "loss": 2.4715, + "step": 14494 + }, + { + "epoch": 1.16980066177064, + "grad_norm": 0.6648221015930176, + "learning_rate": 3.5470309950478096e-05, + "loss": 2.4365, + "step": 14495 + }, + { + "epoch": 1.1698813655072229, + "grad_norm": 0.688024640083313, + "learning_rate": 3.545825064024284e-05, + "loss": 2.449, + "step": 14496 + }, + { + "epoch": 1.169962069243806, + "grad_norm": 0.6743311882019043, + "learning_rate": 3.544619293855672e-05, + "loss": 2.4283, + "step": 14497 + }, + { + "epoch": 1.170042772980389, + "grad_norm": 0.669119119644165, + "learning_rate": 3.543413684572019e-05, + "loss": 2.4363, + "step": 14498 + }, + { + "epoch": 1.170123476716972, + "grad_norm": 0.6998667120933533, + "learning_rate": 3.5422082362033745e-05, + "loss": 2.425, + "step": 14499 + }, + { + "epoch": 1.170204180453555, + "grad_norm": 0.7681630253791809, + "learning_rate": 3.5410029487797845e-05, + "loss": 2.4382, + "step": 14500 + }, + { + "epoch": 1.170284884190138, + "grad_norm": 0.6925049424171448, + "learning_rate": 3.539797822331279e-05, + "loss": 2.4261, + "step": 14501 + }, + { + "epoch": 1.170365587926721, + "grad_norm": 0.7145542502403259, + "learning_rate": 3.538592856887901e-05, + "loss": 2.4681, + "step": 14502 + }, + { + "epoch": 1.170446291663304, + "grad_norm": 0.6441611647605896, + "learning_rate": 3.537388052479684e-05, + "loss": 2.4187, + "step": 14503 + }, + { + "epoch": 1.1705269953998871, + "grad_norm": 0.6622560620307922, + "learning_rate": 3.5361834091366466e-05, + "loss": 2.4615, + "step": 14504 + }, + { + "epoch": 1.17060769913647, + "grad_norm": 0.6987677812576294, + "learning_rate": 3.5349789268888144e-05, + "loss": 2.413, + "step": 14505 + }, + { + "epoch": 1.170688402873053, + "grad_norm": 0.668358325958252, + "learning_rate": 3.533774605766207e-05, + "loss": 2.5146, + "step": 14506 + }, + { + "epoch": 1.170769106609636, + "grad_norm": 0.7514958381652832, + "learning_rate": 3.532570445798844e-05, + "loss": 2.4474, + "step": 
14507 + }, + { + "epoch": 1.170849810346219, + "grad_norm": 0.6454465389251709, + "learning_rate": 3.5313664470167276e-05, + "loss": 2.3911, + "step": 14508 + }, + { + "epoch": 1.170930514082802, + "grad_norm": 0.6653602719306946, + "learning_rate": 3.5301626094498674e-05, + "loss": 2.4223, + "step": 14509 + }, + { + "epoch": 1.171011217819385, + "grad_norm": 0.6782815456390381, + "learning_rate": 3.5289589331282715e-05, + "loss": 2.457, + "step": 14510 + }, + { + "epoch": 1.171091921555968, + "grad_norm": 0.720973014831543, + "learning_rate": 3.527755418081932e-05, + "loss": 2.4541, + "step": 14511 + }, + { + "epoch": 1.171172625292551, + "grad_norm": 0.6300156712532043, + "learning_rate": 3.526552064340841e-05, + "loss": 2.4451, + "step": 14512 + }, + { + "epoch": 1.171253329029134, + "grad_norm": 0.7660964727401733, + "learning_rate": 3.5253488719350026e-05, + "loss": 2.5031, + "step": 14513 + }, + { + "epoch": 1.1713340327657171, + "grad_norm": 0.6931602358818054, + "learning_rate": 3.5241458408943905e-05, + "loss": 2.4249, + "step": 14514 + }, + { + "epoch": 1.1714147365023, + "grad_norm": 0.6863045692443848, + "learning_rate": 3.522942971248993e-05, + "loss": 2.4429, + "step": 14515 + }, + { + "epoch": 1.171495440238883, + "grad_norm": 0.6993531584739685, + "learning_rate": 3.521740263028791e-05, + "loss": 2.3864, + "step": 14516 + }, + { + "epoch": 1.1715761439754662, + "grad_norm": 0.807991087436676, + "learning_rate": 3.520537716263753e-05, + "loss": 2.459, + "step": 14517 + }, + { + "epoch": 1.171656847712049, + "grad_norm": 0.6722908020019531, + "learning_rate": 3.519335330983852e-05, + "loss": 2.4426, + "step": 14518 + }, + { + "epoch": 1.1717375514486321, + "grad_norm": 0.6934377551078796, + "learning_rate": 3.5181331072190585e-05, + "loss": 2.4326, + "step": 14519 + }, + { + "epoch": 1.171818255185215, + "grad_norm": 0.6532938480377197, + "learning_rate": 3.516931044999329e-05, + "loss": 2.3778, + "step": 14520 + }, + { + "epoch": 1.171898958921798, + 
"grad_norm": 0.6779183745384216, + "learning_rate": 3.5157291443546247e-05, + "loss": 2.4089, + "step": 14521 + }, + { + "epoch": 1.1719796626583812, + "grad_norm": 0.687005877494812, + "learning_rate": 3.514527405314899e-05, + "loss": 2.4669, + "step": 14522 + }, + { + "epoch": 1.172060366394964, + "grad_norm": 0.6804830431938171, + "learning_rate": 3.5133258279101045e-05, + "loss": 2.4789, + "step": 14523 + }, + { + "epoch": 1.1721410701315471, + "grad_norm": 0.8345538973808289, + "learning_rate": 3.512124412170187e-05, + "loss": 2.4506, + "step": 14524 + }, + { + "epoch": 1.17222177386813, + "grad_norm": 0.6571901440620422, + "learning_rate": 3.510923158125088e-05, + "loss": 2.4911, + "step": 14525 + }, + { + "epoch": 1.172302477604713, + "grad_norm": 0.6607047915458679, + "learning_rate": 3.5097220658047504e-05, + "loss": 2.4882, + "step": 14526 + }, + { + "epoch": 1.1723831813412962, + "grad_norm": 0.6883669495582581, + "learning_rate": 3.508521135239101e-05, + "loss": 2.4083, + "step": 14527 + }, + { + "epoch": 1.172463885077879, + "grad_norm": 0.6792941689491272, + "learning_rate": 3.5073203664580746e-05, + "loss": 2.368, + "step": 14528 + }, + { + "epoch": 1.172544588814462, + "grad_norm": 0.6675198674201965, + "learning_rate": 3.506119759491598e-05, + "loss": 2.4193, + "step": 14529 + }, + { + "epoch": 1.1726252925510452, + "grad_norm": 0.7267464399337769, + "learning_rate": 3.504919314369591e-05, + "loss": 2.3906, + "step": 14530 + }, + { + "epoch": 1.172705996287628, + "grad_norm": 0.6927710175514221, + "learning_rate": 3.503719031121973e-05, + "loss": 2.4082, + "step": 14531 + }, + { + "epoch": 1.1727867000242111, + "grad_norm": 0.7231000065803528, + "learning_rate": 3.502518909778656e-05, + "loss": 2.4845, + "step": 14532 + }, + { + "epoch": 1.1728674037607942, + "grad_norm": 0.7087520360946655, + "learning_rate": 3.5013189503695544e-05, + "loss": 2.4622, + "step": 14533 + }, + { + "epoch": 1.172948107497377, + "grad_norm": 0.6669846177101135, + 
"learning_rate": 3.5001191529245716e-05, + "loss": 2.4151, + "step": 14534 + }, + { + "epoch": 1.1730288112339602, + "grad_norm": 0.7338447570800781, + "learning_rate": 3.4989195174736134e-05, + "loss": 2.4274, + "step": 14535 + }, + { + "epoch": 1.173109514970543, + "grad_norm": 0.7032054662704468, + "learning_rate": 3.497720044046572e-05, + "loss": 2.4066, + "step": 14536 + }, + { + "epoch": 1.1731902187071261, + "grad_norm": 0.6571083068847656, + "learning_rate": 3.496520732673344e-05, + "loss": 2.4581, + "step": 14537 + }, + { + "epoch": 1.1732709224437092, + "grad_norm": 0.6618444919586182, + "learning_rate": 3.495321583383819e-05, + "loss": 2.3675, + "step": 14538 + }, + { + "epoch": 1.173351626180292, + "grad_norm": 0.6597652435302734, + "learning_rate": 3.4941225962078885e-05, + "loss": 2.416, + "step": 14539 + }, + { + "epoch": 1.1734323299168752, + "grad_norm": 0.682634711265564, + "learning_rate": 3.492923771175425e-05, + "loss": 2.5081, + "step": 14540 + }, + { + "epoch": 1.173513033653458, + "grad_norm": 0.7046132683753967, + "learning_rate": 3.49172510831631e-05, + "loss": 2.4439, + "step": 14541 + }, + { + "epoch": 1.1735937373900411, + "grad_norm": 0.6734833717346191, + "learning_rate": 3.4905266076604196e-05, + "loss": 2.4348, + "step": 14542 + }, + { + "epoch": 1.1736744411266242, + "grad_norm": 0.6624744534492493, + "learning_rate": 3.4893282692376214e-05, + "loss": 2.4364, + "step": 14543 + }, + { + "epoch": 1.173755144863207, + "grad_norm": 0.8425754308700562, + "learning_rate": 3.4881300930777815e-05, + "loss": 2.4803, + "step": 14544 + }, + { + "epoch": 1.1738358485997902, + "grad_norm": 0.6438888311386108, + "learning_rate": 3.486932079210766e-05, + "loss": 2.3973, + "step": 14545 + }, + { + "epoch": 1.1739165523363733, + "grad_norm": 0.650399923324585, + "learning_rate": 3.485734227666424e-05, + "loss": 2.4183, + "step": 14546 + }, + { + "epoch": 1.1739972560729561, + "grad_norm": 0.6857002973556519, + "learning_rate": 
3.4845365384746144e-05, + "loss": 2.4061, + "step": 14547 + }, + { + "epoch": 1.1740779598095392, + "grad_norm": 0.6680994629859924, + "learning_rate": 3.483339011665189e-05, + "loss": 2.421, + "step": 14548 + }, + { + "epoch": 1.1741586635461223, + "grad_norm": 0.6440950632095337, + "learning_rate": 3.482141647267987e-05, + "loss": 2.3914, + "step": 14549 + }, + { + "epoch": 1.1742393672827052, + "grad_norm": 0.7329740524291992, + "learning_rate": 3.480944445312853e-05, + "loss": 2.4805, + "step": 14550 + }, + { + "epoch": 1.1743200710192883, + "grad_norm": 0.6848189234733582, + "learning_rate": 3.4797474058296245e-05, + "loss": 2.3611, + "step": 14551 + }, + { + "epoch": 1.1744007747558711, + "grad_norm": 0.6994072794914246, + "learning_rate": 3.478550528848134e-05, + "loss": 2.5106, + "step": 14552 + }, + { + "epoch": 1.1744814784924542, + "grad_norm": 0.6826444268226624, + "learning_rate": 3.477353814398212e-05, + "loss": 2.467, + "step": 14553 + }, + { + "epoch": 1.1745621822290373, + "grad_norm": 0.6658408045768738, + "learning_rate": 3.476157262509683e-05, + "loss": 2.423, + "step": 14554 + }, + { + "epoch": 1.1746428859656202, + "grad_norm": 0.6963697075843811, + "learning_rate": 3.474960873212372e-05, + "loss": 2.457, + "step": 14555 + }, + { + "epoch": 1.1747235897022033, + "grad_norm": 0.7574479579925537, + "learning_rate": 3.4737646465360894e-05, + "loss": 2.4292, + "step": 14556 + }, + { + "epoch": 1.1748042934387861, + "grad_norm": 0.7494931817054749, + "learning_rate": 3.472568582510652e-05, + "loss": 2.4395, + "step": 14557 + }, + { + "epoch": 1.1748849971753692, + "grad_norm": 0.7062687873840332, + "learning_rate": 3.471372681165872e-05, + "loss": 2.4561, + "step": 14558 + }, + { + "epoch": 1.1749657009119523, + "grad_norm": 0.6875349879264832, + "learning_rate": 3.4701769425315465e-05, + "loss": 2.4728, + "step": 14559 + }, + { + "epoch": 1.1750464046485352, + "grad_norm": 0.7009960412979126, + "learning_rate": 3.46898136663748e-05, + "loss": 
2.5364, + "step": 14560 + }, + { + "epoch": 1.1751271083851182, + "grad_norm": 0.673791766166687, + "learning_rate": 3.467785953513475e-05, + "loss": 2.4611, + "step": 14561 + }, + { + "epoch": 1.1752078121217013, + "grad_norm": 0.7166882753372192, + "learning_rate": 3.4665907031893164e-05, + "loss": 2.4451, + "step": 14562 + }, + { + "epoch": 1.1752885158582842, + "grad_norm": 0.6868429780006409, + "learning_rate": 3.465395615694791e-05, + "loss": 2.4282, + "step": 14563 + }, + { + "epoch": 1.1753692195948673, + "grad_norm": 0.7212893962860107, + "learning_rate": 3.464200691059697e-05, + "loss": 2.4239, + "step": 14564 + }, + { + "epoch": 1.1754499233314502, + "grad_norm": 0.7213432192802429, + "learning_rate": 3.463005929313802e-05, + "loss": 2.4872, + "step": 14565 + }, + { + "epoch": 1.1755306270680332, + "grad_norm": 0.6805179119110107, + "learning_rate": 3.461811330486887e-05, + "loss": 2.4192, + "step": 14566 + }, + { + "epoch": 1.1756113308046163, + "grad_norm": 0.6746333241462708, + "learning_rate": 3.460616894608725e-05, + "loss": 2.3911, + "step": 14567 + }, + { + "epoch": 1.1756920345411992, + "grad_norm": 0.7388630509376526, + "learning_rate": 3.459422621709088e-05, + "loss": 2.4758, + "step": 14568 + }, + { + "epoch": 1.1757727382777823, + "grad_norm": 0.7730274200439453, + "learning_rate": 3.458228511817731e-05, + "loss": 2.4159, + "step": 14569 + }, + { + "epoch": 1.1758534420143651, + "grad_norm": 0.721075177192688, + "learning_rate": 3.457034564964422e-05, + "loss": 2.4673, + "step": 14570 + }, + { + "epoch": 1.1759341457509482, + "grad_norm": 0.6647645235061646, + "learning_rate": 3.4558407811789184e-05, + "loss": 2.395, + "step": 14571 + }, + { + "epoch": 1.1760148494875313, + "grad_norm": 0.7155466675758362, + "learning_rate": 3.454647160490965e-05, + "loss": 2.503, + "step": 14572 + }, + { + "epoch": 1.1760955532241142, + "grad_norm": 0.6789268851280212, + "learning_rate": 3.453453702930314e-05, + "loss": 2.401, + "step": 14573 + }, + { + 
"epoch": 1.1761762569606973, + "grad_norm": 0.7488093376159668, + "learning_rate": 3.4522604085267105e-05, + "loss": 2.4434, + "step": 14574 + }, + { + "epoch": 1.1762569606972804, + "grad_norm": 0.7954889535903931, + "learning_rate": 3.451067277309893e-05, + "loss": 2.5302, + "step": 14575 + }, + { + "epoch": 1.1763376644338632, + "grad_norm": 0.7008484601974487, + "learning_rate": 3.4498743093095975e-05, + "loss": 2.3935, + "step": 14576 + }, + { + "epoch": 1.1764183681704463, + "grad_norm": 0.6725437641143799, + "learning_rate": 3.448681504555561e-05, + "loss": 2.399, + "step": 14577 + }, + { + "epoch": 1.1764990719070294, + "grad_norm": 0.6778931617736816, + "learning_rate": 3.4474888630775026e-05, + "loss": 2.4178, + "step": 14578 + }, + { + "epoch": 1.1765797756436123, + "grad_norm": 0.7043762803077698, + "learning_rate": 3.44629638490515e-05, + "loss": 2.5581, + "step": 14579 + }, + { + "epoch": 1.1766604793801954, + "grad_norm": 0.6848085522651672, + "learning_rate": 3.445104070068227e-05, + "loss": 2.436, + "step": 14580 + }, + { + "epoch": 1.1767411831167782, + "grad_norm": 0.7504082322120667, + "learning_rate": 3.443911918596441e-05, + "loss": 2.4138, + "step": 14581 + }, + { + "epoch": 1.1768218868533613, + "grad_norm": 0.7441161870956421, + "learning_rate": 3.442719930519508e-05, + "loss": 2.4333, + "step": 14582 + }, + { + "epoch": 1.1769025905899444, + "grad_norm": 0.663894772529602, + "learning_rate": 3.4415281058671354e-05, + "loss": 2.4672, + "step": 14583 + }, + { + "epoch": 1.1769832943265273, + "grad_norm": 0.6814345121383667, + "learning_rate": 3.440336444669027e-05, + "loss": 2.4196, + "step": 14584 + }, + { + "epoch": 1.1770639980631104, + "grad_norm": 0.7566598057746887, + "learning_rate": 3.439144946954881e-05, + "loss": 2.4586, + "step": 14585 + }, + { + "epoch": 1.1771447017996932, + "grad_norm": 0.7324996590614319, + "learning_rate": 3.4379536127543934e-05, + "loss": 2.4286, + "step": 14586 + }, + { + "epoch": 1.1772254055362763, + 
"grad_norm": 0.6632608771324158, + "learning_rate": 3.436762442097259e-05, + "loss": 2.4713, + "step": 14587 + }, + { + "epoch": 1.1773061092728594, + "grad_norm": 0.7246156930923462, + "learning_rate": 3.4355714350131564e-05, + "loss": 2.4374, + "step": 14588 + }, + { + "epoch": 1.1773868130094423, + "grad_norm": 0.7096351981163025, + "learning_rate": 3.4343805915317737e-05, + "loss": 2.4649, + "step": 14589 + }, + { + "epoch": 1.1774675167460253, + "grad_norm": 0.7090620398521423, + "learning_rate": 3.433189911682793e-05, + "loss": 2.396, + "step": 14590 + }, + { + "epoch": 1.1775482204826084, + "grad_norm": 0.7782440185546875, + "learning_rate": 3.431999395495882e-05, + "loss": 2.4506, + "step": 14591 + }, + { + "epoch": 1.1776289242191913, + "grad_norm": 0.6933457851409912, + "learning_rate": 3.4308090430007155e-05, + "loss": 2.3985, + "step": 14592 + }, + { + "epoch": 1.1777096279557744, + "grad_norm": 0.6935414671897888, + "learning_rate": 3.429618854226959e-05, + "loss": 2.4372, + "step": 14593 + }, + { + "epoch": 1.1777903316923575, + "grad_norm": 0.6971156597137451, + "learning_rate": 3.428428829204276e-05, + "loss": 2.4837, + "step": 14594 + }, + { + "epoch": 1.1778710354289403, + "grad_norm": 0.6460022926330566, + "learning_rate": 3.427238967962325e-05, + "loss": 2.3742, + "step": 14595 + }, + { + "epoch": 1.1779517391655234, + "grad_norm": 0.6941941976547241, + "learning_rate": 3.426049270530763e-05, + "loss": 2.4706, + "step": 14596 + }, + { + "epoch": 1.1780324429021063, + "grad_norm": 0.7062166333198547, + "learning_rate": 3.424859736939236e-05, + "loss": 2.3893, + "step": 14597 + }, + { + "epoch": 1.1781131466386894, + "grad_norm": 0.6586433053016663, + "learning_rate": 3.42367036721739e-05, + "loss": 2.4385, + "step": 14598 + }, + { + "epoch": 1.1781938503752725, + "grad_norm": 0.6781242489814758, + "learning_rate": 3.422481161394869e-05, + "loss": 2.3876, + "step": 14599 + }, + { + "epoch": 1.1782745541118553, + "grad_norm": 0.710127592086792, + 
"learning_rate": 3.421292119501316e-05, + "loss": 2.4067, + "step": 14600 + }, + { + "epoch": 1.1783552578484384, + "grad_norm": 0.6856096982955933, + "learning_rate": 3.420103241566357e-05, + "loss": 2.4855, + "step": 14601 + }, + { + "epoch": 1.1784359615850213, + "grad_norm": 0.7173380851745605, + "learning_rate": 3.4189145276196245e-05, + "loss": 2.4871, + "step": 14602 + }, + { + "epoch": 1.1785166653216044, + "grad_norm": 0.6895382404327393, + "learning_rate": 3.417725977690745e-05, + "loss": 2.4066, + "step": 14603 + }, + { + "epoch": 1.1785973690581875, + "grad_norm": 0.7417690753936768, + "learning_rate": 3.416537591809341e-05, + "loss": 2.3779, + "step": 14604 + }, + { + "epoch": 1.1786780727947703, + "grad_norm": 0.7258411049842834, + "learning_rate": 3.4153493700050286e-05, + "loss": 2.4334, + "step": 14605 + }, + { + "epoch": 1.1787587765313534, + "grad_norm": 0.65704345703125, + "learning_rate": 3.414161312307427e-05, + "loss": 2.4531, + "step": 14606 + }, + { + "epoch": 1.1788394802679365, + "grad_norm": 0.6937118172645569, + "learning_rate": 3.4129734187461374e-05, + "loss": 2.4562, + "step": 14607 + }, + { + "epoch": 1.1789201840045194, + "grad_norm": 0.7331998348236084, + "learning_rate": 3.411785689350768e-05, + "loss": 2.4418, + "step": 14608 + }, + { + "epoch": 1.1790008877411025, + "grad_norm": 0.666582465171814, + "learning_rate": 3.410598124150924e-05, + "loss": 2.4154, + "step": 14609 + }, + { + "epoch": 1.1790815914776853, + "grad_norm": 0.6684321165084839, + "learning_rate": 3.409410723176197e-05, + "loss": 2.4155, + "step": 14610 + }, + { + "epoch": 1.1791622952142684, + "grad_norm": 0.6413382291793823, + "learning_rate": 3.408223486456184e-05, + "loss": 2.3924, + "step": 14611 + }, + { + "epoch": 1.1792429989508515, + "grad_norm": 0.7081305384635925, + "learning_rate": 3.407036414020475e-05, + "loss": 2.3811, + "step": 14612 + }, + { + "epoch": 1.1793237026874344, + "grad_norm": 0.7550063133239746, + "learning_rate": 
3.405849505898645e-05, + "loss": 2.4425, + "step": 14613 + }, + { + "epoch": 1.1794044064240174, + "grad_norm": 0.677200198173523, + "learning_rate": 3.404662762120288e-05, + "loss": 2.5182, + "step": 14614 + }, + { + "epoch": 1.1794851101606003, + "grad_norm": 0.6829770803451538, + "learning_rate": 3.4034761827149745e-05, + "loss": 2.5068, + "step": 14615 + }, + { + "epoch": 1.1795658138971834, + "grad_norm": 0.7069409489631653, + "learning_rate": 3.4022897677122815e-05, + "loss": 2.4449, + "step": 14616 + }, + { + "epoch": 1.1796465176337665, + "grad_norm": 0.6604448556900024, + "learning_rate": 3.4011035171417696e-05, + "loss": 2.3996, + "step": 14617 + }, + { + "epoch": 1.1797272213703494, + "grad_norm": 0.6577324271202087, + "learning_rate": 3.3999174310330084e-05, + "loss": 2.4723, + "step": 14618 + }, + { + "epoch": 1.1798079251069324, + "grad_norm": 0.8159187436103821, + "learning_rate": 3.398731509415561e-05, + "loss": 2.4655, + "step": 14619 + }, + { + "epoch": 1.1798886288435155, + "grad_norm": 0.7170652747154236, + "learning_rate": 3.397545752318977e-05, + "loss": 2.5095, + "step": 14620 + }, + { + "epoch": 1.1799693325800984, + "grad_norm": 0.6865009665489197, + "learning_rate": 3.396360159772812e-05, + "loss": 2.4358, + "step": 14621 + }, + { + "epoch": 1.1800500363166815, + "grad_norm": 0.6485020518302917, + "learning_rate": 3.3951747318066175e-05, + "loss": 2.4576, + "step": 14622 + }, + { + "epoch": 1.1801307400532646, + "grad_norm": 0.6626582145690918, + "learning_rate": 3.39398946844993e-05, + "loss": 2.4824, + "step": 14623 + }, + { + "epoch": 1.1802114437898474, + "grad_norm": 0.718588650226593, + "learning_rate": 3.392804369732293e-05, + "loss": 2.4211, + "step": 14624 + }, + { + "epoch": 1.1802921475264305, + "grad_norm": 0.7449582815170288, + "learning_rate": 3.391619435683243e-05, + "loss": 2.444, + "step": 14625 + }, + { + "epoch": 1.1803728512630134, + "grad_norm": 0.6988492012023926, + "learning_rate": 3.3904346663323115e-05, + "loss": 
2.4262, + "step": 14626 + }, + { + "epoch": 1.1804535549995965, + "grad_norm": 0.6779490113258362, + "learning_rate": 3.389250061709025e-05, + "loss": 2.4751, + "step": 14627 + }, + { + "epoch": 1.1805342587361796, + "grad_norm": 0.6883673667907715, + "learning_rate": 3.388065621842912e-05, + "loss": 2.4995, + "step": 14628 + }, + { + "epoch": 1.1806149624727624, + "grad_norm": 0.7112017273902893, + "learning_rate": 3.386881346763483e-05, + "loss": 2.4181, + "step": 14629 + }, + { + "epoch": 1.1806956662093455, + "grad_norm": 0.6960459351539612, + "learning_rate": 3.385697236500258e-05, + "loss": 2.4888, + "step": 14630 + }, + { + "epoch": 1.1807763699459284, + "grad_norm": 0.6874156594276428, + "learning_rate": 3.3845132910827484e-05, + "loss": 2.4175, + "step": 14631 + }, + { + "epoch": 1.1808570736825115, + "grad_norm": 0.7075642347335815, + "learning_rate": 3.383329510540463e-05, + "loss": 2.4315, + "step": 14632 + }, + { + "epoch": 1.1809377774190946, + "grad_norm": 0.674907386302948, + "learning_rate": 3.3821458949028995e-05, + "loss": 2.4216, + "step": 14633 + }, + { + "epoch": 1.1810184811556774, + "grad_norm": 0.7008463740348816, + "learning_rate": 3.380962444199559e-05, + "loss": 2.4114, + "step": 14634 + }, + { + "epoch": 1.1810991848922605, + "grad_norm": 0.6784217953681946, + "learning_rate": 3.379779158459937e-05, + "loss": 2.3663, + "step": 14635 + }, + { + "epoch": 1.1811798886288436, + "grad_norm": 0.7174829244613647, + "learning_rate": 3.378596037713525e-05, + "loss": 2.4582, + "step": 14636 + }, + { + "epoch": 1.1812605923654265, + "grad_norm": 0.7106035947799683, + "learning_rate": 3.3774130819898065e-05, + "loss": 2.5095, + "step": 14637 + }, + { + "epoch": 1.1813412961020096, + "grad_norm": 0.809107780456543, + "learning_rate": 3.3762302913182696e-05, + "loss": 2.4942, + "step": 14638 + }, + { + "epoch": 1.1814219998385926, + "grad_norm": 0.7150272727012634, + "learning_rate": 3.375047665728386e-05, + "loss": 2.378, + "step": 14639 + }, + { + 
"epoch": 1.1815027035751755, + "grad_norm": 0.7016271352767944, + "learning_rate": 3.373865205249632e-05, + "loss": 2.4393, + "step": 14640 + }, + { + "epoch": 1.1815834073117586, + "grad_norm": 0.6387282013893127, + "learning_rate": 3.372682909911481e-05, + "loss": 2.4399, + "step": 14641 + }, + { + "epoch": 1.1816641110483415, + "grad_norm": 0.834181010723114, + "learning_rate": 3.371500779743393e-05, + "loss": 2.4312, + "step": 14642 + }, + { + "epoch": 1.1817448147849245, + "grad_norm": 0.6690472960472107, + "learning_rate": 3.370318814774832e-05, + "loss": 2.407, + "step": 14643 + }, + { + "epoch": 1.1818255185215076, + "grad_norm": 0.6594302654266357, + "learning_rate": 3.369137015035256e-05, + "loss": 2.4275, + "step": 14644 + }, + { + "epoch": 1.1819062222580905, + "grad_norm": 0.7284699082374573, + "learning_rate": 3.3679553805541194e-05, + "loss": 2.3981, + "step": 14645 + }, + { + "epoch": 1.1819869259946736, + "grad_norm": 0.7109572291374207, + "learning_rate": 3.366773911360871e-05, + "loss": 2.4345, + "step": 14646 + }, + { + "epoch": 1.1820676297312565, + "grad_norm": 0.6874241828918457, + "learning_rate": 3.3655926074849566e-05, + "loss": 2.4488, + "step": 14647 + }, + { + "epoch": 1.1821483334678395, + "grad_norm": 0.6698973178863525, + "learning_rate": 3.364411468955819e-05, + "loss": 2.42, + "step": 14648 + }, + { + "epoch": 1.1822290372044226, + "grad_norm": 0.7816089391708374, + "learning_rate": 3.3632304958028915e-05, + "loss": 2.4638, + "step": 14649 + }, + { + "epoch": 1.1823097409410055, + "grad_norm": 0.6718220710754395, + "learning_rate": 3.3620496880556075e-05, + "loss": 2.413, + "step": 14650 + }, + { + "epoch": 1.1823904446775886, + "grad_norm": 0.753463089466095, + "learning_rate": 3.360869045743401e-05, + "loss": 2.3772, + "step": 14651 + }, + { + "epoch": 1.1824711484141717, + "grad_norm": 0.7031456828117371, + "learning_rate": 3.359688568895689e-05, + "loss": 2.4198, + "step": 14652 + }, + { + "epoch": 1.1825518521507545, + 
"grad_norm": 0.7857323288917542, + "learning_rate": 3.358508257541897e-05, + "loss": 2.4223, + "step": 14653 + }, + { + "epoch": 1.1826325558873376, + "grad_norm": 0.7779297828674316, + "learning_rate": 3.357328111711439e-05, + "loss": 2.5266, + "step": 14654 + }, + { + "epoch": 1.1827132596239207, + "grad_norm": 0.7382386326789856, + "learning_rate": 3.356148131433728e-05, + "loss": 2.4673, + "step": 14655 + }, + { + "epoch": 1.1827939633605036, + "grad_norm": 0.7868054509162903, + "learning_rate": 3.354968316738174e-05, + "loss": 2.4285, + "step": 14656 + }, + { + "epoch": 1.1828746670970867, + "grad_norm": 0.7007591724395752, + "learning_rate": 3.353788667654183e-05, + "loss": 2.4054, + "step": 14657 + }, + { + "epoch": 1.1829553708336695, + "grad_norm": 0.6627741456031799, + "learning_rate": 3.352609184211148e-05, + "loss": 2.4224, + "step": 14658 + }, + { + "epoch": 1.1830360745702526, + "grad_norm": 0.6865360736846924, + "learning_rate": 3.351429866438469e-05, + "loss": 2.4084, + "step": 14659 + }, + { + "epoch": 1.1831167783068357, + "grad_norm": 0.7572095990180969, + "learning_rate": 3.3502507143655404e-05, + "loss": 2.4339, + "step": 14660 + }, + { + "epoch": 1.1831974820434186, + "grad_norm": 0.6907969117164612, + "learning_rate": 3.349071728021743e-05, + "loss": 2.4578, + "step": 14661 + }, + { + "epoch": 1.1832781857800017, + "grad_norm": 0.6618743538856506, + "learning_rate": 3.347892907436465e-05, + "loss": 2.4131, + "step": 14662 + }, + { + "epoch": 1.1833588895165845, + "grad_norm": 0.777159571647644, + "learning_rate": 3.346714252639084e-05, + "loss": 2.419, + "step": 14663 + }, + { + "epoch": 1.1834395932531676, + "grad_norm": 0.666344165802002, + "learning_rate": 3.345535763658975e-05, + "loss": 2.4155, + "step": 14664 + }, + { + "epoch": 1.1835202969897507, + "grad_norm": 0.708848774433136, + "learning_rate": 3.3443574405255095e-05, + "loss": 2.4794, + "step": 14665 + }, + { + "epoch": 1.1836010007263336, + "grad_norm": 0.7247438430786133, + 
"learning_rate": 3.3431792832680555e-05, + "loss": 2.4445, + "step": 14666 + }, + { + "epoch": 1.1836817044629167, + "grad_norm": 0.6870034337043762, + "learning_rate": 3.342001291915978e-05, + "loss": 2.4309, + "step": 14667 + }, + { + "epoch": 1.1837624081994997, + "grad_norm": 0.7088049650192261, + "learning_rate": 3.340823466498629e-05, + "loss": 2.4456, + "step": 14668 + }, + { + "epoch": 1.1838431119360826, + "grad_norm": 0.695148229598999, + "learning_rate": 3.3396458070453676e-05, + "loss": 2.4018, + "step": 14669 + }, + { + "epoch": 1.1839238156726657, + "grad_norm": 0.7947117686271667, + "learning_rate": 3.3384683135855444e-05, + "loss": 2.4099, + "step": 14670 + }, + { + "epoch": 1.1840045194092486, + "grad_norm": 0.7268195748329163, + "learning_rate": 3.337290986148502e-05, + "loss": 2.3955, + "step": 14671 + }, + { + "epoch": 1.1840852231458316, + "grad_norm": 0.6932024955749512, + "learning_rate": 3.336113824763585e-05, + "loss": 2.4046, + "step": 14672 + }, + { + "epoch": 1.1841659268824147, + "grad_norm": 0.7408114671707153, + "learning_rate": 3.3349368294601334e-05, + "loss": 2.4186, + "step": 14673 + }, + { + "epoch": 1.1842466306189976, + "grad_norm": 0.6678428053855896, + "learning_rate": 3.3337600002674765e-05, + "loss": 2.4324, + "step": 14674 + }, + { + "epoch": 1.1843273343555807, + "grad_norm": 0.7221381664276123, + "learning_rate": 3.3325833372149416e-05, + "loss": 2.4474, + "step": 14675 + }, + { + "epoch": 1.1844080380921636, + "grad_norm": 0.6971224546432495, + "learning_rate": 3.3314068403318654e-05, + "loss": 2.4197, + "step": 14676 + }, + { + "epoch": 1.1844887418287466, + "grad_norm": 0.65053391456604, + "learning_rate": 3.3302305096475604e-05, + "loss": 2.4169, + "step": 14677 + }, + { + "epoch": 1.1845694455653297, + "grad_norm": 0.7231155633926392, + "learning_rate": 3.3290543451913457e-05, + "loss": 2.4222, + "step": 14678 + }, + { + "epoch": 1.1846501493019126, + "grad_norm": 0.6458824872970581, + "learning_rate": 
3.3278783469925345e-05, + "loss": 2.422, + "step": 14679 + }, + { + "epoch": 1.1847308530384957, + "grad_norm": 0.6783488392829895, + "learning_rate": 3.32670251508044e-05, + "loss": 2.4231, + "step": 14680 + }, + { + "epoch": 1.1848115567750788, + "grad_norm": 0.6742293238639832, + "learning_rate": 3.3255268494843586e-05, + "loss": 2.409, + "step": 14681 + }, + { + "epoch": 1.1848922605116616, + "grad_norm": 0.7455186247825623, + "learning_rate": 3.3243513502335956e-05, + "loss": 2.4121, + "step": 14682 + }, + { + "epoch": 1.1849729642482447, + "grad_norm": 0.7042234539985657, + "learning_rate": 3.323176017357451e-05, + "loss": 2.4574, + "step": 14683 + }, + { + "epoch": 1.1850536679848278, + "grad_norm": 0.7897992134094238, + "learning_rate": 3.3220008508852094e-05, + "loss": 2.4796, + "step": 14684 + }, + { + "epoch": 1.1851343717214107, + "grad_norm": 0.6894058585166931, + "learning_rate": 3.3208258508461644e-05, + "loss": 2.4125, + "step": 14685 + }, + { + "epoch": 1.1852150754579938, + "grad_norm": 0.7574072480201721, + "learning_rate": 3.319651017269597e-05, + "loss": 2.4714, + "step": 14686 + }, + { + "epoch": 1.1852957791945766, + "grad_norm": 0.7457531094551086, + "learning_rate": 3.3184763501847905e-05, + "loss": 2.4793, + "step": 14687 + }, + { + "epoch": 1.1853764829311597, + "grad_norm": 0.6819709539413452, + "learning_rate": 3.317301849621018e-05, + "loss": 2.4563, + "step": 14688 + }, + { + "epoch": 1.1854571866677428, + "grad_norm": 0.6998026371002197, + "learning_rate": 3.316127515607555e-05, + "loss": 2.4548, + "step": 14689 + }, + { + "epoch": 1.1855378904043257, + "grad_norm": 0.7148768305778503, + "learning_rate": 3.314953348173664e-05, + "loss": 2.4897, + "step": 14690 + }, + { + "epoch": 1.1856185941409088, + "grad_norm": 0.6581987738609314, + "learning_rate": 3.31377934734861e-05, + "loss": 2.4683, + "step": 14691 + }, + { + "epoch": 1.1856992978774916, + "grad_norm": 0.7493093609809875, + "learning_rate": 3.312605513161653e-05, + "loss": 
2.4564, + "step": 14692 + }, + { + "epoch": 1.1857800016140747, + "grad_norm": 0.7095562219619751, + "learning_rate": 3.311431845642051e-05, + "loss": 2.4595, + "step": 14693 + }, + { + "epoch": 1.1858607053506578, + "grad_norm": 0.8045323491096497, + "learning_rate": 3.310258344819047e-05, + "loss": 2.5044, + "step": 14694 + }, + { + "epoch": 1.1859414090872407, + "grad_norm": 0.7381219267845154, + "learning_rate": 3.3090850107218943e-05, + "loss": 2.415, + "step": 14695 + }, + { + "epoch": 1.1860221128238237, + "grad_norm": 0.6859883069992065, + "learning_rate": 3.307911843379832e-05, + "loss": 2.4314, + "step": 14696 + }, + { + "epoch": 1.1861028165604068, + "grad_norm": 0.7084196209907532, + "learning_rate": 3.306738842822099e-05, + "loss": 2.4404, + "step": 14697 + }, + { + "epoch": 1.1861835202969897, + "grad_norm": 0.6964806318283081, + "learning_rate": 3.305566009077932e-05, + "loss": 2.4391, + "step": 14698 + }, + { + "epoch": 1.1862642240335728, + "grad_norm": 0.7272049188613892, + "learning_rate": 3.304393342176562e-05, + "loss": 2.4395, + "step": 14699 + }, + { + "epoch": 1.1863449277701559, + "grad_norm": 0.6651458144187927, + "learning_rate": 3.303220842147209e-05, + "loss": 2.4059, + "step": 14700 + }, + { + "epoch": 1.1864256315067387, + "grad_norm": 0.7599130868911743, + "learning_rate": 3.302048509019099e-05, + "loss": 2.5044, + "step": 14701 + }, + { + "epoch": 1.1865063352433218, + "grad_norm": 0.6694391965866089, + "learning_rate": 3.3008763428214505e-05, + "loss": 2.4817, + "step": 14702 + }, + { + "epoch": 1.1865870389799047, + "grad_norm": 0.7176856398582458, + "learning_rate": 3.299704343583473e-05, + "loss": 2.4702, + "step": 14703 + }, + { + "epoch": 1.1866677427164878, + "grad_norm": 0.7133145332336426, + "learning_rate": 3.298532511334378e-05, + "loss": 2.4685, + "step": 14704 + }, + { + "epoch": 1.1867484464530709, + "grad_norm": 0.7170277833938599, + "learning_rate": 3.297360846103371e-05, + "loss": 2.4203, + "step": 14705 + }, + { + 
"epoch": 1.1868291501896537, + "grad_norm": 0.6853376626968384, + "learning_rate": 3.296189347919652e-05, + "loss": 2.4067, + "step": 14706 + }, + { + "epoch": 1.1869098539262368, + "grad_norm": 0.7269156575202942, + "learning_rate": 3.2950180168124175e-05, + "loss": 2.4211, + "step": 14707 + }, + { + "epoch": 1.1869905576628197, + "grad_norm": 0.8649005889892578, + "learning_rate": 3.2938468528108626e-05, + "loss": 2.4611, + "step": 14708 + }, + { + "epoch": 1.1870712613994028, + "grad_norm": 0.7256221771240234, + "learning_rate": 3.292675855944177e-05, + "loss": 2.4618, + "step": 14709 + }, + { + "epoch": 1.1871519651359859, + "grad_norm": 0.6854279637336731, + "learning_rate": 3.291505026241539e-05, + "loss": 2.4466, + "step": 14710 + }, + { + "epoch": 1.1872326688725687, + "grad_norm": 0.7182712554931641, + "learning_rate": 3.2903343637321316e-05, + "loss": 2.4847, + "step": 14711 + }, + { + "epoch": 1.1873133726091518, + "grad_norm": 0.6795300841331482, + "learning_rate": 3.289163868445134e-05, + "loss": 2.4407, + "step": 14712 + }, + { + "epoch": 1.187394076345735, + "grad_norm": 0.685146689414978, + "learning_rate": 3.287993540409713e-05, + "loss": 2.4537, + "step": 14713 + }, + { + "epoch": 1.1874747800823178, + "grad_norm": 0.7891005873680115, + "learning_rate": 3.2868233796550375e-05, + "loss": 2.4085, + "step": 14714 + }, + { + "epoch": 1.1875554838189009, + "grad_norm": 0.6521769762039185, + "learning_rate": 3.2856533862102724e-05, + "loss": 2.4174, + "step": 14715 + }, + { + "epoch": 1.1876361875554837, + "grad_norm": 0.7486612200737, + "learning_rate": 3.284483560104575e-05, + "loss": 2.4072, + "step": 14716 + }, + { + "epoch": 1.1877168912920668, + "grad_norm": 0.6895913481712341, + "learning_rate": 3.283313901367103e-05, + "loss": 2.4398, + "step": 14717 + }, + { + "epoch": 1.18779759502865, + "grad_norm": 0.6595678329467773, + "learning_rate": 3.282144410027009e-05, + "loss": 2.4407, + "step": 14718 + }, + { + "epoch": 1.1878782987652328, + 
"grad_norm": 0.7724249958992004, + "learning_rate": 3.280975086113435e-05, + "loss": 2.464, + "step": 14719 + }, + { + "epoch": 1.1879590025018159, + "grad_norm": 0.659472644329071, + "learning_rate": 3.279805929655524e-05, + "loss": 2.4774, + "step": 14720 + }, + { + "epoch": 1.1880397062383987, + "grad_norm": 0.7187919020652771, + "learning_rate": 3.27863694068242e-05, + "loss": 2.4767, + "step": 14721 + }, + { + "epoch": 1.1881204099749818, + "grad_norm": 0.7740198373794556, + "learning_rate": 3.2774681192232506e-05, + "loss": 2.4762, + "step": 14722 + }, + { + "epoch": 1.188201113711565, + "grad_norm": 0.700591504573822, + "learning_rate": 3.2762994653071464e-05, + "loss": 2.448, + "step": 14723 + }, + { + "epoch": 1.1882818174481478, + "grad_norm": 0.7168558239936829, + "learning_rate": 3.275130978963237e-05, + "loss": 2.4084, + "step": 14724 + }, + { + "epoch": 1.1883625211847308, + "grad_norm": 0.8039551973342896, + "learning_rate": 3.273962660220646e-05, + "loss": 2.3849, + "step": 14725 + }, + { + "epoch": 1.188443224921314, + "grad_norm": 0.6453016400337219, + "learning_rate": 3.27279450910848e-05, + "loss": 2.3856, + "step": 14726 + }, + { + "epoch": 1.1885239286578968, + "grad_norm": 0.7194651365280151, + "learning_rate": 3.2716265256558644e-05, + "loss": 2.4337, + "step": 14727 + }, + { + "epoch": 1.1886046323944799, + "grad_norm": 0.7298597097396851, + "learning_rate": 3.270458709891906e-05, + "loss": 2.4491, + "step": 14728 + }, + { + "epoch": 1.188685336131063, + "grad_norm": 0.7127524614334106, + "learning_rate": 3.269291061845705e-05, + "loss": 2.4319, + "step": 14729 + }, + { + "epoch": 1.1887660398676458, + "grad_norm": 0.6782705783843994, + "learning_rate": 3.2681235815463654e-05, + "loss": 2.4375, + "step": 14730 + }, + { + "epoch": 1.188846743604229, + "grad_norm": 0.7418326735496521, + "learning_rate": 3.266956269022987e-05, + "loss": 2.4149, + "step": 14731 + }, + { + "epoch": 1.1889274473408118, + "grad_norm": 0.7442455291748047, + 
"learning_rate": 3.265789124304654e-05, + "loss": 2.3935, + "step": 14732 + }, + { + "epoch": 1.1890081510773949, + "grad_norm": 0.7238253951072693, + "learning_rate": 3.264622147420461e-05, + "loss": 2.4592, + "step": 14733 + }, + { + "epoch": 1.189088854813978, + "grad_norm": 0.6488127708435059, + "learning_rate": 3.2634553383994925e-05, + "loss": 2.3468, + "step": 14734 + }, + { + "epoch": 1.1891695585505608, + "grad_norm": 0.7182446718215942, + "learning_rate": 3.2622886972708246e-05, + "loss": 2.4457, + "step": 14735 + }, + { + "epoch": 1.189250262287144, + "grad_norm": 0.6885523796081543, + "learning_rate": 3.261122224063534e-05, + "loss": 2.3943, + "step": 14736 + }, + { + "epoch": 1.1893309660237268, + "grad_norm": 0.653367817401886, + "learning_rate": 3.259955918806693e-05, + "loss": 2.4188, + "step": 14737 + }, + { + "epoch": 1.1894116697603099, + "grad_norm": 0.6968675851821899, + "learning_rate": 3.2587897815293686e-05, + "loss": 2.4276, + "step": 14738 + }, + { + "epoch": 1.189492373496893, + "grad_norm": 0.6827409267425537, + "learning_rate": 3.257623812260626e-05, + "loss": 2.4417, + "step": 14739 + }, + { + "epoch": 1.1895730772334758, + "grad_norm": 0.6807438731193542, + "learning_rate": 3.256458011029523e-05, + "loss": 2.4495, + "step": 14740 + }, + { + "epoch": 1.189653780970059, + "grad_norm": 0.6692882180213928, + "learning_rate": 3.255292377865116e-05, + "loss": 2.3789, + "step": 14741 + }, + { + "epoch": 1.189734484706642, + "grad_norm": 0.6581685543060303, + "learning_rate": 3.2541269127964515e-05, + "loss": 2.4073, + "step": 14742 + }, + { + "epoch": 1.1898151884432249, + "grad_norm": 0.6458544731140137, + "learning_rate": 3.252961615852578e-05, + "loss": 2.4657, + "step": 14743 + }, + { + "epoch": 1.189895892179808, + "grad_norm": 0.6971322298049927, + "learning_rate": 3.251796487062541e-05, + "loss": 2.4404, + "step": 14744 + }, + { + "epoch": 1.189976595916391, + "grad_norm": 0.6770374178886414, + "learning_rate": 3.2506315264553724e-05, 
+ "loss": 2.4329, + "step": 14745 + }, + { + "epoch": 1.190057299652974, + "grad_norm": 0.7634715437889099, + "learning_rate": 3.2494667340601085e-05, + "loss": 2.4234, + "step": 14746 + }, + { + "epoch": 1.190138003389557, + "grad_norm": 0.7717967629432678, + "learning_rate": 3.24830210990578e-05, + "loss": 2.5009, + "step": 14747 + }, + { + "epoch": 1.1902187071261399, + "grad_norm": 0.7133559584617615, + "learning_rate": 3.2471376540214124e-05, + "loss": 2.4272, + "step": 14748 + }, + { + "epoch": 1.190299410862723, + "grad_norm": 0.7273291349411011, + "learning_rate": 3.245973366436027e-05, + "loss": 2.4174, + "step": 14749 + }, + { + "epoch": 1.190380114599306, + "grad_norm": 0.6955052614212036, + "learning_rate": 3.244809247178643e-05, + "loss": 2.3605, + "step": 14750 + }, + { + "epoch": 1.190460818335889, + "grad_norm": 0.7072615027427673, + "learning_rate": 3.2436452962782685e-05, + "loss": 2.4897, + "step": 14751 + }, + { + "epoch": 1.190541522072472, + "grad_norm": 0.7095344662666321, + "learning_rate": 3.242481513763913e-05, + "loss": 2.4172, + "step": 14752 + }, + { + "epoch": 1.1906222258090549, + "grad_norm": 0.7260944247245789, + "learning_rate": 3.2413178996645864e-05, + "loss": 2.4272, + "step": 14753 + }, + { + "epoch": 1.190702929545638, + "grad_norm": 0.6601141691207886, + "learning_rate": 3.2401544540092824e-05, + "loss": 2.4072, + "step": 14754 + }, + { + "epoch": 1.190783633282221, + "grad_norm": 0.6684936881065369, + "learning_rate": 3.238991176827e-05, + "loss": 2.3968, + "step": 14755 + }, + { + "epoch": 1.190864337018804, + "grad_norm": 0.7264483571052551, + "learning_rate": 3.23782806814673e-05, + "loss": 2.4263, + "step": 14756 + }, + { + "epoch": 1.190945040755387, + "grad_norm": 0.6927621960639954, + "learning_rate": 3.2366651279974614e-05, + "loss": 2.4495, + "step": 14757 + }, + { + "epoch": 1.19102574449197, + "grad_norm": 0.7007272243499756, + "learning_rate": 3.2355023564081775e-05, + "loss": 2.4373, + "step": 14758 + }, + { + 
"epoch": 1.191106448228553, + "grad_norm": 0.6756663918495178, + "learning_rate": 3.234339753407857e-05, + "loss": 2.4148, + "step": 14759 + }, + { + "epoch": 1.191187151965136, + "grad_norm": 0.6741094589233398, + "learning_rate": 3.233177319025479e-05, + "loss": 2.3976, + "step": 14760 + }, + { + "epoch": 1.1912678557017191, + "grad_norm": 0.7098578810691833, + "learning_rate": 3.2320150532900085e-05, + "loss": 2.4326, + "step": 14761 + }, + { + "epoch": 1.191348559438302, + "grad_norm": 0.750271737575531, + "learning_rate": 3.230852956230413e-05, + "loss": 2.4766, + "step": 14762 + }, + { + "epoch": 1.191429263174885, + "grad_norm": 0.68764728307724, + "learning_rate": 3.229691027875661e-05, + "loss": 2.4128, + "step": 14763 + }, + { + "epoch": 1.191509966911468, + "grad_norm": 0.656295657157898, + "learning_rate": 3.228529268254702e-05, + "loss": 2.3928, + "step": 14764 + }, + { + "epoch": 1.191590670648051, + "grad_norm": 0.6690353155136108, + "learning_rate": 3.2273676773964955e-05, + "loss": 2.408, + "step": 14765 + }, + { + "epoch": 1.1916713743846339, + "grad_norm": 0.8111640214920044, + "learning_rate": 3.22620625532999e-05, + "loss": 2.4644, + "step": 14766 + }, + { + "epoch": 1.191752078121217, + "grad_norm": 0.7329768538475037, + "learning_rate": 3.2250450020841316e-05, + "loss": 2.4235, + "step": 14767 + }, + { + "epoch": 1.1918327818578, + "grad_norm": 0.6902688145637512, + "learning_rate": 3.223883917687861e-05, + "loss": 2.3883, + "step": 14768 + }, + { + "epoch": 1.191913485594383, + "grad_norm": 0.797249972820282, + "learning_rate": 3.2227230021701205e-05, + "loss": 2.523, + "step": 14769 + }, + { + "epoch": 1.191994189330966, + "grad_norm": 0.6294408440589905, + "learning_rate": 3.221562255559834e-05, + "loss": 2.4156, + "step": 14770 + }, + { + "epoch": 1.192074893067549, + "grad_norm": 0.7326164245605469, + "learning_rate": 3.220401677885936e-05, + "loss": 2.3828, + "step": 14771 + }, + { + "epoch": 1.192155596804132, + "grad_norm": 
0.783747673034668, + "learning_rate": 3.219241269177351e-05, + "loss": 2.4321, + "step": 14772 + }, + { + "epoch": 1.192236300540715, + "grad_norm": 0.7415335178375244, + "learning_rate": 3.2180810294630005e-05, + "loss": 2.4446, + "step": 14773 + }, + { + "epoch": 1.1923170042772981, + "grad_norm": 0.7125591039657593, + "learning_rate": 3.2169209587717966e-05, + "loss": 2.3914, + "step": 14774 + }, + { + "epoch": 1.192397708013881, + "grad_norm": 0.6714075207710266, + "learning_rate": 3.215761057132652e-05, + "loss": 2.3918, + "step": 14775 + }, + { + "epoch": 1.192478411750464, + "grad_norm": 0.7147830724716187, + "learning_rate": 3.214601324574481e-05, + "loss": 2.4389, + "step": 14776 + }, + { + "epoch": 1.192559115487047, + "grad_norm": 0.6780480146408081, + "learning_rate": 3.2134417611261755e-05, + "loss": 2.4119, + "step": 14777 + }, + { + "epoch": 1.19263981922363, + "grad_norm": 0.7473881840705872, + "learning_rate": 3.212282366816645e-05, + "loss": 2.4547, + "step": 14778 + }, + { + "epoch": 1.1927205229602131, + "grad_norm": 0.7418377995491028, + "learning_rate": 3.211123141674784e-05, + "loss": 2.4156, + "step": 14779 + }, + { + "epoch": 1.192801226696796, + "grad_norm": 0.687524139881134, + "learning_rate": 3.209964085729477e-05, + "loss": 2.4309, + "step": 14780 + }, + { + "epoch": 1.192881930433379, + "grad_norm": 0.6965883374214172, + "learning_rate": 3.208805199009615e-05, + "loss": 2.4028, + "step": 14781 + }, + { + "epoch": 1.192962634169962, + "grad_norm": 0.7024682760238647, + "learning_rate": 3.207646481544082e-05, + "loss": 2.4482, + "step": 14782 + }, + { + "epoch": 1.193043337906545, + "grad_norm": 0.6835834383964539, + "learning_rate": 3.2064879333617514e-05, + "loss": 2.3898, + "step": 14783 + }, + { + "epoch": 1.1931240416431281, + "grad_norm": 0.7002003788948059, + "learning_rate": 3.2053295544915e-05, + "loss": 2.487, + "step": 14784 + }, + { + "epoch": 1.193204745379711, + "grad_norm": 0.7128168940544128, + "learning_rate": 
3.2041713449622e-05, + "loss": 2.4591, + "step": 14785 + }, + { + "epoch": 1.193285449116294, + "grad_norm": 0.6897242665290833, + "learning_rate": 3.203013304802712e-05, + "loss": 2.4458, + "step": 14786 + }, + { + "epoch": 1.1933661528528772, + "grad_norm": 0.7281817197799683, + "learning_rate": 3.2018554340419004e-05, + "loss": 2.3772, + "step": 14787 + }, + { + "epoch": 1.19344685658946, + "grad_norm": 0.6956086754798889, + "learning_rate": 3.200697732708619e-05, + "loss": 2.4316, + "step": 14788 + }, + { + "epoch": 1.1935275603260431, + "grad_norm": 0.7679805159568787, + "learning_rate": 3.199540200831729e-05, + "loss": 2.4464, + "step": 14789 + }, + { + "epoch": 1.1936082640626262, + "grad_norm": 0.6993041634559631, + "learning_rate": 3.19838283844007e-05, + "loss": 2.3881, + "step": 14790 + }, + { + "epoch": 1.193688967799209, + "grad_norm": 0.689618706703186, + "learning_rate": 3.197225645562493e-05, + "loss": 2.4184, + "step": 14791 + }, + { + "epoch": 1.1937696715357922, + "grad_norm": 0.6896520853042603, + "learning_rate": 3.1960686222278354e-05, + "loss": 2.4484, + "step": 14792 + }, + { + "epoch": 1.193850375272375, + "grad_norm": 0.6743811368942261, + "learning_rate": 3.1949117684649334e-05, + "loss": 2.4636, + "step": 14793 + }, + { + "epoch": 1.1939310790089581, + "grad_norm": 0.7028046250343323, + "learning_rate": 3.1937550843026163e-05, + "loss": 2.4576, + "step": 14794 + }, + { + "epoch": 1.1940117827455412, + "grad_norm": 0.7219679951667786, + "learning_rate": 3.192598569769718e-05, + "loss": 2.4495, + "step": 14795 + }, + { + "epoch": 1.194092486482124, + "grad_norm": 0.731438159942627, + "learning_rate": 3.191442224895056e-05, + "loss": 2.4699, + "step": 14796 + }, + { + "epoch": 1.1941731902187072, + "grad_norm": 0.6731431484222412, + "learning_rate": 3.19028604970745e-05, + "loss": 2.4292, + "step": 14797 + }, + { + "epoch": 1.19425389395529, + "grad_norm": 0.6720147728919983, + "learning_rate": 3.1891300442357174e-05, + "loss": 2.4482, + 
"step": 14798 + }, + { + "epoch": 1.1943345976918731, + "grad_norm": 0.7504273653030396, + "learning_rate": 3.187974208508667e-05, + "loss": 2.4233, + "step": 14799 + }, + { + "epoch": 1.1944153014284562, + "grad_norm": 0.6882641315460205, + "learning_rate": 3.186818542555108e-05, + "loss": 2.4633, + "step": 14800 + }, + { + "epoch": 1.194496005165039, + "grad_norm": 0.7337899208068848, + "learning_rate": 3.1856630464038385e-05, + "loss": 2.4257, + "step": 14801 + }, + { + "epoch": 1.1945767089016222, + "grad_norm": 0.7026493549346924, + "learning_rate": 3.1845077200836636e-05, + "loss": 2.482, + "step": 14802 + }, + { + "epoch": 1.1946574126382052, + "grad_norm": 0.763351321220398, + "learning_rate": 3.1833525636233675e-05, + "loss": 2.4428, + "step": 14803 + }, + { + "epoch": 1.194738116374788, + "grad_norm": 0.6568076610565186, + "learning_rate": 3.182197577051745e-05, + "loss": 2.4373, + "step": 14804 + }, + { + "epoch": 1.1948188201113712, + "grad_norm": 0.6954717040061951, + "learning_rate": 3.1810427603975844e-05, + "loss": 2.4582, + "step": 14805 + }, + { + "epoch": 1.1948995238479543, + "grad_norm": 0.7130215167999268, + "learning_rate": 3.179888113689661e-05, + "loss": 2.443, + "step": 14806 + }, + { + "epoch": 1.1949802275845371, + "grad_norm": 0.6789865493774414, + "learning_rate": 3.178733636956752e-05, + "loss": 2.4138, + "step": 14807 + }, + { + "epoch": 1.1950609313211202, + "grad_norm": 0.7725361585617065, + "learning_rate": 3.177579330227633e-05, + "loss": 2.4783, + "step": 14808 + }, + { + "epoch": 1.195141635057703, + "grad_norm": 0.6952371001243591, + "learning_rate": 3.17642519353107e-05, + "loss": 2.4571, + "step": 14809 + }, + { + "epoch": 1.1952223387942862, + "grad_norm": 0.7541885375976562, + "learning_rate": 3.1752712268958275e-05, + "loss": 2.4075, + "step": 14810 + }, + { + "epoch": 1.1953030425308693, + "grad_norm": 0.6974624395370483, + "learning_rate": 3.174117430350671e-05, + "loss": 2.4525, + "step": 14811 + }, + { + "epoch": 
1.1953837462674521, + "grad_norm": 0.7293709516525269, + "learning_rate": 3.172963803924347e-05, + "loss": 2.4646, + "step": 14812 + }, + { + "epoch": 1.1954644500040352, + "grad_norm": 0.6944144368171692, + "learning_rate": 3.1718103476456106e-05, + "loss": 2.462, + "step": 14813 + }, + { + "epoch": 1.195545153740618, + "grad_norm": 0.6415363550186157, + "learning_rate": 3.170657061543214e-05, + "loss": 2.4086, + "step": 14814 + }, + { + "epoch": 1.1956258574772012, + "grad_norm": 0.6511349081993103, + "learning_rate": 3.169503945645892e-05, + "loss": 2.4376, + "step": 14815 + }, + { + "epoch": 1.1957065612137843, + "grad_norm": 0.7420210242271423, + "learning_rate": 3.1683509999823854e-05, + "loss": 2.4317, + "step": 14816 + }, + { + "epoch": 1.1957872649503671, + "grad_norm": 0.7291967272758484, + "learning_rate": 3.1671982245814316e-05, + "loss": 2.4369, + "step": 14817 + }, + { + "epoch": 1.1958679686869502, + "grad_norm": 0.685743510723114, + "learning_rate": 3.166045619471758e-05, + "loss": 2.465, + "step": 14818 + }, + { + "epoch": 1.1959486724235333, + "grad_norm": 0.7130060195922852, + "learning_rate": 3.164893184682093e-05, + "loss": 2.4305, + "step": 14819 + }, + { + "epoch": 1.1960293761601162, + "grad_norm": 0.694508969783783, + "learning_rate": 3.163740920241156e-05, + "loss": 2.4278, + "step": 14820 + }, + { + "epoch": 1.1961100798966993, + "grad_norm": 0.6478514075279236, + "learning_rate": 3.162588826177669e-05, + "loss": 2.4721, + "step": 14821 + }, + { + "epoch": 1.1961907836332821, + "grad_norm": 0.6586465835571289, + "learning_rate": 3.1614369025203386e-05, + "loss": 2.4716, + "step": 14822 + }, + { + "epoch": 1.1962714873698652, + "grad_norm": 0.7558106184005737, + "learning_rate": 3.160285149297876e-05, + "loss": 2.4656, + "step": 14823 + }, + { + "epoch": 1.1963521911064483, + "grad_norm": 0.7208340764045715, + "learning_rate": 3.1591335665389896e-05, + "loss": 2.4374, + "step": 14824 + }, + { + "epoch": 1.1964328948430312, + "grad_norm": 
0.70301353931427, + "learning_rate": 3.157982154272375e-05, + "loss": 2.397, + "step": 14825 + }, + { + "epoch": 1.1965135985796143, + "grad_norm": 0.6857609152793884, + "learning_rate": 3.15683091252673e-05, + "loss": 2.4258, + "step": 14826 + }, + { + "epoch": 1.1965943023161971, + "grad_norm": 0.6954602003097534, + "learning_rate": 3.155679841330747e-05, + "loss": 2.4566, + "step": 14827 + }, + { + "epoch": 1.1966750060527802, + "grad_norm": 0.6923913955688477, + "learning_rate": 3.154528940713113e-05, + "loss": 2.4, + "step": 14828 + }, + { + "epoch": 1.1967557097893633, + "grad_norm": 0.6641134023666382, + "learning_rate": 3.1533782107025124e-05, + "loss": 2.4721, + "step": 14829 + }, + { + "epoch": 1.1968364135259462, + "grad_norm": 0.7470134496688843, + "learning_rate": 3.152227651327627e-05, + "loss": 2.4253, + "step": 14830 + }, + { + "epoch": 1.1969171172625293, + "grad_norm": 0.7234545350074768, + "learning_rate": 3.151077262617126e-05, + "loss": 2.4109, + "step": 14831 + }, + { + "epoch": 1.1969978209991123, + "grad_norm": 0.7814013957977295, + "learning_rate": 3.149927044599682e-05, + "loss": 2.4522, + "step": 14832 + }, + { + "epoch": 1.1970785247356952, + "grad_norm": 0.6825435161590576, + "learning_rate": 3.1487769973039624e-05, + "loss": 2.4728, + "step": 14833 + }, + { + "epoch": 1.1971592284722783, + "grad_norm": 0.7091361880302429, + "learning_rate": 3.147627120758634e-05, + "loss": 2.4615, + "step": 14834 + }, + { + "epoch": 1.1972399322088614, + "grad_norm": 0.7271433472633362, + "learning_rate": 3.146477414992346e-05, + "loss": 2.4154, + "step": 14835 + }, + { + "epoch": 1.1973206359454442, + "grad_norm": 0.6557306051254272, + "learning_rate": 3.145327880033756e-05, + "loss": 2.4348, + "step": 14836 + }, + { + "epoch": 1.1974013396820273, + "grad_norm": 0.6667891144752502, + "learning_rate": 3.1441785159115166e-05, + "loss": 2.4123, + "step": 14837 + }, + { + "epoch": 1.1974820434186102, + "grad_norm": 0.6755266189575195, + "learning_rate": 
3.143029322654266e-05, + "loss": 2.4287, + "step": 14838 + }, + { + "epoch": 1.1975627471551933, + "grad_norm": 0.7647396922111511, + "learning_rate": 3.1418803002906475e-05, + "loss": 2.4343, + "step": 14839 + }, + { + "epoch": 1.1976434508917764, + "grad_norm": 0.7288243174552917, + "learning_rate": 3.140731448849305e-05, + "loss": 2.4536, + "step": 14840 + }, + { + "epoch": 1.1977241546283592, + "grad_norm": 0.6126244068145752, + "learning_rate": 3.1395827683588605e-05, + "loss": 2.4187, + "step": 14841 + }, + { + "epoch": 1.1978048583649423, + "grad_norm": 0.6773896217346191, + "learning_rate": 3.138434258847948e-05, + "loss": 2.3916, + "step": 14842 + }, + { + "epoch": 1.1978855621015252, + "grad_norm": 0.724413275718689, + "learning_rate": 3.1372859203451934e-05, + "loss": 2.4614, + "step": 14843 + }, + { + "epoch": 1.1979662658381083, + "grad_norm": 0.7043039798736572, + "learning_rate": 3.136137752879209e-05, + "loss": 2.4343, + "step": 14844 + }, + { + "epoch": 1.1980469695746914, + "grad_norm": 0.7543383240699768, + "learning_rate": 3.134989756478615e-05, + "loss": 2.4345, + "step": 14845 + }, + { + "epoch": 1.1981276733112742, + "grad_norm": 0.7193408608436584, + "learning_rate": 3.1338419311720244e-05, + "loss": 2.4728, + "step": 14846 + }, + { + "epoch": 1.1982083770478573, + "grad_norm": 0.8090186715126038, + "learning_rate": 3.132694276988038e-05, + "loss": 2.4246, + "step": 14847 + }, + { + "epoch": 1.1982890807844404, + "grad_norm": 0.7154600620269775, + "learning_rate": 3.131546793955261e-05, + "loss": 2.4061, + "step": 14848 + }, + { + "epoch": 1.1983697845210233, + "grad_norm": 0.6987032890319824, + "learning_rate": 3.130399482102293e-05, + "loss": 2.4525, + "step": 14849 + }, + { + "epoch": 1.1984504882576064, + "grad_norm": 0.7123507261276245, + "learning_rate": 3.129252341457727e-05, + "loss": 2.4017, + "step": 14850 + }, + { + "epoch": 1.1985311919941894, + "grad_norm": 0.6475987434387207, + "learning_rate": 3.128105372050153e-05, + "loss": 
2.4617, + "step": 14851 + }, + { + "epoch": 1.1986118957307723, + "grad_norm": 0.6799046993255615, + "learning_rate": 3.126958573908156e-05, + "loss": 2.4337, + "step": 14852 + }, + { + "epoch": 1.1986925994673554, + "grad_norm": 0.6910607218742371, + "learning_rate": 3.125811947060322e-05, + "loss": 2.415, + "step": 14853 + }, + { + "epoch": 1.1987733032039383, + "grad_norm": 0.6879963278770447, + "learning_rate": 3.124665491535219e-05, + "loss": 2.4912, + "step": 14854 + }, + { + "epoch": 1.1988540069405214, + "grad_norm": 0.7038810849189758, + "learning_rate": 3.123519207361425e-05, + "loss": 2.4528, + "step": 14855 + }, + { + "epoch": 1.1989347106771044, + "grad_norm": 0.6771957278251648, + "learning_rate": 3.1223730945675104e-05, + "loss": 2.4524, + "step": 14856 + }, + { + "epoch": 1.1990154144136873, + "grad_norm": 0.7529320120811462, + "learning_rate": 3.1212271531820336e-05, + "loss": 2.4667, + "step": 14857 + }, + { + "epoch": 1.1990961181502704, + "grad_norm": 0.6498474478721619, + "learning_rate": 3.1200813832335574e-05, + "loss": 2.3863, + "step": 14858 + }, + { + "epoch": 1.1991768218868533, + "grad_norm": 0.7587705850601196, + "learning_rate": 3.1189357847506383e-05, + "loss": 2.4962, + "step": 14859 + }, + { + "epoch": 1.1992575256234363, + "grad_norm": 0.674013078212738, + "learning_rate": 3.117790357761825e-05, + "loss": 2.3939, + "step": 14860 + }, + { + "epoch": 1.1993382293600194, + "grad_norm": 0.6546844840049744, + "learning_rate": 3.116645102295668e-05, + "loss": 2.4775, + "step": 14861 + }, + { + "epoch": 1.1994189330966023, + "grad_norm": 0.7558320760726929, + "learning_rate": 3.11550001838071e-05, + "loss": 2.3918, + "step": 14862 + }, + { + "epoch": 1.1994996368331854, + "grad_norm": 0.7074883580207825, + "learning_rate": 3.114355106045486e-05, + "loss": 2.3969, + "step": 14863 + }, + { + "epoch": 1.1995803405697685, + "grad_norm": 0.706078290939331, + "learning_rate": 3.1132103653185305e-05, + "loss": 2.5028, + "step": 14864 + }, + { + 
"epoch": 1.1996610443063513, + "grad_norm": 0.6883544921875, + "learning_rate": 3.1120657962283764e-05, + "loss": 2.4407, + "step": 14865 + }, + { + "epoch": 1.1997417480429344, + "grad_norm": 0.6905466914176941, + "learning_rate": 3.110921398803551e-05, + "loss": 2.3893, + "step": 14866 + }, + { + "epoch": 1.1998224517795173, + "grad_norm": 0.6584910154342651, + "learning_rate": 3.109777173072569e-05, + "loss": 2.4515, + "step": 14867 + }, + { + "epoch": 1.1999031555161004, + "grad_norm": 0.6957471370697021, + "learning_rate": 3.108633119063951e-05, + "loss": 2.4483, + "step": 14868 + }, + { + "epoch": 1.1999838592526835, + "grad_norm": 0.6716276407241821, + "learning_rate": 3.1074892368062095e-05, + "loss": 2.4298, + "step": 14869 + }, + { + "epoch": 1.2000645629892663, + "grad_norm": 0.7350820302963257, + "learning_rate": 3.1063455263278543e-05, + "loss": 2.4088, + "step": 14870 + }, + { + "epoch": 1.2001452667258494, + "grad_norm": 0.7409771680831909, + "learning_rate": 3.105201987657388e-05, + "loss": 2.4089, + "step": 14871 + }, + { + "epoch": 1.2002259704624323, + "grad_norm": 0.7273266911506653, + "learning_rate": 3.104058620823315e-05, + "loss": 2.5149, + "step": 14872 + }, + { + "epoch": 1.2003066741990154, + "grad_norm": 0.6793962717056274, + "learning_rate": 3.102915425854124e-05, + "loss": 2.4422, + "step": 14873 + }, + { + "epoch": 1.2003873779355985, + "grad_norm": 0.72386234998703, + "learning_rate": 3.101772402778309e-05, + "loss": 2.4756, + "step": 14874 + }, + { + "epoch": 1.2004680816721813, + "grad_norm": 0.6530055999755859, + "learning_rate": 3.1006295516243625e-05, + "loss": 2.4145, + "step": 14875 + }, + { + "epoch": 1.2005487854087644, + "grad_norm": 0.7288365960121155, + "learning_rate": 3.099486872420758e-05, + "loss": 2.4565, + "step": 14876 + }, + { + "epoch": 1.2006294891453475, + "grad_norm": 0.6982102394104004, + "learning_rate": 3.09834436519598e-05, + "loss": 2.4788, + "step": 14877 + }, + { + "epoch": 1.2007101928819304, + 
"grad_norm": 0.7208256125450134, + "learning_rate": 3.0972020299785007e-05, + "loss": 2.4186, + "step": 14878 + }, + { + "epoch": 1.2007908966185135, + "grad_norm": 0.6928278803825378, + "learning_rate": 3.096059866796791e-05, + "loss": 2.4177, + "step": 14879 + }, + { + "epoch": 1.2008716003550965, + "grad_norm": 0.7145438194274902, + "learning_rate": 3.094917875679317e-05, + "loss": 2.4796, + "step": 14880 + }, + { + "epoch": 1.2009523040916794, + "grad_norm": 0.7126322388648987, + "learning_rate": 3.093776056654539e-05, + "loss": 2.4926, + "step": 14881 + }, + { + "epoch": 1.2010330078282625, + "grad_norm": 0.7775046825408936, + "learning_rate": 3.092634409750919e-05, + "loss": 2.4386, + "step": 14882 + }, + { + "epoch": 1.2011137115648454, + "grad_norm": 0.6387330889701843, + "learning_rate": 3.091492934996901e-05, + "loss": 2.4302, + "step": 14883 + }, + { + "epoch": 1.2011944153014285, + "grad_norm": 0.6883525252342224, + "learning_rate": 3.090351632420939e-05, + "loss": 2.4644, + "step": 14884 + }, + { + "epoch": 1.2012751190380115, + "grad_norm": 0.6698900461196899, + "learning_rate": 3.0892105020514795e-05, + "loss": 2.414, + "step": 14885 + }, + { + "epoch": 1.2013558227745944, + "grad_norm": 0.7124409079551697, + "learning_rate": 3.088069543916956e-05, + "loss": 2.4275, + "step": 14886 + }, + { + "epoch": 1.2014365265111775, + "grad_norm": 0.6996601223945618, + "learning_rate": 3.0869287580458076e-05, + "loss": 2.4725, + "step": 14887 + }, + { + "epoch": 1.2015172302477604, + "grad_norm": 0.653087317943573, + "learning_rate": 3.085788144466468e-05, + "loss": 2.383, + "step": 14888 + }, + { + "epoch": 1.2015979339843434, + "grad_norm": 0.7426899671554565, + "learning_rate": 3.0846477032073554e-05, + "loss": 2.4064, + "step": 14889 + }, + { + "epoch": 1.2016786377209265, + "grad_norm": 0.6417646408081055, + "learning_rate": 3.083507434296903e-05, + "loss": 2.3964, + "step": 14890 + }, + { + "epoch": 1.2017593414575094, + "grad_norm": 0.6301923394203186, + 
"learning_rate": 3.0823673377635274e-05, + "loss": 2.4285, + "step": 14891 + }, + { + "epoch": 1.2018400451940925, + "grad_norm": 0.7621259093284607, + "learning_rate": 3.081227413635638e-05, + "loss": 2.4731, + "step": 14892 + }, + { + "epoch": 1.2019207489306756, + "grad_norm": 0.6637598872184753, + "learning_rate": 3.080087661941648e-05, + "loss": 2.4126, + "step": 14893 + }, + { + "epoch": 1.2020014526672584, + "grad_norm": 0.6820287108421326, + "learning_rate": 3.078948082709964e-05, + "loss": 2.4108, + "step": 14894 + }, + { + "epoch": 1.2020821564038415, + "grad_norm": 0.7090989351272583, + "learning_rate": 3.077808675968983e-05, + "loss": 2.4678, + "step": 14895 + }, + { + "epoch": 1.2021628601404246, + "grad_norm": 0.7242181897163391, + "learning_rate": 3.076669441747105e-05, + "loss": 2.5346, + "step": 14896 + }, + { + "epoch": 1.2022435638770075, + "grad_norm": 0.7790088653564453, + "learning_rate": 3.075530380072722e-05, + "loss": 2.4436, + "step": 14897 + }, + { + "epoch": 1.2023242676135906, + "grad_norm": 0.6828821301460266, + "learning_rate": 3.074391490974225e-05, + "loss": 2.3767, + "step": 14898 + }, + { + "epoch": 1.2024049713501734, + "grad_norm": 0.709815502166748, + "learning_rate": 3.0732527744799945e-05, + "loss": 2.4139, + "step": 14899 + }, + { + "epoch": 1.2024856750867565, + "grad_norm": 0.6561180353164673, + "learning_rate": 3.07211423061841e-05, + "loss": 2.399, + "step": 14900 + }, + { + "epoch": 1.2025663788233396, + "grad_norm": 0.7122004628181458, + "learning_rate": 3.0709758594178495e-05, + "loss": 2.4314, + "step": 14901 + }, + { + "epoch": 1.2026470825599225, + "grad_norm": 0.6817516684532166, + "learning_rate": 3.0698376609066825e-05, + "loss": 2.4241, + "step": 14902 + }, + { + "epoch": 1.2027277862965056, + "grad_norm": 0.6848475337028503, + "learning_rate": 3.068699635113277e-05, + "loss": 2.4583, + "step": 14903 + }, + { + "epoch": 1.2028084900330884, + "grad_norm": 0.6567823886871338, + "learning_rate": 
3.067561782065999e-05, + "loss": 2.3818, + "step": 14904 + }, + { + "epoch": 1.2028891937696715, + "grad_norm": 0.7373961806297302, + "learning_rate": 3.066424101793198e-05, + "loss": 2.4075, + "step": 14905 + }, + { + "epoch": 1.2029698975062546, + "grad_norm": 0.6968079209327698, + "learning_rate": 3.0652865943232346e-05, + "loss": 2.4701, + "step": 14906 + }, + { + "epoch": 1.2030506012428375, + "grad_norm": 0.7356292009353638, + "learning_rate": 3.064149259684459e-05, + "loss": 2.4188, + "step": 14907 + }, + { + "epoch": 1.2031313049794206, + "grad_norm": 0.7144857048988342, + "learning_rate": 3.063012097905211e-05, + "loss": 2.4411, + "step": 14908 + }, + { + "epoch": 1.2032120087160036, + "grad_norm": 0.734531044960022, + "learning_rate": 3.0618751090138365e-05, + "loss": 2.4595, + "step": 14909 + }, + { + "epoch": 1.2032927124525865, + "grad_norm": 0.6658234000205994, + "learning_rate": 3.060738293038669e-05, + "loss": 2.4206, + "step": 14910 + }, + { + "epoch": 1.2033734161891696, + "grad_norm": 0.678424596786499, + "learning_rate": 3.059601650008044e-05, + "loss": 2.4704, + "step": 14911 + }, + { + "epoch": 1.2034541199257527, + "grad_norm": 0.6852440237998962, + "learning_rate": 3.058465179950287e-05, + "loss": 2.46, + "step": 14912 + }, + { + "epoch": 1.2035348236623356, + "grad_norm": 0.702881395816803, + "learning_rate": 3.057328882893724e-05, + "loss": 2.4372, + "step": 14913 + }, + { + "epoch": 1.2036155273989186, + "grad_norm": 0.6978999972343445, + "learning_rate": 3.056192758866676e-05, + "loss": 2.401, + "step": 14914 + }, + { + "epoch": 1.2036962311355015, + "grad_norm": 0.7070993185043335, + "learning_rate": 3.055056807897454e-05, + "loss": 2.3967, + "step": 14915 + }, + { + "epoch": 1.2037769348720846, + "grad_norm": 0.7159305810928345, + "learning_rate": 3.0539210300143693e-05, + "loss": 2.4388, + "step": 14916 + }, + { + "epoch": 1.2038576386086675, + "grad_norm": 0.6920869946479797, + "learning_rate": 3.0527854252457333e-05, + "loss": 
2.441, + "step": 14917 + }, + { + "epoch": 1.2039383423452505, + "grad_norm": 0.7014884352684021, + "learning_rate": 3.0516499936198417e-05, + "loss": 2.4115, + "step": 14918 + }, + { + "epoch": 1.2040190460818336, + "grad_norm": 0.6754150986671448, + "learning_rate": 3.0505147351649955e-05, + "loss": 2.3722, + "step": 14919 + }, + { + "epoch": 1.2040997498184165, + "grad_norm": 0.7681791186332703, + "learning_rate": 3.0493796499094874e-05, + "loss": 2.4331, + "step": 14920 + }, + { + "epoch": 1.2041804535549996, + "grad_norm": 0.7265221476554871, + "learning_rate": 3.0482447378816082e-05, + "loss": 2.4806, + "step": 14921 + }, + { + "epoch": 1.2042611572915827, + "grad_norm": 0.6841520667076111, + "learning_rate": 3.047109999109642e-05, + "loss": 2.3896, + "step": 14922 + }, + { + "epoch": 1.2043418610281655, + "grad_norm": 0.746347963809967, + "learning_rate": 3.0459754336218737e-05, + "loss": 2.4081, + "step": 14923 + }, + { + "epoch": 1.2044225647647486, + "grad_norm": 0.6679818034172058, + "learning_rate": 3.0448410414465712e-05, + "loss": 2.4206, + "step": 14924 + }, + { + "epoch": 1.2045032685013317, + "grad_norm": 0.7122265100479126, + "learning_rate": 3.0437068226120114e-05, + "loss": 2.4217, + "step": 14925 + }, + { + "epoch": 1.2045839722379146, + "grad_norm": 0.7023499011993408, + "learning_rate": 3.0425727771464618e-05, + "loss": 2.4597, + "step": 14926 + }, + { + "epoch": 1.2046646759744977, + "grad_norm": 0.7304259538650513, + "learning_rate": 3.0414389050781876e-05, + "loss": 2.4915, + "step": 14927 + }, + { + "epoch": 1.2047453797110805, + "grad_norm": 0.7209908962249756, + "learning_rate": 3.0403052064354442e-05, + "loss": 2.4163, + "step": 14928 + }, + { + "epoch": 1.2048260834476636, + "grad_norm": 0.7367275953292847, + "learning_rate": 3.0391716812464865e-05, + "loss": 2.4192, + "step": 14929 + }, + { + "epoch": 1.2049067871842467, + "grad_norm": 0.6576591730117798, + "learning_rate": 3.0380383295395674e-05, + "loss": 2.4606, + "step": 14930 + 
}, + { + "epoch": 1.2049874909208296, + "grad_norm": 0.7082500457763672, + "learning_rate": 3.0369051513429315e-05, + "loss": 2.4079, + "step": 14931 + }, + { + "epoch": 1.2050681946574127, + "grad_norm": 0.6770346760749817, + "learning_rate": 3.03577214668482e-05, + "loss": 2.45, + "step": 14932 + }, + { + "epoch": 1.2051488983939955, + "grad_norm": 0.6979790925979614, + "learning_rate": 3.034639315593476e-05, + "loss": 2.3966, + "step": 14933 + }, + { + "epoch": 1.2052296021305786, + "grad_norm": 0.6863394975662231, + "learning_rate": 3.033506658097124e-05, + "loss": 2.4637, + "step": 14934 + }, + { + "epoch": 1.2053103058671617, + "grad_norm": 0.7522799372673035, + "learning_rate": 3.0323741742239963e-05, + "loss": 2.4585, + "step": 14935 + }, + { + "epoch": 1.2053910096037446, + "grad_norm": 0.7119878530502319, + "learning_rate": 3.031241864002321e-05, + "loss": 2.4473, + "step": 14936 + }, + { + "epoch": 1.2054717133403277, + "grad_norm": 0.690861701965332, + "learning_rate": 3.030109727460312e-05, + "loss": 2.4564, + "step": 14937 + }, + { + "epoch": 1.2055524170769107, + "grad_norm": 0.6825447082519531, + "learning_rate": 3.0289777646261886e-05, + "loss": 2.4511, + "step": 14938 + }, + { + "epoch": 1.2056331208134936, + "grad_norm": 0.7404600977897644, + "learning_rate": 3.027845975528164e-05, + "loss": 2.4461, + "step": 14939 + }, + { + "epoch": 1.2057138245500767, + "grad_norm": 0.6871766448020935, + "learning_rate": 3.026714360194437e-05, + "loss": 2.4486, + "step": 14940 + }, + { + "epoch": 1.2057945282866598, + "grad_norm": 0.6646476984024048, + "learning_rate": 3.02558291865322e-05, + "loss": 2.378, + "step": 14941 + }, + { + "epoch": 1.2058752320232426, + "grad_norm": 0.6998385787010193, + "learning_rate": 3.024451650932707e-05, + "loss": 2.4646, + "step": 14942 + }, + { + "epoch": 1.2059559357598257, + "grad_norm": 0.6763097047805786, + "learning_rate": 3.023320557061098e-05, + "loss": 2.3971, + "step": 14943 + }, + { + "epoch": 1.2060366394964086, + 
"grad_norm": 0.7409633994102478, + "learning_rate": 3.0221896370665736e-05, + "loss": 2.4405, + "step": 14944 + }, + { + "epoch": 1.2061173432329917, + "grad_norm": 0.6972076892852783, + "learning_rate": 3.0210588909773242e-05, + "loss": 2.3935, + "step": 14945 + }, + { + "epoch": 1.2061980469695748, + "grad_norm": 0.6898512840270996, + "learning_rate": 3.0199283188215333e-05, + "loss": 2.4173, + "step": 14946 + }, + { + "epoch": 1.2062787507061576, + "grad_norm": 0.6878097057342529, + "learning_rate": 3.0187979206273707e-05, + "loss": 2.44, + "step": 14947 + }, + { + "epoch": 1.2063594544427407, + "grad_norm": 0.6629695296287537, + "learning_rate": 3.0176676964230143e-05, + "loss": 2.3836, + "step": 14948 + }, + { + "epoch": 1.2064401581793236, + "grad_norm": 0.717654824256897, + "learning_rate": 3.0165376462366336e-05, + "loss": 2.415, + "step": 14949 + }, + { + "epoch": 1.2065208619159067, + "grad_norm": 0.7526129484176636, + "learning_rate": 3.0154077700963867e-05, + "loss": 2.4985, + "step": 14950 + }, + { + "epoch": 1.2066015656524898, + "grad_norm": 0.6867300271987915, + "learning_rate": 3.014278068030435e-05, + "loss": 2.395, + "step": 14951 + }, + { + "epoch": 1.2066822693890726, + "grad_norm": 0.7321466207504272, + "learning_rate": 3.0131485400669356e-05, + "loss": 2.4503, + "step": 14952 + }, + { + "epoch": 1.2067629731256557, + "grad_norm": 0.6915534734725952, + "learning_rate": 3.0120191862340387e-05, + "loss": 2.398, + "step": 14953 + }, + { + "epoch": 1.2068436768622388, + "grad_norm": 0.7017377018928528, + "learning_rate": 3.01089000655989e-05, + "loss": 2.4367, + "step": 14954 + }, + { + "epoch": 1.2069243805988217, + "grad_norm": 0.7032245397567749, + "learning_rate": 3.0097610010726353e-05, + "loss": 2.4078, + "step": 14955 + }, + { + "epoch": 1.2070050843354048, + "grad_norm": 0.6795478463172913, + "learning_rate": 3.008632169800406e-05, + "loss": 2.3508, + "step": 14956 + }, + { + "epoch": 1.2070857880719879, + "grad_norm": 0.7149559855461121, 
+ "learning_rate": 3.007503512771339e-05, + "loss": 2.4023, + "step": 14957 + }, + { + "epoch": 1.2071664918085707, + "grad_norm": 0.724756121635437, + "learning_rate": 3.006375030013563e-05, + "loss": 2.4439, + "step": 14958 + }, + { + "epoch": 1.2072471955451538, + "grad_norm": 0.7233348488807678, + "learning_rate": 3.005246721555205e-05, + "loss": 2.3819, + "step": 14959 + }, + { + "epoch": 1.2073278992817367, + "grad_norm": 0.700322151184082, + "learning_rate": 3.0041185874243815e-05, + "loss": 2.4222, + "step": 14960 + }, + { + "epoch": 1.2074086030183198, + "grad_norm": 0.7268145680427551, + "learning_rate": 3.002990627649209e-05, + "loss": 2.4698, + "step": 14961 + }, + { + "epoch": 1.2074893067549028, + "grad_norm": 0.6885111331939697, + "learning_rate": 3.001862842257801e-05, + "loss": 2.4505, + "step": 14962 + }, + { + "epoch": 1.2075700104914857, + "grad_norm": 0.7237974405288696, + "learning_rate": 3.0007352312782632e-05, + "loss": 2.422, + "step": 14963 + }, + { + "epoch": 1.2076507142280688, + "grad_norm": 0.7214741110801697, + "learning_rate": 2.9996077947387015e-05, + "loss": 2.4428, + "step": 14964 + }, + { + "epoch": 1.2077314179646517, + "grad_norm": 0.7264460921287537, + "learning_rate": 2.998480532667215e-05, + "loss": 2.4669, + "step": 14965 + }, + { + "epoch": 1.2078121217012348, + "grad_norm": 0.7055517435073853, + "learning_rate": 2.9973534450918928e-05, + "loss": 2.5082, + "step": 14966 + }, + { + "epoch": 1.2078928254378178, + "grad_norm": 0.6886781454086304, + "learning_rate": 2.9962265320408268e-05, + "loss": 2.4697, + "step": 14967 + }, + { + "epoch": 1.2079735291744007, + "grad_norm": 0.6875878572463989, + "learning_rate": 2.9950997935421076e-05, + "loss": 2.4384, + "step": 14968 + }, + { + "epoch": 1.2080542329109838, + "grad_norm": 0.7586886882781982, + "learning_rate": 2.99397322962381e-05, + "loss": 2.4088, + "step": 14969 + }, + { + "epoch": 1.2081349366475669, + "grad_norm": 0.6744365096092224, + "learning_rate": 
2.992846840314013e-05, + "loss": 2.4109, + "step": 14970 + }, + { + "epoch": 1.2082156403841497, + "grad_norm": 0.6589661240577698, + "learning_rate": 2.9917206256407893e-05, + "loss": 2.4386, + "step": 14971 + }, + { + "epoch": 1.2082963441207328, + "grad_norm": 0.6787264943122864, + "learning_rate": 2.990594585632208e-05, + "loss": 2.401, + "step": 14972 + }, + { + "epoch": 1.2083770478573157, + "grad_norm": 0.710517406463623, + "learning_rate": 2.9894687203163317e-05, + "loss": 2.4813, + "step": 14973 + }, + { + "epoch": 1.2084577515938988, + "grad_norm": 0.676110029220581, + "learning_rate": 2.988343029721221e-05, + "loss": 2.4654, + "step": 14974 + }, + { + "epoch": 1.2085384553304819, + "grad_norm": 0.6940518617630005, + "learning_rate": 2.9872175138749336e-05, + "loss": 2.4188, + "step": 14975 + }, + { + "epoch": 1.2086191590670647, + "grad_norm": 0.6849910020828247, + "learning_rate": 2.9860921728055147e-05, + "loss": 2.384, + "step": 14976 + }, + { + "epoch": 1.2086998628036478, + "grad_norm": 0.6902467608451843, + "learning_rate": 2.9849670065410128e-05, + "loss": 2.4364, + "step": 14977 + }, + { + "epoch": 1.2087805665402307, + "grad_norm": 0.6742224097251892, + "learning_rate": 2.9838420151094747e-05, + "loss": 2.5085, + "step": 14978 + }, + { + "epoch": 1.2088612702768138, + "grad_norm": 0.6635094285011292, + "learning_rate": 2.9827171985389303e-05, + "loss": 2.3635, + "step": 14979 + }, + { + "epoch": 1.2089419740133969, + "grad_norm": 0.7189158201217651, + "learning_rate": 2.9815925568574165e-05, + "loss": 2.458, + "step": 14980 + }, + { + "epoch": 1.2090226777499797, + "grad_norm": 0.7370143532752991, + "learning_rate": 2.9804680900929628e-05, + "loss": 2.4543, + "step": 14981 + }, + { + "epoch": 1.2091033814865628, + "grad_norm": 0.7410217523574829, + "learning_rate": 2.979343798273593e-05, + "loss": 2.4537, + "step": 14982 + }, + { + "epoch": 1.209184085223146, + "grad_norm": 0.7525770664215088, + "learning_rate": 2.9782196814273277e-05, + "loss": 
2.5147, + "step": 14983 + }, + { + "epoch": 1.2092647889597288, + "grad_norm": 0.7302291393280029, + "learning_rate": 2.9770957395821863e-05, + "loss": 2.4711, + "step": 14984 + }, + { + "epoch": 1.2093454926963119, + "grad_norm": 0.7154920101165771, + "learning_rate": 2.975971972766175e-05, + "loss": 2.5224, + "step": 14985 + }, + { + "epoch": 1.209426196432895, + "grad_norm": 0.6827684640884399, + "learning_rate": 2.9748483810073025e-05, + "loss": 2.4477, + "step": 14986 + }, + { + "epoch": 1.2095069001694778, + "grad_norm": 0.7753484845161438, + "learning_rate": 2.973724964333575e-05, + "loss": 2.4257, + "step": 14987 + }, + { + "epoch": 1.209587603906061, + "grad_norm": 0.7146809101104736, + "learning_rate": 2.9726017227729862e-05, + "loss": 2.3953, + "step": 14988 + }, + { + "epoch": 1.2096683076426438, + "grad_norm": 0.7360730767250061, + "learning_rate": 2.9714786563535313e-05, + "loss": 2.3774, + "step": 14989 + }, + { + "epoch": 1.2097490113792269, + "grad_norm": 0.7159923911094666, + "learning_rate": 2.970355765103201e-05, + "loss": 2.4068, + "step": 14990 + }, + { + "epoch": 1.20982971511581, + "grad_norm": 0.6732171773910522, + "learning_rate": 2.969233049049982e-05, + "loss": 2.4215, + "step": 14991 + }, + { + "epoch": 1.2099104188523928, + "grad_norm": 0.749812126159668, + "learning_rate": 2.968110508221853e-05, + "loss": 2.4415, + "step": 14992 + }, + { + "epoch": 1.209991122588976, + "grad_norm": 0.7185530662536621, + "learning_rate": 2.9669881426467916e-05, + "loss": 2.4536, + "step": 14993 + }, + { + "epoch": 1.2100718263255588, + "grad_norm": 0.6757143139839172, + "learning_rate": 2.9658659523527733e-05, + "loss": 2.3892, + "step": 14994 + }, + { + "epoch": 1.2101525300621419, + "grad_norm": 0.7187495231628418, + "learning_rate": 2.96474393736776e-05, + "loss": 2.434, + "step": 14995 + }, + { + "epoch": 1.210233233798725, + "grad_norm": 0.7016372680664062, + "learning_rate": 2.9636220977197182e-05, + "loss": 2.4903, + "step": 14996 + }, + { + 
"epoch": 1.2103139375353078, + "grad_norm": 0.7528983950614929, + "learning_rate": 2.9625004334366103e-05, + "loss": 2.3829, + "step": 14997 + }, + { + "epoch": 1.210394641271891, + "grad_norm": 0.6735692024230957, + "learning_rate": 2.9613789445463837e-05, + "loss": 2.3844, + "step": 14998 + }, + { + "epoch": 1.210475345008474, + "grad_norm": 0.6825322508811951, + "learning_rate": 2.9602576310769935e-05, + "loss": 2.4691, + "step": 14999 + }, + { + "epoch": 1.2105560487450568, + "grad_norm": 0.7507675290107727, + "learning_rate": 2.959136493056389e-05, + "loss": 2.4605, + "step": 15000 + }, + { + "epoch": 1.2105560487450568, + "eval_loss": 2.3882925510406494, + "eval_runtime": 1014.0781, + "eval_samples_per_second": 2.584, + "eval_steps_per_second": 0.431, + "step": 15000 + }, + { + "epoch": 1.21063675248164, + "grad_norm": 0.6937146782875061, + "learning_rate": 2.9580155305125044e-05, + "loss": 2.4444, + "step": 15001 + }, + { + "epoch": 1.210717456218223, + "grad_norm": 0.6572179794311523, + "learning_rate": 2.9568947434732775e-05, + "loss": 2.4373, + "step": 15002 + }, + { + "epoch": 1.2107981599548059, + "grad_norm": 0.7420738935470581, + "learning_rate": 2.955774131966651e-05, + "loss": 2.4046, + "step": 15003 + }, + { + "epoch": 1.210878863691389, + "grad_norm": 0.7952237129211426, + "learning_rate": 2.954653696020543e-05, + "loss": 2.4082, + "step": 15004 + }, + { + "epoch": 1.2109595674279718, + "grad_norm": 0.6640750765800476, + "learning_rate": 2.9535334356628817e-05, + "loss": 2.4109, + "step": 15005 + }, + { + "epoch": 1.211040271164555, + "grad_norm": 0.6968019008636475, + "learning_rate": 2.952413350921588e-05, + "loss": 2.3991, + "step": 15006 + }, + { + "epoch": 1.211120974901138, + "grad_norm": 0.7174221277236938, + "learning_rate": 2.9512934418245787e-05, + "loss": 2.3909, + "step": 15007 + }, + { + "epoch": 1.2112016786377209, + "grad_norm": 0.6854268908500671, + "learning_rate": 2.9501737083997595e-05, + "loss": 2.4321, + "step": 15008 + }, + { 
+ "epoch": 1.211282382374304, + "grad_norm": 0.6705672740936279, + "learning_rate": 2.949054150675039e-05, + "loss": 2.4749, + "step": 15009 + }, + { + "epoch": 1.2113630861108868, + "grad_norm": 0.7871068716049194, + "learning_rate": 2.9479347686783244e-05, + "loss": 2.424, + "step": 15010 + }, + { + "epoch": 1.21144378984747, + "grad_norm": 0.8194620609283447, + "learning_rate": 2.946815562437506e-05, + "loss": 2.461, + "step": 15011 + }, + { + "epoch": 1.211524493584053, + "grad_norm": 0.673367977142334, + "learning_rate": 2.9456965319804818e-05, + "loss": 2.4212, + "step": 15012 + }, + { + "epoch": 1.2116051973206359, + "grad_norm": 0.6630001068115234, + "learning_rate": 2.9445776773351397e-05, + "loss": 2.4393, + "step": 15013 + }, + { + "epoch": 1.211685901057219, + "grad_norm": 0.676170825958252, + "learning_rate": 2.943458998529365e-05, + "loss": 2.3889, + "step": 15014 + }, + { + "epoch": 1.211766604793802, + "grad_norm": 0.6951417326927185, + "learning_rate": 2.942340495591037e-05, + "loss": 2.4088, + "step": 15015 + }, + { + "epoch": 1.211847308530385, + "grad_norm": 0.6909857988357544, + "learning_rate": 2.941222168548037e-05, + "loss": 2.4282, + "step": 15016 + }, + { + "epoch": 1.211928012266968, + "grad_norm": 0.653264045715332, + "learning_rate": 2.9401040174282292e-05, + "loss": 2.4369, + "step": 15017 + }, + { + "epoch": 1.2120087160035509, + "grad_norm": 0.6994543075561523, + "learning_rate": 2.938986042259484e-05, + "loss": 2.419, + "step": 15018 + }, + { + "epoch": 1.212089419740134, + "grad_norm": 0.709015965461731, + "learning_rate": 2.9378682430696668e-05, + "loss": 2.4747, + "step": 15019 + }, + { + "epoch": 1.212170123476717, + "grad_norm": 0.6899579167366028, + "learning_rate": 2.9367506198866313e-05, + "loss": 2.4134, + "step": 15020 + }, + { + "epoch": 1.2122508272133, + "grad_norm": 0.6811912059783936, + "learning_rate": 2.9356331727382337e-05, + "loss": 2.449, + "step": 15021 + }, + { + "epoch": 1.212331530949883, + "grad_norm": 
0.8119748830795288, + "learning_rate": 2.9345159016523237e-05, + "loss": 2.4463, + "step": 15022 + }, + { + "epoch": 1.2124122346864659, + "grad_norm": 0.7323578000068665, + "learning_rate": 2.9333988066567463e-05, + "loss": 2.4305, + "step": 15023 + }, + { + "epoch": 1.212492938423049, + "grad_norm": 0.6639837622642517, + "learning_rate": 2.9322818877793436e-05, + "loss": 2.4237, + "step": 15024 + }, + { + "epoch": 1.212573642159632, + "grad_norm": 0.669623076915741, + "learning_rate": 2.9311651450479516e-05, + "loss": 2.4436, + "step": 15025 + }, + { + "epoch": 1.212654345896215, + "grad_norm": 0.7200437784194946, + "learning_rate": 2.9300485784904054e-05, + "loss": 2.4399, + "step": 15026 + }, + { + "epoch": 1.212735049632798, + "grad_norm": 0.7015525102615356, + "learning_rate": 2.9289321881345254e-05, + "loss": 2.4696, + "step": 15027 + }, + { + "epoch": 1.212815753369381, + "grad_norm": 0.74539715051651, + "learning_rate": 2.9278159740081402e-05, + "loss": 2.4204, + "step": 15028 + }, + { + "epoch": 1.212896457105964, + "grad_norm": 0.6373662352561951, + "learning_rate": 2.9266999361390713e-05, + "loss": 2.4273, + "step": 15029 + }, + { + "epoch": 1.212977160842547, + "grad_norm": 0.8213370442390442, + "learning_rate": 2.9255840745551256e-05, + "loss": 2.4166, + "step": 15030 + }, + { + "epoch": 1.2130578645791301, + "grad_norm": 0.7386181354522705, + "learning_rate": 2.9244683892841185e-05, + "loss": 2.3973, + "step": 15031 + }, + { + "epoch": 1.213138568315713, + "grad_norm": 0.7939273118972778, + "learning_rate": 2.9233528803538534e-05, + "loss": 2.5593, + "step": 15032 + }, + { + "epoch": 1.213219272052296, + "grad_norm": 0.7580689191818237, + "learning_rate": 2.9222375477921347e-05, + "loss": 2.4255, + "step": 15033 + }, + { + "epoch": 1.213299975788879, + "grad_norm": 0.7680409550666809, + "learning_rate": 2.9211223916267573e-05, + "loss": 2.4447, + "step": 15034 + }, + { + "epoch": 1.213380679525462, + "grad_norm": 0.6998565196990967, + 
"learning_rate": 2.9200074118855135e-05, + "loss": 2.4061, + "step": 15035 + }, + { + "epoch": 1.2134613832620451, + "grad_norm": 0.6673001050949097, + "learning_rate": 2.9188926085961954e-05, + "loss": 2.3989, + "step": 15036 + }, + { + "epoch": 1.213542086998628, + "grad_norm": 0.683215320110321, + "learning_rate": 2.9177779817865815e-05, + "loss": 2.4078, + "step": 15037 + }, + { + "epoch": 1.213622790735211, + "grad_norm": 0.696967363357544, + "learning_rate": 2.9166635314844527e-05, + "loss": 2.4224, + "step": 15038 + }, + { + "epoch": 1.213703494471794, + "grad_norm": 0.6930364370346069, + "learning_rate": 2.915549257717588e-05, + "loss": 2.4112, + "step": 15039 + }, + { + "epoch": 1.213784198208377, + "grad_norm": 0.7387405633926392, + "learning_rate": 2.914435160513752e-05, + "loss": 2.4458, + "step": 15040 + }, + { + "epoch": 1.21386490194496, + "grad_norm": 0.6615941524505615, + "learning_rate": 2.913321239900714e-05, + "loss": 2.4406, + "step": 15041 + }, + { + "epoch": 1.213945605681543, + "grad_norm": 0.7520569562911987, + "learning_rate": 2.912207495906235e-05, + "loss": 2.3991, + "step": 15042 + }, + { + "epoch": 1.214026309418126, + "grad_norm": 0.6952454447746277, + "learning_rate": 2.911093928558072e-05, + "loss": 2.4404, + "step": 15043 + }, + { + "epoch": 1.2141070131547091, + "grad_norm": 0.7595344185829163, + "learning_rate": 2.9099805378839794e-05, + "loss": 2.551, + "step": 15044 + }, + { + "epoch": 1.214187716891292, + "grad_norm": 0.6645220518112183, + "learning_rate": 2.9088673239117094e-05, + "loss": 2.4167, + "step": 15045 + }, + { + "epoch": 1.214268420627875, + "grad_norm": 0.6433377861976624, + "learning_rate": 2.907754286668998e-05, + "loss": 2.3873, + "step": 15046 + }, + { + "epoch": 1.2143491243644582, + "grad_norm": 0.6806936860084534, + "learning_rate": 2.9066414261835894e-05, + "loss": 2.3868, + "step": 15047 + }, + { + "epoch": 1.214429828101041, + "grad_norm": 0.7261343598365784, + "learning_rate": 2.905528742483222e-05, + 
"loss": 2.4785, + "step": 15048 + }, + { + "epoch": 1.2145105318376241, + "grad_norm": 0.6495440602302551, + "learning_rate": 2.9044162355956196e-05, + "loss": 2.4167, + "step": 15049 + }, + { + "epoch": 1.214591235574207, + "grad_norm": 0.6816607117652893, + "learning_rate": 2.9033039055485135e-05, + "loss": 2.459, + "step": 15050 + }, + { + "epoch": 1.21467193931079, + "grad_norm": 0.6624214053153992, + "learning_rate": 2.902191752369624e-05, + "loss": 2.4498, + "step": 15051 + }, + { + "epoch": 1.2147526430473732, + "grad_norm": 0.6800024509429932, + "learning_rate": 2.9010797760866737e-05, + "loss": 2.4442, + "step": 15052 + }, + { + "epoch": 1.214833346783956, + "grad_norm": 0.711705207824707, + "learning_rate": 2.8999679767273667e-05, + "loss": 2.422, + "step": 15053 + }, + { + "epoch": 1.2149140505205391, + "grad_norm": 0.6854784488677979, + "learning_rate": 2.898856354319419e-05, + "loss": 2.4567, + "step": 15054 + }, + { + "epoch": 1.214994754257122, + "grad_norm": 0.6676114797592163, + "learning_rate": 2.8977449088905373e-05, + "loss": 2.3913, + "step": 15055 + }, + { + "epoch": 1.215075457993705, + "grad_norm": 0.6893348693847656, + "learning_rate": 2.8966336404684145e-05, + "loss": 2.4407, + "step": 15056 + }, + { + "epoch": 1.2151561617302882, + "grad_norm": 0.6749289035797119, + "learning_rate": 2.8955225490807514e-05, + "loss": 2.409, + "step": 15057 + }, + { + "epoch": 1.215236865466871, + "grad_norm": 0.6998956203460693, + "learning_rate": 2.8944116347552387e-05, + "loss": 2.4297, + "step": 15058 + }, + { + "epoch": 1.2153175692034541, + "grad_norm": 0.7040024399757385, + "learning_rate": 2.8933008975195596e-05, + "loss": 2.4262, + "step": 15059 + }, + { + "epoch": 1.2153982729400372, + "grad_norm": 0.6638362407684326, + "learning_rate": 2.8921903374014005e-05, + "loss": 2.4355, + "step": 15060 + }, + { + "epoch": 1.21547897667662, + "grad_norm": 0.6864547729492188, + "learning_rate": 2.8910799544284407e-05, + "loss": 2.4493, + "step": 15061 + }, + 
{ + "epoch": 1.2155596804132032, + "grad_norm": 0.707383394241333, + "learning_rate": 2.8899697486283474e-05, + "loss": 2.4604, + "step": 15062 + }, + { + "epoch": 1.2156403841497863, + "grad_norm": 0.7121397852897644, + "learning_rate": 2.888859720028795e-05, + "loss": 2.4272, + "step": 15063 + }, + { + "epoch": 1.2157210878863691, + "grad_norm": 0.7600439786911011, + "learning_rate": 2.8877498686574455e-05, + "loss": 2.4499, + "step": 15064 + }, + { + "epoch": 1.2158017916229522, + "grad_norm": 0.6654962301254272, + "learning_rate": 2.886640194541962e-05, + "loss": 2.4632, + "step": 15065 + }, + { + "epoch": 1.215882495359535, + "grad_norm": 0.7138063311576843, + "learning_rate": 2.8855306977099994e-05, + "loss": 2.4321, + "step": 15066 + }, + { + "epoch": 1.2159631990961182, + "grad_norm": 0.672604501247406, + "learning_rate": 2.884421378189208e-05, + "loss": 2.4026, + "step": 15067 + }, + { + "epoch": 1.2160439028327013, + "grad_norm": 0.6894693970680237, + "learning_rate": 2.8833122360072405e-05, + "loss": 2.4213, + "step": 15068 + }, + { + "epoch": 1.2161246065692841, + "grad_norm": 0.6784985065460205, + "learning_rate": 2.8822032711917325e-05, + "loss": 2.4207, + "step": 15069 + }, + { + "epoch": 1.2162053103058672, + "grad_norm": 0.6569294929504395, + "learning_rate": 2.8810944837703248e-05, + "loss": 2.4142, + "step": 15070 + }, + { + "epoch": 1.21628601404245, + "grad_norm": 0.7240702509880066, + "learning_rate": 2.879985873770654e-05, + "loss": 2.4173, + "step": 15071 + }, + { + "epoch": 1.2163667177790332, + "grad_norm": 0.6935575604438782, + "learning_rate": 2.8788774412203444e-05, + "loss": 2.4487, + "step": 15072 + }, + { + "epoch": 1.2164474215156162, + "grad_norm": 0.6903246641159058, + "learning_rate": 2.8777691861470234e-05, + "loss": 2.4193, + "step": 15073 + }, + { + "epoch": 1.216528125252199, + "grad_norm": 0.7982182502746582, + "learning_rate": 2.8766611085783123e-05, + "loss": 2.492, + "step": 15074 + }, + { + "epoch": 1.2166088289887822, + 
"grad_norm": 0.6958058476448059, + "learning_rate": 2.875553208541827e-05, + "loss": 2.4198, + "step": 15075 + }, + { + "epoch": 1.2166895327253653, + "grad_norm": 0.6869969964027405, + "learning_rate": 2.8744454860651794e-05, + "loss": 2.3768, + "step": 15076 + }, + { + "epoch": 1.2167702364619482, + "grad_norm": 0.7263007760047913, + "learning_rate": 2.8733379411759796e-05, + "loss": 2.386, + "step": 15077 + }, + { + "epoch": 1.2168509401985312, + "grad_norm": 0.7010302543640137, + "learning_rate": 2.872230573901825e-05, + "loss": 2.4417, + "step": 15078 + }, + { + "epoch": 1.216931643935114, + "grad_norm": 0.818980872631073, + "learning_rate": 2.8711233842703156e-05, + "loss": 2.433, + "step": 15079 + }, + { + "epoch": 1.2170123476716972, + "grad_norm": 0.6937929391860962, + "learning_rate": 2.87001637230905e-05, + "loss": 2.379, + "step": 15080 + }, + { + "epoch": 1.2170930514082803, + "grad_norm": 0.6954175233840942, + "learning_rate": 2.868909538045612e-05, + "loss": 2.4296, + "step": 15081 + }, + { + "epoch": 1.2171737551448631, + "grad_norm": 0.7177354097366333, + "learning_rate": 2.8678028815075887e-05, + "loss": 2.3978, + "step": 15082 + }, + { + "epoch": 1.2172544588814462, + "grad_norm": 0.7100846171379089, + "learning_rate": 2.8666964027225607e-05, + "loss": 2.4566, + "step": 15083 + }, + { + "epoch": 1.217335162618029, + "grad_norm": 0.6909635066986084, + "learning_rate": 2.8655901017181064e-05, + "loss": 2.4772, + "step": 15084 + }, + { + "epoch": 1.2174158663546122, + "grad_norm": 0.7319501638412476, + "learning_rate": 2.8644839785217947e-05, + "loss": 2.4402, + "step": 15085 + }, + { + "epoch": 1.2174965700911953, + "grad_norm": 0.6691421270370483, + "learning_rate": 2.8633780331611958e-05, + "loss": 2.4465, + "step": 15086 + }, + { + "epoch": 1.2175772738277781, + "grad_norm": 0.7028824687004089, + "learning_rate": 2.8622722656638745e-05, + "loss": 2.4765, + "step": 15087 + }, + { + "epoch": 1.2176579775643612, + "grad_norm": 0.7428398728370667, + 
"learning_rate": 2.861166676057383e-05, + "loss": 2.441, + "step": 15088 + }, + { + "epoch": 1.2177386813009443, + "grad_norm": 0.6715269684791565, + "learning_rate": 2.8600612643692803e-05, + "loss": 2.4621, + "step": 15089 + }, + { + "epoch": 1.2178193850375272, + "grad_norm": 0.6768512725830078, + "learning_rate": 2.8589560306271168e-05, + "loss": 2.4257, + "step": 15090 + }, + { + "epoch": 1.2179000887741103, + "grad_norm": 0.7442535758018494, + "learning_rate": 2.8578509748584326e-05, + "loss": 2.424, + "step": 15091 + }, + { + "epoch": 1.2179807925106934, + "grad_norm": 0.7275974154472351, + "learning_rate": 2.8567460970907722e-05, + "loss": 2.4698, + "step": 15092 + }, + { + "epoch": 1.2180614962472762, + "grad_norm": 0.7050346732139587, + "learning_rate": 2.8556413973516727e-05, + "loss": 2.4734, + "step": 15093 + }, + { + "epoch": 1.2181421999838593, + "grad_norm": 0.7325939536094666, + "learning_rate": 2.854536875668664e-05, + "loss": 2.4166, + "step": 15094 + }, + { + "epoch": 1.2182229037204422, + "grad_norm": 0.6764184236526489, + "learning_rate": 2.8534325320692746e-05, + "loss": 2.4742, + "step": 15095 + }, + { + "epoch": 1.2183036074570253, + "grad_norm": 0.7405500411987305, + "learning_rate": 2.8523283665810318e-05, + "loss": 2.3959, + "step": 15096 + }, + { + "epoch": 1.2183843111936083, + "grad_norm": 0.6714199185371399, + "learning_rate": 2.8512243792314465e-05, + "loss": 2.4571, + "step": 15097 + }, + { + "epoch": 1.2184650149301912, + "grad_norm": 0.6779391169548035, + "learning_rate": 2.8501205700480372e-05, + "loss": 2.3745, + "step": 15098 + }, + { + "epoch": 1.2185457186667743, + "grad_norm": 0.6876079440116882, + "learning_rate": 2.8490169390583134e-05, + "loss": 2.4432, + "step": 15099 + }, + { + "epoch": 1.2186264224033572, + "grad_norm": 0.7092362642288208, + "learning_rate": 2.8479134862897826e-05, + "loss": 2.4716, + "step": 15100 + }, + { + "epoch": 1.2187071261399403, + "grad_norm": 0.6901989579200745, + "learning_rate": 
2.8468102117699414e-05, + "loss": 2.417, + "step": 15101 + }, + { + "epoch": 1.2187878298765233, + "grad_norm": 0.7011592984199524, + "learning_rate": 2.8457071155262884e-05, + "loss": 2.4439, + "step": 15102 + }, + { + "epoch": 1.2188685336131062, + "grad_norm": 0.6923472285270691, + "learning_rate": 2.8446041975863146e-05, + "loss": 2.4247, + "step": 15103 + }, + { + "epoch": 1.2189492373496893, + "grad_norm": 0.6948748230934143, + "learning_rate": 2.843501457977509e-05, + "loss": 2.3902, + "step": 15104 + }, + { + "epoch": 1.2190299410862724, + "grad_norm": 0.7034386396408081, + "learning_rate": 2.842398896727354e-05, + "loss": 2.4277, + "step": 15105 + }, + { + "epoch": 1.2191106448228552, + "grad_norm": 0.7965617775917053, + "learning_rate": 2.8412965138633318e-05, + "loss": 2.435, + "step": 15106 + }, + { + "epoch": 1.2191913485594383, + "grad_norm": 0.7371121644973755, + "learning_rate": 2.8401943094129112e-05, + "loss": 2.3928, + "step": 15107 + }, + { + "epoch": 1.2192720522960214, + "grad_norm": 0.7079561352729797, + "learning_rate": 2.839092283403564e-05, + "loss": 2.4706, + "step": 15108 + }, + { + "epoch": 1.2193527560326043, + "grad_norm": 0.6711337566375732, + "learning_rate": 2.8379904358627584e-05, + "loss": 2.4272, + "step": 15109 + }, + { + "epoch": 1.2194334597691874, + "grad_norm": 0.6840410828590393, + "learning_rate": 2.836888766817951e-05, + "loss": 2.4174, + "step": 15110 + }, + { + "epoch": 1.2195141635057702, + "grad_norm": 0.700366199016571, + "learning_rate": 2.8357872762965986e-05, + "loss": 2.4667, + "step": 15111 + }, + { + "epoch": 1.2195948672423533, + "grad_norm": 0.7090682983398438, + "learning_rate": 2.8346859643261593e-05, + "loss": 2.3748, + "step": 15112 + }, + { + "epoch": 1.2196755709789364, + "grad_norm": 0.7965148687362671, + "learning_rate": 2.8335848309340717e-05, + "loss": 2.5138, + "step": 15113 + }, + { + "epoch": 1.2197562747155193, + "grad_norm": 0.7845773696899414, + "learning_rate": 2.8324838761477833e-05, + 
"loss": 2.4274, + "step": 15114 + }, + { + "epoch": 1.2198369784521024, + "grad_norm": 0.6545087099075317, + "learning_rate": 2.831383099994731e-05, + "loss": 2.4311, + "step": 15115 + }, + { + "epoch": 1.2199176821886852, + "grad_norm": 0.6846331357955933, + "learning_rate": 2.830282502502356e-05, + "loss": 2.4239, + "step": 15116 + }, + { + "epoch": 1.2199983859252683, + "grad_norm": 0.7062236070632935, + "learning_rate": 2.8291820836980798e-05, + "loss": 2.4429, + "step": 15117 + }, + { + "epoch": 1.2200790896618514, + "grad_norm": 0.7526285648345947, + "learning_rate": 2.8280818436093315e-05, + "loss": 2.4882, + "step": 15118 + }, + { + "epoch": 1.2201597933984343, + "grad_norm": 0.6853364109992981, + "learning_rate": 2.8269817822635337e-05, + "loss": 2.3803, + "step": 15119 + }, + { + "epoch": 1.2202404971350174, + "grad_norm": 0.7796143293380737, + "learning_rate": 2.8258818996880964e-05, + "loss": 2.4157, + "step": 15120 + }, + { + "epoch": 1.2203212008716005, + "grad_norm": 0.7202157378196716, + "learning_rate": 2.824782195910437e-05, + "loss": 2.5101, + "step": 15121 + }, + { + "epoch": 1.2204019046081833, + "grad_norm": 0.6730707287788391, + "learning_rate": 2.8236826709579644e-05, + "loss": 2.4397, + "step": 15122 + }, + { + "epoch": 1.2204826083447664, + "grad_norm": 0.7840865850448608, + "learning_rate": 2.8225833248580745e-05, + "loss": 2.4452, + "step": 15123 + }, + { + "epoch": 1.2205633120813493, + "grad_norm": 0.8323497772216797, + "learning_rate": 2.821484157638171e-05, + "loss": 2.4775, + "step": 15124 + }, + { + "epoch": 1.2206440158179324, + "grad_norm": 0.6699438691139221, + "learning_rate": 2.8203851693256466e-05, + "loss": 2.3958, + "step": 15125 + }, + { + "epoch": 1.2207247195545154, + "grad_norm": 0.6711557507514954, + "learning_rate": 2.8192863599478923e-05, + "loss": 2.477, + "step": 15126 + }, + { + "epoch": 1.2208054232910983, + "grad_norm": 0.6255797743797302, + "learning_rate": 2.8181877295322922e-05, + "loss": 2.4222, + "step": 
15127 + }, + { + "epoch": 1.2208861270276814, + "grad_norm": 0.7313731908798218, + "learning_rate": 2.8170892781062297e-05, + "loss": 2.4343, + "step": 15128 + }, + { + "epoch": 1.2209668307642643, + "grad_norm": 0.6611476540565491, + "learning_rate": 2.815991005697076e-05, + "loss": 2.3844, + "step": 15129 + }, + { + "epoch": 1.2210475345008474, + "grad_norm": 0.7293661236763, + "learning_rate": 2.8148929123322065e-05, + "loss": 2.3912, + "step": 15130 + }, + { + "epoch": 1.2211282382374304, + "grad_norm": 0.7150777578353882, + "learning_rate": 2.8137949980389866e-05, + "loss": 2.4227, + "step": 15131 + }, + { + "epoch": 1.2212089419740133, + "grad_norm": 0.7001000642776489, + "learning_rate": 2.8126972628447845e-05, + "loss": 2.4751, + "step": 15132 + }, + { + "epoch": 1.2212896457105964, + "grad_norm": 0.7106043100357056, + "learning_rate": 2.8115997067769505e-05, + "loss": 2.4127, + "step": 15133 + }, + { + "epoch": 1.2213703494471795, + "grad_norm": 0.6969115138053894, + "learning_rate": 2.810502329862842e-05, + "loss": 2.4073, + "step": 15134 + }, + { + "epoch": 1.2214510531837623, + "grad_norm": 0.7493317127227783, + "learning_rate": 2.8094051321298098e-05, + "loss": 2.4541, + "step": 15135 + }, + { + "epoch": 1.2215317569203454, + "grad_norm": 0.6499322652816772, + "learning_rate": 2.808308113605198e-05, + "loss": 2.4057, + "step": 15136 + }, + { + "epoch": 1.2216124606569285, + "grad_norm": 0.6716788411140442, + "learning_rate": 2.807211274316347e-05, + "loss": 2.3856, + "step": 15137 + }, + { + "epoch": 1.2216931643935114, + "grad_norm": 0.7724741101264954, + "learning_rate": 2.8061146142905958e-05, + "loss": 2.4652, + "step": 15138 + }, + { + "epoch": 1.2217738681300945, + "grad_norm": 0.7014325261116028, + "learning_rate": 2.8050181335552718e-05, + "loss": 2.4506, + "step": 15139 + }, + { + "epoch": 1.2218545718666773, + "grad_norm": 0.6705317497253418, + "learning_rate": 2.8039218321377026e-05, + "loss": 2.4581, + "step": 15140 + }, + { + "epoch": 
1.2219352756032604, + "grad_norm": 0.709973931312561, + "learning_rate": 2.8028257100652156e-05, + "loss": 2.427, + "step": 15141 + }, + { + "epoch": 1.2220159793398435, + "grad_norm": 0.7021297812461853, + "learning_rate": 2.801729767365122e-05, + "loss": 2.3784, + "step": 15142 + }, + { + "epoch": 1.2220966830764264, + "grad_norm": 0.7431899905204773, + "learning_rate": 2.8006340040647393e-05, + "loss": 2.4135, + "step": 15143 + }, + { + "epoch": 1.2221773868130095, + "grad_norm": 0.6724472045898438, + "learning_rate": 2.7995384201913765e-05, + "loss": 2.3966, + "step": 15144 + }, + { + "epoch": 1.2222580905495923, + "grad_norm": 0.7381375432014465, + "learning_rate": 2.7984430157723384e-05, + "loss": 2.4853, + "step": 15145 + }, + { + "epoch": 1.2223387942861754, + "grad_norm": 0.6809988617897034, + "learning_rate": 2.7973477908349255e-05, + "loss": 2.408, + "step": 15146 + }, + { + "epoch": 1.2224194980227585, + "grad_norm": 0.7042898535728455, + "learning_rate": 2.7962527454064337e-05, + "loss": 2.3981, + "step": 15147 + }, + { + "epoch": 1.2225002017593414, + "grad_norm": 0.7096118330955505, + "learning_rate": 2.7951578795141576e-05, + "loss": 2.4175, + "step": 15148 + }, + { + "epoch": 1.2225809054959245, + "grad_norm": 0.7271720767021179, + "learning_rate": 2.794063193185378e-05, + "loss": 2.4193, + "step": 15149 + }, + { + "epoch": 1.2226616092325076, + "grad_norm": 0.7000352740287781, + "learning_rate": 2.7929686864473792e-05, + "loss": 2.422, + "step": 15150 + }, + { + "epoch": 1.2227423129690904, + "grad_norm": 0.6983076333999634, + "learning_rate": 2.791874359327443e-05, + "loss": 2.4613, + "step": 15151 + }, + { + "epoch": 1.2228230167056735, + "grad_norm": 0.7520100474357605, + "learning_rate": 2.7907802118528383e-05, + "loss": 2.4147, + "step": 15152 + }, + { + "epoch": 1.2229037204422566, + "grad_norm": 0.7056650519371033, + "learning_rate": 2.789686244050834e-05, + "loss": 2.4568, + "step": 15153 + }, + { + "epoch": 1.2229844241788395, + 
"grad_norm": 0.7092614769935608, + "learning_rate": 2.7885924559486975e-05, + "loss": 2.4758, + "step": 15154 + }, + { + "epoch": 1.2230651279154225, + "grad_norm": 0.702521562576294, + "learning_rate": 2.7874988475736885e-05, + "loss": 2.4893, + "step": 15155 + }, + { + "epoch": 1.2231458316520054, + "grad_norm": 0.7454921007156372, + "learning_rate": 2.786405418953061e-05, + "loss": 2.4277, + "step": 15156 + }, + { + "epoch": 1.2232265353885885, + "grad_norm": 0.659503161907196, + "learning_rate": 2.7853121701140694e-05, + "loss": 2.4664, + "step": 15157 + }, + { + "epoch": 1.2233072391251716, + "grad_norm": 0.6368914842605591, + "learning_rate": 2.7842191010839556e-05, + "loss": 2.3728, + "step": 15158 + }, + { + "epoch": 1.2233879428617545, + "grad_norm": 0.7076737880706787, + "learning_rate": 2.783126211889965e-05, + "loss": 2.4204, + "step": 15159 + }, + { + "epoch": 1.2234686465983375, + "grad_norm": 0.718100905418396, + "learning_rate": 2.7820335025593325e-05, + "loss": 2.478, + "step": 15160 + }, + { + "epoch": 1.2235493503349204, + "grad_norm": 0.6804678440093994, + "learning_rate": 2.7809409731192972e-05, + "loss": 2.3755, + "step": 15161 + }, + { + "epoch": 1.2236300540715035, + "grad_norm": 0.7068643569946289, + "learning_rate": 2.77984862359708e-05, + "loss": 2.3713, + "step": 15162 + }, + { + "epoch": 1.2237107578080866, + "grad_norm": 0.7047072052955627, + "learning_rate": 2.7787564540199097e-05, + "loss": 2.4264, + "step": 15163 + }, + { + "epoch": 1.2237914615446694, + "grad_norm": 0.6985021829605103, + "learning_rate": 2.7776644644150076e-05, + "loss": 2.4101, + "step": 15164 + }, + { + "epoch": 1.2238721652812525, + "grad_norm": 0.7543687224388123, + "learning_rate": 2.776572654809583e-05, + "loss": 2.3722, + "step": 15165 + }, + { + "epoch": 1.2239528690178356, + "grad_norm": 0.7199926972389221, + "learning_rate": 2.7754810252308473e-05, + "loss": 2.3819, + "step": 15166 + }, + { + "epoch": 1.2240335727544185, + "grad_norm": 0.696756899356842, 
+ "learning_rate": 2.7743895757060156e-05, + "loss": 2.4245, + "step": 15167 + }, + { + "epoch": 1.2241142764910016, + "grad_norm": 0.7848933339118958, + "learning_rate": 2.773298306262281e-05, + "loss": 2.4725, + "step": 15168 + }, + { + "epoch": 1.2241949802275847, + "grad_norm": 0.6819389462471008, + "learning_rate": 2.7722072169268432e-05, + "loss": 2.4338, + "step": 15169 + }, + { + "epoch": 1.2242756839641675, + "grad_norm": 0.7185801267623901, + "learning_rate": 2.7711163077268977e-05, + "loss": 2.4745, + "step": 15170 + }, + { + "epoch": 1.2243563877007506, + "grad_norm": 0.7645030617713928, + "learning_rate": 2.7700255786896278e-05, + "loss": 2.4677, + "step": 15171 + }, + { + "epoch": 1.2244370914373335, + "grad_norm": 0.6559275388717651, + "learning_rate": 2.7689350298422202e-05, + "loss": 2.386, + "step": 15172 + }, + { + "epoch": 1.2245177951739166, + "grad_norm": 0.6965066194534302, + "learning_rate": 2.767844661211856e-05, + "loss": 2.4022, + "step": 15173 + }, + { + "epoch": 1.2245984989104994, + "grad_norm": 0.6618858575820923, + "learning_rate": 2.7667544728257057e-05, + "loss": 2.3541, + "step": 15174 + }, + { + "epoch": 1.2246792026470825, + "grad_norm": 0.6635501980781555, + "learning_rate": 2.765664464710941e-05, + "loss": 2.3984, + "step": 15175 + }, + { + "epoch": 1.2247599063836656, + "grad_norm": 0.6987191438674927, + "learning_rate": 2.764574636894729e-05, + "loss": 2.4637, + "step": 15176 + }, + { + "epoch": 1.2248406101202485, + "grad_norm": 0.7289232611656189, + "learning_rate": 2.7634849894042303e-05, + "loss": 2.4033, + "step": 15177 + }, + { + "epoch": 1.2249213138568316, + "grad_norm": 0.7245565056800842, + "learning_rate": 2.762395522266602e-05, + "loss": 2.4281, + "step": 15178 + }, + { + "epoch": 1.2250020175934146, + "grad_norm": 0.6946065425872803, + "learning_rate": 2.761306235508997e-05, + "loss": 2.3869, + "step": 15179 + }, + { + "epoch": 1.2250827213299975, + "grad_norm": 0.6381784677505493, + "learning_rate": 
2.7602171291585666e-05, + "loss": 2.404, + "step": 15180 + }, + { + "epoch": 1.2251634250665806, + "grad_norm": 0.6893685460090637, + "learning_rate": 2.759128203242446e-05, + "loss": 2.4807, + "step": 15181 + }, + { + "epoch": 1.2252441288031637, + "grad_norm": 0.6640260815620422, + "learning_rate": 2.7580394577877787e-05, + "loss": 2.4036, + "step": 15182 + }, + { + "epoch": 1.2253248325397466, + "grad_norm": 0.7125177979469299, + "learning_rate": 2.7569508928217026e-05, + "loss": 2.3869, + "step": 15183 + }, + { + "epoch": 1.2254055362763296, + "grad_norm": 0.657865583896637, + "learning_rate": 2.7558625083713397e-05, + "loss": 2.3869, + "step": 15184 + }, + { + "epoch": 1.2254862400129125, + "grad_norm": 0.6776065230369568, + "learning_rate": 2.7547743044638197e-05, + "loss": 2.4128, + "step": 15185 + }, + { + "epoch": 1.2255669437494956, + "grad_norm": 0.7126299738883972, + "learning_rate": 2.753686281126263e-05, + "loss": 2.4465, + "step": 15186 + }, + { + "epoch": 1.2256476474860787, + "grad_norm": 0.6918273568153381, + "learning_rate": 2.7525984383857873e-05, + "loss": 2.428, + "step": 15187 + }, + { + "epoch": 1.2257283512226615, + "grad_norm": 0.7742759585380554, + "learning_rate": 2.7515107762695025e-05, + "loss": 2.4299, + "step": 15188 + }, + { + "epoch": 1.2258090549592446, + "grad_norm": 0.7194607853889465, + "learning_rate": 2.7504232948045205e-05, + "loss": 2.4315, + "step": 15189 + }, + { + "epoch": 1.2258897586958275, + "grad_norm": 0.6962646245956421, + "learning_rate": 2.7493359940179363e-05, + "loss": 2.4494, + "step": 15190 + }, + { + "epoch": 1.2259704624324106, + "grad_norm": 0.6681686639785767, + "learning_rate": 2.7482488739368538e-05, + "loss": 2.427, + "step": 15191 + }, + { + "epoch": 1.2260511661689937, + "grad_norm": 0.6589877009391785, + "learning_rate": 2.747161934588366e-05, + "loss": 2.4333, + "step": 15192 + }, + { + "epoch": 1.2261318699055765, + "grad_norm": 0.7415218949317932, + "learning_rate": 2.746075175999564e-05, + 
"loss": 2.4203, + "step": 15193 + }, + { + "epoch": 1.2262125736421596, + "grad_norm": 0.7371910214424133, + "learning_rate": 2.7449885981975276e-05, + "loss": 2.4684, + "step": 15194 + }, + { + "epoch": 1.2262932773787427, + "grad_norm": 0.7010802626609802, + "learning_rate": 2.7439022012093407e-05, + "loss": 2.4625, + "step": 15195 + }, + { + "epoch": 1.2263739811153256, + "grad_norm": 0.7125125527381897, + "learning_rate": 2.7428159850620773e-05, + "loss": 2.4075, + "step": 15196 + }, + { + "epoch": 1.2264546848519087, + "grad_norm": 0.701133668422699, + "learning_rate": 2.7417299497828107e-05, + "loss": 2.4525, + "step": 15197 + }, + { + "epoch": 1.2265353885884918, + "grad_norm": 0.7543410658836365, + "learning_rate": 2.7406440953986078e-05, + "loss": 2.474, + "step": 15198 + }, + { + "epoch": 1.2266160923250746, + "grad_norm": 0.69012051820755, + "learning_rate": 2.7395584219365323e-05, + "loss": 2.4853, + "step": 15199 + }, + { + "epoch": 1.2266967960616577, + "grad_norm": 0.6559048295021057, + "learning_rate": 2.7384729294236378e-05, + "loss": 2.4252, + "step": 15200 + }, + { + "epoch": 1.2267774997982406, + "grad_norm": 0.6603518128395081, + "learning_rate": 2.7373876178869794e-05, + "loss": 2.4047, + "step": 15201 + }, + { + "epoch": 1.2268582035348237, + "grad_norm": 0.7159265279769897, + "learning_rate": 2.736302487353609e-05, + "loss": 2.4352, + "step": 15202 + }, + { + "epoch": 1.2269389072714068, + "grad_norm": 0.6784560084342957, + "learning_rate": 2.735217537850565e-05, + "loss": 2.3933, + "step": 15203 + }, + { + "epoch": 1.2270196110079896, + "grad_norm": 0.7341950535774231, + "learning_rate": 2.7341327694048903e-05, + "loss": 2.4514, + "step": 15204 + }, + { + "epoch": 1.2271003147445727, + "grad_norm": 0.726046621799469, + "learning_rate": 2.7330481820436204e-05, + "loss": 2.4427, + "step": 15205 + }, + { + "epoch": 1.2271810184811556, + "grad_norm": 0.6897192001342773, + "learning_rate": 2.7319637757937854e-05, + "loss": 2.4587, + "step": 
15206 + }, + { + "epoch": 1.2272617222177387, + "grad_norm": 0.6981058716773987, + "learning_rate": 2.7308795506824124e-05, + "loss": 2.4297, + "step": 15207 + }, + { + "epoch": 1.2273424259543217, + "grad_norm": 0.694583535194397, + "learning_rate": 2.729795506736522e-05, + "loss": 2.3608, + "step": 15208 + }, + { + "epoch": 1.2274231296909046, + "grad_norm": 0.710192084312439, + "learning_rate": 2.728711643983136e-05, + "loss": 2.3733, + "step": 15209 + }, + { + "epoch": 1.2275038334274877, + "grad_norm": 0.7203633785247803, + "learning_rate": 2.7276279624492595e-05, + "loss": 2.389, + "step": 15210 + }, + { + "epoch": 1.2275845371640708, + "grad_norm": 0.7298668622970581, + "learning_rate": 2.726544462161905e-05, + "loss": 2.3981, + "step": 15211 + }, + { + "epoch": 1.2276652409006537, + "grad_norm": 0.6640039682388306, + "learning_rate": 2.725461143148078e-05, + "loss": 2.4073, + "step": 15212 + }, + { + "epoch": 1.2277459446372367, + "grad_norm": 0.7203015685081482, + "learning_rate": 2.724378005434772e-05, + "loss": 2.4901, + "step": 15213 + }, + { + "epoch": 1.2278266483738198, + "grad_norm": 0.6668895483016968, + "learning_rate": 2.723295049048985e-05, + "loss": 2.4482, + "step": 15214 + }, + { + "epoch": 1.2279073521104027, + "grad_norm": 0.7551584839820862, + "learning_rate": 2.7222122740177103e-05, + "loss": 2.4877, + "step": 15215 + }, + { + "epoch": 1.2279880558469858, + "grad_norm": 0.707202672958374, + "learning_rate": 2.721129680367923e-05, + "loss": 2.4577, + "step": 15216 + }, + { + "epoch": 1.2280687595835686, + "grad_norm": 0.685153603553772, + "learning_rate": 2.7200472681266155e-05, + "loss": 2.476, + "step": 15217 + }, + { + "epoch": 1.2281494633201517, + "grad_norm": 0.6843041181564331, + "learning_rate": 2.718965037320762e-05, + "loss": 2.4164, + "step": 15218 + }, + { + "epoch": 1.2282301670567348, + "grad_norm": 0.6548978686332703, + "learning_rate": 2.7178829879773306e-05, + "loss": 2.4187, + "step": 15219 + }, + { + "epoch": 
1.2283108707933177, + "grad_norm": 0.7037245035171509, + "learning_rate": 2.7168011201232902e-05, + "loss": 2.3621, + "step": 15220 + }, + { + "epoch": 1.2283915745299008, + "grad_norm": 0.6540676951408386, + "learning_rate": 2.7157194337856074e-05, + "loss": 2.4542, + "step": 15221 + }, + { + "epoch": 1.2284722782664836, + "grad_norm": 0.7699899673461914, + "learning_rate": 2.7146379289912338e-05, + "loss": 2.4639, + "step": 15222 + }, + { + "epoch": 1.2285529820030667, + "grad_norm": 0.7178743481636047, + "learning_rate": 2.713556605767128e-05, + "loss": 2.4222, + "step": 15223 + }, + { + "epoch": 1.2286336857396498, + "grad_norm": 0.6749793887138367, + "learning_rate": 2.7124754641402383e-05, + "loss": 2.4323, + "step": 15224 + }, + { + "epoch": 1.2287143894762327, + "grad_norm": 0.7035594582557678, + "learning_rate": 2.711394504137513e-05, + "loss": 2.4466, + "step": 15225 + }, + { + "epoch": 1.2287950932128158, + "grad_norm": 0.6518487930297852, + "learning_rate": 2.7103137257858868e-05, + "loss": 2.4969, + "step": 15226 + }, + { + "epoch": 1.2288757969493989, + "grad_norm": 0.6739057898521423, + "learning_rate": 2.7092331291122974e-05, + "loss": 2.406, + "step": 15227 + }, + { + "epoch": 1.2289565006859817, + "grad_norm": 0.6584770083427429, + "learning_rate": 2.7081527141436767e-05, + "loss": 2.4304, + "step": 15228 + }, + { + "epoch": 1.2290372044225648, + "grad_norm": 0.6846301555633545, + "learning_rate": 2.7070724809069514e-05, + "loss": 2.3995, + "step": 15229 + }, + { + "epoch": 1.2291179081591477, + "grad_norm": 0.6778364777565002, + "learning_rate": 2.705992429429044e-05, + "loss": 2.38, + "step": 15230 + }, + { + "epoch": 1.2291986118957308, + "grad_norm": 0.6957302689552307, + "learning_rate": 2.7049125597368753e-05, + "loss": 2.3973, + "step": 15231 + }, + { + "epoch": 1.2292793156323139, + "grad_norm": 0.730269193649292, + "learning_rate": 2.7038328718573514e-05, + "loss": 2.4829, + "step": 15232 + }, + { + "epoch": 1.2293600193688967, + 
"grad_norm": 0.7114049196243286, + "learning_rate": 2.702753365817384e-05, + "loss": 2.3902, + "step": 15233 + }, + { + "epoch": 1.2294407231054798, + "grad_norm": 0.7137531638145447, + "learning_rate": 2.7016740416438823e-05, + "loss": 2.3957, + "step": 15234 + }, + { + "epoch": 1.2295214268420627, + "grad_norm": 0.7178330421447754, + "learning_rate": 2.7005948993637386e-05, + "loss": 2.4429, + "step": 15235 + }, + { + "epoch": 1.2296021305786458, + "grad_norm": 0.6767767071723938, + "learning_rate": 2.6995159390038506e-05, + "loss": 2.4009, + "step": 15236 + }, + { + "epoch": 1.2296828343152288, + "grad_norm": 0.7713541984558105, + "learning_rate": 2.6984371605911086e-05, + "loss": 2.4326, + "step": 15237 + }, + { + "epoch": 1.2297635380518117, + "grad_norm": 0.7218228578567505, + "learning_rate": 2.6973585641523992e-05, + "loss": 2.4358, + "step": 15238 + }, + { + "epoch": 1.2298442417883948, + "grad_norm": 0.6782575249671936, + "learning_rate": 2.696280149714604e-05, + "loss": 2.3844, + "step": 15239 + }, + { + "epoch": 1.2299249455249779, + "grad_norm": 0.6825734972953796, + "learning_rate": 2.6952019173045982e-05, + "loss": 2.4621, + "step": 15240 + }, + { + "epoch": 1.2300056492615608, + "grad_norm": 0.6587522625923157, + "learning_rate": 2.6941238669492608e-05, + "loss": 2.4465, + "step": 15241 + }, + { + "epoch": 1.2300863529981438, + "grad_norm": 0.6898796558380127, + "learning_rate": 2.6930459986754498e-05, + "loss": 2.4469, + "step": 15242 + }, + { + "epoch": 1.230167056734727, + "grad_norm": 0.6764062643051147, + "learning_rate": 2.6919683125100338e-05, + "loss": 2.4476, + "step": 15243 + }, + { + "epoch": 1.2302477604713098, + "grad_norm": 0.6647047400474548, + "learning_rate": 2.6908908084798733e-05, + "loss": 2.3677, + "step": 15244 + }, + { + "epoch": 1.2303284642078929, + "grad_norm": 0.7091608047485352, + "learning_rate": 2.6898134866118174e-05, + "loss": 2.4605, + "step": 15245 + }, + { + "epoch": 1.2304091679444757, + "grad_norm": 
0.691007137298584, + "learning_rate": 2.6887363469327188e-05, + "loss": 2.4397, + "step": 15246 + }, + { + "epoch": 1.2304898716810588, + "grad_norm": 0.6685532927513123, + "learning_rate": 2.6876593894694214e-05, + "loss": 2.4279, + "step": 15247 + }, + { + "epoch": 1.230570575417642, + "grad_norm": 0.684474766254425, + "learning_rate": 2.686582614248767e-05, + "loss": 2.4162, + "step": 15248 + }, + { + "epoch": 1.2306512791542248, + "grad_norm": 0.657293975353241, + "learning_rate": 2.6855060212975915e-05, + "loss": 2.4337, + "step": 15249 + }, + { + "epoch": 1.2307319828908079, + "grad_norm": 0.7136504650115967, + "learning_rate": 2.684429610642729e-05, + "loss": 2.4156, + "step": 15250 + }, + { + "epoch": 1.2308126866273907, + "grad_norm": 0.6564410924911499, + "learning_rate": 2.6833533823110013e-05, + "loss": 2.5101, + "step": 15251 + }, + { + "epoch": 1.2308933903639738, + "grad_norm": 0.6628747582435608, + "learning_rate": 2.682277336329233e-05, + "loss": 2.3933, + "step": 15252 + }, + { + "epoch": 1.230974094100557, + "grad_norm": 0.7362595796585083, + "learning_rate": 2.681201472724244e-05, + "loss": 2.4541, + "step": 15253 + }, + { + "epoch": 1.2310547978371398, + "grad_norm": 0.7604697346687317, + "learning_rate": 2.680125791522844e-05, + "loss": 2.4383, + "step": 15254 + }, + { + "epoch": 1.2311355015737229, + "grad_norm": 0.7128429412841797, + "learning_rate": 2.6790502927518434e-05, + "loss": 2.4492, + "step": 15255 + }, + { + "epoch": 1.231216205310306, + "grad_norm": 0.6761955618858337, + "learning_rate": 2.677974976438047e-05, + "loss": 2.4355, + "step": 15256 + }, + { + "epoch": 1.2312969090468888, + "grad_norm": 0.6687077879905701, + "learning_rate": 2.6768998426082538e-05, + "loss": 2.4317, + "step": 15257 + }, + { + "epoch": 1.231377612783472, + "grad_norm": 0.7423825860023499, + "learning_rate": 2.675824891289259e-05, + "loss": 2.4216, + "step": 15258 + }, + { + "epoch": 1.231458316520055, + "grad_norm": 0.671130359172821, + "learning_rate": 
2.6747501225078542e-05, + "loss": 2.4775, + "step": 15259 + }, + { + "epoch": 1.2315390202566379, + "grad_norm": 0.7421461939811707, + "learning_rate": 2.6736755362908273e-05, + "loss": 2.4042, + "step": 15260 + }, + { + "epoch": 1.231619723993221, + "grad_norm": 0.7084131240844727, + "learning_rate": 2.6726011326649547e-05, + "loss": 2.4506, + "step": 15261 + }, + { + "epoch": 1.2317004277298038, + "grad_norm": 0.641852855682373, + "learning_rate": 2.671526911657015e-05, + "loss": 2.4261, + "step": 15262 + }, + { + "epoch": 1.231781131466387, + "grad_norm": 0.7627724409103394, + "learning_rate": 2.670452873293785e-05, + "loss": 2.4647, + "step": 15263 + }, + { + "epoch": 1.23186183520297, + "grad_norm": 0.6638163924217224, + "learning_rate": 2.669379017602026e-05, + "loss": 2.4208, + "step": 15264 + }, + { + "epoch": 1.2319425389395529, + "grad_norm": 0.6815361380577087, + "learning_rate": 2.668305344608505e-05, + "loss": 2.4404, + "step": 15265 + }, + { + "epoch": 1.232023242676136, + "grad_norm": 0.6466485857963562, + "learning_rate": 2.6672318543399823e-05, + "loss": 2.4327, + "step": 15266 + }, + { + "epoch": 1.2321039464127188, + "grad_norm": 0.7119305729866028, + "learning_rate": 2.6661585468232042e-05, + "loss": 2.4266, + "step": 15267 + }, + { + "epoch": 1.232184650149302, + "grad_norm": 0.7245718836784363, + "learning_rate": 2.6650854220849286e-05, + "loss": 2.4484, + "step": 15268 + }, + { + "epoch": 1.232265353885885, + "grad_norm": 0.7050287127494812, + "learning_rate": 2.6640124801518972e-05, + "loss": 2.4441, + "step": 15269 + }, + { + "epoch": 1.2323460576224678, + "grad_norm": 0.6906494498252869, + "learning_rate": 2.6629397210508556e-05, + "loss": 2.4297, + "step": 15270 + }, + { + "epoch": 1.232426761359051, + "grad_norm": 0.7224171757698059, + "learning_rate": 2.661867144808532e-05, + "loss": 2.4279, + "step": 15271 + }, + { + "epoch": 1.232507465095634, + "grad_norm": 0.688804030418396, + "learning_rate": 2.6607947514516606e-05, + "loss": 
2.4741, + "step": 15272 + }, + { + "epoch": 1.232588168832217, + "grad_norm": 0.6462350487709045, + "learning_rate": 2.6597225410069726e-05, + "loss": 2.4499, + "step": 15273 + }, + { + "epoch": 1.2326688725688, + "grad_norm": 0.6860110759735107, + "learning_rate": 2.658650513501184e-05, + "loss": 2.4488, + "step": 15274 + }, + { + "epoch": 1.2327495763053828, + "grad_norm": 0.7158305644989014, + "learning_rate": 2.6575786689610138e-05, + "loss": 2.4318, + "step": 15275 + }, + { + "epoch": 1.232830280041966, + "grad_norm": 0.7740959525108337, + "learning_rate": 2.6565070074131804e-05, + "loss": 2.4824, + "step": 15276 + }, + { + "epoch": 1.232910983778549, + "grad_norm": 0.7573856711387634, + "learning_rate": 2.6554355288843847e-05, + "loss": 2.4034, + "step": 15277 + }, + { + "epoch": 1.2329916875151319, + "grad_norm": 0.6809369921684265, + "learning_rate": 2.654364233401332e-05, + "loss": 2.5085, + "step": 15278 + }, + { + "epoch": 1.233072391251715, + "grad_norm": 0.6695643067359924, + "learning_rate": 2.6532931209907307e-05, + "loss": 2.4697, + "step": 15279 + }, + { + "epoch": 1.2331530949882978, + "grad_norm": 0.7218750715255737, + "learning_rate": 2.6522221916792655e-05, + "loss": 2.4753, + "step": 15280 + }, + { + "epoch": 1.233233798724881, + "grad_norm": 0.8171822428703308, + "learning_rate": 2.6511514454936314e-05, + "loss": 2.45, + "step": 15281 + }, + { + "epoch": 1.233314502461464, + "grad_norm": 0.7234573364257812, + "learning_rate": 2.6500808824605162e-05, + "loss": 2.3963, + "step": 15282 + }, + { + "epoch": 1.2333952061980469, + "grad_norm": 0.6993409395217896, + "learning_rate": 2.6490105026065948e-05, + "loss": 2.4449, + "step": 15283 + }, + { + "epoch": 1.23347590993463, + "grad_norm": 0.7984449863433838, + "learning_rate": 2.6479403059585472e-05, + "loss": 2.4322, + "step": 15284 + }, + { + "epoch": 1.233556613671213, + "grad_norm": 0.683971107006073, + "learning_rate": 2.6468702925430466e-05, + "loss": 2.4125, + "step": 15285 + }, + { + 
"epoch": 1.233637317407796, + "grad_norm": 0.6739822626113892, + "learning_rate": 2.6458004623867617e-05, + "loss": 2.4487, + "step": 15286 + }, + { + "epoch": 1.233718021144379, + "grad_norm": 0.7003912925720215, + "learning_rate": 2.644730815516351e-05, + "loss": 2.4437, + "step": 15287 + }, + { + "epoch": 1.233798724880962, + "grad_norm": 0.7011744379997253, + "learning_rate": 2.643661351958474e-05, + "loss": 2.4798, + "step": 15288 + }, + { + "epoch": 1.233879428617545, + "grad_norm": 0.7003397941589355, + "learning_rate": 2.6425920717397867e-05, + "loss": 2.4554, + "step": 15289 + }, + { + "epoch": 1.233960132354128, + "grad_norm": 0.6682165265083313, + "learning_rate": 2.6415229748869374e-05, + "loss": 2.4252, + "step": 15290 + }, + { + "epoch": 1.234040836090711, + "grad_norm": 0.6712457537651062, + "learning_rate": 2.6404540614265715e-05, + "loss": 2.4225, + "step": 15291 + }, + { + "epoch": 1.234121539827294, + "grad_norm": 0.654464602470398, + "learning_rate": 2.63938533138533e-05, + "loss": 2.4462, + "step": 15292 + }, + { + "epoch": 1.234202243563877, + "grad_norm": 0.7311797738075256, + "learning_rate": 2.638316784789845e-05, + "loss": 2.502, + "step": 15293 + }, + { + "epoch": 1.23428294730046, + "grad_norm": 0.6836559176445007, + "learning_rate": 2.6372484216667492e-05, + "loss": 2.5134, + "step": 15294 + }, + { + "epoch": 1.234363651037043, + "grad_norm": 0.6961826086044312, + "learning_rate": 2.636180242042672e-05, + "loss": 2.4479, + "step": 15295 + }, + { + "epoch": 1.234444354773626, + "grad_norm": 0.6824259161949158, + "learning_rate": 2.635112245944229e-05, + "loss": 2.4299, + "step": 15296 + }, + { + "epoch": 1.234525058510209, + "grad_norm": 0.7594609260559082, + "learning_rate": 2.634044433398042e-05, + "loss": 2.4469, + "step": 15297 + }, + { + "epoch": 1.234605762246792, + "grad_norm": 0.7044653296470642, + "learning_rate": 2.632976804430721e-05, + "loss": 2.447, + "step": 15298 + }, + { + "epoch": 1.234686465983375, + "grad_norm": 
0.6986916065216064, + "learning_rate": 2.631909359068876e-05, + "loss": 2.4705, + "step": 15299 + }, + { + "epoch": 1.234767169719958, + "grad_norm": 0.7025431990623474, + "learning_rate": 2.630842097339111e-05, + "loss": 2.3951, + "step": 15300 + }, + { + "epoch": 1.2348478734565411, + "grad_norm": 0.6533786058425903, + "learning_rate": 2.6297750192680237e-05, + "loss": 2.3769, + "step": 15301 + }, + { + "epoch": 1.234928577193124, + "grad_norm": 0.6575472354888916, + "learning_rate": 2.628708124882212e-05, + "loss": 2.4293, + "step": 15302 + }, + { + "epoch": 1.235009280929707, + "grad_norm": 0.6712046265602112, + "learning_rate": 2.6276414142082584e-05, + "loss": 2.4819, + "step": 15303 + }, + { + "epoch": 1.2350899846662902, + "grad_norm": 0.6947652101516724, + "learning_rate": 2.6265748872727535e-05, + "loss": 2.449, + "step": 15304 + }, + { + "epoch": 1.235170688402873, + "grad_norm": 0.6881443858146667, + "learning_rate": 2.62550854410228e-05, + "loss": 2.3991, + "step": 15305 + }, + { + "epoch": 1.2352513921394561, + "grad_norm": 0.6681519746780396, + "learning_rate": 2.624442384723407e-05, + "loss": 2.4005, + "step": 15306 + }, + { + "epoch": 1.235332095876039, + "grad_norm": 0.6728120446205139, + "learning_rate": 2.62337640916271e-05, + "loss": 2.4242, + "step": 15307 + }, + { + "epoch": 1.235412799612622, + "grad_norm": 0.707360029220581, + "learning_rate": 2.622310617446755e-05, + "loss": 2.4385, + "step": 15308 + }, + { + "epoch": 1.2354935033492052, + "grad_norm": 0.6890079975128174, + "learning_rate": 2.6212450096021058e-05, + "loss": 2.443, + "step": 15309 + }, + { + "epoch": 1.235574207085788, + "grad_norm": 0.7022379636764526, + "learning_rate": 2.620179585655318e-05, + "loss": 2.3982, + "step": 15310 + }, + { + "epoch": 1.235654910822371, + "grad_norm": 0.7283182740211487, + "learning_rate": 2.61911434563295e-05, + "loss": 2.4197, + "step": 15311 + }, + { + "epoch": 1.235735614558954, + "grad_norm": 0.6721852421760559, + "learning_rate": 
2.6180492895615426e-05, + "loss": 2.4356, + "step": 15312 + }, + { + "epoch": 1.235816318295537, + "grad_norm": 0.6817916631698608, + "learning_rate": 2.616984417467645e-05, + "loss": 2.4325, + "step": 15313 + }, + { + "epoch": 1.2358970220321202, + "grad_norm": 0.6826596260070801, + "learning_rate": 2.6159197293777972e-05, + "loss": 2.4043, + "step": 15314 + }, + { + "epoch": 1.235977725768703, + "grad_norm": 0.7135530114173889, + "learning_rate": 2.6148552253185288e-05, + "loss": 2.4269, + "step": 15315 + }, + { + "epoch": 1.236058429505286, + "grad_norm": 0.7027753591537476, + "learning_rate": 2.6137909053163722e-05, + "loss": 2.4266, + "step": 15316 + }, + { + "epoch": 1.2361391332418692, + "grad_norm": 0.6597041487693787, + "learning_rate": 2.6127267693978552e-05, + "loss": 2.4073, + "step": 15317 + }, + { + "epoch": 1.236219836978452, + "grad_norm": 0.6450026631355286, + "learning_rate": 2.6116628175894974e-05, + "loss": 2.4299, + "step": 15318 + }, + { + "epoch": 1.2363005407150351, + "grad_norm": 0.7740476727485657, + "learning_rate": 2.6105990499178156e-05, + "loss": 2.4088, + "step": 15319 + }, + { + "epoch": 1.2363812444516182, + "grad_norm": 0.6460183262825012, + "learning_rate": 2.609535466409322e-05, + "loss": 2.4311, + "step": 15320 + }, + { + "epoch": 1.236461948188201, + "grad_norm": 0.6514838337898254, + "learning_rate": 2.608472067090525e-05, + "loss": 2.4069, + "step": 15321 + }, + { + "epoch": 1.2365426519247842, + "grad_norm": 0.7281234860420227, + "learning_rate": 2.6074088519879237e-05, + "loss": 2.4245, + "step": 15322 + }, + { + "epoch": 1.236623355661367, + "grad_norm": 0.752983570098877, + "learning_rate": 2.606345821128018e-05, + "loss": 2.4149, + "step": 15323 + }, + { + "epoch": 1.2367040593979501, + "grad_norm": 0.6912856101989746, + "learning_rate": 2.6052829745373054e-05, + "loss": 2.4489, + "step": 15324 + }, + { + "epoch": 1.236784763134533, + "grad_norm": 0.6719293594360352, + "learning_rate": 2.604220312242267e-05, + "loss": 
2.457, + "step": 15325 + }, + { + "epoch": 1.236865466871116, + "grad_norm": 0.7440586090087891, + "learning_rate": 2.6031578342693918e-05, + "loss": 2.4657, + "step": 15326 + }, + { + "epoch": 1.2369461706076992, + "grad_norm": 0.694442629814148, + "learning_rate": 2.602095540645162e-05, + "loss": 2.4422, + "step": 15327 + }, + { + "epoch": 1.237026874344282, + "grad_norm": 0.7186843752861023, + "learning_rate": 2.601033431396046e-05, + "loss": 2.4229, + "step": 15328 + }, + { + "epoch": 1.2371075780808651, + "grad_norm": 0.7401825785636902, + "learning_rate": 2.5999715065485153e-05, + "loss": 2.45, + "step": 15329 + }, + { + "epoch": 1.2371882818174482, + "grad_norm": 0.6710138916969299, + "learning_rate": 2.598909766129045e-05, + "loss": 2.4074, + "step": 15330 + }, + { + "epoch": 1.237268985554031, + "grad_norm": 0.7867769598960876, + "learning_rate": 2.5978482101640867e-05, + "loss": 2.4709, + "step": 15331 + }, + { + "epoch": 1.2373496892906142, + "grad_norm": 0.7076219916343689, + "learning_rate": 2.5967868386801e-05, + "loss": 2.4887, + "step": 15332 + }, + { + "epoch": 1.2374303930271973, + "grad_norm": 0.7277626991271973, + "learning_rate": 2.5957256517035378e-05, + "loss": 2.4295, + "step": 15333 + }, + { + "epoch": 1.2375110967637801, + "grad_norm": 0.7339804768562317, + "learning_rate": 2.5946646492608506e-05, + "loss": 2.4624, + "step": 15334 + }, + { + "epoch": 1.2375918005003632, + "grad_norm": 0.6707656383514404, + "learning_rate": 2.593603831378475e-05, + "loss": 2.4159, + "step": 15335 + }, + { + "epoch": 1.237672504236946, + "grad_norm": 0.7118813991546631, + "learning_rate": 2.592543198082852e-05, + "loss": 2.4496, + "step": 15336 + }, + { + "epoch": 1.2377532079735292, + "grad_norm": 0.675167977809906, + "learning_rate": 2.591482749400419e-05, + "loss": 2.4519, + "step": 15337 + }, + { + "epoch": 1.2378339117101123, + "grad_norm": 0.8245306611061096, + "learning_rate": 2.5904224853575986e-05, + "loss": 2.4732, + "step": 15338 + }, + { + 
"epoch": 1.2379146154466951, + "grad_norm": 0.7411863207817078, + "learning_rate": 2.5893624059808184e-05, + "loss": 2.4458, + "step": 15339 + }, + { + "epoch": 1.2379953191832782, + "grad_norm": 0.6864522695541382, + "learning_rate": 2.5883025112964997e-05, + "loss": 2.4264, + "step": 15340 + }, + { + "epoch": 1.238076022919861, + "grad_norm": 0.6585919260978699, + "learning_rate": 2.5872428013310567e-05, + "loss": 2.3904, + "step": 15341 + }, + { + "epoch": 1.2381567266564442, + "grad_norm": 0.6605508327484131, + "learning_rate": 2.5861832761108995e-05, + "loss": 2.4828, + "step": 15342 + }, + { + "epoch": 1.2382374303930272, + "grad_norm": 0.7353223562240601, + "learning_rate": 2.5851239356624392e-05, + "loss": 2.4335, + "step": 15343 + }, + { + "epoch": 1.2383181341296101, + "grad_norm": 0.6907783150672913, + "learning_rate": 2.5840647800120688e-05, + "loss": 2.4394, + "step": 15344 + }, + { + "epoch": 1.2383988378661932, + "grad_norm": 0.7239590287208557, + "learning_rate": 2.5830058091861896e-05, + "loss": 2.4221, + "step": 15345 + }, + { + "epoch": 1.2384795416027763, + "grad_norm": 0.7001412510871887, + "learning_rate": 2.5819470232111975e-05, + "loss": 2.4521, + "step": 15346 + }, + { + "epoch": 1.2385602453393592, + "grad_norm": 0.6983658671379089, + "learning_rate": 2.580888422113473e-05, + "loss": 2.4839, + "step": 15347 + }, + { + "epoch": 1.2386409490759422, + "grad_norm": 0.7829005718231201, + "learning_rate": 2.5798300059194037e-05, + "loss": 2.4546, + "step": 15348 + }, + { + "epoch": 1.2387216528125253, + "grad_norm": 0.7248061299324036, + "learning_rate": 2.5787717746553664e-05, + "loss": 2.4341, + "step": 15349 + }, + { + "epoch": 1.2388023565491082, + "grad_norm": 0.7921163439750671, + "learning_rate": 2.577713728347736e-05, + "loss": 2.475, + "step": 15350 + }, + { + "epoch": 1.2388830602856913, + "grad_norm": 0.6571238040924072, + "learning_rate": 2.5766558670228813e-05, + "loss": 2.4636, + "step": 15351 + }, + { + "epoch": 
1.2389637640222741, + "grad_norm": 0.7436683177947998, + "learning_rate": 2.575598190707168e-05, + "loss": 2.4868, + "step": 15352 + }, + { + "epoch": 1.2390444677588572, + "grad_norm": 0.6471900939941406, + "learning_rate": 2.5745406994269573e-05, + "loss": 2.4349, + "step": 15353 + }, + { + "epoch": 1.2391251714954403, + "grad_norm": 0.6612011194229126, + "learning_rate": 2.5734833932086012e-05, + "loss": 2.4088, + "step": 15354 + }, + { + "epoch": 1.2392058752320232, + "grad_norm": 0.6882977485656738, + "learning_rate": 2.572426272078451e-05, + "loss": 2.4344, + "step": 15355 + }, + { + "epoch": 1.2392865789686063, + "grad_norm": 0.6836830973625183, + "learning_rate": 2.5713693360628565e-05, + "loss": 2.4325, + "step": 15356 + }, + { + "epoch": 1.2393672827051891, + "grad_norm": 0.712127149105072, + "learning_rate": 2.5703125851881536e-05, + "loss": 2.4505, + "step": 15357 + }, + { + "epoch": 1.2394479864417722, + "grad_norm": 0.7162468433380127, + "learning_rate": 2.5692560194806837e-05, + "loss": 2.4167, + "step": 15358 + }, + { + "epoch": 1.2395286901783553, + "grad_norm": 0.7770177125930786, + "learning_rate": 2.568199638966777e-05, + "loss": 2.4072, + "step": 15359 + }, + { + "epoch": 1.2396093939149382, + "grad_norm": 0.7049651741981506, + "learning_rate": 2.5671434436727636e-05, + "loss": 2.434, + "step": 15360 + }, + { + "epoch": 1.2396900976515213, + "grad_norm": 0.7793349027633667, + "learning_rate": 2.566087433624964e-05, + "loss": 2.4762, + "step": 15361 + }, + { + "epoch": 1.2397708013881044, + "grad_norm": 0.6776690483093262, + "learning_rate": 2.5650316088497018e-05, + "loss": 2.402, + "step": 15362 + }, + { + "epoch": 1.2398515051246872, + "grad_norm": 0.7207701802253723, + "learning_rate": 2.5639759693732834e-05, + "loss": 2.4398, + "step": 15363 + }, + { + "epoch": 1.2399322088612703, + "grad_norm": 0.759787917137146, + "learning_rate": 2.5629205152220215e-05, + "loss": 2.4268, + "step": 15364 + }, + { + "epoch": 1.2400129125978534, + 
"grad_norm": 0.6906142830848694, + "learning_rate": 2.5618652464222215e-05, + "loss": 2.4075, + "step": 15365 + }, + { + "epoch": 1.2400936163344363, + "grad_norm": 0.7002954483032227, + "learning_rate": 2.560810163000187e-05, + "loss": 2.4516, + "step": 15366 + }, + { + "epoch": 1.2401743200710194, + "grad_norm": 0.7287559509277344, + "learning_rate": 2.5597552649822053e-05, + "loss": 2.4975, + "step": 15367 + }, + { + "epoch": 1.2402550238076022, + "grad_norm": 0.6523926854133606, + "learning_rate": 2.558700552394572e-05, + "loss": 2.4085, + "step": 15368 + }, + { + "epoch": 1.2403357275441853, + "grad_norm": 0.7289387583732605, + "learning_rate": 2.5576460252635727e-05, + "loss": 2.4789, + "step": 15369 + }, + { + "epoch": 1.2404164312807684, + "grad_norm": 0.6613432765007019, + "learning_rate": 2.5565916836154878e-05, + "loss": 2.4263, + "step": 15370 + }, + { + "epoch": 1.2404971350173513, + "grad_norm": 0.7275245785713196, + "learning_rate": 2.555537527476597e-05, + "loss": 2.4652, + "step": 15371 + }, + { + "epoch": 1.2405778387539343, + "grad_norm": 0.6726976037025452, + "learning_rate": 2.554483556873173e-05, + "loss": 2.4092, + "step": 15372 + }, + { + "epoch": 1.2406585424905172, + "grad_norm": 0.6908233761787415, + "learning_rate": 2.5534297718314794e-05, + "loss": 2.3678, + "step": 15373 + }, + { + "epoch": 1.2407392462271003, + "grad_norm": 0.6893147826194763, + "learning_rate": 2.5523761723777806e-05, + "loss": 2.4625, + "step": 15374 + }, + { + "epoch": 1.2408199499636834, + "grad_norm": 0.7640267014503479, + "learning_rate": 2.551322758538339e-05, + "loss": 2.446, + "step": 15375 + }, + { + "epoch": 1.2409006537002663, + "grad_norm": 0.7187458276748657, + "learning_rate": 2.550269530339402e-05, + "loss": 2.4215, + "step": 15376 + }, + { + "epoch": 1.2409813574368493, + "grad_norm": 0.8041789531707764, + "learning_rate": 2.5492164878072234e-05, + "loss": 2.5085, + "step": 15377 + }, + { + "epoch": 1.2410620611734324, + "grad_norm": 
0.6582188010215759, + "learning_rate": 2.5481636309680445e-05, + "loss": 2.467, + "step": 15378 + }, + { + "epoch": 1.2411427649100153, + "grad_norm": 0.705731213092804, + "learning_rate": 2.5471109598481112e-05, + "loss": 2.3764, + "step": 15379 + }, + { + "epoch": 1.2412234686465984, + "grad_norm": 0.6918940544128418, + "learning_rate": 2.5460584744736495e-05, + "loss": 2.4513, + "step": 15380 + }, + { + "epoch": 1.2413041723831812, + "grad_norm": 0.7402673959732056, + "learning_rate": 2.5450061748708975e-05, + "loss": 2.5133, + "step": 15381 + }, + { + "epoch": 1.2413848761197643, + "grad_norm": 0.6740667223930359, + "learning_rate": 2.543954061066083e-05, + "loss": 2.4649, + "step": 15382 + }, + { + "epoch": 1.2414655798563474, + "grad_norm": 0.6665407419204712, + "learning_rate": 2.5429021330854197e-05, + "loss": 2.4321, + "step": 15383 + }, + { + "epoch": 1.2415462835929303, + "grad_norm": 0.7324530482292175, + "learning_rate": 2.5418503909551296e-05, + "loss": 2.3574, + "step": 15384 + }, + { + "epoch": 1.2416269873295134, + "grad_norm": 0.7117868661880493, + "learning_rate": 2.5407988347014255e-05, + "loss": 2.4552, + "step": 15385 + }, + { + "epoch": 1.2417076910660962, + "grad_norm": 0.7162930965423584, + "learning_rate": 2.5397474643505103e-05, + "loss": 2.4135, + "step": 15386 + }, + { + "epoch": 1.2417883948026793, + "grad_norm": 0.7301257848739624, + "learning_rate": 2.5386962799285895e-05, + "loss": 2.4277, + "step": 15387 + }, + { + "epoch": 1.2418690985392624, + "grad_norm": 0.7404977679252625, + "learning_rate": 2.5376452814618645e-05, + "loss": 2.478, + "step": 15388 + }, + { + "epoch": 1.2419498022758453, + "grad_norm": 0.6546272039413452, + "learning_rate": 2.536594468976522e-05, + "loss": 2.4879, + "step": 15389 + }, + { + "epoch": 1.2420305060124284, + "grad_norm": 0.6501599550247192, + "learning_rate": 2.5355438424987565e-05, + "loss": 2.3964, + "step": 15390 + }, + { + "epoch": 1.2421112097490115, + "grad_norm": 0.6711748242378235, + 
"learning_rate": 2.5344934020547496e-05, + "loss": 2.4123, + "step": 15391 + }, + { + "epoch": 1.2421919134855943, + "grad_norm": 0.6803534030914307, + "learning_rate": 2.5334431476706823e-05, + "loss": 2.4271, + "step": 15392 + }, + { + "epoch": 1.2422726172221774, + "grad_norm": 0.7407296299934387, + "learning_rate": 2.5323930793727302e-05, + "loss": 2.49, + "step": 15393 + }, + { + "epoch": 1.2423533209587605, + "grad_norm": 0.701870858669281, + "learning_rate": 2.5313431971870617e-05, + "loss": 2.4534, + "step": 15394 + }, + { + "epoch": 1.2424340246953434, + "grad_norm": 0.6658090353012085, + "learning_rate": 2.5302935011398475e-05, + "loss": 2.4581, + "step": 15395 + }, + { + "epoch": 1.2425147284319265, + "grad_norm": 0.6616473197937012, + "learning_rate": 2.529243991257243e-05, + "loss": 2.4169, + "step": 15396 + }, + { + "epoch": 1.2425954321685093, + "grad_norm": 0.6714773178100586, + "learning_rate": 2.5281946675654067e-05, + "loss": 2.4159, + "step": 15397 + }, + { + "epoch": 1.2426761359050924, + "grad_norm": 0.6789337396621704, + "learning_rate": 2.5271455300904935e-05, + "loss": 2.4211, + "step": 15398 + }, + { + "epoch": 1.2427568396416755, + "grad_norm": 0.6793739795684814, + "learning_rate": 2.5260965788586456e-05, + "loss": 2.4337, + "step": 15399 + }, + { + "epoch": 1.2428375433782584, + "grad_norm": 0.6432294249534607, + "learning_rate": 2.5250478138960076e-05, + "loss": 2.4268, + "step": 15400 + }, + { + "epoch": 1.2429182471148414, + "grad_norm": 0.6960669159889221, + "learning_rate": 2.523999235228718e-05, + "loss": 2.3535, + "step": 15401 + }, + { + "epoch": 1.2429989508514243, + "grad_norm": 0.6724488735198975, + "learning_rate": 2.5229508428829096e-05, + "loss": 2.4294, + "step": 15402 + }, + { + "epoch": 1.2430796545880074, + "grad_norm": 0.636105477809906, + "learning_rate": 2.521902636884711e-05, + "loss": 2.4438, + "step": 15403 + }, + { + "epoch": 1.2431603583245905, + "grad_norm": 0.6865580677986145, + "learning_rate": 
2.52085461726025e-05, + "loss": 2.4473, + "step": 15404 + }, + { + "epoch": 1.2432410620611734, + "grad_norm": 0.6740261316299438, + "learning_rate": 2.5198067840356398e-05, + "loss": 2.4642, + "step": 15405 + }, + { + "epoch": 1.2433217657977564, + "grad_norm": 0.7241789698600769, + "learning_rate": 2.518759137236998e-05, + "loss": 2.4294, + "step": 15406 + }, + { + "epoch": 1.2434024695343395, + "grad_norm": 0.6839794516563416, + "learning_rate": 2.5177116768904373e-05, + "loss": 2.4697, + "step": 15407 + }, + { + "epoch": 1.2434831732709224, + "grad_norm": 0.677390992641449, + "learning_rate": 2.5166644030220578e-05, + "loss": 2.4411, + "step": 15408 + }, + { + "epoch": 1.2435638770075055, + "grad_norm": 0.709065854549408, + "learning_rate": 2.515617315657962e-05, + "loss": 2.4392, + "step": 15409 + }, + { + "epoch": 1.2436445807440886, + "grad_norm": 0.6735498905181885, + "learning_rate": 2.514570414824249e-05, + "loss": 2.3924, + "step": 15410 + }, + { + "epoch": 1.2437252844806714, + "grad_norm": 0.6729374527931213, + "learning_rate": 2.513523700547007e-05, + "loss": 2.4464, + "step": 15411 + }, + { + "epoch": 1.2438059882172545, + "grad_norm": 0.7232720851898193, + "learning_rate": 2.5124771728523244e-05, + "loss": 2.3975, + "step": 15412 + }, + { + "epoch": 1.2438866919538374, + "grad_norm": 0.7467584609985352, + "learning_rate": 2.5114308317662837e-05, + "loss": 2.4191, + "step": 15413 + }, + { + "epoch": 1.2439673956904205, + "grad_norm": 0.6951141953468323, + "learning_rate": 2.5103846773149642e-05, + "loss": 2.4207, + "step": 15414 + }, + { + "epoch": 1.2440480994270036, + "grad_norm": 0.6427489519119263, + "learning_rate": 2.5093387095244336e-05, + "loss": 2.3539, + "step": 15415 + }, + { + "epoch": 1.2441288031635864, + "grad_norm": 0.729580283164978, + "learning_rate": 2.5082929284207644e-05, + "loss": 2.4464, + "step": 15416 + }, + { + "epoch": 1.2442095069001695, + "grad_norm": 0.7247009873390198, + "learning_rate": 2.5072473340300207e-05, + 
"loss": 2.4294, + "step": 15417 + }, + { + "epoch": 1.2442902106367524, + "grad_norm": 0.7037674784660339, + "learning_rate": 2.5062019263782577e-05, + "loss": 2.4294, + "step": 15418 + }, + { + "epoch": 1.2443709143733355, + "grad_norm": 0.6997841596603394, + "learning_rate": 2.5051567054915303e-05, + "loss": 2.4976, + "step": 15419 + }, + { + "epoch": 1.2444516181099186, + "grad_norm": 0.7001172304153442, + "learning_rate": 2.504111671395891e-05, + "loss": 2.371, + "step": 15420 + }, + { + "epoch": 1.2445323218465014, + "grad_norm": 0.6781473159790039, + "learning_rate": 2.5030668241173827e-05, + "loss": 2.4124, + "step": 15421 + }, + { + "epoch": 1.2446130255830845, + "grad_norm": 0.7053182125091553, + "learning_rate": 2.5020221636820463e-05, + "loss": 2.4109, + "step": 15422 + }, + { + "epoch": 1.2446937293196676, + "grad_norm": 0.68635493516922, + "learning_rate": 2.50097769011592e-05, + "loss": 2.4548, + "step": 15423 + }, + { + "epoch": 1.2447744330562505, + "grad_norm": 0.7015564441680908, + "learning_rate": 2.4999334034450293e-05, + "loss": 2.4537, + "step": 15424 + }, + { + "epoch": 1.2448551367928335, + "grad_norm": 0.694054901599884, + "learning_rate": 2.4988893036954043e-05, + "loss": 2.4396, + "step": 15425 + }, + { + "epoch": 1.2449358405294164, + "grad_norm": 0.702518880367279, + "learning_rate": 2.4978453908930665e-05, + "loss": 2.4015, + "step": 15426 + }, + { + "epoch": 1.2450165442659995, + "grad_norm": 0.7237387895584106, + "learning_rate": 2.4968016650640348e-05, + "loss": 2.4257, + "step": 15427 + }, + { + "epoch": 1.2450972480025826, + "grad_norm": 0.7133163809776306, + "learning_rate": 2.4957581262343154e-05, + "loss": 2.4532, + "step": 15428 + }, + { + "epoch": 1.2451779517391655, + "grad_norm": 0.8339287042617798, + "learning_rate": 2.4947147744299203e-05, + "loss": 2.4621, + "step": 15429 + }, + { + "epoch": 1.2452586554757485, + "grad_norm": 0.7620034217834473, + "learning_rate": 2.493671609676852e-05, + "loss": 2.365, + "step": 15430 + 
}, + { + "epoch": 1.2453393592123314, + "grad_norm": 0.7445465922355652, + "learning_rate": 2.4926286320011094e-05, + "loss": 2.4764, + "step": 15431 + }, + { + "epoch": 1.2454200629489145, + "grad_norm": 0.7366160154342651, + "learning_rate": 2.4915858414286852e-05, + "loss": 2.4597, + "step": 15432 + }, + { + "epoch": 1.2455007666854976, + "grad_norm": 0.7098437547683716, + "learning_rate": 2.490543237985572e-05, + "loss": 2.4202, + "step": 15433 + }, + { + "epoch": 1.2455814704220805, + "grad_norm": 0.6483333706855774, + "learning_rate": 2.4895008216977478e-05, + "loss": 2.4108, + "step": 15434 + }, + { + "epoch": 1.2456621741586635, + "grad_norm": 0.6797904968261719, + "learning_rate": 2.4884585925911963e-05, + "loss": 2.4414, + "step": 15435 + }, + { + "epoch": 1.2457428778952466, + "grad_norm": 0.6853424310684204, + "learning_rate": 2.4874165506918957e-05, + "loss": 2.4226, + "step": 15436 + }, + { + "epoch": 1.2458235816318295, + "grad_norm": 0.6861590147018433, + "learning_rate": 2.4863746960258094e-05, + "loss": 2.3748, + "step": 15437 + }, + { + "epoch": 1.2459042853684126, + "grad_norm": 0.7360263466835022, + "learning_rate": 2.4853330286189058e-05, + "loss": 2.4441, + "step": 15438 + }, + { + "epoch": 1.2459849891049957, + "grad_norm": 0.6894183158874512, + "learning_rate": 2.4842915484971496e-05, + "loss": 2.3495, + "step": 15439 + }, + { + "epoch": 1.2460656928415785, + "grad_norm": 0.7570669651031494, + "learning_rate": 2.4832502556864923e-05, + "loss": 2.4622, + "step": 15440 + }, + { + "epoch": 1.2461463965781616, + "grad_norm": 0.6986069083213806, + "learning_rate": 2.4822091502128876e-05, + "loss": 2.3647, + "step": 15441 + }, + { + "epoch": 1.2462271003147445, + "grad_norm": 0.681450366973877, + "learning_rate": 2.481168232102279e-05, + "loss": 2.3872, + "step": 15442 + }, + { + "epoch": 1.2463078040513276, + "grad_norm": 0.7241837978363037, + "learning_rate": 2.480127501380618e-05, + "loss": 2.4692, + "step": 15443 + }, + { + "epoch": 
1.2463885077879107, + "grad_norm": 0.6575295329093933, + "learning_rate": 2.479086958073834e-05, + "loss": 2.5057, + "step": 15444 + }, + { + "epoch": 1.2464692115244935, + "grad_norm": 0.7289770841598511, + "learning_rate": 2.478046602207864e-05, + "loss": 2.4164, + "step": 15445 + }, + { + "epoch": 1.2465499152610766, + "grad_norm": 0.6682024598121643, + "learning_rate": 2.4770064338086374e-05, + "loss": 2.4466, + "step": 15446 + }, + { + "epoch": 1.2466306189976595, + "grad_norm": 0.7238918542861938, + "learning_rate": 2.475966452902072e-05, + "loss": 2.4367, + "step": 15447 + }, + { + "epoch": 1.2467113227342426, + "grad_norm": 0.6825705170631409, + "learning_rate": 2.4749266595140918e-05, + "loss": 2.4337, + "step": 15448 + }, + { + "epoch": 1.2467920264708257, + "grad_norm": 0.7352269887924194, + "learning_rate": 2.4738870536706126e-05, + "loss": 2.4103, + "step": 15449 + }, + { + "epoch": 1.2468727302074085, + "grad_norm": 0.658930778503418, + "learning_rate": 2.4728476353975394e-05, + "loss": 2.4281, + "step": 15450 + }, + { + "epoch": 1.2469534339439916, + "grad_norm": 0.6933601498603821, + "learning_rate": 2.4718084047207778e-05, + "loss": 2.4502, + "step": 15451 + }, + { + "epoch": 1.2470341376805747, + "grad_norm": 0.6901879906654358, + "learning_rate": 2.4707693616662308e-05, + "loss": 2.4057, + "step": 15452 + }, + { + "epoch": 1.2471148414171576, + "grad_norm": 0.7648913860321045, + "learning_rate": 2.469730506259792e-05, + "loss": 2.4163, + "step": 15453 + }, + { + "epoch": 1.2471955451537406, + "grad_norm": 0.6496175527572632, + "learning_rate": 2.4686918385273537e-05, + "loss": 2.4373, + "step": 15454 + }, + { + "epoch": 1.2472762488903237, + "grad_norm": 0.6949105858802795, + "learning_rate": 2.4676533584948048e-05, + "loss": 2.4108, + "step": 15455 + }, + { + "epoch": 1.2473569526269066, + "grad_norm": 0.7018688321113586, + "learning_rate": 2.4666150661880206e-05, + "loss": 2.4589, + "step": 15456 + }, + { + "epoch": 1.2474376563634897, + 
"grad_norm": 0.7141219973564148, + "learning_rate": 2.4655769616328827e-05, + "loss": 2.4022, + "step": 15457 + }, + { + "epoch": 1.2475183601000726, + "grad_norm": 0.7276743054389954, + "learning_rate": 2.4645390448552608e-05, + "loss": 2.4443, + "step": 15458 + }, + { + "epoch": 1.2475990638366556, + "grad_norm": 0.6861153244972229, + "learning_rate": 2.463501315881027e-05, + "loss": 2.4478, + "step": 15459 + }, + { + "epoch": 1.2476797675732387, + "grad_norm": 0.7252256274223328, + "learning_rate": 2.462463774736038e-05, + "loss": 2.446, + "step": 15460 + }, + { + "epoch": 1.2477604713098216, + "grad_norm": 0.6914857625961304, + "learning_rate": 2.4614264214461557e-05, + "loss": 2.4294, + "step": 15461 + }, + { + "epoch": 1.2478411750464047, + "grad_norm": 0.6815036535263062, + "learning_rate": 2.460389256037232e-05, + "loss": 2.4389, + "step": 15462 + }, + { + "epoch": 1.2479218787829875, + "grad_norm": 0.7420194745063782, + "learning_rate": 2.4593522785351176e-05, + "loss": 2.4932, + "step": 15463 + }, + { + "epoch": 1.2480025825195706, + "grad_norm": 0.6622182130813599, + "learning_rate": 2.4583154889656556e-05, + "loss": 2.4327, + "step": 15464 + }, + { + "epoch": 1.2480832862561537, + "grad_norm": 0.6527934074401855, + "learning_rate": 2.457278887354689e-05, + "loss": 2.3857, + "step": 15465 + }, + { + "epoch": 1.2481639899927366, + "grad_norm": 0.6942344903945923, + "learning_rate": 2.4562424737280465e-05, + "loss": 2.4181, + "step": 15466 + }, + { + "epoch": 1.2482446937293197, + "grad_norm": 0.7449823021888733, + "learning_rate": 2.45520624811156e-05, + "loss": 2.4575, + "step": 15467 + }, + { + "epoch": 1.2483253974659028, + "grad_norm": 0.6905208826065063, + "learning_rate": 2.4541702105310605e-05, + "loss": 2.3858, + "step": 15468 + }, + { + "epoch": 1.2484061012024856, + "grad_norm": 0.6928502917289734, + "learning_rate": 2.4531343610123603e-05, + "loss": 2.4212, + "step": 15469 + }, + { + "epoch": 1.2484868049390687, + "grad_norm": 
0.7182145118713379, + "learning_rate": 2.45209869958128e-05, + "loss": 2.4063, + "step": 15470 + }, + { + "epoch": 1.2485675086756518, + "grad_norm": 0.7379452586174011, + "learning_rate": 2.4510632262636314e-05, + "loss": 2.4612, + "step": 15471 + }, + { + "epoch": 1.2486482124122347, + "grad_norm": 0.6663349270820618, + "learning_rate": 2.450027941085219e-05, + "loss": 2.4583, + "step": 15472 + }, + { + "epoch": 1.2487289161488178, + "grad_norm": 0.7266560792922974, + "learning_rate": 2.4489928440718467e-05, + "loss": 2.4483, + "step": 15473 + }, + { + "epoch": 1.2488096198854006, + "grad_norm": 0.7046550512313843, + "learning_rate": 2.447957935249311e-05, + "loss": 2.4087, + "step": 15474 + }, + { + "epoch": 1.2488903236219837, + "grad_norm": 0.684248685836792, + "learning_rate": 2.4469232146434084e-05, + "loss": 2.4352, + "step": 15475 + }, + { + "epoch": 1.2489710273585668, + "grad_norm": 0.6864973902702332, + "learning_rate": 2.4458886822799198e-05, + "loss": 2.3872, + "step": 15476 + }, + { + "epoch": 1.2490517310951497, + "grad_norm": 0.6964752674102783, + "learning_rate": 2.444854338184631e-05, + "loss": 2.437, + "step": 15477 + }, + { + "epoch": 1.2491324348317328, + "grad_norm": 0.6755973100662231, + "learning_rate": 2.4438201823833252e-05, + "loss": 2.4302, + "step": 15478 + }, + { + "epoch": 1.2492131385683156, + "grad_norm": 0.6434857249259949, + "learning_rate": 2.44278621490177e-05, + "loss": 2.406, + "step": 15479 + }, + { + "epoch": 1.2492938423048987, + "grad_norm": 0.7342328429222107, + "learning_rate": 2.441752435765736e-05, + "loss": 2.451, + "step": 15480 + }, + { + "epoch": 1.2493745460414818, + "grad_norm": 0.7486860752105713, + "learning_rate": 2.44071884500099e-05, + "loss": 2.4536, + "step": 15481 + }, + { + "epoch": 1.2494552497780647, + "grad_norm": 0.7274537086486816, + "learning_rate": 2.4396854426332903e-05, + "loss": 2.4599, + "step": 15482 + }, + { + "epoch": 1.2495359535146477, + "grad_norm": 0.7580124735832214, + 
"learning_rate": 2.4386522286883918e-05, + "loss": 2.4038, + "step": 15483 + }, + { + "epoch": 1.2496166572512308, + "grad_norm": 0.6776975393295288, + "learning_rate": 2.4376192031920488e-05, + "loss": 2.4246, + "step": 15484 + }, + { + "epoch": 1.2496973609878137, + "grad_norm": 0.6899511814117432, + "learning_rate": 2.4365863661699996e-05, + "loss": 2.3922, + "step": 15485 + }, + { + "epoch": 1.2497780647243968, + "grad_norm": 0.7487930059432983, + "learning_rate": 2.4355537176479903e-05, + "loss": 2.4573, + "step": 15486 + }, + { + "epoch": 1.2498587684609797, + "grad_norm": 0.7306599617004395, + "learning_rate": 2.4345212576517575e-05, + "loss": 2.4745, + "step": 15487 + }, + { + "epoch": 1.2499394721975627, + "grad_norm": 0.7152543067932129, + "learning_rate": 2.43348898620703e-05, + "loss": 2.4768, + "step": 15488 + }, + { + "epoch": 1.2500201759341458, + "grad_norm": 0.6576277017593384, + "learning_rate": 2.432456903339535e-05, + "loss": 2.4289, + "step": 15489 + }, + { + "epoch": 1.2501008796707287, + "grad_norm": 0.6974572539329529, + "learning_rate": 2.4314250090749956e-05, + "loss": 2.4218, + "step": 15490 + }, + { + "epoch": 1.2501815834073118, + "grad_norm": 0.7869577407836914, + "learning_rate": 2.4303933034391323e-05, + "loss": 2.3899, + "step": 15491 + }, + { + "epoch": 1.2502622871438946, + "grad_norm": 0.6723129749298096, + "learning_rate": 2.42936178645765e-05, + "loss": 2.4238, + "step": 15492 + }, + { + "epoch": 1.2503429908804777, + "grad_norm": 0.6839526891708374, + "learning_rate": 2.428330458156265e-05, + "loss": 2.4037, + "step": 15493 + }, + { + "epoch": 1.2504236946170608, + "grad_norm": 0.6866093277931213, + "learning_rate": 2.4272993185606796e-05, + "loss": 2.4228, + "step": 15494 + }, + { + "epoch": 1.2505043983536437, + "grad_norm": 0.6992947459220886, + "learning_rate": 2.426268367696588e-05, + "loss": 2.4248, + "step": 15495 + }, + { + "epoch": 1.2505851020902268, + "grad_norm": 0.6836698651313782, + "learning_rate": 
2.4252376055896862e-05, + "loss": 2.5387, + "step": 15496 + }, + { + "epoch": 1.2506658058268099, + "grad_norm": 0.6990752816200256, + "learning_rate": 2.4242070322656663e-05, + "loss": 2.4438, + "step": 15497 + }, + { + "epoch": 1.2507465095633927, + "grad_norm": 0.7143029570579529, + "learning_rate": 2.4231766477502082e-05, + "loss": 2.4, + "step": 15498 + }, + { + "epoch": 1.2508272132999758, + "grad_norm": 0.6585043668746948, + "learning_rate": 2.422146452068994e-05, + "loss": 2.4256, + "step": 15499 + }, + { + "epoch": 1.250907917036559, + "grad_norm": 0.739107072353363, + "learning_rate": 2.421116445247702e-05, + "loss": 2.428, + "step": 15500 + }, + { + "epoch": 1.2509886207731418, + "grad_norm": 0.6675287485122681, + "learning_rate": 2.420086627311997e-05, + "loss": 2.5095, + "step": 15501 + }, + { + "epoch": 1.2510693245097249, + "grad_norm": 0.7133405804634094, + "learning_rate": 2.4190569982875467e-05, + "loss": 2.4719, + "step": 15502 + }, + { + "epoch": 1.2511500282463077, + "grad_norm": 0.710904061794281, + "learning_rate": 2.4180275582000134e-05, + "loss": 2.4449, + "step": 15503 + }, + { + "epoch": 1.2512307319828908, + "grad_norm": 0.7088729739189148, + "learning_rate": 2.4169983070750525e-05, + "loss": 2.4059, + "step": 15504 + }, + { + "epoch": 1.2513114357194737, + "grad_norm": 0.7187358736991882, + "learning_rate": 2.4159692449383152e-05, + "loss": 2.4577, + "step": 15505 + }, + { + "epoch": 1.2513921394560568, + "grad_norm": 0.7531955242156982, + "learning_rate": 2.4149403718154497e-05, + "loss": 2.4101, + "step": 15506 + }, + { + "epoch": 1.2514728431926398, + "grad_norm": 0.7565199136734009, + "learning_rate": 2.413911687732101e-05, + "loss": 2.4805, + "step": 15507 + }, + { + "epoch": 1.2515535469292227, + "grad_norm": 0.706471860408783, + "learning_rate": 2.4128831927139008e-05, + "loss": 2.4494, + "step": 15508 + }, + { + "epoch": 1.2516342506658058, + "grad_norm": 0.7022314667701721, + "learning_rate": 2.4118548867864832e-05, + "loss": 
2.4442, + "step": 15509 + }, + { + "epoch": 1.251714954402389, + "grad_norm": 0.6885591745376587, + "learning_rate": 2.4108267699754806e-05, + "loss": 2.4186, + "step": 15510 + }, + { + "epoch": 1.2517956581389718, + "grad_norm": 0.6963610649108887, + "learning_rate": 2.409798842306511e-05, + "loss": 2.4209, + "step": 15511 + }, + { + "epoch": 1.2518763618755548, + "grad_norm": 0.7117185592651367, + "learning_rate": 2.4087711038051942e-05, + "loss": 2.4106, + "step": 15512 + }, + { + "epoch": 1.251957065612138, + "grad_norm": 0.6944519281387329, + "learning_rate": 2.407743554497146e-05, + "loss": 2.4493, + "step": 15513 + }, + { + "epoch": 1.2520377693487208, + "grad_norm": 0.689818263053894, + "learning_rate": 2.406716194407974e-05, + "loss": 2.4358, + "step": 15514 + }, + { + "epoch": 1.2521184730853039, + "grad_norm": 0.8132768273353577, + "learning_rate": 2.4056890235632846e-05, + "loss": 2.4574, + "step": 15515 + }, + { + "epoch": 1.252199176821887, + "grad_norm": 0.6855002045631409, + "learning_rate": 2.4046620419886777e-05, + "loss": 2.4118, + "step": 15516 + }, + { + "epoch": 1.2522798805584698, + "grad_norm": 0.6616373658180237, + "learning_rate": 2.4036352497097458e-05, + "loss": 2.4332, + "step": 15517 + }, + { + "epoch": 1.252360584295053, + "grad_norm": 0.6657225489616394, + "learning_rate": 2.4026086467520803e-05, + "loss": 2.3989, + "step": 15518 + }, + { + "epoch": 1.2524412880316358, + "grad_norm": 0.6796447038650513, + "learning_rate": 2.4015822331412664e-05, + "loss": 2.4269, + "step": 15519 + }, + { + "epoch": 1.2525219917682189, + "grad_norm": 0.7168079614639282, + "learning_rate": 2.400556008902889e-05, + "loss": 2.4263, + "step": 15520 + }, + { + "epoch": 1.2526026955048017, + "grad_norm": 0.6985058188438416, + "learning_rate": 2.3995299740625186e-05, + "loss": 2.437, + "step": 15521 + }, + { + "epoch": 1.2526833992413848, + "grad_norm": 0.7078086137771606, + "learning_rate": 2.3985041286457287e-05, + "loss": 2.3996, + "step": 15522 + }, + { 
+ "epoch": 1.252764102977968, + "grad_norm": 0.6989054083824158, + "learning_rate": 2.3974784726780865e-05, + "loss": 2.4717, + "step": 15523 + }, + { + "epoch": 1.2528448067145508, + "grad_norm": 0.747606098651886, + "learning_rate": 2.396453006185153e-05, + "loss": 2.4228, + "step": 15524 + }, + { + "epoch": 1.2529255104511339, + "grad_norm": 0.7500887513160706, + "learning_rate": 2.3954277291924876e-05, + "loss": 2.4636, + "step": 15525 + }, + { + "epoch": 1.253006214187717, + "grad_norm": 0.7710712552070618, + "learning_rate": 2.3944026417256437e-05, + "loss": 2.4405, + "step": 15526 + }, + { + "epoch": 1.2530869179242998, + "grad_norm": 0.7278285622596741, + "learning_rate": 2.3933777438101657e-05, + "loss": 2.4279, + "step": 15527 + }, + { + "epoch": 1.253167621660883, + "grad_norm": 0.6979010701179504, + "learning_rate": 2.3923530354715973e-05, + "loss": 2.4272, + "step": 15528 + }, + { + "epoch": 1.253248325397466, + "grad_norm": 0.7330336570739746, + "learning_rate": 2.3913285167354804e-05, + "loss": 2.3861, + "step": 15529 + }, + { + "epoch": 1.2533290291340489, + "grad_norm": 0.675499677658081, + "learning_rate": 2.3903041876273436e-05, + "loss": 2.3987, + "step": 15530 + }, + { + "epoch": 1.253409732870632, + "grad_norm": 0.6854682564735413, + "learning_rate": 2.3892800481727186e-05, + "loss": 2.4085, + "step": 15531 + }, + { + "epoch": 1.253490436607215, + "grad_norm": 0.713810384273529, + "learning_rate": 2.388256098397129e-05, + "loss": 2.3897, + "step": 15532 + }, + { + "epoch": 1.253571140343798, + "grad_norm": 0.683214545249939, + "learning_rate": 2.3872323383260953e-05, + "loss": 2.4526, + "step": 15533 + }, + { + "epoch": 1.253651844080381, + "grad_norm": 0.6718357801437378, + "learning_rate": 2.3862087679851318e-05, + "loss": 2.4612, + "step": 15534 + }, + { + "epoch": 1.2537325478169639, + "grad_norm": 0.722283124923706, + "learning_rate": 2.3851853873997488e-05, + "loss": 2.4163, + "step": 15535 + }, + { + "epoch": 1.253813251553547, + 
"grad_norm": 0.689393162727356, + "learning_rate": 2.384162196595453e-05, + "loss": 2.3984, + "step": 15536 + }, + { + "epoch": 1.2538939552901298, + "grad_norm": 0.7146410346031189, + "learning_rate": 2.3831391955977412e-05, + "loss": 2.4442, + "step": 15537 + }, + { + "epoch": 1.253974659026713, + "grad_norm": 0.6651021838188171, + "learning_rate": 2.3821163844321104e-05, + "loss": 2.4064, + "step": 15538 + }, + { + "epoch": 1.254055362763296, + "grad_norm": 0.7088985443115234, + "learning_rate": 2.381093763124056e-05, + "loss": 2.4831, + "step": 15539 + }, + { + "epoch": 1.2541360664998789, + "grad_norm": 0.661375105381012, + "learning_rate": 2.3800713316990588e-05, + "loss": 2.3657, + "step": 15540 + }, + { + "epoch": 1.254216770236462, + "grad_norm": 0.6870979070663452, + "learning_rate": 2.3790490901826012e-05, + "loss": 2.4208, + "step": 15541 + }, + { + "epoch": 1.254297473973045, + "grad_norm": 0.6256219148635864, + "learning_rate": 2.3780270386001657e-05, + "loss": 2.4182, + "step": 15542 + }, + { + "epoch": 1.254378177709628, + "grad_norm": 0.7070638537406921, + "learning_rate": 2.377005176977215e-05, + "loss": 2.3758, + "step": 15543 + }, + { + "epoch": 1.254458881446211, + "grad_norm": 0.6571370363235474, + "learning_rate": 2.3759835053392242e-05, + "loss": 2.3927, + "step": 15544 + }, + { + "epoch": 1.254539585182794, + "grad_norm": 0.644263744354248, + "learning_rate": 2.3749620237116565e-05, + "loss": 2.3992, + "step": 15545 + }, + { + "epoch": 1.254620288919377, + "grad_norm": 0.7127394676208496, + "learning_rate": 2.3739407321199648e-05, + "loss": 2.3942, + "step": 15546 + }, + { + "epoch": 1.25470099265596, + "grad_norm": 0.7274866104125977, + "learning_rate": 2.372919630589605e-05, + "loss": 2.5232, + "step": 15547 + }, + { + "epoch": 1.2547816963925431, + "grad_norm": 0.690138041973114, + "learning_rate": 2.3718987191460274e-05, + "loss": 2.4371, + "step": 15548 + }, + { + "epoch": 1.254862400129126, + "grad_norm": 0.6990681886672974, + 
"learning_rate": 2.3708779978146724e-05, + "loss": 2.4568, + "step": 15549 + }, + { + "epoch": 1.254943103865709, + "grad_norm": 0.7430790662765503, + "learning_rate": 2.3698574666209793e-05, + "loss": 2.423, + "step": 15550 + }, + { + "epoch": 1.255023807602292, + "grad_norm": 0.6991416215896606, + "learning_rate": 2.3688371255903828e-05, + "loss": 2.4529, + "step": 15551 + }, + { + "epoch": 1.255104511338875, + "grad_norm": 0.6733322739601135, + "learning_rate": 2.367816974748317e-05, + "loss": 2.4531, + "step": 15552 + }, + { + "epoch": 1.2551852150754579, + "grad_norm": 0.7460463047027588, + "learning_rate": 2.3667970141202e-05, + "loss": 2.4267, + "step": 15553 + }, + { + "epoch": 1.255265918812041, + "grad_norm": 0.6784021854400635, + "learning_rate": 2.3657772437314517e-05, + "loss": 2.4996, + "step": 15554 + }, + { + "epoch": 1.255346622548624, + "grad_norm": 0.7499529719352722, + "learning_rate": 2.3647576636074975e-05, + "loss": 2.4749, + "step": 15555 + }, + { + "epoch": 1.255427326285207, + "grad_norm": 0.6698335409164429, + "learning_rate": 2.3637382737737368e-05, + "loss": 2.4499, + "step": 15556 + }, + { + "epoch": 1.25550803002179, + "grad_norm": 0.6644846200942993, + "learning_rate": 2.3627190742555806e-05, + "loss": 2.397, + "step": 15557 + }, + { + "epoch": 1.255588733758373, + "grad_norm": 0.7041488289833069, + "learning_rate": 2.3617000650784315e-05, + "loss": 2.4012, + "step": 15558 + }, + { + "epoch": 1.255669437494956, + "grad_norm": 0.72523033618927, + "learning_rate": 2.3606812462676798e-05, + "loss": 2.4151, + "step": 15559 + }, + { + "epoch": 1.255750141231539, + "grad_norm": 0.77669757604599, + "learning_rate": 2.3596626178487225e-05, + "loss": 2.4478, + "step": 15560 + }, + { + "epoch": 1.2558308449681221, + "grad_norm": 0.6919559836387634, + "learning_rate": 2.3586441798469462e-05, + "loss": 2.4548, + "step": 15561 + }, + { + "epoch": 1.255911548704705, + "grad_norm": 0.7613349556922913, + "learning_rate": 2.3576259322877292e-05, + 
"loss": 2.4475, + "step": 15562 + }, + { + "epoch": 1.255992252441288, + "grad_norm": 0.6738333106040955, + "learning_rate": 2.3566078751964515e-05, + "loss": 2.4242, + "step": 15563 + }, + { + "epoch": 1.256072956177871, + "grad_norm": 0.7242118716239929, + "learning_rate": 2.355590008598486e-05, + "loss": 2.4047, + "step": 15564 + }, + { + "epoch": 1.256153659914454, + "grad_norm": 0.7117685675621033, + "learning_rate": 2.354572332519199e-05, + "loss": 2.4473, + "step": 15565 + }, + { + "epoch": 1.256234363651037, + "grad_norm": 0.7466531991958618, + "learning_rate": 2.3535548469839564e-05, + "loss": 2.453, + "step": 15566 + }, + { + "epoch": 1.25631506738762, + "grad_norm": 0.6750668883323669, + "learning_rate": 2.3525375520181136e-05, + "loss": 2.4367, + "step": 15567 + }, + { + "epoch": 1.256395771124203, + "grad_norm": 0.7640851736068726, + "learning_rate": 2.35152044764703e-05, + "loss": 2.5014, + "step": 15568 + }, + { + "epoch": 1.256476474860786, + "grad_norm": 0.7198928594589233, + "learning_rate": 2.3505035338960456e-05, + "loss": 2.5138, + "step": 15569 + }, + { + "epoch": 1.256557178597369, + "grad_norm": 0.7079946398735046, + "learning_rate": 2.349486810790511e-05, + "loss": 2.4172, + "step": 15570 + }, + { + "epoch": 1.2566378823339521, + "grad_norm": 0.7477186918258667, + "learning_rate": 2.3484702783557655e-05, + "loss": 2.4224, + "step": 15571 + }, + { + "epoch": 1.256718586070535, + "grad_norm": 0.6875394582748413, + "learning_rate": 2.3474539366171388e-05, + "loss": 2.4621, + "step": 15572 + }, + { + "epoch": 1.256799289807118, + "grad_norm": 0.7164824604988098, + "learning_rate": 2.346437785599964e-05, + "loss": 2.4416, + "step": 15573 + }, + { + "epoch": 1.2568799935437012, + "grad_norm": 0.7031935453414917, + "learning_rate": 2.3454218253295668e-05, + "loss": 2.3943, + "step": 15574 + }, + { + "epoch": 1.256960697280284, + "grad_norm": 0.6739614009857178, + "learning_rate": 2.3444060558312665e-05, + "loss": 2.4114, + "step": 15575 + }, + { + 
"epoch": 1.2570414010168671, + "grad_norm": 0.6710866689682007, + "learning_rate": 2.3433904771303794e-05, + "loss": 2.4077, + "step": 15576 + }, + { + "epoch": 1.2571221047534502, + "grad_norm": 0.6589750051498413, + "learning_rate": 2.342375089252219e-05, + "loss": 2.3494, + "step": 15577 + }, + { + "epoch": 1.257202808490033, + "grad_norm": 0.7018333077430725, + "learning_rate": 2.3413598922220857e-05, + "loss": 2.459, + "step": 15578 + }, + { + "epoch": 1.2572835122266162, + "grad_norm": 0.7735301852226257, + "learning_rate": 2.3403448860652842e-05, + "loss": 2.4524, + "step": 15579 + }, + { + "epoch": 1.257364215963199, + "grad_norm": 0.7009726762771606, + "learning_rate": 2.339330070807113e-05, + "loss": 2.4244, + "step": 15580 + }, + { + "epoch": 1.2574449196997821, + "grad_norm": 0.671521008014679, + "learning_rate": 2.3383154464728595e-05, + "loss": 2.3808, + "step": 15581 + }, + { + "epoch": 1.257525623436365, + "grad_norm": 0.7736711502075195, + "learning_rate": 2.3373010130878126e-05, + "loss": 2.4936, + "step": 15582 + }, + { + "epoch": 1.257606327172948, + "grad_norm": 0.6987056136131287, + "learning_rate": 2.336286770677255e-05, + "loss": 2.4484, + "step": 15583 + }, + { + "epoch": 1.2576870309095312, + "grad_norm": 0.6337067484855652, + "learning_rate": 2.3352727192664635e-05, + "loss": 2.4196, + "step": 15584 + }, + { + "epoch": 1.257767734646114, + "grad_norm": 0.6832795143127441, + "learning_rate": 2.3342588588807123e-05, + "loss": 2.3681, + "step": 15585 + }, + { + "epoch": 1.257848438382697, + "grad_norm": 0.7208079695701599, + "learning_rate": 2.3332451895452688e-05, + "loss": 2.4436, + "step": 15586 + }, + { + "epoch": 1.2579291421192802, + "grad_norm": 0.6607621312141418, + "learning_rate": 2.3322317112853986e-05, + "loss": 2.4088, + "step": 15587 + }, + { + "epoch": 1.258009845855863, + "grad_norm": 0.7261247038841248, + "learning_rate": 2.331218424126356e-05, + "loss": 2.4389, + "step": 15588 + }, + { + "epoch": 1.2580905495924462, + 
"grad_norm": 0.6187729239463806, + "learning_rate": 2.3302053280933954e-05, + "loss": 2.3568, + "step": 15589 + }, + { + "epoch": 1.2581712533290292, + "grad_norm": 0.6196430921554565, + "learning_rate": 2.3291924232117713e-05, + "loss": 2.4285, + "step": 15590 + }, + { + "epoch": 1.258251957065612, + "grad_norm": 0.7271853685379028, + "learning_rate": 2.3281797095067193e-05, + "loss": 2.4058, + "step": 15591 + }, + { + "epoch": 1.2583326608021952, + "grad_norm": 0.7141130566596985, + "learning_rate": 2.327167187003484e-05, + "loss": 2.3971, + "step": 15592 + }, + { + "epoch": 1.2584133645387783, + "grad_norm": 0.680743932723999, + "learning_rate": 2.3261548557273027e-05, + "loss": 2.4387, + "step": 15593 + }, + { + "epoch": 1.2584940682753611, + "grad_norm": 0.718173086643219, + "learning_rate": 2.3251427157033955e-05, + "loss": 2.43, + "step": 15594 + }, + { + "epoch": 1.2585747720119442, + "grad_norm": 0.7600045800209045, + "learning_rate": 2.324130766956998e-05, + "loss": 2.4584, + "step": 15595 + }, + { + "epoch": 1.258655475748527, + "grad_norm": 0.7432500123977661, + "learning_rate": 2.3231190095133294e-05, + "loss": 2.4717, + "step": 15596 + }, + { + "epoch": 1.2587361794851102, + "grad_norm": 0.6603000164031982, + "learning_rate": 2.3221074433975988e-05, + "loss": 2.3952, + "step": 15597 + }, + { + "epoch": 1.258816883221693, + "grad_norm": 0.7020140290260315, + "learning_rate": 2.3210960686350213e-05, + "loss": 2.4064, + "step": 15598 + }, + { + "epoch": 1.2588975869582761, + "grad_norm": 0.7434887290000916, + "learning_rate": 2.320084885250804e-05, + "loss": 2.4708, + "step": 15599 + }, + { + "epoch": 1.2589782906948592, + "grad_norm": 0.6626797318458557, + "learning_rate": 2.3190738932701482e-05, + "loss": 2.4503, + "step": 15600 + }, + { + "epoch": 1.259058994431442, + "grad_norm": 0.7880598902702332, + "learning_rate": 2.3180630927182466e-05, + "loss": 2.384, + "step": 15601 + }, + { + "epoch": 1.2591396981680252, + "grad_norm": 0.7766147255897522, + 
"learning_rate": 2.3170524836202933e-05, + "loss": 2.4019, + "step": 15602 + }, + { + "epoch": 1.2592204019046083, + "grad_norm": 0.7817980051040649, + "learning_rate": 2.3160420660014792e-05, + "loss": 2.4729, + "step": 15603 + }, + { + "epoch": 1.2593011056411911, + "grad_norm": 0.6915614604949951, + "learning_rate": 2.3150318398869787e-05, + "loss": 2.4028, + "step": 15604 + }, + { + "epoch": 1.2593818093777742, + "grad_norm": 0.690882682800293, + "learning_rate": 2.3140218053019714e-05, + "loss": 2.4386, + "step": 15605 + }, + { + "epoch": 1.2594625131143573, + "grad_norm": 0.6670350432395935, + "learning_rate": 2.3130119622716382e-05, + "loss": 2.4224, + "step": 15606 + }, + { + "epoch": 1.2595432168509402, + "grad_norm": 0.6680006980895996, + "learning_rate": 2.3120023108211375e-05, + "loss": 2.3475, + "step": 15607 + }, + { + "epoch": 1.2596239205875233, + "grad_norm": 0.7003577947616577, + "learning_rate": 2.310992850975636e-05, + "loss": 2.4198, + "step": 15608 + }, + { + "epoch": 1.2597046243241061, + "grad_norm": 0.7444167733192444, + "learning_rate": 2.3099835827602944e-05, + "loss": 2.3756, + "step": 15609 + }, + { + "epoch": 1.2597853280606892, + "grad_norm": 0.6757989525794983, + "learning_rate": 2.3089745062002612e-05, + "loss": 2.3955, + "step": 15610 + }, + { + "epoch": 1.259866031797272, + "grad_norm": 0.6955820322036743, + "learning_rate": 2.3079656213206878e-05, + "loss": 2.4031, + "step": 15611 + }, + { + "epoch": 1.2599467355338552, + "grad_norm": 0.6646408438682556, + "learning_rate": 2.3069569281467184e-05, + "loss": 2.4246, + "step": 15612 + }, + { + "epoch": 1.2600274392704383, + "grad_norm": 0.6922882199287415, + "learning_rate": 2.3059484267034958e-05, + "loss": 2.4157, + "step": 15613 + }, + { + "epoch": 1.2601081430070211, + "grad_norm": 0.8092310428619385, + "learning_rate": 2.3049401170161468e-05, + "loss": 2.4137, + "step": 15614 + }, + { + "epoch": 1.2601888467436042, + "grad_norm": 0.7024559378623962, + "learning_rate": 
2.3039319991098063e-05, + "loss": 2.4497, + "step": 15615 + }, + { + "epoch": 1.2602695504801873, + "grad_norm": 0.7096099853515625, + "learning_rate": 2.302924073009597e-05, + "loss": 2.4045, + "step": 15616 + }, + { + "epoch": 1.2603502542167702, + "grad_norm": 0.6777564287185669, + "learning_rate": 2.3019163387406406e-05, + "loss": 2.4607, + "step": 15617 + }, + { + "epoch": 1.2604309579533532, + "grad_norm": 0.7564159035682678, + "learning_rate": 2.300908796328052e-05, + "loss": 2.4985, + "step": 15618 + }, + { + "epoch": 1.2605116616899363, + "grad_norm": 0.7432986497879028, + "learning_rate": 2.2999014457969447e-05, + "loss": 2.4326, + "step": 15619 + }, + { + "epoch": 1.2605923654265192, + "grad_norm": 0.7178141474723816, + "learning_rate": 2.2988942871724182e-05, + "loss": 2.4118, + "step": 15620 + }, + { + "epoch": 1.2606730691631023, + "grad_norm": 0.7074497938156128, + "learning_rate": 2.2978873204795782e-05, + "loss": 2.4163, + "step": 15621 + }, + { + "epoch": 1.2607537728996854, + "grad_norm": 0.670200765132904, + "learning_rate": 2.2968805457435217e-05, + "loss": 2.4081, + "step": 15622 + }, + { + "epoch": 1.2608344766362682, + "grad_norm": 0.7258187532424927, + "learning_rate": 2.2958739629893355e-05, + "loss": 2.4889, + "step": 15623 + }, + { + "epoch": 1.2609151803728513, + "grad_norm": 0.6999781727790833, + "learning_rate": 2.2948675722421086e-05, + "loss": 2.3945, + "step": 15624 + }, + { + "epoch": 1.2609958841094342, + "grad_norm": 0.7030084133148193, + "learning_rate": 2.2938613735269243e-05, + "loss": 2.4509, + "step": 15625 + }, + { + "epoch": 1.2610765878460173, + "grad_norm": 0.6875420212745667, + "learning_rate": 2.292855366868858e-05, + "loss": 2.3658, + "step": 15626 + }, + { + "epoch": 1.2611572915826001, + "grad_norm": 0.7375235557556152, + "learning_rate": 2.2918495522929817e-05, + "loss": 2.4308, + "step": 15627 + }, + { + "epoch": 1.2612379953191832, + "grad_norm": 0.7021106481552124, + "learning_rate": 2.2908439298243644e-05, + 
"loss": 2.4046, + "step": 15628 + }, + { + "epoch": 1.2613186990557663, + "grad_norm": 0.76661616563797, + "learning_rate": 2.2898384994880716e-05, + "loss": 2.5156, + "step": 15629 + }, + { + "epoch": 1.2613994027923492, + "grad_norm": 0.6684869527816772, + "learning_rate": 2.2888332613091558e-05, + "loss": 2.4342, + "step": 15630 + }, + { + "epoch": 1.2614801065289323, + "grad_norm": 0.6878669261932373, + "learning_rate": 2.2878282153126706e-05, + "loss": 2.4544, + "step": 15631 + }, + { + "epoch": 1.2615608102655154, + "grad_norm": 0.6659132838249207, + "learning_rate": 2.2868233615236702e-05, + "loss": 2.4341, + "step": 15632 + }, + { + "epoch": 1.2616415140020982, + "grad_norm": 0.657474160194397, + "learning_rate": 2.2858186999671905e-05, + "loss": 2.3515, + "step": 15633 + }, + { + "epoch": 1.2617222177386813, + "grad_norm": 0.7245650291442871, + "learning_rate": 2.284814230668274e-05, + "loss": 2.3983, + "step": 15634 + }, + { + "epoch": 1.2618029214752644, + "grad_norm": 0.6400195360183716, + "learning_rate": 2.2838099536519554e-05, + "loss": 2.3535, + "step": 15635 + }, + { + "epoch": 1.2618836252118473, + "grad_norm": 0.6719450950622559, + "learning_rate": 2.282805868943262e-05, + "loss": 2.3906, + "step": 15636 + }, + { + "epoch": 1.2619643289484304, + "grad_norm": 0.682746946811676, + "learning_rate": 2.2818019765672207e-05, + "loss": 2.4045, + "step": 15637 + }, + { + "epoch": 1.2620450326850134, + "grad_norm": 0.6631760597229004, + "learning_rate": 2.2807982765488513e-05, + "loss": 2.4896, + "step": 15638 + }, + { + "epoch": 1.2621257364215963, + "grad_norm": 0.782202422618866, + "learning_rate": 2.279794768913164e-05, + "loss": 2.4628, + "step": 15639 + }, + { + "epoch": 1.2622064401581794, + "grad_norm": 0.7579823732376099, + "learning_rate": 2.278791453685173e-05, + "loss": 2.4635, + "step": 15640 + }, + { + "epoch": 1.2622871438947623, + "grad_norm": 0.665096640586853, + "learning_rate": 2.277788330889884e-05, + "loss": 2.4899, + "step": 15641 + 
}, + { + "epoch": 1.2623678476313454, + "grad_norm": 0.7635685205459595, + "learning_rate": 2.2767854005522936e-05, + "loss": 2.4146, + "step": 15642 + }, + { + "epoch": 1.2624485513679282, + "grad_norm": 0.7579118609428406, + "learning_rate": 2.2757826626974e-05, + "loss": 2.3692, + "step": 15643 + }, + { + "epoch": 1.2625292551045113, + "grad_norm": 0.6772074699401855, + "learning_rate": 2.2747801173501938e-05, + "loss": 2.3954, + "step": 15644 + }, + { + "epoch": 1.2626099588410944, + "grad_norm": 0.7028382420539856, + "learning_rate": 2.2737777645356606e-05, + "loss": 2.4799, + "step": 15645 + }, + { + "epoch": 1.2626906625776773, + "grad_norm": 0.7152617573738098, + "learning_rate": 2.2727756042787818e-05, + "loss": 2.4095, + "step": 15646 + }, + { + "epoch": 1.2627713663142603, + "grad_norm": 0.7286608219146729, + "learning_rate": 2.271773636604535e-05, + "loss": 2.4496, + "step": 15647 + }, + { + "epoch": 1.2628520700508434, + "grad_norm": 0.7006896734237671, + "learning_rate": 2.2707718615378935e-05, + "loss": 2.4128, + "step": 15648 + }, + { + "epoch": 1.2629327737874263, + "grad_norm": 0.6856697797775269, + "learning_rate": 2.2697702791038177e-05, + "loss": 2.4169, + "step": 15649 + }, + { + "epoch": 1.2630134775240094, + "grad_norm": 0.7582918405532837, + "learning_rate": 2.268768889327275e-05, + "loss": 2.4007, + "step": 15650 + }, + { + "epoch": 1.2630941812605925, + "grad_norm": 0.664633572101593, + "learning_rate": 2.2677676922332237e-05, + "loss": 2.3876, + "step": 15651 + }, + { + "epoch": 1.2631748849971753, + "grad_norm": 0.7283070087432861, + "learning_rate": 2.266766687846611e-05, + "loss": 2.4175, + "step": 15652 + }, + { + "epoch": 1.2632555887337584, + "grad_norm": 0.7309537529945374, + "learning_rate": 2.2657658761923863e-05, + "loss": 2.3998, + "step": 15653 + }, + { + "epoch": 1.2633362924703415, + "grad_norm": 0.6386510133743286, + "learning_rate": 2.2647652572954968e-05, + "loss": 2.3723, + "step": 15654 + }, + { + "epoch": 
1.2634169962069244, + "grad_norm": 0.6805689930915833, + "learning_rate": 2.263764831180876e-05, + "loss": 2.3989, + "step": 15655 + }, + { + "epoch": 1.2634976999435072, + "grad_norm": 0.7147208452224731, + "learning_rate": 2.2627645978734536e-05, + "loss": 2.4748, + "step": 15656 + }, + { + "epoch": 1.2635784036800903, + "grad_norm": 0.6835155487060547, + "learning_rate": 2.2617645573981683e-05, + "loss": 2.4266, + "step": 15657 + }, + { + "epoch": 1.2636591074166734, + "grad_norm": 0.7631552219390869, + "learning_rate": 2.2607647097799368e-05, + "loss": 2.4152, + "step": 15658 + }, + { + "epoch": 1.2637398111532563, + "grad_norm": 0.6793624758720398, + "learning_rate": 2.2597650550436777e-05, + "loss": 2.3491, + "step": 15659 + }, + { + "epoch": 1.2638205148898394, + "grad_norm": 0.6465637683868408, + "learning_rate": 2.2587655932143083e-05, + "loss": 2.3774, + "step": 15660 + }, + { + "epoch": 1.2639012186264225, + "grad_norm": 0.6920284628868103, + "learning_rate": 2.2577663243167368e-05, + "loss": 2.4321, + "step": 15661 + }, + { + "epoch": 1.2639819223630053, + "grad_norm": 0.6922522783279419, + "learning_rate": 2.256767248375866e-05, + "loss": 2.4242, + "step": 15662 + }, + { + "epoch": 1.2640626260995884, + "grad_norm": 0.6811214089393616, + "learning_rate": 2.255768365416595e-05, + "loss": 2.4101, + "step": 15663 + }, + { + "epoch": 1.2641433298361715, + "grad_norm": 0.6704947352409363, + "learning_rate": 2.2547696754638238e-05, + "loss": 2.4792, + "step": 15664 + }, + { + "epoch": 1.2642240335727544, + "grad_norm": 0.6814701557159424, + "learning_rate": 2.2537711785424354e-05, + "loss": 2.4429, + "step": 15665 + }, + { + "epoch": 1.2643047373093375, + "grad_norm": 0.6778244972229004, + "learning_rate": 2.252772874677318e-05, + "loss": 2.3882, + "step": 15666 + }, + { + "epoch": 1.2643854410459205, + "grad_norm": 0.6570093035697937, + "learning_rate": 2.2517747638933518e-05, + "loss": 2.4162, + "step": 15667 + }, + { + "epoch": 1.2644661447825034, + 
"grad_norm": 0.6973466873168945, + "learning_rate": 2.2507768462154133e-05, + "loss": 2.3646, + "step": 15668 + }, + { + "epoch": 1.2645468485190865, + "grad_norm": 0.7258623242378235, + "learning_rate": 2.2497791216683715e-05, + "loss": 2.404, + "step": 15669 + }, + { + "epoch": 1.2646275522556694, + "grad_norm": 0.7462170124053955, + "learning_rate": 2.248781590277097e-05, + "loss": 2.5076, + "step": 15670 + }, + { + "epoch": 1.2647082559922525, + "grad_norm": 0.7070441246032715, + "learning_rate": 2.247784252066444e-05, + "loss": 2.3817, + "step": 15671 + }, + { + "epoch": 1.2647889597288353, + "grad_norm": 0.7150183916091919, + "learning_rate": 2.246787107061272e-05, + "loss": 2.461, + "step": 15672 + }, + { + "epoch": 1.2648696634654184, + "grad_norm": 0.668436586856842, + "learning_rate": 2.2457901552864347e-05, + "loss": 2.466, + "step": 15673 + }, + { + "epoch": 1.2649503672020015, + "grad_norm": 0.7011097073554993, + "learning_rate": 2.2447933967667745e-05, + "loss": 2.4582, + "step": 15674 + }, + { + "epoch": 1.2650310709385844, + "grad_norm": 0.7149096727371216, + "learning_rate": 2.243796831527134e-05, + "loss": 2.4461, + "step": 15675 + }, + { + "epoch": 1.2651117746751674, + "grad_norm": 0.6810914278030396, + "learning_rate": 2.2428004595923525e-05, + "loss": 2.4043, + "step": 15676 + }, + { + "epoch": 1.2651924784117505, + "grad_norm": 0.7700765132904053, + "learning_rate": 2.241804280987261e-05, + "loss": 2.4197, + "step": 15677 + }, + { + "epoch": 1.2652731821483334, + "grad_norm": 0.6897448897361755, + "learning_rate": 2.240808295736686e-05, + "loss": 2.4052, + "step": 15678 + }, + { + "epoch": 1.2653538858849165, + "grad_norm": 0.7092932462692261, + "learning_rate": 2.2398125038654515e-05, + "loss": 2.4088, + "step": 15679 + }, + { + "epoch": 1.2654345896214996, + "grad_norm": 0.6930294632911682, + "learning_rate": 2.2388169053983777e-05, + "loss": 2.4504, + "step": 15680 + }, + { + "epoch": 1.2655152933580824, + "grad_norm": 0.7056782245635986, 
+ "learning_rate": 2.237821500360271e-05, + "loss": 2.3975, + "step": 15681 + }, + { + "epoch": 1.2655959970946655, + "grad_norm": 0.651772141456604, + "learning_rate": 2.236826288775944e-05, + "loss": 2.3941, + "step": 15682 + }, + { + "epoch": 1.2656767008312486, + "grad_norm": 0.7254980206489563, + "learning_rate": 2.2358312706702012e-05, + "loss": 2.4149, + "step": 15683 + }, + { + "epoch": 1.2657574045678315, + "grad_norm": 0.6553635597229004, + "learning_rate": 2.2348364460678373e-05, + "loss": 2.4099, + "step": 15684 + }, + { + "epoch": 1.2658381083044146, + "grad_norm": 0.6952616572380066, + "learning_rate": 2.233841814993646e-05, + "loss": 2.384, + "step": 15685 + }, + { + "epoch": 1.2659188120409974, + "grad_norm": 0.72947096824646, + "learning_rate": 2.2328473774724178e-05, + "loss": 2.5033, + "step": 15686 + }, + { + "epoch": 1.2659995157775805, + "grad_norm": 0.7419683933258057, + "learning_rate": 2.231853133528937e-05, + "loss": 2.4881, + "step": 15687 + }, + { + "epoch": 1.2660802195141634, + "grad_norm": 0.7125211358070374, + "learning_rate": 2.2308590831879827e-05, + "loss": 2.4334, + "step": 15688 + }, + { + "epoch": 1.2661609232507465, + "grad_norm": 0.6668617129325867, + "learning_rate": 2.2298652264743315e-05, + "loss": 2.4144, + "step": 15689 + }, + { + "epoch": 1.2662416269873296, + "grad_norm": 0.8075512051582336, + "learning_rate": 2.2288715634127465e-05, + "loss": 2.421, + "step": 15690 + }, + { + "epoch": 1.2663223307239124, + "grad_norm": 0.6894629001617432, + "learning_rate": 2.2278780940279965e-05, + "loss": 2.4142, + "step": 15691 + }, + { + "epoch": 1.2664030344604955, + "grad_norm": 0.7418074011802673, + "learning_rate": 2.226884818344841e-05, + "loss": 2.4214, + "step": 15692 + }, + { + "epoch": 1.2664837381970786, + "grad_norm": 0.6724219918251038, + "learning_rate": 2.225891736388037e-05, + "loss": 2.4455, + "step": 15693 + }, + { + "epoch": 1.2665644419336615, + "grad_norm": 0.7202882766723633, + "learning_rate": 
2.224898848182331e-05, + "loss": 2.4017, + "step": 15694 + }, + { + "epoch": 1.2666451456702446, + "grad_norm": 0.7671259641647339, + "learning_rate": 2.2239061537524698e-05, + "loss": 2.4386, + "step": 15695 + }, + { + "epoch": 1.2667258494068276, + "grad_norm": 0.7154317498207092, + "learning_rate": 2.222913653123194e-05, + "loss": 2.3754, + "step": 15696 + }, + { + "epoch": 1.2668065531434105, + "grad_norm": 0.7203264236450195, + "learning_rate": 2.221921346319239e-05, + "loss": 2.3926, + "step": 15697 + }, + { + "epoch": 1.2668872568799936, + "grad_norm": 0.7104187607765198, + "learning_rate": 2.2209292333653365e-05, + "loss": 2.4528, + "step": 15698 + }, + { + "epoch": 1.2669679606165767, + "grad_norm": 0.7650138139724731, + "learning_rate": 2.2199373142862158e-05, + "loss": 2.4372, + "step": 15699 + }, + { + "epoch": 1.2670486643531595, + "grad_norm": 0.6796044111251831, + "learning_rate": 2.2189455891065903e-05, + "loss": 2.415, + "step": 15700 + }, + { + "epoch": 1.2671293680897426, + "grad_norm": 0.6749297380447388, + "learning_rate": 2.2179540578511813e-05, + "loss": 2.4337, + "step": 15701 + }, + { + "epoch": 1.2672100718263255, + "grad_norm": 0.7330272793769836, + "learning_rate": 2.216962720544703e-05, + "loss": 2.4322, + "step": 15702 + }, + { + "epoch": 1.2672907755629086, + "grad_norm": 0.6793510913848877, + "learning_rate": 2.215971577211855e-05, + "loss": 2.4473, + "step": 15703 + }, + { + "epoch": 1.2673714792994915, + "grad_norm": 0.7477267384529114, + "learning_rate": 2.2149806278773433e-05, + "loss": 2.4699, + "step": 15704 + }, + { + "epoch": 1.2674521830360745, + "grad_norm": 0.7048643827438354, + "learning_rate": 2.213989872565867e-05, + "loss": 2.4341, + "step": 15705 + }, + { + "epoch": 1.2675328867726576, + "grad_norm": 0.647433340549469, + "learning_rate": 2.2129993113021108e-05, + "loss": 2.423, + "step": 15706 + }, + { + "epoch": 1.2676135905092405, + "grad_norm": 0.6886507272720337, + "learning_rate": 2.2120089441107706e-05, + 
"loss": 2.4185, + "step": 15707 + }, + { + "epoch": 1.2676942942458236, + "grad_norm": 0.6720516085624695, + "learning_rate": 2.2110187710165242e-05, + "loss": 2.4587, + "step": 15708 + }, + { + "epoch": 1.2677749979824067, + "grad_norm": 0.676665723323822, + "learning_rate": 2.2100287920440543e-05, + "loss": 2.4241, + "step": 15709 + }, + { + "epoch": 1.2678557017189895, + "grad_norm": 0.6939559578895569, + "learning_rate": 2.209039007218028e-05, + "loss": 2.3974, + "step": 15710 + }, + { + "epoch": 1.2679364054555726, + "grad_norm": 0.6485786437988281, + "learning_rate": 2.2080494165631137e-05, + "loss": 2.4041, + "step": 15711 + }, + { + "epoch": 1.2680171091921557, + "grad_norm": 0.668319582939148, + "learning_rate": 2.2070600201039802e-05, + "loss": 2.4705, + "step": 15712 + }, + { + "epoch": 1.2680978129287386, + "grad_norm": 0.6837478280067444, + "learning_rate": 2.206070817865279e-05, + "loss": 2.4474, + "step": 15713 + }, + { + "epoch": 1.2681785166653217, + "grad_norm": 0.7000131011009216, + "learning_rate": 2.2050818098716664e-05, + "loss": 2.4463, + "step": 15714 + }, + { + "epoch": 1.2682592204019045, + "grad_norm": 0.7063068151473999, + "learning_rate": 2.204092996147794e-05, + "loss": 2.4226, + "step": 15715 + }, + { + "epoch": 1.2683399241384876, + "grad_norm": 0.6497172117233276, + "learning_rate": 2.2031043767183003e-05, + "loss": 2.3678, + "step": 15716 + }, + { + "epoch": 1.2684206278750705, + "grad_norm": 0.6558645963668823, + "learning_rate": 2.2021159516078262e-05, + "loss": 2.4021, + "step": 15717 + }, + { + "epoch": 1.2685013316116536, + "grad_norm": 0.7411713600158691, + "learning_rate": 2.2011277208410062e-05, + "loss": 2.4346, + "step": 15718 + }, + { + "epoch": 1.2685820353482367, + "grad_norm": 0.7275578379631042, + "learning_rate": 2.2001396844424714e-05, + "loss": 2.4262, + "step": 15719 + }, + { + "epoch": 1.2686627390848195, + "grad_norm": 0.7010936141014099, + "learning_rate": 2.199151842436844e-05, + "loss": 2.4774, + "step": 
15720 + }, + { + "epoch": 1.2687434428214026, + "grad_norm": 0.7551137208938599, + "learning_rate": 2.1981641948487462e-05, + "loss": 2.5286, + "step": 15721 + }, + { + "epoch": 1.2688241465579857, + "grad_norm": 0.6510799527168274, + "learning_rate": 2.1971767417027888e-05, + "loss": 2.3813, + "step": 15722 + }, + { + "epoch": 1.2689048502945686, + "grad_norm": 0.636050283908844, + "learning_rate": 2.196189483023584e-05, + "loss": 2.4226, + "step": 15723 + }, + { + "epoch": 1.2689855540311517, + "grad_norm": 0.6939265131950378, + "learning_rate": 2.1952024188357368e-05, + "loss": 2.4516, + "step": 15724 + }, + { + "epoch": 1.2690662577677347, + "grad_norm": 0.6715239882469177, + "learning_rate": 2.1942155491638494e-05, + "loss": 2.4358, + "step": 15725 + }, + { + "epoch": 1.2691469615043176, + "grad_norm": 0.740680456161499, + "learning_rate": 2.1932288740325123e-05, + "loss": 2.4135, + "step": 15726 + }, + { + "epoch": 1.2692276652409007, + "grad_norm": 0.6969335079193115, + "learning_rate": 2.1922423934663193e-05, + "loss": 2.43, + "step": 15727 + }, + { + "epoch": 1.2693083689774838, + "grad_norm": 0.6390758156776428, + "learning_rate": 2.1912561074898554e-05, + "loss": 2.4492, + "step": 15728 + }, + { + "epoch": 1.2693890727140666, + "grad_norm": 0.7129701375961304, + "learning_rate": 2.190270016127701e-05, + "loss": 2.3799, + "step": 15729 + }, + { + "epoch": 1.2694697764506497, + "grad_norm": 0.7309553027153015, + "learning_rate": 2.1892841194044332e-05, + "loss": 2.4955, + "step": 15730 + }, + { + "epoch": 1.2695504801872326, + "grad_norm": 0.7257225513458252, + "learning_rate": 2.1882984173446252e-05, + "loss": 2.4184, + "step": 15731 + }, + { + "epoch": 1.2696311839238157, + "grad_norm": 0.7434510588645935, + "learning_rate": 2.1873129099728384e-05, + "loss": 2.453, + "step": 15732 + }, + { + "epoch": 1.2697118876603986, + "grad_norm": 0.6643160581588745, + "learning_rate": 2.1863275973136356e-05, + "loss": 2.3619, + "step": 15733 + }, + { + "epoch": 
1.2697925913969816, + "grad_norm": 0.6677344441413879, + "learning_rate": 2.1853424793915778e-05, + "loss": 2.406, + "step": 15734 + }, + { + "epoch": 1.2698732951335647, + "grad_norm": 0.760028064250946, + "learning_rate": 2.1843575562312092e-05, + "loss": 2.5479, + "step": 15735 + }, + { + "epoch": 1.2699539988701476, + "grad_norm": 0.6668389439582825, + "learning_rate": 2.183372827857082e-05, + "loss": 2.4104, + "step": 15736 + }, + { + "epoch": 1.2700347026067307, + "grad_norm": 0.651155412197113, + "learning_rate": 2.182388294293736e-05, + "loss": 2.3738, + "step": 15737 + }, + { + "epoch": 1.2701154063433138, + "grad_norm": 0.736907958984375, + "learning_rate": 2.1814039555657084e-05, + "loss": 2.4179, + "step": 15738 + }, + { + "epoch": 1.2701961100798966, + "grad_norm": 0.7068225741386414, + "learning_rate": 2.180419811697534e-05, + "loss": 2.3911, + "step": 15739 + }, + { + "epoch": 1.2702768138164797, + "grad_norm": 0.6959261894226074, + "learning_rate": 2.1794358627137368e-05, + "loss": 2.452, + "step": 15740 + }, + { + "epoch": 1.2703575175530628, + "grad_norm": 0.6886181235313416, + "learning_rate": 2.1784521086388442e-05, + "loss": 2.4166, + "step": 15741 + }, + { + "epoch": 1.2704382212896457, + "grad_norm": 0.6494541168212891, + "learning_rate": 2.177468549497369e-05, + "loss": 2.3589, + "step": 15742 + }, + { + "epoch": 1.2705189250262288, + "grad_norm": 0.7008326649665833, + "learning_rate": 2.1764851853138247e-05, + "loss": 2.3697, + "step": 15743 + }, + { + "epoch": 1.2705996287628119, + "grad_norm": 0.6800456643104553, + "learning_rate": 2.1755020161127238e-05, + "loss": 2.4162, + "step": 15744 + }, + { + "epoch": 1.2706803324993947, + "grad_norm": 0.6836018562316895, + "learning_rate": 2.1745190419185634e-05, + "loss": 2.3977, + "step": 15745 + }, + { + "epoch": 1.2707610362359778, + "grad_norm": 0.6489691138267517, + "learning_rate": 2.173536262755844e-05, + "loss": 2.464, + "step": 15746 + }, + { + "epoch": 1.2708417399725607, + "grad_norm": 
0.7309786677360535, + "learning_rate": 2.172553678649061e-05, + "loss": 2.4065, + "step": 15747 + }, + { + "epoch": 1.2709224437091438, + "grad_norm": 0.6752686500549316, + "learning_rate": 2.1715712896227004e-05, + "loss": 2.3935, + "step": 15748 + }, + { + "epoch": 1.2710031474457266, + "grad_norm": 0.7039850354194641, + "learning_rate": 2.1705890957012465e-05, + "loss": 2.4605, + "step": 15749 + }, + { + "epoch": 1.2710838511823097, + "grad_norm": 0.6904652714729309, + "learning_rate": 2.169607096909182e-05, + "loss": 2.4264, + "step": 15750 + }, + { + "epoch": 1.2711645549188928, + "grad_norm": 0.7104331254959106, + "learning_rate": 2.168625293270974e-05, + "loss": 2.378, + "step": 15751 + }, + { + "epoch": 1.2712452586554757, + "grad_norm": 0.6732800602912903, + "learning_rate": 2.167643684811096e-05, + "loss": 2.4216, + "step": 15752 + }, + { + "epoch": 1.2713259623920588, + "grad_norm": 0.7207335829734802, + "learning_rate": 2.166662271554011e-05, + "loss": 2.3861, + "step": 15753 + }, + { + "epoch": 1.2714066661286418, + "grad_norm": 0.7561055421829224, + "learning_rate": 2.1656810535241813e-05, + "loss": 2.4753, + "step": 15754 + }, + { + "epoch": 1.2714873698652247, + "grad_norm": 0.7018210887908936, + "learning_rate": 2.1647000307460564e-05, + "loss": 2.401, + "step": 15755 + }, + { + "epoch": 1.2715680736018078, + "grad_norm": 0.6908013224601746, + "learning_rate": 2.163719203244089e-05, + "loss": 2.4451, + "step": 15756 + }, + { + "epoch": 1.2716487773383909, + "grad_norm": 0.734909176826477, + "learning_rate": 2.162738571042723e-05, + "loss": 2.4221, + "step": 15757 + }, + { + "epoch": 1.2717294810749737, + "grad_norm": 0.7047279477119446, + "learning_rate": 2.1617581341663973e-05, + "loss": 2.4149, + "step": 15758 + }, + { + "epoch": 1.2718101848115568, + "grad_norm": 0.6875640749931335, + "learning_rate": 2.1607778926395496e-05, + "loss": 2.3874, + "step": 15759 + }, + { + "epoch": 1.2718908885481397, + "grad_norm": 0.7300851345062256, + 
"learning_rate": 2.159797846486611e-05, + "loss": 2.4706, + "step": 15760 + }, + { + "epoch": 1.2719715922847228, + "grad_norm": 0.733775794506073, + "learning_rate": 2.1588179957320022e-05, + "loss": 2.4208, + "step": 15761 + }, + { + "epoch": 1.2720522960213057, + "grad_norm": 0.8375213742256165, + "learning_rate": 2.1578383404001458e-05, + "loss": 2.4672, + "step": 15762 + }, + { + "epoch": 1.2721329997578887, + "grad_norm": 0.7276780009269714, + "learning_rate": 2.15685888051546e-05, + "loss": 2.4536, + "step": 15763 + }, + { + "epoch": 1.2722137034944718, + "grad_norm": 0.7765224575996399, + "learning_rate": 2.1558796161023508e-05, + "loss": 2.3671, + "step": 15764 + }, + { + "epoch": 1.2722944072310547, + "grad_norm": 0.7225642204284668, + "learning_rate": 2.1549005471852256e-05, + "loss": 2.4316, + "step": 15765 + }, + { + "epoch": 1.2723751109676378, + "grad_norm": 0.6959484219551086, + "learning_rate": 2.1539216737884904e-05, + "loss": 2.4581, + "step": 15766 + }, + { + "epoch": 1.2724558147042209, + "grad_norm": 0.6943621039390564, + "learning_rate": 2.1529429959365332e-05, + "loss": 2.4372, + "step": 15767 + }, + { + "epoch": 1.2725365184408037, + "grad_norm": 0.7067148089408875, + "learning_rate": 2.151964513653746e-05, + "loss": 2.431, + "step": 15768 + }, + { + "epoch": 1.2726172221773868, + "grad_norm": 0.8317076563835144, + "learning_rate": 2.150986226964521e-05, + "loss": 2.4177, + "step": 15769 + }, + { + "epoch": 1.27269792591397, + "grad_norm": 0.7390087246894836, + "learning_rate": 2.150008135893239e-05, + "loss": 2.4711, + "step": 15770 + }, + { + "epoch": 1.2727786296505528, + "grad_norm": 0.6829150915145874, + "learning_rate": 2.1490302404642725e-05, + "loss": 2.4477, + "step": 15771 + }, + { + "epoch": 1.2728593333871359, + "grad_norm": 0.7355613708496094, + "learning_rate": 2.148052540701995e-05, + "loss": 2.493, + "step": 15772 + }, + { + "epoch": 1.272940037123719, + "grad_norm": 0.6872289776802063, + "learning_rate": 
2.1470750366307747e-05, + "loss": 2.4363, + "step": 15773 + }, + { + "epoch": 1.2730207408603018, + "grad_norm": 0.7753220796585083, + "learning_rate": 2.1460977282749705e-05, + "loss": 2.4376, + "step": 15774 + }, + { + "epoch": 1.273101444596885, + "grad_norm": 0.6717056632041931, + "learning_rate": 2.145120615658942e-05, + "loss": 2.4383, + "step": 15775 + }, + { + "epoch": 1.2731821483334678, + "grad_norm": 0.7441569566726685, + "learning_rate": 2.1441436988070428e-05, + "loss": 2.462, + "step": 15776 + }, + { + "epoch": 1.2732628520700509, + "grad_norm": 0.6824371814727783, + "learning_rate": 2.143166977743615e-05, + "loss": 2.4173, + "step": 15777 + }, + { + "epoch": 1.2733435558066337, + "grad_norm": 0.7310225963592529, + "learning_rate": 2.1421904524930038e-05, + "loss": 2.4222, + "step": 15778 + }, + { + "epoch": 1.2734242595432168, + "grad_norm": 0.7198066115379333, + "learning_rate": 2.141214123079548e-05, + "loss": 2.4262, + "step": 15779 + }, + { + "epoch": 1.2735049632798, + "grad_norm": 0.7081776857376099, + "learning_rate": 2.1402379895275783e-05, + "loss": 2.4473, + "step": 15780 + }, + { + "epoch": 1.2735856670163828, + "grad_norm": 0.6909368634223938, + "learning_rate": 2.1392620518614235e-05, + "loss": 2.4528, + "step": 15781 + }, + { + "epoch": 1.2736663707529658, + "grad_norm": 0.7170675992965698, + "learning_rate": 2.1382863101054107e-05, + "loss": 2.4214, + "step": 15782 + }, + { + "epoch": 1.273747074489549, + "grad_norm": 0.6992846727371216, + "learning_rate": 2.1373107642838497e-05, + "loss": 2.4397, + "step": 15783 + }, + { + "epoch": 1.2738277782261318, + "grad_norm": 0.7245237231254578, + "learning_rate": 2.1363354144210578e-05, + "loss": 2.373, + "step": 15784 + }, + { + "epoch": 1.273908481962715, + "grad_norm": 0.6929232478141785, + "learning_rate": 2.1353602605413435e-05, + "loss": 2.4297, + "step": 15785 + }, + { + "epoch": 1.273989185699298, + "grad_norm": 0.7243950366973877, + "learning_rate": 2.134385302669013e-05, + "loss": 
2.3856, + "step": 15786 + }, + { + "epoch": 1.2740698894358808, + "grad_norm": 0.6712679266929626, + "learning_rate": 2.133410540828359e-05, + "loss": 2.3818, + "step": 15787 + }, + { + "epoch": 1.274150593172464, + "grad_norm": 0.7433474063873291, + "learning_rate": 2.1324359750436774e-05, + "loss": 2.4148, + "step": 15788 + }, + { + "epoch": 1.274231296909047, + "grad_norm": 0.7225894927978516, + "learning_rate": 2.1314616053392577e-05, + "loss": 2.395, + "step": 15789 + }, + { + "epoch": 1.2743120006456299, + "grad_norm": 0.7026889324188232, + "learning_rate": 2.130487431739383e-05, + "loss": 2.4693, + "step": 15790 + }, + { + "epoch": 1.274392704382213, + "grad_norm": 0.6898565292358398, + "learning_rate": 2.1295134542683325e-05, + "loss": 2.3643, + "step": 15791 + }, + { + "epoch": 1.2744734081187958, + "grad_norm": 0.7212820649147034, + "learning_rate": 2.1285396729503826e-05, + "loss": 2.4178, + "step": 15792 + }, + { + "epoch": 1.274554111855379, + "grad_norm": 0.7149149179458618, + "learning_rate": 2.127566087809798e-05, + "loss": 2.4023, + "step": 15793 + }, + { + "epoch": 1.2746348155919618, + "grad_norm": 0.7039671540260315, + "learning_rate": 2.126592698870846e-05, + "loss": 2.4667, + "step": 15794 + }, + { + "epoch": 1.2747155193285449, + "grad_norm": 0.806849479675293, + "learning_rate": 2.1256195061577877e-05, + "loss": 2.4741, + "step": 15795 + }, + { + "epoch": 1.274796223065128, + "grad_norm": 0.7544776797294617, + "learning_rate": 2.124646509694872e-05, + "loss": 2.4258, + "step": 15796 + }, + { + "epoch": 1.2748769268017108, + "grad_norm": 0.6946810483932495, + "learning_rate": 2.1236737095063518e-05, + "loss": 2.4088, + "step": 15797 + }, + { + "epoch": 1.274957630538294, + "grad_norm": 0.7714219093322754, + "learning_rate": 2.1227011056164714e-05, + "loss": 2.4705, + "step": 15798 + }, + { + "epoch": 1.275038334274877, + "grad_norm": 0.6789658665657043, + "learning_rate": 2.121728698049471e-05, + "loss": 2.4692, + "step": 15799 + }, + { + 
"epoch": 1.2751190380114599, + "grad_norm": 0.7003477215766907, + "learning_rate": 2.120756486829586e-05, + "loss": 2.4437, + "step": 15800 + }, + { + "epoch": 1.275199741748043, + "grad_norm": 0.6802948117256165, + "learning_rate": 2.1197844719810455e-05, + "loss": 2.4002, + "step": 15801 + }, + { + "epoch": 1.275280445484626, + "grad_norm": 0.67823326587677, + "learning_rate": 2.1188126535280773e-05, + "loss": 2.5119, + "step": 15802 + }, + { + "epoch": 1.275361149221209, + "grad_norm": 0.6580843925476074, + "learning_rate": 2.1178410314948972e-05, + "loss": 2.3814, + "step": 15803 + }, + { + "epoch": 1.275441852957792, + "grad_norm": 0.681642472743988, + "learning_rate": 2.1168696059057226e-05, + "loss": 2.4206, + "step": 15804 + }, + { + "epoch": 1.275522556694375, + "grad_norm": 0.7483543753623962, + "learning_rate": 2.1158983767847674e-05, + "loss": 2.4633, + "step": 15805 + }, + { + "epoch": 1.275603260430958, + "grad_norm": 0.6565235257148743, + "learning_rate": 2.11492734415623e-05, + "loss": 2.4145, + "step": 15806 + }, + { + "epoch": 1.275683964167541, + "grad_norm": 0.6606764793395996, + "learning_rate": 2.1139565080443157e-05, + "loss": 2.3935, + "step": 15807 + }, + { + "epoch": 1.275764667904124, + "grad_norm": 0.7915800213813782, + "learning_rate": 2.1129858684732206e-05, + "loss": 2.4288, + "step": 15808 + }, + { + "epoch": 1.275845371640707, + "grad_norm": 0.6763594746589661, + "learning_rate": 2.112015425467133e-05, + "loss": 2.4147, + "step": 15809 + }, + { + "epoch": 1.2759260753772899, + "grad_norm": 0.6886053085327148, + "learning_rate": 2.1110451790502405e-05, + "loss": 2.3798, + "step": 15810 + }, + { + "epoch": 1.276006779113873, + "grad_norm": 0.686122715473175, + "learning_rate": 2.110075129246728e-05, + "loss": 2.3896, + "step": 15811 + }, + { + "epoch": 1.276087482850456, + "grad_norm": 0.6989614367485046, + "learning_rate": 2.109105276080764e-05, + "loss": 2.4533, + "step": 15812 + }, + { + "epoch": 1.276168186587039, + "grad_norm": 
0.6818450689315796, + "learning_rate": 2.1081356195765232e-05, + "loss": 2.4012, + "step": 15813 + }, + { + "epoch": 1.276248890323622, + "grad_norm": 0.7492663860321045, + "learning_rate": 2.107166159758176e-05, + "loss": 2.4269, + "step": 15814 + }, + { + "epoch": 1.276329594060205, + "grad_norm": 0.6752359867095947, + "learning_rate": 2.1061968966498767e-05, + "loss": 2.4478, + "step": 15815 + }, + { + "epoch": 1.276410297796788, + "grad_norm": 0.6784162521362305, + "learning_rate": 2.1052278302757854e-05, + "loss": 2.4853, + "step": 15816 + }, + { + "epoch": 1.276491001533371, + "grad_norm": 0.7273215651512146, + "learning_rate": 2.104258960660055e-05, + "loss": 2.4365, + "step": 15817 + }, + { + "epoch": 1.2765717052699541, + "grad_norm": 0.7021621465682983, + "learning_rate": 2.1032902878268323e-05, + "loss": 2.4665, + "step": 15818 + }, + { + "epoch": 1.276652409006537, + "grad_norm": 0.666828989982605, + "learning_rate": 2.102321811800253e-05, + "loss": 2.3922, + "step": 15819 + }, + { + "epoch": 1.27673311274312, + "grad_norm": 0.6780487298965454, + "learning_rate": 2.1013535326044608e-05, + "loss": 2.4072, + "step": 15820 + }, + { + "epoch": 1.276813816479703, + "grad_norm": 0.6474688053131104, + "learning_rate": 2.1003854502635888e-05, + "loss": 2.4145, + "step": 15821 + }, + { + "epoch": 1.276894520216286, + "grad_norm": 0.6712753772735596, + "learning_rate": 2.0994175648017587e-05, + "loss": 2.4349, + "step": 15822 + }, + { + "epoch": 1.2769752239528689, + "grad_norm": 0.6705189943313599, + "learning_rate": 2.098449876243096e-05, + "loss": 2.4376, + "step": 15823 + }, + { + "epoch": 1.277055927689452, + "grad_norm": 0.6794685125350952, + "learning_rate": 2.0974823846117197e-05, + "loss": 2.3717, + "step": 15824 + }, + { + "epoch": 1.277136631426035, + "grad_norm": 0.7145677804946899, + "learning_rate": 2.0965150899317364e-05, + "loss": 2.3829, + "step": 15825 + }, + { + "epoch": 1.277217335162618, + "grad_norm": 0.7043245434761047, + "learning_rate": 
2.095547992227257e-05, + "loss": 2.405, + "step": 15826 + }, + { + "epoch": 1.277298038899201, + "grad_norm": 0.7969205379486084, + "learning_rate": 2.0945810915223873e-05, + "loss": 2.4115, + "step": 15827 + }, + { + "epoch": 1.277378742635784, + "grad_norm": 0.657482385635376, + "learning_rate": 2.0936143878412186e-05, + "loss": 2.372, + "step": 15828 + }, + { + "epoch": 1.277459446372367, + "grad_norm": 0.7315167784690857, + "learning_rate": 2.0926478812078466e-05, + "loss": 2.4372, + "step": 15829 + }, + { + "epoch": 1.27754015010895, + "grad_norm": 0.6985061764717102, + "learning_rate": 2.09168157164636e-05, + "loss": 2.3901, + "step": 15830 + }, + { + "epoch": 1.2776208538455331, + "grad_norm": 0.6906184554100037, + "learning_rate": 2.0907154591808408e-05, + "loss": 2.4562, + "step": 15831 + }, + { + "epoch": 1.277701557582116, + "grad_norm": 0.655094563961029, + "learning_rate": 2.0897495438353676e-05, + "loss": 2.451, + "step": 15832 + }, + { + "epoch": 1.277782261318699, + "grad_norm": 0.7663134932518005, + "learning_rate": 2.0887838256340143e-05, + "loss": 2.4634, + "step": 15833 + }, + { + "epoch": 1.2778629650552822, + "grad_norm": 0.7164491415023804, + "learning_rate": 2.087818304600849e-05, + "loss": 2.4624, + "step": 15834 + }, + { + "epoch": 1.277943668791865, + "grad_norm": 0.6962822079658508, + "learning_rate": 2.0868529807599336e-05, + "loss": 2.4325, + "step": 15835 + }, + { + "epoch": 1.2780243725284481, + "grad_norm": 0.702985405921936, + "learning_rate": 2.0858878541353255e-05, + "loss": 2.4219, + "step": 15836 + }, + { + "epoch": 1.278105076265031, + "grad_norm": 0.7605595588684082, + "learning_rate": 2.0849229247510826e-05, + "loss": 2.4201, + "step": 15837 + }, + { + "epoch": 1.278185780001614, + "grad_norm": 0.8479344248771667, + "learning_rate": 2.083958192631249e-05, + "loss": 2.4689, + "step": 15838 + }, + { + "epoch": 1.278266483738197, + "grad_norm": 0.7241235375404358, + "learning_rate": 2.082993657799869e-05, + "loss": 2.4861, + 
"step": 15839 + }, + { + "epoch": 1.27834718747478, + "grad_norm": 0.7069835066795349, + "learning_rate": 2.0820293202809827e-05, + "loss": 2.3759, + "step": 15840 + }, + { + "epoch": 1.2784278912113631, + "grad_norm": 0.6606370210647583, + "learning_rate": 2.0810651800986237e-05, + "loss": 2.4444, + "step": 15841 + }, + { + "epoch": 1.278508594947946, + "grad_norm": 0.6608174443244934, + "learning_rate": 2.08010123727682e-05, + "loss": 2.4339, + "step": 15842 + }, + { + "epoch": 1.278589298684529, + "grad_norm": 0.751000702381134, + "learning_rate": 2.0791374918396e-05, + "loss": 2.4327, + "step": 15843 + }, + { + "epoch": 1.2786700024211122, + "grad_norm": 0.7223808765411377, + "learning_rate": 2.0781739438109748e-05, + "loss": 2.3573, + "step": 15844 + }, + { + "epoch": 1.278750706157695, + "grad_norm": 0.6872109770774841, + "learning_rate": 2.0772105932149642e-05, + "loss": 2.3973, + "step": 15845 + }, + { + "epoch": 1.2788314098942781, + "grad_norm": 0.6967385411262512, + "learning_rate": 2.0762474400755762e-05, + "loss": 2.4622, + "step": 15846 + }, + { + "epoch": 1.2789121136308612, + "grad_norm": 0.7289159893989563, + "learning_rate": 2.0752844844168163e-05, + "loss": 2.4507, + "step": 15847 + }, + { + "epoch": 1.278992817367444, + "grad_norm": 0.7735978364944458, + "learning_rate": 2.0743217262626802e-05, + "loss": 2.4341, + "step": 15848 + }, + { + "epoch": 1.2790735211040272, + "grad_norm": 0.7209177017211914, + "learning_rate": 2.0733591656371655e-05, + "loss": 2.4024, + "step": 15849 + }, + { + "epoch": 1.2791542248406103, + "grad_norm": 0.6789259314537048, + "learning_rate": 2.0723968025642604e-05, + "loss": 2.3809, + "step": 15850 + }, + { + "epoch": 1.2792349285771931, + "grad_norm": 0.6972812414169312, + "learning_rate": 2.0714346370679495e-05, + "loss": 2.3986, + "step": 15851 + }, + { + "epoch": 1.2793156323137762, + "grad_norm": 0.7144166827201843, + "learning_rate": 2.070472669172213e-05, + "loss": 2.4241, + "step": 15852 + }, + { + "epoch": 
1.279396336050359, + "grad_norm": 0.7325223088264465, + "learning_rate": 2.0695108989010282e-05, + "loss": 2.452, + "step": 15853 + }, + { + "epoch": 1.2794770397869422, + "grad_norm": 0.6900116205215454, + "learning_rate": 2.0685493262783608e-05, + "loss": 2.4091, + "step": 15854 + }, + { + "epoch": 1.279557743523525, + "grad_norm": 0.6846197843551636, + "learning_rate": 2.0675879513281758e-05, + "loss": 2.4337, + "step": 15855 + }, + { + "epoch": 1.2796384472601081, + "grad_norm": 0.6901541352272034, + "learning_rate": 2.0666267740744372e-05, + "loss": 2.4586, + "step": 15856 + }, + { + "epoch": 1.2797191509966912, + "grad_norm": 0.6842665672302246, + "learning_rate": 2.0656657945410953e-05, + "loss": 2.4383, + "step": 15857 + }, + { + "epoch": 1.279799854733274, + "grad_norm": 0.7450493574142456, + "learning_rate": 2.0647050127521028e-05, + "loss": 2.4308, + "step": 15858 + }, + { + "epoch": 1.2798805584698572, + "grad_norm": 0.6928436160087585, + "learning_rate": 2.0637444287314033e-05, + "loss": 2.4726, + "step": 15859 + }, + { + "epoch": 1.2799612622064402, + "grad_norm": 0.6539968252182007, + "learning_rate": 2.06278404250294e-05, + "loss": 2.3983, + "step": 15860 + }, + { + "epoch": 1.280041965943023, + "grad_norm": 0.7183163166046143, + "learning_rate": 2.0618238540906444e-05, + "loss": 2.4172, + "step": 15861 + }, + { + "epoch": 1.2801226696796062, + "grad_norm": 0.7070814371109009, + "learning_rate": 2.0608638635184507e-05, + "loss": 2.4018, + "step": 15862 + }, + { + "epoch": 1.2802033734161893, + "grad_norm": 0.7589142918586731, + "learning_rate": 2.0599040708102847e-05, + "loss": 2.4175, + "step": 15863 + }, + { + "epoch": 1.2802840771527721, + "grad_norm": 0.6945414543151855, + "learning_rate": 2.0589444759900613e-05, + "loss": 2.4093, + "step": 15864 + }, + { + "epoch": 1.2803647808893552, + "grad_norm": 0.685482919216156, + "learning_rate": 2.0579850790817003e-05, + "loss": 2.4388, + "step": 15865 + }, + { + "epoch": 1.280445484625938, + 
"grad_norm": 0.7089706063270569, + "learning_rate": 2.0570258801091148e-05, + "loss": 2.3779, + "step": 15866 + }, + { + "epoch": 1.2805261883625212, + "grad_norm": 0.6994217038154602, + "learning_rate": 2.0560668790962046e-05, + "loss": 2.3757, + "step": 15867 + }, + { + "epoch": 1.280606892099104, + "grad_norm": 0.7170232534408569, + "learning_rate": 2.055108076066874e-05, + "loss": 2.4087, + "step": 15868 + }, + { + "epoch": 1.2806875958356871, + "grad_norm": 0.7008751034736633, + "learning_rate": 2.0541494710450206e-05, + "loss": 2.4384, + "step": 15869 + }, + { + "epoch": 1.2807682995722702, + "grad_norm": 0.6795800924301147, + "learning_rate": 2.053191064054527e-05, + "loss": 2.415, + "step": 15870 + }, + { + "epoch": 1.280849003308853, + "grad_norm": 0.6650210022926331, + "learning_rate": 2.0522328551192882e-05, + "loss": 2.4421, + "step": 15871 + }, + { + "epoch": 1.2809297070454362, + "grad_norm": 0.7045374512672424, + "learning_rate": 2.0512748442631858e-05, + "loss": 2.4285, + "step": 15872 + }, + { + "epoch": 1.2810104107820193, + "grad_norm": 0.6585350632667542, + "learning_rate": 2.0503170315100883e-05, + "loss": 2.3806, + "step": 15873 + }, + { + "epoch": 1.2810911145186021, + "grad_norm": 0.7833496332168579, + "learning_rate": 2.0493594168838725e-05, + "loss": 2.4557, + "step": 15874 + }, + { + "epoch": 1.2811718182551852, + "grad_norm": 0.7237457036972046, + "learning_rate": 2.0484020004084048e-05, + "loss": 2.3966, + "step": 15875 + }, + { + "epoch": 1.2812525219917683, + "grad_norm": 0.7416609525680542, + "learning_rate": 2.0474447821075426e-05, + "loss": 2.3729, + "step": 15876 + }, + { + "epoch": 1.2813332257283512, + "grad_norm": 0.7148095369338989, + "learning_rate": 2.046487762005146e-05, + "loss": 2.4163, + "step": 15877 + }, + { + "epoch": 1.2814139294649343, + "grad_norm": 0.670281171798706, + "learning_rate": 2.0455309401250632e-05, + "loss": 2.383, + "step": 15878 + }, + { + "epoch": 1.2814946332015174, + "grad_norm": 
0.6968950629234314, + "learning_rate": 2.0445743164911457e-05, + "loss": 2.3967, + "step": 15879 + }, + { + "epoch": 1.2815753369381002, + "grad_norm": 0.783441960811615, + "learning_rate": 2.0436178911272298e-05, + "loss": 2.455, + "step": 15880 + }, + { + "epoch": 1.2816560406746833, + "grad_norm": 0.709032416343689, + "learning_rate": 2.0426616640571518e-05, + "loss": 2.4207, + "step": 15881 + }, + { + "epoch": 1.2817367444112662, + "grad_norm": 0.6727990508079529, + "learning_rate": 2.0417056353047504e-05, + "loss": 2.4115, + "step": 15882 + }, + { + "epoch": 1.2818174481478493, + "grad_norm": 0.7336034774780273, + "learning_rate": 2.0407498048938445e-05, + "loss": 2.43, + "step": 15883 + }, + { + "epoch": 1.2818981518844321, + "grad_norm": 0.7649042010307312, + "learning_rate": 2.0397941728482604e-05, + "loss": 2.4655, + "step": 15884 + }, + { + "epoch": 1.2819788556210152, + "grad_norm": 0.7218052744865417, + "learning_rate": 2.038838739191816e-05, + "loss": 2.4872, + "step": 15885 + }, + { + "epoch": 1.2820595593575983, + "grad_norm": 0.7192350625991821, + "learning_rate": 2.0378835039483178e-05, + "loss": 2.4751, + "step": 15886 + }, + { + "epoch": 1.2821402630941812, + "grad_norm": 0.7059212923049927, + "learning_rate": 2.0369284671415768e-05, + "loss": 2.43, + "step": 15887 + }, + { + "epoch": 1.2822209668307643, + "grad_norm": 0.7387098073959351, + "learning_rate": 2.0359736287953956e-05, + "loss": 2.4281, + "step": 15888 + }, + { + "epoch": 1.2823016705673473, + "grad_norm": 0.7454321980476379, + "learning_rate": 2.035018988933568e-05, + "loss": 2.4372, + "step": 15889 + }, + { + "epoch": 1.2823823743039302, + "grad_norm": 0.6822765469551086, + "learning_rate": 2.034064547579888e-05, + "loss": 2.3728, + "step": 15890 + }, + { + "epoch": 1.2824630780405133, + "grad_norm": 0.6917527914047241, + "learning_rate": 2.0331103047581412e-05, + "loss": 2.3997, + "step": 15891 + }, + { + "epoch": 1.2825437817770964, + "grad_norm": 0.6734376549720764, + 
"learning_rate": 2.032156260492113e-05, + "loss": 2.4495, + "step": 15892 + }, + { + "epoch": 1.2826244855136792, + "grad_norm": 0.7222443222999573, + "learning_rate": 2.0312024148055776e-05, + "loss": 2.3466, + "step": 15893 + }, + { + "epoch": 1.2827051892502623, + "grad_norm": 0.703714907169342, + "learning_rate": 2.030248767722309e-05, + "loss": 2.4599, + "step": 15894 + }, + { + "epoch": 1.2827858929868454, + "grad_norm": 0.655161440372467, + "learning_rate": 2.029295319266078e-05, + "loss": 2.3896, + "step": 15895 + }, + { + "epoch": 1.2828665967234283, + "grad_norm": 0.6449242234230042, + "learning_rate": 2.028342069460639e-05, + "loss": 2.3511, + "step": 15896 + }, + { + "epoch": 1.2829473004600114, + "grad_norm": 0.6578382849693298, + "learning_rate": 2.027389018329755e-05, + "loss": 2.3678, + "step": 15897 + }, + { + "epoch": 1.2830280041965942, + "grad_norm": 0.7047572731971741, + "learning_rate": 2.0264361658971797e-05, + "loss": 2.4522, + "step": 15898 + }, + { + "epoch": 1.2831087079331773, + "grad_norm": 0.7310267090797424, + "learning_rate": 2.0254835121866554e-05, + "loss": 2.4117, + "step": 15899 + }, + { + "epoch": 1.2831894116697602, + "grad_norm": 0.7020776867866516, + "learning_rate": 2.024531057221927e-05, + "loss": 2.4033, + "step": 15900 + }, + { + "epoch": 1.2832701154063433, + "grad_norm": 0.6967746615409851, + "learning_rate": 2.023578801026733e-05, + "loss": 2.3491, + "step": 15901 + }, + { + "epoch": 1.2833508191429264, + "grad_norm": 0.7062339782714844, + "learning_rate": 2.022626743624807e-05, + "loss": 2.4598, + "step": 15902 + }, + { + "epoch": 1.2834315228795092, + "grad_norm": 0.730625331401825, + "learning_rate": 2.0216748850398748e-05, + "loss": 2.4995, + "step": 15903 + }, + { + "epoch": 1.2835122266160923, + "grad_norm": 0.6634403467178345, + "learning_rate": 2.020723225295662e-05, + "loss": 2.3843, + "step": 15904 + }, + { + "epoch": 1.2835929303526754, + "grad_norm": 0.6924816966056824, + "learning_rate": 
2.019771764415883e-05, + "loss": 2.4258, + "step": 15905 + }, + { + "epoch": 1.2836736340892583, + "grad_norm": 0.7127227187156677, + "learning_rate": 2.018820502424251e-05, + "loss": 2.4038, + "step": 15906 + }, + { + "epoch": 1.2837543378258414, + "grad_norm": 0.7108431458473206, + "learning_rate": 2.0178694393444785e-05, + "loss": 2.4571, + "step": 15907 + }, + { + "epoch": 1.2838350415624245, + "grad_norm": 0.7478229999542236, + "learning_rate": 2.016918575200262e-05, + "loss": 2.4526, + "step": 15908 + }, + { + "epoch": 1.2839157452990073, + "grad_norm": 0.65651935338974, + "learning_rate": 2.015967910015303e-05, + "loss": 2.434, + "step": 15909 + }, + { + "epoch": 1.2839964490355904, + "grad_norm": 0.7285312414169312, + "learning_rate": 2.015017443813294e-05, + "loss": 2.3857, + "step": 15910 + }, + { + "epoch": 1.2840771527721733, + "grad_norm": 0.6947231292724609, + "learning_rate": 2.014067176617923e-05, + "loss": 2.4294, + "step": 15911 + }, + { + "epoch": 1.2841578565087564, + "grad_norm": 0.6965867877006531, + "learning_rate": 2.0131171084528744e-05, + "loss": 2.4514, + "step": 15912 + }, + { + "epoch": 1.2842385602453392, + "grad_norm": 0.6962311863899231, + "learning_rate": 2.0121672393418246e-05, + "loss": 2.4391, + "step": 15913 + }, + { + "epoch": 1.2843192639819223, + "grad_norm": 0.6687992215156555, + "learning_rate": 2.01121756930845e-05, + "loss": 2.4266, + "step": 15914 + }, + { + "epoch": 1.2843999677185054, + "grad_norm": 0.7118954658508301, + "learning_rate": 2.0102680983764145e-05, + "loss": 2.3436, + "step": 15915 + }, + { + "epoch": 1.2844806714550883, + "grad_norm": 0.6866199970245361, + "learning_rate": 2.009318826569382e-05, + "loss": 2.3719, + "step": 15916 + }, + { + "epoch": 1.2845613751916714, + "grad_norm": 0.6701404452323914, + "learning_rate": 2.008369753911016e-05, + "loss": 2.4875, + "step": 15917 + }, + { + "epoch": 1.2846420789282544, + "grad_norm": 0.7020917534828186, + "learning_rate": 2.007420880424963e-05, + "loss": 
2.3871, + "step": 15918 + }, + { + "epoch": 1.2847227826648373, + "grad_norm": 0.6865704655647278, + "learning_rate": 2.006472206134875e-05, + "loss": 2.3815, + "step": 15919 + }, + { + "epoch": 1.2848034864014204, + "grad_norm": 0.7106871008872986, + "learning_rate": 2.0055237310643948e-05, + "loss": 2.4276, + "step": 15920 + }, + { + "epoch": 1.2848841901380035, + "grad_norm": 0.6891976594924927, + "learning_rate": 2.004575455237161e-05, + "loss": 2.3641, + "step": 15921 + }, + { + "epoch": 1.2849648938745863, + "grad_norm": 0.6385056972503662, + "learning_rate": 2.0036273786768067e-05, + "loss": 2.3898, + "step": 15922 + }, + { + "epoch": 1.2850455976111694, + "grad_norm": 0.7038321495056152, + "learning_rate": 2.0026795014069633e-05, + "loss": 2.4688, + "step": 15923 + }, + { + "epoch": 1.2851263013477525, + "grad_norm": 0.6310208439826965, + "learning_rate": 2.0017318234512494e-05, + "loss": 2.3821, + "step": 15924 + }, + { + "epoch": 1.2852070050843354, + "grad_norm": 0.6989426016807556, + "learning_rate": 2.0007843448332865e-05, + "loss": 2.434, + "step": 15925 + }, + { + "epoch": 1.2852877088209185, + "grad_norm": 0.6666426658630371, + "learning_rate": 1.9998370655766886e-05, + "loss": 2.4687, + "step": 15926 + }, + { + "epoch": 1.2853684125575013, + "grad_norm": 0.6421633958816528, + "learning_rate": 1.9988899857050648e-05, + "loss": 2.4269, + "step": 15927 + }, + { + "epoch": 1.2854491162940844, + "grad_norm": 0.7229343056678772, + "learning_rate": 1.997943105242016e-05, + "loss": 2.4139, + "step": 15928 + }, + { + "epoch": 1.2855298200306673, + "grad_norm": 0.7168964743614197, + "learning_rate": 1.9969964242111427e-05, + "loss": 2.405, + "step": 15929 + }, + { + "epoch": 1.2856105237672504, + "grad_norm": 0.6824480891227722, + "learning_rate": 1.99604994263604e-05, + "loss": 2.3955, + "step": 15930 + }, + { + "epoch": 1.2856912275038335, + "grad_norm": 0.670956552028656, + "learning_rate": 1.995103660540294e-05, + "loss": 2.3743, + "step": 15931 + }, + { 
+ "epoch": 1.2857719312404163, + "grad_norm": 0.7057971954345703, + "learning_rate": 1.9941575779474864e-05, + "loss": 2.4496, + "step": 15932 + }, + { + "epoch": 1.2858526349769994, + "grad_norm": 0.7802264094352722, + "learning_rate": 1.9932116948812052e-05, + "loss": 2.4231, + "step": 15933 + }, + { + "epoch": 1.2859333387135825, + "grad_norm": 0.7151160836219788, + "learning_rate": 1.992266011365016e-05, + "loss": 2.4319, + "step": 15934 + }, + { + "epoch": 1.2860140424501654, + "grad_norm": 0.7078769207000732, + "learning_rate": 1.991320527422489e-05, + "loss": 2.4037, + "step": 15935 + }, + { + "epoch": 1.2860947461867485, + "grad_norm": 0.7483938336372375, + "learning_rate": 1.9903752430771927e-05, + "loss": 2.4946, + "step": 15936 + }, + { + "epoch": 1.2861754499233315, + "grad_norm": 0.7774620056152344, + "learning_rate": 1.9894301583526808e-05, + "loss": 2.4536, + "step": 15937 + }, + { + "epoch": 1.2862561536599144, + "grad_norm": 0.7311348915100098, + "learning_rate": 1.988485273272509e-05, + "loss": 2.4178, + "step": 15938 + }, + { + "epoch": 1.2863368573964975, + "grad_norm": 0.6821309328079224, + "learning_rate": 1.9875405878602282e-05, + "loss": 2.4851, + "step": 15939 + }, + { + "epoch": 1.2864175611330806, + "grad_norm": 0.7081651091575623, + "learning_rate": 1.9865961021393785e-05, + "loss": 2.4377, + "step": 15940 + }, + { + "epoch": 1.2864982648696635, + "grad_norm": 0.8093439340591431, + "learning_rate": 1.9856518161335014e-05, + "loss": 2.4681, + "step": 15941 + }, + { + "epoch": 1.2865789686062465, + "grad_norm": 0.6769521832466125, + "learning_rate": 1.984707729866131e-05, + "loss": 2.4231, + "step": 15942 + }, + { + "epoch": 1.2866596723428294, + "grad_norm": 0.6973356604576111, + "learning_rate": 1.983763843360795e-05, + "loss": 2.4144, + "step": 15943 + }, + { + "epoch": 1.2867403760794125, + "grad_norm": 0.7814682722091675, + "learning_rate": 1.9828201566410197e-05, + "loss": 2.3935, + "step": 15944 + }, + { + "epoch": 
1.2868210798159954, + "grad_norm": 0.7545498609542847, + "learning_rate": 1.9818766697303236e-05, + "loss": 2.4136, + "step": 15945 + }, + { + "epoch": 1.2869017835525784, + "grad_norm": 0.7165581583976746, + "learning_rate": 1.9809333826522225e-05, + "loss": 2.3757, + "step": 15946 + }, + { + "epoch": 1.2869824872891615, + "grad_norm": 0.6812456846237183, + "learning_rate": 1.9799902954302208e-05, + "loss": 2.4143, + "step": 15947 + }, + { + "epoch": 1.2870631910257444, + "grad_norm": 0.7231366634368896, + "learning_rate": 1.9790474080878262e-05, + "loss": 2.4837, + "step": 15948 + }, + { + "epoch": 1.2871438947623275, + "grad_norm": 0.690916121006012, + "learning_rate": 1.9781047206485393e-05, + "loss": 2.4513, + "step": 15949 + }, + { + "epoch": 1.2872245984989106, + "grad_norm": 0.6608129143714905, + "learning_rate": 1.9771622331358485e-05, + "loss": 2.3908, + "step": 15950 + }, + { + "epoch": 1.2873053022354934, + "grad_norm": 0.7194501161575317, + "learning_rate": 1.976219945573249e-05, + "loss": 2.38, + "step": 15951 + }, + { + "epoch": 1.2873860059720765, + "grad_norm": 0.7315083146095276, + "learning_rate": 1.9752778579842213e-05, + "loss": 2.4351, + "step": 15952 + }, + { + "epoch": 1.2874667097086596, + "grad_norm": 0.7313492298126221, + "learning_rate": 1.974335970392246e-05, + "loss": 2.3531, + "step": 15953 + }, + { + "epoch": 1.2875474134452425, + "grad_norm": 0.6982418894767761, + "learning_rate": 1.9733942828207985e-05, + "loss": 2.4319, + "step": 15954 + }, + { + "epoch": 1.2876281171818256, + "grad_norm": 0.6664792895317078, + "learning_rate": 1.972452795293347e-05, + "loss": 2.3981, + "step": 15955 + }, + { + "epoch": 1.2877088209184087, + "grad_norm": 0.6849696040153503, + "learning_rate": 1.9715115078333578e-05, + "loss": 2.3952, + "step": 15956 + }, + { + "epoch": 1.2877895246549915, + "grad_norm": 0.7355225086212158, + "learning_rate": 1.9705704204642873e-05, + "loss": 2.4556, + "step": 15957 + }, + { + "epoch": 1.2878702283915746, + 
"grad_norm": 0.6850876808166504, + "learning_rate": 1.9696295332095906e-05, + "loss": 2.3873, + "step": 15958 + }, + { + "epoch": 1.2879509321281575, + "grad_norm": 0.6449069976806641, + "learning_rate": 1.9686888460927198e-05, + "loss": 2.4226, + "step": 15959 + }, + { + "epoch": 1.2880316358647406, + "grad_norm": 0.7517794966697693, + "learning_rate": 1.967748359137114e-05, + "loss": 2.377, + "step": 15960 + }, + { + "epoch": 1.2881123396013234, + "grad_norm": 0.6861303448677063, + "learning_rate": 1.9668080723662162e-05, + "loss": 2.4451, + "step": 15961 + }, + { + "epoch": 1.2881930433379065, + "grad_norm": 0.7025154829025269, + "learning_rate": 1.9658679858034602e-05, + "loss": 2.3856, + "step": 15962 + }, + { + "epoch": 1.2882737470744896, + "grad_norm": 0.6775577068328857, + "learning_rate": 1.964928099472275e-05, + "loss": 2.4383, + "step": 15963 + }, + { + "epoch": 1.2883544508110725, + "grad_norm": 0.6889605522155762, + "learning_rate": 1.963988413396086e-05, + "loss": 2.3766, + "step": 15964 + }, + { + "epoch": 1.2884351545476556, + "grad_norm": 0.6697166562080383, + "learning_rate": 1.9630489275983156e-05, + "loss": 2.44, + "step": 15965 + }, + { + "epoch": 1.2885158582842386, + "grad_norm": 0.6895437836647034, + "learning_rate": 1.96210964210237e-05, + "loss": 2.4242, + "step": 15966 + }, + { + "epoch": 1.2885965620208215, + "grad_norm": 0.6955164670944214, + "learning_rate": 1.9611705569316652e-05, + "loss": 2.3915, + "step": 15967 + }, + { + "epoch": 1.2886772657574046, + "grad_norm": 0.7133461236953735, + "learning_rate": 1.960231672109605e-05, + "loss": 2.4307, + "step": 15968 + }, + { + "epoch": 1.2887579694939877, + "grad_norm": 0.6874761581420898, + "learning_rate": 1.9592929876595857e-05, + "loss": 2.4371, + "step": 15969 + }, + { + "epoch": 1.2888386732305706, + "grad_norm": 0.7168406248092651, + "learning_rate": 1.9583545036050044e-05, + "loss": 2.4681, + "step": 15970 + }, + { + "epoch": 1.2889193769671536, + "grad_norm": 0.701874852180481, 
+ "learning_rate": 1.9574162199692492e-05, + "loss": 2.4746, + "step": 15971 + }, + { + "epoch": 1.2890000807037365, + "grad_norm": 0.7118390202522278, + "learning_rate": 1.9564781367757058e-05, + "loss": 2.4139, + "step": 15972 + }, + { + "epoch": 1.2890807844403196, + "grad_norm": 0.6597239971160889, + "learning_rate": 1.955540254047753e-05, + "loss": 2.4346, + "step": 15973 + }, + { + "epoch": 1.2891614881769025, + "grad_norm": 0.7461068630218506, + "learning_rate": 1.9546025718087645e-05, + "loss": 2.4331, + "step": 15974 + }, + { + "epoch": 1.2892421919134855, + "grad_norm": 0.6992977857589722, + "learning_rate": 1.953665090082115e-05, + "loss": 2.424, + "step": 15975 + }, + { + "epoch": 1.2893228956500686, + "grad_norm": 0.6674031615257263, + "learning_rate": 1.9527278088911617e-05, + "loss": 2.4545, + "step": 15976 + }, + { + "epoch": 1.2894035993866515, + "grad_norm": 0.7377402782440186, + "learning_rate": 1.9517907282592662e-05, + "loss": 2.4625, + "step": 15977 + }, + { + "epoch": 1.2894843031232346, + "grad_norm": 0.720579206943512, + "learning_rate": 1.950853848209788e-05, + "loss": 2.4073, + "step": 15978 + }, + { + "epoch": 1.2895650068598177, + "grad_norm": 0.7221893668174744, + "learning_rate": 1.9499171687660688e-05, + "loss": 2.4056, + "step": 15979 + }, + { + "epoch": 1.2896457105964005, + "grad_norm": 0.7409725189208984, + "learning_rate": 1.9489806899514574e-05, + "loss": 2.3899, + "step": 15980 + }, + { + "epoch": 1.2897264143329836, + "grad_norm": 0.6946583986282349, + "learning_rate": 1.948044411789296e-05, + "loss": 2.4832, + "step": 15981 + }, + { + "epoch": 1.2898071180695667, + "grad_norm": 0.7031306028366089, + "learning_rate": 1.9471083343029096e-05, + "loss": 2.4265, + "step": 15982 + }, + { + "epoch": 1.2898878218061496, + "grad_norm": 0.660093367099762, + "learning_rate": 1.946172457515637e-05, + "loss": 2.4883, + "step": 15983 + }, + { + "epoch": 1.2899685255427327, + "grad_norm": 0.700641930103302, + "learning_rate": 
1.945236781450802e-05, + "loss": 2.4096, + "step": 15984 + }, + { + "epoch": 1.2900492292793158, + "grad_norm": 0.7350760698318481, + "learning_rate": 1.9443013061317205e-05, + "loss": 2.4161, + "step": 15985 + }, + { + "epoch": 1.2901299330158986, + "grad_norm": 0.7567386031150818, + "learning_rate": 1.9433660315817072e-05, + "loss": 2.3978, + "step": 15986 + }, + { + "epoch": 1.2902106367524817, + "grad_norm": 0.7471369504928589, + "learning_rate": 1.9424309578240717e-05, + "loss": 2.4079, + "step": 15987 + }, + { + "epoch": 1.2902913404890646, + "grad_norm": 0.6630815267562866, + "learning_rate": 1.941496084882124e-05, + "loss": 2.4223, + "step": 15988 + }, + { + "epoch": 1.2903720442256477, + "grad_norm": 0.687224268913269, + "learning_rate": 1.940561412779155e-05, + "loss": 2.4413, + "step": 15989 + }, + { + "epoch": 1.2904527479622305, + "grad_norm": 0.6989685297012329, + "learning_rate": 1.9396269415384637e-05, + "loss": 2.3651, + "step": 15990 + }, + { + "epoch": 1.2905334516988136, + "grad_norm": 0.7256720066070557, + "learning_rate": 1.938692671183342e-05, + "loss": 2.4526, + "step": 15991 + }, + { + "epoch": 1.2906141554353967, + "grad_norm": 0.692032516002655, + "learning_rate": 1.9377586017370685e-05, + "loss": 2.3936, + "step": 15992 + }, + { + "epoch": 1.2906948591719796, + "grad_norm": 0.6733511686325073, + "learning_rate": 1.936824733222925e-05, + "loss": 2.4691, + "step": 15993 + }, + { + "epoch": 1.2907755629085627, + "grad_norm": 0.6698563098907471, + "learning_rate": 1.935891065664187e-05, + "loss": 2.3904, + "step": 15994 + }, + { + "epoch": 1.2908562666451457, + "grad_norm": 0.660521388053894, + "learning_rate": 1.934957599084123e-05, + "loss": 2.4647, + "step": 15995 + }, + { + "epoch": 1.2909369703817286, + "grad_norm": 0.6714615821838379, + "learning_rate": 1.9340243335059982e-05, + "loss": 2.403, + "step": 15996 + }, + { + "epoch": 1.2910176741183117, + "grad_norm": 0.726099967956543, + "learning_rate": 1.9330912689530746e-05, + "loss": 
2.4101, + "step": 15997 + }, + { + "epoch": 1.2910983778548948, + "grad_norm": 0.6585896015167236, + "learning_rate": 1.932158405448601e-05, + "loss": 2.3813, + "step": 15998 + }, + { + "epoch": 1.2911790815914777, + "grad_norm": 0.7967908382415771, + "learning_rate": 1.9312257430158286e-05, + "loss": 2.4188, + "step": 15999 + }, + { + "epoch": 1.2912597853280607, + "grad_norm": 0.7340367436408997, + "learning_rate": 1.9302932816780063e-05, + "loss": 2.4642, + "step": 16000 + }, + { + "epoch": 1.2912597853280607, + "eval_loss": 2.3791537284851074, + "eval_runtime": 780.6124, + "eval_samples_per_second": 3.356, + "eval_steps_per_second": 0.56, + "step": 16000 + }, + { + "epoch": 1.2913404890646438, + "grad_norm": 0.6778663992881775, + "learning_rate": 1.929361021458367e-05, + "loss": 2.4057, + "step": 16001 + }, + { + "epoch": 1.2914211928012267, + "grad_norm": 0.6982381343841553, + "learning_rate": 1.9284289623801477e-05, + "loss": 2.4376, + "step": 16002 + }, + { + "epoch": 1.2915018965378098, + "grad_norm": 0.6956612467765808, + "learning_rate": 1.927497104466578e-05, + "loss": 2.4485, + "step": 16003 + }, + { + "epoch": 1.2915826002743926, + "grad_norm": 0.6780211925506592, + "learning_rate": 1.9265654477408825e-05, + "loss": 2.4233, + "step": 16004 + }, + { + "epoch": 1.2916633040109757, + "grad_norm": 0.6869028806686401, + "learning_rate": 1.92563399222628e-05, + "loss": 2.4156, + "step": 16005 + }, + { + "epoch": 1.2917440077475586, + "grad_norm": 0.6402696967124939, + "learning_rate": 1.9247027379459848e-05, + "loss": 2.4208, + "step": 16006 + }, + { + "epoch": 1.2918247114841417, + "grad_norm": 0.6868177652359009, + "learning_rate": 1.92377168492321e-05, + "loss": 2.4067, + "step": 16007 + }, + { + "epoch": 1.2919054152207248, + "grad_norm": 0.7152438759803772, + "learning_rate": 1.922840833181152e-05, + "loss": 2.3944, + "step": 16008 + }, + { + "epoch": 1.2919861189573076, + "grad_norm": 0.6467335820198059, + "learning_rate": 1.921910182743015e-05, + 
"loss": 2.4064, + "step": 16009 + }, + { + "epoch": 1.2920668226938907, + "grad_norm": 0.6918551325798035, + "learning_rate": 1.9209797336319956e-05, + "loss": 2.4457, + "step": 16010 + }, + { + "epoch": 1.2921475264304738, + "grad_norm": 0.7308588027954102, + "learning_rate": 1.920049485871278e-05, + "loss": 2.3785, + "step": 16011 + }, + { + "epoch": 1.2922282301670567, + "grad_norm": 0.6918718814849854, + "learning_rate": 1.9191194394840472e-05, + "loss": 2.4645, + "step": 16012 + }, + { + "epoch": 1.2923089339036398, + "grad_norm": 0.7048078775405884, + "learning_rate": 1.9181895944934848e-05, + "loss": 2.4082, + "step": 16013 + }, + { + "epoch": 1.2923896376402229, + "grad_norm": 0.7175794839859009, + "learning_rate": 1.917259950922763e-05, + "loss": 2.4521, + "step": 16014 + }, + { + "epoch": 1.2924703413768057, + "grad_norm": 0.6895543932914734, + "learning_rate": 1.916330508795051e-05, + "loss": 2.4058, + "step": 16015 + }, + { + "epoch": 1.2925510451133888, + "grad_norm": 0.6951895952224731, + "learning_rate": 1.9154012681335176e-05, + "loss": 2.4274, + "step": 16016 + }, + { + "epoch": 1.2926317488499717, + "grad_norm": 0.6807428598403931, + "learning_rate": 1.9144722289613148e-05, + "loss": 2.4008, + "step": 16017 + }, + { + "epoch": 1.2927124525865548, + "grad_norm": 0.6643410325050354, + "learning_rate": 1.9135433913015997e-05, + "loss": 2.4036, + "step": 16018 + }, + { + "epoch": 1.2927931563231376, + "grad_norm": 0.7283294796943665, + "learning_rate": 1.912614755177522e-05, + "loss": 2.4118, + "step": 16019 + }, + { + "epoch": 1.2928738600597207, + "grad_norm": 0.7516021132469177, + "learning_rate": 1.911686320612227e-05, + "loss": 2.3983, + "step": 16020 + }, + { + "epoch": 1.2929545637963038, + "grad_norm": 0.7314203381538391, + "learning_rate": 1.91075808762885e-05, + "loss": 2.4352, + "step": 16021 + }, + { + "epoch": 1.2930352675328867, + "grad_norm": 0.6904106736183167, + "learning_rate": 1.9098300562505266e-05, + "loss": 2.3734, + "step": 
16022 + }, + { + "epoch": 1.2931159712694698, + "grad_norm": 0.6936709880828857, + "learning_rate": 1.9089022265003863e-05, + "loss": 2.4356, + "step": 16023 + }, + { + "epoch": 1.2931966750060528, + "grad_norm": 0.6753442883491516, + "learning_rate": 1.9079745984015528e-05, + "loss": 2.4713, + "step": 16024 + }, + { + "epoch": 1.2932773787426357, + "grad_norm": 0.7185340523719788, + "learning_rate": 1.9070471719771445e-05, + "loss": 2.4021, + "step": 16025 + }, + { + "epoch": 1.2933580824792188, + "grad_norm": 0.7486871480941772, + "learning_rate": 1.9061199472502798e-05, + "loss": 2.4144, + "step": 16026 + }, + { + "epoch": 1.2934387862158019, + "grad_norm": 0.6790735721588135, + "learning_rate": 1.90519292424406e-05, + "loss": 2.413, + "step": 16027 + }, + { + "epoch": 1.2935194899523847, + "grad_norm": 0.7104402780532837, + "learning_rate": 1.9042661029815922e-05, + "loss": 2.452, + "step": 16028 + }, + { + "epoch": 1.2936001936889678, + "grad_norm": 0.6975364685058594, + "learning_rate": 1.9033394834859796e-05, + "loss": 2.4169, + "step": 16029 + }, + { + "epoch": 1.293680897425551, + "grad_norm": 0.7619667649269104, + "learning_rate": 1.9024130657803085e-05, + "loss": 2.4106, + "step": 16030 + }, + { + "epoch": 1.2937616011621338, + "grad_norm": 0.6600254774093628, + "learning_rate": 1.9014868498876716e-05, + "loss": 2.3955, + "step": 16031 + }, + { + "epoch": 1.2938423048987169, + "grad_norm": 0.6790784597396851, + "learning_rate": 1.9005608358311533e-05, + "loss": 2.437, + "step": 16032 + }, + { + "epoch": 1.2939230086352997, + "grad_norm": 0.7085568308830261, + "learning_rate": 1.899635023633828e-05, + "loss": 2.4729, + "step": 16033 + }, + { + "epoch": 1.2940037123718828, + "grad_norm": 0.6940603256225586, + "learning_rate": 1.8987094133187732e-05, + "loss": 2.4099, + "step": 16034 + }, + { + "epoch": 1.2940844161084657, + "grad_norm": 0.7387171387672424, + "learning_rate": 1.897784004909058e-05, + "loss": 2.4509, + "step": 16035 + }, + { + "epoch": 
1.2941651198450488, + "grad_norm": 0.8263981938362122, + "learning_rate": 1.8968587984277463e-05, + "loss": 2.4208, + "step": 16036 + }, + { + "epoch": 1.2942458235816319, + "grad_norm": 0.7393552660942078, + "learning_rate": 1.8959337938978937e-05, + "loss": 2.4458, + "step": 16037 + }, + { + "epoch": 1.2943265273182147, + "grad_norm": 0.652787983417511, + "learning_rate": 1.895008991342555e-05, + "loss": 2.3593, + "step": 16038 + }, + { + "epoch": 1.2944072310547978, + "grad_norm": 0.6533015370368958, + "learning_rate": 1.8940843907847817e-05, + "loss": 2.4538, + "step": 16039 + }, + { + "epoch": 1.294487934791381, + "grad_norm": 0.6723785400390625, + "learning_rate": 1.8931599922476106e-05, + "loss": 2.4528, + "step": 16040 + }, + { + "epoch": 1.2945686385279638, + "grad_norm": 0.693242073059082, + "learning_rate": 1.892235795754085e-05, + "loss": 2.4006, + "step": 16041 + }, + { + "epoch": 1.2946493422645469, + "grad_norm": 0.6849604845046997, + "learning_rate": 1.8913118013272403e-05, + "loss": 2.3758, + "step": 16042 + }, + { + "epoch": 1.29473004600113, + "grad_norm": 0.7252739667892456, + "learning_rate": 1.8903880089900983e-05, + "loss": 2.4101, + "step": 16043 + }, + { + "epoch": 1.2948107497377128, + "grad_norm": 0.720431923866272, + "learning_rate": 1.8894644187656864e-05, + "loss": 2.4241, + "step": 16044 + }, + { + "epoch": 1.294891453474296, + "grad_norm": 0.6936169862747192, + "learning_rate": 1.8885410306770225e-05, + "loss": 2.4225, + "step": 16045 + }, + { + "epoch": 1.294972157210879, + "grad_norm": 0.7698646187782288, + "learning_rate": 1.8876178447471193e-05, + "loss": 2.4031, + "step": 16046 + }, + { + "epoch": 1.2950528609474619, + "grad_norm": 0.6800495982170105, + "learning_rate": 1.8866948609989854e-05, + "loss": 2.3679, + "step": 16047 + }, + { + "epoch": 1.295133564684045, + "grad_norm": 0.7348111867904663, + "learning_rate": 1.8857720794556267e-05, + "loss": 2.4263, + "step": 16048 + }, + { + "epoch": 1.2952142684206278, + "grad_norm": 
0.6614782214164734, + "learning_rate": 1.8848495001400356e-05, + "loss": 2.4396, + "step": 16049 + }, + { + "epoch": 1.295294972157211, + "grad_norm": 0.6683650612831116, + "learning_rate": 1.8839271230752075e-05, + "loss": 2.4189, + "step": 16050 + }, + { + "epoch": 1.2953756758937938, + "grad_norm": 0.711040198802948, + "learning_rate": 1.8830049482841328e-05, + "loss": 2.3974, + "step": 16051 + }, + { + "epoch": 1.2954563796303769, + "grad_norm": 0.6663193702697754, + "learning_rate": 1.882082975789795e-05, + "loss": 2.4196, + "step": 16052 + }, + { + "epoch": 1.29553708336696, + "grad_norm": 0.6551210284233093, + "learning_rate": 1.881161205615166e-05, + "loss": 2.3793, + "step": 16053 + }, + { + "epoch": 1.2956177871035428, + "grad_norm": 0.6849039793014526, + "learning_rate": 1.8802396377832243e-05, + "loss": 2.3941, + "step": 16054 + }, + { + "epoch": 1.295698490840126, + "grad_norm": 0.7642949223518372, + "learning_rate": 1.8793182723169357e-05, + "loss": 2.4296, + "step": 16055 + }, + { + "epoch": 1.295779194576709, + "grad_norm": 0.7104716897010803, + "learning_rate": 1.878397109239263e-05, + "loss": 2.4124, + "step": 16056 + }, + { + "epoch": 1.2958598983132918, + "grad_norm": 0.6822344064712524, + "learning_rate": 1.877476148573164e-05, + "loss": 2.4072, + "step": 16057 + }, + { + "epoch": 1.295940602049875, + "grad_norm": 0.6824066042900085, + "learning_rate": 1.8765553903415956e-05, + "loss": 2.4137, + "step": 16058 + }, + { + "epoch": 1.296021305786458, + "grad_norm": 0.7083307504653931, + "learning_rate": 1.875634834567498e-05, + "loss": 2.4423, + "step": 16059 + }, + { + "epoch": 1.2961020095230409, + "grad_norm": 0.7301077246665955, + "learning_rate": 1.874714481273818e-05, + "loss": 2.3926, + "step": 16060 + }, + { + "epoch": 1.296182713259624, + "grad_norm": 0.685656726360321, + "learning_rate": 1.873794330483496e-05, + "loss": 2.4409, + "step": 16061 + }, + { + "epoch": 1.296263416996207, + "grad_norm": 0.6916719675064087, + "learning_rate": 
1.8728743822194584e-05, + "loss": 2.4141, + "step": 16062 + }, + { + "epoch": 1.29634412073279, + "grad_norm": 0.7188845276832581, + "learning_rate": 1.871954636504636e-05, + "loss": 2.4186, + "step": 16063 + }, + { + "epoch": 1.2964248244693728, + "grad_norm": 0.6637440919876099, + "learning_rate": 1.8710350933619504e-05, + "loss": 2.4526, + "step": 16064 + }, + { + "epoch": 1.2965055282059559, + "grad_norm": 0.7000349760055542, + "learning_rate": 1.87011575281432e-05, + "loss": 2.4096, + "step": 16065 + }, + { + "epoch": 1.296586231942539, + "grad_norm": 0.693513810634613, + "learning_rate": 1.8691966148846573e-05, + "loss": 2.3931, + "step": 16066 + }, + { + "epoch": 1.2966669356791218, + "grad_norm": 0.6928985118865967, + "learning_rate": 1.8682776795958678e-05, + "loss": 2.4384, + "step": 16067 + }, + { + "epoch": 1.296747639415705, + "grad_norm": 0.6474096179008484, + "learning_rate": 1.8673589469708585e-05, + "loss": 2.3985, + "step": 16068 + }, + { + "epoch": 1.296828343152288, + "grad_norm": 0.6827313899993896, + "learning_rate": 1.866440417032521e-05, + "loss": 2.4607, + "step": 16069 + }, + { + "epoch": 1.2969090468888709, + "grad_norm": 0.7183445692062378, + "learning_rate": 1.8655220898037485e-05, + "loss": 2.4396, + "step": 16070 + }, + { + "epoch": 1.296989750625454, + "grad_norm": 0.6997376680374146, + "learning_rate": 1.8646039653074333e-05, + "loss": 2.4627, + "step": 16071 + }, + { + "epoch": 1.297070454362037, + "grad_norm": 0.7358444333076477, + "learning_rate": 1.8636860435664493e-05, + "loss": 2.4165, + "step": 16072 + }, + { + "epoch": 1.29715115809862, + "grad_norm": 0.8126270771026611, + "learning_rate": 1.8627683246036787e-05, + "loss": 2.4681, + "step": 16073 + }, + { + "epoch": 1.297231861835203, + "grad_norm": 0.7364177107810974, + "learning_rate": 1.8618508084419918e-05, + "loss": 2.44, + "step": 16074 + }, + { + "epoch": 1.297312565571786, + "grad_norm": 0.7480010390281677, + "learning_rate": 1.8609334951042567e-05, + "loss": 2.4759, 
+ "step": 16075 + }, + { + "epoch": 1.297393269308369, + "grad_norm": 0.6563693284988403, + "learning_rate": 1.8600163846133335e-05, + "loss": 2.3865, + "step": 16076 + }, + { + "epoch": 1.297473973044952, + "grad_norm": 0.6961230039596558, + "learning_rate": 1.8590994769920832e-05, + "loss": 2.3851, + "step": 16077 + }, + { + "epoch": 1.297554676781535, + "grad_norm": 0.7137415409088135, + "learning_rate": 1.8581827722633527e-05, + "loss": 2.4115, + "step": 16078 + }, + { + "epoch": 1.297635380518118, + "grad_norm": 0.6579335331916809, + "learning_rate": 1.85726627044999e-05, + "loss": 2.4464, + "step": 16079 + }, + { + "epoch": 1.2977160842547009, + "grad_norm": 0.7069905400276184, + "learning_rate": 1.8563499715748366e-05, + "loss": 2.4057, + "step": 16080 + }, + { + "epoch": 1.297796787991284, + "grad_norm": 0.771925687789917, + "learning_rate": 1.8554338756607325e-05, + "loss": 2.4696, + "step": 16081 + }, + { + "epoch": 1.297877491727867, + "grad_norm": 0.7268456816673279, + "learning_rate": 1.8545179827305048e-05, + "loss": 2.3949, + "step": 16082 + }, + { + "epoch": 1.29795819546445, + "grad_norm": 0.7049130797386169, + "learning_rate": 1.8536022928069796e-05, + "loss": 2.4448, + "step": 16083 + }, + { + "epoch": 1.298038899201033, + "grad_norm": 0.6716888546943665, + "learning_rate": 1.852686805912982e-05, + "loss": 2.3356, + "step": 16084 + }, + { + "epoch": 1.298119602937616, + "grad_norm": 0.666386604309082, + "learning_rate": 1.851771522071325e-05, + "loss": 2.4226, + "step": 16085 + }, + { + "epoch": 1.298200306674199, + "grad_norm": 0.7084901332855225, + "learning_rate": 1.8508564413048223e-05, + "loss": 2.4452, + "step": 16086 + }, + { + "epoch": 1.298281010410782, + "grad_norm": 0.6615412831306458, + "learning_rate": 1.8499415636362815e-05, + "loss": 2.4193, + "step": 16087 + }, + { + "epoch": 1.2983617141473651, + "grad_norm": 0.7143606543540955, + "learning_rate": 1.849026889088499e-05, + "loss": 2.4513, + "step": 16088 + }, + { + "epoch": 
1.298442417883948, + "grad_norm": 0.7241482734680176, + "learning_rate": 1.8481124176842723e-05, + "loss": 2.458, + "step": 16089 + }, + { + "epoch": 1.298523121620531, + "grad_norm": 0.6762149930000305, + "learning_rate": 1.8471981494463963e-05, + "loss": 2.4386, + "step": 16090 + }, + { + "epoch": 1.2986038253571142, + "grad_norm": 0.6672768592834473, + "learning_rate": 1.8462840843976525e-05, + "loss": 2.375, + "step": 16091 + }, + { + "epoch": 1.298684529093697, + "grad_norm": 0.6871693134307861, + "learning_rate": 1.8453702225608226e-05, + "loss": 2.4342, + "step": 16092 + }, + { + "epoch": 1.2987652328302801, + "grad_norm": 0.6771275401115417, + "learning_rate": 1.8444565639586864e-05, + "loss": 2.402, + "step": 16093 + }, + { + "epoch": 1.298845936566863, + "grad_norm": 0.6627403497695923, + "learning_rate": 1.8435431086140077e-05, + "loss": 2.4667, + "step": 16094 + }, + { + "epoch": 1.298926640303446, + "grad_norm": 0.7001610398292542, + "learning_rate": 1.8426298565495538e-05, + "loss": 2.4396, + "step": 16095 + }, + { + "epoch": 1.299007344040029, + "grad_norm": 0.7574489712715149, + "learning_rate": 1.8417168077880908e-05, + "loss": 2.4601, + "step": 16096 + }, + { + "epoch": 1.299088047776612, + "grad_norm": 0.7771055698394775, + "learning_rate": 1.840803962352372e-05, + "loss": 2.4371, + "step": 16097 + }, + { + "epoch": 1.299168751513195, + "grad_norm": 0.6738649606704712, + "learning_rate": 1.8398913202651457e-05, + "loss": 2.3921, + "step": 16098 + }, + { + "epoch": 1.299249455249778, + "grad_norm": 0.7014862895011902, + "learning_rate": 1.8389788815491583e-05, + "loss": 2.451, + "step": 16099 + }, + { + "epoch": 1.299330158986361, + "grad_norm": 0.7026070952415466, + "learning_rate": 1.8380666462271523e-05, + "loss": 2.4583, + "step": 16100 + }, + { + "epoch": 1.2994108627229441, + "grad_norm": 0.6904535293579102, + "learning_rate": 1.8371546143218588e-05, + "loss": 2.4453, + "step": 16101 + }, + { + "epoch": 1.299491566459527, + "grad_norm": 
0.6974804997444153, + "learning_rate": 1.8362427858560093e-05, + "loss": 2.4291, + "step": 16102 + }, + { + "epoch": 1.29957227019611, + "grad_norm": 0.6826989650726318, + "learning_rate": 1.8353311608523326e-05, + "loss": 2.4183, + "step": 16103 + }, + { + "epoch": 1.2996529739326932, + "grad_norm": 0.6804787516593933, + "learning_rate": 1.8344197393335448e-05, + "loss": 2.434, + "step": 16104 + }, + { + "epoch": 1.299733677669276, + "grad_norm": 0.7144587635993958, + "learning_rate": 1.8335085213223613e-05, + "loss": 2.4296, + "step": 16105 + }, + { + "epoch": 1.2998143814058591, + "grad_norm": 0.7228755354881287, + "learning_rate": 1.8325975068414924e-05, + "loss": 2.3987, + "step": 16106 + }, + { + "epoch": 1.2998950851424422, + "grad_norm": 0.7417716383934021, + "learning_rate": 1.8316866959136438e-05, + "loss": 2.4076, + "step": 16107 + }, + { + "epoch": 1.299975788879025, + "grad_norm": 0.6737387776374817, + "learning_rate": 1.8307760885615154e-05, + "loss": 2.4175, + "step": 16108 + }, + { + "epoch": 1.3000564926156082, + "grad_norm": 0.7294918298721313, + "learning_rate": 1.8298656848078035e-05, + "loss": 2.4022, + "step": 16109 + }, + { + "epoch": 1.300137196352191, + "grad_norm": 0.7200861573219299, + "learning_rate": 1.828955484675193e-05, + "loss": 2.4018, + "step": 16110 + }, + { + "epoch": 1.3002179000887741, + "grad_norm": 0.7704176306724548, + "learning_rate": 1.8280454881863718e-05, + "loss": 2.4539, + "step": 16111 + }, + { + "epoch": 1.300298603825357, + "grad_norm": 0.6790730953216553, + "learning_rate": 1.8271356953640184e-05, + "loss": 2.4196, + "step": 16112 + }, + { + "epoch": 1.30037930756194, + "grad_norm": 0.7165740132331848, + "learning_rate": 1.8262261062308096e-05, + "loss": 2.4234, + "step": 16113 + }, + { + "epoch": 1.3004600112985232, + "grad_norm": 0.7716830372810364, + "learning_rate": 1.82531672080941e-05, + "loss": 2.4255, + "step": 16114 + }, + { + "epoch": 1.300540715035106, + "grad_norm": 0.6525317430496216, + 
"learning_rate": 1.824407539122488e-05, + "loss": 2.4482, + "step": 16115 + }, + { + "epoch": 1.3006214187716891, + "grad_norm": 0.7397769093513489, + "learning_rate": 1.8234985611927003e-05, + "loss": 2.33, + "step": 16116 + }, + { + "epoch": 1.3007021225082722, + "grad_norm": 0.7106032967567444, + "learning_rate": 1.822589787042702e-05, + "loss": 2.485, + "step": 16117 + }, + { + "epoch": 1.300782826244855, + "grad_norm": 0.7030045390129089, + "learning_rate": 1.8216812166951425e-05, + "loss": 2.454, + "step": 16118 + }, + { + "epoch": 1.3008635299814382, + "grad_norm": 0.7075662612915039, + "learning_rate": 1.8207728501726683e-05, + "loss": 2.4589, + "step": 16119 + }, + { + "epoch": 1.3009442337180213, + "grad_norm": 0.6700533032417297, + "learning_rate": 1.819864687497912e-05, + "loss": 2.4398, + "step": 16120 + }, + { + "epoch": 1.3010249374546041, + "grad_norm": 0.6951712369918823, + "learning_rate": 1.8189567286935117e-05, + "loss": 2.3998, + "step": 16121 + }, + { + "epoch": 1.3011056411911872, + "grad_norm": 0.708344578742981, + "learning_rate": 1.818048973782097e-05, + "loss": 2.4142, + "step": 16122 + }, + { + "epoch": 1.30118634492777, + "grad_norm": 0.7078592777252197, + "learning_rate": 1.817141422786287e-05, + "loss": 2.451, + "step": 16123 + }, + { + "epoch": 1.3012670486643532, + "grad_norm": 0.7111849784851074, + "learning_rate": 1.816234075728703e-05, + "loss": 2.4762, + "step": 16124 + }, + { + "epoch": 1.301347752400936, + "grad_norm": 0.6716348528862, + "learning_rate": 1.8153269326319588e-05, + "loss": 2.4373, + "step": 16125 + }, + { + "epoch": 1.3014284561375191, + "grad_norm": 0.6592512130737305, + "learning_rate": 1.8144199935186623e-05, + "loss": 2.412, + "step": 16126 + }, + { + "epoch": 1.3015091598741022, + "grad_norm": 0.6958334445953369, + "learning_rate": 1.8135132584114167e-05, + "loss": 2.4077, + "step": 16127 + }, + { + "epoch": 1.301589863610685, + "grad_norm": 0.6911341547966003, + "learning_rate": 1.8126067273328207e-05, + 
"loss": 2.409, + "step": 16128 + }, + { + "epoch": 1.3016705673472682, + "grad_norm": 0.676114022731781, + "learning_rate": 1.8117004003054693e-05, + "loss": 2.4463, + "step": 16129 + }, + { + "epoch": 1.3017512710838512, + "grad_norm": 0.6493322849273682, + "learning_rate": 1.810794277351947e-05, + "loss": 2.4377, + "step": 16130 + }, + { + "epoch": 1.3018319748204341, + "grad_norm": 0.6938454508781433, + "learning_rate": 1.8098883584948367e-05, + "loss": 2.4298, + "step": 16131 + }, + { + "epoch": 1.3019126785570172, + "grad_norm": 0.69407719373703, + "learning_rate": 1.8089826437567214e-05, + "loss": 2.4107, + "step": 16132 + }, + { + "epoch": 1.3019933822936003, + "grad_norm": 0.6898862719535828, + "learning_rate": 1.8080771331601664e-05, + "loss": 2.4182, + "step": 16133 + }, + { + "epoch": 1.3020740860301832, + "grad_norm": 0.7377758026123047, + "learning_rate": 1.807171826727744e-05, + "loss": 2.4112, + "step": 16134 + }, + { + "epoch": 1.3021547897667662, + "grad_norm": 0.674057126045227, + "learning_rate": 1.8062667244820154e-05, + "loss": 2.4276, + "step": 16135 + }, + { + "epoch": 1.3022354935033493, + "grad_norm": 0.7087522745132446, + "learning_rate": 1.8053618264455384e-05, + "loss": 2.4338, + "step": 16136 + }, + { + "epoch": 1.3023161972399322, + "grad_norm": 0.70958411693573, + "learning_rate": 1.8044571326408667e-05, + "loss": 2.4369, + "step": 16137 + }, + { + "epoch": 1.3023969009765153, + "grad_norm": 0.7023837566375732, + "learning_rate": 1.803552643090548e-05, + "loss": 2.4185, + "step": 16138 + }, + { + "epoch": 1.3024776047130981, + "grad_norm": 0.708543598651886, + "learning_rate": 1.8026483578171216e-05, + "loss": 2.4053, + "step": 16139 + }, + { + "epoch": 1.3025583084496812, + "grad_norm": 0.748601496219635, + "learning_rate": 1.8017442768431257e-05, + "loss": 2.3948, + "step": 16140 + }, + { + "epoch": 1.302639012186264, + "grad_norm": 0.6626949310302734, + "learning_rate": 1.800840400191096e-05, + "loss": 2.4636, + "step": 16141 + }, 
+ { + "epoch": 1.3027197159228472, + "grad_norm": 0.7079617977142334, + "learning_rate": 1.7999367278835534e-05, + "loss": 2.4091, + "step": 16142 + }, + { + "epoch": 1.3028004196594303, + "grad_norm": 0.7025624513626099, + "learning_rate": 1.7990332599430225e-05, + "loss": 2.3732, + "step": 16143 + }, + { + "epoch": 1.3028811233960131, + "grad_norm": 0.7365758419036865, + "learning_rate": 1.7981299963920205e-05, + "loss": 2.4725, + "step": 16144 + }, + { + "epoch": 1.3029618271325962, + "grad_norm": 0.7511963248252869, + "learning_rate": 1.7972269372530615e-05, + "loss": 2.4304, + "step": 16145 + }, + { + "epoch": 1.3030425308691793, + "grad_norm": 0.7055985331535339, + "learning_rate": 1.796324082548644e-05, + "loss": 2.4259, + "step": 16146 + }, + { + "epoch": 1.3031232346057622, + "grad_norm": 0.691162645816803, + "learning_rate": 1.7954214323012775e-05, + "loss": 2.4262, + "step": 16147 + }, + { + "epoch": 1.3032039383423453, + "grad_norm": 0.7179710268974304, + "learning_rate": 1.7945189865334587e-05, + "loss": 2.4301, + "step": 16148 + }, + { + "epoch": 1.3032846420789284, + "grad_norm": 0.7391623258590698, + "learning_rate": 1.7936167452676744e-05, + "loss": 2.4302, + "step": 16149 + }, + { + "epoch": 1.3033653458155112, + "grad_norm": 0.7297981381416321, + "learning_rate": 1.7927147085264117e-05, + "loss": 2.3911, + "step": 16150 + }, + { + "epoch": 1.3034460495520943, + "grad_norm": 0.7571932673454285, + "learning_rate": 1.7918128763321552e-05, + "loss": 2.4348, + "step": 16151 + }, + { + "epoch": 1.3035267532886774, + "grad_norm": 0.7074765563011169, + "learning_rate": 1.7909112487073754e-05, + "loss": 2.4164, + "step": 16152 + }, + { + "epoch": 1.3036074570252603, + "grad_norm": 0.7534131407737732, + "learning_rate": 1.7900098256745467e-05, + "loss": 2.3784, + "step": 16153 + }, + { + "epoch": 1.3036881607618434, + "grad_norm": 0.675398588180542, + "learning_rate": 1.789108607256136e-05, + "loss": 2.4305, + "step": 16154 + }, + { + "epoch": 
1.3037688644984262, + "grad_norm": 0.7099249362945557, + "learning_rate": 1.7882075934746002e-05, + "loss": 2.4053, + "step": 16155 + }, + { + "epoch": 1.3038495682350093, + "grad_norm": 0.6914681196212769, + "learning_rate": 1.787306784352397e-05, + "loss": 2.3902, + "step": 16156 + }, + { + "epoch": 1.3039302719715922, + "grad_norm": 0.6956958770751953, + "learning_rate": 1.786406179911977e-05, + "loss": 2.4026, + "step": 16157 + }, + { + "epoch": 1.3040109757081753, + "grad_norm": 0.6873000860214233, + "learning_rate": 1.7855057801757857e-05, + "loss": 2.4082, + "step": 16158 + }, + { + "epoch": 1.3040916794447583, + "grad_norm": 0.7340587377548218, + "learning_rate": 1.7846055851662625e-05, + "loss": 2.4894, + "step": 16159 + }, + { + "epoch": 1.3041723831813412, + "grad_norm": 0.6956963539123535, + "learning_rate": 1.7837055949058444e-05, + "loss": 2.3976, + "step": 16160 + }, + { + "epoch": 1.3042530869179243, + "grad_norm": 0.7654300332069397, + "learning_rate": 1.782805809416962e-05, + "loss": 2.4272, + "step": 16161 + }, + { + "epoch": 1.3043337906545074, + "grad_norm": 0.7735971212387085, + "learning_rate": 1.7819062287220368e-05, + "loss": 2.4513, + "step": 16162 + }, + { + "epoch": 1.3044144943910903, + "grad_norm": 0.6897203326225281, + "learning_rate": 1.7810068528434908e-05, + "loss": 2.3974, + "step": 16163 + }, + { + "epoch": 1.3044951981276733, + "grad_norm": 0.7328432202339172, + "learning_rate": 1.780107681803741e-05, + "loss": 2.4455, + "step": 16164 + }, + { + "epoch": 1.3045759018642564, + "grad_norm": 0.7098489999771118, + "learning_rate": 1.7792087156251924e-05, + "loss": 2.4173, + "step": 16165 + }, + { + "epoch": 1.3046566056008393, + "grad_norm": 0.6593194007873535, + "learning_rate": 1.7783099543302518e-05, + "loss": 2.4102, + "step": 16166 + }, + { + "epoch": 1.3047373093374224, + "grad_norm": 0.7329291105270386, + "learning_rate": 1.7774113979413188e-05, + "loss": 2.4856, + "step": 16167 + }, + { + "epoch": 1.3048180130740052, + 
"grad_norm": 0.7033355236053467, + "learning_rate": 1.776513046480788e-05, + "loss": 2.4503, + "step": 16168 + }, + { + "epoch": 1.3048987168105883, + "grad_norm": 0.7063608765602112, + "learning_rate": 1.7756148999710486e-05, + "loss": 2.4523, + "step": 16169 + }, + { + "epoch": 1.3049794205471712, + "grad_norm": 0.6905883550643921, + "learning_rate": 1.774716958434487e-05, + "loss": 2.4149, + "step": 16170 + }, + { + "epoch": 1.3050601242837543, + "grad_norm": 0.694551408290863, + "learning_rate": 1.7738192218934778e-05, + "loss": 2.437, + "step": 16171 + }, + { + "epoch": 1.3051408280203374, + "grad_norm": 0.7173176407814026, + "learning_rate": 1.772921690370396e-05, + "loss": 2.4817, + "step": 16172 + }, + { + "epoch": 1.3052215317569202, + "grad_norm": 0.7197130918502808, + "learning_rate": 1.7720243638876153e-05, + "loss": 2.4481, + "step": 16173 + }, + { + "epoch": 1.3053022354935033, + "grad_norm": 0.710811197757721, + "learning_rate": 1.771127242467493e-05, + "loss": 2.397, + "step": 16174 + }, + { + "epoch": 1.3053829392300864, + "grad_norm": 0.9194550514221191, + "learning_rate": 1.7702303261323894e-05, + "loss": 2.5206, + "step": 16175 + }, + { + "epoch": 1.3054636429666693, + "grad_norm": 0.7003832459449768, + "learning_rate": 1.769333614904659e-05, + "loss": 2.4175, + "step": 16176 + }, + { + "epoch": 1.3055443467032524, + "grad_norm": 0.7161554098129272, + "learning_rate": 1.768437108806651e-05, + "loss": 2.3892, + "step": 16177 + }, + { + "epoch": 1.3056250504398355, + "grad_norm": 0.6516181826591492, + "learning_rate": 1.767540807860707e-05, + "loss": 2.4361, + "step": 16178 + }, + { + "epoch": 1.3057057541764183, + "grad_norm": 0.7518061399459839, + "learning_rate": 1.7666447120891662e-05, + "loss": 2.4572, + "step": 16179 + }, + { + "epoch": 1.3057864579130014, + "grad_norm": 0.735388994216919, + "learning_rate": 1.7657488215143637e-05, + "loss": 2.3965, + "step": 16180 + }, + { + "epoch": 1.3058671616495845, + "grad_norm": 0.6994282007217407, + 
"learning_rate": 1.764853136158622e-05, + "loss": 2.4052, + "step": 16181 + }, + { + "epoch": 1.3059478653861674, + "grad_norm": 0.7095311880111694, + "learning_rate": 1.7639576560442684e-05, + "loss": 2.4818, + "step": 16182 + }, + { + "epoch": 1.3060285691227504, + "grad_norm": 0.6527207493782043, + "learning_rate": 1.7630623811936208e-05, + "loss": 2.3962, + "step": 16183 + }, + { + "epoch": 1.3061092728593333, + "grad_norm": 0.6668451428413391, + "learning_rate": 1.7621673116289882e-05, + "loss": 2.4514, + "step": 16184 + }, + { + "epoch": 1.3061899765959164, + "grad_norm": 0.7119911909103394, + "learning_rate": 1.7612724473726795e-05, + "loss": 2.4313, + "step": 16185 + }, + { + "epoch": 1.3062706803324993, + "grad_norm": 0.706249475479126, + "learning_rate": 1.7603777884469984e-05, + "loss": 2.4131, + "step": 16186 + }, + { + "epoch": 1.3063513840690824, + "grad_norm": 0.6634086966514587, + "learning_rate": 1.759483334874241e-05, + "loss": 2.3532, + "step": 16187 + }, + { + "epoch": 1.3064320878056654, + "grad_norm": 0.8096393942832947, + "learning_rate": 1.7585890866766995e-05, + "loss": 2.4485, + "step": 16188 + }, + { + "epoch": 1.3065127915422483, + "grad_norm": 0.675308883190155, + "learning_rate": 1.7576950438766615e-05, + "loss": 2.388, + "step": 16189 + }, + { + "epoch": 1.3065934952788314, + "grad_norm": 0.738275408744812, + "learning_rate": 1.756801206496411e-05, + "loss": 2.4485, + "step": 16190 + }, + { + "epoch": 1.3066741990154145, + "grad_norm": 0.7045620083808899, + "learning_rate": 1.755907574558221e-05, + "loss": 2.3985, + "step": 16191 + }, + { + "epoch": 1.3067549027519973, + "grad_norm": 0.6499879360198975, + "learning_rate": 1.755014148084363e-05, + "loss": 2.3992, + "step": 16192 + }, + { + "epoch": 1.3068356064885804, + "grad_norm": 0.7101179361343384, + "learning_rate": 1.7541209270971083e-05, + "loss": 2.4217, + "step": 16193 + }, + { + "epoch": 1.3069163102251635, + "grad_norm": 0.6865181922912598, + "learning_rate": 
1.7532279116187124e-05, + "loss": 2.4805, + "step": 16194 + }, + { + "epoch": 1.3069970139617464, + "grad_norm": 0.7710141539573669, + "learning_rate": 1.752335101671434e-05, + "loss": 2.3654, + "step": 16195 + }, + { + "epoch": 1.3070777176983295, + "grad_norm": 0.695936381816864, + "learning_rate": 1.7514424972775244e-05, + "loss": 2.4315, + "step": 16196 + }, + { + "epoch": 1.3071584214349126, + "grad_norm": 0.6781535148620605, + "learning_rate": 1.7505500984592304e-05, + "loss": 2.4238, + "step": 16197 + }, + { + "epoch": 1.3072391251714954, + "grad_norm": 0.6549252271652222, + "learning_rate": 1.7496579052387918e-05, + "loss": 2.3766, + "step": 16198 + }, + { + "epoch": 1.3073198289080785, + "grad_norm": 0.6599059700965881, + "learning_rate": 1.7487659176384474e-05, + "loss": 2.4613, + "step": 16199 + }, + { + "epoch": 1.3074005326446614, + "grad_norm": 0.6742514967918396, + "learning_rate": 1.7478741356804228e-05, + "loss": 2.3917, + "step": 16200 + }, + { + "epoch": 1.3074812363812445, + "grad_norm": 0.6542397141456604, + "learning_rate": 1.746982559386946e-05, + "loss": 2.44, + "step": 16201 + }, + { + "epoch": 1.3075619401178273, + "grad_norm": 0.7200478315353394, + "learning_rate": 1.74609118878024e-05, + "loss": 2.4324, + "step": 16202 + }, + { + "epoch": 1.3076426438544104, + "grad_norm": 0.717628002166748, + "learning_rate": 1.745200023882515e-05, + "loss": 2.3996, + "step": 16203 + }, + { + "epoch": 1.3077233475909935, + "grad_norm": 0.7350025177001953, + "learning_rate": 1.744309064715983e-05, + "loss": 2.4812, + "step": 16204 + }, + { + "epoch": 1.3078040513275764, + "grad_norm": 0.7253599762916565, + "learning_rate": 1.74341831130285e-05, + "loss": 2.4454, + "step": 16205 + }, + { + "epoch": 1.3078847550641595, + "grad_norm": 0.7537909746170044, + "learning_rate": 1.7425277636653193e-05, + "loss": 2.4247, + "step": 16206 + }, + { + "epoch": 1.3079654588007426, + "grad_norm": 0.7563284039497375, + "learning_rate": 1.7416374218255783e-05, + "loss": 
2.3893, + "step": 16207 + }, + { + "epoch": 1.3080461625373254, + "grad_norm": 0.7118926048278809, + "learning_rate": 1.740747285805818e-05, + "loss": 2.4146, + "step": 16208 + }, + { + "epoch": 1.3081268662739085, + "grad_norm": 0.7805569171905518, + "learning_rate": 1.7398573556282304e-05, + "loss": 2.396, + "step": 16209 + }, + { + "epoch": 1.3082075700104916, + "grad_norm": 0.7357630133628845, + "learning_rate": 1.738967631314987e-05, + "loss": 2.5405, + "step": 16210 + }, + { + "epoch": 1.3082882737470745, + "grad_norm": 0.6670438647270203, + "learning_rate": 1.7380781128882652e-05, + "loss": 2.4452, + "step": 16211 + }, + { + "epoch": 1.3083689774836575, + "grad_norm": 0.7374427318572998, + "learning_rate": 1.7371888003702353e-05, + "loss": 2.5143, + "step": 16212 + }, + { + "epoch": 1.3084496812202406, + "grad_norm": 0.672207236289978, + "learning_rate": 1.736299693783058e-05, + "loss": 2.4178, + "step": 16213 + }, + { + "epoch": 1.3085303849568235, + "grad_norm": 0.6926576495170593, + "learning_rate": 1.735410793148894e-05, + "loss": 2.3466, + "step": 16214 + }, + { + "epoch": 1.3086110886934066, + "grad_norm": 0.6928917169570923, + "learning_rate": 1.734522098489899e-05, + "loss": 2.4654, + "step": 16215 + }, + { + "epoch": 1.3086917924299895, + "grad_norm": 0.6536242961883545, + "learning_rate": 1.733633609828217e-05, + "loss": 2.3761, + "step": 16216 + }, + { + "epoch": 1.3087724961665725, + "grad_norm": 0.6993953585624695, + "learning_rate": 1.732745327185994e-05, + "loss": 2.3963, + "step": 16217 + }, + { + "epoch": 1.3088531999031554, + "grad_norm": 0.6851957440376282, + "learning_rate": 1.731857250585368e-05, + "loss": 2.4253, + "step": 16218 + }, + { + "epoch": 1.3089339036397385, + "grad_norm": 0.6620005965232849, + "learning_rate": 1.7309693800484728e-05, + "loss": 2.4302, + "step": 16219 + }, + { + "epoch": 1.3090146073763216, + "grad_norm": 0.6704410314559937, + "learning_rate": 1.7300817155974356e-05, + "loss": 2.4065, + "step": 16220 + }, + { 
+ "epoch": 1.3090953111129044, + "grad_norm": 0.6882327198982239, + "learning_rate": 1.7291942572543807e-05, + "loss": 2.4526, + "step": 16221 + }, + { + "epoch": 1.3091760148494875, + "grad_norm": 0.6971533298492432, + "learning_rate": 1.7283070050414275e-05, + "loss": 2.4076, + "step": 16222 + }, + { + "epoch": 1.3092567185860706, + "grad_norm": 0.6662544012069702, + "learning_rate": 1.7274199589806827e-05, + "loss": 2.3678, + "step": 16223 + }, + { + "epoch": 1.3093374223226535, + "grad_norm": 0.6342894434928894, + "learning_rate": 1.726533119094258e-05, + "loss": 2.3424, + "step": 16224 + }, + { + "epoch": 1.3094181260592366, + "grad_norm": 0.6808488965034485, + "learning_rate": 1.7256464854042577e-05, + "loss": 2.4286, + "step": 16225 + }, + { + "epoch": 1.3094988297958197, + "grad_norm": 0.6417922973632812, + "learning_rate": 1.7247600579327738e-05, + "loss": 2.3677, + "step": 16226 + }, + { + "epoch": 1.3095795335324025, + "grad_norm": 0.7267102599143982, + "learning_rate": 1.7238738367019002e-05, + "loss": 2.3974, + "step": 16227 + }, + { + "epoch": 1.3096602372689856, + "grad_norm": 0.6915002465248108, + "learning_rate": 1.722987821733725e-05, + "loss": 2.4429, + "step": 16228 + }, + { + "epoch": 1.3097409410055685, + "grad_norm": 0.6930112242698669, + "learning_rate": 1.7221020130503296e-05, + "loss": 2.4272, + "step": 16229 + }, + { + "epoch": 1.3098216447421516, + "grad_norm": 0.7049465179443359, + "learning_rate": 1.7212164106737904e-05, + "loss": 2.4089, + "step": 16230 + }, + { + "epoch": 1.3099023484787344, + "grad_norm": 0.7230044603347778, + "learning_rate": 1.720331014626182e-05, + "loss": 2.4313, + "step": 16231 + }, + { + "epoch": 1.3099830522153175, + "grad_norm": 0.6513530015945435, + "learning_rate": 1.7194458249295665e-05, + "loss": 2.3293, + "step": 16232 + }, + { + "epoch": 1.3100637559519006, + "grad_norm": 0.6880534291267395, + "learning_rate": 1.718560841606005e-05, + "loss": 2.4556, + "step": 16233 + }, + { + "epoch": 
1.3101444596884835, + "grad_norm": 0.7075292468070984, + "learning_rate": 1.717676064677559e-05, + "loss": 2.4747, + "step": 16234 + }, + { + "epoch": 1.3102251634250666, + "grad_norm": 0.7713594436645508, + "learning_rate": 1.7167914941662723e-05, + "loss": 2.4135, + "step": 16235 + }, + { + "epoch": 1.3103058671616497, + "grad_norm": 0.7883979082107544, + "learning_rate": 1.7159071300941943e-05, + "loss": 2.418, + "step": 16236 + }, + { + "epoch": 1.3103865708982325, + "grad_norm": 0.6588975787162781, + "learning_rate": 1.7150229724833655e-05, + "loss": 2.3295, + "step": 16237 + }, + { + "epoch": 1.3104672746348156, + "grad_norm": 0.679086446762085, + "learning_rate": 1.7141390213558217e-05, + "loss": 2.413, + "step": 16238 + }, + { + "epoch": 1.3105479783713987, + "grad_norm": 0.6803067326545715, + "learning_rate": 1.713255276733592e-05, + "loss": 2.4338, + "step": 16239 + }, + { + "epoch": 1.3106286821079816, + "grad_norm": 0.7041650414466858, + "learning_rate": 1.712371738638704e-05, + "loss": 2.469, + "step": 16240 + }, + { + "epoch": 1.3107093858445646, + "grad_norm": 0.6560962796211243, + "learning_rate": 1.711488407093178e-05, + "loss": 2.4353, + "step": 16241 + }, + { + "epoch": 1.3107900895811477, + "grad_norm": 0.6637921333312988, + "learning_rate": 1.7106052821190244e-05, + "loss": 2.3996, + "step": 16242 + }, + { + "epoch": 1.3108707933177306, + "grad_norm": 0.8131709098815918, + "learning_rate": 1.7097223637382565e-05, + "loss": 2.466, + "step": 16243 + }, + { + "epoch": 1.3109514970543137, + "grad_norm": 0.6637253165245056, + "learning_rate": 1.708839651972881e-05, + "loss": 2.3811, + "step": 16244 + }, + { + "epoch": 1.3110322007908966, + "grad_norm": 0.71912682056427, + "learning_rate": 1.7079571468448917e-05, + "loss": 2.4175, + "step": 16245 + }, + { + "epoch": 1.3111129045274796, + "grad_norm": 0.7028010487556458, + "learning_rate": 1.7070748483762854e-05, + "loss": 2.41, + "step": 16246 + }, + { + "epoch": 1.3111936082640625, + "grad_norm": 
0.7241945862770081, + "learning_rate": 1.7061927565890522e-05, + "loss": 2.4171, + "step": 16247 + }, + { + "epoch": 1.3112743120006456, + "grad_norm": 0.7039221525192261, + "learning_rate": 1.705310871505177e-05, + "loss": 2.4154, + "step": 16248 + }, + { + "epoch": 1.3113550157372287, + "grad_norm": 0.672444760799408, + "learning_rate": 1.704429193146636e-05, + "loss": 2.4025, + "step": 16249 + }, + { + "epoch": 1.3114357194738115, + "grad_norm": 0.7240859866142273, + "learning_rate": 1.7035477215354068e-05, + "loss": 2.3864, + "step": 16250 + }, + { + "epoch": 1.3115164232103946, + "grad_norm": 0.7379294633865356, + "learning_rate": 1.7026664566934536e-05, + "loss": 2.4663, + "step": 16251 + }, + { + "epoch": 1.3115971269469777, + "grad_norm": 0.6928708553314209, + "learning_rate": 1.7017853986427425e-05, + "loss": 2.4407, + "step": 16252 + }, + { + "epoch": 1.3116778306835606, + "grad_norm": 0.6304093599319458, + "learning_rate": 1.7009045474052298e-05, + "loss": 2.4755, + "step": 16253 + }, + { + "epoch": 1.3117585344201437, + "grad_norm": 0.6945829391479492, + "learning_rate": 1.700023903002872e-05, + "loss": 2.3817, + "step": 16254 + }, + { + "epoch": 1.3118392381567268, + "grad_norm": 0.6899009346961975, + "learning_rate": 1.6991434654576133e-05, + "loss": 2.3989, + "step": 16255 + }, + { + "epoch": 1.3119199418933096, + "grad_norm": 0.7359157204627991, + "learning_rate": 1.6982632347913985e-05, + "loss": 2.3788, + "step": 16256 + }, + { + "epoch": 1.3120006456298927, + "grad_norm": 0.6562486886978149, + "learning_rate": 1.6973832110261658e-05, + "loss": 2.3955, + "step": 16257 + }, + { + "epoch": 1.3120813493664758, + "grad_norm": 0.6772989630699158, + "learning_rate": 1.696503394183846e-05, + "loss": 2.4788, + "step": 16258 + }, + { + "epoch": 1.3121620531030587, + "grad_norm": 0.7214391231536865, + "learning_rate": 1.695623784286363e-05, + "loss": 2.3836, + "step": 16259 + }, + { + "epoch": 1.3122427568396418, + "grad_norm": 0.7041679620742798, + 
"learning_rate": 1.6947443813556495e-05, + "loss": 2.4547, + "step": 16260 + }, + { + "epoch": 1.3123234605762246, + "grad_norm": 0.6819555163383484, + "learning_rate": 1.6938651854136135e-05, + "loss": 2.468, + "step": 16261 + }, + { + "epoch": 1.3124041643128077, + "grad_norm": 0.6466858983039856, + "learning_rate": 1.6929861964821693e-05, + "loss": 2.4572, + "step": 16262 + }, + { + "epoch": 1.3124848680493906, + "grad_norm": 0.688709557056427, + "learning_rate": 1.6921074145832248e-05, + "loss": 2.3891, + "step": 16263 + }, + { + "epoch": 1.3125655717859737, + "grad_norm": 0.6896470785140991, + "learning_rate": 1.69122883973868e-05, + "loss": 2.3825, + "step": 16264 + }, + { + "epoch": 1.3126462755225567, + "grad_norm": 0.8242524266242981, + "learning_rate": 1.690350471970431e-05, + "loss": 2.4804, + "step": 16265 + }, + { + "epoch": 1.3127269792591396, + "grad_norm": 0.7506044507026672, + "learning_rate": 1.689472311300373e-05, + "loss": 2.4671, + "step": 16266 + }, + { + "epoch": 1.3128076829957227, + "grad_norm": 0.6776263117790222, + "learning_rate": 1.688594357750386e-05, + "loss": 2.4646, + "step": 16267 + }, + { + "epoch": 1.3128883867323058, + "grad_norm": 0.6843759417533875, + "learning_rate": 1.6877166113423548e-05, + "loss": 2.4147, + "step": 16268 + }, + { + "epoch": 1.3129690904688887, + "grad_norm": 0.6650474667549133, + "learning_rate": 1.686839072098153e-05, + "loss": 2.4379, + "step": 16269 + }, + { + "epoch": 1.3130497942054717, + "grad_norm": 0.6636466383934021, + "learning_rate": 1.6859617400396533e-05, + "loss": 2.4334, + "step": 16270 + }, + { + "epoch": 1.3131304979420548, + "grad_norm": 0.649217963218689, + "learning_rate": 1.685084615188719e-05, + "loss": 2.319, + "step": 16271 + }, + { + "epoch": 1.3132112016786377, + "grad_norm": 0.7343039512634277, + "learning_rate": 1.6842076975672126e-05, + "loss": 2.3844, + "step": 16272 + }, + { + "epoch": 1.3132919054152208, + "grad_norm": 0.6916847825050354, + "learning_rate": 
1.6833309871969894e-05, + "loss": 2.4544, + "step": 16273 + }, + { + "epoch": 1.3133726091518036, + "grad_norm": 0.6762102842330933, + "learning_rate": 1.6824544840998967e-05, + "loss": 2.3912, + "step": 16274 + }, + { + "epoch": 1.3134533128883867, + "grad_norm": 0.7327221035957336, + "learning_rate": 1.68157818829778e-05, + "loss": 2.4403, + "step": 16275 + }, + { + "epoch": 1.3135340166249696, + "grad_norm": 0.7362363338470459, + "learning_rate": 1.6807020998124812e-05, + "loss": 2.5169, + "step": 16276 + }, + { + "epoch": 1.3136147203615527, + "grad_norm": 0.6882300972938538, + "learning_rate": 1.679826218665832e-05, + "loss": 2.4139, + "step": 16277 + }, + { + "epoch": 1.3136954240981358, + "grad_norm": 0.7146984934806824, + "learning_rate": 1.6789505448796615e-05, + "loss": 2.4738, + "step": 16278 + }, + { + "epoch": 1.3137761278347186, + "grad_norm": 0.6581223607063293, + "learning_rate": 1.6780750784757947e-05, + "loss": 2.4617, + "step": 16279 + }, + { + "epoch": 1.3138568315713017, + "grad_norm": 0.7729318141937256, + "learning_rate": 1.6771998194760518e-05, + "loss": 2.4541, + "step": 16280 + }, + { + "epoch": 1.3139375353078848, + "grad_norm": 0.7617159485816956, + "learning_rate": 1.6763247679022442e-05, + "loss": 2.4727, + "step": 16281 + }, + { + "epoch": 1.3140182390444677, + "grad_norm": 0.6640555262565613, + "learning_rate": 1.6754499237761844e-05, + "loss": 2.4717, + "step": 16282 + }, + { + "epoch": 1.3140989427810508, + "grad_norm": 0.7289882898330688, + "learning_rate": 1.6745752871196707e-05, + "loss": 2.4515, + "step": 16283 + }, + { + "epoch": 1.3141796465176339, + "grad_norm": 0.7075887322425842, + "learning_rate": 1.6737008579545043e-05, + "loss": 2.4586, + "step": 16284 + }, + { + "epoch": 1.3142603502542167, + "grad_norm": 0.7152252197265625, + "learning_rate": 1.672826636302477e-05, + "loss": 2.512, + "step": 16285 + }, + { + "epoch": 1.3143410539907998, + "grad_norm": 0.6875295639038086, + "learning_rate": 1.6719526221853808e-05, + 
"loss": 2.4049, + "step": 16286 + }, + { + "epoch": 1.314421757727383, + "grad_norm": 0.6812484860420227, + "learning_rate": 1.671078815624991e-05, + "loss": 2.3705, + "step": 16287 + }, + { + "epoch": 1.3145024614639658, + "grad_norm": 0.664282500743866, + "learning_rate": 1.6702052166430904e-05, + "loss": 2.3776, + "step": 16288 + }, + { + "epoch": 1.3145831652005489, + "grad_norm": 0.7460842728614807, + "learning_rate": 1.66933182526145e-05, + "loss": 2.4525, + "step": 16289 + }, + { + "epoch": 1.3146638689371317, + "grad_norm": 0.6555477380752563, + "learning_rate": 1.6684586415018366e-05, + "loss": 2.3902, + "step": 16290 + }, + { + "epoch": 1.3147445726737148, + "grad_norm": 0.7191921472549438, + "learning_rate": 1.6675856653860135e-05, + "loss": 2.4957, + "step": 16291 + }, + { + "epoch": 1.3148252764102977, + "grad_norm": 0.738667368888855, + "learning_rate": 1.666712896935738e-05, + "loss": 2.4182, + "step": 16292 + }, + { + "epoch": 1.3149059801468808, + "grad_norm": 0.6764421463012695, + "learning_rate": 1.6658403361727593e-05, + "loss": 2.4179, + "step": 16293 + }, + { + "epoch": 1.3149866838834638, + "grad_norm": 0.6981594562530518, + "learning_rate": 1.6649679831188247e-05, + "loss": 2.4288, + "step": 16294 + }, + { + "epoch": 1.3150673876200467, + "grad_norm": 0.6657801866531372, + "learning_rate": 1.6640958377956784e-05, + "loss": 2.3716, + "step": 16295 + }, + { + "epoch": 1.3151480913566298, + "grad_norm": 0.7238973379135132, + "learning_rate": 1.6632239002250505e-05, + "loss": 2.438, + "step": 16296 + }, + { + "epoch": 1.3152287950932129, + "grad_norm": 0.6727766990661621, + "learning_rate": 1.6623521704286772e-05, + "loss": 2.4406, + "step": 16297 + }, + { + "epoch": 1.3153094988297958, + "grad_norm": 0.6741603016853333, + "learning_rate": 1.661480648428282e-05, + "loss": 2.4379, + "step": 16298 + }, + { + "epoch": 1.3153902025663788, + "grad_norm": 0.7174610495567322, + "learning_rate": 1.6606093342455865e-05, + "loss": 2.4368, + "step": 16299 
+ }, + { + "epoch": 1.315470906302962, + "grad_norm": 0.6604920029640198, + "learning_rate": 1.6597382279023057e-05, + "loss": 2.4431, + "step": 16300 + }, + { + "epoch": 1.3155516100395448, + "grad_norm": 0.6930821537971497, + "learning_rate": 1.6588673294201494e-05, + "loss": 2.4064, + "step": 16301 + }, + { + "epoch": 1.3156323137761279, + "grad_norm": 0.6489799618721008, + "learning_rate": 1.657996638820826e-05, + "loss": 2.4256, + "step": 16302 + }, + { + "epoch": 1.315713017512711, + "grad_norm": 0.6781083345413208, + "learning_rate": 1.65712615612603e-05, + "loss": 2.4731, + "step": 16303 + }, + { + "epoch": 1.3157937212492938, + "grad_norm": 0.6710748076438904, + "learning_rate": 1.656255881357458e-05, + "loss": 2.4065, + "step": 16304 + }, + { + "epoch": 1.315874424985877, + "grad_norm": 0.7099822163581848, + "learning_rate": 1.655385814536804e-05, + "loss": 2.3978, + "step": 16305 + }, + { + "epoch": 1.3159551287224598, + "grad_norm": 0.7215133905410767, + "learning_rate": 1.6545159556857447e-05, + "loss": 2.4655, + "step": 16306 + }, + { + "epoch": 1.3160358324590429, + "grad_norm": 0.7705253958702087, + "learning_rate": 1.6536463048259643e-05, + "loss": 2.4576, + "step": 16307 + }, + { + "epoch": 1.3161165361956257, + "grad_norm": 0.6232311725616455, + "learning_rate": 1.6527768619791372e-05, + "loss": 2.3923, + "step": 16308 + }, + { + "epoch": 1.3161972399322088, + "grad_norm": 0.6599528789520264, + "learning_rate": 1.6519076271669264e-05, + "loss": 2.4236, + "step": 16309 + }, + { + "epoch": 1.316277943668792, + "grad_norm": 0.6598034501075745, + "learning_rate": 1.6510386004110023e-05, + "loss": 2.368, + "step": 16310 + }, + { + "epoch": 1.3163586474053748, + "grad_norm": 0.6949655413627625, + "learning_rate": 1.650169781733022e-05, + "loss": 2.4277, + "step": 16311 + }, + { + "epoch": 1.3164393511419579, + "grad_norm": 0.6838186383247375, + "learning_rate": 1.6493011711546358e-05, + "loss": 2.4413, + "step": 16312 + }, + { + "epoch": 
1.316520054878541, + "grad_norm": 0.7026765942573547, + "learning_rate": 1.6484327686974933e-05, + "loss": 2.4628, + "step": 16313 + }, + { + "epoch": 1.3166007586151238, + "grad_norm": 0.745360791683197, + "learning_rate": 1.647564574383237e-05, + "loss": 2.4358, + "step": 16314 + }, + { + "epoch": 1.316681462351707, + "grad_norm": 0.676225483417511, + "learning_rate": 1.6466965882335083e-05, + "loss": 2.4119, + "step": 16315 + }, + { + "epoch": 1.31676216608829, + "grad_norm": 0.6767755150794983, + "learning_rate": 1.6458288102699325e-05, + "loss": 2.4322, + "step": 16316 + }, + { + "epoch": 1.3168428698248729, + "grad_norm": 0.6957309246063232, + "learning_rate": 1.6449612405141424e-05, + "loss": 2.4327, + "step": 16317 + }, + { + "epoch": 1.316923573561456, + "grad_norm": 0.6773050427436829, + "learning_rate": 1.64409387898776e-05, + "loss": 2.4207, + "step": 16318 + }, + { + "epoch": 1.3170042772980388, + "grad_norm": 0.7319278717041016, + "learning_rate": 1.6432267257123978e-05, + "loss": 2.445, + "step": 16319 + }, + { + "epoch": 1.317084981034622, + "grad_norm": 0.7531326413154602, + "learning_rate": 1.6423597807096714e-05, + "loss": 2.3948, + "step": 16320 + }, + { + "epoch": 1.3171656847712048, + "grad_norm": 0.6741669178009033, + "learning_rate": 1.6414930440011854e-05, + "loss": 2.4177, + "step": 16321 + }, + { + "epoch": 1.3172463885077879, + "grad_norm": 0.6814963221549988, + "learning_rate": 1.640626515608543e-05, + "loss": 2.4419, + "step": 16322 + }, + { + "epoch": 1.317327092244371, + "grad_norm": 0.6740893721580505, + "learning_rate": 1.6397601955533392e-05, + "loss": 2.3516, + "step": 16323 + }, + { + "epoch": 1.3174077959809538, + "grad_norm": 0.7172163724899292, + "learning_rate": 1.6388940838571675e-05, + "loss": 2.4665, + "step": 16324 + }, + { + "epoch": 1.317488499717537, + "grad_norm": 0.6690489053726196, + "learning_rate": 1.6380281805416085e-05, + "loss": 2.3957, + "step": 16325 + }, + { + "epoch": 1.31756920345412, + "grad_norm": 
0.7182994484901428, + "learning_rate": 1.6371624856282462e-05, + "loss": 2.4456, + "step": 16326 + }, + { + "epoch": 1.3176499071907029, + "grad_norm": 0.6324366927146912, + "learning_rate": 1.636296999138659e-05, + "loss": 2.4111, + "step": 16327 + }, + { + "epoch": 1.317730610927286, + "grad_norm": 0.6740162372589111, + "learning_rate": 1.6354317210944093e-05, + "loss": 2.451, + "step": 16328 + }, + { + "epoch": 1.317811314663869, + "grad_norm": 0.6964122653007507, + "learning_rate": 1.6345666515170665e-05, + "loss": 2.4269, + "step": 16329 + }, + { + "epoch": 1.317892018400452, + "grad_norm": 0.7093058824539185, + "learning_rate": 1.6337017904281915e-05, + "loss": 2.4686, + "step": 16330 + }, + { + "epoch": 1.317972722137035, + "grad_norm": 0.693233072757721, + "learning_rate": 1.6328371378493367e-05, + "loss": 2.4149, + "step": 16331 + }, + { + "epoch": 1.318053425873618, + "grad_norm": 0.6418019533157349, + "learning_rate": 1.631972693802052e-05, + "loss": 2.4268, + "step": 16332 + }, + { + "epoch": 1.318134129610201, + "grad_norm": 0.6815310120582581, + "learning_rate": 1.631108458307883e-05, + "loss": 2.4274, + "step": 16333 + }, + { + "epoch": 1.318214833346784, + "grad_norm": 0.6774280071258545, + "learning_rate": 1.630244431388369e-05, + "loss": 2.3927, + "step": 16334 + }, + { + "epoch": 1.3182955370833669, + "grad_norm": 0.688090443611145, + "learning_rate": 1.6293806130650413e-05, + "loss": 2.4013, + "step": 16335 + }, + { + "epoch": 1.31837624081995, + "grad_norm": 0.7300553321838379, + "learning_rate": 1.6285170033594288e-05, + "loss": 2.4716, + "step": 16336 + }, + { + "epoch": 1.3184569445565328, + "grad_norm": 0.6798286437988281, + "learning_rate": 1.627653602293059e-05, + "loss": 2.3893, + "step": 16337 + }, + { + "epoch": 1.318537648293116, + "grad_norm": 0.6699275970458984, + "learning_rate": 1.6267904098874442e-05, + "loss": 2.4446, + "step": 16338 + }, + { + "epoch": 1.318618352029699, + "grad_norm": 0.7632322311401367, + "learning_rate": 
1.6259274261641e-05, + "loss": 2.4434, + "step": 16339 + }, + { + "epoch": 1.3186990557662819, + "grad_norm": 0.7156099677085876, + "learning_rate": 1.6250646511445343e-05, + "loss": 2.4142, + "step": 16340 + }, + { + "epoch": 1.318779759502865, + "grad_norm": 0.7525599598884583, + "learning_rate": 1.6242020848502505e-05, + "loss": 2.3543, + "step": 16341 + }, + { + "epoch": 1.318860463239448, + "grad_norm": 0.7063113451004028, + "learning_rate": 1.623339727302745e-05, + "loss": 2.4754, + "step": 16342 + }, + { + "epoch": 1.318941166976031, + "grad_norm": 0.7138137221336365, + "learning_rate": 1.6224775785235123e-05, + "loss": 2.4223, + "step": 16343 + }, + { + "epoch": 1.319021870712614, + "grad_norm": 0.6976706981658936, + "learning_rate": 1.6216156385340352e-05, + "loss": 2.4878, + "step": 16344 + }, + { + "epoch": 1.319102574449197, + "grad_norm": 0.6931003332138062, + "learning_rate": 1.6207539073557974e-05, + "loss": 2.39, + "step": 16345 + }, + { + "epoch": 1.31918327818578, + "grad_norm": 0.6919357180595398, + "learning_rate": 1.6198923850102765e-05, + "loss": 2.4197, + "step": 16346 + }, + { + "epoch": 1.319263981922363, + "grad_norm": 0.7453805804252625, + "learning_rate": 1.619031071518945e-05, + "loss": 2.4226, + "step": 16347 + }, + { + "epoch": 1.3193446856589461, + "grad_norm": 0.6990562677383423, + "learning_rate": 1.6181699669032658e-05, + "loss": 2.3925, + "step": 16348 + }, + { + "epoch": 1.319425389395529, + "grad_norm": 0.6974303126335144, + "learning_rate": 1.6173090711847006e-05, + "loss": 2.445, + "step": 16349 + }, + { + "epoch": 1.319506093132112, + "grad_norm": 0.7278286814689636, + "learning_rate": 1.6164483843847057e-05, + "loss": 2.3869, + "step": 16350 + }, + { + "epoch": 1.319586796868695, + "grad_norm": 0.7282646298408508, + "learning_rate": 1.6155879065247326e-05, + "loss": 2.3694, + "step": 16351 + }, + { + "epoch": 1.319667500605278, + "grad_norm": 0.7329844832420349, + "learning_rate": 1.6147276376262255e-05, + "loss": 2.4369, + 
"step": 16352 + }, + { + "epoch": 1.319748204341861, + "grad_norm": 0.6499385833740234, + "learning_rate": 1.613867577710627e-05, + "loss": 2.441, + "step": 16353 + }, + { + "epoch": 1.319828908078444, + "grad_norm": 0.7026061415672302, + "learning_rate": 1.6130077267993683e-05, + "loss": 2.4117, + "step": 16354 + }, + { + "epoch": 1.319909611815027, + "grad_norm": 0.7007814049720764, + "learning_rate": 1.6121480849138803e-05, + "loss": 2.4287, + "step": 16355 + }, + { + "epoch": 1.31999031555161, + "grad_norm": 0.6525697708129883, + "learning_rate": 1.611288652075591e-05, + "loss": 2.3969, + "step": 16356 + }, + { + "epoch": 1.320071019288193, + "grad_norm": 0.7268216609954834, + "learning_rate": 1.610429428305914e-05, + "loss": 2.4227, + "step": 16357 + }, + { + "epoch": 1.3201517230247761, + "grad_norm": 0.6665107011795044, + "learning_rate": 1.6095704136262668e-05, + "loss": 2.3694, + "step": 16358 + }, + { + "epoch": 1.320232426761359, + "grad_norm": 0.6832399368286133, + "learning_rate": 1.60871160805806e-05, + "loss": 2.4001, + "step": 16359 + }, + { + "epoch": 1.320313130497942, + "grad_norm": 0.6788592338562012, + "learning_rate": 1.6078530116226897e-05, + "loss": 2.4294, + "step": 16360 + }, + { + "epoch": 1.3203938342345252, + "grad_norm": 0.7147449254989624, + "learning_rate": 1.6069946243415625e-05, + "loss": 2.3904, + "step": 16361 + }, + { + "epoch": 1.320474537971108, + "grad_norm": 0.7014418840408325, + "learning_rate": 1.6061364462360683e-05, + "loss": 2.4026, + "step": 16362 + }, + { + "epoch": 1.3205552417076911, + "grad_norm": 0.6867612600326538, + "learning_rate": 1.6052784773275987e-05, + "loss": 2.4092, + "step": 16363 + }, + { + "epoch": 1.3206359454442742, + "grad_norm": 0.6588961482048035, + "learning_rate": 1.6044207176375303e-05, + "loss": 2.4588, + "step": 16364 + }, + { + "epoch": 1.320716649180857, + "grad_norm": 0.688671350479126, + "learning_rate": 1.6035631671872444e-05, + "loss": 2.3957, + "step": 16365 + }, + { + "epoch": 
1.3207973529174402, + "grad_norm": 0.7548064589500427, + "learning_rate": 1.6027058259981154e-05, + "loss": 2.4168, + "step": 16366 + }, + { + "epoch": 1.320878056654023, + "grad_norm": 0.7251972556114197, + "learning_rate": 1.6018486940915044e-05, + "loss": 2.4704, + "step": 16367 + }, + { + "epoch": 1.3209587603906061, + "grad_norm": 0.73149174451828, + "learning_rate": 1.6009917714887778e-05, + "loss": 2.4597, + "step": 16368 + }, + { + "epoch": 1.321039464127189, + "grad_norm": 0.6741003394126892, + "learning_rate": 1.600135058211294e-05, + "loss": 2.3876, + "step": 16369 + }, + { + "epoch": 1.321120167863772, + "grad_norm": 0.6891310214996338, + "learning_rate": 1.5992785542804e-05, + "loss": 2.4229, + "step": 16370 + }, + { + "epoch": 1.3212008716003552, + "grad_norm": 0.7529458403587341, + "learning_rate": 1.5984222597174415e-05, + "loss": 2.45, + "step": 16371 + }, + { + "epoch": 1.321281575336938, + "grad_norm": 0.708134651184082, + "learning_rate": 1.5975661745437664e-05, + "loss": 2.454, + "step": 16372 + }, + { + "epoch": 1.321362279073521, + "grad_norm": 0.7511130571365356, + "learning_rate": 1.596710298780705e-05, + "loss": 2.4201, + "step": 16373 + }, + { + "epoch": 1.3214429828101042, + "grad_norm": 0.6599537134170532, + "learning_rate": 1.595854632449588e-05, + "loss": 2.3982, + "step": 16374 + }, + { + "epoch": 1.321523686546687, + "grad_norm": 0.6821228861808777, + "learning_rate": 1.5949991755717453e-05, + "loss": 2.4525, + "step": 16375 + }, + { + "epoch": 1.3216043902832701, + "grad_norm": 0.6872302293777466, + "learning_rate": 1.5941439281684923e-05, + "loss": 2.3631, + "step": 16376 + }, + { + "epoch": 1.3216850940198532, + "grad_norm": 0.6650066375732422, + "learning_rate": 1.5932888902611453e-05, + "loss": 2.3718, + "step": 16377 + }, + { + "epoch": 1.321765797756436, + "grad_norm": 0.6620016694068909, + "learning_rate": 1.5924340618710143e-05, + "loss": 2.4076, + "step": 16378 + }, + { + "epoch": 1.3218465014930192, + "grad_norm": 
0.694807231426239, + "learning_rate": 1.5915794430194066e-05, + "loss": 2.4369, + "step": 16379 + }, + { + "epoch": 1.321927205229602, + "grad_norm": 0.6810131669044495, + "learning_rate": 1.590725033727616e-05, + "loss": 2.4151, + "step": 16380 + }, + { + "epoch": 1.3220079089661851, + "grad_norm": 0.768846333026886, + "learning_rate": 1.58987083401694e-05, + "loss": 2.4991, + "step": 16381 + }, + { + "epoch": 1.322088612702768, + "grad_norm": 0.6581698656082153, + "learning_rate": 1.5890168439086672e-05, + "loss": 2.4263, + "step": 16382 + }, + { + "epoch": 1.322169316439351, + "grad_norm": 0.7267034649848938, + "learning_rate": 1.5881630634240818e-05, + "loss": 2.4219, + "step": 16383 + }, + { + "epoch": 1.3222500201759342, + "grad_norm": 0.7391555905342102, + "learning_rate": 1.5873094925844612e-05, + "loss": 2.427, + "step": 16384 + }, + { + "epoch": 1.322330723912517, + "grad_norm": 0.6612021923065186, + "learning_rate": 1.5864561314110815e-05, + "loss": 2.4108, + "step": 16385 + }, + { + "epoch": 1.3224114276491001, + "grad_norm": 0.7118437886238098, + "learning_rate": 1.585602979925206e-05, + "loss": 2.3839, + "step": 16386 + }, + { + "epoch": 1.3224921313856832, + "grad_norm": 0.6663616299629211, + "learning_rate": 1.5847500381480997e-05, + "loss": 2.4302, + "step": 16387 + }, + { + "epoch": 1.322572835122266, + "grad_norm": 0.6848715543746948, + "learning_rate": 1.583897306101022e-05, + "loss": 2.4228, + "step": 16388 + }, + { + "epoch": 1.3226535388588492, + "grad_norm": 0.680895209312439, + "learning_rate": 1.5830447838052208e-05, + "loss": 2.4457, + "step": 16389 + }, + { + "epoch": 1.3227342425954323, + "grad_norm": 0.683276891708374, + "learning_rate": 1.582192471281946e-05, + "loss": 2.4412, + "step": 16390 + }, + { + "epoch": 1.3228149463320151, + "grad_norm": 0.7311880588531494, + "learning_rate": 1.5813403685524396e-05, + "loss": 2.4604, + "step": 16391 + }, + { + "epoch": 1.3228956500685982, + "grad_norm": 0.6769095659255981, + "learning_rate": 
1.580488475637937e-05, + "loss": 2.4311, + "step": 16392 + }, + { + "epoch": 1.3229763538051813, + "grad_norm": 0.6683096289634705, + "learning_rate": 1.579636792559671e-05, + "loss": 2.445, + "step": 16393 + }, + { + "epoch": 1.3230570575417642, + "grad_norm": 0.7268782258033752, + "learning_rate": 1.5787853193388667e-05, + "loss": 2.4176, + "step": 16394 + }, + { + "epoch": 1.3231377612783473, + "grad_norm": 0.6878541707992554, + "learning_rate": 1.5779340559967494e-05, + "loss": 2.4615, + "step": 16395 + }, + { + "epoch": 1.3232184650149301, + "grad_norm": 0.7031291127204895, + "learning_rate": 1.577083002554527e-05, + "loss": 2.3726, + "step": 16396 + }, + { + "epoch": 1.3232991687515132, + "grad_norm": 0.7738708853721619, + "learning_rate": 1.5762321590334138e-05, + "loss": 2.5046, + "step": 16397 + }, + { + "epoch": 1.323379872488096, + "grad_norm": 0.6660913228988647, + "learning_rate": 1.575381525454619e-05, + "loss": 2.3759, + "step": 16398 + }, + { + "epoch": 1.3234605762246792, + "grad_norm": 0.6534021496772766, + "learning_rate": 1.574531101839335e-05, + "loss": 2.3983, + "step": 16399 + }, + { + "epoch": 1.3235412799612623, + "grad_norm": 0.6645511388778687, + "learning_rate": 1.5736808882087606e-05, + "loss": 2.3958, + "step": 16400 + }, + { + "epoch": 1.3236219836978451, + "grad_norm": 0.6723225712776184, + "learning_rate": 1.5728308845840855e-05, + "loss": 2.4248, + "step": 16401 + }, + { + "epoch": 1.3237026874344282, + "grad_norm": 0.6609976887702942, + "learning_rate": 1.5719810909864942e-05, + "loss": 2.3888, + "step": 16402 + }, + { + "epoch": 1.3237833911710113, + "grad_norm": 0.6713845729827881, + "learning_rate": 1.5711315074371635e-05, + "loss": 2.4474, + "step": 16403 + }, + { + "epoch": 1.3238640949075942, + "grad_norm": 0.701438307762146, + "learning_rate": 1.5702821339572726e-05, + "loss": 2.4673, + "step": 16404 + }, + { + "epoch": 1.3239447986441772, + "grad_norm": 0.7235428094863892, + "learning_rate": 1.5694329705679834e-05, + 
"loss": 2.3825, + "step": 16405 + }, + { + "epoch": 1.3240255023807603, + "grad_norm": 0.6785053610801697, + "learning_rate": 1.568584017290462e-05, + "loss": 2.4668, + "step": 16406 + }, + { + "epoch": 1.3241062061173432, + "grad_norm": 0.6918929815292358, + "learning_rate": 1.5677352741458705e-05, + "loss": 2.4329, + "step": 16407 + }, + { + "epoch": 1.3241869098539263, + "grad_norm": 0.7194826006889343, + "learning_rate": 1.5668867411553544e-05, + "loss": 2.3717, + "step": 16408 + }, + { + "epoch": 1.3242676135905094, + "grad_norm": 0.7299134731292725, + "learning_rate": 1.5660384183400658e-05, + "loss": 2.4695, + "step": 16409 + }, + { + "epoch": 1.3243483173270922, + "grad_norm": 0.7047600746154785, + "learning_rate": 1.565190305721147e-05, + "loss": 2.4525, + "step": 16410 + }, + { + "epoch": 1.3244290210636753, + "grad_norm": 0.685001015663147, + "learning_rate": 1.5643424033197328e-05, + "loss": 2.322, + "step": 16411 + }, + { + "epoch": 1.3245097248002582, + "grad_norm": 0.7696635127067566, + "learning_rate": 1.5634947111569588e-05, + "loss": 2.4464, + "step": 16412 + }, + { + "epoch": 1.3245904285368413, + "grad_norm": 0.7066066265106201, + "learning_rate": 1.5626472292539485e-05, + "loss": 2.4315, + "step": 16413 + }, + { + "epoch": 1.3246711322734241, + "grad_norm": 0.6553033590316772, + "learning_rate": 1.5617999576318276e-05, + "loss": 2.4296, + "step": 16414 + }, + { + "epoch": 1.3247518360100072, + "grad_norm": 0.7031354308128357, + "learning_rate": 1.560952896311707e-05, + "loss": 2.4565, + "step": 16415 + }, + { + "epoch": 1.3248325397465903, + "grad_norm": 0.7826353311538696, + "learning_rate": 1.560106045314701e-05, + "loss": 2.4275, + "step": 16416 + }, + { + "epoch": 1.3249132434831732, + "grad_norm": 0.6408981084823608, + "learning_rate": 1.559259404661916e-05, + "loss": 2.3869, + "step": 16417 + }, + { + "epoch": 1.3249939472197563, + "grad_norm": 0.7487547993659973, + "learning_rate": 1.558412974374448e-05, + "loss": 2.3678, + "step": 16418 
+ }, + { + "epoch": 1.3250746509563394, + "grad_norm": 0.7163991332054138, + "learning_rate": 1.5575667544733963e-05, + "loss": 2.397, + "step": 16419 + }, + { + "epoch": 1.3251553546929222, + "grad_norm": 0.6933553814888, + "learning_rate": 1.5567207449798515e-05, + "loss": 2.424, + "step": 16420 + }, + { + "epoch": 1.3252360584295053, + "grad_norm": 0.687406063079834, + "learning_rate": 1.5558749459148945e-05, + "loss": 2.4346, + "step": 16421 + }, + { + "epoch": 1.3253167621660884, + "grad_norm": 0.6781243681907654, + "learning_rate": 1.5550293572996054e-05, + "loss": 2.4526, + "step": 16422 + }, + { + "epoch": 1.3253974659026713, + "grad_norm": 0.6632506847381592, + "learning_rate": 1.5541839791550616e-05, + "loss": 2.4559, + "step": 16423 + }, + { + "epoch": 1.3254781696392544, + "grad_norm": 0.668396532535553, + "learning_rate": 1.5533388115023327e-05, + "loss": 2.4463, + "step": 16424 + }, + { + "epoch": 1.3255588733758372, + "grad_norm": 0.6853309869766235, + "learning_rate": 1.552493854362479e-05, + "loss": 2.429, + "step": 16425 + }, + { + "epoch": 1.3256395771124203, + "grad_norm": 0.7443413138389587, + "learning_rate": 1.5516491077565597e-05, + "loss": 2.4091, + "step": 16426 + }, + { + "epoch": 1.3257202808490032, + "grad_norm": 0.690170168876648, + "learning_rate": 1.550804571705632e-05, + "loss": 2.3942, + "step": 16427 + }, + { + "epoch": 1.3258009845855863, + "grad_norm": NaN, + "learning_rate": 1.550804571705632e-05, + "loss": 2.3788, + "step": 16428 + }, + { + "epoch": 1.3258816883221693, + "grad_norm": 0.6901132464408875, + "learning_rate": 1.5499602462307373e-05, + "loss": 2.3859, + "step": 16429 + }, + { + "epoch": 1.3259623920587522, + "grad_norm": 0.6639334559440613, + "learning_rate": 1.5491161313529223e-05, + "loss": 2.4271, + "step": 16430 + }, + { + "epoch": 1.3260430957953353, + "grad_norm": 0.7121936678886414, + "learning_rate": 1.548272227093227e-05, + "loss": 2.3818, + "step": 16431 + }, + { + "epoch": 1.3261237995319184, + 
"grad_norm": 0.6863218545913696, + "learning_rate": 1.5474285334726778e-05, + "loss": 2.3744, + "step": 16432 + }, + { + "epoch": 1.3262045032685013, + "grad_norm": 0.6697081327438354, + "learning_rate": 1.5465850505123057e-05, + "loss": 2.4001, + "step": 16433 + }, + { + "epoch": 1.3262852070050843, + "grad_norm": 0.7258912324905396, + "learning_rate": 1.5457417782331308e-05, + "loss": 2.4556, + "step": 16434 + }, + { + "epoch": 1.3263659107416674, + "grad_norm": 0.6930057406425476, + "learning_rate": 1.5448987166561712e-05, + "loss": 2.4979, + "step": 16435 + }, + { + "epoch": 1.3264466144782503, + "grad_norm": 0.6475574970245361, + "learning_rate": 1.5440558658024363e-05, + "loss": 2.3821, + "step": 16436 + }, + { + "epoch": 1.3265273182148334, + "grad_norm": 0.7489237785339355, + "learning_rate": 1.5432132256929367e-05, + "loss": 2.465, + "step": 16437 + }, + { + "epoch": 1.3266080219514165, + "grad_norm": 0.704391360282898, + "learning_rate": 1.5423707963486667e-05, + "loss": 2.433, + "step": 16438 + }, + { + "epoch": 1.3266887256879993, + "grad_norm": 0.669452965259552, + "learning_rate": 1.5415285777906253e-05, + "loss": 2.3981, + "step": 16439 + }, + { + "epoch": 1.3267694294245824, + "grad_norm": 0.6961604356765747, + "learning_rate": 1.540686570039802e-05, + "loss": 2.4684, + "step": 16440 + }, + { + "epoch": 1.3268501331611653, + "grad_norm": 0.6613924503326416, + "learning_rate": 1.539844773117185e-05, + "loss": 2.3711, + "step": 16441 + }, + { + "epoch": 1.3269308368977484, + "grad_norm": 0.7019763588905334, + "learning_rate": 1.5390031870437492e-05, + "loss": 2.3716, + "step": 16442 + }, + { + "epoch": 1.3270115406343312, + "grad_norm": 0.700176477432251, + "learning_rate": 1.5381618118404707e-05, + "loss": 2.4305, + "step": 16443 + }, + { + "epoch": 1.3270922443709143, + "grad_norm": 0.6716598272323608, + "learning_rate": 1.5373206475283197e-05, + "loss": 2.3835, + "step": 16444 + }, + { + "epoch": 1.3271729481074974, + "grad_norm": 
0.6449697017669678, + "learning_rate": 1.53647969412826e-05, + "loss": 2.3707, + "step": 16445 + }, + { + "epoch": 1.3272536518440803, + "grad_norm": 0.7276685237884521, + "learning_rate": 1.535638951661249e-05, + "loss": 2.4313, + "step": 16446 + }, + { + "epoch": 1.3273343555806634, + "grad_norm": 0.7144705057144165, + "learning_rate": 1.5347984201482456e-05, + "loss": 2.4122, + "step": 16447 + }, + { + "epoch": 1.3274150593172465, + "grad_norm": 0.660225510597229, + "learning_rate": 1.53395809961019e-05, + "loss": 2.4282, + "step": 16448 + }, + { + "epoch": 1.3274957630538293, + "grad_norm": 0.7431676983833313, + "learning_rate": 1.5331179900680293e-05, + "loss": 2.3863, + "step": 16449 + }, + { + "epoch": 1.3275764667904124, + "grad_norm": 0.6670290231704712, + "learning_rate": 1.5322780915427036e-05, + "loss": 2.4266, + "step": 16450 + }, + { + "epoch": 1.3276571705269955, + "grad_norm": 0.711098313331604, + "learning_rate": 1.531438404055141e-05, + "loss": 2.4431, + "step": 16451 + }, + { + "epoch": 1.3277378742635784, + "grad_norm": 0.6908091902732849, + "learning_rate": 1.5305989276262688e-05, + "loss": 2.4153, + "step": 16452 + }, + { + "epoch": 1.3278185780001615, + "grad_norm": 0.7458107471466064, + "learning_rate": 1.5297596622770115e-05, + "loss": 2.4076, + "step": 16453 + }, + { + "epoch": 1.3278992817367445, + "grad_norm": 0.7406951189041138, + "learning_rate": 1.528920608028285e-05, + "loss": 2.3585, + "step": 16454 + }, + { + "epoch": 1.3279799854733274, + "grad_norm": 0.718824565410614, + "learning_rate": 1.5280817649010005e-05, + "loss": 2.4092, + "step": 16455 + }, + { + "epoch": 1.3280606892099105, + "grad_norm": 0.7163959741592407, + "learning_rate": 1.527243132916064e-05, + "loss": 2.4344, + "step": 16456 + }, + { + "epoch": 1.3281413929464934, + "grad_norm": 0.6695916652679443, + "learning_rate": 1.5264047120943793e-05, + "loss": 2.4144, + "step": 16457 + }, + { + "epoch": 1.3282220966830764, + "grad_norm": 0.6858509182929993, + 
"learning_rate": 1.5255665024568366e-05, + "loss": 2.4345, + "step": 16458 + }, + { + "epoch": 1.3283028004196593, + "grad_norm": 0.7277235388755798, + "learning_rate": 1.5247285040243297e-05, + "loss": 2.4219, + "step": 16459 + }, + { + "epoch": 1.3283835041562424, + "grad_norm": 0.6481949090957642, + "learning_rate": 1.5238907168177441e-05, + "loss": 2.4483, + "step": 16460 + }, + { + "epoch": 1.3284642078928255, + "grad_norm": 0.6956833600997925, + "learning_rate": 1.5230531408579574e-05, + "loss": 2.4241, + "step": 16461 + }, + { + "epoch": 1.3285449116294084, + "grad_norm": 0.7266185879707336, + "learning_rate": 1.522215776165845e-05, + "loss": 2.4577, + "step": 16462 + }, + { + "epoch": 1.3286256153659914, + "grad_norm": 0.725574254989624, + "learning_rate": 1.5213786227622773e-05, + "loss": 2.4451, + "step": 16463 + }, + { + "epoch": 1.3287063191025745, + "grad_norm": 0.7550850510597229, + "learning_rate": 1.5205416806681172e-05, + "loss": 2.4262, + "step": 16464 + }, + { + "epoch": 1.3287870228391574, + "grad_norm": 0.6391028761863708, + "learning_rate": 1.5197049499042237e-05, + "loss": 2.4116, + "step": 16465 + }, + { + "epoch": 1.3288677265757405, + "grad_norm": 0.6899027824401855, + "learning_rate": 1.5188684304914524e-05, + "loss": 2.3754, + "step": 16466 + }, + { + "epoch": 1.3289484303123236, + "grad_norm": 0.696681022644043, + "learning_rate": 1.518032122450649e-05, + "loss": 2.471, + "step": 16467 + }, + { + "epoch": 1.3290291340489064, + "grad_norm": 0.7090939283370972, + "learning_rate": 1.5171960258026551e-05, + "loss": 2.4153, + "step": 16468 + }, + { + "epoch": 1.3291098377854895, + "grad_norm": 0.7125746607780457, + "learning_rate": 1.5163601405683148e-05, + "loss": 2.4102, + "step": 16469 + }, + { + "epoch": 1.3291905415220726, + "grad_norm": 0.7407518029212952, + "learning_rate": 1.5155244667684531e-05, + "loss": 2.429, + "step": 16470 + }, + { + "epoch": 1.3292712452586555, + "grad_norm": 0.7401885390281677, + "learning_rate": 
1.5146890044239004e-05, + "loss": 2.4577, + "step": 16471 + }, + { + "epoch": 1.3293519489952383, + "grad_norm": 0.7625757455825806, + "learning_rate": 1.5138537535554786e-05, + "loss": 2.3813, + "step": 16472 + }, + { + "epoch": 1.3294326527318214, + "grad_norm": 0.7423396706581116, + "learning_rate": 1.5130187141840057e-05, + "loss": 2.3797, + "step": 16473 + }, + { + "epoch": 1.3295133564684045, + "grad_norm": 0.7029228806495667, + "learning_rate": 1.5121838863302884e-05, + "loss": 2.4203, + "step": 16474 + }, + { + "epoch": 1.3295940602049874, + "grad_norm": 0.8062863349914551, + "learning_rate": 1.5113492700151378e-05, + "loss": 2.3743, + "step": 16475 + }, + { + "epoch": 1.3296747639415705, + "grad_norm": 0.7113343477249146, + "learning_rate": 1.5105148652593548e-05, + "loss": 2.3837, + "step": 16476 + }, + { + "epoch": 1.3297554676781536, + "grad_norm": 0.6733126044273376, + "learning_rate": 1.5096806720837309e-05, + "loss": 2.4677, + "step": 16477 + }, + { + "epoch": 1.3298361714147364, + "grad_norm": 0.6936657428741455, + "learning_rate": 1.5088466905090593e-05, + "loss": 2.3677, + "step": 16478 + }, + { + "epoch": 1.3299168751513195, + "grad_norm": 0.746746301651001, + "learning_rate": 1.5080129205561255e-05, + "loss": 2.423, + "step": 16479 + }, + { + "epoch": 1.3299975788879026, + "grad_norm": 0.6879116296768188, + "learning_rate": 1.5071793622457065e-05, + "loss": 2.4867, + "step": 16480 + }, + { + "epoch": 1.3300782826244855, + "grad_norm": 0.6841214299201965, + "learning_rate": 1.5063460155985776e-05, + "loss": 2.5015, + "step": 16481 + }, + { + "epoch": 1.3301589863610686, + "grad_norm": 0.6955111622810364, + "learning_rate": 1.5055128806355123e-05, + "loss": 2.3975, + "step": 16482 + }, + { + "epoch": 1.3302396900976516, + "grad_norm": 0.7084987163543701, + "learning_rate": 1.5046799573772673e-05, + "loss": 2.4511, + "step": 16483 + }, + { + "epoch": 1.3303203938342345, + "grad_norm": 0.6905840039253235, + "learning_rate": 1.5038472458446051e-05, + 
"loss": 2.3542, + "step": 16484 + }, + { + "epoch": 1.3304010975708176, + "grad_norm": 0.7182672023773193, + "learning_rate": 1.5030147460582788e-05, + "loss": 2.3673, + "step": 16485 + }, + { + "epoch": 1.3304818013074005, + "grad_norm": 0.6805183291435242, + "learning_rate": 1.5021824580390353e-05, + "loss": 2.3751, + "step": 16486 + }, + { + "epoch": 1.3305625050439835, + "grad_norm": 0.6278836727142334, + "learning_rate": 1.5013503818076202e-05, + "loss": 2.3508, + "step": 16487 + }, + { + "epoch": 1.3306432087805664, + "grad_norm": 0.664000391960144, + "learning_rate": 1.500518517384768e-05, + "loss": 2.4039, + "step": 16488 + }, + { + "epoch": 1.3307239125171495, + "grad_norm": 0.6906681060791016, + "learning_rate": 1.4996868647912155e-05, + "loss": 2.4068, + "step": 16489 + }, + { + "epoch": 1.3308046162537326, + "grad_norm": 0.6756102442741394, + "learning_rate": 1.4988554240476826e-05, + "loss": 2.4423, + "step": 16490 + }, + { + "epoch": 1.3308853199903155, + "grad_norm": 0.7013095021247864, + "learning_rate": 1.4980241951748964e-05, + "loss": 2.3536, + "step": 16491 + }, + { + "epoch": 1.3309660237268985, + "grad_norm": 0.6689851880073547, + "learning_rate": 1.4971931781935732e-05, + "loss": 2.4192, + "step": 16492 + }, + { + "epoch": 1.3310467274634816, + "grad_norm": 0.6411572694778442, + "learning_rate": 1.4963623731244202e-05, + "loss": 2.4012, + "step": 16493 + }, + { + "epoch": 1.3311274312000645, + "grad_norm": 0.7209812998771667, + "learning_rate": 1.4955317799881453e-05, + "loss": 2.378, + "step": 16494 + }, + { + "epoch": 1.3312081349366476, + "grad_norm": 0.7041119933128357, + "learning_rate": 1.4947013988054504e-05, + "loss": 2.4047, + "step": 16495 + }, + { + "epoch": 1.3312888386732307, + "grad_norm": 0.6928852796554565, + "learning_rate": 1.4938712295970292e-05, + "loss": 2.4489, + "step": 16496 + }, + { + "epoch": 1.3313695424098135, + "grad_norm": 0.6923524141311646, + "learning_rate": 1.4930412723835718e-05, + "loss": 2.3752, + "step": 
16497 + }, + { + "epoch": 1.3314502461463966, + "grad_norm": 0.7034686803817749, + "learning_rate": 1.4922115271857662e-05, + "loss": 2.3898, + "step": 16498 + }, + { + "epoch": 1.3315309498829797, + "grad_norm": 0.6717320084571838, + "learning_rate": 1.4913819940242856e-05, + "loss": 2.3629, + "step": 16499 + }, + { + "epoch": 1.3316116536195626, + "grad_norm": 0.6885079741477966, + "learning_rate": 1.4905526729198083e-05, + "loss": 2.4321, + "step": 16500 + }, + { + "epoch": 1.3316923573561457, + "grad_norm": 0.662452757358551, + "learning_rate": 1.489723563893004e-05, + "loss": 2.4532, + "step": 16501 + }, + { + "epoch": 1.3317730610927285, + "grad_norm": 0.6650903224945068, + "learning_rate": 1.4888946669645332e-05, + "loss": 2.4347, + "step": 16502 + }, + { + "epoch": 1.3318537648293116, + "grad_norm": 0.7217590808868408, + "learning_rate": 1.4880659821550546e-05, + "loss": 2.4641, + "step": 16503 + }, + { + "epoch": 1.3319344685658945, + "grad_norm": 0.7063763737678528, + "learning_rate": 1.4872375094852232e-05, + "loss": 2.4365, + "step": 16504 + }, + { + "epoch": 1.3320151723024776, + "grad_norm": 0.7366454005241394, + "learning_rate": 1.4864092489756853e-05, + "loss": 2.4223, + "step": 16505 + }, + { + "epoch": 1.3320958760390607, + "grad_norm": 0.7132206559181213, + "learning_rate": 1.4855812006470838e-05, + "loss": 2.4404, + "step": 16506 + }, + { + "epoch": 1.3321765797756435, + "grad_norm": 0.665553867816925, + "learning_rate": 1.484753364520055e-05, + "loss": 2.3818, + "step": 16507 + }, + { + "epoch": 1.3322572835122266, + "grad_norm": 0.7854028344154358, + "learning_rate": 1.483925740615234e-05, + "loss": 2.4111, + "step": 16508 + }, + { + "epoch": 1.3323379872488097, + "grad_norm": 0.7331317663192749, + "learning_rate": 1.4830983289532418e-05, + "loss": 2.4446, + "step": 16509 + }, + { + "epoch": 1.3324186909853926, + "grad_norm": 0.670315146446228, + "learning_rate": 1.4822711295547042e-05, + "loss": 2.4017, + "step": 16510 + }, + { + "epoch": 
1.3324993947219756, + "grad_norm": 0.7242144346237183, + "learning_rate": 1.481444142440237e-05, + "loss": 2.4281, + "step": 16511 + }, + { + "epoch": 1.3325800984585587, + "grad_norm": 0.7108538746833801, + "learning_rate": 1.4806173676304468e-05, + "loss": 2.4331, + "step": 16512 + }, + { + "epoch": 1.3326608021951416, + "grad_norm": 0.658989667892456, + "learning_rate": 1.479790805145943e-05, + "loss": 2.4321, + "step": 16513 + }, + { + "epoch": 1.3327415059317247, + "grad_norm": 0.6596404314041138, + "learning_rate": 1.4789644550073233e-05, + "loss": 2.3817, + "step": 16514 + }, + { + "epoch": 1.3328222096683078, + "grad_norm": 0.6922028064727783, + "learning_rate": 1.4781383172351837e-05, + "loss": 2.399, + "step": 16515 + }, + { + "epoch": 1.3329029134048906, + "grad_norm": 0.750747799873352, + "learning_rate": 1.4773123918501141e-05, + "loss": 2.4502, + "step": 16516 + }, + { + "epoch": 1.3329836171414737, + "grad_norm": 0.6887632608413696, + "learning_rate": 1.4764866788727006e-05, + "loss": 2.3636, + "step": 16517 + }, + { + "epoch": 1.3330643208780566, + "grad_norm": 0.6751166582107544, + "learning_rate": 1.4756611783235163e-05, + "loss": 2.3956, + "step": 16518 + }, + { + "epoch": 1.3331450246146397, + "grad_norm": 0.679040253162384, + "learning_rate": 1.4748358902231395e-05, + "loss": 2.4044, + "step": 16519 + }, + { + "epoch": 1.3332257283512225, + "grad_norm": 0.6396780610084534, + "learning_rate": 1.4740108145921373e-05, + "loss": 2.4114, + "step": 16520 + }, + { + "epoch": 1.3333064320878056, + "grad_norm": 0.6686230301856995, + "learning_rate": 1.4731859514510738e-05, + "loss": 2.4535, + "step": 16521 + }, + { + "epoch": 1.3333871358243887, + "grad_norm": 0.6693681478500366, + "learning_rate": 1.472361300820505e-05, + "loss": 2.3885, + "step": 16522 + }, + { + "epoch": 1.3334678395609716, + "grad_norm": 0.7700718641281128, + "learning_rate": 1.4715368627209836e-05, + "loss": 2.3939, + "step": 16523 + }, + { + "epoch": 1.3335485432975547, + 
"grad_norm": 0.7203121781349182, + "learning_rate": 1.4707126371730561e-05, + "loss": 2.4644, + "step": 16524 + }, + { + "epoch": 1.3336292470341378, + "grad_norm": 0.7798308730125427, + "learning_rate": 1.4698886241972665e-05, + "loss": 2.4293, + "step": 16525 + }, + { + "epoch": 1.3337099507707206, + "grad_norm": 0.7017160654067993, + "learning_rate": 1.4690648238141503e-05, + "loss": 2.4327, + "step": 16526 + }, + { + "epoch": 1.3337906545073037, + "grad_norm": 0.6522603631019592, + "learning_rate": 1.468241236044241e-05, + "loss": 2.3955, + "step": 16527 + }, + { + "epoch": 1.3338713582438868, + "grad_norm": 0.766222357749939, + "learning_rate": 1.4674178609080602e-05, + "loss": 2.4652, + "step": 16528 + }, + { + "epoch": 1.3339520619804697, + "grad_norm": 0.7351565361022949, + "learning_rate": 1.4665946984261303e-05, + "loss": 2.4607, + "step": 16529 + }, + { + "epoch": 1.3340327657170528, + "grad_norm": 0.6817728281021118, + "learning_rate": 1.4657717486189693e-05, + "loss": 2.3687, + "step": 16530 + }, + { + "epoch": 1.3341134694536356, + "grad_norm": 0.7401643395423889, + "learning_rate": 1.464949011507083e-05, + "loss": 2.4179, + "step": 16531 + }, + { + "epoch": 1.3341941731902187, + "grad_norm": 0.7783530354499817, + "learning_rate": 1.4641264871109784e-05, + "loss": 2.4088, + "step": 16532 + }, + { + "epoch": 1.3342748769268016, + "grad_norm": 0.6761943697929382, + "learning_rate": 1.4633041754511534e-05, + "loss": 2.4141, + "step": 16533 + }, + { + "epoch": 1.3343555806633847, + "grad_norm": 0.6842260360717773, + "learning_rate": 1.4624820765481073e-05, + "loss": 2.4918, + "step": 16534 + }, + { + "epoch": 1.3344362843999678, + "grad_norm": 0.6906094551086426, + "learning_rate": 1.4616601904223225e-05, + "loss": 2.4576, + "step": 16535 + }, + { + "epoch": 1.3345169881365506, + "grad_norm": 0.6549125909805298, + "learning_rate": 1.4608385170942829e-05, + "loss": 2.3748, + "step": 16536 + }, + { + "epoch": 1.3345976918731337, + "grad_norm": 
0.6603896617889404, + "learning_rate": 1.4600170565844728e-05, + "loss": 2.3739, + "step": 16537 + }, + { + "epoch": 1.3346783956097168, + "grad_norm": 0.6413096189498901, + "learning_rate": 1.4591958089133606e-05, + "loss": 2.3979, + "step": 16538 + }, + { + "epoch": 1.3347590993462997, + "grad_norm": 0.7085204720497131, + "learning_rate": 1.4583747741014142e-05, + "loss": 2.4185, + "step": 16539 + }, + { + "epoch": 1.3348398030828827, + "grad_norm": 0.6517937183380127, + "learning_rate": 1.4575539521690983e-05, + "loss": 2.3938, + "step": 16540 + }, + { + "epoch": 1.3349205068194658, + "grad_norm": 0.6326449513435364, + "learning_rate": 1.4567333431368658e-05, + "loss": 2.4613, + "step": 16541 + }, + { + "epoch": 1.3350012105560487, + "grad_norm": 0.8046317100524902, + "learning_rate": 1.4559129470251708e-05, + "loss": 2.4547, + "step": 16542 + }, + { + "epoch": 1.3350819142926318, + "grad_norm": 0.6661570072174072, + "learning_rate": 1.455092763854462e-05, + "loss": 2.3636, + "step": 16543 + }, + { + "epoch": 1.3351626180292149, + "grad_norm": 0.6806541085243225, + "learning_rate": 1.454272793645176e-05, + "loss": 2.4309, + "step": 16544 + }, + { + "epoch": 1.3352433217657977, + "grad_norm": 0.651836097240448, + "learning_rate": 1.45345303641775e-05, + "loss": 2.3862, + "step": 16545 + }, + { + "epoch": 1.3353240255023808, + "grad_norm": 0.7448983192443848, + "learning_rate": 1.4526334921926165e-05, + "loss": 2.4654, + "step": 16546 + }, + { + "epoch": 1.3354047292389637, + "grad_norm": 0.6885285973548889, + "learning_rate": 1.4518141609901992e-05, + "loss": 2.3943, + "step": 16547 + }, + { + "epoch": 1.3354854329755468, + "grad_norm": 0.7204004526138306, + "learning_rate": 1.450995042830917e-05, + "loss": 2.4117, + "step": 16548 + }, + { + "epoch": 1.3355661367121296, + "grad_norm": 0.6551961898803711, + "learning_rate": 1.4501761377351864e-05, + "loss": 2.4269, + "step": 16549 + }, + { + "epoch": 1.3356468404487127, + "grad_norm": 0.7191253304481506, + 
"learning_rate": 1.4493574457234182e-05, + "loss": 2.3472, + "step": 16550 + }, + { + "epoch": 1.3357275441852958, + "grad_norm": 0.6793580651283264, + "learning_rate": 1.4485389668160121e-05, + "loss": 2.4264, + "step": 16551 + }, + { + "epoch": 1.3358082479218787, + "grad_norm": 0.704250693321228, + "learning_rate": 1.4477207010333682e-05, + "loss": 2.5236, + "step": 16552 + }, + { + "epoch": 1.3358889516584618, + "grad_norm": 0.6826470494270325, + "learning_rate": 1.4469026483958837e-05, + "loss": 2.4473, + "step": 16553 + }, + { + "epoch": 1.3359696553950449, + "grad_norm": 0.6646167039871216, + "learning_rate": 1.4460848089239399e-05, + "loss": 2.4232, + "step": 16554 + }, + { + "epoch": 1.3360503591316277, + "grad_norm": 0.7604451179504395, + "learning_rate": 1.4452671826379227e-05, + "loss": 2.4208, + "step": 16555 + }, + { + "epoch": 1.3361310628682108, + "grad_norm": 0.7129300236701965, + "learning_rate": 1.4444497695582093e-05, + "loss": 2.4304, + "step": 16556 + }, + { + "epoch": 1.336211766604794, + "grad_norm": 0.6769927740097046, + "learning_rate": 1.4436325697051733e-05, + "loss": 2.3467, + "step": 16557 + }, + { + "epoch": 1.3362924703413768, + "grad_norm": 0.6568608283996582, + "learning_rate": 1.4428155830991797e-05, + "loss": 2.4285, + "step": 16558 + }, + { + "epoch": 1.3363731740779599, + "grad_norm": 0.7687276005744934, + "learning_rate": 1.4419988097605919e-05, + "loss": 2.4815, + "step": 16559 + }, + { + "epoch": 1.336453877814543, + "grad_norm": 0.7001463770866394, + "learning_rate": 1.4411822497097638e-05, + "loss": 2.4629, + "step": 16560 + }, + { + "epoch": 1.3365345815511258, + "grad_norm": 0.7211995720863342, + "learning_rate": 1.4403659029670458e-05, + "loss": 2.4323, + "step": 16561 + }, + { + "epoch": 1.336615285287709, + "grad_norm": 0.7371769547462463, + "learning_rate": 1.439549769552787e-05, + "loss": 2.3962, + "step": 16562 + }, + { + "epoch": 1.3366959890242918, + "grad_norm": 0.7475463151931763, + "learning_rate": 
1.4387338494873237e-05, + "loss": 2.3593, + "step": 16563 + }, + { + "epoch": 1.3367766927608749, + "grad_norm": 0.7215834856033325, + "learning_rate": 1.4379181427909916e-05, + "loss": 2.3687, + "step": 16564 + }, + { + "epoch": 1.3368573964974577, + "grad_norm": 0.7160200476646423, + "learning_rate": 1.4371026494841211e-05, + "loss": 2.3652, + "step": 16565 + }, + { + "epoch": 1.3369381002340408, + "grad_norm": 0.6636231541633606, + "learning_rate": 1.436287369587036e-05, + "loss": 2.4628, + "step": 16566 + }, + { + "epoch": 1.337018803970624, + "grad_norm": 0.657774806022644, + "learning_rate": 1.4354723031200556e-05, + "loss": 2.4082, + "step": 16567 + }, + { + "epoch": 1.3370995077072068, + "grad_norm": 0.7020300626754761, + "learning_rate": 1.4346574501034936e-05, + "loss": 2.3821, + "step": 16568 + }, + { + "epoch": 1.3371802114437898, + "grad_norm": 0.6800786256790161, + "learning_rate": 1.4338428105576595e-05, + "loss": 2.3839, + "step": 16569 + }, + { + "epoch": 1.337260915180373, + "grad_norm": 0.7176932692527771, + "learning_rate": 1.4330283845028536e-05, + "loss": 2.4614, + "step": 16570 + }, + { + "epoch": 1.3373416189169558, + "grad_norm": 0.7233355641365051, + "learning_rate": 1.432214171959374e-05, + "loss": 2.4048, + "step": 16571 + }, + { + "epoch": 1.3374223226535389, + "grad_norm": 0.7721874117851257, + "learning_rate": 1.4314001729475157e-05, + "loss": 2.4169, + "step": 16572 + }, + { + "epoch": 1.337503026390122, + "grad_norm": 0.7123380303382874, + "learning_rate": 1.4305863874875613e-05, + "loss": 2.3799, + "step": 16573 + }, + { + "epoch": 1.3375837301267048, + "grad_norm": 0.7297765016555786, + "learning_rate": 1.4297728155997958e-05, + "loss": 2.4655, + "step": 16574 + }, + { + "epoch": 1.337664433863288, + "grad_norm": 0.6806401610374451, + "learning_rate": 1.428959457304493e-05, + "loss": 2.4102, + "step": 16575 + }, + { + "epoch": 1.3377451375998708, + "grad_norm": 0.6811275482177734, + "learning_rate": 1.4281463126219264e-05, + 
"loss": 2.4298, + "step": 16576 + }, + { + "epoch": 1.3378258413364539, + "grad_norm": 0.6900678277015686, + "learning_rate": 1.427333381572361e-05, + "loss": 2.4745, + "step": 16577 + }, + { + "epoch": 1.3379065450730367, + "grad_norm": 0.7815307974815369, + "learning_rate": 1.4265206641760587e-05, + "loss": 2.3624, + "step": 16578 + }, + { + "epoch": 1.3379872488096198, + "grad_norm": 0.6948800683021545, + "learning_rate": 1.4257081604532708e-05, + "loss": 2.4142, + "step": 16579 + }, + { + "epoch": 1.338067952546203, + "grad_norm": 0.7387657165527344, + "learning_rate": 1.4248958704242488e-05, + "loss": 2.4241, + "step": 16580 + }, + { + "epoch": 1.3381486562827858, + "grad_norm": 0.7158597111701965, + "learning_rate": 1.4240837941092367e-05, + "loss": 2.4473, + "step": 16581 + }, + { + "epoch": 1.3382293600193689, + "grad_norm": 0.758674144744873, + "learning_rate": 1.423271931528477e-05, + "loss": 2.4504, + "step": 16582 + }, + { + "epoch": 1.338310063755952, + "grad_norm": 0.6904417872428894, + "learning_rate": 1.4224602827021982e-05, + "loss": 2.4288, + "step": 16583 + }, + { + "epoch": 1.3383907674925348, + "grad_norm": 0.6988760828971863, + "learning_rate": 1.4216488476506307e-05, + "loss": 2.3874, + "step": 16584 + }, + { + "epoch": 1.338471471229118, + "grad_norm": 0.6969872117042542, + "learning_rate": 1.4208376263940003e-05, + "loss": 2.3388, + "step": 16585 + }, + { + "epoch": 1.338552174965701, + "grad_norm": 0.687179684638977, + "learning_rate": 1.420026618952518e-05, + "loss": 2.431, + "step": 16586 + }, + { + "epoch": 1.3386328787022839, + "grad_norm": 0.6319810152053833, + "learning_rate": 1.4192158253464038e-05, + "loss": 2.4415, + "step": 16587 + }, + { + "epoch": 1.338713582438867, + "grad_norm": 0.7554977536201477, + "learning_rate": 1.4184052455958629e-05, + "loss": 2.3863, + "step": 16588 + }, + { + "epoch": 1.33879428617545, + "grad_norm": 0.7025974988937378, + "learning_rate": 1.4175948797210936e-05, + "loss": 2.3957, + "step": 16589 + }, 
+ { + "epoch": 1.338874989912033, + "grad_norm": 0.7270370721817017, + "learning_rate": 1.4167847277422952e-05, + "loss": 2.4309, + "step": 16590 + }, + { + "epoch": 1.338955693648616, + "grad_norm": 0.7017608284950256, + "learning_rate": 1.4159747896796593e-05, + "loss": 2.4142, + "step": 16591 + }, + { + "epoch": 1.3390363973851989, + "grad_norm": 0.7114055156707764, + "learning_rate": 1.4151650655533687e-05, + "loss": 2.473, + "step": 16592 + }, + { + "epoch": 1.339117101121782, + "grad_norm": 0.6420357823371887, + "learning_rate": 1.4143555553836063e-05, + "loss": 2.3671, + "step": 16593 + }, + { + "epoch": 1.3391978048583648, + "grad_norm": 0.7067350745201111, + "learning_rate": 1.413546259190548e-05, + "loss": 2.4422, + "step": 16594 + }, + { + "epoch": 1.339278508594948, + "grad_norm": 0.7376763224601746, + "learning_rate": 1.4127371769943598e-05, + "loss": 2.4443, + "step": 16595 + }, + { + "epoch": 1.339359212331531, + "grad_norm": 0.646515965461731, + "learning_rate": 1.4119283088152092e-05, + "loss": 2.3949, + "step": 16596 + }, + { + "epoch": 1.3394399160681139, + "grad_norm": 0.6896061301231384, + "learning_rate": 1.411119654673254e-05, + "loss": 2.4535, + "step": 16597 + }, + { + "epoch": 1.339520619804697, + "grad_norm": 0.6992611289024353, + "learning_rate": 1.4103112145886489e-05, + "loss": 2.3983, + "step": 16598 + }, + { + "epoch": 1.33960132354128, + "grad_norm": 0.7176348567008972, + "learning_rate": 1.4095029885815426e-05, + "loss": 2.4671, + "step": 16599 + }, + { + "epoch": 1.339682027277863, + "grad_norm": 0.6635856628417969, + "learning_rate": 1.4086949766720759e-05, + "loss": 2.4235, + "step": 16600 + }, + { + "epoch": 1.339762731014446, + "grad_norm": 0.673332154750824, + "learning_rate": 1.4078871788803915e-05, + "loss": 2.4328, + "step": 16601 + }, + { + "epoch": 1.339843434751029, + "grad_norm": 0.6738821864128113, + "learning_rate": 1.407079595226617e-05, + "loss": 2.4786, + "step": 16602 + }, + { + "epoch": 1.339924138487612, + 
"grad_norm": 0.690605103969574, + "learning_rate": 1.4062722257308803e-05, + "loss": 2.4025, + "step": 16603 + }, + { + "epoch": 1.340004842224195, + "grad_norm": 0.7186758518218994, + "learning_rate": 1.4054650704133066e-05, + "loss": 2.4793, + "step": 16604 + }, + { + "epoch": 1.3400855459607781, + "grad_norm": 0.6484951376914978, + "learning_rate": 1.4046581292940075e-05, + "loss": 2.3855, + "step": 16605 + }, + { + "epoch": 1.340166249697361, + "grad_norm": 0.6993771195411682, + "learning_rate": 1.403851402393096e-05, + "loss": 2.3872, + "step": 16606 + }, + { + "epoch": 1.340246953433944, + "grad_norm": 0.7446531653404236, + "learning_rate": 1.403044889730678e-05, + "loss": 2.4253, + "step": 16607 + }, + { + "epoch": 1.340327657170527, + "grad_norm": 0.6873160004615784, + "learning_rate": 1.4022385913268542e-05, + "loss": 2.464, + "step": 16608 + }, + { + "epoch": 1.34040836090711, + "grad_norm": 0.6570948362350464, + "learning_rate": 1.4014325072017198e-05, + "loss": 2.4063, + "step": 16609 + }, + { + "epoch": 1.3404890646436929, + "grad_norm": 0.7209224104881287, + "learning_rate": 1.4006266373753651e-05, + "loss": 2.4827, + "step": 16610 + }, + { + "epoch": 1.340569768380276, + "grad_norm": 0.7283413410186768, + "learning_rate": 1.3998209818678732e-05, + "loss": 2.4009, + "step": 16611 + }, + { + "epoch": 1.340650472116859, + "grad_norm": 0.6650960445404053, + "learning_rate": 1.3990155406993221e-05, + "loss": 2.3576, + "step": 16612 + }, + { + "epoch": 1.340731175853442, + "grad_norm": 0.6857860088348389, + "learning_rate": 1.3982103138897873e-05, + "loss": 2.4686, + "step": 16613 + }, + { + "epoch": 1.340811879590025, + "grad_norm": 0.7065873146057129, + "learning_rate": 1.3974053014593402e-05, + "loss": 2.3999, + "step": 16614 + }, + { + "epoch": 1.340892583326608, + "grad_norm": 0.8093010783195496, + "learning_rate": 1.3966005034280372e-05, + "loss": 2.4273, + "step": 16615 + }, + { + "epoch": 1.340973287063191, + "grad_norm": 0.649132251739502, + 
"learning_rate": 1.3957959198159387e-05, + "loss": 2.3418, + "step": 16616 + }, + { + "epoch": 1.341053990799774, + "grad_norm": 0.7114978432655334, + "learning_rate": 1.3949915506430976e-05, + "loss": 2.4393, + "step": 16617 + }, + { + "epoch": 1.3411346945363571, + "grad_norm": 0.7989282608032227, + "learning_rate": 1.3941873959295615e-05, + "loss": 2.4044, + "step": 16618 + }, + { + "epoch": 1.34121539827294, + "grad_norm": 0.7373676896095276, + "learning_rate": 1.3933834556953707e-05, + "loss": 2.4758, + "step": 16619 + }, + { + "epoch": 1.341296102009523, + "grad_norm": 0.7076435089111328, + "learning_rate": 1.3925797299605647e-05, + "loss": 2.4429, + "step": 16620 + }, + { + "epoch": 1.3413768057461062, + "grad_norm": 0.6739028692245483, + "learning_rate": 1.39177621874517e-05, + "loss": 2.4275, + "step": 16621 + }, + { + "epoch": 1.341457509482689, + "grad_norm": 0.7134198546409607, + "learning_rate": 1.3909729220692125e-05, + "loss": 2.4541, + "step": 16622 + }, + { + "epoch": 1.3415382132192721, + "grad_norm": 0.6770301461219788, + "learning_rate": 1.3901698399527175e-05, + "loss": 2.4143, + "step": 16623 + }, + { + "epoch": 1.341618916955855, + "grad_norm": 0.7146373987197876, + "learning_rate": 1.3893669724156943e-05, + "loss": 2.4886, + "step": 16624 + }, + { + "epoch": 1.341699620692438, + "grad_norm": 0.6801536083221436, + "learning_rate": 1.3885643194781539e-05, + "loss": 2.4154, + "step": 16625 + }, + { + "epoch": 1.341780324429021, + "grad_norm": 0.7350363731384277, + "learning_rate": 1.3877618811601024e-05, + "loss": 2.3918, + "step": 16626 + }, + { + "epoch": 1.341861028165604, + "grad_norm": 0.7088882327079773, + "learning_rate": 1.3869596574815358e-05, + "loss": 2.412, + "step": 16627 + }, + { + "epoch": 1.3419417319021871, + "grad_norm": 0.7199791669845581, + "learning_rate": 1.3861576484624506e-05, + "loss": 2.3912, + "step": 16628 + }, + { + "epoch": 1.34202243563877, + "grad_norm": 0.692971408367157, + "learning_rate": 
1.3853558541228328e-05, + "loss": 2.3826, + "step": 16629 + }, + { + "epoch": 1.342103139375353, + "grad_norm": 0.7524722814559937, + "learning_rate": 1.3845542744826679e-05, + "loss": 2.4227, + "step": 16630 + }, + { + "epoch": 1.3421838431119362, + "grad_norm": 0.6624585390090942, + "learning_rate": 1.3837529095619307e-05, + "loss": 2.3649, + "step": 16631 + }, + { + "epoch": 1.342264546848519, + "grad_norm": 0.6884489059448242, + "learning_rate": 1.3829517593805929e-05, + "loss": 2.3687, + "step": 16632 + }, + { + "epoch": 1.3423452505851021, + "grad_norm": 0.6766197085380554, + "learning_rate": 1.3821508239586246e-05, + "loss": 2.4191, + "step": 16633 + }, + { + "epoch": 1.3424259543216852, + "grad_norm": 0.6744453310966492, + "learning_rate": 1.3813501033159837e-05, + "loss": 2.4254, + "step": 16634 + }, + { + "epoch": 1.342506658058268, + "grad_norm": 0.6906216144561768, + "learning_rate": 1.3805495974726267e-05, + "loss": 2.4763, + "step": 16635 + }, + { + "epoch": 1.3425873617948512, + "grad_norm": 0.7052608132362366, + "learning_rate": 1.3797493064485078e-05, + "loss": 2.4307, + "step": 16636 + }, + { + "epoch": 1.342668065531434, + "grad_norm": 0.6701127290725708, + "learning_rate": 1.3789492302635653e-05, + "loss": 2.4529, + "step": 16637 + }, + { + "epoch": 1.3427487692680171, + "grad_norm": 0.7440397143363953, + "learning_rate": 1.3781493689377455e-05, + "loss": 2.4471, + "step": 16638 + }, + { + "epoch": 1.3428294730046, + "grad_norm": 0.7340207695960999, + "learning_rate": 1.3773497224909848e-05, + "loss": 2.4434, + "step": 16639 + }, + { + "epoch": 1.342910176741183, + "grad_norm": 0.6836793422698975, + "learning_rate": 1.376550290943205e-05, + "loss": 2.4072, + "step": 16640 + }, + { + "epoch": 1.3429908804777662, + "grad_norm": 0.6820472478866577, + "learning_rate": 1.3757510743143342e-05, + "loss": 2.4078, + "step": 16641 + }, + { + "epoch": 1.343071584214349, + "grad_norm": 0.6608061194419861, + "learning_rate": 1.3749520726242938e-05, + "loss": 
2.3995, + "step": 16642 + }, + { + "epoch": 1.3431522879509321, + "grad_norm": 0.6582421064376831, + "learning_rate": 1.3741532858929906e-05, + "loss": 2.3768, + "step": 16643 + }, + { + "epoch": 1.3432329916875152, + "grad_norm": 0.7032744288444519, + "learning_rate": 1.3733547141403358e-05, + "loss": 2.4367, + "step": 16644 + }, + { + "epoch": 1.343313695424098, + "grad_norm": 0.7149307727813721, + "learning_rate": 1.3725563573862321e-05, + "loss": 2.4425, + "step": 16645 + }, + { + "epoch": 1.3433943991606812, + "grad_norm": 0.7375392913818359, + "learning_rate": 1.3717582156505793e-05, + "loss": 2.409, + "step": 16646 + }, + { + "epoch": 1.3434751028972642, + "grad_norm": 0.8422170877456665, + "learning_rate": 1.3709602889532624e-05, + "loss": 2.4758, + "step": 16647 + }, + { + "epoch": 1.343555806633847, + "grad_norm": 0.6542177796363831, + "learning_rate": 1.3701625773141712e-05, + "loss": 2.4199, + "step": 16648 + }, + { + "epoch": 1.3436365103704302, + "grad_norm": 0.6639342904090881, + "learning_rate": 1.3693650807531898e-05, + "loss": 2.4366, + "step": 16649 + }, + { + "epoch": 1.3437172141070133, + "grad_norm": 0.7270925045013428, + "learning_rate": 1.3685677992901901e-05, + "loss": 2.3745, + "step": 16650 + }, + { + "epoch": 1.3437979178435961, + "grad_norm": 0.7325547337532043, + "learning_rate": 1.367770732945044e-05, + "loss": 2.5053, + "step": 16651 + }, + { + "epoch": 1.3438786215801792, + "grad_norm": 0.7752320766448975, + "learning_rate": 1.3669738817376177e-05, + "loss": 2.4505, + "step": 16652 + }, + { + "epoch": 1.343959325316762, + "grad_norm": 0.6538182497024536, + "learning_rate": 1.3661772456877675e-05, + "loss": 2.4164, + "step": 16653 + }, + { + "epoch": 1.3440400290533452, + "grad_norm": 0.6886051297187805, + "learning_rate": 1.3653808248153487e-05, + "loss": 2.4156, + "step": 16654 + }, + { + "epoch": 1.344120732789928, + "grad_norm": 0.6990679502487183, + "learning_rate": 1.3645846191402134e-05, + "loss": 2.418, + "step": 16655 + }, + 
{ + "epoch": 1.3442014365265111, + "grad_norm": 0.7006608247756958, + "learning_rate": 1.3637886286821999e-05, + "loss": 2.3987, + "step": 16656 + }, + { + "epoch": 1.3442821402630942, + "grad_norm": 0.6858758926391602, + "learning_rate": 1.3629928534611502e-05, + "loss": 2.3571, + "step": 16657 + }, + { + "epoch": 1.344362843999677, + "grad_norm": 0.7273774147033691, + "learning_rate": 1.3621972934968951e-05, + "loss": 2.4141, + "step": 16658 + }, + { + "epoch": 1.3444435477362602, + "grad_norm": 0.6770352721214294, + "learning_rate": 1.3614019488092633e-05, + "loss": 2.4602, + "step": 16659 + }, + { + "epoch": 1.3445242514728433, + "grad_norm": 0.7473095655441284, + "learning_rate": 1.3606068194180766e-05, + "loss": 2.3884, + "step": 16660 + }, + { + "epoch": 1.3446049552094261, + "grad_norm": 0.7271387577056885, + "learning_rate": 1.3598119053431512e-05, + "loss": 2.4705, + "step": 16661 + }, + { + "epoch": 1.3446856589460092, + "grad_norm": 0.658349335193634, + "learning_rate": 1.3590172066043006e-05, + "loss": 2.4271, + "step": 16662 + }, + { + "epoch": 1.3447663626825923, + "grad_norm": 0.6479319930076599, + "learning_rate": 1.3582227232213273e-05, + "loss": 2.3428, + "step": 16663 + }, + { + "epoch": 1.3448470664191752, + "grad_norm": 0.700951874256134, + "learning_rate": 1.3574284552140337e-05, + "loss": 2.4926, + "step": 16664 + }, + { + "epoch": 1.3449277701557583, + "grad_norm": 0.6699960231781006, + "learning_rate": 1.3566344026022171e-05, + "loss": 2.4372, + "step": 16665 + }, + { + "epoch": 1.3450084738923413, + "grad_norm": 0.6743033528327942, + "learning_rate": 1.3558405654056617e-05, + "loss": 2.4142, + "step": 16666 + }, + { + "epoch": 1.3450891776289242, + "grad_norm": 0.6619464755058289, + "learning_rate": 1.355046943644157e-05, + "loss": 2.4099, + "step": 16667 + }, + { + "epoch": 1.3451698813655073, + "grad_norm": 0.668084442615509, + "learning_rate": 1.3542535373374798e-05, + "loss": 2.3895, + "step": 16668 + }, + { + "epoch": 
1.3452505851020902, + "grad_norm": 0.7954626679420471, + "learning_rate": 1.3534603465054052e-05, + "loss": 2.479, + "step": 16669 + }, + { + "epoch": 1.3453312888386733, + "grad_norm": 0.6742919683456421, + "learning_rate": 1.3526673711677008e-05, + "loss": 2.4289, + "step": 16670 + }, + { + "epoch": 1.3454119925752561, + "grad_norm": 0.6564723253250122, + "learning_rate": 1.3518746113441316e-05, + "loss": 2.404, + "step": 16671 + }, + { + "epoch": 1.3454926963118392, + "grad_norm": 0.6955705881118774, + "learning_rate": 1.3510820670544521e-05, + "loss": 2.4274, + "step": 16672 + }, + { + "epoch": 1.3455734000484223, + "grad_norm": 0.6687749028205872, + "learning_rate": 1.3502897383184154e-05, + "loss": 2.4564, + "step": 16673 + }, + { + "epoch": 1.3456541037850052, + "grad_norm": 0.7984250783920288, + "learning_rate": 1.34949762515577e-05, + "loss": 2.3426, + "step": 16674 + }, + { + "epoch": 1.3457348075215882, + "grad_norm": 0.7334223389625549, + "learning_rate": 1.348705727586258e-05, + "loss": 2.4712, + "step": 16675 + }, + { + "epoch": 1.3458155112581713, + "grad_norm": 0.6732765436172485, + "learning_rate": 1.3479140456296114e-05, + "loss": 2.424, + "step": 16676 + }, + { + "epoch": 1.3458962149947542, + "grad_norm": 0.7944334149360657, + "learning_rate": 1.3471225793055641e-05, + "loss": 2.3951, + "step": 16677 + }, + { + "epoch": 1.3459769187313373, + "grad_norm": 0.6829007863998413, + "learning_rate": 1.3463313286338408e-05, + "loss": 2.4158, + "step": 16678 + }, + { + "epoch": 1.3460576224679204, + "grad_norm": 0.7019640207290649, + "learning_rate": 1.345540293634161e-05, + "loss": 2.4093, + "step": 16679 + }, + { + "epoch": 1.3461383262045032, + "grad_norm": 0.6839374303817749, + "learning_rate": 1.3447494743262412e-05, + "loss": 2.3959, + "step": 16680 + }, + { + "epoch": 1.3462190299410863, + "grad_norm": 0.7211155295372009, + "learning_rate": 1.3439588707297911e-05, + "loss": 2.4052, + "step": 16681 + }, + { + "epoch": 1.3462997336776692, + 
"grad_norm": 0.73811274766922, + "learning_rate": 1.3431684828645109e-05, + "loss": 2.4179, + "step": 16682 + }, + { + "epoch": 1.3463804374142523, + "grad_norm": 0.6634721159934998, + "learning_rate": 1.3423783107501009e-05, + "loss": 2.379, + "step": 16683 + }, + { + "epoch": 1.3464611411508352, + "grad_norm": 0.6884057521820068, + "learning_rate": 1.3415883544062579e-05, + "loss": 2.4144, + "step": 16684 + }, + { + "epoch": 1.3465418448874182, + "grad_norm": 0.7239587306976318, + "learning_rate": 1.340798613852664e-05, + "loss": 2.3856, + "step": 16685 + }, + { + "epoch": 1.3466225486240013, + "grad_norm": 0.7201077342033386, + "learning_rate": 1.3400090891090033e-05, + "loss": 2.4552, + "step": 16686 + }, + { + "epoch": 1.3467032523605842, + "grad_norm": 0.7049584984779358, + "learning_rate": 1.3392197801949558e-05, + "loss": 2.4424, + "step": 16687 + }, + { + "epoch": 1.3467839560971673, + "grad_norm": 0.7240790128707886, + "learning_rate": 1.3384306871301877e-05, + "loss": 2.4156, + "step": 16688 + }, + { + "epoch": 1.3468646598337504, + "grad_norm": 0.7276458740234375, + "learning_rate": 1.337641809934369e-05, + "loss": 2.3882, + "step": 16689 + }, + { + "epoch": 1.3469453635703332, + "grad_norm": 0.6650896072387695, + "learning_rate": 1.3368531486271607e-05, + "loss": 2.396, + "step": 16690 + }, + { + "epoch": 1.3470260673069163, + "grad_norm": 0.6946447491645813, + "learning_rate": 1.3360647032282203e-05, + "loss": 2.3779, + "step": 16691 + }, + { + "epoch": 1.3471067710434994, + "grad_norm": 0.7507699728012085, + "learning_rate": 1.3352764737571932e-05, + "loss": 2.4378, + "step": 16692 + }, + { + "epoch": 1.3471874747800823, + "grad_norm": 0.6548876762390137, + "learning_rate": 1.334488460233725e-05, + "loss": 2.4181, + "step": 16693 + }, + { + "epoch": 1.3472681785166654, + "grad_norm": 0.7000874280929565, + "learning_rate": 1.3337006626774595e-05, + "loss": 2.4463, + "step": 16694 + }, + { + "epoch": 1.3473488822532484, + "grad_norm": 
0.6487517356872559, + "learning_rate": 1.3329130811080249e-05, + "loss": 2.3703, + "step": 16695 + }, + { + "epoch": 1.3474295859898313, + "grad_norm": 0.6447827219963074, + "learning_rate": 1.3321257155450517e-05, + "loss": 2.3779, + "step": 16696 + }, + { + "epoch": 1.3475102897264144, + "grad_norm": 0.6309572458267212, + "learning_rate": 1.3313385660081667e-05, + "loss": 2.4443, + "step": 16697 + }, + { + "epoch": 1.3475909934629973, + "grad_norm": 0.6366227865219116, + "learning_rate": 1.330551632516982e-05, + "loss": 2.3418, + "step": 16698 + }, + { + "epoch": 1.3476716971995804, + "grad_norm": 0.6864019632339478, + "learning_rate": 1.3297649150911117e-05, + "loss": 2.4416, + "step": 16699 + }, + { + "epoch": 1.3477524009361632, + "grad_norm": 0.6807940006256104, + "learning_rate": 1.3289784137501671e-05, + "loss": 2.4465, + "step": 16700 + }, + { + "epoch": 1.3478331046727463, + "grad_norm": 0.6991185545921326, + "learning_rate": 1.3281921285137455e-05, + "loss": 2.3929, + "step": 16701 + }, + { + "epoch": 1.3479138084093294, + "grad_norm": 0.691908061504364, + "learning_rate": 1.3274060594014437e-05, + "loss": 2.4237, + "step": 16702 + }, + { + "epoch": 1.3479945121459123, + "grad_norm": 0.6909685730934143, + "learning_rate": 1.3266202064328548e-05, + "loss": 2.3695, + "step": 16703 + }, + { + "epoch": 1.3480752158824953, + "grad_norm": 0.6473715901374817, + "learning_rate": 1.325834569627562e-05, + "loss": 2.384, + "step": 16704 + }, + { + "epoch": 1.3481559196190784, + "grad_norm": 0.7433453798294067, + "learning_rate": 1.3250491490051454e-05, + "loss": 2.4546, + "step": 16705 + }, + { + "epoch": 1.3482366233556613, + "grad_norm": 0.7432501316070557, + "learning_rate": 1.3242639445851812e-05, + "loss": 2.4204, + "step": 16706 + }, + { + "epoch": 1.3483173270922444, + "grad_norm": 0.6661228537559509, + "learning_rate": 1.3234789563872397e-05, + "loss": 2.4454, + "step": 16707 + }, + { + "epoch": 1.3483980308288275, + "grad_norm": 0.7481260895729065, + 
"learning_rate": 1.3226941844308816e-05, + "loss": 2.4348, + "step": 16708 + }, + { + "epoch": 1.3484787345654103, + "grad_norm": 0.6986531019210815, + "learning_rate": 1.3219096287356669e-05, + "loss": 2.3622, + "step": 16709 + }, + { + "epoch": 1.3485594383019934, + "grad_norm": 0.7457645535469055, + "learning_rate": 1.321125289321149e-05, + "loss": 2.4399, + "step": 16710 + }, + { + "epoch": 1.3486401420385765, + "grad_norm": 0.6710307598114014, + "learning_rate": 1.3203411662068754e-05, + "loss": 2.3857, + "step": 16711 + }, + { + "epoch": 1.3487208457751594, + "grad_norm": 0.767304539680481, + "learning_rate": 1.3195572594123884e-05, + "loss": 2.4666, + "step": 16712 + }, + { + "epoch": 1.3488015495117425, + "grad_norm": 0.6720963716506958, + "learning_rate": 1.3187735689572289e-05, + "loss": 2.3952, + "step": 16713 + }, + { + "epoch": 1.3488822532483253, + "grad_norm": 0.6381734609603882, + "learning_rate": 1.3179900948609213e-05, + "loss": 2.3632, + "step": 16714 + }, + { + "epoch": 1.3489629569849084, + "grad_norm": 0.6697315573692322, + "learning_rate": 1.317206837142997e-05, + "loss": 2.4117, + "step": 16715 + }, + { + "epoch": 1.3490436607214913, + "grad_norm": 0.723676323890686, + "learning_rate": 1.3164237958229764e-05, + "loss": 2.3772, + "step": 16716 + }, + { + "epoch": 1.3491243644580744, + "grad_norm": 0.7021055817604065, + "learning_rate": 1.3156409709203732e-05, + "loss": 2.3808, + "step": 16717 + }, + { + "epoch": 1.3492050681946575, + "grad_norm": 0.7128920555114746, + "learning_rate": 1.3148583624546962e-05, + "loss": 2.3854, + "step": 16718 + }, + { + "epoch": 1.3492857719312403, + "grad_norm": 0.6684797406196594, + "learning_rate": 1.314075970445453e-05, + "loss": 2.3722, + "step": 16719 + }, + { + "epoch": 1.3493664756678234, + "grad_norm": 0.6710386276245117, + "learning_rate": 1.3132937949121426e-05, + "loss": 2.412, + "step": 16720 + }, + { + "epoch": 1.3494471794044065, + "grad_norm": 0.7207252979278564, + "learning_rate": 
1.3125118358742572e-05, + "loss": 2.4506, + "step": 16721 + }, + { + "epoch": 1.3495278831409894, + "grad_norm": 0.685516893863678, + "learning_rate": 1.3117300933512865e-05, + "loss": 2.435, + "step": 16722 + }, + { + "epoch": 1.3496085868775725, + "grad_norm": 0.71708744764328, + "learning_rate": 1.3109485673627154e-05, + "loss": 2.4735, + "step": 16723 + }, + { + "epoch": 1.3496892906141555, + "grad_norm": 0.7293861508369446, + "learning_rate": 1.3101672579280166e-05, + "loss": 2.4545, + "step": 16724 + }, + { + "epoch": 1.3497699943507384, + "grad_norm": 0.6448976993560791, + "learning_rate": 1.3093861650666661e-05, + "loss": 2.386, + "step": 16725 + }, + { + "epoch": 1.3498506980873215, + "grad_norm": 0.8111226558685303, + "learning_rate": 1.3086052887981315e-05, + "loss": 2.4733, + "step": 16726 + }, + { + "epoch": 1.3499314018239044, + "grad_norm": 0.7673875093460083, + "learning_rate": 1.3078246291418706e-05, + "loss": 2.4119, + "step": 16727 + }, + { + "epoch": 1.3500121055604875, + "grad_norm": 0.7296731472015381, + "learning_rate": 1.307044186117341e-05, + "loss": 2.3724, + "step": 16728 + }, + { + "epoch": 1.3500928092970703, + "grad_norm": 0.6947155594825745, + "learning_rate": 1.306263959743994e-05, + "loss": 2.3989, + "step": 16729 + }, + { + "epoch": 1.3501735130336534, + "grad_norm": 0.6781659722328186, + "learning_rate": 1.3054839500412753e-05, + "loss": 2.429, + "step": 16730 + }, + { + "epoch": 1.3502542167702365, + "grad_norm": 0.7498819231987, + "learning_rate": 1.3047041570286244e-05, + "loss": 2.459, + "step": 16731 + }, + { + "epoch": 1.3503349205068194, + "grad_norm": 0.6651057004928589, + "learning_rate": 1.3039245807254774e-05, + "loss": 2.4049, + "step": 16732 + }, + { + "epoch": 1.3504156242434024, + "grad_norm": 0.6998507380485535, + "learning_rate": 1.3031452211512596e-05, + "loss": 2.4083, + "step": 16733 + }, + { + "epoch": 1.3504963279799855, + "grad_norm": 0.6522402167320251, + "learning_rate": 1.3023660783253966e-05, + "loss": 
2.3987, + "step": 16734 + }, + { + "epoch": 1.3505770317165684, + "grad_norm": 0.6618130207061768, + "learning_rate": 1.3015871522673096e-05, + "loss": 2.4514, + "step": 16735 + }, + { + "epoch": 1.3506577354531515, + "grad_norm": 0.7139489650726318, + "learning_rate": 1.300808442996405e-05, + "loss": 2.484, + "step": 16736 + }, + { + "epoch": 1.3507384391897346, + "grad_norm": 0.6582522988319397, + "learning_rate": 1.3000299505320956e-05, + "loss": 2.4463, + "step": 16737 + }, + { + "epoch": 1.3508191429263174, + "grad_norm": 0.7115446329116821, + "learning_rate": 1.2992516748937811e-05, + "loss": 2.4795, + "step": 16738 + }, + { + "epoch": 1.3508998466629005, + "grad_norm": 0.7243752479553223, + "learning_rate": 1.2984736161008581e-05, + "loss": 2.4151, + "step": 16739 + }, + { + "epoch": 1.3509805503994836, + "grad_norm": 0.758084774017334, + "learning_rate": 1.297695774172719e-05, + "loss": 2.4028, + "step": 16740 + }, + { + "epoch": 1.3510612541360665, + "grad_norm": 0.6555618643760681, + "learning_rate": 1.2969181491287496e-05, + "loss": 2.4184, + "step": 16741 + }, + { + "epoch": 1.3511419578726496, + "grad_norm": 0.6657842993736267, + "learning_rate": 1.2961407409883331e-05, + "loss": 2.375, + "step": 16742 + }, + { + "epoch": 1.3512226616092324, + "grad_norm": 0.6355723142623901, + "learning_rate": 1.2953635497708382e-05, + "loss": 2.4202, + "step": 16743 + }, + { + "epoch": 1.3513033653458155, + "grad_norm": 0.7384408116340637, + "learning_rate": 1.2945865754956377e-05, + "loss": 2.4298, + "step": 16744 + }, + { + "epoch": 1.3513840690823984, + "grad_norm": 0.7300455570220947, + "learning_rate": 1.2938098181820979e-05, + "loss": 2.3842, + "step": 16745 + }, + { + "epoch": 1.3514647728189815, + "grad_norm": 0.7378895282745361, + "learning_rate": 1.2930332778495735e-05, + "loss": 2.4025, + "step": 16746 + }, + { + "epoch": 1.3515454765555646, + "grad_norm": 0.6542565822601318, + "learning_rate": 1.2922569545174212e-05, + "loss": 2.3995, + "step": 16747 + }, 
+ { + "epoch": 1.3516261802921474, + "grad_norm": 0.669829249382019, + "learning_rate": 1.291480848204989e-05, + "loss": 2.3843, + "step": 16748 + }, + { + "epoch": 1.3517068840287305, + "grad_norm": 0.6747604608535767, + "learning_rate": 1.2907049589316167e-05, + "loss": 2.4108, + "step": 16749 + }, + { + "epoch": 1.3517875877653136, + "grad_norm": 0.7003559470176697, + "learning_rate": 1.2899292867166402e-05, + "loss": 2.4233, + "step": 16750 + }, + { + "epoch": 1.3518682915018965, + "grad_norm": 0.7365099191665649, + "learning_rate": 1.2891538315793994e-05, + "loss": 2.3592, + "step": 16751 + }, + { + "epoch": 1.3519489952384796, + "grad_norm": 0.6849377751350403, + "learning_rate": 1.2883785935392123e-05, + "loss": 2.3943, + "step": 16752 + }, + { + "epoch": 1.3520296989750626, + "grad_norm": 0.7263002395629883, + "learning_rate": 1.2876035726154045e-05, + "loss": 2.4078, + "step": 16753 + }, + { + "epoch": 1.3521104027116455, + "grad_norm": 0.7341182827949524, + "learning_rate": 1.2868287688272884e-05, + "loss": 2.3568, + "step": 16754 + }, + { + "epoch": 1.3521911064482286, + "grad_norm": 0.7281078100204468, + "learning_rate": 1.2860541821941796e-05, + "loss": 2.4073, + "step": 16755 + }, + { + "epoch": 1.3522718101848117, + "grad_norm": 0.6302868127822876, + "learning_rate": 1.285279812735376e-05, + "loss": 2.3946, + "step": 16756 + }, + { + "epoch": 1.3523525139213946, + "grad_norm": 0.7333062887191772, + "learning_rate": 1.28450566047018e-05, + "loss": 2.3892, + "step": 16757 + }, + { + "epoch": 1.3524332176579776, + "grad_norm": 0.74838787317276, + "learning_rate": 1.2837317254178882e-05, + "loss": 2.4844, + "step": 16758 + }, + { + "epoch": 1.3525139213945605, + "grad_norm": 0.7085757255554199, + "learning_rate": 1.2829580075977843e-05, + "loss": 2.3583, + "step": 16759 + }, + { + "epoch": 1.3525946251311436, + "grad_norm": 0.7182579040527344, + "learning_rate": 1.2821845070291527e-05, + "loss": 2.4326, + "step": 16760 + }, + { + "epoch": 
1.3526753288677265, + "grad_norm": 0.6857885718345642, + "learning_rate": 1.2814112237312714e-05, + "loss": 2.4406, + "step": 16761 + }, + { + "epoch": 1.3527560326043095, + "grad_norm": 0.7629652619361877, + "learning_rate": 1.2806381577234139e-05, + "loss": 2.4839, + "step": 16762 + }, + { + "epoch": 1.3528367363408926, + "grad_norm": 0.6940319538116455, + "learning_rate": 1.2798653090248458e-05, + "loss": 2.3918, + "step": 16763 + }, + { + "epoch": 1.3529174400774755, + "grad_norm": 0.6825633645057678, + "learning_rate": 1.2790926776548318e-05, + "loss": 2.3828, + "step": 16764 + }, + { + "epoch": 1.3529981438140586, + "grad_norm": 0.6830280423164368, + "learning_rate": 1.278320263632622e-05, + "loss": 2.3727, + "step": 16765 + }, + { + "epoch": 1.3530788475506417, + "grad_norm": 0.6782984733581543, + "learning_rate": 1.2775480669774698e-05, + "loss": 2.3984, + "step": 16766 + }, + { + "epoch": 1.3531595512872245, + "grad_norm": 0.6939808130264282, + "learning_rate": 1.276776087708621e-05, + "loss": 2.3724, + "step": 16767 + }, + { + "epoch": 1.3532402550238076, + "grad_norm": 0.7562546133995056, + "learning_rate": 1.276004325845317e-05, + "loss": 2.4178, + "step": 16768 + }, + { + "epoch": 1.3533209587603907, + "grad_norm": 0.6692922115325928, + "learning_rate": 1.2752327814067877e-05, + "loss": 2.4072, + "step": 16769 + }, + { + "epoch": 1.3534016624969736, + "grad_norm": 0.6783415079116821, + "learning_rate": 1.2744614544122635e-05, + "loss": 2.3993, + "step": 16770 + }, + { + "epoch": 1.3534823662335567, + "grad_norm": 0.6608997583389282, + "learning_rate": 1.27369034488097e-05, + "loss": 2.3883, + "step": 16771 + }, + { + "epoch": 1.3535630699701398, + "grad_norm": 0.6849228739738464, + "learning_rate": 1.2729194528321231e-05, + "loss": 2.4009, + "step": 16772 + }, + { + "epoch": 1.3536437737067226, + "grad_norm": 0.7059305906295776, + "learning_rate": 1.2721487782849362e-05, + "loss": 2.508, + "step": 16773 + }, + { + "epoch": 1.3537244774433057, + 
"grad_norm": 0.6471492052078247, + "learning_rate": 1.2713783212586183e-05, + "loss": 2.3813, + "step": 16774 + }, + { + "epoch": 1.3538051811798886, + "grad_norm": 0.7108949422836304, + "learning_rate": 1.2706080817723687e-05, + "loss": 2.4189, + "step": 16775 + }, + { + "epoch": 1.3538858849164717, + "grad_norm": 0.6623945236206055, + "learning_rate": 1.269838059845383e-05, + "loss": 2.4128, + "step": 16776 + }, + { + "epoch": 1.3539665886530545, + "grad_norm": 0.6595518589019775, + "learning_rate": 1.269068255496857e-05, + "loss": 2.3984, + "step": 16777 + }, + { + "epoch": 1.3540472923896376, + "grad_norm": 0.6932248473167419, + "learning_rate": 1.2682986687459708e-05, + "loss": 2.3951, + "step": 16778 + }, + { + "epoch": 1.3541279961262207, + "grad_norm": 0.6914867162704468, + "learning_rate": 1.2675292996119059e-05, + "loss": 2.4602, + "step": 16779 + }, + { + "epoch": 1.3542086998628036, + "grad_norm": 0.6633034348487854, + "learning_rate": 1.266760148113838e-05, + "loss": 2.43, + "step": 16780 + }, + { + "epoch": 1.3542894035993867, + "grad_norm": 0.6987594366073608, + "learning_rate": 1.2659912142709363e-05, + "loss": 2.3962, + "step": 16781 + }, + { + "epoch": 1.3543701073359697, + "grad_norm": 0.7429597973823547, + "learning_rate": 1.2652224981023652e-05, + "loss": 2.4838, + "step": 16782 + }, + { + "epoch": 1.3544508110725526, + "grad_norm": 0.6402504444122314, + "learning_rate": 1.2644539996272808e-05, + "loss": 2.43, + "step": 16783 + }, + { + "epoch": 1.3545315148091357, + "grad_norm": 0.6763156652450562, + "learning_rate": 1.263685718864841e-05, + "loss": 2.4911, + "step": 16784 + }, + { + "epoch": 1.3546122185457188, + "grad_norm": 0.8133900165557861, + "learning_rate": 1.2629176558341881e-05, + "loss": 2.45, + "step": 16785 + }, + { + "epoch": 1.3546929222823016, + "grad_norm": 0.6946277022361755, + "learning_rate": 1.262149810554465e-05, + "loss": 2.43, + "step": 16786 + }, + { + "epoch": 1.3547736260188847, + "grad_norm": 0.7667170166969299, + 
"learning_rate": 1.2613821830448125e-05, + "loss": 2.4464, + "step": 16787 + }, + { + "epoch": 1.3548543297554676, + "grad_norm": 0.672662615776062, + "learning_rate": 1.2606147733243567e-05, + "loss": 2.3653, + "step": 16788 + }, + { + "epoch": 1.3549350334920507, + "grad_norm": 0.6856412291526794, + "learning_rate": 1.2598475814122258e-05, + "loss": 2.3924, + "step": 16789 + }, + { + "epoch": 1.3550157372286336, + "grad_norm": 0.6966650485992432, + "learning_rate": 1.2590806073275407e-05, + "loss": 2.4039, + "step": 16790 + }, + { + "epoch": 1.3550964409652166, + "grad_norm": 0.7397874593734741, + "learning_rate": 1.2583138510894143e-05, + "loss": 2.4769, + "step": 16791 + }, + { + "epoch": 1.3551771447017997, + "grad_norm": 0.6960996985435486, + "learning_rate": 1.2575473127169591e-05, + "loss": 2.4342, + "step": 16792 + }, + { + "epoch": 1.3552578484383826, + "grad_norm": 0.7324376702308655, + "learning_rate": 1.2567809922292795e-05, + "loss": 2.4779, + "step": 16793 + }, + { + "epoch": 1.3553385521749657, + "grad_norm": 0.6891930103302002, + "learning_rate": 1.2560148896454704e-05, + "loss": 2.4228, + "step": 16794 + }, + { + "epoch": 1.3554192559115488, + "grad_norm": 0.6919474601745605, + "learning_rate": 1.2552490049846278e-05, + "loss": 2.4178, + "step": 16795 + }, + { + "epoch": 1.3554999596481316, + "grad_norm": 0.7067604660987854, + "learning_rate": 1.2544833382658405e-05, + "loss": 2.457, + "step": 16796 + }, + { + "epoch": 1.3555806633847147, + "grad_norm": 0.7667992115020752, + "learning_rate": 1.253717889508188e-05, + "loss": 2.3951, + "step": 16797 + }, + { + "epoch": 1.3556613671212978, + "grad_norm": 0.6337998509407043, + "learning_rate": 1.2529526587307482e-05, + "loss": 2.3788, + "step": 16798 + }, + { + "epoch": 1.3557420708578807, + "grad_norm": 0.6591900587081909, + "learning_rate": 1.2521876459525927e-05, + "loss": 2.4101, + "step": 16799 + }, + { + "epoch": 1.3558227745944638, + "grad_norm": 0.7115298509597778, + "learning_rate": 
1.2514228511927895e-05, + "loss": 2.4417, + "step": 16800 + }, + { + "epoch": 1.3559034783310469, + "grad_norm": 0.6851321458816528, + "learning_rate": 1.2506582744703965e-05, + "loss": 2.4081, + "step": 16801 + }, + { + "epoch": 1.3559841820676297, + "grad_norm": 0.7469603419303894, + "learning_rate": 1.249893915804471e-05, + "loss": 2.3703, + "step": 16802 + }, + { + "epoch": 1.3560648858042128, + "grad_norm": 0.6972614526748657, + "learning_rate": 1.2491297752140641e-05, + "loss": 2.3549, + "step": 16803 + }, + { + "epoch": 1.3561455895407957, + "grad_norm": 0.6669485569000244, + "learning_rate": 1.2483658527182151e-05, + "loss": 2.4261, + "step": 16804 + }, + { + "epoch": 1.3562262932773788, + "grad_norm": 0.7516919374465942, + "learning_rate": 1.247602148335968e-05, + "loss": 2.4323, + "step": 16805 + }, + { + "epoch": 1.3563069970139616, + "grad_norm": 0.7191836833953857, + "learning_rate": 1.2468386620863548e-05, + "loss": 2.4242, + "step": 16806 + }, + { + "epoch": 1.3563877007505447, + "grad_norm": 0.660237729549408, + "learning_rate": 1.2460753939884017e-05, + "loss": 2.4154, + "step": 16807 + }, + { + "epoch": 1.3564684044871278, + "grad_norm": 0.749531626701355, + "learning_rate": 1.2453123440611325e-05, + "loss": 2.4138, + "step": 16808 + }, + { + "epoch": 1.3565491082237107, + "grad_norm": 0.6808986067771912, + "learning_rate": 1.2445495123235673e-05, + "loss": 2.3918, + "step": 16809 + }, + { + "epoch": 1.3566298119602938, + "grad_norm": 0.686183750629425, + "learning_rate": 1.2437868987947133e-05, + "loss": 2.4172, + "step": 16810 + }, + { + "epoch": 1.3567105156968768, + "grad_norm": 0.6487868428230286, + "learning_rate": 1.2430245034935784e-05, + "loss": 2.4199, + "step": 16811 + }, + { + "epoch": 1.3567912194334597, + "grad_norm": 0.7352244257926941, + "learning_rate": 1.242262326439163e-05, + "loss": 2.3779, + "step": 16812 + }, + { + "epoch": 1.3568719231700428, + "grad_norm": 0.7250565886497498, + "learning_rate": 1.2415003676504644e-05, + 
"loss": 2.4106, + "step": 16813 + }, + { + "epoch": 1.3569526269066259, + "grad_norm": 0.6843926906585693, + "learning_rate": 1.2407386271464716e-05, + "loss": 2.3725, + "step": 16814 + }, + { + "epoch": 1.3570333306432087, + "grad_norm": 0.686326801776886, + "learning_rate": 1.2399771049461684e-05, + "loss": 2.3709, + "step": 16815 + }, + { + "epoch": 1.3571140343797918, + "grad_norm": 0.6796969771385193, + "learning_rate": 1.2392158010685373e-05, + "loss": 2.4545, + "step": 16816 + }, + { + "epoch": 1.357194738116375, + "grad_norm": 0.6469466090202332, + "learning_rate": 1.2384547155325466e-05, + "loss": 2.4263, + "step": 16817 + }, + { + "epoch": 1.3572754418529578, + "grad_norm": 0.7089909911155701, + "learning_rate": 1.2376938483571688e-05, + "loss": 2.378, + "step": 16818 + }, + { + "epoch": 1.3573561455895409, + "grad_norm": 0.7313235402107239, + "learning_rate": 1.2369331995613665e-05, + "loss": 2.46, + "step": 16819 + }, + { + "epoch": 1.3574368493261237, + "grad_norm": 0.7555651664733887, + "learning_rate": 1.2361727691640934e-05, + "loss": 2.531, + "step": 16820 + }, + { + "epoch": 1.3575175530627068, + "grad_norm": 0.7563485503196716, + "learning_rate": 1.2354125571843033e-05, + "loss": 2.4205, + "step": 16821 + }, + { + "epoch": 1.3575982567992897, + "grad_norm": 0.7996519804000854, + "learning_rate": 1.2346525636409434e-05, + "loss": 2.4223, + "step": 16822 + }, + { + "epoch": 1.3576789605358728, + "grad_norm": 0.7141731977462769, + "learning_rate": 1.233892788552955e-05, + "loss": 2.4554, + "step": 16823 + }, + { + "epoch": 1.3577596642724559, + "grad_norm": 0.6715070605278015, + "learning_rate": 1.233133231939273e-05, + "loss": 2.4386, + "step": 16824 + }, + { + "epoch": 1.3578403680090387, + "grad_norm": 0.6893020272254944, + "learning_rate": 1.2323738938188301e-05, + "loss": 2.4065, + "step": 16825 + }, + { + "epoch": 1.3579210717456218, + "grad_norm": 0.7542821764945984, + "learning_rate": 1.2316147742105454e-05, + "loss": 2.3974, + "step": 16826 
+ }, + { + "epoch": 1.358001775482205, + "grad_norm": 0.7177664041519165, + "learning_rate": 1.230855873133343e-05, + "loss": 2.4306, + "step": 16827 + }, + { + "epoch": 1.3580824792187878, + "grad_norm": 0.7056576013565063, + "learning_rate": 1.2300971906061354e-05, + "loss": 2.4238, + "step": 16828 + }, + { + "epoch": 1.3581631829553709, + "grad_norm": 0.686903715133667, + "learning_rate": 1.2293387266478296e-05, + "loss": 2.3902, + "step": 16829 + }, + { + "epoch": 1.358243886691954, + "grad_norm": 0.7377725839614868, + "learning_rate": 1.2285804812773293e-05, + "loss": 2.4294, + "step": 16830 + }, + { + "epoch": 1.3583245904285368, + "grad_norm": 0.6537891030311584, + "learning_rate": 1.227822454513532e-05, + "loss": 2.374, + "step": 16831 + }, + { + "epoch": 1.35840529416512, + "grad_norm": 0.684699296951294, + "learning_rate": 1.2270646463753288e-05, + "loss": 2.4105, + "step": 16832 + }, + { + "epoch": 1.3584859979017028, + "grad_norm": 0.7042316794395447, + "learning_rate": 1.2263070568816081e-05, + "loss": 2.4246, + "step": 16833 + }, + { + "epoch": 1.3585667016382859, + "grad_norm": 0.7610476613044739, + "learning_rate": 1.2255496860512505e-05, + "loss": 2.4581, + "step": 16834 + }, + { + "epoch": 1.3586474053748687, + "grad_norm": 0.6620839834213257, + "learning_rate": 1.224792533903134e-05, + "loss": 2.4138, + "step": 16835 + }, + { + "epoch": 1.3587281091114518, + "grad_norm": 0.6861035823822021, + "learning_rate": 1.2240356004561227e-05, + "loss": 2.4195, + "step": 16836 + }, + { + "epoch": 1.358808812848035, + "grad_norm": 0.7186882495880127, + "learning_rate": 1.2232788857290855e-05, + "loss": 2.404, + "step": 16837 + }, + { + "epoch": 1.3588895165846178, + "grad_norm": 0.7219386696815491, + "learning_rate": 1.2225223897408833e-05, + "loss": 2.3778, + "step": 16838 + }, + { + "epoch": 1.3589702203212009, + "grad_norm": 0.6935911774635315, + "learning_rate": 1.2217661125103663e-05, + "loss": 2.4617, + "step": 16839 + }, + { + "epoch": 
1.359050924057784, + "grad_norm": 0.7885910272598267, + "learning_rate": 1.2210100540563828e-05, + "loss": 2.4467, + "step": 16840 + }, + { + "epoch": 1.3591316277943668, + "grad_norm": 0.6690255403518677, + "learning_rate": 1.220254214397778e-05, + "loss": 2.381, + "step": 16841 + }, + { + "epoch": 1.35921233153095, + "grad_norm": 0.7592741847038269, + "learning_rate": 1.2194985935533887e-05, + "loss": 2.4459, + "step": 16842 + }, + { + "epoch": 1.359293035267533, + "grad_norm": 0.827460527420044, + "learning_rate": 1.2187431915420466e-05, + "loss": 2.3842, + "step": 16843 + }, + { + "epoch": 1.3593737390041158, + "grad_norm": 0.7313764691352844, + "learning_rate": 1.2179880083825811e-05, + "loss": 2.3938, + "step": 16844 + }, + { + "epoch": 1.359454442740699, + "grad_norm": 0.7093486189842224, + "learning_rate": 1.2172330440938084e-05, + "loss": 2.4316, + "step": 16845 + }, + { + "epoch": 1.359535146477282, + "grad_norm": 0.6805742383003235, + "learning_rate": 1.2164782986945467e-05, + "loss": 2.4372, + "step": 16846 + }, + { + "epoch": 1.3596158502138649, + "grad_norm": 0.7525961399078369, + "learning_rate": 1.2157237722036064e-05, + "loss": 2.3867, + "step": 16847 + }, + { + "epoch": 1.359696553950448, + "grad_norm": 0.723896861076355, + "learning_rate": 1.2149694646397947e-05, + "loss": 2.4685, + "step": 16848 + }, + { + "epoch": 1.3597772576870308, + "grad_norm": 0.704448938369751, + "learning_rate": 1.2142153760219055e-05, + "loss": 2.4463, + "step": 16849 + }, + { + "epoch": 1.359857961423614, + "grad_norm": 0.7207927703857422, + "learning_rate": 1.2134615063687349e-05, + "loss": 2.3549, + "step": 16850 + }, + { + "epoch": 1.3599386651601968, + "grad_norm": 0.7106234431266785, + "learning_rate": 1.2127078556990724e-05, + "loss": 2.4145, + "step": 16851 + }, + { + "epoch": 1.3600193688967799, + "grad_norm": 0.7740694284439087, + "learning_rate": 1.2119544240316993e-05, + "loss": 2.3999, + "step": 16852 + }, + { + "epoch": 1.360100072633363, + "grad_norm": 
0.6696181297302246, + "learning_rate": 1.2112012113853954e-05, + "loss": 2.4046, + "step": 16853 + }, + { + "epoch": 1.3601807763699458, + "grad_norm": 0.6758043169975281, + "learning_rate": 1.2104482177789334e-05, + "loss": 2.4021, + "step": 16854 + }, + { + "epoch": 1.360261480106529, + "grad_norm": 0.6659380793571472, + "learning_rate": 1.2096954432310758e-05, + "loss": 2.4145, + "step": 16855 + }, + { + "epoch": 1.360342183843112, + "grad_norm": 0.6889290809631348, + "learning_rate": 1.2089428877605858e-05, + "loss": 2.3486, + "step": 16856 + }, + { + "epoch": 1.3604228875796949, + "grad_norm": 0.6755563020706177, + "learning_rate": 1.2081905513862201e-05, + "loss": 2.4294, + "step": 16857 + }, + { + "epoch": 1.360503591316278, + "grad_norm": 0.7662243843078613, + "learning_rate": 1.2074384341267276e-05, + "loss": 2.414, + "step": 16858 + }, + { + "epoch": 1.360584295052861, + "grad_norm": 0.7432721853256226, + "learning_rate": 1.2066865360008517e-05, + "loss": 2.4314, + "step": 16859 + }, + { + "epoch": 1.360664998789444, + "grad_norm": 0.6465074419975281, + "learning_rate": 1.2059348570273366e-05, + "loss": 2.3349, + "step": 16860 + }, + { + "epoch": 1.360745702526027, + "grad_norm": 0.6940968632698059, + "learning_rate": 1.2051833972249105e-05, + "loss": 2.4539, + "step": 16861 + }, + { + "epoch": 1.36082640626261, + "grad_norm": 0.7211138010025024, + "learning_rate": 1.2044321566123019e-05, + "loss": 2.4041, + "step": 16862 + }, + { + "epoch": 1.360907109999193, + "grad_norm": 0.6746649146080017, + "learning_rate": 1.2036811352082367e-05, + "loss": 2.4329, + "step": 16863 + }, + { + "epoch": 1.360987813735776, + "grad_norm": 0.7502184510231018, + "learning_rate": 1.2029303330314345e-05, + "loss": 2.407, + "step": 16864 + }, + { + "epoch": 1.361068517472359, + "grad_norm": 0.7192596793174744, + "learning_rate": 1.2021797501006027e-05, + "loss": 2.3907, + "step": 16865 + }, + { + "epoch": 1.361149221208942, + "grad_norm": 0.6682254672050476, + 
"learning_rate": 1.2014293864344483e-05, + "loss": 2.391, + "step": 16866 + }, + { + "epoch": 1.3612299249455249, + "grad_norm": 0.680969774723053, + "learning_rate": 1.2006792420516755e-05, + "loss": 2.3479, + "step": 16867 + }, + { + "epoch": 1.361310628682108, + "grad_norm": 0.682671308517456, + "learning_rate": 1.1999293169709757e-05, + "loss": 2.4097, + "step": 16868 + }, + { + "epoch": 1.361391332418691, + "grad_norm": 0.7030573487281799, + "learning_rate": 1.199179611211041e-05, + "loss": 2.4514, + "step": 16869 + }, + { + "epoch": 1.361472036155274, + "grad_norm": 0.670630693435669, + "learning_rate": 1.1984301247905582e-05, + "loss": 2.3982, + "step": 16870 + }, + { + "epoch": 1.361552739891857, + "grad_norm": 0.6993644833564758, + "learning_rate": 1.1976808577282017e-05, + "loss": 2.4297, + "step": 16871 + }, + { + "epoch": 1.36163344362844, + "grad_norm": 0.7448122501373291, + "learning_rate": 1.1969318100426486e-05, + "loss": 2.3612, + "step": 16872 + }, + { + "epoch": 1.361714147365023, + "grad_norm": 0.7014498114585876, + "learning_rate": 1.1961829817525649e-05, + "loss": 2.3451, + "step": 16873 + }, + { + "epoch": 1.361794851101606, + "grad_norm": 0.7140750885009766, + "learning_rate": 1.195434372876616e-05, + "loss": 2.4231, + "step": 16874 + }, + { + "epoch": 1.3618755548381891, + "grad_norm": 0.7377427816390991, + "learning_rate": 1.1946859834334567e-05, + "loss": 2.4055, + "step": 16875 + }, + { + "epoch": 1.361956258574772, + "grad_norm": 0.7969191670417786, + "learning_rate": 1.1939378134417433e-05, + "loss": 2.3503, + "step": 16876 + }, + { + "epoch": 1.362036962311355, + "grad_norm": 0.6821554899215698, + "learning_rate": 1.1931898629201155e-05, + "loss": 2.4259, + "step": 16877 + }, + { + "epoch": 1.3621176660479382, + "grad_norm": 0.6598221659660339, + "learning_rate": 1.1924421318872182e-05, + "loss": 2.3833, + "step": 16878 + }, + { + "epoch": 1.362198369784521, + "grad_norm": 0.8031432628631592, + "learning_rate": 1.1916946203616863e-05, 
+ "loss": 2.5077, + "step": 16879 + }, + { + "epoch": 1.362279073521104, + "grad_norm": 0.7247405648231506, + "learning_rate": 1.190947328362152e-05, + "loss": 2.426, + "step": 16880 + }, + { + "epoch": 1.362359777257687, + "grad_norm": 0.7256691455841064, + "learning_rate": 1.1902002559072344e-05, + "loss": 2.474, + "step": 16881 + }, + { + "epoch": 1.36244048099427, + "grad_norm": 0.7382180094718933, + "learning_rate": 1.1894534030155558e-05, + "loss": 2.4487, + "step": 16882 + }, + { + "epoch": 1.362521184730853, + "grad_norm": 0.700179398059845, + "learning_rate": 1.1887067697057297e-05, + "loss": 2.3836, + "step": 16883 + }, + { + "epoch": 1.362601888467436, + "grad_norm": 0.706106424331665, + "learning_rate": 1.1879603559963638e-05, + "loss": 2.4304, + "step": 16884 + }, + { + "epoch": 1.362682592204019, + "grad_norm": 0.7514815926551819, + "learning_rate": 1.1872141619060606e-05, + "loss": 2.4895, + "step": 16885 + }, + { + "epoch": 1.362763295940602, + "grad_norm": 0.6605612635612488, + "learning_rate": 1.1864681874534201e-05, + "loss": 2.3569, + "step": 16886 + }, + { + "epoch": 1.362843999677185, + "grad_norm": 0.6366496682167053, + "learning_rate": 1.1857224326570283e-05, + "loss": 2.3919, + "step": 16887 + }, + { + "epoch": 1.3629247034137681, + "grad_norm": 0.8100820183753967, + "learning_rate": 1.1849768975354736e-05, + "loss": 2.5063, + "step": 16888 + }, + { + "epoch": 1.363005407150351, + "grad_norm": 0.685127854347229, + "learning_rate": 1.1842315821073403e-05, + "loss": 2.4647, + "step": 16889 + }, + { + "epoch": 1.363086110886934, + "grad_norm": 0.696172833442688, + "learning_rate": 1.1834864863911987e-05, + "loss": 2.4224, + "step": 16890 + }, + { + "epoch": 1.3631668146235172, + "grad_norm": 0.6558032035827637, + "learning_rate": 1.1827416104056199e-05, + "loss": 2.3619, + "step": 16891 + }, + { + "epoch": 1.3632475183601, + "grad_norm": 0.744687020778656, + "learning_rate": 1.1819969541691689e-05, + "loss": 2.4669, + "step": 16892 + }, + { + 
"epoch": 1.3633282220966831, + "grad_norm": 0.6925212740898132, + "learning_rate": 1.1812525177004052e-05, + "loss": 2.3967, + "step": 16893 + }, + { + "epoch": 1.363408925833266, + "grad_norm": 0.6861244440078735, + "learning_rate": 1.1805083010178797e-05, + "loss": 2.3979, + "step": 16894 + }, + { + "epoch": 1.363489629569849, + "grad_norm": 0.6987108588218689, + "learning_rate": 1.179764304140143e-05, + "loss": 2.4263, + "step": 16895 + }, + { + "epoch": 1.363570333306432, + "grad_norm": 0.6940091848373413, + "learning_rate": 1.179020527085738e-05, + "loss": 2.4328, + "step": 16896 + }, + { + "epoch": 1.363651037043015, + "grad_norm": 0.6831968426704407, + "learning_rate": 1.1782769698731966e-05, + "loss": 2.427, + "step": 16897 + }, + { + "epoch": 1.3637317407795981, + "grad_norm": 0.7370985746383667, + "learning_rate": 1.177533632521054e-05, + "loss": 2.3711, + "step": 16898 + }, + { + "epoch": 1.363812444516181, + "grad_norm": 0.8176774978637695, + "learning_rate": 1.1767905150478376e-05, + "loss": 2.4337, + "step": 16899 + }, + { + "epoch": 1.363893148252764, + "grad_norm": 0.786318302154541, + "learning_rate": 1.1760476174720637e-05, + "loss": 2.5099, + "step": 16900 + }, + { + "epoch": 1.3639738519893472, + "grad_norm": 0.7309854626655579, + "learning_rate": 1.1753049398122495e-05, + "loss": 2.46, + "step": 16901 + }, + { + "epoch": 1.36405455572593, + "grad_norm": 0.7410863637924194, + "learning_rate": 1.1745624820869039e-05, + "loss": 2.4249, + "step": 16902 + }, + { + "epoch": 1.3641352594625131, + "grad_norm": 0.7059988379478455, + "learning_rate": 1.1738202443145308e-05, + "loss": 2.4964, + "step": 16903 + }, + { + "epoch": 1.3642159631990962, + "grad_norm": 0.7351845502853394, + "learning_rate": 1.1730782265136287e-05, + "loss": 2.4694, + "step": 16904 + }, + { + "epoch": 1.364296666935679, + "grad_norm": 0.6928153038024902, + "learning_rate": 1.1723364287026938e-05, + "loss": 2.426, + "step": 16905 + }, + { + "epoch": 1.3643773706722622, + 
"grad_norm": 0.759920060634613, + "learning_rate": 1.1715948509002083e-05, + "loss": 2.4359, + "step": 16906 + }, + { + "epoch": 1.3644580744088453, + "grad_norm": 0.6655696630477905, + "learning_rate": 1.1708534931246573e-05, + "loss": 2.4118, + "step": 16907 + }, + { + "epoch": 1.3645387781454281, + "grad_norm": 0.6912528872489929, + "learning_rate": 1.170112355394517e-05, + "loss": 2.4257, + "step": 16908 + }, + { + "epoch": 1.3646194818820112, + "grad_norm": 0.6612871289253235, + "learning_rate": 1.1693714377282604e-05, + "loss": 2.4192, + "step": 16909 + }, + { + "epoch": 1.364700185618594, + "grad_norm": 0.6548018455505371, + "learning_rate": 1.1686307401443486e-05, + "loss": 2.4054, + "step": 16910 + }, + { + "epoch": 1.3647808893551772, + "grad_norm": 0.7749961018562317, + "learning_rate": 1.1678902626612443e-05, + "loss": 2.44, + "step": 16911 + }, + { + "epoch": 1.36486159309176, + "grad_norm": 0.7187496423721313, + "learning_rate": 1.1671500052974039e-05, + "loss": 2.4033, + "step": 16912 + }, + { + "epoch": 1.3649422968283431, + "grad_norm": 0.7002814412117004, + "learning_rate": 1.1664099680712715e-05, + "loss": 2.4442, + "step": 16913 + }, + { + "epoch": 1.3650230005649262, + "grad_norm": 0.6852529644966125, + "learning_rate": 1.1656701510012946e-05, + "loss": 2.4253, + "step": 16914 + }, + { + "epoch": 1.365103704301509, + "grad_norm": 0.6922035813331604, + "learning_rate": 1.1649305541059142e-05, + "loss": 2.4406, + "step": 16915 + }, + { + "epoch": 1.3651844080380922, + "grad_norm": 0.6883397698402405, + "learning_rate": 1.1641911774035563e-05, + "loss": 2.4064, + "step": 16916 + }, + { + "epoch": 1.3652651117746752, + "grad_norm": 0.7101531624794006, + "learning_rate": 1.163452020912652e-05, + "loss": 2.4068, + "step": 16917 + }, + { + "epoch": 1.365345815511258, + "grad_norm": 0.728369951248169, + "learning_rate": 1.1627130846516231e-05, + "loss": 2.4319, + "step": 16918 + }, + { + "epoch": 1.3654265192478412, + "grad_norm": 0.6765053272247314, + 
"learning_rate": 1.161974368638884e-05, + "loss": 2.3922, + "step": 16919 + }, + { + "epoch": 1.3655072229844243, + "grad_norm": 0.6909242868423462, + "learning_rate": 1.1612358728928475e-05, + "loss": 2.4124, + "step": 16920 + }, + { + "epoch": 1.3655879267210072, + "grad_norm": 0.735650897026062, + "learning_rate": 1.1604975974319177e-05, + "loss": 2.5137, + "step": 16921 + }, + { + "epoch": 1.3656686304575902, + "grad_norm": 0.6587653756141663, + "learning_rate": 1.1597595422744934e-05, + "loss": 2.4163, + "step": 16922 + }, + { + "epoch": 1.3657493341941733, + "grad_norm": 0.700282096862793, + "learning_rate": 1.159021707438971e-05, + "loss": 2.4272, + "step": 16923 + }, + { + "epoch": 1.3658300379307562, + "grad_norm": 0.7175682783126831, + "learning_rate": 1.1582840929437365e-05, + "loss": 2.4598, + "step": 16924 + }, + { + "epoch": 1.3659107416673393, + "grad_norm": 0.6725881695747375, + "learning_rate": 1.157546698807176e-05, + "loss": 2.4064, + "step": 16925 + }, + { + "epoch": 1.3659914454039221, + "grad_norm": 0.7130467295646667, + "learning_rate": 1.1568095250476651e-05, + "loss": 2.3851, + "step": 16926 + }, + { + "epoch": 1.3660721491405052, + "grad_norm": 0.6859269142150879, + "learning_rate": 1.1560725716835785e-05, + "loss": 2.3577, + "step": 16927 + }, + { + "epoch": 1.366152852877088, + "grad_norm": 0.7037541270256042, + "learning_rate": 1.1553358387332824e-05, + "loss": 2.4402, + "step": 16928 + }, + { + "epoch": 1.3662335566136712, + "grad_norm": 0.7094031572341919, + "learning_rate": 1.1545993262151366e-05, + "loss": 2.4036, + "step": 16929 + }, + { + "epoch": 1.3663142603502543, + "grad_norm": 0.6953302025794983, + "learning_rate": 1.1538630341474965e-05, + "loss": 2.4192, + "step": 16930 + }, + { + "epoch": 1.3663949640868371, + "grad_norm": 0.7012252807617188, + "learning_rate": 1.1531269625487163e-05, + "loss": 2.4207, + "step": 16931 + }, + { + "epoch": 1.3664756678234202, + "grad_norm": 0.6616495847702026, + "learning_rate": 
1.1523911114371366e-05, + "loss": 2.4187, + "step": 16932 + }, + { + "epoch": 1.3665563715600033, + "grad_norm": 0.6819868087768555, + "learning_rate": 1.1516554808310975e-05, + "loss": 2.448, + "step": 16933 + }, + { + "epoch": 1.3666370752965862, + "grad_norm": 0.6869969964027405, + "learning_rate": 1.1509200707489343e-05, + "loss": 2.4134, + "step": 16934 + }, + { + "epoch": 1.3667177790331693, + "grad_norm": 0.6600778698921204, + "learning_rate": 1.1501848812089733e-05, + "loss": 2.4159, + "step": 16935 + }, + { + "epoch": 1.3667984827697524, + "grad_norm": 0.668712317943573, + "learning_rate": 1.1494499122295398e-05, + "loss": 2.41, + "step": 16936 + }, + { + "epoch": 1.3668791865063352, + "grad_norm": 0.767365574836731, + "learning_rate": 1.1487151638289518e-05, + "loss": 2.3856, + "step": 16937 + }, + { + "epoch": 1.3669598902429183, + "grad_norm": 0.721546471118927, + "learning_rate": 1.1479806360255174e-05, + "loss": 2.4038, + "step": 16938 + }, + { + "epoch": 1.3670405939795012, + "grad_norm": 0.6796963810920715, + "learning_rate": 1.1472463288375456e-05, + "loss": 2.3698, + "step": 16939 + }, + { + "epoch": 1.3671212977160843, + "grad_norm": 0.7340671420097351, + "learning_rate": 1.1465122422833363e-05, + "loss": 2.4296, + "step": 16940 + }, + { + "epoch": 1.3672020014526671, + "grad_norm": 0.7173369526863098, + "learning_rate": 1.145778376381187e-05, + "loss": 2.3923, + "step": 16941 + }, + { + "epoch": 1.3672827051892502, + "grad_norm": 0.6683956980705261, + "learning_rate": 1.1450447311493839e-05, + "loss": 2.4092, + "step": 16942 + }, + { + "epoch": 1.3673634089258333, + "grad_norm": 0.6457851529121399, + "learning_rate": 1.1443113066062129e-05, + "loss": 2.3467, + "step": 16943 + }, + { + "epoch": 1.3674441126624162, + "grad_norm": 0.6870608925819397, + "learning_rate": 1.1435781027699532e-05, + "loss": 2.3766, + "step": 16944 + }, + { + "epoch": 1.3675248163989993, + "grad_norm": 0.6496049165725708, + "learning_rate": 1.1428451196588775e-05, + 
"loss": 2.4464, + "step": 16945 + }, + { + "epoch": 1.3676055201355823, + "grad_norm": 0.7554739117622375, + "learning_rate": 1.1421123572912551e-05, + "loss": 2.4243, + "step": 16946 + }, + { + "epoch": 1.3676862238721652, + "grad_norm": 0.7208122611045837, + "learning_rate": 1.1413798156853495e-05, + "loss": 2.3699, + "step": 16947 + }, + { + "epoch": 1.3677669276087483, + "grad_norm": 0.7072176337242126, + "learning_rate": 1.1406474948594126e-05, + "loss": 2.4011, + "step": 16948 + }, + { + "epoch": 1.3678476313453314, + "grad_norm": 0.7316476106643677, + "learning_rate": 1.1399153948316999e-05, + "loss": 2.4508, + "step": 16949 + }, + { + "epoch": 1.3679283350819142, + "grad_norm": 0.8518069386482239, + "learning_rate": 1.1391835156204577e-05, + "loss": 2.4197, + "step": 16950 + }, + { + "epoch": 1.3680090388184973, + "grad_norm": 0.6700364947319031, + "learning_rate": 1.1384518572439228e-05, + "loss": 2.4272, + "step": 16951 + }, + { + "epoch": 1.3680897425550804, + "grad_norm": 0.7007749676704407, + "learning_rate": 1.1377204197203317e-05, + "loss": 2.3777, + "step": 16952 + }, + { + "epoch": 1.3681704462916633, + "grad_norm": 0.6792053580284119, + "learning_rate": 1.1369892030679141e-05, + "loss": 2.4487, + "step": 16953 + }, + { + "epoch": 1.3682511500282464, + "grad_norm": 0.6913022398948669, + "learning_rate": 1.1362582073048932e-05, + "loss": 2.3757, + "step": 16954 + }, + { + "epoch": 1.3683318537648292, + "grad_norm": 0.648248016834259, + "learning_rate": 1.135527432449488e-05, + "loss": 2.3482, + "step": 16955 + }, + { + "epoch": 1.3684125575014123, + "grad_norm": 0.6711798906326294, + "learning_rate": 1.1347968785199115e-05, + "loss": 2.4096, + "step": 16956 + }, + { + "epoch": 1.3684932612379952, + "grad_norm": 0.6932381987571716, + "learning_rate": 1.1340665455343724e-05, + "loss": 2.3834, + "step": 16957 + }, + { + "epoch": 1.3685739649745783, + "grad_norm": 0.6890178918838501, + "learning_rate": 1.1333364335110697e-05, + "loss": 2.4182, + "step": 
16958 + }, + { + "epoch": 1.3686546687111614, + "grad_norm": 0.6612519025802612, + "learning_rate": 1.1326065424681997e-05, + "loss": 2.3691, + "step": 16959 + }, + { + "epoch": 1.3687353724477442, + "grad_norm": 0.7123190760612488, + "learning_rate": 1.131876872423957e-05, + "loss": 2.3919, + "step": 16960 + }, + { + "epoch": 1.3688160761843273, + "grad_norm": 0.6615463495254517, + "learning_rate": 1.1311474233965214e-05, + "loss": 2.4266, + "step": 16961 + }, + { + "epoch": 1.3688967799209104, + "grad_norm": 0.7320190668106079, + "learning_rate": 1.130418195404076e-05, + "loss": 2.4268, + "step": 16962 + }, + { + "epoch": 1.3689774836574933, + "grad_norm": 0.6845116019248962, + "learning_rate": 1.1296891884647965e-05, + "loss": 2.3972, + "step": 16963 + }, + { + "epoch": 1.3690581873940764, + "grad_norm": 0.70455002784729, + "learning_rate": 1.1289604025968448e-05, + "loss": 2.4183, + "step": 16964 + }, + { + "epoch": 1.3691388911306595, + "grad_norm": 0.6952407956123352, + "learning_rate": 1.128231837818392e-05, + "loss": 2.4276, + "step": 16965 + }, + { + "epoch": 1.3692195948672423, + "grad_norm": 0.7939464449882507, + "learning_rate": 1.1275034941475938e-05, + "loss": 2.4072, + "step": 16966 + }, + { + "epoch": 1.3693002986038254, + "grad_norm": 0.6974930763244629, + "learning_rate": 1.1267753716026007e-05, + "loss": 2.4133, + "step": 16967 + }, + { + "epoch": 1.3693810023404085, + "grad_norm": 0.7187508344650269, + "learning_rate": 1.126047470201559e-05, + "loss": 2.3588, + "step": 16968 + }, + { + "epoch": 1.3694617060769914, + "grad_norm": 0.6887609958648682, + "learning_rate": 1.1253197899626134e-05, + "loss": 2.4322, + "step": 16969 + }, + { + "epoch": 1.3695424098135744, + "grad_norm": 0.679957389831543, + "learning_rate": 1.1245923309038964e-05, + "loss": 2.3907, + "step": 16970 + }, + { + "epoch": 1.3696231135501573, + "grad_norm": 0.7540870308876038, + "learning_rate": 1.1238650930435378e-05, + "loss": 2.4752, + "step": 16971 + }, + { + "epoch": 
1.3697038172867404, + "grad_norm": 0.7697634100914001, + "learning_rate": 1.1231380763996635e-05, + "loss": 2.4366, + "step": 16972 + }, + { + "epoch": 1.3697845210233233, + "grad_norm": 0.6836850643157959, + "learning_rate": 1.1224112809903954e-05, + "loss": 2.3511, + "step": 16973 + }, + { + "epoch": 1.3698652247599064, + "grad_norm": 0.6904506683349609, + "learning_rate": 1.1216847068338421e-05, + "loss": 2.4109, + "step": 16974 + }, + { + "epoch": 1.3699459284964894, + "grad_norm": 0.6579318046569824, + "learning_rate": 1.1209583539481127e-05, + "loss": 2.4391, + "step": 16975 + }, + { + "epoch": 1.3700266322330723, + "grad_norm": 0.7107192277908325, + "learning_rate": 1.120232222351314e-05, + "loss": 2.399, + "step": 16976 + }, + { + "epoch": 1.3701073359696554, + "grad_norm": 0.7581583261489868, + "learning_rate": 1.119506312061539e-05, + "loss": 2.4817, + "step": 16977 + }, + { + "epoch": 1.3701880397062385, + "grad_norm": 0.6836642622947693, + "learning_rate": 1.11878062309688e-05, + "loss": 2.4415, + "step": 16978 + }, + { + "epoch": 1.3702687434428213, + "grad_norm": 0.6842699646949768, + "learning_rate": 1.118055155475426e-05, + "loss": 2.4045, + "step": 16979 + }, + { + "epoch": 1.3703494471794044, + "grad_norm": 0.7630519270896912, + "learning_rate": 1.1173299092152534e-05, + "loss": 2.4314, + "step": 16980 + }, + { + "epoch": 1.3704301509159875, + "grad_norm": 0.7334303259849548, + "learning_rate": 1.116604884334439e-05, + "loss": 2.3564, + "step": 16981 + }, + { + "epoch": 1.3705108546525704, + "grad_norm": 0.6929439306259155, + "learning_rate": 1.1158800808510538e-05, + "loss": 2.4258, + "step": 16982 + }, + { + "epoch": 1.3705915583891535, + "grad_norm": 0.6387187838554382, + "learning_rate": 1.1151554987831591e-05, + "loss": 2.3263, + "step": 16983 + }, + { + "epoch": 1.3706722621257363, + "grad_norm": 0.7279032468795776, + "learning_rate": 1.1144311381488136e-05, + "loss": 2.4074, + "step": 16984 + }, + { + "epoch": 1.3707529658623194, + 
"grad_norm": 0.7066916227340698, + "learning_rate": 1.113706998966072e-05, + "loss": 2.4358, + "step": 16985 + }, + { + "epoch": 1.3708336695989023, + "grad_norm": 0.6753098964691162, + "learning_rate": 1.1129830812529807e-05, + "loss": 2.4195, + "step": 16986 + }, + { + "epoch": 1.3709143733354854, + "grad_norm": 0.6728894114494324, + "learning_rate": 1.112259385027582e-05, + "loss": 2.3712, + "step": 16987 + }, + { + "epoch": 1.3709950770720685, + "grad_norm": 0.7251775860786438, + "learning_rate": 1.1115359103079115e-05, + "loss": 2.4063, + "step": 16988 + }, + { + "epoch": 1.3710757808086513, + "grad_norm": 0.6797254085540771, + "learning_rate": 1.1108126571120036e-05, + "loss": 2.395, + "step": 16989 + }, + { + "epoch": 1.3711564845452344, + "grad_norm": 0.7505605220794678, + "learning_rate": 1.1100896254578786e-05, + "loss": 2.4044, + "step": 16990 + }, + { + "epoch": 1.3712371882818175, + "grad_norm": 0.7126416563987732, + "learning_rate": 1.1093668153635594e-05, + "loss": 2.4043, + "step": 16991 + }, + { + "epoch": 1.3713178920184004, + "grad_norm": 0.6550771594047546, + "learning_rate": 1.1086442268470609e-05, + "loss": 2.3515, + "step": 16992 + }, + { + "epoch": 1.3713985957549835, + "grad_norm": 0.7253621816635132, + "learning_rate": 1.1079218599263874e-05, + "loss": 2.4109, + "step": 16993 + }, + { + "epoch": 1.3714792994915666, + "grad_norm": 0.7272186875343323, + "learning_rate": 1.1071997146195468e-05, + "loss": 2.3531, + "step": 16994 + }, + { + "epoch": 1.3715600032281494, + "grad_norm": 0.6841129660606384, + "learning_rate": 1.1064777909445345e-05, + "loss": 2.4031, + "step": 16995 + }, + { + "epoch": 1.3716407069647325, + "grad_norm": 0.692945659160614, + "learning_rate": 1.1057560889193441e-05, + "loss": 2.3858, + "step": 16996 + }, + { + "epoch": 1.3717214107013156, + "grad_norm": 0.721182644367218, + "learning_rate": 1.1050346085619612e-05, + "loss": 2.3871, + "step": 16997 + }, + { + "epoch": 1.3718021144378985, + "grad_norm": 
0.722960889339447, + "learning_rate": 1.1043133498903702e-05, + "loss": 2.3452, + "step": 16998 + }, + { + "epoch": 1.3718828181744815, + "grad_norm": 0.7148451805114746, + "learning_rate": 1.1035923129225412e-05, + "loss": 2.3905, + "step": 16999 + }, + { + "epoch": 1.3719635219110644, + "grad_norm": 0.7118532061576843, + "learning_rate": 1.1028714976764486e-05, + "loss": 2.3894, + "step": 17000 + }, + { + "epoch": 1.3719635219110644, + "eval_loss": 2.3730249404907227, + "eval_runtime": 769.4165, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.568, + "step": 17000 + }, + { + "epoch": 1.3720442256476475, + "grad_norm": 0.6933719515800476, + "learning_rate": 1.1021509041700539e-05, + "loss": 2.394, + "step": 17001 + }, + { + "epoch": 1.3721249293842304, + "grad_norm": 0.7330136895179749, + "learning_rate": 1.1014305324213215e-05, + "loss": 2.4466, + "step": 17002 + }, + { + "epoch": 1.3722056331208135, + "grad_norm": 0.6614598631858826, + "learning_rate": 1.1007103824481979e-05, + "loss": 2.4441, + "step": 17003 + }, + { + "epoch": 1.3722863368573965, + "grad_norm": 0.8030059933662415, + "learning_rate": 1.0999904542686356e-05, + "loss": 2.4284, + "step": 17004 + }, + { + "epoch": 1.3723670405939794, + "grad_norm": 0.6881710886955261, + "learning_rate": 1.099270747900576e-05, + "loss": 2.4433, + "step": 17005 + }, + { + "epoch": 1.3724477443305625, + "grad_norm": 0.661325216293335, + "learning_rate": 1.0985512633619555e-05, + "loss": 2.4144, + "step": 17006 + }, + { + "epoch": 1.3725284480671456, + "grad_norm": 0.6896070241928101, + "learning_rate": 1.0978320006707065e-05, + "loss": 2.3972, + "step": 17007 + }, + { + "epoch": 1.3726091518037284, + "grad_norm": 0.7043858766555786, + "learning_rate": 1.0971129598447561e-05, + "loss": 2.4082, + "step": 17008 + }, + { + "epoch": 1.3726898555403115, + "grad_norm": 0.7162652611732483, + "learning_rate": 1.0963941409020217e-05, + "loss": 2.3696, + "step": 17009 + }, + { + "epoch": 1.3727705592768946, + 
"grad_norm": 0.6809261441230774, + "learning_rate": 1.0956755438604194e-05, + "loss": 2.4392, + "step": 17010 + }, + { + "epoch": 1.3728512630134775, + "grad_norm": 0.6897100806236267, + "learning_rate": 1.0949571687378602e-05, + "loss": 2.4942, + "step": 17011 + }, + { + "epoch": 1.3729319667500606, + "grad_norm": 0.6903488039970398, + "learning_rate": 1.0942390155522442e-05, + "loss": 2.3936, + "step": 17012 + }, + { + "epoch": 1.3730126704866437, + "grad_norm": 0.676643431186676, + "learning_rate": 1.0935210843214727e-05, + "loss": 2.3972, + "step": 17013 + }, + { + "epoch": 1.3730933742232265, + "grad_norm": 0.6523454189300537, + "learning_rate": 1.092803375063437e-05, + "loss": 2.4914, + "step": 17014 + }, + { + "epoch": 1.3731740779598096, + "grad_norm": 0.7250776886940002, + "learning_rate": 1.092085887796026e-05, + "loss": 2.4493, + "step": 17015 + }, + { + "epoch": 1.3732547816963925, + "grad_norm": 0.6791245937347412, + "learning_rate": 1.091368622537119e-05, + "loss": 2.4553, + "step": 17016 + }, + { + "epoch": 1.3733354854329756, + "grad_norm": 0.8086698651313782, + "learning_rate": 1.0906515793045934e-05, + "loss": 2.457, + "step": 17017 + }, + { + "epoch": 1.3734161891695584, + "grad_norm": 0.6653520464897156, + "learning_rate": 1.0899347581163221e-05, + "loss": 2.3974, + "step": 17018 + }, + { + "epoch": 1.3734968929061415, + "grad_norm": 0.6596232056617737, + "learning_rate": 1.0892181589901651e-05, + "loss": 2.3771, + "step": 17019 + }, + { + "epoch": 1.3735775966427246, + "grad_norm": 0.7042080760002136, + "learning_rate": 1.0885017819439858e-05, + "loss": 2.4493, + "step": 17020 + }, + { + "epoch": 1.3736583003793075, + "grad_norm": 0.6882427930831909, + "learning_rate": 1.0877856269956377e-05, + "loss": 2.4293, + "step": 17021 + }, + { + "epoch": 1.3737390041158906, + "grad_norm": 0.6881027221679688, + "learning_rate": 1.0870696941629676e-05, + "loss": 2.4503, + "step": 17022 + }, + { + "epoch": 1.3738197078524736, + "grad_norm": 
0.7282640337944031, + "learning_rate": 1.086353983463818e-05, + "loss": 2.4173, + "step": 17023 + }, + { + "epoch": 1.3739004115890565, + "grad_norm": 0.7281018495559692, + "learning_rate": 1.0856384949160314e-05, + "loss": 2.4514, + "step": 17024 + }, + { + "epoch": 1.3739811153256396, + "grad_norm": 0.7185690402984619, + "learning_rate": 1.0849232285374323e-05, + "loss": 2.4244, + "step": 17025 + }, + { + "epoch": 1.3740618190622227, + "grad_norm": 0.7732044458389282, + "learning_rate": 1.0842081843458496e-05, + "loss": 2.4855, + "step": 17026 + }, + { + "epoch": 1.3741425227988056, + "grad_norm": 0.6599788665771484, + "learning_rate": 1.0834933623591093e-05, + "loss": 2.4339, + "step": 17027 + }, + { + "epoch": 1.3742232265353886, + "grad_norm": 0.7193527817726135, + "learning_rate": 1.0827787625950192e-05, + "loss": 2.4284, + "step": 17028 + }, + { + "epoch": 1.3743039302719717, + "grad_norm": 0.7255674004554749, + "learning_rate": 1.082064385071393e-05, + "loss": 2.4056, + "step": 17029 + }, + { + "epoch": 1.3743846340085546, + "grad_norm": 0.7823398113250732, + "learning_rate": 1.0813502298060363e-05, + "loss": 2.4268, + "step": 17030 + }, + { + "epoch": 1.3744653377451377, + "grad_norm": 0.6839333176612854, + "learning_rate": 1.0806362968167427e-05, + "loss": 2.4415, + "step": 17031 + }, + { + "epoch": 1.3745460414817205, + "grad_norm": 0.798973560333252, + "learning_rate": 1.079922586121308e-05, + "loss": 2.4251, + "step": 17032 + }, + { + "epoch": 1.3746267452183036, + "grad_norm": 0.7234559655189514, + "learning_rate": 1.0792090977375203e-05, + "loss": 2.3821, + "step": 17033 + }, + { + "epoch": 1.3747074489548865, + "grad_norm": 0.6686646938323975, + "learning_rate": 1.0784958316831628e-05, + "loss": 2.4123, + "step": 17034 + }, + { + "epoch": 1.3747881526914696, + "grad_norm": 0.6656081676483154, + "learning_rate": 1.0777827879760084e-05, + "loss": 2.3527, + "step": 17035 + }, + { + "epoch": 1.3748688564280527, + "grad_norm": 0.6609933972358704, + 
"learning_rate": 1.0770699666338303e-05, + "loss": 2.4128, + "step": 17036 + }, + { + "epoch": 1.3749495601646355, + "grad_norm": 0.710719108581543, + "learning_rate": 1.0763573676743921e-05, + "loss": 2.4634, + "step": 17037 + }, + { + "epoch": 1.3750302639012186, + "grad_norm": 0.6638451814651489, + "learning_rate": 1.0756449911154554e-05, + "loss": 2.3828, + "step": 17038 + }, + { + "epoch": 1.3751109676378017, + "grad_norm": 0.7525094151496887, + "learning_rate": 1.0749328369747746e-05, + "loss": 2.4078, + "step": 17039 + }, + { + "epoch": 1.3751916713743846, + "grad_norm": 0.7343288064002991, + "learning_rate": 1.0742209052701002e-05, + "loss": 2.4731, + "step": 17040 + }, + { + "epoch": 1.3752723751109677, + "grad_norm": 0.7966243624687195, + "learning_rate": 1.0735091960191701e-05, + "loss": 2.3501, + "step": 17041 + }, + { + "epoch": 1.3753530788475508, + "grad_norm": 0.6693055033683777, + "learning_rate": 1.0727977092397256e-05, + "loss": 2.4214, + "step": 17042 + }, + { + "epoch": 1.3754337825841336, + "grad_norm": 0.6831601858139038, + "learning_rate": 1.0720864449494994e-05, + "loss": 2.4029, + "step": 17043 + }, + { + "epoch": 1.3755144863207167, + "grad_norm": 0.7081588506698608, + "learning_rate": 1.0713754031662149e-05, + "loss": 2.4532, + "step": 17044 + }, + { + "epoch": 1.3755951900572996, + "grad_norm": 0.698469877243042, + "learning_rate": 1.0706645839075957e-05, + "loss": 2.4181, + "step": 17045 + }, + { + "epoch": 1.3756758937938827, + "grad_norm": 0.652568519115448, + "learning_rate": 1.0699539871913556e-05, + "loss": 2.4761, + "step": 17046 + }, + { + "epoch": 1.3757565975304655, + "grad_norm": 0.7698256969451904, + "learning_rate": 1.0692436130352068e-05, + "loss": 2.4742, + "step": 17047 + }, + { + "epoch": 1.3758373012670486, + "grad_norm": 0.7192606329917908, + "learning_rate": 1.068533461456851e-05, + "loss": 2.401, + "step": 17048 + }, + { + "epoch": 1.3759180050036317, + "grad_norm": 0.6296666860580444, + "learning_rate": 
1.0678235324739894e-05, + "loss": 2.4628, + "step": 17049 + }, + { + "epoch": 1.3759987087402146, + "grad_norm": 0.7048724293708801, + "learning_rate": 1.0671138261043156e-05, + "loss": 2.4799, + "step": 17050 + }, + { + "epoch": 1.3760794124767977, + "grad_norm": 0.6724091172218323, + "learning_rate": 1.0664043423655146e-05, + "loss": 2.4108, + "step": 17051 + }, + { + "epoch": 1.3761601162133807, + "grad_norm": 0.6380212306976318, + "learning_rate": 1.0656950812752709e-05, + "loss": 2.3943, + "step": 17052 + }, + { + "epoch": 1.3762408199499636, + "grad_norm": 0.7005279660224915, + "learning_rate": 1.0649860428512604e-05, + "loss": 2.3623, + "step": 17053 + }, + { + "epoch": 1.3763215236865467, + "grad_norm": 0.719219982624054, + "learning_rate": 1.0642772271111534e-05, + "loss": 2.3873, + "step": 17054 + }, + { + "epoch": 1.3764022274231298, + "grad_norm": 0.7318363785743713, + "learning_rate": 1.063568634072616e-05, + "loss": 2.4335, + "step": 17055 + }, + { + "epoch": 1.3764829311597127, + "grad_norm": 0.7131830453872681, + "learning_rate": 1.062860263753308e-05, + "loss": 2.3829, + "step": 17056 + }, + { + "epoch": 1.3765636348962957, + "grad_norm": 0.7030664086341858, + "learning_rate": 1.0621521161708836e-05, + "loss": 2.3216, + "step": 17057 + }, + { + "epoch": 1.3766443386328788, + "grad_norm": 0.738999605178833, + "learning_rate": 1.0614441913429929e-05, + "loss": 2.4951, + "step": 17058 + }, + { + "epoch": 1.3767250423694617, + "grad_norm": 0.6926800012588501, + "learning_rate": 1.0607364892872806e-05, + "loss": 2.3977, + "step": 17059 + }, + { + "epoch": 1.3768057461060448, + "grad_norm": 0.6439639925956726, + "learning_rate": 1.0600290100213805e-05, + "loss": 2.4049, + "step": 17060 + }, + { + "epoch": 1.3768864498426276, + "grad_norm": 0.7035220265388489, + "learning_rate": 1.0593217535629264e-05, + "loss": 2.4212, + "step": 17061 + }, + { + "epoch": 1.3769671535792107, + "grad_norm": 0.705183207988739, + "learning_rate": 1.0586147199295482e-05, + 
"loss": 2.4244, + "step": 17062 + }, + { + "epoch": 1.3770478573157936, + "grad_norm": 0.7036949396133423, + "learning_rate": 1.057907909138861e-05, + "loss": 2.4254, + "step": 17063 + }, + { + "epoch": 1.3771285610523767, + "grad_norm": 0.7137075066566467, + "learning_rate": 1.0572013212084841e-05, + "loss": 2.4135, + "step": 17064 + }, + { + "epoch": 1.3772092647889598, + "grad_norm": 0.6973327398300171, + "learning_rate": 1.0564949561560267e-05, + "loss": 2.4568, + "step": 17065 + }, + { + "epoch": 1.3772899685255426, + "grad_norm": 0.7157370448112488, + "learning_rate": 1.0557888139990946e-05, + "loss": 2.3877, + "step": 17066 + }, + { + "epoch": 1.3773706722621257, + "grad_norm": 0.6622396111488342, + "learning_rate": 1.0550828947552848e-05, + "loss": 2.3636, + "step": 17067 + }, + { + "epoch": 1.3774513759987088, + "grad_norm": 0.7295750975608826, + "learning_rate": 1.0543771984421913e-05, + "loss": 2.4192, + "step": 17068 + }, + { + "epoch": 1.3775320797352917, + "grad_norm": 0.7245587110519409, + "learning_rate": 1.0536717250774053e-05, + "loss": 2.3575, + "step": 17069 + }, + { + "epoch": 1.3776127834718748, + "grad_norm": 0.6923871040344238, + "learning_rate": 1.052966474678503e-05, + "loss": 2.4547, + "step": 17070 + }, + { + "epoch": 1.3776934872084579, + "grad_norm": 0.6754410862922668, + "learning_rate": 1.0522614472630632e-05, + "loss": 2.4469, + "step": 17071 + }, + { + "epoch": 1.3777741909450407, + "grad_norm": 0.6979227662086487, + "learning_rate": 1.0515566428486612e-05, + "loss": 2.407, + "step": 17072 + }, + { + "epoch": 1.3778548946816238, + "grad_norm": 0.7050029635429382, + "learning_rate": 1.050852061452856e-05, + "loss": 2.3937, + "step": 17073 + }, + { + "epoch": 1.377935598418207, + "grad_norm": 0.676030158996582, + "learning_rate": 1.0501477030932117e-05, + "loss": 2.4144, + "step": 17074 + }, + { + "epoch": 1.3780163021547898, + "grad_norm": 0.6984726786613464, + "learning_rate": 1.0494435677872827e-05, + "loss": 2.4541, + "step": 
17075 + }, + { + "epoch": 1.3780970058913729, + "grad_norm": 0.6987836956977844, + "learning_rate": 1.0487396555526141e-05, + "loss": 2.3984, + "step": 17076 + }, + { + "epoch": 1.3781777096279557, + "grad_norm": 0.7071307897567749, + "learning_rate": 1.0480359664067529e-05, + "loss": 2.3861, + "step": 17077 + }, + { + "epoch": 1.3782584133645388, + "grad_norm": 0.6713467836380005, + "learning_rate": 1.0473325003672384e-05, + "loss": 2.4029, + "step": 17078 + }, + { + "epoch": 1.3783391171011217, + "grad_norm": 0.7389634847640991, + "learning_rate": 1.046629257451599e-05, + "loss": 2.415, + "step": 17079 + }, + { + "epoch": 1.3784198208377048, + "grad_norm": 0.7122809886932373, + "learning_rate": 1.0459262376773627e-05, + "loss": 2.4278, + "step": 17080 + }, + { + "epoch": 1.3785005245742878, + "grad_norm": 0.7036066651344299, + "learning_rate": 1.045223441062051e-05, + "loss": 2.4276, + "step": 17081 + }, + { + "epoch": 1.3785812283108707, + "grad_norm": 0.7709795236587524, + "learning_rate": 1.0445208676231811e-05, + "loss": 2.4398, + "step": 17082 + }, + { + "epoch": 1.3786619320474538, + "grad_norm": 0.7131057977676392, + "learning_rate": 1.0438185173782589e-05, + "loss": 2.4414, + "step": 17083 + }, + { + "epoch": 1.3787426357840369, + "grad_norm": 0.7172132730484009, + "learning_rate": 1.0431163903447904e-05, + "loss": 2.4574, + "step": 17084 + }, + { + "epoch": 1.3788233395206198, + "grad_norm": 0.6760988831520081, + "learning_rate": 1.0424144865402774e-05, + "loss": 2.442, + "step": 17085 + }, + { + "epoch": 1.3789040432572028, + "grad_norm": 0.701665997505188, + "learning_rate": 1.041712805982209e-05, + "loss": 2.4012, + "step": 17086 + }, + { + "epoch": 1.378984746993786, + "grad_norm": 0.661851167678833, + "learning_rate": 1.0410113486880746e-05, + "loss": 2.3591, + "step": 17087 + }, + { + "epoch": 1.3790654507303688, + "grad_norm": 0.6929948925971985, + "learning_rate": 1.0403101146753569e-05, + "loss": 2.4285, + "step": 17088 + }, + { + "epoch": 
1.3791461544669519, + "grad_norm": 0.703576922416687, + "learning_rate": 1.0396091039615308e-05, + "loss": 2.4643, + "step": 17089 + }, + { + "epoch": 1.3792268582035347, + "grad_norm": 0.6697961688041687, + "learning_rate": 1.038908316564069e-05, + "loss": 2.4046, + "step": 17090 + }, + { + "epoch": 1.3793075619401178, + "grad_norm": 0.7338510155677795, + "learning_rate": 1.0382077525004396e-05, + "loss": 2.3507, + "step": 17091 + }, + { + "epoch": 1.3793882656767007, + "grad_norm": 0.6967883110046387, + "learning_rate": 1.0375074117880956e-05, + "loss": 2.4458, + "step": 17092 + }, + { + "epoch": 1.3794689694132838, + "grad_norm": 0.7204736471176147, + "learning_rate": 1.0368072944444962e-05, + "loss": 2.427, + "step": 17093 + }, + { + "epoch": 1.3795496731498669, + "grad_norm": 0.7665053606033325, + "learning_rate": 1.0361074004870907e-05, + "loss": 2.3985, + "step": 17094 + }, + { + "epoch": 1.3796303768864497, + "grad_norm": 0.7157881855964661, + "learning_rate": 1.0354077299333187e-05, + "loss": 2.4229, + "step": 17095 + }, + { + "epoch": 1.3797110806230328, + "grad_norm": 0.6643819808959961, + "learning_rate": 1.0347082828006194e-05, + "loss": 2.357, + "step": 17096 + }, + { + "epoch": 1.379791784359616, + "grad_norm": 0.6965252757072449, + "learning_rate": 1.0340090591064255e-05, + "loss": 2.42, + "step": 17097 + }, + { + "epoch": 1.3798724880961988, + "grad_norm": 0.767876923084259, + "learning_rate": 1.0333100588681633e-05, + "loss": 2.4019, + "step": 17098 + }, + { + "epoch": 1.3799531918327819, + "grad_norm": 0.6687513589859009, + "learning_rate": 1.0326112821032541e-05, + "loss": 2.3515, + "step": 17099 + }, + { + "epoch": 1.380033895569365, + "grad_norm": 0.674007773399353, + "learning_rate": 1.031912728829112e-05, + "loss": 2.4281, + "step": 17100 + }, + { + "epoch": 1.3801145993059478, + "grad_norm": 0.6486735939979553, + "learning_rate": 1.0312143990631495e-05, + "loss": 2.4324, + "step": 17101 + }, + { + "epoch": 1.380195303042531, + "grad_norm": 
0.7174487709999084, + "learning_rate": 1.0305162928227674e-05, + "loss": 2.4445, + "step": 17102 + }, + { + "epoch": 1.380276006779114, + "grad_norm": 0.6515870690345764, + "learning_rate": 1.029818410125365e-05, + "loss": 2.4078, + "step": 17103 + }, + { + "epoch": 1.3803567105156969, + "grad_norm": 0.697830080986023, + "learning_rate": 1.0291207509883383e-05, + "loss": 2.4024, + "step": 17104 + }, + { + "epoch": 1.38043741425228, + "grad_norm": 0.7636575102806091, + "learning_rate": 1.0284233154290711e-05, + "loss": 2.3912, + "step": 17105 + }, + { + "epoch": 1.3805181179888628, + "grad_norm": 0.6910358667373657, + "learning_rate": 1.0277261034649466e-05, + "loss": 2.4099, + "step": 17106 + }, + { + "epoch": 1.380598821725446, + "grad_norm": 0.6778038740158081, + "learning_rate": 1.0270291151133415e-05, + "loss": 2.4111, + "step": 17107 + }, + { + "epoch": 1.3806795254620288, + "grad_norm": 0.6927553415298462, + "learning_rate": 1.0263323503916255e-05, + "loss": 2.4239, + "step": 17108 + }, + { + "epoch": 1.3807602291986119, + "grad_norm": 0.6654019355773926, + "learning_rate": 1.0256358093171658e-05, + "loss": 2.4374, + "step": 17109 + }, + { + "epoch": 1.380840932935195, + "grad_norm": 0.7174705266952515, + "learning_rate": 1.0249394919073219e-05, + "loss": 2.4142, + "step": 17110 + }, + { + "epoch": 1.3809216366717778, + "grad_norm": 0.7386046648025513, + "learning_rate": 1.0242433981794463e-05, + "loss": 2.4453, + "step": 17111 + }, + { + "epoch": 1.381002340408361, + "grad_norm": 0.6723792552947998, + "learning_rate": 1.0235475281508866e-05, + "loss": 2.4595, + "step": 17112 + }, + { + "epoch": 1.381083044144944, + "grad_norm": 0.7069140672683716, + "learning_rate": 1.0228518818389887e-05, + "loss": 2.4434, + "step": 17113 + }, + { + "epoch": 1.3811637478815268, + "grad_norm": 0.7239270210266113, + "learning_rate": 1.0221564592610888e-05, + "loss": 2.5121, + "step": 17114 + }, + { + "epoch": 1.38124445161811, + "grad_norm": 0.6907179951667786, + 
"learning_rate": 1.0214612604345175e-05, + "loss": 2.3673, + "step": 17115 + }, + { + "epoch": 1.381325155354693, + "grad_norm": 0.6908708810806274, + "learning_rate": 1.020766285376602e-05, + "loss": 2.4419, + "step": 17116 + }, + { + "epoch": 1.381405859091276, + "grad_norm": 0.6947401165962219, + "learning_rate": 1.0200715341046618e-05, + "loss": 2.4566, + "step": 17117 + }, + { + "epoch": 1.381486562827859, + "grad_norm": 0.687776505947113, + "learning_rate": 1.019377006636012e-05, + "loss": 2.4631, + "step": 17118 + }, + { + "epoch": 1.381567266564442, + "grad_norm": 0.7059805989265442, + "learning_rate": 1.0186827029879642e-05, + "loss": 2.3892, + "step": 17119 + }, + { + "epoch": 1.381647970301025, + "grad_norm": 0.685351550579071, + "learning_rate": 1.0179886231778224e-05, + "loss": 2.4041, + "step": 17120 + }, + { + "epoch": 1.381728674037608, + "grad_norm": 0.6662759184837341, + "learning_rate": 1.0172947672228817e-05, + "loss": 2.4254, + "step": 17121 + }, + { + "epoch": 1.3818093777741909, + "grad_norm": 0.6769386529922485, + "learning_rate": 1.0166011351404358e-05, + "loss": 2.5057, + "step": 17122 + }, + { + "epoch": 1.381890081510774, + "grad_norm": 0.8168340921401978, + "learning_rate": 1.0159077269477746e-05, + "loss": 2.4936, + "step": 17123 + }, + { + "epoch": 1.3819707852473568, + "grad_norm": 0.6659611463546753, + "learning_rate": 1.0152145426621751e-05, + "loss": 2.4062, + "step": 17124 + }, + { + "epoch": 1.38205148898394, + "grad_norm": 0.7131680846214294, + "learning_rate": 1.0145215823009158e-05, + "loss": 2.3767, + "step": 17125 + }, + { + "epoch": 1.382132192720523, + "grad_norm": 0.7241190075874329, + "learning_rate": 1.0138288458812673e-05, + "loss": 2.4082, + "step": 17126 + }, + { + "epoch": 1.3822128964571059, + "grad_norm": 0.6905619502067566, + "learning_rate": 1.0131363334204947e-05, + "loss": 2.3859, + "step": 17127 + }, + { + "epoch": 1.382293600193689, + "grad_norm": 0.7163190245628357, + "learning_rate": 
1.0124440449358551e-05, + "loss": 2.4238, + "step": 17128 + }, + { + "epoch": 1.382374303930272, + "grad_norm": 0.6857485175132751, + "learning_rate": 1.0117519804446041e-05, + "loss": 2.4076, + "step": 17129 + }, + { + "epoch": 1.382455007666855, + "grad_norm": 0.6817807555198669, + "learning_rate": 1.0110601399639918e-05, + "loss": 2.4226, + "step": 17130 + }, + { + "epoch": 1.382535711403438, + "grad_norm": 0.714421808719635, + "learning_rate": 1.0103685235112558e-05, + "loss": 2.3581, + "step": 17131 + }, + { + "epoch": 1.382616415140021, + "grad_norm": 0.7885473370552063, + "learning_rate": 1.0096771311036357e-05, + "loss": 2.3821, + "step": 17132 + }, + { + "epoch": 1.382697118876604, + "grad_norm": 0.6432569026947021, + "learning_rate": 1.0089859627583642e-05, + "loss": 2.3899, + "step": 17133 + }, + { + "epoch": 1.382777822613187, + "grad_norm": 0.6620168089866638, + "learning_rate": 1.0082950184926632e-05, + "loss": 2.4503, + "step": 17134 + }, + { + "epoch": 1.38285852634977, + "grad_norm": 0.6495606303215027, + "learning_rate": 1.0076042983237544e-05, + "loss": 2.3606, + "step": 17135 + }, + { + "epoch": 1.382939230086353, + "grad_norm": 0.7192469835281372, + "learning_rate": 1.006913802268855e-05, + "loss": 2.425, + "step": 17136 + }, + { + "epoch": 1.3830199338229359, + "grad_norm": 0.6835115551948547, + "learning_rate": 1.0062235303451706e-05, + "loss": 2.3605, + "step": 17137 + }, + { + "epoch": 1.383100637559519, + "grad_norm": 0.7469161748886108, + "learning_rate": 1.0055334825699059e-05, + "loss": 2.4811, + "step": 17138 + }, + { + "epoch": 1.383181341296102, + "grad_norm": 0.7641372084617615, + "learning_rate": 1.0048436589602572e-05, + "loss": 2.4317, + "step": 17139 + }, + { + "epoch": 1.383262045032685, + "grad_norm": 0.7059566378593445, + "learning_rate": 1.0041540595334186e-05, + "loss": 2.4677, + "step": 17140 + }, + { + "epoch": 1.383342748769268, + "grad_norm": 0.7218295931816101, + "learning_rate": 1.0034646843065777e-05, + "loss": 
2.3889, + "step": 17141 + }, + { + "epoch": 1.383423452505851, + "grad_norm": 0.7059688568115234, + "learning_rate": 1.0027755332969124e-05, + "loss": 2.4276, + "step": 17142 + }, + { + "epoch": 1.383504156242434, + "grad_norm": 0.7444838285446167, + "learning_rate": 1.0020866065216017e-05, + "loss": 2.4647, + "step": 17143 + }, + { + "epoch": 1.383584859979017, + "grad_norm": 0.662229597568512, + "learning_rate": 1.0013979039978127e-05, + "loss": 2.3913, + "step": 17144 + }, + { + "epoch": 1.3836655637156001, + "grad_norm": 0.6696064472198486, + "learning_rate": 1.0007094257427097e-05, + "loss": 2.3904, + "step": 17145 + }, + { + "epoch": 1.383746267452183, + "grad_norm": 0.7516316175460815, + "learning_rate": 1.0000211717734541e-05, + "loss": 2.3621, + "step": 17146 + }, + { + "epoch": 1.383826971188766, + "grad_norm": 0.6833345293998718, + "learning_rate": 9.993331421071961e-06, + "loss": 2.4113, + "step": 17147 + }, + { + "epoch": 1.3839076749253492, + "grad_norm": 0.675074577331543, + "learning_rate": 9.986453367610827e-06, + "loss": 2.398, + "step": 17148 + }, + { + "epoch": 1.383988378661932, + "grad_norm": 0.7046546936035156, + "learning_rate": 9.979577557522579e-06, + "loss": 2.4441, + "step": 17149 + }, + { + "epoch": 1.3840690823985151, + "grad_norm": 0.7228004336357117, + "learning_rate": 9.972703990978582e-06, + "loss": 2.4451, + "step": 17150 + }, + { + "epoch": 1.384149786135098, + "grad_norm": 0.6642273664474487, + "learning_rate": 9.965832668150132e-06, + "loss": 2.3809, + "step": 17151 + }, + { + "epoch": 1.384230489871681, + "grad_norm": 0.7238738536834717, + "learning_rate": 9.958963589208493e-06, + "loss": 2.4283, + "step": 17152 + }, + { + "epoch": 1.384311193608264, + "grad_norm": 0.7356482744216919, + "learning_rate": 9.952096754324847e-06, + "loss": 2.4666, + "step": 17153 + }, + { + "epoch": 1.384391897344847, + "grad_norm": 0.7092667818069458, + "learning_rate": 9.945232163670327e-06, + "loss": 2.5028, + "step": 17154 + }, + { + "epoch": 
1.38447260108143, + "grad_norm": 0.6972974538803101, + "learning_rate": 9.938369817416049e-06, + "loss": 2.4223, + "step": 17155 + }, + { + "epoch": 1.384553304818013, + "grad_norm": 0.7163854837417603, + "learning_rate": 9.931509715733e-06, + "loss": 2.4256, + "step": 17156 + }, + { + "epoch": 1.384634008554596, + "grad_norm": 0.7319930195808411, + "learning_rate": 9.924651858792166e-06, + "loss": 2.4208, + "step": 17157 + }, + { + "epoch": 1.3847147122911792, + "grad_norm": 0.6813424825668335, + "learning_rate": 9.917796246764466e-06, + "loss": 2.3794, + "step": 17158 + }, + { + "epoch": 1.384795416027762, + "grad_norm": 0.7059821486473083, + "learning_rate": 9.910942879820761e-06, + "loss": 2.4462, + "step": 17159 + }, + { + "epoch": 1.384876119764345, + "grad_norm": 0.726754903793335, + "learning_rate": 9.904091758131862e-06, + "loss": 2.4037, + "step": 17160 + }, + { + "epoch": 1.3849568235009282, + "grad_norm": 0.6972840428352356, + "learning_rate": 9.897242881868508e-06, + "loss": 2.4275, + "step": 17161 + }, + { + "epoch": 1.385037527237511, + "grad_norm": 0.6906942129135132, + "learning_rate": 9.890396251201405e-06, + "loss": 2.4547, + "step": 17162 + }, + { + "epoch": 1.3851182309740941, + "grad_norm": 0.6928840279579163, + "learning_rate": 9.883551866301165e-06, + "loss": 2.4622, + "step": 17163 + }, + { + "epoch": 1.3851989347106772, + "grad_norm": 0.6840118169784546, + "learning_rate": 9.876709727338374e-06, + "loss": 2.4546, + "step": 17164 + }, + { + "epoch": 1.38527963844726, + "grad_norm": 0.6800721287727356, + "learning_rate": 9.86986983448358e-06, + "loss": 2.508, + "step": 17165 + }, + { + "epoch": 1.3853603421838432, + "grad_norm": 0.678666353225708, + "learning_rate": 9.863032187907217e-06, + "loss": 2.383, + "step": 17166 + }, + { + "epoch": 1.385441045920426, + "grad_norm": 0.7311298251152039, + "learning_rate": 9.856196787779714e-06, + "loss": 2.4111, + "step": 17167 + }, + { + "epoch": 1.3855217496570091, + "grad_norm": 0.6527237296104431, 
+ "learning_rate": 9.849363634271425e-06, + "loss": 2.3592, + "step": 17168 + }, + { + "epoch": 1.385602453393592, + "grad_norm": 0.7478907108306885, + "learning_rate": 9.842532727552645e-06, + "loss": 2.4321, + "step": 17169 + }, + { + "epoch": 1.385683157130175, + "grad_norm": 0.6855963468551636, + "learning_rate": 9.835704067793628e-06, + "loss": 2.3966, + "step": 17170 + }, + { + "epoch": 1.3857638608667582, + "grad_norm": 0.7468744516372681, + "learning_rate": 9.828877655164571e-06, + "loss": 2.3695, + "step": 17171 + }, + { + "epoch": 1.385844564603341, + "grad_norm": 0.7127626538276672, + "learning_rate": 9.82205348983558e-06, + "loss": 2.4718, + "step": 17172 + }, + { + "epoch": 1.3859252683399241, + "grad_norm": 0.6831564903259277, + "learning_rate": 9.815231571976735e-06, + "loss": 2.373, + "step": 17173 + }, + { + "epoch": 1.3860059720765072, + "grad_norm": 0.7020923495292664, + "learning_rate": 9.808411901758075e-06, + "loss": 2.4516, + "step": 17174 + }, + { + "epoch": 1.38608667581309, + "grad_norm": 0.8129574060440063, + "learning_rate": 9.801594479349563e-06, + "loss": 2.4157, + "step": 17175 + }, + { + "epoch": 1.3861673795496732, + "grad_norm": 0.6603944301605225, + "learning_rate": 9.794779304921087e-06, + "loss": 2.386, + "step": 17176 + }, + { + "epoch": 1.3862480832862563, + "grad_norm": 0.669863224029541, + "learning_rate": 9.78796637864251e-06, + "loss": 2.4273, + "step": 17177 + }, + { + "epoch": 1.3863287870228391, + "grad_norm": 0.7654524445533752, + "learning_rate": 9.78115570068362e-06, + "loss": 2.4868, + "step": 17178 + }, + { + "epoch": 1.3864094907594222, + "grad_norm": 0.7104062438011169, + "learning_rate": 9.774347271214169e-06, + "loss": 2.4684, + "step": 17179 + }, + { + "epoch": 1.3864901944960053, + "grad_norm": 0.6499059796333313, + "learning_rate": 9.767541090403831e-06, + "loss": 2.4131, + "step": 17180 + }, + { + "epoch": 1.3865708982325882, + "grad_norm": 0.7515703439712524, + "learning_rate": 9.760737158422262e-06, + 
"loss": 2.4484, + "step": 17181 + }, + { + "epoch": 1.3866516019691713, + "grad_norm": 0.7019369006156921, + "learning_rate": 9.753935475438991e-06, + "loss": 2.4393, + "step": 17182 + }, + { + "epoch": 1.3867323057057541, + "grad_norm": 0.7191709280014038, + "learning_rate": 9.747136041623562e-06, + "loss": 2.4533, + "step": 17183 + }, + { + "epoch": 1.3868130094423372, + "grad_norm": 0.6970816254615784, + "learning_rate": 9.740338857145438e-06, + "loss": 2.4886, + "step": 17184 + }, + { + "epoch": 1.38689371317892, + "grad_norm": 0.6682983636856079, + "learning_rate": 9.733543922173982e-06, + "loss": 2.3896, + "step": 17185 + }, + { + "epoch": 1.3869744169155032, + "grad_norm": 0.735559344291687, + "learning_rate": 9.726751236878584e-06, + "loss": 2.4777, + "step": 17186 + }, + { + "epoch": 1.3870551206520862, + "grad_norm": 0.790460467338562, + "learning_rate": 9.71996080142854e-06, + "loss": 2.3773, + "step": 17187 + }, + { + "epoch": 1.3871358243886691, + "grad_norm": 0.6593269109725952, + "learning_rate": 9.713172615993038e-06, + "loss": 2.461, + "step": 17188 + }, + { + "epoch": 1.3872165281252522, + "grad_norm": 0.7211339473724365, + "learning_rate": 9.706386680741275e-06, + "loss": 2.4155, + "step": 17189 + }, + { + "epoch": 1.3872972318618353, + "grad_norm": 0.7158735990524292, + "learning_rate": 9.699602995842406e-06, + "loss": 2.4214, + "step": 17190 + }, + { + "epoch": 1.3873779355984182, + "grad_norm": 0.7172560095787048, + "learning_rate": 9.692821561465493e-06, + "loss": 2.3617, + "step": 17191 + }, + { + "epoch": 1.3874586393350012, + "grad_norm": 0.721144437789917, + "learning_rate": 9.686042377779513e-06, + "loss": 2.3984, + "step": 17192 + }, + { + "epoch": 1.3875393430715843, + "grad_norm": 0.7066751718521118, + "learning_rate": 9.679265444953444e-06, + "loss": 2.4735, + "step": 17193 + }, + { + "epoch": 1.3876200468081672, + "grad_norm": 0.7111334204673767, + "learning_rate": 9.672490763156194e-06, + "loss": 2.4336, + "step": 17194 + }, + { + 
"epoch": 1.3877007505447503, + "grad_norm": 0.6845266222953796, + "learning_rate": 9.665718332556584e-06, + "loss": 2.466, + "step": 17195 + }, + { + "epoch": 1.3877814542813331, + "grad_norm": 0.6982793807983398, + "learning_rate": 9.6589481533234e-06, + "loss": 2.3819, + "step": 17196 + }, + { + "epoch": 1.3878621580179162, + "grad_norm": 0.8404912352561951, + "learning_rate": 9.652180225625407e-06, + "loss": 2.4329, + "step": 17197 + }, + { + "epoch": 1.387942861754499, + "grad_norm": 0.7335420250892639, + "learning_rate": 9.645414549631227e-06, + "loss": 2.4368, + "step": 17198 + }, + { + "epoch": 1.3880235654910822, + "grad_norm": 0.7425113916397095, + "learning_rate": 9.638651125509513e-06, + "loss": 2.41, + "step": 17199 + }, + { + "epoch": 1.3881042692276653, + "grad_norm": 0.6818472146987915, + "learning_rate": 9.631889953428818e-06, + "loss": 2.4227, + "step": 17200 + }, + { + "epoch": 1.3881849729642481, + "grad_norm": 0.6991598010063171, + "learning_rate": 9.625131033557655e-06, + "loss": 2.422, + "step": 17201 + }, + { + "epoch": 1.3882656767008312, + "grad_norm": 0.6927391886711121, + "learning_rate": 9.618374366064465e-06, + "loss": 2.4092, + "step": 17202 + }, + { + "epoch": 1.3883463804374143, + "grad_norm": 0.6987093687057495, + "learning_rate": 9.611619951117657e-06, + "loss": 2.419, + "step": 17203 + }, + { + "epoch": 1.3884270841739972, + "grad_norm": 0.7766227722167969, + "learning_rate": 9.604867788885552e-06, + "loss": 2.4174, + "step": 17204 + }, + { + "epoch": 1.3885077879105803, + "grad_norm": 0.77024245262146, + "learning_rate": 9.598117879536427e-06, + "loss": 2.3851, + "step": 17205 + }, + { + "epoch": 1.3885884916471634, + "grad_norm": 0.7106937170028687, + "learning_rate": 9.591370223238515e-06, + "loss": 2.3322, + "step": 17206 + }, + { + "epoch": 1.3886691953837462, + "grad_norm": 0.7056468725204468, + "learning_rate": 9.584624820160016e-06, + "loss": 2.4496, + "step": 17207 + }, + { + "epoch": 1.3887498991203293, + "grad_norm": 
0.6738306879997253, + "learning_rate": 9.57788167046899e-06, + "loss": 2.3853, + "step": 17208 + }, + { + "epoch": 1.3888306028569124, + "grad_norm": 0.6830081343650818, + "learning_rate": 9.57114077433352e-06, + "loss": 2.3974, + "step": 17209 + }, + { + "epoch": 1.3889113065934953, + "grad_norm": 0.6968281865119934, + "learning_rate": 9.564402131921612e-06, + "loss": 2.4349, + "step": 17210 + }, + { + "epoch": 1.3889920103300784, + "grad_norm": 0.720506489276886, + "learning_rate": 9.55766574340119e-06, + "loss": 2.386, + "step": 17211 + }, + { + "epoch": 1.3890727140666612, + "grad_norm": 0.7361373901367188, + "learning_rate": 9.550931608940161e-06, + "loss": 2.4303, + "step": 17212 + }, + { + "epoch": 1.3891534178032443, + "grad_norm": 0.6967737674713135, + "learning_rate": 9.544199728706383e-06, + "loss": 2.4073, + "step": 17213 + }, + { + "epoch": 1.3892341215398272, + "grad_norm": 0.6645474433898926, + "learning_rate": 9.537470102867573e-06, + "loss": 2.4236, + "step": 17214 + }, + { + "epoch": 1.3893148252764103, + "grad_norm": 0.7314795851707458, + "learning_rate": 9.53074273159148e-06, + "loss": 2.4362, + "step": 17215 + }, + { + "epoch": 1.3893955290129933, + "grad_norm": 0.7935917377471924, + "learning_rate": 9.524017615045789e-06, + "loss": 2.3982, + "step": 17216 + }, + { + "epoch": 1.3894762327495762, + "grad_norm": 0.7083787322044373, + "learning_rate": 9.517294753398064e-06, + "loss": 2.4095, + "step": 17217 + }, + { + "epoch": 1.3895569364861593, + "grad_norm": 0.6737664937973022, + "learning_rate": 9.510574146815876e-06, + "loss": 2.457, + "step": 17218 + }, + { + "epoch": 1.3896376402227424, + "grad_norm": 0.6705507040023804, + "learning_rate": 9.50385579546672e-06, + "loss": 2.3893, + "step": 17219 + }, + { + "epoch": 1.3897183439593253, + "grad_norm": 0.6711611151695251, + "learning_rate": 9.497139699518042e-06, + "loss": 2.3982, + "step": 17220 + }, + { + "epoch": 1.3897990476959083, + "grad_norm": 0.7133504748344421, + "learning_rate": 
9.490425859137219e-06, + "loss": 2.4178, + "step": 17221 + }, + { + "epoch": 1.3898797514324914, + "grad_norm": 0.6962296366691589, + "learning_rate": 9.483714274491572e-06, + "loss": 2.4126, + "step": 17222 + }, + { + "epoch": 1.3899604551690743, + "grad_norm": 0.7658503651618958, + "learning_rate": 9.477004945748402e-06, + "loss": 2.3047, + "step": 17223 + }, + { + "epoch": 1.3900411589056574, + "grad_norm": 0.706066370010376, + "learning_rate": 9.470297873074885e-06, + "loss": 2.4055, + "step": 17224 + }, + { + "epoch": 1.3901218626422405, + "grad_norm": 0.6563149094581604, + "learning_rate": 9.463593056638187e-06, + "loss": 2.4425, + "step": 17225 + }, + { + "epoch": 1.3902025663788233, + "grad_norm": 0.7133740782737732, + "learning_rate": 9.45689049660543e-06, + "loss": 2.3917, + "step": 17226 + }, + { + "epoch": 1.3902832701154064, + "grad_norm": 0.6759207248687744, + "learning_rate": 9.450190193143626e-06, + "loss": 2.4261, + "step": 17227 + }, + { + "epoch": 1.3903639738519893, + "grad_norm": 0.7461724877357483, + "learning_rate": 9.443492146419786e-06, + "loss": 2.4121, + "step": 17228 + }, + { + "epoch": 1.3904446775885724, + "grad_norm": 0.6825011372566223, + "learning_rate": 9.436796356600842e-06, + "loss": 2.3746, + "step": 17229 + }, + { + "epoch": 1.3905253813251552, + "grad_norm": 0.7314637303352356, + "learning_rate": 9.430102823853659e-06, + "loss": 2.4246, + "step": 17230 + }, + { + "epoch": 1.3906060850617383, + "grad_norm": 0.6963483095169067, + "learning_rate": 9.423411548345063e-06, + "loss": 2.3504, + "step": 17231 + }, + { + "epoch": 1.3906867887983214, + "grad_norm": 0.7879536747932434, + "learning_rate": 9.41672253024185e-06, + "loss": 2.4454, + "step": 17232 + }, + { + "epoch": 1.3907674925349043, + "grad_norm": 0.6961038708686829, + "learning_rate": 9.410035769710668e-06, + "loss": 2.4107, + "step": 17233 + }, + { + "epoch": 1.3908481962714874, + "grad_norm": 0.6528958082199097, + "learning_rate": 9.403351266918215e-06, + "loss": 
2.4131, + "step": 17234 + }, + { + "epoch": 1.3909289000080705, + "grad_norm": 0.8091046810150146, + "learning_rate": 9.396669022031057e-06, + "loss": 2.4143, + "step": 17235 + }, + { + "epoch": 1.3910096037446533, + "grad_norm": 0.7430968880653381, + "learning_rate": 9.389989035215774e-06, + "loss": 2.4197, + "step": 17236 + }, + { + "epoch": 1.3910903074812364, + "grad_norm": 0.7089489102363586, + "learning_rate": 9.383311306638797e-06, + "loss": 2.4179, + "step": 17237 + }, + { + "epoch": 1.3911710112178195, + "grad_norm": 0.7121657729148865, + "learning_rate": 9.376635836466574e-06, + "loss": 2.4136, + "step": 17238 + }, + { + "epoch": 1.3912517149544024, + "grad_norm": 0.6793569326400757, + "learning_rate": 9.369962624865503e-06, + "loss": 2.4029, + "step": 17239 + }, + { + "epoch": 1.3913324186909855, + "grad_norm": 0.7534452080726624, + "learning_rate": 9.363291672001828e-06, + "loss": 2.421, + "step": 17240 + }, + { + "epoch": 1.3914131224275683, + "grad_norm": 0.6758937239646912, + "learning_rate": 9.356622978041873e-06, + "loss": 2.378, + "step": 17241 + }, + { + "epoch": 1.3914938261641514, + "grad_norm": 0.7330620288848877, + "learning_rate": 9.349956543151839e-06, + "loss": 2.3983, + "step": 17242 + }, + { + "epoch": 1.3915745299007343, + "grad_norm": 0.7044413089752197, + "learning_rate": 9.343292367497835e-06, + "loss": 2.4204, + "step": 17243 + }, + { + "epoch": 1.3916552336373174, + "grad_norm": 0.7051666975021362, + "learning_rate": 9.336630451245954e-06, + "loss": 2.3994, + "step": 17244 + }, + { + "epoch": 1.3917359373739004, + "grad_norm": 0.721764326095581, + "learning_rate": 9.32997079456227e-06, + "loss": 2.4127, + "step": 17245 + }, + { + "epoch": 1.3918166411104833, + "grad_norm": 0.7074810862541199, + "learning_rate": 9.323313397612698e-06, + "loss": 2.4449, + "step": 17246 + }, + { + "epoch": 1.3918973448470664, + "grad_norm": 0.7203366160392761, + "learning_rate": 9.316658260563193e-06, + "loss": 2.3564, + "step": 17247 + }, + { + 
"epoch": 1.3919780485836495, + "grad_norm": 0.6879156827926636, + "learning_rate": 9.310005383579623e-06, + "loss": 2.3568, + "step": 17248 + }, + { + "epoch": 1.3920587523202324, + "grad_norm": 0.6491550803184509, + "learning_rate": 9.303354766827776e-06, + "loss": 2.421, + "step": 17249 + }, + { + "epoch": 1.3921394560568154, + "grad_norm": 0.683704674243927, + "learning_rate": 9.29670641047341e-06, + "loss": 2.4633, + "step": 17250 + }, + { + "epoch": 1.3922201597933985, + "grad_norm": 0.6716236472129822, + "learning_rate": 9.290060314682203e-06, + "loss": 2.4423, + "step": 17251 + }, + { + "epoch": 1.3923008635299814, + "grad_norm": 0.7086344957351685, + "learning_rate": 9.283416479619844e-06, + "loss": 2.3877, + "step": 17252 + }, + { + "epoch": 1.3923815672665645, + "grad_norm": 0.6638349294662476, + "learning_rate": 9.276774905451869e-06, + "loss": 2.4499, + "step": 17253 + }, + { + "epoch": 1.3924622710031476, + "grad_norm": 0.7091326713562012, + "learning_rate": 9.27013559234381e-06, + "loss": 2.4659, + "step": 17254 + }, + { + "epoch": 1.3925429747397304, + "grad_norm": 0.6906822323799133, + "learning_rate": 9.263498540461157e-06, + "loss": 2.4195, + "step": 17255 + }, + { + "epoch": 1.3926236784763135, + "grad_norm": 0.7003819942474365, + "learning_rate": 9.256863749969302e-06, + "loss": 2.4156, + "step": 17256 + }, + { + "epoch": 1.3927043822128964, + "grad_norm": 0.7270472645759583, + "learning_rate": 9.250231221033601e-06, + "loss": 2.4197, + "step": 17257 + }, + { + "epoch": 1.3927850859494795, + "grad_norm": 0.7070592641830444, + "learning_rate": 9.243600953819376e-06, + "loss": 2.4296, + "step": 17258 + }, + { + "epoch": 1.3928657896860623, + "grad_norm": 0.6560600996017456, + "learning_rate": 9.23697294849184e-06, + "loss": 2.4441, + "step": 17259 + }, + { + "epoch": 1.3929464934226454, + "grad_norm": 0.6654617190361023, + "learning_rate": 9.230347205216194e-06, + "loss": 2.3406, + "step": 17260 + }, + { + "epoch": 1.3930271971592285, + 
"grad_norm": 0.7147239446640015, + "learning_rate": 9.223723724157563e-06, + "loss": 2.4203, + "step": 17261 + }, + { + "epoch": 1.3931079008958114, + "grad_norm": 0.7148180603981018, + "learning_rate": 9.217102505481046e-06, + "loss": 2.4525, + "step": 17262 + }, + { + "epoch": 1.3931886046323945, + "grad_norm": 0.6779814958572388, + "learning_rate": 9.210483549351623e-06, + "loss": 2.4051, + "step": 17263 + }, + { + "epoch": 1.3932693083689776, + "grad_norm": 0.6880484223365784, + "learning_rate": 9.203866855934307e-06, + "loss": 2.4492, + "step": 17264 + }, + { + "epoch": 1.3933500121055604, + "grad_norm": 0.7845660448074341, + "learning_rate": 9.197252425393954e-06, + "loss": 2.4448, + "step": 17265 + }, + { + "epoch": 1.3934307158421435, + "grad_norm": 0.7001363635063171, + "learning_rate": 9.190640257895433e-06, + "loss": 2.4226, + "step": 17266 + }, + { + "epoch": 1.3935114195787266, + "grad_norm": 0.7282695770263672, + "learning_rate": 9.184030353603524e-06, + "loss": 2.4354, + "step": 17267 + }, + { + "epoch": 1.3935921233153095, + "grad_norm": 0.7547619342803955, + "learning_rate": 9.177422712683003e-06, + "loss": 2.456, + "step": 17268 + }, + { + "epoch": 1.3936728270518925, + "grad_norm": 0.7191921472549438, + "learning_rate": 9.170817335298499e-06, + "loss": 2.3923, + "step": 17269 + }, + { + "epoch": 1.3937535307884756, + "grad_norm": 0.6578717827796936, + "learning_rate": 9.164214221614654e-06, + "loss": 2.4354, + "step": 17270 + }, + { + "epoch": 1.3938342345250585, + "grad_norm": 0.7156858444213867, + "learning_rate": 9.157613371796036e-06, + "loss": 2.3983, + "step": 17271 + }, + { + "epoch": 1.3939149382616416, + "grad_norm": 0.6779402494430542, + "learning_rate": 9.151014786007162e-06, + "loss": 2.435, + "step": 17272 + }, + { + "epoch": 1.3939956419982245, + "grad_norm": 0.7038381099700928, + "learning_rate": 9.144418464412486e-06, + "loss": 2.3848, + "step": 17273 + }, + { + "epoch": 1.3940763457348075, + "grad_norm": 0.7381990551948547, + 
"learning_rate": 9.13782440717641e-06, + "loss": 2.3693, + "step": 17274 + }, + { + "epoch": 1.3941570494713904, + "grad_norm": 0.6982381939888, + "learning_rate": 9.131232614463247e-06, + "loss": 2.4095, + "step": 17275 + }, + { + "epoch": 1.3942377532079735, + "grad_norm": 0.6968829035758972, + "learning_rate": 9.124643086437312e-06, + "loss": 2.3802, + "step": 17276 + }, + { + "epoch": 1.3943184569445566, + "grad_norm": 0.7584258317947388, + "learning_rate": 9.118055823262828e-06, + "loss": 2.4153, + "step": 17277 + }, + { + "epoch": 1.3943991606811394, + "grad_norm": 0.7331502437591553, + "learning_rate": 9.11147082510395e-06, + "loss": 2.4404, + "step": 17278 + }, + { + "epoch": 1.3944798644177225, + "grad_norm": 0.7939555048942566, + "learning_rate": 9.104888092124796e-06, + "loss": 2.4568, + "step": 17279 + }, + { + "epoch": 1.3945605681543056, + "grad_norm": 0.6752094626426697, + "learning_rate": 9.098307624489443e-06, + "loss": 2.3298, + "step": 17280 + }, + { + "epoch": 1.3946412718908885, + "grad_norm": 0.682428240776062, + "learning_rate": 9.091729422361872e-06, + "loss": 2.4449, + "step": 17281 + }, + { + "epoch": 1.3947219756274716, + "grad_norm": 0.7422902584075928, + "learning_rate": 9.085153485906051e-06, + "loss": 2.4, + "step": 17282 + }, + { + "epoch": 1.3948026793640547, + "grad_norm": 0.7528017163276672, + "learning_rate": 9.07857981528586e-06, + "loss": 2.4045, + "step": 17283 + }, + { + "epoch": 1.3948833831006375, + "grad_norm": 0.622075080871582, + "learning_rate": 9.072008410665133e-06, + "loss": 2.3865, + "step": 17284 + }, + { + "epoch": 1.3949640868372206, + "grad_norm": 0.7127060890197754, + "learning_rate": 9.065439272207642e-06, + "loss": 2.4108, + "step": 17285 + }, + { + "epoch": 1.3950447905738037, + "grad_norm": 0.7381206750869751, + "learning_rate": 9.0588724000771e-06, + "loss": 2.4459, + "step": 17286 + }, + { + "epoch": 1.3951254943103866, + "grad_norm": 0.7453467845916748, + "learning_rate": 9.05230779443721e-06, + "loss": 
2.4144, + "step": 17287 + }, + { + "epoch": 1.3952061980469694, + "grad_norm": 0.6772522330284119, + "learning_rate": 9.045745455451527e-06, + "loss": 2.4373, + "step": 17288 + }, + { + "epoch": 1.3952869017835525, + "grad_norm": 0.7005482316017151, + "learning_rate": 9.039185383283622e-06, + "loss": 2.3991, + "step": 17289 + }, + { + "epoch": 1.3953676055201356, + "grad_norm": 0.7172494530677795, + "learning_rate": 9.032627578096986e-06, + "loss": 2.4535, + "step": 17290 + }, + { + "epoch": 1.3954483092567185, + "grad_norm": 0.6911814212799072, + "learning_rate": 9.026072040055067e-06, + "loss": 2.3586, + "step": 17291 + }, + { + "epoch": 1.3955290129933016, + "grad_norm": 0.6708523035049438, + "learning_rate": 9.019518769321245e-06, + "loss": 2.4189, + "step": 17292 + }, + { + "epoch": 1.3956097167298847, + "grad_norm": 0.6716340780258179, + "learning_rate": 9.012967766058855e-06, + "loss": 2.3982, + "step": 17293 + }, + { + "epoch": 1.3956904204664675, + "grad_norm": 0.7001132965087891, + "learning_rate": 9.006419030431135e-06, + "loss": 2.3722, + "step": 17294 + }, + { + "epoch": 1.3957711242030506, + "grad_norm": 0.6912658214569092, + "learning_rate": 8.999872562601308e-06, + "loss": 2.371, + "step": 17295 + }, + { + "epoch": 1.3958518279396337, + "grad_norm": 0.7627947330474854, + "learning_rate": 8.993328362732545e-06, + "loss": 2.4123, + "step": 17296 + }, + { + "epoch": 1.3959325316762166, + "grad_norm": 0.6897323131561279, + "learning_rate": 8.986786430987926e-06, + "loss": 2.4466, + "step": 17297 + }, + { + "epoch": 1.3960132354127996, + "grad_norm": 0.7040663361549377, + "learning_rate": 8.980246767530498e-06, + "loss": 2.4008, + "step": 17298 + }, + { + "epoch": 1.3960939391493827, + "grad_norm": 0.7423021197319031, + "learning_rate": 8.973709372523254e-06, + "loss": 2.421, + "step": 17299 + }, + { + "epoch": 1.3961746428859656, + "grad_norm": 0.7053872346878052, + "learning_rate": 8.967174246129128e-06, + "loss": 2.4217, + "step": 17300 + }, + { + 
"epoch": 1.3962553466225487, + "grad_norm": 0.7772163152694702, + "learning_rate": 8.960641388510959e-06, + "loss": 2.3686, + "step": 17301 + }, + { + "epoch": 1.3963360503591316, + "grad_norm": 0.7254317402839661, + "learning_rate": 8.954110799831582e-06, + "loss": 2.3974, + "step": 17302 + }, + { + "epoch": 1.3964167540957146, + "grad_norm": 0.6462311744689941, + "learning_rate": 8.94758248025378e-06, + "loss": 2.3506, + "step": 17303 + }, + { + "epoch": 1.3964974578322975, + "grad_norm": 0.693526029586792, + "learning_rate": 8.94105642994023e-06, + "loss": 2.3774, + "step": 17304 + }, + { + "epoch": 1.3965781615688806, + "grad_norm": 0.6220893263816833, + "learning_rate": 8.934532649053585e-06, + "loss": 2.3588, + "step": 17305 + }, + { + "epoch": 1.3966588653054637, + "grad_norm": 0.6866275668144226, + "learning_rate": 8.928011137756443e-06, + "loss": 2.4001, + "step": 17306 + }, + { + "epoch": 1.3967395690420465, + "grad_norm": 0.7290368676185608, + "learning_rate": 8.92149189621132e-06, + "loss": 2.3936, + "step": 17307 + }, + { + "epoch": 1.3968202727786296, + "grad_norm": 0.6699230670928955, + "learning_rate": 8.914974924580688e-06, + "loss": 2.3656, + "step": 17308 + }, + { + "epoch": 1.3969009765152127, + "grad_norm": 0.6863143444061279, + "learning_rate": 8.908460223027016e-06, + "loss": 2.4157, + "step": 17309 + }, + { + "epoch": 1.3969816802517956, + "grad_norm": 0.7856658697128296, + "learning_rate": 8.901947791712594e-06, + "loss": 2.3927, + "step": 17310 + }, + { + "epoch": 1.3970623839883787, + "grad_norm": 0.692934513092041, + "learning_rate": 8.895437630799775e-06, + "loss": 2.4089, + "step": 17311 + }, + { + "epoch": 1.3971430877249618, + "grad_norm": 0.6908941268920898, + "learning_rate": 8.888929740450802e-06, + "loss": 2.3907, + "step": 17312 + }, + { + "epoch": 1.3972237914615446, + "grad_norm": 0.662405788898468, + "learning_rate": 8.88242412082786e-06, + "loss": 2.4287, + "step": 17313 + }, + { + "epoch": 1.3973044951981277, + "grad_norm": 
0.6889618635177612, + "learning_rate": 8.875920772093094e-06, + "loss": 2.3815, + "step": 17314 + }, + { + "epoch": 1.3973851989347108, + "grad_norm": 0.6734819412231445, + "learning_rate": 8.869419694408586e-06, + "loss": 2.4046, + "step": 17315 + }, + { + "epoch": 1.3974659026712937, + "grad_norm": 0.6958059668540955, + "learning_rate": 8.862920887936378e-06, + "loss": 2.4449, + "step": 17316 + }, + { + "epoch": 1.3975466064078768, + "grad_norm": 0.6793306469917297, + "learning_rate": 8.856424352838389e-06, + "loss": 2.4023, + "step": 17317 + }, + { + "epoch": 1.3976273101444596, + "grad_norm": 0.6622069478034973, + "learning_rate": 8.84993008927656e-06, + "loss": 2.4098, + "step": 17318 + }, + { + "epoch": 1.3977080138810427, + "grad_norm": 0.6999792456626892, + "learning_rate": 8.843438097412771e-06, + "loss": 2.4205, + "step": 17319 + }, + { + "epoch": 1.3977887176176256, + "grad_norm": 0.693848192691803, + "learning_rate": 8.83694837740876e-06, + "loss": 2.4284, + "step": 17320 + }, + { + "epoch": 1.3978694213542087, + "grad_norm": 0.6813297271728516, + "learning_rate": 8.830460929426299e-06, + "loss": 2.3887, + "step": 17321 + }, + { + "epoch": 1.3979501250907918, + "grad_norm": 0.6795780658721924, + "learning_rate": 8.823975753627079e-06, + "loss": 2.4428, + "step": 17322 + }, + { + "epoch": 1.3980308288273746, + "grad_norm": 0.7395818829536438, + "learning_rate": 8.817492850172703e-06, + "loss": 2.4842, + "step": 17323 + }, + { + "epoch": 1.3981115325639577, + "grad_norm": 0.6772391200065613, + "learning_rate": 8.811012219224778e-06, + "loss": 2.4555, + "step": 17324 + }, + { + "epoch": 1.3981922363005408, + "grad_norm": 0.66059809923172, + "learning_rate": 8.804533860944808e-06, + "loss": 2.3565, + "step": 17325 + }, + { + "epoch": 1.3982729400371237, + "grad_norm": 0.7336263656616211, + "learning_rate": 8.798057775494229e-06, + "loss": 2.4575, + "step": 17326 + }, + { + "epoch": 1.3983536437737067, + "grad_norm": 0.7758119702339172, + "learning_rate": 
8.791583963034444e-06, + "loss": 2.4239, + "step": 17327 + }, + { + "epoch": 1.3984343475102898, + "grad_norm": 0.7417536377906799, + "learning_rate": 8.785112423726827e-06, + "loss": 2.4547, + "step": 17328 + }, + { + "epoch": 1.3985150512468727, + "grad_norm": 0.6901140213012695, + "learning_rate": 8.778643157732636e-06, + "loss": 2.4253, + "step": 17329 + }, + { + "epoch": 1.3985957549834558, + "grad_norm": 0.6766345500946045, + "learning_rate": 8.772176165213109e-06, + "loss": 2.4312, + "step": 17330 + }, + { + "epoch": 1.3986764587200389, + "grad_norm": 0.7406117916107178, + "learning_rate": 8.765711446329427e-06, + "loss": 2.4223, + "step": 17331 + }, + { + "epoch": 1.3987571624566217, + "grad_norm": 0.7236598134040833, + "learning_rate": 8.759249001242697e-06, + "loss": 2.4078, + "step": 17332 + }, + { + "epoch": 1.3988378661932048, + "grad_norm": 0.7009963393211365, + "learning_rate": 8.752788830114e-06, + "loss": 2.3573, + "step": 17333 + }, + { + "epoch": 1.3989185699297877, + "grad_norm": 0.7128826975822449, + "learning_rate": 8.746330933104319e-06, + "loss": 2.4039, + "step": 17334 + }, + { + "epoch": 1.3989992736663708, + "grad_norm": 0.6832678914070129, + "learning_rate": 8.739875310374635e-06, + "loss": 2.3917, + "step": 17335 + }, + { + "epoch": 1.3990799774029536, + "grad_norm": 0.6790578961372375, + "learning_rate": 8.733421962085786e-06, + "loss": 2.3908, + "step": 17336 + }, + { + "epoch": 1.3991606811395367, + "grad_norm": 0.7215133905410767, + "learning_rate": 8.726970888398644e-06, + "loss": 2.3494, + "step": 17337 + }, + { + "epoch": 1.3992413848761198, + "grad_norm": 0.677761435508728, + "learning_rate": 8.720522089473992e-06, + "loss": 2.3747, + "step": 17338 + }, + { + "epoch": 1.3993220886127027, + "grad_norm": 0.6423436403274536, + "learning_rate": 8.714075565472513e-06, + "loss": 2.3386, + "step": 17339 + }, + { + "epoch": 1.3994027923492858, + "grad_norm": 0.798370897769928, + "learning_rate": 8.707631316554909e-06, + "loss": 2.3901, 
+ "step": 17340 + }, + { + "epoch": 1.3994834960858689, + "grad_norm": 0.6572564840316772, + "learning_rate": 8.701189342881767e-06, + "loss": 2.4311, + "step": 17341 + }, + { + "epoch": 1.3995641998224517, + "grad_norm": 0.721610426902771, + "learning_rate": 8.694749644613642e-06, + "loss": 2.4158, + "step": 17342 + }, + { + "epoch": 1.3996449035590348, + "grad_norm": 0.8007451891899109, + "learning_rate": 8.688312221911022e-06, + "loss": 2.3931, + "step": 17343 + }, + { + "epoch": 1.399725607295618, + "grad_norm": 0.7181806564331055, + "learning_rate": 8.681877074934363e-06, + "loss": 2.4062, + "step": 17344 + }, + { + "epoch": 1.3998063110322008, + "grad_norm": 0.6630976796150208, + "learning_rate": 8.675444203844053e-06, + "loss": 2.3936, + "step": 17345 + }, + { + "epoch": 1.3998870147687839, + "grad_norm": 0.7093006372451782, + "learning_rate": 8.66901360880038e-06, + "loss": 2.4065, + "step": 17346 + }, + { + "epoch": 1.3999677185053667, + "grad_norm": 0.6685216426849365, + "learning_rate": 8.662585289963621e-06, + "loss": 2.4589, + "step": 17347 + }, + { + "epoch": 1.4000484222419498, + "grad_norm": 0.7227702140808105, + "learning_rate": 8.656159247494023e-06, + "loss": 2.3946, + "step": 17348 + }, + { + "epoch": 1.4001291259785327, + "grad_norm": 0.7459855079650879, + "learning_rate": 8.64973548155169e-06, + "loss": 2.4766, + "step": 17349 + }, + { + "epoch": 1.4002098297151158, + "grad_norm": 0.713190495967865, + "learning_rate": 8.643313992296743e-06, + "loss": 2.3974, + "step": 17350 + }, + { + "epoch": 1.4002905334516988, + "grad_norm": 0.6921802759170532, + "learning_rate": 8.636894779889237e-06, + "loss": 2.4483, + "step": 17351 + }, + { + "epoch": 1.4003712371882817, + "grad_norm": 0.7517138719558716, + "learning_rate": 8.630477844489116e-06, + "loss": 2.402, + "step": 17352 + }, + { + "epoch": 1.4004519409248648, + "grad_norm": 0.728131115436554, + "learning_rate": 8.624063186256326e-06, + "loss": 2.4363, + "step": 17353 + }, + { + "epoch": 
1.400532644661448, + "grad_norm": 0.6918095350265503, + "learning_rate": 8.617650805350763e-06, + "loss": 2.4424, + "step": 17354 + }, + { + "epoch": 1.4006133483980308, + "grad_norm": 0.6802886128425598, + "learning_rate": 8.6112407019322e-06, + "loss": 2.4133, + "step": 17355 + }, + { + "epoch": 1.4006940521346138, + "grad_norm": 0.6760320663452148, + "learning_rate": 8.604832876160418e-06, + "loss": 2.4187, + "step": 17356 + }, + { + "epoch": 1.400774755871197, + "grad_norm": 0.7422602772712708, + "learning_rate": 8.598427328195124e-06, + "loss": 2.4051, + "step": 17357 + }, + { + "epoch": 1.4008554596077798, + "grad_norm": 0.7278845906257629, + "learning_rate": 8.592024058195925e-06, + "loss": 2.4256, + "step": 17358 + }, + { + "epoch": 1.4009361633443629, + "grad_norm": 0.7399848699569702, + "learning_rate": 8.585623066322435e-06, + "loss": 2.4045, + "step": 17359 + }, + { + "epoch": 1.401016867080946, + "grad_norm": 0.703372061252594, + "learning_rate": 8.579224352734184e-06, + "loss": 2.404, + "step": 17360 + }, + { + "epoch": 1.4010975708175288, + "grad_norm": 0.6849603056907654, + "learning_rate": 8.572827917590642e-06, + "loss": 2.3808, + "step": 17361 + }, + { + "epoch": 1.401178274554112, + "grad_norm": 0.6907341480255127, + "learning_rate": 8.566433761051207e-06, + "loss": 2.3777, + "step": 17362 + }, + { + "epoch": 1.4012589782906948, + "grad_norm": 0.7436221837997437, + "learning_rate": 8.560041883275261e-06, + "loss": 2.4027, + "step": 17363 + }, + { + "epoch": 1.4013396820272779, + "grad_norm": 0.6975259780883789, + "learning_rate": 8.553652284422088e-06, + "loss": 2.4235, + "step": 17364 + }, + { + "epoch": 1.4014203857638607, + "grad_norm": 0.7692399024963379, + "learning_rate": 8.547264964650948e-06, + "loss": 2.4615, + "step": 17365 + }, + { + "epoch": 1.4015010895004438, + "grad_norm": 0.7096135020256042, + "learning_rate": 8.540879924121025e-06, + "loss": 2.3972, + "step": 17366 + }, + { + "epoch": 1.401581793237027, + "grad_norm": 
0.6851587891578674, + "learning_rate": 8.534497162991473e-06, + "loss": 2.3697, + "step": 17367 + }, + { + "epoch": 1.4016624969736098, + "grad_norm": 0.6977655291557312, + "learning_rate": 8.528116681421317e-06, + "loss": 2.4413, + "step": 17368 + }, + { + "epoch": 1.4017432007101929, + "grad_norm": 0.715307354927063, + "learning_rate": 8.521738479569618e-06, + "loss": 2.4006, + "step": 17369 + }, + { + "epoch": 1.401823904446776, + "grad_norm": 0.7282734513282776, + "learning_rate": 8.51536255759533e-06, + "loss": 2.4418, + "step": 17370 + }, + { + "epoch": 1.4019046081833588, + "grad_norm": 0.6996017098426819, + "learning_rate": 8.508988915657334e-06, + "loss": 2.435, + "step": 17371 + }, + { + "epoch": 1.401985311919942, + "grad_norm": 0.7084866762161255, + "learning_rate": 8.502617553914494e-06, + "loss": 2.4314, + "step": 17372 + }, + { + "epoch": 1.402066015656525, + "grad_norm": 0.7217462658882141, + "learning_rate": 8.496248472525603e-06, + "loss": 2.4811, + "step": 17373 + }, + { + "epoch": 1.4021467193931079, + "grad_norm": 0.7414960265159607, + "learning_rate": 8.489881671649391e-06, + "loss": 2.4016, + "step": 17374 + }, + { + "epoch": 1.402227423129691, + "grad_norm": 0.7439210414886475, + "learning_rate": 8.483517151444532e-06, + "loss": 2.4711, + "step": 17375 + }, + { + "epoch": 1.402308126866274, + "grad_norm": 0.7277424335479736, + "learning_rate": 8.477154912069663e-06, + "loss": 2.4095, + "step": 17376 + }, + { + "epoch": 1.402388830602857, + "grad_norm": 0.7506297826766968, + "learning_rate": 8.470794953683347e-06, + "loss": 2.4187, + "step": 17377 + }, + { + "epoch": 1.40246953433944, + "grad_norm": 0.7137917280197144, + "learning_rate": 8.464437276444059e-06, + "loss": 2.4069, + "step": 17378 + }, + { + "epoch": 1.4025502380760229, + "grad_norm": 0.6610304117202759, + "learning_rate": 8.458081880510282e-06, + "loss": 2.4709, + "step": 17379 + }, + { + "epoch": 1.402630941812606, + "grad_norm": 0.7147911190986633, + "learning_rate": 
8.451728766040411e-06, + "loss": 2.4147, + "step": 17380 + }, + { + "epoch": 1.4027116455491888, + "grad_norm": 0.7196649312973022, + "learning_rate": 8.445377933192745e-06, + "loss": 2.4611, + "step": 17381 + }, + { + "epoch": 1.402792349285772, + "grad_norm": 0.6550390124320984, + "learning_rate": 8.439029382125596e-06, + "loss": 2.4229, + "step": 17382 + }, + { + "epoch": 1.402873053022355, + "grad_norm": 0.6517959833145142, + "learning_rate": 8.432683112997175e-06, + "loss": 2.421, + "step": 17383 + }, + { + "epoch": 1.4029537567589379, + "grad_norm": 0.6660284399986267, + "learning_rate": 8.426339125965643e-06, + "loss": 2.3918, + "step": 17384 + }, + { + "epoch": 1.403034460495521, + "grad_norm": 0.696163535118103, + "learning_rate": 8.41999742118913e-06, + "loss": 2.4334, + "step": 17385 + }, + { + "epoch": 1.403115164232104, + "grad_norm": 0.7146298885345459, + "learning_rate": 8.413657998825674e-06, + "loss": 2.3984, + "step": 17386 + }, + { + "epoch": 1.403195867968687, + "grad_norm": 0.7084376215934753, + "learning_rate": 8.407320859033262e-06, + "loss": 2.4098, + "step": 17387 + }, + { + "epoch": 1.40327657170527, + "grad_norm": 0.7499445080757141, + "learning_rate": 8.400986001969846e-06, + "loss": 2.4315, + "step": 17388 + }, + { + "epoch": 1.403357275441853, + "grad_norm": 0.6822247505187988, + "learning_rate": 8.394653427793308e-06, + "loss": 2.3816, + "step": 17389 + }, + { + "epoch": 1.403437979178436, + "grad_norm": 0.6859664916992188, + "learning_rate": 8.388323136661458e-06, + "loss": 2.3772, + "step": 17390 + }, + { + "epoch": 1.403518682915019, + "grad_norm": 0.6771109104156494, + "learning_rate": 8.381995128732057e-06, + "loss": 2.4295, + "step": 17391 + }, + { + "epoch": 1.4035993866516019, + "grad_norm": 0.7589800357818604, + "learning_rate": 8.375669404162845e-06, + "loss": 2.3806, + "step": 17392 + }, + { + "epoch": 1.403680090388185, + "grad_norm": 0.665472149848938, + "learning_rate": 8.369345963111453e-06, + "loss": 2.383, + "step": 
17393 + }, + { + "epoch": 1.4037607941247678, + "grad_norm": 0.6658698916435242, + "learning_rate": 8.363024805735475e-06, + "loss": 2.3682, + "step": 17394 + }, + { + "epoch": 1.403841497861351, + "grad_norm": 0.7445670366287231, + "learning_rate": 8.356705932192477e-06, + "loss": 2.5224, + "step": 17395 + }, + { + "epoch": 1.403922201597934, + "grad_norm": 0.6812258362770081, + "learning_rate": 8.35038934263993e-06, + "loss": 2.426, + "step": 17396 + }, + { + "epoch": 1.4040029053345169, + "grad_norm": 0.6613782644271851, + "learning_rate": 8.344075037235243e-06, + "loss": 2.3756, + "step": 17397 + }, + { + "epoch": 1.4040836090711, + "grad_norm": 0.6314469575881958, + "learning_rate": 8.337763016135792e-06, + "loss": 2.3703, + "step": 17398 + }, + { + "epoch": 1.404164312807683, + "grad_norm": 0.6611869931221008, + "learning_rate": 8.331453279498914e-06, + "loss": 2.3951, + "step": 17399 + }, + { + "epoch": 1.404245016544266, + "grad_norm": 0.6668544411659241, + "learning_rate": 8.325145827481828e-06, + "loss": 2.4732, + "step": 17400 + }, + { + "epoch": 1.404325720280849, + "grad_norm": 0.7428251504898071, + "learning_rate": 8.318840660241755e-06, + "loss": 2.391, + "step": 17401 + }, + { + "epoch": 1.404406424017432, + "grad_norm": 0.7163440585136414, + "learning_rate": 8.312537777935836e-06, + "loss": 2.4379, + "step": 17402 + }, + { + "epoch": 1.404487127754015, + "grad_norm": 0.7152317762374878, + "learning_rate": 8.306237180721121e-06, + "loss": 2.426, + "step": 17403 + }, + { + "epoch": 1.404567831490598, + "grad_norm": 0.7675083875656128, + "learning_rate": 8.299938868754686e-06, + "loss": 2.4014, + "step": 17404 + }, + { + "epoch": 1.4046485352271811, + "grad_norm": 0.7118947505950928, + "learning_rate": 8.293642842193494e-06, + "loss": 2.3998, + "step": 17405 + }, + { + "epoch": 1.404729238963764, + "grad_norm": 0.713556706905365, + "learning_rate": 8.28734910119442e-06, + "loss": 2.4134, + "step": 17406 + }, + { + "epoch": 1.404809942700347, + 
"grad_norm": 0.7631849646568298, + "learning_rate": 8.281057645914359e-06, + "loss": 2.4866, + "step": 17407 + }, + { + "epoch": 1.40489064643693, + "grad_norm": 0.7348508834838867, + "learning_rate": 8.274768476510087e-06, + "loss": 2.4067, + "step": 17408 + }, + { + "epoch": 1.404971350173513, + "grad_norm": 0.7371857762336731, + "learning_rate": 8.268481593138377e-06, + "loss": 2.429, + "step": 17409 + }, + { + "epoch": 1.405052053910096, + "grad_norm": 0.674980640411377, + "learning_rate": 8.262196995955874e-06, + "loss": 2.3897, + "step": 17410 + }, + { + "epoch": 1.405132757646679, + "grad_norm": 0.6975973844528198, + "learning_rate": 8.255914685119237e-06, + "loss": 2.445, + "step": 17411 + }, + { + "epoch": 1.405213461383262, + "grad_norm": 0.6854067444801331, + "learning_rate": 8.249634660785033e-06, + "loss": 2.3528, + "step": 17412 + }, + { + "epoch": 1.405294165119845, + "grad_norm": 0.6678418517112732, + "learning_rate": 8.243356923109768e-06, + "loss": 2.4078, + "step": 17413 + }, + { + "epoch": 1.405374868856428, + "grad_norm": 0.6600239276885986, + "learning_rate": 8.237081472249885e-06, + "loss": 2.3719, + "step": 17414 + }, + { + "epoch": 1.4054555725930111, + "grad_norm": 0.7209253907203674, + "learning_rate": 8.230808308361815e-06, + "loss": 2.4203, + "step": 17415 + }, + { + "epoch": 1.405536276329594, + "grad_norm": 0.6849339604377747, + "learning_rate": 8.224537431601886e-06, + "loss": 2.3898, + "step": 17416 + }, + { + "epoch": 1.405616980066177, + "grad_norm": 0.718558132648468, + "learning_rate": 8.218268842126387e-06, + "loss": 2.4063, + "step": 17417 + }, + { + "epoch": 1.4056976838027602, + "grad_norm": 0.7118551731109619, + "learning_rate": 8.212002540091567e-06, + "loss": 2.3942, + "step": 17418 + }, + { + "epoch": 1.405778387539343, + "grad_norm": 0.7138789892196655, + "learning_rate": 8.205738525653562e-06, + "loss": 2.4614, + "step": 17419 + }, + { + "epoch": 1.4058590912759261, + "grad_norm": 0.7254295349121094, + "learning_rate": 
8.199476798968508e-06, + "loss": 2.4126, + "step": 17420 + }, + { + "epoch": 1.4059397950125092, + "grad_norm": 0.691965639591217, + "learning_rate": 8.193217360192473e-06, + "loss": 2.4233, + "step": 17421 + }, + { + "epoch": 1.406020498749092, + "grad_norm": 0.7132619619369507, + "learning_rate": 8.186960209481431e-06, + "loss": 2.3764, + "step": 17422 + }, + { + "epoch": 1.4061012024856752, + "grad_norm": 0.6838160753250122, + "learning_rate": 8.180705346991346e-06, + "loss": 2.3927, + "step": 17423 + }, + { + "epoch": 1.406181906222258, + "grad_norm": 0.6755721569061279, + "learning_rate": 8.174452772878094e-06, + "loss": 2.435, + "step": 17424 + }, + { + "epoch": 1.4062626099588411, + "grad_norm": 0.774718701839447, + "learning_rate": 8.168202487297527e-06, + "loss": 2.4811, + "step": 17425 + }, + { + "epoch": 1.406343313695424, + "grad_norm": 0.6601200699806213, + "learning_rate": 8.161954490405388e-06, + "loss": 2.3494, + "step": 17426 + }, + { + "epoch": 1.406424017432007, + "grad_norm": 0.6854710578918457, + "learning_rate": 8.155708782357419e-06, + "loss": 2.4214, + "step": 17427 + }, + { + "epoch": 1.4065047211685902, + "grad_norm": 0.7471936345100403, + "learning_rate": 8.149465363309294e-06, + "loss": 2.3702, + "step": 17428 + }, + { + "epoch": 1.406585424905173, + "grad_norm": 0.7129673957824707, + "learning_rate": 8.143224233416569e-06, + "loss": 2.4078, + "step": 17429 + }, + { + "epoch": 1.406666128641756, + "grad_norm": 0.7168975472450256, + "learning_rate": 8.136985392834807e-06, + "loss": 2.4265, + "step": 17430 + }, + { + "epoch": 1.4067468323783392, + "grad_norm": 0.709699809551239, + "learning_rate": 8.130748841719526e-06, + "loss": 2.4069, + "step": 17431 + }, + { + "epoch": 1.406827536114922, + "grad_norm": 0.7571663856506348, + "learning_rate": 8.124514580226105e-06, + "loss": 2.3949, + "step": 17432 + }, + { + "epoch": 1.4069082398515051, + "grad_norm": 0.6844212412834167, + "learning_rate": 8.118282608509952e-06, + "loss": 2.4156, + 
"step": 17433 + }, + { + "epoch": 1.4069889435880882, + "grad_norm": 0.6632293462753296, + "learning_rate": 8.112052926726376e-06, + "loss": 2.3973, + "step": 17434 + }, + { + "epoch": 1.407069647324671, + "grad_norm": 0.6375966668128967, + "learning_rate": 8.105825535030643e-06, + "loss": 2.4168, + "step": 17435 + }, + { + "epoch": 1.4071503510612542, + "grad_norm": 0.6997824907302856, + "learning_rate": 8.099600433577947e-06, + "loss": 2.3279, + "step": 17436 + }, + { + "epoch": 1.4072310547978373, + "grad_norm": 0.7491862177848816, + "learning_rate": 8.093377622523458e-06, + "loss": 2.403, + "step": 17437 + }, + { + "epoch": 1.4073117585344201, + "grad_norm": 0.6938888430595398, + "learning_rate": 8.087157102022235e-06, + "loss": 2.3965, + "step": 17438 + }, + { + "epoch": 1.4073924622710032, + "grad_norm": 0.708043098449707, + "learning_rate": 8.080938872229304e-06, + "loss": 2.4429, + "step": 17439 + }, + { + "epoch": 1.407473166007586, + "grad_norm": 0.6587165594100952, + "learning_rate": 8.074722933299673e-06, + "loss": 2.3951, + "step": 17440 + }, + { + "epoch": 1.4075538697441692, + "grad_norm": 0.6987459659576416, + "learning_rate": 8.068509285388248e-06, + "loss": 2.41, + "step": 17441 + }, + { + "epoch": 1.407634573480752, + "grad_norm": 0.6864002346992493, + "learning_rate": 8.062297928649865e-06, + "loss": 2.3867, + "step": 17442 + }, + { + "epoch": 1.4077152772173351, + "grad_norm": 0.6478279829025269, + "learning_rate": 8.056088863239342e-06, + "loss": 2.391, + "step": 17443 + }, + { + "epoch": 1.4077959809539182, + "grad_norm": 0.658235490322113, + "learning_rate": 8.049882089311433e-06, + "loss": 2.3646, + "step": 17444 + }, + { + "epoch": 1.407876684690501, + "grad_norm": 0.6664391160011292, + "learning_rate": 8.043677607020828e-06, + "loss": 2.4101, + "step": 17445 + }, + { + "epoch": 1.4079573884270842, + "grad_norm": 0.6662336587905884, + "learning_rate": 8.037475416522144e-06, + "loss": 2.4461, + "step": 17446 + }, + { + "epoch": 
1.4080380921636673, + "grad_norm": 0.6629661321640015, + "learning_rate": 8.031275517969982e-06, + "loss": 2.4191, + "step": 17447 + }, + { + "epoch": 1.4081187959002501, + "grad_norm": 0.6586340665817261, + "learning_rate": 8.02507791151883e-06, + "loss": 2.4213, + "step": 17448 + }, + { + "epoch": 1.4081994996368332, + "grad_norm": 0.692555844783783, + "learning_rate": 8.018882597323163e-06, + "loss": 2.4148, + "step": 17449 + }, + { + "epoch": 1.4082802033734163, + "grad_norm": 0.6890958547592163, + "learning_rate": 8.012689575537402e-06, + "loss": 2.4121, + "step": 17450 + }, + { + "epoch": 1.4083609071099992, + "grad_norm": 0.7425588965415955, + "learning_rate": 8.006498846315846e-06, + "loss": 2.4426, + "step": 17451 + }, + { + "epoch": 1.4084416108465823, + "grad_norm": 0.6801562309265137, + "learning_rate": 8.000310409812828e-06, + "loss": 2.3786, + "step": 17452 + }, + { + "epoch": 1.4085223145831651, + "grad_norm": 0.7273206114768982, + "learning_rate": 7.994124266182568e-06, + "loss": 2.3635, + "step": 17453 + }, + { + "epoch": 1.4086030183197482, + "grad_norm": 0.6684201955795288, + "learning_rate": 7.987940415579209e-06, + "loss": 2.4565, + "step": 17454 + }, + { + "epoch": 1.408683722056331, + "grad_norm": 0.7803860902786255, + "learning_rate": 7.981758858156908e-06, + "loss": 2.3957, + "step": 17455 + }, + { + "epoch": 1.4087644257929142, + "grad_norm": 0.7033873200416565, + "learning_rate": 7.975579594069727e-06, + "loss": 2.3273, + "step": 17456 + }, + { + "epoch": 1.4088451295294973, + "grad_norm": 0.7338894009590149, + "learning_rate": 7.969402623471656e-06, + "loss": 2.4657, + "step": 17457 + }, + { + "epoch": 1.4089258332660801, + "grad_norm": 0.6912354230880737, + "learning_rate": 7.963227946516637e-06, + "loss": 2.4329, + "step": 17458 + }, + { + "epoch": 1.4090065370026632, + "grad_norm": 0.7227259278297424, + "learning_rate": 7.957055563358561e-06, + "loss": 2.4043, + "step": 17459 + }, + { + "epoch": 1.4090872407392463, + "grad_norm": 
0.7320930361747742, + "learning_rate": 7.950885474151281e-06, + "loss": 2.3889, + "step": 17460 + }, + { + "epoch": 1.4091679444758292, + "grad_norm": 0.6754814982414246, + "learning_rate": 7.944717679048542e-06, + "loss": 2.4199, + "step": 17461 + }, + { + "epoch": 1.4092486482124122, + "grad_norm": 0.6574978828430176, + "learning_rate": 7.938552178204061e-06, + "loss": 2.3846, + "step": 17462 + }, + { + "epoch": 1.4093293519489953, + "grad_norm": 0.6976850628852844, + "learning_rate": 7.932388971771543e-06, + "loss": 2.4647, + "step": 17463 + }, + { + "epoch": 1.4094100556855782, + "grad_norm": 0.7376202344894409, + "learning_rate": 7.926228059904529e-06, + "loss": 2.4279, + "step": 17464 + }, + { + "epoch": 1.4094907594221613, + "grad_norm": 0.6907104253768921, + "learning_rate": 7.920069442756584e-06, + "loss": 2.4238, + "step": 17465 + }, + { + "epoch": 1.4095714631587444, + "grad_norm": 0.7079440951347351, + "learning_rate": 7.913913120481243e-06, + "loss": 2.4173, + "step": 17466 + }, + { + "epoch": 1.4096521668953272, + "grad_norm": 0.7188387513160706, + "learning_rate": 7.907759093231882e-06, + "loss": 2.4134, + "step": 17467 + }, + { + "epoch": 1.4097328706319103, + "grad_norm": 0.6877745389938354, + "learning_rate": 7.901607361161889e-06, + "loss": 2.4098, + "step": 17468 + }, + { + "epoch": 1.4098135743684932, + "grad_norm": 0.6914156079292297, + "learning_rate": 7.8954579244246e-06, + "loss": 2.4244, + "step": 17469 + }, + { + "epoch": 1.4098942781050763, + "grad_norm": 0.6616036295890808, + "learning_rate": 7.889310783173277e-06, + "loss": 2.4617, + "step": 17470 + }, + { + "epoch": 1.4099749818416591, + "grad_norm": 0.7090594172477722, + "learning_rate": 7.883165937561088e-06, + "loss": 2.4234, + "step": 17471 + }, + { + "epoch": 1.4100556855782422, + "grad_norm": 0.7596384286880493, + "learning_rate": 7.8770233877412e-06, + "loss": 2.39, + "step": 17472 + }, + { + "epoch": 1.4101363893148253, + "grad_norm": 0.7311475872993469, + "learning_rate": 
7.870883133866725e-06, + "loss": 2.418, + "step": 17473 + }, + { + "epoch": 1.4102170930514082, + "grad_norm": 0.6628947854042053, + "learning_rate": 7.86474517609065e-06, + "loss": 2.4177, + "step": 17474 + }, + { + "epoch": 1.4102977967879913, + "grad_norm": 0.7169137597084045, + "learning_rate": 7.858609514565974e-06, + "loss": 2.4359, + "step": 17475 + }, + { + "epoch": 1.4103785005245744, + "grad_norm": 0.7364529371261597, + "learning_rate": 7.852476149445598e-06, + "loss": 2.45, + "step": 17476 + }, + { + "epoch": 1.4104592042611572, + "grad_norm": 0.7494707703590393, + "learning_rate": 7.8463450808824e-06, + "loss": 2.403, + "step": 17477 + }, + { + "epoch": 1.4105399079977403, + "grad_norm": 0.6723065376281738, + "learning_rate": 7.84021630902917e-06, + "loss": 2.4089, + "step": 17478 + }, + { + "epoch": 1.4106206117343234, + "grad_norm": 0.7032917141914368, + "learning_rate": 7.83408983403867e-06, + "loss": 2.4285, + "step": 17479 + }, + { + "epoch": 1.4107013154709063, + "grad_norm": 0.6634184718132019, + "learning_rate": 7.827965656063573e-06, + "loss": 2.3701, + "step": 17480 + }, + { + "epoch": 1.4107820192074894, + "grad_norm": 0.6645818948745728, + "learning_rate": 7.821843775256498e-06, + "loss": 2.3891, + "step": 17481 + }, + { + "epoch": 1.4108627229440724, + "grad_norm": 0.6750596165657043, + "learning_rate": 7.815724191770058e-06, + "loss": 2.4043, + "step": 17482 + }, + { + "epoch": 1.4109434266806553, + "grad_norm": 0.7519060969352722, + "learning_rate": 7.809606905756727e-06, + "loss": 2.4287, + "step": 17483 + }, + { + "epoch": 1.4110241304172384, + "grad_norm": 0.69886714220047, + "learning_rate": 7.803491917368977e-06, + "loss": 2.4565, + "step": 17484 + }, + { + "epoch": 1.4111048341538213, + "grad_norm": 0.6600854992866516, + "learning_rate": 7.797379226759216e-06, + "loss": 2.3743, + "step": 17485 + }, + { + "epoch": 1.4111855378904044, + "grad_norm": 0.65254807472229, + "learning_rate": 7.791268834079779e-06, + "loss": 2.435, + "step": 
17486 + }, + { + "epoch": 1.4112662416269872, + "grad_norm": 0.6900071501731873, + "learning_rate": 7.785160739482955e-06, + "loss": 2.4073, + "step": 17487 + }, + { + "epoch": 1.4113469453635703, + "grad_norm": 0.6831900477409363, + "learning_rate": 7.779054943120989e-06, + "loss": 2.4325, + "step": 17488 + }, + { + "epoch": 1.4114276491001534, + "grad_norm": 0.7446292042732239, + "learning_rate": 7.772951445146049e-06, + "loss": 2.4693, + "step": 17489 + }, + { + "epoch": 1.4115083528367363, + "grad_norm": 0.6620200872421265, + "learning_rate": 7.766850245710233e-06, + "loss": 2.4345, + "step": 17490 + }, + { + "epoch": 1.4115890565733193, + "grad_norm": 0.7509312629699707, + "learning_rate": 7.76075134496561e-06, + "loss": 2.3596, + "step": 17491 + }, + { + "epoch": 1.4116697603099024, + "grad_norm": 0.7003920078277588, + "learning_rate": 7.754654743064194e-06, + "loss": 2.4016, + "step": 17492 + }, + { + "epoch": 1.4117504640464853, + "grad_norm": 0.6603164076805115, + "learning_rate": 7.748560440157892e-06, + "loss": 2.4031, + "step": 17493 + }, + { + "epoch": 1.4118311677830684, + "grad_norm": 0.7125976085662842, + "learning_rate": 7.742468436398608e-06, + "loss": 2.4199, + "step": 17494 + }, + { + "epoch": 1.4119118715196515, + "grad_norm": 0.7279991507530212, + "learning_rate": 7.736378731938187e-06, + "loss": 2.4263, + "step": 17495 + }, + { + "epoch": 1.4119925752562343, + "grad_norm": 0.7445220351219177, + "learning_rate": 7.730291326928385e-06, + "loss": 2.4256, + "step": 17496 + }, + { + "epoch": 1.4120732789928174, + "grad_norm": 0.7625001072883606, + "learning_rate": 7.724206221520913e-06, + "loss": 2.4307, + "step": 17497 + }, + { + "epoch": 1.4121539827294003, + "grad_norm": 0.7109429240226746, + "learning_rate": 7.71812341586745e-06, + "loss": 2.4157, + "step": 17498 + }, + { + "epoch": 1.4122346864659834, + "grad_norm": 0.7360411882400513, + "learning_rate": 7.712042910119566e-06, + "loss": 2.3855, + "step": 17499 + }, + { + "epoch": 
1.4123153902025662, + "grad_norm": 0.6878146529197693, + "learning_rate": 7.705964704428815e-06, + "loss": 2.4059, + "step": 17500 + }, + { + "epoch": 1.4123960939391493, + "grad_norm": 0.7399710416793823, + "learning_rate": 7.699888798946674e-06, + "loss": 2.4234, + "step": 17501 + }, + { + "epoch": 1.4124767976757324, + "grad_norm": 0.6825466156005859, + "learning_rate": 7.693815193824605e-06, + "loss": 2.4428, + "step": 17502 + }, + { + "epoch": 1.4125575014123153, + "grad_norm": 0.6567744016647339, + "learning_rate": 7.687743889213938e-06, + "loss": 2.3609, + "step": 17503 + }, + { + "epoch": 1.4126382051488984, + "grad_norm": 0.7361522316932678, + "learning_rate": 7.681674885265989e-06, + "loss": 2.4006, + "step": 17504 + }, + { + "epoch": 1.4127189088854815, + "grad_norm": 0.7350279688835144, + "learning_rate": 7.675608182132033e-06, + "loss": 2.4395, + "step": 17505 + }, + { + "epoch": 1.4127996126220643, + "grad_norm": 0.6630931496620178, + "learning_rate": 7.669543779963262e-06, + "loss": 2.4451, + "step": 17506 + }, + { + "epoch": 1.4128803163586474, + "grad_norm": 0.6845518350601196, + "learning_rate": 7.6634816789108e-06, + "loss": 2.436, + "step": 17507 + }, + { + "epoch": 1.4129610200952305, + "grad_norm": 0.6736167073249817, + "learning_rate": 7.657421879125782e-06, + "loss": 2.3628, + "step": 17508 + }, + { + "epoch": 1.4130417238318134, + "grad_norm": 0.6932296752929688, + "learning_rate": 7.651364380759163e-06, + "loss": 2.4353, + "step": 17509 + }, + { + "epoch": 1.4131224275683965, + "grad_norm": 0.7034411430358887, + "learning_rate": 7.645309183961947e-06, + "loss": 2.3853, + "step": 17510 + }, + { + "epoch": 1.4132031313049795, + "grad_norm": 0.6912705898284912, + "learning_rate": 7.639256288885065e-06, + "loss": 2.2978, + "step": 17511 + }, + { + "epoch": 1.4132838350415624, + "grad_norm": 0.6716031432151794, + "learning_rate": 7.633205695679336e-06, + "loss": 2.3602, + "step": 17512 + }, + { + "epoch": 1.4133645387781455, + "grad_norm": 
0.707477331161499, + "learning_rate": 7.6271574044955664e-06, + "loss": 2.434, + "step": 17513 + }, + { + "epoch": 1.4134452425147284, + "grad_norm": 0.7031993269920349, + "learning_rate": 7.621111415484517e-06, + "loss": 2.3718, + "step": 17514 + }, + { + "epoch": 1.4135259462513114, + "grad_norm": 0.6708939671516418, + "learning_rate": 7.615067728796832e-06, + "loss": 2.4218, + "step": 17515 + }, + { + "epoch": 1.4136066499878943, + "grad_norm": 0.7508932948112488, + "learning_rate": 7.609026344583148e-06, + "loss": 2.4273, + "step": 17516 + }, + { + "epoch": 1.4136873537244774, + "grad_norm": 0.6981049180030823, + "learning_rate": 7.602987262994055e-06, + "loss": 2.3941, + "step": 17517 + }, + { + "epoch": 1.4137680574610605, + "grad_norm": 0.7662717700004578, + "learning_rate": 7.5969504841800544e-06, + "loss": 2.3875, + "step": 17518 + }, + { + "epoch": 1.4138487611976434, + "grad_norm": 0.688423752784729, + "learning_rate": 7.590916008291582e-06, + "loss": 2.4091, + "step": 17519 + }, + { + "epoch": 1.4139294649342264, + "grad_norm": 0.6867286562919617, + "learning_rate": 7.584883835479039e-06, + "loss": 2.3983, + "step": 17520 + }, + { + "epoch": 1.4140101686708095, + "grad_norm": 0.7491776943206787, + "learning_rate": 7.578853965892785e-06, + "loss": 2.4151, + "step": 17521 + }, + { + "epoch": 1.4140908724073924, + "grad_norm": 0.6946732997894287, + "learning_rate": 7.572826399683064e-06, + "loss": 2.4196, + "step": 17522 + }, + { + "epoch": 1.4141715761439755, + "grad_norm": 0.6638106107711792, + "learning_rate": 7.566801137000123e-06, + "loss": 2.441, + "step": 17523 + }, + { + "epoch": 1.4142522798805586, + "grad_norm": 0.7190408110618591, + "learning_rate": 7.5607781779941325e-06, + "loss": 2.4026, + "step": 17524 + }, + { + "epoch": 1.4143329836171414, + "grad_norm": 0.708963930606842, + "learning_rate": 7.55475752281517e-06, + "loss": 2.3842, + "step": 17525 + }, + { + "epoch": 1.4144136873537245, + "grad_norm": 0.6763237118721008, + "learning_rate": 
7.548739171613306e-06, + "loss": 2.4259, + "step": 17526 + }, + { + "epoch": 1.4144943910903076, + "grad_norm": 0.7374435067176819, + "learning_rate": 7.542723124538531e-06, + "loss": 2.4603, + "step": 17527 + }, + { + "epoch": 1.4145750948268905, + "grad_norm": 0.7165411114692688, + "learning_rate": 7.5367093817407805e-06, + "loss": 2.4103, + "step": 17528 + }, + { + "epoch": 1.4146557985634736, + "grad_norm": 0.7794588804244995, + "learning_rate": 7.530697943369935e-06, + "loss": 2.3912, + "step": 17529 + }, + { + "epoch": 1.4147365023000564, + "grad_norm": 0.691405713558197, + "learning_rate": 7.5246888095758305e-06, + "loss": 2.4357, + "step": 17530 + }, + { + "epoch": 1.4148172060366395, + "grad_norm": 0.6955364346504211, + "learning_rate": 7.518681980508191e-06, + "loss": 2.3645, + "step": 17531 + }, + { + "epoch": 1.4148979097732224, + "grad_norm": 0.6848856210708618, + "learning_rate": 7.512677456316753e-06, + "loss": 2.4145, + "step": 17532 + }, + { + "epoch": 1.4149786135098055, + "grad_norm": 0.668624997138977, + "learning_rate": 7.506675237151151e-06, + "loss": 2.4367, + "step": 17533 + }, + { + "epoch": 1.4150593172463886, + "grad_norm": 0.7547643780708313, + "learning_rate": 7.50067532316101e-06, + "loss": 2.437, + "step": 17534 + }, + { + "epoch": 1.4151400209829714, + "grad_norm": 0.6710182428359985, + "learning_rate": 7.494677714495812e-06, + "loss": 2.3596, + "step": 17535 + }, + { + "epoch": 1.4152207247195545, + "grad_norm": 0.7603517770767212, + "learning_rate": 7.488682411305048e-06, + "loss": 2.4277, + "step": 17536 + }, + { + "epoch": 1.4153014284561376, + "grad_norm": 0.7142195105552673, + "learning_rate": 7.482689413738153e-06, + "loss": 2.386, + "step": 17537 + }, + { + "epoch": 1.4153821321927205, + "grad_norm": 0.6910836100578308, + "learning_rate": 7.4766987219444865e-06, + "loss": 2.4394, + "step": 17538 + }, + { + "epoch": 1.4154628359293036, + "grad_norm": 0.7568751573562622, + "learning_rate": 7.470710336073339e-06, + "loss": 
2.4621, + "step": 17539 + }, + { + "epoch": 1.4155435396658866, + "grad_norm": 0.7378259301185608, + "learning_rate": 7.46472425627398e-06, + "loss": 2.3677, + "step": 17540 + }, + { + "epoch": 1.4156242434024695, + "grad_norm": 0.7365754842758179, + "learning_rate": 7.458740482695569e-06, + "loss": 2.3881, + "step": 17541 + }, + { + "epoch": 1.4157049471390526, + "grad_norm": 0.6753227114677429, + "learning_rate": 7.452759015487254e-06, + "loss": 2.3997, + "step": 17542 + }, + { + "epoch": 1.4157856508756355, + "grad_norm": 0.6384701728820801, + "learning_rate": 7.446779854798114e-06, + "loss": 2.4029, + "step": 17543 + }, + { + "epoch": 1.4158663546122185, + "grad_norm": 0.6766810417175293, + "learning_rate": 7.4408030007771416e-06, + "loss": 2.4083, + "step": 17544 + }, + { + "epoch": 1.4159470583488014, + "grad_norm": 0.6948650479316711, + "learning_rate": 7.434828453573317e-06, + "loss": 2.3521, + "step": 17545 + }, + { + "epoch": 1.4160277620853845, + "grad_norm": 0.7690626978874207, + "learning_rate": 7.428856213335533e-06, + "loss": 2.4318, + "step": 17546 + }, + { + "epoch": 1.4161084658219676, + "grad_norm": 0.7151117920875549, + "learning_rate": 7.422886280212626e-06, + "loss": 2.4261, + "step": 17547 + }, + { + "epoch": 1.4161891695585505, + "grad_norm": 0.6966549754142761, + "learning_rate": 7.4169186543534e-06, + "loss": 2.4112, + "step": 17548 + }, + { + "epoch": 1.4162698732951335, + "grad_norm": 0.6930578947067261, + "learning_rate": 7.410953335906578e-06, + "loss": 2.4155, + "step": 17549 + }, + { + "epoch": 1.4163505770317166, + "grad_norm": 0.7319084405899048, + "learning_rate": 7.404990325020844e-06, + "loss": 2.4015, + "step": 17550 + }, + { + "epoch": 1.4164312807682995, + "grad_norm": 0.6913621425628662, + "learning_rate": 7.399029621844778e-06, + "loss": 2.4474, + "step": 17551 + }, + { + "epoch": 1.4165119845048826, + "grad_norm": 0.7726523280143738, + "learning_rate": 7.3930712265269595e-06, + "loss": 2.4815, + "step": 17552 + }, + { + 
"epoch": 1.4165926882414657, + "grad_norm": 0.6549103856086731, + "learning_rate": 7.387115139215894e-06, + "loss": 2.378, + "step": 17553 + }, + { + "epoch": 1.4166733919780485, + "grad_norm": 0.6902545094490051, + "learning_rate": 7.381161360059996e-06, + "loss": 2.3993, + "step": 17554 + }, + { + "epoch": 1.4167540957146316, + "grad_norm": 0.6871094107627869, + "learning_rate": 7.375209889207668e-06, + "loss": 2.4211, + "step": 17555 + }, + { + "epoch": 1.4168347994512147, + "grad_norm": 0.7043696641921997, + "learning_rate": 7.369260726807226e-06, + "loss": 2.4395, + "step": 17556 + }, + { + "epoch": 1.4169155031877976, + "grad_norm": 0.6889273524284363, + "learning_rate": 7.363313873006949e-06, + "loss": 2.4014, + "step": 17557 + }, + { + "epoch": 1.4169962069243807, + "grad_norm": 0.6670657992362976, + "learning_rate": 7.3573693279550545e-06, + "loss": 2.3943, + "step": 17558 + }, + { + "epoch": 1.4170769106609635, + "grad_norm": 0.7316192984580994, + "learning_rate": 7.3514270917996895e-06, + "loss": 2.3763, + "step": 17559 + }, + { + "epoch": 1.4171576143975466, + "grad_norm": 0.6922768950462341, + "learning_rate": 7.345487164688947e-06, + "loss": 2.4102, + "step": 17560 + }, + { + "epoch": 1.4172383181341295, + "grad_norm": 0.7255418300628662, + "learning_rate": 7.339549546770852e-06, + "loss": 2.4874, + "step": 17561 + }, + { + "epoch": 1.4173190218707126, + "grad_norm": 0.7474549412727356, + "learning_rate": 7.3336142381934206e-06, + "loss": 2.4817, + "step": 17562 + }, + { + "epoch": 1.4173997256072957, + "grad_norm": 0.6574866771697998, + "learning_rate": 7.327681239104534e-06, + "loss": 2.4504, + "step": 17563 + }, + { + "epoch": 1.4174804293438785, + "grad_norm": 0.751109778881073, + "learning_rate": 7.321750549652084e-06, + "loss": 2.482, + "step": 17564 + }, + { + "epoch": 1.4175611330804616, + "grad_norm": 0.6917319297790527, + "learning_rate": 7.315822169983866e-06, + "loss": 2.426, + "step": 17565 + }, + { + "epoch": 1.4176418368170447, + 
"grad_norm": 0.7236911058425903, + "learning_rate": 7.309896100247671e-06, + "loss": 2.4222, + "step": 17566 + }, + { + "epoch": 1.4177225405536276, + "grad_norm": 0.7382739186286926, + "learning_rate": 7.3039723405911145e-06, + "loss": 2.4673, + "step": 17567 + }, + { + "epoch": 1.4178032442902107, + "grad_norm": 0.6394448280334473, + "learning_rate": 7.2980508911618895e-06, + "loss": 2.4301, + "step": 17568 + }, + { + "epoch": 1.4178839480267937, + "grad_norm": 0.7402171492576599, + "learning_rate": 7.292131752107589e-06, + "loss": 2.4345, + "step": 17569 + }, + { + "epoch": 1.4179646517633766, + "grad_norm": 0.6540209054946899, + "learning_rate": 7.286214923575685e-06, + "loss": 2.4025, + "step": 17570 + }, + { + "epoch": 1.4180453554999597, + "grad_norm": 0.7361408472061157, + "learning_rate": 7.280300405713658e-06, + "loss": 2.4383, + "step": 17571 + }, + { + "epoch": 1.4181260592365428, + "grad_norm": 0.7483302354812622, + "learning_rate": 7.274388198668936e-06, + "loss": 2.3909, + "step": 17572 + }, + { + "epoch": 1.4182067629731256, + "grad_norm": 0.7666492462158203, + "learning_rate": 7.268478302588833e-06, + "loss": 2.3646, + "step": 17573 + }, + { + "epoch": 1.4182874667097087, + "grad_norm": 0.7461634278297424, + "learning_rate": 7.262570717620642e-06, + "loss": 2.4247, + "step": 17574 + }, + { + "epoch": 1.4183681704462916, + "grad_norm": 0.6593511700630188, + "learning_rate": 7.256665443911637e-06, + "loss": 2.4373, + "step": 17575 + }, + { + "epoch": 1.4184488741828747, + "grad_norm": 0.6628448963165283, + "learning_rate": 7.250762481608941e-06, + "loss": 2.4028, + "step": 17576 + }, + { + "epoch": 1.4185295779194576, + "grad_norm": 0.7371554970741272, + "learning_rate": 7.244861830859695e-06, + "loss": 2.3893, + "step": 17577 + }, + { + "epoch": 1.4186102816560406, + "grad_norm": 0.6896550059318542, + "learning_rate": 7.238963491810935e-06, + "loss": 2.4039, + "step": 17578 + }, + { + "epoch": 1.4186909853926237, + "grad_norm": 0.6840630173683167, + 
"learning_rate": 7.233067464609722e-06, + "loss": 2.3658, + "step": 17579 + }, + { + "epoch": 1.4187716891292066, + "grad_norm": 0.7413774728775024, + "learning_rate": 7.227173749402949e-06, + "loss": 2.4429, + "step": 17580 + }, + { + "epoch": 1.4188523928657897, + "grad_norm": 0.7088857889175415, + "learning_rate": 7.22128234633751e-06, + "loss": 2.4487, + "step": 17581 + }, + { + "epoch": 1.4189330966023728, + "grad_norm": 0.7451753616333008, + "learning_rate": 7.215393255560265e-06, + "loss": 2.43, + "step": 17582 + }, + { + "epoch": 1.4190138003389556, + "grad_norm": 0.7113354802131653, + "learning_rate": 7.209506477217942e-06, + "loss": 2.4079, + "step": 17583 + }, + { + "epoch": 1.4190945040755387, + "grad_norm": 0.6877462863922119, + "learning_rate": 7.203622011457268e-06, + "loss": 2.4638, + "step": 17584 + }, + { + "epoch": 1.4191752078121218, + "grad_norm": 0.6908687353134155, + "learning_rate": 7.1977398584249345e-06, + "loss": 2.4117, + "step": 17585 + }, + { + "epoch": 1.4192559115487047, + "grad_norm": 0.7053657174110413, + "learning_rate": 7.191860018267482e-06, + "loss": 2.4128, + "step": 17586 + }, + { + "epoch": 1.4193366152852878, + "grad_norm": 0.6886352896690369, + "learning_rate": 7.185982491131493e-06, + "loss": 2.4201, + "step": 17587 + }, + { + "epoch": 1.4194173190218708, + "grad_norm": 0.7148453593254089, + "learning_rate": 7.180107277163428e-06, + "loss": 2.456, + "step": 17588 + }, + { + "epoch": 1.4194980227584537, + "grad_norm": 0.7405968904495239, + "learning_rate": 7.174234376509725e-06, + "loss": 2.371, + "step": 17589 + }, + { + "epoch": 1.4195787264950368, + "grad_norm": 0.6733896136283875, + "learning_rate": 7.168363789316757e-06, + "loss": 2.439, + "step": 17590 + }, + { + "epoch": 1.4196594302316197, + "grad_norm": 0.7196522355079651, + "learning_rate": 7.162495515730838e-06, + "loss": 2.4666, + "step": 17591 + }, + { + "epoch": 1.4197401339682028, + "grad_norm": 0.7885043025016785, + "learning_rate": 7.156629555898198e-06, + 
"loss": 2.3704, + "step": 17592 + }, + { + "epoch": 1.4198208377047856, + "grad_norm": 0.7290148735046387, + "learning_rate": 7.15076590996504e-06, + "loss": 2.4693, + "step": 17593 + }, + { + "epoch": 1.4199015414413687, + "grad_norm": 0.7527376413345337, + "learning_rate": 7.144904578077505e-06, + "loss": 2.5135, + "step": 17594 + }, + { + "epoch": 1.4199822451779518, + "grad_norm": 0.740208625793457, + "learning_rate": 7.139045560381697e-06, + "loss": 2.4153, + "step": 17595 + }, + { + "epoch": 1.4200629489145347, + "grad_norm": 0.7285439968109131, + "learning_rate": 7.133188857023599e-06, + "loss": 2.391, + "step": 17596 + }, + { + "epoch": 1.4201436526511177, + "grad_norm": 0.6705127358436584, + "learning_rate": 7.1273344681491824e-06, + "loss": 2.4037, + "step": 17597 + }, + { + "epoch": 1.4202243563877008, + "grad_norm": 0.7113380432128906, + "learning_rate": 7.121482393904366e-06, + "loss": 2.4395, + "step": 17598 + }, + { + "epoch": 1.4203050601242837, + "grad_norm": 0.6606113314628601, + "learning_rate": 7.1156326344349985e-06, + "loss": 2.4618, + "step": 17599 + }, + { + "epoch": 1.4203857638608668, + "grad_norm": 0.6471076607704163, + "learning_rate": 7.109785189886864e-06, + "loss": 2.4263, + "step": 17600 + }, + { + "epoch": 1.4204664675974499, + "grad_norm": 0.7686622142791748, + "learning_rate": 7.103940060405712e-06, + "loss": 2.3989, + "step": 17601 + }, + { + "epoch": 1.4205471713340327, + "grad_norm": 0.6636856198310852, + "learning_rate": 7.0980972461372035e-06, + "loss": 2.4012, + "step": 17602 + }, + { + "epoch": 1.4206278750706158, + "grad_norm": 0.719194769859314, + "learning_rate": 7.0922567472269444e-06, + "loss": 2.4121, + "step": 17603 + }, + { + "epoch": 1.4207085788071987, + "grad_norm": 0.6569145321846008, + "learning_rate": 7.0864185638205404e-06, + "loss": 2.368, + "step": 17604 + }, + { + "epoch": 1.4207892825437818, + "grad_norm": 0.6548880338668823, + "learning_rate": 7.080582696063442e-06, + "loss": 2.4081, + "step": 17605 + }, 
+ { + "epoch": 1.4208699862803646, + "grad_norm": 0.6192221641540527, + "learning_rate": 7.074749144101112e-06, + "loss": 2.3765, + "step": 17606 + }, + { + "epoch": 1.4209506900169477, + "grad_norm": 0.733065128326416, + "learning_rate": 7.068917908078942e-06, + "loss": 2.4429, + "step": 17607 + }, + { + "epoch": 1.4210313937535308, + "grad_norm": 0.7430265545845032, + "learning_rate": 7.063088988142275e-06, + "loss": 2.4041, + "step": 17608 + }, + { + "epoch": 1.4211120974901137, + "grad_norm": 0.7140394449234009, + "learning_rate": 7.0572623844363584e-06, + "loss": 2.3897, + "step": 17609 + }, + { + "epoch": 1.4211928012266968, + "grad_norm": 0.7149982452392578, + "learning_rate": 7.051438097106422e-06, + "loss": 2.4124, + "step": 17610 + }, + { + "epoch": 1.4212735049632799, + "grad_norm": 0.7337482571601868, + "learning_rate": 7.045616126297638e-06, + "loss": 2.4636, + "step": 17611 + }, + { + "epoch": 1.4213542086998627, + "grad_norm": 0.6936220526695251, + "learning_rate": 7.039796472155058e-06, + "loss": 2.4287, + "step": 17612 + }, + { + "epoch": 1.4214349124364458, + "grad_norm": 0.7598823308944702, + "learning_rate": 7.033979134823765e-06, + "loss": 2.3592, + "step": 17613 + }, + { + "epoch": 1.421515616173029, + "grad_norm": 0.7291054725646973, + "learning_rate": 7.028164114448732e-06, + "loss": 2.4433, + "step": 17614 + }, + { + "epoch": 1.4215963199096118, + "grad_norm": 0.7178683876991272, + "learning_rate": 7.022351411174866e-06, + "loss": 2.4615, + "step": 17615 + }, + { + "epoch": 1.4216770236461949, + "grad_norm": 0.6711047887802124, + "learning_rate": 7.01654102514705e-06, + "loss": 2.3828, + "step": 17616 + }, + { + "epoch": 1.421757727382778, + "grad_norm": 0.7782542705535889, + "learning_rate": 7.010732956510091e-06, + "loss": 2.3609, + "step": 17617 + }, + { + "epoch": 1.4218384311193608, + "grad_norm": 0.7100348472595215, + "learning_rate": 7.004927205408751e-06, + "loss": 2.4107, + "step": 17618 + }, + { + "epoch": 1.421919134855944, + 
"grad_norm": 0.7031453251838684, + "learning_rate": 6.9991237719877145e-06, + "loss": 2.3806, + "step": 17619 + }, + { + "epoch": 1.4219998385925268, + "grad_norm": 0.6231544613838196, + "learning_rate": 6.993322656391632e-06, + "loss": 2.3515, + "step": 17620 + }, + { + "epoch": 1.4220805423291099, + "grad_norm": 0.7339803576469421, + "learning_rate": 6.987523858765055e-06, + "loss": 2.4218, + "step": 17621 + }, + { + "epoch": 1.4221612460656927, + "grad_norm": 0.6874008774757385, + "learning_rate": 6.9817273792525224e-06, + "loss": 2.4308, + "step": 17622 + }, + { + "epoch": 1.4222419498022758, + "grad_norm": 0.692850649356842, + "learning_rate": 6.97593321799851e-06, + "loss": 2.4159, + "step": 17623 + }, + { + "epoch": 1.422322653538859, + "grad_norm": 0.7120705842971802, + "learning_rate": 6.970141375147398e-06, + "loss": 2.4639, + "step": 17624 + }, + { + "epoch": 1.4224033572754418, + "grad_norm": 0.6556580662727356, + "learning_rate": 6.9643518508435425e-06, + "loss": 2.425, + "step": 17625 + }, + { + "epoch": 1.4224840610120248, + "grad_norm": 0.6515032052993774, + "learning_rate": 6.958564645231225e-06, + "loss": 2.3712, + "step": 17626 + }, + { + "epoch": 1.422564764748608, + "grad_norm": 0.6835498213768005, + "learning_rate": 6.95277975845472e-06, + "loss": 2.4274, + "step": 17627 + }, + { + "epoch": 1.4226454684851908, + "grad_norm": 0.7465600967407227, + "learning_rate": 6.9469971906581555e-06, + "loss": 2.4905, + "step": 17628 + }, + { + "epoch": 1.4227261722217739, + "grad_norm": 0.7540421485900879, + "learning_rate": 6.94121694198564e-06, + "loss": 2.4636, + "step": 17629 + }, + { + "epoch": 1.422806875958357, + "grad_norm": 0.8491081595420837, + "learning_rate": 6.935439012581291e-06, + "loss": 2.345, + "step": 17630 + }, + { + "epoch": 1.4228875796949398, + "grad_norm": 0.6806172728538513, + "learning_rate": 6.92966340258906e-06, + "loss": 2.3937, + "step": 17631 + }, + { + "epoch": 1.422968283431523, + "grad_norm": 0.7586994171142578, + 
"learning_rate": 6.9238901121529085e-06, + "loss": 2.3645, + "step": 17632 + }, + { + "epoch": 1.423048987168106, + "grad_norm": 0.6934102773666382, + "learning_rate": 6.918119141416735e-06, + "loss": 2.3861, + "step": 17633 + }, + { + "epoch": 1.4231296909046889, + "grad_norm": 0.7167627215385437, + "learning_rate": 6.912350490524322e-06, + "loss": 2.4044, + "step": 17634 + }, + { + "epoch": 1.423210394641272, + "grad_norm": 0.6630876660346985, + "learning_rate": 6.906584159619478e-06, + "loss": 2.4214, + "step": 17635 + }, + { + "epoch": 1.4232910983778548, + "grad_norm": 0.7125325798988342, + "learning_rate": 6.9008201488459325e-06, + "loss": 2.4516, + "step": 17636 + }, + { + "epoch": 1.423371802114438, + "grad_norm": 0.6531164050102234, + "learning_rate": 6.895058458347281e-06, + "loss": 2.4223, + "step": 17637 + }, + { + "epoch": 1.4234525058510208, + "grad_norm": 0.727008581161499, + "learning_rate": 6.889299088267154e-06, + "loss": 2.446, + "step": 17638 + }, + { + "epoch": 1.4235332095876039, + "grad_norm": 0.7188040614128113, + "learning_rate": 6.883542038749091e-06, + "loss": 2.4109, + "step": 17639 + }, + { + "epoch": 1.423613913324187, + "grad_norm": 0.73248291015625, + "learning_rate": 6.877787309936568e-06, + "loss": 2.4398, + "step": 17640 + }, + { + "epoch": 1.4236946170607698, + "grad_norm": 0.7350964546203613, + "learning_rate": 6.872034901973012e-06, + "loss": 2.4766, + "step": 17641 + }, + { + "epoch": 1.423775320797353, + "grad_norm": 0.7280460596084595, + "learning_rate": 6.866284815001777e-06, + "loss": 2.4588, + "step": 17642 + }, + { + "epoch": 1.423856024533936, + "grad_norm": 0.68912672996521, + "learning_rate": 6.860537049166205e-06, + "loss": 2.353, + "step": 17643 + }, + { + "epoch": 1.4239367282705189, + "grad_norm": 0.6742156147956848, + "learning_rate": 6.85479160460949e-06, + "loss": 2.4123, + "step": 17644 + }, + { + "epoch": 1.424017432007102, + "grad_norm": 0.6858388185501099, + "learning_rate": 6.849048481474863e-06, + "loss": 
2.4243, + "step": 17645 + }, + { + "epoch": 1.424098135743685, + "grad_norm": 0.7317911386489868, + "learning_rate": 6.8433076799054644e-06, + "loss": 2.3713, + "step": 17646 + }, + { + "epoch": 1.424178839480268, + "grad_norm": 0.6934579014778137, + "learning_rate": 6.837569200044325e-06, + "loss": 2.4667, + "step": 17647 + }, + { + "epoch": 1.424259543216851, + "grad_norm": 0.7017713189125061, + "learning_rate": 6.831833042034497e-06, + "loss": 2.3543, + "step": 17648 + }, + { + "epoch": 1.4243402469534339, + "grad_norm": 0.7379886507987976, + "learning_rate": 6.8260992060189325e-06, + "loss": 2.4392, + "step": 17649 + }, + { + "epoch": 1.424420950690017, + "grad_norm": 0.6645724177360535, + "learning_rate": 6.820367692140539e-06, + "loss": 2.4329, + "step": 17650 + }, + { + "epoch": 1.4245016544265998, + "grad_norm": 0.642423689365387, + "learning_rate": 6.814638500542159e-06, + "loss": 2.4157, + "step": 17651 + }, + { + "epoch": 1.424582358163183, + "grad_norm": 0.6720073819160461, + "learning_rate": 6.808911631366588e-06, + "loss": 2.44, + "step": 17652 + }, + { + "epoch": 1.424663061899766, + "grad_norm": 0.6966024041175842, + "learning_rate": 6.803187084756524e-06, + "loss": 2.4087, + "step": 17653 + }, + { + "epoch": 1.4247437656363489, + "grad_norm": 0.6998239755630493, + "learning_rate": 6.797464860854652e-06, + "loss": 2.4335, + "step": 17654 + }, + { + "epoch": 1.424824469372932, + "grad_norm": 0.6885339617729187, + "learning_rate": 6.791744959803614e-06, + "loss": 2.4327, + "step": 17655 + }, + { + "epoch": 1.424905173109515, + "grad_norm": 0.6395631432533264, + "learning_rate": 6.7860273817459294e-06, + "loss": 2.3941, + "step": 17656 + }, + { + "epoch": 1.424985876846098, + "grad_norm": 0.7010350823402405, + "learning_rate": 6.7803121268240956e-06, + "loss": 2.4118, + "step": 17657 + }, + { + "epoch": 1.425066580582681, + "grad_norm": 0.6954346895217896, + "learning_rate": 6.774599195180565e-06, + "loss": 2.416, + "step": 17658 + }, + { + "epoch": 
1.425147284319264, + "grad_norm": 0.6685010194778442, + "learning_rate": 6.768888586957722e-06, + "loss": 2.4246, + "step": 17659 + }, + { + "epoch": 1.425227988055847, + "grad_norm": 0.7244373559951782, + "learning_rate": 6.7631803022978776e-06, + "loss": 2.4385, + "step": 17660 + }, + { + "epoch": 1.42530869179243, + "grad_norm": 0.6633989810943604, + "learning_rate": 6.757474341343306e-06, + "loss": 2.413, + "step": 17661 + }, + { + "epoch": 1.4253893955290131, + "grad_norm": 0.6696286797523499, + "learning_rate": 6.751770704236226e-06, + "loss": 2.4586, + "step": 17662 + }, + { + "epoch": 1.425470099265596, + "grad_norm": 0.7322936654090881, + "learning_rate": 6.746069391118759e-06, + "loss": 2.414, + "step": 17663 + }, + { + "epoch": 1.425550803002179, + "grad_norm": 0.6786227226257324, + "learning_rate": 6.740370402133012e-06, + "loss": 2.3964, + "step": 17664 + }, + { + "epoch": 1.425631506738762, + "grad_norm": 0.6408207416534424, + "learning_rate": 6.734673737421027e-06, + "loss": 2.4064, + "step": 17665 + }, + { + "epoch": 1.425712210475345, + "grad_norm": 0.7589663863182068, + "learning_rate": 6.728979397124768e-06, + "loss": 2.3765, + "step": 17666 + }, + { + "epoch": 1.4257929142119279, + "grad_norm": 0.6696135401725769, + "learning_rate": 6.723287381386145e-06, + "loss": 2.4317, + "step": 17667 + }, + { + "epoch": 1.425873617948511, + "grad_norm": 0.6599292159080505, + "learning_rate": 6.7175976903470325e-06, + "loss": 2.3867, + "step": 17668 + }, + { + "epoch": 1.425954321685094, + "grad_norm": 0.692328929901123, + "learning_rate": 6.711910324149228e-06, + "loss": 2.3996, + "step": 17669 + }, + { + "epoch": 1.426035025421677, + "grad_norm": 0.7615126371383667, + "learning_rate": 6.706225282934475e-06, + "loss": 2.4436, + "step": 17670 + }, + { + "epoch": 1.42611572915826, + "grad_norm": 0.7187603712081909, + "learning_rate": 6.70054256684447e-06, + "loss": 2.4128, + "step": 17671 + }, + { + "epoch": 1.426196432894843, + "grad_norm": 
0.6679204702377319, + "learning_rate": 6.694862176020822e-06, + "loss": 2.423, + "step": 17672 + }, + { + "epoch": 1.426277136631426, + "grad_norm": 0.759952962398529, + "learning_rate": 6.689184110605106e-06, + "loss": 2.4279, + "step": 17673 + }, + { + "epoch": 1.426357840368009, + "grad_norm": 0.6619845628738403, + "learning_rate": 6.683508370738845e-06, + "loss": 2.4219, + "step": 17674 + }, + { + "epoch": 1.4264385441045921, + "grad_norm": 0.6806942224502563, + "learning_rate": 6.6778349565635005e-06, + "loss": 2.4214, + "step": 17675 + }, + { + "epoch": 1.426519247841175, + "grad_norm": 0.6780219674110413, + "learning_rate": 6.672163868220449e-06, + "loss": 2.4404, + "step": 17676 + }, + { + "epoch": 1.426599951577758, + "grad_norm": 0.7276327013969421, + "learning_rate": 6.6664951058510224e-06, + "loss": 2.4088, + "step": 17677 + }, + { + "epoch": 1.4266806553143412, + "grad_norm": 0.7608953714370728, + "learning_rate": 6.66082866959653e-06, + "loss": 2.4102, + "step": 17678 + }, + { + "epoch": 1.426761359050924, + "grad_norm": 0.6784111261367798, + "learning_rate": 6.6551645595981485e-06, + "loss": 2.4823, + "step": 17679 + }, + { + "epoch": 1.4268420627875071, + "grad_norm": 0.6937912106513977, + "learning_rate": 6.649502775997096e-06, + "loss": 2.4118, + "step": 17680 + }, + { + "epoch": 1.42692276652409, + "grad_norm": 0.7426064014434814, + "learning_rate": 6.643843318934462e-06, + "loss": 2.4407, + "step": 17681 + }, + { + "epoch": 1.427003470260673, + "grad_norm": 0.6722440719604492, + "learning_rate": 6.638186188551277e-06, + "loss": 2.3981, + "step": 17682 + }, + { + "epoch": 1.427084173997256, + "grad_norm": 0.6830718517303467, + "learning_rate": 6.632531384988538e-06, + "loss": 2.4076, + "step": 17683 + }, + { + "epoch": 1.427164877733839, + "grad_norm": 0.6521410942077637, + "learning_rate": 6.626878908387202e-06, + "loss": 2.4311, + "step": 17684 + }, + { + "epoch": 1.4272455814704221, + "grad_norm": 0.7150115966796875, + "learning_rate": 
6.6212287588880985e-06, + "loss": 2.4776, + "step": 17685 + }, + { + "epoch": 1.427326285207005, + "grad_norm": 0.6741146445274353, + "learning_rate": 6.615580936632082e-06, + "loss": 2.4134, + "step": 17686 + }, + { + "epoch": 1.427406988943588, + "grad_norm": 0.6979508996009827, + "learning_rate": 6.6099354417599064e-06, + "loss": 2.4022, + "step": 17687 + }, + { + "epoch": 1.4274876926801712, + "grad_norm": 0.7078632712364197, + "learning_rate": 6.604292274412249e-06, + "loss": 2.4259, + "step": 17688 + }, + { + "epoch": 1.427568396416754, + "grad_norm": 0.6485830545425415, + "learning_rate": 6.598651434729764e-06, + "loss": 2.3641, + "step": 17689 + }, + { + "epoch": 1.4276491001533371, + "grad_norm": 0.7130312919616699, + "learning_rate": 6.593012922853048e-06, + "loss": 2.3965, + "step": 17690 + }, + { + "epoch": 1.4277298038899202, + "grad_norm": 0.6736258268356323, + "learning_rate": 6.587376738922613e-06, + "loss": 2.3729, + "step": 17691 + }, + { + "epoch": 1.427810507626503, + "grad_norm": 0.6798346638679504, + "learning_rate": 6.581742883078923e-06, + "loss": 2.4479, + "step": 17692 + }, + { + "epoch": 1.4278912113630862, + "grad_norm": 0.6962637901306152, + "learning_rate": 6.576111355462411e-06, + "loss": 2.4433, + "step": 17693 + }, + { + "epoch": 1.4279719150996693, + "grad_norm": 0.6981319785118103, + "learning_rate": 6.570482156213431e-06, + "loss": 2.4564, + "step": 17694 + }, + { + "epoch": 1.4280526188362521, + "grad_norm": 0.6484888195991516, + "learning_rate": 6.564855285472238e-06, + "loss": 2.3709, + "step": 17695 + }, + { + "epoch": 1.428133322572835, + "grad_norm": 0.6646093726158142, + "learning_rate": 6.5592307433791074e-06, + "loss": 2.3716, + "step": 17696 + }, + { + "epoch": 1.428214026309418, + "grad_norm": 0.7607010006904602, + "learning_rate": 6.5536085300742065e-06, + "loss": 2.4029, + "step": 17697 + }, + { + "epoch": 1.4282947300460012, + "grad_norm": 0.7242185473442078, + "learning_rate": 6.547988645697644e-06, + "loss": 
2.4091, + "step": 17698 + }, + { + "epoch": 1.428375433782584, + "grad_norm": 0.7394922375679016, + "learning_rate": 6.542371090389487e-06, + "loss": 2.4288, + "step": 17699 + }, + { + "epoch": 1.4284561375191671, + "grad_norm": 0.6763161420822144, + "learning_rate": 6.536755864289745e-06, + "loss": 2.3556, + "step": 17700 + }, + { + "epoch": 1.4285368412557502, + "grad_norm": 0.6837669610977173, + "learning_rate": 6.531142967538362e-06, + "loss": 2.4312, + "step": 17701 + }, + { + "epoch": 1.428617544992333, + "grad_norm": 0.6702602505683899, + "learning_rate": 6.525532400275225e-06, + "loss": 2.4144, + "step": 17702 + }, + { + "epoch": 1.4286982487289162, + "grad_norm": 0.7338566780090332, + "learning_rate": 6.519924162640167e-06, + "loss": 2.4536, + "step": 17703 + }, + { + "epoch": 1.4287789524654992, + "grad_norm": 0.7169400453567505, + "learning_rate": 6.514318254772967e-06, + "loss": 2.4236, + "step": 17704 + }, + { + "epoch": 1.428859656202082, + "grad_norm": 0.7129381895065308, + "learning_rate": 6.508714676813321e-06, + "loss": 2.393, + "step": 17705 + }, + { + "epoch": 1.4289403599386652, + "grad_norm": 0.7212249636650085, + "learning_rate": 6.503113428900898e-06, + "loss": 2.3907, + "step": 17706 + }, + { + "epoch": 1.4290210636752483, + "grad_norm": 0.7539047002792358, + "learning_rate": 6.497514511175296e-06, + "loss": 2.434, + "step": 17707 + }, + { + "epoch": 1.4291017674118311, + "grad_norm": 0.6876792907714844, + "learning_rate": 6.491917923776048e-06, + "loss": 2.4172, + "step": 17708 + }, + { + "epoch": 1.4291824711484142, + "grad_norm": 0.6665194034576416, + "learning_rate": 6.486323666842631e-06, + "loss": 2.4277, + "step": 17709 + }, + { + "epoch": 1.429263174884997, + "grad_norm": 0.7311907410621643, + "learning_rate": 6.4807317405144675e-06, + "loss": 2.4201, + "step": 17710 + }, + { + "epoch": 1.4293438786215802, + "grad_norm": 0.6492041349411011, + "learning_rate": 6.475142144930946e-06, + "loss": 2.425, + "step": 17711 + }, + { + 
"epoch": 1.429424582358163, + "grad_norm": 0.7610225677490234, + "learning_rate": 6.469554880231343e-06, + "loss": 2.4694, + "step": 17712 + }, + { + "epoch": 1.4295052860947461, + "grad_norm": 0.7112852931022644, + "learning_rate": 6.463969946554948e-06, + "loss": 2.4431, + "step": 17713 + }, + { + "epoch": 1.4295859898313292, + "grad_norm": 0.6712578535079956, + "learning_rate": 6.458387344040917e-06, + "loss": 2.4067, + "step": 17714 + }, + { + "epoch": 1.429666693567912, + "grad_norm": 0.6936217546463013, + "learning_rate": 6.452807072828393e-06, + "loss": 2.4229, + "step": 17715 + }, + { + "epoch": 1.4297473973044952, + "grad_norm": 0.6615330576896667, + "learning_rate": 6.4472291330564535e-06, + "loss": 2.3567, + "step": 17716 + }, + { + "epoch": 1.4298281010410783, + "grad_norm": 0.7209796905517578, + "learning_rate": 6.441653524864111e-06, + "loss": 2.3577, + "step": 17717 + }, + { + "epoch": 1.4299088047776611, + "grad_norm": 0.7022082805633545, + "learning_rate": 6.436080248390319e-06, + "loss": 2.3681, + "step": 17718 + }, + { + "epoch": 1.4299895085142442, + "grad_norm": 0.6859815120697021, + "learning_rate": 6.430509303773991e-06, + "loss": 2.4193, + "step": 17719 + }, + { + "epoch": 1.4300702122508273, + "grad_norm": 0.7126015424728394, + "learning_rate": 6.424940691153969e-06, + "loss": 2.3746, + "step": 17720 + }, + { + "epoch": 1.4301509159874102, + "grad_norm": 0.6499980092048645, + "learning_rate": 6.419374410669021e-06, + "loss": 2.445, + "step": 17721 + }, + { + "epoch": 1.4302316197239933, + "grad_norm": 0.6867473125457764, + "learning_rate": 6.413810462457892e-06, + "loss": 2.3323, + "step": 17722 + }, + { + "epoch": 1.4303123234605764, + "grad_norm": 0.7272062301635742, + "learning_rate": 6.4082488466592596e-06, + "loss": 2.4058, + "step": 17723 + }, + { + "epoch": 1.4303930271971592, + "grad_norm": 0.7681101560592651, + "learning_rate": 6.40268956341169e-06, + "loss": 2.4534, + "step": 17724 + }, + { + "epoch": 1.4304737309337423, + 
"grad_norm": 0.8149757981300354, + "learning_rate": 6.397132612853773e-06, + "loss": 2.4165, + "step": 17725 + }, + { + "epoch": 1.4305544346703252, + "grad_norm": 0.6749057769775391, + "learning_rate": 6.39157799512401e-06, + "loss": 2.364, + "step": 17726 + }, + { + "epoch": 1.4306351384069083, + "grad_norm": 0.716894268989563, + "learning_rate": 6.386025710360799e-06, + "loss": 2.4379, + "step": 17727 + }, + { + "epoch": 1.4307158421434911, + "grad_norm": 0.738310694694519, + "learning_rate": 6.380475758702531e-06, + "loss": 2.3938, + "step": 17728 + }, + { + "epoch": 1.4307965458800742, + "grad_norm": 0.7101424336433411, + "learning_rate": 6.3749281402875505e-06, + "loss": 2.4629, + "step": 17729 + }, + { + "epoch": 1.4308772496166573, + "grad_norm": 0.6945566534996033, + "learning_rate": 6.369382855254069e-06, + "loss": 2.4235, + "step": 17730 + }, + { + "epoch": 1.4309579533532402, + "grad_norm": 0.7886360287666321, + "learning_rate": 6.363839903740332e-06, + "loss": 2.4284, + "step": 17731 + }, + { + "epoch": 1.4310386570898233, + "grad_norm": 0.7391656637191772, + "learning_rate": 6.358299285884495e-06, + "loss": 2.379, + "step": 17732 + }, + { + "epoch": 1.4311193608264063, + "grad_norm": 0.6601181626319885, + "learning_rate": 6.352761001824603e-06, + "loss": 2.3646, + "step": 17733 + }, + { + "epoch": 1.4312000645629892, + "grad_norm": 0.7043817043304443, + "learning_rate": 6.347225051698702e-06, + "loss": 2.4055, + "step": 17734 + }, + { + "epoch": 1.4312807682995723, + "grad_norm": 0.7078529000282288, + "learning_rate": 6.341691435644759e-06, + "loss": 2.3811, + "step": 17735 + }, + { + "epoch": 1.4313614720361554, + "grad_norm": 0.7172150015830994, + "learning_rate": 6.336160153800707e-06, + "loss": 2.3854, + "step": 17736 + }, + { + "epoch": 1.4314421757727382, + "grad_norm": 0.6997926235198975, + "learning_rate": 6.330631206304383e-06, + "loss": 2.3534, + "step": 17737 + }, + { + "epoch": 1.4315228795093213, + "grad_norm": 0.7089913487434387, + 
"learning_rate": 6.325104593293563e-06, + "loss": 2.4508, + "step": 17738 + }, + { + "epoch": 1.4316035832459044, + "grad_norm": 0.7183980345726013, + "learning_rate": 6.319580314906037e-06, + "loss": 2.3972, + "step": 17739 + }, + { + "epoch": 1.4316842869824873, + "grad_norm": 0.6621310710906982, + "learning_rate": 6.3140583712794295e-06, + "loss": 2.3512, + "step": 17740 + }, + { + "epoch": 1.4317649907190704, + "grad_norm": 0.7076746821403503, + "learning_rate": 6.308538762551386e-06, + "loss": 2.4544, + "step": 17741 + }, + { + "epoch": 1.4318456944556532, + "grad_norm": 0.7050352692604065, + "learning_rate": 6.303021488859462e-06, + "loss": 2.3314, + "step": 17742 + }, + { + "epoch": 1.4319263981922363, + "grad_norm": 0.7305126190185547, + "learning_rate": 6.297506550341181e-06, + "loss": 2.4232, + "step": 17743 + }, + { + "epoch": 1.4320071019288192, + "grad_norm": 0.7779221534729004, + "learning_rate": 6.291993947133967e-06, + "loss": 2.4861, + "step": 17744 + }, + { + "epoch": 1.4320878056654023, + "grad_norm": 0.7207643389701843, + "learning_rate": 6.286483679375244e-06, + "loss": 2.4184, + "step": 17745 + }, + { + "epoch": 1.4321685094019854, + "grad_norm": 0.7540406584739685, + "learning_rate": 6.280975747202289e-06, + "loss": 2.4741, + "step": 17746 + }, + { + "epoch": 1.4322492131385682, + "grad_norm": 0.7011128067970276, + "learning_rate": 6.275470150752416e-06, + "loss": 2.3661, + "step": 17747 + }, + { + "epoch": 1.4323299168751513, + "grad_norm": 0.666495680809021, + "learning_rate": 6.269966890162837e-06, + "loss": 2.4294, + "step": 17748 + }, + { + "epoch": 1.4324106206117344, + "grad_norm": 0.7928789854049683, + "learning_rate": 6.264465965570676e-06, + "loss": 2.3722, + "step": 17749 + }, + { + "epoch": 1.4324913243483173, + "grad_norm": 0.778322160243988, + "learning_rate": 6.258967377113056e-06, + "loss": 2.4365, + "step": 17750 + }, + { + "epoch": 1.4325720280849004, + "grad_norm": 0.7157254815101624, + "learning_rate": 
6.2534711249270015e-06, + "loss": 2.4222, + "step": 17751 + }, + { + "epoch": 1.4326527318214834, + "grad_norm": 0.752855122089386, + "learning_rate": 6.247977209149514e-06, + "loss": 2.4195, + "step": 17752 + }, + { + "epoch": 1.4327334355580663, + "grad_norm": 0.6898384690284729, + "learning_rate": 6.242485629917494e-06, + "loss": 2.372, + "step": 17753 + }, + { + "epoch": 1.4328141392946494, + "grad_norm": 0.6400893330574036, + "learning_rate": 6.236996387367822e-06, + "loss": 2.3678, + "step": 17754 + }, + { + "epoch": 1.4328948430312323, + "grad_norm": 0.6957802176475525, + "learning_rate": 6.23150948163731e-06, + "loss": 2.4423, + "step": 17755 + }, + { + "epoch": 1.4329755467678154, + "grad_norm": 0.6983963251113892, + "learning_rate": 6.226024912862683e-06, + "loss": 2.3467, + "step": 17756 + }, + { + "epoch": 1.4330562505043982, + "grad_norm": 0.697910487651825, + "learning_rate": 6.220542681180652e-06, + "loss": 2.3676, + "step": 17757 + }, + { + "epoch": 1.4331369542409813, + "grad_norm": 0.6732818484306335, + "learning_rate": 6.215062786727843e-06, + "loss": 2.4259, + "step": 17758 + }, + { + "epoch": 1.4332176579775644, + "grad_norm": 0.6379408240318298, + "learning_rate": 6.209585229640813e-06, + "loss": 2.409, + "step": 17759 + }, + { + "epoch": 1.4332983617141473, + "grad_norm": 0.6726407408714294, + "learning_rate": 6.2041100100560856e-06, + "loss": 2.3732, + "step": 17760 + }, + { + "epoch": 1.4333790654507303, + "grad_norm": 0.7126357555389404, + "learning_rate": 6.19863712811013e-06, + "loss": 2.4324, + "step": 17761 + }, + { + "epoch": 1.4334597691873134, + "grad_norm": 0.7055345773696899, + "learning_rate": 6.193166583939336e-06, + "loss": 2.463, + "step": 17762 + }, + { + "epoch": 1.4335404729238963, + "grad_norm": 0.6864510774612427, + "learning_rate": 6.18769837768004e-06, + "loss": 2.4155, + "step": 17763 + }, + { + "epoch": 1.4336211766604794, + "grad_norm": 0.7269968390464783, + "learning_rate": 6.182232509468544e-06, + "loss": 2.4197, + 
"step": 17764 + }, + { + "epoch": 1.4337018803970625, + "grad_norm": 0.7829548716545105, + "learning_rate": 6.176768979441039e-06, + "loss": 2.4054, + "step": 17765 + }, + { + "epoch": 1.4337825841336453, + "grad_norm": 0.6840609312057495, + "learning_rate": 6.171307787733704e-06, + "loss": 2.4177, + "step": 17766 + }, + { + "epoch": 1.4338632878702284, + "grad_norm": 0.7106159925460815, + "learning_rate": 6.165848934482654e-06, + "loss": 2.4039, + "step": 17767 + }, + { + "epoch": 1.4339439916068115, + "grad_norm": 0.6945303082466125, + "learning_rate": 6.160392419823957e-06, + "loss": 2.45, + "step": 17768 + }, + { + "epoch": 1.4340246953433944, + "grad_norm": 0.6924156546592712, + "learning_rate": 6.15493824389356e-06, + "loss": 2.4059, + "step": 17769 + }, + { + "epoch": 1.4341053990799775, + "grad_norm": 0.6932214498519897, + "learning_rate": 6.149486406827409e-06, + "loss": 2.4046, + "step": 17770 + }, + { + "epoch": 1.4341861028165603, + "grad_norm": 0.6683449149131775, + "learning_rate": 6.144036908761386e-06, + "loss": 2.4074, + "step": 17771 + }, + { + "epoch": 1.4342668065531434, + "grad_norm": 0.7230218052864075, + "learning_rate": 6.138589749831314e-06, + "loss": 2.3718, + "step": 17772 + }, + { + "epoch": 1.4343475102897263, + "grad_norm": 0.68938809633255, + "learning_rate": 6.133144930172929e-06, + "loss": 2.3776, + "step": 17773 + }, + { + "epoch": 1.4344282140263094, + "grad_norm": 0.6659870743751526, + "learning_rate": 6.127702449921968e-06, + "loss": 2.3779, + "step": 17774 + }, + { + "epoch": 1.4345089177628925, + "grad_norm": 0.7351429462432861, + "learning_rate": 6.122262309214033e-06, + "loss": 2.334, + "step": 17775 + }, + { + "epoch": 1.4345896214994753, + "grad_norm": 0.6995889544487, + "learning_rate": 6.116824508184715e-06, + "loss": 2.4139, + "step": 17776 + }, + { + "epoch": 1.4346703252360584, + "grad_norm": 0.6568582653999329, + "learning_rate": 6.111389046969551e-06, + "loss": 2.4348, + "step": 17777 + }, + { + "epoch": 
1.4347510289726415, + "grad_norm": 0.7047903537750244, + "learning_rate": 6.1059559257039985e-06, + "loss": 2.3877, + "step": 17778 + }, + { + "epoch": 1.4348317327092244, + "grad_norm": 0.7299826145172119, + "learning_rate": 6.10052514452345e-06, + "loss": 2.4533, + "step": 17779 + }, + { + "epoch": 1.4349124364458075, + "grad_norm": 0.6617172956466675, + "learning_rate": 6.095096703563296e-06, + "loss": 2.4276, + "step": 17780 + }, + { + "epoch": 1.4349931401823905, + "grad_norm": 0.7248536944389343, + "learning_rate": 6.089670602958775e-06, + "loss": 2.4145, + "step": 17781 + }, + { + "epoch": 1.4350738439189734, + "grad_norm": 0.7404766082763672, + "learning_rate": 6.084246842845154e-06, + "loss": 2.4556, + "step": 17782 + }, + { + "epoch": 1.4351545476555565, + "grad_norm": 0.6808308362960815, + "learning_rate": 6.0788254233576035e-06, + "loss": 2.3648, + "step": 17783 + }, + { + "epoch": 1.4352352513921396, + "grad_norm": 0.6631487011909485, + "learning_rate": 6.073406344631249e-06, + "loss": 2.4064, + "step": 17784 + }, + { + "epoch": 1.4353159551287225, + "grad_norm": 0.6690654158592224, + "learning_rate": 6.067989606801128e-06, + "loss": 2.4749, + "step": 17785 + }, + { + "epoch": 1.4353966588653055, + "grad_norm": 0.6438129544258118, + "learning_rate": 6.062575210002241e-06, + "loss": 2.424, + "step": 17786 + }, + { + "epoch": 1.4354773626018884, + "grad_norm": 0.710590124130249, + "learning_rate": 6.05716315436955e-06, + "loss": 2.4419, + "step": 17787 + }, + { + "epoch": 1.4355580663384715, + "grad_norm": 0.72870272397995, + "learning_rate": 6.0517534400378995e-06, + "loss": 2.4341, + "step": 17788 + }, + { + "epoch": 1.4356387700750544, + "grad_norm": 0.6548538208007812, + "learning_rate": 6.04634606714215e-06, + "loss": 2.3721, + "step": 17789 + }, + { + "epoch": 1.4357194738116374, + "grad_norm": 0.7368030548095703, + "learning_rate": 6.040941035817061e-06, + "loss": 2.461, + "step": 17790 + }, + { + "epoch": 1.4358001775482205, + "grad_norm": 
0.7763129472732544, + "learning_rate": 6.035538346197311e-06, + "loss": 2.4701, + "step": 17791 + }, + { + "epoch": 1.4358808812848034, + "grad_norm": 0.7631728649139404, + "learning_rate": 6.030137998417573e-06, + "loss": 2.4796, + "step": 17792 + }, + { + "epoch": 1.4359615850213865, + "grad_norm": 0.7032707929611206, + "learning_rate": 6.024739992612449e-06, + "loss": 2.4119, + "step": 17793 + }, + { + "epoch": 1.4360422887579696, + "grad_norm": 0.701252818107605, + "learning_rate": 6.019344328916454e-06, + "loss": 2.4501, + "step": 17794 + }, + { + "epoch": 1.4361229924945524, + "grad_norm": 0.7271695733070374, + "learning_rate": 6.013951007464058e-06, + "loss": 2.4136, + "step": 17795 + }, + { + "epoch": 1.4362036962311355, + "grad_norm": 0.6560700535774231, + "learning_rate": 6.0085600283897095e-06, + "loss": 2.3737, + "step": 17796 + }, + { + "epoch": 1.4362843999677186, + "grad_norm": 0.6831890344619751, + "learning_rate": 6.003171391827722e-06, + "loss": 2.3986, + "step": 17797 + }, + { + "epoch": 1.4363651037043015, + "grad_norm": 0.6875705718994141, + "learning_rate": 5.997785097912412e-06, + "loss": 2.4159, + "step": 17798 + }, + { + "epoch": 1.4364458074408846, + "grad_norm": 0.704727053642273, + "learning_rate": 5.992401146778026e-06, + "loss": 2.3833, + "step": 17799 + }, + { + "epoch": 1.4365265111774674, + "grad_norm": 0.6632246971130371, + "learning_rate": 5.987019538558758e-06, + "loss": 2.3907, + "step": 17800 + }, + { + "epoch": 1.4366072149140505, + "grad_norm": 0.7065477967262268, + "learning_rate": 5.981640273388689e-06, + "loss": 2.3473, + "step": 17801 + }, + { + "epoch": 1.4366879186506334, + "grad_norm": 0.6765400171279907, + "learning_rate": 5.976263351401923e-06, + "loss": 2.4051, + "step": 17802 + }, + { + "epoch": 1.4367686223872165, + "grad_norm": 0.6867364645004272, + "learning_rate": 5.9708887727324525e-06, + "loss": 2.3452, + "step": 17803 + }, + { + "epoch": 1.4368493261237996, + "grad_norm": 0.644715428352356, + 
"learning_rate": 5.965516537514215e-06, + "loss": 2.3826, + "step": 17804 + }, + { + "epoch": 1.4369300298603824, + "grad_norm": 0.7649596333503723, + "learning_rate": 5.9601466458811265e-06, + "loss": 2.436, + "step": 17805 + }, + { + "epoch": 1.4370107335969655, + "grad_norm": 0.699653148651123, + "learning_rate": 5.954779097967023e-06, + "loss": 2.3694, + "step": 17806 + }, + { + "epoch": 1.4370914373335486, + "grad_norm": 0.7054964900016785, + "learning_rate": 5.949413893905642e-06, + "loss": 2.4194, + "step": 17807 + }, + { + "epoch": 1.4371721410701315, + "grad_norm": 0.7534568309783936, + "learning_rate": 5.944051033830722e-06, + "loss": 2.4175, + "step": 17808 + }, + { + "epoch": 1.4372528448067146, + "grad_norm": 0.7056108117103577, + "learning_rate": 5.9386905178759225e-06, + "loss": 2.4232, + "step": 17809 + }, + { + "epoch": 1.4373335485432976, + "grad_norm": 0.6868974566459656, + "learning_rate": 5.933332346174825e-06, + "loss": 2.3799, + "step": 17810 + }, + { + "epoch": 1.4374142522798805, + "grad_norm": 0.7155748009681702, + "learning_rate": 5.927976518860978e-06, + "loss": 2.4151, + "step": 17811 + }, + { + "epoch": 1.4374949560164636, + "grad_norm": 0.7482681274414062, + "learning_rate": 5.922623036067853e-06, + "loss": 2.4568, + "step": 17812 + }, + { + "epoch": 1.4375756597530467, + "grad_norm": 0.6348850727081299, + "learning_rate": 5.917271897928889e-06, + "loss": 2.4202, + "step": 17813 + }, + { + "epoch": 1.4376563634896296, + "grad_norm": 0.7463829517364502, + "learning_rate": 5.911923104577455e-06, + "loss": 2.4288, + "step": 17814 + }, + { + "epoch": 1.4377370672262126, + "grad_norm": 0.7019917964935303, + "learning_rate": 5.9065766561468335e-06, + "loss": 2.475, + "step": 17815 + }, + { + "epoch": 1.4378177709627955, + "grad_norm": 0.7005626559257507, + "learning_rate": 5.9012325527702975e-06, + "loss": 2.3869, + "step": 17816 + }, + { + "epoch": 1.4378984746993786, + "grad_norm": 0.7216863632202148, + "learning_rate": 
5.895890794581016e-06, + "loss": 2.4224, + "step": 17817 + }, + { + "epoch": 1.4379791784359615, + "grad_norm": 0.7037425637245178, + "learning_rate": 5.890551381712128e-06, + "loss": 2.4347, + "step": 17818 + }, + { + "epoch": 1.4380598821725445, + "grad_norm": 0.7240646481513977, + "learning_rate": 5.8852143142967055e-06, + "loss": 2.4275, + "step": 17819 + }, + { + "epoch": 1.4381405859091276, + "grad_norm": 0.6970441937446594, + "learning_rate": 5.879879592467763e-06, + "loss": 2.4526, + "step": 17820 + }, + { + "epoch": 1.4382212896457105, + "grad_norm": 0.6941537857055664, + "learning_rate": 5.8745472163582395e-06, + "loss": 2.4882, + "step": 17821 + }, + { + "epoch": 1.4383019933822936, + "grad_norm": 0.668228030204773, + "learning_rate": 5.86921718610105e-06, + "loss": 2.3824, + "step": 17822 + }, + { + "epoch": 1.4383826971188767, + "grad_norm": 0.6851341128349304, + "learning_rate": 5.863889501829034e-06, + "loss": 2.3931, + "step": 17823 + }, + { + "epoch": 1.4384634008554595, + "grad_norm": 0.6785841584205627, + "learning_rate": 5.858564163674962e-06, + "loss": 2.4268, + "step": 17824 + }, + { + "epoch": 1.4385441045920426, + "grad_norm": 0.7137345671653748, + "learning_rate": 5.853241171771573e-06, + "loss": 2.3509, + "step": 17825 + }, + { + "epoch": 1.4386248083286257, + "grad_norm": 0.7188790440559387, + "learning_rate": 5.847920526251505e-06, + "loss": 2.422, + "step": 17826 + }, + { + "epoch": 1.4387055120652086, + "grad_norm": 0.6798515915870667, + "learning_rate": 5.842602227247374e-06, + "loss": 2.3917, + "step": 17827 + }, + { + "epoch": 1.4387862158017917, + "grad_norm": 0.7113839387893677, + "learning_rate": 5.837286274891718e-06, + "loss": 2.4119, + "step": 17828 + }, + { + "epoch": 1.4388669195383748, + "grad_norm": 0.6735878586769104, + "learning_rate": 5.831972669317054e-06, + "loss": 2.3973, + "step": 17829 + }, + { + "epoch": 1.4389476232749576, + "grad_norm": 0.6665332913398743, + "learning_rate": 5.8266614106557645e-06, + "loss": 
2.3567, + "step": 17830 + }, + { + "epoch": 1.4390283270115407, + "grad_norm": 0.6652774214744568, + "learning_rate": 5.821352499040256e-06, + "loss": 2.4022, + "step": 17831 + }, + { + "epoch": 1.4391090307481236, + "grad_norm": 0.672563910484314, + "learning_rate": 5.8160459346028205e-06, + "loss": 2.4142, + "step": 17832 + }, + { + "epoch": 1.4391897344847067, + "grad_norm": 0.6333127021789551, + "learning_rate": 5.8107417174757205e-06, + "loss": 2.3679, + "step": 17833 + }, + { + "epoch": 1.4392704382212895, + "grad_norm": 0.7484139204025269, + "learning_rate": 5.80543984779115e-06, + "loss": 2.408, + "step": 17834 + }, + { + "epoch": 1.4393511419578726, + "grad_norm": 0.687872052192688, + "learning_rate": 5.800140325681269e-06, + "loss": 2.3956, + "step": 17835 + }, + { + "epoch": 1.4394318456944557, + "grad_norm": 0.716371476650238, + "learning_rate": 5.794843151278107e-06, + "loss": 2.4134, + "step": 17836 + }, + { + "epoch": 1.4395125494310386, + "grad_norm": 0.7058377265930176, + "learning_rate": 5.789548324713711e-06, + "loss": 2.3758, + "step": 17837 + }, + { + "epoch": 1.4395932531676217, + "grad_norm": 0.6678213477134705, + "learning_rate": 5.784255846120057e-06, + "loss": 2.437, + "step": 17838 + }, + { + "epoch": 1.4396739569042047, + "grad_norm": 0.659657895565033, + "learning_rate": 5.778965715629015e-06, + "loss": 2.4551, + "step": 17839 + }, + { + "epoch": 1.4397546606407876, + "grad_norm": 0.7233473062515259, + "learning_rate": 5.773677933372445e-06, + "loss": 2.422, + "step": 17840 + }, + { + "epoch": 1.4398353643773707, + "grad_norm": 0.6661399006843567, + "learning_rate": 5.768392499482144e-06, + "loss": 2.4354, + "step": 17841 + }, + { + "epoch": 1.4399160681139538, + "grad_norm": 0.700758695602417, + "learning_rate": 5.763109414089807e-06, + "loss": 2.4248, + "step": 17842 + }, + { + "epoch": 1.4399967718505366, + "grad_norm": 0.7119004130363464, + "learning_rate": 5.757828677327104e-06, + "loss": 2.4281, + "step": 17843 + }, + { + "epoch": 
1.4400774755871197, + "grad_norm": 0.6928756237030029, + "learning_rate": 5.752550289325687e-06, + "loss": 2.431, + "step": 17844 + }, + { + "epoch": 1.4401581793237028, + "grad_norm": 0.7062112092971802, + "learning_rate": 5.747274250217094e-06, + "loss": 2.3986, + "step": 17845 + }, + { + "epoch": 1.4402388830602857, + "grad_norm": 0.7257757782936096, + "learning_rate": 5.742000560132787e-06, + "loss": 2.398, + "step": 17846 + }, + { + "epoch": 1.4403195867968688, + "grad_norm": 0.7206892371177673, + "learning_rate": 5.736729219204218e-06, + "loss": 2.4126, + "step": 17847 + }, + { + "epoch": 1.4404002905334516, + "grad_norm": 0.6752306818962097, + "learning_rate": 5.73146022756278e-06, + "loss": 2.3732, + "step": 17848 + }, + { + "epoch": 1.4404809942700347, + "grad_norm": 0.6507758498191833, + "learning_rate": 5.726193585339756e-06, + "loss": 2.42, + "step": 17849 + }, + { + "epoch": 1.4405616980066176, + "grad_norm": 0.6858177781105042, + "learning_rate": 5.7209292926664325e-06, + "loss": 2.3956, + "step": 17850 + }, + { + "epoch": 1.4406424017432007, + "grad_norm": 0.7283064723014832, + "learning_rate": 5.715667349674003e-06, + "loss": 2.4295, + "step": 17851 + }, + { + "epoch": 1.4407231054797838, + "grad_norm": 0.7306254506111145, + "learning_rate": 5.710407756493597e-06, + "loss": 2.4017, + "step": 17852 + }, + { + "epoch": 1.4408038092163666, + "grad_norm": 0.6728531122207642, + "learning_rate": 5.7051505132562965e-06, + "loss": 2.3767, + "step": 17853 + }, + { + "epoch": 1.4408845129529497, + "grad_norm": 0.6739331483840942, + "learning_rate": 5.699895620093143e-06, + "loss": 2.4215, + "step": 17854 + }, + { + "epoch": 1.4409652166895328, + "grad_norm": 0.6646329760551453, + "learning_rate": 5.6946430771350975e-06, + "loss": 2.3565, + "step": 17855 + }, + { + "epoch": 1.4410459204261157, + "grad_norm": 0.7297715544700623, + "learning_rate": 5.6893928845130565e-06, + "loss": 2.4182, + "step": 17856 + }, + { + "epoch": 1.4411266241626988, + "grad_norm": 
0.7202762961387634, + "learning_rate": 5.684145042357891e-06, + "loss": 2.4061, + "step": 17857 + }, + { + "epoch": 1.4412073278992819, + "grad_norm": 0.6860011219978333, + "learning_rate": 5.678899550800354e-06, + "loss": 2.4116, + "step": 17858 + }, + { + "epoch": 1.4412880316358647, + "grad_norm": 0.8249632120132446, + "learning_rate": 5.6736564099712064e-06, + "loss": 2.44, + "step": 17859 + }, + { + "epoch": 1.4413687353724478, + "grad_norm": 0.6403428912162781, + "learning_rate": 5.668415620001111e-06, + "loss": 2.4067, + "step": 17860 + }, + { + "epoch": 1.4414494391090307, + "grad_norm": 0.7119578123092651, + "learning_rate": 5.663177181020696e-06, + "loss": 2.4161, + "step": 17861 + }, + { + "epoch": 1.4415301428456138, + "grad_norm": 0.6670625805854797, + "learning_rate": 5.65794109316049e-06, + "loss": 2.4548, + "step": 17862 + }, + { + "epoch": 1.4416108465821966, + "grad_norm": 0.7028807997703552, + "learning_rate": 5.652707356551001e-06, + "loss": 2.4008, + "step": 17863 + }, + { + "epoch": 1.4416915503187797, + "grad_norm": 0.7150121331214905, + "learning_rate": 5.64747597132268e-06, + "loss": 2.3776, + "step": 17864 + }, + { + "epoch": 1.4417722540553628, + "grad_norm": 0.6778405904769897, + "learning_rate": 5.642246937605888e-06, + "loss": 2.4485, + "step": 17865 + }, + { + "epoch": 1.4418529577919457, + "grad_norm": 0.7118825316429138, + "learning_rate": 5.637020255530967e-06, + "loss": 2.3808, + "step": 17866 + }, + { + "epoch": 1.4419336615285288, + "grad_norm": 0.7020435929298401, + "learning_rate": 5.631795925228178e-06, + "loss": 2.3947, + "step": 17867 + }, + { + "epoch": 1.4420143652651118, + "grad_norm": 0.6727933287620544, + "learning_rate": 5.626573946827696e-06, + "loss": 2.3789, + "step": 17868 + }, + { + "epoch": 1.4420950690016947, + "grad_norm": 0.7938553690910339, + "learning_rate": 5.621354320459693e-06, + "loss": 2.4262, + "step": 17869 + }, + { + "epoch": 1.4421757727382778, + "grad_norm": 0.6903455853462219, + "learning_rate": 
5.616137046254255e-06, + "loss": 2.3382, + "step": 17870 + }, + { + "epoch": 1.4422564764748609, + "grad_norm": 0.6873618960380554, + "learning_rate": 5.6109221243414e-06, + "loss": 2.3795, + "step": 17871 + }, + { + "epoch": 1.4423371802114437, + "grad_norm": 0.667328953742981, + "learning_rate": 5.60570955485109e-06, + "loss": 2.4353, + "step": 17872 + }, + { + "epoch": 1.4424178839480268, + "grad_norm": 0.7091758847236633, + "learning_rate": 5.600499337913256e-06, + "loss": 2.3897, + "step": 17873 + }, + { + "epoch": 1.44249858768461, + "grad_norm": 0.6954033374786377, + "learning_rate": 5.5952914736577375e-06, + "loss": 2.4334, + "step": 17874 + }, + { + "epoch": 1.4425792914211928, + "grad_norm": 0.692724347114563, + "learning_rate": 5.590085962214331e-06, + "loss": 2.3355, + "step": 17875 + }, + { + "epoch": 1.4426599951577759, + "grad_norm": 0.7159389853477478, + "learning_rate": 5.584882803712777e-06, + "loss": 2.4425, + "step": 17876 + }, + { + "epoch": 1.4427406988943587, + "grad_norm": 0.7154572606086731, + "learning_rate": 5.579681998282759e-06, + "loss": 2.4353, + "step": 17877 + }, + { + "epoch": 1.4428214026309418, + "grad_norm": 0.6575120687484741, + "learning_rate": 5.574483546053866e-06, + "loss": 2.4038, + "step": 17878 + }, + { + "epoch": 1.4429021063675247, + "grad_norm": 0.7108171582221985, + "learning_rate": 5.56928744715568e-06, + "loss": 2.3661, + "step": 17879 + }, + { + "epoch": 1.4429828101041078, + "grad_norm": 0.7755489349365234, + "learning_rate": 5.564093701717698e-06, + "loss": 2.4026, + "step": 17880 + }, + { + "epoch": 1.4430635138406909, + "grad_norm": 0.7044881582260132, + "learning_rate": 5.5589023098693625e-06, + "loss": 2.433, + "step": 17881 + }, + { + "epoch": 1.4431442175772737, + "grad_norm": 0.6959014534950256, + "learning_rate": 5.553713271740035e-06, + "loss": 2.3399, + "step": 17882 + }, + { + "epoch": 1.4432249213138568, + "grad_norm": 0.6273486614227295, + "learning_rate": 5.5485265874590685e-06, + "loss": 2.4085, + 
"step": 17883 + }, + { + "epoch": 1.44330562505044, + "grad_norm": 0.711344301700592, + "learning_rate": 5.5433422571557145e-06, + "loss": 2.5058, + "step": 17884 + }, + { + "epoch": 1.4433863287870228, + "grad_norm": 0.7118481397628784, + "learning_rate": 5.5381602809591815e-06, + "loss": 2.4213, + "step": 17885 + }, + { + "epoch": 1.4434670325236059, + "grad_norm": 0.6486421227455139, + "learning_rate": 5.5329806589986435e-06, + "loss": 2.4225, + "step": 17886 + }, + { + "epoch": 1.443547736260189, + "grad_norm": 0.6768030524253845, + "learning_rate": 5.527803391403141e-06, + "loss": 2.4155, + "step": 17887 + }, + { + "epoch": 1.4436284399967718, + "grad_norm": 0.6921476721763611, + "learning_rate": 5.522628478301739e-06, + "loss": 2.4487, + "step": 17888 + }, + { + "epoch": 1.443709143733355, + "grad_norm": 0.6598425507545471, + "learning_rate": 5.517455919823411e-06, + "loss": 2.3929, + "step": 17889 + }, + { + "epoch": 1.443789847469938, + "grad_norm": 0.6784876585006714, + "learning_rate": 5.512285716097043e-06, + "loss": 2.4357, + "step": 17890 + }, + { + "epoch": 1.4438705512065209, + "grad_norm": 0.6828306913375854, + "learning_rate": 5.507117867251521e-06, + "loss": 2.3931, + "step": 17891 + }, + { + "epoch": 1.443951254943104, + "grad_norm": 0.708244800567627, + "learning_rate": 5.5019523734156195e-06, + "loss": 2.3955, + "step": 17892 + }, + { + "epoch": 1.4440319586796868, + "grad_norm": 0.7499315142631531, + "learning_rate": 5.496789234718081e-06, + "loss": 2.4862, + "step": 17893 + }, + { + "epoch": 1.44411266241627, + "grad_norm": 0.6969838738441467, + "learning_rate": 5.491628451287601e-06, + "loss": 2.4367, + "step": 17894 + }, + { + "epoch": 1.4441933661528528, + "grad_norm": 0.6904775500297546, + "learning_rate": 5.486470023252777e-06, + "loss": 2.4772, + "step": 17895 + }, + { + "epoch": 1.4442740698894359, + "grad_norm": 0.7058213949203491, + "learning_rate": 5.481313950742195e-06, + "loss": 2.4059, + "step": 17896 + }, + { + "epoch": 
1.444354773626019, + "grad_norm": 0.6824650764465332, + "learning_rate": 5.4761602338843425e-06, + "loss": 2.4058, + "step": 17897 + }, + { + "epoch": 1.4444354773626018, + "grad_norm": 0.6874315738677979, + "learning_rate": 5.471008872807648e-06, + "loss": 2.4055, + "step": 17898 + }, + { + "epoch": 1.444516181099185, + "grad_norm": 0.7096625566482544, + "learning_rate": 5.465859867640544e-06, + "loss": 2.4319, + "step": 17899 + }, + { + "epoch": 1.444596884835768, + "grad_norm": 0.6456719636917114, + "learning_rate": 5.460713218511304e-06, + "loss": 2.3403, + "step": 17900 + }, + { + "epoch": 1.4446775885723508, + "grad_norm": 0.6711640357971191, + "learning_rate": 5.4555689255482156e-06, + "loss": 2.4333, + "step": 17901 + }, + { + "epoch": 1.444758292308934, + "grad_norm": 0.6594802737236023, + "learning_rate": 5.450426988879509e-06, + "loss": 2.4027, + "step": 17902 + }, + { + "epoch": 1.444838996045517, + "grad_norm": 0.6931496858596802, + "learning_rate": 5.445287408633304e-06, + "loss": 2.4085, + "step": 17903 + }, + { + "epoch": 1.4449196997820999, + "grad_norm": 0.6932462453842163, + "learning_rate": 5.440150184937709e-06, + "loss": 2.3989, + "step": 17904 + }, + { + "epoch": 1.445000403518683, + "grad_norm": 0.7502899765968323, + "learning_rate": 5.435015317920744e-06, + "loss": 2.4083, + "step": 17905 + }, + { + "epoch": 1.4450811072552658, + "grad_norm": 0.6513844132423401, + "learning_rate": 5.429882807710396e-06, + "loss": 2.3895, + "step": 17906 + }, + { + "epoch": 1.445161810991849, + "grad_norm": 0.6809015274047852, + "learning_rate": 5.4247526544345835e-06, + "loss": 2.3957, + "step": 17907 + }, + { + "epoch": 1.4452425147284318, + "grad_norm": 0.6784202456474304, + "learning_rate": 5.419624858221151e-06, + "loss": 2.3735, + "step": 17908 + }, + { + "epoch": 1.4453232184650149, + "grad_norm": 0.8005407452583313, + "learning_rate": 5.414499419197916e-06, + "loss": 2.3888, + "step": 17909 + }, + { + "epoch": 1.445403922201598, + "grad_norm": 
0.7133296728134155, + "learning_rate": 5.409376337492589e-06, + "loss": 2.4347, + "step": 17910 + }, + { + "epoch": 1.4454846259381808, + "grad_norm": 0.6852008104324341, + "learning_rate": 5.404255613232867e-06, + "loss": 2.4154, + "step": 17911 + }, + { + "epoch": 1.445565329674764, + "grad_norm": 0.7864294648170471, + "learning_rate": 5.399137246546393e-06, + "loss": 2.4104, + "step": 17912 + }, + { + "epoch": 1.445646033411347, + "grad_norm": 0.7150406837463379, + "learning_rate": 5.394021237560687e-06, + "loss": 2.4423, + "step": 17913 + }, + { + "epoch": 1.4457267371479299, + "grad_norm": 0.6756410598754883, + "learning_rate": 5.388907586403269e-06, + "loss": 2.4038, + "step": 17914 + }, + { + "epoch": 1.445807440884513, + "grad_norm": 0.662440836429596, + "learning_rate": 5.383796293201604e-06, + "loss": 2.3529, + "step": 17915 + }, + { + "epoch": 1.445888144621096, + "grad_norm": 0.7391942739486694, + "learning_rate": 5.378687358083057e-06, + "loss": 2.4062, + "step": 17916 + }, + { + "epoch": 1.445968848357679, + "grad_norm": 0.762143611907959, + "learning_rate": 5.373580781174958e-06, + "loss": 2.4344, + "step": 17917 + }, + { + "epoch": 1.446049552094262, + "grad_norm": 0.7365298867225647, + "learning_rate": 5.368476562604608e-06, + "loss": 2.4144, + "step": 17918 + }, + { + "epoch": 1.446130255830845, + "grad_norm": 0.7313491702079773, + "learning_rate": 5.3633747024991685e-06, + "loss": 2.3671, + "step": 17919 + }, + { + "epoch": 1.446210959567428, + "grad_norm": 0.7121514081954956, + "learning_rate": 5.358275200985818e-06, + "loss": 2.3573, + "step": 17920 + }, + { + "epoch": 1.446291663304011, + "grad_norm": 0.6716858744621277, + "learning_rate": 5.353178058191643e-06, + "loss": 2.4398, + "step": 17921 + }, + { + "epoch": 1.446372367040594, + "grad_norm": 0.7036706805229187, + "learning_rate": 5.348083274243687e-06, + "loss": 2.3913, + "step": 17922 + }, + { + "epoch": 1.446453070777177, + "grad_norm": 0.7855868935585022, + "learning_rate": 
5.342990849268914e-06, + "loss": 2.4195, + "step": 17923 + }, + { + "epoch": 1.4465337745137599, + "grad_norm": 0.627890408039093, + "learning_rate": 5.337900783394245e-06, + "loss": 2.3954, + "step": 17924 + }, + { + "epoch": 1.446614478250343, + "grad_norm": 0.7047661542892456, + "learning_rate": 5.332813076746535e-06, + "loss": 2.5015, + "step": 17925 + }, + { + "epoch": 1.446695181986926, + "grad_norm": 0.6752549409866333, + "learning_rate": 5.327727729452592e-06, + "loss": 2.4384, + "step": 17926 + }, + { + "epoch": 1.446775885723509, + "grad_norm": 0.8034621477127075, + "learning_rate": 5.322644741639138e-06, + "loss": 2.444, + "step": 17927 + }, + { + "epoch": 1.446856589460092, + "grad_norm": 0.7055982947349548, + "learning_rate": 5.317564113432882e-06, + "loss": 2.4228, + "step": 17928 + }, + { + "epoch": 1.446937293196675, + "grad_norm": 0.7311068177223206, + "learning_rate": 5.312485844960424e-06, + "loss": 2.3979, + "step": 17929 + }, + { + "epoch": 1.447017996933258, + "grad_norm": 0.7067704796791077, + "learning_rate": 5.307409936348329e-06, + "loss": 2.3724, + "step": 17930 + }, + { + "epoch": 1.447098700669841, + "grad_norm": 0.7303062677383423, + "learning_rate": 5.302336387723128e-06, + "loss": 2.444, + "step": 17931 + }, + { + "epoch": 1.4471794044064241, + "grad_norm": 0.7445392608642578, + "learning_rate": 5.297265199211232e-06, + "loss": 2.4629, + "step": 17932 + }, + { + "epoch": 1.447260108143007, + "grad_norm": 0.6778857707977295, + "learning_rate": 5.2921963709390394e-06, + "loss": 2.3836, + "step": 17933 + }, + { + "epoch": 1.44734081187959, + "grad_norm": 0.6575925350189209, + "learning_rate": 5.287129903032873e-06, + "loss": 2.3851, + "step": 17934 + }, + { + "epoch": 1.4474215156161732, + "grad_norm": 0.736710250377655, + "learning_rate": 5.282065795619029e-06, + "loss": 2.4644, + "step": 17935 + }, + { + "epoch": 1.447502219352756, + "grad_norm": 0.6607224941253662, + "learning_rate": 5.277004048823686e-06, + "loss": 2.3838, + "step": 
17936 + }, + { + "epoch": 1.4475829230893391, + "grad_norm": 0.6364536881446838, + "learning_rate": 5.271944662773021e-06, + "loss": 2.3929, + "step": 17937 + }, + { + "epoch": 1.447663626825922, + "grad_norm": 0.7810595631599426, + "learning_rate": 5.266887637593121e-06, + "loss": 2.3823, + "step": 17938 + }, + { + "epoch": 1.447744330562505, + "grad_norm": 0.6959996819496155, + "learning_rate": 5.261832973410008e-06, + "loss": 2.4392, + "step": 17939 + }, + { + "epoch": 1.447825034299088, + "grad_norm": 0.7112187147140503, + "learning_rate": 5.256780670349659e-06, + "loss": 2.356, + "step": 17940 + }, + { + "epoch": 1.447905738035671, + "grad_norm": 0.7003504633903503, + "learning_rate": 5.251730728538018e-06, + "loss": 2.4182, + "step": 17941 + }, + { + "epoch": 1.447986441772254, + "grad_norm": 0.7685346603393555, + "learning_rate": 5.246683148100906e-06, + "loss": 2.3814, + "step": 17942 + }, + { + "epoch": 1.448067145508837, + "grad_norm": 0.6874574422836304, + "learning_rate": 5.2416379291641336e-06, + "loss": 2.5082, + "step": 17943 + }, + { + "epoch": 1.44814784924542, + "grad_norm": 0.6901064515113831, + "learning_rate": 5.236595071853456e-06, + "loss": 2.484, + "step": 17944 + }, + { + "epoch": 1.4482285529820031, + "grad_norm": 0.7325465083122253, + "learning_rate": 5.231554576294528e-06, + "loss": 2.3479, + "step": 17945 + }, + { + "epoch": 1.448309256718586, + "grad_norm": 0.6547845005989075, + "learning_rate": 5.226516442612994e-06, + "loss": 2.4001, + "step": 17946 + }, + { + "epoch": 1.448389960455169, + "grad_norm": 0.7091573476791382, + "learning_rate": 5.221480670934431e-06, + "loss": 2.3743, + "step": 17947 + }, + { + "epoch": 1.4484706641917522, + "grad_norm": 0.6750717163085938, + "learning_rate": 5.216447261384306e-06, + "loss": 2.3841, + "step": 17948 + }, + { + "epoch": 1.448551367928335, + "grad_norm": 0.682778537273407, + "learning_rate": 5.2114162140880715e-06, + "loss": 2.3735, + "step": 17949 + }, + { + "epoch": 1.4486320716649181, + 
"grad_norm": 0.702796995639801, + "learning_rate": 5.206387529171153e-06, + "loss": 2.397, + "step": 17950 + }, + { + "epoch": 1.448712775401501, + "grad_norm": 0.7154842615127563, + "learning_rate": 5.2013612067588254e-06, + "loss": 2.4072, + "step": 17951 + }, + { + "epoch": 1.448793479138084, + "grad_norm": 0.7017061710357666, + "learning_rate": 5.1963372469763905e-06, + "loss": 2.3638, + "step": 17952 + }, + { + "epoch": 1.448874182874667, + "grad_norm": 0.7153539657592773, + "learning_rate": 5.191315649949047e-06, + "loss": 2.4159, + "step": 17953 + }, + { + "epoch": 1.44895488661125, + "grad_norm": 0.7425200939178467, + "learning_rate": 5.1862964158019615e-06, + "loss": 2.3536, + "step": 17954 + }, + { + "epoch": 1.4490355903478331, + "grad_norm": 0.6961267590522766, + "learning_rate": 5.1812795446602115e-06, + "loss": 2.4257, + "step": 17955 + }, + { + "epoch": 1.449116294084416, + "grad_norm": 0.6912462115287781, + "learning_rate": 5.176265036648808e-06, + "loss": 2.4573, + "step": 17956 + }, + { + "epoch": 1.449196997820999, + "grad_norm": 0.7435596585273743, + "learning_rate": 5.171252891892786e-06, + "loss": 2.4134, + "step": 17957 + }, + { + "epoch": 1.4492777015575822, + "grad_norm": 0.7270591259002686, + "learning_rate": 5.166243110517011e-06, + "loss": 2.3162, + "step": 17958 + }, + { + "epoch": 1.449358405294165, + "grad_norm": 0.6728709936141968, + "learning_rate": 5.161235692646349e-06, + "loss": 2.3991, + "step": 17959 + }, + { + "epoch": 1.4494391090307481, + "grad_norm": 0.6676486134529114, + "learning_rate": 5.156230638405624e-06, + "loss": 2.4215, + "step": 17960 + }, + { + "epoch": 1.4495198127673312, + "grad_norm": 0.7242336869239807, + "learning_rate": 5.1512279479195455e-06, + "loss": 2.4144, + "step": 17961 + }, + { + "epoch": 1.449600516503914, + "grad_norm": 0.6936756372451782, + "learning_rate": 5.146227621312804e-06, + "loss": 2.3752, + "step": 17962 + }, + { + "epoch": 1.4496812202404972, + "grad_norm": 0.7574671506881714, + 
"learning_rate": 5.141229658710034e-06, + "loss": 2.4536, + "step": 17963 + }, + { + "epoch": 1.4497619239770803, + "grad_norm": 0.6585906147956848, + "learning_rate": 5.136234060235767e-06, + "loss": 2.4192, + "step": 17964 + }, + { + "epoch": 1.4498426277136631, + "grad_norm": 0.7344881296157837, + "learning_rate": 5.131240826014516e-06, + "loss": 2.375, + "step": 17965 + }, + { + "epoch": 1.4499233314502462, + "grad_norm": 0.6896358132362366, + "learning_rate": 5.126249956170748e-06, + "loss": 2.3417, + "step": 17966 + }, + { + "epoch": 1.450004035186829, + "grad_norm": 0.7076104283332825, + "learning_rate": 5.1212614508288185e-06, + "loss": 2.4131, + "step": 17967 + }, + { + "epoch": 1.4500847389234122, + "grad_norm": 0.6901896595954895, + "learning_rate": 5.116275310113083e-06, + "loss": 2.4232, + "step": 17968 + }, + { + "epoch": 1.450165442659995, + "grad_norm": 0.7986876964569092, + "learning_rate": 5.111291534147788e-06, + "loss": 2.4545, + "step": 17969 + }, + { + "epoch": 1.4502461463965781, + "grad_norm": 0.723733127117157, + "learning_rate": 5.106310123057167e-06, + "loss": 2.3816, + "step": 17970 + }, + { + "epoch": 1.4503268501331612, + "grad_norm": 0.6440990567207336, + "learning_rate": 5.101331076965332e-06, + "loss": 2.3819, + "step": 17971 + }, + { + "epoch": 1.450407553869744, + "grad_norm": 0.718396782875061, + "learning_rate": 5.096354395996405e-06, + "loss": 2.406, + "step": 17972 + }, + { + "epoch": 1.4504882576063272, + "grad_norm": 0.6515427231788635, + "learning_rate": 5.0913800802744105e-06, + "loss": 2.4555, + "step": 17973 + }, + { + "epoch": 1.4505689613429102, + "grad_norm": 0.7006518244743347, + "learning_rate": 5.0864081299233035e-06, + "loss": 2.3532, + "step": 17974 + }, + { + "epoch": 1.4506496650794931, + "grad_norm": 0.6596084237098694, + "learning_rate": 5.081438545067019e-06, + "loss": 2.3521, + "step": 17975 + }, + { + "epoch": 1.4507303688160762, + "grad_norm": 0.7091804146766663, + "learning_rate": 5.076471325829413e-06, 
+ "loss": 2.397, + "step": 17976 + }, + { + "epoch": 1.4508110725526593, + "grad_norm": 0.6768068671226501, + "learning_rate": 5.071506472334264e-06, + "loss": 2.3692, + "step": 17977 + }, + { + "epoch": 1.4508917762892422, + "grad_norm": 0.6937921643257141, + "learning_rate": 5.066543984705318e-06, + "loss": 2.4674, + "step": 17978 + }, + { + "epoch": 1.4509724800258252, + "grad_norm": 0.6987953186035156, + "learning_rate": 5.061583863066266e-06, + "loss": 2.388, + "step": 17979 + }, + { + "epoch": 1.4510531837624083, + "grad_norm": 0.7390346527099609, + "learning_rate": 5.056626107540708e-06, + "loss": 2.4279, + "step": 17980 + }, + { + "epoch": 1.4511338874989912, + "grad_norm": 0.6433011889457703, + "learning_rate": 5.05167071825221e-06, + "loss": 2.3897, + "step": 17981 + }, + { + "epoch": 1.4512145912355743, + "grad_norm": 0.6530279517173767, + "learning_rate": 5.046717695324288e-06, + "loss": 2.3794, + "step": 17982 + }, + { + "epoch": 1.4512952949721571, + "grad_norm": 0.7322575449943542, + "learning_rate": 5.041767038880363e-06, + "loss": 2.3391, + "step": 17983 + }, + { + "epoch": 1.4513759987087402, + "grad_norm": 0.7013799548149109, + "learning_rate": 5.036818749043825e-06, + "loss": 2.417, + "step": 17984 + }, + { + "epoch": 1.451456702445323, + "grad_norm": 0.6833368539810181, + "learning_rate": 5.031872825937989e-06, + "loss": 2.4109, + "step": 17985 + }, + { + "epoch": 1.4515374061819062, + "grad_norm": 0.6758227348327637, + "learning_rate": 5.026929269686143e-06, + "loss": 2.3913, + "step": 17986 + }, + { + "epoch": 1.4516181099184893, + "grad_norm": 0.6799556016921997, + "learning_rate": 5.021988080411477e-06, + "loss": 2.3963, + "step": 17987 + }, + { + "epoch": 1.4516988136550721, + "grad_norm": 0.670512318611145, + "learning_rate": 5.01704925823715e-06, + "loss": 2.4372, + "step": 17988 + }, + { + "epoch": 1.4517795173916552, + "grad_norm": 0.7226561903953552, + "learning_rate": 5.01211280328625e-06, + "loss": 2.3723, + "step": 17989 + }, + { + 
"epoch": 1.4518602211282383, + "grad_norm": 0.7119970917701721, + "learning_rate": 5.007178715681793e-06, + "loss": 2.454, + "step": 17990 + }, + { + "epoch": 1.4519409248648212, + "grad_norm": 0.670310378074646, + "learning_rate": 5.002246995546744e-06, + "loss": 2.4751, + "step": 17991 + }, + { + "epoch": 1.4520216286014043, + "grad_norm": 0.6663460731506348, + "learning_rate": 4.9973176430040515e-06, + "loss": 2.4779, + "step": 17992 + }, + { + "epoch": 1.4521023323379874, + "grad_norm": 0.72465980052948, + "learning_rate": 4.992390658176526e-06, + "loss": 2.429, + "step": 17993 + }, + { + "epoch": 1.4521830360745702, + "grad_norm": 0.7189087867736816, + "learning_rate": 4.987466041186972e-06, + "loss": 2.4086, + "step": 17994 + }, + { + "epoch": 1.4522637398111533, + "grad_norm": 0.6699924468994141, + "learning_rate": 4.982543792158134e-06, + "loss": 2.3932, + "step": 17995 + }, + { + "epoch": 1.4523444435477364, + "grad_norm": 0.6420440077781677, + "learning_rate": 4.977623911212681e-06, + "loss": 2.4164, + "step": 17996 + }, + { + "epoch": 1.4524251472843193, + "grad_norm": 0.6452329754829407, + "learning_rate": 4.972706398473237e-06, + "loss": 2.3391, + "step": 17997 + }, + { + "epoch": 1.4525058510209023, + "grad_norm": 0.6906129121780396, + "learning_rate": 4.967791254062359e-06, + "loss": 2.4345, + "step": 17998 + }, + { + "epoch": 1.4525865547574852, + "grad_norm": 0.6918602585792542, + "learning_rate": 4.96287847810254e-06, + "loss": 2.3304, + "step": 17999 + }, + { + "epoch": 1.4526672584940683, + "grad_norm": 0.727873682975769, + "learning_rate": 4.957968070716201e-06, + "loss": 2.417, + "step": 18000 + }, + { + "epoch": 1.4526672584940683, + "eval_loss": 2.3678998947143555, + "eval_runtime": 764.534, + "eval_samples_per_second": 3.427, + "eval_steps_per_second": 0.572, + "step": 18000 + } + ], + "logging_steps": 1, + "max_steps": 20000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 1000, + "stateful_callbacks": { + 
"TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.06902713549312e+17, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/out/checkpoint-18000/training_args.bin b/out/checkpoint-18000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ae4a8b118e2a671c30e37a5d24a42d8090b49055 --- /dev/null +++ b/out/checkpoint-18000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c2928f4418c9a306cbe65ca0c1b156ae660c125ec9122008a9f527a50891704 +size 5112 diff --git a/out/checkpoint-19000/config.json b/out/checkpoint-19000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..16f06bb1cdbf882eb90d57ea1906b3790e298a3f --- /dev/null +++ b/out/checkpoint-19000/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "./models/checkpoint-10000", + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1877, + "pad_token_id": 1026, + "reorder_and_upcast_attn": false, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": false, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 50 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.41.2", + "use_cache": true, + "vocab_size": 6027 +} diff --git a/out/checkpoint-19000/generation_config.json 
b/out/checkpoint-19000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..51f4dbe1c89cfa9da69401685604ff16254d9d20 --- /dev/null +++ b/out/checkpoint-19000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 50256, + "eos_token_id": 50256, + "pad_token_id": 1026, + "transformers_version": "4.41.2" +} diff --git a/out/checkpoint-19000/model.safetensors b/out/checkpoint-19000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2465237b2e8e78562759b53a560daf484ab42f79 --- /dev/null +++ b/out/checkpoint-19000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3e11468e1acb3fae967bb255a9fa4df68708ee3d58154ff4fa1e59f0c0b958a +size 364520064 diff --git a/out/checkpoint-19000/optimizer.pt b/out/checkpoint-19000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..25cfe3b6d769e0afcb8ba259e0cde1666c189e86 --- /dev/null +++ b/out/checkpoint-19000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:662d02c50a885048d10fc2072649021701469d88c40afebf2f833e9be89aa710 +size 729134010 diff --git a/out/checkpoint-19000/rng_state.pth b/out/checkpoint-19000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bf276e90a5de0fbd28a4a5a4d9061722a9f4d928 --- /dev/null +++ b/out/checkpoint-19000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57e1e994b500aad62a1a11a9a4c291a77ef34a0bdb5ac056c07bfd1c2d3241f0 +size 14244 diff --git a/out/checkpoint-19000/scheduler.pt b/out/checkpoint-19000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..81e06b99f21f43e14fee5ec224478e59d03994d9 --- /dev/null +++ b/out/checkpoint-19000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a8dbd9359bf168f116b41245dd391aa41b350b02d0581ee33c1787e65d68039 +size 1064 diff 
--git a/out/checkpoint-19000/special_tokens_map.json b/out/checkpoint-19000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1b9fa6207c25267215ce16bfacdcb9089df3e897 --- /dev/null +++ b/out/checkpoint-19000/special_tokens_map.json @@ -0,0 +1,9 @@ +{ + "pad_token": { + "content": "<|padding|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/out/checkpoint-19000/tokenizer.json b/out/checkpoint-19000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..2bf66a33fda75b69f9b1a9597987f418f5acfb49 --- /dev/null +++ b/out/checkpoint-19000/tokenizer.json @@ -0,0 +1,20279 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|audio:0|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|audio:1|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|audio:2|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 3, + "content": "<|audio:3|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 4, + "content": "<|audio:4|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 5, + "content": "<|audio:5|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 6, + "content": "<|audio:6|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 7, + "content": "<|audio:7|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { 
+ "id": 8, + "content": "<|audio:8|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 9, + "content": "<|audio:9|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 10, + "content": "<|audio:10|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 11, + "content": "<|audio:11|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 12, + "content": "<|audio:12|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 13, + "content": "<|audio:13|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 14, + "content": "<|audio:14|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 15, + "content": "<|audio:15|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 16, + "content": "<|audio:16|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 17, + "content": "<|audio:17|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 18, + "content": "<|audio:18|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 19, + "content": "<|audio:19|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 20, + "content": "<|audio:20|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 21, + 
"content": "<|audio:21|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 22, + "content": "<|audio:22|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 23, + "content": "<|audio:23|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 24, + "content": "<|audio:24|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 25, + "content": "<|audio:25|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 26, + "content": "<|audio:26|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 27, + "content": "<|audio:27|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 28, + "content": "<|audio:28|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 29, + "content": "<|audio:29|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 30, + "content": "<|audio:30|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 31, + "content": "<|audio:31|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 32, + "content": "<|audio:32|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 33, + "content": "<|audio:33|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 34, + "content": 
"<|audio:34|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 35, + "content": "<|audio:35|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 36, + "content": "<|audio:36|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 37, + "content": "<|audio:37|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 38, + "content": "<|audio:38|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 39, + "content": "<|audio:39|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 40, + "content": "<|audio:40|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 41, + "content": "<|audio:41|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 42, + "content": "<|audio:42|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 43, + "content": "<|audio:43|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 44, + "content": "<|audio:44|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 45, + "content": "<|audio:45|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 46, + "content": "<|audio:46|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 47, + "content": 
"<|audio:47|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 48, + "content": "<|audio:48|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 49, + "content": "<|audio:49|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 50, + "content": "<|audio:50|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 51, + "content": "<|audio:51|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 52, + "content": "<|audio:52|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 53, + "content": "<|audio:53|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 54, + "content": "<|audio:54|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 55, + "content": "<|audio:55|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 56, + "content": "<|audio:56|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 57, + "content": "<|audio:57|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 58, + "content": "<|audio:58|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 59, + "content": "<|audio:59|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 60, + "content": 
"<|audio:60|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 61, + "content": "<|audio:61|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 62, + "content": "<|audio:62|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 63, + "content": "<|audio:63|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 64, + "content": "<|audio:64|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 65, + "content": "<|audio:65|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 66, + "content": "<|audio:66|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 67, + "content": "<|audio:67|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 68, + "content": "<|audio:68|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 69, + "content": "<|audio:69|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 70, + "content": "<|audio:70|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 71, + "content": "<|audio:71|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 72, + "content": "<|audio:72|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 73, + "content": 
"<|audio:73|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 74, + "content": "<|audio:74|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 75, + "content": "<|audio:75|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 76, + "content": "<|audio:76|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 77, + "content": "<|audio:77|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 78, + "content": "<|audio:78|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 79, + "content": "<|audio:79|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 80, + "content": "<|audio:80|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 81, + "content": "<|audio:81|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 82, + "content": "<|audio:82|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 83, + "content": "<|audio:83|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 84, + "content": "<|audio:84|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 85, + "content": "<|audio:85|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 86, + "content": 
"<|audio:86|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 87, + "content": "<|audio:87|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 88, + "content": "<|audio:88|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 89, + "content": "<|audio:89|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 90, + "content": "<|audio:90|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 91, + "content": "<|audio:91|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 92, + "content": "<|audio:92|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 93, + "content": "<|audio:93|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 94, + "content": "<|audio:94|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 95, + "content": "<|audio:95|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 96, + "content": "<|audio:96|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 97, + "content": "<|audio:97|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 98, + "content": "<|audio:98|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 99, + "content": 
"<|audio:99|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 100, + "content": "<|audio:100|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 101, + "content": "<|audio:101|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 102, + "content": "<|audio:102|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 103, + "content": "<|audio:103|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 104, + "content": "<|audio:104|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 105, + "content": "<|audio:105|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 106, + "content": "<|audio:106|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 107, + "content": "<|audio:107|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 108, + "content": "<|audio:108|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 109, + "content": "<|audio:109|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 110, + "content": "<|audio:110|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 111, + "content": "<|audio:111|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
112, + "content": "<|audio:112|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 113, + "content": "<|audio:113|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 114, + "content": "<|audio:114|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 115, + "content": "<|audio:115|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 116, + "content": "<|audio:116|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 117, + "content": "<|audio:117|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 118, + "content": "<|audio:118|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 119, + "content": "<|audio:119|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 120, + "content": "<|audio:120|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 121, + "content": "<|audio:121|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 122, + "content": "<|audio:122|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 123, + "content": "<|audio:123|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 124, + "content": "<|audio:124|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 125, + "content": "<|audio:125|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 126, + "content": "<|audio:126|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127, + "content": "<|audio:127|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 128, + "content": "<|audio:128|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 129, + "content": "<|audio:129|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 130, + "content": "<|audio:130|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 131, + "content": "<|audio:131|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 132, + "content": "<|audio:132|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 133, + "content": "<|audio:133|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 134, + "content": "<|audio:134|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 135, + "content": "<|audio:135|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 136, + "content": "<|audio:136|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 137, + "content": "<|audio:137|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 138, + "content": "<|audio:138|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 139, + "content": "<|audio:139|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 140, + "content": "<|audio:140|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 141, + "content": "<|audio:141|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 142, + "content": "<|audio:142|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 143, + "content": "<|audio:143|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 144, + "content": "<|audio:144|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 145, + "content": "<|audio:145|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 146, + "content": "<|audio:146|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 147, + "content": "<|audio:147|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 148, + "content": "<|audio:148|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 149, + "content": "<|audio:149|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 150, + "content": "<|audio:150|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 151, + "content": "<|audio:151|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 152, + "content": "<|audio:152|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 153, + "content": "<|audio:153|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 154, + "content": "<|audio:154|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 155, + "content": "<|audio:155|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 156, + "content": "<|audio:156|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 157, + "content": "<|audio:157|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 158, + "content": "<|audio:158|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 159, + "content": "<|audio:159|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 160, + "content": "<|audio:160|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 161, + "content": "<|audio:161|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 162, + "content": "<|audio:162|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 163, + "content": "<|audio:163|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 164, + "content": "<|audio:164|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 165, + "content": "<|audio:165|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 166, + "content": "<|audio:166|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 167, + "content": "<|audio:167|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 168, + "content": "<|audio:168|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 169, + "content": "<|audio:169|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 170, + "content": "<|audio:170|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 171, + "content": "<|audio:171|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 172, + "content": "<|audio:172|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 173, + "content": "<|audio:173|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 174, + "content": "<|audio:174|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 175, + "content": "<|audio:175|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 176, + "content": "<|audio:176|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 177, + "content": "<|audio:177|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 178, + "content": "<|audio:178|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 179, + "content": "<|audio:179|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 180, + "content": "<|audio:180|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 181, + "content": "<|audio:181|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 182, + "content": "<|audio:182|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 183, + "content": "<|audio:183|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 184, + "content": "<|audio:184|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 185, + "content": "<|audio:185|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 186, + "content": "<|audio:186|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 187, + "content": "<|audio:187|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 188, + "content": "<|audio:188|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 189, + "content": "<|audio:189|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 190, + "content": "<|audio:190|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 191, + "content": "<|audio:191|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 192, + "content": "<|audio:192|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 193, + "content": "<|audio:193|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 194, + "content": "<|audio:194|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 195, + "content": "<|audio:195|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 196, + "content": "<|audio:196|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 197, + "content": "<|audio:197|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 198, + "content": "<|audio:198|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 199, + "content": "<|audio:199|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 200, + "content": "<|audio:200|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 201, + "content": "<|audio:201|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 202, + "content": 
"<|audio:202|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 203, + "content": "<|audio:203|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 204, + "content": "<|audio:204|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 205, + "content": "<|audio:205|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 206, + "content": "<|audio:206|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 207, + "content": "<|audio:207|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 208, + "content": "<|audio:208|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 209, + "content": "<|audio:209|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 210, + "content": "<|audio:210|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 211, + "content": "<|audio:211|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 212, + "content": "<|audio:212|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 213, + "content": "<|audio:213|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 214, + "content": "<|audio:214|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
215, + "content": "<|audio:215|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 216, + "content": "<|audio:216|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 217, + "content": "<|audio:217|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 218, + "content": "<|audio:218|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 219, + "content": "<|audio:219|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 220, + "content": "<|audio:220|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 221, + "content": "<|audio:221|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 222, + "content": "<|audio:222|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 223, + "content": "<|audio:223|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 224, + "content": "<|audio:224|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 225, + "content": "<|audio:225|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 226, + "content": "<|audio:226|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 227, + "content": "<|audio:227|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 228, + "content": "<|audio:228|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 229, + "content": "<|audio:229|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 230, + "content": "<|audio:230|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 231, + "content": "<|audio:231|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 232, + "content": "<|audio:232|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 233, + "content": "<|audio:233|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 234, + "content": "<|audio:234|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 235, + "content": "<|audio:235|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 236, + "content": "<|audio:236|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 237, + "content": "<|audio:237|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 238, + "content": "<|audio:238|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 239, + "content": "<|audio:239|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 240, + "content": "<|audio:240|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 241, + "content": "<|audio:241|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 242, + "content": "<|audio:242|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 243, + "content": "<|audio:243|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 244, + "content": "<|audio:244|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 245, + "content": "<|audio:245|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 246, + "content": "<|audio:246|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 247, + "content": "<|audio:247|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 248, + "content": "<|audio:248|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 249, + "content": "<|audio:249|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 250, + "content": "<|audio:250|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 251, + "content": "<|audio:251|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 252, + "content": "<|audio:252|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 253, + "content": "<|audio:253|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 254, + "content": "<|audio:254|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 255, + "content": "<|audio:255|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 256, + "content": "<|audio:256|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 257, + "content": "<|audio:257|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 258, + "content": "<|audio:258|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 259, + "content": "<|audio:259|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 260, + "content": "<|audio:260|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 261, + "content": "<|audio:261|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 262, + "content": "<|audio:262|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 263, + "content": "<|audio:263|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 264, + "content": "<|audio:264|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 265, + "content": "<|audio:265|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 266, + "content": "<|audio:266|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 267, + "content": "<|audio:267|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 268, + "content": "<|audio:268|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 269, + "content": "<|audio:269|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 270, + "content": "<|audio:270|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 271, + "content": "<|audio:271|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 272, + "content": "<|audio:272|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 273, + "content": "<|audio:273|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 274, + "content": "<|audio:274|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 275, + "content": "<|audio:275|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 276, + "content": "<|audio:276|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 277, + "content": "<|audio:277|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 278, + "content": "<|audio:278|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 279, + "content": "<|audio:279|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 280, + "content": "<|audio:280|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 281, + "content": "<|audio:281|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 282, + "content": "<|audio:282|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 283, + "content": "<|audio:283|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 284, + "content": "<|audio:284|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 285, + "content": "<|audio:285|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 286, + "content": "<|audio:286|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 287, + "content": "<|audio:287|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 288, + "content": "<|audio:288|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 289, + "content": "<|audio:289|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 290, + "content": "<|audio:290|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 291, + "content": "<|audio:291|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 292, + "content": "<|audio:292|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 293, + "content": "<|audio:293|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 294, + "content": "<|audio:294|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 295, + "content": "<|audio:295|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 296, + "content": "<|audio:296|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 297, + "content": "<|audio:297|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 298, + "content": "<|audio:298|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 299, + "content": "<|audio:299|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 300, + "content": "<|audio:300|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 301, + "content": "<|audio:301|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 302, + "content": "<|audio:302|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 303, + "content": "<|audio:303|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 304, + "content": "<|audio:304|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 305, + "content": 
"<|audio:305|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 306, + "content": "<|audio:306|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 307, + "content": "<|audio:307|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 308, + "content": "<|audio:308|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 309, + "content": "<|audio:309|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 310, + "content": "<|audio:310|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 311, + "content": "<|audio:311|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 312, + "content": "<|audio:312|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 313, + "content": "<|audio:313|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 314, + "content": "<|audio:314|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 315, + "content": "<|audio:315|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 316, + "content": "<|audio:316|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 317, + "content": "<|audio:317|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
318, + "content": "<|audio:318|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 319, + "content": "<|audio:319|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 320, + "content": "<|audio:320|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 321, + "content": "<|audio:321|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 322, + "content": "<|audio:322|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 323, + "content": "<|audio:323|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 324, + "content": "<|audio:324|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 325, + "content": "<|audio:325|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 326, + "content": "<|audio:326|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 327, + "content": "<|audio:327|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 328, + "content": "<|audio:328|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 329, + "content": "<|audio:329|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 330, + "content": "<|audio:330|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 331, + "content": "<|audio:331|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 332, + "content": "<|audio:332|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 333, + "content": "<|audio:333|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 334, + "content": "<|audio:334|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 335, + "content": "<|audio:335|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 336, + "content": "<|audio:336|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 337, + "content": "<|audio:337|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 338, + "content": "<|audio:338|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 339, + "content": "<|audio:339|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 340, + "content": "<|audio:340|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 341, + "content": "<|audio:341|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 342, + "content": "<|audio:342|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 343, + "content": "<|audio:343|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 344, + "content": "<|audio:344|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 345, + "content": "<|audio:345|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 346, + "content": "<|audio:346|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 347, + "content": "<|audio:347|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 348, + "content": "<|audio:348|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 349, + "content": "<|audio:349|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 350, + "content": "<|audio:350|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 351, + "content": "<|audio:351|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 352, + "content": "<|audio:352|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 353, + "content": "<|audio:353|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 354, + "content": "<|audio:354|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 355, + "content": "<|audio:355|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 356, + "content": "<|audio:356|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 357, + "content": "<|audio:357|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 358, + "content": "<|audio:358|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 359, + "content": "<|audio:359|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 360, + "content": "<|audio:360|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 361, + "content": "<|audio:361|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 362, + "content": "<|audio:362|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 363, + "content": "<|audio:363|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 364, + "content": "<|audio:364|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 365, + "content": "<|audio:365|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 366, + "content": "<|audio:366|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 367, + "content": "<|audio:367|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 368, + "content": "<|audio:368|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 369, + "content": "<|audio:369|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 370, + "content": "<|audio:370|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 371, + "content": "<|audio:371|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 372, + "content": "<|audio:372|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 373, + "content": "<|audio:373|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 374, + "content": "<|audio:374|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 375, + "content": "<|audio:375|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 376, + "content": "<|audio:376|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 377, + "content": "<|audio:377|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 378, + "content": "<|audio:378|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 379, + "content": "<|audio:379|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 380, + "content": "<|audio:380|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 381, + "content": "<|audio:381|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 382, + "content": "<|audio:382|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 383, + "content": "<|audio:383|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 384, + "content": "<|audio:384|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 385, + "content": "<|audio:385|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 386, + "content": "<|audio:386|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 387, + "content": "<|audio:387|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 388, + "content": "<|audio:388|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 389, + "content": "<|audio:389|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 390, + "content": "<|audio:390|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 391, + "content": "<|audio:391|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 392, + "content": "<|audio:392|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 393, + "content": "<|audio:393|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 394, + "content": "<|audio:394|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 395, + "content": "<|audio:395|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 396, + "content": "<|audio:396|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 397, + "content": "<|audio:397|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 398, + "content": "<|audio:398|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 399, + "content": "<|audio:399|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 400, + "content": "<|audio:400|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 401, + "content": "<|audio:401|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 402, + "content": "<|audio:402|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 403, + "content": "<|audio:403|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 404, + "content": "<|audio:404|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 405, + "content": "<|audio:405|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 406, + "content": "<|audio:406|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 407, + "content": "<|audio:407|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 408, + "content": 
"<|audio:408|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 409, + "content": "<|audio:409|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 410, + "content": "<|audio:410|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 411, + "content": "<|audio:411|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 412, + "content": "<|audio:412|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 413, + "content": "<|audio:413|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 414, + "content": "<|audio:414|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 415, + "content": "<|audio:415|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 416, + "content": "<|audio:416|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 417, + "content": "<|audio:417|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 418, + "content": "<|audio:418|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 419, + "content": "<|audio:419|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 420, + "content": "<|audio:420|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
421, + "content": "<|audio:421|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 422, + "content": "<|audio:422|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 423, + "content": "<|audio:423|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 424, + "content": "<|audio:424|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 425, + "content": "<|audio:425|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 426, + "content": "<|audio:426|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 427, + "content": "<|audio:427|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 428, + "content": "<|audio:428|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 429, + "content": "<|audio:429|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 430, + "content": "<|audio:430|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 431, + "content": "<|audio:431|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 432, + "content": "<|audio:432|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 433, + "content": "<|audio:433|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 434, + "content": "<|audio:434|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 435, + "content": "<|audio:435|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 436, + "content": "<|audio:436|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 437, + "content": "<|audio:437|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 438, + "content": "<|audio:438|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 439, + "content": "<|audio:439|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 440, + "content": "<|audio:440|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 441, + "content": "<|audio:441|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 442, + "content": "<|audio:442|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 443, + "content": "<|audio:443|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 444, + "content": "<|audio:444|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 445, + "content": "<|audio:445|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 446, + "content": "<|audio:446|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 447, + "content": "<|audio:447|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 448, + "content": "<|audio:448|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 449, + "content": "<|audio:449|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 450, + "content": "<|audio:450|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 451, + "content": "<|audio:451|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 452, + "content": "<|audio:452|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 453, + "content": "<|audio:453|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 454, + "content": "<|audio:454|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 455, + "content": "<|audio:455|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 456, + "content": "<|audio:456|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 457, + "content": "<|audio:457|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 458, + "content": "<|audio:458|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 459, + "content": "<|audio:459|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 460, + "content": "<|audio:460|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 461, + "content": "<|audio:461|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 462, + "content": "<|audio:462|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 463, + "content": "<|audio:463|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 464, + "content": "<|audio:464|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 465, + "content": "<|audio:465|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 466, + "content": "<|audio:466|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 467, + "content": "<|audio:467|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 468, + "content": "<|audio:468|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 469, + "content": "<|audio:469|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 470, + "content": "<|audio:470|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 471, + "content": "<|audio:471|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 472, + "content": "<|audio:472|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 473, + "content": "<|audio:473|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 474, + "content": "<|audio:474|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 475, + "content": "<|audio:475|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 476, + "content": "<|audio:476|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 477, + "content": "<|audio:477|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 478, + "content": "<|audio:478|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 479, + "content": "<|audio:479|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 480, + "content": "<|audio:480|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 481, + "content": "<|audio:481|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 482, + "content": "<|audio:482|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 483, + "content": "<|audio:483|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 484, + "content": "<|audio:484|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 485, + "content": "<|audio:485|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 486, + "content": "<|audio:486|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 487, + "content": "<|audio:487|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 488, + "content": "<|audio:488|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 489, + "content": "<|audio:489|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 490, + "content": "<|audio:490|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 491, + "content": "<|audio:491|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 492, + "content": "<|audio:492|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 493, + "content": "<|audio:493|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 494, + "content": "<|audio:494|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 495, + "content": "<|audio:495|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 496, + "content": "<|audio:496|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 497, + "content": "<|audio:497|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 498, + "content": "<|audio:498|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 499, + "content": "<|audio:499|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 500, + "content": "<|audio:500|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 501, + "content": "<|audio:501|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 502, + "content": "<|audio:502|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 503, + "content": "<|audio:503|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 504, + "content": "<|audio:504|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 505, + "content": "<|audio:505|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 506, + "content": "<|audio:506|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 507, + "content": "<|audio:507|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 508, + "content": "<|audio:508|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 509, + "content": "<|audio:509|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 510, + "content": "<|audio:510|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 511, + "content": 
"<|audio:511|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 512, + "content": "<|audio:512|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 513, + "content": "<|audio:513|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 514, + "content": "<|audio:514|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 515, + "content": "<|audio:515|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 516, + "content": "<|audio:516|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 517, + "content": "<|audio:517|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 518, + "content": "<|audio:518|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 519, + "content": "<|audio:519|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 520, + "content": "<|audio:520|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 521, + "content": "<|audio:521|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 522, + "content": "<|audio:522|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 523, + "content": "<|audio:523|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
524, + "content": "<|audio:524|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 525, + "content": "<|audio:525|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 526, + "content": "<|audio:526|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 527, + "content": "<|audio:527|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 528, + "content": "<|audio:528|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 529, + "content": "<|audio:529|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 530, + "content": "<|audio:530|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 531, + "content": "<|audio:531|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 532, + "content": "<|audio:532|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 533, + "content": "<|audio:533|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 534, + "content": "<|audio:534|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 535, + "content": "<|audio:535|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 536, + "content": "<|audio:536|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 537, + "content": "<|audio:537|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 538, + "content": "<|audio:538|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 539, + "content": "<|audio:539|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 540, + "content": "<|audio:540|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 541, + "content": "<|audio:541|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 542, + "content": "<|audio:542|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 543, + "content": "<|audio:543|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 544, + "content": "<|audio:544|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 545, + "content": "<|audio:545|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 546, + "content": "<|audio:546|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 547, + "content": "<|audio:547|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 548, + "content": "<|audio:548|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 549, + "content": "<|audio:549|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 550, + "content": "<|audio:550|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 551, + "content": "<|audio:551|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 552, + "content": "<|audio:552|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 553, + "content": "<|audio:553|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 554, + "content": "<|audio:554|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 555, + "content": "<|audio:555|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 556, + "content": "<|audio:556|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 557, + "content": "<|audio:557|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 558, + "content": "<|audio:558|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 559, + "content": "<|audio:559|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 560, + "content": "<|audio:560|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 561, + "content": "<|audio:561|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 562, + "content": "<|audio:562|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 563, + "content": "<|audio:563|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 564, + "content": "<|audio:564|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 565, + "content": "<|audio:565|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 566, + "content": "<|audio:566|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 567, + "content": "<|audio:567|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 568, + "content": "<|audio:568|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 569, + "content": "<|audio:569|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 570, + "content": "<|audio:570|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 571, + "content": "<|audio:571|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 572, + "content": "<|audio:572|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 573, + "content": "<|audio:573|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 574, + "content": "<|audio:574|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 575, + "content": "<|audio:575|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 576, + "content": "<|audio:576|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 577, + "content": "<|audio:577|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 578, + "content": "<|audio:578|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 579, + "content": "<|audio:579|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 580, + "content": "<|audio:580|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 581, + "content": "<|audio:581|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 582, + "content": "<|audio:582|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 583, + "content": "<|audio:583|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 584, + "content": "<|audio:584|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 585, + "content": "<|audio:585|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 586, + "content": "<|audio:586|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 587, + "content": "<|audio:587|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 588, + "content": "<|audio:588|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 589, + "content": "<|audio:589|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 590, + "content": "<|audio:590|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 591, + "content": "<|audio:591|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 592, + "content": "<|audio:592|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 593, + "content": "<|audio:593|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 594, + "content": "<|audio:594|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 595, + "content": "<|audio:595|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 596, + "content": "<|audio:596|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 597, + "content": "<|audio:597|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 598, + "content": "<|audio:598|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 599, + "content": "<|audio:599|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 600, + "content": "<|audio:600|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 601, + "content": "<|audio:601|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 602, + "content": "<|audio:602|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 603, + "content": "<|audio:603|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 604, + "content": "<|audio:604|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 605, + "content": "<|audio:605|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 606, + "content": "<|audio:606|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 607, + "content": "<|audio:607|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 608, + "content": "<|audio:608|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 609, + "content": "<|audio:609|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 610, + "content": "<|audio:610|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 611, + "content": "<|audio:611|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 612, + "content": "<|audio:612|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 613, + "content": "<|audio:613|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 614, + "content": 
"<|audio:614|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 615, + "content": "<|audio:615|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 616, + "content": "<|audio:616|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 617, + "content": "<|audio:617|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 618, + "content": "<|audio:618|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 619, + "content": "<|audio:619|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 620, + "content": "<|audio:620|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 621, + "content": "<|audio:621|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 622, + "content": "<|audio:622|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 623, + "content": "<|audio:623|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 624, + "content": "<|audio:624|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 625, + "content": "<|audio:625|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 626, + "content": "<|audio:626|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
627, + "content": "<|audio:627|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 628, + "content": "<|audio:628|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 629, + "content": "<|audio:629|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 630, + "content": "<|audio:630|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 631, + "content": "<|audio:631|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 632, + "content": "<|audio:632|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 633, + "content": "<|audio:633|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 634, + "content": "<|audio:634|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 635, + "content": "<|audio:635|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 636, + "content": "<|audio:636|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 637, + "content": "<|audio:637|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 638, + "content": "<|audio:638|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 639, + "content": "<|audio:639|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 640, + "content": "<|audio:640|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 641, + "content": "<|audio:641|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 642, + "content": "<|audio:642|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 643, + "content": "<|audio:643|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 644, + "content": "<|audio:644|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 645, + "content": "<|audio:645|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 646, + "content": "<|audio:646|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 647, + "content": "<|audio:647|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 648, + "content": "<|audio:648|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 649, + "content": "<|audio:649|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 650, + "content": "<|audio:650|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 651, + "content": "<|audio:651|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 652, + "content": "<|audio:652|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 653, + "content": "<|audio:653|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 654, + "content": "<|audio:654|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 655, + "content": "<|audio:655|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 656, + "content": "<|audio:656|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 657, + "content": "<|audio:657|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 658, + "content": "<|audio:658|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 659, + "content": "<|audio:659|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 660, + "content": "<|audio:660|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 661, + "content": "<|audio:661|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 662, + "content": "<|audio:662|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 663, + "content": "<|audio:663|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 664, + "content": "<|audio:664|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 665, + "content": "<|audio:665|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 666, + "content": "<|audio:666|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 667, + "content": "<|audio:667|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 668, + "content": "<|audio:668|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 669, + "content": "<|audio:669|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 670, + "content": "<|audio:670|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 671, + "content": "<|audio:671|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 672, + "content": "<|audio:672|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 673, + "content": "<|audio:673|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 674, + "content": "<|audio:674|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 675, + "content": "<|audio:675|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 676, + "content": "<|audio:676|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 677, + "content": "<|audio:677|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 678, + "content": "<|audio:678|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 679, + "content": "<|audio:679|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 680, + "content": "<|audio:680|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 681, + "content": "<|audio:681|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 682, + "content": "<|audio:682|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 683, + "content": "<|audio:683|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 684, + "content": "<|audio:684|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 685, + "content": "<|audio:685|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 686, + "content": "<|audio:686|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 687, + "content": "<|audio:687|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 688, + "content": "<|audio:688|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 689, + "content": "<|audio:689|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 690, + "content": "<|audio:690|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 691, + "content": "<|audio:691|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 692, + "content": "<|audio:692|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 693, + "content": "<|audio:693|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 694, + "content": "<|audio:694|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 695, + "content": "<|audio:695|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 696, + "content": "<|audio:696|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 697, + "content": "<|audio:697|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 698, + "content": "<|audio:698|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 699, + "content": "<|audio:699|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 700, + "content": "<|audio:700|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 701, + "content": "<|audio:701|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 702, + "content": "<|audio:702|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 703, + "content": "<|audio:703|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 704, + "content": "<|audio:704|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 705, + "content": "<|audio:705|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 706, + "content": "<|audio:706|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 707, + "content": "<|audio:707|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 708, + "content": "<|audio:708|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 709, + "content": "<|audio:709|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 710, + "content": "<|audio:710|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 711, + "content": "<|audio:711|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 712, + "content": "<|audio:712|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 713, + "content": "<|audio:713|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 714, + "content": "<|audio:714|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 715, + "content": "<|audio:715|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 716, + "content": "<|audio:716|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 717, + "content": 
"<|audio:717|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 718, + "content": "<|audio:718|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 719, + "content": "<|audio:719|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 720, + "content": "<|audio:720|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 721, + "content": "<|audio:721|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 722, + "content": "<|audio:722|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 723, + "content": "<|audio:723|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 724, + "content": "<|audio:724|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 725, + "content": "<|audio:725|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 726, + "content": "<|audio:726|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 727, + "content": "<|audio:727|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 728, + "content": "<|audio:728|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 729, + "content": "<|audio:729|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
730, + "content": "<|audio:730|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 731, + "content": "<|audio:731|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 732, + "content": "<|audio:732|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 733, + "content": "<|audio:733|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 734, + "content": "<|audio:734|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 735, + "content": "<|audio:735|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 736, + "content": "<|audio:736|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 737, + "content": "<|audio:737|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 738, + "content": "<|audio:738|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 739, + "content": "<|audio:739|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 740, + "content": "<|audio:740|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 741, + "content": "<|audio:741|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 742, + "content": "<|audio:742|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 743, + "content": "<|audio:743|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 744, + "content": "<|audio:744|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 745, + "content": "<|audio:745|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 746, + "content": "<|audio:746|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 747, + "content": "<|audio:747|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 748, + "content": "<|audio:748|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 749, + "content": "<|audio:749|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 750, + "content": "<|audio:750|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 751, + "content": "<|audio:751|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 752, + "content": "<|audio:752|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 753, + "content": "<|audio:753|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 754, + "content": "<|audio:754|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 755, + "content": "<|audio:755|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 756, + "content": "<|audio:756|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 757, + "content": "<|audio:757|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 758, + "content": "<|audio:758|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 759, + "content": "<|audio:759|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 760, + "content": "<|audio:760|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 761, + "content": "<|audio:761|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 762, + "content": "<|audio:762|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 763, + "content": "<|audio:763|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 764, + "content": "<|audio:764|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 765, + "content": "<|audio:765|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 766, + "content": "<|audio:766|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 767, + "content": "<|audio:767|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 768, + "content": "<|audio:768|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 769, + "content": "<|audio:769|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 770, + "content": "<|audio:770|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 771, + "content": "<|audio:771|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 772, + "content": "<|audio:772|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 773, + "content": "<|audio:773|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 774, + "content": "<|audio:774|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 775, + "content": "<|audio:775|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 776, + "content": "<|audio:776|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 777, + "content": "<|audio:777|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 778, + "content": "<|audio:778|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 779, + "content": "<|audio:779|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 780, + "content": "<|audio:780|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 781, + "content": "<|audio:781|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 782, + "content": "<|audio:782|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 783, + "content": "<|audio:783|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 784, + "content": "<|audio:784|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 785, + "content": "<|audio:785|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 786, + "content": "<|audio:786|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 787, + "content": "<|audio:787|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 788, + "content": "<|audio:788|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 789, + "content": "<|audio:789|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 790, + "content": "<|audio:790|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 791, + "content": "<|audio:791|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 792, + "content": "<|audio:792|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 793, + "content": "<|audio:793|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 794, + "content": "<|audio:794|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 795, + "content": "<|audio:795|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 796, + "content": "<|audio:796|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 797, + "content": "<|audio:797|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 798, + "content": "<|audio:798|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 799, + "content": "<|audio:799|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 800, + "content": "<|audio:800|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 801, + "content": "<|audio:801|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 802, + "content": "<|audio:802|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 803, + "content": "<|audio:803|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 804, + "content": "<|audio:804|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 805, + "content": "<|audio:805|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 806, + "content": "<|audio:806|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 807, + "content": "<|audio:807|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 808, + "content": "<|audio:808|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 809, + "content": "<|audio:809|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 810, + "content": "<|audio:810|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 811, + "content": "<|audio:811|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 812, + "content": "<|audio:812|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 813, + "content": "<|audio:813|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 814, + "content": "<|audio:814|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 815, + "content": "<|audio:815|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 816, + "content": "<|audio:816|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 817, + "content": "<|audio:817|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 818, + "content": "<|audio:818|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 819, + "content": "<|audio:819|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 820, + "content": 
"<|audio:820|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 821, + "content": "<|audio:821|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 822, + "content": "<|audio:822|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 823, + "content": "<|audio:823|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 824, + "content": "<|audio:824|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 825, + "content": "<|audio:825|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 826, + "content": "<|audio:826|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 827, + "content": "<|audio:827|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 828, + "content": "<|audio:828|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 829, + "content": "<|audio:829|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 830, + "content": "<|audio:830|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 831, + "content": "<|audio:831|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 832, + "content": "<|audio:832|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
833, + "content": "<|audio:833|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 834, + "content": "<|audio:834|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 835, + "content": "<|audio:835|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 836, + "content": "<|audio:836|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 837, + "content": "<|audio:837|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 838, + "content": "<|audio:838|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 839, + "content": "<|audio:839|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 840, + "content": "<|audio:840|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 841, + "content": "<|audio:841|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 842, + "content": "<|audio:842|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 843, + "content": "<|audio:843|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 844, + "content": "<|audio:844|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 845, + "content": "<|audio:845|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 846, + "content": "<|audio:846|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 847, + "content": "<|audio:847|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 848, + "content": "<|audio:848|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 849, + "content": "<|audio:849|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 850, + "content": "<|audio:850|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 851, + "content": "<|audio:851|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 852, + "content": "<|audio:852|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 853, + "content": "<|audio:853|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 854, + "content": "<|audio:854|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 855, + "content": "<|audio:855|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 856, + "content": "<|audio:856|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 857, + "content": "<|audio:857|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 858, + "content": "<|audio:858|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 859, + "content": "<|audio:859|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 860, + "content": "<|audio:860|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 861, + "content": "<|audio:861|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 862, + "content": "<|audio:862|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 863, + "content": "<|audio:863|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 864, + "content": "<|audio:864|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 865, + "content": "<|audio:865|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 866, + "content": "<|audio:866|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 867, + "content": "<|audio:867|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 868, + "content": "<|audio:868|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 869, + "content": "<|audio:869|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 870, + "content": "<|audio:870|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 871, + "content": "<|audio:871|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 872, + "content": "<|audio:872|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 873, + "content": "<|audio:873|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 874, + "content": "<|audio:874|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 875, + "content": "<|audio:875|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 876, + "content": "<|audio:876|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 877, + "content": "<|audio:877|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 878, + "content": "<|audio:878|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 879, + "content": "<|audio:879|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 880, + "content": "<|audio:880|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 881, + "content": "<|audio:881|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 882, + "content": "<|audio:882|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 883, + "content": "<|audio:883|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 884, + "content": "<|audio:884|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 885, + "content": "<|audio:885|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 886, + "content": "<|audio:886|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 887, + "content": "<|audio:887|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 888, + "content": "<|audio:888|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 889, + "content": "<|audio:889|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 890, + "content": "<|audio:890|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 891, + "content": "<|audio:891|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 892, + "content": "<|audio:892|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 893, + "content": "<|audio:893|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 894, + "content": "<|audio:894|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 895, + "content": "<|audio:895|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 896, + "content": "<|audio:896|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 897, + "content": "<|audio:897|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 898, + "content": "<|audio:898|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 899, + "content": "<|audio:899|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 900, + "content": "<|audio:900|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 901, + "content": "<|audio:901|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 902, + "content": "<|audio:902|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 903, + "content": "<|audio:903|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 904, + "content": "<|audio:904|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 905, + "content": "<|audio:905|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 906, + "content": "<|audio:906|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 907, + "content": "<|audio:907|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 908, + "content": "<|audio:908|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 909, + "content": "<|audio:909|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 910, + "content": "<|audio:910|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 911, + "content": "<|audio:911|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 912, + "content": "<|audio:912|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 913, + "content": "<|audio:913|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 914, + "content": "<|audio:914|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 915, + "content": "<|audio:915|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 916, + "content": "<|audio:916|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 917, + "content": "<|audio:917|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 918, + "content": "<|audio:918|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 919, + "content": "<|audio:919|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 920, + "content": "<|audio:920|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 921, + "content": "<|audio:921|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 922, + "content": "<|audio:922|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 923, + "content": 
"<|audio:923|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 924, + "content": "<|audio:924|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 925, + "content": "<|audio:925|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 926, + "content": "<|audio:926|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 927, + "content": "<|audio:927|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 928, + "content": "<|audio:928|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 929, + "content": "<|audio:929|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 930, + "content": "<|audio:930|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 931, + "content": "<|audio:931|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 932, + "content": "<|audio:932|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 933, + "content": "<|audio:933|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 934, + "content": "<|audio:934|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 935, + "content": "<|audio:935|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
936, + "content": "<|audio:936|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 937, + "content": "<|audio:937|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 938, + "content": "<|audio:938|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 939, + "content": "<|audio:939|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 940, + "content": "<|audio:940|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 941, + "content": "<|audio:941|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 942, + "content": "<|audio:942|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 943, + "content": "<|audio:943|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 944, + "content": "<|audio:944|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 945, + "content": "<|audio:945|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 946, + "content": "<|audio:946|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 947, + "content": "<|audio:947|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 948, + "content": "<|audio:948|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 949, + "content": "<|audio:949|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 950, + "content": "<|audio:950|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 951, + "content": "<|audio:951|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 952, + "content": "<|audio:952|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 953, + "content": "<|audio:953|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 954, + "content": "<|audio:954|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 955, + "content": "<|audio:955|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 956, + "content": "<|audio:956|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 957, + "content": "<|audio:957|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 958, + "content": "<|audio:958|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 959, + "content": "<|audio:959|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 960, + "content": "<|audio:960|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 961, + "content": "<|audio:961|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 962, + "content": "<|audio:962|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 963, + "content": "<|audio:963|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 964, + "content": "<|audio:964|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 965, + "content": "<|audio:965|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 966, + "content": "<|audio:966|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 967, + "content": "<|audio:967|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 968, + "content": "<|audio:968|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 969, + "content": "<|audio:969|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 970, + "content": "<|audio:970|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 971, + "content": "<|audio:971|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 972, + "content": "<|audio:972|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 973, + "content": "<|audio:973|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 974, + "content": "<|audio:974|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 975, + "content": "<|audio:975|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 976, + "content": "<|audio:976|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 977, + "content": "<|audio:977|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 978, + "content": "<|audio:978|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 979, + "content": "<|audio:979|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 980, + "content": "<|audio:980|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 981, + "content": "<|audio:981|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 982, + "content": "<|audio:982|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 983, + "content": "<|audio:983|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 984, + "content": "<|audio:984|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 985, + "content": "<|audio:985|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 986, + "content": "<|audio:986|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 987, + "content": "<|audio:987|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 988, + "content": "<|audio:988|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 989, + "content": "<|audio:989|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 990, + "content": "<|audio:990|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 991, + "content": "<|audio:991|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 992, + "content": "<|audio:992|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 993, + "content": "<|audio:993|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 994, + "content": "<|audio:994|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 995, + "content": "<|audio:995|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 996, + "content": "<|audio:996|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 997, + "content": "<|audio:997|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 998, + "content": "<|audio:998|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 999, + "content": "<|audio:999|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1000, + "content": "<|audio:1000|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1001, + "content": "<|audio:1001|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1002, + "content": "<|audio:1002|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1003, + "content": "<|audio:1003|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1004, + "content": "<|audio:1004|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1005, + "content": "<|audio:1005|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1006, + "content": "<|audio:1006|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1007, + "content": "<|audio:1007|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1008, + "content": "<|audio:1008|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1009, + "content": "<|audio:1009|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1010, + "content": "<|audio:1010|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1011, + "content": "<|audio:1011|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1012, + "content": "<|audio:1012|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1013, + "content": 
"<|audio:1013|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1014, + "content": "<|audio:1014|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1015, + "content": "<|audio:1015|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1016, + "content": "<|audio:1016|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1017, + "content": "<|audio:1017|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1018, + "content": "<|audio:1018|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1019, + "content": "<|audio:1019|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1020, + "content": "<|audio:1020|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1021, + "content": "<|audio:1021|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1022, + "content": "<|audio:1022|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1023, + "content": "<|audio:1023|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1024, + "content": "<|startoftranscript|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1025, + "content": "<|endoftranscript|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, 
+ "special": true + }, + { + "id": 1026, + "content": "<|padding|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFKC" + }, + "pre_tokenizer": { + "type": "Metaspace", + "replacement": "▁", + "prepend_scheme": "always", + "split": true + }, + "post_processor": null, + "decoder": { + "type": "Metaspace", + "replacement": "▁", + "prepend_scheme": "always", + "split": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|audio:0|>": 0, + "<|audio:1|>": 1, + "<|audio:2|>": 2, + "<|audio:3|>": 3, + "<|audio:4|>": 4, + "<|audio:5|>": 5, + "<|audio:6|>": 6, + "<|audio:7|>": 7, + "<|audio:8|>": 8, + "<|audio:9|>": 9, + "<|audio:10|>": 10, + "<|audio:11|>": 11, + "<|audio:12|>": 12, + "<|audio:13|>": 13, + "<|audio:14|>": 14, + "<|audio:15|>": 15, + "<|audio:16|>": 16, + "<|audio:17|>": 17, + "<|audio:18|>": 18, + "<|audio:19|>": 19, + "<|audio:20|>": 20, + "<|audio:21|>": 21, + "<|audio:22|>": 22, + "<|audio:23|>": 23, + "<|audio:24|>": 24, + "<|audio:25|>": 25, + "<|audio:26|>": 26, + "<|audio:27|>": 27, + "<|audio:28|>": 28, + "<|audio:29|>": 29, + "<|audio:30|>": 30, + "<|audio:31|>": 31, + "<|audio:32|>": 32, + "<|audio:33|>": 33, + "<|audio:34|>": 34, + "<|audio:35|>": 35, + "<|audio:36|>": 36, + "<|audio:37|>": 37, + "<|audio:38|>": 38, + "<|audio:39|>": 39, + "<|audio:40|>": 40, + "<|audio:41|>": 41, + "<|audio:42|>": 42, + "<|audio:43|>": 43, + "<|audio:44|>": 44, + "<|audio:45|>": 45, + "<|audio:46|>": 46, + "<|audio:47|>": 47, + "<|audio:48|>": 48, + "<|audio:49|>": 49, + "<|audio:50|>": 50, + "<|audio:51|>": 51, + "<|audio:52|>": 52, + "<|audio:53|>": 53, + "<|audio:54|>": 54, + "<|audio:55|>": 55, + "<|audio:56|>": 56, + "<|audio:57|>": 57, + "<|audio:58|>": 58, + "<|audio:59|>": 59, + 
"<|audio:60|>": 60, + "<|audio:61|>": 61, + "<|audio:62|>": 62, + "<|audio:63|>": 63, + "<|audio:64|>": 64, + "<|audio:65|>": 65, + "<|audio:66|>": 66, + "<|audio:67|>": 67, + "<|audio:68|>": 68, + "<|audio:69|>": 69, + "<|audio:70|>": 70, + "<|audio:71|>": 71, + "<|audio:72|>": 72, + "<|audio:73|>": 73, + "<|audio:74|>": 74, + "<|audio:75|>": 75, + "<|audio:76|>": 76, + "<|audio:77|>": 77, + "<|audio:78|>": 78, + "<|audio:79|>": 79, + "<|audio:80|>": 80, + "<|audio:81|>": 81, + "<|audio:82|>": 82, + "<|audio:83|>": 83, + "<|audio:84|>": 84, + "<|audio:85|>": 85, + "<|audio:86|>": 86, + "<|audio:87|>": 87, + "<|audio:88|>": 88, + "<|audio:89|>": 89, + "<|audio:90|>": 90, + "<|audio:91|>": 91, + "<|audio:92|>": 92, + "<|audio:93|>": 93, + "<|audio:94|>": 94, + "<|audio:95|>": 95, + "<|audio:96|>": 96, + "<|audio:97|>": 97, + "<|audio:98|>": 98, + "<|audio:99|>": 99, + "<|audio:100|>": 100, + "<|audio:101|>": 101, + "<|audio:102|>": 102, + "<|audio:103|>": 103, + "<|audio:104|>": 104, + "<|audio:105|>": 105, + "<|audio:106|>": 106, + "<|audio:107|>": 107, + "<|audio:108|>": 108, + "<|audio:109|>": 109, + "<|audio:110|>": 110, + "<|audio:111|>": 111, + "<|audio:112|>": 112, + "<|audio:113|>": 113, + "<|audio:114|>": 114, + "<|audio:115|>": 115, + "<|audio:116|>": 116, + "<|audio:117|>": 117, + "<|audio:118|>": 118, + "<|audio:119|>": 119, + "<|audio:120|>": 120, + "<|audio:121|>": 121, + "<|audio:122|>": 122, + "<|audio:123|>": 123, + "<|audio:124|>": 124, + "<|audio:125|>": 125, + "<|audio:126|>": 126, + "<|audio:127|>": 127, + "<|audio:128|>": 128, + "<|audio:129|>": 129, + "<|audio:130|>": 130, + "<|audio:131|>": 131, + "<|audio:132|>": 132, + "<|audio:133|>": 133, + "<|audio:134|>": 134, + "<|audio:135|>": 135, + "<|audio:136|>": 136, + "<|audio:137|>": 137, + "<|audio:138|>": 138, + "<|audio:139|>": 139, + "<|audio:140|>": 140, + "<|audio:141|>": 141, + "<|audio:142|>": 142, + "<|audio:143|>": 143, + "<|audio:144|>": 144, + "<|audio:145|>": 145, + 
"<|audio:146|>": 146, + "<|audio:147|>": 147, + "<|audio:148|>": 148, + "<|audio:149|>": 149, + "<|audio:150|>": 150, + "<|audio:151|>": 151, + "<|audio:152|>": 152, + "<|audio:153|>": 153, + "<|audio:154|>": 154, + "<|audio:155|>": 155, + "<|audio:156|>": 156, + "<|audio:157|>": 157, + "<|audio:158|>": 158, + "<|audio:159|>": 159, + "<|audio:160|>": 160, + "<|audio:161|>": 161, + "<|audio:162|>": 162, + "<|audio:163|>": 163, + "<|audio:164|>": 164, + "<|audio:165|>": 165, + "<|audio:166|>": 166, + "<|audio:167|>": 167, + "<|audio:168|>": 168, + "<|audio:169|>": 169, + "<|audio:170|>": 170, + "<|audio:171|>": 171, + "<|audio:172|>": 172, + "<|audio:173|>": 173, + "<|audio:174|>": 174, + "<|audio:175|>": 175, + "<|audio:176|>": 176, + "<|audio:177|>": 177, + "<|audio:178|>": 178, + "<|audio:179|>": 179, + "<|audio:180|>": 180, + "<|audio:181|>": 181, + "<|audio:182|>": 182, + "<|audio:183|>": 183, + "<|audio:184|>": 184, + "<|audio:185|>": 185, + "<|audio:186|>": 186, + "<|audio:187|>": 187, + "<|audio:188|>": 188, + "<|audio:189|>": 189, + "<|audio:190|>": 190, + "<|audio:191|>": 191, + "<|audio:192|>": 192, + "<|audio:193|>": 193, + "<|audio:194|>": 194, + "<|audio:195|>": 195, + "<|audio:196|>": 196, + "<|audio:197|>": 197, + "<|audio:198|>": 198, + "<|audio:199|>": 199, + "<|audio:200|>": 200, + "<|audio:201|>": 201, + "<|audio:202|>": 202, + "<|audio:203|>": 203, + "<|audio:204|>": 204, + "<|audio:205|>": 205, + "<|audio:206|>": 206, + "<|audio:207|>": 207, + "<|audio:208|>": 208, + "<|audio:209|>": 209, + "<|audio:210|>": 210, + "<|audio:211|>": 211, + "<|audio:212|>": 212, + "<|audio:213|>": 213, + "<|audio:214|>": 214, + "<|audio:215|>": 215, + "<|audio:216|>": 216, + "<|audio:217|>": 217, + "<|audio:218|>": 218, + "<|audio:219|>": 219, + "<|audio:220|>": 220, + "<|audio:221|>": 221, + "<|audio:222|>": 222, + "<|audio:223|>": 223, + "<|audio:224|>": 224, + "<|audio:225|>": 225, + "<|audio:226|>": 226, + "<|audio:227|>": 227, + "<|audio:228|>": 228, + 
"<|audio:229|>": 229, + "<|audio:230|>": 230, + "<|audio:231|>": 231, + "<|audio:232|>": 232, + "<|audio:233|>": 233, + "<|audio:234|>": 234, + "<|audio:235|>": 235, + "<|audio:236|>": 236, + "<|audio:237|>": 237, + "<|audio:238|>": 238, + "<|audio:239|>": 239, + "<|audio:240|>": 240, + "<|audio:241|>": 241, + "<|audio:242|>": 242, + "<|audio:243|>": 243, + "<|audio:244|>": 244, + "<|audio:245|>": 245, + "<|audio:246|>": 246, + "<|audio:247|>": 247, + "<|audio:248|>": 248, + "<|audio:249|>": 249, + "<|audio:250|>": 250, + "<|audio:251|>": 251, + "<|audio:252|>": 252, + "<|audio:253|>": 253, + "<|audio:254|>": 254, + "<|audio:255|>": 255, + "<|audio:256|>": 256, + "<|audio:257|>": 257, + "<|audio:258|>": 258, + "<|audio:259|>": 259, + "<|audio:260|>": 260, + "<|audio:261|>": 261, + "<|audio:262|>": 262, + "<|audio:263|>": 263, + "<|audio:264|>": 264, + "<|audio:265|>": 265, + "<|audio:266|>": 266, + "<|audio:267|>": 267, + "<|audio:268|>": 268, + "<|audio:269|>": 269, + "<|audio:270|>": 270, + "<|audio:271|>": 271, + "<|audio:272|>": 272, + "<|audio:273|>": 273, + "<|audio:274|>": 274, + "<|audio:275|>": 275, + "<|audio:276|>": 276, + "<|audio:277|>": 277, + "<|audio:278|>": 278, + "<|audio:279|>": 279, + "<|audio:280|>": 280, + "<|audio:281|>": 281, + "<|audio:282|>": 282, + "<|audio:283|>": 283, + "<|audio:284|>": 284, + "<|audio:285|>": 285, + "<|audio:286|>": 286, + "<|audio:287|>": 287, + "<|audio:288|>": 288, + "<|audio:289|>": 289, + "<|audio:290|>": 290, + "<|audio:291|>": 291, + "<|audio:292|>": 292, + "<|audio:293|>": 293, + "<|audio:294|>": 294, + "<|audio:295|>": 295, + "<|audio:296|>": 296, + "<|audio:297|>": 297, + "<|audio:298|>": 298, + "<|audio:299|>": 299, + "<|audio:300|>": 300, + "<|audio:301|>": 301, + "<|audio:302|>": 302, + "<|audio:303|>": 303, + "<|audio:304|>": 304, + "<|audio:305|>": 305, + "<|audio:306|>": 306, + "<|audio:307|>": 307, + "<|audio:308|>": 308, + "<|audio:309|>": 309, + "<|audio:310|>": 310, + "<|audio:311|>": 311, + 
"<|audio:312|>": 312, + "<|audio:313|>": 313, + "<|audio:314|>": 314, + "<|audio:315|>": 315, + "<|audio:316|>": 316, + "<|audio:317|>": 317, + "<|audio:318|>": 318, + "<|audio:319|>": 319, + "<|audio:320|>": 320, + "<|audio:321|>": 321, + "<|audio:322|>": 322, + "<|audio:323|>": 323, + "<|audio:324|>": 324, + "<|audio:325|>": 325, + "<|audio:326|>": 326, + "<|audio:327|>": 327, + "<|audio:328|>": 328, + "<|audio:329|>": 329, + "<|audio:330|>": 330, + "<|audio:331|>": 331, + "<|audio:332|>": 332, + "<|audio:333|>": 333, + "<|audio:334|>": 334, + "<|audio:335|>": 335, + "<|audio:336|>": 336, + "<|audio:337|>": 337, + "<|audio:338|>": 338, + "<|audio:339|>": 339, + "<|audio:340|>": 340, + "<|audio:341|>": 341, + "<|audio:342|>": 342, + "<|audio:343|>": 343, + "<|audio:344|>": 344, + "<|audio:345|>": 345, + "<|audio:346|>": 346, + "<|audio:347|>": 347, + "<|audio:348|>": 348, + "<|audio:349|>": 349, + "<|audio:350|>": 350, + "<|audio:351|>": 351, + "<|audio:352|>": 352, + "<|audio:353|>": 353, + "<|audio:354|>": 354, + "<|audio:355|>": 355, + "<|audio:356|>": 356, + "<|audio:357|>": 357, + "<|audio:358|>": 358, + "<|audio:359|>": 359, + "<|audio:360|>": 360, + "<|audio:361|>": 361, + "<|audio:362|>": 362, + "<|audio:363|>": 363, + "<|audio:364|>": 364, + "<|audio:365|>": 365, + "<|audio:366|>": 366, + "<|audio:367|>": 367, + "<|audio:368|>": 368, + "<|audio:369|>": 369, + "<|audio:370|>": 370, + "<|audio:371|>": 371, + "<|audio:372|>": 372, + "<|audio:373|>": 373, + "<|audio:374|>": 374, + "<|audio:375|>": 375, + "<|audio:376|>": 376, + "<|audio:377|>": 377, + "<|audio:378|>": 378, + "<|audio:379|>": 379, + "<|audio:380|>": 380, + "<|audio:381|>": 381, + "<|audio:382|>": 382, + "<|audio:383|>": 383, + "<|audio:384|>": 384, + "<|audio:385|>": 385, + "<|audio:386|>": 386, + "<|audio:387|>": 387, + "<|audio:388|>": 388, + "<|audio:389|>": 389, + "<|audio:390|>": 390, + "<|audio:391|>": 391, + "<|audio:392|>": 392, + "<|audio:393|>": 393, + "<|audio:394|>": 394, + 
"<|audio:395|>": 395, + "<|audio:396|>": 396, + "<|audio:397|>": 397, + "<|audio:398|>": 398, + "<|audio:399|>": 399, + "<|audio:400|>": 400, + "<|audio:401|>": 401, + "<|audio:402|>": 402, + "<|audio:403|>": 403, + "<|audio:404|>": 404, + "<|audio:405|>": 405, + "<|audio:406|>": 406, + "<|audio:407|>": 407, + "<|audio:408|>": 408, + "<|audio:409|>": 409, + "<|audio:410|>": 410, + "<|audio:411|>": 411, + "<|audio:412|>": 412, + "<|audio:413|>": 413, + "<|audio:414|>": 414, + "<|audio:415|>": 415, + "<|audio:416|>": 416, + "<|audio:417|>": 417, + "<|audio:418|>": 418, + "<|audio:419|>": 419, + "<|audio:420|>": 420, + "<|audio:421|>": 421, + "<|audio:422|>": 422, + "<|audio:423|>": 423, + "<|audio:424|>": 424, + "<|audio:425|>": 425, + "<|audio:426|>": 426, + "<|audio:427|>": 427, + "<|audio:428|>": 428, + "<|audio:429|>": 429, + "<|audio:430|>": 430, + "<|audio:431|>": 431, + "<|audio:432|>": 432, + "<|audio:433|>": 433, + "<|audio:434|>": 434, + "<|audio:435|>": 435, + "<|audio:436|>": 436, + "<|audio:437|>": 437, + "<|audio:438|>": 438, + "<|audio:439|>": 439, + "<|audio:440|>": 440, + "<|audio:441|>": 441, + "<|audio:442|>": 442, + "<|audio:443|>": 443, + "<|audio:444|>": 444, + "<|audio:445|>": 445, + "<|audio:446|>": 446, + "<|audio:447|>": 447, + "<|audio:448|>": 448, + "<|audio:449|>": 449, + "<|audio:450|>": 450, + "<|audio:451|>": 451, + "<|audio:452|>": 452, + "<|audio:453|>": 453, + "<|audio:454|>": 454, + "<|audio:455|>": 455, + "<|audio:456|>": 456, + "<|audio:457|>": 457, + "<|audio:458|>": 458, + "<|audio:459|>": 459, + "<|audio:460|>": 460, + "<|audio:461|>": 461, + "<|audio:462|>": 462, + "<|audio:463|>": 463, + "<|audio:464|>": 464, + "<|audio:465|>": 465, + "<|audio:466|>": 466, + "<|audio:467|>": 467, + "<|audio:468|>": 468, + "<|audio:469|>": 469, + "<|audio:470|>": 470, + "<|audio:471|>": 471, + "<|audio:472|>": 472, + "<|audio:473|>": 473, + "<|audio:474|>": 474, + "<|audio:475|>": 475, + "<|audio:476|>": 476, + "<|audio:477|>": 477, + 
"<|audio:478|>": 478, + "<|audio:479|>": 479, + "<|audio:480|>": 480, + "<|audio:481|>": 481, + "<|audio:482|>": 482, + "<|audio:483|>": 483, + "<|audio:484|>": 484, + "<|audio:485|>": 485, + "<|audio:486|>": 486, + "<|audio:487|>": 487, + "<|audio:488|>": 488, + "<|audio:489|>": 489, + "<|audio:490|>": 490, + "<|audio:491|>": 491, + "<|audio:492|>": 492, + "<|audio:493|>": 493, + "<|audio:494|>": 494, + "<|audio:495|>": 495, + "<|audio:496|>": 496, + "<|audio:497|>": 497, + "<|audio:498|>": 498, + "<|audio:499|>": 499, + "<|audio:500|>": 500, + "<|audio:501|>": 501, + "<|audio:502|>": 502, + "<|audio:503|>": 503, + "<|audio:504|>": 504, + "<|audio:505|>": 505, + "<|audio:506|>": 506, + "<|audio:507|>": 507, + "<|audio:508|>": 508, + "<|audio:509|>": 509, + "<|audio:510|>": 510, + "<|audio:511|>": 511, + "<|audio:512|>": 512, + "<|audio:513|>": 513, + "<|audio:514|>": 514, + "<|audio:515|>": 515, + "<|audio:516|>": 516, + "<|audio:517|>": 517, + "<|audio:518|>": 518, + "<|audio:519|>": 519, + "<|audio:520|>": 520, + "<|audio:521|>": 521, + "<|audio:522|>": 522, + "<|audio:523|>": 523, + "<|audio:524|>": 524, + "<|audio:525|>": 525, + "<|audio:526|>": 526, + "<|audio:527|>": 527, + "<|audio:528|>": 528, + "<|audio:529|>": 529, + "<|audio:530|>": 530, + "<|audio:531|>": 531, + "<|audio:532|>": 532, + "<|audio:533|>": 533, + "<|audio:534|>": 534, + "<|audio:535|>": 535, + "<|audio:536|>": 536, + "<|audio:537|>": 537, + "<|audio:538|>": 538, + "<|audio:539|>": 539, + "<|audio:540|>": 540, + "<|audio:541|>": 541, + "<|audio:542|>": 542, + "<|audio:543|>": 543, + "<|audio:544|>": 544, + "<|audio:545|>": 545, + "<|audio:546|>": 546, + "<|audio:547|>": 547, + "<|audio:548|>": 548, + "<|audio:549|>": 549, + "<|audio:550|>": 550, + "<|audio:551|>": 551, + "<|audio:552|>": 552, + "<|audio:553|>": 553, + "<|audio:554|>": 554, + "<|audio:555|>": 555, + "<|audio:556|>": 556, + "<|audio:557|>": 557, + "<|audio:558|>": 558, + "<|audio:559|>": 559, + "<|audio:560|>": 560, + 
"<|audio:561|>": 561, + "<|audio:562|>": 562, + "<|audio:563|>": 563, + "<|audio:564|>": 564, + "<|audio:565|>": 565, + "<|audio:566|>": 566, + "<|audio:567|>": 567, + "<|audio:568|>": 568, + "<|audio:569|>": 569, + "<|audio:570|>": 570, + "<|audio:571|>": 571, + "<|audio:572|>": 572, + "<|audio:573|>": 573, + "<|audio:574|>": 574, + "<|audio:575|>": 575, + "<|audio:576|>": 576, + "<|audio:577|>": 577, + "<|audio:578|>": 578, + "<|audio:579|>": 579, + "<|audio:580|>": 580, + "<|audio:581|>": 581, + "<|audio:582|>": 582, + "<|audio:583|>": 583, + "<|audio:584|>": 584, + "<|audio:585|>": 585, + "<|audio:586|>": 586, + "<|audio:587|>": 587, + "<|audio:588|>": 588, + "<|audio:589|>": 589, + "<|audio:590|>": 590, + "<|audio:591|>": 591, + "<|audio:592|>": 592, + "<|audio:593|>": 593, + "<|audio:594|>": 594, + "<|audio:595|>": 595, + "<|audio:596|>": 596, + "<|audio:597|>": 597, + "<|audio:598|>": 598, + "<|audio:599|>": 599, + "<|audio:600|>": 600, + "<|audio:601|>": 601, + "<|audio:602|>": 602, + "<|audio:603|>": 603, + "<|audio:604|>": 604, + "<|audio:605|>": 605, + "<|audio:606|>": 606, + "<|audio:607|>": 607, + "<|audio:608|>": 608, + "<|audio:609|>": 609, + "<|audio:610|>": 610, + "<|audio:611|>": 611, + "<|audio:612|>": 612, + "<|audio:613|>": 613, + "<|audio:614|>": 614, + "<|audio:615|>": 615, + "<|audio:616|>": 616, + "<|audio:617|>": 617, + "<|audio:618|>": 618, + "<|audio:619|>": 619, + "<|audio:620|>": 620, + "<|audio:621|>": 621, + "<|audio:622|>": 622, + "<|audio:623|>": 623, + "<|audio:624|>": 624, + "<|audio:625|>": 625, + "<|audio:626|>": 626, + "<|audio:627|>": 627, + "<|audio:628|>": 628, + "<|audio:629|>": 629, + "<|audio:630|>": 630, + "<|audio:631|>": 631, + "<|audio:632|>": 632, + "<|audio:633|>": 633, + "<|audio:634|>": 634, + "<|audio:635|>": 635, + "<|audio:636|>": 636, + "<|audio:637|>": 637, + "<|audio:638|>": 638, + "<|audio:639|>": 639, + "<|audio:640|>": 640, + "<|audio:641|>": 641, + "<|audio:642|>": 642, + "<|audio:643|>": 643, + 
"<|audio:644|>": 644, + "<|audio:645|>": 645, + "<|audio:646|>": 646, + "<|audio:647|>": 647, + "<|audio:648|>": 648, + "<|audio:649|>": 649, + "<|audio:650|>": 650, + "<|audio:651|>": 651, + "<|audio:652|>": 652, + "<|audio:653|>": 653, + "<|audio:654|>": 654, + "<|audio:655|>": 655, + "<|audio:656|>": 656, + "<|audio:657|>": 657, + "<|audio:658|>": 658, + "<|audio:659|>": 659, + "<|audio:660|>": 660, + "<|audio:661|>": 661, + "<|audio:662|>": 662, + "<|audio:663|>": 663, + "<|audio:664|>": 664, + "<|audio:665|>": 665, + "<|audio:666|>": 666, + "<|audio:667|>": 667, + "<|audio:668|>": 668, + "<|audio:669|>": 669, + "<|audio:670|>": 670, + "<|audio:671|>": 671, + "<|audio:672|>": 672, + "<|audio:673|>": 673, + "<|audio:674|>": 674, + "<|audio:675|>": 675, + "<|audio:676|>": 676, + "<|audio:677|>": 677, + "<|audio:678|>": 678, + "<|audio:679|>": 679, + "<|audio:680|>": 680, + "<|audio:681|>": 681, + "<|audio:682|>": 682, + "<|audio:683|>": 683, + "<|audio:684|>": 684, + "<|audio:685|>": 685, + "<|audio:686|>": 686, + "<|audio:687|>": 687, + "<|audio:688|>": 688, + "<|audio:689|>": 689, + "<|audio:690|>": 690, + "<|audio:691|>": 691, + "<|audio:692|>": 692, + "<|audio:693|>": 693, + "<|audio:694|>": 694, + "<|audio:695|>": 695, + "<|audio:696|>": 696, + "<|audio:697|>": 697, + "<|audio:698|>": 698, + "<|audio:699|>": 699, + "<|audio:700|>": 700, + "<|audio:701|>": 701, + "<|audio:702|>": 702, + "<|audio:703|>": 703, + "<|audio:704|>": 704, + "<|audio:705|>": 705, + "<|audio:706|>": 706, + "<|audio:707|>": 707, + "<|audio:708|>": 708, + "<|audio:709|>": 709, + "<|audio:710|>": 710, + "<|audio:711|>": 711, + "<|audio:712|>": 712, + "<|audio:713|>": 713, + "<|audio:714|>": 714, + "<|audio:715|>": 715, + "<|audio:716|>": 716, + "<|audio:717|>": 717, + "<|audio:718|>": 718, + "<|audio:719|>": 719, + "<|audio:720|>": 720, + "<|audio:721|>": 721, + "<|audio:722|>": 722, + "<|audio:723|>": 723, + "<|audio:724|>": 724, + "<|audio:725|>": 725, + "<|audio:726|>": 726, + 
"<|audio:727|>": 727, + "<|audio:728|>": 728, + "<|audio:729|>": 729, + "<|audio:730|>": 730, + "<|audio:731|>": 731, + "<|audio:732|>": 732, + "<|audio:733|>": 733, + "<|audio:734|>": 734, + "<|audio:735|>": 735, + "<|audio:736|>": 736, + "<|audio:737|>": 737, + "<|audio:738|>": 738, + "<|audio:739|>": 739, + "<|audio:740|>": 740, + "<|audio:741|>": 741, + "<|audio:742|>": 742, + "<|audio:743|>": 743, + "<|audio:744|>": 744, + "<|audio:745|>": 745, + "<|audio:746|>": 746, + "<|audio:747|>": 747, + "<|audio:748|>": 748, + "<|audio:749|>": 749, + "<|audio:750|>": 750, + "<|audio:751|>": 751, + "<|audio:752|>": 752, + "<|audio:753|>": 753, + "<|audio:754|>": 754, + "<|audio:755|>": 755, + "<|audio:756|>": 756, + "<|audio:757|>": 757, + "<|audio:758|>": 758, + "<|audio:759|>": 759, + "<|audio:760|>": 760, + "<|audio:761|>": 761, + "<|audio:762|>": 762, + "<|audio:763|>": 763, + "<|audio:764|>": 764, + "<|audio:765|>": 765, + "<|audio:766|>": 766, + "<|audio:767|>": 767, + "<|audio:768|>": 768, + "<|audio:769|>": 769, + "<|audio:770|>": 770, + "<|audio:771|>": 771, + "<|audio:772|>": 772, + "<|audio:773|>": 773, + "<|audio:774|>": 774, + "<|audio:775|>": 775, + "<|audio:776|>": 776, + "<|audio:777|>": 777, + "<|audio:778|>": 778, + "<|audio:779|>": 779, + "<|audio:780|>": 780, + "<|audio:781|>": 781, + "<|audio:782|>": 782, + "<|audio:783|>": 783, + "<|audio:784|>": 784, + "<|audio:785|>": 785, + "<|audio:786|>": 786, + "<|audio:787|>": 787, + "<|audio:788|>": 788, + "<|audio:789|>": 789, + "<|audio:790|>": 790, + "<|audio:791|>": 791, + "<|audio:792|>": 792, + "<|audio:793|>": 793, + "<|audio:794|>": 794, + "<|audio:795|>": 795, + "<|audio:796|>": 796, + "<|audio:797|>": 797, + "<|audio:798|>": 798, + "<|audio:799|>": 799, + "<|audio:800|>": 800, + "<|audio:801|>": 801, + "<|audio:802|>": 802, + "<|audio:803|>": 803, + "<|audio:804|>": 804, + "<|audio:805|>": 805, + "<|audio:806|>": 806, + "<|audio:807|>": 807, + "<|audio:808|>": 808, + "<|audio:809|>": 809, + 
"<|audio:810|>": 810, + "<|audio:811|>": 811, + "<|audio:812|>": 812, + "<|audio:813|>": 813, + "<|audio:814|>": 814, + "<|audio:815|>": 815, + "<|audio:816|>": 816, + "<|audio:817|>": 817, + "<|audio:818|>": 818, + "<|audio:819|>": 819, + "<|audio:820|>": 820, + "<|audio:821|>": 821, + "<|audio:822|>": 822, + "<|audio:823|>": 823, + "<|audio:824|>": 824, + "<|audio:825|>": 825, + "<|audio:826|>": 826, + "<|audio:827|>": 827, + "<|audio:828|>": 828, + "<|audio:829|>": 829, + "<|audio:830|>": 830, + "<|audio:831|>": 831, + "<|audio:832|>": 832, + "<|audio:833|>": 833, + "<|audio:834|>": 834, + "<|audio:835|>": 835, + "<|audio:836|>": 836, + "<|audio:837|>": 837, + "<|audio:838|>": 838, + "<|audio:839|>": 839, + "<|audio:840|>": 840, + "<|audio:841|>": 841, + "<|audio:842|>": 842, + "<|audio:843|>": 843, + "<|audio:844|>": 844, + "<|audio:845|>": 845, + "<|audio:846|>": 846, + "<|audio:847|>": 847, + "<|audio:848|>": 848, + "<|audio:849|>": 849, + "<|audio:850|>": 850, + "<|audio:851|>": 851, + "<|audio:852|>": 852, + "<|audio:853|>": 853, + "<|audio:854|>": 854, + "<|audio:855|>": 855, + "<|audio:856|>": 856, + "<|audio:857|>": 857, + "<|audio:858|>": 858, + "<|audio:859|>": 859, + "<|audio:860|>": 860, + "<|audio:861|>": 861, + "<|audio:862|>": 862, + "<|audio:863|>": 863, + "<|audio:864|>": 864, + "<|audio:865|>": 865, + "<|audio:866|>": 866, + "<|audio:867|>": 867, + "<|audio:868|>": 868, + "<|audio:869|>": 869, + "<|audio:870|>": 870, + "<|audio:871|>": 871, + "<|audio:872|>": 872, + "<|audio:873|>": 873, + "<|audio:874|>": 874, + "<|audio:875|>": 875, + "<|audio:876|>": 876, + "<|audio:877|>": 877, + "<|audio:878|>": 878, + "<|audio:879|>": 879, + "<|audio:880|>": 880, + "<|audio:881|>": 881, + "<|audio:882|>": 882, + "<|audio:883|>": 883, + "<|audio:884|>": 884, + "<|audio:885|>": 885, + "<|audio:886|>": 886, + "<|audio:887|>": 887, + "<|audio:888|>": 888, + "<|audio:889|>": 889, + "<|audio:890|>": 890, + "<|audio:891|>": 891, + "<|audio:892|>": 892, + 
"<|audio:893|>": 893, + "<|audio:894|>": 894, + "<|audio:895|>": 895, + "<|audio:896|>": 896, + "<|audio:897|>": 897, + "<|audio:898|>": 898, + "<|audio:899|>": 899, + "<|audio:900|>": 900, + "<|audio:901|>": 901, + "<|audio:902|>": 902, + "<|audio:903|>": 903, + "<|audio:904|>": 904, + "<|audio:905|>": 905, + "<|audio:906|>": 906, + "<|audio:907|>": 907, + "<|audio:908|>": 908, + "<|audio:909|>": 909, + "<|audio:910|>": 910, + "<|audio:911|>": 911, + "<|audio:912|>": 912, + "<|audio:913|>": 913, + "<|audio:914|>": 914, + "<|audio:915|>": 915, + "<|audio:916|>": 916, + "<|audio:917|>": 917, + "<|audio:918|>": 918, + "<|audio:919|>": 919, + "<|audio:920|>": 920, + "<|audio:921|>": 921, + "<|audio:922|>": 922, + "<|audio:923|>": 923, + "<|audio:924|>": 924, + "<|audio:925|>": 925, + "<|audio:926|>": 926, + "<|audio:927|>": 927, + "<|audio:928|>": 928, + "<|audio:929|>": 929, + "<|audio:930|>": 930, + "<|audio:931|>": 931, + "<|audio:932|>": 932, + "<|audio:933|>": 933, + "<|audio:934|>": 934, + "<|audio:935|>": 935, + "<|audio:936|>": 936, + "<|audio:937|>": 937, + "<|audio:938|>": 938, + "<|audio:939|>": 939, + "<|audio:940|>": 940, + "<|audio:941|>": 941, + "<|audio:942|>": 942, + "<|audio:943|>": 943, + "<|audio:944|>": 944, + "<|audio:945|>": 945, + "<|audio:946|>": 946, + "<|audio:947|>": 947, + "<|audio:948|>": 948, + "<|audio:949|>": 949, + "<|audio:950|>": 950, + "<|audio:951|>": 951, + "<|audio:952|>": 952, + "<|audio:953|>": 953, + "<|audio:954|>": 954, + "<|audio:955|>": 955, + "<|audio:956|>": 956, + "<|audio:957|>": 957, + "<|audio:958|>": 958, + "<|audio:959|>": 959, + "<|audio:960|>": 960, + "<|audio:961|>": 961, + "<|audio:962|>": 962, + "<|audio:963|>": 963, + "<|audio:964|>": 964, + "<|audio:965|>": 965, + "<|audio:966|>": 966, + "<|audio:967|>": 967, + "<|audio:968|>": 968, + "<|audio:969|>": 969, + "<|audio:970|>": 970, + "<|audio:971|>": 971, + "<|audio:972|>": 972, + "<|audio:973|>": 973, + "<|audio:974|>": 974, + "<|audio:975|>": 975, + 
"<|audio:976|>": 976, + "<|audio:977|>": 977, + "<|audio:978|>": 978, + "<|audio:979|>": 979, + "<|audio:980|>": 980, + "<|audio:981|>": 981, + "<|audio:982|>": 982, + "<|audio:983|>": 983, + "<|audio:984|>": 984, + "<|audio:985|>": 985, + "<|audio:986|>": 986, + "<|audio:987|>": 987, + "<|audio:988|>": 988, + "<|audio:989|>": 989, + "<|audio:990|>": 990, + "<|audio:991|>": 991, + "<|audio:992|>": 992, + "<|audio:993|>": 993, + "<|audio:994|>": 994, + "<|audio:995|>": 995, + "<|audio:996|>": 996, + "<|audio:997|>": 997, + "<|audio:998|>": 998, + "<|audio:999|>": 999, + "<|audio:1000|>": 1000, + "<|audio:1001|>": 1001, + "<|audio:1002|>": 1002, + "<|audio:1003|>": 1003, + "<|audio:1004|>": 1004, + "<|audio:1005|>": 1005, + "<|audio:1006|>": 1006, + "<|audio:1007|>": 1007, + "<|audio:1008|>": 1008, + "<|audio:1009|>": 1009, + "<|audio:1010|>": 1010, + "<|audio:1011|>": 1011, + "<|audio:1012|>": 1012, + "<|audio:1013|>": 1013, + "<|audio:1014|>": 1014, + "<|audio:1015|>": 1015, + "<|audio:1016|>": 1016, + "<|audio:1017|>": 1017, + "<|audio:1018|>": 1018, + "<|audio:1019|>": 1019, + "<|audio:1020|>": 1020, + "<|audio:1021|>": 1021, + "<|audio:1022|>": 1022, + "<|audio:1023|>": 1023, + "<|startoftranscript|>": 1024, + "<|endoftranscript|>": 1025, + "<|padding|>": 1026, + "'": 1027, + "a": 1028, + "b": 1029, + "c": 1030, + "d": 1031, + "e": 1032, + "f": 1033, + "g": 1034, + "h": 1035, + "i": 1036, + "j": 1037, + "k": 1038, + "l": 1039, + "m": 1040, + "n": 1041, + "o": 1042, + "p": 1043, + "q": 1044, + "r": 1045, + "s": 1046, + "t": 1047, + "u": 1048, + "v": 1049, + "w": 1050, + "x": 1051, + "y": 1052, + "z": 1053, + "▁": 1054, + "▁t": 1055, + "he": 1056, + "▁a": 1057, + "▁the": 1058, + "in": 1059, + "▁s": 1060, + "▁w": 1061, + "▁o": 1062, + "re": 1063, + "nd": 1064, + "▁b": 1065, + "▁h": 1066, + "er": 1067, + "▁m": 1068, + "▁i": 1069, + "ou": 1070, + "▁c": 1071, + "▁f": 1072, + "at": 1073, + "ed": 1074, + "▁and": 1075, + "en": 1076, + "▁to": 1077, + "▁of": 1078, + "on": 
1079, + "is": 1080, + "▁d": 1081, + "ing": 1082, + "▁th": 1083, + "▁p": 1084, + "▁he": 1085, + "or": 1086, + "▁l": 1087, + "es": 1088, + "▁in": 1089, + "ll": 1090, + "it": 1091, + "ar": 1092, + "as": 1093, + "an": 1094, + "▁n": 1095, + "▁g": 1096, + "om": 1097, + "▁be": 1098, + "▁ha": 1099, + "▁e": 1100, + "le": 1101, + "ot": 1102, + "▁y": 1103, + "ut": 1104, + "ow": 1105, + "ic": 1106, + "▁wh": 1107, + "▁it": 1108, + "ld": 1109, + "ve": 1110, + "▁that": 1111, + "ly": 1112, + "▁was": 1113, + "id": 1114, + "se": 1115, + "st": 1116, + "▁on": 1117, + "gh": 1118, + "ent": 1119, + "▁re": 1120, + "▁you": 1121, + "im": 1122, + "ce": 1123, + "▁u": 1124, + "ver": 1125, + "ion": 1126, + "▁as": 1127, + "et": 1128, + "▁for": 1129, + "ay": 1130, + "▁his": 1131, + "▁we": 1132, + "ith": 1133, + "al": 1134, + "ir": 1135, + "▁r": 1136, + "▁with": 1137, + "▁st": 1138, + "ad": 1139, + "ur": 1140, + "ght": 1141, + "▁an": 1142, + "▁her": 1143, + "▁not": 1144, + "▁is": 1145, + "▁had": 1146, + "ter": 1147, + "her": 1148, + "ac": 1149, + "am": 1150, + "▁at": 1151, + "oo": 1152, + "▁but": 1153, + "ould": 1154, + "▁she": 1155, + "▁k": 1156, + "▁se": 1157, + "▁sa": 1158, + "▁sh": 1159, + "▁fr": 1160, + "▁him": 1161, + "▁so": 1162, + "▁me": 1163, + "ill": 1164, + "ain": 1165, + "▁su": 1166, + "ight": 1167, + "ch": 1168, + "red": 1169, + "ct": 1170, + "all": 1171, + "ro": 1172, + "ke": 1173, + "ess": 1174, + "il": 1175, + "'s": 1176, + "ore": 1177, + "▁de": 1178, + "▁my": 1179, + "▁they": 1180, + "▁whe": 1181, + "▁all": 1182, + "ich": 1183, + "▁ne": 1184, + "ri": 1185, + "▁by": 1186, + "▁have": 1187, + "ome": 1188, + "pp": 1189, + "▁this": 1190, + "▁li": 1191, + "▁do": 1192, + "▁con": 1193, + "us": 1194, + "▁which": 1195, + "▁ch": 1196, + "ul": 1197, + "qu": 1198, + "▁j": 1199, + "▁up": 1200, + "▁said": 1201, + "▁from": 1202, + "ard": 1203, + "ge": 1204, + "▁or": 1205, + "▁v": 1206, + "▁one": 1207, + "▁no": 1208, + "th": 1209, + "▁ex": 1210, + "▁were": 1211, + "▁there": 1212, + "pe": 1213, + 
"and": 1214, + "est": 1215, + "▁man": 1216, + "▁who": 1217, + "ble": 1218, + "ie": 1219, + "▁al": 1220, + "ant": 1221, + "res": 1222, + "ous": 1223, + "ust": 1224, + "very": 1225, + "ation": 1226, + "▁fe": 1227, + "▁them": 1228, + "lf": 1229, + "▁when": 1230, + "nt": 1231, + "ame": 1232, + "ind": 1233, + "ra": 1234, + "▁go": 1235, + "ers": 1236, + "ast": 1237, + "fe": 1238, + "ood": 1239, + "▁kn": 1240, + "▁int": 1241, + "ist": 1242, + "▁are": 1243, + "art": 1244, + "out": 1245, + "▁would": 1246, + "▁le": 1247, + "▁what": 1248, + "os": 1249, + "▁their": 1250, + "ong": 1251, + "our": 1252, + "▁if": 1253, + "▁com": 1254, + "ound": 1255, + "▁ab": 1256, + "▁out": 1257, + "▁wor": 1258, + "em": 1259, + "▁will": 1260, + "ak": 1261, + "▁mis": 1262, + "ate": 1263, + "ol": 1264, + "um": 1265, + "un": 1266, + "itt": 1267, + "ough": 1268, + "ked": 1269, + "ig": 1270, + "ap": 1271, + "one": 1272, + "▁been": 1273, + "own": 1274, + "ive": 1275, + "▁then": 1276, + "▁br": 1277, + "ven": 1278, + "if": 1279, + "▁ar": 1280, + "'t": 1281, + "self": 1282, + "▁tr": 1283, + "▁pl": 1284, + "▁ro": 1285, + "▁pr": 1286, + "ther": 1287, + "reat": 1288, + "▁un": 1289, + "▁af": 1290, + "▁sp": 1291, + "▁qu": 1292, + "▁pro": 1293, + "ity": 1294, + "hed": 1295, + "▁tw": 1296, + "▁ag": 1297, + "▁could": 1298, + "ost": 1299, + "ace": 1300, + "ort": 1301, + "ure": 1302, + "ake": 1303, + "▁am": 1304, + "ack": 1305, + "▁any": 1306, + "▁some": 1307, + "▁your": 1308, + "▁more": 1309, + "▁can": 1310, + "au": 1311, + "▁tim": 1312, + "ep": 1313, + "ag": 1314, + "▁en": 1315, + "ck": 1316, + "▁into": 1317, + "▁cl": 1318, + "ry": 1319, + "▁now": 1320, + "hing": 1321, + "nder": 1322, + "are": 1323, + "▁very": 1324, + "▁gr": 1325, + "el": 1326, + "ose": 1327, + "▁loo": 1328, + "▁bo": 1329, + "ved": 1330, + "op": 1331, + "▁other": 1332, + "▁did": 1333, + "ance": 1334, + "▁than": 1335, + "ittle": 1336, + "▁little": 1337, + "ine": 1338, + "ies": 1339, + "way": 1340, + "ite": 1341, + "▁like": 1342, + "ide": 1343, + 
"▁lo": 1344, + "ass": 1345, + "▁bl": 1346, + "able": 1347, + "urn": 1348, + "ought": 1349, + "▁know": 1350, + "other": 1351, + "▁time": 1352, + "▁im": 1353, + "▁dis": 1354, + "▁us": 1355, + "▁co": 1356, + "fore": 1357, + "▁how": 1358, + "▁te": 1359, + "ence": 1360, + "▁day": 1361, + "▁ad": 1362, + "ade": 1363, + "ice": 1364, + "▁about": 1365, + "▁see": 1366, + "▁over": 1367, + "pt": 1368, + "cc": 1369, + "▁too": 1370, + "ink": 1371, + "▁fl": 1372, + "wn": 1373, + "▁great": 1374, + "▁after": 1375, + "pl": 1376, + "de": 1377, + "▁per": 1378, + "ment": 1379, + "▁again": 1380, + "▁upon": 1381, + "▁hand": 1382, + "ab": 1383, + "▁has": 1384, + "ree": 1385, + "ish": 1386, + "ci": 1387, + "▁only": 1388, + "ally": 1389, + "▁well": 1390, + "▁should": 1391, + "▁po": 1392, + "▁mar": 1393, + "ress": 1394, + "▁say": 1395, + "▁good": 1396, + "ather": 1397, + "▁two": 1398, + "ings": 1399, + "▁pe": 1400, + "ount": 1401, + "▁our": 1402, + "ire": 1403, + "ving": 1404, + "▁down": 1405, + "ars": 1406, + "ert": 1407, + "we": 1408, + "▁before": 1409, + "ile": 1410, + "ves": 1411, + "▁app": 1412, + "▁every": 1413, + "▁its": 1414, + "▁old": 1415, + "▁thr": 1416, + "▁mu": 1417, + "▁made": 1418, + "ied": 1419, + "ick": 1420, + "▁long": 1421, + "age": 1422, + "te": 1423, + "ft": 1424, + "▁where": 1425, + "ang": 1426, + "▁never": 1427, + "▁must": 1428, + "▁pre": 1429, + "▁sm": 1430, + "ful": 1431, + "▁such": 1432, + "ull": 1433, + "▁str": 1434, + "ions": 1435, + "▁off": 1436, + "▁sc": 1437, + "▁came": 1438, + "ious": 1439, + "ue": 1440, + "▁miss": 1441, + "ward": 1442, + "ild": 1443, + "▁fir": 1444, + "▁even": 1445, + "▁under": 1446, + "act": 1447, + "▁these": 1448, + "▁come": 1449, + "▁part": 1450, + "▁fo": 1451, + "ated": 1452, + "ness": 1453, + "▁rem": 1454, + "ord": 1455, + "▁bec": 1456, + "ty": 1457, + "▁may": 1458, + "▁much": 1459, + "▁think": 1460, + "per": 1461, + "▁way": 1462, + "▁mister": 1463, + "led": 1464, + "▁let": 1465, + "orn": 1466, + "▁ey": 1467, + "▁gl": 1468, + "▁cont": 
1469, + "▁thought": 1470, + "▁look": 1471, + "ect": 1472, + "▁spe": 1473, + "ise": 1474, + "▁back": 1475, + "▁bet": 1476, + "ady": 1477, + "▁ye": 1478, + "ans": 1479, + "ach": 1480, + "▁here": 1481, + "▁just": 1482, + "ren": 1483, + "▁first": 1484, + "▁ho": 1485, + "▁own": 1486, + "▁des": 1487, + "▁ob": 1488, + "ried": 1489, + "ud": 1490, + "ary": 1491, + "▁went": 1492, + "▁mo": 1493, + "▁himself": 1494, + "▁men": 1495, + "air": 1496, + "cl": 1497, + "ave": 1498, + "ath": 1499, + "ff": 1500, + "▁sl": 1501, + "co": 1502, + "on't": 1503, + "llow": 1504, + "▁cr": 1505, + "▁res": 1506, + "▁i'": 1507, + "▁might": 1508, + "ily": 1509, + "▁seem": 1510, + "int": 1511, + "ip": 1512, + "▁beg": 1513, + "ouse": 1514, + "anc": 1515, + "n't": 1516, + "▁wat": 1517, + "▁through": 1518, + "▁comp": 1519, + "ber": 1520, + "▁away": 1521, + "▁car": 1522, + "▁em": 1523, + "▁get": 1524, + "▁imp": 1525, + "▁head": 1526, + "oss": 1527, + "▁life": 1528, + "▁bel": 1529, + "▁without": 1530, + "▁most": 1531, + "▁pass": 1532, + "▁make": 1533, + "▁cons": 1534, + "ened": 1535, + "▁som": 1536, + "▁turn": 1537, + "av": 1538, + "ng": 1539, + "▁shall": 1540, + "▁acc": 1541, + "▁those": 1542, + "▁pres": 1543, + "▁eyes": 1544, + "▁house": 1545, + "iz": 1546, + "▁somet": 1547, + "▁jo": 1548, + "▁still": 1549, + "▁call": 1550, + "▁night": 1551, + "hes": 1552, + "▁op": 1553, + "ause": 1554, + "▁wom": 1555, + "▁last": 1556, + "ks": 1557, + "less": 1558, + "ared": 1559, + "▁comm": 1560, + "▁don't": 1561, + "▁tell": 1562, + "▁ent": 1563, + "▁nothing": 1564, + "▁new": 1565, + "ign": 1566, + "▁take": 1567, + "▁being": 1568, + "▁many": 1569, + "▁word": 1570, + "ons": 1571, + "▁found": 1572, + "▁ret": 1573, + "ase": 1574, + "▁ear": 1575, + "▁while": 1576, + "▁att": 1577, + "ory": 1578, + "ix": 1579, + "▁ser": 1580, + "▁saw": 1581, + "▁put": 1582, + "ne": 1583, + "oth": 1584, + "iend": 1585, + "▁peop": 1586, + "▁wr": 1587, + "▁young": 1588, + "ark": 1589, + "dy": 1590, + "aking": 1591, + "les": 1592, + "▁count": 
1593, + "▁once": 1594, + "▁friend": 1595, + "▁la": 1596, + "ens": 1597, + "▁people": 1598, + "pect": 1599, + "ors": 1600, + "fect": 1601, + "▁mat": 1602, + "ince": 1603, + "ible": 1604, + "ered": 1605, + "▁room": 1606, + "▁three": 1607, + "▁yet": 1608, + "ail": 1609, + "▁same": 1610, + "▁father": 1611, + "▁right": 1612, + "▁child": 1613, + "▁cour": 1614, + "igh": 1615, + "▁place": 1616, + "▁another": 1617, + "ult": 1618, + "iv": 1619, + "ition": 1620, + "▁ind": 1621, + "▁want": 1622, + "▁though": 1623, + "▁nor": 1624, + "▁far": 1625, + "▁king": 1626, + "▁happ": 1627, + "▁heart": 1628, + "▁face": 1629, + "▁end": 1630, + "▁ever": 1631, + "▁nat": 1632, + "thing": 1633, + "▁love": 1634, + "get": 1635, + "▁took": 1636, + "▁dist": 1637, + "ever": 1638, + "ian": 1639, + "▁hu": 1640, + "ew": 1641, + "▁arm": 1642, + "▁inst": 1643, + "man": 1644, + "▁work": 1645, + "▁light": 1646, + "▁char": 1647, + "▁ple": 1648, + "ict": 1649, + "▁set": 1650, + "▁ac": 1651, + "▁looked": 1652, + "▁missus": 1653, + "▁asked": 1654, + "▁mind": 1655, + "▁yes": 1656, + "▁supp": 1657, + "▁inte": 1658, + "▁rep": 1659, + "cess": 1660, + "ently": 1661, + "▁left": 1662, + "gg": 1663, + "ertain": 1664, + "▁ke": 1665, + "ished": 1666, + "ub": 1667, + "▁pers": 1668, + "ways": 1669, + "▁things": 1670, + "alk": 1671, + "irl": 1672, + "▁mom": 1673, + "▁sir": 1674, + "▁wa": 1675, + "▁moment": 1676, + "ations": 1677, + "▁sat": 1678, + "sel": 1679, + "▁find": 1680, + "ower": 1681, + "ia": 1682, + "vent": 1683, + "rew": 1684, + "▁world": 1685, + "ject": 1686, + "▁give": 1687, + "▁cap": 1688, + "▁why": 1689, + "so": 1690, + "▁gu": 1691, + "▁mother": 1692, + "▁gen": 1693, + "▁sw": 1694, + "▁always": 1695, + "der": 1696, + "lt": 1697, + "ling": 1698, + "▁ans": 1699, + "pped": 1700, + "▁soon": 1701, + "▁act": 1702, + "▁form": 1703, + "▁el": 1704, + "dd": 1705, + "▁heard": 1706, + "ret": 1707, + "▁thing": 1708, + "▁something": 1709, + "▁seemed": 1710, + "▁sub": 1711, + "▁door": 1712, + "ange": 1713, + "▁girl": 1714, 
+ "ced": 1715, + "▁appe": 1716, + "ither": 1717, + "▁wind": 1718, + "▁because": 1719, + "▁dif": 1720, + "▁mon": 1721, + "ss": 1722, + "▁going": 1723, + "▁told": 1724, + "orm": 1725, + "▁home": 1726, + "ained": 1727, + "▁got": 1728, + "▁war": 1729, + "▁god": 1730, + "aught": 1731, + "▁gi": 1732, + "▁eng": 1733, + "▁sur": 1734, + "ning": 1735, + "▁hands": 1736, + "▁woman": 1737, + "▁follow": 1738, + "land": 1739, + "aut": 1740, + "▁vo": 1741, + "▁feel": 1742, + "▁rel": 1743, + "▁poss": 1744, + "ched": 1745, + "ical": 1746, + "ple": 1747, + "ph": 1748, + "▁boy": 1749, + "▁return": 1750, + "▁reg": 1751, + "▁rest": 1752, + "ook": 1753, + "▁knew": 1754, + "ner": 1755, + "▁each": 1756, + "▁oh": 1757, + "▁sil": 1758, + "▁kind": 1759, + "▁exp": 1760, + "▁ma": 1761, + "▁cle": 1762, + "▁hel": 1763, + "iver": 1764, + "ting": 1765, + "▁del": 1766, + "ual": 1767, + "▁inf": 1768, + "▁ass": 1769, + "▁water": 1770, + "▁conf": 1771, + "▁bre": 1772, + "▁wo": 1773, + "cept": 1774, + "▁belie": 1775, + "▁certain": 1776, + "▁against": 1777, + "▁hard": 1778, + "▁ph": 1779, + "row": 1780, + "▁unt": 1781, + "▁years": 1782, + "▁quite": 1783, + "▁side": 1784, + "iness": 1785, + "ined": 1786, + "▁near": 1787, + "▁hor": 1788, + "ters": 1789, + "ired": 1790, + "ool": 1791, + "▁four": 1792, + "▁few": 1793, + "▁done": 1794, + "ier": 1795, + "▁che": 1796, + "rest": 1797, + "ited": 1798, + "most": 1799, + "▁better": 1800, + "▁half": 1801, + "▁min": 1802, + "▁tre": 1803, + "ps": 1804, + "▁also": 1805, + "▁care": 1806, + "ock": 1807, + "uck": 1808, + "oub": 1809, + "▁began": 1810, + "ully": 1811, + "▁enough": 1812, + "ised": 1813, + "ru": 1814, + "▁having": 1815, + "▁seen": 1816, + "▁gener": 1817, + "▁lady": 1818, + "▁dra": 1819, + "▁hum": 1820, + "aps": 1821, + "ott": 1822, + "▁pur": 1823, + "aken": 1824, + "ross": 1825, + "ying": 1826, + "▁ter": 1827, + "▁hour": 1828, + "▁inde": 1829, + "ank": 1830, + "▁called": 1831, + "ial": 1832, + "ason": 1833, + "▁beh": 1834, + "▁does": 1835, + "▁whole": 1836, 
+ "▁morn": 1837, + "▁turned": 1838, + "▁pleas": 1839, + "▁ste": 1840, + "▁ref": 1841, + "▁gave": 1842, + "ense": 1843, + "▁occ": 1844, + "ib": 1845, + "▁course": 1846, + "▁ins": 1847, + "ream": 1848, + "gether": 1849, + "uth": 1850, + "▁both": 1851, + "▁sou": 1852, + "▁cur": 1853, + "▁add": 1854, + "een": 1855, + "▁col": 1856, + "▁read": 1857, + "ween": 1858, + "selves": 1859, + "▁among": 1860, + "▁between": 1861, + "▁inc": 1862, + "▁keep": 1863, + "▁beaut": 1864, + "ular": 1865, + "▁poor": 1866, + "▁it's": 1867, + "▁sure": 1868, + "▁morning": 1869, + "▁white": 1870, + "ged": 1871, + "▁name": 1872, + "▁dear": 1873, + "▁toward": 1874, + "ute": 1875, + "▁small": 1876, + "▁whom": 1877, + "▁repl": 1878, + "▁sk": 1879, + "▁lar": 1880, + "▁felt": 1881, + "bo": 1882, + "osed": 1883, + "ating": 1884, + "▁myself": 1885, + "▁open": 1886, + "▁six": 1887, + "▁herself": 1888, + "▁however": 1889, + "▁bu": 1890, + "ond": 1891, + "aint": 1892, + "xt": 1893, + "▁fore": 1894, + "▁inter": 1895, + "▁ev": 1896, + "▁high": 1897, + "ction": 1898, + "▁hund": 1899, + "▁stood": 1900, + "▁hundred": 1901, + "aster": 1902, + "▁tra": 1903, + "▁show": 1904, + "▁sent": 1905, + "ife": 1906, + "▁round": 1907, + "▁sim": 1908, + "▁dr": 1909, + "▁gra": 1910, + "▁words": 1911, + "▁days": 1912, + "▁almost": 1913, + "ale": 1914, + "vel": 1915, + "▁point": 1916, + "ents": 1917, + "▁gre": 1918, + "▁eight": 1919, + "ces": 1920, + "ates": 1921, + "dden": 1922, + "▁fam": 1923, + "▁stand": 1924, + "▁bus": 1925, + "▁land": 1926, + "▁ed": 1927, + "▁mean": 1928, + "ung": 1929, + "haps": 1930, + "▁sun": 1931, + "ures": 1932, + "▁since": 1933, + "iet": 1934, + "ird": 1935, + "▁perhaps": 1936, + "ned": 1937, + "▁sle": 1938, + "iss": 1939, + "▁best": 1940, + "▁sudden": 1941, + "▁dark": 1942, + "▁replied": 1943, + "▁voice": 1944, + "▁met": 1945, + "▁anything": 1946, + "▁till": 1947, + "▁underst": 1948, + "▁bar": 1949, + "its": 1950, + "▁until": 1951, + "ins": 1952, + "oud": 1953, + "▁black": 1954, + "▁bro": 1955, + 
"▁hear": 1956, + "▁looking": 1957, + "▁cried": 1958, + "▁you'": 1959, + "▁fact": 1960, + "amp": 1961, + "▁prin": 1962, + "▁less": 1963, + "▁lay": 1964, + "▁next": 1965, + "▁law": 1966, + "up": 1967, + "▁power": 1968, + "▁prop": 1969, + "not": 1970, + "rent": 1971, + "▁brought": 1972, + "ately": 1973, + "enty": 1974, + "▁country": 1975, + "▁help": 1976, + "als": 1977, + "▁quest": 1978, + "med": 1979, + "▁use": 1980, + "▁vis": 1981, + "▁sn": 1982, + "▁i'm": 1983, + "fully": 1984, + "▁spo": 1985, + "▁together": 1986, + "▁need": 1987, + "▁air": 1988, + "▁adv": 1989, + "▁person": 1990, + "▁indeed": 1991, + "▁contin": 1992, + "▁unc": 1993, + "oney": 1994, + "▁gent": 1995, + "▁present": 1996, + "▁aw": 1997, + "▁par": 1998, + "ows": 1999, + "ured": 2000, + "▁full": 2001, + "tain": 2002, + "▁run": 2003, + "▁rather": 2004, + "▁ide": 2005, + "▁cond": 2006, + "nded": 2007, + "▁lat": 2008, + "▁sy": 2009, + "be": 2010, + "du": 2011, + "▁har": 2012, + "▁feet": 2013, + "▁fin": 2014, + "eter": 2015, + "▁fall": 2016, + "cei": 2017, + "▁five": 2018, + "▁mil": 2019, + "▁bed": 2020, + "oc": 2021, + "▁doct": 2022, + "▁interest": 2023, + "ressed": 2024, + "▁matter": 2025, + "▁lord": 2026, + "▁gone": 2027, + "▁es": 2028, + "fort": 2029, + "▁death": 2030, + "▁wife": 2031, + "▁serv": 2032, + "▁pat": 2033, + "ering": 2034, + "oubt": 2035, + "▁adm": 2036, + "▁talk": 2037, + "▁taken": 2038, + "▁art": 2039, + "▁tri": 2040, + "▁others": 2041, + "▁hope": 2042, + "ash": 2043, + "az": 2044, + "▁ext": 2045, + "▁cannot": 2046, + "ief": 2047, + "▁speak": 2048, + "▁lau": 2049, + "▁themselves": 2050, + "▁along": 2051, + "▁dire": 2052, + "ove": 2053, + "mb": 2054, + "pr": 2055, + "▁bes": 2056, + "▁cou": 2057, + "▁mor": 2058, + "ten": 2059, + "▁gentle": 2060, + "uring": 2061, + "▁fire": 2062, + "▁large": 2063, + "▁pol": 2064, + "▁cat": 2065, + "▁swe": 2066, + "ention": 2067, + "vers": 2068, + "▁thus": 2069, + "app": 2070, + "▁sec": 2071, + "▁play": 2072, + "▁real": 2073, + "▁prom": 2074, + "ments": 2075, 
+ "wered": 2076, + "ield": 2077, + "ains": 2078, + "ison": 2079, + "ached": 2080, + "▁thou": 2081, + "▁reason": 2082, + "▁thous": 2083, + "iting": 2084, + "▁brother": 2085, + "akes": 2086, + "▁thousand": 2087, + "ont": 2088, + "▁money": 2089, + "▁remem": 2090, + "▁dep": 2091, + "▁answered": 2092, + "▁true": 2093, + "▁children": 2094, + "▁behind": 2095, + "oy": 2096, + "▁sound": 2097, + "ants": 2098, + "ably": 2099, + "▁wood": 2100, + "used": 2101, + "▁dec": 2102, + "▁whose": 2103, + "od": 2104, + "▁ele": 2105, + "▁twenty": 2106, + "▁ra": 2107, + "itu": 2108, + "▁believe": 2109, + "▁wonder": 2110, + "ene": 2111, + "▁inv": 2112, + "▁hon": 2113, + "aring": 2114, + "sh": 2115, + "ued": 2116, + "▁suff": 2117, + "▁opp": 2118, + "▁doubt": 2119, + "▁rec": 2120, + "ton": 2121, + "▁hold": 2122, + "▁diffe": 2123, + "▁passed": 2124, + "▁cor": 2125, + "me": 2126, + "ided": 2127, + "ities": 2128, + "▁mer": 2129, + "▁sing": 2130, + "▁nature": 2131, + "▁alone": 2132, + "▁dead": 2133, + "▁pri": 2134, + "ken": 2135, + "lic": 2136, + "▁red": 2137, + "▁bur": 2138, + "aces": 2139, + "▁close": 2140, + "▁gold": 2141, + "▁start": 2142, + "▁hur": 2143, + "▁fur": 2144, + "og": 2145, + "ances": 2146, + "▁ask": 2147, + "▁doctor": 2148, + "▁son": 2149, + "▁ground": 2150, + "wer": 2151, + "ets": 2152, + "▁sea": 2153, + "▁strong": 2154, + "▁leave": 2155, + "▁compan": 2156, + "▁i'll": 2157, + "ery": 2158, + "cy": 2159, + "illed": 2160, + "ept": 2161, + "ides": 2162, + "tle": 2163, + "▁ce": 2164, + "▁obs": 2165, + "body": 2166, + "▁fell": 2167, + "▁sign": 2168, + "cond": 2169, + "▁mount": 2170, + "▁fair": 2171, + "▁given": 2172, + "▁therefore": 2173, + "ane": 2174, + "▁ir": 2175, + "▁deep": 2176, + "iful": 2177, + "fic": 2178, + "ys": 2179, + "▁often": 2180, + "▁body": 2181, + "unt": 2182, + "▁short": 2183, + "▁tem": 2184, + "▁fa": 2185, + "▁master": 2186, + "▁earth": 2187, + "▁pap": 2188, + "ceed": 2189, + "▁stre": 2190, + "▁second": 2191, + "▁fort": 2192, + "bed": 2193, + "gth": 2194, + "owed": 
2195, + "▁horse": 2196, + "idd": 2197, + "▁mad": 2198, + "ually": 2199, + "▁pa": 2200, + "▁chr": 2201, + "▁order": 2202, + "▁ten": 2203, + "vered": 2204, + "▁const": 2205, + "▁wish": 2206, + "▁fif": 2207, + "▁eas": 2208, + "▁cir": 2209, + "▁dro": 2210, + "aim": 2211, + "hen": 2212, + "▁ca": 2213, + "▁really": 2214, + "read": 2215, + "ceived": 2216, + "▁ill": 2217, + "▁fear": 2218, + "osition": 2219, + "▁understand": 2220, + "▁spir": 2221, + "▁list": 2222, + "▁abs": 2223, + "▁spr": 2224, + "aced": 2225, + "▁question": 2226, + "anger": 2227, + "▁everything": 2228, + "aughter": 2229, + "▁aff": 2230, + "▁wall": 2231, + "▁coming": 2232, + "ching": 2233, + "ready": 2234, + "ider": 2235, + "▁above": 2236, + "▁prince": 2237, + "▁already": 2238, + "▁least": 2239, + "▁reco": 2240, + "▁expl": 2241, + "▁step": 2242, + "▁used": 2243, + "▁ru": 2244, + "▁itself": 2245, + "ister": 2246, + "▁necess": 2247, + "▁case": 2248, + "▁around": 2249, + "hn": 2250, + "▁soul": 2251, + "▁suddenly": 2252, + "ger": 2253, + "▁lad": 2254, + "▁evening": 2255, + "▁mag": 2256, + "▁general": 2257, + "▁num": 2258, + "imes": 2259, + "▁known": 2260, + "▁wal": 2261, + "▁quick": 2262, + "ized": 2263, + "▁mus": 2264, + "▁sch": 2265, + "▁captain": 2266, + "▁that's": 2267, + "ific": 2268, + "▁whether": 2269, + "▁lear": 2270, + "gn": 2271, + "▁within": 2272, + "men": 2273, + "▁live": 2274, + "vern": 2275, + "▁times": 2276, + "▁expect": 2277, + "▁state": 2278, + "▁friends": 2279, + "▁bring": 2280, + "▁sort": 2281, + "▁women": 2282, + "▁table": 2283, + "▁meet": 2284, + "▁john": 2285, + "▁circ": 2286, + "▁sum": 2287, + "▁returned": 2288, + "iled": 2289, + "▁dri": 2290, + "▁held": 2291, + "▁exc": 2292, + "▁big": 2293, + "▁says": 2294, + "▁perfect": 2295, + "▁lea": 2296, + "▁obser": 2297, + "▁else": 2298, + "▁during": 2299, + "ident": 2300, + "▁hus": 2301, + "ted": 2302, + "▁beautiful": 2303, + "▁clear": 2304, + "▁either": 2305, + "▁town": 2306, + "▁sight": 2307, + "▁lost": 2308, + "▁sleep": 2309, + "▁means": 2310, 
+ "▁foot": 2311, + "▁cut": 2312, + "▁cal": 2313, + "▁kept": 2314, + "▁ran": 2315, + "ience": 2316, + "▁prof": 2317, + "tered": 2318, + "here": 2319, + "ety": 2320, + "▁fellow": 2321, + "▁can't": 2322, + "▁mist": 2323, + "▁past": 2324, + "▁dream": 2325, + "ages": 2326, + "▁became": 2327, + "▁pret": 2328, + "▁disc": 2329, + "▁bad": 2330, + "▁making": 2331, + "ution": 2332, + "▁object": 2333, + "▁towards": 2334, + "▁low": 2335, + "ught": 2336, + "▁dev": 2337, + "▁human": 2338, + "▁manner": 2339, + "▁strange": 2340, + "▁year": 2341, + "old": 2342, + "ient": 2343, + "ines": 2344, + "▁sever": 2345, + "mon": 2346, + "▁ann": 2347, + "airs": 2348, + "ches": 2349, + "▁city": 2350, + "▁sometimes": 2351, + "'d": 2352, + "▁rose": 2353, + "▁est": 2354, + "ility": 2355, + "▁walk": 2356, + "▁ready": 2357, + "▁pal": 2358, + "▁leg": 2359, + "▁road": 2360, + "ians": 2361, + "cious": 2362, + "▁corn": 2363, + "▁thy": 2364, + "▁cold": 2365, + "lly": 2366, + "iously": 2367, + "lish": 2368, + "▁stra": 2369, + "mer": 2370, + "▁bat": 2371, + "owing": 2372, + "iew": 2373, + "▁christ": 2374, + "▁squ": 2375, + "▁truth": 2376, + "cri": 2377, + "lled": 2378, + "▁thir": 2379, + "▁didn't": 2380, + "bert": 2381, + "▁soci": 2382, + "br": 2383, + "▁bit": 2384, + "▁subject": 2385, + "▁ship": 2386, + "▁mur": 2387, + "▁appro": 2388, + "▁pie": 2389, + "▁answer": 2390, + "▁free": 2391, + "▁business": 2392, + "▁ut": 2393, + "ape": 2394, + "▁appear": 2395, + "▁river": 2396, + "▁sto": 2397, + "▁cast": 2398, + "▁family": 2399, + "▁jud": 2400, + "▁excl": 2401, + "▁letter": 2402, + "ingly": 2403, + "rie": 2404, + "▁hair": 2405, + "ote": 2406, + "▁arms": 2407, + "▁become": 2408, + "ern": 2409, + "ouble": 2410, + "▁different": 2411, + "▁val": 2412, + "ffect": 2413, + "▁natur": 2414, + "▁possible": 2415, + "▁several": 2416, + "▁fine": 2417, + "ah": 2418, + "▁lead": 2419, + "▁forg": 2420, + "▁express": 2421, + "li": 2422, + "▁sus": 2423, + "▁glad": 2424, + "oon": 2425, + "▁arri": 2426, + "▁blood": 2427, + "itting": 
2428, + "▁quiet": 2429, + "rence": 2430, + "▁idea": 2431, + "▁able": 2432, + "itted": 2433, + "ster": 2434, + "▁charac": 2435, + "▁begin": 2436, + "▁chur": 2437, + "▁tou": 2438, + "▁story": 2439, + "▁eye": 2440, + "band": 2441, + "ative": 2442, + "▁grand": 2443, + "▁consider": 2444, + "▁across": 2445, + "▁pen": 2446, + "▁except": 2447, + "▁fre": 2448, + "▁win": 2449, + "▁equ": 2450, + "eth": 2451, + "▁cent": 2452, + "isf": 2453, + "▁partic": 2454, + "▁diffic": 2455, + "▁window": 2456, + "▁surpr": 2457, + "llect": 2458, + "▁prov": 2459, + "▁direct": 2460, + "▁conc": 2461, + "ey": 2462, + "aw": 2463, + "▁govern": 2464, + "▁disco": 2465, + "▁wild": 2466, + "▁dog": 2467, + "▁flo": 2468, + "▁soft": 2469, + "teen": 2470, + "▁cross": 2471, + "ased": 2472, + "▁effect": 2473, + "▁sor": 2474, + "▁longer": 2475, + "▁hen": 2476, + "▁followed": 2477, + "▁sold": 2478, + "▁thee": 2479, + "▁pub": 2480, + "▁husband": 2481, + "ards": 2482, + "antly": 2483, + "by": 2484, + "▁ap": 2485, + "▁suppose": 2486, + "▁respect": 2487, + "ts": 2488, + "▁hast": 2489, + "▁sal": 2490, + "▁comple": 2491, + "▁heav": 2492, + "▁happy": 2493, + "▁rich": 2494, + "▁creat": 2495, + "une": 2496, + "▁taking": 2497, + "▁requ": 2498, + "▁stay": 2499, + "▁spoke": 2500, + "▁daughter": 2501, + "▁wee": 2502, + "▁ve": 2503, + "▁du": 2504, + "▁green": 2505, + "▁anim": 2506, + "▁din": 2507, + "'ll": 2508, + "▁bird": 2509, + "alth": 2510, + "▁mere": 2511, + "▁gard": 2512, + "ny": 2513, + "ley": 2514, + "▁possess": 2515, + "empt": 2516, + "▁reached": 2517, + "▁appeared": 2518, + "ov": 2519, + "▁exist": 2520, + "ination": 2521, + "▁pretty": 2522, + "▁remember": 2523, + "▁hea": 2524, + "▁opened": 2525, + "▁tom": 2526, + "anged": 2527, + "▁slow": 2528, + "▁imag": 2529, + "▁i've": 2530, + "ract": 2531, + "▁saying": 2532, + "king": 2533, + "utes": 2534, + "▁common": 2535, + "▁occas": 2536, + "▁book": 2537, + "▁rus": 2538, + "ames": 2539, + "ices": 2540, + "▁bright": 2541, + "ms": 2542, + "▁satisf": 2543, + "▁sense": 2544, 
+ "▁fav": 2545, + "▁succ": 2546, + "ump": 2547, + "ising": 2548, + "▁lu": 2549, + "▁accord": 2550, + "tern": 2551, + "▁break": 2552, + "▁exper": 2553, + "▁month": 2554, + "use": 2555, + "▁dem": 2556, + "▁scar": 2557, + "▁continued": 2558, + "▁secret": 2559, + "▁church": 2560, + "▁tree": 2561, + "▁stri": 2562, + "▁carried": 2563, + "▁cry": 2564, + "nding": 2565, + "▁spirit": 2566, + "▁wanted": 2567, + "eric": 2568, + "▁certainly": 2569, + "▁command": 2570, + "▁dest": 2571, + "▁move": 2572, + "oun": 2573, + "▁sweet": 2574, + "▁street": 2575, + "▁ought": 2576, + "▁account": 2577, + "▁def": 2578, + "ham": 2579, + "▁prep": 2580, + "▁sens": 2581, + "▁esc": 2582, + "▁rock": 2583, + "ots": 2584, + "▁decl": 2585, + "▁purp": 2586, + "riage": 2587, + "outh": 2588, + "owers": 2589, + "▁draw": 2590, + "▁eat": 2591, + "▁breat": 2592, + "▁character": 2593, + "ime": 2594, + "cul": 2595, + "medi": 2596, + "▁stud": 2597, + "▁school": 2598, + "itude": 2599, + "▁heaven": 2600, + "▁feeling": 2601, + "▁sad": 2602, + "▁regard": 2603, + "ement": 2604, + "▁pain": 2605, + "▁worth": 2606, + "▁bra": 2607, + "ney": 2608, + "▁dut": 2609, + "▁smo": 2610, + "aimed": 2611, + "▁trans": 2612, + "▁delight": 2613, + "▁quar": 2614, + "▁hung": 2615, + "▁mot": 2616, + "▁blue": 2617, + "▁hot": 2618, + "▁hill": 2619, + "▁div": 2620, + "umb": 2621, + "▁disapp": 2622, + "▁marg": 2623, + "▁laugh": 2624, + "idence": 2625, + "▁produ": 2626, + "▁success": 2627, + "ury": 2628, + "son": 2629, + "▁fast": 2630, + "▁english": 2631, + "▁dress": 2632, + "▁hat": 2633, + "▁terri": 2634, + "▁port": 2635, + "▁neither": 2636, + "▁court": 2637, + "▁seven": 2638, + "▁fight": 2639, + "▁princess": 2640, + "▁lived": 2641, + "▁view": 2642, + "▁immedi": 2643, + "▁self": 2644, + "▁var": 2645, + "▁hours": 2646, + "▁mill": 2647, + "▁sol": 2648, + "▁exam": 2649, + "▁tried": 2650, + "▁won't": 2651, + "▁entered": 2652, + "▁disp": 2653, + "to": 2654, + "ric": 2655, + "▁carry": 2656, + "▁import": 2657, + "▁ang": 2658, + "ze": 2659, + 
"ony": 2660, + "▁danger": 2661, + "ledge": 2662, + "▁offic": 2663, + "▁cause": 2664, + "▁none": 2665, + "▁forward": 2666, + "▁uncle": 2667, + "▁tor": 2668, + "▁det": 2669, + "ask": 2670, + "▁len": 2671, + "▁further": 2672, + "▁pay": 2673, + "▁added": 2674, + "▁front": 2675, + "ror": 2676, + "▁ge": 2677, + "▁particular": 2678, + "▁deal": 2679, + "▁prot": 2680, + "▁led": 2681, + "▁acqu": 2682, + "▁pray": 2683, + "▁eff": 2684, + "▁happened": 2685, + "▁chief": 2686, + "lect": 2687, + "▁walked": 2688, + "▁later": 2689, + "▁joy": 2690, + "iar": 2691, + "day": 2692, + "▁ord": 2693, + "▁alth": 2694, + "▁comfort": 2695, + "▁prob": 2696, + "▁maj": 2697, + "▁affect": 2698, + "▁public": 2699, + "▁bene": 2700, + "ening": 2701, + "▁although": 2702, + "gr": 2703, + "▁sho": 2704, + "▁fig": 2705, + "resh": 2706, + "▁fail": 2707, + "uct": 2708, + "ug": 2709, + "ality": 2710, + "▁mem": 2711, + "▁seems": 2712, + "▁yourself": 2713, + "ship": 2714, + "ead": 2715, + "iam": 2716, + "▁number": 2717, + "side": 2718, + "▁ah": 2719, + "▁doing": 2720, + "▁living": 2721, + "arent": 2722, + "▁desp": 2723, + "ize": 2724, + "oof": 2725, + "▁field": 2726, + "▁received": 2727, + "▁shad": 2728, + "▁bey": 2729, + "▁beyond": 2730, + "▁phil": 2731, + "▁line": 2732, + "▁visit": 2733, + "inct": 2734, + "rig": 2735, + "▁party": 2736, + "▁garden": 2737, + "▁je": 2738, + "▁mouth": 2739, + "▁hall": 2740, + "▁queen": 2741, + "▁boat": 2742, + "▁bear": 2743, + "▁americ": 2744, + "ism": 2745, + "▁gentleman": 2746, + "▁vi": 2747, + "irt": 2748, + "uff": 2749, + "▁laid": 2750, + "raid": 2751, + "▁occasion": 2752, + "▁entire": 2753, + "▁age": 2754, + "▁sister": 2755, + "▁clot": 2756, + "▁repe": 2757, + "ously": 2758, + "▁prison": 2759, + "▁accom": 2760, + "▁whis": 2761, + "▁nearly": 2762, + "▁trees": 2763, + "iling": 2764, + "iff": 2765, + "▁eighteen": 2766, + "bit": 2767, + "wards": 2768, + "▁early": 2769, + "▁tal": 2770, + "▁lab": 2771, + "▁forth": 2772, + "ming": 2773, + "ones": 2774, + "▁med": 2775, + "▁try": 
2776, + "▁da": 2777, + "ilt": 2778, + "anced": 2779, + "▁princi": 2780, + "▁enem": 2781, + "▁thinking": 2782, + "▁chance": 2783, + "where": 2784, + "▁cre": 2785, + "▁minutes": 2786, + "▁anx": 2787, + "▁mary": 2788, + "▁pict": 2789, + "▁wait": 2790, + "▁vill": 2791, + "▁stren": 2792, + "▁afraid": 2793, + "▁crow": 2794, + "▁smile": 2795, + "▁late": 2796, + "▁england": 2797, + "▁pleasure": 2798, + "▁aunt": 2799, + "▁news": 2800, + "▁wis": 2801, + "▁fle": 2802, + "▁seeing": 2803, + "▁super": 2804, + "▁faith": 2805, + "▁rob": 2806, + "iment": 2807, + "oint": 2808, + "▁bill": 2809, + "lling": 2810, + "▁neigh": 2811, + "▁trouble": 2812, + "▁silence": 2813, + "▁plain": 2814, + "▁there's": 2815, + "aret": 2816, + "pend": 2817, + "▁exclaimed": 2818, + "rench": 2819, + "gy": 2820, + "▁miles": 2821, + "ply": 2822, + "▁glass": 2823, + "▁drew": 2824, + "▁neighb": 2825, + "els": 2826, + "▁mine": 2827, + "▁pract": 2828, + "▁heavy": 2829, + "▁standing": 2830, + "▁sevent": 2831, + "▁shar": 2832, + "▁change": 2833, + "▁necessary": 2834, + "▁chap": 2835, + "▁purpose": 2836, + "▁inqu": 2837, + "▁natural": 2838, + "▁deter": 2839, + "icked": 2840, + "▁bott": 2841, + "▁hardly": 2842, + "▁bell": 2843, + "▁top": 2844, + "▁caught": 2845, + "fered": 2846, + "wh": 2847, + "ives": 2848, + "ounded": 2849, + "▁auth": 2850, + "▁circum": 2851, + "▁fing": 2852, + "▁stopped": 2853, + "uc": 2854, + "▁wit": 2855, + "ament": 2856, + "▁opin": 2857, + "▁av": 2858, + "▁priv": 2859, + "aining": 2860, + "▁instead": 2861, + "rupt": 2862, + "▁grew": 2863, + "▁loved": 2864, + "▁island": 2865, + "▁knight": 2866, + "▁ago": 2867, + "▁length": 2868, + "▁inn": 2869, + "▁peace": 2870, + "ls": 2871, + "inary": 2872, + "ior": 2873, + "ues": 2874, + "▁third": 2875, + "ush": 2876, + "▁beauty": 2877, + "▁hig": 2878, + "▁he's": 2879, + "the": 2880, + "form": 2881, + "head": 2882, + "ically": 2883, + "asp": 2884, + "ancy": 2885, + "▁determ": 2886, + "▁straight": 2887, + "▁cra": 2888, + "ining": 2889, + "pper": 2890, + 
"ler": 2891, + "▁infl": 2892, + "▁thor": 2893, + "▁convers": 2894, + "▁besides": 2895, + "▁position": 2896, + "▁thirty": 2897, + "▁den": 2898, + "rage": 2899, + "▁attention": 2900, + "ma": 2901, + "▁conv": 2902, + "ager": 2903, + "▁hist": 2904, + "ored": 2905, + "▁comes": 2906, + "aged": 2907, + "▁force": 2908, + "▁sitting": 2909, + "▁please": 2910, + "tend": 2911, + "iter": 2912, + "▁whatever": 2913, + "▁inform": 2914, + "▁hop": 2915, + "▁chair": 2916, + "▁build": 2917, + "▁bab": 2918, + "ustom": 2919, + "▁girls": 2920, + "▁rom": 2921, + "▁french": 2922, + "▁struck": 2923, + "▁pull": 2924, + "▁ast": 2925, + "▁lie": 2926, + "▁wrong": 2927, + "▁knowledge": 2928, + "▁grace": 2929, + "▁scarce": 2930, + "ghed": 2931, + "▁resol": 2932, + "▁watch": 2933, + "▁thoughts": 2934, + "▁rid": 2935, + "▁attempt": 2936, + "▁fifty": 2937, + "▁rap": 2938, + "▁box": 2939, + "hood": 2940, + "▁getting": 2941, + "▁ver": 2942, + "▁fat": 2943, + "▁company": 2944, + "▁arr": 2945, + "▁crowd": 2946, + "▁burn": 2947, + "▁slight": 2948, + "▁class": 2949, + "▁south": 2950, + "▁die": 2951, + "▁exact": 2952, + "▁drink": 2953, + "▁enj": 2954, + "▁thick": 2955, + "▁dinner": 2956, + "▁save": 2957, + "▁maid": 2958, + "▁plan": 2959, + "▁saint": 2960, + "▁immediately": 2961, + "iers": 2962, + "▁born": 2963, + "ius": 2964, + "▁rev": 2965, + "▁tears": 2966, + "ists": 2967, + "▁treat": 2968, + "usion": 2969, + "▁meant": 2970, + "▁boys": 2971, + "pping": 2972, + "▁slowly": 2973, + "▁incl": 2974, + "▁lim": 2975, + "▁died": 2976, + "iced": 2977, + "▁compl": 2978, + "▁fool": 2979, + "▁forest": 2980, + "▁sugg": 2981, + "▁post": 2982, + "▁accept": 2983, + "▁result": 2984, + "▁author": 2985, + "ndon": 2986, + "ceive": 2987, + "▁suggest": 2988, + "cient": 2989, + "▁stone": 2990, + "▁fright": 2991, + "▁paper": 2992, + "▁conse": 2993, + "▁jour": 2994, + "▁ty": 2995, + "▁enc": 2996, + "▁quickly": 2997, + "▁contr": 2998, + "▁youth": 2999, + "▁send": 3000, + "▁vict": 3001, + "ified": 3002, + "▁belong": 3003, + 
"▁warm": 3004, + "▁fix": 3005, + "▁imposs": 3006, + "▁beside": 3007, + "▁er": 3008, + "▁tone": 3009, + "▁camp": 3010, + "▁desire": 3011, + "▁bound": 3012, + "▁makes": 3013, + "▁margaret": 3014, + "▁north": 3015, + "▁brown": 3016, + "▁moon": 3017, + "▁lips": 3018, + "▁placed": 3019, + "val": 3020, + "▁circumst": 3021, + "▁food": 3022, + "▁filled": 3023, + "ics": 3024, + "ift": 3025, + "ann": 3026, + "▁london": 3027, + "▁distance": 3028, + "ging": 3029, + "▁strength": 3030, + "▁id": 3031, + "▁floor": 3032, + "▁forget": 3033, + "▁obl": 3034, + "▁mid": 3035, + "ries": 3036, + "itions": 3037, + "bs": 3038, + "▁spring": 3039, + "▁you're": 3040, + "▁viol": 3041, + "▁jack": 3042, + "▁pock": 3043, + "ooks": 3044, + "▁following": 3045, + "▁sac": 3046, + "▁remained": 3047, + "arch": 3048, + "▁grow": 3049, + "▁snow": 3050, + "▁government": 3051, + "▁ball": 3052, + "▁hors": 3053, + "▁nar": 3054, + "aded": 3055, + "▁broken": 3056, + "▁laughed": 3057, + "▁descri": 3058, + "▁safe": 3059, + "itten": 3060, + "ively": 3061, + "▁profess": 3062, + "▁o'": 3063, + "amed": 3064, + "▁depart": 3065, + "▁easy": 3066, + "oured": 3067, + "▁und": 3068, + "▁coun": 3069, + "▁thank": 3070, + "▁knows": 3071, + "▁waiting": 3072, + "dom": 3073, + "ats": 3074, + "▁ger": 3075, + "▁van": 3076, + "▁anne": 3077, + "▁horses": 3078, + "ugg": 3079, + "▁dread": 3080, + "▁une": 3081, + "ges": 3082, + "acy": 3083, + "▁proceed": 3084, + "▁gaz": 3085, + "▁shout": 3086, + "▁started": 3087, + "ented": 3088, + "▁complete": 3089, + "ope": 3090, + "▁gall": 3091, + "dered": 3092, + "▁wide": 3093, + "ires": 3094, + "▁neck": 3095, + "asure": 3096, + "isted": 3097, + "▁service": 3098, + "▁piece": 3099, + "cially": 3100, + "ences": 3101, + "▁sail": 3102, + "▁palace": 3103, + "erv": 3104, + "▁guard": 3105, + "▁doll": 3106, + "▁talking": 3107, + "▁man's": 3108, + "▁lift": 3109, + "▁grave": 3110, + "▁week": 3111, + "let": 3112, + "▁impossible": 3113, + "▁effort": 3114, + "▁imm": 3115, + "▁army": 3116, + "well": 3117, + 
"▁difficult": 3118, + "und": 3119, + "▁fresh": 3120, + "▁fun": 3121, + "reme": 3122, + "▁stop": 3123, + "▁mess": 3124, + "▁gar": 3125, + "▁deg": 3126, + "▁incre": 3127, + "▁corner": 3128, + "▁society": 3129, + "▁weak": 3130, + "▁shut": 3131, + "▁hy": 3132, + "▁proper": 3133, + "aching": 3134, + "▁cloud": 3135, + "iddle": 3136, + "ivid": 3137, + "▁demand": 3138, + "▁nine": 3139, + "▁sit": 3140, + "▁recogn": 3141, + "▁beat": 3142, + "uss": 3143, + "▁turning": 3144, + "▁sky": 3145, + "▁opinion": 3146, + "▁single": 3147, + "pic": 3148, + "▁fly": 3149, + "▁lang": 3150, + "▁mass": 3151, + "cell": 3152, + "▁outside": 3153, + "▁kiss": 3154, + "▁trust": 3155, + "▁occup": 3156, + "▁evil": 3157, + "▁below": 3158, + "▁appearance": 3159, + "uit": 3160, + "▁aftern": 3161, + "▁glo": 3162, + "▁gun": 3163, + "▁west": 3164, + "ency": 3165, + "par": 3166, + "▁showed": 3167, + "▁conversation": 3168, + "ises": 3169, + "▁conn": 3170, + "▁couldn't": 3171, + "▁running": 3172, + "▁mention": 3173, + "▁greater": 3174, + "▁music": 3175, + "▁breath": 3176, + "ases": 3177, + "▁nin": 3178, + "▁ant": 3179, + "arer": 3180, + "▁morrow": 3181, + "▁bank": 3182, + "▁espe": 3183, + "▁peter": 3184, + "ork": 3185, + "cial": 3186, + "▁presence": 3187, + "▁battle": 3188, + "▁winter": 3189, + "hered": 3190, + "▁probably": 3191, + "▁clothes": 3192, + "▁fash": 3193, + "▁mark": 3194, + "▁wished": 3195, + "vere": 3196, + "▁coll": 3197, + "▁emb": 3198, + "▁kne": 3199, + "▁married": 3200, + "▁arrived": 3201, + "▁pun": 3202, + "▁event": 3203, + "ushed": 3204, + "▁suffic": 3205, + "▁eager": 3206, + "▁former": 3207, + "▁giving": 3208, + "▁pop": 3209, + "▁sand": 3210, + "▁neg": 3211, + "▁usual": 3212, + "▁relig": 3213, + "▁simple": 3214, + "▁sym": 3215, + "itation": 3216, + "▁gro": 3217, + "ories": 3218, + "▁moved": 3219, + "▁months": 3220, + "▁speaking": 3221, + "▁pet": 3222, + "▁silent": 3223, + "▁cab": 3224, + "▁mountain": 3225, + "▁expression": 3226, + "gar": 3227, + "▁covered": 3228, + "▁hunt": 3229, + 
"▁afternoon": 3230, + "aped": 3231, + "▁occur": 3232, + "rief": 3233, + "▁states": 3234, + "▁z": 3235, + "str": 3236, + "▁loc": 3237, + "light": 3238, + "▁shore": 3239, + "che": 3240, + "▁easily": 3241, + "▁pale": 3242, + "unity": 3243, + "▁remark": 3244, + "▁phys": 3245, + "▁beginning": 3246, + "▁duty": 3247, + "▁chapter": 3248, + "▁influ": 3249, + "cho": 3250, + "▁concl": 3251, + "amb": 3252, + "▁instant": 3253, + "▁polit": 3254, + "zz": 3255, + "▁enjoy": 3256, + "▁sick": 3257, + "▁remain": 3258, + "uel": 3259, + "▁stream": 3260, + "▁figure": 3261, + "ald": 3262, + "▁tur": 3263, + "▁path": 3264, + "▁vol": 3265, + "▁minute": 3266, + "▁pleasant": 3267, + "▁scarcely": 3268, + "▁conscious": 3269, + "▁terrible": 3270, + "▁kill": 3271, + "▁raised": 3272, + "▁fashion": 3273, + "▁twel": 3274, + "yal": 3275, + "▁leaving": 3276, + "▁twelve": 3277, + "ature": 3278, + "▁fut": 3279, + "▁threw": 3280, + "▁star": 3281, + "▁flowers": 3282, + "olog": 3283, + "▁trying": 3284, + "rib": 3285, + "▁sword": 3286, + "▁tall": 3287, + "▁marry": 3288, + "▁ben": 3289, + "▁expected": 3290, + "▁according": 3291, + "▁forty": 3292, + "▁stick": 3293, + "inal": 3294, + "▁guess": 3295, + "▁silver": 3296, + "▁iron": 3297, + "▁oblig": 3298, + "▁office": 3299, + "▁rapid": 3300, + "▁ladies": 3301, + "▁especially": 3302, + "ipped": 3303, + "orted": 3304, + "▁bread": 3305, + "ech": 3306, + "▁tender": 3307, + "orth": 3308, + "▁learned": 3309, + "▁books": 3310, + "▁isn't": 3311, + "▁surprise": 3312, + "▁write": 3313, + "▁purs": 3314, + "pered": 3315, + "▁written": 3316, + "▁killed": 3317, + "▁consequ": 3318, + "▁exh": 3319, + "▁places": 3320, + "▁condition": 3321, + "▁direction": 3322, + "▁cho": 3323, + "ulty": 3324, + "jo": 3325, + "mit": 3326, + "▁entirely": 3327, + "tering": 3328, + "▁enter": 3329, + "▁action": 3330, + "wise": 3331, + "▁suc": 3332, + "ibly": 3333, + "▁happiness": 3334, + "▁decided": 3335, + "▁golden": 3336, + "▁langu": 3337, + "eness": 3338, + "▁note": 3339, + "▁unless": 3340, + 
"uous": 3341, + "▁fal": 3342, + "aled": 3343, + "▁you'll": 3344, + "▁wonderful": 3345, + "ounds": 3346, + "ume": 3347, + "'re": 3348, + "▁shook": 3349, + "er's": 3350, + "oop": 3351, + "onel": 3352, + "▁perfectly": 3353, + "▁geor": 3354, + "ndered": 3355, + "▁broad": 3356, + "atic": 3357, + "▁closed": 3358, + "a's": 3359, + "▁spot": 3360, + "tended": 3361, + "▁latter": 3362, + "▁steps": 3363, + "▁merely": 3364, + "▁history": 3365, + "fer": 3366, + "▁wise": 3367, + "ishing": 3368, + "osing": 3369, + "▁middle": 3370, + "idered": 3371, + "▁understood": 3372, + "▁enemy": 3373, + "▁sole": 3374, + "llig": 3375, + "▁jew": 3376, + "▁simply": 3377, + "gan": 3378, + "▁conduct": 3379, + "▁tast": 3380, + "▁board": 3381, + "▁sav": 3382, + "▁wouldn't": 3383, + "▁shot": 3384, + "▁reply": 3385, + "▁changed": 3386, + "mn": 3387, + "▁grass": 3388, + "▁finally": 3389, + "▁admir": 3390, + "ital": 3391, + "▁sharp": 3392, + "itch": 3393, + "▁fortune": 3394, + "▁summer": 3395, + "▁experience": 3396, + "▁succeed": 3397, + "gress": 3398, + "uted": 3399, + "▁orig": 3400, + "retched": 3401, + "▁journey": 3402, + "▁excell": 3403, + "▁observed": 3404, + "ax": 3405, + "▁afterwards": 3406, + "fast": 3407, + "sy": 3408, + "▁bow": 3409, + "▁flat": 3410, + "▁persons": 3411, + "▁lean": 3412, + "▁earn": 3413, + "▁broke": 3414, + "▁mir": 3415, + "▁fit": 3416, + "osp": 3417, + "▁marriage": 3418, + "▁repres": 3419, + "io": 3420, + "▁lying": 3421, + "unk": 3422, + "▁trave": 3423, + "▁situ": 3424, + "▁listen": 3425, + "▁acquaint": 3426, + "▁ring": 3427, + "cience": 3428, + "▁faint": 3429, + "olute": 3430, + "▁calm": 3431, + "bered": 3432, + "▁lives": 3433, + "▁escape": 3434, + "▁beneath": 3435, + "ouses": 3436, + "▁clim": 3437, + "▁bless": 3438, + "▁repeated": 3439, + "▁pocket": 3440, + "ests": 3441, + "▁tail": 3442, + "▁passion": 3443, + "▁dick": 3444, + "▁ven": 3445, + "oses": 3446, + "clock": 3447, + "▁mut": 3448, + "▁becom": 3449, + "▁oper": 3450, + "▁o'clock": 3451, + "▁fish": 3452, + "▁lou": 3453, + 
"semb": 3454, + "▁prev": 3455, + "▁allowed": 3456, + "▁famil": 3457, + "hel": 3458, + "▁gate": 3459, + "▁spite": 3460, + "ivers": 3461, + "▁health": 3462, + "ission": 3463, + "▁ign": 3464, + "▁reach": 3465, + "▁cand": 3466, + "▁rain": 3467, + "▁empl": 3468, + "▁ban": 3469, + "▁strugg": 3470, + "▁firm": 3471, + "▁bitter": 3472, + "▁sorry": 3473, + "bing": 3474, + "▁father's": 3475, + "▁temper": 3476, + "▁madame": 3477, + "ples": 3478, + "▁furn": 3479, + "▁future": 3480, + "umed": 3481, + "▁nice": 3482, + "▁separ": 3483, + "▁presently": 3484, + "▁circumstances": 3485, + "▁connect": 3486, + "iding": 3487, + "▁sett": 3488, + "kes": 3489, + "▁loud": 3490, + "▁worse": 3491, + "▁wand": 3492, + "▁spread": 3493, + "▁i'd": 3494, + "▁letters": 3495, + "▁yellow": 3496, + "▁magn": 3497, + "▁passing": 3498, + "▁kit": 3499, + "▁pleased": 3500, + "▁darkness": 3501, + "▁remar": 3502, + "idden": 3503, + "come": 3504, + "▁tea": 3505, + "▁civ": 3506, + "▁apart": 3507, + "▁disappe": 3508, + "▁important": 3509, + "▁legs": 3510, + "▁nation": 3511, + "▁delic": 3512, + "▁dressed": 3513, + "▁game": 3514, + "▁walls": 3515, + "ec": 3516, + "▁dry": 3517, + "▁virt": 3518, + "▁dim": 3519, + "idently": 3520, + "rel": 3521, + "▁rub": 3522, + "▁absolute": 3523, + "▁blind": 3524, + "▁discovered": 3525, + "▁exactly": 3526, + "▁dam": 3527, + "otten": 3528, + "▁sorrow": 3529, + "my": 3530, + "▁cost": 3531, + "ference": 3532, + "▁employ": 3533, + "velop": 3534, + "▁cous": 3535, + "▁beast": 3536, + "▁spec": 3537, + "▁opport": 3538, + "▁ears": 3539, + "▁dropped": 3540, + "▁subst": 3541, + "▁chee": 3542, + "▁protect": 3543, + "ils": 3544, + "▁smiled": 3545, + "ina": 3546, + "▁resp": 3547, + "▁promise": 3548, + "▁bag": 3549, + "▁host": 3550, + "urs": 3551, + "▁creature": 3552, + "▁notice": 3553, + "▁knowing": 3554, + "▁heads": 3555, + "▁concer": 3556, + "▁seat": 3557, + "ishment": 3558, + "▁individ": 3559, + "▁existence": 3560, + "▁determined": 3561, + "lend": 3562, + "▁storm": 3563, + "roy": 3564, + 
"ours": 3565, + "▁conce": 3566, + "anging": 3567, + "▁fixed": 3568, + "▁press": 3569, + "▁major": 3570, + "oved": 3571, + "▁ves": 3572, + "iod": 3573, + "▁learn": 3574, + "▁motion": 3575, + "▁empt": 3576, + "▁leaves": 3577, + "▁bottom": 3578, + "▁arg": 3579, + "iety": 3580, + "▁nobody": 3581, + "▁pros": 3582, + "que": 3583, + "▁utter": 3584, + "▁pick": 3585, + "acked": 3586, + "▁intellig": 3587, + "▁hes": 3588, + "▁stir": 3589, + "▁prevent": 3590, + "▁assist": 3591, + "▁dom": 3592, + "▁disg": 3593, + "▁advant": 3594, + "erable": 3595, + "▁vent": 3596, + "ument": 3597, + "▁tired": 3598, + "rect": 3599, + "ashed": 3600, + "action": 3601, + "▁considered": 3602, + "▁wrote": 3603, + "▁houses": 3604, + "▁suit": 3605, + "▁cheer": 3606, + "▁castle": 3607, + "▁pra": 3608, + "▁perform": 3609, + "ancing": 3610, + "▁clean": 3611, + "ruct": 3612, + "▁stro": 3613, + "▁frequ": 3614, + "▁drawing": 3615, + "▁luck": 3616, + "▁habit": 3617, + "idge": 3618, + "ell": 3619, + "▁ones": 3620, + "▁noble": 3621, + "▁splend": 3622, + "▁honor": 3623, + "zen": 3624, + "▁paid": 3625, + "▁speech": 3626, + "▁estab": 3627, + "▁ur": 3628, + "istr": 3629, + "▁individual": 3630, + "inite": 3631, + "▁vall": 3632, + "▁birds": 3633, + "rodu": 3634, + "▁dar": 3635, + "▁allow": 3636, + "▁confess": 3637, + "▁impress": 3638, + "▁propert": 3639, + "▁jane": 3640, + "▁song": 3641, + "▁various": 3642, + "▁narrow": 3643, + "▁moder": 3644, + "▁believed": 3645, + "ays": 3646, + "▁extra": 3647, + "▁pure": 3648, + "arily": 3649, + "▁period": 3650, + "▁shadow": 3651, + "▁somewh": 3652, + "▁mal": 3653, + "▁cott": 3654, + "▁extreme": 3655, + "▁judge": 3656, + "▁village": 3657, + "▁royal": 3658, + "▁somewhat": 3659, + "▁lower": 3660, + "▁ham": 3661, + "▁agree": 3662, + "▁remembered": 3663, + "▁aston": 3664, + "enth": 3665, + "▁declared": 3666, + "pan": 3667, + "▁train": 3668, + "▁parts": 3669, + "▁colonel": 3670, + "amber": 3671, + "▁breakfast": 3672, + "▁surely": 3673, + "▁sin": 3674, + "ayed": 3675, + "▁scene": 3676, 
+ "go": 3677, + "▁greatest": 3678, + "▁influence": 3679, + "▁custom": 3680, + "itary": 3681, + "▁animal": 3682, + "▁sake": 3683, + "▁mod": 3684, + "▁soldiers": 3685, + "iny": 3686, + "▁ancient": 3687, + "▁drawn": 3688, + "▁evidently": 3689, + "▁ways": 3690, + "▁looks": 3691, + "▁revol": 3692, + "ator": 3693, + "anted": 3694, + "▁reflect": 3695, + "▁picture": 3696, + "▁likely": 3697, + "▁shr": 3698, + "▁laws": 3699, + "▁holding": 3700, + "▁difficulty": 3701, + "▁inj": 3702, + "▁mel": 3703, + "▁courage": 3704, + "nes": 3705, + "▁mort": 3706, + "▁troub": 3707, + "▁burst": 3708, + "▁angry": 3709, + "▁proud": 3710, + "gged": 3711, + "▁spoken": 3712, + "ision": 3713, + "▁desert": 3714, + "ption": 3715, + "▁comb": 3716, + "▁apparent": 3717, + "ring": 3718, + "▁watched": 3719, + "na": 3720, + "▁east": 3721, + "▁shop": 3722, + "▁agre": 3723, + "▁private": 3724, + "esty": 3725, + "▁jul": 3726, + "▁finished": 3727, + "▁anxious": 3728, + "otion": 3729, + "▁fifteen": 3730, + "▁social": 3731, + "under": 3732, + "▁dism": 3733, + "▁touch": 3734, + "▁wine": 3735, + "▁attack": 3736, + "▁ideas": 3737, + "▁george": 3738, + "af": 3739, + "rer": 3740, + "oose": 3741, + "▁space": 3742, + "▁scr": 3743, + "▁inside": 3744, + "▁gentlemen": 3745, + "▁civil": 3746, + "iently": 3747, + "▁formed": 3748, + "▁fol": 3749, + "▁goes": 3750, + "▁you've": 3751, + "▁thin": 3752, + "▁surf": 3753, + "▁servant": 3754, + "▁bal": 3755, + "▁cover": 3756, + "▁ourselves": 3757, + "▁fallen": 3758, + "▁henry": 3759, + "▁lot": 3760, + "ium": 3761, + "▁advent": 3762, + "▁carriage": 3763, + "▁baby": 3764, + "▁elect": 3765, + "▁tong": 3766, + "▁appre": 3767, + "▁everybody": 3768, + "uded": 3769, + "▁commun": 3770, + "▁ine": 3771, + "itive": 3772, + "▁waited": 3773, + "cise": 3774, + "▁grou": 3775, + "het": 3776, + "▁vain": 3777, + "▁impro": 3778, + "▁favor": 3779, + "erial": 3780, + "▁speed": 3781, + "▁windows": 3782, + "▁carefully": 3783, + "▁ice": 3784, + "▁noise": 3785, + "▁hero": 3786, + "▁jim": 3787, + 
"▁william": 3788, + "▁pecul": 3789, + "▁promised": 3790, + "▁walking": 3791, + "▁forgotten": 3792, + "▁obliged": 3793, + "▁earnest": 3794, + "▁main": 3795, + "▁lose": 3796, + "▁glance": 3797, + "▁vessel": 3798, + "▁grad": 3799, + "▁thro": 3800, + "▁bod": 3801, + "▁shoulder": 3802, + "▁meth": 3803, + "▁animals": 3804, + "▁noticed": 3805, + "ables": 3806, + "▁peculiar": 3807, + "▁fier": 3808, + "▁pot": 3809, + "▁quietly": 3810, + "▁cup": 3811, + "▁serious": 3812, + "▁tremb": 3813, + "▁generally": 3814, + "▁american": 3815, + "▁symp": 3816, + "ral": 3817, + "▁don": 3818, + "▁france": 3819, + "iction": 3820, + "▁property": 3821, + "▁shoulders": 3822, + "▁stranger": 3823, + "▁san": 3824, + "▁cow": 3825, + "▁what's": 3826, + "▁dust": 3827, + "▁affection": 3828, + "▁handsome": 3829, + "▁higher": 3830, + "iant": 3831, + "nday": 3832, + "▁wel": 3833, + "▁poet": 3834, + "▁sla": 3835, + "▁distinct": 3836, + "▁mam": 3837, + "▁pier": 3838, + "acing": 3839, + "ague": 3840, + "▁grown": 3841, + "uly": 3842, + "▁d'": 3843, + "▁chamber": 3844, + "▁desce": 3845, + "▁murm": 3846, + "stem": 3847, + "▁personal": 3848, + "▁fancy": 3849, + "▁offered": 3850, + "osite": 3851, + "onsie": 3852, + "▁built": 3853, + "▁edge": 3854, + "▁whispered": 3855, + "▁skin": 3856, + "▁pieces": 3857, + "itated": 3858, + "cher": 3859, + "osity": 3860, + "▁pit": 3861, + "▁contro": 3862, + "▁faces": 3863, + "▁spent": 3864, + "▁interrupt": 3865, + "how": 3866, + "isters": 3867, + "▁butter": 3868, + "▁develop": 3869, + "▁unk": 3870, + "hip": 3871, + "▁heat": 3872, + "▁fond": 3873, + "▁coat": 3874, + "▁touched": 3875, + "▁hol": 3876, + "ingu": 3877, + "▁pi": 3878, + "▁race": 3879, + "▁jump": 3880, + "▁surprised": 3881, + "oted": 3882, + "▁defe": 3883, + "enced": 3884, + "▁wasn't": 3885, + "▁wear": 3886, + "andon": 3887, + "▁fan": 3888, + "acher": 3889, + "▁arch": 3890, + "▁educ": 3891, + "▁brave": 3892, + "athered": 3893, + "▁eld": 3894, + "▁wealth": 3895, + "▁system": 3896, + "▁german": 3897, + "▁false": 3898, + 
"wood": 3899, + "▁dare": 3900, + "aked": 3901, + "▁cousin": 3902, + "▁fer": 3903, + "key": 3904, + "▁lin": 3905, + "▁intellect": 3906, + "▁prepared": 3907, + "▁fingers": 3908, + "▁surr": 3909, + "▁mountains": 3910, + "ipp": 3911, + "▁opportunity": 3912, + "aff": 3913, + "▁bare": 3914, + "▁dor": 3915, + "▁introdu": 3916, + "▁collect": 3917, + "▁lovely": 3918, + "▁rag": 3919, + "▁crown": 3920, + "▁matters": 3921, + "▁companion": 3922, + "▁weather": 3923, + "▁alar": 3924, + "▁innoc": 3925, + "▁ris": 3926, + "▁mix": 3927, + "▁lake": 3928, + "▁store": 3929, + "▁unh": 3930, + "▁meaning": 3931, + "▁memory": 3932, + "over": 3933, + "▁band": 3934, + "leep": 3935, + "▁finding": 3936, + "ee": 3937, + "▁charge": 3938, + "▁grat": 3939, + "▁attract": 3940, + "▁gray": 3941, + "▁quarter": 3942, + "▁avo": 3943, + "▁greatly": 3944, + "▁mach": 3945, + "▁inh": 3946, + "▁asleep": 3947, + "▁paris": 3948, + "▁dav": 3949, + "▁alto": 3950, + "▁offer": 3951, + "▁opposite": 3952, + "ounced": 3953, + "erve": 3954, + "▁breast": 3955, + "nown": 3956, + "▁reading": 3957, + "▁altogether": 3958, + "▁writing": 3959, + "pected": 3960, + "▁degree": 3961, + "cing": 3962, + "night": 3963, + "▁exec": 3964, + "fortun": 3965, + "▁stat": 3966, + "▁feelings": 3967, + "▁hath": 3968, + "▁cook": 3969, + "▁rail": 3970, + "▁honour": 3971, + "ding": 3972, + "▁fate": 3973, + "▁por": 3974, + "▁frank": 3975, + "▁meeting": 3976, + "▁rough": 3977, + "▁alive": 3978, + "▁hide": 3979, + "ites": 3980, + "ilar": 3981, + "▁blow": 3982, + "▁cruel": 3983, + "raph": 3984, + "▁hurt": 3985, + "▁loss": 3986, + "▁thrown": 3987, + "▁caused": 3988, + "▁we'll": 3989, + "▁serve": 3990, + "▁duke": 3991, + "▁bent": 3992, + "▁united": 3993, + "▁seek": 3994, + "▁kingdom": 3995, + "▁situation": 3996, + "▁empty": 3997, + "ners": 3998, + "▁due": 3999, + "▁liked": 4000, + "▁swift": 4001, + "▁opening": 4002, + "▁servants": 4003, + "chen": 4004, + "oura": 4005, + "▁gh": 4006, + "▁suspic": 4007, + "▁freed": 4008, + "ointed": 4009, + "▁surface": 
4010, + "cil": 4011, + "▁questions": 4012, + "▁ess": 4013, + "▁curious": 4014, + "▁constit": 4015, + "▁accompan": 4016, + "▁christian": 4017, + "▁fill": 4018, + "arest": 4019, + "▁satisfied": 4020, + "ron": 4021, + "▁sides": 4022, + "▁pity": 4023, + "▁reve": 4024, + "▁equal": 4025, + "▁height": 4026, + "▁ordered": 4027, + "osop": 4028, + "▁grey": 4029, + "▁listened": 4030, + "pet": 4031, + "▁rejo": 4032, + "▁capt": 4033, + "ibility": 4034, + "ob": 4035, + "▁mart": 4036, + "▁happen": 4037, + "▁hurried": 4038, + "▁dollars": 4039, + "▁language": 4040, + "▁ange": 4041, + "▁yours": 4042, + "▁supposed": 4043, + "▁laughing": 4044, + "▁settled": 4045, + "▁rode": 4046, + "▁perm": 4047, + "▁distingu": 4048, + "▁hurry": 4049, + "▁destroy": 4050, + "▁talked": 4051, + "▁lifted": 4052, + "ocr": 4053, + "▁square": 4054, + "▁value": 4055, + "▁taste": 4056, + "▁vast": 4057, + "▁king's": 4058, + "▁rul": 4059, + "▁roof": 4060, + "▁telling": 4061, + "▁study": 4062, + "▁ow": 4063, + "▁pan": 4064, + "▁bas": 4065, + "▁rising": 4066, + "▁sufficient": 4067, + "▁forced": 4068, + "▁rise": 4069, + "▁attend": 4070, + "▁philosop": 4071, + "▁nose": 4072, + "▁sixty": 4073, + "hest": 4074, + "▁pin": 4075, + "▁egg": 4076, + "▁amb": 4077, + "▁fault": 4078, + "bur": 4079, + "▁station": 4080, + "▁distur": 4081, + "▁regular": 4082, + "ille": 4083, + "▁pack": 4084, + "▁special": 4085, + "▁honest": 4086, + "▁building": 4087, + "▁season": 4088, + "▁shape": 4089, + "▁pride": 4090, + "▁smiling": 4091, + "like": 4092, + "▁orders": 4093, + "yn": 4094, + "▁woods": 4095, + "▁accompl": 4096, + "con": 4097, + "▁sam": 4098, + "▁usually": 4099, + "▁watching": 4100, + "▁sacri": 4101, + "erved": 4102, + "▁passage": 4103, + "▁material": 4104, + "▁valley": 4105, + "yr": 4106, + "▁stairs": 4107, + "▁libert": 4108, + "▁frightened": 4109, + "▁remarked": 4110, + "▁tit": 4111, + "▁wed": 4112, + "▁mistress": 4113, + "▁directly": 4114, + "▁suffer": 4115, + "▁gloom": 4116, + "▁lines": 4117, + "▁stock": 4118, + "▁justice": 
4119, + "▁diam": 4120, + "ested": 4121, + "▁growing": 4122, + "▁doesn't": 4123, + "▁gathered": 4124, + "▁ordinary": 4125, + "uce": 4126, + "▁eur": 4127, + "▁unf": 4128, + "▁kitchen": 4129, + "▁threat": 4130, + "▁depend": 4131, + "▁weeks": 4132, + "▁despair": 4133, + "▁method": 4134, + "▁seized": 4135, + "▁discuss": 4136, + "▁exer": 4137, + "ify": 4138, + "▁flower": 4139, + "▁ignor": 4140, + "eer": 4141, + "ades": 4142, + "▁deb": 4143, + "eping": 4144, + "▁ale": 4145, + "▁yo": 4146, + "chief": 4147, + "▁supper": 4148, + "ik": 4149, + "▁bold": 4150, + "▁putting": 4151, + "▁nearer": 4152, + "uses": 4153, + "▁one's": 4154, + "▁ble": 4155, + "▁york": 4156, + "▁ende": 4157, + "▁affairs": 4158, + "▁soldier": 4159, + "▁contrary": 4160, + "▁moving": 4161, + "▁streets": 4162, + "▁bir": 4163, + "rance": 4164, + "hens": 4165, + "▁cit": 4166, + "icated": 4167, + "▁catch": 4168, + "▁imagine": 4169, + "eds": 4170, + "▁march": 4171, + "▁search": 4172, + "ara": 4173, + "▁receive": 4174, + "imate": 4175, + "▁monsie": 4176, + "▁twice": 4177, + "▁papa": 4178, + "▁monsieur": 4179, + "▁reck": 4180, + "min": 4181, + "ude": 4182, + "▁process": 4183, + "▁hole": 4184, + "aly": 4185, + "lin": 4186, + "▁cro": 4187, + "▁favour": 4188, + "▁dign": 4189, + "▁working": 4190, + "▁harm": 4191, + "▁europe": 4192, + "antic": 4193, + "▁proved": 4194, + "ocked": 4195, + "▁prove": 4196, + "▁cler": 4197, + "▁lod": 4198, + "ception": 4199, + "▁pulled": 4200, + "▁arth": 4201, + "▁authority": 4202, + "▁haven": 4203, + "▁jer": 4204, + "▁uns": 4205, + "▁movement": 4206, + "usted": 4207, + "▁engaged": 4208, + "▁brothers": 4209, + "▁advantage": 4210, + "lished": 4211, + "ole": 4212, + "▁arthur": 4213, + "▁aut": 4214, + "▁stones": 4215, + "▁farm": 4216, + "▁difference": 4217, + "▁fart": 4218, + "▁aside": 4219, + "▁mas": 4220, + "▁observ": 4221, + "▁hence": 4222, + "▁possession": 4223, + "▁hills": 4224, + "▁fortun": 4225, + "uls": 4226, + "ails": 4227, + "▁instance": 4228, + "▁she's": 4229, + "▁ol": 4230, + 
"▁holy": 4231, + "▁flew": 4232, + "ky": 4233, + "▁color": 4234, + "▁rate": 4235, + "▁doors": 4236, + "▁busy": 4237, + "set": 4238, + "▁address": 4239, + "▁familiar": 4240, + "▁weight": 4241, + "▁aware": 4242, + "▁played": 4243, + "▁sympath": 4244, + "lls": 4245, + "▁solemn": 4246, + "▁liter": 4247, + "▁test": 4248, + "▁emper": 4249, + "▁indian": 4250, + "▁distant": 4251, + "▁interesting": 4252, + "▁bull": 4253, + "▁thorough": 4254, + "▁wore": 4255, + "▁worked": 4256, + "▁explained": 4257, + "▁excellent": 4258, + "▁splendid": 4259, + "▁tongue": 4260, + "▁di": 4261, + "▁pard": 4262, + "▁named": 4263, + "▁shame": 4264, + "▁franc": 4265, + "▁spect": 4266, + "▁moments": 4267, + "bers": 4268, + "▁wil": 4269, + "▁myster": 4270, + "▁seated": 4271, + "▁instantly": 4272, + "▁similar": 4273, + "▁endeav": 4274, + "▁measure": 4275, + "▁naturally": 4276, + "nds": 4277, + "▁suf": 4278, + "▁amount": 4279, + "▁imper": 4280, + "▁dogs": 4281, + "itable": 4282, + "▁brit": 4283, + "▁necessity": 4284, + "rid": 4285, + "ulous": 4286, + "▁confidence": 4287, + "den": 4288, + "▁parent": 4289, + "▁wid": 4290, + "▁vir": 4291, + "▁neverthe": 4292, + "▁agreed": 4293, + "▁nevertheless": 4294, + "unch": 4295, + "▁hearing": 4296, + "▁takes": 4297, + "▁aug": 4298, + "▁univers": 4299, + "enance": 4300, + "▁unw": 4301, + "▁earl": 4302, + "▁keeping": 4303, + "▁drive": 4304, + "▁produced": 4305, + "▁aud": 4306, + "on's": 4307, + "▁names": 4308, + "agn": 4309, + "▁disappeared": 4310, + "▁throw": 4311, + "▁president": 4312, + "▁gods": 4313, + "▁magic": 4314, + "▁represent": 4315, + "▁unknown": 4316, + "por": 4317, + "▁terror": 4318, + "▁haven't": 4319, + "asc": 4320, + "▁support": 4321, + "▁smoke": 4322, + "▁wicked": 4323, + "ker": 4324, + "▁works": 4325, + "▁artic": 4326, + "▁dull": 4327, + "▁yester": 4328, + "▁falling": 4329, + "▁worthy": 4330, + "▁liberty": 4331, + "ulation": 4332, + "▁design": 4333, + "▁wants": 4334, + "▁evidence": 4335, + "▁companions": 4336, + "▁spirits": 4337, + "▁coast": 4338, + 
"▁mighty": 4339, + "▁particularly": 4340, + "▁witness": 4341, + "▁discover": 4342, + "▁sought": 4343, + "▁span": 4344, + "'ve": 4345, + "▁rare": 4346, + "▁officers": 4347, + "lv": 4348, + "zy": 4349, + "▁yesterday": 4350, + "vey": 4351, + "cent": 4352, + "▁powers": 4353, + "▁yield": 4354, + "▁cool": 4355, + "▁organ": 4356, + "▁amaz": 4357, + "▁pointed": 4358, + "ford": 4359, + "▁claim": 4360, + "▁content": 4361, + "▁possibly": 4362, + "▁terms": 4363, + "▁trium": 4364, + "▁officer": 4365, + "▁persu": 4366, + "▁ceased": 4367, + "▁drove": 4368, + "▁occurred": 4369, + "▁gree": 4370, + "▁lies": 4371, + "▁otherwise": 4372, + "▁emperor": 4373, + "▁hom": 4374, + "▁stars": 4375, + "▁knees": 4376, + "▁triumph": 4377, + "ruction": 4378, + "▁paused": 4379, + "oms": 4380, + "▁required": 4381, + "▁failed": 4382, + "▁unhapp": 4383, + "▁diamond": 4384, + "▁rat": 4385, + "▁ali": 4386, + "▁double": 4387, + "▁forms": 4388, + "▁gives": 4389, + "▁finger": 4390, + "race": 4391, + "▁pair": 4392, + "alous": 4393, + "illa": 4394, + "▁bob": 4395, + "▁eliz": 4396, + "▁travel": 4397, + "▁carrying": 4398, + "▁gle": 4399, + "iles": 4400, + "▁teeth": 4401, + "esh": 4402, + "▁shown": 4403, + "▁fruit": 4404, + "▁waters": 4405, + "▁entertain": 4406, + "▁hearts": 4407, + "umn": 4408, + "▁labor": 4409, + "in't": 4410, + "▁pill": 4411, + "▁ener": 4412, + "soci": 4413, + "▁example": 4414, + "▁upper": 4415, + "▁foreign": 4416, + "▁moral": 4417, + "▁softly": 4418, + "rose": 4419, + "▁huge": 4420, + "▁charles": 4421, + "▁priest": 4422, + "▁excit": 4423, + "▁fet": 4424, + "▁mother's": 4425, + "▁possessed": 4426, + "▁cases": 4427, + "▁report": 4428, + "▁milk": 4429, + "▁affair": 4430, + "▁principle": 4431, + "▁inhab": 4432, + "▁freedom": 4433, + "▁proof": 4434, + "▁intended": 4435, + "▁satisfaction": 4436, + "▁shouted": 4437, + "isc": 4438, + "▁plat": 4439, + "▁bask": 4440, + "ental": 4441, + "▁group": 4442, + "▁farther": 4443, + "asm": 4444, + "▁unfortun": 4445, + "▁unto": 4446, + "▁singing": 4447, + 
"▁arrange": 4448, + "▁religion": 4449, + "▁ber": 4450, + "▁rocks": 4451, + "▁seventeen": 4452, + "▁der": 4453, + "▁james": 4454, + "▁buy": 4455, + "▁succeeded": 4456, + "▁rooms": 4457, + "▁leading": 4458, + "▁majesty": 4459, + "▁events": 4460, + "▁dance": 4461, + "▁paint": 4462, + "▁gently": 4463, + "acle": 4464, + "▁tele": 4465, + "▁pardon": 4466, + "using": 4467, + "▁drop": 4468, + "father": 4469, + "▁invent": 4470, + "▁key": 4471, + "▁mentioned": 4472, + "▁seventy": 4473, + "▁ros": 4474, + "▁suffering": 4475, + "▁record": 4476, + "▁cabin": 4477, + "road": 4478, + "▁diss": 4479, + "ival": 4480, + "▁demanded": 4481, + "▁excitement": 4482, + "▁associ": 4483, + "▁progress": 4484, + "angers": 4485, + "▁curi": 4486, + "▁america": 4487, + "▁rule": 4488, + "▁bor": 4489, + "▁vig": 4490, + "lessly": 4491, + "▁clearly": 4492, + "▁bore": 4493, + "▁sheep": 4494, + "▁regret": 4495, + "▁neighbour": 4496, + "bly": 4497, + "iance": 4498, + "▁instinct": 4499, + "▁advice": 4500, + "▁awful": 4501, + "▁sen": 4502, + "▁fully": 4503, + "▁gather": 4504, + "▁papers": 4505, + "▁hidden": 4506, + "▁chest": 4507, + "▁birth": 4508, + "hy": 4509, + "pap": 4510, + "▁hither": 4511, + "▁stuff": 4512, + "▁impat": 4513, + "▁calling": 4514, + "▁fourth": 4515, + "▁dreadful": 4516, + "▁pos": 4517, + "▁grief": 4518, + "▁brill": 4519, + "▁powerful": 4520, + "▁presented": 4521, + "▁fairy": 4522, + "▁explain": 4523, + "▁shoot": 4524, + "▁prisoner": 4525, + "▁joined": 4526, + "▁afford": 4527, + "mond": 4528, + "attered": 4529, + "▁ing": 4530, + "iments": 4531, + "▁shel": 4532, + "▁prefer": 4533, + "▁considerable": 4534, + "▁obey": 4535, + "▁voices": 4536, + "▁interv": 4537, + "▁interested": 4538, + "▁virg": 4539, + "▁cred": 4540, + "▁card": 4541, + "▁ep": 4542, + "▁needed": 4543, + "▁pounds": 4544, + "▁conqu": 4545, + "▁clever": 4546, + "▁advanced": 4547, + "▁cord": 4548, + "ighed": 4549, + "▁undert": 4550, + "▁resolved": 4551, + "▁wag": 4552, + "istic": 4553, + "▁paul": 4554, + "▁excited": 4555, + 
"▁conditions": 4556, + "▁pictures": 4557, + "acious": 4558, + "▁shining": 4559, + "▁sunday": 4560, + "▁served": 4561, + "▁steam": 4562, + "▁police": 4563, + "▁sprang": 4564, + "sie": 4565, + "ora": 4566, + "ese": 4567, + "▁jes": 4568, + "▁nodd": 4569, + "▁salt": 4570, + "▁fields": 4571, + "▁cart": 4572, + "▁indians": 4573, + "▁fierce": 4574, + "dle": 4575, + "▁ride": 4576, + "▁desired": 4577, + "▁edward": 4578, + "▁importance": 4579, + "▁information": 4580, + "ture": 4581, + "▁hosp": 4582, + "▁memb": 4583, + "▁perceived": 4584, + "▁yard": 4585, + "▁crit": 4586, + "ternal": 4587, + "▁task": 4588, + "▁fold": 4589, + "rant": 4590, + "▁sooner": 4591, + "▁merch": 4592, + "▁absolutely": 4593, + "▁citiz": 4594, + "▁suffered": 4595, + "▁tight": 4596, + "▁dur": 4597, + "▁iss": 4598, + "illy": 4599, + "▁log": 4600, + "▁completely": 4601, + "hold": 4602, + "▁rad": 4603, + "▁share": 4604, + "▁willing": 4605, + "▁devil": 4606, + "▁ships": 4607, + "▁imagination": 4608, + "▁superior": 4609, + "com": 4610, + "ams": 4611, + "▁anybody": 4612, + "▁env": 4613, + "▁appl": 4614, + "▁drag": 4615, + "▁dawn": 4616, + "asped": 4617, + "▁occupied": 4618, + "▁curiosity": 4619, + "iest": 4620, + "▁sigh": 4621, + "▁fox": 4622, + "asant": 4623, + "▁myst": 4624, + "▁stead": 4625, + "ett": 4626, + "▁couple": 4627, + "▁type": 4628, + "▁extraord": 4629, + "▁apparently": 4630, + "▁welcome": 4631, + "▁daily": 4632, + "▁modern": 4633, + "iot": 4634, + "▁ain't": 4635, + "▁dying": 4636, + "llen": 4637, + "▁feat": 4638, + "▁accident": 4639, + "▁countenance": 4640, + "▁abandon": 4641, + "ortion": 4642, + "▁lock": 4643, + "▁crime": 4644, + "pir": 4645, + "▁mult": 4646, + "▁alas": 4647, + "▁refused": 4648, + "▁hate": 4649, + "▁dw": 4650, + "▁whenever": 4651, + "▁thanks": 4652, + "▁slave": 4653, + "▁regarded": 4654, + "▁suggested": 4655, + "ulf": 4656, + "▁actually": 4657, + "gment": 4658, + "▁size": 4659, + "reg": 4660, + "▁cult": 4661, + "▁kat": 4662, + "▁bodies": 4663, + "hus": 4664, + "▁bay": 4665, + 
"▁truly": 4666, + "▁flesh": 4667, + "ishop": 4668, + "▁smith": 4669, + "▁betr": 4670, + "with": 4671, + "▁wet": 4672, + "▁rapidly": 4673, + "gers": 4674, + "▁odd": 4675, + "asons": 4676, + "ette": 4677, + "▁club": 4678, + "abel": 4679, + "▁horror": 4680, + "▁mile": 4681, + "▁flight": 4682, + "▁crossed": 4683, + "▁professor": 4684, + "▁oce": 4685, + "▁worst": 4686, + "ization": 4687, + "▁rushed": 4688, + "▁science": 4689, + "▁brief": 4690, + "▁stepped": 4691, + "▁midst": 4692, + "ha": 4693, + "▁sour": 4694, + "▁maint": 4695, + "▁brain": 4696, + "▁cottage": 4697, + "▁expressed": 4698, + "▁equally": 4699, + "▁education": 4700, + "▁august": 4701, + "▁buck": 4702, + "▁nay": 4703, + "ids": 4704, + "▁tempt": 4705, + "▁inquir": 4706, + "▁foolish": 4707, + "▁taught": 4708, + "▁cop": 4709, + "▁dun": 4710, + "▁picked": 4711, + "▁elsie": 4712, + "▁lands": 4713, + "▁driven": 4714, + "▁political": 4715, + "mas": 4716, + "▁deck": 4717, + "▁resist": 4718, + "▁instr": 4719, + "▁bon": 4720, + "▁ken": 4721, + "ips": 4722, + "▁hotel": 4723, + "▁dangerous": 4724, + "ially": 4725, + "now": 4726, + "▁dozen": 4727, + "▁trade": 4728, + "▁points": 4729, + "▁ninet": 4730, + "ability": 4731, + "▁crim": 4732, + "▁relations": 4733, + "▁interp": 4734, + "▁barb": 4735, + "▁delighted": 4736, + "▁members": 4737, + "▁sisters": 4738, + "▁sty": 4739, + "▁anger": 4740, + "▁belief": 4741, + "▁asking": 4742, + "▁meat": 4743, + "▁displ": 4744, + "▁relief": 4745, + "ification": 4746, + "▁hunting": 4747, + "▁alex": 4748, + "aries": 4749, + "▁obst": 4750, + "▁behold": 4751, + "▁mistake": 4752, + "▁inquired": 4753, + "▁remarkable": 4754, + "▁origin": 4755, + "cked": 4756, + "▁nerv": 4757, + "acks": 4758, + "vert": 4759, + "rop": 4760, + "▁careful": 4761, + "▁wounded": 4762, + "ading": 4763, + "▁cere": 4764, + "▁enemies": 4765, + "▁gradually": 4766, + "▁interrupted": 4767, + "▁fis": 4768, + "▁stup": 4769, + "▁severe": 4770, + "▁keen": 4771, + "▁sixteen": 4772, + "kins": 4773, + "resp": 4774, + "▁worn": 4775, + 
"▁flour": 4776, + "▁sylv": 4777, + "▁control": 4778, + "kin": 4779, + "▁lone": 4780, + "asing": 4781, + "▁nap": 4782, + "▁assert": 4783, + "▁depth": 4784, + "▁kindly": 4785, + "▁murder": 4786, + "acity": 4787, + "▁eleven": 4788, + "▁invol": 4789, + "▁d'art": 4790, + "▁wings": 4791, + "▁oak": 4792, + "▁et": 4793, + "▁begun": 4794, + "▁dreams": 4795, + "while": 4796, + "▁moreover": 4797, + "▁exped": 4798, + "▁independ": 4799, + "▁buried": 4800, + "▁approached": 4801, + "agnan": 4802, + "▁d'artagnan": 4803, + "▁sex": 4804, + "▁saved": 4805, + "▁harry": 4806, + "▁physical": 4807, + "▁species": 4808, + "cer": 4809, + "oe": 4810, + "▁glory": 4811, + "▁creatures": 4812, + "▁newspap": 4813, + "▁sang": 4814, + "▁plenty": 4815, + "▁useful": 4816, + "▁shoes": 4817, + "▁hoped": 4818, + "▁frequently": 4819, + "▁saf": 4820, + "▁distr": 4821, + "▁princip": 4822, + "▁pu": 4823, + "y's": 4824, + "aunt": 4825, + "▁lover": 4826, + "▁famous": 4827, + "▁recollect": 4828, + "▁nur": 4829, + "▁grim": 4830, + "▁indif": 4831, + "▁charming": 4832, + "▁aim": 4833, + "▁loose": 4834, + "▁consciousness": 4835, + "▁mamma": 4836, + "▁enthus": 4837, + "▁slept": 4838, + "▁smooth": 4839, + "▁fighting": 4840, + "▁hyp": 4841, + "▁enthusi": 4842, + "▁dig": 4843, + "aling": 4844, + "▁stage": 4845, + "▁anyone": 4846, + "▁thrust": 4847, + "▁desper": 4848, + "▁tar": 4849, + "▁lamp": 4850, + "stone": 4851, + "▁stern": 4852, + "▁evident": 4853, + "▁meanwhile": 4854, + "▁forgive": 4855, + "▁accepted": 4856, + "▁ocean": 4857, + "▁tot": 4858, + "▁they're": 4859, + "▁wondered": 4860, + "▁playing": 4861, + "▁detect": 4862, + "▁hale": 4863, + "▁knife": 4864, + "ailed": 4865, + "▁closely": 4866, + "▁meas": 4867, + "▁proceeded": 4868, + "▁message": 4869, + "▁mour": 4870, + "▁fac": 4871, + "▁union": 4872, + "ustomed": 4873, + "hem": 4874, + "aming": 4875, + "▁exceed": 4876, + "▁feather": 4877, + "▁precious": 4878, + "▁century": 4879, + "▁unex": 4880, + "▁park": 4881, + "ication": 4882, + "▁everywhere": 4883, + 
"▁minds": 4884, + "▁extraordinary": 4885, + "▁arose": 4886, + "▁entrance": 4887, + "▁capital": 4888, + "▁recall": 4889, + "▁burning": 4890, + "▁magnific": 4891, + "oes": 4892, + "orious": 4893, + "stand": 4894, + "▁assemb": 4895, + "▁plant": 4896, + "▁neighbor": 4897, + "▁lest": 4898, + "uments": 4899, + "▁colle": 4900, + "▁virtue": 4901, + "▁bew": 4902, + "▁forb": 4903, + "▁retreat": 4904, + "▁capable": 4905, + "▁assured": 4906, + "▁constant": 4907, + "▁governor": 4908, + "▁increased": 4909, + "▁horn": 4910, + "▁removed": 4911, + "▁facts": 4912, + "▁absence": 4913, + "▁explan": 4914, + "▁ack": 4915, + "▁somebody": 4916, + "▁awa": 4917, + "▁admit": 4918, + "▁correct": 4919, + "▁forgot": 4920, + "▁jealous": 4921, + "▁kissed": 4922, + "▁popular": 4923, + "▁hut": 4924, + "▁ug": 4925, + "pelled": 4926, + "▁grant": 4927, + "▁friendship": 4928, + "▁indign": 4929, + "▁sympathy": 4930, + "iable": 4931, + "erous": 4932, + "▁thom": 4933, + "▁alice": 4934, + "▁level": 4935, + "▁objects": 4936, + "▁pressed": 4937, + "▁sha": 4938, + "room": 4939, + "▁qual": 4940, + "▁begged": 4941, + "▁emp": 4942, + "▁hind": 4943, + "▁highest": 4944, + "▁clouds": 4945, + "▁ghost": 4946, + "▁acknow": 4947, + "oused": 4948, + "▁strike": 4949, + "▁wishes": 4950, + "▁becomes": 4951, + "▁trembling": 4952, + "▁nob": 4953, + "▁kindness": 4954, + "▁accordingly": 4955, + "▁throat": 4956, + "ration": 4957, + "▁fare": 4958, + "▁we're": 4959, + "▁stretched": 4960, + "▁frag": 4961, + "▁wheel": 4962, + "▁queer": 4963, + "▁grandfather": 4964, + "for": 4965, + "▁choose": 4966, + "▁helen": 4967, + "▁eighty": 4968, + "▁ly": 4969, + "▁miserable": 4970, + "▁contempt": 4971, + "igned": 4972, + "▁military": 4973, + "▁russ": 4974, + "▁basket": 4975, + "▁ahead": 4976, + "oops": 4977, + "ivered": 4978, + "▁listening": 4979, + "▁fro": 4980, + "▁larger": 4981, + "▁divine": 4982, + "iber": 4983, + "▁stories": 4984, + "anches": 4985, + "ushing": 4986, + "izing": 4987, + "▁treasure": 4988, + "▁excuse": 4989, + "▁innocent": 
4990, + "▁aid": 4991, + "▁remind": 4992, + "▁slaves": 4993, + "rit": 4994, + "stairs": 4995, + "▁reward": 4996, + "ograph": 4997, + "▁manage": 4998, + "▁dish": 4999, + "▁throughout": 5000, + "▁waves": 5001, + "▁judgment": 5002, + "▁arrival": 5003, + "▁choice": 5004, + "▁unhappy": 5005, + "astic": 5006, + "▁blank": 5007, + "▁advance": 5008, + "▁informed": 5009, + "▁acquaintance": 5010, + "▁impression": 5011, + "▁mysterious": 5012, + "bb": 5013, + "▁ara": 5014, + "▁notes": 5015, + "▁hadn't": 5016, + "▁sell": 5017, + "▁comr": 5018, + "▁impl": 5019, + "▁indust": 5020, + "▁ended": 5021, + "▁lights": 5022, + "▁nurse": 5023, + "▁sout": 5024, + "▁bought": 5025, + "▁fred": 5026, + "▁marked": 5027, + "▁scream": 5028, + "mend": 5029, + "▁uneas": 5030, + "▁delicate": 5031, + "▁weary": 5032, + "estic": 5033, + "▁prompt": 5034, + "▁experi": 5035, + "▁hungry": 5036, + "▁flying": 5037, + "▁pow": 5038, + "▁bridge": 5039, + "▁join": 5040, + "▁visible": 5041, + "▁understanding": 5042, + "▁crying": 5043, + "▁avoid": 5044, + "▁tis": 5045, + "▁stiff": 5046, + "aches": 5047, + "▁restr": 5048, + "▁sounds": 5049, + "▁bowed": 5050, + "▁caut": 5051, + "▁goods": 5052, + "▁david": 5053, + "▁unable": 5054, + "▁you'd": 5055, + "hamed": 5056, + "▁bos": 5057, + "eral": 5058, + "▁ashamed": 5059, + "▁somewhere": 5060, + "▁infinite": 5061, + "ocks": 5062, + "▁dignity": 5063, + "▁gay": 5064, + "▁vic": 5065, + "▁amid": 5066, + "▁hollow": 5067, + "▁emotion": 5068, + "▁admitted": 5069, + "▁parents": 5070, + "▁wra": 5071, + "▁hint": 5072, + "▁temple": 5073, + "▁comfortable": 5074, + "▁intelligence": 5075, + "orous": 5076, + "▁bearing": 5077, + "▁hers": 5078, + "abeth": 5079, + "▁remains": 5080, + "▁contem": 5081, + "▁settle": 5082, + "▁immense": 5083, + "ffe": 5084, + "pher": 5085, + "▁cher": 5086, + "ldom": 5087, + "▁weap": 5088, + "ulated": 5089, + "▁lighted": 5090, + "gypt": 5091, + "▁adventure": 5092, + "▁thoroughly": 5093, + "▁egypt": 5094, + "ilst": 5095, + "anges": 5096, + "▁obt": 5097, + 
"▁friendly": 5098, + "▁reckon": 5099, + "▁stupid": 5100, + "▁fed": 5101, + "▁rome": 5102, + "▁meal": 5103, + "▁intention": 5104, + "▁returning": 5105, + "▁convin": 5106, + "▁coo": 5107, + "lection": 5108, + "▁ash": 5109, + "achel": 5110, + "▁rope": 5111, + "▁price": 5112, + "▁project": 5113, + "elt": 5114, + "rows": 5115, + "▁secure": 5116, + "▁escaped": 5117, + "▁hopes": 5118, + "▁elizabeth": 5119, + "▁safety": 5120, + "▁wound": 5121, + "▁sup": 5122, + "▁unus": 5123, + "onscious": 5124, + "▁horri": 5125, + "▁minister": 5126, + "▁ox": 5127, + "lla": 5128, + "ensive": 5129, + "▁helped": 5130, + "▁plainly": 5131, + "▁seldom": 5132, + "▁thinks": 5133, + "▁fellows": 5134, + "▁mood": 5135, + "▁pushed": 5136, + "▁exhib": 5137, + "inging": 5138, + "▁thunder": 5139, + "aud": 5140, + "iana": 5141, + "▁fairly": 5142, + "▁elder": 5143, + "▁eggs": 5144, + "irm": 5145, + "▁maiden": 5146, + "mother": 5147, + "▁appears": 5148, + "▁cheeks": 5149, + "▁won": 5150, + "▁ease": 5151, + "▁redu": 5152, + "▁skill": 5153, + "▁extent": 5154, + "▁practice": 5155, + "▁religious": 5156, + "▁becoming": 5157, + "▁virgin": 5158, + "▁features": 5159, + "▁tied": 5160, + "▁whence": 5161, + "▁somehow": 5162, + "▁greet": 5163, + "▁faithful": 5164, + "▁concerned": 5165, + "▁theat": 5166, + "▁bishop": 5167, + "▁pink": 5168, + "▁eagerly": 5169, + "rees": 5170, + "▁eating": 5171, + "▁waste": 5172, + "▁rank": 5173, + "▁fem": 5174, + "▁bride": 5175, + "▁unl": 5176, + "otted": 5177, + "ceiving": 5178, + "▁trib": 5179, + "▁original": 5180, + "▁concerning": 5181, + "▁hab": 5182, + "▁accustomed": 5183, + "▁patient": 5184, + "▁recom": 5185, + "▁cell": 5186, + "ointment": 5187, + "▁arranged": 5188, + "ville": 5189, + "iture": 5190, + "▁wholly": 5191, + "▁older": 5192, + "▁colour": 5193, + "▁provided": 5194, + "▁ate": 5195, + "▁partly": 5196, + "▁mont": 5197, + "ology": 5198, + "▁prospect": 5199, + "▁ceremon": 5200, + "▁ze": 5201, + "▁laughter": 5202, + "▁fee": 5203, + "▁branches": 5204, + "▁fled": 5205, + 
"right": 5206, + "▁whilst": 5207, + "▁slipped": 5208, + "▁violent": 5209, + "▁inhabit": 5210, + "▁sons": 5211, + "▁engage": 5212, + "▁uncom": 5213, + "▁deeply": 5214, + "▁substance": 5215, + "▁tale": 5216, + "▁tiny": 5217, + "▁dan": 5218, + "▁ga": 5219, + "▁bee": 5220, + "▁yards": 5221, + "icks": 5222, + "▁hastily": 5223, + "held": 5224, + "▁wes": 5225, + "▁vague": 5226, + "▁amuse": 5227, + "▁mud": 5228, + "▁wolf": 5229, + "▁hans": 5230, + "illing": 5231, + "▁supply": 5232, + "▁silk": 5233, + "▁constantly": 5234, + "▁christmas": 5235, + "▁million": 5236, + "▁whisper": 5237, + "▁mental": 5238, + "▁washing": 5239, + "verse": 5240, + "▁cloth": 5241, + "▁baron": 5242, + "▁corresp": 5243, + "▁nodded": 5244, + "▁correspond": 5245, + "ka": 5246, + "▁hell": 5247, + "▁gain": 5248, + "▁rust": 5249, + "▁obtain": 5250, + "▁unconscious": 5251, + "▁struggle": 5252, + "▁established": 5253, + "▁lawy": 5254, + "ols": 5255, + "▁signs": 5256, + "▁uttered": 5257, + "▁roman": 5258, + "▁constitution": 5259, + "pes": 5260, + "▁cave": 5261, + "▁spare": 5262, + "▁quant": 5263, + "▁image": 5264, + "▁merry": 5265, + "▁treated": 5266, + "▁efforts": 5267, + "▁lonely": 5268, + "rated": 5269, + "▁nut": 5270, + "▁glanced": 5271, + "▁portion": 5272, + "itor": 5273, + "▁resemb": 5274, + "▁withd": 5275, + "▁mead": 5276, + "▁feast": 5277, + "▁prim": 5278, + "▁cliff": 5279, + "▁emer": 5280, + "▁proportion": 5281, + "▁consideration": 5282, + "▁haste": 5283, + "▁gaze": 5284, + "▁savage": 5285, + "▁crew": 5286, + "▁tower": 5287, + "▁lack": 5288, + "▁conscience": 5289, + "▁mercy": 5290, + "▁exha": 5291, + "▁consent": 5292, + "ators": 5293, + "urd": 5294, + "▁outl": 5295, + "▁clo": 5296, + "▁adop": 5297, + "▁amongst": 5298, + "▁hanging": 5299, + "▁circle": 5300, + "▁prepar": 5301, + "▁brilliant": 5302, + "fl": 5303, + "▁gained": 5304, + "▁row": 5305, + "▁troops": 5306, + "▁repro": 5307, + "▁ming": 5308, + "oul": 5309, + "▁dared": 5310, + "▁lion": 5311, + "▁joe": 5312, + "▁winds": 5313, + "▁bringing": 5314, 
+ "▁anxiety": 5315, + "▁billy": 5316, + "▁consequence": 5317, + "fice": 5318, + "pse": 5319, + "▁fought": 5320, + "▁pred": 5321, + "▁scra": 5322, + "▁glim": 5323, + "▁victory": 5324, + "ped": 5325, + "▁rab": 5326, + "▁scot": 5327, + "▁obv": 5328, + "▁shock": 5329, + "chan": 5330, + "▁knock": 5331, + "ourse": 5332, + "▁handed": 5333, + "▁indul": 5334, + "▁patience": 5335, + "▁souther": 5336, + "▁jose": 5337, + "▁fever": 5338, + "▁rolled": 5339, + "icted": 5340, + "▁setting": 5341, + "▁profession": 5342, + "▁sylvia": 5343, + "▁hun": 5344, + "utions": 5345, + "▁feared": 5346, + "▁brand": 5347, + "▁boots": 5348, + "▁forehead": 5349, + "▁principles": 5350, + "▁sink": 5351, + "▁rig": 5352, + "aval": 5353, + "▁purch": 5354, + "▁gazed": 5355, + "▁employed": 5356, + "▁murmured": 5357, + "more": 5358, + "▁sar": 5359, + "ashing": 5360, + "ural": 5361, + "acles": 5362, + "▁trad": 5363, + "▁active": 5364, + "▁benef": 5365, + "▁bottle": 5366, + "▁rage": 5367, + "▁invest": 5368, + "▁lux": 5369, + "▁sank": 5370, + "▁hang": 5371, + "▁beard": 5372, + "ential": 5373, + "▁loving": 5374, + "▁native": 5375, + "▁instruct": 5376, + "▁waist": 5377, + "▁relation": 5378, + "▁discovery": 5379, + "▁melan": 5380, + "▁nervous": 5381, + "▁obtained": 5382, + "▁pig": 5383, + "▁sear": 5384, + "▁flag": 5385, + "▁trail": 5386, + "▁distinguished": 5387, + "▁stared": 5388, + "▁misery": 5389, + "▁print": 5390, + "▁guil": 5391, + "▁jumped": 5392, + "▁swim": 5393, + "▁approaching": 5394, + "▁suspicion": 5395, + "▁iv": 5396, + "▁managed": 5397, + "aker": 5398, + "▁teach": 5399, + "▁match": 5400, + "▁guilty": 5401, + "▁wretched": 5402, + "▁rum": 5403, + "▁compar": 5404, + "▁theory": 5405, + "▁sher": 5406, + "▁bree": 5407, + "▁kings": 5408, + "▁shone": 5409, + "atherine": 5410, + "▁throne": 5411, + "▁showing": 5412, + "aws": 5413, + "▁robin": 5414, + "▁embar": 5415, + "utation": 5416, + "▁woman's": 5417, + "▁addressed": 5418, + "▁protest": 5419, + "▁admiration": 5420, + "▁troubled": 5421, + "▁ugly": 5422, + 
"oom": 5423, + "erves": 5424, + "▁flung": 5425, + "▁subs": 5426, + "▁relie": 5427, + "▁thousands": 5428, + "nce": 5429, + "▁od": 5430, + "▁current": 5431, + "▁wooden": 5432, + "▁sacrifice": 5433, + "urity": 5434, + "cip": 5435, + "▁pear": 5436, + "▁farmer": 5437, + "▁needs": 5438, + "▁condem": 5439, + "▁member": 5440, + "▁bade": 5441, + "▁dancing": 5442, + "▁reasons": 5443, + "▁consult": 5444, + "▁swall": 5445, + "▁shadows": 5446, + "▁angel": 5447, + "▁nineteen": 5448, + "▁style": 5449, + "field": 5450, + "▁lan": 5451, + "▁manif": 5452, + "▁robert": 5453, + "▁grate": 5454, + "▁engine": 5455, + "▁wisdom": 5456, + "▁jesus": 5457, + "▁convent": 5458, + "▁preced": 5459, + "▁interests": 5460, + "▁trial": 5461, + "bor": 5462, + "iven": 5463, + "▁nest": 5464, + "▁exch": 5465, + "▁voy": 5466, + "▁illust": 5467, + "▁worship": 5468, + "▁adam": 5469, + "▁phr": 5470, + "▁principal": 5471, + "▁hit": 5472, + "▁spend": 5473, + "▁stands": 5474, + "▁respons": 5475, + "▁ay": 5476, + "▁haw": 5477, + "▁whist": 5478, + "▁arrest": 5479, + "▁kinds": 5480, + "▁require": 5481, + "▁described": 5482, + "▁lit": 5483, + "▁precise": 5484, + "▁proposed": 5485, + "▁produce": 5486, + "▁utterly": 5487, + "ulse": 5488, + "▁novel": 5489, + "▁blame": 5490, + "▁credit": 5491, + "▁pause": 5492, + "osen": 5493, + "▁household": 5494, + "▁armed": 5495, + "▁follows": 5496, + "upon": 5497, + "▁approach": 5498, + "▁ninety": 5499, + "▁pir": 5500, + "▁flore": 5501, + "ivity": 5502, + "▁refuse": 5503, + "▁sensible": 5504, + "choly": 5505, + "▁national": 5506, + "▁grie": 5507, + "▁reven": 5508, + "▁let's": 5509, + "▁delightful": 5510, + "▁extremely": 5511, + "▁melancholy": 5512, + "uing": 5513, + "▁enorm": 5514, + "cles": 5515, + "▁slightly": 5516, + "▁sacred": 5517, + "▁recognized": 5518, + "▁mystery": 5519, + "▁gri": 5520, + "▁compre": 5521, + "▁distress": 5522, + "▁warri": 5523, + "▁useless": 5524, + "▁trif": 5525, + "▁mounted": 5526, + "▁philip": 5527, + "▁energy": 5528, + "▁explanation": 5529, + "▁cas": 
5530, + "atory": 5531, + "▁pour": 5532, + "▁ric": 5533, + "▁chosen": 5534, + "▁everyone": 5535, + "umbled": 5536, + "▁apr": 5537, + "▁cam": 5538, + "▁proc": 5539, + "▁resumed": 5540, + "▁appreci": 5541, + "▁alexand": 5542, + "▁aven": 5543, + "▁wing": 5544, + "▁intense": 5545, + "▁highly": 5546, + "▁lucy": 5547, + "▁solid": 5548, + "▁departure": 5549, + "▁agreeable": 5550, + "▁exercise": 5551, + "apped": 5552, + "▁ward": 5553, + "▁bud": 5554, + "▁dwell": 5555, + "icate": 5556, + "▁dece": 5557, + "▁teacher": 5558, + "tending": 5559, + "▁max": 5560, + "▁request": 5561, + "▁unexpected": 5562, + "▁joseph": 5563, + "col": 5564, + "▁leap": 5565, + "▁victim": 5566, + "▁sighed": 5567, + "▁forces": 5568, + "chie": 5569, + "▁feed": 5570, + "▁sport": 5571, + "▁drift": 5572, + "▁wedding": 5573, + "▁british": 5574, + "sec": 5575, + "▁attitude": 5576, + "▁vision": 5577, + "▁pipe": 5578, + "▁tow": 5579, + "▁halt": 5580, + "▁manners": 5581, + "▁tend": 5582, + "▁flood": 5583, + "▁commission": 5584, + "▁guide": 5585, + "▁observe": 5586, + "▁concern": 5587, + "▁rush": 5588, + "▁affected": 5589, + "fall": 5590, + "▁stret": 5591, + "▁coach": 5592, + "▁poison": 5593, + "▁directed": 5594, + "▁medic": 5595, + "▁gest": 5596, + "▁echo": 5597, + "▁younger": 5598, + "▁confusion": 5599, + "▁continue": 5600, + "▁parli": 5601, + "▁absor": 5602, + "▁centre": 5603, + "conom": 5604, + "▁horrible": 5605, + "rison": 5606, + "▁bol": 5607, + "▁bath": 5608, + "▁gown": 5609, + "▁bye": 5610, + "▁aloud": 5611, + "▁suppl": 5612, + "▁profound": 5613, + "▁err": 5614, + "▁cheerful": 5615, + "worth": 5616, + "▁sentence": 5617, + "▁mistaken": 5618, + "▁torn": 5619, + "▁figures": 5620, + "▁accompanied": 5621, + "▁catherine": 5622, + "▁econom": 5623, + "▁atm": 5624, + "▁shaking": 5625, + "umber": 5626, + "▁council": 5627, + "lot": 5628, + "▁asce": 5629, + "ilities": 5630, + "▁spar": 5631, + "▁ends": 5632, + "▁straw": 5633, + "▁knights": 5634, + "▁atmosp": 5635, + "▁shade": 5636, + "▁brow": 5637, + "▁spark": 5638, + 
"▁rested": 5639, + "▁sentiment": 5640, + "▁recovered": 5641, + "▁subjects": 5642, + "▁duties": 5643, + "▁composed": 5644, + "▁swept": 5645, + "▁reality": 5646, + "▁singular": 5647, + "▁transp": 5648, + "▁locked": 5649, + "▁louis": 5650, + "▁assistance": 5651, + "▁wake": 5652, + "rem": 5653, + "▁sovere": 5654, + "▁unp": 5655, + "▁loves": 5656, + "▁absurd": 5657, + "▁souls": 5658, + "▁immediate": 5659, + "▁riding": 5660, + "▁connection": 5661, + "▁cheek": 5662, + "▁magnificent": 5663, + "▁ere": 5664, + "▁sugar": 5665, + "▁plans": 5666, + "▁prud": 5667, + "▁dise": 5668, + "▁adj": 5669, + "▁leaning": 5670, + "▁surrounded": 5671, + "▁we've": 5672, + "▁orn": 5673, + "▁roll": 5674, + "▁proble": 5675, + "▁strict": 5676, + "▁awake": 5677, + "▁praise": 5678, + "▁convinced": 5679, + "▁rele": 5680, + "▁frame": 5681, + "▁breaking": 5682, + "▁curtain": 5683, + "▁stayed": 5684, + "▁divided": 5685, + "▁craw": 5686, + "▁inclined": 5687, + "▁previous": 5688, + "ault": 5689, + "omen": 5690, + "▁stair": 5691, + "▁sees": 5692, + "▁pron": 5693, + "board": 5694, + "▁complex": 5695, + "▁prayer": 5696, + "▁pierre": 5697, + "▁unfortunate": 5698, + "gs": 5699, + "▁genius": 5700, + "▁increase": 5701, + "▁sufficiently": 5702, + "▁banks": 5703, + "▁revolution": 5704, + "▁southern": 5705, + "ki": 5706, + "oke": 5707, + "▁aust": 5708, + "edy": 5709, + "▁ling": 5710, + "▁countess": 5711, + "▁sleeping": 5712, + "▁devoted": 5713, + "▁utmost": 5714, + "▁market": 5715, + "▁bosom": 5716, + "▁bark": 5717, + "▁cath": 5718, + "alt": 5719, + "char": 5720, + "▁clock": 5721, + "▁handker": 5722, + "▁admin": 5723, + "▁senses": 5724, + "▁ident": 5725, + "▁midnight": 5726, + "▁connected": 5727, + "▁permitted": 5728, + "▁hid": 5729, + "▁fil": 5730, + "▁faced": 5731, + "▁gift": 5732, + "▁chat": 5733, + "▁brid": 5734, + "▁norther": 5735, + "▁horiz": 5736, + "▁college": 5737, + "▁handkerchief": 5738, + "isions": 5739, + "▁rebe": 5740, + "▁polic": 5741, + "▁announced": 5742, + "ounce": 5743, + "▁nons": 5744, + 
"▁nurs": 5745, + "ales": 5746, + "▁fleet": 5747, + "▁ragged": 5748, + "▁coffe": 5749, + "▁parties": 5750, + "▁delay": 5751, + "▁sounded": 5752, + "▁cities": 5753, + "▁wash": 5754, + "▁appointed": 5755, + "▁nights": 5756, + "▁instit": 5757, + "▁god's": 5758, + "▁striking": 5759, + "▁guns": 5760, + "▁astonishment": 5761, + "▁merchant": 5762, + "▁parliament": 5763, + "nal": 5764, + "▁ax": 5765, + "atched": 5766, + "▁pil": 5767, + "▁page": 5768, + "iform": 5769, + "▁plate": 5770, + "▁thirst": 5771, + "▁negro": 5772, + "▁ruin": 5773, + "▁inhabitants": 5774, + "win": 5775, + "arf": 5776, + "▁rib": 5777, + "▁addition": 5778, + "▁argument": 5779, + "bour": 5780, + "▁tad": 5781, + "▁scen": 5782, + "▁guests": 5783, + "▁wondering": 5784, + "▁acquainted": 5785, + "▁intent": 5786, + "pless": 5787, + "▁destroyed": 5788, + "▁coffee": 5789, + "inent": 5790, + "lebr": 5791, + "▁render": 5792, + "▁sob": 5793, + "▁demon": 5794, + "▁desir": 5795, + "uding": 5796, + "▁gets": 5797, + "▁assure": 5798, + "▁raise": 5799, + "▁sharply": 5800, + "▁privile": 5801, + "▁alarm": 5802, + "▁machine": 5803, + "fied": 5804, + "▁contract": 5805, + "▁deliber": 5806, + "▁drown": 5807, + "▁afterward": 5808, + "▁guest": 5809, + "▁conclusion": 5810, + "▁risk": 5811, + "▁ignorant": 5812, + "bury": 5813, + "kind": 5814, + "▁pian": 5815, + "an's": 5816, + "uries": 5817, + "▁soil": 5818, + "▁refer": 5819, + "▁commanded": 5820, + "▁practical": 5821, + "▁toss": 5822, + "▁offe": 5823, + "▁beheld": 5824, + "▁arist": 5825, + "▁quarters": 5826, + "▁degrees": 5827, + "▁fisher": 5828, + "▁nonsense": 5829, + "▁mc": 5830, + "isp": 5831, + "▁mechan": 5832, + "keep": 5833, + "▁doubtless": 5834, + "▁violence": 5835, + "▁neglect": 5836, + "▁folk": 5837, + "liness": 5838, + "▁bul": 5839, + "▁easter": 5840, + "▁loft": 5841, + "▁contained": 5842, + "▁reflection": 5843, + "▁celebr": 5844, + "▁leaf": 5845, + "▁concluded": 5846, + "▁district": 5847, + "iation": 5848, + "rs": 5849, + "▁scient": 5850, + "▁he'd": 5851, + "▁scorn": 
5852, + "▁crack": 5853, + "▁steep": 5854, + "▁muttered": 5855, + "▁establish": 5856, + "▁darling": 5857, + "▁andrew": 5858, + "▁chim": 5859, + "quis": 5860, + "▁quality": 5861, + "▁polly": 5862, + "▁check": 5863, + "▁craft": 5864, + "▁travell": 5865, + "▁universal": 5866, + "inate": 5867, + "▁cig": 5868, + "atives": 5869, + "omp": 5870, + "uten": 5871, + "▁jac": 5872, + "▁job": 5873, + "▁subm": 5874, + "▁reader": 5875, + "▁leis": 5876, + "▁emph": 5877, + "▁surround": 5878, + "ox": 5879, + "pent": 5880, + "itate": 5881, + "▁extended": 5882, + "▁lev": 5883, + "▁overt": 5884, + "▁retired": 5885, + "▁puzz": 5886, + "uable": 5887, + "▁libr": 5888, + "▁chin": 5889, + "▁spl": 5890, + "▁realized": 5891, + "▁causes": 5892, + "▁punishment": 5893, + "▁physic": 5894, + "▁leisure": 5895, + "can": 5896, + "▁wave": 5897, + "▁shake": 5898, + "▁charm": 5899, + "▁belonged": 5900, + "mber": 5901, + "▁bones": 5902, + "▁gas": 5903, + "▁range": 5904, + "▁prec": 5905, + "▁smell": 5906, + "▁maybe": 5907, + "▁invited": 5908, + "▁troubles": 5909, + "▁tables": 5910, + "anch": 5911, + "icip": 5912, + "▁june": 5913, + "▁abo": 5914, + "▁ages": 5915, + "▁anywhere": 5916, + "ffin": 5917, + "▁drunk": 5918, + "▁properly": 5919, + "▁local": 5920, + "▁improve": 5921, + "▁atmosphere": 5922, + "▁dir": 5923, + "▁he'll": 5924, + "▁reb": 5925, + "▁rang": 5926, + "▁compass": 5927, + "▁lieuten": 5928, + "▁leaned": 5929, + "▁firmly": 5930, + "▁nations": 5931, + "▁hay": 5932, + "▁wept": 5933, + "▁ral": 5934, + "▁conven": 5935, + "▁uniform": 5936, + "▁julia": 5937, + "eem": 5938, + "rass": 5939, + "▁track": 5940, + "▁commer": 5941, + "▁bushes": 5942, + "▁obsc": 5943, + "▁sorts": 5944, + "▁difficulties": 5945, + "▁intellectual": 5946, + "▁introduced": 5947, + "mith": 5948, + "▁tro": 5949, + "iday": 5950, + "▁rendered": 5951, + "▁rout": 5952, + "add": 5953, + "▁plun": 5954, + "▁throwing": 5955, + "▁humble": 5956, + "▁polite": 5957, + "▁numerous": 5958, + "▁movements": 5959, + "▁successful": 5960, + "▁candle": 
5961, + "▁separate": 5962, + "▁protection": 5963, + "▁thomas": 5964, + "▁enormous": 5965, + "▁unb": 5966, + "▁repub": 5967, + "▁sunsh": 5968, + "▁descended": 5969, + "▁unusual": 5970, + "ived": 5971, + "▁blaz": 5972, + "▁shows": 5973, + "▁simpl": 5974, + "▁cattle": 5975, + "▁crept": 5976, + "▁astonished": 5977, + "▁deserted": 5978, + "▁lap": 5979, + "arse": 5980, + "▁nearest": 5981, + "udes": 5982, + "▁entering": 5983, + "▁ideal": 5984, + "standing": 5985, + "nders": 5986, + "▁sore": 5987, + "aine": 5988, + "▁clos": 5989, + "▁ours": 5990, + "▁wherever": 5991, + "▁term": 5992, + "▁visited": 5993, + "▁calcul": 5994, + "ds": 5995, + "▁base": 5996, + "▁gates": 5997, + "▁stamp": 5998, + "▁liber": 5999, + "▁official": 6000, + "▁erect": 6001, + "▁alt": 6002, + "elia": 6003, + "▁harmon": 6004, + "▁painful": 6005, + "▁burned": 6006, + "▁republic": 6007, + "uer": 6008, + "▁lately": 6009, + "▁ital": 6010, + "amm": 6011, + "▁tear": 6012, + "▁actions": 6013, + "▁final": 6014, + "▁startled": 6015, + "▁sensation": 6016, + "▁fatal": 6017, + "olic": 6018, + "▁flash": 6019, + "▁appet": 6020, + "▁stronger": 6021, + "▁numbers": 6022, + "▁gratitude": 6023, + "▁female": 6024, + "▁western": 6025, + "lest": 6026 + }, + "merges": [ + "▁ t", + "h e", + "▁ a", + "▁t he", + "i n", + "▁ s", + "▁ w", + "▁ o", + "r e", + "n d", + "▁ b", + "▁ h", + "e r", + "▁ m", + "▁ i", + "o u", + "▁ c", + "▁ f", + "a t", + "e d", + "▁a nd", + "e n", + "▁t o", + "▁o f", + "o n", + "i s", + "▁ d", + "in g", + "▁t h", + "▁ p", + "▁ he", + "o r", + "▁ l", + "e s", + "▁ in", + "l l", + "i t", + "a r", + "a s", + "a n", + "▁ n", + "▁ g", + "o m", + "▁b e", + "▁h a", + "▁ e", + "l e", + "o t", + "▁ y", + "u t", + "o w", + "i c", + "▁w h", + "▁i t", + "l d", + "v e", + "▁th at", + "l y", + "▁w as", + "i d", + "s e", + "s t", + "▁o n", + "g h", + "en t", + "▁ re", + "▁y ou", + "i m", + "c e", + "▁ u", + "v er", + "i on", + "▁a s", + "e t", + "▁f or", + "a y", + "▁h is", + "▁w e", + "it h", + "a l", + "i r", + "▁ r", + 
"▁w ith", + "▁s t", + "a d", + "u r", + "gh t", + "▁a n", + "▁he r", + "▁n ot", + "▁i s", + "▁ha d", + "t er", + "he r", + "a c", + "a m", + "▁a t", + "o o", + "▁b ut", + "ou ld", + "▁s he", + "▁ k", + "▁s e", + "▁s a", + "▁s h", + "▁f r", + "▁h im", + "▁s o", + "▁m e", + "i ll", + "a in", + "▁s u", + "i ght", + "c h", + "re d", + "c t", + "a ll", + "r o", + "k e", + "es s", + "i l", + "' s", + "o re", + "▁d e", + "▁m y", + "▁the y", + "▁w he", + "▁a ll", + "ic h", + "▁n e", + "r i", + "▁b y", + "▁ha ve", + "om e", + "p p", + "▁th is", + "▁l i", + "▁d o", + "▁c on", + "u s", + "▁wh ich", + "▁c h", + "u l", + "q u", + "▁ j", + "▁u p", + "▁sa id", + "▁fr om", + "ar d", + "g e", + "▁o r", + "▁ v", + "▁on e", + "▁n o", + "t h", + "▁e x", + "▁we re", + "▁the re", + "p e", + "a nd", + "es t", + "▁m an", + "▁wh o", + "b le", + "i e", + "▁a l", + "an t", + "re s", + "ou s", + "u st", + "ver y", + "at ion", + "▁f e", + "▁the m", + "l f", + "▁whe n", + "n t", + "am e", + "in d", + "r a", + "▁g o", + "er s", + "as t", + "f e", + "oo d", + "▁k n", + "▁in t", + "is t", + "▁a re", + "ar t", + "ou t", + "▁w ould", + "▁l e", + "▁wh at", + "o s", + "▁the ir", + "on g", + "ou r", + "▁i f", + "▁c om", + "ou nd", + "▁a b", + "▁o ut", + "▁w or", + "e m", + "▁w ill", + "a k", + "▁m is", + "at e", + "o l", + "u m", + "u n", + "it t", + "ou gh", + "k ed", + "i g", + "a p", + "on e", + "▁be en", + "ow n", + "i ve", + "▁the n", + "▁b r", + "v en", + "i f", + "▁a r", + "' t", + "se lf", + "▁t r", + "▁p l", + "▁r o", + "▁p r", + "t her", + "re at", + "▁u n", + "▁a f", + "▁s p", + "▁ qu", + "▁p ro", + "it y", + "he d", + "▁t w", + "▁a g", + "▁c ould", + "o st", + "a ce", + "or t", + "u re", + "a ke", + "▁a m", + "ac k", + "▁an y", + "▁s ome", + "▁you r", + "▁m ore", + "▁c an", + "a u", + "▁t im", + "e p", + "a g", + "▁ en", + "c k", + "▁int o", + "▁c l", + "r y", + "▁n ow", + "h ing", + "nd er", + "a re", + "▁ very", + "▁g r", + "e l", + "o se", + "▁l oo", + "▁b o", + "v ed", + "o p", + "▁o 
ther", + "▁d id", + "an ce", + "▁th an", + "itt le", + "▁l ittle", + "in e", + "i es", + "w ay", + "it e", + "▁li ke", + "id e", + "▁l o", + "as s", + "▁b l", + "a ble", + "ur n", + "ou ght", + "▁kn ow", + "ot her", + "▁tim e", + "▁i m", + "▁d is", + "▁u s", + "▁c o", + "f ore", + "▁h ow", + "▁t e", + "en ce", + "▁d ay", + "▁a d", + "ad e", + "ic e", + "▁ab out", + "▁se e", + "▁o ver", + "p t", + "c c", + "▁to o", + "in k", + "▁f l", + "w n", + "▁g reat", + "▁af ter", + "p l", + "d e", + "▁p er", + "m ent", + "▁ag ain", + "▁up on", + "▁ha nd", + "a b", + "▁h as", + "re e", + "is h", + "c i", + "▁on ly", + "all y", + "▁we ll", + "▁sh ould", + "▁p o", + "▁m ar", + "res s", + "▁s ay", + "▁g ood", + "at her", + "▁tw o", + "ing s", + "▁p e", + "ou nt", + "▁o ur", + "i re", + "v ing", + "▁d own", + "ar s", + "er t", + "w e", + "▁be fore", + "i le", + "v es", + "▁a pp", + "▁e very", + "▁it s", + "▁o ld", + "▁th r", + "▁m u", + "▁m ade", + "i ed", + "ic k", + "▁l ong", + "a ge", + "t e", + "f t", + "▁whe re", + "an g", + "▁ne ver", + "▁m ust", + "▁p re", + "▁s m", + "f ul", + "▁su ch", + "u ll", + "▁st r", + "ion s", + "▁of f", + "▁s c", + "▁c ame", + "i ous", + "u e", + "▁mis s", + "w ard", + "i ld", + "▁f ir", + "▁e ven", + "▁u nder", + "ac t", + "▁the se", + "▁c ome", + "▁p art", + "▁f o", + "at ed", + "n ess", + "▁re m", + "or d", + "▁be c", + "t y", + "▁m ay", + "▁mu ch", + "▁th ink", + "p er", + "▁w ay", + "▁mis ter", + "l ed", + "▁l et", + "or n", + "▁e y", + "▁g l", + "▁con t", + "▁th ought", + "▁loo k", + "e ct", + "▁s pe", + "is e", + "▁b ack", + "▁be t", + "ad y", + "▁y e", + "an s", + "ac h", + "▁he re", + "▁j ust", + "re n", + "▁fir st", + "▁h o", + "▁o wn", + "▁d es", + "▁o b", + "ri ed", + "u d", + "ar y", + "▁w ent", + "▁m o", + "▁him self", + "▁m en", + "a ir", + "c l", + "a ve", + "at h", + "f f", + "▁s l", + "c o", + "on 't", + "ll ow", + "▁c r", + "▁re s", + "▁i '", + "▁m ight", + "i ly", + "▁se em", + "in t", + "i p", + "▁be g", + "ou se", + "an c", + 
"n 't", + "▁w at", + "▁thr ough", + "▁com p", + "b er", + "▁a way", + "▁c ar", + "▁e m", + "▁g et", + "▁im p", + "▁he ad", + "os s", + "▁li fe", + "▁be l", + "▁with out", + "▁m ost", + "▁p ass", + "▁m ake", + "▁con s", + "en ed", + "▁s om", + "▁t urn", + "a v", + "n g", + "▁sh all", + "▁a cc", + "▁th ose", + "▁p res", + "▁ey es", + "▁h ouse", + "i z", + "▁som et", + "▁j o", + "▁st ill", + "▁c all", + "▁n ight", + "he s", + "▁o p", + "au se", + "▁w om", + "▁l ast", + "k s", + "l ess", + "a red", + "▁com m", + "▁d on't", + "▁te ll", + "▁ ent", + "▁not hing", + "▁ne w", + "ig n", + "▁t ake", + "▁be ing", + "▁man y", + "▁wor d", + "on s", + "▁f ound", + "▁re t", + "as e", + "▁e ar", + "▁wh ile", + "▁at t", + "or y", + "i x", + "▁s er", + "▁sa w", + "▁p ut", + "n e", + "ot h", + "ie nd", + "▁pe op", + "▁w r", + "▁you ng", + "ar k", + "d y", + "ak ing", + "l es", + "▁c ount", + "▁on ce", + "▁fr iend", + "▁l a", + "en s", + "▁peop le", + "pe ct", + "or s", + "fe ct", + "▁m at", + "in ce", + "i ble", + "e red", + "▁ro om", + "▁th ree", + "▁y et", + "a il", + "▁s ame", + "▁f ather", + "▁r ight", + "▁ch ild", + "▁c our", + "i gh", + "▁pl ace", + "▁an other", + "ul t", + "i v", + "it ion", + "▁in d", + "▁w ant", + "▁th ough", + "▁n or", + "▁f ar", + "▁k ing", + "▁ha pp", + "▁he art", + "▁f ace", + "▁e nd", + "▁e ver", + "▁n at", + "th ing", + "▁lo ve", + "g et", + "▁too k", + "▁d ist", + "e ver", + "i an", + "▁h u", + "e w", + "▁ar m", + "▁in st", + "m an", + "▁wor k", + "▁l ight", + "▁ch ar", + "▁p le", + "ic t", + "▁s et", + "▁a c", + "▁loo ked", + "▁miss us", + "▁as ked", + "▁m ind", + "▁y es", + "▁su pp", + "▁int e", + "▁re p", + "c ess", + "ent ly", + "▁le ft", + "g g", + "ert ain", + "▁k e", + "is hed", + "u b", + "▁p ers", + "way s", + "▁th ings", + "al k", + "ir l", + "▁m om", + "▁s ir", + "▁w a", + "▁mom ent", + "ation s", + "▁s at", + "se l", + "▁f ind", + "ow er", + "i a", + "v ent", + "re w", + "▁wor ld", + "j ect", + "▁g ive", + "▁c ap", + "▁wh y", + "s o", + "▁g 
u", + "▁m other", + "▁g en", + "▁s w", + "▁al ways", + "d er", + "l t", + "l ing", + "▁an s", + "pp ed", + "▁so on", + "▁a ct", + "▁for m", + "▁e l", + "d d", + "▁he ard", + "re t", + "▁th ing", + "▁somet hing", + "▁seem ed", + "▁su b", + "▁do or", + "an ge", + "▁g irl", + "c ed", + "▁app e", + "it her", + "▁w ind", + "▁bec ause", + "▁d if", + "▁m on", + "s s", + "▁go ing", + "▁to ld", + "or m", + "▁h ome", + "ain ed", + "▁g ot", + "▁w ar", + "▁go d", + "au ght", + "▁g i", + "▁en g", + "▁s ur", + "n ing", + "▁hand s", + "▁wom an", + "▁fo llow", + "l and", + "a ut", + "▁v o", + "▁fe el", + "▁re l", + "▁p oss", + "c hed", + "ic al", + "p le", + "p h", + "▁bo y", + "▁ret urn", + "▁re g", + "▁re st", + "oo k", + "▁kn ew", + "n er", + "▁e ach", + "▁o h", + "▁s il", + "▁k ind", + "▁ex p", + "▁m a", + "▁c le", + "▁he l", + "i ver", + "t ing", + "▁de l", + "u al", + "▁in f", + "▁as s", + "▁wat er", + "▁con f", + "▁b re", + "▁w o", + "ce pt", + "▁bel ie", + "▁c ertain", + "▁again st", + "▁h ard", + "▁p h", + "r ow", + "▁u nt", + "▁ye ars", + "▁qu ite", + "▁s ide", + "in ess", + "in ed", + "▁ne ar", + "▁h or", + "ter s", + "i red", + "oo l", + "▁f our", + "▁fe w", + "▁d one", + "i er", + "▁c he", + "re st", + "it ed", + "m ost", + "▁bet ter", + "▁ha lf", + "▁m in", + "▁t re", + "p s", + "▁al so", + "▁c are", + "o ck", + "u ck", + "ou b", + "▁beg an", + "ull y", + "▁en ough", + "is ed", + "r u", + "▁ha ving", + "▁se en", + "▁gen er", + "▁l ady", + "▁d ra", + "▁h um", + "ap s", + "ot t", + "▁p ur", + "ak en", + "ro ss", + "y ing", + "▁t er", + "▁h our", + "▁in de", + "an k", + "▁call ed", + "i al", + "as on", + "▁be h", + "▁do es", + "▁who le", + "▁m orn", + "▁turn ed", + "▁ple as", + "▁st e", + "▁re f", + "▁g ave", + "en se", + "▁o cc", + "i b", + "▁cour se", + "▁in s", + "re am", + "get her", + "ut h", + "▁b oth", + "▁s ou", + "▁c ur", + "▁ad d", + "e en", + "▁c ol", + "▁re ad", + "we en", + "sel ves", + "▁am ong", + "▁bet ween", + "▁in c", + "▁ke ep", + "▁be aut", + "ul 
ar", + "▁po or", + "▁it 's", + "▁su re", + "▁morn ing", + "▁wh ite", + "g ed", + "▁n ame", + "▁de ar", + "▁to ward", + "ut e", + "▁sm all", + "▁wh om", + "▁re pl", + "▁s k", + "▁l ar", + "▁fe lt", + "b o", + "os ed", + "at ing", + "▁my self", + "▁op en", + "▁s ix", + "▁her self", + "▁how ever", + "▁b u", + "o nd", + "ain t", + "x t", + "▁f ore", + "▁in ter", + "▁e v", + "▁h igh", + "ct ion", + "▁hu nd", + "▁st ood", + "▁hund red", + "as ter", + "▁t ra", + "▁sh ow", + "▁s ent", + "i fe", + "▁r ound", + "▁s im", + "▁d r", + "▁g ra", + "▁word s", + "▁day s", + "▁al most", + "a le", + "ve l", + "▁po int", + "ent s", + "▁g re", + "▁e ight", + "c es", + "at es", + "dd en", + "▁f am", + "▁st and", + "▁b us", + "▁l and", + "▁ ed", + "▁me an", + "un g", + "h aps", + "▁su n", + "u res", + "▁s ince", + "i et", + "ir d", + "▁per haps", + "n ed", + "▁s le", + "is s", + "▁b est", + "▁su dden", + "▁d ark", + "▁repl ied", + "▁vo ice", + "▁m et", + "▁any thing", + "▁t ill", + "▁under st", + "▁b ar", + "it s", + "▁unt il", + "in s", + "ou d", + "▁bl ack", + "▁b ro", + "▁he ar", + "▁look ing", + "▁c ried", + "▁you '", + "▁f act", + "am p", + "▁pr in", + "▁l ess", + "▁l ay", + "▁ne xt", + "▁la w", + "u p", + "▁p ower", + "▁pro p", + "n ot", + "re nt", + "▁br ought", + "ate ly", + "ent y", + "▁count ry", + "▁hel p", + "al s", + "▁qu est", + "m ed", + "▁u se", + "▁v is", + "▁s n", + "▁i' m", + "f ully", + "▁sp o", + "▁to gether", + "▁ne ed", + "▁a ir", + "▁ad v", + "▁pers on", + "▁inde ed", + "▁cont in", + "▁un c", + "one y", + "▁g ent", + "▁pres ent", + "▁a w", + "▁p ar", + "ow s", + "u red", + "▁f ull", + "t ain", + "▁r un", + "▁r ather", + "▁i de", + "▁co nd", + "nd ed", + "▁l at", + "▁s y", + "b e", + "d u", + "▁h ar", + "▁fe et", + "▁f in", + "et er", + "▁f all", + "ce i", + "▁f ive", + "▁m il", + "▁b ed", + "o c", + "▁do ct", + "▁inte rest", + "ress ed", + "▁mat ter", + "▁l ord", + "▁g one", + "▁ es", + "f ort", + "▁de ath", + "▁w ife", + "▁ser v", + "▁p at", + "er ing", + "oub 
t", + "▁ad m", + "▁t alk", + "▁t aken", + "▁ar t", + "▁t ri", + "▁other s", + "▁ho pe", + "as h", + "a z", + "▁ex t", + "▁can not", + "ie f", + "▁spe ak", + "▁l au", + "▁them selves", + "▁al ong", + "▁d ire", + "o ve", + "m b", + "p r", + "▁b es", + "▁c ou", + "▁m or", + "t en", + "▁gent le", + "ur ing", + "▁f ire", + "▁lar ge", + "▁p ol", + "▁c at", + "▁s we", + "ent ion", + "ver s", + "▁th us", + "a pp", + "▁se c", + "▁pl ay", + "▁re al", + "▁pr om", + "ment s", + "we red", + "ie ld", + "ain s", + "is on", + "ac hed", + "▁th ou", + "▁re ason", + "▁th ous", + "it ing", + "▁br other", + "ak es", + "▁thous and", + "on t", + "▁m oney", + "▁rem em", + "▁de p", + "▁ans wered", + "▁tr ue", + "▁child ren", + "▁beh ind", + "o y", + "▁s ound", + "ant s", + "ab ly", + "▁w ood", + "us ed", + "▁de c", + "▁who se", + "o d", + "▁e le", + "▁tw enty", + "▁r a", + "it u", + "▁belie ve", + "▁wo nder", + "en e", + "▁in v", + "▁h on", + "ar ing", + "s h", + "u ed", + "▁su ff", + "▁o pp", + "▁d oubt", + "▁re c", + "t on", + "▁ho ld", + "▁dif fe", + "▁pass ed", + "▁c or", + "m e", + "id ed", + "it ies", + "▁m er", + "▁s ing", + "▁nat ure", + "▁al one", + "▁de ad", + "▁p ri", + "k en", + "l ic", + "▁re d", + "▁b ur", + "ac es", + "▁cl ose", + "▁go ld", + "▁st art", + "▁h ur", + "▁f ur", + "o g", + "anc es", + "▁as k", + "▁doct or", + "▁s on", + "▁gr ound", + "w er", + "et s", + "▁se a", + "▁str ong", + "▁le ave", + "▁comp an", + "▁i' ll", + "er y", + "c y", + "ill ed", + "ep t", + "id es", + "t le", + "▁c e", + "▁ob s", + "bo dy", + "▁fe ll", + "▁s ign", + "co nd", + "▁m ount", + "▁f air", + "▁gi ven", + "▁there fore", + "an e", + "▁i r", + "▁de ep", + "if ul", + "f ic", + "y s", + "▁of ten", + "▁bo dy", + "u nt", + "▁sh ort", + "▁t em", + "▁f a", + "▁m aster", + "▁ear th", + "▁p ap", + "ce ed", + "▁st re", + "▁se cond", + "▁for t", + "b ed", + "g th", + "ow ed", + "▁hor se", + "id d", + "▁m ad", + "u ally", + "▁p a", + "▁ch r", + "▁or der", + "▁t en", + "ve red", + "▁con st", + "▁w 
ish", + "▁f if", + "▁e as", + "▁c ir", + "▁d ro", + "a im", + "he n", + "▁c a", + "▁re ally", + "re ad", + "cei ved", + "▁i ll", + "▁fe ar", + "os ition", + "▁underst and", + "▁sp ir", + "▁l ist", + "▁ab s", + "▁sp r", + "ac ed", + "▁quest ion", + "ang er", + "▁every thing", + "aught er", + "▁af f", + "▁w all", + "▁com ing", + "ch ing", + "re ady", + "id er", + "▁ab ove", + "▁pr ince", + "▁al ready", + "▁le ast", + "▁re co", + "▁ex pl", + "▁st ep", + "▁us ed", + "▁r u", + "▁it self", + "is ter", + "▁ne cess", + "▁c ase", + "▁ar ound", + "h n", + "▁sou l", + "▁sudden ly", + "g er", + "▁l ad", + "▁even ing", + "▁m ag", + "▁gener al", + "▁n um", + "im es", + "▁kn own", + "▁w al", + "▁qu ick", + "iz ed", + "▁m us", + "▁s ch", + "▁cap tain", + "▁that 's", + "if ic", + "▁whe ther", + "▁le ar", + "g n", + "▁with in", + "m en", + "▁li ve", + "ver n", + "▁tim es", + "▁ex pect", + "▁st ate", + "▁friend s", + "▁br ing", + "▁s ort", + "▁wom en", + "▁t able", + "▁me et", + "▁jo hn", + "▁cir c", + "▁su m", + "▁return ed", + "il ed", + "▁d ri", + "▁he ld", + "▁ex c", + "▁b ig", + "▁say s", + "▁per fect", + "▁le a", + "▁obs er", + "▁el se", + "▁d uring", + "id ent", + "▁h us", + "t ed", + "▁beaut iful", + "▁cle ar", + "▁e ither", + "▁to wn", + "▁s ight", + "▁l ost", + "▁sle ep", + "▁me ans", + "▁fo ot", + "▁c ut", + "▁c al", + "▁k ept", + "▁r an", + "i ence", + "▁pro f", + "te red", + "he re", + "et y", + "▁fe llow", + "▁can 't", + "▁m ist", + "▁p ast", + "▁d ream", + "ag es", + "▁bec ame", + "▁pre t", + "▁dis c", + "▁b ad", + "▁m aking", + "ut ion", + "▁ob ject", + "▁toward s", + "▁l ow", + "u ght", + "▁de v", + "▁hum an", + "▁man ner", + "▁str ange", + "▁ye ar", + "o ld", + "i ent", + "in es", + "▁se ver", + "m on", + "▁an n", + "air s", + "c hes", + "▁c ity", + "▁somet imes", + "' d", + "▁ro se", + "▁ est", + "il ity", + "▁w alk", + "▁re ady", + "▁p al", + "▁le g", + "▁ro ad", + "i ans", + "ci ous", + "▁c orn", + "▁th y", + "▁co ld", + "ll y", + "ious ly", + "l ish", + "▁st 
ra", + "m er", + "▁b at", + "ow ing", + "ie w", + "▁chr ist", + "▁s qu", + "▁tr uth", + "c ri", + "ll ed", + "▁th ir", + "▁did n't", + "b ert", + "▁so ci", + "b r", + "▁b it", + "▁sub ject", + "▁sh ip", + "▁m ur", + "▁app ro", + "▁p ie", + "▁ans wer", + "▁f ree", + "▁bus iness", + "▁ ut", + "a pe", + "▁appe ar", + "▁r iver", + "▁st o", + "▁c ast", + "▁fam ily", + "▁j ud", + "▁ex cl", + "▁let ter", + "ing ly", + "ri e", + "▁ha ir", + "ot e", + "▁arm s", + "▁bec ome", + "er n", + "ou ble", + "▁diffe rent", + "▁v al", + "f fect", + "▁nat ur", + "▁poss ible", + "▁sever al", + "▁f ine", + "a h", + "▁le ad", + "▁for g", + "▁exp ress", + "l i", + "▁su s", + "▁gl ad", + "o on", + "▁ar ri", + "▁bl ood", + "itt ing", + "▁qu iet", + "ren ce", + "▁ide a", + "▁a ble", + "itt ed", + "st er", + "▁char ac", + "▁beg in", + "▁ch ur", + "▁t ou", + "▁st ory", + "▁ey e", + "b and", + "at ive", + "▁gr and", + "▁cons ider", + "▁ac ross", + "▁p en", + "▁ex cept", + "▁f re", + "▁w in", + "▁e qu", + "et h", + "▁c ent", + "is f", + "▁part ic", + "▁dif fic", + "▁wind ow", + "▁sur pr", + "ll ect", + "▁pro v", + "▁dire ct", + "▁con c", + "e y", + "a w", + "▁go vern", + "▁dis co", + "▁w ild", + "▁do g", + "▁fl o", + "▁so ft", + "te en", + "▁c ross", + "as ed", + "▁e ffect", + "▁s or", + "▁long er", + "▁he n", + "▁follow ed", + "▁so ld", + "▁the e", + "▁p ub", + "▁hus band", + "ard s", + "ant ly", + "b y", + "▁a p", + "▁supp ose", + "▁res pect", + "t s", + "▁h ast", + "▁s al", + "▁comp le", + "▁he av", + "▁happ y", + "▁r ich", + "▁c reat", + "un e", + "▁t aking", + "▁re qu", + "▁st ay", + "▁spo ke", + "▁d aughter", + "▁we e", + "▁ ve", + "▁d u", + "▁gre en", + "▁an im", + "▁d in", + "' ll", + "▁b ird", + "al th", + "▁me re", + "▁g ard", + "n y", + "le y", + "▁poss ess", + "em pt", + "▁re ached", + "▁appe ared", + "o v", + "▁ex ist", + "in ation", + "▁pret ty", + "▁remem ber", + "▁he a", + "▁op ened", + "▁to m", + "ang ed", + "▁sl ow", + "▁im ag", + "▁i' ve", + "r act", + "▁say ing", + "k ing", + 
"ut es", + "▁comm on", + "▁occ as", + "▁b ook", + "▁r us", + "am es", + "ic es", + "▁br ight", + "m s", + "▁sat isf", + "▁s ense", + "▁f av", + "▁su cc", + "um p", + "is ing", + "▁l u", + "▁acc ord", + "ter n", + "▁bre ak", + "▁ex per", + "▁mon th", + "u se", + "▁de m", + "▁sc ar", + "▁contin ued", + "▁sec ret", + "▁chur ch", + "▁t ree", + "▁st ri", + "▁car ried", + "▁c ry", + "nd ing", + "▁spir it", + "▁want ed", + "er ic", + "▁certain ly", + "▁comm and", + "▁d est", + "▁mo ve", + "ou n", + "▁swe et", + "▁stre et", + "▁o ught", + "▁acc ount", + "▁de f", + "h am", + "▁pre p", + "▁s ens", + "▁es c", + "▁ro ck", + "ot s", + "▁de cl", + "▁pur p", + "ri age", + "ou th", + "ow ers", + "▁dra w", + "▁e at", + "▁b reat", + "▁charac ter", + "im e", + "c ul", + "med i", + "▁st ud", + "▁sch ool", + "itu de", + "▁hea ven", + "▁feel ing", + "▁s ad", + "▁reg ard", + "em ent", + "▁p ain", + "▁wor th", + "▁b ra", + "ne y", + "▁d ut", + "▁sm o", + "aim ed", + "▁tr ans", + "▁del ight", + "▁qu ar", + "▁h ung", + "▁m ot", + "▁bl ue", + "▁h ot", + "▁h ill", + "▁d iv", + "um b", + "▁dis app", + "▁mar g", + "▁lau gh", + "id ence", + "▁pro du", + "▁succ ess", + "ur y", + "s on", + "▁f ast", + "▁eng lish", + "▁d ress", + "▁h at", + "▁ter ri", + "▁p ort", + "▁ne ither", + "▁cour t", + "▁se ven", + "▁f ight", + "▁prin cess", + "▁li ved", + "▁v iew", + "▁im medi", + "▁se lf", + "▁v ar", + "▁hour s", + "▁m ill", + "▁so l", + "▁ex am", + "▁t ried", + "▁w on't", + "▁ent ered", + "▁dis p", + "t o", + "r ic", + "▁car ry", + "▁imp ort", + "▁an g", + "z e", + "on y", + "▁d anger", + "led ge", + "▁off ic", + "▁c ause", + "▁n one", + "▁for ward", + "▁unc le", + "▁to r", + "▁d et", + "as k", + "▁l en", + "▁fur ther", + "▁p ay", + "▁add ed", + "▁fr ont", + "r or", + "▁g e", + "▁partic ular", + "▁de al", + "▁pr ot", + "▁l ed", + "▁ac qu", + "▁pr ay", + "▁e ff", + "▁happ ened", + "▁ch ief", + "le ct", + "▁wal ked", + "▁lat er", + "▁jo y", + "i ar", + "d ay", + "▁or d", + "▁al th", + "▁com fort", + "▁pro 
b", + "▁ma j", + "▁af fect", + "▁pub lic", + "▁b ene", + "en ing", + "▁alth ough", + "g r", + "▁sh o", + "▁f ig", + "res h", + "▁f ail", + "u ct", + "u g", + "al ity", + "▁me m", + "▁seem s", + "▁your self", + "sh ip", + "e ad", + "i am", + "▁num ber", + "s ide", + "▁a h", + "▁do ing", + "▁li ving", + "are nt", + "▁des p", + "iz e", + "oo f", + "▁f ield", + "▁re ceived", + "▁sh ad", + "▁be y", + "▁bey ond", + "▁ph il", + "▁l ine", + "▁vis it", + "in ct", + "ri g", + "▁part y", + "▁gard en", + "▁j e", + "▁m outh", + "▁ha ll", + "▁qu een", + "▁bo at", + "▁be ar", + "▁am eric", + "is m", + "▁gentle man", + "▁v i", + "ir t", + "u ff", + "▁la id", + "ra id", + "▁occas ion", + "▁ent ire", + "▁a ge", + "▁s ister", + "▁cl ot", + "▁re pe", + "ous ly", + "▁pr ison", + "▁acc om", + "▁wh is", + "▁near ly", + "▁tre es", + "il ing", + "if f", + "▁eight een", + "b it", + "ward s", + "▁ear ly", + "▁t al", + "▁l ab", + "▁for th", + "m ing", + "on es", + "▁m ed", + "▁tr y", + "▁d a", + "il t", + "anc ed", + "▁prin ci", + "▁en em", + "▁think ing", + "▁ch ance", + "w here", + "▁c re", + "▁min utes", + "▁an x", + "▁mar y", + "▁p ict", + "▁wa it", + "▁v ill", + "▁st ren", + "▁af raid", + "▁cr ow", + "▁sm ile", + "▁l ate", + "▁eng land", + "▁pleas ure", + "▁a unt", + "▁new s", + "▁w is", + "▁f le", + "▁see ing", + "▁su per", + "▁fa ith", + "▁ro b", + "im ent", + "o int", + "▁b ill", + "ll ing", + "▁ne igh", + "▁tr ouble", + "▁sil ence", + "▁pl ain", + "▁there 's", + "are t", + "pe nd", + "▁excl aimed", + "ren ch", + "g y", + "▁mil es", + "p ly", + "▁gl ass", + "▁d rew", + "▁neigh b", + "el s", + "▁m ine", + "▁pr act", + "▁heav y", + "▁stand ing", + "▁se vent", + "▁sh ar", + "▁ch ange", + "▁necess ary", + "▁ch ap", + "▁purp ose", + "▁in qu", + "▁natur al", + "▁d eter", + "ic ked", + "▁b ott", + "▁hard ly", + "▁be ll", + "▁to p", + "▁c aught", + "fe red", + "w h", + "i ves", + "ound ed", + "▁a uth", + "▁circ um", + "▁f ing", + "▁sto pped", + "u c", + "▁w it", + "am ent", + "▁op in", + "▁a 
v", + "▁pri v", + "ain ing", + "▁inst ead", + "ru pt", + "▁g rew", + "▁lo ved", + "▁is land", + "▁kn ight", + "▁ag o", + "▁len gth", + "▁in n", + "▁pe ace", + "l s", + "in ary", + "i or", + "u es", + "▁th ird", + "us h", + "▁beaut y", + "▁h ig", + "▁he 's", + "t he", + "f orm", + "he ad", + "ic ally", + "as p", + "anc y", + "▁deter m", + "▁stra ight", + "▁c ra", + "in ing", + "pp er", + "l er", + "▁inf l", + "▁th or", + "▁con vers", + "▁bes ides", + "▁p osition", + "▁thir ty", + "▁d en", + "ra ge", + "▁att ention", + "m a", + "▁con v", + "ag er", + "▁his t", + "o red", + "▁com es", + "ag ed", + "▁for ce", + "▁s itting", + "▁ple ase", + "te nd", + "it er", + "▁what ever", + "▁inf orm", + "▁h op", + "▁ch air", + "▁bu ild", + "▁b ab", + "ust om", + "▁girl s", + "▁r om", + "▁f rench", + "▁str uck", + "▁p ull", + "▁a st", + "▁li e", + "▁wr ong", + "▁know ledge", + "▁gra ce", + "▁scar ce", + "g hed", + "▁res ol", + "▁wat ch", + "▁thought s", + "▁r id", + "▁att empt", + "▁fif ty", + "▁r ap", + "▁bo x", + "h ood", + "▁get ting", + "▁ ver", + "▁f at", + "▁compan y", + "▁ar r", + "▁crow d", + "▁b urn", + "▁sl ight", + "▁cl ass", + "▁sou th", + "▁d ie", + "▁ex act", + "▁dr ink", + "▁en j", + "▁th ick", + "▁din ner", + "▁sa ve", + "▁ma id", + "▁pl an", + "▁sa int", + "▁immedi ately", + "i ers", + "▁b orn", + "i us", + "▁re v", + "▁te ars", + "ist s", + "▁t reat", + "us ion", + "▁me ant", + "▁boy s", + "pp ing", + "▁slow ly", + "▁in cl", + "▁l im", + "▁d ied", + "ic ed", + "▁com pl", + "▁f ool", + "▁fore st", + "▁su gg", + "▁p ost", + "▁ac cept", + "▁res ult", + "▁auth or", + "nd on", + "ce ive", + "▁sugg est", + "ci ent", + "▁st one", + "▁fr ight", + "▁pap er", + "▁con se", + "▁j our", + "▁t y", + "▁en c", + "▁quick ly", + "▁cont r", + "▁you th", + "▁se nd", + "▁v ict", + "if ied", + "▁bel ong", + "▁war m", + "▁f ix", + "▁imp oss", + "▁bes ide", + "▁ er", + "▁to ne", + "▁c amp", + "▁des ire", + "▁b ound", + "▁m akes", + "▁marg aret", + "▁nor th", + "▁br own", + "▁mo on", + 
"▁li ps", + "▁pl aced", + "v al", + "▁circum st", + "▁f ood", + "▁f illed", + "ic s", + "if t", + "an n", + "▁lo ndon", + "▁dist ance", + "g ing", + "▁stren gth", + "▁i d", + "▁flo or", + "▁for get", + "▁ob l", + "▁m id", + "ri es", + "it ions", + "b s", + "▁spr ing", + "▁you' re", + "▁vi ol", + "▁j ack", + "▁po ck", + "oo ks", + "▁follow ing", + "▁s ac", + "▁rem ained", + "ar ch", + "▁gr ow", + "▁sn ow", + "▁govern ment", + "▁b all", + "▁h ors", + "▁n ar", + "ad ed", + "▁bro ken", + "▁lau ghed", + "▁des cri", + "▁sa fe", + "itt en", + "ive ly", + "▁prof ess", + "▁o '", + "am ed", + "▁dep art", + "▁eas y", + "ou red", + "▁u nd", + "▁cou n", + "▁than k", + "▁know s", + "▁wa iting", + "d om", + "at s", + "▁g er", + "▁v an", + "▁an ne", + "▁hors es", + "u gg", + "▁d read", + "▁un e", + "g es", + "ac y", + "▁pro ceed", + "▁g az", + "▁sh out", + "▁start ed", + "ent ed", + "▁comple te", + "o pe", + "▁g all", + "de red", + "▁w ide", + "i res", + "▁ne ck", + "as ure", + "ist ed", + "▁serv ice", + "▁pie ce", + "ci ally", + "en ces", + "▁sa il", + "▁pal ace", + "er v", + "▁gu ard", + "▁do ll", + "▁talk ing", + "▁man 's", + "▁li ft", + "▁gra ve", + "▁wee k", + "le t", + "▁imposs ible", + "▁eff ort", + "▁im m", + "▁arm y", + "we ll", + "▁diffic ult", + "u nd", + "▁f resh", + "▁f un", + "re me", + "▁st op", + "▁m ess", + "▁g ar", + "▁de g", + "▁inc re", + "▁corn er", + "▁soci ety", + "▁we ak", + "▁sh ut", + "▁h y", + "▁pro per", + "ac hing", + "▁cl oud", + "idd le", + "iv id", + "▁dem and", + "▁n ine", + "▁s it", + "▁reco gn", + "▁be at", + "us s", + "▁turn ing", + "▁sk y", + "▁opin ion", + "▁sing le", + "p ic", + "▁f ly", + "▁l ang", + "▁m ass", + "ce ll", + "▁out side", + "▁k iss", + "▁tr ust", + "▁occ up", + "▁ev il", + "▁bel ow", + "▁appear ance", + "u it", + "▁after n", + "▁gl o", + "▁g un", + "▁w est", + "en cy", + "p ar", + "▁show ed", + "▁convers ation", + "is es", + "▁con n", + "▁could n't", + "▁run ning", + "▁m ention", + "▁great er", + "▁mus ic", + "▁breat h", + "as 
es", + "▁n in", + "▁an t", + "are r", + "▁mor row", + "▁b ank", + "▁es pe", + "▁p eter", + "or k", + "ci al", + "▁pres ence", + "▁bat tle", + "▁win ter", + "he red", + "▁prob ably", + "▁clot hes", + "▁f ash", + "▁mar k", + "▁w ished", + "ve re", + "▁co ll", + "▁em b", + "▁kn e", + "▁mar ried", + "▁arri ved", + "▁p un", + "▁e vent", + "us hed", + "▁suff ic", + "▁e ager", + "▁form er", + "▁gi ving", + "▁p op", + "▁sa nd", + "▁ne g", + "▁us ual", + "▁rel ig", + "▁sim ple", + "▁sy m", + "it ation", + "▁g ro", + "or ies", + "▁mo ved", + "▁month s", + "▁spe aking", + "▁p et", + "▁sil ent", + "▁c ab", + "▁mount ain", + "▁express ion", + "g ar", + "▁co vered", + "▁hu nt", + "▁aftern oon", + "ap ed", + "▁occ ur", + "rie f", + "▁st ates", + "▁ z", + "st r", + "▁lo c", + "l ight", + "▁sh ore", + "c he", + "▁eas ily", + "▁p ale", + "un ity", + "▁rem ark", + "▁ph ys", + "▁begin ning", + "▁dut y", + "▁chap ter", + "▁infl u", + "ch o", + "▁con cl", + "am b", + "▁inst ant", + "▁pol it", + "z z", + "▁enj oy", + "▁s ick", + "▁rem ain", + "u el", + "▁st ream", + "▁fig ure", + "a ld", + "▁t ur", + "▁p ath", + "▁v ol", + "▁min ute", + "▁pleas ant", + "▁scarce ly", + "▁cons cious", + "▁terri ble", + "▁k ill", + "▁ra ised", + "▁fash ion", + "▁tw el", + "y al", + "▁lea ving", + "▁twel ve", + "at ure", + "▁f ut", + "▁th rew", + "▁st ar", + "▁fl owers", + "ol og", + "▁tr ying", + "ri b", + "▁sw ord", + "▁t all", + "▁mar ry", + "▁b en", + "▁expect ed", + "▁accord ing", + "▁for ty", + "▁st ick", + "in al", + "▁gu ess", + "▁sil ver", + "▁ir on", + "▁obl ig", + "▁off ice", + "▁rap id", + "▁lad ies", + "▁espe cially", + "i pped", + "ort ed", + "▁bre ad", + "e ch", + "▁te nder", + "or th", + "▁lear ned", + "▁b ooks", + "▁is n't", + "▁surpr ise", + "▁wr ite", + "▁pur s", + "pe red", + "▁wr itten", + "▁k illed", + "▁conse qu", + "▁ex h", + "▁pl aces", + "▁cond ition", + "▁dire ction", + "▁ch o", + "ul ty", + "j o", + "m it", + "▁entire ly", + "ter ing", + "▁ent er", + "▁act ion", + "w ise", + "▁su 
c", + "ib ly", + "▁happ iness", + "▁dec ided", + "▁gold en", + "▁lang u", + "en ess", + "▁not e", + "▁un less", + "u ous", + "▁f al", + "al ed", + "▁you' ll", + "▁wonder ful", + "ound s", + "um e", + "' re", + "▁sh ook", + "er 's", + "oo p", + "one l", + "▁perfect ly", + "▁ge or", + "nd ered", + "▁bro ad", + "at ic", + "▁cl osed", + "a 's", + "▁sp ot", + "te nded", + "▁lat ter", + "▁step s", + "▁mere ly", + "▁hist ory", + "f er", + "▁w ise", + "is hing", + "os ing", + "▁m iddle", + "ide red", + "▁underst ood", + "▁enem y", + "▁so le", + "ll ig", + "▁j ew", + "▁sim ply", + "g an", + "▁cond uct", + "▁t ast", + "▁bo ard", + "▁sa v", + "▁would n't", + "▁sh ot", + "▁rep ly", + "▁ch anged", + "m n", + "▁gr ass", + "▁fin ally", + "▁adm ir", + "it al", + "▁shar p", + "it ch", + "▁fort une", + "▁sum mer", + "▁exper ience", + "▁suc ceed", + "g ress", + "ut ed", + "▁o rig", + "ret ched", + "▁jour ney", + "▁ex cell", + "▁obser ved", + "a x", + "▁after wards", + "f ast", + "s y", + "▁b ow", + "▁fl at", + "▁pers ons", + "▁le an", + "▁ear n", + "▁bro ke", + "▁m ir", + "▁f it", + "os p", + "▁mar riage", + "▁rep res", + "i o", + "▁l ying", + "un k", + "▁tra ve", + "▁s itu", + "▁list en", + "▁acqu aint", + "▁r ing", + "ci ence", + "▁f aint", + "ol ute", + "▁cal m", + "b ered", + "▁li ves", + "▁esc ape", + "▁bene ath", + "ous es", + "▁cl im", + "▁bl ess", + "▁repe ated", + "▁pock et", + "est s", + "▁t ail", + "▁pass ion", + "▁d ick", + "▁v en", + "os es", + "cl ock", + "▁m ut", + "▁bec om", + "▁o per", + "▁o' clock", + "▁f ish", + "▁l ou", + "se mb", + "▁pre v", + "▁all owed", + "▁fam il", + "he l", + "▁g ate", + "▁sp ite", + "iver s", + "▁he alth", + "iss ion", + "▁i gn", + "▁re ach", + "▁c and", + "▁r ain", + "▁em pl", + "▁b an", + "▁str ugg", + "▁fir m", + "▁bit ter", + "▁sor ry", + "b ing", + "▁father 's", + "▁tem per", + "▁mad ame", + "pl es", + "▁f urn", + "▁fut ure", + "um ed", + "▁n ice", + "▁se par", + "▁pres ently", + "▁circumst ances", + "▁conn ect", + "id ing", + "▁set 
t", + "k es", + "▁l oud", + "▁wor se", + "▁w and", + "▁sp read", + "▁i' d", + "▁let ters", + "▁ye llow", + "▁mag n", + "▁pass ing", + "▁k it", + "▁pleas ed", + "▁dark ness", + "▁rem ar", + "idd en", + "c ome", + "▁te a", + "▁c iv", + "▁ap art", + "▁disapp e", + "▁import ant", + "▁leg s", + "▁n ation", + "▁del ic", + "▁d ressed", + "▁g ame", + "▁wall s", + "e c", + "▁d ry", + "▁v irt", + "▁d im", + "id ently", + "re l", + "▁r ub", + "▁abs olute", + "▁bl ind", + "▁disco vered", + "▁exact ly", + "▁d am", + "ott en", + "▁sor row", + "m y", + "▁c ost", + "fe rence", + "▁empl oy", + "vel op", + "▁c ous", + "▁be ast", + "▁spe c", + "▁opp ort", + "▁e ars", + "▁dro pped", + "▁sub st", + "▁che e", + "▁prot ect", + "il s", + "▁sm iled", + "in a", + "▁res p", + "▁prom ise", + "▁b ag", + "▁h ost", + "ur s", + "▁creat ure", + "▁not ice", + "▁know ing", + "▁head s", + "▁conc er", + "▁se at", + "ish ment", + "▁ind ivid", + "▁exist ence", + "▁determ ined", + "le nd", + "▁st orm", + "ro y", + "our s", + "▁con ce", + "ang ing", + "▁fix ed", + "▁p ress", + "▁maj or", + "o ved", + "▁v es", + "i od", + "▁lear n", + "▁mot ion", + "▁em pt", + "▁lea ves", + "▁bott om", + "▁ar g", + "iet y", + "▁no body", + "▁pro s", + "qu e", + "▁ut ter", + "▁p ick", + "ac ked", + "▁inte llig", + "▁he s", + "▁st ir", + "▁pre vent", + "▁ass ist", + "▁d om", + "▁dis g", + "▁adv ant", + "er able", + "▁v ent", + "um ent", + "▁t ired", + "re ct", + "as hed", + "act ion", + "▁cons idered", + "▁wr ote", + "▁h ouses", + "▁su it", + "▁che er", + "▁cast le", + "▁p ra", + "▁per form", + "anc ing", + "▁cle an", + "ru ct", + "▁st ro", + "▁fre qu", + "▁draw ing", + "▁l uck", + "▁ha bit", + "id ge", + "e ll", + "▁on es", + "▁no ble", + "▁sp lend", + "▁hon or", + "z en", + "▁pa id", + "▁spe ech", + "▁est ab", + "▁u r", + "ist r", + "▁individ ual", + "in ite", + "▁v all", + "▁bird s", + "ro du", + "▁d ar", + "▁all ow", + "▁conf ess", + "▁imp ress", + "▁prop ert", + "▁j ane", + "▁s ong", + "▁var ious", + "▁nar row", + "▁mo 
der", + "▁belie ved", + "ay s", + "▁ext ra", + "▁p ure", + "ar ily", + "▁per iod", + "▁shad ow", + "▁some wh", + "▁m al", + "▁c ott", + "▁ext reme", + "▁jud ge", + "▁vill age", + "▁ro yal", + "▁somewh at", + "▁l ower", + "▁ha m", + "▁ag ree", + "▁remem bered", + "▁ast on", + "ent h", + "▁decl ared", + "p an", + "▁tr ain", + "▁part s", + "▁col onel", + "am ber", + "▁break fast", + "▁sure ly", + "▁s in", + "ay ed", + "▁sc ene", + "g o", + "▁great est", + "▁influ ence", + "▁c ustom", + "it ary", + "▁anim al", + "▁sa ke", + "▁mo d", + "▁sold iers", + "in y", + "▁an cient", + "▁dra wn", + "▁ev idently", + "▁way s", + "▁look s", + "▁rev ol", + "at or", + "ant ed", + "▁ref lect", + "▁pict ure", + "▁like ly", + "▁sh r", + "▁law s", + "▁hold ing", + "▁diffic ulty", + "▁in j", + "▁me l", + "▁cou rage", + "n es", + "▁m ort", + "▁tr oub", + "▁bur st", + "▁ang ry", + "▁pr oud", + "gg ed", + "▁spo ken", + "is ion", + "▁des ert", + "pt ion", + "▁com b", + "▁app arent", + "r ing", + "▁wat ched", + "n a", + "▁e ast", + "▁sh op", + "▁ag re", + "▁priv ate", + "est y", + "▁j ul", + "▁fin ished", + "▁anx ious", + "ot ion", + "▁fif teen", + "▁soci al", + "u nder", + "▁dis m", + "▁tou ch", + "▁w ine", + "▁att ack", + "▁ide as", + "▁geor ge", + "a f", + "re r", + "oo se", + "▁sp ace", + "▁sc r", + "▁ins ide", + "▁gentle men", + "▁civ il", + "i ently", + "▁form ed", + "▁f ol", + "▁go es", + "▁you' ve", + "▁th in", + "▁sur f", + "▁serv ant", + "▁b al", + "▁co ver", + "▁our selves", + "▁fall en", + "▁hen ry", + "▁l ot", + "i um", + "▁ad vent", + "▁car riage", + "▁bab y", + "▁ele ct", + "▁to ng", + "▁app re", + "▁every body", + "ud ed", + "▁comm un", + "▁in e", + "it ive", + "▁wa ited", + "c ise", + "▁gr ou", + "he t", + "▁v ain", + "▁imp ro", + "▁fav or", + "er ial", + "▁spe ed", + "▁wind ows", + "▁care fully", + "▁i ce", + "▁no ise", + "▁her o", + "▁j im", + "▁will iam", + "▁pe cul", + "▁prom ised", + "▁walk ing", + "▁forg otten", + "▁oblig ed", + "▁earn est", + "▁m ain", + "▁l ose", + "▁gl 
ance", + "▁ves sel", + "▁gr ad", + "▁th ro", + "▁bo d", + "▁should er", + "▁met h", + "▁anim als", + "▁not iced", + "ab les", + "▁pecul iar", + "▁f ier", + "▁p ot", + "▁quiet ly", + "▁c up", + "▁ser ious", + "▁tre mb", + "▁gener ally", + "▁americ an", + "▁sym p", + "r al", + "▁d on", + "▁fr ance", + "ict ion", + "▁propert y", + "▁should ers", + "▁str anger", + "▁s an", + "▁c ow", + "▁what 's", + "▁d ust", + "▁affect ion", + "▁hands ome", + "▁hig her", + "i ant", + "nd ay", + "▁we l", + "▁po et", + "▁sl a", + "▁dist inct", + "▁m am", + "▁p ier", + "ac ing", + "ag ue", + "▁gr own", + "u ly", + "▁d '", + "▁ch amber", + "▁des ce", + "▁mur m", + "st em", + "▁person al", + "▁f ancy", + "▁of fered", + "os ite", + "ons ie", + "▁bu ilt", + "▁ed ge", + "▁whis pered", + "▁sk in", + "▁pie ces", + "it ated", + "c her", + "os ity", + "▁p it", + "▁cont ro", + "▁f aces", + "▁sp ent", + "▁inter rupt", + "h ow", + "is ters", + "▁but ter", + "▁de velop", + "▁un k", + "h ip", + "▁he at", + "▁fo nd", + "▁co at", + "▁tou ched", + "▁h ol", + "ing u", + "▁p i", + "▁r ace", + "▁j ump", + "▁surpr ised", + "ot ed", + "▁de fe", + "en ced", + "▁was n't", + "▁we ar", + "and on", + "▁f an", + "ac her", + "▁ar ch", + "▁ed uc", + "▁bra ve", + "at hered", + "▁e ld", + "▁we alth", + "▁sy stem", + "▁ger man", + "▁fal se", + "w ood", + "▁d are", + "ak ed", + "▁cous in", + "▁f er", + "ke y", + "▁l in", + "▁inte llect", + "▁prep ared", + "▁fing ers", + "▁sur r", + "▁mount ains", + "i pp", + "▁opport unity", + "a ff", + "▁b are", + "▁d or", + "▁int rodu", + "▁co llect", + "▁love ly", + "▁r ag", + "▁cr own", + "▁mat ters", + "▁compan ion", + "▁we ather", + "▁al ar", + "▁inn oc", + "▁r is", + "▁m ix", + "▁l ake", + "▁st ore", + "▁un h", + "▁mean ing", + "▁mem ory", + "o ver", + "▁b and", + "le ep", + "▁find ing", + "e e", + "▁char ge", + "▁gr at", + "▁att ract", + "▁gr ay", + "▁quar ter", + "▁av o", + "▁great ly", + "▁m ach", + "▁in h", + "▁as leep", + "▁par is", + "▁d av", + "▁al to", + "▁off er", + "▁opp 
osite", + "oun ced", + "er ve", + "▁bre ast", + "n own", + "▁read ing", + "▁alto gether", + "▁wr iting", + "pect ed", + "▁deg ree", + "c ing", + "n ight", + "▁ex ec", + "fort un", + "▁st at", + "▁feel ings", + "▁h ath", + "▁c ook", + "▁r ail", + "▁hon our", + "d ing", + "▁f ate", + "▁p or", + "▁fr ank", + "▁meet ing", + "▁r ough", + "▁al ive", + "▁h ide", + "it es", + "il ar", + "▁bl ow", + "▁cr uel", + "ra ph", + "▁hur t", + "▁l oss", + "▁thr own", + "▁ca used", + "▁we 'll", + "▁ser ve", + "▁du ke", + "▁b ent", + "▁un ited", + "▁see k", + "▁king dom", + "▁situ ation", + "▁empt y", + "n ers", + "▁d ue", + "▁li ked", + "▁sw ift", + "▁open ing", + "▁serv ants", + "c hen", + "ou ra", + "▁g h", + "▁sus pic", + "▁fre ed", + "oint ed", + "▁surf ace", + "c il", + "▁quest ions", + "▁ ess", + "▁cur ious", + "▁const it", + "▁accom pan", + "▁christ ian", + "▁f ill", + "are st", + "▁satisf ied", + "r on", + "▁s ides", + "▁p ity", + "▁re ve", + "▁equ al", + "▁he ight", + "▁or dered", + "os op", + "▁gre y", + "▁list ened", + "p et", + "▁re jo", + "▁cap t", + "ib ility", + "o b", + "▁m art", + "▁happ en", + "▁hur ried", + "▁doll ars", + "▁langu age", + "▁an ge", + "▁your s", + "▁supp osed", + "▁laugh ing", + "▁sett led", + "▁ro de", + "▁per m", + "▁dist ingu", + "▁hur ry", + "▁dest roy", + "▁tal ked", + "▁lift ed", + "oc r", + "▁squ are", + "▁val ue", + "▁tast e", + "▁v ast", + "▁king 's", + "▁r ul", + "▁r oof", + "▁tell ing", + "▁stud y", + "▁o w", + "▁p an", + "▁b as", + "▁r ising", + "▁suffic ient", + "▁for ced", + "▁r ise", + "▁at tend", + "▁phil osop", + "▁no se", + "▁six ty", + "he st", + "▁p in", + "▁e gg", + "▁am b", + "▁fa ult", + "b ur", + "▁st ation", + "▁dist ur", + "▁reg ular", + "ill e", + "▁p ack", + "▁spe cial", + "▁hon est", + "▁build ing", + "▁se ason", + "▁sh ape", + "▁pr ide", + "▁sm iling", + "li ke", + "▁ord ers", + "y n", + "▁wood s", + "▁accom pl", + "c on", + "▁s am", + "▁us ually", + "▁wat ching", + "▁sac ri", + "er ved", + "▁pass age", + "▁mat erial", + 
"▁vall ey", + "y r", + "▁st airs", + "▁li bert", + "▁fright ened", + "▁remar ked", + "▁t it", + "▁w ed", + "▁mist ress", + "▁direct ly", + "▁suff er", + "▁glo om", + "▁l ines", + "▁st ock", + "▁just ice", + "▁d iam", + "est ed", + "▁gr owing", + "▁does n't", + "▁g athered", + "▁ord inary", + "u ce", + "▁e ur", + "▁un f", + "▁kit chen", + "▁th reat", + "▁de pend", + "▁wee ks", + "▁desp air", + "▁meth od", + "▁se ized", + "▁disc uss", + "▁ex er", + "if y", + "▁fl ower", + "▁ign or", + "e er", + "ad es", + "▁de b", + "ep ing", + "▁a le", + "▁y o", + "ch ief", + "▁supp er", + "i k", + "▁bo ld", + "▁put ting", + "▁ne arer", + "us es", + "▁one 's", + "▁b le", + "▁y ork", + "▁end e", + "▁aff airs", + "▁sold ier", + "▁contr ary", + "▁mo ving", + "▁stre ets", + "▁b ir", + "r ance", + "hen s", + "▁c it", + "ic ated", + "▁cat ch", + "▁imag ine", + "ed s", + "▁mar ch", + "▁se arch", + "ar a", + "▁re ceive", + "im ate", + "▁m onsie", + "▁tw ice", + "▁pap a", + "▁monsie ur", + "▁re ck", + "m in", + "u de", + "▁pro cess", + "▁ho le", + "a ly", + "l in", + "▁c ro", + "▁fav our", + "▁d ign", + "▁work ing", + "▁har m", + "▁eur ope", + "ant ic", + "▁pro ved", + "oc ked", + "▁pro ve", + "▁cl er", + "▁lo d", + "cept ion", + "▁pull ed", + "▁ar th", + "▁author ity", + "▁ha ven", + "▁j er", + "▁un s", + "▁move ment", + "ust ed", + "▁eng aged", + "▁brother s", + "▁advant age", + "l ished", + "o le", + "▁arth ur", + "▁a ut", + "▁st ones", + "▁far m", + "▁diffe rence", + "▁f art", + "▁as ide", + "▁m as", + "▁obser v", + "▁hen ce", + "▁possess ion", + "▁hill s", + "▁fort un", + "ul s", + "ail s", + "▁inst ance", + "▁she 's", + "▁o l", + "▁ho ly", + "▁fle w", + "k y", + "▁col or", + "▁r ate", + "▁do ors", + "▁bus y", + "se t", + "▁add ress", + "▁famil iar", + "▁we ight", + "▁aw are", + "▁play ed", + "▁symp ath", + "ll s", + "▁sole mn", + "▁l iter", + "▁t est", + "▁em per", + "▁ind ian", + "▁dist ant", + "▁interest ing", + "▁b ull", + "▁thor ough", + "▁w ore", + "▁wor ked", + "▁expl ained", + 
"▁excell ent", + "▁splend id", + "▁tong ue", + "▁d i", + "▁p ard", + "▁n amed", + "▁sh ame", + "▁fr anc", + "▁spe ct", + "▁moment s", + "b ers", + "▁w il", + "▁my ster", + "▁se ated", + "▁inst antly", + "▁sim ilar", + "▁ende av", + "▁me asure", + "▁natur ally", + "nd s", + "▁su f", + "▁am ount", + "▁im per", + "▁dog s", + "it able", + "▁br it", + "▁necess ity", + "r id", + "ul ous", + "▁conf idence", + "d en", + "▁p arent", + "▁w id", + "▁v ir", + "▁never the", + "▁agre ed", + "▁neverthe less", + "un ch", + "▁hear ing", + "▁t akes", + "▁a ug", + "▁un ivers", + "en ance", + "▁un w", + "▁ear l", + "▁keep ing", + "▁dri ve", + "▁produ ced", + "▁a ud", + "on 's", + "▁n ames", + "ag n", + "▁disappe ared", + "▁thr ow", + "▁pres ident", + "▁god s", + "▁mag ic", + "▁repres ent", + "▁unk nown", + "p or", + "▁ter ror", + "▁haven 't", + "as c", + "▁supp ort", + "▁smo ke", + "▁w icked", + "k er", + "▁wor ks", + "▁art ic", + "▁d ull", + "▁yes ter", + "▁fall ing", + "▁worth y", + "▁libert y", + "ul ation", + "▁des ign", + "▁want s", + "▁ev idence", + "▁compan ions", + "▁spir its", + "▁co ast", + "▁might y", + "▁particular ly", + "▁wit ness", + "▁disco ver", + "▁s ought", + "▁sp an", + "' ve", + "▁r are", + "▁offic ers", + "l v", + "z y", + "▁yester day", + "ve y", + "c ent", + "▁p owers", + "▁y ield", + "▁c ool", + "▁or gan", + "▁am az", + "▁point ed", + "f ord", + "▁cl aim", + "▁cont ent", + "▁poss ibly", + "▁ter ms", + "▁tri um", + "▁offic er", + "▁pers u", + "▁ce ased", + "▁dro ve", + "▁occur red", + "▁g ree", + "▁li es", + "▁other wise", + "▁emper or", + "▁h om", + "▁st ars", + "▁kne es", + "▁trium ph", + "ru ction", + "▁pa used", + "om s", + "▁requ ired", + "▁fail ed", + "▁unh app", + "▁diam ond", + "▁r at", + "▁al i", + "▁d ouble", + "▁form s", + "▁gi ves", + "▁fing er", + "ra ce", + "▁p air", + "al ous", + "ill a", + "▁bo b", + "▁el iz", + "▁tra vel", + "▁carry ing", + "▁g le", + "il es", + "▁te eth", + "es h", + "▁sh own", + "▁fr uit", + "▁wat ers", + "▁ent ertain", + 
"▁heart s", + "um n", + "▁lab or", + "in 't", + "▁p ill", + "▁en er", + "so ci", + "▁exam ple", + "▁u pper", + "▁fore ign", + "▁mor al", + "▁soft ly", + "ro se", + "▁hu ge", + "▁char les", + "▁pri est", + "▁exc it", + "▁f et", + "▁mother 's", + "▁possess ed", + "▁c ases", + "▁rep ort", + "▁mil k", + "▁aff air", + "▁princi ple", + "▁inh ab", + "▁freed om", + "▁pr oof", + "▁inte nded", + "▁satisf action", + "▁shout ed", + "is c", + "▁pl at", + "▁b ask", + "ent al", + "▁grou p", + "▁fart her", + "as m", + "▁un fortun", + "▁unt o", + "▁sing ing", + "▁arr ange", + "▁relig ion", + "▁b er", + "▁rock s", + "▁sevent een", + "▁d er", + "▁j ames", + "▁bu y", + "▁succeed ed", + "▁room s", + "▁lead ing", + "▁maj esty", + "▁event s", + "▁d ance", + "▁p aint", + "▁g ently", + "ac le", + "▁te le", + "▁pard on", + "us ing", + "▁dro p", + "f ather", + "▁in vent", + "▁ke y", + "▁mention ed", + "▁sevent y", + "▁r os", + "▁suff ering", + "▁rec ord", + "▁cab in", + "ro ad", + "▁dis s", + "iv al", + "▁demand ed", + "▁excit ement", + "▁as soci", + "▁pro gress", + "ang ers", + "▁cur i", + "▁americ a", + "▁ru le", + "▁b or", + "▁v ig", + "less ly", + "▁clear ly", + "▁b ore", + "▁she ep", + "▁reg ret", + "▁neighb our", + "b ly", + "i ance", + "▁inst inct", + "▁adv ice", + "▁aw ful", + "▁s en", + "▁f ully", + "▁g ather", + "▁pap ers", + "▁h idden", + "▁che st", + "▁bir th", + "h y", + "p ap", + "▁h ither", + "▁st uff", + "▁imp at", + "▁call ing", + "▁four th", + "▁dread ful", + "▁p os", + "▁g rief", + "▁br ill", + "▁power ful", + "▁present ed", + "▁fair y", + "▁expl ain", + "▁sho ot", + "▁prison er", + "▁jo ined", + "▁aff ord", + "m ond", + "at tered", + "▁ ing", + "im ents", + "▁she l", + "▁pre fer", + "▁consider able", + "▁ob ey", + "▁vo ices", + "▁inter v", + "▁interest ed", + "▁vir g", + "▁c red", + "▁c ard", + "▁e p", + "▁need ed", + "▁p ounds", + "▁con qu", + "▁cle ver", + "▁adv anced", + "▁c ord", + "ig hed", + "▁under t", + "▁resol ved", + "▁w ag", + "ist ic", + "▁pa ul", + "▁exc 
ited", + "▁cond itions", + "▁pict ures", + "ac ious", + "▁sh ining", + "▁su nday", + "▁ser ved", + "▁ste am", + "▁pol ice", + "▁spr ang", + "s ie", + "or a", + "es e", + "▁j es", + "▁no dd", + "▁sal t", + "▁field s", + "▁c art", + "▁ind ians", + "▁fier ce", + "d le", + "▁r ide", + "▁des ired", + "▁ed ward", + "▁import ance", + "▁inform ation", + "t ure", + "▁h osp", + "▁me mb", + "▁per ceived", + "▁y ard", + "▁cr it", + "tern al", + "▁t ask", + "▁fo ld", + "r ant", + "▁soon er", + "▁mer ch", + "▁absolute ly", + "▁cit iz", + "▁suf fered", + "▁t ight", + "▁d ur", + "▁is s", + "ill y", + "▁lo g", + "▁complete ly", + "h old", + "▁r ad", + "▁sh are", + "▁will ing", + "▁dev il", + "▁ship s", + "▁imag ination", + "▁super ior", + "c om", + "am s", + "▁any body", + "▁en v", + "▁app l", + "▁dra g", + "▁da wn", + "asp ed", + "▁occup ied", + "▁curi osity", + "i est", + "▁s igh", + "▁fo x", + "as ant", + "▁my st", + "▁ste ad", + "et t", + "▁cou ple", + "▁ty pe", + "▁extra ord", + "▁apparent ly", + "▁wel come", + "▁da ily", + "▁moder n", + "i ot", + "▁a in't", + "▁d ying", + "ll en", + "▁fe at", + "▁acc ident", + "▁count enance", + "▁ab andon", + "ort ion", + "▁lo ck", + "▁cr ime", + "p ir", + "▁m ult", + "▁al as", + "▁ref used", + "▁h ate", + "▁d w", + "▁when ever", + "▁than ks", + "▁sl ave", + "▁regard ed", + "▁suggest ed", + "ul f", + "▁act ually", + "g ment", + "▁s ize", + "re g", + "▁c ult", + "▁k at", + "▁bod ies", + "h us", + "▁b ay", + "▁tr uly", + "▁fl esh", + "ish op", + "▁sm ith", + "▁bet r", + "w ith", + "▁w et", + "▁rapid ly", + "g ers", + "▁o dd", + "as ons", + "et te", + "▁cl ub", + "ab el", + "▁hor ror", + "▁m ile", + "▁fl ight", + "▁cross ed", + "▁profess or", + "▁o ce", + "▁wor st", + "iz ation", + "▁rus hed", + "▁s cience", + "▁b rief", + "▁ste pped", + "▁mid st", + "h a", + "▁s our", + "▁m aint", + "▁br ain", + "▁cott age", + "▁exp ressed", + "▁equ ally", + "▁educ ation", + "▁aug ust", + "▁b uck", + "▁n ay", + "id s", + "▁tem pt", + "▁inqu ir", + "▁fool ish", 
+ "▁t aught", + "▁c op", + "▁d un", + "▁p icked", + "▁el sie", + "▁land s", + "▁dri ven", + "▁polit ical", + "m as", + "▁de ck", + "▁res ist", + "▁inst r", + "▁b on", + "▁k en", + "ip s", + "▁hot el", + "▁danger ous", + "i ally", + "n ow", + "▁do zen", + "▁tr ade", + "▁point s", + "▁nin et", + "ab ility", + "▁cr im", + "▁rel ations", + "▁inter p", + "▁bar b", + "▁delight ed", + "▁memb ers", + "▁s isters", + "▁st y", + "▁an ger", + "▁belie f", + "▁ask ing", + "▁me at", + "▁dis pl", + "▁rel ief", + "ific ation", + "▁hunt ing", + "▁ale x", + "ar ies", + "▁ob st", + "▁beh old", + "▁mist ake", + "▁inqu ired", + "▁remark able", + "▁orig in", + "c ked", + "▁n erv", + "ack s", + "ver t", + "ro p", + "▁care ful", + "▁w ounded", + "ad ing", + "▁ce re", + "▁enem ies", + "▁grad ually", + "▁interrupt ed", + "▁f is", + "▁st up", + "▁se vere", + "▁ke en", + "▁six teen", + "k ins", + "res p", + "▁wor n", + "▁fl our", + "▁sy lv", + "▁contro l", + "k in", + "▁l one", + "as ing", + "▁n ap", + "▁ass ert", + "▁dep th", + "▁kind ly", + "▁mur der", + "ac ity", + "▁ele ven", + "▁inv ol", + "▁d' art", + "▁w ings", + "▁o ak", + "▁e t", + "▁beg un", + "▁dream s", + "wh ile", + "▁more over", + "▁exp ed", + "▁inde pend", + "▁bur ied", + "▁appro ached", + "agn an", + "▁d'art agnan", + "▁se x", + "▁sa ved", + "▁har ry", + "▁phys ical", + "▁spec ies", + "c er", + "o e", + "▁gl ory", + "▁creat ures", + "▁news pap", + "▁s ang", + "▁pl enty", + "▁use ful", + "▁sho es", + "▁hop ed", + "▁frequ ently", + "▁sa f", + "▁dist r", + "▁princi p", + "▁p u", + "y 's", + "au nt", + "▁lo ver", + "▁fam ous", + "▁reco llect", + "▁n ur", + "▁gr im", + "▁ind if", + "▁char ming", + "▁a im", + "▁loo se", + "▁conscious ness", + "▁mam ma", + "▁ent hus", + "▁sle pt", + "▁smo oth", + "▁fight ing", + "▁hy p", + "▁enthus i", + "▁d ig", + "al ing", + "▁st age", + "▁any one", + "▁thr ust", + "▁des per", + "▁t ar", + "▁l amp", + "st one", + "▁st ern", + "▁ev ident", + "▁mean while", + "▁forg ive", + "▁accept ed", + "▁oce an", 
+ "▁to t", + "▁they 're", + "▁wo ndered", + "▁play ing", + "▁det ect", + "▁ha le", + "▁kn ife", + "ail ed", + "▁close ly", + "▁me as", + "▁proceed ed", + "▁mess age", + "▁m our", + "▁f ac", + "▁un ion", + "ustom ed", + "he m", + "am ing", + "▁ex ceed", + "▁fe ather", + "▁pre cious", + "▁cent ury", + "▁une x", + "▁p ark", + "ic ation", + "▁every where", + "▁mind s", + "▁extraord inary", + "▁a rose", + "▁ent rance", + "▁cap ital", + "▁rec all", + "▁burn ing", + "▁magn ific", + "o es", + "or ious", + "st and", + "▁as semb", + "▁pl ant", + "▁neighb or", + "▁l est", + "um ents", + "▁coll e", + "▁virt ue", + "▁be w", + "▁for b", + "▁ret reat", + "▁cap able", + "▁ass ured", + "▁const ant", + "▁govern or", + "▁incre ased", + "▁h orn", + "▁rem oved", + "▁fact s", + "▁abs ence", + "▁expl an", + "▁a ck", + "▁some body", + "▁aw a", + "▁adm it", + "▁cor rect", + "▁forg ot", + "▁je alous", + "▁kiss ed", + "▁pop ular", + "▁h ut", + "▁u g", + "pe lled", + "▁gr ant", + "▁friend ship", + "▁ind ign", + "▁sympath y", + "i able", + "er ous", + "▁th om", + "▁al ice", + "▁le vel", + "▁object s", + "▁p ressed", + "▁sh a", + "ro om", + "▁qu al", + "▁beg ged", + "▁em p", + "▁h ind", + "▁hig hest", + "▁cloud s", + "▁gh ost", + "▁ack now", + "ous ed", + "▁stri ke", + "▁wis hes", + "▁becom es", + "▁tremb ling", + "▁no b", + "▁kind ness", + "▁accord ingly", + "▁thro at", + "r ation", + "▁f are", + "▁we 're", + "▁st retched", + "▁fr ag", + "▁whe el", + "▁qu eer", + "▁grand father", + "f or", + "▁ch oose", + "▁hel en", + "▁eight y", + "▁l y", + "▁mis erable", + "▁cont empt", + "ign ed", + "▁mil itary", + "▁rus s", + "▁bask et", + "▁a head", + "oo ps", + "ive red", + "▁list ening", + "▁fr o", + "▁lar ger", + "▁div ine", + "i ber", + "▁st ories", + "anc hes", + "us hing", + "iz ing", + "▁tre asure", + "▁exc use", + "▁innoc ent", + "▁a id", + "▁rem ind", + "▁sla ves", + "r it", + "st airs", + "▁re ward", + "og raph", + "▁man age", + "▁dis h", + "▁through out", + "▁wa ves", + "▁jud gment", + "▁arri 
val", + "▁cho ice", + "▁unhapp y", + "ast ic", + "▁bl ank", + "▁adv ance", + "▁inform ed", + "▁acquaint ance", + "▁impress ion", + "▁myster ious", + "b b", + "▁a ra", + "▁not es", + "▁had n't", + "▁se ll", + "▁com r", + "▁im pl", + "▁ind ust", + "▁end ed", + "▁light s", + "▁nur se", + "▁s out", + "▁b ought", + "▁f red", + "▁mar ked", + "▁sc ream", + "me nd", + "▁une as", + "▁delic ate", + "▁we ary", + "est ic", + "▁prom pt", + "▁exper i", + "▁hung ry", + "▁fly ing", + "▁p ow", + "▁br idge", + "▁jo in", + "▁vis ible", + "▁understand ing", + "▁cry ing", + "▁avo id", + "▁t is", + "▁st iff", + "ac hes", + "▁rest r", + "▁sound s", + "▁b owed", + "▁c aut", + "▁good s", + "▁dav id", + "▁un able", + "▁you' d", + "ham ed", + "▁b os", + "er al", + "▁as hamed", + "▁some where", + "▁inf inite", + "ock s", + "▁dign ity", + "▁g ay", + "▁v ic", + "▁am id", + "▁ho llow", + "▁em otion", + "▁adm itted", + "▁parent s", + "▁w ra", + "▁h int", + "▁tem ple", + "▁comfort able", + "▁intellig ence", + "or ous", + "▁be aring", + "▁her s", + "ab eth", + "▁rem ains", + "▁cont em", + "▁set tle", + "▁imm ense", + "f fe", + "p her", + "▁c her", + "ld om", + "▁we ap", + "ul ated", + "▁light ed", + "gy pt", + "▁advent ure", + "▁thorough ly", + "▁e gypt", + "il st", + "ang es", + "▁ob t", + "▁friend ly", + "▁reck on", + "▁stup id", + "▁f ed", + "▁r ome", + "▁me al", + "▁int ention", + "▁return ing", + "▁conv in", + "▁c oo", + "le ction", + "▁as h", + "ac hel", + "▁ro pe", + "▁pr ice", + "▁pro ject", + "el t", + "row s", + "▁sec ure", + "▁esc aped", + "▁hop es", + "▁eliz abeth", + "▁saf ety", + "▁w ound", + "▁su p", + "▁un us", + "ons cious", + "▁hor ri", + "▁min ister", + "▁o x", + "ll a", + "ens ive", + "▁help ed", + "▁plain ly", + "▁se ldom", + "▁think s", + "▁fellow s", + "▁m ood", + "▁p ushed", + "▁exh ib", + "ing ing", + "▁th under", + "au d", + "ian a", + "▁fair ly", + "▁eld er", + "▁egg s", + "ir m", + "▁maid en", + "m other", + "▁appe ars", + "▁chee ks", + "▁w on", + "▁e ase", + "▁re du", + 
"▁sk ill", + "▁ext ent", + "▁pract ice", + "▁relig ious", + "▁becom ing", + "▁virg in", + "▁feat ures", + "▁t ied", + "▁when ce", + "▁some how", + "▁gre et", + "▁faith ful", + "▁concer ned", + "▁the at", + "▁b ishop", + "▁p ink", + "▁eager ly", + "re es", + "▁e ating", + "▁was te", + "▁r ank", + "▁fe m", + "▁br ide", + "▁un l", + "ott ed", + "cei ving", + "▁tri b", + "▁orig inal", + "▁concer ning", + "▁ha b", + "▁acc ustomed", + "▁pat ient", + "▁rec om", + "▁ce ll", + "oint ment", + "▁arr anged", + "v ille", + "it ure", + "▁who lly", + "▁old er", + "▁col our", + "▁prov ided", + "▁at e", + "▁part ly", + "▁mon t", + "olog y", + "▁pros pect", + "▁cere mon", + "▁ ze", + "▁l aughter", + "▁fe e", + "▁br anches", + "▁fl ed", + "r ight", + "▁wh ilst", + "▁sl ipped", + "▁viol ent", + "▁inhab it", + "▁s ons", + "▁eng age", + "▁unc om", + "▁deep ly", + "▁subst ance", + "▁t ale", + "▁t iny", + "▁d an", + "▁g a", + "▁be e", + "▁y ards", + "ick s", + "▁hast ily", + "he ld", + "▁w es", + "▁v ague", + "▁am use", + "▁mu d", + "▁wo lf", + "▁h ans", + "ill ing", + "▁supp ly", + "▁sil k", + "▁const antly", + "▁christ mas", + "▁mill ion", + "▁whis per", + "▁m ental", + "▁was hing", + "ver se", + "▁cl oth", + "▁bar on", + "▁cor resp", + "▁nodd ed", + "▁corresp ond", + "k a", + "▁he ll", + "▁g ain", + "▁r ust", + "▁ob tain", + "▁unc onscious", + "▁strugg le", + "▁estab lished", + "▁law y", + "ol s", + "▁sign s", + "▁ut tered", + "▁rom an", + "▁constit ution", + "p es", + "▁c ave", + "▁sp are", + "▁qu ant", + "▁im age", + "▁mer ry", + "▁treat ed", + "▁effort s", + "▁lone ly", + "r ated", + "▁n ut", + "▁gl anced", + "▁port ion", + "it or", + "▁re semb", + "▁with d", + "▁me ad", + "▁fe ast", + "▁pr im", + "▁cl iff", + "▁em er", + "▁prop ortion", + "▁consider ation", + "▁hast e", + "▁gaz e", + "▁sav age", + "▁c rew", + "▁to wer", + "▁l ack", + "▁cons cience", + "▁mer cy", + "▁exh a", + "▁cons ent", + "at ors", + "ur d", + "▁out l", + "▁cl o", + "▁ad op", + "▁among st", + "▁h anging", + 
"▁circ le", + "▁prep ar", + "▁brill iant", + "f l", + "▁g ained", + "▁r ow", + "▁tr oops", + "▁rep ro", + "▁m ing", + "ou l", + "▁d ared", + "▁l ion", + "▁jo e", + "▁wind s", + "▁bring ing", + "▁anx iety", + "▁bill y", + "▁consequ ence", + "f ice", + "p se", + "▁f ought", + "▁p red", + "▁sc ra", + "▁gl im", + "▁vict ory", + "p ed", + "▁r ab", + "▁sc ot", + "▁ob v", + "▁sh ock", + "ch an", + "▁kn ock", + "our se", + "▁hand ed", + "▁ind ul", + "▁pat ience", + "▁sout her", + "▁j ose", + "▁fe ver", + "▁ro lled", + "ict ed", + "▁set ting", + "▁profess ion", + "▁sylv ia", + "▁h un", + "ut ions", + "▁fe ared", + "▁br and", + "▁bo ots", + "▁fore head", + "▁princi ples", + "▁s ink", + "▁r ig", + "av al", + "▁pur ch", + "▁gaz ed", + "▁employ ed", + "▁murm ured", + "m ore", + "▁s ar", + "as hing", + "ur al", + "ac les", + "▁tr ad", + "▁act ive", + "▁bene f", + "▁bott le", + "▁r age", + "▁inv est", + "▁lu x", + "▁s ank", + "▁h ang", + "▁be ard", + "ent ial", + "▁lo ving", + "▁nat ive", + "▁inst ruct", + "▁wa ist", + "▁rel ation", + "▁disco very", + "▁mel an", + "▁nerv ous", + "▁obt ained", + "▁p ig", + "▁se ar", + "▁fl ag", + "▁tra il", + "▁distingu ished", + "▁st ared", + "▁mis ery", + "▁pr int", + "▁gu il", + "▁jump ed", + "▁sw im", + "▁appro aching", + "▁suspic ion", + "▁i v", + "▁man aged", + "ak er", + "▁te ach", + "▁mat ch", + "▁guil ty", + "▁w retched", + "▁r um", + "▁comp ar", + "▁the ory", + "▁s her", + "▁b ree", + "▁k ings", + "▁sh one", + "ather ine", + "▁thr one", + "▁show ing", + "aw s", + "▁rob in", + "▁emb ar", + "ut ation", + "▁woman 's", + "▁add ressed", + "▁prot est", + "▁admir ation", + "▁troub led", + "▁ug ly", + "o om", + "er ves", + "▁fl ung", + "▁sub s", + "▁rel ie", + "▁thousand s", + "n ce", + "▁o d", + "▁cur rent", + "▁wood en", + "▁sacri fice", + "ur ity", + "ci p", + "▁pe ar", + "▁far mer", + "▁need s", + "▁cond em", + "▁mem ber", + "▁b ade", + "▁d ancing", + "▁re asons", + "▁cons ult", + "▁sw all", + "▁shad ows", + "▁ange l", + "▁ninet een", + 
"▁sty le", + "f ield", + "▁l an", + "▁man if", + "▁ro bert", + "▁gr ate", + "▁eng ine", + "▁wis dom", + "▁jes us", + "▁con vent", + "▁pre ced", + "▁interest s", + "▁tri al", + "b or", + "i ven", + "▁n est", + "▁ex ch", + "▁vo y", + "▁ill ust", + "▁wor ship", + "▁ad am", + "▁ph r", + "▁princip al", + "▁h it", + "▁spe nd", + "▁stand s", + "▁resp ons", + "▁a y", + "▁ha w", + "▁wh ist", + "▁ar rest", + "▁kind s", + "▁requ ire", + "▁descri bed", + "▁l it", + "▁pre cise", + "▁prop osed", + "▁produ ce", + "▁utter ly", + "ul se", + "▁no vel", + "▁bl ame", + "▁cred it", + "▁p ause", + "os en", + "▁house hold", + "▁arm ed", + "▁follow s", + "up on", + "▁appro ach", + "▁nin ety", + "▁p ir", + "▁fl ore", + "iv ity", + "▁ref use", + "▁sens ible", + "cho ly", + "▁nation al", + "▁g rie", + "▁re ven", + "▁let 's", + "▁delight ful", + "▁extreme ly", + "▁melan choly", + "u ing", + "▁en orm", + "cl es", + "▁slight ly", + "▁sac red", + "▁recogn ized", + "▁myst ery", + "▁g ri", + "▁comp re", + "▁dist ress", + "▁war ri", + "▁use less", + "▁tri f", + "▁mount ed", + "▁phil ip", + "▁ener gy", + "▁explan ation", + "▁c as", + "at ory", + "▁p our", + "▁r ic", + "▁ch osen", + "▁every one", + "umb led", + "▁a pr", + "▁c am", + "▁pro c", + "▁res umed", + "▁appre ci", + "▁alex and", + "▁a ven", + "▁w ing", + "▁int ense", + "▁high ly", + "▁lu cy", + "▁sol id", + "▁depart ure", + "▁agree able", + "▁exer cise", + "a pped", + "▁w ard", + "▁b ud", + "▁d well", + "ic ate", + "▁de ce", + "▁te acher", + "te nding", + "▁ma x", + "▁requ est", + "▁unex pected", + "▁jose ph", + "c ol", + "▁le ap", + "▁vict im", + "▁s ighed", + "▁for ces", + "ch ie", + "▁fe ed", + "▁sp ort", + "▁dri ft", + "▁wed ding", + "▁brit ish", + "se c", + "▁att itude", + "▁vis ion", + "▁pi pe", + "▁to w", + "▁ha lt", + "▁man ners", + "▁te nd", + "▁fl ood", + "▁comm ission", + "▁gu ide", + "▁obser ve", + "▁conc ern", + "▁rus h", + "▁affect ed", + "f all", + "▁st ret", + "▁co ach", + "▁po ison", + "▁direct ed", + "▁med ic", + "▁g est", + 
"▁e cho", + "▁young er", + "▁conf usion", + "▁contin ue", + "▁par li", + "▁abs or", + "▁cent re", + "con om", + "▁horri ble", + "r ison", + "▁b ol", + "▁b ath", + "▁g own", + "▁by e", + "▁al oud", + "▁supp l", + "▁prof ound", + "▁er r", + "▁cheer ful", + "w orth", + "▁sent ence", + "▁mist aken", + "▁tor n", + "▁fig ures", + "▁accompan ied", + "▁c atherine", + "▁e conom", + "▁at m", + "▁sh aking", + "um ber", + "▁coun cil", + "l ot", + "▁as ce", + "il ities", + "▁sp ar", + "▁end s", + "▁stra w", + "▁knight s", + "▁atm osp", + "▁sh ade", + "▁br ow", + "▁sp ark", + "▁rest ed", + "▁sent iment", + "▁reco vered", + "▁subject s", + "▁dut ies", + "▁comp osed", + "▁sw ept", + "▁real ity", + "▁sing ular", + "▁trans p", + "▁loc ked", + "▁lou is", + "▁assist ance", + "▁w ake", + "re m", + "▁so vere", + "▁un p", + "▁lo ves", + "▁abs urd", + "▁soul s", + "▁immedi ate", + "▁rid ing", + "▁connect ion", + "▁chee k", + "▁magnific ent", + "▁e re", + "▁su gar", + "▁pl ans", + "▁pr ud", + "▁dis e", + "▁ad j", + "▁lean ing", + "▁surr ounded", + "▁we 've", + "▁or n", + "▁ro ll", + "▁pro ble", + "▁str ict", + "▁aw ake", + "▁pra ise", + "▁convin ced", + "▁re le", + "▁fr ame", + "▁bre aking", + "▁cur tain", + "▁stay ed", + "▁div ided", + "▁cra w", + "▁incl ined", + "▁prev ious", + "a ult", + "om en", + "▁st air", + "▁se es", + "▁pr on", + "bo ard", + "▁comple x", + "▁pray er", + "▁pier re", + "▁unfortun ate", + "g s", + "▁gen ius", + "▁incre ase", + "▁suffic iently", + "▁ban ks", + "▁revol ution", + "▁souther n", + "k i", + "o ke", + "▁a ust", + "ed y", + "▁l ing", + "▁count ess", + "▁sleep ing", + "▁dev oted", + "▁ut most", + "▁mark et", + "▁bos om", + "▁b ark", + "▁c ath", + "al t", + "ch ar", + "▁cl ock", + "▁hand ker", + "▁adm in", + "▁sens es", + "▁id ent", + "▁mid night", + "▁connect ed", + "▁perm itted", + "▁h id", + "▁f il", + "▁f aced", + "▁g ift", + "▁ch at", + "▁br id", + "▁nor ther", + "▁hor iz", + "▁colle ge", + "▁handker chief", + "is ions", + "▁re be", + "▁pol ic", + "▁ann 
ounced", + "oun ce", + "▁n ons", + "▁n urs", + "al es", + "▁fle et", + "▁rag ged", + "▁co ffe", + "▁part ies", + "▁del ay", + "▁sound ed", + "▁c ities", + "▁was h", + "▁app ointed", + "▁night s", + "▁inst it", + "▁god 's", + "▁stri king", + "▁gun s", + "▁aston ishment", + "▁merch ant", + "▁parli ament", + "n al", + "▁a x", + "at ched", + "▁p il", + "▁p age", + "if orm", + "▁pl ate", + "▁thir st", + "▁neg ro", + "▁ru in", + "▁inhabit ants", + "w in", + "ar f", + "▁r ib", + "▁add ition", + "▁arg ument", + "b our", + "▁t ad", + "▁sc en", + "▁gu ests", + "▁wonder ing", + "▁acquaint ed", + "▁int ent", + "pl ess", + "▁destroy ed", + "▁coffe e", + "in ent", + "le br", + "▁re nder", + "▁so b", + "▁de mon", + "▁des ir", + "ud ing", + "▁get s", + "▁ass ure", + "▁ra ise", + "▁shar ply", + "▁priv ile", + "▁alar m", + "▁mach ine", + "f ied", + "▁cont ract", + "▁del iber", + "▁dr own", + "▁after ward", + "▁gu est", + "▁concl usion", + "▁ris k", + "▁ignor ant", + "b ury", + "k ind", + "▁p ian", + "an 's", + "ur ies", + "▁so il", + "▁ref er", + "▁command ed", + "▁pract ical", + "▁to ss", + "▁of fe", + "▁be held", + "▁ar ist", + "▁quar ters", + "▁deg rees", + "▁fis her", + "▁nons ense", + "▁m c", + "is p", + "▁me chan", + "ke ep", + "▁doubt less", + "▁viol ence", + "▁neg lect", + "▁fol k", + "l iness", + "▁b ul", + "▁e aster", + "▁lo ft", + "▁cont ained", + "▁ref lection", + "▁ce lebr", + "▁lea f", + "▁concl uded", + "▁distr ict", + "i ation", + "r s", + "▁s cient", + "▁he 'd", + "▁sc orn", + "▁cr ack", + "▁ste ep", + "▁mut tered", + "▁estab lish", + "▁dar ling", + "▁and rew", + "▁ch im", + "qu is", + "▁qu ality", + "▁po lly", + "▁che ck", + "▁cra ft", + "▁trave ll", + "▁univers al", + "in ate", + "▁c ig", + "at ives", + "om p", + "ut en", + "▁j ac", + "▁jo b", + "▁sub m", + "▁read er", + "▁le is", + "▁em ph", + "▁surr ound", + "o x", + "p ent", + "it ate", + "▁ex tended", + "▁le v", + "▁over t", + "▁ret ired", + "▁pu zz", + "u able", + "▁li br", + "▁ch in", + "▁sp l", + "▁real 
ized", + "▁ca uses", + "▁pun ishment", + "▁phys ic", + "▁leis ure", + "c an", + "▁w ave", + "▁sh ake", + "▁char m", + "▁belong ed", + "m ber", + "▁b ones", + "▁g as", + "▁r ange", + "▁pre c", + "▁sm ell", + "▁may be", + "▁inv ited", + "▁troub les", + "▁t ables", + "an ch", + "ic ip", + "▁j une", + "▁ab o", + "▁ag es", + "▁any where", + "ff in", + "▁dr unk", + "▁proper ly", + "▁loc al", + "▁impro ve", + "▁atmosp here", + "▁d ir", + "▁he 'll", + "▁re b", + "▁r ang", + "▁comp ass", + "▁lie uten", + "▁lean ed", + "▁firm ly", + "▁n ations", + "▁ha y", + "▁we pt", + "▁r al", + "▁con ven", + "▁un iform", + "▁jul ia", + "e em", + "r ass", + "▁tr ack", + "▁comm er", + "▁bus hes", + "▁obs c", + "▁sort s", + "▁difficult ies", + "▁intellect ual", + "▁introdu ced", + "m ith", + "▁t ro", + "id ay", + "▁re ndered", + "▁r out", + "ad d", + "▁pl un", + "▁thr owing", + "▁hum ble", + "▁pol ite", + "▁num erous", + "▁move ments", + "▁success ful", + "▁cand le", + "▁separ ate", + "▁protect ion", + "▁thom as", + "▁enorm ous", + "▁un b", + "▁rep ub", + "▁sun sh", + "▁desce nded", + "▁unus ual", + "i ved", + "▁bl az", + "▁show s", + "▁sim pl", + "▁cat tle", + "▁cre pt", + "▁aston ished", + "▁desert ed", + "▁l ap", + "ar se", + "▁ne arest", + "ud es", + "▁ent ering", + "▁ide al", + "stand ing", + "nd ers", + "▁so re", + "ain e", + "▁cl os", + "▁our s", + "▁where ver", + "▁ter m", + "▁vis ited", + "▁cal cul", + "d s", + "▁b ase", + "▁g ates", + "▁st amp", + "▁li ber", + "▁offic ial", + "▁e rect", + "▁al t", + "el ia", + "▁har mon", + "▁pain ful", + "▁burn ed", + "▁repub lic", + "u er", + "▁l ately", + "▁it al", + "am m", + "▁te ar", + "▁act ions", + "▁fin al", + "▁start led", + "▁sens ation", + "▁fat al", + "ol ic", + "▁fl ash", + "▁app et", + "▁strong er", + "▁num bers", + "▁grat itude", + "▁fem ale", + "▁wes tern", + "l est" + ] + } +} \ No newline at end of file diff --git a/out/checkpoint-19000/tokenizer_config.json b/out/checkpoint-19000/tokenizer_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..0073e6415da746fc5c44a52e02785cb94510efa4 --- /dev/null +++ b/out/checkpoint-19000/tokenizer_config.json @@ -0,0 +1,9253 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|audio:0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|audio:1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|audio:2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "<|audio:3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "<|audio:4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "5": { + "content": "<|audio:5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "6": { + "content": "<|audio:6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "7": { + "content": "<|audio:7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "8": { + "content": "<|audio:8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "9": { + "content": "<|audio:9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "10": { + "content": "<|audio:10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "11": { + "content": "<|audio:11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "12": { + "content": "<|audio:12|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "13": { + "content": "<|audio:13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "14": { + "content": "<|audio:14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "15": { + "content": "<|audio:15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "16": { + "content": "<|audio:16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "17": { + "content": "<|audio:17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "18": { + "content": "<|audio:18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "19": { + "content": "<|audio:19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "20": { + "content": "<|audio:20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21": { + "content": "<|audio:21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "22": { + "content": "<|audio:22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "23": { + "content": "<|audio:23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "24": { + "content": "<|audio:24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "25": { + "content": "<|audio:25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"26": { + "content": "<|audio:26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "27": { + "content": "<|audio:27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "28": { + "content": "<|audio:28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "29": { + "content": "<|audio:29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "30": { + "content": "<|audio:30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "31": { + "content": "<|audio:31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32": { + "content": "<|audio:32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "33": { + "content": "<|audio:33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "34": { + "content": "<|audio:34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "35": { + "content": "<|audio:35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "36": { + "content": "<|audio:36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "37": { + "content": "<|audio:37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "38": { + "content": "<|audio:38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "39": { + "content": "<|audio:39|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "40": { + "content": "<|audio:40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "41": { + "content": "<|audio:41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "42": { + "content": "<|audio:42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "43": { + "content": "<|audio:43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "44": { + "content": "<|audio:44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "45": { + "content": "<|audio:45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "46": { + "content": "<|audio:46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "47": { + "content": "<|audio:47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "48": { + "content": "<|audio:48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "49": { + "content": "<|audio:49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "50": { + "content": "<|audio:50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "51": { + "content": "<|audio:51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "52": { + "content": "<|audio:52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "53": { + "content": 
"<|audio:53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "54": { + "content": "<|audio:54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "55": { + "content": "<|audio:55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "56": { + "content": "<|audio:56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "57": { + "content": "<|audio:57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "58": { + "content": "<|audio:58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "59": { + "content": "<|audio:59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "60": { + "content": "<|audio:60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "61": { + "content": "<|audio:61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "62": { + "content": "<|audio:62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "63": { + "content": "<|audio:63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "64": { + "content": "<|audio:64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "65": { + "content": "<|audio:65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "66": { + "content": "<|audio:66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "67": { + "content": "<|audio:67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "68": { + "content": "<|audio:68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "69": { + "content": "<|audio:69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "70": { + "content": "<|audio:70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "71": { + "content": "<|audio:71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "72": { + "content": "<|audio:72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "73": { + "content": "<|audio:73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "74": { + "content": "<|audio:74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "75": { + "content": "<|audio:75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "76": { + "content": "<|audio:76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "77": { + "content": "<|audio:77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "78": { + "content": "<|audio:78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "79": { + "content": "<|audio:79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "80": { + "content": "<|audio:80|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "81": { + "content": "<|audio:81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "82": { + "content": "<|audio:82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "83": { + "content": "<|audio:83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "84": { + "content": "<|audio:84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "85": { + "content": "<|audio:85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "86": { + "content": "<|audio:86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "87": { + "content": "<|audio:87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "88": { + "content": "<|audio:88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "89": { + "content": "<|audio:89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "90": { + "content": "<|audio:90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "91": { + "content": "<|audio:91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "92": { + "content": "<|audio:92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "93": { + "content": "<|audio:93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "94": { + "content": "<|audio:94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "95": { + "content": "<|audio:95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "96": { + "content": "<|audio:96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "97": { + "content": "<|audio:97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "98": { + "content": "<|audio:98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "99": { + "content": "<|audio:99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100": { + "content": "<|audio:100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101": { + "content": "<|audio:101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "102": { + "content": "<|audio:102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "103": { + "content": "<|audio:103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "104": { + "content": "<|audio:104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "105": { + "content": "<|audio:105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "106": { + "content": "<|audio:106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "<|audio:107|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "<|audio:108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "109": { + "content": "<|audio:109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "110": { + "content": "<|audio:110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "111": { + "content": "<|audio:111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "112": { + "content": "<|audio:112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "113": { + "content": "<|audio:113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "114": { + "content": "<|audio:114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "115": { + "content": "<|audio:115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "116": { + "content": "<|audio:116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "117": { + "content": "<|audio:117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "118": { + "content": "<|audio:118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "119": { + "content": "<|audio:119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "120": { + "content": "<|audio:120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "121": { + "content": "<|audio:121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "122": { + "content": "<|audio:122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "123": { + "content": "<|audio:123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "124": { + "content": "<|audio:124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "125": { + "content": "<|audio:125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "126": { + "content": "<|audio:126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "127": { + "content": "<|audio:127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128": { + "content": "<|audio:128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "129": { + "content": "<|audio:129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "130": { + "content": "<|audio:130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "131": { + "content": "<|audio:131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "132": { + "content": "<|audio:132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "133": { + "content": "<|audio:133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "134": { + 
"content": "<|audio:134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "135": { + "content": "<|audio:135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "136": { + "content": "<|audio:136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "137": { + "content": "<|audio:137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "138": { + "content": "<|audio:138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "139": { + "content": "<|audio:139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "140": { + "content": "<|audio:140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "141": { + "content": "<|audio:141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "142": { + "content": "<|audio:142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "143": { + "content": "<|audio:143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "144": { + "content": "<|audio:144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "145": { + "content": "<|audio:145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "146": { + "content": "<|audio:146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "147": { + "content": "<|audio:147|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "148": { + "content": "<|audio:148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "149": { + "content": "<|audio:149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "150": { + "content": "<|audio:150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151": { + "content": "<|audio:151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "152": { + "content": "<|audio:152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "153": { + "content": "<|audio:153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "154": { + "content": "<|audio:154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "155": { + "content": "<|audio:155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "156": { + "content": "<|audio:156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "157": { + "content": "<|audio:157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "158": { + "content": "<|audio:158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "159": { + "content": "<|audio:159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "160": { + "content": "<|audio:160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "161": { + "content": "<|audio:161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "162": { + "content": "<|audio:162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "163": { + "content": "<|audio:163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "164": { + "content": "<|audio:164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "165": { + "content": "<|audio:165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "166": { + "content": "<|audio:166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "167": { + "content": "<|audio:167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "168": { + "content": "<|audio:168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "169": { + "content": "<|audio:169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "170": { + "content": "<|audio:170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "171": { + "content": "<|audio:171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "172": { + "content": "<|audio:172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "173": { + "content": "<|audio:173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "174": { + "content": "<|audio:174|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "175": { + "content": "<|audio:175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "176": { + "content": "<|audio:176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "177": { + "content": "<|audio:177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "178": { + "content": "<|audio:178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "179": { + "content": "<|audio:179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "180": { + "content": "<|audio:180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "181": { + "content": "<|audio:181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "182": { + "content": "<|audio:182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "183": { + "content": "<|audio:183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "184": { + "content": "<|audio:184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "185": { + "content": "<|audio:185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "186": { + "content": "<|audio:186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "187": { + "content": "<|audio:187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "188": { + "content": "<|audio:188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "189": { + "content": "<|audio:189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "190": { + "content": "<|audio:190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "191": { + "content": "<|audio:191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "192": { + "content": "<|audio:192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "193": { + "content": "<|audio:193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "194": { + "content": "<|audio:194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "195": { + "content": "<|audio:195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "196": { + "content": "<|audio:196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "197": { + "content": "<|audio:197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "198": { + "content": "<|audio:198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "199": { + "content": "<|audio:199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200": { + "content": "<|audio:200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "201": { + 
"content": "<|audio:201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "202": { + "content": "<|audio:202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "203": { + "content": "<|audio:203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "204": { + "content": "<|audio:204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "205": { + "content": "<|audio:205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "206": { + "content": "<|audio:206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "207": { + "content": "<|audio:207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "208": { + "content": "<|audio:208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "209": { + "content": "<|audio:209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "210": { + "content": "<|audio:210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "211": { + "content": "<|audio:211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "212": { + "content": "<|audio:212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "213": { + "content": "<|audio:213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "214": { + "content": "<|audio:214|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "215": { + "content": "<|audio:215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "216": { + "content": "<|audio:216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "217": { + "content": "<|audio:217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "218": { + "content": "<|audio:218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "219": { + "content": "<|audio:219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "220": { + "content": "<|audio:220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "221": { + "content": "<|audio:221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "222": { + "content": "<|audio:222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "223": { + "content": "<|audio:223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "224": { + "content": "<|audio:224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "225": { + "content": "<|audio:225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "226": { + "content": "<|audio:226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "227": { + "content": "<|audio:227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "228": { + "content": "<|audio:228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "229": { + "content": "<|audio:229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "230": { + "content": "<|audio:230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "231": { + "content": "<|audio:231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "232": { + "content": "<|audio:232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "233": { + "content": "<|audio:233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "234": { + "content": "<|audio:234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "235": { + "content": "<|audio:235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "236": { + "content": "<|audio:236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "237": { + "content": "<|audio:237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "238": { + "content": "<|audio:238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "239": { + "content": "<|audio:239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "240": { + "content": "<|audio:240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "241": { + "content": "<|audio:241|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "242": { + "content": "<|audio:242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "243": { + "content": "<|audio:243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "244": { + "content": "<|audio:244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "245": { + "content": "<|audio:245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "246": { + "content": "<|audio:246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "247": { + "content": "<|audio:247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "248": { + "content": "<|audio:248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "249": { + "content": "<|audio:249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250": { + "content": "<|audio:250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "251": { + "content": "<|audio:251|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "252": { + "content": "<|audio:252|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "253": { + "content": "<|audio:253|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "254": { + "content": "<|audio:254|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "255": { + "content": "<|audio:255|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256": { + "content": "<|audio:256|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "257": { + "content": "<|audio:257|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "258": { + "content": "<|audio:258|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "259": { + "content": "<|audio:259|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "260": { + "content": "<|audio:260|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "261": { + "content": "<|audio:261|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "262": { + "content": "<|audio:262|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "263": { + "content": "<|audio:263|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "264": { + "content": "<|audio:264|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "265": { + "content": "<|audio:265|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "266": { + "content": "<|audio:266|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "267": { + "content": "<|audio:267|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "268": { + 
"content": "<|audio:268|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "269": { + "content": "<|audio:269|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "270": { + "content": "<|audio:270|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "271": { + "content": "<|audio:271|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "272": { + "content": "<|audio:272|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "273": { + "content": "<|audio:273|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "274": { + "content": "<|audio:274|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "275": { + "content": "<|audio:275|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "276": { + "content": "<|audio:276|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "277": { + "content": "<|audio:277|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "278": { + "content": "<|audio:278|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "279": { + "content": "<|audio:279|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "280": { + "content": "<|audio:280|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "281": { + "content": "<|audio:281|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "282": { + "content": "<|audio:282|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "283": { + "content": "<|audio:283|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "284": { + "content": "<|audio:284|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "285": { + "content": "<|audio:285|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "286": { + "content": "<|audio:286|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "287": { + "content": "<|audio:287|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "288": { + "content": "<|audio:288|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "289": { + "content": "<|audio:289|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "290": { + "content": "<|audio:290|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "291": { + "content": "<|audio:291|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "292": { + "content": "<|audio:292|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "293": { + "content": "<|audio:293|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "294": { + "content": "<|audio:294|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "295": { + "content": "<|audio:295|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "296": { + "content": "<|audio:296|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "297": { + "content": "<|audio:297|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "298": { + "content": "<|audio:298|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "299": { + "content": "<|audio:299|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "300": { + "content": "<|audio:300|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "301": { + "content": "<|audio:301|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "302": { + "content": "<|audio:302|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "303": { + "content": "<|audio:303|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "304": { + "content": "<|audio:304|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "305": { + "content": "<|audio:305|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "306": { + "content": "<|audio:306|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "307": { + "content": "<|audio:307|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "308": { + "content": "<|audio:308|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "309": { + "content": "<|audio:309|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "310": { + "content": "<|audio:310|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "311": { + "content": "<|audio:311|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "312": { + "content": "<|audio:312|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "313": { + "content": "<|audio:313|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "314": { + "content": "<|audio:314|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "315": { + "content": "<|audio:315|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "316": { + "content": "<|audio:316|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "317": { + "content": "<|audio:317|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "318": { + "content": "<|audio:318|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "319": { + "content": "<|audio:319|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "320": { + "content": "<|audio:320|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "321": { + "content": "<|audio:321|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "322": { + "content": "<|audio:322|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "323": { + "content": "<|audio:323|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "324": { + "content": "<|audio:324|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "325": { + "content": "<|audio:325|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "326": { + "content": "<|audio:326|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "327": { + "content": "<|audio:327|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "328": { + "content": "<|audio:328|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "329": { + "content": "<|audio:329|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "330": { + "content": "<|audio:330|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "331": { + "content": "<|audio:331|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "332": { + "content": "<|audio:332|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "333": { + "content": "<|audio:333|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "334": { + "content": "<|audio:334|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "335": { + 
"content": "<|audio:335|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "336": { + "content": "<|audio:336|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "337": { + "content": "<|audio:337|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "338": { + "content": "<|audio:338|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "339": { + "content": "<|audio:339|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "340": { + "content": "<|audio:340|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "341": { + "content": "<|audio:341|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "342": { + "content": "<|audio:342|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "343": { + "content": "<|audio:343|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "344": { + "content": "<|audio:344|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "345": { + "content": "<|audio:345|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "346": { + "content": "<|audio:346|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "347": { + "content": "<|audio:347|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "348": { + "content": "<|audio:348|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "349": { + "content": "<|audio:349|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "350": { + "content": "<|audio:350|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "351": { + "content": "<|audio:351|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "352": { + "content": "<|audio:352|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "353": { + "content": "<|audio:353|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "354": { + "content": "<|audio:354|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "355": { + "content": "<|audio:355|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "356": { + "content": "<|audio:356|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "357": { + "content": "<|audio:357|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "358": { + "content": "<|audio:358|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "359": { + "content": "<|audio:359|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "360": { + "content": "<|audio:360|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "361": { + "content": "<|audio:361|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "362": { + "content": "<|audio:362|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "363": { + "content": "<|audio:363|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "364": { + "content": "<|audio:364|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "365": { + "content": "<|audio:365|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "366": { + "content": "<|audio:366|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "367": { + "content": "<|audio:367|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "368": { + "content": "<|audio:368|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "369": { + "content": "<|audio:369|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "370": { + "content": "<|audio:370|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "371": { + "content": "<|audio:371|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "372": { + "content": "<|audio:372|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "373": { + "content": "<|audio:373|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "374": { + "content": "<|audio:374|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "375": { + "content": "<|audio:375|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "376": { + "content": "<|audio:376|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "377": { + "content": "<|audio:377|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "378": { + "content": "<|audio:378|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "379": { + "content": "<|audio:379|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "380": { + "content": "<|audio:380|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "381": { + "content": "<|audio:381|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "382": { + "content": "<|audio:382|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "383": { + "content": "<|audio:383|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "384": { + "content": "<|audio:384|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "385": { + "content": "<|audio:385|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "386": { + "content": "<|audio:386|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "387": { + "content": "<|audio:387|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "388": { + "content": "<|audio:388|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "389": { + "content": "<|audio:389|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "390": { + "content": "<|audio:390|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "391": { + "content": "<|audio:391|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "392": { + "content": "<|audio:392|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "393": { + "content": "<|audio:393|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "394": { + "content": "<|audio:394|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "395": { + "content": "<|audio:395|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "396": { + "content": "<|audio:396|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "397": { + "content": "<|audio:397|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "398": { + "content": "<|audio:398|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "399": { + "content": "<|audio:399|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "400": { + "content": "<|audio:400|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "401": { + "content": "<|audio:401|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "402": { + 
"content": "<|audio:402|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "403": { + "content": "<|audio:403|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "404": { + "content": "<|audio:404|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "405": { + "content": "<|audio:405|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "406": { + "content": "<|audio:406|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "407": { + "content": "<|audio:407|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "408": { + "content": "<|audio:408|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "409": { + "content": "<|audio:409|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "410": { + "content": "<|audio:410|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "411": { + "content": "<|audio:411|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "412": { + "content": "<|audio:412|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "413": { + "content": "<|audio:413|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "414": { + "content": "<|audio:414|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "415": { + "content": "<|audio:415|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "416": { + "content": "<|audio:416|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "417": { + "content": "<|audio:417|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "418": { + "content": "<|audio:418|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "419": { + "content": "<|audio:419|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "420": { + "content": "<|audio:420|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "421": { + "content": "<|audio:421|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "422": { + "content": "<|audio:422|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "423": { + "content": "<|audio:423|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "424": { + "content": "<|audio:424|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "425": { + "content": "<|audio:425|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "426": { + "content": "<|audio:426|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "427": { + "content": "<|audio:427|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "428": { + "content": "<|audio:428|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "429": { + "content": "<|audio:429|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "430": { + "content": "<|audio:430|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "431": { + "content": "<|audio:431|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "432": { + "content": "<|audio:432|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "433": { + "content": "<|audio:433|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "434": { + "content": "<|audio:434|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "435": { + "content": "<|audio:435|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "436": { + "content": "<|audio:436|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "437": { + "content": "<|audio:437|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "438": { + "content": "<|audio:438|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "439": { + "content": "<|audio:439|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "440": { + "content": "<|audio:440|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "441": { + "content": "<|audio:441|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "442": { + "content": "<|audio:442|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "443": { + "content": "<|audio:443|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "444": { + "content": "<|audio:444|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "445": { + "content": "<|audio:445|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "446": { + "content": "<|audio:446|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "447": { + "content": "<|audio:447|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "448": { + "content": "<|audio:448|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "449": { + "content": "<|audio:449|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "450": { + "content": "<|audio:450|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "451": { + "content": "<|audio:451|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "452": { + "content": "<|audio:452|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "453": { + "content": "<|audio:453|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "454": { + "content": "<|audio:454|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "455": { + "content": "<|audio:455|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "456": { + "content": "<|audio:456|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "457": { + "content": "<|audio:457|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "458": { + "content": "<|audio:458|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "459": { + "content": "<|audio:459|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "460": { + "content": "<|audio:460|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "461": { + "content": "<|audio:461|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "462": { + "content": "<|audio:462|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "463": { + "content": "<|audio:463|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "464": { + "content": "<|audio:464|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "465": { + "content": "<|audio:465|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "466": { + "content": "<|audio:466|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "467": { + "content": "<|audio:467|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "468": { + "content": "<|audio:468|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "469": { + 
"content": "<|audio:469|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "470": { + "content": "<|audio:470|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "471": { + "content": "<|audio:471|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "472": { + "content": "<|audio:472|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "473": { + "content": "<|audio:473|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "474": { + "content": "<|audio:474|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "475": { + "content": "<|audio:475|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "476": { + "content": "<|audio:476|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "477": { + "content": "<|audio:477|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "478": { + "content": "<|audio:478|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "479": { + "content": "<|audio:479|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "480": { + "content": "<|audio:480|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "481": { + "content": "<|audio:481|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "482": { + "content": "<|audio:482|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "483": { + "content": "<|audio:483|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "484": { + "content": "<|audio:484|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "485": { + "content": "<|audio:485|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "486": { + "content": "<|audio:486|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "487": { + "content": "<|audio:487|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "488": { + "content": "<|audio:488|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "489": { + "content": "<|audio:489|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "490": { + "content": "<|audio:490|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "491": { + "content": "<|audio:491|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "492": { + "content": "<|audio:492|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "493": { + "content": "<|audio:493|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "494": { + "content": "<|audio:494|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "495": { + "content": "<|audio:495|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "496": { + "content": "<|audio:496|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "497": { + "content": "<|audio:497|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "498": { + "content": "<|audio:498|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "499": { + "content": "<|audio:499|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "500": { + "content": "<|audio:500|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "501": { + "content": "<|audio:501|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "502": { + "content": "<|audio:502|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "503": { + "content": "<|audio:503|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "504": { + "content": "<|audio:504|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "505": { + "content": "<|audio:505|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "506": { + "content": "<|audio:506|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "507": { + "content": "<|audio:507|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "508": { + "content": "<|audio:508|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "509": { + "content": "<|audio:509|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "510": { + "content": "<|audio:510|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "511": { + "content": "<|audio:511|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "512": { + "content": "<|audio:512|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "513": { + "content": "<|audio:513|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "514": { + "content": "<|audio:514|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "515": { + "content": "<|audio:515|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "516": { + "content": "<|audio:516|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "517": { + "content": "<|audio:517|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "518": { + "content": "<|audio:518|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "519": { + "content": "<|audio:519|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "520": { + "content": "<|audio:520|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "521": { + "content": "<|audio:521|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "522": { + "content": "<|audio:522|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "523": { + "content": "<|audio:523|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "524": { + "content": "<|audio:524|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "525": { + "content": "<|audio:525|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "526": { + "content": "<|audio:526|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "527": { + "content": "<|audio:527|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "528": { + "content": "<|audio:528|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "529": { + "content": "<|audio:529|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "530": { + "content": "<|audio:530|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "531": { + "content": "<|audio:531|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "532": { + "content": "<|audio:532|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "533": { + "content": "<|audio:533|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "534": { + "content": "<|audio:534|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "535": { + "content": "<|audio:535|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "536": { + 
"content": "<|audio:536|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "537": { + "content": "<|audio:537|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "538": { + "content": "<|audio:538|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "539": { + "content": "<|audio:539|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "540": { + "content": "<|audio:540|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "541": { + "content": "<|audio:541|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "542": { + "content": "<|audio:542|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "543": { + "content": "<|audio:543|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "544": { + "content": "<|audio:544|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "545": { + "content": "<|audio:545|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "546": { + "content": "<|audio:546|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "547": { + "content": "<|audio:547|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "548": { + "content": "<|audio:548|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "549": { + "content": "<|audio:549|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "550": { + "content": "<|audio:550|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "551": { + "content": "<|audio:551|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "552": { + "content": "<|audio:552|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "553": { + "content": "<|audio:553|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "554": { + "content": "<|audio:554|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "555": { + "content": "<|audio:555|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "556": { + "content": "<|audio:556|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "557": { + "content": "<|audio:557|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "558": { + "content": "<|audio:558|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "559": { + "content": "<|audio:559|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "560": { + "content": "<|audio:560|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "561": { + "content": "<|audio:561|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "562": { + "content": "<|audio:562|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "563": { + "content": "<|audio:563|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "564": { + "content": "<|audio:564|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "565": { + "content": "<|audio:565|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "566": { + "content": "<|audio:566|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "567": { + "content": "<|audio:567|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "568": { + "content": "<|audio:568|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "569": { + "content": "<|audio:569|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "570": { + "content": "<|audio:570|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "571": { + "content": "<|audio:571|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "572": { + "content": "<|audio:572|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "573": { + "content": "<|audio:573|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "574": { + "content": "<|audio:574|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "575": { + "content": "<|audio:575|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "576": { + "content": "<|audio:576|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "577": { + "content": "<|audio:577|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "578": { + "content": "<|audio:578|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "579": { + "content": "<|audio:579|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "580": { + "content": "<|audio:580|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "581": { + "content": "<|audio:581|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "582": { + "content": "<|audio:582|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "583": { + "content": "<|audio:583|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "584": { + "content": "<|audio:584|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "585": { + "content": "<|audio:585|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "586": { + "content": "<|audio:586|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "587": { + "content": "<|audio:587|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "588": { + "content": "<|audio:588|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "589": { + "content": "<|audio:589|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "590": { + "content": "<|audio:590|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "591": { + "content": "<|audio:591|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "592": { + "content": "<|audio:592|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "593": { + "content": "<|audio:593|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "594": { + "content": "<|audio:594|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "595": { + "content": "<|audio:595|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "596": { + "content": "<|audio:596|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "597": { + "content": "<|audio:597|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "598": { + "content": "<|audio:598|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "599": { + "content": "<|audio:599|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "600": { + "content": "<|audio:600|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "601": { + "content": "<|audio:601|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "602": { + "content": "<|audio:602|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "603": { + 
"content": "<|audio:603|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "604": { + "content": "<|audio:604|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "605": { + "content": "<|audio:605|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "606": { + "content": "<|audio:606|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "607": { + "content": "<|audio:607|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "608": { + "content": "<|audio:608|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "609": { + "content": "<|audio:609|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "610": { + "content": "<|audio:610|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "611": { + "content": "<|audio:611|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "612": { + "content": "<|audio:612|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "613": { + "content": "<|audio:613|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "614": { + "content": "<|audio:614|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "615": { + "content": "<|audio:615|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "616": { + "content": "<|audio:616|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "617": { + "content": "<|audio:617|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "618": { + "content": "<|audio:618|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "619": { + "content": "<|audio:619|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "620": { + "content": "<|audio:620|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "621": { + "content": "<|audio:621|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "622": { + "content": "<|audio:622|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "623": { + "content": "<|audio:623|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "624": { + "content": "<|audio:624|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "625": { + "content": "<|audio:625|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "626": { + "content": "<|audio:626|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "627": { + "content": "<|audio:627|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "628": { + "content": "<|audio:628|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "629": { + "content": "<|audio:629|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "630": { + "content": "<|audio:630|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "631": { + "content": "<|audio:631|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "632": { + "content": "<|audio:632|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "633": { + "content": "<|audio:633|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "634": { + "content": "<|audio:634|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "635": { + "content": "<|audio:635|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "636": { + "content": "<|audio:636|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "637": { + "content": "<|audio:637|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "638": { + "content": "<|audio:638|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "639": { + "content": "<|audio:639|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "640": { + "content": "<|audio:640|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "641": { + "content": "<|audio:641|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "642": { + "content": "<|audio:642|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "643": { + "content": "<|audio:643|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "644": { + "content": "<|audio:644|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "645": { + "content": "<|audio:645|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "646": { + "content": "<|audio:646|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "647": { + "content": "<|audio:647|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "648": { + "content": "<|audio:648|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "649": { + "content": "<|audio:649|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "650": { + "content": "<|audio:650|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "651": { + "content": "<|audio:651|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "652": { + "content": "<|audio:652|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "653": { + "content": "<|audio:653|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "654": { + "content": "<|audio:654|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "655": { + "content": "<|audio:655|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "656": { + "content": "<|audio:656|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "657": { + "content": "<|audio:657|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "658": { + "content": "<|audio:658|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "659": { + "content": "<|audio:659|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "660": { + "content": "<|audio:660|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "661": { + "content": "<|audio:661|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "662": { + "content": "<|audio:662|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "663": { + "content": "<|audio:663|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "664": { + "content": "<|audio:664|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "665": { + "content": "<|audio:665|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "666": { + "content": "<|audio:666|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "667": { + "content": "<|audio:667|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "668": { + "content": "<|audio:668|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "669": { + "content": "<|audio:669|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "670": { + 
"content": "<|audio:670|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "671": { + "content": "<|audio:671|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "672": { + "content": "<|audio:672|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "673": { + "content": "<|audio:673|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "674": { + "content": "<|audio:674|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "675": { + "content": "<|audio:675|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "676": { + "content": "<|audio:676|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "677": { + "content": "<|audio:677|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "678": { + "content": "<|audio:678|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "679": { + "content": "<|audio:679|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "680": { + "content": "<|audio:680|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "681": { + "content": "<|audio:681|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "682": { + "content": "<|audio:682|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "683": { + "content": "<|audio:683|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "684": { + "content": "<|audio:684|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "685": { + "content": "<|audio:685|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "686": { + "content": "<|audio:686|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "687": { + "content": "<|audio:687|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "688": { + "content": "<|audio:688|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "689": { + "content": "<|audio:689|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "690": { + "content": "<|audio:690|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "691": { + "content": "<|audio:691|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "692": { + "content": "<|audio:692|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "693": { + "content": "<|audio:693|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "694": { + "content": "<|audio:694|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "695": { + "content": "<|audio:695|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "696": { + "content": "<|audio:696|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "697": { + "content": "<|audio:697|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "698": { + "content": "<|audio:698|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "699": { + "content": "<|audio:699|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "700": { + "content": "<|audio:700|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "701": { + "content": "<|audio:701|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "702": { + "content": "<|audio:702|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "703": { + "content": "<|audio:703|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "704": { + "content": "<|audio:704|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "705": { + "content": "<|audio:705|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "706": { + "content": "<|audio:706|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "707": { + "content": "<|audio:707|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "708": { + "content": "<|audio:708|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "709": { + "content": "<|audio:709|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "710": { + "content": "<|audio:710|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "711": { + "content": "<|audio:711|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "712": { + "content": "<|audio:712|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "713": { + "content": "<|audio:713|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "714": { + "content": "<|audio:714|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "715": { + "content": "<|audio:715|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "716": { + "content": "<|audio:716|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "717": { + "content": "<|audio:717|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "718": { + "content": "<|audio:718|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "719": { + "content": "<|audio:719|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "720": { + "content": "<|audio:720|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "721": { + "content": "<|audio:721|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "722": { + "content": "<|audio:722|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "723": { + "content": "<|audio:723|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "724": { + "content": "<|audio:724|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "725": { + "content": "<|audio:725|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "726": { + "content": "<|audio:726|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "727": { + "content": "<|audio:727|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "728": { + "content": "<|audio:728|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "729": { + "content": "<|audio:729|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "730": { + "content": "<|audio:730|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "731": { + "content": "<|audio:731|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "732": { + "content": "<|audio:732|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "733": { + "content": "<|audio:733|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "734": { + "content": "<|audio:734|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "735": { + "content": "<|audio:735|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "736": { + "content": "<|audio:736|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "737": { + 
"content": "<|audio:737|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "738": { + "content": "<|audio:738|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "739": { + "content": "<|audio:739|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "740": { + "content": "<|audio:740|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "741": { + "content": "<|audio:741|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "742": { + "content": "<|audio:742|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "743": { + "content": "<|audio:743|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "744": { + "content": "<|audio:744|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "745": { + "content": "<|audio:745|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "746": { + "content": "<|audio:746|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "747": { + "content": "<|audio:747|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "748": { + "content": "<|audio:748|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "749": { + "content": "<|audio:749|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "750": { + "content": "<|audio:750|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "751": { + "content": "<|audio:751|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "752": { + "content": "<|audio:752|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "753": { + "content": "<|audio:753|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "754": { + "content": "<|audio:754|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "755": { + "content": "<|audio:755|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "756": { + "content": "<|audio:756|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "757": { + "content": "<|audio:757|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "758": { + "content": "<|audio:758|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "759": { + "content": "<|audio:759|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "760": { + "content": "<|audio:760|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "761": { + "content": "<|audio:761|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "762": { + "content": "<|audio:762|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "763": { + "content": "<|audio:763|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "764": { + "content": "<|audio:764|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "765": { + "content": "<|audio:765|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "766": { + "content": "<|audio:766|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "767": { + "content": "<|audio:767|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "768": { + "content": "<|audio:768|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "769": { + "content": "<|audio:769|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "770": { + "content": "<|audio:770|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "771": { + "content": "<|audio:771|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "772": { + "content": "<|audio:772|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "773": { + "content": "<|audio:773|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "774": { + "content": "<|audio:774|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "775": { + "content": "<|audio:775|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "776": { + "content": "<|audio:776|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "777": { + "content": "<|audio:777|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "778": { + "content": "<|audio:778|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "779": { + "content": "<|audio:779|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "780": { + "content": "<|audio:780|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "781": { + "content": "<|audio:781|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "782": { + "content": "<|audio:782|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "783": { + "content": "<|audio:783|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "784": { + "content": "<|audio:784|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "785": { + "content": "<|audio:785|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "786": { + "content": "<|audio:786|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "787": { + "content": "<|audio:787|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "788": { + "content": "<|audio:788|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "789": { + "content": "<|audio:789|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "790": { + "content": "<|audio:790|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "791": { + "content": "<|audio:791|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "792": { + "content": "<|audio:792|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "793": { + "content": "<|audio:793|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "794": { + "content": "<|audio:794|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "795": { + "content": "<|audio:795|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "796": { + "content": "<|audio:796|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "797": { + "content": "<|audio:797|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "798": { + "content": "<|audio:798|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "799": { + "content": "<|audio:799|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "800": { + "content": "<|audio:800|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "801": { + "content": "<|audio:801|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "802": { + "content": "<|audio:802|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "803": { + "content": "<|audio:803|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "804": { + 
"content": "<|audio:804|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "805": { + "content": "<|audio:805|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "806": { + "content": "<|audio:806|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "807": { + "content": "<|audio:807|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "808": { + "content": "<|audio:808|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "809": { + "content": "<|audio:809|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "810": { + "content": "<|audio:810|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "811": { + "content": "<|audio:811|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "812": { + "content": "<|audio:812|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "813": { + "content": "<|audio:813|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "814": { + "content": "<|audio:814|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "815": { + "content": "<|audio:815|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "816": { + "content": "<|audio:816|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "817": { + "content": "<|audio:817|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "818": { + "content": "<|audio:818|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "819": { + "content": "<|audio:819|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "820": { + "content": "<|audio:820|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "821": { + "content": "<|audio:821|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "822": { + "content": "<|audio:822|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "823": { + "content": "<|audio:823|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "824": { + "content": "<|audio:824|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "825": { + "content": "<|audio:825|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "826": { + "content": "<|audio:826|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "827": { + "content": "<|audio:827|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "828": { + "content": "<|audio:828|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "829": { + "content": "<|audio:829|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "830": { + "content": "<|audio:830|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "831": { + "content": "<|audio:831|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "832": { + "content": "<|audio:832|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "833": { + "content": "<|audio:833|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "834": { + "content": "<|audio:834|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "835": { + "content": "<|audio:835|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "836": { + "content": "<|audio:836|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "837": { + "content": "<|audio:837|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "838": { + "content": "<|audio:838|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "839": { + "content": "<|audio:839|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "840": { + "content": "<|audio:840|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "841": { + "content": "<|audio:841|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "842": { + "content": "<|audio:842|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "843": { + "content": "<|audio:843|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "844": { + "content": "<|audio:844|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "845": { + "content": "<|audio:845|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "846": { + "content": "<|audio:846|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "847": { + "content": "<|audio:847|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "848": { + "content": "<|audio:848|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "849": { + "content": "<|audio:849|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "850": { + "content": "<|audio:850|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "851": { + "content": "<|audio:851|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "852": { + "content": "<|audio:852|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "853": { + "content": "<|audio:853|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "854": { + "content": "<|audio:854|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "855": { + "content": "<|audio:855|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "856": { + "content": "<|audio:856|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "857": { + "content": "<|audio:857|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "858": { + "content": "<|audio:858|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "859": { + "content": "<|audio:859|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "860": { + "content": "<|audio:860|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "861": { + "content": "<|audio:861|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "862": { + "content": "<|audio:862|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "863": { + "content": "<|audio:863|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "864": { + "content": "<|audio:864|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "865": { + "content": "<|audio:865|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "866": { + "content": "<|audio:866|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "867": { + "content": "<|audio:867|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "868": { + "content": "<|audio:868|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "869": { + "content": "<|audio:869|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "870": { + "content": "<|audio:870|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "871": { + 
"content": "<|audio:871|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "872": { + "content": "<|audio:872|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "873": { + "content": "<|audio:873|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "874": { + "content": "<|audio:874|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "875": { + "content": "<|audio:875|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "876": { + "content": "<|audio:876|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "877": { + "content": "<|audio:877|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "878": { + "content": "<|audio:878|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "879": { + "content": "<|audio:879|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "880": { + "content": "<|audio:880|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "881": { + "content": "<|audio:881|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "882": { + "content": "<|audio:882|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "883": { + "content": "<|audio:883|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "884": { + "content": "<|audio:884|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "885": { + "content": "<|audio:885|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "886": { + "content": "<|audio:886|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "887": { + "content": "<|audio:887|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "888": { + "content": "<|audio:888|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "889": { + "content": "<|audio:889|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "890": { + "content": "<|audio:890|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "891": { + "content": "<|audio:891|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "892": { + "content": "<|audio:892|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "893": { + "content": "<|audio:893|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "894": { + "content": "<|audio:894|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "895": { + "content": "<|audio:895|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "896": { + "content": "<|audio:896|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "897": { + "content": "<|audio:897|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "898": { + "content": "<|audio:898|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "899": { + "content": "<|audio:899|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "900": { + "content": "<|audio:900|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "901": { + "content": "<|audio:901|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "902": { + "content": "<|audio:902|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "903": { + "content": "<|audio:903|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "904": { + "content": "<|audio:904|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "905": { + "content": "<|audio:905|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "906": { + "content": "<|audio:906|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "907": { + "content": "<|audio:907|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "908": { + "content": "<|audio:908|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "909": { + "content": "<|audio:909|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "910": { + "content": "<|audio:910|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "911": { + "content": "<|audio:911|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "912": { + "content": "<|audio:912|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "913": { + "content": "<|audio:913|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "914": { + "content": "<|audio:914|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "915": { + "content": "<|audio:915|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "916": { + "content": "<|audio:916|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "917": { + "content": "<|audio:917|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "918": { + "content": "<|audio:918|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "919": { + "content": "<|audio:919|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "920": { + "content": "<|audio:920|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "921": { + "content": "<|audio:921|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "922": { + "content": "<|audio:922|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "923": { + "content": "<|audio:923|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "924": { + "content": "<|audio:924|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "925": { + "content": "<|audio:925|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "926": { + "content": "<|audio:926|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "927": { + "content": "<|audio:927|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "928": { + "content": "<|audio:928|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "929": { + "content": "<|audio:929|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "930": { + "content": "<|audio:930|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "931": { + "content": "<|audio:931|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "932": { + "content": "<|audio:932|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "933": { + "content": "<|audio:933|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "934": { + "content": "<|audio:934|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "935": { + "content": "<|audio:935|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "936": { + "content": "<|audio:936|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "937": { + "content": "<|audio:937|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "938": { + 
"content": "<|audio:938|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "939": { + "content": "<|audio:939|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "940": { + "content": "<|audio:940|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "941": { + "content": "<|audio:941|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "942": { + "content": "<|audio:942|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "943": { + "content": "<|audio:943|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "944": { + "content": "<|audio:944|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "945": { + "content": "<|audio:945|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "946": { + "content": "<|audio:946|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "947": { + "content": "<|audio:947|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "948": { + "content": "<|audio:948|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "949": { + "content": "<|audio:949|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "950": { + "content": "<|audio:950|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "951": { + "content": "<|audio:951|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "952": { + "content": "<|audio:952|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "953": { + "content": "<|audio:953|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "954": { + "content": "<|audio:954|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "955": { + "content": "<|audio:955|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "956": { + "content": "<|audio:956|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "957": { + "content": "<|audio:957|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "958": { + "content": "<|audio:958|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "959": { + "content": "<|audio:959|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "960": { + "content": "<|audio:960|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "961": { + "content": "<|audio:961|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "962": { + "content": "<|audio:962|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "963": { + "content": "<|audio:963|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "964": { + "content": "<|audio:964|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "965": { + "content": "<|audio:965|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "966": { + "content": "<|audio:966|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "967": { + "content": "<|audio:967|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "968": { + "content": "<|audio:968|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "969": { + "content": "<|audio:969|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "970": { + "content": "<|audio:970|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "971": { + "content": "<|audio:971|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "972": { + "content": "<|audio:972|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "973": { + "content": "<|audio:973|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "974": { + "content": "<|audio:974|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "975": { + "content": "<|audio:975|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "976": { + "content": "<|audio:976|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "977": { + "content": "<|audio:977|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "978": { + "content": "<|audio:978|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "979": { + "content": "<|audio:979|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "980": { + "content": "<|audio:980|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "981": { + "content": "<|audio:981|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "982": { + "content": "<|audio:982|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "983": { + "content": "<|audio:983|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "984": { + "content": "<|audio:984|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "985": { + "content": "<|audio:985|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "986": { + "content": "<|audio:986|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "987": { + "content": "<|audio:987|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "988": { + "content": "<|audio:988|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "989": { + "content": "<|audio:989|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "990": { + "content": "<|audio:990|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "991": { + "content": "<|audio:991|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "992": { + "content": "<|audio:992|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "993": { + "content": "<|audio:993|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "994": { + "content": "<|audio:994|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "995": { + "content": "<|audio:995|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "996": { + "content": "<|audio:996|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "997": { + "content": "<|audio:997|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "998": { + "content": "<|audio:998|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "999": { + "content": "<|audio:999|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1000": { + "content": "<|audio:1000|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1001": { + "content": "<|audio:1001|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1002": { + "content": "<|audio:1002|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1003": { + "content": "<|audio:1003|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1004": { + "content": "<|audio:1004|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"1005": { + "content": "<|audio:1005|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1006": { + "content": "<|audio:1006|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1007": { + "content": "<|audio:1007|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1008": { + "content": "<|audio:1008|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1009": { + "content": "<|audio:1009|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1010": { + "content": "<|audio:1010|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1011": { + "content": "<|audio:1011|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1012": { + "content": "<|audio:1012|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1013": { + "content": "<|audio:1013|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1014": { + "content": "<|audio:1014|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1015": { + "content": "<|audio:1015|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1016": { + "content": "<|audio:1016|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1017": { + "content": "<|audio:1017|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1018": { + "content": 
"<|audio:1018|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1019": { + "content": "<|audio:1019|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1020": { + "content": "<|audio:1020|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1021": { + "content": "<|audio:1021|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1022": { + "content": "<|audio:1022|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1023": { + "content": "<|audio:1023|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1024": { + "content": "<|startoftranscript|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1025": { + "content": "<|endoftranscript|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1026": { + "content": "<|padding|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "clean_up_tokenization_spaces": true, + "model_max_length": 1877, + "pad_token": "<|padding|>", + "special_tokens": [ + "<|audio:0|>", + "<|audio:1|>", + "<|audio:2|>", + "<|audio:3|>", + "<|audio:4|>", + "<|audio:5|>", + "<|audio:6|>", + "<|audio:7|>", + "<|audio:8|>", + "<|audio:9|>", + "<|audio:10|>", + "<|audio:11|>", + "<|audio:12|>", + "<|audio:13|>", + "<|audio:14|>", + "<|audio:15|>", + "<|audio:16|>", + "<|audio:17|>", + "<|audio:18|>", + "<|audio:19|>", + "<|audio:20|>", + "<|audio:21|>", + "<|audio:22|>", + "<|audio:23|>", + "<|audio:24|>", + "<|audio:25|>", + "<|audio:26|>", + "<|audio:27|>", + "<|audio:28|>", + "<|audio:29|>", + 
"<|audio:30|>", + "<|audio:31|>", + "<|audio:32|>", + "<|audio:33|>", + "<|audio:34|>", + "<|audio:35|>", + "<|audio:36|>", + "<|audio:37|>", + "<|audio:38|>", + "<|audio:39|>", + "<|audio:40|>", + "<|audio:41|>", + "<|audio:42|>", + "<|audio:43|>", + "<|audio:44|>", + "<|audio:45|>", + "<|audio:46|>", + "<|audio:47|>", + "<|audio:48|>", + "<|audio:49|>", + "<|audio:50|>", + "<|audio:51|>", + "<|audio:52|>", + "<|audio:53|>", + "<|audio:54|>", + "<|audio:55|>", + "<|audio:56|>", + "<|audio:57|>", + "<|audio:58|>", + "<|audio:59|>", + "<|audio:60|>", + "<|audio:61|>", + "<|audio:62|>", + "<|audio:63|>", + "<|audio:64|>", + "<|audio:65|>", + "<|audio:66|>", + "<|audio:67|>", + "<|audio:68|>", + "<|audio:69|>", + "<|audio:70|>", + "<|audio:71|>", + "<|audio:72|>", + "<|audio:73|>", + "<|audio:74|>", + "<|audio:75|>", + "<|audio:76|>", + "<|audio:77|>", + "<|audio:78|>", + "<|audio:79|>", + "<|audio:80|>", + "<|audio:81|>", + "<|audio:82|>", + "<|audio:83|>", + "<|audio:84|>", + "<|audio:85|>", + "<|audio:86|>", + "<|audio:87|>", + "<|audio:88|>", + "<|audio:89|>", + "<|audio:90|>", + "<|audio:91|>", + "<|audio:92|>", + "<|audio:93|>", + "<|audio:94|>", + "<|audio:95|>", + "<|audio:96|>", + "<|audio:97|>", + "<|audio:98|>", + "<|audio:99|>", + "<|audio:100|>", + "<|audio:101|>", + "<|audio:102|>", + "<|audio:103|>", + "<|audio:104|>", + "<|audio:105|>", + "<|audio:106|>", + "<|audio:107|>", + "<|audio:108|>", + "<|audio:109|>", + "<|audio:110|>", + "<|audio:111|>", + "<|audio:112|>", + "<|audio:113|>", + "<|audio:114|>", + "<|audio:115|>", + "<|audio:116|>", + "<|audio:117|>", + "<|audio:118|>", + "<|audio:119|>", + "<|audio:120|>", + "<|audio:121|>", + "<|audio:122|>", + "<|audio:123|>", + "<|audio:124|>", + "<|audio:125|>", + "<|audio:126|>", + "<|audio:127|>", + "<|audio:128|>", + "<|audio:129|>", + "<|audio:130|>", + "<|audio:131|>", + "<|audio:132|>", + "<|audio:133|>", + "<|audio:134|>", + "<|audio:135|>", + "<|audio:136|>", + "<|audio:137|>", + "<|audio:138|>", 
+ "<|audio:139|>", + "<|audio:140|>", + "<|audio:141|>", + "<|audio:142|>", + "<|audio:143|>", + "<|audio:144|>", + "<|audio:145|>", + "<|audio:146|>", + "<|audio:147|>", + "<|audio:148|>", + "<|audio:149|>", + "<|audio:150|>", + "<|audio:151|>", + "<|audio:152|>", + "<|audio:153|>", + "<|audio:154|>", + "<|audio:155|>", + "<|audio:156|>", + "<|audio:157|>", + "<|audio:158|>", + "<|audio:159|>", + "<|audio:160|>", + "<|audio:161|>", + "<|audio:162|>", + "<|audio:163|>", + "<|audio:164|>", + "<|audio:165|>", + "<|audio:166|>", + "<|audio:167|>", + "<|audio:168|>", + "<|audio:169|>", + "<|audio:170|>", + "<|audio:171|>", + "<|audio:172|>", + "<|audio:173|>", + "<|audio:174|>", + "<|audio:175|>", + "<|audio:176|>", + "<|audio:177|>", + "<|audio:178|>", + "<|audio:179|>", + "<|audio:180|>", + "<|audio:181|>", + "<|audio:182|>", + "<|audio:183|>", + "<|audio:184|>", + "<|audio:185|>", + "<|audio:186|>", + "<|audio:187|>", + "<|audio:188|>", + "<|audio:189|>", + "<|audio:190|>", + "<|audio:191|>", + "<|audio:192|>", + "<|audio:193|>", + "<|audio:194|>", + "<|audio:195|>", + "<|audio:196|>", + "<|audio:197|>", + "<|audio:198|>", + "<|audio:199|>", + "<|audio:200|>", + "<|audio:201|>", + "<|audio:202|>", + "<|audio:203|>", + "<|audio:204|>", + "<|audio:205|>", + "<|audio:206|>", + "<|audio:207|>", + "<|audio:208|>", + "<|audio:209|>", + "<|audio:210|>", + "<|audio:211|>", + "<|audio:212|>", + "<|audio:213|>", + "<|audio:214|>", + "<|audio:215|>", + "<|audio:216|>", + "<|audio:217|>", + "<|audio:218|>", + "<|audio:219|>", + "<|audio:220|>", + "<|audio:221|>", + "<|audio:222|>", + "<|audio:223|>", + "<|audio:224|>", + "<|audio:225|>", + "<|audio:226|>", + "<|audio:227|>", + "<|audio:228|>", + "<|audio:229|>", + "<|audio:230|>", + "<|audio:231|>", + "<|audio:232|>", + "<|audio:233|>", + "<|audio:234|>", + "<|audio:235|>", + "<|audio:236|>", + "<|audio:237|>", + "<|audio:238|>", + "<|audio:239|>", + "<|audio:240|>", + "<|audio:241|>", + "<|audio:242|>", + "<|audio:243|>", + 
"<|audio:244|>", + "<|audio:245|>", + "<|audio:246|>", + "<|audio:247|>", + "<|audio:248|>", + "<|audio:249|>", + "<|audio:250|>", + "<|audio:251|>", + "<|audio:252|>", + "<|audio:253|>", + "<|audio:254|>", + "<|audio:255|>", + "<|audio:256|>", + "<|audio:257|>", + "<|audio:258|>", + "<|audio:259|>", + "<|audio:260|>", + "<|audio:261|>", + "<|audio:262|>", + "<|audio:263|>", + "<|audio:264|>", + "<|audio:265|>", + "<|audio:266|>", + "<|audio:267|>", + "<|audio:268|>", + "<|audio:269|>", + "<|audio:270|>", + "<|audio:271|>", + "<|audio:272|>", + "<|audio:273|>", + "<|audio:274|>", + "<|audio:275|>", + "<|audio:276|>", + "<|audio:277|>", + "<|audio:278|>", + "<|audio:279|>", + "<|audio:280|>", + "<|audio:281|>", + "<|audio:282|>", + "<|audio:283|>", + "<|audio:284|>", + "<|audio:285|>", + "<|audio:286|>", + "<|audio:287|>", + "<|audio:288|>", + "<|audio:289|>", + "<|audio:290|>", + "<|audio:291|>", + "<|audio:292|>", + "<|audio:293|>", + "<|audio:294|>", + "<|audio:295|>", + "<|audio:296|>", + "<|audio:297|>", + "<|audio:298|>", + "<|audio:299|>", + "<|audio:300|>", + "<|audio:301|>", + "<|audio:302|>", + "<|audio:303|>", + "<|audio:304|>", + "<|audio:305|>", + "<|audio:306|>", + "<|audio:307|>", + "<|audio:308|>", + "<|audio:309|>", + "<|audio:310|>", + "<|audio:311|>", + "<|audio:312|>", + "<|audio:313|>", + "<|audio:314|>", + "<|audio:315|>", + "<|audio:316|>", + "<|audio:317|>", + "<|audio:318|>", + "<|audio:319|>", + "<|audio:320|>", + "<|audio:321|>", + "<|audio:322|>", + "<|audio:323|>", + "<|audio:324|>", + "<|audio:325|>", + "<|audio:326|>", + "<|audio:327|>", + "<|audio:328|>", + "<|audio:329|>", + "<|audio:330|>", + "<|audio:331|>", + "<|audio:332|>", + "<|audio:333|>", + "<|audio:334|>", + "<|audio:335|>", + "<|audio:336|>", + "<|audio:337|>", + "<|audio:338|>", + "<|audio:339|>", + "<|audio:340|>", + "<|audio:341|>", + "<|audio:342|>", + "<|audio:343|>", + "<|audio:344|>", + "<|audio:345|>", + "<|audio:346|>", + "<|audio:347|>", + "<|audio:348|>", + 
"<|audio:349|>", + "<|audio:350|>", + "<|audio:351|>", + "<|audio:352|>", + "<|audio:353|>", + "<|audio:354|>", + "<|audio:355|>", + "<|audio:356|>", + "<|audio:357|>", + "<|audio:358|>", + "<|audio:359|>", + "<|audio:360|>", + "<|audio:361|>", + "<|audio:362|>", + "<|audio:363|>", + "<|audio:364|>", + "<|audio:365|>", + "<|audio:366|>", + "<|audio:367|>", + "<|audio:368|>", + "<|audio:369|>", + "<|audio:370|>", + "<|audio:371|>", + "<|audio:372|>", + "<|audio:373|>", + "<|audio:374|>", + "<|audio:375|>", + "<|audio:376|>", + "<|audio:377|>", + "<|audio:378|>", + "<|audio:379|>", + "<|audio:380|>", + "<|audio:381|>", + "<|audio:382|>", + "<|audio:383|>", + "<|audio:384|>", + "<|audio:385|>", + "<|audio:386|>", + "<|audio:387|>", + "<|audio:388|>", + "<|audio:389|>", + "<|audio:390|>", + "<|audio:391|>", + "<|audio:392|>", + "<|audio:393|>", + "<|audio:394|>", + "<|audio:395|>", + "<|audio:396|>", + "<|audio:397|>", + "<|audio:398|>", + "<|audio:399|>", + "<|audio:400|>", + "<|audio:401|>", + "<|audio:402|>", + "<|audio:403|>", + "<|audio:404|>", + "<|audio:405|>", + "<|audio:406|>", + "<|audio:407|>", + "<|audio:408|>", + "<|audio:409|>", + "<|audio:410|>", + "<|audio:411|>", + "<|audio:412|>", + "<|audio:413|>", + "<|audio:414|>", + "<|audio:415|>", + "<|audio:416|>", + "<|audio:417|>", + "<|audio:418|>", + "<|audio:419|>", + "<|audio:420|>", + "<|audio:421|>", + "<|audio:422|>", + "<|audio:423|>", + "<|audio:424|>", + "<|audio:425|>", + "<|audio:426|>", + "<|audio:427|>", + "<|audio:428|>", + "<|audio:429|>", + "<|audio:430|>", + "<|audio:431|>", + "<|audio:432|>", + "<|audio:433|>", + "<|audio:434|>", + "<|audio:435|>", + "<|audio:436|>", + "<|audio:437|>", + "<|audio:438|>", + "<|audio:439|>", + "<|audio:440|>", + "<|audio:441|>", + "<|audio:442|>", + "<|audio:443|>", + "<|audio:444|>", + "<|audio:445|>", + "<|audio:446|>", + "<|audio:447|>", + "<|audio:448|>", + "<|audio:449|>", + "<|audio:450|>", + "<|audio:451|>", + "<|audio:452|>", + "<|audio:453|>", + 
"<|audio:454|>", + "<|audio:455|>", + "<|audio:456|>", + "<|audio:457|>", + "<|audio:458|>", + "<|audio:459|>", + "<|audio:460|>", + "<|audio:461|>", + "<|audio:462|>", + "<|audio:463|>", + "<|audio:464|>", + "<|audio:465|>", + "<|audio:466|>", + "<|audio:467|>", + "<|audio:468|>", + "<|audio:469|>", + "<|audio:470|>", + "<|audio:471|>", + "<|audio:472|>", + "<|audio:473|>", + "<|audio:474|>", + "<|audio:475|>", + "<|audio:476|>", + "<|audio:477|>", + "<|audio:478|>", + "<|audio:479|>", + "<|audio:480|>", + "<|audio:481|>", + "<|audio:482|>", + "<|audio:483|>", + "<|audio:484|>", + "<|audio:485|>", + "<|audio:486|>", + "<|audio:487|>", + "<|audio:488|>", + "<|audio:489|>", + "<|audio:490|>", + "<|audio:491|>", + "<|audio:492|>", + "<|audio:493|>", + "<|audio:494|>", + "<|audio:495|>", + "<|audio:496|>", + "<|audio:497|>", + "<|audio:498|>", + "<|audio:499|>", + "<|audio:500|>", + "<|audio:501|>", + "<|audio:502|>", + "<|audio:503|>", + "<|audio:504|>", + "<|audio:505|>", + "<|audio:506|>", + "<|audio:507|>", + "<|audio:508|>", + "<|audio:509|>", + "<|audio:510|>", + "<|audio:511|>", + "<|audio:512|>", + "<|audio:513|>", + "<|audio:514|>", + "<|audio:515|>", + "<|audio:516|>", + "<|audio:517|>", + "<|audio:518|>", + "<|audio:519|>", + "<|audio:520|>", + "<|audio:521|>", + "<|audio:522|>", + "<|audio:523|>", + "<|audio:524|>", + "<|audio:525|>", + "<|audio:526|>", + "<|audio:527|>", + "<|audio:528|>", + "<|audio:529|>", + "<|audio:530|>", + "<|audio:531|>", + "<|audio:532|>", + "<|audio:533|>", + "<|audio:534|>", + "<|audio:535|>", + "<|audio:536|>", + "<|audio:537|>", + "<|audio:538|>", + "<|audio:539|>", + "<|audio:540|>", + "<|audio:541|>", + "<|audio:542|>", + "<|audio:543|>", + "<|audio:544|>", + "<|audio:545|>", + "<|audio:546|>", + "<|audio:547|>", + "<|audio:548|>", + "<|audio:549|>", + "<|audio:550|>", + "<|audio:551|>", + "<|audio:552|>", + "<|audio:553|>", + "<|audio:554|>", + "<|audio:555|>", + "<|audio:556|>", + "<|audio:557|>", + "<|audio:558|>", + 
"<|audio:559|>", + "<|audio:560|>", + "<|audio:561|>", + "<|audio:562|>", + "<|audio:563|>", + "<|audio:564|>", + "<|audio:565|>", + "<|audio:566|>", + "<|audio:567|>", + "<|audio:568|>", + "<|audio:569|>", + "<|audio:570|>", + "<|audio:571|>", + "<|audio:572|>", + "<|audio:573|>", + "<|audio:574|>", + "<|audio:575|>", + "<|audio:576|>", + "<|audio:577|>", + "<|audio:578|>", + "<|audio:579|>", + "<|audio:580|>", + "<|audio:581|>", + "<|audio:582|>", + "<|audio:583|>", + "<|audio:584|>", + "<|audio:585|>", + "<|audio:586|>", + "<|audio:587|>", + "<|audio:588|>", + "<|audio:589|>", + "<|audio:590|>", + "<|audio:591|>", + "<|audio:592|>", + "<|audio:593|>", + "<|audio:594|>", + "<|audio:595|>", + "<|audio:596|>", + "<|audio:597|>", + "<|audio:598|>", + "<|audio:599|>", + "<|audio:600|>", + "<|audio:601|>", + "<|audio:602|>", + "<|audio:603|>", + "<|audio:604|>", + "<|audio:605|>", + "<|audio:606|>", + "<|audio:607|>", + "<|audio:608|>", + "<|audio:609|>", + "<|audio:610|>", + "<|audio:611|>", + "<|audio:612|>", + "<|audio:613|>", + "<|audio:614|>", + "<|audio:615|>", + "<|audio:616|>", + "<|audio:617|>", + "<|audio:618|>", + "<|audio:619|>", + "<|audio:620|>", + "<|audio:621|>", + "<|audio:622|>", + "<|audio:623|>", + "<|audio:624|>", + "<|audio:625|>", + "<|audio:626|>", + "<|audio:627|>", + "<|audio:628|>", + "<|audio:629|>", + "<|audio:630|>", + "<|audio:631|>", + "<|audio:632|>", + "<|audio:633|>", + "<|audio:634|>", + "<|audio:635|>", + "<|audio:636|>", + "<|audio:637|>", + "<|audio:638|>", + "<|audio:639|>", + "<|audio:640|>", + "<|audio:641|>", + "<|audio:642|>", + "<|audio:643|>", + "<|audio:644|>", + "<|audio:645|>", + "<|audio:646|>", + "<|audio:647|>", + "<|audio:648|>", + "<|audio:649|>", + "<|audio:650|>", + "<|audio:651|>", + "<|audio:652|>", + "<|audio:653|>", + "<|audio:654|>", + "<|audio:655|>", + "<|audio:656|>", + "<|audio:657|>", + "<|audio:658|>", + "<|audio:659|>", + "<|audio:660|>", + "<|audio:661|>", + "<|audio:662|>", + "<|audio:663|>", + 
"<|audio:664|>", + "<|audio:665|>", + "<|audio:666|>", + "<|audio:667|>", + "<|audio:668|>", + "<|audio:669|>", + "<|audio:670|>", + "<|audio:671|>", + "<|audio:672|>", + "<|audio:673|>", + "<|audio:674|>", + "<|audio:675|>", + "<|audio:676|>", + "<|audio:677|>", + "<|audio:678|>", + "<|audio:679|>", + "<|audio:680|>", + "<|audio:681|>", + "<|audio:682|>", + "<|audio:683|>", + "<|audio:684|>", + "<|audio:685|>", + "<|audio:686|>", + "<|audio:687|>", + "<|audio:688|>", + "<|audio:689|>", + "<|audio:690|>", + "<|audio:691|>", + "<|audio:692|>", + "<|audio:693|>", + "<|audio:694|>", + "<|audio:695|>", + "<|audio:696|>", + "<|audio:697|>", + "<|audio:698|>", + "<|audio:699|>", + "<|audio:700|>", + "<|audio:701|>", + "<|audio:702|>", + "<|audio:703|>", + "<|audio:704|>", + "<|audio:705|>", + "<|audio:706|>", + "<|audio:707|>", + "<|audio:708|>", + "<|audio:709|>", + "<|audio:710|>", + "<|audio:711|>", + "<|audio:712|>", + "<|audio:713|>", + "<|audio:714|>", + "<|audio:715|>", + "<|audio:716|>", + "<|audio:717|>", + "<|audio:718|>", + "<|audio:719|>", + "<|audio:720|>", + "<|audio:721|>", + "<|audio:722|>", + "<|audio:723|>", + "<|audio:724|>", + "<|audio:725|>", + "<|audio:726|>", + "<|audio:727|>", + "<|audio:728|>", + "<|audio:729|>", + "<|audio:730|>", + "<|audio:731|>", + "<|audio:732|>", + "<|audio:733|>", + "<|audio:734|>", + "<|audio:735|>", + "<|audio:736|>", + "<|audio:737|>", + "<|audio:738|>", + "<|audio:739|>", + "<|audio:740|>", + "<|audio:741|>", + "<|audio:742|>", + "<|audio:743|>", + "<|audio:744|>", + "<|audio:745|>", + "<|audio:746|>", + "<|audio:747|>", + "<|audio:748|>", + "<|audio:749|>", + "<|audio:750|>", + "<|audio:751|>", + "<|audio:752|>", + "<|audio:753|>", + "<|audio:754|>", + "<|audio:755|>", + "<|audio:756|>", + "<|audio:757|>", + "<|audio:758|>", + "<|audio:759|>", + "<|audio:760|>", + "<|audio:761|>", + "<|audio:762|>", + "<|audio:763|>", + "<|audio:764|>", + "<|audio:765|>", + "<|audio:766|>", + "<|audio:767|>", + "<|audio:768|>", + 
"<|audio:769|>", + "<|audio:770|>", + "<|audio:771|>", + "<|audio:772|>", + "<|audio:773|>", + "<|audio:774|>", + "<|audio:775|>", + "<|audio:776|>", + "<|audio:777|>", + "<|audio:778|>", + "<|audio:779|>", + "<|audio:780|>", + "<|audio:781|>", + "<|audio:782|>", + "<|audio:783|>", + "<|audio:784|>", + "<|audio:785|>", + "<|audio:786|>", + "<|audio:787|>", + "<|audio:788|>", + "<|audio:789|>", + "<|audio:790|>", + "<|audio:791|>", + "<|audio:792|>", + "<|audio:793|>", + "<|audio:794|>", + "<|audio:795|>", + "<|audio:796|>", + "<|audio:797|>", + "<|audio:798|>", + "<|audio:799|>", + "<|audio:800|>", + "<|audio:801|>", + "<|audio:802|>", + "<|audio:803|>", + "<|audio:804|>", + "<|audio:805|>", + "<|audio:806|>", + "<|audio:807|>", + "<|audio:808|>", + "<|audio:809|>", + "<|audio:810|>", + "<|audio:811|>", + "<|audio:812|>", + "<|audio:813|>", + "<|audio:814|>", + "<|audio:815|>", + "<|audio:816|>", + "<|audio:817|>", + "<|audio:818|>", + "<|audio:819|>", + "<|audio:820|>", + "<|audio:821|>", + "<|audio:822|>", + "<|audio:823|>", + "<|audio:824|>", + "<|audio:825|>", + "<|audio:826|>", + "<|audio:827|>", + "<|audio:828|>", + "<|audio:829|>", + "<|audio:830|>", + "<|audio:831|>", + "<|audio:832|>", + "<|audio:833|>", + "<|audio:834|>", + "<|audio:835|>", + "<|audio:836|>", + "<|audio:837|>", + "<|audio:838|>", + "<|audio:839|>", + "<|audio:840|>", + "<|audio:841|>", + "<|audio:842|>", + "<|audio:843|>", + "<|audio:844|>", + "<|audio:845|>", + "<|audio:846|>", + "<|audio:847|>", + "<|audio:848|>", + "<|audio:849|>", + "<|audio:850|>", + "<|audio:851|>", + "<|audio:852|>", + "<|audio:853|>", + "<|audio:854|>", + "<|audio:855|>", + "<|audio:856|>", + "<|audio:857|>", + "<|audio:858|>", + "<|audio:859|>", + "<|audio:860|>", + "<|audio:861|>", + "<|audio:862|>", + "<|audio:863|>", + "<|audio:864|>", + "<|audio:865|>", + "<|audio:866|>", + "<|audio:867|>", + "<|audio:868|>", + "<|audio:869|>", + "<|audio:870|>", + "<|audio:871|>", + "<|audio:872|>", + "<|audio:873|>", + 
"<|audio:874|>", + "<|audio:875|>", + "<|audio:876|>", + "<|audio:877|>", + "<|audio:878|>", + "<|audio:879|>", + "<|audio:880|>", + "<|audio:881|>", + "<|audio:882|>", + "<|audio:883|>", + "<|audio:884|>", + "<|audio:885|>", + "<|audio:886|>", + "<|audio:887|>", + "<|audio:888|>", + "<|audio:889|>", + "<|audio:890|>", + "<|audio:891|>", + "<|audio:892|>", + "<|audio:893|>", + "<|audio:894|>", + "<|audio:895|>", + "<|audio:896|>", + "<|audio:897|>", + "<|audio:898|>", + "<|audio:899|>", + "<|audio:900|>", + "<|audio:901|>", + "<|audio:902|>", + "<|audio:903|>", + "<|audio:904|>", + "<|audio:905|>", + "<|audio:906|>", + "<|audio:907|>", + "<|audio:908|>", + "<|audio:909|>", + "<|audio:910|>", + "<|audio:911|>", + "<|audio:912|>", + "<|audio:913|>", + "<|audio:914|>", + "<|audio:915|>", + "<|audio:916|>", + "<|audio:917|>", + "<|audio:918|>", + "<|audio:919|>", + "<|audio:920|>", + "<|audio:921|>", + "<|audio:922|>", + "<|audio:923|>", + "<|audio:924|>", + "<|audio:925|>", + "<|audio:926|>", + "<|audio:927|>", + "<|audio:928|>", + "<|audio:929|>", + "<|audio:930|>", + "<|audio:931|>", + "<|audio:932|>", + "<|audio:933|>", + "<|audio:934|>", + "<|audio:935|>", + "<|audio:936|>", + "<|audio:937|>", + "<|audio:938|>", + "<|audio:939|>", + "<|audio:940|>", + "<|audio:941|>", + "<|audio:942|>", + "<|audio:943|>", + "<|audio:944|>", + "<|audio:945|>", + "<|audio:946|>", + "<|audio:947|>", + "<|audio:948|>", + "<|audio:949|>", + "<|audio:950|>", + "<|audio:951|>", + "<|audio:952|>", + "<|audio:953|>", + "<|audio:954|>", + "<|audio:955|>", + "<|audio:956|>", + "<|audio:957|>", + "<|audio:958|>", + "<|audio:959|>", + "<|audio:960|>", + "<|audio:961|>", + "<|audio:962|>", + "<|audio:963|>", + "<|audio:964|>", + "<|audio:965|>", + "<|audio:966|>", + "<|audio:967|>", + "<|audio:968|>", + "<|audio:969|>", + "<|audio:970|>", + "<|audio:971|>", + "<|audio:972|>", + "<|audio:973|>", + "<|audio:974|>", + "<|audio:975|>", + "<|audio:976|>", + "<|audio:977|>", + "<|audio:978|>", + 
"<|audio:979|>", + "<|audio:980|>", + "<|audio:981|>", + "<|audio:982|>", + "<|audio:983|>", + "<|audio:984|>", + "<|audio:985|>", + "<|audio:986|>", + "<|audio:987|>", + "<|audio:988|>", + "<|audio:989|>", + "<|audio:990|>", + "<|audio:991|>", + "<|audio:992|>", + "<|audio:993|>", + "<|audio:994|>", + "<|audio:995|>", + "<|audio:996|>", + "<|audio:997|>", + "<|audio:998|>", + "<|audio:999|>", + "<|audio:1000|>", + "<|audio:1001|>", + "<|audio:1002|>", + "<|audio:1003|>", + "<|audio:1004|>", + "<|audio:1005|>", + "<|audio:1006|>", + "<|audio:1007|>", + "<|audio:1008|>", + "<|audio:1009|>", + "<|audio:1010|>", + "<|audio:1011|>", + "<|audio:1012|>", + "<|audio:1013|>", + "<|audio:1014|>", + "<|audio:1015|>", + "<|audio:1016|>", + "<|audio:1017|>", + "<|audio:1018|>", + "<|audio:1019|>", + "<|audio:1020|>", + "<|audio:1021|>", + "<|audio:1022|>", + "<|audio:1023|>", + "<|startoftranscript|>", + "<|endoftranscript|>", + "<|padding|>" + ], + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/out/checkpoint-19000/trainer_state.json b/out/checkpoint-19000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..54057f833ebaaaecbb9c78d0c316353e0fda05c0 --- /dev/null +++ b/out/checkpoint-19000/trainer_state.json @@ -0,0 +1,133185 @@ +{ + "best_metric": 2.36470365524292, + "best_model_checkpoint": "./out/checkpoint-19000", + "epoch": 1.533370995077072, + "eval_steps": 1000, + "global_step": 19000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 8.07037365830038e-05, + "grad_norm": 0.8911969065666199, + "learning_rate": 2.0000000000000003e-06, + "loss": 2.6759, + "step": 1 + }, + { + "epoch": 0.0001614074731660076, + "grad_norm": 0.8724873661994934, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7001, + "step": 2 + }, + { + "epoch": 0.00024211120974901139, + "grad_norm": 0.9050428867340088, + "learning_rate": 6e-06, + "loss": 2.6291, + "step": 
3 + }, + { + "epoch": 0.0003228149463320152, + "grad_norm": 0.9249712824821472, + "learning_rate": 8.000000000000001e-06, + "loss": 2.7174, + "step": 4 + }, + { + "epoch": 0.000403518682915019, + "grad_norm": 0.9102846384048462, + "learning_rate": 1e-05, + "loss": 2.6831, + "step": 5 + }, + { + "epoch": 0.00048422241949802277, + "grad_norm": 0.9129141569137573, + "learning_rate": 1.2e-05, + "loss": 2.684, + "step": 6 + }, + { + "epoch": 0.0005649261560810266, + "grad_norm": 0.8648065328598022, + "learning_rate": 1.4000000000000001e-05, + "loss": 2.6488, + "step": 7 + }, + { + "epoch": 0.0006456298926640304, + "grad_norm": 0.8677545785903931, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.7143, + "step": 8 + }, + { + "epoch": 0.0007263336292470342, + "grad_norm": 0.919029712677002, + "learning_rate": 1.8e-05, + "loss": 2.631, + "step": 9 + }, + { + "epoch": 0.000807037365830038, + "grad_norm": 0.9289683103561401, + "learning_rate": 2e-05, + "loss": 2.6564, + "step": 10 + }, + { + "epoch": 0.0008877411024130417, + "grad_norm": 0.8810267448425293, + "learning_rate": 2.2000000000000003e-05, + "loss": 2.6395, + "step": 11 + }, + { + "epoch": 0.0009684448389960455, + "grad_norm": 0.8185754418373108, + "learning_rate": 2.4e-05, + "loss": 2.6871, + "step": 12 + }, + { + "epoch": 0.0010491485755790492, + "grad_norm": 0.9476913213729858, + "learning_rate": 2.6000000000000002e-05, + "loss": 2.7011, + "step": 13 + }, + { + "epoch": 0.0011298523121620531, + "grad_norm": 0.9616057872772217, + "learning_rate": 2.8000000000000003e-05, + "loss": 2.7373, + "step": 14 + }, + { + "epoch": 0.0012105560487450568, + "grad_norm": 0.9429686665534973, + "learning_rate": 3e-05, + "loss": 2.7556, + "step": 15 + }, + { + "epoch": 0.0012912597853280607, + "grad_norm": 1.0331422090530396, + "learning_rate": 3.2000000000000005e-05, + "loss": 2.7756, + "step": 16 + }, + { + "epoch": 0.0013719635219110644, + "grad_norm": 0.906057596206665, + "learning_rate": 3.4000000000000007e-05, + 
"loss": 2.7053, + "step": 17 + }, + { + "epoch": 0.0014526672584940683, + "grad_norm": 0.8677626252174377, + "learning_rate": 3.6e-05, + "loss": 2.7012, + "step": 18 + }, + { + "epoch": 0.001533370995077072, + "grad_norm": 0.9378079175949097, + "learning_rate": 3.8e-05, + "loss": 2.6786, + "step": 19 + }, + { + "epoch": 0.001614074731660076, + "grad_norm": 1.0333882570266724, + "learning_rate": 4e-05, + "loss": 2.689, + "step": 20 + }, + { + "epoch": 0.0016947784682430796, + "grad_norm": 0.9435378909111023, + "learning_rate": 4.2e-05, + "loss": 2.7084, + "step": 21 + }, + { + "epoch": 0.0017754822048260835, + "grad_norm": 0.9530225396156311, + "learning_rate": 4.4000000000000006e-05, + "loss": 2.7039, + "step": 22 + }, + { + "epoch": 0.0018561859414090872, + "grad_norm": 1.0154749155044556, + "learning_rate": 4.600000000000001e-05, + "loss": 2.6623, + "step": 23 + }, + { + "epoch": 0.001936889677992091, + "grad_norm": 1.0341671705245972, + "learning_rate": 4.8e-05, + "loss": 2.7072, + "step": 24 + }, + { + "epoch": 0.002017593414575095, + "grad_norm": 0.9185739159584045, + "learning_rate": 5e-05, + "loss": 2.6595, + "step": 25 + }, + { + "epoch": 0.0020982971511580985, + "grad_norm": 1.060390591621399, + "learning_rate": 5.2000000000000004e-05, + "loss": 2.7045, + "step": 26 + }, + { + "epoch": 0.0021790008877411024, + "grad_norm": 0.9720118641853333, + "learning_rate": 5.4000000000000005e-05, + "loss": 2.6513, + "step": 27 + }, + { + "epoch": 0.0022597046243241063, + "grad_norm": 0.9426784515380859, + "learning_rate": 5.6000000000000006e-05, + "loss": 2.6541, + "step": 28 + }, + { + "epoch": 0.00234040836090711, + "grad_norm": 0.9736170768737793, + "learning_rate": 5.8e-05, + "loss": 2.7324, + "step": 29 + }, + { + "epoch": 0.0024211120974901136, + "grad_norm": 0.9831354022026062, + "learning_rate": 6e-05, + "loss": 2.6651, + "step": 30 + }, + { + "epoch": 0.0025018158340731175, + "grad_norm": 1.0222605466842651, + "learning_rate": 6.2e-05, + "loss": 2.7375, + 
"step": 31 + }, + { + "epoch": 0.0025825195706561214, + "grad_norm": 0.9182235598564148, + "learning_rate": 6.400000000000001e-05, + "loss": 2.7142, + "step": 32 + }, + { + "epoch": 0.0026632233072391254, + "grad_norm": 1.0200958251953125, + "learning_rate": 6.6e-05, + "loss": 2.6785, + "step": 33 + }, + { + "epoch": 0.002743927043822129, + "grad_norm": 1.0153381824493408, + "learning_rate": 6.800000000000001e-05, + "loss": 2.6737, + "step": 34 + }, + { + "epoch": 0.0028246307804051327, + "grad_norm": 0.8998087644577026, + "learning_rate": 7e-05, + "loss": 2.7594, + "step": 35 + }, + { + "epoch": 0.0029053345169881366, + "grad_norm": 0.9005621671676636, + "learning_rate": 7.2e-05, + "loss": 2.713, + "step": 36 + }, + { + "epoch": 0.0029860382535711405, + "grad_norm": 1.0165663957595825, + "learning_rate": 7.4e-05, + "loss": 2.7197, + "step": 37 + }, + { + "epoch": 0.003066741990154144, + "grad_norm": 1.0011894702911377, + "learning_rate": 7.6e-05, + "loss": 2.6315, + "step": 38 + }, + { + "epoch": 0.003147445726737148, + "grad_norm": 1.141209602355957, + "learning_rate": 7.800000000000001e-05, + "loss": 2.7249, + "step": 39 + }, + { + "epoch": 0.003228149463320152, + "grad_norm": 0.9114719033241272, + "learning_rate": 8e-05, + "loss": 2.7039, + "step": 40 + }, + { + "epoch": 0.0033088531999031557, + "grad_norm": 1.0193392038345337, + "learning_rate": 8.2e-05, + "loss": 2.6501, + "step": 41 + }, + { + "epoch": 0.003389556936486159, + "grad_norm": 0.9458270072937012, + "learning_rate": 8.4e-05, + "loss": 2.725, + "step": 42 + }, + { + "epoch": 0.003470260673069163, + "grad_norm": 0.9667492508888245, + "learning_rate": 8.6e-05, + "loss": 2.7232, + "step": 43 + }, + { + "epoch": 0.003550964409652167, + "grad_norm": 0.9987972378730774, + "learning_rate": 8.800000000000001e-05, + "loss": 2.6554, + "step": 44 + }, + { + "epoch": 0.003631668146235171, + "grad_norm": 1.0166393518447876, + "learning_rate": 9e-05, + "loss": 2.7291, + "step": 45 + }, + { + "epoch": 
0.0037123718828181744, + "grad_norm": 0.9557009935379028, + "learning_rate": 9.200000000000001e-05, + "loss": 2.7194, + "step": 46 + }, + { + "epoch": 0.0037930756194011783, + "grad_norm": 0.9575492143630981, + "learning_rate": 9.4e-05, + "loss": 2.6671, + "step": 47 + }, + { + "epoch": 0.003873779355984182, + "grad_norm": 0.9614555239677429, + "learning_rate": 9.6e-05, + "loss": 2.6865, + "step": 48 + }, + { + "epoch": 0.003954483092567186, + "grad_norm": 0.9245515465736389, + "learning_rate": 9.8e-05, + "loss": 2.7821, + "step": 49 + }, + { + "epoch": 0.00403518682915019, + "grad_norm": 0.9756044745445251, + "learning_rate": 0.0001, + "loss": 2.7608, + "step": 50 + }, + { + "epoch": 0.0041158905657331935, + "grad_norm": 0.95787513256073, + "learning_rate": 0.00010200000000000001, + "loss": 2.6458, + "step": 51 + }, + { + "epoch": 0.004196594302316197, + "grad_norm": 1.0102490186691284, + "learning_rate": 0.00010400000000000001, + "loss": 2.7835, + "step": 52 + }, + { + "epoch": 0.004277298038899201, + "grad_norm": 0.9676176309585571, + "learning_rate": 0.00010600000000000002, + "loss": 2.702, + "step": 53 + }, + { + "epoch": 0.004358001775482205, + "grad_norm": 0.9724096655845642, + "learning_rate": 0.00010800000000000001, + "loss": 2.714, + "step": 54 + }, + { + "epoch": 0.004438705512065208, + "grad_norm": 0.9482994675636292, + "learning_rate": 0.00011000000000000002, + "loss": 2.8069, + "step": 55 + }, + { + "epoch": 0.0045194092486482125, + "grad_norm": 0.9886480569839478, + "learning_rate": 0.00011200000000000001, + "loss": 2.7468, + "step": 56 + }, + { + "epoch": 0.004600112985231216, + "grad_norm": 0.9696247577667236, + "learning_rate": 0.00011399999999999999, + "loss": 2.7486, + "step": 57 + }, + { + "epoch": 0.00468081672181422, + "grad_norm": 1.0638912916183472, + "learning_rate": 0.000116, + "loss": 2.7747, + "step": 58 + }, + { + "epoch": 0.004761520458397224, + "grad_norm": 1.016483187675476, + "learning_rate": 0.000118, + "loss": 2.6925, + "step": 
59 + }, + { + "epoch": 0.004842224194980227, + "grad_norm": 1.0298779010772705, + "learning_rate": 0.00012, + "loss": 2.7487, + "step": 60 + }, + { + "epoch": 0.004922927931563232, + "grad_norm": 1.1082268953323364, + "learning_rate": 0.000122, + "loss": 2.7697, + "step": 61 + }, + { + "epoch": 0.005003631668146235, + "grad_norm": 0.9202101826667786, + "learning_rate": 0.000124, + "loss": 2.7429, + "step": 62 + }, + { + "epoch": 0.0050843354047292386, + "grad_norm": 1.0140503644943237, + "learning_rate": 0.000126, + "loss": 2.7492, + "step": 63 + }, + { + "epoch": 0.005165039141312243, + "grad_norm": 1.0689163208007812, + "learning_rate": 0.00012800000000000002, + "loss": 2.7353, + "step": 64 + }, + { + "epoch": 0.005245742877895246, + "grad_norm": 0.9947141408920288, + "learning_rate": 0.00013000000000000002, + "loss": 2.7385, + "step": 65 + }, + { + "epoch": 0.005326446614478251, + "grad_norm": 1.2034410238265991, + "learning_rate": 0.000132, + "loss": 2.7632, + "step": 66 + }, + { + "epoch": 0.005407150351061254, + "grad_norm": 0.9450412392616272, + "learning_rate": 0.000134, + "loss": 2.7547, + "step": 67 + }, + { + "epoch": 0.005487854087644258, + "grad_norm": 1.1818269491195679, + "learning_rate": 0.00013600000000000003, + "loss": 2.7663, + "step": 68 + }, + { + "epoch": 0.005568557824227262, + "grad_norm": 1.003347396850586, + "learning_rate": 0.000138, + "loss": 2.7299, + "step": 69 + }, + { + "epoch": 0.0056492615608102655, + "grad_norm": 1.0105760097503662, + "learning_rate": 0.00014, + "loss": 2.7261, + "step": 70 + }, + { + "epoch": 0.005729965297393269, + "grad_norm": 0.9459090232849121, + "learning_rate": 0.000142, + "loss": 2.7237, + "step": 71 + }, + { + "epoch": 0.005810669033976273, + "grad_norm": 0.9716219305992126, + "learning_rate": 0.000144, + "loss": 2.8175, + "step": 72 + }, + { + "epoch": 0.005891372770559277, + "grad_norm": 0.9968419075012207, + "learning_rate": 0.000146, + "loss": 2.7828, + "step": 73 + }, + { + "epoch": 
0.005972076507142281, + "grad_norm": 1.099680781364441, + "learning_rate": 0.000148, + "loss": 2.7111, + "step": 74 + }, + { + "epoch": 0.0060527802437252845, + "grad_norm": 1.004846453666687, + "learning_rate": 0.00015000000000000001, + "loss": 2.7508, + "step": 75 + }, + { + "epoch": 0.006133483980308288, + "grad_norm": 1.0568128824234009, + "learning_rate": 0.000152, + "loss": 2.7341, + "step": 76 + }, + { + "epoch": 0.006214187716891292, + "grad_norm": 0.9871000051498413, + "learning_rate": 0.000154, + "loss": 2.7831, + "step": 77 + }, + { + "epoch": 0.006294891453474296, + "grad_norm": 1.005947232246399, + "learning_rate": 0.00015600000000000002, + "loss": 2.6798, + "step": 78 + }, + { + "epoch": 0.006375595190057299, + "grad_norm": 0.9984713792800903, + "learning_rate": 0.00015800000000000002, + "loss": 2.8126, + "step": 79 + }, + { + "epoch": 0.006456298926640304, + "grad_norm": 0.9805751442909241, + "learning_rate": 0.00016, + "loss": 2.7826, + "step": 80 + }, + { + "epoch": 0.006537002663223307, + "grad_norm": 1.02998685836792, + "learning_rate": 0.000162, + "loss": 2.7636, + "step": 81 + }, + { + "epoch": 0.006617706399806311, + "grad_norm": 1.0790135860443115, + "learning_rate": 0.000164, + "loss": 2.7809, + "step": 82 + }, + { + "epoch": 0.006698410136389315, + "grad_norm": 1.1058307886123657, + "learning_rate": 0.000166, + "loss": 2.787, + "step": 83 + }, + { + "epoch": 0.006779113872972318, + "grad_norm": 1.0199624300003052, + "learning_rate": 0.000168, + "loss": 2.7171, + "step": 84 + }, + { + "epoch": 0.006859817609555323, + "grad_norm": 1.006494402885437, + "learning_rate": 0.00017, + "loss": 2.7791, + "step": 85 + }, + { + "epoch": 0.006940521346138326, + "grad_norm": 0.9672449827194214, + "learning_rate": 0.000172, + "loss": 2.6929, + "step": 86 + }, + { + "epoch": 0.00702122508272133, + "grad_norm": 0.9747781157493591, + "learning_rate": 0.000174, + "loss": 2.7676, + "step": 87 + }, + { + "epoch": 0.007101928819304334, + "grad_norm": 
0.9193839430809021, + "learning_rate": 0.00017600000000000002, + "loss": 2.7124, + "step": 88 + }, + { + "epoch": 0.0071826325558873375, + "grad_norm": 1.078499436378479, + "learning_rate": 0.00017800000000000002, + "loss": 2.8018, + "step": 89 + }, + { + "epoch": 0.007263336292470342, + "grad_norm": 1.070957899093628, + "learning_rate": 0.00018, + "loss": 2.7889, + "step": 90 + }, + { + "epoch": 0.007344040029053345, + "grad_norm": 1.160942554473877, + "learning_rate": 0.000182, + "loss": 2.8026, + "step": 91 + }, + { + "epoch": 0.007424743765636349, + "grad_norm": 0.9988501071929932, + "learning_rate": 0.00018400000000000003, + "loss": 2.7746, + "step": 92 + }, + { + "epoch": 0.007505447502219353, + "grad_norm": 1.0882319211959839, + "learning_rate": 0.00018600000000000002, + "loss": 2.8105, + "step": 93 + }, + { + "epoch": 0.0075861512388023565, + "grad_norm": 1.1882357597351074, + "learning_rate": 0.000188, + "loss": 2.8294, + "step": 94 + }, + { + "epoch": 0.00766685497538536, + "grad_norm": 1.0761829614639282, + "learning_rate": 0.00019, + "loss": 2.7846, + "step": 95 + }, + { + "epoch": 0.007747558711968364, + "grad_norm": 1.0665982961654663, + "learning_rate": 0.000192, + "loss": 2.8542, + "step": 96 + }, + { + "epoch": 0.007828262448551369, + "grad_norm": 1.206127405166626, + "learning_rate": 0.000194, + "loss": 2.7711, + "step": 97 + }, + { + "epoch": 0.007908966185134371, + "grad_norm": 1.095150113105774, + "learning_rate": 0.000196, + "loss": 2.732, + "step": 98 + }, + { + "epoch": 0.007989669921717376, + "grad_norm": 1.118348240852356, + "learning_rate": 0.00019800000000000002, + "loss": 2.7736, + "step": 99 + }, + { + "epoch": 0.00807037365830038, + "grad_norm": 1.0646461248397827, + "learning_rate": 0.0002, + "loss": 2.8584, + "step": 100 + }, + { + "epoch": 0.008151077394883383, + "grad_norm": 1.0387661457061768, + "learning_rate": 0.0001999999987538693, + "loss": 2.7961, + "step": 101 + }, + { + "epoch": 0.008231781131466387, + "grad_norm": 
1.1905474662780762, + "learning_rate": 0.00019999999501547723, + "loss": 2.8615, + "step": 102 + }, + { + "epoch": 0.008312484868049391, + "grad_norm": 0.9630722999572754, + "learning_rate": 0.0001999999887848239, + "loss": 2.8076, + "step": 103 + }, + { + "epoch": 0.008393188604632394, + "grad_norm": 1.1034537553787231, + "learning_rate": 0.00019999998006190942, + "loss": 2.8402, + "step": 104 + }, + { + "epoch": 0.008473892341215398, + "grad_norm": 1.0679295063018799, + "learning_rate": 0.00019999996884673403, + "loss": 2.7948, + "step": 105 + }, + { + "epoch": 0.008554596077798403, + "grad_norm": 1.0108860731124878, + "learning_rate": 0.00019999995513929802, + "loss": 2.7996, + "step": 106 + }, + { + "epoch": 0.008635299814381405, + "grad_norm": 1.3762084245681763, + "learning_rate": 0.0001999999389396017, + "loss": 2.8023, + "step": 107 + }, + { + "epoch": 0.00871600355096441, + "grad_norm": 1.1320533752441406, + "learning_rate": 0.00019999992024764555, + "loss": 2.793, + "step": 108 + }, + { + "epoch": 0.008796707287547414, + "grad_norm": 1.1752389669418335, + "learning_rate": 0.00019999989906342998, + "loss": 2.8274, + "step": 109 + }, + { + "epoch": 0.008877411024130416, + "grad_norm": 1.2734956741333008, + "learning_rate": 0.00019999987538695552, + "loss": 2.8017, + "step": 110 + }, + { + "epoch": 0.00895811476071342, + "grad_norm": 1.3703055381774902, + "learning_rate": 0.00019999984921822273, + "loss": 2.8699, + "step": 111 + }, + { + "epoch": 0.009038818497296425, + "grad_norm": 1.0079127550125122, + "learning_rate": 0.0001999998205572323, + "loss": 2.8845, + "step": 112 + }, + { + "epoch": 0.00911952223387943, + "grad_norm": 1.28025484085083, + "learning_rate": 0.000199999789403985, + "loss": 2.8636, + "step": 113 + }, + { + "epoch": 0.009200225970462432, + "grad_norm": 1.1057093143463135, + "learning_rate": 0.00019999975575848148, + "loss": 2.8484, + "step": 114 + }, + { + "epoch": 0.009280929707045436, + "grad_norm": 1.0874677896499634, + 
"learning_rate": 0.00019999971962072265, + "loss": 2.7314, + "step": 115 + }, + { + "epoch": 0.00936163344362844, + "grad_norm": 1.0909658670425415, + "learning_rate": 0.00019999968099070943, + "loss": 2.7827, + "step": 116 + }, + { + "epoch": 0.009442337180211443, + "grad_norm": 1.0881624221801758, + "learning_rate": 0.00019999963986844273, + "loss": 2.827, + "step": 117 + }, + { + "epoch": 0.009523040916794448, + "grad_norm": 1.2498180866241455, + "learning_rate": 0.00019999959625392362, + "loss": 2.8695, + "step": 118 + }, + { + "epoch": 0.009603744653377452, + "grad_norm": 1.1344549655914307, + "learning_rate": 0.00019999955014715317, + "loss": 2.8079, + "step": 119 + }, + { + "epoch": 0.009684448389960455, + "grad_norm": 1.032563328742981, + "learning_rate": 0.00019999950154813253, + "loss": 2.7787, + "step": 120 + }, + { + "epoch": 0.009765152126543459, + "grad_norm": 0.9630110263824463, + "learning_rate": 0.0001999994504568629, + "loss": 2.8103, + "step": 121 + }, + { + "epoch": 0.009845855863126463, + "grad_norm": 1.0418641567230225, + "learning_rate": 0.0001999993968733456, + "loss": 2.8679, + "step": 122 + }, + { + "epoch": 0.009926559599709466, + "grad_norm": 0.9797310829162598, + "learning_rate": 0.00019999934079758188, + "loss": 2.7792, + "step": 123 + }, + { + "epoch": 0.01000726333629247, + "grad_norm": 1.0494028329849243, + "learning_rate": 0.00019999928222957323, + "loss": 2.8007, + "step": 124 + }, + { + "epoch": 0.010087967072875475, + "grad_norm": 1.1570640802383423, + "learning_rate": 0.00019999922116932105, + "loss": 2.8331, + "step": 125 + }, + { + "epoch": 0.010168670809458477, + "grad_norm": 1.2753098011016846, + "learning_rate": 0.00019999915761682684, + "loss": 2.8533, + "step": 126 + }, + { + "epoch": 0.010249374546041481, + "grad_norm": 0.9804013967514038, + "learning_rate": 0.00019999909157209227, + "loss": 2.841, + "step": 127 + }, + { + "epoch": 0.010330078282624486, + "grad_norm": 1.320839285850525, + "learning_rate": 
0.00019999902303511892, + "loss": 2.8738, + "step": 128 + }, + { + "epoch": 0.01041078201920749, + "grad_norm": 1.1105059385299683, + "learning_rate": 0.0001999989520059085, + "loss": 2.8458, + "step": 129 + }, + { + "epoch": 0.010491485755790493, + "grad_norm": 1.2869762182235718, + "learning_rate": 0.0001999988784844628, + "loss": 2.7951, + "step": 130 + }, + { + "epoch": 0.010572189492373497, + "grad_norm": 1.1609153747558594, + "learning_rate": 0.00019999880247078368, + "loss": 2.8147, + "step": 131 + }, + { + "epoch": 0.010652893228956501, + "grad_norm": 1.066728115081787, + "learning_rate": 0.00019999872396487297, + "loss": 2.863, + "step": 132 + }, + { + "epoch": 0.010733596965539504, + "grad_norm": 1.2868720293045044, + "learning_rate": 0.0001999986429667327, + "loss": 2.7765, + "step": 133 + }, + { + "epoch": 0.010814300702122508, + "grad_norm": 1.0064955949783325, + "learning_rate": 0.00019999855947636485, + "loss": 2.7834, + "step": 134 + }, + { + "epoch": 0.010895004438705513, + "grad_norm": 1.146589756011963, + "learning_rate": 0.00019999847349377143, + "loss": 2.7966, + "step": 135 + }, + { + "epoch": 0.010975708175288515, + "grad_norm": 0.9831073880195618, + "learning_rate": 0.0001999983850189547, + "loss": 2.8877, + "step": 136 + }, + { + "epoch": 0.01105641191187152, + "grad_norm": 1.1690322160720825, + "learning_rate": 0.0001999982940519168, + "loss": 2.8514, + "step": 137 + }, + { + "epoch": 0.011137115648454524, + "grad_norm": 1.0014944076538086, + "learning_rate": 0.00019999820059266003, + "loss": 2.7846, + "step": 138 + }, + { + "epoch": 0.011217819385037527, + "grad_norm": 0.9581566452980042, + "learning_rate": 0.0001999981046411867, + "loss": 2.7907, + "step": 139 + }, + { + "epoch": 0.011298523121620531, + "grad_norm": 1.1300675868988037, + "learning_rate": 0.00019999800619749922, + "loss": 2.8099, + "step": 140 + }, + { + "epoch": 0.011379226858203535, + "grad_norm": 0.9845526814460754, + "learning_rate": 0.0001999979052616, + "loss": 
2.8607, + "step": 141 + }, + { + "epoch": 0.011459930594786538, + "grad_norm": 1.0781387090682983, + "learning_rate": 0.0001999978018334916, + "loss": 2.831, + "step": 142 + }, + { + "epoch": 0.011540634331369542, + "grad_norm": 1.1142648458480835, + "learning_rate": 0.00019999769591317658, + "loss": 2.9194, + "step": 143 + }, + { + "epoch": 0.011621338067952547, + "grad_norm": 0.9972650408744812, + "learning_rate": 0.00019999758750065757, + "loss": 2.8253, + "step": 144 + }, + { + "epoch": 0.01170204180453555, + "grad_norm": 1.040738582611084, + "learning_rate": 0.0001999974765959373, + "loss": 2.7378, + "step": 145 + }, + { + "epoch": 0.011782745541118553, + "grad_norm": 0.9824327826499939, + "learning_rate": 0.00019999736319901848, + "loss": 2.8263, + "step": 146 + }, + { + "epoch": 0.011863449277701558, + "grad_norm": 1.0531679391860962, + "learning_rate": 0.00019999724730990402, + "loss": 2.7975, + "step": 147 + }, + { + "epoch": 0.011944153014284562, + "grad_norm": 1.0699561834335327, + "learning_rate": 0.0001999971289285967, + "loss": 2.8199, + "step": 148 + }, + { + "epoch": 0.012024856750867565, + "grad_norm": 1.0203633308410645, + "learning_rate": 0.0001999970080550996, + "loss": 2.8479, + "step": 149 + }, + { + "epoch": 0.012105560487450569, + "grad_norm": 1.035589575767517, + "learning_rate": 0.00019999688468941564, + "loss": 2.8263, + "step": 150 + }, + { + "epoch": 0.012186264224033573, + "grad_norm": 0.9706670641899109, + "learning_rate": 0.00019999675883154792, + "loss": 2.8324, + "step": 151 + }, + { + "epoch": 0.012266967960616576, + "grad_norm": 1.1565446853637695, + "learning_rate": 0.00019999663048149958, + "loss": 2.8098, + "step": 152 + }, + { + "epoch": 0.01234767169719958, + "grad_norm": 1.025796890258789, + "learning_rate": 0.0001999964996392738, + "loss": 2.7906, + "step": 153 + }, + { + "epoch": 0.012428375433782585, + "grad_norm": 1.117438554763794, + "learning_rate": 0.00019999636630487386, + "loss": 2.8276, + "step": 154 + }, + { + 
"epoch": 0.012509079170365587, + "grad_norm": 1.025159478187561, + "learning_rate": 0.00019999623047830308, + "loss": 2.8089, + "step": 155 + }, + { + "epoch": 0.012589782906948592, + "grad_norm": 1.007582664489746, + "learning_rate": 0.00019999609215956487, + "loss": 2.8147, + "step": 156 + }, + { + "epoch": 0.012670486643531596, + "grad_norm": 1.0504885911941528, + "learning_rate": 0.0001999959513486626, + "loss": 2.8329, + "step": 157 + }, + { + "epoch": 0.012751190380114599, + "grad_norm": 0.918382465839386, + "learning_rate": 0.00019999580804559987, + "loss": 2.878, + "step": 158 + }, + { + "epoch": 0.012831894116697603, + "grad_norm": 0.9397236704826355, + "learning_rate": 0.0001999956622503802, + "loss": 2.8254, + "step": 159 + }, + { + "epoch": 0.012912597853280607, + "grad_norm": 0.9985697269439697, + "learning_rate": 0.00019999551396300723, + "loss": 2.8417, + "step": 160 + }, + { + "epoch": 0.01299330158986361, + "grad_norm": 0.9866878390312195, + "learning_rate": 0.00019999536318348465, + "loss": 2.7524, + "step": 161 + }, + { + "epoch": 0.013074005326446614, + "grad_norm": 1.0707440376281738, + "learning_rate": 0.00019999520991181627, + "loss": 2.8171, + "step": 162 + }, + { + "epoch": 0.013154709063029619, + "grad_norm": 0.9359755516052246, + "learning_rate": 0.00019999505414800583, + "loss": 2.8463, + "step": 163 + }, + { + "epoch": 0.013235412799612623, + "grad_norm": 1.056647777557373, + "learning_rate": 0.00019999489589205726, + "loss": 2.8602, + "step": 164 + }, + { + "epoch": 0.013316116536195625, + "grad_norm": 0.975370466709137, + "learning_rate": 0.0001999947351439745, + "loss": 2.8292, + "step": 165 + }, + { + "epoch": 0.01339682027277863, + "grad_norm": 0.9241237044334412, + "learning_rate": 0.00019999457190376157, + "loss": 2.7827, + "step": 166 + }, + { + "epoch": 0.013477524009361634, + "grad_norm": 0.9478302001953125, + "learning_rate": 0.00019999440617142247, + "loss": 2.7708, + "step": 167 + }, + { + "epoch": 0.013558227745944637, + 
"grad_norm": 0.9804863333702087, + "learning_rate": 0.00019999423794696142, + "loss": 2.7696, + "step": 168 + }, + { + "epoch": 0.013638931482527641, + "grad_norm": 0.9764013886451721, + "learning_rate": 0.00019999406723038255, + "loss": 2.8521, + "step": 169 + }, + { + "epoch": 0.013719635219110645, + "grad_norm": 1.026532769203186, + "learning_rate": 0.00019999389402169016, + "loss": 2.8507, + "step": 170 + }, + { + "epoch": 0.013800338955693648, + "grad_norm": 0.9983204007148743, + "learning_rate": 0.00019999371832088854, + "loss": 2.8761, + "step": 171 + }, + { + "epoch": 0.013881042692276652, + "grad_norm": 0.9914593696594238, + "learning_rate": 0.00019999354012798206, + "loss": 2.8723, + "step": 172 + }, + { + "epoch": 0.013961746428859657, + "grad_norm": 1.066962718963623, + "learning_rate": 0.00019999335944297517, + "loss": 2.8635, + "step": 173 + }, + { + "epoch": 0.01404245016544266, + "grad_norm": 1.0848973989486694, + "learning_rate": 0.0001999931762658724, + "loss": 2.8645, + "step": 174 + }, + { + "epoch": 0.014123153902025664, + "grad_norm": 1.0245702266693115, + "learning_rate": 0.0001999929905966783, + "loss": 2.8463, + "step": 175 + }, + { + "epoch": 0.014203857638608668, + "grad_norm": 1.2363669872283936, + "learning_rate": 0.00019999280243539747, + "loss": 2.8345, + "step": 176 + }, + { + "epoch": 0.01428456137519167, + "grad_norm": 1.0224756002426147, + "learning_rate": 0.0001999926117820346, + "loss": 2.8309, + "step": 177 + }, + { + "epoch": 0.014365265111774675, + "grad_norm": 1.0882402658462524, + "learning_rate": 0.0001999924186365945, + "loss": 2.8619, + "step": 178 + }, + { + "epoch": 0.01444596884835768, + "grad_norm": 1.0384254455566406, + "learning_rate": 0.00019999222299908192, + "loss": 2.8477, + "step": 179 + }, + { + "epoch": 0.014526672584940684, + "grad_norm": 0.9662587642669678, + "learning_rate": 0.00019999202486950177, + "loss": 2.8087, + "step": 180 + }, + { + "epoch": 0.014607376321523686, + "grad_norm": 0.9086892604827881, 
+ "learning_rate": 0.000199991824247859, + "loss": 2.7688, + "step": 181 + }, + { + "epoch": 0.01468808005810669, + "grad_norm": 1.004185676574707, + "learning_rate": 0.00019999162113415854, + "loss": 2.8237, + "step": 182 + }, + { + "epoch": 0.014768783794689695, + "grad_norm": 0.997965395450592, + "learning_rate": 0.00019999141552840552, + "loss": 2.8228, + "step": 183 + }, + { + "epoch": 0.014849487531272697, + "grad_norm": 0.9844975471496582, + "learning_rate": 0.00019999120743060503, + "loss": 2.8582, + "step": 184 + }, + { + "epoch": 0.014930191267855702, + "grad_norm": 1.0531272888183594, + "learning_rate": 0.00019999099684076232, + "loss": 2.8571, + "step": 185 + }, + { + "epoch": 0.015010895004438706, + "grad_norm": 1.1178920269012451, + "learning_rate": 0.00019999078375888257, + "loss": 2.85, + "step": 186 + }, + { + "epoch": 0.015091598741021709, + "grad_norm": 1.0773903131484985, + "learning_rate": 0.0001999905681849711, + "loss": 2.826, + "step": 187 + }, + { + "epoch": 0.015172302477604713, + "grad_norm": 1.1573486328125, + "learning_rate": 0.00019999035011903325, + "loss": 2.8866, + "step": 188 + }, + { + "epoch": 0.015253006214187717, + "grad_norm": 1.0401980876922607, + "learning_rate": 0.00019999012956107456, + "loss": 2.788, + "step": 189 + }, + { + "epoch": 0.01533370995077072, + "grad_norm": 1.0150686502456665, + "learning_rate": 0.00019998990651110045, + "loss": 2.8542, + "step": 190 + }, + { + "epoch": 0.015414413687353724, + "grad_norm": 1.1902797222137451, + "learning_rate": 0.0001999896809691165, + "loss": 2.9209, + "step": 191 + }, + { + "epoch": 0.015495117423936729, + "grad_norm": 1.0177555084228516, + "learning_rate": 0.0001999894529351283, + "loss": 2.7852, + "step": 192 + }, + { + "epoch": 0.015575821160519731, + "grad_norm": 1.062322974205017, + "learning_rate": 0.00019998922240914159, + "loss": 2.8328, + "step": 193 + }, + { + "epoch": 0.015656524897102737, + "grad_norm": 1.0937334299087524, + "learning_rate": 
0.00019998898939116205, + "loss": 2.8069, + "step": 194 + }, + { + "epoch": 0.015737228633685738, + "grad_norm": 0.9553198218345642, + "learning_rate": 0.00019998875388119554, + "loss": 2.8402, + "step": 195 + }, + { + "epoch": 0.015817932370268743, + "grad_norm": 1.1802356243133545, + "learning_rate": 0.0001999885158792479, + "loss": 2.945, + "step": 196 + }, + { + "epoch": 0.015898636106851747, + "grad_norm": 1.160346269607544, + "learning_rate": 0.0001999882753853251, + "loss": 2.8341, + "step": 197 + }, + { + "epoch": 0.01597933984343475, + "grad_norm": 1.0379278659820557, + "learning_rate": 0.00019998803239943305, + "loss": 2.898, + "step": 198 + }, + { + "epoch": 0.016060043580017756, + "grad_norm": 1.2022395133972168, + "learning_rate": 0.00019998778692157792, + "loss": 2.8302, + "step": 199 + }, + { + "epoch": 0.01614074731660076, + "grad_norm": 1.057017207145691, + "learning_rate": 0.00019998753895176575, + "loss": 2.8474, + "step": 200 + }, + { + "epoch": 0.01622145105318376, + "grad_norm": 0.9299072027206421, + "learning_rate": 0.00019998728849000271, + "loss": 2.8266, + "step": 201 + }, + { + "epoch": 0.016302154789766765, + "grad_norm": 1.0296592712402344, + "learning_rate": 0.00019998703553629512, + "loss": 2.8106, + "step": 202 + }, + { + "epoch": 0.01638285852634977, + "grad_norm": 0.9641671180725098, + "learning_rate": 0.0001999867800906492, + "loss": 2.8089, + "step": 203 + }, + { + "epoch": 0.016463562262932774, + "grad_norm": 0.9951125383377075, + "learning_rate": 0.00019998652215307136, + "loss": 2.813, + "step": 204 + }, + { + "epoch": 0.016544265999515778, + "grad_norm": 1.0089969635009766, + "learning_rate": 0.00019998626172356804, + "loss": 2.8021, + "step": 205 + }, + { + "epoch": 0.016624969736098782, + "grad_norm": 0.9916231632232666, + "learning_rate": 0.00019998599880214566, + "loss": 2.8455, + "step": 206 + }, + { + "epoch": 0.016705673472681787, + "grad_norm": 0.9612492322921753, + "learning_rate": 0.00019998573338881088, + "loss": 
2.8653, + "step": 207 + }, + { + "epoch": 0.016786377209264788, + "grad_norm": 0.984578013420105, + "learning_rate": 0.00019998546548357022, + "loss": 2.8359, + "step": 208 + }, + { + "epoch": 0.016867080945847792, + "grad_norm": 0.9457565546035767, + "learning_rate": 0.0001999851950864304, + "loss": 2.8507, + "step": 209 + }, + { + "epoch": 0.016947784682430796, + "grad_norm": 1.0219026803970337, + "learning_rate": 0.00019998492219739817, + "loss": 2.8326, + "step": 210 + }, + { + "epoch": 0.0170284884190138, + "grad_norm": 0.971570611000061, + "learning_rate": 0.00019998464681648032, + "loss": 2.8079, + "step": 211 + }, + { + "epoch": 0.017109192155596805, + "grad_norm": 0.9731320738792419, + "learning_rate": 0.00019998436894368368, + "loss": 2.8536, + "step": 212 + }, + { + "epoch": 0.01718989589217981, + "grad_norm": 1.0519105195999146, + "learning_rate": 0.00019998408857901525, + "loss": 2.8589, + "step": 213 + }, + { + "epoch": 0.01727059962876281, + "grad_norm": 0.9725883603096008, + "learning_rate": 0.00019998380572248194, + "loss": 2.7937, + "step": 214 + }, + { + "epoch": 0.017351303365345815, + "grad_norm": 1.0397064685821533, + "learning_rate": 0.00019998352037409084, + "loss": 2.9145, + "step": 215 + }, + { + "epoch": 0.01743200710192882, + "grad_norm": 0.9094852209091187, + "learning_rate": 0.00019998323253384904, + "loss": 2.7692, + "step": 216 + }, + { + "epoch": 0.017512710838511823, + "grad_norm": 0.941646158695221, + "learning_rate": 0.00019998294220176374, + "loss": 2.7975, + "step": 217 + }, + { + "epoch": 0.017593414575094828, + "grad_norm": 0.9939892888069153, + "learning_rate": 0.00019998264937784216, + "loss": 2.8421, + "step": 218 + }, + { + "epoch": 0.017674118311677832, + "grad_norm": 0.8985795378684998, + "learning_rate": 0.0001999823540620916, + "loss": 2.8146, + "step": 219 + }, + { + "epoch": 0.017754822048260833, + "grad_norm": 1.0436078310012817, + "learning_rate": 0.00019998205625451943, + "loss": 2.8416, + "step": 220 + }, + { + 
"epoch": 0.017835525784843837, + "grad_norm": 0.9941675066947937, + "learning_rate": 0.00019998175595513305, + "loss": 2.8723, + "step": 221 + }, + { + "epoch": 0.01791622952142684, + "grad_norm": 0.9203903675079346, + "learning_rate": 0.00019998145316393995, + "loss": 2.7791, + "step": 222 + }, + { + "epoch": 0.017996933258009846, + "grad_norm": 0.9325969815254211, + "learning_rate": 0.00019998114788094768, + "loss": 2.8664, + "step": 223 + }, + { + "epoch": 0.01807763699459285, + "grad_norm": 0.9483599662780762, + "learning_rate": 0.00019998084010616388, + "loss": 2.7782, + "step": 224 + }, + { + "epoch": 0.018158340731175854, + "grad_norm": 0.9555078744888306, + "learning_rate": 0.00019998052983959615, + "loss": 2.7771, + "step": 225 + }, + { + "epoch": 0.01823904446775886, + "grad_norm": 0.9452421069145203, + "learning_rate": 0.00019998021708125233, + "loss": 2.8878, + "step": 226 + }, + { + "epoch": 0.01831974820434186, + "grad_norm": 0.9784894585609436, + "learning_rate": 0.00019997990183114007, + "loss": 2.8382, + "step": 227 + }, + { + "epoch": 0.018400451940924864, + "grad_norm": 1.0844931602478027, + "learning_rate": 0.00019997958408926735, + "loss": 2.8015, + "step": 228 + }, + { + "epoch": 0.01848115567750787, + "grad_norm": 1.0416710376739502, + "learning_rate": 0.00019997926385564207, + "loss": 2.8364, + "step": 229 + }, + { + "epoch": 0.018561859414090873, + "grad_norm": 0.9213813543319702, + "learning_rate": 0.00019997894113027215, + "loss": 2.8489, + "step": 230 + }, + { + "epoch": 0.018642563150673877, + "grad_norm": 1.0186388492584229, + "learning_rate": 0.00019997861591316567, + "loss": 2.914, + "step": 231 + }, + { + "epoch": 0.01872326688725688, + "grad_norm": 1.0032236576080322, + "learning_rate": 0.00019997828820433072, + "loss": 2.8733, + "step": 232 + }, + { + "epoch": 0.018803970623839882, + "grad_norm": 0.9783569574356079, + "learning_rate": 0.0001999779580037755, + "loss": 2.851, + "step": 233 + }, + { + "epoch": 0.018884674360422887, + 
"grad_norm": 0.8471441268920898, + "learning_rate": 0.00019997762531150825, + "loss": 2.7923, + "step": 234 + }, + { + "epoch": 0.01896537809700589, + "grad_norm": 0.8912937641143799, + "learning_rate": 0.00019997729012753717, + "loss": 2.8725, + "step": 235 + }, + { + "epoch": 0.019046081833588895, + "grad_norm": 1.2453325986862183, + "learning_rate": 0.00019997695245187075, + "loss": 2.9292, + "step": 236 + }, + { + "epoch": 0.0191267855701719, + "grad_norm": 0.8870908617973328, + "learning_rate": 0.0001999766122845173, + "loss": 2.8008, + "step": 237 + }, + { + "epoch": 0.019207489306754904, + "grad_norm": 1.0679768323898315, + "learning_rate": 0.0001999762696254853, + "loss": 2.8919, + "step": 238 + }, + { + "epoch": 0.01928819304333791, + "grad_norm": 0.9769917130470276, + "learning_rate": 0.00019997592447478337, + "loss": 2.7937, + "step": 239 + }, + { + "epoch": 0.01936889677992091, + "grad_norm": 1.066183090209961, + "learning_rate": 0.00019997557683242004, + "loss": 2.8375, + "step": 240 + }, + { + "epoch": 0.019449600516503913, + "grad_norm": 0.9834103584289551, + "learning_rate": 0.000199975226698404, + "loss": 2.8577, + "step": 241 + }, + { + "epoch": 0.019530304253086918, + "grad_norm": 1.102211833000183, + "learning_rate": 0.00019997487407274396, + "loss": 2.8466, + "step": 242 + }, + { + "epoch": 0.019611007989669922, + "grad_norm": 0.9936226606369019, + "learning_rate": 0.00019997451895544872, + "loss": 2.7729, + "step": 243 + }, + { + "epoch": 0.019691711726252926, + "grad_norm": 1.0995992422103882, + "learning_rate": 0.00019997416134652713, + "loss": 2.8425, + "step": 244 + }, + { + "epoch": 0.01977241546283593, + "grad_norm": 0.94181889295578, + "learning_rate": 0.00019997380124598814, + "loss": 2.8495, + "step": 245 + }, + { + "epoch": 0.01985311919941893, + "grad_norm": 0.9791487455368042, + "learning_rate": 0.00019997343865384067, + "loss": 2.8919, + "step": 246 + }, + { + "epoch": 0.019933822936001936, + "grad_norm": 0.9173399209976196, + 
"learning_rate": 0.00019997307357009375, + "loss": 2.8593, + "step": 247 + }, + { + "epoch": 0.02001452667258494, + "grad_norm": 0.9675281047821045, + "learning_rate": 0.00019997270599475653, + "loss": 2.8226, + "step": 248 + }, + { + "epoch": 0.020095230409167945, + "grad_norm": 0.8928244113922119, + "learning_rate": 0.00019997233592783812, + "loss": 2.8296, + "step": 249 + }, + { + "epoch": 0.02017593414575095, + "grad_norm": 0.928601861000061, + "learning_rate": 0.0001999719633693478, + "loss": 2.8399, + "step": 250 + }, + { + "epoch": 0.020256637882333953, + "grad_norm": 0.9378123879432678, + "learning_rate": 0.00019997158831929482, + "loss": 2.8711, + "step": 251 + }, + { + "epoch": 0.020337341618916954, + "grad_norm": 0.9041047692298889, + "learning_rate": 0.00019997121077768853, + "loss": 2.8338, + "step": 252 + }, + { + "epoch": 0.02041804535549996, + "grad_norm": 0.9673274755477905, + "learning_rate": 0.00019997083074453832, + "loss": 2.8556, + "step": 253 + }, + { + "epoch": 0.020498749092082963, + "grad_norm": 0.9204083681106567, + "learning_rate": 0.0001999704482198537, + "loss": 2.7954, + "step": 254 + }, + { + "epoch": 0.020579452828665967, + "grad_norm": 0.9267606735229492, + "learning_rate": 0.00019997006320364417, + "loss": 2.8656, + "step": 255 + }, + { + "epoch": 0.02066015656524897, + "grad_norm": 0.9562919735908508, + "learning_rate": 0.00019996967569591936, + "loss": 2.8406, + "step": 256 + }, + { + "epoch": 0.020740860301831976, + "grad_norm": 0.9065950512886047, + "learning_rate": 0.0001999692856966889, + "loss": 2.7856, + "step": 257 + }, + { + "epoch": 0.02082156403841498, + "grad_norm": 0.9136463403701782, + "learning_rate": 0.0001999688932059625, + "loss": 2.8083, + "step": 258 + }, + { + "epoch": 0.02090226777499798, + "grad_norm": 0.9785570502281189, + "learning_rate": 0.00019996849822374998, + "loss": 2.7984, + "step": 259 + }, + { + "epoch": 0.020982971511580985, + "grad_norm": 0.9549168348312378, + "learning_rate": 
0.00019996810075006117, + "loss": 2.8048, + "step": 260 + }, + { + "epoch": 0.02106367524816399, + "grad_norm": 0.8923975825309753, + "learning_rate": 0.00019996770078490594, + "loss": 2.8559, + "step": 261 + }, + { + "epoch": 0.021144378984746994, + "grad_norm": 0.9516206383705139, + "learning_rate": 0.0001999672983282943, + "loss": 2.9171, + "step": 262 + }, + { + "epoch": 0.02122508272133, + "grad_norm": 0.9101666808128357, + "learning_rate": 0.0001999668933802363, + "loss": 2.8746, + "step": 263 + }, + { + "epoch": 0.021305786457913003, + "grad_norm": 0.9081267714500427, + "learning_rate": 0.00019996648594074195, + "loss": 2.8637, + "step": 264 + }, + { + "epoch": 0.021386490194496004, + "grad_norm": 1.0048178434371948, + "learning_rate": 0.0001999660760098215, + "loss": 2.8783, + "step": 265 + }, + { + "epoch": 0.021467193931079008, + "grad_norm": 0.9625924229621887, + "learning_rate": 0.0001999656635874851, + "loss": 2.8226, + "step": 266 + }, + { + "epoch": 0.021547897667662012, + "grad_norm": 0.9911805391311646, + "learning_rate": 0.00019996524867374306, + "loss": 2.8135, + "step": 267 + }, + { + "epoch": 0.021628601404245017, + "grad_norm": 0.8920134902000427, + "learning_rate": 0.00019996483126860572, + "loss": 2.7934, + "step": 268 + }, + { + "epoch": 0.02170930514082802, + "grad_norm": 1.0806514024734497, + "learning_rate": 0.00019996441137208346, + "loss": 2.8435, + "step": 269 + }, + { + "epoch": 0.021790008877411025, + "grad_norm": 0.9426547884941101, + "learning_rate": 0.00019996398898418675, + "loss": 2.7919, + "step": 270 + }, + { + "epoch": 0.021870712613994026, + "grad_norm": 0.9893020987510681, + "learning_rate": 0.00019996356410492615, + "loss": 2.8616, + "step": 271 + }, + { + "epoch": 0.02195141635057703, + "grad_norm": 1.0196046829223633, + "learning_rate": 0.00019996313673431218, + "loss": 2.8101, + "step": 272 + }, + { + "epoch": 0.022032120087160035, + "grad_norm": 0.9556699991226196, + "learning_rate": 0.00019996270687235558, + "loss": 
2.8669, + "step": 273 + }, + { + "epoch": 0.02211282382374304, + "grad_norm": 0.8985902667045593, + "learning_rate": 0.00019996227451906702, + "loss": 2.8078, + "step": 274 + }, + { + "epoch": 0.022193527560326044, + "grad_norm": 1.0198246240615845, + "learning_rate": 0.00019996183967445726, + "loss": 2.8314, + "step": 275 + }, + { + "epoch": 0.022274231296909048, + "grad_norm": 0.9360179901123047, + "learning_rate": 0.00019996140233853715, + "loss": 2.7969, + "step": 276 + }, + { + "epoch": 0.022354935033492052, + "grad_norm": 1.0250160694122314, + "learning_rate": 0.00019996096251131759, + "loss": 2.7897, + "step": 277 + }, + { + "epoch": 0.022435638770075053, + "grad_norm": 0.934582531452179, + "learning_rate": 0.00019996052019280954, + "loss": 2.8667, + "step": 278 + }, + { + "epoch": 0.022516342506658057, + "grad_norm": 0.9394461512565613, + "learning_rate": 0.00019996007538302407, + "loss": 2.7681, + "step": 279 + }, + { + "epoch": 0.022597046243241062, + "grad_norm": 0.9468861222267151, + "learning_rate": 0.00019995962808197216, + "loss": 2.7709, + "step": 280 + }, + { + "epoch": 0.022677749979824066, + "grad_norm": 0.9798515439033508, + "learning_rate": 0.00019995917828966506, + "loss": 2.8274, + "step": 281 + }, + { + "epoch": 0.02275845371640707, + "grad_norm": 1.0403941869735718, + "learning_rate": 0.00019995872600611395, + "loss": 2.8897, + "step": 282 + }, + { + "epoch": 0.022839157452990075, + "grad_norm": 0.9795030951499939, + "learning_rate": 0.00019995827123133006, + "loss": 2.8792, + "step": 283 + }, + { + "epoch": 0.022919861189573076, + "grad_norm": 0.9162538647651672, + "learning_rate": 0.00019995781396532479, + "loss": 2.8339, + "step": 284 + }, + { + "epoch": 0.02300056492615608, + "grad_norm": 1.0864707231521606, + "learning_rate": 0.00019995735420810947, + "loss": 2.8599, + "step": 285 + }, + { + "epoch": 0.023081268662739084, + "grad_norm": 0.9181776642799377, + "learning_rate": 0.0001999568919596956, + "loss": 2.8736, + "step": 286 + }, + 
{ + "epoch": 0.02316197239932209, + "grad_norm": 0.8880531191825867, + "learning_rate": 0.00019995642722009472, + "loss": 2.8215, + "step": 287 + }, + { + "epoch": 0.023242676135905093, + "grad_norm": 0.9287240505218506, + "learning_rate": 0.00019995595998931835, + "loss": 2.844, + "step": 288 + }, + { + "epoch": 0.023323379872488097, + "grad_norm": 0.886894941329956, + "learning_rate": 0.0001999554902673782, + "loss": 2.8319, + "step": 289 + }, + { + "epoch": 0.0234040836090711, + "grad_norm": 0.9564458131790161, + "learning_rate": 0.0001999550180542859, + "loss": 2.8126, + "step": 290 + }, + { + "epoch": 0.023484787345654103, + "grad_norm": 0.8745970726013184, + "learning_rate": 0.00019995454335005334, + "loss": 2.8344, + "step": 291 + }, + { + "epoch": 0.023565491082237107, + "grad_norm": 1.0343137979507446, + "learning_rate": 0.00019995406615469217, + "loss": 2.8498, + "step": 292 + }, + { + "epoch": 0.02364619481882011, + "grad_norm": 0.9951575994491577, + "learning_rate": 0.0001999535864682145, + "loss": 2.8655, + "step": 293 + }, + { + "epoch": 0.023726898555403116, + "grad_norm": 0.8457592725753784, + "learning_rate": 0.0001999531042906321, + "loss": 2.8189, + "step": 294 + }, + { + "epoch": 0.02380760229198612, + "grad_norm": 0.9126954674720764, + "learning_rate": 0.00019995261962195708, + "loss": 2.8272, + "step": 295 + }, + { + "epoch": 0.023888306028569124, + "grad_norm": 1.0171937942504883, + "learning_rate": 0.0001999521324622015, + "loss": 2.869, + "step": 296 + }, + { + "epoch": 0.023969009765152125, + "grad_norm": 0.9887226223945618, + "learning_rate": 0.00019995164281137753, + "loss": 2.7643, + "step": 297 + }, + { + "epoch": 0.02404971350173513, + "grad_norm": 1.4240798950195312, + "learning_rate": 0.00019995115066949733, + "loss": 2.8332, + "step": 298 + }, + { + "epoch": 0.024130417238318134, + "grad_norm": 0.9856921434402466, + "learning_rate": 0.00019995065603657316, + "loss": 2.8283, + "step": 299 + }, + { + "epoch": 0.024211120974901138, + 
"grad_norm": 0.997164785861969, + "learning_rate": 0.0001999501589126174, + "loss": 2.9164, + "step": 300 + }, + { + "epoch": 0.024291824711484142, + "grad_norm": 1.6480412483215332, + "learning_rate": 0.00019994965929764238, + "loss": 2.8941, + "step": 301 + }, + { + "epoch": 0.024372528448067147, + "grad_norm": 1.1590758562088013, + "learning_rate": 0.0001999491571916606, + "loss": 2.8127, + "step": 302 + }, + { + "epoch": 0.024453232184650148, + "grad_norm": 1.1228376626968384, + "learning_rate": 0.00019994865259468454, + "loss": 2.8439, + "step": 303 + }, + { + "epoch": 0.024533935921233152, + "grad_norm": 1.0426349639892578, + "learning_rate": 0.0001999481455067268, + "loss": 2.8671, + "step": 304 + }, + { + "epoch": 0.024614639657816156, + "grad_norm": 1.0911917686462402, + "learning_rate": 0.00019994763592779996, + "loss": 2.8297, + "step": 305 + }, + { + "epoch": 0.02469534339439916, + "grad_norm": 1.0493195056915283, + "learning_rate": 0.00019994712385791683, + "loss": 2.7996, + "step": 306 + }, + { + "epoch": 0.024776047130982165, + "grad_norm": 0.9275023341178894, + "learning_rate": 0.00019994660929709008, + "loss": 2.7949, + "step": 307 + }, + { + "epoch": 0.02485675086756517, + "grad_norm": 1.1074799299240112, + "learning_rate": 0.00019994609224533255, + "loss": 2.8364, + "step": 308 + }, + { + "epoch": 0.024937454604148174, + "grad_norm": 0.9189429879188538, + "learning_rate": 0.00019994557270265717, + "loss": 2.8293, + "step": 309 + }, + { + "epoch": 0.025018158340731175, + "grad_norm": 0.9577780961990356, + "learning_rate": 0.00019994505066907683, + "loss": 2.8295, + "step": 310 + }, + { + "epoch": 0.02509886207731418, + "grad_norm": 1.0707277059555054, + "learning_rate": 0.0001999445261446046, + "loss": 2.795, + "step": 311 + }, + { + "epoch": 0.025179565813897183, + "grad_norm": 0.9211257696151733, + "learning_rate": 0.0001999439991292535, + "loss": 2.8355, + "step": 312 + }, + { + "epoch": 0.025260269550480188, + "grad_norm": 0.987779438495636, + 
"learning_rate": 0.00019994346962303667, + "loss": 2.8175, + "step": 313 + }, + { + "epoch": 0.025340973287063192, + "grad_norm": 0.9317128658294678, + "learning_rate": 0.00019994293762596734, + "loss": 2.8205, + "step": 314 + }, + { + "epoch": 0.025421677023646196, + "grad_norm": 0.8989154100418091, + "learning_rate": 0.00019994240313805873, + "loss": 2.8257, + "step": 315 + }, + { + "epoch": 0.025502380760229197, + "grad_norm": 0.8391042351722717, + "learning_rate": 0.00019994186615932423, + "loss": 2.8105, + "step": 316 + }, + { + "epoch": 0.0255830844968122, + "grad_norm": 0.8908089995384216, + "learning_rate": 0.00019994132668977715, + "loss": 2.7894, + "step": 317 + }, + { + "epoch": 0.025663788233395206, + "grad_norm": 0.8666881322860718, + "learning_rate": 0.00019994078472943097, + "loss": 2.7934, + "step": 318 + }, + { + "epoch": 0.02574449196997821, + "grad_norm": 0.8834616541862488, + "learning_rate": 0.00019994024027829914, + "loss": 2.8166, + "step": 319 + }, + { + "epoch": 0.025825195706561214, + "grad_norm": 0.9831370115280151, + "learning_rate": 0.00019993969333639532, + "loss": 2.889, + "step": 320 + }, + { + "epoch": 0.02590589944314422, + "grad_norm": 0.9171644449234009, + "learning_rate": 0.00019993914390373308, + "loss": 2.8582, + "step": 321 + }, + { + "epoch": 0.02598660317972722, + "grad_norm": 0.9624861478805542, + "learning_rate": 0.00019993859198032615, + "loss": 2.8574, + "step": 322 + }, + { + "epoch": 0.026067306916310224, + "grad_norm": 0.8826586008071899, + "learning_rate": 0.00019993803756618826, + "loss": 2.8544, + "step": 323 + }, + { + "epoch": 0.02614801065289323, + "grad_norm": 0.9286447763442993, + "learning_rate": 0.0001999374806613332, + "loss": 2.7937, + "step": 324 + }, + { + "epoch": 0.026228714389476233, + "grad_norm": 0.9901685118675232, + "learning_rate": 0.00019993692126577493, + "loss": 2.7654, + "step": 325 + }, + { + "epoch": 0.026309418126059237, + "grad_norm": 0.9624341130256653, + "learning_rate": 
0.00019993635937952734, + "loss": 2.8804, + "step": 326 + }, + { + "epoch": 0.02639012186264224, + "grad_norm": 0.8867596387863159, + "learning_rate": 0.0001999357950026044, + "loss": 2.8254, + "step": 327 + }, + { + "epoch": 0.026470825599225246, + "grad_norm": 0.9243817925453186, + "learning_rate": 0.00019993522813502022, + "loss": 2.8177, + "step": 328 + }, + { + "epoch": 0.026551529335808247, + "grad_norm": 0.9322247505187988, + "learning_rate": 0.00019993465877678895, + "loss": 2.9023, + "step": 329 + }, + { + "epoch": 0.02663223307239125, + "grad_norm": 0.8768174648284912, + "learning_rate": 0.00019993408692792474, + "loss": 2.8184, + "step": 330 + }, + { + "epoch": 0.026712936808974255, + "grad_norm": 0.9436870813369751, + "learning_rate": 0.00019993351258844184, + "loss": 2.8319, + "step": 331 + }, + { + "epoch": 0.02679364054555726, + "grad_norm": 0.9970327019691467, + "learning_rate": 0.0001999329357583546, + "loss": 2.7946, + "step": 332 + }, + { + "epoch": 0.026874344282140264, + "grad_norm": 0.9100088477134705, + "learning_rate": 0.00019993235643767736, + "loss": 2.782, + "step": 333 + }, + { + "epoch": 0.02695504801872327, + "grad_norm": 0.9693402051925659, + "learning_rate": 0.00019993177462642456, + "loss": 2.8182, + "step": 334 + }, + { + "epoch": 0.02703575175530627, + "grad_norm": 0.8761965036392212, + "learning_rate": 0.00019993119032461073, + "loss": 2.8058, + "step": 335 + }, + { + "epoch": 0.027116455491889273, + "grad_norm": 1.0699270963668823, + "learning_rate": 0.00019993060353225043, + "loss": 2.9211, + "step": 336 + }, + { + "epoch": 0.027197159228472278, + "grad_norm": 1.0094172954559326, + "learning_rate": 0.00019993001424935822, + "loss": 2.8837, + "step": 337 + }, + { + "epoch": 0.027277862965055282, + "grad_norm": 0.9683573842048645, + "learning_rate": 0.00019992942247594887, + "loss": 2.8523, + "step": 338 + }, + { + "epoch": 0.027358566701638286, + "grad_norm": 1.3243813514709473, + "learning_rate": 0.00019992882821203708, + 
"loss": 2.7891, + "step": 339 + }, + { + "epoch": 0.02743927043822129, + "grad_norm": 1.0227056741714478, + "learning_rate": 0.0001999282314576377, + "loss": 2.8396, + "step": 340 + }, + { + "epoch": 0.027519974174804295, + "grad_norm": 1.03257417678833, + "learning_rate": 0.00019992763221276556, + "loss": 2.824, + "step": 341 + }, + { + "epoch": 0.027600677911387296, + "grad_norm": 0.86456698179245, + "learning_rate": 0.00019992703047743562, + "loss": 2.8006, + "step": 342 + }, + { + "epoch": 0.0276813816479703, + "grad_norm": 0.965339720249176, + "learning_rate": 0.00019992642625166286, + "loss": 2.8658, + "step": 343 + }, + { + "epoch": 0.027762085384553305, + "grad_norm": 1.0028942823410034, + "learning_rate": 0.00019992581953546236, + "loss": 2.8311, + "step": 344 + }, + { + "epoch": 0.02784278912113631, + "grad_norm": 0.984307050704956, + "learning_rate": 0.0001999252103288492, + "loss": 2.8748, + "step": 345 + }, + { + "epoch": 0.027923492857719313, + "grad_norm": 0.9405032396316528, + "learning_rate": 0.00019992459863183858, + "loss": 2.8371, + "step": 346 + }, + { + "epoch": 0.028004196594302318, + "grad_norm": 0.9867002367973328, + "learning_rate": 0.0001999239844444458, + "loss": 2.7914, + "step": 347 + }, + { + "epoch": 0.02808490033088532, + "grad_norm": 0.9224951267242432, + "learning_rate": 0.00019992336776668613, + "loss": 2.7986, + "step": 348 + }, + { + "epoch": 0.028165604067468323, + "grad_norm": 1.002838134765625, + "learning_rate": 0.0001999227485985749, + "loss": 2.8207, + "step": 349 + }, + { + "epoch": 0.028246307804051327, + "grad_norm": 0.8922045826911926, + "learning_rate": 0.00019992212694012757, + "loss": 2.8264, + "step": 350 + }, + { + "epoch": 0.02832701154063433, + "grad_norm": 1.0860323905944824, + "learning_rate": 0.00019992150279135964, + "loss": 2.8778, + "step": 351 + }, + { + "epoch": 0.028407715277217336, + "grad_norm": 1.0995604991912842, + "learning_rate": 0.0001999208761522867, + "loss": 2.8599, + "step": 352 + }, + { + 
"epoch": 0.02848841901380034, + "grad_norm": 0.8741658926010132, + "learning_rate": 0.0001999202470229243, + "loss": 2.7757, + "step": 353 + }, + { + "epoch": 0.02856912275038334, + "grad_norm": 0.9142587184906006, + "learning_rate": 0.00019991961540328815, + "loss": 2.8235, + "step": 354 + }, + { + "epoch": 0.028649826486966345, + "grad_norm": 1.0000953674316406, + "learning_rate": 0.000199918981293394, + "loss": 2.8, + "step": 355 + }, + { + "epoch": 0.02873053022354935, + "grad_norm": 0.9416046738624573, + "learning_rate": 0.00019991834469325763, + "loss": 2.7941, + "step": 356 + }, + { + "epoch": 0.028811233960132354, + "grad_norm": 0.9135935306549072, + "learning_rate": 0.00019991770560289496, + "loss": 2.8315, + "step": 357 + }, + { + "epoch": 0.02889193769671536, + "grad_norm": 0.8867244124412537, + "learning_rate": 0.00019991706402232184, + "loss": 2.8649, + "step": 358 + }, + { + "epoch": 0.028972641433298363, + "grad_norm": 0.9360243678092957, + "learning_rate": 0.00019991641995155431, + "loss": 2.7556, + "step": 359 + }, + { + "epoch": 0.029053345169881367, + "grad_norm": 0.8903766870498657, + "learning_rate": 0.00019991577339060842, + "loss": 2.8379, + "step": 360 + }, + { + "epoch": 0.029134048906464368, + "grad_norm": 1.0178784132003784, + "learning_rate": 0.00019991512433950023, + "loss": 2.8045, + "step": 361 + }, + { + "epoch": 0.029214752643047372, + "grad_norm": 0.9318631887435913, + "learning_rate": 0.000199914472798246, + "loss": 2.823, + "step": 362 + }, + { + "epoch": 0.029295456379630377, + "grad_norm": 0.9384647011756897, + "learning_rate": 0.00019991381876686195, + "loss": 2.9379, + "step": 363 + }, + { + "epoch": 0.02937616011621338, + "grad_norm": 0.9318633675575256, + "learning_rate": 0.00019991316224536433, + "loss": 2.8222, + "step": 364 + }, + { + "epoch": 0.029456863852796385, + "grad_norm": 0.8653938174247742, + "learning_rate": 0.00019991250323376952, + "loss": 2.8447, + "step": 365 + }, + { + "epoch": 0.02953756758937939, + 
"grad_norm": 0.8997991681098938, + "learning_rate": 0.00019991184173209398, + "loss": 2.8523, + "step": 366 + }, + { + "epoch": 0.02961827132596239, + "grad_norm": 0.8587092161178589, + "learning_rate": 0.00019991117774035416, + "loss": 2.8141, + "step": 367 + }, + { + "epoch": 0.029698975062545395, + "grad_norm": 0.8740741014480591, + "learning_rate": 0.00019991051125856663, + "loss": 2.7487, + "step": 368 + }, + { + "epoch": 0.0297796787991284, + "grad_norm": 0.9099416732788086, + "learning_rate": 0.00019990984228674798, + "loss": 2.834, + "step": 369 + }, + { + "epoch": 0.029860382535711404, + "grad_norm": 0.8675365447998047, + "learning_rate": 0.0001999091708249149, + "loss": 2.8259, + "step": 370 + }, + { + "epoch": 0.029941086272294408, + "grad_norm": 1.0141092538833618, + "learning_rate": 0.00019990849687308412, + "loss": 2.8369, + "step": 371 + }, + { + "epoch": 0.030021790008877412, + "grad_norm": 0.849155604839325, + "learning_rate": 0.00019990782043127243, + "loss": 2.7505, + "step": 372 + }, + { + "epoch": 0.030102493745460413, + "grad_norm": 1.073754072189331, + "learning_rate": 0.0001999071414994967, + "loss": 2.8939, + "step": 373 + }, + { + "epoch": 0.030183197482043417, + "grad_norm": 0.8615279197692871, + "learning_rate": 0.00019990646007777383, + "loss": 2.7662, + "step": 374 + }, + { + "epoch": 0.030263901218626422, + "grad_norm": 0.8803398609161377, + "learning_rate": 0.0001999057761661208, + "loss": 2.7992, + "step": 375 + }, + { + "epoch": 0.030344604955209426, + "grad_norm": 0.8901834487915039, + "learning_rate": 0.00019990508976455473, + "loss": 2.8222, + "step": 376 + }, + { + "epoch": 0.03042530869179243, + "grad_norm": 0.9443284869194031, + "learning_rate": 0.00019990440087309263, + "loss": 2.8326, + "step": 377 + }, + { + "epoch": 0.030506012428375435, + "grad_norm": 0.9122868180274963, + "learning_rate": 0.0001999037094917517, + "loss": 2.7653, + "step": 378 + }, + { + "epoch": 0.03058671616495844, + "grad_norm": 0.8764635920524597, + 
"learning_rate": 0.0001999030156205492, + "loss": 2.7813, + "step": 379 + }, + { + "epoch": 0.03066741990154144, + "grad_norm": 0.8466865420341492, + "learning_rate": 0.0001999023192595024, + "loss": 2.8338, + "step": 380 + }, + { + "epoch": 0.030748123638124444, + "grad_norm": 0.8833961486816406, + "learning_rate": 0.00019990162040862863, + "loss": 2.78, + "step": 381 + }, + { + "epoch": 0.03082882737470745, + "grad_norm": 1.0298357009887695, + "learning_rate": 0.00019990091906794537, + "loss": 2.8059, + "step": 382 + }, + { + "epoch": 0.030909531111290453, + "grad_norm": 0.8651318550109863, + "learning_rate": 0.00019990021523747005, + "loss": 2.8608, + "step": 383 + }, + { + "epoch": 0.030990234847873457, + "grad_norm": 1.0262864828109741, + "learning_rate": 0.0001998995089172202, + "loss": 2.8226, + "step": 384 + }, + { + "epoch": 0.03107093858445646, + "grad_norm": 0.9266276955604553, + "learning_rate": 0.00019989880010721348, + "loss": 2.9414, + "step": 385 + }, + { + "epoch": 0.031151642321039463, + "grad_norm": 0.8762117028236389, + "learning_rate": 0.00019989808880746749, + "loss": 2.8023, + "step": 386 + }, + { + "epoch": 0.031232346057622467, + "grad_norm": 0.8531816601753235, + "learning_rate": 0.00019989737501800004, + "loss": 2.777, + "step": 387 + }, + { + "epoch": 0.031313049794205475, + "grad_norm": 0.8999545574188232, + "learning_rate": 0.0001998966587388288, + "loss": 2.8656, + "step": 388 + }, + { + "epoch": 0.03139375353078847, + "grad_norm": 0.932248055934906, + "learning_rate": 0.00019989593996997177, + "loss": 2.8212, + "step": 389 + }, + { + "epoch": 0.031474457267371476, + "grad_norm": 0.9059134125709534, + "learning_rate": 0.00019989521871144672, + "loss": 2.7945, + "step": 390 + }, + { + "epoch": 0.03155516100395448, + "grad_norm": 0.9323028922080994, + "learning_rate": 0.00019989449496327172, + "loss": 2.8338, + "step": 391 + }, + { + "epoch": 0.031635864740537485, + "grad_norm": 0.9141251444816589, + "learning_rate": 
0.0001998937687254648, + "loss": 2.7935, + "step": 392 + }, + { + "epoch": 0.03171656847712049, + "grad_norm": 1.0026880502700806, + "learning_rate": 0.000199893039998044, + "loss": 2.8811, + "step": 393 + }, + { + "epoch": 0.031797272213703494, + "grad_norm": 1.0178622007369995, + "learning_rate": 0.00019989230878102756, + "loss": 2.9003, + "step": 394 + }, + { + "epoch": 0.0318779759502865, + "grad_norm": 0.9111912846565247, + "learning_rate": 0.00019989157507443363, + "loss": 2.8399, + "step": 395 + }, + { + "epoch": 0.0319586796868695, + "grad_norm": 1.054563283920288, + "learning_rate": 0.00019989083887828052, + "loss": 2.9088, + "step": 396 + }, + { + "epoch": 0.03203938342345251, + "grad_norm": 0.9459816217422485, + "learning_rate": 0.00019989010019258663, + "loss": 2.805, + "step": 397 + }, + { + "epoch": 0.03212008716003551, + "grad_norm": 1.0139873027801514, + "learning_rate": 0.00019988935901737033, + "loss": 2.8452, + "step": 398 + }, + { + "epoch": 0.032200790896618516, + "grad_norm": 0.986325204372406, + "learning_rate": 0.00019988861535265006, + "loss": 2.8311, + "step": 399 + }, + { + "epoch": 0.03228149463320152, + "grad_norm": 0.9565223455429077, + "learning_rate": 0.00019988786919844436, + "loss": 2.7766, + "step": 400 + }, + { + "epoch": 0.032362198369784524, + "grad_norm": 0.8901559710502625, + "learning_rate": 0.0001998871205547719, + "loss": 2.7966, + "step": 401 + }, + { + "epoch": 0.03244290210636752, + "grad_norm": 1.0959528684616089, + "learning_rate": 0.00019988636942165123, + "loss": 2.8377, + "step": 402 + }, + { + "epoch": 0.032523605842950526, + "grad_norm": 1.0768988132476807, + "learning_rate": 0.00019988561579910118, + "loss": 2.8267, + "step": 403 + }, + { + "epoch": 0.03260430957953353, + "grad_norm": 0.9563855528831482, + "learning_rate": 0.00019988485968714048, + "loss": 2.8459, + "step": 404 + }, + { + "epoch": 0.032685013316116535, + "grad_norm": 0.930927038192749, + "learning_rate": 0.00019988410108578796, + "loss": 2.8053, 
+ "step": 405 + }, + { + "epoch": 0.03276571705269954, + "grad_norm": 1.0658363103866577, + "learning_rate": 0.00019988333999506255, + "loss": 2.8512, + "step": 406 + }, + { + "epoch": 0.03284642078928254, + "grad_norm": 0.9258090257644653, + "learning_rate": 0.0001998825764149832, + "loss": 2.8541, + "step": 407 + }, + { + "epoch": 0.03292712452586555, + "grad_norm": 1.18158757686615, + "learning_rate": 0.00019988181034556895, + "loss": 2.8838, + "step": 408 + }, + { + "epoch": 0.03300782826244855, + "grad_norm": 0.9506754875183105, + "learning_rate": 0.00019988104178683891, + "loss": 2.7733, + "step": 409 + }, + { + "epoch": 0.033088531999031556, + "grad_norm": 0.9559460282325745, + "learning_rate": 0.0001998802707388122, + "loss": 2.9259, + "step": 410 + }, + { + "epoch": 0.03316923573561456, + "grad_norm": 0.9322298765182495, + "learning_rate": 0.00019987949720150808, + "loss": 2.8318, + "step": 411 + }, + { + "epoch": 0.033249939472197565, + "grad_norm": 0.9226691722869873, + "learning_rate": 0.00019987872117494576, + "loss": 2.9063, + "step": 412 + }, + { + "epoch": 0.03333064320878057, + "grad_norm": 1.0543674230575562, + "learning_rate": 0.00019987794265914464, + "loss": 2.7877, + "step": 413 + }, + { + "epoch": 0.033411346945363574, + "grad_norm": 0.989986002445221, + "learning_rate": 0.00019987716165412408, + "loss": 2.8354, + "step": 414 + }, + { + "epoch": 0.03349205068194657, + "grad_norm": 0.8703451752662659, + "learning_rate": 0.0001998763781599036, + "loss": 2.8127, + "step": 415 + }, + { + "epoch": 0.033572754418529575, + "grad_norm": 0.974943220615387, + "learning_rate": 0.0001998755921765027, + "loss": 2.9272, + "step": 416 + }, + { + "epoch": 0.03365345815511258, + "grad_norm": 0.8714169859886169, + "learning_rate": 0.000199874803703941, + "loss": 2.8027, + "step": 417 + }, + { + "epoch": 0.033734161891695584, + "grad_norm": 0.9251161217689514, + "learning_rate": 0.00019987401274223804, + "loss": 2.8186, + "step": 418 + }, + { + "epoch": 
0.03381486562827859, + "grad_norm": 0.9657236933708191, + "learning_rate": 0.00019987321929141366, + "loss": 2.8297, + "step": 419 + }, + { + "epoch": 0.03389556936486159, + "grad_norm": 0.9022002816200256, + "learning_rate": 0.00019987242335148757, + "loss": 2.881, + "step": 420 + }, + { + "epoch": 0.0339762731014446, + "grad_norm": 0.9479621052742004, + "learning_rate": 0.0001998716249224796, + "loss": 2.8288, + "step": 421 + }, + { + "epoch": 0.0340569768380276, + "grad_norm": 0.9458955526351929, + "learning_rate": 0.00019987082400440968, + "loss": 2.8861, + "step": 422 + }, + { + "epoch": 0.034137680574610606, + "grad_norm": 0.9444572329521179, + "learning_rate": 0.0001998700205972978, + "loss": 2.8877, + "step": 423 + }, + { + "epoch": 0.03421838431119361, + "grad_norm": 0.9263925552368164, + "learning_rate": 0.00019986921470116392, + "loss": 2.8028, + "step": 424 + }, + { + "epoch": 0.034299088047776614, + "grad_norm": 1.0690566301345825, + "learning_rate": 0.00019986840631602812, + "loss": 2.882, + "step": 425 + }, + { + "epoch": 0.03437979178435962, + "grad_norm": 0.8999007940292358, + "learning_rate": 0.0001998675954419106, + "loss": 2.8179, + "step": 426 + }, + { + "epoch": 0.03446049552094262, + "grad_norm": 0.894395112991333, + "learning_rate": 0.00019986678207883153, + "loss": 2.814, + "step": 427 + }, + { + "epoch": 0.03454119925752562, + "grad_norm": 0.8621550798416138, + "learning_rate": 0.00019986596622681123, + "loss": 2.7584, + "step": 428 + }, + { + "epoch": 0.034621902994108625, + "grad_norm": 0.9452527165412903, + "learning_rate": 0.00019986514788587, + "loss": 2.8949, + "step": 429 + }, + { + "epoch": 0.03470260673069163, + "grad_norm": 0.8973272442817688, + "learning_rate": 0.0001998643270560282, + "loss": 2.868, + "step": 430 + }, + { + "epoch": 0.034783310467274633, + "grad_norm": 0.9887418150901794, + "learning_rate": 0.00019986350373730634, + "loss": 2.8009, + "step": 431 + }, + { + "epoch": 0.03486401420385764, + "grad_norm": 
0.9449994564056396, + "learning_rate": 0.0001998626779297249, + "loss": 2.8305, + "step": 432 + }, + { + "epoch": 0.03494471794044064, + "grad_norm": 1.052871823310852, + "learning_rate": 0.0001998618496333045, + "loss": 2.8136, + "step": 433 + }, + { + "epoch": 0.035025421677023647, + "grad_norm": 0.9600724577903748, + "learning_rate": 0.00019986101884806576, + "loss": 2.7857, + "step": 434 + }, + { + "epoch": 0.03510612541360665, + "grad_norm": 0.874043345451355, + "learning_rate": 0.00019986018557402942, + "loss": 2.8524, + "step": 435 + }, + { + "epoch": 0.035186829150189655, + "grad_norm": 0.9810616374015808, + "learning_rate": 0.0001998593498112162, + "loss": 2.7506, + "step": 436 + }, + { + "epoch": 0.03526753288677266, + "grad_norm": 0.9163016080856323, + "learning_rate": 0.00019985851155964693, + "loss": 2.798, + "step": 437 + }, + { + "epoch": 0.035348236623355664, + "grad_norm": 1.0688380002975464, + "learning_rate": 0.00019985767081934252, + "loss": 2.8916, + "step": 438 + }, + { + "epoch": 0.03542894035993867, + "grad_norm": 0.925020158290863, + "learning_rate": 0.00019985682759032393, + "loss": 2.8017, + "step": 439 + }, + { + "epoch": 0.035509644096521666, + "grad_norm": 0.9429430961608887, + "learning_rate": 0.0001998559818726122, + "loss": 2.837, + "step": 440 + }, + { + "epoch": 0.03559034783310467, + "grad_norm": 0.9135627150535583, + "learning_rate": 0.00019985513366622832, + "loss": 2.8423, + "step": 441 + }, + { + "epoch": 0.035671051569687674, + "grad_norm": 0.9218924045562744, + "learning_rate": 0.00019985428297119353, + "loss": 2.854, + "step": 442 + }, + { + "epoch": 0.03575175530627068, + "grad_norm": 0.9307878613471985, + "learning_rate": 0.00019985342978752897, + "loss": 2.8591, + "step": 443 + }, + { + "epoch": 0.03583245904285368, + "grad_norm": 0.935394287109375, + "learning_rate": 0.00019985257411525592, + "loss": 2.8388, + "step": 444 + }, + { + "epoch": 0.03591316277943669, + "grad_norm": 0.890959620475769, + "learning_rate": 
0.0001998517159543957, + "loss": 2.78, + "step": 445 + }, + { + "epoch": 0.03599386651601969, + "grad_norm": 1.110924482345581, + "learning_rate": 0.0001998508553049697, + "loss": 2.8117, + "step": 446 + }, + { + "epoch": 0.036074570252602696, + "grad_norm": 0.8774176239967346, + "learning_rate": 0.0001998499921669994, + "loss": 2.8368, + "step": 447 + }, + { + "epoch": 0.0361552739891857, + "grad_norm": 0.9766948819160461, + "learning_rate": 0.00019984912654050625, + "loss": 2.764, + "step": 448 + }, + { + "epoch": 0.036235977725768705, + "grad_norm": 1.1439398527145386, + "learning_rate": 0.00019984825842551187, + "loss": 2.84, + "step": 449 + }, + { + "epoch": 0.03631668146235171, + "grad_norm": 0.8995118737220764, + "learning_rate": 0.0001998473878220379, + "loss": 2.834, + "step": 450 + }, + { + "epoch": 0.03639738519893471, + "grad_norm": 0.9810060858726501, + "learning_rate": 0.000199846514730106, + "loss": 2.9338, + "step": 451 + }, + { + "epoch": 0.03647808893551772, + "grad_norm": 1.0862053632736206, + "learning_rate": 0.00019984563914973795, + "loss": 2.837, + "step": 452 + }, + { + "epoch": 0.036558792672100715, + "grad_norm": 0.9456702470779419, + "learning_rate": 0.0001998447610809556, + "loss": 2.7664, + "step": 453 + }, + { + "epoch": 0.03663949640868372, + "grad_norm": 1.0714432001113892, + "learning_rate": 0.0001998438805237808, + "loss": 2.8339, + "step": 454 + }, + { + "epoch": 0.036720200145266724, + "grad_norm": 0.89134281873703, + "learning_rate": 0.00019984299747823547, + "loss": 2.7818, + "step": 455 + }, + { + "epoch": 0.03680090388184973, + "grad_norm": 0.869742214679718, + "learning_rate": 0.0001998421119443417, + "loss": 2.7916, + "step": 456 + }, + { + "epoch": 0.03688160761843273, + "grad_norm": 0.9307265281677246, + "learning_rate": 0.00019984122392212149, + "loss": 2.8485, + "step": 457 + }, + { + "epoch": 0.03696231135501574, + "grad_norm": 0.900215744972229, + "learning_rate": 0.00019984033341159698, + "loss": 2.8536, + "step": 
458 + }, + { + "epoch": 0.03704301509159874, + "grad_norm": 0.8679699897766113, + "learning_rate": 0.00019983944041279038, + "loss": 2.8344, + "step": 459 + }, + { + "epoch": 0.037123718828181745, + "grad_norm": 0.9540488719940186, + "learning_rate": 0.00019983854492572394, + "loss": 2.873, + "step": 460 + }, + { + "epoch": 0.03720442256476475, + "grad_norm": 0.8697962760925293, + "learning_rate": 0.00019983764695042, + "loss": 2.8122, + "step": 461 + }, + { + "epoch": 0.037285126301347754, + "grad_norm": 0.9534483551979065, + "learning_rate": 0.0001998367464869009, + "loss": 2.8842, + "step": 462 + }, + { + "epoch": 0.03736583003793076, + "grad_norm": 0.8402275443077087, + "learning_rate": 0.00019983584353518911, + "loss": 2.8135, + "step": 463 + }, + { + "epoch": 0.03744653377451376, + "grad_norm": 0.8226146697998047, + "learning_rate": 0.0001998349380953071, + "loss": 2.8036, + "step": 464 + }, + { + "epoch": 0.03752723751109677, + "grad_norm": 0.9292199611663818, + "learning_rate": 0.0001998340301672775, + "loss": 2.7887, + "step": 465 + }, + { + "epoch": 0.037607941247679764, + "grad_norm": 0.9035555124282837, + "learning_rate": 0.0001998331197511229, + "loss": 2.7851, + "step": 466 + }, + { + "epoch": 0.03768864498426277, + "grad_norm": 0.9411706328392029, + "learning_rate": 0.00019983220684686596, + "loss": 2.7782, + "step": 467 + }, + { + "epoch": 0.03776934872084577, + "grad_norm": 0.9867696166038513, + "learning_rate": 0.0001998312914545295, + "loss": 2.8125, + "step": 468 + }, + { + "epoch": 0.03785005245742878, + "grad_norm": 0.9683675169944763, + "learning_rate": 0.00019983037357413624, + "loss": 2.8325, + "step": 469 + }, + { + "epoch": 0.03793075619401178, + "grad_norm": 0.963941752910614, + "learning_rate": 0.00019982945320570913, + "loss": 2.8281, + "step": 470 + }, + { + "epoch": 0.038011459930594786, + "grad_norm": 0.9812459349632263, + "learning_rate": 0.0001998285303492711, + "loss": 2.765, + "step": 471 + }, + { + "epoch": 0.03809216366717779, 
+ "grad_norm": 0.9681405425071716, + "learning_rate": 0.00019982760500484516, + "loss": 2.8882, + "step": 472 + }, + { + "epoch": 0.038172867403760795, + "grad_norm": 0.8983948826789856, + "learning_rate": 0.00019982667717245432, + "loss": 2.8182, + "step": 473 + }, + { + "epoch": 0.0382535711403438, + "grad_norm": 0.9875261783599854, + "learning_rate": 0.00019982574685212178, + "loss": 2.8072, + "step": 474 + }, + { + "epoch": 0.038334274876926804, + "grad_norm": 0.8889442086219788, + "learning_rate": 0.00019982481404387064, + "loss": 2.8635, + "step": 475 + }, + { + "epoch": 0.03841497861350981, + "grad_norm": 0.8904242515563965, + "learning_rate": 0.00019982387874772418, + "loss": 2.829, + "step": 476 + }, + { + "epoch": 0.03849568235009281, + "grad_norm": 1.0182000398635864, + "learning_rate": 0.00019982294096370574, + "loss": 2.8552, + "step": 477 + }, + { + "epoch": 0.03857638608667582, + "grad_norm": 0.9867151975631714, + "learning_rate": 0.00019982200069183867, + "loss": 2.8201, + "step": 478 + }, + { + "epoch": 0.038657089823258814, + "grad_norm": 0.9785345196723938, + "learning_rate": 0.0001998210579321464, + "loss": 2.8652, + "step": 479 + }, + { + "epoch": 0.03873779355984182, + "grad_norm": 0.9696915149688721, + "learning_rate": 0.00019982011268465243, + "loss": 2.8276, + "step": 480 + }, + { + "epoch": 0.03881849729642482, + "grad_norm": 0.9257470965385437, + "learning_rate": 0.00019981916494938033, + "loss": 2.8321, + "step": 481 + }, + { + "epoch": 0.03889920103300783, + "grad_norm": 0.9394895434379578, + "learning_rate": 0.00019981821472635369, + "loss": 2.8747, + "step": 482 + }, + { + "epoch": 0.03897990476959083, + "grad_norm": 0.9888504147529602, + "learning_rate": 0.00019981726201559626, + "loss": 2.8201, + "step": 483 + }, + { + "epoch": 0.039060608506173836, + "grad_norm": 0.8957003951072693, + "learning_rate": 0.0001998163068171317, + "loss": 2.8255, + "step": 484 + }, + { + "epoch": 0.03914131224275684, + "grad_norm": 0.9792008996009827, + 
"learning_rate": 0.00019981534913098383, + "loss": 2.7985, + "step": 485 + }, + { + "epoch": 0.039222015979339844, + "grad_norm": 0.8689060211181641, + "learning_rate": 0.00019981438895717656, + "loss": 2.7945, + "step": 486 + }, + { + "epoch": 0.03930271971592285, + "grad_norm": 0.9932593703269958, + "learning_rate": 0.0001998134262957338, + "loss": 2.9041, + "step": 487 + }, + { + "epoch": 0.03938342345250585, + "grad_norm": 0.8496069312095642, + "learning_rate": 0.00019981246114667955, + "loss": 2.8433, + "step": 488 + }, + { + "epoch": 0.03946412718908886, + "grad_norm": 0.8484126925468445, + "learning_rate": 0.00019981149351003786, + "loss": 2.7872, + "step": 489 + }, + { + "epoch": 0.03954483092567186, + "grad_norm": 0.9208858013153076, + "learning_rate": 0.00019981052338583283, + "loss": 2.7776, + "step": 490 + }, + { + "epoch": 0.03962553466225486, + "grad_norm": 0.9305418729782104, + "learning_rate": 0.00019980955077408865, + "loss": 2.7851, + "step": 491 + }, + { + "epoch": 0.03970623839883786, + "grad_norm": 0.9803212881088257, + "learning_rate": 0.00019980857567482955, + "loss": 2.8469, + "step": 492 + }, + { + "epoch": 0.03978694213542087, + "grad_norm": 0.9165790677070618, + "learning_rate": 0.00019980759808807985, + "loss": 2.8513, + "step": 493 + }, + { + "epoch": 0.03986764587200387, + "grad_norm": 0.9153794050216675, + "learning_rate": 0.00019980661801386393, + "loss": 2.8322, + "step": 494 + }, + { + "epoch": 0.039948349608586876, + "grad_norm": 0.89347904920578, + "learning_rate": 0.00019980563545220616, + "loss": 2.8316, + "step": 495 + }, + { + "epoch": 0.04002905334516988, + "grad_norm": 0.9882236123085022, + "learning_rate": 0.00019980465040313105, + "loss": 2.7471, + "step": 496 + }, + { + "epoch": 0.040109757081752885, + "grad_norm": 0.9391099810600281, + "learning_rate": 0.00019980366286666322, + "loss": 2.8182, + "step": 497 + }, + { + "epoch": 0.04019046081833589, + "grad_norm": 1.0155293941497803, + "learning_rate": 
0.00019980267284282717, + "loss": 2.8721, + "step": 498 + }, + { + "epoch": 0.040271164554918894, + "grad_norm": 0.9952930212020874, + "learning_rate": 0.00019980168033164765, + "loss": 2.8538, + "step": 499 + }, + { + "epoch": 0.0403518682915019, + "grad_norm": 0.8385666608810425, + "learning_rate": 0.00019980068533314934, + "loss": 2.8242, + "step": 500 + }, + { + "epoch": 0.0404325720280849, + "grad_norm": 0.8747559785842896, + "learning_rate": 0.0001997996878473571, + "loss": 2.7908, + "step": 501 + }, + { + "epoch": 0.04051327576466791, + "grad_norm": 0.9267926216125488, + "learning_rate": 0.00019979868787429575, + "loss": 2.8359, + "step": 502 + }, + { + "epoch": 0.04059397950125091, + "grad_norm": 0.8194155693054199, + "learning_rate": 0.00019979768541399022, + "loss": 2.8161, + "step": 503 + }, + { + "epoch": 0.04067468323783391, + "grad_norm": 0.8923258185386658, + "learning_rate": 0.00019979668046646548, + "loss": 2.7547, + "step": 504 + }, + { + "epoch": 0.04075538697441691, + "grad_norm": 0.8965646028518677, + "learning_rate": 0.00019979567303174663, + "loss": 2.8432, + "step": 505 + }, + { + "epoch": 0.04083609071099992, + "grad_norm": 0.814481794834137, + "learning_rate": 0.0001997946631098587, + "loss": 2.8327, + "step": 506 + }, + { + "epoch": 0.04091679444758292, + "grad_norm": 0.8806928396224976, + "learning_rate": 0.00019979365070082694, + "loss": 2.8573, + "step": 507 + }, + { + "epoch": 0.040997498184165926, + "grad_norm": 0.8546919822692871, + "learning_rate": 0.00019979263580467653, + "loss": 2.8618, + "step": 508 + }, + { + "epoch": 0.04107820192074893, + "grad_norm": 0.8557277321815491, + "learning_rate": 0.00019979161842143274, + "loss": 2.8454, + "step": 509 + }, + { + "epoch": 0.041158905657331935, + "grad_norm": 0.9153180122375488, + "learning_rate": 0.00019979059855112098, + "loss": 2.8027, + "step": 510 + }, + { + "epoch": 0.04123960939391494, + "grad_norm": 0.8616741895675659, + "learning_rate": 0.00019978957619376666, + "loss": 
2.7628, + "step": 511 + }, + { + "epoch": 0.04132031313049794, + "grad_norm": 0.8777137398719788, + "learning_rate": 0.00019978855134939524, + "loss": 2.8443, + "step": 512 + }, + { + "epoch": 0.04140101686708095, + "grad_norm": 0.852100133895874, + "learning_rate": 0.0001997875240180323, + "loss": 2.8125, + "step": 513 + }, + { + "epoch": 0.04148172060366395, + "grad_norm": 0.8470742702484131, + "learning_rate": 0.00019978649419970338, + "loss": 2.8139, + "step": 514 + }, + { + "epoch": 0.041562424340246956, + "grad_norm": 0.8890305161476135, + "learning_rate": 0.0001997854618944342, + "loss": 2.8633, + "step": 515 + }, + { + "epoch": 0.04164312807682996, + "grad_norm": 0.8893599510192871, + "learning_rate": 0.00019978442710225043, + "loss": 2.8066, + "step": 516 + }, + { + "epoch": 0.04172383181341296, + "grad_norm": 0.9093891382217407, + "learning_rate": 0.00019978338982317792, + "loss": 2.8026, + "step": 517 + }, + { + "epoch": 0.04180453554999596, + "grad_norm": 0.9775434136390686, + "learning_rate": 0.00019978235005724252, + "loss": 2.849, + "step": 518 + }, + { + "epoch": 0.04188523928657897, + "grad_norm": 1.0014091730117798, + "learning_rate": 0.00019978130780447012, + "loss": 2.8572, + "step": 519 + }, + { + "epoch": 0.04196594302316197, + "grad_norm": 0.8487632870674133, + "learning_rate": 0.00019978026306488668, + "loss": 2.7611, + "step": 520 + }, + { + "epoch": 0.042046646759744975, + "grad_norm": 0.86592698097229, + "learning_rate": 0.00019977921583851825, + "loss": 2.7616, + "step": 521 + }, + { + "epoch": 0.04212735049632798, + "grad_norm": 1.0285916328430176, + "learning_rate": 0.00019977816612539093, + "loss": 2.8049, + "step": 522 + }, + { + "epoch": 0.042208054232910984, + "grad_norm": 0.9716495871543884, + "learning_rate": 0.00019977711392553092, + "loss": 2.8459, + "step": 523 + }, + { + "epoch": 0.04228875796949399, + "grad_norm": 0.8842264413833618, + "learning_rate": 0.0001997760592389644, + "loss": 2.7934, + "step": 524 + }, + { + 
"epoch": 0.04236946170607699, + "grad_norm": 0.8839964866638184, + "learning_rate": 0.00019977500206571765, + "loss": 2.8135, + "step": 525 + }, + { + "epoch": 0.04245016544266, + "grad_norm": 0.870331346988678, + "learning_rate": 0.00019977394240581705, + "loss": 2.8684, + "step": 526 + }, + { + "epoch": 0.042530869179243, + "grad_norm": 0.8844720125198364, + "learning_rate": 0.000199772880259289, + "loss": 2.7867, + "step": 527 + }, + { + "epoch": 0.042611572915826006, + "grad_norm": 0.9353455901145935, + "learning_rate": 0.00019977181562615994, + "loss": 2.8051, + "step": 528 + }, + { + "epoch": 0.04269227665240901, + "grad_norm": 0.9530816078186035, + "learning_rate": 0.00019977074850645646, + "loss": 2.7915, + "step": 529 + }, + { + "epoch": 0.04277298038899201, + "grad_norm": 0.8984190821647644, + "learning_rate": 0.00019976967890020507, + "loss": 2.7957, + "step": 530 + }, + { + "epoch": 0.04285368412557501, + "grad_norm": 0.9146613478660583, + "learning_rate": 0.00019976860680743252, + "loss": 2.9053, + "step": 531 + }, + { + "epoch": 0.042934387862158016, + "grad_norm": 0.9228026866912842, + "learning_rate": 0.0001997675322281655, + "loss": 2.8578, + "step": 532 + }, + { + "epoch": 0.04301509159874102, + "grad_norm": 0.8266343474388123, + "learning_rate": 0.0001997664551624308, + "loss": 2.7393, + "step": 533 + }, + { + "epoch": 0.043095795335324025, + "grad_norm": 0.9197628498077393, + "learning_rate": 0.0001997653756102552, + "loss": 2.8828, + "step": 534 + }, + { + "epoch": 0.04317649907190703, + "grad_norm": 0.9145991802215576, + "learning_rate": 0.00019976429357166566, + "loss": 2.7767, + "step": 535 + }, + { + "epoch": 0.04325720280849003, + "grad_norm": 0.9123281240463257, + "learning_rate": 0.00019976320904668913, + "loss": 2.7993, + "step": 536 + }, + { + "epoch": 0.04333790654507304, + "grad_norm": 0.8597636818885803, + "learning_rate": 0.00019976212203535266, + "loss": 2.8148, + "step": 537 + }, + { + "epoch": 0.04341861028165604, + "grad_norm": 
0.8963296413421631, + "learning_rate": 0.00019976103253768334, + "loss": 2.7722, + "step": 538 + }, + { + "epoch": 0.043499314018239046, + "grad_norm": 0.9480688571929932, + "learning_rate": 0.0001997599405537083, + "loss": 2.8038, + "step": 539 + }, + { + "epoch": 0.04358001775482205, + "grad_norm": 0.8115736842155457, + "learning_rate": 0.00019975884608345476, + "loss": 2.8069, + "step": 540 + }, + { + "epoch": 0.043660721491405055, + "grad_norm": 0.9642506837844849, + "learning_rate": 0.00019975774912695, + "loss": 2.8703, + "step": 541 + }, + { + "epoch": 0.04374142522798805, + "grad_norm": 0.9638697504997253, + "learning_rate": 0.0001997566496842214, + "loss": 2.8223, + "step": 542 + }, + { + "epoch": 0.04382212896457106, + "grad_norm": 0.9478490352630615, + "learning_rate": 0.00019975554775529628, + "loss": 2.8164, + "step": 543 + }, + { + "epoch": 0.04390283270115406, + "grad_norm": 1.1771583557128906, + "learning_rate": 0.00019975444334020215, + "loss": 2.7969, + "step": 544 + }, + { + "epoch": 0.043983536437737066, + "grad_norm": 0.9597339034080505, + "learning_rate": 0.00019975333643896655, + "loss": 2.8025, + "step": 545 + }, + { + "epoch": 0.04406424017432007, + "grad_norm": 0.981595516204834, + "learning_rate": 0.00019975222705161704, + "loss": 2.7994, + "step": 546 + }, + { + "epoch": 0.044144943910903074, + "grad_norm": 0.9581133723258972, + "learning_rate": 0.00019975111517818127, + "loss": 2.802, + "step": 547 + }, + { + "epoch": 0.04422564764748608, + "grad_norm": 0.8643878698348999, + "learning_rate": 0.00019975000081868697, + "loss": 2.7958, + "step": 548 + }, + { + "epoch": 0.04430635138406908, + "grad_norm": 1.2188652753829956, + "learning_rate": 0.0001997488839731619, + "loss": 2.8786, + "step": 549 + }, + { + "epoch": 0.04438705512065209, + "grad_norm": 0.9138071537017822, + "learning_rate": 0.00019974776464163387, + "loss": 2.809, + "step": 550 + }, + { + "epoch": 0.04446775885723509, + "grad_norm": 0.9604587554931641, + "learning_rate": 
0.00019974664282413083, + "loss": 2.8009, + "step": 551 + }, + { + "epoch": 0.044548462593818096, + "grad_norm": 1.0271116495132446, + "learning_rate": 0.00019974551852068072, + "loss": 2.8689, + "step": 552 + }, + { + "epoch": 0.0446291663304011, + "grad_norm": 0.9330877065658569, + "learning_rate": 0.00019974439173131155, + "loss": 2.7613, + "step": 553 + }, + { + "epoch": 0.044709870066984105, + "grad_norm": 0.9549325108528137, + "learning_rate": 0.00019974326245605136, + "loss": 2.8314, + "step": 554 + }, + { + "epoch": 0.0447905738035671, + "grad_norm": 0.8928439021110535, + "learning_rate": 0.00019974213069492836, + "loss": 2.8097, + "step": 555 + }, + { + "epoch": 0.044871277540150106, + "grad_norm": 0.8705076575279236, + "learning_rate": 0.00019974099644797075, + "loss": 2.8112, + "step": 556 + }, + { + "epoch": 0.04495198127673311, + "grad_norm": 0.988345742225647, + "learning_rate": 0.00019973985971520676, + "loss": 2.7648, + "step": 557 + }, + { + "epoch": 0.045032685013316115, + "grad_norm": 0.9161957502365112, + "learning_rate": 0.00019973872049666475, + "loss": 2.8691, + "step": 558 + }, + { + "epoch": 0.04511338874989912, + "grad_norm": 0.8404076099395752, + "learning_rate": 0.00019973757879237312, + "loss": 2.7708, + "step": 559 + }, + { + "epoch": 0.045194092486482124, + "grad_norm": 1.05247962474823, + "learning_rate": 0.0001997364346023603, + "loss": 2.8638, + "step": 560 + }, + { + "epoch": 0.04527479622306513, + "grad_norm": 0.9235066175460815, + "learning_rate": 0.00019973528792665483, + "loss": 2.7876, + "step": 561 + }, + { + "epoch": 0.04535549995964813, + "grad_norm": 1.220075249671936, + "learning_rate": 0.00019973413876528526, + "loss": 2.8563, + "step": 562 + }, + { + "epoch": 0.04543620369623114, + "grad_norm": 0.9098384976387024, + "learning_rate": 0.00019973298711828025, + "loss": 2.8427, + "step": 563 + }, + { + "epoch": 0.04551690743281414, + "grad_norm": 0.8792217969894409, + "learning_rate": 0.00019973183298566848, + "loss": 
2.8673, + "step": 564 + }, + { + "epoch": 0.045597611169397145, + "grad_norm": 0.9895235896110535, + "learning_rate": 0.00019973067636747875, + "loss": 2.8262, + "step": 565 + }, + { + "epoch": 0.04567831490598015, + "grad_norm": 0.9191479086875916, + "learning_rate": 0.00019972951726373984, + "loss": 2.8005, + "step": 566 + }, + { + "epoch": 0.045759018642563154, + "grad_norm": 0.9631491899490356, + "learning_rate": 0.0001997283556744807, + "loss": 2.8438, + "step": 567 + }, + { + "epoch": 0.04583972237914615, + "grad_norm": 0.8302746415138245, + "learning_rate": 0.00019972719159973024, + "loss": 2.8221, + "step": 568 + }, + { + "epoch": 0.045920426115729156, + "grad_norm": 0.8238534927368164, + "learning_rate": 0.00019972602503951748, + "loss": 2.7674, + "step": 569 + }, + { + "epoch": 0.04600112985231216, + "grad_norm": 0.9675811529159546, + "learning_rate": 0.00019972485599387146, + "loss": 2.8457, + "step": 570 + }, + { + "epoch": 0.046081833588895164, + "grad_norm": 0.8663914203643799, + "learning_rate": 0.00019972368446282134, + "loss": 2.7851, + "step": 571 + }, + { + "epoch": 0.04616253732547817, + "grad_norm": 0.9904592633247375, + "learning_rate": 0.00019972251044639636, + "loss": 2.8792, + "step": 572 + }, + { + "epoch": 0.04624324106206117, + "grad_norm": 0.907600462436676, + "learning_rate": 0.0001997213339446257, + "loss": 2.7991, + "step": 573 + }, + { + "epoch": 0.04632394479864418, + "grad_norm": 0.871362566947937, + "learning_rate": 0.00019972015495753876, + "loss": 2.7959, + "step": 574 + }, + { + "epoch": 0.04640464853522718, + "grad_norm": 0.9664937853813171, + "learning_rate": 0.00019971897348516486, + "loss": 2.7847, + "step": 575 + }, + { + "epoch": 0.046485352271810186, + "grad_norm": 1.0670619010925293, + "learning_rate": 0.0001997177895275335, + "loss": 2.8864, + "step": 576 + }, + { + "epoch": 0.04656605600839319, + "grad_norm": 0.9281025528907776, + "learning_rate": 0.00019971660308467414, + "loss": 2.8568, + "step": 577 + }, + { + 
"epoch": 0.046646759744976195, + "grad_norm": 0.8964822888374329, + "learning_rate": 0.00019971541415661639, + "loss": 2.7246, + "step": 578 + }, + { + "epoch": 0.0467274634815592, + "grad_norm": 0.8921917676925659, + "learning_rate": 0.00019971422274338985, + "loss": 2.8513, + "step": 579 + }, + { + "epoch": 0.0468081672181422, + "grad_norm": 0.9550159573554993, + "learning_rate": 0.0001997130288450242, + "loss": 2.7615, + "step": 580 + }, + { + "epoch": 0.0468888709547252, + "grad_norm": 0.9330170154571533, + "learning_rate": 0.00019971183246154925, + "loss": 2.9017, + "step": 581 + }, + { + "epoch": 0.046969574691308205, + "grad_norm": 0.9125271439552307, + "learning_rate": 0.00019971063359299477, + "loss": 2.8263, + "step": 582 + }, + { + "epoch": 0.04705027842789121, + "grad_norm": 1.0005927085876465, + "learning_rate": 0.00019970943223939066, + "loss": 2.8371, + "step": 583 + }, + { + "epoch": 0.047130982164474214, + "grad_norm": 1.0333613157272339, + "learning_rate": 0.00019970822840076685, + "loss": 2.8275, + "step": 584 + }, + { + "epoch": 0.04721168590105722, + "grad_norm": 0.8684708476066589, + "learning_rate": 0.00019970702207715334, + "loss": 2.8343, + "step": 585 + }, + { + "epoch": 0.04729238963764022, + "grad_norm": 1.1112761497497559, + "learning_rate": 0.00019970581326858025, + "loss": 2.9012, + "step": 586 + }, + { + "epoch": 0.04737309337422323, + "grad_norm": 1.0187962055206299, + "learning_rate": 0.00019970460197507763, + "loss": 2.8423, + "step": 587 + }, + { + "epoch": 0.04745379711080623, + "grad_norm": 0.9802024960517883, + "learning_rate": 0.00019970338819667567, + "loss": 2.867, + "step": 588 + }, + { + "epoch": 0.047534500847389236, + "grad_norm": 0.9825551509857178, + "learning_rate": 0.00019970217193340467, + "loss": 2.8359, + "step": 589 + }, + { + "epoch": 0.04761520458397224, + "grad_norm": 1.1399210691452026, + "learning_rate": 0.00019970095318529494, + "loss": 2.8356, + "step": 590 + }, + { + "epoch": 0.047695908320555244, + 
"grad_norm": 1.0373995304107666, + "learning_rate": 0.00019969973195237684, + "loss": 2.8005, + "step": 591 + }, + { + "epoch": 0.04777661205713825, + "grad_norm": 1.133596420288086, + "learning_rate": 0.00019969850823468077, + "loss": 2.8778, + "step": 592 + }, + { + "epoch": 0.047857315793721246, + "grad_norm": 1.0187327861785889, + "learning_rate": 0.00019969728203223728, + "loss": 2.8291, + "step": 593 + }, + { + "epoch": 0.04793801953030425, + "grad_norm": 1.0588128566741943, + "learning_rate": 0.00019969605334507688, + "loss": 2.9396, + "step": 594 + }, + { + "epoch": 0.048018723266887255, + "grad_norm": 0.8783230781555176, + "learning_rate": 0.00019969482217323026, + "loss": 2.8076, + "step": 595 + }, + { + "epoch": 0.04809942700347026, + "grad_norm": 1.0500195026397705, + "learning_rate": 0.00019969358851672805, + "loss": 2.9099, + "step": 596 + }, + { + "epoch": 0.04818013074005326, + "grad_norm": 0.9523593187332153, + "learning_rate": 0.000199692352375601, + "loss": 2.7448, + "step": 597 + }, + { + "epoch": 0.04826083447663627, + "grad_norm": 1.0008500814437866, + "learning_rate": 0.00019969111374987995, + "loss": 2.8212, + "step": 598 + }, + { + "epoch": 0.04834153821321927, + "grad_norm": 0.8992626070976257, + "learning_rate": 0.00019968987263959575, + "loss": 2.8698, + "step": 599 + }, + { + "epoch": 0.048422241949802276, + "grad_norm": 0.9914852380752563, + "learning_rate": 0.00019968862904477935, + "loss": 2.8221, + "step": 600 + }, + { + "epoch": 0.04850294568638528, + "grad_norm": 0.9633241295814514, + "learning_rate": 0.00019968738296546168, + "loss": 2.8835, + "step": 601 + }, + { + "epoch": 0.048583649422968285, + "grad_norm": 1.055831789970398, + "learning_rate": 0.00019968613440167387, + "loss": 2.8781, + "step": 602 + }, + { + "epoch": 0.04866435315955129, + "grad_norm": 0.913856029510498, + "learning_rate": 0.000199684883353447, + "loss": 2.7863, + "step": 603 + }, + { + "epoch": 0.048745056896134294, + "grad_norm": 0.8429243564605713, + 
"learning_rate": 0.00019968362982081226, + "loss": 2.7753, + "step": 604 + }, + { + "epoch": 0.0488257606327173, + "grad_norm": 0.9324761629104614, + "learning_rate": 0.0001996823738038009, + "loss": 2.8058, + "step": 605 + }, + { + "epoch": 0.048906464369300295, + "grad_norm": 1.0004981756210327, + "learning_rate": 0.0001996811153024442, + "loss": 2.8537, + "step": 606 + }, + { + "epoch": 0.0489871681058833, + "grad_norm": 0.9438043236732483, + "learning_rate": 0.00019967985431677354, + "loss": 2.8828, + "step": 607 + }, + { + "epoch": 0.049067871842466304, + "grad_norm": 0.9359340071678162, + "learning_rate": 0.00019967859084682034, + "loss": 2.8149, + "step": 608 + }, + { + "epoch": 0.04914857557904931, + "grad_norm": 1.0400227308273315, + "learning_rate": 0.00019967732489261609, + "loss": 2.8489, + "step": 609 + }, + { + "epoch": 0.04922927931563231, + "grad_norm": 0.8978031277656555, + "learning_rate": 0.00019967605645419237, + "loss": 2.8599, + "step": 610 + }, + { + "epoch": 0.04930998305221532, + "grad_norm": 0.9982689619064331, + "learning_rate": 0.00019967478553158073, + "loss": 2.9024, + "step": 611 + }, + { + "epoch": 0.04939068678879832, + "grad_norm": 1.0695222616195679, + "learning_rate": 0.00019967351212481292, + "loss": 2.8483, + "step": 612 + }, + { + "epoch": 0.049471390525381326, + "grad_norm": 1.0615525245666504, + "learning_rate": 0.0001996722362339206, + "loss": 2.806, + "step": 613 + }, + { + "epoch": 0.04955209426196433, + "grad_norm": 0.9624890089035034, + "learning_rate": 0.0001996709578589356, + "loss": 2.8641, + "step": 614 + }, + { + "epoch": 0.049632797998547334, + "grad_norm": 0.9156595468521118, + "learning_rate": 0.00019966967699988985, + "loss": 2.7991, + "step": 615 + }, + { + "epoch": 0.04971350173513034, + "grad_norm": 0.8687645196914673, + "learning_rate": 0.00019966839365681517, + "loss": 2.774, + "step": 616 + }, + { + "epoch": 0.04979420547171334, + "grad_norm": 0.9175437688827515, + "learning_rate": 0.00019966710782974359, 
+ "loss": 2.8064, + "step": 617 + }, + { + "epoch": 0.04987490920829635, + "grad_norm": 0.8897463083267212, + "learning_rate": 0.00019966581951870715, + "loss": 2.8487, + "step": 618 + }, + { + "epoch": 0.049955612944879345, + "grad_norm": 0.8908397555351257, + "learning_rate": 0.00019966452872373795, + "loss": 2.8523, + "step": 619 + }, + { + "epoch": 0.05003631668146235, + "grad_norm": 0.95484858751297, + "learning_rate": 0.00019966323544486818, + "loss": 2.8471, + "step": 620 + }, + { + "epoch": 0.050117020418045354, + "grad_norm": 0.9995831251144409, + "learning_rate": 0.00019966193968213008, + "loss": 2.8341, + "step": 621 + }, + { + "epoch": 0.05019772415462836, + "grad_norm": 0.8731706142425537, + "learning_rate": 0.00019966064143555587, + "loss": 2.8491, + "step": 622 + }, + { + "epoch": 0.05027842789121136, + "grad_norm": 0.9213298559188843, + "learning_rate": 0.000199659340705178, + "loss": 2.8256, + "step": 623 + }, + { + "epoch": 0.050359131627794367, + "grad_norm": 0.9565179347991943, + "learning_rate": 0.00019965803749102885, + "loss": 2.8177, + "step": 624 + }, + { + "epoch": 0.05043983536437737, + "grad_norm": 1.0076881647109985, + "learning_rate": 0.00019965673179314086, + "loss": 2.7812, + "step": 625 + }, + { + "epoch": 0.050520539100960375, + "grad_norm": 0.989647388458252, + "learning_rate": 0.00019965542361154666, + "loss": 2.9226, + "step": 626 + }, + { + "epoch": 0.05060124283754338, + "grad_norm": 0.9671580791473389, + "learning_rate": 0.00019965411294627878, + "loss": 2.8204, + "step": 627 + }, + { + "epoch": 0.050681946574126384, + "grad_norm": 0.9275986552238464, + "learning_rate": 0.00019965279979736989, + "loss": 2.8481, + "step": 628 + }, + { + "epoch": 0.05076265031070939, + "grad_norm": 0.9949543476104736, + "learning_rate": 0.00019965148416485273, + "loss": 2.8606, + "step": 629 + }, + { + "epoch": 0.05084335404729239, + "grad_norm": 0.9506482481956482, + "learning_rate": 0.0001996501660487601, + "loss": 2.8088, + "step": 630 + }, 
+ { + "epoch": 0.0509240577838754, + "grad_norm": 0.9147887229919434, + "learning_rate": 0.00019964884544912488, + "loss": 2.7997, + "step": 631 + }, + { + "epoch": 0.051004761520458394, + "grad_norm": 0.8964840769767761, + "learning_rate": 0.00019964752236597993, + "loss": 2.8342, + "step": 632 + }, + { + "epoch": 0.0510854652570414, + "grad_norm": 0.931811511516571, + "learning_rate": 0.00019964619679935824, + "loss": 2.8229, + "step": 633 + }, + { + "epoch": 0.0511661689936244, + "grad_norm": 0.8634423017501831, + "learning_rate": 0.00019964486874929282, + "loss": 2.803, + "step": 634 + }, + { + "epoch": 0.05124687273020741, + "grad_norm": 0.892223596572876, + "learning_rate": 0.00019964353821581683, + "loss": 2.802, + "step": 635 + }, + { + "epoch": 0.05132757646679041, + "grad_norm": 0.8373630046844482, + "learning_rate": 0.00019964220519896338, + "loss": 2.7693, + "step": 636 + }, + { + "epoch": 0.051408280203373416, + "grad_norm": 0.8729730248451233, + "learning_rate": 0.0001996408696987657, + "loss": 2.8467, + "step": 637 + }, + { + "epoch": 0.05148898393995642, + "grad_norm": 0.8994413614273071, + "learning_rate": 0.0001996395317152571, + "loss": 2.8837, + "step": 638 + }, + { + "epoch": 0.051569687676539425, + "grad_norm": 0.9146113395690918, + "learning_rate": 0.0001996381912484709, + "loss": 2.8189, + "step": 639 + }, + { + "epoch": 0.05165039141312243, + "grad_norm": 0.9330562353134155, + "learning_rate": 0.00019963684829844052, + "loss": 2.7873, + "step": 640 + }, + { + "epoch": 0.05173109514970543, + "grad_norm": 0.9076224565505981, + "learning_rate": 0.00019963550286519944, + "loss": 2.802, + "step": 641 + }, + { + "epoch": 0.05181179888628844, + "grad_norm": 0.9580704569816589, + "learning_rate": 0.00019963415494878115, + "loss": 2.8173, + "step": 642 + }, + { + "epoch": 0.05189250262287144, + "grad_norm": 0.9291248917579651, + "learning_rate": 0.00019963280454921928, + "loss": 2.7866, + "step": 643 + }, + { + "epoch": 0.05197320635945444, + 
"grad_norm": 0.9815296530723572, + "learning_rate": 0.0001996314516665475, + "loss": 2.7903, + "step": 644 + }, + { + "epoch": 0.052053910096037444, + "grad_norm": 0.9461820721626282, + "learning_rate": 0.00019963009630079949, + "loss": 2.7854, + "step": 645 + }, + { + "epoch": 0.05213461383262045, + "grad_norm": 0.9660771489143372, + "learning_rate": 0.00019962873845200908, + "loss": 2.9187, + "step": 646 + }, + { + "epoch": 0.05221531756920345, + "grad_norm": 0.8987802863121033, + "learning_rate": 0.00019962737812021002, + "loss": 2.8854, + "step": 647 + }, + { + "epoch": 0.05229602130578646, + "grad_norm": 0.9810429215431213, + "learning_rate": 0.0001996260153054363, + "loss": 2.8974, + "step": 648 + }, + { + "epoch": 0.05237672504236946, + "grad_norm": 0.8185738325119019, + "learning_rate": 0.00019962465000772183, + "loss": 2.797, + "step": 649 + }, + { + "epoch": 0.052457428778952465, + "grad_norm": 0.8976237773895264, + "learning_rate": 0.0001996232822271007, + "loss": 2.8557, + "step": 650 + }, + { + "epoch": 0.05253813251553547, + "grad_norm": 0.8591496348381042, + "learning_rate": 0.0001996219119636069, + "loss": 2.8521, + "step": 651 + }, + { + "epoch": 0.052618836252118474, + "grad_norm": 0.8907031416893005, + "learning_rate": 0.00019962053921727472, + "loss": 2.8117, + "step": 652 + }, + { + "epoch": 0.05269953998870148, + "grad_norm": 0.9034241437911987, + "learning_rate": 0.00019961916398813823, + "loss": 2.741, + "step": 653 + }, + { + "epoch": 0.05278024372528448, + "grad_norm": 0.8284802436828613, + "learning_rate": 0.00019961778627623176, + "loss": 2.776, + "step": 654 + }, + { + "epoch": 0.05286094746186749, + "grad_norm": 0.8459529876708984, + "learning_rate": 0.00019961640608158967, + "loss": 2.8027, + "step": 655 + }, + { + "epoch": 0.05294165119845049, + "grad_norm": 0.9720042943954468, + "learning_rate": 0.00019961502340424636, + "loss": 2.9086, + "step": 656 + }, + { + "epoch": 0.05302235493503349, + "grad_norm": 0.8581427335739136, + 
"learning_rate": 0.00019961363824423626, + "loss": 2.8347, + "step": 657 + }, + { + "epoch": 0.05310305867161649, + "grad_norm": 0.9545331597328186, + "learning_rate": 0.00019961225060159386, + "loss": 2.828, + "step": 658 + }, + { + "epoch": 0.0531837624081995, + "grad_norm": 1.0303562879562378, + "learning_rate": 0.00019961086047635385, + "loss": 2.8461, + "step": 659 + }, + { + "epoch": 0.0532644661447825, + "grad_norm": 0.86605304479599, + "learning_rate": 0.0001996094678685508, + "loss": 2.8355, + "step": 660 + }, + { + "epoch": 0.053345169881365506, + "grad_norm": 0.8146334886550903, + "learning_rate": 0.0001996080727782194, + "loss": 2.8638, + "step": 661 + }, + { + "epoch": 0.05342587361794851, + "grad_norm": 0.9434560537338257, + "learning_rate": 0.00019960667520539446, + "loss": 2.8196, + "step": 662 + }, + { + "epoch": 0.053506577354531515, + "grad_norm": 0.9362602829933167, + "learning_rate": 0.00019960527515011084, + "loss": 2.8452, + "step": 663 + }, + { + "epoch": 0.05358728109111452, + "grad_norm": 0.828713059425354, + "learning_rate": 0.00019960387261240334, + "loss": 2.8079, + "step": 664 + }, + { + "epoch": 0.053667984827697524, + "grad_norm": 0.8610214591026306, + "learning_rate": 0.00019960246759230697, + "loss": 2.8197, + "step": 665 + }, + { + "epoch": 0.05374868856428053, + "grad_norm": 0.8913124799728394, + "learning_rate": 0.00019960106008985674, + "loss": 2.8392, + "step": 666 + }, + { + "epoch": 0.05382939230086353, + "grad_norm": 0.8109759092330933, + "learning_rate": 0.00019959965010508778, + "loss": 2.7961, + "step": 667 + }, + { + "epoch": 0.05391009603744654, + "grad_norm": 0.8714832663536072, + "learning_rate": 0.00019959823763803514, + "loss": 2.7984, + "step": 668 + }, + { + "epoch": 0.05399079977402954, + "grad_norm": 0.9008125066757202, + "learning_rate": 0.00019959682268873408, + "loss": 2.8319, + "step": 669 + }, + { + "epoch": 0.05407150351061254, + "grad_norm": 0.8718584775924683, + "learning_rate": 0.00019959540525721985, 
+ "loss": 2.7973, + "step": 670 + }, + { + "epoch": 0.05415220724719554, + "grad_norm": 0.8666327595710754, + "learning_rate": 0.00019959398534352774, + "loss": 2.8296, + "step": 671 + }, + { + "epoch": 0.05423291098377855, + "grad_norm": 0.9755229949951172, + "learning_rate": 0.00019959256294769322, + "loss": 2.8358, + "step": 672 + }, + { + "epoch": 0.05431361472036155, + "grad_norm": 1.193708062171936, + "learning_rate": 0.0001995911380697517, + "loss": 2.7672, + "step": 673 + }, + { + "epoch": 0.054394318456944556, + "grad_norm": 0.9104088544845581, + "learning_rate": 0.00019958971070973866, + "loss": 2.8389, + "step": 674 + }, + { + "epoch": 0.05447502219352756, + "grad_norm": 0.9266251921653748, + "learning_rate": 0.0001995882808676897, + "loss": 2.8226, + "step": 675 + }, + { + "epoch": 0.054555725930110564, + "grad_norm": 1.1161282062530518, + "learning_rate": 0.00019958684854364046, + "loss": 2.8236, + "step": 676 + }, + { + "epoch": 0.05463642966669357, + "grad_norm": 0.9200586080551147, + "learning_rate": 0.00019958541373762666, + "loss": 2.8074, + "step": 677 + }, + { + "epoch": 0.05471713340327657, + "grad_norm": 1.0372560024261475, + "learning_rate": 0.000199583976449684, + "loss": 2.815, + "step": 678 + }, + { + "epoch": 0.05479783713985958, + "grad_norm": 0.8822301030158997, + "learning_rate": 0.0001995825366798483, + "loss": 2.7985, + "step": 679 + }, + { + "epoch": 0.05487854087644258, + "grad_norm": 0.9226076006889343, + "learning_rate": 0.00019958109442815553, + "loss": 2.7649, + "step": 680 + }, + { + "epoch": 0.054959244613025586, + "grad_norm": 0.8769479990005493, + "learning_rate": 0.00019957964969464156, + "loss": 2.8483, + "step": 681 + }, + { + "epoch": 0.05503994834960859, + "grad_norm": 0.8601027727127075, + "learning_rate": 0.0001995782024793424, + "loss": 2.8072, + "step": 682 + }, + { + "epoch": 0.05512065208619159, + "grad_norm": 0.9684911370277405, + "learning_rate": 0.00019957675278229416, + "loss": 2.8693, + "step": 683 + }, + { 
+ "epoch": 0.05520135582277459, + "grad_norm": 0.9119890928268433, + "learning_rate": 0.00019957530060353294, + "loss": 2.853, + "step": 684 + }, + { + "epoch": 0.055282059559357596, + "grad_norm": 0.9588247537612915, + "learning_rate": 0.0001995738459430949, + "loss": 2.8435, + "step": 685 + }, + { + "epoch": 0.0553627632959406, + "grad_norm": 0.8317441940307617, + "learning_rate": 0.00019957238880101636, + "loss": 2.8208, + "step": 686 + }, + { + "epoch": 0.055443467032523605, + "grad_norm": 0.92695152759552, + "learning_rate": 0.00019957092917733361, + "loss": 2.8378, + "step": 687 + }, + { + "epoch": 0.05552417076910661, + "grad_norm": 0.8908315300941467, + "learning_rate": 0.00019956946707208305, + "loss": 2.8041, + "step": 688 + }, + { + "epoch": 0.055604874505689614, + "grad_norm": 0.9787055253982544, + "learning_rate": 0.00019956800248530107, + "loss": 2.8604, + "step": 689 + }, + { + "epoch": 0.05568557824227262, + "grad_norm": 0.8707631826400757, + "learning_rate": 0.00019956653541702415, + "loss": 2.7763, + "step": 690 + }, + { + "epoch": 0.05576628197885562, + "grad_norm": 1.0059715509414673, + "learning_rate": 0.00019956506586728896, + "loss": 2.8267, + "step": 691 + }, + { + "epoch": 0.05584698571543863, + "grad_norm": 0.88490891456604, + "learning_rate": 0.00019956359383613203, + "loss": 2.8278, + "step": 692 + }, + { + "epoch": 0.05592768945202163, + "grad_norm": 0.9527923464775085, + "learning_rate": 0.00019956211932359007, + "loss": 2.8251, + "step": 693 + }, + { + "epoch": 0.056008393188604635, + "grad_norm": 0.9612617492675781, + "learning_rate": 0.00019956064232969987, + "loss": 2.8148, + "step": 694 + }, + { + "epoch": 0.05608909692518763, + "grad_norm": 0.9261285066604614, + "learning_rate": 0.0001995591628544982, + "loss": 2.8176, + "step": 695 + }, + { + "epoch": 0.05616980066177064, + "grad_norm": 0.9766250252723694, + "learning_rate": 0.0001995576808980219, + "loss": 2.7968, + "step": 696 + }, + { + "epoch": 0.05625050439835364, + 
"grad_norm": 0.9287495017051697, + "learning_rate": 0.00019955619646030802, + "loss": 2.7679, + "step": 697 + }, + { + "epoch": 0.056331208134936646, + "grad_norm": 0.9182924032211304, + "learning_rate": 0.00019955470954139345, + "loss": 2.8295, + "step": 698 + }, + { + "epoch": 0.05641191187151965, + "grad_norm": 0.8650663495063782, + "learning_rate": 0.00019955322014131524, + "loss": 2.7928, + "step": 699 + }, + { + "epoch": 0.056492615608102655, + "grad_norm": 0.9543934464454651, + "learning_rate": 0.00019955172826011062, + "loss": 2.8049, + "step": 700 + }, + { + "epoch": 0.05657331934468566, + "grad_norm": 0.9060636162757874, + "learning_rate": 0.00019955023389781664, + "loss": 2.871, + "step": 701 + }, + { + "epoch": 0.05665402308126866, + "grad_norm": 0.9824137091636658, + "learning_rate": 0.00019954873705447065, + "loss": 2.816, + "step": 702 + }, + { + "epoch": 0.05673472681785167, + "grad_norm": 0.8831053972244263, + "learning_rate": 0.00019954723773010988, + "loss": 2.8207, + "step": 703 + }, + { + "epoch": 0.05681543055443467, + "grad_norm": 0.9603390693664551, + "learning_rate": 0.00019954573592477173, + "loss": 2.831, + "step": 704 + }, + { + "epoch": 0.056896134291017676, + "grad_norm": 0.911556601524353, + "learning_rate": 0.00019954423163849364, + "loss": 2.7679, + "step": 705 + }, + { + "epoch": 0.05697683802760068, + "grad_norm": 0.8558745384216309, + "learning_rate": 0.00019954272487131305, + "loss": 2.7934, + "step": 706 + }, + { + "epoch": 0.057057541764183685, + "grad_norm": 1.0175282955169678, + "learning_rate": 0.00019954121562326758, + "loss": 2.905, + "step": 707 + }, + { + "epoch": 0.05713824550076668, + "grad_norm": 0.9480875730514526, + "learning_rate": 0.00019953970389439483, + "loss": 2.85, + "step": 708 + }, + { + "epoch": 0.05721894923734969, + "grad_norm": 0.9271003603935242, + "learning_rate": 0.0001995381896847324, + "loss": 2.8237, + "step": 709 + }, + { + "epoch": 0.05729965297393269, + "grad_norm": 0.8439653515815735, + 
"learning_rate": 0.00019953667299431815, + "loss": 2.821, + "step": 710 + }, + { + "epoch": 0.057380356710515695, + "grad_norm": 0.9750552177429199, + "learning_rate": 0.0001995351538231898, + "loss": 2.8613, + "step": 711 + }, + { + "epoch": 0.0574610604470987, + "grad_norm": 0.9409266710281372, + "learning_rate": 0.0001995336321713852, + "loss": 2.7876, + "step": 712 + }, + { + "epoch": 0.057541764183681704, + "grad_norm": 0.811138927936554, + "learning_rate": 0.00019953210803894233, + "loss": 2.7957, + "step": 713 + }, + { + "epoch": 0.05762246792026471, + "grad_norm": 0.9504825472831726, + "learning_rate": 0.00019953058142589916, + "loss": 2.8536, + "step": 714 + }, + { + "epoch": 0.05770317165684771, + "grad_norm": 0.8183554410934448, + "learning_rate": 0.00019952905233229368, + "loss": 2.7697, + "step": 715 + }, + { + "epoch": 0.05778387539343072, + "grad_norm": 1.1146113872528076, + "learning_rate": 0.0001995275207581641, + "loss": 2.8629, + "step": 716 + }, + { + "epoch": 0.05786457913001372, + "grad_norm": 0.8797986507415771, + "learning_rate": 0.00019952598670354852, + "loss": 2.7962, + "step": 717 + }, + { + "epoch": 0.057945282866596726, + "grad_norm": 0.8771101832389832, + "learning_rate": 0.00019952445016848517, + "loss": 2.8323, + "step": 718 + }, + { + "epoch": 0.05802598660317973, + "grad_norm": 0.9003355503082275, + "learning_rate": 0.00019952291115301235, + "loss": 2.777, + "step": 719 + }, + { + "epoch": 0.058106690339762734, + "grad_norm": 0.846125602722168, + "learning_rate": 0.00019952136965716846, + "loss": 2.7875, + "step": 720 + }, + { + "epoch": 0.05818739407634573, + "grad_norm": 0.908833920955658, + "learning_rate": 0.00019951982568099187, + "loss": 2.7975, + "step": 721 + }, + { + "epoch": 0.058268097812928736, + "grad_norm": 0.8616230487823486, + "learning_rate": 0.00019951827922452106, + "loss": 2.7486, + "step": 722 + }, + { + "epoch": 0.05834880154951174, + "grad_norm": 0.8791850805282593, + "learning_rate": 0.00019951673028779462, 
+ "loss": 2.8301, + "step": 723 + }, + { + "epoch": 0.058429505286094745, + "grad_norm": 0.9437321424484253, + "learning_rate": 0.00019951517887085112, + "loss": 2.7956, + "step": 724 + }, + { + "epoch": 0.05851020902267775, + "grad_norm": 0.9263394474983215, + "learning_rate": 0.00019951362497372922, + "loss": 2.867, + "step": 725 + }, + { + "epoch": 0.05859091275926075, + "grad_norm": 0.9442462921142578, + "learning_rate": 0.00019951206859646764, + "loss": 2.8447, + "step": 726 + }, + { + "epoch": 0.05867161649584376, + "grad_norm": 0.9286711812019348, + "learning_rate": 0.0001995105097391052, + "loss": 2.7588, + "step": 727 + }, + { + "epoch": 0.05875232023242676, + "grad_norm": 0.9338774085044861, + "learning_rate": 0.00019950894840168072, + "loss": 2.7394, + "step": 728 + }, + { + "epoch": 0.058833023969009766, + "grad_norm": 0.8880760073661804, + "learning_rate": 0.00019950738458423314, + "loss": 2.7949, + "step": 729 + }, + { + "epoch": 0.05891372770559277, + "grad_norm": 1.0091183185577393, + "learning_rate": 0.00019950581828680143, + "loss": 2.8633, + "step": 730 + }, + { + "epoch": 0.058994431442175775, + "grad_norm": 0.8657729625701904, + "learning_rate": 0.0001995042495094246, + "loss": 2.8649, + "step": 731 + }, + { + "epoch": 0.05907513517875878, + "grad_norm": 1.0084047317504883, + "learning_rate": 0.00019950267825214176, + "loss": 2.8422, + "step": 732 + }, + { + "epoch": 0.059155838915341784, + "grad_norm": 0.9096506237983704, + "learning_rate": 0.00019950110451499208, + "loss": 2.7908, + "step": 733 + }, + { + "epoch": 0.05923654265192478, + "grad_norm": 1.1338937282562256, + "learning_rate": 0.0001994995282980148, + "loss": 2.8093, + "step": 734 + }, + { + "epoch": 0.059317246388507786, + "grad_norm": 0.8813811540603638, + "learning_rate": 0.00019949794960124915, + "loss": 2.8866, + "step": 735 + }, + { + "epoch": 0.05939795012509079, + "grad_norm": 0.8457592129707336, + "learning_rate": 0.00019949636842473453, + "loss": 2.7744, + "step": 736 + 
}, + { + "epoch": 0.059478653861673794, + "grad_norm": 0.8731856346130371, + "learning_rate": 0.0001994947847685103, + "loss": 2.7822, + "step": 737 + }, + { + "epoch": 0.0595593575982568, + "grad_norm": 0.8915185332298279, + "learning_rate": 0.00019949319863261597, + "loss": 2.773, + "step": 738 + }, + { + "epoch": 0.0596400613348398, + "grad_norm": 0.9478987455368042, + "learning_rate": 0.00019949161001709106, + "loss": 2.8462, + "step": 739 + }, + { + "epoch": 0.05972076507142281, + "grad_norm": 0.8903716206550598, + "learning_rate": 0.00019949001892197515, + "loss": 2.7741, + "step": 740 + }, + { + "epoch": 0.05980146880800581, + "grad_norm": 0.8870117664337158, + "learning_rate": 0.00019948842534730786, + "loss": 2.8255, + "step": 741 + }, + { + "epoch": 0.059882172544588816, + "grad_norm": 1.0766080617904663, + "learning_rate": 0.00019948682929312898, + "loss": 2.8865, + "step": 742 + }, + { + "epoch": 0.05996287628117182, + "grad_norm": 0.846447229385376, + "learning_rate": 0.00019948523075947824, + "loss": 2.8441, + "step": 743 + }, + { + "epoch": 0.060043580017754825, + "grad_norm": 0.9847991466522217, + "learning_rate": 0.00019948362974639552, + "loss": 2.8099, + "step": 744 + }, + { + "epoch": 0.06012428375433783, + "grad_norm": 0.9170514941215515, + "learning_rate": 0.00019948202625392068, + "loss": 2.8797, + "step": 745 + }, + { + "epoch": 0.060204987490920826, + "grad_norm": 0.8564898371696472, + "learning_rate": 0.0001994804202820937, + "loss": 2.7993, + "step": 746 + }, + { + "epoch": 0.06028569122750383, + "grad_norm": 0.8527392148971558, + "learning_rate": 0.00019947881183095457, + "loss": 2.7816, + "step": 747 + }, + { + "epoch": 0.060366394964086835, + "grad_norm": 0.9170876145362854, + "learning_rate": 0.00019947720090054342, + "loss": 2.8031, + "step": 748 + }, + { + "epoch": 0.06044709870066984, + "grad_norm": 0.8891414403915405, + "learning_rate": 0.0001994755874909004, + "loss": 2.8072, + "step": 749 + }, + { + "epoch": 
0.060527802437252844, + "grad_norm": 0.8853670358657837, + "learning_rate": 0.0001994739716020657, + "loss": 2.8857, + "step": 750 + }, + { + "epoch": 0.06060850617383585, + "grad_norm": 0.9011211395263672, + "learning_rate": 0.0001994723532340796, + "loss": 2.8519, + "step": 751 + }, + { + "epoch": 0.06068920991041885, + "grad_norm": 0.8843330144882202, + "learning_rate": 0.00019947073238698243, + "loss": 2.7882, + "step": 752 + }, + { + "epoch": 0.06076991364700186, + "grad_norm": 0.8712944984436035, + "learning_rate": 0.00019946910906081463, + "loss": 2.791, + "step": 753 + }, + { + "epoch": 0.06085061738358486, + "grad_norm": 0.8296090364456177, + "learning_rate": 0.00019946748325561656, + "loss": 2.8073, + "step": 754 + }, + { + "epoch": 0.060931321120167865, + "grad_norm": 0.9239117503166199, + "learning_rate": 0.00019946585497142885, + "loss": 2.8209, + "step": 755 + }, + { + "epoch": 0.06101202485675087, + "grad_norm": 0.8885170221328735, + "learning_rate": 0.000199464224208292, + "loss": 2.8391, + "step": 756 + }, + { + "epoch": 0.061092728593333874, + "grad_norm": 0.933720588684082, + "learning_rate": 0.0001994625909662467, + "loss": 2.7635, + "step": 757 + }, + { + "epoch": 0.06117343232991688, + "grad_norm": 0.9751253724098206, + "learning_rate": 0.00019946095524533362, + "loss": 2.7933, + "step": 758 + }, + { + "epoch": 0.061254136066499876, + "grad_norm": 0.9469670057296753, + "learning_rate": 0.00019945931704559353, + "loss": 2.7652, + "step": 759 + }, + { + "epoch": 0.06133483980308288, + "grad_norm": 0.8559684157371521, + "learning_rate": 0.00019945767636706728, + "loss": 2.8258, + "step": 760 + }, + { + "epoch": 0.061415543539665884, + "grad_norm": 1.021478295326233, + "learning_rate": 0.00019945603320979574, + "loss": 2.8047, + "step": 761 + }, + { + "epoch": 0.06149624727624889, + "grad_norm": 0.8421681523323059, + "learning_rate": 0.00019945438757381986, + "loss": 2.8233, + "step": 762 + }, + { + "epoch": 0.06157695101283189, + "grad_norm": 
0.900654137134552, + "learning_rate": 0.0001994527394591807, + "loss": 2.7591, + "step": 763 + }, + { + "epoch": 0.0616576547494149, + "grad_norm": 0.878300666809082, + "learning_rate": 0.0001994510888659193, + "loss": 2.715, + "step": 764 + }, + { + "epoch": 0.0617383584859979, + "grad_norm": 0.9170855283737183, + "learning_rate": 0.00019944943579407678, + "loss": 2.8604, + "step": 765 + }, + { + "epoch": 0.061819062222580906, + "grad_norm": 0.8532859683036804, + "learning_rate": 0.00019944778024369434, + "loss": 2.8124, + "step": 766 + }, + { + "epoch": 0.06189976595916391, + "grad_norm": 0.8549049496650696, + "learning_rate": 0.00019944612221481332, + "loss": 2.8066, + "step": 767 + }, + { + "epoch": 0.061980469695746915, + "grad_norm": 0.9602857828140259, + "learning_rate": 0.00019944446170747492, + "loss": 2.8424, + "step": 768 + }, + { + "epoch": 0.06206117343232992, + "grad_norm": 0.910953164100647, + "learning_rate": 0.0001994427987217206, + "loss": 2.8093, + "step": 769 + }, + { + "epoch": 0.06214187716891292, + "grad_norm": 0.8536386489868164, + "learning_rate": 0.0001994411332575918, + "loss": 2.802, + "step": 770 + }, + { + "epoch": 0.06222258090549593, + "grad_norm": 0.9166232347488403, + "learning_rate": 0.00019943946531513, + "loss": 2.783, + "step": 771 + }, + { + "epoch": 0.062303284642078925, + "grad_norm": 0.9954056739807129, + "learning_rate": 0.00019943779489437678, + "loss": 2.8198, + "step": 772 + }, + { + "epoch": 0.06238398837866193, + "grad_norm": 0.8527171015739441, + "learning_rate": 0.0001994361219953738, + "loss": 2.8159, + "step": 773 + }, + { + "epoch": 0.062464692115244934, + "grad_norm": 0.8951592445373535, + "learning_rate": 0.00019943444661816274, + "loss": 2.7969, + "step": 774 + }, + { + "epoch": 0.06254539585182795, + "grad_norm": 0.9348207116127014, + "learning_rate": 0.00019943276876278532, + "loss": 2.8403, + "step": 775 + }, + { + "epoch": 0.06262609958841095, + "grad_norm": 0.866318941116333, + "learning_rate": 
0.00019943108842928342, + "loss": 2.7886, + "step": 776 + }, + { + "epoch": 0.06270680332499395, + "grad_norm": 0.8571285605430603, + "learning_rate": 0.00019942940561769884, + "loss": 2.771, + "step": 777 + }, + { + "epoch": 0.06278750706157694, + "grad_norm": 0.8384295105934143, + "learning_rate": 0.00019942772032807357, + "loss": 2.7885, + "step": 778 + }, + { + "epoch": 0.06286821079815995, + "grad_norm": 0.9934808611869812, + "learning_rate": 0.00019942603256044961, + "loss": 2.8399, + "step": 779 + }, + { + "epoch": 0.06294891453474295, + "grad_norm": 0.8275915384292603, + "learning_rate": 0.00019942434231486902, + "loss": 2.8983, + "step": 780 + }, + { + "epoch": 0.06302961827132596, + "grad_norm": 0.9073596000671387, + "learning_rate": 0.0001994226495913739, + "loss": 2.7886, + "step": 781 + }, + { + "epoch": 0.06311032200790896, + "grad_norm": 0.9091461300849915, + "learning_rate": 0.00019942095439000646, + "loss": 2.814, + "step": 782 + }, + { + "epoch": 0.06319102574449197, + "grad_norm": 0.9356934428215027, + "learning_rate": 0.000199419256710809, + "loss": 2.8238, + "step": 783 + }, + { + "epoch": 0.06327172948107497, + "grad_norm": 0.883514940738678, + "learning_rate": 0.00019941755655382374, + "loss": 2.7912, + "step": 784 + }, + { + "epoch": 0.06335243321765797, + "grad_norm": 0.8770506381988525, + "learning_rate": 0.00019941585391909308, + "loss": 2.7774, + "step": 785 + }, + { + "epoch": 0.06343313695424098, + "grad_norm": 0.8891726136207581, + "learning_rate": 0.00019941414880665948, + "loss": 2.7975, + "step": 786 + }, + { + "epoch": 0.06351384069082398, + "grad_norm": 0.9280585050582886, + "learning_rate": 0.00019941244121656545, + "loss": 2.9468, + "step": 787 + }, + { + "epoch": 0.06359454442740699, + "grad_norm": 0.8545510768890381, + "learning_rate": 0.00019941073114885347, + "loss": 2.8165, + "step": 788 + }, + { + "epoch": 0.06367524816398999, + "grad_norm": 0.8631312847137451, + "learning_rate": 0.0001994090186035662, + "loss": 2.7955, + 
"step": 789 + }, + { + "epoch": 0.063755951900573, + "grad_norm": 0.8883851170539856, + "learning_rate": 0.00019940730358074634, + "loss": 2.7828, + "step": 790 + }, + { + "epoch": 0.063836655637156, + "grad_norm": 0.8421074748039246, + "learning_rate": 0.00019940558608043664, + "loss": 2.7999, + "step": 791 + }, + { + "epoch": 0.063917359373739, + "grad_norm": 0.918134868144989, + "learning_rate": 0.0001994038661026799, + "loss": 2.7888, + "step": 792 + }, + { + "epoch": 0.06399806311032201, + "grad_norm": 0.8513637781143188, + "learning_rate": 0.00019940214364751896, + "loss": 2.7719, + "step": 793 + }, + { + "epoch": 0.06407876684690501, + "grad_norm": 0.9181898236274719, + "learning_rate": 0.00019940041871499675, + "loss": 2.8345, + "step": 794 + }, + { + "epoch": 0.06415947058348802, + "grad_norm": 0.8129134774208069, + "learning_rate": 0.00019939869130515626, + "loss": 2.7316, + "step": 795 + }, + { + "epoch": 0.06424017432007102, + "grad_norm": 0.8782191872596741, + "learning_rate": 0.00019939696141804057, + "loss": 2.7852, + "step": 796 + }, + { + "epoch": 0.06432087805665403, + "grad_norm": 0.9064851403236389, + "learning_rate": 0.00019939522905369276, + "loss": 2.8105, + "step": 797 + }, + { + "epoch": 0.06440158179323703, + "grad_norm": 0.9888454675674438, + "learning_rate": 0.00019939349421215603, + "loss": 2.8496, + "step": 798 + }, + { + "epoch": 0.06448228552982004, + "grad_norm": 0.8717427253723145, + "learning_rate": 0.0001993917568934736, + "loss": 2.8227, + "step": 799 + }, + { + "epoch": 0.06456298926640304, + "grad_norm": 0.922980010509491, + "learning_rate": 0.0001993900170976888, + "loss": 2.8571, + "step": 800 + }, + { + "epoch": 0.06464369300298604, + "grad_norm": 0.8311850428581238, + "learning_rate": 0.00019938827482484492, + "loss": 2.7905, + "step": 801 + }, + { + "epoch": 0.06472439673956905, + "grad_norm": 0.9274900555610657, + "learning_rate": 0.0001993865300749855, + "loss": 2.8526, + "step": 802 + }, + { + "epoch": 
0.06480510047615205, + "grad_norm": 0.9072165489196777, + "learning_rate": 0.00019938478284815388, + "loss": 2.8384, + "step": 803 + }, + { + "epoch": 0.06488580421273504, + "grad_norm": 0.854099452495575, + "learning_rate": 0.0001993830331443937, + "loss": 2.8459, + "step": 804 + }, + { + "epoch": 0.06496650794931805, + "grad_norm": 0.824126660823822, + "learning_rate": 0.00019938128096374854, + "loss": 2.7845, + "step": 805 + }, + { + "epoch": 0.06504721168590105, + "grad_norm": 0.8570442795753479, + "learning_rate": 0.0001993795263062621, + "loss": 2.8446, + "step": 806 + }, + { + "epoch": 0.06512791542248406, + "grad_norm": 0.8998628854751587, + "learning_rate": 0.00019937776917197805, + "loss": 2.8604, + "step": 807 + }, + { + "epoch": 0.06520861915906706, + "grad_norm": 0.9189189076423645, + "learning_rate": 0.00019937600956094023, + "loss": 2.7866, + "step": 808 + }, + { + "epoch": 0.06528932289565006, + "grad_norm": 0.9471604824066162, + "learning_rate": 0.00019937424747319248, + "loss": 2.7619, + "step": 809 + }, + { + "epoch": 0.06537002663223307, + "grad_norm": 0.8507755994796753, + "learning_rate": 0.00019937248290877874, + "loss": 2.8259, + "step": 810 + }, + { + "epoch": 0.06545073036881607, + "grad_norm": 0.8800963759422302, + "learning_rate": 0.00019937071586774292, + "loss": 2.827, + "step": 811 + }, + { + "epoch": 0.06553143410539908, + "grad_norm": 0.8851124048233032, + "learning_rate": 0.00019936894635012915, + "loss": 2.793, + "step": 812 + }, + { + "epoch": 0.06561213784198208, + "grad_norm": 0.88127601146698, + "learning_rate": 0.00019936717435598144, + "loss": 2.8885, + "step": 813 + }, + { + "epoch": 0.06569284157856509, + "grad_norm": 0.9115073084831238, + "learning_rate": 0.000199365399885344, + "loss": 2.8278, + "step": 814 + }, + { + "epoch": 0.06577354531514809, + "grad_norm": 0.8722662925720215, + "learning_rate": 0.00019936362293826107, + "loss": 2.8125, + "step": 815 + }, + { + "epoch": 0.0658542490517311, + "grad_norm": 
0.8332365155220032, + "learning_rate": 0.0001993618435147769, + "loss": 2.7682, + "step": 816 + }, + { + "epoch": 0.0659349527883141, + "grad_norm": 0.9524003863334656, + "learning_rate": 0.0001993600616149359, + "loss": 2.8166, + "step": 817 + }, + { + "epoch": 0.0660156565248971, + "grad_norm": 0.8402767181396484, + "learning_rate": 0.0001993582772387824, + "loss": 2.8192, + "step": 818 + }, + { + "epoch": 0.06609636026148011, + "grad_norm": 0.8589913249015808, + "learning_rate": 0.0001993564903863609, + "loss": 2.7785, + "step": 819 + }, + { + "epoch": 0.06617706399806311, + "grad_norm": 1.034550428390503, + "learning_rate": 0.00019935470105771598, + "loss": 2.8407, + "step": 820 + }, + { + "epoch": 0.06625776773464612, + "grad_norm": 0.856490969657898, + "learning_rate": 0.0001993529092528921, + "loss": 2.794, + "step": 821 + }, + { + "epoch": 0.06633847147122912, + "grad_norm": 0.897498369216919, + "learning_rate": 0.0001993511149719341, + "loss": 2.7959, + "step": 822 + }, + { + "epoch": 0.06641917520781213, + "grad_norm": 0.8495277166366577, + "learning_rate": 0.00019934931821488658, + "loss": 2.783, + "step": 823 + }, + { + "epoch": 0.06649987894439513, + "grad_norm": 0.8362239599227905, + "learning_rate": 0.00019934751898179436, + "loss": 2.8628, + "step": 824 + }, + { + "epoch": 0.06658058268097813, + "grad_norm": 0.8702061176300049, + "learning_rate": 0.00019934571727270225, + "loss": 2.7878, + "step": 825 + }, + { + "epoch": 0.06666128641756114, + "grad_norm": 0.8341560363769531, + "learning_rate": 0.0001993439130876552, + "loss": 2.7345, + "step": 826 + }, + { + "epoch": 0.06674199015414414, + "grad_norm": 0.880181074142456, + "learning_rate": 0.00019934210642669813, + "loss": 2.7789, + "step": 827 + }, + { + "epoch": 0.06682269389072715, + "grad_norm": 0.9088126420974731, + "learning_rate": 0.00019934029728987607, + "loss": 2.7893, + "step": 828 + }, + { + "epoch": 0.06690339762731014, + "grad_norm": 0.8087106347084045, + "learning_rate": 
0.00019933848567723416, + "loss": 2.7967, + "step": 829 + }, + { + "epoch": 0.06698410136389314, + "grad_norm": 0.8970876336097717, + "learning_rate": 0.00019933667158881745, + "loss": 2.8837, + "step": 830 + }, + { + "epoch": 0.06706480510047615, + "grad_norm": 0.9344804883003235, + "learning_rate": 0.00019933485502467128, + "loss": 2.7754, + "step": 831 + }, + { + "epoch": 0.06714550883705915, + "grad_norm": 0.8119301795959473, + "learning_rate": 0.00019933303598484084, + "loss": 2.7919, + "step": 832 + }, + { + "epoch": 0.06722621257364216, + "grad_norm": 0.9370681047439575, + "learning_rate": 0.00019933121446937148, + "loss": 2.8011, + "step": 833 + }, + { + "epoch": 0.06730691631022516, + "grad_norm": 0.8358973264694214, + "learning_rate": 0.00019932939047830858, + "loss": 2.8339, + "step": 834 + }, + { + "epoch": 0.06738762004680816, + "grad_norm": 0.8565972447395325, + "learning_rate": 0.00019932756401169765, + "loss": 2.8269, + "step": 835 + }, + { + "epoch": 0.06746832378339117, + "grad_norm": 0.8405514359474182, + "learning_rate": 0.00019932573506958417, + "loss": 2.7621, + "step": 836 + }, + { + "epoch": 0.06754902751997417, + "grad_norm": 0.8217617869377136, + "learning_rate": 0.00019932390365201373, + "loss": 2.8363, + "step": 837 + }, + { + "epoch": 0.06762973125655718, + "grad_norm": 0.9121438264846802, + "learning_rate": 0.00019932206975903198, + "loss": 2.8033, + "step": 838 + }, + { + "epoch": 0.06771043499314018, + "grad_norm": 0.9113054871559143, + "learning_rate": 0.00019932023339068464, + "loss": 2.8696, + "step": 839 + }, + { + "epoch": 0.06779113872972319, + "grad_norm": 0.8638293743133545, + "learning_rate": 0.00019931839454701743, + "loss": 2.8008, + "step": 840 + }, + { + "epoch": 0.06787184246630619, + "grad_norm": 0.862932562828064, + "learning_rate": 0.0001993165532280762, + "loss": 2.8092, + "step": 841 + }, + { + "epoch": 0.0679525462028892, + "grad_norm": 0.9089607000350952, + "learning_rate": 0.00019931470943390685, + "loss": 
2.8921, + "step": 842 + }, + { + "epoch": 0.0680332499394722, + "grad_norm": 0.9233555793762207, + "learning_rate": 0.00019931286316455537, + "loss": 2.9025, + "step": 843 + }, + { + "epoch": 0.0681139536760552, + "grad_norm": 0.9403017163276672, + "learning_rate": 0.0001993110144200677, + "loss": 2.7875, + "step": 844 + }, + { + "epoch": 0.06819465741263821, + "grad_norm": 0.9194290637969971, + "learning_rate": 0.00019930916320048996, + "loss": 2.8254, + "step": 845 + }, + { + "epoch": 0.06827536114922121, + "grad_norm": 0.8238688111305237, + "learning_rate": 0.00019930730950586828, + "loss": 2.82, + "step": 846 + }, + { + "epoch": 0.06835606488580422, + "grad_norm": 0.8560660481452942, + "learning_rate": 0.00019930545333624885, + "loss": 2.8516, + "step": 847 + }, + { + "epoch": 0.06843676862238722, + "grad_norm": 0.9127222895622253, + "learning_rate": 0.0001993035946916779, + "loss": 2.7674, + "step": 848 + }, + { + "epoch": 0.06851747235897022, + "grad_norm": 0.8679420948028564, + "learning_rate": 0.00019930173357220182, + "loss": 2.777, + "step": 849 + }, + { + "epoch": 0.06859817609555323, + "grad_norm": 0.9686945676803589, + "learning_rate": 0.00019929986997786699, + "loss": 2.7841, + "step": 850 + }, + { + "epoch": 0.06867887983213623, + "grad_norm": 0.8366333246231079, + "learning_rate": 0.00019929800390871977, + "loss": 2.7993, + "step": 851 + }, + { + "epoch": 0.06875958356871924, + "grad_norm": 0.8374585509300232, + "learning_rate": 0.00019929613536480675, + "loss": 2.7545, + "step": 852 + }, + { + "epoch": 0.06884028730530224, + "grad_norm": 0.9843763709068298, + "learning_rate": 0.00019929426434617451, + "loss": 2.8118, + "step": 853 + }, + { + "epoch": 0.06892099104188525, + "grad_norm": 0.8093454241752625, + "learning_rate": 0.0001992923908528696, + "loss": 2.7301, + "step": 854 + }, + { + "epoch": 0.06900169477846824, + "grad_norm": 0.8374418020248413, + "learning_rate": 0.00019929051488493877, + "loss": 2.7745, + "step": 855 + }, + { + "epoch": 
0.06908239851505124, + "grad_norm": 0.869965136051178, + "learning_rate": 0.00019928863644242875, + "loss": 2.7637, + "step": 856 + }, + { + "epoch": 0.06916310225163425, + "grad_norm": 0.9280590415000916, + "learning_rate": 0.00019928675552538638, + "loss": 2.7792, + "step": 857 + }, + { + "epoch": 0.06924380598821725, + "grad_norm": 0.8624193668365479, + "learning_rate": 0.00019928487213385852, + "loss": 2.7755, + "step": 858 + }, + { + "epoch": 0.06932450972480025, + "grad_norm": 0.8379972577095032, + "learning_rate": 0.00019928298626789212, + "loss": 2.8563, + "step": 859 + }, + { + "epoch": 0.06940521346138326, + "grad_norm": 0.9272914528846741, + "learning_rate": 0.00019928109792753418, + "loss": 2.836, + "step": 860 + }, + { + "epoch": 0.06948591719796626, + "grad_norm": 0.9239040613174438, + "learning_rate": 0.00019927920711283175, + "loss": 2.7999, + "step": 861 + }, + { + "epoch": 0.06956662093454927, + "grad_norm": 0.9125113487243652, + "learning_rate": 0.00019927731382383195, + "loss": 2.8494, + "step": 862 + }, + { + "epoch": 0.06964732467113227, + "grad_norm": 0.8782855868339539, + "learning_rate": 0.00019927541806058198, + "loss": 2.767, + "step": 863 + }, + { + "epoch": 0.06972802840771528, + "grad_norm": 0.8815447092056274, + "learning_rate": 0.00019927351982312907, + "loss": 2.7877, + "step": 864 + }, + { + "epoch": 0.06980873214429828, + "grad_norm": 0.8555476069450378, + "learning_rate": 0.00019927161911152056, + "loss": 2.8057, + "step": 865 + }, + { + "epoch": 0.06988943588088128, + "grad_norm": 0.8562924265861511, + "learning_rate": 0.00019926971592580382, + "loss": 2.8049, + "step": 866 + }, + { + "epoch": 0.06997013961746429, + "grad_norm": 0.846503734588623, + "learning_rate": 0.00019926781026602625, + "loss": 2.8545, + "step": 867 + }, + { + "epoch": 0.07005084335404729, + "grad_norm": 0.8439623713493347, + "learning_rate": 0.00019926590213223535, + "loss": 2.7451, + "step": 868 + }, + { + "epoch": 0.0701315470906303, + "grad_norm": 
0.8471730351448059, + "learning_rate": 0.00019926399152447868, + "loss": 2.7879, + "step": 869 + }, + { + "epoch": 0.0702122508272133, + "grad_norm": 0.8721400499343872, + "learning_rate": 0.00019926207844280387, + "loss": 2.8594, + "step": 870 + }, + { + "epoch": 0.0702929545637963, + "grad_norm": 0.8110925555229187, + "learning_rate": 0.0001992601628872586, + "loss": 2.7789, + "step": 871 + }, + { + "epoch": 0.07037365830037931, + "grad_norm": 0.9593119025230408, + "learning_rate": 0.0001992582448578906, + "loss": 2.8792, + "step": 872 + }, + { + "epoch": 0.07045436203696231, + "grad_norm": 0.8553354144096375, + "learning_rate": 0.00019925632435474765, + "loss": 2.8056, + "step": 873 + }, + { + "epoch": 0.07053506577354532, + "grad_norm": 0.8062612414360046, + "learning_rate": 0.00019925440137787768, + "loss": 2.7762, + "step": 874 + }, + { + "epoch": 0.07061576951012832, + "grad_norm": 0.8264921307563782, + "learning_rate": 0.00019925247592732858, + "loss": 2.8435, + "step": 875 + }, + { + "epoch": 0.07069647324671133, + "grad_norm": 0.7770401835441589, + "learning_rate": 0.00019925054800314828, + "loss": 2.7846, + "step": 876 + }, + { + "epoch": 0.07077717698329433, + "grad_norm": 0.8426765203475952, + "learning_rate": 0.0001992486176053849, + "loss": 2.782, + "step": 877 + }, + { + "epoch": 0.07085788071987734, + "grad_norm": 0.855330228805542, + "learning_rate": 0.00019924668473408655, + "loss": 2.8051, + "step": 878 + }, + { + "epoch": 0.07093858445646034, + "grad_norm": 0.8762049674987793, + "learning_rate": 0.00019924474938930135, + "loss": 2.7634, + "step": 879 + }, + { + "epoch": 0.07101928819304333, + "grad_norm": 0.9226812124252319, + "learning_rate": 0.0001992428115710776, + "loss": 2.8342, + "step": 880 + }, + { + "epoch": 0.07109999192962634, + "grad_norm": 0.9031660556793213, + "learning_rate": 0.00019924087127946353, + "loss": 2.7953, + "step": 881 + }, + { + "epoch": 0.07118069566620934, + "grad_norm": 1.0151792764663696, + "learning_rate": 
0.00019923892851450757, + "loss": 2.8225, + "step": 882 + }, + { + "epoch": 0.07126139940279234, + "grad_norm": 0.9805678725242615, + "learning_rate": 0.00019923698327625806, + "loss": 2.7727, + "step": 883 + }, + { + "epoch": 0.07134210313937535, + "grad_norm": 0.8831729888916016, + "learning_rate": 0.00019923503556476356, + "loss": 2.7682, + "step": 884 + }, + { + "epoch": 0.07142280687595835, + "grad_norm": 1.0311404466629028, + "learning_rate": 0.00019923308538007253, + "loss": 2.8422, + "step": 885 + }, + { + "epoch": 0.07150351061254136, + "grad_norm": 0.8143388628959656, + "learning_rate": 0.0001992311327222336, + "loss": 2.7876, + "step": 886 + }, + { + "epoch": 0.07158421434912436, + "grad_norm": 0.877017617225647, + "learning_rate": 0.00019922917759129552, + "loss": 2.7486, + "step": 887 + }, + { + "epoch": 0.07166491808570737, + "grad_norm": 0.930646538734436, + "learning_rate": 0.0001992272199873069, + "loss": 2.8022, + "step": 888 + }, + { + "epoch": 0.07174562182229037, + "grad_norm": 0.934753954410553, + "learning_rate": 0.00019922525991031655, + "loss": 2.8485, + "step": 889 + }, + { + "epoch": 0.07182632555887337, + "grad_norm": 0.9564220905303955, + "learning_rate": 0.00019922329736037339, + "loss": 2.761, + "step": 890 + }, + { + "epoch": 0.07190702929545638, + "grad_norm": 0.9457311630249023, + "learning_rate": 0.00019922133233752626, + "loss": 2.8279, + "step": 891 + }, + { + "epoch": 0.07198773303203938, + "grad_norm": 0.9385658502578735, + "learning_rate": 0.0001992193648418242, + "loss": 2.8222, + "step": 892 + }, + { + "epoch": 0.07206843676862239, + "grad_norm": 1.0157524347305298, + "learning_rate": 0.00019921739487331616, + "loss": 2.9166, + "step": 893 + }, + { + "epoch": 0.07214914050520539, + "grad_norm": 0.9143860340118408, + "learning_rate": 0.00019921542243205132, + "loss": 2.8139, + "step": 894 + }, + { + "epoch": 0.0722298442417884, + "grad_norm": 0.8769320249557495, + "learning_rate": 0.00019921344751807878, + "loss": 2.8023, + 
"step": 895 + }, + { + "epoch": 0.0723105479783714, + "grad_norm": 0.9647517204284668, + "learning_rate": 0.0001992114701314478, + "loss": 2.8872, + "step": 896 + }, + { + "epoch": 0.0723912517149544, + "grad_norm": 1.025978446006775, + "learning_rate": 0.00019920949027220762, + "loss": 2.837, + "step": 897 + }, + { + "epoch": 0.07247195545153741, + "grad_norm": 0.8848521113395691, + "learning_rate": 0.0001992075079404076, + "loss": 2.7498, + "step": 898 + }, + { + "epoch": 0.07255265918812041, + "grad_norm": 0.9395595788955688, + "learning_rate": 0.0001992055231360972, + "loss": 2.8752, + "step": 899 + }, + { + "epoch": 0.07263336292470342, + "grad_norm": 0.8711572885513306, + "learning_rate": 0.00019920353585932578, + "loss": 2.8608, + "step": 900 + }, + { + "epoch": 0.07271406666128642, + "grad_norm": 0.8606846332550049, + "learning_rate": 0.00019920154611014295, + "loss": 2.829, + "step": 901 + }, + { + "epoch": 0.07279477039786943, + "grad_norm": 0.859354555606842, + "learning_rate": 0.0001991995538885983, + "loss": 2.8102, + "step": 902 + }, + { + "epoch": 0.07287547413445243, + "grad_norm": 0.9063243865966797, + "learning_rate": 0.00019919755919474143, + "loss": 2.8509, + "step": 903 + }, + { + "epoch": 0.07295617787103544, + "grad_norm": 0.8321940898895264, + "learning_rate": 0.00019919556202862207, + "loss": 2.796, + "step": 904 + }, + { + "epoch": 0.07303688160761844, + "grad_norm": 0.8875191807746887, + "learning_rate": 0.00019919356239029003, + "loss": 2.8672, + "step": 905 + }, + { + "epoch": 0.07311758534420143, + "grad_norm": 0.9028071165084839, + "learning_rate": 0.0001991915602797951, + "loss": 2.8926, + "step": 906 + }, + { + "epoch": 0.07319828908078443, + "grad_norm": 0.9449291825294495, + "learning_rate": 0.0001991895556971872, + "loss": 2.8159, + "step": 907 + }, + { + "epoch": 0.07327899281736744, + "grad_norm": 0.871576189994812, + "learning_rate": 0.0001991875486425163, + "loss": 2.8162, + "step": 908 + }, + { + "epoch": 
0.07335969655395044, + "grad_norm": 0.818423330783844, + "learning_rate": 0.0001991855391158324, + "loss": 2.8882, + "step": 909 + }, + { + "epoch": 0.07344040029053345, + "grad_norm": 0.8802343606948853, + "learning_rate": 0.0001991835271171856, + "loss": 2.8245, + "step": 910 + }, + { + "epoch": 0.07352110402711645, + "grad_norm": 0.916023313999176, + "learning_rate": 0.000199181512646626, + "loss": 2.8966, + "step": 911 + }, + { + "epoch": 0.07360180776369946, + "grad_norm": 1.0663317441940308, + "learning_rate": 0.0001991794957042039, + "loss": 2.7736, + "step": 912 + }, + { + "epoch": 0.07368251150028246, + "grad_norm": 0.9212445616722107, + "learning_rate": 0.00019917747628996947, + "loss": 2.7924, + "step": 913 + }, + { + "epoch": 0.07376321523686546, + "grad_norm": 0.9785256385803223, + "learning_rate": 0.00019917545440397308, + "loss": 2.8021, + "step": 914 + }, + { + "epoch": 0.07384391897344847, + "grad_norm": 0.8510444760322571, + "learning_rate": 0.00019917343004626514, + "loss": 2.7991, + "step": 915 + }, + { + "epoch": 0.07392462271003147, + "grad_norm": 0.8967106342315674, + "learning_rate": 0.0001991714032168961, + "loss": 2.8838, + "step": 916 + }, + { + "epoch": 0.07400532644661448, + "grad_norm": 0.8940563797950745, + "learning_rate": 0.0001991693739159164, + "loss": 2.8124, + "step": 917 + }, + { + "epoch": 0.07408603018319748, + "grad_norm": 0.9270479679107666, + "learning_rate": 0.0001991673421433767, + "loss": 2.7627, + "step": 918 + }, + { + "epoch": 0.07416673391978049, + "grad_norm": 0.905805230140686, + "learning_rate": 0.0001991653078993276, + "loss": 2.781, + "step": 919 + }, + { + "epoch": 0.07424743765636349, + "grad_norm": 0.9295129179954529, + "learning_rate": 0.00019916327118381982, + "loss": 2.8332, + "step": 920 + }, + { + "epoch": 0.0743281413929465, + "grad_norm": 0.863331139087677, + "learning_rate": 0.00019916123199690408, + "loss": 2.8489, + "step": 921 + }, + { + "epoch": 0.0744088451295295, + "grad_norm": 
0.9966896772384644, + "learning_rate": 0.00019915919033863127, + "loss": 2.9107, + "step": 922 + }, + { + "epoch": 0.0744895488661125, + "grad_norm": 0.8921390771865845, + "learning_rate": 0.00019915714620905218, + "loss": 2.7668, + "step": 923 + }, + { + "epoch": 0.07457025260269551, + "grad_norm": 0.9378434419631958, + "learning_rate": 0.00019915509960821782, + "loss": 2.8305, + "step": 924 + }, + { + "epoch": 0.07465095633927851, + "grad_norm": 1.0351817607879639, + "learning_rate": 0.0001991530505361792, + "loss": 2.9412, + "step": 925 + }, + { + "epoch": 0.07473166007586152, + "grad_norm": 0.7995476722717285, + "learning_rate": 0.0001991509989929874, + "loss": 2.7872, + "step": 926 + }, + { + "epoch": 0.07481236381244452, + "grad_norm": 0.858830988407135, + "learning_rate": 0.0001991489449786935, + "loss": 2.7775, + "step": 927 + }, + { + "epoch": 0.07489306754902753, + "grad_norm": 1.1254682540893555, + "learning_rate": 0.00019914688849334867, + "loss": 2.7913, + "step": 928 + }, + { + "epoch": 0.07497377128561053, + "grad_norm": 0.9475330710411072, + "learning_rate": 0.00019914482953700428, + "loss": 2.7945, + "step": 929 + }, + { + "epoch": 0.07505447502219353, + "grad_norm": 0.8427290916442871, + "learning_rate": 0.00019914276810971152, + "loss": 2.8297, + "step": 930 + }, + { + "epoch": 0.07513517875877652, + "grad_norm": 0.9308956265449524, + "learning_rate": 0.00019914070421152183, + "loss": 2.8534, + "step": 931 + }, + { + "epoch": 0.07521588249535953, + "grad_norm": 0.9264787435531616, + "learning_rate": 0.00019913863784248664, + "loss": 2.7959, + "step": 932 + }, + { + "epoch": 0.07529658623194253, + "grad_norm": 0.8432087302207947, + "learning_rate": 0.00019913656900265742, + "loss": 2.8479, + "step": 933 + }, + { + "epoch": 0.07537728996852554, + "grad_norm": 0.8237274885177612, + "learning_rate": 0.0001991344976920858, + "loss": 2.782, + "step": 934 + }, + { + "epoch": 0.07545799370510854, + "grad_norm": 0.8143243789672852, + "learning_rate": 
0.0001991324239108233, + "loss": 2.7567, + "step": 935 + }, + { + "epoch": 0.07553869744169155, + "grad_norm": 0.8824434280395508, + "learning_rate": 0.0001991303476589217, + "loss": 2.7971, + "step": 936 + }, + { + "epoch": 0.07561940117827455, + "grad_norm": 0.8202407360076904, + "learning_rate": 0.00019912826893643272, + "loss": 2.7825, + "step": 937 + }, + { + "epoch": 0.07570010491485755, + "grad_norm": 0.8001337647438049, + "learning_rate": 0.00019912618774340813, + "loss": 2.8294, + "step": 938 + }, + { + "epoch": 0.07578080865144056, + "grad_norm": 0.8875572085380554, + "learning_rate": 0.00019912410407989982, + "loss": 2.8013, + "step": 939 + }, + { + "epoch": 0.07586151238802356, + "grad_norm": 0.8676280379295349, + "learning_rate": 0.0001991220179459597, + "loss": 2.767, + "step": 940 + }, + { + "epoch": 0.07594221612460657, + "grad_norm": 0.9767136573791504, + "learning_rate": 0.00019911992934163982, + "loss": 2.8315, + "step": 941 + }, + { + "epoch": 0.07602291986118957, + "grad_norm": 0.8690733909606934, + "learning_rate": 0.0001991178382669922, + "loss": 2.8042, + "step": 942 + }, + { + "epoch": 0.07610362359777258, + "grad_norm": 0.862978458404541, + "learning_rate": 0.00019911574472206893, + "loss": 2.8243, + "step": 943 + }, + { + "epoch": 0.07618432733435558, + "grad_norm": 0.9116127490997314, + "learning_rate": 0.00019911364870692225, + "loss": 2.7377, + "step": 944 + }, + { + "epoch": 0.07626503107093859, + "grad_norm": 0.8765420317649841, + "learning_rate": 0.00019911155022160433, + "loss": 2.7673, + "step": 945 + }, + { + "epoch": 0.07634573480752159, + "grad_norm": 0.8229342699050903, + "learning_rate": 0.0001991094492661675, + "loss": 2.7749, + "step": 946 + }, + { + "epoch": 0.0764264385441046, + "grad_norm": 0.8340098261833191, + "learning_rate": 0.00019910734584066412, + "loss": 2.7871, + "step": 947 + }, + { + "epoch": 0.0765071422806876, + "grad_norm": 0.8116940259933472, + "learning_rate": 0.0001991052399451466, + "loss": 2.8202, + 
"step": 948 + }, + { + "epoch": 0.0765878460172706, + "grad_norm": 0.8730412721633911, + "learning_rate": 0.00019910313157966747, + "loss": 2.8661, + "step": 949 + }, + { + "epoch": 0.07666854975385361, + "grad_norm": 0.8272213339805603, + "learning_rate": 0.0001991010207442792, + "loss": 2.8352, + "step": 950 + }, + { + "epoch": 0.07674925349043661, + "grad_norm": 0.8586944937705994, + "learning_rate": 0.0001990989074390345, + "loss": 2.8018, + "step": 951 + }, + { + "epoch": 0.07682995722701962, + "grad_norm": 0.81830894947052, + "learning_rate": 0.00019909679166398592, + "loss": 2.8154, + "step": 952 + }, + { + "epoch": 0.07691066096360262, + "grad_norm": 0.8158484101295471, + "learning_rate": 0.00019909467341918627, + "loss": 2.7618, + "step": 953 + }, + { + "epoch": 0.07699136470018562, + "grad_norm": 0.816834032535553, + "learning_rate": 0.00019909255270468833, + "loss": 2.8125, + "step": 954 + }, + { + "epoch": 0.07707206843676863, + "grad_norm": 0.944790780544281, + "learning_rate": 0.00019909042952054496, + "loss": 2.8054, + "step": 955 + }, + { + "epoch": 0.07715277217335163, + "grad_norm": 0.9281302690505981, + "learning_rate": 0.00019908830386680904, + "loss": 2.8724, + "step": 956 + }, + { + "epoch": 0.07723347590993462, + "grad_norm": 0.8850300908088684, + "learning_rate": 0.00019908617574353356, + "loss": 2.7906, + "step": 957 + }, + { + "epoch": 0.07731417964651763, + "grad_norm": 0.8997938632965088, + "learning_rate": 0.00019908404515077158, + "loss": 2.7814, + "step": 958 + }, + { + "epoch": 0.07739488338310063, + "grad_norm": 0.8814194798469543, + "learning_rate": 0.0001990819120885762, + "loss": 2.7423, + "step": 959 + }, + { + "epoch": 0.07747558711968364, + "grad_norm": 0.8759928345680237, + "learning_rate": 0.00019907977655700054, + "loss": 2.7803, + "step": 960 + }, + { + "epoch": 0.07755629085626664, + "grad_norm": 0.8439476490020752, + "learning_rate": 0.00019907763855609787, + "loss": 2.8277, + "step": 961 + }, + { + "epoch": 
0.07763699459284965, + "grad_norm": 0.8745121955871582, + "learning_rate": 0.00019907549808592144, + "loss": 2.8152, + "step": 962 + }, + { + "epoch": 0.07771769832943265, + "grad_norm": 1.0439598560333252, + "learning_rate": 0.00019907335514652465, + "loss": 2.7882, + "step": 963 + }, + { + "epoch": 0.07779840206601565, + "grad_norm": 0.9516503810882568, + "learning_rate": 0.00019907120973796082, + "loss": 2.8555, + "step": 964 + }, + { + "epoch": 0.07787910580259866, + "grad_norm": 0.928717315196991, + "learning_rate": 0.0001990690618602835, + "loss": 2.8214, + "step": 965 + }, + { + "epoch": 0.07795980953918166, + "grad_norm": 0.7923071384429932, + "learning_rate": 0.00019906691151354617, + "loss": 2.8153, + "step": 966 + }, + { + "epoch": 0.07804051327576467, + "grad_norm": 0.8783324956893921, + "learning_rate": 0.00019906475869780246, + "loss": 2.7691, + "step": 967 + }, + { + "epoch": 0.07812121701234767, + "grad_norm": 0.8974801301956177, + "learning_rate": 0.000199062603413106, + "loss": 2.8156, + "step": 968 + }, + { + "epoch": 0.07820192074893068, + "grad_norm": 0.9304391741752625, + "learning_rate": 0.00019906044565951052, + "loss": 2.8489, + "step": 969 + }, + { + "epoch": 0.07828262448551368, + "grad_norm": 0.8351098895072937, + "learning_rate": 0.00019905828543706976, + "loss": 2.7744, + "step": 970 + }, + { + "epoch": 0.07836332822209668, + "grad_norm": 0.8634265065193176, + "learning_rate": 0.0001990561227458376, + "loss": 2.8193, + "step": 971 + }, + { + "epoch": 0.07844403195867969, + "grad_norm": 0.8969653248786926, + "learning_rate": 0.00019905395758586792, + "loss": 2.7548, + "step": 972 + }, + { + "epoch": 0.07852473569526269, + "grad_norm": 0.8964852094650269, + "learning_rate": 0.0001990517899572147, + "loss": 2.8037, + "step": 973 + }, + { + "epoch": 0.0786054394318457, + "grad_norm": 0.8567596077919006, + "learning_rate": 0.00019904961985993196, + "loss": 2.7942, + "step": 974 + }, + { + "epoch": 0.0786861431684287, + "grad_norm": 
0.8275273442268372, + "learning_rate": 0.00019904744729407374, + "loss": 2.8359, + "step": 975 + }, + { + "epoch": 0.0787668469050117, + "grad_norm": 0.9458810091018677, + "learning_rate": 0.00019904527225969424, + "loss": 2.8354, + "step": 976 + }, + { + "epoch": 0.07884755064159471, + "grad_norm": 0.8690593838691711, + "learning_rate": 0.00019904309475684767, + "loss": 2.7894, + "step": 977 + }, + { + "epoch": 0.07892825437817771, + "grad_norm": 0.810279130935669, + "learning_rate": 0.00019904091478558823, + "loss": 2.7939, + "step": 978 + }, + { + "epoch": 0.07900895811476072, + "grad_norm": 0.8779012560844421, + "learning_rate": 0.0001990387323459703, + "loss": 2.7551, + "step": 979 + }, + { + "epoch": 0.07908966185134372, + "grad_norm": 0.7936381101608276, + "learning_rate": 0.00019903654743804833, + "loss": 2.814, + "step": 980 + }, + { + "epoch": 0.07917036558792673, + "grad_norm": 0.9567989110946655, + "learning_rate": 0.00019903436006187667, + "loss": 2.7715, + "step": 981 + }, + { + "epoch": 0.07925106932450972, + "grad_norm": 0.9250255823135376, + "learning_rate": 0.00019903217021750987, + "loss": 2.8967, + "step": 982 + }, + { + "epoch": 0.07933177306109272, + "grad_norm": 0.8342804312705994, + "learning_rate": 0.00019902997790500256, + "loss": 2.7728, + "step": 983 + }, + { + "epoch": 0.07941247679767573, + "grad_norm": 0.8321473598480225, + "learning_rate": 0.00019902778312440932, + "loss": 2.8479, + "step": 984 + }, + { + "epoch": 0.07949318053425873, + "grad_norm": 0.894727885723114, + "learning_rate": 0.00019902558587578484, + "loss": 2.8211, + "step": 985 + }, + { + "epoch": 0.07957388427084174, + "grad_norm": 0.8093457221984863, + "learning_rate": 0.0001990233861591839, + "loss": 2.7481, + "step": 986 + }, + { + "epoch": 0.07965458800742474, + "grad_norm": 0.8626284599304199, + "learning_rate": 0.00019902118397466132, + "loss": 2.8368, + "step": 987 + }, + { + "epoch": 0.07973529174400774, + "grad_norm": 0.799648642539978, + "learning_rate": 
0.00019901897932227204, + "loss": 2.8713, + "step": 988 + }, + { + "epoch": 0.07981599548059075, + "grad_norm": 0.9658265709877014, + "learning_rate": 0.00019901677220207092, + "loss": 2.7284, + "step": 989 + }, + { + "epoch": 0.07989669921717375, + "grad_norm": 0.877299427986145, + "learning_rate": 0.00019901456261411303, + "loss": 2.7916, + "step": 990 + }, + { + "epoch": 0.07997740295375676, + "grad_norm": 0.926450252532959, + "learning_rate": 0.00019901235055845337, + "loss": 2.8207, + "step": 991 + }, + { + "epoch": 0.08005810669033976, + "grad_norm": 0.8858455419540405, + "learning_rate": 0.00019901013603514716, + "loss": 2.795, + "step": 992 + }, + { + "epoch": 0.08013881042692277, + "grad_norm": 0.8619922995567322, + "learning_rate": 0.0001990079190442495, + "loss": 2.8163, + "step": 993 + }, + { + "epoch": 0.08021951416350577, + "grad_norm": 0.859200656414032, + "learning_rate": 0.00019900569958581572, + "loss": 2.7715, + "step": 994 + }, + { + "epoch": 0.08030021790008877, + "grad_norm": 0.8346282839775085, + "learning_rate": 0.0001990034776599011, + "loss": 2.8312, + "step": 995 + }, + { + "epoch": 0.08038092163667178, + "grad_norm": 0.9188725352287292, + "learning_rate": 0.00019900125326656102, + "loss": 2.799, + "step": 996 + }, + { + "epoch": 0.08046162537325478, + "grad_norm": 0.8548648953437805, + "learning_rate": 0.00019899902640585092, + "loss": 2.7778, + "step": 997 + }, + { + "epoch": 0.08054232910983779, + "grad_norm": 0.8883183002471924, + "learning_rate": 0.00019899679707782624, + "loss": 2.809, + "step": 998 + }, + { + "epoch": 0.08062303284642079, + "grad_norm": 0.8915852308273315, + "learning_rate": 0.00019899456528254267, + "loss": 2.8309, + "step": 999 + }, + { + "epoch": 0.0807037365830038, + "grad_norm": 0.8092094659805298, + "learning_rate": 0.00019899233102005573, + "loss": 2.7753, + "step": 1000 + }, + { + "epoch": 0.0807037365830038, + "eval_loss": 2.7104671001434326, + "eval_runtime": 773.7354, + "eval_samples_per_second": 3.386, 
+ "eval_steps_per_second": 0.565, + "step": 1000 + }, + { + "epoch": 0.0807844403195868, + "grad_norm": 0.8744900226593018, + "learning_rate": 0.00019899009429042114, + "loss": 2.7948, + "step": 1001 + }, + { + "epoch": 0.0808651440561698, + "grad_norm": 0.8749974370002747, + "learning_rate": 0.0001989878550936946, + "loss": 2.7609, + "step": 1002 + }, + { + "epoch": 0.08094584779275281, + "grad_norm": 0.8622820377349854, + "learning_rate": 0.000198985613429932, + "loss": 2.8023, + "step": 1003 + }, + { + "epoch": 0.08102655152933581, + "grad_norm": 0.9404367208480835, + "learning_rate": 0.00019898336929918915, + "loss": 2.7992, + "step": 1004 + }, + { + "epoch": 0.08110725526591882, + "grad_norm": 0.8846708536148071, + "learning_rate": 0.000198981122701522, + "loss": 2.8084, + "step": 1005 + }, + { + "epoch": 0.08118795900250182, + "grad_norm": 0.8105908036231995, + "learning_rate": 0.0001989788736369865, + "loss": 2.8504, + "step": 1006 + }, + { + "epoch": 0.08126866273908483, + "grad_norm": 1.0107187032699585, + "learning_rate": 0.0001989766221056388, + "loss": 2.7935, + "step": 1007 + }, + { + "epoch": 0.08134936647566782, + "grad_norm": 0.7825451493263245, + "learning_rate": 0.0001989743681075349, + "loss": 2.8024, + "step": 1008 + }, + { + "epoch": 0.08143007021225082, + "grad_norm": 0.8478613495826721, + "learning_rate": 0.000198972111642731, + "loss": 2.8645, + "step": 1009 + }, + { + "epoch": 0.08151077394883383, + "grad_norm": 0.8432144522666931, + "learning_rate": 0.0001989698527112834, + "loss": 2.8469, + "step": 1010 + }, + { + "epoch": 0.08159147768541683, + "grad_norm": 0.8147936463356018, + "learning_rate": 0.00019896759131324835, + "loss": 2.7799, + "step": 1011 + }, + { + "epoch": 0.08167218142199983, + "grad_norm": 0.8446993827819824, + "learning_rate": 0.00019896532744868224, + "loss": 2.7685, + "step": 1012 + }, + { + "epoch": 0.08175288515858284, + "grad_norm": 0.7635807394981384, + "learning_rate": 0.00019896306111764146, + "loss": 2.7823, + 
"step": 1013 + }, + { + "epoch": 0.08183358889516584, + "grad_norm": 0.8272855877876282, + "learning_rate": 0.00019896079232018253, + "loss": 2.7877, + "step": 1014 + }, + { + "epoch": 0.08191429263174885, + "grad_norm": 0.8079700469970703, + "learning_rate": 0.00019895852105636193, + "loss": 2.7849, + "step": 1015 + }, + { + "epoch": 0.08199499636833185, + "grad_norm": 0.8518063426017761, + "learning_rate": 0.0001989562473262363, + "loss": 2.8622, + "step": 1016 + }, + { + "epoch": 0.08207570010491486, + "grad_norm": 0.8646622896194458, + "learning_rate": 0.00019895397112986235, + "loss": 2.8224, + "step": 1017 + }, + { + "epoch": 0.08215640384149786, + "grad_norm": 0.8764398097991943, + "learning_rate": 0.00019895169246729672, + "loss": 2.938, + "step": 1018 + }, + { + "epoch": 0.08223710757808086, + "grad_norm": 0.8304057717323303, + "learning_rate": 0.0001989494113385963, + "loss": 2.7586, + "step": 1019 + }, + { + "epoch": 0.08231781131466387, + "grad_norm": 0.8569272756576538, + "learning_rate": 0.00019894712774381787, + "loss": 2.7803, + "step": 1020 + }, + { + "epoch": 0.08239851505124687, + "grad_norm": 0.8788578510284424, + "learning_rate": 0.00019894484168301836, + "loss": 2.8138, + "step": 1021 + }, + { + "epoch": 0.08247921878782988, + "grad_norm": 0.9113569855690002, + "learning_rate": 0.0001989425531562548, + "loss": 2.8023, + "step": 1022 + }, + { + "epoch": 0.08255992252441288, + "grad_norm": 0.8630590438842773, + "learning_rate": 0.00019894026216358413, + "loss": 2.791, + "step": 1023 + }, + { + "epoch": 0.08264062626099589, + "grad_norm": 0.8691157698631287, + "learning_rate": 0.00019893796870506348, + "loss": 2.811, + "step": 1024 + }, + { + "epoch": 0.08272132999757889, + "grad_norm": 0.9078284502029419, + "learning_rate": 0.00019893567278075007, + "loss": 2.8282, + "step": 1025 + }, + { + "epoch": 0.0828020337341619, + "grad_norm": 0.867511510848999, + "learning_rate": 0.00019893337439070105, + "loss": 2.7862, + "step": 1026 + }, + { + 
"epoch": 0.0828827374707449, + "grad_norm": 0.8016698360443115, + "learning_rate": 0.00019893107353497372, + "loss": 2.8083, + "step": 1027 + }, + { + "epoch": 0.0829634412073279, + "grad_norm": 0.8583545684814453, + "learning_rate": 0.00019892877021362543, + "loss": 2.8041, + "step": 1028 + }, + { + "epoch": 0.08304414494391091, + "grad_norm": 0.8302493691444397, + "learning_rate": 0.0001989264644267136, + "loss": 2.7866, + "step": 1029 + }, + { + "epoch": 0.08312484868049391, + "grad_norm": 0.9628411531448364, + "learning_rate": 0.00019892415617429567, + "loss": 2.8187, + "step": 1030 + }, + { + "epoch": 0.08320555241707692, + "grad_norm": 0.874840259552002, + "learning_rate": 0.0001989218454564292, + "loss": 2.7475, + "step": 1031 + }, + { + "epoch": 0.08328625615365992, + "grad_norm": 0.8641294836997986, + "learning_rate": 0.0001989195322731717, + "loss": 2.7795, + "step": 1032 + }, + { + "epoch": 0.08336695989024291, + "grad_norm": 0.8219757080078125, + "learning_rate": 0.0001989172166245809, + "loss": 2.7683, + "step": 1033 + }, + { + "epoch": 0.08344766362682592, + "grad_norm": 0.7905694246292114, + "learning_rate": 0.00019891489851071455, + "loss": 2.7668, + "step": 1034 + }, + { + "epoch": 0.08352836736340892, + "grad_norm": 0.8180816173553467, + "learning_rate": 0.0001989125779316303, + "loss": 2.7661, + "step": 1035 + }, + { + "epoch": 0.08360907109999192, + "grad_norm": 0.8337293267250061, + "learning_rate": 0.00019891025488738605, + "loss": 2.7823, + "step": 1036 + }, + { + "epoch": 0.08368977483657493, + "grad_norm": 0.9673140048980713, + "learning_rate": 0.00019890792937803973, + "loss": 2.8164, + "step": 1037 + }, + { + "epoch": 0.08377047857315793, + "grad_norm": 0.8810501098632812, + "learning_rate": 0.00019890560140364922, + "loss": 2.7904, + "step": 1038 + }, + { + "epoch": 0.08385118230974094, + "grad_norm": 0.9507614374160767, + "learning_rate": 0.0001989032709642726, + "loss": 2.7928, + "step": 1039 + }, + { + "epoch": 0.08393188604632394, + 
"grad_norm": 0.953738808631897, + "learning_rate": 0.00019890093805996793, + "loss": 2.7922, + "step": 1040 + }, + { + "epoch": 0.08401258978290695, + "grad_norm": 0.8079931139945984, + "learning_rate": 0.00019889860269079336, + "loss": 2.7909, + "step": 1041 + }, + { + "epoch": 0.08409329351948995, + "grad_norm": 1.0330647230148315, + "learning_rate": 0.0001988962648568071, + "loss": 2.7526, + "step": 1042 + }, + { + "epoch": 0.08417399725607295, + "grad_norm": 0.8988988399505615, + "learning_rate": 0.00019889392455806738, + "loss": 2.7471, + "step": 1043 + }, + { + "epoch": 0.08425470099265596, + "grad_norm": 0.7986348271369934, + "learning_rate": 0.00019889158179463255, + "loss": 2.7208, + "step": 1044 + }, + { + "epoch": 0.08433540472923896, + "grad_norm": 0.9231631755828857, + "learning_rate": 0.000198889236566561, + "loss": 2.7953, + "step": 1045 + }, + { + "epoch": 0.08441610846582197, + "grad_norm": 0.8438155055046082, + "learning_rate": 0.00019888688887391117, + "loss": 2.8006, + "step": 1046 + }, + { + "epoch": 0.08449681220240497, + "grad_norm": 0.8915219306945801, + "learning_rate": 0.0001988845387167416, + "loss": 2.8184, + "step": 1047 + }, + { + "epoch": 0.08457751593898798, + "grad_norm": 0.924401581287384, + "learning_rate": 0.0001988821860951108, + "loss": 2.8411, + "step": 1048 + }, + { + "epoch": 0.08465821967557098, + "grad_norm": 0.8144630193710327, + "learning_rate": 0.00019887983100907745, + "loss": 2.8258, + "step": 1049 + }, + { + "epoch": 0.08473892341215399, + "grad_norm": 0.9974459409713745, + "learning_rate": 0.00019887747345870028, + "loss": 2.7567, + "step": 1050 + }, + { + "epoch": 0.08481962714873699, + "grad_norm": 0.944526195526123, + "learning_rate": 0.00019887511344403796, + "loss": 2.8657, + "step": 1051 + }, + { + "epoch": 0.08490033088532, + "grad_norm": 0.8204831480979919, + "learning_rate": 0.00019887275096514936, + "loss": 2.8054, + "step": 1052 + }, + { + "epoch": 0.084981034621903, + "grad_norm": 0.8855900168418884, + 
"learning_rate": 0.00019887038602209336, + "loss": 2.8019, + "step": 1053 + }, + { + "epoch": 0.085061738358486, + "grad_norm": 0.9025108814239502, + "learning_rate": 0.0001988680186149289, + "loss": 2.7934, + "step": 1054 + }, + { + "epoch": 0.08514244209506901, + "grad_norm": 0.8486441373825073, + "learning_rate": 0.00019886564874371494, + "loss": 2.809, + "step": 1055 + }, + { + "epoch": 0.08522314583165201, + "grad_norm": 0.778364896774292, + "learning_rate": 0.00019886327640851058, + "loss": 2.7783, + "step": 1056 + }, + { + "epoch": 0.08530384956823502, + "grad_norm": 0.8515299558639526, + "learning_rate": 0.00019886090160937497, + "loss": 2.8122, + "step": 1057 + }, + { + "epoch": 0.08538455330481802, + "grad_norm": 0.8466131091117859, + "learning_rate": 0.00019885852434636724, + "loss": 2.7798, + "step": 1058 + }, + { + "epoch": 0.08546525704140101, + "grad_norm": 0.8856541514396667, + "learning_rate": 0.00019885614461954667, + "loss": 2.8033, + "step": 1059 + }, + { + "epoch": 0.08554596077798401, + "grad_norm": 0.8853924870491028, + "learning_rate": 0.00019885376242897258, + "loss": 2.8368, + "step": 1060 + }, + { + "epoch": 0.08562666451456702, + "grad_norm": 0.7858660221099854, + "learning_rate": 0.0001988513777747043, + "loss": 2.7806, + "step": 1061 + }, + { + "epoch": 0.08570736825115002, + "grad_norm": 0.8601513504981995, + "learning_rate": 0.0001988489906568013, + "loss": 2.8434, + "step": 1062 + }, + { + "epoch": 0.08578807198773303, + "grad_norm": 0.9126001596450806, + "learning_rate": 0.00019884660107532306, + "loss": 2.8469, + "step": 1063 + }, + { + "epoch": 0.08586877572431603, + "grad_norm": 0.9016061425209045, + "learning_rate": 0.00019884420903032912, + "loss": 2.7907, + "step": 1064 + }, + { + "epoch": 0.08594947946089904, + "grad_norm": 0.9134494066238403, + "learning_rate": 0.00019884181452187915, + "loss": 2.8426, + "step": 1065 + }, + { + "epoch": 0.08603018319748204, + "grad_norm": 0.8891138434410095, + "learning_rate": 
0.00019883941755003272, + "loss": 2.8092, + "step": 1066 + }, + { + "epoch": 0.08611088693406505, + "grad_norm": 0.822884202003479, + "learning_rate": 0.0001988370181148497, + "loss": 2.8454, + "step": 1067 + }, + { + "epoch": 0.08619159067064805, + "grad_norm": 0.8341901898384094, + "learning_rate": 0.0001988346162163898, + "loss": 2.8027, + "step": 1068 + }, + { + "epoch": 0.08627229440723105, + "grad_norm": 0.8653229475021362, + "learning_rate": 0.00019883221185471291, + "loss": 2.7487, + "step": 1069 + }, + { + "epoch": 0.08635299814381406, + "grad_norm": 0.8065966367721558, + "learning_rate": 0.00019882980502987894, + "loss": 2.7847, + "step": 1070 + }, + { + "epoch": 0.08643370188039706, + "grad_norm": 0.9106903076171875, + "learning_rate": 0.0001988273957419479, + "loss": 2.7962, + "step": 1071 + }, + { + "epoch": 0.08651440561698007, + "grad_norm": 0.953815221786499, + "learning_rate": 0.0001988249839909798, + "loss": 2.8168, + "step": 1072 + }, + { + "epoch": 0.08659510935356307, + "grad_norm": 0.8642842173576355, + "learning_rate": 0.00019882256977703477, + "loss": 2.8205, + "step": 1073 + }, + { + "epoch": 0.08667581309014608, + "grad_norm": 0.8500350117683411, + "learning_rate": 0.000198820153100173, + "loss": 2.8798, + "step": 1074 + }, + { + "epoch": 0.08675651682672908, + "grad_norm": 0.9212989807128906, + "learning_rate": 0.00019881773396045467, + "loss": 2.8088, + "step": 1075 + }, + { + "epoch": 0.08683722056331208, + "grad_norm": 0.8897970914840698, + "learning_rate": 0.0001988153123579401, + "loss": 2.7983, + "step": 1076 + }, + { + "epoch": 0.08691792429989509, + "grad_norm": 0.7942636609077454, + "learning_rate": 0.00019881288829268968, + "loss": 2.7711, + "step": 1077 + }, + { + "epoch": 0.08699862803647809, + "grad_norm": 0.8286700248718262, + "learning_rate": 0.00019881046176476374, + "loss": 2.7995, + "step": 1078 + }, + { + "epoch": 0.0870793317730611, + "grad_norm": 0.9436343908309937, + "learning_rate": 0.00019880803277422281, + "loss": 
2.8399, + "step": 1079 + }, + { + "epoch": 0.0871600355096441, + "grad_norm": 0.9592518210411072, + "learning_rate": 0.00019880560132112742, + "loss": 2.7888, + "step": 1080 + }, + { + "epoch": 0.0872407392462271, + "grad_norm": 0.8956589698791504, + "learning_rate": 0.00019880316740553816, + "loss": 2.7635, + "step": 1081 + }, + { + "epoch": 0.08732144298281011, + "grad_norm": 1.055312156677246, + "learning_rate": 0.00019880073102751574, + "loss": 2.7778, + "step": 1082 + }, + { + "epoch": 0.08740214671939311, + "grad_norm": 0.783273458480835, + "learning_rate": 0.00019879829218712075, + "loss": 2.735, + "step": 1083 + }, + { + "epoch": 0.0874828504559761, + "grad_norm": 0.8315421938896179, + "learning_rate": 0.00019879585088441413, + "loss": 2.7973, + "step": 1084 + }, + { + "epoch": 0.08756355419255911, + "grad_norm": 0.9550945162773132, + "learning_rate": 0.00019879340711945662, + "loss": 2.8083, + "step": 1085 + }, + { + "epoch": 0.08764425792914211, + "grad_norm": 0.9579277634620667, + "learning_rate": 0.00019879096089230915, + "loss": 2.7411, + "step": 1086 + }, + { + "epoch": 0.08772496166572512, + "grad_norm": 0.8602219223976135, + "learning_rate": 0.0001987885122030327, + "loss": 2.7461, + "step": 1087 + }, + { + "epoch": 0.08780566540230812, + "grad_norm": 0.9749068021774292, + "learning_rate": 0.00019878606105168829, + "loss": 2.7701, + "step": 1088 + }, + { + "epoch": 0.08788636913889113, + "grad_norm": 0.8128982186317444, + "learning_rate": 0.00019878360743833703, + "loss": 2.7949, + "step": 1089 + }, + { + "epoch": 0.08796707287547413, + "grad_norm": 0.9177080988883972, + "learning_rate": 0.00019878115136304003, + "loss": 2.7471, + "step": 1090 + }, + { + "epoch": 0.08804777661205714, + "grad_norm": 0.9052132368087769, + "learning_rate": 0.0001987786928258585, + "loss": 2.8356, + "step": 1091 + }, + { + "epoch": 0.08812848034864014, + "grad_norm": 0.8972994089126587, + "learning_rate": 0.00019877623182685378, + "loss": 2.8304, + "step": 1092 + }, + { 
+ "epoch": 0.08820918408522314, + "grad_norm": 0.861251950263977, + "learning_rate": 0.0001987737683660871, + "loss": 2.8436, + "step": 1093 + }, + { + "epoch": 0.08828988782180615, + "grad_norm": 0.9139869809150696, + "learning_rate": 0.00019877130244361996, + "loss": 2.7583, + "step": 1094 + }, + { + "epoch": 0.08837059155838915, + "grad_norm": 0.8441170454025269, + "learning_rate": 0.00019876883405951377, + "loss": 2.7508, + "step": 1095 + }, + { + "epoch": 0.08845129529497216, + "grad_norm": 0.8624769449234009, + "learning_rate": 0.00019876636321383004, + "loss": 2.8003, + "step": 1096 + }, + { + "epoch": 0.08853199903155516, + "grad_norm": 0.9033877849578857, + "learning_rate": 0.00019876388990663037, + "loss": 2.7934, + "step": 1097 + }, + { + "epoch": 0.08861270276813817, + "grad_norm": 0.9492632746696472, + "learning_rate": 0.0001987614141379764, + "loss": 2.7852, + "step": 1098 + }, + { + "epoch": 0.08869340650472117, + "grad_norm": 0.9004682302474976, + "learning_rate": 0.00019875893590792982, + "loss": 2.7518, + "step": 1099 + }, + { + "epoch": 0.08877411024130417, + "grad_norm": 0.8352272510528564, + "learning_rate": 0.0001987564552165524, + "loss": 2.8035, + "step": 1100 + }, + { + "epoch": 0.08885481397788718, + "grad_norm": 0.8488562107086182, + "learning_rate": 0.00019875397206390593, + "loss": 2.7672, + "step": 1101 + }, + { + "epoch": 0.08893551771447018, + "grad_norm": 0.9450985193252563, + "learning_rate": 0.00019875148645005238, + "loss": 2.7558, + "step": 1102 + }, + { + "epoch": 0.08901622145105319, + "grad_norm": 0.9203561544418335, + "learning_rate": 0.0001987489983750536, + "loss": 2.7983, + "step": 1103 + }, + { + "epoch": 0.08909692518763619, + "grad_norm": 0.8761897087097168, + "learning_rate": 0.0001987465078389717, + "loss": 2.7536, + "step": 1104 + }, + { + "epoch": 0.0891776289242192, + "grad_norm": 0.9064637422561646, + "learning_rate": 0.00019874401484186867, + "loss": 2.8104, + "step": 1105 + }, + { + "epoch": 0.0892583326608022, 
+ "grad_norm": 0.8394999504089355, + "learning_rate": 0.00019874151938380666, + "loss": 2.7459, + "step": 1106 + }, + { + "epoch": 0.0893390363973852, + "grad_norm": 0.8782099485397339, + "learning_rate": 0.00019873902146484785, + "loss": 2.8675, + "step": 1107 + }, + { + "epoch": 0.08941974013396821, + "grad_norm": 0.8564850091934204, + "learning_rate": 0.00019873652108505458, + "loss": 2.8561, + "step": 1108 + }, + { + "epoch": 0.08950044387055121, + "grad_norm": 0.8343809843063354, + "learning_rate": 0.0001987340182444891, + "loss": 2.8406, + "step": 1109 + }, + { + "epoch": 0.0895811476071342, + "grad_norm": 1.096273422241211, + "learning_rate": 0.00019873151294321376, + "loss": 2.8264, + "step": 1110 + }, + { + "epoch": 0.08966185134371721, + "grad_norm": 0.8654618263244629, + "learning_rate": 0.00019872900518129103, + "loss": 2.7956, + "step": 1111 + }, + { + "epoch": 0.08974255508030021, + "grad_norm": 0.8868138194084167, + "learning_rate": 0.00019872649495878344, + "loss": 2.8028, + "step": 1112 + }, + { + "epoch": 0.08982325881688322, + "grad_norm": 0.8139104843139648, + "learning_rate": 0.00019872398227575348, + "loss": 2.7502, + "step": 1113 + }, + { + "epoch": 0.08990396255346622, + "grad_norm": 0.8277762532234192, + "learning_rate": 0.00019872146713226384, + "loss": 2.7913, + "step": 1114 + }, + { + "epoch": 0.08998466629004923, + "grad_norm": 0.8470397591590881, + "learning_rate": 0.00019871894952837717, + "loss": 2.7982, + "step": 1115 + }, + { + "epoch": 0.09006537002663223, + "grad_norm": 0.8424760103225708, + "learning_rate": 0.00019871642946415625, + "loss": 2.8067, + "step": 1116 + }, + { + "epoch": 0.09014607376321523, + "grad_norm": 0.8253894448280334, + "learning_rate": 0.00019871390693966382, + "loss": 2.8339, + "step": 1117 + }, + { + "epoch": 0.09022677749979824, + "grad_norm": 0.8120691776275635, + "learning_rate": 0.00019871138195496282, + "loss": 2.7938, + "step": 1118 + }, + { + "epoch": 0.09030748123638124, + "grad_norm": 
0.920189619064331, + "learning_rate": 0.00019870885451011617, + "loss": 2.8083, + "step": 1119 + }, + { + "epoch": 0.09038818497296425, + "grad_norm": 0.8990969657897949, + "learning_rate": 0.0001987063246051868, + "loss": 2.7481, + "step": 1120 + }, + { + "epoch": 0.09046888870954725, + "grad_norm": 0.8280801773071289, + "learning_rate": 0.0001987037922402378, + "loss": 2.8536, + "step": 1121 + }, + { + "epoch": 0.09054959244613026, + "grad_norm": 0.8510503768920898, + "learning_rate": 0.0001987012574153323, + "loss": 2.758, + "step": 1122 + }, + { + "epoch": 0.09063029618271326, + "grad_norm": 0.9103946685791016, + "learning_rate": 0.00019869872013053344, + "loss": 2.7594, + "step": 1123 + }, + { + "epoch": 0.09071099991929626, + "grad_norm": 0.804916262626648, + "learning_rate": 0.00019869618038590448, + "loss": 2.7489, + "step": 1124 + }, + { + "epoch": 0.09079170365587927, + "grad_norm": 0.7542802095413208, + "learning_rate": 0.00019869363818150867, + "loss": 2.76, + "step": 1125 + }, + { + "epoch": 0.09087240739246227, + "grad_norm": 0.7725108861923218, + "learning_rate": 0.00019869109351740947, + "loss": 2.8124, + "step": 1126 + }, + { + "epoch": 0.09095311112904528, + "grad_norm": 0.8533692955970764, + "learning_rate": 0.0001986885463936702, + "loss": 2.8499, + "step": 1127 + }, + { + "epoch": 0.09103381486562828, + "grad_norm": 0.8351541757583618, + "learning_rate": 0.0001986859968103544, + "loss": 2.8075, + "step": 1128 + }, + { + "epoch": 0.09111451860221129, + "grad_norm": 0.8780044913291931, + "learning_rate": 0.0001986834447675256, + "loss": 2.7587, + "step": 1129 + }, + { + "epoch": 0.09119522233879429, + "grad_norm": 0.9587519764900208, + "learning_rate": 0.00019868089026524736, + "loss": 2.8069, + "step": 1130 + }, + { + "epoch": 0.0912759260753773, + "grad_norm": 0.8285651206970215, + "learning_rate": 0.00019867833330358342, + "loss": 2.8209, + "step": 1131 + }, + { + "epoch": 0.0913566298119603, + "grad_norm": 0.8589211106300354, + 
"learning_rate": 0.00019867577388259745, + "loss": 2.8144, + "step": 1132 + }, + { + "epoch": 0.0914373335485433, + "grad_norm": 0.8740364909172058, + "learning_rate": 0.00019867321200235324, + "loss": 2.858, + "step": 1133 + }, + { + "epoch": 0.09151803728512631, + "grad_norm": 0.8368108868598938, + "learning_rate": 0.00019867064766291467, + "loss": 2.7997, + "step": 1134 + }, + { + "epoch": 0.0915987410217093, + "grad_norm": 0.8243690133094788, + "learning_rate": 0.00019866808086434564, + "loss": 2.7925, + "step": 1135 + }, + { + "epoch": 0.0916794447582923, + "grad_norm": 0.8296996355056763, + "learning_rate": 0.0001986655116067101, + "loss": 2.7953, + "step": 1136 + }, + { + "epoch": 0.09176014849487531, + "grad_norm": 0.9255942702293396, + "learning_rate": 0.0001986629398900721, + "loss": 2.844, + "step": 1137 + }, + { + "epoch": 0.09184085223145831, + "grad_norm": 0.7498174905776978, + "learning_rate": 0.00019866036571449574, + "loss": 2.7372, + "step": 1138 + }, + { + "epoch": 0.09192155596804132, + "grad_norm": 0.8170139193534851, + "learning_rate": 0.00019865778908004513, + "loss": 2.7656, + "step": 1139 + }, + { + "epoch": 0.09200225970462432, + "grad_norm": 0.8858106732368469, + "learning_rate": 0.00019865520998678458, + "loss": 2.7657, + "step": 1140 + }, + { + "epoch": 0.09208296344120732, + "grad_norm": 0.8789847493171692, + "learning_rate": 0.00019865262843477826, + "loss": 2.8419, + "step": 1141 + }, + { + "epoch": 0.09216366717779033, + "grad_norm": 0.8433314561843872, + "learning_rate": 0.00019865004442409058, + "loss": 2.7981, + "step": 1142 + }, + { + "epoch": 0.09224437091437333, + "grad_norm": 0.8822595477104187, + "learning_rate": 0.0001986474579547859, + "loss": 2.8368, + "step": 1143 + }, + { + "epoch": 0.09232507465095634, + "grad_norm": 0.9067013263702393, + "learning_rate": 0.00019864486902692872, + "loss": 2.7807, + "step": 1144 + }, + { + "epoch": 0.09240577838753934, + "grad_norm": 0.9551558494567871, + "learning_rate": 
0.00019864227764058355, + "loss": 2.7617, + "step": 1145 + }, + { + "epoch": 0.09248648212412235, + "grad_norm": 0.8337206244468689, + "learning_rate": 0.00019863968379581494, + "loss": 2.8289, + "step": 1146 + }, + { + "epoch": 0.09256718586070535, + "grad_norm": 0.952702522277832, + "learning_rate": 0.0001986370874926876, + "loss": 2.8508, + "step": 1147 + }, + { + "epoch": 0.09264788959728835, + "grad_norm": 0.8586699366569519, + "learning_rate": 0.00019863448873126615, + "loss": 2.8784, + "step": 1148 + }, + { + "epoch": 0.09272859333387136, + "grad_norm": 0.7625309228897095, + "learning_rate": 0.00019863188751161544, + "loss": 2.7936, + "step": 1149 + }, + { + "epoch": 0.09280929707045436, + "grad_norm": 0.8912700414657593, + "learning_rate": 0.0001986292838338003, + "loss": 2.8745, + "step": 1150 + }, + { + "epoch": 0.09289000080703737, + "grad_norm": 0.8618904948234558, + "learning_rate": 0.00019862667769788553, + "loss": 2.8086, + "step": 1151 + }, + { + "epoch": 0.09297070454362037, + "grad_norm": 1.0013352632522583, + "learning_rate": 0.00019862406910393617, + "loss": 2.8211, + "step": 1152 + }, + { + "epoch": 0.09305140828020338, + "grad_norm": 0.7922475337982178, + "learning_rate": 0.0001986214580520172, + "loss": 2.7668, + "step": 1153 + }, + { + "epoch": 0.09313211201678638, + "grad_norm": 0.9490330815315247, + "learning_rate": 0.00019861884454219365, + "loss": 2.7571, + "step": 1154 + }, + { + "epoch": 0.09321281575336939, + "grad_norm": 0.8780270218849182, + "learning_rate": 0.00019861622857453076, + "loss": 2.7598, + "step": 1155 + }, + { + "epoch": 0.09329351948995239, + "grad_norm": 0.9220066070556641, + "learning_rate": 0.00019861361014909365, + "loss": 2.7609, + "step": 1156 + }, + { + "epoch": 0.0933742232265354, + "grad_norm": 0.8299020528793335, + "learning_rate": 0.0001986109892659476, + "loss": 2.8655, + "step": 1157 + }, + { + "epoch": 0.0934549269631184, + "grad_norm": 0.9700348377227783, + "learning_rate": 0.0001986083659251579, + 
"loss": 2.8597, + "step": 1158 + }, + { + "epoch": 0.0935356306997014, + "grad_norm": 0.8820784687995911, + "learning_rate": 0.00019860574012679001, + "loss": 2.8776, + "step": 1159 + }, + { + "epoch": 0.0936163344362844, + "grad_norm": 0.8134172558784485, + "learning_rate": 0.0001986031118709093, + "loss": 2.8163, + "step": 1160 + }, + { + "epoch": 0.0936970381728674, + "grad_norm": 0.885974109172821, + "learning_rate": 0.00019860048115758123, + "loss": 2.752, + "step": 1161 + }, + { + "epoch": 0.0937777419094504, + "grad_norm": 0.9650186896324158, + "learning_rate": 0.0001985978479868715, + "loss": 2.7587, + "step": 1162 + }, + { + "epoch": 0.0938584456460334, + "grad_norm": 0.8550445437431335, + "learning_rate": 0.00019859521235884563, + "loss": 2.7887, + "step": 1163 + }, + { + "epoch": 0.09393914938261641, + "grad_norm": 0.9686560034751892, + "learning_rate": 0.00019859257427356933, + "loss": 2.7974, + "step": 1164 + }, + { + "epoch": 0.09401985311919941, + "grad_norm": 0.9185387492179871, + "learning_rate": 0.00019858993373110837, + "loss": 2.7933, + "step": 1165 + }, + { + "epoch": 0.09410055685578242, + "grad_norm": 0.9549610018730164, + "learning_rate": 0.00019858729073152852, + "loss": 2.7698, + "step": 1166 + }, + { + "epoch": 0.09418126059236542, + "grad_norm": 1.0523492097854614, + "learning_rate": 0.0001985846452748957, + "loss": 2.7215, + "step": 1167 + }, + { + "epoch": 0.09426196432894843, + "grad_norm": 0.8551118969917297, + "learning_rate": 0.00019858199736127582, + "loss": 2.805, + "step": 1168 + }, + { + "epoch": 0.09434266806553143, + "grad_norm": 1.021374225616455, + "learning_rate": 0.0001985793469907349, + "loss": 2.794, + "step": 1169 + }, + { + "epoch": 0.09442337180211444, + "grad_norm": 0.8745501041412354, + "learning_rate": 0.0001985766941633389, + "loss": 2.7793, + "step": 1170 + }, + { + "epoch": 0.09450407553869744, + "grad_norm": 0.7426434755325317, + "learning_rate": 0.00019857403887915402, + "loss": 2.7808, + "step": 1171 + }, + 
{ + "epoch": 0.09458477927528045, + "grad_norm": 0.9183726906776428, + "learning_rate": 0.0001985713811382464, + "loss": 2.8001, + "step": 1172 + }, + { + "epoch": 0.09466548301186345, + "grad_norm": 0.8136709928512573, + "learning_rate": 0.00019856872094068233, + "loss": 2.7394, + "step": 1173 + }, + { + "epoch": 0.09474618674844645, + "grad_norm": 0.9399348497390747, + "learning_rate": 0.00019856605828652807, + "loss": 2.7733, + "step": 1174 + }, + { + "epoch": 0.09482689048502946, + "grad_norm": 0.8233176469802856, + "learning_rate": 0.00019856339317584997, + "loss": 2.7672, + "step": 1175 + }, + { + "epoch": 0.09490759422161246, + "grad_norm": 0.9157048463821411, + "learning_rate": 0.00019856072560871447, + "loss": 2.7992, + "step": 1176 + }, + { + "epoch": 0.09498829795819547, + "grad_norm": 0.8729545474052429, + "learning_rate": 0.00019855805558518803, + "loss": 2.749, + "step": 1177 + }, + { + "epoch": 0.09506900169477847, + "grad_norm": 0.8592300415039062, + "learning_rate": 0.00019855538310533722, + "loss": 2.7257, + "step": 1178 + }, + { + "epoch": 0.09514970543136148, + "grad_norm": 0.8470803499221802, + "learning_rate": 0.00019855270816922867, + "loss": 2.7479, + "step": 1179 + }, + { + "epoch": 0.09523040916794448, + "grad_norm": 0.8538667559623718, + "learning_rate": 0.00019855003077692897, + "loss": 2.7576, + "step": 1180 + }, + { + "epoch": 0.09531111290452748, + "grad_norm": 0.8890984654426575, + "learning_rate": 0.0001985473509285049, + "loss": 2.7961, + "step": 1181 + }, + { + "epoch": 0.09539181664111049, + "grad_norm": 0.7769411206245422, + "learning_rate": 0.00019854466862402324, + "loss": 2.8087, + "step": 1182 + }, + { + "epoch": 0.09547252037769349, + "grad_norm": 0.8892520666122437, + "learning_rate": 0.00019854198386355085, + "loss": 2.7935, + "step": 1183 + }, + { + "epoch": 0.0955532241142765, + "grad_norm": 0.8675585389137268, + "learning_rate": 0.00019853929664715464, + "loss": 2.833, + "step": 1184 + }, + { + "epoch": 
0.0956339278508595, + "grad_norm": 0.8053853511810303, + "learning_rate": 0.00019853660697490154, + "loss": 2.8002, + "step": 1185 + }, + { + "epoch": 0.09571463158744249, + "grad_norm": 0.9237198829650879, + "learning_rate": 0.00019853391484685865, + "loss": 2.8281, + "step": 1186 + }, + { + "epoch": 0.0957953353240255, + "grad_norm": 0.8432926535606384, + "learning_rate": 0.000198531220263093, + "loss": 2.8131, + "step": 1187 + }, + { + "epoch": 0.0958760390606085, + "grad_norm": 0.796380341053009, + "learning_rate": 0.0001985285232236718, + "loss": 2.753, + "step": 1188 + }, + { + "epoch": 0.0959567427971915, + "grad_norm": 0.9183037281036377, + "learning_rate": 0.00019852582372866225, + "loss": 2.7625, + "step": 1189 + }, + { + "epoch": 0.09603744653377451, + "grad_norm": 0.8194435238838196, + "learning_rate": 0.0001985231217781316, + "loss": 2.7906, + "step": 1190 + }, + { + "epoch": 0.09611815027035751, + "grad_norm": 0.8430871367454529, + "learning_rate": 0.00019852041737214725, + "loss": 2.8457, + "step": 1191 + }, + { + "epoch": 0.09619885400694052, + "grad_norm": 1.0237345695495605, + "learning_rate": 0.0001985177105107765, + "loss": 2.789, + "step": 1192 + }, + { + "epoch": 0.09627955774352352, + "grad_norm": 0.8721581101417542, + "learning_rate": 0.00019851500119408692, + "loss": 2.7187, + "step": 1193 + }, + { + "epoch": 0.09636026148010653, + "grad_norm": 0.8089142441749573, + "learning_rate": 0.00019851228942214603, + "loss": 2.7544, + "step": 1194 + }, + { + "epoch": 0.09644096521668953, + "grad_norm": 1.1076842546463013, + "learning_rate": 0.0001985095751950213, + "loss": 2.7859, + "step": 1195 + }, + { + "epoch": 0.09652166895327254, + "grad_norm": 0.84585040807724, + "learning_rate": 0.0001985068585127805, + "loss": 2.8005, + "step": 1196 + }, + { + "epoch": 0.09660237268985554, + "grad_norm": 0.8231167197227478, + "learning_rate": 0.00019850413937549127, + "loss": 2.8561, + "step": 1197 + }, + { + "epoch": 0.09668307642643854, + "grad_norm": 
1.0028103590011597, + "learning_rate": 0.00019850141778322136, + "loss": 2.8049, + "step": 1198 + }, + { + "epoch": 0.09676378016302155, + "grad_norm": 0.8575148582458496, + "learning_rate": 0.0001984986937360387, + "loss": 2.7723, + "step": 1199 + }, + { + "epoch": 0.09684448389960455, + "grad_norm": 0.8567116260528564, + "learning_rate": 0.00019849596723401107, + "loss": 2.7418, + "step": 1200 + }, + { + "epoch": 0.09692518763618756, + "grad_norm": 1.1159218549728394, + "learning_rate": 0.00019849323827720645, + "loss": 2.8352, + "step": 1201 + }, + { + "epoch": 0.09700589137277056, + "grad_norm": 0.849656879901886, + "learning_rate": 0.0001984905068656929, + "loss": 2.7875, + "step": 1202 + }, + { + "epoch": 0.09708659510935357, + "grad_norm": 0.8479150533676147, + "learning_rate": 0.00019848777299953847, + "loss": 2.7828, + "step": 1203 + }, + { + "epoch": 0.09716729884593657, + "grad_norm": 0.9143954515457153, + "learning_rate": 0.00019848503667881125, + "loss": 2.7978, + "step": 1204 + }, + { + "epoch": 0.09724800258251957, + "grad_norm": 0.8162297010421753, + "learning_rate": 0.0001984822979035795, + "loss": 2.7621, + "step": 1205 + }, + { + "epoch": 0.09732870631910258, + "grad_norm": 0.8625509142875671, + "learning_rate": 0.00019847955667391144, + "loss": 2.7484, + "step": 1206 + }, + { + "epoch": 0.09740941005568558, + "grad_norm": 0.8485168218612671, + "learning_rate": 0.00019847681298987543, + "loss": 2.7599, + "step": 1207 + }, + { + "epoch": 0.09749011379226859, + "grad_norm": 0.8962678909301758, + "learning_rate": 0.00019847406685153976, + "loss": 2.7753, + "step": 1208 + }, + { + "epoch": 0.09757081752885159, + "grad_norm": 0.8890791535377502, + "learning_rate": 0.00019847131825897297, + "loss": 2.7635, + "step": 1209 + }, + { + "epoch": 0.0976515212654346, + "grad_norm": 0.8461710810661316, + "learning_rate": 0.00019846856721224355, + "loss": 2.796, + "step": 1210 + }, + { + "epoch": 0.0977322250020176, + "grad_norm": 0.912738025188446, + 
"learning_rate": 0.00019846581371141996, + "loss": 2.7889, + "step": 1211 + }, + { + "epoch": 0.09781292873860059, + "grad_norm": 0.8530749082565308, + "learning_rate": 0.00019846305775657097, + "loss": 2.8298, + "step": 1212 + }, + { + "epoch": 0.0978936324751836, + "grad_norm": 0.8890148401260376, + "learning_rate": 0.00019846029934776516, + "loss": 2.7491, + "step": 1213 + }, + { + "epoch": 0.0979743362117666, + "grad_norm": 0.8936887979507446, + "learning_rate": 0.0001984575384850713, + "loss": 2.7759, + "step": 1214 + }, + { + "epoch": 0.0980550399483496, + "grad_norm": 0.7811321020126343, + "learning_rate": 0.00019845477516855823, + "loss": 2.8126, + "step": 1215 + }, + { + "epoch": 0.09813574368493261, + "grad_norm": 0.8751768469810486, + "learning_rate": 0.00019845200939829484, + "loss": 2.792, + "step": 1216 + }, + { + "epoch": 0.09821644742151561, + "grad_norm": 0.8749501705169678, + "learning_rate": 0.00019844924117434998, + "loss": 2.7818, + "step": 1217 + }, + { + "epoch": 0.09829715115809862, + "grad_norm": 0.8130955100059509, + "learning_rate": 0.0001984464704967927, + "loss": 2.8581, + "step": 1218 + }, + { + "epoch": 0.09837785489468162, + "grad_norm": 0.8158220648765564, + "learning_rate": 0.00019844369736569196, + "loss": 2.7704, + "step": 1219 + }, + { + "epoch": 0.09845855863126463, + "grad_norm": 0.9351849555969238, + "learning_rate": 0.00019844092178111702, + "loss": 2.7857, + "step": 1220 + }, + { + "epoch": 0.09853926236784763, + "grad_norm": 0.8373914957046509, + "learning_rate": 0.00019843814374313697, + "loss": 2.8217, + "step": 1221 + }, + { + "epoch": 0.09861996610443063, + "grad_norm": 0.8919960856437683, + "learning_rate": 0.00019843536325182104, + "loss": 2.7914, + "step": 1222 + }, + { + "epoch": 0.09870066984101364, + "grad_norm": 0.9994316697120667, + "learning_rate": 0.00019843258030723858, + "loss": 2.7981, + "step": 1223 + }, + { + "epoch": 0.09878137357759664, + "grad_norm": 0.8144915699958801, + "learning_rate": 
0.0001984297949094589, + "loss": 2.811, + "step": 1224 + }, + { + "epoch": 0.09886207731417965, + "grad_norm": 0.8957876563072205, + "learning_rate": 0.0001984270070585514, + "loss": 2.7752, + "step": 1225 + }, + { + "epoch": 0.09894278105076265, + "grad_norm": 0.9426520466804504, + "learning_rate": 0.0001984242167545856, + "loss": 2.8139, + "step": 1226 + }, + { + "epoch": 0.09902348478734566, + "grad_norm": 0.888769268989563, + "learning_rate": 0.00019842142399763106, + "loss": 2.8305, + "step": 1227 + }, + { + "epoch": 0.09910418852392866, + "grad_norm": 0.9497748613357544, + "learning_rate": 0.00019841862878775736, + "loss": 2.748, + "step": 1228 + }, + { + "epoch": 0.09918489226051166, + "grad_norm": 0.8715065717697144, + "learning_rate": 0.00019841583112503416, + "loss": 2.7794, + "step": 1229 + }, + { + "epoch": 0.09926559599709467, + "grad_norm": 0.875599205493927, + "learning_rate": 0.00019841303100953116, + "loss": 2.8016, + "step": 1230 + }, + { + "epoch": 0.09934629973367767, + "grad_norm": 0.8631919622421265, + "learning_rate": 0.0001984102284413182, + "loss": 2.8239, + "step": 1231 + }, + { + "epoch": 0.09942700347026068, + "grad_norm": 0.9028074741363525, + "learning_rate": 0.0001984074234204651, + "loss": 2.8372, + "step": 1232 + }, + { + "epoch": 0.09950770720684368, + "grad_norm": 0.890933096408844, + "learning_rate": 0.00019840461594704175, + "loss": 2.799, + "step": 1233 + }, + { + "epoch": 0.09958841094342669, + "grad_norm": 0.9626480340957642, + "learning_rate": 0.00019840180602111816, + "loss": 2.8207, + "step": 1234 + }, + { + "epoch": 0.09966911468000969, + "grad_norm": 0.798394501209259, + "learning_rate": 0.00019839899364276433, + "loss": 2.7784, + "step": 1235 + }, + { + "epoch": 0.0997498184165927, + "grad_norm": 0.8246447443962097, + "learning_rate": 0.00019839617881205036, + "loss": 2.8193, + "step": 1236 + }, + { + "epoch": 0.09983052215317569, + "grad_norm": 0.8315989375114441, + "learning_rate": 0.0001983933615290464, + "loss": 
2.8036, + "step": 1237 + }, + { + "epoch": 0.09991122588975869, + "grad_norm": 0.8889075517654419, + "learning_rate": 0.00019839054179382267, + "loss": 2.7606, + "step": 1238 + }, + { + "epoch": 0.0999919296263417, + "grad_norm": 0.7558645009994507, + "learning_rate": 0.00019838771960644942, + "loss": 2.7666, + "step": 1239 + }, + { + "epoch": 0.1000726333629247, + "grad_norm": 0.8876601457595825, + "learning_rate": 0.00019838489496699704, + "loss": 2.8778, + "step": 1240 + }, + { + "epoch": 0.1001533370995077, + "grad_norm": 0.8609516620635986, + "learning_rate": 0.00019838206787553588, + "loss": 2.8189, + "step": 1241 + }, + { + "epoch": 0.10023404083609071, + "grad_norm": 0.8521148562431335, + "learning_rate": 0.00019837923833213644, + "loss": 2.8159, + "step": 1242 + }, + { + "epoch": 0.10031474457267371, + "grad_norm": 0.9155359268188477, + "learning_rate": 0.0001983764063368692, + "loss": 2.8351, + "step": 1243 + }, + { + "epoch": 0.10039544830925672, + "grad_norm": 0.8595378398895264, + "learning_rate": 0.00019837357188980475, + "loss": 2.8447, + "step": 1244 + }, + { + "epoch": 0.10047615204583972, + "grad_norm": 0.900244951248169, + "learning_rate": 0.00019837073499101373, + "loss": 2.8646, + "step": 1245 + }, + { + "epoch": 0.10055685578242272, + "grad_norm": 0.8404260277748108, + "learning_rate": 0.00019836789564056689, + "loss": 2.7824, + "step": 1246 + }, + { + "epoch": 0.10063755951900573, + "grad_norm": 0.8776196241378784, + "learning_rate": 0.0001983650538385349, + "loss": 2.8045, + "step": 1247 + }, + { + "epoch": 0.10071826325558873, + "grad_norm": 0.8889327049255371, + "learning_rate": 0.00019836220958498868, + "loss": 2.7967, + "step": 1248 + }, + { + "epoch": 0.10079896699217174, + "grad_norm": 0.8905191421508789, + "learning_rate": 0.00019835936287999906, + "loss": 2.8167, + "step": 1249 + }, + { + "epoch": 0.10087967072875474, + "grad_norm": 0.839970052242279, + "learning_rate": 0.000198356513723637, + "loss": 2.8643, + "step": 1250 + }, + { 
+ "epoch": 0.10096037446533775, + "grad_norm": 0.7989531755447388, + "learning_rate": 0.00019835366211597353, + "loss": 2.8493, + "step": 1251 + }, + { + "epoch": 0.10104107820192075, + "grad_norm": 0.7960095405578613, + "learning_rate": 0.0001983508080570797, + "loss": 2.7377, + "step": 1252 + }, + { + "epoch": 0.10112178193850375, + "grad_norm": 0.7989903092384338, + "learning_rate": 0.00019834795154702661, + "loss": 2.7409, + "step": 1253 + }, + { + "epoch": 0.10120248567508676, + "grad_norm": 0.8557813167572021, + "learning_rate": 0.0001983450925858855, + "loss": 2.7945, + "step": 1254 + }, + { + "epoch": 0.10128318941166976, + "grad_norm": 0.948357880115509, + "learning_rate": 0.0001983422311737276, + "loss": 2.826, + "step": 1255 + }, + { + "epoch": 0.10136389314825277, + "grad_norm": 0.8356020450592041, + "learning_rate": 0.00019833936731062423, + "loss": 2.8157, + "step": 1256 + }, + { + "epoch": 0.10144459688483577, + "grad_norm": 0.8199872970581055, + "learning_rate": 0.00019833650099664678, + "loss": 2.7273, + "step": 1257 + }, + { + "epoch": 0.10152530062141878, + "grad_norm": 0.8178466558456421, + "learning_rate": 0.00019833363223186669, + "loss": 2.7513, + "step": 1258 + }, + { + "epoch": 0.10160600435800178, + "grad_norm": 0.8165889978408813, + "learning_rate": 0.00019833076101635538, + "loss": 2.7689, + "step": 1259 + }, + { + "epoch": 0.10168670809458479, + "grad_norm": 0.8240275979042053, + "learning_rate": 0.0001983278873501845, + "loss": 2.7477, + "step": 1260 + }, + { + "epoch": 0.10176741183116779, + "grad_norm": 0.8470584750175476, + "learning_rate": 0.00019832501123342563, + "loss": 2.7414, + "step": 1261 + }, + { + "epoch": 0.1018481155677508, + "grad_norm": 0.819063663482666, + "learning_rate": 0.00019832213266615046, + "loss": 2.7335, + "step": 1262 + }, + { + "epoch": 0.10192881930433378, + "grad_norm": 0.8045673370361328, + "learning_rate": 0.00019831925164843071, + "loss": 2.8141, + "step": 1263 + }, + { + "epoch": 0.10200952304091679, 
+ "grad_norm": 0.7827214598655701, + "learning_rate": 0.00019831636818033824, + "loss": 2.7549, + "step": 1264 + }, + { + "epoch": 0.10209022677749979, + "grad_norm": 0.9596436619758606, + "learning_rate": 0.00019831348226194485, + "loss": 2.7327, + "step": 1265 + }, + { + "epoch": 0.1021709305140828, + "grad_norm": 0.826909601688385, + "learning_rate": 0.0001983105938933225, + "loss": 2.7166, + "step": 1266 + }, + { + "epoch": 0.1022516342506658, + "grad_norm": 0.8060985207557678, + "learning_rate": 0.00019830770307454313, + "loss": 2.7514, + "step": 1267 + }, + { + "epoch": 0.1023323379872488, + "grad_norm": 0.8257390856742859, + "learning_rate": 0.00019830480980567887, + "loss": 2.77, + "step": 1268 + }, + { + "epoch": 0.10241304172383181, + "grad_norm": 0.844406008720398, + "learning_rate": 0.00019830191408680173, + "loss": 2.8548, + "step": 1269 + }, + { + "epoch": 0.10249374546041481, + "grad_norm": 0.84171462059021, + "learning_rate": 0.00019829901591798398, + "loss": 2.7404, + "step": 1270 + }, + { + "epoch": 0.10257444919699782, + "grad_norm": 0.8084118962287903, + "learning_rate": 0.00019829611529929774, + "loss": 2.8078, + "step": 1271 + }, + { + "epoch": 0.10265515293358082, + "grad_norm": 0.8273561000823975, + "learning_rate": 0.00019829321223081538, + "loss": 2.787, + "step": 1272 + }, + { + "epoch": 0.10273585667016383, + "grad_norm": 0.799098551273346, + "learning_rate": 0.00019829030671260925, + "loss": 2.7563, + "step": 1273 + }, + { + "epoch": 0.10281656040674683, + "grad_norm": 0.885866105556488, + "learning_rate": 0.00019828739874475172, + "loss": 2.7313, + "step": 1274 + }, + { + "epoch": 0.10289726414332984, + "grad_norm": 0.7702760696411133, + "learning_rate": 0.00019828448832731529, + "loss": 2.7919, + "step": 1275 + }, + { + "epoch": 0.10297796787991284, + "grad_norm": 0.7577444911003113, + "learning_rate": 0.0001982815754603725, + "loss": 2.7149, + "step": 1276 + }, + { + "epoch": 0.10305867161649584, + "grad_norm": 0.8439713716506958, + 
"learning_rate": 0.00019827866014399592, + "loss": 2.7881, + "step": 1277 + }, + { + "epoch": 0.10313937535307885, + "grad_norm": 0.8504937291145325, + "learning_rate": 0.00019827574237825827, + "loss": 2.7611, + "step": 1278 + }, + { + "epoch": 0.10322007908966185, + "grad_norm": 0.7775665521621704, + "learning_rate": 0.00019827282216323218, + "loss": 2.7312, + "step": 1279 + }, + { + "epoch": 0.10330078282624486, + "grad_norm": 0.8671591281890869, + "learning_rate": 0.00019826989949899048, + "loss": 2.836, + "step": 1280 + }, + { + "epoch": 0.10338148656282786, + "grad_norm": 0.9308713674545288, + "learning_rate": 0.00019826697438560603, + "loss": 2.7494, + "step": 1281 + }, + { + "epoch": 0.10346219029941087, + "grad_norm": 0.9145268797874451, + "learning_rate": 0.0001982640468231517, + "loss": 2.8054, + "step": 1282 + }, + { + "epoch": 0.10354289403599387, + "grad_norm": 0.8150805234909058, + "learning_rate": 0.00019826111681170043, + "loss": 2.7879, + "step": 1283 + }, + { + "epoch": 0.10362359777257688, + "grad_norm": 0.8576685786247253, + "learning_rate": 0.00019825818435132531, + "loss": 2.8184, + "step": 1284 + }, + { + "epoch": 0.10370430150915988, + "grad_norm": 0.8838599920272827, + "learning_rate": 0.00019825524944209937, + "loss": 2.7838, + "step": 1285 + }, + { + "epoch": 0.10378500524574288, + "grad_norm": 0.9119304418563843, + "learning_rate": 0.00019825231208409576, + "loss": 2.8392, + "step": 1286 + }, + { + "epoch": 0.10386570898232589, + "grad_norm": 0.8112398982048035, + "learning_rate": 0.00019824937227738771, + "loss": 2.7844, + "step": 1287 + }, + { + "epoch": 0.10394641271890888, + "grad_norm": 0.8714308738708496, + "learning_rate": 0.00019824643002204847, + "loss": 2.7765, + "step": 1288 + }, + { + "epoch": 0.10402711645549188, + "grad_norm": 0.8733358979225159, + "learning_rate": 0.00019824348531815138, + "loss": 2.771, + "step": 1289 + }, + { + "epoch": 0.10410782019207489, + "grad_norm": 0.8218281269073486, + "learning_rate": 
0.00019824053816576981, + "loss": 2.8099, + "step": 1290 + }, + { + "epoch": 0.10418852392865789, + "grad_norm": 0.8647308945655823, + "learning_rate": 0.00019823758856497725, + "loss": 2.7738, + "step": 1291 + }, + { + "epoch": 0.1042692276652409, + "grad_norm": 0.8358582854270935, + "learning_rate": 0.00019823463651584718, + "loss": 2.8021, + "step": 1292 + }, + { + "epoch": 0.1043499314018239, + "grad_norm": 0.7943673133850098, + "learning_rate": 0.00019823168201845318, + "loss": 2.8293, + "step": 1293 + }, + { + "epoch": 0.1044306351384069, + "grad_norm": 0.8501425981521606, + "learning_rate": 0.0001982287250728689, + "loss": 2.7701, + "step": 1294 + }, + { + "epoch": 0.10451133887498991, + "grad_norm": 0.8503665328025818, + "learning_rate": 0.00019822576567916797, + "loss": 2.7881, + "step": 1295 + }, + { + "epoch": 0.10459204261157291, + "grad_norm": 0.9687628149986267, + "learning_rate": 0.0001982228038374242, + "loss": 2.7623, + "step": 1296 + }, + { + "epoch": 0.10467274634815592, + "grad_norm": 0.8034376502037048, + "learning_rate": 0.00019821983954771146, + "loss": 2.8072, + "step": 1297 + }, + { + "epoch": 0.10475345008473892, + "grad_norm": 0.817135214805603, + "learning_rate": 0.00019821687281010352, + "loss": 2.7572, + "step": 1298 + }, + { + "epoch": 0.10483415382132193, + "grad_norm": 0.7961457371711731, + "learning_rate": 0.0001982139036246744, + "loss": 2.8405, + "step": 1299 + }, + { + "epoch": 0.10491485755790493, + "grad_norm": 0.7572407722473145, + "learning_rate": 0.00019821093199149804, + "loss": 2.7495, + "step": 1300 + }, + { + "epoch": 0.10499556129448794, + "grad_norm": 0.7990664839744568, + "learning_rate": 0.00019820795791064856, + "loss": 2.7567, + "step": 1301 + }, + { + "epoch": 0.10507626503107094, + "grad_norm": 0.8197236061096191, + "learning_rate": 0.0001982049813822, + "loss": 2.7807, + "step": 1302 + }, + { + "epoch": 0.10515696876765394, + "grad_norm": 0.9491304159164429, + "learning_rate": 0.00019820200240622664, + "loss": 
2.8531, + "step": 1303 + }, + { + "epoch": 0.10523767250423695, + "grad_norm": 0.8143845200538635, + "learning_rate": 0.00019819902098280268, + "loss": 2.7542, + "step": 1304 + }, + { + "epoch": 0.10531837624081995, + "grad_norm": 0.9055941104888916, + "learning_rate": 0.0001981960371120024, + "loss": 2.863, + "step": 1305 + }, + { + "epoch": 0.10539907997740296, + "grad_norm": 0.7804721593856812, + "learning_rate": 0.0001981930507939002, + "loss": 2.8213, + "step": 1306 + }, + { + "epoch": 0.10547978371398596, + "grad_norm": 0.8375318050384521, + "learning_rate": 0.00019819006202857046, + "loss": 2.8222, + "step": 1307 + }, + { + "epoch": 0.10556048745056897, + "grad_norm": 0.9145569801330566, + "learning_rate": 0.00019818707081608773, + "loss": 2.805, + "step": 1308 + }, + { + "epoch": 0.10564119118715197, + "grad_norm": 0.7899324893951416, + "learning_rate": 0.00019818407715652654, + "loss": 2.8246, + "step": 1309 + }, + { + "epoch": 0.10572189492373497, + "grad_norm": 0.7843480110168457, + "learning_rate": 0.0001981810810499615, + "loss": 2.7909, + "step": 1310 + }, + { + "epoch": 0.10580259866031798, + "grad_norm": 0.8071008920669556, + "learning_rate": 0.00019817808249646723, + "loss": 2.7434, + "step": 1311 + }, + { + "epoch": 0.10588330239690098, + "grad_norm": 0.8682011961936951, + "learning_rate": 0.0001981750814961185, + "loss": 2.8387, + "step": 1312 + }, + { + "epoch": 0.10596400613348399, + "grad_norm": 0.7501091361045837, + "learning_rate": 0.0001981720780489902, + "loss": 2.7633, + "step": 1313 + }, + { + "epoch": 0.10604470987006698, + "grad_norm": 0.9259567856788635, + "learning_rate": 0.000198169072155157, + "loss": 2.8309, + "step": 1314 + }, + { + "epoch": 0.10612541360664998, + "grad_norm": 0.8018674254417419, + "learning_rate": 0.00019816606381469393, + "loss": 2.8647, + "step": 1315 + }, + { + "epoch": 0.10620611734323299, + "grad_norm": 0.8218088746070862, + "learning_rate": 0.00019816305302767595, + "loss": 2.823, + "step": 1316 + }, + { + 
"epoch": 0.10628682107981599, + "grad_norm": 0.812125027179718, + "learning_rate": 0.00019816003979417808, + "loss": 2.7216, + "step": 1317 + }, + { + "epoch": 0.106367524816399, + "grad_norm": 0.787407636642456, + "learning_rate": 0.0001981570241142754, + "loss": 2.7639, + "step": 1318 + }, + { + "epoch": 0.106448228552982, + "grad_norm": 0.7982528805732727, + "learning_rate": 0.00019815400598804312, + "loss": 2.8597, + "step": 1319 + }, + { + "epoch": 0.106528932289565, + "grad_norm": 0.8490404486656189, + "learning_rate": 0.00019815098541555646, + "loss": 2.7947, + "step": 1320 + }, + { + "epoch": 0.10660963602614801, + "grad_norm": 0.8743172883987427, + "learning_rate": 0.00019814796239689064, + "loss": 2.8674, + "step": 1321 + }, + { + "epoch": 0.10669033976273101, + "grad_norm": 0.8338125348091125, + "learning_rate": 0.00019814493693212106, + "loss": 2.781, + "step": 1322 + }, + { + "epoch": 0.10677104349931402, + "grad_norm": 0.871516764163971, + "learning_rate": 0.00019814190902132307, + "loss": 2.8742, + "step": 1323 + }, + { + "epoch": 0.10685174723589702, + "grad_norm": 0.8935555815696716, + "learning_rate": 0.00019813887866457216, + "loss": 2.7991, + "step": 1324 + }, + { + "epoch": 0.10693245097248003, + "grad_norm": 0.840067446231842, + "learning_rate": 0.00019813584586194388, + "loss": 2.7922, + "step": 1325 + }, + { + "epoch": 0.10701315470906303, + "grad_norm": 0.7919262647628784, + "learning_rate": 0.0001981328106135138, + "loss": 2.7912, + "step": 1326 + }, + { + "epoch": 0.10709385844564603, + "grad_norm": 0.7974550127983093, + "learning_rate": 0.00019812977291935752, + "loss": 2.8497, + "step": 1327 + }, + { + "epoch": 0.10717456218222904, + "grad_norm": 0.9126157164573669, + "learning_rate": 0.00019812673277955082, + "loss": 2.7698, + "step": 1328 + }, + { + "epoch": 0.10725526591881204, + "grad_norm": 0.8329752683639526, + "learning_rate": 0.0001981236901941694, + "loss": 2.8366, + "step": 1329 + }, + { + "epoch": 0.10733596965539505, + 
"grad_norm": 0.8313524127006531, + "learning_rate": 0.00019812064516328915, + "loss": 2.6863, + "step": 1330 + }, + { + "epoch": 0.10741667339197805, + "grad_norm": 0.8917783498764038, + "learning_rate": 0.0001981175976869859, + "loss": 2.7817, + "step": 1331 + }, + { + "epoch": 0.10749737712856106, + "grad_norm": 0.8370450735092163, + "learning_rate": 0.00019811454776533566, + "loss": 2.837, + "step": 1332 + }, + { + "epoch": 0.10757808086514406, + "grad_norm": 0.8415676355361938, + "learning_rate": 0.00019811149539841443, + "loss": 2.7399, + "step": 1333 + }, + { + "epoch": 0.10765878460172706, + "grad_norm": 0.8576632142066956, + "learning_rate": 0.00019810844058629825, + "loss": 2.7747, + "step": 1334 + }, + { + "epoch": 0.10773948833831007, + "grad_norm": 0.8943549394607544, + "learning_rate": 0.00019810538332906328, + "loss": 2.7368, + "step": 1335 + }, + { + "epoch": 0.10782019207489307, + "grad_norm": 0.8878718018531799, + "learning_rate": 0.00019810232362678568, + "loss": 2.7907, + "step": 1336 + }, + { + "epoch": 0.10790089581147608, + "grad_norm": 0.8131409287452698, + "learning_rate": 0.00019809926147954174, + "loss": 2.7782, + "step": 1337 + }, + { + "epoch": 0.10798159954805908, + "grad_norm": 0.8733747005462646, + "learning_rate": 0.0001980961968874078, + "loss": 2.8552, + "step": 1338 + }, + { + "epoch": 0.10806230328464207, + "grad_norm": 0.8997320532798767, + "learning_rate": 0.0001980931298504602, + "loss": 2.8452, + "step": 1339 + }, + { + "epoch": 0.10814300702122508, + "grad_norm": 0.8400282263755798, + "learning_rate": 0.00019809006036877538, + "loss": 2.786, + "step": 1340 + }, + { + "epoch": 0.10822371075780808, + "grad_norm": 0.8173925280570984, + "learning_rate": 0.00019808698844242983, + "loss": 2.8363, + "step": 1341 + }, + { + "epoch": 0.10830441449439109, + "grad_norm": 0.872278094291687, + "learning_rate": 0.00019808391407150015, + "loss": 2.7789, + "step": 1342 + }, + { + "epoch": 0.10838511823097409, + "grad_norm": 
0.8939952254295349, + "learning_rate": 0.00019808083725606293, + "loss": 2.7453, + "step": 1343 + }, + { + "epoch": 0.1084658219675571, + "grad_norm": 0.8351218104362488, + "learning_rate": 0.00019807775799619484, + "loss": 2.8004, + "step": 1344 + }, + { + "epoch": 0.1085465257041401, + "grad_norm": 0.8381102681159973, + "learning_rate": 0.00019807467629197266, + "loss": 2.8155, + "step": 1345 + }, + { + "epoch": 0.1086272294407231, + "grad_norm": 0.869458019733429, + "learning_rate": 0.00019807159214347317, + "loss": 2.8219, + "step": 1346 + }, + { + "epoch": 0.10870793317730611, + "grad_norm": 0.8251017928123474, + "learning_rate": 0.00019806850555077326, + "loss": 2.7978, + "step": 1347 + }, + { + "epoch": 0.10878863691388911, + "grad_norm": 0.8056492209434509, + "learning_rate": 0.0001980654165139498, + "loss": 2.7994, + "step": 1348 + }, + { + "epoch": 0.10886934065047212, + "grad_norm": 0.9566174745559692, + "learning_rate": 0.00019806232503307984, + "loss": 2.794, + "step": 1349 + }, + { + "epoch": 0.10895004438705512, + "grad_norm": 0.7891408801078796, + "learning_rate": 0.0001980592311082404, + "loss": 2.7134, + "step": 1350 + }, + { + "epoch": 0.10903074812363812, + "grad_norm": 0.8894741535186768, + "learning_rate": 0.00019805613473950862, + "loss": 2.7829, + "step": 1351 + }, + { + "epoch": 0.10911145186022113, + "grad_norm": 0.893086850643158, + "learning_rate": 0.0001980530359269616, + "loss": 2.7475, + "step": 1352 + }, + { + "epoch": 0.10919215559680413, + "grad_norm": 0.8758537173271179, + "learning_rate": 0.00019804993467067666, + "loss": 2.8715, + "step": 1353 + }, + { + "epoch": 0.10927285933338714, + "grad_norm": 0.9304648041725159, + "learning_rate": 0.00019804683097073098, + "loss": 2.8051, + "step": 1354 + }, + { + "epoch": 0.10935356306997014, + "grad_norm": 0.8465876579284668, + "learning_rate": 0.00019804372482720202, + "loss": 2.7879, + "step": 1355 + }, + { + "epoch": 0.10943426680655315, + "grad_norm": 0.8485612273216248, + 
"learning_rate": 0.00019804061624016713, + "loss": 2.7783, + "step": 1356 + }, + { + "epoch": 0.10951497054313615, + "grad_norm": 0.835630476474762, + "learning_rate": 0.0001980375052097038, + "loss": 2.8116, + "step": 1357 + }, + { + "epoch": 0.10959567427971915, + "grad_norm": 0.8404836058616638, + "learning_rate": 0.00019803439173588956, + "loss": 2.8257, + "step": 1358 + }, + { + "epoch": 0.10967637801630216, + "grad_norm": 0.8048505783081055, + "learning_rate": 0.00019803127581880206, + "loss": 2.7762, + "step": 1359 + }, + { + "epoch": 0.10975708175288516, + "grad_norm": 0.8481776118278503, + "learning_rate": 0.00019802815745851885, + "loss": 2.8243, + "step": 1360 + }, + { + "epoch": 0.10983778548946817, + "grad_norm": 0.8565996885299683, + "learning_rate": 0.00019802503665511775, + "loss": 2.7958, + "step": 1361 + }, + { + "epoch": 0.10991848922605117, + "grad_norm": 0.8867515921592712, + "learning_rate": 0.0001980219134086765, + "loss": 2.7973, + "step": 1362 + }, + { + "epoch": 0.10999919296263418, + "grad_norm": 0.8459765911102295, + "learning_rate": 0.0001980187877192729, + "loss": 2.848, + "step": 1363 + }, + { + "epoch": 0.11007989669921718, + "grad_norm": 0.7929832339286804, + "learning_rate": 0.0001980156595869849, + "loss": 2.8583, + "step": 1364 + }, + { + "epoch": 0.11016060043580017, + "grad_norm": 0.8475651741027832, + "learning_rate": 0.00019801252901189043, + "loss": 2.8436, + "step": 1365 + }, + { + "epoch": 0.11024130417238318, + "grad_norm": 0.8545576333999634, + "learning_rate": 0.00019800939599406755, + "loss": 2.7457, + "step": 1366 + }, + { + "epoch": 0.11032200790896618, + "grad_norm": 1.0093715190887451, + "learning_rate": 0.00019800626053359435, + "loss": 2.8198, + "step": 1367 + }, + { + "epoch": 0.11040271164554918, + "grad_norm": 0.8728145956993103, + "learning_rate": 0.0001980031226305489, + "loss": 2.7794, + "step": 1368 + }, + { + "epoch": 0.11048341538213219, + "grad_norm": 0.8538581728935242, + "learning_rate": 
0.00019799998228500946, + "loss": 2.8018, + "step": 1369 + }, + { + "epoch": 0.11056411911871519, + "grad_norm": 0.9452785849571228, + "learning_rate": 0.00019799683949705432, + "loss": 2.8173, + "step": 1370 + }, + { + "epoch": 0.1106448228552982, + "grad_norm": 0.806508481502533, + "learning_rate": 0.00019799369426676174, + "loss": 2.8192, + "step": 1371 + }, + { + "epoch": 0.1107255265918812, + "grad_norm": 0.8952856063842773, + "learning_rate": 0.00019799054659421018, + "loss": 2.8072, + "step": 1372 + }, + { + "epoch": 0.1108062303284642, + "grad_norm": 0.8863561749458313, + "learning_rate": 0.00019798739647947802, + "loss": 2.7836, + "step": 1373 + }, + { + "epoch": 0.11088693406504721, + "grad_norm": 0.8544357419013977, + "learning_rate": 0.00019798424392264378, + "loss": 2.7714, + "step": 1374 + }, + { + "epoch": 0.11096763780163021, + "grad_norm": 0.807546854019165, + "learning_rate": 0.00019798108892378607, + "loss": 2.7635, + "step": 1375 + }, + { + "epoch": 0.11104834153821322, + "grad_norm": 0.8198233246803284, + "learning_rate": 0.0001979779314829835, + "loss": 2.8253, + "step": 1376 + }, + { + "epoch": 0.11112904527479622, + "grad_norm": 0.9268671870231628, + "learning_rate": 0.00019797477160031477, + "loss": 2.8007, + "step": 1377 + }, + { + "epoch": 0.11120974901137923, + "grad_norm": 0.8547680974006653, + "learning_rate": 0.0001979716092758586, + "loss": 2.7749, + "step": 1378 + }, + { + "epoch": 0.11129045274796223, + "grad_norm": 0.8052394390106201, + "learning_rate": 0.00019796844450969384, + "loss": 2.763, + "step": 1379 + }, + { + "epoch": 0.11137115648454524, + "grad_norm": 0.8291144371032715, + "learning_rate": 0.00019796527730189936, + "loss": 2.8053, + "step": 1380 + }, + { + "epoch": 0.11145186022112824, + "grad_norm": 0.8114006519317627, + "learning_rate": 0.00019796210765255404, + "loss": 2.8047, + "step": 1381 + }, + { + "epoch": 0.11153256395771124, + "grad_norm": 0.9326293468475342, + "learning_rate": 0.00019795893556173697, + 
"loss": 2.8199, + "step": 1382 + }, + { + "epoch": 0.11161326769429425, + "grad_norm": 0.7702555656433105, + "learning_rate": 0.00019795576102952714, + "loss": 2.7909, + "step": 1383 + }, + { + "epoch": 0.11169397143087725, + "grad_norm": 0.8115492463111877, + "learning_rate": 0.0001979525840560037, + "loss": 2.748, + "step": 1384 + }, + { + "epoch": 0.11177467516746026, + "grad_norm": 0.8926187753677368, + "learning_rate": 0.0001979494046412458, + "loss": 2.7791, + "step": 1385 + }, + { + "epoch": 0.11185537890404326, + "grad_norm": 0.8549754023551941, + "learning_rate": 0.0001979462227853327, + "loss": 2.7989, + "step": 1386 + }, + { + "epoch": 0.11193608264062627, + "grad_norm": 0.8625262975692749, + "learning_rate": 0.0001979430384883437, + "loss": 2.7202, + "step": 1387 + }, + { + "epoch": 0.11201678637720927, + "grad_norm": 0.8134698867797852, + "learning_rate": 0.00019793985175035813, + "loss": 2.8008, + "step": 1388 + }, + { + "epoch": 0.11209749011379228, + "grad_norm": 0.8546617031097412, + "learning_rate": 0.00019793666257145547, + "loss": 2.8076, + "step": 1389 + }, + { + "epoch": 0.11217819385037527, + "grad_norm": 0.8003748059272766, + "learning_rate": 0.00019793347095171514, + "loss": 2.826, + "step": 1390 + }, + { + "epoch": 0.11225889758695827, + "grad_norm": 0.8116614818572998, + "learning_rate": 0.00019793027689121674, + "loss": 2.7096, + "step": 1391 + }, + { + "epoch": 0.11233960132354127, + "grad_norm": 0.7785829901695251, + "learning_rate": 0.00019792708039003984, + "loss": 2.748, + "step": 1392 + }, + { + "epoch": 0.11242030506012428, + "grad_norm": 0.7999277710914612, + "learning_rate": 0.0001979238814482641, + "loss": 2.7671, + "step": 1393 + }, + { + "epoch": 0.11250100879670728, + "grad_norm": 0.8862190842628479, + "learning_rate": 0.00019792068006596925, + "loss": 2.8484, + "step": 1394 + }, + { + "epoch": 0.11258171253329029, + "grad_norm": 0.8747627139091492, + "learning_rate": 0.00019791747624323512, + "loss": 2.7477, + "step": 1395 
+ }, + { + "epoch": 0.11266241626987329, + "grad_norm": 0.8280831575393677, + "learning_rate": 0.0001979142699801415, + "loss": 2.87, + "step": 1396 + }, + { + "epoch": 0.1127431200064563, + "grad_norm": 0.8069074153900146, + "learning_rate": 0.00019791106127676832, + "loss": 2.7724, + "step": 1397 + }, + { + "epoch": 0.1128238237430393, + "grad_norm": 0.8253301382064819, + "learning_rate": 0.00019790785013319557, + "loss": 2.7351, + "step": 1398 + }, + { + "epoch": 0.1129045274796223, + "grad_norm": 0.8298853635787964, + "learning_rate": 0.00019790463654950323, + "loss": 2.7709, + "step": 1399 + }, + { + "epoch": 0.11298523121620531, + "grad_norm": 0.7796407341957092, + "learning_rate": 0.0001979014205257715, + "loss": 2.7766, + "step": 1400 + }, + { + "epoch": 0.11306593495278831, + "grad_norm": 0.8922166228294373, + "learning_rate": 0.00019789820206208037, + "loss": 2.8473, + "step": 1401 + }, + { + "epoch": 0.11314663868937132, + "grad_norm": 0.7763219475746155, + "learning_rate": 0.00019789498115851015, + "loss": 2.8629, + "step": 1402 + }, + { + "epoch": 0.11322734242595432, + "grad_norm": 0.8679928779602051, + "learning_rate": 0.0001978917578151411, + "loss": 2.8017, + "step": 1403 + }, + { + "epoch": 0.11330804616253733, + "grad_norm": 0.8491933941841125, + "learning_rate": 0.00019788853203205357, + "loss": 2.7156, + "step": 1404 + }, + { + "epoch": 0.11338874989912033, + "grad_norm": 0.8271194696426392, + "learning_rate": 0.00019788530380932792, + "loss": 2.7892, + "step": 1405 + }, + { + "epoch": 0.11346945363570334, + "grad_norm": 0.9224163293838501, + "learning_rate": 0.00019788207314704463, + "loss": 2.7824, + "step": 1406 + }, + { + "epoch": 0.11355015737228634, + "grad_norm": 0.7662777900695801, + "learning_rate": 0.00019787884004528422, + "loss": 2.7364, + "step": 1407 + }, + { + "epoch": 0.11363086110886934, + "grad_norm": 0.8750362396240234, + "learning_rate": 0.00019787560450412728, + "loss": 2.7546, + "step": 1408 + }, + { + "epoch": 
0.11371156484545235, + "grad_norm": 0.9158821105957031, + "learning_rate": 0.0001978723665236544, + "loss": 2.8304, + "step": 1409 + }, + { + "epoch": 0.11379226858203535, + "grad_norm": 0.8291050791740417, + "learning_rate": 0.0001978691261039463, + "loss": 2.758, + "step": 1410 + }, + { + "epoch": 0.11387297231861836, + "grad_norm": 0.801886796951294, + "learning_rate": 0.00019786588324508374, + "loss": 2.7805, + "step": 1411 + }, + { + "epoch": 0.11395367605520136, + "grad_norm": 0.8140222430229187, + "learning_rate": 0.00019786263794714757, + "loss": 2.8155, + "step": 1412 + }, + { + "epoch": 0.11403437979178437, + "grad_norm": 0.7747580409049988, + "learning_rate": 0.00019785939021021865, + "loss": 2.778, + "step": 1413 + }, + { + "epoch": 0.11411508352836737, + "grad_norm": 0.8954138159751892, + "learning_rate": 0.0001978561400343779, + "loss": 2.7756, + "step": 1414 + }, + { + "epoch": 0.11419578726495037, + "grad_norm": 0.9038921594619751, + "learning_rate": 0.00019785288741970634, + "loss": 2.7181, + "step": 1415 + }, + { + "epoch": 0.11427649100153336, + "grad_norm": 0.8284393548965454, + "learning_rate": 0.000197849632366285, + "loss": 2.7467, + "step": 1416 + }, + { + "epoch": 0.11435719473811637, + "grad_norm": 0.8996441960334778, + "learning_rate": 0.00019784637487419514, + "loss": 2.7918, + "step": 1417 + }, + { + "epoch": 0.11443789847469937, + "grad_norm": 0.9868448376655579, + "learning_rate": 0.00019784311494351777, + "loss": 2.7687, + "step": 1418 + }, + { + "epoch": 0.11451860221128238, + "grad_norm": 0.8491402864456177, + "learning_rate": 0.0001978398525743342, + "loss": 2.8492, + "step": 1419 + }, + { + "epoch": 0.11459930594786538, + "grad_norm": 1.06125807762146, + "learning_rate": 0.0001978365877667258, + "loss": 2.8041, + "step": 1420 + }, + { + "epoch": 0.11468000968444839, + "grad_norm": 0.8194011449813843, + "learning_rate": 0.00019783332052077386, + "loss": 2.7109, + "step": 1421 + }, + { + "epoch": 0.11476071342103139, + "grad_norm": 
0.972620964050293, + "learning_rate": 0.00019783005083655984, + "loss": 2.8107, + "step": 1422 + }, + { + "epoch": 0.1148414171576144, + "grad_norm": 0.925410270690918, + "learning_rate": 0.0001978267787141652, + "loss": 2.7603, + "step": 1423 + }, + { + "epoch": 0.1149221208941974, + "grad_norm": 0.920156717300415, + "learning_rate": 0.00019782350415367152, + "loss": 2.7644, + "step": 1424 + }, + { + "epoch": 0.1150028246307804, + "grad_norm": 0.8617576360702515, + "learning_rate": 0.00019782022715516043, + "loss": 2.769, + "step": 1425 + }, + { + "epoch": 0.11508352836736341, + "grad_norm": 1.0987342596054077, + "learning_rate": 0.00019781694771871356, + "loss": 2.8224, + "step": 1426 + }, + { + "epoch": 0.11516423210394641, + "grad_norm": 0.8418076634407043, + "learning_rate": 0.00019781366584441264, + "loss": 2.7947, + "step": 1427 + }, + { + "epoch": 0.11524493584052942, + "grad_norm": 0.8010901808738708, + "learning_rate": 0.0001978103815323395, + "loss": 2.733, + "step": 1428 + }, + { + "epoch": 0.11532563957711242, + "grad_norm": 0.8649042844772339, + "learning_rate": 0.00019780709478257598, + "loss": 2.7681, + "step": 1429 + }, + { + "epoch": 0.11540634331369543, + "grad_norm": 0.7728127837181091, + "learning_rate": 0.00019780380559520397, + "loss": 2.7795, + "step": 1430 + }, + { + "epoch": 0.11548704705027843, + "grad_norm": 0.7770940065383911, + "learning_rate": 0.00019780051397030545, + "loss": 2.743, + "step": 1431 + }, + { + "epoch": 0.11556775078686143, + "grad_norm": 0.8341890573501587, + "learning_rate": 0.0001977972199079625, + "loss": 2.8047, + "step": 1432 + }, + { + "epoch": 0.11564845452344444, + "grad_norm": 0.7894187569618225, + "learning_rate": 0.00019779392340825717, + "loss": 2.7757, + "step": 1433 + }, + { + "epoch": 0.11572915826002744, + "grad_norm": 0.8002873063087463, + "learning_rate": 0.00019779062447127164, + "loss": 2.7816, + "step": 1434 + }, + { + "epoch": 0.11580986199661045, + "grad_norm": 0.8256075978279114, + 
"learning_rate": 0.0001977873230970881, + "loss": 2.7839, + "step": 1435 + }, + { + "epoch": 0.11589056573319345, + "grad_norm": 0.8695322871208191, + "learning_rate": 0.0001977840192857889, + "loss": 2.746, + "step": 1436 + }, + { + "epoch": 0.11597126946977646, + "grad_norm": 0.767425537109375, + "learning_rate": 0.00019778071303745628, + "loss": 2.797, + "step": 1437 + }, + { + "epoch": 0.11605197320635946, + "grad_norm": 0.8263241052627563, + "learning_rate": 0.0001977774043521727, + "loss": 2.7702, + "step": 1438 + }, + { + "epoch": 0.11613267694294246, + "grad_norm": 0.8108638525009155, + "learning_rate": 0.0001977740932300206, + "loss": 2.6981, + "step": 1439 + }, + { + "epoch": 0.11621338067952547, + "grad_norm": 0.7945007681846619, + "learning_rate": 0.00019777077967108255, + "loss": 2.7357, + "step": 1440 + }, + { + "epoch": 0.11629408441610846, + "grad_norm": 0.8480326533317566, + "learning_rate": 0.00019776746367544107, + "loss": 2.8563, + "step": 1441 + }, + { + "epoch": 0.11637478815269146, + "grad_norm": 0.8202071785926819, + "learning_rate": 0.00019776414524317882, + "loss": 2.7955, + "step": 1442 + }, + { + "epoch": 0.11645549188927447, + "grad_norm": 0.8202874660491943, + "learning_rate": 0.00019776082437437852, + "loss": 2.765, + "step": 1443 + }, + { + "epoch": 0.11653619562585747, + "grad_norm": 0.8053051829338074, + "learning_rate": 0.00019775750106912294, + "loss": 2.6866, + "step": 1444 + }, + { + "epoch": 0.11661689936244048, + "grad_norm": 0.831968367099762, + "learning_rate": 0.00019775417532749486, + "loss": 2.7022, + "step": 1445 + }, + { + "epoch": 0.11669760309902348, + "grad_norm": 0.8903129696846008, + "learning_rate": 0.00019775084714957725, + "loss": 2.7308, + "step": 1446 + }, + { + "epoch": 0.11677830683560649, + "grad_norm": 0.8178622722625732, + "learning_rate": 0.000197747516535453, + "loss": 2.7446, + "step": 1447 + }, + { + "epoch": 0.11685901057218949, + "grad_norm": 0.8270576596260071, + "learning_rate": 
0.00019774418348520508, + "loss": 2.7716, + "step": 1448 + }, + { + "epoch": 0.1169397143087725, + "grad_norm": 0.7965807914733887, + "learning_rate": 0.00019774084799891662, + "loss": 2.7305, + "step": 1449 + }, + { + "epoch": 0.1170204180453555, + "grad_norm": 0.8499472737312317, + "learning_rate": 0.00019773751007667073, + "loss": 2.7584, + "step": 1450 + }, + { + "epoch": 0.1171011217819385, + "grad_norm": 0.8961663842201233, + "learning_rate": 0.0001977341697185506, + "loss": 2.7729, + "step": 1451 + }, + { + "epoch": 0.1171818255185215, + "grad_norm": 1.0203527212142944, + "learning_rate": 0.0001977308269246395, + "loss": 2.727, + "step": 1452 + }, + { + "epoch": 0.11726252925510451, + "grad_norm": 0.953289806842804, + "learning_rate": 0.0001977274816950207, + "loss": 2.8158, + "step": 1453 + }, + { + "epoch": 0.11734323299168752, + "grad_norm": 1.0064597129821777, + "learning_rate": 0.0001977241340297776, + "loss": 2.8743, + "step": 1454 + }, + { + "epoch": 0.11742393672827052, + "grad_norm": 0.8541988730430603, + "learning_rate": 0.00019772078392899363, + "loss": 2.8532, + "step": 1455 + }, + { + "epoch": 0.11750464046485352, + "grad_norm": 0.8351433873176575, + "learning_rate": 0.00019771743139275228, + "loss": 2.7749, + "step": 1456 + }, + { + "epoch": 0.11758534420143653, + "grad_norm": 0.9555812478065491, + "learning_rate": 0.00019771407642113712, + "loss": 2.7408, + "step": 1457 + }, + { + "epoch": 0.11766604793801953, + "grad_norm": 0.7943894267082214, + "learning_rate": 0.0001977107190142317, + "loss": 2.7265, + "step": 1458 + }, + { + "epoch": 0.11774675167460254, + "grad_norm": 0.8636460900306702, + "learning_rate": 0.0001977073591721198, + "loss": 2.8178, + "step": 1459 + }, + { + "epoch": 0.11782745541118554, + "grad_norm": 0.8673834800720215, + "learning_rate": 0.00019770399689488506, + "loss": 2.7928, + "step": 1460 + }, + { + "epoch": 0.11790815914776855, + "grad_norm": 0.9463722705841064, + "learning_rate": 0.00019770063218261133, + "loss": 
2.7448, + "step": 1461 + }, + { + "epoch": 0.11798886288435155, + "grad_norm": 0.8429726362228394, + "learning_rate": 0.00019769726503538246, + "loss": 2.7564, + "step": 1462 + }, + { + "epoch": 0.11806956662093455, + "grad_norm": 0.9412201642990112, + "learning_rate": 0.00019769389545328236, + "loss": 2.793, + "step": 1463 + }, + { + "epoch": 0.11815027035751756, + "grad_norm": 0.9112111926078796, + "learning_rate": 0.000197690523436395, + "loss": 2.7787, + "step": 1464 + }, + { + "epoch": 0.11823097409410056, + "grad_norm": 0.8417023420333862, + "learning_rate": 0.00019768714898480444, + "loss": 2.7654, + "step": 1465 + }, + { + "epoch": 0.11831167783068357, + "grad_norm": 0.8275290727615356, + "learning_rate": 0.00019768377209859476, + "loss": 2.7914, + "step": 1466 + }, + { + "epoch": 0.11839238156726656, + "grad_norm": 0.8113142848014832, + "learning_rate": 0.00019768039277785017, + "loss": 2.7516, + "step": 1467 + }, + { + "epoch": 0.11847308530384956, + "grad_norm": 0.8655288219451904, + "learning_rate": 0.0001976770110226548, + "loss": 2.8158, + "step": 1468 + }, + { + "epoch": 0.11855378904043257, + "grad_norm": 0.8063547611236572, + "learning_rate": 0.000197673626833093, + "loss": 2.7624, + "step": 1469 + }, + { + "epoch": 0.11863449277701557, + "grad_norm": 0.843772292137146, + "learning_rate": 0.00019767024020924908, + "loss": 2.86, + "step": 1470 + }, + { + "epoch": 0.11871519651359858, + "grad_norm": 0.7942481637001038, + "learning_rate": 0.0001976668511512075, + "loss": 2.758, + "step": 1471 + }, + { + "epoch": 0.11879590025018158, + "grad_norm": 0.841275155544281, + "learning_rate": 0.00019766345965905268, + "loss": 2.8014, + "step": 1472 + }, + { + "epoch": 0.11887660398676458, + "grad_norm": 0.8003600835800171, + "learning_rate": 0.00019766006573286915, + "loss": 2.7829, + "step": 1473 + }, + { + "epoch": 0.11895730772334759, + "grad_norm": 0.8437239527702332, + "learning_rate": 0.00019765666937274147, + "loss": 2.7706, + "step": 1474 + }, + { + 
"epoch": 0.11903801145993059, + "grad_norm": 0.8118240833282471, + "learning_rate": 0.00019765327057875433, + "loss": 2.8185, + "step": 1475 + }, + { + "epoch": 0.1191187151965136, + "grad_norm": 0.8051649928092957, + "learning_rate": 0.00019764986935099244, + "loss": 2.7676, + "step": 1476 + }, + { + "epoch": 0.1191994189330966, + "grad_norm": 0.7786862850189209, + "learning_rate": 0.00019764646568954053, + "loss": 2.8069, + "step": 1477 + }, + { + "epoch": 0.1192801226696796, + "grad_norm": 0.8199592232704163, + "learning_rate": 0.0001976430595944834, + "loss": 2.7718, + "step": 1478 + }, + { + "epoch": 0.11936082640626261, + "grad_norm": 0.8696652054786682, + "learning_rate": 0.00019763965106590604, + "loss": 2.7682, + "step": 1479 + }, + { + "epoch": 0.11944153014284561, + "grad_norm": 0.7993931174278259, + "learning_rate": 0.00019763624010389334, + "loss": 2.7607, + "step": 1480 + }, + { + "epoch": 0.11952223387942862, + "grad_norm": 0.8107055425643921, + "learning_rate": 0.0001976328267085303, + "loss": 2.7885, + "step": 1481 + }, + { + "epoch": 0.11960293761601162, + "grad_norm": 0.8189423084259033, + "learning_rate": 0.000197629410879902, + "loss": 2.7332, + "step": 1482 + }, + { + "epoch": 0.11968364135259463, + "grad_norm": 0.9134814143180847, + "learning_rate": 0.0001976259926180936, + "loss": 2.7691, + "step": 1483 + }, + { + "epoch": 0.11976434508917763, + "grad_norm": 0.8642883896827698, + "learning_rate": 0.00019762257192319023, + "loss": 2.7876, + "step": 1484 + }, + { + "epoch": 0.11984504882576064, + "grad_norm": 0.7411352396011353, + "learning_rate": 0.0001976191487952772, + "loss": 2.7577, + "step": 1485 + }, + { + "epoch": 0.11992575256234364, + "grad_norm": 0.7741669416427612, + "learning_rate": 0.00019761572323443978, + "loss": 2.8005, + "step": 1486 + }, + { + "epoch": 0.12000645629892664, + "grad_norm": 0.8195405602455139, + "learning_rate": 0.0001976122952407634, + "loss": 2.7421, + "step": 1487 + }, + { + "epoch": 0.12008716003550965, + 
"grad_norm": 0.8355886936187744, + "learning_rate": 0.00019760886481433345, + "loss": 2.8156, + "step": 1488 + }, + { + "epoch": 0.12016786377209265, + "grad_norm": 0.8321093916893005, + "learning_rate": 0.00019760543195523542, + "loss": 2.7261, + "step": 1489 + }, + { + "epoch": 0.12024856750867566, + "grad_norm": 0.7792446613311768, + "learning_rate": 0.0001976019966635549, + "loss": 2.7319, + "step": 1490 + }, + { + "epoch": 0.12032927124525866, + "grad_norm": 0.770535409450531, + "learning_rate": 0.00019759855893937748, + "loss": 2.7727, + "step": 1491 + }, + { + "epoch": 0.12040997498184165, + "grad_norm": 0.8168532252311707, + "learning_rate": 0.00019759511878278887, + "loss": 2.7763, + "step": 1492 + }, + { + "epoch": 0.12049067871842466, + "grad_norm": 0.8395755290985107, + "learning_rate": 0.00019759167619387476, + "loss": 2.8382, + "step": 1493 + }, + { + "epoch": 0.12057138245500766, + "grad_norm": 0.8682762384414673, + "learning_rate": 0.00019758823117272097, + "loss": 2.8056, + "step": 1494 + }, + { + "epoch": 0.12065208619159067, + "grad_norm": 0.815192699432373, + "learning_rate": 0.00019758478371941337, + "loss": 2.7602, + "step": 1495 + }, + { + "epoch": 0.12073278992817367, + "grad_norm": 0.7919273376464844, + "learning_rate": 0.00019758133383403786, + "loss": 2.7989, + "step": 1496 + }, + { + "epoch": 0.12081349366475667, + "grad_norm": 1.004387378692627, + "learning_rate": 0.00019757788151668045, + "loss": 2.7765, + "step": 1497 + }, + { + "epoch": 0.12089419740133968, + "grad_norm": 1.0032062530517578, + "learning_rate": 0.00019757442676742715, + "loss": 2.7751, + "step": 1498 + }, + { + "epoch": 0.12097490113792268, + "grad_norm": 0.8797723054885864, + "learning_rate": 0.00019757096958636407, + "loss": 2.7798, + "step": 1499 + }, + { + "epoch": 0.12105560487450569, + "grad_norm": 0.9239820241928101, + "learning_rate": 0.0001975675099735774, + "loss": 2.7976, + "step": 1500 + }, + { + "epoch": 0.12113630861108869, + "grad_norm": 
0.9903601408004761, + "learning_rate": 0.00019756404792915328, + "loss": 2.7891, + "step": 1501 + }, + { + "epoch": 0.1212170123476717, + "grad_norm": 0.8402895331382751, + "learning_rate": 0.0001975605834531781, + "loss": 2.8037, + "step": 1502 + }, + { + "epoch": 0.1212977160842547, + "grad_norm": 0.8986102342605591, + "learning_rate": 0.00019755711654573813, + "loss": 2.8375, + "step": 1503 + }, + { + "epoch": 0.1213784198208377, + "grad_norm": 0.8795471787452698, + "learning_rate": 0.0001975536472069198, + "loss": 2.7916, + "step": 1504 + }, + { + "epoch": 0.12145912355742071, + "grad_norm": 0.866278350353241, + "learning_rate": 0.00019755017543680962, + "loss": 2.7884, + "step": 1505 + }, + { + "epoch": 0.12153982729400371, + "grad_norm": 0.7877952456474304, + "learning_rate": 0.00019754670123549398, + "loss": 2.7659, + "step": 1506 + }, + { + "epoch": 0.12162053103058672, + "grad_norm": 0.857155978679657, + "learning_rate": 0.00019754322460305962, + "loss": 2.8029, + "step": 1507 + }, + { + "epoch": 0.12170123476716972, + "grad_norm": 0.8323284387588501, + "learning_rate": 0.00019753974553959314, + "loss": 2.7764, + "step": 1508 + }, + { + "epoch": 0.12178193850375273, + "grad_norm": 0.8557485938072205, + "learning_rate": 0.00019753626404518117, + "loss": 2.7448, + "step": 1509 + }, + { + "epoch": 0.12186264224033573, + "grad_norm": 0.8026818037033081, + "learning_rate": 0.00019753278011991058, + "loss": 2.7323, + "step": 1510 + }, + { + "epoch": 0.12194334597691874, + "grad_norm": 0.8578904271125793, + "learning_rate": 0.00019752929376386816, + "loss": 2.759, + "step": 1511 + }, + { + "epoch": 0.12202404971350174, + "grad_norm": 0.8617175221443176, + "learning_rate": 0.00019752580497714076, + "loss": 2.7641, + "step": 1512 + }, + { + "epoch": 0.12210475345008474, + "grad_norm": 0.8261943459510803, + "learning_rate": 0.00019752231375981538, + "loss": 2.7554, + "step": 1513 + }, + { + "epoch": 0.12218545718666775, + "grad_norm": 0.9984099268913269, + 
"learning_rate": 0.00019751882011197902, + "loss": 2.763, + "step": 1514 + }, + { + "epoch": 0.12226616092325075, + "grad_norm": 0.8014064431190491, + "learning_rate": 0.00019751532403371874, + "loss": 2.8083, + "step": 1515 + }, + { + "epoch": 0.12234686465983376, + "grad_norm": 0.9276653528213501, + "learning_rate": 0.0001975118255251217, + "loss": 2.8055, + "step": 1516 + }, + { + "epoch": 0.12242756839641676, + "grad_norm": 0.9365193843841553, + "learning_rate": 0.00019750832458627503, + "loss": 2.7397, + "step": 1517 + }, + { + "epoch": 0.12250827213299975, + "grad_norm": 0.8952646851539612, + "learning_rate": 0.00019750482121726605, + "loss": 2.8305, + "step": 1518 + }, + { + "epoch": 0.12258897586958276, + "grad_norm": 0.8395531177520752, + "learning_rate": 0.00019750131541818204, + "loss": 2.7852, + "step": 1519 + }, + { + "epoch": 0.12266967960616576, + "grad_norm": 0.8123572468757629, + "learning_rate": 0.0001974978071891104, + "loss": 2.831, + "step": 1520 + }, + { + "epoch": 0.12275038334274876, + "grad_norm": 0.8716141581535339, + "learning_rate": 0.00019749429653013851, + "loss": 2.8012, + "step": 1521 + }, + { + "epoch": 0.12283108707933177, + "grad_norm": 0.7848379611968994, + "learning_rate": 0.0001974907834413539, + "loss": 2.7812, + "step": 1522 + }, + { + "epoch": 0.12291179081591477, + "grad_norm": 0.834072470664978, + "learning_rate": 0.00019748726792284414, + "loss": 2.7442, + "step": 1523 + }, + { + "epoch": 0.12299249455249778, + "grad_norm": 0.8377225399017334, + "learning_rate": 0.0001974837499746968, + "loss": 2.7967, + "step": 1524 + }, + { + "epoch": 0.12307319828908078, + "grad_norm": 0.8809494376182556, + "learning_rate": 0.0001974802295969996, + "loss": 2.8042, + "step": 1525 + }, + { + "epoch": 0.12315390202566379, + "grad_norm": 0.8504741787910461, + "learning_rate": 0.00019747670678984028, + "loss": 2.7909, + "step": 1526 + }, + { + "epoch": 0.12323460576224679, + "grad_norm": 0.9444355368614197, + "learning_rate": 
0.00019747318155330663, + "loss": 2.8567, + "step": 1527 + }, + { + "epoch": 0.1233153094988298, + "grad_norm": 0.859166145324707, + "learning_rate": 0.00019746965388748645, + "loss": 2.8305, + "step": 1528 + }, + { + "epoch": 0.1233960132354128, + "grad_norm": 0.8431086540222168, + "learning_rate": 0.00019746612379246777, + "loss": 2.7799, + "step": 1529 + }, + { + "epoch": 0.1234767169719958, + "grad_norm": 0.8872438669204712, + "learning_rate": 0.00019746259126833846, + "loss": 2.8413, + "step": 1530 + }, + { + "epoch": 0.12355742070857881, + "grad_norm": 0.8698925375938416, + "learning_rate": 0.0001974590563151866, + "loss": 2.8446, + "step": 1531 + }, + { + "epoch": 0.12363812444516181, + "grad_norm": 0.8926429152488708, + "learning_rate": 0.0001974555189331003, + "loss": 2.7859, + "step": 1532 + }, + { + "epoch": 0.12371882818174482, + "grad_norm": 0.8089048862457275, + "learning_rate": 0.00019745197912216775, + "loss": 2.7985, + "step": 1533 + }, + { + "epoch": 0.12379953191832782, + "grad_norm": 0.8180400729179382, + "learning_rate": 0.0001974484368824771, + "loss": 2.7587, + "step": 1534 + }, + { + "epoch": 0.12388023565491083, + "grad_norm": 0.9584212303161621, + "learning_rate": 0.00019744489221411668, + "loss": 2.766, + "step": 1535 + }, + { + "epoch": 0.12396093939149383, + "grad_norm": 0.8425920009613037, + "learning_rate": 0.00019744134511717485, + "loss": 2.8125, + "step": 1536 + }, + { + "epoch": 0.12404164312807683, + "grad_norm": 0.9109299182891846, + "learning_rate": 0.00019743779559173996, + "loss": 2.8613, + "step": 1537 + }, + { + "epoch": 0.12412234686465984, + "grad_norm": 0.8840214610099792, + "learning_rate": 0.0001974342436379005, + "loss": 2.7603, + "step": 1538 + }, + { + "epoch": 0.12420305060124284, + "grad_norm": 0.8128962516784668, + "learning_rate": 0.00019743068925574502, + "loss": 2.7593, + "step": 1539 + }, + { + "epoch": 0.12428375433782585, + "grad_norm": 0.8150052428245544, + "learning_rate": 0.00019742713244536204, + 
"loss": 2.8099, + "step": 1540 + }, + { + "epoch": 0.12436445807440885, + "grad_norm": 0.8442968130111694, + "learning_rate": 0.00019742357320684027, + "loss": 2.7746, + "step": 1541 + }, + { + "epoch": 0.12444516181099186, + "grad_norm": 0.9347402453422546, + "learning_rate": 0.00019742001154026838, + "loss": 2.8247, + "step": 1542 + }, + { + "epoch": 0.12452586554757485, + "grad_norm": 0.8305966854095459, + "learning_rate": 0.00019741644744573512, + "loss": 2.7398, + "step": 1543 + }, + { + "epoch": 0.12460656928415785, + "grad_norm": 0.8811129927635193, + "learning_rate": 0.00019741288092332935, + "loss": 2.8014, + "step": 1544 + }, + { + "epoch": 0.12468727302074085, + "grad_norm": 1.0287303924560547, + "learning_rate": 0.00019740931197313996, + "loss": 2.8449, + "step": 1545 + }, + { + "epoch": 0.12476797675732386, + "grad_norm": 0.8499771356582642, + "learning_rate": 0.00019740574059525588, + "loss": 2.7845, + "step": 1546 + }, + { + "epoch": 0.12484868049390686, + "grad_norm": 0.8110969066619873, + "learning_rate": 0.00019740216678976614, + "loss": 2.7565, + "step": 1547 + }, + { + "epoch": 0.12492938423048987, + "grad_norm": 0.8530771136283875, + "learning_rate": 0.00019739859055675977, + "loss": 2.8098, + "step": 1548 + }, + { + "epoch": 0.12501008796707289, + "grad_norm": 0.8483901619911194, + "learning_rate": 0.00019739501189632591, + "loss": 2.812, + "step": 1549 + }, + { + "epoch": 0.1250907917036559, + "grad_norm": 0.7894467711448669, + "learning_rate": 0.00019739143080855378, + "loss": 2.8576, + "step": 1550 + }, + { + "epoch": 0.1251714954402389, + "grad_norm": 0.8270247578620911, + "learning_rate": 0.0001973878472935326, + "loss": 2.7613, + "step": 1551 + }, + { + "epoch": 0.1252521991768219, + "grad_norm": 0.8496212959289551, + "learning_rate": 0.00019738426135135174, + "loss": 2.8375, + "step": 1552 + }, + { + "epoch": 0.1253329029134049, + "grad_norm": 0.8465524911880493, + "learning_rate": 0.00019738067298210045, + "loss": 2.8023, + "step": 
1553 + }, + { + "epoch": 0.1254136066499879, + "grad_norm": 0.7843824028968811, + "learning_rate": 0.00019737708218586826, + "loss": 2.7424, + "step": 1554 + }, + { + "epoch": 0.1254943103865709, + "grad_norm": 0.8310040235519409, + "learning_rate": 0.00019737348896274462, + "loss": 2.7608, + "step": 1555 + }, + { + "epoch": 0.1255750141231539, + "grad_norm": 0.7895017266273499, + "learning_rate": 0.00019736989331281914, + "loss": 2.7549, + "step": 1556 + }, + { + "epoch": 0.1256557178597369, + "grad_norm": 0.8140431642532349, + "learning_rate": 0.00019736629523618138, + "loss": 2.802, + "step": 1557 + }, + { + "epoch": 0.1257364215963199, + "grad_norm": 0.8026889562606812, + "learning_rate": 0.000197362694732921, + "loss": 2.7758, + "step": 1558 + }, + { + "epoch": 0.1258171253329029, + "grad_norm": 0.8018048405647278, + "learning_rate": 0.0001973590918031278, + "loss": 2.7729, + "step": 1559 + }, + { + "epoch": 0.1258978290694859, + "grad_norm": 0.8394612073898315, + "learning_rate": 0.00019735548644689147, + "loss": 2.7692, + "step": 1560 + }, + { + "epoch": 0.1259785328060689, + "grad_norm": 0.819804310798645, + "learning_rate": 0.00019735187866430198, + "loss": 2.6933, + "step": 1561 + }, + { + "epoch": 0.12605923654265191, + "grad_norm": 0.8094257116317749, + "learning_rate": 0.0001973482684554492, + "loss": 2.7722, + "step": 1562 + }, + { + "epoch": 0.12613994027923492, + "grad_norm": 0.8647315502166748, + "learning_rate": 0.00019734465582042305, + "loss": 2.787, + "step": 1563 + }, + { + "epoch": 0.12622064401581792, + "grad_norm": 0.8439335823059082, + "learning_rate": 0.00019734104075931367, + "loss": 2.8, + "step": 1564 + }, + { + "epoch": 0.12630134775240093, + "grad_norm": 0.852480947971344, + "learning_rate": 0.00019733742327221105, + "loss": 2.8656, + "step": 1565 + }, + { + "epoch": 0.12638205148898393, + "grad_norm": 0.813846230506897, + "learning_rate": 0.00019733380335920542, + "loss": 2.7733, + "step": 1566 + }, + { + "epoch": 
0.12646275522556694, + "grad_norm": 0.7860896587371826, + "learning_rate": 0.00019733018102038698, + "loss": 2.8201, + "step": 1567 + }, + { + "epoch": 0.12654345896214994, + "grad_norm": 0.7857748866081238, + "learning_rate": 0.00019732655625584602, + "loss": 2.8726, + "step": 1568 + }, + { + "epoch": 0.12662416269873294, + "grad_norm": 0.8152899146080017, + "learning_rate": 0.00019732292906567286, + "loss": 2.7738, + "step": 1569 + }, + { + "epoch": 0.12670486643531595, + "grad_norm": 0.8281696438789368, + "learning_rate": 0.00019731929944995788, + "loss": 2.7966, + "step": 1570 + }, + { + "epoch": 0.12678557017189895, + "grad_norm": 0.8070773482322693, + "learning_rate": 0.00019731566740879158, + "loss": 2.6988, + "step": 1571 + }, + { + "epoch": 0.12686627390848196, + "grad_norm": 0.7859680652618408, + "learning_rate": 0.00019731203294226445, + "loss": 2.7241, + "step": 1572 + }, + { + "epoch": 0.12694697764506496, + "grad_norm": 0.7753982543945312, + "learning_rate": 0.0001973083960504671, + "loss": 2.7621, + "step": 1573 + }, + { + "epoch": 0.12702768138164797, + "grad_norm": 0.8063471913337708, + "learning_rate": 0.00019730475673349014, + "loss": 2.7298, + "step": 1574 + }, + { + "epoch": 0.12710838511823097, + "grad_norm": 0.7943962812423706, + "learning_rate": 0.0001973011149914243, + "loss": 2.7714, + "step": 1575 + }, + { + "epoch": 0.12718908885481398, + "grad_norm": 0.8297483325004578, + "learning_rate": 0.00019729747082436033, + "loss": 2.7743, + "step": 1576 + }, + { + "epoch": 0.12726979259139698, + "grad_norm": 0.8728111386299133, + "learning_rate": 0.000197293824232389, + "loss": 2.8251, + "step": 1577 + }, + { + "epoch": 0.12735049632797998, + "grad_norm": 0.8762480020523071, + "learning_rate": 0.00019729017521560128, + "loss": 2.8036, + "step": 1578 + }, + { + "epoch": 0.127431200064563, + "grad_norm": 0.9266185164451599, + "learning_rate": 0.00019728652377408806, + "loss": 2.7335, + "step": 1579 + }, + { + "epoch": 0.127511903801146, + 
"grad_norm": 0.9289839267730713, + "learning_rate": 0.00019728286990794037, + "loss": 2.7715, + "step": 1580 + }, + { + "epoch": 0.127592607537729, + "grad_norm": 0.8811823725700378, + "learning_rate": 0.0001972792136172493, + "loss": 2.7389, + "step": 1581 + }, + { + "epoch": 0.127673311274312, + "grad_norm": 0.8174294233322144, + "learning_rate": 0.00019727555490210588, + "loss": 2.7483, + "step": 1582 + }, + { + "epoch": 0.127754015010895, + "grad_norm": 0.8254107236862183, + "learning_rate": 0.00019727189376260137, + "loss": 2.7897, + "step": 1583 + }, + { + "epoch": 0.127834718747478, + "grad_norm": 0.8478763699531555, + "learning_rate": 0.000197268230198827, + "loss": 2.7394, + "step": 1584 + }, + { + "epoch": 0.12791542248406101, + "grad_norm": 0.8356192111968994, + "learning_rate": 0.00019726456421087404, + "loss": 2.7518, + "step": 1585 + }, + { + "epoch": 0.12799612622064402, + "grad_norm": 0.8523107767105103, + "learning_rate": 0.00019726089579883392, + "loss": 2.7893, + "step": 1586 + }, + { + "epoch": 0.12807682995722702, + "grad_norm": 0.9048579931259155, + "learning_rate": 0.00019725722496279804, + "loss": 2.7488, + "step": 1587 + }, + { + "epoch": 0.12815753369381003, + "grad_norm": 0.8242251873016357, + "learning_rate": 0.00019725355170285787, + "loss": 2.7544, + "step": 1588 + }, + { + "epoch": 0.12823823743039303, + "grad_norm": 0.8343983888626099, + "learning_rate": 0.00019724987601910497, + "loss": 2.7317, + "step": 1589 + }, + { + "epoch": 0.12831894116697604, + "grad_norm": 0.8084509372711182, + "learning_rate": 0.00019724619791163095, + "loss": 2.7822, + "step": 1590 + }, + { + "epoch": 0.12839964490355904, + "grad_norm": 0.8397380113601685, + "learning_rate": 0.00019724251738052745, + "loss": 2.8188, + "step": 1591 + }, + { + "epoch": 0.12848034864014204, + "grad_norm": 0.8558558821678162, + "learning_rate": 0.00019723883442588624, + "loss": 2.7623, + "step": 1592 + }, + { + "epoch": 0.12856105237672505, + "grad_norm": 0.7602639198303223, + 
"learning_rate": 0.0001972351490477991, + "loss": 2.7932, + "step": 1593 + }, + { + "epoch": 0.12864175611330805, + "grad_norm": 0.8379851579666138, + "learning_rate": 0.00019723146124635786, + "loss": 2.8296, + "step": 1594 + }, + { + "epoch": 0.12872245984989106, + "grad_norm": 0.8454548716545105, + "learning_rate": 0.00019722777102165444, + "loss": 2.8192, + "step": 1595 + }, + { + "epoch": 0.12880316358647406, + "grad_norm": 0.8344082832336426, + "learning_rate": 0.0001972240783737808, + "loss": 2.7628, + "step": 1596 + }, + { + "epoch": 0.12888386732305707, + "grad_norm": 0.809093713760376, + "learning_rate": 0.000197220383302829, + "loss": 2.8055, + "step": 1597 + }, + { + "epoch": 0.12896457105964007, + "grad_norm": 0.7909694910049438, + "learning_rate": 0.0001972166858088911, + "loss": 2.7292, + "step": 1598 + }, + { + "epoch": 0.12904527479622308, + "grad_norm": 0.8350280523300171, + "learning_rate": 0.00019721298589205928, + "loss": 2.7671, + "step": 1599 + }, + { + "epoch": 0.12912597853280608, + "grad_norm": 0.7857616543769836, + "learning_rate": 0.00019720928355242568, + "loss": 2.729, + "step": 1600 + }, + { + "epoch": 0.12920668226938908, + "grad_norm": 0.7899746298789978, + "learning_rate": 0.0001972055787900827, + "loss": 2.8023, + "step": 1601 + }, + { + "epoch": 0.1292873860059721, + "grad_norm": 0.8604246377944946, + "learning_rate": 0.00019720187160512256, + "loss": 2.749, + "step": 1602 + }, + { + "epoch": 0.1293680897425551, + "grad_norm": 0.8517864942550659, + "learning_rate": 0.0001971981619976377, + "loss": 2.7203, + "step": 1603 + }, + { + "epoch": 0.1294487934791381, + "grad_norm": 0.8860471248626709, + "learning_rate": 0.00019719444996772056, + "loss": 2.7372, + "step": 1604 + }, + { + "epoch": 0.1295294972157211, + "grad_norm": 0.8355888724327087, + "learning_rate": 0.00019719073551546367, + "loss": 2.7284, + "step": 1605 + }, + { + "epoch": 0.1296102009523041, + "grad_norm": 0.7998479604721069, + "learning_rate": 
0.00019718701864095955, + "loss": 2.7726, + "step": 1606 + }, + { + "epoch": 0.12969090468888708, + "grad_norm": 0.8564549088478088, + "learning_rate": 0.00019718329934430092, + "loss": 2.7334, + "step": 1607 + }, + { + "epoch": 0.1297716084254701, + "grad_norm": 0.8594443798065186, + "learning_rate": 0.00019717957762558044, + "loss": 2.7865, + "step": 1608 + }, + { + "epoch": 0.1298523121620531, + "grad_norm": 0.804553210735321, + "learning_rate": 0.00019717585348489082, + "loss": 2.8094, + "step": 1609 + }, + { + "epoch": 0.1299330158986361, + "grad_norm": 0.7892553806304932, + "learning_rate": 0.0001971721269223249, + "loss": 2.7969, + "step": 1610 + }, + { + "epoch": 0.1300137196352191, + "grad_norm": 0.8703331351280212, + "learning_rate": 0.0001971683979379756, + "loss": 2.8192, + "step": 1611 + }, + { + "epoch": 0.1300944233718021, + "grad_norm": 0.8176589012145996, + "learning_rate": 0.00019716466653193582, + "loss": 2.7902, + "step": 1612 + }, + { + "epoch": 0.1301751271083851, + "grad_norm": 0.8305137157440186, + "learning_rate": 0.00019716093270429855, + "loss": 2.8202, + "step": 1613 + }, + { + "epoch": 0.1302558308449681, + "grad_norm": 0.8261505365371704, + "learning_rate": 0.00019715719645515688, + "loss": 2.7905, + "step": 1614 + }, + { + "epoch": 0.13033653458155112, + "grad_norm": 0.9465535879135132, + "learning_rate": 0.00019715345778460389, + "loss": 2.7965, + "step": 1615 + }, + { + "epoch": 0.13041723831813412, + "grad_norm": 0.8847100138664246, + "learning_rate": 0.00019714971669273275, + "loss": 2.8177, + "step": 1616 + }, + { + "epoch": 0.13049794205471713, + "grad_norm": 0.9768328666687012, + "learning_rate": 0.0001971459731796367, + "loss": 2.7668, + "step": 1617 + }, + { + "epoch": 0.13057864579130013, + "grad_norm": 0.7498586177825928, + "learning_rate": 0.0001971422272454091, + "loss": 2.761, + "step": 1618 + }, + { + "epoch": 0.13065934952788313, + "grad_norm": 1.0455373525619507, + "learning_rate": 0.00019713847889014325, + "loss": 
2.7652, + "step": 1619 + }, + { + "epoch": 0.13074005326446614, + "grad_norm": 0.8484631180763245, + "learning_rate": 0.00019713472811393258, + "loss": 2.7858, + "step": 1620 + }, + { + "epoch": 0.13082075700104914, + "grad_norm": 0.8190686702728271, + "learning_rate": 0.00019713097491687057, + "loss": 2.7217, + "step": 1621 + }, + { + "epoch": 0.13090146073763215, + "grad_norm": 0.8866000175476074, + "learning_rate": 0.00019712721929905077, + "loss": 2.7868, + "step": 1622 + }, + { + "epoch": 0.13098216447421515, + "grad_norm": 0.8026713132858276, + "learning_rate": 0.00019712346126056677, + "loss": 2.7276, + "step": 1623 + }, + { + "epoch": 0.13106286821079816, + "grad_norm": 0.8306462168693542, + "learning_rate": 0.00019711970080151225, + "loss": 2.7747, + "step": 1624 + }, + { + "epoch": 0.13114357194738116, + "grad_norm": 0.8276618123054504, + "learning_rate": 0.0001971159379219809, + "loss": 2.7146, + "step": 1625 + }, + { + "epoch": 0.13122427568396416, + "grad_norm": 0.9749011993408203, + "learning_rate": 0.00019711217262206648, + "loss": 2.8731, + "step": 1626 + }, + { + "epoch": 0.13130497942054717, + "grad_norm": 0.828484058380127, + "learning_rate": 0.00019710840490186292, + "loss": 2.803, + "step": 1627 + }, + { + "epoch": 0.13138568315713017, + "grad_norm": 0.8095957636833191, + "learning_rate": 0.00019710463476146402, + "loss": 2.7751, + "step": 1628 + }, + { + "epoch": 0.13146638689371318, + "grad_norm": 0.8731853365898132, + "learning_rate": 0.0001971008622009638, + "loss": 2.8274, + "step": 1629 + }, + { + "epoch": 0.13154709063029618, + "grad_norm": 0.8180200457572937, + "learning_rate": 0.00019709708722045628, + "loss": 2.813, + "step": 1630 + }, + { + "epoch": 0.13162779436687919, + "grad_norm": 0.7740067839622498, + "learning_rate": 0.00019709330982003553, + "loss": 2.7319, + "step": 1631 + }, + { + "epoch": 0.1317084981034622, + "grad_norm": 0.8439326882362366, + "learning_rate": 0.0001970895299997957, + "loss": 2.8182, + "step": 1632 + }, + 
{ + "epoch": 0.1317892018400452, + "grad_norm": 0.8254802823066711, + "learning_rate": 0.000197085747759831, + "loss": 2.7874, + "step": 1633 + }, + { + "epoch": 0.1318699055766282, + "grad_norm": 0.8128175139427185, + "learning_rate": 0.00019708196310023562, + "loss": 2.8125, + "step": 1634 + }, + { + "epoch": 0.1319506093132112, + "grad_norm": 0.8664820790290833, + "learning_rate": 0.00019707817602110402, + "loss": 2.8446, + "step": 1635 + }, + { + "epoch": 0.1320313130497942, + "grad_norm": 0.8101332783699036, + "learning_rate": 0.00019707438652253044, + "loss": 2.8027, + "step": 1636 + }, + { + "epoch": 0.1321120167863772, + "grad_norm": 0.8296725153923035, + "learning_rate": 0.00019707059460460945, + "loss": 2.7677, + "step": 1637 + }, + { + "epoch": 0.13219272052296022, + "grad_norm": 0.7321150898933411, + "learning_rate": 0.0001970668002674355, + "loss": 2.6991, + "step": 1638 + }, + { + "epoch": 0.13227342425954322, + "grad_norm": 0.8321375250816345, + "learning_rate": 0.0001970630035111031, + "loss": 2.6948, + "step": 1639 + }, + { + "epoch": 0.13235412799612623, + "grad_norm": 0.7622714042663574, + "learning_rate": 0.00019705920433570694, + "loss": 2.6957, + "step": 1640 + }, + { + "epoch": 0.13243483173270923, + "grad_norm": 0.8413416147232056, + "learning_rate": 0.00019705540274134173, + "loss": 2.7277, + "step": 1641 + }, + { + "epoch": 0.13251553546929223, + "grad_norm": 0.8798941373825073, + "learning_rate": 0.00019705159872810218, + "loss": 2.7699, + "step": 1642 + }, + { + "epoch": 0.13259623920587524, + "grad_norm": 0.788287341594696, + "learning_rate": 0.00019704779229608304, + "loss": 2.7933, + "step": 1643 + }, + { + "epoch": 0.13267694294245824, + "grad_norm": 0.8547430634498596, + "learning_rate": 0.00019704398344537927, + "loss": 2.7706, + "step": 1644 + }, + { + "epoch": 0.13275764667904125, + "grad_norm": 0.8474008440971375, + "learning_rate": 0.00019704017217608575, + "loss": 2.8005, + "step": 1645 + }, + { + "epoch": 0.13283835041562425, 
+ "grad_norm": 0.8636945486068726, + "learning_rate": 0.00019703635848829747, + "loss": 2.8241, + "step": 1646 + }, + { + "epoch": 0.13291905415220726, + "grad_norm": 0.8158168792724609, + "learning_rate": 0.00019703254238210947, + "loss": 2.7576, + "step": 1647 + }, + { + "epoch": 0.13299975788879026, + "grad_norm": 0.8420887589454651, + "learning_rate": 0.0001970287238576169, + "loss": 2.7677, + "step": 1648 + }, + { + "epoch": 0.13308046162537326, + "grad_norm": 0.7910059690475464, + "learning_rate": 0.00019702490291491486, + "loss": 2.7807, + "step": 1649 + }, + { + "epoch": 0.13316116536195627, + "grad_norm": 0.8308143615722656, + "learning_rate": 0.00019702107955409863, + "loss": 2.7698, + "step": 1650 + }, + { + "epoch": 0.13324186909853927, + "grad_norm": 0.8215764760971069, + "learning_rate": 0.00019701725377526349, + "loss": 2.8263, + "step": 1651 + }, + { + "epoch": 0.13332257283512228, + "grad_norm": 0.8780504465103149, + "learning_rate": 0.00019701342557850476, + "loss": 2.8032, + "step": 1652 + }, + { + "epoch": 0.13340327657170528, + "grad_norm": 0.8125136494636536, + "learning_rate": 0.0001970095949639179, + "loss": 2.8317, + "step": 1653 + }, + { + "epoch": 0.13348398030828829, + "grad_norm": 0.8170902132987976, + "learning_rate": 0.00019700576193159831, + "loss": 2.7528, + "step": 1654 + }, + { + "epoch": 0.1335646840448713, + "grad_norm": 0.8318637013435364, + "learning_rate": 0.00019700192648164157, + "loss": 2.7963, + "step": 1655 + }, + { + "epoch": 0.1336453877814543, + "grad_norm": 0.8445270657539368, + "learning_rate": 0.00019699808861414327, + "loss": 2.772, + "step": 1656 + }, + { + "epoch": 0.1337260915180373, + "grad_norm": 0.7908959984779358, + "learning_rate": 0.00019699424832919906, + "loss": 2.7528, + "step": 1657 + }, + { + "epoch": 0.13380679525462028, + "grad_norm": 0.8153900504112244, + "learning_rate": 0.00019699040562690462, + "loss": 2.7643, + "step": 1658 + }, + { + "epoch": 0.13388749899120328, + "grad_norm": 
0.86302250623703, + "learning_rate": 0.0001969865605073557, + "loss": 2.8037, + "step": 1659 + }, + { + "epoch": 0.13396820272778628, + "grad_norm": 0.8373419046401978, + "learning_rate": 0.0001969827129706482, + "loss": 2.7647, + "step": 1660 + }, + { + "epoch": 0.1340489064643693, + "grad_norm": 0.8166481852531433, + "learning_rate": 0.00019697886301687798, + "loss": 2.8333, + "step": 1661 + }, + { + "epoch": 0.1341296102009523, + "grad_norm": 0.7807812094688416, + "learning_rate": 0.00019697501064614098, + "loss": 2.7495, + "step": 1662 + }, + { + "epoch": 0.1342103139375353, + "grad_norm": 0.8375338315963745, + "learning_rate": 0.00019697115585853324, + "loss": 2.7518, + "step": 1663 + }, + { + "epoch": 0.1342910176741183, + "grad_norm": 0.7392182350158691, + "learning_rate": 0.00019696729865415077, + "loss": 2.758, + "step": 1664 + }, + { + "epoch": 0.1343717214107013, + "grad_norm": 0.8041971921920776, + "learning_rate": 0.00019696343903308978, + "loss": 2.7485, + "step": 1665 + }, + { + "epoch": 0.1344524251472843, + "grad_norm": 0.789310097694397, + "learning_rate": 0.00019695957699544643, + "loss": 2.8179, + "step": 1666 + }, + { + "epoch": 0.13453312888386731, + "grad_norm": 0.7643609642982483, + "learning_rate": 0.00019695571254131693, + "loss": 2.7791, + "step": 1667 + }, + { + "epoch": 0.13461383262045032, + "grad_norm": 0.8284661769866943, + "learning_rate": 0.00019695184567079766, + "loss": 2.717, + "step": 1668 + }, + { + "epoch": 0.13469453635703332, + "grad_norm": 0.7620903253555298, + "learning_rate": 0.00019694797638398494, + "loss": 2.7808, + "step": 1669 + }, + { + "epoch": 0.13477524009361633, + "grad_norm": 0.9123913645744324, + "learning_rate": 0.00019694410468097524, + "loss": 2.7648, + "step": 1670 + }, + { + "epoch": 0.13485594383019933, + "grad_norm": 0.735518217086792, + "learning_rate": 0.000196940230561865, + "loss": 2.7653, + "step": 1671 + }, + { + "epoch": 0.13493664756678234, + "grad_norm": 0.8363413214683533, + "learning_rate": 
0.00019693635402675085, + "loss": 2.766, + "step": 1672 + }, + { + "epoch": 0.13501735130336534, + "grad_norm": 0.8206491470336914, + "learning_rate": 0.00019693247507572936, + "loss": 2.7829, + "step": 1673 + }, + { + "epoch": 0.13509805503994834, + "grad_norm": 0.7726099491119385, + "learning_rate": 0.0001969285937088972, + "loss": 2.7381, + "step": 1674 + }, + { + "epoch": 0.13517875877653135, + "grad_norm": 0.8970316052436829, + "learning_rate": 0.0001969247099263511, + "loss": 2.7836, + "step": 1675 + }, + { + "epoch": 0.13525946251311435, + "grad_norm": 0.7966172099113464, + "learning_rate": 0.00019692082372818788, + "loss": 2.7135, + "step": 1676 + }, + { + "epoch": 0.13534016624969736, + "grad_norm": 0.8583024740219116, + "learning_rate": 0.00019691693511450438, + "loss": 2.7908, + "step": 1677 + }, + { + "epoch": 0.13542086998628036, + "grad_norm": 0.9430457353591919, + "learning_rate": 0.0001969130440853975, + "loss": 2.7311, + "step": 1678 + }, + { + "epoch": 0.13550157372286337, + "grad_norm": 0.8066009879112244, + "learning_rate": 0.00019690915064096424, + "loss": 2.7039, + "step": 1679 + }, + { + "epoch": 0.13558227745944637, + "grad_norm": 1.0169655084609985, + "learning_rate": 0.0001969052547813016, + "loss": 2.7832, + "step": 1680 + }, + { + "epoch": 0.13566298119602938, + "grad_norm": 0.8606080412864685, + "learning_rate": 0.00019690135650650672, + "loss": 2.751, + "step": 1681 + }, + { + "epoch": 0.13574368493261238, + "grad_norm": 0.8625333905220032, + "learning_rate": 0.00019689745581667674, + "loss": 2.761, + "step": 1682 + }, + { + "epoch": 0.13582438866919538, + "grad_norm": 0.9304285645484924, + "learning_rate": 0.00019689355271190886, + "loss": 2.7566, + "step": 1683 + }, + { + "epoch": 0.1359050924057784, + "grad_norm": 0.793397068977356, + "learning_rate": 0.00019688964719230035, + "loss": 2.7648, + "step": 1684 + }, + { + "epoch": 0.1359857961423614, + "grad_norm": 0.8496749401092529, + "learning_rate": 0.00019688573925794858, + "loss": 
2.7461, + "step": 1685 + }, + { + "epoch": 0.1360664998789444, + "grad_norm": 0.7807914018630981, + "learning_rate": 0.0001968818289089509, + "loss": 2.8266, + "step": 1686 + }, + { + "epoch": 0.1361472036155274, + "grad_norm": 0.8186607956886292, + "learning_rate": 0.0001968779161454048, + "loss": 2.8447, + "step": 1687 + }, + { + "epoch": 0.1362279073521104, + "grad_norm": 0.8007118701934814, + "learning_rate": 0.0001968740009674078, + "loss": 2.7888, + "step": 1688 + }, + { + "epoch": 0.1363086110886934, + "grad_norm": 0.8735570311546326, + "learning_rate": 0.00019687008337505749, + "loss": 2.7152, + "step": 1689 + }, + { + "epoch": 0.13638931482527641, + "grad_norm": 0.8546476364135742, + "learning_rate": 0.00019686616336845144, + "loss": 2.8113, + "step": 1690 + }, + { + "epoch": 0.13647001856185942, + "grad_norm": 0.9156736135482788, + "learning_rate": 0.0001968622409476874, + "loss": 2.7561, + "step": 1691 + }, + { + "epoch": 0.13655072229844242, + "grad_norm": 0.8091925382614136, + "learning_rate": 0.0001968583161128631, + "loss": 2.7384, + "step": 1692 + }, + { + "epoch": 0.13663142603502543, + "grad_norm": 0.7871039509773254, + "learning_rate": 0.0001968543888640764, + "loss": 2.7138, + "step": 1693 + }, + { + "epoch": 0.13671212977160843, + "grad_norm": 0.9537062048912048, + "learning_rate": 0.00019685045920142516, + "loss": 2.7726, + "step": 1694 + }, + { + "epoch": 0.13679283350819144, + "grad_norm": 0.8663280010223389, + "learning_rate": 0.00019684652712500728, + "loss": 2.7509, + "step": 1695 + }, + { + "epoch": 0.13687353724477444, + "grad_norm": 0.8717214465141296, + "learning_rate": 0.0001968425926349208, + "loss": 2.791, + "step": 1696 + }, + { + "epoch": 0.13695424098135744, + "grad_norm": 0.8942584991455078, + "learning_rate": 0.00019683865573126374, + "loss": 2.77, + "step": 1697 + }, + { + "epoch": 0.13703494471794045, + "grad_norm": 0.8243421316146851, + "learning_rate": 0.00019683471641413424, + "loss": 2.8063, + "step": 1698 + }, + { + 
"epoch": 0.13711564845452345, + "grad_norm": 0.8618699908256531, + "learning_rate": 0.0001968307746836305, + "loss": 2.6872, + "step": 1699 + }, + { + "epoch": 0.13719635219110646, + "grad_norm": 0.7931695580482483, + "learning_rate": 0.00019682683053985072, + "loss": 2.7495, + "step": 1700 + }, + { + "epoch": 0.13727705592768946, + "grad_norm": 0.7549482583999634, + "learning_rate": 0.00019682288398289324, + "loss": 2.7543, + "step": 1701 + }, + { + "epoch": 0.13735775966427247, + "grad_norm": 0.7953789234161377, + "learning_rate": 0.00019681893501285636, + "loss": 2.6895, + "step": 1702 + }, + { + "epoch": 0.13743846340085547, + "grad_norm": 0.7916574478149414, + "learning_rate": 0.00019681498362983857, + "loss": 2.819, + "step": 1703 + }, + { + "epoch": 0.13751916713743847, + "grad_norm": 0.7986735105514526, + "learning_rate": 0.0001968110298339383, + "loss": 2.8062, + "step": 1704 + }, + { + "epoch": 0.13759987087402148, + "grad_norm": 0.8601658940315247, + "learning_rate": 0.00019680707362525407, + "loss": 2.7625, + "step": 1705 + }, + { + "epoch": 0.13768057461060448, + "grad_norm": 0.8888362050056458, + "learning_rate": 0.00019680311500388454, + "loss": 2.7747, + "step": 1706 + }, + { + "epoch": 0.1377612783471875, + "grad_norm": 0.7762896418571472, + "learning_rate": 0.00019679915396992833, + "loss": 2.7959, + "step": 1707 + }, + { + "epoch": 0.1378419820837705, + "grad_norm": 0.8942253589630127, + "learning_rate": 0.00019679519052348416, + "loss": 2.7717, + "step": 1708 + }, + { + "epoch": 0.13792268582035347, + "grad_norm": 0.8388909697532654, + "learning_rate": 0.00019679122466465082, + "loss": 2.7448, + "step": 1709 + }, + { + "epoch": 0.13800338955693647, + "grad_norm": 0.8826024532318115, + "learning_rate": 0.00019678725639352712, + "loss": 2.7307, + "step": 1710 + }, + { + "epoch": 0.13808409329351948, + "grad_norm": 0.8972313404083252, + "learning_rate": 0.00019678328571021204, + "loss": 2.7619, + "step": 1711 + }, + { + "epoch": 
0.13816479703010248, + "grad_norm": 0.9373044371604919, + "learning_rate": 0.00019677931261480444, + "loss": 2.7664, + "step": 1712 + }, + { + "epoch": 0.1382455007666855, + "grad_norm": 0.8060994148254395, + "learning_rate": 0.00019677533710740343, + "loss": 2.7707, + "step": 1713 + }, + { + "epoch": 0.1383262045032685, + "grad_norm": 0.8324100971221924, + "learning_rate": 0.000196771359188108, + "loss": 2.8249, + "step": 1714 + }, + { + "epoch": 0.1384069082398515, + "grad_norm": 0.879176676273346, + "learning_rate": 0.00019676737885701738, + "loss": 2.7767, + "step": 1715 + }, + { + "epoch": 0.1384876119764345, + "grad_norm": 0.8823966979980469, + "learning_rate": 0.0001967633961142307, + "loss": 2.791, + "step": 1716 + }, + { + "epoch": 0.1385683157130175, + "grad_norm": 0.8176039457321167, + "learning_rate": 0.00019675941095984728, + "loss": 2.8225, + "step": 1717 + }, + { + "epoch": 0.1386490194496005, + "grad_norm": 0.8005076050758362, + "learning_rate": 0.00019675542339396635, + "loss": 2.8175, + "step": 1718 + }, + { + "epoch": 0.1387297231861835, + "grad_norm": 0.800854504108429, + "learning_rate": 0.0001967514334166874, + "loss": 2.8226, + "step": 1719 + }, + { + "epoch": 0.13881042692276652, + "grad_norm": 0.7941261529922485, + "learning_rate": 0.00019674744102810978, + "loss": 2.7488, + "step": 1720 + }, + { + "epoch": 0.13889113065934952, + "grad_norm": 0.7955947518348694, + "learning_rate": 0.00019674344622833302, + "loss": 2.7749, + "step": 1721 + }, + { + "epoch": 0.13897183439593253, + "grad_norm": 0.8353856205940247, + "learning_rate": 0.00019673944901745674, + "loss": 2.7982, + "step": 1722 + }, + { + "epoch": 0.13905253813251553, + "grad_norm": 0.8711503744125366, + "learning_rate": 0.00019673544939558047, + "loss": 2.8007, + "step": 1723 + }, + { + "epoch": 0.13913324186909853, + "grad_norm": 0.8525274991989136, + "learning_rate": 0.00019673144736280396, + "loss": 2.7423, + "step": 1724 + }, + { + "epoch": 0.13921394560568154, + "grad_norm": 
0.8143991231918335, + "learning_rate": 0.0001967274429192269, + "loss": 2.7752, + "step": 1725 + }, + { + "epoch": 0.13929464934226454, + "grad_norm": 0.8508228063583374, + "learning_rate": 0.00019672343606494912, + "loss": 2.7422, + "step": 1726 + }, + { + "epoch": 0.13937535307884755, + "grad_norm": 0.8320932984352112, + "learning_rate": 0.0001967194268000705, + "loss": 2.7598, + "step": 1727 + }, + { + "epoch": 0.13945605681543055, + "grad_norm": 0.8233908414840698, + "learning_rate": 0.00019671541512469092, + "loss": 2.7834, + "step": 1728 + }, + { + "epoch": 0.13953676055201356, + "grad_norm": 0.8097162246704102, + "learning_rate": 0.00019671140103891038, + "loss": 2.7856, + "step": 1729 + }, + { + "epoch": 0.13961746428859656, + "grad_norm": 0.9043141007423401, + "learning_rate": 0.0001967073845428289, + "loss": 2.8047, + "step": 1730 + }, + { + "epoch": 0.13969816802517956, + "grad_norm": 0.9118517637252808, + "learning_rate": 0.00019670336563654662, + "loss": 2.789, + "step": 1731 + }, + { + "epoch": 0.13977887176176257, + "grad_norm": 0.8016074895858765, + "learning_rate": 0.00019669934432016368, + "loss": 2.7506, + "step": 1732 + }, + { + "epoch": 0.13985957549834557, + "grad_norm": 0.8376848697662354, + "learning_rate": 0.0001966953205937803, + "loss": 2.7832, + "step": 1733 + }, + { + "epoch": 0.13994027923492858, + "grad_norm": 0.8511834144592285, + "learning_rate": 0.0001966912944574968, + "loss": 2.7564, + "step": 1734 + }, + { + "epoch": 0.14002098297151158, + "grad_norm": 0.7796351909637451, + "learning_rate": 0.00019668726591141344, + "loss": 2.7489, + "step": 1735 + }, + { + "epoch": 0.14010168670809459, + "grad_norm": 0.8204767107963562, + "learning_rate": 0.00019668323495563068, + "loss": 2.7634, + "step": 1736 + }, + { + "epoch": 0.1401823904446776, + "grad_norm": 0.9049975872039795, + "learning_rate": 0.000196679201590249, + "loss": 2.7863, + "step": 1737 + }, + { + "epoch": 0.1402630941812606, + "grad_norm": 0.7473673224449158, + 
"learning_rate": 0.0001966751658153689, + "loss": 2.7557, + "step": 1738 + }, + { + "epoch": 0.1403437979178436, + "grad_norm": 0.7765525579452515, + "learning_rate": 0.0001966711276310909, + "loss": 2.7865, + "step": 1739 + }, + { + "epoch": 0.1404245016544266, + "grad_norm": 0.8766517043113708, + "learning_rate": 0.00019666708703751576, + "loss": 2.7873, + "step": 1740 + }, + { + "epoch": 0.1405052053910096, + "grad_norm": 0.8351505994796753, + "learning_rate": 0.00019666304403474408, + "loss": 2.7355, + "step": 1741 + }, + { + "epoch": 0.1405859091275926, + "grad_norm": 0.7612324953079224, + "learning_rate": 0.00019665899862287667, + "loss": 2.7608, + "step": 1742 + }, + { + "epoch": 0.14066661286417562, + "grad_norm": 0.894249439239502, + "learning_rate": 0.00019665495080201434, + "loss": 2.7469, + "step": 1743 + }, + { + "epoch": 0.14074731660075862, + "grad_norm": 0.8528907895088196, + "learning_rate": 0.00019665090057225803, + "loss": 2.773, + "step": 1744 + }, + { + "epoch": 0.14082802033734163, + "grad_norm": 0.7718498706817627, + "learning_rate": 0.00019664684793370855, + "loss": 2.8045, + "step": 1745 + }, + { + "epoch": 0.14090872407392463, + "grad_norm": 0.8013718128204346, + "learning_rate": 0.00019664279288646706, + "loss": 2.7665, + "step": 1746 + }, + { + "epoch": 0.14098942781050763, + "grad_norm": 0.828803539276123, + "learning_rate": 0.00019663873543063448, + "loss": 2.7846, + "step": 1747 + }, + { + "epoch": 0.14107013154709064, + "grad_norm": 0.8349393606185913, + "learning_rate": 0.00019663467556631204, + "loss": 2.7405, + "step": 1748 + }, + { + "epoch": 0.14115083528367364, + "grad_norm": 0.8273345232009888, + "learning_rate": 0.00019663061329360085, + "loss": 2.7578, + "step": 1749 + }, + { + "epoch": 0.14123153902025665, + "grad_norm": 0.7989444136619568, + "learning_rate": 0.0001966265486126022, + "loss": 2.739, + "step": 1750 + }, + { + "epoch": 0.14131224275683965, + "grad_norm": 0.8690519332885742, + "learning_rate": 
0.00019662248152341736, + "loss": 2.7566, + "step": 1751 + }, + { + "epoch": 0.14139294649342266, + "grad_norm": 0.8453623056411743, + "learning_rate": 0.0001966184120261477, + "loss": 2.8572, + "step": 1752 + }, + { + "epoch": 0.14147365023000566, + "grad_norm": 0.8396254777908325, + "learning_rate": 0.00019661434012089468, + "loss": 2.786, + "step": 1753 + }, + { + "epoch": 0.14155435396658866, + "grad_norm": 0.7643738389015198, + "learning_rate": 0.00019661026580775973, + "loss": 2.8193, + "step": 1754 + }, + { + "epoch": 0.14163505770317167, + "grad_norm": 0.8124154806137085, + "learning_rate": 0.00019660618908684443, + "loss": 2.7754, + "step": 1755 + }, + { + "epoch": 0.14171576143975467, + "grad_norm": 0.8620683550834656, + "learning_rate": 0.00019660210995825036, + "loss": 2.7827, + "step": 1756 + }, + { + "epoch": 0.14179646517633768, + "grad_norm": 0.8241196274757385, + "learning_rate": 0.0001965980284220792, + "loss": 2.7573, + "step": 1757 + }, + { + "epoch": 0.14187716891292068, + "grad_norm": 0.8264089822769165, + "learning_rate": 0.00019659394447843262, + "loss": 2.8214, + "step": 1758 + }, + { + "epoch": 0.14195787264950369, + "grad_norm": 0.9129722118377686, + "learning_rate": 0.00019658985812741247, + "loss": 2.7962, + "step": 1759 + }, + { + "epoch": 0.14203857638608666, + "grad_norm": 0.7976365089416504, + "learning_rate": 0.00019658576936912057, + "loss": 2.7534, + "step": 1760 + }, + { + "epoch": 0.14211928012266967, + "grad_norm": 0.7587228417396545, + "learning_rate": 0.00019658167820365882, + "loss": 2.7083, + "step": 1761 + }, + { + "epoch": 0.14219998385925267, + "grad_norm": 0.757882833480835, + "learning_rate": 0.00019657758463112918, + "loss": 2.7135, + "step": 1762 + }, + { + "epoch": 0.14228068759583568, + "grad_norm": 0.8541501760482788, + "learning_rate": 0.00019657348865163369, + "loss": 2.7833, + "step": 1763 + }, + { + "epoch": 0.14236139133241868, + "grad_norm": 0.7708966135978699, + "learning_rate": 0.00019656939026527442, + 
"loss": 2.7128, + "step": 1764 + }, + { + "epoch": 0.14244209506900168, + "grad_norm": 0.8733000159263611, + "learning_rate": 0.00019656528947215347, + "loss": 2.7597, + "step": 1765 + }, + { + "epoch": 0.1425227988055847, + "grad_norm": 0.7913360595703125, + "learning_rate": 0.0001965611862723731, + "loss": 2.7681, + "step": 1766 + }, + { + "epoch": 0.1426035025421677, + "grad_norm": 0.8692380785942078, + "learning_rate": 0.00019655708066603555, + "loss": 2.7587, + "step": 1767 + }, + { + "epoch": 0.1426842062787507, + "grad_norm": 0.8231006860733032, + "learning_rate": 0.00019655297265324317, + "loss": 2.772, + "step": 1768 + }, + { + "epoch": 0.1427649100153337, + "grad_norm": 0.7373722791671753, + "learning_rate": 0.0001965488622340983, + "loss": 2.7875, + "step": 1769 + }, + { + "epoch": 0.1428456137519167, + "grad_norm": 0.8614751696586609, + "learning_rate": 0.0001965447494087034, + "loss": 2.7962, + "step": 1770 + }, + { + "epoch": 0.1429263174884997, + "grad_norm": 0.8336494565010071, + "learning_rate": 0.000196540634177161, + "loss": 2.7072, + "step": 1771 + }, + { + "epoch": 0.14300702122508271, + "grad_norm": 0.844292163848877, + "learning_rate": 0.00019653651653957362, + "loss": 2.8043, + "step": 1772 + }, + { + "epoch": 0.14308772496166572, + "grad_norm": 0.7366824150085449, + "learning_rate": 0.0001965323964960439, + "loss": 2.7296, + "step": 1773 + }, + { + "epoch": 0.14316842869824872, + "grad_norm": 0.75767982006073, + "learning_rate": 0.0001965282740466745, + "loss": 2.7946, + "step": 1774 + }, + { + "epoch": 0.14324913243483173, + "grad_norm": 0.8361382484436035, + "learning_rate": 0.00019652414919156823, + "loss": 2.7232, + "step": 1775 + }, + { + "epoch": 0.14332983617141473, + "grad_norm": 0.8473719358444214, + "learning_rate": 0.0001965200219308278, + "loss": 2.774, + "step": 1776 + }, + { + "epoch": 0.14341053990799774, + "grad_norm": 0.7446423172950745, + "learning_rate": 0.00019651589226455613, + "loss": 2.7439, + "step": 1777 + }, + { + 
"epoch": 0.14349124364458074, + "grad_norm": 0.8332851529121399, + "learning_rate": 0.00019651176019285616, + "loss": 2.7891, + "step": 1778 + }, + { + "epoch": 0.14357194738116374, + "grad_norm": 0.885313868522644, + "learning_rate": 0.0001965076257158308, + "loss": 2.7677, + "step": 1779 + }, + { + "epoch": 0.14365265111774675, + "grad_norm": 0.8506965637207031, + "learning_rate": 0.00019650348883358315, + "loss": 2.8112, + "step": 1780 + }, + { + "epoch": 0.14373335485432975, + "grad_norm": 0.8415799736976624, + "learning_rate": 0.0001964993495462163, + "loss": 2.8242, + "step": 1781 + }, + { + "epoch": 0.14381405859091276, + "grad_norm": 0.8501513004302979, + "learning_rate": 0.00019649520785383338, + "loss": 2.8352, + "step": 1782 + }, + { + "epoch": 0.14389476232749576, + "grad_norm": 0.7839778065681458, + "learning_rate": 0.00019649106375653767, + "loss": 2.7194, + "step": 1783 + }, + { + "epoch": 0.14397546606407877, + "grad_norm": 0.8013346195220947, + "learning_rate": 0.00019648691725443243, + "loss": 2.7665, + "step": 1784 + }, + { + "epoch": 0.14405616980066177, + "grad_norm": 1.0338317155838013, + "learning_rate": 0.00019648276834762095, + "loss": 2.8599, + "step": 1785 + }, + { + "epoch": 0.14413687353724478, + "grad_norm": 0.898417592048645, + "learning_rate": 0.0001964786170362067, + "loss": 2.7192, + "step": 1786 + }, + { + "epoch": 0.14421757727382778, + "grad_norm": 0.8876320123672485, + "learning_rate": 0.00019647446332029313, + "loss": 2.7722, + "step": 1787 + }, + { + "epoch": 0.14429828101041078, + "grad_norm": 0.819461464881897, + "learning_rate": 0.00019647030719998373, + "loss": 2.7698, + "step": 1788 + }, + { + "epoch": 0.1443789847469938, + "grad_norm": 0.848380446434021, + "learning_rate": 0.0001964661486753821, + "loss": 2.7894, + "step": 1789 + }, + { + "epoch": 0.1444596884835768, + "grad_norm": 0.8343753814697266, + "learning_rate": 0.0001964619877465919, + "loss": 2.699, + "step": 1790 + }, + { + "epoch": 0.1445403922201598, + 
"grad_norm": 0.8718340396881104, + "learning_rate": 0.0001964578244137168, + "loss": 2.7313, + "step": 1791 + }, + { + "epoch": 0.1446210959567428, + "grad_norm": 0.866122841835022, + "learning_rate": 0.00019645365867686056, + "loss": 2.7112, + "step": 1792 + }, + { + "epoch": 0.1447017996933258, + "grad_norm": 0.8351789712905884, + "learning_rate": 0.000196449490536127, + "loss": 2.7765, + "step": 1793 + }, + { + "epoch": 0.1447825034299088, + "grad_norm": 0.8628408312797546, + "learning_rate": 0.00019644531999162004, + "loss": 2.7375, + "step": 1794 + }, + { + "epoch": 0.14486320716649181, + "grad_norm": 0.8414484858512878, + "learning_rate": 0.00019644114704344358, + "loss": 2.7502, + "step": 1795 + }, + { + "epoch": 0.14494391090307482, + "grad_norm": 0.9092586636543274, + "learning_rate": 0.00019643697169170166, + "loss": 2.7714, + "step": 1796 + }, + { + "epoch": 0.14502461463965782, + "grad_norm": 0.8458060622215271, + "learning_rate": 0.0001964327939364983, + "loss": 2.8376, + "step": 1797 + }, + { + "epoch": 0.14510531837624083, + "grad_norm": 0.8150759935379028, + "learning_rate": 0.00019642861377793764, + "loss": 2.7147, + "step": 1798 + }, + { + "epoch": 0.14518602211282383, + "grad_norm": 0.9008790850639343, + "learning_rate": 0.00019642443121612387, + "loss": 2.7786, + "step": 1799 + }, + { + "epoch": 0.14526672584940684, + "grad_norm": 0.848671555519104, + "learning_rate": 0.00019642024625116117, + "loss": 2.7813, + "step": 1800 + }, + { + "epoch": 0.14534742958598984, + "grad_norm": 0.8035007119178772, + "learning_rate": 0.00019641605888315393, + "loss": 2.7988, + "step": 1801 + }, + { + "epoch": 0.14542813332257284, + "grad_norm": 0.8210242390632629, + "learning_rate": 0.00019641186911220645, + "loss": 2.8451, + "step": 1802 + }, + { + "epoch": 0.14550883705915585, + "grad_norm": 0.8852066397666931, + "learning_rate": 0.00019640767693842318, + "loss": 2.7492, + "step": 1803 + }, + { + "epoch": 0.14558954079573885, + "grad_norm": 0.8421196937561035, 
+ "learning_rate": 0.0001964034823619086, + "loss": 2.759, + "step": 1804 + }, + { + "epoch": 0.14567024453232186, + "grad_norm": 0.8166298866271973, + "learning_rate": 0.00019639928538276724, + "loss": 2.7942, + "step": 1805 + }, + { + "epoch": 0.14575094826890486, + "grad_norm": 0.8502809405326843, + "learning_rate": 0.00019639508600110368, + "loss": 2.7829, + "step": 1806 + }, + { + "epoch": 0.14583165200548787, + "grad_norm": 0.8371078372001648, + "learning_rate": 0.0001963908842170226, + "loss": 2.7168, + "step": 1807 + }, + { + "epoch": 0.14591235574207087, + "grad_norm": 0.8148230910301208, + "learning_rate": 0.0001963866800306287, + "loss": 2.7706, + "step": 1808 + }, + { + "epoch": 0.14599305947865387, + "grad_norm": 0.8984564542770386, + "learning_rate": 0.0001963824734420268, + "loss": 2.7761, + "step": 1809 + }, + { + "epoch": 0.14607376321523688, + "grad_norm": 0.9357183575630188, + "learning_rate": 0.00019637826445132172, + "loss": 2.7738, + "step": 1810 + }, + { + "epoch": 0.14615446695181986, + "grad_norm": 0.8545449376106262, + "learning_rate": 0.00019637405305861834, + "loss": 2.772, + "step": 1811 + }, + { + "epoch": 0.14623517068840286, + "grad_norm": 1.1674948930740356, + "learning_rate": 0.00019636983926402165, + "loss": 2.8988, + "step": 1812 + }, + { + "epoch": 0.14631587442498586, + "grad_norm": 0.7875451445579529, + "learning_rate": 0.00019636562306763665, + "loss": 2.7053, + "step": 1813 + }, + { + "epoch": 0.14639657816156887, + "grad_norm": 0.8980962038040161, + "learning_rate": 0.0001963614044695684, + "loss": 2.7731, + "step": 1814 + }, + { + "epoch": 0.14647728189815187, + "grad_norm": 0.8403381705284119, + "learning_rate": 0.00019635718346992207, + "loss": 2.8555, + "step": 1815 + }, + { + "epoch": 0.14655798563473488, + "grad_norm": 0.8736433982849121, + "learning_rate": 0.00019635296006880284, + "loss": 2.7918, + "step": 1816 + }, + { + "epoch": 0.14663868937131788, + "grad_norm": 0.8604151606559753, + "learning_rate": 
0.000196348734266316, + "loss": 2.7493, + "step": 1817 + }, + { + "epoch": 0.1467193931079009, + "grad_norm": 0.8329424262046814, + "learning_rate": 0.00019634450606256681, + "loss": 2.7348, + "step": 1818 + }, + { + "epoch": 0.1468000968444839, + "grad_norm": 0.9835913181304932, + "learning_rate": 0.0001963402754576607, + "loss": 2.7651, + "step": 1819 + }, + { + "epoch": 0.1468808005810669, + "grad_norm": 0.7968378067016602, + "learning_rate": 0.0001963360424517031, + "loss": 2.7672, + "step": 1820 + }, + { + "epoch": 0.1469615043176499, + "grad_norm": 0.8012512922286987, + "learning_rate": 0.00019633180704479948, + "loss": 2.8022, + "step": 1821 + }, + { + "epoch": 0.1470422080542329, + "grad_norm": 0.7656376957893372, + "learning_rate": 0.0001963275692370554, + "loss": 2.7561, + "step": 1822 + }, + { + "epoch": 0.1471229117908159, + "grad_norm": 0.8030453324317932, + "learning_rate": 0.00019632332902857656, + "loss": 2.8048, + "step": 1823 + }, + { + "epoch": 0.1472036155273989, + "grad_norm": 0.8050903677940369, + "learning_rate": 0.0001963190864194685, + "loss": 2.7846, + "step": 1824 + }, + { + "epoch": 0.14728431926398192, + "grad_norm": 0.8001886606216431, + "learning_rate": 0.00019631484140983705, + "loss": 2.7382, + "step": 1825 + }, + { + "epoch": 0.14736502300056492, + "grad_norm": 0.8589862585067749, + "learning_rate": 0.00019631059399978796, + "loss": 2.8376, + "step": 1826 + }, + { + "epoch": 0.14744572673714793, + "grad_norm": 0.86325603723526, + "learning_rate": 0.00019630634418942714, + "loss": 2.7643, + "step": 1827 + }, + { + "epoch": 0.14752643047373093, + "grad_norm": 0.7893280386924744, + "learning_rate": 0.00019630209197886046, + "loss": 2.713, + "step": 1828 + }, + { + "epoch": 0.14760713421031393, + "grad_norm": 0.8890528082847595, + "learning_rate": 0.00019629783736819394, + "loss": 2.7435, + "step": 1829 + }, + { + "epoch": 0.14768783794689694, + "grad_norm": 0.794924795627594, + "learning_rate": 0.00019629358035753357, + "loss": 
2.7703, + "step": 1830 + }, + { + "epoch": 0.14776854168347994, + "grad_norm": 0.7712973952293396, + "learning_rate": 0.00019628932094698545, + "loss": 2.7487, + "step": 1831 + }, + { + "epoch": 0.14784924542006295, + "grad_norm": 0.7810670137405396, + "learning_rate": 0.00019628505913665576, + "loss": 2.7687, + "step": 1832 + }, + { + "epoch": 0.14792994915664595, + "grad_norm": 0.8331059813499451, + "learning_rate": 0.0001962807949266507, + "loss": 2.7166, + "step": 1833 + }, + { + "epoch": 0.14801065289322896, + "grad_norm": 0.8983452916145325, + "learning_rate": 0.00019627652831707656, + "loss": 2.8096, + "step": 1834 + }, + { + "epoch": 0.14809135662981196, + "grad_norm": 0.8387179374694824, + "learning_rate": 0.00019627225930803963, + "loss": 2.8252, + "step": 1835 + }, + { + "epoch": 0.14817206036639496, + "grad_norm": 0.8619294762611389, + "learning_rate": 0.0001962679878996464, + "loss": 2.7623, + "step": 1836 + }, + { + "epoch": 0.14825276410297797, + "grad_norm": 0.8195026516914368, + "learning_rate": 0.0001962637140920032, + "loss": 2.7295, + "step": 1837 + }, + { + "epoch": 0.14833346783956097, + "grad_norm": 0.806216835975647, + "learning_rate": 0.00019625943788521664, + "loss": 2.7184, + "step": 1838 + }, + { + "epoch": 0.14841417157614398, + "grad_norm": 0.7758379578590393, + "learning_rate": 0.00019625515927939327, + "loss": 2.7675, + "step": 1839 + }, + { + "epoch": 0.14849487531272698, + "grad_norm": 0.7617168426513672, + "learning_rate": 0.0001962508782746397, + "loss": 2.8041, + "step": 1840 + }, + { + "epoch": 0.14857557904930999, + "grad_norm": 0.9630066156387329, + "learning_rate": 0.00019624659487106264, + "loss": 2.814, + "step": 1841 + }, + { + "epoch": 0.148656282785893, + "grad_norm": 0.7656112313270569, + "learning_rate": 0.00019624230906876888, + "loss": 2.7564, + "step": 1842 + }, + { + "epoch": 0.148736986522476, + "grad_norm": 0.9394779801368713, + "learning_rate": 0.0001962380208678652, + "loss": 2.7958, + "step": 1843 + }, + { + 
"epoch": 0.148817690259059, + "grad_norm": 0.7647004127502441, + "learning_rate": 0.00019623373026845842, + "loss": 2.72, + "step": 1844 + }, + { + "epoch": 0.148898393995642, + "grad_norm": 0.809079647064209, + "learning_rate": 0.00019622943727065555, + "loss": 2.7732, + "step": 1845 + }, + { + "epoch": 0.148979097732225, + "grad_norm": 0.8241337537765503, + "learning_rate": 0.00019622514187456357, + "loss": 2.759, + "step": 1846 + }, + { + "epoch": 0.149059801468808, + "grad_norm": 0.8979619145393372, + "learning_rate": 0.00019622084408028948, + "loss": 2.8307, + "step": 1847 + }, + { + "epoch": 0.14914050520539102, + "grad_norm": 0.8058865666389465, + "learning_rate": 0.00019621654388794047, + "loss": 2.807, + "step": 1848 + }, + { + "epoch": 0.14922120894197402, + "grad_norm": 0.81967693567276, + "learning_rate": 0.00019621224129762364, + "loss": 2.7762, + "step": 1849 + }, + { + "epoch": 0.14930191267855702, + "grad_norm": 0.7385755777359009, + "learning_rate": 0.0001962079363094463, + "loss": 2.7854, + "step": 1850 + }, + { + "epoch": 0.14938261641514003, + "grad_norm": 0.8585657477378845, + "learning_rate": 0.00019620362892351566, + "loss": 2.7781, + "step": 1851 + }, + { + "epoch": 0.14946332015172303, + "grad_norm": 0.8328986763954163, + "learning_rate": 0.00019619931913993912, + "loss": 2.8245, + "step": 1852 + }, + { + "epoch": 0.14954402388830604, + "grad_norm": 0.749727189540863, + "learning_rate": 0.0001961950069588241, + "loss": 2.8049, + "step": 1853 + }, + { + "epoch": 0.14962472762488904, + "grad_norm": 0.7886502742767334, + "learning_rate": 0.00019619069238027803, + "loss": 2.7521, + "step": 1854 + }, + { + "epoch": 0.14970543136147205, + "grad_norm": 0.816137433052063, + "learning_rate": 0.00019618637540440848, + "loss": 2.8383, + "step": 1855 + }, + { + "epoch": 0.14978613509805505, + "grad_norm": 0.80442214012146, + "learning_rate": 0.000196182056031323, + "loss": 2.7227, + "step": 1856 + }, + { + "epoch": 0.14986683883463806, + "grad_norm": 
0.7605221271514893, + "learning_rate": 0.00019617773426112924, + "loss": 2.7494, + "step": 1857 + }, + { + "epoch": 0.14994754257122106, + "grad_norm": 0.8745137453079224, + "learning_rate": 0.00019617341009393497, + "loss": 2.6978, + "step": 1858 + }, + { + "epoch": 0.15002824630780406, + "grad_norm": 0.8151741623878479, + "learning_rate": 0.00019616908352984789, + "loss": 2.7817, + "step": 1859 + }, + { + "epoch": 0.15010895004438707, + "grad_norm": 0.773876428604126, + "learning_rate": 0.0001961647545689759, + "loss": 2.812, + "step": 1860 + }, + { + "epoch": 0.15018965378097007, + "grad_norm": 0.8216966390609741, + "learning_rate": 0.00019616042321142683, + "loss": 2.8181, + "step": 1861 + }, + { + "epoch": 0.15027035751755305, + "grad_norm": 0.8097409605979919, + "learning_rate": 0.00019615608945730862, + "loss": 2.8336, + "step": 1862 + }, + { + "epoch": 0.15035106125413605, + "grad_norm": 0.8085697293281555, + "learning_rate": 0.00019615175330672932, + "loss": 2.8176, + "step": 1863 + }, + { + "epoch": 0.15043176499071906, + "grad_norm": 0.7658133506774902, + "learning_rate": 0.00019614741475979701, + "loss": 2.7543, + "step": 1864 + }, + { + "epoch": 0.15051246872730206, + "grad_norm": 0.7193909883499146, + "learning_rate": 0.00019614307381661978, + "loss": 2.7475, + "step": 1865 + }, + { + "epoch": 0.15059317246388507, + "grad_norm": 0.835608959197998, + "learning_rate": 0.0001961387304773058, + "loss": 2.8017, + "step": 1866 + }, + { + "epoch": 0.15067387620046807, + "grad_norm": 0.7898489832878113, + "learning_rate": 0.0001961343847419634, + "loss": 2.7613, + "step": 1867 + }, + { + "epoch": 0.15075457993705108, + "grad_norm": 0.8031982183456421, + "learning_rate": 0.0001961300366107008, + "loss": 2.7442, + "step": 1868 + }, + { + "epoch": 0.15083528367363408, + "grad_norm": 0.8427363634109497, + "learning_rate": 0.00019612568608362642, + "loss": 2.8095, + "step": 1869 + }, + { + "epoch": 0.15091598741021708, + "grad_norm": 0.8282802700996399, + 
"learning_rate": 0.00019612133316084863, + "loss": 2.7216, + "step": 1870 + }, + { + "epoch": 0.1509966911468001, + "grad_norm": 0.7799758911132812, + "learning_rate": 0.000196116977842476, + "loss": 2.793, + "step": 1871 + }, + { + "epoch": 0.1510773948833831, + "grad_norm": 0.8151525259017944, + "learning_rate": 0.00019611262012861702, + "loss": 2.7641, + "step": 1872 + }, + { + "epoch": 0.1511580986199661, + "grad_norm": 0.7926812767982483, + "learning_rate": 0.0001961082600193803, + "loss": 2.7523, + "step": 1873 + }, + { + "epoch": 0.1512388023565491, + "grad_norm": 0.8737135529518127, + "learning_rate": 0.0001961038975148745, + "loss": 2.7965, + "step": 1874 + }, + { + "epoch": 0.1513195060931321, + "grad_norm": 0.7948090434074402, + "learning_rate": 0.00019609953261520837, + "loss": 2.7737, + "step": 1875 + }, + { + "epoch": 0.1514002098297151, + "grad_norm": 0.8161277770996094, + "learning_rate": 0.0001960951653204907, + "loss": 2.7423, + "step": 1876 + }, + { + "epoch": 0.15148091356629811, + "grad_norm": 0.8904973864555359, + "learning_rate": 0.00019609079563083026, + "loss": 2.7066, + "step": 1877 + }, + { + "epoch": 0.15156161730288112, + "grad_norm": 0.8107061982154846, + "learning_rate": 0.00019608642354633604, + "loss": 2.7939, + "step": 1878 + }, + { + "epoch": 0.15164232103946412, + "grad_norm": 0.8410987854003906, + "learning_rate": 0.00019608204906711694, + "loss": 2.7521, + "step": 1879 + }, + { + "epoch": 0.15172302477604713, + "grad_norm": 0.8336483836174011, + "learning_rate": 0.0001960776721932821, + "loss": 2.7613, + "step": 1880 + }, + { + "epoch": 0.15180372851263013, + "grad_norm": 0.730549156665802, + "learning_rate": 0.00019607329292494044, + "loss": 2.8019, + "step": 1881 + }, + { + "epoch": 0.15188443224921314, + "grad_norm": 0.7543070912361145, + "learning_rate": 0.0001960689112622012, + "loss": 2.6907, + "step": 1882 + }, + { + "epoch": 0.15196513598579614, + "grad_norm": 0.848414421081543, + "learning_rate": 
0.00019606452720517359, + "loss": 2.7278, + "step": 1883 + }, + { + "epoch": 0.15204583972237914, + "grad_norm": 0.8331718444824219, + "learning_rate": 0.00019606014075396682, + "loss": 2.6994, + "step": 1884 + }, + { + "epoch": 0.15212654345896215, + "grad_norm": 0.9192764759063721, + "learning_rate": 0.00019605575190869025, + "loss": 2.7095, + "step": 1885 + }, + { + "epoch": 0.15220724719554515, + "grad_norm": 0.8377116322517395, + "learning_rate": 0.00019605136066945324, + "loss": 2.7925, + "step": 1886 + }, + { + "epoch": 0.15228795093212816, + "grad_norm": 0.7302869558334351, + "learning_rate": 0.00019604696703636525, + "loss": 2.7286, + "step": 1887 + }, + { + "epoch": 0.15236865466871116, + "grad_norm": 0.7972438335418701, + "learning_rate": 0.00019604257100953577, + "loss": 2.7732, + "step": 1888 + }, + { + "epoch": 0.15244935840529417, + "grad_norm": 1.0350826978683472, + "learning_rate": 0.00019603817258907435, + "loss": 2.8211, + "step": 1889 + }, + { + "epoch": 0.15253006214187717, + "grad_norm": 0.782755970954895, + "learning_rate": 0.00019603377177509067, + "loss": 2.8489, + "step": 1890 + }, + { + "epoch": 0.15261076587846018, + "grad_norm": 0.9072603583335876, + "learning_rate": 0.0001960293685676943, + "loss": 2.7764, + "step": 1891 + }, + { + "epoch": 0.15269146961504318, + "grad_norm": 0.7878704071044922, + "learning_rate": 0.0001960249629669951, + "loss": 2.7494, + "step": 1892 + }, + { + "epoch": 0.15277217335162618, + "grad_norm": 0.8770418167114258, + "learning_rate": 0.00019602055497310278, + "loss": 2.7318, + "step": 1893 + }, + { + "epoch": 0.1528528770882092, + "grad_norm": 0.8004975914955139, + "learning_rate": 0.00019601614458612723, + "loss": 2.7272, + "step": 1894 + }, + { + "epoch": 0.1529335808247922, + "grad_norm": 0.8511070013046265, + "learning_rate": 0.00019601173180617835, + "loss": 2.7876, + "step": 1895 + }, + { + "epoch": 0.1530142845613752, + "grad_norm": 0.7946128845214844, + "learning_rate": 0.00019600731663336617, + 
"loss": 2.7435, + "step": 1896 + }, + { + "epoch": 0.1530949882979582, + "grad_norm": 0.8155317902565002, + "learning_rate": 0.00019600289906780067, + "loss": 2.7642, + "step": 1897 + }, + { + "epoch": 0.1531756920345412, + "grad_norm": 0.8086098432540894, + "learning_rate": 0.000195998479109592, + "loss": 2.7358, + "step": 1898 + }, + { + "epoch": 0.1532563957711242, + "grad_norm": 0.8698278665542603, + "learning_rate": 0.00019599405675885026, + "loss": 2.725, + "step": 1899 + }, + { + "epoch": 0.15333709950770721, + "grad_norm": 0.8756006360054016, + "learning_rate": 0.00019598963201568573, + "loss": 2.7209, + "step": 1900 + }, + { + "epoch": 0.15341780324429022, + "grad_norm": 0.7984628081321716, + "learning_rate": 0.0001959852048802086, + "loss": 2.7685, + "step": 1901 + }, + { + "epoch": 0.15349850698087322, + "grad_norm": 0.8244056105613708, + "learning_rate": 0.0001959807753525293, + "loss": 2.7692, + "step": 1902 + }, + { + "epoch": 0.15357921071745623, + "grad_norm": 0.8577731251716614, + "learning_rate": 0.00019597634343275814, + "loss": 2.7571, + "step": 1903 + }, + { + "epoch": 0.15365991445403923, + "grad_norm": 0.8410975933074951, + "learning_rate": 0.00019597190912100566, + "loss": 2.7862, + "step": 1904 + }, + { + "epoch": 0.15374061819062224, + "grad_norm": 0.9094158411026001, + "learning_rate": 0.0001959674724173823, + "loss": 2.7655, + "step": 1905 + }, + { + "epoch": 0.15382132192720524, + "grad_norm": 0.8375208973884583, + "learning_rate": 0.00019596303332199868, + "loss": 2.8129, + "step": 1906 + }, + { + "epoch": 0.15390202566378824, + "grad_norm": 0.8335977792739868, + "learning_rate": 0.00019595859183496543, + "loss": 2.7835, + "step": 1907 + }, + { + "epoch": 0.15398272940037125, + "grad_norm": 0.7973531484603882, + "learning_rate": 0.0001959541479563932, + "loss": 2.7785, + "step": 1908 + }, + { + "epoch": 0.15406343313695425, + "grad_norm": 0.7808824181556702, + "learning_rate": 0.0001959497016863928, + "loss": 2.7862, + "step": 1909 + 
}, + { + "epoch": 0.15414413687353726, + "grad_norm": 0.853824257850647, + "learning_rate": 0.00019594525302507504, + "loss": 2.6721, + "step": 1910 + }, + { + "epoch": 0.15422484061012026, + "grad_norm": 0.8589324355125427, + "learning_rate": 0.00019594080197255073, + "loss": 2.7948, + "step": 1911 + }, + { + "epoch": 0.15430554434670327, + "grad_norm": 0.7951898574829102, + "learning_rate": 0.00019593634852893086, + "loss": 2.7903, + "step": 1912 + }, + { + "epoch": 0.15438624808328624, + "grad_norm": 0.8333349227905273, + "learning_rate": 0.0001959318926943264, + "loss": 2.8073, + "step": 1913 + }, + { + "epoch": 0.15446695181986925, + "grad_norm": 0.8552380800247192, + "learning_rate": 0.0001959274344688484, + "loss": 2.8199, + "step": 1914 + }, + { + "epoch": 0.15454765555645225, + "grad_norm": 0.8356214165687561, + "learning_rate": 0.000195922973852608, + "loss": 2.7985, + "step": 1915 + }, + { + "epoch": 0.15462835929303526, + "grad_norm": 0.7167248725891113, + "learning_rate": 0.00019591851084571634, + "loss": 2.6802, + "step": 1916 + }, + { + "epoch": 0.15470906302961826, + "grad_norm": 0.7980726361274719, + "learning_rate": 0.00019591404544828464, + "loss": 2.692, + "step": 1917 + }, + { + "epoch": 0.15478976676620126, + "grad_norm": 0.7766004800796509, + "learning_rate": 0.00019590957766042424, + "loss": 2.7219, + "step": 1918 + }, + { + "epoch": 0.15487047050278427, + "grad_norm": 0.828852653503418, + "learning_rate": 0.0001959051074822464, + "loss": 2.7369, + "step": 1919 + }, + { + "epoch": 0.15495117423936727, + "grad_norm": 0.7818129062652588, + "learning_rate": 0.0001959006349138626, + "loss": 2.7778, + "step": 1920 + }, + { + "epoch": 0.15503187797595028, + "grad_norm": 0.8428593873977661, + "learning_rate": 0.00019589615995538432, + "loss": 2.8257, + "step": 1921 + }, + { + "epoch": 0.15511258171253328, + "grad_norm": 0.8756616115570068, + "learning_rate": 0.00019589168260692307, + "loss": 2.7692, + "step": 1922 + }, + { + "epoch": 
0.15519328544911629, + "grad_norm": 0.7802519202232361, + "learning_rate": 0.0001958872028685904, + "loss": 2.7811, + "step": 1923 + }, + { + "epoch": 0.1552739891856993, + "grad_norm": 0.7787032723426819, + "learning_rate": 0.00019588272074049797, + "loss": 2.7546, + "step": 1924 + }, + { + "epoch": 0.1553546929222823, + "grad_norm": 0.848479151725769, + "learning_rate": 0.0001958782362227575, + "loss": 2.7759, + "step": 1925 + }, + { + "epoch": 0.1554353966588653, + "grad_norm": 0.8331353664398193, + "learning_rate": 0.00019587374931548076, + "loss": 2.7881, + "step": 1926 + }, + { + "epoch": 0.1555161003954483, + "grad_norm": 0.8646424412727356, + "learning_rate": 0.00019586926001877958, + "loss": 2.8059, + "step": 1927 + }, + { + "epoch": 0.1555968041320313, + "grad_norm": 0.912253737449646, + "learning_rate": 0.00019586476833276584, + "loss": 2.7446, + "step": 1928 + }, + { + "epoch": 0.1556775078686143, + "grad_norm": 0.9256471395492554, + "learning_rate": 0.00019586027425755147, + "loss": 2.8, + "step": 1929 + }, + { + "epoch": 0.15575821160519732, + "grad_norm": 1.0984607934951782, + "learning_rate": 0.0001958557777932485, + "loss": 2.7759, + "step": 1930 + }, + { + "epoch": 0.15583891534178032, + "grad_norm": 0.8736081123352051, + "learning_rate": 0.00019585127893996895, + "loss": 2.7464, + "step": 1931 + }, + { + "epoch": 0.15591961907836333, + "grad_norm": 0.932538628578186, + "learning_rate": 0.00019584677769782498, + "loss": 2.7874, + "step": 1932 + }, + { + "epoch": 0.15600032281494633, + "grad_norm": 0.9742087125778198, + "learning_rate": 0.0001958422740669288, + "loss": 2.7727, + "step": 1933 + }, + { + "epoch": 0.15608102655152933, + "grad_norm": 0.8975874781608582, + "learning_rate": 0.00019583776804739256, + "loss": 2.7812, + "step": 1934 + }, + { + "epoch": 0.15616173028811234, + "grad_norm": 0.9380232691764832, + "learning_rate": 0.00019583325963932864, + "loss": 2.7284, + "step": 1935 + }, + { + "epoch": 0.15624243402469534, + "grad_norm": 
0.8332872986793518, + "learning_rate": 0.00019582874884284938, + "loss": 2.7792, + "step": 1936 + }, + { + "epoch": 0.15632313776127835, + "grad_norm": 1.0017194747924805, + "learning_rate": 0.0001958242356580672, + "loss": 2.7187, + "step": 1937 + }, + { + "epoch": 0.15640384149786135, + "grad_norm": 0.9433515667915344, + "learning_rate": 0.0001958197200850946, + "loss": 2.8394, + "step": 1938 + }, + { + "epoch": 0.15648454523444436, + "grad_norm": 0.8781030178070068, + "learning_rate": 0.00019581520212404407, + "loss": 2.7667, + "step": 1939 + }, + { + "epoch": 0.15656524897102736, + "grad_norm": 0.895656168460846, + "learning_rate": 0.00019581068177502826, + "loss": 2.799, + "step": 1940 + }, + { + "epoch": 0.15664595270761036, + "grad_norm": 0.8336960673332214, + "learning_rate": 0.0001958061590381598, + "loss": 2.8152, + "step": 1941 + }, + { + "epoch": 0.15672665644419337, + "grad_norm": 0.9184536337852478, + "learning_rate": 0.00019580163391355143, + "loss": 2.7746, + "step": 1942 + }, + { + "epoch": 0.15680736018077637, + "grad_norm": 0.8564908504486084, + "learning_rate": 0.00019579710640131587, + "loss": 2.7674, + "step": 1943 + }, + { + "epoch": 0.15688806391735938, + "grad_norm": 0.7491608262062073, + "learning_rate": 0.00019579257650156605, + "loss": 2.7665, + "step": 1944 + }, + { + "epoch": 0.15696876765394238, + "grad_norm": 0.9165031313896179, + "learning_rate": 0.00019578804421441478, + "loss": 2.7343, + "step": 1945 + }, + { + "epoch": 0.15704947139052539, + "grad_norm": 0.8413978815078735, + "learning_rate": 0.00019578350953997512, + "loss": 2.7503, + "step": 1946 + }, + { + "epoch": 0.1571301751271084, + "grad_norm": 0.7820419073104858, + "learning_rate": 0.00019577897247835993, + "loss": 2.7535, + "step": 1947 + }, + { + "epoch": 0.1572108788636914, + "grad_norm": 0.8134996294975281, + "learning_rate": 0.00019577443302968246, + "loss": 2.7504, + "step": 1948 + }, + { + "epoch": 0.1572915826002744, + "grad_norm": 0.8201301097869873, + 
"learning_rate": 0.00019576989119405574, + "loss": 2.6927, + "step": 1949 + }, + { + "epoch": 0.1573722863368574, + "grad_norm": 0.8343217372894287, + "learning_rate": 0.00019576534697159296, + "loss": 2.7742, + "step": 1950 + }, + { + "epoch": 0.1574529900734404, + "grad_norm": 0.8161751627922058, + "learning_rate": 0.0001957608003624074, + "loss": 2.8236, + "step": 1951 + }, + { + "epoch": 0.1575336938100234, + "grad_norm": 0.8626808524131775, + "learning_rate": 0.00019575625136661242, + "loss": 2.7305, + "step": 1952 + }, + { + "epoch": 0.15761439754660642, + "grad_norm": 0.8238986730575562, + "learning_rate": 0.0001957516999843213, + "loss": 2.7641, + "step": 1953 + }, + { + "epoch": 0.15769510128318942, + "grad_norm": 0.7806095480918884, + "learning_rate": 0.00019574714621564755, + "loss": 2.7155, + "step": 1954 + }, + { + "epoch": 0.15777580501977242, + "grad_norm": 0.8137761950492859, + "learning_rate": 0.0001957425900607046, + "loss": 2.7529, + "step": 1955 + }, + { + "epoch": 0.15785650875635543, + "grad_norm": 0.8383988738059998, + "learning_rate": 0.00019573803151960606, + "loss": 2.7726, + "step": 1956 + }, + { + "epoch": 0.15793721249293843, + "grad_norm": 0.8734413385391235, + "learning_rate": 0.00019573347059246549, + "loss": 2.8563, + "step": 1957 + }, + { + "epoch": 0.15801791622952144, + "grad_norm": 0.8018438816070557, + "learning_rate": 0.0001957289072793966, + "loss": 2.8031, + "step": 1958 + }, + { + "epoch": 0.15809861996610444, + "grad_norm": 0.8175764083862305, + "learning_rate": 0.0001957243415805131, + "loss": 2.7824, + "step": 1959 + }, + { + "epoch": 0.15817932370268745, + "grad_norm": 0.7642164826393127, + "learning_rate": 0.00019571977349592878, + "loss": 2.7666, + "step": 1960 + }, + { + "epoch": 0.15826002743927045, + "grad_norm": 0.7584841847419739, + "learning_rate": 0.0001957152030257575, + "loss": 2.7211, + "step": 1961 + }, + { + "epoch": 0.15834073117585346, + "grad_norm": 0.822610080242157, + "learning_rate": 
0.00019571063017011312, + "loss": 2.7025, + "step": 1962 + }, + { + "epoch": 0.15842143491243646, + "grad_norm": 0.7553817629814148, + "learning_rate": 0.00019570605492910968, + "loss": 2.8122, + "step": 1963 + }, + { + "epoch": 0.15850213864901944, + "grad_norm": 0.7224497199058533, + "learning_rate": 0.0001957014773028612, + "loss": 2.7613, + "step": 1964 + }, + { + "epoch": 0.15858284238560244, + "grad_norm": 0.8563623428344727, + "learning_rate": 0.00019569689729148168, + "loss": 2.8005, + "step": 1965 + }, + { + "epoch": 0.15866354612218544, + "grad_norm": 0.7665508389472961, + "learning_rate": 0.00019569231489508537, + "loss": 2.7387, + "step": 1966 + }, + { + "epoch": 0.15874424985876845, + "grad_norm": 0.7788479328155518, + "learning_rate": 0.0001956877301137864, + "loss": 2.7229, + "step": 1967 + }, + { + "epoch": 0.15882495359535145, + "grad_norm": 0.7326748371124268, + "learning_rate": 0.00019568314294769908, + "loss": 2.7728, + "step": 1968 + }, + { + "epoch": 0.15890565733193446, + "grad_norm": 0.790492594242096, + "learning_rate": 0.00019567855339693772, + "loss": 2.7809, + "step": 1969 + }, + { + "epoch": 0.15898636106851746, + "grad_norm": 0.8026898503303528, + "learning_rate": 0.0001956739614616167, + "loss": 2.7267, + "step": 1970 + }, + { + "epoch": 0.15906706480510047, + "grad_norm": 0.7963770627975464, + "learning_rate": 0.00019566936714185046, + "loss": 2.7161, + "step": 1971 + }, + { + "epoch": 0.15914776854168347, + "grad_norm": 0.7708200216293335, + "learning_rate": 0.00019566477043775354, + "loss": 2.7223, + "step": 1972 + }, + { + "epoch": 0.15922847227826648, + "grad_norm": 0.8036624789237976, + "learning_rate": 0.00019566017134944042, + "loss": 2.7644, + "step": 1973 + }, + { + "epoch": 0.15930917601484948, + "grad_norm": 0.8221341967582703, + "learning_rate": 0.00019565556987702581, + "loss": 2.7629, + "step": 1974 + }, + { + "epoch": 0.15938987975143248, + "grad_norm": 0.7685462832450867, + "learning_rate": 0.00019565096602062435, + 
"loss": 2.8016, + "step": 1975 + }, + { + "epoch": 0.1594705834880155, + "grad_norm": 0.8173574209213257, + "learning_rate": 0.00019564635978035075, + "loss": 2.761, + "step": 1976 + }, + { + "epoch": 0.1595512872245985, + "grad_norm": 0.7567519545555115, + "learning_rate": 0.00019564175115631988, + "loss": 2.7794, + "step": 1977 + }, + { + "epoch": 0.1596319909611815, + "grad_norm": 0.8754587173461914, + "learning_rate": 0.00019563714014864654, + "loss": 2.7769, + "step": 1978 + }, + { + "epoch": 0.1597126946977645, + "grad_norm": 0.753871738910675, + "learning_rate": 0.00019563252675744569, + "loss": 2.7489, + "step": 1979 + }, + { + "epoch": 0.1597933984343475, + "grad_norm": 0.777103841304779, + "learning_rate": 0.00019562791098283225, + "loss": 2.7667, + "step": 1980 + }, + { + "epoch": 0.1598741021709305, + "grad_norm": 0.8227293491363525, + "learning_rate": 0.00019562329282492131, + "loss": 2.7904, + "step": 1981 + }, + { + "epoch": 0.15995480590751351, + "grad_norm": 0.7595541477203369, + "learning_rate": 0.00019561867228382797, + "loss": 2.7654, + "step": 1982 + }, + { + "epoch": 0.16003550964409652, + "grad_norm": 0.8330550789833069, + "learning_rate": 0.00019561404935966733, + "loss": 2.7533, + "step": 1983 + }, + { + "epoch": 0.16011621338067952, + "grad_norm": 0.8213297128677368, + "learning_rate": 0.0001956094240525547, + "loss": 2.8103, + "step": 1984 + }, + { + "epoch": 0.16019691711726253, + "grad_norm": 0.8046056628227234, + "learning_rate": 0.00019560479636260527, + "loss": 2.7666, + "step": 1985 + }, + { + "epoch": 0.16027762085384553, + "grad_norm": 0.7886037230491638, + "learning_rate": 0.0001956001662899344, + "loss": 2.7066, + "step": 1986 + }, + { + "epoch": 0.16035832459042854, + "grad_norm": 0.8300043940544128, + "learning_rate": 0.00019559553383465748, + "loss": 2.7617, + "step": 1987 + }, + { + "epoch": 0.16043902832701154, + "grad_norm": 0.7963815331459045, + "learning_rate": 0.00019559089899688994, + "loss": 2.6891, + "step": 1988 + 
}, + { + "epoch": 0.16051973206359454, + "grad_norm": 0.7794002294540405, + "learning_rate": 0.00019558626177674734, + "loss": 2.8012, + "step": 1989 + }, + { + "epoch": 0.16060043580017755, + "grad_norm": 0.8345863819122314, + "learning_rate": 0.00019558162217434526, + "loss": 2.7715, + "step": 1990 + }, + { + "epoch": 0.16068113953676055, + "grad_norm": 0.8883393406867981, + "learning_rate": 0.00019557698018979927, + "loss": 2.7863, + "step": 1991 + }, + { + "epoch": 0.16076184327334356, + "grad_norm": 0.8069450259208679, + "learning_rate": 0.0001955723358232251, + "loss": 2.759, + "step": 1992 + }, + { + "epoch": 0.16084254700992656, + "grad_norm": 0.9014191031455994, + "learning_rate": 0.00019556768907473852, + "loss": 2.711, + "step": 1993 + }, + { + "epoch": 0.16092325074650957, + "grad_norm": 0.8429470658302307, + "learning_rate": 0.0001955630399444553, + "loss": 2.6936, + "step": 1994 + }, + { + "epoch": 0.16100395448309257, + "grad_norm": 0.7859500050544739, + "learning_rate": 0.00019555838843249128, + "loss": 2.7343, + "step": 1995 + }, + { + "epoch": 0.16108465821967557, + "grad_norm": 0.8068249821662903, + "learning_rate": 0.00019555373453896245, + "loss": 2.7492, + "step": 1996 + }, + { + "epoch": 0.16116536195625858, + "grad_norm": 0.8194023370742798, + "learning_rate": 0.00019554907826398478, + "loss": 2.7265, + "step": 1997 + }, + { + "epoch": 0.16124606569284158, + "grad_norm": 0.8139404654502869, + "learning_rate": 0.00019554441960767434, + "loss": 2.7311, + "step": 1998 + }, + { + "epoch": 0.1613267694294246, + "grad_norm": 0.8210673928260803, + "learning_rate": 0.00019553975857014718, + "loss": 2.7095, + "step": 1999 + }, + { + "epoch": 0.1614074731660076, + "grad_norm": 0.8615561723709106, + "learning_rate": 0.0001955350951515195, + "loss": 2.7458, + "step": 2000 + }, + { + "epoch": 0.1614074731660076, + "eval_loss": 2.6739437580108643, + "eval_runtime": 813.8274, + "eval_samples_per_second": 3.219, + "eval_steps_per_second": 0.537, + "step": 
2000 + }, + { + "epoch": 0.1614881769025906, + "grad_norm": 0.8945594429969788, + "learning_rate": 0.0001955304293519075, + "loss": 2.776, + "step": 2001 + }, + { + "epoch": 0.1615688806391736, + "grad_norm": 0.7943438291549683, + "learning_rate": 0.00019552576117142748, + "loss": 2.7484, + "step": 2002 + }, + { + "epoch": 0.1616495843757566, + "grad_norm": 0.8264374136924744, + "learning_rate": 0.00019552109061019582, + "loss": 2.7725, + "step": 2003 + }, + { + "epoch": 0.1617302881123396, + "grad_norm": 0.7591681480407715, + "learning_rate": 0.00019551641766832887, + "loss": 2.7217, + "step": 2004 + }, + { + "epoch": 0.16181099184892261, + "grad_norm": 0.8275293707847595, + "learning_rate": 0.0001955117423459431, + "loss": 2.7279, + "step": 2005 + }, + { + "epoch": 0.16189169558550562, + "grad_norm": 0.8109650611877441, + "learning_rate": 0.00019550706464315504, + "loss": 2.8111, + "step": 2006 + }, + { + "epoch": 0.16197239932208862, + "grad_norm": 0.8710397481918335, + "learning_rate": 0.00019550238456008127, + "loss": 2.7166, + "step": 2007 + }, + { + "epoch": 0.16205310305867163, + "grad_norm": 0.8569270968437195, + "learning_rate": 0.00019549770209683845, + "loss": 2.7739, + "step": 2008 + }, + { + "epoch": 0.16213380679525463, + "grad_norm": 0.7927817702293396, + "learning_rate": 0.00019549301725354325, + "loss": 2.7154, + "step": 2009 + }, + { + "epoch": 0.16221451053183764, + "grad_norm": 0.7576590776443481, + "learning_rate": 0.00019548833003031244, + "loss": 2.7276, + "step": 2010 + }, + { + "epoch": 0.16229521426842064, + "grad_norm": 0.8092780709266663, + "learning_rate": 0.00019548364042726283, + "loss": 2.7494, + "step": 2011 + }, + { + "epoch": 0.16237591800500364, + "grad_norm": 0.7643424868583679, + "learning_rate": 0.0001954789484445113, + "loss": 2.7877, + "step": 2012 + }, + { + "epoch": 0.16245662174158665, + "grad_norm": 0.8235166072845459, + "learning_rate": 0.0001954742540821748, + "loss": 2.7884, + "step": 2013 + }, + { + "epoch": 
0.16253732547816965, + "grad_norm": 0.9297853708267212, + "learning_rate": 0.00019546955734037034, + "loss": 2.765, + "step": 2014 + }, + { + "epoch": 0.16261802921475263, + "grad_norm": 0.7778275609016418, + "learning_rate": 0.0001954648582192149, + "loss": 2.7178, + "step": 2015 + }, + { + "epoch": 0.16269873295133563, + "grad_norm": 0.8767017126083374, + "learning_rate": 0.00019546015671882566, + "loss": 2.8254, + "step": 2016 + }, + { + "epoch": 0.16277943668791864, + "grad_norm": 0.7870603203773499, + "learning_rate": 0.0001954554528393198, + "loss": 2.797, + "step": 2017 + }, + { + "epoch": 0.16286014042450164, + "grad_norm": 0.8112391233444214, + "learning_rate": 0.00019545074658081454, + "loss": 2.8562, + "step": 2018 + }, + { + "epoch": 0.16294084416108465, + "grad_norm": 0.8216677308082581, + "learning_rate": 0.00019544603794342713, + "loss": 2.7894, + "step": 2019 + }, + { + "epoch": 0.16302154789766765, + "grad_norm": 0.8445515632629395, + "learning_rate": 0.00019544132692727497, + "loss": 2.8618, + "step": 2020 + }, + { + "epoch": 0.16310225163425066, + "grad_norm": 0.8275444507598877, + "learning_rate": 0.00019543661353247548, + "loss": 2.8087, + "step": 2021 + }, + { + "epoch": 0.16318295537083366, + "grad_norm": 0.8142833709716797, + "learning_rate": 0.00019543189775914608, + "loss": 2.8075, + "step": 2022 + }, + { + "epoch": 0.16326365910741666, + "grad_norm": 0.8182976245880127, + "learning_rate": 0.0001954271796074043, + "loss": 2.8312, + "step": 2023 + }, + { + "epoch": 0.16334436284399967, + "grad_norm": 0.7629228234291077, + "learning_rate": 0.0001954224590773678, + "loss": 2.7191, + "step": 2024 + }, + { + "epoch": 0.16342506658058267, + "grad_norm": 0.8630000948905945, + "learning_rate": 0.00019541773616915418, + "loss": 2.8013, + "step": 2025 + }, + { + "epoch": 0.16350577031716568, + "grad_norm": 0.8917906880378723, + "learning_rate": 0.00019541301088288115, + "loss": 2.7573, + "step": 2026 + }, + { + "epoch": 0.16358647405374868, + 
"grad_norm": 0.8641694188117981, + "learning_rate": 0.00019540828321866648, + "loss": 2.7509, + "step": 2027 + }, + { + "epoch": 0.16366717779033169, + "grad_norm": 0.7687639594078064, + "learning_rate": 0.00019540355317662798, + "loss": 2.7266, + "step": 2028 + }, + { + "epoch": 0.1637478815269147, + "grad_norm": 0.7870400547981262, + "learning_rate": 0.00019539882075688355, + "loss": 2.8217, + "step": 2029 + }, + { + "epoch": 0.1638285852634977, + "grad_norm": 0.9373054504394531, + "learning_rate": 0.0001953940859595511, + "loss": 2.7562, + "step": 2030 + }, + { + "epoch": 0.1639092890000807, + "grad_norm": 0.7941255569458008, + "learning_rate": 0.00019538934878474872, + "loss": 2.7553, + "step": 2031 + }, + { + "epoch": 0.1639899927366637, + "grad_norm": 0.735977053642273, + "learning_rate": 0.00019538460923259438, + "loss": 2.7058, + "step": 2032 + }, + { + "epoch": 0.1640706964732467, + "grad_norm": 0.7812782526016235, + "learning_rate": 0.00019537986730320625, + "loss": 2.7885, + "step": 2033 + }, + { + "epoch": 0.1641514002098297, + "grad_norm": 1.1534128189086914, + "learning_rate": 0.0001953751229967025, + "loss": 2.7139, + "step": 2034 + }, + { + "epoch": 0.16423210394641272, + "grad_norm": 0.9139814972877502, + "learning_rate": 0.00019537037631320135, + "loss": 2.7869, + "step": 2035 + }, + { + "epoch": 0.16431280768299572, + "grad_norm": 0.8330421447753906, + "learning_rate": 0.00019536562725282116, + "loss": 2.7491, + "step": 2036 + }, + { + "epoch": 0.16439351141957873, + "grad_norm": 0.9040594696998596, + "learning_rate": 0.00019536087581568026, + "loss": 2.7637, + "step": 2037 + }, + { + "epoch": 0.16447421515616173, + "grad_norm": 0.9158666729927063, + "learning_rate": 0.00019535612200189705, + "loss": 2.7709, + "step": 2038 + }, + { + "epoch": 0.16455491889274473, + "grad_norm": 0.8668088912963867, + "learning_rate": 0.00019535136581158997, + "loss": 2.7994, + "step": 2039 + }, + { + "epoch": 0.16463562262932774, + "grad_norm": 0.9179345369338989, 
+ "learning_rate": 0.00019534660724487764, + "loss": 2.747, + "step": 2040 + }, + { + "epoch": 0.16471632636591074, + "grad_norm": 0.9690881967544556, + "learning_rate": 0.00019534184630187862, + "loss": 2.742, + "step": 2041 + }, + { + "epoch": 0.16479703010249375, + "grad_norm": 0.8478729724884033, + "learning_rate": 0.00019533708298271157, + "loss": 2.7824, + "step": 2042 + }, + { + "epoch": 0.16487773383907675, + "grad_norm": 0.8286584615707397, + "learning_rate": 0.00019533231728749518, + "loss": 2.7263, + "step": 2043 + }, + { + "epoch": 0.16495843757565976, + "grad_norm": 0.8095324039459229, + "learning_rate": 0.00019532754921634826, + "loss": 2.7845, + "step": 2044 + }, + { + "epoch": 0.16503914131224276, + "grad_norm": 0.9552872776985168, + "learning_rate": 0.0001953227787693896, + "loss": 2.7676, + "step": 2045 + }, + { + "epoch": 0.16511984504882576, + "grad_norm": 1.021515130996704, + "learning_rate": 0.00019531800594673815, + "loss": 2.784, + "step": 2046 + }, + { + "epoch": 0.16520054878540877, + "grad_norm": 0.7847293019294739, + "learning_rate": 0.00019531323074851276, + "loss": 2.7319, + "step": 2047 + }, + { + "epoch": 0.16528125252199177, + "grad_norm": 0.7803899049758911, + "learning_rate": 0.0001953084531748326, + "loss": 2.8321, + "step": 2048 + }, + { + "epoch": 0.16536195625857478, + "grad_norm": 0.8687692880630493, + "learning_rate": 0.0001953036732258166, + "loss": 2.763, + "step": 2049 + }, + { + "epoch": 0.16544265999515778, + "grad_norm": 0.8212031126022339, + "learning_rate": 0.00019529889090158392, + "loss": 2.7262, + "step": 2050 + }, + { + "epoch": 0.16552336373174079, + "grad_norm": 0.8460689187049866, + "learning_rate": 0.0001952941062022538, + "loss": 2.8018, + "step": 2051 + }, + { + "epoch": 0.1656040674683238, + "grad_norm": 0.9189361929893494, + "learning_rate": 0.00019528931912794547, + "loss": 2.8079, + "step": 2052 + }, + { + "epoch": 0.1656847712049068, + "grad_norm": 0.9529987573623657, + "learning_rate": 
0.00019528452967877816, + "loss": 2.8015, + "step": 2053 + }, + { + "epoch": 0.1657654749414898, + "grad_norm": 0.8468493223190308, + "learning_rate": 0.00019527973785487133, + "loss": 2.8013, + "step": 2054 + }, + { + "epoch": 0.1658461786780728, + "grad_norm": 0.8150945901870728, + "learning_rate": 0.00019527494365634436, + "loss": 2.7975, + "step": 2055 + }, + { + "epoch": 0.1659268824146558, + "grad_norm": 0.814942479133606, + "learning_rate": 0.00019527014708331674, + "loss": 2.7503, + "step": 2056 + }, + { + "epoch": 0.1660075861512388, + "grad_norm": 0.7841517329216003, + "learning_rate": 0.000195265348135908, + "loss": 2.7921, + "step": 2057 + }, + { + "epoch": 0.16608828988782182, + "grad_norm": 0.7603738903999329, + "learning_rate": 0.0001952605468142378, + "loss": 2.7658, + "step": 2058 + }, + { + "epoch": 0.16616899362440482, + "grad_norm": 0.8460882902145386, + "learning_rate": 0.00019525574311842574, + "loss": 2.7644, + "step": 2059 + }, + { + "epoch": 0.16624969736098782, + "grad_norm": 0.8633555173873901, + "learning_rate": 0.00019525093704859156, + "loss": 2.7956, + "step": 2060 + }, + { + "epoch": 0.16633040109757083, + "grad_norm": 0.7700977325439453, + "learning_rate": 0.00019524612860485503, + "loss": 2.7103, + "step": 2061 + }, + { + "epoch": 0.16641110483415383, + "grad_norm": 0.888770580291748, + "learning_rate": 0.00019524131778733602, + "loss": 2.7325, + "step": 2062 + }, + { + "epoch": 0.16649180857073684, + "grad_norm": 0.8338149189949036, + "learning_rate": 0.00019523650459615438, + "loss": 2.7533, + "step": 2063 + }, + { + "epoch": 0.16657251230731984, + "grad_norm": 0.7723987698554993, + "learning_rate": 0.0001952316890314301, + "loss": 2.7316, + "step": 2064 + }, + { + "epoch": 0.16665321604390285, + "grad_norm": 0.8952934145927429, + "learning_rate": 0.0001952268710932832, + "loss": 2.7825, + "step": 2065 + }, + { + "epoch": 0.16673391978048582, + "grad_norm": 0.8201496601104736, + "learning_rate": 0.00019522205078183378, + "loss": 
2.7162, + "step": 2066 + }, + { + "epoch": 0.16681462351706883, + "grad_norm": 0.7733781337738037, + "learning_rate": 0.00019521722809720188, + "loss": 2.7834, + "step": 2067 + }, + { + "epoch": 0.16689532725365183, + "grad_norm": 0.8285118937492371, + "learning_rate": 0.0001952124030395078, + "loss": 2.8475, + "step": 2068 + }, + { + "epoch": 0.16697603099023484, + "grad_norm": 0.84097820520401, + "learning_rate": 0.00019520757560887174, + "loss": 2.784, + "step": 2069 + }, + { + "epoch": 0.16705673472681784, + "grad_norm": 0.7336563467979431, + "learning_rate": 0.000195202745805414, + "loss": 2.7663, + "step": 2070 + }, + { + "epoch": 0.16713743846340084, + "grad_norm": 0.8359388113021851, + "learning_rate": 0.000195197913629255, + "loss": 2.7931, + "step": 2071 + }, + { + "epoch": 0.16721814219998385, + "grad_norm": 0.8272559642791748, + "learning_rate": 0.0001951930790805151, + "loss": 2.8578, + "step": 2072 + }, + { + "epoch": 0.16729884593656685, + "grad_norm": 0.7970743179321289, + "learning_rate": 0.00019518824215931487, + "loss": 2.8148, + "step": 2073 + }, + { + "epoch": 0.16737954967314986, + "grad_norm": 0.856200098991394, + "learning_rate": 0.00019518340286577482, + "loss": 2.8067, + "step": 2074 + }, + { + "epoch": 0.16746025340973286, + "grad_norm": 0.7581893801689148, + "learning_rate": 0.00019517856120001556, + "loss": 2.7339, + "step": 2075 + }, + { + "epoch": 0.16754095714631587, + "grad_norm": 0.8488386869430542, + "learning_rate": 0.00019517371716215774, + "loss": 2.7332, + "step": 2076 + }, + { + "epoch": 0.16762166088289887, + "grad_norm": 0.7488275170326233, + "learning_rate": 0.00019516887075232212, + "loss": 2.7734, + "step": 2077 + }, + { + "epoch": 0.16770236461948188, + "grad_norm": 0.9173932075500488, + "learning_rate": 0.00019516402197062945, + "loss": 2.7792, + "step": 2078 + }, + { + "epoch": 0.16778306835606488, + "grad_norm": 0.8200702667236328, + "learning_rate": 0.0001951591708172006, + "loss": 2.8046, + "step": 2079 + }, + { + 
"epoch": 0.16786377209264788, + "grad_norm": 0.8270781636238098, + "learning_rate": 0.00019515431729215642, + "loss": 2.7467, + "step": 2080 + }, + { + "epoch": 0.1679444758292309, + "grad_norm": 0.8660609722137451, + "learning_rate": 0.00019514946139561799, + "loss": 2.8169, + "step": 2081 + }, + { + "epoch": 0.1680251795658139, + "grad_norm": 0.78753262758255, + "learning_rate": 0.0001951446031277062, + "loss": 2.7388, + "step": 2082 + }, + { + "epoch": 0.1681058833023969, + "grad_norm": 0.791593074798584, + "learning_rate": 0.00019513974248854224, + "loss": 2.8776, + "step": 2083 + }, + { + "epoch": 0.1681865870389799, + "grad_norm": 0.7883535623550415, + "learning_rate": 0.0001951348794782472, + "loss": 2.78, + "step": 2084 + }, + { + "epoch": 0.1682672907755629, + "grad_norm": 0.7877013087272644, + "learning_rate": 0.00019513001409694224, + "loss": 2.7559, + "step": 2085 + }, + { + "epoch": 0.1683479945121459, + "grad_norm": 0.8838450908660889, + "learning_rate": 0.00019512514634474864, + "loss": 2.7489, + "step": 2086 + }, + { + "epoch": 0.16842869824872891, + "grad_norm": 0.7751588821411133, + "learning_rate": 0.00019512027622178775, + "loss": 2.6832, + "step": 2087 + }, + { + "epoch": 0.16850940198531192, + "grad_norm": 0.90345299243927, + "learning_rate": 0.00019511540372818095, + "loss": 2.8189, + "step": 2088 + }, + { + "epoch": 0.16859010572189492, + "grad_norm": 0.7820938229560852, + "learning_rate": 0.00019511052886404966, + "loss": 2.7655, + "step": 2089 + }, + { + "epoch": 0.16867080945847793, + "grad_norm": 0.8250375986099243, + "learning_rate": 0.00019510565162951537, + "loss": 2.7866, + "step": 2090 + }, + { + "epoch": 0.16875151319506093, + "grad_norm": 0.8063845634460449, + "learning_rate": 0.00019510077202469962, + "loss": 2.7774, + "step": 2091 + }, + { + "epoch": 0.16883221693164394, + "grad_norm": 0.7627965807914734, + "learning_rate": 0.00019509589004972403, + "loss": 2.7201, + "step": 2092 + }, + { + "epoch": 0.16891292066822694, + 
"grad_norm": 0.8392470479011536, + "learning_rate": 0.00019509100570471027, + "loss": 2.7613, + "step": 2093 + }, + { + "epoch": 0.16899362440480994, + "grad_norm": 0.7807552814483643, + "learning_rate": 0.0001950861189897801, + "loss": 2.7451, + "step": 2094 + }, + { + "epoch": 0.16907432814139295, + "grad_norm": 0.7829259634017944, + "learning_rate": 0.00019508122990505528, + "loss": 2.7128, + "step": 2095 + }, + { + "epoch": 0.16915503187797595, + "grad_norm": 0.7793046832084656, + "learning_rate": 0.00019507633845065766, + "loss": 2.7849, + "step": 2096 + }, + { + "epoch": 0.16923573561455896, + "grad_norm": 0.869752824306488, + "learning_rate": 0.00019507144462670915, + "loss": 2.7882, + "step": 2097 + }, + { + "epoch": 0.16931643935114196, + "grad_norm": 0.7550783753395081, + "learning_rate": 0.00019506654843333174, + "loss": 2.7211, + "step": 2098 + }, + { + "epoch": 0.16939714308772497, + "grad_norm": 0.8364891409873962, + "learning_rate": 0.0001950616498706474, + "loss": 2.7171, + "step": 2099 + }, + { + "epoch": 0.16947784682430797, + "grad_norm": 0.8026537299156189, + "learning_rate": 0.0001950567489387783, + "loss": 2.8362, + "step": 2100 + }, + { + "epoch": 0.16955855056089097, + "grad_norm": 0.8073398470878601, + "learning_rate": 0.00019505184563784652, + "loss": 2.7635, + "step": 2101 + }, + { + "epoch": 0.16963925429747398, + "grad_norm": 0.8168368935585022, + "learning_rate": 0.00019504693996797424, + "loss": 2.7553, + "step": 2102 + }, + { + "epoch": 0.16971995803405698, + "grad_norm": 0.7933681011199951, + "learning_rate": 0.0001950420319292838, + "loss": 2.7887, + "step": 2103 + }, + { + "epoch": 0.16980066177064, + "grad_norm": 0.8326540589332581, + "learning_rate": 0.00019503712152189748, + "loss": 2.7844, + "step": 2104 + }, + { + "epoch": 0.169881365507223, + "grad_norm": 0.8357202410697937, + "learning_rate": 0.00019503220874593765, + "loss": 2.7744, + "step": 2105 + }, + { + "epoch": 0.169962069243806, + "grad_norm": 0.8541022539138794, + 
"learning_rate": 0.00019502729360152676, + "loss": 2.7867, + "step": 2106 + }, + { + "epoch": 0.170042772980389, + "grad_norm": 0.8338841795921326, + "learning_rate": 0.0001950223760887873, + "loss": 2.7208, + "step": 2107 + }, + { + "epoch": 0.170123476716972, + "grad_norm": 0.8824255466461182, + "learning_rate": 0.00019501745620784187, + "loss": 2.7658, + "step": 2108 + }, + { + "epoch": 0.170204180453555, + "grad_norm": 0.7710463404655457, + "learning_rate": 0.00019501253395881306, + "loss": 2.7167, + "step": 2109 + }, + { + "epoch": 0.17028488419013801, + "grad_norm": 0.7740076184272766, + "learning_rate": 0.0001950076093418235, + "loss": 2.7251, + "step": 2110 + }, + { + "epoch": 0.17036558792672102, + "grad_norm": 0.8258434534072876, + "learning_rate": 0.00019500268235699597, + "loss": 2.7533, + "step": 2111 + }, + { + "epoch": 0.17044629166330402, + "grad_norm": 0.8347997069358826, + "learning_rate": 0.00019499775300445326, + "loss": 2.7372, + "step": 2112 + }, + { + "epoch": 0.17052699539988703, + "grad_norm": 0.8246529698371887, + "learning_rate": 0.00019499282128431823, + "loss": 2.7458, + "step": 2113 + }, + { + "epoch": 0.17060769913647003, + "grad_norm": 0.8510704040527344, + "learning_rate": 0.00019498788719671378, + "loss": 2.8144, + "step": 2114 + }, + { + "epoch": 0.17068840287305304, + "grad_norm": 0.7793454527854919, + "learning_rate": 0.00019498295074176286, + "loss": 2.7927, + "step": 2115 + }, + { + "epoch": 0.17076910660963604, + "grad_norm": 0.7888665199279785, + "learning_rate": 0.00019497801191958853, + "loss": 2.7156, + "step": 2116 + }, + { + "epoch": 0.17084981034621902, + "grad_norm": 0.8502812385559082, + "learning_rate": 0.00019497307073031386, + "loss": 2.7906, + "step": 2117 + }, + { + "epoch": 0.17093051408280202, + "grad_norm": 0.8376502990722656, + "learning_rate": 0.00019496812717406203, + "loss": 2.7354, + "step": 2118 + }, + { + "epoch": 0.17101121781938503, + "grad_norm": 0.7974401116371155, + "learning_rate": 
0.0001949631812509562, + "loss": 2.7755, + "step": 2119 + }, + { + "epoch": 0.17109192155596803, + "grad_norm": 0.7760190963745117, + "learning_rate": 0.00019495823296111965, + "loss": 2.7694, + "step": 2120 + }, + { + "epoch": 0.17117262529255103, + "grad_norm": 0.7721701860427856, + "learning_rate": 0.00019495328230467575, + "loss": 2.7474, + "step": 2121 + }, + { + "epoch": 0.17125332902913404, + "grad_norm": 0.7360577583312988, + "learning_rate": 0.0001949483292817478, + "loss": 2.8044, + "step": 2122 + }, + { + "epoch": 0.17133403276571704, + "grad_norm": 0.7536107301712036, + "learning_rate": 0.0001949433738924593, + "loss": 2.8165, + "step": 2123 + }, + { + "epoch": 0.17141473650230005, + "grad_norm": 0.7668276429176331, + "learning_rate": 0.00019493841613693375, + "loss": 2.7964, + "step": 2124 + }, + { + "epoch": 0.17149544023888305, + "grad_norm": 0.8323161602020264, + "learning_rate": 0.0001949334560152947, + "loss": 2.7395, + "step": 2125 + }, + { + "epoch": 0.17157614397546606, + "grad_norm": 0.8132179975509644, + "learning_rate": 0.00019492849352766576, + "loss": 2.7511, + "step": 2126 + }, + { + "epoch": 0.17165684771204906, + "grad_norm": 0.7806998491287231, + "learning_rate": 0.0001949235286741706, + "loss": 2.7649, + "step": 2127 + }, + { + "epoch": 0.17173755144863206, + "grad_norm": 0.8315939903259277, + "learning_rate": 0.00019491856145493298, + "loss": 2.7742, + "step": 2128 + }, + { + "epoch": 0.17181825518521507, + "grad_norm": 0.8368063569068909, + "learning_rate": 0.00019491359187007672, + "loss": 2.7667, + "step": 2129 + }, + { + "epoch": 0.17189895892179807, + "grad_norm": 0.9183431267738342, + "learning_rate": 0.0001949086199197256, + "loss": 2.7444, + "step": 2130 + }, + { + "epoch": 0.17197966265838108, + "grad_norm": 0.7824065089225769, + "learning_rate": 0.0001949036456040036, + "loss": 2.7455, + "step": 2131 + }, + { + "epoch": 0.17206036639496408, + "grad_norm": 0.777974009513855, + "learning_rate": 0.00019489866892303468, + 
"loss": 2.7466, + "step": 2132 + }, + { + "epoch": 0.17214107013154709, + "grad_norm": 0.8068816065788269, + "learning_rate": 0.00019489368987694286, + "loss": 2.7081, + "step": 2133 + }, + { + "epoch": 0.1722217738681301, + "grad_norm": 0.8757622838020325, + "learning_rate": 0.00019488870846585222, + "loss": 2.8005, + "step": 2134 + }, + { + "epoch": 0.1723024776047131, + "grad_norm": 0.7967162728309631, + "learning_rate": 0.00019488372468988693, + "loss": 2.7737, + "step": 2135 + }, + { + "epoch": 0.1723831813412961, + "grad_norm": 0.7700283527374268, + "learning_rate": 0.00019487873854917117, + "loss": 2.7431, + "step": 2136 + }, + { + "epoch": 0.1724638850778791, + "grad_norm": 0.8259130716323853, + "learning_rate": 0.00019487375004382927, + "loss": 2.7635, + "step": 2137 + }, + { + "epoch": 0.1725445888144621, + "grad_norm": 0.8253815770149231, + "learning_rate": 0.0001948687591739855, + "loss": 2.7046, + "step": 2138 + }, + { + "epoch": 0.1726252925510451, + "grad_norm": 0.8087987303733826, + "learning_rate": 0.00019486376593976426, + "loss": 2.7728, + "step": 2139 + }, + { + "epoch": 0.17270599628762812, + "grad_norm": 0.8437588214874268, + "learning_rate": 0.00019485877034128998, + "loss": 2.7606, + "step": 2140 + }, + { + "epoch": 0.17278670002421112, + "grad_norm": 0.8416075110435486, + "learning_rate": 0.00019485377237868723, + "loss": 2.7396, + "step": 2141 + }, + { + "epoch": 0.17286740376079412, + "grad_norm": 0.784275472164154, + "learning_rate": 0.00019484877205208046, + "loss": 2.766, + "step": 2142 + }, + { + "epoch": 0.17294810749737713, + "grad_norm": 0.8082472681999207, + "learning_rate": 0.0001948437693615944, + "loss": 2.8, + "step": 2143 + }, + { + "epoch": 0.17302881123396013, + "grad_norm": 0.8904329538345337, + "learning_rate": 0.00019483876430735365, + "loss": 2.6579, + "step": 2144 + }, + { + "epoch": 0.17310951497054314, + "grad_norm": 0.7864851355552673, + "learning_rate": 0.000194833756889483, + "loss": 2.8231, + "step": 2145 + }, + 
{ + "epoch": 0.17319021870712614, + "grad_norm": 0.7445049285888672, + "learning_rate": 0.00019482874710810723, + "loss": 2.7498, + "step": 2146 + }, + { + "epoch": 0.17327092244370915, + "grad_norm": 0.8266116380691528, + "learning_rate": 0.00019482373496335117, + "loss": 2.7152, + "step": 2147 + }, + { + "epoch": 0.17335162618029215, + "grad_norm": 0.7712300419807434, + "learning_rate": 0.0001948187204553398, + "loss": 2.7751, + "step": 2148 + }, + { + "epoch": 0.17343232991687516, + "grad_norm": 0.7472708225250244, + "learning_rate": 0.00019481370358419807, + "loss": 2.7397, + "step": 2149 + }, + { + "epoch": 0.17351303365345816, + "grad_norm": 0.763454020023346, + "learning_rate": 0.00019480868435005095, + "loss": 2.7174, + "step": 2150 + }, + { + "epoch": 0.17359373739004116, + "grad_norm": 0.8187674283981323, + "learning_rate": 0.00019480366275302362, + "loss": 2.8424, + "step": 2151 + }, + { + "epoch": 0.17367444112662417, + "grad_norm": 0.8183228373527527, + "learning_rate": 0.0001947986387932412, + "loss": 2.7351, + "step": 2152 + }, + { + "epoch": 0.17375514486320717, + "grad_norm": 0.807231605052948, + "learning_rate": 0.00019479361247082884, + "loss": 2.8054, + "step": 2153 + }, + { + "epoch": 0.17383584859979018, + "grad_norm": 0.8383626341819763, + "learning_rate": 0.00019478858378591194, + "loss": 2.7181, + "step": 2154 + }, + { + "epoch": 0.17391655233637318, + "grad_norm": 0.8330298662185669, + "learning_rate": 0.0001947835527386157, + "loss": 2.748, + "step": 2155 + }, + { + "epoch": 0.17399725607295619, + "grad_norm": 0.8433073163032532, + "learning_rate": 0.0001947785193290656, + "loss": 2.8115, + "step": 2156 + }, + { + "epoch": 0.1740779598095392, + "grad_norm": 0.8873384594917297, + "learning_rate": 0.000194773483557387, + "loss": 2.8288, + "step": 2157 + }, + { + "epoch": 0.1741586635461222, + "grad_norm": 0.8399423360824585, + "learning_rate": 0.00019476844542370546, + "loss": 2.7514, + "step": 2158 + }, + { + "epoch": 0.1742393672827052, + 
"grad_norm": 0.7808830738067627, + "learning_rate": 0.00019476340492814655, + "loss": 2.7003, + "step": 2159 + }, + { + "epoch": 0.1743200710192882, + "grad_norm": 0.8268750905990601, + "learning_rate": 0.00019475836207083589, + "loss": 2.7961, + "step": 2160 + }, + { + "epoch": 0.1744007747558712, + "grad_norm": 0.9144260883331299, + "learning_rate": 0.0001947533168518991, + "loss": 2.769, + "step": 2161 + }, + { + "epoch": 0.1744814784924542, + "grad_norm": 0.8409113883972168, + "learning_rate": 0.000194748269271462, + "loss": 2.8004, + "step": 2162 + }, + { + "epoch": 0.17456218222903722, + "grad_norm": 0.8747037649154663, + "learning_rate": 0.00019474321932965035, + "loss": 2.7602, + "step": 2163 + }, + { + "epoch": 0.17464288596562022, + "grad_norm": 0.8582575917243958, + "learning_rate": 0.00019473816702659, + "loss": 2.7292, + "step": 2164 + }, + { + "epoch": 0.17472358970220322, + "grad_norm": 0.7402843832969666, + "learning_rate": 0.0001947331123624069, + "loss": 2.7287, + "step": 2165 + }, + { + "epoch": 0.17480429343878623, + "grad_norm": 0.8019410967826843, + "learning_rate": 0.000194728055337227, + "loss": 2.7451, + "step": 2166 + }, + { + "epoch": 0.17488499717536923, + "grad_norm": 0.9137046337127686, + "learning_rate": 0.0001947229959511763, + "loss": 2.808, + "step": 2167 + }, + { + "epoch": 0.1749657009119522, + "grad_norm": 0.7539177536964417, + "learning_rate": 0.000194717934204381, + "loss": 2.7031, + "step": 2168 + }, + { + "epoch": 0.17504640464853521, + "grad_norm": 0.8611089587211609, + "learning_rate": 0.00019471287009696715, + "loss": 2.8751, + "step": 2169 + }, + { + "epoch": 0.17512710838511822, + "grad_norm": 0.906134843826294, + "learning_rate": 0.000194707803629061, + "loss": 2.9163, + "step": 2170 + }, + { + "epoch": 0.17520781212170122, + "grad_norm": 0.8066667318344116, + "learning_rate": 0.00019470273480078879, + "loss": 2.7549, + "step": 2171 + }, + { + "epoch": 0.17528851585828423, + "grad_norm": 0.7962325215339661, + 
"learning_rate": 0.00019469766361227692, + "loss": 2.7964, + "step": 2172 + }, + { + "epoch": 0.17536921959486723, + "grad_norm": 0.7802287340164185, + "learning_rate": 0.0001946925900636517, + "loss": 2.7022, + "step": 2173 + }, + { + "epoch": 0.17544992333145024, + "grad_norm": 0.783478319644928, + "learning_rate": 0.0001946875141550396, + "loss": 2.7798, + "step": 2174 + }, + { + "epoch": 0.17553062706803324, + "grad_norm": 0.8006815314292908, + "learning_rate": 0.00019468243588656713, + "loss": 2.7345, + "step": 2175 + }, + { + "epoch": 0.17561133080461624, + "grad_norm": 0.7566428184509277, + "learning_rate": 0.00019467735525836085, + "loss": 2.7822, + "step": 2176 + }, + { + "epoch": 0.17569203454119925, + "grad_norm": 0.772282600402832, + "learning_rate": 0.0001946722722705474, + "loss": 2.7346, + "step": 2177 + }, + { + "epoch": 0.17577273827778225, + "grad_norm": 0.7808345556259155, + "learning_rate": 0.00019466718692325347, + "loss": 2.755, + "step": 2178 + }, + { + "epoch": 0.17585344201436526, + "grad_norm": 0.8150362372398376, + "learning_rate": 0.00019466209921660576, + "loss": 2.7691, + "step": 2179 + }, + { + "epoch": 0.17593414575094826, + "grad_norm": 0.7952939867973328, + "learning_rate": 0.0001946570091507311, + "loss": 2.8175, + "step": 2180 + }, + { + "epoch": 0.17601484948753127, + "grad_norm": 0.8211334347724915, + "learning_rate": 0.00019465191672575634, + "loss": 2.7561, + "step": 2181 + }, + { + "epoch": 0.17609555322411427, + "grad_norm": 0.7726178765296936, + "learning_rate": 0.00019464682194180838, + "loss": 2.7435, + "step": 2182 + }, + { + "epoch": 0.17617625696069728, + "grad_norm": 0.7614372372627258, + "learning_rate": 0.00019464172479901422, + "loss": 2.7301, + "step": 2183 + }, + { + "epoch": 0.17625696069728028, + "grad_norm": 0.7818898558616638, + "learning_rate": 0.00019463662529750083, + "loss": 2.6964, + "step": 2184 + }, + { + "epoch": 0.17633766443386328, + "grad_norm": 0.7849796414375305, + "learning_rate": 
0.0001946315234373954, + "loss": 2.7431, + "step": 2185 + }, + { + "epoch": 0.1764183681704463, + "grad_norm": 0.7939459085464478, + "learning_rate": 0.00019462641921882506, + "loss": 2.7126, + "step": 2186 + }, + { + "epoch": 0.1764990719070293, + "grad_norm": 0.8391629457473755, + "learning_rate": 0.00019462131264191696, + "loss": 2.8394, + "step": 2187 + }, + { + "epoch": 0.1765797756436123, + "grad_norm": 0.7548067569732666, + "learning_rate": 0.0001946162037067984, + "loss": 2.7315, + "step": 2188 + }, + { + "epoch": 0.1766604793801953, + "grad_norm": 0.8278634548187256, + "learning_rate": 0.00019461109241359674, + "loss": 2.8298, + "step": 2189 + }, + { + "epoch": 0.1767411831167783, + "grad_norm": 0.8275949954986572, + "learning_rate": 0.00019460597876243933, + "loss": 2.8072, + "step": 2190 + }, + { + "epoch": 0.1768218868533613, + "grad_norm": 0.7720363140106201, + "learning_rate": 0.00019460086275345363, + "loss": 2.7478, + "step": 2191 + }, + { + "epoch": 0.17690259058994431, + "grad_norm": 0.7795925140380859, + "learning_rate": 0.00019459574438676714, + "loss": 2.7633, + "step": 2192 + }, + { + "epoch": 0.17698329432652732, + "grad_norm": 0.7722043991088867, + "learning_rate": 0.00019459062366250743, + "loss": 2.8001, + "step": 2193 + }, + { + "epoch": 0.17706399806311032, + "grad_norm": 0.8560587763786316, + "learning_rate": 0.00019458550058080212, + "loss": 2.7494, + "step": 2194 + }, + { + "epoch": 0.17714470179969333, + "grad_norm": 0.7473754286766052, + "learning_rate": 0.00019458037514177886, + "loss": 2.7112, + "step": 2195 + }, + { + "epoch": 0.17722540553627633, + "grad_norm": 0.7625827789306641, + "learning_rate": 0.00019457524734556542, + "loss": 2.7496, + "step": 2196 + }, + { + "epoch": 0.17730610927285934, + "grad_norm": 0.7809351682662964, + "learning_rate": 0.00019457011719228962, + "loss": 2.7764, + "step": 2197 + }, + { + "epoch": 0.17738681300944234, + "grad_norm": 0.7846190333366394, + "learning_rate": 0.00019456498468207927, + 
"loss": 2.7189, + "step": 2198 + }, + { + "epoch": 0.17746751674602534, + "grad_norm": 0.7919551134109497, + "learning_rate": 0.0001945598498150623, + "loss": 2.7798, + "step": 2199 + }, + { + "epoch": 0.17754822048260835, + "grad_norm": 0.796183705329895, + "learning_rate": 0.0001945547125913667, + "loss": 2.7498, + "step": 2200 + }, + { + "epoch": 0.17762892421919135, + "grad_norm": 0.791668176651001, + "learning_rate": 0.0001945495730111205, + "loss": 2.7638, + "step": 2201 + }, + { + "epoch": 0.17770962795577436, + "grad_norm": 0.8303191661834717, + "learning_rate": 0.0001945444310744518, + "loss": 2.8079, + "step": 2202 + }, + { + "epoch": 0.17779033169235736, + "grad_norm": 0.8245917558670044, + "learning_rate": 0.00019453928678148872, + "loss": 2.7222, + "step": 2203 + }, + { + "epoch": 0.17787103542894037, + "grad_norm": 0.793456494808197, + "learning_rate": 0.0001945341401323595, + "loss": 2.8532, + "step": 2204 + }, + { + "epoch": 0.17795173916552337, + "grad_norm": 0.7574856877326965, + "learning_rate": 0.00019452899112719235, + "loss": 2.7361, + "step": 2205 + }, + { + "epoch": 0.17803244290210637, + "grad_norm": 0.7748556733131409, + "learning_rate": 0.0001945238397661157, + "loss": 2.7423, + "step": 2206 + }, + { + "epoch": 0.17811314663868938, + "grad_norm": 0.8973588347434998, + "learning_rate": 0.00019451868604925782, + "loss": 2.7604, + "step": 2207 + }, + { + "epoch": 0.17819385037527238, + "grad_norm": 0.7613589763641357, + "learning_rate": 0.00019451352997674722, + "loss": 2.7168, + "step": 2208 + }, + { + "epoch": 0.1782745541118554, + "grad_norm": 0.8152763247489929, + "learning_rate": 0.00019450837154871243, + "loss": 2.7904, + "step": 2209 + }, + { + "epoch": 0.1783552578484384, + "grad_norm": 0.8115083575248718, + "learning_rate": 0.00019450321076528194, + "loss": 2.7595, + "step": 2210 + }, + { + "epoch": 0.1784359615850214, + "grad_norm": 0.772665798664093, + "learning_rate": 0.00019449804762658438, + "loss": 2.7125, + "step": 2211 + }, 
+ { + "epoch": 0.1785166653216044, + "grad_norm": 0.8002723455429077, + "learning_rate": 0.0001944928821327485, + "loss": 2.8121, + "step": 2212 + }, + { + "epoch": 0.1785973690581874, + "grad_norm": 0.8354858160018921, + "learning_rate": 0.00019448771428390296, + "loss": 2.8662, + "step": 2213 + }, + { + "epoch": 0.1786780727947704, + "grad_norm": 0.7799130082130432, + "learning_rate": 0.0001944825440801766, + "loss": 2.7247, + "step": 2214 + }, + { + "epoch": 0.1787587765313534, + "grad_norm": 0.810265302658081, + "learning_rate": 0.00019447737152169828, + "loss": 2.7095, + "step": 2215 + }, + { + "epoch": 0.17883948026793642, + "grad_norm": 0.8305599093437195, + "learning_rate": 0.00019447219660859687, + "loss": 2.7448, + "step": 2216 + }, + { + "epoch": 0.17892018400451942, + "grad_norm": 0.7899554371833801, + "learning_rate": 0.00019446701934100138, + "loss": 2.7295, + "step": 2217 + }, + { + "epoch": 0.17900088774110243, + "grad_norm": 0.7675672173500061, + "learning_rate": 0.00019446183971904082, + "loss": 2.7236, + "step": 2218 + }, + { + "epoch": 0.1790815914776854, + "grad_norm": 0.8717279434204102, + "learning_rate": 0.0001944566577428443, + "loss": 2.8044, + "step": 2219 + }, + { + "epoch": 0.1791622952142684, + "grad_norm": 0.8151431679725647, + "learning_rate": 0.00019445147341254094, + "loss": 2.7753, + "step": 2220 + }, + { + "epoch": 0.1792429989508514, + "grad_norm": 0.8481619358062744, + "learning_rate": 0.00019444628672825998, + "loss": 2.7954, + "step": 2221 + }, + { + "epoch": 0.17932370268743442, + "grad_norm": 0.8133199214935303, + "learning_rate": 0.00019444109769013065, + "loss": 2.7235, + "step": 2222 + }, + { + "epoch": 0.17940440642401742, + "grad_norm": 0.8250097036361694, + "learning_rate": 0.00019443590629828232, + "loss": 2.8352, + "step": 2223 + }, + { + "epoch": 0.17948511016060043, + "grad_norm": 0.8279787302017212, + "learning_rate": 0.00019443071255284433, + "loss": 2.7513, + "step": 2224 + }, + { + "epoch": 
0.17956581389718343, + "grad_norm": 0.7781538963317871, + "learning_rate": 0.00019442551645394612, + "loss": 2.7239, + "step": 2225 + }, + { + "epoch": 0.17964651763376643, + "grad_norm": 0.7718615531921387, + "learning_rate": 0.00019442031800171727, + "loss": 2.7387, + "step": 2226 + }, + { + "epoch": 0.17972722137034944, + "grad_norm": 0.7704512476921082, + "learning_rate": 0.00019441511719628724, + "loss": 2.792, + "step": 2227 + }, + { + "epoch": 0.17980792510693244, + "grad_norm": 0.8290835618972778, + "learning_rate": 0.00019440991403778566, + "loss": 2.7745, + "step": 2228 + }, + { + "epoch": 0.17988862884351545, + "grad_norm": 0.8408392667770386, + "learning_rate": 0.00019440470852634227, + "loss": 2.7688, + "step": 2229 + }, + { + "epoch": 0.17996933258009845, + "grad_norm": 0.8503465056419373, + "learning_rate": 0.00019439950066208676, + "loss": 2.6747, + "step": 2230 + }, + { + "epoch": 0.18005003631668146, + "grad_norm": 0.8213364481925964, + "learning_rate": 0.0001943942904451489, + "loss": 2.7212, + "step": 2231 + }, + { + "epoch": 0.18013074005326446, + "grad_norm": 0.8511209487915039, + "learning_rate": 0.0001943890778756586, + "loss": 2.701, + "step": 2232 + }, + { + "epoch": 0.18021144378984746, + "grad_norm": 0.8034417033195496, + "learning_rate": 0.00019438386295374577, + "loss": 2.7029, + "step": 2233 + }, + { + "epoch": 0.18029214752643047, + "grad_norm": 0.7603715658187866, + "learning_rate": 0.0001943786456795403, + "loss": 2.7201, + "step": 2234 + }, + { + "epoch": 0.18037285126301347, + "grad_norm": 0.9210647940635681, + "learning_rate": 0.0001943734260531723, + "loss": 2.7847, + "step": 2235 + }, + { + "epoch": 0.18045355499959648, + "grad_norm": 0.7429665923118591, + "learning_rate": 0.00019436820407477186, + "loss": 2.7493, + "step": 2236 + }, + { + "epoch": 0.18053425873617948, + "grad_norm": 0.8290510773658752, + "learning_rate": 0.00019436297974446905, + "loss": 2.7711, + "step": 2237 + }, + { + "epoch": 0.18061496247276249, + 
"grad_norm": 0.7593570947647095, + "learning_rate": 0.0001943577530623941, + "loss": 2.7539, + "step": 2238 + }, + { + "epoch": 0.1806956662093455, + "grad_norm": 0.8222225308418274, + "learning_rate": 0.00019435252402867734, + "loss": 2.7703, + "step": 2239 + }, + { + "epoch": 0.1807763699459285, + "grad_norm": 0.8280842900276184, + "learning_rate": 0.00019434729264344898, + "loss": 2.7966, + "step": 2240 + }, + { + "epoch": 0.1808570736825115, + "grad_norm": 0.8258495926856995, + "learning_rate": 0.00019434205890683952, + "loss": 2.759, + "step": 2241 + }, + { + "epoch": 0.1809377774190945, + "grad_norm": 0.8294420838356018, + "learning_rate": 0.00019433682281897932, + "loss": 2.6996, + "step": 2242 + }, + { + "epoch": 0.1810184811556775, + "grad_norm": 0.8258811235427856, + "learning_rate": 0.0001943315843799989, + "loss": 2.774, + "step": 2243 + }, + { + "epoch": 0.1810991848922605, + "grad_norm": 0.8035838007926941, + "learning_rate": 0.0001943263435900288, + "loss": 2.7806, + "step": 2244 + }, + { + "epoch": 0.18117988862884352, + "grad_norm": 0.7900332808494568, + "learning_rate": 0.00019432110044919964, + "loss": 2.7462, + "step": 2245 + }, + { + "epoch": 0.18126059236542652, + "grad_norm": 0.8126730918884277, + "learning_rate": 0.00019431585495764212, + "loss": 2.6913, + "step": 2246 + }, + { + "epoch": 0.18134129610200952, + "grad_norm": 0.8411321043968201, + "learning_rate": 0.00019431060711548695, + "loss": 2.7503, + "step": 2247 + }, + { + "epoch": 0.18142199983859253, + "grad_norm": 0.7712867856025696, + "learning_rate": 0.0001943053569228649, + "loss": 2.7703, + "step": 2248 + }, + { + "epoch": 0.18150270357517553, + "grad_norm": 0.9093566536903381, + "learning_rate": 0.00019430010437990688, + "loss": 2.7838, + "step": 2249 + }, + { + "epoch": 0.18158340731175854, + "grad_norm": 0.8184913396835327, + "learning_rate": 0.00019429484948674372, + "loss": 2.8167, + "step": 2250 + }, + { + "epoch": 0.18166411104834154, + "grad_norm": 0.7215915322303772, + 
"learning_rate": 0.00019428959224350643, + "loss": 2.739, + "step": 2251 + }, + { + "epoch": 0.18174481478492455, + "grad_norm": 0.7842726111412048, + "learning_rate": 0.000194284332650326, + "loss": 2.8547, + "step": 2252 + }, + { + "epoch": 0.18182551852150755, + "grad_norm": 0.7758263349533081, + "learning_rate": 0.00019427907070733357, + "loss": 2.7746, + "step": 2253 + }, + { + "epoch": 0.18190622225809056, + "grad_norm": 0.7710500359535217, + "learning_rate": 0.00019427380641466027, + "loss": 2.7415, + "step": 2254 + }, + { + "epoch": 0.18198692599467356, + "grad_norm": 0.8233851194381714, + "learning_rate": 0.00019426853977243724, + "loss": 2.7471, + "step": 2255 + }, + { + "epoch": 0.18206762973125656, + "grad_norm": 0.7856284379959106, + "learning_rate": 0.00019426327078079578, + "loss": 2.6892, + "step": 2256 + }, + { + "epoch": 0.18214833346783957, + "grad_norm": 0.7978290915489197, + "learning_rate": 0.00019425799943986722, + "loss": 2.7346, + "step": 2257 + }, + { + "epoch": 0.18222903720442257, + "grad_norm": 0.8339362740516663, + "learning_rate": 0.00019425272574978293, + "loss": 2.7403, + "step": 2258 + }, + { + "epoch": 0.18230974094100558, + "grad_norm": 0.8035171031951904, + "learning_rate": 0.0001942474497106743, + "loss": 2.7444, + "step": 2259 + }, + { + "epoch": 0.18239044467758858, + "grad_norm": 0.7950475811958313, + "learning_rate": 0.0001942421713226729, + "loss": 2.7218, + "step": 2260 + }, + { + "epoch": 0.18247114841417159, + "grad_norm": 0.8439741730690002, + "learning_rate": 0.00019423689058591022, + "loss": 2.7498, + "step": 2261 + }, + { + "epoch": 0.1825518521507546, + "grad_norm": 0.8585919737815857, + "learning_rate": 0.00019423160750051789, + "loss": 2.7459, + "step": 2262 + }, + { + "epoch": 0.1826325558873376, + "grad_norm": 0.857276201248169, + "learning_rate": 0.00019422632206662755, + "loss": 2.8404, + "step": 2263 + }, + { + "epoch": 0.1827132596239206, + "grad_norm": 0.7692707777023315, + "learning_rate": 
0.000194221034284371, + "loss": 2.8069, + "step": 2264 + }, + { + "epoch": 0.1827939633605036, + "grad_norm": 0.9107782244682312, + "learning_rate": 0.00019421574415387998, + "loss": 2.7554, + "step": 2265 + }, + { + "epoch": 0.1828746670970866, + "grad_norm": 0.763300895690918, + "learning_rate": 0.00019421045167528628, + "loss": 2.8031, + "step": 2266 + }, + { + "epoch": 0.1829553708336696, + "grad_norm": 0.8625530004501343, + "learning_rate": 0.0001942051568487219, + "loss": 2.7622, + "step": 2267 + }, + { + "epoch": 0.18303607457025262, + "grad_norm": 0.8483080863952637, + "learning_rate": 0.00019419985967431875, + "loss": 2.7726, + "step": 2268 + }, + { + "epoch": 0.18311677830683562, + "grad_norm": 0.8295309543609619, + "learning_rate": 0.00019419456015220884, + "loss": 2.7676, + "step": 2269 + }, + { + "epoch": 0.1831974820434186, + "grad_norm": 0.812976062297821, + "learning_rate": 0.0001941892582825243, + "loss": 2.745, + "step": 2270 + }, + { + "epoch": 0.1832781857800016, + "grad_norm": 0.799846351146698, + "learning_rate": 0.00019418395406539717, + "loss": 2.7474, + "step": 2271 + }, + { + "epoch": 0.1833588895165846, + "grad_norm": 0.7825174331665039, + "learning_rate": 0.00019417864750095976, + "loss": 2.7982, + "step": 2272 + }, + { + "epoch": 0.1834395932531676, + "grad_norm": 0.8331060409545898, + "learning_rate": 0.00019417333858934424, + "loss": 2.7279, + "step": 2273 + }, + { + "epoch": 0.18352029698975061, + "grad_norm": 0.8579809665679932, + "learning_rate": 0.00019416802733068295, + "loss": 2.7425, + "step": 2274 + }, + { + "epoch": 0.18360100072633362, + "grad_norm": 0.8643589019775391, + "learning_rate": 0.0001941627137251083, + "loss": 2.7369, + "step": 2275 + }, + { + "epoch": 0.18368170446291662, + "grad_norm": 0.9086846113204956, + "learning_rate": 0.00019415739777275265, + "loss": 2.7681, + "step": 2276 + }, + { + "epoch": 0.18376240819949963, + "grad_norm": 0.8442896604537964, + "learning_rate": 0.00019415207947374853, + "loss": 
2.7733, + "step": 2277 + }, + { + "epoch": 0.18384311193608263, + "grad_norm": 0.7858592867851257, + "learning_rate": 0.00019414675882822846, + "loss": 2.7726, + "step": 2278 + }, + { + "epoch": 0.18392381567266564, + "grad_norm": 0.8191118240356445, + "learning_rate": 0.00019414143583632503, + "loss": 2.8142, + "step": 2279 + }, + { + "epoch": 0.18400451940924864, + "grad_norm": 0.8093815445899963, + "learning_rate": 0.00019413611049817097, + "loss": 2.7068, + "step": 2280 + }, + { + "epoch": 0.18408522314583164, + "grad_norm": 0.80247563123703, + "learning_rate": 0.00019413078281389895, + "loss": 2.7459, + "step": 2281 + }, + { + "epoch": 0.18416592688241465, + "grad_norm": 0.8200877904891968, + "learning_rate": 0.00019412545278364176, + "loss": 2.6963, + "step": 2282 + }, + { + "epoch": 0.18424663061899765, + "grad_norm": 0.870662271976471, + "learning_rate": 0.00019412012040753224, + "loss": 2.8636, + "step": 2283 + }, + { + "epoch": 0.18432733435558066, + "grad_norm": 0.7626601457595825, + "learning_rate": 0.00019411478568570332, + "loss": 2.8082, + "step": 2284 + }, + { + "epoch": 0.18440803809216366, + "grad_norm": 0.7492787837982178, + "learning_rate": 0.00019410944861828787, + "loss": 2.7231, + "step": 2285 + }, + { + "epoch": 0.18448874182874667, + "grad_norm": 0.8172419667243958, + "learning_rate": 0.000194104109205419, + "loss": 2.7054, + "step": 2286 + }, + { + "epoch": 0.18456944556532967, + "grad_norm": 0.7749670147895813, + "learning_rate": 0.0001940987674472297, + "loss": 2.6907, + "step": 2287 + }, + { + "epoch": 0.18465014930191267, + "grad_norm": 0.8855465054512024, + "learning_rate": 0.00019409342334385316, + "loss": 2.7439, + "step": 2288 + }, + { + "epoch": 0.18473085303849568, + "grad_norm": 0.8066419363021851, + "learning_rate": 0.00019408807689542257, + "loss": 2.7126, + "step": 2289 + }, + { + "epoch": 0.18481155677507868, + "grad_norm": 0.7759004235267639, + "learning_rate": 0.00019408272810207114, + "loss": 2.7207, + "step": 2290 + }, + 
{ + "epoch": 0.1848922605116617, + "grad_norm": 0.8593513369560242, + "learning_rate": 0.00019407737696393215, + "loss": 2.7375, + "step": 2291 + }, + { + "epoch": 0.1849729642482447, + "grad_norm": 0.8154759407043457, + "learning_rate": 0.00019407202348113904, + "loss": 2.7608, + "step": 2292 + }, + { + "epoch": 0.1850536679848277, + "grad_norm": 0.7912892699241638, + "learning_rate": 0.0001940666676538252, + "loss": 2.7886, + "step": 2293 + }, + { + "epoch": 0.1851343717214107, + "grad_norm": 0.9184576272964478, + "learning_rate": 0.0001940613094821241, + "loss": 2.7867, + "step": 2294 + }, + { + "epoch": 0.1852150754579937, + "grad_norm": 0.8114588856697083, + "learning_rate": 0.0001940559489661693, + "loss": 2.8105, + "step": 2295 + }, + { + "epoch": 0.1852957791945767, + "grad_norm": 0.7681595683097839, + "learning_rate": 0.00019405058610609438, + "loss": 2.7707, + "step": 2296 + }, + { + "epoch": 0.18537648293115971, + "grad_norm": 0.7719643712043762, + "learning_rate": 0.000194045220902033, + "loss": 2.6767, + "step": 2297 + }, + { + "epoch": 0.18545718666774272, + "grad_norm": 0.7602487206459045, + "learning_rate": 0.00019403985335411888, + "loss": 2.7698, + "step": 2298 + }, + { + "epoch": 0.18553789040432572, + "grad_norm": 0.8044554591178894, + "learning_rate": 0.00019403448346248578, + "loss": 2.7578, + "step": 2299 + }, + { + "epoch": 0.18561859414090873, + "grad_norm": 0.7830328345298767, + "learning_rate": 0.00019402911122726757, + "loss": 2.7113, + "step": 2300 + }, + { + "epoch": 0.18569929787749173, + "grad_norm": 0.7793100476264954, + "learning_rate": 0.0001940237366485981, + "loss": 2.7388, + "step": 2301 + }, + { + "epoch": 0.18578000161407474, + "grad_norm": 0.9127374887466431, + "learning_rate": 0.00019401835972661133, + "loss": 2.7459, + "step": 2302 + }, + { + "epoch": 0.18586070535065774, + "grad_norm": 0.8007177114486694, + "learning_rate": 0.00019401298046144128, + "loss": 2.776, + "step": 2303 + }, + { + "epoch": 0.18594140908724074, + 
"grad_norm": 0.7384614944458008, + "learning_rate": 0.000194007598853222, + "loss": 2.6819, + "step": 2304 + }, + { + "epoch": 0.18602211282382375, + "grad_norm": 0.798909068107605, + "learning_rate": 0.0001940022149020876, + "loss": 2.7218, + "step": 2305 + }, + { + "epoch": 0.18610281656040675, + "grad_norm": 0.8388963341712952, + "learning_rate": 0.0001939968286081723, + "loss": 2.8248, + "step": 2306 + }, + { + "epoch": 0.18618352029698976, + "grad_norm": 0.8411754369735718, + "learning_rate": 0.0001939914399716103, + "loss": 2.7575, + "step": 2307 + }, + { + "epoch": 0.18626422403357276, + "grad_norm": 0.7936103343963623, + "learning_rate": 0.00019398604899253594, + "loss": 2.7488, + "step": 2308 + }, + { + "epoch": 0.18634492777015577, + "grad_norm": 0.7913734912872314, + "learning_rate": 0.00019398065567108357, + "loss": 2.7963, + "step": 2309 + }, + { + "epoch": 0.18642563150673877, + "grad_norm": 0.8341575860977173, + "learning_rate": 0.00019397526000738754, + "loss": 2.7698, + "step": 2310 + }, + { + "epoch": 0.18650633524332177, + "grad_norm": 0.8323128819465637, + "learning_rate": 0.00019396986200158244, + "loss": 2.7218, + "step": 2311 + }, + { + "epoch": 0.18658703897990478, + "grad_norm": 0.748073160648346, + "learning_rate": 0.0001939644616538027, + "loss": 2.7798, + "step": 2312 + }, + { + "epoch": 0.18666774271648778, + "grad_norm": 0.8166958689689636, + "learning_rate": 0.00019395905896418296, + "loss": 2.661, + "step": 2313 + }, + { + "epoch": 0.1867484464530708, + "grad_norm": 0.796791672706604, + "learning_rate": 0.00019395365393285786, + "loss": 2.7297, + "step": 2314 + }, + { + "epoch": 0.1868291501896538, + "grad_norm": 0.7851170897483826, + "learning_rate": 0.0001939482465599621, + "loss": 2.7798, + "step": 2315 + }, + { + "epoch": 0.1869098539262368, + "grad_norm": 0.7545836567878723, + "learning_rate": 0.00019394283684563045, + "loss": 2.7327, + "step": 2316 + }, + { + "epoch": 0.1869905576628198, + "grad_norm": 0.8100360631942749, + 
"learning_rate": 0.00019393742478999776, + "loss": 2.7901, + "step": 2317 + }, + { + "epoch": 0.1870712613994028, + "grad_norm": 0.7874314785003662, + "learning_rate": 0.00019393201039319887, + "loss": 2.7597, + "step": 2318 + }, + { + "epoch": 0.1871519651359858, + "grad_norm": 0.7698730826377869, + "learning_rate": 0.00019392659365536876, + "loss": 2.7327, + "step": 2319 + }, + { + "epoch": 0.1872326688725688, + "grad_norm": 0.7417994141578674, + "learning_rate": 0.0001939211745766424, + "loss": 2.7413, + "step": 2320 + }, + { + "epoch": 0.1873133726091518, + "grad_norm": 0.7823258638381958, + "learning_rate": 0.00019391575315715485, + "loss": 2.7577, + "step": 2321 + }, + { + "epoch": 0.1873940763457348, + "grad_norm": 0.82382732629776, + "learning_rate": 0.00019391032939704124, + "loss": 2.7769, + "step": 2322 + }, + { + "epoch": 0.1874747800823178, + "grad_norm": 0.8405026197433472, + "learning_rate": 0.0001939049032964367, + "loss": 2.8402, + "step": 2323 + }, + { + "epoch": 0.1875554838189008, + "grad_norm": 0.8307906985282898, + "learning_rate": 0.00019389947485547654, + "loss": 2.7642, + "step": 2324 + }, + { + "epoch": 0.1876361875554838, + "grad_norm": 0.8618248701095581, + "learning_rate": 0.000193894044074296, + "loss": 2.7853, + "step": 2325 + }, + { + "epoch": 0.1877168912920668, + "grad_norm": 0.8040831685066223, + "learning_rate": 0.00019388861095303046, + "loss": 2.7467, + "step": 2326 + }, + { + "epoch": 0.18779759502864982, + "grad_norm": 0.7723637223243713, + "learning_rate": 0.0001938831754918153, + "loss": 2.7222, + "step": 2327 + }, + { + "epoch": 0.18787829876523282, + "grad_norm": 0.8189084529876709, + "learning_rate": 0.000193877737690786, + "loss": 2.7857, + "step": 2328 + }, + { + "epoch": 0.18795900250181583, + "grad_norm": 0.8335791826248169, + "learning_rate": 0.00019387229755007805, + "loss": 2.6997, + "step": 2329 + }, + { + "epoch": 0.18803970623839883, + "grad_norm": 0.7732782959938049, + "learning_rate": 0.00019386685506982707, 
+ "loss": 2.7155, + "step": 2330 + }, + { + "epoch": 0.18812040997498183, + "grad_norm": 0.8262906670570374, + "learning_rate": 0.0001938614102501687, + "loss": 2.7638, + "step": 2331 + }, + { + "epoch": 0.18820111371156484, + "grad_norm": 0.7969058156013489, + "learning_rate": 0.00019385596309123862, + "loss": 2.7363, + "step": 2332 + }, + { + "epoch": 0.18828181744814784, + "grad_norm": 0.7834853529930115, + "learning_rate": 0.0001938505135931726, + "loss": 2.7205, + "step": 2333 + }, + { + "epoch": 0.18836252118473085, + "grad_norm": 0.748481810092926, + "learning_rate": 0.00019384506175610647, + "loss": 2.7759, + "step": 2334 + }, + { + "epoch": 0.18844322492131385, + "grad_norm": 0.8137786984443665, + "learning_rate": 0.00019383960758017604, + "loss": 2.828, + "step": 2335 + }, + { + "epoch": 0.18852392865789686, + "grad_norm": 0.8065745234489441, + "learning_rate": 0.00019383415106551734, + "loss": 2.7408, + "step": 2336 + }, + { + "epoch": 0.18860463239447986, + "grad_norm": 0.768643856048584, + "learning_rate": 0.0001938286922122663, + "loss": 2.6503, + "step": 2337 + }, + { + "epoch": 0.18868533613106286, + "grad_norm": 0.7677921652793884, + "learning_rate": 0.00019382323102055897, + "loss": 2.7088, + "step": 2338 + }, + { + "epoch": 0.18876603986764587, + "grad_norm": 0.7648717164993286, + "learning_rate": 0.0001938177674905315, + "loss": 2.7015, + "step": 2339 + }, + { + "epoch": 0.18884674360422887, + "grad_norm": 0.7517116665840149, + "learning_rate": 0.00019381230162231997, + "loss": 2.7095, + "step": 2340 + }, + { + "epoch": 0.18892744734081188, + "grad_norm": 0.8147841691970825, + "learning_rate": 0.00019380683341606067, + "loss": 2.8563, + "step": 2341 + }, + { + "epoch": 0.18900815107739488, + "grad_norm": 0.7849822640419006, + "learning_rate": 0.00019380136287188988, + "loss": 2.7432, + "step": 2342 + }, + { + "epoch": 0.18908885481397789, + "grad_norm": 0.813811719417572, + "learning_rate": 0.0001937958899899439, + "loss": 2.7419, + "step": 2343 
+ }, + { + "epoch": 0.1891695585505609, + "grad_norm": 0.8142707943916321, + "learning_rate": 0.00019379041477035923, + "loss": 2.7658, + "step": 2344 + }, + { + "epoch": 0.1892502622871439, + "grad_norm": 0.7594506740570068, + "learning_rate": 0.00019378493721327217, + "loss": 2.7298, + "step": 2345 + }, + { + "epoch": 0.1893309660237269, + "grad_norm": 0.8374232053756714, + "learning_rate": 0.00019377945731881936, + "loss": 2.8112, + "step": 2346 + }, + { + "epoch": 0.1894116697603099, + "grad_norm": 0.783608615398407, + "learning_rate": 0.00019377397508713734, + "loss": 2.8168, + "step": 2347 + }, + { + "epoch": 0.1894923734968929, + "grad_norm": 0.720214307308197, + "learning_rate": 0.0001937684905183627, + "loss": 2.7516, + "step": 2348 + }, + { + "epoch": 0.1895730772334759, + "grad_norm": 0.7939600944519043, + "learning_rate": 0.0001937630036126322, + "loss": 2.7609, + "step": 2349 + }, + { + "epoch": 0.18965378097005892, + "grad_norm": 0.787315309047699, + "learning_rate": 0.00019375751437008252, + "loss": 2.758, + "step": 2350 + }, + { + "epoch": 0.18973448470664192, + "grad_norm": 0.7862411141395569, + "learning_rate": 0.00019375202279085053, + "loss": 2.6866, + "step": 2351 + }, + { + "epoch": 0.18981518844322492, + "grad_norm": 0.8651136159896851, + "learning_rate": 0.000193746528875073, + "loss": 2.7488, + "step": 2352 + }, + { + "epoch": 0.18989589217980793, + "grad_norm": 0.8150602579116821, + "learning_rate": 0.00019374103262288696, + "loss": 2.7417, + "step": 2353 + }, + { + "epoch": 0.18997659591639093, + "grad_norm": 0.9053540229797363, + "learning_rate": 0.00019373553403442934, + "loss": 2.7587, + "step": 2354 + }, + { + "epoch": 0.19005729965297394, + "grad_norm": 0.8775703310966492, + "learning_rate": 0.0001937300331098372, + "loss": 2.733, + "step": 2355 + }, + { + "epoch": 0.19013800338955694, + "grad_norm": 0.7714357972145081, + "learning_rate": 0.0001937245298492476, + "loss": 2.7595, + "step": 2356 + }, + { + "epoch": 0.19021870712613995, 
+ "grad_norm": 0.8648017048835754, + "learning_rate": 0.0001937190242527977, + "loss": 2.7944, + "step": 2357 + }, + { + "epoch": 0.19029941086272295, + "grad_norm": 0.9367388486862183, + "learning_rate": 0.00019371351632062477, + "loss": 2.7902, + "step": 2358 + }, + { + "epoch": 0.19038011459930596, + "grad_norm": 0.8116368651390076, + "learning_rate": 0.00019370800605286604, + "loss": 2.7291, + "step": 2359 + }, + { + "epoch": 0.19046081833588896, + "grad_norm": 0.7892753481864929, + "learning_rate": 0.00019370249344965882, + "loss": 2.8192, + "step": 2360 + }, + { + "epoch": 0.19054152207247196, + "grad_norm": 0.8109372854232788, + "learning_rate": 0.00019369697851114056, + "loss": 2.6982, + "step": 2361 + }, + { + "epoch": 0.19062222580905497, + "grad_norm": 0.8756314516067505, + "learning_rate": 0.00019369146123744864, + "loss": 2.744, + "step": 2362 + }, + { + "epoch": 0.19070292954563797, + "grad_norm": 0.7400399446487427, + "learning_rate": 0.00019368594162872058, + "loss": 2.7328, + "step": 2363 + }, + { + "epoch": 0.19078363328222098, + "grad_norm": 0.8223158717155457, + "learning_rate": 0.000193680419685094, + "loss": 2.7614, + "step": 2364 + }, + { + "epoch": 0.19086433701880398, + "grad_norm": 0.7350139617919922, + "learning_rate": 0.00019367489540670645, + "loss": 2.7074, + "step": 2365 + }, + { + "epoch": 0.19094504075538699, + "grad_norm": 0.7915631532669067, + "learning_rate": 0.00019366936879369563, + "loss": 2.7835, + "step": 2366 + }, + { + "epoch": 0.19102574449197, + "grad_norm": 0.7765628099441528, + "learning_rate": 0.00019366383984619932, + "loss": 2.765, + "step": 2367 + }, + { + "epoch": 0.191106448228553, + "grad_norm": 0.8127059936523438, + "learning_rate": 0.00019365830856435525, + "loss": 2.7753, + "step": 2368 + }, + { + "epoch": 0.191187151965136, + "grad_norm": 0.8652897477149963, + "learning_rate": 0.0001936527749483013, + "loss": 2.7137, + "step": 2369 + }, + { + "epoch": 0.191267855701719, + "grad_norm": 0.8086774945259094, + 
"learning_rate": 0.00019364723899817541, + "loss": 2.7209, + "step": 2370 + }, + { + "epoch": 0.191348559438302, + "grad_norm": 0.7965098023414612, + "learning_rate": 0.00019364170071411554, + "loss": 2.786, + "step": 2371 + }, + { + "epoch": 0.19142926317488498, + "grad_norm": 0.7954064607620239, + "learning_rate": 0.00019363616009625967, + "loss": 2.7508, + "step": 2372 + }, + { + "epoch": 0.191509966911468, + "grad_norm": 0.7835928201675415, + "learning_rate": 0.00019363061714474595, + "loss": 2.7423, + "step": 2373 + }, + { + "epoch": 0.191590670648051, + "grad_norm": 0.8720580339431763, + "learning_rate": 0.0001936250718597125, + "loss": 2.7877, + "step": 2374 + }, + { + "epoch": 0.191671374384634, + "grad_norm": 0.836066484451294, + "learning_rate": 0.00019361952424129747, + "loss": 2.8456, + "step": 2375 + }, + { + "epoch": 0.191752078121217, + "grad_norm": 0.793666660785675, + "learning_rate": 0.00019361397428963923, + "loss": 2.786, + "step": 2376 + }, + { + "epoch": 0.1918327818578, + "grad_norm": 0.8573217391967773, + "learning_rate": 0.000193608422004876, + "loss": 2.7569, + "step": 2377 + }, + { + "epoch": 0.191913485594383, + "grad_norm": 0.81243896484375, + "learning_rate": 0.00019360286738714623, + "loss": 2.771, + "step": 2378 + }, + { + "epoch": 0.19199418933096601, + "grad_norm": 0.7449626326560974, + "learning_rate": 0.00019359731043658832, + "loss": 2.7479, + "step": 2379 + }, + { + "epoch": 0.19207489306754902, + "grad_norm": 0.8124165534973145, + "learning_rate": 0.00019359175115334076, + "loss": 2.7602, + "step": 2380 + }, + { + "epoch": 0.19215559680413202, + "grad_norm": 0.7786986827850342, + "learning_rate": 0.00019358618953754211, + "loss": 2.6926, + "step": 2381 + }, + { + "epoch": 0.19223630054071503, + "grad_norm": 0.7987258434295654, + "learning_rate": 0.000193580625589331, + "loss": 2.7573, + "step": 2382 + }, + { + "epoch": 0.19231700427729803, + "grad_norm": 0.8236463665962219, + "learning_rate": 0.00019357505930884606, + "loss": 
2.6755, + "step": 2383 + }, + { + "epoch": 0.19239770801388104, + "grad_norm": 0.8285779356956482, + "learning_rate": 0.00019356949069622602, + "loss": 2.7658, + "step": 2384 + }, + { + "epoch": 0.19247841175046404, + "grad_norm": 0.7823960781097412, + "learning_rate": 0.0001935639197516097, + "loss": 2.7404, + "step": 2385 + }, + { + "epoch": 0.19255911548704704, + "grad_norm": 0.968638002872467, + "learning_rate": 0.00019355834647513591, + "loss": 2.7836, + "step": 2386 + }, + { + "epoch": 0.19263981922363005, + "grad_norm": 0.8170328736305237, + "learning_rate": 0.00019355277086694357, + "loss": 2.7816, + "step": 2387 + }, + { + "epoch": 0.19272052296021305, + "grad_norm": 0.8342583179473877, + "learning_rate": 0.00019354719292717163, + "loss": 2.8204, + "step": 2388 + }, + { + "epoch": 0.19280122669679606, + "grad_norm": 0.8160435557365417, + "learning_rate": 0.0001935416126559591, + "loss": 2.6938, + "step": 2389 + }, + { + "epoch": 0.19288193043337906, + "grad_norm": 0.7888174653053284, + "learning_rate": 0.00019353603005344504, + "loss": 2.6804, + "step": 2390 + }, + { + "epoch": 0.19296263416996207, + "grad_norm": 0.8389205932617188, + "learning_rate": 0.00019353044511976865, + "loss": 2.7571, + "step": 2391 + }, + { + "epoch": 0.19304333790654507, + "grad_norm": 0.7920562028884888, + "learning_rate": 0.00019352485785506906, + "loss": 2.7174, + "step": 2392 + }, + { + "epoch": 0.19312404164312807, + "grad_norm": 0.7853459715843201, + "learning_rate": 0.00019351926825948555, + "loss": 2.7626, + "step": 2393 + }, + { + "epoch": 0.19320474537971108, + "grad_norm": 0.9109459519386292, + "learning_rate": 0.0001935136763331574, + "loss": 2.7568, + "step": 2394 + }, + { + "epoch": 0.19328544911629408, + "grad_norm": 0.7983853816986084, + "learning_rate": 0.00019350808207622397, + "loss": 2.7412, + "step": 2395 + }, + { + "epoch": 0.1933661528528771, + "grad_norm": 0.7416854500770569, + "learning_rate": 0.00019350248548882472, + "loss": 2.7335, + "step": 2396 + }, 
+ { + "epoch": 0.1934468565894601, + "grad_norm": 0.7305171489715576, + "learning_rate": 0.0001934968865710991, + "loss": 2.7295, + "step": 2397 + }, + { + "epoch": 0.1935275603260431, + "grad_norm": 0.7717033624649048, + "learning_rate": 0.0001934912853231867, + "loss": 2.7568, + "step": 2398 + }, + { + "epoch": 0.1936082640626261, + "grad_norm": 0.7833831906318665, + "learning_rate": 0.00019348568174522705, + "loss": 2.736, + "step": 2399 + }, + { + "epoch": 0.1936889677992091, + "grad_norm": 0.872831404209137, + "learning_rate": 0.00019348007583735983, + "loss": 2.7719, + "step": 2400 + }, + { + "epoch": 0.1937696715357921, + "grad_norm": 0.8389193415641785, + "learning_rate": 0.0001934744675997248, + "loss": 2.7572, + "step": 2401 + }, + { + "epoch": 0.19385037527237511, + "grad_norm": 0.8442249298095703, + "learning_rate": 0.00019346885703246165, + "loss": 2.8117, + "step": 2402 + }, + { + "epoch": 0.19393107900895812, + "grad_norm": 0.8451170325279236, + "learning_rate": 0.00019346324413571027, + "loss": 2.7216, + "step": 2403 + }, + { + "epoch": 0.19401178274554112, + "grad_norm": 0.898529052734375, + "learning_rate": 0.00019345762890961052, + "loss": 2.8119, + "step": 2404 + }, + { + "epoch": 0.19409248648212413, + "grad_norm": 0.8302313685417175, + "learning_rate": 0.00019345201135430236, + "loss": 2.76, + "step": 2405 + }, + { + "epoch": 0.19417319021870713, + "grad_norm": 0.8975207209587097, + "learning_rate": 0.00019344639146992582, + "loss": 2.8043, + "step": 2406 + }, + { + "epoch": 0.19425389395529014, + "grad_norm": 0.8972581028938293, + "learning_rate": 0.0001934407692566209, + "loss": 2.7487, + "step": 2407 + }, + { + "epoch": 0.19433459769187314, + "grad_norm": 0.8311447501182556, + "learning_rate": 0.00019343514471452776, + "loss": 2.7653, + "step": 2408 + }, + { + "epoch": 0.19441530142845614, + "grad_norm": 0.8336243033409119, + "learning_rate": 0.0001934295178437866, + "loss": 2.753, + "step": 2409 + }, + { + "epoch": 0.19449600516503915, + 
"grad_norm": 0.8339207172393799, + "learning_rate": 0.0001934238886445376, + "loss": 2.7643, + "step": 2410 + }, + { + "epoch": 0.19457670890162215, + "grad_norm": 0.906074583530426, + "learning_rate": 0.0001934182571169211, + "loss": 2.7777, + "step": 2411 + }, + { + "epoch": 0.19465741263820516, + "grad_norm": 0.8759943246841431, + "learning_rate": 0.00019341262326107742, + "loss": 2.77, + "step": 2412 + }, + { + "epoch": 0.19473811637478816, + "grad_norm": 0.8399369716644287, + "learning_rate": 0.00019340698707714699, + "loss": 2.752, + "step": 2413 + }, + { + "epoch": 0.19481882011137117, + "grad_norm": 0.8551808595657349, + "learning_rate": 0.00019340134856527026, + "loss": 2.6727, + "step": 2414 + }, + { + "epoch": 0.19489952384795417, + "grad_norm": 0.7660732865333557, + "learning_rate": 0.00019339570772558778, + "loss": 2.7491, + "step": 2415 + }, + { + "epoch": 0.19498022758453717, + "grad_norm": 0.8257685303688049, + "learning_rate": 0.00019339006455824015, + "loss": 2.7584, + "step": 2416 + }, + { + "epoch": 0.19506093132112018, + "grad_norm": 0.797275960445404, + "learning_rate": 0.00019338441906336794, + "loss": 2.7051, + "step": 2417 + }, + { + "epoch": 0.19514163505770318, + "grad_norm": 0.8311913013458252, + "learning_rate": 0.00019337877124111193, + "loss": 2.8084, + "step": 2418 + }, + { + "epoch": 0.1952223387942862, + "grad_norm": 0.7995893359184265, + "learning_rate": 0.0001933731210916128, + "loss": 2.7556, + "step": 2419 + }, + { + "epoch": 0.1953030425308692, + "grad_norm": 0.792850136756897, + "learning_rate": 0.00019336746861501147, + "loss": 2.7289, + "step": 2420 + }, + { + "epoch": 0.1953837462674522, + "grad_norm": 0.8058848977088928, + "learning_rate": 0.00019336181381144873, + "loss": 2.7394, + "step": 2421 + }, + { + "epoch": 0.1954644500040352, + "grad_norm": 0.8267124891281128, + "learning_rate": 0.00019335615668106555, + "loss": 2.771, + "step": 2422 + }, + { + "epoch": 0.19554515374061818, + "grad_norm": 0.7641060948371887, + 
"learning_rate": 0.00019335049722400292, + "loss": 2.7311, + "step": 2423 + }, + { + "epoch": 0.19562585747720118, + "grad_norm": 0.8023245930671692, + "learning_rate": 0.00019334483544040186, + "loss": 2.7658, + "step": 2424 + }, + { + "epoch": 0.19570656121378419, + "grad_norm": 0.8341927528381348, + "learning_rate": 0.00019333917133040348, + "loss": 2.7476, + "step": 2425 + }, + { + "epoch": 0.1957872649503672, + "grad_norm": 0.7985726594924927, + "learning_rate": 0.000193333504894149, + "loss": 2.7362, + "step": 2426 + }, + { + "epoch": 0.1958679686869502, + "grad_norm": 0.7267594933509827, + "learning_rate": 0.0001933278361317796, + "loss": 2.6875, + "step": 2427 + }, + { + "epoch": 0.1959486724235332, + "grad_norm": 0.8292990326881409, + "learning_rate": 0.00019332216504343652, + "loss": 2.7619, + "step": 2428 + }, + { + "epoch": 0.1960293761601162, + "grad_norm": 0.7549588680267334, + "learning_rate": 0.00019331649162926116, + "loss": 2.7385, + "step": 2429 + }, + { + "epoch": 0.1961100798966992, + "grad_norm": 0.7688446640968323, + "learning_rate": 0.0001933108158893949, + "loss": 2.7544, + "step": 2430 + }, + { + "epoch": 0.1961907836332822, + "grad_norm": 0.8168436884880066, + "learning_rate": 0.00019330513782397918, + "loss": 2.8013, + "step": 2431 + }, + { + "epoch": 0.19627148736986522, + "grad_norm": 0.8405759334564209, + "learning_rate": 0.00019329945743315556, + "loss": 2.7299, + "step": 2432 + }, + { + "epoch": 0.19635219110644822, + "grad_norm": 0.79430091381073, + "learning_rate": 0.00019329377471706554, + "loss": 2.7293, + "step": 2433 + }, + { + "epoch": 0.19643289484303122, + "grad_norm": 0.8428656458854675, + "learning_rate": 0.0001932880896758508, + "loss": 2.8211, + "step": 2434 + }, + { + "epoch": 0.19651359857961423, + "grad_norm": 0.7883139252662659, + "learning_rate": 0.00019328240230965298, + "loss": 2.6943, + "step": 2435 + }, + { + "epoch": 0.19659430231619723, + "grad_norm": 0.7539335489273071, + "learning_rate": 
0.00019327671261861387, + "loss": 2.6926, + "step": 2436 + }, + { + "epoch": 0.19667500605278024, + "grad_norm": 0.9986057281494141, + "learning_rate": 0.00019327102060287524, + "loss": 2.7851, + "step": 2437 + }, + { + "epoch": 0.19675570978936324, + "grad_norm": 0.7716113924980164, + "learning_rate": 0.000193265326262579, + "loss": 2.752, + "step": 2438 + }, + { + "epoch": 0.19683641352594625, + "grad_norm": 0.9134296774864197, + "learning_rate": 0.000193259629597867, + "loss": 2.7698, + "step": 2439 + }, + { + "epoch": 0.19691711726252925, + "grad_norm": 0.7966345548629761, + "learning_rate": 0.00019325393060888124, + "loss": 2.7839, + "step": 2440 + }, + { + "epoch": 0.19699782099911226, + "grad_norm": 0.8051251173019409, + "learning_rate": 0.0001932482292957638, + "loss": 2.7322, + "step": 2441 + }, + { + "epoch": 0.19707852473569526, + "grad_norm": 0.843169629573822, + "learning_rate": 0.0001932425256586567, + "loss": 2.8263, + "step": 2442 + }, + { + "epoch": 0.19715922847227826, + "grad_norm": 0.7552370429039001, + "learning_rate": 0.00019323681969770213, + "loss": 2.7342, + "step": 2443 + }, + { + "epoch": 0.19723993220886127, + "grad_norm": 0.844473123550415, + "learning_rate": 0.0001932311114130423, + "loss": 2.776, + "step": 2444 + }, + { + "epoch": 0.19732063594544427, + "grad_norm": 0.8002473711967468, + "learning_rate": 0.00019322540080481945, + "loss": 2.7382, + "step": 2445 + }, + { + "epoch": 0.19740133968202728, + "grad_norm": 0.8564329147338867, + "learning_rate": 0.00019321968787317594, + "loss": 2.7592, + "step": 2446 + }, + { + "epoch": 0.19748204341861028, + "grad_norm": 0.7853825688362122, + "learning_rate": 0.00019321397261825408, + "loss": 2.7101, + "step": 2447 + }, + { + "epoch": 0.19756274715519329, + "grad_norm": 0.8482939004898071, + "learning_rate": 0.0001932082550401964, + "loss": 2.7891, + "step": 2448 + }, + { + "epoch": 0.1976434508917763, + "grad_norm": 0.8361770510673523, + "learning_rate": 0.00019320253513914536, + "loss": 
2.7341, + "step": 2449 + }, + { + "epoch": 0.1977241546283593, + "grad_norm": 0.7814618945121765, + "learning_rate": 0.0001931968129152435, + "loss": 2.771, + "step": 2450 + }, + { + "epoch": 0.1978048583649423, + "grad_norm": 0.7588146924972534, + "learning_rate": 0.00019319108836863343, + "loss": 2.7577, + "step": 2451 + }, + { + "epoch": 0.1978855621015253, + "grad_norm": 0.9184895157814026, + "learning_rate": 0.00019318536149945785, + "loss": 2.7711, + "step": 2452 + }, + { + "epoch": 0.1979662658381083, + "grad_norm": 0.8454298973083496, + "learning_rate": 0.00019317963230785947, + "loss": 2.7748, + "step": 2453 + }, + { + "epoch": 0.1980469695746913, + "grad_norm": 0.7662420868873596, + "learning_rate": 0.0001931739007939811, + "loss": 2.7704, + "step": 2454 + }, + { + "epoch": 0.19812767331127432, + "grad_norm": 0.837888777256012, + "learning_rate": 0.0001931681669579655, + "loss": 2.7613, + "step": 2455 + }, + { + "epoch": 0.19820837704785732, + "grad_norm": 0.7835226058959961, + "learning_rate": 0.0001931624307999557, + "loss": 2.6888, + "step": 2456 + }, + { + "epoch": 0.19828908078444032, + "grad_norm": 0.8491464257240295, + "learning_rate": 0.00019315669232009456, + "loss": 2.7521, + "step": 2457 + }, + { + "epoch": 0.19836978452102333, + "grad_norm": 0.7590088248252869, + "learning_rate": 0.00019315095151852516, + "loss": 2.7441, + "step": 2458 + }, + { + "epoch": 0.19845048825760633, + "grad_norm": 0.9316127300262451, + "learning_rate": 0.00019314520839539052, + "loss": 2.786, + "step": 2459 + }, + { + "epoch": 0.19853119199418934, + "grad_norm": 0.7819615006446838, + "learning_rate": 0.0001931394629508338, + "loss": 2.7003, + "step": 2460 + }, + { + "epoch": 0.19861189573077234, + "grad_norm": 0.7675932049751282, + "learning_rate": 0.0001931337151849982, + "loss": 2.7065, + "step": 2461 + }, + { + "epoch": 0.19869259946735535, + "grad_norm": 0.7797678112983704, + "learning_rate": 0.000193127965098027, + "loss": 2.7605, + "step": 2462 + }, + { + 
"epoch": 0.19877330320393835, + "grad_norm": 0.789544403553009, + "learning_rate": 0.00019312221269006345, + "loss": 2.7913, + "step": 2463 + }, + { + "epoch": 0.19885400694052136, + "grad_norm": 0.9594957232475281, + "learning_rate": 0.00019311645796125094, + "loss": 2.785, + "step": 2464 + }, + { + "epoch": 0.19893471067710436, + "grad_norm": 0.8154739141464233, + "learning_rate": 0.00019311070091173287, + "loss": 2.6716, + "step": 2465 + }, + { + "epoch": 0.19901541441368736, + "grad_norm": 0.9042142629623413, + "learning_rate": 0.00019310494154165274, + "loss": 2.734, + "step": 2466 + }, + { + "epoch": 0.19909611815027037, + "grad_norm": 0.7803483605384827, + "learning_rate": 0.0001930991798511541, + "loss": 2.7052, + "step": 2467 + }, + { + "epoch": 0.19917682188685337, + "grad_norm": 0.7917614579200745, + "learning_rate": 0.00019309341584038055, + "loss": 2.728, + "step": 2468 + }, + { + "epoch": 0.19925752562343638, + "grad_norm": 0.8295063376426697, + "learning_rate": 0.00019308764950947568, + "loss": 2.7496, + "step": 2469 + }, + { + "epoch": 0.19933822936001938, + "grad_norm": 0.790831983089447, + "learning_rate": 0.0001930818808585833, + "loss": 2.7356, + "step": 2470 + }, + { + "epoch": 0.19941893309660239, + "grad_norm": 0.8527843952178955, + "learning_rate": 0.0001930761098878471, + "loss": 2.718, + "step": 2471 + }, + { + "epoch": 0.1994996368331854, + "grad_norm": 0.8518494367599487, + "learning_rate": 0.00019307033659741096, + "loss": 2.7189, + "step": 2472 + }, + { + "epoch": 0.1995803405697684, + "grad_norm": 0.8027220368385315, + "learning_rate": 0.00019306456098741872, + "loss": 2.7272, + "step": 2473 + }, + { + "epoch": 0.19966104430635137, + "grad_norm": 0.7516468167304993, + "learning_rate": 0.00019305878305801434, + "loss": 2.798, + "step": 2474 + }, + { + "epoch": 0.19974174804293438, + "grad_norm": 0.7676397562026978, + "learning_rate": 0.00019305300280934187, + "loss": 2.8076, + "step": 2475 + }, + { + "epoch": 0.19982245177951738, + 
"grad_norm": 0.8237762451171875, + "learning_rate": 0.00019304722024154528, + "loss": 2.6998, + "step": 2476 + }, + { + "epoch": 0.19990315551610038, + "grad_norm": 0.8397759199142456, + "learning_rate": 0.0001930414353547688, + "loss": 2.806, + "step": 2477 + }, + { + "epoch": 0.1999838592526834, + "grad_norm": 0.8911117911338806, + "learning_rate": 0.00019303564814915645, + "loss": 2.7566, + "step": 2478 + }, + { + "epoch": 0.2000645629892664, + "grad_norm": 0.765404999256134, + "learning_rate": 0.00019302985862485264, + "loss": 2.7363, + "step": 2479 + }, + { + "epoch": 0.2001452667258494, + "grad_norm": 0.7898589372634888, + "learning_rate": 0.0001930240667820015, + "loss": 2.7007, + "step": 2480 + }, + { + "epoch": 0.2002259704624324, + "grad_norm": 0.7581521272659302, + "learning_rate": 0.0001930182726207475, + "loss": 2.7508, + "step": 2481 + }, + { + "epoch": 0.2003066741990154, + "grad_norm": 0.8179795742034912, + "learning_rate": 0.00019301247614123495, + "loss": 2.7327, + "step": 2482 + }, + { + "epoch": 0.2003873779355984, + "grad_norm": 0.8103611469268799, + "learning_rate": 0.00019300667734360838, + "loss": 2.7869, + "step": 2483 + }, + { + "epoch": 0.20046808167218141, + "grad_norm": 0.7368054389953613, + "learning_rate": 0.0001930008762280123, + "loss": 2.73, + "step": 2484 + }, + { + "epoch": 0.20054878540876442, + "grad_norm": 0.7679662108421326, + "learning_rate": 0.00019299507279459127, + "loss": 2.7905, + "step": 2485 + }, + { + "epoch": 0.20062948914534742, + "grad_norm": 0.7783839702606201, + "learning_rate": 0.0001929892670434899, + "loss": 2.6816, + "step": 2486 + }, + { + "epoch": 0.20071019288193043, + "grad_norm": 0.7575809359550476, + "learning_rate": 0.00019298345897485298, + "loss": 2.7351, + "step": 2487 + }, + { + "epoch": 0.20079089661851343, + "grad_norm": 0.7674959301948547, + "learning_rate": 0.00019297764858882514, + "loss": 2.7682, + "step": 2488 + }, + { + "epoch": 0.20087160035509644, + "grad_norm": 0.7972592115402222, + 
"learning_rate": 0.00019297183588555127, + "loss": 2.782, + "step": 2489 + }, + { + "epoch": 0.20095230409167944, + "grad_norm": 0.8417105674743652, + "learning_rate": 0.00019296602086517624, + "loss": 2.8173, + "step": 2490 + }, + { + "epoch": 0.20103300782826244, + "grad_norm": 0.7194239497184753, + "learning_rate": 0.00019296020352784496, + "loss": 2.7735, + "step": 2491 + }, + { + "epoch": 0.20111371156484545, + "grad_norm": 0.801895022392273, + "learning_rate": 0.00019295438387370237, + "loss": 2.7018, + "step": 2492 + }, + { + "epoch": 0.20119441530142845, + "grad_norm": 0.900943398475647, + "learning_rate": 0.0001929485619028936, + "loss": 2.77, + "step": 2493 + }, + { + "epoch": 0.20127511903801146, + "grad_norm": 0.7882106304168701, + "learning_rate": 0.00019294273761556366, + "loss": 2.7195, + "step": 2494 + }, + { + "epoch": 0.20135582277459446, + "grad_norm": 0.7471950054168701, + "learning_rate": 0.00019293691101185775, + "loss": 2.7346, + "step": 2495 + }, + { + "epoch": 0.20143652651117747, + "grad_norm": 0.7498352527618408, + "learning_rate": 0.00019293108209192104, + "loss": 2.7255, + "step": 2496 + }, + { + "epoch": 0.20151723024776047, + "grad_norm": 0.8233164548873901, + "learning_rate": 0.0001929252508558989, + "loss": 2.8253, + "step": 2497 + }, + { + "epoch": 0.20159793398434347, + "grad_norm": 0.7533289790153503, + "learning_rate": 0.00019291941730393658, + "loss": 2.7487, + "step": 2498 + }, + { + "epoch": 0.20167863772092648, + "grad_norm": 0.7372691035270691, + "learning_rate": 0.0001929135814361795, + "loss": 2.6799, + "step": 2499 + }, + { + "epoch": 0.20175934145750948, + "grad_norm": 0.7760890126228333, + "learning_rate": 0.00019290774325277305, + "loss": 2.8366, + "step": 2500 + }, + { + "epoch": 0.2018400451940925, + "grad_norm": 0.7653746008872986, + "learning_rate": 0.0001929019027538628, + "loss": 2.7413, + "step": 2501 + }, + { + "epoch": 0.2019207489306755, + "grad_norm": 0.7364951372146606, + "learning_rate": 
0.0001928960599395943, + "loss": 2.7405, + "step": 2502 + }, + { + "epoch": 0.2020014526672585, + "grad_norm": 0.8317872285842896, + "learning_rate": 0.00019289021481011314, + "loss": 2.7186, + "step": 2503 + }, + { + "epoch": 0.2020821564038415, + "grad_norm": 0.8325691223144531, + "learning_rate": 0.00019288436736556502, + "loss": 2.7305, + "step": 2504 + }, + { + "epoch": 0.2021628601404245, + "grad_norm": 0.7674683332443237, + "learning_rate": 0.00019287851760609566, + "loss": 2.7171, + "step": 2505 + }, + { + "epoch": 0.2022435638770075, + "grad_norm": 0.8043155074119568, + "learning_rate": 0.00019287266553185084, + "loss": 2.7425, + "step": 2506 + }, + { + "epoch": 0.2023242676135905, + "grad_norm": 0.8522058725357056, + "learning_rate": 0.00019286681114297642, + "loss": 2.7764, + "step": 2507 + }, + { + "epoch": 0.20240497135017352, + "grad_norm": 0.7700086236000061, + "learning_rate": 0.00019286095443961832, + "loss": 2.7499, + "step": 2508 + }, + { + "epoch": 0.20248567508675652, + "grad_norm": 0.8078013062477112, + "learning_rate": 0.0001928550954219225, + "loss": 2.7863, + "step": 2509 + }, + { + "epoch": 0.20256637882333953, + "grad_norm": 0.7431712746620178, + "learning_rate": 0.00019284923409003496, + "loss": 2.8296, + "step": 2510 + }, + { + "epoch": 0.20264708255992253, + "grad_norm": 0.753754734992981, + "learning_rate": 0.00019284337044410182, + "loss": 2.722, + "step": 2511 + }, + { + "epoch": 0.20272778629650554, + "grad_norm": 0.8117631077766418, + "learning_rate": 0.00019283750448426918, + "loss": 2.7718, + "step": 2512 + }, + { + "epoch": 0.20280849003308854, + "grad_norm": 0.9149020910263062, + "learning_rate": 0.00019283163621068325, + "loss": 2.7416, + "step": 2513 + }, + { + "epoch": 0.20288919376967154, + "grad_norm": 0.8240262866020203, + "learning_rate": 0.0001928257656234903, + "loss": 2.811, + "step": 2514 + }, + { + "epoch": 0.20296989750625455, + "grad_norm": 0.7394035458564758, + "learning_rate": 0.00019281989272283657, + "loss": 
2.7345, + "step": 2515 + }, + { + "epoch": 0.20305060124283755, + "grad_norm": 0.7827345132827759, + "learning_rate": 0.00019281401750886854, + "loss": 2.7955, + "step": 2516 + }, + { + "epoch": 0.20313130497942056, + "grad_norm": 0.7482333183288574, + "learning_rate": 0.00019280813998173252, + "loss": 2.6963, + "step": 2517 + }, + { + "epoch": 0.20321200871600356, + "grad_norm": 0.8187180757522583, + "learning_rate": 0.00019280226014157509, + "loss": 2.7413, + "step": 2518 + }, + { + "epoch": 0.20329271245258657, + "grad_norm": 0.7708666920661926, + "learning_rate": 0.00019279637798854274, + "loss": 2.7636, + "step": 2519 + }, + { + "epoch": 0.20337341618916957, + "grad_norm": 0.7414180040359497, + "learning_rate": 0.00019279049352278208, + "loss": 2.7321, + "step": 2520 + }, + { + "epoch": 0.20345411992575257, + "grad_norm": 0.8172248601913452, + "learning_rate": 0.00019278460674443975, + "loss": 2.8026, + "step": 2521 + }, + { + "epoch": 0.20353482366233558, + "grad_norm": 0.7463089227676392, + "learning_rate": 0.0001927787176536625, + "loss": 2.74, + "step": 2522 + }, + { + "epoch": 0.20361552739891858, + "grad_norm": 0.7684210538864136, + "learning_rate": 0.00019277282625059704, + "loss": 2.782, + "step": 2523 + }, + { + "epoch": 0.2036962311355016, + "grad_norm": 0.9246797561645508, + "learning_rate": 0.00019276693253539027, + "loss": 2.8546, + "step": 2524 + }, + { + "epoch": 0.20377693487208456, + "grad_norm": 0.753753125667572, + "learning_rate": 0.00019276103650818906, + "loss": 2.7422, + "step": 2525 + }, + { + "epoch": 0.20385763860866757, + "grad_norm": 0.7461897134780884, + "learning_rate": 0.00019275513816914032, + "loss": 2.7575, + "step": 2526 + }, + { + "epoch": 0.20393834234525057, + "grad_norm": 0.7555257081985474, + "learning_rate": 0.00019274923751839106, + "loss": 2.7423, + "step": 2527 + }, + { + "epoch": 0.20401904608183358, + "grad_norm": 0.7628511786460876, + "learning_rate": 0.00019274333455608837, + "loss": 2.7386, + "step": 2528 + }, + 
{ + "epoch": 0.20409974981841658, + "grad_norm": 0.7529371976852417, + "learning_rate": 0.00019273742928237937, + "loss": 2.6852, + "step": 2529 + }, + { + "epoch": 0.20418045355499959, + "grad_norm": 0.7466779351234436, + "learning_rate": 0.00019273152169741118, + "loss": 2.6996, + "step": 2530 + }, + { + "epoch": 0.2042611572915826, + "grad_norm": 0.7916153073310852, + "learning_rate": 0.0001927256118013311, + "loss": 2.7644, + "step": 2531 + }, + { + "epoch": 0.2043418610281656, + "grad_norm": 0.7662972211837769, + "learning_rate": 0.00019271969959428636, + "loss": 2.7497, + "step": 2532 + }, + { + "epoch": 0.2044225647647486, + "grad_norm": 0.8244680166244507, + "learning_rate": 0.00019271378507642432, + "loss": 2.7598, + "step": 2533 + }, + { + "epoch": 0.2045032685013316, + "grad_norm": 0.7721532583236694, + "learning_rate": 0.00019270786824789244, + "loss": 2.7303, + "step": 2534 + }, + { + "epoch": 0.2045839722379146, + "grad_norm": 0.7598209381103516, + "learning_rate": 0.0001927019491088381, + "loss": 2.734, + "step": 2535 + }, + { + "epoch": 0.2046646759744976, + "grad_norm": 0.7778685092926025, + "learning_rate": 0.00019269602765940887, + "loss": 2.7113, + "step": 2536 + }, + { + "epoch": 0.20474537971108062, + "grad_norm": 0.7447141408920288, + "learning_rate": 0.00019269010389975235, + "loss": 2.7205, + "step": 2537 + }, + { + "epoch": 0.20482608344766362, + "grad_norm": 0.8066664338111877, + "learning_rate": 0.00019268417783001613, + "loss": 2.7637, + "step": 2538 + }, + { + "epoch": 0.20490678718424662, + "grad_norm": 0.7055318355560303, + "learning_rate": 0.00019267824945034794, + "loss": 2.6936, + "step": 2539 + }, + { + "epoch": 0.20498749092082963, + "grad_norm": 0.832647979259491, + "learning_rate": 0.0001926723187608955, + "loss": 2.7423, + "step": 2540 + }, + { + "epoch": 0.20506819465741263, + "grad_norm": 0.7316983938217163, + "learning_rate": 0.0001926663857618066, + "loss": 2.7136, + "step": 2541 + }, + { + "epoch": 0.20514889839399564, + 
"grad_norm": 0.8115554451942444, + "learning_rate": 0.00019266045045322915, + "loss": 2.6964, + "step": 2542 + }, + { + "epoch": 0.20522960213057864, + "grad_norm": 0.802573025226593, + "learning_rate": 0.00019265451283531108, + "loss": 2.7989, + "step": 2543 + }, + { + "epoch": 0.20531030586716165, + "grad_norm": 0.7073348164558411, + "learning_rate": 0.00019264857290820033, + "loss": 2.7399, + "step": 2544 + }, + { + "epoch": 0.20539100960374465, + "grad_norm": 0.7749258279800415, + "learning_rate": 0.00019264263067204495, + "loss": 2.7321, + "step": 2545 + }, + { + "epoch": 0.20547171334032766, + "grad_norm": 0.7473557591438293, + "learning_rate": 0.00019263668612699305, + "loss": 2.7774, + "step": 2546 + }, + { + "epoch": 0.20555241707691066, + "grad_norm": 0.8073423504829407, + "learning_rate": 0.0001926307392731928, + "loss": 2.7429, + "step": 2547 + }, + { + "epoch": 0.20563312081349366, + "grad_norm": 0.9106586575508118, + "learning_rate": 0.00019262479011079235, + "loss": 2.7972, + "step": 2548 + }, + { + "epoch": 0.20571382455007667, + "grad_norm": 0.7975970506668091, + "learning_rate": 0.00019261883863994002, + "loss": 2.7561, + "step": 2549 + }, + { + "epoch": 0.20579452828665967, + "grad_norm": 0.8967030048370361, + "learning_rate": 0.00019261288486078414, + "loss": 2.7368, + "step": 2550 + }, + { + "epoch": 0.20587523202324268, + "grad_norm": 0.7157345414161682, + "learning_rate": 0.00019260692877347304, + "loss": 2.7329, + "step": 2551 + }, + { + "epoch": 0.20595593575982568, + "grad_norm": 0.8758620619773865, + "learning_rate": 0.00019260097037815524, + "loss": 2.7522, + "step": 2552 + }, + { + "epoch": 0.20603663949640869, + "grad_norm": 0.7948124408721924, + "learning_rate": 0.00019259500967497916, + "loss": 2.7675, + "step": 2553 + }, + { + "epoch": 0.2061173432329917, + "grad_norm": 0.8233941197395325, + "learning_rate": 0.00019258904666409344, + "loss": 2.7728, + "step": 2554 + }, + { + "epoch": 0.2061980469695747, + "grad_norm": 
0.8084299564361572, + "learning_rate": 0.0001925830813456466, + "loss": 2.7728, + "step": 2555 + }, + { + "epoch": 0.2062787507061577, + "grad_norm": 0.8004557490348816, + "learning_rate": 0.00019257711371978737, + "loss": 2.7783, + "step": 2556 + }, + { + "epoch": 0.2063594544427407, + "grad_norm": 0.7999755144119263, + "learning_rate": 0.0001925711437866645, + "loss": 2.7632, + "step": 2557 + }, + { + "epoch": 0.2064401581793237, + "grad_norm": 0.7317264080047607, + "learning_rate": 0.0001925651715464267, + "loss": 2.7101, + "step": 2558 + }, + { + "epoch": 0.2065208619159067, + "grad_norm": 0.7906385660171509, + "learning_rate": 0.00019255919699922287, + "loss": 2.7258, + "step": 2559 + }, + { + "epoch": 0.20660156565248972, + "grad_norm": 0.7932917475700378, + "learning_rate": 0.0001925532201452019, + "loss": 2.7714, + "step": 2560 + }, + { + "epoch": 0.20668226938907272, + "grad_norm": 0.8039286732673645, + "learning_rate": 0.00019254724098451275, + "loss": 2.7469, + "step": 2561 + }, + { + "epoch": 0.20676297312565572, + "grad_norm": 0.79400634765625, + "learning_rate": 0.00019254125951730444, + "loss": 2.7499, + "step": 2562 + }, + { + "epoch": 0.20684367686223873, + "grad_norm": 0.8072263598442078, + "learning_rate": 0.00019253527574372603, + "loss": 2.7805, + "step": 2563 + }, + { + "epoch": 0.20692438059882173, + "grad_norm": 0.7117579579353333, + "learning_rate": 0.00019252928966392667, + "loss": 2.7321, + "step": 2564 + }, + { + "epoch": 0.20700508433540474, + "grad_norm": 0.7080324292182922, + "learning_rate": 0.00019252330127805554, + "loss": 2.7225, + "step": 2565 + }, + { + "epoch": 0.20708578807198774, + "grad_norm": 0.7276670336723328, + "learning_rate": 0.00019251731058626186, + "loss": 2.7592, + "step": 2566 + }, + { + "epoch": 0.20716649180857075, + "grad_norm": 0.8030811548233032, + "learning_rate": 0.00019251131758869495, + "loss": 2.7184, + "step": 2567 + }, + { + "epoch": 0.20724719554515375, + "grad_norm": 0.7808283567428589, + 
"learning_rate": 0.0001925053222855042, + "loss": 2.7504, + "step": 2568 + }, + { + "epoch": 0.20732789928173675, + "grad_norm": 0.783225953578949, + "learning_rate": 0.00019249932467683902, + "loss": 2.7125, + "step": 2569 + }, + { + "epoch": 0.20740860301831976, + "grad_norm": 0.7440134286880493, + "learning_rate": 0.00019249332476284887, + "loss": 2.7938, + "step": 2570 + }, + { + "epoch": 0.20748930675490276, + "grad_norm": 0.8729553818702698, + "learning_rate": 0.00019248732254368328, + "loss": 2.8338, + "step": 2571 + }, + { + "epoch": 0.20757001049148577, + "grad_norm": 0.8170497417449951, + "learning_rate": 0.0001924813180194918, + "loss": 2.7254, + "step": 2572 + }, + { + "epoch": 0.20765071422806877, + "grad_norm": 0.733220100402832, + "learning_rate": 0.00019247531119042418, + "loss": 2.6401, + "step": 2573 + }, + { + "epoch": 0.20773141796465178, + "grad_norm": 0.7247937917709351, + "learning_rate": 0.00019246930205663008, + "loss": 2.736, + "step": 2574 + }, + { + "epoch": 0.20781212170123478, + "grad_norm": 0.7880212068557739, + "learning_rate": 0.00019246329061825925, + "loss": 2.7173, + "step": 2575 + }, + { + "epoch": 0.20789282543781776, + "grad_norm": 0.820808470249176, + "learning_rate": 0.00019245727687546149, + "loss": 2.7331, + "step": 2576 + }, + { + "epoch": 0.20797352917440076, + "grad_norm": 0.8605412840843201, + "learning_rate": 0.00019245126082838673, + "loss": 2.761, + "step": 2577 + }, + { + "epoch": 0.20805423291098377, + "grad_norm": 0.763506293296814, + "learning_rate": 0.00019244524247718486, + "loss": 2.7053, + "step": 2578 + }, + { + "epoch": 0.20813493664756677, + "grad_norm": 0.8428114652633667, + "learning_rate": 0.00019243922182200592, + "loss": 2.724, + "step": 2579 + }, + { + "epoch": 0.20821564038414977, + "grad_norm": 0.821986734867096, + "learning_rate": 0.0001924331988629999, + "loss": 2.7615, + "step": 2580 + }, + { + "epoch": 0.20829634412073278, + "grad_norm": 0.8177430629730225, + "learning_rate": 
0.00019242717360031693, + "loss": 2.7012, + "step": 2581 + }, + { + "epoch": 0.20837704785731578, + "grad_norm": 0.7584180235862732, + "learning_rate": 0.00019242114603410724, + "loss": 2.7372, + "step": 2582 + }, + { + "epoch": 0.2084577515938988, + "grad_norm": 0.9384645223617554, + "learning_rate": 0.00019241511616452096, + "loss": 2.695, + "step": 2583 + }, + { + "epoch": 0.2085384553304818, + "grad_norm": 0.8518964648246765, + "learning_rate": 0.00019240908399170844, + "loss": 2.8216, + "step": 2584 + }, + { + "epoch": 0.2086191590670648, + "grad_norm": 0.9082949161529541, + "learning_rate": 0.00019240304951581995, + "loss": 2.777, + "step": 2585 + }, + { + "epoch": 0.2086998628036478, + "grad_norm": 0.7906371355056763, + "learning_rate": 0.00019239701273700597, + "loss": 2.7083, + "step": 2586 + }, + { + "epoch": 0.2087805665402308, + "grad_norm": 0.7711954712867737, + "learning_rate": 0.00019239097365541686, + "loss": 2.6907, + "step": 2587 + }, + { + "epoch": 0.2088612702768138, + "grad_norm": 0.8155506253242493, + "learning_rate": 0.0001923849322712032, + "loss": 2.7602, + "step": 2588 + }, + { + "epoch": 0.20894197401339681, + "grad_norm": 0.8843441009521484, + "learning_rate": 0.0001923788885845155, + "loss": 2.7525, + "step": 2589 + }, + { + "epoch": 0.20902267774997982, + "grad_norm": 0.7336379289627075, + "learning_rate": 0.00019237284259550444, + "loss": 2.731, + "step": 2590 + }, + { + "epoch": 0.20910338148656282, + "grad_norm": 0.8261263370513916, + "learning_rate": 0.00019236679430432066, + "loss": 2.6493, + "step": 2591 + }, + { + "epoch": 0.20918408522314583, + "grad_norm": 0.7716216444969177, + "learning_rate": 0.00019236074371111497, + "loss": 2.7775, + "step": 2592 + }, + { + "epoch": 0.20926478895972883, + "grad_norm": 0.8390100598335266, + "learning_rate": 0.00019235469081603808, + "loss": 2.7532, + "step": 2593 + }, + { + "epoch": 0.20934549269631184, + "grad_norm": 0.8388446569442749, + "learning_rate": 0.00019234863561924087, + "loss": 
2.8171, + "step": 2594 + }, + { + "epoch": 0.20942619643289484, + "grad_norm": 0.8003209829330444, + "learning_rate": 0.00019234257812087425, + "loss": 2.7385, + "step": 2595 + }, + { + "epoch": 0.20950690016947784, + "grad_norm": 0.8008458018302917, + "learning_rate": 0.00019233651832108918, + "loss": 2.7366, + "step": 2596 + }, + { + "epoch": 0.20958760390606085, + "grad_norm": 0.7701897025108337, + "learning_rate": 0.00019233045622003676, + "loss": 2.69, + "step": 2597 + }, + { + "epoch": 0.20966830764264385, + "grad_norm": 0.8106730580329895, + "learning_rate": 0.00019232439181786796, + "loss": 2.6911, + "step": 2598 + }, + { + "epoch": 0.20974901137922686, + "grad_norm": 0.9580766558647156, + "learning_rate": 0.00019231832511473401, + "loss": 2.7663, + "step": 2599 + }, + { + "epoch": 0.20982971511580986, + "grad_norm": 0.7851876616477966, + "learning_rate": 0.0001923122561107861, + "loss": 2.7632, + "step": 2600 + }, + { + "epoch": 0.20991041885239287, + "grad_norm": 0.8160942196846008, + "learning_rate": 0.0001923061848061754, + "loss": 2.8533, + "step": 2601 + }, + { + "epoch": 0.20999112258897587, + "grad_norm": 0.8540663719177246, + "learning_rate": 0.00019230011120105334, + "loss": 2.7083, + "step": 2602 + }, + { + "epoch": 0.21007182632555887, + "grad_norm": 0.8273833394050598, + "learning_rate": 0.0001922940352955712, + "loss": 2.7916, + "step": 2603 + }, + { + "epoch": 0.21015253006214188, + "grad_norm": 0.8394255638122559, + "learning_rate": 0.00019228795708988046, + "loss": 2.8561, + "step": 2604 + }, + { + "epoch": 0.21023323379872488, + "grad_norm": 0.8291410803794861, + "learning_rate": 0.00019228187658413258, + "loss": 2.7462, + "step": 2605 + }, + { + "epoch": 0.2103139375353079, + "grad_norm": 0.7984235286712646, + "learning_rate": 0.00019227579377847912, + "loss": 2.7459, + "step": 2606 + }, + { + "epoch": 0.2103946412718909, + "grad_norm": 0.8343340158462524, + "learning_rate": 0.00019226970867307163, + "loss": 2.6963, + "step": 2607 + }, + 
{ + "epoch": 0.2104753450084739, + "grad_norm": 0.6982808709144592, + "learning_rate": 0.00019226362126806184, + "loss": 2.7333, + "step": 2608 + }, + { + "epoch": 0.2105560487450569, + "grad_norm": 0.8039572834968567, + "learning_rate": 0.0001922575315636014, + "loss": 2.7253, + "step": 2609 + }, + { + "epoch": 0.2106367524816399, + "grad_norm": 0.8708705902099609, + "learning_rate": 0.00019225143955984214, + "loss": 2.7555, + "step": 2610 + }, + { + "epoch": 0.2107174562182229, + "grad_norm": 0.8773347735404968, + "learning_rate": 0.00019224534525693585, + "loss": 2.7598, + "step": 2611 + }, + { + "epoch": 0.2107981599548059, + "grad_norm": 0.8151054978370667, + "learning_rate": 0.0001922392486550344, + "loss": 2.7398, + "step": 2612 + }, + { + "epoch": 0.21087886369138892, + "grad_norm": 0.7922329306602478, + "learning_rate": 0.0001922331497542898, + "loss": 2.7296, + "step": 2613 + }, + { + "epoch": 0.21095956742797192, + "grad_norm": 0.7536506652832031, + "learning_rate": 0.00019222704855485396, + "loss": 2.7897, + "step": 2614 + }, + { + "epoch": 0.21104027116455493, + "grad_norm": 0.7539274096488953, + "learning_rate": 0.000192220945056879, + "loss": 2.7809, + "step": 2615 + }, + { + "epoch": 0.21112097490113793, + "grad_norm": 0.7737646698951721, + "learning_rate": 0.00019221483926051705, + "loss": 2.7195, + "step": 2616 + }, + { + "epoch": 0.21120167863772094, + "grad_norm": 0.7421913743019104, + "learning_rate": 0.00019220873116592024, + "loss": 2.6817, + "step": 2617 + }, + { + "epoch": 0.21128238237430394, + "grad_norm": 0.7872927784919739, + "learning_rate": 0.0001922026207732408, + "loss": 2.7379, + "step": 2618 + }, + { + "epoch": 0.21136308611088694, + "grad_norm": 0.7950671315193176, + "learning_rate": 0.00019219650808263104, + "loss": 2.7135, + "step": 2619 + }, + { + "epoch": 0.21144378984746995, + "grad_norm": 0.7711792588233948, + "learning_rate": 0.0001921903930942433, + "loss": 2.7021, + "step": 2620 + }, + { + "epoch": 0.21152449358405295, + 
"grad_norm": 0.9030743837356567, + "learning_rate": 0.00019218427580822996, + "loss": 2.8083, + "step": 2621 + }, + { + "epoch": 0.21160519732063596, + "grad_norm": 0.8191907405853271, + "learning_rate": 0.0001921781562247435, + "loss": 2.6998, + "step": 2622 + }, + { + "epoch": 0.21168590105721896, + "grad_norm": 0.7883538603782654, + "learning_rate": 0.00019217203434393644, + "loss": 2.7573, + "step": 2623 + }, + { + "epoch": 0.21176660479380197, + "grad_norm": 0.7565868496894836, + "learning_rate": 0.00019216591016596134, + "loss": 2.7725, + "step": 2624 + }, + { + "epoch": 0.21184730853038497, + "grad_norm": 0.8579828143119812, + "learning_rate": 0.00019215978369097086, + "loss": 2.7529, + "step": 2625 + }, + { + "epoch": 0.21192801226696797, + "grad_norm": 0.7835422158241272, + "learning_rate": 0.0001921536549191176, + "loss": 2.6926, + "step": 2626 + }, + { + "epoch": 0.21200871600355095, + "grad_norm": 0.8041907548904419, + "learning_rate": 0.00019214752385055442, + "loss": 2.7541, + "step": 2627 + }, + { + "epoch": 0.21208941974013396, + "grad_norm": 0.7754014730453491, + "learning_rate": 0.00019214139048543406, + "loss": 2.6807, + "step": 2628 + }, + { + "epoch": 0.21217012347671696, + "grad_norm": 0.8222344517707825, + "learning_rate": 0.00019213525482390936, + "loss": 2.7339, + "step": 2629 + }, + { + "epoch": 0.21225082721329996, + "grad_norm": 0.8083673715591431, + "learning_rate": 0.0001921291168661333, + "loss": 2.739, + "step": 2630 + }, + { + "epoch": 0.21233153094988297, + "grad_norm": 0.8039100766181946, + "learning_rate": 0.0001921229766122588, + "loss": 2.7372, + "step": 2631 + }, + { + "epoch": 0.21241223468646597, + "grad_norm": 0.7513072490692139, + "learning_rate": 0.00019211683406243892, + "loss": 2.7284, + "step": 2632 + }, + { + "epoch": 0.21249293842304898, + "grad_norm": 0.7653890252113342, + "learning_rate": 0.00019211068921682673, + "loss": 2.6911, + "step": 2633 + }, + { + "epoch": 0.21257364215963198, + "grad_norm": 
0.7210217714309692, + "learning_rate": 0.00019210454207557542, + "loss": 2.6989, + "step": 2634 + }, + { + "epoch": 0.21265434589621499, + "grad_norm": 0.7389202117919922, + "learning_rate": 0.00019209839263883814, + "loss": 2.7016, + "step": 2635 + }, + { + "epoch": 0.212735049632798, + "grad_norm": 0.8069031238555908, + "learning_rate": 0.00019209224090676813, + "loss": 2.8213, + "step": 2636 + }, + { + "epoch": 0.212815753369381, + "grad_norm": 0.8019161224365234, + "learning_rate": 0.00019208608687951877, + "loss": 2.7413, + "step": 2637 + }, + { + "epoch": 0.212896457105964, + "grad_norm": 0.775572657585144, + "learning_rate": 0.00019207993055724343, + "loss": 2.7016, + "step": 2638 + }, + { + "epoch": 0.212977160842547, + "grad_norm": 0.7482941746711731, + "learning_rate": 0.0001920737719400955, + "loss": 2.7991, + "step": 2639 + }, + { + "epoch": 0.21305786457913, + "grad_norm": 0.8467636704444885, + "learning_rate": 0.0001920676110282285, + "loss": 2.7401, + "step": 2640 + }, + { + "epoch": 0.213138568315713, + "grad_norm": 0.8726305365562439, + "learning_rate": 0.00019206144782179597, + "loss": 2.7599, + "step": 2641 + }, + { + "epoch": 0.21321927205229602, + "grad_norm": 0.740527868270874, + "learning_rate": 0.00019205528232095148, + "loss": 2.7326, + "step": 2642 + }, + { + "epoch": 0.21329997578887902, + "grad_norm": 0.7932354211807251, + "learning_rate": 0.00019204911452584873, + "loss": 2.7873, + "step": 2643 + }, + { + "epoch": 0.21338067952546202, + "grad_norm": 0.7994125485420227, + "learning_rate": 0.00019204294443664143, + "loss": 2.7305, + "step": 2644 + }, + { + "epoch": 0.21346138326204503, + "grad_norm": 0.880557656288147, + "learning_rate": 0.00019203677205348338, + "loss": 2.7295, + "step": 2645 + }, + { + "epoch": 0.21354208699862803, + "grad_norm": 0.8269557952880859, + "learning_rate": 0.00019203059737652836, + "loss": 2.765, + "step": 2646 + }, + { + "epoch": 0.21362279073521104, + "grad_norm": 0.8732784986495972, + "learning_rate": 
0.00019202442040593026, + "loss": 2.6742, + "step": 2647 + }, + { + "epoch": 0.21370349447179404, + "grad_norm": 0.7921704649925232, + "learning_rate": 0.0001920182411418431, + "loss": 2.7144, + "step": 2648 + }, + { + "epoch": 0.21378419820837705, + "grad_norm": 0.8097628355026245, + "learning_rate": 0.00019201205958442082, + "loss": 2.7513, + "step": 2649 + }, + { + "epoch": 0.21386490194496005, + "grad_norm": 0.8230542540550232, + "learning_rate": 0.00019200587573381744, + "loss": 2.7648, + "step": 2650 + }, + { + "epoch": 0.21394560568154306, + "grad_norm": 0.7719153761863708, + "learning_rate": 0.0001919996895901872, + "loss": 2.7637, + "step": 2651 + }, + { + "epoch": 0.21402630941812606, + "grad_norm": 0.9022669792175293, + "learning_rate": 0.00019199350115368415, + "loss": 2.7707, + "step": 2652 + }, + { + "epoch": 0.21410701315470906, + "grad_norm": 0.8111257553100586, + "learning_rate": 0.00019198731042446263, + "loss": 2.7423, + "step": 2653 + }, + { + "epoch": 0.21418771689129207, + "grad_norm": 0.7534981966018677, + "learning_rate": 0.00019198111740267683, + "loss": 2.7474, + "step": 2654 + }, + { + "epoch": 0.21426842062787507, + "grad_norm": 0.761411190032959, + "learning_rate": 0.00019197492208848117, + "loss": 2.7541, + "step": 2655 + }, + { + "epoch": 0.21434912436445808, + "grad_norm": 0.8076324462890625, + "learning_rate": 0.00019196872448203002, + "loss": 2.7198, + "step": 2656 + }, + { + "epoch": 0.21442982810104108, + "grad_norm": 0.7987746000289917, + "learning_rate": 0.00019196252458347784, + "loss": 2.7164, + "step": 2657 + }, + { + "epoch": 0.21451053183762409, + "grad_norm": 0.7581545114517212, + "learning_rate": 0.0001919563223929792, + "loss": 2.6837, + "step": 2658 + }, + { + "epoch": 0.2145912355742071, + "grad_norm": 0.8773601055145264, + "learning_rate": 0.00019195011791068857, + "loss": 2.8248, + "step": 2659 + }, + { + "epoch": 0.2146719393107901, + "grad_norm": 0.7027503252029419, + "learning_rate": 0.00019194391113676066, + 
"loss": 2.6726, + "step": 2660 + }, + { + "epoch": 0.2147526430473731, + "grad_norm": 0.8650866746902466, + "learning_rate": 0.00019193770207135015, + "loss": 2.7348, + "step": 2661 + }, + { + "epoch": 0.2148333467839561, + "grad_norm": 0.8521862030029297, + "learning_rate": 0.0001919314907146118, + "loss": 2.7409, + "step": 2662 + }, + { + "epoch": 0.2149140505205391, + "grad_norm": 0.8098535537719727, + "learning_rate": 0.00019192527706670033, + "loss": 2.7615, + "step": 2663 + }, + { + "epoch": 0.2149947542571221, + "grad_norm": 0.7396193146705627, + "learning_rate": 0.0001919190611277707, + "loss": 2.7191, + "step": 2664 + }, + { + "epoch": 0.21507545799370512, + "grad_norm": 0.8245799541473389, + "learning_rate": 0.00019191284289797776, + "loss": 2.7429, + "step": 2665 + }, + { + "epoch": 0.21515616173028812, + "grad_norm": 0.791646420955658, + "learning_rate": 0.00019190662237747656, + "loss": 2.7197, + "step": 2666 + }, + { + "epoch": 0.21523686546687112, + "grad_norm": 0.7850802540779114, + "learning_rate": 0.00019190039956642205, + "loss": 2.7353, + "step": 2667 + }, + { + "epoch": 0.21531756920345413, + "grad_norm": 0.7657971978187561, + "learning_rate": 0.00019189417446496937, + "loss": 2.7083, + "step": 2668 + }, + { + "epoch": 0.21539827294003713, + "grad_norm": 0.7704403400421143, + "learning_rate": 0.00019188794707327363, + "loss": 2.7813, + "step": 2669 + }, + { + "epoch": 0.21547897667662014, + "grad_norm": 0.7345917224884033, + "learning_rate": 0.00019188171739149005, + "loss": 2.7098, + "step": 2670 + }, + { + "epoch": 0.21555968041320314, + "grad_norm": 0.728831946849823, + "learning_rate": 0.00019187548541977392, + "loss": 2.6745, + "step": 2671 + }, + { + "epoch": 0.21564038414978615, + "grad_norm": 0.8079627156257629, + "learning_rate": 0.0001918692511582805, + "loss": 2.6427, + "step": 2672 + }, + { + "epoch": 0.21572108788636915, + "grad_norm": 0.766808032989502, + "learning_rate": 0.0001918630146071652, + "loss": 2.6956, + "step": 2673 + 
}, + { + "epoch": 0.21580179162295215, + "grad_norm": 0.7555391192436218, + "learning_rate": 0.00019185677576658345, + "loss": 2.6499, + "step": 2674 + }, + { + "epoch": 0.21588249535953516, + "grad_norm": 0.7740229964256287, + "learning_rate": 0.00019185053463669074, + "loss": 2.7685, + "step": 2675 + }, + { + "epoch": 0.21596319909611816, + "grad_norm": 0.8272803425788879, + "learning_rate": 0.00019184429121764257, + "loss": 2.7272, + "step": 2676 + }, + { + "epoch": 0.21604390283270117, + "grad_norm": 0.870625376701355, + "learning_rate": 0.00019183804550959463, + "loss": 2.7509, + "step": 2677 + }, + { + "epoch": 0.21612460656928414, + "grad_norm": 0.8021238446235657, + "learning_rate": 0.0001918317975127025, + "loss": 2.7058, + "step": 2678 + }, + { + "epoch": 0.21620531030586715, + "grad_norm": 0.729918897151947, + "learning_rate": 0.00019182554722712192, + "loss": 2.6145, + "step": 2679 + }, + { + "epoch": 0.21628601404245015, + "grad_norm": 0.7658380270004272, + "learning_rate": 0.00019181929465300867, + "loss": 2.712, + "step": 2680 + }, + { + "epoch": 0.21636671777903316, + "grad_norm": 0.7702174186706543, + "learning_rate": 0.00019181303979051858, + "loss": 2.8257, + "step": 2681 + }, + { + "epoch": 0.21644742151561616, + "grad_norm": 0.7782231569290161, + "learning_rate": 0.00019180678263980755, + "loss": 2.8226, + "step": 2682 + }, + { + "epoch": 0.21652812525219917, + "grad_norm": 0.7448495626449585, + "learning_rate": 0.0001918005232010315, + "loss": 2.7877, + "step": 2683 + }, + { + "epoch": 0.21660882898878217, + "grad_norm": 0.7273527979850769, + "learning_rate": 0.00019179426147434647, + "loss": 2.7169, + "step": 2684 + }, + { + "epoch": 0.21668953272536517, + "grad_norm": 0.7730992436408997, + "learning_rate": 0.00019178799745990846, + "loss": 2.717, + "step": 2685 + }, + { + "epoch": 0.21677023646194818, + "grad_norm": 0.7709231376647949, + "learning_rate": 0.0001917817311578736, + "loss": 2.7676, + "step": 2686 + }, + { + "epoch": 
0.21685094019853118, + "grad_norm": 0.7825181484222412, + "learning_rate": 0.00019177546256839812, + "loss": 2.7473, + "step": 2687 + }, + { + "epoch": 0.2169316439351142, + "grad_norm": 0.8133581280708313, + "learning_rate": 0.0001917691916916382, + "loss": 2.7242, + "step": 2688 + }, + { + "epoch": 0.2170123476716972, + "grad_norm": 0.7833015322685242, + "learning_rate": 0.00019176291852775011, + "loss": 2.8128, + "step": 2689 + }, + { + "epoch": 0.2170930514082802, + "grad_norm": 0.7423487305641174, + "learning_rate": 0.00019175664307689028, + "loss": 2.6999, + "step": 2690 + }, + { + "epoch": 0.2171737551448632, + "grad_norm": 0.7881289124488831, + "learning_rate": 0.000191750365339215, + "loss": 2.7349, + "step": 2691 + }, + { + "epoch": 0.2172544588814462, + "grad_norm": 0.8316197395324707, + "learning_rate": 0.00019174408531488077, + "loss": 2.7654, + "step": 2692 + }, + { + "epoch": 0.2173351626180292, + "grad_norm": 0.7589917778968811, + "learning_rate": 0.00019173780300404413, + "loss": 2.6815, + "step": 2693 + }, + { + "epoch": 0.21741586635461221, + "grad_norm": 0.7752439975738525, + "learning_rate": 0.00019173151840686163, + "loss": 2.7804, + "step": 2694 + }, + { + "epoch": 0.21749657009119522, + "grad_norm": 0.8156552910804749, + "learning_rate": 0.0001917252315234899, + "loss": 2.7325, + "step": 2695 + }, + { + "epoch": 0.21757727382777822, + "grad_norm": 0.8886982798576355, + "learning_rate": 0.00019171894235408564, + "loss": 2.7257, + "step": 2696 + }, + { + "epoch": 0.21765797756436123, + "grad_norm": 0.8270704746246338, + "learning_rate": 0.00019171265089880558, + "loss": 2.7357, + "step": 2697 + }, + { + "epoch": 0.21773868130094423, + "grad_norm": 0.807700514793396, + "learning_rate": 0.00019170635715780651, + "loss": 2.7488, + "step": 2698 + }, + { + "epoch": 0.21781938503752724, + "grad_norm": 0.8195288181304932, + "learning_rate": 0.00019170006113124533, + "loss": 2.7048, + "step": 2699 + }, + { + "epoch": 0.21790008877411024, + 
"grad_norm": 0.817097008228302, + "learning_rate": 0.00019169376281927888, + "loss": 2.7148, + "step": 2700 + }, + { + "epoch": 0.21798079251069324, + "grad_norm": 0.8415588140487671, + "learning_rate": 0.0001916874622220642, + "loss": 2.7376, + "step": 2701 + }, + { + "epoch": 0.21806149624727625, + "grad_norm": 0.8004198670387268, + "learning_rate": 0.00019168115933975826, + "loss": 2.7145, + "step": 2702 + }, + { + "epoch": 0.21814219998385925, + "grad_norm": 0.8167368769645691, + "learning_rate": 0.0001916748541725182, + "loss": 2.6923, + "step": 2703 + }, + { + "epoch": 0.21822290372044226, + "grad_norm": 0.8877980709075928, + "learning_rate": 0.0001916685467205011, + "loss": 2.8232, + "step": 2704 + }, + { + "epoch": 0.21830360745702526, + "grad_norm": 0.7835622429847717, + "learning_rate": 0.00019166223698386422, + "loss": 2.7797, + "step": 2705 + }, + { + "epoch": 0.21838431119360827, + "grad_norm": 0.8023552894592285, + "learning_rate": 0.00019165592496276477, + "loss": 2.6697, + "step": 2706 + }, + { + "epoch": 0.21846501493019127, + "grad_norm": 0.8549069166183472, + "learning_rate": 0.00019164961065736008, + "loss": 2.729, + "step": 2707 + }, + { + "epoch": 0.21854571866677427, + "grad_norm": 0.8561950325965881, + "learning_rate": 0.00019164329406780753, + "loss": 2.772, + "step": 2708 + }, + { + "epoch": 0.21862642240335728, + "grad_norm": 0.6979276537895203, + "learning_rate": 0.00019163697519426453, + "loss": 2.7195, + "step": 2709 + }, + { + "epoch": 0.21870712613994028, + "grad_norm": 0.7659175395965576, + "learning_rate": 0.00019163065403688856, + "loss": 2.7742, + "step": 2710 + }, + { + "epoch": 0.2187878298765233, + "grad_norm": 0.8621466755867004, + "learning_rate": 0.00019162433059583718, + "loss": 2.721, + "step": 2711 + }, + { + "epoch": 0.2188685336131063, + "grad_norm": 0.8086833357810974, + "learning_rate": 0.00019161800487126795, + "loss": 2.7356, + "step": 2712 + }, + { + "epoch": 0.2189492373496893, + "grad_norm": 0.816215455532074, + 
"learning_rate": 0.00019161167686333855, + "loss": 2.7159, + "step": 2713 + }, + { + "epoch": 0.2190299410862723, + "grad_norm": 0.9180822968482971, + "learning_rate": 0.0001916053465722067, + "loss": 2.7162, + "step": 2714 + }, + { + "epoch": 0.2191106448228553, + "grad_norm": 0.7547199130058289, + "learning_rate": 0.00019159901399803014, + "loss": 2.7338, + "step": 2715 + }, + { + "epoch": 0.2191913485594383, + "grad_norm": 0.7380769848823547, + "learning_rate": 0.00019159267914096675, + "loss": 2.7149, + "step": 2716 + }, + { + "epoch": 0.2192720522960213, + "grad_norm": 0.7242285013198853, + "learning_rate": 0.00019158634200117433, + "loss": 2.724, + "step": 2717 + }, + { + "epoch": 0.21935275603260432, + "grad_norm": 0.8400316834449768, + "learning_rate": 0.00019158000257881087, + "loss": 2.7528, + "step": 2718 + }, + { + "epoch": 0.21943345976918732, + "grad_norm": 0.8437172770500183, + "learning_rate": 0.00019157366087403435, + "loss": 2.7872, + "step": 2719 + }, + { + "epoch": 0.21951416350577033, + "grad_norm": 0.7428301572799683, + "learning_rate": 0.00019156731688700282, + "loss": 2.6831, + "step": 2720 + }, + { + "epoch": 0.21959486724235333, + "grad_norm": 0.7589641213417053, + "learning_rate": 0.00019156097061787445, + "loss": 2.7105, + "step": 2721 + }, + { + "epoch": 0.21967557097893634, + "grad_norm": 0.7607305645942688, + "learning_rate": 0.00019155462206680727, + "loss": 2.7913, + "step": 2722 + }, + { + "epoch": 0.21975627471551934, + "grad_norm": 0.7455689311027527, + "learning_rate": 0.00019154827123395963, + "loss": 2.6321, + "step": 2723 + }, + { + "epoch": 0.21983697845210234, + "grad_norm": 0.7860318422317505, + "learning_rate": 0.00019154191811948974, + "loss": 2.7907, + "step": 2724 + }, + { + "epoch": 0.21991768218868535, + "grad_norm": 0.8101385235786438, + "learning_rate": 0.00019153556272355596, + "loss": 2.7682, + "step": 2725 + }, + { + "epoch": 0.21999838592526835, + "grad_norm": 0.7437283396720886, + "learning_rate": 
0.00019152920504631667, + "loss": 2.7271, + "step": 2726 + }, + { + "epoch": 0.22007908966185136, + "grad_norm": 0.7390851974487305, + "learning_rate": 0.00019152284508793034, + "loss": 2.7492, + "step": 2727 + }, + { + "epoch": 0.22015979339843436, + "grad_norm": 0.9074966311454773, + "learning_rate": 0.0001915164828485555, + "loss": 2.8076, + "step": 2728 + }, + { + "epoch": 0.22024049713501734, + "grad_norm": 0.7644218802452087, + "learning_rate": 0.00019151011832835063, + "loss": 2.7238, + "step": 2729 + }, + { + "epoch": 0.22032120087160034, + "grad_norm": 0.823567807674408, + "learning_rate": 0.0001915037515274744, + "loss": 2.7701, + "step": 2730 + }, + { + "epoch": 0.22040190460818335, + "grad_norm": 0.7601858377456665, + "learning_rate": 0.00019149738244608552, + "loss": 2.6981, + "step": 2731 + }, + { + "epoch": 0.22048260834476635, + "grad_norm": 0.8242961764335632, + "learning_rate": 0.00019149101108434269, + "loss": 2.6916, + "step": 2732 + }, + { + "epoch": 0.22056331208134936, + "grad_norm": 0.7970656156539917, + "learning_rate": 0.0001914846374424047, + "loss": 2.7858, + "step": 2733 + }, + { + "epoch": 0.22064401581793236, + "grad_norm": 0.7844050526618958, + "learning_rate": 0.0001914782615204304, + "loss": 2.6782, + "step": 2734 + }, + { + "epoch": 0.22072471955451536, + "grad_norm": 0.7965044975280762, + "learning_rate": 0.00019147188331857868, + "loss": 2.7563, + "step": 2735 + }, + { + "epoch": 0.22080542329109837, + "grad_norm": 0.8189071416854858, + "learning_rate": 0.00019146550283700856, + "loss": 2.7587, + "step": 2736 + }, + { + "epoch": 0.22088612702768137, + "grad_norm": 0.7610960602760315, + "learning_rate": 0.00019145912007587898, + "loss": 2.663, + "step": 2737 + }, + { + "epoch": 0.22096683076426438, + "grad_norm": 0.7642313838005066, + "learning_rate": 0.00019145273503534907, + "loss": 2.78, + "step": 2738 + }, + { + "epoch": 0.22104753450084738, + "grad_norm": 0.7699539065361023, + "learning_rate": 0.0001914463477155779, + 
"loss": 2.7429, + "step": 2739 + }, + { + "epoch": 0.22112823823743039, + "grad_norm": 0.7674413919448853, + "learning_rate": 0.00019143995811672477, + "loss": 2.7048, + "step": 2740 + }, + { + "epoch": 0.2212089419740134, + "grad_norm": 0.7871866226196289, + "learning_rate": 0.00019143356623894882, + "loss": 2.7769, + "step": 2741 + }, + { + "epoch": 0.2212896457105964, + "grad_norm": 0.8453468680381775, + "learning_rate": 0.00019142717208240937, + "loss": 2.7677, + "step": 2742 + }, + { + "epoch": 0.2213703494471794, + "grad_norm": 0.8050780892372131, + "learning_rate": 0.00019142077564726582, + "loss": 2.7809, + "step": 2743 + }, + { + "epoch": 0.2214510531837624, + "grad_norm": 0.811287522315979, + "learning_rate": 0.0001914143769336776, + "loss": 2.7201, + "step": 2744 + }, + { + "epoch": 0.2215317569203454, + "grad_norm": 0.823106050491333, + "learning_rate": 0.00019140797594180412, + "loss": 2.7371, + "step": 2745 + }, + { + "epoch": 0.2216124606569284, + "grad_norm": 0.778126060962677, + "learning_rate": 0.0001914015726718049, + "loss": 2.6925, + "step": 2746 + }, + { + "epoch": 0.22169316439351142, + "grad_norm": 0.8240278959274292, + "learning_rate": 0.0001913951671238396, + "loss": 2.7227, + "step": 2747 + }, + { + "epoch": 0.22177386813009442, + "grad_norm": 0.8061805963516235, + "learning_rate": 0.0001913887592980678, + "loss": 2.7092, + "step": 2748 + }, + { + "epoch": 0.22185457186667742, + "grad_norm": 0.9111800789833069, + "learning_rate": 0.00019138234919464925, + "loss": 2.7364, + "step": 2749 + }, + { + "epoch": 0.22193527560326043, + "grad_norm": 0.8154863715171814, + "learning_rate": 0.0001913759368137437, + "loss": 2.6983, + "step": 2750 + }, + { + "epoch": 0.22201597933984343, + "grad_norm": 0.8547734022140503, + "learning_rate": 0.0001913695221555109, + "loss": 2.7016, + "step": 2751 + }, + { + "epoch": 0.22209668307642644, + "grad_norm": 0.7488531470298767, + "learning_rate": 0.00019136310522011079, + "loss": 2.6641, + "step": 2752 + }, + 
{ + "epoch": 0.22217738681300944, + "grad_norm": 0.9118027091026306, + "learning_rate": 0.00019135668600770326, + "loss": 2.6965, + "step": 2753 + }, + { + "epoch": 0.22225809054959245, + "grad_norm": 0.7629117369651794, + "learning_rate": 0.00019135026451844834, + "loss": 2.7836, + "step": 2754 + }, + { + "epoch": 0.22233879428617545, + "grad_norm": 0.8081222176551819, + "learning_rate": 0.000191343840752506, + "loss": 2.7339, + "step": 2755 + }, + { + "epoch": 0.22241949802275846, + "grad_norm": 0.9143899083137512, + "learning_rate": 0.00019133741471003636, + "loss": 2.7051, + "step": 2756 + }, + { + "epoch": 0.22250020175934146, + "grad_norm": 0.8096790909767151, + "learning_rate": 0.00019133098639119962, + "loss": 2.6884, + "step": 2757 + }, + { + "epoch": 0.22258090549592446, + "grad_norm": 0.7959297895431519, + "learning_rate": 0.00019132455579615597, + "loss": 2.7127, + "step": 2758 + }, + { + "epoch": 0.22266160923250747, + "grad_norm": 0.7111356854438782, + "learning_rate": 0.00019131812292506563, + "loss": 2.7418, + "step": 2759 + }, + { + "epoch": 0.22274231296909047, + "grad_norm": 0.7584012150764465, + "learning_rate": 0.00019131168777808898, + "loss": 2.6705, + "step": 2760 + }, + { + "epoch": 0.22282301670567348, + "grad_norm": 0.7646663784980774, + "learning_rate": 0.0001913052503553864, + "loss": 2.7166, + "step": 2761 + }, + { + "epoch": 0.22290372044225648, + "grad_norm": 0.7643954157829285, + "learning_rate": 0.00019129881065711827, + "loss": 2.7967, + "step": 2762 + }, + { + "epoch": 0.22298442417883949, + "grad_norm": 0.7591429948806763, + "learning_rate": 0.0001912923686834451, + "loss": 2.6611, + "step": 2763 + }, + { + "epoch": 0.2230651279154225, + "grad_norm": 0.7182386517524719, + "learning_rate": 0.00019128592443452749, + "loss": 2.6808, + "step": 2764 + }, + { + "epoch": 0.2231458316520055, + "grad_norm": 0.7689648270606995, + "learning_rate": 0.00019127947791052602, + "loss": 2.7288, + "step": 2765 + }, + { + "epoch": 
0.2232265353885885, + "grad_norm": 0.7851321697235107, + "learning_rate": 0.00019127302911160136, + "loss": 2.7227, + "step": 2766 + }, + { + "epoch": 0.2233072391251715, + "grad_norm": 0.8419411182403564, + "learning_rate": 0.00019126657803791424, + "loss": 2.7397, + "step": 2767 + }, + { + "epoch": 0.2233879428617545, + "grad_norm": 0.7657596468925476, + "learning_rate": 0.0001912601246896254, + "loss": 2.7223, + "step": 2768 + }, + { + "epoch": 0.2234686465983375, + "grad_norm": 0.8033619523048401, + "learning_rate": 0.00019125366906689567, + "loss": 2.7256, + "step": 2769 + }, + { + "epoch": 0.22354935033492052, + "grad_norm": 0.7784682512283325, + "learning_rate": 0.00019124721116988601, + "loss": 2.7692, + "step": 2770 + }, + { + "epoch": 0.22363005407150352, + "grad_norm": 0.7842707633972168, + "learning_rate": 0.00019124075099875731, + "loss": 2.7707, + "step": 2771 + }, + { + "epoch": 0.22371075780808652, + "grad_norm": 0.7864845395088196, + "learning_rate": 0.0001912342885536706, + "loss": 2.6912, + "step": 2772 + }, + { + "epoch": 0.22379146154466953, + "grad_norm": 0.8544312715530396, + "learning_rate": 0.0001912278238347869, + "loss": 2.8345, + "step": 2773 + }, + { + "epoch": 0.22387216528125253, + "grad_norm": 0.7210882306098938, + "learning_rate": 0.0001912213568422674, + "loss": 2.6933, + "step": 2774 + }, + { + "epoch": 0.22395286901783554, + "grad_norm": 0.8877022862434387, + "learning_rate": 0.00019121488757627318, + "loss": 2.7583, + "step": 2775 + }, + { + "epoch": 0.22403357275441854, + "grad_norm": 0.902886688709259, + "learning_rate": 0.00019120841603696554, + "loss": 2.8, + "step": 2776 + }, + { + "epoch": 0.22411427649100155, + "grad_norm": 0.771294355392456, + "learning_rate": 0.0001912019422245058, + "loss": 2.7712, + "step": 2777 + }, + { + "epoch": 0.22419498022758455, + "grad_norm": 0.7973463535308838, + "learning_rate": 0.0001911954661390552, + "loss": 2.7368, + "step": 2778 + }, + { + "epoch": 0.22427568396416755, + "grad_norm": 
0.776836633682251, + "learning_rate": 0.00019118898778077524, + "loss": 2.7126, + "step": 2779 + }, + { + "epoch": 0.22435638770075053, + "grad_norm": 0.8286641240119934, + "learning_rate": 0.00019118250714982731, + "loss": 2.7148, + "step": 2780 + }, + { + "epoch": 0.22443709143733354, + "grad_norm": 0.7848700284957886, + "learning_rate": 0.00019117602424637294, + "loss": 2.7284, + "step": 2781 + }, + { + "epoch": 0.22451779517391654, + "grad_norm": 0.7658216953277588, + "learning_rate": 0.0001911695390705737, + "loss": 2.7186, + "step": 2782 + }, + { + "epoch": 0.22459849891049954, + "grad_norm": 0.7596792578697205, + "learning_rate": 0.00019116305162259124, + "loss": 2.6854, + "step": 2783 + }, + { + "epoch": 0.22467920264708255, + "grad_norm": 0.7901157140731812, + "learning_rate": 0.00019115656190258726, + "loss": 2.7347, + "step": 2784 + }, + { + "epoch": 0.22475990638366555, + "grad_norm": 0.7499287128448486, + "learning_rate": 0.00019115006991072346, + "loss": 2.7219, + "step": 2785 + }, + { + "epoch": 0.22484061012024856, + "grad_norm": 0.7427374124526978, + "learning_rate": 0.00019114357564716162, + "loss": 2.7147, + "step": 2786 + }, + { + "epoch": 0.22492131385683156, + "grad_norm": 0.8305855393409729, + "learning_rate": 0.00019113707911206363, + "loss": 2.7587, + "step": 2787 + }, + { + "epoch": 0.22500201759341457, + "grad_norm": 0.8266459703445435, + "learning_rate": 0.00019113058030559142, + "loss": 2.7275, + "step": 2788 + }, + { + "epoch": 0.22508272132999757, + "grad_norm": 0.7338323593139648, + "learning_rate": 0.0001911240792279069, + "loss": 2.762, + "step": 2789 + }, + { + "epoch": 0.22516342506658057, + "grad_norm": 0.7653434872627258, + "learning_rate": 0.00019111757587917216, + "loss": 2.6715, + "step": 2790 + }, + { + "epoch": 0.22524412880316358, + "grad_norm": 0.76301509141922, + "learning_rate": 0.00019111107025954923, + "loss": 2.698, + "step": 2791 + }, + { + "epoch": 0.22532483253974658, + "grad_norm": 0.7810547947883606, + 
"learning_rate": 0.00019110456236920024, + "loss": 2.7295, + "step": 2792 + }, + { + "epoch": 0.2254055362763296, + "grad_norm": 0.7885214686393738, + "learning_rate": 0.00019109805220828742, + "loss": 2.7724, + "step": 2793 + }, + { + "epoch": 0.2254862400129126, + "grad_norm": 0.8087031841278076, + "learning_rate": 0.00019109153977697301, + "loss": 2.7888, + "step": 2794 + }, + { + "epoch": 0.2255669437494956, + "grad_norm": 0.795101523399353, + "learning_rate": 0.00019108502507541933, + "loss": 2.6815, + "step": 2795 + }, + { + "epoch": 0.2256476474860786, + "grad_norm": 0.8337482213973999, + "learning_rate": 0.0001910785081037887, + "loss": 2.8192, + "step": 2796 + }, + { + "epoch": 0.2257283512226616, + "grad_norm": 0.8357288241386414, + "learning_rate": 0.00019107198886224357, + "loss": 2.7867, + "step": 2797 + }, + { + "epoch": 0.2258090549592446, + "grad_norm": 0.80678391456604, + "learning_rate": 0.00019106546735094644, + "loss": 2.7313, + "step": 2798 + }, + { + "epoch": 0.2258897586958276, + "grad_norm": 0.7481401562690735, + "learning_rate": 0.00019105894357005979, + "loss": 2.7073, + "step": 2799 + }, + { + "epoch": 0.22597046243241062, + "grad_norm": 0.8025074005126953, + "learning_rate": 0.00019105241751974622, + "loss": 2.6922, + "step": 2800 + }, + { + "epoch": 0.22605116616899362, + "grad_norm": 0.7308986186981201, + "learning_rate": 0.00019104588920016842, + "loss": 2.7511, + "step": 2801 + }, + { + "epoch": 0.22613186990557663, + "grad_norm": 0.7727689146995544, + "learning_rate": 0.00019103935861148905, + "loss": 2.707, + "step": 2802 + }, + { + "epoch": 0.22621257364215963, + "grad_norm": 0.8611076474189758, + "learning_rate": 0.0001910328257538709, + "loss": 2.8494, + "step": 2803 + }, + { + "epoch": 0.22629327737874264, + "grad_norm": 0.8487605452537537, + "learning_rate": 0.00019102629062747677, + "loss": 2.7698, + "step": 2804 + }, + { + "epoch": 0.22637398111532564, + "grad_norm": 0.7495502233505249, + "learning_rate": 
0.00019101975323246952, + "loss": 2.7091, + "step": 2805 + }, + { + "epoch": 0.22645468485190864, + "grad_norm": 0.7334234118461609, + "learning_rate": 0.0001910132135690121, + "loss": 2.7375, + "step": 2806 + }, + { + "epoch": 0.22653538858849165, + "grad_norm": 0.879912257194519, + "learning_rate": 0.00019100667163726747, + "loss": 2.7278, + "step": 2807 + }, + { + "epoch": 0.22661609232507465, + "grad_norm": 0.8087306618690491, + "learning_rate": 0.0001910001274373987, + "loss": 2.8065, + "step": 2808 + }, + { + "epoch": 0.22669679606165766, + "grad_norm": 0.7548169493675232, + "learning_rate": 0.00019099358096956887, + "loss": 2.7235, + "step": 2809 + }, + { + "epoch": 0.22677749979824066, + "grad_norm": 0.7505785822868347, + "learning_rate": 0.00019098703223394118, + "loss": 2.6633, + "step": 2810 + }, + { + "epoch": 0.22685820353482367, + "grad_norm": 0.829075813293457, + "learning_rate": 0.00019098048123067875, + "loss": 2.7389, + "step": 2811 + }, + { + "epoch": 0.22693890727140667, + "grad_norm": 0.7731673121452332, + "learning_rate": 0.00019097392795994493, + "loss": 2.7639, + "step": 2812 + }, + { + "epoch": 0.22701961100798967, + "grad_norm": 0.7389004826545715, + "learning_rate": 0.00019096737242190303, + "loss": 2.717, + "step": 2813 + }, + { + "epoch": 0.22710031474457268, + "grad_norm": 0.7520460486412048, + "learning_rate": 0.0001909608146167164, + "loss": 2.7203, + "step": 2814 + }, + { + "epoch": 0.22718101848115568, + "grad_norm": 0.7272354364395142, + "learning_rate": 0.00019095425454454849, + "loss": 2.7306, + "step": 2815 + }, + { + "epoch": 0.2272617222177387, + "grad_norm": 0.7593528032302856, + "learning_rate": 0.00019094769220556282, + "loss": 2.7565, + "step": 2816 + }, + { + "epoch": 0.2273424259543217, + "grad_norm": 0.7312695384025574, + "learning_rate": 0.0001909411275999229, + "loss": 2.744, + "step": 2817 + }, + { + "epoch": 0.2274231296909047, + "grad_norm": 0.7483308911323547, + "learning_rate": 0.00019093456072779238, + "loss": 
2.7938, + "step": 2818 + }, + { + "epoch": 0.2275038334274877, + "grad_norm": 0.8515620231628418, + "learning_rate": 0.00019092799158933486, + "loss": 2.7392, + "step": 2819 + }, + { + "epoch": 0.2275845371640707, + "grad_norm": 0.7119776606559753, + "learning_rate": 0.00019092142018471415, + "loss": 2.6985, + "step": 2820 + }, + { + "epoch": 0.2276652409006537, + "grad_norm": 0.7549445033073425, + "learning_rate": 0.00019091484651409394, + "loss": 2.7621, + "step": 2821 + }, + { + "epoch": 0.2277459446372367, + "grad_norm": 0.8728097081184387, + "learning_rate": 0.00019090827057763814, + "loss": 2.8321, + "step": 2822 + }, + { + "epoch": 0.22782664837381972, + "grad_norm": 0.755043089389801, + "learning_rate": 0.00019090169237551057, + "loss": 2.7341, + "step": 2823 + }, + { + "epoch": 0.22790735211040272, + "grad_norm": 0.7949401140213013, + "learning_rate": 0.00019089511190787523, + "loss": 2.7646, + "step": 2824 + }, + { + "epoch": 0.22798805584698573, + "grad_norm": 0.8027622103691101, + "learning_rate": 0.00019088852917489607, + "loss": 2.7606, + "step": 2825 + }, + { + "epoch": 0.22806875958356873, + "grad_norm": 0.8609418869018555, + "learning_rate": 0.0001908819441767372, + "loss": 2.7433, + "step": 2826 + }, + { + "epoch": 0.22814946332015174, + "grad_norm": 0.8021805882453918, + "learning_rate": 0.00019087535691356271, + "loss": 2.7723, + "step": 2827 + }, + { + "epoch": 0.22823016705673474, + "grad_norm": 0.8104252219200134, + "learning_rate": 0.00019086876738553675, + "loss": 2.7229, + "step": 2828 + }, + { + "epoch": 0.22831087079331774, + "grad_norm": 0.8714433908462524, + "learning_rate": 0.00019086217559282362, + "loss": 2.75, + "step": 2829 + }, + { + "epoch": 0.22839157452990075, + "grad_norm": 0.7598714828491211, + "learning_rate": 0.0001908555815355875, + "loss": 2.6979, + "step": 2830 + }, + { + "epoch": 0.22847227826648372, + "grad_norm": 0.859708309173584, + "learning_rate": 0.00019084898521399283, + "loss": 2.7863, + "step": 2831 + }, + { + 
"epoch": 0.22855298200306673, + "grad_norm": 0.7798011302947998, + "learning_rate": 0.00019084238662820397, + "loss": 2.7623, + "step": 2832 + }, + { + "epoch": 0.22863368573964973, + "grad_norm": 0.7869576811790466, + "learning_rate": 0.00019083578577838535, + "loss": 2.7341, + "step": 2833 + }, + { + "epoch": 0.22871438947623274, + "grad_norm": 0.7486738562583923, + "learning_rate": 0.0001908291826647015, + "loss": 2.7615, + "step": 2834 + }, + { + "epoch": 0.22879509321281574, + "grad_norm": 0.8270190954208374, + "learning_rate": 0.00019082257728731704, + "loss": 2.7515, + "step": 2835 + }, + { + "epoch": 0.22887579694939875, + "grad_norm": 0.9060254693031311, + "learning_rate": 0.00019081596964639648, + "loss": 2.874, + "step": 2836 + }, + { + "epoch": 0.22895650068598175, + "grad_norm": 0.7802320122718811, + "learning_rate": 0.00019080935974210458, + "loss": 2.7224, + "step": 2837 + }, + { + "epoch": 0.22903720442256476, + "grad_norm": 0.9513018131256104, + "learning_rate": 0.00019080274757460607, + "loss": 2.7168, + "step": 2838 + }, + { + "epoch": 0.22911790815914776, + "grad_norm": 0.7139711976051331, + "learning_rate": 0.0001907961331440657, + "loss": 2.676, + "step": 2839 + }, + { + "epoch": 0.22919861189573076, + "grad_norm": 0.8635632395744324, + "learning_rate": 0.00019078951645064838, + "loss": 2.6979, + "step": 2840 + }, + { + "epoch": 0.22927931563231377, + "grad_norm": 0.8823218941688538, + "learning_rate": 0.000190782897494519, + "loss": 2.7345, + "step": 2841 + }, + { + "epoch": 0.22936001936889677, + "grad_norm": 0.8139359354972839, + "learning_rate": 0.00019077627627584246, + "loss": 2.6988, + "step": 2842 + }, + { + "epoch": 0.22944072310547978, + "grad_norm": 0.8935994505882263, + "learning_rate": 0.00019076965279478383, + "loss": 2.7706, + "step": 2843 + }, + { + "epoch": 0.22952142684206278, + "grad_norm": 0.8362705111503601, + "learning_rate": 0.00019076302705150816, + "loss": 2.7593, + "step": 2844 + }, + { + "epoch": 0.22960213057864579, 
+ "grad_norm": 0.7534157633781433, + "learning_rate": 0.00019075639904618066, + "loss": 2.7501, + "step": 2845 + }, + { + "epoch": 0.2296828343152288, + "grad_norm": 0.8826640248298645, + "learning_rate": 0.00019074976877896642, + "loss": 2.7758, + "step": 2846 + }, + { + "epoch": 0.2297635380518118, + "grad_norm": 0.8395571112632751, + "learning_rate": 0.0001907431362500307, + "loss": 2.7625, + "step": 2847 + }, + { + "epoch": 0.2298442417883948, + "grad_norm": 0.7927684783935547, + "learning_rate": 0.00019073650145953885, + "loss": 2.7392, + "step": 2848 + }, + { + "epoch": 0.2299249455249778, + "grad_norm": 0.823208749294281, + "learning_rate": 0.00019072986440765618, + "loss": 2.7259, + "step": 2849 + }, + { + "epoch": 0.2300056492615608, + "grad_norm": 0.889416515827179, + "learning_rate": 0.00019072322509454815, + "loss": 2.7539, + "step": 2850 + }, + { + "epoch": 0.2300863529981438, + "grad_norm": 0.7957748770713806, + "learning_rate": 0.0001907165835203802, + "loss": 2.7756, + "step": 2851 + }, + { + "epoch": 0.23016705673472682, + "grad_norm": 0.7924029231071472, + "learning_rate": 0.00019070993968531782, + "loss": 2.7439, + "step": 2852 + }, + { + "epoch": 0.23024776047130982, + "grad_norm": 0.7811052799224854, + "learning_rate": 0.0001907032935895266, + "loss": 2.7479, + "step": 2853 + }, + { + "epoch": 0.23032846420789282, + "grad_norm": 0.7973877191543579, + "learning_rate": 0.00019069664523317225, + "loss": 2.7502, + "step": 2854 + }, + { + "epoch": 0.23040916794447583, + "grad_norm": 0.7524267435073853, + "learning_rate": 0.0001906899946164204, + "loss": 2.75, + "step": 2855 + }, + { + "epoch": 0.23048987168105883, + "grad_norm": 0.7594791054725647, + "learning_rate": 0.00019068334173943683, + "loss": 2.6534, + "step": 2856 + }, + { + "epoch": 0.23057057541764184, + "grad_norm": 0.7253785729408264, + "learning_rate": 0.00019067668660238733, + "loss": 2.7246, + "step": 2857 + }, + { + "epoch": 0.23065127915422484, + "grad_norm": 0.788737416267395, + 
"learning_rate": 0.00019067002920543775, + "loss": 2.757, + "step": 2858 + }, + { + "epoch": 0.23073198289080785, + "grad_norm": 0.7577618956565857, + "learning_rate": 0.00019066336954875403, + "loss": 2.674, + "step": 2859 + }, + { + "epoch": 0.23081268662739085, + "grad_norm": 0.7682929635047913, + "learning_rate": 0.0001906567076325022, + "loss": 2.8193, + "step": 2860 + }, + { + "epoch": 0.23089339036397385, + "grad_norm": 0.7742112874984741, + "learning_rate": 0.00019065004345684817, + "loss": 2.6969, + "step": 2861 + }, + { + "epoch": 0.23097409410055686, + "grad_norm": 0.7981678247451782, + "learning_rate": 0.00019064337702195814, + "loss": 2.7681, + "step": 2862 + }, + { + "epoch": 0.23105479783713986, + "grad_norm": 0.7608500123023987, + "learning_rate": 0.00019063670832799817, + "loss": 2.7459, + "step": 2863 + }, + { + "epoch": 0.23113550157372287, + "grad_norm": 0.7563463449478149, + "learning_rate": 0.00019063003737513455, + "loss": 2.7678, + "step": 2864 + }, + { + "epoch": 0.23121620531030587, + "grad_norm": 0.7915034890174866, + "learning_rate": 0.00019062336416353343, + "loss": 2.7577, + "step": 2865 + }, + { + "epoch": 0.23129690904688888, + "grad_norm": 0.7229592204093933, + "learning_rate": 0.00019061668869336122, + "loss": 2.7308, + "step": 2866 + }, + { + "epoch": 0.23137761278347188, + "grad_norm": 0.7910905480384827, + "learning_rate": 0.00019061001096478425, + "loss": 2.7571, + "step": 2867 + }, + { + "epoch": 0.23145831652005489, + "grad_norm": 0.8474656939506531, + "learning_rate": 0.00019060333097796895, + "loss": 2.7011, + "step": 2868 + }, + { + "epoch": 0.2315390202566379, + "grad_norm": 0.8005419373512268, + "learning_rate": 0.00019059664873308178, + "loss": 2.7441, + "step": 2869 + }, + { + "epoch": 0.2316197239932209, + "grad_norm": 0.7728021740913391, + "learning_rate": 0.00019058996423028935, + "loss": 2.7753, + "step": 2870 + }, + { + "epoch": 0.2317004277298039, + "grad_norm": 0.7338094115257263, + "learning_rate": 
0.00019058327746975816, + "loss": 2.7009, + "step": 2871 + }, + { + "epoch": 0.2317811314663869, + "grad_norm": 0.7746245265007019, + "learning_rate": 0.00019057658845165494, + "loss": 2.6938, + "step": 2872 + }, + { + "epoch": 0.2318618352029699, + "grad_norm": 0.7474356293678284, + "learning_rate": 0.00019056989717614636, + "loss": 2.7161, + "step": 2873 + }, + { + "epoch": 0.2319425389395529, + "grad_norm": 0.9540585279464722, + "learning_rate": 0.00019056320364339917, + "loss": 2.7753, + "step": 2874 + }, + { + "epoch": 0.23202324267613592, + "grad_norm": 0.799726665019989, + "learning_rate": 0.00019055650785358024, + "loss": 2.7301, + "step": 2875 + }, + { + "epoch": 0.23210394641271892, + "grad_norm": 0.8087828159332275, + "learning_rate": 0.0001905498098068564, + "loss": 2.7305, + "step": 2876 + }, + { + "epoch": 0.23218465014930192, + "grad_norm": 0.8177600502967834, + "learning_rate": 0.00019054310950339457, + "loss": 2.7462, + "step": 2877 + }, + { + "epoch": 0.23226535388588493, + "grad_norm": 0.7106238603591919, + "learning_rate": 0.00019053640694336181, + "loss": 2.7183, + "step": 2878 + }, + { + "epoch": 0.23234605762246793, + "grad_norm": 0.884185791015625, + "learning_rate": 0.00019052970212692514, + "loss": 2.7549, + "step": 2879 + }, + { + "epoch": 0.23242676135905094, + "grad_norm": 0.7532132267951965, + "learning_rate": 0.00019052299505425163, + "loss": 2.7524, + "step": 2880 + }, + { + "epoch": 0.23250746509563394, + "grad_norm": 0.7295021414756775, + "learning_rate": 0.00019051628572550842, + "loss": 2.6928, + "step": 2881 + }, + { + "epoch": 0.23258816883221692, + "grad_norm": 0.8475896716117859, + "learning_rate": 0.00019050957414086278, + "loss": 2.7138, + "step": 2882 + }, + { + "epoch": 0.23266887256879992, + "grad_norm": 0.7219378352165222, + "learning_rate": 0.00019050286030048198, + "loss": 2.7034, + "step": 2883 + }, + { + "epoch": 0.23274957630538293, + "grad_norm": 0.8410176634788513, + "learning_rate": 0.0001904961442045333, + 
"loss": 2.7413, + "step": 2884 + }, + { + "epoch": 0.23283028004196593, + "grad_norm": 0.7792301177978516, + "learning_rate": 0.00019048942585318414, + "loss": 2.6771, + "step": 2885 + }, + { + "epoch": 0.23291098377854894, + "grad_norm": 0.7457073926925659, + "learning_rate": 0.00019048270524660196, + "loss": 2.7325, + "step": 2886 + }, + { + "epoch": 0.23299168751513194, + "grad_norm": 0.8258858323097229, + "learning_rate": 0.00019047598238495424, + "loss": 2.7434, + "step": 2887 + }, + { + "epoch": 0.23307239125171494, + "grad_norm": 0.8188657164573669, + "learning_rate": 0.00019046925726840853, + "loss": 2.732, + "step": 2888 + }, + { + "epoch": 0.23315309498829795, + "grad_norm": 0.8084142208099365, + "learning_rate": 0.00019046252989713246, + "loss": 2.7537, + "step": 2889 + }, + { + "epoch": 0.23323379872488095, + "grad_norm": 0.75553297996521, + "learning_rate": 0.00019045580027129364, + "loss": 2.6685, + "step": 2890 + }, + { + "epoch": 0.23331450246146396, + "grad_norm": 0.8145995736122131, + "learning_rate": 0.00019044906839105986, + "loss": 2.7654, + "step": 2891 + }, + { + "epoch": 0.23339520619804696, + "grad_norm": 0.8433949947357178, + "learning_rate": 0.0001904423342565988, + "loss": 2.7713, + "step": 2892 + }, + { + "epoch": 0.23347590993462997, + "grad_norm": 0.7826054096221924, + "learning_rate": 0.0001904355978680784, + "loss": 2.7108, + "step": 2893 + }, + { + "epoch": 0.23355661367121297, + "grad_norm": 0.7281686663627625, + "learning_rate": 0.0001904288592256665, + "loss": 2.7606, + "step": 2894 + }, + { + "epoch": 0.23363731740779597, + "grad_norm": 0.8282813429832458, + "learning_rate": 0.00019042211832953103, + "loss": 2.6662, + "step": 2895 + }, + { + "epoch": 0.23371802114437898, + "grad_norm": 0.8227263689041138, + "learning_rate": 0.00019041537517984, + "loss": 2.7493, + "step": 2896 + }, + { + "epoch": 0.23379872488096198, + "grad_norm": 0.839350700378418, + "learning_rate": 0.0001904086297767615, + "loss": 2.7258, + "step": 2897 + 
}, + { + "epoch": 0.233879428617545, + "grad_norm": 0.713231086730957, + "learning_rate": 0.00019040188212046357, + "loss": 2.6722, + "step": 2898 + }, + { + "epoch": 0.233960132354128, + "grad_norm": 0.8314552903175354, + "learning_rate": 0.00019039513221111447, + "loss": 2.8509, + "step": 2899 + }, + { + "epoch": 0.234040836090711, + "grad_norm": 0.8885688781738281, + "learning_rate": 0.0001903883800488824, + "loss": 2.7608, + "step": 2900 + }, + { + "epoch": 0.234121539827294, + "grad_norm": 0.755308210849762, + "learning_rate": 0.00019038162563393555, + "loss": 2.7065, + "step": 2901 + }, + { + "epoch": 0.234202243563877, + "grad_norm": 0.7436641454696655, + "learning_rate": 0.00019037486896644236, + "loss": 2.6865, + "step": 2902 + }, + { + "epoch": 0.23428294730046, + "grad_norm": 0.7861987948417664, + "learning_rate": 0.0001903681100465712, + "loss": 2.7238, + "step": 2903 + }, + { + "epoch": 0.234363651037043, + "grad_norm": 0.7481045126914978, + "learning_rate": 0.0001903613488744905, + "loss": 2.7038, + "step": 2904 + }, + { + "epoch": 0.23444435477362602, + "grad_norm": 0.790765106678009, + "learning_rate": 0.0001903545854503688, + "loss": 2.6865, + "step": 2905 + }, + { + "epoch": 0.23452505851020902, + "grad_norm": 0.8594793677330017, + "learning_rate": 0.0001903478197743746, + "loss": 2.7324, + "step": 2906 + }, + { + "epoch": 0.23460576224679203, + "grad_norm": 0.7504310011863708, + "learning_rate": 0.00019034105184667662, + "loss": 2.6535, + "step": 2907 + }, + { + "epoch": 0.23468646598337503, + "grad_norm": 0.7824578881263733, + "learning_rate": 0.00019033428166744342, + "loss": 2.7113, + "step": 2908 + }, + { + "epoch": 0.23476716971995804, + "grad_norm": 0.7766899466514587, + "learning_rate": 0.0001903275092368438, + "loss": 2.6907, + "step": 2909 + }, + { + "epoch": 0.23484787345654104, + "grad_norm": 0.8082600235939026, + "learning_rate": 0.00019032073455504657, + "loss": 2.6781, + "step": 2910 + }, + { + "epoch": 0.23492857719312404, + 
"grad_norm": 0.7790517210960388, + "learning_rate": 0.0001903139576222205, + "loss": 2.7277, + "step": 2911 + }, + { + "epoch": 0.23500928092970705, + "grad_norm": 0.7449578046798706, + "learning_rate": 0.00019030717843853453, + "loss": 2.7078, + "step": 2912 + }, + { + "epoch": 0.23508998466629005, + "grad_norm": 0.7931632399559021, + "learning_rate": 0.0001903003970041576, + "loss": 2.7165, + "step": 2913 + }, + { + "epoch": 0.23517068840287306, + "grad_norm": 0.7970653176307678, + "learning_rate": 0.00019029361331925873, + "loss": 2.7993, + "step": 2914 + }, + { + "epoch": 0.23525139213945606, + "grad_norm": 0.8497335314750671, + "learning_rate": 0.00019028682738400697, + "loss": 2.7564, + "step": 2915 + }, + { + "epoch": 0.23533209587603907, + "grad_norm": 0.7840128540992737, + "learning_rate": 0.0001902800391985715, + "loss": 2.7546, + "step": 2916 + }, + { + "epoch": 0.23541279961262207, + "grad_norm": 0.8237372636795044, + "learning_rate": 0.00019027324876312146, + "loss": 2.7507, + "step": 2917 + }, + { + "epoch": 0.23549350334920507, + "grad_norm": 0.8445321917533875, + "learning_rate": 0.00019026645607782603, + "loss": 2.7287, + "step": 2918 + }, + { + "epoch": 0.23557420708578808, + "grad_norm": 0.8380417227745056, + "learning_rate": 0.0001902596611428546, + "loss": 2.7778, + "step": 2919 + }, + { + "epoch": 0.23565491082237108, + "grad_norm": 0.7989064455032349, + "learning_rate": 0.00019025286395837646, + "loss": 2.7254, + "step": 2920 + }, + { + "epoch": 0.2357356145589541, + "grad_norm": 0.8223496079444885, + "learning_rate": 0.00019024606452456102, + "loss": 2.7028, + "step": 2921 + }, + { + "epoch": 0.2358163182955371, + "grad_norm": 0.8090229630470276, + "learning_rate": 0.00019023926284157775, + "loss": 2.6911, + "step": 2922 + }, + { + "epoch": 0.2358970220321201, + "grad_norm": 0.7556560635566711, + "learning_rate": 0.00019023245890959615, + "loss": 2.7183, + "step": 2923 + }, + { + "epoch": 0.2359777257687031, + "grad_norm": 
0.7907983660697937, + "learning_rate": 0.00019022565272878582, + "loss": 2.6805, + "step": 2924 + }, + { + "epoch": 0.2360584295052861, + "grad_norm": 0.9404142498970032, + "learning_rate": 0.0001902188442993164, + "loss": 2.8081, + "step": 2925 + }, + { + "epoch": 0.2361391332418691, + "grad_norm": 0.8349069952964783, + "learning_rate": 0.0001902120336213575, + "loss": 2.8329, + "step": 2926 + }, + { + "epoch": 0.2362198369784521, + "grad_norm": 0.8557522892951965, + "learning_rate": 0.00019020522069507892, + "loss": 2.704, + "step": 2927 + }, + { + "epoch": 0.23630054071503512, + "grad_norm": 0.7557278275489807, + "learning_rate": 0.00019019840552065044, + "loss": 2.7071, + "step": 2928 + }, + { + "epoch": 0.23638124445161812, + "grad_norm": 0.8810723423957825, + "learning_rate": 0.00019019158809824193, + "loss": 2.7535, + "step": 2929 + }, + { + "epoch": 0.23646194818820113, + "grad_norm": 0.7845562100410461, + "learning_rate": 0.00019018476842802326, + "loss": 2.7254, + "step": 2930 + }, + { + "epoch": 0.23654265192478413, + "grad_norm": 0.7566044926643372, + "learning_rate": 0.00019017794651016444, + "loss": 2.7295, + "step": 2931 + }, + { + "epoch": 0.23662335566136714, + "grad_norm": 0.8083382248878479, + "learning_rate": 0.00019017112234483545, + "loss": 2.7305, + "step": 2932 + }, + { + "epoch": 0.2367040593979501, + "grad_norm": 0.7924187183380127, + "learning_rate": 0.00019016429593220638, + "loss": 2.7659, + "step": 2933 + }, + { + "epoch": 0.23678476313453312, + "grad_norm": 0.8400307297706604, + "learning_rate": 0.00019015746727244737, + "loss": 2.7293, + "step": 2934 + }, + { + "epoch": 0.23686546687111612, + "grad_norm": 0.6931199431419373, + "learning_rate": 0.0001901506363657286, + "loss": 2.7189, + "step": 2935 + }, + { + "epoch": 0.23694617060769912, + "grad_norm": 0.8263585567474365, + "learning_rate": 0.0001901438032122203, + "loss": 2.7368, + "step": 2936 + }, + { + "epoch": 0.23702687434428213, + "grad_norm": 0.8001893162727356, + 
"learning_rate": 0.0001901369678120928, + "loss": 2.7793, + "step": 2937 + }, + { + "epoch": 0.23710757808086513, + "grad_norm": 0.7724235653877258, + "learning_rate": 0.00019013013016551644, + "loss": 2.717, + "step": 2938 + }, + { + "epoch": 0.23718828181744814, + "grad_norm": 0.7617147564888, + "learning_rate": 0.00019012329027266164, + "loss": 2.7275, + "step": 2939 + }, + { + "epoch": 0.23726898555403114, + "grad_norm": 0.80738765001297, + "learning_rate": 0.00019011644813369884, + "loss": 2.7444, + "step": 2940 + }, + { + "epoch": 0.23734968929061415, + "grad_norm": 0.7885528802871704, + "learning_rate": 0.00019010960374879861, + "loss": 2.7377, + "step": 2941 + }, + { + "epoch": 0.23743039302719715, + "grad_norm": 0.720268964767456, + "learning_rate": 0.00019010275711813147, + "loss": 2.6897, + "step": 2942 + }, + { + "epoch": 0.23751109676378016, + "grad_norm": 0.7532111406326294, + "learning_rate": 0.00019009590824186815, + "loss": 2.8117, + "step": 2943 + }, + { + "epoch": 0.23759180050036316, + "grad_norm": 0.780777633190155, + "learning_rate": 0.00019008905712017925, + "loss": 2.7565, + "step": 2944 + }, + { + "epoch": 0.23767250423694616, + "grad_norm": 0.8721919059753418, + "learning_rate": 0.00019008220375323553, + "loss": 2.801, + "step": 2945 + }, + { + "epoch": 0.23775320797352917, + "grad_norm": 0.8258914947509766, + "learning_rate": 0.00019007534814120786, + "loss": 2.7696, + "step": 2946 + }, + { + "epoch": 0.23783391171011217, + "grad_norm": 0.7292730808258057, + "learning_rate": 0.00019006849028426704, + "loss": 2.7512, + "step": 2947 + }, + { + "epoch": 0.23791461544669518, + "grad_norm": 0.7789164185523987, + "learning_rate": 0.00019006163018258398, + "loss": 2.7489, + "step": 2948 + }, + { + "epoch": 0.23799531918327818, + "grad_norm": 0.8049725294113159, + "learning_rate": 0.00019005476783632967, + "loss": 2.672, + "step": 2949 + }, + { + "epoch": 0.23807602291986119, + "grad_norm": 0.7440119981765747, + "learning_rate": 
0.00019004790324567519, + "loss": 2.7208, + "step": 2950 + }, + { + "epoch": 0.2381567266564442, + "grad_norm": 0.7695925235748291, + "learning_rate": 0.00019004103641079154, + "loss": 2.7816, + "step": 2951 + }, + { + "epoch": 0.2382374303930272, + "grad_norm": 0.7623234391212463, + "learning_rate": 0.00019003416733184988, + "loss": 2.7034, + "step": 2952 + }, + { + "epoch": 0.2383181341296102, + "grad_norm": 0.8136502504348755, + "learning_rate": 0.00019002729600902141, + "loss": 2.7638, + "step": 2953 + }, + { + "epoch": 0.2383988378661932, + "grad_norm": 0.7813066840171814, + "learning_rate": 0.00019002042244247743, + "loss": 2.7606, + "step": 2954 + }, + { + "epoch": 0.2384795416027762, + "grad_norm": 0.7863059043884277, + "learning_rate": 0.0001900135466323892, + "loss": 2.7219, + "step": 2955 + }, + { + "epoch": 0.2385602453393592, + "grad_norm": 0.8712359070777893, + "learning_rate": 0.00019000666857892806, + "loss": 2.7485, + "step": 2956 + }, + { + "epoch": 0.23864094907594222, + "grad_norm": 0.8130611777305603, + "learning_rate": 0.00018999978828226547, + "loss": 2.7195, + "step": 2957 + }, + { + "epoch": 0.23872165281252522, + "grad_norm": 0.759503960609436, + "learning_rate": 0.00018999290574257292, + "loss": 2.6856, + "step": 2958 + }, + { + "epoch": 0.23880235654910822, + "grad_norm": 0.7490882277488708, + "learning_rate": 0.0001899860209600219, + "loss": 2.7587, + "step": 2959 + }, + { + "epoch": 0.23888306028569123, + "grad_norm": 0.8111297488212585, + "learning_rate": 0.000189979133934784, + "loss": 2.7688, + "step": 2960 + }, + { + "epoch": 0.23896376402227423, + "grad_norm": 0.844894289970398, + "learning_rate": 0.0001899722446670309, + "loss": 2.7706, + "step": 2961 + }, + { + "epoch": 0.23904446775885724, + "grad_norm": 0.7875459790229797, + "learning_rate": 0.00018996535315693423, + "loss": 2.7535, + "step": 2962 + }, + { + "epoch": 0.23912517149544024, + "grad_norm": 0.7768518328666687, + "learning_rate": 0.0001899584594046658, + "loss": 
2.7268, + "step": 2963 + }, + { + "epoch": 0.23920587523202325, + "grad_norm": 0.8645716309547424, + "learning_rate": 0.00018995156341039744, + "loss": 2.7856, + "step": 2964 + }, + { + "epoch": 0.23928657896860625, + "grad_norm": 0.7816600799560547, + "learning_rate": 0.00018994466517430097, + "loss": 2.757, + "step": 2965 + }, + { + "epoch": 0.23936728270518925, + "grad_norm": 0.7967644333839417, + "learning_rate": 0.00018993776469654832, + "loss": 2.7021, + "step": 2966 + }, + { + "epoch": 0.23944798644177226, + "grad_norm": 0.800589919090271, + "learning_rate": 0.00018993086197731146, + "loss": 2.6838, + "step": 2967 + }, + { + "epoch": 0.23952869017835526, + "grad_norm": 0.7658529281616211, + "learning_rate": 0.00018992395701676246, + "loss": 2.6992, + "step": 2968 + }, + { + "epoch": 0.23960939391493827, + "grad_norm": 0.848456621170044, + "learning_rate": 0.00018991704981507338, + "loss": 2.7249, + "step": 2969 + }, + { + "epoch": 0.23969009765152127, + "grad_norm": 0.7365427017211914, + "learning_rate": 0.00018991014037241638, + "loss": 2.7044, + "step": 2970 + }, + { + "epoch": 0.23977080138810428, + "grad_norm": 0.8026351928710938, + "learning_rate": 0.00018990322868896365, + "loss": 2.7409, + "step": 2971 + }, + { + "epoch": 0.23985150512468728, + "grad_norm": 0.788646936416626, + "learning_rate": 0.00018989631476488744, + "loss": 2.7331, + "step": 2972 + }, + { + "epoch": 0.23993220886127029, + "grad_norm": 0.8388644456863403, + "learning_rate": 0.00018988939860036007, + "loss": 2.7478, + "step": 2973 + }, + { + "epoch": 0.2400129125978533, + "grad_norm": 0.7479026913642883, + "learning_rate": 0.00018988248019555394, + "loss": 2.7248, + "step": 2974 + }, + { + "epoch": 0.2400936163344363, + "grad_norm": 0.7313364744186401, + "learning_rate": 0.00018987555955064144, + "loss": 2.7323, + "step": 2975 + }, + { + "epoch": 0.2401743200710193, + "grad_norm": 0.7858260273933411, + "learning_rate": 0.00018986863666579505, + "loss": 2.6845, + "step": 2976 + }, + 
{ + "epoch": 0.2402550238076023, + "grad_norm": 0.8090949654579163, + "learning_rate": 0.00018986171154118732, + "loss": 2.8094, + "step": 2977 + }, + { + "epoch": 0.2403357275441853, + "grad_norm": 0.7917135953903198, + "learning_rate": 0.00018985478417699085, + "loss": 2.7106, + "step": 2978 + }, + { + "epoch": 0.2404164312807683, + "grad_norm": 0.8192126154899597, + "learning_rate": 0.00018984785457337825, + "loss": 2.7729, + "step": 2979 + }, + { + "epoch": 0.24049713501735132, + "grad_norm": 0.797922670841217, + "learning_rate": 0.00018984092273052226, + "loss": 2.7747, + "step": 2980 + }, + { + "epoch": 0.24057783875393432, + "grad_norm": 0.9050948023796082, + "learning_rate": 0.00018983398864859564, + "loss": 2.7453, + "step": 2981 + }, + { + "epoch": 0.24065854249051732, + "grad_norm": 0.7827617526054382, + "learning_rate": 0.0001898270523277712, + "loss": 2.7371, + "step": 2982 + }, + { + "epoch": 0.24073924622710033, + "grad_norm": 0.7530156373977661, + "learning_rate": 0.0001898201137682218, + "loss": 2.7397, + "step": 2983 + }, + { + "epoch": 0.2408199499636833, + "grad_norm": 0.7989545464515686, + "learning_rate": 0.00018981317297012034, + "loss": 2.7532, + "step": 2984 + }, + { + "epoch": 0.2409006537002663, + "grad_norm": 0.7501168847084045, + "learning_rate": 0.00018980622993363988, + "loss": 2.7395, + "step": 2985 + }, + { + "epoch": 0.2409813574368493, + "grad_norm": 0.8073468208312988, + "learning_rate": 0.0001897992846589534, + "loss": 2.7673, + "step": 2986 + }, + { + "epoch": 0.24106206117343232, + "grad_norm": 0.9155512452125549, + "learning_rate": 0.00018979233714623401, + "loss": 2.6608, + "step": 2987 + }, + { + "epoch": 0.24114276491001532, + "grad_norm": 0.7461311221122742, + "learning_rate": 0.00018978538739565485, + "loss": 2.7657, + "step": 2988 + }, + { + "epoch": 0.24122346864659833, + "grad_norm": 0.8011443018913269, + "learning_rate": 0.00018977843540738914, + "loss": 2.7363, + "step": 2989 + }, + { + "epoch": 0.24130417238318133, 
+ "grad_norm": 0.7602998614311218, + "learning_rate": 0.0001897714811816101, + "loss": 2.7285, + "step": 2990 + }, + { + "epoch": 0.24138487611976434, + "grad_norm": 0.8283531069755554, + "learning_rate": 0.00018976452471849116, + "loss": 2.7614, + "step": 2991 + }, + { + "epoch": 0.24146557985634734, + "grad_norm": 0.7358889579772949, + "learning_rate": 0.00018975756601820556, + "loss": 2.7429, + "step": 2992 + }, + { + "epoch": 0.24154628359293034, + "grad_norm": 0.7749240398406982, + "learning_rate": 0.0001897506050809268, + "loss": 2.6884, + "step": 2993 + }, + { + "epoch": 0.24162698732951335, + "grad_norm": 0.7529963254928589, + "learning_rate": 0.00018974364190682837, + "loss": 2.7619, + "step": 2994 + }, + { + "epoch": 0.24170769106609635, + "grad_norm": 0.7946054935455322, + "learning_rate": 0.00018973667649608376, + "loss": 2.7403, + "step": 2995 + }, + { + "epoch": 0.24178839480267936, + "grad_norm": 0.735870897769928, + "learning_rate": 0.0001897297088488666, + "loss": 2.7158, + "step": 2996 + }, + { + "epoch": 0.24186909853926236, + "grad_norm": 0.8409188985824585, + "learning_rate": 0.00018972273896535055, + "loss": 2.768, + "step": 2997 + }, + { + "epoch": 0.24194980227584537, + "grad_norm": 0.8351938724517822, + "learning_rate": 0.0001897157668457093, + "loss": 2.7548, + "step": 2998 + }, + { + "epoch": 0.24203050601242837, + "grad_norm": 0.8339046239852905, + "learning_rate": 0.00018970879249011663, + "loss": 2.7842, + "step": 2999 + }, + { + "epoch": 0.24211120974901137, + "grad_norm": 0.8092730641365051, + "learning_rate": 0.00018970181589874637, + "loss": 2.7141, + "step": 3000 + }, + { + "epoch": 0.24211120974901137, + "eval_loss": 2.643277406692505, + "eval_runtime": 784.7512, + "eval_samples_per_second": 3.339, + "eval_steps_per_second": 0.557, + "step": 3000 + }, + { + "epoch": 0.24219191348559438, + "grad_norm": 0.8014447093009949, + "learning_rate": 0.00018969483707177235, + "loss": 2.7341, + "step": 3001 + }, + { + "epoch": 
0.24227261722217738, + "grad_norm": 0.744153618812561, + "learning_rate": 0.00018968785600936855, + "loss": 2.678, + "step": 3002 + }, + { + "epoch": 0.2423533209587604, + "grad_norm": 0.7264240384101868, + "learning_rate": 0.0001896808727117089, + "loss": 2.7321, + "step": 3003 + }, + { + "epoch": 0.2424340246953434, + "grad_norm": 0.8214067220687866, + "learning_rate": 0.00018967388717896748, + "loss": 2.7311, + "step": 3004 + }, + { + "epoch": 0.2425147284319264, + "grad_norm": 0.7871330976486206, + "learning_rate": 0.00018966689941131838, + "loss": 2.7184, + "step": 3005 + }, + { + "epoch": 0.2425954321685094, + "grad_norm": 0.7301360964775085, + "learning_rate": 0.00018965990940893575, + "loss": 2.7039, + "step": 3006 + }, + { + "epoch": 0.2426761359050924, + "grad_norm": 0.8290385603904724, + "learning_rate": 0.00018965291717199382, + "loss": 2.7848, + "step": 3007 + }, + { + "epoch": 0.2427568396416754, + "grad_norm": 0.7465909123420715, + "learning_rate": 0.00018964592270066683, + "loss": 2.7271, + "step": 3008 + }, + { + "epoch": 0.2428375433782584, + "grad_norm": 0.7992933988571167, + "learning_rate": 0.00018963892599512913, + "loss": 2.7749, + "step": 3009 + }, + { + "epoch": 0.24291824711484142, + "grad_norm": 0.7879100441932678, + "learning_rate": 0.00018963192705555507, + "loss": 2.6844, + "step": 3010 + }, + { + "epoch": 0.24299895085142442, + "grad_norm": 0.7895401120185852, + "learning_rate": 0.00018962492588211905, + "loss": 2.725, + "step": 3011 + }, + { + "epoch": 0.24307965458800743, + "grad_norm": 0.7699374556541443, + "learning_rate": 0.00018961792247499564, + "loss": 2.7408, + "step": 3012 + }, + { + "epoch": 0.24316035832459043, + "grad_norm": 0.828372597694397, + "learning_rate": 0.0001896109168343593, + "loss": 2.7527, + "step": 3013 + }, + { + "epoch": 0.24324106206117344, + "grad_norm": 0.7611951231956482, + "learning_rate": 0.0001896039089603847, + "loss": 2.7294, + "step": 3014 + }, + { + "epoch": 0.24332176579775644, + "grad_norm": 
0.8214892148971558, + "learning_rate": 0.00018959689885324646, + "loss": 2.6931, + "step": 3015 + }, + { + "epoch": 0.24340246953433944, + "grad_norm": 0.7472538352012634, + "learning_rate": 0.00018958988651311928, + "loss": 2.7316, + "step": 3016 + }, + { + "epoch": 0.24348317327092245, + "grad_norm": 0.7574933171272278, + "learning_rate": 0.00018958287194017795, + "loss": 2.7764, + "step": 3017 + }, + { + "epoch": 0.24356387700750545, + "grad_norm": 0.739152729511261, + "learning_rate": 0.00018957585513459723, + "loss": 2.7949, + "step": 3018 + }, + { + "epoch": 0.24364458074408846, + "grad_norm": 0.824097752571106, + "learning_rate": 0.00018956883609655208, + "loss": 2.6612, + "step": 3019 + }, + { + "epoch": 0.24372528448067146, + "grad_norm": 0.7891144156455994, + "learning_rate": 0.00018956181482621744, + "loss": 2.7139, + "step": 3020 + }, + { + "epoch": 0.24380598821725447, + "grad_norm": 0.7364415526390076, + "learning_rate": 0.0001895547913237682, + "loss": 2.6984, + "step": 3021 + }, + { + "epoch": 0.24388669195383747, + "grad_norm": 0.7631362080574036, + "learning_rate": 0.0001895477655893795, + "loss": 2.7015, + "step": 3022 + }, + { + "epoch": 0.24396739569042047, + "grad_norm": 0.780541181564331, + "learning_rate": 0.00018954073762322637, + "loss": 2.7716, + "step": 3023 + }, + { + "epoch": 0.24404809942700348, + "grad_norm": 0.7877349853515625, + "learning_rate": 0.00018953370742548403, + "loss": 2.6654, + "step": 3024 + }, + { + "epoch": 0.24412880316358648, + "grad_norm": 0.7786216139793396, + "learning_rate": 0.00018952667499632763, + "loss": 2.7491, + "step": 3025 + }, + { + "epoch": 0.2442095069001695, + "grad_norm": 0.8207663893699646, + "learning_rate": 0.00018951964033593247, + "loss": 2.7212, + "step": 3026 + }, + { + "epoch": 0.2442902106367525, + "grad_norm": 0.8271831274032593, + "learning_rate": 0.00018951260344447386, + "loss": 2.7456, + "step": 3027 + }, + { + "epoch": 0.2443709143733355, + "grad_norm": 0.7610505819320679, + 
"learning_rate": 0.00018950556432212722, + "loss": 2.7472, + "step": 3028 + }, + { + "epoch": 0.2444516181099185, + "grad_norm": 0.7521701455116272, + "learning_rate": 0.00018949852296906792, + "loss": 2.7263, + "step": 3029 + }, + { + "epoch": 0.2445323218465015, + "grad_norm": 0.7518337965011597, + "learning_rate": 0.00018949147938547144, + "loss": 2.7069, + "step": 3030 + }, + { + "epoch": 0.2446130255830845, + "grad_norm": 0.7823107838630676, + "learning_rate": 0.00018948443357151343, + "loss": 2.7858, + "step": 3031 + }, + { + "epoch": 0.2446937293196675, + "grad_norm": 0.733132004737854, + "learning_rate": 0.00018947738552736938, + "loss": 2.7194, + "step": 3032 + }, + { + "epoch": 0.24477443305625052, + "grad_norm": 0.7756488919258118, + "learning_rate": 0.00018947033525321501, + "loss": 2.7299, + "step": 3033 + }, + { + "epoch": 0.24485513679283352, + "grad_norm": 0.7971112728118896, + "learning_rate": 0.00018946328274922598, + "loss": 2.7474, + "step": 3034 + }, + { + "epoch": 0.2449358405294165, + "grad_norm": 0.7871260643005371, + "learning_rate": 0.0001894562280155781, + "loss": 2.6994, + "step": 3035 + }, + { + "epoch": 0.2450165442659995, + "grad_norm": 0.7431116104125977, + "learning_rate": 0.00018944917105244717, + "loss": 2.6834, + "step": 3036 + }, + { + "epoch": 0.2450972480025825, + "grad_norm": 0.7372273206710815, + "learning_rate": 0.00018944211186000906, + "loss": 2.6988, + "step": 3037 + }, + { + "epoch": 0.2451779517391655, + "grad_norm": 0.8161508440971375, + "learning_rate": 0.00018943505043843975, + "loss": 2.7595, + "step": 3038 + }, + { + "epoch": 0.24525865547574852, + "grad_norm": 0.8062586784362793, + "learning_rate": 0.00018942798678791518, + "loss": 2.6893, + "step": 3039 + }, + { + "epoch": 0.24533935921233152, + "grad_norm": 0.824023425579071, + "learning_rate": 0.0001894209209086114, + "loss": 2.7188, + "step": 3040 + }, + { + "epoch": 0.24542006294891452, + "grad_norm": 0.740466833114624, + "learning_rate": 
0.00018941385280070455, + "loss": 2.674, + "step": 3041 + }, + { + "epoch": 0.24550076668549753, + "grad_norm": 0.8543577194213867, + "learning_rate": 0.00018940678246437073, + "loss": 2.7423, + "step": 3042 + }, + { + "epoch": 0.24558147042208053, + "grad_norm": 0.7059324979782104, + "learning_rate": 0.0001893997098997862, + "loss": 2.6669, + "step": 3043 + }, + { + "epoch": 0.24566217415866354, + "grad_norm": 0.7739956974983215, + "learning_rate": 0.00018939263510712721, + "loss": 2.7118, + "step": 3044 + }, + { + "epoch": 0.24574287789524654, + "grad_norm": 0.7701205611228943, + "learning_rate": 0.00018938555808657007, + "loss": 2.7653, + "step": 3045 + }, + { + "epoch": 0.24582358163182955, + "grad_norm": 0.7243000864982605, + "learning_rate": 0.00018937847883829115, + "loss": 2.6789, + "step": 3046 + }, + { + "epoch": 0.24590428536841255, + "grad_norm": 0.7645598649978638, + "learning_rate": 0.00018937139736246693, + "loss": 2.7108, + "step": 3047 + }, + { + "epoch": 0.24598498910499556, + "grad_norm": 0.7544745802879333, + "learning_rate": 0.00018936431365927385, + "loss": 2.6958, + "step": 3048 + }, + { + "epoch": 0.24606569284157856, + "grad_norm": 0.709282398223877, + "learning_rate": 0.00018935722772888848, + "loss": 2.6728, + "step": 3049 + }, + { + "epoch": 0.24614639657816156, + "grad_norm": 0.7524243593215942, + "learning_rate": 0.00018935013957148742, + "loss": 2.7283, + "step": 3050 + }, + { + "epoch": 0.24622710031474457, + "grad_norm": 0.7959655523300171, + "learning_rate": 0.0001893430491872473, + "loss": 2.7384, + "step": 3051 + }, + { + "epoch": 0.24630780405132757, + "grad_norm": 0.7252553105354309, + "learning_rate": 0.00018933595657634486, + "loss": 2.7226, + "step": 3052 + }, + { + "epoch": 0.24638850778791058, + "grad_norm": 0.7387316226959229, + "learning_rate": 0.00018932886173895686, + "loss": 2.7546, + "step": 3053 + }, + { + "epoch": 0.24646921152449358, + "grad_norm": 0.804856538772583, + "learning_rate": 0.0001893217646752601, + 
"loss": 2.7321, + "step": 3054 + }, + { + "epoch": 0.24654991526107659, + "grad_norm": 0.6929069757461548, + "learning_rate": 0.0001893146653854315, + "loss": 2.6735, + "step": 3055 + }, + { + "epoch": 0.2466306189976596, + "grad_norm": 0.7076159715652466, + "learning_rate": 0.00018930756386964794, + "loss": 2.7368, + "step": 3056 + }, + { + "epoch": 0.2467113227342426, + "grad_norm": 0.7522851228713989, + "learning_rate": 0.00018930046012808648, + "loss": 2.7448, + "step": 3057 + }, + { + "epoch": 0.2467920264708256, + "grad_norm": 0.8347200155258179, + "learning_rate": 0.00018929335416092408, + "loss": 2.6837, + "step": 3058 + }, + { + "epoch": 0.2468727302074086, + "grad_norm": 0.737503707408905, + "learning_rate": 0.00018928624596833786, + "loss": 2.693, + "step": 3059 + }, + { + "epoch": 0.2469534339439916, + "grad_norm": 0.7836787104606628, + "learning_rate": 0.00018927913555050503, + "loss": 2.7335, + "step": 3060 + }, + { + "epoch": 0.2470341376805746, + "grad_norm": 0.7823840975761414, + "learning_rate": 0.00018927202290760278, + "loss": 2.6736, + "step": 3061 + }, + { + "epoch": 0.24711484141715762, + "grad_norm": 0.7894529700279236, + "learning_rate": 0.00018926490803980833, + "loss": 2.7112, + "step": 3062 + }, + { + "epoch": 0.24719554515374062, + "grad_norm": 0.8289024829864502, + "learning_rate": 0.000189257790947299, + "loss": 2.7667, + "step": 3063 + }, + { + "epoch": 0.24727624889032362, + "grad_norm": 0.70560222864151, + "learning_rate": 0.00018925067163025227, + "loss": 2.6946, + "step": 3064 + }, + { + "epoch": 0.24735695262690663, + "grad_norm": 0.6954196095466614, + "learning_rate": 0.00018924355008884548, + "loss": 2.7237, + "step": 3065 + }, + { + "epoch": 0.24743765636348963, + "grad_norm": 0.7975121736526489, + "learning_rate": 0.0001892364263232561, + "loss": 2.6392, + "step": 3066 + }, + { + "epoch": 0.24751836010007264, + "grad_norm": 0.777350902557373, + "learning_rate": 0.00018922930033366174, + "loss": 2.7284, + "step": 3067 + }, + 
{ + "epoch": 0.24759906383665564, + "grad_norm": 0.738240659236908, + "learning_rate": 0.00018922217212023995, + "loss": 2.6884, + "step": 3068 + }, + { + "epoch": 0.24767976757323865, + "grad_norm": 0.8077268600463867, + "learning_rate": 0.0001892150416831684, + "loss": 2.7205, + "step": 3069 + }, + { + "epoch": 0.24776047130982165, + "grad_norm": 0.8108188509941101, + "learning_rate": 0.00018920790902262483, + "loss": 2.7592, + "step": 3070 + }, + { + "epoch": 0.24784117504640465, + "grad_norm": 0.7842642664909363, + "learning_rate": 0.00018920077413878695, + "loss": 2.7474, + "step": 3071 + }, + { + "epoch": 0.24792187878298766, + "grad_norm": 0.7644543051719666, + "learning_rate": 0.0001891936370318326, + "loss": 2.7179, + "step": 3072 + }, + { + "epoch": 0.24800258251957066, + "grad_norm": 0.7761854529380798, + "learning_rate": 0.00018918649770193965, + "loss": 2.71, + "step": 3073 + }, + { + "epoch": 0.24808328625615367, + "grad_norm": 0.7724074125289917, + "learning_rate": 0.00018917935614928607, + "loss": 2.7359, + "step": 3074 + }, + { + "epoch": 0.24816398999273667, + "grad_norm": 0.7360609173774719, + "learning_rate": 0.0001891722123740498, + "loss": 2.7342, + "step": 3075 + }, + { + "epoch": 0.24824469372931968, + "grad_norm": 0.757561206817627, + "learning_rate": 0.00018916506637640894, + "loss": 2.7647, + "step": 3076 + }, + { + "epoch": 0.24832539746590268, + "grad_norm": 0.7180947065353394, + "learning_rate": 0.00018915791815654148, + "loss": 2.6771, + "step": 3077 + }, + { + "epoch": 0.24840610120248569, + "grad_norm": 0.7219653129577637, + "learning_rate": 0.0001891507677146257, + "loss": 2.7772, + "step": 3078 + }, + { + "epoch": 0.2484868049390687, + "grad_norm": 0.749113917350769, + "learning_rate": 0.0001891436150508397, + "loss": 2.6996, + "step": 3079 + }, + { + "epoch": 0.2485675086756517, + "grad_norm": 0.766180157661438, + "learning_rate": 0.00018913646016536183, + "loss": 2.7896, + "step": 3080 + }, + { + "epoch": 0.2486482124122347, + 
"grad_norm": 0.7672411799430847, + "learning_rate": 0.00018912930305837032, + "loss": 2.7307, + "step": 3081 + }, + { + "epoch": 0.2487289161488177, + "grad_norm": 0.7639018297195435, + "learning_rate": 0.00018912214373004364, + "loss": 2.6569, + "step": 3082 + }, + { + "epoch": 0.2488096198854007, + "grad_norm": 0.8935483694076538, + "learning_rate": 0.00018911498218056013, + "loss": 2.6897, + "step": 3083 + }, + { + "epoch": 0.2488903236219837, + "grad_norm": 0.8506368398666382, + "learning_rate": 0.00018910781841009836, + "loss": 2.778, + "step": 3084 + }, + { + "epoch": 0.24897102735856672, + "grad_norm": 0.8026999235153198, + "learning_rate": 0.0001891006524188368, + "loss": 2.7799, + "step": 3085 + }, + { + "epoch": 0.2490517310951497, + "grad_norm": 0.784637987613678, + "learning_rate": 0.00018909348420695406, + "loss": 2.673, + "step": 3086 + }, + { + "epoch": 0.2491324348317327, + "grad_norm": 0.8949337601661682, + "learning_rate": 0.00018908631377462882, + "loss": 2.7726, + "step": 3087 + }, + { + "epoch": 0.2492131385683157, + "grad_norm": 0.73841792345047, + "learning_rate": 0.00018907914112203974, + "loss": 2.7403, + "step": 3088 + }, + { + "epoch": 0.2492938423048987, + "grad_norm": 0.7305924296379089, + "learning_rate": 0.00018907196624936564, + "loss": 2.6713, + "step": 3089 + }, + { + "epoch": 0.2493745460414817, + "grad_norm": 0.7707394361495972, + "learning_rate": 0.0001890647891567853, + "loss": 2.7306, + "step": 3090 + }, + { + "epoch": 0.2494552497780647, + "grad_norm": 0.8691473603248596, + "learning_rate": 0.00018905760984447759, + "loss": 2.6775, + "step": 3091 + }, + { + "epoch": 0.24953595351464772, + "grad_norm": 0.7466028332710266, + "learning_rate": 0.00018905042831262144, + "loss": 2.7196, + "step": 3092 + }, + { + "epoch": 0.24961665725123072, + "grad_norm": 0.7785150408744812, + "learning_rate": 0.0001890432445613958, + "loss": 2.7099, + "step": 3093 + }, + { + "epoch": 0.24969736098781373, + "grad_norm": 0.7775028347969055, + 
"learning_rate": 0.0001890360585909798, + "loss": 2.698, + "step": 3094 + }, + { + "epoch": 0.24977806472439673, + "grad_norm": 0.829257071018219, + "learning_rate": 0.00018902887040155245, + "loss": 2.711, + "step": 3095 + }, + { + "epoch": 0.24985876846097974, + "grad_norm": 0.8492234945297241, + "learning_rate": 0.00018902167999329295, + "loss": 2.7164, + "step": 3096 + }, + { + "epoch": 0.24993947219756274, + "grad_norm": 0.7332174777984619, + "learning_rate": 0.00018901448736638045, + "loss": 2.6925, + "step": 3097 + }, + { + "epoch": 0.25002017593414577, + "grad_norm": 0.7494251728057861, + "learning_rate": 0.00018900729252099426, + "loss": 2.6899, + "step": 3098 + }, + { + "epoch": 0.25010087967072875, + "grad_norm": 0.7760747075080872, + "learning_rate": 0.00018900009545731367, + "loss": 2.6626, + "step": 3099 + }, + { + "epoch": 0.2501815834073118, + "grad_norm": 0.7270001173019409, + "learning_rate": 0.00018899289617551804, + "loss": 2.7338, + "step": 3100 + }, + { + "epoch": 0.25026228714389476, + "grad_norm": 0.7832693457603455, + "learning_rate": 0.0001889856946757868, + "loss": 2.6668, + "step": 3101 + }, + { + "epoch": 0.2503429908804778, + "grad_norm": 0.8833239674568176, + "learning_rate": 0.00018897849095829945, + "loss": 2.7219, + "step": 3102 + }, + { + "epoch": 0.25042369461706077, + "grad_norm": 0.8144814372062683, + "learning_rate": 0.0001889712850232355, + "loss": 2.724, + "step": 3103 + }, + { + "epoch": 0.2505043983536438, + "grad_norm": 0.9466180801391602, + "learning_rate": 0.0001889640768707746, + "loss": 2.7499, + "step": 3104 + }, + { + "epoch": 0.2505851020902268, + "grad_norm": 0.926292359828949, + "learning_rate": 0.00018895686650109632, + "loss": 2.7391, + "step": 3105 + }, + { + "epoch": 0.2506658058268098, + "grad_norm": 0.8214002251625061, + "learning_rate": 0.00018894965391438038, + "loss": 2.7546, + "step": 3106 + }, + { + "epoch": 0.2507465095633928, + "grad_norm": 0.9021030068397522, + "learning_rate": 
0.00018894243911080655, + "loss": 2.7188, + "step": 3107 + }, + { + "epoch": 0.2508272132999758, + "grad_norm": 0.778366208076477, + "learning_rate": 0.00018893522209055465, + "loss": 2.7852, + "step": 3108 + }, + { + "epoch": 0.2509079170365588, + "grad_norm": 0.8780209422111511, + "learning_rate": 0.00018892800285380456, + "loss": 2.7344, + "step": 3109 + }, + { + "epoch": 0.2509886207731418, + "grad_norm": 0.7581839561462402, + "learning_rate": 0.00018892078140073614, + "loss": 2.6697, + "step": 3110 + }, + { + "epoch": 0.2510693245097248, + "grad_norm": 0.7818635702133179, + "learning_rate": 0.00018891355773152944, + "loss": 2.6969, + "step": 3111 + }, + { + "epoch": 0.2511500282463078, + "grad_norm": 0.7528424859046936, + "learning_rate": 0.0001889063318463644, + "loss": 2.7359, + "step": 3112 + }, + { + "epoch": 0.2512307319828908, + "grad_norm": 0.8274288773536682, + "learning_rate": 0.0001888991037454212, + "loss": 2.7124, + "step": 3113 + }, + { + "epoch": 0.2513114357194738, + "grad_norm": 0.7186813354492188, + "learning_rate": 0.00018889187342888, + "loss": 2.7037, + "step": 3114 + }, + { + "epoch": 0.2513921394560568, + "grad_norm": 0.7458071112632751, + "learning_rate": 0.00018888464089692088, + "loss": 2.7178, + "step": 3115 + }, + { + "epoch": 0.2514728431926398, + "grad_norm": 0.7814257740974426, + "learning_rate": 0.00018887740614972418, + "loss": 2.7554, + "step": 3116 + }, + { + "epoch": 0.2515535469292228, + "grad_norm": 0.7706831097602844, + "learning_rate": 0.0001888701691874702, + "loss": 2.7441, + "step": 3117 + }, + { + "epoch": 0.2516342506658058, + "grad_norm": 0.8177775740623474, + "learning_rate": 0.0001888629300103393, + "loss": 2.7257, + "step": 3118 + }, + { + "epoch": 0.25171495440238884, + "grad_norm": 0.791097104549408, + "learning_rate": 0.00018885568861851188, + "loss": 2.6937, + "step": 3119 + }, + { + "epoch": 0.2517956581389718, + "grad_norm": 0.7521430850028992, + "learning_rate": 0.00018884844501216845, + "loss": 2.7723, + 
"step": 3120 + }, + { + "epoch": 0.25187636187555484, + "grad_norm": 0.8119359016418457, + "learning_rate": 0.00018884119919148948, + "loss": 2.7573, + "step": 3121 + }, + { + "epoch": 0.2519570656121378, + "grad_norm": 0.7579830288887024, + "learning_rate": 0.00018883395115665562, + "loss": 2.6943, + "step": 3122 + }, + { + "epoch": 0.25203776934872085, + "grad_norm": 0.7718791365623474, + "learning_rate": 0.00018882670090784748, + "loss": 2.6911, + "step": 3123 + }, + { + "epoch": 0.25211847308530383, + "grad_norm": 0.7718087434768677, + "learning_rate": 0.00018881944844524576, + "loss": 2.7505, + "step": 3124 + }, + { + "epoch": 0.25219917682188686, + "grad_norm": 0.7696875333786011, + "learning_rate": 0.0001888121937690312, + "loss": 2.7272, + "step": 3125 + }, + { + "epoch": 0.25227988055846984, + "grad_norm": 0.8082131743431091, + "learning_rate": 0.00018880493687938464, + "loss": 2.6677, + "step": 3126 + }, + { + "epoch": 0.25236058429505287, + "grad_norm": 0.857224702835083, + "learning_rate": 0.00018879767777648686, + "loss": 2.7237, + "step": 3127 + }, + { + "epoch": 0.25244128803163585, + "grad_norm": 0.8135749697685242, + "learning_rate": 0.00018879041646051886, + "loss": 2.7298, + "step": 3128 + }, + { + "epoch": 0.2525219917682189, + "grad_norm": 0.7772457003593445, + "learning_rate": 0.0001887831529316616, + "loss": 2.7723, + "step": 3129 + }, + { + "epoch": 0.25260269550480186, + "grad_norm": 0.795555055141449, + "learning_rate": 0.00018877588719009607, + "loss": 2.7207, + "step": 3130 + }, + { + "epoch": 0.2526833992413849, + "grad_norm": 0.7677939534187317, + "learning_rate": 0.00018876861923600337, + "loss": 2.6649, + "step": 3131 + }, + { + "epoch": 0.25276410297796786, + "grad_norm": 0.7706151008605957, + "learning_rate": 0.00018876134906956464, + "loss": 2.7154, + "step": 3132 + }, + { + "epoch": 0.2528448067145509, + "grad_norm": 0.8230584859848022, + "learning_rate": 0.00018875407669096105, + "loss": 2.7871, + "step": 3133 + }, + { + 
"epoch": 0.2529255104511339, + "grad_norm": 0.7037158608436584, + "learning_rate": 0.0001887468021003739, + "loss": 2.669, + "step": 3134 + }, + { + "epoch": 0.2530062141877169, + "grad_norm": 0.8485400080680847, + "learning_rate": 0.00018873952529798441, + "loss": 2.7517, + "step": 3135 + }, + { + "epoch": 0.2530869179242999, + "grad_norm": 0.7803399562835693, + "learning_rate": 0.000188732246283974, + "loss": 2.6987, + "step": 3136 + }, + { + "epoch": 0.2531676216608829, + "grad_norm": 0.7884016633033752, + "learning_rate": 0.0001887249650585241, + "loss": 2.7348, + "step": 3137 + }, + { + "epoch": 0.2532483253974659, + "grad_norm": 0.7794530987739563, + "learning_rate": 0.0001887176816218161, + "loss": 2.6934, + "step": 3138 + }, + { + "epoch": 0.2533290291340489, + "grad_norm": 0.7905173301696777, + "learning_rate": 0.00018871039597403156, + "loss": 2.714, + "step": 3139 + }, + { + "epoch": 0.2534097328706319, + "grad_norm": 0.7857949137687683, + "learning_rate": 0.0001887031081153521, + "loss": 2.7591, + "step": 3140 + }, + { + "epoch": 0.25349043660721493, + "grad_norm": 0.8602419495582581, + "learning_rate": 0.00018869581804595927, + "loss": 2.7819, + "step": 3141 + }, + { + "epoch": 0.2535711403437979, + "grad_norm": 0.7845202088356018, + "learning_rate": 0.00018868852576603483, + "loss": 2.6796, + "step": 3142 + }, + { + "epoch": 0.25365184408038094, + "grad_norm": 0.7600612640380859, + "learning_rate": 0.00018868123127576048, + "loss": 2.6785, + "step": 3143 + }, + { + "epoch": 0.2537325478169639, + "grad_norm": 0.7731521725654602, + "learning_rate": 0.000188673934575318, + "loss": 2.7435, + "step": 3144 + }, + { + "epoch": 0.25381325155354695, + "grad_norm": 0.8214225172996521, + "learning_rate": 0.0001886666356648893, + "loss": 2.7264, + "step": 3145 + }, + { + "epoch": 0.2538939552901299, + "grad_norm": 0.7623010277748108, + "learning_rate": 0.00018865933454465628, + "loss": 2.73, + "step": 3146 + }, + { + "epoch": 0.25397465902671296, + "grad_norm": 
0.7864633798599243, + "learning_rate": 0.00018865203121480088, + "loss": 2.7654, + "step": 3147 + }, + { + "epoch": 0.25405536276329593, + "grad_norm": 0.7654051780700684, + "learning_rate": 0.0001886447256755051, + "loss": 2.7171, + "step": 3148 + }, + { + "epoch": 0.25413606649987897, + "grad_norm": 0.8045486211776733, + "learning_rate": 0.0001886374179269511, + "loss": 2.7385, + "step": 3149 + }, + { + "epoch": 0.25421677023646194, + "grad_norm": 0.8504971861839294, + "learning_rate": 0.0001886301079693209, + "loss": 2.6719, + "step": 3150 + }, + { + "epoch": 0.254297473973045, + "grad_norm": 0.771538496017456, + "learning_rate": 0.0001886227958027967, + "loss": 2.6707, + "step": 3151 + }, + { + "epoch": 0.25437817770962795, + "grad_norm": 0.8472220301628113, + "learning_rate": 0.0001886154814275608, + "loss": 2.7201, + "step": 3152 + }, + { + "epoch": 0.254458881446211, + "grad_norm": 0.7639158368110657, + "learning_rate": 0.00018860816484379545, + "loss": 2.76, + "step": 3153 + }, + { + "epoch": 0.25453958518279396, + "grad_norm": 0.8042064905166626, + "learning_rate": 0.000188600846051683, + "loss": 2.6862, + "step": 3154 + }, + { + "epoch": 0.254620288919377, + "grad_norm": 0.7481087446212769, + "learning_rate": 0.0001885935250514059, + "loss": 2.7394, + "step": 3155 + }, + { + "epoch": 0.25470099265595997, + "grad_norm": 0.7826097011566162, + "learning_rate": 0.00018858620184314653, + "loss": 2.596, + "step": 3156 + }, + { + "epoch": 0.254781696392543, + "grad_norm": 0.7477610111236572, + "learning_rate": 0.00018857887642708743, + "loss": 2.7385, + "step": 3157 + }, + { + "epoch": 0.254862400129126, + "grad_norm": 0.7347466945648193, + "learning_rate": 0.00018857154880341122, + "loss": 2.722, + "step": 3158 + }, + { + "epoch": 0.254943103865709, + "grad_norm": 0.7853806018829346, + "learning_rate": 0.00018856421897230048, + "loss": 2.7675, + "step": 3159 + }, + { + "epoch": 0.255023807602292, + "grad_norm": 0.7497034072875977, + "learning_rate": 
0.0001885568869339379, + "loss": 2.6882, + "step": 3160 + }, + { + "epoch": 0.255104511338875, + "grad_norm": 0.7932263612747192, + "learning_rate": 0.0001885495526885062, + "loss": 2.7938, + "step": 3161 + }, + { + "epoch": 0.255185215075458, + "grad_norm": 0.7776823043823242, + "learning_rate": 0.00018854221623618815, + "loss": 2.6955, + "step": 3162 + }, + { + "epoch": 0.25526591881204097, + "grad_norm": 0.7564878463745117, + "learning_rate": 0.00018853487757716666, + "loss": 2.7644, + "step": 3163 + }, + { + "epoch": 0.255346622548624, + "grad_norm": 0.836270809173584, + "learning_rate": 0.00018852753671162454, + "loss": 2.7119, + "step": 3164 + }, + { + "epoch": 0.255427326285207, + "grad_norm": 0.7540388703346252, + "learning_rate": 0.00018852019363974485, + "loss": 2.797, + "step": 3165 + }, + { + "epoch": 0.25550803002179, + "grad_norm": 0.7943860292434692, + "learning_rate": 0.0001885128483617105, + "loss": 2.7973, + "step": 3166 + }, + { + "epoch": 0.255588733758373, + "grad_norm": 0.7743831276893616, + "learning_rate": 0.00018850550087770463, + "loss": 2.7403, + "step": 3167 + }, + { + "epoch": 0.255669437494956, + "grad_norm": 0.7593801021575928, + "learning_rate": 0.00018849815118791028, + "loss": 2.7203, + "step": 3168 + }, + { + "epoch": 0.255750141231539, + "grad_norm": 0.7663586139678955, + "learning_rate": 0.00018849079929251068, + "loss": 2.7481, + "step": 3169 + }, + { + "epoch": 0.25583084496812203, + "grad_norm": 0.7218170166015625, + "learning_rate": 0.00018848344519168905, + "loss": 2.6698, + "step": 3170 + }, + { + "epoch": 0.255911548704705, + "grad_norm": 0.8374441266059875, + "learning_rate": 0.00018847608888562868, + "loss": 2.8121, + "step": 3171 + }, + { + "epoch": 0.25599225244128804, + "grad_norm": 0.7488373517990112, + "learning_rate": 0.00018846873037451286, + "loss": 2.6871, + "step": 3172 + }, + { + "epoch": 0.256072956177871, + "grad_norm": 0.7513325810432434, + "learning_rate": 0.00018846136965852505, + "loss": 2.6924, + 
"step": 3173 + }, + { + "epoch": 0.25615365991445405, + "grad_norm": 0.7467690706253052, + "learning_rate": 0.00018845400673784865, + "loss": 2.714, + "step": 3174 + }, + { + "epoch": 0.256234363651037, + "grad_norm": 0.7717954516410828, + "learning_rate": 0.0001884466416126672, + "loss": 2.6679, + "step": 3175 + }, + { + "epoch": 0.25631506738762005, + "grad_norm": 0.7086547613143921, + "learning_rate": 0.0001884392742831642, + "loss": 2.7046, + "step": 3176 + }, + { + "epoch": 0.25639577112420303, + "grad_norm": 0.7024885416030884, + "learning_rate": 0.00018843190474952337, + "loss": 2.6724, + "step": 3177 + }, + { + "epoch": 0.25647647486078606, + "grad_norm": 0.8376390933990479, + "learning_rate": 0.00018842453301192827, + "loss": 2.7818, + "step": 3178 + }, + { + "epoch": 0.25655717859736904, + "grad_norm": 0.8190221190452576, + "learning_rate": 0.00018841715907056265, + "loss": 2.7455, + "step": 3179 + }, + { + "epoch": 0.25663788233395207, + "grad_norm": 0.8029047846794128, + "learning_rate": 0.0001884097829256103, + "loss": 2.7102, + "step": 3180 + }, + { + "epoch": 0.25671858607053505, + "grad_norm": 0.7467923760414124, + "learning_rate": 0.00018840240457725508, + "loss": 2.7051, + "step": 3181 + }, + { + "epoch": 0.2567992898071181, + "grad_norm": 0.7850394248962402, + "learning_rate": 0.00018839502402568086, + "loss": 2.6826, + "step": 3182 + }, + { + "epoch": 0.25687999354370106, + "grad_norm": 0.7144927978515625, + "learning_rate": 0.00018838764127107155, + "loss": 2.6694, + "step": 3183 + }, + { + "epoch": 0.2569606972802841, + "grad_norm": 0.7580311894416809, + "learning_rate": 0.0001883802563136112, + "loss": 2.7191, + "step": 3184 + }, + { + "epoch": 0.25704140101686707, + "grad_norm": 0.7366482615470886, + "learning_rate": 0.0001883728691534838, + "loss": 2.7175, + "step": 3185 + }, + { + "epoch": 0.2571221047534501, + "grad_norm": 0.6961715817451477, + "learning_rate": 0.0001883654797908735, + "loss": 2.7705, + "step": 3186 + }, + { + "epoch": 
0.2572028084900331, + "grad_norm": 0.7473716735839844, + "learning_rate": 0.00018835808822596445, + "loss": 2.707, + "step": 3187 + }, + { + "epoch": 0.2572835122266161, + "grad_norm": 0.8376151919364929, + "learning_rate": 0.00018835069445894087, + "loss": 2.7424, + "step": 3188 + }, + { + "epoch": 0.2573642159631991, + "grad_norm": 0.7950237393379211, + "learning_rate": 0.00018834329848998706, + "loss": 2.7593, + "step": 3189 + }, + { + "epoch": 0.2574449196997821, + "grad_norm": 0.7637122869491577, + "learning_rate": 0.0001883359003192873, + "loss": 2.6708, + "step": 3190 + }, + { + "epoch": 0.2575256234363651, + "grad_norm": 0.709516704082489, + "learning_rate": 0.00018832849994702597, + "loss": 2.6988, + "step": 3191 + }, + { + "epoch": 0.2576063271729481, + "grad_norm": 0.7465435266494751, + "learning_rate": 0.00018832109737338757, + "loss": 2.7183, + "step": 3192 + }, + { + "epoch": 0.2576870309095311, + "grad_norm": 0.7619186043739319, + "learning_rate": 0.00018831369259855653, + "loss": 2.6833, + "step": 3193 + }, + { + "epoch": 0.25776773464611413, + "grad_norm": 0.7501961588859558, + "learning_rate": 0.0001883062856227174, + "loss": 2.725, + "step": 3194 + }, + { + "epoch": 0.2578484383826971, + "grad_norm": 0.7720133066177368, + "learning_rate": 0.00018829887644605483, + "loss": 2.7988, + "step": 3195 + }, + { + "epoch": 0.25792914211928014, + "grad_norm": 0.7253942489624023, + "learning_rate": 0.00018829146506875344, + "loss": 2.6999, + "step": 3196 + }, + { + "epoch": 0.2580098458558631, + "grad_norm": 0.7759599685668945, + "learning_rate": 0.00018828405149099792, + "loss": 2.6831, + "step": 3197 + }, + { + "epoch": 0.25809054959244615, + "grad_norm": 0.7250547409057617, + "learning_rate": 0.0001882766357129731, + "loss": 2.6742, + "step": 3198 + }, + { + "epoch": 0.2581712533290291, + "grad_norm": 0.7565183043479919, + "learning_rate": 0.00018826921773486372, + "loss": 2.6777, + "step": 3199 + }, + { + "epoch": 0.25825195706561216, + "grad_norm": 
0.7183675169944763, + "learning_rate": 0.0001882617975568547, + "loss": 2.6743, + "step": 3200 + }, + { + "epoch": 0.25833266080219514, + "grad_norm": 0.7021663784980774, + "learning_rate": 0.00018825437517913098, + "loss": 2.727, + "step": 3201 + }, + { + "epoch": 0.25841336453877817, + "grad_norm": 0.7406932711601257, + "learning_rate": 0.00018824695060187753, + "loss": 2.7448, + "step": 3202 + }, + { + "epoch": 0.25849406827536114, + "grad_norm": 0.7766773104667664, + "learning_rate": 0.0001882395238252794, + "loss": 2.69, + "step": 3203 + }, + { + "epoch": 0.2585747720119442, + "grad_norm": 0.7483372688293457, + "learning_rate": 0.00018823209484952164, + "loss": 2.6611, + "step": 3204 + }, + { + "epoch": 0.25865547574852715, + "grad_norm": 0.781831681728363, + "learning_rate": 0.0001882246636747895, + "loss": 2.7292, + "step": 3205 + }, + { + "epoch": 0.2587361794851102, + "grad_norm": 0.7188203930854797, + "learning_rate": 0.00018821723030126806, + "loss": 2.718, + "step": 3206 + }, + { + "epoch": 0.25881688322169316, + "grad_norm": 0.7332054972648621, + "learning_rate": 0.00018820979472914263, + "loss": 2.6492, + "step": 3207 + }, + { + "epoch": 0.2588975869582762, + "grad_norm": 0.7044041156768799, + "learning_rate": 0.00018820235695859858, + "loss": 2.7047, + "step": 3208 + }, + { + "epoch": 0.25897829069485917, + "grad_norm": 0.8651862740516663, + "learning_rate": 0.00018819491698982121, + "loss": 2.6301, + "step": 3209 + }, + { + "epoch": 0.2590589944314422, + "grad_norm": 0.8118106126785278, + "learning_rate": 0.00018818747482299598, + "loss": 2.6522, + "step": 3210 + }, + { + "epoch": 0.2591396981680252, + "grad_norm": 0.7239218354225159, + "learning_rate": 0.00018818003045830832, + "loss": 2.7058, + "step": 3211 + }, + { + "epoch": 0.2592204019046082, + "grad_norm": 0.8557687997817993, + "learning_rate": 0.00018817258389594382, + "loss": 2.7125, + "step": 3212 + }, + { + "epoch": 0.2593011056411912, + "grad_norm": 0.7685148119926453, + "learning_rate": 
0.00018816513513608801, + "loss": 2.7516, + "step": 3213 + }, + { + "epoch": 0.25938180937777416, + "grad_norm": 0.7497698664665222, + "learning_rate": 0.00018815768417892664, + "loss": 2.6536, + "step": 3214 + }, + { + "epoch": 0.2594625131143572, + "grad_norm": 0.7041923403739929, + "learning_rate": 0.0001881502310246453, + "loss": 2.7031, + "step": 3215 + }, + { + "epoch": 0.2595432168509402, + "grad_norm": 0.7815428376197815, + "learning_rate": 0.00018814277567342976, + "loss": 2.7291, + "step": 3216 + }, + { + "epoch": 0.2596239205875232, + "grad_norm": 0.7285065650939941, + "learning_rate": 0.00018813531812546583, + "loss": 2.7712, + "step": 3217 + }, + { + "epoch": 0.2597046243241062, + "grad_norm": 0.7606547474861145, + "learning_rate": 0.0001881278583809394, + "loss": 2.6714, + "step": 3218 + }, + { + "epoch": 0.2597853280606892, + "grad_norm": 0.7166680097579956, + "learning_rate": 0.00018812039644003638, + "loss": 2.7147, + "step": 3219 + }, + { + "epoch": 0.2598660317972722, + "grad_norm": 0.8977978229522705, + "learning_rate": 0.0001881129323029427, + "loss": 2.7743, + "step": 3220 + }, + { + "epoch": 0.2599467355338552, + "grad_norm": 0.7447277307510376, + "learning_rate": 0.00018810546596984446, + "loss": 2.7049, + "step": 3221 + }, + { + "epoch": 0.2600274392704382, + "grad_norm": 0.7343515157699585, + "learning_rate": 0.00018809799744092768, + "loss": 2.6999, + "step": 3222 + }, + { + "epoch": 0.26010814300702123, + "grad_norm": 0.7303341627120972, + "learning_rate": 0.00018809052671637852, + "loss": 2.7222, + "step": 3223 + }, + { + "epoch": 0.2601888467436042, + "grad_norm": 0.7412950396537781, + "learning_rate": 0.00018808305379638314, + "loss": 2.6957, + "step": 3224 + }, + { + "epoch": 0.26026955048018724, + "grad_norm": 0.7495343089103699, + "learning_rate": 0.00018807557868112781, + "loss": 2.7123, + "step": 3225 + }, + { + "epoch": 0.2603502542167702, + "grad_norm": 0.8137524724006653, + "learning_rate": 0.00018806810137079886, + "loss": 
2.7191, + "step": 3226 + }, + { + "epoch": 0.26043095795335325, + "grad_norm": 0.786374568939209, + "learning_rate": 0.0001880606218655826, + "loss": 2.7237, + "step": 3227 + }, + { + "epoch": 0.2605116616899362, + "grad_norm": 0.9969484806060791, + "learning_rate": 0.00018805314016566543, + "loss": 2.7603, + "step": 3228 + }, + { + "epoch": 0.26059236542651926, + "grad_norm": 0.8132432103157043, + "learning_rate": 0.00018804565627123386, + "loss": 2.6807, + "step": 3229 + }, + { + "epoch": 0.26067306916310223, + "grad_norm": 0.7604904174804688, + "learning_rate": 0.00018803817018247436, + "loss": 2.7105, + "step": 3230 + }, + { + "epoch": 0.26075377289968527, + "grad_norm": 0.743505597114563, + "learning_rate": 0.00018803068189957354, + "loss": 2.7152, + "step": 3231 + }, + { + "epoch": 0.26083447663626824, + "grad_norm": 0.7780006527900696, + "learning_rate": 0.000188023191422718, + "loss": 2.7043, + "step": 3232 + }, + { + "epoch": 0.2609151803728513, + "grad_norm": 0.7683089375495911, + "learning_rate": 0.00018801569875209447, + "loss": 2.7033, + "step": 3233 + }, + { + "epoch": 0.26099588410943425, + "grad_norm": 0.7540118098258972, + "learning_rate": 0.0001880082038878896, + "loss": 2.7121, + "step": 3234 + }, + { + "epoch": 0.2610765878460173, + "grad_norm": 0.7509592771530151, + "learning_rate": 0.00018800070683029025, + "loss": 2.6575, + "step": 3235 + }, + { + "epoch": 0.26115729158260026, + "grad_norm": 0.8015461564064026, + "learning_rate": 0.00018799320757948327, + "loss": 2.6956, + "step": 3236 + }, + { + "epoch": 0.2612379953191833, + "grad_norm": 0.7586383819580078, + "learning_rate": 0.00018798570613565553, + "loss": 2.6719, + "step": 3237 + }, + { + "epoch": 0.26131869905576627, + "grad_norm": 0.7833155989646912, + "learning_rate": 0.000187978202498994, + "loss": 2.7317, + "step": 3238 + }, + { + "epoch": 0.2613994027923493, + "grad_norm": 0.7976018786430359, + "learning_rate": 0.00018797069666968565, + "loss": 2.7514, + "step": 3239 + }, + { + 
"epoch": 0.2614801065289323, + "grad_norm": 0.8388968706130981, + "learning_rate": 0.00018796318864791763, + "loss": 2.6845, + "step": 3240 + }, + { + "epoch": 0.2615608102655153, + "grad_norm": 0.8082842230796814, + "learning_rate": 0.00018795567843387701, + "loss": 2.7204, + "step": 3241 + }, + { + "epoch": 0.2616415140020983, + "grad_norm": 0.7514800429344177, + "learning_rate": 0.00018794816602775094, + "loss": 2.7117, + "step": 3242 + }, + { + "epoch": 0.2617222177386813, + "grad_norm": 0.8676564693450928, + "learning_rate": 0.00018794065142972664, + "loss": 2.6596, + "step": 3243 + }, + { + "epoch": 0.2618029214752643, + "grad_norm": 0.7449865341186523, + "learning_rate": 0.0001879331346399915, + "loss": 2.7089, + "step": 3244 + }, + { + "epoch": 0.2618836252118473, + "grad_norm": 0.8020811676979065, + "learning_rate": 0.00018792561565873274, + "loss": 2.7293, + "step": 3245 + }, + { + "epoch": 0.2619643289484303, + "grad_norm": 0.7961642146110535, + "learning_rate": 0.00018791809448613783, + "loss": 2.7269, + "step": 3246 + }, + { + "epoch": 0.26204503268501333, + "grad_norm": 0.7842351198196411, + "learning_rate": 0.00018791057112239415, + "loss": 2.6773, + "step": 3247 + }, + { + "epoch": 0.2621257364215963, + "grad_norm": 0.7494246959686279, + "learning_rate": 0.00018790304556768925, + "loss": 2.7317, + "step": 3248 + }, + { + "epoch": 0.26220644015817934, + "grad_norm": 0.7822836637496948, + "learning_rate": 0.0001878955178222107, + "loss": 2.6834, + "step": 3249 + }, + { + "epoch": 0.2622871438947623, + "grad_norm": 0.8432494401931763, + "learning_rate": 0.00018788798788614607, + "loss": 2.7048, + "step": 3250 + }, + { + "epoch": 0.26236784763134535, + "grad_norm": 0.9599446058273315, + "learning_rate": 0.000187880455759683, + "loss": 2.7793, + "step": 3251 + }, + { + "epoch": 0.26244855136792833, + "grad_norm": 0.8097226023674011, + "learning_rate": 0.00018787292144300928, + "loss": 2.7177, + "step": 3252 + }, + { + "epoch": 0.26252925510451136, + 
"grad_norm": 0.8423499464988708, + "learning_rate": 0.00018786538493631265, + "loss": 2.7265, + "step": 3253 + }, + { + "epoch": 0.26260995884109434, + "grad_norm": 0.7388847470283508, + "learning_rate": 0.00018785784623978095, + "loss": 2.6778, + "step": 3254 + }, + { + "epoch": 0.26269066257767737, + "grad_norm": 0.766368567943573, + "learning_rate": 0.0001878503053536021, + "loss": 2.654, + "step": 3255 + }, + { + "epoch": 0.26277136631426035, + "grad_norm": 0.8181266188621521, + "learning_rate": 0.00018784276227796394, + "loss": 2.7568, + "step": 3256 + }, + { + "epoch": 0.2628520700508434, + "grad_norm": 0.8235312104225159, + "learning_rate": 0.00018783521701305452, + "loss": 2.7317, + "step": 3257 + }, + { + "epoch": 0.26293277378742635, + "grad_norm": 0.7103183269500732, + "learning_rate": 0.00018782766955906195, + "loss": 2.6919, + "step": 3258 + }, + { + "epoch": 0.2630134775240094, + "grad_norm": 0.7202538251876831, + "learning_rate": 0.0001878201199161742, + "loss": 2.7179, + "step": 3259 + }, + { + "epoch": 0.26309418126059236, + "grad_norm": 0.8402286171913147, + "learning_rate": 0.00018781256808457952, + "loss": 2.7789, + "step": 3260 + }, + { + "epoch": 0.2631748849971754, + "grad_norm": 0.8136829137802124, + "learning_rate": 0.00018780501406446613, + "loss": 2.6872, + "step": 3261 + }, + { + "epoch": 0.26325558873375837, + "grad_norm": 0.8017000555992126, + "learning_rate": 0.00018779745785602224, + "loss": 2.7527, + "step": 3262 + }, + { + "epoch": 0.2633362924703414, + "grad_norm": 0.7880774140357971, + "learning_rate": 0.00018778989945943619, + "loss": 2.7348, + "step": 3263 + }, + { + "epoch": 0.2634169962069244, + "grad_norm": 0.7402438521385193, + "learning_rate": 0.00018778233887489635, + "loss": 2.6946, + "step": 3264 + }, + { + "epoch": 0.26349769994350736, + "grad_norm": 0.7450907230377197, + "learning_rate": 0.0001877747761025912, + "loss": 2.7502, + "step": 3265 + }, + { + "epoch": 0.2635784036800904, + "grad_norm": 0.7504056692123413, + 
"learning_rate": 0.00018776721114270917, + "loss": 2.832, + "step": 3266 + }, + { + "epoch": 0.26365910741667337, + "grad_norm": 0.7710226774215698, + "learning_rate": 0.00018775964399543878, + "loss": 2.6895, + "step": 3267 + }, + { + "epoch": 0.2637398111532564, + "grad_norm": 0.769927978515625, + "learning_rate": 0.00018775207466096867, + "loss": 2.6801, + "step": 3268 + }, + { + "epoch": 0.2638205148898394, + "grad_norm": 0.7210869193077087, + "learning_rate": 0.0001877445031394875, + "loss": 2.6966, + "step": 3269 + }, + { + "epoch": 0.2639012186264224, + "grad_norm": 0.7731119990348816, + "learning_rate": 0.00018773692943118393, + "loss": 2.6965, + "step": 3270 + }, + { + "epoch": 0.2639819223630054, + "grad_norm": 0.7539728283882141, + "learning_rate": 0.00018772935353624672, + "loss": 2.753, + "step": 3271 + }, + { + "epoch": 0.2640626260995884, + "grad_norm": 0.7993821501731873, + "learning_rate": 0.00018772177545486472, + "loss": 2.7177, + "step": 3272 + }, + { + "epoch": 0.2641433298361714, + "grad_norm": 0.7880005240440369, + "learning_rate": 0.00018771419518722672, + "loss": 2.6854, + "step": 3273 + }, + { + "epoch": 0.2642240335727544, + "grad_norm": 0.8079188466072083, + "learning_rate": 0.0001877066127335217, + "loss": 2.734, + "step": 3274 + }, + { + "epoch": 0.2643047373093374, + "grad_norm": 0.8241428732872009, + "learning_rate": 0.00018769902809393865, + "loss": 2.7156, + "step": 3275 + }, + { + "epoch": 0.26438544104592043, + "grad_norm": 0.8007158041000366, + "learning_rate": 0.00018769144126866657, + "loss": 2.693, + "step": 3276 + }, + { + "epoch": 0.2644661447825034, + "grad_norm": 0.8360451459884644, + "learning_rate": 0.00018768385225789456, + "loss": 2.6919, + "step": 3277 + }, + { + "epoch": 0.26454684851908644, + "grad_norm": 0.7596627473831177, + "learning_rate": 0.00018767626106181172, + "loss": 2.7861, + "step": 3278 + }, + { + "epoch": 0.2646275522556694, + "grad_norm": 0.7469248175621033, + "learning_rate": 0.00018766866768060727, 
+ "loss": 2.7305, + "step": 3279 + }, + { + "epoch": 0.26470825599225245, + "grad_norm": 0.7103936076164246, + "learning_rate": 0.00018766107211447045, + "loss": 2.6456, + "step": 3280 + }, + { + "epoch": 0.2647889597288354, + "grad_norm": 0.7595266103744507, + "learning_rate": 0.00018765347436359056, + "loss": 2.7235, + "step": 3281 + }, + { + "epoch": 0.26486966346541846, + "grad_norm": 0.786648154258728, + "learning_rate": 0.00018764587442815698, + "loss": 2.7182, + "step": 3282 + }, + { + "epoch": 0.26495036720200144, + "grad_norm": 0.7152618169784546, + "learning_rate": 0.00018763827230835908, + "loss": 2.6842, + "step": 3283 + }, + { + "epoch": 0.26503107093858447, + "grad_norm": 0.89169842004776, + "learning_rate": 0.00018763066800438636, + "loss": 2.7661, + "step": 3284 + }, + { + "epoch": 0.26511177467516744, + "grad_norm": 0.8148171305656433, + "learning_rate": 0.00018762306151642833, + "loss": 2.7264, + "step": 3285 + }, + { + "epoch": 0.2651924784117505, + "grad_norm": 0.8070533871650696, + "learning_rate": 0.00018761545284467454, + "loss": 2.7425, + "step": 3286 + }, + { + "epoch": 0.26527318214833345, + "grad_norm": 0.8536118268966675, + "learning_rate": 0.00018760784198931465, + "loss": 2.702, + "step": 3287 + }, + { + "epoch": 0.2653538858849165, + "grad_norm": 0.7422329783439636, + "learning_rate": 0.00018760022895053833, + "loss": 2.6913, + "step": 3288 + }, + { + "epoch": 0.26543458962149946, + "grad_norm": 0.7415527105331421, + "learning_rate": 0.0001875926137285353, + "loss": 2.6472, + "step": 3289 + }, + { + "epoch": 0.2655152933580825, + "grad_norm": 0.8432031273841858, + "learning_rate": 0.00018758499632349538, + "loss": 2.7506, + "step": 3290 + }, + { + "epoch": 0.26559599709466547, + "grad_norm": 0.8113259077072144, + "learning_rate": 0.0001875773767356084, + "loss": 2.6866, + "step": 3291 + }, + { + "epoch": 0.2656767008312485, + "grad_norm": 0.7898122668266296, + "learning_rate": 0.00018756975496506424, + "loss": 2.6516, + "step": 3292 + 
}, + { + "epoch": 0.2657574045678315, + "grad_norm": 0.7627275586128235, + "learning_rate": 0.0001875621310120529, + "loss": 2.7065, + "step": 3293 + }, + { + "epoch": 0.2658381083044145, + "grad_norm": 0.8227291107177734, + "learning_rate": 0.00018755450487676435, + "loss": 2.7614, + "step": 3294 + }, + { + "epoch": 0.2659188120409975, + "grad_norm": 0.8162109851837158, + "learning_rate": 0.00018754687655938868, + "loss": 2.7924, + "step": 3295 + }, + { + "epoch": 0.2659995157775805, + "grad_norm": 0.7231846451759338, + "learning_rate": 0.00018753924606011602, + "loss": 2.7505, + "step": 3296 + }, + { + "epoch": 0.2660802195141635, + "grad_norm": 0.8635944724082947, + "learning_rate": 0.00018753161337913647, + "loss": 2.7505, + "step": 3297 + }, + { + "epoch": 0.26616092325074653, + "grad_norm": 0.8131890892982483, + "learning_rate": 0.00018752397851664031, + "loss": 2.7872, + "step": 3298 + }, + { + "epoch": 0.2662416269873295, + "grad_norm": 0.7336695790290833, + "learning_rate": 0.00018751634147281786, + "loss": 2.7517, + "step": 3299 + }, + { + "epoch": 0.26632233072391254, + "grad_norm": 0.7541754841804504, + "learning_rate": 0.00018750870224785939, + "loss": 2.7807, + "step": 3300 + }, + { + "epoch": 0.2664030344604955, + "grad_norm": 0.9347110390663147, + "learning_rate": 0.0001875010608419553, + "loss": 2.6954, + "step": 3301 + }, + { + "epoch": 0.26648373819707855, + "grad_norm": 0.7591213583946228, + "learning_rate": 0.00018749341725529604, + "loss": 2.7019, + "step": 3302 + }, + { + "epoch": 0.2665644419336615, + "grad_norm": 0.811527669429779, + "learning_rate": 0.00018748577148807211, + "loss": 2.7123, + "step": 3303 + }, + { + "epoch": 0.26664514567024455, + "grad_norm": 0.7419980764389038, + "learning_rate": 0.00018747812354047408, + "loss": 2.7383, + "step": 3304 + }, + { + "epoch": 0.26672584940682753, + "grad_norm": 0.7801192402839661, + "learning_rate": 0.00018747047341269256, + "loss": 2.7245, + "step": 3305 + }, + { + "epoch": 
0.26680655314341056, + "grad_norm": 0.7392756938934326, + "learning_rate": 0.00018746282110491816, + "loss": 2.6992, + "step": 3306 + }, + { + "epoch": 0.26688725687999354, + "grad_norm": 0.7085927724838257, + "learning_rate": 0.00018745516661734161, + "loss": 2.739, + "step": 3307 + }, + { + "epoch": 0.26696796061657657, + "grad_norm": 0.7218676209449768, + "learning_rate": 0.00018744750995015373, + "loss": 2.7091, + "step": 3308 + }, + { + "epoch": 0.26704866435315955, + "grad_norm": 0.847872257232666, + "learning_rate": 0.0001874398511035453, + "loss": 2.699, + "step": 3309 + }, + { + "epoch": 0.2671293680897426, + "grad_norm": 0.8280770778656006, + "learning_rate": 0.00018743219007770723, + "loss": 2.763, + "step": 3310 + }, + { + "epoch": 0.26721007182632556, + "grad_norm": 0.7271165251731873, + "learning_rate": 0.0001874245268728304, + "loss": 2.7219, + "step": 3311 + }, + { + "epoch": 0.2672907755629086, + "grad_norm": 0.7342363595962524, + "learning_rate": 0.00018741686148910586, + "loss": 2.6765, + "step": 3312 + }, + { + "epoch": 0.26737147929949157, + "grad_norm": 0.7260174751281738, + "learning_rate": 0.0001874091939267246, + "loss": 2.7003, + "step": 3313 + }, + { + "epoch": 0.2674521830360746, + "grad_norm": 0.742494523525238, + "learning_rate": 0.00018740152418587775, + "loss": 2.7371, + "step": 3314 + }, + { + "epoch": 0.2675328867726576, + "grad_norm": 0.7238131165504456, + "learning_rate": 0.00018739385226675646, + "loss": 2.7486, + "step": 3315 + }, + { + "epoch": 0.26761359050924055, + "grad_norm": 0.7329363226890564, + "learning_rate": 0.0001873861781695519, + "loss": 2.6414, + "step": 3316 + }, + { + "epoch": 0.2676942942458236, + "grad_norm": 0.7078117728233337, + "learning_rate": 0.00018737850189445534, + "loss": 2.7271, + "step": 3317 + }, + { + "epoch": 0.26777499798240656, + "grad_norm": 0.7945309281349182, + "learning_rate": 0.00018737082344165814, + "loss": 2.7323, + "step": 3318 + }, + { + "epoch": 0.2678557017189896, + "grad_norm": 
0.7510890364646912, + "learning_rate": 0.0001873631428113516, + "loss": 2.6563, + "step": 3319 + }, + { + "epoch": 0.26793640545557257, + "grad_norm": 0.7790820002555847, + "learning_rate": 0.0001873554600037272, + "loss": 2.7445, + "step": 3320 + }, + { + "epoch": 0.2680171091921556, + "grad_norm": 0.7689393162727356, + "learning_rate": 0.00018734777501897636, + "loss": 2.669, + "step": 3321 + }, + { + "epoch": 0.2680978129287386, + "grad_norm": 0.8227118253707886, + "learning_rate": 0.00018734008785729065, + "loss": 2.7279, + "step": 3322 + }, + { + "epoch": 0.2681785166653216, + "grad_norm": 0.7551290392875671, + "learning_rate": 0.00018733239851886162, + "loss": 2.6864, + "step": 3323 + }, + { + "epoch": 0.2682592204019046, + "grad_norm": 0.8572004437446594, + "learning_rate": 0.00018732470700388097, + "loss": 2.8159, + "step": 3324 + }, + { + "epoch": 0.2683399241384876, + "grad_norm": 0.7509044408798218, + "learning_rate": 0.00018731701331254033, + "loss": 2.7698, + "step": 3325 + }, + { + "epoch": 0.2684206278750706, + "grad_norm": 0.8474129438400269, + "learning_rate": 0.00018730931744503148, + "loss": 2.6745, + "step": 3326 + }, + { + "epoch": 0.2685013316116536, + "grad_norm": 0.8310953378677368, + "learning_rate": 0.00018730161940154618, + "loss": 2.712, + "step": 3327 + }, + { + "epoch": 0.2685820353482366, + "grad_norm": 0.8820717334747314, + "learning_rate": 0.00018729391918227632, + "loss": 2.7776, + "step": 3328 + }, + { + "epoch": 0.26866273908481964, + "grad_norm": 0.8827663064002991, + "learning_rate": 0.00018728621678741384, + "loss": 2.7115, + "step": 3329 + }, + { + "epoch": 0.2687434428214026, + "grad_norm": 0.7896323800086975, + "learning_rate": 0.00018727851221715064, + "loss": 2.6799, + "step": 3330 + }, + { + "epoch": 0.26882414655798564, + "grad_norm": 0.7775614261627197, + "learning_rate": 0.0001872708054716788, + "loss": 2.7021, + "step": 3331 + }, + { + "epoch": 0.2689048502945686, + "grad_norm": 0.8150187134742737, + "learning_rate": 
0.0001872630965511903, + "loss": 2.679, + "step": 3332 + }, + { + "epoch": 0.26898555403115165, + "grad_norm": 0.7821844220161438, + "learning_rate": 0.00018725538545587736, + "loss": 2.7067, + "step": 3333 + }, + { + "epoch": 0.26906625776773463, + "grad_norm": 0.8390234112739563, + "learning_rate": 0.00018724767218593216, + "loss": 2.7133, + "step": 3334 + }, + { + "epoch": 0.26914696150431766, + "grad_norm": 0.8150694370269775, + "learning_rate": 0.00018723995674154687, + "loss": 2.7022, + "step": 3335 + }, + { + "epoch": 0.26922766524090064, + "grad_norm": 0.7473872900009155, + "learning_rate": 0.0001872322391229138, + "loss": 2.7268, + "step": 3336 + }, + { + "epoch": 0.26930836897748367, + "grad_norm": 0.7591951489448547, + "learning_rate": 0.0001872245193302253, + "loss": 2.7516, + "step": 3337 + }, + { + "epoch": 0.26938907271406665, + "grad_norm": 0.7914662957191467, + "learning_rate": 0.00018721679736367382, + "loss": 2.6613, + "step": 3338 + }, + { + "epoch": 0.2694697764506497, + "grad_norm": 0.7823428511619568, + "learning_rate": 0.00018720907322345172, + "loss": 2.6661, + "step": 3339 + }, + { + "epoch": 0.26955048018723266, + "grad_norm": 0.8428264260292053, + "learning_rate": 0.00018720134690975156, + "loss": 2.672, + "step": 3340 + }, + { + "epoch": 0.2696311839238157, + "grad_norm": 0.71320641040802, + "learning_rate": 0.00018719361842276587, + "loss": 2.7326, + "step": 3341 + }, + { + "epoch": 0.26971188766039866, + "grad_norm": 0.7972821593284607, + "learning_rate": 0.00018718588776268731, + "loss": 2.7182, + "step": 3342 + }, + { + "epoch": 0.2697925913969817, + "grad_norm": 0.7924500107765198, + "learning_rate": 0.0001871781549297085, + "loss": 2.7308, + "step": 3343 + }, + { + "epoch": 0.2698732951335647, + "grad_norm": 0.7668356895446777, + "learning_rate": 0.0001871704199240222, + "loss": 2.678, + "step": 3344 + }, + { + "epoch": 0.2699539988701477, + "grad_norm": 0.866973876953125, + "learning_rate": 0.00018716268274582114, + "loss": 
2.7802, + "step": 3345 + }, + { + "epoch": 0.2700347026067307, + "grad_norm": 0.7709557414054871, + "learning_rate": 0.0001871549433952982, + "loss": 2.7418, + "step": 3346 + }, + { + "epoch": 0.2701154063433137, + "grad_norm": 0.7707573771476746, + "learning_rate": 0.00018714720187264626, + "loss": 2.7486, + "step": 3347 + }, + { + "epoch": 0.2701961100798967, + "grad_norm": 0.8007768392562866, + "learning_rate": 0.00018713945817805822, + "loss": 2.7106, + "step": 3348 + }, + { + "epoch": 0.2702768138164797, + "grad_norm": 0.7239583134651184, + "learning_rate": 0.0001871317123117271, + "loss": 2.7209, + "step": 3349 + }, + { + "epoch": 0.2703575175530627, + "grad_norm": 0.775104820728302, + "learning_rate": 0.00018712396427384594, + "loss": 2.6503, + "step": 3350 + }, + { + "epoch": 0.27043822128964573, + "grad_norm": 0.7492741346359253, + "learning_rate": 0.0001871162140646079, + "loss": 2.699, + "step": 3351 + }, + { + "epoch": 0.2705189250262287, + "grad_norm": 0.7550846338272095, + "learning_rate": 0.00018710846168420604, + "loss": 2.7458, + "step": 3352 + }, + { + "epoch": 0.27059962876281174, + "grad_norm": 0.807996928691864, + "learning_rate": 0.0001871007071328336, + "loss": 2.7604, + "step": 3353 + }, + { + "epoch": 0.2706803324993947, + "grad_norm": 0.7381845116615295, + "learning_rate": 0.00018709295041068386, + "loss": 2.6833, + "step": 3354 + }, + { + "epoch": 0.27076103623597775, + "grad_norm": 0.7542420625686646, + "learning_rate": 0.00018708519151795016, + "loss": 2.6462, + "step": 3355 + }, + { + "epoch": 0.2708417399725607, + "grad_norm": 0.7675846219062805, + "learning_rate": 0.00018707743045482582, + "loss": 2.7068, + "step": 3356 + }, + { + "epoch": 0.27092244370914376, + "grad_norm": 0.7437357902526855, + "learning_rate": 0.0001870696672215043, + "loss": 2.73, + "step": 3357 + }, + { + "epoch": 0.27100314744572673, + "grad_norm": 0.7880852222442627, + "learning_rate": 0.00018706190181817903, + "loss": 2.759, + "step": 3358 + }, + { + "epoch": 
0.27108385118230977, + "grad_norm": 0.7403178811073303, + "learning_rate": 0.00018705413424504363, + "loss": 2.7538, + "step": 3359 + }, + { + "epoch": 0.27116455491889274, + "grad_norm": 0.7601225972175598, + "learning_rate": 0.00018704636450229164, + "loss": 2.7331, + "step": 3360 + }, + { + "epoch": 0.2712452586554758, + "grad_norm": 0.7810701727867126, + "learning_rate": 0.0001870385925901167, + "loss": 2.7736, + "step": 3361 + }, + { + "epoch": 0.27132596239205875, + "grad_norm": 0.8934530019760132, + "learning_rate": 0.0001870308185087125, + "loss": 2.7214, + "step": 3362 + }, + { + "epoch": 0.2714066661286418, + "grad_norm": 0.7468441128730774, + "learning_rate": 0.0001870230422582728, + "loss": 2.6957, + "step": 3363 + }, + { + "epoch": 0.27148736986522476, + "grad_norm": 0.7643293142318726, + "learning_rate": 0.00018701526383899144, + "loss": 2.6773, + "step": 3364 + }, + { + "epoch": 0.2715680736018078, + "grad_norm": 0.7602033615112305, + "learning_rate": 0.0001870074832510622, + "loss": 2.7095, + "step": 3365 + }, + { + "epoch": 0.27164877733839077, + "grad_norm": 0.772065281867981, + "learning_rate": 0.00018699970049467908, + "loss": 2.6753, + "step": 3366 + }, + { + "epoch": 0.27172948107497374, + "grad_norm": 0.7718359231948853, + "learning_rate": 0.00018699191557003598, + "loss": 2.6857, + "step": 3367 + }, + { + "epoch": 0.2718101848115568, + "grad_norm": 0.8207093477249146, + "learning_rate": 0.00018698412847732693, + "loss": 2.7549, + "step": 3368 + }, + { + "epoch": 0.27189088854813975, + "grad_norm": 0.7393590807914734, + "learning_rate": 0.00018697633921674605, + "loss": 2.6884, + "step": 3369 + }, + { + "epoch": 0.2719715922847228, + "grad_norm": 0.7955869436264038, + "learning_rate": 0.0001869685477884874, + "loss": 2.708, + "step": 3370 + }, + { + "epoch": 0.27205229602130576, + "grad_norm": 0.7392188906669617, + "learning_rate": 0.00018696075419274527, + "loss": 2.717, + "step": 3371 + }, + { + "epoch": 0.2721329997578888, + "grad_norm": 
0.800204873085022, + "learning_rate": 0.00018695295842971376, + "loss": 2.7184, + "step": 3372 + }, + { + "epoch": 0.27221370349447177, + "grad_norm": 0.8195740580558777, + "learning_rate": 0.00018694516049958725, + "loss": 2.6865, + "step": 3373 + }, + { + "epoch": 0.2722944072310548, + "grad_norm": 0.8617578148841858, + "learning_rate": 0.00018693736040256007, + "loss": 2.7098, + "step": 3374 + }, + { + "epoch": 0.2723751109676378, + "grad_norm": 0.8184413909912109, + "learning_rate": 0.00018692955813882662, + "loss": 2.7449, + "step": 3375 + }, + { + "epoch": 0.2724558147042208, + "grad_norm": 0.990275502204895, + "learning_rate": 0.00018692175370858133, + "loss": 2.7891, + "step": 3376 + }, + { + "epoch": 0.2725365184408038, + "grad_norm": 0.7857810854911804, + "learning_rate": 0.0001869139471120187, + "loss": 2.6884, + "step": 3377 + }, + { + "epoch": 0.2726172221773868, + "grad_norm": 0.8040915131568909, + "learning_rate": 0.00018690613834933335, + "loss": 2.7047, + "step": 3378 + }, + { + "epoch": 0.2726979259139698, + "grad_norm": 0.7512348294258118, + "learning_rate": 0.00018689832742071983, + "loss": 2.6898, + "step": 3379 + }, + { + "epoch": 0.27277862965055283, + "grad_norm": 0.6781859397888184, + "learning_rate": 0.00018689051432637288, + "loss": 2.6396, + "step": 3380 + }, + { + "epoch": 0.2728593333871358, + "grad_norm": 0.7858247756958008, + "learning_rate": 0.00018688269906648716, + "loss": 2.6785, + "step": 3381 + }, + { + "epoch": 0.27294003712371884, + "grad_norm": 0.7342140674591064, + "learning_rate": 0.00018687488164125744, + "loss": 2.6778, + "step": 3382 + }, + { + "epoch": 0.2730207408603018, + "grad_norm": 0.8113372921943665, + "learning_rate": 0.00018686706205087858, + "loss": 2.6982, + "step": 3383 + }, + { + "epoch": 0.27310144459688485, + "grad_norm": 0.7904205918312073, + "learning_rate": 0.0001868592402955455, + "loss": 2.7891, + "step": 3384 + }, + { + "epoch": 0.2731821483334678, + "grad_norm": 0.7274135947227478, + 
"learning_rate": 0.00018685141637545308, + "loss": 2.6908, + "step": 3385 + }, + { + "epoch": 0.27326285207005085, + "grad_norm": 0.7675744295120239, + "learning_rate": 0.0001868435902907963, + "loss": 2.6987, + "step": 3386 + }, + { + "epoch": 0.27334355580663383, + "grad_norm": 0.8085030913352966, + "learning_rate": 0.00018683576204177026, + "loss": 2.7798, + "step": 3387 + }, + { + "epoch": 0.27342425954321686, + "grad_norm": 0.7498135566711426, + "learning_rate": 0.00018682793162857006, + "loss": 2.7216, + "step": 3388 + }, + { + "epoch": 0.27350496327979984, + "grad_norm": 0.900741696357727, + "learning_rate": 0.0001868200990513908, + "loss": 2.6871, + "step": 3389 + }, + { + "epoch": 0.27358566701638287, + "grad_norm": 0.7948571443557739, + "learning_rate": 0.00018681226431042772, + "loss": 2.6985, + "step": 3390 + }, + { + "epoch": 0.27366637075296585, + "grad_norm": 0.8739100098609924, + "learning_rate": 0.00018680442740587612, + "loss": 2.6922, + "step": 3391 + }, + { + "epoch": 0.2737470744895489, + "grad_norm": 0.730084240436554, + "learning_rate": 0.00018679658833793125, + "loss": 2.7029, + "step": 3392 + }, + { + "epoch": 0.27382777822613186, + "grad_norm": 0.7560603022575378, + "learning_rate": 0.00018678874710678853, + "loss": 2.7429, + "step": 3393 + }, + { + "epoch": 0.2739084819627149, + "grad_norm": 0.8331460356712341, + "learning_rate": 0.00018678090371264334, + "loss": 2.7157, + "step": 3394 + }, + { + "epoch": 0.27398918569929787, + "grad_norm": 0.8070168495178223, + "learning_rate": 0.00018677305815569122, + "loss": 2.7629, + "step": 3395 + }, + { + "epoch": 0.2740698894358809, + "grad_norm": 0.7922534346580505, + "learning_rate": 0.00018676521043612762, + "loss": 2.7159, + "step": 3396 + }, + { + "epoch": 0.2741505931724639, + "grad_norm": 0.7838901281356812, + "learning_rate": 0.0001867573605541482, + "loss": 2.6721, + "step": 3397 + }, + { + "epoch": 0.2742312969090469, + "grad_norm": 0.8912512063980103, + "learning_rate": 
0.00018674950850994856, + "loss": 2.7243, + "step": 3398 + }, + { + "epoch": 0.2743120006456299, + "grad_norm": 0.7205448150634766, + "learning_rate": 0.0001867416543037244, + "loss": 2.7152, + "step": 3399 + }, + { + "epoch": 0.2743927043822129, + "grad_norm": 0.6992877721786499, + "learning_rate": 0.00018673379793567146, + "loss": 2.7183, + "step": 3400 + }, + { + "epoch": 0.2744734081187959, + "grad_norm": 0.8009448051452637, + "learning_rate": 0.00018672593940598556, + "loss": 2.715, + "step": 3401 + }, + { + "epoch": 0.2745541118553789, + "grad_norm": 0.7812647819519043, + "learning_rate": 0.0001867180787148626, + "loss": 2.7579, + "step": 3402 + }, + { + "epoch": 0.2746348155919619, + "grad_norm": 0.7300555109977722, + "learning_rate": 0.00018671021586249835, + "loss": 2.694, + "step": 3403 + }, + { + "epoch": 0.27471551932854493, + "grad_norm": 0.8082736134529114, + "learning_rate": 0.00018670235084908887, + "loss": 2.768, + "step": 3404 + }, + { + "epoch": 0.2747962230651279, + "grad_norm": 0.7729581594467163, + "learning_rate": 0.0001866944836748302, + "loss": 2.7256, + "step": 3405 + }, + { + "epoch": 0.27487692680171094, + "grad_norm": 0.8113458752632141, + "learning_rate": 0.00018668661433991835, + "loss": 2.6692, + "step": 3406 + }, + { + "epoch": 0.2749576305382939, + "grad_norm": 0.7757337689399719, + "learning_rate": 0.00018667874284454948, + "loss": 2.6769, + "step": 3407 + }, + { + "epoch": 0.27503833427487695, + "grad_norm": 0.7896093726158142, + "learning_rate": 0.00018667086918891976, + "loss": 2.7118, + "step": 3408 + }, + { + "epoch": 0.2751190380114599, + "grad_norm": 0.7764071822166443, + "learning_rate": 0.00018666299337322543, + "loss": 2.7284, + "step": 3409 + }, + { + "epoch": 0.27519974174804296, + "grad_norm": 0.794815182685852, + "learning_rate": 0.00018665511539766273, + "loss": 2.7232, + "step": 3410 + }, + { + "epoch": 0.27528044548462594, + "grad_norm": 0.8134122490882874, + "learning_rate": 0.0001866472352624281, + "loss": 
2.7023, + "step": 3411 + }, + { + "epoch": 0.27536114922120897, + "grad_norm": 0.7654025554656982, + "learning_rate": 0.00018663935296771782, + "loss": 2.7002, + "step": 3412 + }, + { + "epoch": 0.27544185295779194, + "grad_norm": 0.6930806636810303, + "learning_rate": 0.0001866314685137284, + "loss": 2.6764, + "step": 3413 + }, + { + "epoch": 0.275522556694375, + "grad_norm": 0.7535184621810913, + "learning_rate": 0.00018662358190065631, + "loss": 2.6657, + "step": 3414 + }, + { + "epoch": 0.27560326043095795, + "grad_norm": 0.7775620818138123, + "learning_rate": 0.00018661569312869816, + "loss": 2.6931, + "step": 3415 + }, + { + "epoch": 0.275683964167541, + "grad_norm": 0.7209072113037109, + "learning_rate": 0.00018660780219805048, + "loss": 2.7293, + "step": 3416 + }, + { + "epoch": 0.27576466790412396, + "grad_norm": 0.7182055711746216, + "learning_rate": 0.00018659990910891, + "loss": 2.6561, + "step": 3417 + }, + { + "epoch": 0.27584537164070694, + "grad_norm": 0.7130969166755676, + "learning_rate": 0.00018659201386147338, + "loss": 2.7156, + "step": 3418 + }, + { + "epoch": 0.27592607537728997, + "grad_norm": 0.7296265959739685, + "learning_rate": 0.00018658411645593745, + "loss": 2.6894, + "step": 3419 + }, + { + "epoch": 0.27600677911387295, + "grad_norm": 0.7707972526550293, + "learning_rate": 0.000186576216892499, + "loss": 2.7528, + "step": 3420 + }, + { + "epoch": 0.276087482850456, + "grad_norm": 0.6945170164108276, + "learning_rate": 0.0001865683151713549, + "loss": 2.6762, + "step": 3421 + }, + { + "epoch": 0.27616818658703896, + "grad_norm": 0.7664114236831665, + "learning_rate": 0.0001865604112927021, + "loss": 2.7212, + "step": 3422 + }, + { + "epoch": 0.276248890323622, + "grad_norm": 0.6950399875640869, + "learning_rate": 0.0001865525052567376, + "loss": 2.7035, + "step": 3423 + }, + { + "epoch": 0.27632959406020496, + "grad_norm": 0.7307506799697876, + "learning_rate": 0.00018654459706365838, + "loss": 2.7296, + "step": 3424 + }, + { + 
"epoch": 0.276410297796788, + "grad_norm": 0.720912516117096, + "learning_rate": 0.0001865366867136616, + "loss": 2.6884, + "step": 3425 + }, + { + "epoch": 0.276491001533371, + "grad_norm": 0.7581072449684143, + "learning_rate": 0.00018652877420694436, + "loss": 2.705, + "step": 3426 + }, + { + "epoch": 0.276571705269954, + "grad_norm": 0.7473136186599731, + "learning_rate": 0.0001865208595437039, + "loss": 2.7316, + "step": 3427 + }, + { + "epoch": 0.276652409006537, + "grad_norm": 0.7272855639457703, + "learning_rate": 0.00018651294272413745, + "loss": 2.6834, + "step": 3428 + }, + { + "epoch": 0.27673311274312, + "grad_norm": 0.7046366930007935, + "learning_rate": 0.0001865050237484423, + "loss": 2.6491, + "step": 3429 + }, + { + "epoch": 0.276813816479703, + "grad_norm": 0.7521376609802246, + "learning_rate": 0.00018649710261681586, + "loss": 2.708, + "step": 3430 + }, + { + "epoch": 0.276894520216286, + "grad_norm": 0.7372453808784485, + "learning_rate": 0.0001864891793294555, + "loss": 2.682, + "step": 3431 + }, + { + "epoch": 0.276975223952869, + "grad_norm": 0.7381749749183655, + "learning_rate": 0.0001864812538865587, + "loss": 2.7526, + "step": 3432 + }, + { + "epoch": 0.27705592768945203, + "grad_norm": 0.7891514301300049, + "learning_rate": 0.00018647332628832298, + "loss": 2.6904, + "step": 3433 + }, + { + "epoch": 0.277136631426035, + "grad_norm": 0.7942724823951721, + "learning_rate": 0.00018646539653494596, + "loss": 2.7873, + "step": 3434 + }, + { + "epoch": 0.27721733516261804, + "grad_norm": 0.7365398406982422, + "learning_rate": 0.0001864574646266252, + "loss": 2.6684, + "step": 3435 + }, + { + "epoch": 0.277298038899201, + "grad_norm": 0.7802249193191528, + "learning_rate": 0.00018644953056355846, + "loss": 2.7152, + "step": 3436 + }, + { + "epoch": 0.27737874263578405, + "grad_norm": 0.7801448106765747, + "learning_rate": 0.0001864415943459434, + "loss": 2.7034, + "step": 3437 + }, + { + "epoch": 0.277459446372367, + "grad_norm": 
0.7722738981246948, + "learning_rate": 0.00018643365597397786, + "loss": 2.7135, + "step": 3438 + }, + { + "epoch": 0.27754015010895006, + "grad_norm": 0.7847445011138916, + "learning_rate": 0.00018642571544785967, + "loss": 2.6999, + "step": 3439 + }, + { + "epoch": 0.27762085384553303, + "grad_norm": 0.7226125597953796, + "learning_rate": 0.00018641777276778675, + "loss": 2.7613, + "step": 3440 + }, + { + "epoch": 0.27770155758211607, + "grad_norm": 0.713188111782074, + "learning_rate": 0.000186409827933957, + "loss": 2.6953, + "step": 3441 + }, + { + "epoch": 0.27778226131869904, + "grad_norm": 0.7308298349380493, + "learning_rate": 0.0001864018809465685, + "loss": 2.7045, + "step": 3442 + }, + { + "epoch": 0.2778629650552821, + "grad_norm": 0.7606719732284546, + "learning_rate": 0.00018639393180581925, + "loss": 2.7883, + "step": 3443 + }, + { + "epoch": 0.27794366879186505, + "grad_norm": 0.7583296895027161, + "learning_rate": 0.00018638598051190738, + "loss": 2.6734, + "step": 3444 + }, + { + "epoch": 0.2780243725284481, + "grad_norm": 0.7147012948989868, + "learning_rate": 0.00018637802706503108, + "loss": 2.7223, + "step": 3445 + }, + { + "epoch": 0.27810507626503106, + "grad_norm": 0.7812997102737427, + "learning_rate": 0.00018637007146538853, + "loss": 2.7277, + "step": 3446 + }, + { + "epoch": 0.2781857800016141, + "grad_norm": 0.7460772395133972, + "learning_rate": 0.000186362113713178, + "loss": 2.6875, + "step": 3447 + }, + { + "epoch": 0.27826648373819707, + "grad_norm": 0.7359143495559692, + "learning_rate": 0.0001863541538085979, + "loss": 2.7122, + "step": 3448 + }, + { + "epoch": 0.2783471874747801, + "grad_norm": 0.7122978568077087, + "learning_rate": 0.00018634619175184655, + "loss": 2.6381, + "step": 3449 + }, + { + "epoch": 0.2784278912113631, + "grad_norm": 0.6965885758399963, + "learning_rate": 0.00018633822754312234, + "loss": 2.6957, + "step": 3450 + }, + { + "epoch": 0.2785085949479461, + "grad_norm": 0.7737082242965698, + 
"learning_rate": 0.00018633026118262385, + "loss": 2.7579, + "step": 3451 + }, + { + "epoch": 0.2785892986845291, + "grad_norm": 0.6925420165061951, + "learning_rate": 0.00018632229267054958, + "loss": 2.6226, + "step": 3452 + }, + { + "epoch": 0.2786700024211121, + "grad_norm": 0.7496356964111328, + "learning_rate": 0.0001863143220070981, + "loss": 2.7059, + "step": 3453 + }, + { + "epoch": 0.2787507061576951, + "grad_norm": 0.7066817283630371, + "learning_rate": 0.0001863063491924681, + "loss": 2.681, + "step": 3454 + }, + { + "epoch": 0.2788314098942781, + "grad_norm": 0.8143237829208374, + "learning_rate": 0.0001862983742268583, + "loss": 2.6698, + "step": 3455 + }, + { + "epoch": 0.2789121136308611, + "grad_norm": 0.7518483996391296, + "learning_rate": 0.00018629039711046737, + "loss": 2.7041, + "step": 3456 + }, + { + "epoch": 0.27899281736744413, + "grad_norm": 0.8756366968154907, + "learning_rate": 0.00018628241784349422, + "loss": 2.7547, + "step": 3457 + }, + { + "epoch": 0.2790735211040271, + "grad_norm": 0.8709446787834167, + "learning_rate": 0.0001862744364261377, + "loss": 2.7068, + "step": 3458 + }, + { + "epoch": 0.27915422484061014, + "grad_norm": 0.8121913075447083, + "learning_rate": 0.00018626645285859666, + "loss": 2.673, + "step": 3459 + }, + { + "epoch": 0.2792349285771931, + "grad_norm": 0.7685909271240234, + "learning_rate": 0.00018625846714107012, + "loss": 2.7389, + "step": 3460 + }, + { + "epoch": 0.27931563231377615, + "grad_norm": 0.7098073363304138, + "learning_rate": 0.0001862504792737571, + "loss": 2.6942, + "step": 3461 + }, + { + "epoch": 0.27939633605035913, + "grad_norm": 0.7718049883842468, + "learning_rate": 0.00018624248925685666, + "loss": 2.7359, + "step": 3462 + }, + { + "epoch": 0.27947703978694216, + "grad_norm": 0.7912909984588623, + "learning_rate": 0.00018623449709056797, + "loss": 2.6658, + "step": 3463 + }, + { + "epoch": 0.27955774352352514, + "grad_norm": 0.7255454659461975, + "learning_rate": 
0.0001862265027750902, + "loss": 2.771, + "step": 3464 + }, + { + "epoch": 0.27963844726010817, + "grad_norm": 0.7542218565940857, + "learning_rate": 0.00018621850631062254, + "loss": 2.6741, + "step": 3465 + }, + { + "epoch": 0.27971915099669115, + "grad_norm": 0.8386052846908569, + "learning_rate": 0.00018621050769736437, + "loss": 2.67, + "step": 3466 + }, + { + "epoch": 0.2797998547332742, + "grad_norm": 0.8563781976699829, + "learning_rate": 0.00018620250693551495, + "loss": 2.7461, + "step": 3467 + }, + { + "epoch": 0.27988055846985715, + "grad_norm": 0.7490699291229248, + "learning_rate": 0.00018619450402527376, + "loss": 2.6863, + "step": 3468 + }, + { + "epoch": 0.27996126220644013, + "grad_norm": 0.8008999824523926, + "learning_rate": 0.00018618649896684017, + "loss": 2.7769, + "step": 3469 + }, + { + "epoch": 0.28004196594302316, + "grad_norm": 0.7678235769271851, + "learning_rate": 0.00018617849176041378, + "loss": 2.7237, + "step": 3470 + }, + { + "epoch": 0.28012266967960614, + "grad_norm": 0.8774877786636353, + "learning_rate": 0.00018617048240619408, + "loss": 2.7502, + "step": 3471 + }, + { + "epoch": 0.28020337341618917, + "grad_norm": 0.8150283098220825, + "learning_rate": 0.00018616247090438073, + "loss": 2.6941, + "step": 3472 + }, + { + "epoch": 0.28028407715277215, + "grad_norm": 0.7330089807510376, + "learning_rate": 0.00018615445725517332, + "loss": 2.7002, + "step": 3473 + }, + { + "epoch": 0.2803647808893552, + "grad_norm": 0.748275101184845, + "learning_rate": 0.00018614644145877168, + "loss": 2.6996, + "step": 3474 + }, + { + "epoch": 0.28044548462593816, + "grad_norm": 0.7718296647071838, + "learning_rate": 0.0001861384235153755, + "loss": 2.7333, + "step": 3475 + }, + { + "epoch": 0.2805261883625212, + "grad_norm": 0.7751123309135437, + "learning_rate": 0.00018613040342518465, + "loss": 2.7362, + "step": 3476 + }, + { + "epoch": 0.28060689209910417, + "grad_norm": 0.70979243516922, + "learning_rate": 0.000186122381188399, + "loss": 
2.6651, + "step": 3477 + }, + { + "epoch": 0.2806875958356872, + "grad_norm": 0.9607138633728027, + "learning_rate": 0.00018611435680521848, + "loss": 2.7779, + "step": 3478 + }, + { + "epoch": 0.2807682995722702, + "grad_norm": 0.709671676158905, + "learning_rate": 0.0001861063302758431, + "loss": 2.6994, + "step": 3479 + }, + { + "epoch": 0.2808490033088532, + "grad_norm": 0.8765757083892822, + "learning_rate": 0.00018609830160047283, + "loss": 2.7107, + "step": 3480 + }, + { + "epoch": 0.2809297070454362, + "grad_norm": 0.7996764183044434, + "learning_rate": 0.0001860902707793079, + "loss": 2.7921, + "step": 3481 + }, + { + "epoch": 0.2810104107820192, + "grad_norm": 0.7094513177871704, + "learning_rate": 0.0001860822378125483, + "loss": 2.7211, + "step": 3482 + }, + { + "epoch": 0.2810911145186022, + "grad_norm": 0.8068607449531555, + "learning_rate": 0.0001860742027003944, + "loss": 2.675, + "step": 3483 + }, + { + "epoch": 0.2811718182551852, + "grad_norm": 0.7737938165664673, + "learning_rate": 0.00018606616544304628, + "loss": 2.7538, + "step": 3484 + }, + { + "epoch": 0.2812525219917682, + "grad_norm": 0.7979975342750549, + "learning_rate": 0.0001860581260407044, + "loss": 2.7894, + "step": 3485 + }, + { + "epoch": 0.28133322572835123, + "grad_norm": 0.7671655416488647, + "learning_rate": 0.00018605008449356904, + "loss": 2.7097, + "step": 3486 + }, + { + "epoch": 0.2814139294649342, + "grad_norm": 0.7284159064292908, + "learning_rate": 0.00018604204080184062, + "loss": 2.7447, + "step": 3487 + }, + { + "epoch": 0.28149463320151724, + "grad_norm": 0.7425351142883301, + "learning_rate": 0.00018603399496571968, + "loss": 2.7302, + "step": 3488 + }, + { + "epoch": 0.2815753369381002, + "grad_norm": 0.7709810733795166, + "learning_rate": 0.00018602594698540663, + "loss": 2.6979, + "step": 3489 + }, + { + "epoch": 0.28165604067468325, + "grad_norm": 0.744628369808197, + "learning_rate": 0.00018601789686110214, + "loss": 2.7279, + "step": 3490 + }, + { + 
"epoch": 0.2817367444112662, + "grad_norm": 0.7679976224899292, + "learning_rate": 0.00018600984459300678, + "loss": 2.6862, + "step": 3491 + }, + { + "epoch": 0.28181744814784926, + "grad_norm": 0.7923497557640076, + "learning_rate": 0.0001860017901813213, + "loss": 2.6975, + "step": 3492 + }, + { + "epoch": 0.28189815188443224, + "grad_norm": 0.7896692156791687, + "learning_rate": 0.00018599373362624636, + "loss": 2.7052, + "step": 3493 + }, + { + "epoch": 0.28197885562101527, + "grad_norm": 0.7913276553153992, + "learning_rate": 0.00018598567492798284, + "loss": 2.7233, + "step": 3494 + }, + { + "epoch": 0.28205955935759824, + "grad_norm": 0.7385257482528687, + "learning_rate": 0.00018597761408673146, + "loss": 2.7616, + "step": 3495 + }, + { + "epoch": 0.2821402630941813, + "grad_norm": 0.7181909084320068, + "learning_rate": 0.00018596955110269323, + "loss": 2.718, + "step": 3496 + }, + { + "epoch": 0.28222096683076425, + "grad_norm": 0.8313151597976685, + "learning_rate": 0.00018596148597606907, + "loss": 2.6775, + "step": 3497 + }, + { + "epoch": 0.2823016705673473, + "grad_norm": 0.7235481142997742, + "learning_rate": 0.00018595341870705995, + "loss": 2.7085, + "step": 3498 + }, + { + "epoch": 0.28238237430393026, + "grad_norm": 0.7092145085334778, + "learning_rate": 0.00018594534929586697, + "loss": 2.7167, + "step": 3499 + }, + { + "epoch": 0.2824630780405133, + "grad_norm": 0.7929207682609558, + "learning_rate": 0.0001859372777426912, + "loss": 2.663, + "step": 3500 + }, + { + "epoch": 0.28254378177709627, + "grad_norm": 0.7488871216773987, + "learning_rate": 0.00018592920404773383, + "loss": 2.7911, + "step": 3501 + }, + { + "epoch": 0.2826244855136793, + "grad_norm": 0.8230419158935547, + "learning_rate": 0.0001859211282111961, + "loss": 2.754, + "step": 3502 + }, + { + "epoch": 0.2827051892502623, + "grad_norm": 0.731971025466919, + "learning_rate": 0.00018591305023327924, + "loss": 2.7142, + "step": 3503 + }, + { + "epoch": 0.2827858929868453, + 
"grad_norm": 0.8159881234169006, + "learning_rate": 0.00018590497011418457, + "loss": 2.7046, + "step": 3504 + }, + { + "epoch": 0.2828665967234283, + "grad_norm": 0.750266432762146, + "learning_rate": 0.0001858968878541135, + "loss": 2.6951, + "step": 3505 + }, + { + "epoch": 0.2829473004600113, + "grad_norm": 0.7750049233436584, + "learning_rate": 0.00018588880345326748, + "loss": 2.6958, + "step": 3506 + }, + { + "epoch": 0.2830280041965943, + "grad_norm": 0.8559218049049377, + "learning_rate": 0.00018588071691184795, + "loss": 2.7205, + "step": 3507 + }, + { + "epoch": 0.28310870793317733, + "grad_norm": 0.7334830164909363, + "learning_rate": 0.00018587262823005642, + "loss": 2.7134, + "step": 3508 + }, + { + "epoch": 0.2831894116697603, + "grad_norm": 0.8749497532844543, + "learning_rate": 0.00018586453740809456, + "loss": 2.6811, + "step": 3509 + }, + { + "epoch": 0.28327011540634334, + "grad_norm": 0.8800753355026245, + "learning_rate": 0.00018585644444616396, + "loss": 2.7427, + "step": 3510 + }, + { + "epoch": 0.2833508191429263, + "grad_norm": 0.8666185736656189, + "learning_rate": 0.00018584834934446632, + "loss": 2.6828, + "step": 3511 + }, + { + "epoch": 0.28343152287950935, + "grad_norm": 0.7451635003089905, + "learning_rate": 0.00018584025210320343, + "loss": 2.6784, + "step": 3512 + }, + { + "epoch": 0.2835122266160923, + "grad_norm": 0.8512656688690186, + "learning_rate": 0.00018583215272257708, + "loss": 2.7762, + "step": 3513 + }, + { + "epoch": 0.28359293035267535, + "grad_norm": 0.9298297166824341, + "learning_rate": 0.00018582405120278907, + "loss": 2.7714, + "step": 3514 + }, + { + "epoch": 0.28367363408925833, + "grad_norm": 0.7968065738677979, + "learning_rate": 0.0001858159475440414, + "loss": 2.7286, + "step": 3515 + }, + { + "epoch": 0.28375433782584136, + "grad_norm": 0.7381564378738403, + "learning_rate": 0.00018580784174653596, + "loss": 2.6697, + "step": 3516 + }, + { + "epoch": 0.28383504156242434, + "grad_norm": 0.8199222683906555, 
+ "learning_rate": 0.00018579973381047481, + "loss": 2.7463, + "step": 3517 + }, + { + "epoch": 0.28391574529900737, + "grad_norm": 0.8022071123123169, + "learning_rate": 0.00018579162373606002, + "loss": 2.6898, + "step": 3518 + }, + { + "epoch": 0.28399644903559035, + "grad_norm": 0.7899700999259949, + "learning_rate": 0.0001857835115234937, + "loss": 2.7074, + "step": 3519 + }, + { + "epoch": 0.2840771527721733, + "grad_norm": 0.7237183451652527, + "learning_rate": 0.00018577539717297805, + "loss": 2.6699, + "step": 3520 + }, + { + "epoch": 0.28415785650875636, + "grad_norm": 0.7627314329147339, + "learning_rate": 0.00018576728068471526, + "loss": 2.7745, + "step": 3521 + }, + { + "epoch": 0.28423856024533933, + "grad_norm": 0.7301654815673828, + "learning_rate": 0.00018575916205890766, + "loss": 2.7191, + "step": 3522 + }, + { + "epoch": 0.28431926398192237, + "grad_norm": 0.7441647052764893, + "learning_rate": 0.00018575104129575753, + "loss": 2.7529, + "step": 3523 + }, + { + "epoch": 0.28439996771850534, + "grad_norm": 0.7715914249420166, + "learning_rate": 0.0001857429183954673, + "loss": 2.6893, + "step": 3524 + }, + { + "epoch": 0.2844806714550884, + "grad_norm": 0.7464057207107544, + "learning_rate": 0.00018573479335823944, + "loss": 2.7169, + "step": 3525 + }, + { + "epoch": 0.28456137519167135, + "grad_norm": 0.753198504447937, + "learning_rate": 0.00018572666618427638, + "loss": 2.7144, + "step": 3526 + }, + { + "epoch": 0.2846420789282544, + "grad_norm": 0.7681953310966492, + "learning_rate": 0.00018571853687378073, + "loss": 2.709, + "step": 3527 + }, + { + "epoch": 0.28472278266483736, + "grad_norm": 0.7591876983642578, + "learning_rate": 0.0001857104054269551, + "loss": 2.7519, + "step": 3528 + }, + { + "epoch": 0.2848034864014204, + "grad_norm": 0.7417709827423096, + "learning_rate": 0.00018570227184400205, + "loss": 2.6756, + "step": 3529 + }, + { + "epoch": 0.28488419013800337, + "grad_norm": 0.7641329169273376, + "learning_rate": 
0.0001856941361251244, + "loss": 2.6614, + "step": 3530 + }, + { + "epoch": 0.2849648938745864, + "grad_norm": 0.7813490033149719, + "learning_rate": 0.0001856859982705249, + "loss": 2.7145, + "step": 3531 + }, + { + "epoch": 0.2850455976111694, + "grad_norm": 0.7777202129364014, + "learning_rate": 0.00018567785828040628, + "loss": 2.7015, + "step": 3532 + }, + { + "epoch": 0.2851263013477524, + "grad_norm": 0.7647144794464111, + "learning_rate": 0.0001856697161549715, + "loss": 2.7311, + "step": 3533 + }, + { + "epoch": 0.2852070050843354, + "grad_norm": 0.7477256655693054, + "learning_rate": 0.00018566157189442342, + "loss": 2.6832, + "step": 3534 + }, + { + "epoch": 0.2852877088209184, + "grad_norm": 0.7037049531936646, + "learning_rate": 0.00018565342549896506, + "loss": 2.6942, + "step": 3535 + }, + { + "epoch": 0.2853684125575014, + "grad_norm": 0.7309197783470154, + "learning_rate": 0.00018564527696879945, + "loss": 2.6797, + "step": 3536 + }, + { + "epoch": 0.2854491162940844, + "grad_norm": 0.798075795173645, + "learning_rate": 0.00018563712630412967, + "loss": 2.6926, + "step": 3537 + }, + { + "epoch": 0.2855298200306674, + "grad_norm": 0.7831682562828064, + "learning_rate": 0.0001856289735051588, + "loss": 2.7537, + "step": 3538 + }, + { + "epoch": 0.28561052376725043, + "grad_norm": 0.7983096241950989, + "learning_rate": 0.0001856208185720901, + "loss": 2.7037, + "step": 3539 + }, + { + "epoch": 0.2856912275038334, + "grad_norm": 0.7250573635101318, + "learning_rate": 0.00018561266150512678, + "loss": 2.7282, + "step": 3540 + }, + { + "epoch": 0.28577193124041644, + "grad_norm": 0.7800211906433105, + "learning_rate": 0.00018560450230447218, + "loss": 2.6541, + "step": 3541 + }, + { + "epoch": 0.2858526349769994, + "grad_norm": 0.7624209523200989, + "learning_rate": 0.00018559634097032953, + "loss": 2.7041, + "step": 3542 + }, + { + "epoch": 0.28593333871358245, + "grad_norm": 0.7212036848068237, + "learning_rate": 0.0001855881775029024, + "loss": 
2.7287, + "step": 3543 + }, + { + "epoch": 0.28601404245016543, + "grad_norm": 0.7774164080619812, + "learning_rate": 0.00018558001190239408, + "loss": 2.6515, + "step": 3544 + }, + { + "epoch": 0.28609474618674846, + "grad_norm": 0.7169588208198547, + "learning_rate": 0.0001855718441690082, + "loss": 2.7111, + "step": 3545 + }, + { + "epoch": 0.28617544992333144, + "grad_norm": 0.7473909258842468, + "learning_rate": 0.00018556367430294827, + "loss": 2.7405, + "step": 3546 + }, + { + "epoch": 0.28625615365991447, + "grad_norm": 0.7213929295539856, + "learning_rate": 0.0001855555023044179, + "loss": 2.7336, + "step": 3547 + }, + { + "epoch": 0.28633685739649745, + "grad_norm": 0.701816201210022, + "learning_rate": 0.00018554732817362078, + "loss": 2.721, + "step": 3548 + }, + { + "epoch": 0.2864175611330805, + "grad_norm": 0.8158134818077087, + "learning_rate": 0.00018553915191076064, + "loss": 2.6979, + "step": 3549 + }, + { + "epoch": 0.28649826486966345, + "grad_norm": 0.7303084135055542, + "learning_rate": 0.00018553097351604118, + "loss": 2.6734, + "step": 3550 + }, + { + "epoch": 0.2865789686062465, + "grad_norm": 0.8140435814857483, + "learning_rate": 0.00018552279298966634, + "loss": 2.6832, + "step": 3551 + }, + { + "epoch": 0.28665967234282946, + "grad_norm": 0.7024678587913513, + "learning_rate": 0.00018551461033183988, + "loss": 2.7118, + "step": 3552 + }, + { + "epoch": 0.2867403760794125, + "grad_norm": 0.7277806401252747, + "learning_rate": 0.00018550642554276582, + "loss": 2.6362, + "step": 3553 + }, + { + "epoch": 0.28682107981599547, + "grad_norm": 0.8376575112342834, + "learning_rate": 0.00018549823862264812, + "loss": 2.744, + "step": 3554 + }, + { + "epoch": 0.2869017835525785, + "grad_norm": 0.712195098400116, + "learning_rate": 0.00018549004957169082, + "loss": 2.6715, + "step": 3555 + }, + { + "epoch": 0.2869824872891615, + "grad_norm": 0.7511523962020874, + "learning_rate": 0.00018548185839009805, + "loss": 2.7655, + "step": 3556 + }, + { + 
"epoch": 0.2870631910257445, + "grad_norm": 0.7397211790084839, + "learning_rate": 0.00018547366507807388, + "loss": 2.6813, + "step": 3557 + }, + { + "epoch": 0.2871438947623275, + "grad_norm": 0.6926341652870178, + "learning_rate": 0.00018546546963582253, + "loss": 2.6477, + "step": 3558 + }, + { + "epoch": 0.2872245984989105, + "grad_norm": 0.7776244878768921, + "learning_rate": 0.00018545727206354827, + "loss": 2.6979, + "step": 3559 + }, + { + "epoch": 0.2873053022354935, + "grad_norm": 0.7639400959014893, + "learning_rate": 0.00018544907236145542, + "loss": 2.6913, + "step": 3560 + }, + { + "epoch": 0.28738600597207653, + "grad_norm": 0.7738329768180847, + "learning_rate": 0.0001854408705297483, + "loss": 2.7231, + "step": 3561 + }, + { + "epoch": 0.2874667097086595, + "grad_norm": 0.7182422876358032, + "learning_rate": 0.00018543266656863137, + "loss": 2.718, + "step": 3562 + }, + { + "epoch": 0.28754741344524254, + "grad_norm": 0.7257261276245117, + "learning_rate": 0.00018542446047830903, + "loss": 2.7354, + "step": 3563 + }, + { + "epoch": 0.2876281171818255, + "grad_norm": 0.7761391997337341, + "learning_rate": 0.00018541625225898588, + "loss": 2.705, + "step": 3564 + }, + { + "epoch": 0.28770882091840855, + "grad_norm": 0.9272314310073853, + "learning_rate": 0.0001854080419108664, + "loss": 2.7278, + "step": 3565 + }, + { + "epoch": 0.2877895246549915, + "grad_norm": 0.7622589468955994, + "learning_rate": 0.00018539982943415527, + "loss": 2.7224, + "step": 3566 + }, + { + "epoch": 0.28787022839157456, + "grad_norm": 0.725349485874176, + "learning_rate": 0.0001853916148290572, + "loss": 2.6782, + "step": 3567 + }, + { + "epoch": 0.28795093212815753, + "grad_norm": 0.776242733001709, + "learning_rate": 0.0001853833980957768, + "loss": 2.6467, + "step": 3568 + }, + { + "epoch": 0.28803163586474057, + "grad_norm": 0.8461112976074219, + "learning_rate": 0.00018537517923451896, + "loss": 2.6763, + "step": 3569 + }, + { + "epoch": 0.28811233960132354, + 
"grad_norm": 0.8161221742630005, + "learning_rate": 0.00018536695824548848, + "loss": 2.7057, + "step": 3570 + }, + { + "epoch": 0.2881930433379065, + "grad_norm": 0.7404211759567261, + "learning_rate": 0.00018535873512889024, + "loss": 2.7083, + "step": 3571 + }, + { + "epoch": 0.28827374707448955, + "grad_norm": 0.831042468547821, + "learning_rate": 0.00018535050988492918, + "loss": 2.6121, + "step": 3572 + }, + { + "epoch": 0.2883544508110725, + "grad_norm": 0.7286352515220642, + "learning_rate": 0.00018534228251381035, + "loss": 2.7165, + "step": 3573 + }, + { + "epoch": 0.28843515454765556, + "grad_norm": 0.7951883673667908, + "learning_rate": 0.00018533405301573872, + "loss": 2.6794, + "step": 3574 + }, + { + "epoch": 0.28851585828423854, + "grad_norm": 0.7431079149246216, + "learning_rate": 0.00018532582139091944, + "loss": 2.6758, + "step": 3575 + }, + { + "epoch": 0.28859656202082157, + "grad_norm": 0.7408809065818787, + "learning_rate": 0.0001853175876395576, + "loss": 2.6901, + "step": 3576 + }, + { + "epoch": 0.28867726575740454, + "grad_norm": 0.7428708672523499, + "learning_rate": 0.00018530935176185848, + "loss": 2.6679, + "step": 3577 + }, + { + "epoch": 0.2887579694939876, + "grad_norm": 0.7670302987098694, + "learning_rate": 0.00018530111375802735, + "loss": 2.7306, + "step": 3578 + }, + { + "epoch": 0.28883867323057055, + "grad_norm": 0.7582474946975708, + "learning_rate": 0.00018529287362826943, + "loss": 2.7715, + "step": 3579 + }, + { + "epoch": 0.2889193769671536, + "grad_norm": 0.750973105430603, + "learning_rate": 0.0001852846313727902, + "loss": 2.7147, + "step": 3580 + }, + { + "epoch": 0.28900008070373656, + "grad_norm": 0.771854043006897, + "learning_rate": 0.00018527638699179498, + "loss": 2.6874, + "step": 3581 + }, + { + "epoch": 0.2890807844403196, + "grad_norm": 0.785469651222229, + "learning_rate": 0.00018526814048548928, + "loss": 2.6858, + "step": 3582 + }, + { + "epoch": 0.28916148817690257, + "grad_norm": 0.7601101398468018, + 
"learning_rate": 0.00018525989185407864, + "loss": 2.6927, + "step": 3583 + }, + { + "epoch": 0.2892421919134856, + "grad_norm": 0.7313411831855774, + "learning_rate": 0.00018525164109776861, + "loss": 2.6813, + "step": 3584 + }, + { + "epoch": 0.2893228956500686, + "grad_norm": 0.7471718192100525, + "learning_rate": 0.00018524338821676483, + "loss": 2.6791, + "step": 3585 + }, + { + "epoch": 0.2894035993866516, + "grad_norm": 0.7615204453468323, + "learning_rate": 0.00018523513321127302, + "loss": 2.7767, + "step": 3586 + }, + { + "epoch": 0.2894843031232346, + "grad_norm": 0.766793966293335, + "learning_rate": 0.00018522687608149886, + "loss": 2.664, + "step": 3587 + }, + { + "epoch": 0.2895650068598176, + "grad_norm": 0.7897932529449463, + "learning_rate": 0.00018521861682764816, + "loss": 2.7148, + "step": 3588 + }, + { + "epoch": 0.2896457105964006, + "grad_norm": 0.7366818785667419, + "learning_rate": 0.00018521035544992679, + "loss": 2.69, + "step": 3589 + }, + { + "epoch": 0.28972641433298363, + "grad_norm": 0.7503829598426819, + "learning_rate": 0.00018520209194854058, + "loss": 2.7141, + "step": 3590 + }, + { + "epoch": 0.2898071180695666, + "grad_norm": 0.8064351081848145, + "learning_rate": 0.00018519382632369556, + "loss": 2.6738, + "step": 3591 + }, + { + "epoch": 0.28988782180614964, + "grad_norm": 0.7364048361778259, + "learning_rate": 0.00018518555857559768, + "loss": 2.6731, + "step": 3592 + }, + { + "epoch": 0.2899685255427326, + "grad_norm": 0.7065430283546448, + "learning_rate": 0.00018517728870445297, + "loss": 2.7314, + "step": 3593 + }, + { + "epoch": 0.29004922927931565, + "grad_norm": 0.8233428001403809, + "learning_rate": 0.0001851690167104676, + "loss": 2.727, + "step": 3594 + }, + { + "epoch": 0.2901299330158986, + "grad_norm": 0.7563758492469788, + "learning_rate": 0.00018516074259384768, + "loss": 2.665, + "step": 3595 + }, + { + "epoch": 0.29021063675248165, + "grad_norm": 0.7451249361038208, + "learning_rate": 
0.00018515246635479943, + "loss": 2.7686, + "step": 3596 + }, + { + "epoch": 0.29029134048906463, + "grad_norm": 0.7374305725097656, + "learning_rate": 0.00018514418799352918, + "loss": 2.6466, + "step": 3597 + }, + { + "epoch": 0.29037204422564766, + "grad_norm": 0.7596983909606934, + "learning_rate": 0.00018513590751024315, + "loss": 2.6763, + "step": 3598 + }, + { + "epoch": 0.29045274796223064, + "grad_norm": 0.7808190584182739, + "learning_rate": 0.0001851276249051478, + "loss": 2.7362, + "step": 3599 + }, + { + "epoch": 0.29053345169881367, + "grad_norm": 0.765785276889801, + "learning_rate": 0.00018511934017844948, + "loss": 2.7049, + "step": 3600 + }, + { + "epoch": 0.29061415543539665, + "grad_norm": 0.7503563165664673, + "learning_rate": 0.0001851110533303547, + "loss": 2.6262, + "step": 3601 + }, + { + "epoch": 0.2906948591719797, + "grad_norm": 0.7287782430648804, + "learning_rate": 0.00018510276436107, + "loss": 2.7076, + "step": 3602 + }, + { + "epoch": 0.29077556290856266, + "grad_norm": 0.7748721837997437, + "learning_rate": 0.00018509447327080193, + "loss": 2.6945, + "step": 3603 + }, + { + "epoch": 0.2908562666451457, + "grad_norm": 0.7482423186302185, + "learning_rate": 0.00018508618005975714, + "loss": 2.7326, + "step": 3604 + }, + { + "epoch": 0.29093697038172867, + "grad_norm": 0.7708765864372253, + "learning_rate": 0.00018507788472814238, + "loss": 2.7602, + "step": 3605 + }, + { + "epoch": 0.2910176741183117, + "grad_norm": 0.7308060526847839, + "learning_rate": 0.0001850695872761643, + "loss": 2.6735, + "step": 3606 + }, + { + "epoch": 0.2910983778548947, + "grad_norm": 0.7512951493263245, + "learning_rate": 0.00018506128770402972, + "loss": 2.6877, + "step": 3607 + }, + { + "epoch": 0.2911790815914777, + "grad_norm": 0.6806616187095642, + "learning_rate": 0.00018505298601194552, + "loss": 2.6689, + "step": 3608 + }, + { + "epoch": 0.2912597853280607, + "grad_norm": 0.7825661301612854, + "learning_rate": 0.00018504468220011857, + "loss": 
2.7108, + "step": 3609 + }, + { + "epoch": 0.2913404890646437, + "grad_norm": 0.8243381977081299, + "learning_rate": 0.00018503637626875584, + "loss": 2.6789, + "step": 3610 + }, + { + "epoch": 0.2914211928012267, + "grad_norm": 0.745012640953064, + "learning_rate": 0.00018502806821806429, + "loss": 2.7658, + "step": 3611 + }, + { + "epoch": 0.2915018965378097, + "grad_norm": 0.7091341018676758, + "learning_rate": 0.00018501975804825104, + "loss": 2.7046, + "step": 3612 + }, + { + "epoch": 0.2915826002743927, + "grad_norm": 0.729026734828949, + "learning_rate": 0.0001850114457595232, + "loss": 2.6692, + "step": 3613 + }, + { + "epoch": 0.29166330401097573, + "grad_norm": 0.8098071813583374, + "learning_rate": 0.00018500313135208786, + "loss": 2.712, + "step": 3614 + }, + { + "epoch": 0.2917440077475587, + "grad_norm": 0.7387483716011047, + "learning_rate": 0.0001849948148261523, + "loss": 2.6705, + "step": 3615 + }, + { + "epoch": 0.29182471148414174, + "grad_norm": 0.7904576659202576, + "learning_rate": 0.0001849864961819238, + "loss": 2.5969, + "step": 3616 + }, + { + "epoch": 0.2919054152207247, + "grad_norm": 0.7560681700706482, + "learning_rate": 0.00018497817541960964, + "loss": 2.6971, + "step": 3617 + }, + { + "epoch": 0.29198611895730775, + "grad_norm": 0.8488430976867676, + "learning_rate": 0.00018496985253941723, + "loss": 2.7367, + "step": 3618 + }, + { + "epoch": 0.2920668226938907, + "grad_norm": 0.7641268372535706, + "learning_rate": 0.00018496152754155399, + "loss": 2.6948, + "step": 3619 + }, + { + "epoch": 0.29214752643047376, + "grad_norm": 0.7219721674919128, + "learning_rate": 0.00018495320042622736, + "loss": 2.7225, + "step": 3620 + }, + { + "epoch": 0.29222823016705674, + "grad_norm": 0.7583872675895691, + "learning_rate": 0.00018494487119364493, + "loss": 2.7335, + "step": 3621 + }, + { + "epoch": 0.2923089339036397, + "grad_norm": 0.7771418690681458, + "learning_rate": 0.00018493653984401424, + "loss": 2.6712, + "step": 3622 + }, + { + 
"epoch": 0.29238963764022274, + "grad_norm": 0.7537891268730164, + "learning_rate": 0.00018492820637754296, + "loss": 2.7282, + "step": 3623 + }, + { + "epoch": 0.2924703413768057, + "grad_norm": 0.7334226965904236, + "learning_rate": 0.00018491987079443875, + "loss": 2.7072, + "step": 3624 + }, + { + "epoch": 0.29255104511338875, + "grad_norm": 0.7768076658248901, + "learning_rate": 0.00018491153309490942, + "loss": 2.7176, + "step": 3625 + }, + { + "epoch": 0.29263174884997173, + "grad_norm": 0.6831281185150146, + "learning_rate": 0.0001849031932791627, + "loss": 2.6982, + "step": 3626 + }, + { + "epoch": 0.29271245258655476, + "grad_norm": 0.7150557637214661, + "learning_rate": 0.00018489485134740648, + "loss": 2.7325, + "step": 3627 + }, + { + "epoch": 0.29279315632313774, + "grad_norm": 0.782667338848114, + "learning_rate": 0.00018488650729984863, + "loss": 2.7146, + "step": 3628 + }, + { + "epoch": 0.29287386005972077, + "grad_norm": 0.7718524932861328, + "learning_rate": 0.0001848781611366971, + "loss": 2.746, + "step": 3629 + }, + { + "epoch": 0.29295456379630375, + "grad_norm": 0.7066439390182495, + "learning_rate": 0.00018486981285815998, + "loss": 2.7497, + "step": 3630 + }, + { + "epoch": 0.2930352675328868, + "grad_norm": 0.7705665826797485, + "learning_rate": 0.00018486146246444522, + "loss": 2.6448, + "step": 3631 + }, + { + "epoch": 0.29311597126946976, + "grad_norm": 0.7334863543510437, + "learning_rate": 0.000184853109955761, + "loss": 2.6931, + "step": 3632 + }, + { + "epoch": 0.2931966750060528, + "grad_norm": 0.7903133630752563, + "learning_rate": 0.0001848447553323155, + "loss": 2.6954, + "step": 3633 + }, + { + "epoch": 0.29327737874263576, + "grad_norm": 0.6821191310882568, + "learning_rate": 0.00018483639859431689, + "loss": 2.6165, + "step": 3634 + }, + { + "epoch": 0.2933580824792188, + "grad_norm": 0.7187811136245728, + "learning_rate": 0.00018482803974197344, + "loss": 2.6387, + "step": 3635 + }, + { + "epoch": 0.2934387862158018, + 
"grad_norm": 0.7429843544960022, + "learning_rate": 0.00018481967877549354, + "loss": 2.6848, + "step": 3636 + }, + { + "epoch": 0.2935194899523848, + "grad_norm": 0.7431524395942688, + "learning_rate": 0.0001848113156950855, + "loss": 2.7044, + "step": 3637 + }, + { + "epoch": 0.2936001936889678, + "grad_norm": 0.7008687853813171, + "learning_rate": 0.00018480295050095778, + "loss": 2.6922, + "step": 3638 + }, + { + "epoch": 0.2936808974255508, + "grad_norm": 0.7106652855873108, + "learning_rate": 0.00018479458319331884, + "loss": 2.6845, + "step": 3639 + }, + { + "epoch": 0.2937616011621338, + "grad_norm": 0.7288951873779297, + "learning_rate": 0.00018478621377237723, + "loss": 2.7017, + "step": 3640 + }, + { + "epoch": 0.2938423048987168, + "grad_norm": 0.7228607535362244, + "learning_rate": 0.00018477784223834155, + "loss": 2.7449, + "step": 3641 + }, + { + "epoch": 0.2939230086352998, + "grad_norm": 0.7180825471878052, + "learning_rate": 0.00018476946859142043, + "loss": 2.7291, + "step": 3642 + }, + { + "epoch": 0.29400371237188283, + "grad_norm": 0.7854947447776794, + "learning_rate": 0.00018476109283182258, + "loss": 2.7619, + "step": 3643 + }, + { + "epoch": 0.2940844161084658, + "grad_norm": 0.7871318459510803, + "learning_rate": 0.00018475271495975673, + "loss": 2.6695, + "step": 3644 + }, + { + "epoch": 0.29416511984504884, + "grad_norm": 0.7813127636909485, + "learning_rate": 0.00018474433497543165, + "loss": 2.735, + "step": 3645 + }, + { + "epoch": 0.2942458235816318, + "grad_norm": 0.7835291028022766, + "learning_rate": 0.00018473595287905623, + "loss": 2.7336, + "step": 3646 + }, + { + "epoch": 0.29432652731821485, + "grad_norm": 0.6970148682594299, + "learning_rate": 0.00018472756867083935, + "loss": 2.6912, + "step": 3647 + }, + { + "epoch": 0.2944072310547978, + "grad_norm": 0.7968462109565735, + "learning_rate": 0.00018471918235098998, + "loss": 2.6889, + "step": 3648 + }, + { + "epoch": 0.29448793479138086, + "grad_norm": 0.7011313438415527, + 
"learning_rate": 0.00018471079391971714, + "loss": 2.6989, + "step": 3649 + }, + { + "epoch": 0.29456863852796383, + "grad_norm": 0.8047335743904114, + "learning_rate": 0.00018470240337722991, + "loss": 2.6827, + "step": 3650 + }, + { + "epoch": 0.29464934226454687, + "grad_norm": 0.7446332573890686, + "learning_rate": 0.00018469401072373733, + "loss": 2.7089, + "step": 3651 + }, + { + "epoch": 0.29473004600112984, + "grad_norm": 0.7610359191894531, + "learning_rate": 0.00018468561595944862, + "loss": 2.6766, + "step": 3652 + }, + { + "epoch": 0.2948107497377129, + "grad_norm": 0.7705755233764648, + "learning_rate": 0.000184677219084573, + "loss": 2.7445, + "step": 3653 + }, + { + "epoch": 0.29489145347429585, + "grad_norm": 0.7466446757316589, + "learning_rate": 0.00018466882009931973, + "loss": 2.726, + "step": 3654 + }, + { + "epoch": 0.2949721572108789, + "grad_norm": 0.7912059426307678, + "learning_rate": 0.00018466041900389813, + "loss": 2.6865, + "step": 3655 + }, + { + "epoch": 0.29505286094746186, + "grad_norm": 0.722588837146759, + "learning_rate": 0.00018465201579851757, + "loss": 2.7039, + "step": 3656 + }, + { + "epoch": 0.2951335646840449, + "grad_norm": 0.739311933517456, + "learning_rate": 0.00018464361048338752, + "loss": 2.6991, + "step": 3657 + }, + { + "epoch": 0.29521426842062787, + "grad_norm": 0.7784128785133362, + "learning_rate": 0.00018463520305871743, + "loss": 2.753, + "step": 3658 + }, + { + "epoch": 0.2952949721572109, + "grad_norm": 0.8261777758598328, + "learning_rate": 0.00018462679352471682, + "loss": 2.7257, + "step": 3659 + }, + { + "epoch": 0.2953756758937939, + "grad_norm": 0.7510927319526672, + "learning_rate": 0.0001846183818815953, + "loss": 2.6981, + "step": 3660 + }, + { + "epoch": 0.2954563796303769, + "grad_norm": 0.7403035163879395, + "learning_rate": 0.00018460996812956254, + "loss": 2.744, + "step": 3661 + }, + { + "epoch": 0.2955370833669599, + "grad_norm": 0.7927733063697815, + "learning_rate": 
0.00018460155226882817, + "loss": 2.6304, + "step": 3662 + }, + { + "epoch": 0.2956177871035429, + "grad_norm": 0.7923495769500732, + "learning_rate": 0.000184593134299602, + "loss": 2.7882, + "step": 3663 + }, + { + "epoch": 0.2956984908401259, + "grad_norm": 0.7639210224151611, + "learning_rate": 0.00018458471422209377, + "loss": 2.7171, + "step": 3664 + }, + { + "epoch": 0.2957791945767089, + "grad_norm": 0.736652672290802, + "learning_rate": 0.00018457629203651337, + "loss": 2.7479, + "step": 3665 + }, + { + "epoch": 0.2958598983132919, + "grad_norm": 0.7718610763549805, + "learning_rate": 0.00018456786774307066, + "loss": 2.7135, + "step": 3666 + }, + { + "epoch": 0.29594060204987493, + "grad_norm": 0.7711780071258545, + "learning_rate": 0.00018455944134197565, + "loss": 2.6867, + "step": 3667 + }, + { + "epoch": 0.2960213057864579, + "grad_norm": 0.7202491760253906, + "learning_rate": 0.0001845510128334383, + "loss": 2.6657, + "step": 3668 + }, + { + "epoch": 0.29610200952304094, + "grad_norm": 0.8155657649040222, + "learning_rate": 0.00018454258221766869, + "loss": 2.7342, + "step": 3669 + }, + { + "epoch": 0.2961827132596239, + "grad_norm": 0.7972069382667542, + "learning_rate": 0.00018453414949487696, + "loss": 2.7351, + "step": 3670 + }, + { + "epoch": 0.29626341699620695, + "grad_norm": 0.8645625710487366, + "learning_rate": 0.00018452571466527325, + "loss": 2.6778, + "step": 3671 + }, + { + "epoch": 0.29634412073278993, + "grad_norm": 0.7410334944725037, + "learning_rate": 0.00018451727772906775, + "loss": 2.7228, + "step": 3672 + }, + { + "epoch": 0.2964248244693729, + "grad_norm": 0.7845733165740967, + "learning_rate": 0.0001845088386864708, + "loss": 2.7068, + "step": 3673 + }, + { + "epoch": 0.29650552820595594, + "grad_norm": 0.7709881067276001, + "learning_rate": 0.00018450039753769266, + "loss": 2.676, + "step": 3674 + }, + { + "epoch": 0.2965862319425389, + "grad_norm": 0.7214749455451965, + "learning_rate": 0.00018449195428294371, + "loss": 
2.6488, + "step": 3675 + }, + { + "epoch": 0.29666693567912195, + "grad_norm": 0.7467561960220337, + "learning_rate": 0.00018448350892243443, + "loss": 2.7262, + "step": 3676 + }, + { + "epoch": 0.2967476394157049, + "grad_norm": 0.8412678241729736, + "learning_rate": 0.00018447506145637522, + "loss": 2.7898, + "step": 3677 + }, + { + "epoch": 0.29682834315228795, + "grad_norm": 0.7130109071731567, + "learning_rate": 0.00018446661188497668, + "loss": 2.7344, + "step": 3678 + }, + { + "epoch": 0.29690904688887093, + "grad_norm": 0.7807374000549316, + "learning_rate": 0.00018445816020844937, + "loss": 2.7198, + "step": 3679 + }, + { + "epoch": 0.29698975062545396, + "grad_norm": 0.8497760891914368, + "learning_rate": 0.00018444970642700394, + "loss": 2.7479, + "step": 3680 + }, + { + "epoch": 0.29707045436203694, + "grad_norm": 0.6827178001403809, + "learning_rate": 0.0001844412505408511, + "loss": 2.727, + "step": 3681 + }, + { + "epoch": 0.29715115809861997, + "grad_norm": 0.8063304424285889, + "learning_rate": 0.00018443279255020152, + "loss": 2.7896, + "step": 3682 + }, + { + "epoch": 0.29723186183520295, + "grad_norm": 0.7759353518486023, + "learning_rate": 0.00018442433245526604, + "loss": 2.7014, + "step": 3683 + }, + { + "epoch": 0.297312565571786, + "grad_norm": 0.7380958199501038, + "learning_rate": 0.00018441587025625554, + "loss": 2.6665, + "step": 3684 + }, + { + "epoch": 0.29739326930836896, + "grad_norm": 0.7623556852340698, + "learning_rate": 0.00018440740595338087, + "loss": 2.6955, + "step": 3685 + }, + { + "epoch": 0.297473973044952, + "grad_norm": 0.8204537630081177, + "learning_rate": 0.000184398939546853, + "loss": 2.6854, + "step": 3686 + }, + { + "epoch": 0.29755467678153497, + "grad_norm": 0.7346726655960083, + "learning_rate": 0.00018439047103688293, + "loss": 2.6664, + "step": 3687 + }, + { + "epoch": 0.297635380518118, + "grad_norm": 0.777860701084137, + "learning_rate": 0.00018438200042368173, + "loss": 2.6423, + "step": 3688 + }, + { + 
"epoch": 0.297716084254701, + "grad_norm": 0.7331553101539612, + "learning_rate": 0.00018437352770746054, + "loss": 2.6137, + "step": 3689 + }, + { + "epoch": 0.297796787991284, + "grad_norm": 0.7634466290473938, + "learning_rate": 0.00018436505288843043, + "loss": 2.7266, + "step": 3690 + }, + { + "epoch": 0.297877491727867, + "grad_norm": 0.8151016235351562, + "learning_rate": 0.00018435657596680268, + "loss": 2.7373, + "step": 3691 + }, + { + "epoch": 0.29795819546445, + "grad_norm": 0.7806773781776428, + "learning_rate": 0.00018434809694278857, + "loss": 2.7011, + "step": 3692 + }, + { + "epoch": 0.298038899201033, + "grad_norm": 0.7575243711471558, + "learning_rate": 0.00018433961581659935, + "loss": 2.6601, + "step": 3693 + }, + { + "epoch": 0.298119602937616, + "grad_norm": 0.7527276873588562, + "learning_rate": 0.00018433113258844647, + "loss": 2.6864, + "step": 3694 + }, + { + "epoch": 0.298200306674199, + "grad_norm": 0.8024318218231201, + "learning_rate": 0.0001843226472585413, + "loss": 2.728, + "step": 3695 + }, + { + "epoch": 0.29828101041078203, + "grad_norm": 0.7549982666969299, + "learning_rate": 0.0001843141598270954, + "loss": 2.6834, + "step": 3696 + }, + { + "epoch": 0.298361714147365, + "grad_norm": 0.7699971199035645, + "learning_rate": 0.0001843056702943202, + "loss": 2.7209, + "step": 3697 + }, + { + "epoch": 0.29844241788394804, + "grad_norm": 0.823842465877533, + "learning_rate": 0.0001842971786604273, + "loss": 2.6924, + "step": 3698 + }, + { + "epoch": 0.298523121620531, + "grad_norm": 0.7645791172981262, + "learning_rate": 0.00018428868492562837, + "loss": 2.6821, + "step": 3699 + }, + { + "epoch": 0.29860382535711405, + "grad_norm": 0.7530989050865173, + "learning_rate": 0.00018428018909013506, + "loss": 2.7592, + "step": 3700 + }, + { + "epoch": 0.298684529093697, + "grad_norm": 0.7958168387413025, + "learning_rate": 0.00018427169115415914, + "loss": 2.6925, + "step": 3701 + }, + { + "epoch": 0.29876523283028006, + "grad_norm": 
0.7777522802352905, + "learning_rate": 0.00018426319111791242, + "loss": 2.6757, + "step": 3702 + }, + { + "epoch": 0.29884593656686304, + "grad_norm": 0.7418079972267151, + "learning_rate": 0.00018425468898160667, + "loss": 2.6445, + "step": 3703 + }, + { + "epoch": 0.29892664030344607, + "grad_norm": 0.7591132521629333, + "learning_rate": 0.00018424618474545382, + "loss": 2.7157, + "step": 3704 + }, + { + "epoch": 0.29900734404002904, + "grad_norm": 0.7591627836227417, + "learning_rate": 0.00018423767840966586, + "loss": 2.6691, + "step": 3705 + }, + { + "epoch": 0.2990880477766121, + "grad_norm": 0.7934779524803162, + "learning_rate": 0.00018422916997445476, + "loss": 2.7262, + "step": 3706 + }, + { + "epoch": 0.29916875151319505, + "grad_norm": 0.7964254021644592, + "learning_rate": 0.00018422065944003252, + "loss": 2.6196, + "step": 3707 + }, + { + "epoch": 0.2992494552497781, + "grad_norm": 0.7448374032974243, + "learning_rate": 0.0001842121468066113, + "loss": 2.6732, + "step": 3708 + }, + { + "epoch": 0.29933015898636106, + "grad_norm": 0.7813000679016113, + "learning_rate": 0.00018420363207440329, + "loss": 2.6978, + "step": 3709 + }, + { + "epoch": 0.2994108627229441, + "grad_norm": 0.7760851979255676, + "learning_rate": 0.00018419511524362064, + "loss": 2.7466, + "step": 3710 + }, + { + "epoch": 0.29949156645952707, + "grad_norm": 0.7786797881126404, + "learning_rate": 0.00018418659631447564, + "loss": 2.7044, + "step": 3711 + }, + { + "epoch": 0.2995722701961101, + "grad_norm": 0.7860158085823059, + "learning_rate": 0.00018417807528718055, + "loss": 2.6587, + "step": 3712 + }, + { + "epoch": 0.2996529739326931, + "grad_norm": 0.8327339291572571, + "learning_rate": 0.0001841695521619478, + "loss": 2.7112, + "step": 3713 + }, + { + "epoch": 0.2997336776692761, + "grad_norm": 0.7535735368728638, + "learning_rate": 0.00018416102693898982, + "loss": 2.726, + "step": 3714 + }, + { + "epoch": 0.2998143814058591, + "grad_norm": 0.7781090140342712, + 
"learning_rate": 0.000184152499618519, + "loss": 2.7238, + "step": 3715 + }, + { + "epoch": 0.2998950851424421, + "grad_norm": 0.7700545191764832, + "learning_rate": 0.00018414397020074795, + "loss": 2.7081, + "step": 3716 + }, + { + "epoch": 0.2999757888790251, + "grad_norm": 0.7578303217887878, + "learning_rate": 0.0001841354386858892, + "loss": 2.6591, + "step": 3717 + }, + { + "epoch": 0.30005649261560813, + "grad_norm": 0.7506501078605652, + "learning_rate": 0.00018412690507415538, + "loss": 2.6551, + "step": 3718 + }, + { + "epoch": 0.3001371963521911, + "grad_norm": 0.7869547009468079, + "learning_rate": 0.00018411836936575918, + "loss": 2.7169, + "step": 3719 + }, + { + "epoch": 0.30021790008877414, + "grad_norm": 0.7547428607940674, + "learning_rate": 0.00018410983156091332, + "loss": 2.7498, + "step": 3720 + }, + { + "epoch": 0.3002986038253571, + "grad_norm": 0.7829383015632629, + "learning_rate": 0.0001841012916598306, + "loss": 2.6885, + "step": 3721 + }, + { + "epoch": 0.30037930756194015, + "grad_norm": 0.8469082117080688, + "learning_rate": 0.00018409274966272386, + "loss": 2.7594, + "step": 3722 + }, + { + "epoch": 0.3004600112985231, + "grad_norm": 0.7690171599388123, + "learning_rate": 0.00018408420556980596, + "loss": 2.7892, + "step": 3723 + }, + { + "epoch": 0.3005407150351061, + "grad_norm": 0.7295899987220764, + "learning_rate": 0.00018407565938128987, + "loss": 2.7023, + "step": 3724 + }, + { + "epoch": 0.30062141877168913, + "grad_norm": 0.7249528169631958, + "learning_rate": 0.00018406711109738856, + "loss": 2.7135, + "step": 3725 + }, + { + "epoch": 0.3007021225082721, + "grad_norm": 0.7237234711647034, + "learning_rate": 0.0001840585607183151, + "loss": 2.6117, + "step": 3726 + }, + { + "epoch": 0.30078282624485514, + "grad_norm": 0.7426557540893555, + "learning_rate": 0.00018405000824428256, + "loss": 2.7202, + "step": 3727 + }, + { + "epoch": 0.3008635299814381, + "grad_norm": 0.7572938799858093, + "learning_rate": 
0.00018404145367550414, + "loss": 2.7373, + "step": 3728 + }, + { + "epoch": 0.30094423371802115, + "grad_norm": 0.7198675274848938, + "learning_rate": 0.00018403289701219295, + "loss": 2.6675, + "step": 3729 + }, + { + "epoch": 0.3010249374546041, + "grad_norm": 0.722532331943512, + "learning_rate": 0.00018402433825456235, + "loss": 2.6933, + "step": 3730 + }, + { + "epoch": 0.30110564119118716, + "grad_norm": 0.7621530890464783, + "learning_rate": 0.0001840157774028256, + "loss": 2.6951, + "step": 3731 + }, + { + "epoch": 0.30118634492777013, + "grad_norm": 0.7435615062713623, + "learning_rate": 0.00018400721445719604, + "loss": 2.7323, + "step": 3732 + }, + { + "epoch": 0.30126704866435317, + "grad_norm": 0.7233619689941406, + "learning_rate": 0.00018399864941788708, + "loss": 2.6789, + "step": 3733 + }, + { + "epoch": 0.30134775240093614, + "grad_norm": 0.7421496510505676, + "learning_rate": 0.00018399008228511224, + "loss": 2.72, + "step": 3734 + }, + { + "epoch": 0.3014284561375192, + "grad_norm": 0.7250909805297852, + "learning_rate": 0.000183981513059085, + "loss": 2.6717, + "step": 3735 + }, + { + "epoch": 0.30150915987410215, + "grad_norm": 0.7642899751663208, + "learning_rate": 0.0001839729417400189, + "loss": 2.6823, + "step": 3736 + }, + { + "epoch": 0.3015898636106852, + "grad_norm": 0.7434508204460144, + "learning_rate": 0.00018396436832812758, + "loss": 2.6441, + "step": 3737 + }, + { + "epoch": 0.30167056734726816, + "grad_norm": 0.7163311839103699, + "learning_rate": 0.00018395579282362473, + "loss": 2.6736, + "step": 3738 + }, + { + "epoch": 0.3017512710838512, + "grad_norm": 0.6936792731285095, + "learning_rate": 0.00018394721522672404, + "loss": 2.6792, + "step": 3739 + }, + { + "epoch": 0.30183197482043417, + "grad_norm": 0.7791975736618042, + "learning_rate": 0.0001839386355376393, + "loss": 2.653, + "step": 3740 + }, + { + "epoch": 0.3019126785570172, + "grad_norm": 0.7902694940567017, + "learning_rate": 0.00018393005375658437, + "loss": 
2.7448, + "step": 3741 + }, + { + "epoch": 0.3019933822936002, + "grad_norm": 0.7405624389648438, + "learning_rate": 0.0001839214698837731, + "loss": 2.6977, + "step": 3742 + }, + { + "epoch": 0.3020740860301832, + "grad_norm": 0.8033632040023804, + "learning_rate": 0.00018391288391941943, + "loss": 2.7468, + "step": 3743 + }, + { + "epoch": 0.3021547897667662, + "grad_norm": 0.8148884177207947, + "learning_rate": 0.00018390429586373735, + "loss": 2.6992, + "step": 3744 + }, + { + "epoch": 0.3022354935033492, + "grad_norm": 0.7633625268936157, + "learning_rate": 0.00018389570571694089, + "loss": 2.6604, + "step": 3745 + }, + { + "epoch": 0.3023161972399322, + "grad_norm": 0.8687180876731873, + "learning_rate": 0.00018388711347924413, + "loss": 2.6808, + "step": 3746 + }, + { + "epoch": 0.3023969009765152, + "grad_norm": 0.6974104046821594, + "learning_rate": 0.0001838785191508612, + "loss": 2.7613, + "step": 3747 + }, + { + "epoch": 0.3024776047130982, + "grad_norm": 0.7919288873672485, + "learning_rate": 0.00018386992273200633, + "loss": 2.664, + "step": 3748 + }, + { + "epoch": 0.30255830844968123, + "grad_norm": 0.7708829045295715, + "learning_rate": 0.00018386132422289374, + "loss": 2.7703, + "step": 3749 + }, + { + "epoch": 0.3026390121862642, + "grad_norm": 0.7099813222885132, + "learning_rate": 0.00018385272362373775, + "loss": 2.6485, + "step": 3750 + }, + { + "epoch": 0.30271971592284724, + "grad_norm": 0.7629622220993042, + "learning_rate": 0.0001838441209347527, + "loss": 2.7339, + "step": 3751 + }, + { + "epoch": 0.3028004196594302, + "grad_norm": 0.727275550365448, + "learning_rate": 0.00018383551615615295, + "loss": 2.7194, + "step": 3752 + }, + { + "epoch": 0.30288112339601325, + "grad_norm": 0.7158832550048828, + "learning_rate": 0.00018382690928815302, + "loss": 2.6698, + "step": 3753 + }, + { + "epoch": 0.30296182713259623, + "grad_norm": 0.8075565099716187, + "learning_rate": 0.00018381830033096735, + "loss": 2.7198, + "step": 3754 + }, + { + 
"epoch": 0.30304253086917926, + "grad_norm": 0.7949094176292419, + "learning_rate": 0.00018380968928481057, + "loss": 2.7048, + "step": 3755 + }, + { + "epoch": 0.30312323460576224, + "grad_norm": 0.7009503841400146, + "learning_rate": 0.00018380107614989724, + "loss": 2.709, + "step": 3756 + }, + { + "epoch": 0.30320393834234527, + "grad_norm": 0.668574869632721, + "learning_rate": 0.00018379246092644204, + "loss": 2.6515, + "step": 3757 + }, + { + "epoch": 0.30328464207892825, + "grad_norm": 0.7470806241035461, + "learning_rate": 0.00018378384361465968, + "loss": 2.7577, + "step": 3758 + }, + { + "epoch": 0.3033653458155113, + "grad_norm": 0.7529913783073425, + "learning_rate": 0.0001837752242147649, + "loss": 2.7189, + "step": 3759 + }, + { + "epoch": 0.30344604955209425, + "grad_norm": 0.7373302578926086, + "learning_rate": 0.00018376660272697258, + "loss": 2.7197, + "step": 3760 + }, + { + "epoch": 0.3035267532886773, + "grad_norm": 0.7650466561317444, + "learning_rate": 0.0001837579791514975, + "loss": 2.6613, + "step": 3761 + }, + { + "epoch": 0.30360745702526026, + "grad_norm": 0.775209903717041, + "learning_rate": 0.00018374935348855468, + "loss": 2.6454, + "step": 3762 + }, + { + "epoch": 0.3036881607618433, + "grad_norm": 0.7049290537834167, + "learning_rate": 0.00018374072573835903, + "loss": 2.6663, + "step": 3763 + }, + { + "epoch": 0.30376886449842627, + "grad_norm": 0.7060630917549133, + "learning_rate": 0.0001837320959011256, + "loss": 2.6908, + "step": 3764 + }, + { + "epoch": 0.3038495682350093, + "grad_norm": 0.7561464905738831, + "learning_rate": 0.00018372346397706944, + "loss": 2.673, + "step": 3765 + }, + { + "epoch": 0.3039302719715923, + "grad_norm": 0.7293568849563599, + "learning_rate": 0.0001837148299664057, + "loss": 2.6431, + "step": 3766 + }, + { + "epoch": 0.3040109757081753, + "grad_norm": 0.8460379838943481, + "learning_rate": 0.00018370619386934962, + "loss": 2.7493, + "step": 3767 + }, + { + "epoch": 0.3040916794447583, + 
"grad_norm": 0.8136082291603088, + "learning_rate": 0.00018369755568611632, + "loss": 2.7298, + "step": 3768 + }, + { + "epoch": 0.3041723831813413, + "grad_norm": 0.6916636824607849, + "learning_rate": 0.00018368891541692116, + "loss": 2.7173, + "step": 3769 + }, + { + "epoch": 0.3042530869179243, + "grad_norm": 0.7547643780708313, + "learning_rate": 0.0001836802730619795, + "loss": 2.6343, + "step": 3770 + }, + { + "epoch": 0.30433379065450733, + "grad_norm": 0.7439205050468445, + "learning_rate": 0.00018367162862150665, + "loss": 2.6627, + "step": 3771 + }, + { + "epoch": 0.3044144943910903, + "grad_norm": 0.7781087756156921, + "learning_rate": 0.0001836629820957181, + "loss": 2.7223, + "step": 3772 + }, + { + "epoch": 0.30449519812767334, + "grad_norm": 0.7876880764961243, + "learning_rate": 0.00018365433348482935, + "loss": 2.7139, + "step": 3773 + }, + { + "epoch": 0.3045759018642563, + "grad_norm": 0.7571346163749695, + "learning_rate": 0.00018364568278905595, + "loss": 2.6939, + "step": 3774 + }, + { + "epoch": 0.3046566056008393, + "grad_norm": 0.9011813402175903, + "learning_rate": 0.00018363703000861346, + "loss": 2.7516, + "step": 3775 + }, + { + "epoch": 0.3047373093374223, + "grad_norm": 0.7809761762619019, + "learning_rate": 0.00018362837514371755, + "loss": 2.7587, + "step": 3776 + }, + { + "epoch": 0.3048180130740053, + "grad_norm": 0.7486867308616638, + "learning_rate": 0.00018361971819458393, + "loss": 2.6617, + "step": 3777 + }, + { + "epoch": 0.30489871681058833, + "grad_norm": 0.7434267401695251, + "learning_rate": 0.00018361105916142836, + "loss": 2.7328, + "step": 3778 + }, + { + "epoch": 0.3049794205471713, + "grad_norm": 0.7895822525024414, + "learning_rate": 0.0001836023980444666, + "loss": 2.7038, + "step": 3779 + }, + { + "epoch": 0.30506012428375434, + "grad_norm": 0.7329267263412476, + "learning_rate": 0.00018359373484391458, + "loss": 2.6533, + "step": 3780 + }, + { + "epoch": 0.3051408280203373, + "grad_norm": 0.7578477263450623, + 
"learning_rate": 0.00018358506955998817, + "loss": 2.723, + "step": 3781 + }, + { + "epoch": 0.30522153175692035, + "grad_norm": 0.7174215316772461, + "learning_rate": 0.0001835764021929033, + "loss": 2.7665, + "step": 3782 + }, + { + "epoch": 0.3053022354935033, + "grad_norm": 0.7261673808097839, + "learning_rate": 0.00018356773274287605, + "loss": 2.7239, + "step": 3783 + }, + { + "epoch": 0.30538293923008636, + "grad_norm": 0.7550768852233887, + "learning_rate": 0.00018355906121012244, + "loss": 2.6952, + "step": 3784 + }, + { + "epoch": 0.30546364296666934, + "grad_norm": 0.7805373668670654, + "learning_rate": 0.0001835503875948586, + "loss": 2.6453, + "step": 3785 + }, + { + "epoch": 0.30554434670325237, + "grad_norm": 0.7753674983978271, + "learning_rate": 0.0001835417118973007, + "loss": 2.7188, + "step": 3786 + }, + { + "epoch": 0.30562505043983534, + "grad_norm": 0.719774603843689, + "learning_rate": 0.00018353303411766496, + "loss": 2.69, + "step": 3787 + }, + { + "epoch": 0.3057057541764184, + "grad_norm": 0.786780059337616, + "learning_rate": 0.00018352435425616763, + "loss": 2.7015, + "step": 3788 + }, + { + "epoch": 0.30578645791300135, + "grad_norm": 0.7481613159179688, + "learning_rate": 0.00018351567231302508, + "loss": 2.6267, + "step": 3789 + }, + { + "epoch": 0.3058671616495844, + "grad_norm": 0.8138384222984314, + "learning_rate": 0.00018350698828845365, + "loss": 2.7301, + "step": 3790 + }, + { + "epoch": 0.30594786538616736, + "grad_norm": 0.7911081314086914, + "learning_rate": 0.00018349830218266982, + "loss": 2.6661, + "step": 3791 + }, + { + "epoch": 0.3060285691227504, + "grad_norm": 0.763179361820221, + "learning_rate": 0.00018348961399588997, + "loss": 2.6509, + "step": 3792 + }, + { + "epoch": 0.30610927285933337, + "grad_norm": 0.8214982748031616, + "learning_rate": 0.00018348092372833072, + "loss": 2.6951, + "step": 3793 + }, + { + "epoch": 0.3061899765959164, + "grad_norm": 0.7271003127098083, + "learning_rate": 
0.00018347223138020865, + "loss": 2.7227, + "step": 3794 + }, + { + "epoch": 0.3062706803324994, + "grad_norm": 0.7727730870246887, + "learning_rate": 0.00018346353695174037, + "loss": 2.721, + "step": 3795 + }, + { + "epoch": 0.3063513840690824, + "grad_norm": 0.844895601272583, + "learning_rate": 0.00018345484044314257, + "loss": 2.6757, + "step": 3796 + }, + { + "epoch": 0.3064320878056654, + "grad_norm": 0.7409898638725281, + "learning_rate": 0.00018344614185463197, + "loss": 2.6798, + "step": 3797 + }, + { + "epoch": 0.3065127915422484, + "grad_norm": 0.8284425139427185, + "learning_rate": 0.00018343744118642542, + "loss": 2.7573, + "step": 3798 + }, + { + "epoch": 0.3065934952788314, + "grad_norm": 0.7535427808761597, + "learning_rate": 0.00018342873843873973, + "loss": 2.7026, + "step": 3799 + }, + { + "epoch": 0.30667419901541443, + "grad_norm": 0.8013898730278015, + "learning_rate": 0.00018342003361179176, + "loss": 2.7331, + "step": 3800 + }, + { + "epoch": 0.3067549027519974, + "grad_norm": 0.7458386421203613, + "learning_rate": 0.0001834113267057985, + "loss": 2.6976, + "step": 3801 + }, + { + "epoch": 0.30683560648858044, + "grad_norm": 0.8333673477172852, + "learning_rate": 0.00018340261772097695, + "loss": 2.7064, + "step": 3802 + }, + { + "epoch": 0.3069163102251634, + "grad_norm": 0.7273485064506531, + "learning_rate": 0.00018339390665754414, + "loss": 2.6619, + "step": 3803 + }, + { + "epoch": 0.30699701396174645, + "grad_norm": 0.8199014067649841, + "learning_rate": 0.0001833851935157172, + "loss": 2.654, + "step": 3804 + }, + { + "epoch": 0.3070777176983294, + "grad_norm": 0.780197024345398, + "learning_rate": 0.00018337647829571324, + "loss": 2.6814, + "step": 3805 + }, + { + "epoch": 0.30715842143491245, + "grad_norm": 0.7214049100875854, + "learning_rate": 0.0001833677609977495, + "loss": 2.709, + "step": 3806 + }, + { + "epoch": 0.30723912517149543, + "grad_norm": 0.7680457830429077, + "learning_rate": 0.00018335904162204326, + "loss": 
2.6628, + "step": 3807 + }, + { + "epoch": 0.30731982890807846, + "grad_norm": 0.760728120803833, + "learning_rate": 0.00018335032016881178, + "loss": 2.7005, + "step": 3808 + }, + { + "epoch": 0.30740053264466144, + "grad_norm": 0.7631687521934509, + "learning_rate": 0.00018334159663827243, + "loss": 2.7012, + "step": 3809 + }, + { + "epoch": 0.30748123638124447, + "grad_norm": 0.7515785694122314, + "learning_rate": 0.00018333287103064266, + "loss": 2.7062, + "step": 3810 + }, + { + "epoch": 0.30756194011782745, + "grad_norm": 0.804500162601471, + "learning_rate": 0.00018332414334613987, + "loss": 2.7888, + "step": 3811 + }, + { + "epoch": 0.3076426438544105, + "grad_norm": 0.7551451325416565, + "learning_rate": 0.00018331541358498164, + "loss": 2.6345, + "step": 3812 + }, + { + "epoch": 0.30772334759099346, + "grad_norm": 0.7342958450317383, + "learning_rate": 0.0001833066817473855, + "loss": 2.6601, + "step": 3813 + }, + { + "epoch": 0.3078040513275765, + "grad_norm": 0.8059296607971191, + "learning_rate": 0.0001832979478335691, + "loss": 2.7694, + "step": 3814 + }, + { + "epoch": 0.30788475506415947, + "grad_norm": 0.7037352919578552, + "learning_rate": 0.0001832892118437501, + "loss": 2.6788, + "step": 3815 + }, + { + "epoch": 0.3079654588007425, + "grad_norm": 0.759509801864624, + "learning_rate": 0.0001832804737781462, + "loss": 2.7115, + "step": 3816 + }, + { + "epoch": 0.3080461625373255, + "grad_norm": 0.7911720871925354, + "learning_rate": 0.00018327173363697524, + "loss": 2.6676, + "step": 3817 + }, + { + "epoch": 0.3081268662739085, + "grad_norm": 0.7592991590499878, + "learning_rate": 0.00018326299142045496, + "loss": 2.7245, + "step": 3818 + }, + { + "epoch": 0.3082075700104915, + "grad_norm": 0.7620227932929993, + "learning_rate": 0.00018325424712880333, + "loss": 2.7224, + "step": 3819 + }, + { + "epoch": 0.3082882737470745, + "grad_norm": 0.7834638953208923, + "learning_rate": 0.0001832455007622382, + "loss": 2.7469, + "step": 3820 + }, + { + 
"epoch": 0.3083689774836575, + "grad_norm": 0.7765992879867554, + "learning_rate": 0.00018323675232097757, + "loss": 2.7193, + "step": 3821 + }, + { + "epoch": 0.3084496812202405, + "grad_norm": 0.7334728837013245, + "learning_rate": 0.00018322800180523949, + "loss": 2.667, + "step": 3822 + }, + { + "epoch": 0.3085303849568235, + "grad_norm": 0.7674607634544373, + "learning_rate": 0.00018321924921524207, + "loss": 2.6479, + "step": 3823 + }, + { + "epoch": 0.30861108869340653, + "grad_norm": 0.7616469860076904, + "learning_rate": 0.0001832104945512034, + "loss": 2.6535, + "step": 3824 + }, + { + "epoch": 0.3086917924299895, + "grad_norm": 0.7693164944648743, + "learning_rate": 0.00018320173781334172, + "loss": 2.7616, + "step": 3825 + }, + { + "epoch": 0.3087724961665725, + "grad_norm": 0.7099221348762512, + "learning_rate": 0.0001831929790018752, + "loss": 2.6729, + "step": 3826 + }, + { + "epoch": 0.3088531999031555, + "grad_norm": 0.7389346957206726, + "learning_rate": 0.00018318421811702222, + "loss": 2.6396, + "step": 3827 + }, + { + "epoch": 0.3089339036397385, + "grad_norm": 0.8302628397941589, + "learning_rate": 0.00018317545515900106, + "loss": 2.6786, + "step": 3828 + }, + { + "epoch": 0.3090146073763215, + "grad_norm": 0.7441998720169067, + "learning_rate": 0.00018316669012803015, + "loss": 2.6769, + "step": 3829 + }, + { + "epoch": 0.3090953111129045, + "grad_norm": 0.8454675674438477, + "learning_rate": 0.00018315792302432788, + "loss": 2.7275, + "step": 3830 + }, + { + "epoch": 0.30917601484948753, + "grad_norm": 0.8129739761352539, + "learning_rate": 0.00018314915384811282, + "loss": 2.7603, + "step": 3831 + }, + { + "epoch": 0.3092567185860705, + "grad_norm": 0.7525617480278015, + "learning_rate": 0.00018314038259960349, + "loss": 2.7156, + "step": 3832 + }, + { + "epoch": 0.30933742232265354, + "grad_norm": 0.7319022417068481, + "learning_rate": 0.0001831316092790185, + "loss": 2.676, + "step": 3833 + }, + { + "epoch": 0.3094181260592365, + 
"grad_norm": 0.7767768502235413, + "learning_rate": 0.00018312283388657646, + "loss": 2.7022, + "step": 3834 + }, + { + "epoch": 0.30949882979581955, + "grad_norm": 0.709293007850647, + "learning_rate": 0.00018311405642249616, + "loss": 2.6241, + "step": 3835 + }, + { + "epoch": 0.30957953353240253, + "grad_norm": 0.715360701084137, + "learning_rate": 0.0001831052768869963, + "loss": 2.6777, + "step": 3836 + }, + { + "epoch": 0.30966023726898556, + "grad_norm": 0.7361319065093994, + "learning_rate": 0.0001830964952802957, + "loss": 2.6539, + "step": 3837 + }, + { + "epoch": 0.30974094100556854, + "grad_norm": 0.7243087291717529, + "learning_rate": 0.0001830877116026132, + "loss": 2.7506, + "step": 3838 + }, + { + "epoch": 0.30982164474215157, + "grad_norm": 0.7361106872558594, + "learning_rate": 0.00018307892585416776, + "loss": 2.697, + "step": 3839 + }, + { + "epoch": 0.30990234847873455, + "grad_norm": 0.7541893720626831, + "learning_rate": 0.00018307013803517833, + "loss": 2.694, + "step": 3840 + }, + { + "epoch": 0.3099830522153176, + "grad_norm": 0.7235575914382935, + "learning_rate": 0.00018306134814586388, + "loss": 2.6711, + "step": 3841 + }, + { + "epoch": 0.31006375595190055, + "grad_norm": 0.7868196368217468, + "learning_rate": 0.00018305255618644354, + "loss": 2.7177, + "step": 3842 + }, + { + "epoch": 0.3101444596884836, + "grad_norm": 0.8074443340301514, + "learning_rate": 0.00018304376215713637, + "loss": 2.7293, + "step": 3843 + }, + { + "epoch": 0.31022516342506656, + "grad_norm": 0.6993385553359985, + "learning_rate": 0.00018303496605816158, + "loss": 2.6942, + "step": 3844 + }, + { + "epoch": 0.3103058671616496, + "grad_norm": 0.7272824645042419, + "learning_rate": 0.00018302616788973839, + "loss": 2.7093, + "step": 3845 + }, + { + "epoch": 0.31038657089823257, + "grad_norm": 0.7496963143348694, + "learning_rate": 0.00018301736765208605, + "loss": 2.7096, + "step": 3846 + }, + { + "epoch": 0.3104672746348156, + "grad_norm": 0.7407644987106323, + 
"learning_rate": 0.00018300856534542387, + "loss": 2.6956, + "step": 3847 + }, + { + "epoch": 0.3105479783713986, + "grad_norm": 0.742382287979126, + "learning_rate": 0.00018299976096997132, + "loss": 2.6744, + "step": 3848 + }, + { + "epoch": 0.3106286821079816, + "grad_norm": 0.7314567565917969, + "learning_rate": 0.0001829909545259477, + "loss": 2.7544, + "step": 3849 + }, + { + "epoch": 0.3107093858445646, + "grad_norm": 0.7550896406173706, + "learning_rate": 0.0001829821460135726, + "loss": 2.714, + "step": 3850 + }, + { + "epoch": 0.3107900895811476, + "grad_norm": 0.7496031522750854, + "learning_rate": 0.00018297333543306548, + "loss": 2.6718, + "step": 3851 + }, + { + "epoch": 0.3108707933177306, + "grad_norm": 0.7600073218345642, + "learning_rate": 0.00018296452278464596, + "loss": 2.7141, + "step": 3852 + }, + { + "epoch": 0.31095149705431363, + "grad_norm": 0.7242388129234314, + "learning_rate": 0.00018295570806853366, + "loss": 2.7407, + "step": 3853 + }, + { + "epoch": 0.3110322007908966, + "grad_norm": 0.723874568939209, + "learning_rate": 0.00018294689128494824, + "loss": 2.7253, + "step": 3854 + }, + { + "epoch": 0.31111290452747964, + "grad_norm": 0.7902834415435791, + "learning_rate": 0.00018293807243410947, + "loss": 2.7118, + "step": 3855 + }, + { + "epoch": 0.3111936082640626, + "grad_norm": 0.7676794528961182, + "learning_rate": 0.00018292925151623717, + "loss": 2.684, + "step": 3856 + }, + { + "epoch": 0.31127431200064565, + "grad_norm": 0.767431378364563, + "learning_rate": 0.0001829204285315511, + "loss": 2.6936, + "step": 3857 + }, + { + "epoch": 0.3113550157372286, + "grad_norm": 0.7802234888076782, + "learning_rate": 0.00018291160348027122, + "loss": 2.7181, + "step": 3858 + }, + { + "epoch": 0.31143571947381166, + "grad_norm": 0.7823610305786133, + "learning_rate": 0.00018290277636261743, + "loss": 2.7014, + "step": 3859 + }, + { + "epoch": 0.31151642321039463, + "grad_norm": 0.8199869394302368, + "learning_rate": 
0.00018289394717880978, + "loss": 2.73, + "step": 3860 + }, + { + "epoch": 0.31159712694697766, + "grad_norm": 0.7725761532783508, + "learning_rate": 0.00018288511592906822, + "loss": 2.6978, + "step": 3861 + }, + { + "epoch": 0.31167783068356064, + "grad_norm": 0.752034068107605, + "learning_rate": 0.00018287628261361296, + "loss": 2.6635, + "step": 3862 + }, + { + "epoch": 0.3117585344201437, + "grad_norm": 0.7961714267730713, + "learning_rate": 0.0001828674472326641, + "loss": 2.7047, + "step": 3863 + }, + { + "epoch": 0.31183923815672665, + "grad_norm": 0.7413069605827332, + "learning_rate": 0.00018285860978644182, + "loss": 2.6872, + "step": 3864 + }, + { + "epoch": 0.3119199418933097, + "grad_norm": 0.8943146467208862, + "learning_rate": 0.00018284977027516636, + "loss": 2.7611, + "step": 3865 + }, + { + "epoch": 0.31200064562989266, + "grad_norm": 0.7663856744766235, + "learning_rate": 0.0001828409286990581, + "loss": 2.7541, + "step": 3866 + }, + { + "epoch": 0.3120813493664757, + "grad_norm": 0.7557348608970642, + "learning_rate": 0.00018283208505833731, + "loss": 2.6633, + "step": 3867 + }, + { + "epoch": 0.31216205310305867, + "grad_norm": 0.7690094113349915, + "learning_rate": 0.00018282323935322445, + "loss": 2.7117, + "step": 3868 + }, + { + "epoch": 0.3122427568396417, + "grad_norm": 0.8059033751487732, + "learning_rate": 0.00018281439158393997, + "loss": 2.6743, + "step": 3869 + }, + { + "epoch": 0.3123234605762247, + "grad_norm": 0.7877150774002075, + "learning_rate": 0.00018280554175070438, + "loss": 2.6546, + "step": 3870 + }, + { + "epoch": 0.3124041643128077, + "grad_norm": 0.799670934677124, + "learning_rate": 0.0001827966898537382, + "loss": 2.7184, + "step": 3871 + }, + { + "epoch": 0.3124848680493907, + "grad_norm": 0.8353915214538574, + "learning_rate": 0.0001827878358932621, + "loss": 2.7235, + "step": 3872 + }, + { + "epoch": 0.3125655717859737, + "grad_norm": 0.7954776883125305, + "learning_rate": 0.00018277897986949672, + "loss": 
2.5992, + "step": 3873 + }, + { + "epoch": 0.3126462755225567, + "grad_norm": 0.7959856986999512, + "learning_rate": 0.00018277012178266277, + "loss": 2.6877, + "step": 3874 + }, + { + "epoch": 0.3127269792591397, + "grad_norm": 0.8220208883285522, + "learning_rate": 0.00018276126163298102, + "loss": 2.6891, + "step": 3875 + }, + { + "epoch": 0.3128076829957227, + "grad_norm": 0.7827965021133423, + "learning_rate": 0.0001827523994206723, + "loss": 2.7271, + "step": 3876 + }, + { + "epoch": 0.3128883867323057, + "grad_norm": 0.764369010925293, + "learning_rate": 0.00018274353514595746, + "loss": 2.6661, + "step": 3877 + }, + { + "epoch": 0.3129690904688887, + "grad_norm": 0.7440944314002991, + "learning_rate": 0.00018273466880905744, + "loss": 2.6621, + "step": 3878 + }, + { + "epoch": 0.3130497942054717, + "grad_norm": 0.8544813394546509, + "learning_rate": 0.00018272580041019319, + "loss": 2.7168, + "step": 3879 + }, + { + "epoch": 0.3131304979420547, + "grad_norm": 0.7232592701911926, + "learning_rate": 0.00018271692994958577, + "loss": 2.6666, + "step": 3880 + }, + { + "epoch": 0.3132112016786377, + "grad_norm": 0.750525712966919, + "learning_rate": 0.00018270805742745617, + "loss": 2.6984, + "step": 3881 + }, + { + "epoch": 0.31329190541522073, + "grad_norm": 0.8195550441741943, + "learning_rate": 0.00018269918284402565, + "loss": 2.7183, + "step": 3882 + }, + { + "epoch": 0.3133726091518037, + "grad_norm": 0.7695632576942444, + "learning_rate": 0.0001826903061995153, + "loss": 2.7092, + "step": 3883 + }, + { + "epoch": 0.31345331288838674, + "grad_norm": 0.7631582617759705, + "learning_rate": 0.0001826814274941463, + "loss": 2.7061, + "step": 3884 + }, + { + "epoch": 0.3135340166249697, + "grad_norm": 0.8318471908569336, + "learning_rate": 0.0001826725467281401, + "loss": 2.694, + "step": 3885 + }, + { + "epoch": 0.31361472036155275, + "grad_norm": 0.7313492298126221, + "learning_rate": 0.00018266366390171784, + "loss": 2.6729, + "step": 3886 + }, + { + 
"epoch": 0.3136954240981357, + "grad_norm": 0.7508631944656372, + "learning_rate": 0.00018265477901510105, + "loss": 2.731, + "step": 3887 + }, + { + "epoch": 0.31377612783471875, + "grad_norm": 0.8106402158737183, + "learning_rate": 0.00018264589206851107, + "loss": 2.7113, + "step": 3888 + }, + { + "epoch": 0.31385683157130173, + "grad_norm": 0.771542489528656, + "learning_rate": 0.00018263700306216945, + "loss": 2.644, + "step": 3889 + }, + { + "epoch": 0.31393753530788476, + "grad_norm": 0.812441885471344, + "learning_rate": 0.00018262811199629768, + "loss": 2.6889, + "step": 3890 + }, + { + "epoch": 0.31401823904446774, + "grad_norm": 0.8231199979782104, + "learning_rate": 0.00018261921887111738, + "loss": 2.6466, + "step": 3891 + }, + { + "epoch": 0.31409894278105077, + "grad_norm": 0.7492454051971436, + "learning_rate": 0.00018261032368685012, + "loss": 2.6693, + "step": 3892 + }, + { + "epoch": 0.31417964651763375, + "grad_norm": 0.7651814222335815, + "learning_rate": 0.00018260142644371772, + "loss": 2.6569, + "step": 3893 + }, + { + "epoch": 0.3142603502542168, + "grad_norm": 0.7504465579986572, + "learning_rate": 0.0001825925271419418, + "loss": 2.684, + "step": 3894 + }, + { + "epoch": 0.31434105399079976, + "grad_norm": 0.749650239944458, + "learning_rate": 0.00018258362578174424, + "loss": 2.6482, + "step": 3895 + }, + { + "epoch": 0.3144217577273828, + "grad_norm": 0.8445256352424622, + "learning_rate": 0.00018257472236334686, + "loss": 2.727, + "step": 3896 + }, + { + "epoch": 0.31450246146396577, + "grad_norm": 0.7628257870674133, + "learning_rate": 0.0001825658168869715, + "loss": 2.7314, + "step": 3897 + }, + { + "epoch": 0.3145831652005488, + "grad_norm": 0.7738446593284607, + "learning_rate": 0.00018255690935284019, + "loss": 2.7478, + "step": 3898 + }, + { + "epoch": 0.3146638689371318, + "grad_norm": 0.7578958868980408, + "learning_rate": 0.00018254799976117486, + "loss": 2.6922, + "step": 3899 + }, + { + "epoch": 0.3147445726737148, + 
"grad_norm": 0.8367362022399902, + "learning_rate": 0.00018253908811219764, + "loss": 2.7347, + "step": 3900 + }, + { + "epoch": 0.3148252764102978, + "grad_norm": 0.7530354857444763, + "learning_rate": 0.00018253017440613057, + "loss": 2.7151, + "step": 3901 + }, + { + "epoch": 0.3149059801468808, + "grad_norm": 0.7168053388595581, + "learning_rate": 0.00018252125864319578, + "loss": 2.7072, + "step": 3902 + }, + { + "epoch": 0.3149866838834638, + "grad_norm": 0.7480056285858154, + "learning_rate": 0.00018251234082361555, + "loss": 2.6489, + "step": 3903 + }, + { + "epoch": 0.3150673876200468, + "grad_norm": 0.8563880324363708, + "learning_rate": 0.0001825034209476121, + "loss": 2.7384, + "step": 3904 + }, + { + "epoch": 0.3151480913566298, + "grad_norm": 0.7959346771240234, + "learning_rate": 0.0001824944990154077, + "loss": 2.631, + "step": 3905 + }, + { + "epoch": 0.31522879509321283, + "grad_norm": 0.7385980486869812, + "learning_rate": 0.00018248557502722476, + "loss": 2.7394, + "step": 3906 + }, + { + "epoch": 0.3153094988297958, + "grad_norm": 0.7682650685310364, + "learning_rate": 0.00018247664898328567, + "loss": 2.7327, + "step": 3907 + }, + { + "epoch": 0.31539020256637884, + "grad_norm": 0.7720316648483276, + "learning_rate": 0.0001824677208838129, + "loss": 2.6442, + "step": 3908 + }, + { + "epoch": 0.3154709063029618, + "grad_norm": 0.7927379608154297, + "learning_rate": 0.00018245879072902895, + "loss": 2.7738, + "step": 3909 + }, + { + "epoch": 0.31555161003954485, + "grad_norm": 0.7506012916564941, + "learning_rate": 0.00018244985851915637, + "loss": 2.6825, + "step": 3910 + }, + { + "epoch": 0.3156323137761278, + "grad_norm": 0.6996353268623352, + "learning_rate": 0.00018244092425441781, + "loss": 2.6783, + "step": 3911 + }, + { + "epoch": 0.31571301751271086, + "grad_norm": 0.8039344549179077, + "learning_rate": 0.00018243198793503588, + "loss": 2.7628, + "step": 3912 + }, + { + "epoch": 0.31579372124929384, + "grad_norm": 0.7890963554382324, + 
"learning_rate": 0.0001824230495612334, + "loss": 2.7512, + "step": 3913 + }, + { + "epoch": 0.31587442498587687, + "grad_norm": 0.7470870614051819, + "learning_rate": 0.00018241410913323301, + "loss": 2.7058, + "step": 3914 + }, + { + "epoch": 0.31595512872245984, + "grad_norm": 0.7056336402893066, + "learning_rate": 0.0001824051666512576, + "loss": 2.6091, + "step": 3915 + }, + { + "epoch": 0.3160358324590429, + "grad_norm": 0.7818490862846375, + "learning_rate": 0.00018239622211553002, + "loss": 2.7509, + "step": 3916 + }, + { + "epoch": 0.31611653619562585, + "grad_norm": 0.7590607404708862, + "learning_rate": 0.0001823872755262732, + "loss": 2.7238, + "step": 3917 + }, + { + "epoch": 0.3161972399322089, + "grad_norm": 0.7157841920852661, + "learning_rate": 0.00018237832688371014, + "loss": 2.6639, + "step": 3918 + }, + { + "epoch": 0.31627794366879186, + "grad_norm": 0.7515804171562195, + "learning_rate": 0.00018236937618806382, + "loss": 2.6973, + "step": 3919 + }, + { + "epoch": 0.3163586474053749, + "grad_norm": 0.6691949963569641, + "learning_rate": 0.00018236042343955733, + "loss": 2.727, + "step": 3920 + }, + { + "epoch": 0.31643935114195787, + "grad_norm": 0.8122327327728271, + "learning_rate": 0.0001823514686384138, + "loss": 2.7513, + "step": 3921 + }, + { + "epoch": 0.3165200548785409, + "grad_norm": 0.7813653349876404, + "learning_rate": 0.0001823425117848564, + "loss": 2.7037, + "step": 3922 + }, + { + "epoch": 0.3166007586151239, + "grad_norm": 0.6869354844093323, + "learning_rate": 0.00018233355287910834, + "loss": 2.693, + "step": 3923 + }, + { + "epoch": 0.3166814623517069, + "grad_norm": 0.7773037552833557, + "learning_rate": 0.00018232459192139296, + "loss": 2.687, + "step": 3924 + }, + { + "epoch": 0.3167621660882899, + "grad_norm": 0.7644256949424744, + "learning_rate": 0.00018231562891193352, + "loss": 2.6753, + "step": 3925 + }, + { + "epoch": 0.3168428698248729, + "grad_norm": 0.8427005410194397, + "learning_rate": 
0.00018230666385095343, + "loss": 2.6641, + "step": 3926 + }, + { + "epoch": 0.3169235735614559, + "grad_norm": 0.7194599509239197, + "learning_rate": 0.0001822976967386761, + "loss": 2.7091, + "step": 3927 + }, + { + "epoch": 0.3170042772980389, + "grad_norm": 0.7710655331611633, + "learning_rate": 0.00018228872757532512, + "loss": 2.6938, + "step": 3928 + }, + { + "epoch": 0.3170849810346219, + "grad_norm": 0.8003759980201721, + "learning_rate": 0.0001822797563611239, + "loss": 2.7019, + "step": 3929 + }, + { + "epoch": 0.3171656847712049, + "grad_norm": 0.7960470914840698, + "learning_rate": 0.00018227078309629606, + "loss": 2.661, + "step": 3930 + }, + { + "epoch": 0.3172463885077879, + "grad_norm": 0.7731126546859741, + "learning_rate": 0.00018226180778106526, + "loss": 2.7023, + "step": 3931 + }, + { + "epoch": 0.3173270922443709, + "grad_norm": 0.7561383843421936, + "learning_rate": 0.00018225283041565515, + "loss": 2.6768, + "step": 3932 + }, + { + "epoch": 0.3174077959809539, + "grad_norm": 0.7578409910202026, + "learning_rate": 0.0001822438510002895, + "loss": 2.7145, + "step": 3933 + }, + { + "epoch": 0.3174884997175369, + "grad_norm": 0.7901952862739563, + "learning_rate": 0.00018223486953519214, + "loss": 2.7121, + "step": 3934 + }, + { + "epoch": 0.31756920345411993, + "grad_norm": 0.82305908203125, + "learning_rate": 0.0001822258860205868, + "loss": 2.7553, + "step": 3935 + }, + { + "epoch": 0.3176499071907029, + "grad_norm": 0.748055636882782, + "learning_rate": 0.0001822169004566975, + "loss": 2.7236, + "step": 3936 + }, + { + "epoch": 0.31773061092728594, + "grad_norm": 0.7981358766555786, + "learning_rate": 0.0001822079128437481, + "loss": 2.7444, + "step": 3937 + }, + { + "epoch": 0.3178113146638689, + "grad_norm": 0.7938945889472961, + "learning_rate": 0.0001821989231819626, + "loss": 2.7512, + "step": 3938 + }, + { + "epoch": 0.31789201840045195, + "grad_norm": 0.7250397205352783, + "learning_rate": 0.0001821899314715651, + "loss": 2.6843, + 
"step": 3939 + }, + { + "epoch": 0.3179727221370349, + "grad_norm": 0.8844723701477051, + "learning_rate": 0.00018218093771277965, + "loss": 2.6295, + "step": 3940 + }, + { + "epoch": 0.31805342587361796, + "grad_norm": 0.7545698881149292, + "learning_rate": 0.0001821719419058304, + "loss": 2.7478, + "step": 3941 + }, + { + "epoch": 0.31813412961020093, + "grad_norm": 0.7254738807678223, + "learning_rate": 0.00018216294405094157, + "loss": 2.665, + "step": 3942 + }, + { + "epoch": 0.31821483334678397, + "grad_norm": 0.7664754390716553, + "learning_rate": 0.00018215394414833737, + "loss": 2.7431, + "step": 3943 + }, + { + "epoch": 0.31829553708336694, + "grad_norm": 0.8250303864479065, + "learning_rate": 0.00018214494219824217, + "loss": 2.6957, + "step": 3944 + }, + { + "epoch": 0.31837624081995, + "grad_norm": 0.7425532341003418, + "learning_rate": 0.00018213593820088026, + "loss": 2.666, + "step": 3945 + }, + { + "epoch": 0.31845694455653295, + "grad_norm": 0.6943121552467346, + "learning_rate": 0.00018212693215647604, + "loss": 2.716, + "step": 3946 + }, + { + "epoch": 0.318537648293116, + "grad_norm": 0.732829213142395, + "learning_rate": 0.00018211792406525403, + "loss": 2.6557, + "step": 3947 + }, + { + "epoch": 0.31861835202969896, + "grad_norm": 0.7666537165641785, + "learning_rate": 0.00018210891392743866, + "loss": 2.7275, + "step": 3948 + }, + { + "epoch": 0.318699055766282, + "grad_norm": 0.7652621865272522, + "learning_rate": 0.00018209990174325455, + "loss": 2.6372, + "step": 3949 + }, + { + "epoch": 0.31877975950286497, + "grad_norm": 0.7416055202484131, + "learning_rate": 0.00018209088751292626, + "loss": 2.6688, + "step": 3950 + }, + { + "epoch": 0.318860463239448, + "grad_norm": 0.7504609227180481, + "learning_rate": 0.00018208187123667848, + "loss": 2.6912, + "step": 3951 + }, + { + "epoch": 0.318941166976031, + "grad_norm": 0.7308809757232666, + "learning_rate": 0.00018207285291473588, + "loss": 2.7272, + "step": 3952 + }, + { + "epoch": 
0.319021870712614, + "grad_norm": 0.8031618595123291, + "learning_rate": 0.00018206383254732326, + "loss": 2.7354, + "step": 3953 + }, + { + "epoch": 0.319102574449197, + "grad_norm": 0.81386798620224, + "learning_rate": 0.00018205481013466542, + "loss": 2.676, + "step": 3954 + }, + { + "epoch": 0.31918327818578, + "grad_norm": 0.7845911383628845, + "learning_rate": 0.0001820457856769872, + "loss": 2.7094, + "step": 3955 + }, + { + "epoch": 0.319263981922363, + "grad_norm": 0.7189298272132874, + "learning_rate": 0.00018203675917451357, + "loss": 2.6764, + "step": 3956 + }, + { + "epoch": 0.319344685658946, + "grad_norm": 0.8253228664398193, + "learning_rate": 0.00018202773062746944, + "loss": 2.6805, + "step": 3957 + }, + { + "epoch": 0.319425389395529, + "grad_norm": 0.7965289950370789, + "learning_rate": 0.0001820187000360798, + "loss": 2.7148, + "step": 3958 + }, + { + "epoch": 0.31950609313211203, + "grad_norm": 0.7505398988723755, + "learning_rate": 0.0001820096674005698, + "loss": 2.6732, + "step": 3959 + }, + { + "epoch": 0.319586796868695, + "grad_norm": 0.7554877400398254, + "learning_rate": 0.0001820006327211645, + "loss": 2.7467, + "step": 3960 + }, + { + "epoch": 0.31966750060527804, + "grad_norm": 0.7836194038391113, + "learning_rate": 0.00018199159599808907, + "loss": 2.7252, + "step": 3961 + }, + { + "epoch": 0.319748204341861, + "grad_norm": 0.7967261672019958, + "learning_rate": 0.00018198255723156877, + "loss": 2.6814, + "step": 3962 + }, + { + "epoch": 0.31982890807844405, + "grad_norm": 0.7411713600158691, + "learning_rate": 0.00018197351642182882, + "loss": 2.6928, + "step": 3963 + }, + { + "epoch": 0.31990961181502703, + "grad_norm": 0.6961422562599182, + "learning_rate": 0.00018196447356909454, + "loss": 2.6651, + "step": 3964 + }, + { + "epoch": 0.31999031555161006, + "grad_norm": 0.7245771884918213, + "learning_rate": 0.00018195542867359134, + "loss": 2.6726, + "step": 3965 + }, + { + "epoch": 0.32007101928819304, + "grad_norm": 
0.784654974937439, + "learning_rate": 0.00018194638173554462, + "loss": 2.6829, + "step": 3966 + }, + { + "epoch": 0.32015172302477607, + "grad_norm": 0.7373329997062683, + "learning_rate": 0.00018193733275517985, + "loss": 2.6481, + "step": 3967 + }, + { + "epoch": 0.32023242676135905, + "grad_norm": 0.7878682613372803, + "learning_rate": 0.00018192828173272258, + "loss": 2.6701, + "step": 3968 + }, + { + "epoch": 0.3203131304979421, + "grad_norm": 0.759676992893219, + "learning_rate": 0.00018191922866839835, + "loss": 2.7218, + "step": 3969 + }, + { + "epoch": 0.32039383423452505, + "grad_norm": 0.7923088669776917, + "learning_rate": 0.00018191017356243282, + "loss": 2.6841, + "step": 3970 + }, + { + "epoch": 0.3204745379711081, + "grad_norm": 0.7084882855415344, + "learning_rate": 0.00018190111641505164, + "loss": 2.7167, + "step": 3971 + }, + { + "epoch": 0.32055524170769106, + "grad_norm": 0.7166235446929932, + "learning_rate": 0.00018189205722648054, + "loss": 2.6647, + "step": 3972 + }, + { + "epoch": 0.3206359454442741, + "grad_norm": 0.7997722029685974, + "learning_rate": 0.0001818829959969453, + "loss": 2.7199, + "step": 3973 + }, + { + "epoch": 0.32071664918085707, + "grad_norm": 0.8309516310691833, + "learning_rate": 0.0001818739327266718, + "loss": 2.8006, + "step": 3974 + }, + { + "epoch": 0.3207973529174401, + "grad_norm": 0.7164002656936646, + "learning_rate": 0.00018186486741588582, + "loss": 2.6258, + "step": 3975 + }, + { + "epoch": 0.3208780566540231, + "grad_norm": 0.7715865969657898, + "learning_rate": 0.0001818558000648134, + "loss": 2.7034, + "step": 3976 + }, + { + "epoch": 0.3209587603906061, + "grad_norm": 0.7806593775749207, + "learning_rate": 0.0001818467306736804, + "loss": 2.6758, + "step": 3977 + }, + { + "epoch": 0.3210394641271891, + "grad_norm": 0.8026594519615173, + "learning_rate": 0.00018183765924271298, + "loss": 2.6976, + "step": 3978 + }, + { + "epoch": 0.32112016786377207, + "grad_norm": 0.7971245050430298, + 
"learning_rate": 0.00018182858577213716, + "loss": 2.7312, + "step": 3979 + }, + { + "epoch": 0.3212008716003551, + "grad_norm": 0.7347297072410583, + "learning_rate": 0.00018181951026217908, + "loss": 2.6664, + "step": 3980 + }, + { + "epoch": 0.3212815753369381, + "grad_norm": 0.7929779291152954, + "learning_rate": 0.0001818104327130649, + "loss": 2.6603, + "step": 3981 + }, + { + "epoch": 0.3213622790735211, + "grad_norm": 0.7465224862098694, + "learning_rate": 0.00018180135312502089, + "loss": 2.6566, + "step": 3982 + }, + { + "epoch": 0.3214429828101041, + "grad_norm": 0.7114695906639099, + "learning_rate": 0.00018179227149827334, + "loss": 2.6492, + "step": 3983 + }, + { + "epoch": 0.3215236865466871, + "grad_norm": 0.7179337739944458, + "learning_rate": 0.00018178318783304857, + "loss": 2.6778, + "step": 3984 + }, + { + "epoch": 0.3216043902832701, + "grad_norm": 0.7182629704475403, + "learning_rate": 0.000181774102129573, + "loss": 2.7057, + "step": 3985 + }, + { + "epoch": 0.3216850940198531, + "grad_norm": 0.7383119463920593, + "learning_rate": 0.000181765014388073, + "loss": 2.6633, + "step": 3986 + }, + { + "epoch": 0.3217657977564361, + "grad_norm": 0.7340527176856995, + "learning_rate": 0.00018175592460877512, + "loss": 2.6838, + "step": 3987 + }, + { + "epoch": 0.32184650149301913, + "grad_norm": 0.7934359312057495, + "learning_rate": 0.00018174683279190593, + "loss": 2.6795, + "step": 3988 + }, + { + "epoch": 0.3219272052296021, + "grad_norm": 0.6960840821266174, + "learning_rate": 0.00018173773893769192, + "loss": 2.6669, + "step": 3989 + }, + { + "epoch": 0.32200790896618514, + "grad_norm": 0.7513574361801147, + "learning_rate": 0.00018172864304635985, + "loss": 2.6744, + "step": 3990 + }, + { + "epoch": 0.3220886127027681, + "grad_norm": 0.7516636848449707, + "learning_rate": 0.00018171954511813629, + "loss": 2.6652, + "step": 3991 + }, + { + "epoch": 0.32216931643935115, + "grad_norm": 0.7817716002464294, + "learning_rate": 
0.00018171044515324808, + "loss": 2.6671, + "step": 3992 + }, + { + "epoch": 0.3222500201759341, + "grad_norm": 0.6859925389289856, + "learning_rate": 0.000181701343151922, + "loss": 2.6984, + "step": 3993 + }, + { + "epoch": 0.32233072391251716, + "grad_norm": 0.7669627666473389, + "learning_rate": 0.00018169223911438485, + "loss": 2.7102, + "step": 3994 + }, + { + "epoch": 0.32241142764910014, + "grad_norm": 0.784724235534668, + "learning_rate": 0.00018168313304086357, + "loss": 2.7413, + "step": 3995 + }, + { + "epoch": 0.32249213138568317, + "grad_norm": 0.7341497540473938, + "learning_rate": 0.00018167402493158509, + "loss": 2.706, + "step": 3996 + }, + { + "epoch": 0.32257283512226614, + "grad_norm": 0.7975730299949646, + "learning_rate": 0.00018166491478677641, + "loss": 2.6896, + "step": 3997 + }, + { + "epoch": 0.3226535388588492, + "grad_norm": 0.8138537406921387, + "learning_rate": 0.00018165580260666458, + "loss": 2.6986, + "step": 3998 + }, + { + "epoch": 0.32273424259543215, + "grad_norm": 0.6734997034072876, + "learning_rate": 0.0001816466883914767, + "loss": 2.6686, + "step": 3999 + }, + { + "epoch": 0.3228149463320152, + "grad_norm": 0.7742779850959778, + "learning_rate": 0.00018163757214143992, + "loss": 2.7222, + "step": 4000 + }, + { + "epoch": 0.3228149463320152, + "eval_loss": 2.615234375, + "eval_runtime": 783.0394, + "eval_samples_per_second": 3.346, + "eval_steps_per_second": 0.558, + "step": 4000 + }, + { + "epoch": 0.32289565006859816, + "grad_norm": 0.7654715180397034, + "learning_rate": 0.00018162845385678145, + "loss": 2.7016, + "step": 4001 + }, + { + "epoch": 0.3229763538051812, + "grad_norm": 0.8698763251304626, + "learning_rate": 0.0001816193335377285, + "loss": 2.6709, + "step": 4002 + }, + { + "epoch": 0.32305705754176417, + "grad_norm": 0.758056640625, + "learning_rate": 0.00018161021118450843, + "loss": 2.7277, + "step": 4003 + }, + { + "epoch": 0.3231377612783472, + "grad_norm": 0.7462654113769531, + "learning_rate": 
0.00018160108679734856, + "loss": 2.623, + "step": 4004 + }, + { + "epoch": 0.3232184650149302, + "grad_norm": 0.7274953722953796, + "learning_rate": 0.00018159196037647628, + "loss": 2.6875, + "step": 4005 + }, + { + "epoch": 0.3232991687515132, + "grad_norm": 0.7737346887588501, + "learning_rate": 0.0001815828319221191, + "loss": 2.6967, + "step": 4006 + }, + { + "epoch": 0.3233798724880962, + "grad_norm": 0.7793172001838684, + "learning_rate": 0.00018157370143450448, + "loss": 2.724, + "step": 4007 + }, + { + "epoch": 0.3234605762246792, + "grad_norm": 0.7791805863380432, + "learning_rate": 0.00018156456891385995, + "loss": 2.6653, + "step": 4008 + }, + { + "epoch": 0.3235412799612622, + "grad_norm": 0.7225624918937683, + "learning_rate": 0.0001815554343604132, + "loss": 2.745, + "step": 4009 + }, + { + "epoch": 0.32362198369784523, + "grad_norm": 0.6958494782447815, + "learning_rate": 0.0001815462977743918, + "loss": 2.6856, + "step": 4010 + }, + { + "epoch": 0.3237026874344282, + "grad_norm": 0.7572030425071716, + "learning_rate": 0.0001815371591560235, + "loss": 2.7053, + "step": 4011 + }, + { + "epoch": 0.32378339117101124, + "grad_norm": 0.7133952975273132, + "learning_rate": 0.00018152801850553605, + "loss": 2.6984, + "step": 4012 + }, + { + "epoch": 0.3238640949075942, + "grad_norm": 0.7598705291748047, + "learning_rate": 0.00018151887582315728, + "loss": 2.6632, + "step": 4013 + }, + { + "epoch": 0.32394479864417725, + "grad_norm": 0.7670698165893555, + "learning_rate": 0.00018150973110911503, + "loss": 2.7035, + "step": 4014 + }, + { + "epoch": 0.3240255023807602, + "grad_norm": 0.7547060251235962, + "learning_rate": 0.00018150058436363723, + "loss": 2.6531, + "step": 4015 + }, + { + "epoch": 0.32410620611734325, + "grad_norm": 0.7943035364151001, + "learning_rate": 0.00018149143558695178, + "loss": 2.766, + "step": 4016 + }, + { + "epoch": 0.32418690985392623, + "grad_norm": 0.864356517791748, + "learning_rate": 0.00018148228477928675, + "loss": 
2.7134, + "step": 4017 + }, + { + "epoch": 0.32426761359050926, + "grad_norm": 0.7773902416229248, + "learning_rate": 0.00018147313194087018, + "loss": 2.6948, + "step": 4018 + }, + { + "epoch": 0.32434831732709224, + "grad_norm": 0.839131772518158, + "learning_rate": 0.0001814639770719302, + "loss": 2.7393, + "step": 4019 + }, + { + "epoch": 0.32442902106367527, + "grad_norm": 0.807837963104248, + "learning_rate": 0.00018145482017269498, + "loss": 2.7835, + "step": 4020 + }, + { + "epoch": 0.32450972480025825, + "grad_norm": 0.7133228182792664, + "learning_rate": 0.00018144566124339272, + "loss": 2.6859, + "step": 4021 + }, + { + "epoch": 0.3245904285368413, + "grad_norm": 0.8450621962547302, + "learning_rate": 0.00018143650028425162, + "loss": 2.7548, + "step": 4022 + }, + { + "epoch": 0.32467113227342426, + "grad_norm": 0.8594980835914612, + "learning_rate": 0.00018142733729550013, + "loss": 2.6636, + "step": 4023 + }, + { + "epoch": 0.3247518360100073, + "grad_norm": 0.7134621739387512, + "learning_rate": 0.0001814181722773665, + "loss": 2.6501, + "step": 4024 + }, + { + "epoch": 0.32483253974659027, + "grad_norm": 0.8630430698394775, + "learning_rate": 0.0001814090052300792, + "loss": 2.6994, + "step": 4025 + }, + { + "epoch": 0.3249132434831733, + "grad_norm": 0.7044873237609863, + "learning_rate": 0.00018139983615386666, + "loss": 2.6603, + "step": 4026 + }, + { + "epoch": 0.3249939472197563, + "grad_norm": 0.6896052360534668, + "learning_rate": 0.00018139066504895744, + "loss": 2.6649, + "step": 4027 + }, + { + "epoch": 0.3250746509563393, + "grad_norm": 0.802855372428894, + "learning_rate": 0.00018138149191558012, + "loss": 2.7067, + "step": 4028 + }, + { + "epoch": 0.3251553546929223, + "grad_norm": 0.7555437088012695, + "learning_rate": 0.00018137231675396324, + "loss": 2.6471, + "step": 4029 + }, + { + "epoch": 0.32523605842950526, + "grad_norm": 0.6846967339515686, + "learning_rate": 0.00018136313956433552, + "loss": 2.6774, + "step": 4030 + }, + { + 
"epoch": 0.3253167621660883, + "grad_norm": 0.7435858249664307, + "learning_rate": 0.0001813539603469257, + "loss": 2.7135, + "step": 4031 + }, + { + "epoch": 0.32539746590267127, + "grad_norm": 0.7669098377227783, + "learning_rate": 0.00018134477910196253, + "loss": 2.7014, + "step": 4032 + }, + { + "epoch": 0.3254781696392543, + "grad_norm": 0.7797521352767944, + "learning_rate": 0.00018133559582967482, + "loss": 2.7229, + "step": 4033 + }, + { + "epoch": 0.3255588733758373, + "grad_norm": 0.7377886176109314, + "learning_rate": 0.00018132641053029142, + "loss": 2.7196, + "step": 4034 + }, + { + "epoch": 0.3256395771124203, + "grad_norm": 0.7387986779212952, + "learning_rate": 0.0001813172232040413, + "loss": 2.687, + "step": 4035 + }, + { + "epoch": 0.3257202808490033, + "grad_norm": 0.7276624441146851, + "learning_rate": 0.0001813080338511534, + "loss": 2.6954, + "step": 4036 + }, + { + "epoch": 0.3258009845855863, + "grad_norm": 0.7929670214653015, + "learning_rate": 0.00018129884247185683, + "loss": 2.7431, + "step": 4037 + }, + { + "epoch": 0.3258816883221693, + "grad_norm": 0.7896441221237183, + "learning_rate": 0.0001812896490663805, + "loss": 2.6823, + "step": 4038 + }, + { + "epoch": 0.3259623920587523, + "grad_norm": 0.8642957210540771, + "learning_rate": 0.00018128045363495368, + "loss": 2.7334, + "step": 4039 + }, + { + "epoch": 0.3260430957953353, + "grad_norm": 0.7156081795692444, + "learning_rate": 0.00018127125617780542, + "loss": 2.6886, + "step": 4040 + }, + { + "epoch": 0.32612379953191833, + "grad_norm": 0.8260853290557861, + "learning_rate": 0.00018126205669516507, + "loss": 2.6802, + "step": 4041 + }, + { + "epoch": 0.3262045032685013, + "grad_norm": 0.6853542327880859, + "learning_rate": 0.00018125285518726182, + "loss": 2.6392, + "step": 4042 + }, + { + "epoch": 0.32628520700508434, + "grad_norm": 0.7574017643928528, + "learning_rate": 0.00018124365165432505, + "loss": 2.7412, + "step": 4043 + }, + { + "epoch": 0.3263659107416673, + 
"grad_norm": 0.8656191825866699, + "learning_rate": 0.00018123444609658408, + "loss": 2.6903, + "step": 4044 + }, + { + "epoch": 0.32644661447825035, + "grad_norm": 0.7443257570266724, + "learning_rate": 0.00018122523851426837, + "loss": 2.682, + "step": 4045 + }, + { + "epoch": 0.32652731821483333, + "grad_norm": 0.7222229242324829, + "learning_rate": 0.0001812160289076074, + "loss": 2.6196, + "step": 4046 + }, + { + "epoch": 0.32660802195141636, + "grad_norm": 0.8531985878944397, + "learning_rate": 0.00018120681727683066, + "loss": 2.6777, + "step": 4047 + }, + { + "epoch": 0.32668872568799934, + "grad_norm": 0.7380290627479553, + "learning_rate": 0.0001811976036221678, + "loss": 2.6847, + "step": 4048 + }, + { + "epoch": 0.32676942942458237, + "grad_norm": 0.7250707149505615, + "learning_rate": 0.00018118838794384837, + "loss": 2.6846, + "step": 4049 + }, + { + "epoch": 0.32685013316116535, + "grad_norm": 0.763504147529602, + "learning_rate": 0.00018117917024210208, + "loss": 2.69, + "step": 4050 + }, + { + "epoch": 0.3269308368977484, + "grad_norm": 0.7740737795829773, + "learning_rate": 0.00018116995051715867, + "loss": 2.6945, + "step": 4051 + }, + { + "epoch": 0.32701154063433135, + "grad_norm": 0.7777624726295471, + "learning_rate": 0.00018116072876924792, + "loss": 2.6918, + "step": 4052 + }, + { + "epoch": 0.3270922443709144, + "grad_norm": 0.7957910895347595, + "learning_rate": 0.0001811515049985997, + "loss": 2.7237, + "step": 4053 + }, + { + "epoch": 0.32717294810749736, + "grad_norm": 0.7828991413116455, + "learning_rate": 0.00018114227920544375, + "loss": 2.7008, + "step": 4054 + }, + { + "epoch": 0.3272536518440804, + "grad_norm": 0.6695161461830139, + "learning_rate": 0.00018113305139001016, + "loss": 2.7311, + "step": 4055 + }, + { + "epoch": 0.32733435558066337, + "grad_norm": 0.7693436145782471, + "learning_rate": 0.00018112382155252883, + "loss": 2.7102, + "step": 4056 + }, + { + "epoch": 0.3274150593172464, + "grad_norm": 0.7520042657852173, + 
"learning_rate": 0.0001811145896932298, + "loss": 2.6455, + "step": 4057 + }, + { + "epoch": 0.3274957630538294, + "grad_norm": 0.786834716796875, + "learning_rate": 0.00018110535581234317, + "loss": 2.6965, + "step": 4058 + }, + { + "epoch": 0.3275764667904124, + "grad_norm": 0.742001473903656, + "learning_rate": 0.00018109611991009905, + "loss": 2.7341, + "step": 4059 + }, + { + "epoch": 0.3276571705269954, + "grad_norm": 0.813522219657898, + "learning_rate": 0.00018108688198672766, + "loss": 2.8116, + "step": 4060 + }, + { + "epoch": 0.3277378742635784, + "grad_norm": 0.7611314058303833, + "learning_rate": 0.00018107764204245916, + "loss": 2.6741, + "step": 4061 + }, + { + "epoch": 0.3278185780001614, + "grad_norm": 0.7285993695259094, + "learning_rate": 0.00018106840007752392, + "loss": 2.671, + "step": 4062 + }, + { + "epoch": 0.32789928173674443, + "grad_norm": 0.773151695728302, + "learning_rate": 0.0001810591560921522, + "loss": 2.7106, + "step": 4063 + }, + { + "epoch": 0.3279799854733274, + "grad_norm": 0.7448920011520386, + "learning_rate": 0.00018104991008657445, + "loss": 2.7176, + "step": 4064 + }, + { + "epoch": 0.32806068920991044, + "grad_norm": 0.7088467478752136, + "learning_rate": 0.0001810406620610211, + "loss": 2.7085, + "step": 4065 + }, + { + "epoch": 0.3281413929464934, + "grad_norm": 0.7507789731025696, + "learning_rate": 0.00018103141201572255, + "loss": 2.7361, + "step": 4066 + }, + { + "epoch": 0.32822209668307645, + "grad_norm": 0.7065643072128296, + "learning_rate": 0.00018102215995090943, + "loss": 2.6573, + "step": 4067 + }, + { + "epoch": 0.3283028004196594, + "grad_norm": 0.6888713836669922, + "learning_rate": 0.0001810129058668123, + "loss": 2.6699, + "step": 4068 + }, + { + "epoch": 0.32838350415624246, + "grad_norm": 0.736347496509552, + "learning_rate": 0.00018100364976366174, + "loss": 2.7089, + "step": 4069 + }, + { + "epoch": 0.32846420789282543, + "grad_norm": 0.6854562759399414, + "learning_rate": 0.0001809943916416885, + 
"loss": 2.7051, + "step": 4070 + }, + { + "epoch": 0.32854491162940846, + "grad_norm": 0.7481048107147217, + "learning_rate": 0.0001809851315011233, + "loss": 2.7428, + "step": 4071 + }, + { + "epoch": 0.32862561536599144, + "grad_norm": 0.7600961923599243, + "learning_rate": 0.0001809758693421969, + "loss": 2.7153, + "step": 4072 + }, + { + "epoch": 0.3287063191025745, + "grad_norm": 0.7545063495635986, + "learning_rate": 0.00018096660516514024, + "loss": 2.6736, + "step": 4073 + }, + { + "epoch": 0.32878702283915745, + "grad_norm": 0.7967175841331482, + "learning_rate": 0.0001809573389701841, + "loss": 2.6711, + "step": 4074 + }, + { + "epoch": 0.3288677265757405, + "grad_norm": 0.7115446925163269, + "learning_rate": 0.00018094807075755943, + "loss": 2.6761, + "step": 4075 + }, + { + "epoch": 0.32894843031232346, + "grad_norm": 0.8230876326560974, + "learning_rate": 0.00018093880052749725, + "loss": 2.6749, + "step": 4076 + }, + { + "epoch": 0.3290291340489065, + "grad_norm": 0.8549706935882568, + "learning_rate": 0.00018092952828022856, + "loss": 2.7084, + "step": 4077 + }, + { + "epoch": 0.32910983778548947, + "grad_norm": 0.7379534244537354, + "learning_rate": 0.00018092025401598448, + "loss": 2.7241, + "step": 4078 + }, + { + "epoch": 0.3291905415220725, + "grad_norm": 0.7659998536109924, + "learning_rate": 0.00018091097773499616, + "loss": 2.7108, + "step": 4079 + }, + { + "epoch": 0.3292712452586555, + "grad_norm": 0.8074536323547363, + "learning_rate": 0.00018090169943749476, + "loss": 2.676, + "step": 4080 + }, + { + "epoch": 0.32935194899523845, + "grad_norm": 0.7588536143302917, + "learning_rate": 0.00018089241912371153, + "loss": 2.639, + "step": 4081 + }, + { + "epoch": 0.3294326527318215, + "grad_norm": 0.7510811686515808, + "learning_rate": 0.00018088313679387775, + "loss": 2.6722, + "step": 4082 + }, + { + "epoch": 0.32951335646840446, + "grad_norm": 0.7538900971412659, + "learning_rate": 0.0001808738524482248, + "loss": 2.6917, + "step": 4083 + }, 
+ { + "epoch": 0.3295940602049875, + "grad_norm": 0.8071155548095703, + "learning_rate": 0.00018086456608698402, + "loss": 2.6964, + "step": 4084 + }, + { + "epoch": 0.32967476394157047, + "grad_norm": 0.7778098583221436, + "learning_rate": 0.00018085527771038686, + "loss": 2.7301, + "step": 4085 + }, + { + "epoch": 0.3297554676781535, + "grad_norm": 0.7717564702033997, + "learning_rate": 0.00018084598731866485, + "loss": 2.7484, + "step": 4086 + }, + { + "epoch": 0.3298361714147365, + "grad_norm": 0.7361736297607422, + "learning_rate": 0.00018083669491204948, + "loss": 2.6299, + "step": 4087 + }, + { + "epoch": 0.3299168751513195, + "grad_norm": 0.736681342124939, + "learning_rate": 0.00018082740049077238, + "loss": 2.7521, + "step": 4088 + }, + { + "epoch": 0.3299975788879025, + "grad_norm": 0.8011857867240906, + "learning_rate": 0.00018081810405506517, + "loss": 2.724, + "step": 4089 + }, + { + "epoch": 0.3300782826244855, + "grad_norm": 0.7741932272911072, + "learning_rate": 0.00018080880560515956, + "loss": 2.6766, + "step": 4090 + }, + { + "epoch": 0.3301589863610685, + "grad_norm": 0.7321778535842896, + "learning_rate": 0.00018079950514128724, + "loss": 2.6614, + "step": 4091 + }, + { + "epoch": 0.33023969009765153, + "grad_norm": 0.7916514277458191, + "learning_rate": 0.00018079020266368006, + "loss": 2.7177, + "step": 4092 + }, + { + "epoch": 0.3303203938342345, + "grad_norm": 0.7961388826370239, + "learning_rate": 0.00018078089817256986, + "loss": 2.6671, + "step": 4093 + }, + { + "epoch": 0.33040109757081754, + "grad_norm": 0.7167038321495056, + "learning_rate": 0.0001807715916681885, + "loss": 2.6989, + "step": 4094 + }, + { + "epoch": 0.3304818013074005, + "grad_norm": 0.6924864649772644, + "learning_rate": 0.00018076228315076794, + "loss": 2.6484, + "step": 4095 + }, + { + "epoch": 0.33056250504398355, + "grad_norm": 0.777881383895874, + "learning_rate": 0.00018075297262054013, + "loss": 2.6498, + "step": 4096 + }, + { + "epoch": 0.3306432087805665, + 
"grad_norm": 0.7878376841545105, + "learning_rate": 0.0001807436600777372, + "loss": 2.7745, + "step": 4097 + }, + { + "epoch": 0.33072391251714955, + "grad_norm": 0.8418465256690979, + "learning_rate": 0.0001807343455225912, + "loss": 2.7195, + "step": 4098 + }, + { + "epoch": 0.33080461625373253, + "grad_norm": 0.7780830264091492, + "learning_rate": 0.00018072502895533424, + "loss": 2.6652, + "step": 4099 + }, + { + "epoch": 0.33088531999031556, + "grad_norm": 0.7102445960044861, + "learning_rate": 0.00018071571037619853, + "loss": 2.6618, + "step": 4100 + }, + { + "epoch": 0.33096602372689854, + "grad_norm": 0.7028098106384277, + "learning_rate": 0.00018070638978541633, + "loss": 2.7114, + "step": 4101 + }, + { + "epoch": 0.33104672746348157, + "grad_norm": 0.7529525756835938, + "learning_rate": 0.00018069706718321996, + "loss": 2.7231, + "step": 4102 + }, + { + "epoch": 0.33112743120006455, + "grad_norm": 0.7404564023017883, + "learning_rate": 0.0001806877425698417, + "loss": 2.6564, + "step": 4103 + }, + { + "epoch": 0.3312081349366476, + "grad_norm": 0.7725130319595337, + "learning_rate": 0.00018067841594551401, + "loss": 2.677, + "step": 4104 + }, + { + "epoch": 0.33128883867323056, + "grad_norm": 0.7616425156593323, + "learning_rate": 0.00018066908731046927, + "loss": 2.6586, + "step": 4105 + }, + { + "epoch": 0.3313695424098136, + "grad_norm": 0.7318183779716492, + "learning_rate": 0.00018065975666494002, + "loss": 2.6624, + "step": 4106 + }, + { + "epoch": 0.33145024614639657, + "grad_norm": 0.7012802958488464, + "learning_rate": 0.00018065042400915878, + "loss": 2.6663, + "step": 4107 + }, + { + "epoch": 0.3315309498829796, + "grad_norm": 0.815226674079895, + "learning_rate": 0.00018064108934335814, + "loss": 2.7248, + "step": 4108 + }, + { + "epoch": 0.3316116536195626, + "grad_norm": 0.68972247838974, + "learning_rate": 0.00018063175266777077, + "loss": 2.6961, + "step": 4109 + }, + { + "epoch": 0.3316923573561456, + "grad_norm": 0.7563794255256653, + 
"learning_rate": 0.00018062241398262937, + "loss": 2.6526, + "step": 4110 + }, + { + "epoch": 0.3317730610927286, + "grad_norm": 0.7878836989402771, + "learning_rate": 0.00018061307328816662, + "loss": 2.7316, + "step": 4111 + }, + { + "epoch": 0.3318537648293116, + "grad_norm": 0.7189129590988159, + "learning_rate": 0.00018060373058461537, + "loss": 2.6577, + "step": 4112 + }, + { + "epoch": 0.3319344685658946, + "grad_norm": 0.7517561912536621, + "learning_rate": 0.00018059438587220847, + "loss": 2.668, + "step": 4113 + }, + { + "epoch": 0.3320151723024776, + "grad_norm": 0.7602595686912537, + "learning_rate": 0.00018058503915117878, + "loss": 2.6741, + "step": 4114 + }, + { + "epoch": 0.3320958760390606, + "grad_norm": 0.7702187299728394, + "learning_rate": 0.00018057569042175927, + "loss": 2.7082, + "step": 4115 + }, + { + "epoch": 0.33217657977564363, + "grad_norm": 0.7289660573005676, + "learning_rate": 0.00018056633968418294, + "loss": 2.6728, + "step": 4116 + }, + { + "epoch": 0.3322572835122266, + "grad_norm": 0.6936683654785156, + "learning_rate": 0.0001805569869386828, + "loss": 2.6735, + "step": 4117 + }, + { + "epoch": 0.33233798724880964, + "grad_norm": 0.7128138542175293, + "learning_rate": 0.000180547632185492, + "loss": 2.646, + "step": 4118 + }, + { + "epoch": 0.3324186909853926, + "grad_norm": 0.7234248518943787, + "learning_rate": 0.00018053827542484363, + "loss": 2.6497, + "step": 4119 + }, + { + "epoch": 0.33249939472197565, + "grad_norm": 0.7084202170372009, + "learning_rate": 0.0001805289166569709, + "loss": 2.6328, + "step": 4120 + }, + { + "epoch": 0.3325800984585586, + "grad_norm": 0.8068051934242249, + "learning_rate": 0.00018051955588210708, + "loss": 2.6576, + "step": 4121 + }, + { + "epoch": 0.33266080219514166, + "grad_norm": 0.787680447101593, + "learning_rate": 0.00018051019310048544, + "loss": 2.7091, + "step": 4122 + }, + { + "epoch": 0.33274150593172463, + "grad_norm": 0.698946475982666, + "learning_rate": 
0.00018050082831233931, + "loss": 2.6657, + "step": 4123 + }, + { + "epoch": 0.33282220966830767, + "grad_norm": 0.7946122288703918, + "learning_rate": 0.00018049146151790215, + "loss": 2.6981, + "step": 4124 + }, + { + "epoch": 0.33290291340489064, + "grad_norm": 0.8025123476982117, + "learning_rate": 0.00018048209271740736, + "loss": 2.6878, + "step": 4125 + }, + { + "epoch": 0.3329836171414737, + "grad_norm": 0.7493376135826111, + "learning_rate": 0.0001804727219110884, + "loss": 2.6556, + "step": 4126 + }, + { + "epoch": 0.33306432087805665, + "grad_norm": 0.7143186926841736, + "learning_rate": 0.00018046334909917886, + "loss": 2.6879, + "step": 4127 + }, + { + "epoch": 0.3331450246146397, + "grad_norm": 0.7375641465187073, + "learning_rate": 0.00018045397428191235, + "loss": 2.6817, + "step": 4128 + }, + { + "epoch": 0.33322572835122266, + "grad_norm": 0.7201291918754578, + "learning_rate": 0.00018044459745952248, + "loss": 2.6765, + "step": 4129 + }, + { + "epoch": 0.3333064320878057, + "grad_norm": 0.7924519777297974, + "learning_rate": 0.00018043521863224296, + "loss": 2.7748, + "step": 4130 + }, + { + "epoch": 0.33338713582438867, + "grad_norm": 0.7773354053497314, + "learning_rate": 0.00018042583780030752, + "loss": 2.6839, + "step": 4131 + }, + { + "epoch": 0.33346783956097165, + "grad_norm": 0.7527397274971008, + "learning_rate": 0.00018041645496394998, + "loss": 2.6749, + "step": 4132 + }, + { + "epoch": 0.3335485432975547, + "grad_norm": 0.7329208254814148, + "learning_rate": 0.00018040707012340418, + "loss": 2.7535, + "step": 4133 + }, + { + "epoch": 0.33362924703413765, + "grad_norm": 0.7637773752212524, + "learning_rate": 0.00018039768327890397, + "loss": 2.632, + "step": 4134 + }, + { + "epoch": 0.3337099507707207, + "grad_norm": 0.823623776435852, + "learning_rate": 0.00018038829443068333, + "loss": 2.7122, + "step": 4135 + }, + { + "epoch": 0.33379065450730366, + "grad_norm": 0.8040826916694641, + "learning_rate": 0.00018037890357897632, + 
"loss": 2.7197, + "step": 4136 + }, + { + "epoch": 0.3338713582438867, + "grad_norm": 0.7483998537063599, + "learning_rate": 0.00018036951072401686, + "loss": 2.6535, + "step": 4137 + }, + { + "epoch": 0.33395206198046967, + "grad_norm": 0.8141106367111206, + "learning_rate": 0.00018036011586603914, + "loss": 2.7127, + "step": 4138 + }, + { + "epoch": 0.3340327657170527, + "grad_norm": 0.7226041555404663, + "learning_rate": 0.00018035071900527724, + "loss": 2.6846, + "step": 4139 + }, + { + "epoch": 0.3341134694536357, + "grad_norm": 0.7624794840812683, + "learning_rate": 0.00018034132014196541, + "loss": 2.6725, + "step": 4140 + }, + { + "epoch": 0.3341941731902187, + "grad_norm": 0.7299962043762207, + "learning_rate": 0.00018033191927633785, + "loss": 2.6728, + "step": 4141 + }, + { + "epoch": 0.3342748769268017, + "grad_norm": 0.7920462489128113, + "learning_rate": 0.0001803225164086289, + "loss": 2.6544, + "step": 4142 + }, + { + "epoch": 0.3343555806633847, + "grad_norm": 0.7469778656959534, + "learning_rate": 0.00018031311153907282, + "loss": 2.7356, + "step": 4143 + }, + { + "epoch": 0.3344362843999677, + "grad_norm": 0.8831696510314941, + "learning_rate": 0.0001803037046679041, + "loss": 2.6584, + "step": 4144 + }, + { + "epoch": 0.33451698813655073, + "grad_norm": 0.8047679662704468, + "learning_rate": 0.00018029429579535715, + "loss": 2.6213, + "step": 4145 + }, + { + "epoch": 0.3345976918731337, + "grad_norm": 0.7109517455101013, + "learning_rate": 0.00018028488492166645, + "loss": 2.6622, + "step": 4146 + }, + { + "epoch": 0.33467839560971674, + "grad_norm": 0.7240141034126282, + "learning_rate": 0.0001802754720470665, + "loss": 2.6794, + "step": 4147 + }, + { + "epoch": 0.3347590993462997, + "grad_norm": 0.7292990684509277, + "learning_rate": 0.000180266057171792, + "loss": 2.6079, + "step": 4148 + }, + { + "epoch": 0.33483980308288275, + "grad_norm": 0.8055328130722046, + "learning_rate": 0.00018025664029607756, + "loss": 2.7044, + "step": 4149 + }, + 
{ + "epoch": 0.3349205068194657, + "grad_norm": 0.8348979949951172, + "learning_rate": 0.00018024722142015781, + "loss": 2.6757, + "step": 4150 + }, + { + "epoch": 0.33500121055604876, + "grad_norm": 0.7797044515609741, + "learning_rate": 0.00018023780054426754, + "loss": 2.7125, + "step": 4151 + }, + { + "epoch": 0.33508191429263173, + "grad_norm": 0.802442729473114, + "learning_rate": 0.00018022837766864153, + "loss": 2.7121, + "step": 4152 + }, + { + "epoch": 0.33516261802921476, + "grad_norm": 0.7248829007148743, + "learning_rate": 0.00018021895279351463, + "loss": 2.7344, + "step": 4153 + }, + { + "epoch": 0.33524332176579774, + "grad_norm": 0.7458582520484924, + "learning_rate": 0.00018020952591912175, + "loss": 2.665, + "step": 4154 + }, + { + "epoch": 0.3353240255023808, + "grad_norm": 0.8153703808784485, + "learning_rate": 0.0001802000970456978, + "loss": 2.7416, + "step": 4155 + }, + { + "epoch": 0.33540472923896375, + "grad_norm": 0.7583708763122559, + "learning_rate": 0.00018019066617347779, + "loss": 2.7002, + "step": 4156 + }, + { + "epoch": 0.3354854329755468, + "grad_norm": 0.7522469162940979, + "learning_rate": 0.00018018123330269678, + "loss": 2.7196, + "step": 4157 + }, + { + "epoch": 0.33556613671212976, + "grad_norm": 0.7386923432350159, + "learning_rate": 0.00018017179843358983, + "loss": 2.6947, + "step": 4158 + }, + { + "epoch": 0.3356468404487128, + "grad_norm": 0.7366231083869934, + "learning_rate": 0.00018016236156639205, + "loss": 2.7377, + "step": 4159 + }, + { + "epoch": 0.33572754418529577, + "grad_norm": 0.7727232575416565, + "learning_rate": 0.00018015292270133872, + "loss": 2.7566, + "step": 4160 + }, + { + "epoch": 0.3358082479218788, + "grad_norm": 0.6781843304634094, + "learning_rate": 0.000180143481838665, + "loss": 2.6796, + "step": 4161 + }, + { + "epoch": 0.3358889516584618, + "grad_norm": 0.7036039233207703, + "learning_rate": 0.00018013403897860624, + "loss": 2.7012, + "step": 4162 + }, + { + "epoch": 0.3359696553950448, + 
"grad_norm": 0.8252625465393066, + "learning_rate": 0.00018012459412139776, + "loss": 2.6613, + "step": 4163 + }, + { + "epoch": 0.3360503591316278, + "grad_norm": 0.6924486756324768, + "learning_rate": 0.00018011514726727493, + "loss": 2.6425, + "step": 4164 + }, + { + "epoch": 0.3361310628682108, + "grad_norm": 0.7735962271690369, + "learning_rate": 0.0001801056984164732, + "loss": 2.7235, + "step": 4165 + }, + { + "epoch": 0.3362117666047938, + "grad_norm": 0.7439951300621033, + "learning_rate": 0.0001800962475692281, + "loss": 2.7428, + "step": 4166 + }, + { + "epoch": 0.3362924703413768, + "grad_norm": 0.6830539107322693, + "learning_rate": 0.0001800867947257751, + "loss": 2.5907, + "step": 4167 + }, + { + "epoch": 0.3363731740779598, + "grad_norm": 0.8355144262313843, + "learning_rate": 0.00018007733988634986, + "loss": 2.6978, + "step": 4168 + }, + { + "epoch": 0.33645387781454283, + "grad_norm": 0.6880978941917419, + "learning_rate": 0.00018006788305118798, + "loss": 2.6934, + "step": 4169 + }, + { + "epoch": 0.3365345815511258, + "grad_norm": 0.762709379196167, + "learning_rate": 0.0001800584242205251, + "loss": 2.684, + "step": 4170 + }, + { + "epoch": 0.33661528528770884, + "grad_norm": 0.7543070912361145, + "learning_rate": 0.0001800489633945971, + "loss": 2.6857, + "step": 4171 + }, + { + "epoch": 0.3366959890242918, + "grad_norm": 0.787651777267456, + "learning_rate": 0.00018003950057363964, + "loss": 2.6979, + "step": 4172 + }, + { + "epoch": 0.33677669276087485, + "grad_norm": 0.7831481099128723, + "learning_rate": 0.00018003003575788856, + "loss": 2.7158, + "step": 4173 + }, + { + "epoch": 0.33685739649745783, + "grad_norm": 0.844904363155365, + "learning_rate": 0.00018002056894757986, + "loss": 2.6459, + "step": 4174 + }, + { + "epoch": 0.33693810023404086, + "grad_norm": 0.7529420852661133, + "learning_rate": 0.00018001110014294937, + "loss": 2.685, + "step": 4175 + }, + { + "epoch": 0.33701880397062384, + "grad_norm": 0.776719868183136, + 
"learning_rate": 0.0001800016293442331, + "loss": 2.6353, + "step": 4176 + }, + { + "epoch": 0.33709950770720687, + "grad_norm": 0.7988671660423279, + "learning_rate": 0.00017999215655166716, + "loss": 2.7241, + "step": 4177 + }, + { + "epoch": 0.33718021144378985, + "grad_norm": 0.7190617918968201, + "learning_rate": 0.00017998268176548752, + "loss": 2.7278, + "step": 4178 + }, + { + "epoch": 0.3372609151803729, + "grad_norm": 0.8337060809135437, + "learning_rate": 0.0001799732049859304, + "loss": 2.7059, + "step": 4179 + }, + { + "epoch": 0.33734161891695585, + "grad_norm": 0.7547435164451599, + "learning_rate": 0.0001799637262132319, + "loss": 2.7782, + "step": 4180 + }, + { + "epoch": 0.3374223226535389, + "grad_norm": 0.8067883253097534, + "learning_rate": 0.0001799542454476284, + "loss": 2.7978, + "step": 4181 + }, + { + "epoch": 0.33750302639012186, + "grad_norm": 0.7451581358909607, + "learning_rate": 0.00017994476268935609, + "loss": 2.6931, + "step": 4182 + }, + { + "epoch": 0.33758373012670484, + "grad_norm": 0.7521898746490479, + "learning_rate": 0.00017993527793865125, + "loss": 2.6939, + "step": 4183 + }, + { + "epoch": 0.33766443386328787, + "grad_norm": 0.7608996033668518, + "learning_rate": 0.0001799257911957504, + "loss": 2.715, + "step": 4184 + }, + { + "epoch": 0.33774513759987085, + "grad_norm": 0.7459948658943176, + "learning_rate": 0.00017991630246088987, + "loss": 2.6951, + "step": 4185 + }, + { + "epoch": 0.3378258413364539, + "grad_norm": 0.7549717426300049, + "learning_rate": 0.00017990681173430618, + "loss": 2.7353, + "step": 4186 + }, + { + "epoch": 0.33790654507303686, + "grad_norm": 0.7234344482421875, + "learning_rate": 0.0001798973190162359, + "loss": 2.6491, + "step": 4187 + }, + { + "epoch": 0.3379872488096199, + "grad_norm": 0.7652330994606018, + "learning_rate": 0.00017988782430691553, + "loss": 2.765, + "step": 4188 + }, + { + "epoch": 0.33806795254620287, + "grad_norm": 0.742953360080719, + "learning_rate": 
0.00017987832760658177, + "loss": 2.7079, + "step": 4189 + }, + { + "epoch": 0.3381486562827859, + "grad_norm": 0.7440767288208008, + "learning_rate": 0.00017986882891547125, + "loss": 2.6751, + "step": 4190 + }, + { + "epoch": 0.3382293600193689, + "grad_norm": 0.7141925096511841, + "learning_rate": 0.00017985932823382078, + "loss": 2.6249, + "step": 4191 + }, + { + "epoch": 0.3383100637559519, + "grad_norm": 0.7200489044189453, + "learning_rate": 0.00017984982556186707, + "loss": 2.6811, + "step": 4192 + }, + { + "epoch": 0.3383907674925349, + "grad_norm": 0.7677409648895264, + "learning_rate": 0.00017984032089984696, + "loss": 2.6641, + "step": 4193 + }, + { + "epoch": 0.3384714712291179, + "grad_norm": 0.7386545538902283, + "learning_rate": 0.00017983081424799741, + "loss": 2.6504, + "step": 4194 + }, + { + "epoch": 0.3385521749657009, + "grad_norm": 0.7528583407402039, + "learning_rate": 0.00017982130560655526, + "loss": 2.6422, + "step": 4195 + }, + { + "epoch": 0.3386328787022839, + "grad_norm": 0.7339407801628113, + "learning_rate": 0.0001798117949757575, + "loss": 2.7047, + "step": 4196 + }, + { + "epoch": 0.3387135824388669, + "grad_norm": 0.7655882239341736, + "learning_rate": 0.00017980228235584117, + "loss": 2.7644, + "step": 4197 + }, + { + "epoch": 0.33879428617544993, + "grad_norm": 0.7602109909057617, + "learning_rate": 0.00017979276774704342, + "loss": 2.697, + "step": 4198 + }, + { + "epoch": 0.3388749899120329, + "grad_norm": 0.7188911437988281, + "learning_rate": 0.00017978325114960126, + "loss": 2.7147, + "step": 4199 + }, + { + "epoch": 0.33895569364861594, + "grad_norm": 0.7672597765922546, + "learning_rate": 0.00017977373256375194, + "loss": 2.6558, + "step": 4200 + }, + { + "epoch": 0.3390363973851989, + "grad_norm": 0.784187912940979, + "learning_rate": 0.0001797642119897327, + "loss": 2.7005, + "step": 4201 + }, + { + "epoch": 0.33911710112178195, + "grad_norm": 0.7359703779220581, + "learning_rate": 0.00017975468942778075, + "loss": 
2.6578, + "step": 4202 + }, + { + "epoch": 0.3391978048583649, + "grad_norm": 0.7776080965995789, + "learning_rate": 0.00017974516487813345, + "loss": 2.6747, + "step": 4203 + }, + { + "epoch": 0.33927850859494796, + "grad_norm": 0.6934135556221008, + "learning_rate": 0.00017973563834102824, + "loss": 2.6335, + "step": 4204 + }, + { + "epoch": 0.33935921233153094, + "grad_norm": 0.7715818881988525, + "learning_rate": 0.00017972610981670245, + "loss": 2.6062, + "step": 4205 + }, + { + "epoch": 0.33943991606811397, + "grad_norm": 0.7466367483139038, + "learning_rate": 0.0001797165793053936, + "loss": 2.7243, + "step": 4206 + }, + { + "epoch": 0.33952061980469694, + "grad_norm": 0.7485085129737854, + "learning_rate": 0.00017970704680733926, + "loss": 2.6603, + "step": 4207 + }, + { + "epoch": 0.33960132354128, + "grad_norm": 0.7365782856941223, + "learning_rate": 0.0001796975123227769, + "loss": 2.7179, + "step": 4208 + }, + { + "epoch": 0.33968202727786295, + "grad_norm": 0.8405506014823914, + "learning_rate": 0.00017968797585194422, + "loss": 2.7413, + "step": 4209 + }, + { + "epoch": 0.339762731014446, + "grad_norm": 0.8227888941764832, + "learning_rate": 0.00017967843739507888, + "loss": 2.6814, + "step": 4210 + }, + { + "epoch": 0.33984343475102896, + "grad_norm": 0.8247283697128296, + "learning_rate": 0.0001796688969524186, + "loss": 2.6802, + "step": 4211 + }, + { + "epoch": 0.339924138487612, + "grad_norm": 0.7639476656913757, + "learning_rate": 0.00017965935452420116, + "loss": 2.7422, + "step": 4212 + }, + { + "epoch": 0.34000484222419497, + "grad_norm": 0.7846776247024536, + "learning_rate": 0.00017964981011066436, + "loss": 2.7443, + "step": 4213 + }, + { + "epoch": 0.340085545960778, + "grad_norm": 0.7593334913253784, + "learning_rate": 0.00017964026371204608, + "loss": 2.7179, + "step": 4214 + }, + { + "epoch": 0.340166249697361, + "grad_norm": 0.7878177165985107, + "learning_rate": 0.00017963071532858425, + "loss": 2.7118, + "step": 4215 + }, + { + 
"epoch": 0.340246953433944, + "grad_norm": 0.7728220224380493, + "learning_rate": 0.00017962116496051685, + "loss": 2.6646, + "step": 4216 + }, + { + "epoch": 0.340327657170527, + "grad_norm": 0.8419308066368103, + "learning_rate": 0.00017961161260808187, + "loss": 2.7829, + "step": 4217 + }, + { + "epoch": 0.34040836090711, + "grad_norm": 0.7066153883934021, + "learning_rate": 0.0001796020582715174, + "loss": 2.6498, + "step": 4218 + }, + { + "epoch": 0.340489064643693, + "grad_norm": 0.7976264953613281, + "learning_rate": 0.00017959250195106156, + "loss": 2.7496, + "step": 4219 + }, + { + "epoch": 0.34056976838027603, + "grad_norm": 0.736595630645752, + "learning_rate": 0.0001795829436469525, + "loss": 2.6497, + "step": 4220 + }, + { + "epoch": 0.340650472116859, + "grad_norm": 0.818550705909729, + "learning_rate": 0.0001795733833594285, + "loss": 2.6793, + "step": 4221 + }, + { + "epoch": 0.34073117585344204, + "grad_norm": 0.7712778449058533, + "learning_rate": 0.00017956382108872773, + "loss": 2.6215, + "step": 4222 + }, + { + "epoch": 0.340811879590025, + "grad_norm": 0.746306300163269, + "learning_rate": 0.00017955425683508858, + "loss": 2.7372, + "step": 4223 + }, + { + "epoch": 0.34089258332660805, + "grad_norm": 0.7269306778907776, + "learning_rate": 0.00017954469059874937, + "loss": 2.6438, + "step": 4224 + }, + { + "epoch": 0.340973287063191, + "grad_norm": 0.7426211833953857, + "learning_rate": 0.00017953512237994855, + "loss": 2.6539, + "step": 4225 + }, + { + "epoch": 0.34105399079977405, + "grad_norm": 0.7269948124885559, + "learning_rate": 0.0001795255521789246, + "loss": 2.6833, + "step": 4226 + }, + { + "epoch": 0.34113469453635703, + "grad_norm": 0.7279343605041504, + "learning_rate": 0.00017951597999591598, + "loss": 2.7011, + "step": 4227 + }, + { + "epoch": 0.34121539827294006, + "grad_norm": 0.7554663419723511, + "learning_rate": 0.0001795064058311613, + "loss": 2.7036, + "step": 4228 + }, + { + "epoch": 0.34129610200952304, + "grad_norm": 
0.7516502141952515, + "learning_rate": 0.00017949682968489912, + "loss": 2.6699, + "step": 4229 + }, + { + "epoch": 0.34137680574610607, + "grad_norm": 0.7931745052337646, + "learning_rate": 0.00017948725155736818, + "loss": 2.6655, + "step": 4230 + }, + { + "epoch": 0.34145750948268905, + "grad_norm": 0.6981344223022461, + "learning_rate": 0.0001794776714488071, + "loss": 2.6987, + "step": 4231 + }, + { + "epoch": 0.3415382132192721, + "grad_norm": 0.7513911724090576, + "learning_rate": 0.00017946808935945474, + "loss": 2.6985, + "step": 4232 + }, + { + "epoch": 0.34161891695585506, + "grad_norm": 0.7373185753822327, + "learning_rate": 0.00017945850528954983, + "loss": 2.7269, + "step": 4233 + }, + { + "epoch": 0.34169962069243803, + "grad_norm": 0.6990259289741516, + "learning_rate": 0.0001794489192393313, + "loss": 2.6763, + "step": 4234 + }, + { + "epoch": 0.34178032442902107, + "grad_norm": 0.7661817669868469, + "learning_rate": 0.00017943933120903797, + "loss": 2.7057, + "step": 4235 + }, + { + "epoch": 0.34186102816560404, + "grad_norm": 0.7570027112960815, + "learning_rate": 0.0001794297411989089, + "loss": 2.7358, + "step": 4236 + }, + { + "epoch": 0.3419417319021871, + "grad_norm": 0.7751824855804443, + "learning_rate": 0.000179420149209183, + "loss": 2.6771, + "step": 4237 + }, + { + "epoch": 0.34202243563877005, + "grad_norm": 0.8028360605239868, + "learning_rate": 0.0001794105552400994, + "loss": 2.6399, + "step": 4238 + }, + { + "epoch": 0.3421031393753531, + "grad_norm": 0.7398171424865723, + "learning_rate": 0.00017940095929189716, + "loss": 2.6532, + "step": 4239 + }, + { + "epoch": 0.34218384311193606, + "grad_norm": 0.8300225138664246, + "learning_rate": 0.0001793913613648155, + "loss": 2.6798, + "step": 4240 + }, + { + "epoch": 0.3422645468485191, + "grad_norm": 0.7501145005226135, + "learning_rate": 0.00017938176145909356, + "loss": 2.7132, + "step": 4241 + }, + { + "epoch": 0.34234525058510207, + "grad_norm": 0.7178483605384827, + 
"learning_rate": 0.00017937215957497063, + "loss": 2.7172, + "step": 4242 + }, + { + "epoch": 0.3424259543216851, + "grad_norm": 0.7207306027412415, + "learning_rate": 0.00017936255571268599, + "loss": 2.629, + "step": 4243 + }, + { + "epoch": 0.3425066580582681, + "grad_norm": 0.7339839935302734, + "learning_rate": 0.00017935294987247899, + "loss": 2.6262, + "step": 4244 + }, + { + "epoch": 0.3425873617948511, + "grad_norm": 0.6977292895317078, + "learning_rate": 0.00017934334205458907, + "loss": 2.6949, + "step": 4245 + }, + { + "epoch": 0.3426680655314341, + "grad_norm": 0.7368096113204956, + "learning_rate": 0.00017933373225925564, + "loss": 2.681, + "step": 4246 + }, + { + "epoch": 0.3427487692680171, + "grad_norm": 0.7234459519386292, + "learning_rate": 0.00017932412048671825, + "loss": 2.6891, + "step": 4247 + }, + { + "epoch": 0.3428294730046001, + "grad_norm": 0.7659995555877686, + "learning_rate": 0.00017931450673721642, + "loss": 2.7394, + "step": 4248 + }, + { + "epoch": 0.3429101767411831, + "grad_norm": 0.7799893617630005, + "learning_rate": 0.00017930489101098974, + "loss": 2.7707, + "step": 4249 + }, + { + "epoch": 0.3429908804777661, + "grad_norm": 0.7063946723937988, + "learning_rate": 0.00017929527330827786, + "loss": 2.6573, + "step": 4250 + }, + { + "epoch": 0.34307158421434913, + "grad_norm": 0.7090561389923096, + "learning_rate": 0.0001792856536293205, + "loss": 2.7095, + "step": 4251 + }, + { + "epoch": 0.3431522879509321, + "grad_norm": 0.8020029067993164, + "learning_rate": 0.0001792760319743574, + "loss": 2.6905, + "step": 4252 + }, + { + "epoch": 0.34323299168751514, + "grad_norm": 0.7221484780311584, + "learning_rate": 0.00017926640834362836, + "loss": 2.6853, + "step": 4253 + }, + { + "epoch": 0.3433136954240981, + "grad_norm": 0.7102623581886292, + "learning_rate": 0.00017925678273737324, + "loss": 2.6821, + "step": 4254 + }, + { + "epoch": 0.34339439916068115, + "grad_norm": 0.7702807784080505, + "learning_rate": 
0.00017924715515583187, + "loss": 2.6986, + "step": 4255 + }, + { + "epoch": 0.34347510289726413, + "grad_norm": 0.7938152551651001, + "learning_rate": 0.00017923752559924425, + "loss": 2.7162, + "step": 4256 + }, + { + "epoch": 0.34355580663384716, + "grad_norm": 0.7340937852859497, + "learning_rate": 0.00017922789406785036, + "loss": 2.6904, + "step": 4257 + }, + { + "epoch": 0.34363651037043014, + "grad_norm": 0.7010839581489563, + "learning_rate": 0.00017921826056189026, + "loss": 2.6969, + "step": 4258 + }, + { + "epoch": 0.34371721410701317, + "grad_norm": 0.758178174495697, + "learning_rate": 0.00017920862508160403, + "loss": 2.6391, + "step": 4259 + }, + { + "epoch": 0.34379791784359615, + "grad_norm": 0.7861726880073547, + "learning_rate": 0.0001791989876272318, + "loss": 2.7088, + "step": 4260 + }, + { + "epoch": 0.3438786215801792, + "grad_norm": 0.6764364242553711, + "learning_rate": 0.00017918934819901377, + "loss": 2.6221, + "step": 4261 + }, + { + "epoch": 0.34395932531676215, + "grad_norm": 0.76728355884552, + "learning_rate": 0.00017917970679719018, + "loss": 2.6854, + "step": 4262 + }, + { + "epoch": 0.3440400290533452, + "grad_norm": 0.7161166071891785, + "learning_rate": 0.00017917006342200133, + "loss": 2.7048, + "step": 4263 + }, + { + "epoch": 0.34412073278992816, + "grad_norm": 0.7182073593139648, + "learning_rate": 0.00017916041807368753, + "loss": 2.7559, + "step": 4264 + }, + { + "epoch": 0.3442014365265112, + "grad_norm": 0.832258403301239, + "learning_rate": 0.0001791507707524892, + "loss": 2.6743, + "step": 4265 + }, + { + "epoch": 0.34428214026309417, + "grad_norm": 0.7048495411872864, + "learning_rate": 0.00017914112145864675, + "loss": 2.693, + "step": 4266 + }, + { + "epoch": 0.3443628439996772, + "grad_norm": 0.7475518584251404, + "learning_rate": 0.00017913147019240068, + "loss": 2.6881, + "step": 4267 + }, + { + "epoch": 0.3444435477362602, + "grad_norm": 0.72830730676651, + "learning_rate": 0.00017912181695399154, + "loss": 
2.659, + "step": 4268 + }, + { + "epoch": 0.3445242514728432, + "grad_norm": 0.7183662056922913, + "learning_rate": 0.00017911216174365988, + "loss": 2.6611, + "step": 4269 + }, + { + "epoch": 0.3446049552094262, + "grad_norm": 0.7487103343009949, + "learning_rate": 0.0001791025045616463, + "loss": 2.6518, + "step": 4270 + }, + { + "epoch": 0.3446856589460092, + "grad_norm": 0.7733812928199768, + "learning_rate": 0.0001790928454081916, + "loss": 2.6359, + "step": 4271 + }, + { + "epoch": 0.3447663626825922, + "grad_norm": 0.7774991393089294, + "learning_rate": 0.00017908318428353642, + "loss": 2.6654, + "step": 4272 + }, + { + "epoch": 0.34484706641917523, + "grad_norm": 0.6882895827293396, + "learning_rate": 0.00017907352118792157, + "loss": 2.686, + "step": 4273 + }, + { + "epoch": 0.3449277701557582, + "grad_norm": 0.7571535110473633, + "learning_rate": 0.00017906385612158785, + "loss": 2.7108, + "step": 4274 + }, + { + "epoch": 0.34500847389234124, + "grad_norm": 0.7324517369270325, + "learning_rate": 0.00017905418908477615, + "loss": 2.6663, + "step": 4275 + }, + { + "epoch": 0.3450891776289242, + "grad_norm": 0.7476221919059753, + "learning_rate": 0.00017904452007772744, + "loss": 2.7202, + "step": 4276 + }, + { + "epoch": 0.34516988136550725, + "grad_norm": 0.7648386359214783, + "learning_rate": 0.00017903484910068268, + "loss": 2.6759, + "step": 4277 + }, + { + "epoch": 0.3452505851020902, + "grad_norm": 0.7375434637069702, + "learning_rate": 0.00017902517615388282, + "loss": 2.6603, + "step": 4278 + }, + { + "epoch": 0.34533128883867326, + "grad_norm": 0.7248519062995911, + "learning_rate": 0.00017901550123756906, + "loss": 2.7147, + "step": 4279 + }, + { + "epoch": 0.34541199257525623, + "grad_norm": 0.7264916896820068, + "learning_rate": 0.0001790058243519824, + "loss": 2.6992, + "step": 4280 + }, + { + "epoch": 0.34549269631183926, + "grad_norm": 0.8370026350021362, + "learning_rate": 0.0001789961454973641, + "loss": 2.7114, + "step": 4281 + }, + { + 
"epoch": 0.34557340004842224, + "grad_norm": 0.72071373462677, + "learning_rate": 0.00017898646467395538, + "loss": 2.6957, + "step": 4282 + }, + { + "epoch": 0.3456541037850053, + "grad_norm": 0.7355397343635559, + "learning_rate": 0.0001789767818819975, + "loss": 2.6744, + "step": 4283 + }, + { + "epoch": 0.34573480752158825, + "grad_norm": 0.734756588935852, + "learning_rate": 0.00017896709712173173, + "loss": 2.726, + "step": 4284 + }, + { + "epoch": 0.3458155112581712, + "grad_norm": 0.7890543341636658, + "learning_rate": 0.00017895741039339945, + "loss": 2.6726, + "step": 4285 + }, + { + "epoch": 0.34589621499475426, + "grad_norm": 0.7768735885620117, + "learning_rate": 0.00017894772169724216, + "loss": 2.7617, + "step": 4286 + }, + { + "epoch": 0.34597691873133724, + "grad_norm": 0.7306547164916992, + "learning_rate": 0.00017893803103350125, + "loss": 2.6253, + "step": 4287 + }, + { + "epoch": 0.34605762246792027, + "grad_norm": 0.767066478729248, + "learning_rate": 0.00017892833840241828, + "loss": 2.6522, + "step": 4288 + }, + { + "epoch": 0.34613832620450324, + "grad_norm": 0.7018097639083862, + "learning_rate": 0.00017891864380423477, + "loss": 2.7111, + "step": 4289 + }, + { + "epoch": 0.3462190299410863, + "grad_norm": 0.7305615544319153, + "learning_rate": 0.00017890894723919236, + "loss": 2.6924, + "step": 4290 + }, + { + "epoch": 0.34629973367766925, + "grad_norm": 0.7588002681732178, + "learning_rate": 0.00017889924870753275, + "loss": 2.6952, + "step": 4291 + }, + { + "epoch": 0.3463804374142523, + "grad_norm": 0.7162861824035645, + "learning_rate": 0.0001788895482094976, + "loss": 2.6239, + "step": 4292 + }, + { + "epoch": 0.34646114115083526, + "grad_norm": 0.7494024634361267, + "learning_rate": 0.00017887984574532868, + "loss": 2.6763, + "step": 4293 + }, + { + "epoch": 0.3465418448874183, + "grad_norm": 0.7100037336349487, + "learning_rate": 0.0001788701413152678, + "loss": 2.6378, + "step": 4294 + }, + { + "epoch": 0.34662254862400127, + 
"grad_norm": 0.7316900491714478, + "learning_rate": 0.00017886043491955684, + "loss": 2.7001, + "step": 4295 + }, + { + "epoch": 0.3467032523605843, + "grad_norm": 0.8467028737068176, + "learning_rate": 0.00017885072655843772, + "loss": 2.7536, + "step": 4296 + }, + { + "epoch": 0.3467839560971673, + "grad_norm": 0.7248796820640564, + "learning_rate": 0.00017884101623215237, + "loss": 2.6956, + "step": 4297 + }, + { + "epoch": 0.3468646598337503, + "grad_norm": 0.7183107137680054, + "learning_rate": 0.0001788313039409428, + "loss": 2.743, + "step": 4298 + }, + { + "epoch": 0.3469453635703333, + "grad_norm": 0.6835163831710815, + "learning_rate": 0.00017882158968505105, + "loss": 2.7016, + "step": 4299 + }, + { + "epoch": 0.3470260673069163, + "grad_norm": 0.7973365783691406, + "learning_rate": 0.00017881187346471925, + "loss": 2.6927, + "step": 4300 + }, + { + "epoch": 0.3471067710434993, + "grad_norm": 0.700040876865387, + "learning_rate": 0.00017880215528018954, + "loss": 2.6961, + "step": 4301 + }, + { + "epoch": 0.34718747478008233, + "grad_norm": 0.8180583119392395, + "learning_rate": 0.00017879243513170415, + "loss": 2.642, + "step": 4302 + }, + { + "epoch": 0.3472681785166653, + "grad_norm": 0.7134599685668945, + "learning_rate": 0.0001787827130195053, + "loss": 2.6901, + "step": 4303 + }, + { + "epoch": 0.34734888225324834, + "grad_norm": 0.767998218536377, + "learning_rate": 0.0001787729889438353, + "loss": 2.6472, + "step": 4304 + }, + { + "epoch": 0.3474295859898313, + "grad_norm": 0.7260780930519104, + "learning_rate": 0.0001787632629049365, + "loss": 2.6791, + "step": 4305 + }, + { + "epoch": 0.34751028972641435, + "grad_norm": 0.6918236613273621, + "learning_rate": 0.00017875353490305132, + "loss": 2.6596, + "step": 4306 + }, + { + "epoch": 0.3475909934629973, + "grad_norm": 0.7734197974205017, + "learning_rate": 0.00017874380493842216, + "loss": 2.6402, + "step": 4307 + }, + { + "epoch": 0.34767169719958035, + "grad_norm": 0.7051037549972534, + 
"learning_rate": 0.00017873407301129154, + "loss": 2.7517, + "step": 4308 + }, + { + "epoch": 0.34775240093616333, + "grad_norm": 0.7026919722557068, + "learning_rate": 0.00017872433912190203, + "loss": 2.7058, + "step": 4309 + }, + { + "epoch": 0.34783310467274636, + "grad_norm": 0.7248546481132507, + "learning_rate": 0.00017871460327049618, + "loss": 2.666, + "step": 4310 + }, + { + "epoch": 0.34791380840932934, + "grad_norm": 0.7348842620849609, + "learning_rate": 0.0001787048654573167, + "loss": 2.7712, + "step": 4311 + }, + { + "epoch": 0.34799451214591237, + "grad_norm": 0.7923693656921387, + "learning_rate": 0.00017869512568260618, + "loss": 2.6469, + "step": 4312 + }, + { + "epoch": 0.34807521588249535, + "grad_norm": 0.7604066729545593, + "learning_rate": 0.00017868538394660743, + "loss": 2.7152, + "step": 4313 + }, + { + "epoch": 0.3481559196190784, + "grad_norm": 0.6811137795448303, + "learning_rate": 0.00017867564024956324, + "loss": 2.715, + "step": 4314 + }, + { + "epoch": 0.34823662335566136, + "grad_norm": 0.7292799353599548, + "learning_rate": 0.00017866589459171643, + "loss": 2.6374, + "step": 4315 + }, + { + "epoch": 0.3483173270922444, + "grad_norm": 0.6961250901222229, + "learning_rate": 0.0001786561469733099, + "loss": 2.6592, + "step": 4316 + }, + { + "epoch": 0.34839803082882737, + "grad_norm": 0.7447086572647095, + "learning_rate": 0.00017864639739458658, + "loss": 2.6965, + "step": 4317 + }, + { + "epoch": 0.3484787345654104, + "grad_norm": 0.7107378244400024, + "learning_rate": 0.00017863664585578942, + "loss": 2.7057, + "step": 4318 + }, + { + "epoch": 0.3485594383019934, + "grad_norm": 0.7372235655784607, + "learning_rate": 0.00017862689235716153, + "loss": 2.6289, + "step": 4319 + }, + { + "epoch": 0.3486401420385764, + "grad_norm": 0.7360481023788452, + "learning_rate": 0.00017861713689894593, + "loss": 2.7208, + "step": 4320 + }, + { + "epoch": 0.3487208457751594, + "grad_norm": 0.7378106713294983, + "learning_rate": 
0.00017860737948138575, + "loss": 2.6836, + "step": 4321 + }, + { + "epoch": 0.3488015495117424, + "grad_norm": 0.7110548615455627, + "learning_rate": 0.00017859762010472423, + "loss": 2.6941, + "step": 4322 + }, + { + "epoch": 0.3488822532483254, + "grad_norm": 0.7419706583023071, + "learning_rate": 0.00017858785876920455, + "loss": 2.6591, + "step": 4323 + }, + { + "epoch": 0.3489629569849084, + "grad_norm": 0.7759542465209961, + "learning_rate": 0.00017857809547506997, + "loss": 2.6966, + "step": 4324 + }, + { + "epoch": 0.3490436607214914, + "grad_norm": 0.7894207239151001, + "learning_rate": 0.0001785683302225639, + "loss": 2.7298, + "step": 4325 + }, + { + "epoch": 0.34912436445807443, + "grad_norm": 0.7342399954795837, + "learning_rate": 0.0001785585630119296, + "loss": 2.6998, + "step": 4326 + }, + { + "epoch": 0.3492050681946574, + "grad_norm": 0.8684173822402954, + "learning_rate": 0.0001785487938434106, + "loss": 2.7179, + "step": 4327 + }, + { + "epoch": 0.34928577193124044, + "grad_norm": 0.7557523846626282, + "learning_rate": 0.00017853902271725033, + "loss": 2.7081, + "step": 4328 + }, + { + "epoch": 0.3493664756678234, + "grad_norm": 0.7910173535346985, + "learning_rate": 0.0001785292496336923, + "loss": 2.718, + "step": 4329 + }, + { + "epoch": 0.34944717940440645, + "grad_norm": 0.7878917455673218, + "learning_rate": 0.00017851947459298007, + "loss": 2.674, + "step": 4330 + }, + { + "epoch": 0.3495278831409894, + "grad_norm": 0.7290656566619873, + "learning_rate": 0.0001785096975953573, + "loss": 2.6962, + "step": 4331 + }, + { + "epoch": 0.34960858687757246, + "grad_norm": 0.8465737104415894, + "learning_rate": 0.00017849991864106763, + "loss": 2.6793, + "step": 4332 + }, + { + "epoch": 0.34968929061415543, + "grad_norm": 0.7183132171630859, + "learning_rate": 0.0001784901377303548, + "loss": 2.6902, + "step": 4333 + }, + { + "epoch": 0.34976999435073847, + "grad_norm": 0.7535461783409119, + "learning_rate": 0.00017848035486346255, + "loss": 
2.7153, + "step": 4334 + }, + { + "epoch": 0.34985069808732144, + "grad_norm": 0.778734028339386, + "learning_rate": 0.0001784705700406347, + "loss": 2.6316, + "step": 4335 + }, + { + "epoch": 0.3499314018239044, + "grad_norm": 0.6937401294708252, + "learning_rate": 0.00017846078326211516, + "loss": 2.6902, + "step": 4336 + }, + { + "epoch": 0.35001210556048745, + "grad_norm": 0.7450751066207886, + "learning_rate": 0.00017845099452814774, + "loss": 2.6898, + "step": 4337 + }, + { + "epoch": 0.35009280929707043, + "grad_norm": 0.7535614967346191, + "learning_rate": 0.0001784412038389765, + "loss": 2.6969, + "step": 4338 + }, + { + "epoch": 0.35017351303365346, + "grad_norm": 0.6971385478973389, + "learning_rate": 0.00017843141119484543, + "loss": 2.6517, + "step": 4339 + }, + { + "epoch": 0.35025421677023644, + "grad_norm": 0.7233202457427979, + "learning_rate": 0.00017842161659599858, + "loss": 2.7332, + "step": 4340 + }, + { + "epoch": 0.35033492050681947, + "grad_norm": 0.7870340347290039, + "learning_rate": 0.00017841182004268, + "loss": 2.6485, + "step": 4341 + }, + { + "epoch": 0.35041562424340245, + "grad_norm": 0.7387053966522217, + "learning_rate": 0.0001784020215351339, + "loss": 2.6945, + "step": 4342 + }, + { + "epoch": 0.3504963279799855, + "grad_norm": 0.8357887268066406, + "learning_rate": 0.00017839222107360453, + "loss": 2.703, + "step": 4343 + }, + { + "epoch": 0.35057703171656845, + "grad_norm": 0.7197332978248596, + "learning_rate": 0.000178382418658336, + "loss": 2.6649, + "step": 4344 + }, + { + "epoch": 0.3506577354531515, + "grad_norm": 0.7416980862617493, + "learning_rate": 0.0001783726142895728, + "loss": 2.7393, + "step": 4345 + }, + { + "epoch": 0.35073843918973446, + "grad_norm": 0.6807832717895508, + "learning_rate": 0.00017836280796755912, + "loss": 2.6619, + "step": 4346 + }, + { + "epoch": 0.3508191429263175, + "grad_norm": 0.6858795285224915, + "learning_rate": 0.00017835299969253945, + "loss": 2.6266, + "step": 4347 + }, + { + 
"epoch": 0.35089984666290047, + "grad_norm": 0.8432363867759705, + "learning_rate": 0.0001783431894647582, + "loss": 2.6534, + "step": 4348 + }, + { + "epoch": 0.3509805503994835, + "grad_norm": 0.7240749001502991, + "learning_rate": 0.0001783333772844599, + "loss": 2.6851, + "step": 4349 + }, + { + "epoch": 0.3510612541360665, + "grad_norm": 0.7814531326293945, + "learning_rate": 0.00017832356315188906, + "loss": 2.7085, + "step": 4350 + }, + { + "epoch": 0.3511419578726495, + "grad_norm": 0.6989716291427612, + "learning_rate": 0.00017831374706729026, + "loss": 2.6674, + "step": 4351 + }, + { + "epoch": 0.3512226616092325, + "grad_norm": 0.7118446230888367, + "learning_rate": 0.0001783039290309082, + "loss": 2.6837, + "step": 4352 + }, + { + "epoch": 0.3513033653458155, + "grad_norm": 0.7641892433166504, + "learning_rate": 0.00017829410904298754, + "loss": 2.6415, + "step": 4353 + }, + { + "epoch": 0.3513840690823985, + "grad_norm": 0.6975794434547424, + "learning_rate": 0.000178284287103773, + "loss": 2.6679, + "step": 4354 + }, + { + "epoch": 0.35146477281898153, + "grad_norm": 0.7192546725273132, + "learning_rate": 0.00017827446321350943, + "loss": 2.6539, + "step": 4355 + }, + { + "epoch": 0.3515454765555645, + "grad_norm": 0.8749549388885498, + "learning_rate": 0.00017826463737244155, + "loss": 2.7254, + "step": 4356 + }, + { + "epoch": 0.35162618029214754, + "grad_norm": 0.8509732484817505, + "learning_rate": 0.0001782548095808144, + "loss": 2.7679, + "step": 4357 + }, + { + "epoch": 0.3517068840287305, + "grad_norm": 0.7647901773452759, + "learning_rate": 0.00017824497983887278, + "loss": 2.7049, + "step": 4358 + }, + { + "epoch": 0.35178758776531355, + "grad_norm": 0.7551973462104797, + "learning_rate": 0.00017823514814686178, + "loss": 2.7086, + "step": 4359 + }, + { + "epoch": 0.3518682915018965, + "grad_norm": 0.730140209197998, + "learning_rate": 0.00017822531450502633, + "loss": 2.6334, + "step": 4360 + }, + { + "epoch": 0.35194899523847956, + 
"grad_norm": 0.8210160136222839, + "learning_rate": 0.00017821547891361158, + "loss": 2.7248, + "step": 4361 + }, + { + "epoch": 0.35202969897506253, + "grad_norm": 0.761972963809967, + "learning_rate": 0.00017820564137286264, + "loss": 2.6502, + "step": 4362 + }, + { + "epoch": 0.35211040271164556, + "grad_norm": 0.7564061284065247, + "learning_rate": 0.00017819580188302466, + "loss": 2.6795, + "step": 4363 + }, + { + "epoch": 0.35219110644822854, + "grad_norm": 0.7382947206497192, + "learning_rate": 0.00017818596044434293, + "loss": 2.6754, + "step": 4364 + }, + { + "epoch": 0.3522718101848116, + "grad_norm": 0.737194836139679, + "learning_rate": 0.00017817611705706266, + "loss": 2.7098, + "step": 4365 + }, + { + "epoch": 0.35235251392139455, + "grad_norm": 0.7183281779289246, + "learning_rate": 0.0001781662717214292, + "loss": 2.6528, + "step": 4366 + }, + { + "epoch": 0.3524332176579776, + "grad_norm": 0.7785990238189697, + "learning_rate": 0.00017815642443768794, + "loss": 2.6419, + "step": 4367 + }, + { + "epoch": 0.35251392139456056, + "grad_norm": 0.7114452719688416, + "learning_rate": 0.00017814657520608427, + "loss": 2.7088, + "step": 4368 + }, + { + "epoch": 0.3525946251311436, + "grad_norm": 0.746969997882843, + "learning_rate": 0.00017813672402686365, + "loss": 2.7199, + "step": 4369 + }, + { + "epoch": 0.35267532886772657, + "grad_norm": 0.7700605988502502, + "learning_rate": 0.00017812687090027165, + "loss": 2.6713, + "step": 4370 + }, + { + "epoch": 0.3527560326043096, + "grad_norm": 0.7733504772186279, + "learning_rate": 0.0001781170158265538, + "loss": 2.6916, + "step": 4371 + }, + { + "epoch": 0.3528367363408926, + "grad_norm": 0.7769689559936523, + "learning_rate": 0.00017810715880595566, + "loss": 2.7787, + "step": 4372 + }, + { + "epoch": 0.3529174400774756, + "grad_norm": 0.7538996934890747, + "learning_rate": 0.000178097299838723, + "loss": 2.6964, + "step": 4373 + }, + { + "epoch": 0.3529981438140586, + "grad_norm": 0.7777890563011169, + 
"learning_rate": 0.00017808743892510146, + "loss": 2.6882, + "step": 4374 + }, + { + "epoch": 0.3530788475506416, + "grad_norm": 0.8331751823425293, + "learning_rate": 0.00017807757606533683, + "loss": 2.7113, + "step": 4375 + }, + { + "epoch": 0.3531595512872246, + "grad_norm": 0.8039207458496094, + "learning_rate": 0.00017806771125967492, + "loss": 2.6694, + "step": 4376 + }, + { + "epoch": 0.3532402550238076, + "grad_norm": 0.7727575898170471, + "learning_rate": 0.00017805784450836154, + "loss": 2.6639, + "step": 4377 + }, + { + "epoch": 0.3533209587603906, + "grad_norm": 0.8247967958450317, + "learning_rate": 0.00017804797581164264, + "loss": 2.6539, + "step": 4378 + }, + { + "epoch": 0.35340166249697363, + "grad_norm": 0.7574009299278259, + "learning_rate": 0.0001780381051697642, + "loss": 2.7163, + "step": 4379 + }, + { + "epoch": 0.3534823662335566, + "grad_norm": 0.7304368615150452, + "learning_rate": 0.0001780282325829721, + "loss": 2.5759, + "step": 4380 + }, + { + "epoch": 0.35356306997013964, + "grad_norm": 0.7133963704109192, + "learning_rate": 0.00017801835805151257, + "loss": 2.7008, + "step": 4381 + }, + { + "epoch": 0.3536437737067226, + "grad_norm": 0.7525407075881958, + "learning_rate": 0.00017800848157563157, + "loss": 2.6785, + "step": 4382 + }, + { + "epoch": 0.35372447744330565, + "grad_norm": 0.7306779623031616, + "learning_rate": 0.00017799860315557528, + "loss": 2.6454, + "step": 4383 + }, + { + "epoch": 0.35380518117988863, + "grad_norm": 0.6657043695449829, + "learning_rate": 0.00017798872279158994, + "loss": 2.708, + "step": 4384 + }, + { + "epoch": 0.35388588491647166, + "grad_norm": 0.7655978202819824, + "learning_rate": 0.00017797884048392177, + "loss": 2.727, + "step": 4385 + }, + { + "epoch": 0.35396658865305464, + "grad_norm": 0.6802939176559448, + "learning_rate": 0.00017796895623281702, + "loss": 2.659, + "step": 4386 + }, + { + "epoch": 0.3540472923896376, + "grad_norm": 0.7191160917282104, + "learning_rate": 
0.00017795907003852207, + "loss": 2.6335, + "step": 4387 + }, + { + "epoch": 0.35412799612622065, + "grad_norm": 0.7771886587142944, + "learning_rate": 0.00017794918190128337, + "loss": 2.6658, + "step": 4388 + }, + { + "epoch": 0.3542086998628036, + "grad_norm": 0.7133512496948242, + "learning_rate": 0.00017793929182134723, + "loss": 2.6701, + "step": 4389 + }, + { + "epoch": 0.35428940359938665, + "grad_norm": 0.7795221209526062, + "learning_rate": 0.00017792939979896022, + "loss": 2.6932, + "step": 4390 + }, + { + "epoch": 0.35437010733596963, + "grad_norm": 0.726767897605896, + "learning_rate": 0.00017791950583436887, + "loss": 2.676, + "step": 4391 + }, + { + "epoch": 0.35445081107255266, + "grad_norm": 0.7447288632392883, + "learning_rate": 0.00017790960992781972, + "loss": 2.7195, + "step": 4392 + }, + { + "epoch": 0.35453151480913564, + "grad_norm": 0.8053649663925171, + "learning_rate": 0.0001778997120795595, + "loss": 2.6851, + "step": 4393 + }, + { + "epoch": 0.35461221854571867, + "grad_norm": 0.7258884906768799, + "learning_rate": 0.00017788981228983474, + "loss": 2.6819, + "step": 4394 + }, + { + "epoch": 0.35469292228230165, + "grad_norm": 0.7279395461082458, + "learning_rate": 0.0001778799105588923, + "loss": 2.6954, + "step": 4395 + }, + { + "epoch": 0.3547736260188847, + "grad_norm": 0.7372962236404419, + "learning_rate": 0.0001778700068869789, + "loss": 2.7049, + "step": 4396 + }, + { + "epoch": 0.35485432975546766, + "grad_norm": 0.712003767490387, + "learning_rate": 0.00017786010127434135, + "loss": 2.7413, + "step": 4397 + }, + { + "epoch": 0.3549350334920507, + "grad_norm": 0.7487424612045288, + "learning_rate": 0.0001778501937212266, + "loss": 2.7231, + "step": 4398 + }, + { + "epoch": 0.35501573722863367, + "grad_norm": 0.73053377866745, + "learning_rate": 0.00017784028422788146, + "loss": 2.7029, + "step": 4399 + }, + { + "epoch": 0.3550964409652167, + "grad_norm": 0.697062611579895, + "learning_rate": 0.00017783037279455298, + "loss": 
2.7139, + "step": 4400 + }, + { + "epoch": 0.3551771447017997, + "grad_norm": 0.7750880718231201, + "learning_rate": 0.00017782045942148819, + "loss": 2.6601, + "step": 4401 + }, + { + "epoch": 0.3552578484383827, + "grad_norm": 0.7124977111816406, + "learning_rate": 0.00017781054410893413, + "loss": 2.6119, + "step": 4402 + }, + { + "epoch": 0.3553385521749657, + "grad_norm": 0.7773111462593079, + "learning_rate": 0.00017780062685713785, + "loss": 2.7181, + "step": 4403 + }, + { + "epoch": 0.3554192559115487, + "grad_norm": 0.7282142639160156, + "learning_rate": 0.00017779070766634663, + "loss": 2.7141, + "step": 4404 + }, + { + "epoch": 0.3554999596481317, + "grad_norm": 0.8578598499298096, + "learning_rate": 0.0001777807865368076, + "loss": 2.7628, + "step": 4405 + }, + { + "epoch": 0.3555806633847147, + "grad_norm": 0.7126399874687195, + "learning_rate": 0.00017777086346876809, + "loss": 2.6914, + "step": 4406 + }, + { + "epoch": 0.3556613671212977, + "grad_norm": 0.8026365637779236, + "learning_rate": 0.00017776093846247533, + "loss": 2.7059, + "step": 4407 + }, + { + "epoch": 0.35574207085788073, + "grad_norm": 0.7839884161949158, + "learning_rate": 0.0001777510115181767, + "loss": 2.7265, + "step": 4408 + }, + { + "epoch": 0.3558227745944637, + "grad_norm": 0.7498767971992493, + "learning_rate": 0.00017774108263611966, + "loss": 2.7201, + "step": 4409 + }, + { + "epoch": 0.35590347833104674, + "grad_norm": 0.6996301412582397, + "learning_rate": 0.0001777311518165516, + "loss": 2.6271, + "step": 4410 + }, + { + "epoch": 0.3559841820676297, + "grad_norm": 0.7721461057662964, + "learning_rate": 0.00017772121905972003, + "loss": 2.6739, + "step": 4411 + }, + { + "epoch": 0.35606488580421275, + "grad_norm": 0.8018803000450134, + "learning_rate": 0.00017771128436587256, + "loss": 2.7092, + "step": 4412 + }, + { + "epoch": 0.3561455895407957, + "grad_norm": 0.7185639142990112, + "learning_rate": 0.0001777013477352567, + "loss": 2.6996, + "step": 4413 + }, + { + 
"epoch": 0.35622629327737876, + "grad_norm": 0.7218519449234009, + "learning_rate": 0.0001776914091681202, + "loss": 2.6555, + "step": 4414 + }, + { + "epoch": 0.35630699701396173, + "grad_norm": 0.7234479188919067, + "learning_rate": 0.00017768146866471062, + "loss": 2.6762, + "step": 4415 + }, + { + "epoch": 0.35638770075054477, + "grad_norm": 0.6723350286483765, + "learning_rate": 0.00017767152622527582, + "loss": 2.6272, + "step": 4416 + }, + { + "epoch": 0.35646840448712774, + "grad_norm": 0.7281947731971741, + "learning_rate": 0.00017766158185006356, + "loss": 2.7216, + "step": 4417 + }, + { + "epoch": 0.3565491082237108, + "grad_norm": 0.8350874781608582, + "learning_rate": 0.00017765163553932166, + "loss": 2.6619, + "step": 4418 + }, + { + "epoch": 0.35662981196029375, + "grad_norm": 0.7454007267951965, + "learning_rate": 0.00017764168729329801, + "loss": 2.6623, + "step": 4419 + }, + { + "epoch": 0.3567105156968768, + "grad_norm": 0.7419041395187378, + "learning_rate": 0.00017763173711224058, + "loss": 2.6773, + "step": 4420 + }, + { + "epoch": 0.35679121943345976, + "grad_norm": 0.7965987920761108, + "learning_rate": 0.0001776217849963973, + "loss": 2.6426, + "step": 4421 + }, + { + "epoch": 0.3568719231700428, + "grad_norm": 0.7093302607536316, + "learning_rate": 0.00017761183094601622, + "loss": 2.6745, + "step": 4422 + }, + { + "epoch": 0.35695262690662577, + "grad_norm": 0.7937216758728027, + "learning_rate": 0.00017760187496134548, + "loss": 2.7275, + "step": 4423 + }, + { + "epoch": 0.3570333306432088, + "grad_norm": 0.9185259938240051, + "learning_rate": 0.00017759191704263313, + "loss": 2.7055, + "step": 4424 + }, + { + "epoch": 0.3571140343797918, + "grad_norm": 0.7365124821662903, + "learning_rate": 0.00017758195719012743, + "loss": 2.6504, + "step": 4425 + }, + { + "epoch": 0.3571947381163748, + "grad_norm": 0.6992416977882385, + "learning_rate": 0.0001775719954040765, + "loss": 2.6684, + "step": 4426 + }, + { + "epoch": 0.3572754418529578, + 
"grad_norm": 0.7742372751235962, + "learning_rate": 0.00017756203168472866, + "loss": 2.6877, + "step": 4427 + }, + { + "epoch": 0.3573561455895408, + "grad_norm": 0.7448472380638123, + "learning_rate": 0.0001775520660323323, + "loss": 2.7027, + "step": 4428 + }, + { + "epoch": 0.3574368493261238, + "grad_norm": 0.7201915979385376, + "learning_rate": 0.00017754209844713569, + "loss": 2.7046, + "step": 4429 + }, + { + "epoch": 0.3575175530627068, + "grad_norm": 0.6675081253051758, + "learning_rate": 0.0001775321289293873, + "loss": 2.6503, + "step": 4430 + }, + { + "epoch": 0.3575982567992898, + "grad_norm": 0.7252706289291382, + "learning_rate": 0.0001775221574793356, + "loss": 2.6053, + "step": 4431 + }, + { + "epoch": 0.35767896053587284, + "grad_norm": 0.7134702801704407, + "learning_rate": 0.00017751218409722906, + "loss": 2.6857, + "step": 4432 + }, + { + "epoch": 0.3577596642724558, + "grad_norm": 0.7074102163314819, + "learning_rate": 0.0001775022087833163, + "loss": 2.6871, + "step": 4433 + }, + { + "epoch": 0.35784036800903885, + "grad_norm": 0.693520724773407, + "learning_rate": 0.00017749223153784588, + "loss": 2.6629, + "step": 4434 + }, + { + "epoch": 0.3579210717456218, + "grad_norm": 0.6933221817016602, + "learning_rate": 0.0001774822523610665, + "loss": 2.6793, + "step": 4435 + }, + { + "epoch": 0.35800177548220485, + "grad_norm": 0.75307297706604, + "learning_rate": 0.00017747227125322685, + "loss": 2.7012, + "step": 4436 + }, + { + "epoch": 0.35808247921878783, + "grad_norm": 0.7732915282249451, + "learning_rate": 0.0001774622882145757, + "loss": 2.6908, + "step": 4437 + }, + { + "epoch": 0.3581631829553708, + "grad_norm": 0.7067054510116577, + "learning_rate": 0.0001774523032453618, + "loss": 2.7494, + "step": 4438 + }, + { + "epoch": 0.35824388669195384, + "grad_norm": 0.7412838935852051, + "learning_rate": 0.00017744231634583406, + "loss": 2.6734, + "step": 4439 + }, + { + "epoch": 0.3583245904285368, + "grad_norm": 0.7663930654525757, + 
"learning_rate": 0.00017743232751624136, + "loss": 2.6952, + "step": 4440 + }, + { + "epoch": 0.35840529416511985, + "grad_norm": 0.70650714635849, + "learning_rate": 0.00017742233675683268, + "loss": 2.6806, + "step": 4441 + }, + { + "epoch": 0.3584859979017028, + "grad_norm": 0.698310375213623, + "learning_rate": 0.00017741234406785692, + "loss": 2.6471, + "step": 4442 + }, + { + "epoch": 0.35856670163828586, + "grad_norm": 0.7274026274681091, + "learning_rate": 0.00017740234944956323, + "loss": 2.6688, + "step": 4443 + }, + { + "epoch": 0.35864740537486883, + "grad_norm": 0.6944074034690857, + "learning_rate": 0.00017739235290220067, + "loss": 2.6954, + "step": 4444 + }, + { + "epoch": 0.35872810911145186, + "grad_norm": 0.841995358467102, + "learning_rate": 0.00017738235442601834, + "loss": 2.7169, + "step": 4445 + }, + { + "epoch": 0.35880881284803484, + "grad_norm": 0.74863201379776, + "learning_rate": 0.00017737235402126545, + "loss": 2.6534, + "step": 4446 + }, + { + "epoch": 0.3588895165846179, + "grad_norm": 0.7260422110557556, + "learning_rate": 0.00017736235168819126, + "loss": 2.6266, + "step": 4447 + }, + { + "epoch": 0.35897022032120085, + "grad_norm": 0.7450951337814331, + "learning_rate": 0.00017735234742704504, + "loss": 2.7328, + "step": 4448 + }, + { + "epoch": 0.3590509240577839, + "grad_norm": 0.6942493319511414, + "learning_rate": 0.00017734234123807614, + "loss": 2.7219, + "step": 4449 + }, + { + "epoch": 0.35913162779436686, + "grad_norm": 0.7676761746406555, + "learning_rate": 0.00017733233312153393, + "loss": 2.6594, + "step": 4450 + }, + { + "epoch": 0.3592123315309499, + "grad_norm": 0.7446104288101196, + "learning_rate": 0.00017732232307766778, + "loss": 2.6877, + "step": 4451 + }, + { + "epoch": 0.35929303526753287, + "grad_norm": 0.7551130056381226, + "learning_rate": 0.00017731231110672727, + "loss": 2.672, + "step": 4452 + }, + { + "epoch": 0.3593737390041159, + "grad_norm": 0.6876464486122131, + "learning_rate": 
0.00017730229720896182, + "loss": 2.6658, + "step": 4453 + }, + { + "epoch": 0.3594544427406989, + "grad_norm": 0.6992844343185425, + "learning_rate": 0.00017729228138462107, + "loss": 2.6805, + "step": 4454 + }, + { + "epoch": 0.3595351464772819, + "grad_norm": 0.8437497615814209, + "learning_rate": 0.00017728226363395466, + "loss": 2.6884, + "step": 4455 + }, + { + "epoch": 0.3596158502138649, + "grad_norm": 0.7669322490692139, + "learning_rate": 0.00017727224395721217, + "loss": 2.6432, + "step": 4456 + }, + { + "epoch": 0.3596965539504479, + "grad_norm": 0.7613428831100464, + "learning_rate": 0.0001772622223546434, + "loss": 2.6124, + "step": 4457 + }, + { + "epoch": 0.3597772576870309, + "grad_norm": 0.719932496547699, + "learning_rate": 0.00017725219882649807, + "loss": 2.6623, + "step": 4458 + }, + { + "epoch": 0.3598579614236139, + "grad_norm": 0.7650800347328186, + "learning_rate": 0.000177242173373026, + "loss": 2.7551, + "step": 4459 + }, + { + "epoch": 0.3599386651601969, + "grad_norm": 0.7423754930496216, + "learning_rate": 0.0001772321459944771, + "loss": 2.7375, + "step": 4460 + }, + { + "epoch": 0.36001936889677993, + "grad_norm": 0.7602835297584534, + "learning_rate": 0.0001772221166911012, + "loss": 2.7086, + "step": 4461 + }, + { + "epoch": 0.3601000726333629, + "grad_norm": 0.7246943712234497, + "learning_rate": 0.00017721208546314827, + "loss": 2.7068, + "step": 4462 + }, + { + "epoch": 0.36018077636994594, + "grad_norm": 0.715965211391449, + "learning_rate": 0.00017720205231086837, + "loss": 2.689, + "step": 4463 + }, + { + "epoch": 0.3602614801065289, + "grad_norm": 0.7696218490600586, + "learning_rate": 0.00017719201723451151, + "loss": 2.611, + "step": 4464 + }, + { + "epoch": 0.36034218384311195, + "grad_norm": 0.7599236369132996, + "learning_rate": 0.00017718198023432779, + "loss": 2.6504, + "step": 4465 + }, + { + "epoch": 0.36042288757969493, + "grad_norm": 0.7674956321716309, + "learning_rate": 0.0001771719413105674, + "loss": 2.7559, 
+ "step": 4466 + }, + { + "epoch": 0.36050359131627796, + "grad_norm": 0.7263289093971252, + "learning_rate": 0.00017716190046348045, + "loss": 2.6822, + "step": 4467 + }, + { + "epoch": 0.36058429505286094, + "grad_norm": 0.7564195990562439, + "learning_rate": 0.0001771518576933173, + "loss": 2.7319, + "step": 4468 + }, + { + "epoch": 0.36066499878944397, + "grad_norm": 0.7291253805160522, + "learning_rate": 0.00017714181300032813, + "loss": 2.704, + "step": 4469 + }, + { + "epoch": 0.36074570252602695, + "grad_norm": 0.7354169487953186, + "learning_rate": 0.00017713176638476332, + "loss": 2.6344, + "step": 4470 + }, + { + "epoch": 0.36082640626261, + "grad_norm": 0.7104110717773438, + "learning_rate": 0.0001771217178468733, + "loss": 2.665, + "step": 4471 + }, + { + "epoch": 0.36090710999919295, + "grad_norm": 0.6913934350013733, + "learning_rate": 0.00017711166738690847, + "loss": 2.6674, + "step": 4472 + }, + { + "epoch": 0.360987813735776, + "grad_norm": 0.7999634742736816, + "learning_rate": 0.0001771016150051193, + "loss": 2.6847, + "step": 4473 + }, + { + "epoch": 0.36106851747235896, + "grad_norm": 0.7878915667533875, + "learning_rate": 0.00017709156070175634, + "loss": 2.7125, + "step": 4474 + }, + { + "epoch": 0.361149221208942, + "grad_norm": 0.7145688533782959, + "learning_rate": 0.00017708150447707017, + "loss": 2.6863, + "step": 4475 + }, + { + "epoch": 0.36122992494552497, + "grad_norm": 0.7518604397773743, + "learning_rate": 0.00017707144633131143, + "loss": 2.6616, + "step": 4476 + }, + { + "epoch": 0.361310628682108, + "grad_norm": 0.735634982585907, + "learning_rate": 0.0001770613862647308, + "loss": 2.6315, + "step": 4477 + }, + { + "epoch": 0.361391332418691, + "grad_norm": 0.7925180196762085, + "learning_rate": 0.00017705132427757895, + "loss": 2.6951, + "step": 4478 + }, + { + "epoch": 0.361472036155274, + "grad_norm": 0.6949547529220581, + "learning_rate": 0.00017704126037010667, + "loss": 2.6934, + "step": 4479 + }, + { + "epoch": 
0.361552739891857, + "grad_norm": 0.7233577966690063, + "learning_rate": 0.00017703119454256483, + "loss": 2.6773, + "step": 4480 + }, + { + "epoch": 0.36163344362844, + "grad_norm": 0.7303269505500793, + "learning_rate": 0.00017702112679520424, + "loss": 2.6351, + "step": 4481 + }, + { + "epoch": 0.361714147365023, + "grad_norm": 0.7620660066604614, + "learning_rate": 0.00017701105712827583, + "loss": 2.6748, + "step": 4482 + }, + { + "epoch": 0.36179485110160603, + "grad_norm": 0.7744965553283691, + "learning_rate": 0.00017700098554203057, + "loss": 2.7013, + "step": 4483 + }, + { + "epoch": 0.361875554838189, + "grad_norm": 0.8017357587814331, + "learning_rate": 0.00017699091203671947, + "loss": 2.7273, + "step": 4484 + }, + { + "epoch": 0.36195625857477204, + "grad_norm": 0.8014432191848755, + "learning_rate": 0.0001769808366125936, + "loss": 2.6864, + "step": 4485 + }, + { + "epoch": 0.362036962311355, + "grad_norm": 0.6914888620376587, + "learning_rate": 0.00017697075926990406, + "loss": 2.6851, + "step": 4486 + }, + { + "epoch": 0.36211766604793805, + "grad_norm": 0.7472698092460632, + "learning_rate": 0.00017696068000890196, + "loss": 2.695, + "step": 4487 + }, + { + "epoch": 0.362198369784521, + "grad_norm": 0.7506285309791565, + "learning_rate": 0.00017695059882983855, + "loss": 2.7055, + "step": 4488 + }, + { + "epoch": 0.362279073521104, + "grad_norm": 0.7501141428947449, + "learning_rate": 0.00017694051573296507, + "loss": 2.7109, + "step": 4489 + }, + { + "epoch": 0.36235977725768703, + "grad_norm": 0.6654670834541321, + "learning_rate": 0.00017693043071853284, + "loss": 2.6165, + "step": 4490 + }, + { + "epoch": 0.36244048099427, + "grad_norm": 0.7894664406776428, + "learning_rate": 0.00017692034378679315, + "loss": 2.7274, + "step": 4491 + }, + { + "epoch": 0.36252118473085304, + "grad_norm": 0.7206711173057556, + "learning_rate": 0.00017691025493799743, + "loss": 2.7047, + "step": 4492 + }, + { + "epoch": 0.362601888467436, + "grad_norm": 
0.7656282186508179, + "learning_rate": 0.00017690016417239708, + "loss": 2.696, + "step": 4493 + }, + { + "epoch": 0.36268259220401905, + "grad_norm": 0.7357437610626221, + "learning_rate": 0.00017689007149024362, + "loss": 2.7279, + "step": 4494 + }, + { + "epoch": 0.362763295940602, + "grad_norm": 0.7262146472930908, + "learning_rate": 0.00017687997689178864, + "loss": 2.6964, + "step": 4495 + }, + { + "epoch": 0.36284399967718506, + "grad_norm": 0.7839891910552979, + "learning_rate": 0.00017686988037728365, + "loss": 2.651, + "step": 4496 + }, + { + "epoch": 0.36292470341376803, + "grad_norm": 0.7150306105613708, + "learning_rate": 0.00017685978194698028, + "loss": 2.6481, + "step": 4497 + }, + { + "epoch": 0.36300540715035107, + "grad_norm": 0.7144685387611389, + "learning_rate": 0.00017684968160113025, + "loss": 2.7169, + "step": 4498 + }, + { + "epoch": 0.36308611088693404, + "grad_norm": 0.7593061327934265, + "learning_rate": 0.00017683957933998525, + "loss": 2.7543, + "step": 4499 + }, + { + "epoch": 0.3631668146235171, + "grad_norm": 0.7301446199417114, + "learning_rate": 0.00017682947516379707, + "loss": 2.6806, + "step": 4500 + }, + { + "epoch": 0.36324751836010005, + "grad_norm": 0.7314243316650391, + "learning_rate": 0.00017681936907281757, + "loss": 2.7227, + "step": 4501 + }, + { + "epoch": 0.3633282220966831, + "grad_norm": 0.7695817351341248, + "learning_rate": 0.00017680926106729852, + "loss": 2.7229, + "step": 4502 + }, + { + "epoch": 0.36340892583326606, + "grad_norm": 0.6885762810707092, + "learning_rate": 0.00017679915114749198, + "loss": 2.7246, + "step": 4503 + }, + { + "epoch": 0.3634896295698491, + "grad_norm": 0.6893608570098877, + "learning_rate": 0.0001767890393136498, + "loss": 2.6572, + "step": 4504 + }, + { + "epoch": 0.36357033330643207, + "grad_norm": 0.7011978626251221, + "learning_rate": 0.00017677892556602402, + "loss": 2.6775, + "step": 4505 + }, + { + "epoch": 0.3636510370430151, + "grad_norm": 0.6693406105041504, + 
"learning_rate": 0.00017676880990486672, + "loss": 2.6183, + "step": 4506 + }, + { + "epoch": 0.3637317407795981, + "grad_norm": 0.7023048996925354, + "learning_rate": 0.00017675869233043002, + "loss": 2.6772, + "step": 4507 + }, + { + "epoch": 0.3638124445161811, + "grad_norm": 0.6903806328773499, + "learning_rate": 0.00017674857284296605, + "loss": 2.6486, + "step": 4508 + }, + { + "epoch": 0.3638931482527641, + "grad_norm": 0.6799258589744568, + "learning_rate": 0.000176738451442727, + "loss": 2.6305, + "step": 4509 + }, + { + "epoch": 0.3639738519893471, + "grad_norm": 0.7935682535171509, + "learning_rate": 0.00017672832812996517, + "loss": 2.7365, + "step": 4510 + }, + { + "epoch": 0.3640545557259301, + "grad_norm": 0.7593684196472168, + "learning_rate": 0.00017671820290493284, + "loss": 2.7029, + "step": 4511 + }, + { + "epoch": 0.36413525946251313, + "grad_norm": 0.7185288667678833, + "learning_rate": 0.00017670807576788234, + "loss": 2.6646, + "step": 4512 + }, + { + "epoch": 0.3642159631990961, + "grad_norm": 0.7260291576385498, + "learning_rate": 0.00017669794671906606, + "loss": 2.6615, + "step": 4513 + }, + { + "epoch": 0.36429666693567914, + "grad_norm": 0.6933417916297913, + "learning_rate": 0.00017668781575873646, + "loss": 2.6678, + "step": 4514 + }, + { + "epoch": 0.3643773706722621, + "grad_norm": 0.7657343149185181, + "learning_rate": 0.00017667768288714603, + "loss": 2.7155, + "step": 4515 + }, + { + "epoch": 0.36445807440884515, + "grad_norm": 0.7326949834823608, + "learning_rate": 0.0001766675481045473, + "loss": 2.732, + "step": 4516 + }, + { + "epoch": 0.3645387781454281, + "grad_norm": 0.7370324730873108, + "learning_rate": 0.0001766574114111929, + "loss": 2.6124, + "step": 4517 + }, + { + "epoch": 0.36461948188201115, + "grad_norm": 0.7280072569847107, + "learning_rate": 0.00017664727280733536, + "loss": 2.6793, + "step": 4518 + }, + { + "epoch": 0.36470018561859413, + "grad_norm": 0.7174237370491028, + "learning_rate": 
0.00017663713229322748, + "loss": 2.629, + "step": 4519 + }, + { + "epoch": 0.36478088935517716, + "grad_norm": 0.6660771369934082, + "learning_rate": 0.0001766269898691219, + "loss": 2.6862, + "step": 4520 + }, + { + "epoch": 0.36486159309176014, + "grad_norm": 0.7024446725845337, + "learning_rate": 0.00017661684553527143, + "loss": 2.6602, + "step": 4521 + }, + { + "epoch": 0.36494229682834317, + "grad_norm": 0.7419618964195251, + "learning_rate": 0.0001766066992919289, + "loss": 2.6904, + "step": 4522 + }, + { + "epoch": 0.36502300056492615, + "grad_norm": 0.7425804138183594, + "learning_rate": 0.00017659655113934716, + "loss": 2.7312, + "step": 4523 + }, + { + "epoch": 0.3651037043015092, + "grad_norm": 0.7117013931274414, + "learning_rate": 0.00017658640107777915, + "loss": 2.6411, + "step": 4524 + }, + { + "epoch": 0.36518440803809216, + "grad_norm": 0.719613254070282, + "learning_rate": 0.00017657624910747782, + "loss": 2.6799, + "step": 4525 + }, + { + "epoch": 0.3652651117746752, + "grad_norm": 0.7654159665107727, + "learning_rate": 0.0001765660952286962, + "loss": 2.6675, + "step": 4526 + }, + { + "epoch": 0.36534581551125817, + "grad_norm": 0.7111814022064209, + "learning_rate": 0.00017655593944168734, + "loss": 2.6717, + "step": 4527 + }, + { + "epoch": 0.3654265192478412, + "grad_norm": 0.7494712471961975, + "learning_rate": 0.00017654578174670436, + "loss": 2.7181, + "step": 4528 + }, + { + "epoch": 0.3655072229844242, + "grad_norm": 0.8062291145324707, + "learning_rate": 0.0001765356221440004, + "loss": 2.6563, + "step": 4529 + }, + { + "epoch": 0.3655879267210072, + "grad_norm": 0.7923303842544556, + "learning_rate": 0.00017652546063382866, + "loss": 2.6295, + "step": 4530 + }, + { + "epoch": 0.3656686304575902, + "grad_norm": 0.7417340278625488, + "learning_rate": 0.00017651529721644238, + "loss": 2.6727, + "step": 4531 + }, + { + "epoch": 0.3657493341941732, + "grad_norm": 0.7326166033744812, + "learning_rate": 0.0001765051318920949, + "loss": 
2.702, + "step": 4532 + }, + { + "epoch": 0.3658300379307562, + "grad_norm": 0.8133745193481445, + "learning_rate": 0.00017649496466103957, + "loss": 2.7157, + "step": 4533 + }, + { + "epoch": 0.3659107416673392, + "grad_norm": 0.710502564907074, + "learning_rate": 0.00017648479552352973, + "loss": 2.6668, + "step": 4534 + }, + { + "epoch": 0.3659914454039222, + "grad_norm": 0.6947012543678284, + "learning_rate": 0.00017647462447981885, + "loss": 2.6865, + "step": 4535 + }, + { + "epoch": 0.36607214914050523, + "grad_norm": 0.8432720899581909, + "learning_rate": 0.0001764644515301604, + "loss": 2.6226, + "step": 4536 + }, + { + "epoch": 0.3661528528770882, + "grad_norm": 0.7321269512176514, + "learning_rate": 0.00017645427667480802, + "loss": 2.662, + "step": 4537 + }, + { + "epoch": 0.36623355661367124, + "grad_norm": 0.8099743723869324, + "learning_rate": 0.00017644409991401515, + "loss": 2.6853, + "step": 4538 + }, + { + "epoch": 0.3663142603502542, + "grad_norm": 0.6885355114936829, + "learning_rate": 0.0001764339212480355, + "loss": 2.6672, + "step": 4539 + }, + { + "epoch": 0.3663949640868372, + "grad_norm": 0.911396324634552, + "learning_rate": 0.00017642374067712276, + "loss": 2.5778, + "step": 4540 + }, + { + "epoch": 0.3664756678234202, + "grad_norm": 0.7461941838264465, + "learning_rate": 0.0001764135582015306, + "loss": 2.6629, + "step": 4541 + }, + { + "epoch": 0.3665563715600032, + "grad_norm": 0.772741436958313, + "learning_rate": 0.0001764033738215128, + "loss": 2.725, + "step": 4542 + }, + { + "epoch": 0.36663707529658623, + "grad_norm": 0.7256152629852295, + "learning_rate": 0.0001763931875373232, + "loss": 2.6439, + "step": 4543 + }, + { + "epoch": 0.3667177790331692, + "grad_norm": 0.8089167475700378, + "learning_rate": 0.0001763829993492157, + "loss": 2.5972, + "step": 4544 + }, + { + "epoch": 0.36679848276975224, + "grad_norm": 0.7115232944488525, + "learning_rate": 0.0001763728092574442, + "loss": 2.633, + "step": 4545 + }, + { + "epoch": 
0.3668791865063352, + "grad_norm": 0.7189347147941589, + "learning_rate": 0.00017636261726226266, + "loss": 2.619, + "step": 4546 + }, + { + "epoch": 0.36695989024291825, + "grad_norm": 0.7667742967605591, + "learning_rate": 0.00017635242336392506, + "loss": 2.667, + "step": 4547 + }, + { + "epoch": 0.36704059397950123, + "grad_norm": 0.7982457876205444, + "learning_rate": 0.00017634222756268545, + "loss": 2.6667, + "step": 4548 + }, + { + "epoch": 0.36712129771608426, + "grad_norm": 0.7465574145317078, + "learning_rate": 0.00017633202985879804, + "loss": 2.6436, + "step": 4549 + }, + { + "epoch": 0.36720200145266724, + "grad_norm": 0.7297804951667786, + "learning_rate": 0.00017632183025251686, + "loss": 2.6464, + "step": 4550 + }, + { + "epoch": 0.36728270518925027, + "grad_norm": 0.6885054111480713, + "learning_rate": 0.0001763116287440962, + "loss": 2.6742, + "step": 4551 + }, + { + "epoch": 0.36736340892583325, + "grad_norm": 0.7341574430465698, + "learning_rate": 0.00017630142533379023, + "loss": 2.6688, + "step": 4552 + }, + { + "epoch": 0.3674441126624163, + "grad_norm": 0.8565430045127869, + "learning_rate": 0.0001762912200218533, + "loss": 2.6889, + "step": 4553 + }, + { + "epoch": 0.36752481639899925, + "grad_norm": 0.7509489059448242, + "learning_rate": 0.00017628101280853974, + "loss": 2.6177, + "step": 4554 + }, + { + "epoch": 0.3676055201355823, + "grad_norm": 0.8128334879875183, + "learning_rate": 0.00017627080369410396, + "loss": 2.7301, + "step": 4555 + }, + { + "epoch": 0.36768622387216526, + "grad_norm": 0.7511637210845947, + "learning_rate": 0.00017626059267880035, + "loss": 2.7327, + "step": 4556 + }, + { + "epoch": 0.3677669276087483, + "grad_norm": 0.8350822925567627, + "learning_rate": 0.00017625037976288347, + "loss": 2.6073, + "step": 4557 + }, + { + "epoch": 0.36784763134533127, + "grad_norm": 0.7743313312530518, + "learning_rate": 0.00017624016494660776, + "loss": 2.7055, + "step": 4558 + }, + { + "epoch": 0.3679283350819143, + 
"grad_norm": 0.8196439146995544, + "learning_rate": 0.00017622994823022787, + "loss": 2.6565, + "step": 4559 + }, + { + "epoch": 0.3680090388184973, + "grad_norm": 0.7223393321037292, + "learning_rate": 0.00017621972961399837, + "loss": 2.68, + "step": 4560 + }, + { + "epoch": 0.3680897425550803, + "grad_norm": 0.7215418219566345, + "learning_rate": 0.000176209509098174, + "loss": 2.6627, + "step": 4561 + }, + { + "epoch": 0.3681704462916633, + "grad_norm": 0.8050473928451538, + "learning_rate": 0.00017619928668300946, + "loss": 2.5802, + "step": 4562 + }, + { + "epoch": 0.3682511500282463, + "grad_norm": 0.7452750205993652, + "learning_rate": 0.00017618906236875948, + "loss": 2.6524, + "step": 4563 + }, + { + "epoch": 0.3683318537648293, + "grad_norm": 0.7950742244720459, + "learning_rate": 0.00017617883615567888, + "loss": 2.6371, + "step": 4564 + }, + { + "epoch": 0.36841255750141233, + "grad_norm": 0.7185397744178772, + "learning_rate": 0.00017616860804402261, + "loss": 2.6531, + "step": 4565 + }, + { + "epoch": 0.3684932612379953, + "grad_norm": 0.7480553388595581, + "learning_rate": 0.0001761583780340455, + "loss": 2.6727, + "step": 4566 + }, + { + "epoch": 0.36857396497457834, + "grad_norm": 0.7740724086761475, + "learning_rate": 0.00017614814612600251, + "loss": 2.6095, + "step": 4567 + }, + { + "epoch": 0.3686546687111613, + "grad_norm": 0.9159810543060303, + "learning_rate": 0.00017613791232014866, + "loss": 2.7039, + "step": 4568 + }, + { + "epoch": 0.36873537244774435, + "grad_norm": 0.7478305697441101, + "learning_rate": 0.00017612767661673905, + "loss": 2.6307, + "step": 4569 + }, + { + "epoch": 0.3688160761843273, + "grad_norm": 0.9154726266860962, + "learning_rate": 0.00017611743901602874, + "loss": 2.675, + "step": 4570 + }, + { + "epoch": 0.36889677992091036, + "grad_norm": 0.7903287410736084, + "learning_rate": 0.0001761071995182728, + "loss": 2.6938, + "step": 4571 + }, + { + "epoch": 0.36897748365749333, + "grad_norm": 0.7919119596481323, + 
"learning_rate": 0.0001760969581237266, + "loss": 2.7092, + "step": 4572 + }, + { + "epoch": 0.36905818739407636, + "grad_norm": 0.8052253723144531, + "learning_rate": 0.00017608671483264522, + "loss": 2.6914, + "step": 4573 + }, + { + "epoch": 0.36913889113065934, + "grad_norm": 0.7660435438156128, + "learning_rate": 0.00017607646964528403, + "loss": 2.674, + "step": 4574 + }, + { + "epoch": 0.3692195948672424, + "grad_norm": 0.8554383516311646, + "learning_rate": 0.00017606622256189836, + "loss": 2.6792, + "step": 4575 + }, + { + "epoch": 0.36930029860382535, + "grad_norm": 0.7719140648841858, + "learning_rate": 0.00017605597358274358, + "loss": 2.6836, + "step": 4576 + }, + { + "epoch": 0.3693810023404084, + "grad_norm": 0.733068585395813, + "learning_rate": 0.00017604572270807513, + "loss": 2.6496, + "step": 4577 + }, + { + "epoch": 0.36946170607699136, + "grad_norm": 0.7622445225715637, + "learning_rate": 0.00017603546993814849, + "loss": 2.7097, + "step": 4578 + }, + { + "epoch": 0.3695424098135744, + "grad_norm": 0.7326679825782776, + "learning_rate": 0.00017602521527321913, + "loss": 2.6786, + "step": 4579 + }, + { + "epoch": 0.36962311355015737, + "grad_norm": 0.7579432129859924, + "learning_rate": 0.00017601495871354272, + "loss": 2.6618, + "step": 4580 + }, + { + "epoch": 0.3697038172867404, + "grad_norm": 0.8812715411186218, + "learning_rate": 0.00017600470025937485, + "loss": 2.6942, + "step": 4581 + }, + { + "epoch": 0.3697845210233234, + "grad_norm": 0.7230449318885803, + "learning_rate": 0.00017599443991097116, + "loss": 2.6374, + "step": 4582 + }, + { + "epoch": 0.3698652247599064, + "grad_norm": 0.8347739577293396, + "learning_rate": 0.00017598417766858735, + "loss": 2.6653, + "step": 4583 + }, + { + "epoch": 0.3699459284964894, + "grad_norm": 0.7826598882675171, + "learning_rate": 0.0001759739135324792, + "loss": 2.6342, + "step": 4584 + }, + { + "epoch": 0.3700266322330724, + "grad_norm": 0.749060332775116, + "learning_rate": 
0.00017596364750290254, + "loss": 2.7256, + "step": 4585 + }, + { + "epoch": 0.3701073359696554, + "grad_norm": 0.7470815181732178, + "learning_rate": 0.00017595337958011323, + "loss": 2.6485, + "step": 4586 + }, + { + "epoch": 0.3701880397062384, + "grad_norm": 0.7251530289649963, + "learning_rate": 0.00017594310976436716, + "loss": 2.6613, + "step": 4587 + }, + { + "epoch": 0.3702687434428214, + "grad_norm": 0.7143718004226685, + "learning_rate": 0.00017593283805592027, + "loss": 2.6101, + "step": 4588 + }, + { + "epoch": 0.37034944717940443, + "grad_norm": 0.7378203272819519, + "learning_rate": 0.00017592256445502855, + "loss": 2.6735, + "step": 4589 + }, + { + "epoch": 0.3704301509159874, + "grad_norm": 0.7193629741668701, + "learning_rate": 0.00017591228896194808, + "loss": 2.719, + "step": 4590 + }, + { + "epoch": 0.3705108546525704, + "grad_norm": 0.7377258539199829, + "learning_rate": 0.00017590201157693494, + "loss": 2.6789, + "step": 4591 + }, + { + "epoch": 0.3705915583891534, + "grad_norm": 0.7468351721763611, + "learning_rate": 0.00017589173230024522, + "loss": 2.6389, + "step": 4592 + }, + { + "epoch": 0.3706722621257364, + "grad_norm": 0.7612246870994568, + "learning_rate": 0.0001758814511321352, + "loss": 2.7045, + "step": 4593 + }, + { + "epoch": 0.37075296586231943, + "grad_norm": 0.7603838443756104, + "learning_rate": 0.00017587116807286102, + "loss": 2.7323, + "step": 4594 + }, + { + "epoch": 0.3708336695989024, + "grad_norm": 0.7436477541923523, + "learning_rate": 0.000175860883122679, + "loss": 2.7331, + "step": 4595 + }, + { + "epoch": 0.37091437333548544, + "grad_norm": 0.7004369497299194, + "learning_rate": 0.0001758505962818455, + "loss": 2.6418, + "step": 4596 + }, + { + "epoch": 0.3709950770720684, + "grad_norm": 0.711980938911438, + "learning_rate": 0.00017584030755061683, + "loss": 2.6184, + "step": 4597 + }, + { + "epoch": 0.37107578080865145, + "grad_norm": 0.6999367475509644, + "learning_rate": 0.0001758300169292495, + "loss": 
2.6584, + "step": 4598 + }, + { + "epoch": 0.3711564845452344, + "grad_norm": 0.6755785942077637, + "learning_rate": 0.0001758197244179999, + "loss": 2.664, + "step": 4599 + }, + { + "epoch": 0.37123718828181745, + "grad_norm": 0.7174055576324463, + "learning_rate": 0.00017580943001712455, + "loss": 2.6821, + "step": 4600 + }, + { + "epoch": 0.37131789201840043, + "grad_norm": 0.8218933343887329, + "learning_rate": 0.00017579913372688005, + "loss": 2.6355, + "step": 4601 + }, + { + "epoch": 0.37139859575498346, + "grad_norm": 0.7417960166931152, + "learning_rate": 0.000175788835547523, + "loss": 2.7226, + "step": 4602 + }, + { + "epoch": 0.37147929949156644, + "grad_norm": 0.824421763420105, + "learning_rate": 0.00017577853547931006, + "loss": 2.6526, + "step": 4603 + }, + { + "epoch": 0.37156000322814947, + "grad_norm": 0.7391949892044067, + "learning_rate": 0.00017576823352249794, + "loss": 2.6702, + "step": 4604 + }, + { + "epoch": 0.37164070696473245, + "grad_norm": 0.7890247106552124, + "learning_rate": 0.00017575792967734337, + "loss": 2.7281, + "step": 4605 + }, + { + "epoch": 0.3717214107013155, + "grad_norm": 0.785527765750885, + "learning_rate": 0.00017574762394410317, + "loss": 2.6728, + "step": 4606 + }, + { + "epoch": 0.37180211443789846, + "grad_norm": 0.7195863127708435, + "learning_rate": 0.00017573731632303415, + "loss": 2.6329, + "step": 4607 + }, + { + "epoch": 0.3718828181744815, + "grad_norm": 0.7896780371665955, + "learning_rate": 0.0001757270068143932, + "loss": 2.6776, + "step": 4608 + }, + { + "epoch": 0.37196352191106447, + "grad_norm": 0.7568275332450867, + "learning_rate": 0.00017571669541843735, + "loss": 2.6668, + "step": 4609 + }, + { + "epoch": 0.3720442256476475, + "grad_norm": 0.7923939228057861, + "learning_rate": 0.00017570638213542348, + "loss": 2.7033, + "step": 4610 + }, + { + "epoch": 0.3721249293842305, + "grad_norm": 0.7586569786071777, + "learning_rate": 0.00017569606696560868, + "loss": 2.7286, + "step": 4611 + }, + { + 
"epoch": 0.3722056331208135, + "grad_norm": 0.8222009539604187, + "learning_rate": 0.00017568574990925004, + "loss": 2.6448, + "step": 4612 + }, + { + "epoch": 0.3722863368573965, + "grad_norm": 0.7144019603729248, + "learning_rate": 0.00017567543096660466, + "loss": 2.6671, + "step": 4613 + }, + { + "epoch": 0.3723670405939795, + "grad_norm": 0.7602240443229675, + "learning_rate": 0.00017566511013792973, + "loss": 2.6492, + "step": 4614 + }, + { + "epoch": 0.3724477443305625, + "grad_norm": 0.7949689626693726, + "learning_rate": 0.00017565478742348245, + "loss": 2.7002, + "step": 4615 + }, + { + "epoch": 0.3725284480671455, + "grad_norm": 0.6922519207000732, + "learning_rate": 0.00017564446282352012, + "loss": 2.6917, + "step": 4616 + }, + { + "epoch": 0.3726091518037285, + "grad_norm": 0.7382915616035461, + "learning_rate": 0.0001756341363383, + "loss": 2.6375, + "step": 4617 + }, + { + "epoch": 0.37268985554031153, + "grad_norm": 0.7511888742446899, + "learning_rate": 0.00017562380796807956, + "loss": 2.6823, + "step": 4618 + }, + { + "epoch": 0.3727705592768945, + "grad_norm": 0.7273457646369934, + "learning_rate": 0.00017561347771311608, + "loss": 2.6124, + "step": 4619 + }, + { + "epoch": 0.37285126301347754, + "grad_norm": 0.689440131187439, + "learning_rate": 0.0001756031455736671, + "loss": 2.6931, + "step": 4620 + }, + { + "epoch": 0.3729319667500605, + "grad_norm": 0.7755659222602844, + "learning_rate": 0.00017559281154999013, + "loss": 2.6273, + "step": 4621 + }, + { + "epoch": 0.37301267048664355, + "grad_norm": 0.6940193176269531, + "learning_rate": 0.00017558247564234265, + "loss": 2.641, + "step": 4622 + }, + { + "epoch": 0.3730933742232265, + "grad_norm": 0.7387529015541077, + "learning_rate": 0.00017557213785098232, + "loss": 2.7229, + "step": 4623 + }, + { + "epoch": 0.37317407795980956, + "grad_norm": 0.6807727217674255, + "learning_rate": 0.00017556179817616678, + "loss": 2.6469, + "step": 4624 + }, + { + "epoch": 0.37325478169639253, + 
"grad_norm": 0.7203819751739502, + "learning_rate": 0.0001755514566181537, + "loss": 2.6239, + "step": 4625 + }, + { + "epoch": 0.37333548543297557, + "grad_norm": 0.9345876574516296, + "learning_rate": 0.0001755411131772008, + "loss": 2.7154, + "step": 4626 + }, + { + "epoch": 0.37341618916955854, + "grad_norm": 0.6787357330322266, + "learning_rate": 0.00017553076785356594, + "loss": 2.6374, + "step": 4627 + }, + { + "epoch": 0.3734968929061416, + "grad_norm": 0.7153670191764832, + "learning_rate": 0.0001755204206475069, + "loss": 2.6734, + "step": 4628 + }, + { + "epoch": 0.37357759664272455, + "grad_norm": 0.736464262008667, + "learning_rate": 0.00017551007155928154, + "loss": 2.7241, + "step": 4629 + }, + { + "epoch": 0.3736583003793076, + "grad_norm": 0.7134939432144165, + "learning_rate": 0.0001754997205891478, + "loss": 2.682, + "step": 4630 + }, + { + "epoch": 0.37373900411589056, + "grad_norm": 0.7071199417114258, + "learning_rate": 0.0001754893677373637, + "loss": 2.7361, + "step": 4631 + }, + { + "epoch": 0.3738197078524736, + "grad_norm": 0.7040621638298035, + "learning_rate": 0.00017547901300418722, + "loss": 2.7031, + "step": 4632 + }, + { + "epoch": 0.37390041158905657, + "grad_norm": 0.7179287075996399, + "learning_rate": 0.00017546865638987642, + "loss": 2.6755, + "step": 4633 + }, + { + "epoch": 0.3739811153256396, + "grad_norm": 0.7579259276390076, + "learning_rate": 0.00017545829789468944, + "loss": 2.6514, + "step": 4634 + }, + { + "epoch": 0.3740618190622226, + "grad_norm": 0.7825835347175598, + "learning_rate": 0.0001754479375188844, + "loss": 2.6876, + "step": 4635 + }, + { + "epoch": 0.3741425227988056, + "grad_norm": 0.7913421988487244, + "learning_rate": 0.00017543757526271956, + "loss": 2.7153, + "step": 4636 + }, + { + "epoch": 0.3742232265353886, + "grad_norm": 0.7766042947769165, + "learning_rate": 0.00017542721112645313, + "loss": 2.645, + "step": 4637 + }, + { + "epoch": 0.3743039302719716, + "grad_norm": 0.7363953590393066, + 
"learning_rate": 0.00017541684511034343, + "loss": 2.6376, + "step": 4638 + }, + { + "epoch": 0.3743846340085546, + "grad_norm": 0.6928617358207703, + "learning_rate": 0.00017540647721464881, + "loss": 2.6882, + "step": 4639 + }, + { + "epoch": 0.3744653377451376, + "grad_norm": 0.7832257747650146, + "learning_rate": 0.0001753961074396277, + "loss": 2.7305, + "step": 4640 + }, + { + "epoch": 0.3745460414817206, + "grad_norm": 0.7180350422859192, + "learning_rate": 0.00017538573578553844, + "loss": 2.6783, + "step": 4641 + }, + { + "epoch": 0.3746267452183036, + "grad_norm": 0.718209981918335, + "learning_rate": 0.00017537536225263964, + "loss": 2.6961, + "step": 4642 + }, + { + "epoch": 0.3747074489548866, + "grad_norm": 0.7056655287742615, + "learning_rate": 0.00017536498684118975, + "loss": 2.7096, + "step": 4643 + }, + { + "epoch": 0.3747881526914696, + "grad_norm": 0.8004828691482544, + "learning_rate": 0.0001753546095514474, + "loss": 2.7168, + "step": 4644 + }, + { + "epoch": 0.3748688564280526, + "grad_norm": 0.7630821466445923, + "learning_rate": 0.0001753442303836712, + "loss": 2.7091, + "step": 4645 + }, + { + "epoch": 0.3749495601646356, + "grad_norm": 0.7539668083190918, + "learning_rate": 0.0001753338493381198, + "loss": 2.651, + "step": 4646 + }, + { + "epoch": 0.37503026390121863, + "grad_norm": 0.7243319749832153, + "learning_rate": 0.000175323466415052, + "loss": 2.6765, + "step": 4647 + }, + { + "epoch": 0.3751109676378016, + "grad_norm": 0.8906281590461731, + "learning_rate": 0.00017531308161472647, + "loss": 2.5938, + "step": 4648 + }, + { + "epoch": 0.37519167137438464, + "grad_norm": 0.787966251373291, + "learning_rate": 0.0001753026949374021, + "loss": 2.6011, + "step": 4649 + }, + { + "epoch": 0.3752723751109676, + "grad_norm": 0.7763915061950684, + "learning_rate": 0.00017529230638333772, + "loss": 2.7197, + "step": 4650 + }, + { + "epoch": 0.37535307884755065, + "grad_norm": 0.7717103362083435, + "learning_rate": 0.00017528191595279224, + 
"loss": 2.6605, + "step": 4651 + }, + { + "epoch": 0.3754337825841336, + "grad_norm": 0.7340055108070374, + "learning_rate": 0.00017527152364602464, + "loss": 2.6856, + "step": 4652 + }, + { + "epoch": 0.37551448632071666, + "grad_norm": 0.7805169820785522, + "learning_rate": 0.0001752611294632939, + "loss": 2.7088, + "step": 4653 + }, + { + "epoch": 0.37559519005729963, + "grad_norm": 0.7894891500473022, + "learning_rate": 0.00017525073340485912, + "loss": 2.6691, + "step": 4654 + }, + { + "epoch": 0.37567589379388266, + "grad_norm": 0.7627872824668884, + "learning_rate": 0.0001752403354709793, + "loss": 2.6536, + "step": 4655 + }, + { + "epoch": 0.37575659753046564, + "grad_norm": 0.8097225427627563, + "learning_rate": 0.00017522993566191367, + "loss": 2.7108, + "step": 4656 + }, + { + "epoch": 0.3758373012670487, + "grad_norm": 0.834449827671051, + "learning_rate": 0.00017521953397792137, + "loss": 2.7565, + "step": 4657 + }, + { + "epoch": 0.37591800500363165, + "grad_norm": 0.7924147844314575, + "learning_rate": 0.00017520913041926166, + "loss": 2.7101, + "step": 4658 + }, + { + "epoch": 0.3759987087402147, + "grad_norm": 0.7407249808311462, + "learning_rate": 0.00017519872498619385, + "loss": 2.6501, + "step": 4659 + }, + { + "epoch": 0.37607941247679766, + "grad_norm": 0.7251791954040527, + "learning_rate": 0.0001751883176789772, + "loss": 2.6786, + "step": 4660 + }, + { + "epoch": 0.3761601162133807, + "grad_norm": 0.7120431661605835, + "learning_rate": 0.00017517790849787116, + "loss": 2.7244, + "step": 4661 + }, + { + "epoch": 0.37624081994996367, + "grad_norm": 0.724836528301239, + "learning_rate": 0.00017516749744313513, + "loss": 2.7099, + "step": 4662 + }, + { + "epoch": 0.3763215236865467, + "grad_norm": 0.7788939476013184, + "learning_rate": 0.00017515708451502855, + "loss": 2.6206, + "step": 4663 + }, + { + "epoch": 0.3764022274231297, + "grad_norm": 0.7518914341926575, + "learning_rate": 0.00017514666971381099, + "loss": 2.7505, + "step": 4664 + 
}, + { + "epoch": 0.3764829311597127, + "grad_norm": 0.8004730939865112, + "learning_rate": 0.00017513625303974194, + "loss": 2.6119, + "step": 4665 + }, + { + "epoch": 0.3765636348962957, + "grad_norm": 0.7661109566688538, + "learning_rate": 0.00017512583449308107, + "loss": 2.724, + "step": 4666 + }, + { + "epoch": 0.3766443386328787, + "grad_norm": 0.7669692635536194, + "learning_rate": 0.00017511541407408805, + "loss": 2.7109, + "step": 4667 + }, + { + "epoch": 0.3767250423694617, + "grad_norm": 0.738608181476593, + "learning_rate": 0.00017510499178302253, + "loss": 2.6642, + "step": 4668 + }, + { + "epoch": 0.3768057461060447, + "grad_norm": 0.7194661498069763, + "learning_rate": 0.00017509456762014432, + "loss": 2.6906, + "step": 4669 + }, + { + "epoch": 0.3768864498426277, + "grad_norm": 0.7025040984153748, + "learning_rate": 0.00017508414158571314, + "loss": 2.6596, + "step": 4670 + }, + { + "epoch": 0.37696715357921073, + "grad_norm": 0.7756575345993042, + "learning_rate": 0.00017507371367998892, + "loss": 2.7114, + "step": 4671 + }, + { + "epoch": 0.3770478573157937, + "grad_norm": 0.834966778755188, + "learning_rate": 0.00017506328390323148, + "loss": 2.7554, + "step": 4672 + }, + { + "epoch": 0.37712856105237674, + "grad_norm": 0.6997280120849609, + "learning_rate": 0.0001750528522557008, + "loss": 2.6285, + "step": 4673 + }, + { + "epoch": 0.3772092647889597, + "grad_norm": 0.7101716995239258, + "learning_rate": 0.0001750424187376569, + "loss": 2.6465, + "step": 4674 + }, + { + "epoch": 0.37728996852554275, + "grad_norm": 0.6577222347259521, + "learning_rate": 0.0001750319833493597, + "loss": 2.6372, + "step": 4675 + }, + { + "epoch": 0.37737067226212573, + "grad_norm": 0.7402529120445251, + "learning_rate": 0.00017502154609106937, + "loss": 2.6464, + "step": 4676 + }, + { + "epoch": 0.37745137599870876, + "grad_norm": 0.6858490705490112, + "learning_rate": 0.00017501110696304596, + "loss": 2.6141, + "step": 4677 + }, + { + "epoch": 
0.37753207973529174, + "grad_norm": 0.729468822479248, + "learning_rate": 0.0001750006659655497, + "loss": 2.6671, + "step": 4678 + }, + { + "epoch": 0.37761278347187477, + "grad_norm": 0.7197559475898743, + "learning_rate": 0.0001749902230988408, + "loss": 2.6462, + "step": 4679 + }, + { + "epoch": 0.37769348720845775, + "grad_norm": 0.7171144485473633, + "learning_rate": 0.00017497977836317957, + "loss": 2.6427, + "step": 4680 + }, + { + "epoch": 0.3777741909450408, + "grad_norm": 0.7423805594444275, + "learning_rate": 0.00017496933175882617, + "loss": 2.662, + "step": 4681 + }, + { + "epoch": 0.37785489468162375, + "grad_norm": 0.7498061060905457, + "learning_rate": 0.0001749588832860411, + "loss": 2.6243, + "step": 4682 + }, + { + "epoch": 0.3779355984182068, + "grad_norm": 0.7706165909767151, + "learning_rate": 0.0001749484329450847, + "loss": 2.6928, + "step": 4683 + }, + { + "epoch": 0.37801630215478976, + "grad_norm": 0.723363995552063, + "learning_rate": 0.00017493798073621745, + "loss": 2.6787, + "step": 4684 + }, + { + "epoch": 0.3780970058913728, + "grad_norm": 0.7444875836372375, + "learning_rate": 0.00017492752665969983, + "loss": 2.6789, + "step": 4685 + }, + { + "epoch": 0.37817770962795577, + "grad_norm": 0.6946491599082947, + "learning_rate": 0.00017491707071579237, + "loss": 2.6761, + "step": 4686 + }, + { + "epoch": 0.3782584133645388, + "grad_norm": 0.7171412706375122, + "learning_rate": 0.00017490661290475568, + "loss": 2.6788, + "step": 4687 + }, + { + "epoch": 0.3783391171011218, + "grad_norm": 0.7503272891044617, + "learning_rate": 0.00017489615322685038, + "loss": 2.7057, + "step": 4688 + }, + { + "epoch": 0.3784198208377048, + "grad_norm": 0.7458747625350952, + "learning_rate": 0.00017488569168233714, + "loss": 2.6857, + "step": 4689 + }, + { + "epoch": 0.3785005245742878, + "grad_norm": 0.7030516266822815, + "learning_rate": 0.0001748752282714768, + "loss": 2.6522, + "step": 4690 + }, + { + "epoch": 0.3785812283108708, + "grad_norm": 
0.7717545628547668, + "learning_rate": 0.00017486476299452994, + "loss": 2.6527, + "step": 4691 + }, + { + "epoch": 0.3786619320474538, + "grad_norm": 0.6788322925567627, + "learning_rate": 0.0001748542958517575, + "loss": 2.6362, + "step": 4692 + }, + { + "epoch": 0.3787426357840368, + "grad_norm": 0.8518630266189575, + "learning_rate": 0.0001748438268434204, + "loss": 2.6812, + "step": 4693 + }, + { + "epoch": 0.3788233395206198, + "grad_norm": 0.7167141437530518, + "learning_rate": 0.00017483335596977945, + "loss": 2.6414, + "step": 4694 + }, + { + "epoch": 0.3789040432572028, + "grad_norm": 0.7748053073883057, + "learning_rate": 0.00017482288323109567, + "loss": 2.7291, + "step": 4695 + }, + { + "epoch": 0.3789847469937858, + "grad_norm": 0.7203041911125183, + "learning_rate": 0.00017481240862763002, + "loss": 2.6957, + "step": 4696 + }, + { + "epoch": 0.3790654507303688, + "grad_norm": 0.7973119020462036, + "learning_rate": 0.00017480193215964362, + "loss": 2.7456, + "step": 4697 + }, + { + "epoch": 0.3791461544669518, + "grad_norm": 0.7851223945617676, + "learning_rate": 0.00017479145382739755, + "loss": 2.6525, + "step": 4698 + }, + { + "epoch": 0.3792268582035348, + "grad_norm": 0.7012068629264832, + "learning_rate": 0.0001747809736311529, + "loss": 2.6662, + "step": 4699 + }, + { + "epoch": 0.37930756194011783, + "grad_norm": 0.7266128659248352, + "learning_rate": 0.00017477049157117093, + "loss": 2.5853, + "step": 4700 + }, + { + "epoch": 0.3793882656767008, + "grad_norm": 0.7264416217803955, + "learning_rate": 0.00017476000764771285, + "loss": 2.6972, + "step": 4701 + }, + { + "epoch": 0.37946896941328384, + "grad_norm": 0.797709047794342, + "learning_rate": 0.00017474952186103995, + "loss": 2.6997, + "step": 4702 + }, + { + "epoch": 0.3795496731498668, + "grad_norm": 0.7552568912506104, + "learning_rate": 0.00017473903421141358, + "loss": 2.7178, + "step": 4703 + }, + { + "epoch": 0.37963037688644985, + "grad_norm": 0.7611108422279358, + 
"learning_rate": 0.0001747285446990951, + "loss": 2.6997, + "step": 4704 + }, + { + "epoch": 0.3797110806230328, + "grad_norm": 0.8081753253936768, + "learning_rate": 0.00017471805332434595, + "loss": 2.7242, + "step": 4705 + }, + { + "epoch": 0.37979178435961586, + "grad_norm": 0.728301465511322, + "learning_rate": 0.0001747075600874276, + "loss": 2.5885, + "step": 4706 + }, + { + "epoch": 0.37987248809619883, + "grad_norm": 0.7548539638519287, + "learning_rate": 0.00017469706498860155, + "loss": 2.7038, + "step": 4707 + }, + { + "epoch": 0.37995319183278187, + "grad_norm": 0.7054354548454285, + "learning_rate": 0.00017468656802812938, + "loss": 2.6566, + "step": 4708 + }, + { + "epoch": 0.38003389556936484, + "grad_norm": 0.7231585383415222, + "learning_rate": 0.0001746760692062727, + "loss": 2.6564, + "step": 4709 + }, + { + "epoch": 0.3801145993059479, + "grad_norm": 0.6931934952735901, + "learning_rate": 0.00017466556852329318, + "loss": 2.6403, + "step": 4710 + }, + { + "epoch": 0.38019530304253085, + "grad_norm": 0.7882393598556519, + "learning_rate": 0.00017465506597945255, + "loss": 2.6337, + "step": 4711 + }, + { + "epoch": 0.3802760067791139, + "grad_norm": 0.7015109658241272, + "learning_rate": 0.0001746445615750125, + "loss": 2.6742, + "step": 4712 + }, + { + "epoch": 0.38035671051569686, + "grad_norm": 0.7653505802154541, + "learning_rate": 0.0001746340553102348, + "loss": 2.6742, + "step": 4713 + }, + { + "epoch": 0.3804374142522799, + "grad_norm": 0.7166270613670349, + "learning_rate": 0.0001746235471853814, + "loss": 2.5995, + "step": 4714 + }, + { + "epoch": 0.38051811798886287, + "grad_norm": 0.7612236738204956, + "learning_rate": 0.0001746130372007141, + "loss": 2.7595, + "step": 4715 + }, + { + "epoch": 0.3805988217254459, + "grad_norm": 0.6783852577209473, + "learning_rate": 0.00017460252535649493, + "loss": 2.6156, + "step": 4716 + }, + { + "epoch": 0.3806795254620289, + "grad_norm": 0.7495827078819275, + "learning_rate": 
0.00017459201165298578, + "loss": 2.6847, + "step": 4717 + }, + { + "epoch": 0.3807602291986119, + "grad_norm": 0.814798891544342, + "learning_rate": 0.0001745814960904487, + "loss": 2.6211, + "step": 4718 + }, + { + "epoch": 0.3808409329351949, + "grad_norm": 0.7541367411613464, + "learning_rate": 0.0001745709786691458, + "loss": 2.6214, + "step": 4719 + }, + { + "epoch": 0.3809216366717779, + "grad_norm": 0.7065702676773071, + "learning_rate": 0.00017456045938933921, + "loss": 2.6699, + "step": 4720 + }, + { + "epoch": 0.3810023404083609, + "grad_norm": 0.751960813999176, + "learning_rate": 0.000174549938251291, + "loss": 2.6085, + "step": 4721 + }, + { + "epoch": 0.3810830441449439, + "grad_norm": 0.72068190574646, + "learning_rate": 0.00017453941525526353, + "loss": 2.6201, + "step": 4722 + }, + { + "epoch": 0.3811637478815269, + "grad_norm": 0.7201167941093445, + "learning_rate": 0.00017452889040151892, + "loss": 2.6775, + "step": 4723 + }, + { + "epoch": 0.38124445161810994, + "grad_norm": 0.7904958128929138, + "learning_rate": 0.00017451836369031956, + "loss": 2.7217, + "step": 4724 + }, + { + "epoch": 0.3813251553546929, + "grad_norm": 0.7096366882324219, + "learning_rate": 0.0001745078351219278, + "loss": 2.7004, + "step": 4725 + }, + { + "epoch": 0.38140585909127594, + "grad_norm": 0.6812441945075989, + "learning_rate": 0.00017449730469660602, + "loss": 2.6555, + "step": 4726 + }, + { + "epoch": 0.3814865628278589, + "grad_norm": 0.8037428855895996, + "learning_rate": 0.00017448677241461665, + "loss": 2.7094, + "step": 4727 + }, + { + "epoch": 0.38156726656444195, + "grad_norm": 0.7282679677009583, + "learning_rate": 0.00017447623827622223, + "loss": 2.6699, + "step": 4728 + }, + { + "epoch": 0.38164797030102493, + "grad_norm": 0.745705783367157, + "learning_rate": 0.00017446570228168523, + "loss": 2.6098, + "step": 4729 + }, + { + "epoch": 0.38172867403760796, + "grad_norm": 0.7098714113235474, + "learning_rate": 0.00017445516443126828, + "loss": 2.6628, 
+ "step": 4730 + }, + { + "epoch": 0.38180937777419094, + "grad_norm": 0.7376620769500732, + "learning_rate": 0.00017444462472523405, + "loss": 2.7086, + "step": 4731 + }, + { + "epoch": 0.38189008151077397, + "grad_norm": 0.717800498008728, + "learning_rate": 0.00017443408316384512, + "loss": 2.6582, + "step": 4732 + }, + { + "epoch": 0.38197078524735695, + "grad_norm": 0.7061530947685242, + "learning_rate": 0.00017442353974736428, + "loss": 2.6817, + "step": 4733 + }, + { + "epoch": 0.38205148898394, + "grad_norm": 0.744667112827301, + "learning_rate": 0.0001744129944760543, + "loss": 2.6649, + "step": 4734 + }, + { + "epoch": 0.38213219272052296, + "grad_norm": 0.7302529215812683, + "learning_rate": 0.00017440244735017797, + "loss": 2.7313, + "step": 4735 + }, + { + "epoch": 0.382212896457106, + "grad_norm": 0.6845258474349976, + "learning_rate": 0.00017439189836999816, + "loss": 2.637, + "step": 4736 + }, + { + "epoch": 0.38229360019368896, + "grad_norm": 0.7060490250587463, + "learning_rate": 0.0001743813475357778, + "loss": 2.6674, + "step": 4737 + }, + { + "epoch": 0.382374303930272, + "grad_norm": 0.7146841287612915, + "learning_rate": 0.00017437079484777977, + "loss": 2.6607, + "step": 4738 + }, + { + "epoch": 0.382455007666855, + "grad_norm": 0.7107662558555603, + "learning_rate": 0.00017436024030626719, + "loss": 2.6777, + "step": 4739 + }, + { + "epoch": 0.382535711403438, + "grad_norm": 0.7356777191162109, + "learning_rate": 0.00017434968391150303, + "loss": 2.5801, + "step": 4740 + }, + { + "epoch": 0.382616415140021, + "grad_norm": 0.6839054226875305, + "learning_rate": 0.00017433912566375037, + "loss": 2.6319, + "step": 4741 + }, + { + "epoch": 0.382697118876604, + "grad_norm": 0.7049627900123596, + "learning_rate": 0.00017432856556327236, + "loss": 2.741, + "step": 4742 + }, + { + "epoch": 0.382777822613187, + "grad_norm": 0.7926551103591919, + "learning_rate": 0.00017431800361033224, + "loss": 2.64, + "step": 4743 + }, + { + "epoch": 
0.38285852634976997, + "grad_norm": 0.734272301197052, + "learning_rate": 0.0001743074398051932, + "loss": 2.6575, + "step": 4744 + }, + { + "epoch": 0.382939230086353, + "grad_norm": 0.6959543824195862, + "learning_rate": 0.00017429687414811847, + "loss": 2.664, + "step": 4745 + }, + { + "epoch": 0.383019933822936, + "grad_norm": 0.7258255481719971, + "learning_rate": 0.00017428630663937148, + "loss": 2.6597, + "step": 4746 + }, + { + "epoch": 0.383100637559519, + "grad_norm": 0.8067473769187927, + "learning_rate": 0.0001742757372792155, + "loss": 2.6798, + "step": 4747 + }, + { + "epoch": 0.383181341296102, + "grad_norm": 0.7000626921653748, + "learning_rate": 0.000174265166067914, + "loss": 2.6561, + "step": 4748 + }, + { + "epoch": 0.383262045032685, + "grad_norm": 0.818914532661438, + "learning_rate": 0.00017425459300573045, + "loss": 2.6491, + "step": 4749 + }, + { + "epoch": 0.383342748769268, + "grad_norm": 0.7060543298721313, + "learning_rate": 0.00017424401809292833, + "loss": 2.6825, + "step": 4750 + }, + { + "epoch": 0.383423452505851, + "grad_norm": 0.893488883972168, + "learning_rate": 0.0001742334413297712, + "loss": 2.7201, + "step": 4751 + }, + { + "epoch": 0.383504156242434, + "grad_norm": 0.8131078481674194, + "learning_rate": 0.00017422286271652265, + "loss": 2.7828, + "step": 4752 + }, + { + "epoch": 0.38358485997901703, + "grad_norm": 0.7735587954521179, + "learning_rate": 0.00017421228225344634, + "loss": 2.6489, + "step": 4753 + }, + { + "epoch": 0.3836655637156, + "grad_norm": 0.713800311088562, + "learning_rate": 0.000174201699940806, + "loss": 2.6686, + "step": 4754 + }, + { + "epoch": 0.38374626745218304, + "grad_norm": 0.8246580362319946, + "learning_rate": 0.00017419111577886528, + "loss": 2.6771, + "step": 4755 + }, + { + "epoch": 0.383826971188766, + "grad_norm": 0.694542646408081, + "learning_rate": 0.00017418052976788805, + "loss": 2.6632, + "step": 4756 + }, + { + "epoch": 0.38390767492534905, + "grad_norm": 0.7200453281402588, + 
"learning_rate": 0.0001741699419081381, + "loss": 2.6386, + "step": 4757 + }, + { + "epoch": 0.38398837866193203, + "grad_norm": 0.7002073526382446, + "learning_rate": 0.00017415935219987933, + "loss": 2.6399, + "step": 4758 + }, + { + "epoch": 0.38406908239851506, + "grad_norm": 0.7056967616081238, + "learning_rate": 0.00017414876064337565, + "loss": 2.7048, + "step": 4759 + }, + { + "epoch": 0.38414978613509804, + "grad_norm": 0.7406448721885681, + "learning_rate": 0.000174138167238891, + "loss": 2.6256, + "step": 4760 + }, + { + "epoch": 0.38423048987168107, + "grad_norm": 0.7280529737472534, + "learning_rate": 0.00017412757198668945, + "loss": 2.6393, + "step": 4761 + }, + { + "epoch": 0.38431119360826405, + "grad_norm": 0.7626908421516418, + "learning_rate": 0.00017411697488703502, + "loss": 2.6717, + "step": 4762 + }, + { + "epoch": 0.3843918973448471, + "grad_norm": 0.716345489025116, + "learning_rate": 0.00017410637594019184, + "loss": 2.6457, + "step": 4763 + }, + { + "epoch": 0.38447260108143005, + "grad_norm": 0.8825077414512634, + "learning_rate": 0.00017409577514642405, + "loss": 2.7042, + "step": 4764 + }, + { + "epoch": 0.3845533048180131, + "grad_norm": 0.7301186919212341, + "learning_rate": 0.00017408517250599585, + "loss": 2.7065, + "step": 4765 + }, + { + "epoch": 0.38463400855459606, + "grad_norm": 0.8235788345336914, + "learning_rate": 0.0001740745680191715, + "loss": 2.6315, + "step": 4766 + }, + { + "epoch": 0.3847147122911791, + "grad_norm": 0.7355515956878662, + "learning_rate": 0.00017406396168621527, + "loss": 2.6939, + "step": 4767 + }, + { + "epoch": 0.38479541602776207, + "grad_norm": 0.6781682372093201, + "learning_rate": 0.0001740533535073915, + "loss": 2.6071, + "step": 4768 + }, + { + "epoch": 0.3848761197643451, + "grad_norm": 0.801191508769989, + "learning_rate": 0.0001740427434829646, + "loss": 2.6635, + "step": 4769 + }, + { + "epoch": 0.3849568235009281, + "grad_norm": 0.759682297706604, + "learning_rate": 
0.00017403213161319903, + "loss": 2.6823, + "step": 4770 + }, + { + "epoch": 0.3850375272375111, + "grad_norm": 0.806498110294342, + "learning_rate": 0.00017402151789835916, + "loss": 2.7111, + "step": 4771 + }, + { + "epoch": 0.3851182309740941, + "grad_norm": 0.7677996158599854, + "learning_rate": 0.00017401090233870958, + "loss": 2.6701, + "step": 4772 + }, + { + "epoch": 0.3851989347106771, + "grad_norm": 0.7449933290481567, + "learning_rate": 0.00017400028493451487, + "loss": 2.7037, + "step": 4773 + }, + { + "epoch": 0.3852796384472601, + "grad_norm": 0.7506107091903687, + "learning_rate": 0.0001739896656860396, + "loss": 2.6587, + "step": 4774 + }, + { + "epoch": 0.38536034218384313, + "grad_norm": 0.8781036734580994, + "learning_rate": 0.00017397904459354844, + "loss": 2.7634, + "step": 4775 + }, + { + "epoch": 0.3854410459204261, + "grad_norm": 0.7067514657974243, + "learning_rate": 0.0001739684216573061, + "loss": 2.638, + "step": 4776 + }, + { + "epoch": 0.38552174965700914, + "grad_norm": 0.7742886543273926, + "learning_rate": 0.00017395779687757735, + "loss": 2.7043, + "step": 4777 + }, + { + "epoch": 0.3856024533935921, + "grad_norm": 0.7348291277885437, + "learning_rate": 0.00017394717025462697, + "loss": 2.7404, + "step": 4778 + }, + { + "epoch": 0.38568315713017515, + "grad_norm": 0.7449346780776978, + "learning_rate": 0.00017393654178871984, + "loss": 2.631, + "step": 4779 + }, + { + "epoch": 0.3857638608667581, + "grad_norm": 0.7191200256347656, + "learning_rate": 0.00017392591148012078, + "loss": 2.6776, + "step": 4780 + }, + { + "epoch": 0.38584456460334116, + "grad_norm": 0.7055533528327942, + "learning_rate": 0.00017391527932909476, + "loss": 2.6219, + "step": 4781 + }, + { + "epoch": 0.38592526833992413, + "grad_norm": 0.73755943775177, + "learning_rate": 0.0001739046453359068, + "loss": 2.6692, + "step": 4782 + }, + { + "epoch": 0.38600597207650716, + "grad_norm": 0.7469369769096375, + "learning_rate": 0.00017389400950082185, + "loss": 
2.6572, + "step": 4783 + }, + { + "epoch": 0.38608667581309014, + "grad_norm": 0.7552534341812134, + "learning_rate": 0.00017388337182410504, + "loss": 2.6853, + "step": 4784 + }, + { + "epoch": 0.3861673795496732, + "grad_norm": 0.7453532814979553, + "learning_rate": 0.00017387273230602145, + "loss": 2.6601, + "step": 4785 + }, + { + "epoch": 0.38624808328625615, + "grad_norm": 0.7259301543235779, + "learning_rate": 0.0001738620909468363, + "loss": 2.6997, + "step": 4786 + }, + { + "epoch": 0.3863287870228392, + "grad_norm": 0.6970019936561584, + "learning_rate": 0.00017385144774681476, + "loss": 2.7497, + "step": 4787 + }, + { + "epoch": 0.38640949075942216, + "grad_norm": 0.7172032594680786, + "learning_rate": 0.00017384080270622208, + "loss": 2.7182, + "step": 4788 + }, + { + "epoch": 0.3864901944960052, + "grad_norm": 0.7184371948242188, + "learning_rate": 0.00017383015582532357, + "loss": 2.6358, + "step": 4789 + }, + { + "epoch": 0.38657089823258817, + "grad_norm": 0.7302096486091614, + "learning_rate": 0.00017381950710438458, + "loss": 2.6066, + "step": 4790 + }, + { + "epoch": 0.3866516019691712, + "grad_norm": 0.7043540477752686, + "learning_rate": 0.00017380885654367053, + "loss": 2.699, + "step": 4791 + }, + { + "epoch": 0.3867323057057542, + "grad_norm": 0.6919732689857483, + "learning_rate": 0.0001737982041434468, + "loss": 2.6025, + "step": 4792 + }, + { + "epoch": 0.3868130094423372, + "grad_norm": 0.7277705669403076, + "learning_rate": 0.00017378754990397894, + "loss": 2.6764, + "step": 4793 + }, + { + "epoch": 0.3868937131789202, + "grad_norm": 0.7546190619468689, + "learning_rate": 0.00017377689382553247, + "loss": 2.5865, + "step": 4794 + }, + { + "epoch": 0.38697441691550316, + "grad_norm": 0.7636401653289795, + "learning_rate": 0.00017376623590837294, + "loss": 2.6488, + "step": 4795 + }, + { + "epoch": 0.3870551206520862, + "grad_norm": 0.6945658922195435, + "learning_rate": 0.00017375557615276595, + "loss": 2.6739, + "step": 4796 + }, + { + 
"epoch": 0.38713582438866917, + "grad_norm": 0.7503637075424194, + "learning_rate": 0.00017374491455897722, + "loss": 2.6854, + "step": 4797 + }, + { + "epoch": 0.3872165281252522, + "grad_norm": 0.7457373142242432, + "learning_rate": 0.00017373425112727247, + "loss": 2.6659, + "step": 4798 + }, + { + "epoch": 0.3872972318618352, + "grad_norm": 0.7742534875869751, + "learning_rate": 0.0001737235858579174, + "loss": 2.6461, + "step": 4799 + }, + { + "epoch": 0.3873779355984182, + "grad_norm": 0.7397909760475159, + "learning_rate": 0.0001737129187511779, + "loss": 2.6779, + "step": 4800 + }, + { + "epoch": 0.3874586393350012, + "grad_norm": 0.7922031879425049, + "learning_rate": 0.00017370224980731974, + "loss": 2.6417, + "step": 4801 + }, + { + "epoch": 0.3875393430715842, + "grad_norm": 0.8503968715667725, + "learning_rate": 0.00017369157902660887, + "loss": 2.7063, + "step": 4802 + }, + { + "epoch": 0.3876200468081672, + "grad_norm": 0.7143701314926147, + "learning_rate": 0.00017368090640931125, + "loss": 2.6152, + "step": 4803 + }, + { + "epoch": 0.38770075054475023, + "grad_norm": 0.8016753196716309, + "learning_rate": 0.0001736702319556928, + "loss": 2.6005, + "step": 4804 + }, + { + "epoch": 0.3877814542813332, + "grad_norm": 0.7329538464546204, + "learning_rate": 0.00017365955566601962, + "loss": 2.6027, + "step": 4805 + }, + { + "epoch": 0.38786215801791624, + "grad_norm": 0.7005148530006409, + "learning_rate": 0.00017364887754055773, + "loss": 2.6585, + "step": 4806 + }, + { + "epoch": 0.3879428617544992, + "grad_norm": 0.7092769145965576, + "learning_rate": 0.00017363819757957333, + "loss": 2.6763, + "step": 4807 + }, + { + "epoch": 0.38802356549108225, + "grad_norm": 0.7475202679634094, + "learning_rate": 0.0001736275157833325, + "loss": 2.5969, + "step": 4808 + }, + { + "epoch": 0.3881042692276652, + "grad_norm": 0.822496235370636, + "learning_rate": 0.0001736168321521016, + "loss": 2.6758, + "step": 4809 + }, + { + "epoch": 0.38818497296424825, + 
"grad_norm": 0.7756842374801636, + "learning_rate": 0.0001736061466861467, + "loss": 2.6676, + "step": 4810 + }, + { + "epoch": 0.38826567670083123, + "grad_norm": 0.7192497849464417, + "learning_rate": 0.00017359545938573428, + "loss": 2.7045, + "step": 4811 + }, + { + "epoch": 0.38834638043741426, + "grad_norm": 0.7064149379730225, + "learning_rate": 0.00017358477025113063, + "loss": 2.6169, + "step": 4812 + }, + { + "epoch": 0.38842708417399724, + "grad_norm": 0.7297258973121643, + "learning_rate": 0.00017357407928260215, + "loss": 2.612, + "step": 4813 + }, + { + "epoch": 0.38850778791058027, + "grad_norm": 0.7011935114860535, + "learning_rate": 0.00017356338648041528, + "loss": 2.6507, + "step": 4814 + }, + { + "epoch": 0.38858849164716325, + "grad_norm": 0.7647256255149841, + "learning_rate": 0.00017355269184483651, + "loss": 2.6838, + "step": 4815 + }, + { + "epoch": 0.3886691953837463, + "grad_norm": 0.690182089805603, + "learning_rate": 0.0001735419953761324, + "loss": 2.6996, + "step": 4816 + }, + { + "epoch": 0.38874989912032926, + "grad_norm": 0.7142173647880554, + "learning_rate": 0.00017353129707456955, + "loss": 2.6705, + "step": 4817 + }, + { + "epoch": 0.3888306028569123, + "grad_norm": 0.801369309425354, + "learning_rate": 0.00017352059694041456, + "loss": 2.7002, + "step": 4818 + }, + { + "epoch": 0.38891130659349527, + "grad_norm": 0.7021649479866028, + "learning_rate": 0.0001735098949739341, + "loss": 2.7042, + "step": 4819 + }, + { + "epoch": 0.3889920103300783, + "grad_norm": 0.6802586317062378, + "learning_rate": 0.00017349919117539488, + "loss": 2.7186, + "step": 4820 + }, + { + "epoch": 0.3890727140666613, + "grad_norm": 0.7723212838172913, + "learning_rate": 0.0001734884855450637, + "loss": 2.608, + "step": 4821 + }, + { + "epoch": 0.3891534178032443, + "grad_norm": 0.7037193179130554, + "learning_rate": 0.00017347777808320735, + "loss": 2.6198, + "step": 4822 + }, + { + "epoch": 0.3892341215398273, + "grad_norm": 0.7172731161117554, + 
"learning_rate": 0.00017346706879009272, + "loss": 2.7037, + "step": 4823 + }, + { + "epoch": 0.3893148252764103, + "grad_norm": 0.7421539425849915, + "learning_rate": 0.00017345635766598667, + "loss": 2.6619, + "step": 4824 + }, + { + "epoch": 0.3893955290129933, + "grad_norm": 0.7587071061134338, + "learning_rate": 0.0001734456447111562, + "loss": 2.6229, + "step": 4825 + }, + { + "epoch": 0.3894762327495763, + "grad_norm": 0.6981459259986877, + "learning_rate": 0.00017343492992586822, + "loss": 2.5927, + "step": 4826 + }, + { + "epoch": 0.3895569364861593, + "grad_norm": 0.7628491520881653, + "learning_rate": 0.00017342421331038987, + "loss": 2.7047, + "step": 4827 + }, + { + "epoch": 0.38963764022274233, + "grad_norm": 0.8005064129829407, + "learning_rate": 0.00017341349486498818, + "loss": 2.6918, + "step": 4828 + }, + { + "epoch": 0.3897183439593253, + "grad_norm": 0.7756431102752686, + "learning_rate": 0.0001734027745899303, + "loss": 2.6621, + "step": 4829 + }, + { + "epoch": 0.38979904769590834, + "grad_norm": 0.7317833304405212, + "learning_rate": 0.00017339205248548338, + "loss": 2.7134, + "step": 4830 + }, + { + "epoch": 0.3898797514324913, + "grad_norm": 0.7293959259986877, + "learning_rate": 0.0001733813285519147, + "loss": 2.6865, + "step": 4831 + }, + { + "epoch": 0.38996045516907435, + "grad_norm": 0.7120299935340881, + "learning_rate": 0.00017337060278949147, + "loss": 2.6915, + "step": 4832 + }, + { + "epoch": 0.3900411589056573, + "grad_norm": 0.7255397439002991, + "learning_rate": 0.00017335987519848103, + "loss": 2.6671, + "step": 4833 + }, + { + "epoch": 0.39012186264224036, + "grad_norm": 0.7849408388137817, + "learning_rate": 0.0001733491457791507, + "loss": 2.6301, + "step": 4834 + }, + { + "epoch": 0.39020256637882333, + "grad_norm": 0.6998472809791565, + "learning_rate": 0.00017333841453176797, + "loss": 2.6587, + "step": 4835 + }, + { + "epoch": 0.39028327011540637, + "grad_norm": 0.7530023455619812, + "learning_rate": 
0.00017332768145660024, + "loss": 2.7011, + "step": 4836 + }, + { + "epoch": 0.39036397385198934, + "grad_norm": 0.7251207828521729, + "learning_rate": 0.00017331694655391497, + "loss": 2.6416, + "step": 4837 + }, + { + "epoch": 0.3904446775885724, + "grad_norm": 0.7016854882240295, + "learning_rate": 0.00017330620982397975, + "loss": 2.7224, + "step": 4838 + }, + { + "epoch": 0.39052538132515535, + "grad_norm": 0.7253310084342957, + "learning_rate": 0.00017329547126706217, + "loss": 2.6747, + "step": 4839 + }, + { + "epoch": 0.3906060850617384, + "grad_norm": 0.7114601731300354, + "learning_rate": 0.00017328473088342987, + "loss": 2.6654, + "step": 4840 + }, + { + "epoch": 0.39068678879832136, + "grad_norm": 0.7773289680480957, + "learning_rate": 0.00017327398867335048, + "loss": 2.6625, + "step": 4841 + }, + { + "epoch": 0.3907674925349044, + "grad_norm": 0.7541868686676025, + "learning_rate": 0.00017326324463709175, + "loss": 2.667, + "step": 4842 + }, + { + "epoch": 0.39084819627148737, + "grad_norm": 0.8095890283584595, + "learning_rate": 0.00017325249877492147, + "loss": 2.706, + "step": 4843 + }, + { + "epoch": 0.3909289000080704, + "grad_norm": 0.7019474506378174, + "learning_rate": 0.00017324175108710742, + "loss": 2.6125, + "step": 4844 + }, + { + "epoch": 0.3910096037446534, + "grad_norm": 0.7055396437644958, + "learning_rate": 0.00017323100157391746, + "loss": 2.6373, + "step": 4845 + }, + { + "epoch": 0.39109030748123635, + "grad_norm": 0.7332476377487183, + "learning_rate": 0.00017322025023561955, + "loss": 2.6559, + "step": 4846 + }, + { + "epoch": 0.3911710112178194, + "grad_norm": 0.7740387916564941, + "learning_rate": 0.00017320949707248158, + "loss": 2.7341, + "step": 4847 + }, + { + "epoch": 0.39125171495440236, + "grad_norm": 0.7371044754981995, + "learning_rate": 0.0001731987420847716, + "loss": 2.7318, + "step": 4848 + }, + { + "epoch": 0.3913324186909854, + "grad_norm": 0.7897786498069763, + "learning_rate": 0.00017318798527275758, + "loss": 
2.6759, + "step": 4849 + }, + { + "epoch": 0.39141312242756837, + "grad_norm": 0.7149896621704102, + "learning_rate": 0.0001731772266367077, + "loss": 2.7097, + "step": 4850 + }, + { + "epoch": 0.3914938261641514, + "grad_norm": 0.7824358344078064, + "learning_rate": 0.00017316646617689002, + "loss": 2.6376, + "step": 4851 + }, + { + "epoch": 0.3915745299007344, + "grad_norm": 0.7704496383666992, + "learning_rate": 0.00017315570389357272, + "loss": 2.6539, + "step": 4852 + }, + { + "epoch": 0.3916552336373174, + "grad_norm": 0.7489706873893738, + "learning_rate": 0.00017314493978702407, + "loss": 2.6716, + "step": 4853 + }, + { + "epoch": 0.3917359373739004, + "grad_norm": 0.7368690967559814, + "learning_rate": 0.00017313417385751234, + "loss": 2.7171, + "step": 4854 + }, + { + "epoch": 0.3918166411104834, + "grad_norm": 0.7215858697891235, + "learning_rate": 0.00017312340610530579, + "loss": 2.6306, + "step": 4855 + }, + { + "epoch": 0.3918973448470664, + "grad_norm": 0.7622217535972595, + "learning_rate": 0.00017311263653067285, + "loss": 2.6089, + "step": 4856 + }, + { + "epoch": 0.39197804858364943, + "grad_norm": 0.7317889332771301, + "learning_rate": 0.00017310186513388185, + "loss": 2.6831, + "step": 4857 + }, + { + "epoch": 0.3920587523202324, + "grad_norm": 0.894185483455658, + "learning_rate": 0.0001730910919152013, + "loss": 2.684, + "step": 4858 + }, + { + "epoch": 0.39213945605681544, + "grad_norm": 0.7313157916069031, + "learning_rate": 0.00017308031687489968, + "loss": 2.6465, + "step": 4859 + }, + { + "epoch": 0.3922201597933984, + "grad_norm": 0.7765825390815735, + "learning_rate": 0.00017306954001324552, + "loss": 2.6526, + "step": 4860 + }, + { + "epoch": 0.39230086352998145, + "grad_norm": 0.7171424031257629, + "learning_rate": 0.00017305876133050742, + "loss": 2.6212, + "step": 4861 + }, + { + "epoch": 0.3923815672665644, + "grad_norm": 0.7215112447738647, + "learning_rate": 0.000173047980826954, + "loss": 2.6329, + "step": 4862 + }, + { + 
"epoch": 0.39246227100314746, + "grad_norm": 0.7393578886985779, + "learning_rate": 0.00017303719850285396, + "loss": 2.7264, + "step": 4863 + }, + { + "epoch": 0.39254297473973043, + "grad_norm": 0.7620136737823486, + "learning_rate": 0.00017302641435847603, + "loss": 2.6686, + "step": 4864 + }, + { + "epoch": 0.39262367847631346, + "grad_norm": 0.7290963530540466, + "learning_rate": 0.00017301562839408893, + "loss": 2.578, + "step": 4865 + }, + { + "epoch": 0.39270438221289644, + "grad_norm": 0.6978541612625122, + "learning_rate": 0.00017300484060996153, + "loss": 2.6783, + "step": 4866 + }, + { + "epoch": 0.3927850859494795, + "grad_norm": 0.7212007641792297, + "learning_rate": 0.00017299405100636264, + "loss": 2.6282, + "step": 4867 + }, + { + "epoch": 0.39286578968606245, + "grad_norm": 0.757324755191803, + "learning_rate": 0.0001729832595835612, + "loss": 2.6933, + "step": 4868 + }, + { + "epoch": 0.3929464934226455, + "grad_norm": 0.7052869200706482, + "learning_rate": 0.00017297246634182618, + "loss": 2.7152, + "step": 4869 + }, + { + "epoch": 0.39302719715922846, + "grad_norm": 0.7326259016990662, + "learning_rate": 0.0001729616712814265, + "loss": 2.6792, + "step": 4870 + }, + { + "epoch": 0.3931079008958115, + "grad_norm": 0.7540302276611328, + "learning_rate": 0.00017295087440263128, + "loss": 2.6621, + "step": 4871 + }, + { + "epoch": 0.39318860463239447, + "grad_norm": 0.765454888343811, + "learning_rate": 0.00017294007570570956, + "loss": 2.7049, + "step": 4872 + }, + { + "epoch": 0.3932693083689775, + "grad_norm": 0.7303065061569214, + "learning_rate": 0.0001729292751909305, + "loss": 2.6867, + "step": 4873 + }, + { + "epoch": 0.3933500121055605, + "grad_norm": 0.7049854397773743, + "learning_rate": 0.00017291847285856325, + "loss": 2.7052, + "step": 4874 + }, + { + "epoch": 0.3934307158421435, + "grad_norm": 0.7199053764343262, + "learning_rate": 0.00017290766870887704, + "loss": 2.7195, + "step": 4875 + }, + { + "epoch": 0.3935114195787265, + 
"grad_norm": 0.7536180019378662, + "learning_rate": 0.00017289686274214118, + "loss": 2.6861, + "step": 4876 + }, + { + "epoch": 0.3935921233153095, + "grad_norm": 0.7295238971710205, + "learning_rate": 0.00017288605495862492, + "loss": 2.6684, + "step": 4877 + }, + { + "epoch": 0.3936728270518925, + "grad_norm": 0.7575719952583313, + "learning_rate": 0.00017287524535859763, + "loss": 2.6439, + "step": 4878 + }, + { + "epoch": 0.3937535307884755, + "grad_norm": 0.678909182548523, + "learning_rate": 0.00017286443394232874, + "loss": 2.6562, + "step": 4879 + }, + { + "epoch": 0.3938342345250585, + "grad_norm": 0.6908892393112183, + "learning_rate": 0.00017285362071008768, + "loss": 2.6364, + "step": 4880 + }, + { + "epoch": 0.39391493826164153, + "grad_norm": 0.7414079904556274, + "learning_rate": 0.00017284280566214397, + "loss": 2.5872, + "step": 4881 + }, + { + "epoch": 0.3939956419982245, + "grad_norm": 0.6824749112129211, + "learning_rate": 0.0001728319887987671, + "loss": 2.641, + "step": 4882 + }, + { + "epoch": 0.39407634573480754, + "grad_norm": 0.6908513903617859, + "learning_rate": 0.0001728211701202267, + "loss": 2.6977, + "step": 4883 + }, + { + "epoch": 0.3941570494713905, + "grad_norm": 0.7214735746383667, + "learning_rate": 0.0001728103496267924, + "loss": 2.5826, + "step": 4884 + }, + { + "epoch": 0.39423775320797355, + "grad_norm": 0.812781572341919, + "learning_rate": 0.00017279952731873385, + "loss": 2.6806, + "step": 4885 + }, + { + "epoch": 0.39431845694455653, + "grad_norm": 0.7610746026039124, + "learning_rate": 0.00017278870319632078, + "loss": 2.6046, + "step": 4886 + }, + { + "epoch": 0.39439916068113956, + "grad_norm": 0.7151652574539185, + "learning_rate": 0.00017277787725982293, + "loss": 2.6543, + "step": 4887 + }, + { + "epoch": 0.39447986441772254, + "grad_norm": 0.7293612360954285, + "learning_rate": 0.00017276704950951017, + "loss": 2.6384, + "step": 4888 + }, + { + "epoch": 0.39456056815430557, + "grad_norm": 0.8138254284858704, + 
"learning_rate": 0.00017275621994565233, + "loss": 2.7208, + "step": 4889 + }, + { + "epoch": 0.39464127189088855, + "grad_norm": 0.7557196021080017, + "learning_rate": 0.00017274538856851924, + "loss": 2.6571, + "step": 4890 + }, + { + "epoch": 0.3947219756274716, + "grad_norm": 0.7297266721725464, + "learning_rate": 0.00017273455537838097, + "loss": 2.6222, + "step": 4891 + }, + { + "epoch": 0.39480267936405455, + "grad_norm": 0.7838431596755981, + "learning_rate": 0.00017272372037550743, + "loss": 2.782, + "step": 4892 + }, + { + "epoch": 0.3948833831006376, + "grad_norm": 0.7799673676490784, + "learning_rate": 0.00017271288356016866, + "loss": 2.6658, + "step": 4893 + }, + { + "epoch": 0.39496408683722056, + "grad_norm": 0.8495545387268066, + "learning_rate": 0.0001727020449326348, + "loss": 2.6552, + "step": 4894 + }, + { + "epoch": 0.3950447905738036, + "grad_norm": 0.7317770719528198, + "learning_rate": 0.00017269120449317588, + "loss": 2.6616, + "step": 4895 + }, + { + "epoch": 0.39512549431038657, + "grad_norm": 0.7518885731697083, + "learning_rate": 0.00017268036224206217, + "loss": 2.6864, + "step": 4896 + }, + { + "epoch": 0.39520619804696955, + "grad_norm": 0.83487468957901, + "learning_rate": 0.00017266951817956382, + "loss": 2.7535, + "step": 4897 + }, + { + "epoch": 0.3952869017835526, + "grad_norm": 0.7440658211708069, + "learning_rate": 0.00017265867230595113, + "loss": 2.6584, + "step": 4898 + }, + { + "epoch": 0.39536760552013556, + "grad_norm": 0.7060485482215881, + "learning_rate": 0.00017264782462149438, + "loss": 2.6892, + "step": 4899 + }, + { + "epoch": 0.3954483092567186, + "grad_norm": 0.8410428166389465, + "learning_rate": 0.00017263697512646394, + "loss": 2.6425, + "step": 4900 + }, + { + "epoch": 0.39552901299330157, + "grad_norm": 0.757046639919281, + "learning_rate": 0.0001726261238211302, + "loss": 2.6159, + "step": 4901 + }, + { + "epoch": 0.3956097167298846, + "grad_norm": 0.7288908958435059, + "learning_rate": 
0.00017261527070576365, + "loss": 2.6753, + "step": 4902 + }, + { + "epoch": 0.3956904204664676, + "grad_norm": 0.8194541335105896, + "learning_rate": 0.0001726044157806347, + "loss": 2.6673, + "step": 4903 + }, + { + "epoch": 0.3957711242030506, + "grad_norm": 0.7957740426063538, + "learning_rate": 0.00017259355904601393, + "loss": 2.6662, + "step": 4904 + }, + { + "epoch": 0.3958518279396336, + "grad_norm": 0.8790122270584106, + "learning_rate": 0.0001725827005021719, + "loss": 2.7513, + "step": 4905 + }, + { + "epoch": 0.3959325316762166, + "grad_norm": 0.7674984335899353, + "learning_rate": 0.00017257184014937924, + "loss": 2.6375, + "step": 4906 + }, + { + "epoch": 0.3960132354127996, + "grad_norm": 0.7250992655754089, + "learning_rate": 0.00017256097798790663, + "loss": 2.63, + "step": 4907 + }, + { + "epoch": 0.3960939391493826, + "grad_norm": 0.8578312397003174, + "learning_rate": 0.00017255011401802475, + "loss": 2.702, + "step": 4908 + }, + { + "epoch": 0.3961746428859656, + "grad_norm": 0.7365253567695618, + "learning_rate": 0.00017253924824000438, + "loss": 2.6156, + "step": 4909 + }, + { + "epoch": 0.39625534662254863, + "grad_norm": 0.7148925065994263, + "learning_rate": 0.00017252838065411633, + "loss": 2.6658, + "step": 4910 + }, + { + "epoch": 0.3963360503591316, + "grad_norm": 0.7517829537391663, + "learning_rate": 0.00017251751126063148, + "loss": 2.6347, + "step": 4911 + }, + { + "epoch": 0.39641675409571464, + "grad_norm": 0.7880864143371582, + "learning_rate": 0.00017250664005982066, + "loss": 2.7045, + "step": 4912 + }, + { + "epoch": 0.3964974578322976, + "grad_norm": 0.7460693120956421, + "learning_rate": 0.00017249576705195482, + "loss": 2.6976, + "step": 4913 + }, + { + "epoch": 0.39657816156888065, + "grad_norm": 0.7179895043373108, + "learning_rate": 0.00017248489223730496, + "loss": 2.6366, + "step": 4914 + }, + { + "epoch": 0.3966588653054636, + "grad_norm": 0.7737421989440918, + "learning_rate": 0.00017247401561614213, + "loss": 
2.7116, + "step": 4915 + }, + { + "epoch": 0.39673956904204666, + "grad_norm": 0.8561483025550842, + "learning_rate": 0.0001724631371887374, + "loss": 2.6591, + "step": 4916 + }, + { + "epoch": 0.39682027277862963, + "grad_norm": 0.7616356611251831, + "learning_rate": 0.00017245225695536182, + "loss": 2.6436, + "step": 4917 + }, + { + "epoch": 0.39690097651521267, + "grad_norm": 0.7754645943641663, + "learning_rate": 0.0001724413749162866, + "loss": 2.6699, + "step": 4918 + }, + { + "epoch": 0.39698168025179564, + "grad_norm": 0.800165593624115, + "learning_rate": 0.000172430491071783, + "loss": 2.7155, + "step": 4919 + }, + { + "epoch": 0.3970623839883787, + "grad_norm": 0.8448799848556519, + "learning_rate": 0.00017241960542212223, + "loss": 2.6991, + "step": 4920 + }, + { + "epoch": 0.39714308772496165, + "grad_norm": 0.7106496095657349, + "learning_rate": 0.00017240871796757556, + "loss": 2.628, + "step": 4921 + }, + { + "epoch": 0.3972237914615447, + "grad_norm": 0.7332959175109863, + "learning_rate": 0.00017239782870841436, + "loss": 2.6159, + "step": 4922 + }, + { + "epoch": 0.39730449519812766, + "grad_norm": 0.7573551535606384, + "learning_rate": 0.00017238693764491002, + "loss": 2.67, + "step": 4923 + }, + { + "epoch": 0.3973851989347107, + "grad_norm": 0.7833136320114136, + "learning_rate": 0.00017237604477733399, + "loss": 2.7276, + "step": 4924 + }, + { + "epoch": 0.39746590267129367, + "grad_norm": 0.7233073711395264, + "learning_rate": 0.00017236515010595773, + "loss": 2.6654, + "step": 4925 + }, + { + "epoch": 0.3975466064078767, + "grad_norm": 0.7920324206352234, + "learning_rate": 0.00017235425363105273, + "loss": 2.7611, + "step": 4926 + }, + { + "epoch": 0.3976273101444597, + "grad_norm": 0.7096883058547974, + "learning_rate": 0.00017234335535289063, + "loss": 2.687, + "step": 4927 + }, + { + "epoch": 0.3977080138810427, + "grad_norm": 0.7231960296630859, + "learning_rate": 0.000172332455271743, + "loss": 2.6441, + "step": 4928 + }, + { + 
"epoch": 0.3977887176176257, + "grad_norm": 0.7852105498313904, + "learning_rate": 0.00017232155338788146, + "loss": 2.5948, + "step": 4929 + }, + { + "epoch": 0.3978694213542087, + "grad_norm": 0.788789689540863, + "learning_rate": 0.0001723106497015778, + "loss": 2.6797, + "step": 4930 + }, + { + "epoch": 0.3979501250907917, + "grad_norm": 0.7082793116569519, + "learning_rate": 0.00017229974421310377, + "loss": 2.6787, + "step": 4931 + }, + { + "epoch": 0.3980308288273747, + "grad_norm": 0.8157992362976074, + "learning_rate": 0.00017228883692273106, + "loss": 2.6367, + "step": 4932 + }, + { + "epoch": 0.3981115325639577, + "grad_norm": 0.7576673030853271, + "learning_rate": 0.00017227792783073157, + "loss": 2.6826, + "step": 4933 + }, + { + "epoch": 0.39819223630054074, + "grad_norm": 0.7225388884544373, + "learning_rate": 0.00017226701693737718, + "loss": 2.668, + "step": 4934 + }, + { + "epoch": 0.3982729400371237, + "grad_norm": 0.7029562592506409, + "learning_rate": 0.00017225610424293985, + "loss": 2.6613, + "step": 4935 + }, + { + "epoch": 0.39835364377370674, + "grad_norm": 0.73081374168396, + "learning_rate": 0.0001722451897476915, + "loss": 2.6378, + "step": 4936 + }, + { + "epoch": 0.3984343475102897, + "grad_norm": 0.744008481502533, + "learning_rate": 0.0001722342734519042, + "loss": 2.6501, + "step": 4937 + }, + { + "epoch": 0.39851505124687275, + "grad_norm": 0.7482618093490601, + "learning_rate": 0.00017222335535584996, + "loss": 2.7287, + "step": 4938 + }, + { + "epoch": 0.39859575498345573, + "grad_norm": 0.6487892866134644, + "learning_rate": 0.00017221243545980093, + "loss": 2.6417, + "step": 4939 + }, + { + "epoch": 0.39867645872003876, + "grad_norm": 0.7894789576530457, + "learning_rate": 0.00017220151376402923, + "loss": 2.7431, + "step": 4940 + }, + { + "epoch": 0.39875716245662174, + "grad_norm": 0.8232294321060181, + "learning_rate": 0.00017219059026880708, + "loss": 2.6824, + "step": 4941 + }, + { + "epoch": 0.39883786619320477, + 
"grad_norm": 0.6844691634178162, + "learning_rate": 0.00017217966497440668, + "loss": 2.6294, + "step": 4942 + }, + { + "epoch": 0.39891856992978775, + "grad_norm": 0.7245259881019592, + "learning_rate": 0.00017216873788110037, + "loss": 2.6815, + "step": 4943 + }, + { + "epoch": 0.3989992736663708, + "grad_norm": 0.7197226881980896, + "learning_rate": 0.00017215780898916045, + "loss": 2.725, + "step": 4944 + }, + { + "epoch": 0.39907997740295376, + "grad_norm": 0.8391285538673401, + "learning_rate": 0.00017214687829885934, + "loss": 2.6724, + "step": 4945 + }, + { + "epoch": 0.3991606811395368, + "grad_norm": 0.7357564568519592, + "learning_rate": 0.00017213594581046938, + "loss": 2.7052, + "step": 4946 + }, + { + "epoch": 0.39924138487611976, + "grad_norm": 0.7611483931541443, + "learning_rate": 0.00017212501152426312, + "loss": 2.7214, + "step": 4947 + }, + { + "epoch": 0.39932208861270274, + "grad_norm": 0.7314950227737427, + "learning_rate": 0.00017211407544051306, + "loss": 2.6594, + "step": 4948 + }, + { + "epoch": 0.3994027923492858, + "grad_norm": 0.774131178855896, + "learning_rate": 0.00017210313755949169, + "loss": 2.6812, + "step": 4949 + }, + { + "epoch": 0.39948349608586875, + "grad_norm": 0.707003116607666, + "learning_rate": 0.00017209219788147167, + "loss": 2.7334, + "step": 4950 + }, + { + "epoch": 0.3995641998224518, + "grad_norm": 0.8179643154144287, + "learning_rate": 0.0001720812564067256, + "loss": 2.6554, + "step": 4951 + }, + { + "epoch": 0.39964490355903476, + "grad_norm": 0.6572005152702332, + "learning_rate": 0.00017207031313552621, + "loss": 2.6423, + "step": 4952 + }, + { + "epoch": 0.3997256072956178, + "grad_norm": 0.7663072943687439, + "learning_rate": 0.00017205936806814623, + "loss": 2.689, + "step": 4953 + }, + { + "epoch": 0.39980631103220077, + "grad_norm": 0.7351107001304626, + "learning_rate": 0.00017204842120485846, + "loss": 2.631, + "step": 4954 + }, + { + "epoch": 0.3998870147687838, + "grad_norm": 0.7754253149032593, + 
"learning_rate": 0.00017203747254593564, + "loss": 2.6371, + "step": 4955 + }, + { + "epoch": 0.3999677185053668, + "grad_norm": 0.7471042275428772, + "learning_rate": 0.00017202652209165074, + "loss": 2.6542, + "step": 4956 + }, + { + "epoch": 0.4000484222419498, + "grad_norm": 0.7357343435287476, + "learning_rate": 0.00017201556984227664, + "loss": 2.6226, + "step": 4957 + }, + { + "epoch": 0.4001291259785328, + "grad_norm": 0.8096252679824829, + "learning_rate": 0.00017200461579808626, + "loss": 2.6458, + "step": 4958 + }, + { + "epoch": 0.4002098297151158, + "grad_norm": 0.7622970938682556, + "learning_rate": 0.0001719936599593526, + "loss": 2.7129, + "step": 4959 + }, + { + "epoch": 0.4002905334516988, + "grad_norm": 0.7374953627586365, + "learning_rate": 0.00017198270232634882, + "loss": 2.696, + "step": 4960 + }, + { + "epoch": 0.4003712371882818, + "grad_norm": 0.7897924184799194, + "learning_rate": 0.00017197174289934787, + "loss": 2.7508, + "step": 4961 + }, + { + "epoch": 0.4004519409248648, + "grad_norm": 0.7047984004020691, + "learning_rate": 0.00017196078167862298, + "loss": 2.6733, + "step": 4962 + }, + { + "epoch": 0.40053264466144783, + "grad_norm": 0.7866294980049133, + "learning_rate": 0.0001719498186644473, + "loss": 2.694, + "step": 4963 + }, + { + "epoch": 0.4006133483980308, + "grad_norm": 0.739923894405365, + "learning_rate": 0.00017193885385709409, + "loss": 2.7125, + "step": 4964 + }, + { + "epoch": 0.40069405213461384, + "grad_norm": 0.7506374716758728, + "learning_rate": 0.00017192788725683652, + "loss": 2.627, + "step": 4965 + }, + { + "epoch": 0.4007747558711968, + "grad_norm": 0.6591607928276062, + "learning_rate": 0.00017191691886394802, + "loss": 2.6723, + "step": 4966 + }, + { + "epoch": 0.40085545960777985, + "grad_norm": 0.7748788595199585, + "learning_rate": 0.00017190594867870192, + "loss": 2.6486, + "step": 4967 + }, + { + "epoch": 0.40093616334436283, + "grad_norm": 0.7518232464790344, + "learning_rate": 
0.0001718949767013716, + "loss": 2.6879, + "step": 4968 + }, + { + "epoch": 0.40101686708094586, + "grad_norm": 0.7360039949417114, + "learning_rate": 0.00017188400293223052, + "loss": 2.6506, + "step": 4969 + }, + { + "epoch": 0.40109757081752884, + "grad_norm": 0.7217130064964294, + "learning_rate": 0.0001718730273715522, + "loss": 2.6263, + "step": 4970 + }, + { + "epoch": 0.40117827455411187, + "grad_norm": 0.7246078252792358, + "learning_rate": 0.00017186205001961015, + "loss": 2.6222, + "step": 4971 + }, + { + "epoch": 0.40125897829069485, + "grad_norm": 0.7566879391670227, + "learning_rate": 0.00017185107087667794, + "loss": 2.7003, + "step": 4972 + }, + { + "epoch": 0.4013396820272779, + "grad_norm": 0.7881271243095398, + "learning_rate": 0.00017184008994302924, + "loss": 2.6463, + "step": 4973 + }, + { + "epoch": 0.40142038576386085, + "grad_norm": 0.7307420372962952, + "learning_rate": 0.00017182910721893775, + "loss": 2.667, + "step": 4974 + }, + { + "epoch": 0.4015010895004439, + "grad_norm": 0.7088132500648499, + "learning_rate": 0.00017181812270467708, + "loss": 2.6073, + "step": 4975 + }, + { + "epoch": 0.40158179323702686, + "grad_norm": 0.7839647531509399, + "learning_rate": 0.0001718071364005211, + "loss": 2.6594, + "step": 4976 + }, + { + "epoch": 0.4016624969736099, + "grad_norm": 0.7472013235092163, + "learning_rate": 0.00017179614830674353, + "loss": 2.737, + "step": 4977 + }, + { + "epoch": 0.40174320071019287, + "grad_norm": 0.7241616249084473, + "learning_rate": 0.0001717851584236183, + "loss": 2.6615, + "step": 4978 + }, + { + "epoch": 0.4018239044467759, + "grad_norm": 0.7918941378593445, + "learning_rate": 0.00017177416675141929, + "loss": 2.6774, + "step": 4979 + }, + { + "epoch": 0.4019046081833589, + "grad_norm": 0.801003098487854, + "learning_rate": 0.00017176317329042039, + "loss": 2.6749, + "step": 4980 + }, + { + "epoch": 0.4019853119199419, + "grad_norm": 0.7556802034378052, + "learning_rate": 0.00017175217804089564, + "loss": 
2.6197, + "step": 4981 + }, + { + "epoch": 0.4020660156565249, + "grad_norm": 0.7539604902267456, + "learning_rate": 0.00017174118100311904, + "loss": 2.6222, + "step": 4982 + }, + { + "epoch": 0.4021467193931079, + "grad_norm": 0.741436243057251, + "learning_rate": 0.0001717301821773647, + "loss": 2.6471, + "step": 4983 + }, + { + "epoch": 0.4022274231296909, + "grad_norm": 0.7449339628219604, + "learning_rate": 0.0001717191815639067, + "loss": 2.6448, + "step": 4984 + }, + { + "epoch": 0.40230812686627393, + "grad_norm": 0.7771497964859009, + "learning_rate": 0.0001717081791630192, + "loss": 2.673, + "step": 4985 + }, + { + "epoch": 0.4023888306028569, + "grad_norm": 0.6916669607162476, + "learning_rate": 0.00017169717497497646, + "loss": 2.6025, + "step": 4986 + }, + { + "epoch": 0.40246953433943994, + "grad_norm": 0.7373276948928833, + "learning_rate": 0.0001716861690000527, + "loss": 2.6783, + "step": 4987 + }, + { + "epoch": 0.4025502380760229, + "grad_norm": 0.7756158709526062, + "learning_rate": 0.0001716751612385222, + "loss": 2.7296, + "step": 4988 + }, + { + "epoch": 0.40263094181260595, + "grad_norm": 0.7725681066513062, + "learning_rate": 0.00017166415169065933, + "loss": 2.7169, + "step": 4989 + }, + { + "epoch": 0.4027116455491889, + "grad_norm": 0.7165024280548096, + "learning_rate": 0.00017165314035673846, + "loss": 2.677, + "step": 4990 + }, + { + "epoch": 0.40279234928577196, + "grad_norm": 0.8888981938362122, + "learning_rate": 0.00017164212723703404, + "loss": 2.7694, + "step": 4991 + }, + { + "epoch": 0.40287305302235493, + "grad_norm": 0.7439224720001221, + "learning_rate": 0.00017163111233182052, + "loss": 2.674, + "step": 4992 + }, + { + "epoch": 0.40295375675893796, + "grad_norm": 0.6948431730270386, + "learning_rate": 0.00017162009564137244, + "loss": 2.6595, + "step": 4993 + }, + { + "epoch": 0.40303446049552094, + "grad_norm": 0.7274380922317505, + "learning_rate": 0.00017160907716596438, + "loss": 2.649, + "step": 4994 + }, + { + 
"epoch": 0.403115164232104, + "grad_norm": 0.7127148509025574, + "learning_rate": 0.0001715980569058709, + "loss": 2.6883, + "step": 4995 + }, + { + "epoch": 0.40319586796868695, + "grad_norm": 0.7129155993461609, + "learning_rate": 0.00017158703486136668, + "loss": 2.6516, + "step": 4996 + }, + { + "epoch": 0.40327657170527, + "grad_norm": 0.7848126292228699, + "learning_rate": 0.00017157601103272646, + "loss": 2.6778, + "step": 4997 + }, + { + "epoch": 0.40335727544185296, + "grad_norm": 0.752268373966217, + "learning_rate": 0.0001715649854202249, + "loss": 2.7228, + "step": 4998 + }, + { + "epoch": 0.40343797917843593, + "grad_norm": 0.7750338912010193, + "learning_rate": 0.00017155395802413684, + "loss": 2.6338, + "step": 4999 + }, + { + "epoch": 0.40351868291501897, + "grad_norm": 0.7165457010269165, + "learning_rate": 0.00017154292884473713, + "loss": 2.6195, + "step": 5000 + }, + { + "epoch": 0.40351868291501897, + "eval_loss": 2.585501194000244, + "eval_runtime": 901.8519, + "eval_samples_per_second": 2.905, + "eval_steps_per_second": 0.485, + "step": 5000 + }, + { + "epoch": 0.40359938665160194, + "grad_norm": 0.8118943572044373, + "learning_rate": 0.00017153189788230062, + "loss": 2.6649, + "step": 5001 + }, + { + "epoch": 0.403680090388185, + "grad_norm": 0.722984790802002, + "learning_rate": 0.00017152086513710221, + "loss": 2.6929, + "step": 5002 + }, + { + "epoch": 0.40376079412476795, + "grad_norm": 0.700690507888794, + "learning_rate": 0.00017150983060941686, + "loss": 2.6368, + "step": 5003 + }, + { + "epoch": 0.403841497861351, + "grad_norm": 0.7331504225730896, + "learning_rate": 0.00017149879429951965, + "loss": 2.6826, + "step": 5004 + }, + { + "epoch": 0.40392220159793396, + "grad_norm": 0.7312643527984619, + "learning_rate": 0.00017148775620768553, + "loss": 2.6279, + "step": 5005 + }, + { + "epoch": 0.404002905334517, + "grad_norm": 0.7488462924957275, + "learning_rate": 0.00017147671633418972, + "loss": 2.6711, + "step": 5006 + }, + { + 
"epoch": 0.40408360907109997, + "grad_norm": 0.8620340824127197, + "learning_rate": 0.00017146567467930725, + "loss": 2.6637, + "step": 5007 + }, + { + "epoch": 0.404164312807683, + "grad_norm": 0.683907151222229, + "learning_rate": 0.00017145463124331335, + "loss": 2.6331, + "step": 5008 + }, + { + "epoch": 0.404245016544266, + "grad_norm": 0.7389389276504517, + "learning_rate": 0.0001714435860264833, + "loss": 2.7232, + "step": 5009 + }, + { + "epoch": 0.404325720280849, + "grad_norm": 0.7456515431404114, + "learning_rate": 0.00017143253902909228, + "loss": 2.6363, + "step": 5010 + }, + { + "epoch": 0.404406424017432, + "grad_norm": 0.7044962644577026, + "learning_rate": 0.0001714214902514157, + "loss": 2.6672, + "step": 5011 + }, + { + "epoch": 0.404487127754015, + "grad_norm": 0.7410328984260559, + "learning_rate": 0.00017141043969372887, + "loss": 2.6059, + "step": 5012 + }, + { + "epoch": 0.404567831490598, + "grad_norm": 0.6697140336036682, + "learning_rate": 0.00017139938735630722, + "loss": 2.7151, + "step": 5013 + }, + { + "epoch": 0.404648535227181, + "grad_norm": 0.746675431728363, + "learning_rate": 0.00017138833323942617, + "loss": 2.6792, + "step": 5014 + }, + { + "epoch": 0.404729238963764, + "grad_norm": 0.7724997401237488, + "learning_rate": 0.00017137727734336129, + "loss": 2.6234, + "step": 5015 + }, + { + "epoch": 0.40480994270034704, + "grad_norm": 0.8014429211616516, + "learning_rate": 0.00017136621966838805, + "loss": 2.6795, + "step": 5016 + }, + { + "epoch": 0.40489064643693, + "grad_norm": 0.6900430917739868, + "learning_rate": 0.00017135516021478205, + "loss": 2.7127, + "step": 5017 + }, + { + "epoch": 0.40497135017351304, + "grad_norm": 0.6648666858673096, + "learning_rate": 0.00017134409898281896, + "loss": 2.6564, + "step": 5018 + }, + { + "epoch": 0.405052053910096, + "grad_norm": 0.7054181098937988, + "learning_rate": 0.00017133303597277442, + "loss": 2.6652, + "step": 5019 + }, + { + "epoch": 0.40513275764667905, + "grad_norm": 
0.6847733855247498, + "learning_rate": 0.00017132197118492414, + "loss": 2.6997, + "step": 5020 + }, + { + "epoch": 0.40521346138326203, + "grad_norm": 0.7047749757766724, + "learning_rate": 0.00017131090461954392, + "loss": 2.6752, + "step": 5021 + }, + { + "epoch": 0.40529416511984506, + "grad_norm": 0.7549976706504822, + "learning_rate": 0.00017129983627690957, + "loss": 2.6736, + "step": 5022 + }, + { + "epoch": 0.40537486885642804, + "grad_norm": 0.7436367273330688, + "learning_rate": 0.00017128876615729686, + "loss": 2.7189, + "step": 5023 + }, + { + "epoch": 0.40545557259301107, + "grad_norm": 0.6515071988105774, + "learning_rate": 0.00017127769426098177, + "loss": 2.6422, + "step": 5024 + }, + { + "epoch": 0.40553627632959405, + "grad_norm": 0.6960858702659607, + "learning_rate": 0.00017126662058824024, + "loss": 2.6619, + "step": 5025 + }, + { + "epoch": 0.4056169800661771, + "grad_norm": 0.8075968623161316, + "learning_rate": 0.0001712555451393482, + "loss": 2.6678, + "step": 5026 + }, + { + "epoch": 0.40569768380276006, + "grad_norm": 0.6864624619483948, + "learning_rate": 0.00017124446791458176, + "loss": 2.6331, + "step": 5027 + }, + { + "epoch": 0.4057783875393431, + "grad_norm": 0.7218763828277588, + "learning_rate": 0.0001712333889142169, + "loss": 2.6316, + "step": 5028 + }, + { + "epoch": 0.40585909127592606, + "grad_norm": 0.7024715542793274, + "learning_rate": 0.0001712223081385298, + "loss": 2.623, + "step": 5029 + }, + { + "epoch": 0.4059397950125091, + "grad_norm": 0.6681575775146484, + "learning_rate": 0.0001712112255877966, + "loss": 2.6786, + "step": 5030 + }, + { + "epoch": 0.4060204987490921, + "grad_norm": 0.7249817848205566, + "learning_rate": 0.0001712001412622935, + "loss": 2.6179, + "step": 5031 + }, + { + "epoch": 0.4061012024856751, + "grad_norm": 0.7178316116333008, + "learning_rate": 0.00017118905516229677, + "loss": 2.696, + "step": 5032 + }, + { + "epoch": 0.4061819062222581, + "grad_norm": 0.7838767766952515, + 
"learning_rate": 0.0001711779672880827, + "loss": 2.6881, + "step": 5033 + }, + { + "epoch": 0.4062626099588411, + "grad_norm": 0.799937903881073, + "learning_rate": 0.0001711668776399276, + "loss": 2.7587, + "step": 5034 + }, + { + "epoch": 0.4063433136954241, + "grad_norm": 0.7622246146202087, + "learning_rate": 0.0001711557862181079, + "loss": 2.6621, + "step": 5035 + }, + { + "epoch": 0.4064240174320071, + "grad_norm": 0.7158814072608948, + "learning_rate": 0.00017114469302290003, + "loss": 2.6421, + "step": 5036 + }, + { + "epoch": 0.4065047211685901, + "grad_norm": 0.7913404107093811, + "learning_rate": 0.0001711335980545804, + "loss": 2.6323, + "step": 5037 + }, + { + "epoch": 0.40658542490517313, + "grad_norm": 0.718325138092041, + "learning_rate": 0.00017112250131342556, + "loss": 2.6171, + "step": 5038 + }, + { + "epoch": 0.4066661286417561, + "grad_norm": 0.7793646454811096, + "learning_rate": 0.0001711114027997121, + "loss": 2.7494, + "step": 5039 + }, + { + "epoch": 0.40674683237833914, + "grad_norm": 0.7774816155433655, + "learning_rate": 0.00017110030251371656, + "loss": 2.5534, + "step": 5040 + }, + { + "epoch": 0.4068275361149221, + "grad_norm": 0.8547549247741699, + "learning_rate": 0.00017108920045571564, + "loss": 2.7155, + "step": 5041 + }, + { + "epoch": 0.40690823985150515, + "grad_norm": 0.7685851454734802, + "learning_rate": 0.000171078096625986, + "loss": 2.6109, + "step": 5042 + }, + { + "epoch": 0.4069889435880881, + "grad_norm": 0.7953611016273499, + "learning_rate": 0.00017106699102480445, + "loss": 2.7034, + "step": 5043 + }, + { + "epoch": 0.40706964732467116, + "grad_norm": 0.7550730109214783, + "learning_rate": 0.00017105588365244764, + "loss": 2.7026, + "step": 5044 + }, + { + "epoch": 0.40715035106125413, + "grad_norm": 0.7036548256874084, + "learning_rate": 0.0001710447745091925, + "loss": 2.6246, + "step": 5045 + }, + { + "epoch": 0.40723105479783717, + "grad_norm": 0.7154512405395508, + "learning_rate": 0.00017103366359531586, 
+ "loss": 2.6592, + "step": 5046 + }, + { + "epoch": 0.40731175853442014, + "grad_norm": 0.7773932218551636, + "learning_rate": 0.00017102255091109463, + "loss": 2.6458, + "step": 5047 + }, + { + "epoch": 0.4073924622710032, + "grad_norm": 0.7458996176719666, + "learning_rate": 0.0001710114364568058, + "loss": 2.643, + "step": 5048 + }, + { + "epoch": 0.40747316600758615, + "grad_norm": 0.7465376257896423, + "learning_rate": 0.00017100032023272633, + "loss": 2.6677, + "step": 5049 + }, + { + "epoch": 0.40755386974416913, + "grad_norm": 0.7340850830078125, + "learning_rate": 0.0001709892022391333, + "loss": 2.6372, + "step": 5050 + }, + { + "epoch": 0.40763457348075216, + "grad_norm": 0.7189164757728577, + "learning_rate": 0.00017097808247630377, + "loss": 2.6524, + "step": 5051 + }, + { + "epoch": 0.40771527721733514, + "grad_norm": 0.6954184174537659, + "learning_rate": 0.0001709669609445149, + "loss": 2.7383, + "step": 5052 + }, + { + "epoch": 0.40779598095391817, + "grad_norm": 0.736409604549408, + "learning_rate": 0.00017095583764404384, + "loss": 2.6424, + "step": 5053 + }, + { + "epoch": 0.40787668469050115, + "grad_norm": 0.6773545742034912, + "learning_rate": 0.0001709447125751678, + "loss": 2.6557, + "step": 5054 + }, + { + "epoch": 0.4079573884270842, + "grad_norm": 0.718748927116394, + "learning_rate": 0.00017093358573816412, + "loss": 2.6884, + "step": 5055 + }, + { + "epoch": 0.40803809216366715, + "grad_norm": 0.8276848793029785, + "learning_rate": 0.00017092245713331002, + "loss": 2.6642, + "step": 5056 + }, + { + "epoch": 0.4081187959002502, + "grad_norm": 0.7694761157035828, + "learning_rate": 0.00017091132676088294, + "loss": 2.644, + "step": 5057 + }, + { + "epoch": 0.40819949963683316, + "grad_norm": 0.766724705696106, + "learning_rate": 0.0001709001946211602, + "loss": 2.6918, + "step": 5058 + }, + { + "epoch": 0.4082802033734162, + "grad_norm": 0.7067074775695801, + "learning_rate": 0.00017088906071441927, + "loss": 2.7228, + "step": 5059 + }, 
+ { + "epoch": 0.40836090710999917, + "grad_norm": 0.7216899991035461, + "learning_rate": 0.00017087792504093767, + "loss": 2.7068, + "step": 5060 + }, + { + "epoch": 0.4084416108465822, + "grad_norm": 0.6728984713554382, + "learning_rate": 0.00017086678760099287, + "loss": 2.686, + "step": 5061 + }, + { + "epoch": 0.4085223145831652, + "grad_norm": 0.7546882033348083, + "learning_rate": 0.0001708556483948625, + "loss": 2.6907, + "step": 5062 + }, + { + "epoch": 0.4086030183197482, + "grad_norm": 0.7471179962158203, + "learning_rate": 0.00017084450742282416, + "loss": 2.6857, + "step": 5063 + }, + { + "epoch": 0.4086837220563312, + "grad_norm": 0.7879743576049805, + "learning_rate": 0.00017083336468515548, + "loss": 2.7224, + "step": 5064 + }, + { + "epoch": 0.4087644257929142, + "grad_norm": 0.691343367099762, + "learning_rate": 0.00017082222018213422, + "loss": 2.6561, + "step": 5065 + }, + { + "epoch": 0.4088451295294972, + "grad_norm": 0.7497386336326599, + "learning_rate": 0.00017081107391403805, + "loss": 2.6317, + "step": 5066 + }, + { + "epoch": 0.40892583326608023, + "grad_norm": 0.6846269965171814, + "learning_rate": 0.00017079992588114485, + "loss": 2.6522, + "step": 5067 + }, + { + "epoch": 0.4090065370026632, + "grad_norm": 0.7312905192375183, + "learning_rate": 0.0001707887760837324, + "loss": 2.588, + "step": 5068 + }, + { + "epoch": 0.40908724073924624, + "grad_norm": 0.6966867446899414, + "learning_rate": 0.00017077762452207866, + "loss": 2.6316, + "step": 5069 + }, + { + "epoch": 0.4091679444758292, + "grad_norm": 0.6882073283195496, + "learning_rate": 0.00017076647119646147, + "loss": 2.6977, + "step": 5070 + }, + { + "epoch": 0.40924864821241225, + "grad_norm": 0.7392483949661255, + "learning_rate": 0.00017075531610715884, + "loss": 2.6768, + "step": 5071 + }, + { + "epoch": 0.4093293519489952, + "grad_norm": 0.7311073541641235, + "learning_rate": 0.00017074415925444876, + "loss": 2.6628, + "step": 5072 + }, + { + "epoch": 0.40941005568557826, + 
"grad_norm": 0.6769934296607971, + "learning_rate": 0.00017073300063860934, + "loss": 2.6438, + "step": 5073 + }, + { + "epoch": 0.40949075942216123, + "grad_norm": 0.736456573009491, + "learning_rate": 0.00017072184025991862, + "loss": 2.6151, + "step": 5074 + }, + { + "epoch": 0.40957146315874426, + "grad_norm": 0.7026283740997314, + "learning_rate": 0.00017071067811865476, + "loss": 2.6726, + "step": 5075 + }, + { + "epoch": 0.40965216689532724, + "grad_norm": 0.6825234293937683, + "learning_rate": 0.00017069951421509597, + "loss": 2.6795, + "step": 5076 + }, + { + "epoch": 0.4097328706319103, + "grad_norm": 0.7243828773498535, + "learning_rate": 0.0001706883485495205, + "loss": 2.687, + "step": 5077 + }, + { + "epoch": 0.40981357436849325, + "grad_norm": 0.7300469875335693, + "learning_rate": 0.00017067718112220658, + "loss": 2.6268, + "step": 5078 + }, + { + "epoch": 0.4098942781050763, + "grad_norm": 0.698095440864563, + "learning_rate": 0.00017066601193343255, + "loss": 2.6461, + "step": 5079 + }, + { + "epoch": 0.40997498184165926, + "grad_norm": 0.7318777441978455, + "learning_rate": 0.00017065484098347677, + "loss": 2.6817, + "step": 5080 + }, + { + "epoch": 0.4100556855782423, + "grad_norm": 0.7681582570075989, + "learning_rate": 0.00017064366827261772, + "loss": 2.7309, + "step": 5081 + }, + { + "epoch": 0.41013638931482527, + "grad_norm": 0.7690179944038391, + "learning_rate": 0.0001706324938011337, + "loss": 2.6292, + "step": 5082 + }, + { + "epoch": 0.4102170930514083, + "grad_norm": 0.6745284199714661, + "learning_rate": 0.00017062131756930338, + "loss": 2.7133, + "step": 5083 + }, + { + "epoch": 0.4102977967879913, + "grad_norm": 0.7524279952049255, + "learning_rate": 0.00017061013957740518, + "loss": 2.6237, + "step": 5084 + }, + { + "epoch": 0.4103785005245743, + "grad_norm": 0.7813692092895508, + "learning_rate": 0.00017059895982571773, + "loss": 2.6953, + "step": 5085 + }, + { + "epoch": 0.4104592042611573, + "grad_norm": 0.7128829956054688, + 
"learning_rate": 0.00017058777831451967, + "loss": 2.6771, + "step": 5086 + }, + { + "epoch": 0.4105399079977403, + "grad_norm": 0.7249834537506104, + "learning_rate": 0.00017057659504408963, + "loss": 2.6376, + "step": 5087 + }, + { + "epoch": 0.4106206117343233, + "grad_norm": 0.7742593288421631, + "learning_rate": 0.00017056541001470637, + "loss": 2.6227, + "step": 5088 + }, + { + "epoch": 0.4107013154709063, + "grad_norm": 0.6994228959083557, + "learning_rate": 0.00017055422322664863, + "loss": 2.6573, + "step": 5089 + }, + { + "epoch": 0.4107820192074893, + "grad_norm": 0.7144249081611633, + "learning_rate": 0.00017054303468019518, + "loss": 2.6602, + "step": 5090 + }, + { + "epoch": 0.41086272294407233, + "grad_norm": 0.7695099711418152, + "learning_rate": 0.00017053184437562497, + "loss": 2.6516, + "step": 5091 + }, + { + "epoch": 0.4109434266806553, + "grad_norm": 0.7610031962394714, + "learning_rate": 0.00017052065231321678, + "loss": 2.6963, + "step": 5092 + }, + { + "epoch": 0.41102413041723834, + "grad_norm": 0.7117859721183777, + "learning_rate": 0.0001705094584932496, + "loss": 2.6954, + "step": 5093 + }, + { + "epoch": 0.4111048341538213, + "grad_norm": 0.7891486287117004, + "learning_rate": 0.00017049826291600244, + "loss": 2.7265, + "step": 5094 + }, + { + "epoch": 0.41118553789040435, + "grad_norm": 0.7347370386123657, + "learning_rate": 0.00017048706558175423, + "loss": 2.658, + "step": 5095 + }, + { + "epoch": 0.41126624162698733, + "grad_norm": 0.7541289925575256, + "learning_rate": 0.00017047586649078414, + "loss": 2.6596, + "step": 5096 + }, + { + "epoch": 0.41134694536357036, + "grad_norm": 0.7471255660057068, + "learning_rate": 0.00017046466564337118, + "loss": 2.7008, + "step": 5097 + }, + { + "epoch": 0.41142764910015334, + "grad_norm": 0.7566937208175659, + "learning_rate": 0.00017045346303979457, + "loss": 2.7006, + "step": 5098 + }, + { + "epoch": 0.41150835283673637, + "grad_norm": 0.6991304159164429, + "learning_rate": 
0.00017044225868033353, + "loss": 2.6846, + "step": 5099 + }, + { + "epoch": 0.41158905657331935, + "grad_norm": 0.7286314368247986, + "learning_rate": 0.00017043105256526724, + "loss": 2.6219, + "step": 5100 + }, + { + "epoch": 0.4116697603099023, + "grad_norm": 0.6953727006912231, + "learning_rate": 0.000170419844694875, + "loss": 2.6093, + "step": 5101 + }, + { + "epoch": 0.41175046404648535, + "grad_norm": 0.6942756772041321, + "learning_rate": 0.00017040863506943615, + "loss": 2.6399, + "step": 5102 + }, + { + "epoch": 0.41183116778306833, + "grad_norm": 0.7513531446456909, + "learning_rate": 0.00017039742368923005, + "loss": 2.6187, + "step": 5103 + }, + { + "epoch": 0.41191187151965136, + "grad_norm": 0.7530633211135864, + "learning_rate": 0.00017038621055453617, + "loss": 2.6124, + "step": 5104 + }, + { + "epoch": 0.41199257525623434, + "grad_norm": 0.7487555146217346, + "learning_rate": 0.00017037499566563392, + "loss": 2.6331, + "step": 5105 + }, + { + "epoch": 0.41207327899281737, + "grad_norm": 0.7641858458518982, + "learning_rate": 0.00017036377902280282, + "loss": 2.6875, + "step": 5106 + }, + { + "epoch": 0.41215398272940035, + "grad_norm": 0.6962767839431763, + "learning_rate": 0.0001703525606263224, + "loss": 2.6538, + "step": 5107 + }, + { + "epoch": 0.4122346864659834, + "grad_norm": 0.8183409571647644, + "learning_rate": 0.0001703413404764723, + "loss": 2.6204, + "step": 5108 + }, + { + "epoch": 0.41231539020256636, + "grad_norm": 0.7029808759689331, + "learning_rate": 0.00017033011857353207, + "loss": 2.6369, + "step": 5109 + }, + { + "epoch": 0.4123960939391494, + "grad_norm": 0.7171663045883179, + "learning_rate": 0.00017031889491778149, + "loss": 2.6211, + "step": 5110 + }, + { + "epoch": 0.41247679767573237, + "grad_norm": 0.7456090450286865, + "learning_rate": 0.0001703076695095002, + "loss": 2.6574, + "step": 5111 + }, + { + "epoch": 0.4125575014123154, + "grad_norm": 0.7468575239181519, + "learning_rate": 0.000170296442348968, + "loss": 
2.598, + "step": 5112 + }, + { + "epoch": 0.4126382051488984, + "grad_norm": 0.7106603384017944, + "learning_rate": 0.0001702852134364647, + "loss": 2.6577, + "step": 5113 + }, + { + "epoch": 0.4127189088854814, + "grad_norm": 0.7788330912590027, + "learning_rate": 0.00017027398277227017, + "loss": 2.6797, + "step": 5114 + }, + { + "epoch": 0.4127996126220644, + "grad_norm": 0.7794120907783508, + "learning_rate": 0.00017026275035666427, + "loss": 2.5834, + "step": 5115 + }, + { + "epoch": 0.4128803163586474, + "grad_norm": 0.7270684838294983, + "learning_rate": 0.00017025151618992702, + "loss": 2.7153, + "step": 5116 + }, + { + "epoch": 0.4129610200952304, + "grad_norm": 0.8169006109237671, + "learning_rate": 0.00017024028027233827, + "loss": 2.6786, + "step": 5117 + }, + { + "epoch": 0.4130417238318134, + "grad_norm": 0.8053112626075745, + "learning_rate": 0.00017022904260417815, + "loss": 2.6456, + "step": 5118 + }, + { + "epoch": 0.4131224275683964, + "grad_norm": 0.7646365165710449, + "learning_rate": 0.0001702178031857267, + "loss": 2.6784, + "step": 5119 + }, + { + "epoch": 0.41320313130497943, + "grad_norm": 0.7878902554512024, + "learning_rate": 0.00017020656201726406, + "loss": 2.66, + "step": 5120 + }, + { + "epoch": 0.4132838350415624, + "grad_norm": 0.8602383732795715, + "learning_rate": 0.00017019531909907037, + "loss": 2.7018, + "step": 5121 + }, + { + "epoch": 0.41336453877814544, + "grad_norm": 0.801092267036438, + "learning_rate": 0.00017018407443142585, + "loss": 2.7728, + "step": 5122 + }, + { + "epoch": 0.4134452425147284, + "grad_norm": 0.7372604012489319, + "learning_rate": 0.00017017282801461074, + "loss": 2.6588, + "step": 5123 + }, + { + "epoch": 0.41352594625131145, + "grad_norm": 0.7553830146789551, + "learning_rate": 0.0001701615798489053, + "loss": 2.6844, + "step": 5124 + }, + { + "epoch": 0.4136066499878944, + "grad_norm": 0.7699872255325317, + "learning_rate": 0.0001701503299345899, + "loss": 2.6523, + "step": 5125 + }, + { + 
"epoch": 0.41368735372447746, + "grad_norm": 0.7087047696113586, + "learning_rate": 0.0001701390782719449, + "loss": 2.6785, + "step": 5126 + }, + { + "epoch": 0.41376805746106043, + "grad_norm": 0.7835792303085327, + "learning_rate": 0.0001701278248612507, + "loss": 2.7064, + "step": 5127 + }, + { + "epoch": 0.41384876119764347, + "grad_norm": 0.7833154201507568, + "learning_rate": 0.0001701165697027878, + "loss": 2.6552, + "step": 5128 + }, + { + "epoch": 0.41392946493422644, + "grad_norm": 0.8240615725517273, + "learning_rate": 0.0001701053127968367, + "loss": 2.7074, + "step": 5129 + }, + { + "epoch": 0.4140101686708095, + "grad_norm": 0.7612149119377136, + "learning_rate": 0.0001700940541436779, + "loss": 2.7484, + "step": 5130 + }, + { + "epoch": 0.41409087240739245, + "grad_norm": 0.7795391082763672, + "learning_rate": 0.00017008279374359212, + "loss": 2.6022, + "step": 5131 + }, + { + "epoch": 0.4141715761439755, + "grad_norm": 0.7714587450027466, + "learning_rate": 0.00017007153159685992, + "loss": 2.6529, + "step": 5132 + }, + { + "epoch": 0.41425227988055846, + "grad_norm": 0.7821317911148071, + "learning_rate": 0.00017006026770376194, + "loss": 2.6356, + "step": 5133 + }, + { + "epoch": 0.4143329836171415, + "grad_norm": 0.7300596833229065, + "learning_rate": 0.00017004900206457897, + "loss": 2.6552, + "step": 5134 + }, + { + "epoch": 0.41441368735372447, + "grad_norm": 0.780505359172821, + "learning_rate": 0.00017003773467959174, + "loss": 2.675, + "step": 5135 + }, + { + "epoch": 0.4144943910903075, + "grad_norm": 0.7107391357421875, + "learning_rate": 0.00017002646554908107, + "loss": 2.7096, + "step": 5136 + }, + { + "epoch": 0.4145750948268905, + "grad_norm": 0.7358834743499756, + "learning_rate": 0.0001700151946733279, + "loss": 2.6619, + "step": 5137 + }, + { + "epoch": 0.4146557985634735, + "grad_norm": 0.7573859095573425, + "learning_rate": 0.00017000392205261298, + "loss": 2.6234, + "step": 5138 + }, + { + "epoch": 0.4147365023000565, + 
"grad_norm": 0.7032024264335632, + "learning_rate": 0.00016999264768721738, + "loss": 2.6096, + "step": 5139 + }, + { + "epoch": 0.4148172060366395, + "grad_norm": 0.743813693523407, + "learning_rate": 0.00016998137157742203, + "loss": 2.6782, + "step": 5140 + }, + { + "epoch": 0.4148979097732225, + "grad_norm": 0.8861347436904907, + "learning_rate": 0.00016997009372350793, + "loss": 2.6645, + "step": 5141 + }, + { + "epoch": 0.4149786135098055, + "grad_norm": 0.7598684430122375, + "learning_rate": 0.00016995881412575623, + "loss": 2.649, + "step": 5142 + }, + { + "epoch": 0.4150593172463885, + "grad_norm": 0.7535565495491028, + "learning_rate": 0.00016994753278444798, + "loss": 2.6449, + "step": 5143 + }, + { + "epoch": 0.41514002098297154, + "grad_norm": 0.7073138356208801, + "learning_rate": 0.0001699362496998644, + "loss": 2.6253, + "step": 5144 + }, + { + "epoch": 0.4152207247195545, + "grad_norm": 0.7161526679992676, + "learning_rate": 0.00016992496487228662, + "loss": 2.6623, + "step": 5145 + }, + { + "epoch": 0.41530142845613754, + "grad_norm": 0.8284714818000793, + "learning_rate": 0.00016991367830199595, + "loss": 2.7363, + "step": 5146 + }, + { + "epoch": 0.4153821321927205, + "grad_norm": 0.7127673625946045, + "learning_rate": 0.0001699023899892737, + "loss": 2.6274, + "step": 5147 + }, + { + "epoch": 0.41546283592930355, + "grad_norm": 0.7496370673179626, + "learning_rate": 0.00016989109993440112, + "loss": 2.6364, + "step": 5148 + }, + { + "epoch": 0.41554353966588653, + "grad_norm": 0.7616143822669983, + "learning_rate": 0.00016987980813765963, + "loss": 2.7225, + "step": 5149 + }, + { + "epoch": 0.41562424340246956, + "grad_norm": 0.6935909986495972, + "learning_rate": 0.00016986851459933067, + "loss": 2.6109, + "step": 5150 + }, + { + "epoch": 0.41570494713905254, + "grad_norm": 0.721023678779602, + "learning_rate": 0.00016985721931969566, + "loss": 2.6993, + "step": 5151 + }, + { + "epoch": 0.4157856508756355, + "grad_norm": 0.8216699361801147, + 
"learning_rate": 0.00016984592229903617, + "loss": 2.6512, + "step": 5152 + }, + { + "epoch": 0.41586635461221855, + "grad_norm": 0.7425234913825989, + "learning_rate": 0.00016983462353763372, + "loss": 2.5903, + "step": 5153 + }, + { + "epoch": 0.4159470583488015, + "grad_norm": 0.7292542457580566, + "learning_rate": 0.00016982332303576986, + "loss": 2.692, + "step": 5154 + }, + { + "epoch": 0.41602776208538456, + "grad_norm": 0.7466831803321838, + "learning_rate": 0.0001698120207937263, + "loss": 2.7145, + "step": 5155 + }, + { + "epoch": 0.41610846582196753, + "grad_norm": 0.7271949648857117, + "learning_rate": 0.00016980071681178471, + "loss": 2.655, + "step": 5156 + }, + { + "epoch": 0.41618916955855056, + "grad_norm": 0.7505547404289246, + "learning_rate": 0.00016978941109022677, + "loss": 2.7167, + "step": 5157 + }, + { + "epoch": 0.41626987329513354, + "grad_norm": 0.7307172417640686, + "learning_rate": 0.00016977810362933427, + "loss": 2.6735, + "step": 5158 + }, + { + "epoch": 0.4163505770317166, + "grad_norm": 0.7839170098304749, + "learning_rate": 0.00016976679442938904, + "loss": 2.6818, + "step": 5159 + }, + { + "epoch": 0.41643128076829955, + "grad_norm": 0.7131803631782532, + "learning_rate": 0.00016975548349067293, + "loss": 2.6921, + "step": 5160 + }, + { + "epoch": 0.4165119845048826, + "grad_norm": 0.8129798173904419, + "learning_rate": 0.0001697441708134678, + "loss": 2.6682, + "step": 5161 + }, + { + "epoch": 0.41659268824146556, + "grad_norm": 0.7634746432304382, + "learning_rate": 0.00016973285639805563, + "loss": 2.6684, + "step": 5162 + }, + { + "epoch": 0.4166733919780486, + "grad_norm": 0.7367348074913025, + "learning_rate": 0.0001697215402447184, + "loss": 2.6424, + "step": 5163 + }, + { + "epoch": 0.41675409571463157, + "grad_norm": 0.7235338687896729, + "learning_rate": 0.00016971022235373815, + "loss": 2.6817, + "step": 5164 + }, + { + "epoch": 0.4168347994512146, + "grad_norm": 0.7764291763305664, + "learning_rate": 
0.0001696989027253969, + "loss": 2.6477, + "step": 5165 + }, + { + "epoch": 0.4169155031877976, + "grad_norm": 0.8207562565803528, + "learning_rate": 0.00016968758135997683, + "loss": 2.6408, + "step": 5166 + }, + { + "epoch": 0.4169962069243806, + "grad_norm": 0.7291484475135803, + "learning_rate": 0.00016967625825776005, + "loss": 2.6233, + "step": 5167 + }, + { + "epoch": 0.4170769106609636, + "grad_norm": 0.7060603499412537, + "learning_rate": 0.0001696649334190288, + "loss": 2.6204, + "step": 5168 + }, + { + "epoch": 0.4171576143975466, + "grad_norm": 0.7058241963386536, + "learning_rate": 0.00016965360684406528, + "loss": 2.6212, + "step": 5169 + }, + { + "epoch": 0.4172383181341296, + "grad_norm": 0.8248410224914551, + "learning_rate": 0.00016964227853315177, + "loss": 2.6688, + "step": 5170 + }, + { + "epoch": 0.4173190218707126, + "grad_norm": 0.7287606596946716, + "learning_rate": 0.0001696309484865707, + "loss": 2.6201, + "step": 5171 + }, + { + "epoch": 0.4173997256072956, + "grad_norm": 0.7214288115501404, + "learning_rate": 0.00016961961670460433, + "loss": 2.682, + "step": 5172 + }, + { + "epoch": 0.41748042934387863, + "grad_norm": 0.7133594155311584, + "learning_rate": 0.00016960828318753516, + "loss": 2.7167, + "step": 5173 + }, + { + "epoch": 0.4175611330804616, + "grad_norm": 0.6935842633247375, + "learning_rate": 0.00016959694793564558, + "loss": 2.6134, + "step": 5174 + }, + { + "epoch": 0.41764183681704464, + "grad_norm": 0.6863382458686829, + "learning_rate": 0.00016958561094921815, + "loss": 2.6396, + "step": 5175 + }, + { + "epoch": 0.4177225405536276, + "grad_norm": 0.7659433484077454, + "learning_rate": 0.0001695742722285354, + "loss": 2.6926, + "step": 5176 + }, + { + "epoch": 0.41780324429021065, + "grad_norm": 0.6997129917144775, + "learning_rate": 0.00016956293177387992, + "loss": 2.6983, + "step": 5177 + }, + { + "epoch": 0.41788394802679363, + "grad_norm": 0.6784526705741882, + "learning_rate": 0.00016955158958553433, + "loss": 
2.6961, + "step": 5178 + }, + { + "epoch": 0.41796465176337666, + "grad_norm": 0.8227884769439697, + "learning_rate": 0.00016954024566378132, + "loss": 2.7008, + "step": 5179 + }, + { + "epoch": 0.41804535549995964, + "grad_norm": 0.7733054757118225, + "learning_rate": 0.0001695289000089036, + "loss": 2.6615, + "step": 5180 + }, + { + "epoch": 0.41812605923654267, + "grad_norm": 0.7077545523643494, + "learning_rate": 0.00016951755262118394, + "loss": 2.6388, + "step": 5181 + }, + { + "epoch": 0.41820676297312565, + "grad_norm": 0.7962050437927246, + "learning_rate": 0.00016950620350090513, + "loss": 2.7063, + "step": 5182 + }, + { + "epoch": 0.4182874667097087, + "grad_norm": 0.6950554847717285, + "learning_rate": 0.00016949485264835005, + "loss": 2.7076, + "step": 5183 + }, + { + "epoch": 0.41836817044629165, + "grad_norm": 0.8546960949897766, + "learning_rate": 0.00016948350006380162, + "loss": 2.6533, + "step": 5184 + }, + { + "epoch": 0.4184488741828747, + "grad_norm": 0.7469324469566345, + "learning_rate": 0.00016947214574754272, + "loss": 2.5884, + "step": 5185 + }, + { + "epoch": 0.41852957791945766, + "grad_norm": 0.7125554084777832, + "learning_rate": 0.0001694607896998563, + "loss": 2.6448, + "step": 5186 + }, + { + "epoch": 0.4186102816560407, + "grad_norm": 0.6998329758644104, + "learning_rate": 0.00016944943192102549, + "loss": 2.5569, + "step": 5187 + }, + { + "epoch": 0.41869098539262367, + "grad_norm": 0.9046749472618103, + "learning_rate": 0.00016943807241133328, + "loss": 2.7701, + "step": 5188 + }, + { + "epoch": 0.4187716891292067, + "grad_norm": 0.7842074036598206, + "learning_rate": 0.00016942671117106274, + "loss": 2.7124, + "step": 5189 + }, + { + "epoch": 0.4188523928657897, + "grad_norm": 0.7625874280929565, + "learning_rate": 0.00016941534820049713, + "loss": 2.6626, + "step": 5190 + }, + { + "epoch": 0.4189330966023727, + "grad_norm": 0.7006461024284363, + "learning_rate": 0.00016940398349991957, + "loss": 2.6283, + "step": 5191 + }, + { 
+ "epoch": 0.4190138003389557, + "grad_norm": 0.7081875205039978, + "learning_rate": 0.00016939261706961332, + "loss": 2.69, + "step": 5192 + }, + { + "epoch": 0.4190945040755387, + "grad_norm": 0.7554503083229065, + "learning_rate": 0.00016938124890986166, + "loss": 2.641, + "step": 5193 + }, + { + "epoch": 0.4191752078121217, + "grad_norm": 0.7478535175323486, + "learning_rate": 0.0001693698790209479, + "loss": 2.7035, + "step": 5194 + }, + { + "epoch": 0.41925591154870473, + "grad_norm": 0.7323064208030701, + "learning_rate": 0.00016935850740315545, + "loss": 2.6713, + "step": 5195 + }, + { + "epoch": 0.4193366152852877, + "grad_norm": 0.8011505007743835, + "learning_rate": 0.00016934713405676764, + "loss": 2.6413, + "step": 5196 + }, + { + "epoch": 0.41941731902187074, + "grad_norm": 0.768851637840271, + "learning_rate": 0.00016933575898206804, + "loss": 2.6147, + "step": 5197 + }, + { + "epoch": 0.4194980227584537, + "grad_norm": 0.7255160808563232, + "learning_rate": 0.00016932438217934006, + "loss": 2.6093, + "step": 5198 + }, + { + "epoch": 0.41957872649503675, + "grad_norm": 0.7431769967079163, + "learning_rate": 0.00016931300364886722, + "loss": 2.6658, + "step": 5199 + }, + { + "epoch": 0.4196594302316197, + "grad_norm": 0.7532122731208801, + "learning_rate": 0.00016930162339093318, + "loss": 2.6371, + "step": 5200 + }, + { + "epoch": 0.41974013396820276, + "grad_norm": 0.7253943681716919, + "learning_rate": 0.00016929024140582152, + "loss": 2.6365, + "step": 5201 + }, + { + "epoch": 0.41982083770478573, + "grad_norm": 0.7323265075683594, + "learning_rate": 0.00016927885769381593, + "loss": 2.7096, + "step": 5202 + }, + { + "epoch": 0.4199015414413687, + "grad_norm": 0.7340009808540344, + "learning_rate": 0.00016926747225520008, + "loss": 2.6983, + "step": 5203 + }, + { + "epoch": 0.41998224517795174, + "grad_norm": 0.838706374168396, + "learning_rate": 0.00016925608509025776, + "loss": 2.7098, + "step": 5204 + }, + { + "epoch": 0.4200629489145347, + 
"grad_norm": 0.7320838570594788, + "learning_rate": 0.0001692446961992728, + "loss": 2.6767, + "step": 5205 + }, + { + "epoch": 0.42014365265111775, + "grad_norm": 0.7275335192680359, + "learning_rate": 0.00016923330558252898, + "loss": 2.6754, + "step": 5206 + }, + { + "epoch": 0.4202243563877007, + "grad_norm": 0.7572353482246399, + "learning_rate": 0.00016922191324031017, + "loss": 2.7076, + "step": 5207 + }, + { + "epoch": 0.42030506012428376, + "grad_norm": 0.7991098165512085, + "learning_rate": 0.0001692105191729004, + "loss": 2.7281, + "step": 5208 + }, + { + "epoch": 0.42038576386086673, + "grad_norm": 0.70769202709198, + "learning_rate": 0.00016919912338058356, + "loss": 2.684, + "step": 5209 + }, + { + "epoch": 0.42046646759744977, + "grad_norm": 0.6895349621772766, + "learning_rate": 0.0001691877258636436, + "loss": 2.6723, + "step": 5210 + }, + { + "epoch": 0.42054717133403274, + "grad_norm": 0.7368944883346558, + "learning_rate": 0.00016917632662236476, + "loss": 2.601, + "step": 5211 + }, + { + "epoch": 0.4206278750706158, + "grad_norm": 0.7122060060501099, + "learning_rate": 0.00016916492565703097, + "loss": 2.703, + "step": 5212 + }, + { + "epoch": 0.42070857880719875, + "grad_norm": 0.735251784324646, + "learning_rate": 0.00016915352296792646, + "loss": 2.7715, + "step": 5213 + }, + { + "epoch": 0.4207892825437818, + "grad_norm": 0.7686039805412292, + "learning_rate": 0.00016914211855533536, + "loss": 2.6935, + "step": 5214 + }, + { + "epoch": 0.42086998628036476, + "grad_norm": 0.8457472920417786, + "learning_rate": 0.00016913071241954195, + "loss": 2.6535, + "step": 5215 + }, + { + "epoch": 0.4209506900169478, + "grad_norm": 0.6913465261459351, + "learning_rate": 0.00016911930456083046, + "loss": 2.6453, + "step": 5216 + }, + { + "epoch": 0.42103139375353077, + "grad_norm": 0.6939878463745117, + "learning_rate": 0.00016910789497948524, + "loss": 2.6483, + "step": 5217 + }, + { + "epoch": 0.4211120974901138, + "grad_norm": 0.7240888476371765, + 
"learning_rate": 0.00016909648367579062, + "loss": 2.6649, + "step": 5218 + }, + { + "epoch": 0.4211928012266968, + "grad_norm": 0.7570972442626953, + "learning_rate": 0.00016908507065003102, + "loss": 2.6633, + "step": 5219 + }, + { + "epoch": 0.4212735049632798, + "grad_norm": 0.72161465883255, + "learning_rate": 0.00016907365590249082, + "loss": 2.6999, + "step": 5220 + }, + { + "epoch": 0.4213542086998628, + "grad_norm": 0.7818038463592529, + "learning_rate": 0.00016906223943345458, + "loss": 2.6478, + "step": 5221 + }, + { + "epoch": 0.4214349124364458, + "grad_norm": 0.7292464971542358, + "learning_rate": 0.00016905082124320684, + "loss": 2.6725, + "step": 5222 + }, + { + "epoch": 0.4215156161730288, + "grad_norm": 0.7612937092781067, + "learning_rate": 0.0001690394013320321, + "loss": 2.6474, + "step": 5223 + }, + { + "epoch": 0.4215963199096118, + "grad_norm": 0.7325131297111511, + "learning_rate": 0.000169027979700215, + "loss": 2.6525, + "step": 5224 + }, + { + "epoch": 0.4216770236461948, + "grad_norm": 0.7736644148826599, + "learning_rate": 0.00016901655634804022, + "loss": 2.662, + "step": 5225 + }, + { + "epoch": 0.42175772738277784, + "grad_norm": 0.758522629737854, + "learning_rate": 0.00016900513127579244, + "loss": 2.6558, + "step": 5226 + }, + { + "epoch": 0.4218384311193608, + "grad_norm": 0.7559491991996765, + "learning_rate": 0.00016899370448375642, + "loss": 2.7361, + "step": 5227 + }, + { + "epoch": 0.42191913485594384, + "grad_norm": 0.7791146039962769, + "learning_rate": 0.00016898227597221692, + "loss": 2.6739, + "step": 5228 + }, + { + "epoch": 0.4219998385925268, + "grad_norm": 0.7280717492103577, + "learning_rate": 0.00016897084574145878, + "loss": 2.6316, + "step": 5229 + }, + { + "epoch": 0.42208054232910985, + "grad_norm": 0.7455596327781677, + "learning_rate": 0.0001689594137917669, + "loss": 2.7244, + "step": 5230 + }, + { + "epoch": 0.42216124606569283, + "grad_norm": 0.7965813875198364, + "learning_rate": 0.00016894798012342613, 
+ "loss": 2.6757, + "step": 5231 + }, + { + "epoch": 0.42224194980227586, + "grad_norm": 0.6740596294403076, + "learning_rate": 0.00016893654473672148, + "loss": 2.631, + "step": 5232 + }, + { + "epoch": 0.42232265353885884, + "grad_norm": 0.695105254650116, + "learning_rate": 0.00016892510763193795, + "loss": 2.6563, + "step": 5233 + }, + { + "epoch": 0.42240335727544187, + "grad_norm": 0.7623865008354187, + "learning_rate": 0.00016891366880936051, + "loss": 2.6738, + "step": 5234 + }, + { + "epoch": 0.42248406101202485, + "grad_norm": 0.7545912265777588, + "learning_rate": 0.00016890222826927435, + "loss": 2.6949, + "step": 5235 + }, + { + "epoch": 0.4225647647486079, + "grad_norm": 0.7280749678611755, + "learning_rate": 0.00016889078601196452, + "loss": 2.6571, + "step": 5236 + }, + { + "epoch": 0.42264546848519086, + "grad_norm": 0.6624523401260376, + "learning_rate": 0.00016887934203771625, + "loss": 2.6854, + "step": 5237 + }, + { + "epoch": 0.4227261722217739, + "grad_norm": 0.7835487127304077, + "learning_rate": 0.0001688678963468147, + "loss": 2.6437, + "step": 5238 + }, + { + "epoch": 0.42280687595835686, + "grad_norm": 0.7384940981864929, + "learning_rate": 0.00016885644893954518, + "loss": 2.6584, + "step": 5239 + }, + { + "epoch": 0.4228875796949399, + "grad_norm": 0.8227531313896179, + "learning_rate": 0.00016884499981619292, + "loss": 2.673, + "step": 5240 + }, + { + "epoch": 0.4229682834315229, + "grad_norm": 0.7442220449447632, + "learning_rate": 0.00016883354897704334, + "loss": 2.6729, + "step": 5241 + }, + { + "epoch": 0.4230489871681059, + "grad_norm": 0.7182636857032776, + "learning_rate": 0.00016882209642238175, + "loss": 2.6833, + "step": 5242 + }, + { + "epoch": 0.4231296909046889, + "grad_norm": 0.7061870098114014, + "learning_rate": 0.00016881064215249362, + "loss": 2.6696, + "step": 5243 + }, + { + "epoch": 0.4232103946412719, + "grad_norm": 0.6792885065078735, + "learning_rate": 0.00016879918616766445, + "loss": 2.6805, + "step": 5244 + 
}, + { + "epoch": 0.4232910983778549, + "grad_norm": 0.7439807057380676, + "learning_rate": 0.00016878772846817968, + "loss": 2.6522, + "step": 5245 + }, + { + "epoch": 0.4233718021144379, + "grad_norm": 0.7078969478607178, + "learning_rate": 0.00016877626905432492, + "loss": 2.6549, + "step": 5246 + }, + { + "epoch": 0.4234525058510209, + "grad_norm": 0.7103868126869202, + "learning_rate": 0.00016876480792638577, + "loss": 2.6812, + "step": 5247 + }, + { + "epoch": 0.42353320958760393, + "grad_norm": 0.7224452495574951, + "learning_rate": 0.00016875334508464782, + "loss": 2.6657, + "step": 5248 + }, + { + "epoch": 0.4236139133241869, + "grad_norm": 0.6885106563568115, + "learning_rate": 0.00016874188052939682, + "loss": 2.6421, + "step": 5249 + }, + { + "epoch": 0.42369461706076994, + "grad_norm": 0.6736720204353333, + "learning_rate": 0.00016873041426091845, + "loss": 2.6717, + "step": 5250 + }, + { + "epoch": 0.4237753207973529, + "grad_norm": 0.7597963809967041, + "learning_rate": 0.00016871894627949846, + "loss": 2.6231, + "step": 5251 + }, + { + "epoch": 0.42385602453393595, + "grad_norm": 0.8295687437057495, + "learning_rate": 0.00016870747658542275, + "loss": 2.6631, + "step": 5252 + }, + { + "epoch": 0.4239367282705189, + "grad_norm": 0.6750548481941223, + "learning_rate": 0.0001686960051789771, + "loss": 2.6997, + "step": 5253 + }, + { + "epoch": 0.4240174320071019, + "grad_norm": 0.7229160666465759, + "learning_rate": 0.0001686845320604474, + "loss": 2.6525, + "step": 5254 + }, + { + "epoch": 0.42409813574368493, + "grad_norm": 0.8318623900413513, + "learning_rate": 0.00016867305723011967, + "loss": 2.7774, + "step": 5255 + }, + { + "epoch": 0.4241788394802679, + "grad_norm": 0.8391026854515076, + "learning_rate": 0.00016866158068827979, + "loss": 2.6712, + "step": 5256 + }, + { + "epoch": 0.42425954321685094, + "grad_norm": 0.691146969795227, + "learning_rate": 0.00016865010243521388, + "loss": 2.6459, + "step": 5257 + }, + { + "epoch": 
0.4243402469534339, + "grad_norm": 0.7223602533340454, + "learning_rate": 0.00016863862247120794, + "loss": 2.6675, + "step": 5258 + }, + { + "epoch": 0.42442095069001695, + "grad_norm": 0.8400631546974182, + "learning_rate": 0.0001686271407965481, + "loss": 2.6978, + "step": 5259 + }, + { + "epoch": 0.42450165442659993, + "grad_norm": 0.737684965133667, + "learning_rate": 0.0001686156574115205, + "loss": 2.6992, + "step": 5260 + }, + { + "epoch": 0.42458235816318296, + "grad_norm": 0.7511717677116394, + "learning_rate": 0.0001686041723164114, + "loss": 2.6947, + "step": 5261 + }, + { + "epoch": 0.42466306189976594, + "grad_norm": 0.7434492707252502, + "learning_rate": 0.00016859268551150698, + "loss": 2.7353, + "step": 5262 + }, + { + "epoch": 0.42474376563634897, + "grad_norm": 0.746609628200531, + "learning_rate": 0.00016858119699709353, + "loss": 2.7519, + "step": 5263 + }, + { + "epoch": 0.42482446937293195, + "grad_norm": 0.7709949612617493, + "learning_rate": 0.0001685697067734574, + "loss": 2.7018, + "step": 5264 + }, + { + "epoch": 0.424905173109515, + "grad_norm": 0.7496309876441956, + "learning_rate": 0.00016855821484088488, + "loss": 2.6761, + "step": 5265 + }, + { + "epoch": 0.42498587684609795, + "grad_norm": 0.7071252465248108, + "learning_rate": 0.00016854672119966243, + "loss": 2.6762, + "step": 5266 + }, + { + "epoch": 0.425066580582681, + "grad_norm": 0.7991356253623962, + "learning_rate": 0.00016853522585007658, + "loss": 2.6134, + "step": 5267 + }, + { + "epoch": 0.42514728431926396, + "grad_norm": 0.8194605708122253, + "learning_rate": 0.0001685237287924137, + "loss": 2.6601, + "step": 5268 + }, + { + "epoch": 0.425227988055847, + "grad_norm": 0.7451688051223755, + "learning_rate": 0.00016851223002696037, + "loss": 2.6631, + "step": 5269 + }, + { + "epoch": 0.42530869179242997, + "grad_norm": 0.7220263481140137, + "learning_rate": 0.0001685007295540032, + "loss": 2.6631, + "step": 5270 + }, + { + "epoch": 0.425389395529013, + "grad_norm": 
0.7268854975700378, + "learning_rate": 0.00016848922737382874, + "loss": 2.6752, + "step": 5271 + }, + { + "epoch": 0.425470099265596, + "grad_norm": 0.8841642141342163, + "learning_rate": 0.00016847772348672378, + "loss": 2.7153, + "step": 5272 + }, + { + "epoch": 0.425550803002179, + "grad_norm": 0.7725942134857178, + "learning_rate": 0.00016846621789297489, + "loss": 2.6726, + "step": 5273 + }, + { + "epoch": 0.425631506738762, + "grad_norm": 0.7179448008537292, + "learning_rate": 0.00016845471059286887, + "loss": 2.6659, + "step": 5274 + }, + { + "epoch": 0.425712210475345, + "grad_norm": 0.7630325555801392, + "learning_rate": 0.00016844320158669257, + "loss": 2.7133, + "step": 5275 + }, + { + "epoch": 0.425792914211928, + "grad_norm": 0.7349739670753479, + "learning_rate": 0.00016843169087473272, + "loss": 2.6397, + "step": 5276 + }, + { + "epoch": 0.42587361794851103, + "grad_norm": 0.7670298218727112, + "learning_rate": 0.00016842017845727626, + "loss": 2.6485, + "step": 5277 + }, + { + "epoch": 0.425954321685094, + "grad_norm": 0.692095160484314, + "learning_rate": 0.00016840866433461013, + "loss": 2.6058, + "step": 5278 + }, + { + "epoch": 0.42603502542167704, + "grad_norm": 0.6888624429702759, + "learning_rate": 0.00016839714850702125, + "loss": 2.5757, + "step": 5279 + }, + { + "epoch": 0.42611572915826, + "grad_norm": 0.6816484332084656, + "learning_rate": 0.00016838563097479664, + "loss": 2.6656, + "step": 5280 + }, + { + "epoch": 0.42619643289484305, + "grad_norm": 0.7778486609458923, + "learning_rate": 0.00016837411173822333, + "loss": 2.6738, + "step": 5281 + }, + { + "epoch": 0.426277136631426, + "grad_norm": 0.73436439037323, + "learning_rate": 0.00016836259079758845, + "loss": 2.6346, + "step": 5282 + }, + { + "epoch": 0.42635784036800906, + "grad_norm": 0.673528254032135, + "learning_rate": 0.00016835106815317908, + "loss": 2.6636, + "step": 5283 + }, + { + "epoch": 0.42643854410459203, + "grad_norm": 0.6892737150192261, + "learning_rate": 
0.00016833954380528242, + "loss": 2.6723, + "step": 5284 + }, + { + "epoch": 0.42651924784117506, + "grad_norm": 0.7404607534408569, + "learning_rate": 0.00016832801775418571, + "loss": 2.6751, + "step": 5285 + }, + { + "epoch": 0.42659995157775804, + "grad_norm": 0.7040587663650513, + "learning_rate": 0.00016831649000017618, + "loss": 2.6079, + "step": 5286 + }, + { + "epoch": 0.4266806553143411, + "grad_norm": 0.7295164465904236, + "learning_rate": 0.00016830496054354112, + "loss": 2.5928, + "step": 5287 + }, + { + "epoch": 0.42676135905092405, + "grad_norm": 0.7269962430000305, + "learning_rate": 0.00016829342938456788, + "loss": 2.6648, + "step": 5288 + }, + { + "epoch": 0.4268420627875071, + "grad_norm": 0.7296550273895264, + "learning_rate": 0.0001682818965235439, + "loss": 2.6814, + "step": 5289 + }, + { + "epoch": 0.42692276652409006, + "grad_norm": 0.8376085758209229, + "learning_rate": 0.00016827036196075655, + "loss": 2.702, + "step": 5290 + }, + { + "epoch": 0.4270034702606731, + "grad_norm": 0.7461032271385193, + "learning_rate": 0.00016825882569649332, + "loss": 2.6959, + "step": 5291 + }, + { + "epoch": 0.42708417399725607, + "grad_norm": 0.7218661308288574, + "learning_rate": 0.00016824728773104171, + "loss": 2.7182, + "step": 5292 + }, + { + "epoch": 0.4271648777338391, + "grad_norm": 0.7012860774993896, + "learning_rate": 0.00016823574806468933, + "loss": 2.6989, + "step": 5293 + }, + { + "epoch": 0.4272455814704221, + "grad_norm": 0.7039482593536377, + "learning_rate": 0.0001682242066977237, + "loss": 2.6153, + "step": 5294 + }, + { + "epoch": 0.4273262852070051, + "grad_norm": 0.8783851861953735, + "learning_rate": 0.0001682126636304325, + "loss": 2.7174, + "step": 5295 + }, + { + "epoch": 0.4274069889435881, + "grad_norm": 0.7266566157341003, + "learning_rate": 0.00016820111886310343, + "loss": 2.6571, + "step": 5296 + }, + { + "epoch": 0.4274876926801711, + "grad_norm": 0.7512212991714478, + "learning_rate": 0.0001681895723960242, + "loss": 
2.6802, + "step": 5297 + }, + { + "epoch": 0.4275683964167541, + "grad_norm": 0.7786974310874939, + "learning_rate": 0.00016817802422948254, + "loss": 2.6514, + "step": 5298 + }, + { + "epoch": 0.4276491001533371, + "grad_norm": 0.7454531788825989, + "learning_rate": 0.00016816647436376634, + "loss": 2.6508, + "step": 5299 + }, + { + "epoch": 0.4277298038899201, + "grad_norm": 0.7542992830276489, + "learning_rate": 0.0001681549227991634, + "loss": 2.6455, + "step": 5300 + }, + { + "epoch": 0.42781050762650313, + "grad_norm": 0.7405722141265869, + "learning_rate": 0.0001681433695359616, + "loss": 2.6505, + "step": 5301 + }, + { + "epoch": 0.4278912113630861, + "grad_norm": 0.7120002508163452, + "learning_rate": 0.00016813181457444896, + "loss": 2.6652, + "step": 5302 + }, + { + "epoch": 0.42797191509966914, + "grad_norm": 0.7645997405052185, + "learning_rate": 0.00016812025791491334, + "loss": 2.6456, + "step": 5303 + }, + { + "epoch": 0.4280526188362521, + "grad_norm": 0.7214465141296387, + "learning_rate": 0.00016810869955764286, + "loss": 2.6261, + "step": 5304 + }, + { + "epoch": 0.4281333225728351, + "grad_norm": 0.7653367519378662, + "learning_rate": 0.00016809713950292551, + "loss": 2.7295, + "step": 5305 + }, + { + "epoch": 0.4282140263094181, + "grad_norm": 0.6798970103263855, + "learning_rate": 0.0001680855777510495, + "loss": 2.6549, + "step": 5306 + }, + { + "epoch": 0.4282947300460011, + "grad_norm": 0.7693684101104736, + "learning_rate": 0.00016807401430230288, + "loss": 2.7001, + "step": 5307 + }, + { + "epoch": 0.42837543378258414, + "grad_norm": 0.6962063312530518, + "learning_rate": 0.00016806244915697384, + "loss": 2.6582, + "step": 5308 + }, + { + "epoch": 0.4284561375191671, + "grad_norm": 0.7526959776878357, + "learning_rate": 0.00016805088231535068, + "loss": 2.7204, + "step": 5309 + }, + { + "epoch": 0.42853684125575014, + "grad_norm": 0.7403820753097534, + "learning_rate": 0.0001680393137777217, + "loss": 2.6505, + "step": 5310 + }, + { + 
"epoch": 0.4286175449923331, + "grad_norm": 0.7056909799575806, + "learning_rate": 0.00016802774354437506, + "loss": 2.5981, + "step": 5311 + }, + { + "epoch": 0.42869824872891615, + "grad_norm": 0.6756439805030823, + "learning_rate": 0.0001680161716155993, + "loss": 2.6845, + "step": 5312 + }, + { + "epoch": 0.42877895246549913, + "grad_norm": 0.7634297013282776, + "learning_rate": 0.0001680045979916827, + "loss": 2.6399, + "step": 5313 + }, + { + "epoch": 0.42885965620208216, + "grad_norm": 0.6793022751808167, + "learning_rate": 0.0001679930226729138, + "loss": 2.6808, + "step": 5314 + }, + { + "epoch": 0.42894035993866514, + "grad_norm": 0.7692369222640991, + "learning_rate": 0.00016798144565958103, + "loss": 2.673, + "step": 5315 + }, + { + "epoch": 0.42902106367524817, + "grad_norm": 0.668798565864563, + "learning_rate": 0.00016796986695197293, + "loss": 2.6465, + "step": 5316 + }, + { + "epoch": 0.42910176741183115, + "grad_norm": 0.719160795211792, + "learning_rate": 0.00016795828655037805, + "loss": 2.5876, + "step": 5317 + }, + { + "epoch": 0.4291824711484142, + "grad_norm": 0.7352864742279053, + "learning_rate": 0.000167946704455085, + "loss": 2.625, + "step": 5318 + }, + { + "epoch": 0.42926317488499716, + "grad_norm": 0.7103392481803894, + "learning_rate": 0.00016793512066638254, + "loss": 2.602, + "step": 5319 + }, + { + "epoch": 0.4293438786215802, + "grad_norm": 0.7005727291107178, + "learning_rate": 0.0001679235351845592, + "loss": 2.6723, + "step": 5320 + }, + { + "epoch": 0.42942458235816316, + "grad_norm": 0.7686243653297424, + "learning_rate": 0.00016791194800990387, + "loss": 2.693, + "step": 5321 + }, + { + "epoch": 0.4295052860947462, + "grad_norm": 0.7026933431625366, + "learning_rate": 0.00016790035914270526, + "loss": 2.6334, + "step": 5322 + }, + { + "epoch": 0.4295859898313292, + "grad_norm": 0.748938262462616, + "learning_rate": 0.0001678887685832522, + "loss": 2.6757, + "step": 5323 + }, + { + "epoch": 0.4296666935679122, + 
"grad_norm": 0.7753568887710571, + "learning_rate": 0.00016787717633183355, + "loss": 2.6782, + "step": 5324 + }, + { + "epoch": 0.4297473973044952, + "grad_norm": 0.7605767846107483, + "learning_rate": 0.00016786558238873823, + "loss": 2.6822, + "step": 5325 + }, + { + "epoch": 0.4298281010410782, + "grad_norm": 0.7516531348228455, + "learning_rate": 0.00016785398675425524, + "loss": 2.6802, + "step": 5326 + }, + { + "epoch": 0.4299088047776612, + "grad_norm": 0.7551677227020264, + "learning_rate": 0.0001678423894286735, + "loss": 2.6509, + "step": 5327 + }, + { + "epoch": 0.4299895085142442, + "grad_norm": 0.765364944934845, + "learning_rate": 0.00016783079041228206, + "loss": 2.6552, + "step": 5328 + }, + { + "epoch": 0.4300702122508272, + "grad_norm": 0.7016649842262268, + "learning_rate": 0.00016781918970537002, + "loss": 2.6861, + "step": 5329 + }, + { + "epoch": 0.43015091598741023, + "grad_norm": 0.7266311645507812, + "learning_rate": 0.0001678075873082265, + "loss": 2.7064, + "step": 5330 + }, + { + "epoch": 0.4302316197239932, + "grad_norm": 0.7414532899856567, + "learning_rate": 0.00016779598322114064, + "loss": 2.6273, + "step": 5331 + }, + { + "epoch": 0.43031232346057624, + "grad_norm": 0.7032443881034851, + "learning_rate": 0.00016778437744440167, + "loss": 2.6577, + "step": 5332 + }, + { + "epoch": 0.4303930271971592, + "grad_norm": 0.7150338888168335, + "learning_rate": 0.00016777276997829882, + "loss": 2.6586, + "step": 5333 + }, + { + "epoch": 0.43047373093374225, + "grad_norm": 0.6893971562385559, + "learning_rate": 0.0001677611608231214, + "loss": 2.6713, + "step": 5334 + }, + { + "epoch": 0.4305544346703252, + "grad_norm": 0.861935555934906, + "learning_rate": 0.00016774954997915867, + "loss": 2.7037, + "step": 5335 + }, + { + "epoch": 0.43063513840690826, + "grad_norm": 0.7140138745307922, + "learning_rate": 0.00016773793744670012, + "loss": 2.6684, + "step": 5336 + }, + { + "epoch": 0.43071584214349123, + "grad_norm": 0.7245929837226868, + 
"learning_rate": 0.00016772632322603506, + "loss": 2.6349, + "step": 5337 + }, + { + "epoch": 0.43079654588007427, + "grad_norm": 0.7216203808784485, + "learning_rate": 0.000167714707317453, + "loss": 2.6338, + "step": 5338 + }, + { + "epoch": 0.43087724961665724, + "grad_norm": 0.7076452374458313, + "learning_rate": 0.00016770308972124343, + "loss": 2.6614, + "step": 5339 + }, + { + "epoch": 0.4309579533532403, + "grad_norm": 0.7392035722732544, + "learning_rate": 0.00016769147043769586, + "loss": 2.6697, + "step": 5340 + }, + { + "epoch": 0.43103865708982325, + "grad_norm": 0.7235357761383057, + "learning_rate": 0.00016767984946709994, + "loss": 2.6664, + "step": 5341 + }, + { + "epoch": 0.4311193608264063, + "grad_norm": 0.6985526084899902, + "learning_rate": 0.00016766822680974524, + "loss": 2.6157, + "step": 5342 + }, + { + "epoch": 0.43120006456298926, + "grad_norm": 0.769963264465332, + "learning_rate": 0.0001676566024659214, + "loss": 2.6096, + "step": 5343 + }, + { + "epoch": 0.4312807682995723, + "grad_norm": 0.7504093050956726, + "learning_rate": 0.00016764497643591823, + "loss": 2.5795, + "step": 5344 + }, + { + "epoch": 0.43136147203615527, + "grad_norm": 0.7193379402160645, + "learning_rate": 0.0001676333487200254, + "loss": 2.6158, + "step": 5345 + }, + { + "epoch": 0.4314421757727383, + "grad_norm": 0.777357280254364, + "learning_rate": 0.00016762171931853273, + "loss": 2.6388, + "step": 5346 + }, + { + "epoch": 0.4315228795093213, + "grad_norm": 0.8590179085731506, + "learning_rate": 0.00016761008823173003, + "loss": 2.6597, + "step": 5347 + }, + { + "epoch": 0.4316035832459043, + "grad_norm": 0.7040170431137085, + "learning_rate": 0.0001675984554599072, + "loss": 2.6447, + "step": 5348 + }, + { + "epoch": 0.4316842869824873, + "grad_norm": 0.7682301998138428, + "learning_rate": 0.00016758682100335417, + "loss": 2.6738, + "step": 5349 + }, + { + "epoch": 0.4317649907190703, + "grad_norm": 0.8342414498329163, + "learning_rate": 
0.00016757518486236087, + "loss": 2.7058, + "step": 5350 + }, + { + "epoch": 0.4318456944556533, + "grad_norm": 0.7410600781440735, + "learning_rate": 0.00016756354703721736, + "loss": 2.6597, + "step": 5351 + }, + { + "epoch": 0.4319263981922363, + "grad_norm": 0.7633174061775208, + "learning_rate": 0.00016755190752821363, + "loss": 2.6461, + "step": 5352 + }, + { + "epoch": 0.4320071019288193, + "grad_norm": 0.7855150103569031, + "learning_rate": 0.00016754026633563973, + "loss": 2.6556, + "step": 5353 + }, + { + "epoch": 0.43208780566540234, + "grad_norm": 0.7197602391242981, + "learning_rate": 0.00016752862345978587, + "loss": 2.6511, + "step": 5354 + }, + { + "epoch": 0.4321685094019853, + "grad_norm": 0.7748876810073853, + "learning_rate": 0.00016751697890094223, + "loss": 2.7, + "step": 5355 + }, + { + "epoch": 0.4322492131385683, + "grad_norm": 0.7457308173179626, + "learning_rate": 0.00016750533265939895, + "loss": 2.6934, + "step": 5356 + }, + { + "epoch": 0.4323299168751513, + "grad_norm": 0.8003394603729248, + "learning_rate": 0.00016749368473544633, + "loss": 2.6273, + "step": 5357 + }, + { + "epoch": 0.4324106206117343, + "grad_norm": 0.7163615822792053, + "learning_rate": 0.00016748203512937464, + "loss": 2.6605, + "step": 5358 + }, + { + "epoch": 0.43249132434831733, + "grad_norm": 0.6859120726585388, + "learning_rate": 0.00016747038384147422, + "loss": 2.6748, + "step": 5359 + }, + { + "epoch": 0.4325720280849003, + "grad_norm": 0.7169440984725952, + "learning_rate": 0.0001674587308720355, + "loss": 2.6674, + "step": 5360 + }, + { + "epoch": 0.43265273182148334, + "grad_norm": 0.7762351036071777, + "learning_rate": 0.00016744707622134888, + "loss": 2.6673, + "step": 5361 + }, + { + "epoch": 0.4327334355580663, + "grad_norm": 0.7169542908668518, + "learning_rate": 0.0001674354198897048, + "loss": 2.7341, + "step": 5362 + }, + { + "epoch": 0.43281413929464935, + "grad_norm": 0.7903403043746948, + "learning_rate": 0.00016742376187739376, + "loss": 
2.6019, + "step": 5363 + }, + { + "epoch": 0.4328948430312323, + "grad_norm": 0.8395403027534485, + "learning_rate": 0.00016741210218470634, + "loss": 2.6519, + "step": 5364 + }, + { + "epoch": 0.43297554676781536, + "grad_norm": 0.7521546483039856, + "learning_rate": 0.0001674004408119331, + "loss": 2.6067, + "step": 5365 + }, + { + "epoch": 0.43305625050439833, + "grad_norm": 0.7186779975891113, + "learning_rate": 0.0001673887777593647, + "loss": 2.6435, + "step": 5366 + }, + { + "epoch": 0.43313695424098136, + "grad_norm": 0.7362968921661377, + "learning_rate": 0.0001673771130272918, + "loss": 2.6031, + "step": 5367 + }, + { + "epoch": 0.43321765797756434, + "grad_norm": 0.8033537864685059, + "learning_rate": 0.0001673654466160051, + "loss": 2.7234, + "step": 5368 + }, + { + "epoch": 0.4332983617141474, + "grad_norm": 0.7109711766242981, + "learning_rate": 0.0001673537785257954, + "loss": 2.6621, + "step": 5369 + }, + { + "epoch": 0.43337906545073035, + "grad_norm": 0.7499226927757263, + "learning_rate": 0.0001673421087569535, + "loss": 2.706, + "step": 5370 + }, + { + "epoch": 0.4334597691873134, + "grad_norm": 0.7192875146865845, + "learning_rate": 0.00016733043730977017, + "loss": 2.6053, + "step": 5371 + }, + { + "epoch": 0.43354047292389636, + "grad_norm": 0.6939374208450317, + "learning_rate": 0.00016731876418453636, + "loss": 2.6621, + "step": 5372 + }, + { + "epoch": 0.4336211766604794, + "grad_norm": 0.720741331577301, + "learning_rate": 0.00016730708938154297, + "loss": 2.6358, + "step": 5373 + }, + { + "epoch": 0.43370188039706237, + "grad_norm": 0.6979780793190002, + "learning_rate": 0.00016729541290108095, + "loss": 2.6162, + "step": 5374 + }, + { + "epoch": 0.4337825841336454, + "grad_norm": 0.8014200925827026, + "learning_rate": 0.00016728373474344136, + "loss": 2.6255, + "step": 5375 + }, + { + "epoch": 0.4338632878702284, + "grad_norm": 0.7780057787895203, + "learning_rate": 0.0001672720549089152, + "loss": 2.6257, + "step": 5376 + }, + { + 
"epoch": 0.4339439916068114, + "grad_norm": 0.7111102938652039, + "learning_rate": 0.00016726037339779358, + "loss": 2.6384, + "step": 5377 + }, + { + "epoch": 0.4340246953433944, + "grad_norm": 0.7077106833457947, + "learning_rate": 0.00016724869021036764, + "loss": 2.6293, + "step": 5378 + }, + { + "epoch": 0.4341053990799774, + "grad_norm": 0.8328250646591187, + "learning_rate": 0.00016723700534692853, + "loss": 2.6186, + "step": 5379 + }, + { + "epoch": 0.4341861028165604, + "grad_norm": 0.6942149996757507, + "learning_rate": 0.00016722531880776752, + "loss": 2.6032, + "step": 5380 + }, + { + "epoch": 0.4342668065531434, + "grad_norm": 0.7180305123329163, + "learning_rate": 0.00016721363059317583, + "loss": 2.6166, + "step": 5381 + }, + { + "epoch": 0.4343475102897264, + "grad_norm": 0.8093443512916565, + "learning_rate": 0.00016720194070344476, + "loss": 2.6596, + "step": 5382 + }, + { + "epoch": 0.43442821402630943, + "grad_norm": 0.7337743043899536, + "learning_rate": 0.00016719024913886568, + "loss": 2.6137, + "step": 5383 + }, + { + "epoch": 0.4345089177628924, + "grad_norm": 0.7590384483337402, + "learning_rate": 0.00016717855589972993, + "loss": 2.6541, + "step": 5384 + }, + { + "epoch": 0.43458962149947544, + "grad_norm": 0.6945257186889648, + "learning_rate": 0.00016716686098632898, + "loss": 2.686, + "step": 5385 + }, + { + "epoch": 0.4346703252360584, + "grad_norm": 0.7175764441490173, + "learning_rate": 0.00016715516439895424, + "loss": 2.6081, + "step": 5386 + }, + { + "epoch": 0.43475102897264145, + "grad_norm": 0.7287259697914124, + "learning_rate": 0.00016714346613789732, + "loss": 2.6462, + "step": 5387 + }, + { + "epoch": 0.43483173270922443, + "grad_norm": 0.6864096522331238, + "learning_rate": 0.00016713176620344964, + "loss": 2.7104, + "step": 5388 + }, + { + "epoch": 0.43491243644580746, + "grad_norm": 0.6554383039474487, + "learning_rate": 0.00016712006459590289, + "loss": 2.6153, + "step": 5389 + }, + { + "epoch": 0.43499314018239044, + 
"grad_norm": 0.6415165662765503, + "learning_rate": 0.00016710836131554867, + "loss": 2.6198, + "step": 5390 + }, + { + "epoch": 0.43507384391897347, + "grad_norm": 0.6998475193977356, + "learning_rate": 0.00016709665636267869, + "loss": 2.6774, + "step": 5391 + }, + { + "epoch": 0.43515454765555645, + "grad_norm": 0.7437679171562195, + "learning_rate": 0.00016708494973758465, + "loss": 2.6176, + "step": 5392 + }, + { + "epoch": 0.4352352513921395, + "grad_norm": 0.6898311376571655, + "learning_rate": 0.00016707324144055825, + "loss": 2.6194, + "step": 5393 + }, + { + "epoch": 0.43531595512872245, + "grad_norm": 0.7536425590515137, + "learning_rate": 0.00016706153147189138, + "loss": 2.672, + "step": 5394 + }, + { + "epoch": 0.4353966588653055, + "grad_norm": 0.7576118111610413, + "learning_rate": 0.00016704981983187581, + "loss": 2.6473, + "step": 5395 + }, + { + "epoch": 0.43547736260188846, + "grad_norm": 0.7452495098114014, + "learning_rate": 0.00016703810652080349, + "loss": 2.6487, + "step": 5396 + }, + { + "epoch": 0.4355580663384715, + "grad_norm": 0.7817744612693787, + "learning_rate": 0.0001670263915389663, + "loss": 2.61, + "step": 5397 + }, + { + "epoch": 0.43563877007505447, + "grad_norm": 0.7195492386817932, + "learning_rate": 0.00016701467488665624, + "loss": 2.6745, + "step": 5398 + }, + { + "epoch": 0.4357194738116375, + "grad_norm": 0.7703930735588074, + "learning_rate": 0.0001670029565641653, + "loss": 2.7196, + "step": 5399 + }, + { + "epoch": 0.4358001775482205, + "grad_norm": 0.6859520673751831, + "learning_rate": 0.00016699123657178553, + "loss": 2.6317, + "step": 5400 + }, + { + "epoch": 0.4358808812848035, + "grad_norm": 0.7380268573760986, + "learning_rate": 0.00016697951490980903, + "loss": 2.6008, + "step": 5401 + }, + { + "epoch": 0.4359615850213865, + "grad_norm": 0.7903439402580261, + "learning_rate": 0.00016696779157852792, + "loss": 2.6411, + "step": 5402 + }, + { + "epoch": 0.4360422887579695, + "grad_norm": 0.7022606134414673, + 
"learning_rate": 0.0001669560665782344, + "loss": 2.6153, + "step": 5403 + }, + { + "epoch": 0.4361229924945525, + "grad_norm": 0.8196203112602234, + "learning_rate": 0.00016694433990922068, + "loss": 2.6128, + "step": 5404 + }, + { + "epoch": 0.43620369623113553, + "grad_norm": 0.7342696189880371, + "learning_rate": 0.000166932611571779, + "loss": 2.6802, + "step": 5405 + }, + { + "epoch": 0.4362843999677185, + "grad_norm": 0.7475131154060364, + "learning_rate": 0.0001669208815662017, + "loss": 2.6106, + "step": 5406 + }, + { + "epoch": 0.4363651037043015, + "grad_norm": 0.7067655324935913, + "learning_rate": 0.00016690914989278107, + "loss": 2.6362, + "step": 5407 + }, + { + "epoch": 0.4364458074408845, + "grad_norm": 0.7550163865089417, + "learning_rate": 0.00016689741655180956, + "loss": 2.6256, + "step": 5408 + }, + { + "epoch": 0.4365265111774675, + "grad_norm": 0.7341828346252441, + "learning_rate": 0.00016688568154357952, + "loss": 2.6912, + "step": 5409 + }, + { + "epoch": 0.4366072149140505, + "grad_norm": 0.7501869201660156, + "learning_rate": 0.00016687394486838349, + "loss": 2.7122, + "step": 5410 + }, + { + "epoch": 0.4366879186506335, + "grad_norm": 0.7041562795639038, + "learning_rate": 0.00016686220652651392, + "loss": 2.6755, + "step": 5411 + }, + { + "epoch": 0.43676862238721653, + "grad_norm": 0.7218217253684998, + "learning_rate": 0.00016685046651826338, + "loss": 2.693, + "step": 5412 + }, + { + "epoch": 0.4368493261237995, + "grad_norm": 0.6880577206611633, + "learning_rate": 0.00016683872484392448, + "loss": 2.638, + "step": 5413 + }, + { + "epoch": 0.43693002986038254, + "grad_norm": 0.6864475607872009, + "learning_rate": 0.0001668269815037898, + "loss": 2.6497, + "step": 5414 + }, + { + "epoch": 0.4370107335969655, + "grad_norm": 0.7326167821884155, + "learning_rate": 0.00016681523649815212, + "loss": 2.6858, + "step": 5415 + }, + { + "epoch": 0.43709143733354855, + "grad_norm": 0.6773428320884705, + "learning_rate": 
0.00016680348982730405, + "loss": 2.6489, + "step": 5416 + }, + { + "epoch": 0.4371721410701315, + "grad_norm": 0.7117835283279419, + "learning_rate": 0.00016679174149153837, + "loss": 2.6607, + "step": 5417 + }, + { + "epoch": 0.43725284480671456, + "grad_norm": 0.7268334031105042, + "learning_rate": 0.00016677999149114793, + "loss": 2.703, + "step": 5418 + }, + { + "epoch": 0.43733354854329753, + "grad_norm": 0.7672972679138184, + "learning_rate": 0.00016676823982642554, + "loss": 2.5803, + "step": 5419 + }, + { + "epoch": 0.43741425227988057, + "grad_norm": 0.6966733932495117, + "learning_rate": 0.00016675648649766407, + "loss": 2.6149, + "step": 5420 + }, + { + "epoch": 0.43749495601646354, + "grad_norm": 0.752896249294281, + "learning_rate": 0.00016674473150515644, + "loss": 2.7108, + "step": 5421 + }, + { + "epoch": 0.4375756597530466, + "grad_norm": 0.7094796895980835, + "learning_rate": 0.00016673297484919565, + "loss": 2.6989, + "step": 5422 + }, + { + "epoch": 0.43765636348962955, + "grad_norm": 0.7631612420082092, + "learning_rate": 0.00016672121653007465, + "loss": 2.6673, + "step": 5423 + }, + { + "epoch": 0.4377370672262126, + "grad_norm": 0.7083843946456909, + "learning_rate": 0.00016670945654808655, + "loss": 2.6529, + "step": 5424 + }, + { + "epoch": 0.43781777096279556, + "grad_norm": 0.7291569709777832, + "learning_rate": 0.0001666976949035244, + "loss": 2.633, + "step": 5425 + }, + { + "epoch": 0.4378984746993786, + "grad_norm": 0.8351448774337769, + "learning_rate": 0.00016668593159668138, + "loss": 2.5993, + "step": 5426 + }, + { + "epoch": 0.43797917843596157, + "grad_norm": 0.7339642643928528, + "learning_rate": 0.00016667416662785058, + "loss": 2.6486, + "step": 5427 + }, + { + "epoch": 0.4380598821725446, + "grad_norm": 0.7257512211799622, + "learning_rate": 0.00016666239999732526, + "loss": 2.6453, + "step": 5428 + }, + { + "epoch": 0.4381405859091276, + "grad_norm": 0.7282476425170898, + "learning_rate": 0.00016665063170539872, + "loss": 
2.6654, + "step": 5429 + }, + { + "epoch": 0.4382212896457106, + "grad_norm": 0.726685643196106, + "learning_rate": 0.00016663886175236417, + "loss": 2.65, + "step": 5430 + }, + { + "epoch": 0.4383019933822936, + "grad_norm": 0.7478880286216736, + "learning_rate": 0.000166627090138515, + "loss": 2.623, + "step": 5431 + }, + { + "epoch": 0.4383826971188766, + "grad_norm": 0.7624948024749756, + "learning_rate": 0.00016661531686414457, + "loss": 2.6438, + "step": 5432 + }, + { + "epoch": 0.4384634008554596, + "grad_norm": 0.8098936676979065, + "learning_rate": 0.00016660354192954633, + "loss": 2.6226, + "step": 5433 + }, + { + "epoch": 0.4385441045920426, + "grad_norm": 0.7305725812911987, + "learning_rate": 0.0001665917653350137, + "loss": 2.6425, + "step": 5434 + }, + { + "epoch": 0.4386248083286256, + "grad_norm": 0.7064421772956848, + "learning_rate": 0.00016657998708084027, + "loss": 2.6069, + "step": 5435 + }, + { + "epoch": 0.43870551206520864, + "grad_norm": 0.8279524445533752, + "learning_rate": 0.00016656820716731945, + "loss": 2.6609, + "step": 5436 + }, + { + "epoch": 0.4387862158017916, + "grad_norm": 0.742659866809845, + "learning_rate": 0.00016655642559474488, + "loss": 2.64, + "step": 5437 + }, + { + "epoch": 0.43886691953837464, + "grad_norm": 0.757780909538269, + "learning_rate": 0.00016654464236341026, + "loss": 2.6546, + "step": 5438 + }, + { + "epoch": 0.4389476232749576, + "grad_norm": 0.7439742684364319, + "learning_rate": 0.00016653285747360918, + "loss": 2.6717, + "step": 5439 + }, + { + "epoch": 0.43902832701154065, + "grad_norm": 0.7529581189155579, + "learning_rate": 0.0001665210709256354, + "loss": 2.6204, + "step": 5440 + }, + { + "epoch": 0.43910903074812363, + "grad_norm": 0.7224153876304626, + "learning_rate": 0.00016650928271978258, + "loss": 2.6417, + "step": 5441 + }, + { + "epoch": 0.43918973448470666, + "grad_norm": 0.6792185306549072, + "learning_rate": 0.00016649749285634462, + "loss": 2.6382, + "step": 5442 + }, + { + "epoch": 
0.43927043822128964, + "grad_norm": 0.6887058019638062, + "learning_rate": 0.00016648570133561533, + "loss": 2.6302, + "step": 5443 + }, + { + "epoch": 0.43935114195787267, + "grad_norm": 0.7373671531677246, + "learning_rate": 0.00016647390815788853, + "loss": 2.625, + "step": 5444 + }, + { + "epoch": 0.43943184569445565, + "grad_norm": 0.7595719695091248, + "learning_rate": 0.0001664621133234582, + "loss": 2.6444, + "step": 5445 + }, + { + "epoch": 0.4395125494310387, + "grad_norm": 0.7331473231315613, + "learning_rate": 0.00016645031683261825, + "loss": 2.6308, + "step": 5446 + }, + { + "epoch": 0.43959325316762166, + "grad_norm": 0.7724922895431519, + "learning_rate": 0.0001664385186856627, + "loss": 2.6646, + "step": 5447 + }, + { + "epoch": 0.4396739569042047, + "grad_norm": 0.6960163712501526, + "learning_rate": 0.00016642671888288563, + "loss": 2.6196, + "step": 5448 + }, + { + "epoch": 0.43975466064078766, + "grad_norm": 0.6769189834594727, + "learning_rate": 0.00016641491742458103, + "loss": 2.6558, + "step": 5449 + }, + { + "epoch": 0.4398353643773707, + "grad_norm": 0.7435783743858337, + "learning_rate": 0.0001664031143110431, + "loss": 2.6717, + "step": 5450 + }, + { + "epoch": 0.4399160681139537, + "grad_norm": 0.7234118580818176, + "learning_rate": 0.00016639130954256603, + "loss": 2.6549, + "step": 5451 + }, + { + "epoch": 0.4399967718505367, + "grad_norm": 0.720825731754303, + "learning_rate": 0.00016637950311944392, + "loss": 2.6098, + "step": 5452 + }, + { + "epoch": 0.4400774755871197, + "grad_norm": 0.6977505087852478, + "learning_rate": 0.0001663676950419711, + "loss": 2.6351, + "step": 5453 + }, + { + "epoch": 0.4401581793237027, + "grad_norm": 0.6959076523780823, + "learning_rate": 0.00016635588531044185, + "loss": 2.6918, + "step": 5454 + }, + { + "epoch": 0.4402388830602857, + "grad_norm": 0.7022189497947693, + "learning_rate": 0.00016634407392515044, + "loss": 2.6218, + "step": 5455 + }, + { + "epoch": 0.4403195867968687, + "grad_norm": 
0.7147775292396545, + "learning_rate": 0.0001663322608863913, + "loss": 2.6966, + "step": 5456 + }, + { + "epoch": 0.4404002905334517, + "grad_norm": 0.7592755556106567, + "learning_rate": 0.00016632044619445882, + "loss": 2.6326, + "step": 5457 + }, + { + "epoch": 0.4404809942700347, + "grad_norm": 0.6914302110671997, + "learning_rate": 0.00016630862984964745, + "loss": 2.603, + "step": 5458 + }, + { + "epoch": 0.4405616980066177, + "grad_norm": 0.7735368609428406, + "learning_rate": 0.0001662968118522517, + "loss": 2.6666, + "step": 5459 + }, + { + "epoch": 0.4406424017432007, + "grad_norm": 0.7175899744033813, + "learning_rate": 0.00016628499220256612, + "loss": 2.666, + "step": 5460 + }, + { + "epoch": 0.4407231054797837, + "grad_norm": 0.6735796332359314, + "learning_rate": 0.00016627317090088523, + "loss": 2.6451, + "step": 5461 + }, + { + "epoch": 0.4408038092163667, + "grad_norm": 0.72022545337677, + "learning_rate": 0.0001662613479475037, + "loss": 2.6295, + "step": 5462 + }, + { + "epoch": 0.4408845129529497, + "grad_norm": 0.7084751725196838, + "learning_rate": 0.00016624952334271616, + "loss": 2.6633, + "step": 5463 + }, + { + "epoch": 0.4409652166895327, + "grad_norm": 0.7399250864982605, + "learning_rate": 0.00016623769708681735, + "loss": 2.6076, + "step": 5464 + }, + { + "epoch": 0.44104592042611573, + "grad_norm": 0.6904892325401306, + "learning_rate": 0.00016622586918010193, + "loss": 2.6799, + "step": 5465 + }, + { + "epoch": 0.4411266241626987, + "grad_norm": 0.7419006824493408, + "learning_rate": 0.00016621403962286478, + "loss": 2.65, + "step": 5466 + }, + { + "epoch": 0.44120732789928174, + "grad_norm": 0.7201282978057861, + "learning_rate": 0.00016620220841540064, + "loss": 2.6769, + "step": 5467 + }, + { + "epoch": 0.4412880316358647, + "grad_norm": 0.7223218679428101, + "learning_rate": 0.00016619037555800443, + "loss": 2.6342, + "step": 5468 + }, + { + "epoch": 0.44136873537244775, + "grad_norm": 0.7517585754394531, + "learning_rate": 
0.00016617854105097104, + "loss": 2.6103, + "step": 5469 + }, + { + "epoch": 0.44144943910903073, + "grad_norm": 0.6765139698982239, + "learning_rate": 0.0001661667048945954, + "loss": 2.624, + "step": 5470 + }, + { + "epoch": 0.44153014284561376, + "grad_norm": 0.7197677493095398, + "learning_rate": 0.00016615486708917255, + "loss": 2.5786, + "step": 5471 + }, + { + "epoch": 0.44161084658219674, + "grad_norm": 0.7196774482727051, + "learning_rate": 0.00016614302763499742, + "loss": 2.6147, + "step": 5472 + }, + { + "epoch": 0.44169155031877977, + "grad_norm": 0.7210293412208557, + "learning_rate": 0.00016613118653236518, + "loss": 2.6526, + "step": 5473 + }, + { + "epoch": 0.44177225405536275, + "grad_norm": 0.6870129108428955, + "learning_rate": 0.00016611934378157092, + "loss": 2.665, + "step": 5474 + }, + { + "epoch": 0.4418529577919458, + "grad_norm": 0.6925365328788757, + "learning_rate": 0.00016610749938290975, + "loss": 2.5734, + "step": 5475 + }, + { + "epoch": 0.44193366152852875, + "grad_norm": 0.7399131655693054, + "learning_rate": 0.0001660956533366769, + "loss": 2.6935, + "step": 5476 + }, + { + "epoch": 0.4420143652651118, + "grad_norm": 0.7348966002464294, + "learning_rate": 0.00016608380564316758, + "loss": 2.6788, + "step": 5477 + }, + { + "epoch": 0.44209506900169476, + "grad_norm": 0.7597334980964661, + "learning_rate": 0.00016607195630267708, + "loss": 2.6732, + "step": 5478 + }, + { + "epoch": 0.4421757727382778, + "grad_norm": 0.6847043037414551, + "learning_rate": 0.00016606010531550072, + "loss": 2.6475, + "step": 5479 + }, + { + "epoch": 0.44225647647486077, + "grad_norm": 0.7065151929855347, + "learning_rate": 0.00016604825268193388, + "loss": 2.6674, + "step": 5480 + }, + { + "epoch": 0.4423371802114438, + "grad_norm": 0.7102208137512207, + "learning_rate": 0.0001660363984022719, + "loss": 2.6723, + "step": 5481 + }, + { + "epoch": 0.4424178839480268, + "grad_norm": 0.6912767887115479, + "learning_rate": 0.00016602454247681024, + "loss": 
2.628, + "step": 5482 + }, + { + "epoch": 0.4424985876846098, + "grad_norm": 0.7265123128890991, + "learning_rate": 0.0001660126849058444, + "loss": 2.5935, + "step": 5483 + }, + { + "epoch": 0.4425792914211928, + "grad_norm": 0.8177923560142517, + "learning_rate": 0.0001660008256896699, + "loss": 2.6402, + "step": 5484 + }, + { + "epoch": 0.4426599951577758, + "grad_norm": 0.7196556925773621, + "learning_rate": 0.00016598896482858231, + "loss": 2.6939, + "step": 5485 + }, + { + "epoch": 0.4427406988943588, + "grad_norm": 0.7459850907325745, + "learning_rate": 0.0001659771023228772, + "loss": 2.6343, + "step": 5486 + }, + { + "epoch": 0.44282140263094183, + "grad_norm": 0.7399095892906189, + "learning_rate": 0.00016596523817285024, + "loss": 2.6139, + "step": 5487 + }, + { + "epoch": 0.4429021063675248, + "grad_norm": 0.7517558336257935, + "learning_rate": 0.0001659533723787971, + "loss": 2.6609, + "step": 5488 + }, + { + "epoch": 0.44298281010410784, + "grad_norm": 0.7073537707328796, + "learning_rate": 0.00016594150494101355, + "loss": 2.6326, + "step": 5489 + }, + { + "epoch": 0.4430635138406908, + "grad_norm": 0.7414752244949341, + "learning_rate": 0.0001659296358597953, + "loss": 2.6759, + "step": 5490 + }, + { + "epoch": 0.44314421757727385, + "grad_norm": 0.7636380195617676, + "learning_rate": 0.0001659177651354382, + "loss": 2.5743, + "step": 5491 + }, + { + "epoch": 0.4432249213138568, + "grad_norm": 0.6839539408683777, + "learning_rate": 0.00016590589276823804, + "loss": 2.631, + "step": 5492 + }, + { + "epoch": 0.44330562505043986, + "grad_norm": 0.8057516813278198, + "learning_rate": 0.0001658940187584908, + "loss": 2.6916, + "step": 5493 + }, + { + "epoch": 0.44338632878702283, + "grad_norm": 0.7479767799377441, + "learning_rate": 0.00016588214310649232, + "loss": 2.6811, + "step": 5494 + }, + { + "epoch": 0.44346703252360586, + "grad_norm": 0.7854729294776917, + "learning_rate": 0.00016587026581253866, + "loss": 2.6746, + "step": 5495 + }, + { + 
"epoch": 0.44354773626018884, + "grad_norm": 0.7782836556434631, + "learning_rate": 0.00016585838687692577, + "loss": 2.61, + "step": 5496 + }, + { + "epoch": 0.4436284399967719, + "grad_norm": 0.7047034502029419, + "learning_rate": 0.00016584650629994968, + "loss": 2.6573, + "step": 5497 + }, + { + "epoch": 0.44370914373335485, + "grad_norm": 0.7398735880851746, + "learning_rate": 0.0001658346240819066, + "loss": 2.6338, + "step": 5498 + }, + { + "epoch": 0.4437898474699379, + "grad_norm": 0.7243468165397644, + "learning_rate": 0.00016582274022309258, + "loss": 2.5898, + "step": 5499 + }, + { + "epoch": 0.44387055120652086, + "grad_norm": 0.7415906190872192, + "learning_rate": 0.00016581085472380376, + "loss": 2.5893, + "step": 5500 + }, + { + "epoch": 0.4439512549431039, + "grad_norm": 0.6935107707977295, + "learning_rate": 0.00016579896758433645, + "loss": 2.6704, + "step": 5501 + }, + { + "epoch": 0.44403195867968687, + "grad_norm": 0.7188034653663635, + "learning_rate": 0.00016578707880498685, + "loss": 2.643, + "step": 5502 + }, + { + "epoch": 0.4441126624162699, + "grad_norm": 0.6697022914886475, + "learning_rate": 0.0001657751883860513, + "loss": 2.6313, + "step": 5503 + }, + { + "epoch": 0.4441933661528529, + "grad_norm": 0.760154664516449, + "learning_rate": 0.00016576329632782613, + "loss": 2.6604, + "step": 5504 + }, + { + "epoch": 0.4442740698894359, + "grad_norm": 0.6883447170257568, + "learning_rate": 0.00016575140263060765, + "loss": 2.64, + "step": 5505 + }, + { + "epoch": 0.4443547736260189, + "grad_norm": 0.8628804683685303, + "learning_rate": 0.0001657395072946924, + "loss": 2.6651, + "step": 5506 + }, + { + "epoch": 0.4444354773626019, + "grad_norm": 0.7125170230865479, + "learning_rate": 0.0001657276103203768, + "loss": 2.7132, + "step": 5507 + }, + { + "epoch": 0.4445161810991849, + "grad_norm": 0.6965304613113403, + "learning_rate": 0.00016571571170795725, + "loss": 2.7109, + "step": 5508 + }, + { + "epoch": 0.44459688483576787, + 
"grad_norm": 0.720327615737915, + "learning_rate": 0.00016570381145773042, + "loss": 2.6323, + "step": 5509 + }, + { + "epoch": 0.4446775885723509, + "grad_norm": 0.7097898125648499, + "learning_rate": 0.00016569190956999287, + "loss": 2.6461, + "step": 5510 + }, + { + "epoch": 0.4447582923089339, + "grad_norm": 0.7142884731292725, + "learning_rate": 0.0001656800060450412, + "loss": 2.6894, + "step": 5511 + }, + { + "epoch": 0.4448389960455169, + "grad_norm": 0.6992002725601196, + "learning_rate": 0.0001656681008831721, + "loss": 2.6116, + "step": 5512 + }, + { + "epoch": 0.4449196997820999, + "grad_norm": 0.763841450214386, + "learning_rate": 0.00016565619408468227, + "loss": 2.6441, + "step": 5513 + }, + { + "epoch": 0.4450004035186829, + "grad_norm": 0.6958404183387756, + "learning_rate": 0.00016564428564986848, + "loss": 2.5751, + "step": 5514 + }, + { + "epoch": 0.4450811072552659, + "grad_norm": 0.8804046511650085, + "learning_rate": 0.00016563237557902744, + "loss": 2.6353, + "step": 5515 + }, + { + "epoch": 0.4451618109918489, + "grad_norm": 0.744864821434021, + "learning_rate": 0.00016562046387245608, + "loss": 2.6887, + "step": 5516 + }, + { + "epoch": 0.4452425147284319, + "grad_norm": 0.7627978920936584, + "learning_rate": 0.0001656085505304512, + "loss": 2.6347, + "step": 5517 + }, + { + "epoch": 0.44532321846501494, + "grad_norm": 0.7728918194770813, + "learning_rate": 0.00016559663555330975, + "loss": 2.6344, + "step": 5518 + }, + { + "epoch": 0.4454039222015979, + "grad_norm": 0.7853842377662659, + "learning_rate": 0.00016558471894132865, + "loss": 2.7239, + "step": 5519 + }, + { + "epoch": 0.44548462593818094, + "grad_norm": 0.7981860041618347, + "learning_rate": 0.00016557280069480495, + "loss": 2.66, + "step": 5520 + }, + { + "epoch": 0.4455653296747639, + "grad_norm": 0.7555295825004578, + "learning_rate": 0.0001655608808140356, + "loss": 2.6636, + "step": 5521 + }, + { + "epoch": 0.44564603341134695, + "grad_norm": 0.6893854141235352, + 
"learning_rate": 0.00016554895929931778, + "loss": 2.5999, + "step": 5522 + }, + { + "epoch": 0.44572673714792993, + "grad_norm": 0.7740506529808044, + "learning_rate": 0.0001655370361509485, + "loss": 2.6308, + "step": 5523 + }, + { + "epoch": 0.44580744088451296, + "grad_norm": 0.6956021785736084, + "learning_rate": 0.00016552511136922498, + "loss": 2.6376, + "step": 5524 + }, + { + "epoch": 0.44588814462109594, + "grad_norm": 0.7408841252326965, + "learning_rate": 0.00016551318495444445, + "loss": 2.6644, + "step": 5525 + }, + { + "epoch": 0.44596884835767897, + "grad_norm": 0.7715663313865662, + "learning_rate": 0.000165501256906904, + "loss": 2.6791, + "step": 5526 + }, + { + "epoch": 0.44604955209426195, + "grad_norm": 0.6880629062652588, + "learning_rate": 0.0001654893272269011, + "loss": 2.7209, + "step": 5527 + }, + { + "epoch": 0.446130255830845, + "grad_norm": 0.6765853762626648, + "learning_rate": 0.0001654773959147329, + "loss": 2.6548, + "step": 5528 + }, + { + "epoch": 0.44621095956742796, + "grad_norm": 0.739248514175415, + "learning_rate": 0.00016546546297069688, + "loss": 2.69, + "step": 5529 + }, + { + "epoch": 0.446291663304011, + "grad_norm": 0.7655714750289917, + "learning_rate": 0.00016545352839509038, + "loss": 2.6238, + "step": 5530 + }, + { + "epoch": 0.44637236704059396, + "grad_norm": 0.706068217754364, + "learning_rate": 0.00016544159218821088, + "loss": 2.6528, + "step": 5531 + }, + { + "epoch": 0.446453070777177, + "grad_norm": 0.7411316633224487, + "learning_rate": 0.00016542965435035578, + "loss": 2.7034, + "step": 5532 + }, + { + "epoch": 0.44653377451376, + "grad_norm": 0.6550690531730652, + "learning_rate": 0.0001654177148818227, + "loss": 2.6388, + "step": 5533 + }, + { + "epoch": 0.446614478250343, + "grad_norm": 0.7151147127151489, + "learning_rate": 0.00016540577378290915, + "loss": 2.7382, + "step": 5534 + }, + { + "epoch": 0.446695181986926, + "grad_norm": 0.7343939542770386, + "learning_rate": 0.00016539383105391276, + 
"loss": 2.6316, + "step": 5535 + }, + { + "epoch": 0.446775885723509, + "grad_norm": 0.702036440372467, + "learning_rate": 0.00016538188669513115, + "loss": 2.6465, + "step": 5536 + }, + { + "epoch": 0.446856589460092, + "grad_norm": 0.7212840914726257, + "learning_rate": 0.00016536994070686197, + "loss": 2.6471, + "step": 5537 + }, + { + "epoch": 0.446937293196675, + "grad_norm": 0.7345479130744934, + "learning_rate": 0.00016535799308940304, + "loss": 2.6746, + "step": 5538 + }, + { + "epoch": 0.447017996933258, + "grad_norm": 0.7447341084480286, + "learning_rate": 0.00016534604384305207, + "loss": 2.6487, + "step": 5539 + }, + { + "epoch": 0.44709870066984103, + "grad_norm": 0.6865687370300293, + "learning_rate": 0.00016533409296810687, + "loss": 2.6202, + "step": 5540 + }, + { + "epoch": 0.447179404406424, + "grad_norm": 0.8210769891738892, + "learning_rate": 0.0001653221404648653, + "loss": 2.7155, + "step": 5541 + }, + { + "epoch": 0.44726010814300704, + "grad_norm": 0.7768925428390503, + "learning_rate": 0.0001653101863336252, + "loss": 2.6011, + "step": 5542 + }, + { + "epoch": 0.44734081187959, + "grad_norm": 0.7160049080848694, + "learning_rate": 0.00016529823057468456, + "loss": 2.6541, + "step": 5543 + }, + { + "epoch": 0.44742151561617305, + "grad_norm": 0.7386900782585144, + "learning_rate": 0.00016528627318834134, + "loss": 2.6586, + "step": 5544 + }, + { + "epoch": 0.447502219352756, + "grad_norm": 0.7415460348129272, + "learning_rate": 0.0001652743141748935, + "loss": 2.7032, + "step": 5545 + }, + { + "epoch": 0.44758292308933906, + "grad_norm": 0.8483054637908936, + "learning_rate": 0.00016526235353463912, + "loss": 2.6145, + "step": 5546 + }, + { + "epoch": 0.44766362682592203, + "grad_norm": 0.7428778409957886, + "learning_rate": 0.00016525039126787629, + "loss": 2.7005, + "step": 5547 + }, + { + "epoch": 0.44774433056250507, + "grad_norm": 0.7214285731315613, + "learning_rate": 0.00016523842737490316, + "loss": 2.6267, + "step": 5548 + }, + { + 
"epoch": 0.44782503429908804, + "grad_norm": 0.6753950715065002, + "learning_rate": 0.0001652264618560179, + "loss": 2.6732, + "step": 5549 + }, + { + "epoch": 0.4479057380356711, + "grad_norm": 0.6969403028488159, + "learning_rate": 0.00016521449471151867, + "loss": 2.6218, + "step": 5550 + }, + { + "epoch": 0.44798644177225405, + "grad_norm": 0.7562664151191711, + "learning_rate": 0.00016520252594170377, + "loss": 2.69, + "step": 5551 + }, + { + "epoch": 0.4480671455088371, + "grad_norm": 0.6831937432289124, + "learning_rate": 0.0001651905555468715, + "loss": 2.709, + "step": 5552 + }, + { + "epoch": 0.44814784924542006, + "grad_norm": 0.6753427386283875, + "learning_rate": 0.00016517858352732017, + "loss": 2.5852, + "step": 5553 + }, + { + "epoch": 0.4482285529820031, + "grad_norm": 0.7573871612548828, + "learning_rate": 0.00016516660988334815, + "loss": 2.6187, + "step": 5554 + }, + { + "epoch": 0.44830925671858607, + "grad_norm": 0.6424254775047302, + "learning_rate": 0.00016515463461525383, + "loss": 2.6411, + "step": 5555 + }, + { + "epoch": 0.4483899604551691, + "grad_norm": 0.7460073232650757, + "learning_rate": 0.0001651426577233358, + "loss": 2.6239, + "step": 5556 + }, + { + "epoch": 0.4484706641917521, + "grad_norm": 0.6980866193771362, + "learning_rate": 0.0001651306792078924, + "loss": 2.605, + "step": 5557 + }, + { + "epoch": 0.4485513679283351, + "grad_norm": 0.7376009225845337, + "learning_rate": 0.00016511869906922217, + "loss": 2.7114, + "step": 5558 + }, + { + "epoch": 0.4486320716649181, + "grad_norm": 0.7227364778518677, + "learning_rate": 0.0001651067173076238, + "loss": 2.6212, + "step": 5559 + }, + { + "epoch": 0.44871277540150106, + "grad_norm": 0.8989635705947876, + "learning_rate": 0.00016509473392339584, + "loss": 2.671, + "step": 5560 + }, + { + "epoch": 0.4487934791380841, + "grad_norm": 0.7273553609848022, + "learning_rate": 0.0001650827489168369, + "loss": 2.6556, + "step": 5561 + }, + { + "epoch": 0.44887418287466707, + 
"grad_norm": 0.839439868927002, + "learning_rate": 0.00016507076228824578, + "loss": 2.6959, + "step": 5562 + }, + { + "epoch": 0.4489548866112501, + "grad_norm": 0.6912770867347717, + "learning_rate": 0.00016505877403792115, + "loss": 2.6709, + "step": 5563 + }, + { + "epoch": 0.4490355903478331, + "grad_norm": 0.7850949168205261, + "learning_rate": 0.00016504678416616182, + "loss": 2.7257, + "step": 5564 + }, + { + "epoch": 0.4491162940844161, + "grad_norm": 0.7768355011940002, + "learning_rate": 0.0001650347926732666, + "loss": 2.5939, + "step": 5565 + }, + { + "epoch": 0.4491969978209991, + "grad_norm": 0.6518398523330688, + "learning_rate": 0.0001650227995595343, + "loss": 2.6589, + "step": 5566 + }, + { + "epoch": 0.4492777015575821, + "grad_norm": 0.6855975389480591, + "learning_rate": 0.0001650108048252639, + "loss": 2.6372, + "step": 5567 + }, + { + "epoch": 0.4493584052941651, + "grad_norm": 0.7176938056945801, + "learning_rate": 0.0001649988084707543, + "loss": 2.6506, + "step": 5568 + }, + { + "epoch": 0.44943910903074813, + "grad_norm": 0.735335648059845, + "learning_rate": 0.00016498681049630448, + "loss": 2.608, + "step": 5569 + }, + { + "epoch": 0.4495198127673311, + "grad_norm": 0.6862306594848633, + "learning_rate": 0.00016497481090221346, + "loss": 2.5982, + "step": 5570 + }, + { + "epoch": 0.44960051650391414, + "grad_norm": 0.7213380336761475, + "learning_rate": 0.0001649628096887803, + "loss": 2.6457, + "step": 5571 + }, + { + "epoch": 0.4496812202404971, + "grad_norm": 0.7118985652923584, + "learning_rate": 0.0001649508068563041, + "loss": 2.6321, + "step": 5572 + }, + { + "epoch": 0.44976192397708015, + "grad_norm": 0.7663396596908569, + "learning_rate": 0.00016493880240508405, + "loss": 2.5865, + "step": 5573 + }, + { + "epoch": 0.4498426277136631, + "grad_norm": 0.6854543089866638, + "learning_rate": 0.00016492679633541926, + "loss": 2.6536, + "step": 5574 + }, + { + "epoch": 0.44992333145024616, + "grad_norm": 0.7071701884269714, + 
"learning_rate": 0.000164914788647609, + "loss": 2.6149, + "step": 5575 + }, + { + "epoch": 0.45000403518682913, + "grad_norm": 0.7610478401184082, + "learning_rate": 0.00016490277934195252, + "loss": 2.6326, + "step": 5576 + }, + { + "epoch": 0.45008473892341216, + "grad_norm": 0.7117596864700317, + "learning_rate": 0.0001648907684187491, + "loss": 2.6938, + "step": 5577 + }, + { + "epoch": 0.45016544265999514, + "grad_norm": 0.6980494856834412, + "learning_rate": 0.00016487875587829813, + "loss": 2.6798, + "step": 5578 + }, + { + "epoch": 0.4502461463965782, + "grad_norm": 0.7957972288131714, + "learning_rate": 0.00016486674172089898, + "loss": 2.6029, + "step": 5579 + }, + { + "epoch": 0.45032685013316115, + "grad_norm": 0.7258082032203674, + "learning_rate": 0.00016485472594685103, + "loss": 2.6785, + "step": 5580 + }, + { + "epoch": 0.4504075538697442, + "grad_norm": 0.7402041554450989, + "learning_rate": 0.0001648427085564538, + "loss": 2.6263, + "step": 5581 + }, + { + "epoch": 0.45048825760632716, + "grad_norm": 0.6943814158439636, + "learning_rate": 0.00016483068955000673, + "loss": 2.6761, + "step": 5582 + }, + { + "epoch": 0.4505689613429102, + "grad_norm": 0.8021644353866577, + "learning_rate": 0.00016481866892780947, + "loss": 2.6376, + "step": 5583 + }, + { + "epoch": 0.45064966507949317, + "grad_norm": 0.7748533487319946, + "learning_rate": 0.0001648066466901615, + "loss": 2.7465, + "step": 5584 + }, + { + "epoch": 0.4507303688160762, + "grad_norm": 0.7432222366333008, + "learning_rate": 0.00016479462283736248, + "loss": 2.6368, + "step": 5585 + }, + { + "epoch": 0.4508110725526592, + "grad_norm": 0.7835286259651184, + "learning_rate": 0.00016478259736971214, + "loss": 2.6449, + "step": 5586 + }, + { + "epoch": 0.4508917762892422, + "grad_norm": 0.7372995018959045, + "learning_rate": 0.00016477057028751007, + "loss": 2.6091, + "step": 5587 + }, + { + "epoch": 0.4509724800258252, + "grad_norm": 0.8230665326118469, + "learning_rate": 
0.0001647585415910561, + "loss": 2.6345, + "step": 5588 + }, + { + "epoch": 0.4510531837624082, + "grad_norm": 0.7490825057029724, + "learning_rate": 0.00016474651128065002, + "loss": 2.5996, + "step": 5589 + }, + { + "epoch": 0.4511338874989912, + "grad_norm": 0.7950569987297058, + "learning_rate": 0.00016473447935659157, + "loss": 2.7109, + "step": 5590 + }, + { + "epoch": 0.4512145912355742, + "grad_norm": 0.7648342251777649, + "learning_rate": 0.00016472244581918074, + "loss": 2.6268, + "step": 5591 + }, + { + "epoch": 0.4512952949721572, + "grad_norm": 0.726828396320343, + "learning_rate": 0.00016471041066871733, + "loss": 2.5959, + "step": 5592 + }, + { + "epoch": 0.45137599870874023, + "grad_norm": 0.7855841517448425, + "learning_rate": 0.00016469837390550133, + "loss": 2.6671, + "step": 5593 + }, + { + "epoch": 0.4514567024453232, + "grad_norm": 0.6858882904052734, + "learning_rate": 0.00016468633552983275, + "loss": 2.6003, + "step": 5594 + }, + { + "epoch": 0.45153740618190624, + "grad_norm": 0.710926353931427, + "learning_rate": 0.0001646742955420116, + "loss": 2.6049, + "step": 5595 + }, + { + "epoch": 0.4516181099184892, + "grad_norm": 0.8359978199005127, + "learning_rate": 0.0001646622539423379, + "loss": 2.6636, + "step": 5596 + }, + { + "epoch": 0.45169881365507225, + "grad_norm": 0.7628041505813599, + "learning_rate": 0.00016465021073111186, + "loss": 2.6586, + "step": 5597 + }, + { + "epoch": 0.4517795173916552, + "grad_norm": 0.7723419666290283, + "learning_rate": 0.00016463816590863356, + "loss": 2.6213, + "step": 5598 + }, + { + "epoch": 0.45186022112823826, + "grad_norm": 0.7210986018180847, + "learning_rate": 0.0001646261194752032, + "loss": 2.6674, + "step": 5599 + }, + { + "epoch": 0.45194092486482124, + "grad_norm": 0.7665949463844299, + "learning_rate": 0.00016461407143112097, + "loss": 2.68, + "step": 5600 + }, + { + "epoch": 0.45202162860140427, + "grad_norm": 0.7225117087364197, + "learning_rate": 0.00016460202177668722, + "loss": 
2.6473, + "step": 5601 + }, + { + "epoch": 0.45210233233798724, + "grad_norm": 0.6831738948822021, + "learning_rate": 0.0001645899705122022, + "loss": 2.6863, + "step": 5602 + }, + { + "epoch": 0.4521830360745703, + "grad_norm": 0.7006321549415588, + "learning_rate": 0.00016457791763796627, + "loss": 2.6242, + "step": 5603 + }, + { + "epoch": 0.45226373981115325, + "grad_norm": 0.7245663404464722, + "learning_rate": 0.00016456586315427983, + "loss": 2.6201, + "step": 5604 + }, + { + "epoch": 0.4523444435477363, + "grad_norm": 0.7444287538528442, + "learning_rate": 0.00016455380706144332, + "loss": 2.6684, + "step": 5605 + }, + { + "epoch": 0.45242514728431926, + "grad_norm": 0.6562673449516296, + "learning_rate": 0.00016454174935975714, + "loss": 2.5912, + "step": 5606 + }, + { + "epoch": 0.4525058510209023, + "grad_norm": 0.6494336724281311, + "learning_rate": 0.0001645296900495219, + "loss": 2.6245, + "step": 5607 + }, + { + "epoch": 0.45258655475748527, + "grad_norm": 0.6968161463737488, + "learning_rate": 0.0001645176291310381, + "loss": 2.6494, + "step": 5608 + }, + { + "epoch": 0.4526672584940683, + "grad_norm": 0.7351142764091492, + "learning_rate": 0.00016450556660460632, + "loss": 2.574, + "step": 5609 + }, + { + "epoch": 0.4527479622306513, + "grad_norm": 0.7522323131561279, + "learning_rate": 0.0001644935024705272, + "loss": 2.6512, + "step": 5610 + }, + { + "epoch": 0.45282866596723426, + "grad_norm": 0.6744225025177002, + "learning_rate": 0.0001644814367291014, + "loss": 2.6288, + "step": 5611 + }, + { + "epoch": 0.4529093697038173, + "grad_norm": 0.6933234333992004, + "learning_rate": 0.00016446936938062967, + "loss": 2.6076, + "step": 5612 + }, + { + "epoch": 0.45299007344040026, + "grad_norm": 0.7101204991340637, + "learning_rate": 0.00016445730042541272, + "loss": 2.6322, + "step": 5613 + }, + { + "epoch": 0.4530707771769833, + "grad_norm": 0.7647581696510315, + "learning_rate": 0.00016444522986375134, + "loss": 2.7021, + "step": 5614 + }, + { + 
"epoch": 0.4531514809135663, + "grad_norm": 0.7028820514678955, + "learning_rate": 0.00016443315769594635, + "loss": 2.6171, + "step": 5615 + }, + { + "epoch": 0.4532321846501493, + "grad_norm": 0.6933851838111877, + "learning_rate": 0.00016442108392229868, + "loss": 2.6119, + "step": 5616 + }, + { + "epoch": 0.4533128883867323, + "grad_norm": 0.7218462824821472, + "learning_rate": 0.0001644090085431092, + "loss": 2.6661, + "step": 5617 + }, + { + "epoch": 0.4533935921233153, + "grad_norm": 0.7390525341033936, + "learning_rate": 0.00016439693155867883, + "loss": 2.7084, + "step": 5618 + }, + { + "epoch": 0.4534742958598983, + "grad_norm": 0.734136164188385, + "learning_rate": 0.0001643848529693086, + "loss": 2.6896, + "step": 5619 + }, + { + "epoch": 0.4535549995964813, + "grad_norm": 0.8082060813903809, + "learning_rate": 0.00016437277277529954, + "loss": 2.5828, + "step": 5620 + }, + { + "epoch": 0.4536357033330643, + "grad_norm": 0.695988655090332, + "learning_rate": 0.0001643606909769527, + "loss": 2.6383, + "step": 5621 + }, + { + "epoch": 0.45371640706964733, + "grad_norm": 0.7415786385536194, + "learning_rate": 0.00016434860757456922, + "loss": 2.6388, + "step": 5622 + }, + { + "epoch": 0.4537971108062303, + "grad_norm": 0.7378649115562439, + "learning_rate": 0.0001643365225684502, + "loss": 2.6534, + "step": 5623 + }, + { + "epoch": 0.45387781454281334, + "grad_norm": 0.7686129808425903, + "learning_rate": 0.0001643244359588969, + "loss": 2.6637, + "step": 5624 + }, + { + "epoch": 0.4539585182793963, + "grad_norm": 0.7305558323860168, + "learning_rate": 0.00016431234774621047, + "loss": 2.6525, + "step": 5625 + }, + { + "epoch": 0.45403922201597935, + "grad_norm": 0.7994235157966614, + "learning_rate": 0.00016430025793069225, + "loss": 2.6316, + "step": 5626 + }, + { + "epoch": 0.4541199257525623, + "grad_norm": 0.6945801377296448, + "learning_rate": 0.0001642881665126435, + "loss": 2.6367, + "step": 5627 + }, + { + "epoch": 0.45420062948914536, + 
"grad_norm": 0.6855447292327881, + "learning_rate": 0.00016427607349236558, + "loss": 2.6317, + "step": 5628 + }, + { + "epoch": 0.45428133322572833, + "grad_norm": 0.6961888670921326, + "learning_rate": 0.00016426397887015992, + "loss": 2.6477, + "step": 5629 + }, + { + "epoch": 0.45436203696231137, + "grad_norm": 0.7531994581222534, + "learning_rate": 0.0001642518826463279, + "loss": 2.7219, + "step": 5630 + }, + { + "epoch": 0.45444274069889434, + "grad_norm": 0.7442335486412048, + "learning_rate": 0.00016423978482117102, + "loss": 2.706, + "step": 5631 + }, + { + "epoch": 0.4545234444354774, + "grad_norm": 0.7075700759887695, + "learning_rate": 0.00016422768539499076, + "loss": 2.6481, + "step": 5632 + }, + { + "epoch": 0.45460414817206035, + "grad_norm": 0.7831876873970032, + "learning_rate": 0.0001642155843680887, + "loss": 2.616, + "step": 5633 + }, + { + "epoch": 0.4546848519086434, + "grad_norm": 0.7514604926109314, + "learning_rate": 0.00016420348174076642, + "loss": 2.6282, + "step": 5634 + }, + { + "epoch": 0.45476555564522636, + "grad_norm": 0.7136685252189636, + "learning_rate": 0.0001641913775133255, + "loss": 2.6764, + "step": 5635 + }, + { + "epoch": 0.4548462593818094, + "grad_norm": 0.7406740784645081, + "learning_rate": 0.00016417927168606771, + "loss": 2.6126, + "step": 5636 + }, + { + "epoch": 0.45492696311839237, + "grad_norm": 0.7257869839668274, + "learning_rate": 0.0001641671642592947, + "loss": 2.6035, + "step": 5637 + }, + { + "epoch": 0.4550076668549754, + "grad_norm": 0.8378798961639404, + "learning_rate": 0.00016415505523330822, + "loss": 2.6657, + "step": 5638 + }, + { + "epoch": 0.4550883705915584, + "grad_norm": 0.7218836545944214, + "learning_rate": 0.00016414294460841003, + "loss": 2.6209, + "step": 5639 + }, + { + "epoch": 0.4551690743281414, + "grad_norm": 0.7792766690254211, + "learning_rate": 0.00016413083238490204, + "loss": 2.7208, + "step": 5640 + }, + { + "epoch": 0.4552497780647244, + "grad_norm": 0.7800823450088501, + 
"learning_rate": 0.000164118718563086, + "loss": 2.6351, + "step": 5641 + }, + { + "epoch": 0.4553304818013074, + "grad_norm": 0.7593275904655457, + "learning_rate": 0.00016410660314326395, + "loss": 2.7025, + "step": 5642 + }, + { + "epoch": 0.4554111855378904, + "grad_norm": 0.7561587691307068, + "learning_rate": 0.00016409448612573772, + "loss": 2.6188, + "step": 5643 + }, + { + "epoch": 0.4554918892744734, + "grad_norm": 0.7674516439437866, + "learning_rate": 0.00016408236751080937, + "loss": 2.629, + "step": 5644 + }, + { + "epoch": 0.4555725930110564, + "grad_norm": 0.7112495303153992, + "learning_rate": 0.00016407024729878095, + "loss": 2.6261, + "step": 5645 + }, + { + "epoch": 0.45565329674763944, + "grad_norm": 0.6861695647239685, + "learning_rate": 0.00016405812548995444, + "loss": 2.6984, + "step": 5646 + }, + { + "epoch": 0.4557340004842224, + "grad_norm": 0.7711648941040039, + "learning_rate": 0.000164046002084632, + "loss": 2.6839, + "step": 5647 + }, + { + "epoch": 0.45581470422080544, + "grad_norm": 0.6862967014312744, + "learning_rate": 0.00016403387708311578, + "loss": 2.5964, + "step": 5648 + }, + { + "epoch": 0.4558954079573884, + "grad_norm": 0.707374632358551, + "learning_rate": 0.00016402175048570793, + "loss": 2.6191, + "step": 5649 + }, + { + "epoch": 0.45597611169397145, + "grad_norm": 0.7980892658233643, + "learning_rate": 0.00016400962229271072, + "loss": 2.6288, + "step": 5650 + }, + { + "epoch": 0.45605681543055443, + "grad_norm": 0.686187744140625, + "learning_rate": 0.0001639974925044264, + "loss": 2.6277, + "step": 5651 + }, + { + "epoch": 0.45613751916713746, + "grad_norm": 0.6970425844192505, + "learning_rate": 0.0001639853611211573, + "loss": 2.5726, + "step": 5652 + }, + { + "epoch": 0.45621822290372044, + "grad_norm": 0.701500415802002, + "learning_rate": 0.00016397322814320573, + "loss": 2.6275, + "step": 5653 + }, + { + "epoch": 0.45629892664030347, + "grad_norm": 0.8432207107543945, + "learning_rate": 
0.00016396109357087407, + "loss": 2.6185, + "step": 5654 + }, + { + "epoch": 0.45637963037688645, + "grad_norm": 0.7049770951271057, + "learning_rate": 0.00016394895740446476, + "loss": 2.674, + "step": 5655 + }, + { + "epoch": 0.4564603341134695, + "grad_norm": 0.7068646550178528, + "learning_rate": 0.00016393681964428026, + "loss": 2.6072, + "step": 5656 + }, + { + "epoch": 0.45654103785005246, + "grad_norm": 0.7698760032653809, + "learning_rate": 0.00016392468029062312, + "loss": 2.6547, + "step": 5657 + }, + { + "epoch": 0.4566217415866355, + "grad_norm": 0.7381031513214111, + "learning_rate": 0.00016391253934379583, + "loss": 2.6125, + "step": 5658 + }, + { + "epoch": 0.45670244532321846, + "grad_norm": 0.7367781400680542, + "learning_rate": 0.00016390039680410097, + "loss": 2.6763, + "step": 5659 + }, + { + "epoch": 0.4567831490598015, + "grad_norm": 0.7416272759437561, + "learning_rate": 0.00016388825267184121, + "loss": 2.7059, + "step": 5660 + }, + { + "epoch": 0.4568638527963845, + "grad_norm": 0.6933416724205017, + "learning_rate": 0.0001638761069473192, + "loss": 2.6028, + "step": 5661 + }, + { + "epoch": 0.45694455653296745, + "grad_norm": 0.7311314940452576, + "learning_rate": 0.00016386395963083756, + "loss": 2.6266, + "step": 5662 + }, + { + "epoch": 0.4570252602695505, + "grad_norm": 0.7172734141349792, + "learning_rate": 0.00016385181072269917, + "loss": 2.6754, + "step": 5663 + }, + { + "epoch": 0.45710596400613346, + "grad_norm": 0.7286428213119507, + "learning_rate": 0.00016383966022320671, + "loss": 2.6637, + "step": 5664 + }, + { + "epoch": 0.4571866677427165, + "grad_norm": 0.7296474575996399, + "learning_rate": 0.00016382750813266308, + "loss": 2.6655, + "step": 5665 + }, + { + "epoch": 0.45726737147929947, + "grad_norm": 0.6929224133491516, + "learning_rate": 0.00016381535445137105, + "loss": 2.6376, + "step": 5666 + }, + { + "epoch": 0.4573480752158825, + "grad_norm": 0.7012765407562256, + "learning_rate": 0.0001638031991796336, + "loss": 
2.6222, + "step": 5667 + }, + { + "epoch": 0.4574287789524655, + "grad_norm": 0.7360745668411255, + "learning_rate": 0.00016379104231775368, + "loss": 2.6304, + "step": 5668 + }, + { + "epoch": 0.4575094826890485, + "grad_norm": 0.7276801466941833, + "learning_rate": 0.00016377888386603419, + "loss": 2.7046, + "step": 5669 + }, + { + "epoch": 0.4575901864256315, + "grad_norm": 0.688432514667511, + "learning_rate": 0.0001637667238247782, + "loss": 2.6598, + "step": 5670 + }, + { + "epoch": 0.4576708901622145, + "grad_norm": 0.6874414682388306, + "learning_rate": 0.00016375456219428877, + "loss": 2.7, + "step": 5671 + }, + { + "epoch": 0.4577515938987975, + "grad_norm": 0.711091160774231, + "learning_rate": 0.000163742398974869, + "loss": 2.6063, + "step": 5672 + }, + { + "epoch": 0.4578322976353805, + "grad_norm": 0.7131791710853577, + "learning_rate": 0.000163730234166822, + "loss": 2.5948, + "step": 5673 + }, + { + "epoch": 0.4579130013719635, + "grad_norm": 0.7166630625724792, + "learning_rate": 0.000163718067770451, + "loss": 2.6488, + "step": 5674 + }, + { + "epoch": 0.45799370510854653, + "grad_norm": 0.7285952568054199, + "learning_rate": 0.00016370589978605916, + "loss": 2.6445, + "step": 5675 + }, + { + "epoch": 0.4580744088451295, + "grad_norm": 0.728050172328949, + "learning_rate": 0.0001636937302139498, + "loss": 2.5425, + "step": 5676 + }, + { + "epoch": 0.45815511258171254, + "grad_norm": 0.7196047902107239, + "learning_rate": 0.00016368155905442615, + "loss": 2.7426, + "step": 5677 + }, + { + "epoch": 0.4582358163182955, + "grad_norm": 0.6844602823257446, + "learning_rate": 0.0001636693863077916, + "loss": 2.6157, + "step": 5678 + }, + { + "epoch": 0.45831652005487855, + "grad_norm": 0.7375781536102295, + "learning_rate": 0.0001636572119743495, + "loss": 2.7069, + "step": 5679 + }, + { + "epoch": 0.4583972237914615, + "grad_norm": 0.7667750120162964, + "learning_rate": 0.0001636450360544033, + "loss": 2.6589, + "step": 5680 + }, + { + "epoch": 
0.45847792752804456, + "grad_norm": 0.6569861173629761, + "learning_rate": 0.00016363285854825642, + "loss": 2.6197, + "step": 5681 + }, + { + "epoch": 0.45855863126462754, + "grad_norm": 0.7177335023880005, + "learning_rate": 0.00016362067945621239, + "loss": 2.6104, + "step": 5682 + }, + { + "epoch": 0.45863933500121057, + "grad_norm": 0.7260481715202332, + "learning_rate": 0.00016360849877857469, + "loss": 2.6435, + "step": 5683 + }, + { + "epoch": 0.45872003873779355, + "grad_norm": 0.7083989381790161, + "learning_rate": 0.00016359631651564693, + "loss": 2.6366, + "step": 5684 + }, + { + "epoch": 0.4588007424743766, + "grad_norm": 0.6417020559310913, + "learning_rate": 0.00016358413266773271, + "loss": 2.6311, + "step": 5685 + }, + { + "epoch": 0.45888144621095955, + "grad_norm": 0.737856924533844, + "learning_rate": 0.0001635719472351357, + "loss": 2.6647, + "step": 5686 + }, + { + "epoch": 0.4589621499475426, + "grad_norm": 0.6774190068244934, + "learning_rate": 0.0001635597602181596, + "loss": 2.6366, + "step": 5687 + }, + { + "epoch": 0.45904285368412556, + "grad_norm": 0.6480480432510376, + "learning_rate": 0.0001635475716171081, + "loss": 2.6501, + "step": 5688 + }, + { + "epoch": 0.4591235574207086, + "grad_norm": 0.7886860370635986, + "learning_rate": 0.0001635353814322851, + "loss": 2.7239, + "step": 5689 + }, + { + "epoch": 0.45920426115729157, + "grad_norm": 0.7579021453857422, + "learning_rate": 0.0001635231896639942, + "loss": 2.6155, + "step": 5690 + }, + { + "epoch": 0.4592849648938746, + "grad_norm": 0.6853809356689453, + "learning_rate": 0.0001635109963125394, + "loss": 2.5933, + "step": 5691 + }, + { + "epoch": 0.4593656686304576, + "grad_norm": 0.661342978477478, + "learning_rate": 0.00016349880137822456, + "loss": 2.6277, + "step": 5692 + }, + { + "epoch": 0.4594463723670406, + "grad_norm": 0.6795682311058044, + "learning_rate": 0.0001634866048613536, + "loss": 2.6221, + "step": 5693 + }, + { + "epoch": 0.4595270761036236, + "grad_norm": 
0.7375383377075195, + "learning_rate": 0.00016347440676223047, + "loss": 2.6082, + "step": 5694 + }, + { + "epoch": 0.4596077798402066, + "grad_norm": 0.7565153241157532, + "learning_rate": 0.0001634622070811592, + "loss": 2.6615, + "step": 5695 + }, + { + "epoch": 0.4596884835767896, + "grad_norm": 0.6869745254516602, + "learning_rate": 0.00016345000581844386, + "loss": 2.6172, + "step": 5696 + }, + { + "epoch": 0.45976918731337263, + "grad_norm": 0.7192853689193726, + "learning_rate": 0.0001634378029743885, + "loss": 2.6324, + "step": 5697 + }, + { + "epoch": 0.4598498910499556, + "grad_norm": 0.6919218301773071, + "learning_rate": 0.00016342559854929726, + "loss": 2.5965, + "step": 5698 + }, + { + "epoch": 0.45993059478653864, + "grad_norm": 0.6715282797813416, + "learning_rate": 0.00016341339254347432, + "loss": 2.6225, + "step": 5699 + }, + { + "epoch": 0.4600112985231216, + "grad_norm": 0.6768380999565125, + "learning_rate": 0.00016340118495722388, + "loss": 2.6376, + "step": 5700 + }, + { + "epoch": 0.46009200225970465, + "grad_norm": 0.6898325681686401, + "learning_rate": 0.00016338897579085018, + "loss": 2.667, + "step": 5701 + }, + { + "epoch": 0.4601727059962876, + "grad_norm": 0.7171810865402222, + "learning_rate": 0.00016337676504465747, + "loss": 2.678, + "step": 5702 + }, + { + "epoch": 0.46025340973287066, + "grad_norm": 0.7050724029541016, + "learning_rate": 0.00016336455271895016, + "loss": 2.619, + "step": 5703 + }, + { + "epoch": 0.46033411346945363, + "grad_norm": 0.8287240862846375, + "learning_rate": 0.00016335233881403248, + "loss": 2.71, + "step": 5704 + }, + { + "epoch": 0.46041481720603666, + "grad_norm": 0.6880568861961365, + "learning_rate": 0.000163340123330209, + "loss": 2.6516, + "step": 5705 + }, + { + "epoch": 0.46049552094261964, + "grad_norm": 0.7222896218299866, + "learning_rate": 0.00016332790626778402, + "loss": 2.5899, + "step": 5706 + }, + { + "epoch": 0.4605762246792027, + "grad_norm": 0.7707448601722717, + "learning_rate": 
0.00016331568762706207, + "loss": 2.6116, + "step": 5707 + }, + { + "epoch": 0.46065692841578565, + "grad_norm": 0.7780653834342957, + "learning_rate": 0.0001633034674083477, + "loss": 2.6072, + "step": 5708 + }, + { + "epoch": 0.4607376321523687, + "grad_norm": 0.7551524639129639, + "learning_rate": 0.00016329124561194545, + "loss": 2.548, + "step": 5709 + }, + { + "epoch": 0.46081833588895166, + "grad_norm": 0.9312284588813782, + "learning_rate": 0.0001632790222381599, + "loss": 2.6557, + "step": 5710 + }, + { + "epoch": 0.4608990396255347, + "grad_norm": 0.7404753565788269, + "learning_rate": 0.0001632667972872957, + "loss": 2.6889, + "step": 5711 + }, + { + "epoch": 0.46097974336211767, + "grad_norm": 0.7423726916313171, + "learning_rate": 0.00016325457075965752, + "loss": 2.6265, + "step": 5712 + }, + { + "epoch": 0.46106044709870064, + "grad_norm": 1.0683187246322632, + "learning_rate": 0.0001632423426555501, + "loss": 2.6827, + "step": 5713 + }, + { + "epoch": 0.4611411508352837, + "grad_norm": 0.7204160094261169, + "learning_rate": 0.0001632301129752782, + "loss": 2.702, + "step": 5714 + }, + { + "epoch": 0.46122185457186665, + "grad_norm": 0.7591153383255005, + "learning_rate": 0.0001632178817191466, + "loss": 2.6031, + "step": 5715 + }, + { + "epoch": 0.4613025583084497, + "grad_norm": 0.8147456645965576, + "learning_rate": 0.00016320564888746013, + "loss": 2.6117, + "step": 5716 + }, + { + "epoch": 0.46138326204503266, + "grad_norm": 0.7880246639251709, + "learning_rate": 0.00016319341448052364, + "loss": 2.5896, + "step": 5717 + }, + { + "epoch": 0.4614639657816157, + "grad_norm": 0.6875137686729431, + "learning_rate": 0.00016318117849864206, + "loss": 2.6258, + "step": 5718 + }, + { + "epoch": 0.46154466951819867, + "grad_norm": 0.7197960615158081, + "learning_rate": 0.00016316894094212044, + "loss": 2.6656, + "step": 5719 + }, + { + "epoch": 0.4616253732547817, + "grad_norm": 0.7049540281295776, + "learning_rate": 0.0001631567018112636, + "loss": 
2.6698, + "step": 5720 + }, + { + "epoch": 0.4617060769913647, + "grad_norm": 0.7128825783729553, + "learning_rate": 0.00016314446110637668, + "loss": 2.6552, + "step": 5721 + }, + { + "epoch": 0.4617867807279477, + "grad_norm": 0.7956201434135437, + "learning_rate": 0.00016313221882776477, + "loss": 2.6747, + "step": 5722 + }, + { + "epoch": 0.4618674844645307, + "grad_norm": 0.7598347663879395, + "learning_rate": 0.0001631199749757329, + "loss": 2.6187, + "step": 5723 + }, + { + "epoch": 0.4619481882011137, + "grad_norm": 0.6587582230567932, + "learning_rate": 0.00016310772955058627, + "loss": 2.596, + "step": 5724 + }, + { + "epoch": 0.4620288919376967, + "grad_norm": 0.700136125087738, + "learning_rate": 0.00016309548255263003, + "loss": 2.6527, + "step": 5725 + }, + { + "epoch": 0.4621095956742797, + "grad_norm": 0.7246582508087158, + "learning_rate": 0.00016308323398216945, + "loss": 2.6577, + "step": 5726 + }, + { + "epoch": 0.4621902994108627, + "grad_norm": 0.6951557993888855, + "learning_rate": 0.00016307098383950977, + "loss": 2.5816, + "step": 5727 + }, + { + "epoch": 0.46227100314744574, + "grad_norm": 0.7109191417694092, + "learning_rate": 0.0001630587321249563, + "loss": 2.6586, + "step": 5728 + }, + { + "epoch": 0.4623517068840287, + "grad_norm": 0.7357863783836365, + "learning_rate": 0.0001630464788388144, + "loss": 2.691, + "step": 5729 + }, + { + "epoch": 0.46243241062061174, + "grad_norm": 0.7916350960731506, + "learning_rate": 0.00016303422398138945, + "loss": 2.6584, + "step": 5730 + }, + { + "epoch": 0.4625131143571947, + "grad_norm": 0.6543231010437012, + "learning_rate": 0.00016302196755298685, + "loss": 2.6482, + "step": 5731 + }, + { + "epoch": 0.46259381809377775, + "grad_norm": 0.6978787183761597, + "learning_rate": 0.00016300970955391208, + "loss": 2.5956, + "step": 5732 + }, + { + "epoch": 0.46267452183036073, + "grad_norm": 0.7301886677742004, + "learning_rate": 0.00016299744998447065, + "loss": 2.6178, + "step": 5733 + }, + { + 
"epoch": 0.46275522556694376, + "grad_norm": 0.7381030321121216, + "learning_rate": 0.00016298518884496808, + "loss": 2.6712, + "step": 5734 + }, + { + "epoch": 0.46283592930352674, + "grad_norm": 0.7769027948379517, + "learning_rate": 0.00016297292613570995, + "loss": 2.6082, + "step": 5735 + }, + { + "epoch": 0.46291663304010977, + "grad_norm": 0.7698354721069336, + "learning_rate": 0.0001629606618570019, + "loss": 2.6543, + "step": 5736 + }, + { + "epoch": 0.46299733677669275, + "grad_norm": 0.7001554369926453, + "learning_rate": 0.00016294839600914957, + "loss": 2.6174, + "step": 5737 + }, + { + "epoch": 0.4630780405132758, + "grad_norm": 0.7589300274848938, + "learning_rate": 0.00016293612859245868, + "loss": 2.6338, + "step": 5738 + }, + { + "epoch": 0.46315874424985876, + "grad_norm": 0.7083945274353027, + "learning_rate": 0.00016292385960723493, + "loss": 2.6793, + "step": 5739 + }, + { + "epoch": 0.4632394479864418, + "grad_norm": 0.739439845085144, + "learning_rate": 0.00016291158905378412, + "loss": 2.7335, + "step": 5740 + }, + { + "epoch": 0.46332015172302476, + "grad_norm": 0.6868166923522949, + "learning_rate": 0.00016289931693241205, + "loss": 2.6139, + "step": 5741 + }, + { + "epoch": 0.4634008554596078, + "grad_norm": 0.7385871410369873, + "learning_rate": 0.0001628870432434246, + "loss": 2.6783, + "step": 5742 + }, + { + "epoch": 0.4634815591961908, + "grad_norm": 0.7227835655212402, + "learning_rate": 0.00016287476798712764, + "loss": 2.6732, + "step": 5743 + }, + { + "epoch": 0.4635622629327738, + "grad_norm": 0.6662411689758301, + "learning_rate": 0.00016286249116382709, + "loss": 2.6645, + "step": 5744 + }, + { + "epoch": 0.4636429666693568, + "grad_norm": 0.8110263347625732, + "learning_rate": 0.00016285021277382894, + "loss": 2.6448, + "step": 5745 + }, + { + "epoch": 0.4637236704059398, + "grad_norm": 0.7419269680976868, + "learning_rate": 0.0001628379328174392, + "loss": 2.7286, + "step": 5746 + }, + { + "epoch": 0.4638043741425228, + 
"grad_norm": 0.6518125534057617, + "learning_rate": 0.0001628256512949639, + "loss": 2.6545, + "step": 5747 + }, + { + "epoch": 0.4638850778791058, + "grad_norm": 0.6816060543060303, + "learning_rate": 0.00016281336820670917, + "loss": 2.6167, + "step": 5748 + }, + { + "epoch": 0.4639657816156888, + "grad_norm": 0.6537362337112427, + "learning_rate": 0.0001628010835529811, + "loss": 2.6522, + "step": 5749 + }, + { + "epoch": 0.46404648535227183, + "grad_norm": 0.6720992922782898, + "learning_rate": 0.00016278879733408585, + "loss": 2.6028, + "step": 5750 + }, + { + "epoch": 0.4641271890888548, + "grad_norm": 0.6778908371925354, + "learning_rate": 0.00016277650955032967, + "loss": 2.5591, + "step": 5751 + }, + { + "epoch": 0.46420789282543784, + "grad_norm": 0.6908471584320068, + "learning_rate": 0.0001627642202020187, + "loss": 2.6574, + "step": 5752 + }, + { + "epoch": 0.4642885965620208, + "grad_norm": 0.7034298181533813, + "learning_rate": 0.00016275192928945936, + "loss": 2.657, + "step": 5753 + }, + { + "epoch": 0.46436930029860385, + "grad_norm": 0.7245952486991882, + "learning_rate": 0.0001627396368129579, + "loss": 2.6572, + "step": 5754 + }, + { + "epoch": 0.4644500040351868, + "grad_norm": 0.6764482855796814, + "learning_rate": 0.0001627273427728207, + "loss": 2.6576, + "step": 5755 + }, + { + "epoch": 0.46453070777176986, + "grad_norm": 0.7074379920959473, + "learning_rate": 0.0001627150471693541, + "loss": 2.614, + "step": 5756 + }, + { + "epoch": 0.46461141150835283, + "grad_norm": 0.7292052507400513, + "learning_rate": 0.0001627027500028646, + "loss": 2.673, + "step": 5757 + }, + { + "epoch": 0.46469211524493587, + "grad_norm": 0.7554025650024414, + "learning_rate": 0.0001626904512736587, + "loss": 2.5919, + "step": 5758 + }, + { + "epoch": 0.46477281898151884, + "grad_norm": 0.6829606890678406, + "learning_rate": 0.00016267815098204284, + "loss": 2.7206, + "step": 5759 + }, + { + "epoch": 0.4648535227181019, + "grad_norm": 0.7201548218727112, + 
"learning_rate": 0.00016266584912832363, + "loss": 2.6651, + "step": 5760 + }, + { + "epoch": 0.46493422645468485, + "grad_norm": 0.6889227628707886, + "learning_rate": 0.00016265354571280764, + "loss": 2.6776, + "step": 5761 + }, + { + "epoch": 0.4650149301912679, + "grad_norm": 0.7286190986633301, + "learning_rate": 0.00016264124073580156, + "loss": 2.591, + "step": 5762 + }, + { + "epoch": 0.46509563392785086, + "grad_norm": 0.7222036123275757, + "learning_rate": 0.00016262893419761196, + "loss": 2.6422, + "step": 5763 + }, + { + "epoch": 0.46517633766443384, + "grad_norm": 0.6822768449783325, + "learning_rate": 0.00016261662609854562, + "loss": 2.6126, + "step": 5764 + }, + { + "epoch": 0.46525704140101687, + "grad_norm": 0.7263356447219849, + "learning_rate": 0.00016260431643890929, + "loss": 2.6304, + "step": 5765 + }, + { + "epoch": 0.46533774513759985, + "grad_norm": 0.7152180075645447, + "learning_rate": 0.00016259200521900972, + "loss": 2.6489, + "step": 5766 + }, + { + "epoch": 0.4654184488741829, + "grad_norm": 0.6988116502761841, + "learning_rate": 0.00016257969243915378, + "loss": 2.6151, + "step": 5767 + }, + { + "epoch": 0.46549915261076585, + "grad_norm": 0.7131790518760681, + "learning_rate": 0.00016256737809964831, + "loss": 2.6284, + "step": 5768 + }, + { + "epoch": 0.4655798563473489, + "grad_norm": 0.674196183681488, + "learning_rate": 0.00016255506220080025, + "loss": 2.5815, + "step": 5769 + }, + { + "epoch": 0.46566056008393186, + "grad_norm": 0.7166198492050171, + "learning_rate": 0.0001625427447429165, + "loss": 2.6594, + "step": 5770 + }, + { + "epoch": 0.4657412638205149, + "grad_norm": 0.6997127532958984, + "learning_rate": 0.00016253042572630407, + "loss": 2.6502, + "step": 5771 + }, + { + "epoch": 0.46582196755709787, + "grad_norm": 0.7761591076850891, + "learning_rate": 0.00016251810515126994, + "loss": 2.624, + "step": 5772 + }, + { + "epoch": 0.4659026712936809, + "grad_norm": 0.7038728594779968, + "learning_rate": 
0.00016250578301812125, + "loss": 2.6096, + "step": 5773 + }, + { + "epoch": 0.4659833750302639, + "grad_norm": 0.7080080509185791, + "learning_rate": 0.00016249345932716505, + "loss": 2.6196, + "step": 5774 + }, + { + "epoch": 0.4660640787668469, + "grad_norm": 0.7461444735527039, + "learning_rate": 0.00016248113407870847, + "loss": 2.65, + "step": 5775 + }, + { + "epoch": 0.4661447825034299, + "grad_norm": 0.7914463877677917, + "learning_rate": 0.00016246880727305868, + "loss": 2.6539, + "step": 5776 + }, + { + "epoch": 0.4662254862400129, + "grad_norm": 0.7067776918411255, + "learning_rate": 0.00016245647891052295, + "loss": 2.72, + "step": 5777 + }, + { + "epoch": 0.4663061899765959, + "grad_norm": 0.7190818190574646, + "learning_rate": 0.00016244414899140852, + "loss": 2.7029, + "step": 5778 + }, + { + "epoch": 0.46638689371317893, + "grad_norm": 0.6740003824234009, + "learning_rate": 0.00016243181751602261, + "loss": 2.6404, + "step": 5779 + }, + { + "epoch": 0.4664675974497619, + "grad_norm": 0.7942661643028259, + "learning_rate": 0.00016241948448467267, + "loss": 2.6333, + "step": 5780 + }, + { + "epoch": 0.46654830118634494, + "grad_norm": 0.6415690183639526, + "learning_rate": 0.00016240714989766597, + "loss": 2.6354, + "step": 5781 + }, + { + "epoch": 0.4666290049229279, + "grad_norm": 0.7287769913673401, + "learning_rate": 0.00016239481375530997, + "loss": 2.6721, + "step": 5782 + }, + { + "epoch": 0.46670970865951095, + "grad_norm": 0.8197699189186096, + "learning_rate": 0.00016238247605791212, + "loss": 2.7577, + "step": 5783 + }, + { + "epoch": 0.4667904123960939, + "grad_norm": 0.8182012438774109, + "learning_rate": 0.0001623701368057799, + "loss": 2.6475, + "step": 5784 + }, + { + "epoch": 0.46687111613267696, + "grad_norm": 0.6974665522575378, + "learning_rate": 0.00016235779599922082, + "loss": 2.5897, + "step": 5785 + }, + { + "epoch": 0.46695181986925993, + "grad_norm": 0.7156379222869873, + "learning_rate": 0.00016234545363854247, + "loss": 
2.5981, + "step": 5786 + }, + { + "epoch": 0.46703252360584296, + "grad_norm": 0.6875364780426025, + "learning_rate": 0.0001623331097240524, + "loss": 2.6333, + "step": 5787 + }, + { + "epoch": 0.46711322734242594, + "grad_norm": 0.7222917675971985, + "learning_rate": 0.00016232076425605835, + "loss": 2.5865, + "step": 5788 + }, + { + "epoch": 0.467193931079009, + "grad_norm": 0.7224915027618408, + "learning_rate": 0.00016230841723486792, + "loss": 2.667, + "step": 5789 + }, + { + "epoch": 0.46727463481559195, + "grad_norm": 0.7125402688980103, + "learning_rate": 0.00016229606866078887, + "loss": 2.6548, + "step": 5790 + }, + { + "epoch": 0.467355338552175, + "grad_norm": 0.6866132616996765, + "learning_rate": 0.00016228371853412894, + "loss": 2.6381, + "step": 5791 + }, + { + "epoch": 0.46743604228875796, + "grad_norm": 0.7573552131652832, + "learning_rate": 0.00016227136685519593, + "loss": 2.6766, + "step": 5792 + }, + { + "epoch": 0.467516746025341, + "grad_norm": 0.7565932273864746, + "learning_rate": 0.00016225901362429767, + "loss": 2.5965, + "step": 5793 + }, + { + "epoch": 0.46759744976192397, + "grad_norm": 0.7279250621795654, + "learning_rate": 0.00016224665884174207, + "loss": 2.6599, + "step": 5794 + }, + { + "epoch": 0.467678153498507, + "grad_norm": 0.7501276731491089, + "learning_rate": 0.000162234302507837, + "loss": 2.636, + "step": 5795 + }, + { + "epoch": 0.46775885723509, + "grad_norm": 0.7823930978775024, + "learning_rate": 0.00016222194462289042, + "loss": 2.6277, + "step": 5796 + }, + { + "epoch": 0.467839560971673, + "grad_norm": 0.7168415784835815, + "learning_rate": 0.00016220958518721034, + "loss": 2.6868, + "step": 5797 + }, + { + "epoch": 0.467920264708256, + "grad_norm": 0.7468454241752625, + "learning_rate": 0.00016219722420110478, + "loss": 2.7209, + "step": 5798 + }, + { + "epoch": 0.468000968444839, + "grad_norm": 0.6915228962898254, + "learning_rate": 0.0001621848616648818, + "loss": 2.6356, + "step": 5799 + }, + { + "epoch": 
0.468081672181422, + "grad_norm": 0.7731573581695557, + "learning_rate": 0.00016217249757884955, + "loss": 2.6396, + "step": 5800 + }, + { + "epoch": 0.468162375918005, + "grad_norm": 0.6579388380050659, + "learning_rate": 0.0001621601319433161, + "loss": 2.6077, + "step": 5801 + }, + { + "epoch": 0.468243079654588, + "grad_norm": 0.7136246562004089, + "learning_rate": 0.00016214776475858967, + "loss": 2.6602, + "step": 5802 + }, + { + "epoch": 0.46832378339117103, + "grad_norm": 0.6929461359977722, + "learning_rate": 0.0001621353960249785, + "loss": 2.6851, + "step": 5803 + }, + { + "epoch": 0.468404487127754, + "grad_norm": 0.8001779913902283, + "learning_rate": 0.00016212302574279087, + "loss": 2.6577, + "step": 5804 + }, + { + "epoch": 0.46848519086433704, + "grad_norm": 0.7637671828269958, + "learning_rate": 0.00016211065391233498, + "loss": 2.6923, + "step": 5805 + }, + { + "epoch": 0.46856589460092, + "grad_norm": 0.6879906058311462, + "learning_rate": 0.0001620982805339193, + "loss": 2.6555, + "step": 5806 + }, + { + "epoch": 0.46864659833750305, + "grad_norm": 0.7731223702430725, + "learning_rate": 0.0001620859056078521, + "loss": 2.6301, + "step": 5807 + }, + { + "epoch": 0.468727302074086, + "grad_norm": 0.7351491451263428, + "learning_rate": 0.00016207352913444185, + "loss": 2.6154, + "step": 5808 + }, + { + "epoch": 0.46880800581066906, + "grad_norm": 0.716314435005188, + "learning_rate": 0.000162061151113997, + "loss": 2.6294, + "step": 5809 + }, + { + "epoch": 0.46888870954725204, + "grad_norm": 0.6974702477455139, + "learning_rate": 0.00016204877154682605, + "loss": 2.6046, + "step": 5810 + }, + { + "epoch": 0.46896941328383507, + "grad_norm": 0.7456035614013672, + "learning_rate": 0.00016203639043323745, + "loss": 2.6308, + "step": 5811 + }, + { + "epoch": 0.46905011702041804, + "grad_norm": 0.7198047637939453, + "learning_rate": 0.0001620240077735399, + "loss": 2.6303, + "step": 5812 + }, + { + "epoch": 0.4691308207570011, + "grad_norm": 
0.7098269462585449, + "learning_rate": 0.00016201162356804192, + "loss": 2.6352, + "step": 5813 + }, + { + "epoch": 0.46921152449358405, + "grad_norm": 0.7060410976409912, + "learning_rate": 0.0001619992378170522, + "loss": 2.6489, + "step": 5814 + }, + { + "epoch": 0.46929222823016703, + "grad_norm": 0.7126092314720154, + "learning_rate": 0.0001619868505208794, + "loss": 2.66, + "step": 5815 + }, + { + "epoch": 0.46937293196675006, + "grad_norm": 0.7391123175621033, + "learning_rate": 0.00016197446167983223, + "loss": 2.6066, + "step": 5816 + }, + { + "epoch": 0.46945363570333304, + "grad_norm": 0.7282211780548096, + "learning_rate": 0.0001619620712942195, + "loss": 2.6422, + "step": 5817 + }, + { + "epoch": 0.46953433943991607, + "grad_norm": 0.7581801414489746, + "learning_rate": 0.00016194967936434998, + "loss": 2.702, + "step": 5818 + }, + { + "epoch": 0.46961504317649905, + "grad_norm": 0.6649011373519897, + "learning_rate": 0.00016193728589053248, + "loss": 2.6235, + "step": 5819 + }, + { + "epoch": 0.4696957469130821, + "grad_norm": 0.720312237739563, + "learning_rate": 0.00016192489087307592, + "loss": 2.5961, + "step": 5820 + }, + { + "epoch": 0.46977645064966506, + "grad_norm": 0.72076016664505, + "learning_rate": 0.0001619124943122892, + "loss": 2.6793, + "step": 5821 + }, + { + "epoch": 0.4698571543862481, + "grad_norm": 0.6695740818977356, + "learning_rate": 0.0001619000962084813, + "loss": 2.6325, + "step": 5822 + }, + { + "epoch": 0.46993785812283106, + "grad_norm": 0.7678804993629456, + "learning_rate": 0.0001618876965619612, + "loss": 2.7473, + "step": 5823 + }, + { + "epoch": 0.4700185618594141, + "grad_norm": 0.782349169254303, + "learning_rate": 0.00016187529537303792, + "loss": 2.6139, + "step": 5824 + }, + { + "epoch": 0.4700992655959971, + "grad_norm": 0.6906631588935852, + "learning_rate": 0.00016186289264202052, + "loss": 2.6529, + "step": 5825 + }, + { + "epoch": 0.4701799693325801, + "grad_norm": 0.732947051525116, + "learning_rate": 
0.00016185048836921814, + "loss": 2.6416, + "step": 5826 + }, + { + "epoch": 0.4702606730691631, + "grad_norm": 0.8306718468666077, + "learning_rate": 0.0001618380825549399, + "loss": 2.6566, + "step": 5827 + }, + { + "epoch": 0.4703413768057461, + "grad_norm": 0.725764811038971, + "learning_rate": 0.00016182567519949502, + "loss": 2.6664, + "step": 5828 + }, + { + "epoch": 0.4704220805423291, + "grad_norm": 0.7301872372627258, + "learning_rate": 0.00016181326630319268, + "loss": 2.6666, + "step": 5829 + }, + { + "epoch": 0.4705027842789121, + "grad_norm": 0.7297122478485107, + "learning_rate": 0.00016180085586634216, + "loss": 2.6415, + "step": 5830 + }, + { + "epoch": 0.4705834880154951, + "grad_norm": 0.7445664405822754, + "learning_rate": 0.00016178844388925278, + "loss": 2.6112, + "step": 5831 + }, + { + "epoch": 0.47066419175207813, + "grad_norm": 0.7787267565727234, + "learning_rate": 0.00016177603037223384, + "loss": 2.6452, + "step": 5832 + }, + { + "epoch": 0.4707448954886611, + "grad_norm": 0.7386903762817383, + "learning_rate": 0.00016176361531559474, + "loss": 2.6919, + "step": 5833 + }, + { + "epoch": 0.47082559922524414, + "grad_norm": 0.7991776466369629, + "learning_rate": 0.0001617511987196449, + "loss": 2.6728, + "step": 5834 + }, + { + "epoch": 0.4709063029618271, + "grad_norm": 0.7196263670921326, + "learning_rate": 0.00016173878058469375, + "loss": 2.6008, + "step": 5835 + }, + { + "epoch": 0.47098700669841015, + "grad_norm": 0.6773477792739868, + "learning_rate": 0.00016172636091105086, + "loss": 2.6184, + "step": 5836 + }, + { + "epoch": 0.4710677104349931, + "grad_norm": 0.7238345742225647, + "learning_rate": 0.00016171393969902567, + "loss": 2.6221, + "step": 5837 + }, + { + "epoch": 0.47114841417157616, + "grad_norm": 0.702104926109314, + "learning_rate": 0.00016170151694892777, + "loss": 2.5909, + "step": 5838 + }, + { + "epoch": 0.47122911790815913, + "grad_norm": 0.7571590542793274, + "learning_rate": 0.00016168909266106677, + "loss": 
2.6044, + "step": 5839 + }, + { + "epoch": 0.47130982164474217, + "grad_norm": 0.7408227324485779, + "learning_rate": 0.00016167666683575234, + "loss": 2.5771, + "step": 5840 + }, + { + "epoch": 0.47139052538132514, + "grad_norm": 0.6760764122009277, + "learning_rate": 0.00016166423947329414, + "loss": 2.6202, + "step": 5841 + }, + { + "epoch": 0.4714712291179082, + "grad_norm": 0.7085632681846619, + "learning_rate": 0.00016165181057400192, + "loss": 2.5887, + "step": 5842 + }, + { + "epoch": 0.47155193285449115, + "grad_norm": 0.7298943400382996, + "learning_rate": 0.00016163938013818538, + "loss": 2.609, + "step": 5843 + }, + { + "epoch": 0.4716326365910742, + "grad_norm": 0.7591157555580139, + "learning_rate": 0.0001616269481661544, + "loss": 2.6582, + "step": 5844 + }, + { + "epoch": 0.47171334032765716, + "grad_norm": 0.6727088093757629, + "learning_rate": 0.00016161451465821877, + "loss": 2.6289, + "step": 5845 + }, + { + "epoch": 0.4717940440642402, + "grad_norm": 0.6782706379890442, + "learning_rate": 0.00016160207961468835, + "loss": 2.6875, + "step": 5846 + }, + { + "epoch": 0.47187474780082317, + "grad_norm": 0.6839444041252136, + "learning_rate": 0.00016158964303587313, + "loss": 2.5687, + "step": 5847 + }, + { + "epoch": 0.4719554515374062, + "grad_norm": 0.7565997838973999, + "learning_rate": 0.00016157720492208295, + "loss": 2.6855, + "step": 5848 + }, + { + "epoch": 0.4720361552739892, + "grad_norm": 0.7286611199378967, + "learning_rate": 0.0001615647652736279, + "loss": 2.5906, + "step": 5849 + }, + { + "epoch": 0.4721168590105722, + "grad_norm": 0.7503396272659302, + "learning_rate": 0.00016155232409081793, + "loss": 2.6419, + "step": 5850 + }, + { + "epoch": 0.4721975627471552, + "grad_norm": 0.6924198865890503, + "learning_rate": 0.00016153988137396317, + "loss": 2.661, + "step": 5851 + }, + { + "epoch": 0.4722782664837382, + "grad_norm": 0.7731672525405884, + "learning_rate": 0.0001615274371233737, + "loss": 2.6993, + "step": 5852 + }, + { + 
"epoch": 0.4723589702203212, + "grad_norm": 0.7422799468040466, + "learning_rate": 0.00016151499133935964, + "loss": 2.6134, + "step": 5853 + }, + { + "epoch": 0.4724396739569042, + "grad_norm": 0.6924546957015991, + "learning_rate": 0.0001615025440222312, + "loss": 2.672, + "step": 5854 + }, + { + "epoch": 0.4725203776934872, + "grad_norm": 0.7205976843833923, + "learning_rate": 0.00016149009517229862, + "loss": 2.6722, + "step": 5855 + }, + { + "epoch": 0.47260108143007024, + "grad_norm": 0.6898519992828369, + "learning_rate": 0.0001614776447898721, + "loss": 2.6474, + "step": 5856 + }, + { + "epoch": 0.4726817851666532, + "grad_norm": 0.7512481212615967, + "learning_rate": 0.00016146519287526197, + "loss": 2.7413, + "step": 5857 + }, + { + "epoch": 0.47276248890323624, + "grad_norm": 0.6734220385551453, + "learning_rate": 0.0001614527394287786, + "loss": 2.6114, + "step": 5858 + }, + { + "epoch": 0.4728431926398192, + "grad_norm": 0.6745339632034302, + "learning_rate": 0.00016144028445073228, + "loss": 2.6039, + "step": 5859 + }, + { + "epoch": 0.47292389637640225, + "grad_norm": 0.7463086843490601, + "learning_rate": 0.0001614278279414335, + "loss": 2.6109, + "step": 5860 + }, + { + "epoch": 0.47300460011298523, + "grad_norm": 0.7203261256217957, + "learning_rate": 0.00016141536990119264, + "loss": 2.651, + "step": 5861 + }, + { + "epoch": 0.47308530384956826, + "grad_norm": 0.7718746066093445, + "learning_rate": 0.00016140291033032024, + "loss": 2.6953, + "step": 5862 + }, + { + "epoch": 0.47316600758615124, + "grad_norm": 0.7854858040809631, + "learning_rate": 0.0001613904492291268, + "loss": 2.5941, + "step": 5863 + }, + { + "epoch": 0.47324671132273427, + "grad_norm": 0.7218664288520813, + "learning_rate": 0.0001613779865979229, + "loss": 2.6447, + "step": 5864 + }, + { + "epoch": 0.47332741505931725, + "grad_norm": 0.7479045987129211, + "learning_rate": 0.0001613655224370191, + "loss": 2.6662, + "step": 5865 + }, + { + "epoch": 0.4734081187959002, + 
"grad_norm": 0.7335021495819092, + "learning_rate": 0.00016135305674672612, + "loss": 2.6283, + "step": 5866 + }, + { + "epoch": 0.47348882253248326, + "grad_norm": 0.7650331258773804, + "learning_rate": 0.00016134058952735453, + "loss": 2.7168, + "step": 5867 + }, + { + "epoch": 0.47356952626906623, + "grad_norm": 0.733383297920227, + "learning_rate": 0.00016132812077921513, + "loss": 2.6352, + "step": 5868 + }, + { + "epoch": 0.47365023000564926, + "grad_norm": 1.3944146633148193, + "learning_rate": 0.00016131565050261866, + "loss": 2.7518, + "step": 5869 + }, + { + "epoch": 0.47373093374223224, + "grad_norm": 0.746112585067749, + "learning_rate": 0.0001613031786978759, + "loss": 2.6253, + "step": 5870 + }, + { + "epoch": 0.4738116374788153, + "grad_norm": 0.9859737753868103, + "learning_rate": 0.00016129070536529766, + "loss": 2.6682, + "step": 5871 + }, + { + "epoch": 0.47389234121539825, + "grad_norm": 0.7358877062797546, + "learning_rate": 0.00016127823050519484, + "loss": 2.6712, + "step": 5872 + }, + { + "epoch": 0.4739730449519813, + "grad_norm": 0.7379923462867737, + "learning_rate": 0.0001612657541178783, + "loss": 2.6268, + "step": 5873 + }, + { + "epoch": 0.47405374868856426, + "grad_norm": 0.7671005725860596, + "learning_rate": 0.00016125327620365907, + "loss": 2.6127, + "step": 5874 + }, + { + "epoch": 0.4741344524251473, + "grad_norm": 0.8007156252861023, + "learning_rate": 0.00016124079676284805, + "loss": 2.6173, + "step": 5875 + }, + { + "epoch": 0.47421515616173027, + "grad_norm": 0.7930500507354736, + "learning_rate": 0.00016122831579575627, + "loss": 2.589, + "step": 5876 + }, + { + "epoch": 0.4742958598983133, + "grad_norm": 0.788006603717804, + "learning_rate": 0.00016121583330269484, + "loss": 2.6731, + "step": 5877 + }, + { + "epoch": 0.4743765636348963, + "grad_norm": 0.742148220539093, + "learning_rate": 0.00016120334928397483, + "loss": 2.674, + "step": 5878 + }, + { + "epoch": 0.4744572673714793, + "grad_norm": 0.6823038458824158, + 
"learning_rate": 0.00016119086373990736, + "loss": 2.6153, + "step": 5879 + }, + { + "epoch": 0.4745379711080623, + "grad_norm": 0.7542331218719482, + "learning_rate": 0.00016117837667080356, + "loss": 2.6739, + "step": 5880 + }, + { + "epoch": 0.4746186748446453, + "grad_norm": 0.8163543343544006, + "learning_rate": 0.00016116588807697476, + "loss": 2.6558, + "step": 5881 + }, + { + "epoch": 0.4746993785812283, + "grad_norm": 0.7528213858604431, + "learning_rate": 0.0001611533979587321, + "loss": 2.6243, + "step": 5882 + }, + { + "epoch": 0.4747800823178113, + "grad_norm": 0.7476626038551331, + "learning_rate": 0.00016114090631638695, + "loss": 2.5984, + "step": 5883 + }, + { + "epoch": 0.4748607860543943, + "grad_norm": 0.7436621785163879, + "learning_rate": 0.00016112841315025055, + "loss": 2.6118, + "step": 5884 + }, + { + "epoch": 0.47494148979097733, + "grad_norm": 0.8024004101753235, + "learning_rate": 0.0001611159184606343, + "loss": 2.6926, + "step": 5885 + }, + { + "epoch": 0.4750221935275603, + "grad_norm": 0.7475626468658447, + "learning_rate": 0.00016110342224784962, + "loss": 2.6175, + "step": 5886 + }, + { + "epoch": 0.47510289726414334, + "grad_norm": 0.7900637984275818, + "learning_rate": 0.00016109092451220796, + "loss": 2.6503, + "step": 5887 + }, + { + "epoch": 0.4751836010007263, + "grad_norm": 0.6988356113433838, + "learning_rate": 0.00016107842525402074, + "loss": 2.6494, + "step": 5888 + }, + { + "epoch": 0.47526430473730935, + "grad_norm": 1.0214186906814575, + "learning_rate": 0.00016106592447359948, + "loss": 2.6476, + "step": 5889 + }, + { + "epoch": 0.4753450084738923, + "grad_norm": 0.741527795791626, + "learning_rate": 0.00016105342217125578, + "loss": 2.6054, + "step": 5890 + }, + { + "epoch": 0.47542571221047536, + "grad_norm": 0.7196603417396545, + "learning_rate": 0.0001610409183473012, + "loss": 2.6146, + "step": 5891 + }, + { + "epoch": 0.47550641594705834, + "grad_norm": 0.8130923509597778, + "learning_rate": 
0.00016102841300204737, + "loss": 2.6505, + "step": 5892 + }, + { + "epoch": 0.47558711968364137, + "grad_norm": 0.7929537892341614, + "learning_rate": 0.00016101590613580596, + "loss": 2.6725, + "step": 5893 + }, + { + "epoch": 0.47566782342022434, + "grad_norm": 0.7149303555488586, + "learning_rate": 0.00016100339774888865, + "loss": 2.6272, + "step": 5894 + }, + { + "epoch": 0.4757485271568074, + "grad_norm": 0.7242792248725891, + "learning_rate": 0.00016099088784160724, + "loss": 2.5948, + "step": 5895 + }, + { + "epoch": 0.47582923089339035, + "grad_norm": 0.7571540474891663, + "learning_rate": 0.00016097837641427346, + "loss": 2.689, + "step": 5896 + }, + { + "epoch": 0.4759099346299734, + "grad_norm": 0.7402021288871765, + "learning_rate": 0.00016096586346719916, + "loss": 2.7035, + "step": 5897 + }, + { + "epoch": 0.47599063836655636, + "grad_norm": 0.7195574045181274, + "learning_rate": 0.00016095334900069613, + "loss": 2.5862, + "step": 5898 + }, + { + "epoch": 0.4760713421031394, + "grad_norm": 0.7677412033081055, + "learning_rate": 0.00016094083301507634, + "loss": 2.6715, + "step": 5899 + }, + { + "epoch": 0.47615204583972237, + "grad_norm": 0.7131708860397339, + "learning_rate": 0.0001609283155106517, + "loss": 2.6555, + "step": 5900 + }, + { + "epoch": 0.4762327495763054, + "grad_norm": 0.6774055361747742, + "learning_rate": 0.00016091579648773414, + "loss": 2.621, + "step": 5901 + }, + { + "epoch": 0.4763134533128884, + "grad_norm": 0.6873257160186768, + "learning_rate": 0.00016090327594663571, + "loss": 2.6719, + "step": 5902 + }, + { + "epoch": 0.4763941570494714, + "grad_norm": 0.8004229068756104, + "learning_rate": 0.00016089075388766845, + "loss": 2.6926, + "step": 5903 + }, + { + "epoch": 0.4764748607860544, + "grad_norm": 0.7196173667907715, + "learning_rate": 0.00016087823031114438, + "loss": 2.6032, + "step": 5904 + }, + { + "epoch": 0.4765555645226374, + "grad_norm": 0.7665518522262573, + "learning_rate": 0.00016086570521737573, + "loss": 
2.6359, + "step": 5905 + }, + { + "epoch": 0.4766362682592204, + "grad_norm": 0.7240240573883057, + "learning_rate": 0.0001608531786066746, + "loss": 2.6489, + "step": 5906 + }, + { + "epoch": 0.47671697199580343, + "grad_norm": 0.7603839039802551, + "learning_rate": 0.00016084065047935317, + "loss": 2.6064, + "step": 5907 + }, + { + "epoch": 0.4767976757323864, + "grad_norm": 0.7394058704376221, + "learning_rate": 0.0001608281208357237, + "loss": 2.6643, + "step": 5908 + }, + { + "epoch": 0.47687837946896944, + "grad_norm": 0.7183148860931396, + "learning_rate": 0.00016081558967609845, + "loss": 2.56, + "step": 5909 + }, + { + "epoch": 0.4769590832055524, + "grad_norm": 0.7181926965713501, + "learning_rate": 0.00016080305700078972, + "loss": 2.6665, + "step": 5910 + }, + { + "epoch": 0.47703978694213545, + "grad_norm": 0.7634081840515137, + "learning_rate": 0.00016079052281010988, + "loss": 2.7076, + "step": 5911 + }, + { + "epoch": 0.4771204906787184, + "grad_norm": 0.7928739190101624, + "learning_rate": 0.0001607779871043713, + "loss": 2.6512, + "step": 5912 + }, + { + "epoch": 0.47720119441530146, + "grad_norm": 0.7192893028259277, + "learning_rate": 0.00016076544988388643, + "loss": 2.6453, + "step": 5913 + }, + { + "epoch": 0.47728189815188443, + "grad_norm": 0.7171720862388611, + "learning_rate": 0.00016075291114896767, + "loss": 2.6501, + "step": 5914 + }, + { + "epoch": 0.47736260188846746, + "grad_norm": 0.6787160038948059, + "learning_rate": 0.00016074037089992756, + "loss": 2.6566, + "step": 5915 + }, + { + "epoch": 0.47744330562505044, + "grad_norm": 0.8118634819984436, + "learning_rate": 0.00016072782913707868, + "loss": 2.6635, + "step": 5916 + }, + { + "epoch": 0.4775240093616334, + "grad_norm": 0.7188509702682495, + "learning_rate": 0.0001607152858607335, + "loss": 2.6899, + "step": 5917 + }, + { + "epoch": 0.47760471309821645, + "grad_norm": 0.6742647290229797, + "learning_rate": 0.00016070274107120468, + "loss": 2.6221, + "step": 5918 + }, + { + 
"epoch": 0.4776854168347994, + "grad_norm": 0.7274083495140076, + "learning_rate": 0.00016069019476880488, + "loss": 2.6588, + "step": 5919 + }, + { + "epoch": 0.47776612057138246, + "grad_norm": 0.6984386444091797, + "learning_rate": 0.00016067764695384682, + "loss": 2.6376, + "step": 5920 + }, + { + "epoch": 0.47784682430796543, + "grad_norm": 0.7260883450508118, + "learning_rate": 0.00016066509762664315, + "loss": 2.6623, + "step": 5921 + }, + { + "epoch": 0.47792752804454847, + "grad_norm": 0.7540579438209534, + "learning_rate": 0.00016065254678750666, + "loss": 2.695, + "step": 5922 + }, + { + "epoch": 0.47800823178113144, + "grad_norm": 0.7032651305198669, + "learning_rate": 0.00016063999443675017, + "loss": 2.6791, + "step": 5923 + }, + { + "epoch": 0.4780889355177145, + "grad_norm": 0.682842493057251, + "learning_rate": 0.0001606274405746865, + "loss": 2.6198, + "step": 5924 + }, + { + "epoch": 0.47816963925429745, + "grad_norm": 0.6843859553337097, + "learning_rate": 0.00016061488520162853, + "loss": 2.6432, + "step": 5925 + }, + { + "epoch": 0.4782503429908805, + "grad_norm": 0.652119517326355, + "learning_rate": 0.00016060232831788918, + "loss": 2.6461, + "step": 5926 + }, + { + "epoch": 0.47833104672746346, + "grad_norm": 0.6986887454986572, + "learning_rate": 0.0001605897699237814, + "loss": 2.5885, + "step": 5927 + }, + { + "epoch": 0.4784117504640465, + "grad_norm": 0.7156725525856018, + "learning_rate": 0.00016057721001961817, + "loss": 2.6526, + "step": 5928 + }, + { + "epoch": 0.47849245420062947, + "grad_norm": 0.7367579936981201, + "learning_rate": 0.0001605646486057125, + "loss": 2.5842, + "step": 5929 + }, + { + "epoch": 0.4785731579372125, + "grad_norm": 0.7059770822525024, + "learning_rate": 0.00016055208568237746, + "loss": 2.617, + "step": 5930 + }, + { + "epoch": 0.4786538616737955, + "grad_norm": 0.7225117087364197, + "learning_rate": 0.00016053952124992619, + "loss": 2.6499, + "step": 5931 + }, + { + "epoch": 0.4787345654103785, + 
"grad_norm": 0.7027475237846375, + "learning_rate": 0.00016052695530867177, + "loss": 2.5934, + "step": 5932 + }, + { + "epoch": 0.4788152691469615, + "grad_norm": 0.7031852602958679, + "learning_rate": 0.00016051438785892743, + "loss": 2.5947, + "step": 5933 + }, + { + "epoch": 0.4788959728835445, + "grad_norm": 0.6731768846511841, + "learning_rate": 0.00016050181890100635, + "loss": 2.6811, + "step": 5934 + }, + { + "epoch": 0.4789766766201275, + "grad_norm": 0.7120038866996765, + "learning_rate": 0.0001604892484352218, + "loss": 2.6625, + "step": 5935 + }, + { + "epoch": 0.4790573803567105, + "grad_norm": 0.6895150542259216, + "learning_rate": 0.00016047667646188702, + "loss": 2.6784, + "step": 5936 + }, + { + "epoch": 0.4791380840932935, + "grad_norm": 0.7080708742141724, + "learning_rate": 0.0001604641029813154, + "loss": 2.6491, + "step": 5937 + }, + { + "epoch": 0.47921878782987654, + "grad_norm": 0.6522819399833679, + "learning_rate": 0.00016045152799382025, + "loss": 2.6113, + "step": 5938 + }, + { + "epoch": 0.4792994915664595, + "grad_norm": 0.6988112926483154, + "learning_rate": 0.00016043895149971506, + "loss": 2.6892, + "step": 5939 + }, + { + "epoch": 0.47938019530304254, + "grad_norm": 0.7545368671417236, + "learning_rate": 0.00016042637349931318, + "loss": 2.6872, + "step": 5940 + }, + { + "epoch": 0.4794608990396255, + "grad_norm": 0.7083707451820374, + "learning_rate": 0.0001604137939929281, + "loss": 2.6726, + "step": 5941 + }, + { + "epoch": 0.47954160277620855, + "grad_norm": 0.8198027014732361, + "learning_rate": 0.00016040121298087337, + "loss": 2.647, + "step": 5942 + }, + { + "epoch": 0.47962230651279153, + "grad_norm": 0.7296201586723328, + "learning_rate": 0.00016038863046346252, + "loss": 2.7122, + "step": 5943 + }, + { + "epoch": 0.47970301024937456, + "grad_norm": 0.7262474298477173, + "learning_rate": 0.00016037604644100913, + "loss": 2.6903, + "step": 5944 + }, + { + "epoch": 0.47978371398595754, + "grad_norm": 0.8010182976722717, + 
"learning_rate": 0.00016036346091382686, + "loss": 2.6942, + "step": 5945 + }, + { + "epoch": 0.47986441772254057, + "grad_norm": 0.7227098345756531, + "learning_rate": 0.00016035087388222932, + "loss": 2.6661, + "step": 5946 + }, + { + "epoch": 0.47994512145912355, + "grad_norm": 0.7374662756919861, + "learning_rate": 0.00016033828534653028, + "loss": 2.6233, + "step": 5947 + }, + { + "epoch": 0.4800258251957066, + "grad_norm": 0.7139650583267212, + "learning_rate": 0.00016032569530704342, + "loss": 2.5859, + "step": 5948 + }, + { + "epoch": 0.48010652893228956, + "grad_norm": 0.7067660689353943, + "learning_rate": 0.00016031310376408254, + "loss": 2.6677, + "step": 5949 + }, + { + "epoch": 0.4801872326688726, + "grad_norm": 0.694715142250061, + "learning_rate": 0.00016030051071796146, + "loss": 2.6415, + "step": 5950 + }, + { + "epoch": 0.48026793640545556, + "grad_norm": 0.728918194770813, + "learning_rate": 0.00016028791616899403, + "loss": 2.6274, + "step": 5951 + }, + { + "epoch": 0.4803486401420386, + "grad_norm": 0.699846088886261, + "learning_rate": 0.00016027532011749412, + "loss": 2.6613, + "step": 5952 + }, + { + "epoch": 0.4804293438786216, + "grad_norm": 0.7177432179450989, + "learning_rate": 0.0001602627225637757, + "loss": 2.6107, + "step": 5953 + }, + { + "epoch": 0.4805100476152046, + "grad_norm": 0.7502370476722717, + "learning_rate": 0.00016025012350815267, + "loss": 2.6534, + "step": 5954 + }, + { + "epoch": 0.4805907513517876, + "grad_norm": 0.7730218172073364, + "learning_rate": 0.0001602375229509391, + "loss": 2.7037, + "step": 5955 + }, + { + "epoch": 0.4806714550883706, + "grad_norm": 0.7046666145324707, + "learning_rate": 0.00016022492089244898, + "loss": 2.6336, + "step": 5956 + }, + { + "epoch": 0.4807521588249536, + "grad_norm": 0.7991104125976562, + "learning_rate": 0.0001602123173329964, + "loss": 2.7024, + "step": 5957 + }, + { + "epoch": 0.4808328625615366, + "grad_norm": 0.7056288123130798, + "learning_rate": 
0.00016019971227289548, + "loss": 2.6088, + "step": 5958 + }, + { + "epoch": 0.4809135662981196, + "grad_norm": 0.7277925610542297, + "learning_rate": 0.00016018710571246038, + "loss": 2.6245, + "step": 5959 + }, + { + "epoch": 0.48099427003470263, + "grad_norm": 0.7545790672302246, + "learning_rate": 0.00016017449765200526, + "loss": 2.6076, + "step": 5960 + }, + { + "epoch": 0.4810749737712856, + "grad_norm": 0.7106321454048157, + "learning_rate": 0.00016016188809184434, + "loss": 2.5561, + "step": 5961 + }, + { + "epoch": 0.48115567750786864, + "grad_norm": 0.7464704513549805, + "learning_rate": 0.0001601492770322919, + "loss": 2.6336, + "step": 5962 + }, + { + "epoch": 0.4812363812444516, + "grad_norm": 0.7531768083572388, + "learning_rate": 0.00016013666447366228, + "loss": 2.6236, + "step": 5963 + }, + { + "epoch": 0.48131708498103465, + "grad_norm": 0.7412876486778259, + "learning_rate": 0.00016012405041626978, + "loss": 2.6309, + "step": 5964 + }, + { + "epoch": 0.4813977887176176, + "grad_norm": 0.7030940055847168, + "learning_rate": 0.00016011143486042878, + "loss": 2.6252, + "step": 5965 + }, + { + "epoch": 0.48147849245420066, + "grad_norm": 0.7932302951812744, + "learning_rate": 0.00016009881780645367, + "loss": 2.6797, + "step": 5966 + }, + { + "epoch": 0.48155919619078363, + "grad_norm": 0.7366262078285217, + "learning_rate": 0.00016008619925465893, + "loss": 2.6616, + "step": 5967 + }, + { + "epoch": 0.4816398999273666, + "grad_norm": 0.6938421130180359, + "learning_rate": 0.00016007357920535902, + "loss": 2.6888, + "step": 5968 + }, + { + "epoch": 0.48172060366394964, + "grad_norm": 0.7560005784034729, + "learning_rate": 0.00016006095765886853, + "loss": 2.6044, + "step": 5969 + }, + { + "epoch": 0.4818013074005326, + "grad_norm": 0.7330430150032043, + "learning_rate": 0.0001600483346155019, + "loss": 2.7023, + "step": 5970 + }, + { + "epoch": 0.48188201113711565, + "grad_norm": 0.7257955074310303, + "learning_rate": 0.00016003571007557388, + 
"loss": 2.6763, + "step": 5971 + }, + { + "epoch": 0.4819627148736986, + "grad_norm": 0.704187273979187, + "learning_rate": 0.000160023084039399, + "loss": 2.6229, + "step": 5972 + }, + { + "epoch": 0.48204341861028166, + "grad_norm": 0.7014813423156738, + "learning_rate": 0.00016001045650729196, + "loss": 2.6207, + "step": 5973 + }, + { + "epoch": 0.48212412234686464, + "grad_norm": 0.8039405941963196, + "learning_rate": 0.00015999782747956747, + "loss": 2.6198, + "step": 5974 + }, + { + "epoch": 0.48220482608344767, + "grad_norm": 0.7114945650100708, + "learning_rate": 0.0001599851969565403, + "loss": 2.6154, + "step": 5975 + }, + { + "epoch": 0.48228552982003065, + "grad_norm": 0.7603329420089722, + "learning_rate": 0.00015997256493852517, + "loss": 2.6217, + "step": 5976 + }, + { + "epoch": 0.4823662335566137, + "grad_norm": 0.7773346900939941, + "learning_rate": 0.000159959931425837, + "loss": 2.7054, + "step": 5977 + }, + { + "epoch": 0.48244693729319665, + "grad_norm": 0.8022029399871826, + "learning_rate": 0.0001599472964187906, + "loss": 2.6844, + "step": 5978 + }, + { + "epoch": 0.4825276410297797, + "grad_norm": 0.7384541630744934, + "learning_rate": 0.00015993465991770087, + "loss": 2.6516, + "step": 5979 + }, + { + "epoch": 0.48260834476636266, + "grad_norm": 0.6993509531021118, + "learning_rate": 0.00015992202192288273, + "loss": 2.6837, + "step": 5980 + }, + { + "epoch": 0.4826890485029457, + "grad_norm": 0.7430509328842163, + "learning_rate": 0.00015990938243465116, + "loss": 2.6717, + "step": 5981 + }, + { + "epoch": 0.48276975223952867, + "grad_norm": 0.7544847726821899, + "learning_rate": 0.0001598967414533212, + "loss": 2.6573, + "step": 5982 + }, + { + "epoch": 0.4828504559761117, + "grad_norm": 0.736955463886261, + "learning_rate": 0.00015988409897920786, + "loss": 2.6865, + "step": 5983 + }, + { + "epoch": 0.4829311597126947, + "grad_norm": 0.7771684527397156, + "learning_rate": 0.00015987145501262622, + "loss": 2.6173, + "step": 5984 + }, + 
{ + "epoch": 0.4830118634492777, + "grad_norm": 0.7504391670227051, + "learning_rate": 0.00015985880955389143, + "loss": 2.6218, + "step": 5985 + }, + { + "epoch": 0.4830925671858607, + "grad_norm": 0.7025442123413086, + "learning_rate": 0.00015984616260331861, + "loss": 2.6107, + "step": 5986 + }, + { + "epoch": 0.4831732709224437, + "grad_norm": 0.6906485557556152, + "learning_rate": 0.000159833514161223, + "loss": 2.633, + "step": 5987 + }, + { + "epoch": 0.4832539746590267, + "grad_norm": 0.7771004438400269, + "learning_rate": 0.00015982086422791983, + "loss": 2.5956, + "step": 5988 + }, + { + "epoch": 0.48333467839560973, + "grad_norm": 0.6927372813224792, + "learning_rate": 0.00015980821280372432, + "loss": 2.5984, + "step": 5989 + }, + { + "epoch": 0.4834153821321927, + "grad_norm": 0.7196357846260071, + "learning_rate": 0.00015979555988895184, + "loss": 2.6386, + "step": 5990 + }, + { + "epoch": 0.48349608586877574, + "grad_norm": 0.7601087689399719, + "learning_rate": 0.0001597829054839177, + "loss": 2.6707, + "step": 5991 + }, + { + "epoch": 0.4835767896053587, + "grad_norm": 0.7783588767051697, + "learning_rate": 0.00015977024958893722, + "loss": 2.5815, + "step": 5992 + }, + { + "epoch": 0.48365749334194175, + "grad_norm": 0.7651833891868591, + "learning_rate": 0.00015975759220432592, + "loss": 2.6235, + "step": 5993 + }, + { + "epoch": 0.4837381970785247, + "grad_norm": 0.7158511877059937, + "learning_rate": 0.0001597449333303992, + "loss": 2.6813, + "step": 5994 + }, + { + "epoch": 0.48381890081510776, + "grad_norm": 0.7411341667175293, + "learning_rate": 0.0001597322729674726, + "loss": 2.7231, + "step": 5995 + }, + { + "epoch": 0.48389960455169073, + "grad_norm": 0.7168158292770386, + "learning_rate": 0.0001597196111158616, + "loss": 2.6408, + "step": 5996 + }, + { + "epoch": 0.48398030828827376, + "grad_norm": 0.7603393793106079, + "learning_rate": 0.00015970694777588175, + "loss": 2.7821, + "step": 5997 + }, + { + "epoch": 0.48406101202485674, + 
"grad_norm": 0.7298564910888672, + "learning_rate": 0.0001596942829478487, + "loss": 2.6828, + "step": 5998 + }, + { + "epoch": 0.4841417157614398, + "grad_norm": 0.7850572466850281, + "learning_rate": 0.0001596816166320781, + "loss": 2.6191, + "step": 5999 + }, + { + "epoch": 0.48422241949802275, + "grad_norm": 0.7697601914405823, + "learning_rate": 0.00015966894882888562, + "loss": 2.6768, + "step": 6000 + }, + { + "epoch": 0.48422241949802275, + "eval_loss": 2.5610127449035645, + "eval_runtime": 760.0481, + "eval_samples_per_second": 3.447, + "eval_steps_per_second": 0.575, + "step": 6000 + }, + { + "epoch": 0.4843031232346058, + "grad_norm": 0.7212432026863098, + "learning_rate": 0.00015965627953858693, + "loss": 2.5967, + "step": 6001 + }, + { + "epoch": 0.48438382697118876, + "grad_norm": 0.7629631757736206, + "learning_rate": 0.0001596436087614978, + "loss": 2.7005, + "step": 6002 + }, + { + "epoch": 0.4844645307077718, + "grad_norm": 0.7154754400253296, + "learning_rate": 0.00015963093649793404, + "loss": 2.6909, + "step": 6003 + }, + { + "epoch": 0.48454523444435477, + "grad_norm": 0.7365279793739319, + "learning_rate": 0.00015961826274821147, + "loss": 2.6268, + "step": 6004 + }, + { + "epoch": 0.4846259381809378, + "grad_norm": 0.8114632964134216, + "learning_rate": 0.00015960558751264596, + "loss": 2.6647, + "step": 6005 + }, + { + "epoch": 0.4847066419175208, + "grad_norm": 0.7411556243896484, + "learning_rate": 0.00015959291079155338, + "loss": 2.6378, + "step": 6006 + }, + { + "epoch": 0.4847873456541038, + "grad_norm": 0.7137390375137329, + "learning_rate": 0.00015958023258524968, + "loss": 2.6454, + "step": 6007 + }, + { + "epoch": 0.4848680493906868, + "grad_norm": 0.7477054595947266, + "learning_rate": 0.00015956755289405088, + "loss": 2.6463, + "step": 6008 + }, + { + "epoch": 0.4849487531272698, + "grad_norm": 0.7198071479797363, + "learning_rate": 0.0001595548717182729, + "loss": 2.6537, + "step": 6009 + }, + { + "epoch": 0.4850294568638528, + 
"grad_norm": 0.6697781085968018, + "learning_rate": 0.00015954218905823186, + "loss": 2.7018, + "step": 6010 + }, + { + "epoch": 0.4851101606004358, + "grad_norm": 0.7577201724052429, + "learning_rate": 0.00015952950491424382, + "loss": 2.6531, + "step": 6011 + }, + { + "epoch": 0.4851908643370188, + "grad_norm": 0.6852774024009705, + "learning_rate": 0.0001595168192866249, + "loss": 2.5819, + "step": 6012 + }, + { + "epoch": 0.48527156807360183, + "grad_norm": 0.7116097807884216, + "learning_rate": 0.0001595041321756913, + "loss": 2.5691, + "step": 6013 + }, + { + "epoch": 0.4853522718101848, + "grad_norm": 0.7478477954864502, + "learning_rate": 0.00015949144358175916, + "loss": 2.6658, + "step": 6014 + }, + { + "epoch": 0.48543297554676784, + "grad_norm": 0.816969633102417, + "learning_rate": 0.0001594787535051447, + "loss": 2.6709, + "step": 6015 + }, + { + "epoch": 0.4855136792833508, + "grad_norm": 0.6953164339065552, + "learning_rate": 0.00015946606194616427, + "loss": 2.6139, + "step": 6016 + }, + { + "epoch": 0.48559438301993385, + "grad_norm": 0.6698834300041199, + "learning_rate": 0.0001594533689051341, + "loss": 2.574, + "step": 6017 + }, + { + "epoch": 0.4856750867565168, + "grad_norm": 0.7686784267425537, + "learning_rate": 0.0001594406743823706, + "loss": 2.6271, + "step": 6018 + }, + { + "epoch": 0.4857557904930998, + "grad_norm": 0.7713280916213989, + "learning_rate": 0.00015942797837819009, + "loss": 2.6682, + "step": 6019 + }, + { + "epoch": 0.48583649422968284, + "grad_norm": 0.8102596998214722, + "learning_rate": 0.00015941528089290902, + "loss": 2.6771, + "step": 6020 + }, + { + "epoch": 0.4859171979662658, + "grad_norm": 0.7140331864356995, + "learning_rate": 0.00015940258192684382, + "loss": 2.6267, + "step": 6021 + }, + { + "epoch": 0.48599790170284884, + "grad_norm": 0.7057615518569946, + "learning_rate": 0.000159389881480311, + "loss": 2.6011, + "step": 6022 + }, + { + "epoch": 0.4860786054394318, + "grad_norm": 0.7106850147247314, + 
"learning_rate": 0.0001593771795536271, + "loss": 2.6681, + "step": 6023 + }, + { + "epoch": 0.48615930917601485, + "grad_norm": 0.7618210315704346, + "learning_rate": 0.00015936447614710867, + "loss": 2.6545, + "step": 6024 + }, + { + "epoch": 0.48624001291259783, + "grad_norm": 0.7577608227729797, + "learning_rate": 0.00015935177126107233, + "loss": 2.6479, + "step": 6025 + }, + { + "epoch": 0.48632071664918086, + "grad_norm": 0.758745551109314, + "learning_rate": 0.00015933906489583468, + "loss": 2.7057, + "step": 6026 + }, + { + "epoch": 0.48640142038576384, + "grad_norm": 0.785906970500946, + "learning_rate": 0.00015932635705171241, + "loss": 2.7081, + "step": 6027 + }, + { + "epoch": 0.48648212412234687, + "grad_norm": 0.6744558215141296, + "learning_rate": 0.00015931364772902228, + "loss": 2.6438, + "step": 6028 + }, + { + "epoch": 0.48656282785892985, + "grad_norm": 0.7451377511024475, + "learning_rate": 0.00015930093692808099, + "loss": 2.6509, + "step": 6029 + }, + { + "epoch": 0.4866435315955129, + "grad_norm": 0.6590149402618408, + "learning_rate": 0.0001592882246492053, + "loss": 2.5683, + "step": 6030 + }, + { + "epoch": 0.48672423533209586, + "grad_norm": 0.7433840036392212, + "learning_rate": 0.0001592755108927121, + "loss": 2.6647, + "step": 6031 + }, + { + "epoch": 0.4868049390686789, + "grad_norm": 0.876806378364563, + "learning_rate": 0.00015926279565891822, + "loss": 2.6482, + "step": 6032 + }, + { + "epoch": 0.48688564280526186, + "grad_norm": 0.7495005130767822, + "learning_rate": 0.00015925007894814058, + "loss": 2.6346, + "step": 6033 + }, + { + "epoch": 0.4869663465418449, + "grad_norm": 0.7005730271339417, + "learning_rate": 0.00015923736076069604, + "loss": 2.6241, + "step": 6034 + }, + { + "epoch": 0.4870470502784279, + "grad_norm": 0.664098858833313, + "learning_rate": 0.00015922464109690166, + "loss": 2.6281, + "step": 6035 + }, + { + "epoch": 0.4871277540150109, + "grad_norm": 0.7482514977455139, + "learning_rate": 
0.00015921191995707442, + "loss": 2.5764, + "step": 6036 + }, + { + "epoch": 0.4872084577515939, + "grad_norm": 0.7450351715087891, + "learning_rate": 0.0001591991973415313, + "loss": 2.6433, + "step": 6037 + }, + { + "epoch": 0.4872891614881769, + "grad_norm": 0.6738519072532654, + "learning_rate": 0.00015918647325058948, + "loss": 2.6688, + "step": 6038 + }, + { + "epoch": 0.4873698652247599, + "grad_norm": 0.7999960780143738, + "learning_rate": 0.000159173747684566, + "loss": 2.6309, + "step": 6039 + }, + { + "epoch": 0.4874505689613429, + "grad_norm": 0.7249687910079956, + "learning_rate": 0.00015916102064377806, + "loss": 2.5808, + "step": 6040 + }, + { + "epoch": 0.4875312726979259, + "grad_norm": 0.7014601826667786, + "learning_rate": 0.00015914829212854286, + "loss": 2.6646, + "step": 6041 + }, + { + "epoch": 0.48761197643450893, + "grad_norm": 0.7091174721717834, + "learning_rate": 0.00015913556213917757, + "loss": 2.6576, + "step": 6042 + }, + { + "epoch": 0.4876926801710919, + "grad_norm": 0.6949019432067871, + "learning_rate": 0.00015912283067599952, + "loss": 2.5883, + "step": 6043 + }, + { + "epoch": 0.48777338390767494, + "grad_norm": 0.6990448236465454, + "learning_rate": 0.00015911009773932598, + "loss": 2.6413, + "step": 6044 + }, + { + "epoch": 0.4878540876442579, + "grad_norm": 0.7106831073760986, + "learning_rate": 0.00015909736332947425, + "loss": 2.6122, + "step": 6045 + }, + { + "epoch": 0.48793479138084095, + "grad_norm": 0.7052395343780518, + "learning_rate": 0.00015908462744676177, + "loss": 2.572, + "step": 6046 + }, + { + "epoch": 0.4880154951174239, + "grad_norm": 0.7250158190727234, + "learning_rate": 0.00015907189009150592, + "loss": 2.6582, + "step": 6047 + }, + { + "epoch": 0.48809619885400696, + "grad_norm": 0.7213590145111084, + "learning_rate": 0.00015905915126402414, + "loss": 2.7025, + "step": 6048 + }, + { + "epoch": 0.48817690259058993, + "grad_norm": 0.7136254906654358, + "learning_rate": 0.00015904641096463394, + "loss": 
2.6823, + "step": 6049 + }, + { + "epoch": 0.48825760632717297, + "grad_norm": 0.7163361310958862, + "learning_rate": 0.00015903366919365282, + "loss": 2.6642, + "step": 6050 + }, + { + "epoch": 0.48833831006375594, + "grad_norm": 0.6842724680900574, + "learning_rate": 0.00015902092595139838, + "loss": 2.6599, + "step": 6051 + }, + { + "epoch": 0.488419013800339, + "grad_norm": 0.7426519393920898, + "learning_rate": 0.0001590081812381882, + "loss": 2.6271, + "step": 6052 + }, + { + "epoch": 0.48849971753692195, + "grad_norm": 0.7415586709976196, + "learning_rate": 0.00015899543505433985, + "loss": 2.6105, + "step": 6053 + }, + { + "epoch": 0.488580421273505, + "grad_norm": 0.7286739945411682, + "learning_rate": 0.00015898268740017105, + "loss": 2.6304, + "step": 6054 + }, + { + "epoch": 0.48866112501008796, + "grad_norm": 0.6898483633995056, + "learning_rate": 0.00015896993827599947, + "loss": 2.6237, + "step": 6055 + }, + { + "epoch": 0.488741828746671, + "grad_norm": 0.7020056247711182, + "learning_rate": 0.00015895718768214293, + "loss": 2.6166, + "step": 6056 + }, + { + "epoch": 0.48882253248325397, + "grad_norm": 0.7145286798477173, + "learning_rate": 0.00015894443561891914, + "loss": 2.6729, + "step": 6057 + }, + { + "epoch": 0.488903236219837, + "grad_norm": 0.6888289451599121, + "learning_rate": 0.00015893168208664594, + "loss": 2.6154, + "step": 6058 + }, + { + "epoch": 0.48898393995642, + "grad_norm": 0.6929970383644104, + "learning_rate": 0.00015891892708564116, + "loss": 2.6748, + "step": 6059 + }, + { + "epoch": 0.489064643693003, + "grad_norm": 0.679853618144989, + "learning_rate": 0.0001589061706162227, + "loss": 2.605, + "step": 6060 + }, + { + "epoch": 0.489145347429586, + "grad_norm": 0.71812504529953, + "learning_rate": 0.0001588934126787085, + "loss": 2.7249, + "step": 6061 + }, + { + "epoch": 0.489226051166169, + "grad_norm": 0.7083466053009033, + "learning_rate": 0.00015888065327341648, + "loss": 2.5986, + "step": 6062 + }, + { + "epoch": 
0.489306754902752, + "grad_norm": 0.7476792931556702, + "learning_rate": 0.00015886789240066466, + "loss": 2.5942, + "step": 6063 + }, + { + "epoch": 0.489387458639335, + "grad_norm": 0.7197855114936829, + "learning_rate": 0.00015885513006077114, + "loss": 2.6198, + "step": 6064 + }, + { + "epoch": 0.489468162375918, + "grad_norm": 0.6678233742713928, + "learning_rate": 0.00015884236625405385, + "loss": 2.5793, + "step": 6065 + }, + { + "epoch": 0.48954886611250104, + "grad_norm": 0.7371037602424622, + "learning_rate": 0.00015882960098083105, + "loss": 2.6231, + "step": 6066 + }, + { + "epoch": 0.489629569849084, + "grad_norm": 0.7087417244911194, + "learning_rate": 0.00015881683424142078, + "loss": 2.6483, + "step": 6067 + }, + { + "epoch": 0.48971027358566704, + "grad_norm": 0.7300292253494263, + "learning_rate": 0.00015880406603614126, + "loss": 2.6778, + "step": 6068 + }, + { + "epoch": 0.48979097732225, + "grad_norm": 0.8347866535186768, + "learning_rate": 0.0001587912963653107, + "loss": 2.554, + "step": 6069 + }, + { + "epoch": 0.489871681058833, + "grad_norm": 0.7717794179916382, + "learning_rate": 0.00015877852522924732, + "loss": 2.6904, + "step": 6070 + }, + { + "epoch": 0.48995238479541603, + "grad_norm": 0.6960952281951904, + "learning_rate": 0.00015876575262826944, + "loss": 2.6059, + "step": 6071 + }, + { + "epoch": 0.490033088531999, + "grad_norm": 0.7316592931747437, + "learning_rate": 0.00015875297856269543, + "loss": 2.6685, + "step": 6072 + }, + { + "epoch": 0.49011379226858204, + "grad_norm": 0.6775457859039307, + "learning_rate": 0.00015874020303284362, + "loss": 2.6232, + "step": 6073 + }, + { + "epoch": 0.490194496005165, + "grad_norm": 0.7741925120353699, + "learning_rate": 0.00015872742603903237, + "loss": 2.6767, + "step": 6074 + }, + { + "epoch": 0.49027519974174805, + "grad_norm": 0.857490599155426, + "learning_rate": 0.00015871464758158017, + "loss": 2.6649, + "step": 6075 + }, + { + "epoch": 0.490355903478331, + "grad_norm": 
0.7474274039268494, + "learning_rate": 0.00015870186766080545, + "loss": 2.6926, + "step": 6076 + }, + { + "epoch": 0.49043660721491406, + "grad_norm": 0.7266567945480347, + "learning_rate": 0.00015868908627702675, + "loss": 2.5919, + "step": 6077 + }, + { + "epoch": 0.49051731095149703, + "grad_norm": 0.7247830629348755, + "learning_rate": 0.0001586763034305626, + "loss": 2.6158, + "step": 6078 + }, + { + "epoch": 0.49059801468808006, + "grad_norm": 0.7654951214790344, + "learning_rate": 0.00015866351912173157, + "loss": 2.7236, + "step": 6079 + }, + { + "epoch": 0.49067871842466304, + "grad_norm": 0.732431948184967, + "learning_rate": 0.00015865073335085236, + "loss": 2.6349, + "step": 6080 + }, + { + "epoch": 0.4907594221612461, + "grad_norm": 0.7240673303604126, + "learning_rate": 0.0001586379461182435, + "loss": 2.6282, + "step": 6081 + }, + { + "epoch": 0.49084012589782905, + "grad_norm": 0.767473042011261, + "learning_rate": 0.00015862515742422374, + "loss": 2.6939, + "step": 6082 + }, + { + "epoch": 0.4909208296344121, + "grad_norm": 0.6977359056472778, + "learning_rate": 0.00015861236726911183, + "loss": 2.6591, + "step": 6083 + }, + { + "epoch": 0.49100153337099506, + "grad_norm": 0.7676639556884766, + "learning_rate": 0.00015859957565322655, + "loss": 2.6189, + "step": 6084 + }, + { + "epoch": 0.4910822371075781, + "grad_norm": 0.7157976031303406, + "learning_rate": 0.0001585867825768866, + "loss": 2.644, + "step": 6085 + }, + { + "epoch": 0.49116294084416107, + "grad_norm": 0.7080803513526917, + "learning_rate": 0.0001585739880404109, + "loss": 2.6099, + "step": 6086 + }, + { + "epoch": 0.4912436445807441, + "grad_norm": 0.7109760046005249, + "learning_rate": 0.0001585611920441183, + "loss": 2.7087, + "step": 6087 + }, + { + "epoch": 0.4913243483173271, + "grad_norm": 0.7274255156517029, + "learning_rate": 0.00015854839458832772, + "loss": 2.6394, + "step": 6088 + }, + { + "epoch": 0.4914050520539101, + "grad_norm": 0.7407883405685425, + 
"learning_rate": 0.00015853559567335812, + "loss": 2.6729, + "step": 6089 + }, + { + "epoch": 0.4914857557904931, + "grad_norm": 0.6879885196685791, + "learning_rate": 0.00015852279529952843, + "loss": 2.5971, + "step": 6090 + }, + { + "epoch": 0.4915664595270761, + "grad_norm": 0.7678415179252625, + "learning_rate": 0.00015850999346715772, + "loss": 2.6606, + "step": 6091 + }, + { + "epoch": 0.4916471632636591, + "grad_norm": 0.7108608484268188, + "learning_rate": 0.00015849719017656504, + "loss": 2.6494, + "step": 6092 + }, + { + "epoch": 0.4917278670002421, + "grad_norm": 0.7238833904266357, + "learning_rate": 0.00015848438542806945, + "loss": 2.6742, + "step": 6093 + }, + { + "epoch": 0.4918085707368251, + "grad_norm": 0.7316902279853821, + "learning_rate": 0.0001584715792219901, + "loss": 2.6757, + "step": 6094 + }, + { + "epoch": 0.49188927447340813, + "grad_norm": 0.7339446544647217, + "learning_rate": 0.00015845877155864612, + "loss": 2.607, + "step": 6095 + }, + { + "epoch": 0.4919699782099911, + "grad_norm": 0.6931337714195251, + "learning_rate": 0.0001584459624383568, + "loss": 2.6203, + "step": 6096 + }, + { + "epoch": 0.49205068194657414, + "grad_norm": 0.734229326248169, + "learning_rate": 0.00015843315186144126, + "loss": 2.646, + "step": 6097 + }, + { + "epoch": 0.4921313856831571, + "grad_norm": 0.7764919400215149, + "learning_rate": 0.00015842033982821883, + "loss": 2.6698, + "step": 6098 + }, + { + "epoch": 0.49221208941974015, + "grad_norm": 0.7707986235618591, + "learning_rate": 0.00015840752633900887, + "loss": 2.6995, + "step": 6099 + }, + { + "epoch": 0.4922927931563231, + "grad_norm": 0.7321949601173401, + "learning_rate": 0.00015839471139413066, + "loss": 2.6517, + "step": 6100 + }, + { + "epoch": 0.49237349689290616, + "grad_norm": 0.7087488770484924, + "learning_rate": 0.00015838189499390353, + "loss": 2.6153, + "step": 6101 + }, + { + "epoch": 0.49245420062948914, + "grad_norm": 0.7300730347633362, + "learning_rate": 
0.00015836907713864706, + "loss": 2.5868, + "step": 6102 + }, + { + "epoch": 0.49253490436607217, + "grad_norm": 0.8476536273956299, + "learning_rate": 0.00015835625782868054, + "loss": 2.7158, + "step": 6103 + }, + { + "epoch": 0.49261560810265514, + "grad_norm": 0.8062012791633606, + "learning_rate": 0.0001583434370643236, + "loss": 2.6896, + "step": 6104 + }, + { + "epoch": 0.4926963118392382, + "grad_norm": 0.7336686849594116, + "learning_rate": 0.00015833061484589562, + "loss": 2.6416, + "step": 6105 + }, + { + "epoch": 0.49277701557582115, + "grad_norm": 0.6976929306983948, + "learning_rate": 0.00015831779117371627, + "loss": 2.6279, + "step": 6106 + }, + { + "epoch": 0.4928577193124042, + "grad_norm": 0.7262609601020813, + "learning_rate": 0.00015830496604810513, + "loss": 2.6144, + "step": 6107 + }, + { + "epoch": 0.49293842304898716, + "grad_norm": 0.7274572253227234, + "learning_rate": 0.00015829213946938183, + "loss": 2.7409, + "step": 6108 + }, + { + "epoch": 0.4930191267855702, + "grad_norm": 0.7438454031944275, + "learning_rate": 0.000158279311437866, + "loss": 2.5928, + "step": 6109 + }, + { + "epoch": 0.49309983052215317, + "grad_norm": 0.6885421872138977, + "learning_rate": 0.00015826648195387742, + "loss": 2.6659, + "step": 6110 + }, + { + "epoch": 0.4931805342587362, + "grad_norm": 0.6781450510025024, + "learning_rate": 0.0001582536510177358, + "loss": 2.6068, + "step": 6111 + }, + { + "epoch": 0.4932612379953192, + "grad_norm": 0.7618128657341003, + "learning_rate": 0.0001582408186297609, + "loss": 2.6705, + "step": 6112 + }, + { + "epoch": 0.4933419417319022, + "grad_norm": 0.7011203765869141, + "learning_rate": 0.00015822798479027256, + "loss": 2.596, + "step": 6113 + }, + { + "epoch": 0.4934226454684852, + "grad_norm": 0.7727806568145752, + "learning_rate": 0.00015821514949959065, + "loss": 2.6458, + "step": 6114 + }, + { + "epoch": 0.4935033492050682, + "grad_norm": 0.7318129539489746, + "learning_rate": 0.00015820231275803502, + "loss": 
2.6009, + "step": 6115 + }, + { + "epoch": 0.4935840529416512, + "grad_norm": 0.6836227178573608, + "learning_rate": 0.00015818947456592563, + "loss": 2.6311, + "step": 6116 + }, + { + "epoch": 0.49366475667823423, + "grad_norm": 0.7657275199890137, + "learning_rate": 0.0001581766349235824, + "loss": 2.6079, + "step": 6117 + }, + { + "epoch": 0.4937454604148172, + "grad_norm": 0.74736487865448, + "learning_rate": 0.0001581637938313254, + "loss": 2.6752, + "step": 6118 + }, + { + "epoch": 0.49382616415140024, + "grad_norm": 0.716708242893219, + "learning_rate": 0.00015815095128947454, + "loss": 2.5896, + "step": 6119 + }, + { + "epoch": 0.4939068678879832, + "grad_norm": 0.740727424621582, + "learning_rate": 0.00015813810729835002, + "loss": 2.6528, + "step": 6120 + }, + { + "epoch": 0.4939875716245662, + "grad_norm": 0.6746687293052673, + "learning_rate": 0.0001581252618582719, + "loss": 2.6438, + "step": 6121 + }, + { + "epoch": 0.4940682753611492, + "grad_norm": 0.7547900080680847, + "learning_rate": 0.00015811241496956028, + "loss": 2.631, + "step": 6122 + }, + { + "epoch": 0.4941489790977322, + "grad_norm": 0.7500903606414795, + "learning_rate": 0.0001580995666325354, + "loss": 2.7039, + "step": 6123 + }, + { + "epoch": 0.49422968283431523, + "grad_norm": 0.7692849636077881, + "learning_rate": 0.00015808671684751743, + "loss": 2.5922, + "step": 6124 + }, + { + "epoch": 0.4943103865708982, + "grad_norm": 0.6964236497879028, + "learning_rate": 0.00015807386561482662, + "loss": 2.6239, + "step": 6125 + }, + { + "epoch": 0.49439109030748124, + "grad_norm": 0.7094165086746216, + "learning_rate": 0.0001580610129347833, + "loss": 2.6239, + "step": 6126 + }, + { + "epoch": 0.4944717940440642, + "grad_norm": 0.7579131126403809, + "learning_rate": 0.00015804815880770775, + "loss": 2.6654, + "step": 6127 + }, + { + "epoch": 0.49455249778064725, + "grad_norm": 0.7687693238258362, + "learning_rate": 0.00015803530323392034, + "loss": 2.6557, + "step": 6128 + }, + { + 
"epoch": 0.4946332015172302, + "grad_norm": 0.6913540363311768, + "learning_rate": 0.0001580224462137415, + "loss": 2.6299, + "step": 6129 + }, + { + "epoch": 0.49471390525381326, + "grad_norm": 0.7574129700660706, + "learning_rate": 0.0001580095877474916, + "loss": 2.6327, + "step": 6130 + }, + { + "epoch": 0.49479460899039623, + "grad_norm": 0.6834598183631897, + "learning_rate": 0.0001579967278354911, + "loss": 2.6402, + "step": 6131 + }, + { + "epoch": 0.49487531272697927, + "grad_norm": 0.7872750163078308, + "learning_rate": 0.00015798386647806057, + "loss": 2.6647, + "step": 6132 + }, + { + "epoch": 0.49495601646356224, + "grad_norm": 0.705211341381073, + "learning_rate": 0.00015797100367552055, + "loss": 2.6288, + "step": 6133 + }, + { + "epoch": 0.4950367202001453, + "grad_norm": 0.7302640080451965, + "learning_rate": 0.00015795813942819155, + "loss": 2.6683, + "step": 6134 + }, + { + "epoch": 0.49511742393672825, + "grad_norm": 0.7522360682487488, + "learning_rate": 0.0001579452737363942, + "loss": 2.5885, + "step": 6135 + }, + { + "epoch": 0.4951981276733113, + "grad_norm": 0.657376229763031, + "learning_rate": 0.0001579324066004492, + "loss": 2.5775, + "step": 6136 + }, + { + "epoch": 0.49527883140989426, + "grad_norm": 0.7539556622505188, + "learning_rate": 0.00015791953802067715, + "loss": 2.6236, + "step": 6137 + }, + { + "epoch": 0.4953595351464773, + "grad_norm": 0.7090374827384949, + "learning_rate": 0.00015790666799739883, + "loss": 2.5845, + "step": 6138 + }, + { + "epoch": 0.49544023888306027, + "grad_norm": 0.6883948445320129, + "learning_rate": 0.00015789379653093497, + "loss": 2.6621, + "step": 6139 + }, + { + "epoch": 0.4955209426196433, + "grad_norm": 0.7466424107551575, + "learning_rate": 0.00015788092362160633, + "loss": 2.6289, + "step": 6140 + }, + { + "epoch": 0.4956016463562263, + "grad_norm": 0.7424437403678894, + "learning_rate": 0.00015786804926973383, + "loss": 2.6405, + "step": 6141 + }, + { + "epoch": 0.4956823500928093, + 
"grad_norm": 0.7227851748466492, + "learning_rate": 0.00015785517347563822, + "loss": 2.6537, + "step": 6142 + }, + { + "epoch": 0.4957630538293923, + "grad_norm": 0.7548653483390808, + "learning_rate": 0.00015784229623964048, + "loss": 2.7377, + "step": 6143 + }, + { + "epoch": 0.4958437575659753, + "grad_norm": 0.7086976170539856, + "learning_rate": 0.00015782941756206152, + "loss": 2.6194, + "step": 6144 + }, + { + "epoch": 0.4959244613025583, + "grad_norm": 0.6605533957481384, + "learning_rate": 0.0001578165374432223, + "loss": 2.6265, + "step": 6145 + }, + { + "epoch": 0.4960051650391413, + "grad_norm": 0.7187899947166443, + "learning_rate": 0.00015780365588344384, + "loss": 2.5639, + "step": 6146 + }, + { + "epoch": 0.4960858687757243, + "grad_norm": 0.7014074921607971, + "learning_rate": 0.00015779077288304716, + "loss": 2.6011, + "step": 6147 + }, + { + "epoch": 0.49616657251230734, + "grad_norm": 0.7463840842247009, + "learning_rate": 0.00015777788844235335, + "loss": 2.6059, + "step": 6148 + }, + { + "epoch": 0.4962472762488903, + "grad_norm": 0.8022417426109314, + "learning_rate": 0.00015776500256168356, + "loss": 2.6011, + "step": 6149 + }, + { + "epoch": 0.49632797998547334, + "grad_norm": 0.7140083909034729, + "learning_rate": 0.0001577521152413589, + "loss": 2.6891, + "step": 6150 + }, + { + "epoch": 0.4964086837220563, + "grad_norm": 0.7266198992729187, + "learning_rate": 0.00015773922648170053, + "loss": 2.6561, + "step": 6151 + }, + { + "epoch": 0.49648938745863935, + "grad_norm": 0.7241406440734863, + "learning_rate": 0.0001577263362830297, + "loss": 2.6835, + "step": 6152 + }, + { + "epoch": 0.49657009119522233, + "grad_norm": 0.7422344088554382, + "learning_rate": 0.0001577134446456677, + "loss": 2.6039, + "step": 6153 + }, + { + "epoch": 0.49665079493180536, + "grad_norm": 0.8764764666557312, + "learning_rate": 0.0001577005515699358, + "loss": 2.68, + "step": 6154 + }, + { + "epoch": 0.49673149866838834, + "grad_norm": 0.7224323749542236, + 
"learning_rate": 0.0001576876570561553, + "loss": 2.5824, + "step": 6155 + }, + { + "epoch": 0.49681220240497137, + "grad_norm": 0.7601075172424316, + "learning_rate": 0.00015767476110464758, + "loss": 2.7124, + "step": 6156 + }, + { + "epoch": 0.49689290614155435, + "grad_norm": 0.7425428628921509, + "learning_rate": 0.0001576618637157341, + "loss": 2.5913, + "step": 6157 + }, + { + "epoch": 0.4969736098781374, + "grad_norm": 0.721969723701477, + "learning_rate": 0.0001576489648897362, + "loss": 2.6482, + "step": 6158 + }, + { + "epoch": 0.49705431361472036, + "grad_norm": 0.8142126798629761, + "learning_rate": 0.00015763606462697544, + "loss": 2.6231, + "step": 6159 + }, + { + "epoch": 0.4971350173513034, + "grad_norm": 0.6636359691619873, + "learning_rate": 0.00015762316292777326, + "loss": 2.6388, + "step": 6160 + }, + { + "epoch": 0.49721572108788636, + "grad_norm": 0.7093132734298706, + "learning_rate": 0.00015761025979245123, + "loss": 2.6562, + "step": 6161 + }, + { + "epoch": 0.4972964248244694, + "grad_norm": 0.7130851745605469, + "learning_rate": 0.00015759735522133094, + "loss": 2.6856, + "step": 6162 + }, + { + "epoch": 0.4973771285610524, + "grad_norm": 0.7303292155265808, + "learning_rate": 0.000157584449214734, + "loss": 2.6077, + "step": 6163 + }, + { + "epoch": 0.4974578322976354, + "grad_norm": 0.6742258071899414, + "learning_rate": 0.00015757154177298204, + "loss": 2.6644, + "step": 6164 + }, + { + "epoch": 0.4975385360342184, + "grad_norm": 0.6882894039154053, + "learning_rate": 0.00015755863289639677, + "loss": 2.6462, + "step": 6165 + }, + { + "epoch": 0.4976192397708014, + "grad_norm": 0.7882276773452759, + "learning_rate": 0.00015754572258529993, + "loss": 2.6509, + "step": 6166 + }, + { + "epoch": 0.4976999435073844, + "grad_norm": 0.7163859009742737, + "learning_rate": 0.00015753281084001324, + "loss": 2.627, + "step": 6167 + }, + { + "epoch": 0.4977806472439674, + "grad_norm": 0.7194411158561707, + "learning_rate": 0.0001575198976608585, 
+ "loss": 2.6798, + "step": 6168 + }, + { + "epoch": 0.4978613509805504, + "grad_norm": 0.7233198881149292, + "learning_rate": 0.0001575069830481576, + "loss": 2.6616, + "step": 6169 + }, + { + "epoch": 0.49794205471713343, + "grad_norm": 0.7246997952461243, + "learning_rate": 0.00015749406700223231, + "loss": 2.6262, + "step": 6170 + }, + { + "epoch": 0.4980227584537164, + "grad_norm": 0.7509368658065796, + "learning_rate": 0.00015748114952340457, + "loss": 2.6148, + "step": 6171 + }, + { + "epoch": 0.4981034621902994, + "grad_norm": 0.7079075574874878, + "learning_rate": 0.00015746823061199637, + "loss": 2.6712, + "step": 6172 + }, + { + "epoch": 0.4981841659268824, + "grad_norm": 0.6821560859680176, + "learning_rate": 0.0001574553102683296, + "loss": 2.6253, + "step": 6173 + }, + { + "epoch": 0.4982648696634654, + "grad_norm": 0.7623000741004944, + "learning_rate": 0.00015744238849272634, + "loss": 2.6252, + "step": 6174 + }, + { + "epoch": 0.4983455734000484, + "grad_norm": 0.709434449672699, + "learning_rate": 0.00015742946528550858, + "loss": 2.555, + "step": 6175 + }, + { + "epoch": 0.4984262771366314, + "grad_norm": 0.7277799844741821, + "learning_rate": 0.00015741654064699846, + "loss": 2.6551, + "step": 6176 + }, + { + "epoch": 0.49850698087321443, + "grad_norm": 0.7208690643310547, + "learning_rate": 0.00015740361457751802, + "loss": 2.6747, + "step": 6177 + }, + { + "epoch": 0.4985876846097974, + "grad_norm": 0.8458136916160583, + "learning_rate": 0.00015739068707738946, + "loss": 2.6551, + "step": 6178 + }, + { + "epoch": 0.49866838834638044, + "grad_norm": 0.7718539834022522, + "learning_rate": 0.00015737775814693498, + "loss": 2.6246, + "step": 6179 + }, + { + "epoch": 0.4987490920829634, + "grad_norm": 0.6982735395431519, + "learning_rate": 0.00015736482778647674, + "loss": 2.5726, + "step": 6180 + }, + { + "epoch": 0.49882979581954645, + "grad_norm": 0.6759411692619324, + "learning_rate": 0.00015735189599633707, + "loss": 2.6603, + "step": 6181 + 
}, + { + "epoch": 0.4989104995561294, + "grad_norm": 0.7016656994819641, + "learning_rate": 0.0001573389627768382, + "loss": 2.6045, + "step": 6182 + }, + { + "epoch": 0.49899120329271246, + "grad_norm": 0.7170618176460266, + "learning_rate": 0.00015732602812830253, + "loss": 2.6419, + "step": 6183 + }, + { + "epoch": 0.49907190702929544, + "grad_norm": 0.6963300704956055, + "learning_rate": 0.00015731309205105237, + "loss": 2.6377, + "step": 6184 + }, + { + "epoch": 0.49915261076587847, + "grad_norm": 0.7437995672225952, + "learning_rate": 0.00015730015454541014, + "loss": 2.7013, + "step": 6185 + }, + { + "epoch": 0.49923331450246144, + "grad_norm": 0.6846518516540527, + "learning_rate": 0.00015728721561169827, + "loss": 2.5526, + "step": 6186 + }, + { + "epoch": 0.4993140182390445, + "grad_norm": 0.7343618273735046, + "learning_rate": 0.00015727427525023924, + "loss": 2.6567, + "step": 6187 + }, + { + "epoch": 0.49939472197562745, + "grad_norm": 0.6947566270828247, + "learning_rate": 0.00015726133346135554, + "loss": 2.6642, + "step": 6188 + }, + { + "epoch": 0.4994754257122105, + "grad_norm": 0.7402610778808594, + "learning_rate": 0.00015724839024536976, + "loss": 2.6964, + "step": 6189 + }, + { + "epoch": 0.49955612944879346, + "grad_norm": 0.7318306565284729, + "learning_rate": 0.00015723544560260444, + "loss": 2.5864, + "step": 6190 + }, + { + "epoch": 0.4996368331853765, + "grad_norm": 0.752216100692749, + "learning_rate": 0.00015722249953338215, + "loss": 2.6357, + "step": 6191 + }, + { + "epoch": 0.49971753692195947, + "grad_norm": 0.70283442735672, + "learning_rate": 0.00015720955203802565, + "loss": 2.5892, + "step": 6192 + }, + { + "epoch": 0.4997982406585425, + "grad_norm": 0.7457823753356934, + "learning_rate": 0.00015719660311685755, + "loss": 2.6663, + "step": 6193 + }, + { + "epoch": 0.4998789443951255, + "grad_norm": 0.7296229600906372, + "learning_rate": 0.00015718365277020058, + "loss": 2.6238, + "step": 6194 + }, + { + "epoch": 
0.4999596481317085, + "grad_norm": 0.6963346004486084, + "learning_rate": 0.0001571707009983775, + "loss": 2.6303, + "step": 6195 + }, + { + "epoch": 0.5000403518682915, + "grad_norm": 0.7074694633483887, + "learning_rate": 0.0001571577478017111, + "loss": 2.6077, + "step": 6196 + }, + { + "epoch": 0.5001210556048745, + "grad_norm": 0.7826260328292847, + "learning_rate": 0.00015714479318052423, + "loss": 2.6668, + "step": 6197 + }, + { + "epoch": 0.5002017593414575, + "grad_norm": 0.6908758282661438, + "learning_rate": 0.00015713183713513974, + "loss": 2.6195, + "step": 6198 + }, + { + "epoch": 0.5002824630780405, + "grad_norm": 0.7571602463722229, + "learning_rate": 0.0001571188796658805, + "loss": 2.6546, + "step": 6199 + }, + { + "epoch": 0.5003631668146236, + "grad_norm": 0.7359431385993958, + "learning_rate": 0.0001571059207730695, + "loss": 2.5792, + "step": 6200 + }, + { + "epoch": 0.5004438705512065, + "grad_norm": 0.6886340379714966, + "learning_rate": 0.00015709296045702967, + "loss": 2.6099, + "step": 6201 + }, + { + "epoch": 0.5005245742877895, + "grad_norm": 0.6900473833084106, + "learning_rate": 0.000157079998718084, + "loss": 2.6461, + "step": 6202 + }, + { + "epoch": 0.5006052780243725, + "grad_norm": 0.66212397813797, + "learning_rate": 0.00015706703555655555, + "loss": 2.6178, + "step": 6203 + }, + { + "epoch": 0.5006859817609556, + "grad_norm": 0.7666565179824829, + "learning_rate": 0.00015705407097276744, + "loss": 2.7097, + "step": 6204 + }, + { + "epoch": 0.5007666854975386, + "grad_norm": 0.7294591069221497, + "learning_rate": 0.0001570411049670427, + "loss": 2.5995, + "step": 6205 + }, + { + "epoch": 0.5008473892341215, + "grad_norm": 0.7279765009880066, + "learning_rate": 0.00015702813753970453, + "loss": 2.5554, + "step": 6206 + }, + { + "epoch": 0.5009280929707045, + "grad_norm": 0.7174742817878723, + "learning_rate": 0.0001570151686910761, + "loss": 2.6523, + "step": 6207 + }, + { + "epoch": 0.5010087967072876, + "grad_norm": 
0.67017662525177, + "learning_rate": 0.00015700219842148063, + "loss": 2.5613, + "step": 6208 + }, + { + "epoch": 0.5010895004438706, + "grad_norm": 0.7000258564949036, + "learning_rate": 0.00015698922673124138, + "loss": 2.5658, + "step": 6209 + }, + { + "epoch": 0.5011702041804535, + "grad_norm": 0.6894544363021851, + "learning_rate": 0.00015697625362068164, + "loss": 2.6925, + "step": 6210 + }, + { + "epoch": 0.5012509079170365, + "grad_norm": 0.6742957234382629, + "learning_rate": 0.00015696327909012466, + "loss": 2.6429, + "step": 6211 + }, + { + "epoch": 0.5013316116536196, + "grad_norm": 0.7039656639099121, + "learning_rate": 0.0001569503031398939, + "loss": 2.6313, + "step": 6212 + }, + { + "epoch": 0.5014123153902026, + "grad_norm": 0.720003604888916, + "learning_rate": 0.00015693732577031272, + "loss": 2.6207, + "step": 6213 + }, + { + "epoch": 0.5014930191267856, + "grad_norm": 0.8611499071121216, + "learning_rate": 0.00015692434698170456, + "loss": 2.6855, + "step": 6214 + }, + { + "epoch": 0.5015737228633685, + "grad_norm": 0.6664702296257019, + "learning_rate": 0.00015691136677439284, + "loss": 2.6174, + "step": 6215 + }, + { + "epoch": 0.5016544265999516, + "grad_norm": 0.7258509993553162, + "learning_rate": 0.00015689838514870111, + "loss": 2.6558, + "step": 6216 + }, + { + "epoch": 0.5017351303365346, + "grad_norm": 0.6972211599349976, + "learning_rate": 0.0001568854021049529, + "loss": 2.5913, + "step": 6217 + }, + { + "epoch": 0.5018158340731176, + "grad_norm": 0.7927280068397522, + "learning_rate": 0.00015687241764347177, + "loss": 2.6466, + "step": 6218 + }, + { + "epoch": 0.5018965378097006, + "grad_norm": 0.7044646143913269, + "learning_rate": 0.00015685943176458128, + "loss": 2.6195, + "step": 6219 + }, + { + "epoch": 0.5019772415462836, + "grad_norm": 0.6935598254203796, + "learning_rate": 0.00015684644446860516, + "loss": 2.6486, + "step": 6220 + }, + { + "epoch": 0.5020579452828666, + "grad_norm": 0.7965792417526245, + "learning_rate": 
0.00015683345575586704, + "loss": 2.6265, + "step": 6221 + }, + { + "epoch": 0.5021386490194496, + "grad_norm": 0.727053701877594, + "learning_rate": 0.00015682046562669064, + "loss": 2.6714, + "step": 6222 + }, + { + "epoch": 0.5022193527560326, + "grad_norm": 0.7919184565544128, + "learning_rate": 0.0001568074740813997, + "loss": 2.7115, + "step": 6223 + }, + { + "epoch": 0.5023000564926156, + "grad_norm": 0.7724714279174805, + "learning_rate": 0.00015679448112031801, + "loss": 2.6636, + "step": 6224 + }, + { + "epoch": 0.5023807602291986, + "grad_norm": 0.6893701553344727, + "learning_rate": 0.0001567814867437694, + "loss": 2.6562, + "step": 6225 + }, + { + "epoch": 0.5024614639657816, + "grad_norm": 0.7089633345603943, + "learning_rate": 0.00015676849095207769, + "loss": 2.6125, + "step": 6226 + }, + { + "epoch": 0.5025421677023646, + "grad_norm": 0.7620012760162354, + "learning_rate": 0.00015675549374556682, + "loss": 2.6935, + "step": 6227 + }, + { + "epoch": 0.5026228714389476, + "grad_norm": 0.7293741703033447, + "learning_rate": 0.00015674249512456065, + "loss": 2.66, + "step": 6228 + }, + { + "epoch": 0.5027035751755307, + "grad_norm": 0.7366519570350647, + "learning_rate": 0.00015672949508938318, + "loss": 2.5968, + "step": 6229 + }, + { + "epoch": 0.5027842789121136, + "grad_norm": 0.6646310091018677, + "learning_rate": 0.00015671649364035846, + "loss": 2.5751, + "step": 6230 + }, + { + "epoch": 0.5028649826486966, + "grad_norm": 0.6682632565498352, + "learning_rate": 0.00015670349077781038, + "loss": 2.5902, + "step": 6231 + }, + { + "epoch": 0.5029456863852796, + "grad_norm": 0.7327528595924377, + "learning_rate": 0.00015669048650206313, + "loss": 2.6487, + "step": 6232 + }, + { + "epoch": 0.5030263901218627, + "grad_norm": 0.7114281058311462, + "learning_rate": 0.00015667748081344074, + "loss": 2.5779, + "step": 6233 + }, + { + "epoch": 0.5031070938584457, + "grad_norm": 0.7908105850219727, + "learning_rate": 0.00015666447371226737, + "loss": 2.6099, 
+ "step": 6234 + }, + { + "epoch": 0.5031877975950286, + "grad_norm": 0.7823575139045715, + "learning_rate": 0.00015665146519886725, + "loss": 2.6339, + "step": 6235 + }, + { + "epoch": 0.5032685013316116, + "grad_norm": 0.7404836416244507, + "learning_rate": 0.00015663845527356447, + "loss": 2.6035, + "step": 6236 + }, + { + "epoch": 0.5033492050681947, + "grad_norm": 0.7448995113372803, + "learning_rate": 0.00015662544393668334, + "loss": 2.6566, + "step": 6237 + }, + { + "epoch": 0.5034299088047777, + "grad_norm": 0.7209747433662415, + "learning_rate": 0.00015661243118854815, + "loss": 2.682, + "step": 6238 + }, + { + "epoch": 0.5035106125413606, + "grad_norm": 0.691759467124939, + "learning_rate": 0.00015659941702948315, + "loss": 2.6435, + "step": 6239 + }, + { + "epoch": 0.5035913162779436, + "grad_norm": 0.7646063566207886, + "learning_rate": 0.00015658640145981275, + "loss": 2.591, + "step": 6240 + }, + { + "epoch": 0.5036720200145267, + "grad_norm": 0.8319387435913086, + "learning_rate": 0.00015657338447986133, + "loss": 2.5937, + "step": 6241 + }, + { + "epoch": 0.5037527237511097, + "grad_norm": 0.729193389415741, + "learning_rate": 0.00015656036608995323, + "loss": 2.651, + "step": 6242 + }, + { + "epoch": 0.5038334274876927, + "grad_norm": 0.720098614692688, + "learning_rate": 0.000156547346290413, + "loss": 2.681, + "step": 6243 + }, + { + "epoch": 0.5039141312242756, + "grad_norm": 0.7172541618347168, + "learning_rate": 0.00015653432508156508, + "loss": 2.5906, + "step": 6244 + }, + { + "epoch": 0.5039948349608587, + "grad_norm": 0.7352481484413147, + "learning_rate": 0.00015652130246373398, + "loss": 2.6376, + "step": 6245 + }, + { + "epoch": 0.5040755386974417, + "grad_norm": 0.6664925813674927, + "learning_rate": 0.0001565082784372443, + "loss": 2.706, + "step": 6246 + }, + { + "epoch": 0.5041562424340247, + "grad_norm": 0.7292987704277039, + "learning_rate": 0.0001564952530024206, + "loss": 2.6149, + "step": 6247 + }, + { + "epoch": 
0.5042369461706077, + "grad_norm": 0.6904531121253967, + "learning_rate": 0.00015648222615958747, + "loss": 2.579, + "step": 6248 + }, + { + "epoch": 0.5043176499071907, + "grad_norm": 0.7385311722755432, + "learning_rate": 0.00015646919790906965, + "loss": 2.6137, + "step": 6249 + }, + { + "epoch": 0.5043983536437737, + "grad_norm": 0.7869507074356079, + "learning_rate": 0.0001564561682511918, + "loss": 2.6831, + "step": 6250 + }, + { + "epoch": 0.5044790573803567, + "grad_norm": 0.723680317401886, + "learning_rate": 0.00015644313718627867, + "loss": 2.6083, + "step": 6251 + }, + { + "epoch": 0.5045597611169397, + "grad_norm": 0.7029969692230225, + "learning_rate": 0.00015643010471465502, + "loss": 2.6462, + "step": 6252 + }, + { + "epoch": 0.5046404648535228, + "grad_norm": 0.818975031375885, + "learning_rate": 0.00015641707083664566, + "loss": 2.6393, + "step": 6253 + }, + { + "epoch": 0.5047211685901057, + "grad_norm": 0.7237667441368103, + "learning_rate": 0.0001564040355525754, + "loss": 2.5995, + "step": 6254 + }, + { + "epoch": 0.5048018723266887, + "grad_norm": 0.8613824248313904, + "learning_rate": 0.00015639099886276912, + "loss": 2.748, + "step": 6255 + }, + { + "epoch": 0.5048825760632717, + "grad_norm": 0.6802194118499756, + "learning_rate": 0.00015637796076755178, + "loss": 2.6393, + "step": 6256 + }, + { + "epoch": 0.5049632797998548, + "grad_norm": 0.7816255688667297, + "learning_rate": 0.00015636492126724823, + "loss": 2.6218, + "step": 6257 + }, + { + "epoch": 0.5050439835364378, + "grad_norm": 0.7443990707397461, + "learning_rate": 0.00015635188036218356, + "loss": 2.6181, + "step": 6258 + }, + { + "epoch": 0.5051246872730207, + "grad_norm": 0.7869458794593811, + "learning_rate": 0.0001563388380526827, + "loss": 2.6641, + "step": 6259 + }, + { + "epoch": 0.5052053910096037, + "grad_norm": 0.7423158288002014, + "learning_rate": 0.00015632579433907072, + "loss": 2.5849, + "step": 6260 + }, + { + "epoch": 0.5052860947461868, + "grad_norm": 
0.7888280153274536, + "learning_rate": 0.00015631274922167272, + "loss": 2.7095, + "step": 6261 + }, + { + "epoch": 0.5053667984827698, + "grad_norm": 0.7053405046463013, + "learning_rate": 0.0001562997027008138, + "loss": 2.5747, + "step": 6262 + }, + { + "epoch": 0.5054475022193528, + "grad_norm": 0.7930825352668762, + "learning_rate": 0.0001562866547768191, + "loss": 2.6359, + "step": 6263 + }, + { + "epoch": 0.5055282059559357, + "grad_norm": 0.7431469559669495, + "learning_rate": 0.0001562736054500139, + "loss": 2.6167, + "step": 6264 + }, + { + "epoch": 0.5056089096925188, + "grad_norm": 0.8395694494247437, + "learning_rate": 0.00015626055472072324, + "loss": 2.7217, + "step": 6265 + }, + { + "epoch": 0.5056896134291018, + "grad_norm": 0.7318898439407349, + "learning_rate": 0.0001562475025892726, + "loss": 2.6866, + "step": 6266 + }, + { + "epoch": 0.5057703171656848, + "grad_norm": 0.7487025856971741, + "learning_rate": 0.0001562344490559871, + "loss": 2.7206, + "step": 6267 + }, + { + "epoch": 0.5058510209022677, + "grad_norm": 0.8187269568443298, + "learning_rate": 0.00015622139412119212, + "loss": 2.658, + "step": 6268 + }, + { + "epoch": 0.5059317246388508, + "grad_norm": 0.6714495420455933, + "learning_rate": 0.00015620833778521307, + "loss": 2.6182, + "step": 6269 + }, + { + "epoch": 0.5060124283754338, + "grad_norm": 0.7556246519088745, + "learning_rate": 0.00015619528004837528, + "loss": 2.6502, + "step": 6270 + }, + { + "epoch": 0.5060931321120168, + "grad_norm": 0.6989960074424744, + "learning_rate": 0.00015618222091100424, + "loss": 2.6031, + "step": 6271 + }, + { + "epoch": 0.5061738358485998, + "grad_norm": 0.7002139091491699, + "learning_rate": 0.0001561691603734254, + "loss": 2.6563, + "step": 6272 + }, + { + "epoch": 0.5062545395851827, + "grad_norm": 0.7064816355705261, + "learning_rate": 0.00015615609843596423, + "loss": 2.6482, + "step": 6273 + }, + { + "epoch": 0.5063352433217658, + "grad_norm": 0.6971433162689209, + "learning_rate": 
0.00015614303509894634, + "loss": 2.6522, + "step": 6274 + }, + { + "epoch": 0.5064159470583488, + "grad_norm": 0.6982942223548889, + "learning_rate": 0.0001561299703626972, + "loss": 2.6477, + "step": 6275 + }, + { + "epoch": 0.5064966507949318, + "grad_norm": 0.7219811081886292, + "learning_rate": 0.0001561169042275425, + "loss": 2.6514, + "step": 6276 + }, + { + "epoch": 0.5065773545315148, + "grad_norm": 0.7391932010650635, + "learning_rate": 0.00015610383669380787, + "loss": 2.698, + "step": 6277 + }, + { + "epoch": 0.5066580582680978, + "grad_norm": 0.7852853536605835, + "learning_rate": 0.00015609076776181894, + "loss": 2.6281, + "step": 6278 + }, + { + "epoch": 0.5067387620046808, + "grad_norm": 0.7435647249221802, + "learning_rate": 0.00015607769743190147, + "loss": 2.6403, + "step": 6279 + }, + { + "epoch": 0.5068194657412638, + "grad_norm": 0.7300949096679688, + "learning_rate": 0.00015606462570438119, + "loss": 2.6125, + "step": 6280 + }, + { + "epoch": 0.5069001694778468, + "grad_norm": 0.7081549167633057, + "learning_rate": 0.00015605155257958388, + "loss": 2.6192, + "step": 6281 + }, + { + "epoch": 0.5069808732144299, + "grad_norm": 0.709020733833313, + "learning_rate": 0.00015603847805783537, + "loss": 2.6745, + "step": 6282 + }, + { + "epoch": 0.5070615769510128, + "grad_norm": 0.691684901714325, + "learning_rate": 0.0001560254021394615, + "loss": 2.5638, + "step": 6283 + }, + { + "epoch": 0.5071422806875958, + "grad_norm": 0.8338537812232971, + "learning_rate": 0.00015601232482478813, + "loss": 2.5835, + "step": 6284 + }, + { + "epoch": 0.5072229844241788, + "grad_norm": 0.659436047077179, + "learning_rate": 0.00015599924611414126, + "loss": 2.601, + "step": 6285 + }, + { + "epoch": 0.5073036881607619, + "grad_norm": 0.72590172290802, + "learning_rate": 0.00015598616600784676, + "loss": 2.602, + "step": 6286 + }, + { + "epoch": 0.5073843918973449, + "grad_norm": 0.6704443693161011, + "learning_rate": 0.00015597308450623066, + "loss": 2.5703, + 
"step": 6287 + }, + { + "epoch": 0.5074650956339278, + "grad_norm": 0.7298632264137268, + "learning_rate": 0.00015596000160961898, + "loss": 2.6859, + "step": 6288 + }, + { + "epoch": 0.5075457993705108, + "grad_norm": 0.6900345087051392, + "learning_rate": 0.00015594691731833776, + "loss": 2.6264, + "step": 6289 + }, + { + "epoch": 0.5076265031070939, + "grad_norm": 0.6705992221832275, + "learning_rate": 0.0001559338316327131, + "loss": 2.6135, + "step": 6290 + }, + { + "epoch": 0.5077072068436769, + "grad_norm": 0.691545307636261, + "learning_rate": 0.0001559207445530712, + "loss": 2.6538, + "step": 6291 + }, + { + "epoch": 0.5077879105802598, + "grad_norm": 0.6579985618591309, + "learning_rate": 0.00015590765607973811, + "loss": 2.6224, + "step": 6292 + }, + { + "epoch": 0.5078686143168428, + "grad_norm": 0.6938790678977966, + "learning_rate": 0.00015589456621304014, + "loss": 2.5932, + "step": 6293 + }, + { + "epoch": 0.5079493180534259, + "grad_norm": 0.7421671748161316, + "learning_rate": 0.00015588147495330346, + "loss": 2.7098, + "step": 6294 + }, + { + "epoch": 0.5080300217900089, + "grad_norm": 0.7076674699783325, + "learning_rate": 0.0001558683823008543, + "loss": 2.664, + "step": 6295 + }, + { + "epoch": 0.5081107255265919, + "grad_norm": 0.6829726696014404, + "learning_rate": 0.00015585528825601906, + "loss": 2.6029, + "step": 6296 + }, + { + "epoch": 0.5081914292631748, + "grad_norm": 0.6968080401420593, + "learning_rate": 0.000155842192819124, + "loss": 2.6256, + "step": 6297 + }, + { + "epoch": 0.5082721329997579, + "grad_norm": 0.7453410625457764, + "learning_rate": 0.00015582909599049554, + "loss": 2.6577, + "step": 6298 + }, + { + "epoch": 0.5083528367363409, + "grad_norm": 0.6603519916534424, + "learning_rate": 0.00015581599777046007, + "loss": 2.6066, + "step": 6299 + }, + { + "epoch": 0.5084335404729239, + "grad_norm": 0.7096173763275146, + "learning_rate": 0.00015580289815934401, + "loss": 2.5488, + "step": 6300 + }, + { + "epoch": 
0.5085142442095069, + "grad_norm": 0.799298107624054, + "learning_rate": 0.0001557897971574739, + "loss": 2.6021, + "step": 6301 + }, + { + "epoch": 0.50859494794609, + "grad_norm": 0.6820314526557922, + "learning_rate": 0.00015577669476517618, + "loss": 2.6276, + "step": 6302 + }, + { + "epoch": 0.5086756516826729, + "grad_norm": 0.7119347453117371, + "learning_rate": 0.00015576359098277742, + "loss": 2.6627, + "step": 6303 + }, + { + "epoch": 0.5087563554192559, + "grad_norm": 0.7638720273971558, + "learning_rate": 0.00015575048581060422, + "loss": 2.6824, + "step": 6304 + }, + { + "epoch": 0.5088370591558389, + "grad_norm": 0.7360339164733887, + "learning_rate": 0.00015573737924898316, + "loss": 2.5805, + "step": 6305 + }, + { + "epoch": 0.508917762892422, + "grad_norm": 0.7220984697341919, + "learning_rate": 0.00015572427129824091, + "loss": 2.6374, + "step": 6306 + }, + { + "epoch": 0.5089984666290049, + "grad_norm": 0.670964777469635, + "learning_rate": 0.00015571116195870418, + "loss": 2.6371, + "step": 6307 + }, + { + "epoch": 0.5090791703655879, + "grad_norm": 0.7826075553894043, + "learning_rate": 0.00015569805123069968, + "loss": 2.7666, + "step": 6308 + }, + { + "epoch": 0.5091598741021709, + "grad_norm": 0.7691593766212463, + "learning_rate": 0.00015568493911455412, + "loss": 2.6242, + "step": 6309 + }, + { + "epoch": 0.509240577838754, + "grad_norm": 0.714500367641449, + "learning_rate": 0.0001556718256105943, + "loss": 2.6551, + "step": 6310 + }, + { + "epoch": 0.509321281575337, + "grad_norm": 0.7634009718894958, + "learning_rate": 0.00015565871071914706, + "loss": 2.7069, + "step": 6311 + }, + { + "epoch": 0.5094019853119199, + "grad_norm": 0.7134168148040771, + "learning_rate": 0.00015564559444053926, + "loss": 2.5816, + "step": 6312 + }, + { + "epoch": 0.5094826890485029, + "grad_norm": 0.6548121571540833, + "learning_rate": 0.0001556324767750978, + "loss": 2.6192, + "step": 6313 + }, + { + "epoch": 0.509563392785086, + "grad_norm": 
0.7244428992271423, + "learning_rate": 0.0001556193577231496, + "loss": 2.6072, + "step": 6314 + }, + { + "epoch": 0.509644096521669, + "grad_norm": 0.6976662278175354, + "learning_rate": 0.0001556062372850216, + "loss": 2.6148, + "step": 6315 + }, + { + "epoch": 0.509724800258252, + "grad_norm": 0.772726833820343, + "learning_rate": 0.00015559311546104083, + "loss": 2.6458, + "step": 6316 + }, + { + "epoch": 0.5098055039948349, + "grad_norm": 0.7976188659667969, + "learning_rate": 0.00015557999225153428, + "loss": 2.6772, + "step": 6317 + }, + { + "epoch": 0.509886207731418, + "grad_norm": 0.6458039283752441, + "learning_rate": 0.00015556686765682903, + "loss": 2.6143, + "step": 6318 + }, + { + "epoch": 0.509966911468001, + "grad_norm": 0.7295405268669128, + "learning_rate": 0.0001555537416772522, + "loss": 2.5919, + "step": 6319 + }, + { + "epoch": 0.510047615204584, + "grad_norm": 0.657978355884552, + "learning_rate": 0.00015554061431313093, + "loss": 2.6245, + "step": 6320 + }, + { + "epoch": 0.510128318941167, + "grad_norm": 0.6726922392845154, + "learning_rate": 0.00015552748556479232, + "loss": 2.6207, + "step": 6321 + }, + { + "epoch": 0.51020902267775, + "grad_norm": 0.7954673767089844, + "learning_rate": 0.00015551435543256363, + "loss": 2.7177, + "step": 6322 + }, + { + "epoch": 0.510289726414333, + "grad_norm": 0.7186735272407532, + "learning_rate": 0.00015550122391677211, + "loss": 2.5953, + "step": 6323 + }, + { + "epoch": 0.510370430150916, + "grad_norm": 0.7835420966148376, + "learning_rate": 0.00015548809101774498, + "loss": 2.7039, + "step": 6324 + }, + { + "epoch": 0.510451133887499, + "grad_norm": 0.6966592073440552, + "learning_rate": 0.00015547495673580962, + "loss": 2.6287, + "step": 6325 + }, + { + "epoch": 0.5105318376240819, + "grad_norm": 0.6676180362701416, + "learning_rate": 0.00015546182107129328, + "loss": 2.638, + "step": 6326 + }, + { + "epoch": 0.510612541360665, + "grad_norm": 0.7285657525062561, + "learning_rate": 
0.0001554486840245234, + "loss": 2.6661, + "step": 6327 + }, + { + "epoch": 0.510693245097248, + "grad_norm": 0.6453657150268555, + "learning_rate": 0.00015543554559582735, + "loss": 2.715, + "step": 6328 + }, + { + "epoch": 0.510773948833831, + "grad_norm": 0.7364684343338013, + "learning_rate": 0.0001554224057855326, + "loss": 2.6475, + "step": 6329 + }, + { + "epoch": 0.510854652570414, + "grad_norm": 0.670894980430603, + "learning_rate": 0.00015540926459396665, + "loss": 2.6091, + "step": 6330 + }, + { + "epoch": 0.510935356306997, + "grad_norm": 0.6750168204307556, + "learning_rate": 0.00015539612202145696, + "loss": 2.6473, + "step": 6331 + }, + { + "epoch": 0.51101606004358, + "grad_norm": 0.6552454233169556, + "learning_rate": 0.0001553829780683311, + "loss": 2.6158, + "step": 6332 + }, + { + "epoch": 0.511096763780163, + "grad_norm": 0.7387828230857849, + "learning_rate": 0.00015536983273491668, + "loss": 2.6219, + "step": 6333 + }, + { + "epoch": 0.511177467516746, + "grad_norm": 0.6993975639343262, + "learning_rate": 0.00015535668602154127, + "loss": 2.6446, + "step": 6334 + }, + { + "epoch": 0.5112581712533291, + "grad_norm": 0.6491217613220215, + "learning_rate": 0.00015534353792853254, + "loss": 2.6404, + "step": 6335 + }, + { + "epoch": 0.511338874989912, + "grad_norm": 0.7165521383285522, + "learning_rate": 0.0001553303884562182, + "loss": 2.6339, + "step": 6336 + }, + { + "epoch": 0.511419578726495, + "grad_norm": 0.7363756895065308, + "learning_rate": 0.0001553172376049259, + "loss": 2.6411, + "step": 6337 + }, + { + "epoch": 0.511500282463078, + "grad_norm": 0.7148438096046448, + "learning_rate": 0.00015530408537498347, + "loss": 2.5617, + "step": 6338 + }, + { + "epoch": 0.5115809861996611, + "grad_norm": 0.7140451669692993, + "learning_rate": 0.00015529093176671864, + "loss": 2.5898, + "step": 6339 + }, + { + "epoch": 0.5116616899362441, + "grad_norm": 0.7799252271652222, + "learning_rate": 0.00015527777678045926, + "loss": 2.6176, + "step": 
6340 + }, + { + "epoch": 0.511742393672827, + "grad_norm": 0.7292928099632263, + "learning_rate": 0.00015526462041653323, + "loss": 2.6722, + "step": 6341 + }, + { + "epoch": 0.51182309740941, + "grad_norm": 0.6986904740333557, + "learning_rate": 0.00015525146267526837, + "loss": 2.6154, + "step": 6342 + }, + { + "epoch": 0.5119038011459931, + "grad_norm": 0.7239612936973572, + "learning_rate": 0.00015523830355699262, + "loss": 2.5664, + "step": 6343 + }, + { + "epoch": 0.5119845048825761, + "grad_norm": 0.6805121898651123, + "learning_rate": 0.00015522514306203395, + "loss": 2.6204, + "step": 6344 + }, + { + "epoch": 0.512065208619159, + "grad_norm": 0.7036689519882202, + "learning_rate": 0.00015521198119072035, + "loss": 2.6211, + "step": 6345 + }, + { + "epoch": 0.512145912355742, + "grad_norm": 0.7155849933624268, + "learning_rate": 0.00015519881794337988, + "loss": 2.6074, + "step": 6346 + }, + { + "epoch": 0.5122266160923251, + "grad_norm": 0.7183938026428223, + "learning_rate": 0.00015518565332034057, + "loss": 2.6148, + "step": 6347 + }, + { + "epoch": 0.5123073198289081, + "grad_norm": 0.7053570747375488, + "learning_rate": 0.0001551724873219305, + "loss": 2.6476, + "step": 6348 + }, + { + "epoch": 0.5123880235654911, + "grad_norm": 0.714846670627594, + "learning_rate": 0.00015515931994847785, + "loss": 2.5728, + "step": 6349 + }, + { + "epoch": 0.512468727302074, + "grad_norm": 0.7504729628562927, + "learning_rate": 0.00015514615120031076, + "loss": 2.6415, + "step": 6350 + }, + { + "epoch": 0.5125494310386571, + "grad_norm": 0.6940335035324097, + "learning_rate": 0.0001551329810777574, + "loss": 2.6115, + "step": 6351 + }, + { + "epoch": 0.5126301347752401, + "grad_norm": 0.7166119813919067, + "learning_rate": 0.00015511980958114608, + "loss": 2.6284, + "step": 6352 + }, + { + "epoch": 0.5127108385118231, + "grad_norm": 0.7787839770317078, + "learning_rate": 0.00015510663671080497, + "loss": 2.6385, + "step": 6353 + }, + { + "epoch": 0.5127915422484061, 
+ "grad_norm": 0.7298412322998047, + "learning_rate": 0.00015509346246706245, + "loss": 2.629, + "step": 6354 + }, + { + "epoch": 0.5128722459849892, + "grad_norm": 0.7918897271156311, + "learning_rate": 0.00015508028685024683, + "loss": 2.6777, + "step": 6355 + }, + { + "epoch": 0.5129529497215721, + "grad_norm": 0.6867843866348267, + "learning_rate": 0.00015506710986068646, + "loss": 2.6101, + "step": 6356 + }, + { + "epoch": 0.5130336534581551, + "grad_norm": 0.716468870639801, + "learning_rate": 0.00015505393149870978, + "loss": 2.6558, + "step": 6357 + }, + { + "epoch": 0.5131143571947381, + "grad_norm": 0.6704092621803284, + "learning_rate": 0.0001550407517646452, + "loss": 2.6128, + "step": 6358 + }, + { + "epoch": 0.5131950609313212, + "grad_norm": 0.820716381072998, + "learning_rate": 0.00015502757065882124, + "loss": 2.6052, + "step": 6359 + }, + { + "epoch": 0.5132757646679041, + "grad_norm": 0.7328094840049744, + "learning_rate": 0.00015501438818156635, + "loss": 2.6399, + "step": 6360 + }, + { + "epoch": 0.5133564684044871, + "grad_norm": 0.6602808833122253, + "learning_rate": 0.00015500120433320911, + "loss": 2.5509, + "step": 6361 + }, + { + "epoch": 0.5134371721410701, + "grad_norm": 0.7013166546821594, + "learning_rate": 0.00015498801911407805, + "loss": 2.6439, + "step": 6362 + }, + { + "epoch": 0.5135178758776532, + "grad_norm": 0.7415499091148376, + "learning_rate": 0.00015497483252450186, + "loss": 2.575, + "step": 6363 + }, + { + "epoch": 0.5135985796142362, + "grad_norm": 0.7262336015701294, + "learning_rate": 0.00015496164456480912, + "loss": 2.6815, + "step": 6364 + }, + { + "epoch": 0.5136792833508191, + "grad_norm": 0.7353699803352356, + "learning_rate": 0.0001549484552353285, + "loss": 2.6172, + "step": 6365 + }, + { + "epoch": 0.5137599870874021, + "grad_norm": 0.7005086541175842, + "learning_rate": 0.00015493526453638879, + "loss": 2.5945, + "step": 6366 + }, + { + "epoch": 0.5138406908239852, + "grad_norm": 0.7469770908355713, + 
"learning_rate": 0.00015492207246831864, + "loss": 2.6797, + "step": 6367 + }, + { + "epoch": 0.5139213945605682, + "grad_norm": 0.6768934726715088, + "learning_rate": 0.00015490887903144693, + "loss": 2.6369, + "step": 6368 + }, + { + "epoch": 0.5140020982971512, + "grad_norm": 0.7625820636749268, + "learning_rate": 0.00015489568422610237, + "loss": 2.6182, + "step": 6369 + }, + { + "epoch": 0.5140828020337341, + "grad_norm": 0.749351978302002, + "learning_rate": 0.00015488248805261388, + "loss": 2.6066, + "step": 6370 + }, + { + "epoch": 0.5141635057703172, + "grad_norm": 0.8369480967521667, + "learning_rate": 0.00015486929051131032, + "loss": 2.7627, + "step": 6371 + }, + { + "epoch": 0.5142442095069002, + "grad_norm": 0.6482037305831909, + "learning_rate": 0.0001548560916025206, + "loss": 2.609, + "step": 6372 + }, + { + "epoch": 0.5143249132434832, + "grad_norm": 0.6801851391792297, + "learning_rate": 0.0001548428913265737, + "loss": 2.5878, + "step": 6373 + }, + { + "epoch": 0.5144056169800661, + "grad_norm": 0.744926929473877, + "learning_rate": 0.0001548296896837986, + "loss": 2.6569, + "step": 6374 + }, + { + "epoch": 0.5144863207166491, + "grad_norm": 0.6862614750862122, + "learning_rate": 0.00015481648667452425, + "loss": 2.5626, + "step": 6375 + }, + { + "epoch": 0.5145670244532322, + "grad_norm": 0.7186449766159058, + "learning_rate": 0.0001548032822990798, + "loss": 2.6783, + "step": 6376 + }, + { + "epoch": 0.5146477281898152, + "grad_norm": 0.699715256690979, + "learning_rate": 0.0001547900765577943, + "loss": 2.6709, + "step": 6377 + }, + { + "epoch": 0.5147284319263982, + "grad_norm": 0.7272205352783203, + "learning_rate": 0.00015477686945099687, + "loss": 2.6076, + "step": 6378 + }, + { + "epoch": 0.5148091356629811, + "grad_norm": 0.7667459845542908, + "learning_rate": 0.00015476366097901667, + "loss": 2.6541, + "step": 6379 + }, + { + "epoch": 0.5148898393995642, + "grad_norm": 0.6538121700286865, + "learning_rate": 0.00015475045114218285, + 
"loss": 2.5806, + "step": 6380 + }, + { + "epoch": 0.5149705431361472, + "grad_norm": 0.7388994097709656, + "learning_rate": 0.00015473723994082473, + "loss": 2.6293, + "step": 6381 + }, + { + "epoch": 0.5150512468727302, + "grad_norm": 0.7044215202331543, + "learning_rate": 0.00015472402737527142, + "loss": 2.5755, + "step": 6382 + }, + { + "epoch": 0.5151319506093132, + "grad_norm": 0.6807994246482849, + "learning_rate": 0.00015471081344585236, + "loss": 2.6493, + "step": 6383 + }, + { + "epoch": 0.5152126543458962, + "grad_norm": 0.676278293132782, + "learning_rate": 0.00015469759815289681, + "loss": 2.6319, + "step": 6384 + }, + { + "epoch": 0.5152933580824792, + "grad_norm": 0.7515453696250916, + "learning_rate": 0.00015468438149673412, + "loss": 2.6415, + "step": 6385 + }, + { + "epoch": 0.5153740618190622, + "grad_norm": 0.8694239854812622, + "learning_rate": 0.0001546711634776937, + "loss": 2.5818, + "step": 6386 + }, + { + "epoch": 0.5154547655556452, + "grad_norm": 0.717090368270874, + "learning_rate": 0.000154657944096105, + "loss": 2.7132, + "step": 6387 + }, + { + "epoch": 0.5155354692922283, + "grad_norm": 0.7098804116249084, + "learning_rate": 0.00015464472335229742, + "loss": 2.564, + "step": 6388 + }, + { + "epoch": 0.5156161730288112, + "grad_norm": 0.6879690289497375, + "learning_rate": 0.0001546315012466005, + "loss": 2.6094, + "step": 6389 + }, + { + "epoch": 0.5156968767653942, + "grad_norm": 0.7110763788223267, + "learning_rate": 0.00015461827777934377, + "loss": 2.5982, + "step": 6390 + }, + { + "epoch": 0.5157775805019772, + "grad_norm": 0.7168039679527283, + "learning_rate": 0.00015460505295085677, + "loss": 2.5451, + "step": 6391 + }, + { + "epoch": 0.5158582842385603, + "grad_norm": 0.7059877514839172, + "learning_rate": 0.00015459182676146914, + "loss": 2.6655, + "step": 6392 + }, + { + "epoch": 0.5159389879751433, + "grad_norm": 0.7278143763542175, + "learning_rate": 0.00015457859921151043, + "loss": 2.6587, + "step": 6393 + }, + { + 
"epoch": 0.5160196917117262, + "grad_norm": 0.7301023602485657, + "learning_rate": 0.0001545653703013104, + "loss": 2.7672, + "step": 6394 + }, + { + "epoch": 0.5161003954483092, + "grad_norm": 0.6933302283287048, + "learning_rate": 0.0001545521400311987, + "loss": 2.5924, + "step": 6395 + }, + { + "epoch": 0.5161810991848923, + "grad_norm": 0.7074775099754333, + "learning_rate": 0.00015453890840150508, + "loss": 2.6663, + "step": 6396 + }, + { + "epoch": 0.5162618029214753, + "grad_norm": 0.7069801092147827, + "learning_rate": 0.00015452567541255924, + "loss": 2.6791, + "step": 6397 + }, + { + "epoch": 0.5163425066580583, + "grad_norm": 0.6586462259292603, + "learning_rate": 0.00015451244106469108, + "loss": 2.6368, + "step": 6398 + }, + { + "epoch": 0.5164232103946412, + "grad_norm": 0.6862531900405884, + "learning_rate": 0.00015449920535823042, + "loss": 2.7099, + "step": 6399 + }, + { + "epoch": 0.5165039141312243, + "grad_norm": 0.7177795767784119, + "learning_rate": 0.00015448596829350706, + "loss": 2.5921, + "step": 6400 + }, + { + "epoch": 0.5165846178678073, + "grad_norm": 0.6936569213867188, + "learning_rate": 0.00015447272987085094, + "loss": 2.5739, + "step": 6401 + }, + { + "epoch": 0.5166653216043903, + "grad_norm": 0.7394363284111023, + "learning_rate": 0.00015445949009059202, + "loss": 2.5941, + "step": 6402 + }, + { + "epoch": 0.5167460253409732, + "grad_norm": 0.6713366508483887, + "learning_rate": 0.00015444624895306027, + "loss": 2.574, + "step": 6403 + }, + { + "epoch": 0.5168267290775563, + "grad_norm": 0.679128885269165, + "learning_rate": 0.0001544330064585856, + "loss": 2.6422, + "step": 6404 + }, + { + "epoch": 0.5169074328141393, + "grad_norm": 0.6803367137908936, + "learning_rate": 0.0001544197626074982, + "loss": 2.6503, + "step": 6405 + }, + { + "epoch": 0.5169881365507223, + "grad_norm": 0.8009794354438782, + "learning_rate": 0.000154406517400128, + "loss": 2.6434, + "step": 6406 + }, + { + "epoch": 0.5170688402873053, + "grad_norm": 
0.7292529344558716, + "learning_rate": 0.00015439327083680517, + "loss": 2.6333, + "step": 6407 + }, + { + "epoch": 0.5171495440238884, + "grad_norm": 0.67046719789505, + "learning_rate": 0.00015438002291785988, + "loss": 2.5791, + "step": 6408 + }, + { + "epoch": 0.5172302477604713, + "grad_norm": 0.755501925945282, + "learning_rate": 0.00015436677364362225, + "loss": 2.5558, + "step": 6409 + }, + { + "epoch": 0.5173109514970543, + "grad_norm": 0.6957115530967712, + "learning_rate": 0.0001543535230144225, + "loss": 2.5839, + "step": 6410 + }, + { + "epoch": 0.5173916552336373, + "grad_norm": 0.6629074215888977, + "learning_rate": 0.0001543402710305909, + "loss": 2.6529, + "step": 6411 + }, + { + "epoch": 0.5174723589702204, + "grad_norm": 0.6647019386291504, + "learning_rate": 0.00015432701769245766, + "loss": 2.589, + "step": 6412 + }, + { + "epoch": 0.5175530627068033, + "grad_norm": 0.6472512483596802, + "learning_rate": 0.00015431376300035316, + "loss": 2.6184, + "step": 6413 + }, + { + "epoch": 0.5176337664433863, + "grad_norm": 0.6900136470794678, + "learning_rate": 0.0001543005069546077, + "loss": 2.7029, + "step": 6414 + }, + { + "epoch": 0.5177144701799693, + "grad_norm": 0.7702177166938782, + "learning_rate": 0.00015428724955555165, + "loss": 2.6189, + "step": 6415 + }, + { + "epoch": 0.5177951739165524, + "grad_norm": 0.641655445098877, + "learning_rate": 0.00015427399080351545, + "loss": 2.6486, + "step": 6416 + }, + { + "epoch": 0.5178758776531354, + "grad_norm": 0.6826485991477966, + "learning_rate": 0.00015426073069882952, + "loss": 2.6105, + "step": 6417 + }, + { + "epoch": 0.5179565813897183, + "grad_norm": 0.749812662601471, + "learning_rate": 0.00015424746924182434, + "loss": 2.5644, + "step": 6418 + }, + { + "epoch": 0.5180372851263013, + "grad_norm": 0.6737890243530273, + "learning_rate": 0.0001542342064328304, + "loss": 2.686, + "step": 6419 + }, + { + "epoch": 0.5181179888628844, + "grad_norm": 0.7131822109222412, + "learning_rate": 
0.0001542209422721783, + "loss": 2.697, + "step": 6420 + }, + { + "epoch": 0.5181986925994674, + "grad_norm": 0.7543746829032898, + "learning_rate": 0.0001542076767601986, + "loss": 2.6349, + "step": 6421 + }, + { + "epoch": 0.5182793963360504, + "grad_norm": 0.7589309215545654, + "learning_rate": 0.00015419440989722184, + "loss": 2.63, + "step": 6422 + }, + { + "epoch": 0.5183601000726333, + "grad_norm": 0.7036365866661072, + "learning_rate": 0.00015418114168357872, + "loss": 2.605, + "step": 6423 + }, + { + "epoch": 0.5184408038092164, + "grad_norm": 0.733161985874176, + "learning_rate": 0.00015416787211959998, + "loss": 2.6708, + "step": 6424 + }, + { + "epoch": 0.5185215075457994, + "grad_norm": 0.6928101181983948, + "learning_rate": 0.00015415460120561623, + "loss": 2.6549, + "step": 6425 + }, + { + "epoch": 0.5186022112823824, + "grad_norm": 0.6557250022888184, + "learning_rate": 0.00015414132894195825, + "loss": 2.6185, + "step": 6426 + }, + { + "epoch": 0.5186829150189654, + "grad_norm": 0.7236297726631165, + "learning_rate": 0.00015412805532895684, + "loss": 2.6185, + "step": 6427 + }, + { + "epoch": 0.5187636187555483, + "grad_norm": 0.7194060683250427, + "learning_rate": 0.0001541147803669428, + "loss": 2.6123, + "step": 6428 + }, + { + "epoch": 0.5188443224921314, + "grad_norm": 0.7077342867851257, + "learning_rate": 0.00015410150405624696, + "loss": 2.6628, + "step": 6429 + }, + { + "epoch": 0.5189250262287144, + "grad_norm": 0.7036150693893433, + "learning_rate": 0.00015408822639720023, + "loss": 2.5966, + "step": 6430 + }, + { + "epoch": 0.5190057299652974, + "grad_norm": 0.7047349810600281, + "learning_rate": 0.00015407494739013352, + "loss": 2.6626, + "step": 6431 + }, + { + "epoch": 0.5190864337018803, + "grad_norm": 0.7537584900856018, + "learning_rate": 0.00015406166703537777, + "loss": 2.6452, + "step": 6432 + }, + { + "epoch": 0.5191671374384634, + "grad_norm": 0.7944707870483398, + "learning_rate": 0.00015404838533326394, + "loss": 2.6834, + 
"step": 6433 + }, + { + "epoch": 0.5192478411750464, + "grad_norm": 0.8602458238601685, + "learning_rate": 0.00015403510228412305, + "loss": 2.6238, + "step": 6434 + }, + { + "epoch": 0.5193285449116294, + "grad_norm": 0.7181896567344666, + "learning_rate": 0.0001540218178882862, + "loss": 2.652, + "step": 6435 + }, + { + "epoch": 0.5194092486482124, + "grad_norm": 0.7470960021018982, + "learning_rate": 0.0001540085321460844, + "loss": 2.6703, + "step": 6436 + }, + { + "epoch": 0.5194899523847955, + "grad_norm": 0.8249944448471069, + "learning_rate": 0.00015399524505784883, + "loss": 2.5945, + "step": 6437 + }, + { + "epoch": 0.5195706561213784, + "grad_norm": 0.7332444190979004, + "learning_rate": 0.00015398195662391057, + "loss": 2.6472, + "step": 6438 + }, + { + "epoch": 0.5196513598579614, + "grad_norm": 0.7727739810943604, + "learning_rate": 0.0001539686668446009, + "loss": 2.6276, + "step": 6439 + }, + { + "epoch": 0.5197320635945444, + "grad_norm": 0.7161617279052734, + "learning_rate": 0.00015395537572025094, + "loss": 2.624, + "step": 6440 + }, + { + "epoch": 0.5198127673311275, + "grad_norm": 0.7657529711723328, + "learning_rate": 0.00015394208325119198, + "loss": 2.6604, + "step": 6441 + }, + { + "epoch": 0.5198934710677104, + "grad_norm": 0.732904314994812, + "learning_rate": 0.00015392878943775527, + "loss": 2.6334, + "step": 6442 + }, + { + "epoch": 0.5199741748042934, + "grad_norm": 0.7058991193771362, + "learning_rate": 0.0001539154942802722, + "loss": 2.5936, + "step": 6443 + }, + { + "epoch": 0.5200548785408764, + "grad_norm": 0.7328821420669556, + "learning_rate": 0.00015390219777907405, + "loss": 2.5969, + "step": 6444 + }, + { + "epoch": 0.5201355822774595, + "grad_norm": 0.7899969220161438, + "learning_rate": 0.00015388889993449224, + "loss": 2.5856, + "step": 6445 + }, + { + "epoch": 0.5202162860140425, + "grad_norm": 0.6963860392570496, + "learning_rate": 0.00015387560074685817, + "loss": 2.6139, + "step": 6446 + }, + { + "epoch": 
0.5202969897506254, + "grad_norm": 0.812053918838501, + "learning_rate": 0.00015386230021650327, + "loss": 2.716, + "step": 6447 + }, + { + "epoch": 0.5203776934872084, + "grad_norm": 0.766781210899353, + "learning_rate": 0.0001538489983437591, + "loss": 2.6509, + "step": 6448 + }, + { + "epoch": 0.5204583972237915, + "grad_norm": 0.6877299547195435, + "learning_rate": 0.00015383569512895712, + "loss": 2.6076, + "step": 6449 + }, + { + "epoch": 0.5205391009603745, + "grad_norm": 0.7009176015853882, + "learning_rate": 0.00015382239057242888, + "loss": 2.608, + "step": 6450 + }, + { + "epoch": 0.5206198046969575, + "grad_norm": 0.7187578678131104, + "learning_rate": 0.000153809084674506, + "loss": 2.5946, + "step": 6451 + }, + { + "epoch": 0.5207005084335404, + "grad_norm": 0.7242687344551086, + "learning_rate": 0.00015379577743552001, + "loss": 2.6752, + "step": 6452 + }, + { + "epoch": 0.5207812121701235, + "grad_norm": 0.7668174505233765, + "learning_rate": 0.00015378246885580266, + "loss": 2.6694, + "step": 6453 + }, + { + "epoch": 0.5208619159067065, + "grad_norm": 0.7676039338111877, + "learning_rate": 0.00015376915893568557, + "loss": 2.6379, + "step": 6454 + }, + { + "epoch": 0.5209426196432895, + "grad_norm": 0.7394412159919739, + "learning_rate": 0.00015375584767550053, + "loss": 2.6046, + "step": 6455 + }, + { + "epoch": 0.5210233233798724, + "grad_norm": 0.7246636748313904, + "learning_rate": 0.00015374253507557923, + "loss": 2.592, + "step": 6456 + }, + { + "epoch": 0.5211040271164555, + "grad_norm": 0.7121255993843079, + "learning_rate": 0.00015372922113625345, + "loss": 2.634, + "step": 6457 + }, + { + "epoch": 0.5211847308530385, + "grad_norm": 0.7378345131874084, + "learning_rate": 0.00015371590585785505, + "loss": 2.5753, + "step": 6458 + }, + { + "epoch": 0.5212654345896215, + "grad_norm": 0.6682030558586121, + "learning_rate": 0.00015370258924071587, + "loss": 2.6305, + "step": 6459 + }, + { + "epoch": 0.5213461383262045, + "grad_norm": 
0.7164177894592285, + "learning_rate": 0.00015368927128516776, + "loss": 2.7188, + "step": 6460 + }, + { + "epoch": 0.5214268420627876, + "grad_norm": 0.7341115474700928, + "learning_rate": 0.00015367595199154273, + "loss": 2.6204, + "step": 6461 + }, + { + "epoch": 0.5215075457993705, + "grad_norm": 0.6781840920448303, + "learning_rate": 0.00015366263136017258, + "loss": 2.6104, + "step": 6462 + }, + { + "epoch": 0.5215882495359535, + "grad_norm": 0.7029077410697937, + "learning_rate": 0.0001536493093913894, + "loss": 2.6055, + "step": 6463 + }, + { + "epoch": 0.5216689532725365, + "grad_norm": 0.6958553194999695, + "learning_rate": 0.00015363598608552522, + "loss": 2.5991, + "step": 6464 + }, + { + "epoch": 0.5217496570091196, + "grad_norm": 0.6919750571250916, + "learning_rate": 0.00015362266144291207, + "loss": 2.6022, + "step": 6465 + }, + { + "epoch": 0.5218303607457025, + "grad_norm": 0.6980622410774231, + "learning_rate": 0.000153609335463882, + "loss": 2.6289, + "step": 6466 + }, + { + "epoch": 0.5219110644822855, + "grad_norm": 0.7468248009681702, + "learning_rate": 0.00015359600814876715, + "loss": 2.6327, + "step": 6467 + }, + { + "epoch": 0.5219917682188685, + "grad_norm": 0.7183729410171509, + "learning_rate": 0.00015358267949789966, + "loss": 2.6389, + "step": 6468 + }, + { + "epoch": 0.5220724719554516, + "grad_norm": 0.6558868885040283, + "learning_rate": 0.00015356934951161178, + "loss": 2.6261, + "step": 6469 + }, + { + "epoch": 0.5221531756920346, + "grad_norm": 0.8000216484069824, + "learning_rate": 0.00015355601819023562, + "loss": 2.6908, + "step": 6470 + }, + { + "epoch": 0.5222338794286175, + "grad_norm": 0.775056004524231, + "learning_rate": 0.00015354268553410355, + "loss": 2.6763, + "step": 6471 + }, + { + "epoch": 0.5223145831652005, + "grad_norm": 0.7345123291015625, + "learning_rate": 0.00015352935154354776, + "loss": 2.582, + "step": 6472 + }, + { + "epoch": 0.5223952869017836, + "grad_norm": 0.731311023235321, + "learning_rate": 
0.0001535160162189006, + "loss": 2.6519, + "step": 6473 + }, + { + "epoch": 0.5224759906383666, + "grad_norm": 0.6481007933616638, + "learning_rate": 0.00015350267956049443, + "loss": 2.5695, + "step": 6474 + }, + { + "epoch": 0.5225566943749496, + "grad_norm": 0.7698814868927002, + "learning_rate": 0.00015348934156866163, + "loss": 2.5732, + "step": 6475 + }, + { + "epoch": 0.5226373981115325, + "grad_norm": 0.7404680848121643, + "learning_rate": 0.00015347600224373462, + "loss": 2.5826, + "step": 6476 + }, + { + "epoch": 0.5227181018481155, + "grad_norm": 0.6965613961219788, + "learning_rate": 0.00015346266158604584, + "loss": 2.6069, + "step": 6477 + }, + { + "epoch": 0.5227988055846986, + "grad_norm": 0.6611152291297913, + "learning_rate": 0.00015344931959592777, + "loss": 2.4937, + "step": 6478 + }, + { + "epoch": 0.5228795093212816, + "grad_norm": 0.7418150305747986, + "learning_rate": 0.00015343597627371296, + "loss": 2.5747, + "step": 6479 + }, + { + "epoch": 0.5229602130578646, + "grad_norm": 0.6847610473632812, + "learning_rate": 0.00015342263161973393, + "loss": 2.5906, + "step": 6480 + }, + { + "epoch": 0.5230409167944475, + "grad_norm": 0.7054881453514099, + "learning_rate": 0.00015340928563432326, + "loss": 2.5914, + "step": 6481 + }, + { + "epoch": 0.5231216205310306, + "grad_norm": 0.6918888092041016, + "learning_rate": 0.0001533959383178136, + "loss": 2.6412, + "step": 6482 + }, + { + "epoch": 0.5232023242676136, + "grad_norm": 0.7232856154441833, + "learning_rate": 0.00015338258967053755, + "loss": 2.6364, + "step": 6483 + }, + { + "epoch": 0.5232830280041966, + "grad_norm": 0.7345031499862671, + "learning_rate": 0.00015336923969282786, + "loss": 2.6649, + "step": 6484 + }, + { + "epoch": 0.5233637317407795, + "grad_norm": 0.7644383907318115, + "learning_rate": 0.0001533558883850172, + "loss": 2.6949, + "step": 6485 + }, + { + "epoch": 0.5234444354773626, + "grad_norm": 0.6532372832298279, + "learning_rate": 0.0001533425357474383, + "loss": 
2.5915, + "step": 6486 + }, + { + "epoch": 0.5235251392139456, + "grad_norm": 0.7089118361473083, + "learning_rate": 0.000153329181780424, + "loss": 2.6446, + "step": 6487 + }, + { + "epoch": 0.5236058429505286, + "grad_norm": 0.6966068148612976, + "learning_rate": 0.00015331582648430705, + "loss": 2.6764, + "step": 6488 + }, + { + "epoch": 0.5236865466871116, + "grad_norm": 0.7130835056304932, + "learning_rate": 0.00015330246985942035, + "loss": 2.6279, + "step": 6489 + }, + { + "epoch": 0.5237672504236947, + "grad_norm": 0.729727029800415, + "learning_rate": 0.00015328911190609678, + "loss": 2.612, + "step": 6490 + }, + { + "epoch": 0.5238479541602776, + "grad_norm": 0.6804213523864746, + "learning_rate": 0.0001532757526246692, + "loss": 2.6113, + "step": 6491 + }, + { + "epoch": 0.5239286578968606, + "grad_norm": 0.7324437499046326, + "learning_rate": 0.0001532623920154707, + "loss": 2.6054, + "step": 6492 + }, + { + "epoch": 0.5240093616334436, + "grad_norm": 0.6166699528694153, + "learning_rate": 0.00015324903007883406, + "loss": 2.5822, + "step": 6493 + }, + { + "epoch": 0.5240900653700267, + "grad_norm": 0.7339944839477539, + "learning_rate": 0.00015323566681509242, + "loss": 2.6204, + "step": 6494 + }, + { + "epoch": 0.5241707691066096, + "grad_norm": 0.7267727255821228, + "learning_rate": 0.00015322230222457886, + "loss": 2.6094, + "step": 6495 + }, + { + "epoch": 0.5242514728431926, + "grad_norm": 0.6417120695114136, + "learning_rate": 0.00015320893630762635, + "loss": 2.6044, + "step": 6496 + }, + { + "epoch": 0.5243321765797756, + "grad_norm": 0.7092922329902649, + "learning_rate": 0.00015319556906456808, + "loss": 2.6428, + "step": 6497 + }, + { + "epoch": 0.5244128803163587, + "grad_norm": 0.7482922673225403, + "learning_rate": 0.00015318220049573714, + "loss": 2.6025, + "step": 6498 + }, + { + "epoch": 0.5244935840529417, + "grad_norm": 0.691925048828125, + "learning_rate": 0.00015316883060146675, + "loss": 2.6308, + "step": 6499 + }, + { + "epoch": 
0.5245742877895246, + "grad_norm": 0.7084488272666931, + "learning_rate": 0.00015315545938209015, + "loss": 2.6535, + "step": 6500 + }, + { + "epoch": 0.5246549915261076, + "grad_norm": 0.7182802557945251, + "learning_rate": 0.00015314208683794056, + "loss": 2.6045, + "step": 6501 + }, + { + "epoch": 0.5247356952626907, + "grad_norm": 0.7043096423149109, + "learning_rate": 0.00015312871296935122, + "loss": 2.6465, + "step": 6502 + }, + { + "epoch": 0.5248163989992737, + "grad_norm": 0.7679466009140015, + "learning_rate": 0.00015311533777665547, + "loss": 2.6624, + "step": 6503 + }, + { + "epoch": 0.5248971027358567, + "grad_norm": 0.6825870275497437, + "learning_rate": 0.00015310196126018668, + "loss": 2.5548, + "step": 6504 + }, + { + "epoch": 0.5249778064724396, + "grad_norm": 0.7364058494567871, + "learning_rate": 0.00015308858342027816, + "loss": 2.6495, + "step": 6505 + }, + { + "epoch": 0.5250585102090227, + "grad_norm": 0.7333239316940308, + "learning_rate": 0.00015307520425726341, + "loss": 2.5835, + "step": 6506 + }, + { + "epoch": 0.5251392139456057, + "grad_norm": 0.7479620575904846, + "learning_rate": 0.00015306182377147583, + "loss": 2.6065, + "step": 6507 + }, + { + "epoch": 0.5252199176821887, + "grad_norm": 0.7347591519355774, + "learning_rate": 0.00015304844196324888, + "loss": 2.6624, + "step": 6508 + }, + { + "epoch": 0.5253006214187717, + "grad_norm": 0.6879193782806396, + "learning_rate": 0.0001530350588329161, + "loss": 2.6598, + "step": 6509 + }, + { + "epoch": 0.5253813251553547, + "grad_norm": 0.7841597199440002, + "learning_rate": 0.000153021674380811, + "loss": 2.53, + "step": 6510 + }, + { + "epoch": 0.5254620288919377, + "grad_norm": 0.7916845679283142, + "learning_rate": 0.0001530082886072672, + "loss": 2.6995, + "step": 6511 + }, + { + "epoch": 0.5255427326285207, + "grad_norm": 0.7066318988800049, + "learning_rate": 0.0001529949015126183, + "loss": 2.58, + "step": 6512 + }, + { + "epoch": 0.5256234363651037, + "grad_norm": 
0.6871134638786316, + "learning_rate": 0.00015298151309719787, + "loss": 2.6095, + "step": 6513 + }, + { + "epoch": 0.5257041401016868, + "grad_norm": 0.7479702830314636, + "learning_rate": 0.00015296812336133963, + "loss": 2.608, + "step": 6514 + }, + { + "epoch": 0.5257848438382697, + "grad_norm": 0.6772119402885437, + "learning_rate": 0.00015295473230537735, + "loss": 2.5679, + "step": 6515 + }, + { + "epoch": 0.5258655475748527, + "grad_norm": 0.7365416884422302, + "learning_rate": 0.0001529413399296447, + "loss": 2.6722, + "step": 6516 + }, + { + "epoch": 0.5259462513114357, + "grad_norm": 0.7538040280342102, + "learning_rate": 0.00015292794623447545, + "loss": 2.5562, + "step": 6517 + }, + { + "epoch": 0.5260269550480188, + "grad_norm": 0.7471820712089539, + "learning_rate": 0.00015291455122020344, + "loss": 2.7079, + "step": 6518 + }, + { + "epoch": 0.5261076587846018, + "grad_norm": 0.7605932354927063, + "learning_rate": 0.00015290115488716247, + "loss": 2.6696, + "step": 6519 + }, + { + "epoch": 0.5261883625211847, + "grad_norm": 0.7081854939460754, + "learning_rate": 0.00015288775723568647, + "loss": 2.6502, + "step": 6520 + }, + { + "epoch": 0.5262690662577677, + "grad_norm": 0.7236372828483582, + "learning_rate": 0.0001528743582661093, + "loss": 2.662, + "step": 6521 + }, + { + "epoch": 0.5263497699943508, + "grad_norm": 0.6710047721862793, + "learning_rate": 0.0001528609579787649, + "loss": 2.5947, + "step": 6522 + }, + { + "epoch": 0.5264304737309338, + "grad_norm": 0.709381103515625, + "learning_rate": 0.00015284755637398726, + "loss": 2.5922, + "step": 6523 + }, + { + "epoch": 0.5265111774675167, + "grad_norm": 0.7029775381088257, + "learning_rate": 0.00015283415345211033, + "loss": 2.6777, + "step": 6524 + }, + { + "epoch": 0.5265918812040997, + "grad_norm": 0.7250857949256897, + "learning_rate": 0.00015282074921346825, + "loss": 2.6027, + "step": 6525 + }, + { + "epoch": 0.5266725849406828, + "grad_norm": 0.7192760705947876, + "learning_rate": 
0.00015280734365839498, + "loss": 2.6544, + "step": 6526 + }, + { + "epoch": 0.5267532886772658, + "grad_norm": 0.693583071231842, + "learning_rate": 0.0001527939367872247, + "loss": 2.6302, + "step": 6527 + }, + { + "epoch": 0.5268339924138488, + "grad_norm": 0.7031428217887878, + "learning_rate": 0.00015278052860029145, + "loss": 2.6944, + "step": 6528 + }, + { + "epoch": 0.5269146961504317, + "grad_norm": 0.6986895799636841, + "learning_rate": 0.00015276711909792949, + "loss": 2.6595, + "step": 6529 + }, + { + "epoch": 0.5269953998870147, + "grad_norm": 0.7375979423522949, + "learning_rate": 0.000152753708280473, + "loss": 2.6839, + "step": 6530 + }, + { + "epoch": 0.5270761036235978, + "grad_norm": 0.7126755714416504, + "learning_rate": 0.0001527402961482562, + "loss": 2.5597, + "step": 6531 + }, + { + "epoch": 0.5271568073601808, + "grad_norm": 0.6631070971488953, + "learning_rate": 0.00015272688270161338, + "loss": 2.5566, + "step": 6532 + }, + { + "epoch": 0.5272375110967638, + "grad_norm": 0.6896609663963318, + "learning_rate": 0.00015271346794087874, + "loss": 2.5801, + "step": 6533 + }, + { + "epoch": 0.5273182148333467, + "grad_norm": 0.7437502145767212, + "learning_rate": 0.00015270005186638673, + "loss": 2.6572, + "step": 6534 + }, + { + "epoch": 0.5273989185699298, + "grad_norm": 0.7013052701950073, + "learning_rate": 0.00015268663447847166, + "loss": 2.621, + "step": 6535 + }, + { + "epoch": 0.5274796223065128, + "grad_norm": 0.7161773443222046, + "learning_rate": 0.00015267321577746795, + "loss": 2.5989, + "step": 6536 + }, + { + "epoch": 0.5275603260430958, + "grad_norm": 0.7654534578323364, + "learning_rate": 0.00015265979576371, + "loss": 2.6338, + "step": 6537 + }, + { + "epoch": 0.5276410297796787, + "grad_norm": 0.694646954536438, + "learning_rate": 0.0001526463744375323, + "loss": 2.6036, + "step": 6538 + }, + { + "epoch": 0.5277217335162618, + "grad_norm": 0.6594679355621338, + "learning_rate": 0.0001526329517992693, + "loss": 2.6256, + 
"step": 6539 + }, + { + "epoch": 0.5278024372528448, + "grad_norm": 0.6424389481544495, + "learning_rate": 0.00015261952784925557, + "loss": 2.6389, + "step": 6540 + }, + { + "epoch": 0.5278831409894278, + "grad_norm": 0.7465235590934753, + "learning_rate": 0.0001526061025878257, + "loss": 2.5449, + "step": 6541 + }, + { + "epoch": 0.5279638447260108, + "grad_norm": 0.6900132298469543, + "learning_rate": 0.0001525926760153142, + "loss": 2.5597, + "step": 6542 + }, + { + "epoch": 0.5280445484625939, + "grad_norm": 0.7505282163619995, + "learning_rate": 0.00015257924813205572, + "loss": 2.6526, + "step": 6543 + }, + { + "epoch": 0.5281252521991768, + "grad_norm": 0.72642582654953, + "learning_rate": 0.00015256581893838495, + "loss": 2.6593, + "step": 6544 + }, + { + "epoch": 0.5282059559357598, + "grad_norm": 0.6901132464408875, + "learning_rate": 0.00015255238843463656, + "loss": 2.6726, + "step": 6545 + }, + { + "epoch": 0.5282866596723428, + "grad_norm": 0.7741395831108093, + "learning_rate": 0.0001525389566211453, + "loss": 2.5929, + "step": 6546 + }, + { + "epoch": 0.5283673634089259, + "grad_norm": 0.7282403111457825, + "learning_rate": 0.00015252552349824585, + "loss": 2.5696, + "step": 6547 + }, + { + "epoch": 0.5284480671455088, + "grad_norm": 0.7421764731407166, + "learning_rate": 0.0001525120890662731, + "loss": 2.5593, + "step": 6548 + }, + { + "epoch": 0.5285287708820918, + "grad_norm": 0.6830468773841858, + "learning_rate": 0.00015249865332556182, + "loss": 2.6396, + "step": 6549 + }, + { + "epoch": 0.5286094746186748, + "grad_norm": 0.6758440732955933, + "learning_rate": 0.00015248521627644684, + "loss": 2.5375, + "step": 6550 + }, + { + "epoch": 0.5286901783552579, + "grad_norm": 0.6897253394126892, + "learning_rate": 0.00015247177791926308, + "loss": 2.6148, + "step": 6551 + }, + { + "epoch": 0.5287708820918409, + "grad_norm": 0.6391426920890808, + "learning_rate": 0.00015245833825434547, + "loss": 2.5563, + "step": 6552 + }, + { + "epoch": 
0.5288515858284238, + "grad_norm": 0.7213610410690308, + "learning_rate": 0.00015244489728202893, + "loss": 2.6158, + "step": 6553 + }, + { + "epoch": 0.5289322895650068, + "grad_norm": 0.6678160429000854, + "learning_rate": 0.00015243145500264845, + "loss": 2.6177, + "step": 6554 + }, + { + "epoch": 0.5290129933015899, + "grad_norm": 0.7041724324226379, + "learning_rate": 0.00015241801141653905, + "loss": 2.6504, + "step": 6555 + }, + { + "epoch": 0.5290936970381729, + "grad_norm": 0.6551648378372192, + "learning_rate": 0.0001524045665240358, + "loss": 2.577, + "step": 6556 + }, + { + "epoch": 0.5291744007747559, + "grad_norm": 0.7190412878990173, + "learning_rate": 0.00015239112032547377, + "loss": 2.596, + "step": 6557 + }, + { + "epoch": 0.5292551045113388, + "grad_norm": 0.6936302781105042, + "learning_rate": 0.00015237767282118807, + "loss": 2.6551, + "step": 6558 + }, + { + "epoch": 0.5293358082479219, + "grad_norm": 0.6901839971542358, + "learning_rate": 0.0001523642240115138, + "loss": 2.6263, + "step": 6559 + }, + { + "epoch": 0.5294165119845049, + "grad_norm": 0.6905068159103394, + "learning_rate": 0.00015235077389678624, + "loss": 2.6323, + "step": 6560 + }, + { + "epoch": 0.5294972157210879, + "grad_norm": 0.7495188117027283, + "learning_rate": 0.00015233732247734057, + "loss": 2.6243, + "step": 6561 + }, + { + "epoch": 0.5295779194576709, + "grad_norm": 0.6758708357810974, + "learning_rate": 0.00015232386975351197, + "loss": 2.6184, + "step": 6562 + }, + { + "epoch": 0.5296586231942539, + "grad_norm": 0.6443266868591309, + "learning_rate": 0.00015231041572563573, + "loss": 2.6543, + "step": 6563 + }, + { + "epoch": 0.5297393269308369, + "grad_norm": 0.7384275794029236, + "learning_rate": 0.00015229696039404723, + "loss": 2.6117, + "step": 6564 + }, + { + "epoch": 0.5298200306674199, + "grad_norm": 0.6873897314071655, + "learning_rate": 0.00015228350375908178, + "loss": 2.5689, + "step": 6565 + }, + { + "epoch": 0.5299007344040029, + "grad_norm": 
0.6715645790100098, + "learning_rate": 0.00015227004582107472, + "loss": 2.5943, + "step": 6566 + }, + { + "epoch": 0.529981438140586, + "grad_norm": 0.6814208030700684, + "learning_rate": 0.00015225658658036151, + "loss": 2.5562, + "step": 6567 + }, + { + "epoch": 0.5300621418771689, + "grad_norm": 0.6942310929298401, + "learning_rate": 0.00015224312603727755, + "loss": 2.5902, + "step": 6568 + }, + { + "epoch": 0.5301428456137519, + "grad_norm": 0.6856299042701721, + "learning_rate": 0.0001522296641921583, + "loss": 2.6115, + "step": 6569 + }, + { + "epoch": 0.5302235493503349, + "grad_norm": 0.870833694934845, + "learning_rate": 0.0001522162010453393, + "loss": 2.7492, + "step": 6570 + }, + { + "epoch": 0.530304253086918, + "grad_norm": 0.6796989440917969, + "learning_rate": 0.0001522027365971561, + "loss": 2.6957, + "step": 6571 + }, + { + "epoch": 0.530384956823501, + "grad_norm": 0.7043026685714722, + "learning_rate": 0.00015218927084794423, + "loss": 2.604, + "step": 6572 + }, + { + "epoch": 0.5304656605600839, + "grad_norm": 0.7533933520317078, + "learning_rate": 0.00015217580379803933, + "loss": 2.6271, + "step": 6573 + }, + { + "epoch": 0.5305463642966669, + "grad_norm": 0.7526697516441345, + "learning_rate": 0.000152162335447777, + "loss": 2.553, + "step": 6574 + }, + { + "epoch": 0.53062706803325, + "grad_norm": 0.6942071318626404, + "learning_rate": 0.00015214886579749284, + "loss": 2.7206, + "step": 6575 + }, + { + "epoch": 0.530707771769833, + "grad_norm": 0.7133236527442932, + "learning_rate": 0.00015213539484752273, + "loss": 2.6545, + "step": 6576 + }, + { + "epoch": 0.530788475506416, + "grad_norm": 0.7229849696159363, + "learning_rate": 0.00015212192259820222, + "loss": 2.6647, + "step": 6577 + }, + { + "epoch": 0.5308691792429989, + "grad_norm": 0.7142449617385864, + "learning_rate": 0.0001521084490498672, + "loss": 2.5777, + "step": 6578 + }, + { + "epoch": 0.5309498829795819, + "grad_norm": 0.6950247287750244, + "learning_rate": 
0.00015209497420285342, + "loss": 2.6159, + "step": 6579 + }, + { + "epoch": 0.531030586716165, + "grad_norm": 0.7492622137069702, + "learning_rate": 0.00015208149805749668, + "loss": 2.6927, + "step": 6580 + }, + { + "epoch": 0.531111290452748, + "grad_norm": 0.7618215084075928, + "learning_rate": 0.00015206802061413287, + "loss": 2.5831, + "step": 6581 + }, + { + "epoch": 0.5311919941893309, + "grad_norm": 0.7448660731315613, + "learning_rate": 0.0001520545418730979, + "loss": 2.6123, + "step": 6582 + }, + { + "epoch": 0.5312726979259139, + "grad_norm": 0.7450618147850037, + "learning_rate": 0.00015204106183472766, + "loss": 2.5768, + "step": 6583 + }, + { + "epoch": 0.531353401662497, + "grad_norm": 0.7426019310951233, + "learning_rate": 0.0001520275804993581, + "loss": 2.603, + "step": 6584 + }, + { + "epoch": 0.53143410539908, + "grad_norm": 0.7503333687782288, + "learning_rate": 0.00015201409786732526, + "loss": 2.6159, + "step": 6585 + }, + { + "epoch": 0.531514809135663, + "grad_norm": 0.6944373846054077, + "learning_rate": 0.00015200061393896513, + "loss": 2.5201, + "step": 6586 + }, + { + "epoch": 0.5315955128722459, + "grad_norm": 0.6958110332489014, + "learning_rate": 0.00015198712871461375, + "loss": 2.5592, + "step": 6587 + }, + { + "epoch": 0.531676216608829, + "grad_norm": 0.7838244438171387, + "learning_rate": 0.00015197364219460727, + "loss": 2.6663, + "step": 6588 + }, + { + "epoch": 0.531756920345412, + "grad_norm": 0.754338800907135, + "learning_rate": 0.00015196015437928174, + "loss": 2.6183, + "step": 6589 + }, + { + "epoch": 0.531837624081995, + "grad_norm": 0.7394337058067322, + "learning_rate": 0.00015194666526897332, + "loss": 2.5622, + "step": 6590 + }, + { + "epoch": 0.531918327818578, + "grad_norm": 0.7352069020271301, + "learning_rate": 0.00015193317486401824, + "loss": 2.6173, + "step": 6591 + }, + { + "epoch": 0.531999031555161, + "grad_norm": 0.6318944096565247, + "learning_rate": 0.00015191968316475267, + "loss": 2.6159, + "step": 
6592 + }, + { + "epoch": 0.532079735291744, + "grad_norm": 0.7071281671524048, + "learning_rate": 0.00015190619017151291, + "loss": 2.633, + "step": 6593 + }, + { + "epoch": 0.532160439028327, + "grad_norm": 0.7762585282325745, + "learning_rate": 0.00015189269588463517, + "loss": 2.6445, + "step": 6594 + }, + { + "epoch": 0.53224114276491, + "grad_norm": 0.7979930639266968, + "learning_rate": 0.0001518792003044558, + "loss": 2.5825, + "step": 6595 + }, + { + "epoch": 0.5323218465014931, + "grad_norm": 0.7355580925941467, + "learning_rate": 0.00015186570343131114, + "loss": 2.6197, + "step": 6596 + }, + { + "epoch": 0.532402550238076, + "grad_norm": 0.7286938428878784, + "learning_rate": 0.0001518522052655376, + "loss": 2.6385, + "step": 6597 + }, + { + "epoch": 0.532483253974659, + "grad_norm": 0.689143180847168, + "learning_rate": 0.00015183870580747156, + "loss": 2.6593, + "step": 6598 + }, + { + "epoch": 0.532563957711242, + "grad_norm": 0.714746356010437, + "learning_rate": 0.00015182520505744945, + "loss": 2.6059, + "step": 6599 + }, + { + "epoch": 0.5326446614478251, + "grad_norm": 0.8055040240287781, + "learning_rate": 0.00015181170301580777, + "loss": 2.6983, + "step": 6600 + }, + { + "epoch": 0.532725365184408, + "grad_norm": 0.7104170918464661, + "learning_rate": 0.00015179819968288297, + "loss": 2.6578, + "step": 6601 + }, + { + "epoch": 0.532806068920991, + "grad_norm": 0.7175524830818176, + "learning_rate": 0.0001517846950590117, + "loss": 2.6263, + "step": 6602 + }, + { + "epoch": 0.532886772657574, + "grad_norm": 0.6755492091178894, + "learning_rate": 0.00015177118914453042, + "loss": 2.5752, + "step": 6603 + }, + { + "epoch": 0.5329674763941571, + "grad_norm": 0.7020289897918701, + "learning_rate": 0.00015175768193977578, + "loss": 2.6186, + "step": 6604 + }, + { + "epoch": 0.5330481801307401, + "grad_norm": 0.7550958395004272, + "learning_rate": 0.0001517441734450844, + "loss": 2.628, + "step": 6605 + }, + { + "epoch": 0.533128883867323, + 
"grad_norm": 0.6697603464126587, + "learning_rate": 0.00015173066366079297, + "loss": 2.6433, + "step": 6606 + }, + { + "epoch": 0.533209587603906, + "grad_norm": 0.715372622013092, + "learning_rate": 0.0001517171525872382, + "loss": 2.6022, + "step": 6607 + }, + { + "epoch": 0.5332902913404891, + "grad_norm": 0.7081933617591858, + "learning_rate": 0.00015170364022475675, + "loss": 2.675, + "step": 6608 + }, + { + "epoch": 0.5333709950770721, + "grad_norm": 0.7074152231216431, + "learning_rate": 0.00015169012657368546, + "loss": 2.6637, + "step": 6609 + }, + { + "epoch": 0.5334516988136551, + "grad_norm": 0.6692848801612854, + "learning_rate": 0.00015167661163436108, + "loss": 2.5855, + "step": 6610 + }, + { + "epoch": 0.533532402550238, + "grad_norm": 0.7307556867599487, + "learning_rate": 0.00015166309540712048, + "loss": 2.6105, + "step": 6611 + }, + { + "epoch": 0.5336131062868211, + "grad_norm": 0.7026669383049011, + "learning_rate": 0.00015164957789230048, + "loss": 2.6656, + "step": 6612 + }, + { + "epoch": 0.5336938100234041, + "grad_norm": 0.6579706072807312, + "learning_rate": 0.000151636059090238, + "loss": 2.6456, + "step": 6613 + }, + { + "epoch": 0.5337745137599871, + "grad_norm": 0.6854498386383057, + "learning_rate": 0.00015162253900126993, + "loss": 2.5969, + "step": 6614 + }, + { + "epoch": 0.5338552174965701, + "grad_norm": 0.7542434334754944, + "learning_rate": 0.00015160901762573323, + "loss": 2.6333, + "step": 6615 + }, + { + "epoch": 0.5339359212331531, + "grad_norm": 0.6795105934143066, + "learning_rate": 0.0001515954949639649, + "loss": 2.6268, + "step": 6616 + }, + { + "epoch": 0.5340166249697361, + "grad_norm": 0.6395254135131836, + "learning_rate": 0.000151581971016302, + "loss": 2.5684, + "step": 6617 + }, + { + "epoch": 0.5340973287063191, + "grad_norm": 0.7069850564002991, + "learning_rate": 0.00015156844578308155, + "loss": 2.64, + "step": 6618 + }, + { + "epoch": 0.5341780324429021, + "grad_norm": 0.6779203414916992, + 
"learning_rate": 0.0001515549192646406, + "loss": 2.6255, + "step": 6619 + }, + { + "epoch": 0.5342587361794852, + "grad_norm": 0.6403560638427734, + "learning_rate": 0.00015154139146131632, + "loss": 2.611, + "step": 6620 + }, + { + "epoch": 0.5343394399160681, + "grad_norm": 0.7532669901847839, + "learning_rate": 0.00015152786237344583, + "loss": 2.5641, + "step": 6621 + }, + { + "epoch": 0.5344201436526511, + "grad_norm": 0.6827573776245117, + "learning_rate": 0.00015151433200136629, + "loss": 2.6096, + "step": 6622 + }, + { + "epoch": 0.5345008473892341, + "grad_norm": 0.6691904067993164, + "learning_rate": 0.000151500800345415, + "loss": 2.6602, + "step": 6623 + }, + { + "epoch": 0.5345815511258172, + "grad_norm": 0.7288634777069092, + "learning_rate": 0.00015148726740592906, + "loss": 2.6468, + "step": 6624 + }, + { + "epoch": 0.5346622548624002, + "grad_norm": 0.7087839245796204, + "learning_rate": 0.00015147373318324586, + "loss": 2.5795, + "step": 6625 + }, + { + "epoch": 0.5347429585989831, + "grad_norm": 0.6618373394012451, + "learning_rate": 0.00015146019767770267, + "loss": 2.638, + "step": 6626 + }, + { + "epoch": 0.5348236623355661, + "grad_norm": 0.7384989857673645, + "learning_rate": 0.00015144666088963684, + "loss": 2.6104, + "step": 6627 + }, + { + "epoch": 0.5349043660721492, + "grad_norm": 0.6662275195121765, + "learning_rate": 0.00015143312281938576, + "loss": 2.6174, + "step": 6628 + }, + { + "epoch": 0.5349850698087322, + "grad_norm": 0.6617184281349182, + "learning_rate": 0.0001514195834672868, + "loss": 2.6154, + "step": 6629 + }, + { + "epoch": 0.5350657735453151, + "grad_norm": 0.7173622846603394, + "learning_rate": 0.0001514060428336774, + "loss": 2.5741, + "step": 6630 + }, + { + "epoch": 0.5351464772818981, + "grad_norm": 0.7773584127426147, + "learning_rate": 0.00015139250091889502, + "loss": 2.6333, + "step": 6631 + }, + { + "epoch": 0.5352271810184811, + "grad_norm": 0.7255204916000366, + "learning_rate": 0.0001513789577232772, + 
"loss": 2.5459, + "step": 6632 + }, + { + "epoch": 0.5353078847550642, + "grad_norm": 0.7308403849601746, + "learning_rate": 0.00015136541324716144, + "loss": 2.5934, + "step": 6633 + }, + { + "epoch": 0.5353885884916472, + "grad_norm": 0.699367880821228, + "learning_rate": 0.0001513518674908853, + "loss": 2.6797, + "step": 6634 + }, + { + "epoch": 0.5354692922282301, + "grad_norm": 0.7236449718475342, + "learning_rate": 0.0001513383204547864, + "loss": 2.6289, + "step": 6635 + }, + { + "epoch": 0.5355499959648131, + "grad_norm": 0.6860557794570923, + "learning_rate": 0.00015132477213920234, + "loss": 2.6736, + "step": 6636 + }, + { + "epoch": 0.5356306997013962, + "grad_norm": 0.6724153161048889, + "learning_rate": 0.00015131122254447084, + "loss": 2.5581, + "step": 6637 + }, + { + "epoch": 0.5357114034379792, + "grad_norm": 0.6818630695343018, + "learning_rate": 0.00015129767167092949, + "loss": 2.5979, + "step": 6638 + }, + { + "epoch": 0.5357921071745622, + "grad_norm": 0.6956631541252136, + "learning_rate": 0.00015128411951891607, + "loss": 2.6116, + "step": 6639 + }, + { + "epoch": 0.5358728109111451, + "grad_norm": 0.6698076128959656, + "learning_rate": 0.00015127056608876837, + "loss": 2.65, + "step": 6640 + }, + { + "epoch": 0.5359535146477282, + "grad_norm": 0.7763264179229736, + "learning_rate": 0.00015125701138082415, + "loss": 2.6164, + "step": 6641 + }, + { + "epoch": 0.5360342183843112, + "grad_norm": 0.7148340940475464, + "learning_rate": 0.00015124345539542118, + "loss": 2.6467, + "step": 6642 + }, + { + "epoch": 0.5361149221208942, + "grad_norm": 0.7350041270256042, + "learning_rate": 0.00015122989813289733, + "loss": 2.6477, + "step": 6643 + }, + { + "epoch": 0.5361956258574772, + "grad_norm": 0.6993441581726074, + "learning_rate": 0.00015121633959359055, + "loss": 2.7526, + "step": 6644 + }, + { + "epoch": 0.5362763295940602, + "grad_norm": 0.6828470826148987, + "learning_rate": 0.00015120277977783873, + "loss": 2.6439, + "step": 6645 + }, + { + 
"epoch": 0.5363570333306432, + "grad_norm": 0.7076796889305115, + "learning_rate": 0.0001511892186859797, + "loss": 2.6375, + "step": 6646 + }, + { + "epoch": 0.5364377370672262, + "grad_norm": 0.6830769777297974, + "learning_rate": 0.0001511756563183516, + "loss": 2.6052, + "step": 6647 + }, + { + "epoch": 0.5365184408038092, + "grad_norm": 0.6482179760932922, + "learning_rate": 0.00015116209267529237, + "loss": 2.6251, + "step": 6648 + }, + { + "epoch": 0.5365991445403923, + "grad_norm": 0.6687620878219604, + "learning_rate": 0.00015114852775714, + "loss": 2.659, + "step": 6649 + }, + { + "epoch": 0.5366798482769752, + "grad_norm": 0.734108030796051, + "learning_rate": 0.0001511349615642327, + "loss": 2.6542, + "step": 6650 + }, + { + "epoch": 0.5367605520135582, + "grad_norm": 0.7092111706733704, + "learning_rate": 0.00015112139409690842, + "loss": 2.6228, + "step": 6651 + }, + { + "epoch": 0.5368412557501412, + "grad_norm": 0.6544996500015259, + "learning_rate": 0.0001511078253555054, + "loss": 2.5661, + "step": 6652 + }, + { + "epoch": 0.5369219594867243, + "grad_norm": 0.7012531161308289, + "learning_rate": 0.00015109425534036176, + "loss": 2.6447, + "step": 6653 + }, + { + "epoch": 0.5370026632233073, + "grad_norm": 0.6813335418701172, + "learning_rate": 0.0001510806840518157, + "loss": 2.5723, + "step": 6654 + }, + { + "epoch": 0.5370833669598902, + "grad_norm": 0.6711288094520569, + "learning_rate": 0.0001510671114902055, + "loss": 2.6096, + "step": 6655 + }, + { + "epoch": 0.5371640706964732, + "grad_norm": 0.721866250038147, + "learning_rate": 0.00015105353765586935, + "loss": 2.6167, + "step": 6656 + }, + { + "epoch": 0.5372447744330563, + "grad_norm": 0.8140639066696167, + "learning_rate": 0.00015103996254914562, + "loss": 2.5768, + "step": 6657 + }, + { + "epoch": 0.5373254781696393, + "grad_norm": 0.6859177947044373, + "learning_rate": 0.0001510263861703726, + "loss": 2.5638, + "step": 6658 + }, + { + "epoch": 0.5374061819062222, + "grad_norm": 
0.7254204154014587, + "learning_rate": 0.00015101280851988864, + "loss": 2.5855, + "step": 6659 + }, + { + "epoch": 0.5374868856428052, + "grad_norm": 0.7181829810142517, + "learning_rate": 0.00015099922959803218, + "loss": 2.5358, + "step": 6660 + }, + { + "epoch": 0.5375675893793883, + "grad_norm": 0.7092663645744324, + "learning_rate": 0.00015098564940514155, + "loss": 2.679, + "step": 6661 + }, + { + "epoch": 0.5376482931159713, + "grad_norm": 0.7126225233078003, + "learning_rate": 0.00015097206794155527, + "loss": 2.6167, + "step": 6662 + }, + { + "epoch": 0.5377289968525543, + "grad_norm": 0.7469925880432129, + "learning_rate": 0.00015095848520761186, + "loss": 2.5906, + "step": 6663 + }, + { + "epoch": 0.5378097005891372, + "grad_norm": 0.6911186575889587, + "learning_rate": 0.00015094490120364973, + "loss": 2.6488, + "step": 6664 + }, + { + "epoch": 0.5378904043257203, + "grad_norm": 0.6579635143280029, + "learning_rate": 0.00015093131593000753, + "loss": 2.5894, + "step": 6665 + }, + { + "epoch": 0.5379711080623033, + "grad_norm": 0.7107242345809937, + "learning_rate": 0.00015091772938702377, + "loss": 2.6568, + "step": 6666 + }, + { + "epoch": 0.5380518117988863, + "grad_norm": 0.6845428943634033, + "learning_rate": 0.00015090414157503714, + "loss": 2.5697, + "step": 6667 + }, + { + "epoch": 0.5381325155354693, + "grad_norm": 0.6713212132453918, + "learning_rate": 0.00015089055249438622, + "loss": 2.5747, + "step": 6668 + }, + { + "epoch": 0.5382132192720523, + "grad_norm": 0.7091513276100159, + "learning_rate": 0.0001508769621454097, + "loss": 2.6765, + "step": 6669 + }, + { + "epoch": 0.5382939230086353, + "grad_norm": 0.7403436899185181, + "learning_rate": 0.00015086337052844627, + "loss": 2.6841, + "step": 6670 + }, + { + "epoch": 0.5383746267452183, + "grad_norm": 0.6745626330375671, + "learning_rate": 0.0001508497776438347, + "loss": 2.6436, + "step": 6671 + }, + { + "epoch": 0.5384553304818013, + "grad_norm": 0.7491294145584106, + "learning_rate": 
0.00015083618349191372, + "loss": 2.6376, + "step": 6672 + }, + { + "epoch": 0.5385360342183844, + "grad_norm": 0.719761848449707, + "learning_rate": 0.00015082258807302222, + "loss": 2.5885, + "step": 6673 + }, + { + "epoch": 0.5386167379549673, + "grad_norm": 0.7302667498588562, + "learning_rate": 0.00015080899138749895, + "loss": 2.7019, + "step": 6674 + }, + { + "epoch": 0.5386974416915503, + "grad_norm": 0.7640584111213684, + "learning_rate": 0.0001507953934356828, + "loss": 2.6404, + "step": 6675 + }, + { + "epoch": 0.5387781454281333, + "grad_norm": 0.699515700340271, + "learning_rate": 0.0001507817942179127, + "loss": 2.6407, + "step": 6676 + }, + { + "epoch": 0.5388588491647164, + "grad_norm": 0.7305224537849426, + "learning_rate": 0.00015076819373452746, + "loss": 2.5994, + "step": 6677 + }, + { + "epoch": 0.5389395529012994, + "grad_norm": 0.7125952243804932, + "learning_rate": 0.00015075459198586616, + "loss": 2.6472, + "step": 6678 + }, + { + "epoch": 0.5390202566378823, + "grad_norm": 0.7077293395996094, + "learning_rate": 0.00015074098897226778, + "loss": 2.6168, + "step": 6679 + }, + { + "epoch": 0.5391009603744653, + "grad_norm": 0.6713843941688538, + "learning_rate": 0.00015072738469407127, + "loss": 2.5736, + "step": 6680 + }, + { + "epoch": 0.5391816641110483, + "grad_norm": 0.7101294994354248, + "learning_rate": 0.00015071377915161578, + "loss": 2.6994, + "step": 6681 + }, + { + "epoch": 0.5392623678476314, + "grad_norm": 0.7132740020751953, + "learning_rate": 0.00015070017234524032, + "loss": 2.586, + "step": 6682 + }, + { + "epoch": 0.5393430715842144, + "grad_norm": 0.7043401598930359, + "learning_rate": 0.00015068656427528402, + "loss": 2.6025, + "step": 6683 + }, + { + "epoch": 0.5394237753207973, + "grad_norm": 0.6831551194190979, + "learning_rate": 0.00015067295494208607, + "loss": 2.6183, + "step": 6684 + }, + { + "epoch": 0.5395044790573803, + "grad_norm": 0.7066370844841003, + "learning_rate": 0.0001506593443459856, + "loss": 2.6467, 
+ "step": 6685 + }, + { + "epoch": 0.5395851827939634, + "grad_norm": 0.7908033132553101, + "learning_rate": 0.0001506457324873219, + "loss": 2.6929, + "step": 6686 + }, + { + "epoch": 0.5396658865305464, + "grad_norm": 0.7186774611473083, + "learning_rate": 0.00015063211936643407, + "loss": 2.5841, + "step": 6687 + }, + { + "epoch": 0.5397465902671293, + "grad_norm": 0.6634512543678284, + "learning_rate": 0.0001506185049836615, + "loss": 2.5517, + "step": 6688 + }, + { + "epoch": 0.5398272940037123, + "grad_norm": 0.734406590461731, + "learning_rate": 0.00015060488933934353, + "loss": 2.6317, + "step": 6689 + }, + { + "epoch": 0.5399079977402954, + "grad_norm": 0.7754772305488586, + "learning_rate": 0.00015059127243381937, + "loss": 2.6885, + "step": 6690 + }, + { + "epoch": 0.5399887014768784, + "grad_norm": 0.7636603713035583, + "learning_rate": 0.00015057765426742848, + "loss": 2.5767, + "step": 6691 + }, + { + "epoch": 0.5400694052134614, + "grad_norm": 0.6621577143669128, + "learning_rate": 0.00015056403484051017, + "loss": 2.5905, + "step": 6692 + }, + { + "epoch": 0.5401501089500443, + "grad_norm": 0.7605881094932556, + "learning_rate": 0.00015055041415340404, + "loss": 2.6166, + "step": 6693 + }, + { + "epoch": 0.5402308126866274, + "grad_norm": 0.7603485584259033, + "learning_rate": 0.0001505367922064494, + "loss": 2.6123, + "step": 6694 + }, + { + "epoch": 0.5403115164232104, + "grad_norm": 0.7021469473838806, + "learning_rate": 0.0001505231689999858, + "loss": 2.6754, + "step": 6695 + }, + { + "epoch": 0.5403922201597934, + "grad_norm": 0.7291955947875977, + "learning_rate": 0.00015050954453435273, + "loss": 2.6393, + "step": 6696 + }, + { + "epoch": 0.5404729238963764, + "grad_norm": 0.6658700704574585, + "learning_rate": 0.00015049591880988977, + "loss": 2.5888, + "step": 6697 + }, + { + "epoch": 0.5405536276329594, + "grad_norm": 0.7080146074295044, + "learning_rate": 0.00015048229182693657, + "loss": 2.6318, + "step": 6698 + }, + { + "epoch": 
0.5406343313695424, + "grad_norm": 0.7440849542617798, + "learning_rate": 0.00015046866358583267, + "loss": 2.596, + "step": 6699 + }, + { + "epoch": 0.5407150351061254, + "grad_norm": 0.886578381061554, + "learning_rate": 0.00015045503408691775, + "loss": 2.6479, + "step": 6700 + }, + { + "epoch": 0.5407957388427084, + "grad_norm": 0.7221408486366272, + "learning_rate": 0.00015044140333053148, + "loss": 2.625, + "step": 6701 + }, + { + "epoch": 0.5408764425792915, + "grad_norm": 0.7193209528923035, + "learning_rate": 0.0001504277713170136, + "loss": 2.6044, + "step": 6702 + }, + { + "epoch": 0.5409571463158744, + "grad_norm": 0.7139819860458374, + "learning_rate": 0.00015041413804670384, + "loss": 2.5572, + "step": 6703 + }, + { + "epoch": 0.5410378500524574, + "grad_norm": 0.728875994682312, + "learning_rate": 0.00015040050351994196, + "loss": 2.6373, + "step": 6704 + }, + { + "epoch": 0.5411185537890404, + "grad_norm": 0.6794858574867249, + "learning_rate": 0.0001503868677370678, + "loss": 2.6265, + "step": 6705 + }, + { + "epoch": 0.5411992575256235, + "grad_norm": 0.6874774098396301, + "learning_rate": 0.00015037323069842117, + "loss": 2.6146, + "step": 6706 + }, + { + "epoch": 0.5412799612622065, + "grad_norm": 0.7064409255981445, + "learning_rate": 0.00015035959240434197, + "loss": 2.6126, + "step": 6707 + }, + { + "epoch": 0.5413606649987894, + "grad_norm": 0.7212977409362793, + "learning_rate": 0.00015034595285517006, + "loss": 2.6836, + "step": 6708 + }, + { + "epoch": 0.5414413687353724, + "grad_norm": 0.7826492190361023, + "learning_rate": 0.0001503323120512454, + "loss": 2.6648, + "step": 6709 + }, + { + "epoch": 0.5415220724719555, + "grad_norm": 0.7228415608406067, + "learning_rate": 0.000150318669992908, + "loss": 2.5734, + "step": 6710 + }, + { + "epoch": 0.5416027762085385, + "grad_norm": 0.6929590702056885, + "learning_rate": 0.00015030502668049778, + "loss": 2.6023, + "step": 6711 + }, + { + "epoch": 0.5416834799451214, + "grad_norm": 
0.679990291595459, + "learning_rate": 0.0001502913821143548, + "loss": 2.5867, + "step": 6712 + }, + { + "epoch": 0.5417641836817044, + "grad_norm": 0.7324180603027344, + "learning_rate": 0.00015027773629481907, + "loss": 2.5722, + "step": 6713 + }, + { + "epoch": 0.5418448874182875, + "grad_norm": 0.686826765537262, + "learning_rate": 0.00015026408922223078, + "loss": 2.6138, + "step": 6714 + }, + { + "epoch": 0.5419255911548705, + "grad_norm": 0.7045193314552307, + "learning_rate": 0.00015025044089693, + "loss": 2.619, + "step": 6715 + }, + { + "epoch": 0.5420062948914535, + "grad_norm": 0.6839936375617981, + "learning_rate": 0.00015023679131925683, + "loss": 2.5778, + "step": 6716 + }, + { + "epoch": 0.5420869986280364, + "grad_norm": 0.7613961696624756, + "learning_rate": 0.00015022314048955153, + "loss": 2.6262, + "step": 6717 + }, + { + "epoch": 0.5421677023646195, + "grad_norm": 0.7867478728294373, + "learning_rate": 0.00015020948840815428, + "loss": 2.6576, + "step": 6718 + }, + { + "epoch": 0.5422484061012025, + "grad_norm": 0.7371038794517517, + "learning_rate": 0.0001501958350754053, + "loss": 2.6495, + "step": 6719 + }, + { + "epoch": 0.5423291098377855, + "grad_norm": 0.7146512269973755, + "learning_rate": 0.00015018218049164494, + "loss": 2.6514, + "step": 6720 + }, + { + "epoch": 0.5424098135743685, + "grad_norm": 0.7507650256156921, + "learning_rate": 0.00015016852465721346, + "loss": 2.6509, + "step": 6721 + }, + { + "epoch": 0.5424905173109515, + "grad_norm": 0.6786547303199768, + "learning_rate": 0.0001501548675724512, + "loss": 2.5983, + "step": 6722 + }, + { + "epoch": 0.5425712210475345, + "grad_norm": 0.7077932357788086, + "learning_rate": 0.0001501412092376985, + "loss": 2.622, + "step": 6723 + }, + { + "epoch": 0.5426519247841175, + "grad_norm": 0.7191271781921387, + "learning_rate": 0.00015012754965329584, + "loss": 2.6632, + "step": 6724 + }, + { + "epoch": 0.5427326285207005, + "grad_norm": 0.6785906553268433, + "learning_rate": 
0.00015011388881958356, + "loss": 2.6312, + "step": 6725 + }, + { + "epoch": 0.5428133322572836, + "grad_norm": 0.6880263090133667, + "learning_rate": 0.00015010022673690222, + "loss": 2.5951, + "step": 6726 + }, + { + "epoch": 0.5428940359938665, + "grad_norm": 0.7769095301628113, + "learning_rate": 0.0001500865634055923, + "loss": 2.5503, + "step": 6727 + }, + { + "epoch": 0.5429747397304495, + "grad_norm": 0.6847476959228516, + "learning_rate": 0.0001500728988259942, + "loss": 2.6824, + "step": 6728 + }, + { + "epoch": 0.5430554434670325, + "grad_norm": 0.6829310059547424, + "learning_rate": 0.00015005923299844863, + "loss": 2.5683, + "step": 6729 + }, + { + "epoch": 0.5431361472036156, + "grad_norm": 0.7436082363128662, + "learning_rate": 0.0001500455659232961, + "loss": 2.6165, + "step": 6730 + }, + { + "epoch": 0.5432168509401986, + "grad_norm": 0.7876375913619995, + "learning_rate": 0.00015003189760087724, + "loss": 2.6203, + "step": 6731 + }, + { + "epoch": 0.5432975546767815, + "grad_norm": 0.6869253516197205, + "learning_rate": 0.0001500182280315327, + "loss": 2.6136, + "step": 6732 + }, + { + "epoch": 0.5433782584133645, + "grad_norm": 0.7179432511329651, + "learning_rate": 0.00015000455721560316, + "loss": 2.6049, + "step": 6733 + }, + { + "epoch": 0.5434589621499475, + "grad_norm": 0.7286917567253113, + "learning_rate": 0.00014999088515342939, + "loss": 2.5704, + "step": 6734 + }, + { + "epoch": 0.5435396658865306, + "grad_norm": 0.6841779351234436, + "learning_rate": 0.00014997721184535206, + "loss": 2.6095, + "step": 6735 + }, + { + "epoch": 0.5436203696231136, + "grad_norm": 0.7661791443824768, + "learning_rate": 0.00014996353729171196, + "loss": 2.6193, + "step": 6736 + }, + { + "epoch": 0.5437010733596965, + "grad_norm": 0.7365885376930237, + "learning_rate": 0.0001499498614928499, + "loss": 2.586, + "step": 6737 + }, + { + "epoch": 0.5437817770962795, + "grad_norm": 0.7423815131187439, + "learning_rate": 0.00014993618444910674, + "loss": 2.6199, 
+ "step": 6738 + }, + { + "epoch": 0.5438624808328626, + "grad_norm": 0.7667781114578247, + "learning_rate": 0.0001499225061608233, + "loss": 2.6584, + "step": 6739 + }, + { + "epoch": 0.5439431845694456, + "grad_norm": 0.7148830890655518, + "learning_rate": 0.00014990882662834057, + "loss": 2.7172, + "step": 6740 + }, + { + "epoch": 0.5440238883060285, + "grad_norm": 0.7206205725669861, + "learning_rate": 0.00014989514585199936, + "loss": 2.5682, + "step": 6741 + }, + { + "epoch": 0.5441045920426115, + "grad_norm": 0.7306448221206665, + "learning_rate": 0.0001498814638321407, + "loss": 2.6724, + "step": 6742 + }, + { + "epoch": 0.5441852957791946, + "grad_norm": 0.7058824896812439, + "learning_rate": 0.00014986778056910556, + "loss": 2.6573, + "step": 6743 + }, + { + "epoch": 0.5442659995157776, + "grad_norm": 0.770588755607605, + "learning_rate": 0.000149854096063235, + "loss": 2.658, + "step": 6744 + }, + { + "epoch": 0.5443467032523606, + "grad_norm": 0.8283931612968445, + "learning_rate": 0.00014984041031487001, + "loss": 2.6624, + "step": 6745 + }, + { + "epoch": 0.5444274069889435, + "grad_norm": 0.6814693808555603, + "learning_rate": 0.00014982672332435176, + "loss": 2.5835, + "step": 6746 + }, + { + "epoch": 0.5445081107255266, + "grad_norm": 0.7059363722801208, + "learning_rate": 0.00014981303509202127, + "loss": 2.5977, + "step": 6747 + }, + { + "epoch": 0.5445888144621096, + "grad_norm": 0.6678106188774109, + "learning_rate": 0.00014979934561821975, + "loss": 2.6479, + "step": 6748 + }, + { + "epoch": 0.5446695181986926, + "grad_norm": 0.8167592883110046, + "learning_rate": 0.00014978565490328835, + "loss": 2.6529, + "step": 6749 + }, + { + "epoch": 0.5447502219352756, + "grad_norm": 0.807209849357605, + "learning_rate": 0.00014977196294756832, + "loss": 2.6546, + "step": 6750 + }, + { + "epoch": 0.5448309256718586, + "grad_norm": 0.7099517583847046, + "learning_rate": 0.00014975826975140085, + "loss": 2.6178, + "step": 6751 + }, + { + "epoch": 
0.5449116294084416, + "grad_norm": 0.7900758981704712, + "learning_rate": 0.0001497445753151272, + "loss": 2.586, + "step": 6752 + }, + { + "epoch": 0.5449923331450246, + "grad_norm": 0.6826134920120239, + "learning_rate": 0.00014973087963908875, + "loss": 2.5914, + "step": 6753 + }, + { + "epoch": 0.5450730368816076, + "grad_norm": 0.7383863925933838, + "learning_rate": 0.0001497171827236268, + "loss": 2.6357, + "step": 6754 + }, + { + "epoch": 0.5451537406181907, + "grad_norm": 0.7208051085472107, + "learning_rate": 0.0001497034845690826, + "loss": 2.5435, + "step": 6755 + }, + { + "epoch": 0.5452344443547736, + "grad_norm": 0.680794894695282, + "learning_rate": 0.00014968978517579772, + "loss": 2.5691, + "step": 6756 + }, + { + "epoch": 0.5453151480913566, + "grad_norm": 0.680759847164154, + "learning_rate": 0.00014967608454411347, + "loss": 2.5761, + "step": 6757 + }, + { + "epoch": 0.5453958518279396, + "grad_norm": 0.719634473323822, + "learning_rate": 0.00014966238267437134, + "loss": 2.637, + "step": 6758 + }, + { + "epoch": 0.5454765555645227, + "grad_norm": 0.777302086353302, + "learning_rate": 0.0001496486795669128, + "loss": 2.6457, + "step": 6759 + }, + { + "epoch": 0.5455572593011057, + "grad_norm": 0.6875059604644775, + "learning_rate": 0.0001496349752220794, + "loss": 2.6116, + "step": 6760 + }, + { + "epoch": 0.5456379630376886, + "grad_norm": 0.6884258985519409, + "learning_rate": 0.0001496212696402127, + "loss": 2.5863, + "step": 6761 + }, + { + "epoch": 0.5457186667742716, + "grad_norm": 0.6667922139167786, + "learning_rate": 0.00014960756282165422, + "loss": 2.5892, + "step": 6762 + }, + { + "epoch": 0.5457993705108547, + "grad_norm": 0.6712725162506104, + "learning_rate": 0.00014959385476674559, + "loss": 2.5478, + "step": 6763 + }, + { + "epoch": 0.5458800742474377, + "grad_norm": 0.6803874969482422, + "learning_rate": 0.00014958014547582845, + "loss": 2.5785, + "step": 6764 + }, + { + "epoch": 0.5459607779840207, + "grad_norm": 
0.6975811123847961, + "learning_rate": 0.0001495664349492445, + "loss": 2.5765, + "step": 6765 + }, + { + "epoch": 0.5460414817206036, + "grad_norm": 0.7676273584365845, + "learning_rate": 0.00014955272318733544, + "loss": 2.634, + "step": 6766 + }, + { + "epoch": 0.5461221854571867, + "grad_norm": 0.7044547200202942, + "learning_rate": 0.000149539010190443, + "loss": 2.646, + "step": 6767 + }, + { + "epoch": 0.5462028891937697, + "grad_norm": 0.7453166842460632, + "learning_rate": 0.00014952529595890887, + "loss": 2.6137, + "step": 6768 + }, + { + "epoch": 0.5462835929303527, + "grad_norm": 0.7281681299209595, + "learning_rate": 0.00014951158049307493, + "loss": 2.6558, + "step": 6769 + }, + { + "epoch": 0.5463642966669356, + "grad_norm": 0.7131047248840332, + "learning_rate": 0.00014949786379328298, + "loss": 2.6441, + "step": 6770 + }, + { + "epoch": 0.5464450004035187, + "grad_norm": 0.7072219848632812, + "learning_rate": 0.00014948414585987487, + "loss": 2.5861, + "step": 6771 + }, + { + "epoch": 0.5465257041401017, + "grad_norm": 0.7270335555076599, + "learning_rate": 0.00014947042669319252, + "loss": 2.6703, + "step": 6772 + }, + { + "epoch": 0.5466064078766847, + "grad_norm": 0.7314150929450989, + "learning_rate": 0.0001494567062935778, + "loss": 2.6101, + "step": 6773 + }, + { + "epoch": 0.5466871116132677, + "grad_norm": 0.8168460130691528, + "learning_rate": 0.00014944298466137266, + "loss": 2.662, + "step": 6774 + }, + { + "epoch": 0.5467678153498507, + "grad_norm": 0.7338390350341797, + "learning_rate": 0.00014942926179691913, + "loss": 2.6481, + "step": 6775 + }, + { + "epoch": 0.5468485190864337, + "grad_norm": 0.7065639495849609, + "learning_rate": 0.00014941553770055917, + "loss": 2.6192, + "step": 6776 + }, + { + "epoch": 0.5469292228230167, + "grad_norm": 0.7675396203994751, + "learning_rate": 0.00014940181237263483, + "loss": 2.5828, + "step": 6777 + }, + { + "epoch": 0.5470099265595997, + "grad_norm": 0.7085692286491394, + "learning_rate": 
0.0001493880858134882, + "loss": 2.5815, + "step": 6778 + }, + { + "epoch": 0.5470906302961828, + "grad_norm": 0.757591187953949, + "learning_rate": 0.00014937435802346135, + "loss": 2.691, + "step": 6779 + }, + { + "epoch": 0.5471713340327657, + "grad_norm": 0.7299168705940247, + "learning_rate": 0.00014936062900289647, + "loss": 2.6246, + "step": 6780 + }, + { + "epoch": 0.5472520377693487, + "grad_norm": 0.693692684173584, + "learning_rate": 0.00014934689875213564, + "loss": 2.6149, + "step": 6781 + }, + { + "epoch": 0.5473327415059317, + "grad_norm": 0.733657956123352, + "learning_rate": 0.00014933316727152113, + "loss": 2.582, + "step": 6782 + }, + { + "epoch": 0.5474134452425147, + "grad_norm": 0.6881953477859497, + "learning_rate": 0.00014931943456139514, + "loss": 2.6023, + "step": 6783 + }, + { + "epoch": 0.5474941489790978, + "grad_norm": 0.7102411985397339, + "learning_rate": 0.00014930570062209988, + "loss": 2.6296, + "step": 6784 + }, + { + "epoch": 0.5475748527156807, + "grad_norm": 0.7263364791870117, + "learning_rate": 0.00014929196545397771, + "loss": 2.6414, + "step": 6785 + }, + { + "epoch": 0.5476555564522637, + "grad_norm": 0.7239066958427429, + "learning_rate": 0.00014927822905737092, + "loss": 2.6174, + "step": 6786 + }, + { + "epoch": 0.5477362601888467, + "grad_norm": 0.6909911632537842, + "learning_rate": 0.0001492644914326218, + "loss": 2.6036, + "step": 6787 + }, + { + "epoch": 0.5478169639254298, + "grad_norm": 0.719693124294281, + "learning_rate": 0.00014925075258007283, + "loss": 2.6507, + "step": 6788 + }, + { + "epoch": 0.5478976676620128, + "grad_norm": 0.7722225785255432, + "learning_rate": 0.0001492370125000663, + "loss": 2.6268, + "step": 6789 + }, + { + "epoch": 0.5479783713985957, + "grad_norm": 0.7456568479537964, + "learning_rate": 0.00014922327119294476, + "loss": 2.6426, + "step": 6790 + }, + { + "epoch": 0.5480590751351787, + "grad_norm": 0.7430242300033569, + "learning_rate": 0.00014920952865905062, + "loss": 2.6632, + 
"step": 6791 + }, + { + "epoch": 0.5481397788717618, + "grad_norm": 0.7363260388374329, + "learning_rate": 0.0001491957848987264, + "loss": 2.6021, + "step": 6792 + }, + { + "epoch": 0.5482204826083448, + "grad_norm": 0.6903972029685974, + "learning_rate": 0.00014918203991231462, + "loss": 2.6086, + "step": 6793 + }, + { + "epoch": 0.5483011863449277, + "grad_norm": 0.6765161752700806, + "learning_rate": 0.00014916829370015781, + "loss": 2.5806, + "step": 6794 + }, + { + "epoch": 0.5483818900815107, + "grad_norm": 0.7533403635025024, + "learning_rate": 0.0001491545462625986, + "loss": 2.6351, + "step": 6795 + }, + { + "epoch": 0.5484625938180938, + "grad_norm": 0.6841829419136047, + "learning_rate": 0.00014914079759997963, + "loss": 2.606, + "step": 6796 + }, + { + "epoch": 0.5485432975546768, + "grad_norm": 0.7671411037445068, + "learning_rate": 0.00014912704771264353, + "loss": 2.6645, + "step": 6797 + }, + { + "epoch": 0.5486240012912598, + "grad_norm": 0.7218797206878662, + "learning_rate": 0.00014911329660093295, + "loss": 2.6302, + "step": 6798 + }, + { + "epoch": 0.5487047050278427, + "grad_norm": 0.7269994020462036, + "learning_rate": 0.00014909954426519067, + "loss": 2.6261, + "step": 6799 + }, + { + "epoch": 0.5487854087644258, + "grad_norm": 0.765353262424469, + "learning_rate": 0.00014908579070575936, + "loss": 2.5787, + "step": 6800 + }, + { + "epoch": 0.5488661125010088, + "grad_norm": 0.6503065228462219, + "learning_rate": 0.00014907203592298189, + "loss": 2.6404, + "step": 6801 + }, + { + "epoch": 0.5489468162375918, + "grad_norm": 0.6869633197784424, + "learning_rate": 0.00014905827991720097, + "loss": 2.6463, + "step": 6802 + }, + { + "epoch": 0.5490275199741748, + "grad_norm": 0.7221426963806152, + "learning_rate": 0.00014904452268875947, + "loss": 2.6686, + "step": 6803 + }, + { + "epoch": 0.5491082237107578, + "grad_norm": 0.6781399250030518, + "learning_rate": 0.00014903076423800028, + "loss": 2.6274, + "step": 6804 + }, + { + "epoch": 
0.5491889274473408, + "grad_norm": 0.7451084852218628, + "learning_rate": 0.00014901700456526626, + "loss": 2.6449, + "step": 6805 + }, + { + "epoch": 0.5492696311839238, + "grad_norm": 0.7159574627876282, + "learning_rate": 0.0001490032436709004, + "loss": 2.6664, + "step": 6806 + }, + { + "epoch": 0.5493503349205068, + "grad_norm": 0.724039614200592, + "learning_rate": 0.00014898948155524558, + "loss": 2.5816, + "step": 6807 + }, + { + "epoch": 0.5494310386570899, + "grad_norm": 0.7194633483886719, + "learning_rate": 0.0001489757182186448, + "loss": 2.5625, + "step": 6808 + }, + { + "epoch": 0.5495117423936728, + "grad_norm": 0.704133927822113, + "learning_rate": 0.0001489619536614411, + "loss": 2.6295, + "step": 6809 + }, + { + "epoch": 0.5495924461302558, + "grad_norm": 0.6717158555984497, + "learning_rate": 0.00014894818788397757, + "loss": 2.6168, + "step": 6810 + }, + { + "epoch": 0.5496731498668388, + "grad_norm": 0.7096573710441589, + "learning_rate": 0.0001489344208865972, + "loss": 2.6316, + "step": 6811 + }, + { + "epoch": 0.5497538536034219, + "grad_norm": 0.6383458375930786, + "learning_rate": 0.00014892065266964316, + "loss": 2.5577, + "step": 6812 + }, + { + "epoch": 0.5498345573400049, + "grad_norm": 0.7606377601623535, + "learning_rate": 0.0001489068832334586, + "loss": 2.7078, + "step": 6813 + }, + { + "epoch": 0.5499152610765878, + "grad_norm": 0.649162232875824, + "learning_rate": 0.00014889311257838665, + "loss": 2.6023, + "step": 6814 + }, + { + "epoch": 0.5499959648131708, + "grad_norm": 0.6445025205612183, + "learning_rate": 0.00014887934070477053, + "loss": 2.6, + "step": 6815 + }, + { + "epoch": 0.5500766685497539, + "grad_norm": 0.6873729825019836, + "learning_rate": 0.00014886556761295342, + "loss": 2.6398, + "step": 6816 + }, + { + "epoch": 0.5501573722863369, + "grad_norm": 0.7814947366714478, + "learning_rate": 0.0001488517933032787, + "loss": 2.5803, + "step": 6817 + }, + { + "epoch": 0.5502380760229199, + "grad_norm": 
0.7140909433364868, + "learning_rate": 0.00014883801777608953, + "loss": 2.6051, + "step": 6818 + }, + { + "epoch": 0.5503187797595028, + "grad_norm": 0.7326326370239258, + "learning_rate": 0.00014882424103172936, + "loss": 2.6123, + "step": 6819 + }, + { + "epoch": 0.5503994834960859, + "grad_norm": 0.7093667387962341, + "learning_rate": 0.00014881046307054142, + "loss": 2.6527, + "step": 6820 + }, + { + "epoch": 0.5504801872326689, + "grad_norm": 0.6877567768096924, + "learning_rate": 0.00014879668389286915, + "loss": 2.6086, + "step": 6821 + }, + { + "epoch": 0.5505608909692519, + "grad_norm": 0.7095615863800049, + "learning_rate": 0.000148782903499056, + "loss": 2.6469, + "step": 6822 + }, + { + "epoch": 0.5506415947058348, + "grad_norm": 0.6931191086769104, + "learning_rate": 0.00014876912188944535, + "loss": 2.6842, + "step": 6823 + }, + { + "epoch": 0.5507222984424179, + "grad_norm": 0.7016414403915405, + "learning_rate": 0.00014875533906438072, + "loss": 2.5753, + "step": 6824 + }, + { + "epoch": 0.5508030021790009, + "grad_norm": 0.6813814640045166, + "learning_rate": 0.00014874155502420558, + "loss": 2.5739, + "step": 6825 + }, + { + "epoch": 0.5508837059155839, + "grad_norm": 0.7068608403205872, + "learning_rate": 0.00014872776976926347, + "loss": 2.6325, + "step": 6826 + }, + { + "epoch": 0.5509644096521669, + "grad_norm": 0.6978127360343933, + "learning_rate": 0.00014871398329989796, + "loss": 2.5614, + "step": 6827 + }, + { + "epoch": 0.55104511338875, + "grad_norm": 0.6923051476478577, + "learning_rate": 0.00014870019561645265, + "loss": 2.6075, + "step": 6828 + }, + { + "epoch": 0.5511258171253329, + "grad_norm": 0.6708533763885498, + "learning_rate": 0.00014868640671927117, + "loss": 2.5883, + "step": 6829 + }, + { + "epoch": 0.5512065208619159, + "grad_norm": 0.7679650783538818, + "learning_rate": 0.00014867261660869713, + "loss": 2.6105, + "step": 6830 + }, + { + "epoch": 0.5512872245984989, + "grad_norm": 0.7080917358398438, + "learning_rate": 
0.0001486588252850743, + "loss": 2.5855, + "step": 6831 + }, + { + "epoch": 0.551367928335082, + "grad_norm": 0.7218755483627319, + "learning_rate": 0.00014864503274874635, + "loss": 2.5872, + "step": 6832 + }, + { + "epoch": 0.551448632071665, + "grad_norm": 0.689038872718811, + "learning_rate": 0.000148631239000057, + "loss": 2.5902, + "step": 6833 + }, + { + "epoch": 0.5515293358082479, + "grad_norm": 0.6810954213142395, + "learning_rate": 0.00014861744403935005, + "loss": 2.5938, + "step": 6834 + }, + { + "epoch": 0.5516100395448309, + "grad_norm": 0.7509457468986511, + "learning_rate": 0.00014860364786696933, + "loss": 2.593, + "step": 6835 + }, + { + "epoch": 0.5516907432814139, + "grad_norm": 0.739536702632904, + "learning_rate": 0.00014858985048325863, + "loss": 2.6668, + "step": 6836 + }, + { + "epoch": 0.551771447017997, + "grad_norm": 0.661829948425293, + "learning_rate": 0.00014857605188856184, + "loss": 2.6407, + "step": 6837 + }, + { + "epoch": 0.5518521507545799, + "grad_norm": 0.6869735717773438, + "learning_rate": 0.00014856225208322287, + "loss": 2.535, + "step": 6838 + }, + { + "epoch": 0.5519328544911629, + "grad_norm": 0.6724792122840881, + "learning_rate": 0.00014854845106758563, + "loss": 2.5629, + "step": 6839 + }, + { + "epoch": 0.5520135582277459, + "grad_norm": 0.7066503763198853, + "learning_rate": 0.00014853464884199407, + "loss": 2.6002, + "step": 6840 + }, + { + "epoch": 0.552094261964329, + "grad_norm": 0.7354215979576111, + "learning_rate": 0.0001485208454067922, + "loss": 2.6032, + "step": 6841 + }, + { + "epoch": 0.552174965700912, + "grad_norm": 0.8124571442604065, + "learning_rate": 0.00014850704076232405, + "loss": 2.5884, + "step": 6842 + }, + { + "epoch": 0.5522556694374949, + "grad_norm": 0.6941336393356323, + "learning_rate": 0.00014849323490893364, + "loss": 2.6461, + "step": 6843 + }, + { + "epoch": 0.5523363731740779, + "grad_norm": 0.6848790049552917, + "learning_rate": 0.00014847942784696505, + "loss": 2.6098, + 
"step": 6844 + }, + { + "epoch": 0.552417076910661, + "grad_norm": 0.6688000559806824, + "learning_rate": 0.00014846561957676237, + "loss": 2.6115, + "step": 6845 + }, + { + "epoch": 0.552497780647244, + "grad_norm": 0.6647306084632874, + "learning_rate": 0.00014845181009866975, + "loss": 2.597, + "step": 6846 + }, + { + "epoch": 0.552578484383827, + "grad_norm": 0.7277785539627075, + "learning_rate": 0.0001484379994130314, + "loss": 2.6223, + "step": 6847 + }, + { + "epoch": 0.5526591881204099, + "grad_norm": 0.6623761057853699, + "learning_rate": 0.00014842418752019146, + "loss": 2.5657, + "step": 6848 + }, + { + "epoch": 0.552739891856993, + "grad_norm": 0.7207754254341125, + "learning_rate": 0.00014841037442049423, + "loss": 2.5711, + "step": 6849 + }, + { + "epoch": 0.552820595593576, + "grad_norm": 0.6963560581207275, + "learning_rate": 0.00014839656011428389, + "loss": 2.6078, + "step": 6850 + }, + { + "epoch": 0.552901299330159, + "grad_norm": 0.6875078678131104, + "learning_rate": 0.00014838274460190475, + "loss": 2.6109, + "step": 6851 + }, + { + "epoch": 0.552982003066742, + "grad_norm": 0.7049943804740906, + "learning_rate": 0.00014836892788370118, + "loss": 2.5755, + "step": 6852 + }, + { + "epoch": 0.553062706803325, + "grad_norm": 0.6941191554069519, + "learning_rate": 0.00014835510996001744, + "loss": 2.6694, + "step": 6853 + }, + { + "epoch": 0.553143410539908, + "grad_norm": 0.7589484453201294, + "learning_rate": 0.000148341290831198, + "loss": 2.5677, + "step": 6854 + }, + { + "epoch": 0.553224114276491, + "grad_norm": 0.6594784259796143, + "learning_rate": 0.00014832747049758723, + "loss": 2.6209, + "step": 6855 + }, + { + "epoch": 0.553304818013074, + "grad_norm": 0.726598858833313, + "learning_rate": 0.00014831364895952952, + "loss": 2.6492, + "step": 6856 + }, + { + "epoch": 0.553385521749657, + "grad_norm": 0.6668030023574829, + "learning_rate": 0.0001482998262173694, + "loss": 2.6057, + "step": 6857 + }, + { + "epoch": 0.55346622548624, + 
"grad_norm": 0.7698997855186462, + "learning_rate": 0.0001482860022714514, + "loss": 2.6215, + "step": 6858 + }, + { + "epoch": 0.553546929222823, + "grad_norm": 0.6805251836776733, + "learning_rate": 0.00014827217712211997, + "loss": 2.5855, + "step": 6859 + }, + { + "epoch": 0.553627632959406, + "grad_norm": 0.8481020331382751, + "learning_rate": 0.00014825835076971968, + "loss": 2.6218, + "step": 6860 + }, + { + "epoch": 0.5537083366959891, + "grad_norm": 0.6801722645759583, + "learning_rate": 0.00014824452321459517, + "loss": 2.5998, + "step": 6861 + }, + { + "epoch": 0.553789040432572, + "grad_norm": 0.7174597978591919, + "learning_rate": 0.00014823069445709104, + "loss": 2.5782, + "step": 6862 + }, + { + "epoch": 0.553869744169155, + "grad_norm": 0.7607117891311646, + "learning_rate": 0.0001482168644975519, + "loss": 2.6492, + "step": 6863 + }, + { + "epoch": 0.553950447905738, + "grad_norm": 0.7554265856742859, + "learning_rate": 0.00014820303333632246, + "loss": 2.6511, + "step": 6864 + }, + { + "epoch": 0.5540311516423211, + "grad_norm": 0.7520260214805603, + "learning_rate": 0.00014818920097374745, + "loss": 2.6258, + "step": 6865 + }, + { + "epoch": 0.5541118553789041, + "grad_norm": 0.7897995114326477, + "learning_rate": 0.00014817536741017152, + "loss": 2.6153, + "step": 6866 + }, + { + "epoch": 0.554192559115487, + "grad_norm": 0.7444615960121155, + "learning_rate": 0.00014816153264593957, + "loss": 2.5892, + "step": 6867 + }, + { + "epoch": 0.55427326285207, + "grad_norm": 0.6593222618103027, + "learning_rate": 0.0001481476966813963, + "loss": 2.6048, + "step": 6868 + }, + { + "epoch": 0.5543539665886531, + "grad_norm": 0.7517102360725403, + "learning_rate": 0.0001481338595168866, + "loss": 2.6496, + "step": 6869 + }, + { + "epoch": 0.5544346703252361, + "grad_norm": 0.7314056754112244, + "learning_rate": 0.00014812002115275529, + "loss": 2.6009, + "step": 6870 + }, + { + "epoch": 0.554515374061819, + "grad_norm": 0.6718037724494934, + 
"learning_rate": 0.00014810618158934722, + "loss": 2.6279, + "step": 6871 + }, + { + "epoch": 0.554596077798402, + "grad_norm": 0.6853529810905457, + "learning_rate": 0.00014809234082700735, + "loss": 2.6562, + "step": 6872 + }, + { + "epoch": 0.5546767815349851, + "grad_norm": 0.713599443435669, + "learning_rate": 0.0001480784988660807, + "loss": 2.5783, + "step": 6873 + }, + { + "epoch": 0.5547574852715681, + "grad_norm": 0.6820243000984192, + "learning_rate": 0.00014806465570691213, + "loss": 2.5753, + "step": 6874 + }, + { + "epoch": 0.5548381890081511, + "grad_norm": 0.6999152302742004, + "learning_rate": 0.00014805081134984673, + "loss": 2.5839, + "step": 6875 + }, + { + "epoch": 0.554918892744734, + "grad_norm": 0.7145923376083374, + "learning_rate": 0.00014803696579522948, + "loss": 2.6153, + "step": 6876 + }, + { + "epoch": 0.5549995964813171, + "grad_norm": 0.7569223046302795, + "learning_rate": 0.00014802311904340548, + "loss": 2.5879, + "step": 6877 + }, + { + "epoch": 0.5550803002179001, + "grad_norm": 0.6977131962776184, + "learning_rate": 0.00014800927109471983, + "loss": 2.6587, + "step": 6878 + }, + { + "epoch": 0.5551610039544831, + "grad_norm": 0.6693562865257263, + "learning_rate": 0.00014799542194951764, + "loss": 2.6271, + "step": 6879 + }, + { + "epoch": 0.5552417076910661, + "grad_norm": 0.6937456130981445, + "learning_rate": 0.00014798157160814406, + "loss": 2.6213, + "step": 6880 + }, + { + "epoch": 0.5553224114276492, + "grad_norm": 0.761538565158844, + "learning_rate": 0.0001479677200709443, + "loss": 2.6053, + "step": 6881 + }, + { + "epoch": 0.5554031151642321, + "grad_norm": 0.707457959651947, + "learning_rate": 0.00014795386733826356, + "loss": 2.5763, + "step": 6882 + }, + { + "epoch": 0.5554838189008151, + "grad_norm": 0.7323198318481445, + "learning_rate": 0.0001479400134104471, + "loss": 2.6899, + "step": 6883 + }, + { + "epoch": 0.5555645226373981, + "grad_norm": 0.7181541323661804, + "learning_rate": 0.0001479261582878402, + 
"loss": 2.5743, + "step": 6884 + }, + { + "epoch": 0.5556452263739811, + "grad_norm": 0.7683241367340088, + "learning_rate": 0.00014791230197078813, + "loss": 2.5295, + "step": 6885 + }, + { + "epoch": 0.5557259301105641, + "grad_norm": 0.7248150706291199, + "learning_rate": 0.00014789844445963626, + "loss": 2.6131, + "step": 6886 + }, + { + "epoch": 0.5558066338471471, + "grad_norm": 0.6868402361869812, + "learning_rate": 0.00014788458575472997, + "loss": 2.6182, + "step": 6887 + }, + { + "epoch": 0.5558873375837301, + "grad_norm": 0.6995798945426941, + "learning_rate": 0.0001478707258564146, + "loss": 2.5969, + "step": 6888 + }, + { + "epoch": 0.5559680413203131, + "grad_norm": 0.6912558078765869, + "learning_rate": 0.00014785686476503565, + "loss": 2.6264, + "step": 6889 + }, + { + "epoch": 0.5560487450568962, + "grad_norm": 0.7485123872756958, + "learning_rate": 0.00014784300248093848, + "loss": 2.6036, + "step": 6890 + }, + { + "epoch": 0.5561294487934791, + "grad_norm": 0.7150819897651672, + "learning_rate": 0.00014782913900446864, + "loss": 2.5807, + "step": 6891 + }, + { + "epoch": 0.5562101525300621, + "grad_norm": 0.6715224385261536, + "learning_rate": 0.00014781527433597167, + "loss": 2.6164, + "step": 6892 + }, + { + "epoch": 0.5562908562666451, + "grad_norm": 0.6951256394386292, + "learning_rate": 0.000147801408475793, + "loss": 2.6106, + "step": 6893 + }, + { + "epoch": 0.5563715600032282, + "grad_norm": 0.7296997904777527, + "learning_rate": 0.00014778754142427832, + "loss": 2.6182, + "step": 6894 + }, + { + "epoch": 0.5564522637398112, + "grad_norm": 0.7484713196754456, + "learning_rate": 0.0001477736731817732, + "loss": 2.6384, + "step": 6895 + }, + { + "epoch": 0.5565329674763941, + "grad_norm": 0.6967526078224182, + "learning_rate": 0.00014775980374862326, + "loss": 2.5889, + "step": 6896 + }, + { + "epoch": 0.5566136712129771, + "grad_norm": 0.7004885077476501, + "learning_rate": 0.00014774593312517415, + "loss": 2.6549, + "step": 6897 + }, + { 
+ "epoch": 0.5566943749495602, + "grad_norm": 0.7069302201271057, + "learning_rate": 0.00014773206131177158, + "loss": 2.6408, + "step": 6898 + }, + { + "epoch": 0.5567750786861432, + "grad_norm": 0.7048566341400146, + "learning_rate": 0.00014771818830876127, + "loss": 2.5909, + "step": 6899 + }, + { + "epoch": 0.5568557824227262, + "grad_norm": 0.7386630773544312, + "learning_rate": 0.00014770431411648897, + "loss": 2.6402, + "step": 6900 + }, + { + "epoch": 0.5569364861593091, + "grad_norm": 0.7244876027107239, + "learning_rate": 0.00014769043873530047, + "loss": 2.5548, + "step": 6901 + }, + { + "epoch": 0.5570171898958922, + "grad_norm": 0.6820651888847351, + "learning_rate": 0.00014767656216554156, + "loss": 2.682, + "step": 6902 + }, + { + "epoch": 0.5570978936324752, + "grad_norm": 0.7281784415245056, + "learning_rate": 0.00014766268440755812, + "loss": 2.622, + "step": 6903 + }, + { + "epoch": 0.5571785973690582, + "grad_norm": 0.6525030136108398, + "learning_rate": 0.00014764880546169594, + "loss": 2.5809, + "step": 6904 + }, + { + "epoch": 0.5572593011056411, + "grad_norm": 0.6735210418701172, + "learning_rate": 0.00014763492532830102, + "loss": 2.6645, + "step": 6905 + }, + { + "epoch": 0.5573400048422242, + "grad_norm": 0.674700140953064, + "learning_rate": 0.00014762104400771922, + "loss": 2.6466, + "step": 6906 + }, + { + "epoch": 0.5574207085788072, + "grad_norm": 0.7570134401321411, + "learning_rate": 0.00014760716150029652, + "loss": 2.57, + "step": 6907 + }, + { + "epoch": 0.5575014123153902, + "grad_norm": 0.6532449722290039, + "learning_rate": 0.00014759327780637893, + "loss": 2.6207, + "step": 6908 + }, + { + "epoch": 0.5575821160519732, + "grad_norm": 0.7697737812995911, + "learning_rate": 0.00014757939292631242, + "loss": 2.5846, + "step": 6909 + }, + { + "epoch": 0.5576628197885563, + "grad_norm": 0.6750194430351257, + "learning_rate": 0.00014756550686044308, + "loss": 2.6421, + "step": 6910 + }, + { + "epoch": 0.5577435235251392, + 
"grad_norm": 0.7357683777809143, + "learning_rate": 0.00014755161960911697, + "loss": 2.6173, + "step": 6911 + }, + { + "epoch": 0.5578242272617222, + "grad_norm": 0.6812090277671814, + "learning_rate": 0.0001475377311726802, + "loss": 2.5556, + "step": 6912 + }, + { + "epoch": 0.5579049309983052, + "grad_norm": 0.7633040547370911, + "learning_rate": 0.00014752384155147888, + "loss": 2.6505, + "step": 6913 + }, + { + "epoch": 0.5579856347348883, + "grad_norm": 0.7426417469978333, + "learning_rate": 0.00014750995074585922, + "loss": 2.5575, + "step": 6914 + }, + { + "epoch": 0.5580663384714712, + "grad_norm": 0.6926711201667786, + "learning_rate": 0.00014749605875616744, + "loss": 2.5751, + "step": 6915 + }, + { + "epoch": 0.5581470422080542, + "grad_norm": 0.70630943775177, + "learning_rate": 0.00014748216558274966, + "loss": 2.6228, + "step": 6916 + }, + { + "epoch": 0.5582277459446372, + "grad_norm": 0.7183346748352051, + "learning_rate": 0.0001474682712259522, + "loss": 2.5704, + "step": 6917 + }, + { + "epoch": 0.5583084496812203, + "grad_norm": 0.7622792720794678, + "learning_rate": 0.00014745437568612136, + "loss": 2.6031, + "step": 6918 + }, + { + "epoch": 0.5583891534178033, + "grad_norm": 0.6967802047729492, + "learning_rate": 0.00014744047896360344, + "loss": 2.6031, + "step": 6919 + }, + { + "epoch": 0.5584698571543862, + "grad_norm": 0.7827191948890686, + "learning_rate": 0.00014742658105874475, + "loss": 2.5427, + "step": 6920 + }, + { + "epoch": 0.5585505608909692, + "grad_norm": 0.6865705847740173, + "learning_rate": 0.0001474126819718917, + "loss": 2.6514, + "step": 6921 + }, + { + "epoch": 0.5586312646275523, + "grad_norm": 0.7181665897369385, + "learning_rate": 0.0001473987817033906, + "loss": 2.613, + "step": 6922 + }, + { + "epoch": 0.5587119683641353, + "grad_norm": 0.7198463082313538, + "learning_rate": 0.00014738488025358806, + "loss": 2.6423, + "step": 6923 + }, + { + "epoch": 0.5587926721007183, + "grad_norm": 0.773078441619873, + 
"learning_rate": 0.00014737097762283042, + "loss": 2.5946, + "step": 6924 + }, + { + "epoch": 0.5588733758373012, + "grad_norm": 0.7732799649238586, + "learning_rate": 0.00014735707381146416, + "loss": 2.6778, + "step": 6925 + }, + { + "epoch": 0.5589540795738843, + "grad_norm": 0.7639997601509094, + "learning_rate": 0.00014734316881983585, + "loss": 2.6064, + "step": 6926 + }, + { + "epoch": 0.5590347833104673, + "grad_norm": 0.7912085652351379, + "learning_rate": 0.00014732926264829198, + "loss": 2.5765, + "step": 6927 + }, + { + "epoch": 0.5591154870470503, + "grad_norm": 0.7460121512413025, + "learning_rate": 0.0001473153552971792, + "loss": 2.6724, + "step": 6928 + }, + { + "epoch": 0.5591961907836333, + "grad_norm": 0.6853603720664978, + "learning_rate": 0.00014730144676684408, + "loss": 2.5846, + "step": 6929 + }, + { + "epoch": 0.5592768945202163, + "grad_norm": 0.7368159294128418, + "learning_rate": 0.00014728753705763324, + "loss": 2.6626, + "step": 6930 + }, + { + "epoch": 0.5593575982567993, + "grad_norm": 0.6888907551765442, + "learning_rate": 0.0001472736261698934, + "loss": 2.6169, + "step": 6931 + }, + { + "epoch": 0.5594383019933823, + "grad_norm": 0.6978163719177246, + "learning_rate": 0.0001472597141039712, + "loss": 2.6367, + "step": 6932 + }, + { + "epoch": 0.5595190057299653, + "grad_norm": 0.7829774618148804, + "learning_rate": 0.00014724580086021335, + "loss": 2.5983, + "step": 6933 + }, + { + "epoch": 0.5595997094665484, + "grad_norm": 0.7872018218040466, + "learning_rate": 0.0001472318864389667, + "loss": 2.5418, + "step": 6934 + }, + { + "epoch": 0.5596804132031313, + "grad_norm": 0.6994973421096802, + "learning_rate": 0.00014721797084057793, + "loss": 2.6062, + "step": 6935 + }, + { + "epoch": 0.5597611169397143, + "grad_norm": 0.7281144857406616, + "learning_rate": 0.00014720405406539394, + "loss": 2.573, + "step": 6936 + }, + { + "epoch": 0.5598418206762973, + "grad_norm": 0.713513970375061, + "learning_rate": 0.0001471901361137615, + 
"loss": 2.6589, + "step": 6937 + }, + { + "epoch": 0.5599225244128803, + "grad_norm": 0.7752750515937805, + "learning_rate": 0.00014717621698602754, + "loss": 2.6478, + "step": 6938 + }, + { + "epoch": 0.5600032281494634, + "grad_norm": 0.6876000165939331, + "learning_rate": 0.00014716229668253889, + "loss": 2.6092, + "step": 6939 + }, + { + "epoch": 0.5600839318860463, + "grad_norm": 0.6371028423309326, + "learning_rate": 0.00014714837520364256, + "loss": 2.606, + "step": 6940 + }, + { + "epoch": 0.5601646356226293, + "grad_norm": 0.6488915085792542, + "learning_rate": 0.00014713445254968546, + "loss": 2.5769, + "step": 6941 + }, + { + "epoch": 0.5602453393592123, + "grad_norm": 0.7286413908004761, + "learning_rate": 0.00014712052872101458, + "loss": 2.6267, + "step": 6942 + }, + { + "epoch": 0.5603260430957954, + "grad_norm": 0.6863759160041809, + "learning_rate": 0.00014710660371797696, + "loss": 2.641, + "step": 6943 + }, + { + "epoch": 0.5604067468323783, + "grad_norm": 0.706900417804718, + "learning_rate": 0.00014709267754091964, + "loss": 2.6344, + "step": 6944 + }, + { + "epoch": 0.5604874505689613, + "grad_norm": 0.6462892293930054, + "learning_rate": 0.0001470787501901897, + "loss": 2.5561, + "step": 6945 + }, + { + "epoch": 0.5605681543055443, + "grad_norm": 0.7342472076416016, + "learning_rate": 0.00014706482166613425, + "loss": 2.583, + "step": 6946 + }, + { + "epoch": 0.5606488580421274, + "grad_norm": 0.7132803797721863, + "learning_rate": 0.00014705089196910038, + "loss": 2.558, + "step": 6947 + }, + { + "epoch": 0.5607295617787104, + "grad_norm": 0.7709125876426697, + "learning_rate": 0.00014703696109943533, + "loss": 2.6165, + "step": 6948 + }, + { + "epoch": 0.5608102655152933, + "grad_norm": 0.7108885645866394, + "learning_rate": 0.00014702302905748619, + "loss": 2.5788, + "step": 6949 + }, + { + "epoch": 0.5608909692518763, + "grad_norm": 0.7295591235160828, + "learning_rate": 0.0001470090958436003, + "loss": 2.6526, + "step": 6950 + }, + { + 
"epoch": 0.5609716729884594, + "grad_norm": 0.7235364317893982, + "learning_rate": 0.00014699516145812486, + "loss": 2.604, + "step": 6951 + }, + { + "epoch": 0.5610523767250424, + "grad_norm": 0.6723269820213318, + "learning_rate": 0.00014698122590140714, + "loss": 2.5838, + "step": 6952 + }, + { + "epoch": 0.5611330804616254, + "grad_norm": 0.7022266983985901, + "learning_rate": 0.00014696728917379447, + "loss": 2.6086, + "step": 6953 + }, + { + "epoch": 0.5612137841982083, + "grad_norm": 0.6923824548721313, + "learning_rate": 0.00014695335127563414, + "loss": 2.6678, + "step": 6954 + }, + { + "epoch": 0.5612944879347914, + "grad_norm": 0.6909339427947998, + "learning_rate": 0.0001469394122072736, + "loss": 2.6397, + "step": 6955 + }, + { + "epoch": 0.5613751916713744, + "grad_norm": 0.710299015045166, + "learning_rate": 0.00014692547196906022, + "loss": 2.5973, + "step": 6956 + }, + { + "epoch": 0.5614558954079574, + "grad_norm": 0.7141178250312805, + "learning_rate": 0.00014691153056134136, + "loss": 2.6111, + "step": 6957 + }, + { + "epoch": 0.5615365991445403, + "grad_norm": 0.6994750499725342, + "learning_rate": 0.00014689758798446456, + "loss": 2.6498, + "step": 6958 + }, + { + "epoch": 0.5616173028811234, + "grad_norm": 0.6951611638069153, + "learning_rate": 0.00014688364423877726, + "loss": 2.6208, + "step": 6959 + }, + { + "epoch": 0.5616980066177064, + "grad_norm": 0.6610642075538635, + "learning_rate": 0.000146869699324627, + "loss": 2.5725, + "step": 6960 + }, + { + "epoch": 0.5617787103542894, + "grad_norm": 0.6771267056465149, + "learning_rate": 0.00014685575324236135, + "loss": 2.6336, + "step": 6961 + }, + { + "epoch": 0.5618594140908724, + "grad_norm": 0.7431008815765381, + "learning_rate": 0.0001468418059923278, + "loss": 2.6782, + "step": 6962 + }, + { + "epoch": 0.5619401178274555, + "grad_norm": 0.7399705648422241, + "learning_rate": 0.000146827857574874, + "loss": 2.6212, + "step": 6963 + }, + { + "epoch": 0.5620208215640384, + "grad_norm": 
0.7237067222595215, + "learning_rate": 0.00014681390799034763, + "loss": 2.6261, + "step": 6964 + }, + { + "epoch": 0.5621015253006214, + "grad_norm": 0.7033257484436035, + "learning_rate": 0.00014679995723909623, + "loss": 2.6912, + "step": 6965 + }, + { + "epoch": 0.5621822290372044, + "grad_norm": 0.6953759789466858, + "learning_rate": 0.00014678600532146762, + "loss": 2.6022, + "step": 6966 + }, + { + "epoch": 0.5622629327737875, + "grad_norm": 0.8338057994842529, + "learning_rate": 0.0001467720522378094, + "loss": 2.595, + "step": 6967 + }, + { + "epoch": 0.5623436365103704, + "grad_norm": 0.6506100296974182, + "learning_rate": 0.00014675809798846942, + "loss": 2.6033, + "step": 6968 + }, + { + "epoch": 0.5624243402469534, + "grad_norm": 0.7122468948364258, + "learning_rate": 0.0001467441425737954, + "loss": 2.56, + "step": 6969 + }, + { + "epoch": 0.5625050439835364, + "grad_norm": 0.7012680172920227, + "learning_rate": 0.00014673018599413516, + "loss": 2.6052, + "step": 6970 + }, + { + "epoch": 0.5625857477201195, + "grad_norm": 0.668187141418457, + "learning_rate": 0.00014671622824983653, + "loss": 2.6675, + "step": 6971 + }, + { + "epoch": 0.5626664514567025, + "grad_norm": 0.7259203791618347, + "learning_rate": 0.00014670226934124738, + "loss": 2.5977, + "step": 6972 + }, + { + "epoch": 0.5627471551932854, + "grad_norm": 0.6705875396728516, + "learning_rate": 0.00014668830926871555, + "loss": 2.649, + "step": 6973 + }, + { + "epoch": 0.5628278589298684, + "grad_norm": 0.682731568813324, + "learning_rate": 0.00014667434803258906, + "loss": 2.6084, + "step": 6974 + }, + { + "epoch": 0.5629085626664515, + "grad_norm": 0.7061700224876404, + "learning_rate": 0.00014666038563321577, + "loss": 2.6256, + "step": 6975 + }, + { + "epoch": 0.5629892664030345, + "grad_norm": 0.6839977502822876, + "learning_rate": 0.00014664642207094374, + "loss": 2.6342, + "step": 6976 + }, + { + "epoch": 0.5630699701396175, + "grad_norm": 0.7376503348350525, + "learning_rate": 
0.00014663245734612094, + "loss": 2.6001, + "step": 6977 + }, + { + "epoch": 0.5631506738762004, + "grad_norm": 0.6901546716690063, + "learning_rate": 0.0001466184914590954, + "loss": 2.6715, + "step": 6978 + }, + { + "epoch": 0.5632313776127835, + "grad_norm": 0.816223680973053, + "learning_rate": 0.00014660452441021512, + "loss": 2.6407, + "step": 6979 + }, + { + "epoch": 0.5633120813493665, + "grad_norm": 0.6904644966125488, + "learning_rate": 0.00014659055619982835, + "loss": 2.5543, + "step": 6980 + }, + { + "epoch": 0.5633927850859495, + "grad_norm": 0.6784235239028931, + "learning_rate": 0.0001465765868282831, + "loss": 2.6184, + "step": 6981 + }, + { + "epoch": 0.5634734888225325, + "grad_norm": 0.7689006328582764, + "learning_rate": 0.00014656261629592755, + "loss": 2.644, + "step": 6982 + }, + { + "epoch": 0.5635541925591155, + "grad_norm": 0.7608775496482849, + "learning_rate": 0.0001465486446031099, + "loss": 2.5952, + "step": 6983 + }, + { + "epoch": 0.5636348962956985, + "grad_norm": 0.7266525626182556, + "learning_rate": 0.00014653467175017833, + "loss": 2.6479, + "step": 6984 + }, + { + "epoch": 0.5637156000322815, + "grad_norm": 0.6907477974891663, + "learning_rate": 0.00014652069773748113, + "loss": 2.5825, + "step": 6985 + }, + { + "epoch": 0.5637963037688645, + "grad_norm": 0.7790403366088867, + "learning_rate": 0.00014650672256536648, + "loss": 2.5948, + "step": 6986 + }, + { + "epoch": 0.5638770075054474, + "grad_norm": 0.7072858214378357, + "learning_rate": 0.00014649274623418278, + "loss": 2.6017, + "step": 6987 + }, + { + "epoch": 0.5639577112420305, + "grad_norm": 0.7140414118766785, + "learning_rate": 0.0001464787687442783, + "loss": 2.5709, + "step": 6988 + }, + { + "epoch": 0.5640384149786135, + "grad_norm": 0.857783317565918, + "learning_rate": 0.00014646479009600139, + "loss": 2.7049, + "step": 6989 + }, + { + "epoch": 0.5641191187151965, + "grad_norm": 0.7599344253540039, + "learning_rate": 0.00014645081028970047, + "loss": 2.6369, + 
"step": 6990 + }, + { + "epoch": 0.5641998224517795, + "grad_norm": 0.7286150455474854, + "learning_rate": 0.00014643682932572393, + "loss": 2.6238, + "step": 6991 + }, + { + "epoch": 0.5642805261883626, + "grad_norm": 0.7095075249671936, + "learning_rate": 0.0001464228472044202, + "loss": 2.5924, + "step": 6992 + }, + { + "epoch": 0.5643612299249455, + "grad_norm": 0.7583668828010559, + "learning_rate": 0.0001464088639261378, + "loss": 2.6098, + "step": 6993 + }, + { + "epoch": 0.5644419336615285, + "grad_norm": 0.7393970489501953, + "learning_rate": 0.00014639487949122515, + "loss": 2.6036, + "step": 6994 + }, + { + "epoch": 0.5645226373981115, + "grad_norm": 0.6789388656616211, + "learning_rate": 0.00014638089390003086, + "loss": 2.642, + "step": 6995 + }, + { + "epoch": 0.5646033411346946, + "grad_norm": 0.8021289706230164, + "learning_rate": 0.00014636690715290346, + "loss": 2.6851, + "step": 6996 + }, + { + "epoch": 0.5646840448712775, + "grad_norm": 0.6931039094924927, + "learning_rate": 0.00014635291925019152, + "loss": 2.6358, + "step": 6997 + }, + { + "epoch": 0.5647647486078605, + "grad_norm": 0.7356590032577515, + "learning_rate": 0.00014633893019224366, + "loss": 2.5661, + "step": 6998 + }, + { + "epoch": 0.5648454523444435, + "grad_norm": 0.6777941584587097, + "learning_rate": 0.0001463249399794085, + "loss": 2.5578, + "step": 6999 + }, + { + "epoch": 0.5649261560810266, + "grad_norm": 0.7163615822792053, + "learning_rate": 0.0001463109486120348, + "loss": 2.5582, + "step": 7000 + }, + { + "epoch": 0.5649261560810266, + "eval_loss": 2.5298855304718018, + "eval_runtime": 757.774, + "eval_samples_per_second": 3.457, + "eval_steps_per_second": 0.577, + "step": 7000 + }, + { + "epoch": 0.5650068598176096, + "grad_norm": 0.7175148129463196, + "learning_rate": 0.0001462969560904712, + "loss": 2.568, + "step": 7001 + }, + { + "epoch": 0.5650875635541925, + "grad_norm": 0.6998937129974365, + "learning_rate": 0.00014628296241506636, + "loss": 2.6347, + "step": 
7002 + }, + { + "epoch": 0.5651682672907755, + "grad_norm": 0.8140312433242798, + "learning_rate": 0.00014626896758616916, + "loss": 2.6566, + "step": 7003 + }, + { + "epoch": 0.5652489710273586, + "grad_norm": 0.7218164205551147, + "learning_rate": 0.00014625497160412833, + "loss": 2.5693, + "step": 7004 + }, + { + "epoch": 0.5653296747639416, + "grad_norm": 0.6974074244499207, + "learning_rate": 0.0001462409744692927, + "loss": 2.6084, + "step": 7005 + }, + { + "epoch": 0.5654103785005246, + "grad_norm": 0.7475053071975708, + "learning_rate": 0.00014622697618201113, + "loss": 2.6534, + "step": 7006 + }, + { + "epoch": 0.5654910822371075, + "grad_norm": 0.6768492460250854, + "learning_rate": 0.00014621297674263247, + "loss": 2.585, + "step": 7007 + }, + { + "epoch": 0.5655717859736906, + "grad_norm": 0.7023029923439026, + "learning_rate": 0.0001461989761515056, + "loss": 2.6219, + "step": 7008 + }, + { + "epoch": 0.5656524897102736, + "grad_norm": 0.7248445749282837, + "learning_rate": 0.0001461849744089795, + "loss": 2.6382, + "step": 7009 + }, + { + "epoch": 0.5657331934468566, + "grad_norm": 0.6961148381233215, + "learning_rate": 0.00014617097151540308, + "loss": 2.7184, + "step": 7010 + }, + { + "epoch": 0.5658138971834396, + "grad_norm": 0.6649057269096375, + "learning_rate": 0.0001461569674711254, + "loss": 2.6059, + "step": 7011 + }, + { + "epoch": 0.5658946009200226, + "grad_norm": 0.7451788783073425, + "learning_rate": 0.00014614296227649542, + "loss": 2.5697, + "step": 7012 + }, + { + "epoch": 0.5659753046566056, + "grad_norm": 0.6880216598510742, + "learning_rate": 0.0001461289559318622, + "loss": 2.5785, + "step": 7013 + }, + { + "epoch": 0.5660560083931886, + "grad_norm": 0.7505971789360046, + "learning_rate": 0.00014611494843757482, + "loss": 2.5479, + "step": 7014 + }, + { + "epoch": 0.5661367121297716, + "grad_norm": 0.745914876461029, + "learning_rate": 0.00014610093979398235, + "loss": 2.6367, + "step": 7015 + }, + { + "epoch": 
0.5662174158663547, + "grad_norm": 0.6758660674095154, + "learning_rate": 0.000146086930001434, + "loss": 2.5673, + "step": 7016 + }, + { + "epoch": 0.5662981196029376, + "grad_norm": 0.7114273309707642, + "learning_rate": 0.00014607291906027886, + "loss": 2.6188, + "step": 7017 + }, + { + "epoch": 0.5663788233395206, + "grad_norm": 0.6791165471076965, + "learning_rate": 0.00014605890697086613, + "loss": 2.6197, + "step": 7018 + }, + { + "epoch": 0.5664595270761036, + "grad_norm": 0.6948217153549194, + "learning_rate": 0.00014604489373354503, + "loss": 2.5996, + "step": 7019 + }, + { + "epoch": 0.5665402308126867, + "grad_norm": 0.6993576884269714, + "learning_rate": 0.00014603087934866483, + "loss": 2.565, + "step": 7020 + }, + { + "epoch": 0.5666209345492697, + "grad_norm": 0.6936905384063721, + "learning_rate": 0.0001460168638165748, + "loss": 2.6524, + "step": 7021 + }, + { + "epoch": 0.5667016382858526, + "grad_norm": 0.6810741424560547, + "learning_rate": 0.00014600284713762424, + "loss": 2.6519, + "step": 7022 + }, + { + "epoch": 0.5667823420224356, + "grad_norm": 0.7540227770805359, + "learning_rate": 0.00014598882931216245, + "loss": 2.659, + "step": 7023 + }, + { + "epoch": 0.5668630457590187, + "grad_norm": 0.6520613431930542, + "learning_rate": 0.0001459748103405388, + "loss": 2.5341, + "step": 7024 + }, + { + "epoch": 0.5669437494956017, + "grad_norm": 0.7159109711647034, + "learning_rate": 0.00014596079022310277, + "loss": 2.6548, + "step": 7025 + }, + { + "epoch": 0.5670244532321846, + "grad_norm": 0.803284227848053, + "learning_rate": 0.00014594676896020366, + "loss": 2.705, + "step": 7026 + }, + { + "epoch": 0.5671051569687676, + "grad_norm": 0.7069976925849915, + "learning_rate": 0.00014593274655219095, + "loss": 2.5733, + "step": 7027 + }, + { + "epoch": 0.5671858607053507, + "grad_norm": 0.7085167169570923, + "learning_rate": 0.00014591872299941417, + "loss": 2.6247, + "step": 7028 + }, + { + "epoch": 0.5672665644419337, + "grad_norm": 
0.6748499274253845, + "learning_rate": 0.00014590469830222272, + "loss": 2.6446, + "step": 7029 + }, + { + "epoch": 0.5673472681785167, + "grad_norm": 0.6885821223258972, + "learning_rate": 0.00014589067246096623, + "loss": 2.5879, + "step": 7030 + }, + { + "epoch": 0.5674279719150996, + "grad_norm": 0.7220324277877808, + "learning_rate": 0.0001458766454759942, + "loss": 2.6249, + "step": 7031 + }, + { + "epoch": 0.5675086756516827, + "grad_norm": 0.6712783575057983, + "learning_rate": 0.00014586261734765628, + "loss": 2.5971, + "step": 7032 + }, + { + "epoch": 0.5675893793882657, + "grad_norm": 0.6582161784172058, + "learning_rate": 0.00014584858807630203, + "loss": 2.6224, + "step": 7033 + }, + { + "epoch": 0.5676700831248487, + "grad_norm": 0.6699219346046448, + "learning_rate": 0.0001458345576622811, + "loss": 2.5926, + "step": 7034 + }, + { + "epoch": 0.5677507868614317, + "grad_norm": 0.6508033871650696, + "learning_rate": 0.0001458205261059432, + "loss": 2.6311, + "step": 7035 + }, + { + "epoch": 0.5678314905980147, + "grad_norm": 0.7551338076591492, + "learning_rate": 0.00014580649340763802, + "loss": 2.5729, + "step": 7036 + }, + { + "epoch": 0.5679121943345977, + "grad_norm": 0.6875829100608826, + "learning_rate": 0.00014579245956771527, + "loss": 2.6253, + "step": 7037 + }, + { + "epoch": 0.5679928980711807, + "grad_norm": 0.698204517364502, + "learning_rate": 0.00014577842458652474, + "loss": 2.6218, + "step": 7038 + }, + { + "epoch": 0.5680736018077637, + "grad_norm": 0.8258630037307739, + "learning_rate": 0.00014576438846441615, + "loss": 2.6307, + "step": 7039 + }, + { + "epoch": 0.5681543055443466, + "grad_norm": 0.753105878829956, + "learning_rate": 0.00014575035120173942, + "loss": 2.5664, + "step": 7040 + }, + { + "epoch": 0.5682350092809297, + "grad_norm": 0.6999726295471191, + "learning_rate": 0.00014573631279884435, + "loss": 2.6857, + "step": 7041 + }, + { + "epoch": 0.5683157130175127, + "grad_norm": 0.6484847068786621, + "learning_rate": 
0.00014572227325608078, + "loss": 2.6068, + "step": 7042 + }, + { + "epoch": 0.5683964167540957, + "grad_norm": 0.7098011374473572, + "learning_rate": 0.00014570823257379866, + "loss": 2.6591, + "step": 7043 + }, + { + "epoch": 0.5684771204906787, + "grad_norm": 0.8304192423820496, + "learning_rate": 0.0001456941907523479, + "loss": 2.6582, + "step": 7044 + }, + { + "epoch": 0.5685578242272618, + "grad_norm": 0.763214111328125, + "learning_rate": 0.00014568014779207844, + "loss": 2.6605, + "step": 7045 + }, + { + "epoch": 0.5686385279638447, + "grad_norm": 0.6805880665779114, + "learning_rate": 0.00014566610369334032, + "loss": 2.6362, + "step": 7046 + }, + { + "epoch": 0.5687192317004277, + "grad_norm": 0.6753434538841248, + "learning_rate": 0.00014565205845648352, + "loss": 2.6352, + "step": 7047 + }, + { + "epoch": 0.5687999354370107, + "grad_norm": 0.7065438032150269, + "learning_rate": 0.00014563801208185807, + "loss": 2.5975, + "step": 7048 + }, + { + "epoch": 0.5688806391735938, + "grad_norm": 0.6863527894020081, + "learning_rate": 0.00014562396456981407, + "loss": 2.576, + "step": 7049 + }, + { + "epoch": 0.5689613429101767, + "grad_norm": 0.7344440817832947, + "learning_rate": 0.00014560991592070158, + "loss": 2.5933, + "step": 7050 + }, + { + "epoch": 0.5690420466467597, + "grad_norm": 0.699992835521698, + "learning_rate": 0.00014559586613487082, + "loss": 2.6161, + "step": 7051 + }, + { + "epoch": 0.5691227503833427, + "grad_norm": 0.7287258505821228, + "learning_rate": 0.00014558181521267185, + "loss": 2.665, + "step": 7052 + }, + { + "epoch": 0.5692034541199258, + "grad_norm": 0.7304692268371582, + "learning_rate": 0.0001455677631544549, + "loss": 2.5696, + "step": 7053 + }, + { + "epoch": 0.5692841578565088, + "grad_norm": 0.6556086540222168, + "learning_rate": 0.00014555370996057016, + "loss": 2.6405, + "step": 7054 + }, + { + "epoch": 0.5693648615930917, + "grad_norm": 0.6796221137046814, + "learning_rate": 0.0001455396556313679, + "loss": 2.6475, + 
"step": 7055 + }, + { + "epoch": 0.5694455653296747, + "grad_norm": 0.7067505717277527, + "learning_rate": 0.00014552560016719838, + "loss": 2.6344, + "step": 7056 + }, + { + "epoch": 0.5695262690662578, + "grad_norm": 0.7108997106552124, + "learning_rate": 0.00014551154356841193, + "loss": 2.6543, + "step": 7057 + }, + { + "epoch": 0.5696069728028408, + "grad_norm": 0.7296212911605835, + "learning_rate": 0.0001454974858353588, + "loss": 2.6152, + "step": 7058 + }, + { + "epoch": 0.5696876765394238, + "grad_norm": 0.7329154014587402, + "learning_rate": 0.00014548342696838943, + "loss": 2.6338, + "step": 7059 + }, + { + "epoch": 0.5697683802760067, + "grad_norm": 0.6880258321762085, + "learning_rate": 0.00014546936696785412, + "loss": 2.5834, + "step": 7060 + }, + { + "epoch": 0.5698490840125898, + "grad_norm": 0.7140741348266602, + "learning_rate": 0.00014545530583410336, + "loss": 2.6361, + "step": 7061 + }, + { + "epoch": 0.5699297877491728, + "grad_norm": 0.6419476866722107, + "learning_rate": 0.00014544124356748755, + "loss": 2.4982, + "step": 7062 + }, + { + "epoch": 0.5700104914857558, + "grad_norm": 0.6934036612510681, + "learning_rate": 0.00014542718016835718, + "loss": 2.5748, + "step": 7063 + }, + { + "epoch": 0.5700911952223388, + "grad_norm": 0.721663236618042, + "learning_rate": 0.0001454131156370627, + "loss": 2.5419, + "step": 7064 + }, + { + "epoch": 0.5701718989589218, + "grad_norm": 0.734062671661377, + "learning_rate": 0.00014539904997395468, + "loss": 2.6288, + "step": 7065 + }, + { + "epoch": 0.5702526026955048, + "grad_norm": 0.7927694320678711, + "learning_rate": 0.00014538498317938367, + "loss": 2.6331, + "step": 7066 + }, + { + "epoch": 0.5703333064320878, + "grad_norm": 0.715929388999939, + "learning_rate": 0.00014537091525370025, + "loss": 2.6333, + "step": 7067 + }, + { + "epoch": 0.5704140101686708, + "grad_norm": 0.772230327129364, + "learning_rate": 0.00014535684619725498, + "loss": 2.6019, + "step": 7068 + }, + { + "epoch": 
0.5704947139052539, + "grad_norm": 0.7277318239212036, + "learning_rate": 0.0001453427760103986, + "loss": 2.6062, + "step": 7069 + }, + { + "epoch": 0.5705754176418368, + "grad_norm": 0.6708227396011353, + "learning_rate": 0.00014532870469348164, + "loss": 2.6613, + "step": 7070 + }, + { + "epoch": 0.5706561213784198, + "grad_norm": 0.7507323622703552, + "learning_rate": 0.0001453146322468549, + "loss": 2.6456, + "step": 7071 + }, + { + "epoch": 0.5707368251150028, + "grad_norm": 0.6864063739776611, + "learning_rate": 0.00014530055867086912, + "loss": 2.6361, + "step": 7072 + }, + { + "epoch": 0.5708175288515859, + "grad_norm": 0.6805310249328613, + "learning_rate": 0.00014528648396587498, + "loss": 2.6088, + "step": 7073 + }, + { + "epoch": 0.5708982325881689, + "grad_norm": 0.7946523427963257, + "learning_rate": 0.00014527240813222325, + "loss": 2.6533, + "step": 7074 + }, + { + "epoch": 0.5709789363247518, + "grad_norm": 0.6814306974411011, + "learning_rate": 0.00014525833117026474, + "loss": 2.6478, + "step": 7075 + }, + { + "epoch": 0.5710596400613348, + "grad_norm": 0.749664843082428, + "learning_rate": 0.00014524425308035034, + "loss": 2.6296, + "step": 7076 + }, + { + "epoch": 0.5711403437979179, + "grad_norm": 0.6774656772613525, + "learning_rate": 0.00014523017386283091, + "loss": 2.5867, + "step": 7077 + }, + { + "epoch": 0.5712210475345009, + "grad_norm": 0.7331634163856506, + "learning_rate": 0.00014521609351805733, + "loss": 2.6484, + "step": 7078 + }, + { + "epoch": 0.5713017512710838, + "grad_norm": 0.7076910734176636, + "learning_rate": 0.00014520201204638045, + "loss": 2.6464, + "step": 7079 + }, + { + "epoch": 0.5713824550076668, + "grad_norm": 0.74099200963974, + "learning_rate": 0.00014518792944815127, + "loss": 2.6304, + "step": 7080 + }, + { + "epoch": 0.5714631587442499, + "grad_norm": 0.6673823595046997, + "learning_rate": 0.00014517384572372078, + "loss": 2.5903, + "step": 7081 + }, + { + "epoch": 0.5715438624808329, + "grad_norm": 
0.6872609257698059, + "learning_rate": 0.00014515976087343997, + "loss": 2.6189, + "step": 7082 + }, + { + "epoch": 0.5716245662174159, + "grad_norm": 0.7363224625587463, + "learning_rate": 0.0001451456748976599, + "loss": 2.5845, + "step": 7083 + }, + { + "epoch": 0.5717052699539988, + "grad_norm": 0.7672157287597656, + "learning_rate": 0.00014513158779673157, + "loss": 2.6331, + "step": 7084 + }, + { + "epoch": 0.5717859736905819, + "grad_norm": 0.661195695400238, + "learning_rate": 0.00014511749957100612, + "loss": 2.5827, + "step": 7085 + }, + { + "epoch": 0.5718666774271649, + "grad_norm": 0.8034788370132446, + "learning_rate": 0.0001451034102208346, + "loss": 2.6209, + "step": 7086 + }, + { + "epoch": 0.5719473811637479, + "grad_norm": 0.7318302392959595, + "learning_rate": 0.00014508931974656822, + "loss": 2.5898, + "step": 7087 + }, + { + "epoch": 0.5720280849003309, + "grad_norm": 0.7334744930267334, + "learning_rate": 0.00014507522814855814, + "loss": 2.5893, + "step": 7088 + }, + { + "epoch": 0.5721087886369138, + "grad_norm": 0.783051609992981, + "learning_rate": 0.00014506113542715553, + "loss": 2.6284, + "step": 7089 + }, + { + "epoch": 0.5721894923734969, + "grad_norm": 0.7319497466087341, + "learning_rate": 0.00014504704158271165, + "loss": 2.5705, + "step": 7090 + }, + { + "epoch": 0.5722701961100799, + "grad_norm": 0.7886925935745239, + "learning_rate": 0.00014503294661557772, + "loss": 2.641, + "step": 7091 + }, + { + "epoch": 0.5723508998466629, + "grad_norm": 0.6882795691490173, + "learning_rate": 0.00014501885052610502, + "loss": 2.5714, + "step": 7092 + }, + { + "epoch": 0.5724316035832459, + "grad_norm": 0.7089235186576843, + "learning_rate": 0.00014500475331464494, + "loss": 2.6073, + "step": 7093 + }, + { + "epoch": 0.5725123073198289, + "grad_norm": 0.7261029481887817, + "learning_rate": 0.00014499065498154874, + "loss": 2.5595, + "step": 7094 + }, + { + "epoch": 0.5725930110564119, + "grad_norm": 0.7625105977058411, + "learning_rate": 
0.0001449765555271678, + "loss": 2.5978, + "step": 7095 + }, + { + "epoch": 0.5726737147929949, + "grad_norm": 0.7853986024856567, + "learning_rate": 0.00014496245495185353, + "loss": 2.6378, + "step": 7096 + }, + { + "epoch": 0.5727544185295779, + "grad_norm": 0.8070923686027527, + "learning_rate": 0.00014494835325595736, + "loss": 2.7062, + "step": 7097 + }, + { + "epoch": 0.572835122266161, + "grad_norm": 0.7074965834617615, + "learning_rate": 0.00014493425043983073, + "loss": 2.5177, + "step": 7098 + }, + { + "epoch": 0.5729158260027439, + "grad_norm": 0.6890520453453064, + "learning_rate": 0.00014492014650382512, + "loss": 2.6058, + "step": 7099 + }, + { + "epoch": 0.5729965297393269, + "grad_norm": 0.6979860067367554, + "learning_rate": 0.00014490604144829202, + "loss": 2.5274, + "step": 7100 + }, + { + "epoch": 0.5730772334759099, + "grad_norm": 0.7972229719161987, + "learning_rate": 0.000144891935273583, + "loss": 2.6369, + "step": 7101 + }, + { + "epoch": 0.573157937212493, + "grad_norm": 0.6994345188140869, + "learning_rate": 0.0001448778279800496, + "loss": 2.5975, + "step": 7102 + }, + { + "epoch": 0.573238640949076, + "grad_norm": 0.7943929433822632, + "learning_rate": 0.0001448637195680434, + "loss": 2.6317, + "step": 7103 + }, + { + "epoch": 0.5733193446856589, + "grad_norm": 0.6975306272506714, + "learning_rate": 0.00014484961003791605, + "loss": 2.6264, + "step": 7104 + }, + { + "epoch": 0.5734000484222419, + "grad_norm": 0.6889060735702515, + "learning_rate": 0.00014483549939001917, + "loss": 2.5974, + "step": 7105 + }, + { + "epoch": 0.573480752158825, + "grad_norm": 0.7372777462005615, + "learning_rate": 0.00014482138762470444, + "loss": 2.5851, + "step": 7106 + }, + { + "epoch": 0.573561455895408, + "grad_norm": 0.7045157551765442, + "learning_rate": 0.00014480727474232362, + "loss": 2.6451, + "step": 7107 + }, + { + "epoch": 0.5736421596319909, + "grad_norm": 0.6974517107009888, + "learning_rate": 0.00014479316074322832, + "loss": 2.6796, + 
"step": 7108 + }, + { + "epoch": 0.5737228633685739, + "grad_norm": 0.7328097224235535, + "learning_rate": 0.00014477904562777038, + "loss": 2.5923, + "step": 7109 + }, + { + "epoch": 0.573803567105157, + "grad_norm": 0.7288877964019775, + "learning_rate": 0.0001447649293963016, + "loss": 2.6012, + "step": 7110 + }, + { + "epoch": 0.57388427084174, + "grad_norm": 0.7054389119148254, + "learning_rate": 0.00014475081204917372, + "loss": 2.6666, + "step": 7111 + }, + { + "epoch": 0.573964974578323, + "grad_norm": 0.7447949647903442, + "learning_rate": 0.00014473669358673865, + "loss": 2.6093, + "step": 7112 + }, + { + "epoch": 0.5740456783149059, + "grad_norm": 0.6431592106819153, + "learning_rate": 0.0001447225740093482, + "loss": 2.6242, + "step": 7113 + }, + { + "epoch": 0.574126382051489, + "grad_norm": 0.7096747756004333, + "learning_rate": 0.00014470845331735434, + "loss": 2.6297, + "step": 7114 + }, + { + "epoch": 0.574207085788072, + "grad_norm": 0.6918880939483643, + "learning_rate": 0.00014469433151110894, + "loss": 2.5849, + "step": 7115 + }, + { + "epoch": 0.574287789524655, + "grad_norm": 0.6617783308029175, + "learning_rate": 0.00014468020859096395, + "loss": 2.5972, + "step": 7116 + }, + { + "epoch": 0.574368493261238, + "grad_norm": 0.6525121927261353, + "learning_rate": 0.0001446660845572714, + "loss": 2.5888, + "step": 7117 + }, + { + "epoch": 0.574449196997821, + "grad_norm": 0.7024720907211304, + "learning_rate": 0.00014465195941038326, + "loss": 2.6135, + "step": 7118 + }, + { + "epoch": 0.574529900734404, + "grad_norm": 0.7660520672798157, + "learning_rate": 0.00014463783315065153, + "loss": 2.5837, + "step": 7119 + }, + { + "epoch": 0.574610604470987, + "grad_norm": 0.8206443190574646, + "learning_rate": 0.00014462370577842838, + "loss": 2.6749, + "step": 7120 + }, + { + "epoch": 0.57469130820757, + "grad_norm": 0.7176216840744019, + "learning_rate": 0.00014460957729406577, + "loss": 2.5814, + "step": 7121 + }, + { + "epoch": 0.5747720119441531, 
+ "grad_norm": 0.7867588400840759, + "learning_rate": 0.0001445954476979159, + "loss": 2.5697, + "step": 7122 + }, + { + "epoch": 0.574852715680736, + "grad_norm": 0.7150471806526184, + "learning_rate": 0.0001445813169903309, + "loss": 2.5689, + "step": 7123 + }, + { + "epoch": 0.574933419417319, + "grad_norm": 0.7082479596138, + "learning_rate": 0.00014456718517166296, + "loss": 2.6081, + "step": 7124 + }, + { + "epoch": 0.575014123153902, + "grad_norm": 0.7207253575325012, + "learning_rate": 0.00014455305224226426, + "loss": 2.6573, + "step": 7125 + }, + { + "epoch": 0.5750948268904851, + "grad_norm": 0.7451751232147217, + "learning_rate": 0.00014453891820248704, + "loss": 2.6057, + "step": 7126 + }, + { + "epoch": 0.575175530627068, + "grad_norm": 0.7030230164527893, + "learning_rate": 0.0001445247830526835, + "loss": 2.6122, + "step": 7127 + }, + { + "epoch": 0.575256234363651, + "grad_norm": 0.7233754396438599, + "learning_rate": 0.00014451064679320605, + "loss": 2.5937, + "step": 7128 + }, + { + "epoch": 0.575336938100234, + "grad_norm": 0.6943942904472351, + "learning_rate": 0.0001444965094244069, + "loss": 2.6327, + "step": 7129 + }, + { + "epoch": 0.5754176418368171, + "grad_norm": 0.682056725025177, + "learning_rate": 0.00014448237094663843, + "loss": 2.6212, + "step": 7130 + }, + { + "epoch": 0.5754983455734001, + "grad_norm": 0.7424136400222778, + "learning_rate": 0.00014446823136025298, + "loss": 2.6031, + "step": 7131 + }, + { + "epoch": 0.575579049309983, + "grad_norm": 0.7464002370834351, + "learning_rate": 0.00014445409066560298, + "loss": 2.6363, + "step": 7132 + }, + { + "epoch": 0.575659753046566, + "grad_norm": 0.7137650847434998, + "learning_rate": 0.00014443994886304085, + "loss": 2.5343, + "step": 7133 + }, + { + "epoch": 0.5757404567831491, + "grad_norm": 0.6744158864021301, + "learning_rate": 0.00014442580595291901, + "loss": 2.6463, + "step": 7134 + }, + { + "epoch": 0.5758211605197321, + "grad_norm": 0.6947084069252014, + 
"learning_rate": 0.00014441166193558991, + "loss": 2.6074, + "step": 7135 + }, + { + "epoch": 0.5759018642563151, + "grad_norm": 0.6981585621833801, + "learning_rate": 0.00014439751681140616, + "loss": 2.6257, + "step": 7136 + }, + { + "epoch": 0.575982567992898, + "grad_norm": 0.6800102591514587, + "learning_rate": 0.00014438337058072023, + "loss": 2.6447, + "step": 7137 + }, + { + "epoch": 0.5760632717294811, + "grad_norm": 0.6952316164970398, + "learning_rate": 0.00014436922324388465, + "loss": 2.5739, + "step": 7138 + }, + { + "epoch": 0.5761439754660641, + "grad_norm": 0.709170937538147, + "learning_rate": 0.0001443550748012521, + "loss": 2.5918, + "step": 7139 + }, + { + "epoch": 0.5762246792026471, + "grad_norm": 0.7677363157272339, + "learning_rate": 0.00014434092525317512, + "loss": 2.6322, + "step": 7140 + }, + { + "epoch": 0.5763053829392301, + "grad_norm": 0.6730263233184814, + "learning_rate": 0.00014432677460000636, + "loss": 2.6764, + "step": 7141 + }, + { + "epoch": 0.576386086675813, + "grad_norm": 0.6782239675521851, + "learning_rate": 0.0001443126228420985, + "loss": 2.5208, + "step": 7142 + }, + { + "epoch": 0.5764667904123961, + "grad_norm": 0.7737600207328796, + "learning_rate": 0.00014429846997980424, + "loss": 2.6964, + "step": 7143 + }, + { + "epoch": 0.5765474941489791, + "grad_norm": 0.7456403374671936, + "learning_rate": 0.00014428431601347635, + "loss": 2.6163, + "step": 7144 + }, + { + "epoch": 0.5766281978855621, + "grad_norm": 0.7824606895446777, + "learning_rate": 0.00014427016094346754, + "loss": 2.6499, + "step": 7145 + }, + { + "epoch": 0.576708901622145, + "grad_norm": 0.7233635187149048, + "learning_rate": 0.00014425600477013055, + "loss": 2.6064, + "step": 7146 + }, + { + "epoch": 0.5767896053587281, + "grad_norm": 0.7008275389671326, + "learning_rate": 0.00014424184749381824, + "loss": 2.5585, + "step": 7147 + }, + { + "epoch": 0.5768703090953111, + "grad_norm": 0.6817710995674133, + "learning_rate": 0.00014422768911488346, + 
"loss": 2.6215, + "step": 7148 + }, + { + "epoch": 0.5769510128318941, + "grad_norm": 0.6860779523849487, + "learning_rate": 0.00014421352963367906, + "loss": 2.5877, + "step": 7149 + }, + { + "epoch": 0.5770317165684771, + "grad_norm": 0.732865035533905, + "learning_rate": 0.00014419936905055793, + "loss": 2.5704, + "step": 7150 + }, + { + "epoch": 0.5771124203050602, + "grad_norm": 0.6992458701133728, + "learning_rate": 0.00014418520736587297, + "loss": 2.6654, + "step": 7151 + }, + { + "epoch": 0.5771931240416431, + "grad_norm": 0.6865053176879883, + "learning_rate": 0.00014417104457997715, + "loss": 2.6389, + "step": 7152 + }, + { + "epoch": 0.5772738277782261, + "grad_norm": 0.7652727365493774, + "learning_rate": 0.00014415688069322345, + "loss": 2.6478, + "step": 7153 + }, + { + "epoch": 0.5773545315148091, + "grad_norm": 0.708692193031311, + "learning_rate": 0.0001441427157059648, + "loss": 2.6065, + "step": 7154 + }, + { + "epoch": 0.5774352352513922, + "grad_norm": 0.7549232244491577, + "learning_rate": 0.00014412854961855435, + "loss": 2.6484, + "step": 7155 + }, + { + "epoch": 0.5775159389879752, + "grad_norm": 0.6410655975341797, + "learning_rate": 0.00014411438243134506, + "loss": 2.6061, + "step": 7156 + }, + { + "epoch": 0.5775966427245581, + "grad_norm": 0.7711724042892456, + "learning_rate": 0.00014410021414469005, + "loss": 2.628, + "step": 7157 + }, + { + "epoch": 0.5776773464611411, + "grad_norm": 0.6723695993423462, + "learning_rate": 0.0001440860447589424, + "loss": 2.6214, + "step": 7158 + }, + { + "epoch": 0.5777580501977242, + "grad_norm": 0.7359206676483154, + "learning_rate": 0.0001440718742744553, + "loss": 2.6157, + "step": 7159 + }, + { + "epoch": 0.5778387539343072, + "grad_norm": 0.7320525050163269, + "learning_rate": 0.0001440577026915819, + "loss": 2.6081, + "step": 7160 + }, + { + "epoch": 0.5779194576708901, + "grad_norm": 0.7728561162948608, + "learning_rate": 0.00014404353001067535, + "loss": 2.5989, + "step": 7161 + }, + { + 
"epoch": 0.5780001614074731, + "grad_norm": 0.7380329370498657, + "learning_rate": 0.0001440293562320889, + "loss": 2.6337, + "step": 7162 + }, + { + "epoch": 0.5780808651440562, + "grad_norm": 0.667789876461029, + "learning_rate": 0.00014401518135617581, + "loss": 2.6324, + "step": 7163 + }, + { + "epoch": 0.5781615688806392, + "grad_norm": 0.6907219886779785, + "learning_rate": 0.00014400100538328935, + "loss": 2.5897, + "step": 7164 + }, + { + "epoch": 0.5782422726172222, + "grad_norm": 0.9051530957221985, + "learning_rate": 0.00014398682831378283, + "loss": 2.6895, + "step": 7165 + }, + { + "epoch": 0.5783229763538051, + "grad_norm": 0.7189533114433289, + "learning_rate": 0.00014397265014800956, + "loss": 2.5948, + "step": 7166 + }, + { + "epoch": 0.5784036800903882, + "grad_norm": 0.7003059983253479, + "learning_rate": 0.00014395847088632285, + "loss": 2.5814, + "step": 7167 + }, + { + "epoch": 0.5784843838269712, + "grad_norm": 0.8083534240722656, + "learning_rate": 0.0001439442905290762, + "loss": 2.6131, + "step": 7168 + }, + { + "epoch": 0.5785650875635542, + "grad_norm": 0.7068585157394409, + "learning_rate": 0.0001439301090766229, + "loss": 2.6027, + "step": 7169 + }, + { + "epoch": 0.5786457913001372, + "grad_norm": 0.7010494470596313, + "learning_rate": 0.00014391592652931653, + "loss": 2.5296, + "step": 7170 + }, + { + "epoch": 0.5787264950367202, + "grad_norm": 0.7577467560768127, + "learning_rate": 0.00014390174288751045, + "loss": 2.6347, + "step": 7171 + }, + { + "epoch": 0.5788071987733032, + "grad_norm": 0.643799364566803, + "learning_rate": 0.00014388755815155813, + "loss": 2.6152, + "step": 7172 + }, + { + "epoch": 0.5788879025098862, + "grad_norm": 0.740352988243103, + "learning_rate": 0.00014387337232181315, + "loss": 2.6123, + "step": 7173 + }, + { + "epoch": 0.5789686062464692, + "grad_norm": 0.7309309840202332, + "learning_rate": 0.00014385918539862907, + "loss": 2.6072, + "step": 7174 + }, + { + "epoch": 0.5790493099830523, + 
"grad_norm": 0.7237016558647156, + "learning_rate": 0.00014384499738235941, + "loss": 2.6375, + "step": 7175 + }, + { + "epoch": 0.5791300137196352, + "grad_norm": 0.6600970029830933, + "learning_rate": 0.00014383080827335784, + "loss": 2.5285, + "step": 7176 + }, + { + "epoch": 0.5792107174562182, + "grad_norm": 0.6822233200073242, + "learning_rate": 0.00014381661807197794, + "loss": 2.5497, + "step": 7177 + }, + { + "epoch": 0.5792914211928012, + "grad_norm": 0.6990383863449097, + "learning_rate": 0.00014380242677857337, + "loss": 2.6283, + "step": 7178 + }, + { + "epoch": 0.5793721249293843, + "grad_norm": 0.64422208070755, + "learning_rate": 0.00014378823439349783, + "loss": 2.5762, + "step": 7179 + }, + { + "epoch": 0.5794528286659673, + "grad_norm": 0.63804692029953, + "learning_rate": 0.00014377404091710501, + "loss": 2.5523, + "step": 7180 + }, + { + "epoch": 0.5795335324025502, + "grad_norm": 0.6978863477706909, + "learning_rate": 0.0001437598463497487, + "loss": 2.5089, + "step": 7181 + }, + { + "epoch": 0.5796142361391332, + "grad_norm": 0.7091087698936462, + "learning_rate": 0.00014374565069178257, + "loss": 2.7005, + "step": 7182 + }, + { + "epoch": 0.5796949398757163, + "grad_norm": 0.683659553527832, + "learning_rate": 0.00014373145394356053, + "loss": 2.5988, + "step": 7183 + }, + { + "epoch": 0.5797756436122993, + "grad_norm": 0.7352960705757141, + "learning_rate": 0.00014371725610543633, + "loss": 2.5671, + "step": 7184 + }, + { + "epoch": 0.5798563473488823, + "grad_norm": 0.6951913237571716, + "learning_rate": 0.00014370305717776382, + "loss": 2.5917, + "step": 7185 + }, + { + "epoch": 0.5799370510854652, + "grad_norm": 0.6644465923309326, + "learning_rate": 0.0001436888571608969, + "loss": 2.5954, + "step": 7186 + }, + { + "epoch": 0.5800177548220483, + "grad_norm": 0.7406458258628845, + "learning_rate": 0.00014367465605518942, + "loss": 2.6369, + "step": 7187 + }, + { + "epoch": 0.5800984585586313, + "grad_norm": 0.6724697351455688, + 
"learning_rate": 0.00014366045386099535, + "loss": 2.6227, + "step": 7188 + }, + { + "epoch": 0.5801791622952143, + "grad_norm": 0.6804977059364319, + "learning_rate": 0.00014364625057866867, + "loss": 2.6445, + "step": 7189 + }, + { + "epoch": 0.5802598660317972, + "grad_norm": 0.7020019888877869, + "learning_rate": 0.00014363204620856335, + "loss": 2.6733, + "step": 7190 + }, + { + "epoch": 0.5803405697683802, + "grad_norm": 0.6458491086959839, + "learning_rate": 0.00014361784075103332, + "loss": 2.572, + "step": 7191 + }, + { + "epoch": 0.5804212735049633, + "grad_norm": 0.7078056335449219, + "learning_rate": 0.00014360363420643272, + "loss": 2.7032, + "step": 7192 + }, + { + "epoch": 0.5805019772415463, + "grad_norm": 0.6367471814155579, + "learning_rate": 0.00014358942657511557, + "loss": 2.5369, + "step": 7193 + }, + { + "epoch": 0.5805826809781293, + "grad_norm": 0.7311955094337463, + "learning_rate": 0.00014357521785743596, + "loss": 2.6513, + "step": 7194 + }, + { + "epoch": 0.5806633847147122, + "grad_norm": 0.6957442164421082, + "learning_rate": 0.00014356100805374805, + "loss": 2.6512, + "step": 7195 + }, + { + "epoch": 0.5807440884512953, + "grad_norm": 0.7026693224906921, + "learning_rate": 0.0001435467971644059, + "loss": 2.6049, + "step": 7196 + }, + { + "epoch": 0.5808247921878783, + "grad_norm": 0.7337697744369507, + "learning_rate": 0.00014353258518976376, + "loss": 2.5516, + "step": 7197 + }, + { + "epoch": 0.5809054959244613, + "grad_norm": 0.6891856789588928, + "learning_rate": 0.00014351837213017577, + "loss": 2.5894, + "step": 7198 + }, + { + "epoch": 0.5809861996610443, + "grad_norm": 0.6710659265518188, + "learning_rate": 0.0001435041579859962, + "loss": 2.596, + "step": 7199 + }, + { + "epoch": 0.5810669033976273, + "grad_norm": 0.7637245059013367, + "learning_rate": 0.00014348994275757931, + "loss": 2.6278, + "step": 7200 + }, + { + "epoch": 0.5811476071342103, + "grad_norm": 0.7558664679527283, + "learning_rate": 0.00014347572644527934, 
+ "loss": 2.6917, + "step": 7201 + }, + { + "epoch": 0.5812283108707933, + "grad_norm": 0.7254986763000488, + "learning_rate": 0.00014346150904945065, + "loss": 2.6161, + "step": 7202 + }, + { + "epoch": 0.5813090146073763, + "grad_norm": 0.7177211046218872, + "learning_rate": 0.00014344729057044753, + "loss": 2.555, + "step": 7203 + }, + { + "epoch": 0.5813897183439594, + "grad_norm": 0.6408729553222656, + "learning_rate": 0.00014343307100862432, + "loss": 2.6071, + "step": 7204 + }, + { + "epoch": 0.5814704220805423, + "grad_norm": 0.7399997711181641, + "learning_rate": 0.0001434188503643355, + "loss": 2.6013, + "step": 7205 + }, + { + "epoch": 0.5815511258171253, + "grad_norm": 0.7796236276626587, + "learning_rate": 0.00014340462863793543, + "loss": 2.603, + "step": 7206 + }, + { + "epoch": 0.5816318295537083, + "grad_norm": 0.7420137524604797, + "learning_rate": 0.00014339040582977855, + "loss": 2.5858, + "step": 7207 + }, + { + "epoch": 0.5817125332902914, + "grad_norm": 0.738042414188385, + "learning_rate": 0.00014337618194021928, + "loss": 2.592, + "step": 7208 + }, + { + "epoch": 0.5817932370268744, + "grad_norm": 0.6910614371299744, + "learning_rate": 0.00014336195696961222, + "loss": 2.6448, + "step": 7209 + }, + { + "epoch": 0.5818739407634573, + "grad_norm": 0.7838915586471558, + "learning_rate": 0.00014334773091831185, + "loss": 2.6257, + "step": 7210 + }, + { + "epoch": 0.5819546445000403, + "grad_norm": 0.7362141013145447, + "learning_rate": 0.0001433335037866727, + "loss": 2.6505, + "step": 7211 + }, + { + "epoch": 0.5820353482366234, + "grad_norm": 0.6892269253730774, + "learning_rate": 0.00014331927557504934, + "loss": 2.6518, + "step": 7212 + }, + { + "epoch": 0.5821160519732064, + "grad_norm": 0.7444556951522827, + "learning_rate": 0.0001433050462837964, + "loss": 2.6785, + "step": 7213 + }, + { + "epoch": 0.5821967557097893, + "grad_norm": 0.6948450207710266, + "learning_rate": 0.00014329081591326853, + "loss": 2.5753, + "step": 7214 + }, + { + 
"epoch": 0.5822774594463723, + "grad_norm": 0.713741660118103, + "learning_rate": 0.00014327658446382032, + "loss": 2.6425, + "step": 7215 + }, + { + "epoch": 0.5823581631829554, + "grad_norm": 0.7352245450019836, + "learning_rate": 0.00014326235193580657, + "loss": 2.6859, + "step": 7216 + }, + { + "epoch": 0.5824388669195384, + "grad_norm": 0.7151867151260376, + "learning_rate": 0.00014324811832958187, + "loss": 2.6106, + "step": 7217 + }, + { + "epoch": 0.5825195706561214, + "grad_norm": 0.7003469467163086, + "learning_rate": 0.000143233883645501, + "loss": 2.618, + "step": 7218 + }, + { + "epoch": 0.5826002743927043, + "grad_norm": 0.7139034867286682, + "learning_rate": 0.00014321964788391878, + "loss": 2.5772, + "step": 7219 + }, + { + "epoch": 0.5826809781292874, + "grad_norm": 0.6368305683135986, + "learning_rate": 0.00014320541104518992, + "loss": 2.5259, + "step": 7220 + }, + { + "epoch": 0.5827616818658704, + "grad_norm": 0.6921548247337341, + "learning_rate": 0.0001431911731296693, + "loss": 2.6403, + "step": 7221 + }, + { + "epoch": 0.5828423856024534, + "grad_norm": 0.6995570659637451, + "learning_rate": 0.00014317693413771175, + "loss": 2.6172, + "step": 7222 + }, + { + "epoch": 0.5829230893390364, + "grad_norm": 0.7557246088981628, + "learning_rate": 0.0001431626940696721, + "loss": 2.6347, + "step": 7223 + }, + { + "epoch": 0.5830037930756194, + "grad_norm": 0.6912205219268799, + "learning_rate": 0.00014314845292590528, + "loss": 2.5958, + "step": 7224 + }, + { + "epoch": 0.5830844968122024, + "grad_norm": 0.6896184682846069, + "learning_rate": 0.00014313421070676625, + "loss": 2.569, + "step": 7225 + }, + { + "epoch": 0.5831652005487854, + "grad_norm": 0.6900814771652222, + "learning_rate": 0.00014311996741260994, + "loss": 2.5466, + "step": 7226 + }, + { + "epoch": 0.5832459042853684, + "grad_norm": 0.7319771647453308, + "learning_rate": 0.00014310572304379132, + "loss": 2.6181, + "step": 7227 + }, + { + "epoch": 0.5833266080219515, + "grad_norm": 
0.728138267993927, + "learning_rate": 0.0001430914776006654, + "loss": 2.6644, + "step": 7228 + }, + { + "epoch": 0.5834073117585344, + "grad_norm": 0.7361802458763123, + "learning_rate": 0.0001430772310835872, + "loss": 2.6079, + "step": 7229 + }, + { + "epoch": 0.5834880154951174, + "grad_norm": 0.6893376708030701, + "learning_rate": 0.00014306298349291182, + "loss": 2.5615, + "step": 7230 + }, + { + "epoch": 0.5835687192317004, + "grad_norm": 0.6661401987075806, + "learning_rate": 0.00014304873482899431, + "loss": 2.6028, + "step": 7231 + }, + { + "epoch": 0.5836494229682835, + "grad_norm": 0.6571504473686218, + "learning_rate": 0.0001430344850921898, + "loss": 2.5553, + "step": 7232 + }, + { + "epoch": 0.5837301267048665, + "grad_norm": 0.6878423690795898, + "learning_rate": 0.00014302023428285342, + "loss": 2.5336, + "step": 7233 + }, + { + "epoch": 0.5838108304414494, + "grad_norm": 0.768117368221283, + "learning_rate": 0.00014300598240134035, + "loss": 2.6036, + "step": 7234 + }, + { + "epoch": 0.5838915341780324, + "grad_norm": 0.6876625418663025, + "learning_rate": 0.0001429917294480058, + "loss": 2.6314, + "step": 7235 + }, + { + "epoch": 0.5839722379146155, + "grad_norm": 0.7146790027618408, + "learning_rate": 0.00014297747542320495, + "loss": 2.6029, + "step": 7236 + }, + { + "epoch": 0.5840529416511985, + "grad_norm": 0.7032392024993896, + "learning_rate": 0.00014296322032729308, + "loss": 2.6163, + "step": 7237 + }, + { + "epoch": 0.5841336453877815, + "grad_norm": 0.7323551177978516, + "learning_rate": 0.00014294896416062544, + "loss": 2.6706, + "step": 7238 + }, + { + "epoch": 0.5842143491243644, + "grad_norm": 0.7647258639335632, + "learning_rate": 0.00014293470692355734, + "loss": 2.6744, + "step": 7239 + }, + { + "epoch": 0.5842950528609475, + "grad_norm": 0.6824506521224976, + "learning_rate": 0.00014292044861644414, + "loss": 2.579, + "step": 7240 + }, + { + "epoch": 0.5843757565975305, + "grad_norm": 0.7553619742393494, + "learning_rate": 
0.00014290618923964115, + "loss": 2.6196, + "step": 7241 + }, + { + "epoch": 0.5844564603341135, + "grad_norm": 0.6872109770774841, + "learning_rate": 0.00014289192879350375, + "loss": 2.555, + "step": 7242 + }, + { + "epoch": 0.5845371640706964, + "grad_norm": 0.664658784866333, + "learning_rate": 0.00014287766727838735, + "loss": 2.5781, + "step": 7243 + }, + { + "epoch": 0.5846178678072794, + "grad_norm": 0.6709543466567993, + "learning_rate": 0.00014286340469464744, + "loss": 2.6022, + "step": 7244 + }, + { + "epoch": 0.5846985715438625, + "grad_norm": 0.7236210107803345, + "learning_rate": 0.00014284914104263941, + "loss": 2.5609, + "step": 7245 + }, + { + "epoch": 0.5847792752804455, + "grad_norm": 0.6751740574836731, + "learning_rate": 0.0001428348763227188, + "loss": 2.5792, + "step": 7246 + }, + { + "epoch": 0.5848599790170285, + "grad_norm": 0.6684607267379761, + "learning_rate": 0.0001428206105352411, + "loss": 2.5705, + "step": 7247 + }, + { + "epoch": 0.5849406827536114, + "grad_norm": 0.6876732707023621, + "learning_rate": 0.00014280634368056186, + "loss": 2.6576, + "step": 7248 + }, + { + "epoch": 0.5850213864901945, + "grad_norm": 0.758637547492981, + "learning_rate": 0.0001427920757590366, + "loss": 2.6215, + "step": 7249 + }, + { + "epoch": 0.5851020902267775, + "grad_norm": 0.6839025020599365, + "learning_rate": 0.00014277780677102097, + "loss": 2.5898, + "step": 7250 + }, + { + "epoch": 0.5851827939633605, + "grad_norm": 0.6912671327590942, + "learning_rate": 0.00014276353671687056, + "loss": 2.5879, + "step": 7251 + }, + { + "epoch": 0.5852634976999435, + "grad_norm": 0.6727048754692078, + "learning_rate": 0.00014274926559694107, + "loss": 2.5501, + "step": 7252 + }, + { + "epoch": 0.5853442014365265, + "grad_norm": 0.7031945586204529, + "learning_rate": 0.00014273499341158812, + "loss": 2.625, + "step": 7253 + }, + { + "epoch": 0.5854249051731095, + "grad_norm": 0.6886943578720093, + "learning_rate": 0.0001427207201611674, + "loss": 2.6141, + 
"step": 7254 + }, + { + "epoch": 0.5855056089096925, + "grad_norm": 0.7906915545463562, + "learning_rate": 0.00014270644584603466, + "loss": 2.7189, + "step": 7255 + }, + { + "epoch": 0.5855863126462755, + "grad_norm": 0.6873704195022583, + "learning_rate": 0.00014269217046654567, + "loss": 2.6031, + "step": 7256 + }, + { + "epoch": 0.5856670163828586, + "grad_norm": 0.6655381321907043, + "learning_rate": 0.00014267789402305618, + "loss": 2.5747, + "step": 7257 + }, + { + "epoch": 0.5857477201194415, + "grad_norm": 0.6655673384666443, + "learning_rate": 0.00014266361651592204, + "loss": 2.625, + "step": 7258 + }, + { + "epoch": 0.5858284238560245, + "grad_norm": 0.6752866506576538, + "learning_rate": 0.00014264933794549901, + "loss": 2.5914, + "step": 7259 + }, + { + "epoch": 0.5859091275926075, + "grad_norm": 0.6680975556373596, + "learning_rate": 0.00014263505831214302, + "loss": 2.5572, + "step": 7260 + }, + { + "epoch": 0.5859898313291906, + "grad_norm": 0.6873607039451599, + "learning_rate": 0.00014262077761620994, + "loss": 2.6696, + "step": 7261 + }, + { + "epoch": 0.5860705350657736, + "grad_norm": 0.6745384335517883, + "learning_rate": 0.00014260649585805566, + "loss": 2.5738, + "step": 7262 + }, + { + "epoch": 0.5861512388023565, + "grad_norm": 0.6524637937545776, + "learning_rate": 0.0001425922130380361, + "loss": 2.6209, + "step": 7263 + }, + { + "epoch": 0.5862319425389395, + "grad_norm": 0.6729850172996521, + "learning_rate": 0.00014257792915650728, + "loss": 2.652, + "step": 7264 + }, + { + "epoch": 0.5863126462755226, + "grad_norm": 0.6713503003120422, + "learning_rate": 0.00014256364421382514, + "loss": 2.5658, + "step": 7265 + }, + { + "epoch": 0.5863933500121056, + "grad_norm": 0.6835616827011108, + "learning_rate": 0.00014254935821034575, + "loss": 2.5535, + "step": 7266 + }, + { + "epoch": 0.5864740537486886, + "grad_norm": 0.7425376176834106, + "learning_rate": 0.00014253507114642515, + "loss": 2.6369, + "step": 7267 + }, + { + "epoch": 
0.5865547574852715, + "grad_norm": 0.6788069605827332, + "learning_rate": 0.00014252078302241932, + "loss": 2.601, + "step": 7268 + }, + { + "epoch": 0.5866354612218546, + "grad_norm": 0.6828538179397583, + "learning_rate": 0.0001425064938386845, + "loss": 2.5861, + "step": 7269 + }, + { + "epoch": 0.5867161649584376, + "grad_norm": 0.6763372421264648, + "learning_rate": 0.0001424922035955767, + "loss": 2.6035, + "step": 7270 + }, + { + "epoch": 0.5867968686950206, + "grad_norm": 0.6517930626869202, + "learning_rate": 0.0001424779122934521, + "loss": 2.5564, + "step": 7271 + }, + { + "epoch": 0.5868775724316035, + "grad_norm": 0.6633113622665405, + "learning_rate": 0.00014246361993266692, + "loss": 2.6163, + "step": 7272 + }, + { + "epoch": 0.5869582761681866, + "grad_norm": 0.684822678565979, + "learning_rate": 0.00014244932651357733, + "loss": 2.6057, + "step": 7273 + }, + { + "epoch": 0.5870389799047696, + "grad_norm": 0.7679704427719116, + "learning_rate": 0.00014243503203653952, + "loss": 2.6522, + "step": 7274 + }, + { + "epoch": 0.5871196836413526, + "grad_norm": 0.6834188103675842, + "learning_rate": 0.00014242073650190984, + "loss": 2.652, + "step": 7275 + }, + { + "epoch": 0.5872003873779356, + "grad_norm": 0.6903846859931946, + "learning_rate": 0.00014240643991004449, + "loss": 2.5894, + "step": 7276 + }, + { + "epoch": 0.5872810911145186, + "grad_norm": 0.7060866951942444, + "learning_rate": 0.0001423921422612998, + "loss": 2.5994, + "step": 7277 + }, + { + "epoch": 0.5873617948511016, + "grad_norm": 0.6646741628646851, + "learning_rate": 0.0001423778435560321, + "loss": 2.6432, + "step": 7278 + }, + { + "epoch": 0.5874424985876846, + "grad_norm": 0.6930218935012817, + "learning_rate": 0.0001423635437945978, + "loss": 2.6233, + "step": 7279 + }, + { + "epoch": 0.5875232023242676, + "grad_norm": 0.6914143562316895, + "learning_rate": 0.00014234924297735322, + "loss": 2.6143, + "step": 7280 + }, + { + "epoch": 0.5876039060608507, + "grad_norm": 
0.7351366281509399, + "learning_rate": 0.0001423349411046548, + "loss": 2.6323, + "step": 7281 + }, + { + "epoch": 0.5876846097974336, + "grad_norm": 0.6813770532608032, + "learning_rate": 0.000142320638176859, + "loss": 2.5964, + "step": 7282 + }, + { + "epoch": 0.5877653135340166, + "grad_norm": 0.7049702405929565, + "learning_rate": 0.00014230633419432226, + "loss": 2.6284, + "step": 7283 + }, + { + "epoch": 0.5878460172705996, + "grad_norm": 0.7140446901321411, + "learning_rate": 0.00014229202915740107, + "loss": 2.6113, + "step": 7284 + }, + { + "epoch": 0.5879267210071827, + "grad_norm": 0.696588933467865, + "learning_rate": 0.00014227772306645196, + "loss": 2.6384, + "step": 7285 + }, + { + "epoch": 0.5880074247437657, + "grad_norm": 0.6800615787506104, + "learning_rate": 0.0001422634159218315, + "loss": 2.5743, + "step": 7286 + }, + { + "epoch": 0.5880881284803486, + "grad_norm": 0.7586596608161926, + "learning_rate": 0.00014224910772389624, + "loss": 2.6504, + "step": 7287 + }, + { + "epoch": 0.5881688322169316, + "grad_norm": 0.73286372423172, + "learning_rate": 0.00014223479847300278, + "loss": 2.6026, + "step": 7288 + }, + { + "epoch": 0.5882495359535147, + "grad_norm": 0.6808766722679138, + "learning_rate": 0.00014222048816950772, + "loss": 2.5822, + "step": 7289 + }, + { + "epoch": 0.5883302396900977, + "grad_norm": 0.7424919009208679, + "learning_rate": 0.0001422061768137677, + "loss": 2.6474, + "step": 7290 + }, + { + "epoch": 0.5884109434266807, + "grad_norm": 0.658183753490448, + "learning_rate": 0.00014219186440613948, + "loss": 2.6051, + "step": 7291 + }, + { + "epoch": 0.5884916471632636, + "grad_norm": 0.6693006157875061, + "learning_rate": 0.0001421775509469797, + "loss": 2.5774, + "step": 7292 + }, + { + "epoch": 0.5885723508998466, + "grad_norm": 0.7298646569252014, + "learning_rate": 0.00014216323643664508, + "loss": 2.5688, + "step": 7293 + }, + { + "epoch": 0.5886530546364297, + "grad_norm": 0.6665881276130676, + "learning_rate": 
0.00014214892087549238, + "loss": 2.608, + "step": 7294 + }, + { + "epoch": 0.5887337583730127, + "grad_norm": 0.7220060229301453, + "learning_rate": 0.00014213460426387841, + "loss": 2.6078, + "step": 7295 + }, + { + "epoch": 0.5888144621095956, + "grad_norm": 0.6693970561027527, + "learning_rate": 0.00014212028660215997, + "loss": 2.597, + "step": 7296 + }, + { + "epoch": 0.5888951658461786, + "grad_norm": 0.682331919670105, + "learning_rate": 0.00014210596789069387, + "loss": 2.5752, + "step": 7297 + }, + { + "epoch": 0.5889758695827617, + "grad_norm": 0.7586890459060669, + "learning_rate": 0.000142091648129837, + "loss": 2.6878, + "step": 7298 + }, + { + "epoch": 0.5890565733193447, + "grad_norm": 0.6740901470184326, + "learning_rate": 0.00014207732731994624, + "loss": 2.6083, + "step": 7299 + }, + { + "epoch": 0.5891372770559277, + "grad_norm": 0.6959021091461182, + "learning_rate": 0.00014206300546137842, + "loss": 2.5765, + "step": 7300 + }, + { + "epoch": 0.5892179807925106, + "grad_norm": 0.7446078658103943, + "learning_rate": 0.0001420486825544906, + "loss": 2.662, + "step": 7301 + }, + { + "epoch": 0.5892986845290937, + "grad_norm": 0.7418847680091858, + "learning_rate": 0.0001420343585996397, + "loss": 2.6606, + "step": 7302 + }, + { + "epoch": 0.5893793882656767, + "grad_norm": 0.7185709476470947, + "learning_rate": 0.00014202003359718273, + "loss": 2.563, + "step": 7303 + }, + { + "epoch": 0.5894600920022597, + "grad_norm": 0.6960515379905701, + "learning_rate": 0.00014200570754747664, + "loss": 2.6182, + "step": 7304 + }, + { + "epoch": 0.5895407957388427, + "grad_norm": 0.6589705348014832, + "learning_rate": 0.00014199138045087849, + "loss": 2.6714, + "step": 7305 + }, + { + "epoch": 0.5896214994754257, + "grad_norm": 0.7027507424354553, + "learning_rate": 0.00014197705230774543, + "loss": 2.6145, + "step": 7306 + }, + { + "epoch": 0.5897022032120087, + "grad_norm": 0.6761246919631958, + "learning_rate": 0.00014196272311843447, + "loss": 2.5688, + 
"step": 7307 + }, + { + "epoch": 0.5897829069485917, + "grad_norm": 0.6618059277534485, + "learning_rate": 0.00014194839288330277, + "loss": 2.6194, + "step": 7308 + }, + { + "epoch": 0.5898636106851747, + "grad_norm": 0.7182614803314209, + "learning_rate": 0.00014193406160270747, + "loss": 2.5452, + "step": 7309 + }, + { + "epoch": 0.5899443144217578, + "grad_norm": 0.6830565333366394, + "learning_rate": 0.0001419197292770057, + "loss": 2.5728, + "step": 7310 + }, + { + "epoch": 0.5900250181583407, + "grad_norm": 0.6744499802589417, + "learning_rate": 0.00014190539590655475, + "loss": 2.5736, + "step": 7311 + }, + { + "epoch": 0.5901057218949237, + "grad_norm": 0.7177874445915222, + "learning_rate": 0.00014189106149171176, + "loss": 2.6271, + "step": 7312 + }, + { + "epoch": 0.5901864256315067, + "grad_norm": 0.6770105361938477, + "learning_rate": 0.000141876726032834, + "loss": 2.5924, + "step": 7313 + }, + { + "epoch": 0.5902671293680898, + "grad_norm": 0.7295818328857422, + "learning_rate": 0.0001418623895302788, + "loss": 2.644, + "step": 7314 + }, + { + "epoch": 0.5903478331046728, + "grad_norm": 0.7244859933853149, + "learning_rate": 0.00014184805198440338, + "loss": 2.5892, + "step": 7315 + }, + { + "epoch": 0.5904285368412557, + "grad_norm": 0.7067728638648987, + "learning_rate": 0.00014183371339556512, + "loss": 2.5985, + "step": 7316 + }, + { + "epoch": 0.5905092405778387, + "grad_norm": 0.6732490062713623, + "learning_rate": 0.0001418193737641214, + "loss": 2.5771, + "step": 7317 + }, + { + "epoch": 0.5905899443144218, + "grad_norm": 0.7087544202804565, + "learning_rate": 0.00014180503309042957, + "loss": 2.6373, + "step": 7318 + }, + { + "epoch": 0.5906706480510048, + "grad_norm": 0.772174596786499, + "learning_rate": 0.00014179069137484703, + "loss": 2.6262, + "step": 7319 + }, + { + "epoch": 0.5907513517875878, + "grad_norm": 0.6855718493461609, + "learning_rate": 0.00014177634861773118, + "loss": 2.6268, + "step": 7320 + }, + { + "epoch": 
0.5908320555241707, + "grad_norm": 0.7168720364570618, + "learning_rate": 0.00014176200481943953, + "loss": 2.5892, + "step": 7321 + }, + { + "epoch": 0.5909127592607538, + "grad_norm": 0.7126333713531494, + "learning_rate": 0.0001417476599803296, + "loss": 2.6079, + "step": 7322 + }, + { + "epoch": 0.5909934629973368, + "grad_norm": 0.7451913952827454, + "learning_rate": 0.0001417333141007588, + "loss": 2.635, + "step": 7323 + }, + { + "epoch": 0.5910741667339198, + "grad_norm": 0.7405436038970947, + "learning_rate": 0.00014171896718108475, + "loss": 2.6014, + "step": 7324 + }, + { + "epoch": 0.5911548704705027, + "grad_norm": 0.7583999037742615, + "learning_rate": 0.00014170461922166498, + "loss": 2.6815, + "step": 7325 + }, + { + "epoch": 0.5912355742070858, + "grad_norm": 0.6653509140014648, + "learning_rate": 0.00014169027022285706, + "loss": 2.6153, + "step": 7326 + }, + { + "epoch": 0.5913162779436688, + "grad_norm": 0.7145548462867737, + "learning_rate": 0.00014167592018501864, + "loss": 2.6022, + "step": 7327 + }, + { + "epoch": 0.5913969816802518, + "grad_norm": 0.6996089816093445, + "learning_rate": 0.00014166156910850737, + "loss": 2.6586, + "step": 7328 + }, + { + "epoch": 0.5914776854168348, + "grad_norm": 0.735653281211853, + "learning_rate": 0.0001416472169936809, + "loss": 2.6084, + "step": 7329 + }, + { + "epoch": 0.5915583891534179, + "grad_norm": 0.695036768913269, + "learning_rate": 0.00014163286384089686, + "loss": 2.5058, + "step": 7330 + }, + { + "epoch": 0.5916390928900008, + "grad_norm": 0.9014756679534912, + "learning_rate": 0.00014161850965051307, + "loss": 2.5991, + "step": 7331 + }, + { + "epoch": 0.5917197966265838, + "grad_norm": 0.7079846858978271, + "learning_rate": 0.0001416041544228872, + "loss": 2.6067, + "step": 7332 + }, + { + "epoch": 0.5918005003631668, + "grad_norm": 0.7681204080581665, + "learning_rate": 0.00014158979815837705, + "loss": 2.5414, + "step": 7333 + }, + { + "epoch": 0.5918812040997499, + "grad_norm": 
0.6501670479774475, + "learning_rate": 0.00014157544085734042, + "loss": 2.617, + "step": 7334 + }, + { + "epoch": 0.5919619078363328, + "grad_norm": 0.7573496103286743, + "learning_rate": 0.00014156108252013513, + "loss": 2.6341, + "step": 7335 + }, + { + "epoch": 0.5920426115729158, + "grad_norm": 0.6865558624267578, + "learning_rate": 0.00014154672314711903, + "loss": 2.6229, + "step": 7336 + }, + { + "epoch": 0.5921233153094988, + "grad_norm": 0.6859166622161865, + "learning_rate": 0.00014153236273864995, + "loss": 2.6149, + "step": 7337 + }, + { + "epoch": 0.5922040190460819, + "grad_norm": 0.7603647112846375, + "learning_rate": 0.00014151800129508585, + "loss": 2.5645, + "step": 7338 + }, + { + "epoch": 0.5922847227826649, + "grad_norm": 0.6740217208862305, + "learning_rate": 0.00014150363881678464, + "loss": 2.5883, + "step": 7339 + }, + { + "epoch": 0.5923654265192478, + "grad_norm": 0.6412263512611389, + "learning_rate": 0.00014148927530410426, + "loss": 2.576, + "step": 7340 + }, + { + "epoch": 0.5924461302558308, + "grad_norm": 0.669834315776825, + "learning_rate": 0.00014147491075740265, + "loss": 2.542, + "step": 7341 + }, + { + "epoch": 0.5925268339924139, + "grad_norm": 0.720024049282074, + "learning_rate": 0.00014146054517703786, + "loss": 2.6491, + "step": 7342 + }, + { + "epoch": 0.5926075377289969, + "grad_norm": 0.7191612720489502, + "learning_rate": 0.00014144617856336794, + "loss": 2.5933, + "step": 7343 + }, + { + "epoch": 0.5926882414655799, + "grad_norm": 0.7012050747871399, + "learning_rate": 0.00014143181091675087, + "loss": 2.5253, + "step": 7344 + }, + { + "epoch": 0.5927689452021628, + "grad_norm": 0.7825081944465637, + "learning_rate": 0.00014141744223754478, + "loss": 2.6225, + "step": 7345 + }, + { + "epoch": 0.5928496489387458, + "grad_norm": 0.6699295043945312, + "learning_rate": 0.00014140307252610775, + "loss": 2.5893, + "step": 7346 + }, + { + "epoch": 0.5929303526753289, + "grad_norm": 0.6668846011161804, + "learning_rate": 
0.00014138870178279794, + "loss": 2.5944, + "step": 7347 + }, + { + "epoch": 0.5930110564119119, + "grad_norm": 0.7681072950363159, + "learning_rate": 0.0001413743300079735, + "loss": 2.5715, + "step": 7348 + }, + { + "epoch": 0.5930917601484949, + "grad_norm": 0.653075635433197, + "learning_rate": 0.00014135995720199258, + "loss": 2.5924, + "step": 7349 + }, + { + "epoch": 0.5931724638850778, + "grad_norm": 0.6807504892349243, + "learning_rate": 0.00014134558336521342, + "loss": 2.5395, + "step": 7350 + }, + { + "epoch": 0.5932531676216609, + "grad_norm": 0.681175708770752, + "learning_rate": 0.00014133120849799423, + "loss": 2.5401, + "step": 7351 + }, + { + "epoch": 0.5933338713582439, + "grad_norm": 0.7159900665283203, + "learning_rate": 0.0001413168326006933, + "loss": 2.5684, + "step": 7352 + }, + { + "epoch": 0.5934145750948269, + "grad_norm": 0.6517181992530823, + "learning_rate": 0.00014130245567366888, + "loss": 2.5887, + "step": 7353 + }, + { + "epoch": 0.5934952788314098, + "grad_norm": 0.6982731223106384, + "learning_rate": 0.00014128807771727936, + "loss": 2.5707, + "step": 7354 + }, + { + "epoch": 0.5935759825679929, + "grad_norm": 0.7003650069236755, + "learning_rate": 0.00014127369873188296, + "loss": 2.6415, + "step": 7355 + }, + { + "epoch": 0.5936566863045759, + "grad_norm": 0.7408339977264404, + "learning_rate": 0.0001412593187178381, + "loss": 2.5655, + "step": 7356 + }, + { + "epoch": 0.5937373900411589, + "grad_norm": 0.717218279838562, + "learning_rate": 0.00014124493767550317, + "loss": 2.586, + "step": 7357 + }, + { + "epoch": 0.5938180937777419, + "grad_norm": 0.6723458766937256, + "learning_rate": 0.00014123055560523657, + "loss": 2.593, + "step": 7358 + }, + { + "epoch": 0.593898797514325, + "grad_norm": 0.6861262321472168, + "learning_rate": 0.00014121617250739677, + "loss": 2.612, + "step": 7359 + }, + { + "epoch": 0.5939795012509079, + "grad_norm": 0.6811453104019165, + "learning_rate": 0.00014120178838234222, + "loss": 2.5708, + 
"step": 7360 + }, + { + "epoch": 0.5940602049874909, + "grad_norm": 0.6249656677246094, + "learning_rate": 0.00014118740323043136, + "loss": 2.5604, + "step": 7361 + }, + { + "epoch": 0.5941409087240739, + "grad_norm": 0.7671588659286499, + "learning_rate": 0.00014117301705202274, + "loss": 2.547, + "step": 7362 + }, + { + "epoch": 0.594221612460657, + "grad_norm": 0.6856057643890381, + "learning_rate": 0.00014115862984747496, + "loss": 2.6108, + "step": 7363 + }, + { + "epoch": 0.5943023161972399, + "grad_norm": 0.692331850528717, + "learning_rate": 0.0001411442416171465, + "loss": 2.6347, + "step": 7364 + }, + { + "epoch": 0.5943830199338229, + "grad_norm": 0.7256516814231873, + "learning_rate": 0.000141129852361396, + "loss": 2.6098, + "step": 7365 + }, + { + "epoch": 0.5944637236704059, + "grad_norm": 0.7522590160369873, + "learning_rate": 0.00014111546208058203, + "loss": 2.5688, + "step": 7366 + }, + { + "epoch": 0.594544427406989, + "grad_norm": 0.6915806531906128, + "learning_rate": 0.0001411010707750633, + "loss": 2.5899, + "step": 7367 + }, + { + "epoch": 0.594625131143572, + "grad_norm": 0.7355465292930603, + "learning_rate": 0.00014108667844519844, + "loss": 2.5212, + "step": 7368 + }, + { + "epoch": 0.5947058348801549, + "grad_norm": 0.731002926826477, + "learning_rate": 0.00014107228509134615, + "loss": 2.6369, + "step": 7369 + }, + { + "epoch": 0.5947865386167379, + "grad_norm": 0.6764423251152039, + "learning_rate": 0.0001410578907138652, + "loss": 2.6012, + "step": 7370 + }, + { + "epoch": 0.594867242353321, + "grad_norm": 0.7466071844100952, + "learning_rate": 0.0001410434953131142, + "loss": 2.5822, + "step": 7371 + }, + { + "epoch": 0.594947946089904, + "grad_norm": 0.7276137471199036, + "learning_rate": 0.00014102909888945205, + "loss": 2.6055, + "step": 7372 + }, + { + "epoch": 0.595028649826487, + "grad_norm": 0.7411746978759766, + "learning_rate": 0.00014101470144323752, + "loss": 2.6489, + "step": 7373 + }, + { + "epoch": 
0.5951093535630699, + "grad_norm": 0.7511908411979675, + "learning_rate": 0.0001410003029748294, + "loss": 2.6268, + "step": 7374 + }, + { + "epoch": 0.595190057299653, + "grad_norm": 0.6623562574386597, + "learning_rate": 0.0001409859034845866, + "loss": 2.58, + "step": 7375 + }, + { + "epoch": 0.595270761036236, + "grad_norm": 0.6948572397232056, + "learning_rate": 0.00014097150297286785, + "loss": 2.5811, + "step": 7376 + }, + { + "epoch": 0.595351464772819, + "grad_norm": 0.6836786270141602, + "learning_rate": 0.0001409571014400322, + "loss": 2.5861, + "step": 7377 + }, + { + "epoch": 0.595432168509402, + "grad_norm": 0.6644341945648193, + "learning_rate": 0.00014094269888643854, + "loss": 2.6339, + "step": 7378 + }, + { + "epoch": 0.595512872245985, + "grad_norm": 0.6434289813041687, + "learning_rate": 0.0001409282953124458, + "loss": 2.4897, + "step": 7379 + }, + { + "epoch": 0.595593575982568, + "grad_norm": 0.6745082139968872, + "learning_rate": 0.0001409138907184129, + "loss": 2.522, + "step": 7380 + }, + { + "epoch": 0.595674279719151, + "grad_norm": 0.725321352481842, + "learning_rate": 0.0001408994851046989, + "loss": 2.5711, + "step": 7381 + }, + { + "epoch": 0.595754983455734, + "grad_norm": 0.7485500574111938, + "learning_rate": 0.00014088507847166283, + "loss": 2.6095, + "step": 7382 + }, + { + "epoch": 0.595835687192317, + "grad_norm": 0.721125602722168, + "learning_rate": 0.00014087067081966376, + "loss": 2.6762, + "step": 7383 + }, + { + "epoch": 0.5959163909289, + "grad_norm": 0.7099901437759399, + "learning_rate": 0.00014085626214906073, + "loss": 2.5667, + "step": 7384 + }, + { + "epoch": 0.595997094665483, + "grad_norm": 0.6889060139656067, + "learning_rate": 0.00014084185246021283, + "loss": 2.6723, + "step": 7385 + }, + { + "epoch": 0.596077798402066, + "grad_norm": 0.735698938369751, + "learning_rate": 0.00014082744175347923, + "loss": 2.6434, + "step": 7386 + }, + { + "epoch": 0.5961585021386491, + "grad_norm": 0.7603070735931396, + 
"learning_rate": 0.00014081303002921902, + "loss": 2.665, + "step": 7387 + }, + { + "epoch": 0.596239205875232, + "grad_norm": 0.6786355376243591, + "learning_rate": 0.00014079861728779141, + "loss": 2.5842, + "step": 7388 + }, + { + "epoch": 0.596319909611815, + "grad_norm": 0.6693331003189087, + "learning_rate": 0.00014078420352955565, + "loss": 2.6211, + "step": 7389 + }, + { + "epoch": 0.596400613348398, + "grad_norm": 0.74013751745224, + "learning_rate": 0.0001407697887548709, + "loss": 2.5886, + "step": 7390 + }, + { + "epoch": 0.5964813170849811, + "grad_norm": 0.739507257938385, + "learning_rate": 0.00014075537296409646, + "loss": 2.607, + "step": 7391 + }, + { + "epoch": 0.5965620208215641, + "grad_norm": 0.7121848464012146, + "learning_rate": 0.00014074095615759156, + "loss": 2.6052, + "step": 7392 + }, + { + "epoch": 0.596642724558147, + "grad_norm": 0.7526760697364807, + "learning_rate": 0.00014072653833571556, + "loss": 2.6051, + "step": 7393 + }, + { + "epoch": 0.59672342829473, + "grad_norm": 0.7867496609687805, + "learning_rate": 0.00014071211949882777, + "loss": 2.6228, + "step": 7394 + }, + { + "epoch": 0.596804132031313, + "grad_norm": 0.7527757883071899, + "learning_rate": 0.00014069769964728752, + "loss": 2.6793, + "step": 7395 + }, + { + "epoch": 0.5968848357678961, + "grad_norm": 0.7096899747848511, + "learning_rate": 0.00014068327878145423, + "loss": 2.5207, + "step": 7396 + }, + { + "epoch": 0.5969655395044791, + "grad_norm": 0.6863983869552612, + "learning_rate": 0.00014066885690168726, + "loss": 2.7059, + "step": 7397 + }, + { + "epoch": 0.597046243241062, + "grad_norm": 0.7782251834869385, + "learning_rate": 0.0001406544340083461, + "loss": 2.6232, + "step": 7398 + }, + { + "epoch": 0.597126946977645, + "grad_norm": 0.6944136619567871, + "learning_rate": 0.00014064001010179013, + "loss": 2.6134, + "step": 7399 + }, + { + "epoch": 0.5972076507142281, + "grad_norm": 0.7629704475402832, + "learning_rate": 0.00014062558518237892, + "loss": 
2.5358, + "step": 7400 + }, + { + "epoch": 0.5972883544508111, + "grad_norm": 0.6922330260276794, + "learning_rate": 0.0001406111592504719, + "loss": 2.5457, + "step": 7401 + }, + { + "epoch": 0.597369058187394, + "grad_norm": 0.6992952227592468, + "learning_rate": 0.00014059673230642865, + "loss": 2.6241, + "step": 7402 + }, + { + "epoch": 0.597449761923977, + "grad_norm": 0.6587642431259155, + "learning_rate": 0.0001405823043506087, + "loss": 2.5867, + "step": 7403 + }, + { + "epoch": 0.5975304656605601, + "grad_norm": 0.6993013024330139, + "learning_rate": 0.00014056787538337164, + "loss": 2.6194, + "step": 7404 + }, + { + "epoch": 0.5976111693971431, + "grad_norm": 0.7605414986610413, + "learning_rate": 0.0001405534454050771, + "loss": 2.607, + "step": 7405 + }, + { + "epoch": 0.5976918731337261, + "grad_norm": 0.6624562740325928, + "learning_rate": 0.00014053901441608466, + "loss": 2.5962, + "step": 7406 + }, + { + "epoch": 0.597772576870309, + "grad_norm": 0.7432621717453003, + "learning_rate": 0.000140524582416754, + "loss": 2.6434, + "step": 7407 + }, + { + "epoch": 0.5978532806068921, + "grad_norm": 0.7184053659439087, + "learning_rate": 0.00014051014940744488, + "loss": 2.6139, + "step": 7408 + }, + { + "epoch": 0.5979339843434751, + "grad_norm": 0.7567455768585205, + "learning_rate": 0.00014049571538851687, + "loss": 2.5788, + "step": 7409 + }, + { + "epoch": 0.5980146880800581, + "grad_norm": 0.6759883761405945, + "learning_rate": 0.00014048128036032984, + "loss": 2.5584, + "step": 7410 + }, + { + "epoch": 0.5980953918166411, + "grad_norm": 0.7607424855232239, + "learning_rate": 0.00014046684432324343, + "loss": 2.5675, + "step": 7411 + }, + { + "epoch": 0.5981760955532242, + "grad_norm": 0.7134036421775818, + "learning_rate": 0.00014045240727761748, + "loss": 2.6805, + "step": 7412 + }, + { + "epoch": 0.5982567992898071, + "grad_norm": 0.6996984481811523, + "learning_rate": 0.00014043796922381184, + "loss": 2.5874, + "step": 7413 + }, + { + "epoch": 
0.5983375030263901, + "grad_norm": 0.7098252177238464, + "learning_rate": 0.00014042353016218627, + "loss": 2.5895, + "step": 7414 + }, + { + "epoch": 0.5984182067629731, + "grad_norm": 0.7160520553588867, + "learning_rate": 0.00014040909009310068, + "loss": 2.6042, + "step": 7415 + }, + { + "epoch": 0.5984989104995562, + "grad_norm": 0.6727281212806702, + "learning_rate": 0.00014039464901691493, + "loss": 2.5356, + "step": 7416 + }, + { + "epoch": 0.5985796142361391, + "grad_norm": 0.7052881717681885, + "learning_rate": 0.00014038020693398891, + "loss": 2.6093, + "step": 7417 + }, + { + "epoch": 0.5986603179727221, + "grad_norm": 0.7151781916618347, + "learning_rate": 0.00014036576384468262, + "loss": 2.5776, + "step": 7418 + }, + { + "epoch": 0.5987410217093051, + "grad_norm": 0.7376574873924255, + "learning_rate": 0.0001403513197493559, + "loss": 2.6246, + "step": 7419 + }, + { + "epoch": 0.5988217254458882, + "grad_norm": 0.6882135272026062, + "learning_rate": 0.00014033687464836892, + "loss": 2.6028, + "step": 7420 + }, + { + "epoch": 0.5989024291824712, + "grad_norm": 0.6603999137878418, + "learning_rate": 0.00014032242854208153, + "loss": 2.5897, + "step": 7421 + }, + { + "epoch": 0.5989831329190541, + "grad_norm": 0.7001559734344482, + "learning_rate": 0.0001403079814308538, + "loss": 2.6033, + "step": 7422 + }, + { + "epoch": 0.5990638366556371, + "grad_norm": 0.7184363603591919, + "learning_rate": 0.00014029353331504582, + "loss": 2.7464, + "step": 7423 + }, + { + "epoch": 0.5991445403922202, + "grad_norm": 0.6794769167900085, + "learning_rate": 0.00014027908419501767, + "loss": 2.569, + "step": 7424 + }, + { + "epoch": 0.5992252441288032, + "grad_norm": 0.6846041083335876, + "learning_rate": 0.00014026463407112942, + "loss": 2.5995, + "step": 7425 + }, + { + "epoch": 0.5993059478653862, + "grad_norm": 0.6539658308029175, + "learning_rate": 0.00014025018294374129, + "loss": 2.5749, + "step": 7426 + }, + { + "epoch": 0.5993866516019691, + "grad_norm": 
0.6572301983833313, + "learning_rate": 0.00014023573081321336, + "loss": 2.5312, + "step": 7427 + }, + { + "epoch": 0.5994673553385522, + "grad_norm": 0.7010765671730042, + "learning_rate": 0.00014022127767990581, + "loss": 2.5088, + "step": 7428 + }, + { + "epoch": 0.5995480590751352, + "grad_norm": 0.7193396091461182, + "learning_rate": 0.0001402068235441789, + "loss": 2.6193, + "step": 7429 + }, + { + "epoch": 0.5996287628117182, + "grad_norm": 0.6928533315658569, + "learning_rate": 0.00014019236840639288, + "loss": 2.6149, + "step": 7430 + }, + { + "epoch": 0.5997094665483012, + "grad_norm": 0.743658185005188, + "learning_rate": 0.00014017791226690794, + "loss": 2.5466, + "step": 7431 + }, + { + "epoch": 0.5997901702848842, + "grad_norm": 0.752082347869873, + "learning_rate": 0.0001401634551260844, + "loss": 2.6605, + "step": 7432 + }, + { + "epoch": 0.5998708740214672, + "grad_norm": 0.7280415296554565, + "learning_rate": 0.00014014899698428255, + "loss": 2.6128, + "step": 7433 + }, + { + "epoch": 0.5999515777580502, + "grad_norm": 0.7037710547447205, + "learning_rate": 0.0001401345378418628, + "loss": 2.6157, + "step": 7434 + }, + { + "epoch": 0.6000322814946332, + "grad_norm": 0.6984395980834961, + "learning_rate": 0.00014012007769918542, + "loss": 2.5579, + "step": 7435 + }, + { + "epoch": 0.6001129852312163, + "grad_norm": 0.6853601336479187, + "learning_rate": 0.00014010561655661085, + "loss": 2.6316, + "step": 7436 + }, + { + "epoch": 0.6001936889677992, + "grad_norm": 0.7551750540733337, + "learning_rate": 0.00014009115441449948, + "loss": 2.6671, + "step": 7437 + }, + { + "epoch": 0.6002743927043822, + "grad_norm": 0.7680155038833618, + "learning_rate": 0.0001400766912732117, + "loss": 2.6301, + "step": 7438 + }, + { + "epoch": 0.6003550964409652, + "grad_norm": 0.6757175922393799, + "learning_rate": 0.00014006222713310807, + "loss": 2.5584, + "step": 7439 + }, + { + "epoch": 0.6004358001775483, + "grad_norm": 0.6636163592338562, + "learning_rate": 
0.00014004776199454897, + "loss": 2.5437, + "step": 7440 + }, + { + "epoch": 0.6005165039141312, + "grad_norm": 0.7317774891853333, + "learning_rate": 0.00014003329585789498, + "loss": 2.594, + "step": 7441 + }, + { + "epoch": 0.6005972076507142, + "grad_norm": 0.6903451681137085, + "learning_rate": 0.0001400188287235066, + "loss": 2.6175, + "step": 7442 + }, + { + "epoch": 0.6006779113872972, + "grad_norm": 0.7137858867645264, + "learning_rate": 0.00014000436059174437, + "loss": 2.6411, + "step": 7443 + }, + { + "epoch": 0.6007586151238803, + "grad_norm": 0.7124149203300476, + "learning_rate": 0.00013998989146296893, + "loss": 2.6562, + "step": 7444 + }, + { + "epoch": 0.6008393188604633, + "grad_norm": 0.7518175840377808, + "learning_rate": 0.00013997542133754087, + "loss": 2.6213, + "step": 7445 + }, + { + "epoch": 0.6009200225970462, + "grad_norm": 0.6843053698539734, + "learning_rate": 0.0001399609502158208, + "loss": 2.6099, + "step": 7446 + }, + { + "epoch": 0.6010007263336292, + "grad_norm": 0.6668025255203247, + "learning_rate": 0.0001399464780981694, + "loss": 2.609, + "step": 7447 + }, + { + "epoch": 0.6010814300702122, + "grad_norm": 0.6849119067192078, + "learning_rate": 0.00013993200498494735, + "loss": 2.6097, + "step": 7448 + }, + { + "epoch": 0.6011621338067953, + "grad_norm": 0.7767381072044373, + "learning_rate": 0.0001399175308765153, + "loss": 2.6351, + "step": 7449 + }, + { + "epoch": 0.6012428375433783, + "grad_norm": 0.6630256772041321, + "learning_rate": 0.0001399030557732341, + "loss": 2.5924, + "step": 7450 + }, + { + "epoch": 0.6013235412799612, + "grad_norm": 0.6918755769729614, + "learning_rate": 0.00013988857967546444, + "loss": 2.6205, + "step": 7451 + }, + { + "epoch": 0.6014042450165442, + "grad_norm": 0.7179181575775146, + "learning_rate": 0.00013987410258356708, + "loss": 2.5971, + "step": 7452 + }, + { + "epoch": 0.6014849487531273, + "grad_norm": 0.7233672738075256, + "learning_rate": 0.00013985962449790284, + "loss": 2.595, + 
"step": 7453 + }, + { + "epoch": 0.6015656524897103, + "grad_norm": 0.6861593127250671, + "learning_rate": 0.0001398451454188326, + "loss": 2.6127, + "step": 7454 + }, + { + "epoch": 0.6016463562262933, + "grad_norm": 0.6818981170654297, + "learning_rate": 0.00013983066534671714, + "loss": 2.5923, + "step": 7455 + }, + { + "epoch": 0.6017270599628762, + "grad_norm": 0.700036346912384, + "learning_rate": 0.0001398161842819174, + "loss": 2.5474, + "step": 7456 + }, + { + "epoch": 0.6018077636994593, + "grad_norm": 0.6884824633598328, + "learning_rate": 0.00013980170222479426, + "loss": 2.6041, + "step": 7457 + }, + { + "epoch": 0.6018884674360423, + "grad_norm": 0.6745120286941528, + "learning_rate": 0.00013978721917570866, + "loss": 2.6638, + "step": 7458 + }, + { + "epoch": 0.6019691711726253, + "grad_norm": 0.6886256337165833, + "learning_rate": 0.00013977273513502157, + "loss": 2.5733, + "step": 7459 + }, + { + "epoch": 0.6020498749092082, + "grad_norm": 0.7220930457115173, + "learning_rate": 0.00013975825010309394, + "loss": 2.5739, + "step": 7460 + }, + { + "epoch": 0.6021305786457913, + "grad_norm": 0.7281780242919922, + "learning_rate": 0.0001397437640802868, + "loss": 2.5646, + "step": 7461 + }, + { + "epoch": 0.6022112823823743, + "grad_norm": 0.7316896915435791, + "learning_rate": 0.00013972927706696115, + "loss": 2.6532, + "step": 7462 + }, + { + "epoch": 0.6022919861189573, + "grad_norm": 0.6288646459579468, + "learning_rate": 0.00013971478906347806, + "loss": 2.5753, + "step": 7463 + }, + { + "epoch": 0.6023726898555403, + "grad_norm": 0.7110145688056946, + "learning_rate": 0.00013970030007019862, + "loss": 2.6421, + "step": 7464 + }, + { + "epoch": 0.6024533935921234, + "grad_norm": 0.7437754273414612, + "learning_rate": 0.00013968581008748393, + "loss": 2.585, + "step": 7465 + }, + { + "epoch": 0.6025340973287063, + "grad_norm": 0.6839718222618103, + "learning_rate": 0.00013967131911569514, + "loss": 2.6249, + "step": 7466 + }, + { + "epoch": 
0.6026148010652893, + "grad_norm": 0.7358397841453552, + "learning_rate": 0.00013965682715519332, + "loss": 2.597, + "step": 7467 + }, + { + "epoch": 0.6026955048018723, + "grad_norm": 0.673651397228241, + "learning_rate": 0.00013964233420633973, + "loss": 2.6111, + "step": 7468 + }, + { + "epoch": 0.6027762085384554, + "grad_norm": 0.7390083074569702, + "learning_rate": 0.00013962784026949553, + "loss": 2.6131, + "step": 7469 + }, + { + "epoch": 0.6028569122750383, + "grad_norm": 0.6902220249176025, + "learning_rate": 0.00013961334534502197, + "loss": 2.6116, + "step": 7470 + }, + { + "epoch": 0.6029376160116213, + "grad_norm": 0.6946651935577393, + "learning_rate": 0.00013959884943328033, + "loss": 2.6307, + "step": 7471 + }, + { + "epoch": 0.6030183197482043, + "grad_norm": 0.7277294993400574, + "learning_rate": 0.00013958435253463183, + "loss": 2.6065, + "step": 7472 + }, + { + "epoch": 0.6030990234847874, + "grad_norm": 0.743833601474762, + "learning_rate": 0.00013956985464943776, + "loss": 2.6644, + "step": 7473 + }, + { + "epoch": 0.6031797272213704, + "grad_norm": 0.6480288505554199, + "learning_rate": 0.0001395553557780595, + "loss": 2.5386, + "step": 7474 + }, + { + "epoch": 0.6032604309579533, + "grad_norm": 0.799443781375885, + "learning_rate": 0.00013954085592085834, + "loss": 2.5653, + "step": 7475 + }, + { + "epoch": 0.6033411346945363, + "grad_norm": 0.6790705323219299, + "learning_rate": 0.00013952635507819575, + "loss": 2.6229, + "step": 7476 + }, + { + "epoch": 0.6034218384311194, + "grad_norm": 0.6871588826179504, + "learning_rate": 0.00013951185325043302, + "loss": 2.6514, + "step": 7477 + }, + { + "epoch": 0.6035025421677024, + "grad_norm": 0.7236921787261963, + "learning_rate": 0.00013949735043793164, + "loss": 2.5931, + "step": 7478 + }, + { + "epoch": 0.6035832459042854, + "grad_norm": 0.6888518929481506, + "learning_rate": 0.00013948284664105305, + "loss": 2.6408, + "step": 7479 + }, + { + "epoch": 0.6036639496408683, + "grad_norm": 
0.7292625904083252, + "learning_rate": 0.00013946834186015868, + "loss": 2.5829, + "step": 7480 + }, + { + "epoch": 0.6037446533774514, + "grad_norm": 0.6755293607711792, + "learning_rate": 0.00013945383609561009, + "loss": 2.5917, + "step": 7481 + }, + { + "epoch": 0.6038253571140344, + "grad_norm": 0.6808032989501953, + "learning_rate": 0.00013943932934776877, + "loss": 2.6103, + "step": 7482 + }, + { + "epoch": 0.6039060608506174, + "grad_norm": 0.747173547744751, + "learning_rate": 0.00013942482161699625, + "loss": 2.624, + "step": 7483 + }, + { + "epoch": 0.6039867645872004, + "grad_norm": 0.7265594005584717, + "learning_rate": 0.00013941031290365413, + "loss": 2.5672, + "step": 7484 + }, + { + "epoch": 0.6040674683237834, + "grad_norm": 0.6434060335159302, + "learning_rate": 0.000139395803208104, + "loss": 2.5885, + "step": 7485 + }, + { + "epoch": 0.6041481720603664, + "grad_norm": 0.7148730754852295, + "learning_rate": 0.00013938129253070747, + "loss": 2.6466, + "step": 7486 + }, + { + "epoch": 0.6042288757969494, + "grad_norm": 0.7724708318710327, + "learning_rate": 0.00013936678087182616, + "loss": 2.6364, + "step": 7487 + }, + { + "epoch": 0.6043095795335324, + "grad_norm": 0.6886702179908752, + "learning_rate": 0.0001393522682318218, + "loss": 2.5844, + "step": 7488 + }, + { + "epoch": 0.6043902832701155, + "grad_norm": 0.6501082181930542, + "learning_rate": 0.00013933775461105603, + "loss": 2.5767, + "step": 7489 + }, + { + "epoch": 0.6044709870066984, + "grad_norm": 0.7333959341049194, + "learning_rate": 0.00013932324000989058, + "loss": 2.5735, + "step": 7490 + }, + { + "epoch": 0.6045516907432814, + "grad_norm": 0.7057361602783203, + "learning_rate": 0.00013930872442868722, + "loss": 2.627, + "step": 7491 + }, + { + "epoch": 0.6046323944798644, + "grad_norm": 0.705078661441803, + "learning_rate": 0.00013929420786780767, + "loss": 2.6012, + "step": 7492 + }, + { + "epoch": 0.6047130982164475, + "grad_norm": 0.7192156314849854, + "learning_rate": 
0.00013927969032761378, + "loss": 2.5594, + "step": 7493 + }, + { + "epoch": 0.6047938019530305, + "grad_norm": 0.703116774559021, + "learning_rate": 0.00013926517180846726, + "loss": 2.6099, + "step": 7494 + }, + { + "epoch": 0.6048745056896134, + "grad_norm": 0.6970264315605164, + "learning_rate": 0.00013925065231073006, + "loss": 2.5832, + "step": 7495 + }, + { + "epoch": 0.6049552094261964, + "grad_norm": 0.7308031320571899, + "learning_rate": 0.00013923613183476402, + "loss": 2.586, + "step": 7496 + }, + { + "epoch": 0.6050359131627794, + "grad_norm": 0.7212777137756348, + "learning_rate": 0.00013922161038093097, + "loss": 2.6374, + "step": 7497 + }, + { + "epoch": 0.6051166168993625, + "grad_norm": 0.6644641757011414, + "learning_rate": 0.0001392070879495929, + "loss": 2.5226, + "step": 7498 + }, + { + "epoch": 0.6051973206359454, + "grad_norm": 0.6683016419410706, + "learning_rate": 0.0001391925645411117, + "loss": 2.5279, + "step": 7499 + }, + { + "epoch": 0.6052780243725284, + "grad_norm": 0.7341439127922058, + "learning_rate": 0.00013917804015584932, + "loss": 2.5995, + "step": 7500 + }, + { + "epoch": 0.6053587281091114, + "grad_norm": 0.753942608833313, + "learning_rate": 0.0001391635147941678, + "loss": 2.5706, + "step": 7501 + }, + { + "epoch": 0.6054394318456945, + "grad_norm": 0.7541958093643188, + "learning_rate": 0.00013914898845642908, + "loss": 2.6365, + "step": 7502 + }, + { + "epoch": 0.6055201355822775, + "grad_norm": 0.6583349108695984, + "learning_rate": 0.00013913446114299528, + "loss": 2.534, + "step": 7503 + }, + { + "epoch": 0.6056008393188604, + "grad_norm": 0.6545756459236145, + "learning_rate": 0.00013911993285422835, + "loss": 2.5443, + "step": 7504 + }, + { + "epoch": 0.6056815430554434, + "grad_norm": 0.8290210366249084, + "learning_rate": 0.00013910540359049045, + "loss": 2.6196, + "step": 7505 + }, + { + "epoch": 0.6057622467920265, + "grad_norm": 0.7032577395439148, + "learning_rate": 0.0001390908733521437, + "loss": 2.6575, + 
"step": 7506 + }, + { + "epoch": 0.6058429505286095, + "grad_norm": 0.7018071413040161, + "learning_rate": 0.0001390763421395502, + "loss": 2.6272, + "step": 7507 + }, + { + "epoch": 0.6059236542651925, + "grad_norm": 0.6288552284240723, + "learning_rate": 0.00013906180995307206, + "loss": 2.5295, + "step": 7508 + }, + { + "epoch": 0.6060043580017754, + "grad_norm": 0.7013774514198303, + "learning_rate": 0.00013904727679307153, + "loss": 2.5669, + "step": 7509 + }, + { + "epoch": 0.6060850617383585, + "grad_norm": 0.6811630129814148, + "learning_rate": 0.00013903274265991082, + "loss": 2.5827, + "step": 7510 + }, + { + "epoch": 0.6061657654749415, + "grad_norm": 0.6690269112586975, + "learning_rate": 0.0001390182075539521, + "loss": 2.5947, + "step": 7511 + }, + { + "epoch": 0.6062464692115245, + "grad_norm": 0.6946289539337158, + "learning_rate": 0.00013900367147555768, + "loss": 2.59, + "step": 7512 + }, + { + "epoch": 0.6063271729481075, + "grad_norm": 0.7302843332290649, + "learning_rate": 0.0001389891344250898, + "loss": 2.5994, + "step": 7513 + }, + { + "epoch": 0.6064078766846905, + "grad_norm": 0.7462306022644043, + "learning_rate": 0.00013897459640291074, + "loss": 2.5983, + "step": 7514 + }, + { + "epoch": 0.6064885804212735, + "grad_norm": 0.6948123574256897, + "learning_rate": 0.0001389600574093829, + "loss": 2.5737, + "step": 7515 + }, + { + "epoch": 0.6065692841578565, + "grad_norm": 0.6897372007369995, + "learning_rate": 0.00013894551744486857, + "loss": 2.607, + "step": 7516 + }, + { + "epoch": 0.6066499878944395, + "grad_norm": 0.6808069348335266, + "learning_rate": 0.00013893097650973015, + "loss": 2.5712, + "step": 7517 + }, + { + "epoch": 0.6067306916310226, + "grad_norm": 0.7000731229782104, + "learning_rate": 0.00013891643460433, + "loss": 2.5654, + "step": 7518 + }, + { + "epoch": 0.6068113953676055, + "grad_norm": 0.7197545766830444, + "learning_rate": 0.0001389018917290306, + "loss": 2.5705, + "step": 7519 + }, + { + "epoch": 
0.6068920991041885, + "grad_norm": 0.7001069188117981, + "learning_rate": 0.00013888734788419433, + "loss": 2.5934, + "step": 7520 + }, + { + "epoch": 0.6069728028407715, + "grad_norm": 0.7480459213256836, + "learning_rate": 0.00013887280307018377, + "loss": 2.5211, + "step": 7521 + }, + { + "epoch": 0.6070535065773546, + "grad_norm": 0.6913945078849792, + "learning_rate": 0.00013885825728736132, + "loss": 2.6013, + "step": 7522 + }, + { + "epoch": 0.6071342103139376, + "grad_norm": 0.6527336239814758, + "learning_rate": 0.00013884371053608948, + "loss": 2.5901, + "step": 7523 + }, + { + "epoch": 0.6072149140505205, + "grad_norm": 0.6897335052490234, + "learning_rate": 0.00013882916281673086, + "loss": 2.5389, + "step": 7524 + }, + { + "epoch": 0.6072956177871035, + "grad_norm": 0.7159501910209656, + "learning_rate": 0.00013881461412964798, + "loss": 2.5399, + "step": 7525 + }, + { + "epoch": 0.6073763215236866, + "grad_norm": 0.6744364500045776, + "learning_rate": 0.00013880006447520346, + "loss": 2.5658, + "step": 7526 + }, + { + "epoch": 0.6074570252602696, + "grad_norm": 0.819950520992279, + "learning_rate": 0.00013878551385375994, + "loss": 2.6143, + "step": 7527 + }, + { + "epoch": 0.6075377289968525, + "grad_norm": 0.744293212890625, + "learning_rate": 0.00013877096226568, + "loss": 2.6565, + "step": 7528 + }, + { + "epoch": 0.6076184327334355, + "grad_norm": 0.7121254205703735, + "learning_rate": 0.00013875640971132636, + "loss": 2.6151, + "step": 7529 + }, + { + "epoch": 0.6076991364700186, + "grad_norm": 0.7616204023361206, + "learning_rate": 0.00013874185619106163, + "loss": 2.6395, + "step": 7530 + }, + { + "epoch": 0.6077798402066016, + "grad_norm": 0.7481076121330261, + "learning_rate": 0.0001387273017052486, + "loss": 2.597, + "step": 7531 + }, + { + "epoch": 0.6078605439431846, + "grad_norm": 0.6660816073417664, + "learning_rate": 0.00013871274625425, + "loss": 2.5696, + "step": 7532 + }, + { + "epoch": 0.6079412476797675, + "grad_norm": 
0.7491411566734314, + "learning_rate": 0.00013869818983842854, + "loss": 2.552, + "step": 7533 + }, + { + "epoch": 0.6080219514163506, + "grad_norm": 0.7130792140960693, + "learning_rate": 0.00013868363245814704, + "loss": 2.5959, + "step": 7534 + }, + { + "epoch": 0.6081026551529336, + "grad_norm": 0.7157341241836548, + "learning_rate": 0.00013866907411376827, + "loss": 2.5598, + "step": 7535 + }, + { + "epoch": 0.6081833588895166, + "grad_norm": 0.7750656008720398, + "learning_rate": 0.00013865451480565513, + "loss": 2.6217, + "step": 7536 + }, + { + "epoch": 0.6082640626260996, + "grad_norm": 0.6915080547332764, + "learning_rate": 0.00013863995453417043, + "loss": 2.6211, + "step": 7537 + }, + { + "epoch": 0.6083447663626826, + "grad_norm": 0.7245940566062927, + "learning_rate": 0.00013862539329967706, + "loss": 2.5619, + "step": 7538 + }, + { + "epoch": 0.6084254700992656, + "grad_norm": 0.8884119391441345, + "learning_rate": 0.0001386108311025379, + "loss": 2.6349, + "step": 7539 + }, + { + "epoch": 0.6085061738358486, + "grad_norm": 0.7889477610588074, + "learning_rate": 0.0001385962679431159, + "loss": 2.6169, + "step": 7540 + }, + { + "epoch": 0.6085868775724316, + "grad_norm": 0.7187505960464478, + "learning_rate": 0.00013858170382177403, + "loss": 2.5582, + "step": 7541 + }, + { + "epoch": 0.6086675813090147, + "grad_norm": 0.7502198219299316, + "learning_rate": 0.00013856713873887526, + "loss": 2.5418, + "step": 7542 + }, + { + "epoch": 0.6087482850455976, + "grad_norm": 0.797704815864563, + "learning_rate": 0.00013855257269478256, + "loss": 2.5764, + "step": 7543 + }, + { + "epoch": 0.6088289887821806, + "grad_norm": 0.7651431560516357, + "learning_rate": 0.00013853800568985896, + "loss": 2.5995, + "step": 7544 + }, + { + "epoch": 0.6089096925187636, + "grad_norm": 0.7048482298851013, + "learning_rate": 0.00013852343772446753, + "loss": 2.5656, + "step": 7545 + }, + { + "epoch": 0.6089903962553467, + "grad_norm": 0.7252251505851746, + "learning_rate": 
0.00013850886879897135, + "loss": 2.6509, + "step": 7546 + }, + { + "epoch": 0.6090710999919297, + "grad_norm": 0.7220067381858826, + "learning_rate": 0.00013849429891373344, + "loss": 2.5558, + "step": 7547 + }, + { + "epoch": 0.6091518037285126, + "grad_norm": 0.7672600746154785, + "learning_rate": 0.000138479728069117, + "loss": 2.5682, + "step": 7548 + }, + { + "epoch": 0.6092325074650956, + "grad_norm": 0.7753601670265198, + "learning_rate": 0.0001384651562654852, + "loss": 2.6459, + "step": 7549 + }, + { + "epoch": 0.6093132112016786, + "grad_norm": 0.7346559166908264, + "learning_rate": 0.00013845058350320108, + "loss": 2.5988, + "step": 7550 + }, + { + "epoch": 0.6093939149382617, + "grad_norm": 0.7386072874069214, + "learning_rate": 0.00013843600978262797, + "loss": 2.6366, + "step": 7551 + }, + { + "epoch": 0.6094746186748446, + "grad_norm": 0.7114188075065613, + "learning_rate": 0.00013842143510412898, + "loss": 2.5515, + "step": 7552 + }, + { + "epoch": 0.6095553224114276, + "grad_norm": 0.6836373209953308, + "learning_rate": 0.00013840685946806742, + "loss": 2.6301, + "step": 7553 + }, + { + "epoch": 0.6096360261480106, + "grad_norm": 0.7548927068710327, + "learning_rate": 0.00013839228287480652, + "loss": 2.6508, + "step": 7554 + }, + { + "epoch": 0.6097167298845937, + "grad_norm": 0.6931679248809814, + "learning_rate": 0.00013837770532470957, + "loss": 2.5535, + "step": 7555 + }, + { + "epoch": 0.6097974336211767, + "grad_norm": 0.7621145248413086, + "learning_rate": 0.00013836312681813988, + "loss": 2.6831, + "step": 7556 + }, + { + "epoch": 0.6098781373577596, + "grad_norm": 0.6735427975654602, + "learning_rate": 0.00013834854735546079, + "loss": 2.5338, + "step": 7557 + }, + { + "epoch": 0.6099588410943426, + "grad_norm": 0.7157600522041321, + "learning_rate": 0.00013833396693703565, + "loss": 2.5713, + "step": 7558 + }, + { + "epoch": 0.6100395448309257, + "grad_norm": 0.718032956123352, + "learning_rate": 0.00013831938556322789, + "loss": 
2.5625, + "step": 7559 + }, + { + "epoch": 0.6101202485675087, + "grad_norm": 0.7290309071540833, + "learning_rate": 0.0001383048032344008, + "loss": 2.5956, + "step": 7560 + }, + { + "epoch": 0.6102009523040917, + "grad_norm": 0.675470769405365, + "learning_rate": 0.00013829021995091792, + "loss": 2.6053, + "step": 7561 + }, + { + "epoch": 0.6102816560406746, + "grad_norm": 0.7348767518997192, + "learning_rate": 0.00013827563571314268, + "loss": 2.6174, + "step": 7562 + }, + { + "epoch": 0.6103623597772577, + "grad_norm": 0.64495849609375, + "learning_rate": 0.00013826105052143852, + "loss": 2.5923, + "step": 7563 + }, + { + "epoch": 0.6104430635138407, + "grad_norm": 0.7379264235496521, + "learning_rate": 0.000138246464376169, + "loss": 2.6438, + "step": 7564 + }, + { + "epoch": 0.6105237672504237, + "grad_norm": 0.7802134156227112, + "learning_rate": 0.00013823187727769756, + "loss": 2.5884, + "step": 7565 + }, + { + "epoch": 0.6106044709870067, + "grad_norm": 0.6907222867012024, + "learning_rate": 0.00013821728922638782, + "loss": 2.596, + "step": 7566 + }, + { + "epoch": 0.6106851747235897, + "grad_norm": 0.6924182176589966, + "learning_rate": 0.00013820270022260335, + "loss": 2.5631, + "step": 7567 + }, + { + "epoch": 0.6107658784601727, + "grad_norm": 0.729258120059967, + "learning_rate": 0.0001381881102667077, + "loss": 2.5761, + "step": 7568 + }, + { + "epoch": 0.6108465821967557, + "grad_norm": 0.7141425013542175, + "learning_rate": 0.00013817351935906455, + "loss": 2.6214, + "step": 7569 + }, + { + "epoch": 0.6109272859333387, + "grad_norm": 0.7564505338668823, + "learning_rate": 0.00013815892750003748, + "loss": 2.6338, + "step": 7570 + }, + { + "epoch": 0.6110079896699218, + "grad_norm": 0.674705982208252, + "learning_rate": 0.00013814433468999022, + "loss": 2.5604, + "step": 7571 + }, + { + "epoch": 0.6110886934065047, + "grad_norm": 0.6956657767295837, + "learning_rate": 0.00013812974092928642, + "loss": 2.5805, + "step": 7572 + }, + { + "epoch": 
0.6111693971430877, + "grad_norm": 0.7393823862075806, + "learning_rate": 0.0001381151462182898, + "loss": 2.6312, + "step": 7573 + }, + { + "epoch": 0.6112501008796707, + "grad_norm": 0.7048184275627136, + "learning_rate": 0.00013810055055736407, + "loss": 2.5948, + "step": 7574 + }, + { + "epoch": 0.6113308046162538, + "grad_norm": 0.748798668384552, + "learning_rate": 0.0001380859539468731, + "loss": 2.5815, + "step": 7575 + }, + { + "epoch": 0.6114115083528368, + "grad_norm": 0.7146531343460083, + "learning_rate": 0.00013807135638718048, + "loss": 2.5803, + "step": 7576 + }, + { + "epoch": 0.6114922120894197, + "grad_norm": 0.6883770823478699, + "learning_rate": 0.00013805675787865025, + "loss": 2.6005, + "step": 7577 + }, + { + "epoch": 0.6115729158260027, + "grad_norm": 0.7808375358581543, + "learning_rate": 0.0001380421584216461, + "loss": 2.6539, + "step": 7578 + }, + { + "epoch": 0.6116536195625858, + "grad_norm": 0.6919417977333069, + "learning_rate": 0.00013802755801653192, + "loss": 2.5812, + "step": 7579 + }, + { + "epoch": 0.6117343232991688, + "grad_norm": 0.6651085615158081, + "learning_rate": 0.0001380129566636716, + "loss": 2.5952, + "step": 7580 + }, + { + "epoch": 0.6118150270357517, + "grad_norm": 0.7806586623191833, + "learning_rate": 0.00013799835436342897, + "loss": 2.6509, + "step": 7581 + }, + { + "epoch": 0.6118957307723347, + "grad_norm": 0.6522969007492065, + "learning_rate": 0.0001379837511161681, + "loss": 2.606, + "step": 7582 + }, + { + "epoch": 0.6119764345089178, + "grad_norm": 0.7566540837287903, + "learning_rate": 0.0001379691469222528, + "loss": 2.6625, + "step": 7583 + }, + { + "epoch": 0.6120571382455008, + "grad_norm": 0.7126421928405762, + "learning_rate": 0.00013795454178204715, + "loss": 2.6396, + "step": 7584 + }, + { + "epoch": 0.6121378419820838, + "grad_norm": 0.6534276008605957, + "learning_rate": 0.0001379399356959151, + "loss": 2.5841, + "step": 7585 + }, + { + "epoch": 0.6122185457186667, + "grad_norm": 
0.7663385272026062, + "learning_rate": 0.00013792532866422065, + "loss": 2.6685, + "step": 7586 + }, + { + "epoch": 0.6122992494552498, + "grad_norm": 0.6971656084060669, + "learning_rate": 0.0001379107206873279, + "loss": 2.6036, + "step": 7587 + }, + { + "epoch": 0.6123799531918328, + "grad_norm": 0.6807122230529785, + "learning_rate": 0.00013789611176560088, + "loss": 2.6499, + "step": 7588 + }, + { + "epoch": 0.6124606569284158, + "grad_norm": 0.6712431311607361, + "learning_rate": 0.0001378815018994037, + "loss": 2.6725, + "step": 7589 + }, + { + "epoch": 0.6125413606649988, + "grad_norm": 0.6986604928970337, + "learning_rate": 0.00013786689108910045, + "loss": 2.6159, + "step": 7590 + }, + { + "epoch": 0.6126220644015818, + "grad_norm": 0.7004108428955078, + "learning_rate": 0.0001378522793350553, + "loss": 2.5743, + "step": 7591 + }, + { + "epoch": 0.6127027681381648, + "grad_norm": 0.6782098412513733, + "learning_rate": 0.00013783766663763239, + "loss": 2.5776, + "step": 7592 + }, + { + "epoch": 0.6127834718747478, + "grad_norm": 0.6697036027908325, + "learning_rate": 0.00013782305299719593, + "loss": 2.6195, + "step": 7593 + }, + { + "epoch": 0.6128641756113308, + "grad_norm": 0.6894395351409912, + "learning_rate": 0.00013780843841411014, + "loss": 2.662, + "step": 7594 + }, + { + "epoch": 0.6129448793479139, + "grad_norm": 0.6775636672973633, + "learning_rate": 0.00013779382288873918, + "loss": 2.6083, + "step": 7595 + }, + { + "epoch": 0.6130255830844968, + "grad_norm": 0.7143577337265015, + "learning_rate": 0.00013777920642144738, + "loss": 2.581, + "step": 7596 + }, + { + "epoch": 0.6131062868210798, + "grad_norm": 0.6143797636032104, + "learning_rate": 0.00013776458901259905, + "loss": 2.541, + "step": 7597 + }, + { + "epoch": 0.6131869905576628, + "grad_norm": 0.7003727555274963, + "learning_rate": 0.00013774997066255839, + "loss": 2.5748, + "step": 7598 + }, + { + "epoch": 0.6132676942942458, + "grad_norm": 0.6796504259109497, + "learning_rate": 
0.0001377353513716898, + "loss": 2.596, + "step": 7599 + }, + { + "epoch": 0.6133483980308289, + "grad_norm": 0.7011274695396423, + "learning_rate": 0.00013772073114035762, + "loss": 2.5318, + "step": 7600 + }, + { + "epoch": 0.6134291017674118, + "grad_norm": 0.6584382057189941, + "learning_rate": 0.0001377061099689262, + "loss": 2.5793, + "step": 7601 + }, + { + "epoch": 0.6135098055039948, + "grad_norm": 0.6586211919784546, + "learning_rate": 0.00013769148785775995, + "loss": 2.5969, + "step": 7602 + }, + { + "epoch": 0.6135905092405778, + "grad_norm": 0.7187132835388184, + "learning_rate": 0.0001376768648072233, + "loss": 2.6407, + "step": 7603 + }, + { + "epoch": 0.6136712129771609, + "grad_norm": 0.7394679188728333, + "learning_rate": 0.00013766224081768072, + "loss": 2.5959, + "step": 7604 + }, + { + "epoch": 0.6137519167137439, + "grad_norm": 0.6802375912666321, + "learning_rate": 0.00013764761588949665, + "loss": 2.5956, + "step": 7605 + }, + { + "epoch": 0.6138326204503268, + "grad_norm": 0.6949049234390259, + "learning_rate": 0.00013763299002303553, + "loss": 2.556, + "step": 7606 + }, + { + "epoch": 0.6139133241869098, + "grad_norm": 0.7406589388847351, + "learning_rate": 0.00013761836321866196, + "loss": 2.5495, + "step": 7607 + }, + { + "epoch": 0.6139940279234929, + "grad_norm": 0.742499053478241, + "learning_rate": 0.0001376037354767404, + "loss": 2.589, + "step": 7608 + }, + { + "epoch": 0.6140747316600759, + "grad_norm": 0.7669157385826111, + "learning_rate": 0.00013758910679763551, + "loss": 2.576, + "step": 7609 + }, + { + "epoch": 0.6141554353966588, + "grad_norm": 0.6506752967834473, + "learning_rate": 0.00013757447718171182, + "loss": 2.5792, + "step": 7610 + }, + { + "epoch": 0.6142361391332418, + "grad_norm": 0.698514461517334, + "learning_rate": 0.00013755984662933393, + "loss": 2.5809, + "step": 7611 + }, + { + "epoch": 0.6143168428698249, + "grad_norm": 0.6541082262992859, + "learning_rate": 0.00013754521514086645, + "loss": 2.5755, + 
"step": 7612 + }, + { + "epoch": 0.6143975466064079, + "grad_norm": 0.6619362235069275, + "learning_rate": 0.0001375305827166741, + "loss": 2.5886, + "step": 7613 + }, + { + "epoch": 0.6144782503429909, + "grad_norm": 0.7205569744110107, + "learning_rate": 0.00013751594935712148, + "loss": 2.6293, + "step": 7614 + }, + { + "epoch": 0.6145589540795738, + "grad_norm": 0.7382494211196899, + "learning_rate": 0.00013750131506257339, + "loss": 2.6977, + "step": 7615 + }, + { + "epoch": 0.6146396578161569, + "grad_norm": 0.7492627501487732, + "learning_rate": 0.00013748667983339444, + "loss": 2.6492, + "step": 7616 + }, + { + "epoch": 0.6147203615527399, + "grad_norm": 0.6627328991889954, + "learning_rate": 0.00013747204366994947, + "loss": 2.5458, + "step": 7617 + }, + { + "epoch": 0.6148010652893229, + "grad_norm": 0.7039626836776733, + "learning_rate": 0.00013745740657260323, + "loss": 2.6578, + "step": 7618 + }, + { + "epoch": 0.6148817690259059, + "grad_norm": 0.6999295353889465, + "learning_rate": 0.00013744276854172046, + "loss": 2.6189, + "step": 7619 + }, + { + "epoch": 0.6149624727624889, + "grad_norm": 0.7604365348815918, + "learning_rate": 0.00013742812957766607, + "loss": 2.5344, + "step": 7620 + }, + { + "epoch": 0.6150431764990719, + "grad_norm": 0.6860831379890442, + "learning_rate": 0.0001374134896808048, + "loss": 2.6309, + "step": 7621 + }, + { + "epoch": 0.6151238802356549, + "grad_norm": 0.6628854274749756, + "learning_rate": 0.0001373988488515016, + "loss": 2.6339, + "step": 7622 + }, + { + "epoch": 0.6152045839722379, + "grad_norm": 0.7112562656402588, + "learning_rate": 0.00013738420709012134, + "loss": 2.6064, + "step": 7623 + }, + { + "epoch": 0.615285287708821, + "grad_norm": 0.7068392634391785, + "learning_rate": 0.0001373695643970289, + "loss": 2.624, + "step": 7624 + }, + { + "epoch": 0.6153659914454039, + "grad_norm": 0.6534786224365234, + "learning_rate": 0.00013735492077258924, + "loss": 2.5582, + "step": 7625 + }, + { + "epoch": 
0.6154466951819869, + "grad_norm": 0.7433418035507202, + "learning_rate": 0.00013734027621716729, + "loss": 2.5803, + "step": 7626 + }, + { + "epoch": 0.6155273989185699, + "grad_norm": 0.7172532081604004, + "learning_rate": 0.00013732563073112804, + "loss": 2.5906, + "step": 7627 + }, + { + "epoch": 0.615608102655153, + "grad_norm": 0.6712297201156616, + "learning_rate": 0.00013731098431483653, + "loss": 2.5597, + "step": 7628 + }, + { + "epoch": 0.615688806391736, + "grad_norm": 0.7079061269760132, + "learning_rate": 0.00013729633696865775, + "loss": 2.5538, + "step": 7629 + }, + { + "epoch": 0.6157695101283189, + "grad_norm": 0.6968971490859985, + "learning_rate": 0.00013728168869295678, + "loss": 2.6429, + "step": 7630 + }, + { + "epoch": 0.6158502138649019, + "grad_norm": 0.7123236060142517, + "learning_rate": 0.00013726703948809864, + "loss": 2.5607, + "step": 7631 + }, + { + "epoch": 0.615930917601485, + "grad_norm": 0.6441208124160767, + "learning_rate": 0.00013725238935444843, + "loss": 2.6176, + "step": 7632 + }, + { + "epoch": 0.616011621338068, + "grad_norm": 0.7145917415618896, + "learning_rate": 0.00013723773829237137, + "loss": 2.5698, + "step": 7633 + }, + { + "epoch": 0.616092325074651, + "grad_norm": 0.6397334337234497, + "learning_rate": 0.00013722308630223252, + "loss": 2.596, + "step": 7634 + }, + { + "epoch": 0.6161730288112339, + "grad_norm": 0.6372843980789185, + "learning_rate": 0.00013720843338439702, + "loss": 2.5679, + "step": 7635 + }, + { + "epoch": 0.616253732547817, + "grad_norm": 0.707842230796814, + "learning_rate": 0.00013719377953923012, + "loss": 2.6296, + "step": 7636 + }, + { + "epoch": 0.6163344362844, + "grad_norm": 0.6629409193992615, + "learning_rate": 0.000137179124767097, + "loss": 2.542, + "step": 7637 + }, + { + "epoch": 0.616415140020983, + "grad_norm": 0.753646194934845, + "learning_rate": 0.00013716446906836288, + "loss": 2.5741, + "step": 7638 + }, + { + "epoch": 0.6164958437575659, + "grad_norm": 
0.6409948468208313, + "learning_rate": 0.0001371498124433931, + "loss": 2.6723, + "step": 7639 + }, + { + "epoch": 0.616576547494149, + "grad_norm": 0.6489264965057373, + "learning_rate": 0.0001371351548925528, + "loss": 2.5806, + "step": 7640 + }, + { + "epoch": 0.616657251230732, + "grad_norm": 0.6857934594154358, + "learning_rate": 0.00013712049641620745, + "loss": 2.6406, + "step": 7641 + }, + { + "epoch": 0.616737954967315, + "grad_norm": 0.6754183769226074, + "learning_rate": 0.00013710583701472226, + "loss": 2.5576, + "step": 7642 + }, + { + "epoch": 0.616818658703898, + "grad_norm": 0.7083800435066223, + "learning_rate": 0.0001370911766884626, + "loss": 2.5747, + "step": 7643 + }, + { + "epoch": 0.616899362440481, + "grad_norm": 0.7281948924064636, + "learning_rate": 0.0001370765154377939, + "loss": 2.5627, + "step": 7644 + }, + { + "epoch": 0.616980066177064, + "grad_norm": 0.655414342880249, + "learning_rate": 0.00013706185326308148, + "loss": 2.5897, + "step": 7645 + }, + { + "epoch": 0.617060769913647, + "grad_norm": 0.6771859526634216, + "learning_rate": 0.0001370471901646908, + "loss": 2.5761, + "step": 7646 + }, + { + "epoch": 0.61714147365023, + "grad_norm": 0.6813557147979736, + "learning_rate": 0.00013703252614298732, + "loss": 2.5807, + "step": 7647 + }, + { + "epoch": 0.6172221773868131, + "grad_norm": 0.6948046684265137, + "learning_rate": 0.00013701786119833646, + "loss": 2.586, + "step": 7648 + }, + { + "epoch": 0.617302881123396, + "grad_norm": 0.643455982208252, + "learning_rate": 0.00013700319533110377, + "loss": 2.592, + "step": 7649 + }, + { + "epoch": 0.617383584859979, + "grad_norm": 0.7292457818984985, + "learning_rate": 0.0001369885285416547, + "loss": 2.6396, + "step": 7650 + }, + { + "epoch": 0.617464288596562, + "grad_norm": 0.642902672290802, + "learning_rate": 0.00013697386083035478, + "loss": 2.6115, + "step": 7651 + }, + { + "epoch": 0.617544992333145, + "grad_norm": 0.6536445021629333, + "learning_rate": 
0.00013695919219756966, + "loss": 2.5406, + "step": 7652 + }, + { + "epoch": 0.6176256960697281, + "grad_norm": 0.6643723249435425, + "learning_rate": 0.0001369445226436648, + "loss": 2.6188, + "step": 7653 + }, + { + "epoch": 0.617706399806311, + "grad_norm": 0.6481621265411377, + "learning_rate": 0.00013692985216900592, + "loss": 2.5489, + "step": 7654 + }, + { + "epoch": 0.617787103542894, + "grad_norm": 0.6828036904335022, + "learning_rate": 0.00013691518077395856, + "loss": 2.5114, + "step": 7655 + }, + { + "epoch": 0.617867807279477, + "grad_norm": 0.6802895665168762, + "learning_rate": 0.00013690050845888838, + "loss": 2.5973, + "step": 7656 + }, + { + "epoch": 0.6179485110160601, + "grad_norm": 0.6980829238891602, + "learning_rate": 0.00013688583522416107, + "loss": 2.6032, + "step": 7657 + }, + { + "epoch": 0.618029214752643, + "grad_norm": 0.7157626748085022, + "learning_rate": 0.00013687116107014236, + "loss": 2.5552, + "step": 7658 + }, + { + "epoch": 0.618109918489226, + "grad_norm": 0.69700688123703, + "learning_rate": 0.00013685648599719792, + "loss": 2.5988, + "step": 7659 + }, + { + "epoch": 0.618190622225809, + "grad_norm": 0.6859539151191711, + "learning_rate": 0.0001368418100056935, + "loss": 2.6268, + "step": 7660 + }, + { + "epoch": 0.6182713259623921, + "grad_norm": 0.6812828183174133, + "learning_rate": 0.00013682713309599487, + "loss": 2.6002, + "step": 7661 + }, + { + "epoch": 0.6183520296989751, + "grad_norm": 0.6461766362190247, + "learning_rate": 0.00013681245526846783, + "loss": 2.6064, + "step": 7662 + }, + { + "epoch": 0.618432733435558, + "grad_norm": 0.7198306322097778, + "learning_rate": 0.00013679777652347814, + "loss": 2.6012, + "step": 7663 + }, + { + "epoch": 0.618513437172141, + "grad_norm": 0.7367191910743713, + "learning_rate": 0.00013678309686139168, + "loss": 2.6661, + "step": 7664 + }, + { + "epoch": 0.6185941409087241, + "grad_norm": 0.6975768804550171, + "learning_rate": 0.0001367684162825743, + "loss": 2.6394, + 
"step": 7665 + }, + { + "epoch": 0.6186748446453071, + "grad_norm": 0.7545140385627747, + "learning_rate": 0.0001367537347873919, + "loss": 2.624, + "step": 7666 + }, + { + "epoch": 0.6187555483818901, + "grad_norm": 0.6683520674705505, + "learning_rate": 0.0001367390523762103, + "loss": 2.6345, + "step": 7667 + }, + { + "epoch": 0.618836252118473, + "grad_norm": 0.6964975595474243, + "learning_rate": 0.00013672436904939552, + "loss": 2.591, + "step": 7668 + }, + { + "epoch": 0.6189169558550561, + "grad_norm": 0.7033975124359131, + "learning_rate": 0.00013670968480731344, + "loss": 2.566, + "step": 7669 + }, + { + "epoch": 0.6189976595916391, + "grad_norm": 0.706136167049408, + "learning_rate": 0.00013669499965033007, + "loss": 2.6073, + "step": 7670 + }, + { + "epoch": 0.6190783633282221, + "grad_norm": 0.7146300673484802, + "learning_rate": 0.0001366803135788114, + "loss": 2.6602, + "step": 7671 + }, + { + "epoch": 0.6191590670648051, + "grad_norm": 0.7603063583374023, + "learning_rate": 0.00013666562659312342, + "loss": 2.5286, + "step": 7672 + }, + { + "epoch": 0.6192397708013881, + "grad_norm": 0.744955837726593, + "learning_rate": 0.00013665093869363217, + "loss": 2.5678, + "step": 7673 + }, + { + "epoch": 0.6193204745379711, + "grad_norm": 0.7548620104789734, + "learning_rate": 0.00013663624988070373, + "loss": 2.6081, + "step": 7674 + }, + { + "epoch": 0.6194011782745541, + "grad_norm": 0.7367276549339294, + "learning_rate": 0.0001366215601547042, + "loss": 2.5559, + "step": 7675 + }, + { + "epoch": 0.6194818820111371, + "grad_norm": 0.7243839502334595, + "learning_rate": 0.00013660686951599962, + "loss": 2.5545, + "step": 7676 + }, + { + "epoch": 0.6195625857477202, + "grad_norm": 0.7595756649971008, + "learning_rate": 0.00013659217796495616, + "loss": 2.6547, + "step": 7677 + }, + { + "epoch": 0.6196432894843031, + "grad_norm": 0.7566717863082886, + "learning_rate": 0.00013657748550193998, + "loss": 2.6521, + "step": 7678 + }, + { + "epoch": 
0.6197239932208861, + "grad_norm": 0.8441942930221558, + "learning_rate": 0.00013656279212731728, + "loss": 2.6325, + "step": 7679 + }, + { + "epoch": 0.6198046969574691, + "grad_norm": 0.7481170296669006, + "learning_rate": 0.00013654809784145418, + "loss": 2.6037, + "step": 7680 + }, + { + "epoch": 0.6198854006940522, + "grad_norm": 0.6626241207122803, + "learning_rate": 0.00013653340264471695, + "loss": 2.6028, + "step": 7681 + }, + { + "epoch": 0.6199661044306352, + "grad_norm": 0.7658020853996277, + "learning_rate": 0.00013651870653747186, + "loss": 2.5553, + "step": 7682 + }, + { + "epoch": 0.6200468081672181, + "grad_norm": 0.8218126893043518, + "learning_rate": 0.0001365040095200851, + "loss": 2.5661, + "step": 7683 + }, + { + "epoch": 0.6201275119038011, + "grad_norm": 0.6481068134307861, + "learning_rate": 0.00013648931159292304, + "loss": 2.5675, + "step": 7684 + }, + { + "epoch": 0.6202082156403842, + "grad_norm": 0.7529950141906738, + "learning_rate": 0.0001364746127563519, + "loss": 2.6137, + "step": 7685 + }, + { + "epoch": 0.6202889193769672, + "grad_norm": 0.7133232355117798, + "learning_rate": 0.00013645991301073816, + "loss": 2.6004, + "step": 7686 + }, + { + "epoch": 0.6203696231135502, + "grad_norm": 0.7809340953826904, + "learning_rate": 0.000136445212356448, + "loss": 2.6317, + "step": 7687 + }, + { + "epoch": 0.6204503268501331, + "grad_norm": 0.7106895446777344, + "learning_rate": 0.00013643051079384789, + "loss": 2.6086, + "step": 7688 + }, + { + "epoch": 0.6205310305867162, + "grad_norm": 0.6960744261741638, + "learning_rate": 0.00013641580832330423, + "loss": 2.5554, + "step": 7689 + }, + { + "epoch": 0.6206117343232992, + "grad_norm": 0.7078820466995239, + "learning_rate": 0.00013640110494518343, + "loss": 2.5902, + "step": 7690 + }, + { + "epoch": 0.6206924380598822, + "grad_norm": 0.7150746583938599, + "learning_rate": 0.00013638640065985195, + "loss": 2.5947, + "step": 7691 + }, + { + "epoch": 0.6207731417964651, + "grad_norm": 
0.7507869601249695, + "learning_rate": 0.00013637169546767625, + "loss": 2.559, + "step": 7692 + }, + { + "epoch": 0.6208538455330482, + "grad_norm": 0.7453179359436035, + "learning_rate": 0.00013635698936902282, + "loss": 2.5612, + "step": 7693 + }, + { + "epoch": 0.6209345492696312, + "grad_norm": 0.7174177765846252, + "learning_rate": 0.00013634228236425816, + "loss": 2.6221, + "step": 7694 + }, + { + "epoch": 0.6210152530062142, + "grad_norm": 0.7394092679023743, + "learning_rate": 0.00013632757445374884, + "loss": 2.6045, + "step": 7695 + }, + { + "epoch": 0.6210959567427972, + "grad_norm": 0.7346367239952087, + "learning_rate": 0.0001363128656378614, + "loss": 2.677, + "step": 7696 + }, + { + "epoch": 0.6211766604793802, + "grad_norm": 0.6697696447372437, + "learning_rate": 0.00013629815591696245, + "loss": 2.5741, + "step": 7697 + }, + { + "epoch": 0.6212573642159632, + "grad_norm": 0.6993793845176697, + "learning_rate": 0.00013628344529141852, + "loss": 2.5206, + "step": 7698 + }, + { + "epoch": 0.6213380679525462, + "grad_norm": 0.6946697235107422, + "learning_rate": 0.00013626873376159631, + "loss": 2.6046, + "step": 7699 + }, + { + "epoch": 0.6214187716891292, + "grad_norm": 0.7641928195953369, + "learning_rate": 0.00013625402132786248, + "loss": 2.5459, + "step": 7700 + }, + { + "epoch": 0.6214994754257122, + "grad_norm": 0.6513504981994629, + "learning_rate": 0.00013623930799058363, + "loss": 2.6137, + "step": 7701 + }, + { + "epoch": 0.6215801791622952, + "grad_norm": 0.6745209097862244, + "learning_rate": 0.00013622459375012651, + "loss": 2.5285, + "step": 7702 + }, + { + "epoch": 0.6216608828988782, + "grad_norm": 0.7162348628044128, + "learning_rate": 0.0001362098786068578, + "loss": 2.6224, + "step": 7703 + }, + { + "epoch": 0.6217415866354612, + "grad_norm": 0.7387436032295227, + "learning_rate": 0.00013619516256114427, + "loss": 2.6216, + "step": 7704 + }, + { + "epoch": 0.6218222903720442, + "grad_norm": 0.764955461025238, + "learning_rate": 
0.00013618044561335268, + "loss": 2.612, + "step": 7705 + }, + { + "epoch": 0.6219029941086273, + "grad_norm": 0.6492719054222107, + "learning_rate": 0.00013616572776384983, + "loss": 2.5532, + "step": 7706 + }, + { + "epoch": 0.6219836978452102, + "grad_norm": 0.6870293617248535, + "learning_rate": 0.0001361510090130025, + "loss": 2.5705, + "step": 7707 + }, + { + "epoch": 0.6220644015817932, + "grad_norm": 0.6899540424346924, + "learning_rate": 0.0001361362893611775, + "loss": 2.5768, + "step": 7708 + }, + { + "epoch": 0.6221451053183762, + "grad_norm": 0.658941924571991, + "learning_rate": 0.0001361215688087417, + "loss": 2.5664, + "step": 7709 + }, + { + "epoch": 0.6222258090549593, + "grad_norm": 0.6875531673431396, + "learning_rate": 0.000136106847356062, + "loss": 2.6128, + "step": 7710 + }, + { + "epoch": 0.6223065127915423, + "grad_norm": 0.657073974609375, + "learning_rate": 0.0001360921250035053, + "loss": 2.6449, + "step": 7711 + }, + { + "epoch": 0.6223872165281252, + "grad_norm": 0.7051201462745667, + "learning_rate": 0.00013607740175143848, + "loss": 2.5925, + "step": 7712 + }, + { + "epoch": 0.6224679202647082, + "grad_norm": 0.702877938747406, + "learning_rate": 0.0001360626776002285, + "loss": 2.5338, + "step": 7713 + }, + { + "epoch": 0.6225486240012913, + "grad_norm": 0.650935709476471, + "learning_rate": 0.00013604795255024233, + "loss": 2.5799, + "step": 7714 + }, + { + "epoch": 0.6226293277378743, + "grad_norm": 0.7035139203071594, + "learning_rate": 0.00013603322660184694, + "loss": 2.5476, + "step": 7715 + }, + { + "epoch": 0.6227100314744572, + "grad_norm": 0.6549977660179138, + "learning_rate": 0.0001360184997554094, + "loss": 2.6117, + "step": 7716 + }, + { + "epoch": 0.6227907352110402, + "grad_norm": 0.6882792115211487, + "learning_rate": 0.00013600377201129662, + "loss": 2.53, + "step": 7717 + }, + { + "epoch": 0.6228714389476233, + "grad_norm": 0.7390840649604797, + "learning_rate": 0.0001359890433698758, + "loss": 2.6345, + "step": 
7718 + }, + { + "epoch": 0.6229521426842063, + "grad_norm": 0.7577612400054932, + "learning_rate": 0.00013597431383151386, + "loss": 2.6386, + "step": 7719 + }, + { + "epoch": 0.6230328464207893, + "grad_norm": 0.6818724870681763, + "learning_rate": 0.00013595958339657804, + "loss": 2.5806, + "step": 7720 + }, + { + "epoch": 0.6231135501573722, + "grad_norm": 0.6954349279403687, + "learning_rate": 0.0001359448520654354, + "loss": 2.5913, + "step": 7721 + }, + { + "epoch": 0.6231942538939553, + "grad_norm": 0.7976544499397278, + "learning_rate": 0.00013593011983845308, + "loss": 2.5686, + "step": 7722 + }, + { + "epoch": 0.6232749576305383, + "grad_norm": 0.7362754940986633, + "learning_rate": 0.00013591538671599824, + "loss": 2.5596, + "step": 7723 + }, + { + "epoch": 0.6233556613671213, + "grad_norm": 0.6842390298843384, + "learning_rate": 0.00013590065269843805, + "loss": 2.5793, + "step": 7724 + }, + { + "epoch": 0.6234363651037043, + "grad_norm": 0.6816275715827942, + "learning_rate": 0.0001358859177861398, + "loss": 2.5948, + "step": 7725 + }, + { + "epoch": 0.6235170688402873, + "grad_norm": 0.6892915964126587, + "learning_rate": 0.00013587118197947066, + "loss": 2.6287, + "step": 7726 + }, + { + "epoch": 0.6235977725768703, + "grad_norm": 0.6851752996444702, + "learning_rate": 0.00013585644527879792, + "loss": 2.5781, + "step": 7727 + }, + { + "epoch": 0.6236784763134533, + "grad_norm": 0.7022164463996887, + "learning_rate": 0.00013584170768448877, + "loss": 2.5856, + "step": 7728 + }, + { + "epoch": 0.6237591800500363, + "grad_norm": 0.6752299070358276, + "learning_rate": 0.0001358269691969106, + "loss": 2.6042, + "step": 7729 + }, + { + "epoch": 0.6238398837866194, + "grad_norm": 0.6861466765403748, + "learning_rate": 0.00013581222981643074, + "loss": 2.5887, + "step": 7730 + }, + { + "epoch": 0.6239205875232023, + "grad_norm": 0.7147940397262573, + "learning_rate": 0.00013579748954341647, + "loss": 2.5796, + "step": 7731 + }, + { + "epoch": 
0.6240012912597853, + "grad_norm": 0.6704726219177246, + "learning_rate": 0.0001357827483782352, + "loss": 2.6027, + "step": 7732 + }, + { + "epoch": 0.6240819949963683, + "grad_norm": 0.6984317898750305, + "learning_rate": 0.0001357680063212543, + "loss": 2.635, + "step": 7733 + }, + { + "epoch": 0.6241626987329514, + "grad_norm": 0.6205787658691406, + "learning_rate": 0.00013575326337284115, + "loss": 2.5715, + "step": 7734 + }, + { + "epoch": 0.6242434024695344, + "grad_norm": 0.7214726805686951, + "learning_rate": 0.00013573851953336326, + "loss": 2.5605, + "step": 7735 + }, + { + "epoch": 0.6243241062061173, + "grad_norm": 0.6716169714927673, + "learning_rate": 0.000135723774803188, + "loss": 2.6766, + "step": 7736 + }, + { + "epoch": 0.6244048099427003, + "grad_norm": 0.6446832418441772, + "learning_rate": 0.00013570902918268293, + "loss": 2.5629, + "step": 7737 + }, + { + "epoch": 0.6244855136792834, + "grad_norm": 0.6721374988555908, + "learning_rate": 0.0001356942826722155, + "loss": 2.6093, + "step": 7738 + }, + { + "epoch": 0.6245662174158664, + "grad_norm": 0.7430365681648254, + "learning_rate": 0.0001356795352721532, + "loss": 2.5966, + "step": 7739 + }, + { + "epoch": 0.6246469211524494, + "grad_norm": 0.6787518858909607, + "learning_rate": 0.00013566478698286366, + "loss": 2.5519, + "step": 7740 + }, + { + "epoch": 0.6247276248890323, + "grad_norm": 0.6340047121047974, + "learning_rate": 0.0001356500378047144, + "loss": 2.5181, + "step": 7741 + }, + { + "epoch": 0.6248083286256154, + "grad_norm": 0.7559040188789368, + "learning_rate": 0.000135635287738073, + "loss": 2.6068, + "step": 7742 + }, + { + "epoch": 0.6248890323621984, + "grad_norm": 0.6819902062416077, + "learning_rate": 0.00013562053678330707, + "loss": 2.5754, + "step": 7743 + }, + { + "epoch": 0.6249697360987814, + "grad_norm": 0.6463500261306763, + "learning_rate": 0.00013560578494078423, + "loss": 2.5915, + "step": 7744 + }, + { + "epoch": 0.6250504398353643, + "grad_norm": 
0.7510617971420288, + "learning_rate": 0.0001355910322108722, + "loss": 2.5738, + "step": 7745 + }, + { + "epoch": 0.6251311435719474, + "grad_norm": 0.75312739610672, + "learning_rate": 0.00013557627859393855, + "loss": 2.5938, + "step": 7746 + }, + { + "epoch": 0.6252118473085304, + "grad_norm": 0.7784396409988403, + "learning_rate": 0.0001355615240903511, + "loss": 2.6634, + "step": 7747 + }, + { + "epoch": 0.6252925510451134, + "grad_norm": 0.7174746990203857, + "learning_rate": 0.00013554676870047752, + "loss": 2.5973, + "step": 7748 + }, + { + "epoch": 0.6253732547816964, + "grad_norm": 0.6854952573776245, + "learning_rate": 0.0001355320124246855, + "loss": 2.5397, + "step": 7749 + }, + { + "epoch": 0.6254539585182795, + "grad_norm": 0.6584961414337158, + "learning_rate": 0.00013551725526334284, + "loss": 2.5574, + "step": 7750 + }, + { + "epoch": 0.6255346622548624, + "grad_norm": 0.7067389488220215, + "learning_rate": 0.00013550249721681738, + "loss": 2.5524, + "step": 7751 + }, + { + "epoch": 0.6256153659914454, + "grad_norm": 0.6923872232437134, + "learning_rate": 0.00013548773828547686, + "loss": 2.5651, + "step": 7752 + }, + { + "epoch": 0.6256960697280284, + "grad_norm": 0.6612355709075928, + "learning_rate": 0.00013547297846968915, + "loss": 2.6075, + "step": 7753 + }, + { + "epoch": 0.6257767734646114, + "grad_norm": 0.6762828826904297, + "learning_rate": 0.00013545821776982206, + "loss": 2.6136, + "step": 7754 + }, + { + "epoch": 0.6258574772011944, + "grad_norm": 0.6940783858299255, + "learning_rate": 0.0001354434561862435, + "loss": 2.5566, + "step": 7755 + }, + { + "epoch": 0.6259381809377774, + "grad_norm": 0.7874250411987305, + "learning_rate": 0.0001354286937193214, + "loss": 2.6732, + "step": 7756 + }, + { + "epoch": 0.6260188846743604, + "grad_norm": 0.6974111795425415, + "learning_rate": 0.0001354139303694236, + "loss": 2.5455, + "step": 7757 + }, + { + "epoch": 0.6260995884109434, + "grad_norm": 0.6710802316665649, + "learning_rate": 
0.0001353991661369181, + "loss": 2.5608, + "step": 7758 + }, + { + "epoch": 0.6261802921475265, + "grad_norm": 0.681635320186615, + "learning_rate": 0.00013538440102217286, + "loss": 2.6107, + "step": 7759 + }, + { + "epoch": 0.6262609958841094, + "grad_norm": 0.7229577898979187, + "learning_rate": 0.0001353696350255558, + "loss": 2.5936, + "step": 7760 + }, + { + "epoch": 0.6263416996206924, + "grad_norm": 0.6909681558609009, + "learning_rate": 0.00013535486814743504, + "loss": 2.5521, + "step": 7761 + }, + { + "epoch": 0.6264224033572754, + "grad_norm": 0.7003746032714844, + "learning_rate": 0.0001353401003881785, + "loss": 2.5606, + "step": 7762 + }, + { + "epoch": 0.6265031070938585, + "grad_norm": 0.6883233785629272, + "learning_rate": 0.0001353253317481543, + "loss": 2.5971, + "step": 7763 + }, + { + "epoch": 0.6265838108304415, + "grad_norm": 0.7382355332374573, + "learning_rate": 0.0001353105622277305, + "loss": 2.5449, + "step": 7764 + }, + { + "epoch": 0.6266645145670244, + "grad_norm": 0.7090556621551514, + "learning_rate": 0.00013529579182727515, + "loss": 2.5988, + "step": 7765 + }, + { + "epoch": 0.6267452183036074, + "grad_norm": 0.6842581629753113, + "learning_rate": 0.00013528102054715643, + "loss": 2.6214, + "step": 7766 + }, + { + "epoch": 0.6268259220401905, + "grad_norm": 0.6969670653343201, + "learning_rate": 0.00013526624838774246, + "loss": 2.5443, + "step": 7767 + }, + { + "epoch": 0.6269066257767735, + "grad_norm": 0.7244827151298523, + "learning_rate": 0.00013525147534940138, + "loss": 2.5967, + "step": 7768 + }, + { + "epoch": 0.6269873295133565, + "grad_norm": 0.7022162675857544, + "learning_rate": 0.0001352367014325014, + "loss": 2.599, + "step": 7769 + }, + { + "epoch": 0.6270680332499394, + "grad_norm": 0.7065250873565674, + "learning_rate": 0.00013522192663741067, + "loss": 2.6105, + "step": 7770 + }, + { + "epoch": 0.6271487369865225, + "grad_norm": 0.6690711975097656, + "learning_rate": 0.0001352071509644975, + "loss": 2.55, + 
"step": 7771 + }, + { + "epoch": 0.6272294407231055, + "grad_norm": 0.6405982971191406, + "learning_rate": 0.00013519237441413011, + "loss": 2.6078, + "step": 7772 + }, + { + "epoch": 0.6273101444596885, + "grad_norm": 0.7340127229690552, + "learning_rate": 0.00013517759698667672, + "loss": 2.6244, + "step": 7773 + }, + { + "epoch": 0.6273908481962714, + "grad_norm": 0.6609435677528381, + "learning_rate": 0.00013516281868250566, + "loss": 2.5746, + "step": 7774 + }, + { + "epoch": 0.6274715519328545, + "grad_norm": 0.6681997179985046, + "learning_rate": 0.00013514803950198523, + "loss": 2.6181, + "step": 7775 + }, + { + "epoch": 0.6275522556694375, + "grad_norm": 0.7120032906532288, + "learning_rate": 0.0001351332594454838, + "loss": 2.6018, + "step": 7776 + }, + { + "epoch": 0.6276329594060205, + "grad_norm": 0.6618601679801941, + "learning_rate": 0.0001351184785133697, + "loss": 2.5342, + "step": 7777 + }, + { + "epoch": 0.6277136631426035, + "grad_norm": 0.7250192165374756, + "learning_rate": 0.00013510369670601132, + "loss": 2.5795, + "step": 7778 + }, + { + "epoch": 0.6277943668791865, + "grad_norm": 0.7918543219566345, + "learning_rate": 0.00013508891402377708, + "loss": 2.6544, + "step": 7779 + }, + { + "epoch": 0.6278750706157695, + "grad_norm": 0.678895890712738, + "learning_rate": 0.00013507413046703534, + "loss": 2.5937, + "step": 7780 + }, + { + "epoch": 0.6279557743523525, + "grad_norm": 0.7336576581001282, + "learning_rate": 0.00013505934603615457, + "loss": 2.598, + "step": 7781 + }, + { + "epoch": 0.6280364780889355, + "grad_norm": 0.6891419291496277, + "learning_rate": 0.00013504456073150332, + "loss": 2.5063, + "step": 7782 + }, + { + "epoch": 0.6281171818255186, + "grad_norm": 0.7949386835098267, + "learning_rate": 0.00013502977455344997, + "loss": 2.5703, + "step": 7783 + }, + { + "epoch": 0.6281978855621015, + "grad_norm": 0.7917985320091248, + "learning_rate": 0.00013501498750236306, + "loss": 2.639, + "step": 7784 + }, + { + "epoch": 
0.6282785892986845, + "grad_norm": 0.7387086749076843, + "learning_rate": 0.00013500019957861113, + "loss": 2.5864, + "step": 7785 + }, + { + "epoch": 0.6283592930352675, + "grad_norm": 0.7189435958862305, + "learning_rate": 0.00013498541078256273, + "loss": 2.5627, + "step": 7786 + }, + { + "epoch": 0.6284399967718506, + "grad_norm": 0.6709900498390198, + "learning_rate": 0.00013497062111458646, + "loss": 2.5973, + "step": 7787 + }, + { + "epoch": 0.6285207005084336, + "grad_norm": 0.6925386190414429, + "learning_rate": 0.0001349558305750509, + "loss": 2.615, + "step": 7788 + }, + { + "epoch": 0.6286014042450165, + "grad_norm": 0.7191932201385498, + "learning_rate": 0.00013494103916432466, + "loss": 2.576, + "step": 7789 + }, + { + "epoch": 0.6286821079815995, + "grad_norm": 0.6798804402351379, + "learning_rate": 0.00013492624688277638, + "loss": 2.5661, + "step": 7790 + }, + { + "epoch": 0.6287628117181826, + "grad_norm": 0.6514562964439392, + "learning_rate": 0.00013491145373077475, + "loss": 2.6135, + "step": 7791 + }, + { + "epoch": 0.6288435154547656, + "grad_norm": 0.7345223426818848, + "learning_rate": 0.00013489665970868838, + "loss": 2.6015, + "step": 7792 + }, + { + "epoch": 0.6289242191913486, + "grad_norm": 0.7102675437927246, + "learning_rate": 0.0001348818648168861, + "loss": 2.5545, + "step": 7793 + }, + { + "epoch": 0.6290049229279315, + "grad_norm": 0.7151654362678528, + "learning_rate": 0.0001348670690557365, + "loss": 2.6464, + "step": 7794 + }, + { + "epoch": 0.6290856266645146, + "grad_norm": 0.7344057559967041, + "learning_rate": 0.00013485227242560844, + "loss": 2.6777, + "step": 7795 + }, + { + "epoch": 0.6291663304010976, + "grad_norm": 0.6622766852378845, + "learning_rate": 0.00013483747492687065, + "loss": 2.5713, + "step": 7796 + }, + { + "epoch": 0.6292470341376806, + "grad_norm": 0.6899346709251404, + "learning_rate": 0.0001348226765598919, + "loss": 2.5188, + "step": 7797 + }, + { + "epoch": 0.6293277378742635, + "grad_norm": 
0.6711421012878418, + "learning_rate": 0.000134807877325041, + "loss": 2.5603, + "step": 7798 + }, + { + "epoch": 0.6294084416108466, + "grad_norm": 0.6973204016685486, + "learning_rate": 0.00013479307722268687, + "loss": 2.6621, + "step": 7799 + }, + { + "epoch": 0.6294891453474296, + "grad_norm": 0.7782350778579712, + "learning_rate": 0.00013477827625319824, + "loss": 2.5929, + "step": 7800 + }, + { + "epoch": 0.6295698490840126, + "grad_norm": 0.8703733682632446, + "learning_rate": 0.0001347634744169441, + "loss": 2.6884, + "step": 7801 + }, + { + "epoch": 0.6296505528205956, + "grad_norm": 0.7196036577224731, + "learning_rate": 0.00013474867171429326, + "loss": 2.6002, + "step": 7802 + }, + { + "epoch": 0.6297312565571785, + "grad_norm": 0.7224054932594299, + "learning_rate": 0.00013473386814561475, + "loss": 2.6007, + "step": 7803 + }, + { + "epoch": 0.6298119602937616, + "grad_norm": 0.7615752816200256, + "learning_rate": 0.00013471906371127743, + "loss": 2.6459, + "step": 7804 + }, + { + "epoch": 0.6298926640303446, + "grad_norm": 0.7189914584159851, + "learning_rate": 0.00013470425841165024, + "loss": 2.5692, + "step": 7805 + }, + { + "epoch": 0.6299733677669276, + "grad_norm": 0.7101845741271973, + "learning_rate": 0.00013468945224710225, + "loss": 2.5776, + "step": 7806 + }, + { + "epoch": 0.6300540715035106, + "grad_norm": 0.6860305666923523, + "learning_rate": 0.00013467464521800244, + "loss": 2.5567, + "step": 7807 + }, + { + "epoch": 0.6301347752400936, + "grad_norm": 0.7003797292709351, + "learning_rate": 0.0001346598373247198, + "loss": 2.6444, + "step": 7808 + }, + { + "epoch": 0.6302154789766766, + "grad_norm": 0.6341832876205444, + "learning_rate": 0.00013464502856762344, + "loss": 2.5475, + "step": 7809 + }, + { + "epoch": 0.6302961827132596, + "grad_norm": 0.6255922317504883, + "learning_rate": 0.00013463021894708242, + "loss": 2.5875, + "step": 7810 + }, + { + "epoch": 0.6303768864498426, + "grad_norm": 0.7136420607566833, + "learning_rate": 
0.00013461540846346575, + "loss": 2.5708, + "step": 7811 + }, + { + "epoch": 0.6304575901864257, + "grad_norm": 0.7164542078971863, + "learning_rate": 0.00013460059711714267, + "loss": 2.4975, + "step": 7812 + }, + { + "epoch": 0.6305382939230086, + "grad_norm": 0.7667872905731201, + "learning_rate": 0.00013458578490848226, + "loss": 2.6124, + "step": 7813 + }, + { + "epoch": 0.6306189976595916, + "grad_norm": 0.6631812453269958, + "learning_rate": 0.0001345709718378537, + "loss": 2.5318, + "step": 7814 + }, + { + "epoch": 0.6306997013961746, + "grad_norm": 0.696864664554596, + "learning_rate": 0.0001345561579056261, + "loss": 2.6171, + "step": 7815 + }, + { + "epoch": 0.6307804051327577, + "grad_norm": 0.7368598580360413, + "learning_rate": 0.00013454134311216873, + "loss": 2.5734, + "step": 7816 + }, + { + "epoch": 0.6308611088693407, + "grad_norm": 0.7279712557792664, + "learning_rate": 0.00013452652745785083, + "loss": 2.6231, + "step": 7817 + }, + { + "epoch": 0.6309418126059236, + "grad_norm": 0.8070993423461914, + "learning_rate": 0.00013451171094304158, + "loss": 2.5486, + "step": 7818 + }, + { + "epoch": 0.6310225163425066, + "grad_norm": 0.7522621750831604, + "learning_rate": 0.0001344968935681103, + "loss": 2.5576, + "step": 7819 + }, + { + "epoch": 0.6311032200790897, + "grad_norm": 0.8185423612594604, + "learning_rate": 0.00013448207533342624, + "loss": 2.6068, + "step": 7820 + }, + { + "epoch": 0.6311839238156727, + "grad_norm": 0.7542584538459778, + "learning_rate": 0.0001344672562393587, + "loss": 2.643, + "step": 7821 + }, + { + "epoch": 0.6312646275522557, + "grad_norm": 0.7892276644706726, + "learning_rate": 0.00013445243628627712, + "loss": 2.6211, + "step": 7822 + }, + { + "epoch": 0.6313453312888386, + "grad_norm": 0.7216602563858032, + "learning_rate": 0.00013443761547455072, + "loss": 2.5725, + "step": 7823 + }, + { + "epoch": 0.6314260350254217, + "grad_norm": 0.6750743985176086, + "learning_rate": 0.0001344227938045489, + "loss": 2.5319, + 
"step": 7824 + }, + { + "epoch": 0.6315067387620047, + "grad_norm": 0.6711540222167969, + "learning_rate": 0.0001344079712766411, + "loss": 2.5957, + "step": 7825 + }, + { + "epoch": 0.6315874424985877, + "grad_norm": 0.6923524737358093, + "learning_rate": 0.00013439314789119667, + "loss": 2.6084, + "step": 7826 + }, + { + "epoch": 0.6316681462351706, + "grad_norm": 0.6859166026115417, + "learning_rate": 0.00013437832364858517, + "loss": 2.5608, + "step": 7827 + }, + { + "epoch": 0.6317488499717537, + "grad_norm": 0.7340966463088989, + "learning_rate": 0.0001343634985491759, + "loss": 2.531, + "step": 7828 + }, + { + "epoch": 0.6318295537083367, + "grad_norm": 0.7374520301818848, + "learning_rate": 0.00013434867259333848, + "loss": 2.5972, + "step": 7829 + }, + { + "epoch": 0.6319102574449197, + "grad_norm": 0.7252814769744873, + "learning_rate": 0.00013433384578144232, + "loss": 2.5874, + "step": 7830 + }, + { + "epoch": 0.6319909611815027, + "grad_norm": 0.7000489830970764, + "learning_rate": 0.000134319018113857, + "loss": 2.6137, + "step": 7831 + }, + { + "epoch": 0.6320716649180858, + "grad_norm": 0.805981457233429, + "learning_rate": 0.00013430418959095198, + "loss": 2.5581, + "step": 7832 + }, + { + "epoch": 0.6321523686546687, + "grad_norm": 0.7459721565246582, + "learning_rate": 0.00013428936021309693, + "loss": 2.5284, + "step": 7833 + }, + { + "epoch": 0.6322330723912517, + "grad_norm": 0.749794065952301, + "learning_rate": 0.00013427452998066136, + "loss": 2.5927, + "step": 7834 + }, + { + "epoch": 0.6323137761278347, + "grad_norm": 0.6925346255302429, + "learning_rate": 0.00013425969889401494, + "loss": 2.5703, + "step": 7835 + }, + { + "epoch": 0.6323944798644178, + "grad_norm": 0.6647117137908936, + "learning_rate": 0.00013424486695352728, + "loss": 2.5649, + "step": 7836 + }, + { + "epoch": 0.6324751836010007, + "grad_norm": 0.7358147501945496, + "learning_rate": 0.00013423003415956796, + "loss": 2.6122, + "step": 7837 + }, + { + "epoch": 
0.6325558873375837, + "grad_norm": 0.7798088788986206, + "learning_rate": 0.00013421520051250675, + "loss": 2.5805, + "step": 7838 + }, + { + "epoch": 0.6326365910741667, + "grad_norm": 0.7108271718025208, + "learning_rate": 0.00013420036601271334, + "loss": 2.5457, + "step": 7839 + }, + { + "epoch": 0.6327172948107498, + "grad_norm": 0.7108528017997742, + "learning_rate": 0.00013418553066055734, + "loss": 2.6313, + "step": 7840 + }, + { + "epoch": 0.6327979985473328, + "grad_norm": 0.7325249910354614, + "learning_rate": 0.00013417069445640858, + "loss": 2.5598, + "step": 7841 + }, + { + "epoch": 0.6328787022839157, + "grad_norm": 0.6861844062805176, + "learning_rate": 0.0001341558574006368, + "loss": 2.5899, + "step": 7842 + }, + { + "epoch": 0.6329594060204987, + "grad_norm": 0.7576130628585815, + "learning_rate": 0.00013414101949361175, + "loss": 2.6077, + "step": 7843 + }, + { + "epoch": 0.6330401097570818, + "grad_norm": 0.7756128907203674, + "learning_rate": 0.0001341261807357033, + "loss": 2.6111, + "step": 7844 + }, + { + "epoch": 0.6331208134936648, + "grad_norm": 0.7131127715110779, + "learning_rate": 0.00013411134112728114, + "loss": 2.5227, + "step": 7845 + }, + { + "epoch": 0.6332015172302478, + "grad_norm": 0.6517898440361023, + "learning_rate": 0.00013409650066871525, + "loss": 2.5825, + "step": 7846 + }, + { + "epoch": 0.6332822209668307, + "grad_norm": 0.8452722430229187, + "learning_rate": 0.0001340816593603754, + "loss": 2.6037, + "step": 7847 + }, + { + "epoch": 0.6333629247034138, + "grad_norm": 0.7421110272407532, + "learning_rate": 0.00013406681720263153, + "loss": 2.5684, + "step": 7848 + }, + { + "epoch": 0.6334436284399968, + "grad_norm": 0.695139467716217, + "learning_rate": 0.0001340519741958535, + "loss": 2.5648, + "step": 7849 + }, + { + "epoch": 0.6335243321765798, + "grad_norm": 0.7780016660690308, + "learning_rate": 0.0001340371303404113, + "loss": 2.6849, + "step": 7850 + }, + { + "epoch": 0.6336050359131628, + "grad_norm": 
0.7276864051818848, + "learning_rate": 0.00013402228563667482, + "loss": 2.6198, + "step": 7851 + }, + { + "epoch": 0.6336857396497458, + "grad_norm": 0.7566827535629272, + "learning_rate": 0.00013400744008501404, + "loss": 2.5803, + "step": 7852 + }, + { + "epoch": 0.6337664433863288, + "grad_norm": 0.7933458089828491, + "learning_rate": 0.00013399259368579894, + "loss": 2.6029, + "step": 7853 + }, + { + "epoch": 0.6338471471229118, + "grad_norm": 0.6849822402000427, + "learning_rate": 0.00013397774643939957, + "loss": 2.5454, + "step": 7854 + }, + { + "epoch": 0.6339278508594948, + "grad_norm": 0.7054651379585266, + "learning_rate": 0.00013396289834618594, + "loss": 2.5905, + "step": 7855 + }, + { + "epoch": 0.6340085545960777, + "grad_norm": 0.7036863565444946, + "learning_rate": 0.00013394804940652813, + "loss": 2.6342, + "step": 7856 + }, + { + "epoch": 0.6340892583326608, + "grad_norm": 0.7101735472679138, + "learning_rate": 0.00013393319962079614, + "loss": 2.6402, + "step": 7857 + }, + { + "epoch": 0.6341699620692438, + "grad_norm": 0.7053956389427185, + "learning_rate": 0.0001339183489893601, + "loss": 2.5841, + "step": 7858 + }, + { + "epoch": 0.6342506658058268, + "grad_norm": 0.7734887003898621, + "learning_rate": 0.0001339034975125902, + "loss": 2.652, + "step": 7859 + }, + { + "epoch": 0.6343313695424098, + "grad_norm": 0.6714119911193848, + "learning_rate": 0.0001338886451908565, + "loss": 2.5927, + "step": 7860 + }, + { + "epoch": 0.6344120732789928, + "grad_norm": 0.6580910682678223, + "learning_rate": 0.00013387379202452917, + "loss": 2.6114, + "step": 7861 + }, + { + "epoch": 0.6344927770155758, + "grad_norm": 0.6810200214385986, + "learning_rate": 0.00013385893801397836, + "loss": 2.5616, + "step": 7862 + }, + { + "epoch": 0.6345734807521588, + "grad_norm": 0.6989572048187256, + "learning_rate": 0.00013384408315957432, + "loss": 2.5954, + "step": 7863 + }, + { + "epoch": 0.6346541844887418, + "grad_norm": 0.7033671736717224, + "learning_rate": 
0.00013382922746168728, + "loss": 2.6015, + "step": 7864 + }, + { + "epoch": 0.6347348882253249, + "grad_norm": 0.6873033046722412, + "learning_rate": 0.0001338143709206875, + "loss": 2.562, + "step": 7865 + }, + { + "epoch": 0.6348155919619078, + "grad_norm": 0.7361463904380798, + "learning_rate": 0.00013379951353694513, + "loss": 2.6175, + "step": 7866 + }, + { + "epoch": 0.6348962956984908, + "grad_norm": 0.7623226046562195, + "learning_rate": 0.00013378465531083055, + "loss": 2.7342, + "step": 7867 + }, + { + "epoch": 0.6349769994350738, + "grad_norm": 0.7427035570144653, + "learning_rate": 0.0001337697962427141, + "loss": 2.5468, + "step": 7868 + }, + { + "epoch": 0.6350577031716569, + "grad_norm": 0.6865772008895874, + "learning_rate": 0.00013375493633296598, + "loss": 2.6112, + "step": 7869 + }, + { + "epoch": 0.6351384069082399, + "grad_norm": 0.663567304611206, + "learning_rate": 0.00013374007558195666, + "loss": 2.5896, + "step": 7870 + }, + { + "epoch": 0.6352191106448228, + "grad_norm": 0.6804360151290894, + "learning_rate": 0.00013372521399005643, + "loss": 2.58, + "step": 7871 + }, + { + "epoch": 0.6352998143814058, + "grad_norm": 0.6755216121673584, + "learning_rate": 0.0001337103515576357, + "loss": 2.5593, + "step": 7872 + }, + { + "epoch": 0.6353805181179889, + "grad_norm": 0.8148807883262634, + "learning_rate": 0.00013369548828506491, + "loss": 2.6473, + "step": 7873 + }, + { + "epoch": 0.6354612218545719, + "grad_norm": 0.713009774684906, + "learning_rate": 0.00013368062417271447, + "loss": 2.6002, + "step": 7874 + }, + { + "epoch": 0.6355419255911549, + "grad_norm": 0.6390172839164734, + "learning_rate": 0.00013366575922095484, + "loss": 2.5794, + "step": 7875 + }, + { + "epoch": 0.6356226293277378, + "grad_norm": 0.7228195667266846, + "learning_rate": 0.00013365089343015649, + "loss": 2.6051, + "step": 7876 + }, + { + "epoch": 0.6357033330643209, + "grad_norm": 0.7563474178314209, + "learning_rate": 0.00013363602680068986, + "loss": 2.6308, + 
"step": 7877 + }, + { + "epoch": 0.6357840368009039, + "grad_norm": 0.7366798520088196, + "learning_rate": 0.00013362115933292557, + "loss": 2.5589, + "step": 7878 + }, + { + "epoch": 0.6358647405374869, + "grad_norm": 0.7137070894241333, + "learning_rate": 0.00013360629102723409, + "loss": 2.6428, + "step": 7879 + }, + { + "epoch": 0.6359454442740698, + "grad_norm": 0.6799132823944092, + "learning_rate": 0.000133591421883986, + "loss": 2.5549, + "step": 7880 + }, + { + "epoch": 0.6360261480106529, + "grad_norm": 0.7031344771385193, + "learning_rate": 0.00013357655190355188, + "loss": 2.6298, + "step": 7881 + }, + { + "epoch": 0.6361068517472359, + "grad_norm": 0.7441670298576355, + "learning_rate": 0.00013356168108630227, + "loss": 2.5844, + "step": 7882 + }, + { + "epoch": 0.6361875554838189, + "grad_norm": 0.7281978726387024, + "learning_rate": 0.00013354680943260784, + "loss": 2.5773, + "step": 7883 + }, + { + "epoch": 0.6362682592204019, + "grad_norm": 0.6969650983810425, + "learning_rate": 0.00013353193694283928, + "loss": 2.6156, + "step": 7884 + }, + { + "epoch": 0.636348962956985, + "grad_norm": 0.6668435335159302, + "learning_rate": 0.00013351706361736714, + "loss": 2.6328, + "step": 7885 + }, + { + "epoch": 0.6364296666935679, + "grad_norm": 0.6909573078155518, + "learning_rate": 0.0001335021894565622, + "loss": 2.5772, + "step": 7886 + }, + { + "epoch": 0.6365103704301509, + "grad_norm": 0.6740022897720337, + "learning_rate": 0.0001334873144607951, + "loss": 2.6435, + "step": 7887 + }, + { + "epoch": 0.6365910741667339, + "grad_norm": 0.7203185558319092, + "learning_rate": 0.0001334724386304366, + "loss": 2.5401, + "step": 7888 + }, + { + "epoch": 0.636671777903317, + "grad_norm": 0.7343020439147949, + "learning_rate": 0.0001334575619658574, + "loss": 2.5811, + "step": 7889 + }, + { + "epoch": 0.6367524816399, + "grad_norm": 0.6941348314285278, + "learning_rate": 0.00013344268446742835, + "loss": 2.6267, + "step": 7890 + }, + { + "epoch": 
0.6368331853764829, + "grad_norm": 0.6983792185783386, + "learning_rate": 0.00013342780613552016, + "loss": 2.533, + "step": 7891 + }, + { + "epoch": 0.6369138891130659, + "grad_norm": 0.7093533277511597, + "learning_rate": 0.00013341292697050365, + "loss": 2.6616, + "step": 7892 + }, + { + "epoch": 0.636994592849649, + "grad_norm": 0.7377648949623108, + "learning_rate": 0.00013339804697274965, + "loss": 2.6032, + "step": 7893 + }, + { + "epoch": 0.637075296586232, + "grad_norm": 0.6669821739196777, + "learning_rate": 0.00013338316614262903, + "loss": 2.6082, + "step": 7894 + }, + { + "epoch": 0.6371560003228149, + "grad_norm": 0.6665576100349426, + "learning_rate": 0.00013336828448051263, + "loss": 2.6114, + "step": 7895 + }, + { + "epoch": 0.6372367040593979, + "grad_norm": 0.6893584132194519, + "learning_rate": 0.0001333534019867714, + "loss": 2.5886, + "step": 7896 + }, + { + "epoch": 0.637317407795981, + "grad_norm": 0.7651494741439819, + "learning_rate": 0.00013333851866177617, + "loss": 2.5622, + "step": 7897 + }, + { + "epoch": 0.637398111532564, + "grad_norm": 0.8124055862426758, + "learning_rate": 0.00013332363450589788, + "loss": 2.6036, + "step": 7898 + }, + { + "epoch": 0.637478815269147, + "grad_norm": 0.7394436597824097, + "learning_rate": 0.00013330874951950755, + "loss": 2.6214, + "step": 7899 + }, + { + "epoch": 0.6375595190057299, + "grad_norm": 0.6279659867286682, + "learning_rate": 0.00013329386370297615, + "loss": 2.5652, + "step": 7900 + }, + { + "epoch": 0.637640222742313, + "grad_norm": 0.7289649248123169, + "learning_rate": 0.00013327897705667455, + "loss": 2.5628, + "step": 7901 + }, + { + "epoch": 0.637720926478896, + "grad_norm": 0.7267701625823975, + "learning_rate": 0.0001332640895809739, + "loss": 2.5475, + "step": 7902 + }, + { + "epoch": 0.637801630215479, + "grad_norm": 0.7470490336418152, + "learning_rate": 0.00013324920127624515, + "loss": 2.5054, + "step": 7903 + }, + { + "epoch": 0.637882333952062, + "grad_norm": 
0.6963294148445129, + "learning_rate": 0.00013323431214285944, + "loss": 2.5992, + "step": 7904 + }, + { + "epoch": 0.6379630376886449, + "grad_norm": 0.6993808746337891, + "learning_rate": 0.00013321942218118778, + "loss": 2.6044, + "step": 7905 + }, + { + "epoch": 0.638043741425228, + "grad_norm": 0.6620917916297913, + "learning_rate": 0.00013320453139160126, + "loss": 2.5278, + "step": 7906 + }, + { + "epoch": 0.638124445161811, + "grad_norm": 0.6535444855690002, + "learning_rate": 0.00013318963977447106, + "loss": 2.6069, + "step": 7907 + }, + { + "epoch": 0.638205148898394, + "grad_norm": 0.6913008689880371, + "learning_rate": 0.00013317474733016824, + "loss": 2.5271, + "step": 7908 + }, + { + "epoch": 0.638285852634977, + "grad_norm": 0.6760269403457642, + "learning_rate": 0.000133159854059064, + "loss": 2.7029, + "step": 7909 + }, + { + "epoch": 0.63836655637156, + "grad_norm": 0.7026536464691162, + "learning_rate": 0.0001331449599615295, + "loss": 2.592, + "step": 7910 + }, + { + "epoch": 0.638447260108143, + "grad_norm": 0.7935923933982849, + "learning_rate": 0.000133130065037936, + "loss": 2.5674, + "step": 7911 + }, + { + "epoch": 0.638527963844726, + "grad_norm": 0.694675087928772, + "learning_rate": 0.00013311516928865466, + "loss": 2.6727, + "step": 7912 + }, + { + "epoch": 0.638608667581309, + "grad_norm": 0.7378186583518982, + "learning_rate": 0.00013310027271405672, + "loss": 2.5691, + "step": 7913 + }, + { + "epoch": 0.638689371317892, + "grad_norm": 0.7684193849563599, + "learning_rate": 0.00013308537531451345, + "loss": 2.5796, + "step": 7914 + }, + { + "epoch": 0.638770075054475, + "grad_norm": 0.6881510019302368, + "learning_rate": 0.00013307047709039619, + "loss": 2.6, + "step": 7915 + }, + { + "epoch": 0.638850778791058, + "grad_norm": 0.7341364026069641, + "learning_rate": 0.00013305557804207618, + "loss": 2.622, + "step": 7916 + }, + { + "epoch": 0.638931482527641, + "grad_norm": 0.7620663642883301, + "learning_rate": 
0.00013304067816992474, + "loss": 2.5571, + "step": 7917 + }, + { + "epoch": 0.6390121862642241, + "grad_norm": 0.6929789781570435, + "learning_rate": 0.00013302577747431322, + "loss": 2.6204, + "step": 7918 + }, + { + "epoch": 0.639092890000807, + "grad_norm": 0.6942943334579468, + "learning_rate": 0.000133010875955613, + "loss": 2.6737, + "step": 7919 + }, + { + "epoch": 0.63917359373739, + "grad_norm": 0.69537752866745, + "learning_rate": 0.0001329959736141955, + "loss": 2.6105, + "step": 7920 + }, + { + "epoch": 0.639254297473973, + "grad_norm": 0.6690821051597595, + "learning_rate": 0.00013298107045043203, + "loss": 2.6279, + "step": 7921 + }, + { + "epoch": 0.6393350012105561, + "grad_norm": 0.7748103141784668, + "learning_rate": 0.00013296616646469412, + "loss": 2.6307, + "step": 7922 + }, + { + "epoch": 0.6394157049471391, + "grad_norm": 0.7509558200836182, + "learning_rate": 0.00013295126165735311, + "loss": 2.6388, + "step": 7923 + }, + { + "epoch": 0.639496408683722, + "grad_norm": 0.7641764283180237, + "learning_rate": 0.0001329363560287806, + "loss": 2.5819, + "step": 7924 + }, + { + "epoch": 0.639577112420305, + "grad_norm": 0.6912327408790588, + "learning_rate": 0.00013292144957934794, + "loss": 2.5588, + "step": 7925 + }, + { + "epoch": 0.6396578161568881, + "grad_norm": 0.7568803429603577, + "learning_rate": 0.0001329065423094267, + "loss": 2.5627, + "step": 7926 + }, + { + "epoch": 0.6397385198934711, + "grad_norm": 0.7272306084632874, + "learning_rate": 0.00013289163421938843, + "loss": 2.6101, + "step": 7927 + }, + { + "epoch": 0.6398192236300541, + "grad_norm": 0.6965963840484619, + "learning_rate": 0.00013287672530960465, + "loss": 2.5967, + "step": 7928 + }, + { + "epoch": 0.639899927366637, + "grad_norm": 0.7729843854904175, + "learning_rate": 0.00013286181558044694, + "loss": 2.6222, + "step": 7929 + }, + { + "epoch": 0.6399806311032201, + "grad_norm": 0.6876606941223145, + "learning_rate": 0.00013284690503228687, + "loss": 2.6162, + 
"step": 7930 + }, + { + "epoch": 0.6400613348398031, + "grad_norm": 0.7555204629898071, + "learning_rate": 0.0001328319936654961, + "loss": 2.588, + "step": 7931 + }, + { + "epoch": 0.6401420385763861, + "grad_norm": 0.7324720621109009, + "learning_rate": 0.0001328170814804462, + "loss": 2.6111, + "step": 7932 + }, + { + "epoch": 0.640222742312969, + "grad_norm": 0.6802392601966858, + "learning_rate": 0.0001328021684775088, + "loss": 2.5955, + "step": 7933 + }, + { + "epoch": 0.6403034460495521, + "grad_norm": 0.7564330697059631, + "learning_rate": 0.00013278725465705568, + "loss": 2.5355, + "step": 7934 + }, + { + "epoch": 0.6403841497861351, + "grad_norm": 0.6916235089302063, + "learning_rate": 0.00013277234001945844, + "loss": 2.6037, + "step": 7935 + }, + { + "epoch": 0.6404648535227181, + "grad_norm": 0.688819169998169, + "learning_rate": 0.00013275742456508885, + "loss": 2.5626, + "step": 7936 + }, + { + "epoch": 0.6405455572593011, + "grad_norm": 0.6647922992706299, + "learning_rate": 0.0001327425082943186, + "loss": 2.6166, + "step": 7937 + }, + { + "epoch": 0.6406262609958842, + "grad_norm": 0.6792626976966858, + "learning_rate": 0.00013272759120751943, + "loss": 2.6206, + "step": 7938 + }, + { + "epoch": 0.6407069647324671, + "grad_norm": 0.6482827663421631, + "learning_rate": 0.00013271267330506312, + "loss": 2.5558, + "step": 7939 + }, + { + "epoch": 0.6407876684690501, + "grad_norm": 0.6628372073173523, + "learning_rate": 0.0001326977545873215, + "loss": 2.5904, + "step": 7940 + }, + { + "epoch": 0.6408683722056331, + "grad_norm": 0.7168916463851929, + "learning_rate": 0.00013268283505466635, + "loss": 2.5189, + "step": 7941 + }, + { + "epoch": 0.6409490759422162, + "grad_norm": 0.6691678762435913, + "learning_rate": 0.00013266791470746957, + "loss": 2.608, + "step": 7942 + }, + { + "epoch": 0.6410297796787991, + "grad_norm": 0.6850359439849854, + "learning_rate": 0.00013265299354610292, + "loss": 2.5929, + "step": 7943 + }, + { + "epoch": 
0.6411104834153821, + "grad_norm": 0.6807669401168823, + "learning_rate": 0.0001326380715709383, + "loss": 2.6016, + "step": 7944 + }, + { + "epoch": 0.6411911871519651, + "grad_norm": 0.6450446844100952, + "learning_rate": 0.00013262314878234767, + "loss": 2.6129, + "step": 7945 + }, + { + "epoch": 0.6412718908885482, + "grad_norm": 0.679115355014801, + "learning_rate": 0.00013260822518070285, + "loss": 2.6049, + "step": 7946 + }, + { + "epoch": 0.6413525946251312, + "grad_norm": 0.7082008123397827, + "learning_rate": 0.00013259330076637583, + "loss": 2.5673, + "step": 7947 + }, + { + "epoch": 0.6414332983617141, + "grad_norm": 0.7357851266860962, + "learning_rate": 0.00013257837553973855, + "loss": 2.6118, + "step": 7948 + }, + { + "epoch": 0.6415140020982971, + "grad_norm": 0.687035083770752, + "learning_rate": 0.000132563449501163, + "loss": 2.5359, + "step": 7949 + }, + { + "epoch": 0.6415947058348802, + "grad_norm": 0.6950698494911194, + "learning_rate": 0.00013254852265102117, + "loss": 2.5527, + "step": 7950 + }, + { + "epoch": 0.6416754095714632, + "grad_norm": 0.6878959536552429, + "learning_rate": 0.00013253359498968507, + "loss": 2.611, + "step": 7951 + }, + { + "epoch": 0.6417561133080462, + "grad_norm": 0.7224605083465576, + "learning_rate": 0.00013251866651752675, + "loss": 2.5459, + "step": 7952 + }, + { + "epoch": 0.6418368170446291, + "grad_norm": 0.7299731969833374, + "learning_rate": 0.00013250373723491826, + "loss": 2.5651, + "step": 7953 + }, + { + "epoch": 0.6419175207812122, + "grad_norm": 0.7663037776947021, + "learning_rate": 0.00013248880714223163, + "loss": 2.6073, + "step": 7954 + }, + { + "epoch": 0.6419982245177952, + "grad_norm": 0.6532007455825806, + "learning_rate": 0.00013247387623983902, + "loss": 2.6087, + "step": 7955 + }, + { + "epoch": 0.6420789282543782, + "grad_norm": 0.7520449757575989, + "learning_rate": 0.00013245894452811255, + "loss": 2.5998, + "step": 7956 + }, + { + "epoch": 0.6421596319909612, + "grad_norm": 
0.7196050882339478, + "learning_rate": 0.0001324440120074243, + "loss": 2.6448, + "step": 7957 + }, + { + "epoch": 0.6422403357275441, + "grad_norm": 0.7093806862831116, + "learning_rate": 0.0001324290786781465, + "loss": 2.5935, + "step": 7958 + }, + { + "epoch": 0.6423210394641272, + "grad_norm": 0.695541501045227, + "learning_rate": 0.00013241414454065125, + "loss": 2.5872, + "step": 7959 + }, + { + "epoch": 0.6424017432007102, + "grad_norm": 0.6763006448745728, + "learning_rate": 0.0001323992095953108, + "loss": 2.572, + "step": 7960 + }, + { + "epoch": 0.6424824469372932, + "grad_norm": 0.6403522491455078, + "learning_rate": 0.00013238427384249738, + "loss": 2.6137, + "step": 7961 + }, + { + "epoch": 0.6425631506738761, + "grad_norm": 0.6647571325302124, + "learning_rate": 0.00013236933728258315, + "loss": 2.5904, + "step": 7962 + }, + { + "epoch": 0.6426438544104592, + "grad_norm": 0.6931071877479553, + "learning_rate": 0.0001323543999159405, + "loss": 2.6085, + "step": 7963 + }, + { + "epoch": 0.6427245581470422, + "grad_norm": 0.6899439096450806, + "learning_rate": 0.00013233946174294155, + "loss": 2.5555, + "step": 7964 + }, + { + "epoch": 0.6428052618836252, + "grad_norm": 0.6564984321594238, + "learning_rate": 0.0001323245227639587, + "loss": 2.576, + "step": 7965 + }, + { + "epoch": 0.6428859656202082, + "grad_norm": 0.7427607774734497, + "learning_rate": 0.00013230958297936427, + "loss": 2.6178, + "step": 7966 + }, + { + "epoch": 0.6429666693567913, + "grad_norm": 0.6884508728981018, + "learning_rate": 0.00013229464238953054, + "loss": 2.6519, + "step": 7967 + }, + { + "epoch": 0.6430473730933742, + "grad_norm": 0.692442774772644, + "learning_rate": 0.00013227970099482993, + "loss": 2.5784, + "step": 7968 + }, + { + "epoch": 0.6431280768299572, + "grad_norm": 0.6637876629829407, + "learning_rate": 0.00013226475879563477, + "loss": 2.5785, + "step": 7969 + }, + { + "epoch": 0.6432087805665402, + "grad_norm": 0.6844972372055054, + "learning_rate": 
0.0001322498157923175, + "loss": 2.5745, + "step": 7970 + }, + { + "epoch": 0.6432894843031233, + "grad_norm": 0.7259756922721863, + "learning_rate": 0.0001322348719852505, + "loss": 2.5696, + "step": 7971 + }, + { + "epoch": 0.6433701880397062, + "grad_norm": 0.6719023585319519, + "learning_rate": 0.00013221992737480625, + "loss": 2.6049, + "step": 7972 + }, + { + "epoch": 0.6434508917762892, + "grad_norm": 0.7160155773162842, + "learning_rate": 0.00013220498196135717, + "loss": 2.572, + "step": 7973 + }, + { + "epoch": 0.6435315955128722, + "grad_norm": 0.6920225620269775, + "learning_rate": 0.00013219003574527576, + "loss": 2.6576, + "step": 7974 + }, + { + "epoch": 0.6436122992494553, + "grad_norm": 0.698518693447113, + "learning_rate": 0.0001321750887269345, + "loss": 2.6074, + "step": 7975 + }, + { + "epoch": 0.6436930029860383, + "grad_norm": 0.7607932090759277, + "learning_rate": 0.00013216014090670594, + "loss": 2.6173, + "step": 7976 + }, + { + "epoch": 0.6437737067226212, + "grad_norm": 0.8130847811698914, + "learning_rate": 0.0001321451922849626, + "loss": 2.6023, + "step": 7977 + }, + { + "epoch": 0.6438544104592042, + "grad_norm": 0.676675021648407, + "learning_rate": 0.00013213024286207702, + "loss": 2.6174, + "step": 7978 + }, + { + "epoch": 0.6439351141957873, + "grad_norm": 0.7018851041793823, + "learning_rate": 0.00013211529263842183, + "loss": 2.5713, + "step": 7979 + }, + { + "epoch": 0.6440158179323703, + "grad_norm": 0.796097457408905, + "learning_rate": 0.00013210034161436954, + "loss": 2.5937, + "step": 7980 + }, + { + "epoch": 0.6440965216689533, + "grad_norm": 0.7118527293205261, + "learning_rate": 0.0001320853897902929, + "loss": 2.5721, + "step": 7981 + }, + { + "epoch": 0.6441772254055362, + "grad_norm": 0.7282249331474304, + "learning_rate": 0.00013207043716656445, + "loss": 2.5975, + "step": 7982 + }, + { + "epoch": 0.6442579291421193, + "grad_norm": 0.6710900664329529, + "learning_rate": 0.00013205548374355686, + "loss": 2.5809, + 
"step": 7983 + }, + { + "epoch": 0.6443386328787023, + "grad_norm": 0.7045658230781555, + "learning_rate": 0.00013204052952164278, + "loss": 2.5715, + "step": 7984 + }, + { + "epoch": 0.6444193366152853, + "grad_norm": 0.719507098197937, + "learning_rate": 0.00013202557450119504, + "loss": 2.5948, + "step": 7985 + }, + { + "epoch": 0.6445000403518683, + "grad_norm": 0.7603922486305237, + "learning_rate": 0.0001320106186825862, + "loss": 2.6176, + "step": 7986 + }, + { + "epoch": 0.6445807440884513, + "grad_norm": 0.7057444453239441, + "learning_rate": 0.0001319956620661891, + "loss": 2.5905, + "step": 7987 + }, + { + "epoch": 0.6446614478250343, + "grad_norm": 0.7884874939918518, + "learning_rate": 0.00013198070465237645, + "loss": 2.5892, + "step": 7988 + }, + { + "epoch": 0.6447421515616173, + "grad_norm": 0.6932834386825562, + "learning_rate": 0.00013196574644152103, + "loss": 2.6032, + "step": 7989 + }, + { + "epoch": 0.6448228552982003, + "grad_norm": 0.7361180186271667, + "learning_rate": 0.00013195078743399568, + "loss": 2.5877, + "step": 7990 + }, + { + "epoch": 0.6449035590347834, + "grad_norm": 0.6843615174293518, + "learning_rate": 0.00013193582763017315, + "loss": 2.5804, + "step": 7991 + }, + { + "epoch": 0.6449842627713663, + "grad_norm": 0.7592078447341919, + "learning_rate": 0.00013192086703042635, + "loss": 2.6464, + "step": 7992 + }, + { + "epoch": 0.6450649665079493, + "grad_norm": 0.7362154126167297, + "learning_rate": 0.0001319059056351281, + "loss": 2.6154, + "step": 7993 + }, + { + "epoch": 0.6451456702445323, + "grad_norm": 0.6721758246421814, + "learning_rate": 0.00013189094344465125, + "loss": 2.5735, + "step": 7994 + }, + { + "epoch": 0.6452263739811154, + "grad_norm": 0.6221550107002258, + "learning_rate": 0.00013187598045936874, + "loss": 2.5612, + "step": 7995 + }, + { + "epoch": 0.6453070777176984, + "grad_norm": 0.7225528359413147, + "learning_rate": 0.00013186101667965344, + "loss": 2.6263, + "step": 7996 + }, + { + "epoch": 
0.6453877814542813, + "grad_norm": 0.7599418759346008, + "learning_rate": 0.00013184605210587837, + "loss": 2.5814, + "step": 7997 + }, + { + "epoch": 0.6454684851908643, + "grad_norm": 0.6778777837753296, + "learning_rate": 0.00013183108673841642, + "loss": 2.6158, + "step": 7998 + }, + { + "epoch": 0.6455491889274474, + "grad_norm": 0.6860963106155396, + "learning_rate": 0.00013181612057764058, + "loss": 2.6207, + "step": 7999 + }, + { + "epoch": 0.6456298926640304, + "grad_norm": 0.6615182757377625, + "learning_rate": 0.00013180115362392382, + "loss": 2.5571, + "step": 8000 + }, + { + "epoch": 0.6456298926640304, + "eval_loss": 2.5128066539764404, + "eval_runtime": 754.3655, + "eval_samples_per_second": 3.473, + "eval_steps_per_second": 0.579, + "step": 8000 + }, + { + "epoch": 0.6457105964006133, + "grad_norm": 0.688169538974762, + "learning_rate": 0.0001317861858776392, + "loss": 2.6513, + "step": 8001 + }, + { + "epoch": 0.6457913001371963, + "grad_norm": 0.6726182103157043, + "learning_rate": 0.00013177121733915975, + "loss": 2.5909, + "step": 8002 + }, + { + "epoch": 0.6458720038737794, + "grad_norm": 0.7348085641860962, + "learning_rate": 0.00013175624800885853, + "loss": 2.577, + "step": 8003 + }, + { + "epoch": 0.6459527076103624, + "grad_norm": 0.677435040473938, + "learning_rate": 0.00013174127788710856, + "loss": 2.5056, + "step": 8004 + }, + { + "epoch": 0.6460334113469454, + "grad_norm": 0.6864951848983765, + "learning_rate": 0.000131726306974283, + "loss": 2.5733, + "step": 8005 + }, + { + "epoch": 0.6461141150835283, + "grad_norm": 0.7070075869560242, + "learning_rate": 0.0001317113352707549, + "loss": 2.5359, + "step": 8006 + }, + { + "epoch": 0.6461948188201113, + "grad_norm": 0.7065049409866333, + "learning_rate": 0.00013169636277689746, + "loss": 2.6261, + "step": 8007 + }, + { + "epoch": 0.6462755225566944, + "grad_norm": 0.6691577434539795, + "learning_rate": 0.0001316813894930838, + "loss": 2.6015, + "step": 8008 + }, + { + "epoch": 
0.6463562262932774, + "grad_norm": 0.6754019260406494, + "learning_rate": 0.0001316664154196871, + "loss": 2.5954, + "step": 8009 + }, + { + "epoch": 0.6464369300298604, + "grad_norm": 0.6172776818275452, + "learning_rate": 0.00013165144055708055, + "loss": 2.5599, + "step": 8010 + }, + { + "epoch": 0.6465176337664433, + "grad_norm": 0.6778094172477722, + "learning_rate": 0.00013163646490563737, + "loss": 2.5407, + "step": 8011 + }, + { + "epoch": 0.6465983375030264, + "grad_norm": 0.7363924980163574, + "learning_rate": 0.00013162148846573076, + "loss": 2.6075, + "step": 8012 + }, + { + "epoch": 0.6466790412396094, + "grad_norm": 0.6662711501121521, + "learning_rate": 0.00013160651123773404, + "loss": 2.5611, + "step": 8013 + }, + { + "epoch": 0.6467597449761924, + "grad_norm": 0.699670135974884, + "learning_rate": 0.00013159153322202043, + "loss": 2.5612, + "step": 8014 + }, + { + "epoch": 0.6468404487127754, + "grad_norm": 0.7382899522781372, + "learning_rate": 0.0001315765544189632, + "loss": 2.6017, + "step": 8015 + }, + { + "epoch": 0.6469211524493584, + "grad_norm": 0.7624868154525757, + "learning_rate": 0.0001315615748289357, + "loss": 2.6174, + "step": 8016 + }, + { + "epoch": 0.6470018561859414, + "grad_norm": 0.704622745513916, + "learning_rate": 0.00013154659445231129, + "loss": 2.5367, + "step": 8017 + }, + { + "epoch": 0.6470825599225244, + "grad_norm": 0.7117413878440857, + "learning_rate": 0.00013153161328946324, + "loss": 2.5958, + "step": 8018 + }, + { + "epoch": 0.6471632636591074, + "grad_norm": 0.6825408339500427, + "learning_rate": 0.00013151663134076497, + "loss": 2.5118, + "step": 8019 + }, + { + "epoch": 0.6472439673956905, + "grad_norm": 0.6732384562492371, + "learning_rate": 0.00013150164860658986, + "loss": 2.6312, + "step": 8020 + }, + { + "epoch": 0.6473246711322734, + "grad_norm": 0.712812602519989, + "learning_rate": 0.00013148666508731134, + "loss": 2.576, + "step": 8021 + }, + { + "epoch": 0.6474053748688564, + "grad_norm": 
0.8128857612609863, + "learning_rate": 0.0001314716807833028, + "loss": 2.5333, + "step": 8022 + }, + { + "epoch": 0.6474860786054394, + "grad_norm": 0.7817162275314331, + "learning_rate": 0.00013145669569493773, + "loss": 2.6835, + "step": 8023 + }, + { + "epoch": 0.6475667823420225, + "grad_norm": 0.7164301872253418, + "learning_rate": 0.00013144170982258956, + "loss": 2.5573, + "step": 8024 + }, + { + "epoch": 0.6476474860786054, + "grad_norm": 0.67625892162323, + "learning_rate": 0.00013142672316663177, + "loss": 2.5976, + "step": 8025 + }, + { + "epoch": 0.6477281898151884, + "grad_norm": 0.6919494867324829, + "learning_rate": 0.0001314117357274379, + "loss": 2.6179, + "step": 8026 + }, + { + "epoch": 0.6478088935517714, + "grad_norm": 0.6787464618682861, + "learning_rate": 0.0001313967475053815, + "loss": 2.5405, + "step": 8027 + }, + { + "epoch": 0.6478895972883545, + "grad_norm": 0.6305621862411499, + "learning_rate": 0.00013138175850083605, + "loss": 2.6016, + "step": 8028 + }, + { + "epoch": 0.6479703010249375, + "grad_norm": 0.7456182837486267, + "learning_rate": 0.00013136676871417516, + "loss": 2.6091, + "step": 8029 + }, + { + "epoch": 0.6480510047615204, + "grad_norm": 0.7047890424728394, + "learning_rate": 0.00013135177814577238, + "loss": 2.6108, + "step": 8030 + }, + { + "epoch": 0.6481317084981034, + "grad_norm": 0.7509389519691467, + "learning_rate": 0.00013133678679600133, + "loss": 2.6396, + "step": 8031 + }, + { + "epoch": 0.6482124122346865, + "grad_norm": 0.63836270570755, + "learning_rate": 0.00013132179466523566, + "loss": 2.5759, + "step": 8032 + }, + { + "epoch": 0.6482931159712695, + "grad_norm": 0.6994885206222534, + "learning_rate": 0.000131306801753849, + "loss": 2.61, + "step": 8033 + }, + { + "epoch": 0.6483738197078525, + "grad_norm": 0.6762083768844604, + "learning_rate": 0.00013129180806221497, + "loss": 2.5431, + "step": 8034 + }, + { + "epoch": 0.6484545234444354, + "grad_norm": 0.6890944242477417, + "learning_rate": 
0.0001312768135907073, + "loss": 2.5922, + "step": 8035 + }, + { + "epoch": 0.6485352271810185, + "grad_norm": 0.7409473061561584, + "learning_rate": 0.0001312618183396997, + "loss": 2.6132, + "step": 8036 + }, + { + "epoch": 0.6486159309176015, + "grad_norm": 0.6660643815994263, + "learning_rate": 0.00013124682230956585, + "loss": 2.5816, + "step": 8037 + }, + { + "epoch": 0.6486966346541845, + "grad_norm": 0.714235246181488, + "learning_rate": 0.0001312318255006795, + "loss": 2.5613, + "step": 8038 + }, + { + "epoch": 0.6487773383907675, + "grad_norm": 0.6568472385406494, + "learning_rate": 0.00013121682791341442, + "loss": 2.6382, + "step": 8039 + }, + { + "epoch": 0.6488580421273505, + "grad_norm": 0.6874251961708069, + "learning_rate": 0.00013120182954814438, + "loss": 2.593, + "step": 8040 + }, + { + "epoch": 0.6489387458639335, + "grad_norm": 0.7620158791542053, + "learning_rate": 0.0001311868304052432, + "loss": 2.589, + "step": 8041 + }, + { + "epoch": 0.6490194496005165, + "grad_norm": 0.6755926609039307, + "learning_rate": 0.00013117183048508467, + "loss": 2.5876, + "step": 8042 + }, + { + "epoch": 0.6491001533370995, + "grad_norm": 0.6952808499336243, + "learning_rate": 0.00013115682978804264, + "loss": 2.5909, + "step": 8043 + }, + { + "epoch": 0.6491808570736826, + "grad_norm": 0.6599535346031189, + "learning_rate": 0.00013114182831449098, + "loss": 2.6031, + "step": 8044 + }, + { + "epoch": 0.6492615608102655, + "grad_norm": 0.7816598415374756, + "learning_rate": 0.00013112682606480355, + "loss": 2.5633, + "step": 8045 + }, + { + "epoch": 0.6493422645468485, + "grad_norm": 0.7188639640808105, + "learning_rate": 0.00013111182303935425, + "loss": 2.6292, + "step": 8046 + }, + { + "epoch": 0.6494229682834315, + "grad_norm": 0.7131505608558655, + "learning_rate": 0.00013109681923851698, + "loss": 2.5729, + "step": 8047 + }, + { + "epoch": 0.6495036720200146, + "grad_norm": 0.7466408014297485, + "learning_rate": 0.00013108181466266568, + "loss": 2.5742, + 
"step": 8048 + }, + { + "epoch": 0.6495843757565976, + "grad_norm": 0.6707943677902222, + "learning_rate": 0.00013106680931217437, + "loss": 2.5506, + "step": 8049 + }, + { + "epoch": 0.6496650794931805, + "grad_norm": 0.6913424730300903, + "learning_rate": 0.0001310518031874169, + "loss": 2.5639, + "step": 8050 + }, + { + "epoch": 0.6497457832297635, + "grad_norm": 0.8261755704879761, + "learning_rate": 0.00013103679628876733, + "loss": 2.601, + "step": 8051 + }, + { + "epoch": 0.6498264869663466, + "grad_norm": 0.7410566806793213, + "learning_rate": 0.0001310217886165997, + "loss": 2.5326, + "step": 8052 + }, + { + "epoch": 0.6499071907029296, + "grad_norm": 0.7032365202903748, + "learning_rate": 0.00013100678017128798, + "loss": 2.5907, + "step": 8053 + }, + { + "epoch": 0.6499878944395125, + "grad_norm": 0.7074568271636963, + "learning_rate": 0.00013099177095320626, + "loss": 2.6193, + "step": 8054 + }, + { + "epoch": 0.6500685981760955, + "grad_norm": 0.7754546999931335, + "learning_rate": 0.00013097676096272855, + "loss": 2.5832, + "step": 8055 + }, + { + "epoch": 0.6501493019126786, + "grad_norm": 0.7475717663764954, + "learning_rate": 0.00013096175020022903, + "loss": 2.6233, + "step": 8056 + }, + { + "epoch": 0.6502300056492616, + "grad_norm": 0.7863949537277222, + "learning_rate": 0.00013094673866608173, + "loss": 2.5745, + "step": 8057 + }, + { + "epoch": 0.6503107093858446, + "grad_norm": 0.69294673204422, + "learning_rate": 0.0001309317263606608, + "loss": 2.5982, + "step": 8058 + }, + { + "epoch": 0.6503914131224275, + "grad_norm": 0.7096135020256042, + "learning_rate": 0.00013091671328434046, + "loss": 2.5944, + "step": 8059 + }, + { + "epoch": 0.6504721168590105, + "grad_norm": 0.7001097202301025, + "learning_rate": 0.00013090169943749476, + "loss": 2.5435, + "step": 8060 + }, + { + "epoch": 0.6505528205955936, + "grad_norm": 0.7522539496421814, + "learning_rate": 0.00013088668482049792, + "loss": 2.5843, + "step": 8061 + }, + { + "epoch": 
0.6506335243321766, + "grad_norm": 0.6675420999526978, + "learning_rate": 0.00013087166943372418, + "loss": 2.5623, + "step": 8062 + }, + { + "epoch": 0.6507142280687596, + "grad_norm": 0.7779181599617004, + "learning_rate": 0.00013085665327754772, + "loss": 2.6087, + "step": 8063 + }, + { + "epoch": 0.6507949318053425, + "grad_norm": 0.7385239005088806, + "learning_rate": 0.00013084163635234284, + "loss": 2.5725, + "step": 8064 + }, + { + "epoch": 0.6508756355419256, + "grad_norm": 0.6966612339019775, + "learning_rate": 0.00013082661865848375, + "loss": 2.5745, + "step": 8065 + }, + { + "epoch": 0.6509563392785086, + "grad_norm": 0.7098337411880493, + "learning_rate": 0.00013081160019634468, + "loss": 2.5461, + "step": 8066 + }, + { + "epoch": 0.6510370430150916, + "grad_norm": 0.6514503359794617, + "learning_rate": 0.00013079658096630002, + "loss": 2.5869, + "step": 8067 + }, + { + "epoch": 0.6511177467516746, + "grad_norm": 0.680422306060791, + "learning_rate": 0.0001307815609687241, + "loss": 2.6316, + "step": 8068 + }, + { + "epoch": 0.6511984504882576, + "grad_norm": 0.6892665028572083, + "learning_rate": 0.00013076654020399117, + "loss": 2.5862, + "step": 8069 + }, + { + "epoch": 0.6512791542248406, + "grad_norm": 0.7605568170547485, + "learning_rate": 0.00013075151867247568, + "loss": 2.5342, + "step": 8070 + }, + { + "epoch": 0.6513598579614236, + "grad_norm": 0.7571204900741577, + "learning_rate": 0.00013073649637455192, + "loss": 2.5762, + "step": 8071 + }, + { + "epoch": 0.6514405616980066, + "grad_norm": 0.6910812258720398, + "learning_rate": 0.00013072147331059431, + "loss": 2.6635, + "step": 8072 + }, + { + "epoch": 0.6515212654345897, + "grad_norm": 0.765559196472168, + "learning_rate": 0.00013070644948097733, + "loss": 2.5885, + "step": 8073 + }, + { + "epoch": 0.6516019691711726, + "grad_norm": 0.7533665299415588, + "learning_rate": 0.00013069142488607532, + "loss": 2.6545, + "step": 8074 + }, + { + "epoch": 0.6516826729077556, + "grad_norm": 
0.685089647769928, + "learning_rate": 0.0001306763995262628, + "loss": 2.5955, + "step": 8075 + }, + { + "epoch": 0.6517633766443386, + "grad_norm": 0.7280653715133667, + "learning_rate": 0.00013066137340191422, + "loss": 2.5548, + "step": 8076 + }, + { + "epoch": 0.6518440803809217, + "grad_norm": 0.6881482601165771, + "learning_rate": 0.00013064634651340404, + "loss": 2.6143, + "step": 8077 + }, + { + "epoch": 0.6519247841175047, + "grad_norm": 0.6878265142440796, + "learning_rate": 0.0001306313188611068, + "loss": 2.5681, + "step": 8078 + }, + { + "epoch": 0.6520054878540876, + "grad_norm": 0.685238242149353, + "learning_rate": 0.00013061629044539702, + "loss": 2.5517, + "step": 8079 + }, + { + "epoch": 0.6520861915906706, + "grad_norm": 0.6689820885658264, + "learning_rate": 0.00013060126126664928, + "loss": 2.6201, + "step": 8080 + }, + { + "epoch": 0.6521668953272537, + "grad_norm": 0.7128999829292297, + "learning_rate": 0.00013058623132523807, + "loss": 2.5829, + "step": 8081 + }, + { + "epoch": 0.6522475990638367, + "grad_norm": 0.6835216879844666, + "learning_rate": 0.00013057120062153805, + "loss": 2.6312, + "step": 8082 + }, + { + "epoch": 0.6523283028004196, + "grad_norm": 0.7140012383460999, + "learning_rate": 0.00013055616915592382, + "loss": 2.6148, + "step": 8083 + }, + { + "epoch": 0.6524090065370026, + "grad_norm": 0.7378252148628235, + "learning_rate": 0.00013054113692876994, + "loss": 2.5805, + "step": 8084 + }, + { + "epoch": 0.6524897102735857, + "grad_norm": 0.7569258213043213, + "learning_rate": 0.0001305261039404511, + "loss": 2.6088, + "step": 8085 + }, + { + "epoch": 0.6525704140101687, + "grad_norm": 0.6909007430076599, + "learning_rate": 0.00013051107019134195, + "loss": 2.5285, + "step": 8086 + }, + { + "epoch": 0.6526511177467517, + "grad_norm": 0.6785587072372437, + "learning_rate": 0.0001304960356818172, + "loss": 2.5527, + "step": 8087 + }, + { + "epoch": 0.6527318214833346, + "grad_norm": 0.7058801054954529, + "learning_rate": 
0.0001304810004122515, + "loss": 2.6789, + "step": 8088 + }, + { + "epoch": 0.6528125252199177, + "grad_norm": 0.6920512318611145, + "learning_rate": 0.0001304659643830196, + "loss": 2.5748, + "step": 8089 + }, + { + "epoch": 0.6528932289565007, + "grad_norm": 0.6829244494438171, + "learning_rate": 0.00013045092759449625, + "loss": 2.5389, + "step": 8090 + }, + { + "epoch": 0.6529739326930837, + "grad_norm": 0.6942421793937683, + "learning_rate": 0.00013043589004705614, + "loss": 2.5851, + "step": 8091 + }, + { + "epoch": 0.6530546364296667, + "grad_norm": 0.6473072171211243, + "learning_rate": 0.0001304208517410741, + "loss": 2.56, + "step": 8092 + }, + { + "epoch": 0.6531353401662497, + "grad_norm": 0.6692056655883789, + "learning_rate": 0.00013040581267692494, + "loss": 2.5977, + "step": 8093 + }, + { + "epoch": 0.6532160439028327, + "grad_norm": 0.6918915510177612, + "learning_rate": 0.00013039077285498344, + "loss": 2.551, + "step": 8094 + }, + { + "epoch": 0.6532967476394157, + "grad_norm": 0.7432852387428284, + "learning_rate": 0.00013037573227562443, + "loss": 2.5537, + "step": 8095 + }, + { + "epoch": 0.6533774513759987, + "grad_norm": 0.6737081408500671, + "learning_rate": 0.0001303606909392228, + "loss": 2.5947, + "step": 8096 + }, + { + "epoch": 0.6534581551125818, + "grad_norm": 0.6810599565505981, + "learning_rate": 0.0001303456488461533, + "loss": 2.5704, + "step": 8097 + }, + { + "epoch": 0.6535388588491647, + "grad_norm": 0.675240159034729, + "learning_rate": 0.00013033060599679098, + "loss": 2.591, + "step": 8098 + }, + { + "epoch": 0.6536195625857477, + "grad_norm": 0.6888695359230042, + "learning_rate": 0.00013031556239151066, + "loss": 2.5403, + "step": 8099 + }, + { + "epoch": 0.6537002663223307, + "grad_norm": 0.7154796719551086, + "learning_rate": 0.00013030051803068727, + "loss": 2.5654, + "step": 8100 + }, + { + "epoch": 0.6537809700589138, + "grad_norm": 0.6655243635177612, + "learning_rate": 0.0001302854729146958, + "loss": 2.5867, + 
"step": 8101 + }, + { + "epoch": 0.6538616737954968, + "grad_norm": 0.7070788145065308, + "learning_rate": 0.00013027042704391115, + "loss": 2.5593, + "step": 8102 + }, + { + "epoch": 0.6539423775320797, + "grad_norm": 0.7071834206581116, + "learning_rate": 0.0001302553804187083, + "loss": 2.536, + "step": 8103 + }, + { + "epoch": 0.6540230812686627, + "grad_norm": 0.7086542248725891, + "learning_rate": 0.00013024033303946233, + "loss": 2.5644, + "step": 8104 + }, + { + "epoch": 0.6541037850052458, + "grad_norm": 0.6714556813240051, + "learning_rate": 0.00013022528490654818, + "loss": 2.5167, + "step": 8105 + }, + { + "epoch": 0.6541844887418288, + "grad_norm": 0.6905114054679871, + "learning_rate": 0.00013021023602034095, + "loss": 2.5227, + "step": 8106 + }, + { + "epoch": 0.6542651924784118, + "grad_norm": 0.7050586342811584, + "learning_rate": 0.00013019518638121563, + "loss": 2.5725, + "step": 8107 + }, + { + "epoch": 0.6543458962149947, + "grad_norm": 0.6940500736236572, + "learning_rate": 0.00013018013598954737, + "loss": 2.5912, + "step": 8108 + }, + { + "epoch": 0.6544265999515777, + "grad_norm": 0.7136965990066528, + "learning_rate": 0.00013016508484571122, + "loss": 2.6101, + "step": 8109 + }, + { + "epoch": 0.6545073036881608, + "grad_norm": 0.7205774188041687, + "learning_rate": 0.0001301500329500823, + "loss": 2.5869, + "step": 8110 + }, + { + "epoch": 0.6545880074247438, + "grad_norm": 0.6831154823303223, + "learning_rate": 0.00013013498030303575, + "loss": 2.5309, + "step": 8111 + }, + { + "epoch": 0.6546687111613267, + "grad_norm": 0.6778538823127747, + "learning_rate": 0.0001301199269049467, + "loss": 2.6297, + "step": 8112 + }, + { + "epoch": 0.6547494148979097, + "grad_norm": 0.705055832862854, + "learning_rate": 0.00013010487275619034, + "loss": 2.6188, + "step": 8113 + }, + { + "epoch": 0.6548301186344928, + "grad_norm": 0.6927980780601501, + "learning_rate": 0.00013008981785714188, + "loss": 2.5744, + "step": 8114 + }, + { + "epoch": 
0.6549108223710758, + "grad_norm": 0.7070884108543396, + "learning_rate": 0.0001300747622081765, + "loss": 2.618, + "step": 8115 + }, + { + "epoch": 0.6549915261076588, + "grad_norm": 0.723479688167572, + "learning_rate": 0.0001300597058096694, + "loss": 2.5928, + "step": 8116 + }, + { + "epoch": 0.6550722298442417, + "grad_norm": 0.6689562201499939, + "learning_rate": 0.00013004464866199587, + "loss": 2.5592, + "step": 8117 + }, + { + "epoch": 0.6551529335808248, + "grad_norm": 0.6685079336166382, + "learning_rate": 0.00013002959076553115, + "loss": 2.558, + "step": 8118 + }, + { + "epoch": 0.6552336373174078, + "grad_norm": 0.678105890750885, + "learning_rate": 0.00013001453212065057, + "loss": 2.6176, + "step": 8119 + }, + { + "epoch": 0.6553143410539908, + "grad_norm": 0.7355597019195557, + "learning_rate": 0.00012999947272772933, + "loss": 2.6293, + "step": 8120 + }, + { + "epoch": 0.6553950447905738, + "grad_norm": 0.735862672328949, + "learning_rate": 0.00012998441258714284, + "loss": 2.635, + "step": 8121 + }, + { + "epoch": 0.6554757485271568, + "grad_norm": 0.6766025424003601, + "learning_rate": 0.0001299693516992664, + "loss": 2.5829, + "step": 8122 + }, + { + "epoch": 0.6555564522637398, + "grad_norm": 0.6701885461807251, + "learning_rate": 0.00012995429006447542, + "loss": 2.5996, + "step": 8123 + }, + { + "epoch": 0.6556371560003228, + "grad_norm": 0.6814082264900208, + "learning_rate": 0.00012993922768314518, + "loss": 2.5906, + "step": 8124 + }, + { + "epoch": 0.6557178597369058, + "grad_norm": 0.7104958295822144, + "learning_rate": 0.00012992416455565113, + "loss": 2.6708, + "step": 8125 + }, + { + "epoch": 0.6557985634734889, + "grad_norm": 0.6451221108436584, + "learning_rate": 0.0001299091006823687, + "loss": 2.5512, + "step": 8126 + }, + { + "epoch": 0.6558792672100718, + "grad_norm": 0.6736068725585938, + "learning_rate": 0.0001298940360636733, + "loss": 2.5839, + "step": 8127 + }, + { + "epoch": 0.6559599709466548, + "grad_norm": 
0.6873149871826172, + "learning_rate": 0.00012987897069994031, + "loss": 2.5804, + "step": 8128 + }, + { + "epoch": 0.6560406746832378, + "grad_norm": 0.6937728524208069, + "learning_rate": 0.00012986390459154533, + "loss": 2.5648, + "step": 8129 + }, + { + "epoch": 0.6561213784198209, + "grad_norm": 0.7109464406967163, + "learning_rate": 0.00012984883773886377, + "loss": 2.6132, + "step": 8130 + }, + { + "epoch": 0.6562020821564039, + "grad_norm": 0.7134159803390503, + "learning_rate": 0.00012983377014227115, + "loss": 2.6029, + "step": 8131 + }, + { + "epoch": 0.6562827858929868, + "grad_norm": 0.6788110733032227, + "learning_rate": 0.000129818701802143, + "loss": 2.6344, + "step": 8132 + }, + { + "epoch": 0.6563634896295698, + "grad_norm": 0.6798231601715088, + "learning_rate": 0.00012980363271885483, + "loss": 2.5758, + "step": 8133 + }, + { + "epoch": 0.6564441933661529, + "grad_norm": 0.6586930155754089, + "learning_rate": 0.00012978856289278226, + "loss": 2.5918, + "step": 8134 + }, + { + "epoch": 0.6565248971027359, + "grad_norm": 0.6614218950271606, + "learning_rate": 0.0001297734923243008, + "loss": 2.5777, + "step": 8135 + }, + { + "epoch": 0.6566056008393188, + "grad_norm": 0.6874340176582336, + "learning_rate": 0.0001297584210137861, + "loss": 2.5528, + "step": 8136 + }, + { + "epoch": 0.6566863045759018, + "grad_norm": 0.6972174048423767, + "learning_rate": 0.00012974334896161376, + "loss": 2.6551, + "step": 8137 + }, + { + "epoch": 0.6567670083124849, + "grad_norm": 0.7414106726646423, + "learning_rate": 0.0001297282761681594, + "loss": 2.5719, + "step": 8138 + }, + { + "epoch": 0.6568477120490679, + "grad_norm": 0.6678279042243958, + "learning_rate": 0.00012971320263379868, + "loss": 2.555, + "step": 8139 + }, + { + "epoch": 0.6569284157856509, + "grad_norm": 0.692149817943573, + "learning_rate": 0.0001296981283589073, + "loss": 2.5991, + "step": 8140 + }, + { + "epoch": 0.6570091195222338, + "grad_norm": 0.6937025189399719, + "learning_rate": 
0.00012968305334386094, + "loss": 2.5635, + "step": 8141 + }, + { + "epoch": 0.6570898232588169, + "grad_norm": 0.6250358819961548, + "learning_rate": 0.00012966797758903528, + "loss": 2.55, + "step": 8142 + }, + { + "epoch": 0.6571705269953999, + "grad_norm": 0.7388221025466919, + "learning_rate": 0.00012965290109480607, + "loss": 2.5307, + "step": 8143 + }, + { + "epoch": 0.6572512307319829, + "grad_norm": 0.7165891528129578, + "learning_rate": 0.00012963782386154904, + "loss": 2.5482, + "step": 8144 + }, + { + "epoch": 0.6573319344685659, + "grad_norm": 0.7605282068252563, + "learning_rate": 0.00012962274588963996, + "loss": 2.5839, + "step": 8145 + }, + { + "epoch": 0.657412638205149, + "grad_norm": 0.7259613275527954, + "learning_rate": 0.00012960766717945465, + "loss": 2.5612, + "step": 8146 + }, + { + "epoch": 0.6574933419417319, + "grad_norm": 0.7301480770111084, + "learning_rate": 0.00012959258773136885, + "loss": 2.5365, + "step": 8147 + }, + { + "epoch": 0.6575740456783149, + "grad_norm": 0.6800966262817383, + "learning_rate": 0.0001295775075457584, + "loss": 2.5663, + "step": 8148 + }, + { + "epoch": 0.6576547494148979, + "grad_norm": 0.6968960165977478, + "learning_rate": 0.0001295624266229992, + "loss": 2.5626, + "step": 8149 + }, + { + "epoch": 0.657735453151481, + "grad_norm": 0.9044952392578125, + "learning_rate": 0.00012954734496346704, + "loss": 2.6479, + "step": 8150 + }, + { + "epoch": 0.6578161568880639, + "grad_norm": 0.6955156922340393, + "learning_rate": 0.00012953226256753777, + "loss": 2.5879, + "step": 8151 + }, + { + "epoch": 0.6578968606246469, + "grad_norm": 0.6535033583641052, + "learning_rate": 0.00012951717943558735, + "loss": 2.5372, + "step": 8152 + }, + { + "epoch": 0.6579775643612299, + "grad_norm": 0.720730721950531, + "learning_rate": 0.0001295020955679916, + "loss": 2.5813, + "step": 8153 + }, + { + "epoch": 0.658058268097813, + "grad_norm": 0.7190384268760681, + "learning_rate": 0.00012948701096512655, + "loss": 2.5923, + 
"step": 8154 + }, + { + "epoch": 0.658138971834396, + "grad_norm": 0.6624464988708496, + "learning_rate": 0.0001294719256273681, + "loss": 2.5548, + "step": 8155 + }, + { + "epoch": 0.6582196755709789, + "grad_norm": 0.7839831709861755, + "learning_rate": 0.00012945683955509224, + "loss": 2.531, + "step": 8156 + }, + { + "epoch": 0.6583003793075619, + "grad_norm": 0.694970965385437, + "learning_rate": 0.00012944175274867497, + "loss": 2.4693, + "step": 8157 + }, + { + "epoch": 0.658381083044145, + "grad_norm": 0.7409366965293884, + "learning_rate": 0.0001294266652084922, + "loss": 2.5706, + "step": 8158 + }, + { + "epoch": 0.658461786780728, + "grad_norm": 0.7502163052558899, + "learning_rate": 0.00012941157693492002, + "loss": 2.6137, + "step": 8159 + }, + { + "epoch": 0.658542490517311, + "grad_norm": 0.6627129912376404, + "learning_rate": 0.00012939648792833447, + "loss": 2.5781, + "step": 8160 + }, + { + "epoch": 0.6586231942538939, + "grad_norm": 0.6775660514831543, + "learning_rate": 0.00012938139818911157, + "loss": 2.5441, + "step": 8161 + }, + { + "epoch": 0.6587038979904769, + "grad_norm": 0.7150553464889526, + "learning_rate": 0.00012936630771762748, + "loss": 2.5763, + "step": 8162 + }, + { + "epoch": 0.65878460172706, + "grad_norm": 0.7461466193199158, + "learning_rate": 0.0001293512165142582, + "loss": 2.54, + "step": 8163 + }, + { + "epoch": 0.658865305463643, + "grad_norm": 0.7635199427604675, + "learning_rate": 0.00012933612457937988, + "loss": 2.5763, + "step": 8164 + }, + { + "epoch": 0.658946009200226, + "grad_norm": 0.7360543608665466, + "learning_rate": 0.00012932103191336865, + "loss": 2.5968, + "step": 8165 + }, + { + "epoch": 0.6590267129368089, + "grad_norm": 0.6482167840003967, + "learning_rate": 0.0001293059385166007, + "loss": 2.5704, + "step": 8166 + }, + { + "epoch": 0.659107416673392, + "grad_norm": 0.7024737596511841, + "learning_rate": 0.00012929084438945208, + "loss": 2.6221, + "step": 8167 + }, + { + "epoch": 0.659188120409975, + 
"grad_norm": 0.7192068696022034, + "learning_rate": 0.0001292757495322991, + "loss": 2.5574, + "step": 8168 + }, + { + "epoch": 0.659268824146558, + "grad_norm": 0.6900508403778076, + "learning_rate": 0.0001292606539455179, + "loss": 2.5969, + "step": 8169 + }, + { + "epoch": 0.6593495278831409, + "grad_norm": 0.7522475719451904, + "learning_rate": 0.00012924555762948474, + "loss": 2.592, + "step": 8170 + }, + { + "epoch": 0.659430231619724, + "grad_norm": 0.6610947251319885, + "learning_rate": 0.00012923046058457583, + "loss": 2.5404, + "step": 8171 + }, + { + "epoch": 0.659510935356307, + "grad_norm": 0.667628288269043, + "learning_rate": 0.00012921536281116738, + "loss": 2.5551, + "step": 8172 + }, + { + "epoch": 0.65959163909289, + "grad_norm": 0.7119980454444885, + "learning_rate": 0.00012920026430963578, + "loss": 2.6002, + "step": 8173 + }, + { + "epoch": 0.659672342829473, + "grad_norm": 0.712166428565979, + "learning_rate": 0.00012918516508035724, + "loss": 2.626, + "step": 8174 + }, + { + "epoch": 0.659753046566056, + "grad_norm": 0.6993290185928345, + "learning_rate": 0.0001291700651237081, + "loss": 2.6311, + "step": 8175 + }, + { + "epoch": 0.659833750302639, + "grad_norm": 0.6889405250549316, + "learning_rate": 0.0001291549644400647, + "loss": 2.6483, + "step": 8176 + }, + { + "epoch": 0.659914454039222, + "grad_norm": 0.7120937705039978, + "learning_rate": 0.00012913986302980334, + "loss": 2.5489, + "step": 8177 + }, + { + "epoch": 0.659995157775805, + "grad_norm": 0.7112947106361389, + "learning_rate": 0.00012912476089330043, + "loss": 2.6393, + "step": 8178 + }, + { + "epoch": 0.6600758615123881, + "grad_norm": 0.710342526435852, + "learning_rate": 0.00012910965803093237, + "loss": 2.5897, + "step": 8179 + }, + { + "epoch": 0.660156565248971, + "grad_norm": 0.6506931185722351, + "learning_rate": 0.0001290945544430755, + "loss": 2.6429, + "step": 8180 + }, + { + "epoch": 0.660237268985554, + "grad_norm": 0.7147021293640137, + "learning_rate": 
0.00012907945013010633, + "loss": 2.5521, + "step": 8181 + }, + { + "epoch": 0.660317972722137, + "grad_norm": 0.6802387833595276, + "learning_rate": 0.0001290643450924012, + "loss": 2.581, + "step": 8182 + }, + { + "epoch": 0.6603986764587201, + "grad_norm": 0.7599670886993408, + "learning_rate": 0.00012904923933033664, + "loss": 2.5532, + "step": 8183 + }, + { + "epoch": 0.6604793801953031, + "grad_norm": 0.7105657458305359, + "learning_rate": 0.0001290341328442891, + "loss": 2.5744, + "step": 8184 + }, + { + "epoch": 0.660560083931886, + "grad_norm": 0.6786425113677979, + "learning_rate": 0.00012901902563463506, + "loss": 2.5326, + "step": 8185 + }, + { + "epoch": 0.660640787668469, + "grad_norm": 0.7305583357810974, + "learning_rate": 0.00012900391770175106, + "loss": 2.6103, + "step": 8186 + }, + { + "epoch": 0.6607214914050521, + "grad_norm": 0.6578992605209351, + "learning_rate": 0.00012898880904601363, + "loss": 2.5833, + "step": 8187 + }, + { + "epoch": 0.6608021951416351, + "grad_norm": 0.6498856544494629, + "learning_rate": 0.00012897369966779926, + "loss": 2.6333, + "step": 8188 + }, + { + "epoch": 0.660882898878218, + "grad_norm": 0.7065569162368774, + "learning_rate": 0.00012895858956748458, + "loss": 2.5326, + "step": 8189 + }, + { + "epoch": 0.660963602614801, + "grad_norm": 0.7676446437835693, + "learning_rate": 0.00012894347874544613, + "loss": 2.6233, + "step": 8190 + }, + { + "epoch": 0.6610443063513841, + "grad_norm": 0.6794395446777344, + "learning_rate": 0.00012892836720206056, + "loss": 2.5426, + "step": 8191 + }, + { + "epoch": 0.6611250100879671, + "grad_norm": 0.7448986768722534, + "learning_rate": 0.00012891325493770444, + "loss": 2.5832, + "step": 8192 + }, + { + "epoch": 0.6612057138245501, + "grad_norm": 0.7789760231971741, + "learning_rate": 0.0001288981419527544, + "loss": 2.6393, + "step": 8193 + }, + { + "epoch": 0.661286417561133, + "grad_norm": 0.7425827980041504, + "learning_rate": 0.00012888302824758718, + "loss": 2.6159, + 
"step": 8194 + }, + { + "epoch": 0.6613671212977161, + "grad_norm": 0.6677481532096863, + "learning_rate": 0.00012886791382257936, + "loss": 2.5399, + "step": 8195 + }, + { + "epoch": 0.6614478250342991, + "grad_norm": 0.698397159576416, + "learning_rate": 0.0001288527986781077, + "loss": 2.5443, + "step": 8196 + }, + { + "epoch": 0.6615285287708821, + "grad_norm": 0.6862680315971375, + "learning_rate": 0.00012883768281454885, + "loss": 2.5843, + "step": 8197 + }, + { + "epoch": 0.6616092325074651, + "grad_norm": 0.7421948313713074, + "learning_rate": 0.00012882256623227955, + "loss": 2.5885, + "step": 8198 + }, + { + "epoch": 0.6616899362440481, + "grad_norm": 0.7453073859214783, + "learning_rate": 0.00012880744893167654, + "loss": 2.5821, + "step": 8199 + }, + { + "epoch": 0.6617706399806311, + "grad_norm": 0.668218195438385, + "learning_rate": 0.00012879233091311667, + "loss": 2.5941, + "step": 8200 + }, + { + "epoch": 0.6618513437172141, + "grad_norm": 0.6864587664604187, + "learning_rate": 0.00012877721217697657, + "loss": 2.5321, + "step": 8201 + }, + { + "epoch": 0.6619320474537971, + "grad_norm": 0.6521022319793701, + "learning_rate": 0.00012876209272363317, + "loss": 2.5945, + "step": 8202 + }, + { + "epoch": 0.6620127511903802, + "grad_norm": 0.7564631104469299, + "learning_rate": 0.00012874697255346325, + "loss": 2.5901, + "step": 8203 + }, + { + "epoch": 0.6620934549269631, + "grad_norm": 0.731991171836853, + "learning_rate": 0.00012873185166684356, + "loss": 2.649, + "step": 8204 + }, + { + "epoch": 0.6621741586635461, + "grad_norm": 0.6804815530776978, + "learning_rate": 0.00012871673006415108, + "loss": 2.5417, + "step": 8205 + }, + { + "epoch": 0.6622548624001291, + "grad_norm": 0.6862792372703552, + "learning_rate": 0.0001287016077457626, + "loss": 2.6118, + "step": 8206 + }, + { + "epoch": 0.6623355661367122, + "grad_norm": 0.7013735175132751, + "learning_rate": 0.00012868648471205503, + "loss": 2.6296, + "step": 8207 + }, + { + "epoch": 
0.6624162698732952, + "grad_norm": 0.7284584045410156, + "learning_rate": 0.00012867136096340529, + "loss": 2.6547, + "step": 8208 + }, + { + "epoch": 0.6624969736098781, + "grad_norm": 0.714546799659729, + "learning_rate": 0.00012865623650019025, + "loss": 2.5955, + "step": 8209 + }, + { + "epoch": 0.6625776773464611, + "grad_norm": 0.7645453214645386, + "learning_rate": 0.0001286411113227869, + "loss": 2.6132, + "step": 8210 + }, + { + "epoch": 0.6626583810830441, + "grad_norm": 0.6615093946456909, + "learning_rate": 0.0001286259854315722, + "loss": 2.5701, + "step": 8211 + }, + { + "epoch": 0.6627390848196272, + "grad_norm": 0.6565523147583008, + "learning_rate": 0.0001286108588269231, + "loss": 2.57, + "step": 8212 + }, + { + "epoch": 0.6628197885562102, + "grad_norm": 0.7173478007316589, + "learning_rate": 0.00012859573150921666, + "loss": 2.589, + "step": 8213 + }, + { + "epoch": 0.6629004922927931, + "grad_norm": 0.7069580554962158, + "learning_rate": 0.00012858060347882975, + "loss": 2.6146, + "step": 8214 + }, + { + "epoch": 0.6629811960293761, + "grad_norm": 0.7004678249359131, + "learning_rate": 0.00012856547473613953, + "loss": 2.5735, + "step": 8215 + }, + { + "epoch": 0.6630618997659592, + "grad_norm": 0.6589130163192749, + "learning_rate": 0.00012855034528152305, + "loss": 2.5731, + "step": 8216 + }, + { + "epoch": 0.6631426035025422, + "grad_norm": 0.7223117351531982, + "learning_rate": 0.0001285352151153573, + "loss": 2.5262, + "step": 8217 + }, + { + "epoch": 0.6632233072391251, + "grad_norm": 0.7045131325721741, + "learning_rate": 0.0001285200842380194, + "loss": 2.5789, + "step": 8218 + }, + { + "epoch": 0.6633040109757081, + "grad_norm": 0.7002174854278564, + "learning_rate": 0.00012850495264988645, + "loss": 2.6386, + "step": 8219 + }, + { + "epoch": 0.6633847147122912, + "grad_norm": 0.6844584941864014, + "learning_rate": 0.00012848982035133555, + "loss": 2.5394, + "step": 8220 + }, + { + "epoch": 0.6634654184488742, + "grad_norm": 
0.7154871821403503, + "learning_rate": 0.00012847468734274387, + "loss": 2.5927, + "step": 8221 + }, + { + "epoch": 0.6635461221854572, + "grad_norm": 0.6856776475906372, + "learning_rate": 0.00012845955362448855, + "loss": 2.5694, + "step": 8222 + }, + { + "epoch": 0.6636268259220401, + "grad_norm": 0.7069089412689209, + "learning_rate": 0.00012844441919694676, + "loss": 2.5856, + "step": 8223 + }, + { + "epoch": 0.6637075296586232, + "grad_norm": 0.7084143161773682, + "learning_rate": 0.00012842928406049567, + "loss": 2.6301, + "step": 8224 + }, + { + "epoch": 0.6637882333952062, + "grad_norm": 0.6790862679481506, + "learning_rate": 0.00012841414821551252, + "loss": 2.5586, + "step": 8225 + }, + { + "epoch": 0.6638689371317892, + "grad_norm": 0.6537249684333801, + "learning_rate": 0.00012839901166237453, + "loss": 2.5652, + "step": 8226 + }, + { + "epoch": 0.6639496408683722, + "grad_norm": 0.6670125126838684, + "learning_rate": 0.00012838387440145893, + "loss": 2.5438, + "step": 8227 + }, + { + "epoch": 0.6640303446049552, + "grad_norm": 0.7202955484390259, + "learning_rate": 0.00012836873643314297, + "loss": 2.5632, + "step": 8228 + }, + { + "epoch": 0.6641110483415382, + "grad_norm": 0.6844765543937683, + "learning_rate": 0.00012835359775780394, + "loss": 2.5595, + "step": 8229 + }, + { + "epoch": 0.6641917520781212, + "grad_norm": 0.6557698249816895, + "learning_rate": 0.00012833845837581916, + "loss": 2.5998, + "step": 8230 + }, + { + "epoch": 0.6642724558147042, + "grad_norm": 0.6741784811019897, + "learning_rate": 0.0001283233182875659, + "loss": 2.5591, + "step": 8231 + }, + { + "epoch": 0.6643531595512873, + "grad_norm": 0.6926484704017639, + "learning_rate": 0.00012830817749342154, + "loss": 2.5557, + "step": 8232 + }, + { + "epoch": 0.6644338632878702, + "grad_norm": 0.6866984367370605, + "learning_rate": 0.00012829303599376336, + "loss": 2.5646, + "step": 8233 + }, + { + "epoch": 0.6645145670244532, + "grad_norm": 0.6772707104682922, + 
"learning_rate": 0.0001282778937889688, + "loss": 2.6028, + "step": 8234 + }, + { + "epoch": 0.6645952707610362, + "grad_norm": 0.693236768245697, + "learning_rate": 0.00012826275087941518, + "loss": 2.611, + "step": 8235 + }, + { + "epoch": 0.6646759744976193, + "grad_norm": 0.7181996703147888, + "learning_rate": 0.00012824760726547993, + "loss": 2.6081, + "step": 8236 + }, + { + "epoch": 0.6647566782342023, + "grad_norm": 0.6845484375953674, + "learning_rate": 0.00012823246294754048, + "loss": 2.5544, + "step": 8237 + }, + { + "epoch": 0.6648373819707852, + "grad_norm": 0.7106444239616394, + "learning_rate": 0.00012821731792597425, + "loss": 2.552, + "step": 8238 + }, + { + "epoch": 0.6649180857073682, + "grad_norm": 0.6930601000785828, + "learning_rate": 0.0001282021722011587, + "loss": 2.5401, + "step": 8239 + }, + { + "epoch": 0.6649987894439513, + "grad_norm": 0.6658228039741516, + "learning_rate": 0.00012818702577347129, + "loss": 2.6287, + "step": 8240 + }, + { + "epoch": 0.6650794931805343, + "grad_norm": 0.6919803619384766, + "learning_rate": 0.0001281718786432895, + "loss": 2.6142, + "step": 8241 + }, + { + "epoch": 0.6651601969171173, + "grad_norm": 0.6675698757171631, + "learning_rate": 0.00012815673081099086, + "loss": 2.5325, + "step": 8242 + }, + { + "epoch": 0.6652409006537002, + "grad_norm": 0.6669798493385315, + "learning_rate": 0.0001281415822769529, + "loss": 2.5355, + "step": 8243 + }, + { + "epoch": 0.6653216043902833, + "grad_norm": 0.6449857950210571, + "learning_rate": 0.00012812643304155316, + "loss": 2.5968, + "step": 8244 + }, + { + "epoch": 0.6654023081268663, + "grad_norm": 0.6972789168357849, + "learning_rate": 0.00012811128310516914, + "loss": 2.6133, + "step": 8245 + }, + { + "epoch": 0.6654830118634493, + "grad_norm": 0.7179878354072571, + "learning_rate": 0.0001280961324681785, + "loss": 2.5793, + "step": 8246 + }, + { + "epoch": 0.6655637156000322, + "grad_norm": 0.6736378073692322, + "learning_rate": 0.0001280809811309588, + 
"loss": 2.5543, + "step": 8247 + }, + { + "epoch": 0.6656444193366153, + "grad_norm": 0.7376420497894287, + "learning_rate": 0.00012806582909388763, + "loss": 2.5501, + "step": 8248 + }, + { + "epoch": 0.6657251230731983, + "grad_norm": 0.7163094878196716, + "learning_rate": 0.00012805067635734263, + "loss": 2.5538, + "step": 8249 + }, + { + "epoch": 0.6658058268097813, + "grad_norm": 0.7699353694915771, + "learning_rate": 0.00012803552292170144, + "loss": 2.5925, + "step": 8250 + }, + { + "epoch": 0.6658865305463643, + "grad_norm": 0.6504995822906494, + "learning_rate": 0.00012802036878734177, + "loss": 2.5944, + "step": 8251 + }, + { + "epoch": 0.6659672342829474, + "grad_norm": 0.7150379419326782, + "learning_rate": 0.0001280052139546412, + "loss": 2.5959, + "step": 8252 + }, + { + "epoch": 0.6660479380195303, + "grad_norm": 0.7562555074691772, + "learning_rate": 0.00012799005842397757, + "loss": 2.6041, + "step": 8253 + }, + { + "epoch": 0.6661286417561133, + "grad_norm": 0.7242838740348816, + "learning_rate": 0.00012797490219572846, + "loss": 2.6152, + "step": 8254 + }, + { + "epoch": 0.6662093454926963, + "grad_norm": 0.7062848210334778, + "learning_rate": 0.00012795974527027168, + "loss": 2.596, + "step": 8255 + }, + { + "epoch": 0.6662900492292794, + "grad_norm": 0.8179726004600525, + "learning_rate": 0.00012794458764798497, + "loss": 2.5792, + "step": 8256 + }, + { + "epoch": 0.6663707529658623, + "grad_norm": 0.692166268825531, + "learning_rate": 0.00012792942932924608, + "loss": 2.6025, + "step": 8257 + }, + { + "epoch": 0.6664514567024453, + "grad_norm": 0.6540334224700928, + "learning_rate": 0.0001279142703144328, + "loss": 2.5119, + "step": 8258 + }, + { + "epoch": 0.6665321604390283, + "grad_norm": 0.7087461352348328, + "learning_rate": 0.00012789911060392294, + "loss": 2.5808, + "step": 8259 + }, + { + "epoch": 0.6666128641756114, + "grad_norm": 0.6897622346878052, + "learning_rate": 0.0001278839501980943, + "loss": 2.5811, + "step": 8260 + }, + { + 
"epoch": 0.6666935679121944, + "grad_norm": 0.6653634905815125, + "learning_rate": 0.00012786878909732473, + "loss": 2.5498, + "step": 8261 + }, + { + "epoch": 0.6667742716487773, + "grad_norm": 0.6541483402252197, + "learning_rate": 0.0001278536273019921, + "loss": 2.605, + "step": 8262 + }, + { + "epoch": 0.6668549753853603, + "grad_norm": 0.6748146414756775, + "learning_rate": 0.00012783846481247428, + "loss": 2.5571, + "step": 8263 + }, + { + "epoch": 0.6669356791219433, + "grad_norm": 0.7258282899856567, + "learning_rate": 0.00012782330162914915, + "loss": 2.5562, + "step": 8264 + }, + { + "epoch": 0.6670163828585264, + "grad_norm": 0.6963080167770386, + "learning_rate": 0.00012780813775239457, + "loss": 2.6467, + "step": 8265 + }, + { + "epoch": 0.6670970865951094, + "grad_norm": 0.6627718806266785, + "learning_rate": 0.00012779297318258855, + "loss": 2.5369, + "step": 8266 + }, + { + "epoch": 0.6671777903316923, + "grad_norm": 0.7026168704032898, + "learning_rate": 0.00012777780792010897, + "loss": 2.5639, + "step": 8267 + }, + { + "epoch": 0.6672584940682753, + "grad_norm": 0.6969077587127686, + "learning_rate": 0.0001277626419653338, + "loss": 2.517, + "step": 8268 + }, + { + "epoch": 0.6673391978048584, + "grad_norm": 0.6918485760688782, + "learning_rate": 0.00012774747531864102, + "loss": 2.6388, + "step": 8269 + }, + { + "epoch": 0.6674199015414414, + "grad_norm": 0.6661256551742554, + "learning_rate": 0.00012773230798040862, + "loss": 2.5477, + "step": 8270 + }, + { + "epoch": 0.6675006052780244, + "grad_norm": 0.6778402328491211, + "learning_rate": 0.0001277171399510146, + "loss": 2.6032, + "step": 8271 + }, + { + "epoch": 0.6675813090146073, + "grad_norm": 0.6464864611625671, + "learning_rate": 0.00012770197123083702, + "loss": 2.5396, + "step": 8272 + }, + { + "epoch": 0.6676620127511904, + "grad_norm": 0.7154508233070374, + "learning_rate": 0.0001276868018202539, + "loss": 2.6163, + "step": 8273 + }, + { + "epoch": 0.6677427164877734, + 
"grad_norm": 0.6849631071090698, + "learning_rate": 0.0001276716317196433, + "loss": 2.549, + "step": 8274 + }, + { + "epoch": 0.6678234202243564, + "grad_norm": 0.6696017980575562, + "learning_rate": 0.00012765646092938334, + "loss": 2.5046, + "step": 8275 + }, + { + "epoch": 0.6679041239609393, + "grad_norm": 0.668153703212738, + "learning_rate": 0.00012764128944985203, + "loss": 2.5422, + "step": 8276 + }, + { + "epoch": 0.6679848276975224, + "grad_norm": 0.6600282192230225, + "learning_rate": 0.00012762611728142756, + "loss": 2.6117, + "step": 8277 + }, + { + "epoch": 0.6680655314341054, + "grad_norm": 0.6691608428955078, + "learning_rate": 0.000127610944424488, + "loss": 2.5761, + "step": 8278 + }, + { + "epoch": 0.6681462351706884, + "grad_norm": 0.695142924785614, + "learning_rate": 0.00012759577087941156, + "loss": 2.6123, + "step": 8279 + }, + { + "epoch": 0.6682269389072714, + "grad_norm": 0.6846559643745422, + "learning_rate": 0.00012758059664657635, + "loss": 2.5882, + "step": 8280 + }, + { + "epoch": 0.6683076426438544, + "grad_norm": 0.7616459131240845, + "learning_rate": 0.0001275654217263606, + "loss": 2.5559, + "step": 8281 + }, + { + "epoch": 0.6683883463804374, + "grad_norm": 0.6995570063591003, + "learning_rate": 0.00012755024611914246, + "loss": 2.5336, + "step": 8282 + }, + { + "epoch": 0.6684690501170204, + "grad_norm": 0.7199691534042358, + "learning_rate": 0.0001275350698253002, + "loss": 2.6618, + "step": 8283 + }, + { + "epoch": 0.6685497538536034, + "grad_norm": 0.6938748955726624, + "learning_rate": 0.000127519892845212, + "loss": 2.574, + "step": 8284 + }, + { + "epoch": 0.6686304575901865, + "grad_norm": 0.6827714443206787, + "learning_rate": 0.00012750471517925614, + "loss": 2.5647, + "step": 8285 + }, + { + "epoch": 0.6687111613267694, + "grad_norm": 0.6684606671333313, + "learning_rate": 0.00012748953682781083, + "loss": 2.528, + "step": 8286 + }, + { + "epoch": 0.6687918650633524, + "grad_norm": 0.6842156052589417, + 
"learning_rate": 0.00012747435779125448, + "loss": 2.5521, + "step": 8287 + }, + { + "epoch": 0.6688725687999354, + "grad_norm": 0.7440506219863892, + "learning_rate": 0.0001274591780699653, + "loss": 2.5646, + "step": 8288 + }, + { + "epoch": 0.6689532725365185, + "grad_norm": 0.769922137260437, + "learning_rate": 0.0001274439976643216, + "loss": 2.6104, + "step": 8289 + }, + { + "epoch": 0.6690339762731015, + "grad_norm": 0.7793089747428894, + "learning_rate": 0.00012742881657470175, + "loss": 2.6348, + "step": 8290 + }, + { + "epoch": 0.6691146800096844, + "grad_norm": 0.695060133934021, + "learning_rate": 0.0001274136348014841, + "loss": 2.5797, + "step": 8291 + }, + { + "epoch": 0.6691953837462674, + "grad_norm": 0.7089917659759521, + "learning_rate": 0.00012739845234504697, + "loss": 2.5431, + "step": 8292 + }, + { + "epoch": 0.6692760874828505, + "grad_norm": 0.7542717456817627, + "learning_rate": 0.00012738326920576885, + "loss": 2.6172, + "step": 8293 + }, + { + "epoch": 0.6693567912194335, + "grad_norm": 0.6947969794273376, + "learning_rate": 0.00012736808538402802, + "loss": 2.6026, + "step": 8294 + }, + { + "epoch": 0.6694374949560165, + "grad_norm": 0.6696321368217468, + "learning_rate": 0.00012735290088020302, + "loss": 2.5592, + "step": 8295 + }, + { + "epoch": 0.6695181986925994, + "grad_norm": 0.7001518607139587, + "learning_rate": 0.0001273377156946722, + "loss": 2.5994, + "step": 8296 + }, + { + "epoch": 0.6695989024291825, + "grad_norm": 0.6708101630210876, + "learning_rate": 0.000127322529827814, + "loss": 2.6392, + "step": 8297 + }, + { + "epoch": 0.6696796061657655, + "grad_norm": 0.6282601952552795, + "learning_rate": 0.000127307343280007, + "loss": 2.5762, + "step": 8298 + }, + { + "epoch": 0.6697603099023485, + "grad_norm": 0.6879595518112183, + "learning_rate": 0.0001272921560516296, + "loss": 2.5507, + "step": 8299 + }, + { + "epoch": 0.6698410136389314, + "grad_norm": 0.6108266115188599, + "learning_rate": 0.00012727696814306033, + 
"loss": 2.5865, + "step": 8300 + }, + { + "epoch": 0.6699217173755145, + "grad_norm": 0.6763970851898193, + "learning_rate": 0.0001272617795546777, + "loss": 2.6439, + "step": 8301 + }, + { + "epoch": 0.6700024211120975, + "grad_norm": 0.6997560858726501, + "learning_rate": 0.00012724659028686027, + "loss": 2.5291, + "step": 8302 + }, + { + "epoch": 0.6700831248486805, + "grad_norm": 0.675714910030365, + "learning_rate": 0.0001272314003399866, + "loss": 2.5452, + "step": 8303 + }, + { + "epoch": 0.6701638285852635, + "grad_norm": 0.6847789883613586, + "learning_rate": 0.00012721620971443525, + "loss": 2.6111, + "step": 8304 + }, + { + "epoch": 0.6702445323218466, + "grad_norm": 0.7283920645713806, + "learning_rate": 0.0001272010184105848, + "loss": 2.6322, + "step": 8305 + }, + { + "epoch": 0.6703252360584295, + "grad_norm": 0.7551796436309814, + "learning_rate": 0.00012718582642881382, + "loss": 2.5728, + "step": 8306 + }, + { + "epoch": 0.6704059397950125, + "grad_norm": 0.694526195526123, + "learning_rate": 0.00012717063376950104, + "loss": 2.6241, + "step": 8307 + }, + { + "epoch": 0.6704866435315955, + "grad_norm": 0.6956443190574646, + "learning_rate": 0.00012715544043302504, + "loss": 2.5531, + "step": 8308 + }, + { + "epoch": 0.6705673472681786, + "grad_norm": 0.7649452686309814, + "learning_rate": 0.00012714024641976446, + "loss": 2.5462, + "step": 8309 + }, + { + "epoch": 0.6706480510047615, + "grad_norm": 0.7711065411567688, + "learning_rate": 0.00012712505173009797, + "loss": 2.5878, + "step": 8310 + }, + { + "epoch": 0.6707287547413445, + "grad_norm": 0.68077552318573, + "learning_rate": 0.00012710985636440434, + "loss": 2.5668, + "step": 8311 + }, + { + "epoch": 0.6708094584779275, + "grad_norm": 0.7181024551391602, + "learning_rate": 0.0001270946603230622, + "loss": 2.6104, + "step": 8312 + }, + { + "epoch": 0.6708901622145105, + "grad_norm": 0.7136553525924683, + "learning_rate": 0.0001270794636064503, + "loss": 2.5282, + "step": 8313 + }, + { + 
"epoch": 0.6709708659510936, + "grad_norm": 0.880094587802887, + "learning_rate": 0.00012706426621494736, + "loss": 2.5837, + "step": 8314 + }, + { + "epoch": 0.6710515696876765, + "grad_norm": 0.7438541054725647, + "learning_rate": 0.00012704906814893217, + "loss": 2.5577, + "step": 8315 + }, + { + "epoch": 0.6711322734242595, + "grad_norm": 0.8197470903396606, + "learning_rate": 0.00012703386940878352, + "loss": 2.569, + "step": 8316 + }, + { + "epoch": 0.6712129771608425, + "grad_norm": 0.7728317975997925, + "learning_rate": 0.00012701866999488014, + "loss": 2.6407, + "step": 8317 + }, + { + "epoch": 0.6712936808974256, + "grad_norm": 0.7594823837280273, + "learning_rate": 0.0001270034699076009, + "loss": 2.5789, + "step": 8318 + }, + { + "epoch": 0.6713743846340086, + "grad_norm": 0.7502284646034241, + "learning_rate": 0.0001269882691473246, + "loss": 2.6068, + "step": 8319 + }, + { + "epoch": 0.6714550883705915, + "grad_norm": 0.7355664372444153, + "learning_rate": 0.0001269730677144301, + "loss": 2.6055, + "step": 8320 + }, + { + "epoch": 0.6715357921071745, + "grad_norm": 0.7218407392501831, + "learning_rate": 0.0001269578656092962, + "loss": 2.5953, + "step": 8321 + }, + { + "epoch": 0.6716164958437576, + "grad_norm": 0.6932538747787476, + "learning_rate": 0.00012694266283230185, + "loss": 2.5795, + "step": 8322 + }, + { + "epoch": 0.6716971995803406, + "grad_norm": 0.7337260246276855, + "learning_rate": 0.00012692745938382591, + "loss": 2.5606, + "step": 8323 + }, + { + "epoch": 0.6717779033169236, + "grad_norm": 0.6959026455879211, + "learning_rate": 0.00012691225526424731, + "loss": 2.5688, + "step": 8324 + }, + { + "epoch": 0.6718586070535065, + "grad_norm": 0.7352995872497559, + "learning_rate": 0.00012689705047394493, + "loss": 2.6308, + "step": 8325 + }, + { + "epoch": 0.6719393107900896, + "grad_norm": 0.7023616433143616, + "learning_rate": 0.00012688184501329777, + "loss": 2.6462, + "step": 8326 + }, + { + "epoch": 0.6720200145266726, + 
"grad_norm": 0.6581354737281799, + "learning_rate": 0.00012686663888268474, + "loss": 2.5997, + "step": 8327 + }, + { + "epoch": 0.6721007182632556, + "grad_norm": 0.6332606077194214, + "learning_rate": 0.00012685143208248484, + "loss": 2.6348, + "step": 8328 + }, + { + "epoch": 0.6721814219998385, + "grad_norm": 0.6826457977294922, + "learning_rate": 0.00012683622461307707, + "loss": 2.5092, + "step": 8329 + }, + { + "epoch": 0.6722621257364216, + "grad_norm": 0.7641614079475403, + "learning_rate": 0.00012682101647484042, + "loss": 2.7098, + "step": 8330 + }, + { + "epoch": 0.6723428294730046, + "grad_norm": 0.7153630256652832, + "learning_rate": 0.00012680580766815394, + "loss": 2.5647, + "step": 8331 + }, + { + "epoch": 0.6724235332095876, + "grad_norm": 0.6746379137039185, + "learning_rate": 0.00012679059819339664, + "loss": 2.6187, + "step": 8332 + }, + { + "epoch": 0.6725042369461706, + "grad_norm": 0.6748883128166199, + "learning_rate": 0.00012677538805094764, + "loss": 2.6045, + "step": 8333 + }, + { + "epoch": 0.6725849406827537, + "grad_norm": 0.7366370558738708, + "learning_rate": 0.00012676017724118596, + "loss": 2.5789, + "step": 8334 + }, + { + "epoch": 0.6726656444193366, + "grad_norm": 0.7381749153137207, + "learning_rate": 0.00012674496576449074, + "loss": 2.5958, + "step": 8335 + }, + { + "epoch": 0.6727463481559196, + "grad_norm": 0.7109243869781494, + "learning_rate": 0.00012672975362124103, + "loss": 2.5874, + "step": 8336 + }, + { + "epoch": 0.6728270518925026, + "grad_norm": 0.6904270052909851, + "learning_rate": 0.00012671454081181595, + "loss": 2.5891, + "step": 8337 + }, + { + "epoch": 0.6729077556290857, + "grad_norm": 0.6809365749359131, + "learning_rate": 0.00012669932733659476, + "loss": 2.5904, + "step": 8338 + }, + { + "epoch": 0.6729884593656686, + "grad_norm": 0.7527552843093872, + "learning_rate": 0.00012668411319595647, + "loss": 2.5602, + "step": 8339 + }, + { + "epoch": 0.6730691631022516, + "grad_norm": 0.6746577620506287, + 
"learning_rate": 0.00012666889839028038, + "loss": 2.5468, + "step": 8340 + }, + { + "epoch": 0.6731498668388346, + "grad_norm": 0.6904895305633545, + "learning_rate": 0.00012665368291994562, + "loss": 2.623, + "step": 8341 + }, + { + "epoch": 0.6732305705754177, + "grad_norm": 0.6495908498764038, + "learning_rate": 0.00012663846678533135, + "loss": 2.5843, + "step": 8342 + }, + { + "epoch": 0.6733112743120007, + "grad_norm": 0.6782342195510864, + "learning_rate": 0.00012662324998681692, + "loss": 2.6141, + "step": 8343 + }, + { + "epoch": 0.6733919780485836, + "grad_norm": 0.7090504765510559, + "learning_rate": 0.0001266080325247815, + "loss": 2.6654, + "step": 8344 + }, + { + "epoch": 0.6734726817851666, + "grad_norm": 0.7085515856742859, + "learning_rate": 0.00012659281439960434, + "loss": 2.5394, + "step": 8345 + }, + { + "epoch": 0.6735533855217497, + "grad_norm": 0.6813806295394897, + "learning_rate": 0.00012657759561166473, + "loss": 2.6522, + "step": 8346 + }, + { + "epoch": 0.6736340892583327, + "grad_norm": 0.726378858089447, + "learning_rate": 0.00012656237616134197, + "loss": 2.5922, + "step": 8347 + }, + { + "epoch": 0.6737147929949157, + "grad_norm": 0.6323714256286621, + "learning_rate": 0.00012654715604901534, + "loss": 2.4938, + "step": 8348 + }, + { + "epoch": 0.6737954967314986, + "grad_norm": 0.6925889253616333, + "learning_rate": 0.0001265319352750642, + "loss": 2.635, + "step": 8349 + }, + { + "epoch": 0.6738762004680817, + "grad_norm": 0.6676003932952881, + "learning_rate": 0.00012651671383986788, + "loss": 2.558, + "step": 8350 + }, + { + "epoch": 0.6739569042046647, + "grad_norm": 0.7464616298675537, + "learning_rate": 0.00012650149174380575, + "loss": 2.5777, + "step": 8351 + }, + { + "epoch": 0.6740376079412477, + "grad_norm": 0.6611667275428772, + "learning_rate": 0.00012648626898725715, + "loss": 2.5779, + "step": 8352 + }, + { + "epoch": 0.6741183116778307, + "grad_norm": 0.7391866445541382, + "learning_rate": 0.00012647104557060148, + 
"loss": 2.5624, + "step": 8353 + }, + { + "epoch": 0.6741990154144137, + "grad_norm": 0.7107826471328735, + "learning_rate": 0.00012645582149421817, + "loss": 2.5744, + "step": 8354 + }, + { + "epoch": 0.6742797191509967, + "grad_norm": 0.7385339736938477, + "learning_rate": 0.00012644059675848666, + "loss": 2.5752, + "step": 8355 + }, + { + "epoch": 0.6743604228875797, + "grad_norm": 0.6887345314025879, + "learning_rate": 0.00012642537136378634, + "loss": 2.5794, + "step": 8356 + }, + { + "epoch": 0.6744411266241627, + "grad_norm": 0.6934933662414551, + "learning_rate": 0.00012641014531049666, + "loss": 2.5361, + "step": 8357 + }, + { + "epoch": 0.6745218303607458, + "grad_norm": 0.7437291741371155, + "learning_rate": 0.00012639491859899716, + "loss": 2.5741, + "step": 8358 + }, + { + "epoch": 0.6746025340973287, + "grad_norm": 0.7088494896888733, + "learning_rate": 0.00012637969122966729, + "loss": 2.6449, + "step": 8359 + }, + { + "epoch": 0.6746832378339117, + "grad_norm": 0.7496390342712402, + "learning_rate": 0.00012636446320288654, + "loss": 2.6109, + "step": 8360 + }, + { + "epoch": 0.6747639415704947, + "grad_norm": 0.6949843764305115, + "learning_rate": 0.00012634923451903447, + "loss": 2.5769, + "step": 8361 + }, + { + "epoch": 0.6748446453070778, + "grad_norm": 0.7192673087120056, + "learning_rate": 0.00012633400517849056, + "loss": 2.6053, + "step": 8362 + }, + { + "epoch": 0.6749253490436607, + "grad_norm": 0.7003379464149475, + "learning_rate": 0.00012631877518163442, + "loss": 2.5745, + "step": 8363 + }, + { + "epoch": 0.6750060527802437, + "grad_norm": 0.7499879002571106, + "learning_rate": 0.00012630354452884563, + "loss": 2.6077, + "step": 8364 + }, + { + "epoch": 0.6750867565168267, + "grad_norm": 0.7047405242919922, + "learning_rate": 0.00012628831322050377, + "loss": 2.5955, + "step": 8365 + }, + { + "epoch": 0.6751674602534097, + "grad_norm": 0.7463203072547913, + "learning_rate": 0.00012627308125698838, + "loss": 2.5421, + "step": 8366 + }, 
+ { + "epoch": 0.6752481639899928, + "grad_norm": 0.7377086877822876, + "learning_rate": 0.00012625784863867914, + "loss": 2.5804, + "step": 8367 + }, + { + "epoch": 0.6753288677265757, + "grad_norm": 0.7136400938034058, + "learning_rate": 0.00012624261536595566, + "loss": 2.5673, + "step": 8368 + }, + { + "epoch": 0.6754095714631587, + "grad_norm": 0.6923615336418152, + "learning_rate": 0.0001262273814391976, + "loss": 2.5832, + "step": 8369 + }, + { + "epoch": 0.6754902751997417, + "grad_norm": 0.7495028972625732, + "learning_rate": 0.00012621214685878469, + "loss": 2.5943, + "step": 8370 + }, + { + "epoch": 0.6755709789363248, + "grad_norm": 0.6751434206962585, + "learning_rate": 0.0001261969116250965, + "loss": 2.5495, + "step": 8371 + }, + { + "epoch": 0.6756516826729078, + "grad_norm": 0.7055973410606384, + "learning_rate": 0.00012618167573851284, + "loss": 2.5651, + "step": 8372 + }, + { + "epoch": 0.6757323864094907, + "grad_norm": 0.7479640245437622, + "learning_rate": 0.00012616643919941337, + "loss": 2.653, + "step": 8373 + }, + { + "epoch": 0.6758130901460737, + "grad_norm": 0.7075015902519226, + "learning_rate": 0.00012615120200817778, + "loss": 2.5787, + "step": 8374 + }, + { + "epoch": 0.6758937938826568, + "grad_norm": 0.7513934969902039, + "learning_rate": 0.00012613596416518593, + "loss": 2.6099, + "step": 8375 + }, + { + "epoch": 0.6759744976192398, + "grad_norm": 0.6742326021194458, + "learning_rate": 0.00012612072567081754, + "loss": 2.5335, + "step": 8376 + }, + { + "epoch": 0.6760552013558228, + "grad_norm": 0.7271459698677063, + "learning_rate": 0.00012610548652545239, + "loss": 2.6082, + "step": 8377 + }, + { + "epoch": 0.6761359050924057, + "grad_norm": 0.7481515407562256, + "learning_rate": 0.00012609024672947022, + "loss": 2.5805, + "step": 8378 + }, + { + "epoch": 0.6762166088289888, + "grad_norm": 0.7484803199768066, + "learning_rate": 0.00012607500628325093, + "loss": 2.6099, + "step": 8379 + }, + { + "epoch": 0.6762973125655718, + 
"grad_norm": 0.7462390661239624, + "learning_rate": 0.00012605976518717435, + "loss": 2.6054, + "step": 8380 + }, + { + "epoch": 0.6763780163021548, + "grad_norm": 0.7014410495758057, + "learning_rate": 0.00012604452344162028, + "loss": 2.5614, + "step": 8381 + }, + { + "epoch": 0.6764587200387377, + "grad_norm": 0.6902963519096375, + "learning_rate": 0.0001260292810469686, + "loss": 2.5813, + "step": 8382 + }, + { + "epoch": 0.6765394237753208, + "grad_norm": 0.6646186113357544, + "learning_rate": 0.00012601403800359919, + "loss": 2.545, + "step": 8383 + }, + { + "epoch": 0.6766201275119038, + "grad_norm": 0.7067462801933289, + "learning_rate": 0.00012599879431189197, + "loss": 2.6195, + "step": 8384 + }, + { + "epoch": 0.6767008312484868, + "grad_norm": 0.7263965010643005, + "learning_rate": 0.0001259835499722268, + "loss": 2.5929, + "step": 8385 + }, + { + "epoch": 0.6767815349850698, + "grad_norm": 0.6672000885009766, + "learning_rate": 0.0001259683049849837, + "loss": 2.5561, + "step": 8386 + }, + { + "epoch": 0.6768622387216529, + "grad_norm": 0.6543236374855042, + "learning_rate": 0.0001259530593505425, + "loss": 2.6256, + "step": 8387 + }, + { + "epoch": 0.6769429424582358, + "grad_norm": 0.6532339453697205, + "learning_rate": 0.00012593781306928324, + "loss": 2.5074, + "step": 8388 + }, + { + "epoch": 0.6770236461948188, + "grad_norm": 0.7442833185195923, + "learning_rate": 0.00012592256614158591, + "loss": 2.6124, + "step": 8389 + }, + { + "epoch": 0.6771043499314018, + "grad_norm": 0.786685585975647, + "learning_rate": 0.00012590731856783043, + "loss": 2.6077, + "step": 8390 + }, + { + "epoch": 0.6771850536679849, + "grad_norm": 0.7952337265014648, + "learning_rate": 0.00012589207034839687, + "loss": 2.5894, + "step": 8391 + }, + { + "epoch": 0.6772657574045678, + "grad_norm": 0.7847954034805298, + "learning_rate": 0.00012587682148366524, + "loss": 2.4934, + "step": 8392 + }, + { + "epoch": 0.6773464611411508, + "grad_norm": 0.6769007444381714, + 
"learning_rate": 0.00012586157197401552, + "loss": 2.5695, + "step": 8393 + }, + { + "epoch": 0.6774271648777338, + "grad_norm": 0.6583757996559143, + "learning_rate": 0.00012584632181982788, + "loss": 2.5866, + "step": 8394 + }, + { + "epoch": 0.6775078686143169, + "grad_norm": 0.7375823855400085, + "learning_rate": 0.0001258310710214823, + "loss": 2.5141, + "step": 8395 + }, + { + "epoch": 0.6775885723508999, + "grad_norm": 0.6901078224182129, + "learning_rate": 0.00012581581957935896, + "loss": 2.5732, + "step": 8396 + }, + { + "epoch": 0.6776692760874828, + "grad_norm": 0.687152624130249, + "learning_rate": 0.0001258005674938379, + "loss": 2.5916, + "step": 8397 + }, + { + "epoch": 0.6777499798240658, + "grad_norm": 0.7198586463928223, + "learning_rate": 0.00012578531476529917, + "loss": 2.5626, + "step": 8398 + }, + { + "epoch": 0.6778306835606489, + "grad_norm": 0.7417474985122681, + "learning_rate": 0.00012577006139412309, + "loss": 2.5486, + "step": 8399 + }, + { + "epoch": 0.6779113872972319, + "grad_norm": 0.6588087677955627, + "learning_rate": 0.0001257548073806897, + "loss": 2.6123, + "step": 8400 + }, + { + "epoch": 0.6779920910338149, + "grad_norm": 0.7211382389068604, + "learning_rate": 0.00012573955272537915, + "loss": 2.6402, + "step": 8401 + }, + { + "epoch": 0.6780727947703978, + "grad_norm": 0.7196084856987, + "learning_rate": 0.00012572429742857167, + "loss": 2.51, + "step": 8402 + }, + { + "epoch": 0.6781534985069809, + "grad_norm": 0.6399394273757935, + "learning_rate": 0.00012570904149064748, + "loss": 2.5309, + "step": 8403 + }, + { + "epoch": 0.6782342022435639, + "grad_norm": 0.6969572305679321, + "learning_rate": 0.00012569378491198674, + "loss": 2.5829, + "step": 8404 + }, + { + "epoch": 0.6783149059801469, + "grad_norm": 0.8005492091178894, + "learning_rate": 0.00012567852769296975, + "loss": 2.6277, + "step": 8405 + }, + { + "epoch": 0.6783956097167299, + "grad_norm": 0.6786207556724548, + "learning_rate": 0.0001256632698339767, + 
"loss": 2.5839, + "step": 8406 + }, + { + "epoch": 0.6784763134533129, + "grad_norm": 0.7047130465507507, + "learning_rate": 0.0001256480113353879, + "loss": 2.533, + "step": 8407 + }, + { + "epoch": 0.6785570171898959, + "grad_norm": 0.7640479803085327, + "learning_rate": 0.0001256327521975836, + "loss": 2.5855, + "step": 8408 + }, + { + "epoch": 0.6786377209264789, + "grad_norm": 0.728111207485199, + "learning_rate": 0.00012561749242094412, + "loss": 2.6184, + "step": 8409 + }, + { + "epoch": 0.6787184246630619, + "grad_norm": 0.7842772603034973, + "learning_rate": 0.00012560223200584975, + "loss": 2.5915, + "step": 8410 + }, + { + "epoch": 0.678799128399645, + "grad_norm": 0.7129092812538147, + "learning_rate": 0.00012558697095268085, + "loss": 2.6526, + "step": 8411 + }, + { + "epoch": 0.6788798321362279, + "grad_norm": 0.751103401184082, + "learning_rate": 0.00012557170926181773, + "loss": 2.605, + "step": 8412 + }, + { + "epoch": 0.6789605358728109, + "grad_norm": 0.6850594878196716, + "learning_rate": 0.0001255564469336408, + "loss": 2.6047, + "step": 8413 + }, + { + "epoch": 0.6790412396093939, + "grad_norm": 0.703037679195404, + "learning_rate": 0.00012554118396853036, + "loss": 2.653, + "step": 8414 + }, + { + "epoch": 0.6791219433459769, + "grad_norm": 0.8097915053367615, + "learning_rate": 0.0001255259203668669, + "loss": 2.5937, + "step": 8415 + }, + { + "epoch": 0.67920264708256, + "grad_norm": 0.700351357460022, + "learning_rate": 0.00012551065612903076, + "loss": 2.6089, + "step": 8416 + }, + { + "epoch": 0.6792833508191429, + "grad_norm": 0.6760888695716858, + "learning_rate": 0.00012549539125540236, + "loss": 2.547, + "step": 8417 + }, + { + "epoch": 0.6793640545557259, + "grad_norm": 0.6751723289489746, + "learning_rate": 0.0001254801257463622, + "loss": 2.625, + "step": 8418 + }, + { + "epoch": 0.6794447582923089, + "grad_norm": 0.6928921937942505, + "learning_rate": 0.00012546485960229065, + "loss": 2.5671, + "step": 8419 + }, + { + "epoch": 
0.679525462028892, + "grad_norm": 0.6541565656661987, + "learning_rate": 0.0001254495928235683, + "loss": 2.5837, + "step": 8420 + }, + { + "epoch": 0.679606165765475, + "grad_norm": 0.6228676438331604, + "learning_rate": 0.00012543432541057555, + "loss": 2.5798, + "step": 8421 + }, + { + "epoch": 0.6796868695020579, + "grad_norm": 0.7620853185653687, + "learning_rate": 0.0001254190573636929, + "loss": 2.5885, + "step": 8422 + }, + { + "epoch": 0.6797675732386409, + "grad_norm": 0.7425604462623596, + "learning_rate": 0.0001254037886833009, + "loss": 2.6124, + "step": 8423 + }, + { + "epoch": 0.679848276975224, + "grad_norm": 0.7150974273681641, + "learning_rate": 0.0001253885193697801, + "loss": 2.5423, + "step": 8424 + }, + { + "epoch": 0.679928980711807, + "grad_norm": 0.672649621963501, + "learning_rate": 0.000125373249423511, + "loss": 2.5563, + "step": 8425 + }, + { + "epoch": 0.6800096844483899, + "grad_norm": 0.6913620829582214, + "learning_rate": 0.00012535797884487425, + "loss": 2.5261, + "step": 8426 + }, + { + "epoch": 0.6800903881849729, + "grad_norm": 0.712123692035675, + "learning_rate": 0.00012534270763425034, + "loss": 2.5958, + "step": 8427 + }, + { + "epoch": 0.680171091921556, + "grad_norm": 0.7593061327934265, + "learning_rate": 0.00012532743579201993, + "loss": 2.6036, + "step": 8428 + }, + { + "epoch": 0.680251795658139, + "grad_norm": 0.7108714580535889, + "learning_rate": 0.0001253121633185636, + "loss": 2.6004, + "step": 8429 + }, + { + "epoch": 0.680332499394722, + "grad_norm": 0.7142449021339417, + "learning_rate": 0.00012529689021426198, + "loss": 2.588, + "step": 8430 + }, + { + "epoch": 0.6804132031313049, + "grad_norm": 0.7579841017723083, + "learning_rate": 0.00012528161647949574, + "loss": 2.5927, + "step": 8431 + }, + { + "epoch": 0.680493906867888, + "grad_norm": 0.6522083878517151, + "learning_rate": 0.00012526634211464555, + "loss": 2.5619, + "step": 8432 + }, + { + "epoch": 0.680574610604471, + "grad_norm": 0.7681782245635986, 
+ "learning_rate": 0.00012525106712009203, + "loss": 2.6065, + "step": 8433 + }, + { + "epoch": 0.680655314341054, + "grad_norm": 0.6900169253349304, + "learning_rate": 0.00012523579149621594, + "loss": 2.5507, + "step": 8434 + }, + { + "epoch": 0.680736018077637, + "grad_norm": 0.6907666325569153, + "learning_rate": 0.00012522051524339794, + "loss": 2.5213, + "step": 8435 + }, + { + "epoch": 0.68081672181422, + "grad_norm": 0.7202023267745972, + "learning_rate": 0.0001252052383620188, + "loss": 2.6367, + "step": 8436 + }, + { + "epoch": 0.680897425550803, + "grad_norm": 0.7893621325492859, + "learning_rate": 0.00012518996085245925, + "loss": 2.6066, + "step": 8437 + }, + { + "epoch": 0.680978129287386, + "grad_norm": 0.7693532109260559, + "learning_rate": 0.00012517468271509998, + "loss": 2.5346, + "step": 8438 + }, + { + "epoch": 0.681058833023969, + "grad_norm": 0.7976840734481812, + "learning_rate": 0.0001251594039503218, + "loss": 2.5991, + "step": 8439 + }, + { + "epoch": 0.6811395367605521, + "grad_norm": 0.7671225666999817, + "learning_rate": 0.00012514412455850554, + "loss": 2.5959, + "step": 8440 + }, + { + "epoch": 0.681220240497135, + "grad_norm": 0.7143450975418091, + "learning_rate": 0.00012512884454003194, + "loss": 2.5828, + "step": 8441 + }, + { + "epoch": 0.681300944233718, + "grad_norm": 0.6821861863136292, + "learning_rate": 0.00012511356389528192, + "loss": 2.5908, + "step": 8442 + }, + { + "epoch": 0.681381647970301, + "grad_norm": 0.7279960513114929, + "learning_rate": 0.00012509828262463615, + "loss": 2.578, + "step": 8443 + }, + { + "epoch": 0.6814623517068841, + "grad_norm": 0.6503065824508667, + "learning_rate": 0.0001250830007284756, + "loss": 2.525, + "step": 8444 + }, + { + "epoch": 0.681543055443467, + "grad_norm": 0.7276029586791992, + "learning_rate": 0.00012506771820718112, + "loss": 2.584, + "step": 8445 + }, + { + "epoch": 0.68162375918005, + "grad_norm": 0.7635578513145447, + "learning_rate": 0.00012505243506113356, + "loss": 
2.627, + "step": 8446 + }, + { + "epoch": 0.681704462916633, + "grad_norm": 0.7086981534957886, + "learning_rate": 0.00012503715129071386, + "loss": 2.6164, + "step": 8447 + }, + { + "epoch": 0.6817851666532161, + "grad_norm": 0.7144165635108948, + "learning_rate": 0.00012502186689630285, + "loss": 2.5642, + "step": 8448 + }, + { + "epoch": 0.6818658703897991, + "grad_norm": 0.8135093450546265, + "learning_rate": 0.00012500658187828155, + "loss": 2.6161, + "step": 8449 + }, + { + "epoch": 0.681946574126382, + "grad_norm": 0.7223377227783203, + "learning_rate": 0.00012499129623703086, + "loss": 2.6192, + "step": 8450 + }, + { + "epoch": 0.682027277862965, + "grad_norm": 0.7189127206802368, + "learning_rate": 0.00012497600997293172, + "loss": 2.6086, + "step": 8451 + }, + { + "epoch": 0.6821079815995481, + "grad_norm": 0.6742144823074341, + "learning_rate": 0.00012496072308636514, + "loss": 2.5747, + "step": 8452 + }, + { + "epoch": 0.6821886853361311, + "grad_norm": 0.7432419657707214, + "learning_rate": 0.0001249454355777121, + "loss": 2.5687, + "step": 8453 + }, + { + "epoch": 0.6822693890727141, + "grad_norm": 0.6140317320823669, + "learning_rate": 0.00012493014744735357, + "loss": 2.5371, + "step": 8454 + }, + { + "epoch": 0.682350092809297, + "grad_norm": 0.7215768098831177, + "learning_rate": 0.0001249148586956706, + "loss": 2.6806, + "step": 8455 + }, + { + "epoch": 0.6824307965458801, + "grad_norm": 0.7485790252685547, + "learning_rate": 0.0001248995693230442, + "loss": 2.575, + "step": 8456 + }, + { + "epoch": 0.6825115002824631, + "grad_norm": 0.744349479675293, + "learning_rate": 0.00012488427932985552, + "loss": 2.5961, + "step": 8457 + }, + { + "epoch": 0.6825922040190461, + "grad_norm": 0.6784959435462952, + "learning_rate": 0.0001248689887164855, + "loss": 2.5501, + "step": 8458 + }, + { + "epoch": 0.682672907755629, + "grad_norm": 0.6664010286331177, + "learning_rate": 0.0001248536974833153, + "loss": 2.5741, + "step": 8459 + }, + { + "epoch": 
0.6827536114922121, + "grad_norm": 0.7185953259468079, + "learning_rate": 0.00012483840563072592, + "loss": 2.5875, + "step": 8460 + }, + { + "epoch": 0.6828343152287951, + "grad_norm": 0.6553035378456116, + "learning_rate": 0.00012482311315909864, + "loss": 2.5321, + "step": 8461 + }, + { + "epoch": 0.6829150189653781, + "grad_norm": 0.6713398694992065, + "learning_rate": 0.00012480782006881442, + "loss": 2.6207, + "step": 8462 + }, + { + "epoch": 0.6829957227019611, + "grad_norm": 0.6733734607696533, + "learning_rate": 0.00012479252636025452, + "loss": 2.5746, + "step": 8463 + }, + { + "epoch": 0.6830764264385442, + "grad_norm": 0.7257994413375854, + "learning_rate": 0.00012477723203380004, + "loss": 2.5837, + "step": 8464 + }, + { + "epoch": 0.6831571301751271, + "grad_norm": 0.716242253780365, + "learning_rate": 0.00012476193708983214, + "loss": 2.5611, + "step": 8465 + }, + { + "epoch": 0.6832378339117101, + "grad_norm": 0.6797829866409302, + "learning_rate": 0.0001247466415287321, + "loss": 2.5763, + "step": 8466 + }, + { + "epoch": 0.6833185376482931, + "grad_norm": 0.679931640625, + "learning_rate": 0.000124731345350881, + "loss": 2.606, + "step": 8467 + }, + { + "epoch": 0.6833992413848761, + "grad_norm": 0.6767866611480713, + "learning_rate": 0.00012471604855666016, + "loss": 2.5682, + "step": 8468 + }, + { + "epoch": 0.6834799451214592, + "grad_norm": 0.7297048568725586, + "learning_rate": 0.00012470075114645078, + "loss": 2.5527, + "step": 8469 + }, + { + "epoch": 0.6835606488580421, + "grad_norm": 0.6882644295692444, + "learning_rate": 0.0001246854531206341, + "loss": 2.5712, + "step": 8470 + }, + { + "epoch": 0.6836413525946251, + "grad_norm": 0.7129159569740295, + "learning_rate": 0.00012467015447959143, + "loss": 2.5627, + "step": 8471 + }, + { + "epoch": 0.6837220563312081, + "grad_norm": 0.6671481728553772, + "learning_rate": 0.000124654855223704, + "loss": 2.6226, + "step": 8472 + }, + { + "epoch": 0.6838027600677912, + "grad_norm": 
0.7096946835517883, + "learning_rate": 0.00012463955535335313, + "loss": 2.5373, + "step": 8473 + }, + { + "epoch": 0.6838834638043741, + "grad_norm": 0.6781395077705383, + "learning_rate": 0.00012462425486892012, + "loss": 2.5607, + "step": 8474 + }, + { + "epoch": 0.6839641675409571, + "grad_norm": 0.6777891516685486, + "learning_rate": 0.00012460895377078632, + "loss": 2.5991, + "step": 8475 + }, + { + "epoch": 0.6840448712775401, + "grad_norm": 0.7175275087356567, + "learning_rate": 0.00012459365205933306, + "loss": 2.6006, + "step": 8476 + }, + { + "epoch": 0.6841255750141232, + "grad_norm": 0.6832807660102844, + "learning_rate": 0.00012457834973494174, + "loss": 2.5757, + "step": 8477 + }, + { + "epoch": 0.6842062787507062, + "grad_norm": 0.7002938985824585, + "learning_rate": 0.00012456304679799366, + "loss": 2.554, + "step": 8478 + }, + { + "epoch": 0.6842869824872891, + "grad_norm": 0.7236241698265076, + "learning_rate": 0.00012454774324887027, + "loss": 2.6054, + "step": 8479 + }, + { + "epoch": 0.6843676862238721, + "grad_norm": 0.7327216267585754, + "learning_rate": 0.00012453243908795288, + "loss": 2.6101, + "step": 8480 + }, + { + "epoch": 0.6844483899604552, + "grad_norm": 0.7414156794548035, + "learning_rate": 0.00012451713431562306, + "loss": 2.5505, + "step": 8481 + }, + { + "epoch": 0.6845290936970382, + "grad_norm": 0.697795569896698, + "learning_rate": 0.00012450182893226214, + "loss": 2.539, + "step": 8482 + }, + { + "epoch": 0.6846097974336212, + "grad_norm": 0.7053593397140503, + "learning_rate": 0.00012448652293825158, + "loss": 2.6045, + "step": 8483 + }, + { + "epoch": 0.6846905011702041, + "grad_norm": 0.6710856556892395, + "learning_rate": 0.00012447121633397287, + "loss": 2.554, + "step": 8484 + }, + { + "epoch": 0.6847712049067872, + "grad_norm": 0.754454493522644, + "learning_rate": 0.0001244559091198075, + "loss": 2.5523, + "step": 8485 + }, + { + "epoch": 0.6848519086433702, + "grad_norm": 0.6468656659126282, + "learning_rate": 
0.0001244406012961369, + "loss": 2.5931, + "step": 8486 + }, + { + "epoch": 0.6849326123799532, + "grad_norm": 0.7169063091278076, + "learning_rate": 0.00012442529286334266, + "loss": 2.5743, + "step": 8487 + }, + { + "epoch": 0.6850133161165362, + "grad_norm": 0.6737040877342224, + "learning_rate": 0.00012440998382180627, + "loss": 2.5734, + "step": 8488 + }, + { + "epoch": 0.6850940198531192, + "grad_norm": 0.7026428580284119, + "learning_rate": 0.0001243946741719093, + "loss": 2.4994, + "step": 8489 + }, + { + "epoch": 0.6851747235897022, + "grad_norm": 0.7378512024879456, + "learning_rate": 0.00012437936391403322, + "loss": 2.5611, + "step": 8490 + }, + { + "epoch": 0.6852554273262852, + "grad_norm": 0.7379863262176514, + "learning_rate": 0.0001243640530485597, + "loss": 2.538, + "step": 8491 + }, + { + "epoch": 0.6853361310628682, + "grad_norm": 0.68398118019104, + "learning_rate": 0.00012434874157587027, + "loss": 2.5593, + "step": 8492 + }, + { + "epoch": 0.6854168347994513, + "grad_norm": 0.6780444383621216, + "learning_rate": 0.0001243334294963466, + "loss": 2.5068, + "step": 8493 + }, + { + "epoch": 0.6854975385360342, + "grad_norm": 0.7425427436828613, + "learning_rate": 0.0001243181168103702, + "loss": 2.6607, + "step": 8494 + }, + { + "epoch": 0.6855782422726172, + "grad_norm": 0.7563300132751465, + "learning_rate": 0.0001243028035183228, + "loss": 2.5915, + "step": 8495 + }, + { + "epoch": 0.6856589460092002, + "grad_norm": 0.6746618151664734, + "learning_rate": 0.000124287489620586, + "loss": 2.5399, + "step": 8496 + }, + { + "epoch": 0.6857396497457833, + "grad_norm": 0.7100487947463989, + "learning_rate": 0.00012427217511754146, + "loss": 2.5927, + "step": 8497 + }, + { + "epoch": 0.6858203534823663, + "grad_norm": 0.6487080454826355, + "learning_rate": 0.00012425686000957088, + "loss": 2.5582, + "step": 8498 + }, + { + "epoch": 0.6859010572189492, + "grad_norm": 0.6577199697494507, + "learning_rate": 0.00012424154429705592, + "loss": 2.5589, + 
"step": 8499 + }, + { + "epoch": 0.6859817609555322, + "grad_norm": 0.6748726963996887, + "learning_rate": 0.00012422622798037832, + "loss": 2.5651, + "step": 8500 + }, + { + "epoch": 0.6860624646921153, + "grad_norm": 0.7159377336502075, + "learning_rate": 0.0001242109110599198, + "loss": 2.569, + "step": 8501 + }, + { + "epoch": 0.6861431684286983, + "grad_norm": 0.6772934198379517, + "learning_rate": 0.00012419559353606208, + "loss": 2.5533, + "step": 8502 + }, + { + "epoch": 0.6862238721652812, + "grad_norm": 0.6776062846183777, + "learning_rate": 0.00012418027540918693, + "loss": 2.5704, + "step": 8503 + }, + { + "epoch": 0.6863045759018642, + "grad_norm": 0.7009913921356201, + "learning_rate": 0.00012416495667967608, + "loss": 2.5928, + "step": 8504 + }, + { + "epoch": 0.6863852796384473, + "grad_norm": 0.607571005821228, + "learning_rate": 0.00012414963734791137, + "loss": 2.5459, + "step": 8505 + }, + { + "epoch": 0.6864659833750303, + "grad_norm": 0.6798292398452759, + "learning_rate": 0.00012413431741427458, + "loss": 2.6585, + "step": 8506 + }, + { + "epoch": 0.6865466871116133, + "grad_norm": 0.7892771363258362, + "learning_rate": 0.00012411899687914747, + "loss": 2.5781, + "step": 8507 + }, + { + "epoch": 0.6866273908481962, + "grad_norm": 0.6683816909790039, + "learning_rate": 0.00012410367574291199, + "loss": 2.5598, + "step": 8508 + }, + { + "epoch": 0.6867080945847793, + "grad_norm": 0.7591805458068848, + "learning_rate": 0.00012408835400594983, + "loss": 2.6478, + "step": 8509 + }, + { + "epoch": 0.6867887983213623, + "grad_norm": 0.6896353960037231, + "learning_rate": 0.00012407303166864293, + "loss": 2.5418, + "step": 8510 + }, + { + "epoch": 0.6868695020579453, + "grad_norm": 0.6657233834266663, + "learning_rate": 0.00012405770873137316, + "loss": 2.5753, + "step": 8511 + }, + { + "epoch": 0.6869502057945283, + "grad_norm": 0.6775455474853516, + "learning_rate": 0.00012404238519452237, + "loss": 2.4902, + "step": 8512 + }, + { + "epoch": 
0.6870309095311113, + "grad_norm": 0.6572847962379456, + "learning_rate": 0.00012402706105847254, + "loss": 2.6189, + "step": 8513 + }, + { + "epoch": 0.6871116132676943, + "grad_norm": 0.7159940004348755, + "learning_rate": 0.00012401173632360557, + "loss": 2.5928, + "step": 8514 + }, + { + "epoch": 0.6871923170042773, + "grad_norm": 0.7178850173950195, + "learning_rate": 0.0001239964109903033, + "loss": 2.5342, + "step": 8515 + }, + { + "epoch": 0.6872730207408603, + "grad_norm": 0.6761649250984192, + "learning_rate": 0.00012398108505894774, + "loss": 2.5716, + "step": 8516 + }, + { + "epoch": 0.6873537244774433, + "grad_norm": 0.6831200122833252, + "learning_rate": 0.0001239657585299209, + "loss": 2.5506, + "step": 8517 + }, + { + "epoch": 0.6874344282140263, + "grad_norm": 0.7064316868782043, + "learning_rate": 0.00012395043140360468, + "loss": 2.541, + "step": 8518 + }, + { + "epoch": 0.6875151319506093, + "grad_norm": 0.7269963026046753, + "learning_rate": 0.00012393510368038113, + "loss": 2.541, + "step": 8519 + }, + { + "epoch": 0.6875958356871923, + "grad_norm": 0.6651471257209778, + "learning_rate": 0.00012391977536063218, + "loss": 2.5476, + "step": 8520 + }, + { + "epoch": 0.6876765394237753, + "grad_norm": 0.7649257779121399, + "learning_rate": 0.00012390444644473994, + "loss": 2.601, + "step": 8521 + }, + { + "epoch": 0.6877572431603584, + "grad_norm": 0.6637376546859741, + "learning_rate": 0.0001238891169330864, + "loss": 2.5582, + "step": 8522 + }, + { + "epoch": 0.6878379468969413, + "grad_norm": 0.6609189510345459, + "learning_rate": 0.0001238737868260536, + "loss": 2.5795, + "step": 8523 + }, + { + "epoch": 0.6879186506335243, + "grad_norm": 0.657494843006134, + "learning_rate": 0.00012385845612402363, + "loss": 2.6005, + "step": 8524 + }, + { + "epoch": 0.6879993543701073, + "grad_norm": 0.6780641674995422, + "learning_rate": 0.00012384312482737858, + "loss": 2.514, + "step": 8525 + }, + { + "epoch": 0.6880800581066904, + "grad_norm": 
0.7310795187950134, + "learning_rate": 0.00012382779293650052, + "loss": 2.5707, + "step": 8526 + }, + { + "epoch": 0.6881607618432733, + "grad_norm": 0.6722557544708252, + "learning_rate": 0.0001238124604517716, + "loss": 2.5897, + "step": 8527 + }, + { + "epoch": 0.6882414655798563, + "grad_norm": 0.6502346992492676, + "learning_rate": 0.0001237971273735739, + "loss": 2.5554, + "step": 8528 + }, + { + "epoch": 0.6883221693164393, + "grad_norm": 0.6993897557258606, + "learning_rate": 0.0001237817937022896, + "loss": 2.6328, + "step": 8529 + }, + { + "epoch": 0.6884028730530224, + "grad_norm": 0.7069644331932068, + "learning_rate": 0.00012376645943830083, + "loss": 2.5957, + "step": 8530 + }, + { + "epoch": 0.6884835767896054, + "grad_norm": 0.7193333506584167, + "learning_rate": 0.00012375112458198973, + "loss": 2.6505, + "step": 8531 + }, + { + "epoch": 0.6885642805261883, + "grad_norm": 0.6821088194847107, + "learning_rate": 0.00012373578913373853, + "loss": 2.6129, + "step": 8532 + }, + { + "epoch": 0.6886449842627713, + "grad_norm": 0.6499428749084473, + "learning_rate": 0.00012372045309392947, + "loss": 2.6053, + "step": 8533 + }, + { + "epoch": 0.6887256879993544, + "grad_norm": 0.7469449639320374, + "learning_rate": 0.00012370511646294464, + "loss": 2.6423, + "step": 8534 + }, + { + "epoch": 0.6888063917359374, + "grad_norm": 0.7326325178146362, + "learning_rate": 0.00012368977924116637, + "loss": 2.5708, + "step": 8535 + }, + { + "epoch": 0.6888870954725204, + "grad_norm": 0.7459580302238464, + "learning_rate": 0.00012367444142897686, + "loss": 2.544, + "step": 8536 + }, + { + "epoch": 0.6889677992091033, + "grad_norm": 0.7198929786682129, + "learning_rate": 0.00012365910302675843, + "loss": 2.6295, + "step": 8537 + }, + { + "epoch": 0.6890485029456864, + "grad_norm": 0.8139802813529968, + "learning_rate": 0.0001236437640348933, + "loss": 2.549, + "step": 8538 + }, + { + "epoch": 0.6891292066822694, + "grad_norm": 0.6497162580490112, + "learning_rate": 
0.00012362842445376372, + "loss": 2.5849, + "step": 8539 + }, + { + "epoch": 0.6892099104188524, + "grad_norm": 0.7378165125846863, + "learning_rate": 0.00012361308428375208, + "loss": 2.606, + "step": 8540 + }, + { + "epoch": 0.6892906141554354, + "grad_norm": 0.6807567477226257, + "learning_rate": 0.00012359774352524062, + "loss": 2.5892, + "step": 8541 + }, + { + "epoch": 0.6893713178920184, + "grad_norm": 0.6639370918273926, + "learning_rate": 0.0001235824021786117, + "loss": 2.5249, + "step": 8542 + }, + { + "epoch": 0.6894520216286014, + "grad_norm": 0.7140880823135376, + "learning_rate": 0.00012356706024424773, + "loss": 2.5877, + "step": 8543 + }, + { + "epoch": 0.6895327253651844, + "grad_norm": 0.7079257965087891, + "learning_rate": 0.00012355171772253097, + "loss": 2.6011, + "step": 8544 + }, + { + "epoch": 0.6896134291017674, + "grad_norm": 0.7150856852531433, + "learning_rate": 0.00012353637461384387, + "loss": 2.549, + "step": 8545 + }, + { + "epoch": 0.6896941328383505, + "grad_norm": 0.6896397471427917, + "learning_rate": 0.00012352103091856876, + "loss": 2.5452, + "step": 8546 + }, + { + "epoch": 0.6897748365749334, + "grad_norm": 0.696964681148529, + "learning_rate": 0.00012350568663708808, + "loss": 2.5075, + "step": 8547 + }, + { + "epoch": 0.6898555403115164, + "grad_norm": 0.6926069855690002, + "learning_rate": 0.00012349034176978427, + "loss": 2.5905, + "step": 8548 + }, + { + "epoch": 0.6899362440480994, + "grad_norm": 0.6949423551559448, + "learning_rate": 0.00012347499631703968, + "loss": 2.5284, + "step": 8549 + }, + { + "epoch": 0.6900169477846825, + "grad_norm": 0.6480536460876465, + "learning_rate": 0.0001234596502792369, + "loss": 2.5713, + "step": 8550 + }, + { + "epoch": 0.6900976515212655, + "grad_norm": 0.6990019679069519, + "learning_rate": 0.00012344430365675825, + "loss": 2.5826, + "step": 8551 + }, + { + "epoch": 0.6901783552578484, + "grad_norm": 0.7063903212547302, + "learning_rate": 0.00012342895644998627, + "loss": 2.5271, 
+ "step": 8552 + }, + { + "epoch": 0.6902590589944314, + "grad_norm": 0.7037132978439331, + "learning_rate": 0.0001234136086593035, + "loss": 2.5855, + "step": 8553 + }, + { + "epoch": 0.6903397627310145, + "grad_norm": 0.679701030254364, + "learning_rate": 0.00012339826028509235, + "loss": 2.5577, + "step": 8554 + }, + { + "epoch": 0.6904204664675975, + "grad_norm": 0.7088965773582458, + "learning_rate": 0.0001233829113277354, + "loss": 2.5767, + "step": 8555 + }, + { + "epoch": 0.6905011702041804, + "grad_norm": 0.7115551829338074, + "learning_rate": 0.00012336756178761517, + "loss": 2.5651, + "step": 8556 + }, + { + "epoch": 0.6905818739407634, + "grad_norm": 0.6778836250305176, + "learning_rate": 0.00012335221166511425, + "loss": 2.6388, + "step": 8557 + }, + { + "epoch": 0.6906625776773465, + "grad_norm": 0.6358879804611206, + "learning_rate": 0.00012333686096061515, + "loss": 2.5493, + "step": 8558 + }, + { + "epoch": 0.6907432814139295, + "grad_norm": 0.688197135925293, + "learning_rate": 0.00012332150967450046, + "loss": 2.5707, + "step": 8559 + }, + { + "epoch": 0.6908239851505125, + "grad_norm": 0.6931524872779846, + "learning_rate": 0.0001233061578071528, + "loss": 2.5561, + "step": 8560 + }, + { + "epoch": 0.6909046888870954, + "grad_norm": 0.6684975624084473, + "learning_rate": 0.00012329080535895478, + "loss": 2.6442, + "step": 8561 + }, + { + "epoch": 0.6909853926236785, + "grad_norm": 0.6865811347961426, + "learning_rate": 0.00012327545233028898, + "loss": 2.564, + "step": 8562 + }, + { + "epoch": 0.6910660963602615, + "grad_norm": 0.6999006867408752, + "learning_rate": 0.0001232600987215381, + "loss": 2.5607, + "step": 8563 + }, + { + "epoch": 0.6911468000968445, + "grad_norm": 0.6734526753425598, + "learning_rate": 0.0001232447445330847, + "loss": 2.5261, + "step": 8564 + }, + { + "epoch": 0.6912275038334275, + "grad_norm": 0.7447343468666077, + "learning_rate": 0.00012322938976531153, + "loss": 2.5359, + "step": 8565 + }, + { + "epoch": 
0.6913082075700105, + "grad_norm": 0.6498517394065857, + "learning_rate": 0.00012321403441860126, + "loss": 2.5345, + "step": 8566 + }, + { + "epoch": 0.6913889113065935, + "grad_norm": 0.692933976650238, + "learning_rate": 0.00012319867849333658, + "loss": 2.6293, + "step": 8567 + }, + { + "epoch": 0.6914696150431765, + "grad_norm": 0.728430449962616, + "learning_rate": 0.00012318332198990015, + "loss": 2.618, + "step": 8568 + }, + { + "epoch": 0.6915503187797595, + "grad_norm": 0.7029061913490295, + "learning_rate": 0.00012316796490867478, + "loss": 2.6151, + "step": 8569 + }, + { + "epoch": 0.6916310225163425, + "grad_norm": 0.6692330241203308, + "learning_rate": 0.00012315260725004313, + "loss": 2.5511, + "step": 8570 + }, + { + "epoch": 0.6917117262529255, + "grad_norm": 0.6811983585357666, + "learning_rate": 0.000123137249014388, + "loss": 2.6337, + "step": 8571 + }, + { + "epoch": 0.6917924299895085, + "grad_norm": 0.7387441992759705, + "learning_rate": 0.00012312189020209212, + "loss": 2.5679, + "step": 8572 + }, + { + "epoch": 0.6918731337260915, + "grad_norm": 0.7180185914039612, + "learning_rate": 0.0001231065308135383, + "loss": 2.639, + "step": 8573 + }, + { + "epoch": 0.6919538374626745, + "grad_norm": 0.6997829079627991, + "learning_rate": 0.00012309117084910936, + "loss": 2.5392, + "step": 8574 + }, + { + "epoch": 0.6920345411992576, + "grad_norm": 0.7004552483558655, + "learning_rate": 0.00012307581030918807, + "loss": 2.6033, + "step": 8575 + }, + { + "epoch": 0.6921152449358405, + "grad_norm": 0.7183418273925781, + "learning_rate": 0.00012306044919415724, + "loss": 2.6302, + "step": 8576 + }, + { + "epoch": 0.6921959486724235, + "grad_norm": 0.6645712852478027, + "learning_rate": 0.00012304508750439976, + "loss": 2.5401, + "step": 8577 + }, + { + "epoch": 0.6922766524090065, + "grad_norm": 0.6455898284912109, + "learning_rate": 0.00012302972524029848, + "loss": 2.5084, + "step": 8578 + }, + { + "epoch": 0.6923573561455896, + "grad_norm": 
0.6933849453926086, + "learning_rate": 0.00012301436240223622, + "loss": 2.5734, + "step": 8579 + }, + { + "epoch": 0.6924380598821726, + "grad_norm": 0.7967655658721924, + "learning_rate": 0.00012299899899059587, + "loss": 2.5721, + "step": 8580 + }, + { + "epoch": 0.6925187636187555, + "grad_norm": 0.706730306148529, + "learning_rate": 0.0001229836350057604, + "loss": 2.6216, + "step": 8581 + }, + { + "epoch": 0.6925994673553385, + "grad_norm": 0.7021105885505676, + "learning_rate": 0.0001229682704481126, + "loss": 2.4877, + "step": 8582 + }, + { + "epoch": 0.6926801710919216, + "grad_norm": 0.7197253108024597, + "learning_rate": 0.00012295290531803553, + "loss": 2.6124, + "step": 8583 + }, + { + "epoch": 0.6927608748285046, + "grad_norm": 0.7559605836868286, + "learning_rate": 0.00012293753961591198, + "loss": 2.6391, + "step": 8584 + }, + { + "epoch": 0.6928415785650875, + "grad_norm": 0.7074676752090454, + "learning_rate": 0.00012292217334212505, + "loss": 2.5949, + "step": 8585 + }, + { + "epoch": 0.6929222823016705, + "grad_norm": 0.6843528747558594, + "learning_rate": 0.00012290680649705763, + "loss": 2.4981, + "step": 8586 + }, + { + "epoch": 0.6930029860382536, + "grad_norm": 0.6853117942810059, + "learning_rate": 0.00012289143908109266, + "loss": 2.6352, + "step": 8587 + }, + { + "epoch": 0.6930836897748366, + "grad_norm": 0.6545630097389221, + "learning_rate": 0.00012287607109461325, + "loss": 2.5344, + "step": 8588 + }, + { + "epoch": 0.6931643935114196, + "grad_norm": 0.7377945184707642, + "learning_rate": 0.00012286070253800233, + "loss": 2.5895, + "step": 8589 + }, + { + "epoch": 0.6932450972480025, + "grad_norm": 0.6919971108436584, + "learning_rate": 0.00012284533341164295, + "loss": 2.5825, + "step": 8590 + }, + { + "epoch": 0.6933258009845856, + "grad_norm": 0.6911910176277161, + "learning_rate": 0.00012282996371591816, + "loss": 2.6008, + "step": 8591 + }, + { + "epoch": 0.6934065047211686, + "grad_norm": 0.7486373782157898, + "learning_rate": 
0.00012281459345121095, + "loss": 2.6056, + "step": 8592 + }, + { + "epoch": 0.6934872084577516, + "grad_norm": 0.6829040050506592, + "learning_rate": 0.00012279922261790443, + "loss": 2.5161, + "step": 8593 + }, + { + "epoch": 0.6935679121943346, + "grad_norm": 0.7410104870796204, + "learning_rate": 0.00012278385121638173, + "loss": 2.6114, + "step": 8594 + }, + { + "epoch": 0.6936486159309176, + "grad_norm": 0.7355940937995911, + "learning_rate": 0.00012276847924702587, + "loss": 2.6371, + "step": 8595 + }, + { + "epoch": 0.6937293196675006, + "grad_norm": 0.650641679763794, + "learning_rate": 0.00012275310671022003, + "loss": 2.5568, + "step": 8596 + }, + { + "epoch": 0.6938100234040836, + "grad_norm": 0.661573052406311, + "learning_rate": 0.00012273773360634726, + "loss": 2.5828, + "step": 8597 + }, + { + "epoch": 0.6938907271406666, + "grad_norm": 0.6848435401916504, + "learning_rate": 0.00012272235993579072, + "loss": 2.5226, + "step": 8598 + }, + { + "epoch": 0.6939714308772497, + "grad_norm": 0.7015430927276611, + "learning_rate": 0.0001227069856989336, + "loss": 2.6156, + "step": 8599 + }, + { + "epoch": 0.6940521346138326, + "grad_norm": 0.7058628797531128, + "learning_rate": 0.000122691610896159, + "loss": 2.6007, + "step": 8600 + }, + { + "epoch": 0.6941328383504156, + "grad_norm": 0.6589432954788208, + "learning_rate": 0.0001226762355278502, + "loss": 2.5551, + "step": 8601 + }, + { + "epoch": 0.6942135420869986, + "grad_norm": 0.6875284910202026, + "learning_rate": 0.0001226608595943903, + "loss": 2.5537, + "step": 8602 + }, + { + "epoch": 0.6942942458235817, + "grad_norm": 0.7178356051445007, + "learning_rate": 0.00012264548309616252, + "loss": 2.655, + "step": 8603 + }, + { + "epoch": 0.6943749495601647, + "grad_norm": 0.7327077388763428, + "learning_rate": 0.00012263010603355017, + "loss": 2.5574, + "step": 8604 + }, + { + "epoch": 0.6944556532967476, + "grad_norm": 0.6318337917327881, + "learning_rate": 0.0001226147284069364, + "loss": 2.577, + 
"step": 8605 + }, + { + "epoch": 0.6945363570333306, + "grad_norm": 0.674872875213623, + "learning_rate": 0.00012259935021670444, + "loss": 2.6225, + "step": 8606 + }, + { + "epoch": 0.6946170607699137, + "grad_norm": 0.6554198861122131, + "learning_rate": 0.0001225839714632376, + "loss": 2.5951, + "step": 8607 + }, + { + "epoch": 0.6946977645064967, + "grad_norm": 0.7086453437805176, + "learning_rate": 0.00012256859214691918, + "loss": 2.622, + "step": 8608 + }, + { + "epoch": 0.6947784682430796, + "grad_norm": 0.6609488129615784, + "learning_rate": 0.00012255321226813245, + "loss": 2.5623, + "step": 8609 + }, + { + "epoch": 0.6948591719796626, + "grad_norm": 0.7504609823226929, + "learning_rate": 0.00012253783182726075, + "loss": 2.5264, + "step": 8610 + }, + { + "epoch": 0.6949398757162457, + "grad_norm": 0.6702934503555298, + "learning_rate": 0.00012252245082468733, + "loss": 2.5877, + "step": 8611 + }, + { + "epoch": 0.6950205794528287, + "grad_norm": 0.7116326689720154, + "learning_rate": 0.00012250706926079553, + "loss": 2.5629, + "step": 8612 + }, + { + "epoch": 0.6951012831894117, + "grad_norm": 0.7495368719100952, + "learning_rate": 0.00012249168713596875, + "loss": 2.5731, + "step": 8613 + }, + { + "epoch": 0.6951819869259946, + "grad_norm": 0.7434844970703125, + "learning_rate": 0.0001224763044505904, + "loss": 2.6008, + "step": 8614 + }, + { + "epoch": 0.6952626906625777, + "grad_norm": 0.719667375087738, + "learning_rate": 0.00012246092120504371, + "loss": 2.6051, + "step": 8615 + }, + { + "epoch": 0.6953433943991607, + "grad_norm": 0.7189086079597473, + "learning_rate": 0.00012244553739971216, + "loss": 2.5662, + "step": 8616 + }, + { + "epoch": 0.6954240981357437, + "grad_norm": 0.7222673892974854, + "learning_rate": 0.00012243015303497917, + "loss": 2.609, + "step": 8617 + }, + { + "epoch": 0.6955048018723267, + "grad_norm": 0.7323142290115356, + "learning_rate": 0.00012241476811122813, + "loss": 2.5458, + "step": 8618 + }, + { + "epoch": 
0.6955855056089096, + "grad_norm": 0.7374032735824585, + "learning_rate": 0.00012239938262884246, + "loss": 2.6147, + "step": 8619 + }, + { + "epoch": 0.6956662093454927, + "grad_norm": 0.6707843542098999, + "learning_rate": 0.00012238399658820562, + "loss": 2.6462, + "step": 8620 + }, + { + "epoch": 0.6957469130820757, + "grad_norm": 0.7603243589401245, + "learning_rate": 0.0001223686099897011, + "loss": 2.6295, + "step": 8621 + }, + { + "epoch": 0.6958276168186587, + "grad_norm": 0.6966906785964966, + "learning_rate": 0.00012235322283371232, + "loss": 2.545, + "step": 8622 + }, + { + "epoch": 0.6959083205552417, + "grad_norm": 0.6757891774177551, + "learning_rate": 0.0001223378351206228, + "loss": 2.5548, + "step": 8623 + }, + { + "epoch": 0.6959890242918247, + "grad_norm": 0.6901456713676453, + "learning_rate": 0.00012232244685081605, + "loss": 2.5734, + "step": 8624 + }, + { + "epoch": 0.6960697280284077, + "grad_norm": 0.6942903995513916, + "learning_rate": 0.00012230705802467558, + "loss": 2.5495, + "step": 8625 + }, + { + "epoch": 0.6961504317649907, + "grad_norm": 0.6774815320968628, + "learning_rate": 0.0001222916686425849, + "loss": 2.5076, + "step": 8626 + }, + { + "epoch": 0.6962311355015737, + "grad_norm": 0.8037571310997009, + "learning_rate": 0.00012227627870492754, + "loss": 2.6737, + "step": 8627 + }, + { + "epoch": 0.6963118392381568, + "grad_norm": 0.7027560472488403, + "learning_rate": 0.0001222608882120871, + "loss": 2.5401, + "step": 8628 + }, + { + "epoch": 0.6963925429747397, + "grad_norm": 0.6651299595832825, + "learning_rate": 0.00012224549716444714, + "loss": 2.5835, + "step": 8629 + }, + { + "epoch": 0.6964732467113227, + "grad_norm": 0.7082433104515076, + "learning_rate": 0.00012223010556239124, + "loss": 2.5622, + "step": 8630 + }, + { + "epoch": 0.6965539504479057, + "grad_norm": 0.7993464469909668, + "learning_rate": 0.00012221471340630305, + "loss": 2.655, + "step": 8631 + }, + { + "epoch": 0.6966346541844888, + "grad_norm": 
0.7375298142433167, + "learning_rate": 0.00012219932069656606, + "loss": 2.598, + "step": 8632 + }, + { + "epoch": 0.6967153579210718, + "grad_norm": 0.6915456652641296, + "learning_rate": 0.00012218392743356397, + "loss": 2.5649, + "step": 8633 + }, + { + "epoch": 0.6967960616576547, + "grad_norm": 0.679256021976471, + "learning_rate": 0.00012216853361768045, + "loss": 2.545, + "step": 8634 + }, + { + "epoch": 0.6968767653942377, + "grad_norm": 0.7234694361686707, + "learning_rate": 0.0001221531392492991, + "loss": 2.5863, + "step": 8635 + }, + { + "epoch": 0.6969574691308208, + "grad_norm": 0.7053319811820984, + "learning_rate": 0.00012213774432880364, + "loss": 2.5829, + "step": 8636 + }, + { + "epoch": 0.6970381728674038, + "grad_norm": 0.7584449648857117, + "learning_rate": 0.00012212234885657772, + "loss": 2.5855, + "step": 8637 + }, + { + "epoch": 0.6971188766039867, + "grad_norm": 0.7098579406738281, + "learning_rate": 0.00012210695283300501, + "loss": 2.6057, + "step": 8638 + }, + { + "epoch": 0.6971995803405697, + "grad_norm": 0.7350205779075623, + "learning_rate": 0.00012209155625846928, + "loss": 2.546, + "step": 8639 + }, + { + "epoch": 0.6972802840771528, + "grad_norm": 0.6842331290245056, + "learning_rate": 0.0001220761591333542, + "loss": 2.5602, + "step": 8640 + }, + { + "epoch": 0.6973609878137358, + "grad_norm": 0.6731252074241638, + "learning_rate": 0.00012206076145804354, + "loss": 2.4676, + "step": 8641 + }, + { + "epoch": 0.6974416915503188, + "grad_norm": 0.7271167635917664, + "learning_rate": 0.00012204536323292104, + "loss": 2.5605, + "step": 8642 + }, + { + "epoch": 0.6975223952869017, + "grad_norm": 0.6860780715942383, + "learning_rate": 0.00012202996445837043, + "loss": 2.5041, + "step": 8643 + }, + { + "epoch": 0.6976030990234848, + "grad_norm": 0.7134578824043274, + "learning_rate": 0.00012201456513477554, + "loss": 2.614, + "step": 8644 + }, + { + "epoch": 0.6976838027600678, + "grad_norm": 0.6995248198509216, + "learning_rate": 
0.00012199916526252014, + "loss": 2.5087, + "step": 8645 + }, + { + "epoch": 0.6977645064966508, + "grad_norm": 0.7280197143554688, + "learning_rate": 0.00012198376484198803, + "loss": 2.5723, + "step": 8646 + }, + { + "epoch": 0.6978452102332338, + "grad_norm": 0.6898967623710632, + "learning_rate": 0.00012196836387356306, + "loss": 2.6073, + "step": 8647 + }, + { + "epoch": 0.6979259139698168, + "grad_norm": 0.6670758128166199, + "learning_rate": 0.00012195296235762901, + "loss": 2.5276, + "step": 8648 + }, + { + "epoch": 0.6980066177063998, + "grad_norm": 0.6862780451774597, + "learning_rate": 0.00012193756029456973, + "loss": 2.5363, + "step": 8649 + }, + { + "epoch": 0.6980873214429828, + "grad_norm": 0.6568876504898071, + "learning_rate": 0.00012192215768476916, + "loss": 2.5828, + "step": 8650 + }, + { + "epoch": 0.6981680251795658, + "grad_norm": 0.7237746119499207, + "learning_rate": 0.00012190675452861107, + "loss": 2.6076, + "step": 8651 + }, + { + "epoch": 0.6982487289161489, + "grad_norm": 0.6831536293029785, + "learning_rate": 0.00012189135082647943, + "loss": 2.5199, + "step": 8652 + }, + { + "epoch": 0.6983294326527318, + "grad_norm": 0.6767029166221619, + "learning_rate": 0.00012187594657875805, + "loss": 2.5859, + "step": 8653 + }, + { + "epoch": 0.6984101363893148, + "grad_norm": 0.6977167129516602, + "learning_rate": 0.00012186054178583092, + "loss": 2.5831, + "step": 8654 + }, + { + "epoch": 0.6984908401258978, + "grad_norm": 0.6369525194168091, + "learning_rate": 0.00012184513644808197, + "loss": 2.5839, + "step": 8655 + }, + { + "epoch": 0.6985715438624809, + "grad_norm": 0.6814634203910828, + "learning_rate": 0.00012182973056589508, + "loss": 2.5493, + "step": 8656 + }, + { + "epoch": 0.6986522475990639, + "grad_norm": 0.6895000338554382, + "learning_rate": 0.00012181432413965428, + "loss": 2.5616, + "step": 8657 + }, + { + "epoch": 0.6987329513356468, + "grad_norm": 0.6689717769622803, + "learning_rate": 0.00012179891716974345, + "loss": 
2.5481, + "step": 8658 + }, + { + "epoch": 0.6988136550722298, + "grad_norm": 0.6945160031318665, + "learning_rate": 0.00012178350965654666, + "loss": 2.5781, + "step": 8659 + }, + { + "epoch": 0.6988943588088129, + "grad_norm": 0.7226110696792603, + "learning_rate": 0.00012176810160044785, + "loss": 2.5767, + "step": 8660 + }, + { + "epoch": 0.6989750625453959, + "grad_norm": 0.6810569167137146, + "learning_rate": 0.00012175269300183105, + "loss": 2.5184, + "step": 8661 + }, + { + "epoch": 0.6990557662819789, + "grad_norm": 0.727281928062439, + "learning_rate": 0.0001217372838610803, + "loss": 2.5972, + "step": 8662 + }, + { + "epoch": 0.6991364700185618, + "grad_norm": 0.7111573219299316, + "learning_rate": 0.00012172187417857959, + "loss": 2.6445, + "step": 8663 + }, + { + "epoch": 0.6992171737551449, + "grad_norm": 0.6808965802192688, + "learning_rate": 0.00012170646395471296, + "loss": 2.5191, + "step": 8664 + }, + { + "epoch": 0.6992978774917279, + "grad_norm": 0.7063688635826111, + "learning_rate": 0.00012169105318986455, + "loss": 2.6021, + "step": 8665 + }, + { + "epoch": 0.6993785812283109, + "grad_norm": 0.6522886753082275, + "learning_rate": 0.0001216756418844184, + "loss": 2.5697, + "step": 8666 + }, + { + "epoch": 0.6994592849648938, + "grad_norm": 0.6706095337867737, + "learning_rate": 0.00012166023003875859, + "loss": 2.5706, + "step": 8667 + }, + { + "epoch": 0.6995399887014769, + "grad_norm": 0.6744416356086731, + "learning_rate": 0.00012164481765326923, + "loss": 2.5713, + "step": 8668 + }, + { + "epoch": 0.6996206924380599, + "grad_norm": 0.7385411858558655, + "learning_rate": 0.0001216294047283344, + "loss": 2.5543, + "step": 8669 + }, + { + "epoch": 0.6997013961746429, + "grad_norm": 0.7286678552627563, + "learning_rate": 0.0001216139912643383, + "loss": 2.588, + "step": 8670 + }, + { + "epoch": 0.6997820999112259, + "grad_norm": 0.7065937519073486, + "learning_rate": 0.00012159857726166503, + "loss": 2.5475, + "step": 8671 + }, + { + "epoch": 
0.6998628036478088, + "grad_norm": 0.6609788537025452, + "learning_rate": 0.00012158316272069874, + "loss": 2.5664, + "step": 8672 + }, + { + "epoch": 0.6999435073843919, + "grad_norm": 0.7360579371452332, + "learning_rate": 0.00012156774764182364, + "loss": 2.5822, + "step": 8673 + }, + { + "epoch": 0.7000242111209749, + "grad_norm": 0.6265058517456055, + "learning_rate": 0.00012155233202542384, + "loss": 2.5849, + "step": 8674 + }, + { + "epoch": 0.7001049148575579, + "grad_norm": 0.646976888179779, + "learning_rate": 0.00012153691587188363, + "loss": 2.5839, + "step": 8675 + }, + { + "epoch": 0.7001856185941409, + "grad_norm": 0.6634985208511353, + "learning_rate": 0.0001215214991815872, + "loss": 2.5434, + "step": 8676 + }, + { + "epoch": 0.700266322330724, + "grad_norm": 0.6757560968399048, + "learning_rate": 0.00012150608195491871, + "loss": 2.6186, + "step": 8677 + }, + { + "epoch": 0.7003470260673069, + "grad_norm": 0.7077112197875977, + "learning_rate": 0.00012149066419226247, + "loss": 2.5757, + "step": 8678 + }, + { + "epoch": 0.7004277298038899, + "grad_norm": 0.698226273059845, + "learning_rate": 0.00012147524589400268, + "loss": 2.5307, + "step": 8679 + }, + { + "epoch": 0.7005084335404729, + "grad_norm": 0.6782405376434326, + "learning_rate": 0.00012145982706052361, + "loss": 2.5582, + "step": 8680 + }, + { + "epoch": 0.700589137277056, + "grad_norm": 0.6832882165908813, + "learning_rate": 0.0001214444076922096, + "loss": 2.574, + "step": 8681 + }, + { + "epoch": 0.7006698410136389, + "grad_norm": 0.7182612419128418, + "learning_rate": 0.00012142898778944485, + "loss": 2.6457, + "step": 8682 + }, + { + "epoch": 0.7007505447502219, + "grad_norm": 0.7043644785881042, + "learning_rate": 0.00012141356735261373, + "loss": 2.5244, + "step": 8683 + }, + { + "epoch": 0.7008312484868049, + "grad_norm": 0.6942669749259949, + "learning_rate": 0.00012139814638210054, + "loss": 2.5507, + "step": 8684 + }, + { + "epoch": 0.700911952223388, + "grad_norm": 
0.8412066102027893, + "learning_rate": 0.00012138272487828959, + "loss": 2.6025, + "step": 8685 + }, + { + "epoch": 0.700992655959971, + "grad_norm": 0.6906788945198059, + "learning_rate": 0.00012136730284156525, + "loss": 2.5259, + "step": 8686 + }, + { + "epoch": 0.7010733596965539, + "grad_norm": 0.7258631587028503, + "learning_rate": 0.00012135188027231188, + "loss": 2.6311, + "step": 8687 + }, + { + "epoch": 0.7011540634331369, + "grad_norm": 0.6294744610786438, + "learning_rate": 0.00012133645717091382, + "loss": 2.5969, + "step": 8688 + }, + { + "epoch": 0.70123476716972, + "grad_norm": 0.6994131207466125, + "learning_rate": 0.00012132103353775548, + "loss": 2.5954, + "step": 8689 + }, + { + "epoch": 0.701315470906303, + "grad_norm": 0.671441912651062, + "learning_rate": 0.00012130560937322124, + "loss": 2.5628, + "step": 8690 + }, + { + "epoch": 0.701396174642886, + "grad_norm": 0.6915482878684998, + "learning_rate": 0.00012129018467769555, + "loss": 2.5173, + "step": 8691 + }, + { + "epoch": 0.7014768783794689, + "grad_norm": 0.6810318231582642, + "learning_rate": 0.00012127475945156279, + "loss": 2.6186, + "step": 8692 + }, + { + "epoch": 0.701557582116052, + "grad_norm": 0.7931910157203674, + "learning_rate": 0.00012125933369520741, + "loss": 2.6243, + "step": 8693 + }, + { + "epoch": 0.701638285852635, + "grad_norm": 0.6843162178993225, + "learning_rate": 0.00012124390740901386, + "loss": 2.6072, + "step": 8694 + }, + { + "epoch": 0.701718989589218, + "grad_norm": 0.672115683555603, + "learning_rate": 0.0001212284805933666, + "loss": 2.6027, + "step": 8695 + }, + { + "epoch": 0.7017996933258009, + "grad_norm": 0.65242600440979, + "learning_rate": 0.00012121305324865014, + "loss": 2.5128, + "step": 8696 + }, + { + "epoch": 0.701880397062384, + "grad_norm": 0.7253173589706421, + "learning_rate": 0.00012119762537524893, + "loss": 2.5776, + "step": 8697 + }, + { + "epoch": 0.701961100798967, + "grad_norm": 0.6536431312561035, + "learning_rate": 
0.00012118219697354745, + "loss": 2.5656, + "step": 8698 + }, + { + "epoch": 0.70204180453555, + "grad_norm": 0.7121500372886658, + "learning_rate": 0.00012116676804393028, + "loss": 2.5878, + "step": 8699 + }, + { + "epoch": 0.702122508272133, + "grad_norm": 0.676449716091156, + "learning_rate": 0.00012115133858678191, + "loss": 2.6624, + "step": 8700 + }, + { + "epoch": 0.702203212008716, + "grad_norm": 0.7230382561683655, + "learning_rate": 0.0001211359086024869, + "loss": 2.5461, + "step": 8701 + }, + { + "epoch": 0.702283915745299, + "grad_norm": 0.6679937839508057, + "learning_rate": 0.00012112047809142979, + "loss": 2.5568, + "step": 8702 + }, + { + "epoch": 0.702364619481882, + "grad_norm": 0.6627704501152039, + "learning_rate": 0.0001211050470539952, + "loss": 2.4819, + "step": 8703 + }, + { + "epoch": 0.702445323218465, + "grad_norm": 0.6680646538734436, + "learning_rate": 0.0001210896154905676, + "loss": 2.5722, + "step": 8704 + }, + { + "epoch": 0.7025260269550481, + "grad_norm": 0.7406336665153503, + "learning_rate": 0.00012107418340153167, + "loss": 2.5722, + "step": 8705 + }, + { + "epoch": 0.702606730691631, + "grad_norm": 0.6634557247161865, + "learning_rate": 0.00012105875078727203, + "loss": 2.5747, + "step": 8706 + }, + { + "epoch": 0.702687434428214, + "grad_norm": 0.6521568894386292, + "learning_rate": 0.00012104331764817325, + "loss": 2.555, + "step": 8707 + }, + { + "epoch": 0.702768138164797, + "grad_norm": 0.677606463432312, + "learning_rate": 0.00012102788398461999, + "loss": 2.5544, + "step": 8708 + }, + { + "epoch": 0.7028488419013801, + "grad_norm": 0.6593700051307678, + "learning_rate": 0.0001210124497969969, + "loss": 2.5252, + "step": 8709 + }, + { + "epoch": 0.7029295456379631, + "grad_norm": 0.686903715133667, + "learning_rate": 0.00012099701508568863, + "loss": 2.6513, + "step": 8710 + }, + { + "epoch": 0.703010249374546, + "grad_norm": 0.6395620107650757, + "learning_rate": 0.00012098157985107987, + "loss": 2.5169, + "step": 
8711 + }, + { + "epoch": 0.703090953111129, + "grad_norm": 0.7387555837631226, + "learning_rate": 0.00012096614409355526, + "loss": 2.5741, + "step": 8712 + }, + { + "epoch": 0.7031716568477121, + "grad_norm": 0.665900707244873, + "learning_rate": 0.00012095070781349957, + "loss": 2.5068, + "step": 8713 + }, + { + "epoch": 0.7032523605842951, + "grad_norm": 0.6983458399772644, + "learning_rate": 0.00012093527101129745, + "loss": 2.5028, + "step": 8714 + }, + { + "epoch": 0.703333064320878, + "grad_norm": 0.6250826120376587, + "learning_rate": 0.00012091983368733366, + "loss": 2.5765, + "step": 8715 + }, + { + "epoch": 0.703413768057461, + "grad_norm": 0.7031501531600952, + "learning_rate": 0.00012090439584199294, + "loss": 2.5885, + "step": 8716 + }, + { + "epoch": 0.7034944717940441, + "grad_norm": 0.7140926122665405, + "learning_rate": 0.00012088895747566002, + "loss": 2.6278, + "step": 8717 + }, + { + "epoch": 0.7035751755306271, + "grad_norm": 0.6753602027893066, + "learning_rate": 0.00012087351858871969, + "loss": 2.5664, + "step": 8718 + }, + { + "epoch": 0.7036558792672101, + "grad_norm": 0.7150039076805115, + "learning_rate": 0.0001208580791815567, + "loss": 2.6739, + "step": 8719 + }, + { + "epoch": 0.703736583003793, + "grad_norm": 0.7120389342308044, + "learning_rate": 0.00012084263925455583, + "loss": 2.565, + "step": 8720 + }, + { + "epoch": 0.703817286740376, + "grad_norm": 0.7775784134864807, + "learning_rate": 0.00012082719880810194, + "loss": 2.5861, + "step": 8721 + }, + { + "epoch": 0.7038979904769591, + "grad_norm": 0.6704322695732117, + "learning_rate": 0.0001208117578425798, + "loss": 2.5957, + "step": 8722 + }, + { + "epoch": 0.7039786942135421, + "grad_norm": 0.6761276721954346, + "learning_rate": 0.00012079631635837426, + "loss": 2.5472, + "step": 8723 + }, + { + "epoch": 0.7040593979501251, + "grad_norm": 0.7639868855476379, + "learning_rate": 0.00012078087435587016, + "loss": 2.6053, + "step": 8724 + }, + { + "epoch": 0.704140101686708, + 
"grad_norm": 0.7490074038505554, + "learning_rate": 0.0001207654318354523, + "loss": 2.5517, + "step": 8725 + }, + { + "epoch": 0.7042208054232911, + "grad_norm": 0.7068852782249451, + "learning_rate": 0.00012074998879750566, + "loss": 2.5357, + "step": 8726 + }, + { + "epoch": 0.7043015091598741, + "grad_norm": 0.7273775935173035, + "learning_rate": 0.00012073454524241503, + "loss": 2.6028, + "step": 8727 + }, + { + "epoch": 0.7043822128964571, + "grad_norm": 0.7146363258361816, + "learning_rate": 0.00012071910117056533, + "loss": 2.5982, + "step": 8728 + }, + { + "epoch": 0.7044629166330401, + "grad_norm": 0.7631390690803528, + "learning_rate": 0.00012070365658234149, + "loss": 2.6021, + "step": 8729 + }, + { + "epoch": 0.7045436203696231, + "grad_norm": 0.7065283060073853, + "learning_rate": 0.00012068821147812839, + "loss": 2.5538, + "step": 8730 + }, + { + "epoch": 0.7046243241062061, + "grad_norm": 0.7914319634437561, + "learning_rate": 0.00012067276585831097, + "loss": 2.5617, + "step": 8731 + }, + { + "epoch": 0.7047050278427891, + "grad_norm": 0.7036565542221069, + "learning_rate": 0.0001206573197232742, + "loss": 2.5354, + "step": 8732 + }, + { + "epoch": 0.7047857315793721, + "grad_norm": 0.657116711139679, + "learning_rate": 0.00012064187307340303, + "loss": 2.5084, + "step": 8733 + }, + { + "epoch": 0.7048664353159552, + "grad_norm": 0.7246817946434021, + "learning_rate": 0.00012062642590908242, + "loss": 2.5737, + "step": 8734 + }, + { + "epoch": 0.7049471390525381, + "grad_norm": 0.6895857453346252, + "learning_rate": 0.00012061097823069736, + "loss": 2.5792, + "step": 8735 + }, + { + "epoch": 0.7050278427891211, + "grad_norm": 0.7654988169670105, + "learning_rate": 0.00012059553003863282, + "loss": 2.5302, + "step": 8736 + }, + { + "epoch": 0.7051085465257041, + "grad_norm": 0.7611668109893799, + "learning_rate": 0.00012058008133327387, + "loss": 2.6073, + "step": 8737 + }, + { + "epoch": 0.7051892502622872, + "grad_norm": 0.728729784488678, + 
"learning_rate": 0.00012056463211500546, + "loss": 2.5714, + "step": 8738 + }, + { + "epoch": 0.7052699539988702, + "grad_norm": 0.7251634001731873, + "learning_rate": 0.00012054918238421271, + "loss": 2.627, + "step": 8739 + }, + { + "epoch": 0.7053506577354531, + "grad_norm": 0.827745795249939, + "learning_rate": 0.00012053373214128056, + "loss": 2.6303, + "step": 8740 + }, + { + "epoch": 0.7054313614720361, + "grad_norm": 0.6837510466575623, + "learning_rate": 0.00012051828138659416, + "loss": 2.5837, + "step": 8741 + }, + { + "epoch": 0.7055120652086192, + "grad_norm": 0.6763553619384766, + "learning_rate": 0.00012050283012053856, + "loss": 2.575, + "step": 8742 + }, + { + "epoch": 0.7055927689452022, + "grad_norm": 0.6779605150222778, + "learning_rate": 0.00012048737834349886, + "loss": 2.588, + "step": 8743 + }, + { + "epoch": 0.7056734726817852, + "grad_norm": 0.7207251191139221, + "learning_rate": 0.00012047192605586008, + "loss": 2.6182, + "step": 8744 + }, + { + "epoch": 0.7057541764183681, + "grad_norm": 0.6681165099143982, + "learning_rate": 0.00012045647325800742, + "loss": 2.5595, + "step": 8745 + }, + { + "epoch": 0.7058348801549512, + "grad_norm": 0.7520970702171326, + "learning_rate": 0.00012044101995032594, + "loss": 2.6306, + "step": 8746 + }, + { + "epoch": 0.7059155838915342, + "grad_norm": 0.7148429155349731, + "learning_rate": 0.00012042556613320087, + "loss": 2.5749, + "step": 8747 + }, + { + "epoch": 0.7059962876281172, + "grad_norm": 0.619369626045227, + "learning_rate": 0.00012041011180701729, + "loss": 2.5382, + "step": 8748 + }, + { + "epoch": 0.7060769913647001, + "grad_norm": 0.7450816035270691, + "learning_rate": 0.00012039465697216032, + "loss": 2.5547, + "step": 8749 + }, + { + "epoch": 0.7061576951012832, + "grad_norm": 0.7324537634849548, + "learning_rate": 0.00012037920162901521, + "loss": 2.5756, + "step": 8750 + }, + { + "epoch": 0.7062383988378662, + "grad_norm": 0.7881754636764526, + "learning_rate": 0.00012036374577796715, 
+ "loss": 2.6376, + "step": 8751 + }, + { + "epoch": 0.7063191025744492, + "grad_norm": 0.7095965147018433, + "learning_rate": 0.00012034828941940128, + "loss": 2.5454, + "step": 8752 + }, + { + "epoch": 0.7063998063110322, + "grad_norm": 0.7142949104309082, + "learning_rate": 0.00012033283255370287, + "loss": 2.5738, + "step": 8753 + }, + { + "epoch": 0.7064805100476153, + "grad_norm": 0.6592378616333008, + "learning_rate": 0.0001203173751812571, + "loss": 2.5473, + "step": 8754 + }, + { + "epoch": 0.7065612137841982, + "grad_norm": 0.6964332461357117, + "learning_rate": 0.00012030191730244926, + "loss": 2.5829, + "step": 8755 + }, + { + "epoch": 0.7066419175207812, + "grad_norm": 0.707539975643158, + "learning_rate": 0.00012028645891766455, + "loss": 2.5652, + "step": 8756 + }, + { + "epoch": 0.7067226212573642, + "grad_norm": 0.6991387009620667, + "learning_rate": 0.00012027100002728824, + "loss": 2.5874, + "step": 8757 + }, + { + "epoch": 0.7068033249939473, + "grad_norm": 0.665746808052063, + "learning_rate": 0.00012025554063170566, + "loss": 2.5163, + "step": 8758 + }, + { + "epoch": 0.7068840287305302, + "grad_norm": 0.696130096912384, + "learning_rate": 0.00012024008073130204, + "loss": 2.5748, + "step": 8759 + }, + { + "epoch": 0.7069647324671132, + "grad_norm": 0.698885440826416, + "learning_rate": 0.00012022462032646269, + "loss": 2.5561, + "step": 8760 + }, + { + "epoch": 0.7070454362036962, + "grad_norm": 0.7052211761474609, + "learning_rate": 0.00012020915941757292, + "loss": 2.5979, + "step": 8761 + }, + { + "epoch": 0.7071261399402793, + "grad_norm": 0.7370811104774475, + "learning_rate": 0.00012019369800501808, + "loss": 2.5623, + "step": 8762 + }, + { + "epoch": 0.7072068436768623, + "grad_norm": 0.6699148416519165, + "learning_rate": 0.00012017823608918352, + "loss": 2.5816, + "step": 8763 + }, + { + "epoch": 0.7072875474134452, + "grad_norm": 0.6712930798530579, + "learning_rate": 0.00012016277367045457, + "loss": 2.5495, + "step": 8764 + }, + { 
+ "epoch": 0.7073682511500282, + "grad_norm": 0.7238204479217529, + "learning_rate": 0.00012014731074921659, + "loss": 2.5936, + "step": 8765 + }, + { + "epoch": 0.7074489548866113, + "grad_norm": 0.7303668856620789, + "learning_rate": 0.00012013184732585494, + "loss": 2.6366, + "step": 8766 + }, + { + "epoch": 0.7075296586231943, + "grad_norm": 0.6883132457733154, + "learning_rate": 0.00012011638340075505, + "loss": 2.534, + "step": 8767 + }, + { + "epoch": 0.7076103623597773, + "grad_norm": 0.7057133316993713, + "learning_rate": 0.00012010091897430229, + "loss": 2.6035, + "step": 8768 + }, + { + "epoch": 0.7076910660963602, + "grad_norm": 0.7069352269172668, + "learning_rate": 0.0001200854540468821, + "loss": 2.5047, + "step": 8769 + }, + { + "epoch": 0.7077717698329433, + "grad_norm": 0.7192478775978088, + "learning_rate": 0.00012006998861887985, + "loss": 2.5698, + "step": 8770 + }, + { + "epoch": 0.7078524735695263, + "grad_norm": 0.6992887854576111, + "learning_rate": 0.00012005452269068107, + "loss": 2.5631, + "step": 8771 + }, + { + "epoch": 0.7079331773061093, + "grad_norm": 0.676154613494873, + "learning_rate": 0.00012003905626267114, + "loss": 2.5255, + "step": 8772 + }, + { + "epoch": 0.7080138810426923, + "grad_norm": 0.672269880771637, + "learning_rate": 0.00012002358933523555, + "loss": 2.5766, + "step": 8773 + }, + { + "epoch": 0.7080945847792752, + "grad_norm": 0.7334566712379456, + "learning_rate": 0.00012000812190875976, + "loss": 2.6068, + "step": 8774 + }, + { + "epoch": 0.7081752885158583, + "grad_norm": 0.6599388122558594, + "learning_rate": 0.00011999265398362931, + "loss": 2.6032, + "step": 8775 + }, + { + "epoch": 0.7082559922524413, + "grad_norm": 0.7158498167991638, + "learning_rate": 0.00011997718556022958, + "loss": 2.599, + "step": 8776 + }, + { + "epoch": 0.7083366959890243, + "grad_norm": 0.7470360994338989, + "learning_rate": 0.00011996171663894624, + "loss": 2.58, + "step": 8777 + }, + { + "epoch": 0.7084173997256072, + 
"grad_norm": 0.6251266002655029, + "learning_rate": 0.00011994624722016472, + "loss": 2.5996, + "step": 8778 + }, + { + "epoch": 0.7084981034621903, + "grad_norm": 0.6649689078330994, + "learning_rate": 0.00011993077730427058, + "loss": 2.6025, + "step": 8779 + }, + { + "epoch": 0.7085788071987733, + "grad_norm": 0.7554693818092346, + "learning_rate": 0.00011991530689164939, + "loss": 2.6207, + "step": 8780 + }, + { + "epoch": 0.7086595109353563, + "grad_norm": 0.7941430807113647, + "learning_rate": 0.00011989983598268661, + "loss": 2.584, + "step": 8781 + }, + { + "epoch": 0.7087402146719393, + "grad_norm": 0.7257998585700989, + "learning_rate": 0.00011988436457776799, + "loss": 2.6152, + "step": 8782 + }, + { + "epoch": 0.7088209184085223, + "grad_norm": 0.716354489326477, + "learning_rate": 0.00011986889267727899, + "loss": 2.585, + "step": 8783 + }, + { + "epoch": 0.7089016221451053, + "grad_norm": 0.7094400525093079, + "learning_rate": 0.00011985342028160525, + "loss": 2.5759, + "step": 8784 + }, + { + "epoch": 0.7089823258816883, + "grad_norm": 0.7211421728134155, + "learning_rate": 0.0001198379473911324, + "loss": 2.5645, + "step": 8785 + }, + { + "epoch": 0.7090630296182713, + "grad_norm": 0.7166693806648254, + "learning_rate": 0.000119822474006246, + "loss": 2.5357, + "step": 8786 + }, + { + "epoch": 0.7091437333548544, + "grad_norm": 0.6702254414558411, + "learning_rate": 0.00011980700012733175, + "loss": 2.5353, + "step": 8787 + }, + { + "epoch": 0.7092244370914373, + "grad_norm": 0.6784049868583679, + "learning_rate": 0.0001197915257547753, + "loss": 2.4942, + "step": 8788 + }, + { + "epoch": 0.7093051408280203, + "grad_norm": 0.6914299726486206, + "learning_rate": 0.00011977605088896226, + "loss": 2.5682, + "step": 8789 + }, + { + "epoch": 0.7093858445646033, + "grad_norm": 0.7324358820915222, + "learning_rate": 0.00011976057553027837, + "loss": 2.564, + "step": 8790 + }, + { + "epoch": 0.7094665483011864, + "grad_norm": 0.6927928924560547, + 
"learning_rate": 0.00011974509967910927, + "loss": 2.5728, + "step": 8791 + }, + { + "epoch": 0.7095472520377694, + "grad_norm": 0.6795603036880493, + "learning_rate": 0.00011972962333584066, + "loss": 2.588, + "step": 8792 + }, + { + "epoch": 0.7096279557743523, + "grad_norm": 0.7132226228713989, + "learning_rate": 0.00011971414650085828, + "loss": 2.5759, + "step": 8793 + }, + { + "epoch": 0.7097086595109353, + "grad_norm": 0.737195611000061, + "learning_rate": 0.00011969866917454782, + "loss": 2.5721, + "step": 8794 + }, + { + "epoch": 0.7097893632475184, + "grad_norm": 0.6776021718978882, + "learning_rate": 0.00011968319135729507, + "loss": 2.5794, + "step": 8795 + }, + { + "epoch": 0.7098700669841014, + "grad_norm": 0.7113735675811768, + "learning_rate": 0.0001196677130494857, + "loss": 2.5595, + "step": 8796 + }, + { + "epoch": 0.7099507707206844, + "grad_norm": 0.6277747750282288, + "learning_rate": 0.0001196522342515055, + "loss": 2.5003, + "step": 8797 + }, + { + "epoch": 0.7100314744572673, + "grad_norm": 0.6982879042625427, + "learning_rate": 0.00011963675496374028, + "loss": 2.542, + "step": 8798 + }, + { + "epoch": 0.7101121781938504, + "grad_norm": 0.7019705176353455, + "learning_rate": 0.00011962127518657578, + "loss": 2.5723, + "step": 8799 + }, + { + "epoch": 0.7101928819304334, + "grad_norm": 0.6831088662147522, + "learning_rate": 0.00011960579492039783, + "loss": 2.5676, + "step": 8800 + }, + { + "epoch": 0.7102735856670164, + "grad_norm": 0.6744031310081482, + "learning_rate": 0.0001195903141655922, + "loss": 2.58, + "step": 8801 + }, + { + "epoch": 0.7103542894035993, + "grad_norm": 0.6873177289962769, + "learning_rate": 0.00011957483292254473, + "loss": 2.6289, + "step": 8802 + }, + { + "epoch": 0.7104349931401824, + "grad_norm": 0.6340685486793518, + "learning_rate": 0.00011955935119164125, + "loss": 2.5688, + "step": 8803 + }, + { + "epoch": 0.7105156968767654, + "grad_norm": 0.7147708535194397, + "learning_rate": 0.00011954386897326764, + 
"loss": 2.5471, + "step": 8804 + }, + { + "epoch": 0.7105964006133484, + "grad_norm": 0.699605405330658, + "learning_rate": 0.00011952838626780971, + "loss": 2.6122, + "step": 8805 + }, + { + "epoch": 0.7106771043499314, + "grad_norm": 0.6685385704040527, + "learning_rate": 0.00011951290307565335, + "loss": 2.5423, + "step": 8806 + }, + { + "epoch": 0.7107578080865145, + "grad_norm": 0.6884726881980896, + "learning_rate": 0.00011949741939718439, + "loss": 2.5243, + "step": 8807 + }, + { + "epoch": 0.7108385118230974, + "grad_norm": 0.6991142630577087, + "learning_rate": 0.00011948193523278884, + "loss": 2.6271, + "step": 8808 + }, + { + "epoch": 0.7109192155596804, + "grad_norm": 0.6964353919029236, + "learning_rate": 0.00011946645058285253, + "loss": 2.6296, + "step": 8809 + }, + { + "epoch": 0.7109999192962634, + "grad_norm": 0.7592040300369263, + "learning_rate": 0.00011945096544776136, + "loss": 2.6601, + "step": 8810 + }, + { + "epoch": 0.7110806230328465, + "grad_norm": 0.7146934866905212, + "learning_rate": 0.00011943547982790131, + "loss": 2.54, + "step": 8811 + }, + { + "epoch": 0.7111613267694294, + "grad_norm": 0.6991123557090759, + "learning_rate": 0.00011941999372365827, + "loss": 2.5978, + "step": 8812 + }, + { + "epoch": 0.7112420305060124, + "grad_norm": 0.6835920810699463, + "learning_rate": 0.00011940450713541822, + "loss": 2.6096, + "step": 8813 + }, + { + "epoch": 0.7113227342425954, + "grad_norm": 0.6913917660713196, + "learning_rate": 0.00011938902006356716, + "loss": 2.5624, + "step": 8814 + }, + { + "epoch": 0.7114034379791785, + "grad_norm": 0.6620622873306274, + "learning_rate": 0.00011937353250849102, + "loss": 2.6211, + "step": 8815 + }, + { + "epoch": 0.7114841417157615, + "grad_norm": 0.6738792061805725, + "learning_rate": 0.00011935804447057581, + "loss": 2.5889, + "step": 8816 + }, + { + "epoch": 0.7115648454523444, + "grad_norm": 0.7101936936378479, + "learning_rate": 0.00011934255595020751, + "loss": 2.5846, + "step": 8817 + }, + { 
+ "epoch": 0.7116455491889274, + "grad_norm": 0.6843911409378052, + "learning_rate": 0.00011932706694777216, + "loss": 2.5757, + "step": 8818 + }, + { + "epoch": 0.7117262529255105, + "grad_norm": 0.7217971086502075, + "learning_rate": 0.0001193115774636558, + "loss": 2.6174, + "step": 8819 + }, + { + "epoch": 0.7118069566620935, + "grad_norm": 0.6706245541572571, + "learning_rate": 0.00011929608749824445, + "loss": 2.5893, + "step": 8820 + }, + { + "epoch": 0.7118876603986765, + "grad_norm": 0.7057672739028931, + "learning_rate": 0.00011928059705192413, + "loss": 2.5426, + "step": 8821 + }, + { + "epoch": 0.7119683641352594, + "grad_norm": 0.7354697585105896, + "learning_rate": 0.00011926510612508095, + "loss": 2.5741, + "step": 8822 + }, + { + "epoch": 0.7120490678718424, + "grad_norm": 0.6618186235427856, + "learning_rate": 0.00011924961471810096, + "loss": 2.6007, + "step": 8823 + }, + { + "epoch": 0.7121297716084255, + "grad_norm": 0.6733995676040649, + "learning_rate": 0.00011923412283137028, + "loss": 2.5739, + "step": 8824 + }, + { + "epoch": 0.7122104753450085, + "grad_norm": 0.7324833869934082, + "learning_rate": 0.00011921863046527497, + "loss": 2.5461, + "step": 8825 + }, + { + "epoch": 0.7122911790815915, + "grad_norm": 0.6753048896789551, + "learning_rate": 0.00011920313762020113, + "loss": 2.5066, + "step": 8826 + }, + { + "epoch": 0.7123718828181744, + "grad_norm": 0.7861250638961792, + "learning_rate": 0.00011918764429653489, + "loss": 2.5229, + "step": 8827 + }, + { + "epoch": 0.7124525865547575, + "grad_norm": 0.7037342190742493, + "learning_rate": 0.00011917215049466244, + "loss": 2.5443, + "step": 8828 + }, + { + "epoch": 0.7125332902913405, + "grad_norm": 0.7112773060798645, + "learning_rate": 0.00011915665621496985, + "loss": 2.5656, + "step": 8829 + }, + { + "epoch": 0.7126139940279235, + "grad_norm": 0.6384316682815552, + "learning_rate": 0.00011914116145784333, + "loss": 2.5526, + "step": 8830 + }, + { + "epoch": 0.7126946977645064, + 
"grad_norm": 0.6673600077629089, + "learning_rate": 0.000119125666223669, + "loss": 2.5868, + "step": 8831 + }, + { + "epoch": 0.7127754015010895, + "grad_norm": 0.6927722692489624, + "learning_rate": 0.0001191101705128331, + "loss": 2.6237, + "step": 8832 + }, + { + "epoch": 0.7128561052376725, + "grad_norm": 0.7410106658935547, + "learning_rate": 0.00011909467432572182, + "loss": 2.5652, + "step": 8833 + }, + { + "epoch": 0.7129368089742555, + "grad_norm": 0.6780139803886414, + "learning_rate": 0.0001190791776627213, + "loss": 2.5343, + "step": 8834 + }, + { + "epoch": 0.7130175127108385, + "grad_norm": 0.7147949934005737, + "learning_rate": 0.00011906368052421781, + "loss": 2.5368, + "step": 8835 + }, + { + "epoch": 0.7130982164474216, + "grad_norm": 0.7092324495315552, + "learning_rate": 0.00011904818291059759, + "loss": 2.538, + "step": 8836 + }, + { + "epoch": 0.7131789201840045, + "grad_norm": 0.761763870716095, + "learning_rate": 0.00011903268482224684, + "loss": 2.5984, + "step": 8837 + }, + { + "epoch": 0.7132596239205875, + "grad_norm": 0.7011365294456482, + "learning_rate": 0.00011901718625955182, + "loss": 2.5383, + "step": 8838 + }, + { + "epoch": 0.7133403276571705, + "grad_norm": 0.7982703447341919, + "learning_rate": 0.00011900168722289882, + "loss": 2.5714, + "step": 8839 + }, + { + "epoch": 0.7134210313937536, + "grad_norm": 0.6788253784179688, + "learning_rate": 0.00011898618771267412, + "loss": 2.5675, + "step": 8840 + }, + { + "epoch": 0.7135017351303365, + "grad_norm": 0.6245018243789673, + "learning_rate": 0.00011897068772926397, + "loss": 2.5497, + "step": 8841 + }, + { + "epoch": 0.7135824388669195, + "grad_norm": 0.732109785079956, + "learning_rate": 0.0001189551872730547, + "loss": 2.5043, + "step": 8842 + }, + { + "epoch": 0.7136631426035025, + "grad_norm": 0.7640885710716248, + "learning_rate": 0.0001189396863444326, + "loss": 2.5974, + "step": 8843 + }, + { + "epoch": 0.7137438463400856, + "grad_norm": 0.6806808710098267, + 
"learning_rate": 0.00011892418494378403, + "loss": 2.5911, + "step": 8844 + }, + { + "epoch": 0.7138245500766686, + "grad_norm": 0.6730000376701355, + "learning_rate": 0.00011890868307149528, + "loss": 2.5405, + "step": 8845 + }, + { + "epoch": 0.7139052538132515, + "grad_norm": 0.6881929636001587, + "learning_rate": 0.00011889318072795275, + "loss": 2.6083, + "step": 8846 + }, + { + "epoch": 0.7139859575498345, + "grad_norm": 0.7079598307609558, + "learning_rate": 0.00011887767791354275, + "loss": 2.5743, + "step": 8847 + }, + { + "epoch": 0.7140666612864176, + "grad_norm": 0.6760475635528564, + "learning_rate": 0.00011886217462865166, + "loss": 2.5925, + "step": 8848 + }, + { + "epoch": 0.7141473650230006, + "grad_norm": 0.6851043701171875, + "learning_rate": 0.00011884667087366587, + "loss": 2.5839, + "step": 8849 + }, + { + "epoch": 0.7142280687595836, + "grad_norm": 0.6805267930030823, + "learning_rate": 0.00011883116664897178, + "loss": 2.562, + "step": 8850 + }, + { + "epoch": 0.7143087724961665, + "grad_norm": 0.6720704436302185, + "learning_rate": 0.00011881566195495581, + "loss": 2.5381, + "step": 8851 + }, + { + "epoch": 0.7143894762327496, + "grad_norm": 0.718166172504425, + "learning_rate": 0.00011880015679200436, + "loss": 2.5912, + "step": 8852 + }, + { + "epoch": 0.7144701799693326, + "grad_norm": 0.6643497943878174, + "learning_rate": 0.00011878465116050383, + "loss": 2.5122, + "step": 8853 + }, + { + "epoch": 0.7145508837059156, + "grad_norm": 0.705186665058136, + "learning_rate": 0.00011876914506084074, + "loss": 2.617, + "step": 8854 + }, + { + "epoch": 0.7146315874424986, + "grad_norm": 0.6417848467826843, + "learning_rate": 0.00011875363849340144, + "loss": 2.5552, + "step": 8855 + }, + { + "epoch": 0.7147122911790816, + "grad_norm": 0.6861358880996704, + "learning_rate": 0.00011873813145857249, + "loss": 2.6324, + "step": 8856 + }, + { + "epoch": 0.7147929949156646, + "grad_norm": 0.7134111523628235, + "learning_rate": 0.00011872262395674027, 
+ "loss": 2.5892, + "step": 8857 + }, + { + "epoch": 0.7148736986522476, + "grad_norm": 0.7177506685256958, + "learning_rate": 0.00011870711598829135, + "loss": 2.5677, + "step": 8858 + }, + { + "epoch": 0.7149544023888306, + "grad_norm": 0.6435763835906982, + "learning_rate": 0.00011869160755361219, + "loss": 2.5452, + "step": 8859 + }, + { + "epoch": 0.7150351061254137, + "grad_norm": 0.6443132758140564, + "learning_rate": 0.00011867609865308935, + "loss": 2.5566, + "step": 8860 + }, + { + "epoch": 0.7151158098619966, + "grad_norm": 0.7132347822189331, + "learning_rate": 0.00011866058928710925, + "loss": 2.565, + "step": 8861 + }, + { + "epoch": 0.7151965135985796, + "grad_norm": 0.7803207039833069, + "learning_rate": 0.00011864507945605854, + "loss": 2.556, + "step": 8862 + }, + { + "epoch": 0.7152772173351626, + "grad_norm": 0.7277950644493103, + "learning_rate": 0.00011862956916032367, + "loss": 2.5623, + "step": 8863 + }, + { + "epoch": 0.7153579210717457, + "grad_norm": 0.6812277436256409, + "learning_rate": 0.00011861405840029125, + "loss": 2.6146, + "step": 8864 + }, + { + "epoch": 0.7154386248083286, + "grad_norm": 0.7170509099960327, + "learning_rate": 0.00011859854717634786, + "loss": 2.52, + "step": 8865 + }, + { + "epoch": 0.7155193285449116, + "grad_norm": 0.7282906174659729, + "learning_rate": 0.00011858303548888004, + "loss": 2.5605, + "step": 8866 + }, + { + "epoch": 0.7156000322814946, + "grad_norm": 0.7290246486663818, + "learning_rate": 0.00011856752333827439, + "loss": 2.6292, + "step": 8867 + }, + { + "epoch": 0.7156807360180777, + "grad_norm": 0.6870024800300598, + "learning_rate": 0.00011855201072491752, + "loss": 2.6396, + "step": 8868 + }, + { + "epoch": 0.7157614397546607, + "grad_norm": 0.7336156964302063, + "learning_rate": 0.00011853649764919605, + "loss": 2.6356, + "step": 8869 + }, + { + "epoch": 0.7158421434912436, + "grad_norm": 0.7181294560432434, + "learning_rate": 0.00011852098411149661, + "loss": 2.5163, + "step": 8870 + }, + 
{ + "epoch": 0.7159228472278266, + "grad_norm": 0.7355513572692871, + "learning_rate": 0.00011850547011220583, + "loss": 2.5485, + "step": 8871 + }, + { + "epoch": 0.7160035509644097, + "grad_norm": 0.7005351185798645, + "learning_rate": 0.00011848995565171038, + "loss": 2.5187, + "step": 8872 + }, + { + "epoch": 0.7160842547009927, + "grad_norm": 0.6550194025039673, + "learning_rate": 0.00011847444073039686, + "loss": 2.5174, + "step": 8873 + }, + { + "epoch": 0.7161649584375757, + "grad_norm": 0.6568251252174377, + "learning_rate": 0.00011845892534865202, + "loss": 2.5128, + "step": 8874 + }, + { + "epoch": 0.7162456621741586, + "grad_norm": 0.6359419226646423, + "learning_rate": 0.0001184434095068625, + "loss": 2.5967, + "step": 8875 + }, + { + "epoch": 0.7163263659107416, + "grad_norm": 0.6730023622512817, + "learning_rate": 0.00011842789320541504, + "loss": 2.5243, + "step": 8876 + }, + { + "epoch": 0.7164070696473247, + "grad_norm": 0.6750187277793884, + "learning_rate": 0.00011841237644469625, + "loss": 2.602, + "step": 8877 + }, + { + "epoch": 0.7164877733839077, + "grad_norm": 0.7039143443107605, + "learning_rate": 0.00011839685922509291, + "loss": 2.5345, + "step": 8878 + }, + { + "epoch": 0.7165684771204907, + "grad_norm": 0.6602306962013245, + "learning_rate": 0.00011838134154699177, + "loss": 2.5995, + "step": 8879 + }, + { + "epoch": 0.7166491808570736, + "grad_norm": 0.6744598150253296, + "learning_rate": 0.00011836582341077955, + "loss": 2.6005, + "step": 8880 + }, + { + "epoch": 0.7167298845936567, + "grad_norm": 0.7136051058769226, + "learning_rate": 0.00011835030481684302, + "loss": 2.5424, + "step": 8881 + }, + { + "epoch": 0.7168105883302397, + "grad_norm": 0.7085986137390137, + "learning_rate": 0.00011833478576556889, + "loss": 2.5912, + "step": 8882 + }, + { + "epoch": 0.7168912920668227, + "grad_norm": 0.7635689377784729, + "learning_rate": 0.00011831926625734398, + "loss": 2.5836, + "step": 8883 + }, + { + "epoch": 0.7169719958034056, + 
"grad_norm": 0.6543256640434265, + "learning_rate": 0.00011830374629255508, + "loss": 2.5442, + "step": 8884 + }, + { + "epoch": 0.7170526995399887, + "grad_norm": 0.663840115070343, + "learning_rate": 0.00011828822587158896, + "loss": 2.5529, + "step": 8885 + }, + { + "epoch": 0.7171334032765717, + "grad_norm": 0.6868027448654175, + "learning_rate": 0.00011827270499483247, + "loss": 2.6678, + "step": 8886 + }, + { + "epoch": 0.7172141070131547, + "grad_norm": 0.649172842502594, + "learning_rate": 0.00011825718366267238, + "loss": 2.57, + "step": 8887 + }, + { + "epoch": 0.7172948107497377, + "grad_norm": 0.6818440556526184, + "learning_rate": 0.00011824166187549554, + "loss": 2.5602, + "step": 8888 + }, + { + "epoch": 0.7173755144863208, + "grad_norm": 0.7222314476966858, + "learning_rate": 0.00011822613963368885, + "loss": 2.5526, + "step": 8889 + }, + { + "epoch": 0.7174562182229037, + "grad_norm": 0.7309598922729492, + "learning_rate": 0.00011821061693763909, + "loss": 2.5515, + "step": 8890 + }, + { + "epoch": 0.7175369219594867, + "grad_norm": 0.6935746669769287, + "learning_rate": 0.00011819509378773314, + "loss": 2.5506, + "step": 8891 + }, + { + "epoch": 0.7176176256960697, + "grad_norm": 0.6754423975944519, + "learning_rate": 0.00011817957018435792, + "loss": 2.5621, + "step": 8892 + }, + { + "epoch": 0.7176983294326528, + "grad_norm": 0.7087355852127075, + "learning_rate": 0.00011816404612790026, + "loss": 2.5708, + "step": 8893 + }, + { + "epoch": 0.7177790331692357, + "grad_norm": 0.726820707321167, + "learning_rate": 0.0001181485216187471, + "loss": 2.5741, + "step": 8894 + }, + { + "epoch": 0.7178597369058187, + "grad_norm": 0.6539922952651978, + "learning_rate": 0.00011813299665728532, + "loss": 2.613, + "step": 8895 + }, + { + "epoch": 0.7179404406424017, + "grad_norm": 0.7008066773414612, + "learning_rate": 0.00011811747124390189, + "loss": 2.6029, + "step": 8896 + }, + { + "epoch": 0.7180211443789848, + "grad_norm": 0.6900522708892822, + 
"learning_rate": 0.00011810194537898374, + "loss": 2.5716, + "step": 8897 + }, + { + "epoch": 0.7181018481155678, + "grad_norm": 0.675345242023468, + "learning_rate": 0.00011808641906291776, + "loss": 2.5742, + "step": 8898 + }, + { + "epoch": 0.7181825518521507, + "grad_norm": 0.6697559356689453, + "learning_rate": 0.00011807089229609092, + "loss": 2.5717, + "step": 8899 + }, + { + "epoch": 0.7182632555887337, + "grad_norm": 0.6874344944953918, + "learning_rate": 0.00011805536507889021, + "loss": 2.5394, + "step": 8900 + }, + { + "epoch": 0.7183439593253168, + "grad_norm": 0.6675494313240051, + "learning_rate": 0.00011803983741170263, + "loss": 2.5655, + "step": 8901 + }, + { + "epoch": 0.7184246630618998, + "grad_norm": 0.6937244534492493, + "learning_rate": 0.00011802430929491517, + "loss": 2.5676, + "step": 8902 + }, + { + "epoch": 0.7185053667984828, + "grad_norm": 0.7591496109962463, + "learning_rate": 0.00011800878072891474, + "loss": 2.5849, + "step": 8903 + }, + { + "epoch": 0.7185860705350657, + "grad_norm": 0.6503129005432129, + "learning_rate": 0.00011799325171408846, + "loss": 2.5416, + "step": 8904 + }, + { + "epoch": 0.7186667742716488, + "grad_norm": 0.6450222134590149, + "learning_rate": 0.00011797772225082333, + "loss": 2.5395, + "step": 8905 + }, + { + "epoch": 0.7187474780082318, + "grad_norm": 0.7317619919776917, + "learning_rate": 0.00011796219233950632, + "loss": 2.609, + "step": 8906 + }, + { + "epoch": 0.7188281817448148, + "grad_norm": 0.7585787773132324, + "learning_rate": 0.00011794666198052455, + "loss": 2.5556, + "step": 8907 + }, + { + "epoch": 0.7189088854813978, + "grad_norm": 0.6718214750289917, + "learning_rate": 0.00011793113117426505, + "loss": 2.5914, + "step": 8908 + }, + { + "epoch": 0.7189895892179808, + "grad_norm": 0.6459314823150635, + "learning_rate": 0.00011791559992111487, + "loss": 2.5956, + "step": 8909 + }, + { + "epoch": 0.7190702929545638, + "grad_norm": 0.6592775583267212, + "learning_rate": 
0.00011790006822146113, + "loss": 2.5568, + "step": 8910 + }, + { + "epoch": 0.7191509966911468, + "grad_norm": 0.7277452349662781, + "learning_rate": 0.0001178845360756909, + "loss": 2.5989, + "step": 8911 + }, + { + "epoch": 0.7192317004277298, + "grad_norm": 0.7020131945610046, + "learning_rate": 0.00011786900348419128, + "loss": 2.645, + "step": 8912 + }, + { + "epoch": 0.7193124041643129, + "grad_norm": 0.6746636629104614, + "learning_rate": 0.00011785347044734938, + "loss": 2.5173, + "step": 8913 + }, + { + "epoch": 0.7193931079008958, + "grad_norm": 0.6782798171043396, + "learning_rate": 0.0001178379369655523, + "loss": 2.6007, + "step": 8914 + }, + { + "epoch": 0.7194738116374788, + "grad_norm": 0.705498218536377, + "learning_rate": 0.00011782240303918724, + "loss": 2.5408, + "step": 8915 + }, + { + "epoch": 0.7195545153740618, + "grad_norm": 0.675532341003418, + "learning_rate": 0.00011780686866864128, + "loss": 2.5188, + "step": 8916 + }, + { + "epoch": 0.7196352191106449, + "grad_norm": 0.6552390456199646, + "learning_rate": 0.00011779133385430161, + "loss": 2.5409, + "step": 8917 + }, + { + "epoch": 0.7197159228472279, + "grad_norm": 0.6589654088020325, + "learning_rate": 0.00011777579859655544, + "loss": 2.5447, + "step": 8918 + }, + { + "epoch": 0.7197966265838108, + "grad_norm": 0.7548382878303528, + "learning_rate": 0.00011776026289578985, + "loss": 2.5239, + "step": 8919 + }, + { + "epoch": 0.7198773303203938, + "grad_norm": 0.697325587272644, + "learning_rate": 0.00011774472675239207, + "loss": 2.5887, + "step": 8920 + }, + { + "epoch": 0.7199580340569769, + "grad_norm": 0.734462320804596, + "learning_rate": 0.00011772919016674934, + "loss": 2.5847, + "step": 8921 + }, + { + "epoch": 0.7200387377935599, + "grad_norm": 0.6736955642700195, + "learning_rate": 0.00011771365313924886, + "loss": 2.558, + "step": 8922 + }, + { + "epoch": 0.7201194415301428, + "grad_norm": 0.7157856822013855, + "learning_rate": 0.00011769811567027784, + "loss": 2.6199, + 
"step": 8923 + }, + { + "epoch": 0.7202001452667258, + "grad_norm": 0.7045830488204956, + "learning_rate": 0.0001176825777602235, + "loss": 2.576, + "step": 8924 + }, + { + "epoch": 0.7202808490033088, + "grad_norm": 0.6875419020652771, + "learning_rate": 0.00011766703940947308, + "loss": 2.6045, + "step": 8925 + }, + { + "epoch": 0.7203615527398919, + "grad_norm": 0.7313494086265564, + "learning_rate": 0.00011765150061841387, + "loss": 2.5388, + "step": 8926 + }, + { + "epoch": 0.7204422564764749, + "grad_norm": 0.7223608493804932, + "learning_rate": 0.00011763596138743313, + "loss": 2.5466, + "step": 8927 + }, + { + "epoch": 0.7205229602130578, + "grad_norm": 0.7289614081382751, + "learning_rate": 0.00011762042171691816, + "loss": 2.5862, + "step": 8928 + }, + { + "epoch": 0.7206036639496408, + "grad_norm": 0.7098878026008606, + "learning_rate": 0.00011760488160725617, + "loss": 2.5497, + "step": 8929 + }, + { + "epoch": 0.7206843676862239, + "grad_norm": 0.7096838355064392, + "learning_rate": 0.00011758934105883452, + "loss": 2.558, + "step": 8930 + }, + { + "epoch": 0.7207650714228069, + "grad_norm": 0.7334743738174438, + "learning_rate": 0.00011757380007204055, + "loss": 2.5966, + "step": 8931 + }, + { + "epoch": 0.7208457751593899, + "grad_norm": 0.7192476391792297, + "learning_rate": 0.00011755825864726149, + "loss": 2.5307, + "step": 8932 + }, + { + "epoch": 0.7209264788959728, + "grad_norm": 0.7329632043838501, + "learning_rate": 0.00011754271678488478, + "loss": 2.6453, + "step": 8933 + }, + { + "epoch": 0.7210071826325559, + "grad_norm": 0.6827974915504456, + "learning_rate": 0.00011752717448529766, + "loss": 2.5507, + "step": 8934 + }, + { + "epoch": 0.7210878863691389, + "grad_norm": 0.8292449116706848, + "learning_rate": 0.00011751163174888756, + "loss": 2.6178, + "step": 8935 + }, + { + "epoch": 0.7211685901057219, + "grad_norm": 0.6504058837890625, + "learning_rate": 0.00011749608857604183, + "loss": 2.574, + "step": 8936 + }, + { + "epoch": 
0.7212492938423049, + "grad_norm": 0.6567742824554443, + "learning_rate": 0.00011748054496714785, + "loss": 2.45, + "step": 8937 + }, + { + "epoch": 0.7213299975788879, + "grad_norm": 0.6699101328849792, + "learning_rate": 0.00011746500092259296, + "loss": 2.5827, + "step": 8938 + }, + { + "epoch": 0.7214107013154709, + "grad_norm": 0.7664934992790222, + "learning_rate": 0.0001174494564427646, + "loss": 2.5246, + "step": 8939 + }, + { + "epoch": 0.7214914050520539, + "grad_norm": 0.7276309132575989, + "learning_rate": 0.00011743391152805017, + "loss": 2.6096, + "step": 8940 + }, + { + "epoch": 0.7215721087886369, + "grad_norm": 0.7248005867004395, + "learning_rate": 0.0001174183661788371, + "loss": 2.6362, + "step": 8941 + }, + { + "epoch": 0.72165281252522, + "grad_norm": 0.7773801684379578, + "learning_rate": 0.00011740282039551282, + "loss": 2.547, + "step": 8942 + }, + { + "epoch": 0.7217335162618029, + "grad_norm": 0.7346466779708862, + "learning_rate": 0.00011738727417846476, + "loss": 2.5635, + "step": 8943 + }, + { + "epoch": 0.7218142199983859, + "grad_norm": 0.7042707800865173, + "learning_rate": 0.0001173717275280804, + "loss": 2.5593, + "step": 8944 + }, + { + "epoch": 0.7218949237349689, + "grad_norm": 0.6894899010658264, + "learning_rate": 0.00011735618044474712, + "loss": 2.5272, + "step": 8945 + }, + { + "epoch": 0.721975627471552, + "grad_norm": 0.6643744111061096, + "learning_rate": 0.00011734063292885249, + "loss": 2.6001, + "step": 8946 + }, + { + "epoch": 0.722056331208135, + "grad_norm": 0.7543076276779175, + "learning_rate": 0.00011732508498078396, + "loss": 2.558, + "step": 8947 + }, + { + "epoch": 0.7221370349447179, + "grad_norm": 0.7065596580505371, + "learning_rate": 0.00011730953660092903, + "loss": 2.6255, + "step": 8948 + }, + { + "epoch": 0.7222177386813009, + "grad_norm": 0.6968158483505249, + "learning_rate": 0.0001172939877896752, + "loss": 2.5277, + "step": 8949 + }, + { + "epoch": 0.722298442417884, + "grad_norm": 
0.6918557286262512, + "learning_rate": 0.00011727843854740996, + "loss": 2.5456, + "step": 8950 + }, + { + "epoch": 0.722379146154467, + "grad_norm": 0.7262142300605774, + "learning_rate": 0.00011726288887452088, + "loss": 2.5345, + "step": 8951 + }, + { + "epoch": 0.7224598498910499, + "grad_norm": 0.7423329949378967, + "learning_rate": 0.00011724733877139548, + "loss": 2.6335, + "step": 8952 + }, + { + "epoch": 0.7225405536276329, + "grad_norm": 0.7734495997428894, + "learning_rate": 0.00011723178823842136, + "loss": 2.5951, + "step": 8953 + }, + { + "epoch": 0.722621257364216, + "grad_norm": 0.6792804598808289, + "learning_rate": 0.00011721623727598597, + "loss": 2.5927, + "step": 8954 + }, + { + "epoch": 0.722701961100799, + "grad_norm": 0.7971853017807007, + "learning_rate": 0.00011720068588447697, + "loss": 2.5451, + "step": 8955 + }, + { + "epoch": 0.722782664837382, + "grad_norm": 0.7264395356178284, + "learning_rate": 0.00011718513406428189, + "loss": 2.5769, + "step": 8956 + }, + { + "epoch": 0.7228633685739649, + "grad_norm": 0.6536725759506226, + "learning_rate": 0.0001171695818157884, + "loss": 2.6285, + "step": 8957 + }, + { + "epoch": 0.722944072310548, + "grad_norm": 0.6676235198974609, + "learning_rate": 0.000117154029139384, + "loss": 2.5896, + "step": 8958 + }, + { + "epoch": 0.723024776047131, + "grad_norm": 0.7104088664054871, + "learning_rate": 0.00011713847603545636, + "loss": 2.5606, + "step": 8959 + }, + { + "epoch": 0.723105479783714, + "grad_norm": 0.6646785140037537, + "learning_rate": 0.0001171229225043931, + "loss": 2.5617, + "step": 8960 + }, + { + "epoch": 0.723186183520297, + "grad_norm": 0.7148672342300415, + "learning_rate": 0.00011710736854658186, + "loss": 2.5855, + "step": 8961 + }, + { + "epoch": 0.72326688725688, + "grad_norm": 0.6864955425262451, + "learning_rate": 0.00011709181416241028, + "loss": 2.6098, + "step": 8962 + }, + { + "epoch": 0.723347590993463, + "grad_norm": 0.7049087285995483, + "learning_rate": 
0.00011707625935226602, + "loss": 2.506, + "step": 8963 + }, + { + "epoch": 0.723428294730046, + "grad_norm": 0.6419759392738342, + "learning_rate": 0.00011706070411653672, + "loss": 2.5485, + "step": 8964 + }, + { + "epoch": 0.723508998466629, + "grad_norm": 0.6879174709320068, + "learning_rate": 0.00011704514845561007, + "loss": 2.5373, + "step": 8965 + }, + { + "epoch": 0.7235897022032121, + "grad_norm": 0.6473780274391174, + "learning_rate": 0.00011702959236987378, + "loss": 2.5479, + "step": 8966 + }, + { + "epoch": 0.723670405939795, + "grad_norm": 0.6924241185188293, + "learning_rate": 0.00011701403585971553, + "loss": 2.5679, + "step": 8967 + }, + { + "epoch": 0.723751109676378, + "grad_norm": 0.7452483773231506, + "learning_rate": 0.00011699847892552305, + "loss": 2.5043, + "step": 8968 + }, + { + "epoch": 0.723831813412961, + "grad_norm": 0.7517218589782715, + "learning_rate": 0.00011698292156768402, + "loss": 2.5554, + "step": 8969 + }, + { + "epoch": 0.7239125171495441, + "grad_norm": 0.6492432355880737, + "learning_rate": 0.00011696736378658618, + "loss": 2.6091, + "step": 8970 + }, + { + "epoch": 0.723993220886127, + "grad_norm": 0.740093469619751, + "learning_rate": 0.0001169518055826173, + "loss": 2.5629, + "step": 8971 + }, + { + "epoch": 0.72407392462271, + "grad_norm": 0.7186923027038574, + "learning_rate": 0.00011693624695616509, + "loss": 2.5537, + "step": 8972 + }, + { + "epoch": 0.724154628359293, + "grad_norm": 0.7066059112548828, + "learning_rate": 0.00011692068790761737, + "loss": 2.5115, + "step": 8973 + }, + { + "epoch": 0.7242353320958761, + "grad_norm": 0.7031805515289307, + "learning_rate": 0.00011690512843736185, + "loss": 2.596, + "step": 8974 + }, + { + "epoch": 0.7243160358324591, + "grad_norm": 0.7308956384658813, + "learning_rate": 0.00011688956854578635, + "loss": 2.6311, + "step": 8975 + }, + { + "epoch": 0.724396739569042, + "grad_norm": 0.6926052570343018, + "learning_rate": 0.00011687400823327863, + "loss": 2.5659, + 
"step": 8976 + }, + { + "epoch": 0.724477443305625, + "grad_norm": 0.69638991355896, + "learning_rate": 0.00011685844750022654, + "loss": 2.4792, + "step": 8977 + }, + { + "epoch": 0.724558147042208, + "grad_norm": 0.6858355402946472, + "learning_rate": 0.00011684288634701785, + "loss": 2.5707, + "step": 8978 + }, + { + "epoch": 0.7246388507787911, + "grad_norm": 0.6673639416694641, + "learning_rate": 0.00011682732477404044, + "loss": 2.5627, + "step": 8979 + }, + { + "epoch": 0.7247195545153741, + "grad_norm": 0.7174322605133057, + "learning_rate": 0.00011681176278168206, + "loss": 2.5801, + "step": 8980 + }, + { + "epoch": 0.724800258251957, + "grad_norm": 0.6840930581092834, + "learning_rate": 0.00011679620037033064, + "loss": 2.4994, + "step": 8981 + }, + { + "epoch": 0.72488096198854, + "grad_norm": 0.7179884910583496, + "learning_rate": 0.00011678063754037399, + "loss": 2.6408, + "step": 8982 + }, + { + "epoch": 0.7249616657251231, + "grad_norm": 0.6564825773239136, + "learning_rate": 0.00011676507429219998, + "loss": 2.5412, + "step": 8983 + }, + { + "epoch": 0.7250423694617061, + "grad_norm": 0.7020624876022339, + "learning_rate": 0.00011674951062619652, + "loss": 2.5778, + "step": 8984 + }, + { + "epoch": 0.7251230731982891, + "grad_norm": 0.8061255812644958, + "learning_rate": 0.00011673394654275145, + "loss": 2.5581, + "step": 8985 + }, + { + "epoch": 0.725203776934872, + "grad_norm": 0.7653982043266296, + "learning_rate": 0.00011671838204225267, + "loss": 2.5324, + "step": 8986 + }, + { + "epoch": 0.7252844806714551, + "grad_norm": 0.7168377041816711, + "learning_rate": 0.00011670281712508816, + "loss": 2.6357, + "step": 8987 + }, + { + "epoch": 0.7253651844080381, + "grad_norm": 0.6860470771789551, + "learning_rate": 0.00011668725179164575, + "loss": 2.5367, + "step": 8988 + }, + { + "epoch": 0.7254458881446211, + "grad_norm": 0.7175878286361694, + "learning_rate": 0.00011667168604231342, + "loss": 2.549, + "step": 8989 + }, + { + "epoch": 
0.725526591881204, + "grad_norm": 0.7124783992767334, + "learning_rate": 0.00011665611987747907, + "loss": 2.5566, + "step": 8990 + }, + { + "epoch": 0.7256072956177871, + "grad_norm": 0.6575417518615723, + "learning_rate": 0.00011664055329753067, + "loss": 2.5455, + "step": 8991 + }, + { + "epoch": 0.7256879993543701, + "grad_norm": 0.6576877236366272, + "learning_rate": 0.00011662498630285623, + "loss": 2.5596, + "step": 8992 + }, + { + "epoch": 0.7257687030909531, + "grad_norm": 0.7235110402107239, + "learning_rate": 0.00011660941889384365, + "loss": 2.6199, + "step": 8993 + }, + { + "epoch": 0.7258494068275361, + "grad_norm": 0.6623982787132263, + "learning_rate": 0.00011659385107088092, + "loss": 2.5642, + "step": 8994 + }, + { + "epoch": 0.7259301105641192, + "grad_norm": 0.7113857865333557, + "learning_rate": 0.00011657828283435605, + "loss": 2.5631, + "step": 8995 + }, + { + "epoch": 0.7260108143007021, + "grad_norm": 0.7076124548912048, + "learning_rate": 0.00011656271418465702, + "loss": 2.5141, + "step": 8996 + }, + { + "epoch": 0.7260915180372851, + "grad_norm": 0.7534562349319458, + "learning_rate": 0.00011654714512217188, + "loss": 2.5896, + "step": 8997 + }, + { + "epoch": 0.7261722217738681, + "grad_norm": 0.7393170595169067, + "learning_rate": 0.00011653157564728865, + "loss": 2.5848, + "step": 8998 + }, + { + "epoch": 0.7262529255104512, + "grad_norm": 0.6829591989517212, + "learning_rate": 0.0001165160057603953, + "loss": 2.5439, + "step": 8999 + }, + { + "epoch": 0.7263336292470342, + "grad_norm": 0.6527189016342163, + "learning_rate": 0.00011650043546187995, + "loss": 2.5655, + "step": 9000 + }, + { + "epoch": 0.7263336292470342, + "eval_loss": 2.487652063369751, + "eval_runtime": 845.9129, + "eval_samples_per_second": 3.097, + "eval_steps_per_second": 0.517, + "step": 9000 + }, + { + "epoch": 0.7264143329836171, + "grad_norm": 0.6545615196228027, + "learning_rate": 0.00011648486475213058, + "loss": 2.5366, + "step": 9001 + }, + { + "epoch": 
0.7264950367202001, + "grad_norm": 0.6854971647262573, + "learning_rate": 0.00011646929363153529, + "loss": 2.5832, + "step": 9002 + }, + { + "epoch": 0.7265757404567832, + "grad_norm": 0.7745552062988281, + "learning_rate": 0.00011645372210048218, + "loss": 2.5854, + "step": 9003 + }, + { + "epoch": 0.7266564441933662, + "grad_norm": 0.7159156203269958, + "learning_rate": 0.00011643815015935928, + "loss": 2.614, + "step": 9004 + }, + { + "epoch": 0.7267371479299491, + "grad_norm": 0.700074315071106, + "learning_rate": 0.00011642257780855475, + "loss": 2.6124, + "step": 9005 + }, + { + "epoch": 0.7268178516665321, + "grad_norm": 0.7367869019508362, + "learning_rate": 0.0001164070050484566, + "loss": 2.5512, + "step": 9006 + }, + { + "epoch": 0.7268985554031152, + "grad_norm": 0.6623905897140503, + "learning_rate": 0.00011639143187945301, + "loss": 2.5724, + "step": 9007 + }, + { + "epoch": 0.7269792591396982, + "grad_norm": 0.7111610770225525, + "learning_rate": 0.0001163758583019321, + "loss": 2.547, + "step": 9008 + }, + { + "epoch": 0.7270599628762812, + "grad_norm": 0.6860959529876709, + "learning_rate": 0.00011636028431628199, + "loss": 2.532, + "step": 9009 + }, + { + "epoch": 0.7271406666128641, + "grad_norm": 0.7606309056282043, + "learning_rate": 0.00011634470992289084, + "loss": 2.5214, + "step": 9010 + }, + { + "epoch": 0.7272213703494472, + "grad_norm": 0.6440508365631104, + "learning_rate": 0.00011632913512214677, + "loss": 2.5554, + "step": 9011 + }, + { + "epoch": 0.7273020740860302, + "grad_norm": 0.6770462393760681, + "learning_rate": 0.00011631355991443796, + "loss": 2.5877, + "step": 9012 + }, + { + "epoch": 0.7273827778226132, + "grad_norm": 0.6419155597686768, + "learning_rate": 0.00011629798430015262, + "loss": 2.5337, + "step": 9013 + }, + { + "epoch": 0.7274634815591962, + "grad_norm": 0.6782121658325195, + "learning_rate": 0.00011628240827967891, + "loss": 2.5152, + "step": 9014 + }, + { + "epoch": 0.7275441852957792, + "grad_norm": 
0.6972285509109497, + "learning_rate": 0.00011626683185340501, + "loss": 2.5628, + "step": 9015 + }, + { + "epoch": 0.7276248890323622, + "grad_norm": 0.6823342442512512, + "learning_rate": 0.00011625125502171914, + "loss": 2.5977, + "step": 9016 + }, + { + "epoch": 0.7277055927689452, + "grad_norm": 0.723311722278595, + "learning_rate": 0.0001162356777850095, + "loss": 2.5772, + "step": 9017 + }, + { + "epoch": 0.7277862965055282, + "grad_norm": 0.7395427227020264, + "learning_rate": 0.00011622010014366435, + "loss": 2.6068, + "step": 9018 + }, + { + "epoch": 0.7278670002421113, + "grad_norm": 0.6970974206924438, + "learning_rate": 0.00011620452209807192, + "loss": 2.5577, + "step": 9019 + }, + { + "epoch": 0.7279477039786942, + "grad_norm": 0.6921418309211731, + "learning_rate": 0.0001161889436486204, + "loss": 2.5476, + "step": 9020 + }, + { + "epoch": 0.7280284077152772, + "grad_norm": 0.7243841886520386, + "learning_rate": 0.0001161733647956981, + "loss": 2.579, + "step": 9021 + }, + { + "epoch": 0.7281091114518602, + "grad_norm": 0.7240262627601624, + "learning_rate": 0.0001161577855396933, + "loss": 2.5959, + "step": 9022 + }, + { + "epoch": 0.7281898151884433, + "grad_norm": 0.7215476632118225, + "learning_rate": 0.0001161422058809942, + "loss": 2.5979, + "step": 9023 + }, + { + "epoch": 0.7282705189250263, + "grad_norm": 0.7109708786010742, + "learning_rate": 0.00011612662581998917, + "loss": 2.5912, + "step": 9024 + }, + { + "epoch": 0.7283512226616092, + "grad_norm": 0.6814073920249939, + "learning_rate": 0.00011611104535706645, + "loss": 2.5742, + "step": 9025 + }, + { + "epoch": 0.7284319263981922, + "grad_norm": 0.6788144707679749, + "learning_rate": 0.0001160954644926144, + "loss": 2.5656, + "step": 9026 + }, + { + "epoch": 0.7285126301347752, + "grad_norm": 0.7312989830970764, + "learning_rate": 0.00011607988322702126, + "loss": 2.5877, + "step": 9027 + }, + { + "epoch": 0.7285933338713583, + "grad_norm": 0.6725338697433472, + "learning_rate": 
0.0001160643015606754, + "loss": 2.5261, + "step": 9028 + }, + { + "epoch": 0.7286740376079412, + "grad_norm": 0.7439326047897339, + "learning_rate": 0.00011604871949396516, + "loss": 2.603, + "step": 9029 + }, + { + "epoch": 0.7287547413445242, + "grad_norm": 0.7091783285140991, + "learning_rate": 0.00011603313702727889, + "loss": 2.5227, + "step": 9030 + }, + { + "epoch": 0.7288354450811072, + "grad_norm": 0.7474398016929626, + "learning_rate": 0.00011601755416100492, + "loss": 2.616, + "step": 9031 + }, + { + "epoch": 0.7289161488176903, + "grad_norm": 0.6904098987579346, + "learning_rate": 0.00011600197089553162, + "loss": 2.556, + "step": 9032 + }, + { + "epoch": 0.7289968525542733, + "grad_norm": 0.7305783033370972, + "learning_rate": 0.00011598638723124739, + "loss": 2.5633, + "step": 9033 + }, + { + "epoch": 0.7290775562908562, + "grad_norm": 0.6626651883125305, + "learning_rate": 0.00011597080316854062, + "loss": 2.5862, + "step": 9034 + }, + { + "epoch": 0.7291582600274392, + "grad_norm": 0.683102548122406, + "learning_rate": 0.00011595521870779968, + "loss": 2.5629, + "step": 9035 + }, + { + "epoch": 0.7292389637640223, + "grad_norm": 0.7486757636070251, + "learning_rate": 0.00011593963384941295, + "loss": 2.5831, + "step": 9036 + }, + { + "epoch": 0.7293196675006053, + "grad_norm": 0.8059591054916382, + "learning_rate": 0.00011592404859376888, + "loss": 2.6414, + "step": 9037 + }, + { + "epoch": 0.7294003712371883, + "grad_norm": 0.8371721506118774, + "learning_rate": 0.00011590846294125594, + "loss": 2.643, + "step": 9038 + }, + { + "epoch": 0.7294810749737712, + "grad_norm": 0.7216931581497192, + "learning_rate": 0.00011589287689226246, + "loss": 2.6, + "step": 9039 + }, + { + "epoch": 0.7295617787103543, + "grad_norm": 0.6940354704856873, + "learning_rate": 0.00011587729044717701, + "loss": 2.546, + "step": 9040 + }, + { + "epoch": 0.7296424824469373, + "grad_norm": 0.6888829469680786, + "learning_rate": 0.00011586170360638792, + "loss": 2.5878, + 
"step": 9041 + }, + { + "epoch": 0.7297231861835203, + "grad_norm": 0.6863886117935181, + "learning_rate": 0.00011584611637028373, + "loss": 2.5389, + "step": 9042 + }, + { + "epoch": 0.7298038899201033, + "grad_norm": 0.6670756936073303, + "learning_rate": 0.00011583052873925294, + "loss": 2.5465, + "step": 9043 + }, + { + "epoch": 0.7298845936566863, + "grad_norm": 0.7441220879554749, + "learning_rate": 0.00011581494071368392, + "loss": 2.5679, + "step": 9044 + }, + { + "epoch": 0.7299652973932693, + "grad_norm": 0.7135717272758484, + "learning_rate": 0.0001157993522939653, + "loss": 2.5341, + "step": 9045 + }, + { + "epoch": 0.7300460011298523, + "grad_norm": 0.6837992072105408, + "learning_rate": 0.00011578376348048547, + "loss": 2.5233, + "step": 9046 + }, + { + "epoch": 0.7301267048664353, + "grad_norm": 0.706666886806488, + "learning_rate": 0.00011576817427363302, + "loss": 2.6109, + "step": 9047 + }, + { + "epoch": 0.7302074086030184, + "grad_norm": 0.6856269240379333, + "learning_rate": 0.00011575258467379646, + "loss": 2.5651, + "step": 9048 + }, + { + "epoch": 0.7302881123396013, + "grad_norm": 0.6931480169296265, + "learning_rate": 0.00011573699468136427, + "loss": 2.6031, + "step": 9049 + }, + { + "epoch": 0.7303688160761843, + "grad_norm": 0.6558480858802795, + "learning_rate": 0.00011572140429672508, + "loss": 2.5661, + "step": 9050 + }, + { + "epoch": 0.7304495198127673, + "grad_norm": 0.6468425393104553, + "learning_rate": 0.00011570581352026742, + "loss": 2.5171, + "step": 9051 + }, + { + "epoch": 0.7305302235493504, + "grad_norm": 0.7204702496528625, + "learning_rate": 0.00011569022235237974, + "loss": 2.5861, + "step": 9052 + }, + { + "epoch": 0.7306109272859334, + "grad_norm": 0.7536416053771973, + "learning_rate": 0.00011567463079345078, + "loss": 2.633, + "step": 9053 + }, + { + "epoch": 0.7306916310225163, + "grad_norm": 0.6597960591316223, + "learning_rate": 0.00011565903884386904, + "loss": 2.5327, + "step": 9054 + }, + { + "epoch": 
0.7307723347590993, + "grad_norm": 0.689153254032135, + "learning_rate": 0.0001156434465040231, + "loss": 2.5397, + "step": 9055 + }, + { + "epoch": 0.7308530384956824, + "grad_norm": 0.7664844393730164, + "learning_rate": 0.00011562785377430159, + "loss": 2.4852, + "step": 9056 + }, + { + "epoch": 0.7309337422322654, + "grad_norm": 0.7122881412506104, + "learning_rate": 0.0001156122606550931, + "loss": 2.5401, + "step": 9057 + }, + { + "epoch": 0.7310144459688483, + "grad_norm": 0.6937551498413086, + "learning_rate": 0.00011559666714678627, + "loss": 2.5705, + "step": 9058 + }, + { + "epoch": 0.7310951497054313, + "grad_norm": 0.6504047513008118, + "learning_rate": 0.00011558107324976974, + "loss": 2.5638, + "step": 9059 + }, + { + "epoch": 0.7311758534420144, + "grad_norm": 0.7759538888931274, + "learning_rate": 0.0001155654789644321, + "loss": 2.5864, + "step": 9060 + }, + { + "epoch": 0.7312565571785974, + "grad_norm": 0.719859778881073, + "learning_rate": 0.00011554988429116207, + "loss": 2.519, + "step": 9061 + }, + { + "epoch": 0.7313372609151804, + "grad_norm": 0.7159178853034973, + "learning_rate": 0.00011553428923034826, + "loss": 2.5301, + "step": 9062 + }, + { + "epoch": 0.7314179646517633, + "grad_norm": 0.6584001183509827, + "learning_rate": 0.00011551869378237934, + "loss": 2.4716, + "step": 9063 + }, + { + "epoch": 0.7314986683883464, + "grad_norm": 0.6548463702201843, + "learning_rate": 0.00011550309794764405, + "loss": 2.5637, + "step": 9064 + }, + { + "epoch": 0.7315793721249294, + "grad_norm": 0.73887699842453, + "learning_rate": 0.000115487501726531, + "loss": 2.5813, + "step": 9065 + }, + { + "epoch": 0.7316600758615124, + "grad_norm": 0.7856181859970093, + "learning_rate": 0.00011547190511942893, + "loss": 2.592, + "step": 9066 + }, + { + "epoch": 0.7317407795980954, + "grad_norm": 0.7040740847587585, + "learning_rate": 0.00011545630812672654, + "loss": 2.5324, + "step": 9067 + }, + { + "epoch": 0.7318214833346784, + "grad_norm": 
0.7316064238548279, + "learning_rate": 0.00011544071074881253, + "loss": 2.5487, + "step": 9068 + }, + { + "epoch": 0.7319021870712614, + "grad_norm": 0.7020413279533386, + "learning_rate": 0.00011542511298607568, + "loss": 2.5179, + "step": 9069 + }, + { + "epoch": 0.7319828908078444, + "grad_norm": 0.672605574131012, + "learning_rate": 0.00011540951483890468, + "loss": 2.5367, + "step": 9070 + }, + { + "epoch": 0.7320635945444274, + "grad_norm": 0.7668856382369995, + "learning_rate": 0.00011539391630768828, + "loss": 2.6089, + "step": 9071 + }, + { + "epoch": 0.7321442982810105, + "grad_norm": 0.6641809940338135, + "learning_rate": 0.00011537831739281524, + "loss": 2.5411, + "step": 9072 + }, + { + "epoch": 0.7322250020175934, + "grad_norm": 0.7142000198364258, + "learning_rate": 0.00011536271809467434, + "loss": 2.5469, + "step": 9073 + }, + { + "epoch": 0.7323057057541764, + "grad_norm": 0.7266140580177307, + "learning_rate": 0.00011534711841365435, + "loss": 2.5565, + "step": 9074 + }, + { + "epoch": 0.7323864094907594, + "grad_norm": 0.6763899326324463, + "learning_rate": 0.00011533151835014407, + "loss": 2.551, + "step": 9075 + }, + { + "epoch": 0.7324671132273425, + "grad_norm": 0.6517418026924133, + "learning_rate": 0.00011531591790453224, + "loss": 2.5415, + "step": 9076 + }, + { + "epoch": 0.7325478169639255, + "grad_norm": 0.6602214574813843, + "learning_rate": 0.00011530031707720772, + "loss": 2.593, + "step": 9077 + }, + { + "epoch": 0.7326285207005084, + "grad_norm": 0.7448844313621521, + "learning_rate": 0.00011528471586855931, + "loss": 2.5598, + "step": 9078 + }, + { + "epoch": 0.7327092244370914, + "grad_norm": 0.7197073698043823, + "learning_rate": 0.00011526911427897579, + "loss": 2.5128, + "step": 9079 + }, + { + "epoch": 0.7327899281736744, + "grad_norm": 0.7245968580245972, + "learning_rate": 0.00011525351230884606, + "loss": 2.5016, + "step": 9080 + }, + { + "epoch": 0.7328706319102575, + "grad_norm": 0.6715837717056274, + "learning_rate": 
0.00011523790995855892, + "loss": 2.5469, + "step": 9081 + }, + { + "epoch": 0.7329513356468405, + "grad_norm": 0.7143638730049133, + "learning_rate": 0.00011522230722850325, + "loss": 2.5164, + "step": 9082 + }, + { + "epoch": 0.7330320393834234, + "grad_norm": 0.6809647083282471, + "learning_rate": 0.00011520670411906787, + "loss": 2.6071, + "step": 9083 + }, + { + "epoch": 0.7331127431200064, + "grad_norm": 0.7160956859588623, + "learning_rate": 0.00011519110063064167, + "loss": 2.5346, + "step": 9084 + }, + { + "epoch": 0.7331934468565895, + "grad_norm": 0.6814724802970886, + "learning_rate": 0.00011517549676361357, + "loss": 2.5499, + "step": 9085 + }, + { + "epoch": 0.7332741505931725, + "grad_norm": 0.6914821267127991, + "learning_rate": 0.00011515989251837239, + "loss": 2.5386, + "step": 9086 + }, + { + "epoch": 0.7333548543297554, + "grad_norm": 0.7292554378509521, + "learning_rate": 0.00011514428789530705, + "loss": 2.5642, + "step": 9087 + }, + { + "epoch": 0.7334355580663384, + "grad_norm": 0.6894826292991638, + "learning_rate": 0.00011512868289480647, + "loss": 2.6131, + "step": 9088 + }, + { + "epoch": 0.7335162618029215, + "grad_norm": 0.658770740032196, + "learning_rate": 0.00011511307751725957, + "loss": 2.5594, + "step": 9089 + }, + { + "epoch": 0.7335969655395045, + "grad_norm": 0.7508681416511536, + "learning_rate": 0.0001150974717630553, + "loss": 2.595, + "step": 9090 + }, + { + "epoch": 0.7336776692760875, + "grad_norm": 0.69661545753479, + "learning_rate": 0.00011508186563258256, + "loss": 2.5803, + "step": 9091 + }, + { + "epoch": 0.7337583730126704, + "grad_norm": 0.7277412414550781, + "learning_rate": 0.00011506625912623028, + "loss": 2.5456, + "step": 9092 + }, + { + "epoch": 0.7338390767492535, + "grad_norm": 0.658329963684082, + "learning_rate": 0.00011505065224438745, + "loss": 2.5177, + "step": 9093 + }, + { + "epoch": 0.7339197804858365, + "grad_norm": 0.7277211546897888, + "learning_rate": 0.00011503504498744302, + "loss": 2.553, + 
"step": 9094 + }, + { + "epoch": 0.7340004842224195, + "grad_norm": 0.7240201830863953, + "learning_rate": 0.00011501943735578598, + "loss": 2.5851, + "step": 9095 + }, + { + "epoch": 0.7340811879590025, + "grad_norm": 0.6565662026405334, + "learning_rate": 0.00011500382934980529, + "loss": 2.5865, + "step": 9096 + }, + { + "epoch": 0.7341618916955855, + "grad_norm": 0.658268392086029, + "learning_rate": 0.00011498822096988995, + "loss": 2.5402, + "step": 9097 + }, + { + "epoch": 0.7342425954321685, + "grad_norm": 0.7305087447166443, + "learning_rate": 0.00011497261221642894, + "loss": 2.5483, + "step": 9098 + }, + { + "epoch": 0.7343232991687515, + "grad_norm": 0.7271504402160645, + "learning_rate": 0.00011495700308981134, + "loss": 2.5303, + "step": 9099 + }, + { + "epoch": 0.7344040029053345, + "grad_norm": 0.70429527759552, + "learning_rate": 0.0001149413935904261, + "loss": 2.5878, + "step": 9100 + }, + { + "epoch": 0.7344847066419176, + "grad_norm": 0.7168769836425781, + "learning_rate": 0.00011492578371866229, + "loss": 2.6017, + "step": 9101 + }, + { + "epoch": 0.7345654103785005, + "grad_norm": 0.7131996154785156, + "learning_rate": 0.00011491017347490891, + "loss": 2.5439, + "step": 9102 + }, + { + "epoch": 0.7346461141150835, + "grad_norm": 0.660321056842804, + "learning_rate": 0.00011489456285955504, + "loss": 2.5236, + "step": 9103 + }, + { + "epoch": 0.7347268178516665, + "grad_norm": 0.6742995977401733, + "learning_rate": 0.00011487895187298977, + "loss": 2.5375, + "step": 9104 + }, + { + "epoch": 0.7348075215882496, + "grad_norm": 0.6380610466003418, + "learning_rate": 0.00011486334051560206, + "loss": 2.5173, + "step": 9105 + }, + { + "epoch": 0.7348882253248326, + "grad_norm": 0.6948198080062866, + "learning_rate": 0.0001148477287877811, + "loss": 2.5247, + "step": 9106 + }, + { + "epoch": 0.7349689290614155, + "grad_norm": 0.7088696360588074, + "learning_rate": 0.00011483211668991591, + "loss": 2.587, + "step": 9107 + }, + { + "epoch": 
0.7350496327979985, + "grad_norm": 0.6278921961784363, + "learning_rate": 0.00011481650422239556, + "loss": 2.5652, + "step": 9108 + }, + { + "epoch": 0.7351303365345816, + "grad_norm": 0.6901956796646118, + "learning_rate": 0.00011480089138560926, + "loss": 2.5964, + "step": 9109 + }, + { + "epoch": 0.7352110402711646, + "grad_norm": 0.7264819145202637, + "learning_rate": 0.00011478527817994604, + "loss": 2.5437, + "step": 9110 + }, + { + "epoch": 0.7352917440077475, + "grad_norm": 0.6940708756446838, + "learning_rate": 0.00011476966460579501, + "loss": 2.5761, + "step": 9111 + }, + { + "epoch": 0.7353724477443305, + "grad_norm": 0.689588189125061, + "learning_rate": 0.00011475405066354536, + "loss": 2.5457, + "step": 9112 + }, + { + "epoch": 0.7354531514809136, + "grad_norm": 0.6938436031341553, + "learning_rate": 0.00011473843635358618, + "loss": 2.6026, + "step": 9113 + }, + { + "epoch": 0.7355338552174966, + "grad_norm": 0.7122177481651306, + "learning_rate": 0.00011472282167630663, + "loss": 2.5701, + "step": 9114 + }, + { + "epoch": 0.7356145589540796, + "grad_norm": 0.6667213439941406, + "learning_rate": 0.00011470720663209591, + "loss": 2.5944, + "step": 9115 + }, + { + "epoch": 0.7356952626906625, + "grad_norm": 0.705910861492157, + "learning_rate": 0.00011469159122134314, + "loss": 2.6183, + "step": 9116 + }, + { + "epoch": 0.7357759664272456, + "grad_norm": 0.709937572479248, + "learning_rate": 0.00011467597544443751, + "loss": 2.5153, + "step": 9117 + }, + { + "epoch": 0.7358566701638286, + "grad_norm": 0.6870958805084229, + "learning_rate": 0.00011466035930176822, + "loss": 2.5334, + "step": 9118 + }, + { + "epoch": 0.7359373739004116, + "grad_norm": 0.7274392247200012, + "learning_rate": 0.00011464474279372443, + "loss": 2.5336, + "step": 9119 + }, + { + "epoch": 0.7360180776369946, + "grad_norm": 0.6360952258110046, + "learning_rate": 0.0001146291259206954, + "loss": 2.5604, + "step": 9120 + }, + { + "epoch": 0.7360987813735776, + "grad_norm": 
0.7990559935569763, + "learning_rate": 0.00011461350868307028, + "loss": 2.624, + "step": 9121 + }, + { + "epoch": 0.7361794851101606, + "grad_norm": 0.6670079827308655, + "learning_rate": 0.00011459789108123835, + "loss": 2.5761, + "step": 9122 + }, + { + "epoch": 0.7362601888467436, + "grad_norm": 0.6994437575340271, + "learning_rate": 0.00011458227311558877, + "loss": 2.5679, + "step": 9123 + }, + { + "epoch": 0.7363408925833266, + "grad_norm": 0.7428358197212219, + "learning_rate": 0.00011456665478651087, + "loss": 2.5874, + "step": 9124 + }, + { + "epoch": 0.7364215963199097, + "grad_norm": 0.7079486846923828, + "learning_rate": 0.00011455103609439387, + "loss": 2.5999, + "step": 9125 + }, + { + "epoch": 0.7365023000564926, + "grad_norm": 0.646244466304779, + "learning_rate": 0.00011453541703962695, + "loss": 2.5053, + "step": 9126 + }, + { + "epoch": 0.7365830037930756, + "grad_norm": 0.6671318411827087, + "learning_rate": 0.0001145197976225995, + "loss": 2.5277, + "step": 9127 + }, + { + "epoch": 0.7366637075296586, + "grad_norm": 0.7060399055480957, + "learning_rate": 0.00011450417784370072, + "loss": 2.6092, + "step": 9128 + }, + { + "epoch": 0.7367444112662416, + "grad_norm": 0.741547703742981, + "learning_rate": 0.00011448855770331989, + "loss": 2.6121, + "step": 9129 + }, + { + "epoch": 0.7368251150028247, + "grad_norm": 0.710267961025238, + "learning_rate": 0.00011447293720184636, + "loss": 2.5141, + "step": 9130 + }, + { + "epoch": 0.7369058187394076, + "grad_norm": 0.6914308071136475, + "learning_rate": 0.0001144573163396694, + "loss": 2.5489, + "step": 9131 + }, + { + "epoch": 0.7369865224759906, + "grad_norm": 0.7051414847373962, + "learning_rate": 0.0001144416951171783, + "loss": 2.5925, + "step": 9132 + }, + { + "epoch": 0.7370672262125736, + "grad_norm": 0.6765387058258057, + "learning_rate": 0.00011442607353476245, + "loss": 2.5864, + "step": 9133 + }, + { + "epoch": 0.7371479299491567, + "grad_norm": 0.706672191619873, + "learning_rate": 
0.00011441045159281108, + "loss": 2.4823, + "step": 9134 + }, + { + "epoch": 0.7372286336857397, + "grad_norm": 0.7534066438674927, + "learning_rate": 0.00011439482929171362, + "loss": 2.5728, + "step": 9135 + }, + { + "epoch": 0.7373093374223226, + "grad_norm": 0.6628777384757996, + "learning_rate": 0.00011437920663185939, + "loss": 2.5538, + "step": 9136 + }, + { + "epoch": 0.7373900411589056, + "grad_norm": 0.6575733423233032, + "learning_rate": 0.00011436358361363773, + "loss": 2.4802, + "step": 9137 + }, + { + "epoch": 0.7374707448954887, + "grad_norm": 0.7629329562187195, + "learning_rate": 0.00011434796023743803, + "loss": 2.6169, + "step": 9138 + }, + { + "epoch": 0.7375514486320717, + "grad_norm": 0.7148225903511047, + "learning_rate": 0.00011433233650364965, + "loss": 2.6335, + "step": 9139 + }, + { + "epoch": 0.7376321523686546, + "grad_norm": 0.705210268497467, + "learning_rate": 0.00011431671241266198, + "loss": 2.6261, + "step": 9140 + }, + { + "epoch": 0.7377128561052376, + "grad_norm": 0.7137441635131836, + "learning_rate": 0.00011430108796486441, + "loss": 2.5021, + "step": 9141 + }, + { + "epoch": 0.7377935598418207, + "grad_norm": 0.6979854702949524, + "learning_rate": 0.00011428546316064635, + "loss": 2.5436, + "step": 9142 + }, + { + "epoch": 0.7378742635784037, + "grad_norm": 0.6568784713745117, + "learning_rate": 0.00011426983800039721, + "loss": 2.5882, + "step": 9143 + }, + { + "epoch": 0.7379549673149867, + "grad_norm": 0.666606605052948, + "learning_rate": 0.00011425421248450638, + "loss": 2.5472, + "step": 9144 + }, + { + "epoch": 0.7380356710515696, + "grad_norm": 0.7240840792655945, + "learning_rate": 0.00011423858661336333, + "loss": 2.6057, + "step": 9145 + }, + { + "epoch": 0.7381163747881527, + "grad_norm": 0.7342149615287781, + "learning_rate": 0.0001142229603873575, + "loss": 2.508, + "step": 9146 + }, + { + "epoch": 0.7381970785247357, + "grad_norm": 0.7089941501617432, + "learning_rate": 0.0001142073338068783, + "loss": 2.6115, 
+ "step": 9147 + }, + { + "epoch": 0.7382777822613187, + "grad_norm": 0.6883555054664612, + "learning_rate": 0.00011419170687231519, + "loss": 2.5254, + "step": 9148 + }, + { + "epoch": 0.7383584859979017, + "grad_norm": 0.6819528937339783, + "learning_rate": 0.00011417607958405765, + "loss": 2.5498, + "step": 9149 + }, + { + "epoch": 0.7384391897344847, + "grad_norm": 0.7348979711532593, + "learning_rate": 0.00011416045194249516, + "loss": 2.5547, + "step": 9150 + }, + { + "epoch": 0.7385198934710677, + "grad_norm": 0.6733320355415344, + "learning_rate": 0.00011414482394801719, + "loss": 2.5985, + "step": 9151 + }, + { + "epoch": 0.7386005972076507, + "grad_norm": 0.714771032333374, + "learning_rate": 0.00011412919560101327, + "loss": 2.571, + "step": 9152 + }, + { + "epoch": 0.7386813009442337, + "grad_norm": 0.7010024189949036, + "learning_rate": 0.0001141135669018728, + "loss": 2.5755, + "step": 9153 + }, + { + "epoch": 0.7387620046808168, + "grad_norm": 0.7014826536178589, + "learning_rate": 0.00011409793785098536, + "loss": 2.6033, + "step": 9154 + }, + { + "epoch": 0.7388427084173997, + "grad_norm": 0.7286051511764526, + "learning_rate": 0.0001140823084487405, + "loss": 2.515, + "step": 9155 + }, + { + "epoch": 0.7389234121539827, + "grad_norm": 0.669365406036377, + "learning_rate": 0.00011406667869552768, + "loss": 2.506, + "step": 9156 + }, + { + "epoch": 0.7390041158905657, + "grad_norm": 0.6886852979660034, + "learning_rate": 0.00011405104859173645, + "loss": 2.6123, + "step": 9157 + }, + { + "epoch": 0.7390848196271488, + "grad_norm": 0.6344162225723267, + "learning_rate": 0.00011403541813775635, + "loss": 2.5483, + "step": 9158 + }, + { + "epoch": 0.7391655233637318, + "grad_norm": 0.7043579816818237, + "learning_rate": 0.00011401978733397694, + "loss": 2.5545, + "step": 9159 + }, + { + "epoch": 0.7392462271003147, + "grad_norm": 0.7960262298583984, + "learning_rate": 0.00011400415618078781, + "loss": 2.5666, + "step": 9160 + }, + { + "epoch": 
0.7393269308368977, + "grad_norm": 0.6771546006202698, + "learning_rate": 0.00011398852467857848, + "loss": 2.6016, + "step": 9161 + }, + { + "epoch": 0.7394076345734808, + "grad_norm": 0.6522069573402405, + "learning_rate": 0.00011397289282773855, + "loss": 2.5493, + "step": 9162 + }, + { + "epoch": 0.7394883383100638, + "grad_norm": 0.6804657578468323, + "learning_rate": 0.00011395726062865762, + "loss": 2.5856, + "step": 9163 + }, + { + "epoch": 0.7395690420466468, + "grad_norm": 0.7562841176986694, + "learning_rate": 0.00011394162808172526, + "loss": 2.557, + "step": 9164 + }, + { + "epoch": 0.7396497457832297, + "grad_norm": 0.6464113593101501, + "learning_rate": 0.00011392599518733107, + "loss": 2.5292, + "step": 9165 + }, + { + "epoch": 0.7397304495198128, + "grad_norm": 0.7469549775123596, + "learning_rate": 0.00011391036194586466, + "loss": 2.6168, + "step": 9166 + }, + { + "epoch": 0.7398111532563958, + "grad_norm": 0.7095946669578552, + "learning_rate": 0.00011389472835771572, + "loss": 2.5468, + "step": 9167 + }, + { + "epoch": 0.7398918569929788, + "grad_norm": 0.7376375794410706, + "learning_rate": 0.00011387909442327382, + "loss": 2.5576, + "step": 9168 + }, + { + "epoch": 0.7399725607295617, + "grad_norm": 0.736727774143219, + "learning_rate": 0.00011386346014292859, + "loss": 2.6034, + "step": 9169 + }, + { + "epoch": 0.7400532644661448, + "grad_norm": 0.7026904821395874, + "learning_rate": 0.00011384782551706967, + "loss": 2.5848, + "step": 9170 + }, + { + "epoch": 0.7401339682027278, + "grad_norm": 0.6894888877868652, + "learning_rate": 0.00011383219054608678, + "loss": 2.5475, + "step": 9171 + }, + { + "epoch": 0.7402146719393108, + "grad_norm": 0.6754137277603149, + "learning_rate": 0.00011381655523036954, + "loss": 2.5124, + "step": 9172 + }, + { + "epoch": 0.7402953756758938, + "grad_norm": 0.7935643196105957, + "learning_rate": 0.00011380091957030762, + "loss": 2.5898, + "step": 9173 + }, + { + "epoch": 0.7403760794124769, + "grad_norm": 
0.7017118334770203, + "learning_rate": 0.0001137852835662907, + "loss": 2.6139, + "step": 9174 + }, + { + "epoch": 0.7404567831490598, + "grad_norm": 0.7246189117431641, + "learning_rate": 0.00011376964721870847, + "loss": 2.4627, + "step": 9175 + }, + { + "epoch": 0.7405374868856428, + "grad_norm": 0.6835598349571228, + "learning_rate": 0.00011375401052795064, + "loss": 2.5707, + "step": 9176 + }, + { + "epoch": 0.7406181906222258, + "grad_norm": 0.6439787745475769, + "learning_rate": 0.00011373837349440693, + "loss": 2.5161, + "step": 9177 + }, + { + "epoch": 0.7406988943588089, + "grad_norm": 0.7249091267585754, + "learning_rate": 0.00011372273611846704, + "loss": 2.5054, + "step": 9178 + }, + { + "epoch": 0.7407795980953918, + "grad_norm": 0.7653267979621887, + "learning_rate": 0.0001137070984005207, + "loss": 2.6016, + "step": 9179 + }, + { + "epoch": 0.7408603018319748, + "grad_norm": 0.7195165157318115, + "learning_rate": 0.0001136914603409576, + "loss": 2.5931, + "step": 9180 + }, + { + "epoch": 0.7409410055685578, + "grad_norm": 0.7093746662139893, + "learning_rate": 0.00011367582194016756, + "loss": 2.5567, + "step": 9181 + }, + { + "epoch": 0.7410217093051408, + "grad_norm": 0.6868107318878174, + "learning_rate": 0.00011366018319854026, + "loss": 2.5769, + "step": 9182 + }, + { + "epoch": 0.7411024130417239, + "grad_norm": 0.6870261430740356, + "learning_rate": 0.00011364454411646552, + "loss": 2.5418, + "step": 9183 + }, + { + "epoch": 0.7411831167783068, + "grad_norm": 0.7034662365913391, + "learning_rate": 0.00011362890469433306, + "loss": 2.5798, + "step": 9184 + }, + { + "epoch": 0.7412638205148898, + "grad_norm": 0.7200794816017151, + "learning_rate": 0.00011361326493253264, + "loss": 2.5523, + "step": 9185 + }, + { + "epoch": 0.7413445242514728, + "grad_norm": 0.7034540772438049, + "learning_rate": 0.0001135976248314541, + "loss": 2.5107, + "step": 9186 + }, + { + "epoch": 0.7414252279880559, + "grad_norm": 0.7155053019523621, + "learning_rate": 
0.00011358198439148721, + "loss": 2.5804, + "step": 9187 + }, + { + "epoch": 0.7415059317246389, + "grad_norm": 0.6965398788452148, + "learning_rate": 0.00011356634361302175, + "loss": 2.5532, + "step": 9188 + }, + { + "epoch": 0.7415866354612218, + "grad_norm": 0.65416419506073, + "learning_rate": 0.00011355070249644755, + "loss": 2.5411, + "step": 9189 + }, + { + "epoch": 0.7416673391978048, + "grad_norm": 0.6798486709594727, + "learning_rate": 0.0001135350610421544, + "loss": 2.4957, + "step": 9190 + }, + { + "epoch": 0.7417480429343879, + "grad_norm": 0.6839874386787415, + "learning_rate": 0.00011351941925053218, + "loss": 2.5745, + "step": 9191 + }, + { + "epoch": 0.7418287466709709, + "grad_norm": 0.7374398708343506, + "learning_rate": 0.00011350377712197068, + "loss": 2.4923, + "step": 9192 + }, + { + "epoch": 0.7419094504075538, + "grad_norm": 0.7517396807670593, + "learning_rate": 0.00011348813465685974, + "loss": 2.538, + "step": 9193 + }, + { + "epoch": 0.7419901541441368, + "grad_norm": 0.6670863628387451, + "learning_rate": 0.00011347249185558926, + "loss": 2.5442, + "step": 9194 + }, + { + "epoch": 0.7420708578807199, + "grad_norm": 0.6508080363273621, + "learning_rate": 0.00011345684871854905, + "loss": 2.6665, + "step": 9195 + }, + { + "epoch": 0.7421515616173029, + "grad_norm": 0.6935258507728577, + "learning_rate": 0.00011344120524612898, + "loss": 2.5388, + "step": 9196 + }, + { + "epoch": 0.7422322653538859, + "grad_norm": 0.696067750453949, + "learning_rate": 0.00011342556143871897, + "loss": 2.574, + "step": 9197 + }, + { + "epoch": 0.7423129690904688, + "grad_norm": 0.7486966252326965, + "learning_rate": 0.00011340991729670882, + "loss": 2.5924, + "step": 9198 + }, + { + "epoch": 0.7423936728270519, + "grad_norm": 0.676407516002655, + "learning_rate": 0.00011339427282048854, + "loss": 2.5907, + "step": 9199 + }, + { + "epoch": 0.7424743765636349, + "grad_norm": 0.7241318225860596, + "learning_rate": 0.00011337862801044792, + "loss": 2.5685, + 
"step": 9200 + }, + { + "epoch": 0.7425550803002179, + "grad_norm": 0.7012883424758911, + "learning_rate": 0.00011336298286697692, + "loss": 2.56, + "step": 9201 + }, + { + "epoch": 0.7426357840368009, + "grad_norm": 0.7313060164451599, + "learning_rate": 0.0001133473373904655, + "loss": 2.632, + "step": 9202 + }, + { + "epoch": 0.742716487773384, + "grad_norm": 0.6829206943511963, + "learning_rate": 0.00011333169158130353, + "loss": 2.5006, + "step": 9203 + }, + { + "epoch": 0.7427971915099669, + "grad_norm": 0.7324578166007996, + "learning_rate": 0.00011331604543988093, + "loss": 2.5004, + "step": 9204 + }, + { + "epoch": 0.7428778952465499, + "grad_norm": 0.6761097311973572, + "learning_rate": 0.00011330039896658766, + "loss": 2.5516, + "step": 9205 + }, + { + "epoch": 0.7429585989831329, + "grad_norm": 0.6909754276275635, + "learning_rate": 0.00011328475216181369, + "loss": 2.5273, + "step": 9206 + }, + { + "epoch": 0.743039302719716, + "grad_norm": 0.6420674324035645, + "learning_rate": 0.00011326910502594899, + "loss": 2.5507, + "step": 9207 + }, + { + "epoch": 0.7431200064562989, + "grad_norm": 0.6442455053329468, + "learning_rate": 0.0001132534575593835, + "loss": 2.542, + "step": 9208 + }, + { + "epoch": 0.7432007101928819, + "grad_norm": 0.7053101658821106, + "learning_rate": 0.0001132378097625072, + "loss": 2.5116, + "step": 9209 + }, + { + "epoch": 0.7432814139294649, + "grad_norm": 0.7570765614509583, + "learning_rate": 0.00011322216163571007, + "loss": 2.5576, + "step": 9210 + }, + { + "epoch": 0.743362117666048, + "grad_norm": 0.6937675476074219, + "learning_rate": 0.00011320651317938214, + "loss": 2.6212, + "step": 9211 + }, + { + "epoch": 0.743442821402631, + "grad_norm": 0.6741313934326172, + "learning_rate": 0.00011319086439391333, + "loss": 2.5723, + "step": 9212 + }, + { + "epoch": 0.7435235251392139, + "grad_norm": 0.711358904838562, + "learning_rate": 0.00011317521527969374, + "loss": 2.5713, + "step": 9213 + }, + { + "epoch": 
0.7436042288757969, + "grad_norm": 0.7443268895149231, + "learning_rate": 0.00011315956583711331, + "loss": 2.5301, + "step": 9214 + }, + { + "epoch": 0.74368493261238, + "grad_norm": 0.7001742720603943, + "learning_rate": 0.00011314391606656212, + "loss": 2.5545, + "step": 9215 + }, + { + "epoch": 0.743765636348963, + "grad_norm": 0.7294990420341492, + "learning_rate": 0.00011312826596843019, + "loss": 2.5897, + "step": 9216 + }, + { + "epoch": 0.743846340085546, + "grad_norm": 0.706924319267273, + "learning_rate": 0.00011311261554310753, + "loss": 2.6477, + "step": 9217 + }, + { + "epoch": 0.7439270438221289, + "grad_norm": 0.7065039277076721, + "learning_rate": 0.00011309696479098423, + "loss": 2.5326, + "step": 9218 + }, + { + "epoch": 0.744007747558712, + "grad_norm": 0.6502599716186523, + "learning_rate": 0.00011308131371245037, + "loss": 2.5833, + "step": 9219 + }, + { + "epoch": 0.744088451295295, + "grad_norm": 0.7135158181190491, + "learning_rate": 0.00011306566230789592, + "loss": 2.5686, + "step": 9220 + }, + { + "epoch": 0.744169155031878, + "grad_norm": 0.7239195108413696, + "learning_rate": 0.00011305001057771101, + "loss": 2.6303, + "step": 9221 + }, + { + "epoch": 0.744249858768461, + "grad_norm": 0.6442604660987854, + "learning_rate": 0.00011303435852228574, + "loss": 2.5495, + "step": 9222 + }, + { + "epoch": 0.744330562505044, + "grad_norm": 0.6700316071510315, + "learning_rate": 0.0001130187061420102, + "loss": 2.5575, + "step": 9223 + }, + { + "epoch": 0.744411266241627, + "grad_norm": 0.7532816529273987, + "learning_rate": 0.00011300305343727446, + "loss": 2.5174, + "step": 9224 + }, + { + "epoch": 0.74449196997821, + "grad_norm": 0.7614738941192627, + "learning_rate": 0.00011298740040846862, + "loss": 2.5995, + "step": 9225 + }, + { + "epoch": 0.744572673714793, + "grad_norm": 0.6781208515167236, + "learning_rate": 0.00011297174705598283, + "loss": 2.5225, + "step": 9226 + }, + { + "epoch": 0.744653377451376, + "grad_norm": 
0.680525541305542, + "learning_rate": 0.0001129560933802072, + "loss": 2.5844, + "step": 9227 + }, + { + "epoch": 0.744734081187959, + "grad_norm": 0.7196657657623291, + "learning_rate": 0.00011294043938153185, + "loss": 2.564, + "step": 9228 + }, + { + "epoch": 0.744814784924542, + "grad_norm": 0.6997412443161011, + "learning_rate": 0.00011292478506034694, + "loss": 2.6486, + "step": 9229 + }, + { + "epoch": 0.744895488661125, + "grad_norm": 0.7438939809799194, + "learning_rate": 0.00011290913041704256, + "loss": 2.5667, + "step": 9230 + }, + { + "epoch": 0.744976192397708, + "grad_norm": 0.7391374707221985, + "learning_rate": 0.00011289347545200892, + "loss": 2.5974, + "step": 9231 + }, + { + "epoch": 0.745056896134291, + "grad_norm": 0.7845481634140015, + "learning_rate": 0.0001128778201656362, + "loss": 2.5168, + "step": 9232 + }, + { + "epoch": 0.745137599870874, + "grad_norm": 0.728712797164917, + "learning_rate": 0.00011286216455831449, + "loss": 2.5241, + "step": 9233 + }, + { + "epoch": 0.745218303607457, + "grad_norm": 0.7310191988945007, + "learning_rate": 0.00011284650863043407, + "loss": 2.5777, + "step": 9234 + }, + { + "epoch": 0.74529900734404, + "grad_norm": 0.6661474704742432, + "learning_rate": 0.00011283085238238503, + "loss": 2.5471, + "step": 9235 + }, + { + "epoch": 0.7453797110806231, + "grad_norm": 0.7697983384132385, + "learning_rate": 0.00011281519581455761, + "loss": 2.587, + "step": 9236 + }, + { + "epoch": 0.745460414817206, + "grad_norm": 0.7336567640304565, + "learning_rate": 0.00011279953892734203, + "loss": 2.5756, + "step": 9237 + }, + { + "epoch": 0.745541118553789, + "grad_norm": 0.6192059516906738, + "learning_rate": 0.00011278388172112848, + "loss": 2.5038, + "step": 9238 + }, + { + "epoch": 0.745621822290372, + "grad_norm": 0.7180300354957581, + "learning_rate": 0.00011276822419630719, + "loss": 2.5469, + "step": 9239 + }, + { + "epoch": 0.7457025260269551, + "grad_norm": 0.7583367824554443, + "learning_rate": 
0.00011275256635326837, + "loss": 2.6274, + "step": 9240 + }, + { + "epoch": 0.7457832297635381, + "grad_norm": 0.6848096251487732, + "learning_rate": 0.00011273690819240221, + "loss": 2.5117, + "step": 9241 + }, + { + "epoch": 0.745863933500121, + "grad_norm": 0.6830503344535828, + "learning_rate": 0.00011272124971409907, + "loss": 2.5114, + "step": 9242 + }, + { + "epoch": 0.745944637236704, + "grad_norm": 0.780240535736084, + "learning_rate": 0.0001127055909187491, + "loss": 2.6432, + "step": 9243 + }, + { + "epoch": 0.7460253409732871, + "grad_norm": 0.7421274185180664, + "learning_rate": 0.00011268993180674261, + "loss": 2.5723, + "step": 9244 + }, + { + "epoch": 0.7461060447098701, + "grad_norm": 0.6695685386657715, + "learning_rate": 0.00011267427237846986, + "loss": 2.5335, + "step": 9245 + }, + { + "epoch": 0.746186748446453, + "grad_norm": 0.8390316963195801, + "learning_rate": 0.00011265861263432104, + "loss": 2.5125, + "step": 9246 + }, + { + "epoch": 0.746267452183036, + "grad_norm": 0.7030535936355591, + "learning_rate": 0.00011264295257468658, + "loss": 2.5986, + "step": 9247 + }, + { + "epoch": 0.7463481559196191, + "grad_norm": 0.6754253506660461, + "learning_rate": 0.00011262729219995669, + "loss": 2.5067, + "step": 9248 + }, + { + "epoch": 0.7464288596562021, + "grad_norm": 0.6809592843055725, + "learning_rate": 0.00011261163151052163, + "loss": 2.5359, + "step": 9249 + }, + { + "epoch": 0.7465095633927851, + "grad_norm": 0.6546878218650818, + "learning_rate": 0.00011259597050677178, + "loss": 2.5357, + "step": 9250 + }, + { + "epoch": 0.746590267129368, + "grad_norm": 0.6514731645584106, + "learning_rate": 0.00011258030918909739, + "loss": 2.5591, + "step": 9251 + }, + { + "epoch": 0.7466709708659511, + "grad_norm": 0.6981258392333984, + "learning_rate": 0.0001125646475578888, + "loss": 2.6171, + "step": 9252 + }, + { + "epoch": 0.7467516746025341, + "grad_norm": 0.6763784885406494, + "learning_rate": 0.00011254898561353639, + "loss": 2.5455, + 
"step": 9253 + }, + { + "epoch": 0.7468323783391171, + "grad_norm": 0.6241726279258728, + "learning_rate": 0.00011253332335643043, + "loss": 2.6073, + "step": 9254 + }, + { + "epoch": 0.7469130820757001, + "grad_norm": 0.6810312271118164, + "learning_rate": 0.00011251766078696132, + "loss": 2.5285, + "step": 9255 + }, + { + "epoch": 0.7469937858122832, + "grad_norm": 0.6603971123695374, + "learning_rate": 0.00011250199790551934, + "loss": 2.5985, + "step": 9256 + }, + { + "epoch": 0.7470744895488661, + "grad_norm": 0.69618159532547, + "learning_rate": 0.0001124863347124949, + "loss": 2.5728, + "step": 9257 + }, + { + "epoch": 0.7471551932854491, + "grad_norm": 0.6878889203071594, + "learning_rate": 0.00011247067120827837, + "loss": 2.5459, + "step": 9258 + }, + { + "epoch": 0.7472358970220321, + "grad_norm": 0.6613149046897888, + "learning_rate": 0.00011245500739326011, + "loss": 2.6559, + "step": 9259 + }, + { + "epoch": 0.7473166007586152, + "grad_norm": 0.6397448778152466, + "learning_rate": 0.00011243934326783053, + "loss": 2.5712, + "step": 9260 + }, + { + "epoch": 0.7473973044951981, + "grad_norm": 0.6804259419441223, + "learning_rate": 0.00011242367883237996, + "loss": 2.6143, + "step": 9261 + }, + { + "epoch": 0.7474780082317811, + "grad_norm": 0.8029066324234009, + "learning_rate": 0.00011240801408729884, + "loss": 2.5702, + "step": 9262 + }, + { + "epoch": 0.7475587119683641, + "grad_norm": 0.7086285948753357, + "learning_rate": 0.00011239234903297761, + "loss": 2.6113, + "step": 9263 + }, + { + "epoch": 0.7476394157049472, + "grad_norm": 0.6980452537536621, + "learning_rate": 0.00011237668366980665, + "loss": 2.6355, + "step": 9264 + }, + { + "epoch": 0.7477201194415302, + "grad_norm": 0.6906906962394714, + "learning_rate": 0.00011236101799817636, + "loss": 2.5605, + "step": 9265 + }, + { + "epoch": 0.7478008231781131, + "grad_norm": 0.7412894368171692, + "learning_rate": 0.00011234535201847716, + "loss": 2.6073, + "step": 9266 + }, + { + "epoch": 
0.7478815269146961, + "grad_norm": 0.6949330568313599, + "learning_rate": 0.00011232968573109955, + "loss": 2.5623, + "step": 9267 + }, + { + "epoch": 0.7479622306512792, + "grad_norm": 0.6916515827178955, + "learning_rate": 0.00011231401913643393, + "loss": 2.5348, + "step": 9268 + }, + { + "epoch": 0.7480429343878622, + "grad_norm": 0.7576180696487427, + "learning_rate": 0.0001122983522348708, + "loss": 2.5968, + "step": 9269 + }, + { + "epoch": 0.7481236381244452, + "grad_norm": 0.6734197735786438, + "learning_rate": 0.00011228268502680052, + "loss": 2.5185, + "step": 9270 + }, + { + "epoch": 0.7482043418610281, + "grad_norm": 0.6952544450759888, + "learning_rate": 0.00011226701751261367, + "loss": 2.57, + "step": 9271 + }, + { + "epoch": 0.7482850455976112, + "grad_norm": 0.6504654884338379, + "learning_rate": 0.00011225134969270068, + "loss": 2.5677, + "step": 9272 + }, + { + "epoch": 0.7483657493341942, + "grad_norm": 0.6843643188476562, + "learning_rate": 0.00011223568156745198, + "loss": 2.5686, + "step": 9273 + }, + { + "epoch": 0.7484464530707772, + "grad_norm": 0.6786371469497681, + "learning_rate": 0.00011222001313725816, + "loss": 2.5024, + "step": 9274 + }, + { + "epoch": 0.7485271568073602, + "grad_norm": 0.6431117057800293, + "learning_rate": 0.00011220434440250967, + "loss": 2.5206, + "step": 9275 + }, + { + "epoch": 0.7486078605439432, + "grad_norm": 0.699547290802002, + "learning_rate": 0.000112188675363597, + "loss": 2.5974, + "step": 9276 + }, + { + "epoch": 0.7486885642805262, + "grad_norm": 0.6870436072349548, + "learning_rate": 0.00011217300602091067, + "loss": 2.5303, + "step": 9277 + }, + { + "epoch": 0.7487692680171092, + "grad_norm": 0.7032173871994019, + "learning_rate": 0.0001121573363748412, + "loss": 2.5045, + "step": 9278 + }, + { + "epoch": 0.7488499717536922, + "grad_norm": 0.6890417337417603, + "learning_rate": 0.00011214166642577917, + "loss": 2.5945, + "step": 9279 + }, + { + "epoch": 0.7489306754902753, + "grad_norm": 
0.7257806062698364, + "learning_rate": 0.00011212599617411506, + "loss": 2.6013, + "step": 9280 + }, + { + "epoch": 0.7490113792268582, + "grad_norm": 0.722561240196228, + "learning_rate": 0.0001121103256202394, + "loss": 2.5809, + "step": 9281 + }, + { + "epoch": 0.7490920829634412, + "grad_norm": 0.7360994219779968, + "learning_rate": 0.00011209465476454277, + "loss": 2.5036, + "step": 9282 + }, + { + "epoch": 0.7491727867000242, + "grad_norm": 0.6561676263809204, + "learning_rate": 0.00011207898360741574, + "loss": 2.5302, + "step": 9283 + }, + { + "epoch": 0.7492534904366072, + "grad_norm": 0.7454147338867188, + "learning_rate": 0.00011206331214924887, + "loss": 2.5511, + "step": 9284 + }, + { + "epoch": 0.7493341941731902, + "grad_norm": 0.7085482478141785, + "learning_rate": 0.00011204764039043275, + "loss": 2.5743, + "step": 9285 + }, + { + "epoch": 0.7494148979097732, + "grad_norm": 0.691872775554657, + "learning_rate": 0.0001120319683313579, + "loss": 2.5414, + "step": 9286 + }, + { + "epoch": 0.7494956016463562, + "grad_norm": 0.6661050915718079, + "learning_rate": 0.00011201629597241496, + "loss": 2.5418, + "step": 9287 + }, + { + "epoch": 0.7495763053829392, + "grad_norm": 0.7440990805625916, + "learning_rate": 0.00011200062331399452, + "loss": 2.5543, + "step": 9288 + }, + { + "epoch": 0.7496570091195223, + "grad_norm": 0.6655303835868835, + "learning_rate": 0.00011198495035648715, + "loss": 2.5629, + "step": 9289 + }, + { + "epoch": 0.7497377128561052, + "grad_norm": 0.7550996541976929, + "learning_rate": 0.00011196927710028353, + "loss": 2.5376, + "step": 9290 + }, + { + "epoch": 0.7498184165926882, + "grad_norm": 0.692915678024292, + "learning_rate": 0.00011195360354577422, + "loss": 2.4661, + "step": 9291 + }, + { + "epoch": 0.7498991203292712, + "grad_norm": 0.7572253346443176, + "learning_rate": 0.00011193792969334985, + "loss": 2.5641, + "step": 9292 + }, + { + "epoch": 0.7499798240658543, + "grad_norm": 0.6550531387329102, + "learning_rate": 
0.00011192225554340107, + "loss": 2.5591, + "step": 9293 + }, + { + "epoch": 0.7500605278024373, + "grad_norm": 0.677130401134491, + "learning_rate": 0.0001119065810963185, + "loss": 2.5859, + "step": 9294 + }, + { + "epoch": 0.7501412315390202, + "grad_norm": 0.680673360824585, + "learning_rate": 0.00011189090635249287, + "loss": 2.5343, + "step": 9295 + }, + { + "epoch": 0.7502219352756032, + "grad_norm": 0.7574957609176636, + "learning_rate": 0.00011187523131231472, + "loss": 2.5966, + "step": 9296 + }, + { + "epoch": 0.7503026390121863, + "grad_norm": 0.7099971175193787, + "learning_rate": 0.00011185955597617474, + "loss": 2.5547, + "step": 9297 + }, + { + "epoch": 0.7503833427487693, + "grad_norm": 0.7153162956237793, + "learning_rate": 0.00011184388034446367, + "loss": 2.5986, + "step": 9298 + }, + { + "epoch": 0.7504640464853523, + "grad_norm": 0.7154852747917175, + "learning_rate": 0.00011182820441757212, + "loss": 2.5214, + "step": 9299 + }, + { + "epoch": 0.7505447502219352, + "grad_norm": 0.6899208426475525, + "learning_rate": 0.00011181252819589081, + "loss": 2.5026, + "step": 9300 + }, + { + "epoch": 0.7506254539585183, + "grad_norm": 0.6719048023223877, + "learning_rate": 0.00011179685167981041, + "loss": 2.5915, + "step": 9301 + }, + { + "epoch": 0.7507061576951013, + "grad_norm": 0.6664413213729858, + "learning_rate": 0.00011178117486972164, + "loss": 2.5479, + "step": 9302 + }, + { + "epoch": 0.7507868614316843, + "grad_norm": 0.7433286905288696, + "learning_rate": 0.00011176549776601517, + "loss": 2.5941, + "step": 9303 + }, + { + "epoch": 0.7508675651682672, + "grad_norm": 0.7868518233299255, + "learning_rate": 0.00011174982036908177, + "loss": 2.5537, + "step": 9304 + }, + { + "epoch": 0.7509482689048503, + "grad_norm": 0.7037336826324463, + "learning_rate": 0.0001117341426793121, + "loss": 2.568, + "step": 9305 + }, + { + "epoch": 0.7510289726414333, + "grad_norm": 0.6630405783653259, + "learning_rate": 0.00011171846469709697, + "loss": 2.4906, 
+ "step": 9306 + }, + { + "epoch": 0.7511096763780163, + "grad_norm": 0.7398669719696045, + "learning_rate": 0.00011170278642282701, + "loss": 2.574, + "step": 9307 + }, + { + "epoch": 0.7511903801145993, + "grad_norm": 0.7557641267776489, + "learning_rate": 0.00011168710785689304, + "loss": 2.5237, + "step": 9308 + }, + { + "epoch": 0.7512710838511824, + "grad_norm": 0.6883708238601685, + "learning_rate": 0.00011167142899968581, + "loss": 2.5643, + "step": 9309 + }, + { + "epoch": 0.7513517875877653, + "grad_norm": 0.6623669862747192, + "learning_rate": 0.00011165574985159606, + "loss": 2.5319, + "step": 9310 + }, + { + "epoch": 0.7514324913243483, + "grad_norm": 0.6938778758049011, + "learning_rate": 0.00011164007041301454, + "loss": 2.5083, + "step": 9311 + }, + { + "epoch": 0.7515131950609313, + "grad_norm": 0.718534529209137, + "learning_rate": 0.00011162439068433204, + "loss": 2.4791, + "step": 9312 + }, + { + "epoch": 0.7515938987975144, + "grad_norm": 0.672113299369812, + "learning_rate": 0.00011160871066593934, + "loss": 2.5264, + "step": 9313 + }, + { + "epoch": 0.7516746025340973, + "grad_norm": 0.6854343414306641, + "learning_rate": 0.00011159303035822723, + "loss": 2.5734, + "step": 9314 + }, + { + "epoch": 0.7517553062706803, + "grad_norm": 0.6494589447975159, + "learning_rate": 0.0001115773497615865, + "loss": 2.5564, + "step": 9315 + }, + { + "epoch": 0.7518360100072633, + "grad_norm": 0.7219608426094055, + "learning_rate": 0.00011156166887640793, + "loss": 2.6049, + "step": 9316 + }, + { + "epoch": 0.7519167137438464, + "grad_norm": 0.6892502903938293, + "learning_rate": 0.00011154598770308236, + "loss": 2.5333, + "step": 9317 + }, + { + "epoch": 0.7519974174804294, + "grad_norm": 0.6670175790786743, + "learning_rate": 0.0001115303062420006, + "loss": 2.5882, + "step": 9318 + }, + { + "epoch": 0.7520781212170123, + "grad_norm": 0.7367776036262512, + "learning_rate": 0.00011151462449355347, + "loss": 2.5634, + "step": 9319 + }, + { + "epoch": 
0.7521588249535953, + "grad_norm": 0.6971952319145203, + "learning_rate": 0.00011149894245813182, + "loss": 2.5323, + "step": 9320 + }, + { + "epoch": 0.7522395286901784, + "grad_norm": 0.6555755734443665, + "learning_rate": 0.00011148326013612642, + "loss": 2.5597, + "step": 9321 + }, + { + "epoch": 0.7523202324267614, + "grad_norm": 0.7004384994506836, + "learning_rate": 0.00011146757752792819, + "loss": 2.4761, + "step": 9322 + }, + { + "epoch": 0.7524009361633444, + "grad_norm": 0.7151978015899658, + "learning_rate": 0.00011145189463392791, + "loss": 2.5825, + "step": 9323 + }, + { + "epoch": 0.7524816398999273, + "grad_norm": 0.7176918387413025, + "learning_rate": 0.00011143621145451653, + "loss": 2.6112, + "step": 9324 + }, + { + "epoch": 0.7525623436365104, + "grad_norm": 0.7156146168708801, + "learning_rate": 0.00011142052799008487, + "loss": 2.5293, + "step": 9325 + }, + { + "epoch": 0.7526430473730934, + "grad_norm": 0.7360113263130188, + "learning_rate": 0.00011140484424102375, + "loss": 2.5703, + "step": 9326 + }, + { + "epoch": 0.7527237511096764, + "grad_norm": 0.65630042552948, + "learning_rate": 0.00011138916020772414, + "loss": 2.5224, + "step": 9327 + }, + { + "epoch": 0.7528044548462594, + "grad_norm": 0.7088161110877991, + "learning_rate": 0.00011137347589057687, + "loss": 2.6673, + "step": 9328 + }, + { + "epoch": 0.7528851585828424, + "grad_norm": 0.7335243821144104, + "learning_rate": 0.00011135779128997283, + "loss": 2.5693, + "step": 9329 + }, + { + "epoch": 0.7529658623194254, + "grad_norm": 0.7166211605072021, + "learning_rate": 0.00011134210640630298, + "loss": 2.5612, + "step": 9330 + }, + { + "epoch": 0.7530465660560084, + "grad_norm": 0.7324960231781006, + "learning_rate": 0.00011132642123995816, + "loss": 2.5682, + "step": 9331 + }, + { + "epoch": 0.7531272697925914, + "grad_norm": 0.7133917808532715, + "learning_rate": 0.00011131073579132936, + "loss": 2.6131, + "step": 9332 + }, + { + "epoch": 0.7532079735291743, + "grad_norm": 
0.678741455078125, + "learning_rate": 0.0001112950500608074, + "loss": 2.6109, + "step": 9333 + }, + { + "epoch": 0.7532886772657574, + "grad_norm": 0.7000784277915955, + "learning_rate": 0.0001112793640487833, + "loss": 2.5087, + "step": 9334 + }, + { + "epoch": 0.7533693810023404, + "grad_norm": 0.719976544380188, + "learning_rate": 0.00011126367775564795, + "loss": 2.4665, + "step": 9335 + }, + { + "epoch": 0.7534500847389234, + "grad_norm": 0.7127155065536499, + "learning_rate": 0.00011124799118179232, + "loss": 2.5254, + "step": 9336 + }, + { + "epoch": 0.7535307884755064, + "grad_norm": 0.6306474804878235, + "learning_rate": 0.00011123230432760734, + "loss": 2.5487, + "step": 9337 + }, + { + "epoch": 0.7536114922120895, + "grad_norm": 0.667019784450531, + "learning_rate": 0.00011121661719348397, + "loss": 2.5576, + "step": 9338 + }, + { + "epoch": 0.7536921959486724, + "grad_norm": 0.6869673132896423, + "learning_rate": 0.00011120092977981318, + "loss": 2.544, + "step": 9339 + }, + { + "epoch": 0.7537728996852554, + "grad_norm": 0.6688670516014099, + "learning_rate": 0.00011118524208698596, + "loss": 2.6017, + "step": 9340 + }, + { + "epoch": 0.7538536034218384, + "grad_norm": 0.6717860102653503, + "learning_rate": 0.00011116955411539325, + "loss": 2.5571, + "step": 9341 + }, + { + "epoch": 0.7539343071584215, + "grad_norm": 0.7113999724388123, + "learning_rate": 0.00011115386586542604, + "loss": 2.5684, + "step": 9342 + }, + { + "epoch": 0.7540150108950044, + "grad_norm": 0.6687907576560974, + "learning_rate": 0.00011113817733747536, + "loss": 2.548, + "step": 9343 + }, + { + "epoch": 0.7540957146315874, + "grad_norm": 0.6828920841217041, + "learning_rate": 0.00011112248853193219, + "loss": 2.5544, + "step": 9344 + }, + { + "epoch": 0.7541764183681704, + "grad_norm": 0.6793262362480164, + "learning_rate": 0.00011110679944918749, + "loss": 2.4655, + "step": 9345 + }, + { + "epoch": 0.7542571221047535, + "grad_norm": 0.6812230348587036, + "learning_rate": 
0.00011109111008963235, + "loss": 2.5473, + "step": 9346 + }, + { + "epoch": 0.7543378258413365, + "grad_norm": 0.6838300824165344, + "learning_rate": 0.00011107542045365775, + "loss": 2.5248, + "step": 9347 + }, + { + "epoch": 0.7544185295779194, + "grad_norm": 0.7101932764053345, + "learning_rate": 0.0001110597305416547, + "loss": 2.5235, + "step": 9348 + }, + { + "epoch": 0.7544992333145024, + "grad_norm": 0.7136144042015076, + "learning_rate": 0.0001110440403540143, + "loss": 2.5592, + "step": 9349 + }, + { + "epoch": 0.7545799370510855, + "grad_norm": 0.6673154234886169, + "learning_rate": 0.00011102834989112751, + "loss": 2.4962, + "step": 9350 + }, + { + "epoch": 0.7546606407876685, + "grad_norm": 0.6849049925804138, + "learning_rate": 0.00011101265915338544, + "loss": 2.5793, + "step": 9351 + }, + { + "epoch": 0.7547413445242515, + "grad_norm": 0.7239733338356018, + "learning_rate": 0.0001109969681411791, + "loss": 2.5556, + "step": 9352 + }, + { + "epoch": 0.7548220482608344, + "grad_norm": 0.6738215684890747, + "learning_rate": 0.00011098127685489955, + "loss": 2.6181, + "step": 9353 + }, + { + "epoch": 0.7549027519974175, + "grad_norm": 0.6212114095687866, + "learning_rate": 0.00011096558529493787, + "loss": 2.5509, + "step": 9354 + }, + { + "epoch": 0.7549834557340005, + "grad_norm": 0.6801952123641968, + "learning_rate": 0.00011094989346168517, + "loss": 2.6454, + "step": 9355 + }, + { + "epoch": 0.7550641594705835, + "grad_norm": 0.6605944037437439, + "learning_rate": 0.0001109342013555325, + "loss": 2.5218, + "step": 9356 + }, + { + "epoch": 0.7551448632071665, + "grad_norm": 0.6486438512802124, + "learning_rate": 0.00011091850897687096, + "loss": 2.5431, + "step": 9357 + }, + { + "epoch": 0.7552255669437495, + "grad_norm": 0.6701794266700745, + "learning_rate": 0.0001109028163260916, + "loss": 2.563, + "step": 9358 + }, + { + "epoch": 0.7553062706803325, + "grad_norm": 0.6486446261405945, + "learning_rate": 0.00011088712340358555, + "loss": 2.5147, 
+ "step": 9359 + }, + { + "epoch": 0.7553869744169155, + "grad_norm": 0.695197582244873, + "learning_rate": 0.00011087143020974396, + "loss": 2.5707, + "step": 9360 + }, + { + "epoch": 0.7554676781534985, + "grad_norm": 0.6910821199417114, + "learning_rate": 0.00011085573674495791, + "loss": 2.5797, + "step": 9361 + }, + { + "epoch": 0.7555483818900816, + "grad_norm": 0.7084208726882935, + "learning_rate": 0.00011084004300961852, + "loss": 2.5362, + "step": 9362 + }, + { + "epoch": 0.7556290856266645, + "grad_norm": 0.6750916242599487, + "learning_rate": 0.00011082434900411691, + "loss": 2.5554, + "step": 9363 + }, + { + "epoch": 0.7557097893632475, + "grad_norm": 0.6711466908454895, + "learning_rate": 0.0001108086547288442, + "loss": 2.5577, + "step": 9364 + }, + { + "epoch": 0.7557904930998305, + "grad_norm": 0.7267118096351624, + "learning_rate": 0.00011079296018419163, + "loss": 2.5422, + "step": 9365 + }, + { + "epoch": 0.7558711968364136, + "grad_norm": 0.692730188369751, + "learning_rate": 0.00011077726537055021, + "loss": 2.5281, + "step": 9366 + }, + { + "epoch": 0.7559519005729965, + "grad_norm": 0.7071926593780518, + "learning_rate": 0.00011076157028831122, + "loss": 2.5273, + "step": 9367 + }, + { + "epoch": 0.7560326043095795, + "grad_norm": 0.7662521600723267, + "learning_rate": 0.00011074587493786574, + "loss": 2.5433, + "step": 9368 + }, + { + "epoch": 0.7561133080461625, + "grad_norm": 0.7173436880111694, + "learning_rate": 0.00011073017931960496, + "loss": 2.579, + "step": 9369 + }, + { + "epoch": 0.7561940117827456, + "grad_norm": 0.6401154398918152, + "learning_rate": 0.00011071448343392008, + "loss": 2.5189, + "step": 9370 + }, + { + "epoch": 0.7562747155193286, + "grad_norm": 0.6510714292526245, + "learning_rate": 0.00011069878728120224, + "loss": 2.5682, + "step": 9371 + }, + { + "epoch": 0.7563554192559115, + "grad_norm": 0.7189988493919373, + "learning_rate": 0.00011068309086184269, + "loss": 2.5247, + "step": 9372 + }, + { + "epoch": 
0.7564361229924945, + "grad_norm": 0.678753137588501, + "learning_rate": 0.00011066739417623258, + "loss": 2.5083, + "step": 9373 + }, + { + "epoch": 0.7565168267290776, + "grad_norm": 0.6903115510940552, + "learning_rate": 0.0001106516972247631, + "loss": 2.5658, + "step": 9374 + }, + { + "epoch": 0.7565975304656606, + "grad_norm": 0.6772382855415344, + "learning_rate": 0.0001106360000078255, + "loss": 2.5445, + "step": 9375 + }, + { + "epoch": 0.7566782342022436, + "grad_norm": 0.6655055284500122, + "learning_rate": 0.00011062030252581097, + "loss": 2.5186, + "step": 9376 + }, + { + "epoch": 0.7567589379388265, + "grad_norm": 0.7173851728439331, + "learning_rate": 0.00011060460477911074, + "loss": 2.5297, + "step": 9377 + }, + { + "epoch": 0.7568396416754096, + "grad_norm": 0.6891282200813293, + "learning_rate": 0.00011058890676811606, + "loss": 2.5706, + "step": 9378 + }, + { + "epoch": 0.7569203454119926, + "grad_norm": 0.7053082585334778, + "learning_rate": 0.0001105732084932181, + "loss": 2.5475, + "step": 9379 + }, + { + "epoch": 0.7570010491485756, + "grad_norm": 0.7503373622894287, + "learning_rate": 0.00011055750995480818, + "loss": 2.6438, + "step": 9380 + }, + { + "epoch": 0.7570817528851586, + "grad_norm": 0.6703453660011292, + "learning_rate": 0.0001105418111532775, + "loss": 2.5485, + "step": 9381 + }, + { + "epoch": 0.7571624566217416, + "grad_norm": 0.6651757955551147, + "learning_rate": 0.00011052611208901733, + "loss": 2.6079, + "step": 9382 + }, + { + "epoch": 0.7572431603583246, + "grad_norm": 0.6738902926445007, + "learning_rate": 0.00011051041276241895, + "loss": 2.5279, + "step": 9383 + }, + { + "epoch": 0.7573238640949076, + "grad_norm": 0.6803816556930542, + "learning_rate": 0.00011049471317387357, + "loss": 2.5972, + "step": 9384 + }, + { + "epoch": 0.7574045678314906, + "grad_norm": 0.7127584218978882, + "learning_rate": 0.00011047901332377253, + "loss": 2.5275, + "step": 9385 + }, + { + "epoch": 0.7574852715680735, + "grad_norm": 
0.7655676007270813, + "learning_rate": 0.00011046331321250711, + "loss": 2.6491, + "step": 9386 + }, + { + "epoch": 0.7575659753046566, + "grad_norm": 0.7005762457847595, + "learning_rate": 0.00011044761284046854, + "loss": 2.5266, + "step": 9387 + }, + { + "epoch": 0.7576466790412396, + "grad_norm": 0.701931357383728, + "learning_rate": 0.00011043191220804817, + "loss": 2.5556, + "step": 9388 + }, + { + "epoch": 0.7577273827778226, + "grad_norm": 0.6888757944107056, + "learning_rate": 0.00011041621131563724, + "loss": 2.5654, + "step": 9389 + }, + { + "epoch": 0.7578080865144056, + "grad_norm": 0.7119149565696716, + "learning_rate": 0.00011040051016362711, + "loss": 2.5925, + "step": 9390 + }, + { + "epoch": 0.7578887902509887, + "grad_norm": 0.7378301024436951, + "learning_rate": 0.00011038480875240911, + "loss": 2.5604, + "step": 9391 + }, + { + "epoch": 0.7579694939875716, + "grad_norm": 0.7221272587776184, + "learning_rate": 0.00011036910708237449, + "loss": 2.5293, + "step": 9392 + }, + { + "epoch": 0.7580501977241546, + "grad_norm": 0.6895891427993774, + "learning_rate": 0.00011035340515391465, + "loss": 2.5177, + "step": 9393 + }, + { + "epoch": 0.7581309014607376, + "grad_norm": 0.6812298893928528, + "learning_rate": 0.00011033770296742086, + "loss": 2.6345, + "step": 9394 + }, + { + "epoch": 0.7582116051973207, + "grad_norm": 0.6733750700950623, + "learning_rate": 0.00011032200052328449, + "loss": 2.5548, + "step": 9395 + }, + { + "epoch": 0.7582923089339036, + "grad_norm": 0.7667728066444397, + "learning_rate": 0.00011030629782189692, + "loss": 2.5858, + "step": 9396 + }, + { + "epoch": 0.7583730126704866, + "grad_norm": 0.6809018850326538, + "learning_rate": 0.00011029059486364946, + "loss": 2.6028, + "step": 9397 + }, + { + "epoch": 0.7584537164070696, + "grad_norm": 0.6817305684089661, + "learning_rate": 0.00011027489164893345, + "loss": 2.5594, + "step": 9398 + }, + { + "epoch": 0.7585344201436527, + "grad_norm": 0.6936343908309937, + 
"learning_rate": 0.00011025918817814027, + "loss": 2.4997, + "step": 9399 + }, + { + "epoch": 0.7586151238802357, + "grad_norm": 0.7046801447868347, + "learning_rate": 0.00011024348445166133, + "loss": 2.5199, + "step": 9400 + }, + { + "epoch": 0.7586958276168186, + "grad_norm": 0.7247316241264343, + "learning_rate": 0.00011022778046988798, + "loss": 2.5233, + "step": 9401 + }, + { + "epoch": 0.7587765313534016, + "grad_norm": 0.675652265548706, + "learning_rate": 0.00011021207623321162, + "loss": 2.5213, + "step": 9402 + }, + { + "epoch": 0.7588572350899847, + "grad_norm": 0.6866120100021362, + "learning_rate": 0.0001101963717420236, + "loss": 2.6026, + "step": 9403 + }, + { + "epoch": 0.7589379388265677, + "grad_norm": 0.7168806791305542, + "learning_rate": 0.00011018066699671534, + "loss": 2.5707, + "step": 9404 + }, + { + "epoch": 0.7590186425631507, + "grad_norm": 0.6858265995979309, + "learning_rate": 0.00011016496199767825, + "loss": 2.5313, + "step": 9405 + }, + { + "epoch": 0.7590993462997336, + "grad_norm": 0.7064315676689148, + "learning_rate": 0.00011014925674530375, + "loss": 2.5362, + "step": 9406 + }, + { + "epoch": 0.7591800500363167, + "grad_norm": 0.658385694026947, + "learning_rate": 0.00011013355123998324, + "loss": 2.5773, + "step": 9407 + }, + { + "epoch": 0.7592607537728997, + "grad_norm": 0.7112493515014648, + "learning_rate": 0.00011011784548210813, + "loss": 2.589, + "step": 9408 + }, + { + "epoch": 0.7593414575094827, + "grad_norm": 0.6835871934890747, + "learning_rate": 0.00011010213947206986, + "loss": 2.5952, + "step": 9409 + }, + { + "epoch": 0.7594221612460657, + "grad_norm": 0.6920506358146667, + "learning_rate": 0.00011008643321025989, + "loss": 2.5433, + "step": 9410 + }, + { + "epoch": 0.7595028649826487, + "grad_norm": 0.7239150404930115, + "learning_rate": 0.00011007072669706962, + "loss": 2.5291, + "step": 9411 + }, + { + "epoch": 0.7595835687192317, + "grad_norm": 0.644568145275116, + "learning_rate": 0.00011005501993289052, 
+ "loss": 2.5324, + "step": 9412 + }, + { + "epoch": 0.7596642724558147, + "grad_norm": 0.6604863405227661, + "learning_rate": 0.00011003931291811405, + "loss": 2.561, + "step": 9413 + }, + { + "epoch": 0.7597449761923977, + "grad_norm": 0.7056753635406494, + "learning_rate": 0.00011002360565313164, + "loss": 2.6537, + "step": 9414 + }, + { + "epoch": 0.7598256799289808, + "grad_norm": 0.6712720394134521, + "learning_rate": 0.00011000789813833476, + "loss": 2.5222, + "step": 9415 + }, + { + "epoch": 0.7599063836655637, + "grad_norm": 0.6829253435134888, + "learning_rate": 0.00010999219037411492, + "loss": 2.5156, + "step": 9416 + }, + { + "epoch": 0.7599870874021467, + "grad_norm": 0.7386518120765686, + "learning_rate": 0.00010997648236086359, + "loss": 2.5378, + "step": 9417 + }, + { + "epoch": 0.7600677911387297, + "grad_norm": 0.6711105108261108, + "learning_rate": 0.00010996077409897223, + "loss": 2.4985, + "step": 9418 + }, + { + "epoch": 0.7601484948753128, + "grad_norm": 0.6936883926391602, + "learning_rate": 0.00010994506558883233, + "loss": 2.4912, + "step": 9419 + }, + { + "epoch": 0.7602291986118958, + "grad_norm": 0.6927978992462158, + "learning_rate": 0.00010992935683083541, + "loss": 2.5526, + "step": 9420 + }, + { + "epoch": 0.7603099023484787, + "grad_norm": 0.7661495804786682, + "learning_rate": 0.00010991364782537297, + "loss": 2.5778, + "step": 9421 + }, + { + "epoch": 0.7603906060850617, + "grad_norm": 0.7092108726501465, + "learning_rate": 0.0001098979385728365, + "loss": 2.6557, + "step": 9422 + }, + { + "epoch": 0.7604713098216448, + "grad_norm": 0.696666419506073, + "learning_rate": 0.00010988222907361754, + "loss": 2.4897, + "step": 9423 + }, + { + "epoch": 0.7605520135582278, + "grad_norm": 0.6836280822753906, + "learning_rate": 0.00010986651932810756, + "loss": 2.5146, + "step": 9424 + }, + { + "epoch": 0.7606327172948107, + "grad_norm": 0.7269579768180847, + "learning_rate": 0.00010985080933669815, + "loss": 2.5314, + "step": 9425 + }, + 
{ + "epoch": 0.7607134210313937, + "grad_norm": 0.6862092018127441, + "learning_rate": 0.00010983509909978085, + "loss": 2.5415, + "step": 9426 + }, + { + "epoch": 0.7607941247679768, + "grad_norm": 0.7068747878074646, + "learning_rate": 0.00010981938861774713, + "loss": 2.5919, + "step": 9427 + }, + { + "epoch": 0.7608748285045598, + "grad_norm": 0.699999213218689, + "learning_rate": 0.0001098036778909886, + "loss": 2.5175, + "step": 9428 + }, + { + "epoch": 0.7609555322411428, + "grad_norm": 0.6642772555351257, + "learning_rate": 0.0001097879669198968, + "loss": 2.5721, + "step": 9429 + }, + { + "epoch": 0.7610362359777257, + "grad_norm": 0.7100533843040466, + "learning_rate": 0.00010977225570486323, + "loss": 2.5189, + "step": 9430 + }, + { + "epoch": 0.7611169397143088, + "grad_norm": 0.7289063930511475, + "learning_rate": 0.00010975654424627955, + "loss": 2.6139, + "step": 9431 + }, + { + "epoch": 0.7611976434508918, + "grad_norm": 0.7289659380912781, + "learning_rate": 0.00010974083254453726, + "loss": 2.5201, + "step": 9432 + }, + { + "epoch": 0.7612783471874748, + "grad_norm": 0.7389557957649231, + "learning_rate": 0.000109725120600028, + "loss": 2.559, + "step": 9433 + }, + { + "epoch": 0.7613590509240578, + "grad_norm": 0.7021538615226746, + "learning_rate": 0.00010970940841314327, + "loss": 2.6353, + "step": 9434 + }, + { + "epoch": 0.7614397546606407, + "grad_norm": 0.6614113450050354, + "learning_rate": 0.0001096936959842747, + "loss": 2.54, + "step": 9435 + }, + { + "epoch": 0.7615204583972238, + "grad_norm": 0.6905426979064941, + "learning_rate": 0.00010967798331381392, + "loss": 2.5845, + "step": 9436 + }, + { + "epoch": 0.7616011621338068, + "grad_norm": 0.8183904886245728, + "learning_rate": 0.00010966227040215247, + "loss": 2.5255, + "step": 9437 + }, + { + "epoch": 0.7616818658703898, + "grad_norm": 0.7404630780220032, + "learning_rate": 0.00010964655724968199, + "loss": 2.5726, + "step": 9438 + }, + { + "epoch": 0.7617625696069728, + 
"grad_norm": 0.657127320766449, + "learning_rate": 0.0001096308438567941, + "loss": 2.6233, + "step": 9439 + }, + { + "epoch": 0.7618432733435558, + "grad_norm": 0.7417906522750854, + "learning_rate": 0.00010961513022388039, + "loss": 2.6361, + "step": 9440 + }, + { + "epoch": 0.7619239770801388, + "grad_norm": 0.6930029988288879, + "learning_rate": 0.00010959941635133249, + "loss": 2.5164, + "step": 9441 + }, + { + "epoch": 0.7620046808167218, + "grad_norm": 0.6897261738777161, + "learning_rate": 0.00010958370223954207, + "loss": 2.5626, + "step": 9442 + }, + { + "epoch": 0.7620853845533048, + "grad_norm": 0.6737398505210876, + "learning_rate": 0.00010956798788890072, + "loss": 2.5342, + "step": 9443 + }, + { + "epoch": 0.7621660882898879, + "grad_norm": 0.6550001502037048, + "learning_rate": 0.0001095522732998001, + "loss": 2.5604, + "step": 9444 + }, + { + "epoch": 0.7622467920264708, + "grad_norm": 0.7184637784957886, + "learning_rate": 0.00010953655847263187, + "loss": 2.6006, + "step": 9445 + }, + { + "epoch": 0.7623274957630538, + "grad_norm": 0.6188609600067139, + "learning_rate": 0.00010952084340778766, + "loss": 2.4875, + "step": 9446 + }, + { + "epoch": 0.7624081994996368, + "grad_norm": 0.6550862789154053, + "learning_rate": 0.00010950512810565917, + "loss": 2.5794, + "step": 9447 + }, + { + "epoch": 0.7624889032362199, + "grad_norm": 0.6659231781959534, + "learning_rate": 0.000109489412566638, + "loss": 2.5137, + "step": 9448 + }, + { + "epoch": 0.7625696069728028, + "grad_norm": 0.749376118183136, + "learning_rate": 0.00010947369679111592, + "loss": 2.5923, + "step": 9449 + }, + { + "epoch": 0.7626503107093858, + "grad_norm": 0.6597894430160522, + "learning_rate": 0.0001094579807794845, + "loss": 2.5677, + "step": 9450 + }, + { + "epoch": 0.7627310144459688, + "grad_norm": 0.7194519639015198, + "learning_rate": 0.00010944226453213548, + "loss": 2.5754, + "step": 9451 + }, + { + "epoch": 0.7628117181825519, + "grad_norm": 0.6734583377838135, + 
"learning_rate": 0.00010942654804946057, + "loss": 2.535, + "step": 9452 + }, + { + "epoch": 0.7628924219191349, + "grad_norm": 0.7171904444694519, + "learning_rate": 0.00010941083133185146, + "loss": 2.5431, + "step": 9453 + }, + { + "epoch": 0.7629731256557178, + "grad_norm": 0.6760339736938477, + "learning_rate": 0.00010939511437969978, + "loss": 2.5163, + "step": 9454 + }, + { + "epoch": 0.7630538293923008, + "grad_norm": 0.6720966696739197, + "learning_rate": 0.00010937939719339731, + "loss": 2.5621, + "step": 9455 + }, + { + "epoch": 0.7631345331288839, + "grad_norm": 0.6374503970146179, + "learning_rate": 0.00010936367977333574, + "loss": 2.5007, + "step": 9456 + }, + { + "epoch": 0.7632152368654669, + "grad_norm": 0.6407146453857422, + "learning_rate": 0.00010934796211990684, + "loss": 2.5724, + "step": 9457 + }, + { + "epoch": 0.7632959406020499, + "grad_norm": 0.6685383319854736, + "learning_rate": 0.00010933224423350225, + "loss": 2.501, + "step": 9458 + }, + { + "epoch": 0.7633766443386328, + "grad_norm": 0.664806604385376, + "learning_rate": 0.00010931652611451373, + "loss": 2.6174, + "step": 9459 + }, + { + "epoch": 0.7634573480752159, + "grad_norm": 0.6383369565010071, + "learning_rate": 0.00010930080776333303, + "loss": 2.557, + "step": 9460 + }, + { + "epoch": 0.7635380518117989, + "grad_norm": 0.6747864484786987, + "learning_rate": 0.0001092850891803519, + "loss": 2.5406, + "step": 9461 + }, + { + "epoch": 0.7636187555483819, + "grad_norm": 0.7312811613082886, + "learning_rate": 0.00010926937036596205, + "loss": 2.5903, + "step": 9462 + }, + { + "epoch": 0.7636994592849649, + "grad_norm": 0.645847737789154, + "learning_rate": 0.00010925365132055529, + "loss": 2.5254, + "step": 9463 + }, + { + "epoch": 0.7637801630215479, + "grad_norm": 0.6466063857078552, + "learning_rate": 0.00010923793204452335, + "loss": 2.5322, + "step": 9464 + }, + { + "epoch": 0.7638608667581309, + "grad_norm": 0.6450574994087219, + "learning_rate": 0.000109222212538258, + 
"loss": 2.522, + "step": 9465 + }, + { + "epoch": 0.7639415704947139, + "grad_norm": 0.6491848826408386, + "learning_rate": 0.00010920649280215096, + "loss": 2.5545, + "step": 9466 + }, + { + "epoch": 0.7640222742312969, + "grad_norm": 0.6888336539268494, + "learning_rate": 0.0001091907728365941, + "loss": 2.5217, + "step": 9467 + }, + { + "epoch": 0.76410297796788, + "grad_norm": 0.702557384967804, + "learning_rate": 0.00010917505264197914, + "loss": 2.5351, + "step": 9468 + }, + { + "epoch": 0.7641836817044629, + "grad_norm": 0.6552408933639526, + "learning_rate": 0.0001091593322186979, + "loss": 2.5115, + "step": 9469 + }, + { + "epoch": 0.7642643854410459, + "grad_norm": 0.7514002919197083, + "learning_rate": 0.00010914361156714212, + "loss": 2.5196, + "step": 9470 + }, + { + "epoch": 0.7643450891776289, + "grad_norm": 0.6692500710487366, + "learning_rate": 0.00010912789068770366, + "loss": 2.5639, + "step": 9471 + }, + { + "epoch": 0.764425792914212, + "grad_norm": 0.6567397117614746, + "learning_rate": 0.0001091121695807743, + "loss": 2.5027, + "step": 9472 + }, + { + "epoch": 0.764506496650795, + "grad_norm": 0.6876057982444763, + "learning_rate": 0.00010909644824674587, + "loss": 2.519, + "step": 9473 + }, + { + "epoch": 0.7645872003873779, + "grad_norm": 0.747949481010437, + "learning_rate": 0.00010908072668601017, + "loss": 2.5604, + "step": 9474 + }, + { + "epoch": 0.7646679041239609, + "grad_norm": 0.6371368169784546, + "learning_rate": 0.000109065004898959, + "loss": 2.5853, + "step": 9475 + }, + { + "epoch": 0.764748607860544, + "grad_norm": 0.6472185254096985, + "learning_rate": 0.00010904928288598422, + "loss": 2.5662, + "step": 9476 + }, + { + "epoch": 0.764829311597127, + "grad_norm": 0.7009313702583313, + "learning_rate": 0.00010903356064747765, + "loss": 2.5244, + "step": 9477 + }, + { + "epoch": 0.76491001533371, + "grad_norm": 0.7405661940574646, + "learning_rate": 0.00010901783818383116, + "loss": 2.4963, + "step": 9478 + }, + { + "epoch": 
0.7649907190702929, + "grad_norm": 0.7693421840667725, + "learning_rate": 0.00010900211549543658, + "loss": 2.6018, + "step": 9479 + }, + { + "epoch": 0.765071422806876, + "grad_norm": 0.6965410709381104, + "learning_rate": 0.00010898639258268571, + "loss": 2.627, + "step": 9480 + }, + { + "epoch": 0.765152126543459, + "grad_norm": 0.7167130708694458, + "learning_rate": 0.00010897066944597046, + "loss": 2.5298, + "step": 9481 + }, + { + "epoch": 0.765232830280042, + "grad_norm": 0.7159689664840698, + "learning_rate": 0.00010895494608568268, + "loss": 2.5179, + "step": 9482 + }, + { + "epoch": 0.7653135340166249, + "grad_norm": 0.7329332232475281, + "learning_rate": 0.00010893922250221423, + "loss": 2.6498, + "step": 9483 + }, + { + "epoch": 0.765394237753208, + "grad_norm": 0.6912567019462585, + "learning_rate": 0.000108923498695957, + "loss": 2.5679, + "step": 9484 + }, + { + "epoch": 0.765474941489791, + "grad_norm": 0.7030324935913086, + "learning_rate": 0.00010890777466730285, + "loss": 2.5678, + "step": 9485 + }, + { + "epoch": 0.765555645226374, + "grad_norm": 0.7238864898681641, + "learning_rate": 0.00010889205041664365, + "loss": 2.5525, + "step": 9486 + }, + { + "epoch": 0.765636348962957, + "grad_norm": 0.6623672842979431, + "learning_rate": 0.00010887632594437134, + "loss": 2.4857, + "step": 9487 + }, + { + "epoch": 0.7657170526995399, + "grad_norm": 0.726645827293396, + "learning_rate": 0.00010886060125087776, + "loss": 2.5405, + "step": 9488 + }, + { + "epoch": 0.765797756436123, + "grad_norm": 0.6624459624290466, + "learning_rate": 0.00010884487633655487, + "loss": 2.5538, + "step": 9489 + }, + { + "epoch": 0.765878460172706, + "grad_norm": 0.7198002934455872, + "learning_rate": 0.00010882915120179453, + "loss": 2.5808, + "step": 9490 + }, + { + "epoch": 0.765959163909289, + "grad_norm": 0.7545582056045532, + "learning_rate": 0.00010881342584698862, + "loss": 2.6059, + "step": 9491 + }, + { + "epoch": 0.766039867645872, + "grad_norm": 
0.6748257279396057, + "learning_rate": 0.00010879770027252915, + "loss": 2.5203, + "step": 9492 + }, + { + "epoch": 0.766120571382455, + "grad_norm": 0.7376208901405334, + "learning_rate": 0.00010878197447880796, + "loss": 2.5255, + "step": 9493 + }, + { + "epoch": 0.766201275119038, + "grad_norm": 0.7589401006698608, + "learning_rate": 0.00010876624846621704, + "loss": 2.6304, + "step": 9494 + }, + { + "epoch": 0.766281978855621, + "grad_norm": 0.6963146924972534, + "learning_rate": 0.00010875052223514827, + "loss": 2.5547, + "step": 9495 + }, + { + "epoch": 0.766362682592204, + "grad_norm": 0.6660788059234619, + "learning_rate": 0.00010873479578599361, + "loss": 2.5922, + "step": 9496 + }, + { + "epoch": 0.7664433863287871, + "grad_norm": 0.7506482005119324, + "learning_rate": 0.00010871906911914502, + "loss": 2.5383, + "step": 9497 + }, + { + "epoch": 0.76652409006537, + "grad_norm": 0.7514285445213318, + "learning_rate": 0.00010870334223499443, + "loss": 2.5551, + "step": 9498 + }, + { + "epoch": 0.766604793801953, + "grad_norm": 0.6461809873580933, + "learning_rate": 0.00010868761513393379, + "loss": 2.5367, + "step": 9499 + }, + { + "epoch": 0.766685497538536, + "grad_norm": 0.6328238844871521, + "learning_rate": 0.00010867188781635512, + "loss": 2.5505, + "step": 9500 + }, + { + "epoch": 0.7667662012751191, + "grad_norm": 0.7090224027633667, + "learning_rate": 0.00010865616028265027, + "loss": 2.5921, + "step": 9501 + }, + { + "epoch": 0.766846905011702, + "grad_norm": 0.6404605507850647, + "learning_rate": 0.0001086404325332113, + "loss": 2.5357, + "step": 9502 + }, + { + "epoch": 0.766927608748285, + "grad_norm": 0.652477502822876, + "learning_rate": 0.00010862470456843016, + "loss": 2.5277, + "step": 9503 + }, + { + "epoch": 0.767008312484868, + "grad_norm": 0.7045448422431946, + "learning_rate": 0.00010860897638869887, + "loss": 2.5712, + "step": 9504 + }, + { + "epoch": 0.7670890162214511, + "grad_norm": 0.7024295926094055, + "learning_rate": 
0.00010859324799440936, + "loss": 2.5976, + "step": 9505 + }, + { + "epoch": 0.7671697199580341, + "grad_norm": 0.7165585160255432, + "learning_rate": 0.00010857751938595364, + "loss": 2.5378, + "step": 9506 + }, + { + "epoch": 0.767250423694617, + "grad_norm": 0.7037522196769714, + "learning_rate": 0.0001085617905637237, + "loss": 2.554, + "step": 9507 + }, + { + "epoch": 0.7673311274312, + "grad_norm": 0.738210916519165, + "learning_rate": 0.00010854606152811163, + "loss": 2.5102, + "step": 9508 + }, + { + "epoch": 0.7674118311677831, + "grad_norm": 0.7500020861625671, + "learning_rate": 0.0001085303322795093, + "loss": 2.5908, + "step": 9509 + }, + { + "epoch": 0.7674925349043661, + "grad_norm": 0.7669610977172852, + "learning_rate": 0.00010851460281830883, + "loss": 2.5119, + "step": 9510 + }, + { + "epoch": 0.7675732386409491, + "grad_norm": 0.6619212031364441, + "learning_rate": 0.00010849887314490217, + "loss": 2.5622, + "step": 9511 + }, + { + "epoch": 0.767653942377532, + "grad_norm": 0.7142546772956848, + "learning_rate": 0.00010848314325968136, + "loss": 2.596, + "step": 9512 + }, + { + "epoch": 0.7677346461141151, + "grad_norm": 0.7365403175354004, + "learning_rate": 0.0001084674131630385, + "loss": 2.5695, + "step": 9513 + }, + { + "epoch": 0.7678153498506981, + "grad_norm": 0.7843711972236633, + "learning_rate": 0.00010845168285536555, + "loss": 2.5707, + "step": 9514 + }, + { + "epoch": 0.7678960535872811, + "grad_norm": 0.6391385197639465, + "learning_rate": 0.00010843595233705454, + "loss": 2.5523, + "step": 9515 + }, + { + "epoch": 0.7679767573238641, + "grad_norm": 0.6955631971359253, + "learning_rate": 0.00010842022160849758, + "loss": 2.5072, + "step": 9516 + }, + { + "epoch": 0.7680574610604471, + "grad_norm": 0.7291388511657715, + "learning_rate": 0.00010840449067008665, + "loss": 2.5786, + "step": 9517 + }, + { + "epoch": 0.7681381647970301, + "grad_norm": 0.7988889813423157, + "learning_rate": 0.00010838875952221387, + "loss": 2.5622, + 
"step": 9518 + }, + { + "epoch": 0.7682188685336131, + "grad_norm": 0.726271390914917, + "learning_rate": 0.00010837302816527129, + "loss": 2.5479, + "step": 9519 + }, + { + "epoch": 0.7682995722701961, + "grad_norm": 0.7305205464363098, + "learning_rate": 0.00010835729659965095, + "loss": 2.5946, + "step": 9520 + }, + { + "epoch": 0.7683802760067792, + "grad_norm": 0.7843366265296936, + "learning_rate": 0.00010834156482574493, + "loss": 2.5212, + "step": 9521 + }, + { + "epoch": 0.7684609797433621, + "grad_norm": 0.6988845467567444, + "learning_rate": 0.00010832583284394529, + "loss": 2.5174, + "step": 9522 + }, + { + "epoch": 0.7685416834799451, + "grad_norm": 0.7088077068328857, + "learning_rate": 0.00010831010065464414, + "loss": 2.5253, + "step": 9523 + }, + { + "epoch": 0.7686223872165281, + "grad_norm": 0.7447031140327454, + "learning_rate": 0.00010829436825823358, + "loss": 2.6045, + "step": 9524 + }, + { + "epoch": 0.7687030909531112, + "grad_norm": 0.6865237951278687, + "learning_rate": 0.00010827863565510566, + "loss": 2.558, + "step": 9525 + }, + { + "epoch": 0.7687837946896942, + "grad_norm": 0.7748900651931763, + "learning_rate": 0.0001082629028456525, + "loss": 2.5694, + "step": 9526 + }, + { + "epoch": 0.7688644984262771, + "grad_norm": 0.7031759023666382, + "learning_rate": 0.00010824716983026622, + "loss": 2.5171, + "step": 9527 + }, + { + "epoch": 0.7689452021628601, + "grad_norm": 0.7627702355384827, + "learning_rate": 0.00010823143660933888, + "loss": 2.5715, + "step": 9528 + }, + { + "epoch": 0.7690259058994432, + "grad_norm": 0.707815945148468, + "learning_rate": 0.00010821570318326264, + "loss": 2.5281, + "step": 9529 + }, + { + "epoch": 0.7691066096360262, + "grad_norm": 0.6833841800689697, + "learning_rate": 0.00010819996955242962, + "loss": 2.5702, + "step": 9530 + }, + { + "epoch": 0.7691873133726091, + "grad_norm": 0.7029415369033813, + "learning_rate": 0.00010818423571723189, + "loss": 2.5331, + "step": 9531 + }, + { + "epoch": 
0.7692680171091921, + "grad_norm": 0.6442921161651611, + "learning_rate": 0.00010816850167806161, + "loss": 2.5423, + "step": 9532 + }, + { + "epoch": 0.7693487208457752, + "grad_norm": 0.7259004712104797, + "learning_rate": 0.00010815276743531093, + "loss": 2.6014, + "step": 9533 + }, + { + "epoch": 0.7694294245823582, + "grad_norm": 0.6483473777770996, + "learning_rate": 0.00010813703298937199, + "loss": 2.5268, + "step": 9534 + }, + { + "epoch": 0.7695101283189412, + "grad_norm": 0.6805520057678223, + "learning_rate": 0.00010812129834063691, + "loss": 2.5536, + "step": 9535 + }, + { + "epoch": 0.7695908320555241, + "grad_norm": 0.7120587825775146, + "learning_rate": 0.00010810556348949783, + "loss": 2.518, + "step": 9536 + }, + { + "epoch": 0.7696715357921071, + "grad_norm": 0.7280872464179993, + "learning_rate": 0.00010808982843634692, + "loss": 2.5525, + "step": 9537 + }, + { + "epoch": 0.7697522395286902, + "grad_norm": 0.68332439661026, + "learning_rate": 0.00010807409318157636, + "loss": 2.6318, + "step": 9538 + }, + { + "epoch": 0.7698329432652732, + "grad_norm": 0.655352771282196, + "learning_rate": 0.00010805835772557826, + "loss": 2.5781, + "step": 9539 + }, + { + "epoch": 0.7699136470018562, + "grad_norm": 0.7675400972366333, + "learning_rate": 0.00010804262206874484, + "loss": 2.5542, + "step": 9540 + }, + { + "epoch": 0.7699943507384391, + "grad_norm": 0.6676837205886841, + "learning_rate": 0.00010802688621146826, + "loss": 2.5411, + "step": 9541 + }, + { + "epoch": 0.7700750544750222, + "grad_norm": 0.7378436326980591, + "learning_rate": 0.00010801115015414067, + "loss": 2.5416, + "step": 9542 + }, + { + "epoch": 0.7701557582116052, + "grad_norm": 0.7330371141433716, + "learning_rate": 0.0001079954138971543, + "loss": 2.5154, + "step": 9543 + }, + { + "epoch": 0.7702364619481882, + "grad_norm": 0.6792974472045898, + "learning_rate": 0.00010797967744090131, + "loss": 2.5328, + "step": 9544 + }, + { + "epoch": 0.7703171656847712, + "grad_norm": 
0.7129618525505066, + "learning_rate": 0.00010796394078577392, + "loss": 2.5688, + "step": 9545 + }, + { + "epoch": 0.7703978694213542, + "grad_norm": 0.6900608539581299, + "learning_rate": 0.00010794820393216429, + "loss": 2.5659, + "step": 9546 + }, + { + "epoch": 0.7704785731579372, + "grad_norm": 0.6798564195632935, + "learning_rate": 0.00010793246688046464, + "loss": 2.5746, + "step": 9547 + }, + { + "epoch": 0.7705592768945202, + "grad_norm": 0.7132395505905151, + "learning_rate": 0.00010791672963106715, + "loss": 2.6277, + "step": 9548 + }, + { + "epoch": 0.7706399806311032, + "grad_norm": 0.6762476563453674, + "learning_rate": 0.0001079009921843641, + "loss": 2.5265, + "step": 9549 + }, + { + "epoch": 0.7707206843676863, + "grad_norm": 0.7223351001739502, + "learning_rate": 0.00010788525454074765, + "loss": 2.6255, + "step": 9550 + }, + { + "epoch": 0.7708013881042692, + "grad_norm": 0.7383624315261841, + "learning_rate": 0.00010786951670061008, + "loss": 2.5744, + "step": 9551 + }, + { + "epoch": 0.7708820918408522, + "grad_norm": 0.6677328944206238, + "learning_rate": 0.00010785377866434355, + "loss": 2.5594, + "step": 9552 + }, + { + "epoch": 0.7709627955774352, + "grad_norm": 0.6572195887565613, + "learning_rate": 0.00010783804043234032, + "loss": 2.5582, + "step": 9553 + }, + { + "epoch": 0.7710434993140183, + "grad_norm": 0.6837800741195679, + "learning_rate": 0.00010782230200499265, + "loss": 2.5311, + "step": 9554 + }, + { + "epoch": 0.7711242030506013, + "grad_norm": 0.7232153415679932, + "learning_rate": 0.00010780656338269277, + "loss": 2.5074, + "step": 9555 + }, + { + "epoch": 0.7712049067871842, + "grad_norm": 0.6722296476364136, + "learning_rate": 0.00010779082456583291, + "loss": 2.551, + "step": 9556 + }, + { + "epoch": 0.7712856105237672, + "grad_norm": 0.6461100578308105, + "learning_rate": 0.00010777508555480535, + "loss": 2.5723, + "step": 9557 + }, + { + "epoch": 0.7713663142603503, + "grad_norm": 0.6573290824890137, + "learning_rate": 
0.0001077593463500023, + "loss": 2.4967, + "step": 9558 + }, + { + "epoch": 0.7714470179969333, + "grad_norm": 0.7184738516807556, + "learning_rate": 0.0001077436069518161, + "loss": 2.6703, + "step": 9559 + }, + { + "epoch": 0.7715277217335162, + "grad_norm": 0.7226557731628418, + "learning_rate": 0.00010772786736063895, + "loss": 2.6118, + "step": 9560 + }, + { + "epoch": 0.7716084254700992, + "grad_norm": 0.6800956130027771, + "learning_rate": 0.00010771212757686318, + "loss": 2.578, + "step": 9561 + }, + { + "epoch": 0.7716891292066823, + "grad_norm": 0.6657535433769226, + "learning_rate": 0.00010769638760088099, + "loss": 2.5291, + "step": 9562 + }, + { + "epoch": 0.7717698329432653, + "grad_norm": 0.620527982711792, + "learning_rate": 0.00010768064743308471, + "loss": 2.5518, + "step": 9563 + }, + { + "epoch": 0.7718505366798483, + "grad_norm": 0.693760097026825, + "learning_rate": 0.00010766490707386663, + "loss": 2.52, + "step": 9564 + }, + { + "epoch": 0.7719312404164312, + "grad_norm": 0.6674148440361023, + "learning_rate": 0.000107649166523619, + "loss": 2.5197, + "step": 9565 + }, + { + "epoch": 0.7720119441530143, + "grad_norm": 0.6844033598899841, + "learning_rate": 0.00010763342578273419, + "loss": 2.5842, + "step": 9566 + }, + { + "epoch": 0.7720926478895973, + "grad_norm": 0.6891880035400391, + "learning_rate": 0.00010761768485160442, + "loss": 2.5349, + "step": 9567 + }, + { + "epoch": 0.7721733516261803, + "grad_norm": 0.7157394289970398, + "learning_rate": 0.00010760194373062204, + "loss": 2.5762, + "step": 9568 + }, + { + "epoch": 0.7722540553627633, + "grad_norm": 0.7522526383399963, + "learning_rate": 0.00010758620242017936, + "loss": 2.5348, + "step": 9569 + }, + { + "epoch": 0.7723347590993463, + "grad_norm": 0.6817746162414551, + "learning_rate": 0.00010757046092066869, + "loss": 2.5836, + "step": 9570 + }, + { + "epoch": 0.7724154628359293, + "grad_norm": 0.7274518013000488, + "learning_rate": 0.00010755471923248232, + "loss": 2.5276, + 
"step": 9571 + }, + { + "epoch": 0.7724961665725123, + "grad_norm": 0.6735557913780212, + "learning_rate": 0.00010753897735601264, + "loss": 2.6116, + "step": 9572 + }, + { + "epoch": 0.7725768703090953, + "grad_norm": 0.6626406908035278, + "learning_rate": 0.00010752323529165186, + "loss": 2.5778, + "step": 9573 + }, + { + "epoch": 0.7726575740456784, + "grad_norm": 0.6627367734909058, + "learning_rate": 0.00010750749303979246, + "loss": 2.5839, + "step": 9574 + }, + { + "epoch": 0.7727382777822613, + "grad_norm": 0.6658251881599426, + "learning_rate": 0.0001074917506008267, + "loss": 2.5233, + "step": 9575 + }, + { + "epoch": 0.7728189815188443, + "grad_norm": 0.6969848871231079, + "learning_rate": 0.00010747600797514692, + "loss": 2.5169, + "step": 9576 + }, + { + "epoch": 0.7728996852554273, + "grad_norm": 0.7313554883003235, + "learning_rate": 0.00010746026516314549, + "loss": 2.5528, + "step": 9577 + }, + { + "epoch": 0.7729803889920104, + "grad_norm": 0.6467077136039734, + "learning_rate": 0.00010744452216521472, + "loss": 2.5158, + "step": 9578 + }, + { + "epoch": 0.7730610927285934, + "grad_norm": 0.6808056235313416, + "learning_rate": 0.00010742877898174702, + "loss": 2.5346, + "step": 9579 + }, + { + "epoch": 0.7731417964651763, + "grad_norm": 0.7537400722503662, + "learning_rate": 0.00010741303561313474, + "loss": 2.5621, + "step": 9580 + }, + { + "epoch": 0.7732225002017593, + "grad_norm": 0.6715610027313232, + "learning_rate": 0.00010739729205977021, + "loss": 2.5384, + "step": 9581 + }, + { + "epoch": 0.7733032039383424, + "grad_norm": 0.7129234075546265, + "learning_rate": 0.00010738154832204586, + "loss": 2.5639, + "step": 9582 + }, + { + "epoch": 0.7733839076749254, + "grad_norm": 0.7156025171279907, + "learning_rate": 0.00010736580440035397, + "loss": 2.5427, + "step": 9583 + }, + { + "epoch": 0.7734646114115084, + "grad_norm": 0.7394191026687622, + "learning_rate": 0.00010735006029508703, + "loss": 2.5809, + "step": 9584 + }, + { + "epoch": 
0.7735453151480913, + "grad_norm": 0.7117684483528137, + "learning_rate": 0.00010733431600663737, + "loss": 2.5807, + "step": 9585 + }, + { + "epoch": 0.7736260188846744, + "grad_norm": 0.6622862219810486, + "learning_rate": 0.00010731857153539737, + "loss": 2.5277, + "step": 9586 + }, + { + "epoch": 0.7737067226212574, + "grad_norm": 0.7744547128677368, + "learning_rate": 0.00010730282688175943, + "loss": 2.6119, + "step": 9587 + }, + { + "epoch": 0.7737874263578404, + "grad_norm": 0.6804926991462708, + "learning_rate": 0.00010728708204611597, + "loss": 2.534, + "step": 9588 + }, + { + "epoch": 0.7738681300944233, + "grad_norm": 0.7115367650985718, + "learning_rate": 0.00010727133702885937, + "loss": 2.542, + "step": 9589 + }, + { + "epoch": 0.7739488338310063, + "grad_norm": 0.7623847723007202, + "learning_rate": 0.00010725559183038205, + "loss": 2.587, + "step": 9590 + }, + { + "epoch": 0.7740295375675894, + "grad_norm": 0.6612982153892517, + "learning_rate": 0.00010723984645107641, + "loss": 2.5257, + "step": 9591 + }, + { + "epoch": 0.7741102413041724, + "grad_norm": 0.7553900480270386, + "learning_rate": 0.00010722410089133488, + "loss": 2.6311, + "step": 9592 + }, + { + "epoch": 0.7741909450407554, + "grad_norm": 0.7541414499282837, + "learning_rate": 0.00010720835515154983, + "loss": 2.5978, + "step": 9593 + }, + { + "epoch": 0.7742716487773383, + "grad_norm": 0.6690947413444519, + "learning_rate": 0.00010719260923211376, + "loss": 2.568, + "step": 9594 + }, + { + "epoch": 0.7743523525139214, + "grad_norm": 0.7282151579856873, + "learning_rate": 0.00010717686313341909, + "loss": 2.5375, + "step": 9595 + }, + { + "epoch": 0.7744330562505044, + "grad_norm": 0.6862902045249939, + "learning_rate": 0.00010716111685585821, + "loss": 2.5503, + "step": 9596 + }, + { + "epoch": 0.7745137599870874, + "grad_norm": 0.7076265811920166, + "learning_rate": 0.00010714537039982357, + "loss": 2.4766, + "step": 9597 + }, + { + "epoch": 0.7745944637236704, + "grad_norm": 
0.7063891887664795, + "learning_rate": 0.00010712962376570761, + "loss": 2.5822, + "step": 9598 + }, + { + "epoch": 0.7746751674602534, + "grad_norm": 0.6975609064102173, + "learning_rate": 0.00010711387695390282, + "loss": 2.597, + "step": 9599 + }, + { + "epoch": 0.7747558711968364, + "grad_norm": 0.6790002584457397, + "learning_rate": 0.0001070981299648016, + "loss": 2.5705, + "step": 9600 + }, + { + "epoch": 0.7748365749334194, + "grad_norm": 0.6493679881095886, + "learning_rate": 0.00010708238279879643, + "loss": 2.49, + "step": 9601 + }, + { + "epoch": 0.7749172786700024, + "grad_norm": 0.6741142868995667, + "learning_rate": 0.00010706663545627977, + "loss": 2.6008, + "step": 9602 + }, + { + "epoch": 0.7749979824065855, + "grad_norm": 0.6753309965133667, + "learning_rate": 0.00010705088793764408, + "loss": 2.536, + "step": 9603 + }, + { + "epoch": 0.7750786861431684, + "grad_norm": 0.6879377365112305, + "learning_rate": 0.00010703514024328183, + "loss": 2.5884, + "step": 9604 + }, + { + "epoch": 0.7751593898797514, + "grad_norm": 0.6535949110984802, + "learning_rate": 0.00010701939237358549, + "loss": 2.5489, + "step": 9605 + }, + { + "epoch": 0.7752400936163344, + "grad_norm": 0.7308230400085449, + "learning_rate": 0.00010700364432894756, + "loss": 2.5679, + "step": 9606 + }, + { + "epoch": 0.7753207973529175, + "grad_norm": 0.7016584277153015, + "learning_rate": 0.00010698789610976052, + "loss": 2.5678, + "step": 9607 + }, + { + "epoch": 0.7754015010895005, + "grad_norm": 0.7181541323661804, + "learning_rate": 0.00010697214771641682, + "loss": 2.5004, + "step": 9608 + }, + { + "epoch": 0.7754822048260834, + "grad_norm": 0.6414844989776611, + "learning_rate": 0.00010695639914930895, + "loss": 2.4896, + "step": 9609 + }, + { + "epoch": 0.7755629085626664, + "grad_norm": 0.7288017868995667, + "learning_rate": 0.00010694065040882943, + "loss": 2.5945, + "step": 9610 + }, + { + "epoch": 0.7756436122992495, + "grad_norm": 0.6808066368103027, + "learning_rate": 
0.00010692490149537079, + "loss": 2.5973, + "step": 9611 + }, + { + "epoch": 0.7757243160358325, + "grad_norm": 0.7924454212188721, + "learning_rate": 0.00010690915240932553, + "loss": 2.5448, + "step": 9612 + }, + { + "epoch": 0.7758050197724154, + "grad_norm": 0.6466094851493835, + "learning_rate": 0.00010689340315108606, + "loss": 2.5065, + "step": 9613 + }, + { + "epoch": 0.7758857235089984, + "grad_norm": 0.6775460243225098, + "learning_rate": 0.00010687765372104502, + "loss": 2.5238, + "step": 9614 + }, + { + "epoch": 0.7759664272455815, + "grad_norm": 0.6901230812072754, + "learning_rate": 0.00010686190411959484, + "loss": 2.5109, + "step": 9615 + }, + { + "epoch": 0.7760471309821645, + "grad_norm": 0.7032039165496826, + "learning_rate": 0.00010684615434712808, + "loss": 2.6094, + "step": 9616 + }, + { + "epoch": 0.7761278347187475, + "grad_norm": 0.7008969187736511, + "learning_rate": 0.00010683040440403727, + "loss": 2.5758, + "step": 9617 + }, + { + "epoch": 0.7762085384553304, + "grad_norm": 0.6909677386283875, + "learning_rate": 0.00010681465429071491, + "loss": 2.5373, + "step": 9618 + }, + { + "epoch": 0.7762892421919135, + "grad_norm": 0.699030339717865, + "learning_rate": 0.00010679890400755355, + "loss": 2.577, + "step": 9619 + }, + { + "epoch": 0.7763699459284965, + "grad_norm": 0.7012344598770142, + "learning_rate": 0.00010678315355494575, + "loss": 2.5205, + "step": 9620 + }, + { + "epoch": 0.7764506496650795, + "grad_norm": 0.7693915367126465, + "learning_rate": 0.000106767402933284, + "loss": 2.5947, + "step": 9621 + }, + { + "epoch": 0.7765313534016625, + "grad_norm": 0.7635772228240967, + "learning_rate": 0.00010675165214296093, + "loss": 2.6221, + "step": 9622 + }, + { + "epoch": 0.7766120571382455, + "grad_norm": 0.701411783695221, + "learning_rate": 0.000106735901184369, + "loss": 2.5236, + "step": 9623 + }, + { + "epoch": 0.7766927608748285, + "grad_norm": 0.7283998727798462, + "learning_rate": 0.00010672015005790079, + "loss": 2.5581, + 
"step": 9624 + }, + { + "epoch": 0.7767734646114115, + "grad_norm": 0.7069897055625916, + "learning_rate": 0.0001067043987639489, + "loss": 2.5541, + "step": 9625 + }, + { + "epoch": 0.7768541683479945, + "grad_norm": 0.7419753074645996, + "learning_rate": 0.00010668864730290586, + "loss": 2.5992, + "step": 9626 + }, + { + "epoch": 0.7769348720845776, + "grad_norm": 0.6651501059532166, + "learning_rate": 0.00010667289567516426, + "loss": 2.546, + "step": 9627 + }, + { + "epoch": 0.7770155758211605, + "grad_norm": 0.7265670895576477, + "learning_rate": 0.00010665714388111665, + "loss": 2.611, + "step": 9628 + }, + { + "epoch": 0.7770962795577435, + "grad_norm": 0.6520028114318848, + "learning_rate": 0.00010664139192115559, + "loss": 2.5433, + "step": 9629 + }, + { + "epoch": 0.7771769832943265, + "grad_norm": 0.6990057826042175, + "learning_rate": 0.0001066256397956737, + "loss": 2.5325, + "step": 9630 + }, + { + "epoch": 0.7772576870309096, + "grad_norm": 0.7353312373161316, + "learning_rate": 0.00010660988750506355, + "loss": 2.4707, + "step": 9631 + }, + { + "epoch": 0.7773383907674926, + "grad_norm": 0.6810272932052612, + "learning_rate": 0.00010659413504971774, + "loss": 2.5618, + "step": 9632 + }, + { + "epoch": 0.7774190945040755, + "grad_norm": 0.6480081081390381, + "learning_rate": 0.00010657838243002883, + "loss": 2.4543, + "step": 9633 + }, + { + "epoch": 0.7774997982406585, + "grad_norm": 0.6617380976676941, + "learning_rate": 0.00010656262964638942, + "loss": 2.5628, + "step": 9634 + }, + { + "epoch": 0.7775805019772416, + "grad_norm": 0.6761382222175598, + "learning_rate": 0.00010654687669919212, + "loss": 2.5433, + "step": 9635 + }, + { + "epoch": 0.7776612057138246, + "grad_norm": 0.6733867526054382, + "learning_rate": 0.00010653112358882957, + "loss": 2.5282, + "step": 9636 + }, + { + "epoch": 0.7777419094504076, + "grad_norm": 0.6854631304740906, + "learning_rate": 0.00010651537031569433, + "loss": 2.5997, + "step": 9637 + }, + { + "epoch": 
0.7778226131869905, + "grad_norm": 0.7451226115226746, + "learning_rate": 0.00010649961688017904, + "loss": 2.5058, + "step": 9638 + }, + { + "epoch": 0.7779033169235735, + "grad_norm": 0.6744229197502136, + "learning_rate": 0.0001064838632826763, + "loss": 2.5962, + "step": 9639 + }, + { + "epoch": 0.7779840206601566, + "grad_norm": 0.7568119764328003, + "learning_rate": 0.00010646810952357873, + "loss": 2.5896, + "step": 9640 + }, + { + "epoch": 0.7780647243967396, + "grad_norm": 0.6860085725784302, + "learning_rate": 0.00010645235560327899, + "loss": 2.5675, + "step": 9641 + }, + { + "epoch": 0.7781454281333225, + "grad_norm": 0.6491742134094238, + "learning_rate": 0.00010643660152216965, + "loss": 2.5374, + "step": 9642 + }, + { + "epoch": 0.7782261318699055, + "grad_norm": 0.6664023399353027, + "learning_rate": 0.0001064208472806434, + "loss": 2.4679, + "step": 9643 + }, + { + "epoch": 0.7783068356064886, + "grad_norm": 0.6595140099525452, + "learning_rate": 0.00010640509287909284, + "loss": 2.5045, + "step": 9644 + }, + { + "epoch": 0.7783875393430716, + "grad_norm": 0.6788576245307922, + "learning_rate": 0.0001063893383179106, + "loss": 2.5706, + "step": 9645 + }, + { + "epoch": 0.7784682430796546, + "grad_norm": 0.6741334199905396, + "learning_rate": 0.00010637358359748939, + "loss": 2.5763, + "step": 9646 + }, + { + "epoch": 0.7785489468162375, + "grad_norm": 0.6837517023086548, + "learning_rate": 0.0001063578287182218, + "loss": 2.5484, + "step": 9647 + }, + { + "epoch": 0.7786296505528206, + "grad_norm": 0.6604229211807251, + "learning_rate": 0.00010634207368050048, + "loss": 2.5465, + "step": 9648 + }, + { + "epoch": 0.7787103542894036, + "grad_norm": 0.6528951525688171, + "learning_rate": 0.00010632631848471813, + "loss": 2.5409, + "step": 9649 + }, + { + "epoch": 0.7787910580259866, + "grad_norm": 0.6615377068519592, + "learning_rate": 0.00010631056313126734, + "loss": 2.5545, + "step": 9650 + }, + { + "epoch": 0.7788717617625696, + "grad_norm": 
0.666033923625946, + "learning_rate": 0.00010629480762054089, + "loss": 2.5341, + "step": 9651 + }, + { + "epoch": 0.7789524654991526, + "grad_norm": 0.7022622227668762, + "learning_rate": 0.00010627905195293135, + "loss": 2.5206, + "step": 9652 + }, + { + "epoch": 0.7790331692357356, + "grad_norm": 0.7175850868225098, + "learning_rate": 0.00010626329612883141, + "loss": 2.5912, + "step": 9653 + }, + { + "epoch": 0.7791138729723186, + "grad_norm": 0.6592069268226624, + "learning_rate": 0.00010624754014863379, + "loss": 2.5076, + "step": 9654 + }, + { + "epoch": 0.7791945767089016, + "grad_norm": 0.645893931388855, + "learning_rate": 0.0001062317840127311, + "loss": 2.5124, + "step": 9655 + }, + { + "epoch": 0.7792752804454847, + "grad_norm": 0.6638232469558716, + "learning_rate": 0.00010621602772151607, + "loss": 2.5182, + "step": 9656 + }, + { + "epoch": 0.7793559841820676, + "grad_norm": 0.6718387603759766, + "learning_rate": 0.0001062002712753814, + "loss": 2.4773, + "step": 9657 + }, + { + "epoch": 0.7794366879186506, + "grad_norm": 0.6402876377105713, + "learning_rate": 0.00010618451467471972, + "loss": 2.5557, + "step": 9658 + }, + { + "epoch": 0.7795173916552336, + "grad_norm": 0.6898398399353027, + "learning_rate": 0.00010616875791992382, + "loss": 2.5557, + "step": 9659 + }, + { + "epoch": 0.7795980953918167, + "grad_norm": 0.6718475222587585, + "learning_rate": 0.00010615300101138633, + "loss": 2.5335, + "step": 9660 + }, + { + "epoch": 0.7796787991283997, + "grad_norm": 0.6436911225318909, + "learning_rate": 0.00010613724394949995, + "loss": 2.5214, + "step": 9661 + }, + { + "epoch": 0.7797595028649826, + "grad_norm": 0.7554156184196472, + "learning_rate": 0.00010612148673465743, + "loss": 2.5526, + "step": 9662 + }, + { + "epoch": 0.7798402066015656, + "grad_norm": 0.6728504300117493, + "learning_rate": 0.00010610572936725147, + "loss": 2.5935, + "step": 9663 + }, + { + "epoch": 0.7799209103381487, + "grad_norm": 0.6793323159217834, + "learning_rate": 
0.00010608997184767476, + "loss": 2.5515, + "step": 9664 + }, + { + "epoch": 0.7800016140747317, + "grad_norm": 0.7242898941040039, + "learning_rate": 0.00010607421417631999, + "loss": 2.5332, + "step": 9665 + }, + { + "epoch": 0.7800823178113147, + "grad_norm": 0.6719244718551636, + "learning_rate": 0.00010605845635357996, + "loss": 2.5191, + "step": 9666 + }, + { + "epoch": 0.7801630215478976, + "grad_norm": 0.6836631894111633, + "learning_rate": 0.00010604269837984737, + "loss": 2.6489, + "step": 9667 + }, + { + "epoch": 0.7802437252844807, + "grad_norm": 0.6833824515342712, + "learning_rate": 0.00010602694025551496, + "loss": 2.4906, + "step": 9668 + }, + { + "epoch": 0.7803244290210637, + "grad_norm": 0.7449159026145935, + "learning_rate": 0.0001060111819809754, + "loss": 2.5301, + "step": 9669 + }, + { + "epoch": 0.7804051327576467, + "grad_norm": 0.7149158120155334, + "learning_rate": 0.00010599542355662149, + "loss": 2.5097, + "step": 9670 + }, + { + "epoch": 0.7804858364942296, + "grad_norm": 0.6616973876953125, + "learning_rate": 0.00010597966498284595, + "loss": 2.5928, + "step": 9671 + }, + { + "epoch": 0.7805665402308127, + "grad_norm": 0.6556531190872192, + "learning_rate": 0.00010596390626004154, + "loss": 2.5543, + "step": 9672 + }, + { + "epoch": 0.7806472439673957, + "grad_norm": 0.6585283875465393, + "learning_rate": 0.000105948147388601, + "loss": 2.5244, + "step": 9673 + }, + { + "epoch": 0.7807279477039787, + "grad_norm": 0.6484133005142212, + "learning_rate": 0.00010593238836891704, + "loss": 2.4996, + "step": 9674 + }, + { + "epoch": 0.7808086514405617, + "grad_norm": 0.6681119799613953, + "learning_rate": 0.00010591662920138248, + "loss": 2.5322, + "step": 9675 + }, + { + "epoch": 0.7808893551771448, + "grad_norm": 0.709403395652771, + "learning_rate": 0.00010590086988639005, + "loss": 2.5554, + "step": 9676 + }, + { + "epoch": 0.7809700589137277, + "grad_norm": 0.6734669804573059, + "learning_rate": 0.00010588511042433251, + "loss": 
2.5452, + "step": 9677 + }, + { + "epoch": 0.7810507626503107, + "grad_norm": 0.6800141930580139, + "learning_rate": 0.00010586935081560268, + "loss": 2.5154, + "step": 9678 + }, + { + "epoch": 0.7811314663868937, + "grad_norm": 0.7757244110107422, + "learning_rate": 0.00010585359106059326, + "loss": 2.5935, + "step": 9679 + }, + { + "epoch": 0.7812121701234768, + "grad_norm": 0.7288491725921631, + "learning_rate": 0.00010583783115969699, + "loss": 2.5276, + "step": 9680 + }, + { + "epoch": 0.7812928738600597, + "grad_norm": 0.6785164475440979, + "learning_rate": 0.00010582207111330678, + "loss": 2.5907, + "step": 9681 + }, + { + "epoch": 0.7813735775966427, + "grad_norm": 0.6651367545127869, + "learning_rate": 0.0001058063109218153, + "loss": 2.545, + "step": 9682 + }, + { + "epoch": 0.7814542813332257, + "grad_norm": 0.6657043695449829, + "learning_rate": 0.0001057905505856154, + "loss": 2.5548, + "step": 9683 + }, + { + "epoch": 0.7815349850698088, + "grad_norm": 0.6486692428588867, + "learning_rate": 0.00010577479010509986, + "loss": 2.5589, + "step": 9684 + }, + { + "epoch": 0.7816156888063918, + "grad_norm": 0.700749397277832, + "learning_rate": 0.0001057590294806614, + "loss": 2.6008, + "step": 9685 + }, + { + "epoch": 0.7816963925429747, + "grad_norm": 0.647051215171814, + "learning_rate": 0.00010574326871269289, + "loss": 2.4894, + "step": 9686 + }, + { + "epoch": 0.7817770962795577, + "grad_norm": 0.6932066679000854, + "learning_rate": 0.00010572750780158713, + "loss": 2.5256, + "step": 9687 + }, + { + "epoch": 0.7818578000161408, + "grad_norm": 0.6330733895301819, + "learning_rate": 0.00010571174674773689, + "loss": 2.5242, + "step": 9688 + }, + { + "epoch": 0.7819385037527238, + "grad_norm": 0.6476379036903381, + "learning_rate": 0.00010569598555153499, + "loss": 2.552, + "step": 9689 + }, + { + "epoch": 0.7820192074893068, + "grad_norm": 0.661204993724823, + "learning_rate": 0.00010568022421337424, + "loss": 2.4869, + "step": 9690 + }, + { + "epoch": 
0.7820999112258897, + "grad_norm": 0.6663263440132141, + "learning_rate": 0.00010566446273364746, + "loss": 2.5134, + "step": 9691 + }, + { + "epoch": 0.7821806149624727, + "grad_norm": 0.6982834339141846, + "learning_rate": 0.00010564870111274748, + "loss": 2.5755, + "step": 9692 + }, + { + "epoch": 0.7822613186990558, + "grad_norm": 0.6266167759895325, + "learning_rate": 0.00010563293935106706, + "loss": 2.5413, + "step": 9693 + }, + { + "epoch": 0.7823420224356388, + "grad_norm": 0.6484279632568359, + "learning_rate": 0.0001056171774489991, + "loss": 2.5579, + "step": 9694 + }, + { + "epoch": 0.7824227261722217, + "grad_norm": 0.674933910369873, + "learning_rate": 0.00010560141540693638, + "loss": 2.5364, + "step": 9695 + }, + { + "epoch": 0.7825034299088047, + "grad_norm": 0.7961840033531189, + "learning_rate": 0.00010558565322527174, + "loss": 2.5143, + "step": 9696 + }, + { + "epoch": 0.7825841336453878, + "grad_norm": 0.697158694267273, + "learning_rate": 0.00010556989090439804, + "loss": 2.5341, + "step": 9697 + }, + { + "epoch": 0.7826648373819708, + "grad_norm": 0.6912708282470703, + "learning_rate": 0.00010555412844470806, + "loss": 2.5331, + "step": 9698 + }, + { + "epoch": 0.7827455411185538, + "grad_norm": 0.7078350186347961, + "learning_rate": 0.00010553836584659474, + "loss": 2.5752, + "step": 9699 + }, + { + "epoch": 0.7828262448551367, + "grad_norm": 0.6421065926551819, + "learning_rate": 0.00010552260311045082, + "loss": 2.5393, + "step": 9700 + }, + { + "epoch": 0.7829069485917198, + "grad_norm": 0.644120454788208, + "learning_rate": 0.00010550684023666918, + "loss": 2.5062, + "step": 9701 + }, + { + "epoch": 0.7829876523283028, + "grad_norm": 0.7038589715957642, + "learning_rate": 0.00010549107722564275, + "loss": 2.6074, + "step": 9702 + }, + { + "epoch": 0.7830683560648858, + "grad_norm": 0.6692953109741211, + "learning_rate": 0.00010547531407776427, + "loss": 2.5801, + "step": 9703 + }, + { + "epoch": 0.7831490598014688, + "grad_norm": 
0.7059200406074524, + "learning_rate": 0.00010545955079342669, + "loss": 2.5579, + "step": 9704 + }, + { + "epoch": 0.7832297635380518, + "grad_norm": 0.7126718759536743, + "learning_rate": 0.0001054437873730228, + "loss": 2.5764, + "step": 9705 + }, + { + "epoch": 0.7833104672746348, + "grad_norm": 0.696784257888794, + "learning_rate": 0.0001054280238169455, + "loss": 2.5256, + "step": 9706 + }, + { + "epoch": 0.7833911710112178, + "grad_norm": 0.7473082542419434, + "learning_rate": 0.00010541226012558767, + "loss": 2.5983, + "step": 9707 + }, + { + "epoch": 0.7834718747478008, + "grad_norm": 0.6598967909812927, + "learning_rate": 0.00010539649629934219, + "loss": 2.5267, + "step": 9708 + }, + { + "epoch": 0.7835525784843839, + "grad_norm": 0.7168934345245361, + "learning_rate": 0.00010538073233860188, + "loss": 2.5278, + "step": 9709 + }, + { + "epoch": 0.7836332822209668, + "grad_norm": 0.6848951578140259, + "learning_rate": 0.00010536496824375968, + "loss": 2.5267, + "step": 9710 + }, + { + "epoch": 0.7837139859575498, + "grad_norm": 0.7276272773742676, + "learning_rate": 0.0001053492040152084, + "loss": 2.5706, + "step": 9711 + }, + { + "epoch": 0.7837946896941328, + "grad_norm": 0.6929399371147156, + "learning_rate": 0.00010533343965334101, + "loss": 2.5184, + "step": 9712 + }, + { + "epoch": 0.7838753934307159, + "grad_norm": 0.7497181296348572, + "learning_rate": 0.00010531767515855037, + "loss": 2.5626, + "step": 9713 + }, + { + "epoch": 0.7839560971672989, + "grad_norm": 0.6536200046539307, + "learning_rate": 0.00010530191053122935, + "loss": 2.5909, + "step": 9714 + }, + { + "epoch": 0.7840368009038818, + "grad_norm": 0.6750395894050598, + "learning_rate": 0.00010528614577177087, + "loss": 2.5119, + "step": 9715 + }, + { + "epoch": 0.7841175046404648, + "grad_norm": 0.6284878849983215, + "learning_rate": 0.00010527038088056782, + "loss": 2.5417, + "step": 9716 + }, + { + "epoch": 0.7841982083770479, + "grad_norm": 0.6529444456100464, + "learning_rate": 
0.00010525461585801308, + "loss": 2.5865, + "step": 9717 + }, + { + "epoch": 0.7842789121136309, + "grad_norm": 0.7332968711853027, + "learning_rate": 0.00010523885070449959, + "loss": 2.561, + "step": 9718 + }, + { + "epoch": 0.7843596158502139, + "grad_norm": 0.7054178714752197, + "learning_rate": 0.00010522308542042025, + "loss": 2.623, + "step": 9719 + }, + { + "epoch": 0.7844403195867968, + "grad_norm": 0.6837820410728455, + "learning_rate": 0.00010520732000616798, + "loss": 2.5586, + "step": 9720 + }, + { + "epoch": 0.7845210233233799, + "grad_norm": 0.7339439392089844, + "learning_rate": 0.00010519155446213565, + "loss": 2.5374, + "step": 9721 + }, + { + "epoch": 0.7846017270599629, + "grad_norm": 0.7625028491020203, + "learning_rate": 0.00010517578878871624, + "loss": 2.5663, + "step": 9722 + }, + { + "epoch": 0.7846824307965459, + "grad_norm": 0.6749752759933472, + "learning_rate": 0.00010516002298630263, + "loss": 2.5744, + "step": 9723 + }, + { + "epoch": 0.7847631345331288, + "grad_norm": 0.6702882647514343, + "learning_rate": 0.00010514425705528776, + "loss": 2.6247, + "step": 9724 + }, + { + "epoch": 0.7848438382697119, + "grad_norm": 0.6641737222671509, + "learning_rate": 0.00010512849099606457, + "loss": 2.5792, + "step": 9725 + }, + { + "epoch": 0.7849245420062949, + "grad_norm": 0.7522993683815002, + "learning_rate": 0.00010511272480902597, + "loss": 2.5941, + "step": 9726 + }, + { + "epoch": 0.7850052457428779, + "grad_norm": 0.7507709860801697, + "learning_rate": 0.00010509695849456487, + "loss": 2.5312, + "step": 9727 + }, + { + "epoch": 0.7850859494794609, + "grad_norm": 0.7101978063583374, + "learning_rate": 0.0001050811920530743, + "loss": 2.5833, + "step": 9728 + }, + { + "epoch": 0.785166653216044, + "grad_norm": 0.6814672946929932, + "learning_rate": 0.0001050654254849471, + "loss": 2.5466, + "step": 9729 + }, + { + "epoch": 0.7852473569526269, + "grad_norm": 0.7250106930732727, + "learning_rate": 0.0001050496587905763, + "loss": 2.5144, 
+ "step": 9730 + }, + { + "epoch": 0.7853280606892099, + "grad_norm": 0.7125658392906189, + "learning_rate": 0.00010503389197035474, + "loss": 2.5384, + "step": 9731 + }, + { + "epoch": 0.7854087644257929, + "grad_norm": 0.7076827883720398, + "learning_rate": 0.00010501812502467547, + "loss": 2.4879, + "step": 9732 + }, + { + "epoch": 0.785489468162376, + "grad_norm": 0.632216215133667, + "learning_rate": 0.00010500235795393141, + "loss": 2.5678, + "step": 9733 + }, + { + "epoch": 0.785570171898959, + "grad_norm": 0.7376949191093445, + "learning_rate": 0.00010498659075851551, + "loss": 2.5024, + "step": 9734 + }, + { + "epoch": 0.7856508756355419, + "grad_norm": 0.6730546951293945, + "learning_rate": 0.00010497082343882072, + "loss": 2.5001, + "step": 9735 + }, + { + "epoch": 0.7857315793721249, + "grad_norm": 0.6958187818527222, + "learning_rate": 0.00010495505599524002, + "loss": 2.538, + "step": 9736 + }, + { + "epoch": 0.785812283108708, + "grad_norm": 0.6882508397102356, + "learning_rate": 0.00010493928842816638, + "loss": 2.5247, + "step": 9737 + }, + { + "epoch": 0.785892986845291, + "grad_norm": 0.711086630821228, + "learning_rate": 0.00010492352073799276, + "loss": 2.5721, + "step": 9738 + }, + { + "epoch": 0.7859736905818739, + "grad_norm": 0.7217094898223877, + "learning_rate": 0.00010490775292511214, + "loss": 2.5827, + "step": 9739 + }, + { + "epoch": 0.7860543943184569, + "grad_norm": 0.6812087893486023, + "learning_rate": 0.0001048919849899175, + "loss": 2.532, + "step": 9740 + }, + { + "epoch": 0.7861350980550399, + "grad_norm": 0.7449110150337219, + "learning_rate": 0.00010487621693280176, + "loss": 2.5611, + "step": 9741 + }, + { + "epoch": 0.786215801791623, + "grad_norm": 0.7297104001045227, + "learning_rate": 0.00010486044875415797, + "loss": 2.5173, + "step": 9742 + }, + { + "epoch": 0.786296505528206, + "grad_norm": 0.6741474270820618, + "learning_rate": 0.0001048446804543791, + "loss": 2.5451, + "step": 9743 + }, + { + "epoch": 
0.7863772092647889, + "grad_norm": 0.6450859308242798, + "learning_rate": 0.00010482891203385812, + "loss": 2.551, + "step": 9744 + }, + { + "epoch": 0.7864579130013719, + "grad_norm": 0.6867123246192932, + "learning_rate": 0.00010481314349298805, + "loss": 2.4875, + "step": 9745 + }, + { + "epoch": 0.786538616737955, + "grad_norm": 0.6951552629470825, + "learning_rate": 0.00010479737483216183, + "loss": 2.6253, + "step": 9746 + }, + { + "epoch": 0.786619320474538, + "grad_norm": 0.6786869764328003, + "learning_rate": 0.0001047816060517725, + "loss": 2.5551, + "step": 9747 + }, + { + "epoch": 0.786700024211121, + "grad_norm": 0.698957622051239, + "learning_rate": 0.00010476583715221306, + "loss": 2.5554, + "step": 9748 + }, + { + "epoch": 0.7867807279477039, + "grad_norm": 0.6407502889633179, + "learning_rate": 0.00010475006813387648, + "loss": 2.5112, + "step": 9749 + }, + { + "epoch": 0.786861431684287, + "grad_norm": 0.660418689250946, + "learning_rate": 0.00010473429899715581, + "loss": 2.5557, + "step": 9750 + }, + { + "epoch": 0.78694213542087, + "grad_norm": 0.71445631980896, + "learning_rate": 0.00010471852974244403, + "loss": 2.5169, + "step": 9751 + }, + { + "epoch": 0.787022839157453, + "grad_norm": 0.6620494723320007, + "learning_rate": 0.00010470276037013414, + "loss": 2.5517, + "step": 9752 + }, + { + "epoch": 0.787103542894036, + "grad_norm": 0.6921235918998718, + "learning_rate": 0.00010468699088061917, + "loss": 2.5246, + "step": 9753 + }, + { + "epoch": 0.787184246630619, + "grad_norm": 0.6617140769958496, + "learning_rate": 0.00010467122127429214, + "loss": 2.4941, + "step": 9754 + }, + { + "epoch": 0.787264950367202, + "grad_norm": 0.6549816727638245, + "learning_rate": 0.00010465545155154608, + "loss": 2.5189, + "step": 9755 + }, + { + "epoch": 0.787345654103785, + "grad_norm": 0.7030060887336731, + "learning_rate": 0.00010463968171277396, + "loss": 2.5058, + "step": 9756 + }, + { + "epoch": 0.787426357840368, + "grad_norm": 0.7294049859046936, 
+ "learning_rate": 0.00010462391175836886, + "loss": 2.5166, + "step": 9757 + }, + { + "epoch": 0.787507061576951, + "grad_norm": 0.6407562494277954, + "learning_rate": 0.00010460814168872382, + "loss": 2.5391, + "step": 9758 + }, + { + "epoch": 0.787587765313534, + "grad_norm": 0.8024646639823914, + "learning_rate": 0.0001045923715042318, + "loss": 2.7034, + "step": 9759 + }, + { + "epoch": 0.787668469050117, + "grad_norm": 0.7160943150520325, + "learning_rate": 0.00010457660120528592, + "loss": 2.6016, + "step": 9760 + }, + { + "epoch": 0.7877491727867, + "grad_norm": 0.6987707018852234, + "learning_rate": 0.00010456083079227916, + "loss": 2.5428, + "step": 9761 + }, + { + "epoch": 0.7878298765232831, + "grad_norm": 0.7235369086265564, + "learning_rate": 0.00010454506026560453, + "loss": 2.517, + "step": 9762 + }, + { + "epoch": 0.787910580259866, + "grad_norm": 0.6827502846717834, + "learning_rate": 0.00010452928962565518, + "loss": 2.5777, + "step": 9763 + }, + { + "epoch": 0.787991283996449, + "grad_norm": 0.71755450963974, + "learning_rate": 0.00010451351887282408, + "loss": 2.6004, + "step": 9764 + }, + { + "epoch": 0.788071987733032, + "grad_norm": 0.6988046765327454, + "learning_rate": 0.00010449774800750427, + "loss": 2.6116, + "step": 9765 + }, + { + "epoch": 0.7881526914696151, + "grad_norm": 0.6959548592567444, + "learning_rate": 0.00010448197703008884, + "loss": 2.5856, + "step": 9766 + }, + { + "epoch": 0.7882333952061981, + "grad_norm": 0.687042772769928, + "learning_rate": 0.00010446620594097079, + "loss": 2.5167, + "step": 9767 + }, + { + "epoch": 0.788314098942781, + "grad_norm": 0.6950173377990723, + "learning_rate": 0.00010445043474054325, + "loss": 2.5157, + "step": 9768 + }, + { + "epoch": 0.788394802679364, + "grad_norm": 0.680768609046936, + "learning_rate": 0.00010443466342919926, + "loss": 2.6177, + "step": 9769 + }, + { + "epoch": 0.7884755064159471, + "grad_norm": 0.7790142893791199, + "learning_rate": 0.00010441889200733181, + "loss": 
2.5761, + "step": 9770 + }, + { + "epoch": 0.7885562101525301, + "grad_norm": 0.6207798719406128, + "learning_rate": 0.00010440312047533406, + "loss": 2.5305, + "step": 9771 + }, + { + "epoch": 0.7886369138891131, + "grad_norm": 0.7143635749816895, + "learning_rate": 0.00010438734883359903, + "loss": 2.5922, + "step": 9772 + }, + { + "epoch": 0.788717617625696, + "grad_norm": 0.7234248518943787, + "learning_rate": 0.00010437157708251977, + "loss": 2.6051, + "step": 9773 + }, + { + "epoch": 0.7887983213622791, + "grad_norm": 0.6602753400802612, + "learning_rate": 0.00010435580522248942, + "loss": 2.6002, + "step": 9774 + }, + { + "epoch": 0.7888790250988621, + "grad_norm": 0.6929246783256531, + "learning_rate": 0.00010434003325390101, + "loss": 2.5798, + "step": 9775 + }, + { + "epoch": 0.7889597288354451, + "grad_norm": 0.7355811595916748, + "learning_rate": 0.00010432426117714762, + "loss": 2.5859, + "step": 9776 + }, + { + "epoch": 0.789040432572028, + "grad_norm": 0.7009611129760742, + "learning_rate": 0.00010430848899262233, + "loss": 2.5535, + "step": 9777 + }, + { + "epoch": 0.7891211363086111, + "grad_norm": 0.6699070930480957, + "learning_rate": 0.00010429271670071823, + "loss": 2.5687, + "step": 9778 + }, + { + "epoch": 0.7892018400451941, + "grad_norm": 0.6632630228996277, + "learning_rate": 0.00010427694430182844, + "loss": 2.5359, + "step": 9779 + }, + { + "epoch": 0.7892825437817771, + "grad_norm": 0.7256911993026733, + "learning_rate": 0.000104261171796346, + "loss": 2.5432, + "step": 9780 + }, + { + "epoch": 0.7893632475183601, + "grad_norm": 0.6654312610626221, + "learning_rate": 0.000104245399184664, + "loss": 2.5432, + "step": 9781 + }, + { + "epoch": 0.7894439512549432, + "grad_norm": 0.6808900237083435, + "learning_rate": 0.00010422962646717557, + "loss": 2.4951, + "step": 9782 + }, + { + "epoch": 0.7895246549915261, + "grad_norm": 0.6655945181846619, + "learning_rate": 0.00010421385364427378, + "loss": 2.5152, + "step": 9783 + }, + { + "epoch": 
0.7896053587281091, + "grad_norm": 0.8399274349212646, + "learning_rate": 0.00010419808071635178, + "loss": 2.5688, + "step": 9784 + }, + { + "epoch": 0.7896860624646921, + "grad_norm": 0.6412226557731628, + "learning_rate": 0.00010418230768380262, + "loss": 2.5527, + "step": 9785 + }, + { + "epoch": 0.7897667662012752, + "grad_norm": 0.6505058407783508, + "learning_rate": 0.0001041665345470194, + "loss": 2.5768, + "step": 9786 + }, + { + "epoch": 0.7898474699378581, + "grad_norm": 0.6297653317451477, + "learning_rate": 0.00010415076130639526, + "loss": 2.5372, + "step": 9787 + }, + { + "epoch": 0.7899281736744411, + "grad_norm": 0.6524460315704346, + "learning_rate": 0.00010413498796232331, + "loss": 2.5047, + "step": 9788 + }, + { + "epoch": 0.7900088774110241, + "grad_norm": 0.6637924313545227, + "learning_rate": 0.00010411921451519662, + "loss": 2.508, + "step": 9789 + }, + { + "epoch": 0.7900895811476072, + "grad_norm": 0.6423435211181641, + "learning_rate": 0.00010410344096540836, + "loss": 2.4597, + "step": 9790 + }, + { + "epoch": 0.7901702848841902, + "grad_norm": 0.6361977458000183, + "learning_rate": 0.00010408766731335163, + "loss": 2.5921, + "step": 9791 + }, + { + "epoch": 0.7902509886207731, + "grad_norm": 0.6792182922363281, + "learning_rate": 0.00010407189355941953, + "loss": 2.5543, + "step": 9792 + }, + { + "epoch": 0.7903316923573561, + "grad_norm": 0.6998419761657715, + "learning_rate": 0.00010405611970400519, + "loss": 2.5333, + "step": 9793 + }, + { + "epoch": 0.7904123960939391, + "grad_norm": 0.6730015873908997, + "learning_rate": 0.00010404034574750174, + "loss": 2.596, + "step": 9794 + }, + { + "epoch": 0.7904930998305222, + "grad_norm": 0.7120258808135986, + "learning_rate": 0.00010402457169030235, + "loss": 2.5314, + "step": 9795 + }, + { + "epoch": 0.7905738035671052, + "grad_norm": 0.6553651690483093, + "learning_rate": 0.0001040087975328001, + "loss": 2.4973, + "step": 9796 + }, + { + "epoch": 0.7906545073036881, + "grad_norm": 
0.6506681442260742, + "learning_rate": 0.00010399302327538812, + "loss": 2.588, + "step": 9797 + }, + { + "epoch": 0.7907352110402711, + "grad_norm": 0.6737257242202759, + "learning_rate": 0.00010397724891845957, + "loss": 2.5454, + "step": 9798 + }, + { + "epoch": 0.7908159147768542, + "grad_norm": 0.670120894908905, + "learning_rate": 0.00010396147446240756, + "loss": 2.4926, + "step": 9799 + }, + { + "epoch": 0.7908966185134372, + "grad_norm": 0.7028468251228333, + "learning_rate": 0.00010394569990762529, + "loss": 2.5727, + "step": 9800 + }, + { + "epoch": 0.7909773222500202, + "grad_norm": 0.7084455490112305, + "learning_rate": 0.00010392992525450584, + "loss": 2.547, + "step": 9801 + }, + { + "epoch": 0.7910580259866031, + "grad_norm": 0.732694685459137, + "learning_rate": 0.0001039141505034424, + "loss": 2.5871, + "step": 9802 + }, + { + "epoch": 0.7911387297231862, + "grad_norm": 0.7214515209197998, + "learning_rate": 0.00010389837565482807, + "loss": 2.5672, + "step": 9803 + }, + { + "epoch": 0.7912194334597692, + "grad_norm": 0.6495330333709717, + "learning_rate": 0.00010388260070905604, + "loss": 2.5266, + "step": 9804 + }, + { + "epoch": 0.7913001371963522, + "grad_norm": 0.6930941343307495, + "learning_rate": 0.00010386682566651945, + "loss": 2.5734, + "step": 9805 + }, + { + "epoch": 0.7913808409329351, + "grad_norm": 0.714214563369751, + "learning_rate": 0.00010385105052761148, + "loss": 2.4987, + "step": 9806 + }, + { + "epoch": 0.7914615446695182, + "grad_norm": 0.7525388598442078, + "learning_rate": 0.00010383527529272523, + "loss": 2.5427, + "step": 9807 + }, + { + "epoch": 0.7915422484061012, + "grad_norm": 0.6088642477989197, + "learning_rate": 0.00010381949996225389, + "loss": 2.5018, + "step": 9808 + }, + { + "epoch": 0.7916229521426842, + "grad_norm": 0.6797540187835693, + "learning_rate": 0.00010380372453659066, + "loss": 2.5235, + "step": 9809 + }, + { + "epoch": 0.7917036558792672, + "grad_norm": 0.6754054427146912, + "learning_rate": 
0.00010378794901612865, + "loss": 2.5343, + "step": 9810 + }, + { + "epoch": 0.7917843596158503, + "grad_norm": 0.7375015020370483, + "learning_rate": 0.00010377217340126106, + "loss": 2.6101, + "step": 9811 + }, + { + "epoch": 0.7918650633524332, + "grad_norm": 0.6487904191017151, + "learning_rate": 0.00010375639769238103, + "loss": 2.5408, + "step": 9812 + }, + { + "epoch": 0.7919457670890162, + "grad_norm": 0.7280275821685791, + "learning_rate": 0.00010374062188988176, + "loss": 2.5503, + "step": 9813 + }, + { + "epoch": 0.7920264708255992, + "grad_norm": 0.6944922208786011, + "learning_rate": 0.00010372484599415644, + "loss": 2.5815, + "step": 9814 + }, + { + "epoch": 0.7921071745621823, + "grad_norm": 0.6970139741897583, + "learning_rate": 0.00010370907000559818, + "loss": 2.546, + "step": 9815 + }, + { + "epoch": 0.7921878782987652, + "grad_norm": 0.7338151335716248, + "learning_rate": 0.00010369329392460023, + "loss": 2.5449, + "step": 9816 + }, + { + "epoch": 0.7922685820353482, + "grad_norm": 0.7763465642929077, + "learning_rate": 0.00010367751775155574, + "loss": 2.5331, + "step": 9817 + }, + { + "epoch": 0.7923492857719312, + "grad_norm": 0.6892645955085754, + "learning_rate": 0.00010366174148685786, + "loss": 2.5617, + "step": 9818 + }, + { + "epoch": 0.7924299895085143, + "grad_norm": 0.7388250231742859, + "learning_rate": 0.00010364596513089984, + "loss": 2.5236, + "step": 9819 + }, + { + "epoch": 0.7925106932450973, + "grad_norm": 0.7035132646560669, + "learning_rate": 0.00010363018868407482, + "loss": 2.5711, + "step": 9820 + }, + { + "epoch": 0.7925913969816802, + "grad_norm": 0.7087043523788452, + "learning_rate": 0.00010361441214677603, + "loss": 2.5416, + "step": 9821 + }, + { + "epoch": 0.7926721007182632, + "grad_norm": 0.7173168063163757, + "learning_rate": 0.00010359863551939664, + "loss": 2.529, + "step": 9822 + }, + { + "epoch": 0.7927528044548463, + "grad_norm": 0.7007408738136292, + "learning_rate": 0.00010358285880232983, + "loss": 
2.5287, + "step": 9823 + }, + { + "epoch": 0.7928335081914293, + "grad_norm": 0.7731965780258179, + "learning_rate": 0.0001035670819959688, + "loss": 2.5913, + "step": 9824 + }, + { + "epoch": 0.7929142119280123, + "grad_norm": 0.6625120639801025, + "learning_rate": 0.00010355130510070681, + "loss": 2.5815, + "step": 9825 + }, + { + "epoch": 0.7929949156645952, + "grad_norm": 0.6628395318984985, + "learning_rate": 0.00010353552811693699, + "loss": 2.512, + "step": 9826 + }, + { + "epoch": 0.7930756194011783, + "grad_norm": 0.6565915942192078, + "learning_rate": 0.00010351975104505256, + "loss": 2.54, + "step": 9827 + }, + { + "epoch": 0.7931563231377613, + "grad_norm": 0.6581636667251587, + "learning_rate": 0.00010350397388544672, + "loss": 2.5462, + "step": 9828 + }, + { + "epoch": 0.7932370268743443, + "grad_norm": 0.705668568611145, + "learning_rate": 0.0001034881966385127, + "loss": 2.5241, + "step": 9829 + }, + { + "epoch": 0.7933177306109273, + "grad_norm": 0.7047126293182373, + "learning_rate": 0.00010347241930464373, + "loss": 2.5275, + "step": 9830 + }, + { + "epoch": 0.7933984343475103, + "grad_norm": 0.6285849213600159, + "learning_rate": 0.00010345664188423296, + "loss": 2.518, + "step": 9831 + }, + { + "epoch": 0.7934791380840933, + "grad_norm": 0.697542130947113, + "learning_rate": 0.00010344086437767366, + "loss": 2.5219, + "step": 9832 + }, + { + "epoch": 0.7935598418206763, + "grad_norm": 0.6349283456802368, + "learning_rate": 0.00010342508678535903, + "loss": 2.5277, + "step": 9833 + }, + { + "epoch": 0.7936405455572593, + "grad_norm": 0.7084335088729858, + "learning_rate": 0.00010340930910768225, + "loss": 2.476, + "step": 9834 + }, + { + "epoch": 0.7937212492938424, + "grad_norm": 0.6714156866073608, + "learning_rate": 0.00010339353134503662, + "loss": 2.556, + "step": 9835 + }, + { + "epoch": 0.7938019530304253, + "grad_norm": 0.6687895059585571, + "learning_rate": 0.00010337775349781527, + "loss": 2.5756, + "step": 9836 + }, + { + "epoch": 
0.7938826567670083, + "grad_norm": 0.669784665107727, + "learning_rate": 0.00010336197556641152, + "loss": 2.5545, + "step": 9837 + }, + { + "epoch": 0.7939633605035913, + "grad_norm": 0.6738600134849548, + "learning_rate": 0.0001033461975512185, + "loss": 2.5807, + "step": 9838 + }, + { + "epoch": 0.7940440642401744, + "grad_norm": 0.691443681716919, + "learning_rate": 0.00010333041945262953, + "loss": 2.5279, + "step": 9839 + }, + { + "epoch": 0.7941247679767574, + "grad_norm": 0.6283861398696899, + "learning_rate": 0.0001033146412710378, + "loss": 2.5355, + "step": 9840 + }, + { + "epoch": 0.7942054717133403, + "grad_norm": 0.6491204500198364, + "learning_rate": 0.00010329886300683655, + "loss": 2.5431, + "step": 9841 + }, + { + "epoch": 0.7942861754499233, + "grad_norm": 0.6673988103866577, + "learning_rate": 0.00010328308466041898, + "loss": 2.5845, + "step": 9842 + }, + { + "epoch": 0.7943668791865063, + "grad_norm": 0.6669130325317383, + "learning_rate": 0.00010326730623217837, + "loss": 2.5348, + "step": 9843 + }, + { + "epoch": 0.7944475829230894, + "grad_norm": 0.7003189921379089, + "learning_rate": 0.00010325152772250795, + "loss": 2.5779, + "step": 9844 + }, + { + "epoch": 0.7945282866596723, + "grad_norm": 0.6602177619934082, + "learning_rate": 0.00010323574913180097, + "loss": 2.5527, + "step": 9845 + }, + { + "epoch": 0.7946089903962553, + "grad_norm": 0.7053726315498352, + "learning_rate": 0.00010321997046045066, + "loss": 2.566, + "step": 9846 + }, + { + "epoch": 0.7946896941328383, + "grad_norm": 0.7428076863288879, + "learning_rate": 0.00010320419170885025, + "loss": 2.5348, + "step": 9847 + }, + { + "epoch": 0.7947703978694214, + "grad_norm": 0.7029163837432861, + "learning_rate": 0.00010318841287739303, + "loss": 2.5387, + "step": 9848 + }, + { + "epoch": 0.7948511016060044, + "grad_norm": 0.6159133911132812, + "learning_rate": 0.00010317263396647221, + "loss": 2.5408, + "step": 9849 + }, + { + "epoch": 0.7949318053425873, + "grad_norm": 
0.6748857498168945, + "learning_rate": 0.00010315685497648106, + "loss": 2.5299, + "step": 9850 + }, + { + "epoch": 0.7950125090791703, + "grad_norm": 0.6281898021697998, + "learning_rate": 0.00010314107590781284, + "loss": 2.5202, + "step": 9851 + }, + { + "epoch": 0.7950932128157534, + "grad_norm": 0.6602163910865784, + "learning_rate": 0.00010312529676086078, + "loss": 2.5119, + "step": 9852 + }, + { + "epoch": 0.7951739165523364, + "grad_norm": 0.6665403246879578, + "learning_rate": 0.00010310951753601818, + "loss": 2.5913, + "step": 9853 + }, + { + "epoch": 0.7952546202889194, + "grad_norm": 0.6705873012542725, + "learning_rate": 0.00010309373823367827, + "loss": 2.6039, + "step": 9854 + }, + { + "epoch": 0.7953353240255023, + "grad_norm": 0.6571313738822937, + "learning_rate": 0.0001030779588542343, + "loss": 2.5629, + "step": 9855 + }, + { + "epoch": 0.7954160277620854, + "grad_norm": 0.6597230434417725, + "learning_rate": 0.00010306217939807956, + "loss": 2.5569, + "step": 9856 + }, + { + "epoch": 0.7954967314986684, + "grad_norm": 0.7098817229270935, + "learning_rate": 0.00010304639986560733, + "loss": 2.4736, + "step": 9857 + }, + { + "epoch": 0.7955774352352514, + "grad_norm": 0.628663957118988, + "learning_rate": 0.00010303062025721082, + "loss": 2.5241, + "step": 9858 + }, + { + "epoch": 0.7956581389718343, + "grad_norm": 0.630843460559845, + "learning_rate": 0.00010301484057328333, + "loss": 2.5604, + "step": 9859 + }, + { + "epoch": 0.7957388427084174, + "grad_norm": 0.7457596659660339, + "learning_rate": 0.00010299906081421813, + "loss": 2.5675, + "step": 9860 + }, + { + "epoch": 0.7958195464450004, + "grad_norm": 0.6566091775894165, + "learning_rate": 0.00010298328098040851, + "loss": 2.4918, + "step": 9861 + }, + { + "epoch": 0.7959002501815834, + "grad_norm": 0.657357931137085, + "learning_rate": 0.00010296750107224773, + "loss": 2.5268, + "step": 9862 + }, + { + "epoch": 0.7959809539181664, + "grad_norm": 0.7021927833557129, + "learning_rate": 
0.00010295172109012905, + "loss": 2.528, + "step": 9863 + }, + { + "epoch": 0.7960616576547495, + "grad_norm": 0.662053108215332, + "learning_rate": 0.00010293594103444578, + "loss": 2.5483, + "step": 9864 + }, + { + "epoch": 0.7961423613913324, + "grad_norm": 0.776407778263092, + "learning_rate": 0.00010292016090559118, + "loss": 2.6089, + "step": 9865 + }, + { + "epoch": 0.7962230651279154, + "grad_norm": 0.6499512791633606, + "learning_rate": 0.00010290438070395854, + "loss": 2.5609, + "step": 9866 + }, + { + "epoch": 0.7963037688644984, + "grad_norm": 0.6802246570587158, + "learning_rate": 0.00010288860042994113, + "loss": 2.5217, + "step": 9867 + }, + { + "epoch": 0.7963844726010815, + "grad_norm": 0.6371235847473145, + "learning_rate": 0.00010287282008393224, + "loss": 2.4783, + "step": 9868 + }, + { + "epoch": 0.7964651763376644, + "grad_norm": 0.7070169448852539, + "learning_rate": 0.00010285703966632518, + "loss": 2.5006, + "step": 9869 + }, + { + "epoch": 0.7965458800742474, + "grad_norm": 0.657738208770752, + "learning_rate": 0.00010284125917751323, + "loss": 2.551, + "step": 9870 + }, + { + "epoch": 0.7966265838108304, + "grad_norm": 0.7936853170394897, + "learning_rate": 0.00010282547861788964, + "loss": 2.574, + "step": 9871 + }, + { + "epoch": 0.7967072875474135, + "grad_norm": 0.675715982913971, + "learning_rate": 0.00010280969798784779, + "loss": 2.5288, + "step": 9872 + }, + { + "epoch": 0.7967879912839965, + "grad_norm": 0.6980394124984741, + "learning_rate": 0.00010279391728778092, + "loss": 2.5437, + "step": 9873 + }, + { + "epoch": 0.7968686950205794, + "grad_norm": 0.6580469608306885, + "learning_rate": 0.00010277813651808226, + "loss": 2.5574, + "step": 9874 + }, + { + "epoch": 0.7969493987571624, + "grad_norm": 0.6960238218307495, + "learning_rate": 0.00010276235567914522, + "loss": 2.5477, + "step": 9875 + }, + { + "epoch": 0.7970301024937455, + "grad_norm": 0.704140841960907, + "learning_rate": 0.00010274657477136304, + "loss": 2.5099, + 
"step": 9876 + }, + { + "epoch": 0.7971108062303285, + "grad_norm": 0.7238990068435669, + "learning_rate": 0.00010273079379512906, + "loss": 2.6182, + "step": 9877 + }, + { + "epoch": 0.7971915099669115, + "grad_norm": 0.6527700424194336, + "learning_rate": 0.00010271501275083657, + "loss": 2.5148, + "step": 9878 + }, + { + "epoch": 0.7972722137034944, + "grad_norm": 0.6665365695953369, + "learning_rate": 0.00010269923163887884, + "loss": 2.5624, + "step": 9879 + }, + { + "epoch": 0.7973529174400775, + "grad_norm": 0.7304019927978516, + "learning_rate": 0.0001026834504596492, + "loss": 2.5537, + "step": 9880 + }, + { + "epoch": 0.7974336211766605, + "grad_norm": 0.6645877957344055, + "learning_rate": 0.00010266766921354099, + "loss": 2.5381, + "step": 9881 + }, + { + "epoch": 0.7975143249132435, + "grad_norm": 0.6817314624786377, + "learning_rate": 0.00010265188790094744, + "loss": 2.5399, + "step": 9882 + }, + { + "epoch": 0.7975950286498265, + "grad_norm": 0.7477232217788696, + "learning_rate": 0.00010263610652226194, + "loss": 2.6461, + "step": 9883 + }, + { + "epoch": 0.7976757323864095, + "grad_norm": 0.7087170481681824, + "learning_rate": 0.00010262032507787777, + "loss": 2.5469, + "step": 9884 + }, + { + "epoch": 0.7977564361229925, + "grad_norm": 0.7093435525894165, + "learning_rate": 0.00010260454356818825, + "loss": 2.5606, + "step": 9885 + }, + { + "epoch": 0.7978371398595755, + "grad_norm": 0.6662636399269104, + "learning_rate": 0.00010258876199358672, + "loss": 2.5415, + "step": 9886 + }, + { + "epoch": 0.7979178435961585, + "grad_norm": 0.6829736232757568, + "learning_rate": 0.00010257298035446644, + "loss": 2.5618, + "step": 9887 + }, + { + "epoch": 0.7979985473327416, + "grad_norm": 0.6872264742851257, + "learning_rate": 0.00010255719865122077, + "loss": 2.5629, + "step": 9888 + }, + { + "epoch": 0.7980792510693245, + "grad_norm": 0.6988633871078491, + "learning_rate": 0.00010254141688424303, + "loss": 2.5191, + "step": 9889 + }, + { + "epoch": 
0.7981599548059075, + "grad_norm": 0.6787285804748535, + "learning_rate": 0.00010252563505392654, + "loss": 2.5003, + "step": 9890 + }, + { + "epoch": 0.7982406585424905, + "grad_norm": 0.6703466773033142, + "learning_rate": 0.00010250985316066461, + "loss": 2.5442, + "step": 9891 + }, + { + "epoch": 0.7983213622790736, + "grad_norm": 0.6463642120361328, + "learning_rate": 0.0001024940712048506, + "loss": 2.5236, + "step": 9892 + }, + { + "epoch": 0.7984020660156566, + "grad_norm": 0.6835207939147949, + "learning_rate": 0.0001024782891868778, + "loss": 2.5094, + "step": 9893 + }, + { + "epoch": 0.7984827697522395, + "grad_norm": 0.6621001958847046, + "learning_rate": 0.00010246250710713956, + "loss": 2.5456, + "step": 9894 + }, + { + "epoch": 0.7985634734888225, + "grad_norm": 0.6675469875335693, + "learning_rate": 0.0001024467249660292, + "loss": 2.5312, + "step": 9895 + }, + { + "epoch": 0.7986441772254055, + "grad_norm": 0.7357796430587769, + "learning_rate": 0.00010243094276394007, + "loss": 2.5374, + "step": 9896 + }, + { + "epoch": 0.7987248809619886, + "grad_norm": 0.7005879878997803, + "learning_rate": 0.00010241516050126549, + "loss": 2.5667, + "step": 9897 + }, + { + "epoch": 0.7988055846985715, + "grad_norm": 0.669870913028717, + "learning_rate": 0.0001023993781783988, + "loss": 2.533, + "step": 9898 + }, + { + "epoch": 0.7988862884351545, + "grad_norm": 0.7584091424942017, + "learning_rate": 0.00010238359579573333, + "loss": 2.5995, + "step": 9899 + }, + { + "epoch": 0.7989669921717375, + "grad_norm": 0.6931570172309875, + "learning_rate": 0.00010236781335366239, + "loss": 2.5506, + "step": 9900 + }, + { + "epoch": 0.7990476959083206, + "grad_norm": 0.6810948848724365, + "learning_rate": 0.0001023520308525794, + "loss": 2.5048, + "step": 9901 + }, + { + "epoch": 0.7991283996449036, + "grad_norm": 0.6857194900512695, + "learning_rate": 0.00010233624829287765, + "loss": 2.5559, + "step": 9902 + }, + { + "epoch": 0.7992091033814865, + "grad_norm": 
0.6685707569122314, + "learning_rate": 0.00010232046567495046, + "loss": 2.5661, + "step": 9903 + }, + { + "epoch": 0.7992898071180695, + "grad_norm": 0.6626694202423096, + "learning_rate": 0.00010230468299919121, + "loss": 2.6293, + "step": 9904 + }, + { + "epoch": 0.7993705108546526, + "grad_norm": 0.6407302021980286, + "learning_rate": 0.00010228890026599323, + "loss": 2.5552, + "step": 9905 + }, + { + "epoch": 0.7994512145912356, + "grad_norm": 0.762235701084137, + "learning_rate": 0.00010227311747574986, + "loss": 2.4904, + "step": 9906 + }, + { + "epoch": 0.7995319183278186, + "grad_norm": 0.703507661819458, + "learning_rate": 0.0001022573346288545, + "loss": 2.5684, + "step": 9907 + }, + { + "epoch": 0.7996126220644015, + "grad_norm": 0.82541823387146, + "learning_rate": 0.00010224155172570043, + "loss": 2.521, + "step": 9908 + }, + { + "epoch": 0.7996933258009846, + "grad_norm": 0.6836804747581482, + "learning_rate": 0.00010222576876668104, + "loss": 2.5364, + "step": 9909 + }, + { + "epoch": 0.7997740295375676, + "grad_norm": 0.7388977408409119, + "learning_rate": 0.00010220998575218966, + "loss": 2.5724, + "step": 9910 + }, + { + "epoch": 0.7998547332741506, + "grad_norm": 0.7380896806716919, + "learning_rate": 0.00010219420268261966, + "loss": 2.5918, + "step": 9911 + }, + { + "epoch": 0.7999354370107336, + "grad_norm": 0.7303522825241089, + "learning_rate": 0.00010217841955836442, + "loss": 2.5432, + "step": 9912 + }, + { + "epoch": 0.8000161407473166, + "grad_norm": 0.6859301924705505, + "learning_rate": 0.00010216263637981727, + "loss": 2.5734, + "step": 9913 + }, + { + "epoch": 0.8000968444838996, + "grad_norm": 0.731910228729248, + "learning_rate": 0.00010214685314737154, + "loss": 2.5227, + "step": 9914 + }, + { + "epoch": 0.8001775482204826, + "grad_norm": 0.7105006575584412, + "learning_rate": 0.00010213106986142062, + "loss": 2.5335, + "step": 9915 + }, + { + "epoch": 0.8002582519570656, + "grad_norm": 0.7337056994438171, + "learning_rate": 
0.00010211528652235786, + "loss": 2.6204, + "step": 9916 + }, + { + "epoch": 0.8003389556936487, + "grad_norm": 0.7350614666938782, + "learning_rate": 0.00010209950313057668, + "loss": 2.5264, + "step": 9917 + }, + { + "epoch": 0.8004196594302316, + "grad_norm": 0.6411921977996826, + "learning_rate": 0.00010208371968647036, + "loss": 2.4642, + "step": 9918 + }, + { + "epoch": 0.8005003631668146, + "grad_norm": 0.7601611018180847, + "learning_rate": 0.00010206793619043229, + "loss": 2.6249, + "step": 9919 + }, + { + "epoch": 0.8005810669033976, + "grad_norm": 0.7086012363433838, + "learning_rate": 0.00010205215264285585, + "loss": 2.5508, + "step": 9920 + }, + { + "epoch": 0.8006617706399807, + "grad_norm": 0.7267128825187683, + "learning_rate": 0.00010203636904413443, + "loss": 2.5109, + "step": 9921 + }, + { + "epoch": 0.8007424743765637, + "grad_norm": 0.7606067657470703, + "learning_rate": 0.00010202058539466132, + "loss": 2.5172, + "step": 9922 + }, + { + "epoch": 0.8008231781131466, + "grad_norm": 0.7610498666763306, + "learning_rate": 0.00010200480169483, + "loss": 2.5085, + "step": 9923 + }, + { + "epoch": 0.8009038818497296, + "grad_norm": 0.7604225873947144, + "learning_rate": 0.00010198901794503373, + "loss": 2.5615, + "step": 9924 + }, + { + "epoch": 0.8009845855863127, + "grad_norm": 0.739532470703125, + "learning_rate": 0.00010197323414566596, + "loss": 2.5574, + "step": 9925 + }, + { + "epoch": 0.8010652893228957, + "grad_norm": 0.6913303136825562, + "learning_rate": 0.00010195745029712003, + "loss": 2.5403, + "step": 9926 + }, + { + "epoch": 0.8011459930594786, + "grad_norm": 0.6963592767715454, + "learning_rate": 0.0001019416663997893, + "loss": 2.5615, + "step": 9927 + }, + { + "epoch": 0.8012266967960616, + "grad_norm": 0.681481122970581, + "learning_rate": 0.0001019258824540672, + "loss": 2.5125, + "step": 9928 + }, + { + "epoch": 0.8013074005326447, + "grad_norm": 0.7192744016647339, + "learning_rate": 0.00010191009846034709, + "loss": 2.5952, + 
"step": 9929 + }, + { + "epoch": 0.8013881042692277, + "grad_norm": 0.7030046582221985, + "learning_rate": 0.00010189431441902228, + "loss": 2.5445, + "step": 9930 + }, + { + "epoch": 0.8014688080058107, + "grad_norm": 0.6180598139762878, + "learning_rate": 0.00010187853033048622, + "loss": 2.4902, + "step": 9931 + }, + { + "epoch": 0.8015495117423936, + "grad_norm": 0.7479971051216125, + "learning_rate": 0.0001018627461951323, + "loss": 2.5703, + "step": 9932 + }, + { + "epoch": 0.8016302154789767, + "grad_norm": 0.7339857220649719, + "learning_rate": 0.00010184696201335387, + "loss": 2.5744, + "step": 9933 + }, + { + "epoch": 0.8017109192155597, + "grad_norm": 0.6741397380828857, + "learning_rate": 0.00010183117778554432, + "loss": 2.5777, + "step": 9934 + }, + { + "epoch": 0.8017916229521427, + "grad_norm": 0.6731706857681274, + "learning_rate": 0.00010181539351209699, + "loss": 2.5438, + "step": 9935 + }, + { + "epoch": 0.8018723266887257, + "grad_norm": 0.6929418444633484, + "learning_rate": 0.00010179960919340535, + "loss": 2.5308, + "step": 9936 + }, + { + "epoch": 0.8019530304253087, + "grad_norm": 0.7383175492286682, + "learning_rate": 0.00010178382482986271, + "loss": 2.5623, + "step": 9937 + }, + { + "epoch": 0.8020337341618917, + "grad_norm": 0.6872193217277527, + "learning_rate": 0.00010176804042186252, + "loss": 2.5271, + "step": 9938 + }, + { + "epoch": 0.8021144378984747, + "grad_norm": 0.7354295253753662, + "learning_rate": 0.00010175225596979816, + "loss": 2.5122, + "step": 9939 + }, + { + "epoch": 0.8021951416350577, + "grad_norm": 0.7589237689971924, + "learning_rate": 0.00010173647147406297, + "loss": 2.5529, + "step": 9940 + }, + { + "epoch": 0.8022758453716408, + "grad_norm": 0.6998353004455566, + "learning_rate": 0.00010172068693505037, + "loss": 2.4683, + "step": 9941 + }, + { + "epoch": 0.8023565491082237, + "grad_norm": 0.6816055178642273, + "learning_rate": 0.00010170490235315377, + "loss": 2.567, + "step": 9942 + }, + { + "epoch": 
0.8024372528448067, + "grad_norm": 0.7188318371772766, + "learning_rate": 0.00010168911772876652, + "loss": 2.5631, + "step": 9943 + }, + { + "epoch": 0.8025179565813897, + "grad_norm": 0.6925922632217407, + "learning_rate": 0.00010167333306228209, + "loss": 2.4872, + "step": 9944 + }, + { + "epoch": 0.8025986603179727, + "grad_norm": 0.7081493735313416, + "learning_rate": 0.00010165754835409377, + "loss": 2.5482, + "step": 9945 + }, + { + "epoch": 0.8026793640545558, + "grad_norm": 0.6838935613632202, + "learning_rate": 0.00010164176360459505, + "loss": 2.541, + "step": 9946 + }, + { + "epoch": 0.8027600677911387, + "grad_norm": 0.6959214210510254, + "learning_rate": 0.00010162597881417928, + "loss": 2.4574, + "step": 9947 + }, + { + "epoch": 0.8028407715277217, + "grad_norm": 0.693004310131073, + "learning_rate": 0.00010161019398323986, + "loss": 2.5553, + "step": 9948 + }, + { + "epoch": 0.8029214752643047, + "grad_norm": 0.6683690547943115, + "learning_rate": 0.00010159440911217022, + "loss": 2.5501, + "step": 9949 + }, + { + "epoch": 0.8030021790008878, + "grad_norm": 0.6797001361846924, + "learning_rate": 0.0001015786242013637, + "loss": 2.5731, + "step": 9950 + }, + { + "epoch": 0.8030828827374707, + "grad_norm": 0.6621012091636658, + "learning_rate": 0.00010156283925121375, + "loss": 2.5278, + "step": 9951 + }, + { + "epoch": 0.8031635864740537, + "grad_norm": 0.7024650573730469, + "learning_rate": 0.00010154705426211377, + "loss": 2.5939, + "step": 9952 + }, + { + "epoch": 0.8032442902106367, + "grad_norm": 0.6756548285484314, + "learning_rate": 0.00010153126923445714, + "loss": 2.5797, + "step": 9953 + }, + { + "epoch": 0.8033249939472198, + "grad_norm": 0.6560662984848022, + "learning_rate": 0.00010151548416863732, + "loss": 2.5358, + "step": 9954 + }, + { + "epoch": 0.8034056976838028, + "grad_norm": 0.7172456979751587, + "learning_rate": 0.00010149969906504766, + "loss": 2.5054, + "step": 9955 + }, + { + "epoch": 0.8034864014203857, + "grad_norm": 
0.6379461288452148, + "learning_rate": 0.00010148391392408152, + "loss": 2.5341, + "step": 9956 + }, + { + "epoch": 0.8035671051569687, + "grad_norm": 0.6553892493247986, + "learning_rate": 0.00010146812874613243, + "loss": 2.5618, + "step": 9957 + }, + { + "epoch": 0.8036478088935518, + "grad_norm": 0.6940072178840637, + "learning_rate": 0.00010145234353159372, + "loss": 2.5686, + "step": 9958 + }, + { + "epoch": 0.8037285126301348, + "grad_norm": 0.6641896963119507, + "learning_rate": 0.00010143655828085878, + "loss": 2.5188, + "step": 9959 + }, + { + "epoch": 0.8038092163667178, + "grad_norm": 0.6622887253761292, + "learning_rate": 0.00010142077299432111, + "loss": 2.54, + "step": 9960 + }, + { + "epoch": 0.8038899201033007, + "grad_norm": 0.7216808795928955, + "learning_rate": 0.000101404987672374, + "loss": 2.5775, + "step": 9961 + }, + { + "epoch": 0.8039706238398838, + "grad_norm": 0.6544952988624573, + "learning_rate": 0.00010138920231541095, + "loss": 2.6066, + "step": 9962 + }, + { + "epoch": 0.8040513275764668, + "grad_norm": 0.6869354248046875, + "learning_rate": 0.00010137341692382539, + "loss": 2.5157, + "step": 9963 + }, + { + "epoch": 0.8041320313130498, + "grad_norm": 0.6731898784637451, + "learning_rate": 0.00010135763149801063, + "loss": 2.4369, + "step": 9964 + }, + { + "epoch": 0.8042127350496328, + "grad_norm": 0.6943373084068298, + "learning_rate": 0.00010134184603836017, + "loss": 2.5529, + "step": 9965 + }, + { + "epoch": 0.8042934387862158, + "grad_norm": 0.729928195476532, + "learning_rate": 0.00010132606054526739, + "loss": 2.5814, + "step": 9966 + }, + { + "epoch": 0.8043741425227988, + "grad_norm": 0.6491130590438843, + "learning_rate": 0.00010131027501912571, + "loss": 2.5246, + "step": 9967 + }, + { + "epoch": 0.8044548462593818, + "grad_norm": 0.747756838798523, + "learning_rate": 0.00010129448946032857, + "loss": 2.513, + "step": 9968 + }, + { + "epoch": 0.8045355499959648, + "grad_norm": 0.6449645757675171, + "learning_rate": 
0.00010127870386926935, + "loss": 2.5232, + "step": 9969 + }, + { + "epoch": 0.8046162537325479, + "grad_norm": 0.6425037980079651, + "learning_rate": 0.0001012629182463415, + "loss": 2.5065, + "step": 9970 + }, + { + "epoch": 0.8046969574691308, + "grad_norm": 0.7340624332427979, + "learning_rate": 0.00010124713259193843, + "loss": 2.5325, + "step": 9971 + }, + { + "epoch": 0.8047776612057138, + "grad_norm": 0.7308940291404724, + "learning_rate": 0.00010123134690645352, + "loss": 2.5717, + "step": 9972 + }, + { + "epoch": 0.8048583649422968, + "grad_norm": 0.7128338813781738, + "learning_rate": 0.00010121556119028028, + "loss": 2.5548, + "step": 9973 + }, + { + "epoch": 0.8049390686788799, + "grad_norm": 0.7027677893638611, + "learning_rate": 0.00010119977544381207, + "loss": 2.5311, + "step": 9974 + }, + { + "epoch": 0.8050197724154629, + "grad_norm": 0.7022054195404053, + "learning_rate": 0.00010118398966744229, + "loss": 2.5177, + "step": 9975 + }, + { + "epoch": 0.8051004761520458, + "grad_norm": 0.7382696270942688, + "learning_rate": 0.00010116820386156441, + "loss": 2.532, + "step": 9976 + }, + { + "epoch": 0.8051811798886288, + "grad_norm": 0.6968613862991333, + "learning_rate": 0.00010115241802657181, + "loss": 2.536, + "step": 9977 + }, + { + "epoch": 0.8052618836252119, + "grad_norm": 0.8277899026870728, + "learning_rate": 0.00010113663216285798, + "loss": 2.5963, + "step": 9978 + }, + { + "epoch": 0.8053425873617949, + "grad_norm": 0.677707314491272, + "learning_rate": 0.00010112084627081629, + "loss": 2.5041, + "step": 9979 + }, + { + "epoch": 0.8054232910983778, + "grad_norm": 0.6943314075469971, + "learning_rate": 0.00010110506035084017, + "loss": 2.4776, + "step": 9980 + }, + { + "epoch": 0.8055039948349608, + "grad_norm": 0.6948177218437195, + "learning_rate": 0.00010108927440332306, + "loss": 2.5306, + "step": 9981 + }, + { + "epoch": 0.8055846985715439, + "grad_norm": 0.6873918771743774, + "learning_rate": 0.0001010734884286584, + "loss": 2.5783, 
+ "step": 9982 + }, + { + "epoch": 0.8056654023081269, + "grad_norm": 0.6370649933815002, + "learning_rate": 0.00010105770242723958, + "loss": 2.5584, + "step": 9983 + }, + { + "epoch": 0.8057461060447099, + "grad_norm": 0.7594422698020935, + "learning_rate": 0.00010104191639946008, + "loss": 2.543, + "step": 9984 + }, + { + "epoch": 0.8058268097812928, + "grad_norm": 0.697380542755127, + "learning_rate": 0.00010102613034571327, + "loss": 2.5295, + "step": 9985 + }, + { + "epoch": 0.8059075135178759, + "grad_norm": 0.6597251892089844, + "learning_rate": 0.00010101034426639264, + "loss": 2.5917, + "step": 9986 + }, + { + "epoch": 0.8059882172544589, + "grad_norm": 0.6583479046821594, + "learning_rate": 0.00010099455816189156, + "loss": 2.6206, + "step": 9987 + }, + { + "epoch": 0.8060689209910419, + "grad_norm": 0.6603943705558777, + "learning_rate": 0.00010097877203260349, + "loss": 2.5223, + "step": 9988 + }, + { + "epoch": 0.8061496247276249, + "grad_norm": 0.716454267501831, + "learning_rate": 0.00010096298587892188, + "loss": 2.5572, + "step": 9989 + }, + { + "epoch": 0.806230328464208, + "grad_norm": 0.6511488556861877, + "learning_rate": 0.00010094719970124016, + "loss": 2.5815, + "step": 9990 + }, + { + "epoch": 0.8063110322007909, + "grad_norm": 0.6969261169433594, + "learning_rate": 0.00010093141349995173, + "loss": 2.5902, + "step": 9991 + }, + { + "epoch": 0.8063917359373739, + "grad_norm": 0.7012695074081421, + "learning_rate": 0.00010091562727545001, + "loss": 2.5134, + "step": 9992 + }, + { + "epoch": 0.8064724396739569, + "grad_norm": 0.6368406414985657, + "learning_rate": 0.00010089984102812848, + "loss": 2.568, + "step": 9993 + }, + { + "epoch": 0.80655314341054, + "grad_norm": 0.6552153825759888, + "learning_rate": 0.00010088405475838059, + "loss": 2.5101, + "step": 9994 + }, + { + "epoch": 0.8066338471471229, + "grad_norm": 0.6949633359909058, + "learning_rate": 0.00010086826846659974, + "loss": 2.5427, + "step": 9995 + }, + { + "epoch": 
0.8067145508837059, + "grad_norm": 0.6593093872070312, + "learning_rate": 0.00010085248215317935, + "loss": 2.5551, + "step": 9996 + }, + { + "epoch": 0.8067952546202889, + "grad_norm": 0.6963745355606079, + "learning_rate": 0.00010083669581851287, + "loss": 2.4956, + "step": 9997 + }, + { + "epoch": 0.8068759583568719, + "grad_norm": 0.7093523144721985, + "learning_rate": 0.00010082090946299377, + "loss": 2.5876, + "step": 9998 + }, + { + "epoch": 0.806956662093455, + "grad_norm": 0.6796671152114868, + "learning_rate": 0.00010080512308701544, + "loss": 2.5302, + "step": 9999 + }, + { + "epoch": 0.8070373658300379, + "grad_norm": 0.7170542478561401, + "learning_rate": 0.00010078933669097135, + "loss": 2.5886, + "step": 10000 + }, + { + "epoch": 0.8070373658300379, + "eval_loss": 2.4734926223754883, + "eval_runtime": 788.2594, + "eval_samples_per_second": 3.324, + "eval_steps_per_second": 0.554, + "step": 10000 + }, + { + "epoch": 0.8071180695666209, + "grad_norm": 0.6566126346588135, + "learning_rate": 0.0001007735502752549, + "loss": 2.4441, + "step": 10001 + }, + { + "epoch": 0.8071987733032039, + "grad_norm": 0.6739515662193298, + "learning_rate": 0.00010075776384025957, + "loss": 2.5767, + "step": 10002 + }, + { + "epoch": 0.807279477039787, + "grad_norm": 0.6334208846092224, + "learning_rate": 0.00010074197738637881, + "loss": 2.5321, + "step": 10003 + }, + { + "epoch": 0.80736018077637, + "grad_norm": 0.6764520406723022, + "learning_rate": 0.000100726190914006, + "loss": 2.5144, + "step": 10004 + }, + { + "epoch": 0.8074408845129529, + "grad_norm": 0.7090082764625549, + "learning_rate": 0.00010071040442353464, + "loss": 2.5626, + "step": 10005 + }, + { + "epoch": 0.8075215882495359, + "grad_norm": 0.6915304064750671, + "learning_rate": 0.00010069461791535814, + "loss": 2.5261, + "step": 10006 + }, + { + "epoch": 0.807602291986119, + "grad_norm": 0.6685747504234314, + "learning_rate": 0.00010067883138986991, + "loss": 2.492, + "step": 10007 + }, + { + "epoch": 
0.807682995722702, + "grad_norm": 0.7179074883460999, + "learning_rate": 0.00010066304484746347, + "loss": 2.4601, + "step": 10008 + }, + { + "epoch": 0.807763699459285, + "grad_norm": 0.7032761573791504, + "learning_rate": 0.00010064725828853219, + "loss": 2.578, + "step": 10009 + }, + { + "epoch": 0.8078444031958679, + "grad_norm": 0.710322916507721, + "learning_rate": 0.00010063147171346959, + "loss": 2.5514, + "step": 10010 + }, + { + "epoch": 0.807925106932451, + "grad_norm": 0.6552841067314148, + "learning_rate": 0.00010061568512266903, + "loss": 2.5474, + "step": 10011 + }, + { + "epoch": 0.808005810669034, + "grad_norm": 0.6862452626228333, + "learning_rate": 0.00010059989851652398, + "loss": 2.5772, + "step": 10012 + }, + { + "epoch": 0.808086514405617, + "grad_norm": 0.7123851180076599, + "learning_rate": 0.00010058411189542788, + "loss": 2.4936, + "step": 10013 + }, + { + "epoch": 0.8081672181421999, + "grad_norm": 0.6889944672584534, + "learning_rate": 0.00010056832525977422, + "loss": 2.5041, + "step": 10014 + }, + { + "epoch": 0.808247921878783, + "grad_norm": 0.6986924409866333, + "learning_rate": 0.0001005525386099564, + "loss": 2.5591, + "step": 10015 + }, + { + "epoch": 0.808328625615366, + "grad_norm": 0.6935306787490845, + "learning_rate": 0.00010053675194636787, + "loss": 2.5423, + "step": 10016 + }, + { + "epoch": 0.808409329351949, + "grad_norm": 0.6751969456672668, + "learning_rate": 0.00010052096526940207, + "loss": 2.5666, + "step": 10017 + }, + { + "epoch": 0.808490033088532, + "grad_norm": 0.676909327507019, + "learning_rate": 0.00010050517857945243, + "loss": 2.5394, + "step": 10018 + }, + { + "epoch": 0.808570736825115, + "grad_norm": 0.7439377307891846, + "learning_rate": 0.00010048939187691246, + "loss": 2.5011, + "step": 10019 + }, + { + "epoch": 0.808651440561698, + "grad_norm": 0.6594791412353516, + "learning_rate": 0.00010047360516217554, + "loss": 2.5159, + "step": 10020 + }, + { + "epoch": 0.808732144298281, + "grad_norm": 
0.7013304233551025, + "learning_rate": 0.00010045781843563517, + "loss": 2.5439, + "step": 10021 + }, + { + "epoch": 0.808812848034864, + "grad_norm": 0.7537491917610168, + "learning_rate": 0.00010044203169768476, + "loss": 2.5837, + "step": 10022 + }, + { + "epoch": 0.8088935517714471, + "grad_norm": 0.7273866534233093, + "learning_rate": 0.00010042624494871773, + "loss": 2.5546, + "step": 10023 + }, + { + "epoch": 0.80897425550803, + "grad_norm": 0.6716369986534119, + "learning_rate": 0.0001004104581891276, + "loss": 2.5264, + "step": 10024 + }, + { + "epoch": 0.809054959244613, + "grad_norm": 0.7544769644737244, + "learning_rate": 0.00010039467141930777, + "loss": 2.5502, + "step": 10025 + }, + { + "epoch": 0.809135662981196, + "grad_norm": 0.8713179230690002, + "learning_rate": 0.0001003788846396517, + "loss": 2.5178, + "step": 10026 + }, + { + "epoch": 0.8092163667177791, + "grad_norm": 0.6704887747764587, + "learning_rate": 0.00010036309785055283, + "loss": 2.5136, + "step": 10027 + }, + { + "epoch": 0.809297070454362, + "grad_norm": 0.7308552861213684, + "learning_rate": 0.00010034731105240458, + "loss": 2.4781, + "step": 10028 + }, + { + "epoch": 0.809377774190945, + "grad_norm": 0.7214144468307495, + "learning_rate": 0.00010033152424560049, + "loss": 2.5946, + "step": 10029 + }, + { + "epoch": 0.809458477927528, + "grad_norm": 0.6946821808815002, + "learning_rate": 0.00010031573743053393, + "loss": 2.4937, + "step": 10030 + }, + { + "epoch": 0.8095391816641111, + "grad_norm": 0.7348416447639465, + "learning_rate": 0.00010029995060759833, + "loss": 2.5959, + "step": 10031 + }, + { + "epoch": 0.8096198854006941, + "grad_norm": 0.7482579350471497, + "learning_rate": 0.00010028416377718721, + "loss": 2.6, + "step": 10032 + }, + { + "epoch": 0.809700589137277, + "grad_norm": 0.7114939093589783, + "learning_rate": 0.00010026837693969397, + "loss": 2.5376, + "step": 10033 + }, + { + "epoch": 0.80978129287386, + "grad_norm": 0.6559228897094727, + "learning_rate": 
0.00010025259009551209, + "loss": 2.4961, + "step": 10034 + }, + { + "epoch": 0.8098619966104431, + "grad_norm": 0.7494906187057495, + "learning_rate": 0.00010023680324503501, + "loss": 2.5723, + "step": 10035 + }, + { + "epoch": 0.8099427003470261, + "grad_norm": 0.7207093834877014, + "learning_rate": 0.00010022101638865618, + "loss": 2.5523, + "step": 10036 + }, + { + "epoch": 0.8100234040836091, + "grad_norm": 0.6730504035949707, + "learning_rate": 0.00010020522952676903, + "loss": 2.5135, + "step": 10037 + }, + { + "epoch": 0.810104107820192, + "grad_norm": 0.6805168390274048, + "learning_rate": 0.000100189442659767, + "loss": 2.5598, + "step": 10038 + }, + { + "epoch": 0.8101848115567751, + "grad_norm": 0.6639137268066406, + "learning_rate": 0.00010017365578804358, + "loss": 2.5152, + "step": 10039 + }, + { + "epoch": 0.8102655152933581, + "grad_norm": 0.6604194641113281, + "learning_rate": 0.00010015786891199221, + "loss": 2.5302, + "step": 10040 + }, + { + "epoch": 0.8103462190299411, + "grad_norm": 0.7664934992790222, + "learning_rate": 0.00010014208203200634, + "loss": 2.5437, + "step": 10041 + }, + { + "epoch": 0.8104269227665241, + "grad_norm": 0.7404079437255859, + "learning_rate": 0.00010012629514847942, + "loss": 2.6559, + "step": 10042 + }, + { + "epoch": 0.8105076265031071, + "grad_norm": 0.694006085395813, + "learning_rate": 0.00010011050826180488, + "loss": 2.5571, + "step": 10043 + }, + { + "epoch": 0.8105883302396901, + "grad_norm": 0.7007058262825012, + "learning_rate": 0.00010009472137237616, + "loss": 2.5639, + "step": 10044 + }, + { + "epoch": 0.8106690339762731, + "grad_norm": 0.7331913113594055, + "learning_rate": 0.00010007893448058678, + "loss": 2.5499, + "step": 10045 + }, + { + "epoch": 0.8107497377128561, + "grad_norm": 0.7636487483978271, + "learning_rate": 0.00010006314758683015, + "loss": 2.6068, + "step": 10046 + }, + { + "epoch": 0.810830441449439, + "grad_norm": 0.6505223512649536, + "learning_rate": 0.0001000473606914997, + 
"loss": 2.5313, + "step": 10047 + }, + { + "epoch": 0.8109111451860221, + "grad_norm": 0.6425966620445251, + "learning_rate": 0.00010003157379498886, + "loss": 2.5998, + "step": 10048 + }, + { + "epoch": 0.8109918489226051, + "grad_norm": 0.7163281440734863, + "learning_rate": 0.00010001578689769116, + "loss": 2.5493, + "step": 10049 + }, + { + "epoch": 0.8110725526591881, + "grad_norm": 0.7345306873321533, + "learning_rate": 0.0001, + "loss": 2.5609, + "step": 10050 + }, + { + "epoch": 0.8111532563957711, + "grad_norm": 0.6808427572250366, + "learning_rate": 9.998421310230884e-05, + "loss": 2.4823, + "step": 10051 + }, + { + "epoch": 0.8112339601323542, + "grad_norm": 0.7456082105636597, + "learning_rate": 9.996842620501115e-05, + "loss": 2.4782, + "step": 10052 + }, + { + "epoch": 0.8113146638689371, + "grad_norm": 0.7061728239059448, + "learning_rate": 9.995263930850034e-05, + "loss": 2.4906, + "step": 10053 + }, + { + "epoch": 0.8113953676055201, + "grad_norm": 0.691663920879364, + "learning_rate": 9.993685241316986e-05, + "loss": 2.5842, + "step": 10054 + }, + { + "epoch": 0.8114760713421031, + "grad_norm": 0.6899400353431702, + "learning_rate": 9.992106551941325e-05, + "loss": 2.5628, + "step": 10055 + }, + { + "epoch": 0.8115567750786862, + "grad_norm": 0.6909289360046387, + "learning_rate": 9.990527862762385e-05, + "loss": 2.5173, + "step": 10056 + }, + { + "epoch": 0.8116374788152692, + "grad_norm": 0.6507968306541443, + "learning_rate": 9.988949173819514e-05, + "loss": 2.5763, + "step": 10057 + }, + { + "epoch": 0.8117181825518521, + "grad_norm": 0.6972371339797974, + "learning_rate": 9.98737048515206e-05, + "loss": 2.604, + "step": 10058 + }, + { + "epoch": 0.8117988862884351, + "grad_norm": 0.6500107049942017, + "learning_rate": 9.985791796799368e-05, + "loss": 2.509, + "step": 10059 + }, + { + "epoch": 0.8118795900250182, + "grad_norm": 0.704501211643219, + "learning_rate": 9.98421310880078e-05, + "loss": 2.5773, + "step": 10060 + }, + { + "epoch": 
0.8119602937616012, + "grad_norm": 0.7037203311920166, + "learning_rate": 9.982634421195641e-05, + "loss": 2.5968, + "step": 10061 + }, + { + "epoch": 0.8120409974981841, + "grad_norm": 0.7161232829093933, + "learning_rate": 9.981055734023304e-05, + "loss": 2.5373, + "step": 10062 + }, + { + "epoch": 0.8121217012347671, + "grad_norm": 0.6602928638458252, + "learning_rate": 9.979477047323099e-05, + "loss": 2.5851, + "step": 10063 + }, + { + "epoch": 0.8122024049713502, + "grad_norm": 0.6685947775840759, + "learning_rate": 9.977898361134383e-05, + "loss": 2.5543, + "step": 10064 + }, + { + "epoch": 0.8122831087079332, + "grad_norm": 0.6772760152816772, + "learning_rate": 9.976319675496502e-05, + "loss": 2.5355, + "step": 10065 + }, + { + "epoch": 0.8123638124445162, + "grad_norm": 0.6140885949134827, + "learning_rate": 9.974740990448792e-05, + "loss": 2.489, + "step": 10066 + }, + { + "epoch": 0.8124445161810991, + "grad_norm": 0.6597142219543457, + "learning_rate": 9.973162306030604e-05, + "loss": 2.5619, + "step": 10067 + }, + { + "epoch": 0.8125252199176822, + "grad_norm": 0.6768592000007629, + "learning_rate": 9.971583622281281e-05, + "loss": 2.5107, + "step": 10068 + }, + { + "epoch": 0.8126059236542652, + "grad_norm": 0.682296633720398, + "learning_rate": 9.970004939240168e-05, + "loss": 2.5003, + "step": 10069 + }, + { + "epoch": 0.8126866273908482, + "grad_norm": 0.7356325387954712, + "learning_rate": 9.96842625694661e-05, + "loss": 2.5864, + "step": 10070 + }, + { + "epoch": 0.8127673311274312, + "grad_norm": 0.6818091869354248, + "learning_rate": 9.966847575439956e-05, + "loss": 2.5375, + "step": 10071 + }, + { + "epoch": 0.8128480348640142, + "grad_norm": 0.6954368352890015, + "learning_rate": 9.965268894759543e-05, + "loss": 2.5314, + "step": 10072 + }, + { + "epoch": 0.8129287386005972, + "grad_norm": 0.6759306192398071, + "learning_rate": 9.963690214944721e-05, + "loss": 2.5881, + "step": 10073 + }, + { + "epoch": 0.8130094423371802, + "grad_norm": 
0.6546545624732971, + "learning_rate": 9.962111536034832e-05, + "loss": 2.5264, + "step": 10074 + }, + { + "epoch": 0.8130901460737632, + "grad_norm": 0.6709586977958679, + "learning_rate": 9.960532858069226e-05, + "loss": 2.5906, + "step": 10075 + }, + { + "epoch": 0.8131708498103463, + "grad_norm": 0.7310851812362671, + "learning_rate": 9.958954181087241e-05, + "loss": 2.5134, + "step": 10076 + }, + { + "epoch": 0.8132515535469292, + "grad_norm": 0.6793027520179749, + "learning_rate": 9.957375505128227e-05, + "loss": 2.5387, + "step": 10077 + }, + { + "epoch": 0.8133322572835122, + "grad_norm": 0.6965875029563904, + "learning_rate": 9.955796830231528e-05, + "loss": 2.5649, + "step": 10078 + }, + { + "epoch": 0.8134129610200952, + "grad_norm": 0.6597574353218079, + "learning_rate": 9.954218156436485e-05, + "loss": 2.5281, + "step": 10079 + }, + { + "epoch": 0.8134936647566783, + "grad_norm": 0.7911555171012878, + "learning_rate": 9.952639483782445e-05, + "loss": 2.535, + "step": 10080 + }, + { + "epoch": 0.8135743684932613, + "grad_norm": 0.7405688762664795, + "learning_rate": 9.951060812308757e-05, + "loss": 2.5303, + "step": 10081 + }, + { + "epoch": 0.8136550722298442, + "grad_norm": 0.6961480379104614, + "learning_rate": 9.949482142054758e-05, + "loss": 2.4959, + "step": 10082 + }, + { + "epoch": 0.8137357759664272, + "grad_norm": 0.6761718392372131, + "learning_rate": 9.947903473059797e-05, + "loss": 2.5591, + "step": 10083 + }, + { + "epoch": 0.8138164797030103, + "grad_norm": 0.7383104562759399, + "learning_rate": 9.946324805363218e-05, + "loss": 2.5848, + "step": 10084 + }, + { + "epoch": 0.8138971834395933, + "grad_norm": 0.6495873928070068, + "learning_rate": 9.944746139004364e-05, + "loss": 2.4972, + "step": 10085 + }, + { + "epoch": 0.8139778871761763, + "grad_norm": 0.7247152328491211, + "learning_rate": 9.94316747402258e-05, + "loss": 2.5361, + "step": 10086 + }, + { + "epoch": 0.8140585909127592, + "grad_norm": 0.6965751051902771, + "learning_rate": 
9.941588810457215e-05, + "loss": 2.4997, + "step": 10087 + }, + { + "epoch": 0.8141392946493423, + "grad_norm": 0.7138223648071289, + "learning_rate": 9.940010148347603e-05, + "loss": 2.5226, + "step": 10088 + }, + { + "epoch": 0.8142199983859253, + "grad_norm": 0.6571210622787476, + "learning_rate": 9.938431487733099e-05, + "loss": 2.5388, + "step": 10089 + }, + { + "epoch": 0.8143007021225083, + "grad_norm": 0.6721277832984924, + "learning_rate": 9.936852828653042e-05, + "loss": 2.5219, + "step": 10090 + }, + { + "epoch": 0.8143814058590912, + "grad_norm": 0.647520124912262, + "learning_rate": 9.935274171146782e-05, + "loss": 2.6199, + "step": 10091 + }, + { + "epoch": 0.8144621095956743, + "grad_norm": 0.6892204284667969, + "learning_rate": 9.933695515253654e-05, + "loss": 2.5132, + "step": 10092 + }, + { + "epoch": 0.8145428133322573, + "grad_norm": 0.6979050636291504, + "learning_rate": 9.932116861013008e-05, + "loss": 2.5148, + "step": 10093 + }, + { + "epoch": 0.8146235170688403, + "grad_norm": 0.6682664752006531, + "learning_rate": 9.930538208464189e-05, + "loss": 2.5795, + "step": 10094 + }, + { + "epoch": 0.8147042208054233, + "grad_norm": 0.734121561050415, + "learning_rate": 9.928959557646537e-05, + "loss": 2.5469, + "step": 10095 + }, + { + "epoch": 0.8147849245420064, + "grad_norm": 0.6669620275497437, + "learning_rate": 9.9273809085994e-05, + "loss": 2.5277, + "step": 10096 + }, + { + "epoch": 0.8148656282785893, + "grad_norm": 0.6750600934028625, + "learning_rate": 9.925802261362124e-05, + "loss": 2.5869, + "step": 10097 + }, + { + "epoch": 0.8149463320151723, + "grad_norm": 0.6813061237335205, + "learning_rate": 9.924223615974044e-05, + "loss": 2.585, + "step": 10098 + }, + { + "epoch": 0.8150270357517553, + "grad_norm": 0.6775497794151306, + "learning_rate": 9.92264497247451e-05, + "loss": 2.5353, + "step": 10099 + }, + { + "epoch": 0.8151077394883383, + "grad_norm": 0.6877530813217163, + "learning_rate": 9.92106633090287e-05, + "loss": 2.5349, + 
"step": 10100 + }, + { + "epoch": 0.8151884432249213, + "grad_norm": 0.6984169483184814, + "learning_rate": 9.91948769129846e-05, + "loss": 2.5986, + "step": 10101 + }, + { + "epoch": 0.8152691469615043, + "grad_norm": 0.7144806981086731, + "learning_rate": 9.917909053700626e-05, + "loss": 2.5797, + "step": 10102 + }, + { + "epoch": 0.8153498506980873, + "grad_norm": 0.6494203209877014, + "learning_rate": 9.916330418148715e-05, + "loss": 2.5035, + "step": 10103 + }, + { + "epoch": 0.8154305544346703, + "grad_norm": 0.6669752597808838, + "learning_rate": 9.914751784682069e-05, + "loss": 2.5489, + "step": 10104 + }, + { + "epoch": 0.8155112581712534, + "grad_norm": 0.6557981371879578, + "learning_rate": 9.913173153340029e-05, + "loss": 2.5266, + "step": 10105 + }, + { + "epoch": 0.8155919619078363, + "grad_norm": 0.6633948087692261, + "learning_rate": 9.911594524161941e-05, + "loss": 2.5263, + "step": 10106 + }, + { + "epoch": 0.8156726656444193, + "grad_norm": 0.7191522717475891, + "learning_rate": 9.910015897187154e-05, + "loss": 2.5625, + "step": 10107 + }, + { + "epoch": 0.8157533693810023, + "grad_norm": 0.7089062929153442, + "learning_rate": 9.908437272455001e-05, + "loss": 2.5644, + "step": 10108 + }, + { + "epoch": 0.8158340731175854, + "grad_norm": 0.7662761211395264, + "learning_rate": 9.906858650004831e-05, + "loss": 2.5875, + "step": 10109 + }, + { + "epoch": 0.8159147768541684, + "grad_norm": 0.6658861041069031, + "learning_rate": 9.905280029875988e-05, + "loss": 2.5818, + "step": 10110 + }, + { + "epoch": 0.8159954805907513, + "grad_norm": 0.7229514718055725, + "learning_rate": 9.903701412107815e-05, + "loss": 2.5421, + "step": 10111 + }, + { + "epoch": 0.8160761843273343, + "grad_norm": 0.7295149564743042, + "learning_rate": 9.902122796739652e-05, + "loss": 2.5298, + "step": 10112 + }, + { + "epoch": 0.8161568880639174, + "grad_norm": 0.6805420517921448, + "learning_rate": 9.900544183810849e-05, + "loss": 2.6693, + "step": 10113 + }, + { + "epoch": 
0.8162375918005004, + "grad_norm": 0.6560602188110352, + "learning_rate": 9.898965573360738e-05, + "loss": 2.5445, + "step": 10114 + }, + { + "epoch": 0.8163182955370833, + "grad_norm": 0.690396785736084, + "learning_rate": 9.897386965428674e-05, + "loss": 2.5281, + "step": 10115 + }, + { + "epoch": 0.8163989992736663, + "grad_norm": 0.6905054450035095, + "learning_rate": 9.895808360053998e-05, + "loss": 2.5406, + "step": 10116 + }, + { + "epoch": 0.8164797030102494, + "grad_norm": 0.6905301213264465, + "learning_rate": 9.894229757276045e-05, + "loss": 2.5458, + "step": 10117 + }, + { + "epoch": 0.8165604067468324, + "grad_norm": 0.6827620267868042, + "learning_rate": 9.892651157134162e-05, + "loss": 2.4403, + "step": 10118 + }, + { + "epoch": 0.8166411104834154, + "grad_norm": 0.7614343166351318, + "learning_rate": 9.891072559667697e-05, + "loss": 2.6369, + "step": 10119 + }, + { + "epoch": 0.8167218142199983, + "grad_norm": 0.6913704872131348, + "learning_rate": 9.889493964915985e-05, + "loss": 2.5914, + "step": 10120 + }, + { + "epoch": 0.8168025179565814, + "grad_norm": 0.7026088237762451, + "learning_rate": 9.887915372918372e-05, + "loss": 2.5139, + "step": 10121 + }, + { + "epoch": 0.8168832216931644, + "grad_norm": 0.7064465284347534, + "learning_rate": 9.886336783714203e-05, + "loss": 2.549, + "step": 10122 + }, + { + "epoch": 0.8169639254297474, + "grad_norm": 0.7345553040504456, + "learning_rate": 9.884758197342821e-05, + "loss": 2.5887, + "step": 10123 + }, + { + "epoch": 0.8170446291663304, + "grad_norm": 0.6916251182556152, + "learning_rate": 9.883179613843563e-05, + "loss": 2.5659, + "step": 10124 + }, + { + "epoch": 0.8171253329029134, + "grad_norm": 0.6428200602531433, + "learning_rate": 9.881601033255771e-05, + "loss": 2.5379, + "step": 10125 + }, + { + "epoch": 0.8172060366394964, + "grad_norm": 0.7433571815490723, + "learning_rate": 9.880022455618796e-05, + "loss": 2.5751, + "step": 10126 + }, + { + "epoch": 0.8172867403760794, + "grad_norm": 
0.733256995677948, + "learning_rate": 9.878443880971974e-05, + "loss": 2.4971, + "step": 10127 + }, + { + "epoch": 0.8173674441126624, + "grad_norm": 0.708289384841919, + "learning_rate": 9.876865309354646e-05, + "loss": 2.635, + "step": 10128 + }, + { + "epoch": 0.8174481478492455, + "grad_norm": 0.6877188682556152, + "learning_rate": 9.87528674080616e-05, + "loss": 2.5827, + "step": 10129 + }, + { + "epoch": 0.8175288515858284, + "grad_norm": 0.7108712792396545, + "learning_rate": 9.873708175365852e-05, + "loss": 2.5643, + "step": 10130 + }, + { + "epoch": 0.8176095553224114, + "grad_norm": 0.7435629367828369, + "learning_rate": 9.872129613073065e-05, + "loss": 2.5267, + "step": 10131 + }, + { + "epoch": 0.8176902590589944, + "grad_norm": 0.669913113117218, + "learning_rate": 9.870551053967148e-05, + "loss": 2.5684, + "step": 10132 + }, + { + "epoch": 0.8177709627955775, + "grad_norm": 0.6981424689292908, + "learning_rate": 9.868972498087431e-05, + "loss": 2.592, + "step": 10133 + }, + { + "epoch": 0.8178516665321605, + "grad_norm": 0.6661834716796875, + "learning_rate": 9.867393945473263e-05, + "loss": 2.5082, + "step": 10134 + }, + { + "epoch": 0.8179323702687434, + "grad_norm": 0.6611261367797852, + "learning_rate": 9.865815396163987e-05, + "loss": 2.556, + "step": 10135 + }, + { + "epoch": 0.8180130740053264, + "grad_norm": 0.6732283234596252, + "learning_rate": 9.86423685019894e-05, + "loss": 2.5668, + "step": 10136 + }, + { + "epoch": 0.8180937777419095, + "grad_norm": 0.6768637299537659, + "learning_rate": 9.862658307617465e-05, + "loss": 2.5467, + "step": 10137 + }, + { + "epoch": 0.8181744814784925, + "grad_norm": 0.6943596601486206, + "learning_rate": 9.861079768458904e-05, + "loss": 2.5989, + "step": 10138 + }, + { + "epoch": 0.8182551852150755, + "grad_norm": 0.7369638681411743, + "learning_rate": 9.859501232762601e-05, + "loss": 2.5189, + "step": 10139 + }, + { + "epoch": 0.8183358889516584, + "grad_norm": 0.7443112730979919, + "learning_rate": 
9.857922700567892e-05, + "loss": 2.5979, + "step": 10140 + }, + { + "epoch": 0.8184165926882415, + "grad_norm": 0.6726163029670715, + "learning_rate": 9.85634417191412e-05, + "loss": 2.5451, + "step": 10141 + }, + { + "epoch": 0.8184972964248245, + "grad_norm": 0.720492422580719, + "learning_rate": 9.854765646840632e-05, + "loss": 2.6116, + "step": 10142 + }, + { + "epoch": 0.8185780001614075, + "grad_norm": 0.6998233795166016, + "learning_rate": 9.85318712538676e-05, + "loss": 2.556, + "step": 10143 + }, + { + "epoch": 0.8186587038979904, + "grad_norm": 0.7580110430717468, + "learning_rate": 9.851608607591848e-05, + "loss": 2.5222, + "step": 10144 + }, + { + "epoch": 0.8187394076345735, + "grad_norm": 0.6893007755279541, + "learning_rate": 9.85003009349524e-05, + "loss": 2.4639, + "step": 10145 + }, + { + "epoch": 0.8188201113711565, + "grad_norm": 0.6448441743850708, + "learning_rate": 9.84845158313627e-05, + "loss": 2.5249, + "step": 10146 + }, + { + "epoch": 0.8189008151077395, + "grad_norm": 0.7591872215270996, + "learning_rate": 9.846873076554285e-05, + "loss": 2.5173, + "step": 10147 + }, + { + "epoch": 0.8189815188443225, + "grad_norm": 0.6994685530662537, + "learning_rate": 9.845294573788626e-05, + "loss": 2.5181, + "step": 10148 + }, + { + "epoch": 0.8190622225809054, + "grad_norm": 0.6822378635406494, + "learning_rate": 9.843716074878628e-05, + "loss": 2.5109, + "step": 10149 + }, + { + "epoch": 0.8191429263174885, + "grad_norm": 0.6730359792709351, + "learning_rate": 9.842137579863632e-05, + "loss": 2.5402, + "step": 10150 + }, + { + "epoch": 0.8192236300540715, + "grad_norm": 0.6280627846717834, + "learning_rate": 9.840559088782984e-05, + "loss": 2.4806, + "step": 10151 + }, + { + "epoch": 0.8193043337906545, + "grad_norm": 0.6887876391410828, + "learning_rate": 9.838980601676017e-05, + "loss": 2.5498, + "step": 10152 + }, + { + "epoch": 0.8193850375272375, + "grad_norm": 0.7823790907859802, + "learning_rate": 9.837402118582075e-05, + "loss": 2.467, + 
"step": 10153 + }, + { + "epoch": 0.8194657412638205, + "grad_norm": 0.8109384179115295, + "learning_rate": 9.835823639540496e-05, + "loss": 2.5898, + "step": 10154 + }, + { + "epoch": 0.8195464450004035, + "grad_norm": 0.6883066892623901, + "learning_rate": 9.834245164590624e-05, + "loss": 2.5589, + "step": 10155 + }, + { + "epoch": 0.8196271487369865, + "grad_norm": 0.7291175723075867, + "learning_rate": 9.832666693771794e-05, + "loss": 2.5317, + "step": 10156 + }, + { + "epoch": 0.8197078524735695, + "grad_norm": 0.6819449663162231, + "learning_rate": 9.831088227123346e-05, + "loss": 2.5513, + "step": 10157 + }, + { + "epoch": 0.8197885562101526, + "grad_norm": 0.7038870453834534, + "learning_rate": 9.829509764684626e-05, + "loss": 2.5301, + "step": 10158 + }, + { + "epoch": 0.8198692599467355, + "grad_norm": 0.7483033537864685, + "learning_rate": 9.827931306494965e-05, + "loss": 2.5273, + "step": 10159 + }, + { + "epoch": 0.8199499636833185, + "grad_norm": 0.6998303532600403, + "learning_rate": 9.826352852593705e-05, + "loss": 2.5083, + "step": 10160 + }, + { + "epoch": 0.8200306674199015, + "grad_norm": 0.6865512728691101, + "learning_rate": 9.824774403020188e-05, + "loss": 2.5693, + "step": 10161 + }, + { + "epoch": 0.8201113711564846, + "grad_norm": 0.8144257068634033, + "learning_rate": 9.823195957813749e-05, + "loss": 2.6052, + "step": 10162 + }, + { + "epoch": 0.8201920748930676, + "grad_norm": 0.6920810341835022, + "learning_rate": 9.821617517013729e-05, + "loss": 2.5467, + "step": 10163 + }, + { + "epoch": 0.8202727786296505, + "grad_norm": 0.7538061141967773, + "learning_rate": 9.820039080659469e-05, + "loss": 2.5933, + "step": 10164 + }, + { + "epoch": 0.8203534823662335, + "grad_norm": 0.6744310259819031, + "learning_rate": 9.818460648790302e-05, + "loss": 2.5633, + "step": 10165 + }, + { + "epoch": 0.8204341861028166, + "grad_norm": 0.6943854689598083, + "learning_rate": 9.816882221445571e-05, + "loss": 2.5868, + "step": 10166 + }, + { + "epoch": 
0.8205148898393996, + "grad_norm": 0.6486902832984924, + "learning_rate": 9.815303798664614e-05, + "loss": 2.4983, + "step": 10167 + }, + { + "epoch": 0.8205955935759826, + "grad_norm": 0.6699065566062927, + "learning_rate": 9.813725380486773e-05, + "loss": 2.563, + "step": 10168 + }, + { + "epoch": 0.8206762973125655, + "grad_norm": 0.6547110080718994, + "learning_rate": 9.812146966951379e-05, + "loss": 2.5404, + "step": 10169 + }, + { + "epoch": 0.8207570010491486, + "grad_norm": 0.692592203617096, + "learning_rate": 9.810568558097774e-05, + "loss": 2.5625, + "step": 10170 + }, + { + "epoch": 0.8208377047857316, + "grad_norm": 0.6696702837944031, + "learning_rate": 9.808990153965296e-05, + "loss": 2.5866, + "step": 10171 + }, + { + "epoch": 0.8209184085223146, + "grad_norm": 0.6425998210906982, + "learning_rate": 9.807411754593282e-05, + "loss": 2.5487, + "step": 10172 + }, + { + "epoch": 0.8209991122588975, + "grad_norm": 0.6849769949913025, + "learning_rate": 9.805833360021069e-05, + "loss": 2.5772, + "step": 10173 + }, + { + "epoch": 0.8210798159954806, + "grad_norm": 0.7451414465904236, + "learning_rate": 9.804254970288001e-05, + "loss": 2.5089, + "step": 10174 + }, + { + "epoch": 0.8211605197320636, + "grad_norm": 0.7134390473365784, + "learning_rate": 9.802676585433408e-05, + "loss": 2.541, + "step": 10175 + }, + { + "epoch": 0.8212412234686466, + "grad_norm": 0.7490564584732056, + "learning_rate": 9.801098205496627e-05, + "loss": 2.5299, + "step": 10176 + }, + { + "epoch": 0.8213219272052296, + "grad_norm": 0.6614408493041992, + "learning_rate": 9.799519830517005e-05, + "loss": 2.5252, + "step": 10177 + }, + { + "epoch": 0.8214026309418127, + "grad_norm": 0.761049211025238, + "learning_rate": 9.797941460533869e-05, + "loss": 2.5153, + "step": 10178 + }, + { + "epoch": 0.8214833346783956, + "grad_norm": 0.6352702379226685, + "learning_rate": 9.796363095586561e-05, + "loss": 2.5407, + "step": 10179 + }, + { + "epoch": 0.8215640384149786, + "grad_norm": 
0.684212863445282, + "learning_rate": 9.794784735714417e-05, + "loss": 2.5425, + "step": 10180 + }, + { + "epoch": 0.8216447421515616, + "grad_norm": 0.652987539768219, + "learning_rate": 9.793206380956772e-05, + "loss": 2.5542, + "step": 10181 + }, + { + "epoch": 0.8217254458881447, + "grad_norm": 0.6912897229194641, + "learning_rate": 9.791628031352966e-05, + "loss": 2.5041, + "step": 10182 + }, + { + "epoch": 0.8218061496247276, + "grad_norm": 0.7025408744812012, + "learning_rate": 9.790049686942333e-05, + "loss": 2.5296, + "step": 10183 + }, + { + "epoch": 0.8218868533613106, + "grad_norm": 0.7580777406692505, + "learning_rate": 9.788471347764215e-05, + "loss": 2.578, + "step": 10184 + }, + { + "epoch": 0.8219675570978936, + "grad_norm": 0.7044378519058228, + "learning_rate": 9.78689301385794e-05, + "loss": 2.5093, + "step": 10185 + }, + { + "epoch": 0.8220482608344767, + "grad_norm": 0.7339754700660706, + "learning_rate": 9.785314685262849e-05, + "loss": 2.5202, + "step": 10186 + }, + { + "epoch": 0.8221289645710597, + "grad_norm": 0.6872244477272034, + "learning_rate": 9.783736362018277e-05, + "loss": 2.541, + "step": 10187 + }, + { + "epoch": 0.8222096683076426, + "grad_norm": 0.7052434682846069, + "learning_rate": 9.78215804416356e-05, + "loss": 2.4968, + "step": 10188 + }, + { + "epoch": 0.8222903720442256, + "grad_norm": 0.6739610433578491, + "learning_rate": 9.780579731738033e-05, + "loss": 2.5137, + "step": 10189 + }, + { + "epoch": 0.8223710757808087, + "grad_norm": 0.6842939853668213, + "learning_rate": 9.779001424781035e-05, + "loss": 2.5329, + "step": 10190 + }, + { + "epoch": 0.8224517795173917, + "grad_norm": 0.7057977914810181, + "learning_rate": 9.777423123331898e-05, + "loss": 2.5657, + "step": 10191 + }, + { + "epoch": 0.8225324832539747, + "grad_norm": 0.6748424172401428, + "learning_rate": 9.775844827429958e-05, + "loss": 2.6104, + "step": 10192 + }, + { + "epoch": 0.8226131869905576, + "grad_norm": 0.6492514610290527, + "learning_rate": 
9.774266537114555e-05, + "loss": 2.58, + "step": 10193 + }, + { + "epoch": 0.8226938907271407, + "grad_norm": 0.6987641453742981, + "learning_rate": 9.772688252425016e-05, + "loss": 2.5301, + "step": 10194 + }, + { + "epoch": 0.8227745944637237, + "grad_norm": 0.710921585559845, + "learning_rate": 9.771109973400679e-05, + "loss": 2.6245, + "step": 10195 + }, + { + "epoch": 0.8228552982003067, + "grad_norm": 0.6673738360404968, + "learning_rate": 9.769531700080883e-05, + "loss": 2.5205, + "step": 10196 + }, + { + "epoch": 0.8229360019368896, + "grad_norm": 0.6705252528190613, + "learning_rate": 9.767953432504958e-05, + "loss": 2.4932, + "step": 10197 + }, + { + "epoch": 0.8230167056734727, + "grad_norm": 0.6587076783180237, + "learning_rate": 9.766375170712237e-05, + "loss": 2.5085, + "step": 10198 + }, + { + "epoch": 0.8230974094100557, + "grad_norm": 0.7285338640213013, + "learning_rate": 9.764796914742061e-05, + "loss": 2.5481, + "step": 10199 + }, + { + "epoch": 0.8231781131466387, + "grad_norm": 0.6971831321716309, + "learning_rate": 9.763218664633763e-05, + "loss": 2.6092, + "step": 10200 + }, + { + "epoch": 0.8232588168832217, + "grad_norm": 0.6940265893936157, + "learning_rate": 9.761640420426669e-05, + "loss": 2.5325, + "step": 10201 + }, + { + "epoch": 0.8233395206198046, + "grad_norm": 0.6612978577613831, + "learning_rate": 9.76006218216012e-05, + "loss": 2.5532, + "step": 10202 + }, + { + "epoch": 0.8234202243563877, + "grad_norm": 0.6707638502120972, + "learning_rate": 9.758483949873453e-05, + "loss": 2.512, + "step": 10203 + }, + { + "epoch": 0.8235009280929707, + "grad_norm": 0.6636764407157898, + "learning_rate": 9.756905723605994e-05, + "loss": 2.5446, + "step": 10204 + }, + { + "epoch": 0.8235816318295537, + "grad_norm": 0.6996643543243408, + "learning_rate": 9.755327503397081e-05, + "loss": 2.5504, + "step": 10205 + }, + { + "epoch": 0.8236623355661367, + "grad_norm": 0.604487955570221, + "learning_rate": 9.753749289286046e-05, + "loss": 2.4767, + 
"step": 10206 + }, + { + "epoch": 0.8237430393027197, + "grad_norm": 0.6484553217887878, + "learning_rate": 9.752171081312222e-05, + "loss": 2.5522, + "step": 10207 + }, + { + "epoch": 0.8238237430393027, + "grad_norm": 0.6890987753868103, + "learning_rate": 9.75059287951494e-05, + "loss": 2.5545, + "step": 10208 + }, + { + "epoch": 0.8239044467758857, + "grad_norm": 0.6786034107208252, + "learning_rate": 9.749014683933541e-05, + "loss": 2.591, + "step": 10209 + }, + { + "epoch": 0.8239851505124687, + "grad_norm": 0.751192033290863, + "learning_rate": 9.747436494607349e-05, + "loss": 2.5335, + "step": 10210 + }, + { + "epoch": 0.8240658542490518, + "grad_norm": 0.6611589789390564, + "learning_rate": 9.7458583115757e-05, + "loss": 2.5104, + "step": 10211 + }, + { + "epoch": 0.8241465579856347, + "grad_norm": 0.6602892875671387, + "learning_rate": 9.744280134877926e-05, + "loss": 2.5319, + "step": 10212 + }, + { + "epoch": 0.8242272617222177, + "grad_norm": 0.6856467127799988, + "learning_rate": 9.742701964553359e-05, + "loss": 2.5418, + "step": 10213 + }, + { + "epoch": 0.8243079654588007, + "grad_norm": 0.6810153126716614, + "learning_rate": 9.741123800641332e-05, + "loss": 2.5691, + "step": 10214 + }, + { + "epoch": 0.8243886691953838, + "grad_norm": 0.7044229507446289, + "learning_rate": 9.739545643181175e-05, + "loss": 2.5911, + "step": 10215 + }, + { + "epoch": 0.8244693729319668, + "grad_norm": 0.6689271330833435, + "learning_rate": 9.737967492212225e-05, + "loss": 2.5374, + "step": 10216 + }, + { + "epoch": 0.8245500766685497, + "grad_norm": 0.6558904051780701, + "learning_rate": 9.736389347773807e-05, + "loss": 2.5118, + "step": 10217 + }, + { + "epoch": 0.8246307804051327, + "grad_norm": 0.6900291442871094, + "learning_rate": 9.734811209905255e-05, + "loss": 2.515, + "step": 10218 + }, + { + "epoch": 0.8247114841417158, + "grad_norm": 0.7129492163658142, + "learning_rate": 9.733233078645907e-05, + "loss": 2.5191, + "step": 10219 + }, + { + "epoch": 
0.8247921878782988, + "grad_norm": 0.7031866908073425, + "learning_rate": 9.731654954035082e-05, + "loss": 2.5616, + "step": 10220 + }, + { + "epoch": 0.8248728916148818, + "grad_norm": 0.6418820023536682, + "learning_rate": 9.730076836112118e-05, + "loss": 2.537, + "step": 10221 + }, + { + "epoch": 0.8249535953514647, + "grad_norm": 0.6731035113334656, + "learning_rate": 9.728498724916347e-05, + "loss": 2.5483, + "step": 10222 + }, + { + "epoch": 0.8250342990880478, + "grad_norm": 0.6941342353820801, + "learning_rate": 9.726920620487096e-05, + "loss": 2.5314, + "step": 10223 + }, + { + "epoch": 0.8251150028246308, + "grad_norm": 0.6808927059173584, + "learning_rate": 9.725342522863696e-05, + "loss": 2.5521, + "step": 10224 + }, + { + "epoch": 0.8251957065612138, + "grad_norm": 0.6873155832290649, + "learning_rate": 9.723764432085481e-05, + "loss": 2.5205, + "step": 10225 + }, + { + "epoch": 0.8252764102977967, + "grad_norm": 0.8590287566184998, + "learning_rate": 9.722186348191776e-05, + "loss": 2.5378, + "step": 10226 + }, + { + "epoch": 0.8253571140343798, + "grad_norm": 0.691523015499115, + "learning_rate": 9.720608271221912e-05, + "loss": 2.5062, + "step": 10227 + }, + { + "epoch": 0.8254378177709628, + "grad_norm": 0.6695523262023926, + "learning_rate": 9.719030201215226e-05, + "loss": 2.5164, + "step": 10228 + }, + { + "epoch": 0.8255185215075458, + "grad_norm": 0.745516300201416, + "learning_rate": 9.717452138211037e-05, + "loss": 2.5207, + "step": 10229 + }, + { + "epoch": 0.8255992252441288, + "grad_norm": 0.6628115773200989, + "learning_rate": 9.715874082248679e-05, + "loss": 2.5293, + "step": 10230 + }, + { + "epoch": 0.8256799289807119, + "grad_norm": 0.6531884074211121, + "learning_rate": 9.714296033367482e-05, + "loss": 2.4812, + "step": 10231 + }, + { + "epoch": 0.8257606327172948, + "grad_norm": 0.7444833517074585, + "learning_rate": 9.712717991606777e-05, + "loss": 2.5422, + "step": 10232 + }, + { + "epoch": 0.8258413364538778, + "grad_norm": 
0.7013139128684998, + "learning_rate": 9.711139957005888e-05, + "loss": 2.5117, + "step": 10233 + }, + { + "epoch": 0.8259220401904608, + "grad_norm": 0.6588132977485657, + "learning_rate": 9.709561929604147e-05, + "loss": 2.5257, + "step": 10234 + }, + { + "epoch": 0.8260027439270439, + "grad_norm": 0.7538537383079529, + "learning_rate": 9.707983909440886e-05, + "loss": 2.5225, + "step": 10235 + }, + { + "epoch": 0.8260834476636268, + "grad_norm": Infinity, + "learning_rate": 9.707983909440886e-05, + "loss": 2.5532, + "step": 10236 + }, + { + "epoch": 0.8261641514002098, + "grad_norm": 0.7414929270744324, + "learning_rate": 9.706405896555425e-05, + "loss": 2.5653, + "step": 10237 + }, + { + "epoch": 0.8262448551367928, + "grad_norm": 0.757057785987854, + "learning_rate": 9.704827890987097e-05, + "loss": 2.5732, + "step": 10238 + }, + { + "epoch": 0.8263255588733759, + "grad_norm": 0.730721652507782, + "learning_rate": 9.703249892775232e-05, + "loss": 2.5317, + "step": 10239 + }, + { + "epoch": 0.8264062626099589, + "grad_norm": 0.6943208575248718, + "learning_rate": 9.701671901959151e-05, + "loss": 2.5849, + "step": 10240 + }, + { + "epoch": 0.8264869663465418, + "grad_norm": 0.7111102938652039, + "learning_rate": 9.700093918578188e-05, + "loss": 2.5007, + "step": 10241 + }, + { + "epoch": 0.8265676700831248, + "grad_norm": 0.7240251302719116, + "learning_rate": 9.69851594267167e-05, + "loss": 2.5002, + "step": 10242 + }, + { + "epoch": 0.8266483738197079, + "grad_norm": 0.6624411344528198, + "learning_rate": 9.696937974278922e-05, + "loss": 2.5175, + "step": 10243 + }, + { + "epoch": 0.8267290775562909, + "grad_norm": 0.6972576975822449, + "learning_rate": 9.695360013439269e-05, + "loss": 2.5285, + "step": 10244 + }, + { + "epoch": 0.8268097812928739, + "grad_norm": 0.684446394443512, + "learning_rate": 9.693782060192046e-05, + "loss": 2.57, + "step": 10245 + }, + { + "epoch": 0.8268904850294568, + "grad_norm": 0.6920011639595032, + "learning_rate": 
9.692204114576573e-05, + "loss": 2.5042, + "step": 10246 + }, + { + "epoch": 0.8269711887660399, + "grad_norm": 0.7526013851165771, + "learning_rate": 9.690626176632176e-05, + "loss": 2.5878, + "step": 10247 + }, + { + "epoch": 0.8270518925026229, + "grad_norm": 0.6936177611351013, + "learning_rate": 9.689048246398184e-05, + "loss": 2.5572, + "step": 10248 + }, + { + "epoch": 0.8271325962392059, + "grad_norm": 0.672168493270874, + "learning_rate": 9.687470323913922e-05, + "loss": 2.5127, + "step": 10249 + }, + { + "epoch": 0.8272132999757889, + "grad_norm": 0.6847899556159973, + "learning_rate": 9.685892409218717e-05, + "loss": 2.5443, + "step": 10250 + }, + { + "epoch": 0.8272940037123718, + "grad_norm": 0.6877103447914124, + "learning_rate": 9.684314502351894e-05, + "loss": 2.4924, + "step": 10251 + }, + { + "epoch": 0.8273747074489549, + "grad_norm": 0.6894243359565735, + "learning_rate": 9.682736603352783e-05, + "loss": 2.5107, + "step": 10252 + }, + { + "epoch": 0.8274554111855379, + "grad_norm": 0.7318278551101685, + "learning_rate": 9.681158712260698e-05, + "loss": 2.5276, + "step": 10253 + }, + { + "epoch": 0.8275361149221209, + "grad_norm": 0.6949039101600647, + "learning_rate": 9.679580829114975e-05, + "loss": 2.5128, + "step": 10254 + }, + { + "epoch": 0.8276168186587038, + "grad_norm": 0.6523800492286682, + "learning_rate": 9.678002953954939e-05, + "loss": 2.5584, + "step": 10255 + }, + { + "epoch": 0.8276975223952869, + "grad_norm": 0.6914480328559875, + "learning_rate": 9.676425086819905e-05, + "loss": 2.5597, + "step": 10256 + }, + { + "epoch": 0.8277782261318699, + "grad_norm": 0.7107869982719421, + "learning_rate": 9.674847227749206e-05, + "loss": 2.5009, + "step": 10257 + }, + { + "epoch": 0.8278589298684529, + "grad_norm": 0.7066758275032043, + "learning_rate": 9.673269376782166e-05, + "loss": 2.4599, + "step": 10258 + }, + { + "epoch": 0.8279396336050359, + "grad_norm": 0.7147037982940674, + "learning_rate": 9.671691533958104e-05, + "loss": 
2.4478, + "step": 10259 + }, + { + "epoch": 0.828020337341619, + "grad_norm": 0.666265606880188, + "learning_rate": 9.670113699316347e-05, + "loss": 2.5652, + "step": 10260 + }, + { + "epoch": 0.8281010410782019, + "grad_norm": 0.7026315927505493, + "learning_rate": 9.668535872896225e-05, + "loss": 2.5397, + "step": 10261 + }, + { + "epoch": 0.8281817448147849, + "grad_norm": 0.6611438393592834, + "learning_rate": 9.66695805473705e-05, + "loss": 2.5628, + "step": 10262 + }, + { + "epoch": 0.8282624485513679, + "grad_norm": 0.7211201190948486, + "learning_rate": 9.66538024487815e-05, + "loss": 2.5551, + "step": 10263 + }, + { + "epoch": 0.828343152287951, + "grad_norm": 0.7224553227424622, + "learning_rate": 9.663802443358849e-05, + "loss": 2.5329, + "step": 10264 + }, + { + "epoch": 0.8284238560245339, + "grad_norm": 0.6805843710899353, + "learning_rate": 9.662224650218474e-05, + "loss": 2.5744, + "step": 10265 + }, + { + "epoch": 0.8285045597611169, + "grad_norm": 0.7101335525512695, + "learning_rate": 9.66064686549634e-05, + "loss": 2.5281, + "step": 10266 + }, + { + "epoch": 0.8285852634976999, + "grad_norm": 0.7208443284034729, + "learning_rate": 9.659069089231774e-05, + "loss": 2.5326, + "step": 10267 + }, + { + "epoch": 0.828665967234283, + "grad_norm": 0.747894287109375, + "learning_rate": 9.6574913214641e-05, + "loss": 2.4909, + "step": 10268 + }, + { + "epoch": 0.828746670970866, + "grad_norm": 0.6618027091026306, + "learning_rate": 9.655913562232635e-05, + "loss": 2.6091, + "step": 10269 + }, + { + "epoch": 0.8288273747074489, + "grad_norm": 0.7101535201072693, + "learning_rate": 9.654335811576704e-05, + "loss": 2.5194, + "step": 10270 + }, + { + "epoch": 0.8289080784440319, + "grad_norm": 0.727763831615448, + "learning_rate": 9.652758069535631e-05, + "loss": 2.5767, + "step": 10271 + }, + { + "epoch": 0.828988782180615, + "grad_norm": 0.6936737895011902, + "learning_rate": 9.65118033614873e-05, + "loss": 2.498, + "step": 10272 + }, + { + "epoch": 
0.829069485917198, + "grad_norm": 0.699462354183197, + "learning_rate": 9.64960261145533e-05, + "loss": 2.5033, + "step": 10273 + }, + { + "epoch": 0.829150189653781, + "grad_norm": 0.7024868726730347, + "learning_rate": 9.648024895494749e-05, + "loss": 2.5937, + "step": 10274 + }, + { + "epoch": 0.8292308933903639, + "grad_norm": 0.7028421759605408, + "learning_rate": 9.646447188306305e-05, + "loss": 2.5528, + "step": 10275 + }, + { + "epoch": 0.829311597126947, + "grad_norm": 0.7216476202011108, + "learning_rate": 9.644869489929321e-05, + "loss": 2.5298, + "step": 10276 + }, + { + "epoch": 0.82939230086353, + "grad_norm": 0.6815251111984253, + "learning_rate": 9.643291800403123e-05, + "loss": 2.5138, + "step": 10277 + }, + { + "epoch": 0.829473004600113, + "grad_norm": 0.6961970925331116, + "learning_rate": 9.64171411976702e-05, + "loss": 2.5441, + "step": 10278 + }, + { + "epoch": 0.829553708336696, + "grad_norm": 0.7317311763763428, + "learning_rate": 9.640136448060337e-05, + "loss": 2.5885, + "step": 10279 + }, + { + "epoch": 0.829634412073279, + "grad_norm": 0.729086697101593, + "learning_rate": 9.638558785322396e-05, + "loss": 2.475, + "step": 10280 + }, + { + "epoch": 0.829715115809862, + "grad_norm": 0.7790165543556213, + "learning_rate": 9.636981131592521e-05, + "loss": 2.5538, + "step": 10281 + }, + { + "epoch": 0.829795819546445, + "grad_norm": 0.7066864967346191, + "learning_rate": 9.635403486910018e-05, + "loss": 2.5916, + "step": 10282 + }, + { + "epoch": 0.829876523283028, + "grad_norm": 0.7070252299308777, + "learning_rate": 9.633825851314215e-05, + "loss": 2.5879, + "step": 10283 + }, + { + "epoch": 0.829957227019611, + "grad_norm": 0.7604004740715027, + "learning_rate": 9.63224822484443e-05, + "loss": 2.5298, + "step": 10284 + }, + { + "epoch": 0.830037930756194, + "grad_norm": 0.7548386454582214, + "learning_rate": 9.63067060753998e-05, + "loss": 2.5313, + "step": 10285 + }, + { + "epoch": 0.830118634492777, + "grad_norm": 0.7241540551185608, + 
"learning_rate": 9.629092999440183e-05, + "loss": 2.5498, + "step": 10286 + }, + { + "epoch": 0.83019933822936, + "grad_norm": 0.6748291850090027, + "learning_rate": 9.627515400584361e-05, + "loss": 2.523, + "step": 10287 + }, + { + "epoch": 0.8302800419659431, + "grad_norm": 0.6624683141708374, + "learning_rate": 9.625937811011826e-05, + "loss": 2.568, + "step": 10288 + }, + { + "epoch": 0.830360745702526, + "grad_norm": 0.6681114435195923, + "learning_rate": 9.624360230761899e-05, + "loss": 2.5255, + "step": 10289 + }, + { + "epoch": 0.830441449439109, + "grad_norm": 0.6895325183868408, + "learning_rate": 9.622782659873899e-05, + "loss": 2.5275, + "step": 10290 + }, + { + "epoch": 0.830522153175692, + "grad_norm": 0.7257826924324036, + "learning_rate": 9.621205098387137e-05, + "loss": 2.5102, + "step": 10291 + }, + { + "epoch": 0.8306028569122751, + "grad_norm": 0.6567066311836243, + "learning_rate": 9.619627546340935e-05, + "loss": 2.5721, + "step": 10292 + }, + { + "epoch": 0.8306835606488581, + "grad_norm": 0.6571428179740906, + "learning_rate": 9.61805000377461e-05, + "loss": 2.5014, + "step": 10293 + }, + { + "epoch": 0.830764264385441, + "grad_norm": 0.7807042598724365, + "learning_rate": 9.61647247072748e-05, + "loss": 2.632, + "step": 10294 + }, + { + "epoch": 0.830844968122024, + "grad_norm": 0.6688913702964783, + "learning_rate": 9.614894947238854e-05, + "loss": 2.5457, + "step": 10295 + }, + { + "epoch": 0.8309256718586071, + "grad_norm": 0.7769338488578796, + "learning_rate": 9.613317433348055e-05, + "loss": 2.4775, + "step": 10296 + }, + { + "epoch": 0.8310063755951901, + "grad_norm": 0.7089162468910217, + "learning_rate": 9.611739929094399e-05, + "loss": 2.4887, + "step": 10297 + }, + { + "epoch": 0.8310870793317731, + "grad_norm": 0.6901174783706665, + "learning_rate": 9.610162434517196e-05, + "loss": 2.6127, + "step": 10298 + }, + { + "epoch": 0.831167783068356, + "grad_norm": 0.6862173676490784, + "learning_rate": 9.608584949655764e-05, + "loss": 
2.5432, + "step": 10299 + }, + { + "epoch": 0.8312484868049391, + "grad_norm": 0.6789367198944092, + "learning_rate": 9.607007474549418e-05, + "loss": 2.5135, + "step": 10300 + }, + { + "epoch": 0.8313291905415221, + "grad_norm": 0.6548805832862854, + "learning_rate": 9.605430009237474e-05, + "loss": 2.5466, + "step": 10301 + }, + { + "epoch": 0.8314098942781051, + "grad_norm": 0.6873800158500671, + "learning_rate": 9.603852553759244e-05, + "loss": 2.4954, + "step": 10302 + }, + { + "epoch": 0.831490598014688, + "grad_norm": 0.6816138029098511, + "learning_rate": 9.602275108154046e-05, + "loss": 2.5556, + "step": 10303 + }, + { + "epoch": 0.831571301751271, + "grad_norm": 0.6890314221382141, + "learning_rate": 9.600697672461189e-05, + "loss": 2.5253, + "step": 10304 + }, + { + "epoch": 0.8316520054878541, + "grad_norm": 0.6217427849769592, + "learning_rate": 9.599120246719992e-05, + "loss": 2.53, + "step": 10305 + }, + { + "epoch": 0.8317327092244371, + "grad_norm": 0.6638299226760864, + "learning_rate": 9.59754283096977e-05, + "loss": 2.5323, + "step": 10306 + }, + { + "epoch": 0.8318134129610201, + "grad_norm": 0.6834245920181274, + "learning_rate": 9.595965425249828e-05, + "loss": 2.5339, + "step": 10307 + }, + { + "epoch": 0.831894116697603, + "grad_norm": 0.8013476729393005, + "learning_rate": 9.594388029599484e-05, + "loss": 2.4925, + "step": 10308 + }, + { + "epoch": 0.8319748204341861, + "grad_norm": 0.7677187323570251, + "learning_rate": 9.592810644058049e-05, + "loss": 2.5717, + "step": 10309 + }, + { + "epoch": 0.8320555241707691, + "grad_norm": 0.6558046340942383, + "learning_rate": 9.591233268664841e-05, + "loss": 2.5631, + "step": 10310 + }, + { + "epoch": 0.8321362279073521, + "grad_norm": 0.6648481488227844, + "learning_rate": 9.589655903459165e-05, + "loss": 2.5232, + "step": 10311 + }, + { + "epoch": 0.8322169316439351, + "grad_norm": 0.6907756328582764, + "learning_rate": 9.588078548480338e-05, + "loss": 2.4804, + "step": 10312 + }, + { + 
"epoch": 0.8322976353805182, + "grad_norm": 0.6924928426742554, + "learning_rate": 9.586501203767675e-05, + "loss": 2.4648, + "step": 10313 + }, + { + "epoch": 0.8323783391171011, + "grad_norm": 0.7654799222946167, + "learning_rate": 9.584923869360477e-05, + "loss": 2.6184, + "step": 10314 + }, + { + "epoch": 0.8324590428536841, + "grad_norm": 0.7056179046630859, + "learning_rate": 9.58334654529806e-05, + "loss": 2.5862, + "step": 10315 + }, + { + "epoch": 0.8325397465902671, + "grad_norm": 0.7245064973831177, + "learning_rate": 9.581769231619743e-05, + "loss": 2.4866, + "step": 10316 + }, + { + "epoch": 0.8326204503268502, + "grad_norm": 0.6782355308532715, + "learning_rate": 9.580191928364824e-05, + "loss": 2.5519, + "step": 10317 + }, + { + "epoch": 0.8327011540634331, + "grad_norm": 0.6910805106163025, + "learning_rate": 9.578614635572621e-05, + "loss": 2.542, + "step": 10318 + }, + { + "epoch": 0.8327818578000161, + "grad_norm": 0.6858026385307312, + "learning_rate": 9.577037353282444e-05, + "loss": 2.5601, + "step": 10319 + }, + { + "epoch": 0.8328625615365991, + "grad_norm": 0.6886423230171204, + "learning_rate": 9.5754600815336e-05, + "loss": 2.5817, + "step": 10320 + }, + { + "epoch": 0.8329432652731822, + "grad_norm": 0.7585750818252563, + "learning_rate": 9.573882820365402e-05, + "loss": 2.5153, + "step": 10321 + }, + { + "epoch": 0.8330239690097652, + "grad_norm": 0.7004472613334656, + "learning_rate": 9.57230556981716e-05, + "loss": 2.5456, + "step": 10322 + }, + { + "epoch": 0.8331046727463481, + "grad_norm": 0.6530508399009705, + "learning_rate": 9.570728329928179e-05, + "loss": 2.5453, + "step": 10323 + }, + { + "epoch": 0.8331853764829311, + "grad_norm": 0.6767956614494324, + "learning_rate": 9.569151100737769e-05, + "loss": 2.5311, + "step": 10324 + }, + { + "epoch": 0.8332660802195142, + "grad_norm": 0.6835905909538269, + "learning_rate": 9.56757388228524e-05, + "loss": 2.5417, + "step": 10325 + }, + { + "epoch": 0.8333467839560972, + 
"grad_norm": 0.6582748889923096, + "learning_rate": 9.565996674609901e-05, + "loss": 2.5144, + "step": 10326 + }, + { + "epoch": 0.8334274876926802, + "grad_norm": 0.6815205216407776, + "learning_rate": 9.56441947775106e-05, + "loss": 2.5272, + "step": 10327 + }, + { + "epoch": 0.8335081914292631, + "grad_norm": 0.6810150146484375, + "learning_rate": 9.562842291748022e-05, + "loss": 2.5475, + "step": 10328 + }, + { + "epoch": 0.8335888951658462, + "grad_norm": 0.7220990657806396, + "learning_rate": 9.5612651166401e-05, + "loss": 2.54, + "step": 10329 + }, + { + "epoch": 0.8336695989024292, + "grad_norm": 0.6840164065361023, + "learning_rate": 9.559687952466596e-05, + "loss": 2.5987, + "step": 10330 + }, + { + "epoch": 0.8337503026390122, + "grad_norm": 0.7085031867027283, + "learning_rate": 9.558110799266819e-05, + "loss": 2.5674, + "step": 10331 + }, + { + "epoch": 0.8338310063755952, + "grad_norm": 0.6658117175102234, + "learning_rate": 9.55653365708008e-05, + "loss": 2.5793, + "step": 10332 + }, + { + "epoch": 0.8339117101121782, + "grad_norm": 0.782648503780365, + "learning_rate": 9.554956525945677e-05, + "loss": 2.5463, + "step": 10333 + }, + { + "epoch": 0.8339924138487612, + "grad_norm": 0.6999937891960144, + "learning_rate": 9.553379405902922e-05, + "loss": 2.5961, + "step": 10334 + }, + { + "epoch": 0.8340731175853442, + "grad_norm": 0.6681220531463623, + "learning_rate": 9.55180229699112e-05, + "loss": 2.6055, + "step": 10335 + }, + { + "epoch": 0.8341538213219272, + "grad_norm": 0.7127133011817932, + "learning_rate": 9.550225199249577e-05, + "loss": 2.5571, + "step": 10336 + }, + { + "epoch": 0.8342345250585103, + "grad_norm": 0.6939001679420471, + "learning_rate": 9.548648112717596e-05, + "loss": 2.5653, + "step": 10337 + }, + { + "epoch": 0.8343152287950932, + "grad_norm": 0.7483924031257629, + "learning_rate": 9.547071037434487e-05, + "loss": 2.5316, + "step": 10338 + }, + { + "epoch": 0.8343959325316762, + "grad_norm": 0.7975850105285645, + 
"learning_rate": 9.545493973439548e-05, + "loss": 2.6039, + "step": 10339 + }, + { + "epoch": 0.8344766362682592, + "grad_norm": 0.6893026232719421, + "learning_rate": 9.543916920772087e-05, + "loss": 2.5797, + "step": 10340 + }, + { + "epoch": 0.8345573400048423, + "grad_norm": 0.752869188785553, + "learning_rate": 9.542339879471409e-05, + "loss": 2.5677, + "step": 10341 + }, + { + "epoch": 0.8346380437414253, + "grad_norm": 0.7336339354515076, + "learning_rate": 9.540762849576822e-05, + "loss": 2.5212, + "step": 10342 + }, + { + "epoch": 0.8347187474780082, + "grad_norm": 0.7742713689804077, + "learning_rate": 9.539185831127621e-05, + "loss": 2.5599, + "step": 10343 + }, + { + "epoch": 0.8347994512145912, + "grad_norm": 0.7205352783203125, + "learning_rate": 9.537608824163114e-05, + "loss": 2.5591, + "step": 10344 + }, + { + "epoch": 0.8348801549511743, + "grad_norm": 0.7794787287712097, + "learning_rate": 9.536031828722605e-05, + "loss": 2.5858, + "step": 10345 + }, + { + "epoch": 0.8349608586877573, + "grad_norm": 0.7129528522491455, + "learning_rate": 9.534454844845396e-05, + "loss": 2.5591, + "step": 10346 + }, + { + "epoch": 0.8350415624243402, + "grad_norm": 0.731038510799408, + "learning_rate": 9.532877872570787e-05, + "loss": 2.5774, + "step": 10347 + }, + { + "epoch": 0.8351222661609232, + "grad_norm": 0.7706510424613953, + "learning_rate": 9.531300911938087e-05, + "loss": 2.6102, + "step": 10348 + }, + { + "epoch": 0.8352029698975063, + "grad_norm": 0.6890363097190857, + "learning_rate": 9.52972396298659e-05, + "loss": 2.5393, + "step": 10349 + }, + { + "epoch": 0.8352836736340893, + "grad_norm": 0.6792402863502502, + "learning_rate": 9.528147025755601e-05, + "loss": 2.5607, + "step": 10350 + }, + { + "epoch": 0.8353643773706723, + "grad_norm": 0.7097377777099609, + "learning_rate": 9.526570100284422e-05, + "loss": 2.5681, + "step": 10351 + }, + { + "epoch": 0.8354450811072552, + "grad_norm": 0.7530940771102905, + "learning_rate": 9.524993186612353e-05, 
+ "loss": 2.5405, + "step": 10352 + }, + { + "epoch": 0.8355257848438382, + "grad_norm": 0.714080810546875, + "learning_rate": 9.523416284778696e-05, + "loss": 2.5365, + "step": 10353 + }, + { + "epoch": 0.8356064885804213, + "grad_norm": 0.6745832562446594, + "learning_rate": 9.521839394822752e-05, + "loss": 2.5553, + "step": 10354 + }, + { + "epoch": 0.8356871923170043, + "grad_norm": 0.7163450121879578, + "learning_rate": 9.52026251678382e-05, + "loss": 2.5074, + "step": 10355 + }, + { + "epoch": 0.8357678960535873, + "grad_norm": 0.6876534223556519, + "learning_rate": 9.518685650701197e-05, + "loss": 2.5652, + "step": 10356 + }, + { + "epoch": 0.8358485997901702, + "grad_norm": 0.6424533128738403, + "learning_rate": 9.517108796614187e-05, + "loss": 2.4823, + "step": 10357 + }, + { + "epoch": 0.8359293035267533, + "grad_norm": 0.646802544593811, + "learning_rate": 9.515531954562094e-05, + "loss": 2.5602, + "step": 10358 + }, + { + "epoch": 0.8360100072633363, + "grad_norm": 0.7266993522644043, + "learning_rate": 9.513955124584205e-05, + "loss": 2.5384, + "step": 10359 + }, + { + "epoch": 0.8360907109999193, + "grad_norm": 0.7358742356300354, + "learning_rate": 9.512378306719826e-05, + "loss": 2.5798, + "step": 10360 + }, + { + "epoch": 0.8361714147365022, + "grad_norm": 0.7191498279571533, + "learning_rate": 9.510801501008256e-05, + "loss": 2.5229, + "step": 10361 + }, + { + "epoch": 0.8362521184730853, + "grad_norm": 0.7058876156806946, + "learning_rate": 9.509224707488788e-05, + "loss": 2.5146, + "step": 10362 + }, + { + "epoch": 0.8363328222096683, + "grad_norm": 0.7348346710205078, + "learning_rate": 9.507647926200725e-05, + "loss": 2.5878, + "step": 10363 + }, + { + "epoch": 0.8364135259462513, + "grad_norm": 0.7464115619659424, + "learning_rate": 9.506071157183366e-05, + "loss": 2.6056, + "step": 10364 + }, + { + "epoch": 0.8364942296828343, + "grad_norm": 0.7077332139015198, + "learning_rate": 9.504494400476e-05, + "loss": 2.5161, + "step": 10365 + }, + { 
+ "epoch": 0.8365749334194174, + "grad_norm": 0.7381827235221863, + "learning_rate": 9.502917656117928e-05, + "loss": 2.519, + "step": 10366 + }, + { + "epoch": 0.8366556371560003, + "grad_norm": 0.743180513381958, + "learning_rate": 9.501340924148452e-05, + "loss": 2.6149, + "step": 10367 + }, + { + "epoch": 0.8367363408925833, + "grad_norm": 0.6496078372001648, + "learning_rate": 9.499764204606863e-05, + "loss": 2.4969, + "step": 10368 + }, + { + "epoch": 0.8368170446291663, + "grad_norm": 0.6796541810035706, + "learning_rate": 9.498187497532454e-05, + "loss": 2.5304, + "step": 10369 + }, + { + "epoch": 0.8368977483657494, + "grad_norm": 0.6555948853492737, + "learning_rate": 9.496610802964529e-05, + "loss": 2.6029, + "step": 10370 + }, + { + "epoch": 0.8369784521023323, + "grad_norm": 0.6990405321121216, + "learning_rate": 9.495034120942374e-05, + "loss": 2.5286, + "step": 10371 + }, + { + "epoch": 0.8370591558389153, + "grad_norm": 0.7417613863945007, + "learning_rate": 9.49345745150529e-05, + "loss": 2.5301, + "step": 10372 + }, + { + "epoch": 0.8371398595754983, + "grad_norm": 0.6809872388839722, + "learning_rate": 9.49188079469257e-05, + "loss": 2.5075, + "step": 10373 + }, + { + "epoch": 0.8372205633120814, + "grad_norm": 0.6537099480628967, + "learning_rate": 9.490304150543514e-05, + "loss": 2.5515, + "step": 10374 + }, + { + "epoch": 0.8373012670486644, + "grad_norm": 0.6660431027412415, + "learning_rate": 9.488727519097407e-05, + "loss": 2.549, + "step": 10375 + }, + { + "epoch": 0.8373819707852473, + "grad_norm": 0.7257838249206543, + "learning_rate": 9.487150900393546e-05, + "loss": 2.546, + "step": 10376 + }, + { + "epoch": 0.8374626745218303, + "grad_norm": 0.742085874080658, + "learning_rate": 9.485574294471226e-05, + "loss": 2.5302, + "step": 10377 + }, + { + "epoch": 0.8375433782584134, + "grad_norm": 0.659934401512146, + "learning_rate": 9.48399770136974e-05, + "loss": 2.5553, + "step": 10378 + }, + { + "epoch": 0.8376240819949964, + "grad_norm": 
0.7219613790512085, + "learning_rate": 9.482421121128377e-05, + "loss": 2.6186, + "step": 10379 + }, + { + "epoch": 0.8377047857315794, + "grad_norm": 0.706444263458252, + "learning_rate": 9.480844553786436e-05, + "loss": 2.5082, + "step": 10380 + }, + { + "epoch": 0.8377854894681623, + "grad_norm": 0.7527014017105103, + "learning_rate": 9.479267999383204e-05, + "loss": 2.5625, + "step": 10381 + }, + { + "epoch": 0.8378661932047454, + "grad_norm": 0.7488746643066406, + "learning_rate": 9.477691457957976e-05, + "loss": 2.528, + "step": 10382 + }, + { + "epoch": 0.8379468969413284, + "grad_norm": 0.7394229173660278, + "learning_rate": 9.476114929550045e-05, + "loss": 2.5387, + "step": 10383 + }, + { + "epoch": 0.8380276006779114, + "grad_norm": 0.7490981817245483, + "learning_rate": 9.474538414198695e-05, + "loss": 2.548, + "step": 10384 + }, + { + "epoch": 0.8381083044144944, + "grad_norm": 0.7203173041343689, + "learning_rate": 9.472961911943222e-05, + "loss": 2.5547, + "step": 10385 + }, + { + "epoch": 0.8381890081510774, + "grad_norm": 0.6929850578308105, + "learning_rate": 9.471385422822917e-05, + "loss": 2.4831, + "step": 10386 + }, + { + "epoch": 0.8382697118876604, + "grad_norm": 0.6303263902664185, + "learning_rate": 9.469808946877067e-05, + "loss": 2.4569, + "step": 10387 + }, + { + "epoch": 0.8383504156242434, + "grad_norm": 0.6986981630325317, + "learning_rate": 9.468232484144964e-05, + "loss": 2.5278, + "step": 10388 + }, + { + "epoch": 0.8384311193608264, + "grad_norm": 0.6910964846611023, + "learning_rate": 9.466656034665898e-05, + "loss": 2.5657, + "step": 10389 + }, + { + "epoch": 0.8385118230974095, + "grad_norm": 0.6571134924888611, + "learning_rate": 9.465079598479163e-05, + "loss": 2.6017, + "step": 10390 + }, + { + "epoch": 0.8385925268339924, + "grad_norm": 0.7117733359336853, + "learning_rate": 9.463503175624034e-05, + "loss": 2.56, + "step": 10391 + }, + { + "epoch": 0.8386732305705754, + "grad_norm": 0.7052998542785645, + "learning_rate": 
9.461926766139813e-05, + "loss": 2.4998, + "step": 10392 + }, + { + "epoch": 0.8387539343071584, + "grad_norm": 0.7306597232818604, + "learning_rate": 9.460350370065786e-05, + "loss": 2.5292, + "step": 10393 + }, + { + "epoch": 0.8388346380437415, + "grad_norm": 0.681069552898407, + "learning_rate": 9.458773987441235e-05, + "loss": 2.5469, + "step": 10394 + }, + { + "epoch": 0.8389153417803245, + "grad_norm": 0.6681767702102661, + "learning_rate": 9.45719761830545e-05, + "loss": 2.5476, + "step": 10395 + }, + { + "epoch": 0.8389960455169074, + "grad_norm": 0.6759339570999146, + "learning_rate": 9.455621262697723e-05, + "loss": 2.4806, + "step": 10396 + }, + { + "epoch": 0.8390767492534904, + "grad_norm": 0.695829451084137, + "learning_rate": 9.454044920657333e-05, + "loss": 2.5255, + "step": 10397 + }, + { + "epoch": 0.8391574529900735, + "grad_norm": 0.686568558216095, + "learning_rate": 9.452468592223572e-05, + "loss": 2.5655, + "step": 10398 + }, + { + "epoch": 0.8392381567266565, + "grad_norm": 0.6529035568237305, + "learning_rate": 9.45089227743573e-05, + "loss": 2.5026, + "step": 10399 + }, + { + "epoch": 0.8393188604632394, + "grad_norm": 0.6809061765670776, + "learning_rate": 9.449315976333082e-05, + "loss": 2.5549, + "step": 10400 + }, + { + "epoch": 0.8393995641998224, + "grad_norm": 0.6920269727706909, + "learning_rate": 9.447739688954919e-05, + "loss": 2.517, + "step": 10401 + }, + { + "epoch": 0.8394802679364055, + "grad_norm": 0.6626712083816528, + "learning_rate": 9.446163415340526e-05, + "loss": 2.605, + "step": 10402 + }, + { + "epoch": 0.8395609716729885, + "grad_norm": 0.6912916898727417, + "learning_rate": 9.444587155529195e-05, + "loss": 2.588, + "step": 10403 + }, + { + "epoch": 0.8396416754095715, + "grad_norm": 0.6771352291107178, + "learning_rate": 9.443010909560198e-05, + "loss": 2.5148, + "step": 10404 + }, + { + "epoch": 0.8397223791461544, + "grad_norm": 0.7015509009361267, + "learning_rate": 9.441434677472827e-05, + "loss": 2.5425, + 
"step": 10405 + }, + { + "epoch": 0.8398030828827374, + "grad_norm": 0.6789976358413696, + "learning_rate": 9.439858459306364e-05, + "loss": 2.598, + "step": 10406 + }, + { + "epoch": 0.8398837866193205, + "grad_norm": 0.674391508102417, + "learning_rate": 9.438282255100091e-05, + "loss": 2.5581, + "step": 10407 + }, + { + "epoch": 0.8399644903559035, + "grad_norm": 0.6944772005081177, + "learning_rate": 9.436706064893294e-05, + "loss": 2.5591, + "step": 10408 + }, + { + "epoch": 0.8400451940924865, + "grad_norm": 0.6750832200050354, + "learning_rate": 9.435129888725259e-05, + "loss": 2.533, + "step": 10409 + }, + { + "epoch": 0.8401258978290694, + "grad_norm": 0.6927465200424194, + "learning_rate": 9.433553726635257e-05, + "loss": 2.536, + "step": 10410 + }, + { + "epoch": 0.8402066015656525, + "grad_norm": 0.6399651765823364, + "learning_rate": 9.431977578662578e-05, + "loss": 2.5123, + "step": 10411 + }, + { + "epoch": 0.8402873053022355, + "grad_norm": 0.7588143944740295, + "learning_rate": 9.430401444846505e-05, + "loss": 2.6133, + "step": 10412 + }, + { + "epoch": 0.8403680090388185, + "grad_norm": 0.8010972738265991, + "learning_rate": 9.428825325226313e-05, + "loss": 2.5407, + "step": 10413 + }, + { + "epoch": 0.8404487127754015, + "grad_norm": 0.6847307085990906, + "learning_rate": 9.427249219841288e-05, + "loss": 2.5912, + "step": 10414 + }, + { + "epoch": 0.8405294165119845, + "grad_norm": 0.7005963325500488, + "learning_rate": 9.425673128730716e-05, + "loss": 2.5059, + "step": 10415 + }, + { + "epoch": 0.8406101202485675, + "grad_norm": 0.7383962273597717, + "learning_rate": 9.424097051933862e-05, + "loss": 2.5157, + "step": 10416 + }, + { + "epoch": 0.8406908239851505, + "grad_norm": 0.7078843712806702, + "learning_rate": 9.422520989490018e-05, + "loss": 2.6093, + "step": 10417 + }, + { + "epoch": 0.8407715277217335, + "grad_norm": 0.7449501752853394, + "learning_rate": 9.42094494143846e-05, + "loss": 2.594, + "step": 10418 + }, + { + "epoch": 
0.8408522314583166, + "grad_norm": 0.6823872923851013, + "learning_rate": 9.419368907818473e-05, + "loss": 2.5653, + "step": 10419 + }, + { + "epoch": 0.8409329351948995, + "grad_norm": 0.7403056025505066, + "learning_rate": 9.417792888669325e-05, + "loss": 2.5296, + "step": 10420 + }, + { + "epoch": 0.8410136389314825, + "grad_norm": 0.6858980655670166, + "learning_rate": 9.4162168840303e-05, + "loss": 2.5401, + "step": 10421 + }, + { + "epoch": 0.8410943426680655, + "grad_norm": 0.692348837852478, + "learning_rate": 9.41464089394068e-05, + "loss": 2.4797, + "step": 10422 + }, + { + "epoch": 0.8411750464046486, + "grad_norm": 0.6939836144447327, + "learning_rate": 9.413064918439736e-05, + "loss": 2.505, + "step": 10423 + }, + { + "epoch": 0.8412557501412316, + "grad_norm": 0.7334314584732056, + "learning_rate": 9.411488957566748e-05, + "loss": 2.5792, + "step": 10424 + }, + { + "epoch": 0.8413364538778145, + "grad_norm": 0.6977920532226562, + "learning_rate": 9.409913011360999e-05, + "loss": 2.5204, + "step": 10425 + }, + { + "epoch": 0.8414171576143975, + "grad_norm": 0.7121822834014893, + "learning_rate": 9.408337079861756e-05, + "loss": 2.571, + "step": 10426 + }, + { + "epoch": 0.8414978613509806, + "grad_norm": 0.761476993560791, + "learning_rate": 9.406761163108297e-05, + "loss": 2.5845, + "step": 10427 + }, + { + "epoch": 0.8415785650875636, + "grad_norm": 0.7160221934318542, + "learning_rate": 9.405185261139906e-05, + "loss": 2.5331, + "step": 10428 + }, + { + "epoch": 0.8416592688241465, + "grad_norm": 0.6828827857971191, + "learning_rate": 9.40360937399585e-05, + "loss": 2.5596, + "step": 10429 + }, + { + "epoch": 0.8417399725607295, + "grad_norm": 0.756473183631897, + "learning_rate": 9.402033501715406e-05, + "loss": 2.6107, + "step": 10430 + }, + { + "epoch": 0.8418206762973126, + "grad_norm": 0.7486895322799683, + "learning_rate": 9.400457644337853e-05, + "loss": 2.5388, + "step": 10431 + }, + { + "epoch": 0.8419013800338956, + "grad_norm": 
0.7759146690368652, + "learning_rate": 9.398881801902461e-05, + "loss": 2.5559, + "step": 10432 + }, + { + "epoch": 0.8419820837704786, + "grad_norm": 0.71756911277771, + "learning_rate": 9.397305974448506e-05, + "loss": 2.6109, + "step": 10433 + }, + { + "epoch": 0.8420627875070615, + "grad_norm": 0.7741644382476807, + "learning_rate": 9.395730162015261e-05, + "loss": 2.5664, + "step": 10434 + }, + { + "epoch": 0.8421434912436446, + "grad_norm": 0.7155938744544983, + "learning_rate": 9.394154364642006e-05, + "loss": 2.5693, + "step": 10435 + }, + { + "epoch": 0.8422241949802276, + "grad_norm": 0.6862725019454956, + "learning_rate": 9.392578582368002e-05, + "loss": 2.4942, + "step": 10436 + }, + { + "epoch": 0.8423048987168106, + "grad_norm": 0.6698417067527771, + "learning_rate": 9.391002815232528e-05, + "loss": 2.5258, + "step": 10437 + }, + { + "epoch": 0.8423856024533936, + "grad_norm": 0.7756468057632446, + "learning_rate": 9.389427063274858e-05, + "loss": 2.5008, + "step": 10438 + }, + { + "epoch": 0.8424663061899766, + "grad_norm": 0.6579857468605042, + "learning_rate": 9.387851326534259e-05, + "loss": 2.5335, + "step": 10439 + }, + { + "epoch": 0.8425470099265596, + "grad_norm": 0.7673436403274536, + "learning_rate": 9.386275605050006e-05, + "loss": 2.5646, + "step": 10440 + }, + { + "epoch": 0.8426277136631426, + "grad_norm": 0.7377188205718994, + "learning_rate": 9.384699898861372e-05, + "loss": 2.568, + "step": 10441 + }, + { + "epoch": 0.8427084173997256, + "grad_norm": 0.6502123475074768, + "learning_rate": 9.38312420800762e-05, + "loss": 2.6091, + "step": 10442 + }, + { + "epoch": 0.8427891211363087, + "grad_norm": 0.729852020740509, + "learning_rate": 9.381548532528026e-05, + "loss": 2.4873, + "step": 10443 + }, + { + "epoch": 0.8428698248728916, + "grad_norm": 0.7419102191925049, + "learning_rate": 9.379972872461865e-05, + "loss": 2.4966, + "step": 10444 + }, + { + "epoch": 0.8429505286094746, + "grad_norm": 0.6921093463897705, + "learning_rate": 
9.378397227848395e-05, + "loss": 2.4895, + "step": 10445 + }, + { + "epoch": 0.8430312323460576, + "grad_norm": 0.7697325944900513, + "learning_rate": 9.376821598726892e-05, + "loss": 2.5779, + "step": 10446 + }, + { + "epoch": 0.8431119360826407, + "grad_norm": 0.6441029906272888, + "learning_rate": 9.375245985136626e-05, + "loss": 2.4909, + "step": 10447 + }, + { + "epoch": 0.8431926398192237, + "grad_norm": 0.6962057948112488, + "learning_rate": 9.373670387116861e-05, + "loss": 2.5602, + "step": 10448 + }, + { + "epoch": 0.8432733435558066, + "grad_norm": 0.7030641436576843, + "learning_rate": 9.372094804706867e-05, + "loss": 2.5641, + "step": 10449 + }, + { + "epoch": 0.8433540472923896, + "grad_norm": 0.6969063878059387, + "learning_rate": 9.370519237945912e-05, + "loss": 2.5555, + "step": 10450 + }, + { + "epoch": 0.8434347510289727, + "grad_norm": 0.7169879674911499, + "learning_rate": 9.368943686873267e-05, + "loss": 2.5258, + "step": 10451 + }, + { + "epoch": 0.8435154547655557, + "grad_norm": 0.7198735475540161, + "learning_rate": 9.36736815152819e-05, + "loss": 2.5192, + "step": 10452 + }, + { + "epoch": 0.8435961585021386, + "grad_norm": 0.6613535284996033, + "learning_rate": 9.365792631949951e-05, + "loss": 2.5596, + "step": 10453 + }, + { + "epoch": 0.8436768622387216, + "grad_norm": 0.6377065777778625, + "learning_rate": 9.364217128177824e-05, + "loss": 2.5518, + "step": 10454 + }, + { + "epoch": 0.8437575659753046, + "grad_norm": 0.6670635938644409, + "learning_rate": 9.362641640251063e-05, + "loss": 2.4793, + "step": 10455 + }, + { + "epoch": 0.8438382697118877, + "grad_norm": 0.6556122899055481, + "learning_rate": 9.361066168208939e-05, + "loss": 2.5492, + "step": 10456 + }, + { + "epoch": 0.8439189734484707, + "grad_norm": 0.7262280583381653, + "learning_rate": 9.35949071209072e-05, + "loss": 2.6059, + "step": 10457 + }, + { + "epoch": 0.8439996771850536, + "grad_norm": 0.702953040599823, + "learning_rate": 9.357915271935662e-05, + "loss": 
2.5445, + "step": 10458 + }, + { + "epoch": 0.8440803809216366, + "grad_norm": 0.6619930267333984, + "learning_rate": 9.356339847783036e-05, + "loss": 2.5688, + "step": 10459 + }, + { + "epoch": 0.8441610846582197, + "grad_norm": 0.7038032412528992, + "learning_rate": 9.354764439672106e-05, + "loss": 2.5195, + "step": 10460 + }, + { + "epoch": 0.8442417883948027, + "grad_norm": 0.6615132689476013, + "learning_rate": 9.353189047642129e-05, + "loss": 2.5176, + "step": 10461 + }, + { + "epoch": 0.8443224921313857, + "grad_norm": 0.6524826288223267, + "learning_rate": 9.351613671732372e-05, + "loss": 2.4294, + "step": 10462 + }, + { + "epoch": 0.8444031958679686, + "grad_norm": 0.6526279449462891, + "learning_rate": 9.350038311982099e-05, + "loss": 2.595, + "step": 10463 + }, + { + "epoch": 0.8444838996045517, + "grad_norm": 0.6610859632492065, + "learning_rate": 9.348462968430569e-05, + "loss": 2.5311, + "step": 10464 + }, + { + "epoch": 0.8445646033411347, + "grad_norm": 0.6835470795631409, + "learning_rate": 9.346887641117045e-05, + "loss": 2.5694, + "step": 10465 + }, + { + "epoch": 0.8446453070777177, + "grad_norm": 0.6768551468849182, + "learning_rate": 9.345312330080787e-05, + "loss": 2.6082, + "step": 10466 + }, + { + "epoch": 0.8447260108143007, + "grad_norm": 0.6368672847747803, + "learning_rate": 9.343737035361059e-05, + "loss": 2.5221, + "step": 10467 + }, + { + "epoch": 0.8448067145508837, + "grad_norm": 0.6952844858169556, + "learning_rate": 9.34216175699712e-05, + "loss": 2.5003, + "step": 10468 + }, + { + "epoch": 0.8448874182874667, + "grad_norm": 0.6663931012153625, + "learning_rate": 9.340586495028227e-05, + "loss": 2.5469, + "step": 10469 + }, + { + "epoch": 0.8449681220240497, + "grad_norm": 0.6840688586235046, + "learning_rate": 9.339011249493647e-05, + "loss": 2.5499, + "step": 10470 + }, + { + "epoch": 0.8450488257606327, + "grad_norm": 0.6832869052886963, + "learning_rate": 9.337436020432632e-05, + "loss": 2.5492, + "step": 10471 + }, + { + 
"epoch": 0.8451295294972158, + "grad_norm": 0.7444044947624207, + "learning_rate": 9.335860807884442e-05, + "loss": 2.5791, + "step": 10472 + }, + { + "epoch": 0.8452102332337987, + "grad_norm": 0.6821839809417725, + "learning_rate": 9.334285611888339e-05, + "loss": 2.4772, + "step": 10473 + }, + { + "epoch": 0.8452909369703817, + "grad_norm": 0.6209141612052917, + "learning_rate": 9.332710432483577e-05, + "loss": 2.5656, + "step": 10474 + }, + { + "epoch": 0.8453716407069647, + "grad_norm": 0.6531212329864502, + "learning_rate": 9.331135269709415e-05, + "loss": 2.5285, + "step": 10475 + }, + { + "epoch": 0.8454523444435478, + "grad_norm": 0.6418079137802124, + "learning_rate": 9.329560123605115e-05, + "loss": 2.5503, + "step": 10476 + }, + { + "epoch": 0.8455330481801308, + "grad_norm": 0.6636360287666321, + "learning_rate": 9.327984994209924e-05, + "loss": 2.528, + "step": 10477 + }, + { + "epoch": 0.8456137519167137, + "grad_norm": 0.6196488738059998, + "learning_rate": 9.326409881563102e-05, + "loss": 2.4907, + "step": 10478 + }, + { + "epoch": 0.8456944556532967, + "grad_norm": 0.6339137554168701, + "learning_rate": 9.324834785703913e-05, + "loss": 2.4672, + "step": 10479 + }, + { + "epoch": 0.8457751593898798, + "grad_norm": 0.6803932189941406, + "learning_rate": 9.323259706671602e-05, + "loss": 2.5538, + "step": 10480 + }, + { + "epoch": 0.8458558631264628, + "grad_norm": 0.6815275549888611, + "learning_rate": 9.321684644505429e-05, + "loss": 2.5291, + "step": 10481 + }, + { + "epoch": 0.8459365668630457, + "grad_norm": 0.6497374773025513, + "learning_rate": 9.320109599244646e-05, + "loss": 2.5499, + "step": 10482 + }, + { + "epoch": 0.8460172705996287, + "grad_norm": 0.7966926097869873, + "learning_rate": 9.318534570928512e-05, + "loss": 2.523, + "step": 10483 + }, + { + "epoch": 0.8460979743362118, + "grad_norm": 0.6532156467437744, + "learning_rate": 9.316959559596276e-05, + "loss": 2.5138, + "step": 10484 + }, + { + "epoch": 0.8461786780727948, + 
"grad_norm": 0.7292522192001343, + "learning_rate": 9.315384565287193e-05, + "loss": 2.5413, + "step": 10485 + }, + { + "epoch": 0.8462593818093778, + "grad_norm": 0.7610795497894287, + "learning_rate": 9.313809588040519e-05, + "loss": 2.5071, + "step": 10486 + }, + { + "epoch": 0.8463400855459607, + "grad_norm": 0.7038258910179138, + "learning_rate": 9.312234627895502e-05, + "loss": 2.5568, + "step": 10487 + }, + { + "epoch": 0.8464207892825438, + "grad_norm": 0.7136046290397644, + "learning_rate": 9.310659684891395e-05, + "loss": 2.5372, + "step": 10488 + }, + { + "epoch": 0.8465014930191268, + "grad_norm": 0.7512896060943604, + "learning_rate": 9.309084759067452e-05, + "loss": 2.5821, + "step": 10489 + }, + { + "epoch": 0.8465821967557098, + "grad_norm": 0.7436400651931763, + "learning_rate": 9.307509850462922e-05, + "loss": 2.5489, + "step": 10490 + }, + { + "epoch": 0.8466629004922928, + "grad_norm": 0.6858603954315186, + "learning_rate": 9.305934959117056e-05, + "loss": 2.5622, + "step": 10491 + }, + { + "epoch": 0.8467436042288758, + "grad_norm": 0.707185685634613, + "learning_rate": 9.304360085069107e-05, + "loss": 2.5275, + "step": 10492 + }, + { + "epoch": 0.8468243079654588, + "grad_norm": 0.7207933068275452, + "learning_rate": 9.302785228358322e-05, + "loss": 2.5877, + "step": 10493 + }, + { + "epoch": 0.8469050117020418, + "grad_norm": 0.6470080614089966, + "learning_rate": 9.30121038902395e-05, + "loss": 2.5117, + "step": 10494 + }, + { + "epoch": 0.8469857154386248, + "grad_norm": 0.75248783826828, + "learning_rate": 9.299635567105247e-05, + "loss": 2.5259, + "step": 10495 + }, + { + "epoch": 0.8470664191752079, + "grad_norm": 0.7150708436965942, + "learning_rate": 9.298060762641452e-05, + "loss": 2.551, + "step": 10496 + }, + { + "epoch": 0.8471471229117908, + "grad_norm": 0.6865069270133972, + "learning_rate": 9.296485975671818e-05, + "loss": 2.5184, + "step": 10497 + }, + { + "epoch": 0.8472278266483738, + "grad_norm": 0.7188237309455872, + 
"learning_rate": 9.294911206235593e-05, + "loss": 2.5207, + "step": 10498 + }, + { + "epoch": 0.8473085303849568, + "grad_norm": 0.6907880902290344, + "learning_rate": 9.293336454372026e-05, + "loss": 2.5544, + "step": 10499 + }, + { + "epoch": 0.8473892341215399, + "grad_norm": 0.7626079320907593, + "learning_rate": 9.291761720120358e-05, + "loss": 2.5741, + "step": 10500 + }, + { + "epoch": 0.8474699378581229, + "grad_norm": 0.6731963753700256, + "learning_rate": 9.29018700351984e-05, + "loss": 2.5433, + "step": 10501 + }, + { + "epoch": 0.8475506415947058, + "grad_norm": 0.7256288528442383, + "learning_rate": 9.288612304609723e-05, + "loss": 2.5131, + "step": 10502 + }, + { + "epoch": 0.8476313453312888, + "grad_norm": 0.7129119634628296, + "learning_rate": 9.287037623429242e-05, + "loss": 2.5054, + "step": 10503 + }, + { + "epoch": 0.8477120490678719, + "grad_norm": 0.6711156964302063, + "learning_rate": 9.285462960017644e-05, + "loss": 2.5671, + "step": 10504 + }, + { + "epoch": 0.8477927528044549, + "grad_norm": 0.7268081903457642, + "learning_rate": 9.283888314414184e-05, + "loss": 2.5627, + "step": 10505 + }, + { + "epoch": 0.8478734565410379, + "grad_norm": 0.8635050058364868, + "learning_rate": 9.282313686658094e-05, + "loss": 2.517, + "step": 10506 + }, + { + "epoch": 0.8479541602776208, + "grad_norm": 0.7077138423919678, + "learning_rate": 9.280739076788624e-05, + "loss": 2.5551, + "step": 10507 + }, + { + "epoch": 0.8480348640142038, + "grad_norm": 0.6312204599380493, + "learning_rate": 9.279164484845018e-05, + "loss": 2.5329, + "step": 10508 + }, + { + "epoch": 0.8481155677507869, + "grad_norm": 0.6749829649925232, + "learning_rate": 9.277589910866516e-05, + "loss": 2.5092, + "step": 10509 + }, + { + "epoch": 0.8481962714873699, + "grad_norm": 0.753391683101654, + "learning_rate": 9.27601535489236e-05, + "loss": 2.6244, + "step": 10510 + }, + { + "epoch": 0.8482769752239528, + "grad_norm": 0.7230119109153748, + "learning_rate": 9.2744408169618e-05, + 
"loss": 2.5021, + "step": 10511 + }, + { + "epoch": 0.8483576789605358, + "grad_norm": 0.6759157776832581, + "learning_rate": 9.272866297114067e-05, + "loss": 2.5399, + "step": 10512 + }, + { + "epoch": 0.8484383826971189, + "grad_norm": 0.7049473524093628, + "learning_rate": 9.271291795388406e-05, + "loss": 2.5024, + "step": 10513 + }, + { + "epoch": 0.8485190864337019, + "grad_norm": 0.6579850912094116, + "learning_rate": 9.269717311824058e-05, + "loss": 2.5019, + "step": 10514 + }, + { + "epoch": 0.8485997901702849, + "grad_norm": 0.7091391086578369, + "learning_rate": 9.268142846460265e-05, + "loss": 2.5785, + "step": 10515 + }, + { + "epoch": 0.8486804939068678, + "grad_norm": 0.6612898707389832, + "learning_rate": 9.266568399336266e-05, + "loss": 2.5046, + "step": 10516 + }, + { + "epoch": 0.8487611976434509, + "grad_norm": 0.6348623633384705, + "learning_rate": 9.264993970491298e-05, + "loss": 2.543, + "step": 10517 + }, + { + "epoch": 0.8488419013800339, + "grad_norm": 0.688360869884491, + "learning_rate": 9.263419559964604e-05, + "loss": 2.5294, + "step": 10518 + }, + { + "epoch": 0.8489226051166169, + "grad_norm": 0.6483190059661865, + "learning_rate": 9.261845167795418e-05, + "loss": 2.5623, + "step": 10519 + }, + { + "epoch": 0.8490033088531999, + "grad_norm": 0.689379096031189, + "learning_rate": 9.26027079402298e-05, + "loss": 2.4871, + "step": 10520 + }, + { + "epoch": 0.8490840125897829, + "grad_norm": 0.6627655625343323, + "learning_rate": 9.25869643868653e-05, + "loss": 2.5353, + "step": 10521 + }, + { + "epoch": 0.8491647163263659, + "grad_norm": 0.6701192259788513, + "learning_rate": 9.2571221018253e-05, + "loss": 2.5003, + "step": 10522 + }, + { + "epoch": 0.8492454200629489, + "grad_norm": 0.7413944005966187, + "learning_rate": 9.255547783478529e-05, + "loss": 2.5473, + "step": 10523 + }, + { + "epoch": 0.8493261237995319, + "grad_norm": 0.6490365266799927, + "learning_rate": 9.253973483685455e-05, + "loss": 2.5168, + "step": 10524 + }, + { + 
"epoch": 0.849406827536115, + "grad_norm": 0.7303688526153564, + "learning_rate": 9.25239920248531e-05, + "loss": 2.5953, + "step": 10525 + }, + { + "epoch": 0.8494875312726979, + "grad_norm": 0.7132991552352905, + "learning_rate": 9.250824939917331e-05, + "loss": 2.475, + "step": 10526 + }, + { + "epoch": 0.8495682350092809, + "grad_norm": 0.6935676336288452, + "learning_rate": 9.249250696020753e-05, + "loss": 2.5212, + "step": 10527 + }, + { + "epoch": 0.8496489387458639, + "grad_norm": 0.732961118221283, + "learning_rate": 9.247676470834814e-05, + "loss": 2.5848, + "step": 10528 + }, + { + "epoch": 0.849729642482447, + "grad_norm": 0.6899160146713257, + "learning_rate": 9.246102264398739e-05, + "loss": 2.4551, + "step": 10529 + }, + { + "epoch": 0.84981034621903, + "grad_norm": 0.6941123604774475, + "learning_rate": 9.244528076751766e-05, + "loss": 2.5441, + "step": 10530 + }, + { + "epoch": 0.8498910499556129, + "grad_norm": 0.7351016998291016, + "learning_rate": 9.242953907933134e-05, + "loss": 2.6519, + "step": 10531 + }, + { + "epoch": 0.8499717536921959, + "grad_norm": 0.7156691551208496, + "learning_rate": 9.241379757982065e-05, + "loss": 2.573, + "step": 10532 + }, + { + "epoch": 0.850052457428779, + "grad_norm": 0.7137688994407654, + "learning_rate": 9.239805626937797e-05, + "loss": 2.5688, + "step": 10533 + }, + { + "epoch": 0.850133161165362, + "grad_norm": 0.7018687129020691, + "learning_rate": 9.238231514839559e-05, + "loss": 2.5725, + "step": 10534 + }, + { + "epoch": 0.850213864901945, + "grad_norm": 0.6723659634590149, + "learning_rate": 9.236657421726583e-05, + "loss": 2.5661, + "step": 10535 + }, + { + "epoch": 0.8502945686385279, + "grad_norm": 0.7105850577354431, + "learning_rate": 9.235083347638098e-05, + "loss": 2.5676, + "step": 10536 + }, + { + "epoch": 0.850375272375111, + "grad_norm": 0.682601809501648, + "learning_rate": 9.233509292613341e-05, + "loss": 2.5489, + "step": 10537 + }, + { + "epoch": 0.850455976111694, + "grad_norm": 
0.6703988313674927, + "learning_rate": 9.231935256691531e-05, + "loss": 2.5349, + "step": 10538 + }, + { + "epoch": 0.850536679848277, + "grad_norm": 0.6430882215499878, + "learning_rate": 9.230361239911903e-05, + "loss": 2.4959, + "step": 10539 + }, + { + "epoch": 0.8506173835848599, + "grad_norm": 0.7164519429206848, + "learning_rate": 9.228787242313687e-05, + "loss": 2.4999, + "step": 10540 + }, + { + "epoch": 0.850698087321443, + "grad_norm": 0.7463028430938721, + "learning_rate": 9.227213263936107e-05, + "loss": 2.545, + "step": 10541 + }, + { + "epoch": 0.850778791058026, + "grad_norm": 0.650577187538147, + "learning_rate": 9.22563930481839e-05, + "loss": 2.5707, + "step": 10542 + }, + { + "epoch": 0.850859494794609, + "grad_norm": 0.6808211207389832, + "learning_rate": 9.224065364999768e-05, + "loss": 2.5236, + "step": 10543 + }, + { + "epoch": 0.850940198531192, + "grad_norm": 0.6947758793830872, + "learning_rate": 9.222491444519467e-05, + "loss": 2.555, + "step": 10544 + }, + { + "epoch": 0.851020902267775, + "grad_norm": 0.6805624961853027, + "learning_rate": 9.22091754341671e-05, + "loss": 2.517, + "step": 10545 + }, + { + "epoch": 0.851101606004358, + "grad_norm": 0.6645655035972595, + "learning_rate": 9.219343661730724e-05, + "loss": 2.5237, + "step": 10546 + }, + { + "epoch": 0.851182309740941, + "grad_norm": 0.6912586092948914, + "learning_rate": 9.217769799500738e-05, + "loss": 2.5345, + "step": 10547 + }, + { + "epoch": 0.851263013477524, + "grad_norm": 0.6713781356811523, + "learning_rate": 9.21619595676597e-05, + "loss": 2.56, + "step": 10548 + }, + { + "epoch": 0.8513437172141071, + "grad_norm": 0.7031502723693848, + "learning_rate": 9.214622133565648e-05, + "loss": 2.4885, + "step": 10549 + }, + { + "epoch": 0.85142442095069, + "grad_norm": 0.6616455316543579, + "learning_rate": 9.213048329938997e-05, + "loss": 2.5101, + "step": 10550 + }, + { + "epoch": 0.851505124687273, + "grad_norm": 0.711077094078064, + "learning_rate": 
9.211474545925236e-05, + "loss": 2.6264, + "step": 10551 + }, + { + "epoch": 0.851585828423856, + "grad_norm": 0.7534502744674683, + "learning_rate": 9.209900781563592e-05, + "loss": 2.5417, + "step": 10552 + }, + { + "epoch": 0.8516665321604391, + "grad_norm": 0.7405222058296204, + "learning_rate": 9.208327036893288e-05, + "loss": 2.546, + "step": 10553 + }, + { + "epoch": 0.8517472358970221, + "grad_norm": 0.7014057040214539, + "learning_rate": 9.20675331195354e-05, + "loss": 2.5211, + "step": 10554 + }, + { + "epoch": 0.851827939633605, + "grad_norm": 0.6984074115753174, + "learning_rate": 9.205179606783573e-05, + "loss": 2.5181, + "step": 10555 + }, + { + "epoch": 0.851908643370188, + "grad_norm": 0.7312670350074768, + "learning_rate": 9.203605921422613e-05, + "loss": 2.5345, + "step": 10556 + }, + { + "epoch": 0.851989347106771, + "grad_norm": 0.6861104369163513, + "learning_rate": 9.202032255909871e-05, + "loss": 2.5426, + "step": 10557 + }, + { + "epoch": 0.8520700508433541, + "grad_norm": 0.6989030838012695, + "learning_rate": 9.200458610284571e-05, + "loss": 2.5221, + "step": 10558 + }, + { + "epoch": 0.852150754579937, + "grad_norm": 0.6645115613937378, + "learning_rate": 9.198884984585932e-05, + "loss": 2.4755, + "step": 10559 + }, + { + "epoch": 0.85223145831652, + "grad_norm": 0.6577785015106201, + "learning_rate": 9.197311378853176e-05, + "loss": 2.5491, + "step": 10560 + }, + { + "epoch": 0.852312162053103, + "grad_norm": 0.7311568856239319, + "learning_rate": 9.195737793125517e-05, + "loss": 2.5653, + "step": 10561 + }, + { + "epoch": 0.8523928657896861, + "grad_norm": 0.6469970345497131, + "learning_rate": 9.194164227442174e-05, + "loss": 2.5384, + "step": 10562 + }, + { + "epoch": 0.8524735695262691, + "grad_norm": 0.6562933325767517, + "learning_rate": 9.19259068184237e-05, + "loss": 2.5644, + "step": 10563 + }, + { + "epoch": 0.852554273262852, + "grad_norm": 0.7740273475646973, + "learning_rate": 9.19101715636531e-05, + "loss": 2.5868, + 
"step": 10564 + }, + { + "epoch": 0.852634976999435, + "grad_norm": 0.6461195349693298, + "learning_rate": 9.18944365105022e-05, + "loss": 2.4862, + "step": 10565 + }, + { + "epoch": 0.8527156807360181, + "grad_norm": 0.7230537533760071, + "learning_rate": 9.187870165936313e-05, + "loss": 2.5125, + "step": 10566 + }, + { + "epoch": 0.8527963844726011, + "grad_norm": 0.6858233213424683, + "learning_rate": 9.186296701062805e-05, + "loss": 2.5463, + "step": 10567 + }, + { + "epoch": 0.8528770882091841, + "grad_norm": 0.717407763004303, + "learning_rate": 9.184723256468908e-05, + "loss": 2.5399, + "step": 10568 + }, + { + "epoch": 0.852957791945767, + "grad_norm": 0.7537745237350464, + "learning_rate": 9.18314983219384e-05, + "loss": 2.5164, + "step": 10569 + }, + { + "epoch": 0.8530384956823501, + "grad_norm": 0.7068665027618408, + "learning_rate": 9.181576428276814e-05, + "loss": 2.5747, + "step": 10570 + }, + { + "epoch": 0.8531191994189331, + "grad_norm": 0.8013456463813782, + "learning_rate": 9.18000304475704e-05, + "loss": 2.5401, + "step": 10571 + }, + { + "epoch": 0.8531999031555161, + "grad_norm": 0.6458969712257385, + "learning_rate": 9.178429681673741e-05, + "loss": 2.4781, + "step": 10572 + }, + { + "epoch": 0.8532806068920991, + "grad_norm": 0.7235112190246582, + "learning_rate": 9.176856339066114e-05, + "loss": 2.5753, + "step": 10573 + }, + { + "epoch": 0.8533613106286821, + "grad_norm": 0.6815706491470337, + "learning_rate": 9.175283016973382e-05, + "loss": 2.5526, + "step": 10574 + }, + { + "epoch": 0.8534420143652651, + "grad_norm": 0.739747166633606, + "learning_rate": 9.173709715434751e-05, + "loss": 2.5631, + "step": 10575 + }, + { + "epoch": 0.8535227181018481, + "grad_norm": 0.7325060963630676, + "learning_rate": 9.172136434489437e-05, + "loss": 2.4925, + "step": 10576 + }, + { + "epoch": 0.8536034218384311, + "grad_norm": 0.6505454182624817, + "learning_rate": 9.170563174176645e-05, + "loss": 2.5423, + "step": 10577 + }, + { + "epoch": 
0.8536841255750142, + "grad_norm": 0.7267098426818848, + "learning_rate": 9.168989934535586e-05, + "loss": 2.5687, + "step": 10578 + }, + { + "epoch": 0.8537648293115971, + "grad_norm": 0.7264497876167297, + "learning_rate": 9.167416715605476e-05, + "loss": 2.5165, + "step": 10579 + }, + { + "epoch": 0.8538455330481801, + "grad_norm": 0.7473852634429932, + "learning_rate": 9.165843517425509e-05, + "loss": 2.5837, + "step": 10580 + }, + { + "epoch": 0.8539262367847631, + "grad_norm": 0.7249133586883545, + "learning_rate": 9.164270340034906e-05, + "loss": 2.5805, + "step": 10581 + }, + { + "epoch": 0.8540069405213462, + "grad_norm": 0.7463760375976562, + "learning_rate": 9.162697183472875e-05, + "loss": 2.5067, + "step": 10582 + }, + { + "epoch": 0.8540876442579292, + "grad_norm": 0.7125511169433594, + "learning_rate": 9.161124047778614e-05, + "loss": 2.5093, + "step": 10583 + }, + { + "epoch": 0.8541683479945121, + "grad_norm": 0.7247455716133118, + "learning_rate": 9.159550932991335e-05, + "loss": 2.5356, + "step": 10584 + }, + { + "epoch": 0.8542490517310951, + "grad_norm": 0.7593860030174255, + "learning_rate": 9.157977839150246e-05, + "loss": 2.5477, + "step": 10585 + }, + { + "epoch": 0.8543297554676782, + "grad_norm": 0.6758295297622681, + "learning_rate": 9.156404766294547e-05, + "loss": 2.4748, + "step": 10586 + }, + { + "epoch": 0.8544104592042612, + "grad_norm": 0.7114073634147644, + "learning_rate": 9.154831714463447e-05, + "loss": 2.5479, + "step": 10587 + }, + { + "epoch": 0.8544911629408442, + "grad_norm": 0.6881263256072998, + "learning_rate": 9.153258683696156e-05, + "loss": 2.5471, + "step": 10588 + }, + { + "epoch": 0.8545718666774271, + "grad_norm": 0.6509317755699158, + "learning_rate": 9.151685674031866e-05, + "loss": 2.5239, + "step": 10589 + }, + { + "epoch": 0.8546525704140102, + "grad_norm": 0.7754644751548767, + "learning_rate": 9.150112685509787e-05, + "loss": 2.5572, + "step": 10590 + }, + { + "epoch": 0.8547332741505932, + "grad_norm": 
0.707080602645874, + "learning_rate": 9.148539718169118e-05, + "loss": 2.5572, + "step": 10591 + }, + { + "epoch": 0.8548139778871762, + "grad_norm": 0.6996685266494751, + "learning_rate": 9.146966772049073e-05, + "loss": 2.4968, + "step": 10592 + }, + { + "epoch": 0.8548946816237591, + "grad_norm": 0.6830589771270752, + "learning_rate": 9.145393847188841e-05, + "loss": 2.5795, + "step": 10593 + }, + { + "epoch": 0.8549753853603422, + "grad_norm": 0.7507784366607666, + "learning_rate": 9.143820943627628e-05, + "loss": 2.6135, + "step": 10594 + }, + { + "epoch": 0.8550560890969252, + "grad_norm": 0.673218309879303, + "learning_rate": 9.142248061404638e-05, + "loss": 2.5875, + "step": 10595 + }, + { + "epoch": 0.8551367928335082, + "grad_norm": 0.6861804723739624, + "learning_rate": 9.140675200559065e-05, + "loss": 2.5892, + "step": 10596 + }, + { + "epoch": 0.8552174965700912, + "grad_norm": 0.6928709149360657, + "learning_rate": 9.139102361130114e-05, + "loss": 2.5303, + "step": 10597 + }, + { + "epoch": 0.8552982003066743, + "grad_norm": 0.6958343386650085, + "learning_rate": 9.137529543156986e-05, + "loss": 2.5567, + "step": 10598 + }, + { + "epoch": 0.8553789040432572, + "grad_norm": 0.703845739364624, + "learning_rate": 9.135956746678873e-05, + "loss": 2.5215, + "step": 10599 + }, + { + "epoch": 0.8554596077798402, + "grad_norm": 0.7108649015426636, + "learning_rate": 9.134383971734975e-05, + "loss": 2.5687, + "step": 10600 + }, + { + "epoch": 0.8555403115164232, + "grad_norm": 0.7249850034713745, + "learning_rate": 9.132811218364495e-05, + "loss": 2.565, + "step": 10601 + }, + { + "epoch": 0.8556210152530063, + "grad_norm": 0.7060014009475708, + "learning_rate": 9.131238486606623e-05, + "loss": 2.5366, + "step": 10602 + }, + { + "epoch": 0.8557017189895892, + "grad_norm": 0.6915088891983032, + "learning_rate": 9.129665776500559e-05, + "loss": 2.527, + "step": 10603 + }, + { + "epoch": 0.8557824227261722, + "grad_norm": 0.7226938605308533, + "learning_rate": 
9.128093088085503e-05, + "loss": 2.5999, + "step": 10604 + }, + { + "epoch": 0.8558631264627552, + "grad_norm": 0.6802428364753723, + "learning_rate": 9.126520421400641e-05, + "loss": 2.4788, + "step": 10605 + }, + { + "epoch": 0.8559438301993383, + "grad_norm": 0.7855350375175476, + "learning_rate": 9.124947776485175e-05, + "loss": 2.5349, + "step": 10606 + }, + { + "epoch": 0.8560245339359213, + "grad_norm": 0.6758337020874023, + "learning_rate": 9.123375153378296e-05, + "loss": 2.5874, + "step": 10607 + }, + { + "epoch": 0.8561052376725042, + "grad_norm": 0.675061821937561, + "learning_rate": 9.121802552119206e-05, + "loss": 2.5343, + "step": 10608 + }, + { + "epoch": 0.8561859414090872, + "grad_norm": 0.7044726014137268, + "learning_rate": 9.120229972747087e-05, + "loss": 2.5361, + "step": 10609 + }, + { + "epoch": 0.8562666451456702, + "grad_norm": 0.6324402689933777, + "learning_rate": 9.118657415301137e-05, + "loss": 2.5039, + "step": 10610 + }, + { + "epoch": 0.8563473488822533, + "grad_norm": 0.6621509790420532, + "learning_rate": 9.11708487982055e-05, + "loss": 2.5346, + "step": 10611 + }, + { + "epoch": 0.8564280526188363, + "grad_norm": 0.6709887981414795, + "learning_rate": 9.115512366344516e-05, + "loss": 2.5409, + "step": 10612 + }, + { + "epoch": 0.8565087563554192, + "grad_norm": 0.7237712740898132, + "learning_rate": 9.113939874912223e-05, + "loss": 2.5051, + "step": 10613 + }, + { + "epoch": 0.8565894600920022, + "grad_norm": 0.6646109223365784, + "learning_rate": 9.11236740556287e-05, + "loss": 2.5866, + "step": 10614 + }, + { + "epoch": 0.8566701638285853, + "grad_norm": 0.7131930589675903, + "learning_rate": 9.110794958335637e-05, + "loss": 2.5472, + "step": 10615 + }, + { + "epoch": 0.8567508675651683, + "grad_norm": 0.6662428975105286, + "learning_rate": 9.109222533269715e-05, + "loss": 2.4863, + "step": 10616 + }, + { + "epoch": 0.8568315713017512, + "grad_norm": 0.6527226567268372, + "learning_rate": 9.107650130404304e-05, + "loss": 
2.5594, + "step": 10617 + }, + { + "epoch": 0.8569122750383342, + "grad_norm": 0.6639060378074646, + "learning_rate": 9.106077749778578e-05, + "loss": 2.5519, + "step": 10618 + }, + { + "epoch": 0.8569929787749173, + "grad_norm": 0.7088096737861633, + "learning_rate": 9.104505391431734e-05, + "loss": 2.5404, + "step": 10619 + }, + { + "epoch": 0.8570736825115003, + "grad_norm": 0.7155873775482178, + "learning_rate": 9.102933055402957e-05, + "loss": 2.5636, + "step": 10620 + }, + { + "epoch": 0.8571543862480833, + "grad_norm": 0.6522316932678223, + "learning_rate": 9.101360741731431e-05, + "loss": 2.5216, + "step": 10621 + }, + { + "epoch": 0.8572350899846662, + "grad_norm": 0.6515649557113647, + "learning_rate": 9.099788450456345e-05, + "loss": 2.5804, + "step": 10622 + }, + { + "epoch": 0.8573157937212493, + "grad_norm": 0.6791853904724121, + "learning_rate": 9.098216181616883e-05, + "loss": 2.5353, + "step": 10623 + }, + { + "epoch": 0.8573964974578323, + "grad_norm": 0.6946877241134644, + "learning_rate": 9.096643935252236e-05, + "loss": 2.5492, + "step": 10624 + }, + { + "epoch": 0.8574772011944153, + "grad_norm": 0.7235898375511169, + "learning_rate": 9.095071711401581e-05, + "loss": 2.5178, + "step": 10625 + }, + { + "epoch": 0.8575579049309983, + "grad_norm": 0.6740610003471375, + "learning_rate": 9.093499510104102e-05, + "loss": 2.5699, + "step": 10626 + }, + { + "epoch": 0.8576386086675813, + "grad_norm": 0.7441792488098145, + "learning_rate": 9.091927331398988e-05, + "loss": 2.579, + "step": 10627 + }, + { + "epoch": 0.8577193124041643, + "grad_norm": 0.6986937522888184, + "learning_rate": 9.090355175325416e-05, + "loss": 2.5556, + "step": 10628 + }, + { + "epoch": 0.8578000161407473, + "grad_norm": 0.6960151791572571, + "learning_rate": 9.08878304192257e-05, + "loss": 2.5448, + "step": 10629 + }, + { + "epoch": 0.8578807198773303, + "grad_norm": 0.6376819014549255, + "learning_rate": 9.087210931229636e-05, + "loss": 2.4636, + "step": 10630 + }, + { + 
"epoch": 0.8579614236139134, + "grad_norm": 0.752473771572113, + "learning_rate": 9.08563884328579e-05, + "loss": 2.5451, + "step": 10631 + }, + { + "epoch": 0.8580421273504963, + "grad_norm": 0.6879361867904663, + "learning_rate": 9.084066778130213e-05, + "loss": 2.5365, + "step": 10632 + }, + { + "epoch": 0.8581228310870793, + "grad_norm": 0.6630483865737915, + "learning_rate": 9.082494735802091e-05, + "loss": 2.5085, + "step": 10633 + }, + { + "epoch": 0.8582035348236623, + "grad_norm": 0.689602792263031, + "learning_rate": 9.080922716340594e-05, + "loss": 2.5087, + "step": 10634 + }, + { + "epoch": 0.8582842385602454, + "grad_norm": 0.7333599925041199, + "learning_rate": 9.079350719784905e-05, + "loss": 2.5476, + "step": 10635 + }, + { + "epoch": 0.8583649422968284, + "grad_norm": 0.6895802021026611, + "learning_rate": 9.077778746174204e-05, + "loss": 2.5099, + "step": 10636 + }, + { + "epoch": 0.8584456460334113, + "grad_norm": 0.7202162146568298, + "learning_rate": 9.076206795547668e-05, + "loss": 2.5197, + "step": 10637 + }, + { + "epoch": 0.8585263497699943, + "grad_norm": 0.6454200148582458, + "learning_rate": 9.074634867944472e-05, + "loss": 2.5303, + "step": 10638 + }, + { + "epoch": 0.8586070535065774, + "grad_norm": 0.6842506527900696, + "learning_rate": 9.073062963403795e-05, + "loss": 2.5051, + "step": 10639 + }, + { + "epoch": 0.8586877572431604, + "grad_norm": 0.6979129314422607, + "learning_rate": 9.071491081964815e-05, + "loss": 2.5209, + "step": 10640 + }, + { + "epoch": 0.8587684609797434, + "grad_norm": 0.6851540803909302, + "learning_rate": 9.0699192236667e-05, + "loss": 2.5003, + "step": 10641 + }, + { + "epoch": 0.8588491647163263, + "grad_norm": 0.7528585195541382, + "learning_rate": 9.068347388548627e-05, + "loss": 2.5524, + "step": 10642 + }, + { + "epoch": 0.8589298684529094, + "grad_norm": 0.6297397613525391, + "learning_rate": 9.06677557664978e-05, + "loss": 2.5412, + "step": 10643 + }, + { + "epoch": 0.8590105721894924, + 
"grad_norm": 0.7034026980400085, + "learning_rate": 9.06520378800932e-05, + "loss": 2.4958, + "step": 10644 + }, + { + "epoch": 0.8590912759260754, + "grad_norm": 0.690258800983429, + "learning_rate": 9.063632022666425e-05, + "loss": 2.4894, + "step": 10645 + }, + { + "epoch": 0.8591719796626583, + "grad_norm": 0.6449949145317078, + "learning_rate": 9.06206028066027e-05, + "loss": 2.507, + "step": 10646 + }, + { + "epoch": 0.8592526833992414, + "grad_norm": 0.6328588724136353, + "learning_rate": 9.060488562030023e-05, + "loss": 2.5503, + "step": 10647 + }, + { + "epoch": 0.8593333871358244, + "grad_norm": 0.6570547819137573, + "learning_rate": 9.058916866814858e-05, + "loss": 2.4993, + "step": 10648 + }, + { + "epoch": 0.8594140908724074, + "grad_norm": 0.7689602375030518, + "learning_rate": 9.057345195053945e-05, + "loss": 2.5498, + "step": 10649 + }, + { + "epoch": 0.8594947946089904, + "grad_norm": 0.6727081537246704, + "learning_rate": 9.055773546786454e-05, + "loss": 2.5172, + "step": 10650 + }, + { + "epoch": 0.8595754983455735, + "grad_norm": 0.694722056388855, + "learning_rate": 9.054201922051552e-05, + "loss": 2.5485, + "step": 10651 + }, + { + "epoch": 0.8596562020821564, + "grad_norm": 0.6638815999031067, + "learning_rate": 9.052630320888411e-05, + "loss": 2.5134, + "step": 10652 + }, + { + "epoch": 0.8597369058187394, + "grad_norm": 0.6600833535194397, + "learning_rate": 9.0510587433362e-05, + "loss": 2.5206, + "step": 10653 + }, + { + "epoch": 0.8598176095553224, + "grad_norm": 0.7193894386291504, + "learning_rate": 9.049487189434084e-05, + "loss": 2.5485, + "step": 10654 + }, + { + "epoch": 0.8598983132919055, + "grad_norm": 0.6651753187179565, + "learning_rate": 9.047915659221233e-05, + "loss": 2.5703, + "step": 10655 + }, + { + "epoch": 0.8599790170284884, + "grad_norm": 0.7346364855766296, + "learning_rate": 9.046344152736815e-05, + "loss": 2.5301, + "step": 10656 + }, + { + "epoch": 0.8600597207650714, + "grad_norm": 0.6681811809539795, + 
"learning_rate": 9.04477267001999e-05, + "loss": 2.5124, + "step": 10657 + }, + { + "epoch": 0.8601404245016544, + "grad_norm": 0.6928461790084839, + "learning_rate": 9.043201211109929e-05, + "loss": 2.5153, + "step": 10658 + }, + { + "epoch": 0.8602211282382374, + "grad_norm": 0.6957700252532959, + "learning_rate": 9.041629776045797e-05, + "loss": 2.4697, + "step": 10659 + }, + { + "epoch": 0.8603018319748205, + "grad_norm": 0.6361939311027527, + "learning_rate": 9.040058364866752e-05, + "loss": 2.5162, + "step": 10660 + }, + { + "epoch": 0.8603825357114034, + "grad_norm": 0.6827390193939209, + "learning_rate": 9.038486977611964e-05, + "loss": 2.4856, + "step": 10661 + }, + { + "epoch": 0.8604632394479864, + "grad_norm": 0.6638801097869873, + "learning_rate": 9.036915614320595e-05, + "loss": 2.5224, + "step": 10662 + }, + { + "epoch": 0.8605439431845694, + "grad_norm": 0.7249652743339539, + "learning_rate": 9.035344275031802e-05, + "loss": 2.5461, + "step": 10663 + }, + { + "epoch": 0.8606246469211525, + "grad_norm": 0.6693316102027893, + "learning_rate": 9.033772959784754e-05, + "loss": 2.5676, + "step": 10664 + }, + { + "epoch": 0.8607053506577355, + "grad_norm": 0.6787340641021729, + "learning_rate": 9.032201668618614e-05, + "loss": 2.5374, + "step": 10665 + }, + { + "epoch": 0.8607860543943184, + "grad_norm": 0.6581670641899109, + "learning_rate": 9.030630401572533e-05, + "loss": 2.5052, + "step": 10666 + }, + { + "epoch": 0.8608667581309014, + "grad_norm": 0.6975873112678528, + "learning_rate": 9.029059158685675e-05, + "loss": 2.4823, + "step": 10667 + }, + { + "epoch": 0.8609474618674845, + "grad_norm": 0.6632521748542786, + "learning_rate": 9.027487939997201e-05, + "loss": 2.5992, + "step": 10668 + }, + { + "epoch": 0.8610281656040675, + "grad_norm": 0.6793977618217468, + "learning_rate": 9.025916745546276e-05, + "loss": 2.5308, + "step": 10669 + }, + { + "epoch": 0.8611088693406505, + "grad_norm": 0.6499481797218323, + "learning_rate": 
9.024345575372046e-05, + "loss": 2.4964, + "step": 10670 + }, + { + "epoch": 0.8611895730772334, + "grad_norm": 0.6858868598937988, + "learning_rate": 9.022774429513677e-05, + "loss": 2.5388, + "step": 10671 + }, + { + "epoch": 0.8612702768138165, + "grad_norm": 0.7586160898208618, + "learning_rate": 9.021203308010324e-05, + "loss": 2.5166, + "step": 10672 + }, + { + "epoch": 0.8613509805503995, + "grad_norm": 0.7179701328277588, + "learning_rate": 9.019632210901141e-05, + "loss": 2.5501, + "step": 10673 + }, + { + "epoch": 0.8614316842869825, + "grad_norm": 0.6830369830131531, + "learning_rate": 9.018061138225287e-05, + "loss": 2.4956, + "step": 10674 + }, + { + "epoch": 0.8615123880235654, + "grad_norm": 0.6710512042045593, + "learning_rate": 9.01649009002192e-05, + "loss": 2.5722, + "step": 10675 + }, + { + "epoch": 0.8615930917601485, + "grad_norm": 0.640011727809906, + "learning_rate": 9.014919066330186e-05, + "loss": 2.5197, + "step": 10676 + }, + { + "epoch": 0.8616737954967315, + "grad_norm": 0.6803860664367676, + "learning_rate": 9.013348067189245e-05, + "loss": 2.4794, + "step": 10677 + }, + { + "epoch": 0.8617544992333145, + "grad_norm": 0.6734865307807922, + "learning_rate": 9.011777092638251e-05, + "loss": 2.5831, + "step": 10678 + }, + { + "epoch": 0.8618352029698975, + "grad_norm": 0.6525718569755554, + "learning_rate": 9.010206142716353e-05, + "loss": 2.4925, + "step": 10679 + }, + { + "epoch": 0.8619159067064806, + "grad_norm": 0.6886672377586365, + "learning_rate": 9.008635217462706e-05, + "loss": 2.491, + "step": 10680 + }, + { + "epoch": 0.8619966104430635, + "grad_norm": 0.6397131085395813, + "learning_rate": 9.007064316916461e-05, + "loss": 2.4684, + "step": 10681 + }, + { + "epoch": 0.8620773141796465, + "grad_norm": 0.6308462023735046, + "learning_rate": 9.005493441116768e-05, + "loss": 2.504, + "step": 10682 + }, + { + "epoch": 0.8621580179162295, + "grad_norm": 0.7223808169364929, + "learning_rate": 9.003922590102778e-05, + "loss": 2.5342, 
+ "step": 10683 + }, + { + "epoch": 0.8622387216528126, + "grad_norm": 0.687515914440155, + "learning_rate": 9.002351763913642e-05, + "loss": 2.4822, + "step": 10684 + }, + { + "epoch": 0.8623194253893955, + "grad_norm": 0.6888468265533447, + "learning_rate": 9.00078096258851e-05, + "loss": 2.5497, + "step": 10685 + }, + { + "epoch": 0.8624001291259785, + "grad_norm": 0.7429301738739014, + "learning_rate": 8.999210186166525e-05, + "loss": 2.624, + "step": 10686 + }, + { + "epoch": 0.8624808328625615, + "grad_norm": 0.6901945471763611, + "learning_rate": 8.997639434686839e-05, + "loss": 2.5268, + "step": 10687 + }, + { + "epoch": 0.8625615365991446, + "grad_norm": 0.7396681308746338, + "learning_rate": 8.9960687081886e-05, + "loss": 2.5427, + "step": 10688 + }, + { + "epoch": 0.8626422403357276, + "grad_norm": 0.6825531125068665, + "learning_rate": 8.99449800671095e-05, + "loss": 2.5722, + "step": 10689 + }, + { + "epoch": 0.8627229440723105, + "grad_norm": 0.6719860434532166, + "learning_rate": 8.992927330293039e-05, + "loss": 2.4939, + "step": 10690 + }, + { + "epoch": 0.8628036478088935, + "grad_norm": 0.644567608833313, + "learning_rate": 8.991356678974017e-05, + "loss": 2.5495, + "step": 10691 + }, + { + "epoch": 0.8628843515454766, + "grad_norm": 0.7066643834114075, + "learning_rate": 8.989786052793015e-05, + "loss": 2.5508, + "step": 10692 + }, + { + "epoch": 0.8629650552820596, + "grad_norm": 0.6697196364402771, + "learning_rate": 8.988215451789187e-05, + "loss": 2.5231, + "step": 10693 + }, + { + "epoch": 0.8630457590186426, + "grad_norm": 0.7143658399581909, + "learning_rate": 8.986644876001681e-05, + "loss": 2.5368, + "step": 10694 + }, + { + "epoch": 0.8631264627552255, + "grad_norm": 0.7597684264183044, + "learning_rate": 8.985074325469628e-05, + "loss": 2.5983, + "step": 10695 + }, + { + "epoch": 0.8632071664918086, + "grad_norm": 0.7418014407157898, + "learning_rate": 8.983503800232176e-05, + "loss": 2.5736, + "step": 10696 + }, + { + "epoch": 
0.8632878702283916, + "grad_norm": 0.654435932636261, + "learning_rate": 8.981933300328468e-05, + "loss": 2.5389, + "step": 10697 + }, + { + "epoch": 0.8633685739649746, + "grad_norm": 0.658203661441803, + "learning_rate": 8.980362825797643e-05, + "loss": 2.5204, + "step": 10698 + }, + { + "epoch": 0.8634492777015575, + "grad_norm": 0.7132784724235535, + "learning_rate": 8.97879237667884e-05, + "loss": 2.4982, + "step": 10699 + }, + { + "epoch": 0.8635299814381406, + "grad_norm": 0.6901868581771851, + "learning_rate": 8.9772219530112e-05, + "loss": 2.5599, + "step": 10700 + }, + { + "epoch": 0.8636106851747236, + "grad_norm": 0.6241179704666138, + "learning_rate": 8.975651554833869e-05, + "loss": 2.5185, + "step": 10701 + }, + { + "epoch": 0.8636913889113066, + "grad_norm": 0.693692147731781, + "learning_rate": 8.974081182185974e-05, + "loss": 2.506, + "step": 10702 + }, + { + "epoch": 0.8637720926478896, + "grad_norm": 0.6699246168136597, + "learning_rate": 8.972510835106658e-05, + "loss": 2.557, + "step": 10703 + }, + { + "epoch": 0.8638527963844727, + "grad_norm": 0.7339062094688416, + "learning_rate": 8.970940513635059e-05, + "loss": 2.5614, + "step": 10704 + }, + { + "epoch": 0.8639335001210556, + "grad_norm": 0.7558815479278564, + "learning_rate": 8.969370217810311e-05, + "loss": 2.5949, + "step": 10705 + }, + { + "epoch": 0.8640142038576386, + "grad_norm": 0.6992602348327637, + "learning_rate": 8.96779994767155e-05, + "loss": 2.4755, + "step": 10706 + }, + { + "epoch": 0.8640949075942216, + "grad_norm": 0.6836397647857666, + "learning_rate": 8.966229703257915e-05, + "loss": 2.5172, + "step": 10707 + }, + { + "epoch": 0.8641756113308047, + "grad_norm": 0.7054563760757446, + "learning_rate": 8.964659484608537e-05, + "loss": 2.5186, + "step": 10708 + }, + { + "epoch": 0.8642563150673876, + "grad_norm": 0.7096611261367798, + "learning_rate": 8.963089291762551e-05, + "loss": 2.5157, + "step": 10709 + }, + { + "epoch": 0.8643370188039706, + "grad_norm": 
0.657465934753418, + "learning_rate": 8.961519124759094e-05, + "loss": 2.5332, + "step": 10710 + }, + { + "epoch": 0.8644177225405536, + "grad_norm": 0.7490121126174927, + "learning_rate": 8.959948983637291e-05, + "loss": 2.512, + "step": 10711 + }, + { + "epoch": 0.8644984262771366, + "grad_norm": 0.7074166536331177, + "learning_rate": 8.958378868436279e-05, + "loss": 2.4745, + "step": 10712 + }, + { + "epoch": 0.8645791300137197, + "grad_norm": 0.7496227025985718, + "learning_rate": 8.956808779195188e-05, + "loss": 2.5533, + "step": 10713 + }, + { + "epoch": 0.8646598337503026, + "grad_norm": 0.6624657511711121, + "learning_rate": 8.95523871595315e-05, + "loss": 2.5346, + "step": 10714 + }, + { + "epoch": 0.8647405374868856, + "grad_norm": 0.6829125881195068, + "learning_rate": 8.953668678749292e-05, + "loss": 2.558, + "step": 10715 + }, + { + "epoch": 0.8648212412234686, + "grad_norm": 0.6954498887062073, + "learning_rate": 8.952098667622745e-05, + "loss": 2.5617, + "step": 10716 + }, + { + "epoch": 0.8649019449600517, + "grad_norm": 0.6722636818885803, + "learning_rate": 8.950528682612645e-05, + "loss": 2.5565, + "step": 10717 + }, + { + "epoch": 0.8649826486966347, + "grad_norm": 0.6793767213821411, + "learning_rate": 8.948958723758107e-05, + "loss": 2.5803, + "step": 10718 + }, + { + "epoch": 0.8650633524332176, + "grad_norm": 0.7159373760223389, + "learning_rate": 8.947388791098266e-05, + "loss": 2.5465, + "step": 10719 + }, + { + "epoch": 0.8651440561698006, + "grad_norm": 0.6823835372924805, + "learning_rate": 8.945818884672253e-05, + "loss": 2.5079, + "step": 10720 + }, + { + "epoch": 0.8652247599063837, + "grad_norm": 0.7521452903747559, + "learning_rate": 8.944249004519185e-05, + "loss": 2.5628, + "step": 10721 + }, + { + "epoch": 0.8653054636429667, + "grad_norm": 0.6774886846542358, + "learning_rate": 8.94267915067819e-05, + "loss": 2.6042, + "step": 10722 + }, + { + "epoch": 0.8653861673795497, + "grad_norm": 0.6915935277938843, + "learning_rate": 
8.941109323188398e-05, + "loss": 2.5563, + "step": 10723 + }, + { + "epoch": 0.8654668711161326, + "grad_norm": 0.6609061360359192, + "learning_rate": 8.939539522088927e-05, + "loss": 2.5083, + "step": 10724 + }, + { + "epoch": 0.8655475748527157, + "grad_norm": 0.6457223892211914, + "learning_rate": 8.937969747418903e-05, + "loss": 2.573, + "step": 10725 + }, + { + "epoch": 0.8656282785892987, + "grad_norm": 0.6960360407829285, + "learning_rate": 8.936399999217455e-05, + "loss": 2.516, + "step": 10726 + }, + { + "epoch": 0.8657089823258817, + "grad_norm": 0.7269721627235413, + "learning_rate": 8.934830277523693e-05, + "loss": 2.5932, + "step": 10727 + }, + { + "epoch": 0.8657896860624646, + "grad_norm": 0.7057532668113708, + "learning_rate": 8.933260582376745e-05, + "loss": 2.5022, + "step": 10728 + }, + { + "epoch": 0.8658703897990477, + "grad_norm": 0.6698749661445618, + "learning_rate": 8.931690913815735e-05, + "loss": 2.5357, + "step": 10729 + }, + { + "epoch": 0.8659510935356307, + "grad_norm": 0.6616599559783936, + "learning_rate": 8.930121271879777e-05, + "loss": 2.4776, + "step": 10730 + }, + { + "epoch": 0.8660317972722137, + "grad_norm": 0.7457093000411987, + "learning_rate": 8.928551656607993e-05, + "loss": 2.5799, + "step": 10731 + }, + { + "epoch": 0.8661125010087967, + "grad_norm": 0.7199469804763794, + "learning_rate": 8.926982068039505e-05, + "loss": 2.5278, + "step": 10732 + }, + { + "epoch": 0.8661932047453798, + "grad_norm": 0.7579182386398315, + "learning_rate": 8.925412506213428e-05, + "loss": 2.5227, + "step": 10733 + }, + { + "epoch": 0.8662739084819627, + "grad_norm": 0.687455952167511, + "learning_rate": 8.92384297116888e-05, + "loss": 2.5099, + "step": 10734 + }, + { + "epoch": 0.8663546122185457, + "grad_norm": 0.7616521120071411, + "learning_rate": 8.922273462944978e-05, + "loss": 2.598, + "step": 10735 + }, + { + "epoch": 0.8664353159551287, + "grad_norm": 0.6730697751045227, + "learning_rate": 8.920703981580842e-05, + "loss": 2.5517, 
+ "step": 10736 + }, + { + "epoch": 0.8665160196917118, + "grad_norm": 0.6769895553588867, + "learning_rate": 8.91913452711558e-05, + "loss": 2.5535, + "step": 10737 + }, + { + "epoch": 0.8665967234282947, + "grad_norm": 0.6284549832344055, + "learning_rate": 8.917565099588312e-05, + "loss": 2.4597, + "step": 10738 + }, + { + "epoch": 0.8666774271648777, + "grad_norm": 0.6900805830955505, + "learning_rate": 8.915995699038152e-05, + "loss": 2.5236, + "step": 10739 + }, + { + "epoch": 0.8667581309014607, + "grad_norm": 0.6842896938323975, + "learning_rate": 8.914426325504211e-05, + "loss": 2.5199, + "step": 10740 + }, + { + "epoch": 0.8668388346380438, + "grad_norm": 0.6637243628501892, + "learning_rate": 8.912856979025604e-05, + "loss": 2.5368, + "step": 10741 + }, + { + "epoch": 0.8669195383746268, + "grad_norm": 0.7474464178085327, + "learning_rate": 8.911287659641449e-05, + "loss": 2.4902, + "step": 10742 + }, + { + "epoch": 0.8670002421112097, + "grad_norm": 0.6977849006652832, + "learning_rate": 8.909718367390843e-05, + "loss": 2.5034, + "step": 10743 + }, + { + "epoch": 0.8670809458477927, + "grad_norm": 0.6968807578086853, + "learning_rate": 8.908149102312907e-05, + "loss": 2.5396, + "step": 10744 + }, + { + "epoch": 0.8671616495843758, + "grad_norm": 0.6656209230422974, + "learning_rate": 8.906579864446755e-05, + "loss": 2.5702, + "step": 10745 + }, + { + "epoch": 0.8672423533209588, + "grad_norm": 0.7079079151153564, + "learning_rate": 8.905010653831486e-05, + "loss": 2.5344, + "step": 10746 + }, + { + "epoch": 0.8673230570575418, + "grad_norm": 0.7423387765884399, + "learning_rate": 8.903441470506214e-05, + "loss": 2.5635, + "step": 10747 + }, + { + "epoch": 0.8674037607941247, + "grad_norm": 0.6607224941253662, + "learning_rate": 8.901872314510046e-05, + "loss": 2.54, + "step": 10748 + }, + { + "epoch": 0.8674844645307078, + "grad_norm": 0.6646947860717773, + "learning_rate": 8.900303185882095e-05, + "loss": 2.4661, + "step": 10749 + }, + { + "epoch": 
0.8675651682672908, + "grad_norm": 0.6943496465682983, + "learning_rate": 8.89873408466146e-05, + "loss": 2.5213, + "step": 10750 + }, + { + "epoch": 0.8676458720038738, + "grad_norm": 0.7048123478889465, + "learning_rate": 8.89716501088725e-05, + "loss": 2.5529, + "step": 10751 + }, + { + "epoch": 0.8677265757404568, + "grad_norm": 0.654617428779602, + "learning_rate": 8.895595964598574e-05, + "loss": 2.5535, + "step": 10752 + }, + { + "epoch": 0.8678072794770398, + "grad_norm": 0.672063410282135, + "learning_rate": 8.894026945834531e-05, + "loss": 2.5279, + "step": 10753 + }, + { + "epoch": 0.8678879832136228, + "grad_norm": 0.7134148478507996, + "learning_rate": 8.892457954634225e-05, + "loss": 2.5403, + "step": 10754 + }, + { + "epoch": 0.8679686869502058, + "grad_norm": 0.6457598805427551, + "learning_rate": 8.890888991036768e-05, + "loss": 2.515, + "step": 10755 + }, + { + "epoch": 0.8680493906867888, + "grad_norm": 0.6725220084190369, + "learning_rate": 8.889320055081252e-05, + "loss": 2.4829, + "step": 10756 + }, + { + "epoch": 0.8681300944233719, + "grad_norm": 0.6425862312316895, + "learning_rate": 8.887751146806785e-05, + "loss": 2.4965, + "step": 10757 + }, + { + "epoch": 0.8682107981599548, + "grad_norm": 0.6654682755470276, + "learning_rate": 8.886182266252468e-05, + "loss": 2.48, + "step": 10758 + }, + { + "epoch": 0.8682915018965378, + "grad_norm": 0.7102493643760681, + "learning_rate": 8.884613413457398e-05, + "loss": 2.5415, + "step": 10759 + }, + { + "epoch": 0.8683722056331208, + "grad_norm": 0.6996567249298096, + "learning_rate": 8.883044588460677e-05, + "loss": 2.542, + "step": 10760 + }, + { + "epoch": 0.8684529093697038, + "grad_norm": 0.7011905312538147, + "learning_rate": 8.881475791301405e-05, + "loss": 2.5391, + "step": 10761 + }, + { + "epoch": 0.8685336131062869, + "grad_norm": 0.6508356928825378, + "learning_rate": 8.879907022018686e-05, + "loss": 2.4892, + "step": 10762 + }, + { + "epoch": 0.8686143168428698, + "grad_norm": 
0.7104009985923767, + "learning_rate": 8.878338280651605e-05, + "loss": 2.5152, + "step": 10763 + }, + { + "epoch": 0.8686950205794528, + "grad_norm": 0.6501138210296631, + "learning_rate": 8.876769567239268e-05, + "loss": 2.5767, + "step": 10764 + }, + { + "epoch": 0.8687757243160358, + "grad_norm": 0.6463173031806946, + "learning_rate": 8.875200881820771e-05, + "loss": 2.4758, + "step": 10765 + }, + { + "epoch": 0.8688564280526189, + "grad_norm": 0.6494991779327393, + "learning_rate": 8.873632224435206e-05, + "loss": 2.5364, + "step": 10766 + }, + { + "epoch": 0.8689371317892018, + "grad_norm": 0.6926043033599854, + "learning_rate": 8.872063595121671e-05, + "loss": 2.5288, + "step": 10767 + }, + { + "epoch": 0.8690178355257848, + "grad_norm": 0.7076035737991333, + "learning_rate": 8.870494993919261e-05, + "loss": 2.5118, + "step": 10768 + }, + { + "epoch": 0.8690985392623678, + "grad_norm": 0.6456892490386963, + "learning_rate": 8.868926420867068e-05, + "loss": 2.4957, + "step": 10769 + }, + { + "epoch": 0.8691792429989509, + "grad_norm": 0.6585200428962708, + "learning_rate": 8.867357876004183e-05, + "loss": 2.5049, + "step": 10770 + }, + { + "epoch": 0.8692599467355339, + "grad_norm": 0.6893252730369568, + "learning_rate": 8.865789359369706e-05, + "loss": 2.4808, + "step": 10771 + }, + { + "epoch": 0.8693406504721168, + "grad_norm": 0.6700639724731445, + "learning_rate": 8.864220871002719e-05, + "loss": 2.5475, + "step": 10772 + }, + { + "epoch": 0.8694213542086998, + "grad_norm": 0.6551913619041443, + "learning_rate": 8.862652410942315e-05, + "loss": 2.5063, + "step": 10773 + }, + { + "epoch": 0.8695020579452829, + "grad_norm": 0.6870427131652832, + "learning_rate": 8.86108397922759e-05, + "loss": 2.5785, + "step": 10774 + }, + { + "epoch": 0.8695827616818659, + "grad_norm": 0.6489934325218201, + "learning_rate": 8.859515575897626e-05, + "loss": 2.5584, + "step": 10775 + }, + { + "epoch": 0.8696634654184489, + "grad_norm": 0.6726663112640381, + 
"learning_rate": 8.857947200991517e-05, + "loss": 2.5707, + "step": 10776 + }, + { + "epoch": 0.8697441691550318, + "grad_norm": 0.7696183323860168, + "learning_rate": 8.856378854548347e-05, + "loss": 2.501, + "step": 10777 + }, + { + "epoch": 0.8698248728916149, + "grad_norm": 0.7002642154693604, + "learning_rate": 8.854810536607212e-05, + "loss": 2.5792, + "step": 10778 + }, + { + "epoch": 0.8699055766281979, + "grad_norm": 0.6429435610771179, + "learning_rate": 8.853242247207185e-05, + "loss": 2.5463, + "step": 10779 + }, + { + "epoch": 0.8699862803647809, + "grad_norm": 0.7006216645240784, + "learning_rate": 8.851673986387358e-05, + "loss": 2.5698, + "step": 10780 + }, + { + "epoch": 0.8700669841013638, + "grad_norm": 0.7053292989730835, + "learning_rate": 8.850105754186824e-05, + "loss": 2.5468, + "step": 10781 + }, + { + "epoch": 0.8701476878379469, + "grad_norm": 0.6592122912406921, + "learning_rate": 8.848537550644654e-05, + "loss": 2.5271, + "step": 10782 + }, + { + "epoch": 0.8702283915745299, + "grad_norm": 0.679132342338562, + "learning_rate": 8.846969375799941e-05, + "loss": 2.5281, + "step": 10783 + }, + { + "epoch": 0.8703090953111129, + "grad_norm": 0.6868568062782288, + "learning_rate": 8.845401229691765e-05, + "loss": 2.5415, + "step": 10784 + }, + { + "epoch": 0.8703897990476959, + "grad_norm": 0.7060674428939819, + "learning_rate": 8.843833112359208e-05, + "loss": 2.5649, + "step": 10785 + }, + { + "epoch": 0.870470502784279, + "grad_norm": 0.6663981676101685, + "learning_rate": 8.842265023841352e-05, + "loss": 2.5055, + "step": 10786 + }, + { + "epoch": 0.8705512065208619, + "grad_norm": 0.7095218896865845, + "learning_rate": 8.840696964177282e-05, + "loss": 2.5442, + "step": 10787 + }, + { + "epoch": 0.8706319102574449, + "grad_norm": 0.6884104013442993, + "learning_rate": 8.839128933406069e-05, + "loss": 2.5285, + "step": 10788 + }, + { + "epoch": 0.8707126139940279, + "grad_norm": 0.6427462697029114, + "learning_rate": 8.837560931566798e-05, 
+ "loss": 2.5197, + "step": 10789 + }, + { + "epoch": 0.870793317730611, + "grad_norm": 0.6870493292808533, + "learning_rate": 8.835992958698548e-05, + "loss": 2.4937, + "step": 10790 + }, + { + "epoch": 0.870874021467194, + "grad_norm": 0.7006319761276245, + "learning_rate": 8.834425014840398e-05, + "loss": 2.5148, + "step": 10791 + }, + { + "epoch": 0.8709547252037769, + "grad_norm": 0.690601646900177, + "learning_rate": 8.83285710003142e-05, + "loss": 2.5454, + "step": 10792 + }, + { + "epoch": 0.8710354289403599, + "grad_norm": 0.7205955982208252, + "learning_rate": 8.831289214310695e-05, + "loss": 2.5221, + "step": 10793 + }, + { + "epoch": 0.871116132676943, + "grad_norm": 0.7134295105934143, + "learning_rate": 8.8297213577173e-05, + "loss": 2.5626, + "step": 10794 + }, + { + "epoch": 0.871196836413526, + "grad_norm": 0.6560496091842651, + "learning_rate": 8.828153530290307e-05, + "loss": 2.5408, + "step": 10795 + }, + { + "epoch": 0.8712775401501089, + "grad_norm": 0.7055882215499878, + "learning_rate": 8.82658573206879e-05, + "loss": 2.5173, + "step": 10796 + }, + { + "epoch": 0.8713582438866919, + "grad_norm": 0.6751883029937744, + "learning_rate": 8.825017963091827e-05, + "loss": 2.5378, + "step": 10797 + }, + { + "epoch": 0.871438947623275, + "grad_norm": 0.6794824600219727, + "learning_rate": 8.823450223398485e-05, + "loss": 2.592, + "step": 10798 + }, + { + "epoch": 0.871519651359858, + "grad_norm": 0.675729513168335, + "learning_rate": 8.821882513027838e-05, + "loss": 2.5253, + "step": 10799 + }, + { + "epoch": 0.871600355096441, + "grad_norm": 0.7185894250869751, + "learning_rate": 8.820314832018962e-05, + "loss": 2.5073, + "step": 10800 + }, + { + "epoch": 0.8716810588330239, + "grad_norm": 0.6605187654495239, + "learning_rate": 8.818747180410921e-05, + "loss": 2.5141, + "step": 10801 + }, + { + "epoch": 0.871761762569607, + "grad_norm": 0.6955205798149109, + "learning_rate": 8.817179558242788e-05, + "loss": 2.5313, + "step": 10802 + }, + { + 
"epoch": 0.87184246630619, + "grad_norm": 0.6307928562164307, + "learning_rate": 8.815611965553638e-05, + "loss": 2.4975, + "step": 10803 + }, + { + "epoch": 0.871923170042773, + "grad_norm": 0.7283728122711182, + "learning_rate": 8.814044402382527e-05, + "loss": 2.4623, + "step": 10804 + }, + { + "epoch": 0.872003873779356, + "grad_norm": 0.7019702792167664, + "learning_rate": 8.81247686876853e-05, + "loss": 2.4755, + "step": 10805 + }, + { + "epoch": 0.872084577515939, + "grad_norm": 0.6769137382507324, + "learning_rate": 8.81090936475072e-05, + "loss": 2.59, + "step": 10806 + }, + { + "epoch": 0.872165281252522, + "grad_norm": 0.6185588836669922, + "learning_rate": 8.80934189036815e-05, + "loss": 2.5308, + "step": 10807 + }, + { + "epoch": 0.872245984989105, + "grad_norm": 0.7127000689506531, + "learning_rate": 8.807774445659894e-05, + "loss": 2.5301, + "step": 10808 + }, + { + "epoch": 0.872326688725688, + "grad_norm": 0.7039114236831665, + "learning_rate": 8.806207030665016e-05, + "loss": 2.5176, + "step": 10809 + }, + { + "epoch": 0.8724073924622711, + "grad_norm": 0.6763370633125305, + "learning_rate": 8.804639645422582e-05, + "loss": 2.5324, + "step": 10810 + }, + { + "epoch": 0.872488096198854, + "grad_norm": 0.7546409368515015, + "learning_rate": 8.803072289971648e-05, + "loss": 2.5446, + "step": 10811 + }, + { + "epoch": 0.872568799935437, + "grad_norm": 0.6916004419326782, + "learning_rate": 8.801504964351284e-05, + "loss": 2.5056, + "step": 10812 + }, + { + "epoch": 0.87264950367202, + "grad_norm": 0.7108416557312012, + "learning_rate": 8.799937668600552e-05, + "loss": 2.5966, + "step": 10813 + }, + { + "epoch": 0.872730207408603, + "grad_norm": 0.7146576046943665, + "learning_rate": 8.798370402758506e-05, + "loss": 2.5152, + "step": 10814 + }, + { + "epoch": 0.872810911145186, + "grad_norm": 0.6708142757415771, + "learning_rate": 8.796803166864211e-05, + "loss": 2.5248, + "step": 10815 + }, + { + "epoch": 0.872891614881769, + "grad_norm": 
0.6687600612640381, + "learning_rate": 8.795235960956729e-05, + "loss": 2.4451, + "step": 10816 + }, + { + "epoch": 0.872972318618352, + "grad_norm": 0.724012553691864, + "learning_rate": 8.793668785075114e-05, + "loss": 2.4816, + "step": 10817 + }, + { + "epoch": 0.873053022354935, + "grad_norm": 0.6938769221305847, + "learning_rate": 8.792101639258426e-05, + "loss": 2.5435, + "step": 10818 + }, + { + "epoch": 0.8731337260915181, + "grad_norm": 0.7066235542297363, + "learning_rate": 8.790534523545724e-05, + "loss": 2.5167, + "step": 10819 + }, + { + "epoch": 0.873214429828101, + "grad_norm": 0.7129037380218506, + "learning_rate": 8.788967437976062e-05, + "loss": 2.5079, + "step": 10820 + }, + { + "epoch": 0.873295133564684, + "grad_norm": 0.6949728727340698, + "learning_rate": 8.787400382588497e-05, + "loss": 2.5564, + "step": 10821 + }, + { + "epoch": 0.873375837301267, + "grad_norm": 0.7924233675003052, + "learning_rate": 8.785833357422088e-05, + "loss": 2.5748, + "step": 10822 + }, + { + "epoch": 0.8734565410378501, + "grad_norm": 0.7486331462860107, + "learning_rate": 8.784266362515882e-05, + "loss": 2.565, + "step": 10823 + }, + { + "epoch": 0.8735372447744331, + "grad_norm": 0.7036460638046265, + "learning_rate": 8.782699397908935e-05, + "loss": 2.5101, + "step": 10824 + }, + { + "epoch": 0.873617948511016, + "grad_norm": 0.6691471338272095, + "learning_rate": 8.781132463640302e-05, + "loss": 2.5262, + "step": 10825 + }, + { + "epoch": 0.873698652247599, + "grad_norm": 0.6836682558059692, + "learning_rate": 8.779565559749037e-05, + "loss": 2.5651, + "step": 10826 + }, + { + "epoch": 0.8737793559841821, + "grad_norm": 0.6634507775306702, + "learning_rate": 8.777998686274185e-05, + "loss": 2.5383, + "step": 10827 + }, + { + "epoch": 0.8738600597207651, + "grad_norm": 0.6903105974197388, + "learning_rate": 8.7764318432548e-05, + "loss": 2.5659, + "step": 10828 + }, + { + "epoch": 0.8739407634573481, + "grad_norm": 0.737859308719635, + "learning_rate": 
8.774865030729937e-05, + "loss": 2.5859, + "step": 10829 + }, + { + "epoch": 0.874021467193931, + "grad_norm": 0.696843683719635, + "learning_rate": 8.773298248738633e-05, + "loss": 2.5244, + "step": 10830 + }, + { + "epoch": 0.8741021709305141, + "grad_norm": 0.7342235445976257, + "learning_rate": 8.771731497319946e-05, + "loss": 2.5073, + "step": 10831 + }, + { + "epoch": 0.8741828746670971, + "grad_norm": 0.6676939725875854, + "learning_rate": 8.770164776512926e-05, + "loss": 2.5408, + "step": 10832 + }, + { + "epoch": 0.8742635784036801, + "grad_norm": 0.6957886219024658, + "learning_rate": 8.768598086356608e-05, + "loss": 2.5566, + "step": 10833 + }, + { + "epoch": 0.874344282140263, + "grad_norm": 0.6938990950584412, + "learning_rate": 8.767031426890046e-05, + "loss": 2.517, + "step": 10834 + }, + { + "epoch": 0.8744249858768461, + "grad_norm": 0.8387169241905212, + "learning_rate": 8.765464798152286e-05, + "loss": 2.5507, + "step": 10835 + }, + { + "epoch": 0.8745056896134291, + "grad_norm": 0.6396276354789734, + "learning_rate": 8.763898200182368e-05, + "loss": 2.5063, + "step": 10836 + }, + { + "epoch": 0.8745863933500121, + "grad_norm": 0.7122719883918762, + "learning_rate": 8.762331633019339e-05, + "loss": 2.5816, + "step": 10837 + }, + { + "epoch": 0.8746670970865951, + "grad_norm": 0.6807141304016113, + "learning_rate": 8.760765096702244e-05, + "loss": 2.6004, + "step": 10838 + }, + { + "epoch": 0.8747478008231782, + "grad_norm": 0.6764848232269287, + "learning_rate": 8.759198591270117e-05, + "loss": 2.5303, + "step": 10839 + }, + { + "epoch": 0.8748285045597611, + "grad_norm": 0.718515932559967, + "learning_rate": 8.757632116762006e-05, + "loss": 2.5088, + "step": 10840 + }, + { + "epoch": 0.8749092082963441, + "grad_norm": 0.7084362506866455, + "learning_rate": 8.75606567321695e-05, + "loss": 2.5496, + "step": 10841 + }, + { + "epoch": 0.8749899120329271, + "grad_norm": 0.7191734910011292, + "learning_rate": 8.754499260673991e-05, + "loss": 2.5525, + 
"step": 10842 + }, + { + "epoch": 0.8750706157695102, + "grad_norm": 0.7167977094650269, + "learning_rate": 8.752932879172164e-05, + "loss": 2.5479, + "step": 10843 + }, + { + "epoch": 0.8751513195060932, + "grad_norm": 0.6994979381561279, + "learning_rate": 8.751366528750511e-05, + "loss": 2.4942, + "step": 10844 + }, + { + "epoch": 0.8752320232426761, + "grad_norm": 0.7192725539207458, + "learning_rate": 8.749800209448068e-05, + "loss": 2.5233, + "step": 10845 + }, + { + "epoch": 0.8753127269792591, + "grad_norm": 0.7728807330131531, + "learning_rate": 8.748233921303871e-05, + "loss": 2.5698, + "step": 10846 + }, + { + "epoch": 0.8753934307158422, + "grad_norm": 0.7305434942245483, + "learning_rate": 8.746667664356956e-05, + "loss": 2.5096, + "step": 10847 + }, + { + "epoch": 0.8754741344524252, + "grad_norm": 0.7117629051208496, + "learning_rate": 8.745101438646365e-05, + "loss": 2.5272, + "step": 10848 + }, + { + "epoch": 0.8755548381890081, + "grad_norm": 0.7180361151695251, + "learning_rate": 8.743535244211121e-05, + "loss": 2.4718, + "step": 10849 + }, + { + "epoch": 0.8756355419255911, + "grad_norm": 0.6419457793235779, + "learning_rate": 8.741969081090263e-05, + "loss": 2.5407, + "step": 10850 + }, + { + "epoch": 0.8757162456621742, + "grad_norm": 0.7928328514099121, + "learning_rate": 8.740402949322827e-05, + "loss": 2.488, + "step": 10851 + }, + { + "epoch": 0.8757969493987572, + "grad_norm": 0.7449139952659607, + "learning_rate": 8.738836848947839e-05, + "loss": 2.5943, + "step": 10852 + }, + { + "epoch": 0.8758776531353402, + "grad_norm": 0.7919576168060303, + "learning_rate": 8.737270780004334e-05, + "loss": 2.5556, + "step": 10853 + }, + { + "epoch": 0.8759583568719231, + "grad_norm": 0.6867526769638062, + "learning_rate": 8.735704742531346e-05, + "loss": 2.5395, + "step": 10854 + }, + { + "epoch": 0.8760390606085062, + "grad_norm": 0.7195394039154053, + "learning_rate": 8.734138736567896e-05, + "loss": 2.4404, + "step": 10855 + }, + { + "epoch": 
0.8761197643450892, + "grad_norm": 0.68385910987854, + "learning_rate": 8.732572762153016e-05, + "loss": 2.502, + "step": 10856 + }, + { + "epoch": 0.8762004680816722, + "grad_norm": 0.6957393884658813, + "learning_rate": 8.731006819325739e-05, + "loss": 2.5788, + "step": 10857 + }, + { + "epoch": 0.8762811718182552, + "grad_norm": 0.6973037123680115, + "learning_rate": 8.729440908125092e-05, + "loss": 2.4927, + "step": 10858 + }, + { + "epoch": 0.8763618755548382, + "grad_norm": 0.6535985469818115, + "learning_rate": 8.727875028590095e-05, + "loss": 2.596, + "step": 10859 + }, + { + "epoch": 0.8764425792914212, + "grad_norm": 0.7447848320007324, + "learning_rate": 8.726309180759777e-05, + "loss": 2.5825, + "step": 10860 + }, + { + "epoch": 0.8765232830280042, + "grad_norm": 0.7155942320823669, + "learning_rate": 8.724743364673168e-05, + "loss": 2.5105, + "step": 10861 + }, + { + "epoch": 0.8766039867645872, + "grad_norm": 0.6664694547653198, + "learning_rate": 8.723177580369285e-05, + "loss": 2.5244, + "step": 10862 + }, + { + "epoch": 0.8766846905011701, + "grad_norm": 0.7437852025032043, + "learning_rate": 8.721611827887153e-05, + "loss": 2.534, + "step": 10863 + }, + { + "epoch": 0.8767653942377532, + "grad_norm": 0.6752577424049377, + "learning_rate": 8.7200461072658e-05, + "loss": 2.5025, + "step": 10864 + }, + { + "epoch": 0.8768460979743362, + "grad_norm": 0.7420764565467834, + "learning_rate": 8.718480418544241e-05, + "loss": 2.5261, + "step": 10865 + }, + { + "epoch": 0.8769268017109192, + "grad_norm": 0.669384777545929, + "learning_rate": 8.7169147617615e-05, + "loss": 2.5258, + "step": 10866 + }, + { + "epoch": 0.8770075054475022, + "grad_norm": 0.6649587750434875, + "learning_rate": 8.715349136956599e-05, + "loss": 2.5308, + "step": 10867 + }, + { + "epoch": 0.8770882091840853, + "grad_norm": 0.728922426700592, + "learning_rate": 8.713783544168552e-05, + "loss": 2.5251, + "step": 10868 + }, + { + "epoch": 0.8771689129206682, + "grad_norm": 
0.6957671642303467, + "learning_rate": 8.712217983436384e-05, + "loss": 2.5818, + "step": 10869 + }, + { + "epoch": 0.8772496166572512, + "grad_norm": 0.6796830892562866, + "learning_rate": 8.710652454799108e-05, + "loss": 2.5122, + "step": 10870 + }, + { + "epoch": 0.8773303203938342, + "grad_norm": 0.7230980396270752, + "learning_rate": 8.709086958295746e-05, + "loss": 2.5836, + "step": 10871 + }, + { + "epoch": 0.8774110241304173, + "grad_norm": 0.6992264986038208, + "learning_rate": 8.707521493965309e-05, + "loss": 2.5907, + "step": 10872 + }, + { + "epoch": 0.8774917278670002, + "grad_norm": 0.7066535353660583, + "learning_rate": 8.705956061846816e-05, + "loss": 2.5508, + "step": 10873 + }, + { + "epoch": 0.8775724316035832, + "grad_norm": 0.6559327244758606, + "learning_rate": 8.704390661979283e-05, + "loss": 2.611, + "step": 10874 + }, + { + "epoch": 0.8776531353401662, + "grad_norm": 0.6673287749290466, + "learning_rate": 8.70282529440172e-05, + "loss": 2.5778, + "step": 10875 + }, + { + "epoch": 0.8777338390767493, + "grad_norm": 0.6715971231460571, + "learning_rate": 8.701259959153139e-05, + "loss": 2.5342, + "step": 10876 + }, + { + "epoch": 0.8778145428133323, + "grad_norm": 0.7456488609313965, + "learning_rate": 8.699694656272557e-05, + "loss": 2.5365, + "step": 10877 + }, + { + "epoch": 0.8778952465499152, + "grad_norm": 0.6658159494400024, + "learning_rate": 8.698129385798983e-05, + "loss": 2.4387, + "step": 10878 + }, + { + "epoch": 0.8779759502864982, + "grad_norm": 0.6653816103935242, + "learning_rate": 8.696564147771427e-05, + "loss": 2.5791, + "step": 10879 + }, + { + "epoch": 0.8780566540230813, + "grad_norm": 0.6763200163841248, + "learning_rate": 8.694998942228902e-05, + "loss": 2.5356, + "step": 10880 + }, + { + "epoch": 0.8781373577596643, + "grad_norm": 0.6534504890441895, + "learning_rate": 8.69343376921041e-05, + "loss": 2.5358, + "step": 10881 + }, + { + "epoch": 0.8782180614962473, + "grad_norm": 0.6341667771339417, + "learning_rate": 
8.691868628754967e-05, + "loss": 2.4927, + "step": 10882 + }, + { + "epoch": 0.8782987652328302, + "grad_norm": 0.6215559244155884, + "learning_rate": 8.690303520901579e-05, + "loss": 2.4312, + "step": 10883 + }, + { + "epoch": 0.8783794689694133, + "grad_norm": 0.6705841422080994, + "learning_rate": 8.688738445689248e-05, + "loss": 2.4778, + "step": 10884 + }, + { + "epoch": 0.8784601727059963, + "grad_norm": 0.680275559425354, + "learning_rate": 8.687173403156982e-05, + "loss": 2.5577, + "step": 10885 + }, + { + "epoch": 0.8785408764425793, + "grad_norm": 0.6918728351593018, + "learning_rate": 8.685608393343789e-05, + "loss": 2.5212, + "step": 10886 + }, + { + "epoch": 0.8786215801791623, + "grad_norm": 0.623636782169342, + "learning_rate": 8.68404341628867e-05, + "loss": 2.5131, + "step": 10887 + }, + { + "epoch": 0.8787022839157453, + "grad_norm": 0.7200562357902527, + "learning_rate": 8.682478472030628e-05, + "loss": 2.5517, + "step": 10888 + }, + { + "epoch": 0.8787829876523283, + "grad_norm": 0.6902644634246826, + "learning_rate": 8.680913560608666e-05, + "loss": 2.511, + "step": 10889 + }, + { + "epoch": 0.8788636913889113, + "grad_norm": 0.6855802536010742, + "learning_rate": 8.679348682061792e-05, + "loss": 2.5169, + "step": 10890 + }, + { + "epoch": 0.8789443951254943, + "grad_norm": 0.7229284048080444, + "learning_rate": 8.677783836428995e-05, + "loss": 2.5634, + "step": 10891 + }, + { + "epoch": 0.8790250988620774, + "grad_norm": 0.6350376605987549, + "learning_rate": 8.676219023749281e-05, + "loss": 2.443, + "step": 10892 + }, + { + "epoch": 0.8791058025986603, + "grad_norm": 0.6884307265281677, + "learning_rate": 8.674654244061653e-05, + "loss": 2.524, + "step": 10893 + }, + { + "epoch": 0.8791865063352433, + "grad_norm": 0.6571067571640015, + "learning_rate": 8.673089497405102e-05, + "loss": 2.5322, + "step": 10894 + }, + { + "epoch": 0.8792672100718263, + "grad_norm": 0.7078021764755249, + "learning_rate": 8.67152478381863e-05, + "loss": 2.5317, + 
"step": 10895 + }, + { + "epoch": 0.8793479138084094, + "grad_norm": 0.6809059381484985, + "learning_rate": 8.669960103341236e-05, + "loss": 2.5767, + "step": 10896 + }, + { + "epoch": 0.8794286175449924, + "grad_norm": 0.7399441003799438, + "learning_rate": 8.66839545601191e-05, + "loss": 2.5194, + "step": 10897 + }, + { + "epoch": 0.8795093212815753, + "grad_norm": 0.6762270927429199, + "learning_rate": 8.66683084186965e-05, + "loss": 2.5306, + "step": 10898 + }, + { + "epoch": 0.8795900250181583, + "grad_norm": 0.7394620776176453, + "learning_rate": 8.665266260953455e-05, + "loss": 2.4516, + "step": 10899 + }, + { + "epoch": 0.8796707287547414, + "grad_norm": 0.6775416135787964, + "learning_rate": 8.663701713302309e-05, + "loss": 2.5574, + "step": 10900 + }, + { + "epoch": 0.8797514324913244, + "grad_norm": 0.7630520462989807, + "learning_rate": 8.66213719895521e-05, + "loss": 2.5516, + "step": 10901 + }, + { + "epoch": 0.8798321362279073, + "grad_norm": 0.6555768847465515, + "learning_rate": 8.660572717951149e-05, + "loss": 2.5267, + "step": 10902 + }, + { + "epoch": 0.8799128399644903, + "grad_norm": 0.6899500489234924, + "learning_rate": 8.659008270329119e-05, + "loss": 2.4938, + "step": 10903 + }, + { + "epoch": 0.8799935437010734, + "grad_norm": 0.6939221024513245, + "learning_rate": 8.657443856128107e-05, + "loss": 2.5358, + "step": 10904 + }, + { + "epoch": 0.8800742474376564, + "grad_norm": 0.6454630494117737, + "learning_rate": 8.655879475387102e-05, + "loss": 2.5528, + "step": 10905 + }, + { + "epoch": 0.8801549511742394, + "grad_norm": 0.7142425775527954, + "learning_rate": 8.654315128145099e-05, + "loss": 2.5668, + "step": 10906 + }, + { + "epoch": 0.8802356549108223, + "grad_norm": 0.7512764930725098, + "learning_rate": 8.652750814441075e-05, + "loss": 2.5224, + "step": 10907 + }, + { + "epoch": 0.8803163586474054, + "grad_norm": 0.6599575877189636, + "learning_rate": 8.651186534314026e-05, + "loss": 2.5363, + "step": 10908 + }, + { + "epoch": 
0.8803970623839884, + "grad_norm": 0.6787410974502563, + "learning_rate": 8.649622287802935e-05, + "loss": 2.4587, + "step": 10909 + }, + { + "epoch": 0.8804777661205714, + "grad_norm": 0.7124783396720886, + "learning_rate": 8.648058074946786e-05, + "loss": 2.5842, + "step": 10910 + }, + { + "epoch": 0.8805584698571544, + "grad_norm": 0.6698839664459229, + "learning_rate": 8.646493895784562e-05, + "loss": 2.513, + "step": 10911 + }, + { + "epoch": 0.8806391735937374, + "grad_norm": 0.6660044193267822, + "learning_rate": 8.644929750355249e-05, + "loss": 2.4996, + "step": 10912 + }, + { + "epoch": 0.8807198773303204, + "grad_norm": 0.7060455083847046, + "learning_rate": 8.643365638697828e-05, + "loss": 2.5497, + "step": 10913 + }, + { + "epoch": 0.8808005810669034, + "grad_norm": 0.6835277676582336, + "learning_rate": 8.641801560851281e-05, + "loss": 2.5198, + "step": 10914 + }, + { + "epoch": 0.8808812848034864, + "grad_norm": 0.6994042992591858, + "learning_rate": 8.640237516854595e-05, + "loss": 2.5692, + "step": 10915 + }, + { + "epoch": 0.8809619885400694, + "grad_norm": 0.6583377718925476, + "learning_rate": 8.63867350674674e-05, + "loss": 2.5025, + "step": 10916 + }, + { + "epoch": 0.8810426922766524, + "grad_norm": 0.6882332563400269, + "learning_rate": 8.637109530566698e-05, + "loss": 2.5343, + "step": 10917 + }, + { + "epoch": 0.8811233960132354, + "grad_norm": 0.6329876184463501, + "learning_rate": 8.635545588353449e-05, + "loss": 2.5335, + "step": 10918 + }, + { + "epoch": 0.8812040997498184, + "grad_norm": 0.713196337223053, + "learning_rate": 8.633981680145975e-05, + "loss": 2.4814, + "step": 10919 + }, + { + "epoch": 0.8812848034864014, + "grad_norm": 0.7388820648193359, + "learning_rate": 8.632417805983246e-05, + "loss": 2.4927, + "step": 10920 + }, + { + "epoch": 0.8813655072229845, + "grad_norm": 0.7316160798072815, + "learning_rate": 8.63085396590424e-05, + "loss": 2.508, + "step": 10921 + }, + { + "epoch": 0.8814462109595674, + "grad_norm": 
0.6690139174461365, + "learning_rate": 8.629290159947934e-05, + "loss": 2.5719, + "step": 10922 + }, + { + "epoch": 0.8815269146961504, + "grad_norm": 0.6369553208351135, + "learning_rate": 8.627726388153297e-05, + "loss": 2.5277, + "step": 10923 + }, + { + "epoch": 0.8816076184327334, + "grad_norm": 0.6870365738868713, + "learning_rate": 8.626162650559306e-05, + "loss": 2.4731, + "step": 10924 + }, + { + "epoch": 0.8816883221693165, + "grad_norm": 0.6890872716903687, + "learning_rate": 8.624598947204938e-05, + "loss": 2.5417, + "step": 10925 + }, + { + "epoch": 0.8817690259058995, + "grad_norm": 0.6548230051994324, + "learning_rate": 8.623035278129156e-05, + "loss": 2.4888, + "step": 10926 + }, + { + "epoch": 0.8818497296424824, + "grad_norm": 0.6835262775421143, + "learning_rate": 8.621471643370933e-05, + "loss": 2.531, + "step": 10927 + }, + { + "epoch": 0.8819304333790654, + "grad_norm": 0.6910626292228699, + "learning_rate": 8.619908042969243e-05, + "loss": 2.4864, + "step": 10928 + }, + { + "epoch": 0.8820111371156485, + "grad_norm": 0.6727725267410278, + "learning_rate": 8.618344476963049e-05, + "loss": 2.5063, + "step": 10929 + }, + { + "epoch": 0.8820918408522315, + "grad_norm": 0.7285245656967163, + "learning_rate": 8.616780945391323e-05, + "loss": 2.5036, + "step": 10930 + }, + { + "epoch": 0.8821725445888144, + "grad_norm": 0.6561840176582336, + "learning_rate": 8.615217448293035e-05, + "loss": 2.5152, + "step": 10931 + }, + { + "epoch": 0.8822532483253974, + "grad_norm": 0.6524627208709717, + "learning_rate": 8.613653985707144e-05, + "loss": 2.4827, + "step": 10932 + }, + { + "epoch": 0.8823339520619805, + "grad_norm": 0.6815671920776367, + "learning_rate": 8.612090557672619e-05, + "loss": 2.5385, + "step": 10933 + }, + { + "epoch": 0.8824146557985635, + "grad_norm": 0.7479865550994873, + "learning_rate": 8.610527164228429e-05, + "loss": 2.5311, + "step": 10934 + }, + { + "epoch": 0.8824953595351465, + "grad_norm": 0.699504554271698, + "learning_rate": 
8.608963805413535e-05, + "loss": 2.5332, + "step": 10935 + }, + { + "epoch": 0.8825760632717294, + "grad_norm": 0.7081198692321777, + "learning_rate": 8.607400481266896e-05, + "loss": 2.5636, + "step": 10936 + }, + { + "epoch": 0.8826567670083125, + "grad_norm": 0.7020730972290039, + "learning_rate": 8.605837191827478e-05, + "loss": 2.498, + "step": 10937 + }, + { + "epoch": 0.8827374707448955, + "grad_norm": 0.8004096150398254, + "learning_rate": 8.604273937134242e-05, + "loss": 2.5352, + "step": 10938 + }, + { + "epoch": 0.8828181744814785, + "grad_norm": 0.6399645209312439, + "learning_rate": 8.602710717226147e-05, + "loss": 2.5673, + "step": 10939 + }, + { + "epoch": 0.8828988782180615, + "grad_norm": 0.683195173740387, + "learning_rate": 8.601147532142153e-05, + "loss": 2.4812, + "step": 10940 + }, + { + "epoch": 0.8829795819546445, + "grad_norm": 0.7783642411231995, + "learning_rate": 8.599584381921224e-05, + "loss": 2.4812, + "step": 10941 + }, + { + "epoch": 0.8830602856912275, + "grad_norm": 0.7107423543930054, + "learning_rate": 8.598021266602308e-05, + "loss": 2.5527, + "step": 10942 + }, + { + "epoch": 0.8831409894278105, + "grad_norm": 0.6419345140457153, + "learning_rate": 8.596458186224365e-05, + "loss": 2.5642, + "step": 10943 + }, + { + "epoch": 0.8832216931643935, + "grad_norm": 0.6897309422492981, + "learning_rate": 8.59489514082636e-05, + "loss": 2.5743, + "step": 10944 + }, + { + "epoch": 0.8833023969009766, + "grad_norm": 0.6901495456695557, + "learning_rate": 8.593332130447236e-05, + "loss": 2.5139, + "step": 10945 + }, + { + "epoch": 0.8833831006375595, + "grad_norm": 0.6865388751029968, + "learning_rate": 8.591769155125953e-05, + "loss": 2.5281, + "step": 10946 + }, + { + "epoch": 0.8834638043741425, + "grad_norm": 0.7070403099060059, + "learning_rate": 8.590206214901465e-05, + "loss": 2.4648, + "step": 10947 + }, + { + "epoch": 0.8835445081107255, + "grad_norm": 0.6846395134925842, + "learning_rate": 8.588643309812721e-05, + "loss": 
2.4792, + "step": 10948 + }, + { + "epoch": 0.8836252118473086, + "grad_norm": 0.6875495314598083, + "learning_rate": 8.587080439898675e-05, + "loss": 2.5126, + "step": 10949 + }, + { + "epoch": 0.8837059155838916, + "grad_norm": 0.670098066329956, + "learning_rate": 8.58551760519828e-05, + "loss": 2.4922, + "step": 10950 + }, + { + "epoch": 0.8837866193204745, + "grad_norm": 0.6675527691841125, + "learning_rate": 8.583954805750487e-05, + "loss": 2.499, + "step": 10951 + }, + { + "epoch": 0.8838673230570575, + "grad_norm": 0.6694127321243286, + "learning_rate": 8.582392041594236e-05, + "loss": 2.5286, + "step": 10952 + }, + { + "epoch": 0.8839480267936406, + "grad_norm": 0.7291092872619629, + "learning_rate": 8.580829312768482e-05, + "loss": 2.5705, + "step": 10953 + }, + { + "epoch": 0.8840287305302236, + "grad_norm": 0.709904670715332, + "learning_rate": 8.579266619312174e-05, + "loss": 2.5238, + "step": 10954 + }, + { + "epoch": 0.8841094342668065, + "grad_norm": 0.7037622332572937, + "learning_rate": 8.577703961264254e-05, + "loss": 2.5491, + "step": 10955 + }, + { + "epoch": 0.8841901380033895, + "grad_norm": 0.7553049325942993, + "learning_rate": 8.576141338663668e-05, + "loss": 2.5643, + "step": 10956 + }, + { + "epoch": 0.8842708417399726, + "grad_norm": 0.7177377343177795, + "learning_rate": 8.574578751549364e-05, + "loss": 2.49, + "step": 10957 + }, + { + "epoch": 0.8843515454765556, + "grad_norm": 0.682668149471283, + "learning_rate": 8.573016199960283e-05, + "loss": 2.5221, + "step": 10958 + }, + { + "epoch": 0.8844322492131386, + "grad_norm": 0.7508956789970398, + "learning_rate": 8.571453683935366e-05, + "loss": 2.5766, + "step": 10959 + }, + { + "epoch": 0.8845129529497215, + "grad_norm": 0.6495946645736694, + "learning_rate": 8.569891203513562e-05, + "loss": 2.534, + "step": 10960 + }, + { + "epoch": 0.8845936566863046, + "grad_norm": 0.7362824082374573, + "learning_rate": 8.568328758733806e-05, + "loss": 2.4614, + "step": 10961 + }, + { + "epoch": 
0.8846743604228876, + "grad_norm": 0.6571496725082397, + "learning_rate": 8.566766349635037e-05, + "loss": 2.4393, + "step": 10962 + }, + { + "epoch": 0.8847550641594706, + "grad_norm": 0.7088329195976257, + "learning_rate": 8.5652039762562e-05, + "loss": 2.5476, + "step": 10963 + }, + { + "epoch": 0.8848357678960536, + "grad_norm": 0.6414440274238586, + "learning_rate": 8.56364163863623e-05, + "loss": 2.4668, + "step": 10964 + }, + { + "epoch": 0.8849164716326365, + "grad_norm": 0.7333478331565857, + "learning_rate": 8.562079336814063e-05, + "loss": 2.5151, + "step": 10965 + }, + { + "epoch": 0.8849971753692196, + "grad_norm": 0.638038694858551, + "learning_rate": 8.560517070828638e-05, + "loss": 2.5063, + "step": 10966 + }, + { + "epoch": 0.8850778791058026, + "grad_norm": 0.638921320438385, + "learning_rate": 8.558954840718896e-05, + "loss": 2.4769, + "step": 10967 + }, + { + "epoch": 0.8851585828423856, + "grad_norm": 0.6923465728759766, + "learning_rate": 8.557392646523759e-05, + "loss": 2.5388, + "step": 10968 + }, + { + "epoch": 0.8852392865789686, + "grad_norm": 0.7095212936401367, + "learning_rate": 8.555830488282169e-05, + "loss": 2.4955, + "step": 10969 + }, + { + "epoch": 0.8853199903155516, + "grad_norm": 0.689908504486084, + "learning_rate": 8.554268366033065e-05, + "loss": 2.4998, + "step": 10970 + }, + { + "epoch": 0.8854006940521346, + "grad_norm": 0.6551975011825562, + "learning_rate": 8.552706279815366e-05, + "loss": 2.4965, + "step": 10971 + }, + { + "epoch": 0.8854813977887176, + "grad_norm": 0.7239118218421936, + "learning_rate": 8.551144229668012e-05, + "loss": 2.5785, + "step": 10972 + }, + { + "epoch": 0.8855621015253006, + "grad_norm": 0.6743230819702148, + "learning_rate": 8.549582215629932e-05, + "loss": 2.5146, + "step": 10973 + }, + { + "epoch": 0.8856428052618837, + "grad_norm": 0.6991584300994873, + "learning_rate": 8.548020237740052e-05, + "loss": 2.5524, + "step": 10974 + }, + { + "epoch": 0.8857235089984666, + "grad_norm": 
0.6605305075645447, + "learning_rate": 8.546458296037304e-05, + "loss": 2.5505, + "step": 10975 + }, + { + "epoch": 0.8858042127350496, + "grad_norm": 0.7011568546295166, + "learning_rate": 8.54489639056062e-05, + "loss": 2.4381, + "step": 10976 + }, + { + "epoch": 0.8858849164716326, + "grad_norm": 0.7015339136123657, + "learning_rate": 8.543334521348916e-05, + "loss": 2.5432, + "step": 10977 + }, + { + "epoch": 0.8859656202082157, + "grad_norm": 0.6892278790473938, + "learning_rate": 8.541772688441124e-05, + "loss": 2.5286, + "step": 10978 + }, + { + "epoch": 0.8860463239447987, + "grad_norm": 0.6680187582969666, + "learning_rate": 8.540210891876168e-05, + "loss": 2.439, + "step": 10979 + }, + { + "epoch": 0.8861270276813816, + "grad_norm": 0.7043240666389465, + "learning_rate": 8.538649131692975e-05, + "loss": 2.5558, + "step": 10980 + }, + { + "epoch": 0.8862077314179646, + "grad_norm": 0.6940229535102844, + "learning_rate": 8.537087407930463e-05, + "loss": 2.5219, + "step": 10981 + }, + { + "epoch": 0.8862884351545477, + "grad_norm": 0.6571553945541382, + "learning_rate": 8.535525720627558e-05, + "loss": 2.5054, + "step": 10982 + }, + { + "epoch": 0.8863691388911307, + "grad_norm": 0.6846656203269958, + "learning_rate": 8.533964069823182e-05, + "loss": 2.497, + "step": 10983 + }, + { + "epoch": 0.8864498426277136, + "grad_norm": 0.6838627457618713, + "learning_rate": 8.53240245555625e-05, + "loss": 2.5495, + "step": 10984 + }, + { + "epoch": 0.8865305463642966, + "grad_norm": 0.6825091242790222, + "learning_rate": 8.530840877865687e-05, + "loss": 2.5656, + "step": 10985 + }, + { + "epoch": 0.8866112501008797, + "grad_norm": 0.7368674278259277, + "learning_rate": 8.529279336790414e-05, + "loss": 2.5378, + "step": 10986 + }, + { + "epoch": 0.8866919538374627, + "grad_norm": 0.7333693504333496, + "learning_rate": 8.527717832369338e-05, + "loss": 2.506, + "step": 10987 + }, + { + "epoch": 0.8867726575740457, + "grad_norm": 0.6623306274414062, + "learning_rate": 
8.526156364641384e-05, + "loss": 2.4824, + "step": 10988 + }, + { + "epoch": 0.8868533613106286, + "grad_norm": 0.6863973140716553, + "learning_rate": 8.524594933645468e-05, + "loss": 2.536, + "step": 10989 + }, + { + "epoch": 0.8869340650472117, + "grad_norm": 0.6805100440979004, + "learning_rate": 8.523033539420501e-05, + "loss": 2.4954, + "step": 10990 + }, + { + "epoch": 0.8870147687837947, + "grad_norm": 0.6672216653823853, + "learning_rate": 8.521472182005399e-05, + "loss": 2.4893, + "step": 10991 + }, + { + "epoch": 0.8870954725203777, + "grad_norm": 0.7310158610343933, + "learning_rate": 8.519910861439079e-05, + "loss": 2.5317, + "step": 10992 + }, + { + "epoch": 0.8871761762569607, + "grad_norm": 0.6820743083953857, + "learning_rate": 8.518349577760445e-05, + "loss": 2.4482, + "step": 10993 + }, + { + "epoch": 0.8872568799935437, + "grad_norm": 0.6660269498825073, + "learning_rate": 8.516788331008411e-05, + "loss": 2.5353, + "step": 10994 + }, + { + "epoch": 0.8873375837301267, + "grad_norm": 0.676243007183075, + "learning_rate": 8.51522712122189e-05, + "loss": 2.531, + "step": 10995 + }, + { + "epoch": 0.8874182874667097, + "grad_norm": 0.6677152514457703, + "learning_rate": 8.513665948439796e-05, + "loss": 2.4732, + "step": 10996 + }, + { + "epoch": 0.8874989912032927, + "grad_norm": 0.7341045141220093, + "learning_rate": 8.512104812701027e-05, + "loss": 2.5668, + "step": 10997 + }, + { + "epoch": 0.8875796949398758, + "grad_norm": 0.6475326418876648, + "learning_rate": 8.510543714044496e-05, + "loss": 2.5026, + "step": 10998 + }, + { + "epoch": 0.8876603986764587, + "grad_norm": 0.7335529923439026, + "learning_rate": 8.50898265250911e-05, + "loss": 2.4946, + "step": 10999 + }, + { + "epoch": 0.8877411024130417, + "grad_norm": 0.760108232498169, + "learning_rate": 8.507421628133772e-05, + "loss": 2.5697, + "step": 11000 + }, + { + "epoch": 0.8877411024130417, + "eval_loss": 2.450413465499878, + "eval_runtime": 975.281, + "eval_samples_per_second": 2.686, 
+ "eval_steps_per_second": 0.448, + "step": 11000 + }, + { + "epoch": 0.8878218061496247, + "grad_norm": 0.6420160531997681, + "learning_rate": 8.505860640957391e-05, + "loss": 2.5842, + "step": 11001 + }, + { + "epoch": 0.8879025098862078, + "grad_norm": 0.6625204086303711, + "learning_rate": 8.50429969101887e-05, + "loss": 2.4771, + "step": 11002 + }, + { + "epoch": 0.8879832136227908, + "grad_norm": 0.7430149912834167, + "learning_rate": 8.502738778357107e-05, + "loss": 2.5509, + "step": 11003 + }, + { + "epoch": 0.8880639173593737, + "grad_norm": 0.663624107837677, + "learning_rate": 8.501177903011008e-05, + "loss": 2.504, + "step": 11004 + }, + { + "epoch": 0.8881446210959567, + "grad_norm": 0.6638087630271912, + "learning_rate": 8.499617065019476e-05, + "loss": 2.492, + "step": 11005 + }, + { + "epoch": 0.8882253248325398, + "grad_norm": 0.7321780323982239, + "learning_rate": 8.498056264421406e-05, + "loss": 2.5808, + "step": 11006 + }, + { + "epoch": 0.8883060285691228, + "grad_norm": 0.7108619809150696, + "learning_rate": 8.4964955012557e-05, + "loss": 2.6185, + "step": 11007 + }, + { + "epoch": 0.8883867323057058, + "grad_norm": 0.6745856404304504, + "learning_rate": 8.494934775561258e-05, + "loss": 2.576, + "step": 11008 + }, + { + "epoch": 0.8884674360422887, + "grad_norm": 0.8002225756645203, + "learning_rate": 8.493374087376976e-05, + "loss": 2.5598, + "step": 11009 + }, + { + "epoch": 0.8885481397788718, + "grad_norm": 0.6848840713500977, + "learning_rate": 8.491813436741746e-05, + "loss": 2.5218, + "step": 11010 + }, + { + "epoch": 0.8886288435154548, + "grad_norm": 0.6464105248451233, + "learning_rate": 8.490252823694471e-05, + "loss": 2.5503, + "step": 11011 + }, + { + "epoch": 0.8887095472520378, + "grad_norm": 0.7165790796279907, + "learning_rate": 8.488692248274045e-05, + "loss": 2.5104, + "step": 11012 + }, + { + "epoch": 0.8887902509886207, + "grad_norm": 0.6832898259162903, + "learning_rate": 8.487131710519355e-05, + "loss": 2.5379, + "step": 
11013 + }, + { + "epoch": 0.8888709547252038, + "grad_norm": 0.6992432475090027, + "learning_rate": 8.485571210469296e-05, + "loss": 2.5388, + "step": 11014 + }, + { + "epoch": 0.8889516584617868, + "grad_norm": 0.6410119533538818, + "learning_rate": 8.484010748162765e-05, + "loss": 2.5237, + "step": 11015 + }, + { + "epoch": 0.8890323621983698, + "grad_norm": 0.716248095035553, + "learning_rate": 8.482450323638647e-05, + "loss": 2.4977, + "step": 11016 + }, + { + "epoch": 0.8891130659349528, + "grad_norm": 0.6620567440986633, + "learning_rate": 8.480889936935833e-05, + "loss": 2.5088, + "step": 11017 + }, + { + "epoch": 0.8891937696715357, + "grad_norm": 0.7311015129089355, + "learning_rate": 8.479329588093217e-05, + "loss": 2.5547, + "step": 11018 + }, + { + "epoch": 0.8892744734081188, + "grad_norm": 0.757203996181488, + "learning_rate": 8.477769277149676e-05, + "loss": 2.5681, + "step": 11019 + }, + { + "epoch": 0.8893551771447018, + "grad_norm": 0.6941282153129578, + "learning_rate": 8.476209004144107e-05, + "loss": 2.5078, + "step": 11020 + }, + { + "epoch": 0.8894358808812848, + "grad_norm": 0.6381667256355286, + "learning_rate": 8.474648769115396e-05, + "loss": 2.5371, + "step": 11021 + }, + { + "epoch": 0.8895165846178678, + "grad_norm": 0.7978621125221252, + "learning_rate": 8.473088572102422e-05, + "loss": 2.5384, + "step": 11022 + }, + { + "epoch": 0.8895972883544508, + "grad_norm": 0.7229189872741699, + "learning_rate": 8.471528413144072e-05, + "loss": 2.5469, + "step": 11023 + }, + { + "epoch": 0.8896779920910338, + "grad_norm": 0.705545961856842, + "learning_rate": 8.469968292279231e-05, + "loss": 2.5281, + "step": 11024 + }, + { + "epoch": 0.8897586958276168, + "grad_norm": 0.7259972095489502, + "learning_rate": 8.468408209546777e-05, + "loss": 2.5485, + "step": 11025 + }, + { + "epoch": 0.8898393995641998, + "grad_norm": 0.6859608888626099, + "learning_rate": 8.466848164985594e-05, + "loss": 2.5548, + "step": 11026 + }, + { + "epoch": 
0.8899201033007829, + "grad_norm": 0.7036644816398621, + "learning_rate": 8.465288158634565e-05, + "loss": 2.5159, + "step": 11027 + }, + { + "epoch": 0.8900008070373658, + "grad_norm": 0.6899380087852478, + "learning_rate": 8.463728190532569e-05, + "loss": 2.5037, + "step": 11028 + }, + { + "epoch": 0.8900815107739488, + "grad_norm": 0.7428410649299622, + "learning_rate": 8.462168260718477e-05, + "loss": 2.5074, + "step": 11029 + }, + { + "epoch": 0.8901622145105318, + "grad_norm": 0.6724158525466919, + "learning_rate": 8.460608369231173e-05, + "loss": 2.5544, + "step": 11030 + }, + { + "epoch": 0.8902429182471149, + "grad_norm": 0.6516450643539429, + "learning_rate": 8.459048516109535e-05, + "loss": 2.5152, + "step": 11031 + }, + { + "epoch": 0.8903236219836979, + "grad_norm": 0.7013405561447144, + "learning_rate": 8.457488701392434e-05, + "loss": 2.5116, + "step": 11032 + }, + { + "epoch": 0.8904043257202808, + "grad_norm": 0.7207479476928711, + "learning_rate": 8.455928925118747e-05, + "loss": 2.6041, + "step": 11033 + }, + { + "epoch": 0.8904850294568638, + "grad_norm": 0.69600510597229, + "learning_rate": 8.454369187327348e-05, + "loss": 2.5794, + "step": 11034 + }, + { + "epoch": 0.8905657331934469, + "grad_norm": 0.6831288933753967, + "learning_rate": 8.452809488057108e-05, + "loss": 2.4682, + "step": 11035 + }, + { + "epoch": 0.8906464369300299, + "grad_norm": 0.6978991627693176, + "learning_rate": 8.451249827346901e-05, + "loss": 2.4862, + "step": 11036 + }, + { + "epoch": 0.8907271406666128, + "grad_norm": 0.6772337555885315, + "learning_rate": 8.4496902052356e-05, + "loss": 2.5357, + "step": 11037 + }, + { + "epoch": 0.8908078444031958, + "grad_norm": 0.6735778450965881, + "learning_rate": 8.448130621762067e-05, + "loss": 2.5115, + "step": 11038 + }, + { + "epoch": 0.8908885481397789, + "grad_norm": 0.6695345044136047, + "learning_rate": 8.446571076965177e-05, + "loss": 2.5083, + "step": 11039 + }, + { + "epoch": 0.8909692518763619, + "grad_norm": 
0.685343325138092, + "learning_rate": 8.445011570883796e-05, + "loss": 2.5221, + "step": 11040 + }, + { + "epoch": 0.8910499556129449, + "grad_norm": 0.7030319571495056, + "learning_rate": 8.443452103556792e-05, + "loss": 2.5708, + "step": 11041 + }, + { + "epoch": 0.8911306593495278, + "grad_norm": 0.6910343766212463, + "learning_rate": 8.441892675023029e-05, + "loss": 2.5373, + "step": 11042 + }, + { + "epoch": 0.8912113630861109, + "grad_norm": 0.7207868099212646, + "learning_rate": 8.440333285321374e-05, + "loss": 2.5862, + "step": 11043 + }, + { + "epoch": 0.8912920668226939, + "grad_norm": 0.6780788898468018, + "learning_rate": 8.438773934490692e-05, + "loss": 2.562, + "step": 11044 + }, + { + "epoch": 0.8913727705592769, + "grad_norm": 0.7010074257850647, + "learning_rate": 8.437214622569842e-05, + "loss": 2.4556, + "step": 11045 + }, + { + "epoch": 0.8914534742958599, + "grad_norm": 0.6763667464256287, + "learning_rate": 8.435655349597689e-05, + "loss": 2.5402, + "step": 11046 + }, + { + "epoch": 0.891534178032443, + "grad_norm": 0.6870944499969482, + "learning_rate": 8.4340961156131e-05, + "loss": 2.5307, + "step": 11047 + }, + { + "epoch": 0.8916148817690259, + "grad_norm": 0.7835623025894165, + "learning_rate": 8.432536920654923e-05, + "loss": 2.4974, + "step": 11048 + }, + { + "epoch": 0.8916955855056089, + "grad_norm": 0.7551318407058716, + "learning_rate": 8.430977764762024e-05, + "loss": 2.5206, + "step": 11049 + }, + { + "epoch": 0.8917762892421919, + "grad_norm": 0.6486842632293701, + "learning_rate": 8.429418647973265e-05, + "loss": 2.4909, + "step": 11050 + }, + { + "epoch": 0.891856992978775, + "grad_norm": 0.6894064545631409, + "learning_rate": 8.427859570327494e-05, + "loss": 2.5846, + "step": 11051 + }, + { + "epoch": 0.8919376967153579, + "grad_norm": 0.7597395181655884, + "learning_rate": 8.426300531863571e-05, + "loss": 2.5259, + "step": 11052 + }, + { + "epoch": 0.8920184004519409, + "grad_norm": 0.6784652471542358, + "learning_rate": 
8.42474153262036e-05, + "loss": 2.5048, + "step": 11053 + }, + { + "epoch": 0.8920991041885239, + "grad_norm": 0.7703847885131836, + "learning_rate": 8.4231825726367e-05, + "loss": 2.4962, + "step": 11054 + }, + { + "epoch": 0.892179807925107, + "grad_norm": 0.6646561026573181, + "learning_rate": 8.421623651951454e-05, + "loss": 2.491, + "step": 11055 + }, + { + "epoch": 0.89226051166169, + "grad_norm": 0.6901054978370667, + "learning_rate": 8.420064770603475e-05, + "loss": 2.515, + "step": 11056 + }, + { + "epoch": 0.8923412153982729, + "grad_norm": 0.6789328455924988, + "learning_rate": 8.41850592863161e-05, + "loss": 2.5481, + "step": 11057 + }, + { + "epoch": 0.8924219191348559, + "grad_norm": 0.6211017370223999, + "learning_rate": 8.41694712607471e-05, + "loss": 2.51, + "step": 11058 + }, + { + "epoch": 0.892502622871439, + "grad_norm": 0.6482260823249817, + "learning_rate": 8.415388362971626e-05, + "loss": 2.5418, + "step": 11059 + }, + { + "epoch": 0.892583326608022, + "grad_norm": 0.7627651691436768, + "learning_rate": 8.413829639361209e-05, + "loss": 2.5033, + "step": 11060 + }, + { + "epoch": 0.892664030344605, + "grad_norm": 0.6560852527618408, + "learning_rate": 8.412270955282302e-05, + "loss": 2.5442, + "step": 11061 + }, + { + "epoch": 0.8927447340811879, + "grad_norm": 0.7479087114334106, + "learning_rate": 8.410712310773752e-05, + "loss": 2.5189, + "step": 11062 + }, + { + "epoch": 0.892825437817771, + "grad_norm": 0.6970879435539246, + "learning_rate": 8.409153705874411e-05, + "loss": 2.5418, + "step": 11063 + }, + { + "epoch": 0.892906141554354, + "grad_norm": 0.6514548659324646, + "learning_rate": 8.407595140623113e-05, + "loss": 2.5277, + "step": 11064 + }, + { + "epoch": 0.892986845290937, + "grad_norm": 0.6745554804801941, + "learning_rate": 8.406036615058707e-05, + "loss": 2.5085, + "step": 11065 + }, + { + "epoch": 0.89306754902752, + "grad_norm": 0.7510363459587097, + "learning_rate": 8.404478129220037e-05, + "loss": 2.4941, + "step": 11066 
+ }, + { + "epoch": 0.8931482527641029, + "grad_norm": 0.6531470417976379, + "learning_rate": 8.402919683145941e-05, + "loss": 2.5363, + "step": 11067 + }, + { + "epoch": 0.893228956500686, + "grad_norm": 0.6861493587493896, + "learning_rate": 8.401361276875262e-05, + "loss": 2.6369, + "step": 11068 + }, + { + "epoch": 0.893309660237269, + "grad_norm": 0.6029497981071472, + "learning_rate": 8.39980291044684e-05, + "loss": 2.4953, + "step": 11069 + }, + { + "epoch": 0.893390363973852, + "grad_norm": 0.6831715106964111, + "learning_rate": 8.39824458389951e-05, + "loss": 2.5074, + "step": 11070 + }, + { + "epoch": 0.8934710677104349, + "grad_norm": 0.7076299786567688, + "learning_rate": 8.396686297272112e-05, + "loss": 2.5934, + "step": 11071 + }, + { + "epoch": 0.893551771447018, + "grad_norm": 0.6941438913345337, + "learning_rate": 8.395128050603487e-05, + "loss": 2.5338, + "step": 11072 + }, + { + "epoch": 0.893632475183601, + "grad_norm": 0.6867249011993408, + "learning_rate": 8.393569843932463e-05, + "loss": 2.5311, + "step": 11073 + }, + { + "epoch": 0.893713178920184, + "grad_norm": 0.623991847038269, + "learning_rate": 8.392011677297877e-05, + "loss": 2.5133, + "step": 11074 + }, + { + "epoch": 0.893793882656767, + "grad_norm": 0.6808422803878784, + "learning_rate": 8.390453550738564e-05, + "loss": 2.5398, + "step": 11075 + }, + { + "epoch": 0.89387458639335, + "grad_norm": 0.7136701345443726, + "learning_rate": 8.388895464293357e-05, + "loss": 2.5415, + "step": 11076 + }, + { + "epoch": 0.893955290129933, + "grad_norm": 0.6814287304878235, + "learning_rate": 8.387337418001084e-05, + "loss": 2.4782, + "step": 11077 + }, + { + "epoch": 0.894035993866516, + "grad_norm": 0.8101940155029297, + "learning_rate": 8.385779411900579e-05, + "loss": 2.5292, + "step": 11078 + }, + { + "epoch": 0.894116697603099, + "grad_norm": 0.7106796503067017, + "learning_rate": 8.384221446030676e-05, + "loss": 2.5819, + "step": 11079 + }, + { + "epoch": 0.8941974013396821, + 
"grad_norm": 0.7840015292167664, + "learning_rate": 8.382663520430191e-05, + "loss": 2.5243, + "step": 11080 + }, + { + "epoch": 0.894278105076265, + "grad_norm": 0.7037288546562195, + "learning_rate": 8.381105635137959e-05, + "loss": 2.5606, + "step": 11081 + }, + { + "epoch": 0.894358808812848, + "grad_norm": 0.671558678150177, + "learning_rate": 8.379547790192812e-05, + "loss": 2.4923, + "step": 11082 + }, + { + "epoch": 0.894439512549431, + "grad_norm": 0.6789675951004028, + "learning_rate": 8.377989985633567e-05, + "loss": 2.5281, + "step": 11083 + }, + { + "epoch": 0.8945202162860141, + "grad_norm": 0.6777840852737427, + "learning_rate": 8.37643222149905e-05, + "loss": 2.5159, + "step": 11084 + }, + { + "epoch": 0.8946009200225971, + "grad_norm": 0.6920693516731262, + "learning_rate": 8.374874497828089e-05, + "loss": 2.4952, + "step": 11085 + }, + { + "epoch": 0.89468162375918, + "grad_norm": 0.7394022941589355, + "learning_rate": 8.373316814659502e-05, + "loss": 2.5035, + "step": 11086 + }, + { + "epoch": 0.894762327495763, + "grad_norm": 0.625960648059845, + "learning_rate": 8.37175917203211e-05, + "loss": 2.5324, + "step": 11087 + }, + { + "epoch": 0.8948430312323461, + "grad_norm": 0.6848758459091187, + "learning_rate": 8.370201569984742e-05, + "loss": 2.5312, + "step": 11088 + }, + { + "epoch": 0.8949237349689291, + "grad_norm": 0.7207037210464478, + "learning_rate": 8.368644008556205e-05, + "loss": 2.5807, + "step": 11089 + }, + { + "epoch": 0.895004438705512, + "grad_norm": 0.7582261562347412, + "learning_rate": 8.367086487785326e-05, + "loss": 2.532, + "step": 11090 + }, + { + "epoch": 0.895085142442095, + "grad_norm": 0.6916806101799011, + "learning_rate": 8.36552900771092e-05, + "loss": 2.4772, + "step": 11091 + }, + { + "epoch": 0.8951658461786781, + "grad_norm": 0.6457386016845703, + "learning_rate": 8.363971568371805e-05, + "loss": 2.4952, + "step": 11092 + }, + { + "epoch": 0.8952465499152611, + "grad_norm": 0.7006754279136658, + 
"learning_rate": 8.362414169806792e-05, + "loss": 2.5818, + "step": 11093 + }, + { + "epoch": 0.8953272536518441, + "grad_norm": 0.6939932703971863, + "learning_rate": 8.3608568120547e-05, + "loss": 2.5411, + "step": 11094 + }, + { + "epoch": 0.895407957388427, + "grad_norm": 0.6314546465873718, + "learning_rate": 8.359299495154343e-05, + "loss": 2.5408, + "step": 11095 + }, + { + "epoch": 0.8954886611250101, + "grad_norm": 0.7202826738357544, + "learning_rate": 8.357742219144529e-05, + "loss": 2.4925, + "step": 11096 + }, + { + "epoch": 0.8955693648615931, + "grad_norm": 0.6475295424461365, + "learning_rate": 8.356184984064071e-05, + "loss": 2.5023, + "step": 11097 + }, + { + "epoch": 0.8956500685981761, + "grad_norm": 0.6161238551139832, + "learning_rate": 8.354627789951785e-05, + "loss": 2.5053, + "step": 11098 + }, + { + "epoch": 0.8957307723347591, + "grad_norm": 0.6919825077056885, + "learning_rate": 8.353070636846472e-05, + "loss": 2.5387, + "step": 11099 + }, + { + "epoch": 0.8958114760713421, + "grad_norm": 0.6374878883361816, + "learning_rate": 8.351513524786944e-05, + "loss": 2.5526, + "step": 11100 + }, + { + "epoch": 0.8958921798079251, + "grad_norm": 0.7041093707084656, + "learning_rate": 8.349956453812009e-05, + "loss": 2.5282, + "step": 11101 + }, + { + "epoch": 0.8959728835445081, + "grad_norm": 0.7252324819564819, + "learning_rate": 8.348399423960471e-05, + "loss": 2.5723, + "step": 11102 + }, + { + "epoch": 0.8960535872810911, + "grad_norm": 0.681682825088501, + "learning_rate": 8.346842435271137e-05, + "loss": 2.5284, + "step": 11103 + }, + { + "epoch": 0.8961342910176742, + "grad_norm": 0.7293850183486938, + "learning_rate": 8.34528548778281e-05, + "loss": 2.5014, + "step": 11104 + }, + { + "epoch": 0.8962149947542571, + "grad_norm": 0.7057846188545227, + "learning_rate": 8.343728581534299e-05, + "loss": 2.5502, + "step": 11105 + }, + { + "epoch": 0.8962956984908401, + "grad_norm": 0.6740830540657043, + "learning_rate": 8.342171716564398e-05, + 
"loss": 2.5205, + "step": 11106 + }, + { + "epoch": 0.8963764022274231, + "grad_norm": 0.6917470097541809, + "learning_rate": 8.340614892911907e-05, + "loss": 2.5216, + "step": 11107 + }, + { + "epoch": 0.8964571059640062, + "grad_norm": 0.7495635151863098, + "learning_rate": 8.339058110615638e-05, + "loss": 2.5509, + "step": 11108 + }, + { + "epoch": 0.8965378097005892, + "grad_norm": 0.6687765717506409, + "learning_rate": 8.33750136971438e-05, + "loss": 2.5286, + "step": 11109 + }, + { + "epoch": 0.8966185134371721, + "grad_norm": 0.6901381015777588, + "learning_rate": 8.335944670246931e-05, + "loss": 2.5545, + "step": 11110 + }, + { + "epoch": 0.8966992171737551, + "grad_norm": 0.6645506024360657, + "learning_rate": 8.334388012252094e-05, + "loss": 2.4883, + "step": 11111 + }, + { + "epoch": 0.8967799209103382, + "grad_norm": 0.6427997350692749, + "learning_rate": 8.332831395768662e-05, + "loss": 2.5103, + "step": 11112 + }, + { + "epoch": 0.8968606246469212, + "grad_norm": 0.7224035263061523, + "learning_rate": 8.331274820835425e-05, + "loss": 2.5086, + "step": 11113 + }, + { + "epoch": 0.8969413283835042, + "grad_norm": 0.6918233036994934, + "learning_rate": 8.329718287491188e-05, + "loss": 2.5222, + "step": 11114 + }, + { + "epoch": 0.8970220321200871, + "grad_norm": 0.735583484172821, + "learning_rate": 8.328161795774734e-05, + "loss": 2.5277, + "step": 11115 + }, + { + "epoch": 0.8971027358566702, + "grad_norm": 0.6624864339828491, + "learning_rate": 8.326605345724857e-05, + "loss": 2.532, + "step": 11116 + }, + { + "epoch": 0.8971834395932532, + "grad_norm": 0.6227770447731018, + "learning_rate": 8.325048937380352e-05, + "loss": 2.5386, + "step": 11117 + }, + { + "epoch": 0.8972641433298362, + "grad_norm": 0.6483022570610046, + "learning_rate": 8.323492570780004e-05, + "loss": 2.4958, + "step": 11118 + }, + { + "epoch": 0.8973448470664191, + "grad_norm": 0.7072618007659912, + "learning_rate": 8.321936245962602e-05, + "loss": 2.4931, + "step": 11119 + }, + 
{ + "epoch": 0.8974255508030021, + "grad_norm": 0.6848764419555664, + "learning_rate": 8.320379962966937e-05, + "loss": 2.4549, + "step": 11120 + }, + { + "epoch": 0.8975062545395852, + "grad_norm": 0.6819620132446289, + "learning_rate": 8.318823721831795e-05, + "loss": 2.5156, + "step": 11121 + }, + { + "epoch": 0.8975869582761682, + "grad_norm": 0.6834476590156555, + "learning_rate": 8.31726752259596e-05, + "loss": 2.507, + "step": 11122 + }, + { + "epoch": 0.8976676620127512, + "grad_norm": 0.6785772442817688, + "learning_rate": 8.315711365298214e-05, + "loss": 2.5086, + "step": 11123 + }, + { + "epoch": 0.8977483657493341, + "grad_norm": 0.6303566098213196, + "learning_rate": 8.314155249977351e-05, + "loss": 2.5087, + "step": 11124 + }, + { + "epoch": 0.8978290694859172, + "grad_norm": 0.6544361710548401, + "learning_rate": 8.31259917667214e-05, + "loss": 2.505, + "step": 11125 + }, + { + "epoch": 0.8979097732225002, + "grad_norm": 0.8135818243026733, + "learning_rate": 8.311043145421369e-05, + "loss": 2.5139, + "step": 11126 + }, + { + "epoch": 0.8979904769590832, + "grad_norm": 0.6744341254234314, + "learning_rate": 8.309487156263818e-05, + "loss": 2.4797, + "step": 11127 + }, + { + "epoch": 0.8980711806956662, + "grad_norm": 0.6138790845870972, + "learning_rate": 8.307931209238267e-05, + "loss": 2.5334, + "step": 11128 + }, + { + "epoch": 0.8981518844322492, + "grad_norm": 0.702434241771698, + "learning_rate": 8.306375304383492e-05, + "loss": 2.5343, + "step": 11129 + }, + { + "epoch": 0.8982325881688322, + "grad_norm": 0.6787155270576477, + "learning_rate": 8.304819441738275e-05, + "loss": 2.507, + "step": 11130 + }, + { + "epoch": 0.8983132919054152, + "grad_norm": 0.6963719129562378, + "learning_rate": 8.303263621341386e-05, + "loss": 2.5238, + "step": 11131 + }, + { + "epoch": 0.8983939956419982, + "grad_norm": 0.6623271107673645, + "learning_rate": 8.3017078432316e-05, + "loss": 2.5206, + "step": 11132 + }, + { + "epoch": 0.8984746993785813, + 
"grad_norm": 0.777222752571106, + "learning_rate": 8.300152107447701e-05, + "loss": 2.5004, + "step": 11133 + }, + { + "epoch": 0.8985554031151642, + "grad_norm": 0.6788455247879028, + "learning_rate": 8.29859641402845e-05, + "loss": 2.5735, + "step": 11134 + }, + { + "epoch": 0.8986361068517472, + "grad_norm": 0.6595063209533691, + "learning_rate": 8.297040763012624e-05, + "loss": 2.4988, + "step": 11135 + }, + { + "epoch": 0.8987168105883302, + "grad_norm": 0.7105697989463806, + "learning_rate": 8.295485154438994e-05, + "loss": 2.5531, + "step": 11136 + }, + { + "epoch": 0.8987975143249133, + "grad_norm": 0.6884949803352356, + "learning_rate": 8.29392958834633e-05, + "loss": 2.5158, + "step": 11137 + }, + { + "epoch": 0.8988782180614963, + "grad_norm": 0.7178345322608948, + "learning_rate": 8.2923740647734e-05, + "loss": 2.5836, + "step": 11138 + }, + { + "epoch": 0.8989589217980792, + "grad_norm": 0.7000541687011719, + "learning_rate": 8.290818583758973e-05, + "loss": 2.5345, + "step": 11139 + }, + { + "epoch": 0.8990396255346622, + "grad_norm": 0.6808128952980042, + "learning_rate": 8.289263145341816e-05, + "loss": 2.5227, + "step": 11140 + }, + { + "epoch": 0.8991203292712453, + "grad_norm": 0.7047473788261414, + "learning_rate": 8.287707749560691e-05, + "loss": 2.477, + "step": 11141 + }, + { + "epoch": 0.8992010330078283, + "grad_norm": 0.6654812693595886, + "learning_rate": 8.286152396454365e-05, + "loss": 2.4575, + "step": 11142 + }, + { + "epoch": 0.8992817367444113, + "grad_norm": 0.6690360307693481, + "learning_rate": 8.284597086061603e-05, + "loss": 2.4755, + "step": 11143 + }, + { + "epoch": 0.8993624404809942, + "grad_norm": 0.7270147204399109, + "learning_rate": 8.283041818421164e-05, + "loss": 2.5893, + "step": 11144 + }, + { + "epoch": 0.8994431442175773, + "grad_norm": 0.5977498888969421, + "learning_rate": 8.28148659357181e-05, + "loss": 2.5108, + "step": 11145 + }, + { + "epoch": 0.8995238479541603, + "grad_norm": 0.694593071937561, + 
"learning_rate": 8.279931411552307e-05, + "loss": 2.5036, + "step": 11146 + }, + { + "epoch": 0.8996045516907433, + "grad_norm": 0.7395440936088562, + "learning_rate": 8.278376272401404e-05, + "loss": 2.5244, + "step": 11147 + }, + { + "epoch": 0.8996852554273262, + "grad_norm": 0.6483517289161682, + "learning_rate": 8.276821176157867e-05, + "loss": 2.5619, + "step": 11148 + }, + { + "epoch": 0.8997659591639093, + "grad_norm": 0.6996768116950989, + "learning_rate": 8.275266122860454e-05, + "loss": 2.5275, + "step": 11149 + }, + { + "epoch": 0.8998466629004923, + "grad_norm": 0.661122739315033, + "learning_rate": 8.273711112547914e-05, + "loss": 2.5053, + "step": 11150 + }, + { + "epoch": 0.8999273666370753, + "grad_norm": 0.6919111609458923, + "learning_rate": 8.272156145259006e-05, + "loss": 2.578, + "step": 11151 + }, + { + "epoch": 0.9000080703736583, + "grad_norm": 0.6680958867073059, + "learning_rate": 8.270601221032482e-05, + "loss": 2.4942, + "step": 11152 + }, + { + "epoch": 0.9000887741102414, + "grad_norm": 0.6782989501953125, + "learning_rate": 8.269046339907101e-05, + "loss": 2.5461, + "step": 11153 + }, + { + "epoch": 0.9001694778468243, + "grad_norm": 0.743468165397644, + "learning_rate": 8.267491501921605e-05, + "loss": 2.629, + "step": 11154 + }, + { + "epoch": 0.9002501815834073, + "grad_norm": 0.709562361240387, + "learning_rate": 8.265936707114751e-05, + "loss": 2.566, + "step": 11155 + }, + { + "epoch": 0.9003308853199903, + "grad_norm": 0.7075676918029785, + "learning_rate": 8.264381955525291e-05, + "loss": 2.5409, + "step": 11156 + }, + { + "epoch": 0.9004115890565734, + "grad_norm": 0.7021335959434509, + "learning_rate": 8.262827247191963e-05, + "loss": 2.5606, + "step": 11157 + }, + { + "epoch": 0.9004922927931563, + "grad_norm": 0.6507331132888794, + "learning_rate": 8.261272582153524e-05, + "loss": 2.5557, + "step": 11158 + }, + { + "epoch": 0.9005729965297393, + "grad_norm": 0.7182760238647461, + "learning_rate": 8.25971796044872e-05, + 
"loss": 2.5567, + "step": 11159 + }, + { + "epoch": 0.9006537002663223, + "grad_norm": 0.6632338762283325, + "learning_rate": 8.258163382116291e-05, + "loss": 2.5081, + "step": 11160 + }, + { + "epoch": 0.9007344040029054, + "grad_norm": 0.6889928579330444, + "learning_rate": 8.256608847194983e-05, + "loss": 2.5034, + "step": 11161 + }, + { + "epoch": 0.9008151077394884, + "grad_norm": 0.6374824047088623, + "learning_rate": 8.255054355723542e-05, + "loss": 2.4826, + "step": 11162 + }, + { + "epoch": 0.9008958114760713, + "grad_norm": 0.7100771069526672, + "learning_rate": 8.253499907740706e-05, + "loss": 2.4666, + "step": 11163 + }, + { + "epoch": 0.9009765152126543, + "grad_norm": 0.8141123652458191, + "learning_rate": 8.251945503285218e-05, + "loss": 2.5339, + "step": 11164 + }, + { + "epoch": 0.9010572189492374, + "grad_norm": 0.6621670722961426, + "learning_rate": 8.250391142395822e-05, + "loss": 2.4805, + "step": 11165 + }, + { + "epoch": 0.9011379226858204, + "grad_norm": 0.6624772548675537, + "learning_rate": 8.248836825111245e-05, + "loss": 2.5148, + "step": 11166 + }, + { + "epoch": 0.9012186264224034, + "grad_norm": 0.6783565282821655, + "learning_rate": 8.247282551470235e-05, + "loss": 2.4481, + "step": 11167 + }, + { + "epoch": 0.9012993301589863, + "grad_norm": 0.700089156627655, + "learning_rate": 8.245728321511525e-05, + "loss": 2.5649, + "step": 11168 + }, + { + "epoch": 0.9013800338955693, + "grad_norm": 0.6765339970588684, + "learning_rate": 8.244174135273852e-05, + "loss": 2.5221, + "step": 11169 + }, + { + "epoch": 0.9014607376321524, + "grad_norm": 0.6896056532859802, + "learning_rate": 8.242619992795948e-05, + "loss": 2.4742, + "step": 11170 + }, + { + "epoch": 0.9015414413687354, + "grad_norm": 0.7134374976158142, + "learning_rate": 8.241065894116547e-05, + "loss": 2.5231, + "step": 11171 + }, + { + "epoch": 0.9016221451053184, + "grad_norm": 0.6939442753791809, + "learning_rate": 8.239511839274385e-05, + "loss": 2.5159, + "step": 11172 + }, 
+ { + "epoch": 0.9017028488419013, + "grad_norm": 0.6780345439910889, + "learning_rate": 8.237957828308187e-05, + "loss": 2.5474, + "step": 11173 + }, + { + "epoch": 0.9017835525784844, + "grad_norm": 0.6532382965087891, + "learning_rate": 8.236403861256687e-05, + "loss": 2.4982, + "step": 11174 + }, + { + "epoch": 0.9018642563150674, + "grad_norm": 0.6918137073516846, + "learning_rate": 8.234849938158615e-05, + "loss": 2.4657, + "step": 11175 + }, + { + "epoch": 0.9019449600516504, + "grad_norm": 0.6838762164115906, + "learning_rate": 8.233296059052695e-05, + "loss": 2.5405, + "step": 11176 + }, + { + "epoch": 0.9020256637882333, + "grad_norm": 0.7560290098190308, + "learning_rate": 8.231742223977653e-05, + "loss": 2.5379, + "step": 11177 + }, + { + "epoch": 0.9021063675248164, + "grad_norm": 0.6673319339752197, + "learning_rate": 8.230188432972221e-05, + "loss": 2.4669, + "step": 11178 + }, + { + "epoch": 0.9021870712613994, + "grad_norm": 0.7486294507980347, + "learning_rate": 8.228634686075116e-05, + "loss": 2.526, + "step": 11179 + }, + { + "epoch": 0.9022677749979824, + "grad_norm": 0.7012811303138733, + "learning_rate": 8.227080983325067e-05, + "loss": 2.5544, + "step": 11180 + }, + { + "epoch": 0.9023484787345654, + "grad_norm": 0.6807447075843811, + "learning_rate": 8.225527324760796e-05, + "loss": 2.5139, + "step": 11181 + }, + { + "epoch": 0.9024291824711484, + "grad_norm": 0.7594932317733765, + "learning_rate": 8.223973710421018e-05, + "loss": 2.539, + "step": 11182 + }, + { + "epoch": 0.9025098862077314, + "grad_norm": 0.6764204502105713, + "learning_rate": 8.22242014034446e-05, + "loss": 2.6128, + "step": 11183 + }, + { + "epoch": 0.9025905899443144, + "grad_norm": 0.6499967575073242, + "learning_rate": 8.220866614569837e-05, + "loss": 2.5459, + "step": 11184 + }, + { + "epoch": 0.9026712936808974, + "grad_norm": 0.673076331615448, + "learning_rate": 8.219313133135876e-05, + "loss": 2.5852, + "step": 11185 + }, + { + "epoch": 0.9027519974174805, + 
"grad_norm": 0.784854531288147, + "learning_rate": 8.21775969608128e-05, + "loss": 2.5586, + "step": 11186 + }, + { + "epoch": 0.9028327011540634, + "grad_norm": 0.658963680267334, + "learning_rate": 8.216206303444771e-05, + "loss": 2.4376, + "step": 11187 + }, + { + "epoch": 0.9029134048906464, + "grad_norm": 0.6456249356269836, + "learning_rate": 8.214652955265067e-05, + "loss": 2.5166, + "step": 11188 + }, + { + "epoch": 0.9029941086272294, + "grad_norm": 0.6940007209777832, + "learning_rate": 8.213099651580874e-05, + "loss": 2.4992, + "step": 11189 + }, + { + "epoch": 0.9030748123638125, + "grad_norm": 0.6661425828933716, + "learning_rate": 8.211546392430911e-05, + "loss": 2.5177, + "step": 11190 + }, + { + "epoch": 0.9031555161003955, + "grad_norm": 0.647834300994873, + "learning_rate": 8.20999317785389e-05, + "loss": 2.4666, + "step": 11191 + }, + { + "epoch": 0.9032362198369784, + "grad_norm": 0.7673383355140686, + "learning_rate": 8.208440007888515e-05, + "loss": 2.4852, + "step": 11192 + }, + { + "epoch": 0.9033169235735614, + "grad_norm": 0.7033390998840332, + "learning_rate": 8.206886882573498e-05, + "loss": 2.5549, + "step": 11193 + }, + { + "epoch": 0.9033976273101445, + "grad_norm": 0.6871141195297241, + "learning_rate": 8.205333801947548e-05, + "loss": 2.4585, + "step": 11194 + }, + { + "epoch": 0.9034783310467275, + "grad_norm": 0.7201984524726868, + "learning_rate": 8.20378076604937e-05, + "loss": 2.5271, + "step": 11195 + }, + { + "epoch": 0.9035590347833105, + "grad_norm": 0.704060971736908, + "learning_rate": 8.202227774917671e-05, + "loss": 2.4915, + "step": 11196 + }, + { + "epoch": 0.9036397385198934, + "grad_norm": 0.6833879947662354, + "learning_rate": 8.200674828591156e-05, + "loss": 2.4496, + "step": 11197 + }, + { + "epoch": 0.9037204422564765, + "grad_norm": 0.6564866304397583, + "learning_rate": 8.199121927108527e-05, + "loss": 2.4818, + "step": 11198 + }, + { + "epoch": 0.9038011459930595, + "grad_norm": 0.6970151662826538, + 
"learning_rate": 8.197569070508486e-05, + "loss": 2.5812, + "step": 11199 + }, + { + "epoch": 0.9038818497296425, + "grad_norm": 0.7147194743156433, + "learning_rate": 8.196016258829737e-05, + "loss": 2.5543, + "step": 11200 + }, + { + "epoch": 0.9039625534662254, + "grad_norm": 0.6357648968696594, + "learning_rate": 8.194463492110981e-05, + "loss": 2.5254, + "step": 11201 + }, + { + "epoch": 0.9040432572028085, + "grad_norm": 0.7113756537437439, + "learning_rate": 8.19291077039091e-05, + "loss": 2.5179, + "step": 11202 + }, + { + "epoch": 0.9041239609393915, + "grad_norm": 0.7252987623214722, + "learning_rate": 8.191358093708228e-05, + "loss": 2.5658, + "step": 11203 + }, + { + "epoch": 0.9042046646759745, + "grad_norm": 0.7095803618431091, + "learning_rate": 8.189805462101631e-05, + "loss": 2.583, + "step": 11204 + }, + { + "epoch": 0.9042853684125575, + "grad_norm": 0.7447760105133057, + "learning_rate": 8.188252875609812e-05, + "loss": 2.5608, + "step": 11205 + }, + { + "epoch": 0.9043660721491406, + "grad_norm": 0.6578439474105835, + "learning_rate": 8.186700334271468e-05, + "loss": 2.508, + "step": 11206 + }, + { + "epoch": 0.9044467758857235, + "grad_norm": 0.6776832938194275, + "learning_rate": 8.185147838125296e-05, + "loss": 2.6188, + "step": 11207 + }, + { + "epoch": 0.9045274796223065, + "grad_norm": 0.6559253931045532, + "learning_rate": 8.183595387209976e-05, + "loss": 2.5307, + "step": 11208 + }, + { + "epoch": 0.9046081833588895, + "grad_norm": 0.7078405022621155, + "learning_rate": 8.18204298156421e-05, + "loss": 2.5545, + "step": 11209 + }, + { + "epoch": 0.9046888870954726, + "grad_norm": 0.6790273189544678, + "learning_rate": 8.18049062122669e-05, + "loss": 2.4963, + "step": 11210 + }, + { + "epoch": 0.9047695908320555, + "grad_norm": 0.6888250708580017, + "learning_rate": 8.178938306236095e-05, + "loss": 2.5108, + "step": 11211 + }, + { + "epoch": 0.9048502945686385, + "grad_norm": 0.6438474059104919, + "learning_rate": 8.177386036631119e-05, + 
"loss": 2.4976, + "step": 11212 + }, + { + "epoch": 0.9049309983052215, + "grad_norm": 0.6786646842956543, + "learning_rate": 8.175833812450445e-05, + "loss": 2.4584, + "step": 11213 + }, + { + "epoch": 0.9050117020418046, + "grad_norm": 0.6480324268341064, + "learning_rate": 8.174281633732764e-05, + "loss": 2.5021, + "step": 11214 + }, + { + "epoch": 0.9050924057783876, + "grad_norm": 0.7232171893119812, + "learning_rate": 8.172729500516756e-05, + "loss": 2.4742, + "step": 11215 + }, + { + "epoch": 0.9051731095149705, + "grad_norm": 0.7048845291137695, + "learning_rate": 8.171177412841105e-05, + "loss": 2.518, + "step": 11216 + }, + { + "epoch": 0.9052538132515535, + "grad_norm": 0.6363180875778198, + "learning_rate": 8.169625370744496e-05, + "loss": 2.5154, + "step": 11217 + }, + { + "epoch": 0.9053345169881366, + "grad_norm": 0.7176045179367065, + "learning_rate": 8.168073374265605e-05, + "loss": 2.5182, + "step": 11218 + }, + { + "epoch": 0.9054152207247196, + "grad_norm": 0.7011643052101135, + "learning_rate": 8.166521423443112e-05, + "loss": 2.5615, + "step": 11219 + }, + { + "epoch": 0.9054959244613026, + "grad_norm": 0.6853327751159668, + "learning_rate": 8.164969518315704e-05, + "loss": 2.5057, + "step": 11220 + }, + { + "epoch": 0.9055766281978855, + "grad_norm": 0.6972528696060181, + "learning_rate": 8.163417658922049e-05, + "loss": 2.4949, + "step": 11221 + }, + { + "epoch": 0.9056573319344685, + "grad_norm": 0.6780978441238403, + "learning_rate": 8.161865845300824e-05, + "loss": 2.5601, + "step": 11222 + }, + { + "epoch": 0.9057380356710516, + "grad_norm": 0.6454098224639893, + "learning_rate": 8.160314077490711e-05, + "loss": 2.4203, + "step": 11223 + }, + { + "epoch": 0.9058187394076346, + "grad_norm": 0.7300907969474792, + "learning_rate": 8.158762355530378e-05, + "loss": 2.4818, + "step": 11224 + }, + { + "epoch": 0.9058994431442176, + "grad_norm": 0.682475745677948, + "learning_rate": 8.1572106794585e-05, + "loss": 2.4852, + "step": 11225 + }, + { 
+ "epoch": 0.9059801468808005, + "grad_norm": 0.6666192412376404, + "learning_rate": 8.155659049313754e-05, + "loss": 2.5642, + "step": 11226 + }, + { + "epoch": 0.9060608506173836, + "grad_norm": 0.6873177886009216, + "learning_rate": 8.154107465134801e-05, + "loss": 2.5163, + "step": 11227 + }, + { + "epoch": 0.9061415543539666, + "grad_norm": 0.6704845428466797, + "learning_rate": 8.152555926960315e-05, + "loss": 2.5481, + "step": 11228 + }, + { + "epoch": 0.9062222580905496, + "grad_norm": 0.6340618133544922, + "learning_rate": 8.151004434828963e-05, + "loss": 2.4701, + "step": 11229 + }, + { + "epoch": 0.9063029618271325, + "grad_norm": 0.7886226177215576, + "learning_rate": 8.14945298877942e-05, + "loss": 2.5322, + "step": 11230 + }, + { + "epoch": 0.9063836655637156, + "grad_norm": 0.7086018919944763, + "learning_rate": 8.14790158885034e-05, + "loss": 2.4909, + "step": 11231 + }, + { + "epoch": 0.9064643693002986, + "grad_norm": 0.6791329979896545, + "learning_rate": 8.146350235080396e-05, + "loss": 2.4438, + "step": 11232 + }, + { + "epoch": 0.9065450730368816, + "grad_norm": 0.7070720791816711, + "learning_rate": 8.14479892750825e-05, + "loss": 2.528, + "step": 11233 + }, + { + "epoch": 0.9066257767734646, + "grad_norm": 0.6551348567008972, + "learning_rate": 8.143247666172564e-05, + "loss": 2.4747, + "step": 11234 + }, + { + "epoch": 0.9067064805100477, + "grad_norm": 0.6691645979881287, + "learning_rate": 8.141696451111997e-05, + "loss": 2.5038, + "step": 11235 + }, + { + "epoch": 0.9067871842466306, + "grad_norm": 0.6814864277839661, + "learning_rate": 8.14014528236522e-05, + "loss": 2.5737, + "step": 11236 + }, + { + "epoch": 0.9068678879832136, + "grad_norm": 0.7442377209663391, + "learning_rate": 8.138594159970877e-05, + "loss": 2.5839, + "step": 11237 + }, + { + "epoch": 0.9069485917197966, + "grad_norm": 0.6861338019371033, + "learning_rate": 8.137043083967634e-05, + "loss": 2.567, + "step": 11238 + }, + { + "epoch": 0.9070292954563797, + 
"grad_norm": 0.7056479454040527, + "learning_rate": 8.135492054394151e-05, + "loss": 2.5297, + "step": 11239 + }, + { + "epoch": 0.9071099991929626, + "grad_norm": 0.7166962623596191, + "learning_rate": 8.133941071289076e-05, + "loss": 2.4834, + "step": 11240 + }, + { + "epoch": 0.9071907029295456, + "grad_norm": 0.6285616159439087, + "learning_rate": 8.132390134691068e-05, + "loss": 2.5066, + "step": 11241 + }, + { + "epoch": 0.9072714066661286, + "grad_norm": 0.681915283203125, + "learning_rate": 8.130839244638783e-05, + "loss": 2.5387, + "step": 11242 + }, + { + "epoch": 0.9073521104027117, + "grad_norm": 0.6876898407936096, + "learning_rate": 8.129288401170866e-05, + "loss": 2.4465, + "step": 11243 + }, + { + "epoch": 0.9074328141392947, + "grad_norm": 0.657132625579834, + "learning_rate": 8.127737604325975e-05, + "loss": 2.499, + "step": 11244 + }, + { + "epoch": 0.9075135178758776, + "grad_norm": 0.6678825616836548, + "learning_rate": 8.126186854142752e-05, + "loss": 2.4872, + "step": 11245 + }, + { + "epoch": 0.9075942216124606, + "grad_norm": 0.7296879291534424, + "learning_rate": 8.124636150659858e-05, + "loss": 2.4783, + "step": 11246 + }, + { + "epoch": 0.9076749253490437, + "grad_norm": 0.7087056040763855, + "learning_rate": 8.12308549391593e-05, + "loss": 2.507, + "step": 11247 + }, + { + "epoch": 0.9077556290856267, + "grad_norm": 0.7099738121032715, + "learning_rate": 8.121534883949616e-05, + "loss": 2.5317, + "step": 11248 + }, + { + "epoch": 0.9078363328222097, + "grad_norm": 0.6421170830726624, + "learning_rate": 8.119984320799566e-05, + "loss": 2.5291, + "step": 11249 + }, + { + "epoch": 0.9079170365587926, + "grad_norm": 0.6835018396377563, + "learning_rate": 8.11843380450442e-05, + "loss": 2.5523, + "step": 11250 + }, + { + "epoch": 0.9079977402953757, + "grad_norm": 0.6638229489326477, + "learning_rate": 8.11688333510282e-05, + "loss": 2.5128, + "step": 11251 + }, + { + "epoch": 0.9080784440319587, + "grad_norm": 0.6783459186553955, + 
"learning_rate": 8.115332912633415e-05, + "loss": 2.5485, + "step": 11252 + }, + { + "epoch": 0.9081591477685417, + "grad_norm": 0.65911865234375, + "learning_rate": 8.113782537134838e-05, + "loss": 2.5408, + "step": 11253 + }, + { + "epoch": 0.9082398515051247, + "grad_norm": 0.6844244003295898, + "learning_rate": 8.112232208645729e-05, + "loss": 2.6067, + "step": 11254 + }, + { + "epoch": 0.9083205552417077, + "grad_norm": 0.6896870136260986, + "learning_rate": 8.110681927204729e-05, + "loss": 2.5444, + "step": 11255 + }, + { + "epoch": 0.9084012589782907, + "grad_norm": 0.6693820953369141, + "learning_rate": 8.109131692850473e-05, + "loss": 2.5118, + "step": 11256 + }, + { + "epoch": 0.9084819627148737, + "grad_norm": 0.6401854753494263, + "learning_rate": 8.107581505621599e-05, + "loss": 2.4811, + "step": 11257 + }, + { + "epoch": 0.9085626664514567, + "grad_norm": 0.6861663460731506, + "learning_rate": 8.106031365556743e-05, + "loss": 2.4633, + "step": 11258 + }, + { + "epoch": 0.9086433701880398, + "grad_norm": 0.6631655097007751, + "learning_rate": 8.104481272694533e-05, + "loss": 2.5748, + "step": 11259 + }, + { + "epoch": 0.9087240739246227, + "grad_norm": 0.6499454975128174, + "learning_rate": 8.102931227073604e-05, + "loss": 2.5573, + "step": 11260 + }, + { + "epoch": 0.9088047776612057, + "grad_norm": 0.7214524149894714, + "learning_rate": 8.10138122873259e-05, + "loss": 2.4905, + "step": 11261 + }, + { + "epoch": 0.9088854813977887, + "grad_norm": 0.6481152176856995, + "learning_rate": 8.099831277710122e-05, + "loss": 2.5073, + "step": 11262 + }, + { + "epoch": 0.9089661851343718, + "grad_norm": 0.6666486859321594, + "learning_rate": 8.09828137404482e-05, + "loss": 2.5379, + "step": 11263 + }, + { + "epoch": 0.9090468888709548, + "grad_norm": 0.7186474800109863, + "learning_rate": 8.096731517775319e-05, + "loss": 2.5164, + "step": 11264 + }, + { + "epoch": 0.9091275926075377, + "grad_norm": 0.6838653087615967, + "learning_rate": 8.095181708940245e-05, 
+ "loss": 2.49, + "step": 11265 + }, + { + "epoch": 0.9092082963441207, + "grad_norm": 0.7740866541862488, + "learning_rate": 8.093631947578221e-05, + "loss": 2.5487, + "step": 11266 + }, + { + "epoch": 0.9092890000807038, + "grad_norm": 0.7198607325553894, + "learning_rate": 8.092082233727871e-05, + "loss": 2.4477, + "step": 11267 + }, + { + "epoch": 0.9093697038172868, + "grad_norm": 0.6454673409461975, + "learning_rate": 8.090532567427825e-05, + "loss": 2.523, + "step": 11268 + }, + { + "epoch": 0.9094504075538697, + "grad_norm": 0.6169581413269043, + "learning_rate": 8.088982948716692e-05, + "loss": 2.4924, + "step": 11269 + }, + { + "epoch": 0.9095311112904527, + "grad_norm": 0.7034861445426941, + "learning_rate": 8.0874333776331e-05, + "loss": 2.4756, + "step": 11270 + }, + { + "epoch": 0.9096118150270357, + "grad_norm": 0.7231355309486389, + "learning_rate": 8.085883854215671e-05, + "loss": 2.4963, + "step": 11271 + }, + { + "epoch": 0.9096925187636188, + "grad_norm": 0.6597892045974731, + "learning_rate": 8.084334378503017e-05, + "loss": 2.5617, + "step": 11272 + }, + { + "epoch": 0.9097732225002018, + "grad_norm": 0.7257365584373474, + "learning_rate": 8.082784950533759e-05, + "loss": 2.5293, + "step": 11273 + }, + { + "epoch": 0.9098539262367847, + "grad_norm": 0.7305313944816589, + "learning_rate": 8.081235570346512e-05, + "loss": 2.5355, + "step": 11274 + }, + { + "epoch": 0.9099346299733677, + "grad_norm": 0.6814435720443726, + "learning_rate": 8.07968623797989e-05, + "loss": 2.4842, + "step": 11275 + }, + { + "epoch": 0.9100153337099508, + "grad_norm": 0.7342902421951294, + "learning_rate": 8.078136953472506e-05, + "loss": 2.4817, + "step": 11276 + }, + { + "epoch": 0.9100960374465338, + "grad_norm": 0.6456516981124878, + "learning_rate": 8.076587716862973e-05, + "loss": 2.5119, + "step": 11277 + }, + { + "epoch": 0.9101767411831168, + "grad_norm": 0.7268881797790527, + "learning_rate": 8.075038528189906e-05, + "loss": 2.4614, + "step": 11278 + }, + { 
+ "epoch": 0.9102574449196997, + "grad_norm": 0.6901549696922302, + "learning_rate": 8.073489387491906e-05, + "loss": 2.5411, + "step": 11279 + }, + { + "epoch": 0.9103381486562828, + "grad_norm": 0.6850160956382751, + "learning_rate": 8.071940294807588e-05, + "loss": 2.5078, + "step": 11280 + }, + { + "epoch": 0.9104188523928658, + "grad_norm": 0.6550731658935547, + "learning_rate": 8.070391250175558e-05, + "loss": 2.5502, + "step": 11281 + }, + { + "epoch": 0.9104995561294488, + "grad_norm": 0.7524412274360657, + "learning_rate": 8.068842253634421e-05, + "loss": 2.4699, + "step": 11282 + }, + { + "epoch": 0.9105802598660317, + "grad_norm": 0.6659243702888489, + "learning_rate": 8.067293305222784e-05, + "loss": 2.557, + "step": 11283 + }, + { + "epoch": 0.9106609636026148, + "grad_norm": 0.67015540599823, + "learning_rate": 8.065744404979251e-05, + "loss": 2.5929, + "step": 11284 + }, + { + "epoch": 0.9107416673391978, + "grad_norm": 0.7139000296592712, + "learning_rate": 8.064195552942422e-05, + "loss": 2.5262, + "step": 11285 + }, + { + "epoch": 0.9108223710757808, + "grad_norm": 0.6918016672134399, + "learning_rate": 8.062646749150899e-05, + "loss": 2.5161, + "step": 11286 + }, + { + "epoch": 0.9109030748123638, + "grad_norm": 0.7395541667938232, + "learning_rate": 8.061097993643289e-05, + "loss": 2.5351, + "step": 11287 + }, + { + "epoch": 0.9109837785489469, + "grad_norm": 0.6794499158859253, + "learning_rate": 8.05954928645818e-05, + "loss": 2.4617, + "step": 11288 + }, + { + "epoch": 0.9110644822855298, + "grad_norm": 0.6906577348709106, + "learning_rate": 8.058000627634176e-05, + "loss": 2.5701, + "step": 11289 + }, + { + "epoch": 0.9111451860221128, + "grad_norm": 0.6954079866409302, + "learning_rate": 8.056452017209874e-05, + "loss": 2.5137, + "step": 11290 + }, + { + "epoch": 0.9112258897586958, + "grad_norm": 0.7381381988525391, + "learning_rate": 8.054903455223866e-05, + "loss": 2.6666, + "step": 11291 + }, + { + "epoch": 0.9113065934952789, + 
"grad_norm": 0.6731518507003784, + "learning_rate": 8.053354941714749e-05, + "loss": 2.5173, + "step": 11292 + }, + { + "epoch": 0.9113872972318618, + "grad_norm": 0.6976885795593262, + "learning_rate": 8.051806476721116e-05, + "loss": 2.5089, + "step": 11293 + }, + { + "epoch": 0.9114680009684448, + "grad_norm": 0.6401965618133545, + "learning_rate": 8.050258060281562e-05, + "loss": 2.5295, + "step": 11294 + }, + { + "epoch": 0.9115487047050278, + "grad_norm": 0.7409671545028687, + "learning_rate": 8.048709692434667e-05, + "loss": 2.5074, + "step": 11295 + }, + { + "epoch": 0.9116294084416109, + "grad_norm": 0.6028234958648682, + "learning_rate": 8.04716137321903e-05, + "loss": 2.5437, + "step": 11296 + }, + { + "epoch": 0.9117101121781939, + "grad_norm": 0.727643609046936, + "learning_rate": 8.04561310267324e-05, + "loss": 2.5272, + "step": 11297 + }, + { + "epoch": 0.9117908159147768, + "grad_norm": 0.6912926435470581, + "learning_rate": 8.044064880835876e-05, + "loss": 2.5166, + "step": 11298 + }, + { + "epoch": 0.9118715196513598, + "grad_norm": 0.6971367001533508, + "learning_rate": 8.042516707745528e-05, + "loss": 2.5421, + "step": 11299 + }, + { + "epoch": 0.9119522233879429, + "grad_norm": 0.6722451448440552, + "learning_rate": 8.040968583440783e-05, + "loss": 2.5088, + "step": 11300 + }, + { + "epoch": 0.9120329271245259, + "grad_norm": 0.6469144225120544, + "learning_rate": 8.03942050796022e-05, + "loss": 2.4921, + "step": 11301 + }, + { + "epoch": 0.9121136308611089, + "grad_norm": 0.6709008812904358, + "learning_rate": 8.037872481342423e-05, + "loss": 2.4553, + "step": 11302 + }, + { + "epoch": 0.9121943345976918, + "grad_norm": 0.6540920734405518, + "learning_rate": 8.036324503625977e-05, + "loss": 2.489, + "step": 11303 + }, + { + "epoch": 0.9122750383342749, + "grad_norm": 0.6589755415916443, + "learning_rate": 8.034776574849453e-05, + "loss": 2.5195, + "step": 11304 + }, + { + "epoch": 0.9123557420708579, + "grad_norm": 0.676943838596344, + 
"learning_rate": 8.033228695051434e-05, + "loss": 2.4877, + "step": 11305 + }, + { + "epoch": 0.9124364458074409, + "grad_norm": 0.6509177088737488, + "learning_rate": 8.031680864270498e-05, + "loss": 2.5229, + "step": 11306 + }, + { + "epoch": 0.9125171495440239, + "grad_norm": 0.7480820417404175, + "learning_rate": 8.030133082545219e-05, + "loss": 2.5016, + "step": 11307 + }, + { + "epoch": 0.9125978532806069, + "grad_norm": 0.7130550742149353, + "learning_rate": 8.028585349914174e-05, + "loss": 2.5251, + "step": 11308 + }, + { + "epoch": 0.9126785570171899, + "grad_norm": 0.6959688067436218, + "learning_rate": 8.027037666415934e-05, + "loss": 2.4776, + "step": 11309 + }, + { + "epoch": 0.9127592607537729, + "grad_norm": 0.7540854215621948, + "learning_rate": 8.025490032089076e-05, + "loss": 2.5097, + "step": 11310 + }, + { + "epoch": 0.9128399644903559, + "grad_norm": 0.6921199560165405, + "learning_rate": 8.023942446972165e-05, + "loss": 2.5354, + "step": 11311 + }, + { + "epoch": 0.912920668226939, + "grad_norm": 0.649824857711792, + "learning_rate": 8.022394911103774e-05, + "loss": 2.5398, + "step": 11312 + }, + { + "epoch": 0.9130013719635219, + "grad_norm": 0.6951068639755249, + "learning_rate": 8.020847424522474e-05, + "loss": 2.5302, + "step": 11313 + }, + { + "epoch": 0.9130820757001049, + "grad_norm": 0.6906851530075073, + "learning_rate": 8.019299987266827e-05, + "loss": 2.581, + "step": 11314 + }, + { + "epoch": 0.9131627794366879, + "grad_norm": 0.6758459210395813, + "learning_rate": 8.0177525993754e-05, + "loss": 2.5208, + "step": 11315 + }, + { + "epoch": 0.913243483173271, + "grad_norm": 0.6915175318717957, + "learning_rate": 8.016205260886766e-05, + "loss": 2.5386, + "step": 11316 + }, + { + "epoch": 0.913324186909854, + "grad_norm": 0.7083550691604614, + "learning_rate": 8.014657971839476e-05, + "loss": 2.4895, + "step": 11317 + }, + { + "epoch": 0.9134048906464369, + "grad_norm": 0.7052562832832336, + "learning_rate": 8.013110732272102e-05, + 
"loss": 2.4896, + "step": 11318 + }, + { + "epoch": 0.9134855943830199, + "grad_norm": 0.7811834216117859, + "learning_rate": 8.011563542223206e-05, + "loss": 2.5082, + "step": 11319 + }, + { + "epoch": 0.913566298119603, + "grad_norm": 0.6207153797149658, + "learning_rate": 8.01001640173134e-05, + "loss": 2.4967, + "step": 11320 + }, + { + "epoch": 0.913647001856186, + "grad_norm": 0.7637950778007507, + "learning_rate": 8.008469310835065e-05, + "loss": 2.4907, + "step": 11321 + }, + { + "epoch": 0.913727705592769, + "grad_norm": 0.7263950705528259, + "learning_rate": 8.006922269572947e-05, + "loss": 2.5259, + "step": 11322 + }, + { + "epoch": 0.9138084093293519, + "grad_norm": 0.6965721845626831, + "learning_rate": 8.005375277983531e-05, + "loss": 2.5648, + "step": 11323 + }, + { + "epoch": 0.9138891130659349, + "grad_norm": 0.7146127223968506, + "learning_rate": 8.003828336105377e-05, + "loss": 2.53, + "step": 11324 + }, + { + "epoch": 0.913969816802518, + "grad_norm": 0.7083697319030762, + "learning_rate": 8.00228144397704e-05, + "loss": 2.4923, + "step": 11325 + }, + { + "epoch": 0.914050520539101, + "grad_norm": 0.7259312868118286, + "learning_rate": 8.000734601637074e-05, + "loss": 2.5303, + "step": 11326 + }, + { + "epoch": 0.9141312242756839, + "grad_norm": 0.7072086930274963, + "learning_rate": 7.999187809124025e-05, + "loss": 2.4662, + "step": 11327 + }, + { + "epoch": 0.9142119280122669, + "grad_norm": 0.7216035723686218, + "learning_rate": 7.997641066476445e-05, + "loss": 2.5069, + "step": 11328 + }, + { + "epoch": 0.91429263174885, + "grad_norm": 0.6925712823867798, + "learning_rate": 7.99609437373289e-05, + "loss": 2.5107, + "step": 11329 + }, + { + "epoch": 0.914373335485433, + "grad_norm": 0.6672701835632324, + "learning_rate": 7.994547730931896e-05, + "loss": 2.5248, + "step": 11330 + }, + { + "epoch": 0.914454039222016, + "grad_norm": 0.8058515787124634, + "learning_rate": 7.993001138112016e-05, + "loss": 2.4427, + "step": 11331 + }, + { + 
"epoch": 0.9145347429585989, + "grad_norm": 0.6942592859268188, + "learning_rate": 7.991454595311795e-05, + "loss": 2.6163, + "step": 11332 + }, + { + "epoch": 0.914615446695182, + "grad_norm": 0.7051894068717957, + "learning_rate": 7.989908102569774e-05, + "loss": 2.5327, + "step": 11333 + }, + { + "epoch": 0.914696150431765, + "grad_norm": 0.6824771761894226, + "learning_rate": 7.988361659924496e-05, + "loss": 2.4843, + "step": 11334 + }, + { + "epoch": 0.914776854168348, + "grad_norm": 0.6756488084793091, + "learning_rate": 7.98681526741451e-05, + "loss": 2.5215, + "step": 11335 + }, + { + "epoch": 0.914857557904931, + "grad_norm": 0.6988239288330078, + "learning_rate": 7.985268925078344e-05, + "loss": 2.5153, + "step": 11336 + }, + { + "epoch": 0.914938261641514, + "grad_norm": 0.6446006298065186, + "learning_rate": 7.983722632954544e-05, + "loss": 2.5081, + "step": 11337 + }, + { + "epoch": 0.915018965378097, + "grad_norm": 0.6828100681304932, + "learning_rate": 7.982176391081649e-05, + "loss": 2.5607, + "step": 11338 + }, + { + "epoch": 0.91509966911468, + "grad_norm": 0.659721851348877, + "learning_rate": 7.980630199498193e-05, + "loss": 2.531, + "step": 11339 + }, + { + "epoch": 0.915180372851263, + "grad_norm": 0.6298564076423645, + "learning_rate": 7.979084058242709e-05, + "loss": 2.513, + "step": 11340 + }, + { + "epoch": 0.9152610765878461, + "grad_norm": 0.664299726486206, + "learning_rate": 7.977537967353735e-05, + "loss": 2.5533, + "step": 11341 + }, + { + "epoch": 0.915341780324429, + "grad_norm": 0.7035108804702759, + "learning_rate": 7.975991926869801e-05, + "loss": 2.4868, + "step": 11342 + }, + { + "epoch": 0.915422484061012, + "grad_norm": 0.7428407073020935, + "learning_rate": 7.974445936829438e-05, + "loss": 2.5694, + "step": 11343 + }, + { + "epoch": 0.915503187797595, + "grad_norm": 0.6845505237579346, + "learning_rate": 7.972899997271176e-05, + "loss": 2.5092, + "step": 11344 + }, + { + "epoch": 0.9155838915341781, + "grad_norm": 
0.7135340571403503, + "learning_rate": 7.971354108233551e-05, + "loss": 2.5157, + "step": 11345 + }, + { + "epoch": 0.915664595270761, + "grad_norm": 0.7032433152198792, + "learning_rate": 7.969808269755077e-05, + "loss": 2.5292, + "step": 11346 + }, + { + "epoch": 0.915745299007344, + "grad_norm": 0.6874690651893616, + "learning_rate": 7.96826248187429e-05, + "loss": 2.5312, + "step": 11347 + }, + { + "epoch": 0.915826002743927, + "grad_norm": 0.6497030258178711, + "learning_rate": 7.966716744629718e-05, + "loss": 2.505, + "step": 11348 + }, + { + "epoch": 0.9159067064805101, + "grad_norm": 0.6618520021438599, + "learning_rate": 7.965171058059874e-05, + "loss": 2.5287, + "step": 11349 + }, + { + "epoch": 0.9159874102170931, + "grad_norm": 0.6737041473388672, + "learning_rate": 7.963625422203288e-05, + "loss": 2.5494, + "step": 11350 + }, + { + "epoch": 0.916068113953676, + "grad_norm": 0.705646276473999, + "learning_rate": 7.96207983709848e-05, + "loss": 2.5402, + "step": 11351 + }, + { + "epoch": 0.916148817690259, + "grad_norm": 0.6852068901062012, + "learning_rate": 7.96053430278397e-05, + "loss": 2.51, + "step": 11352 + }, + { + "epoch": 0.9162295214268421, + "grad_norm": 0.7166822552680969, + "learning_rate": 7.958988819298274e-05, + "loss": 2.576, + "step": 11353 + }, + { + "epoch": 0.9163102251634251, + "grad_norm": 0.6349207162857056, + "learning_rate": 7.957443386679913e-05, + "loss": 2.5219, + "step": 11354 + }, + { + "epoch": 0.9163909289000081, + "grad_norm": 0.6504647135734558, + "learning_rate": 7.955898004967406e-05, + "loss": 2.4593, + "step": 11355 + }, + { + "epoch": 0.916471632636591, + "grad_norm": 0.7313871383666992, + "learning_rate": 7.95435267419926e-05, + "loss": 2.5616, + "step": 11356 + }, + { + "epoch": 0.9165523363731741, + "grad_norm": 0.6948587894439697, + "learning_rate": 7.95280739441399e-05, + "loss": 2.4608, + "step": 11357 + }, + { + "epoch": 0.9166330401097571, + "grad_norm": 0.6130328178405762, + "learning_rate": 
7.95126216565012e-05, + "loss": 2.5563, + "step": 11358 + }, + { + "epoch": 0.9167137438463401, + "grad_norm": 0.7149228453636169, + "learning_rate": 7.949716987946145e-05, + "loss": 2.5664, + "step": 11359 + }, + { + "epoch": 0.916794447582923, + "grad_norm": 0.7452285289764404, + "learning_rate": 7.948171861340584e-05, + "loss": 2.525, + "step": 11360 + }, + { + "epoch": 0.9168751513195061, + "grad_norm": 0.6840611100196838, + "learning_rate": 7.946626785871945e-05, + "loss": 2.537, + "step": 11361 + }, + { + "epoch": 0.9169558550560891, + "grad_norm": 0.7269708514213562, + "learning_rate": 7.945081761578732e-05, + "loss": 2.5227, + "step": 11362 + }, + { + "epoch": 0.9170365587926721, + "grad_norm": 0.6521697044372559, + "learning_rate": 7.943536788499452e-05, + "loss": 2.54, + "step": 11363 + }, + { + "epoch": 0.9171172625292551, + "grad_norm": 0.6516863107681274, + "learning_rate": 7.941991866672618e-05, + "loss": 2.4788, + "step": 11364 + }, + { + "epoch": 0.9171979662658382, + "grad_norm": 0.7673580050468445, + "learning_rate": 7.94044699613672e-05, + "loss": 2.4678, + "step": 11365 + }, + { + "epoch": 0.9172786700024211, + "grad_norm": 0.6666994690895081, + "learning_rate": 7.938902176930268e-05, + "loss": 2.5251, + "step": 11366 + }, + { + "epoch": 0.9173593737390041, + "grad_norm": 0.7261863946914673, + "learning_rate": 7.937357409091761e-05, + "loss": 2.4977, + "step": 11367 + }, + { + "epoch": 0.9174400774755871, + "grad_norm": 0.6920679807662964, + "learning_rate": 7.9358126926597e-05, + "loss": 2.5367, + "step": 11368 + }, + { + "epoch": 0.9175207812121702, + "grad_norm": 0.6715712547302246, + "learning_rate": 7.93426802767258e-05, + "loss": 2.4898, + "step": 11369 + }, + { + "epoch": 0.9176014849487532, + "grad_norm": 0.7014333605766296, + "learning_rate": 7.932723414168904e-05, + "loss": 2.4507, + "step": 11370 + }, + { + "epoch": 0.9176821886853361, + "grad_norm": 0.6755761504173279, + "learning_rate": 7.931178852187163e-05, + "loss": 2.5895, + 
"step": 11371 + }, + { + "epoch": 0.9177628924219191, + "grad_norm": 0.6846731305122375, + "learning_rate": 7.929634341765852e-05, + "loss": 2.5002, + "step": 11372 + }, + { + "epoch": 0.9178435961585021, + "grad_norm": 0.6422831416130066, + "learning_rate": 7.928089882943466e-05, + "loss": 2.5326, + "step": 11373 + }, + { + "epoch": 0.9179242998950852, + "grad_norm": 0.7256442308425903, + "learning_rate": 7.9265454757585e-05, + "loss": 2.5706, + "step": 11374 + }, + { + "epoch": 0.9180050036316681, + "grad_norm": 0.6514387130737305, + "learning_rate": 7.925001120249436e-05, + "loss": 2.5349, + "step": 11375 + }, + { + "epoch": 0.9180857073682511, + "grad_norm": 0.7596457600593567, + "learning_rate": 7.923456816454768e-05, + "loss": 2.4767, + "step": 11376 + }, + { + "epoch": 0.9181664111048341, + "grad_norm": 0.673283040523529, + "learning_rate": 7.921912564412988e-05, + "loss": 2.5156, + "step": 11377 + }, + { + "epoch": 0.9182471148414172, + "grad_norm": 0.6964103579521179, + "learning_rate": 7.920368364162575e-05, + "loss": 2.5293, + "step": 11378 + }, + { + "epoch": 0.9183278185780002, + "grad_norm": 0.6765062212944031, + "learning_rate": 7.91882421574202e-05, + "loss": 2.5757, + "step": 11379 + }, + { + "epoch": 0.9184085223145831, + "grad_norm": 0.7039035558700562, + "learning_rate": 7.917280119189811e-05, + "loss": 2.513, + "step": 11380 + }, + { + "epoch": 0.9184892260511661, + "grad_norm": 0.6523976922035217, + "learning_rate": 7.915736074544419e-05, + "loss": 2.4712, + "step": 11381 + }, + { + "epoch": 0.9185699297877492, + "grad_norm": 0.7159552574157715, + "learning_rate": 7.914192081844334e-05, + "loss": 2.4713, + "step": 11382 + }, + { + "epoch": 0.9186506335243322, + "grad_norm": 0.7071694731712341, + "learning_rate": 7.912648141128036e-05, + "loss": 2.5367, + "step": 11383 + }, + { + "epoch": 0.9187313372609152, + "grad_norm": 0.6675183773040771, + "learning_rate": 7.911104252434e-05, + "loss": 2.5372, + "step": 11384 + }, + { + "epoch": 
0.9188120409974981, + "grad_norm": 0.7293995022773743, + "learning_rate": 7.909560415800707e-05, + "loss": 2.5469, + "step": 11385 + }, + { + "epoch": 0.9188927447340812, + "grad_norm": 0.6774035096168518, + "learning_rate": 7.908016631266635e-05, + "loss": 2.5655, + "step": 11386 + }, + { + "epoch": 0.9189734484706642, + "grad_norm": 0.7068144083023071, + "learning_rate": 7.906472898870256e-05, + "loss": 2.5265, + "step": 11387 + }, + { + "epoch": 0.9190541522072472, + "grad_norm": 0.6756324172019958, + "learning_rate": 7.904929218650044e-05, + "loss": 2.4966, + "step": 11388 + }, + { + "epoch": 0.9191348559438302, + "grad_norm": 0.6964625120162964, + "learning_rate": 7.903385590644473e-05, + "loss": 2.5646, + "step": 11389 + }, + { + "epoch": 0.9192155596804132, + "grad_norm": 0.6760976314544678, + "learning_rate": 7.901842014892018e-05, + "loss": 2.5159, + "step": 11390 + }, + { + "epoch": 0.9192962634169962, + "grad_norm": 0.6648714542388916, + "learning_rate": 7.900298491431139e-05, + "loss": 2.5715, + "step": 11391 + }, + { + "epoch": 0.9193769671535792, + "grad_norm": 0.7492914199829102, + "learning_rate": 7.898755020300312e-05, + "loss": 2.5226, + "step": 11392 + }, + { + "epoch": 0.9194576708901622, + "grad_norm": 0.7041164040565491, + "learning_rate": 7.897211601538004e-05, + "loss": 2.5809, + "step": 11393 + }, + { + "epoch": 0.9195383746267453, + "grad_norm": 0.6746383309364319, + "learning_rate": 7.895668235182677e-05, + "loss": 2.5369, + "step": 11394 + }, + { + "epoch": 0.9196190783633282, + "grad_norm": 0.6486156582832336, + "learning_rate": 7.894124921272798e-05, + "loss": 2.5406, + "step": 11395 + }, + { + "epoch": 0.9196997820999112, + "grad_norm": 0.6828807592391968, + "learning_rate": 7.892581659846834e-05, + "loss": 2.5241, + "step": 11396 + }, + { + "epoch": 0.9197804858364942, + "grad_norm": 0.694970428943634, + "learning_rate": 7.891038450943242e-05, + "loss": 2.4402, + "step": 11397 + }, + { + "epoch": 0.9198611895730773, + "grad_norm": 
0.7187039852142334, + "learning_rate": 7.889495294600484e-05, + "loss": 2.5052, + "step": 11398 + }, + { + "epoch": 0.9199418933096603, + "grad_norm": 0.6919832825660706, + "learning_rate": 7.887952190857024e-05, + "loss": 2.5078, + "step": 11399 + }, + { + "epoch": 0.9200225970462432, + "grad_norm": 0.7129504084587097, + "learning_rate": 7.886409139751313e-05, + "loss": 2.5047, + "step": 11400 + }, + { + "epoch": 0.9201033007828262, + "grad_norm": 0.6755272746086121, + "learning_rate": 7.88486614132181e-05, + "loss": 2.4821, + "step": 11401 + }, + { + "epoch": 0.9201840045194093, + "grad_norm": 0.7253937125205994, + "learning_rate": 7.883323195606973e-05, + "loss": 2.5062, + "step": 11402 + }, + { + "epoch": 0.9202647082559923, + "grad_norm": 0.7057155966758728, + "learning_rate": 7.881780302645257e-05, + "loss": 2.5475, + "step": 11403 + }, + { + "epoch": 0.9203454119925752, + "grad_norm": 0.713869571685791, + "learning_rate": 7.880237462475111e-05, + "loss": 2.5335, + "step": 11404 + }, + { + "epoch": 0.9204261157291582, + "grad_norm": 0.769648551940918, + "learning_rate": 7.878694675134987e-05, + "loss": 2.4944, + "step": 11405 + }, + { + "epoch": 0.9205068194657413, + "grad_norm": 0.6444964408874512, + "learning_rate": 7.877151940663343e-05, + "loss": 2.5755, + "step": 11406 + }, + { + "epoch": 0.9205875232023243, + "grad_norm": 0.6811819672584534, + "learning_rate": 7.875609259098618e-05, + "loss": 2.5475, + "step": 11407 + }, + { + "epoch": 0.9206682269389073, + "grad_norm": 0.6959417462348938, + "learning_rate": 7.874066630479259e-05, + "loss": 2.5095, + "step": 11408 + }, + { + "epoch": 0.9207489306754902, + "grad_norm": 0.6721363067626953, + "learning_rate": 7.872524054843724e-05, + "loss": 2.5166, + "step": 11409 + }, + { + "epoch": 0.9208296344120733, + "grad_norm": 0.713122546672821, + "learning_rate": 7.870981532230447e-05, + "loss": 2.5084, + "step": 11410 + }, + { + "epoch": 0.9209103381486563, + "grad_norm": 0.7059469819068909, + "learning_rate": 
7.869439062677876e-05, + "loss": 2.437, + "step": 11411 + }, + { + "epoch": 0.9209910418852393, + "grad_norm": 0.6808314323425293, + "learning_rate": 7.867896646224454e-05, + "loss": 2.5658, + "step": 11412 + }, + { + "epoch": 0.9210717456218223, + "grad_norm": 0.7060894966125488, + "learning_rate": 7.86635428290862e-05, + "loss": 2.515, + "step": 11413 + }, + { + "epoch": 0.9211524493584053, + "grad_norm": 0.7538465857505798, + "learning_rate": 7.864811972768813e-05, + "loss": 2.4448, + "step": 11414 + }, + { + "epoch": 0.9212331530949883, + "grad_norm": 0.6824522018432617, + "learning_rate": 7.863269715843478e-05, + "loss": 2.503, + "step": 11415 + }, + { + "epoch": 0.9213138568315713, + "grad_norm": 0.7068174481391907, + "learning_rate": 7.861727512171044e-05, + "loss": 2.5198, + "step": 11416 + }, + { + "epoch": 0.9213945605681543, + "grad_norm": 0.6742961406707764, + "learning_rate": 7.860185361789948e-05, + "loss": 2.5167, + "step": 11417 + }, + { + "epoch": 0.9214752643047374, + "grad_norm": 0.7643383741378784, + "learning_rate": 7.858643264738628e-05, + "loss": 2.5508, + "step": 11418 + }, + { + "epoch": 0.9215559680413203, + "grad_norm": 0.6737802028656006, + "learning_rate": 7.857101221055518e-05, + "loss": 2.589, + "step": 11419 + }, + { + "epoch": 0.9216366717779033, + "grad_norm": 0.668214738368988, + "learning_rate": 7.855559230779043e-05, + "loss": 2.4747, + "step": 11420 + }, + { + "epoch": 0.9217173755144863, + "grad_norm": 0.6933084726333618, + "learning_rate": 7.854017293947638e-05, + "loss": 2.5171, + "step": 11421 + }, + { + "epoch": 0.9217980792510694, + "grad_norm": 0.6320228576660156, + "learning_rate": 7.852475410599736e-05, + "loss": 2.5213, + "step": 11422 + }, + { + "epoch": 0.9218787829876524, + "grad_norm": 0.6578245759010315, + "learning_rate": 7.850933580773756e-05, + "loss": 2.5085, + "step": 11423 + }, + { + "epoch": 0.9219594867242353, + "grad_norm": 0.6741796135902405, + "learning_rate": 7.849391804508129e-05, + "loss": 2.5294, + 
"step": 11424 + }, + { + "epoch": 0.9220401904608183, + "grad_norm": 0.6875781416893005, + "learning_rate": 7.847850081841285e-05, + "loss": 2.5034, + "step": 11425 + }, + { + "epoch": 0.9221208941974013, + "grad_norm": 0.6515244245529175, + "learning_rate": 7.846308412811638e-05, + "loss": 2.4707, + "step": 11426 + }, + { + "epoch": 0.9222015979339844, + "grad_norm": 0.7326812148094177, + "learning_rate": 7.844766797457615e-05, + "loss": 2.5049, + "step": 11427 + }, + { + "epoch": 0.9222823016705674, + "grad_norm": 0.7539918422698975, + "learning_rate": 7.84322523581764e-05, + "loss": 2.4726, + "step": 11428 + }, + { + "epoch": 0.9223630054071503, + "grad_norm": 0.745468020439148, + "learning_rate": 7.841683727930129e-05, + "loss": 2.5003, + "step": 11429 + }, + { + "epoch": 0.9224437091437333, + "grad_norm": 0.726362943649292, + "learning_rate": 7.840142273833499e-05, + "loss": 2.5056, + "step": 11430 + }, + { + "epoch": 0.9225244128803164, + "grad_norm": 0.7275403738021851, + "learning_rate": 7.838600873566175e-05, + "loss": 2.5188, + "step": 11431 + }, + { + "epoch": 0.9226051166168994, + "grad_norm": 0.6908789873123169, + "learning_rate": 7.837059527166563e-05, + "loss": 2.5349, + "step": 11432 + }, + { + "epoch": 0.9226858203534823, + "grad_norm": 0.7220396399497986, + "learning_rate": 7.835518234673079e-05, + "loss": 2.4863, + "step": 11433 + }, + { + "epoch": 0.9227665240900653, + "grad_norm": 0.6516178846359253, + "learning_rate": 7.833976996124142e-05, + "loss": 2.556, + "step": 11434 + }, + { + "epoch": 0.9228472278266484, + "grad_norm": 0.6958726644515991, + "learning_rate": 7.832435811558163e-05, + "loss": 2.5286, + "step": 11435 + }, + { + "epoch": 0.9229279315632314, + "grad_norm": 0.7734121680259705, + "learning_rate": 7.830894681013546e-05, + "loss": 2.5087, + "step": 11436 + }, + { + "epoch": 0.9230086352998144, + "grad_norm": 0.709064245223999, + "learning_rate": 7.829353604528703e-05, + "loss": 2.4817, + "step": 11437 + }, + { + "epoch": 
0.9230893390363973, + "grad_norm": 0.7224971652030945, + "learning_rate": 7.827812582142045e-05, + "loss": 2.5179, + "step": 11438 + }, + { + "epoch": 0.9231700427729804, + "grad_norm": 0.7139936685562134, + "learning_rate": 7.826271613891973e-05, + "loss": 2.537, + "step": 11439 + }, + { + "epoch": 0.9232507465095634, + "grad_norm": 0.671138346195221, + "learning_rate": 7.824730699816896e-05, + "loss": 2.4865, + "step": 11440 + }, + { + "epoch": 0.9233314502461464, + "grad_norm": 0.6547425389289856, + "learning_rate": 7.823189839955218e-05, + "loss": 2.509, + "step": 11441 + }, + { + "epoch": 0.9234121539827294, + "grad_norm": 0.719765305519104, + "learning_rate": 7.821649034345338e-05, + "loss": 2.591, + "step": 11442 + }, + { + "epoch": 0.9234928577193124, + "grad_norm": 0.7128504514694214, + "learning_rate": 7.820108283025656e-05, + "loss": 2.541, + "step": 11443 + }, + { + "epoch": 0.9235735614558954, + "grad_norm": 0.7711538672447205, + "learning_rate": 7.818567586034577e-05, + "loss": 2.5388, + "step": 11444 + }, + { + "epoch": 0.9236542651924784, + "grad_norm": 0.7151121497154236, + "learning_rate": 7.817026943410494e-05, + "loss": 2.5539, + "step": 11445 + }, + { + "epoch": 0.9237349689290614, + "grad_norm": 0.7009569406509399, + "learning_rate": 7.815486355191805e-05, + "loss": 2.4793, + "step": 11446 + }, + { + "epoch": 0.9238156726656445, + "grad_norm": 0.7251109480857849, + "learning_rate": 7.813945821416909e-05, + "loss": 2.5406, + "step": 11447 + }, + { + "epoch": 0.9238963764022274, + "grad_norm": 0.6907934546470642, + "learning_rate": 7.812405342124196e-05, + "loss": 2.5069, + "step": 11448 + }, + { + "epoch": 0.9239770801388104, + "grad_norm": 0.699207067489624, + "learning_rate": 7.810864917352061e-05, + "loss": 2.4844, + "step": 11449 + }, + { + "epoch": 0.9240577838753934, + "grad_norm": 0.718386173248291, + "learning_rate": 7.809324547138893e-05, + "loss": 2.5666, + "step": 11450 + }, + { + "epoch": 0.9241384876119765, + "grad_norm": 
0.6420444846153259, + "learning_rate": 7.807784231523089e-05, + "loss": 2.506, + "step": 11451 + }, + { + "epoch": 0.9242191913485595, + "grad_norm": 0.6777252554893494, + "learning_rate": 7.806243970543028e-05, + "loss": 2.487, + "step": 11452 + }, + { + "epoch": 0.9242998950851424, + "grad_norm": 0.6907702684402466, + "learning_rate": 7.804703764237102e-05, + "loss": 2.5284, + "step": 11453 + }, + { + "epoch": 0.9243805988217254, + "grad_norm": 0.6383422613143921, + "learning_rate": 7.803163612643698e-05, + "loss": 2.4704, + "step": 11454 + }, + { + "epoch": 0.9244613025583085, + "grad_norm": 0.6879577040672302, + "learning_rate": 7.801623515801198e-05, + "loss": 2.5103, + "step": 11455 + }, + { + "epoch": 0.9245420062948915, + "grad_norm": 0.6856719851493835, + "learning_rate": 7.800083473747986e-05, + "loss": 2.5086, + "step": 11456 + }, + { + "epoch": 0.9246227100314744, + "grad_norm": 0.7463707327842712, + "learning_rate": 7.79854348652245e-05, + "loss": 2.5456, + "step": 11457 + }, + { + "epoch": 0.9247034137680574, + "grad_norm": 0.7352643013000488, + "learning_rate": 7.79700355416296e-05, + "loss": 2.5335, + "step": 11458 + }, + { + "epoch": 0.9247841175046405, + "grad_norm": 0.7525908350944519, + "learning_rate": 7.795463676707897e-05, + "loss": 2.5855, + "step": 11459 + }, + { + "epoch": 0.9248648212412235, + "grad_norm": 0.7323870658874512, + "learning_rate": 7.79392385419565e-05, + "loss": 2.5471, + "step": 11460 + }, + { + "epoch": 0.9249455249778065, + "grad_norm": 0.7443860769271851, + "learning_rate": 7.792384086664582e-05, + "loss": 2.5449, + "step": 11461 + }, + { + "epoch": 0.9250262287143894, + "grad_norm": 0.6928641200065613, + "learning_rate": 7.790844374153073e-05, + "loss": 2.505, + "step": 11462 + }, + { + "epoch": 0.9251069324509725, + "grad_norm": 0.6491222381591797, + "learning_rate": 7.789304716699498e-05, + "loss": 2.5447, + "step": 11463 + }, + { + "epoch": 0.9251876361875555, + "grad_norm": 0.7351166009902954, + "learning_rate": 
7.78776511434223e-05, + "loss": 2.524, + "step": 11464 + }, + { + "epoch": 0.9252683399241385, + "grad_norm": 0.6680036783218384, + "learning_rate": 7.786225567119637e-05, + "loss": 2.5019, + "step": 11465 + }, + { + "epoch": 0.9253490436607215, + "grad_norm": 0.7070801258087158, + "learning_rate": 7.784686075070089e-05, + "loss": 2.5052, + "step": 11466 + }, + { + "epoch": 0.9254297473973045, + "grad_norm": 0.7095211148262024, + "learning_rate": 7.783146638231957e-05, + "loss": 2.4998, + "step": 11467 + }, + { + "epoch": 0.9255104511338875, + "grad_norm": 0.6725812554359436, + "learning_rate": 7.781607256643604e-05, + "loss": 2.4909, + "step": 11468 + }, + { + "epoch": 0.9255911548704705, + "grad_norm": 0.684177577495575, + "learning_rate": 7.780067930343396e-05, + "loss": 2.5636, + "step": 11469 + }, + { + "epoch": 0.9256718586070535, + "grad_norm": 0.703419029712677, + "learning_rate": 7.778528659369702e-05, + "loss": 2.4295, + "step": 11470 + }, + { + "epoch": 0.9257525623436366, + "grad_norm": 0.6850195527076721, + "learning_rate": 7.776989443760877e-05, + "loss": 2.5143, + "step": 11471 + }, + { + "epoch": 0.9258332660802195, + "grad_norm": 0.7322348952293396, + "learning_rate": 7.775450283555286e-05, + "loss": 2.5616, + "step": 11472 + }, + { + "epoch": 0.9259139698168025, + "grad_norm": 0.6924510598182678, + "learning_rate": 7.77391117879129e-05, + "loss": 2.4796, + "step": 11473 + }, + { + "epoch": 0.9259946735533855, + "grad_norm": 0.7006441354751587, + "learning_rate": 7.772372129507249e-05, + "loss": 2.5142, + "step": 11474 + }, + { + "epoch": 0.9260753772899685, + "grad_norm": 0.6379218697547913, + "learning_rate": 7.770833135741513e-05, + "loss": 2.5366, + "step": 11475 + }, + { + "epoch": 0.9261560810265516, + "grad_norm": 0.676163375377655, + "learning_rate": 7.769294197532448e-05, + "loss": 2.4936, + "step": 11476 + }, + { + "epoch": 0.9262367847631345, + "grad_norm": 0.6964210271835327, + "learning_rate": 7.767755314918399e-05, + "loss": 2.429, + 
"step": 11477 + }, + { + "epoch": 0.9263174884997175, + "grad_norm": 0.7017048597335815, + "learning_rate": 7.766216487937722e-05, + "loss": 2.5488, + "step": 11478 + }, + { + "epoch": 0.9263981922363005, + "grad_norm": 0.6742509603500366, + "learning_rate": 7.76467771662877e-05, + "loss": 2.5121, + "step": 11479 + }, + { + "epoch": 0.9264788959728836, + "grad_norm": 0.6751403212547302, + "learning_rate": 7.763139001029893e-05, + "loss": 2.5897, + "step": 11480 + }, + { + "epoch": 0.9265595997094666, + "grad_norm": 0.6639657616615295, + "learning_rate": 7.761600341179439e-05, + "loss": 2.5015, + "step": 11481 + }, + { + "epoch": 0.9266403034460495, + "grad_norm": 0.6332827210426331, + "learning_rate": 7.760061737115756e-05, + "loss": 2.5518, + "step": 11482 + }, + { + "epoch": 0.9267210071826325, + "grad_norm": 0.6751062870025635, + "learning_rate": 7.758523188877192e-05, + "loss": 2.4252, + "step": 11483 + }, + { + "epoch": 0.9268017109192156, + "grad_norm": 0.6763231754302979, + "learning_rate": 7.756984696502084e-05, + "loss": 2.5683, + "step": 11484 + }, + { + "epoch": 0.9268824146557986, + "grad_norm": 0.6480380296707153, + "learning_rate": 7.755446260028784e-05, + "loss": 2.558, + "step": 11485 + }, + { + "epoch": 0.9269631183923815, + "grad_norm": 0.6925072073936462, + "learning_rate": 7.753907879495634e-05, + "loss": 2.5374, + "step": 11486 + }, + { + "epoch": 0.9270438221289645, + "grad_norm": 0.6771834492683411, + "learning_rate": 7.752369554940966e-05, + "loss": 2.5652, + "step": 11487 + }, + { + "epoch": 0.9271245258655476, + "grad_norm": 0.6747026443481445, + "learning_rate": 7.750831286403124e-05, + "loss": 2.5076, + "step": 11488 + }, + { + "epoch": 0.9272052296021306, + "grad_norm": 0.6727211475372314, + "learning_rate": 7.749293073920448e-05, + "loss": 2.4774, + "step": 11489 + }, + { + "epoch": 0.9272859333387136, + "grad_norm": 0.6334055066108704, + "learning_rate": 7.747754917531272e-05, + "loss": 2.5245, + "step": 11490 + }, + { + "epoch": 
0.9273666370752965, + "grad_norm": 0.740700900554657, + "learning_rate": 7.746216817273928e-05, + "loss": 2.5485, + "step": 11491 + }, + { + "epoch": 0.9274473408118796, + "grad_norm": 0.6500691771507263, + "learning_rate": 7.744678773186757e-05, + "loss": 2.5277, + "step": 11492 + }, + { + "epoch": 0.9275280445484626, + "grad_norm": 0.6592985987663269, + "learning_rate": 7.743140785308084e-05, + "loss": 2.5304, + "step": 11493 + }, + { + "epoch": 0.9276087482850456, + "grad_norm": 0.6980452537536621, + "learning_rate": 7.741602853676241e-05, + "loss": 2.544, + "step": 11494 + }, + { + "epoch": 0.9276894520216286, + "grad_norm": 0.643190860748291, + "learning_rate": 7.740064978329555e-05, + "loss": 2.5167, + "step": 11495 + }, + { + "epoch": 0.9277701557582116, + "grad_norm": 0.6789804100990295, + "learning_rate": 7.738527159306366e-05, + "loss": 2.5117, + "step": 11496 + }, + { + "epoch": 0.9278508594947946, + "grad_norm": 0.7109663486480713, + "learning_rate": 7.736989396644987e-05, + "loss": 2.5294, + "step": 11497 + }, + { + "epoch": 0.9279315632313776, + "grad_norm": 0.6752706170082092, + "learning_rate": 7.735451690383746e-05, + "loss": 2.4851, + "step": 11498 + }, + { + "epoch": 0.9280122669679606, + "grad_norm": 0.6947829723358154, + "learning_rate": 7.733914040560972e-05, + "loss": 2.5792, + "step": 11499 + }, + { + "epoch": 0.9280929707045437, + "grad_norm": 0.6701157689094543, + "learning_rate": 7.732376447214981e-05, + "loss": 2.4884, + "step": 11500 + }, + { + "epoch": 0.9281736744411266, + "grad_norm": 0.64533531665802, + "learning_rate": 7.730838910384097e-05, + "loss": 2.4644, + "step": 11501 + }, + { + "epoch": 0.9282543781777096, + "grad_norm": 0.6664395332336426, + "learning_rate": 7.729301430106644e-05, + "loss": 2.5286, + "step": 11502 + }, + { + "epoch": 0.9283350819142926, + "grad_norm": 0.6982395648956299, + "learning_rate": 7.72776400642093e-05, + "loss": 2.5092, + "step": 11503 + }, + { + "epoch": 0.9284157856508757, + "grad_norm": 
0.6656171679496765, + "learning_rate": 7.726226639365278e-05, + "loss": 2.4945, + "step": 11504 + }, + { + "epoch": 0.9284964893874587, + "grad_norm": 0.6213308572769165, + "learning_rate": 7.724689328978001e-05, + "loss": 2.5042, + "step": 11505 + }, + { + "epoch": 0.9285771931240416, + "grad_norm": 0.6855599880218506, + "learning_rate": 7.723152075297414e-05, + "loss": 2.5207, + "step": 11506 + }, + { + "epoch": 0.9286578968606246, + "grad_norm": 0.7724171280860901, + "learning_rate": 7.721614878361828e-05, + "loss": 2.4842, + "step": 11507 + }, + { + "epoch": 0.9287386005972077, + "grad_norm": 0.708634614944458, + "learning_rate": 7.720077738209559e-05, + "loss": 2.58, + "step": 11508 + }, + { + "epoch": 0.9288193043337907, + "grad_norm": 0.6766082644462585, + "learning_rate": 7.718540654878907e-05, + "loss": 2.492, + "step": 11509 + }, + { + "epoch": 0.9289000080703737, + "grad_norm": 0.6856982707977295, + "learning_rate": 7.717003628408187e-05, + "loss": 2.5186, + "step": 11510 + }, + { + "epoch": 0.9289807118069566, + "grad_norm": 0.680647611618042, + "learning_rate": 7.715466658835705e-05, + "loss": 2.5305, + "step": 11511 + }, + { + "epoch": 0.9290614155435397, + "grad_norm": 0.7174721360206604, + "learning_rate": 7.713929746199771e-05, + "loss": 2.4498, + "step": 11512 + }, + { + "epoch": 0.9291421192801227, + "grad_norm": 0.6507031321525574, + "learning_rate": 7.712392890538676e-05, + "loss": 2.5334, + "step": 11513 + }, + { + "epoch": 0.9292228230167057, + "grad_norm": 0.7545748353004456, + "learning_rate": 7.710856091890732e-05, + "loss": 2.505, + "step": 11514 + }, + { + "epoch": 0.9293035267532886, + "grad_norm": 0.6978560090065002, + "learning_rate": 7.709319350294242e-05, + "loss": 2.5243, + "step": 11515 + }, + { + "epoch": 0.9293842304898717, + "grad_norm": 0.6620199084281921, + "learning_rate": 7.707782665787497e-05, + "loss": 2.5114, + "step": 11516 + }, + { + "epoch": 0.9294649342264547, + "grad_norm": 0.7160476446151733, + "learning_rate": 
7.7062460384088e-05, + "loss": 2.5322, + "step": 11517 + }, + { + "epoch": 0.9295456379630377, + "grad_norm": 0.6637005805969238, + "learning_rate": 7.704709468196454e-05, + "loss": 2.456, + "step": 11518 + }, + { + "epoch": 0.9296263416996207, + "grad_norm": 0.6668851375579834, + "learning_rate": 7.703172955188742e-05, + "loss": 2.5251, + "step": 11519 + }, + { + "epoch": 0.9297070454362037, + "grad_norm": 0.6840329170227051, + "learning_rate": 7.701636499423965e-05, + "loss": 2.5068, + "step": 11520 + }, + { + "epoch": 0.9297877491727867, + "grad_norm": 0.695122241973877, + "learning_rate": 7.700100100940415e-05, + "loss": 2.4822, + "step": 11521 + }, + { + "epoch": 0.9298684529093697, + "grad_norm": 0.6784923672676086, + "learning_rate": 7.698563759776382e-05, + "loss": 2.4978, + "step": 11522 + }, + { + "epoch": 0.9299491566459527, + "grad_norm": 0.6949357986450195, + "learning_rate": 7.697027475970154e-05, + "loss": 2.5392, + "step": 11523 + }, + { + "epoch": 0.9300298603825358, + "grad_norm": 0.7128093242645264, + "learning_rate": 7.695491249560025e-05, + "loss": 2.455, + "step": 11524 + }, + { + "epoch": 0.9301105641191187, + "grad_norm": 0.6534962058067322, + "learning_rate": 7.693955080584277e-05, + "loss": 2.5272, + "step": 11525 + }, + { + "epoch": 0.9301912678557017, + "grad_norm": 0.6893511414527893, + "learning_rate": 7.692418969081194e-05, + "loss": 2.5366, + "step": 11526 + }, + { + "epoch": 0.9302719715922847, + "grad_norm": 0.6335335373878479, + "learning_rate": 7.690882915089064e-05, + "loss": 2.5781, + "step": 11527 + }, + { + "epoch": 0.9303526753288677, + "grad_norm": 0.7264769077301025, + "learning_rate": 7.689346918646172e-05, + "loss": 2.5322, + "step": 11528 + }, + { + "epoch": 0.9304333790654508, + "grad_norm": 0.7156329154968262, + "learning_rate": 7.68781097979079e-05, + "loss": 2.5558, + "step": 11529 + }, + { + "epoch": 0.9305140828020337, + "grad_norm": 0.6914563775062561, + "learning_rate": 7.686275098561203e-05, + "loss": 2.5058, + 
"step": 11530 + }, + { + "epoch": 0.9305947865386167, + "grad_norm": 0.6939939260482788, + "learning_rate": 7.684739274995691e-05, + "loss": 2.4764, + "step": 11531 + }, + { + "epoch": 0.9306754902751997, + "grad_norm": 0.7103014588356018, + "learning_rate": 7.683203509132526e-05, + "loss": 2.5062, + "step": 11532 + }, + { + "epoch": 0.9307561940117828, + "grad_norm": 0.6558870077133179, + "learning_rate": 7.681667801009985e-05, + "loss": 2.4869, + "step": 11533 + }, + { + "epoch": 0.9308368977483658, + "grad_norm": 0.7280104160308838, + "learning_rate": 7.680132150666348e-05, + "loss": 2.566, + "step": 11534 + }, + { + "epoch": 0.9309176014849487, + "grad_norm": 0.6814180612564087, + "learning_rate": 7.678596558139875e-05, + "loss": 2.4926, + "step": 11535 + }, + { + "epoch": 0.9309983052215317, + "grad_norm": 0.6916589736938477, + "learning_rate": 7.677061023468846e-05, + "loss": 2.5189, + "step": 11536 + }, + { + "epoch": 0.9310790089581148, + "grad_norm": 0.6527554988861084, + "learning_rate": 7.675525546691533e-05, + "loss": 2.4969, + "step": 11537 + }, + { + "epoch": 0.9311597126946978, + "grad_norm": 0.6458954811096191, + "learning_rate": 7.673990127846196e-05, + "loss": 2.5159, + "step": 11538 + }, + { + "epoch": 0.9312404164312807, + "grad_norm": 0.6704902052879333, + "learning_rate": 7.672454766971105e-05, + "loss": 2.49, + "step": 11539 + }, + { + "epoch": 0.9313211201678637, + "grad_norm": 0.6599698066711426, + "learning_rate": 7.670919464104527e-05, + "loss": 2.4872, + "step": 11540 + }, + { + "epoch": 0.9314018239044468, + "grad_norm": 0.7638888955116272, + "learning_rate": 7.669384219284722e-05, + "loss": 2.5228, + "step": 11541 + }, + { + "epoch": 0.9314825276410298, + "grad_norm": 0.6911981105804443, + "learning_rate": 7.667849032549954e-05, + "loss": 2.4675, + "step": 11542 + }, + { + "epoch": 0.9315632313776128, + "grad_norm": 0.6414669156074524, + "learning_rate": 7.666313903938486e-05, + "loss": 2.5137, + "step": 11543 + }, + { + "epoch": 
0.9316439351141957, + "grad_norm": 0.7552139759063721, + "learning_rate": 7.66477883348858e-05, + "loss": 2.5778, + "step": 11544 + }, + { + "epoch": 0.9317246388507788, + "grad_norm": 0.6738760471343994, + "learning_rate": 7.663243821238484e-05, + "loss": 2.5326, + "step": 11545 + }, + { + "epoch": 0.9318053425873618, + "grad_norm": 0.7406899333000183, + "learning_rate": 7.661708867226459e-05, + "loss": 2.4608, + "step": 11546 + }, + { + "epoch": 0.9318860463239448, + "grad_norm": 0.7261415719985962, + "learning_rate": 7.660173971490769e-05, + "loss": 2.5684, + "step": 11547 + }, + { + "epoch": 0.9319667500605278, + "grad_norm": 0.636542797088623, + "learning_rate": 7.658639134069654e-05, + "loss": 2.5159, + "step": 11548 + }, + { + "epoch": 0.9320474537971108, + "grad_norm": 0.7730209231376648, + "learning_rate": 7.657104355001373e-05, + "loss": 2.487, + "step": 11549 + }, + { + "epoch": 0.9321281575336938, + "grad_norm": 0.6553641557693481, + "learning_rate": 7.655569634324178e-05, + "loss": 2.5105, + "step": 11550 + }, + { + "epoch": 0.9322088612702768, + "grad_norm": 0.7008326649665833, + "learning_rate": 7.654034972076314e-05, + "loss": 2.492, + "step": 11551 + }, + { + "epoch": 0.9322895650068598, + "grad_norm": 0.7074279189109802, + "learning_rate": 7.65250036829603e-05, + "loss": 2.5221, + "step": 11552 + }, + { + "epoch": 0.9323702687434429, + "grad_norm": 0.7235530018806458, + "learning_rate": 7.650965823021578e-05, + "loss": 2.5285, + "step": 11553 + }, + { + "epoch": 0.9324509724800258, + "grad_norm": 0.7601436376571655, + "learning_rate": 7.649431336291194e-05, + "loss": 2.5071, + "step": 11554 + }, + { + "epoch": 0.9325316762166088, + "grad_norm": 0.6446424126625061, + "learning_rate": 7.647896908143127e-05, + "loss": 2.5032, + "step": 11555 + }, + { + "epoch": 0.9326123799531918, + "grad_norm": 0.7032139897346497, + "learning_rate": 7.646362538615614e-05, + "loss": 2.6096, + "step": 11556 + }, + { + "epoch": 0.9326930836897749, + "grad_norm": 
0.6727899312973022, + "learning_rate": 7.644828227746904e-05, + "loss": 2.5041, + "step": 11557 + }, + { + "epoch": 0.9327737874263579, + "grad_norm": 0.6817529201507568, + "learning_rate": 7.643293975575229e-05, + "loss": 2.4474, + "step": 11558 + }, + { + "epoch": 0.9328544911629408, + "grad_norm": 0.6374444365501404, + "learning_rate": 7.641759782138827e-05, + "loss": 2.5204, + "step": 11559 + }, + { + "epoch": 0.9329351948995238, + "grad_norm": 0.6889457702636719, + "learning_rate": 7.640225647475939e-05, + "loss": 2.6344, + "step": 11560 + }, + { + "epoch": 0.9330158986361069, + "grad_norm": 0.6657958626747131, + "learning_rate": 7.638691571624794e-05, + "loss": 2.4672, + "step": 11561 + }, + { + "epoch": 0.9330966023726899, + "grad_norm": 0.6425464749336243, + "learning_rate": 7.637157554623627e-05, + "loss": 2.4756, + "step": 11562 + }, + { + "epoch": 0.9331773061092729, + "grad_norm": 0.7193450927734375, + "learning_rate": 7.635623596510675e-05, + "loss": 2.4969, + "step": 11563 + }, + { + "epoch": 0.9332580098458558, + "grad_norm": 0.6595252156257629, + "learning_rate": 7.634089697324159e-05, + "loss": 2.4647, + "step": 11564 + }, + { + "epoch": 0.9333387135824389, + "grad_norm": 0.6505268812179565, + "learning_rate": 7.632555857102312e-05, + "loss": 2.5059, + "step": 11565 + }, + { + "epoch": 0.9334194173190219, + "grad_norm": 0.6877838969230652, + "learning_rate": 7.631022075883365e-05, + "loss": 2.4855, + "step": 11566 + }, + { + "epoch": 0.9335001210556049, + "grad_norm": 0.6376198530197144, + "learning_rate": 7.629488353705538e-05, + "loss": 2.5024, + "step": 11567 + }, + { + "epoch": 0.9335808247921878, + "grad_norm": 0.6807642579078674, + "learning_rate": 7.627954690607058e-05, + "loss": 2.4954, + "step": 11568 + }, + { + "epoch": 0.9336615285287709, + "grad_norm": 0.6785219311714172, + "learning_rate": 7.62642108662615e-05, + "loss": 2.4854, + "step": 11569 + }, + { + "epoch": 0.9337422322653539, + "grad_norm": 0.8159591555595398, + 
"learning_rate": 7.624887541801032e-05, + "loss": 2.524, + "step": 11570 + }, + { + "epoch": 0.9338229360019369, + "grad_norm": 0.6912592053413391, + "learning_rate": 7.62335405616992e-05, + "loss": 2.5111, + "step": 11571 + }, + { + "epoch": 0.9339036397385199, + "grad_norm": 0.6772454977035522, + "learning_rate": 7.621820629771041e-05, + "loss": 2.5603, + "step": 11572 + }, + { + "epoch": 0.933984343475103, + "grad_norm": 0.6720221638679504, + "learning_rate": 7.620287262642613e-05, + "loss": 2.5016, + "step": 11573 + }, + { + "epoch": 0.9340650472116859, + "grad_norm": 0.651935338973999, + "learning_rate": 7.618753954822841e-05, + "loss": 2.445, + "step": 11574 + }, + { + "epoch": 0.9341457509482689, + "grad_norm": 0.6731166839599609, + "learning_rate": 7.617220706349947e-05, + "loss": 2.4703, + "step": 11575 + }, + { + "epoch": 0.9342264546848519, + "grad_norm": 0.6283879280090332, + "learning_rate": 7.615687517262143e-05, + "loss": 2.5232, + "step": 11576 + }, + { + "epoch": 0.9343071584214349, + "grad_norm": 0.7193455696105957, + "learning_rate": 7.614154387597638e-05, + "loss": 2.5268, + "step": 11577 + }, + { + "epoch": 0.934387862158018, + "grad_norm": 0.6992828845977783, + "learning_rate": 7.61262131739464e-05, + "loss": 2.5834, + "step": 11578 + }, + { + "epoch": 0.9344685658946009, + "grad_norm": 0.6501220464706421, + "learning_rate": 7.611088306691365e-05, + "loss": 2.5146, + "step": 11579 + }, + { + "epoch": 0.9345492696311839, + "grad_norm": 0.7246220111846924, + "learning_rate": 7.60955535552601e-05, + "loss": 2.5665, + "step": 11580 + }, + { + "epoch": 0.9346299733677669, + "grad_norm": 0.7190428376197815, + "learning_rate": 7.608022463936783e-05, + "loss": 2.5061, + "step": 11581 + }, + { + "epoch": 0.93471067710435, + "grad_norm": 0.7144324779510498, + "learning_rate": 7.606489631961893e-05, + "loss": 2.4982, + "step": 11582 + }, + { + "epoch": 0.9347913808409329, + "grad_norm": 0.7144657373428345, + "learning_rate": 7.604956859639535e-05, + 
"loss": 2.5506, + "step": 11583 + }, + { + "epoch": 0.9348720845775159, + "grad_norm": 0.6596626043319702, + "learning_rate": 7.603424147007913e-05, + "loss": 2.4911, + "step": 11584 + }, + { + "epoch": 0.9349527883140989, + "grad_norm": 0.7090883851051331, + "learning_rate": 7.601891494105227e-05, + "loss": 2.5087, + "step": 11585 + }, + { + "epoch": 0.935033492050682, + "grad_norm": 0.6679760217666626, + "learning_rate": 7.600358900969671e-05, + "loss": 2.497, + "step": 11586 + }, + { + "epoch": 0.935114195787265, + "grad_norm": 0.6795344948768616, + "learning_rate": 7.598826367639447e-05, + "loss": 2.4839, + "step": 11587 + }, + { + "epoch": 0.9351948995238479, + "grad_norm": 0.6378790736198425, + "learning_rate": 7.597293894152744e-05, + "loss": 2.4656, + "step": 11588 + }, + { + "epoch": 0.9352756032604309, + "grad_norm": 0.6646658182144165, + "learning_rate": 7.595761480547762e-05, + "loss": 2.4739, + "step": 11589 + }, + { + "epoch": 0.935356306997014, + "grad_norm": 0.6662073731422424, + "learning_rate": 7.594229126862687e-05, + "loss": 2.4872, + "step": 11590 + }, + { + "epoch": 0.935437010733597, + "grad_norm": 0.6698113679885864, + "learning_rate": 7.592696833135708e-05, + "loss": 2.4964, + "step": 11591 + }, + { + "epoch": 0.93551771447018, + "grad_norm": 0.6520004272460938, + "learning_rate": 7.59116459940502e-05, + "loss": 2.5616, + "step": 11592 + }, + { + "epoch": 0.9355984182067629, + "grad_norm": 0.6675869226455688, + "learning_rate": 7.589632425708806e-05, + "loss": 2.4854, + "step": 11593 + }, + { + "epoch": 0.935679121943346, + "grad_norm": 0.6914103031158447, + "learning_rate": 7.588100312085251e-05, + "loss": 2.5252, + "step": 11594 + }, + { + "epoch": 0.935759825679929, + "grad_norm": 0.7283286452293396, + "learning_rate": 7.586568258572546e-05, + "loss": 2.543, + "step": 11595 + }, + { + "epoch": 0.935840529416512, + "grad_norm": 0.6881958246231079, + "learning_rate": 7.585036265208864e-05, + "loss": 2.4499, + "step": 11596 + }, + { + 
"epoch": 0.935921233153095, + "grad_norm": 0.7733677625656128, + "learning_rate": 7.58350433203239e-05, + "loss": 2.5595, + "step": 11597 + }, + { + "epoch": 0.936001936889678, + "grad_norm": 0.672711968421936, + "learning_rate": 7.58197245908131e-05, + "loss": 2.4757, + "step": 11598 + }, + { + "epoch": 0.936082640626261, + "grad_norm": 0.691780686378479, + "learning_rate": 7.580440646393794e-05, + "loss": 2.5134, + "step": 11599 + }, + { + "epoch": 0.936163344362844, + "grad_norm": 0.6935102343559265, + "learning_rate": 7.578908894008021e-05, + "loss": 2.5128, + "step": 11600 + }, + { + "epoch": 0.936244048099427, + "grad_norm": 0.7005696892738342, + "learning_rate": 7.57737720196217e-05, + "loss": 2.5338, + "step": 11601 + }, + { + "epoch": 0.93632475183601, + "grad_norm": 0.6729815602302551, + "learning_rate": 7.575845570294409e-05, + "loss": 2.5373, + "step": 11602 + }, + { + "epoch": 0.936405455572593, + "grad_norm": 0.6694760918617249, + "learning_rate": 7.574313999042913e-05, + "loss": 2.5165, + "step": 11603 + }, + { + "epoch": 0.936486159309176, + "grad_norm": 0.6425337791442871, + "learning_rate": 7.572782488245854e-05, + "loss": 2.5102, + "step": 11604 + }, + { + "epoch": 0.936566863045759, + "grad_norm": 0.6613046526908875, + "learning_rate": 7.571251037941405e-05, + "loss": 2.5108, + "step": 11605 + }, + { + "epoch": 0.9366475667823421, + "grad_norm": 0.7396309971809387, + "learning_rate": 7.569719648167723e-05, + "loss": 2.5261, + "step": 11606 + }, + { + "epoch": 0.936728270518925, + "grad_norm": 0.6783239245414734, + "learning_rate": 7.568188318962981e-05, + "loss": 2.5725, + "step": 11607 + }, + { + "epoch": 0.936808974255508, + "grad_norm": 0.7591684460639954, + "learning_rate": 7.566657050365345e-05, + "loss": 2.5085, + "step": 11608 + }, + { + "epoch": 0.936889677992091, + "grad_norm": 0.6805615425109863, + "learning_rate": 7.565125842412974e-05, + "loss": 2.5598, + "step": 11609 + }, + { + "epoch": 0.9369703817286741, + "grad_norm": 
0.680203378200531, + "learning_rate": 7.563594695144032e-05, + "loss": 2.5072, + "step": 11610 + }, + { + "epoch": 0.9370510854652571, + "grad_norm": 0.7035777568817139, + "learning_rate": 7.56206360859668e-05, + "loss": 2.4882, + "step": 11611 + }, + { + "epoch": 0.93713178920184, + "grad_norm": 0.7457048892974854, + "learning_rate": 7.560532582809075e-05, + "loss": 2.4975, + "step": 11612 + }, + { + "epoch": 0.937212492938423, + "grad_norm": 0.702055037021637, + "learning_rate": 7.559001617819374e-05, + "loss": 2.5522, + "step": 11613 + }, + { + "epoch": 0.9372931966750061, + "grad_norm": 0.7618527412414551, + "learning_rate": 7.557470713665738e-05, + "loss": 2.5503, + "step": 11614 + }, + { + "epoch": 0.9373739004115891, + "grad_norm": 0.8611559867858887, + "learning_rate": 7.555939870386312e-05, + "loss": 2.4866, + "step": 11615 + }, + { + "epoch": 0.937454604148172, + "grad_norm": 0.7285227179527283, + "learning_rate": 7.554409088019254e-05, + "loss": 2.4855, + "step": 11616 + }, + { + "epoch": 0.937535307884755, + "grad_norm": 0.7512121796607971, + "learning_rate": 7.552878366602716e-05, + "loss": 2.5496, + "step": 11617 + }, + { + "epoch": 0.9376160116213381, + "grad_norm": 0.7353625297546387, + "learning_rate": 7.551347706174844e-05, + "loss": 2.5754, + "step": 11618 + }, + { + "epoch": 0.9376967153579211, + "grad_norm": 0.7131205797195435, + "learning_rate": 7.549817106773788e-05, + "loss": 2.4927, + "step": 11619 + }, + { + "epoch": 0.9377774190945041, + "grad_norm": 0.6562477946281433, + "learning_rate": 7.548286568437695e-05, + "loss": 2.5247, + "step": 11620 + }, + { + "epoch": 0.937858122831087, + "grad_norm": 0.7094948887825012, + "learning_rate": 7.546756091204713e-05, + "loss": 2.5084, + "step": 11621 + }, + { + "epoch": 0.9379388265676701, + "grad_norm": 0.6890475153923035, + "learning_rate": 7.545225675112977e-05, + "loss": 2.5178, + "step": 11622 + }, + { + "epoch": 0.9380195303042531, + "grad_norm": 0.6801474094390869, + "learning_rate": 
7.543695320200634e-05, + "loss": 2.5457, + "step": 11623 + }, + { + "epoch": 0.9381002340408361, + "grad_norm": 0.7093712687492371, + "learning_rate": 7.54216502650583e-05, + "loss": 2.6122, + "step": 11624 + }, + { + "epoch": 0.9381809377774191, + "grad_norm": 0.7246927618980408, + "learning_rate": 7.540634794066695e-05, + "loss": 2.5251, + "step": 11625 + }, + { + "epoch": 0.9382616415140022, + "grad_norm": 0.7358111143112183, + "learning_rate": 7.539104622921368e-05, + "loss": 2.5444, + "step": 11626 + }, + { + "epoch": 0.9383423452505851, + "grad_norm": 0.6915993690490723, + "learning_rate": 7.53757451310799e-05, + "loss": 2.448, + "step": 11627 + }, + { + "epoch": 0.9384230489871681, + "grad_norm": 0.6864039301872253, + "learning_rate": 7.536044464664689e-05, + "loss": 2.5267, + "step": 11628 + }, + { + "epoch": 0.9385037527237511, + "grad_norm": 0.664799690246582, + "learning_rate": 7.534514477629602e-05, + "loss": 2.5602, + "step": 11629 + }, + { + "epoch": 0.9385844564603341, + "grad_norm": 0.6770062446594238, + "learning_rate": 7.532984552040862e-05, + "loss": 2.5034, + "step": 11630 + }, + { + "epoch": 0.9386651601969171, + "grad_norm": 0.6961095929145813, + "learning_rate": 7.531454687936592e-05, + "loss": 2.4523, + "step": 11631 + }, + { + "epoch": 0.9387458639335001, + "grad_norm": 0.6776804327964783, + "learning_rate": 7.529924885354924e-05, + "loss": 2.5526, + "step": 11632 + }, + { + "epoch": 0.9388265676700831, + "grad_norm": 0.785796582698822, + "learning_rate": 7.528395144333988e-05, + "loss": 2.5256, + "step": 11633 + }, + { + "epoch": 0.9389072714066661, + "grad_norm": 0.7016655206680298, + "learning_rate": 7.526865464911902e-05, + "loss": 2.4781, + "step": 11634 + }, + { + "epoch": 0.9389879751432492, + "grad_norm": 0.7027767300605774, + "learning_rate": 7.525335847126795e-05, + "loss": 2.5287, + "step": 11635 + }, + { + "epoch": 0.9390686788798321, + "grad_norm": 0.710624098777771, + "learning_rate": 7.523806291016787e-05, + "loss": 2.5486, + 
"step": 11636 + }, + { + "epoch": 0.9391493826164151, + "grad_norm": 0.7029656767845154, + "learning_rate": 7.52227679662e-05, + "loss": 2.5244, + "step": 11637 + }, + { + "epoch": 0.9392300863529981, + "grad_norm": 0.7417333722114563, + "learning_rate": 7.520747363974551e-05, + "loss": 2.5561, + "step": 11638 + }, + { + "epoch": 0.9393107900895812, + "grad_norm": 0.6595067381858826, + "learning_rate": 7.519217993118559e-05, + "loss": 2.617, + "step": 11639 + }, + { + "epoch": 0.9393914938261642, + "grad_norm": 0.6808187365531921, + "learning_rate": 7.517688684090141e-05, + "loss": 2.5279, + "step": 11640 + }, + { + "epoch": 0.9394721975627471, + "grad_norm": 0.6618706583976746, + "learning_rate": 7.516159436927408e-05, + "loss": 2.4976, + "step": 11641 + }, + { + "epoch": 0.9395529012993301, + "grad_norm": 0.6979385018348694, + "learning_rate": 7.514630251668475e-05, + "loss": 2.4542, + "step": 11642 + }, + { + "epoch": 0.9396336050359132, + "grad_norm": 0.6380844116210938, + "learning_rate": 7.513101128351454e-05, + "loss": 2.48, + "step": 11643 + }, + { + "epoch": 0.9397143087724962, + "grad_norm": 0.6390014290809631, + "learning_rate": 7.511572067014452e-05, + "loss": 2.5111, + "step": 11644 + }, + { + "epoch": 0.9397950125090792, + "grad_norm": 0.7592498064041138, + "learning_rate": 7.510043067695578e-05, + "loss": 2.5161, + "step": 11645 + }, + { + "epoch": 0.9398757162456621, + "grad_norm": 0.6269322037696838, + "learning_rate": 7.508514130432945e-05, + "loss": 2.491, + "step": 11646 + }, + { + "epoch": 0.9399564199822452, + "grad_norm": 0.6372053623199463, + "learning_rate": 7.506985255264646e-05, + "loss": 2.4826, + "step": 11647 + }, + { + "epoch": 0.9400371237188282, + "grad_norm": 0.6962460875511169, + "learning_rate": 7.505456442228794e-05, + "loss": 2.5605, + "step": 11648 + }, + { + "epoch": 0.9401178274554112, + "grad_norm": 0.7931656241416931, + "learning_rate": 7.503927691363491e-05, + "loss": 2.4909, + "step": 11649 + }, + { + "epoch": 
0.9401985311919941, + "grad_norm": 0.688792884349823, + "learning_rate": 7.502399002706832e-05, + "loss": 2.4888, + "step": 11650 + }, + { + "epoch": 0.9402792349285772, + "grad_norm": 0.6683691143989563, + "learning_rate": 7.500870376296918e-05, + "loss": 2.5233, + "step": 11651 + }, + { + "epoch": 0.9403599386651602, + "grad_norm": 0.6537527441978455, + "learning_rate": 7.499341812171846e-05, + "loss": 2.5061, + "step": 11652 + }, + { + "epoch": 0.9404406424017432, + "grad_norm": 0.6657658219337463, + "learning_rate": 7.497813310369717e-05, + "loss": 2.4844, + "step": 11653 + }, + { + "epoch": 0.9405213461383262, + "grad_norm": 0.6865110993385315, + "learning_rate": 7.496284870928618e-05, + "loss": 2.4986, + "step": 11654 + }, + { + "epoch": 0.9406020498749093, + "grad_norm": 0.6724923849105835, + "learning_rate": 7.494756493886644e-05, + "loss": 2.4818, + "step": 11655 + }, + { + "epoch": 0.9406827536114922, + "grad_norm": 0.6478626728057861, + "learning_rate": 7.493228179281892e-05, + "loss": 2.5321, + "step": 11656 + }, + { + "epoch": 0.9407634573480752, + "grad_norm": 0.6474425792694092, + "learning_rate": 7.491699927152443e-05, + "loss": 2.5276, + "step": 11657 + }, + { + "epoch": 0.9408441610846582, + "grad_norm": 0.6736220717430115, + "learning_rate": 7.490171737536387e-05, + "loss": 2.4734, + "step": 11658 + }, + { + "epoch": 0.9409248648212413, + "grad_norm": 0.6714746952056885, + "learning_rate": 7.488643610471815e-05, + "loss": 2.5754, + "step": 11659 + }, + { + "epoch": 0.9410055685578242, + "grad_norm": 0.6714532375335693, + "learning_rate": 7.487115545996805e-05, + "loss": 2.4855, + "step": 11660 + }, + { + "epoch": 0.9410862722944072, + "grad_norm": 0.7601683139801025, + "learning_rate": 7.485587544149447e-05, + "loss": 2.4887, + "step": 11661 + }, + { + "epoch": 0.9411669760309902, + "grad_norm": 0.7655646204948425, + "learning_rate": 7.484059604967821e-05, + "loss": 2.4904, + "step": 11662 + }, + { + "epoch": 0.9412476797675733, + "grad_norm": 
0.6841822862625122, + "learning_rate": 7.482531728490006e-05, + "loss": 2.5272, + "step": 11663 + }, + { + "epoch": 0.9413283835041563, + "grad_norm": 0.7683621048927307, + "learning_rate": 7.481003914754078e-05, + "loss": 2.5218, + "step": 11664 + }, + { + "epoch": 0.9414090872407392, + "grad_norm": 0.6597647070884705, + "learning_rate": 7.479476163798124e-05, + "loss": 2.4925, + "step": 11665 + }, + { + "epoch": 0.9414897909773222, + "grad_norm": 0.6573941111564636, + "learning_rate": 7.477948475660208e-05, + "loss": 2.4854, + "step": 11666 + }, + { + "epoch": 0.9415704947139053, + "grad_norm": 0.6639125943183899, + "learning_rate": 7.476420850378407e-05, + "loss": 2.5207, + "step": 11667 + }, + { + "epoch": 0.9416511984504883, + "grad_norm": 0.6770366430282593, + "learning_rate": 7.474893287990796e-05, + "loss": 2.5167, + "step": 11668 + }, + { + "epoch": 0.9417319021870713, + "grad_norm": 0.6908389925956726, + "learning_rate": 7.473365788535447e-05, + "loss": 2.4606, + "step": 11669 + }, + { + "epoch": 0.9418126059236542, + "grad_norm": 0.6625069975852966, + "learning_rate": 7.471838352050427e-05, + "loss": 2.5344, + "step": 11670 + }, + { + "epoch": 0.9418933096602373, + "grad_norm": 0.6690869331359863, + "learning_rate": 7.470310978573803e-05, + "loss": 2.4507, + "step": 11671 + }, + { + "epoch": 0.9419740133968203, + "grad_norm": 0.6741886734962463, + "learning_rate": 7.468783668143645e-05, + "loss": 2.5755, + "step": 11672 + }, + { + "epoch": 0.9420547171334033, + "grad_norm": 0.6876424551010132, + "learning_rate": 7.467256420798009e-05, + "loss": 2.483, + "step": 11673 + }, + { + "epoch": 0.9421354208699863, + "grad_norm": 0.7044318318367004, + "learning_rate": 7.465729236574965e-05, + "loss": 2.5025, + "step": 11674 + }, + { + "epoch": 0.9422161246065693, + "grad_norm": 0.6608660817146301, + "learning_rate": 7.46420211551258e-05, + "loss": 2.5253, + "step": 11675 + }, + { + "epoch": 0.9422968283431523, + "grad_norm": 0.6944260001182556, + "learning_rate": 
7.4626750576489e-05, + "loss": 2.5002, + "step": 11676 + }, + { + "epoch": 0.9423775320797353, + "grad_norm": 0.7304964065551758, + "learning_rate": 7.46114806302199e-05, + "loss": 2.5501, + "step": 11677 + }, + { + "epoch": 0.9424582358163183, + "grad_norm": 0.688525378704071, + "learning_rate": 7.459621131669911e-05, + "loss": 2.5291, + "step": 11678 + }, + { + "epoch": 0.9425389395529012, + "grad_norm": 0.7388432025909424, + "learning_rate": 7.45809426363071e-05, + "loss": 2.5391, + "step": 11679 + }, + { + "epoch": 0.9426196432894843, + "grad_norm": 0.6777819991111755, + "learning_rate": 7.456567458942447e-05, + "loss": 2.5425, + "step": 11680 + }, + { + "epoch": 0.9427003470260673, + "grad_norm": 0.7208845615386963, + "learning_rate": 7.455040717643169e-05, + "loss": 2.5306, + "step": 11681 + }, + { + "epoch": 0.9427810507626503, + "grad_norm": 0.745384693145752, + "learning_rate": 7.453514039770934e-05, + "loss": 2.4695, + "step": 11682 + }, + { + "epoch": 0.9428617544992333, + "grad_norm": 0.7088115215301514, + "learning_rate": 7.451987425363782e-05, + "loss": 2.5413, + "step": 11683 + }, + { + "epoch": 0.9429424582358163, + "grad_norm": 0.7287998795509338, + "learning_rate": 7.450460874459762e-05, + "loss": 2.5773, + "step": 11684 + }, + { + "epoch": 0.9430231619723993, + "grad_norm": 0.6897092461585999, + "learning_rate": 7.448934387096928e-05, + "loss": 2.5255, + "step": 11685 + }, + { + "epoch": 0.9431038657089823, + "grad_norm": 0.6227227449417114, + "learning_rate": 7.447407963313313e-05, + "loss": 2.5027, + "step": 11686 + }, + { + "epoch": 0.9431845694455653, + "grad_norm": 0.6954305768013, + "learning_rate": 7.445881603146964e-05, + "loss": 2.5477, + "step": 11687 + }, + { + "epoch": 0.9432652731821484, + "grad_norm": 0.7860052585601807, + "learning_rate": 7.444355306635924e-05, + "loss": 2.469, + "step": 11688 + }, + { + "epoch": 0.9433459769187313, + "grad_norm": 0.6851965188980103, + "learning_rate": 7.442829073818227e-05, + "loss": 2.4997, + 
"step": 11689 + }, + { + "epoch": 0.9434266806553143, + "grad_norm": 0.7011744379997253, + "learning_rate": 7.441302904731916e-05, + "loss": 2.5399, + "step": 11690 + }, + { + "epoch": 0.9435073843918973, + "grad_norm": 0.703167200088501, + "learning_rate": 7.439776799415028e-05, + "loss": 2.5323, + "step": 11691 + }, + { + "epoch": 0.9435880881284804, + "grad_norm": 0.6747310161590576, + "learning_rate": 7.438250757905591e-05, + "loss": 2.5406, + "step": 11692 + }, + { + "epoch": 0.9436687918650634, + "grad_norm": 0.8631153106689453, + "learning_rate": 7.436724780241642e-05, + "loss": 2.5215, + "step": 11693 + }, + { + "epoch": 0.9437494956016463, + "grad_norm": 0.6919798254966736, + "learning_rate": 7.435198866461214e-05, + "loss": 2.4654, + "step": 11694 + }, + { + "epoch": 0.9438301993382293, + "grad_norm": 0.6747070550918579, + "learning_rate": 7.433673016602332e-05, + "loss": 2.5186, + "step": 11695 + }, + { + "epoch": 0.9439109030748124, + "grad_norm": 0.7368776798248291, + "learning_rate": 7.432147230703026e-05, + "loss": 2.5365, + "step": 11696 + }, + { + "epoch": 0.9439916068113954, + "grad_norm": 0.7443639636039734, + "learning_rate": 7.430621508801325e-05, + "loss": 2.4966, + "step": 11697 + }, + { + "epoch": 0.9440723105479784, + "grad_norm": 0.7371395230293274, + "learning_rate": 7.429095850935255e-05, + "loss": 2.4638, + "step": 11698 + }, + { + "epoch": 0.9441530142845613, + "grad_norm": 0.6917321681976318, + "learning_rate": 7.427570257142832e-05, + "loss": 2.5341, + "step": 11699 + }, + { + "epoch": 0.9442337180211444, + "grad_norm": 0.7704101800918579, + "learning_rate": 7.426044727462085e-05, + "loss": 2.5144, + "step": 11700 + }, + { + "epoch": 0.9443144217577274, + "grad_norm": 0.692197859287262, + "learning_rate": 7.424519261931036e-05, + "loss": 2.5293, + "step": 11701 + }, + { + "epoch": 0.9443951254943104, + "grad_norm": 0.7305885553359985, + "learning_rate": 7.422993860587695e-05, + "loss": 2.5236, + "step": 11702 + }, + { + "epoch": 
0.9444758292308933, + "grad_norm": 0.6955052018165588, + "learning_rate": 7.421468523470081e-05, + "loss": 2.4765, + "step": 11703 + }, + { + "epoch": 0.9445565329674764, + "grad_norm": 0.7394432425498962, + "learning_rate": 7.419943250616216e-05, + "loss": 2.5053, + "step": 11704 + }, + { + "epoch": 0.9446372367040594, + "grad_norm": 0.679044246673584, + "learning_rate": 7.418418042064108e-05, + "loss": 2.5413, + "step": 11705 + }, + { + "epoch": 0.9447179404406424, + "grad_norm": 0.7153440117835999, + "learning_rate": 7.41689289785177e-05, + "loss": 2.4938, + "step": 11706 + }, + { + "epoch": 0.9447986441772254, + "grad_norm": 0.697068452835083, + "learning_rate": 7.415367818017217e-05, + "loss": 2.5157, + "step": 11707 + }, + { + "epoch": 0.9448793479138085, + "grad_norm": 0.664616048336029, + "learning_rate": 7.41384280259845e-05, + "loss": 2.4859, + "step": 11708 + }, + { + "epoch": 0.9449600516503914, + "grad_norm": 0.7275365591049194, + "learning_rate": 7.412317851633479e-05, + "loss": 2.523, + "step": 11709 + }, + { + "epoch": 0.9450407553869744, + "grad_norm": 0.7408944368362427, + "learning_rate": 7.410792965160318e-05, + "loss": 2.4994, + "step": 11710 + }, + { + "epoch": 0.9451214591235574, + "grad_norm": 0.7222678065299988, + "learning_rate": 7.40926814321696e-05, + "loss": 2.5084, + "step": 11711 + }, + { + "epoch": 0.9452021628601405, + "grad_norm": 0.7242292761802673, + "learning_rate": 7.407743385841412e-05, + "loss": 2.5165, + "step": 11712 + }, + { + "epoch": 0.9452828665967234, + "grad_norm": 0.6634014844894409, + "learning_rate": 7.406218693071677e-05, + "loss": 2.4947, + "step": 11713 + }, + { + "epoch": 0.9453635703333064, + "grad_norm": 0.8126605153083801, + "learning_rate": 7.404694064945751e-05, + "loss": 2.5553, + "step": 11714 + }, + { + "epoch": 0.9454442740698894, + "grad_norm": 0.679344654083252, + "learning_rate": 7.403169501501632e-05, + "loss": 2.5475, + "step": 11715 + }, + { + "epoch": 0.9455249778064725, + "grad_norm": 
0.7584314346313477, + "learning_rate": 7.401645002777318e-05, + "loss": 2.5498, + "step": 11716 + }, + { + "epoch": 0.9456056815430555, + "grad_norm": 0.7191590666770935, + "learning_rate": 7.400120568810806e-05, + "loss": 2.5161, + "step": 11717 + }, + { + "epoch": 0.9456863852796384, + "grad_norm": 0.6738762855529785, + "learning_rate": 7.398596199640084e-05, + "loss": 2.4819, + "step": 11718 + }, + { + "epoch": 0.9457670890162214, + "grad_norm": 0.7305885553359985, + "learning_rate": 7.397071895303143e-05, + "loss": 2.4842, + "step": 11719 + }, + { + "epoch": 0.9458477927528045, + "grad_norm": 0.6885291337966919, + "learning_rate": 7.395547655837976e-05, + "loss": 2.5016, + "step": 11720 + }, + { + "epoch": 0.9459284964893875, + "grad_norm": 0.6807307600975037, + "learning_rate": 7.394023481282568e-05, + "loss": 2.4949, + "step": 11721 + }, + { + "epoch": 0.9460092002259705, + "grad_norm": 0.6683849096298218, + "learning_rate": 7.392499371674907e-05, + "loss": 2.4974, + "step": 11722 + }, + { + "epoch": 0.9460899039625534, + "grad_norm": 0.6615697741508484, + "learning_rate": 7.39097532705298e-05, + "loss": 2.4744, + "step": 11723 + }, + { + "epoch": 0.9461706076991365, + "grad_norm": 0.6463690996170044, + "learning_rate": 7.389451347454765e-05, + "loss": 2.478, + "step": 11724 + }, + { + "epoch": 0.9462513114357195, + "grad_norm": 0.6848269701004028, + "learning_rate": 7.387927432918247e-05, + "loss": 2.5491, + "step": 11725 + }, + { + "epoch": 0.9463320151723025, + "grad_norm": 0.7251551747322083, + "learning_rate": 7.386403583481409e-05, + "loss": 2.4936, + "step": 11726 + }, + { + "epoch": 0.9464127189088855, + "grad_norm": 0.6562095284461975, + "learning_rate": 7.384879799182223e-05, + "loss": 2.4895, + "step": 11727 + }, + { + "epoch": 0.9464934226454685, + "grad_norm": 0.6891352534294128, + "learning_rate": 7.383356080058668e-05, + "loss": 2.508, + "step": 11728 + }, + { + "epoch": 0.9465741263820515, + "grad_norm": 0.7220255136489868, + "learning_rate": 
7.381832426148719e-05, + "loss": 2.5181, + "step": 11729 + }, + { + "epoch": 0.9466548301186345, + "grad_norm": 0.7213689088821411, + "learning_rate": 7.38030883749035e-05, + "loss": 2.5136, + "step": 11730 + }, + { + "epoch": 0.9467355338552175, + "grad_norm": 0.6711129546165466, + "learning_rate": 7.378785314121535e-05, + "loss": 2.5463, + "step": 11731 + }, + { + "epoch": 0.9468162375918004, + "grad_norm": 0.6380139589309692, + "learning_rate": 7.377261856080239e-05, + "loss": 2.5092, + "step": 11732 + }, + { + "epoch": 0.9468969413283835, + "grad_norm": 0.66046142578125, + "learning_rate": 7.375738463404437e-05, + "loss": 2.5561, + "step": 11733 + }, + { + "epoch": 0.9469776450649665, + "grad_norm": 0.6857354044914246, + "learning_rate": 7.37421513613209e-05, + "loss": 2.5774, + "step": 11734 + }, + { + "epoch": 0.9470583488015495, + "grad_norm": 0.6811589598655701, + "learning_rate": 7.372691874301163e-05, + "loss": 2.4918, + "step": 11735 + }, + { + "epoch": 0.9471390525381325, + "grad_norm": 0.6401017308235168, + "learning_rate": 7.37116867794963e-05, + "loss": 2.4994, + "step": 11736 + }, + { + "epoch": 0.9472197562747156, + "grad_norm": 0.6967078447341919, + "learning_rate": 7.369645547115438e-05, + "loss": 2.5809, + "step": 11737 + }, + { + "epoch": 0.9473004600112985, + "grad_norm": 0.6695219278335571, + "learning_rate": 7.368122481836557e-05, + "loss": 2.4735, + "step": 11738 + }, + { + "epoch": 0.9473811637478815, + "grad_norm": 0.6540528535842896, + "learning_rate": 7.366599482150944e-05, + "loss": 2.4998, + "step": 11739 + }, + { + "epoch": 0.9474618674844645, + "grad_norm": 0.700683057308197, + "learning_rate": 7.365076548096556e-05, + "loss": 2.5258, + "step": 11740 + }, + { + "epoch": 0.9475425712210476, + "grad_norm": 0.7125419974327087, + "learning_rate": 7.363553679711347e-05, + "loss": 2.4653, + "step": 11741 + }, + { + "epoch": 0.9476232749576305, + "grad_norm": 0.7285346984863281, + "learning_rate": 7.362030877033275e-05, + "loss": 2.5523, + 
"step": 11742 + }, + { + "epoch": 0.9477039786942135, + "grad_norm": 0.7310814261436462, + "learning_rate": 7.360508140100288e-05, + "loss": 2.5027, + "step": 11743 + }, + { + "epoch": 0.9477846824307965, + "grad_norm": 0.746961772441864, + "learning_rate": 7.358985468950335e-05, + "loss": 2.5485, + "step": 11744 + }, + { + "epoch": 0.9478653861673796, + "grad_norm": 0.6880186796188354, + "learning_rate": 7.357462863621369e-05, + "loss": 2.5243, + "step": 11745 + }, + { + "epoch": 0.9479460899039626, + "grad_norm": 0.6406471133232117, + "learning_rate": 7.355940324151339e-05, + "loss": 2.512, + "step": 11746 + }, + { + "epoch": 0.9480267936405455, + "grad_norm": 0.6503005027770996, + "learning_rate": 7.354417850578184e-05, + "loss": 2.5318, + "step": 11747 + }, + { + "epoch": 0.9481074973771285, + "grad_norm": 0.6458879113197327, + "learning_rate": 7.352895442939852e-05, + "loss": 2.5451, + "step": 11748 + }, + { + "epoch": 0.9481882011137116, + "grad_norm": 0.7382936477661133, + "learning_rate": 7.351373101274288e-05, + "loss": 2.5393, + "step": 11749 + }, + { + "epoch": 0.9482689048502946, + "grad_norm": 0.7366087436676025, + "learning_rate": 7.349850825619429e-05, + "loss": 2.5591, + "step": 11750 + }, + { + "epoch": 0.9483496085868776, + "grad_norm": 0.6652588248252869, + "learning_rate": 7.348328616013213e-05, + "loss": 2.5348, + "step": 11751 + }, + { + "epoch": 0.9484303123234605, + "grad_norm": 0.7515435814857483, + "learning_rate": 7.346806472493584e-05, + "loss": 2.5208, + "step": 11752 + }, + { + "epoch": 0.9485110160600436, + "grad_norm": 0.7161263227462769, + "learning_rate": 7.345284395098469e-05, + "loss": 2.5518, + "step": 11753 + }, + { + "epoch": 0.9485917197966266, + "grad_norm": 0.7433953285217285, + "learning_rate": 7.343762383865807e-05, + "loss": 2.5914, + "step": 11754 + }, + { + "epoch": 0.9486724235332096, + "grad_norm": 0.674991250038147, + "learning_rate": 7.342240438833532e-05, + "loss": 2.5566, + "step": 11755 + }, + { + "epoch": 
0.9487531272697926, + "grad_norm": 0.7511670589447021, + "learning_rate": 7.34071856003957e-05, + "loss": 2.5253, + "step": 11756 + }, + { + "epoch": 0.9488338310063756, + "grad_norm": 0.6672492623329163, + "learning_rate": 7.339196747521853e-05, + "loss": 2.4887, + "step": 11757 + }, + { + "epoch": 0.9489145347429586, + "grad_norm": 0.6826158761978149, + "learning_rate": 7.337675001318312e-05, + "loss": 2.5072, + "step": 11758 + }, + { + "epoch": 0.9489952384795416, + "grad_norm": 0.7189450860023499, + "learning_rate": 7.336153321466867e-05, + "loss": 2.5583, + "step": 11759 + }, + { + "epoch": 0.9490759422161246, + "grad_norm": 0.6923015117645264, + "learning_rate": 7.33463170800544e-05, + "loss": 2.5416, + "step": 11760 + }, + { + "epoch": 0.9491566459527077, + "grad_norm": 0.690060555934906, + "learning_rate": 7.333110160971963e-05, + "loss": 2.4931, + "step": 11761 + }, + { + "epoch": 0.9492373496892906, + "grad_norm": 0.6887977719306946, + "learning_rate": 7.331588680404354e-05, + "loss": 2.4676, + "step": 11762 + }, + { + "epoch": 0.9493180534258736, + "grad_norm": 0.8573753237724304, + "learning_rate": 7.330067266340528e-05, + "loss": 2.5074, + "step": 11763 + }, + { + "epoch": 0.9493987571624566, + "grad_norm": 0.6760974526405334, + "learning_rate": 7.328545918818403e-05, + "loss": 2.5395, + "step": 11764 + }, + { + "epoch": 0.9494794608990397, + "grad_norm": 0.6946160197257996, + "learning_rate": 7.327024637875901e-05, + "loss": 2.535, + "step": 11765 + }, + { + "epoch": 0.9495601646356226, + "grad_norm": 0.6851378679275513, + "learning_rate": 7.32550342355093e-05, + "loss": 2.487, + "step": 11766 + }, + { + "epoch": 0.9496408683722056, + "grad_norm": 0.6480168104171753, + "learning_rate": 7.323982275881404e-05, + "loss": 2.513, + "step": 11767 + }, + { + "epoch": 0.9497215721087886, + "grad_norm": 0.6492218971252441, + "learning_rate": 7.322461194905239e-05, + "loss": 2.4532, + "step": 11768 + }, + { + "epoch": 0.9498022758453717, + "grad_norm": 
0.6670051217079163, + "learning_rate": 7.320940180660337e-05, + "loss": 2.5258, + "step": 11769 + }, + { + "epoch": 0.9498829795819547, + "grad_norm": 0.6678066253662109, + "learning_rate": 7.319419233184608e-05, + "loss": 2.5388, + "step": 11770 + }, + { + "epoch": 0.9499636833185376, + "grad_norm": 0.693545937538147, + "learning_rate": 7.31789835251596e-05, + "loss": 2.5304, + "step": 11771 + }, + { + "epoch": 0.9500443870551206, + "grad_norm": 0.680486798286438, + "learning_rate": 7.316377538692297e-05, + "loss": 2.5024, + "step": 11772 + }, + { + "epoch": 0.9501250907917037, + "grad_norm": 0.7271847128868103, + "learning_rate": 7.314856791751518e-05, + "loss": 2.5947, + "step": 11773 + }, + { + "epoch": 0.9502057945282867, + "grad_norm": 0.6889839172363281, + "learning_rate": 7.31333611173153e-05, + "loss": 2.5135, + "step": 11774 + }, + { + "epoch": 0.9502864982648697, + "grad_norm": 0.7431777119636536, + "learning_rate": 7.311815498670226e-05, + "loss": 2.5856, + "step": 11775 + }, + { + "epoch": 0.9503672020014526, + "grad_norm": 0.7168101072311401, + "learning_rate": 7.310294952605508e-05, + "loss": 2.4383, + "step": 11776 + }, + { + "epoch": 0.9504479057380357, + "grad_norm": 0.654803454875946, + "learning_rate": 7.308774473575271e-05, + "loss": 2.4908, + "step": 11777 + }, + { + "epoch": 0.9505286094746187, + "grad_norm": 0.6810718774795532, + "learning_rate": 7.307254061617412e-05, + "loss": 2.5073, + "step": 11778 + }, + { + "epoch": 0.9506093132112017, + "grad_norm": 0.637980043888092, + "learning_rate": 7.305733716769817e-05, + "loss": 2.5686, + "step": 11779 + }, + { + "epoch": 0.9506900169477847, + "grad_norm": 0.6549471020698547, + "learning_rate": 7.30421343907038e-05, + "loss": 2.5502, + "step": 11780 + }, + { + "epoch": 0.9507707206843676, + "grad_norm": 0.7087163329124451, + "learning_rate": 7.302693228556994e-05, + "loss": 2.4773, + "step": 11781 + }, + { + "epoch": 0.9508514244209507, + "grad_norm": 0.6230717897415161, + "learning_rate": 
7.301173085267541e-05, + "loss": 2.4806, + "step": 11782 + }, + { + "epoch": 0.9509321281575337, + "grad_norm": 0.7145688533782959, + "learning_rate": 7.299653009239911e-05, + "loss": 2.5259, + "step": 11783 + }, + { + "epoch": 0.9510128318941167, + "grad_norm": 0.679100513458252, + "learning_rate": 7.298133000511988e-05, + "loss": 2.5012, + "step": 11784 + }, + { + "epoch": 0.9510935356306996, + "grad_norm": 0.7057691216468811, + "learning_rate": 7.29661305912165e-05, + "loss": 2.4826, + "step": 11785 + }, + { + "epoch": 0.9511742393672827, + "grad_norm": 0.65343177318573, + "learning_rate": 7.295093185106782e-05, + "loss": 2.4553, + "step": 11786 + }, + { + "epoch": 0.9512549431038657, + "grad_norm": 0.7948461174964905, + "learning_rate": 7.293573378505268e-05, + "loss": 2.478, + "step": 11787 + }, + { + "epoch": 0.9513356468404487, + "grad_norm": 0.6511468887329102, + "learning_rate": 7.292053639354975e-05, + "loss": 2.4862, + "step": 11788 + }, + { + "epoch": 0.9514163505770317, + "grad_norm": 0.7293919324874878, + "learning_rate": 7.290533967693782e-05, + "loss": 2.5956, + "step": 11789 + }, + { + "epoch": 0.9514970543136148, + "grad_norm": 0.6691277623176575, + "learning_rate": 7.289014363559567e-05, + "loss": 2.5659, + "step": 11790 + }, + { + "epoch": 0.9515777580501977, + "grad_norm": 0.7054625749588013, + "learning_rate": 7.287494826990203e-05, + "loss": 2.5875, + "step": 11791 + }, + { + "epoch": 0.9516584617867807, + "grad_norm": 0.6597220301628113, + "learning_rate": 7.285975358023555e-05, + "loss": 2.5215, + "step": 11792 + }, + { + "epoch": 0.9517391655233637, + "grad_norm": 0.6719489097595215, + "learning_rate": 7.284455956697497e-05, + "loss": 2.4752, + "step": 11793 + }, + { + "epoch": 0.9518198692599468, + "grad_norm": 0.7325637340545654, + "learning_rate": 7.2829366230499e-05, + "loss": 2.5504, + "step": 11794 + }, + { + "epoch": 0.9519005729965297, + "grad_norm": 0.637668788433075, + "learning_rate": 7.281417357118619e-05, + "loss": 2.5105, + 
"step": 11795 + }, + { + "epoch": 0.9519812767331127, + "grad_norm": 0.7815340161323547, + "learning_rate": 7.279898158941525e-05, + "loss": 2.4998, + "step": 11796 + }, + { + "epoch": 0.9520619804696957, + "grad_norm": 0.6555821299552917, + "learning_rate": 7.278379028556481e-05, + "loss": 2.4326, + "step": 11797 + }, + { + "epoch": 0.9521426842062788, + "grad_norm": 0.7298933863639832, + "learning_rate": 7.276859966001344e-05, + "loss": 2.4779, + "step": 11798 + }, + { + "epoch": 0.9522233879428618, + "grad_norm": 0.683455765247345, + "learning_rate": 7.275340971313974e-05, + "loss": 2.4416, + "step": 11799 + }, + { + "epoch": 0.9523040916794447, + "grad_norm": 0.6353151798248291, + "learning_rate": 7.273822044532232e-05, + "loss": 2.4777, + "step": 11800 + }, + { + "epoch": 0.9523847954160277, + "grad_norm": 0.6898894309997559, + "learning_rate": 7.27230318569397e-05, + "loss": 2.5351, + "step": 11801 + }, + { + "epoch": 0.9524654991526108, + "grad_norm": 0.6528690457344055, + "learning_rate": 7.270784394837041e-05, + "loss": 2.5145, + "step": 11802 + }, + { + "epoch": 0.9525462028891938, + "grad_norm": 0.6432619094848633, + "learning_rate": 7.269265671999304e-05, + "loss": 2.5002, + "step": 11803 + }, + { + "epoch": 0.9526269066257768, + "grad_norm": 0.7317861318588257, + "learning_rate": 7.267747017218601e-05, + "loss": 2.5318, + "step": 11804 + }, + { + "epoch": 0.9527076103623597, + "grad_norm": 0.7581185698509216, + "learning_rate": 7.266228430532785e-05, + "loss": 2.5313, + "step": 11805 + }, + { + "epoch": 0.9527883140989428, + "grad_norm": 0.7316486239433289, + "learning_rate": 7.264709911979702e-05, + "loss": 2.5147, + "step": 11806 + }, + { + "epoch": 0.9528690178355258, + "grad_norm": 0.7378978729248047, + "learning_rate": 7.263191461597199e-05, + "loss": 2.5149, + "step": 11807 + }, + { + "epoch": 0.9529497215721088, + "grad_norm": 0.6603738069534302, + "learning_rate": 7.26167307942312e-05, + "loss": 2.4684, + "step": 11808 + }, + { + "epoch": 
0.9530304253086918, + "grad_norm": 0.7566502690315247, + "learning_rate": 7.260154765495302e-05, + "loss": 2.5535, + "step": 11809 + }, + { + "epoch": 0.9531111290452748, + "grad_norm": 0.693067729473114, + "learning_rate": 7.258636519851596e-05, + "loss": 2.5103, + "step": 11810 + }, + { + "epoch": 0.9531918327818578, + "grad_norm": 0.7049208283424377, + "learning_rate": 7.257118342529826e-05, + "loss": 2.5482, + "step": 11811 + }, + { + "epoch": 0.9532725365184408, + "grad_norm": 0.6986998319625854, + "learning_rate": 7.25560023356784e-05, + "loss": 2.4921, + "step": 11812 + }, + { + "epoch": 0.9533532402550238, + "grad_norm": 0.7079482674598694, + "learning_rate": 7.254082193003476e-05, + "loss": 2.5339, + "step": 11813 + }, + { + "epoch": 0.9534339439916069, + "grad_norm": 0.7283922433853149, + "learning_rate": 7.252564220874553e-05, + "loss": 2.5056, + "step": 11814 + }, + { + "epoch": 0.9535146477281898, + "grad_norm": 0.6965533494949341, + "learning_rate": 7.251046317218914e-05, + "loss": 2.5512, + "step": 11815 + }, + { + "epoch": 0.9535953514647728, + "grad_norm": 0.7367159128189087, + "learning_rate": 7.24952848207439e-05, + "loss": 2.5015, + "step": 11816 + }, + { + "epoch": 0.9536760552013558, + "grad_norm": 0.6959818601608276, + "learning_rate": 7.248010715478802e-05, + "loss": 2.4969, + "step": 11817 + }, + { + "epoch": 0.9537567589379389, + "grad_norm": 0.69304358959198, + "learning_rate": 7.246493017469981e-05, + "loss": 2.5098, + "step": 11818 + }, + { + "epoch": 0.9538374626745219, + "grad_norm": 0.6830596327781677, + "learning_rate": 7.244975388085757e-05, + "loss": 2.5206, + "step": 11819 + }, + { + "epoch": 0.9539181664111048, + "grad_norm": 0.7354303598403931, + "learning_rate": 7.243457827363944e-05, + "loss": 2.5223, + "step": 11820 + }, + { + "epoch": 0.9539988701476878, + "grad_norm": 0.7046182751655579, + "learning_rate": 7.241940335342366e-05, + "loss": 2.4931, + "step": 11821 + }, + { + "epoch": 0.9540795738842709, + "grad_norm": 
0.6990540623664856, + "learning_rate": 7.240422912058843e-05, + "loss": 2.4302, + "step": 11822 + }, + { + "epoch": 0.9541602776208539, + "grad_norm": 0.7562115788459778, + "learning_rate": 7.238905557551202e-05, + "loss": 2.5118, + "step": 11823 + }, + { + "epoch": 0.9542409813574368, + "grad_norm": 0.8212862014770508, + "learning_rate": 7.237388271857248e-05, + "loss": 2.5476, + "step": 11824 + }, + { + "epoch": 0.9543216850940198, + "grad_norm": 0.7095397710800171, + "learning_rate": 7.235871055014798e-05, + "loss": 2.5073, + "step": 11825 + }, + { + "epoch": 0.9544023888306029, + "grad_norm": 0.7174660563468933, + "learning_rate": 7.23435390706167e-05, + "loss": 2.4553, + "step": 11826 + }, + { + "epoch": 0.9544830925671859, + "grad_norm": 0.7121314406394958, + "learning_rate": 7.23283682803567e-05, + "loss": 2.5164, + "step": 11827 + }, + { + "epoch": 0.9545637963037689, + "grad_norm": 0.7354126572608948, + "learning_rate": 7.231319817974609e-05, + "loss": 2.5413, + "step": 11828 + }, + { + "epoch": 0.9546445000403518, + "grad_norm": 0.7770543694496155, + "learning_rate": 7.2298028769163e-05, + "loss": 2.5244, + "step": 11829 + }, + { + "epoch": 0.9547252037769349, + "grad_norm": 0.6770393252372742, + "learning_rate": 7.228286004898541e-05, + "loss": 2.4707, + "step": 11830 + }, + { + "epoch": 0.9548059075135179, + "grad_norm": 0.6916880011558533, + "learning_rate": 7.22676920195914e-05, + "loss": 2.506, + "step": 11831 + }, + { + "epoch": 0.9548866112501009, + "grad_norm": 0.6299161314964294, + "learning_rate": 7.225252468135901e-05, + "loss": 2.5042, + "step": 11832 + }, + { + "epoch": 0.9549673149866839, + "grad_norm": 0.7081227898597717, + "learning_rate": 7.223735803466623e-05, + "loss": 2.5537, + "step": 11833 + }, + { + "epoch": 0.9550480187232668, + "grad_norm": 0.6600900888442993, + "learning_rate": 7.222219207989104e-05, + "loss": 2.5329, + "step": 11834 + }, + { + "epoch": 0.9551287224598499, + "grad_norm": 0.6715366244316101, + "learning_rate": 
7.22070268174115e-05, + "loss": 2.5273, + "step": 11835 + }, + { + "epoch": 0.9552094261964329, + "grad_norm": 0.6655930280685425, + "learning_rate": 7.219186224760543e-05, + "loss": 2.4254, + "step": 11836 + }, + { + "epoch": 0.9552901299330159, + "grad_norm": 0.6925715208053589, + "learning_rate": 7.217669837085088e-05, + "loss": 2.5104, + "step": 11837 + }, + { + "epoch": 0.9553708336695989, + "grad_norm": 0.7132978439331055, + "learning_rate": 7.216153518752571e-05, + "loss": 2.5238, + "step": 11838 + }, + { + "epoch": 0.9554515374061819, + "grad_norm": 0.661651611328125, + "learning_rate": 7.214637269800791e-05, + "loss": 2.445, + "step": 11839 + }, + { + "epoch": 0.9555322411427649, + "grad_norm": 0.6635430455207825, + "learning_rate": 7.213121090267528e-05, + "loss": 2.4707, + "step": 11840 + }, + { + "epoch": 0.9556129448793479, + "grad_norm": 0.6303616166114807, + "learning_rate": 7.211604980190571e-05, + "loss": 2.4923, + "step": 11841 + }, + { + "epoch": 0.9556936486159309, + "grad_norm": 0.7027459144592285, + "learning_rate": 7.210088939607708e-05, + "loss": 2.5592, + "step": 11842 + }, + { + "epoch": 0.955774352352514, + "grad_norm": 0.6539996862411499, + "learning_rate": 7.208572968556722e-05, + "loss": 2.5256, + "step": 11843 + }, + { + "epoch": 0.9558550560890969, + "grad_norm": 0.7019872069358826, + "learning_rate": 7.207057067075393e-05, + "loss": 2.488, + "step": 11844 + }, + { + "epoch": 0.9559357598256799, + "grad_norm": 0.6848211288452148, + "learning_rate": 7.205541235201507e-05, + "loss": 2.4883, + "step": 11845 + }, + { + "epoch": 0.9560164635622629, + "grad_norm": 0.7806351184844971, + "learning_rate": 7.204025472972834e-05, + "loss": 2.5563, + "step": 11846 + }, + { + "epoch": 0.956097167298846, + "grad_norm": 0.7327724695205688, + "learning_rate": 7.202509780427156e-05, + "loss": 2.5275, + "step": 11847 + }, + { + "epoch": 0.956177871035429, + "grad_norm": 0.6805681586265564, + "learning_rate": 7.200994157602248e-05, + "loss": 2.4723, + 
"step": 11848 + }, + { + "epoch": 0.9562585747720119, + "grad_norm": 0.7053409814834595, + "learning_rate": 7.19947860453588e-05, + "loss": 2.4471, + "step": 11849 + }, + { + "epoch": 0.9563392785085949, + "grad_norm": 0.6783127188682556, + "learning_rate": 7.197963121265826e-05, + "loss": 2.4586, + "step": 11850 + }, + { + "epoch": 0.956419982245178, + "grad_norm": 0.6639916300773621, + "learning_rate": 7.196447707829857e-05, + "loss": 2.4966, + "step": 11851 + }, + { + "epoch": 0.956500685981761, + "grad_norm": 0.684066891670227, + "learning_rate": 7.194932364265739e-05, + "loss": 2.5676, + "step": 11852 + }, + { + "epoch": 0.9565813897183439, + "grad_norm": 0.7872990965843201, + "learning_rate": 7.193417090611239e-05, + "loss": 2.5101, + "step": 11853 + }, + { + "epoch": 0.9566620934549269, + "grad_norm": 0.7543401122093201, + "learning_rate": 7.19190188690412e-05, + "loss": 2.5503, + "step": 11854 + }, + { + "epoch": 0.95674279719151, + "grad_norm": 0.6514382362365723, + "learning_rate": 7.190386753182152e-05, + "loss": 2.4902, + "step": 11855 + }, + { + "epoch": 0.956823500928093, + "grad_norm": 0.6867108345031738, + "learning_rate": 7.188871689483087e-05, + "loss": 2.5054, + "step": 11856 + }, + { + "epoch": 0.956904204664676, + "grad_norm": 0.6536040306091309, + "learning_rate": 7.187356695844687e-05, + "loss": 2.5462, + "step": 11857 + }, + { + "epoch": 0.9569849084012589, + "grad_norm": 0.690237820148468, + "learning_rate": 7.185841772304711e-05, + "loss": 2.5673, + "step": 11858 + }, + { + "epoch": 0.957065612137842, + "grad_norm": 0.6699091196060181, + "learning_rate": 7.184326918900915e-05, + "loss": 2.4733, + "step": 11859 + }, + { + "epoch": 0.957146315874425, + "grad_norm": 0.6482241153717041, + "learning_rate": 7.18281213567105e-05, + "loss": 2.4897, + "step": 11860 + }, + { + "epoch": 0.957227019611008, + "grad_norm": 0.686130166053772, + "learning_rate": 7.181297422652874e-05, + "loss": 2.4596, + "step": 11861 + }, + { + "epoch": 
0.957307723347591, + "grad_norm": 0.6507205367088318, + "learning_rate": 7.179782779884132e-05, + "loss": 2.5527, + "step": 11862 + }, + { + "epoch": 0.957388427084174, + "grad_norm": 0.6578813195228577, + "learning_rate": 7.178268207402577e-05, + "loss": 2.4975, + "step": 11863 + }, + { + "epoch": 0.957469130820757, + "grad_norm": 0.6931977272033691, + "learning_rate": 7.176753705245956e-05, + "loss": 2.5533, + "step": 11864 + }, + { + "epoch": 0.95754983455734, + "grad_norm": 0.7306256890296936, + "learning_rate": 7.17523927345201e-05, + "loss": 2.534, + "step": 11865 + }, + { + "epoch": 0.957630538293923, + "grad_norm": 0.6337448358535767, + "learning_rate": 7.173724912058483e-05, + "loss": 2.5015, + "step": 11866 + }, + { + "epoch": 0.9577112420305061, + "grad_norm": 0.6561456322669983, + "learning_rate": 7.172210621103124e-05, + "loss": 2.4946, + "step": 11867 + }, + { + "epoch": 0.957791945767089, + "grad_norm": 0.6341130137443542, + "learning_rate": 7.170696400623666e-05, + "loss": 2.5611, + "step": 11868 + }, + { + "epoch": 0.957872649503672, + "grad_norm": 0.7202804088592529, + "learning_rate": 7.169182250657849e-05, + "loss": 2.5209, + "step": 11869 + }, + { + "epoch": 0.957953353240255, + "grad_norm": 0.6620556712150574, + "learning_rate": 7.167668171243408e-05, + "loss": 2.4895, + "step": 11870 + }, + { + "epoch": 0.9580340569768381, + "grad_norm": 0.6842508912086487, + "learning_rate": 7.166154162418087e-05, + "loss": 2.4417, + "step": 11871 + }, + { + "epoch": 0.958114760713421, + "grad_norm": 0.7539907693862915, + "learning_rate": 7.164640224219608e-05, + "loss": 2.5153, + "step": 11872 + }, + { + "epoch": 0.958195464450004, + "grad_norm": 0.6524286270141602, + "learning_rate": 7.163126356685703e-05, + "loss": 2.509, + "step": 11873 + }, + { + "epoch": 0.958276168186587, + "grad_norm": 0.7022691965103149, + "learning_rate": 7.16161255985411e-05, + "loss": 2.5223, + "step": 11874 + }, + { + "epoch": 0.9583568719231701, + "grad_norm": 
0.6659076809883118, + "learning_rate": 7.160098833762549e-05, + "loss": 2.5231, + "step": 11875 + }, + { + "epoch": 0.9584375756597531, + "grad_norm": 0.6756494641304016, + "learning_rate": 7.15858517844875e-05, + "loss": 2.5017, + "step": 11876 + }, + { + "epoch": 0.958518279396336, + "grad_norm": 0.729850709438324, + "learning_rate": 7.157071593950436e-05, + "loss": 2.4583, + "step": 11877 + }, + { + "epoch": 0.958598983132919, + "grad_norm": 0.7155230641365051, + "learning_rate": 7.155558080305326e-05, + "loss": 2.4753, + "step": 11878 + }, + { + "epoch": 0.9586796868695021, + "grad_norm": 0.6553284525871277, + "learning_rate": 7.154044637551147e-05, + "loss": 2.5093, + "step": 11879 + }, + { + "epoch": 0.9587603906060851, + "grad_norm": 0.6516379117965698, + "learning_rate": 7.152531265725617e-05, + "loss": 2.4996, + "step": 11880 + }, + { + "epoch": 0.9588410943426681, + "grad_norm": 0.6871184706687927, + "learning_rate": 7.151017964866449e-05, + "loss": 2.5322, + "step": 11881 + }, + { + "epoch": 0.958921798079251, + "grad_norm": 0.6998933553695679, + "learning_rate": 7.149504735011358e-05, + "loss": 2.5328, + "step": 11882 + }, + { + "epoch": 0.959002501815834, + "grad_norm": 0.7065120935440063, + "learning_rate": 7.147991576198065e-05, + "loss": 2.5251, + "step": 11883 + }, + { + "epoch": 0.9590832055524171, + "grad_norm": 0.6718337535858154, + "learning_rate": 7.146478488464275e-05, + "loss": 2.5596, + "step": 11884 + }, + { + "epoch": 0.9591639092890001, + "grad_norm": 0.6394883990287781, + "learning_rate": 7.144965471847698e-05, + "loss": 2.5022, + "step": 11885 + }, + { + "epoch": 0.9592446130255831, + "grad_norm": 0.6867207288742065, + "learning_rate": 7.143452526386045e-05, + "loss": 2.4927, + "step": 11886 + }, + { + "epoch": 0.959325316762166, + "grad_norm": 0.6710157990455627, + "learning_rate": 7.141939652117026e-05, + "loss": 2.5127, + "step": 11887 + }, + { + "epoch": 0.9594060204987491, + "grad_norm": 0.6286540627479553, + "learning_rate": 
7.14042684907834e-05, + "loss": 2.4966, + "step": 11888 + }, + { + "epoch": 0.9594867242353321, + "grad_norm": 0.7295787334442139, + "learning_rate": 7.13891411730769e-05, + "loss": 2.5127, + "step": 11889 + }, + { + "epoch": 0.9595674279719151, + "grad_norm": 0.646084189414978, + "learning_rate": 7.137401456842784e-05, + "loss": 2.5575, + "step": 11890 + }, + { + "epoch": 0.959648131708498, + "grad_norm": 0.7884495258331299, + "learning_rate": 7.135888867721312e-05, + "loss": 2.4807, + "step": 11891 + }, + { + "epoch": 0.9597288354450811, + "grad_norm": 0.638469934463501, + "learning_rate": 7.134376349980977e-05, + "loss": 2.4989, + "step": 11892 + }, + { + "epoch": 0.9598095391816641, + "grad_norm": 0.6802849769592285, + "learning_rate": 7.132863903659476e-05, + "loss": 2.5139, + "step": 11893 + }, + { + "epoch": 0.9598902429182471, + "grad_norm": 0.6657521724700928, + "learning_rate": 7.131351528794499e-05, + "loss": 2.4488, + "step": 11894 + }, + { + "epoch": 0.9599709466548301, + "grad_norm": 0.6537562012672424, + "learning_rate": 7.129839225423741e-05, + "loss": 2.4664, + "step": 11895 + }, + { + "epoch": 0.9600516503914132, + "grad_norm": 0.689637303352356, + "learning_rate": 7.128326993584897e-05, + "loss": 2.582, + "step": 11896 + }, + { + "epoch": 0.9601323541279961, + "grad_norm": 0.6701640486717224, + "learning_rate": 7.126814833315646e-05, + "loss": 2.4963, + "step": 11897 + }, + { + "epoch": 0.9602130578645791, + "grad_norm": 0.7466658353805542, + "learning_rate": 7.125302744653677e-05, + "loss": 2.5015, + "step": 11898 + }, + { + "epoch": 0.9602937616011621, + "grad_norm": 0.6487225294113159, + "learning_rate": 7.123790727636685e-05, + "loss": 2.5393, + "step": 11899 + }, + { + "epoch": 0.9603744653377452, + "grad_norm": 0.7204654216766357, + "learning_rate": 7.122278782302343e-05, + "loss": 2.4668, + "step": 11900 + }, + { + "epoch": 0.9604551690743282, + "grad_norm": 0.6852861046791077, + "learning_rate": 7.120766908688336e-05, + "loss": 2.5893, + 
"step": 11901 + }, + { + "epoch": 0.9605358728109111, + "grad_norm": 0.6483901739120483, + "learning_rate": 7.119255106832344e-05, + "loss": 2.48, + "step": 11902 + }, + { + "epoch": 0.9606165765474941, + "grad_norm": 0.6670375466346741, + "learning_rate": 7.117743376772049e-05, + "loss": 2.5225, + "step": 11903 + }, + { + "epoch": 0.9606972802840772, + "grad_norm": 0.6805974841117859, + "learning_rate": 7.116231718545118e-05, + "loss": 2.4652, + "step": 11904 + }, + { + "epoch": 0.9607779840206602, + "grad_norm": 0.6700397729873657, + "learning_rate": 7.114720132189232e-05, + "loss": 2.5115, + "step": 11905 + }, + { + "epoch": 0.9608586877572431, + "grad_norm": 0.7167409062385559, + "learning_rate": 7.113208617742066e-05, + "loss": 2.5062, + "step": 11906 + }, + { + "epoch": 0.9609393914938261, + "grad_norm": 0.7337077856063843, + "learning_rate": 7.111697175241286e-05, + "loss": 2.5768, + "step": 11907 + }, + { + "epoch": 0.9610200952304092, + "grad_norm": 0.6681819558143616, + "learning_rate": 7.110185804724558e-05, + "loss": 2.5058, + "step": 11908 + }, + { + "epoch": 0.9611007989669922, + "grad_norm": 0.7235603332519531, + "learning_rate": 7.10867450622956e-05, + "loss": 2.4606, + "step": 11909 + }, + { + "epoch": 0.9611815027035752, + "grad_norm": 0.6931360363960266, + "learning_rate": 7.107163279793947e-05, + "loss": 2.5129, + "step": 11910 + }, + { + "epoch": 0.9612622064401581, + "grad_norm": 0.7331648468971252, + "learning_rate": 7.105652125455388e-05, + "loss": 2.4916, + "step": 11911 + }, + { + "epoch": 0.9613429101767412, + "grad_norm": 0.6538143754005432, + "learning_rate": 7.104141043251545e-05, + "loss": 2.5184, + "step": 11912 + }, + { + "epoch": 0.9614236139133242, + "grad_norm": 0.7018921375274658, + "learning_rate": 7.102630033220077e-05, + "loss": 2.5446, + "step": 11913 + }, + { + "epoch": 0.9615043176499072, + "grad_norm": 0.7528507709503174, + "learning_rate": 7.10111909539864e-05, + "loss": 2.4404, + "step": 11914 + }, + { + "epoch": 
0.9615850213864902, + "grad_norm": 0.7258831858634949, + "learning_rate": 7.099608229824894e-05, + "loss": 2.4758, + "step": 11915 + }, + { + "epoch": 0.9616657251230732, + "grad_norm": 0.6954349875450134, + "learning_rate": 7.098097436536498e-05, + "loss": 2.4894, + "step": 11916 + }, + { + "epoch": 0.9617464288596562, + "grad_norm": 0.691584050655365, + "learning_rate": 7.096586715571092e-05, + "loss": 2.544, + "step": 11917 + }, + { + "epoch": 0.9618271325962392, + "grad_norm": 0.7107009291648865, + "learning_rate": 7.095076066966337e-05, + "loss": 2.4994, + "step": 11918 + }, + { + "epoch": 0.9619078363328222, + "grad_norm": 0.6492058634757996, + "learning_rate": 7.093565490759881e-05, + "loss": 2.5751, + "step": 11919 + }, + { + "epoch": 0.9619885400694053, + "grad_norm": 0.6817753314971924, + "learning_rate": 7.092054986989371e-05, + "loss": 2.5129, + "step": 11920 + }, + { + "epoch": 0.9620692438059882, + "grad_norm": 0.6991822123527527, + "learning_rate": 7.090544555692448e-05, + "loss": 2.5728, + "step": 11921 + }, + { + "epoch": 0.9621499475425712, + "grad_norm": 0.6627625226974487, + "learning_rate": 7.089034196906768e-05, + "loss": 2.4479, + "step": 11922 + }, + { + "epoch": 0.9622306512791542, + "grad_norm": 0.6889652013778687, + "learning_rate": 7.087523910669957e-05, + "loss": 2.5323, + "step": 11923 + }, + { + "epoch": 0.9623113550157373, + "grad_norm": 0.7863786816596985, + "learning_rate": 7.086013697019667e-05, + "loss": 2.5146, + "step": 11924 + }, + { + "epoch": 0.9623920587523203, + "grad_norm": 0.6885324716567993, + "learning_rate": 7.084503555993536e-05, + "loss": 2.5072, + "step": 11925 + }, + { + "epoch": 0.9624727624889032, + "grad_norm": 0.619239091873169, + "learning_rate": 7.082993487629192e-05, + "loss": 2.4622, + "step": 11926 + }, + { + "epoch": 0.9625534662254862, + "grad_norm": 0.6762447953224182, + "learning_rate": 7.081483491964278e-05, + "loss": 2.5155, + "step": 11927 + }, + { + "epoch": 0.9626341699620693, + "grad_norm": 
0.6559715867042542, + "learning_rate": 7.079973569036424e-05, + "loss": 2.4729, + "step": 11928 + }, + { + "epoch": 0.9627148736986523, + "grad_norm": 0.633280873298645, + "learning_rate": 7.078463718883261e-05, + "loss": 2.4715, + "step": 11929 + }, + { + "epoch": 0.9627955774352353, + "grad_norm": 0.7740094065666199, + "learning_rate": 7.07695394154242e-05, + "loss": 2.4871, + "step": 11930 + }, + { + "epoch": 0.9628762811718182, + "grad_norm": 0.7103284597396851, + "learning_rate": 7.075444237051527e-05, + "loss": 2.5299, + "step": 11931 + }, + { + "epoch": 0.9629569849084013, + "grad_norm": 0.6800934076309204, + "learning_rate": 7.073934605448212e-05, + "loss": 2.5919, + "step": 11932 + }, + { + "epoch": 0.9630376886449843, + "grad_norm": 0.6680917143821716, + "learning_rate": 7.072425046770092e-05, + "loss": 2.4942, + "step": 11933 + }, + { + "epoch": 0.9631183923815673, + "grad_norm": 0.7248062491416931, + "learning_rate": 7.070915561054792e-05, + "loss": 2.4956, + "step": 11934 + }, + { + "epoch": 0.9631990961181502, + "grad_norm": 0.6635782122612, + "learning_rate": 7.069406148339936e-05, + "loss": 2.4658, + "step": 11935 + }, + { + "epoch": 0.9632797998547332, + "grad_norm": 0.6751061081886292, + "learning_rate": 7.067896808663137e-05, + "loss": 2.4912, + "step": 11936 + }, + { + "epoch": 0.9633605035913163, + "grad_norm": 0.7476027607917786, + "learning_rate": 7.066387542062013e-05, + "loss": 2.4858, + "step": 11937 + }, + { + "epoch": 0.9634412073278993, + "grad_norm": 0.6770931482315063, + "learning_rate": 7.064878348574183e-05, + "loss": 2.4574, + "step": 11938 + }, + { + "epoch": 0.9635219110644823, + "grad_norm": 0.7105392813682556, + "learning_rate": 7.063369228237255e-05, + "loss": 2.5523, + "step": 11939 + }, + { + "epoch": 0.9636026148010652, + "grad_norm": 0.6806207299232483, + "learning_rate": 7.061860181088842e-05, + "loss": 2.4992, + "step": 11940 + }, + { + "epoch": 0.9636833185376483, + "grad_norm": 0.7059600353240967, + "learning_rate": 
7.060351207166558e-05, + "loss": 2.5778, + "step": 11941 + }, + { + "epoch": 0.9637640222742313, + "grad_norm": 0.6306884288787842, + "learning_rate": 7.058842306508002e-05, + "loss": 2.5389, + "step": 11942 + }, + { + "epoch": 0.9638447260108143, + "grad_norm": 0.6997150778770447, + "learning_rate": 7.057333479150783e-05, + "loss": 2.5077, + "step": 11943 + }, + { + "epoch": 0.9639254297473973, + "grad_norm": 0.7073743343353271, + "learning_rate": 7.05582472513251e-05, + "loss": 2.5274, + "step": 11944 + }, + { + "epoch": 0.9640061334839803, + "grad_norm": 0.6768803596496582, + "learning_rate": 7.054316044490777e-05, + "loss": 2.5155, + "step": 11945 + }, + { + "epoch": 0.9640868372205633, + "grad_norm": 0.6792057752609253, + "learning_rate": 7.052807437263189e-05, + "loss": 2.5509, + "step": 11946 + }, + { + "epoch": 0.9641675409571463, + "grad_norm": 0.6883981823921204, + "learning_rate": 7.051298903487344e-05, + "loss": 2.5176, + "step": 11947 + }, + { + "epoch": 0.9642482446937293, + "grad_norm": 0.6934401392936707, + "learning_rate": 7.049790443200844e-05, + "loss": 2.502, + "step": 11948 + }, + { + "epoch": 0.9643289484303124, + "grad_norm": 0.6882597804069519, + "learning_rate": 7.048282056441269e-05, + "loss": 2.487, + "step": 11949 + }, + { + "epoch": 0.9644096521668953, + "grad_norm": 0.6972896456718445, + "learning_rate": 7.046773743246225e-05, + "loss": 2.5304, + "step": 11950 + }, + { + "epoch": 0.9644903559034783, + "grad_norm": 0.6591988205909729, + "learning_rate": 7.045265503653303e-05, + "loss": 2.4734, + "step": 11951 + }, + { + "epoch": 0.9645710596400613, + "grad_norm": 0.6890063285827637, + "learning_rate": 7.043757337700082e-05, + "loss": 2.5289, + "step": 11952 + }, + { + "epoch": 0.9646517633766444, + "grad_norm": 0.6931065917015076, + "learning_rate": 7.042249245424157e-05, + "loss": 2.484, + "step": 11953 + }, + { + "epoch": 0.9647324671132274, + "grad_norm": 0.6943762898445129, + "learning_rate": 7.040741226863117e-05, + "loss": 2.501, 
+ "step": 11954 + }, + { + "epoch": 0.9648131708498103, + "grad_norm": 0.677154004573822, + "learning_rate": 7.039233282054536e-05, + "loss": 2.4976, + "step": 11955 + }, + { + "epoch": 0.9648938745863933, + "grad_norm": 0.6662883758544922, + "learning_rate": 7.037725411036003e-05, + "loss": 2.4928, + "step": 11956 + }, + { + "epoch": 0.9649745783229764, + "grad_norm": 0.6854663491249084, + "learning_rate": 7.0362176138451e-05, + "loss": 2.4657, + "step": 11957 + }, + { + "epoch": 0.9650552820595594, + "grad_norm": 0.6703238487243652, + "learning_rate": 7.034709890519397e-05, + "loss": 2.4879, + "step": 11958 + }, + { + "epoch": 0.9651359857961423, + "grad_norm": 0.7023652791976929, + "learning_rate": 7.033202241096474e-05, + "loss": 2.4619, + "step": 11959 + }, + { + "epoch": 0.9652166895327253, + "grad_norm": 0.6950454711914062, + "learning_rate": 7.031694665613911e-05, + "loss": 2.5125, + "step": 11960 + }, + { + "epoch": 0.9652973932693084, + "grad_norm": 0.6740411520004272, + "learning_rate": 7.030187164109272e-05, + "loss": 2.436, + "step": 11961 + }, + { + "epoch": 0.9653780970058914, + "grad_norm": 0.6697152256965637, + "learning_rate": 7.028679736620132e-05, + "loss": 2.5513, + "step": 11962 + }, + { + "epoch": 0.9654588007424744, + "grad_norm": 0.6920599937438965, + "learning_rate": 7.027172383184061e-05, + "loss": 2.5264, + "step": 11963 + }, + { + "epoch": 0.9655395044790573, + "grad_norm": 0.6493465304374695, + "learning_rate": 7.025665103838627e-05, + "loss": 2.4834, + "step": 11964 + }, + { + "epoch": 0.9656202082156404, + "grad_norm": 0.684092104434967, + "learning_rate": 7.02415789862139e-05, + "loss": 2.4662, + "step": 11965 + }, + { + "epoch": 0.9657009119522234, + "grad_norm": 0.7161515355110168, + "learning_rate": 7.022650767569921e-05, + "loss": 2.4648, + "step": 11966 + }, + { + "epoch": 0.9657816156888064, + "grad_norm": 0.6994524002075195, + "learning_rate": 7.021143710721778e-05, + "loss": 2.5186, + "step": 11967 + }, + { + "epoch": 
0.9658623194253894, + "grad_norm": 0.7105295062065125, + "learning_rate": 7.019636728114518e-05, + "loss": 2.5132, + "step": 11968 + }, + { + "epoch": 0.9659430231619724, + "grad_norm": 0.7182292938232422, + "learning_rate": 7.018129819785702e-05, + "loss": 2.5469, + "step": 11969 + }, + { + "epoch": 0.9660237268985554, + "grad_norm": 0.7021759152412415, + "learning_rate": 7.016622985772887e-05, + "loss": 2.5477, + "step": 11970 + }, + { + "epoch": 0.9661044306351384, + "grad_norm": 0.6751413941383362, + "learning_rate": 7.015116226113624e-05, + "loss": 2.5174, + "step": 11971 + }, + { + "epoch": 0.9661851343717214, + "grad_norm": 0.6341918110847473, + "learning_rate": 7.013609540845468e-05, + "loss": 2.4778, + "step": 11972 + }, + { + "epoch": 0.9662658381083045, + "grad_norm": 0.7080956697463989, + "learning_rate": 7.012102930005971e-05, + "loss": 2.5304, + "step": 11973 + }, + { + "epoch": 0.9663465418448874, + "grad_norm": 0.6367003321647644, + "learning_rate": 7.010596393632674e-05, + "loss": 2.4857, + "step": 11974 + }, + { + "epoch": 0.9664272455814704, + "grad_norm": 0.6841328740119934, + "learning_rate": 7.009089931763131e-05, + "loss": 2.5365, + "step": 11975 + }, + { + "epoch": 0.9665079493180534, + "grad_norm": 0.6568236351013184, + "learning_rate": 7.00758354443489e-05, + "loss": 2.5286, + "step": 11976 + }, + { + "epoch": 0.9665886530546365, + "grad_norm": 0.7071812152862549, + "learning_rate": 7.006077231685485e-05, + "loss": 2.458, + "step": 11977 + }, + { + "epoch": 0.9666693567912195, + "grad_norm": 0.6997712850570679, + "learning_rate": 7.004570993552462e-05, + "loss": 2.4571, + "step": 11978 + }, + { + "epoch": 0.9667500605278024, + "grad_norm": 0.6920793056488037, + "learning_rate": 7.003064830073359e-05, + "loss": 2.4172, + "step": 11979 + }, + { + "epoch": 0.9668307642643854, + "grad_norm": 0.6823387742042542, + "learning_rate": 7.001558741285718e-05, + "loss": 2.4895, + "step": 11980 + }, + { + "epoch": 0.9669114680009685, + "grad_norm": 
0.7309569716453552, + "learning_rate": 7.000052727227068e-05, + "loss": 2.502, + "step": 11981 + }, + { + "epoch": 0.9669921717375515, + "grad_norm": 0.734708845615387, + "learning_rate": 6.998546787934946e-05, + "loss": 2.4918, + "step": 11982 + }, + { + "epoch": 0.9670728754741345, + "grad_norm": 0.690406084060669, + "learning_rate": 6.997040923446889e-05, + "loss": 2.4994, + "step": 11983 + }, + { + "epoch": 0.9671535792107174, + "grad_norm": 0.7126687169075012, + "learning_rate": 6.995535133800416e-05, + "loss": 2.4824, + "step": 11984 + }, + { + "epoch": 0.9672342829473004, + "grad_norm": 0.7020599246025085, + "learning_rate": 6.994029419033062e-05, + "loss": 2.4889, + "step": 11985 + }, + { + "epoch": 0.9673149866838835, + "grad_norm": 0.7690796852111816, + "learning_rate": 6.992523779182356e-05, + "loss": 2.4997, + "step": 11986 + }, + { + "epoch": 0.9673956904204665, + "grad_norm": 0.6635778546333313, + "learning_rate": 6.991018214285816e-05, + "loss": 2.4989, + "step": 11987 + }, + { + "epoch": 0.9674763941570494, + "grad_norm": 0.7088577747344971, + "learning_rate": 6.989512724380967e-05, + "loss": 2.549, + "step": 11988 + }, + { + "epoch": 0.9675570978936324, + "grad_norm": 0.6420924663543701, + "learning_rate": 6.988007309505333e-05, + "loss": 2.4585, + "step": 11989 + }, + { + "epoch": 0.9676378016302155, + "grad_norm": 0.7902400493621826, + "learning_rate": 6.986501969696428e-05, + "loss": 2.5009, + "step": 11990 + }, + { + "epoch": 0.9677185053667985, + "grad_norm": 0.700907289981842, + "learning_rate": 6.984996704991773e-05, + "loss": 2.4778, + "step": 11991 + }, + { + "epoch": 0.9677992091033815, + "grad_norm": 0.664378821849823, + "learning_rate": 6.983491515428883e-05, + "loss": 2.5116, + "step": 11992 + }, + { + "epoch": 0.9678799128399644, + "grad_norm": 0.6314663887023926, + "learning_rate": 6.981986401045266e-05, + "loss": 2.4588, + "step": 11993 + }, + { + "epoch": 0.9679606165765475, + "grad_norm": 0.6521078944206238, + "learning_rate": 
6.980481361878438e-05, + "loss": 2.5224, + "step": 11994 + }, + { + "epoch": 0.9680413203131305, + "grad_norm": 0.6336014270782471, + "learning_rate": 6.978976397965907e-05, + "loss": 2.4297, + "step": 11995 + }, + { + "epoch": 0.9681220240497135, + "grad_norm": 0.7321500778198242, + "learning_rate": 6.977471509345183e-05, + "loss": 2.5252, + "step": 11996 + }, + { + "epoch": 0.9682027277862965, + "grad_norm": 0.686950147151947, + "learning_rate": 6.97596669605377e-05, + "loss": 2.5188, + "step": 11997 + }, + { + "epoch": 0.9682834315228795, + "grad_norm": 0.729343056678772, + "learning_rate": 6.97446195812917e-05, + "loss": 2.5157, + "step": 11998 + }, + { + "epoch": 0.9683641352594625, + "grad_norm": 0.6447068452835083, + "learning_rate": 6.972957295608889e-05, + "loss": 2.5041, + "step": 11999 + }, + { + "epoch": 0.9684448389960455, + "grad_norm": 0.6847280859947205, + "learning_rate": 6.971452708530423e-05, + "loss": 2.443, + "step": 12000 + }, + { + "epoch": 0.9684448389960455, + "eval_loss": 2.431878089904785, + "eval_runtime": 758.167, + "eval_samples_per_second": 3.456, + "eval_steps_per_second": 0.576, + "step": 12000 + }, + { + "epoch": 0.9685255427326285, + "grad_norm": 0.6440466046333313, + "learning_rate": 6.969948196931272e-05, + "loss": 2.5091, + "step": 12001 + }, + { + "epoch": 0.9686062464692116, + "grad_norm": 0.6570029258728027, + "learning_rate": 6.968443760848937e-05, + "loss": 2.491, + "step": 12002 + }, + { + "epoch": 0.9686869502057945, + "grad_norm": 0.7610877752304077, + "learning_rate": 6.966939400320905e-05, + "loss": 2.4713, + "step": 12003 + }, + { + "epoch": 0.9687676539423775, + "grad_norm": 0.7187781929969788, + "learning_rate": 6.965435115384669e-05, + "loss": 2.4303, + "step": 12004 + }, + { + "epoch": 0.9688483576789605, + "grad_norm": 0.7668420672416687, + "learning_rate": 6.963930906077727e-05, + "loss": 2.5513, + "step": 12005 + }, + { + "epoch": 0.9689290614155436, + "grad_norm": 0.7025619745254517, + "learning_rate": 
6.96242677243756e-05, + "loss": 2.4349, + "step": 12006 + }, + { + "epoch": 0.9690097651521266, + "grad_norm": 0.7066935896873474, + "learning_rate": 6.960922714501657e-05, + "loss": 2.5465, + "step": 12007 + }, + { + "epoch": 0.9690904688887095, + "grad_norm": 0.6758970618247986, + "learning_rate": 6.95941873230751e-05, + "loss": 2.4827, + "step": 12008 + }, + { + "epoch": 0.9691711726252925, + "grad_norm": 0.7108862996101379, + "learning_rate": 6.957914825892591e-05, + "loss": 2.5412, + "step": 12009 + }, + { + "epoch": 0.9692518763618756, + "grad_norm": 0.660784125328064, + "learning_rate": 6.956410995294389e-05, + "loss": 2.5173, + "step": 12010 + }, + { + "epoch": 0.9693325800984586, + "grad_norm": 0.6966561079025269, + "learning_rate": 6.954907240550377e-05, + "loss": 2.5196, + "step": 12011 + }, + { + "epoch": 0.9694132838350416, + "grad_norm": 0.6889416575431824, + "learning_rate": 6.953403561698042e-05, + "loss": 2.5351, + "step": 12012 + }, + { + "epoch": 0.9694939875716245, + "grad_norm": 0.7578341960906982, + "learning_rate": 6.951899958774852e-05, + "loss": 2.5184, + "step": 12013 + }, + { + "epoch": 0.9695746913082076, + "grad_norm": 0.6735317707061768, + "learning_rate": 6.950396431818282e-05, + "loss": 2.4592, + "step": 12014 + }, + { + "epoch": 0.9696553950447906, + "grad_norm": 0.6903232932090759, + "learning_rate": 6.948892980865806e-05, + "loss": 2.5212, + "step": 12015 + }, + { + "epoch": 0.9697360987813736, + "grad_norm": 0.6477165818214417, + "learning_rate": 6.94738960595489e-05, + "loss": 2.4423, + "step": 12016 + }, + { + "epoch": 0.9698168025179565, + "grad_norm": 0.6778751015663147, + "learning_rate": 6.945886307123007e-05, + "loss": 2.547, + "step": 12017 + }, + { + "epoch": 0.9698975062545396, + "grad_norm": 0.690558135509491, + "learning_rate": 6.944383084407623e-05, + "loss": 2.5081, + "step": 12018 + }, + { + "epoch": 0.9699782099911226, + "grad_norm": 0.7210639119148254, + "learning_rate": 6.942879937846196e-05, + "loss": 2.496, + 
"step": 12019 + }, + { + "epoch": 0.9700589137277056, + "grad_norm": 0.7182444930076599, + "learning_rate": 6.941376867476194e-05, + "loss": 2.6138, + "step": 12020 + }, + { + "epoch": 0.9701396174642886, + "grad_norm": 0.6929295063018799, + "learning_rate": 6.939873873335077e-05, + "loss": 2.4828, + "step": 12021 + }, + { + "epoch": 0.9702203212008716, + "grad_norm": 0.6919693350791931, + "learning_rate": 6.938370955460298e-05, + "loss": 2.5123, + "step": 12022 + }, + { + "epoch": 0.9703010249374546, + "grad_norm": 0.6475244164466858, + "learning_rate": 6.93686811388932e-05, + "loss": 2.4992, + "step": 12023 + }, + { + "epoch": 0.9703817286740376, + "grad_norm": 0.6728265881538391, + "learning_rate": 6.935365348659597e-05, + "loss": 2.4486, + "step": 12024 + }, + { + "epoch": 0.9704624324106206, + "grad_norm": 0.6791470646858215, + "learning_rate": 6.933862659808582e-05, + "loss": 2.4657, + "step": 12025 + }, + { + "epoch": 0.9705431361472037, + "grad_norm": 0.7611662745475769, + "learning_rate": 6.932360047373721e-05, + "loss": 2.5243, + "step": 12026 + }, + { + "epoch": 0.9706238398837866, + "grad_norm": 0.6642355918884277, + "learning_rate": 6.930857511392467e-05, + "loss": 2.5308, + "step": 12027 + }, + { + "epoch": 0.9707045436203696, + "grad_norm": 0.7270805239677429, + "learning_rate": 6.92935505190227e-05, + "loss": 2.4708, + "step": 12028 + }, + { + "epoch": 0.9707852473569526, + "grad_norm": 0.6706295013427734, + "learning_rate": 6.927852668940568e-05, + "loss": 2.5136, + "step": 12029 + }, + { + "epoch": 0.9708659510935357, + "grad_norm": 0.6923376321792603, + "learning_rate": 6.92635036254481e-05, + "loss": 2.5238, + "step": 12030 + }, + { + "epoch": 0.9709466548301187, + "grad_norm": 0.7154483199119568, + "learning_rate": 6.924848132752436e-05, + "loss": 2.488, + "step": 12031 + }, + { + "epoch": 0.9710273585667016, + "grad_norm": 0.6675701141357422, + "learning_rate": 6.923345979600884e-05, + "loss": 2.5066, + "step": 12032 + }, + { + "epoch": 
0.9711080623032846, + "grad_norm": 0.7282043695449829, + "learning_rate": 6.921843903127592e-05, + "loss": 2.5096, + "step": 12033 + }, + { + "epoch": 0.9711887660398677, + "grad_norm": 0.663526177406311, + "learning_rate": 6.92034190337e-05, + "loss": 2.5276, + "step": 12034 + }, + { + "epoch": 0.9712694697764507, + "grad_norm": 0.7491087913513184, + "learning_rate": 6.918839980365534e-05, + "loss": 2.5044, + "step": 12035 + }, + { + "epoch": 0.9713501735130337, + "grad_norm": 0.6977766156196594, + "learning_rate": 6.917338134151629e-05, + "loss": 2.6102, + "step": 12036 + }, + { + "epoch": 0.9714308772496166, + "grad_norm": 0.6447446346282959, + "learning_rate": 6.915836364765722e-05, + "loss": 2.5137, + "step": 12037 + }, + { + "epoch": 0.9715115809861996, + "grad_norm": 0.6801442503929138, + "learning_rate": 6.91433467224523e-05, + "loss": 2.5145, + "step": 12038 + }, + { + "epoch": 0.9715922847227827, + "grad_norm": 0.6843627691268921, + "learning_rate": 6.912833056627583e-05, + "loss": 2.6099, + "step": 12039 + }, + { + "epoch": 0.9716729884593657, + "grad_norm": 0.6862856149673462, + "learning_rate": 6.911331517950209e-05, + "loss": 2.5358, + "step": 12040 + }, + { + "epoch": 0.9717536921959486, + "grad_norm": 0.6835047602653503, + "learning_rate": 6.909830056250527e-05, + "loss": 2.5257, + "step": 12041 + }, + { + "epoch": 0.9718343959325316, + "grad_norm": 0.6958080530166626, + "learning_rate": 6.908328671565956e-05, + "loss": 2.5008, + "step": 12042 + }, + { + "epoch": 0.9719150996691147, + "grad_norm": 0.7556219100952148, + "learning_rate": 6.906827363933917e-05, + "loss": 2.5283, + "step": 12043 + }, + { + "epoch": 0.9719958034056977, + "grad_norm": 0.7074917554855347, + "learning_rate": 6.90532613339183e-05, + "loss": 2.4898, + "step": 12044 + }, + { + "epoch": 0.9720765071422807, + "grad_norm": 0.6456350684165955, + "learning_rate": 6.903824979977101e-05, + "loss": 2.4989, + "step": 12045 + }, + { + "epoch": 0.9721572108788636, + "grad_norm": 
0.6609941720962524, + "learning_rate": 6.902323903727146e-05, + "loss": 2.4883, + "step": 12046 + }, + { + "epoch": 0.9722379146154467, + "grad_norm": 0.7132936716079712, + "learning_rate": 6.90082290467938e-05, + "loss": 2.4983, + "step": 12047 + }, + { + "epoch": 0.9723186183520297, + "grad_norm": 0.6686434745788574, + "learning_rate": 6.899321982871206e-05, + "loss": 2.4862, + "step": 12048 + }, + { + "epoch": 0.9723993220886127, + "grad_norm": 0.6792194247245789, + "learning_rate": 6.897821138340033e-05, + "loss": 2.5368, + "step": 12049 + }, + { + "epoch": 0.9724800258251957, + "grad_norm": 0.6829379796981812, + "learning_rate": 6.896320371123268e-05, + "loss": 2.4842, + "step": 12050 + }, + { + "epoch": 0.9725607295617787, + "grad_norm": 0.7459573745727539, + "learning_rate": 6.894819681258312e-05, + "loss": 2.5023, + "step": 12051 + }, + { + "epoch": 0.9726414332983617, + "grad_norm": 0.6700068712234497, + "learning_rate": 6.893319068782566e-05, + "loss": 2.552, + "step": 12052 + }, + { + "epoch": 0.9727221370349447, + "grad_norm": 0.7093638777732849, + "learning_rate": 6.891818533733434e-05, + "loss": 2.445, + "step": 12053 + }, + { + "epoch": 0.9728028407715277, + "grad_norm": 0.703599214553833, + "learning_rate": 6.890318076148304e-05, + "loss": 2.5536, + "step": 12054 + }, + { + "epoch": 0.9728835445081108, + "grad_norm": 0.6214482188224792, + "learning_rate": 6.888817696064578e-05, + "loss": 2.5188, + "step": 12055 + }, + { + "epoch": 0.9729642482446937, + "grad_norm": 0.6893547773361206, + "learning_rate": 6.887317393519645e-05, + "loss": 2.5596, + "step": 12056 + }, + { + "epoch": 0.9730449519812767, + "grad_norm": 0.6282656788825989, + "learning_rate": 6.885817168550903e-05, + "loss": 2.4873, + "step": 12057 + }, + { + "epoch": 0.9731256557178597, + "grad_norm": 0.6979188323020935, + "learning_rate": 6.884317021195737e-05, + "loss": 2.5358, + "step": 12058 + }, + { + "epoch": 0.9732063594544428, + "grad_norm": 0.7925785183906555, + "learning_rate": 
6.882816951491533e-05, + "loss": 2.5358, + "step": 12059 + }, + { + "epoch": 0.9732870631910258, + "grad_norm": 0.6449821591377258, + "learning_rate": 6.881316959475684e-05, + "loss": 2.4784, + "step": 12060 + }, + { + "epoch": 0.9733677669276087, + "grad_norm": 0.7013393044471741, + "learning_rate": 6.879817045185565e-05, + "loss": 2.4804, + "step": 12061 + }, + { + "epoch": 0.9734484706641917, + "grad_norm": 0.8338057398796082, + "learning_rate": 6.878317208658559e-05, + "loss": 2.512, + "step": 12062 + }, + { + "epoch": 0.9735291744007748, + "grad_norm": 0.6815133094787598, + "learning_rate": 6.876817449932054e-05, + "loss": 2.467, + "step": 12063 + }, + { + "epoch": 0.9736098781373578, + "grad_norm": 0.659156858921051, + "learning_rate": 6.87531776904342e-05, + "loss": 2.503, + "step": 12064 + }, + { + "epoch": 0.9736905818739408, + "grad_norm": 0.7149603962898254, + "learning_rate": 6.873818166030033e-05, + "loss": 2.5135, + "step": 12065 + }, + { + "epoch": 0.9737712856105237, + "grad_norm": 0.7010510563850403, + "learning_rate": 6.872318640929272e-05, + "loss": 2.5133, + "step": 12066 + }, + { + "epoch": 0.9738519893471068, + "grad_norm": 0.6247616410255432, + "learning_rate": 6.870819193778504e-05, + "loss": 2.5189, + "step": 12067 + }, + { + "epoch": 0.9739326930836898, + "grad_norm": 0.6938940286636353, + "learning_rate": 6.869319824615101e-05, + "loss": 2.5053, + "step": 12068 + }, + { + "epoch": 0.9740133968202728, + "grad_norm": 0.7636895179748535, + "learning_rate": 6.867820533476436e-05, + "loss": 2.4989, + "step": 12069 + }, + { + "epoch": 0.9740941005568557, + "grad_norm": 0.6489234566688538, + "learning_rate": 6.866321320399869e-05, + "loss": 2.4935, + "step": 12070 + }, + { + "epoch": 0.9741748042934388, + "grad_norm": 0.6752095818519592, + "learning_rate": 6.864822185422764e-05, + "loss": 2.4835, + "step": 12071 + }, + { + "epoch": 0.9742555080300218, + "grad_norm": 0.6947118639945984, + "learning_rate": 6.863323128582486e-05, + "loss": 2.504, + 
"step": 12072 + }, + { + "epoch": 0.9743362117666048, + "grad_norm": 0.6815536618232727, + "learning_rate": 6.861824149916398e-05, + "loss": 2.5369, + "step": 12073 + }, + { + "epoch": 0.9744169155031878, + "grad_norm": 0.6550236344337463, + "learning_rate": 6.860325249461852e-05, + "loss": 2.4753, + "step": 12074 + }, + { + "epoch": 0.9744976192397709, + "grad_norm": 0.6833250522613525, + "learning_rate": 6.858826427256209e-05, + "loss": 2.4687, + "step": 12075 + }, + { + "epoch": 0.9745783229763538, + "grad_norm": 0.6925075650215149, + "learning_rate": 6.857327683336824e-05, + "loss": 2.5363, + "step": 12076 + }, + { + "epoch": 0.9746590267129368, + "grad_norm": 0.6754821538925171, + "learning_rate": 6.855829017741046e-05, + "loss": 2.4696, + "step": 12077 + }, + { + "epoch": 0.9747397304495198, + "grad_norm": 0.7360671162605286, + "learning_rate": 6.854330430506228e-05, + "loss": 2.5144, + "step": 12078 + }, + { + "epoch": 0.9748204341861029, + "grad_norm": 0.6814733743667603, + "learning_rate": 6.852831921669723e-05, + "loss": 2.5059, + "step": 12079 + }, + { + "epoch": 0.9749011379226858, + "grad_norm": 0.7106744647026062, + "learning_rate": 6.851333491268869e-05, + "loss": 2.453, + "step": 12080 + }, + { + "epoch": 0.9749818416592688, + "grad_norm": 0.6623831987380981, + "learning_rate": 6.849835139341015e-05, + "loss": 2.5244, + "step": 12081 + }, + { + "epoch": 0.9750625453958518, + "grad_norm": 0.6723372936248779, + "learning_rate": 6.848336865923506e-05, + "loss": 2.5159, + "step": 12082 + }, + { + "epoch": 0.9751432491324349, + "grad_norm": 0.7256618142127991, + "learning_rate": 6.84683867105368e-05, + "loss": 2.494, + "step": 12083 + }, + { + "epoch": 0.9752239528690179, + "grad_norm": 0.6881731152534485, + "learning_rate": 6.845340554768874e-05, + "loss": 2.4374, + "step": 12084 + }, + { + "epoch": 0.9753046566056008, + "grad_norm": 0.6759666204452515, + "learning_rate": 6.843842517106434e-05, + "loss": 2.5082, + "step": 12085 + }, + { + "epoch": 
0.9753853603421838, + "grad_norm": 0.6983315348625183, + "learning_rate": 6.842344558103684e-05, + "loss": 2.5191, + "step": 12086 + }, + { + "epoch": 0.9754660640787668, + "grad_norm": 0.6805596351623535, + "learning_rate": 6.840846677797959e-05, + "loss": 2.5289, + "step": 12087 + }, + { + "epoch": 0.9755467678153499, + "grad_norm": 0.712942361831665, + "learning_rate": 6.839348876226595e-05, + "loss": 2.5544, + "step": 12088 + }, + { + "epoch": 0.9756274715519329, + "grad_norm": 0.6931124329566956, + "learning_rate": 6.837851153426924e-05, + "loss": 2.5407, + "step": 12089 + }, + { + "epoch": 0.9757081752885158, + "grad_norm": 0.6939486265182495, + "learning_rate": 6.836353509436264e-05, + "loss": 2.5236, + "step": 12090 + }, + { + "epoch": 0.9757888790250988, + "grad_norm": 0.7434083223342896, + "learning_rate": 6.834855944291944e-05, + "loss": 2.4903, + "step": 12091 + }, + { + "epoch": 0.9758695827616819, + "grad_norm": 0.672177255153656, + "learning_rate": 6.833358458031292e-05, + "loss": 2.4995, + "step": 12092 + }, + { + "epoch": 0.9759502864982649, + "grad_norm": 0.6631280779838562, + "learning_rate": 6.831861050691619e-05, + "loss": 2.4689, + "step": 12093 + }, + { + "epoch": 0.9760309902348479, + "grad_norm": 0.7485793232917786, + "learning_rate": 6.830363722310253e-05, + "loss": 2.5526, + "step": 12094 + }, + { + "epoch": 0.9761116939714308, + "grad_norm": 0.6592193245887756, + "learning_rate": 6.828866472924511e-05, + "loss": 2.4425, + "step": 12095 + }, + { + "epoch": 0.9761923977080139, + "grad_norm": 0.6479860544204712, + "learning_rate": 6.827369302571703e-05, + "loss": 2.4637, + "step": 12096 + }, + { + "epoch": 0.9762731014445969, + "grad_norm": 0.6694966554641724, + "learning_rate": 6.825872211289146e-05, + "loss": 2.5256, + "step": 12097 + }, + { + "epoch": 0.9763538051811799, + "grad_norm": 0.675751805305481, + "learning_rate": 6.82437519911415e-05, + "loss": 2.5021, + "step": 12098 + }, + { + "epoch": 0.9764345089177628, + "grad_norm": 
0.7255450487136841, + "learning_rate": 6.822878266084026e-05, + "loss": 2.5275, + "step": 12099 + }, + { + "epoch": 0.9765152126543459, + "grad_norm": 0.7034213542938232, + "learning_rate": 6.821381412236079e-05, + "loss": 2.5432, + "step": 12100 + }, + { + "epoch": 0.9765959163909289, + "grad_norm": 0.6808038949966431, + "learning_rate": 6.819884637607619e-05, + "loss": 2.5044, + "step": 12101 + }, + { + "epoch": 0.9766766201275119, + "grad_norm": 0.6601580381393433, + "learning_rate": 6.818387942235945e-05, + "loss": 2.4602, + "step": 12102 + }, + { + "epoch": 0.9767573238640949, + "grad_norm": 0.7163928151130676, + "learning_rate": 6.816891326158359e-05, + "loss": 2.4785, + "step": 12103 + }, + { + "epoch": 0.976838027600678, + "grad_norm": 0.6616904735565186, + "learning_rate": 6.815394789412164e-05, + "loss": 2.5081, + "step": 12104 + }, + { + "epoch": 0.9769187313372609, + "grad_norm": 0.6476422548294067, + "learning_rate": 6.813898332034657e-05, + "loss": 2.4624, + "step": 12105 + }, + { + "epoch": 0.9769994350738439, + "grad_norm": 0.6468440890312195, + "learning_rate": 6.812401954063131e-05, + "loss": 2.4948, + "step": 12106 + }, + { + "epoch": 0.9770801388104269, + "grad_norm": 0.6988391876220703, + "learning_rate": 6.810905655534878e-05, + "loss": 2.4958, + "step": 12107 + }, + { + "epoch": 0.97716084254701, + "grad_norm": 0.6777953505516052, + "learning_rate": 6.809409436487196e-05, + "loss": 2.5304, + "step": 12108 + }, + { + "epoch": 0.9772415462835929, + "grad_norm": 0.7115550637245178, + "learning_rate": 6.807913296957368e-05, + "loss": 2.5321, + "step": 12109 + }, + { + "epoch": 0.9773222500201759, + "grad_norm": 0.737823486328125, + "learning_rate": 6.806417236982684e-05, + "loss": 2.5121, + "step": 12110 + }, + { + "epoch": 0.9774029537567589, + "grad_norm": 0.6797437071800232, + "learning_rate": 6.804921256600439e-05, + "loss": 2.4783, + "step": 12111 + }, + { + "epoch": 0.977483657493342, + "grad_norm": 0.7240802645683289, + "learning_rate": 
6.803425355847897e-05, + "loss": 2.4949, + "step": 12112 + }, + { + "epoch": 0.977564361229925, + "grad_norm": 0.6433781981468201, + "learning_rate": 6.801929534762357e-05, + "loss": 2.4937, + "step": 12113 + }, + { + "epoch": 0.9776450649665079, + "grad_norm": 0.6935293078422546, + "learning_rate": 6.800433793381095e-05, + "loss": 2.5025, + "step": 12114 + }, + { + "epoch": 0.9777257687030909, + "grad_norm": 0.699780285358429, + "learning_rate": 6.798938131741383e-05, + "loss": 2.5231, + "step": 12115 + }, + { + "epoch": 0.977806472439674, + "grad_norm": 0.6414729952812195, + "learning_rate": 6.7974425498805e-05, + "loss": 2.4422, + "step": 12116 + }, + { + "epoch": 0.977887176176257, + "grad_norm": 0.6733608841896057, + "learning_rate": 6.795947047835722e-05, + "loss": 2.4873, + "step": 12117 + }, + { + "epoch": 0.97796787991284, + "grad_norm": 0.6985765099525452, + "learning_rate": 6.794451625644318e-05, + "loss": 2.4994, + "step": 12118 + }, + { + "epoch": 0.9780485836494229, + "grad_norm": 0.6429893374443054, + "learning_rate": 6.792956283343559e-05, + "loss": 2.4968, + "step": 12119 + }, + { + "epoch": 0.978129287386006, + "grad_norm": 0.7129024267196655, + "learning_rate": 6.79146102097071e-05, + "loss": 2.5457, + "step": 12120 + }, + { + "epoch": 0.978209991122589, + "grad_norm": 0.6811943650245667, + "learning_rate": 6.789965838563047e-05, + "loss": 2.5012, + "step": 12121 + }, + { + "epoch": 0.978290694859172, + "grad_norm": 0.7269948720932007, + "learning_rate": 6.788470736157821e-05, + "loss": 2.5124, + "step": 12122 + }, + { + "epoch": 0.978371398595755, + "grad_norm": 0.7396084666252136, + "learning_rate": 6.786975713792299e-05, + "loss": 2.5631, + "step": 12123 + }, + { + "epoch": 0.978452102332338, + "grad_norm": 0.6880094408988953, + "learning_rate": 6.785480771503745e-05, + "loss": 2.5103, + "step": 12124 + }, + { + "epoch": 0.978532806068921, + "grad_norm": 0.737095057964325, + "learning_rate": 6.783985909329409e-05, + "loss": 2.5062, + "step": 
12125 + }, + { + "epoch": 0.978613509805504, + "grad_norm": 0.6540948152542114, + "learning_rate": 6.782491127306552e-05, + "loss": 2.5568, + "step": 12126 + }, + { + "epoch": 0.978694213542087, + "grad_norm": 0.669706404209137, + "learning_rate": 6.780996425472427e-05, + "loss": 2.5156, + "step": 12127 + }, + { + "epoch": 0.97877491727867, + "grad_norm": 0.6722843647003174, + "learning_rate": 6.779501803864286e-05, + "loss": 2.4784, + "step": 12128 + }, + { + "epoch": 0.978855621015253, + "grad_norm": 0.6545475125312805, + "learning_rate": 6.778007262519377e-05, + "loss": 2.5159, + "step": 12129 + }, + { + "epoch": 0.978936324751836, + "grad_norm": 0.7010136246681213, + "learning_rate": 6.776512801474953e-05, + "loss": 2.5244, + "step": 12130 + }, + { + "epoch": 0.979017028488419, + "grad_norm": 0.6912714242935181, + "learning_rate": 6.775018420768253e-05, + "loss": 2.5223, + "step": 12131 + }, + { + "epoch": 0.9790977322250021, + "grad_norm": 0.6864827275276184, + "learning_rate": 6.773524120436525e-05, + "loss": 2.5027, + "step": 12132 + }, + { + "epoch": 0.979178435961585, + "grad_norm": 0.7586981058120728, + "learning_rate": 6.77202990051701e-05, + "loss": 2.4554, + "step": 12133 + }, + { + "epoch": 0.979259139698168, + "grad_norm": 0.6487839818000793, + "learning_rate": 6.770535761046948e-05, + "loss": 2.5035, + "step": 12134 + }, + { + "epoch": 0.979339843434751, + "grad_norm": 0.7193071246147156, + "learning_rate": 6.769041702063575e-05, + "loss": 2.4669, + "step": 12135 + }, + { + "epoch": 0.9794205471713341, + "grad_norm": 0.7118960618972778, + "learning_rate": 6.76754772360413e-05, + "loss": 2.493, + "step": 12136 + }, + { + "epoch": 0.9795012509079171, + "grad_norm": 0.6617394685745239, + "learning_rate": 6.766053825705847e-05, + "loss": 2.4771, + "step": 12137 + }, + { + "epoch": 0.9795819546445, + "grad_norm": 0.7664859294891357, + "learning_rate": 6.764560008405953e-05, + "loss": 2.5191, + "step": 12138 + }, + { + "epoch": 0.979662658381083, + 
"grad_norm": 0.708063542842865, + "learning_rate": 6.763066271741682e-05, + "loss": 2.5521, + "step": 12139 + }, + { + "epoch": 0.979743362117666, + "grad_norm": 0.6951049566268921, + "learning_rate": 6.761572615750267e-05, + "loss": 2.4708, + "step": 12140 + }, + { + "epoch": 0.9798240658542491, + "grad_norm": 0.6914932727813721, + "learning_rate": 6.760079040468921e-05, + "loss": 2.5101, + "step": 12141 + }, + { + "epoch": 0.9799047695908321, + "grad_norm": 0.6843075752258301, + "learning_rate": 6.758585545934876e-05, + "loss": 2.4932, + "step": 12142 + }, + { + "epoch": 0.979985473327415, + "grad_norm": 0.6567733883857727, + "learning_rate": 6.757092132185354e-05, + "loss": 2.4577, + "step": 12143 + }, + { + "epoch": 0.980066177063998, + "grad_norm": 0.6874415874481201, + "learning_rate": 6.75559879925757e-05, + "loss": 2.4818, + "step": 12144 + }, + { + "epoch": 0.9801468808005811, + "grad_norm": 0.7274627685546875, + "learning_rate": 6.754105547188746e-05, + "loss": 2.523, + "step": 12145 + }, + { + "epoch": 0.9802275845371641, + "grad_norm": 0.6991173028945923, + "learning_rate": 6.7526123760161e-05, + "loss": 2.4864, + "step": 12146 + }, + { + "epoch": 0.980308288273747, + "grad_norm": 0.670078456401825, + "learning_rate": 6.75111928577684e-05, + "loss": 2.4889, + "step": 12147 + }, + { + "epoch": 0.98038899201033, + "grad_norm": 0.6653482913970947, + "learning_rate": 6.749626276508178e-05, + "loss": 2.4652, + "step": 12148 + }, + { + "epoch": 0.9804696957469131, + "grad_norm": 0.7329251766204834, + "learning_rate": 6.748133348247326e-05, + "loss": 2.518, + "step": 12149 + }, + { + "epoch": 0.9805503994834961, + "grad_norm": 0.7792871594429016, + "learning_rate": 6.746640501031495e-05, + "loss": 2.5018, + "step": 12150 + }, + { + "epoch": 0.9806311032200791, + "grad_norm": 0.6962797045707703, + "learning_rate": 6.745147734897883e-05, + "loss": 2.4388, + "step": 12151 + }, + { + "epoch": 0.980711806956662, + "grad_norm": 0.6981272101402283, + "learning_rate": 
6.7436550498837e-05, + "loss": 2.4886, + "step": 12152 + }, + { + "epoch": 0.9807925106932451, + "grad_norm": 0.6696565747261047, + "learning_rate": 6.742162446026146e-05, + "loss": 2.5258, + "step": 12153 + }, + { + "epoch": 0.9808732144298281, + "grad_norm": 0.6922139525413513, + "learning_rate": 6.740669923362417e-05, + "loss": 2.493, + "step": 12154 + }, + { + "epoch": 0.9809539181664111, + "grad_norm": 0.6745694875717163, + "learning_rate": 6.739177481929715e-05, + "loss": 2.5209, + "step": 12155 + }, + { + "epoch": 0.9810346219029941, + "grad_norm": 0.7023215889930725, + "learning_rate": 6.737685121765238e-05, + "loss": 2.4987, + "step": 12156 + }, + { + "epoch": 0.9811153256395772, + "grad_norm": 0.6337805390357971, + "learning_rate": 6.73619284290617e-05, + "loss": 2.4838, + "step": 12157 + }, + { + "epoch": 0.9811960293761601, + "grad_norm": 0.6747817397117615, + "learning_rate": 6.73470064538971e-05, + "loss": 2.4834, + "step": 12158 + }, + { + "epoch": 0.9812767331127431, + "grad_norm": 0.6714580655097961, + "learning_rate": 6.733208529253047e-05, + "loss": 2.4724, + "step": 12159 + }, + { + "epoch": 0.9813574368493261, + "grad_norm": 0.6927861571311951, + "learning_rate": 6.731716494533364e-05, + "loss": 2.495, + "step": 12160 + }, + { + "epoch": 0.9814381405859092, + "grad_norm": 0.6576036214828491, + "learning_rate": 6.73022454126785e-05, + "loss": 2.5415, + "step": 12161 + }, + { + "epoch": 0.9815188443224921, + "grad_norm": 0.6495294570922852, + "learning_rate": 6.728732669493691e-05, + "loss": 2.4889, + "step": 12162 + }, + { + "epoch": 0.9815995480590751, + "grad_norm": 0.6680364012718201, + "learning_rate": 6.72724087924806e-05, + "loss": 2.4733, + "step": 12163 + }, + { + "epoch": 0.9816802517956581, + "grad_norm": 0.6816582083702087, + "learning_rate": 6.725749170568143e-05, + "loss": 2.4688, + "step": 12164 + }, + { + "epoch": 0.9817609555322412, + "grad_norm": 0.6995956897735596, + "learning_rate": 6.724257543491116e-05, + "loss": 2.4962, + 
"step": 12165 + }, + { + "epoch": 0.9818416592688242, + "grad_norm": 0.6728340983390808, + "learning_rate": 6.722765998054157e-05, + "loss": 2.5218, + "step": 12166 + }, + { + "epoch": 0.9819223630054071, + "grad_norm": 0.6835319995880127, + "learning_rate": 6.721274534294433e-05, + "loss": 2.4845, + "step": 12167 + }, + { + "epoch": 0.9820030667419901, + "grad_norm": 0.6969910264015198, + "learning_rate": 6.719783152249119e-05, + "loss": 2.4983, + "step": 12168 + }, + { + "epoch": 0.9820837704785732, + "grad_norm": 0.7327036261558533, + "learning_rate": 6.718291851955383e-05, + "loss": 2.5893, + "step": 12169 + }, + { + "epoch": 0.9821644742151562, + "grad_norm": 0.7092839479446411, + "learning_rate": 6.716800633450393e-05, + "loss": 2.5104, + "step": 12170 + }, + { + "epoch": 0.9822451779517392, + "grad_norm": 0.7384308576583862, + "learning_rate": 6.715309496771311e-05, + "loss": 2.5066, + "step": 12171 + }, + { + "epoch": 0.9823258816883221, + "grad_norm": 0.6744845509529114, + "learning_rate": 6.713818441955308e-05, + "loss": 2.469, + "step": 12172 + }, + { + "epoch": 0.9824065854249052, + "grad_norm": 0.6497980952262878, + "learning_rate": 6.712327469039536e-05, + "loss": 2.4943, + "step": 12173 + }, + { + "epoch": 0.9824872891614882, + "grad_norm": 0.6550357937812805, + "learning_rate": 6.710836578061156e-05, + "loss": 2.5019, + "step": 12174 + }, + { + "epoch": 0.9825679928980712, + "grad_norm": 0.6813549995422363, + "learning_rate": 6.709345769057331e-05, + "loss": 2.4314, + "step": 12175 + }, + { + "epoch": 0.9826486966346542, + "grad_norm": 0.6636531352996826, + "learning_rate": 6.707855042065209e-05, + "loss": 2.5202, + "step": 12176 + }, + { + "epoch": 0.9827294003712372, + "grad_norm": 0.6684894561767578, + "learning_rate": 6.706364397121944e-05, + "loss": 2.4353, + "step": 12177 + }, + { + "epoch": 0.9828101041078202, + "grad_norm": 0.6813677549362183, + "learning_rate": 6.704873834264688e-05, + "loss": 2.4254, + "step": 12178 + }, + { + "epoch": 
0.9828908078444032, + "grad_norm": 0.6584975719451904, + "learning_rate": 6.70338335353059e-05, + "loss": 2.5647, + "step": 12179 + }, + { + "epoch": 0.9829715115809862, + "grad_norm": 0.6959114074707031, + "learning_rate": 6.701892954956796e-05, + "loss": 2.5203, + "step": 12180 + }, + { + "epoch": 0.9830522153175693, + "grad_norm": 0.6399044990539551, + "learning_rate": 6.700402638580452e-05, + "loss": 2.4697, + "step": 12181 + }, + { + "epoch": 0.9831329190541522, + "grad_norm": 0.6838750839233398, + "learning_rate": 6.698912404438702e-05, + "loss": 2.5261, + "step": 12182 + }, + { + "epoch": 0.9832136227907352, + "grad_norm": 0.6286367177963257, + "learning_rate": 6.697422252568679e-05, + "loss": 2.4264, + "step": 12183 + }, + { + "epoch": 0.9832943265273182, + "grad_norm": 0.901637852191925, + "learning_rate": 6.695932183007528e-05, + "loss": 2.4908, + "step": 12184 + }, + { + "epoch": 0.9833750302639013, + "grad_norm": 0.8361458778381348, + "learning_rate": 6.694442195792386e-05, + "loss": 2.5183, + "step": 12185 + }, + { + "epoch": 0.9834557340004842, + "grad_norm": 0.7033401727676392, + "learning_rate": 6.692952290960384e-05, + "loss": 2.5702, + "step": 12186 + }, + { + "epoch": 0.9835364377370672, + "grad_norm": 0.669486939907074, + "learning_rate": 6.691462468548653e-05, + "loss": 2.5143, + "step": 12187 + }, + { + "epoch": 0.9836171414736502, + "grad_norm": 0.7043797969818115, + "learning_rate": 6.689972728594329e-05, + "loss": 2.5638, + "step": 12188 + }, + { + "epoch": 0.9836978452102332, + "grad_norm": 0.6532511115074158, + "learning_rate": 6.688483071134537e-05, + "loss": 2.5227, + "step": 12189 + }, + { + "epoch": 0.9837785489468163, + "grad_norm": 0.7363922595977783, + "learning_rate": 6.6869934962064e-05, + "loss": 2.4953, + "step": 12190 + }, + { + "epoch": 0.9838592526833992, + "grad_norm": 0.6746651530265808, + "learning_rate": 6.685504003847051e-05, + "loss": 2.5021, + "step": 12191 + }, + { + "epoch": 0.9839399564199822, + "grad_norm": 
0.665459930896759, + "learning_rate": 6.684014594093604e-05, + "loss": 2.5126, + "step": 12192 + }, + { + "epoch": 0.9840206601565652, + "grad_norm": 0.6618975400924683, + "learning_rate": 6.682525266983179e-05, + "loss": 2.5046, + "step": 12193 + }, + { + "epoch": 0.9841013638931483, + "grad_norm": 0.6536173224449158, + "learning_rate": 6.6810360225529e-05, + "loss": 2.4222, + "step": 12194 + }, + { + "epoch": 0.9841820676297313, + "grad_norm": 0.6882187724113464, + "learning_rate": 6.679546860839876e-05, + "loss": 2.475, + "step": 12195 + }, + { + "epoch": 0.9842627713663142, + "grad_norm": 0.6941187977790833, + "learning_rate": 6.678057781881224e-05, + "loss": 2.5642, + "step": 12196 + }, + { + "epoch": 0.9843434751028972, + "grad_norm": 0.7057064175605774, + "learning_rate": 6.676568785714057e-05, + "loss": 2.4817, + "step": 12197 + }, + { + "epoch": 0.9844241788394803, + "grad_norm": 0.6455948352813721, + "learning_rate": 6.675079872375487e-05, + "loss": 2.5206, + "step": 12198 + }, + { + "epoch": 0.9845048825760633, + "grad_norm": 0.6559014320373535, + "learning_rate": 6.673591041902613e-05, + "loss": 2.4082, + "step": 12199 + }, + { + "epoch": 0.9845855863126463, + "grad_norm": 0.6732046008110046, + "learning_rate": 6.672102294332542e-05, + "loss": 2.5472, + "step": 12200 + }, + { + "epoch": 0.9846662900492292, + "grad_norm": 0.7074914574623108, + "learning_rate": 6.670613629702391e-05, + "loss": 2.5243, + "step": 12201 + }, + { + "epoch": 0.9847469937858123, + "grad_norm": 0.6780694127082825, + "learning_rate": 6.669125048049246e-05, + "loss": 2.494, + "step": 12202 + }, + { + "epoch": 0.9848276975223953, + "grad_norm": 0.6361132264137268, + "learning_rate": 6.66763654941021e-05, + "loss": 2.4764, + "step": 12203 + }, + { + "epoch": 0.9849084012589783, + "grad_norm": 0.752727210521698, + "learning_rate": 6.666148133822387e-05, + "loss": 2.4942, + "step": 12204 + }, + { + "epoch": 0.9849891049955612, + "grad_norm": 0.7282724976539612, + "learning_rate": 
6.664659801322863e-05, + "loss": 2.471, + "step": 12205 + }, + { + "epoch": 0.9850698087321443, + "grad_norm": 0.6977601051330566, + "learning_rate": 6.663171551948736e-05, + "loss": 2.4695, + "step": 12206 + }, + { + "epoch": 0.9851505124687273, + "grad_norm": 0.6957824230194092, + "learning_rate": 6.661683385737101e-05, + "loss": 2.5096, + "step": 12207 + }, + { + "epoch": 0.9852312162053103, + "grad_norm": 0.6197221279144287, + "learning_rate": 6.660195302725037e-05, + "loss": 2.4199, + "step": 12208 + }, + { + "epoch": 0.9853119199418933, + "grad_norm": 0.747558057308197, + "learning_rate": 6.658707302949638e-05, + "loss": 2.5988, + "step": 12209 + }, + { + "epoch": 0.9853926236784764, + "grad_norm": 0.6593184471130371, + "learning_rate": 6.657219386447989e-05, + "loss": 2.4837, + "step": 12210 + }, + { + "epoch": 0.9854733274150593, + "grad_norm": 0.6795992255210876, + "learning_rate": 6.655731553257169e-05, + "loss": 2.498, + "step": 12211 + }, + { + "epoch": 0.9855540311516423, + "grad_norm": 0.7588422298431396, + "learning_rate": 6.65424380341426e-05, + "loss": 2.444, + "step": 12212 + }, + { + "epoch": 0.9856347348882253, + "grad_norm": 0.7791433930397034, + "learning_rate": 6.652756136956342e-05, + "loss": 2.4893, + "step": 12213 + }, + { + "epoch": 0.9857154386248084, + "grad_norm": 0.6320767998695374, + "learning_rate": 6.651268553920493e-05, + "loss": 2.4831, + "step": 12214 + }, + { + "epoch": 0.9857961423613913, + "grad_norm": 0.6818140745162964, + "learning_rate": 6.649781054343783e-05, + "loss": 2.4316, + "step": 12215 + }, + { + "epoch": 0.9858768460979743, + "grad_norm": 0.7460113763809204, + "learning_rate": 6.648293638263285e-05, + "loss": 2.5335, + "step": 12216 + }, + { + "epoch": 0.9859575498345573, + "grad_norm": 0.714074432849884, + "learning_rate": 6.646806305716079e-05, + "loss": 2.4573, + "step": 12217 + }, + { + "epoch": 0.9860382535711404, + "grad_norm": 0.6815951466560364, + "learning_rate": 6.645319056739217e-05, + "loss": 2.4758, + 
"step": 12218 + }, + { + "epoch": 0.9861189573077234, + "grad_norm": 0.6842799782752991, + "learning_rate": 6.643831891369775e-05, + "loss": 2.4998, + "step": 12219 + }, + { + "epoch": 0.9861996610443063, + "grad_norm": 0.6725212335586548, + "learning_rate": 6.642344809644818e-05, + "loss": 2.5179, + "step": 12220 + }, + { + "epoch": 0.9862803647808893, + "grad_norm": 0.7859417796134949, + "learning_rate": 6.640857811601402e-05, + "loss": 2.5801, + "step": 12221 + }, + { + "epoch": 0.9863610685174724, + "grad_norm": 0.6438577771186829, + "learning_rate": 6.639370897276591e-05, + "loss": 2.4659, + "step": 12222 + }, + { + "epoch": 0.9864417722540554, + "grad_norm": 0.7036609053611755, + "learning_rate": 6.637884066707447e-05, + "loss": 2.5637, + "step": 12223 + }, + { + "epoch": 0.9865224759906384, + "grad_norm": 0.6756969094276428, + "learning_rate": 6.636397319931016e-05, + "loss": 2.5381, + "step": 12224 + }, + { + "epoch": 0.9866031797272213, + "grad_norm": 0.6907589435577393, + "learning_rate": 6.634910656984354e-05, + "loss": 2.4927, + "step": 12225 + }, + { + "epoch": 0.9866838834638044, + "grad_norm": 0.7347010374069214, + "learning_rate": 6.63342407790452e-05, + "loss": 2.5131, + "step": 12226 + }, + { + "epoch": 0.9867645872003874, + "grad_norm": 0.6835876107215881, + "learning_rate": 6.631937582728555e-05, + "loss": 2.4611, + "step": 12227 + }, + { + "epoch": 0.9868452909369704, + "grad_norm": 0.8199172616004944, + "learning_rate": 6.630451171493511e-05, + "loss": 2.5341, + "step": 12228 + }, + { + "epoch": 0.9869259946735534, + "grad_norm": 0.7537188529968262, + "learning_rate": 6.62896484423643e-05, + "loss": 2.5218, + "step": 12229 + }, + { + "epoch": 0.9870066984101364, + "grad_norm": 0.7254310250282288, + "learning_rate": 6.62747860099436e-05, + "loss": 2.4766, + "step": 12230 + }, + { + "epoch": 0.9870874021467194, + "grad_norm": 0.6852995157241821, + "learning_rate": 6.625992441804338e-05, + "loss": 2.548, + "step": 12231 + }, + { + "epoch": 
0.9871681058833024, + "grad_norm": 0.7089388966560364, + "learning_rate": 6.624506366703402e-05, + "loss": 2.5125, + "step": 12232 + }, + { + "epoch": 0.9872488096198854, + "grad_norm": 0.7114216685295105, + "learning_rate": 6.623020375728597e-05, + "loss": 2.5408, + "step": 12233 + }, + { + "epoch": 0.9873295133564685, + "grad_norm": 0.7891978025436401, + "learning_rate": 6.621534468916946e-05, + "loss": 2.5946, + "step": 12234 + }, + { + "epoch": 0.9874102170930514, + "grad_norm": 0.671399712562561, + "learning_rate": 6.620048646305488e-05, + "loss": 2.4732, + "step": 12235 + }, + { + "epoch": 0.9874909208296344, + "grad_norm": 0.6712855696678162, + "learning_rate": 6.618562907931256e-05, + "loss": 2.4376, + "step": 12236 + }, + { + "epoch": 0.9875716245662174, + "grad_norm": 0.7183727025985718, + "learning_rate": 6.617077253831272e-05, + "loss": 2.5406, + "step": 12237 + }, + { + "epoch": 0.9876523283028005, + "grad_norm": 0.6857761144638062, + "learning_rate": 6.615591684042568e-05, + "loss": 2.5279, + "step": 12238 + }, + { + "epoch": 0.9877330320393835, + "grad_norm": 0.7268103957176208, + "learning_rate": 6.614106198602165e-05, + "loss": 2.5283, + "step": 12239 + }, + { + "epoch": 0.9878137357759664, + "grad_norm": 0.6703717708587646, + "learning_rate": 6.612620797547087e-05, + "loss": 2.4254, + "step": 12240 + }, + { + "epoch": 0.9878944395125494, + "grad_norm": 0.7110719680786133, + "learning_rate": 6.611135480914352e-05, + "loss": 2.496, + "step": 12241 + }, + { + "epoch": 0.9879751432491324, + "grad_norm": 0.7268263697624207, + "learning_rate": 6.609650248740983e-05, + "loss": 2.5489, + "step": 12242 + }, + { + "epoch": 0.9880558469857155, + "grad_norm": 0.7413432598114014, + "learning_rate": 6.60816510106399e-05, + "loss": 2.4998, + "step": 12243 + }, + { + "epoch": 0.9881365507222984, + "grad_norm": 0.7443360090255737, + "learning_rate": 6.606680037920389e-05, + "loss": 2.5282, + "step": 12244 + }, + { + "epoch": 0.9882172544588814, + "grad_norm": 
0.7787832021713257, + "learning_rate": 6.605195059347191e-05, + "loss": 2.5221, + "step": 12245 + }, + { + "epoch": 0.9882979581954644, + "grad_norm": 0.6921473741531372, + "learning_rate": 6.603710165381409e-05, + "loss": 2.5434, + "step": 12246 + }, + { + "epoch": 0.9883786619320475, + "grad_norm": 0.737328827381134, + "learning_rate": 6.602225356060044e-05, + "loss": 2.5222, + "step": 12247 + }, + { + "epoch": 0.9884593656686305, + "grad_norm": 0.698823094367981, + "learning_rate": 6.600740631420106e-05, + "loss": 2.528, + "step": 12248 + }, + { + "epoch": 0.9885400694052134, + "grad_norm": 0.6735067963600159, + "learning_rate": 6.599255991498601e-05, + "loss": 2.4942, + "step": 12249 + }, + { + "epoch": 0.9886207731417964, + "grad_norm": 0.659622311592102, + "learning_rate": 6.59777143633252e-05, + "loss": 2.4822, + "step": 12250 + }, + { + "epoch": 0.9887014768783795, + "grad_norm": 0.6973726153373718, + "learning_rate": 6.596286965958872e-05, + "loss": 2.5499, + "step": 12251 + }, + { + "epoch": 0.9887821806149625, + "grad_norm": 0.6771909594535828, + "learning_rate": 6.594802580414651e-05, + "loss": 2.4968, + "step": 12252 + }, + { + "epoch": 0.9888628843515455, + "grad_norm": 0.68080073595047, + "learning_rate": 6.593318279736849e-05, + "loss": 2.5142, + "step": 12253 + }, + { + "epoch": 0.9889435880881284, + "grad_norm": NaN, + "learning_rate": 6.593318279736849e-05, + "loss": 2.466, + "step": 12254 + }, + { + "epoch": 0.9890242918247115, + "grad_norm": 0.6865221858024597, + "learning_rate": 6.591834063962461e-05, + "loss": 2.4894, + "step": 12255 + }, + { + "epoch": 0.9891049955612945, + "grad_norm": 0.7050445079803467, + "learning_rate": 6.590349933128478e-05, + "loss": 2.5733, + "step": 12256 + }, + { + "epoch": 0.9891856992978775, + "grad_norm": 0.6971526741981506, + "learning_rate": 6.588865887271887e-05, + "loss": 2.4997, + "step": 12257 + }, + { + "epoch": 0.9892664030344605, + "grad_norm": 0.6465088725090027, + "learning_rate": 
6.587381926429674e-05, + "loss": 2.5155, + "step": 12258 + }, + { + "epoch": 0.9893471067710435, + "grad_norm": 0.6521422266960144, + "learning_rate": 6.585898050638823e-05, + "loss": 2.4803, + "step": 12259 + }, + { + "epoch": 0.9894278105076265, + "grad_norm": 0.6798849105834961, + "learning_rate": 6.584414259936324e-05, + "loss": 2.5301, + "step": 12260 + }, + { + "epoch": 0.9895085142442095, + "grad_norm": 0.6903446912765503, + "learning_rate": 6.582930554359144e-05, + "loss": 2.4662, + "step": 12261 + }, + { + "epoch": 0.9895892179807925, + "grad_norm": 0.7183516621589661, + "learning_rate": 6.581446933944267e-05, + "loss": 2.4711, + "step": 12262 + }, + { + "epoch": 0.9896699217173756, + "grad_norm": 0.702738344669342, + "learning_rate": 6.579963398728671e-05, + "loss": 2.531, + "step": 12263 + }, + { + "epoch": 0.9897506254539585, + "grad_norm": 0.7187048196792603, + "learning_rate": 6.578479948749325e-05, + "loss": 2.4933, + "step": 12264 + }, + { + "epoch": 0.9898313291905415, + "grad_norm": 0.6988784670829773, + "learning_rate": 6.576996584043202e-05, + "loss": 2.5179, + "step": 12265 + }, + { + "epoch": 0.9899120329271245, + "grad_norm": 0.7434641122817993, + "learning_rate": 6.575513304647276e-05, + "loss": 2.5157, + "step": 12266 + }, + { + "epoch": 0.9899927366637076, + "grad_norm": 0.667881429195404, + "learning_rate": 6.574030110598505e-05, + "loss": 2.5152, + "step": 12267 + }, + { + "epoch": 0.9900734404002905, + "grad_norm": 0.6766676902770996, + "learning_rate": 6.572547001933862e-05, + "loss": 2.5041, + "step": 12268 + }, + { + "epoch": 0.9901541441368735, + "grad_norm": 0.6531797051429749, + "learning_rate": 6.571063978690311e-05, + "loss": 2.5457, + "step": 12269 + }, + { + "epoch": 0.9902348478734565, + "grad_norm": 0.6557255983352661, + "learning_rate": 6.569581040904804e-05, + "loss": 2.5253, + "step": 12270 + }, + { + "epoch": 0.9903155516100396, + "grad_norm": 0.6818893551826477, + "learning_rate": 6.568098188614304e-05, + "loss": 
2.5031, + "step": 12271 + }, + { + "epoch": 0.9903962553466226, + "grad_norm": 0.6644853949546814, + "learning_rate": 6.56661542185577e-05, + "loss": 2.5285, + "step": 12272 + }, + { + "epoch": 0.9904769590832055, + "grad_norm": 0.6035603284835815, + "learning_rate": 6.565132740666155e-05, + "loss": 2.46, + "step": 12273 + }, + { + "epoch": 0.9905576628197885, + "grad_norm": 0.7061343193054199, + "learning_rate": 6.56365014508241e-05, + "loss": 2.4731, + "step": 12274 + }, + { + "epoch": 0.9906383665563716, + "grad_norm": 0.6981248259544373, + "learning_rate": 6.562167635141486e-05, + "loss": 2.4518, + "step": 12275 + }, + { + "epoch": 0.9907190702929546, + "grad_norm": 0.6718073487281799, + "learning_rate": 6.560685210880334e-05, + "loss": 2.4919, + "step": 12276 + }, + { + "epoch": 0.9907997740295376, + "grad_norm": 0.7095392942428589, + "learning_rate": 6.559202872335893e-05, + "loss": 2.5284, + "step": 12277 + }, + { + "epoch": 0.9908804777661205, + "grad_norm": 0.7052092552185059, + "learning_rate": 6.557720619545111e-05, + "loss": 2.4781, + "step": 12278 + }, + { + "epoch": 0.9909611815027036, + "grad_norm": 0.653570830821991, + "learning_rate": 6.556238452544934e-05, + "loss": 2.5293, + "step": 12279 + }, + { + "epoch": 0.9910418852392866, + "grad_norm": 0.6705330610275269, + "learning_rate": 6.554756371372293e-05, + "loss": 2.4437, + "step": 12280 + }, + { + "epoch": 0.9911225889758696, + "grad_norm": 0.6494189500808716, + "learning_rate": 6.553274376064127e-05, + "loss": 2.4833, + "step": 12281 + }, + { + "epoch": 0.9912032927124526, + "grad_norm": 0.6497724652290344, + "learning_rate": 6.551792466657378e-05, + "loss": 2.4803, + "step": 12282 + }, + { + "epoch": 0.9912839964490356, + "grad_norm": 0.7740494608879089, + "learning_rate": 6.550310643188972e-05, + "loss": 2.4907, + "step": 12283 + }, + { + "epoch": 0.9913647001856186, + "grad_norm": 0.699562668800354, + "learning_rate": 6.548828905695843e-05, + "loss": 2.4576, + "step": 12284 + }, + { + 
"epoch": 0.9914454039222016, + "grad_norm": 0.8123162984848022, + "learning_rate": 6.547347254214921e-05, + "loss": 2.5118, + "step": 12285 + }, + { + "epoch": 0.9915261076587846, + "grad_norm": 0.7227715253829956, + "learning_rate": 6.545865688783129e-05, + "loss": 2.4688, + "step": 12286 + }, + { + "epoch": 0.9916068113953677, + "grad_norm": 0.6498493552207947, + "learning_rate": 6.544384209437392e-05, + "loss": 2.477, + "step": 12287 + }, + { + "epoch": 0.9916875151319506, + "grad_norm": 0.6427823901176453, + "learning_rate": 6.542902816214636e-05, + "loss": 2.4388, + "step": 12288 + }, + { + "epoch": 0.9917682188685336, + "grad_norm": 0.6803679466247559, + "learning_rate": 6.541421509151778e-05, + "loss": 2.5095, + "step": 12289 + }, + { + "epoch": 0.9918489226051166, + "grad_norm": 0.7025790810585022, + "learning_rate": 6.539940288285734e-05, + "loss": 2.4881, + "step": 12290 + }, + { + "epoch": 0.9919296263416996, + "grad_norm": 0.6899270415306091, + "learning_rate": 6.538459153653424e-05, + "loss": 2.486, + "step": 12291 + }, + { + "epoch": 0.9920103300782827, + "grad_norm": 0.7379609942436218, + "learning_rate": 6.536978105291762e-05, + "loss": 2.5368, + "step": 12292 + }, + { + "epoch": 0.9920910338148656, + "grad_norm": 0.7279202342033386, + "learning_rate": 6.535497143237657e-05, + "loss": 2.5275, + "step": 12293 + }, + { + "epoch": 0.9921717375514486, + "grad_norm": 0.6810527443885803, + "learning_rate": 6.53401626752802e-05, + "loss": 2.5053, + "step": 12294 + }, + { + "epoch": 0.9922524412880316, + "grad_norm": 0.6578424572944641, + "learning_rate": 6.532535478199759e-05, + "loss": 2.5334, + "step": 12295 + }, + { + "epoch": 0.9923331450246147, + "grad_norm": 0.6819284558296204, + "learning_rate": 6.531054775289778e-05, + "loss": 2.4879, + "step": 12296 + }, + { + "epoch": 0.9924138487611976, + "grad_norm": 0.6524500846862793, + "learning_rate": 6.529574158834977e-05, + "loss": 2.5349, + "step": 12297 + }, + { + "epoch": 0.9924945524977806, + 
"grad_norm": 0.6853352785110474, + "learning_rate": 6.528093628872263e-05, + "loss": 2.4217, + "step": 12298 + }, + { + "epoch": 0.9925752562343636, + "grad_norm": 0.6731893420219421, + "learning_rate": 6.526613185438529e-05, + "loss": 2.4739, + "step": 12299 + }, + { + "epoch": 0.9926559599709467, + "grad_norm": 0.6515606641769409, + "learning_rate": 6.525132828570673e-05, + "loss": 2.5348, + "step": 12300 + }, + { + "epoch": 0.9927366637075297, + "grad_norm": 0.6819963455200195, + "learning_rate": 6.523652558305596e-05, + "loss": 2.5052, + "step": 12301 + }, + { + "epoch": 0.9928173674441126, + "grad_norm": 0.6521475911140442, + "learning_rate": 6.522172374680177e-05, + "loss": 2.5283, + "step": 12302 + }, + { + "epoch": 0.9928980711806956, + "grad_norm": 0.6488186717033386, + "learning_rate": 6.520692277731315e-05, + "loss": 2.4779, + "step": 12303 + }, + { + "epoch": 0.9929787749172787, + "grad_norm": 0.6509760022163391, + "learning_rate": 6.519212267495903e-05, + "loss": 2.5426, + "step": 12304 + }, + { + "epoch": 0.9930594786538617, + "grad_norm": 0.621366560459137, + "learning_rate": 6.517732344010814e-05, + "loss": 2.4804, + "step": 12305 + }, + { + "epoch": 0.9931401823904447, + "grad_norm": 0.6907268166542053, + "learning_rate": 6.516252507312938e-05, + "loss": 2.4883, + "step": 12306 + }, + { + "epoch": 0.9932208861270276, + "grad_norm": 0.7739343643188477, + "learning_rate": 6.514772757439157e-05, + "loss": 2.481, + "step": 12307 + }, + { + "epoch": 0.9933015898636107, + "grad_norm": 0.6794601082801819, + "learning_rate": 6.513293094426352e-05, + "loss": 2.5244, + "step": 12308 + }, + { + "epoch": 0.9933822936001937, + "grad_norm": 0.7189902663230896, + "learning_rate": 6.511813518311394e-05, + "loss": 2.5221, + "step": 12309 + }, + { + "epoch": 0.9934629973367767, + "grad_norm": 0.733318030834198, + "learning_rate": 6.510334029131163e-05, + "loss": 2.521, + "step": 12310 + }, + { + "epoch": 0.9935437010733597, + "grad_norm": 0.7584299445152283, + 
"learning_rate": 6.508854626922531e-05, + "loss": 2.4962, + "step": 12311 + }, + { + "epoch": 0.9936244048099427, + "grad_norm": 0.6442410349845886, + "learning_rate": 6.507375311722366e-05, + "loss": 2.4775, + "step": 12312 + }, + { + "epoch": 0.9937051085465257, + "grad_norm": 0.6609243154525757, + "learning_rate": 6.505896083567536e-05, + "loss": 2.4706, + "step": 12313 + }, + { + "epoch": 0.9937858122831087, + "grad_norm": 0.6527631878852844, + "learning_rate": 6.504416942494914e-05, + "loss": 2.4612, + "step": 12314 + }, + { + "epoch": 0.9938665160196917, + "grad_norm": 0.6798218488693237, + "learning_rate": 6.502937888541357e-05, + "loss": 2.5502, + "step": 12315 + }, + { + "epoch": 0.9939472197562748, + "grad_norm": 0.6573790907859802, + "learning_rate": 6.501458921743728e-05, + "loss": 2.5598, + "step": 12316 + }, + { + "epoch": 0.9940279234928577, + "grad_norm": 0.6945913434028625, + "learning_rate": 6.49998004213889e-05, + "loss": 2.5323, + "step": 12317 + }, + { + "epoch": 0.9941086272294407, + "grad_norm": 0.7609078288078308, + "learning_rate": 6.498501249763697e-05, + "loss": 2.5211, + "step": 12318 + }, + { + "epoch": 0.9941893309660237, + "grad_norm": 0.6878666281700134, + "learning_rate": 6.497022544655006e-05, + "loss": 2.5366, + "step": 12319 + }, + { + "epoch": 0.9942700347026068, + "grad_norm": 0.6675810813903809, + "learning_rate": 6.495543926849674e-05, + "loss": 2.512, + "step": 12320 + }, + { + "epoch": 0.9943507384391898, + "grad_norm": 0.7285950779914856, + "learning_rate": 6.494065396384544e-05, + "loss": 2.4741, + "step": 12321 + }, + { + "epoch": 0.9944314421757727, + "grad_norm": 0.6287158131599426, + "learning_rate": 6.49258695329647e-05, + "loss": 2.4824, + "step": 12322 + }, + { + "epoch": 0.9945121459123557, + "grad_norm": 0.6506727337837219, + "learning_rate": 6.491108597622296e-05, + "loss": 2.5126, + "step": 12323 + }, + { + "epoch": 0.9945928496489388, + "grad_norm": 0.7679052352905273, + "learning_rate": 6.489630329398869e-05, 
+ "loss": 2.5503, + "step": 12324 + }, + { + "epoch": 0.9946735533855218, + "grad_norm": 0.637184202671051, + "learning_rate": 6.488152148663029e-05, + "loss": 2.5098, + "step": 12325 + }, + { + "epoch": 0.9947542571221047, + "grad_norm": 0.6747186779975891, + "learning_rate": 6.486674055451619e-05, + "loss": 2.5154, + "step": 12326 + }, + { + "epoch": 0.9948349608586877, + "grad_norm": 0.7288245558738708, + "learning_rate": 6.485196049801476e-05, + "loss": 2.5077, + "step": 12327 + }, + { + "epoch": 0.9949156645952708, + "grad_norm": 0.6914251446723938, + "learning_rate": 6.483718131749435e-05, + "loss": 2.4877, + "step": 12328 + }, + { + "epoch": 0.9949963683318538, + "grad_norm": 0.7224392294883728, + "learning_rate": 6.48224030133233e-05, + "loss": 2.4862, + "step": 12329 + }, + { + "epoch": 0.9950770720684368, + "grad_norm": 0.7365561723709106, + "learning_rate": 6.480762558586995e-05, + "loss": 2.477, + "step": 12330 + }, + { + "epoch": 0.9951577758050197, + "grad_norm": 0.7673236131668091, + "learning_rate": 6.47928490355025e-05, + "loss": 2.5423, + "step": 12331 + }, + { + "epoch": 0.9952384795416028, + "grad_norm": 0.6638002395629883, + "learning_rate": 6.477807336258931e-05, + "loss": 2.5007, + "step": 12332 + }, + { + "epoch": 0.9953191832781858, + "grad_norm": 0.6415974497795105, + "learning_rate": 6.476329856749864e-05, + "loss": 2.4924, + "step": 12333 + }, + { + "epoch": 0.9953998870147688, + "grad_norm": 0.7129398584365845, + "learning_rate": 6.474852465059864e-05, + "loss": 2.5313, + "step": 12334 + }, + { + "epoch": 0.9954805907513518, + "grad_norm": 0.6896344423294067, + "learning_rate": 6.473375161225756e-05, + "loss": 2.5073, + "step": 12335 + }, + { + "epoch": 0.9955612944879348, + "grad_norm": 0.7009317874908447, + "learning_rate": 6.47189794528436e-05, + "loss": 2.574, + "step": 12336 + }, + { + "epoch": 0.9956419982245178, + "grad_norm": 0.6555172801017761, + "learning_rate": 6.470420817272488e-05, + "loss": 2.4769, + "step": 12337 + }, + { 
+ "epoch": 0.9957227019611008, + "grad_norm": 0.7569532990455627, + "learning_rate": 6.468943777226954e-05, + "loss": 2.4691, + "step": 12338 + }, + { + "epoch": 0.9958034056976838, + "grad_norm": 0.68092280626297, + "learning_rate": 6.467466825184569e-05, + "loss": 2.4793, + "step": 12339 + }, + { + "epoch": 0.9958841094342669, + "grad_norm": 0.6977378726005554, + "learning_rate": 6.465989961182152e-05, + "loss": 2.4678, + "step": 12340 + }, + { + "epoch": 0.9959648131708498, + "grad_norm": 0.6702281832695007, + "learning_rate": 6.4645131852565e-05, + "loss": 2.5398, + "step": 12341 + }, + { + "epoch": 0.9960455169074328, + "grad_norm": 0.7584038972854614, + "learning_rate": 6.46303649744442e-05, + "loss": 2.5355, + "step": 12342 + }, + { + "epoch": 0.9961262206440158, + "grad_norm": 0.6779505610466003, + "learning_rate": 6.461559897782718e-05, + "loss": 2.4828, + "step": 12343 + }, + { + "epoch": 0.9962069243805988, + "grad_norm": 0.6968233585357666, + "learning_rate": 6.460083386308192e-05, + "loss": 2.5108, + "step": 12344 + }, + { + "epoch": 0.9962876281171819, + "grad_norm": 0.7114594578742981, + "learning_rate": 6.45860696305764e-05, + "loss": 2.5236, + "step": 12345 + }, + { + "epoch": 0.9963683318537648, + "grad_norm": 0.6850530505180359, + "learning_rate": 6.457130628067865e-05, + "loss": 2.458, + "step": 12346 + }, + { + "epoch": 0.9964490355903478, + "grad_norm": 0.7135400772094727, + "learning_rate": 6.455654381375651e-05, + "loss": 2.539, + "step": 12347 + }, + { + "epoch": 0.9965297393269308, + "grad_norm": 0.6736366748809814, + "learning_rate": 6.454178223017797e-05, + "loss": 2.4721, + "step": 12348 + }, + { + "epoch": 0.9966104430635139, + "grad_norm": 0.6806206107139587, + "learning_rate": 6.45270215303109e-05, + "loss": 2.5035, + "step": 12349 + }, + { + "epoch": 0.9966911468000968, + "grad_norm": 0.7120711803436279, + "learning_rate": 6.451226171452318e-05, + "loss": 2.5344, + "step": 12350 + }, + { + "epoch": 0.9967718505366798, + "grad_norm": 
0.6865986585617065, + "learning_rate": 6.449750278318264e-05, + "loss": 2.4807, + "step": 12351 + }, + { + "epoch": 0.9968525542732628, + "grad_norm": 0.6461294889450073, + "learning_rate": 6.448274473665717e-05, + "loss": 2.4878, + "step": 12352 + }, + { + "epoch": 0.9969332580098459, + "grad_norm": 0.7090638279914856, + "learning_rate": 6.446798757531454e-05, + "loss": 2.4599, + "step": 12353 + }, + { + "epoch": 0.9970139617464289, + "grad_norm": 0.6933324337005615, + "learning_rate": 6.445323129952252e-05, + "loss": 2.5398, + "step": 12354 + }, + { + "epoch": 0.9970946654830118, + "grad_norm": 0.7018197774887085, + "learning_rate": 6.443847590964888e-05, + "loss": 2.5159, + "step": 12355 + }, + { + "epoch": 0.9971753692195948, + "grad_norm": 0.7292604446411133, + "learning_rate": 6.442372140606145e-05, + "loss": 2.4934, + "step": 12356 + }, + { + "epoch": 0.9972560729561779, + "grad_norm": 0.6686378121376038, + "learning_rate": 6.440896778912783e-05, + "loss": 2.5076, + "step": 12357 + }, + { + "epoch": 0.9973367766927609, + "grad_norm": 0.7194764018058777, + "learning_rate": 6.439421505921576e-05, + "loss": 2.4958, + "step": 12358 + }, + { + "epoch": 0.9974174804293439, + "grad_norm": 0.662467360496521, + "learning_rate": 6.437946321669296e-05, + "loss": 2.5202, + "step": 12359 + }, + { + "epoch": 0.9974981841659268, + "grad_norm": 0.7222515940666199, + "learning_rate": 6.436471226192703e-05, + "loss": 2.5058, + "step": 12360 + }, + { + "epoch": 0.9975788879025099, + "grad_norm": 0.6354855895042419, + "learning_rate": 6.434996219528562e-05, + "loss": 2.4849, + "step": 12361 + }, + { + "epoch": 0.9976595916390929, + "grad_norm": 0.7689539790153503, + "learning_rate": 6.433521301713636e-05, + "loss": 2.4959, + "step": 12362 + }, + { + "epoch": 0.9977402953756759, + "grad_norm": 0.6894338130950928, + "learning_rate": 6.43204647278468e-05, + "loss": 2.5098, + "step": 12363 + }, + { + "epoch": 0.9978209991122589, + "grad_norm": 0.7694165110588074, + "learning_rate": 
6.430571732778451e-05, + "loss": 2.513, + "step": 12364 + }, + { + "epoch": 0.9979017028488419, + "grad_norm": 0.6512044668197632, + "learning_rate": 6.42909708173171e-05, + "loss": 2.4785, + "step": 12365 + }, + { + "epoch": 0.9979824065854249, + "grad_norm": 0.6605672836303711, + "learning_rate": 6.427622519681201e-05, + "loss": 2.4804, + "step": 12366 + }, + { + "epoch": 0.9980631103220079, + "grad_norm": 0.7123624086380005, + "learning_rate": 6.426148046663677e-05, + "loss": 2.4854, + "step": 12367 + }, + { + "epoch": 0.9981438140585909, + "grad_norm": 0.662645697593689, + "learning_rate": 6.424673662715886e-05, + "loss": 2.5314, + "step": 12368 + }, + { + "epoch": 0.998224517795174, + "grad_norm": 0.6482149362564087, + "learning_rate": 6.423199367874573e-05, + "loss": 2.4492, + "step": 12369 + }, + { + "epoch": 0.9983052215317569, + "grad_norm": 0.6545752286911011, + "learning_rate": 6.421725162176482e-05, + "loss": 2.5042, + "step": 12370 + }, + { + "epoch": 0.9983859252683399, + "grad_norm": 0.6698874235153198, + "learning_rate": 6.420251045658353e-05, + "loss": 2.4523, + "step": 12371 + }, + { + "epoch": 0.9984666290049229, + "grad_norm": 0.6961477398872375, + "learning_rate": 6.418777018356929e-05, + "loss": 2.556, + "step": 12372 + }, + { + "epoch": 0.998547332741506, + "grad_norm": 0.67090904712677, + "learning_rate": 6.41730308030894e-05, + "loss": 2.5237, + "step": 12373 + }, + { + "epoch": 0.998628036478089, + "grad_norm": 0.6828685402870178, + "learning_rate": 6.415829231551124e-05, + "loss": 2.453, + "step": 12374 + }, + { + "epoch": 0.9987087402146719, + "grad_norm": 0.6699565649032593, + "learning_rate": 6.414355472120213e-05, + "loss": 2.4632, + "step": 12375 + }, + { + "epoch": 0.9987894439512549, + "grad_norm": 0.6918730735778809, + "learning_rate": 6.412881802052936e-05, + "loss": 2.4532, + "step": 12376 + }, + { + "epoch": 0.998870147687838, + "grad_norm": 0.7222442030906677, + "learning_rate": 6.411408221386021e-05, + "loss": 2.5113, + 
"step": 12377 + }, + { + "epoch": 0.998950851424421, + "grad_norm": 0.7479627132415771, + "learning_rate": 6.409934730156195e-05, + "loss": 2.4857, + "step": 12378 + }, + { + "epoch": 0.999031555161004, + "grad_norm": 0.6552882194519043, + "learning_rate": 6.40846132840018e-05, + "loss": 2.4816, + "step": 12379 + }, + { + "epoch": 0.9991122588975869, + "grad_norm": 0.5990073084831238, + "learning_rate": 6.406988016154694e-05, + "loss": 2.4753, + "step": 12380 + }, + { + "epoch": 0.99919296263417, + "grad_norm": 0.6671901941299438, + "learning_rate": 6.405514793456465e-05, + "loss": 2.5298, + "step": 12381 + }, + { + "epoch": 0.999273666370753, + "grad_norm": 0.6630427241325378, + "learning_rate": 6.4040416603422e-05, + "loss": 2.485, + "step": 12382 + }, + { + "epoch": 0.999354370107336, + "grad_norm": 0.6873636841773987, + "learning_rate": 6.402568616848614e-05, + "loss": 2.4902, + "step": 12383 + }, + { + "epoch": 0.9994350738439189, + "grad_norm": 0.6912413239479065, + "learning_rate": 6.401095663012424e-05, + "loss": 2.5339, + "step": 12384 + }, + { + "epoch": 0.999515777580502, + "grad_norm": 0.6491912603378296, + "learning_rate": 6.39962279887034e-05, + "loss": 2.5367, + "step": 12385 + }, + { + "epoch": 0.999596481317085, + "grad_norm": 0.6668288111686707, + "learning_rate": 6.398150024459065e-05, + "loss": 2.5294, + "step": 12386 + }, + { + "epoch": 0.999677185053668, + "grad_norm": 0.6603856086730957, + "learning_rate": 6.396677339815306e-05, + "loss": 2.4378, + "step": 12387 + }, + { + "epoch": 0.999757888790251, + "grad_norm": 0.6461218595504761, + "learning_rate": 6.395204744975772e-05, + "loss": 2.4835, + "step": 12388 + }, + { + "epoch": 0.999838592526834, + "grad_norm": 0.6621688604354858, + "learning_rate": 6.39373223997715e-05, + "loss": 2.4834, + "step": 12389 + }, + { + "epoch": 0.999919296263417, + "grad_norm": 0.6758724451065063, + "learning_rate": 6.392259824856153e-05, + "loss": 2.4549, + "step": 12390 + }, + { + "epoch": 1.0, + "grad_norm": 
1.1304112672805786, + "learning_rate": 6.390787499649473e-05, + "loss": 2.5547, + "step": 12391 + }, + { + "epoch": 1.000080703736583, + "grad_norm": 0.6919478178024292, + "learning_rate": 6.389315264393801e-05, + "loss": 2.47, + "step": 12392 + }, + { + "epoch": 1.000161407473166, + "grad_norm": 0.6916815638542175, + "learning_rate": 6.38784311912583e-05, + "loss": 2.4636, + "step": 12393 + }, + { + "epoch": 1.000242111209749, + "grad_norm": 0.6627040505409241, + "learning_rate": 6.386371063882252e-05, + "loss": 2.5094, + "step": 12394 + }, + { + "epoch": 1.000322814946332, + "grad_norm": 0.6408648490905762, + "learning_rate": 6.384899098699754e-05, + "loss": 2.426, + "step": 12395 + }, + { + "epoch": 1.000403518682915, + "grad_norm": 0.70432448387146, + "learning_rate": 6.38342722361502e-05, + "loss": 2.4861, + "step": 12396 + }, + { + "epoch": 1.000484222419498, + "grad_norm": 0.7115964889526367, + "learning_rate": 6.381955438664735e-05, + "loss": 2.4824, + "step": 12397 + }, + { + "epoch": 1.000564926156081, + "grad_norm": 0.6547040939331055, + "learning_rate": 6.380483743885574e-05, + "loss": 2.488, + "step": 12398 + }, + { + "epoch": 1.000645629892664, + "grad_norm": 0.6916625499725342, + "learning_rate": 6.379012139314223e-05, + "loss": 2.4864, + "step": 12399 + }, + { + "epoch": 1.0007263336292471, + "grad_norm": 0.6311133503913879, + "learning_rate": 6.377540624987352e-05, + "loss": 2.4672, + "step": 12400 + }, + { + "epoch": 1.00080703736583, + "grad_norm": 0.7115580439567566, + "learning_rate": 6.376069200941642e-05, + "loss": 2.4359, + "step": 12401 + }, + { + "epoch": 1.000887741102413, + "grad_norm": 0.6734051704406738, + "learning_rate": 6.374597867213756e-05, + "loss": 2.4896, + "step": 12402 + }, + { + "epoch": 1.000968444838996, + "grad_norm": 0.6910715699195862, + "learning_rate": 6.373126623840368e-05, + "loss": 2.4502, + "step": 12403 + }, + { + "epoch": 1.001049148575579, + "grad_norm": 0.6807514429092407, + "learning_rate": 
6.37165547085815e-05, + "loss": 2.4791, + "step": 12404 + }, + { + "epoch": 1.0011298523121621, + "grad_norm": 0.679350733757019, + "learning_rate": 6.370184408303759e-05, + "loss": 2.4758, + "step": 12405 + }, + { + "epoch": 1.001210556048745, + "grad_norm": 0.6516300439834595, + "learning_rate": 6.36871343621386e-05, + "loss": 2.4338, + "step": 12406 + }, + { + "epoch": 1.001291259785328, + "grad_norm": 0.7033620476722717, + "learning_rate": 6.367242554625119e-05, + "loss": 2.429, + "step": 12407 + }, + { + "epoch": 1.0013719635219112, + "grad_norm": 0.6750274896621704, + "learning_rate": 6.365771763574186e-05, + "loss": 2.4283, + "step": 12408 + }, + { + "epoch": 1.001452667258494, + "grad_norm": 0.7188721895217896, + "learning_rate": 6.364301063097722e-05, + "loss": 2.4509, + "step": 12409 + }, + { + "epoch": 1.001533370995077, + "grad_norm": 0.6936308741569519, + "learning_rate": 6.362830453232379e-05, + "loss": 2.4469, + "step": 12410 + }, + { + "epoch": 1.00161407473166, + "grad_norm": 0.673060953617096, + "learning_rate": 6.361359934014808e-05, + "loss": 2.4444, + "step": 12411 + }, + { + "epoch": 1.001694778468243, + "grad_norm": 0.7465113997459412, + "learning_rate": 6.359889505481658e-05, + "loss": 2.4376, + "step": 12412 + }, + { + "epoch": 1.0017754822048262, + "grad_norm": 0.7180366516113281, + "learning_rate": 6.358419167669582e-05, + "loss": 2.4223, + "step": 12413 + }, + { + "epoch": 1.001856185941409, + "grad_norm": 0.6582302451133728, + "learning_rate": 6.356948920615214e-05, + "loss": 2.4723, + "step": 12414 + }, + { + "epoch": 1.001936889677992, + "grad_norm": 0.6452654600143433, + "learning_rate": 6.3554787643552e-05, + "loss": 2.4609, + "step": 12415 + }, + { + "epoch": 1.0020175934145752, + "grad_norm": 0.7170321345329285, + "learning_rate": 6.354008698926185e-05, + "loss": 2.5377, + "step": 12416 + }, + { + "epoch": 1.002098297151158, + "grad_norm": 0.6483680605888367, + "learning_rate": 6.352538724364809e-05, + "loss": 2.4349, + "step": 
12417 + }, + { + "epoch": 1.0021790008877411, + "grad_norm": 0.6567494869232178, + "learning_rate": 6.351068840707697e-05, + "loss": 2.4421, + "step": 12418 + }, + { + "epoch": 1.002259704624324, + "grad_norm": 0.7498565912246704, + "learning_rate": 6.349599047991488e-05, + "loss": 2.4212, + "step": 12419 + }, + { + "epoch": 1.002340408360907, + "grad_norm": 0.6894906759262085, + "learning_rate": 6.348129346252816e-05, + "loss": 2.4356, + "step": 12420 + }, + { + "epoch": 1.0024211120974902, + "grad_norm": 0.657361626625061, + "learning_rate": 6.346659735528304e-05, + "loss": 2.4164, + "step": 12421 + }, + { + "epoch": 1.002501815834073, + "grad_norm": 0.6369211673736572, + "learning_rate": 6.345190215854581e-05, + "loss": 2.4229, + "step": 12422 + }, + { + "epoch": 1.0025825195706561, + "grad_norm": 0.7033721208572388, + "learning_rate": 6.343720787268277e-05, + "loss": 2.5052, + "step": 12423 + }, + { + "epoch": 1.0026632233072392, + "grad_norm": 0.7125518918037415, + "learning_rate": 6.342251449806003e-05, + "loss": 2.514, + "step": 12424 + }, + { + "epoch": 1.002743927043822, + "grad_norm": 0.7355595827102661, + "learning_rate": 6.340782203504385e-05, + "loss": 2.4459, + "step": 12425 + }, + { + "epoch": 1.0028246307804052, + "grad_norm": 0.7244594693183899, + "learning_rate": 6.339313048400042e-05, + "loss": 2.452, + "step": 12426 + }, + { + "epoch": 1.002905334516988, + "grad_norm": 0.7112728357315063, + "learning_rate": 6.337843984529585e-05, + "loss": 2.4951, + "step": 12427 + }, + { + "epoch": 1.0029860382535711, + "grad_norm": 0.7235615849494934, + "learning_rate": 6.336375011929628e-05, + "loss": 2.4697, + "step": 12428 + }, + { + "epoch": 1.0030667419901542, + "grad_norm": 0.653865396976471, + "learning_rate": 6.334906130636784e-05, + "loss": 2.4804, + "step": 12429 + }, + { + "epoch": 1.003147445726737, + "grad_norm": 0.7845149636268616, + "learning_rate": 6.33343734068766e-05, + "loss": 2.5415, + "step": 12430 + }, + { + "epoch": 1.0032281494633202, + 
"grad_norm": 0.7356342077255249, + "learning_rate": 6.33196864211886e-05, + "loss": 2.5321, + "step": 12431 + }, + { + "epoch": 1.0033088531999033, + "grad_norm": 0.6828265190124512, + "learning_rate": 6.330500034966991e-05, + "loss": 2.3849, + "step": 12432 + }, + { + "epoch": 1.0033895569364861, + "grad_norm": 0.7226579189300537, + "learning_rate": 6.329031519268658e-05, + "loss": 2.512, + "step": 12433 + }, + { + "epoch": 1.0034702606730692, + "grad_norm": 0.6490235924720764, + "learning_rate": 6.327563095060449e-05, + "loss": 2.487, + "step": 12434 + }, + { + "epoch": 1.003550964409652, + "grad_norm": 0.6889309883117676, + "learning_rate": 6.326094762378969e-05, + "loss": 2.4677, + "step": 12435 + }, + { + "epoch": 1.0036316681462352, + "grad_norm": 0.695854127407074, + "learning_rate": 6.324626521260815e-05, + "loss": 2.4362, + "step": 12436 + }, + { + "epoch": 1.0037123718828183, + "grad_norm": 0.7045256495475769, + "learning_rate": 6.32315837174257e-05, + "loss": 2.4307, + "step": 12437 + }, + { + "epoch": 1.0037930756194011, + "grad_norm": 0.662604570388794, + "learning_rate": 6.321690313860833e-05, + "loss": 2.4271, + "step": 12438 + }, + { + "epoch": 1.0038737793559842, + "grad_norm": 0.7682240009307861, + "learning_rate": 6.320222347652191e-05, + "loss": 2.4617, + "step": 12439 + }, + { + "epoch": 1.0039544830925673, + "grad_norm": 0.6599584817886353, + "learning_rate": 6.318754473153221e-05, + "loss": 2.405, + "step": 12440 + }, + { + "epoch": 1.0040351868291502, + "grad_norm": 0.7423116564750671, + "learning_rate": 6.317286690400515e-05, + "loss": 2.5496, + "step": 12441 + }, + { + "epoch": 1.0041158905657332, + "grad_norm": 0.6928953528404236, + "learning_rate": 6.315818999430654e-05, + "loss": 2.4265, + "step": 12442 + }, + { + "epoch": 1.0041965943023161, + "grad_norm": 0.699990451335907, + "learning_rate": 6.314351400280211e-05, + "loss": 2.4747, + "step": 12443 + }, + { + "epoch": 1.0042772980388992, + "grad_norm": 0.673384964466095, + 
"learning_rate": 6.312883892985765e-05, + "loss": 2.4891, + "step": 12444 + }, + { + "epoch": 1.0043580017754823, + "grad_norm": 0.6668596863746643, + "learning_rate": 6.311416477583893e-05, + "loss": 2.4312, + "step": 12445 + }, + { + "epoch": 1.0044387055120652, + "grad_norm": 0.6931218504905701, + "learning_rate": 6.309949154111163e-05, + "loss": 2.4907, + "step": 12446 + }, + { + "epoch": 1.0045194092486482, + "grad_norm": 0.687683641910553, + "learning_rate": 6.308481922604146e-05, + "loss": 2.4302, + "step": 12447 + }, + { + "epoch": 1.004600112985231, + "grad_norm": 0.6887302398681641, + "learning_rate": 6.30701478309941e-05, + "loss": 2.4749, + "step": 12448 + }, + { + "epoch": 1.0046808167218142, + "grad_norm": 0.6713404655456543, + "learning_rate": 6.305547735633522e-05, + "loss": 2.5046, + "step": 12449 + }, + { + "epoch": 1.0047615204583973, + "grad_norm": 0.7147336006164551, + "learning_rate": 6.304080780243038e-05, + "loss": 2.4578, + "step": 12450 + }, + { + "epoch": 1.0048422241949801, + "grad_norm": 0.87425297498703, + "learning_rate": 6.30261391696452e-05, + "loss": 2.4487, + "step": 12451 + }, + { + "epoch": 1.0049229279315632, + "grad_norm": 0.6641440987586975, + "learning_rate": 6.301147145834534e-05, + "loss": 2.4657, + "step": 12452 + }, + { + "epoch": 1.0050036316681463, + "grad_norm": 0.7311998009681702, + "learning_rate": 6.299680466889626e-05, + "loss": 2.4784, + "step": 12453 + }, + { + "epoch": 1.0050843354047292, + "grad_norm": 0.6722697615623474, + "learning_rate": 6.298213880166354e-05, + "loss": 2.4653, + "step": 12454 + }, + { + "epoch": 1.0051650391413123, + "grad_norm": 0.6886328458786011, + "learning_rate": 6.29674738570127e-05, + "loss": 2.3949, + "step": 12455 + }, + { + "epoch": 1.0052457428778951, + "grad_norm": 0.684688925743103, + "learning_rate": 6.295280983530921e-05, + "loss": 2.4334, + "step": 12456 + }, + { + "epoch": 1.0053264466144782, + "grad_norm": 0.7436798214912415, + "learning_rate": 6.293814673691853e-05, + 
"loss": 2.5316, + "step": 12457 + }, + { + "epoch": 1.0054071503510613, + "grad_norm": 0.7401304244995117, + "learning_rate": 6.292348456220615e-05, + "loss": 2.4556, + "step": 12458 + }, + { + "epoch": 1.0054878540876442, + "grad_norm": 0.7330329418182373, + "learning_rate": 6.290882331153742e-05, + "loss": 2.4321, + "step": 12459 + }, + { + "epoch": 1.0055685578242273, + "grad_norm": 0.8005052208900452, + "learning_rate": 6.289416298527776e-05, + "loss": 2.415, + "step": 12460 + }, + { + "epoch": 1.0056492615608104, + "grad_norm": 0.8047310709953308, + "learning_rate": 6.28795035837926e-05, + "loss": 2.4144, + "step": 12461 + }, + { + "epoch": 1.0057299652973932, + "grad_norm": 0.7384032011032104, + "learning_rate": 6.28648451074472e-05, + "loss": 2.5237, + "step": 12462 + }, + { + "epoch": 1.0058106690339763, + "grad_norm": 0.7240314483642578, + "learning_rate": 6.285018755660695e-05, + "loss": 2.4894, + "step": 12463 + }, + { + "epoch": 1.0058913727705592, + "grad_norm": 0.6901080012321472, + "learning_rate": 6.283553093163712e-05, + "loss": 2.4244, + "step": 12464 + }, + { + "epoch": 1.0059720765071423, + "grad_norm": 0.6572268605232239, + "learning_rate": 6.282087523290304e-05, + "loss": 2.456, + "step": 12465 + }, + { + "epoch": 1.0060527802437254, + "grad_norm": 0.7207481861114502, + "learning_rate": 6.28062204607699e-05, + "loss": 2.4153, + "step": 12466 + }, + { + "epoch": 1.0061334839803082, + "grad_norm": 0.6901980042457581, + "learning_rate": 6.279156661560299e-05, + "loss": 2.4776, + "step": 12467 + }, + { + "epoch": 1.0062141877168913, + "grad_norm": 0.7003545761108398, + "learning_rate": 6.277691369776752e-05, + "loss": 2.4206, + "step": 12468 + }, + { + "epoch": 1.0062948914534744, + "grad_norm": 0.6978366374969482, + "learning_rate": 6.276226170762865e-05, + "loss": 2.3866, + "step": 12469 + }, + { + "epoch": 1.0063755951900573, + "grad_norm": 0.6763097643852234, + "learning_rate": 6.274761064555154e-05, + "loss": 2.5439, + "step": 12470 + }, + { 
+ "epoch": 1.0064562989266403, + "grad_norm": 0.7146836519241333, + "learning_rate": 6.273296051190139e-05, + "loss": 2.5486, + "step": 12471 + }, + { + "epoch": 1.0065370026632232, + "grad_norm": 0.7448136806488037, + "learning_rate": 6.271831130704326e-05, + "loss": 2.4539, + "step": 12472 + }, + { + "epoch": 1.0066177063998063, + "grad_norm": 0.6918472051620483, + "learning_rate": 6.270366303134226e-05, + "loss": 2.4756, + "step": 12473 + }, + { + "epoch": 1.0066984101363894, + "grad_norm": 0.7067514657974243, + "learning_rate": 6.26890156851635e-05, + "loss": 2.4925, + "step": 12474 + }, + { + "epoch": 1.0067791138729723, + "grad_norm": 0.6517517566680908, + "learning_rate": 6.267436926887197e-05, + "loss": 2.4339, + "step": 12475 + }, + { + "epoch": 1.0068598176095553, + "grad_norm": 0.673367977142334, + "learning_rate": 6.265972378283274e-05, + "loss": 2.416, + "step": 12476 + }, + { + "epoch": 1.0069405213461384, + "grad_norm": 0.7190212607383728, + "learning_rate": 6.26450792274108e-05, + "loss": 2.4822, + "step": 12477 + }, + { + "epoch": 1.0070212250827213, + "grad_norm": 0.7568029165267944, + "learning_rate": 6.263043560297112e-05, + "loss": 2.4607, + "step": 12478 + }, + { + "epoch": 1.0071019288193044, + "grad_norm": 0.6860609650611877, + "learning_rate": 6.261579290987866e-05, + "loss": 2.4429, + "step": 12479 + }, + { + "epoch": 1.0071826325558872, + "grad_norm": 0.7066059112548828, + "learning_rate": 6.260115114849839e-05, + "loss": 2.5504, + "step": 12480 + }, + { + "epoch": 1.0072633362924703, + "grad_norm": 0.6857946515083313, + "learning_rate": 6.25865103191952e-05, + "loss": 2.4776, + "step": 12481 + }, + { + "epoch": 1.0073440400290534, + "grad_norm": 0.6879859566688538, + "learning_rate": 6.257187042233396e-05, + "loss": 2.3651, + "step": 12482 + }, + { + "epoch": 1.0074247437656363, + "grad_norm": 0.6900867223739624, + "learning_rate": 6.255723145827954e-05, + "loss": 2.4644, + "step": 12483 + }, + { + "epoch": 1.0075054475022194, + 
"grad_norm": 0.7144716382026672, + "learning_rate": 6.254259342739683e-05, + "loss": 2.4219, + "step": 12484 + }, + { + "epoch": 1.0075861512388025, + "grad_norm": 0.674619197845459, + "learning_rate": 6.252795633005056e-05, + "loss": 2.5038, + "step": 12485 + }, + { + "epoch": 1.0076668549753853, + "grad_norm": 0.7036965489387512, + "learning_rate": 6.251332016660558e-05, + "loss": 2.4784, + "step": 12486 + }, + { + "epoch": 1.0077475587119684, + "grad_norm": 0.7046369910240173, + "learning_rate": 6.249868493742668e-05, + "loss": 2.514, + "step": 12487 + }, + { + "epoch": 1.0078282624485513, + "grad_norm": 0.6933087110519409, + "learning_rate": 6.248405064287854e-05, + "loss": 2.4855, + "step": 12488 + }, + { + "epoch": 1.0079089661851344, + "grad_norm": 0.7210546731948853, + "learning_rate": 6.246941728332594e-05, + "loss": 2.5101, + "step": 12489 + }, + { + "epoch": 1.0079896699217175, + "grad_norm": 0.6738288402557373, + "learning_rate": 6.245478485913361e-05, + "loss": 2.4891, + "step": 12490 + }, + { + "epoch": 1.0080703736583003, + "grad_norm": 0.7023273706436157, + "learning_rate": 6.244015337066611e-05, + "loss": 2.4977, + "step": 12491 + }, + { + "epoch": 1.0081510773948834, + "grad_norm": 0.6761355996131897, + "learning_rate": 6.24255228182882e-05, + "loss": 2.4948, + "step": 12492 + }, + { + "epoch": 1.0082317811314665, + "grad_norm": 0.6427976489067078, + "learning_rate": 6.241089320236448e-05, + "loss": 2.466, + "step": 12493 + }, + { + "epoch": 1.0083124848680494, + "grad_norm": 0.6907719969749451, + "learning_rate": 6.23962645232596e-05, + "loss": 2.437, + "step": 12494 + }, + { + "epoch": 1.0083931886046325, + "grad_norm": 0.709032416343689, + "learning_rate": 6.238163678133807e-05, + "loss": 2.4298, + "step": 12495 + }, + { + "epoch": 1.0084738923412153, + "grad_norm": 0.7395734786987305, + "learning_rate": 6.236700997696448e-05, + "loss": 2.4502, + "step": 12496 + }, + { + "epoch": 1.0085545960777984, + "grad_norm": 0.6535435914993286, + 
"learning_rate": 6.23523841105034e-05, + "loss": 2.4494, + "step": 12497 + }, + { + "epoch": 1.0086352998143815, + "grad_norm": 0.6597761511802673, + "learning_rate": 6.23377591823193e-05, + "loss": 2.4377, + "step": 12498 + }, + { + "epoch": 1.0087160035509644, + "grad_norm": 0.6610515713691711, + "learning_rate": 6.232313519277668e-05, + "loss": 2.4328, + "step": 12499 + }, + { + "epoch": 1.0087967072875474, + "grad_norm": 0.6785424947738647, + "learning_rate": 6.230851214224009e-05, + "loss": 2.457, + "step": 12500 + }, + { + "epoch": 1.0088774110241303, + "grad_norm": 0.6939748525619507, + "learning_rate": 6.229389003107383e-05, + "loss": 2.383, + "step": 12501 + }, + { + "epoch": 1.0089581147607134, + "grad_norm": 0.7592256665229797, + "learning_rate": 6.22792688596424e-05, + "loss": 2.4665, + "step": 12502 + }, + { + "epoch": 1.0090388184972965, + "grad_norm": 0.6751298308372498, + "learning_rate": 6.226464862831023e-05, + "loss": 2.491, + "step": 12503 + }, + { + "epoch": 1.0091195222338794, + "grad_norm": 0.682771623134613, + "learning_rate": 6.225002933744164e-05, + "loss": 2.4275, + "step": 12504 + }, + { + "epoch": 1.0092002259704624, + "grad_norm": 0.7314651608467102, + "learning_rate": 6.223541098740098e-05, + "loss": 2.4489, + "step": 12505 + }, + { + "epoch": 1.0092809297070455, + "grad_norm": 0.7132120132446289, + "learning_rate": 6.222079357855261e-05, + "loss": 2.4819, + "step": 12506 + }, + { + "epoch": 1.0093616334436284, + "grad_norm": 0.6571424007415771, + "learning_rate": 6.220617711126082e-05, + "loss": 2.455, + "step": 12507 + }, + { + "epoch": 1.0094423371802115, + "grad_norm": 0.7675301432609558, + "learning_rate": 6.21915615858899e-05, + "loss": 2.5282, + "step": 12508 + }, + { + "epoch": 1.0095230409167943, + "grad_norm": 0.6907868385314941, + "learning_rate": 6.217694700280408e-05, + "loss": 2.4639, + "step": 12509 + }, + { + "epoch": 1.0096037446533774, + "grad_norm": 0.7223815321922302, + "learning_rate": 6.216233336236764e-05, + 
"loss": 2.4682, + "step": 12510 + }, + { + "epoch": 1.0096844483899605, + "grad_norm": 0.7325109839439392, + "learning_rate": 6.214772066494474e-05, + "loss": 2.4591, + "step": 12511 + }, + { + "epoch": 1.0097651521265434, + "grad_norm": 0.6589400768280029, + "learning_rate": 6.213310891089957e-05, + "loss": 2.4883, + "step": 12512 + }, + { + "epoch": 1.0098458558631265, + "grad_norm": 0.6692262291908264, + "learning_rate": 6.211849810059635e-05, + "loss": 2.4635, + "step": 12513 + }, + { + "epoch": 1.0099265595997096, + "grad_norm": 0.7352520823478699, + "learning_rate": 6.210388823439914e-05, + "loss": 2.4743, + "step": 12514 + }, + { + "epoch": 1.0100072633362924, + "grad_norm": 0.6631996035575867, + "learning_rate": 6.208927931267212e-05, + "loss": 2.4848, + "step": 12515 + }, + { + "epoch": 1.0100879670728755, + "grad_norm": 0.6985767483711243, + "learning_rate": 6.207467133577937e-05, + "loss": 2.5044, + "step": 12516 + }, + { + "epoch": 1.0101686708094584, + "grad_norm": 0.665635347366333, + "learning_rate": 6.206006430408494e-05, + "loss": 2.4718, + "step": 12517 + }, + { + "epoch": 1.0102493745460415, + "grad_norm": 0.6859133243560791, + "learning_rate": 6.204545821795286e-05, + "loss": 2.4702, + "step": 12518 + }, + { + "epoch": 1.0103300782826246, + "grad_norm": 0.6578841805458069, + "learning_rate": 6.203085307774722e-05, + "loss": 2.4614, + "step": 12519 + }, + { + "epoch": 1.0104107820192074, + "grad_norm": 0.717523455619812, + "learning_rate": 6.201624888383194e-05, + "loss": 2.4412, + "step": 12520 + }, + { + "epoch": 1.0104914857557905, + "grad_norm": 0.7333831787109375, + "learning_rate": 6.200164563657103e-05, + "loss": 2.4157, + "step": 12521 + }, + { + "epoch": 1.0105721894923736, + "grad_norm": 0.6968720555305481, + "learning_rate": 6.198704333632845e-05, + "loss": 2.4556, + "step": 12522 + }, + { + "epoch": 1.0106528932289565, + "grad_norm": 0.6533070802688599, + "learning_rate": 6.19724419834681e-05, + "loss": 2.43, + "step": 12523 + }, + { 
+ "epoch": 1.0107335969655395, + "grad_norm": 0.7341824769973755, + "learning_rate": 6.195784157835391e-05, + "loss": 2.5326, + "step": 12524 + }, + { + "epoch": 1.0108143007021224, + "grad_norm": 0.752912163734436, + "learning_rate": 6.194324212134974e-05, + "loss": 2.4282, + "step": 12525 + }, + { + "epoch": 1.0108950044387055, + "grad_norm": 0.6538611650466919, + "learning_rate": 6.192864361281951e-05, + "loss": 2.4135, + "step": 12526 + }, + { + "epoch": 1.0109757081752886, + "grad_norm": 0.6931454539299011, + "learning_rate": 6.191404605312695e-05, + "loss": 2.5097, + "step": 12527 + }, + { + "epoch": 1.0110564119118715, + "grad_norm": 0.6317688822746277, + "learning_rate": 6.18994494426359e-05, + "loss": 2.4977, + "step": 12528 + }, + { + "epoch": 1.0111371156484545, + "grad_norm": 0.6793715953826904, + "learning_rate": 6.188485378171024e-05, + "loss": 2.4619, + "step": 12529 + }, + { + "epoch": 1.0112178193850376, + "grad_norm": 0.6696654558181763, + "learning_rate": 6.187025907071361e-05, + "loss": 2.4658, + "step": 12530 + }, + { + "epoch": 1.0112985231216205, + "grad_norm": 0.6788807511329651, + "learning_rate": 6.185566531000979e-05, + "loss": 2.4793, + "step": 12531 + }, + { + "epoch": 1.0113792268582036, + "grad_norm": 0.6933971643447876, + "learning_rate": 6.184107249996253e-05, + "loss": 2.4772, + "step": 12532 + }, + { + "epoch": 1.0114599305947864, + "grad_norm": 0.6866000294685364, + "learning_rate": 6.182648064093546e-05, + "loss": 2.428, + "step": 12533 + }, + { + "epoch": 1.0115406343313695, + "grad_norm": 0.7013841271400452, + "learning_rate": 6.181188973329229e-05, + "loss": 2.5273, + "step": 12534 + }, + { + "epoch": 1.0116213380679526, + "grad_norm": 0.6569108963012695, + "learning_rate": 6.179729977739669e-05, + "loss": 2.4125, + "step": 12535 + }, + { + "epoch": 1.0117020418045355, + "grad_norm": 0.7503486275672913, + "learning_rate": 6.17827107736122e-05, + "loss": 2.4385, + "step": 12536 + }, + { + "epoch": 1.0117827455411186, + 
"grad_norm": 0.6757314205169678, + "learning_rate": 6.176812272230246e-05, + "loss": 2.4364, + "step": 12537 + }, + { + "epoch": 1.0118634492777017, + "grad_norm": 0.6567254662513733, + "learning_rate": 6.175353562383106e-05, + "loss": 2.4992, + "step": 12538 + }, + { + "epoch": 1.0119441530142845, + "grad_norm": 0.7564988732337952, + "learning_rate": 6.17389494785615e-05, + "loss": 2.4777, + "step": 12539 + }, + { + "epoch": 1.0120248567508676, + "grad_norm": 0.6972391605377197, + "learning_rate": 6.172436428685735e-05, + "loss": 2.5041, + "step": 12540 + }, + { + "epoch": 1.0121055604874505, + "grad_norm": 0.6861580610275269, + "learning_rate": 6.170978004908209e-05, + "loss": 2.4684, + "step": 12541 + }, + { + "epoch": 1.0121862642240336, + "grad_norm": 0.6621903777122498, + "learning_rate": 6.169519676559921e-05, + "loss": 2.4614, + "step": 12542 + }, + { + "epoch": 1.0122669679606167, + "grad_norm": 0.6879795789718628, + "learning_rate": 6.168061443677215e-05, + "loss": 2.4765, + "step": 12543 + }, + { + "epoch": 1.0123476716971995, + "grad_norm": 0.6361081004142761, + "learning_rate": 6.166603306296434e-05, + "loss": 2.4792, + "step": 12544 + }, + { + "epoch": 1.0124283754337826, + "grad_norm": 0.6660729050636292, + "learning_rate": 6.165145264453924e-05, + "loss": 2.489, + "step": 12545 + }, + { + "epoch": 1.0125090791703655, + "grad_norm": 0.6900594234466553, + "learning_rate": 6.163687318186015e-05, + "loss": 2.4543, + "step": 12546 + }, + { + "epoch": 1.0125897829069486, + "grad_norm": 0.7195869088172913, + "learning_rate": 6.162229467529046e-05, + "loss": 2.4137, + "step": 12547 + }, + { + "epoch": 1.0126704866435317, + "grad_norm": 0.7030326128005981, + "learning_rate": 6.16077171251935e-05, + "loss": 2.4657, + "step": 12548 + }, + { + "epoch": 1.0127511903801145, + "grad_norm": 0.6712052822113037, + "learning_rate": 6.15931405319326e-05, + "loss": 2.4718, + "step": 12549 + }, + { + "epoch": 1.0128318941166976, + "grad_norm": 0.7471029162406921, + 
"learning_rate": 6.157856489587102e-05, + "loss": 2.4705, + "step": 12550 + }, + { + "epoch": 1.0129125978532807, + "grad_norm": 0.6813762187957764, + "learning_rate": 6.15639902173721e-05, + "loss": 2.4479, + "step": 12551 + }, + { + "epoch": 1.0129933015898636, + "grad_norm": 0.6657249927520752, + "learning_rate": 6.154941649679894e-05, + "loss": 2.4911, + "step": 12552 + }, + { + "epoch": 1.0130740053264466, + "grad_norm": 0.6700132489204407, + "learning_rate": 6.153484373451483e-05, + "loss": 2.4962, + "step": 12553 + }, + { + "epoch": 1.0131547090630295, + "grad_norm": 0.7058695554733276, + "learning_rate": 6.152027193088302e-05, + "loss": 2.3935, + "step": 12554 + }, + { + "epoch": 1.0132354127996126, + "grad_norm": 0.7390396595001221, + "learning_rate": 6.150570108626658e-05, + "loss": 2.4454, + "step": 12555 + }, + { + "epoch": 1.0133161165361957, + "grad_norm": 0.7251414060592651, + "learning_rate": 6.149113120102869e-05, + "loss": 2.4146, + "step": 12556 + }, + { + "epoch": 1.0133968202727786, + "grad_norm": 0.8262537717819214, + "learning_rate": 6.14765622755325e-05, + "loss": 2.4638, + "step": 12557 + }, + { + "epoch": 1.0134775240093616, + "grad_norm": 0.7184064984321594, + "learning_rate": 6.146199431014106e-05, + "loss": 2.3958, + "step": 12558 + }, + { + "epoch": 1.0135582277459447, + "grad_norm": 0.7544865012168884, + "learning_rate": 6.144742730521746e-05, + "loss": 2.4662, + "step": 12559 + }, + { + "epoch": 1.0136389314825276, + "grad_norm": 0.6866207718849182, + "learning_rate": 6.143286126112475e-05, + "loss": 2.4951, + "step": 12560 + }, + { + "epoch": 1.0137196352191107, + "grad_norm": 0.6566087603569031, + "learning_rate": 6.1418296178226e-05, + "loss": 2.4002, + "step": 12561 + }, + { + "epoch": 1.0138003389556935, + "grad_norm": 0.6999008059501648, + "learning_rate": 6.140373205688411e-05, + "loss": 2.5306, + "step": 12562 + }, + { + "epoch": 1.0138810426922766, + "grad_norm": 0.6682353615760803, + "learning_rate": 6.138916889746212e-05, 
+ "loss": 2.5565, + "step": 12563 + }, + { + "epoch": 1.0139617464288597, + "grad_norm": 0.7443362474441528, + "learning_rate": 6.137460670032298e-05, + "loss": 2.3958, + "step": 12564 + }, + { + "epoch": 1.0140424501654426, + "grad_norm": 0.6542403697967529, + "learning_rate": 6.136004546582958e-05, + "loss": 2.4394, + "step": 12565 + }, + { + "epoch": 1.0141231539020257, + "grad_norm": 0.6524317264556885, + "learning_rate": 6.134548519434488e-05, + "loss": 2.4979, + "step": 12566 + }, + { + "epoch": 1.0142038576386088, + "grad_norm": 0.6605600118637085, + "learning_rate": 6.133092588623174e-05, + "loss": 2.4827, + "step": 12567 + }, + { + "epoch": 1.0142845613751916, + "grad_norm": 0.7114397883415222, + "learning_rate": 6.1316367541853e-05, + "loss": 2.4799, + "step": 12568 + }, + { + "epoch": 1.0143652651117747, + "grad_norm": 0.6607296466827393, + "learning_rate": 6.130181016157148e-05, + "loss": 2.4991, + "step": 12569 + }, + { + "epoch": 1.0144459688483576, + "grad_norm": 0.6750844717025757, + "learning_rate": 6.128725374575005e-05, + "loss": 2.4451, + "step": 12570 + }, + { + "epoch": 1.0145266725849407, + "grad_norm": 0.6978901624679565, + "learning_rate": 6.127269829475141e-05, + "loss": 2.4608, + "step": 12571 + }, + { + "epoch": 1.0146073763215238, + "grad_norm": 0.676343560218811, + "learning_rate": 6.125814380893838e-05, + "loss": 2.4536, + "step": 12572 + }, + { + "epoch": 1.0146880800581066, + "grad_norm": 0.7082604765892029, + "learning_rate": 6.124359028867368e-05, + "loss": 2.45, + "step": 12573 + }, + { + "epoch": 1.0147687837946897, + "grad_norm": 0.7049853205680847, + "learning_rate": 6.122903773432003e-05, + "loss": 2.4378, + "step": 12574 + }, + { + "epoch": 1.0148494875312728, + "grad_norm": 0.6329593062400818, + "learning_rate": 6.121448614624009e-05, + "loss": 2.4386, + "step": 12575 + }, + { + "epoch": 1.0149301912678557, + "grad_norm": 0.7249468564987183, + "learning_rate": 6.119993552479655e-05, + "loss": 2.5191, + "step": 12576 + }, + 
{ + "epoch": 1.0150108950044388, + "grad_norm": 0.7028193473815918, + "learning_rate": 6.118538587035206e-05, + "loss": 2.4376, + "step": 12577 + }, + { + "epoch": 1.0150915987410216, + "grad_norm": 0.697382926940918, + "learning_rate": 6.117083718326917e-05, + "loss": 2.4797, + "step": 12578 + }, + { + "epoch": 1.0151723024776047, + "grad_norm": 0.7386965155601501, + "learning_rate": 6.115628946391055e-05, + "loss": 2.4512, + "step": 12579 + }, + { + "epoch": 1.0152530062141878, + "grad_norm": 0.6614577174186707, + "learning_rate": 6.114174271263875e-05, + "loss": 2.4404, + "step": 12580 + }, + { + "epoch": 1.0153337099507707, + "grad_norm": 0.6927464604377747, + "learning_rate": 6.112719692981627e-05, + "loss": 2.47, + "step": 12581 + }, + { + "epoch": 1.0154144136873537, + "grad_norm": 0.7004262208938599, + "learning_rate": 6.111265211580566e-05, + "loss": 2.4212, + "step": 12582 + }, + { + "epoch": 1.0154951174239368, + "grad_norm": 0.71146559715271, + "learning_rate": 6.109810827096942e-05, + "loss": 2.4431, + "step": 12583 + }, + { + "epoch": 1.0155758211605197, + "grad_norm": 0.6857032775878906, + "learning_rate": 6.108356539567e-05, + "loss": 2.453, + "step": 12584 + }, + { + "epoch": 1.0156565248971028, + "grad_norm": 0.6976168155670166, + "learning_rate": 6.106902349026986e-05, + "loss": 2.4718, + "step": 12585 + }, + { + "epoch": 1.0157372286336857, + "grad_norm": 0.7158414125442505, + "learning_rate": 6.105448255513146e-05, + "loss": 2.425, + "step": 12586 + }, + { + "epoch": 1.0158179323702687, + "grad_norm": 0.6611737608909607, + "learning_rate": 6.103994259061714e-05, + "loss": 2.4563, + "step": 12587 + }, + { + "epoch": 1.0158986361068518, + "grad_norm": 0.7262980937957764, + "learning_rate": 6.102540359708926e-05, + "loss": 2.4538, + "step": 12588 + }, + { + "epoch": 1.0159793398434347, + "grad_norm": 0.7123451828956604, + "learning_rate": 6.10108655749102e-05, + "loss": 2.4677, + "step": 12589 + }, + { + "epoch": 1.0160600435800178, + "grad_norm": 
0.7135589122772217, + "learning_rate": 6.099632852444235e-05, + "loss": 2.4312, + "step": 12590 + }, + { + "epoch": 1.0161407473166009, + "grad_norm": 0.6509461998939514, + "learning_rate": 6.09817924460479e-05, + "loss": 2.4716, + "step": 12591 + }, + { + "epoch": 1.0162214510531837, + "grad_norm": 0.8835915923118591, + "learning_rate": 6.096725734008919e-05, + "loss": 2.4817, + "step": 12592 + }, + { + "epoch": 1.0163021547897668, + "grad_norm": 0.7084136605262756, + "learning_rate": 6.095272320692846e-05, + "loss": 2.483, + "step": 12593 + }, + { + "epoch": 1.0163828585263497, + "grad_norm": 0.6866818070411682, + "learning_rate": 6.0938190046927934e-05, + "loss": 2.4838, + "step": 12594 + }, + { + "epoch": 1.0164635622629328, + "grad_norm": 0.7297510504722595, + "learning_rate": 6.0923657860449824e-05, + "loss": 2.4675, + "step": 12595 + }, + { + "epoch": 1.0165442659995159, + "grad_norm": 0.6735619306564331, + "learning_rate": 6.090912664785633e-05, + "loss": 2.444, + "step": 12596 + }, + { + "epoch": 1.0166249697360987, + "grad_norm": 0.7046451568603516, + "learning_rate": 6.0894596409509565e-05, + "loss": 2.4757, + "step": 12597 + }, + { + "epoch": 1.0167056734726818, + "grad_norm": 0.6646085977554321, + "learning_rate": 6.0880067145771656e-05, + "loss": 2.4772, + "step": 12598 + }, + { + "epoch": 1.0167863772092647, + "grad_norm": 0.7217094302177429, + "learning_rate": 6.086553885700478e-05, + "loss": 2.4589, + "step": 12599 + }, + { + "epoch": 1.0168670809458478, + "grad_norm": 0.647378146648407, + "learning_rate": 6.085101154357093e-05, + "loss": 2.4327, + "step": 12600 + }, + { + "epoch": 1.0169477846824309, + "grad_norm": 0.6907125115394592, + "learning_rate": 6.083648520583223e-05, + "loss": 2.467, + "step": 12601 + }, + { + "epoch": 1.0170284884190137, + "grad_norm": 0.690433919429779, + "learning_rate": 6.0821959844150687e-05, + "loss": 2.488, + "step": 12602 + }, + { + "epoch": 1.0171091921555968, + "grad_norm": 0.6528738737106323, + "learning_rate": 
6.080743545888833e-05, + "loss": 2.5028, + "step": 12603 + }, + { + "epoch": 1.01718989589218, + "grad_norm": 0.6962323784828186, + "learning_rate": 6.079291205040711e-05, + "loss": 2.5381, + "step": 12604 + }, + { + "epoch": 1.0172705996287628, + "grad_norm": 0.7386075854301453, + "learning_rate": 6.077838961906902e-05, + "loss": 2.4445, + "step": 12605 + }, + { + "epoch": 1.0173513033653458, + "grad_norm": 0.7382189631462097, + "learning_rate": 6.0763868165236025e-05, + "loss": 2.4926, + "step": 12606 + }, + { + "epoch": 1.0174320071019287, + "grad_norm": 0.7291865944862366, + "learning_rate": 6.074934768926995e-05, + "loss": 2.4624, + "step": 12607 + }, + { + "epoch": 1.0175127108385118, + "grad_norm": 0.754843533039093, + "learning_rate": 6.073482819153275e-05, + "loss": 2.4291, + "step": 12608 + }, + { + "epoch": 1.017593414575095, + "grad_norm": 0.6827771663665771, + "learning_rate": 6.072030967238628e-05, + "loss": 2.453, + "step": 12609 + }, + { + "epoch": 1.0176741183116778, + "grad_norm": 0.7138541340827942, + "learning_rate": 6.0705792132192355e-05, + "loss": 2.5172, + "step": 12610 + }, + { + "epoch": 1.0177548220482608, + "grad_norm": 0.6539924740791321, + "learning_rate": 6.06912755713128e-05, + "loss": 2.4393, + "step": 12611 + }, + { + "epoch": 1.017835525784844, + "grad_norm": 0.7021273970603943, + "learning_rate": 6.067675999010945e-05, + "loss": 2.4519, + "step": 12612 + }, + { + "epoch": 1.0179162295214268, + "grad_norm": 0.7124225497245789, + "learning_rate": 6.0662245388944004e-05, + "loss": 2.4417, + "step": 12613 + }, + { + "epoch": 1.0179969332580099, + "grad_norm": 0.7214948534965515, + "learning_rate": 6.064773176817823e-05, + "loss": 2.4708, + "step": 12614 + }, + { + "epoch": 1.0180776369945927, + "grad_norm": 0.6738584041595459, + "learning_rate": 6.063321912817386e-05, + "loss": 2.4574, + "step": 12615 + }, + { + "epoch": 1.0181583407311758, + "grad_norm": 0.7215890884399414, + "learning_rate": 6.061870746929257e-05, + "loss": 2.4903, 
+ "step": 12616 + }, + { + "epoch": 1.018239044467759, + "grad_norm": 0.6720155477523804, + "learning_rate": 6.0604196791896016e-05, + "loss": 2.4251, + "step": 12617 + }, + { + "epoch": 1.0183197482043418, + "grad_norm": 0.7046420574188232, + "learning_rate": 6.058968709634587e-05, + "loss": 2.446, + "step": 12618 + }, + { + "epoch": 1.0184004519409249, + "grad_norm": 0.6419540047645569, + "learning_rate": 6.0575178383003764e-05, + "loss": 2.4052, + "step": 12619 + }, + { + "epoch": 1.018481155677508, + "grad_norm": 0.6948695182800293, + "learning_rate": 6.0560670652231235e-05, + "loss": 2.5068, + "step": 12620 + }, + { + "epoch": 1.0185618594140908, + "grad_norm": 0.7274870276451111, + "learning_rate": 6.05461639043899e-05, + "loss": 2.4705, + "step": 12621 + }, + { + "epoch": 1.018642563150674, + "grad_norm": 0.6809766292572021, + "learning_rate": 6.053165813984134e-05, + "loss": 2.3767, + "step": 12622 + }, + { + "epoch": 1.0187232668872568, + "grad_norm": 0.6197625994682312, + "learning_rate": 6.0517153358946985e-05, + "loss": 2.4639, + "step": 12623 + }, + { + "epoch": 1.0188039706238399, + "grad_norm": 0.6613010764122009, + "learning_rate": 6.050264956206837e-05, + "loss": 2.5155, + "step": 12624 + }, + { + "epoch": 1.018884674360423, + "grad_norm": 0.7335553765296936, + "learning_rate": 6.0488146749567e-05, + "loss": 2.5344, + "step": 12625 + }, + { + "epoch": 1.0189653780970058, + "grad_norm": 0.7175146341323853, + "learning_rate": 6.047364492180428e-05, + "loss": 2.4972, + "step": 12626 + }, + { + "epoch": 1.019046081833589, + "grad_norm": 0.6825357675552368, + "learning_rate": 6.045914407914166e-05, + "loss": 2.4356, + "step": 12627 + }, + { + "epoch": 1.019126785570172, + "grad_norm": 0.6369633078575134, + "learning_rate": 6.044464422194056e-05, + "loss": 2.4692, + "step": 12628 + }, + { + "epoch": 1.0192074893067549, + "grad_norm": 0.7407073378562927, + "learning_rate": 6.0430145350562264e-05, + "loss": 2.4565, + "step": 12629 + }, + { + "epoch": 
1.019288193043338, + "grad_norm": 0.6836552619934082, + "learning_rate": 6.041564746536821e-05, + "loss": 2.4357, + "step": 12630 + }, + { + "epoch": 1.0193688967799208, + "grad_norm": 0.6778741478919983, + "learning_rate": 6.040115056671972e-05, + "loss": 2.424, + "step": 12631 + }, + { + "epoch": 1.019449600516504, + "grad_norm": 0.6440724730491638, + "learning_rate": 6.0386654654978035e-05, + "loss": 2.4455, + "step": 12632 + }, + { + "epoch": 1.019530304253087, + "grad_norm": 0.681376039981842, + "learning_rate": 6.0372159730504476e-05, + "loss": 2.4562, + "step": 12633 + }, + { + "epoch": 1.0196110079896699, + "grad_norm": 0.657462477684021, + "learning_rate": 6.035766579366029e-05, + "loss": 2.4315, + "step": 12634 + }, + { + "epoch": 1.019691711726253, + "grad_norm": 0.6540380716323853, + "learning_rate": 6.0343172844806706e-05, + "loss": 2.4789, + "step": 12635 + }, + { + "epoch": 1.019772415462836, + "grad_norm": 0.711883008480072, + "learning_rate": 6.03286808843049e-05, + "loss": 2.4178, + "step": 12636 + }, + { + "epoch": 1.019853119199419, + "grad_norm": 0.6746736168861389, + "learning_rate": 6.031418991251607e-05, + "loss": 2.4351, + "step": 12637 + }, + { + "epoch": 1.019933822936002, + "grad_norm": 0.677237331867218, + "learning_rate": 6.02996999298014e-05, + "loss": 2.4335, + "step": 12638 + }, + { + "epoch": 1.0200145266725849, + "grad_norm": 0.6950497627258301, + "learning_rate": 6.0285210936521955e-05, + "loss": 2.5178, + "step": 12639 + }, + { + "epoch": 1.020095230409168, + "grad_norm": 0.6349243521690369, + "learning_rate": 6.027072293303885e-05, + "loss": 2.4405, + "step": 12640 + }, + { + "epoch": 1.020175934145751, + "grad_norm": 0.744276762008667, + "learning_rate": 6.0256235919713236e-05, + "loss": 2.5156, + "step": 12641 + }, + { + "epoch": 1.020256637882334, + "grad_norm": 0.7697997689247131, + "learning_rate": 6.0241749896906075e-05, + "loss": 2.4393, + "step": 12642 + }, + { + "epoch": 1.020337341618917, + "grad_norm": 
0.7784204483032227, + "learning_rate": 6.022726486497844e-05, + "loss": 2.4565, + "step": 12643 + }, + { + "epoch": 1.0204180453555, + "grad_norm": 0.7434312701225281, + "learning_rate": 6.021278082429136e-05, + "loss": 2.4637, + "step": 12644 + }, + { + "epoch": 1.020498749092083, + "grad_norm": 0.7770118117332458, + "learning_rate": 6.019829777520575e-05, + "loss": 2.4998, + "step": 12645 + }, + { + "epoch": 1.020579452828666, + "grad_norm": 0.7021752595901489, + "learning_rate": 6.01838157180826e-05, + "loss": 2.4661, + "step": 12646 + }, + { + "epoch": 1.0206601565652489, + "grad_norm": 0.6812437176704407, + "learning_rate": 6.0169334653282895e-05, + "loss": 2.4611, + "step": 12647 + }, + { + "epoch": 1.020740860301832, + "grad_norm": 0.757724940776825, + "learning_rate": 6.0154854581167455e-05, + "loss": 2.4427, + "step": 12648 + }, + { + "epoch": 1.020821564038415, + "grad_norm": 0.7386252880096436, + "learning_rate": 6.014037550209718e-05, + "loss": 2.424, + "step": 12649 + }, + { + "epoch": 1.020902267774998, + "grad_norm": 0.7138059735298157, + "learning_rate": 6.012589741643295e-05, + "loss": 2.4951, + "step": 12650 + }, + { + "epoch": 1.020982971511581, + "grad_norm": 0.714022159576416, + "learning_rate": 6.011142032453561e-05, + "loss": 2.4398, + "step": 12651 + }, + { + "epoch": 1.0210636752481639, + "grad_norm": 0.6961550712585449, + "learning_rate": 6.00969442267659e-05, + "loss": 2.4495, + "step": 12652 + }, + { + "epoch": 1.021144378984747, + "grad_norm": 0.7196643948554993, + "learning_rate": 6.008246912348467e-05, + "loss": 2.4449, + "step": 12653 + }, + { + "epoch": 1.02122508272133, + "grad_norm": 0.6163341999053955, + "learning_rate": 6.006799501505268e-05, + "loss": 2.4108, + "step": 12654 + }, + { + "epoch": 1.021305786457913, + "grad_norm": 0.6657030582427979, + "learning_rate": 6.005352190183061e-05, + "loss": 2.4328, + "step": 12655 + }, + { + "epoch": 1.021386490194496, + "grad_norm": 0.7183353900909424, + "learning_rate": 
6.00390497841792e-05, + "loss": 2.4912, + "step": 12656 + }, + { + "epoch": 1.021467193931079, + "grad_norm": 0.6912575364112854, + "learning_rate": 6.002457866245916e-05, + "loss": 2.4597, + "step": 12657 + }, + { + "epoch": 1.021547897667662, + "grad_norm": 0.7395210266113281, + "learning_rate": 6.0010108537031084e-05, + "loss": 2.4823, + "step": 12658 + }, + { + "epoch": 1.021628601404245, + "grad_norm": 0.722618043422699, + "learning_rate": 5.9995639408255636e-05, + "loss": 2.4924, + "step": 12659 + }, + { + "epoch": 1.021709305140828, + "grad_norm": 0.739009439945221, + "learning_rate": 5.998117127649344e-05, + "loss": 2.4454, + "step": 12660 + }, + { + "epoch": 1.021790008877411, + "grad_norm": 0.7017633318901062, + "learning_rate": 5.996670414210506e-05, + "loss": 2.5058, + "step": 12661 + }, + { + "epoch": 1.021870712613994, + "grad_norm": 0.742664635181427, + "learning_rate": 5.9952238005451046e-05, + "loss": 2.436, + "step": 12662 + }, + { + "epoch": 1.021951416350577, + "grad_norm": 0.6865660548210144, + "learning_rate": 5.9937772866892e-05, + "loss": 2.4364, + "step": 12663 + }, + { + "epoch": 1.02203212008716, + "grad_norm": 0.7376219034194946, + "learning_rate": 5.992330872678833e-05, + "loss": 2.4975, + "step": 12664 + }, + { + "epoch": 1.0221128238237431, + "grad_norm": 0.6496078372001648, + "learning_rate": 5.990884558550054e-05, + "loss": 2.4651, + "step": 12665 + }, + { + "epoch": 1.022193527560326, + "grad_norm": 0.7178322076797485, + "learning_rate": 5.989438344338915e-05, + "loss": 2.5015, + "step": 12666 + }, + { + "epoch": 1.022274231296909, + "grad_norm": 0.7084102034568787, + "learning_rate": 5.987992230081459e-05, + "loss": 2.4741, + "step": 12667 + }, + { + "epoch": 1.022354935033492, + "grad_norm": 0.6634935736656189, + "learning_rate": 5.986546215813722e-05, + "loss": 2.4255, + "step": 12668 + }, + { + "epoch": 1.022435638770075, + "grad_norm": 0.6897543668746948, + "learning_rate": 5.985100301571742e-05, + "loss": 2.4682, + "step": 
12669 + }, + { + "epoch": 1.0225163425066581, + "grad_norm": 0.6643948554992676, + "learning_rate": 5.9836544873915614e-05, + "loss": 2.4009, + "step": 12670 + }, + { + "epoch": 1.022597046243241, + "grad_norm": 0.681252658367157, + "learning_rate": 5.982208773309208e-05, + "loss": 2.4542, + "step": 12671 + }, + { + "epoch": 1.022677749979824, + "grad_norm": 0.7608681917190552, + "learning_rate": 5.980763159360714e-05, + "loss": 2.5614, + "step": 12672 + }, + { + "epoch": 1.0227584537164072, + "grad_norm": 0.6855095028877258, + "learning_rate": 5.979317645582112e-05, + "loss": 2.4505, + "step": 12673 + }, + { + "epoch": 1.02283915745299, + "grad_norm": 0.6846089363098145, + "learning_rate": 5.97787223200942e-05, + "loss": 2.4438, + "step": 12674 + }, + { + "epoch": 1.0229198611895731, + "grad_norm": 0.7198090553283691, + "learning_rate": 5.9764269186786684e-05, + "loss": 2.4469, + "step": 12675 + }, + { + "epoch": 1.023000564926156, + "grad_norm": 0.7120245099067688, + "learning_rate": 5.9749817056258764e-05, + "loss": 2.4626, + "step": 12676 + }, + { + "epoch": 1.023081268662739, + "grad_norm": 0.6839897036552429, + "learning_rate": 5.973536592887059e-05, + "loss": 2.4384, + "step": 12677 + }, + { + "epoch": 1.0231619723993222, + "grad_norm": 0.7053773999214172, + "learning_rate": 5.9720915804982356e-05, + "loss": 2.4554, + "step": 12678 + }, + { + "epoch": 1.023242676135905, + "grad_norm": 0.7114294767379761, + "learning_rate": 5.970646668495421e-05, + "loss": 2.3964, + "step": 12679 + }, + { + "epoch": 1.0233233798724881, + "grad_norm": 0.7001516819000244, + "learning_rate": 5.9692018569146224e-05, + "loss": 2.5216, + "step": 12680 + }, + { + "epoch": 1.0234040836090712, + "grad_norm": 0.6715773940086365, + "learning_rate": 5.96775714579185e-05, + "loss": 2.4595, + "step": 12681 + }, + { + "epoch": 1.023484787345654, + "grad_norm": 0.6856278777122498, + "learning_rate": 5.96631253516311e-05, + "loss": 2.4637, + "step": 12682 + }, + { + "epoch": 
1.0235654910822372, + "grad_norm": 0.6785625219345093, + "learning_rate": 5.96486802506441e-05, + "loss": 2.4615, + "step": 12683 + }, + { + "epoch": 1.02364619481882, + "grad_norm": 0.6834213137626648, + "learning_rate": 5.963423615531743e-05, + "loss": 2.4729, + "step": 12684 + }, + { + "epoch": 1.023726898555403, + "grad_norm": 0.6729516386985779, + "learning_rate": 5.961979306601109e-05, + "loss": 2.4013, + "step": 12685 + }, + { + "epoch": 1.0238076022919862, + "grad_norm": 0.6785775423049927, + "learning_rate": 5.960535098308511e-05, + "loss": 2.4825, + "step": 12686 + }, + { + "epoch": 1.023888306028569, + "grad_norm": 0.67277991771698, + "learning_rate": 5.959090990689934e-05, + "loss": 2.4606, + "step": 12687 + }, + { + "epoch": 1.0239690097651521, + "grad_norm": 0.7679588198661804, + "learning_rate": 5.957646983781373e-05, + "loss": 2.5234, + "step": 12688 + }, + { + "epoch": 1.0240497135017352, + "grad_norm": 0.6597407460212708, + "learning_rate": 5.956203077618821e-05, + "loss": 2.4699, + "step": 12689 + }, + { + "epoch": 1.024130417238318, + "grad_norm": 0.6743008494377136, + "learning_rate": 5.9547592722382525e-05, + "loss": 2.4266, + "step": 12690 + }, + { + "epoch": 1.0242111209749012, + "grad_norm": 0.7223396897315979, + "learning_rate": 5.953315567675657e-05, + "loss": 2.5117, + "step": 12691 + }, + { + "epoch": 1.024291824711484, + "grad_norm": 0.6729528307914734, + "learning_rate": 5.951871963967022e-05, + "loss": 2.4586, + "step": 12692 + }, + { + "epoch": 1.0243725284480671, + "grad_norm": 0.6523739695549011, + "learning_rate": 5.950428461148314e-05, + "loss": 2.4408, + "step": 12693 + }, + { + "epoch": 1.0244532321846502, + "grad_norm": 0.6830984950065613, + "learning_rate": 5.9489850592555164e-05, + "loss": 2.4094, + "step": 12694 + }, + { + "epoch": 1.024533935921233, + "grad_norm": 0.6223493814468384, + "learning_rate": 5.9475417583246006e-05, + "loss": 2.4105, + "step": 12695 + }, + { + "epoch": 1.0246146396578162, + "grad_norm": 
0.6506635546684265, + "learning_rate": 5.9460985583915374e-05, + "loss": 2.4451, + "step": 12696 + }, + { + "epoch": 1.024695343394399, + "grad_norm": 0.7626760005950928, + "learning_rate": 5.944655459492293e-05, + "loss": 2.4643, + "step": 12697 + }, + { + "epoch": 1.0247760471309821, + "grad_norm": 0.7074631452560425, + "learning_rate": 5.943212461662837e-05, + "loss": 2.4662, + "step": 12698 + }, + { + "epoch": 1.0248567508675652, + "grad_norm": 0.718083918094635, + "learning_rate": 5.9417695649391346e-05, + "loss": 2.4686, + "step": 12699 + }, + { + "epoch": 1.024937454604148, + "grad_norm": 0.6850628852844238, + "learning_rate": 5.9403267693571384e-05, + "loss": 2.4542, + "step": 12700 + }, + { + "epoch": 1.0250181583407312, + "grad_norm": 0.6662585735321045, + "learning_rate": 5.938884074952812e-05, + "loss": 2.4676, + "step": 12701 + }, + { + "epoch": 1.0250988620773143, + "grad_norm": 0.6806240677833557, + "learning_rate": 5.9374414817621114e-05, + "loss": 2.4243, + "step": 12702 + }, + { + "epoch": 1.0251795658138971, + "grad_norm": 0.6763548851013184, + "learning_rate": 5.9359989898209876e-05, + "loss": 2.4389, + "step": 12703 + }, + { + "epoch": 1.0252602695504802, + "grad_norm": 0.7390143275260925, + "learning_rate": 5.934556599165393e-05, + "loss": 2.4667, + "step": 12704 + }, + { + "epoch": 1.025340973287063, + "grad_norm": 0.6159299612045288, + "learning_rate": 5.933114309831276e-05, + "loss": 2.3832, + "step": 12705 + }, + { + "epoch": 1.0254216770236462, + "grad_norm": 0.6779586672782898, + "learning_rate": 5.931672121854579e-05, + "loss": 2.4615, + "step": 12706 + }, + { + "epoch": 1.0255023807602293, + "grad_norm": 0.643800675868988, + "learning_rate": 5.930230035271247e-05, + "loss": 2.4725, + "step": 12707 + }, + { + "epoch": 1.0255830844968121, + "grad_norm": 0.6605903506278992, + "learning_rate": 5.928788050117227e-05, + "loss": 2.4332, + "step": 12708 + }, + { + "epoch": 1.0256637882333952, + "grad_norm": 0.7046334743499756, + 
"learning_rate": 5.927346166428446e-05, + "loss": 2.4445, + "step": 12709 + }, + { + "epoch": 1.0257444919699783, + "grad_norm": 0.6536325216293335, + "learning_rate": 5.925904384240843e-05, + "loss": 2.4168, + "step": 12710 + }, + { + "epoch": 1.0258251957065612, + "grad_norm": 0.6861097812652588, + "learning_rate": 5.9244627035903564e-05, + "loss": 2.512, + "step": 12711 + }, + { + "epoch": 1.0259058994431443, + "grad_norm": 0.6782278418540955, + "learning_rate": 5.923021124512911e-05, + "loss": 2.4667, + "step": 12712 + }, + { + "epoch": 1.0259866031797271, + "grad_norm": 0.724435031414032, + "learning_rate": 5.921579647044436e-05, + "loss": 2.4828, + "step": 12713 + }, + { + "epoch": 1.0260673069163102, + "grad_norm": 0.6690630316734314, + "learning_rate": 5.9201382712208575e-05, + "loss": 2.4832, + "step": 12714 + }, + { + "epoch": 1.0261480106528933, + "grad_norm": 0.7045348286628723, + "learning_rate": 5.9186969970781015e-05, + "loss": 2.4576, + "step": 12715 + }, + { + "epoch": 1.0262287143894762, + "grad_norm": 0.673321008682251, + "learning_rate": 5.9172558246520796e-05, + "loss": 2.3986, + "step": 12716 + }, + { + "epoch": 1.0263094181260592, + "grad_norm": 0.7184785008430481, + "learning_rate": 5.915814753978717e-05, + "loss": 2.4008, + "step": 12717 + }, + { + "epoch": 1.0263901218626423, + "grad_norm": 0.6971293091773987, + "learning_rate": 5.914373785093931e-05, + "loss": 2.4559, + "step": 12718 + }, + { + "epoch": 1.0264708255992252, + "grad_norm": 0.6941563487052917, + "learning_rate": 5.912932918033626e-05, + "loss": 2.4787, + "step": 12719 + }, + { + "epoch": 1.0265515293358083, + "grad_norm": 0.6276142001152039, + "learning_rate": 5.911492152833715e-05, + "loss": 2.4275, + "step": 12720 + }, + { + "epoch": 1.0266322330723912, + "grad_norm": 0.715928316116333, + "learning_rate": 5.9100514895301106e-05, + "loss": 2.4127, + "step": 12721 + }, + { + "epoch": 1.0267129368089742, + "grad_norm": 0.7004076838493347, + "learning_rate": 
5.908610928158713e-05, + "loss": 2.4651, + "step": 12722 + }, + { + "epoch": 1.0267936405455573, + "grad_norm": 0.6761921048164368, + "learning_rate": 5.907170468755425e-05, + "loss": 2.4245, + "step": 12723 + }, + { + "epoch": 1.0268743442821402, + "grad_norm": 0.7246574759483337, + "learning_rate": 5.9057301113561515e-05, + "loss": 2.4489, + "step": 12724 + }, + { + "epoch": 1.0269550480187233, + "grad_norm": 0.7196606397628784, + "learning_rate": 5.904289855996783e-05, + "loss": 2.4357, + "step": 12725 + }, + { + "epoch": 1.0270357517553064, + "grad_norm": 0.7142692804336548, + "learning_rate": 5.902849702713216e-05, + "loss": 2.4821, + "step": 12726 + }, + { + "epoch": 1.0271164554918892, + "grad_norm": 0.7207832336425781, + "learning_rate": 5.9014096515413454e-05, + "loss": 2.4337, + "step": 12727 + }, + { + "epoch": 1.0271971592284723, + "grad_norm": 0.6865695714950562, + "learning_rate": 5.899969702517063e-05, + "loss": 2.4549, + "step": 12728 + }, + { + "epoch": 1.0272778629650552, + "grad_norm": 0.7136662006378174, + "learning_rate": 5.898529855676249e-05, + "loss": 2.4606, + "step": 12729 + }, + { + "epoch": 1.0273585667016383, + "grad_norm": 0.701885998249054, + "learning_rate": 5.897090111054795e-05, + "loss": 2.4913, + "step": 12730 + }, + { + "epoch": 1.0274392704382214, + "grad_norm": 0.6671354174613953, + "learning_rate": 5.8956504686885805e-05, + "loss": 2.4064, + "step": 12731 + }, + { + "epoch": 1.0275199741748042, + "grad_norm": 0.6720621585845947, + "learning_rate": 5.894210928613484e-05, + "loss": 2.4908, + "step": 12732 + }, + { + "epoch": 1.0276006779113873, + "grad_norm": 0.7530980706214905, + "learning_rate": 5.892771490865383e-05, + "loss": 2.4486, + "step": 12733 + }, + { + "epoch": 1.0276813816479704, + "grad_norm": 0.6771122813224792, + "learning_rate": 5.891332155480158e-05, + "loss": 2.3954, + "step": 12734 + }, + { + "epoch": 1.0277620853845533, + "grad_norm": 0.6779236793518066, + "learning_rate": 5.889892922493671e-05, + "loss": 
2.4404, + "step": 12735 + }, + { + "epoch": 1.0278427891211364, + "grad_norm": 0.7593358755111694, + "learning_rate": 5.8884537919417974e-05, + "loss": 2.4997, + "step": 12736 + }, + { + "epoch": 1.0279234928577192, + "grad_norm": 0.672686755657196, + "learning_rate": 5.8870147638604044e-05, + "loss": 2.5394, + "step": 12737 + }, + { + "epoch": 1.0280041965943023, + "grad_norm": 0.6727546453475952, + "learning_rate": 5.885575838285353e-05, + "loss": 2.4554, + "step": 12738 + }, + { + "epoch": 1.0280849003308854, + "grad_norm": 0.7092764377593994, + "learning_rate": 5.884137015252507e-05, + "loss": 2.4568, + "step": 12739 + }, + { + "epoch": 1.0281656040674683, + "grad_norm": 0.6988070011138916, + "learning_rate": 5.882698294797728e-05, + "loss": 2.4453, + "step": 12740 + }, + { + "epoch": 1.0282463078040514, + "grad_norm": 0.7578697204589844, + "learning_rate": 5.8812596769568676e-05, + "loss": 2.5648, + "step": 12741 + }, + { + "epoch": 1.0283270115406344, + "grad_norm": 0.6523683667182922, + "learning_rate": 5.879821161765782e-05, + "loss": 2.4088, + "step": 12742 + }, + { + "epoch": 1.0284077152772173, + "grad_norm": 0.6797270178794861, + "learning_rate": 5.878382749260323e-05, + "loss": 2.4465, + "step": 12743 + }, + { + "epoch": 1.0284884190138004, + "grad_norm": 0.6823786497116089, + "learning_rate": 5.876944439476345e-05, + "loss": 2.5053, + "step": 12744 + }, + { + "epoch": 1.0285691227503833, + "grad_norm": 0.6840088367462158, + "learning_rate": 5.875506232449686e-05, + "loss": 2.3771, + "step": 12745 + }, + { + "epoch": 1.0286498264869663, + "grad_norm": 0.6985318064689636, + "learning_rate": 5.8740681282161914e-05, + "loss": 2.4456, + "step": 12746 + }, + { + "epoch": 1.0287305302235494, + "grad_norm": 0.7102388739585876, + "learning_rate": 5.872630126811707e-05, + "loss": 2.4802, + "step": 12747 + }, + { + "epoch": 1.0288112339601323, + "grad_norm": 0.7917937636375427, + "learning_rate": 5.871192228272067e-05, + "loss": 2.4606, + "step": 12748 + }, + { 
+ "epoch": 1.0288919376967154, + "grad_norm": 0.683397114276886, + "learning_rate": 5.86975443263311e-05, + "loss": 2.5011, + "step": 12749 + }, + { + "epoch": 1.0289726414332985, + "grad_norm": 0.7543408870697021, + "learning_rate": 5.8683167399306724e-05, + "loss": 2.4705, + "step": 12750 + }, + { + "epoch": 1.0290533451698813, + "grad_norm": 0.6946283578872681, + "learning_rate": 5.866879150200579e-05, + "loss": 2.4986, + "step": 12751 + }, + { + "epoch": 1.0291340489064644, + "grad_norm": 0.6535125374794006, + "learning_rate": 5.8654416634786605e-05, + "loss": 2.4203, + "step": 12752 + }, + { + "epoch": 1.0292147526430473, + "grad_norm": 0.7470195889472961, + "learning_rate": 5.8640042798007455e-05, + "loss": 2.5103, + "step": 12753 + }, + { + "epoch": 1.0292954563796304, + "grad_norm": 0.6782363653182983, + "learning_rate": 5.8625669992026535e-05, + "loss": 2.4087, + "step": 12754 + }, + { + "epoch": 1.0293761601162135, + "grad_norm": 0.7601497173309326, + "learning_rate": 5.861129821720207e-05, + "loss": 2.4752, + "step": 12755 + }, + { + "epoch": 1.0294568638527963, + "grad_norm": 0.6875388026237488, + "learning_rate": 5.859692747389227e-05, + "loss": 2.448, + "step": 12756 + }, + { + "epoch": 1.0295375675893794, + "grad_norm": 0.7153629064559937, + "learning_rate": 5.858255776245525e-05, + "loss": 2.4641, + "step": 12757 + }, + { + "epoch": 1.0296182713259623, + "grad_norm": 0.682954728603363, + "learning_rate": 5.8568189083249145e-05, + "loss": 2.441, + "step": 12758 + }, + { + "epoch": 1.0296989750625454, + "grad_norm": 0.6959100961685181, + "learning_rate": 5.855382143663209e-05, + "loss": 2.4316, + "step": 12759 + }, + { + "epoch": 1.0297796787991285, + "grad_norm": 0.7062023878097534, + "learning_rate": 5.8539454822962167e-05, + "loss": 2.4287, + "step": 12760 + }, + { + "epoch": 1.0298603825357113, + "grad_norm": 0.706523597240448, + "learning_rate": 5.852508924259736e-05, + "loss": 2.4596, + "step": 12761 + }, + { + "epoch": 1.0299410862722944, + 
"grad_norm": 0.6908385753631592, + "learning_rate": 5.851072469589578e-05, + "loss": 2.4428, + "step": 12762 + }, + { + "epoch": 1.0300217900088775, + "grad_norm": 0.6810726523399353, + "learning_rate": 5.8496361183215386e-05, + "loss": 2.4902, + "step": 12763 + }, + { + "epoch": 1.0301024937454604, + "grad_norm": 0.661613941192627, + "learning_rate": 5.8481998704914156e-05, + "loss": 2.4256, + "step": 12764 + }, + { + "epoch": 1.0301831974820435, + "grad_norm": 0.6633132100105286, + "learning_rate": 5.846763726135005e-05, + "loss": 2.4512, + "step": 12765 + }, + { + "epoch": 1.0302639012186263, + "grad_norm": 0.6991820335388184, + "learning_rate": 5.8453276852881025e-05, + "loss": 2.3747, + "step": 12766 + }, + { + "epoch": 1.0303446049552094, + "grad_norm": 0.7392076253890991, + "learning_rate": 5.843891747986487e-05, + "loss": 2.438, + "step": 12767 + }, + { + "epoch": 1.0304253086917925, + "grad_norm": 0.6371724605560303, + "learning_rate": 5.842455914265958e-05, + "loss": 2.4627, + "step": 12768 + }, + { + "epoch": 1.0305060124283754, + "grad_norm": 0.6475048661231995, + "learning_rate": 5.841020184162298e-05, + "loss": 2.4883, + "step": 12769 + }, + { + "epoch": 1.0305867161649584, + "grad_norm": 0.6848995685577393, + "learning_rate": 5.839584557711283e-05, + "loss": 2.4452, + "step": 12770 + }, + { + "epoch": 1.0306674199015415, + "grad_norm": 0.7345505952835083, + "learning_rate": 5.838149034948697e-05, + "loss": 2.5121, + "step": 12771 + }, + { + "epoch": 1.0307481236381244, + "grad_norm": 0.715373158454895, + "learning_rate": 5.836713615910318e-05, + "loss": 2.4549, + "step": 12772 + }, + { + "epoch": 1.0308288273747075, + "grad_norm": 0.7371035814285278, + "learning_rate": 5.8352783006319166e-05, + "loss": 2.4633, + "step": 12773 + }, + { + "epoch": 1.0309095311112904, + "grad_norm": 0.6843077540397644, + "learning_rate": 5.833843089149267e-05, + "loss": 2.4067, + "step": 12774 + }, + { + "epoch": 1.0309902348478734, + "grad_norm": 0.7398965954780579, + 
"learning_rate": 5.832407981498136e-05, + "loss": 2.5199, + "step": 12775 + }, + { + "epoch": 1.0310709385844565, + "grad_norm": 0.6860283017158508, + "learning_rate": 5.830972977714294e-05, + "loss": 2.4564, + "step": 12776 + }, + { + "epoch": 1.0311516423210394, + "grad_norm": 0.683893084526062, + "learning_rate": 5.829538077833503e-05, + "loss": 2.4635, + "step": 12777 + }, + { + "epoch": 1.0312323460576225, + "grad_norm": 0.6412089467048645, + "learning_rate": 5.828103281891525e-05, + "loss": 2.4806, + "step": 12778 + }, + { + "epoch": 1.0313130497942056, + "grad_norm": 0.646393895149231, + "learning_rate": 5.826668589924123e-05, + "loss": 2.4674, + "step": 12779 + }, + { + "epoch": 1.0313937535307884, + "grad_norm": 0.6805605292320251, + "learning_rate": 5.825234001967044e-05, + "loss": 2.5145, + "step": 12780 + }, + { + "epoch": 1.0314744572673715, + "grad_norm": 0.681532084941864, + "learning_rate": 5.8237995180560455e-05, + "loss": 2.5041, + "step": 12781 + }, + { + "epoch": 1.0315551610039544, + "grad_norm": 0.6971312165260315, + "learning_rate": 5.8223651382268865e-05, + "loss": 2.5324, + "step": 12782 + }, + { + "epoch": 1.0316358647405375, + "grad_norm": 0.6634463667869568, + "learning_rate": 5.8209308625153026e-05, + "loss": 2.5086, + "step": 12783 + }, + { + "epoch": 1.0317165684771206, + "grad_norm": 0.6752117276191711, + "learning_rate": 5.819496690957047e-05, + "loss": 2.4805, + "step": 12784 + }, + { + "epoch": 1.0317972722137034, + "grad_norm": 0.7242109775543213, + "learning_rate": 5.818062623587861e-05, + "loss": 2.4205, + "step": 12785 + }, + { + "epoch": 1.0318779759502865, + "grad_norm": 0.7338563203811646, + "learning_rate": 5.816628660443486e-05, + "loss": 2.4277, + "step": 12786 + }, + { + "epoch": 1.0319586796868696, + "grad_norm": 0.6764293313026428, + "learning_rate": 5.81519480155966e-05, + "loss": 2.5096, + "step": 12787 + }, + { + "epoch": 1.0320393834234525, + "grad_norm": 0.6757099032402039, + "learning_rate": 
5.813761046972124e-05, + "loss": 2.468, + "step": 12788 + }, + { + "epoch": 1.0321200871600356, + "grad_norm": 0.7072502374649048, + "learning_rate": 5.8123273967166017e-05, + "loss": 2.4642, + "step": 12789 + }, + { + "epoch": 1.0322007908966184, + "grad_norm": 0.6470256447792053, + "learning_rate": 5.810893850828827e-05, + "loss": 2.4146, + "step": 12790 + }, + { + "epoch": 1.0322814946332015, + "grad_norm": 0.7403351068496704, + "learning_rate": 5.809460409344527e-05, + "loss": 2.512, + "step": 12791 + }, + { + "epoch": 1.0323621983697846, + "grad_norm": 0.6711490154266357, + "learning_rate": 5.808027072299432e-05, + "loss": 2.4602, + "step": 12792 + }, + { + "epoch": 1.0324429021063675, + "grad_norm": 0.7920248508453369, + "learning_rate": 5.806593839729258e-05, + "loss": 2.4512, + "step": 12793 + }, + { + "epoch": 1.0325236058429506, + "grad_norm": 0.6442045569419861, + "learning_rate": 5.805160711669725e-05, + "loss": 2.4165, + "step": 12794 + }, + { + "epoch": 1.0326043095795336, + "grad_norm": 0.6681340932846069, + "learning_rate": 5.803727688156553e-05, + "loss": 2.4296, + "step": 12795 + }, + { + "epoch": 1.0326850133161165, + "grad_norm": 0.6653337478637695, + "learning_rate": 5.802294769225457e-05, + "loss": 2.5165, + "step": 12796 + }, + { + "epoch": 1.0327657170526996, + "grad_norm": 0.6444782018661499, + "learning_rate": 5.8008619549121476e-05, + "loss": 2.4266, + "step": 12797 + }, + { + "epoch": 1.0328464207892825, + "grad_norm": 0.6741451621055603, + "learning_rate": 5.7994292452523394e-05, + "loss": 2.4837, + "step": 12798 + }, + { + "epoch": 1.0329271245258655, + "grad_norm": 0.6629341840744019, + "learning_rate": 5.797996640281731e-05, + "loss": 2.4368, + "step": 12799 + }, + { + "epoch": 1.0330078282624486, + "grad_norm": 0.6755850315093994, + "learning_rate": 5.796564140036029e-05, + "loss": 2.4834, + "step": 12800 + }, + { + "epoch": 1.0330885319990315, + "grad_norm": 0.7271782755851746, + "learning_rate": 5.795131744550942e-05, + "loss": 
2.5025, + "step": 12801 + }, + { + "epoch": 1.0331692357356146, + "grad_norm": 0.6870545744895935, + "learning_rate": 5.7936994538621605e-05, + "loss": 2.4443, + "step": 12802 + }, + { + "epoch": 1.0332499394721975, + "grad_norm": 0.7231935858726501, + "learning_rate": 5.792267268005382e-05, + "loss": 2.4917, + "step": 12803 + }, + { + "epoch": 1.0333306432087805, + "grad_norm": 0.6905832290649414, + "learning_rate": 5.790835187016307e-05, + "loss": 2.4902, + "step": 12804 + }, + { + "epoch": 1.0334113469453636, + "grad_norm": 0.711814284324646, + "learning_rate": 5.789403210930613e-05, + "loss": 2.4579, + "step": 12805 + }, + { + "epoch": 1.0334920506819465, + "grad_norm": 0.6982280015945435, + "learning_rate": 5.787971339784004e-05, + "loss": 2.5275, + "step": 12806 + }, + { + "epoch": 1.0335727544185296, + "grad_norm": 0.6871493458747864, + "learning_rate": 5.7865395736121575e-05, + "loss": 2.4401, + "step": 12807 + }, + { + "epoch": 1.0336534581551127, + "grad_norm": 0.6898353099822998, + "learning_rate": 5.785107912450763e-05, + "loss": 2.4005, + "step": 12808 + }, + { + "epoch": 1.0337341618916955, + "grad_norm": 0.6264411807060242, + "learning_rate": 5.7836763563354946e-05, + "loss": 2.4497, + "step": 12809 + }, + { + "epoch": 1.0338148656282786, + "grad_norm": 0.6997092962265015, + "learning_rate": 5.782244905302032e-05, + "loss": 2.4388, + "step": 12810 + }, + { + "epoch": 1.0338955693648615, + "grad_norm": 0.6834601759910583, + "learning_rate": 5.7808135593860555e-05, + "loss": 2.4298, + "step": 12811 + }, + { + "epoch": 1.0339762731014446, + "grad_norm": 0.664315402507782, + "learning_rate": 5.77938231862323e-05, + "loss": 2.4289, + "step": 12812 + }, + { + "epoch": 1.0340569768380277, + "grad_norm": 0.6660603284835815, + "learning_rate": 5.7779511830492306e-05, + "loss": 2.4772, + "step": 12813 + }, + { + "epoch": 1.0341376805746105, + "grad_norm": 0.6457028388977051, + "learning_rate": 5.776520152699728e-05, + "loss": 2.4408, + "step": 12814 + }, + { + 
"epoch": 1.0342183843111936, + "grad_norm": 0.7132207155227661, + "learning_rate": 5.7750892276103794e-05, + "loss": 2.4953, + "step": 12815 + }, + { + "epoch": 1.0342990880477767, + "grad_norm": 0.7397382259368896, + "learning_rate": 5.773658407816848e-05, + "loss": 2.4396, + "step": 12816 + }, + { + "epoch": 1.0343797917843596, + "grad_norm": 0.6951746344566345, + "learning_rate": 5.7722276933548034e-05, + "loss": 2.5021, + "step": 12817 + }, + { + "epoch": 1.0344604955209427, + "grad_norm": 0.6789736151695251, + "learning_rate": 5.7707970842598935e-05, + "loss": 2.4883, + "step": 12818 + }, + { + "epoch": 1.0345411992575255, + "grad_norm": 0.7231541872024536, + "learning_rate": 5.7693665805677747e-05, + "loss": 2.4761, + "step": 12819 + }, + { + "epoch": 1.0346219029941086, + "grad_norm": 0.685943603515625, + "learning_rate": 5.767936182314104e-05, + "loss": 2.4489, + "step": 12820 + }, + { + "epoch": 1.0347026067306917, + "grad_norm": 0.7081817984580994, + "learning_rate": 5.7665058895345236e-05, + "loss": 2.4329, + "step": 12821 + }, + { + "epoch": 1.0347833104672746, + "grad_norm": 0.6700818538665771, + "learning_rate": 5.7650757022646804e-05, + "loss": 2.4252, + "step": 12822 + }, + { + "epoch": 1.0348640142038577, + "grad_norm": 0.6712214946746826, + "learning_rate": 5.763645620540223e-05, + "loss": 2.419, + "step": 12823 + }, + { + "epoch": 1.0349447179404407, + "grad_norm": 0.6732817888259888, + "learning_rate": 5.762215644396793e-05, + "loss": 2.3928, + "step": 12824 + }, + { + "epoch": 1.0350254216770236, + "grad_norm": 0.6689301133155823, + "learning_rate": 5.760785773870024e-05, + "loss": 2.3981, + "step": 12825 + }, + { + "epoch": 1.0351061254136067, + "grad_norm": 0.6822957992553711, + "learning_rate": 5.759356008995556e-05, + "loss": 2.5265, + "step": 12826 + }, + { + "epoch": 1.0351868291501896, + "grad_norm": 0.7316287755966187, + "learning_rate": 5.7579263498090194e-05, + "loss": 2.4132, + "step": 12827 + }, + { + "epoch": 1.0352675328867726, + 
"grad_norm": 0.6688703894615173, + "learning_rate": 5.756496796346047e-05, + "loss": 2.4195, + "step": 12828 + }, + { + "epoch": 1.0353482366233557, + "grad_norm": 0.6894570589065552, + "learning_rate": 5.755067348642268e-05, + "loss": 2.4897, + "step": 12829 + }, + { + "epoch": 1.0354289403599386, + "grad_norm": 0.7635753750801086, + "learning_rate": 5.753638006733311e-05, + "loss": 2.4643, + "step": 12830 + }, + { + "epoch": 1.0355096440965217, + "grad_norm": 0.6353672742843628, + "learning_rate": 5.75220877065479e-05, + "loss": 2.4533, + "step": 12831 + }, + { + "epoch": 1.0355903478331048, + "grad_norm": 0.6725208759307861, + "learning_rate": 5.750779640442332e-05, + "loss": 2.4958, + "step": 12832 + }, + { + "epoch": 1.0356710515696876, + "grad_norm": 0.7350767254829407, + "learning_rate": 5.749350616131556e-05, + "loss": 2.4192, + "step": 12833 + }, + { + "epoch": 1.0357517553062707, + "grad_norm": 0.7322222590446472, + "learning_rate": 5.7479216977580695e-05, + "loss": 2.4719, + "step": 12834 + }, + { + "epoch": 1.0358324590428536, + "grad_norm": 0.7233425974845886, + "learning_rate": 5.7464928853574904e-05, + "loss": 2.4707, + "step": 12835 + }, + { + "epoch": 1.0359131627794367, + "grad_norm": 0.7117420434951782, + "learning_rate": 5.745064178965427e-05, + "loss": 2.4463, + "step": 12836 + }, + { + "epoch": 1.0359938665160198, + "grad_norm": 0.7615050077438354, + "learning_rate": 5.743635578617486e-05, + "loss": 2.4256, + "step": 12837 + }, + { + "epoch": 1.0360745702526026, + "grad_norm": 0.7056093215942383, + "learning_rate": 5.7422070843492734e-05, + "loss": 2.4628, + "step": 12838 + }, + { + "epoch": 1.0361552739891857, + "grad_norm": 0.685989499092102, + "learning_rate": 5.740778696196389e-05, + "loss": 2.4271, + "step": 12839 + }, + { + "epoch": 1.0362359777257688, + "grad_norm": 0.7286686301231384, + "learning_rate": 5.739350414194439e-05, + "loss": 2.4984, + "step": 12840 + }, + { + "epoch": 1.0363166814623517, + "grad_norm": 0.6939802765846252, + 
"learning_rate": 5.737922238379009e-05, + "loss": 2.4601, + "step": 12841 + }, + { + "epoch": 1.0363973851989348, + "grad_norm": 0.7077060341835022, + "learning_rate": 5.736494168785698e-05, + "loss": 2.4264, + "step": 12842 + }, + { + "epoch": 1.0364780889355176, + "grad_norm": 0.667086124420166, + "learning_rate": 5.7350662054501016e-05, + "loss": 2.4733, + "step": 12843 + }, + { + "epoch": 1.0365587926721007, + "grad_norm": 0.6531338691711426, + "learning_rate": 5.7336383484078004e-05, + "loss": 2.4709, + "step": 12844 + }, + { + "epoch": 1.0366394964086838, + "grad_norm": 0.7141630053520203, + "learning_rate": 5.732210597694383e-05, + "loss": 2.4747, + "step": 12845 + }, + { + "epoch": 1.0367202001452667, + "grad_norm": 0.7186396718025208, + "learning_rate": 5.730782953345435e-05, + "loss": 2.4401, + "step": 12846 + }, + { + "epoch": 1.0368009038818498, + "grad_norm": 0.6709686517715454, + "learning_rate": 5.7293554153965345e-05, + "loss": 2.456, + "step": 12847 + }, + { + "epoch": 1.0368816076184326, + "grad_norm": 0.6867267489433289, + "learning_rate": 5.727927983883261e-05, + "loss": 2.4522, + "step": 12848 + }, + { + "epoch": 1.0369623113550157, + "grad_norm": 0.7016724348068237, + "learning_rate": 5.7265006588411926e-05, + "loss": 2.4348, + "step": 12849 + }, + { + "epoch": 1.0370430150915988, + "grad_norm": 0.6764764785766602, + "learning_rate": 5.725073440305896e-05, + "loss": 2.4241, + "step": 12850 + }, + { + "epoch": 1.0371237188281817, + "grad_norm": 0.6965062618255615, + "learning_rate": 5.7236463283129435e-05, + "loss": 2.4559, + "step": 12851 + }, + { + "epoch": 1.0372044225647647, + "grad_norm": 0.6878135800361633, + "learning_rate": 5.7222193228979037e-05, + "loss": 2.4874, + "step": 12852 + }, + { + "epoch": 1.0372851263013478, + "grad_norm": 0.6576557755470276, + "learning_rate": 5.720792424096344e-05, + "loss": 2.4273, + "step": 12853 + }, + { + "epoch": 1.0373658300379307, + "grad_norm": 0.7463123798370361, + "learning_rate": 
5.719365631943818e-05, + "loss": 2.4933, + "step": 12854 + }, + { + "epoch": 1.0374465337745138, + "grad_norm": 0.6920896768569946, + "learning_rate": 5.7179389464758914e-05, + "loss": 2.4799, + "step": 12855 + }, + { + "epoch": 1.0375272375110969, + "grad_norm": 0.7330591082572937, + "learning_rate": 5.71651236772812e-05, + "loss": 2.469, + "step": 12856 + }, + { + "epoch": 1.0376079412476797, + "grad_norm": 0.6766076683998108, + "learning_rate": 5.715085895736057e-05, + "loss": 2.4787, + "step": 12857 + }, + { + "epoch": 1.0376886449842628, + "grad_norm": 0.724278450012207, + "learning_rate": 5.713659530535255e-05, + "loss": 2.4524, + "step": 12858 + }, + { + "epoch": 1.0377693487208457, + "grad_norm": 0.6816281676292419, + "learning_rate": 5.712233272161265e-05, + "loss": 2.4993, + "step": 12859 + }, + { + "epoch": 1.0378500524574288, + "grad_norm": 0.7186439633369446, + "learning_rate": 5.710807120649626e-05, + "loss": 2.4108, + "step": 12860 + }, + { + "epoch": 1.0379307561940119, + "grad_norm": 0.6616777181625366, + "learning_rate": 5.709381076035887e-05, + "loss": 2.4797, + "step": 12861 + }, + { + "epoch": 1.0380114599305947, + "grad_norm": 0.6956895589828491, + "learning_rate": 5.7079551383555906e-05, + "loss": 2.4017, + "step": 12862 + }, + { + "epoch": 1.0380921636671778, + "grad_norm": 0.6650584936141968, + "learning_rate": 5.706529307644268e-05, + "loss": 2.4808, + "step": 12863 + }, + { + "epoch": 1.0381728674037607, + "grad_norm": 0.6362698674201965, + "learning_rate": 5.705103583937458e-05, + "loss": 2.4077, + "step": 12864 + }, + { + "epoch": 1.0382535711403438, + "grad_norm": 0.6962565183639526, + "learning_rate": 5.703677967270697e-05, + "loss": 2.4715, + "step": 12865 + }, + { + "epoch": 1.0383342748769269, + "grad_norm": 0.6927294135093689, + "learning_rate": 5.702252457679509e-05, + "loss": 2.4983, + "step": 12866 + }, + { + "epoch": 1.0384149786135097, + "grad_norm": 0.7107497453689575, + "learning_rate": 5.70082705519942e-05, + "loss": 
2.4198, + "step": 12867 + }, + { + "epoch": 1.0384956823500928, + "grad_norm": 0.6459221243858337, + "learning_rate": 5.6994017598659634e-05, + "loss": 2.4423, + "step": 12868 + }, + { + "epoch": 1.038576386086676, + "grad_norm": 0.705563485622406, + "learning_rate": 5.697976571714658e-05, + "loss": 2.5346, + "step": 12869 + }, + { + "epoch": 1.0386570898232588, + "grad_norm": 0.7424784898757935, + "learning_rate": 5.696551490781021e-05, + "loss": 2.4824, + "step": 12870 + }, + { + "epoch": 1.0387377935598419, + "grad_norm": 0.6820988059043884, + "learning_rate": 5.695126517100569e-05, + "loss": 2.4965, + "step": 12871 + }, + { + "epoch": 1.0388184972964247, + "grad_norm": 0.8209595680236816, + "learning_rate": 5.6937016507088225e-05, + "loss": 2.475, + "step": 12872 + }, + { + "epoch": 1.0388992010330078, + "grad_norm": 0.7407695055007935, + "learning_rate": 5.6922768916412815e-05, + "loss": 2.4683, + "step": 12873 + }, + { + "epoch": 1.038979904769591, + "grad_norm": 0.7335677742958069, + "learning_rate": 5.690852239933462e-05, + "loss": 2.4621, + "step": 12874 + }, + { + "epoch": 1.0390606085061738, + "grad_norm": 0.6731325387954712, + "learning_rate": 5.689427695620873e-05, + "loss": 2.4882, + "step": 12875 + }, + { + "epoch": 1.0391413122427569, + "grad_norm": 0.7256175875663757, + "learning_rate": 5.68800325873901e-05, + "loss": 2.4827, + "step": 12876 + }, + { + "epoch": 1.03922201597934, + "grad_norm": 0.711928129196167, + "learning_rate": 5.686578929323377e-05, + "loss": 2.4447, + "step": 12877 + }, + { + "epoch": 1.0393027197159228, + "grad_norm": 0.6445996165275574, + "learning_rate": 5.685154707409473e-05, + "loss": 2.453, + "step": 12878 + }, + { + "epoch": 1.039383423452506, + "grad_norm": 0.6656066179275513, + "learning_rate": 5.6837305930327923e-05, + "loss": 2.4863, + "step": 12879 + }, + { + "epoch": 1.0394641271890888, + "grad_norm": 0.6844663619995117, + "learning_rate": 5.682306586228828e-05, + "loss": 2.4524, + "step": 12880 + }, + { + 
"epoch": 1.0395448309256718, + "grad_norm": 0.6436383724212646, + "learning_rate": 5.6808826870330746e-05, + "loss": 2.4137, + "step": 12881 + }, + { + "epoch": 1.039625534662255, + "grad_norm": 0.6731196641921997, + "learning_rate": 5.6794588954810104e-05, + "loss": 2.4176, + "step": 12882 + }, + { + "epoch": 1.0397062383988378, + "grad_norm": 0.6994587779045105, + "learning_rate": 5.678035211608125e-05, + "loss": 2.4651, + "step": 12883 + }, + { + "epoch": 1.0397869421354209, + "grad_norm": 0.6912599205970764, + "learning_rate": 5.6766116354499e-05, + "loss": 2.3918, + "step": 12884 + }, + { + "epoch": 1.039867645872004, + "grad_norm": 0.7627033591270447, + "learning_rate": 5.6751881670418185e-05, + "loss": 2.4278, + "step": 12885 + }, + { + "epoch": 1.0399483496085868, + "grad_norm": 0.7107213139533997, + "learning_rate": 5.6737648064193485e-05, + "loss": 2.5249, + "step": 12886 + }, + { + "epoch": 1.04002905334517, + "grad_norm": 0.7254211902618408, + "learning_rate": 5.672341553617968e-05, + "loss": 2.4454, + "step": 12887 + }, + { + "epoch": 1.0401097570817528, + "grad_norm": 0.6776205897331238, + "learning_rate": 5.670918408673149e-05, + "loss": 2.4333, + "step": 12888 + }, + { + "epoch": 1.0401904608183359, + "grad_norm": 0.6824465394020081, + "learning_rate": 5.669495371620359e-05, + "loss": 2.427, + "step": 12889 + }, + { + "epoch": 1.040271164554919, + "grad_norm": 0.6633001565933228, + "learning_rate": 5.668072442495066e-05, + "loss": 2.4874, + "step": 12890 + }, + { + "epoch": 1.0403518682915018, + "grad_norm": 0.6655289530754089, + "learning_rate": 5.666649621332735e-05, + "loss": 2.5023, + "step": 12891 + }, + { + "epoch": 1.040432572028085, + "grad_norm": 0.6892853379249573, + "learning_rate": 5.665226908168818e-05, + "loss": 2.4505, + "step": 12892 + }, + { + "epoch": 1.040513275764668, + "grad_norm": 0.7154649496078491, + "learning_rate": 5.6638043030387774e-05, + "loss": 2.4916, + "step": 12893 + }, + { + "epoch": 1.0405939795012509, + 
"grad_norm": 0.6780592799186707, + "learning_rate": 5.662381805978074e-05, + "loss": 2.4116, + "step": 12894 + }, + { + "epoch": 1.040674683237834, + "grad_norm": 0.6737352013587952, + "learning_rate": 5.66095941702215e-05, + "loss": 2.3903, + "step": 12895 + }, + { + "epoch": 1.0407553869744168, + "grad_norm": 0.7623820304870605, + "learning_rate": 5.659537136206461e-05, + "loss": 2.4334, + "step": 12896 + }, + { + "epoch": 1.040836090711, + "grad_norm": 0.7043081521987915, + "learning_rate": 5.65811496356645e-05, + "loss": 2.4403, + "step": 12897 + }, + { + "epoch": 1.040916794447583, + "grad_norm": 0.6704873442649841, + "learning_rate": 5.6566928991375654e-05, + "loss": 2.4416, + "step": 12898 + }, + { + "epoch": 1.0409974981841659, + "grad_norm": 0.6556837558746338, + "learning_rate": 5.6552709429552474e-05, + "loss": 2.4904, + "step": 12899 + }, + { + "epoch": 1.041078201920749, + "grad_norm": 0.6926451325416565, + "learning_rate": 5.653849095054935e-05, + "loss": 2.4889, + "step": 12900 + }, + { + "epoch": 1.041158905657332, + "grad_norm": 0.6407613158226013, + "learning_rate": 5.6524273554720674e-05, + "loss": 2.3951, + "step": 12901 + }, + { + "epoch": 1.041239609393915, + "grad_norm": 0.7812615633010864, + "learning_rate": 5.651005724242071e-05, + "loss": 2.4535, + "step": 12902 + }, + { + "epoch": 1.041320313130498, + "grad_norm": 0.6868990659713745, + "learning_rate": 5.6495842014003796e-05, + "loss": 2.4373, + "step": 12903 + }, + { + "epoch": 1.0414010168670809, + "grad_norm": 0.6467776894569397, + "learning_rate": 5.648162786982427e-05, + "loss": 2.4929, + "step": 12904 + }, + { + "epoch": 1.041481720603664, + "grad_norm": 0.6588063836097717, + "learning_rate": 5.64674148102363e-05, + "loss": 2.4445, + "step": 12905 + }, + { + "epoch": 1.041562424340247, + "grad_norm": 0.6880654096603394, + "learning_rate": 5.6453202835594136e-05, + "loss": 2.4298, + "step": 12906 + }, + { + "epoch": 1.04164312807683, + "grad_norm": 0.7471407055854797, + 
"learning_rate": 5.6438991946251996e-05, + "loss": 2.4669, + "step": 12907 + }, + { + "epoch": 1.041723831813413, + "grad_norm": 0.7069533467292786, + "learning_rate": 5.6424782142564034e-05, + "loss": 2.4498, + "step": 12908 + }, + { + "epoch": 1.0418045355499959, + "grad_norm": 0.7013602256774902, + "learning_rate": 5.641057342488443e-05, + "loss": 2.4993, + "step": 12909 + }, + { + "epoch": 1.041885239286579, + "grad_norm": 0.6870697736740112, + "learning_rate": 5.6396365793567305e-05, + "loss": 2.5338, + "step": 12910 + }, + { + "epoch": 1.041965943023162, + "grad_norm": 0.6569130420684814, + "learning_rate": 5.638215924896669e-05, + "loss": 2.4538, + "step": 12911 + }, + { + "epoch": 1.042046646759745, + "grad_norm": 0.6900331377983093, + "learning_rate": 5.636795379143669e-05, + "loss": 2.4013, + "step": 12912 + }, + { + "epoch": 1.042127350496328, + "grad_norm": 0.6800071001052856, + "learning_rate": 5.635374942133136e-05, + "loss": 2.4733, + "step": 12913 + }, + { + "epoch": 1.042208054232911, + "grad_norm": 0.703601598739624, + "learning_rate": 5.6339546139004663e-05, + "loss": 2.432, + "step": 12914 + }, + { + "epoch": 1.042288757969494, + "grad_norm": 0.6781988739967346, + "learning_rate": 5.6325343944810594e-05, + "loss": 2.4418, + "step": 12915 + }, + { + "epoch": 1.042369461706077, + "grad_norm": 0.7247167825698853, + "learning_rate": 5.6311142839103125e-05, + "loss": 2.5133, + "step": 12916 + }, + { + "epoch": 1.04245016544266, + "grad_norm": 0.7738155126571655, + "learning_rate": 5.629694282223619e-05, + "loss": 2.5137, + "step": 12917 + }, + { + "epoch": 1.042530869179243, + "grad_norm": 0.74723219871521, + "learning_rate": 5.628274389456367e-05, + "loss": 2.3996, + "step": 12918 + }, + { + "epoch": 1.042611572915826, + "grad_norm": 0.7245466709136963, + "learning_rate": 5.6268546056439456e-05, + "loss": 2.4213, + "step": 12919 + }, + { + "epoch": 1.042692276652409, + "grad_norm": 0.6307608485221863, + "learning_rate": 5.625434930821742e-05, + 
"loss": 2.4195, + "step": 12920 + }, + { + "epoch": 1.042772980388992, + "grad_norm": 0.7138007879257202, + "learning_rate": 5.6240153650251326e-05, + "loss": 2.463, + "step": 12921 + }, + { + "epoch": 1.042853684125575, + "grad_norm": 0.779659628868103, + "learning_rate": 5.622595908289498e-05, + "loss": 2.4898, + "step": 12922 + }, + { + "epoch": 1.042934387862158, + "grad_norm": 0.7144278287887573, + "learning_rate": 5.621176560650221e-05, + "loss": 2.4083, + "step": 12923 + }, + { + "epoch": 1.043015091598741, + "grad_norm": 0.7724754214286804, + "learning_rate": 5.619757322142667e-05, + "loss": 2.3917, + "step": 12924 + }, + { + "epoch": 1.043095795335324, + "grad_norm": 0.7667245268821716, + "learning_rate": 5.618338192802208e-05, + "loss": 2.4943, + "step": 12925 + }, + { + "epoch": 1.043176499071907, + "grad_norm": 0.6528030037879944, + "learning_rate": 5.616919172664221e-05, + "loss": 2.4323, + "step": 12926 + }, + { + "epoch": 1.04325720280849, + "grad_norm": 0.6790263652801514, + "learning_rate": 5.6155002617640615e-05, + "loss": 2.4304, + "step": 12927 + }, + { + "epoch": 1.043337906545073, + "grad_norm": 0.7554369568824768, + "learning_rate": 5.614081460137097e-05, + "loss": 2.4637, + "step": 12928 + }, + { + "epoch": 1.043418610281656, + "grad_norm": 0.7126293182373047, + "learning_rate": 5.612662767818686e-05, + "loss": 2.4765, + "step": 12929 + }, + { + "epoch": 1.0434993140182391, + "grad_norm": 0.6705749034881592, + "learning_rate": 5.611244184844189e-05, + "loss": 2.4746, + "step": 12930 + }, + { + "epoch": 1.043580017754822, + "grad_norm": 0.6595145463943481, + "learning_rate": 5.609825711248958e-05, + "loss": 2.463, + "step": 12931 + }, + { + "epoch": 1.043660721491405, + "grad_norm": 0.6942049860954285, + "learning_rate": 5.6084073470683476e-05, + "loss": 2.5101, + "step": 12932 + }, + { + "epoch": 1.043741425227988, + "grad_norm": 0.7285810708999634, + "learning_rate": 5.6069890923377087e-05, + "loss": 2.467, + "step": 12933 + }, + { + 
"epoch": 1.043822128964571, + "grad_norm": 0.7702928185462952, + "learning_rate": 5.605570947092382e-05, + "loss": 2.4998, + "step": 12934 + }, + { + "epoch": 1.0439028327011541, + "grad_norm": 0.6631895899772644, + "learning_rate": 5.604152911367713e-05, + "loss": 2.4277, + "step": 12935 + }, + { + "epoch": 1.043983536437737, + "grad_norm": 0.6447882652282715, + "learning_rate": 5.6027349851990494e-05, + "loss": 2.4868, + "step": 12936 + }, + { + "epoch": 1.04406424017432, + "grad_norm": 0.695160448551178, + "learning_rate": 5.6013171686217205e-05, + "loss": 2.3917, + "step": 12937 + }, + { + "epoch": 1.0441449439109032, + "grad_norm": 0.6579271554946899, + "learning_rate": 5.5998994616710656e-05, + "loss": 2.4245, + "step": 12938 + }, + { + "epoch": 1.044225647647486, + "grad_norm": 0.7053574323654175, + "learning_rate": 5.598481864382419e-05, + "loss": 2.4809, + "step": 12939 + }, + { + "epoch": 1.0443063513840691, + "grad_norm": 0.7008736729621887, + "learning_rate": 5.5970643767911105e-05, + "loss": 2.4481, + "step": 12940 + }, + { + "epoch": 1.044387055120652, + "grad_norm": 0.6577918529510498, + "learning_rate": 5.5956469989324644e-05, + "loss": 2.4211, + "step": 12941 + }, + { + "epoch": 1.044467758857235, + "grad_norm": 0.6662739515304565, + "learning_rate": 5.594229730841815e-05, + "loss": 2.4607, + "step": 12942 + }, + { + "epoch": 1.0445484625938182, + "grad_norm": 0.6637060046195984, + "learning_rate": 5.592812572554471e-05, + "loss": 2.4388, + "step": 12943 + }, + { + "epoch": 1.044629166330401, + "grad_norm": 0.7282097935676575, + "learning_rate": 5.5913955241057605e-05, + "loss": 2.4536, + "step": 12944 + }, + { + "epoch": 1.0447098700669841, + "grad_norm": 0.6470810174942017, + "learning_rate": 5.589978585530997e-05, + "loss": 2.4032, + "step": 12945 + }, + { + "epoch": 1.0447905738035672, + "grad_norm": 0.6958881616592407, + "learning_rate": 5.588561756865498e-05, + "loss": 2.4577, + "step": 12946 + }, + { + "epoch": 1.04487127754015, + 
"grad_norm": 0.6999812722206116, + "learning_rate": 5.587145038144569e-05, + "loss": 2.454, + "step": 12947 + }, + { + "epoch": 1.0449519812767332, + "grad_norm": 0.6919988989830017, + "learning_rate": 5.58572842940352e-05, + "loss": 2.4505, + "step": 12948 + }, + { + "epoch": 1.045032685013316, + "grad_norm": 0.6813084483146667, + "learning_rate": 5.584311930677659e-05, + "loss": 2.4873, + "step": 12949 + }, + { + "epoch": 1.0451133887498991, + "grad_norm": 0.6587427854537964, + "learning_rate": 5.582895542002286e-05, + "loss": 2.4658, + "step": 12950 + }, + { + "epoch": 1.0451940924864822, + "grad_norm": 0.6942041516304016, + "learning_rate": 5.581479263412703e-05, + "loss": 2.47, + "step": 12951 + }, + { + "epoch": 1.045274796223065, + "grad_norm": 0.7330117225646973, + "learning_rate": 5.58006309494421e-05, + "loss": 2.4826, + "step": 12952 + }, + { + "epoch": 1.0453554999596482, + "grad_norm": 0.7197144031524658, + "learning_rate": 5.578647036632096e-05, + "loss": 2.4425, + "step": 12953 + }, + { + "epoch": 1.045436203696231, + "grad_norm": 0.7442573308944702, + "learning_rate": 5.577231088511654e-05, + "loss": 2.4946, + "step": 12954 + }, + { + "epoch": 1.0455169074328141, + "grad_norm": 0.7039753198623657, + "learning_rate": 5.575815250618179e-05, + "loss": 2.4188, + "step": 12955 + }, + { + "epoch": 1.0455976111693972, + "grad_norm": 0.7374606728553772, + "learning_rate": 5.574399522986951e-05, + "loss": 2.3916, + "step": 12956 + }, + { + "epoch": 1.04567831490598, + "grad_norm": 0.6358140707015991, + "learning_rate": 5.572983905653253e-05, + "loss": 2.4502, + "step": 12957 + }, + { + "epoch": 1.0457590186425632, + "grad_norm": 0.712858259677887, + "learning_rate": 5.5715683986523694e-05, + "loss": 2.4746, + "step": 12958 + }, + { + "epoch": 1.0458397223791462, + "grad_norm": 0.6757933497428894, + "learning_rate": 5.5701530020195756e-05, + "loss": 2.4836, + "step": 12959 + }, + { + "epoch": 1.045920426115729, + "grad_norm": 0.7509831786155701, + 
"learning_rate": 5.568737715790151e-05, + "loss": 2.4061, + "step": 12960 + }, + { + "epoch": 1.0460011298523122, + "grad_norm": 0.7120335102081299, + "learning_rate": 5.5673225399993646e-05, + "loss": 2.4772, + "step": 12961 + }, + { + "epoch": 1.046081833588895, + "grad_norm": 0.7213751673698425, + "learning_rate": 5.5659074746824924e-05, + "loss": 2.4637, + "step": 12962 + }, + { + "epoch": 1.0461625373254781, + "grad_norm": 0.7161290645599365, + "learning_rate": 5.5644925198747934e-05, + "loss": 2.4552, + "step": 12963 + }, + { + "epoch": 1.0462432410620612, + "grad_norm": 0.7303922772407532, + "learning_rate": 5.563077675611534e-05, + "loss": 2.5091, + "step": 12964 + }, + { + "epoch": 1.046323944798644, + "grad_norm": 0.7051636576652527, + "learning_rate": 5.561662941927981e-05, + "loss": 2.3717, + "step": 12965 + }, + { + "epoch": 1.0464046485352272, + "grad_norm": 0.6880733370780945, + "learning_rate": 5.5602483188593866e-05, + "loss": 2.4205, + "step": 12966 + }, + { + "epoch": 1.0464853522718103, + "grad_norm": 0.6942360401153564, + "learning_rate": 5.558833806441008e-05, + "loss": 2.4601, + "step": 12967 + }, + { + "epoch": 1.0465660560083931, + "grad_norm": 0.7264992594718933, + "learning_rate": 5.5574194047081016e-05, + "loss": 2.4612, + "step": 12968 + }, + { + "epoch": 1.0466467597449762, + "grad_norm": 0.7502472996711731, + "learning_rate": 5.5560051136959166e-05, + "loss": 2.4099, + "step": 12969 + }, + { + "epoch": 1.046727463481559, + "grad_norm": 0.691694438457489, + "learning_rate": 5.5545909334397004e-05, + "loss": 2.5071, + "step": 12970 + }, + { + "epoch": 1.0468081672181422, + "grad_norm": 0.7120653986930847, + "learning_rate": 5.5531768639747026e-05, + "loss": 2.4066, + "step": 12971 + }, + { + "epoch": 1.0468888709547253, + "grad_norm": 0.6501363515853882, + "learning_rate": 5.551762905336159e-05, + "loss": 2.4186, + "step": 12972 + }, + { + "epoch": 1.0469695746913081, + "grad_norm": 0.6924965977668762, + "learning_rate": 
5.5503490575593095e-05, + "loss": 2.4864, + "step": 12973 + }, + { + "epoch": 1.0470502784278912, + "grad_norm": 0.6772900819778442, + "learning_rate": 5.548935320679398e-05, + "loss": 2.4101, + "step": 12974 + }, + { + "epoch": 1.0471309821644743, + "grad_norm": 0.6950967311859131, + "learning_rate": 5.54752169473165e-05, + "loss": 2.4893, + "step": 12975 + }, + { + "epoch": 1.0472116859010572, + "grad_norm": 0.6663516163825989, + "learning_rate": 5.5461081797512994e-05, + "loss": 2.4136, + "step": 12976 + }, + { + "epoch": 1.0472923896376403, + "grad_norm": 0.7337449789047241, + "learning_rate": 5.5446947757735754e-05, + "loss": 2.473, + "step": 12977 + }, + { + "epoch": 1.0473730933742231, + "grad_norm": 0.6808840036392212, + "learning_rate": 5.543281482833709e-05, + "loss": 2.4473, + "step": 12978 + }, + { + "epoch": 1.0474537971108062, + "grad_norm": 0.6472508907318115, + "learning_rate": 5.5418683009669124e-05, + "loss": 2.4077, + "step": 12979 + }, + { + "epoch": 1.0475345008473893, + "grad_norm": 0.6904192566871643, + "learning_rate": 5.540455230208409e-05, + "loss": 2.482, + "step": 12980 + }, + { + "epoch": 1.0476152045839722, + "grad_norm": 0.6781610250473022, + "learning_rate": 5.5390422705934264e-05, + "loss": 2.4458, + "step": 12981 + }, + { + "epoch": 1.0476959083205553, + "grad_norm": 0.7130050659179688, + "learning_rate": 5.5376294221571666e-05, + "loss": 2.5136, + "step": 12982 + }, + { + "epoch": 1.0477766120571383, + "grad_norm": 0.7727184891700745, + "learning_rate": 5.536216684934846e-05, + "loss": 2.5346, + "step": 12983 + }, + { + "epoch": 1.0478573157937212, + "grad_norm": 0.7177208662033081, + "learning_rate": 5.534804058961679e-05, + "loss": 2.4153, + "step": 12984 + }, + { + "epoch": 1.0479380195303043, + "grad_norm": 0.7333023548126221, + "learning_rate": 5.5333915442728634e-05, + "loss": 2.4171, + "step": 12985 + }, + { + "epoch": 1.0480187232668872, + "grad_norm": 0.658423125743866, + "learning_rate": 5.5319791409036046e-05, + "loss": 
2.446, + "step": 12986 + }, + { + "epoch": 1.0480994270034703, + "grad_norm": 0.8305184841156006, + "learning_rate": 5.5305668488891114e-05, + "loss": 2.5026, + "step": 12987 + }, + { + "epoch": 1.0481801307400533, + "grad_norm": 0.7083305716514587, + "learning_rate": 5.52915466826457e-05, + "loss": 2.5366, + "step": 12988 + }, + { + "epoch": 1.0482608344766362, + "grad_norm": 0.7924454212188721, + "learning_rate": 5.5277425990651824e-05, + "loss": 2.528, + "step": 12989 + }, + { + "epoch": 1.0483415382132193, + "grad_norm": 0.633376955986023, + "learning_rate": 5.5263306413261384e-05, + "loss": 2.4442, + "step": 12990 + }, + { + "epoch": 1.0484222419498024, + "grad_norm": 0.7387240529060364, + "learning_rate": 5.5249187950826295e-05, + "loss": 2.4761, + "step": 12991 + }, + { + "epoch": 1.0485029456863852, + "grad_norm": 0.6796224117279053, + "learning_rate": 5.523507060369843e-05, + "loss": 2.4828, + "step": 12992 + }, + { + "epoch": 1.0485836494229683, + "grad_norm": 0.6925581097602844, + "learning_rate": 5.5220954372229604e-05, + "loss": 2.4861, + "step": 12993 + }, + { + "epoch": 1.0486643531595512, + "grad_norm": 0.6854318380355835, + "learning_rate": 5.5206839256771704e-05, + "loss": 2.473, + "step": 12994 + }, + { + "epoch": 1.0487450568961343, + "grad_norm": 0.706375241279602, + "learning_rate": 5.519272525767643e-05, + "loss": 2.4284, + "step": 12995 + }, + { + "epoch": 1.0488257606327174, + "grad_norm": 0.6917428374290466, + "learning_rate": 5.517861237529556e-05, + "loss": 2.4702, + "step": 12996 + }, + { + "epoch": 1.0489064643693002, + "grad_norm": 0.6903818845748901, + "learning_rate": 5.516450060998086e-05, + "loss": 2.4679, + "step": 12997 + }, + { + "epoch": 1.0489871681058833, + "grad_norm": 0.6403356194496155, + "learning_rate": 5.515038996208398e-05, + "loss": 2.396, + "step": 12998 + }, + { + "epoch": 1.0490678718424662, + "grad_norm": 0.6491792798042297, + "learning_rate": 5.513628043195662e-05, + "loss": 2.4543, + "step": 12999 + }, + { + 
"epoch": 1.0491485755790493, + "grad_norm": 0.687303900718689, + "learning_rate": 5.512217201995043e-05, + "loss": 2.4716, + "step": 13000 + }, + { + "epoch": 1.0491485755790493, + "eval_loss": 2.4177169799804688, + "eval_runtime": 763.9215, + "eval_samples_per_second": 3.43, + "eval_steps_per_second": 0.572, + "step": 13000 + }, + { + "epoch": 1.0492292793156324, + "grad_norm": 0.7020761370658875, + "learning_rate": 5.510806472641701e-05, + "loss": 2.3591, + "step": 13001 + }, + { + "epoch": 1.0493099830522152, + "grad_norm": 0.6978075504302979, + "learning_rate": 5.509395855170798e-05, + "loss": 2.4585, + "step": 13002 + }, + { + "epoch": 1.0493906867887983, + "grad_norm": 0.7327752113342285, + "learning_rate": 5.5079853496174925e-05, + "loss": 2.5265, + "step": 13003 + }, + { + "epoch": 1.0494713905253814, + "grad_norm": 0.7552505135536194, + "learning_rate": 5.50657495601693e-05, + "loss": 2.4821, + "step": 13004 + }, + { + "epoch": 1.0495520942619643, + "grad_norm": 0.7100770473480225, + "learning_rate": 5.5051646744042664e-05, + "loss": 2.4566, + "step": 13005 + }, + { + "epoch": 1.0496327979985474, + "grad_norm": 0.7008209824562073, + "learning_rate": 5.503754504814651e-05, + "loss": 2.4476, + "step": 13006 + }, + { + "epoch": 1.0497135017351304, + "grad_norm": 0.640724241733551, + "learning_rate": 5.502344447283223e-05, + "loss": 2.437, + "step": 13007 + }, + { + "epoch": 1.0497942054717133, + "grad_norm": 0.7064981460571289, + "learning_rate": 5.5009345018451297e-05, + "loss": 2.5129, + "step": 13008 + }, + { + "epoch": 1.0498749092082964, + "grad_norm": 0.6729782223701477, + "learning_rate": 5.49952466853551e-05, + "loss": 2.4867, + "step": 13009 + }, + { + "epoch": 1.0499556129448793, + "grad_norm": 0.7245302200317383, + "learning_rate": 5.4981149473894966e-05, + "loss": 2.4485, + "step": 13010 + }, + { + "epoch": 1.0500363166814624, + "grad_norm": 0.6686248779296875, + "learning_rate": 5.4967053384422294e-05, + "loss": 2.4314, + "step": 13011 + }, + { + 
"epoch": 1.0501170204180454, + "grad_norm": 0.6790863871574402, + "learning_rate": 5.495295841728836e-05, + "loss": 2.4847, + "step": 13012 + }, + { + "epoch": 1.0501977241546283, + "grad_norm": 0.6516931653022766, + "learning_rate": 5.49388645728445e-05, + "loss": 2.4306, + "step": 13013 + }, + { + "epoch": 1.0502784278912114, + "grad_norm": 0.6967600584030151, + "learning_rate": 5.492477185144189e-05, + "loss": 2.4942, + "step": 13014 + }, + { + "epoch": 1.0503591316277943, + "grad_norm": 0.696246325969696, + "learning_rate": 5.491068025343178e-05, + "loss": 2.4647, + "step": 13015 + }, + { + "epoch": 1.0504398353643774, + "grad_norm": 0.6962751150131226, + "learning_rate": 5.489658977916543e-05, + "loss": 2.5095, + "step": 13016 + }, + { + "epoch": 1.0505205391009604, + "grad_norm": 0.6982631087303162, + "learning_rate": 5.488250042899392e-05, + "loss": 2.4327, + "step": 13017 + }, + { + "epoch": 1.0506012428375433, + "grad_norm": 0.6932644844055176, + "learning_rate": 5.486841220326845e-05, + "loss": 2.4777, + "step": 13018 + }, + { + "epoch": 1.0506819465741264, + "grad_norm": 0.6923339366912842, + "learning_rate": 5.485432510234012e-05, + "loss": 2.4321, + "step": 13019 + }, + { + "epoch": 1.0507626503107095, + "grad_norm": 0.7445859313011169, + "learning_rate": 5.4840239126560015e-05, + "loss": 2.4425, + "step": 13020 + }, + { + "epoch": 1.0508433540472923, + "grad_norm": 0.7122324705123901, + "learning_rate": 5.48261542762792e-05, + "loss": 2.4545, + "step": 13021 + }, + { + "epoch": 1.0509240577838754, + "grad_norm": 0.734779417514801, + "learning_rate": 5.4812070551848736e-05, + "loss": 2.4764, + "step": 13022 + }, + { + "epoch": 1.0510047615204583, + "grad_norm": 0.6544109582901001, + "learning_rate": 5.4797987953619566e-05, + "loss": 2.4492, + "step": 13023 + }, + { + "epoch": 1.0510854652570414, + "grad_norm": 0.6366097331047058, + "learning_rate": 5.4783906481942704e-05, + "loss": 2.4695, + "step": 13024 + }, + { + "epoch": 1.0511661689936245, + 
"grad_norm": 0.6966270804405212, + "learning_rate": 5.476982613716908e-05, + "loss": 2.4505, + "step": 13025 + }, + { + "epoch": 1.0512468727302073, + "grad_norm": 0.7010120153427124, + "learning_rate": 5.4755746919649665e-05, + "loss": 2.4545, + "step": 13026 + }, + { + "epoch": 1.0513275764667904, + "grad_norm": 0.6704719662666321, + "learning_rate": 5.474166882973526e-05, + "loss": 2.3899, + "step": 13027 + }, + { + "epoch": 1.0514082802033735, + "grad_norm": 0.757152259349823, + "learning_rate": 5.472759186777679e-05, + "loss": 2.5112, + "step": 13028 + }, + { + "epoch": 1.0514889839399564, + "grad_norm": 0.6668868660926819, + "learning_rate": 5.471351603412509e-05, + "loss": 2.4797, + "step": 13029 + }, + { + "epoch": 1.0515696876765395, + "grad_norm": 0.7919496893882751, + "learning_rate": 5.4699441329130887e-05, + "loss": 2.4874, + "step": 13030 + }, + { + "epoch": 1.0516503914131223, + "grad_norm": 0.7595484852790833, + "learning_rate": 5.468536775314506e-05, + "loss": 2.4621, + "step": 13031 + }, + { + "epoch": 1.0517310951497054, + "grad_norm": 0.6575995683670044, + "learning_rate": 5.467129530651835e-05, + "loss": 2.4474, + "step": 13032 + }, + { + "epoch": 1.0518117988862885, + "grad_norm": 0.6817733645439148, + "learning_rate": 5.4657223989601425e-05, + "loss": 2.4329, + "step": 13033 + }, + { + "epoch": 1.0518925026228714, + "grad_norm": 0.722882091999054, + "learning_rate": 5.464315380274501e-05, + "loss": 2.4544, + "step": 13034 + }, + { + "epoch": 1.0519732063594545, + "grad_norm": 0.6957377791404724, + "learning_rate": 5.4629084746299796e-05, + "loss": 2.5669, + "step": 13035 + }, + { + "epoch": 1.0520539100960375, + "grad_norm": 0.6749420166015625, + "learning_rate": 5.461501682061636e-05, + "loss": 2.5053, + "step": 13036 + }, + { + "epoch": 1.0521346138326204, + "grad_norm": 0.8158369064331055, + "learning_rate": 5.4600950026045326e-05, + "loss": 2.429, + "step": 13037 + }, + { + "epoch": 1.0522153175692035, + "grad_norm": 0.6960736513137817, + 
"learning_rate": 5.458688436293735e-05, + "loss": 2.4731, + "step": 13038 + }, + { + "epoch": 1.0522960213057864, + "grad_norm": 0.6686301231384277, + "learning_rate": 5.457281983164287e-05, + "loss": 2.4495, + "step": 13039 + }, + { + "epoch": 1.0523767250423695, + "grad_norm": 0.6691476106643677, + "learning_rate": 5.455875643251248e-05, + "loss": 2.4329, + "step": 13040 + }, + { + "epoch": 1.0524574287789525, + "grad_norm": 0.7737297415733337, + "learning_rate": 5.454469416589666e-05, + "loss": 2.4664, + "step": 13041 + }, + { + "epoch": 1.0525381325155354, + "grad_norm": 0.7848188281059265, + "learning_rate": 5.453063303214588e-05, + "loss": 2.4799, + "step": 13042 + }, + { + "epoch": 1.0526188362521185, + "grad_norm": 0.7831119894981384, + "learning_rate": 5.45165730316106e-05, + "loss": 2.5076, + "step": 13043 + }, + { + "epoch": 1.0526995399887016, + "grad_norm": 0.691635012626648, + "learning_rate": 5.4502514164641196e-05, + "loss": 2.4866, + "step": 13044 + }, + { + "epoch": 1.0527802437252844, + "grad_norm": 0.6667110919952393, + "learning_rate": 5.4488456431588106e-05, + "loss": 2.4162, + "step": 13045 + }, + { + "epoch": 1.0528609474618675, + "grad_norm": 0.7201905846595764, + "learning_rate": 5.447439983280163e-05, + "loss": 2.498, + "step": 13046 + }, + { + "epoch": 1.0529416511984504, + "grad_norm": 0.8538106083869934, + "learning_rate": 5.44603443686321e-05, + "loss": 2.4477, + "step": 13047 + }, + { + "epoch": 1.0530223549350335, + "grad_norm": 0.6661962270736694, + "learning_rate": 5.444629003942987e-05, + "loss": 2.5253, + "step": 13048 + }, + { + "epoch": 1.0531030586716166, + "grad_norm": 0.7239834666252136, + "learning_rate": 5.4432236845545146e-05, + "loss": 2.4786, + "step": 13049 + }, + { + "epoch": 1.0531837624081994, + "grad_norm": 0.7328412532806396, + "learning_rate": 5.4418184787328186e-05, + "loss": 2.4841, + "step": 13050 + }, + { + "epoch": 1.0532644661447825, + "grad_norm": 0.6395559310913086, + "learning_rate": 
5.440413386512922e-05, + "loss": 2.3544, + "step": 13051 + }, + { + "epoch": 1.0533451698813656, + "grad_norm": 0.6632471084594727, + "learning_rate": 5.43900840792984e-05, + "loss": 2.4753, + "step": 13052 + }, + { + "epoch": 1.0534258736179485, + "grad_norm": 0.7262828350067139, + "learning_rate": 5.4376035430185935e-05, + "loss": 2.4162, + "step": 13053 + }, + { + "epoch": 1.0535065773545316, + "grad_norm": 0.7897952198982239, + "learning_rate": 5.436198791814196e-05, + "loss": 2.4571, + "step": 13054 + }, + { + "epoch": 1.0535872810911144, + "grad_norm": 0.7281489372253418, + "learning_rate": 5.434794154351651e-05, + "loss": 2.4531, + "step": 13055 + }, + { + "epoch": 1.0536679848276975, + "grad_norm": 0.7322356700897217, + "learning_rate": 5.4333896306659694e-05, + "loss": 2.4102, + "step": 13056 + }, + { + "epoch": 1.0537486885642806, + "grad_norm": 0.7657945156097412, + "learning_rate": 5.4319852207921554e-05, + "loss": 2.4526, + "step": 13057 + }, + { + "epoch": 1.0538293923008635, + "grad_norm": 0.6732973456382751, + "learning_rate": 5.430580924765214e-05, + "loss": 2.4516, + "step": 13058 + }, + { + "epoch": 1.0539100960374466, + "grad_norm": 0.663398027420044, + "learning_rate": 5.429176742620137e-05, + "loss": 2.4437, + "step": 13059 + }, + { + "epoch": 1.0539907997740294, + "grad_norm": 0.6363258957862854, + "learning_rate": 5.4277726743919244e-05, + "loss": 2.414, + "step": 13060 + }, + { + "epoch": 1.0540715035106125, + "grad_norm": 0.6600647568702698, + "learning_rate": 5.426368720115568e-05, + "loss": 2.4319, + "step": 13061 + }, + { + "epoch": 1.0541522072471956, + "grad_norm": 0.6941983699798584, + "learning_rate": 5.4249648798260574e-05, + "loss": 2.5247, + "step": 13062 + }, + { + "epoch": 1.0542329109837785, + "grad_norm": 0.7419719099998474, + "learning_rate": 5.423561153558383e-05, + "loss": 2.5088, + "step": 13063 + }, + { + "epoch": 1.0543136147203616, + "grad_norm": 0.708073079586029, + "learning_rate": 5.4221575413475326e-05, + "loss": 
2.4037, + "step": 13064 + }, + { + "epoch": 1.0543943184569446, + "grad_norm": 0.7081628441810608, + "learning_rate": 5.4207540432284764e-05, + "loss": 2.4556, + "step": 13065 + }, + { + "epoch": 1.0544750221935275, + "grad_norm": 0.7058689594268799, + "learning_rate": 5.419350659236201e-05, + "loss": 2.4244, + "step": 13066 + }, + { + "epoch": 1.0545557259301106, + "grad_norm": 0.6858707070350647, + "learning_rate": 5.417947389405684e-05, + "loss": 2.4431, + "step": 13067 + }, + { + "epoch": 1.0546364296666935, + "grad_norm": 0.6769983768463135, + "learning_rate": 5.416544233771893e-05, + "loss": 2.4257, + "step": 13068 + }, + { + "epoch": 1.0547171334032766, + "grad_norm": 0.7128089070320129, + "learning_rate": 5.4151411923698e-05, + "loss": 2.4558, + "step": 13069 + }, + { + "epoch": 1.0547978371398596, + "grad_norm": 0.6419198513031006, + "learning_rate": 5.413738265234374e-05, + "loss": 2.4421, + "step": 13070 + }, + { + "epoch": 1.0548785408764425, + "grad_norm": 0.760848879814148, + "learning_rate": 5.4123354524005784e-05, + "loss": 2.4427, + "step": 13071 + }, + { + "epoch": 1.0549592446130256, + "grad_norm": 0.6749173998832703, + "learning_rate": 5.410932753903377e-05, + "loss": 2.4902, + "step": 13072 + }, + { + "epoch": 1.0550399483496087, + "grad_norm": 0.6908800601959229, + "learning_rate": 5.4095301697777265e-05, + "loss": 2.4219, + "step": 13073 + }, + { + "epoch": 1.0551206520861915, + "grad_norm": 0.6779965758323669, + "learning_rate": 5.408127700058587e-05, + "loss": 2.4533, + "step": 13074 + }, + { + "epoch": 1.0552013558227746, + "grad_norm": 0.6832355260848999, + "learning_rate": 5.406725344780906e-05, + "loss": 2.418, + "step": 13075 + }, + { + "epoch": 1.0552820595593575, + "grad_norm": 0.6766698956489563, + "learning_rate": 5.4053231039796357e-05, + "loss": 2.4493, + "step": 13076 + }, + { + "epoch": 1.0553627632959406, + "grad_norm": 0.7256276607513428, + "learning_rate": 5.4039209776897285e-05, + "loss": 2.4126, + "step": 13077 + }, + { + 
"epoch": 1.0554434670325237, + "grad_norm": 0.6687275171279907, + "learning_rate": 5.4025189659461196e-05, + "loss": 2.435, + "step": 13078 + }, + { + "epoch": 1.0555241707691065, + "grad_norm": 0.6800444722175598, + "learning_rate": 5.401117068783758e-05, + "loss": 2.4608, + "step": 13079 + }, + { + "epoch": 1.0556048745056896, + "grad_norm": 0.6947116851806641, + "learning_rate": 5.399715286237583e-05, + "loss": 2.4908, + "step": 13080 + }, + { + "epoch": 1.0556855782422727, + "grad_norm": 0.6907915472984314, + "learning_rate": 5.398313618342521e-05, + "loss": 2.4805, + "step": 13081 + }, + { + "epoch": 1.0557662819788556, + "grad_norm": 0.7429100275039673, + "learning_rate": 5.396912065133516e-05, + "loss": 2.458, + "step": 13082 + }, + { + "epoch": 1.0558469857154387, + "grad_norm": 0.7186924815177917, + "learning_rate": 5.3955106266454994e-05, + "loss": 2.4924, + "step": 13083 + }, + { + "epoch": 1.0559276894520215, + "grad_norm": 0.7017999887466431, + "learning_rate": 5.394109302913391e-05, + "loss": 2.4103, + "step": 13084 + }, + { + "epoch": 1.0560083931886046, + "grad_norm": 0.7318955659866333, + "learning_rate": 5.392708093972117e-05, + "loss": 2.4424, + "step": 13085 + }, + { + "epoch": 1.0560890969251877, + "grad_norm": 0.6278600692749023, + "learning_rate": 5.391306999856602e-05, + "loss": 2.4433, + "step": 13086 + }, + { + "epoch": 1.0561698006617706, + "grad_norm": 0.6895800232887268, + "learning_rate": 5.389906020601767e-05, + "loss": 2.4275, + "step": 13087 + }, + { + "epoch": 1.0562505043983537, + "grad_norm": 0.7197345495223999, + "learning_rate": 5.388505156242522e-05, + "loss": 2.4309, + "step": 13088 + }, + { + "epoch": 1.0563312081349367, + "grad_norm": 0.636433482170105, + "learning_rate": 5.3871044068137824e-05, + "loss": 2.4258, + "step": 13089 + }, + { + "epoch": 1.0564119118715196, + "grad_norm": 0.6884748339653015, + "learning_rate": 5.3857037723504634e-05, + "loss": 2.4543, + "step": 13090 + }, + { + "epoch": 1.0564926156081027, + 
"grad_norm": 0.7277036309242249, + "learning_rate": 5.384303252887464e-05, + "loss": 2.4911, + "step": 13091 + }, + { + "epoch": 1.0565733193446856, + "grad_norm": 0.6940809488296509, + "learning_rate": 5.38290284845969e-05, + "loss": 2.4112, + "step": 13092 + }, + { + "epoch": 1.0566540230812687, + "grad_norm": 0.6729177236557007, + "learning_rate": 5.3815025591020526e-05, + "loss": 2.4394, + "step": 13093 + }, + { + "epoch": 1.0567347268178517, + "grad_norm": 0.6941854357719421, + "learning_rate": 5.3801023848494416e-05, + "loss": 2.4263, + "step": 13094 + }, + { + "epoch": 1.0568154305544346, + "grad_norm": 0.7046812772750854, + "learning_rate": 5.3787023257367554e-05, + "loss": 2.5196, + "step": 13095 + }, + { + "epoch": 1.0568961342910177, + "grad_norm": 0.6896177530288696, + "learning_rate": 5.377302381798891e-05, + "loss": 2.4178, + "step": 13096 + }, + { + "epoch": 1.0569768380276008, + "grad_norm": 0.6693699955940247, + "learning_rate": 5.375902553070731e-05, + "loss": 2.4908, + "step": 13097 + }, + { + "epoch": 1.0570575417641837, + "grad_norm": 0.6751677989959717, + "learning_rate": 5.3745028395871674e-05, + "loss": 2.4222, + "step": 13098 + }, + { + "epoch": 1.0571382455007667, + "grad_norm": 0.7666265368461609, + "learning_rate": 5.373103241383088e-05, + "loss": 2.4965, + "step": 13099 + }, + { + "epoch": 1.0572189492373496, + "grad_norm": 0.8069329857826233, + "learning_rate": 5.3717037584933674e-05, + "loss": 2.4988, + "step": 13100 + }, + { + "epoch": 1.0572996529739327, + "grad_norm": 0.7160749435424805, + "learning_rate": 5.370304390952887e-05, + "loss": 2.4311, + "step": 13101 + }, + { + "epoch": 1.0573803567105158, + "grad_norm": 0.6936448812484741, + "learning_rate": 5.368905138796523e-05, + "loss": 2.4877, + "step": 13102 + }, + { + "epoch": 1.0574610604470986, + "grad_norm": 0.7202793955802917, + "learning_rate": 5.3675060020591494e-05, + "loss": 2.4841, + "step": 13103 + }, + { + "epoch": 1.0575417641836817, + "grad_norm": 
0.7750168442726135, + "learning_rate": 5.366106980775636e-05, + "loss": 2.4828, + "step": 13104 + }, + { + "epoch": 1.0576224679202646, + "grad_norm": 0.7079972624778748, + "learning_rate": 5.364708074980849e-05, + "loss": 2.4912, + "step": 13105 + }, + { + "epoch": 1.0577031716568477, + "grad_norm": 0.704066276550293, + "learning_rate": 5.363309284709657e-05, + "loss": 2.4731, + "step": 13106 + }, + { + "epoch": 1.0577838753934308, + "grad_norm": 0.7040490508079529, + "learning_rate": 5.361910609996915e-05, + "loss": 2.3811, + "step": 13107 + }, + { + "epoch": 1.0578645791300136, + "grad_norm": 0.6669453978538513, + "learning_rate": 5.360512050877484e-05, + "loss": 2.5372, + "step": 13108 + }, + { + "epoch": 1.0579452828665967, + "grad_norm": 0.7197996973991394, + "learning_rate": 5.359113607386226e-05, + "loss": 2.4612, + "step": 13109 + }, + { + "epoch": 1.0580259866031798, + "grad_norm": 0.7192320823669434, + "learning_rate": 5.3577152795579824e-05, + "loss": 2.4636, + "step": 13110 + }, + { + "epoch": 1.0581066903397627, + "grad_norm": 0.6907937526702881, + "learning_rate": 5.35631706742761e-05, + "loss": 2.4791, + "step": 13111 + }, + { + "epoch": 1.0581873940763458, + "grad_norm": 0.687035083770752, + "learning_rate": 5.354918971029954e-05, + "loss": 2.4706, + "step": 13112 + }, + { + "epoch": 1.0582680978129286, + "grad_norm": 0.6666533350944519, + "learning_rate": 5.353520990399861e-05, + "loss": 2.4789, + "step": 13113 + }, + { + "epoch": 1.0583488015495117, + "grad_norm": 0.6261809468269348, + "learning_rate": 5.35212312557217e-05, + "loss": 2.4485, + "step": 13114 + }, + { + "epoch": 1.0584295052860948, + "grad_norm": 0.6740814447402954, + "learning_rate": 5.350725376581725e-05, + "loss": 2.47, + "step": 13115 + }, + { + "epoch": 1.0585102090226777, + "grad_norm": 0.7634154558181763, + "learning_rate": 5.3493277434633526e-05, + "loss": 2.4685, + "step": 13116 + }, + { + "epoch": 1.0585909127592608, + "grad_norm": 0.6674611568450928, + "learning_rate": 
5.34793022625189e-05, + "loss": 2.4362, + "step": 13117 + }, + { + "epoch": 1.0586716164958438, + "grad_norm": 0.7584757804870605, + "learning_rate": 5.346532824982167e-05, + "loss": 2.499, + "step": 13118 + }, + { + "epoch": 1.0587523202324267, + "grad_norm": 0.6453456282615662, + "learning_rate": 5.345135539689015e-05, + "loss": 2.4341, + "step": 13119 + }, + { + "epoch": 1.0588330239690098, + "grad_norm": 0.70013427734375, + "learning_rate": 5.343738370407247e-05, + "loss": 2.3448, + "step": 13120 + }, + { + "epoch": 1.0589137277055927, + "grad_norm": 0.6763362884521484, + "learning_rate": 5.342341317171693e-05, + "loss": 2.4234, + "step": 13121 + }, + { + "epoch": 1.0589944314421758, + "grad_norm": 0.6896576881408691, + "learning_rate": 5.3409443800171664e-05, + "loss": 2.4753, + "step": 13122 + }, + { + "epoch": 1.0590751351787588, + "grad_norm": 0.6984997987747192, + "learning_rate": 5.339547558978486e-05, + "loss": 2.4581, + "step": 13123 + }, + { + "epoch": 1.0591558389153417, + "grad_norm": 0.7276118993759155, + "learning_rate": 5.338150854090462e-05, + "loss": 2.4765, + "step": 13124 + }, + { + "epoch": 1.0592365426519248, + "grad_norm": 0.6943252086639404, + "learning_rate": 5.336754265387911e-05, + "loss": 2.4514, + "step": 13125 + }, + { + "epoch": 1.0593172463885079, + "grad_norm": 0.7070014476776123, + "learning_rate": 5.335357792905628e-05, + "loss": 2.4365, + "step": 13126 + }, + { + "epoch": 1.0593979501250907, + "grad_norm": 0.6887189149856567, + "learning_rate": 5.333961436678422e-05, + "loss": 2.4834, + "step": 13127 + }, + { + "epoch": 1.0594786538616738, + "grad_norm": 0.8150162696838379, + "learning_rate": 5.332565196741098e-05, + "loss": 2.4474, + "step": 13128 + }, + { + "epoch": 1.0595593575982567, + "grad_norm": 0.6681316494941711, + "learning_rate": 5.331169073128447e-05, + "loss": 2.4888, + "step": 13129 + }, + { + "epoch": 1.0596400613348398, + "grad_norm": 0.6696690320968628, + "learning_rate": 5.329773065875267e-05, + "loss": 
2.3874, + "step": 13130 + }, + { + "epoch": 1.0597207650714229, + "grad_norm": 0.729807436466217, + "learning_rate": 5.32837717501635e-05, + "loss": 2.4442, + "step": 13131 + }, + { + "epoch": 1.0598014688080057, + "grad_norm": 0.6959047913551331, + "learning_rate": 5.326981400586486e-05, + "loss": 2.4697, + "step": 13132 + }, + { + "epoch": 1.0598821725445888, + "grad_norm": 0.667294442653656, + "learning_rate": 5.3255857426204606e-05, + "loss": 2.3986, + "step": 13133 + }, + { + "epoch": 1.059962876281172, + "grad_norm": 0.6953842639923096, + "learning_rate": 5.3241902011530566e-05, + "loss": 2.396, + "step": 13134 + }, + { + "epoch": 1.0600435800177548, + "grad_norm": 0.6544597148895264, + "learning_rate": 5.32279477621906e-05, + "loss": 2.426, + "step": 13135 + }, + { + "epoch": 1.0601242837543379, + "grad_norm": 0.708017885684967, + "learning_rate": 5.321399467853241e-05, + "loss": 2.4931, + "step": 13136 + }, + { + "epoch": 1.0602049874909207, + "grad_norm": 0.6669809818267822, + "learning_rate": 5.3200042760903764e-05, + "loss": 2.4354, + "step": 13137 + }, + { + "epoch": 1.0602856912275038, + "grad_norm": 1.0144098997116089, + "learning_rate": 5.3186092009652435e-05, + "loss": 2.4803, + "step": 13138 + }, + { + "epoch": 1.060366394964087, + "grad_norm": 0.7213768362998962, + "learning_rate": 5.317214242512601e-05, + "loss": 2.4318, + "step": 13139 + }, + { + "epoch": 1.0604470987006698, + "grad_norm": 0.6429069638252258, + "learning_rate": 5.315819400767223e-05, + "loss": 2.458, + "step": 13140 + }, + { + "epoch": 1.0605278024372529, + "grad_norm": 0.6480485796928406, + "learning_rate": 5.3144246757638714e-05, + "loss": 2.4586, + "step": 13141 + }, + { + "epoch": 1.060608506173836, + "grad_norm": 0.7037697434425354, + "learning_rate": 5.3130300675373035e-05, + "loss": 2.4698, + "step": 13142 + }, + { + "epoch": 1.0606892099104188, + "grad_norm": 0.7307559251785278, + "learning_rate": 5.3116355761222725e-05, + "loss": 2.4027, + "step": 13143 + }, + { + 
"epoch": 1.060769913647002, + "grad_norm": 0.6684615612030029, + "learning_rate": 5.310241201553547e-05, + "loss": 2.478, + "step": 13144 + }, + { + "epoch": 1.0608506173835848, + "grad_norm": 0.7018016576766968, + "learning_rate": 5.308846943865866e-05, + "loss": 2.4229, + "step": 13145 + }, + { + "epoch": 1.0609313211201679, + "grad_norm": 0.7538621425628662, + "learning_rate": 5.307452803093982e-05, + "loss": 2.5201, + "step": 13146 + }, + { + "epoch": 1.061012024856751, + "grad_norm": 0.6957963109016418, + "learning_rate": 5.306058779272645e-05, + "loss": 2.4233, + "step": 13147 + }, + { + "epoch": 1.0610927285933338, + "grad_norm": 0.6280590295791626, + "learning_rate": 5.304664872436588e-05, + "loss": 2.5117, + "step": 13148 + }, + { + "epoch": 1.061173432329917, + "grad_norm": 0.6937280297279358, + "learning_rate": 5.3032710826205564e-05, + "loss": 2.4889, + "step": 13149 + }, + { + "epoch": 1.0612541360664998, + "grad_norm": 0.6750391125679016, + "learning_rate": 5.3018774098592884e-05, + "loss": 2.4472, + "step": 13150 + }, + { + "epoch": 1.0613348398030829, + "grad_norm": 0.6931902766227722, + "learning_rate": 5.300483854187519e-05, + "loss": 2.3883, + "step": 13151 + }, + { + "epoch": 1.061415543539666, + "grad_norm": 0.6982774138450623, + "learning_rate": 5.2990904156399726e-05, + "loss": 2.4688, + "step": 13152 + }, + { + "epoch": 1.0614962472762488, + "grad_norm": 0.6873522996902466, + "learning_rate": 5.297697094251382e-05, + "loss": 2.4818, + "step": 13153 + }, + { + "epoch": 1.061576951012832, + "grad_norm": 0.635377049446106, + "learning_rate": 5.296303890056471e-05, + "loss": 2.3906, + "step": 13154 + }, + { + "epoch": 1.061657654749415, + "grad_norm": 0.6368159651756287, + "learning_rate": 5.294910803089963e-05, + "loss": 2.4714, + "step": 13155 + }, + { + "epoch": 1.0617383584859978, + "grad_norm": 0.7147238254547119, + "learning_rate": 5.293517833386576e-05, + "loss": 2.4746, + "step": 13156 + }, + { + "epoch": 1.061819062222581, + 
"grad_norm": 0.742189884185791, + "learning_rate": 5.2921249809810326e-05, + "loss": 2.3913, + "step": 13157 + }, + { + "epoch": 1.061899765959164, + "grad_norm": 0.6665734648704529, + "learning_rate": 5.290732245908038e-05, + "loss": 2.4263, + "step": 13158 + }, + { + "epoch": 1.0619804696957469, + "grad_norm": 0.6894757747650146, + "learning_rate": 5.2893396282023055e-05, + "loss": 2.4204, + "step": 13159 + }, + { + "epoch": 1.06206117343233, + "grad_norm": 0.6394561529159546, + "learning_rate": 5.287947127898546e-05, + "loss": 2.4183, + "step": 13160 + }, + { + "epoch": 1.0621418771689128, + "grad_norm": 0.7422548532485962, + "learning_rate": 5.2865547450314576e-05, + "loss": 2.4454, + "step": 13161 + }, + { + "epoch": 1.062222580905496, + "grad_norm": 0.7486133575439453, + "learning_rate": 5.285162479635748e-05, + "loss": 2.4856, + "step": 13162 + }, + { + "epoch": 1.062303284642079, + "grad_norm": 0.6743031144142151, + "learning_rate": 5.283770331746112e-05, + "loss": 2.4318, + "step": 13163 + }, + { + "epoch": 1.0623839883786619, + "grad_norm": 0.6461686491966248, + "learning_rate": 5.282378301397248e-05, + "loss": 2.4133, + "step": 13164 + }, + { + "epoch": 1.062464692115245, + "grad_norm": 0.6745431423187256, + "learning_rate": 5.28098638862385e-05, + "loss": 2.4463, + "step": 13165 + }, + { + "epoch": 1.0625453958518278, + "grad_norm": 0.6646310687065125, + "learning_rate": 5.279594593460606e-05, + "loss": 2.4211, + "step": 13166 + }, + { + "epoch": 1.062626099588411, + "grad_norm": 0.6789249777793884, + "learning_rate": 5.278202915942207e-05, + "loss": 2.4832, + "step": 13167 + }, + { + "epoch": 1.062706803324994, + "grad_norm": 0.7082679867744446, + "learning_rate": 5.2768113561033326e-05, + "loss": 2.4303, + "step": 13168 + }, + { + "epoch": 1.0627875070615769, + "grad_norm": 0.6875587701797485, + "learning_rate": 5.275419913978664e-05, + "loss": 2.4601, + "step": 13169 + }, + { + "epoch": 1.06286821079816, + "grad_norm": 0.6556203961372375, + 
"learning_rate": 5.274028589602886e-05, + "loss": 2.4359, + "step": 13170 + }, + { + "epoch": 1.062948914534743, + "grad_norm": 0.7280015349388123, + "learning_rate": 5.272637383010666e-05, + "loss": 2.4999, + "step": 13171 + }, + { + "epoch": 1.063029618271326, + "grad_norm": 0.664654016494751, + "learning_rate": 5.271246294236678e-05, + "loss": 2.3951, + "step": 13172 + }, + { + "epoch": 1.063110322007909, + "grad_norm": 0.6941719055175781, + "learning_rate": 5.2698553233155945e-05, + "loss": 2.45, + "step": 13173 + }, + { + "epoch": 1.0631910257444919, + "grad_norm": 0.7212931513786316, + "learning_rate": 5.268464470282082e-05, + "loss": 2.4615, + "step": 13174 + }, + { + "epoch": 1.063271729481075, + "grad_norm": 0.6877106428146362, + "learning_rate": 5.2670737351708014e-05, + "loss": 2.4495, + "step": 13175 + }, + { + "epoch": 1.063352433217658, + "grad_norm": 0.737718939781189, + "learning_rate": 5.26568311801642e-05, + "loss": 2.4971, + "step": 13176 + }, + { + "epoch": 1.063433136954241, + "grad_norm": 0.6909129619598389, + "learning_rate": 5.264292618853587e-05, + "loss": 2.4889, + "step": 13177 + }, + { + "epoch": 1.063513840690824, + "grad_norm": 0.6750304102897644, + "learning_rate": 5.262902237716961e-05, + "loss": 2.4779, + "step": 13178 + }, + { + "epoch": 1.063594544427407, + "grad_norm": 0.7256019115447998, + "learning_rate": 5.2615119746411954e-05, + "loss": 2.4904, + "step": 13179 + }, + { + "epoch": 1.06367524816399, + "grad_norm": 0.7335983514785767, + "learning_rate": 5.26012182966094e-05, + "loss": 2.4357, + "step": 13180 + }, + { + "epoch": 1.063755951900573, + "grad_norm": 0.6534200310707092, + "learning_rate": 5.258731802810837e-05, + "loss": 2.4213, + "step": 13181 + }, + { + "epoch": 1.063836655637156, + "grad_norm": 0.6899768114089966, + "learning_rate": 5.257341894125529e-05, + "loss": 2.4963, + "step": 13182 + }, + { + "epoch": 1.063917359373739, + "grad_norm": 0.7016159892082214, + "learning_rate": 5.25595210363966e-05, + "loss": 
2.4583, + "step": 13183 + }, + { + "epoch": 1.063998063110322, + "grad_norm": 0.6868152022361755, + "learning_rate": 5.2545624313878636e-05, + "loss": 2.4523, + "step": 13184 + }, + { + "epoch": 1.064078766846905, + "grad_norm": 0.7442622184753418, + "learning_rate": 5.2531728774047785e-05, + "loss": 2.425, + "step": 13185 + }, + { + "epoch": 1.064159470583488, + "grad_norm": 0.6900869011878967, + "learning_rate": 5.251783441725037e-05, + "loss": 2.459, + "step": 13186 + }, + { + "epoch": 1.0642401743200711, + "grad_norm": 0.6910288333892822, + "learning_rate": 5.25039412438326e-05, + "loss": 2.4882, + "step": 13187 + }, + { + "epoch": 1.064320878056654, + "grad_norm": 0.7644359469413757, + "learning_rate": 5.249004925414076e-05, + "loss": 2.4663, + "step": 13188 + }, + { + "epoch": 1.064401581793237, + "grad_norm": 0.6703082919120789, + "learning_rate": 5.247615844852114e-05, + "loss": 2.4309, + "step": 13189 + }, + { + "epoch": 1.06448228552982, + "grad_norm": 0.6449835896492004, + "learning_rate": 5.246226882731983e-05, + "loss": 2.4307, + "step": 13190 + }, + { + "epoch": 1.064562989266403, + "grad_norm": 0.7332713603973389, + "learning_rate": 5.244838039088305e-05, + "loss": 2.3763, + "step": 13191 + }, + { + "epoch": 1.0646436930029861, + "grad_norm": 0.7626641988754272, + "learning_rate": 5.2434493139556974e-05, + "loss": 2.4167, + "step": 13192 + }, + { + "epoch": 1.064724396739569, + "grad_norm": 0.6924002170562744, + "learning_rate": 5.2420607073687614e-05, + "loss": 2.4751, + "step": 13193 + }, + { + "epoch": 1.064805100476152, + "grad_norm": 0.6815003156661987, + "learning_rate": 5.2406722193621074e-05, + "loss": 2.4731, + "step": 13194 + }, + { + "epoch": 1.064885804212735, + "grad_norm": 0.7632609009742737, + "learning_rate": 5.239283849970347e-05, + "loss": 2.4562, + "step": 13195 + }, + { + "epoch": 1.064966507949318, + "grad_norm": 0.7157592177391052, + "learning_rate": 5.23789559922808e-05, + "loss": 2.4507, + "step": 13196 + }, + { + "epoch": 
1.065047211685901, + "grad_norm": 0.7035543918609619, + "learning_rate": 5.2365074671699e-05, + "loss": 2.4616, + "step": 13197 + }, + { + "epoch": 1.065127915422484, + "grad_norm": 0.7566644549369812, + "learning_rate": 5.235119453830406e-05, + "loss": 2.4751, + "step": 13198 + }, + { + "epoch": 1.065208619159067, + "grad_norm": 0.7030916213989258, + "learning_rate": 5.233731559244194e-05, + "loss": 2.381, + "step": 13199 + }, + { + "epoch": 1.0652893228956501, + "grad_norm": 0.7663755416870117, + "learning_rate": 5.232343783445847e-05, + "loss": 2.4822, + "step": 13200 + }, + { + "epoch": 1.065370026632233, + "grad_norm": 0.717767596244812, + "learning_rate": 5.230956126469955e-05, + "loss": 2.4807, + "step": 13201 + }, + { + "epoch": 1.065450730368816, + "grad_norm": 0.6920818090438843, + "learning_rate": 5.229568588351108e-05, + "loss": 2.4643, + "step": 13202 + }, + { + "epoch": 1.0655314341053992, + "grad_norm": 0.6812553405761719, + "learning_rate": 5.228181169123877e-05, + "loss": 2.4443, + "step": 13203 + }, + { + "epoch": 1.065612137841982, + "grad_norm": 0.7241889834403992, + "learning_rate": 5.226793868822846e-05, + "loss": 2.4581, + "step": 13204 + }, + { + "epoch": 1.0656928415785651, + "grad_norm": 0.7254642248153687, + "learning_rate": 5.225406687482588e-05, + "loss": 2.4999, + "step": 13205 + }, + { + "epoch": 1.065773545315148, + "grad_norm": 0.7316950559616089, + "learning_rate": 5.2240196251376764e-05, + "loss": 2.4493, + "step": 13206 + }, + { + "epoch": 1.065854249051731, + "grad_norm": 0.7208307385444641, + "learning_rate": 5.22263268182268e-05, + "loss": 2.5083, + "step": 13207 + }, + { + "epoch": 1.0659349527883142, + "grad_norm": 0.6552214622497559, + "learning_rate": 5.22124585757217e-05, + "loss": 2.4662, + "step": 13208 + }, + { + "epoch": 1.066015656524897, + "grad_norm": 0.7949681878089905, + "learning_rate": 5.219859152420701e-05, + "loss": 2.4584, + "step": 13209 + }, + { + "epoch": 1.0660963602614801, + "grad_norm": 
0.7012154459953308, + "learning_rate": 5.2184725664028366e-05, + "loss": 2.4702, + "step": 13210 + }, + { + "epoch": 1.066177063998063, + "grad_norm": 0.7431927919387817, + "learning_rate": 5.217086099553136e-05, + "loss": 2.4422, + "step": 13211 + }, + { + "epoch": 1.066257767734646, + "grad_norm": 0.7235366702079773, + "learning_rate": 5.2156997519061554e-05, + "loss": 2.4173, + "step": 13212 + }, + { + "epoch": 1.0663384714712292, + "grad_norm": 0.7475029826164246, + "learning_rate": 5.214313523496439e-05, + "loss": 2.4924, + "step": 13213 + }, + { + "epoch": 1.066419175207812, + "grad_norm": 0.6326786875724792, + "learning_rate": 5.212927414358542e-05, + "loss": 2.4154, + "step": 13214 + }, + { + "epoch": 1.0664998789443951, + "grad_norm": 0.6755837798118591, + "learning_rate": 5.211541424527004e-05, + "loss": 2.4248, + "step": 13215 + }, + { + "epoch": 1.0665805826809782, + "grad_norm": 0.645395040512085, + "learning_rate": 5.210155554036373e-05, + "loss": 2.4078, + "step": 13216 + }, + { + "epoch": 1.066661286417561, + "grad_norm": 0.799913763999939, + "learning_rate": 5.208769802921185e-05, + "loss": 2.5067, + "step": 13217 + }, + { + "epoch": 1.0667419901541442, + "grad_norm": 0.7056344747543335, + "learning_rate": 5.207384171215983e-05, + "loss": 2.4817, + "step": 13218 + }, + { + "epoch": 1.0668226938907273, + "grad_norm": 0.7082187533378601, + "learning_rate": 5.205998658955291e-05, + "loss": 2.4495, + "step": 13219 + }, + { + "epoch": 1.0669033976273101, + "grad_norm": 0.6948464512825012, + "learning_rate": 5.204613266173646e-05, + "loss": 2.4584, + "step": 13220 + }, + { + "epoch": 1.0669841013638932, + "grad_norm": 0.7812542915344238, + "learning_rate": 5.203227992905575e-05, + "loss": 2.4803, + "step": 13221 + }, + { + "epoch": 1.067064805100476, + "grad_norm": 0.6892200708389282, + "learning_rate": 5.201842839185598e-05, + "loss": 2.4424, + "step": 13222 + }, + { + "epoch": 1.0671455088370592, + "grad_norm": 0.6982070803642273, + "learning_rate": 
5.20045780504824e-05, + "loss": 2.4654, + "step": 13223 + }, + { + "epoch": 1.0672262125736423, + "grad_norm": 0.6799101233482361, + "learning_rate": 5.1990728905280205e-05, + "loss": 2.4748, + "step": 13224 + }, + { + "epoch": 1.0673069163102251, + "grad_norm": 0.6703687906265259, + "learning_rate": 5.1976880956594544e-05, + "loss": 2.4459, + "step": 13225 + }, + { + "epoch": 1.0673876200468082, + "grad_norm": 0.6821435689926147, + "learning_rate": 5.196303420477053e-05, + "loss": 2.4517, + "step": 13226 + }, + { + "epoch": 1.067468323783391, + "grad_norm": 0.6369695067405701, + "learning_rate": 5.194918865015328e-05, + "loss": 2.4388, + "step": 13227 + }, + { + "epoch": 1.0675490275199742, + "grad_norm": 0.6465736627578735, + "learning_rate": 5.1935344293087885e-05, + "loss": 2.3839, + "step": 13228 + }, + { + "epoch": 1.0676297312565572, + "grad_norm": 0.6745415329933167, + "learning_rate": 5.192150113391933e-05, + "loss": 2.4676, + "step": 13229 + }, + { + "epoch": 1.0677104349931401, + "grad_norm": 0.7605211138725281, + "learning_rate": 5.190765917299263e-05, + "loss": 2.4764, + "step": 13230 + }, + { + "epoch": 1.0677911387297232, + "grad_norm": 0.7040959596633911, + "learning_rate": 5.1893818410652825e-05, + "loss": 2.4727, + "step": 13231 + }, + { + "epoch": 1.0678718424663063, + "grad_norm": 0.6718928813934326, + "learning_rate": 5.1879978847244785e-05, + "loss": 2.4308, + "step": 13232 + }, + { + "epoch": 1.0679525462028892, + "grad_norm": 0.6788188219070435, + "learning_rate": 5.1866140483113445e-05, + "loss": 2.4278, + "step": 13233 + }, + { + "epoch": 1.0680332499394722, + "grad_norm": 0.7310218811035156, + "learning_rate": 5.185230331860371e-05, + "loss": 2.4585, + "step": 13234 + }, + { + "epoch": 1.068113953676055, + "grad_norm": 0.8092277646064758, + "learning_rate": 5.183846735406044e-05, + "loss": 2.4128, + "step": 13235 + }, + { + "epoch": 1.0681946574126382, + "grad_norm": 0.6469862461090088, + "learning_rate": 5.182463258982846e-05, + "loss": 
2.4315, + "step": 13236 + }, + { + "epoch": 1.0682753611492213, + "grad_norm": 0.7948115468025208, + "learning_rate": 5.181079902625261e-05, + "loss": 2.5127, + "step": 13237 + }, + { + "epoch": 1.0683560648858041, + "grad_norm": 0.6988852620124817, + "learning_rate": 5.179696666367757e-05, + "loss": 2.432, + "step": 13238 + }, + { + "epoch": 1.0684367686223872, + "grad_norm": 0.6914555430412292, + "learning_rate": 5.1783135502448124e-05, + "loss": 2.4748, + "step": 13239 + }, + { + "epoch": 1.0685174723589703, + "grad_norm": 0.7586313486099243, + "learning_rate": 5.176930554290902e-05, + "loss": 2.4522, + "step": 13240 + }, + { + "epoch": 1.0685981760955532, + "grad_norm": 0.6763948798179626, + "learning_rate": 5.175547678540487e-05, + "loss": 2.4477, + "step": 13241 + }, + { + "epoch": 1.0686788798321363, + "grad_norm": 0.7625983357429504, + "learning_rate": 5.1741649230280334e-05, + "loss": 2.4725, + "step": 13242 + }, + { + "epoch": 1.0687595835687191, + "grad_norm": 0.6574710011482239, + "learning_rate": 5.172782287788005e-05, + "loss": 2.4212, + "step": 13243 + }, + { + "epoch": 1.0688402873053022, + "grad_norm": 0.770062267780304, + "learning_rate": 5.1713997728548615e-05, + "loss": 2.5065, + "step": 13244 + }, + { + "epoch": 1.0689209910418853, + "grad_norm": 0.7719037532806396, + "learning_rate": 5.170017378263057e-05, + "loss": 2.5082, + "step": 13245 + }, + { + "epoch": 1.0690016947784682, + "grad_norm": 0.7106119394302368, + "learning_rate": 5.168635104047046e-05, + "loss": 2.4922, + "step": 13246 + }, + { + "epoch": 1.0690823985150513, + "grad_norm": 0.711815595626831, + "learning_rate": 5.167252950241281e-05, + "loss": 2.498, + "step": 13247 + }, + { + "epoch": 1.0691631022516344, + "grad_norm": 0.6926038265228271, + "learning_rate": 5.165870916880201e-05, + "loss": 2.4464, + "step": 13248 + }, + { + "epoch": 1.0692438059882172, + "grad_norm": 0.6959360241889954, + "learning_rate": 5.164489003998254e-05, + "loss": 2.4668, + "step": 13249 + }, + { + 
"epoch": 1.0693245097248003, + "grad_norm": 0.7165184617042542, + "learning_rate": 5.1631072116298875e-05, + "loss": 2.4198, + "step": 13250 + }, + { + "epoch": 1.0694052134613832, + "grad_norm": 0.7133236527442932, + "learning_rate": 5.161725539809527e-05, + "loss": 2.4691, + "step": 13251 + }, + { + "epoch": 1.0694859171979663, + "grad_norm": 0.7057758569717407, + "learning_rate": 5.160343988571613e-05, + "loss": 2.466, + "step": 13252 + }, + { + "epoch": 1.0695666209345494, + "grad_norm": 0.6808326244354248, + "learning_rate": 5.158962557950583e-05, + "loss": 2.4248, + "step": 13253 + }, + { + "epoch": 1.0696473246711322, + "grad_norm": 0.7166025638580322, + "learning_rate": 5.1575812479808563e-05, + "loss": 2.4753, + "step": 13254 + }, + { + "epoch": 1.0697280284077153, + "grad_norm": 0.7395358085632324, + "learning_rate": 5.156200058696863e-05, + "loss": 2.485, + "step": 13255 + }, + { + "epoch": 1.0698087321442982, + "grad_norm": 0.681106686592102, + "learning_rate": 5.154818990133026e-05, + "loss": 2.5077, + "step": 13256 + }, + { + "epoch": 1.0698894358808813, + "grad_norm": 0.7517002820968628, + "learning_rate": 5.153438042323766e-05, + "loss": 2.5093, + "step": 13257 + }, + { + "epoch": 1.0699701396174643, + "grad_norm": 0.6516926288604736, + "learning_rate": 5.152057215303499e-05, + "loss": 2.4416, + "step": 13258 + }, + { + "epoch": 1.0700508433540472, + "grad_norm": 0.6930893063545227, + "learning_rate": 5.150676509106638e-05, + "loss": 2.506, + "step": 13259 + }, + { + "epoch": 1.0701315470906303, + "grad_norm": 0.7737041115760803, + "learning_rate": 5.1492959237675986e-05, + "loss": 2.4355, + "step": 13260 + }, + { + "epoch": 1.0702122508272134, + "grad_norm": 0.7274872660636902, + "learning_rate": 5.14791545932078e-05, + "loss": 2.5552, + "step": 13261 + }, + { + "epoch": 1.0702929545637963, + "grad_norm": 0.7112408876419067, + "learning_rate": 5.146535115800593e-05, + "loss": 2.4041, + "step": 13262 + }, + { + "epoch": 1.0703736583003793, + 
"grad_norm": 0.6822024583816528, + "learning_rate": 5.1451548932414415e-05, + "loss": 2.4346, + "step": 13263 + }, + { + "epoch": 1.0704543620369624, + "grad_norm": 0.6590598225593567, + "learning_rate": 5.1437747916777165e-05, + "loss": 2.3946, + "step": 13264 + }, + { + "epoch": 1.0705350657735453, + "grad_norm": 0.643014132976532, + "learning_rate": 5.142394811143818e-05, + "loss": 2.4455, + "step": 13265 + }, + { + "epoch": 1.0706157695101284, + "grad_norm": 0.6480194926261902, + "learning_rate": 5.141014951674139e-05, + "loss": 2.4304, + "step": 13266 + }, + { + "epoch": 1.0706964732467112, + "grad_norm": 0.6933526992797852, + "learning_rate": 5.139635213303069e-05, + "loss": 2.4627, + "step": 13267 + }, + { + "epoch": 1.0707771769832943, + "grad_norm": 0.6832638382911682, + "learning_rate": 5.138255596064995e-05, + "loss": 2.4645, + "step": 13268 + }, + { + "epoch": 1.0708578807198774, + "grad_norm": 0.6579757928848267, + "learning_rate": 5.1368760999943034e-05, + "loss": 2.3928, + "step": 13269 + }, + { + "epoch": 1.0709385844564603, + "grad_norm": 0.6658132672309875, + "learning_rate": 5.1354967251253684e-05, + "loss": 2.4732, + "step": 13270 + }, + { + "epoch": 1.0710192881930434, + "grad_norm": 0.7610828876495361, + "learning_rate": 5.13411747149257e-05, + "loss": 2.4781, + "step": 13271 + }, + { + "epoch": 1.0710999919296262, + "grad_norm": 0.682858943939209, + "learning_rate": 5.1327383391302895e-05, + "loss": 2.4545, + "step": 13272 + }, + { + "epoch": 1.0711806956662093, + "grad_norm": 0.7461360692977905, + "learning_rate": 5.131359328072887e-05, + "loss": 2.4647, + "step": 13273 + }, + { + "epoch": 1.0712613994027924, + "grad_norm": 0.6767961382865906, + "learning_rate": 5.129980438354738e-05, + "loss": 2.4562, + "step": 13274 + }, + { + "epoch": 1.0713421031393753, + "grad_norm": 0.6768184304237366, + "learning_rate": 5.1286016700102066e-05, + "loss": 2.4662, + "step": 13275 + }, + { + "epoch": 1.0714228068759584, + "grad_norm": 0.7022743225097656, 
+ "learning_rate": 5.1272230230736554e-05, + "loss": 2.4321, + "step": 13276 + }, + { + "epoch": 1.0715035106125415, + "grad_norm": 0.725488007068634, + "learning_rate": 5.125844497579444e-05, + "loss": 2.457, + "step": 13277 + }, + { + "epoch": 1.0715842143491243, + "grad_norm": 0.7542931437492371, + "learning_rate": 5.124466093561928e-05, + "loss": 2.4302, + "step": 13278 + }, + { + "epoch": 1.0716649180857074, + "grad_norm": 0.6598316431045532, + "learning_rate": 5.123087811055467e-05, + "loss": 2.4552, + "step": 13279 + }, + { + "epoch": 1.0717456218222903, + "grad_norm": 0.7533490061759949, + "learning_rate": 5.1217096500944017e-05, + "loss": 2.4778, + "step": 13280 + }, + { + "epoch": 1.0718263255588734, + "grad_norm": 0.6890795826911926, + "learning_rate": 5.1203316107130825e-05, + "loss": 2.4349, + "step": 13281 + }, + { + "epoch": 1.0719070292954564, + "grad_norm": 0.7004082202911377, + "learning_rate": 5.118953692945862e-05, + "loss": 2.4645, + "step": 13282 + }, + { + "epoch": 1.0719877330320393, + "grad_norm": 0.7409259676933289, + "learning_rate": 5.117575896827068e-05, + "loss": 2.4734, + "step": 13283 + }, + { + "epoch": 1.0720684367686224, + "grad_norm": 0.7035481929779053, + "learning_rate": 5.116198222391046e-05, + "loss": 2.5027, + "step": 13284 + }, + { + "epoch": 1.0721491405052055, + "grad_norm": 0.7146698236465454, + "learning_rate": 5.114820669672132e-05, + "loss": 2.4623, + "step": 13285 + }, + { + "epoch": 1.0722298442417884, + "grad_norm": 0.7813882231712341, + "learning_rate": 5.113443238704656e-05, + "loss": 2.4644, + "step": 13286 + }, + { + "epoch": 1.0723105479783714, + "grad_norm": 0.6592430472373962, + "learning_rate": 5.1120659295229486e-05, + "loss": 2.4682, + "step": 13287 + }, + { + "epoch": 1.0723912517149543, + "grad_norm": 0.7047967910766602, + "learning_rate": 5.1106887421613395e-05, + "loss": 2.4368, + "step": 13288 + }, + { + "epoch": 1.0724719554515374, + "grad_norm": 0.700977087020874, + "learning_rate": 
5.109311676654143e-05, + "loss": 2.4471, + "step": 13289 + }, + { + "epoch": 1.0725526591881205, + "grad_norm": 0.6821093559265137, + "learning_rate": 5.107934733035684e-05, + "loss": 2.433, + "step": 13290 + }, + { + "epoch": 1.0726333629247033, + "grad_norm": 0.6579930186271667, + "learning_rate": 5.1065579113402794e-05, + "loss": 2.4527, + "step": 13291 + }, + { + "epoch": 1.0727140666612864, + "grad_norm": 0.658514678478241, + "learning_rate": 5.105181211602248e-05, + "loss": 2.4443, + "step": 13292 + }, + { + "epoch": 1.0727947703978695, + "grad_norm": 0.6963977217674255, + "learning_rate": 5.103804633855891e-05, + "loss": 2.4699, + "step": 13293 + }, + { + "epoch": 1.0728754741344524, + "grad_norm": 0.6670787334442139, + "learning_rate": 5.102428178135522e-05, + "loss": 2.4672, + "step": 13294 + }, + { + "epoch": 1.0729561778710355, + "grad_norm": 0.6959822773933411, + "learning_rate": 5.1010518444754454e-05, + "loss": 2.4338, + "step": 13295 + }, + { + "epoch": 1.0730368816076183, + "grad_norm": 0.6534817218780518, + "learning_rate": 5.0996756329099614e-05, + "loss": 2.4491, + "step": 13296 + }, + { + "epoch": 1.0731175853442014, + "grad_norm": 0.7265146970748901, + "learning_rate": 5.098299543473371e-05, + "loss": 2.4718, + "step": 13297 + }, + { + "epoch": 1.0731982890807845, + "grad_norm": 0.6554745435714722, + "learning_rate": 5.0969235761999746e-05, + "loss": 2.4286, + "step": 13298 + }, + { + "epoch": 1.0732789928173674, + "grad_norm": 0.7003172039985657, + "learning_rate": 5.095547731124053e-05, + "loss": 2.4182, + "step": 13299 + }, + { + "epoch": 1.0733596965539505, + "grad_norm": 0.6700341105461121, + "learning_rate": 5.094172008279904e-05, + "loss": 2.428, + "step": 13300 + }, + { + "epoch": 1.0734404002905333, + "grad_norm": 0.7290289402008057, + "learning_rate": 5.0927964077018164e-05, + "loss": 2.4324, + "step": 13301 + }, + { + "epoch": 1.0735211040271164, + "grad_norm": 0.6999204158782959, + "learning_rate": 5.0914209294240644e-05, + "loss": 
2.5386, + "step": 13302 + }, + { + "epoch": 1.0736018077636995, + "grad_norm": 0.7008000612258911, + "learning_rate": 5.090045573480935e-05, + "loss": 2.5295, + "step": 13303 + }, + { + "epoch": 1.0736825115002824, + "grad_norm": 0.7023071646690369, + "learning_rate": 5.088670339906705e-05, + "loss": 2.4418, + "step": 13304 + }, + { + "epoch": 1.0737632152368655, + "grad_norm": 0.627174437046051, + "learning_rate": 5.0872952287356525e-05, + "loss": 2.3782, + "step": 13305 + }, + { + "epoch": 1.0738439189734486, + "grad_norm": 0.6992766857147217, + "learning_rate": 5.0859202400020364e-05, + "loss": 2.4698, + "step": 13306 + }, + { + "epoch": 1.0739246227100314, + "grad_norm": 0.7189817428588867, + "learning_rate": 5.084545373740138e-05, + "loss": 2.5248, + "step": 13307 + }, + { + "epoch": 1.0740053264466145, + "grad_norm": 0.6849164962768555, + "learning_rate": 5.0831706299842216e-05, + "loss": 2.4084, + "step": 13308 + }, + { + "epoch": 1.0740860301831976, + "grad_norm": 0.6985825300216675, + "learning_rate": 5.0817960087685424e-05, + "loss": 2.4893, + "step": 13309 + }, + { + "epoch": 1.0741667339197805, + "grad_norm": 0.6519783139228821, + "learning_rate": 5.080421510127362e-05, + "loss": 2.5144, + "step": 13310 + }, + { + "epoch": 1.0742474376563635, + "grad_norm": 0.6605731248855591, + "learning_rate": 5.079047134094941e-05, + "loss": 2.4487, + "step": 13311 + }, + { + "epoch": 1.0743281413929464, + "grad_norm": 0.7236705422401428, + "learning_rate": 5.077672880705526e-05, + "loss": 2.4578, + "step": 13312 + }, + { + "epoch": 1.0744088451295295, + "grad_norm": 0.7126381397247314, + "learning_rate": 5.07629874999337e-05, + "loss": 2.4528, + "step": 13313 + }, + { + "epoch": 1.0744895488661126, + "grad_norm": 0.7247878313064575, + "learning_rate": 5.0749247419927236e-05, + "loss": 2.563, + "step": 13314 + }, + { + "epoch": 1.0745702526026955, + "grad_norm": 0.728349506855011, + "learning_rate": 5.0735508567378234e-05, + "loss": 2.4229, + "step": 13315 + }, + { + 
"epoch": 1.0746509563392785, + "grad_norm": 0.6593719124794006, + "learning_rate": 5.072177094262913e-05, + "loss": 2.4853, + "step": 13316 + }, + { + "epoch": 1.0747316600758614, + "grad_norm": 0.6519735455513, + "learning_rate": 5.070803454602231e-05, + "loss": 2.4507, + "step": 13317 + }, + { + "epoch": 1.0748123638124445, + "grad_norm": 0.6660017371177673, + "learning_rate": 5.0694299377900115e-05, + "loss": 2.4286, + "step": 13318 + }, + { + "epoch": 1.0748930675490276, + "grad_norm": 0.7506695985794067, + "learning_rate": 5.0680565438604876e-05, + "loss": 2.4841, + "step": 13319 + }, + { + "epoch": 1.0749737712856104, + "grad_norm": 0.6855955719947815, + "learning_rate": 5.0666832728478863e-05, + "loss": 2.3817, + "step": 13320 + }, + { + "epoch": 1.0750544750221935, + "grad_norm": 0.7151634693145752, + "learning_rate": 5.065310124786438e-05, + "loss": 2.3984, + "step": 13321 + }, + { + "epoch": 1.0751351787587766, + "grad_norm": 0.6551649570465088, + "learning_rate": 5.063937099710356e-05, + "loss": 2.4574, + "step": 13322 + }, + { + "epoch": 1.0752158824953595, + "grad_norm": 0.7443479895591736, + "learning_rate": 5.062564197653865e-05, + "loss": 2.52, + "step": 13323 + }, + { + "epoch": 1.0752965862319426, + "grad_norm": 0.7554972767829895, + "learning_rate": 5.061191418651186e-05, + "loss": 2.483, + "step": 13324 + }, + { + "epoch": 1.0753772899685254, + "grad_norm": 0.7661007642745972, + "learning_rate": 5.059818762736521e-05, + "loss": 2.566, + "step": 13325 + }, + { + "epoch": 1.0754579937051085, + "grad_norm": 0.7416480183601379, + "learning_rate": 5.058446229944087e-05, + "loss": 2.465, + "step": 13326 + }, + { + "epoch": 1.0755386974416916, + "grad_norm": 0.6997848749160767, + "learning_rate": 5.057073820308089e-05, + "loss": 2.4936, + "step": 13327 + }, + { + "epoch": 1.0756194011782745, + "grad_norm": 0.7570235133171082, + "learning_rate": 5.0557015338627345e-05, + "loss": 2.519, + "step": 13328 + }, + { + "epoch": 1.0757001049148576, + 
"grad_norm": 0.7910803556442261, + "learning_rate": 5.0543293706422214e-05, + "loss": 2.4932, + "step": 13329 + }, + { + "epoch": 1.0757808086514407, + "grad_norm": 0.7068312168121338, + "learning_rate": 5.052957330680752e-05, + "loss": 2.4489, + "step": 13330 + }, + { + "epoch": 1.0758615123880235, + "grad_norm": 0.7818215489387512, + "learning_rate": 5.051585414012514e-05, + "loss": 2.4467, + "step": 13331 + }, + { + "epoch": 1.0759422161246066, + "grad_norm": 0.7359446287155151, + "learning_rate": 5.0502136206717046e-05, + "loss": 2.4348, + "step": 13332 + }, + { + "epoch": 1.0760229198611895, + "grad_norm": 0.694726824760437, + "learning_rate": 5.0488419506925124e-05, + "loss": 2.4554, + "step": 13333 + }, + { + "epoch": 1.0761036235977726, + "grad_norm": 0.6776530742645264, + "learning_rate": 5.047470404109118e-05, + "loss": 2.4206, + "step": 13334 + }, + { + "epoch": 1.0761843273343557, + "grad_norm": 0.6977556943893433, + "learning_rate": 5.0460989809557066e-05, + "loss": 2.4748, + "step": 13335 + }, + { + "epoch": 1.0762650310709385, + "grad_norm": 0.6888061761856079, + "learning_rate": 5.044727681266459e-05, + "loss": 2.4129, + "step": 13336 + }, + { + "epoch": 1.0763457348075216, + "grad_norm": 0.744110643863678, + "learning_rate": 5.043356505075549e-05, + "loss": 2.4815, + "step": 13337 + }, + { + "epoch": 1.0764264385441047, + "grad_norm": 0.6726455688476562, + "learning_rate": 5.041985452417154e-05, + "loss": 2.4299, + "step": 13338 + }, + { + "epoch": 1.0765071422806876, + "grad_norm": 0.6755545735359192, + "learning_rate": 5.040614523325441e-05, + "loss": 2.4188, + "step": 13339 + }, + { + "epoch": 1.0765878460172706, + "grad_norm": 0.7152739763259888, + "learning_rate": 5.039243717834582e-05, + "loss": 2.4366, + "step": 13340 + }, + { + "epoch": 1.0766685497538535, + "grad_norm": 0.7253085374832153, + "learning_rate": 5.037873035978733e-05, + "loss": 2.4681, + "step": 13341 + }, + { + "epoch": 1.0767492534904366, + "grad_norm": 0.6780266165733337, + 
"learning_rate": 5.03650247779206e-05, + "loss": 2.5163, + "step": 13342 + }, + { + "epoch": 1.0768299572270197, + "grad_norm": 0.7440996170043945, + "learning_rate": 5.035132043308722e-05, + "loss": 2.4831, + "step": 13343 + }, + { + "epoch": 1.0769106609636026, + "grad_norm": 0.6619833111763, + "learning_rate": 5.0337617325628695e-05, + "loss": 2.433, + "step": 13344 + }, + { + "epoch": 1.0769913647001856, + "grad_norm": 0.7518059015274048, + "learning_rate": 5.032391545588656e-05, + "loss": 2.4241, + "step": 13345 + }, + { + "epoch": 1.0770720684367687, + "grad_norm": 0.6592784523963928, + "learning_rate": 5.031021482420231e-05, + "loss": 2.4902, + "step": 13346 + }, + { + "epoch": 1.0771527721733516, + "grad_norm": 0.7192299365997314, + "learning_rate": 5.029651543091739e-05, + "loss": 2.4445, + "step": 13347 + }, + { + "epoch": 1.0772334759099347, + "grad_norm": 0.7376793622970581, + "learning_rate": 5.028281727637323e-05, + "loss": 2.4532, + "step": 13348 + }, + { + "epoch": 1.0773141796465175, + "grad_norm": 0.7344524264335632, + "learning_rate": 5.026912036091127e-05, + "loss": 2.4193, + "step": 13349 + }, + { + "epoch": 1.0773948833831006, + "grad_norm": 0.7343986630439758, + "learning_rate": 5.0255424684872785e-05, + "loss": 2.4912, + "step": 13350 + }, + { + "epoch": 1.0774755871196837, + "grad_norm": 0.7103631496429443, + "learning_rate": 5.024173024859916e-05, + "loss": 2.4611, + "step": 13351 + }, + { + "epoch": 1.0775562908562666, + "grad_norm": 0.7554094791412354, + "learning_rate": 5.022803705243169e-05, + "loss": 2.4875, + "step": 13352 + }, + { + "epoch": 1.0776369945928497, + "grad_norm": 0.6754978895187378, + "learning_rate": 5.0214345096711655e-05, + "loss": 2.4585, + "step": 13353 + }, + { + "epoch": 1.0777176983294328, + "grad_norm": 0.690747857093811, + "learning_rate": 5.020065438178026e-05, + "loss": 2.4751, + "step": 13354 + }, + { + "epoch": 1.0777984020660156, + "grad_norm": 0.7012028694152832, + "learning_rate": 5.018696490797874e-05, 
+ "loss": 2.4443, + "step": 13355 + }, + { + "epoch": 1.0778791058025987, + "grad_norm": 0.6788459420204163, + "learning_rate": 5.017327667564831e-05, + "loss": 2.4135, + "step": 13356 + }, + { + "epoch": 1.0779598095391816, + "grad_norm": 0.6662794351577759, + "learning_rate": 5.015958968512997e-05, + "loss": 2.3801, + "step": 13357 + }, + { + "epoch": 1.0780405132757647, + "grad_norm": 0.7873939275741577, + "learning_rate": 5.0145903936764994e-05, + "loss": 2.4629, + "step": 13358 + }, + { + "epoch": 1.0781212170123478, + "grad_norm": 0.7484980225563049, + "learning_rate": 5.0132219430894455e-05, + "loss": 2.4307, + "step": 13359 + }, + { + "epoch": 1.0782019207489306, + "grad_norm": 0.7559076547622681, + "learning_rate": 5.011853616785932e-05, + "loss": 2.4846, + "step": 13360 + }, + { + "epoch": 1.0782826244855137, + "grad_norm": 0.6822710633277893, + "learning_rate": 5.010485414800066e-05, + "loss": 2.4448, + "step": 13361 + }, + { + "epoch": 1.0783633282220966, + "grad_norm": 0.6665955185890198, + "learning_rate": 5.0091173371659496e-05, + "loss": 2.4562, + "step": 13362 + }, + { + "epoch": 1.0784440319586797, + "grad_norm": 0.6645659804344177, + "learning_rate": 5.0077493839176714e-05, + "loss": 2.4545, + "step": 13363 + }, + { + "epoch": 1.0785247356952627, + "grad_norm": 0.6648181080818176, + "learning_rate": 5.0063815550893276e-05, + "loss": 2.4565, + "step": 13364 + }, + { + "epoch": 1.0786054394318456, + "grad_norm": 0.6679299473762512, + "learning_rate": 5.005013850715014e-05, + "loss": 2.4301, + "step": 13365 + }, + { + "epoch": 1.0786861431684287, + "grad_norm": 0.7116484642028809, + "learning_rate": 5.003646270828808e-05, + "loss": 2.4174, + "step": 13366 + }, + { + "epoch": 1.0787668469050118, + "grad_norm": 0.6850735545158386, + "learning_rate": 5.002278815464798e-05, + "loss": 2.4386, + "step": 13367 + }, + { + "epoch": 1.0788475506415947, + "grad_norm": 0.6613513827323914, + "learning_rate": 5.00091148465706e-05, + "loss": 2.4038, + "step": 
13368 + }, + { + "epoch": 1.0789282543781777, + "grad_norm": 0.659635603427887, + "learning_rate": 4.9995442784396827e-05, + "loss": 2.4346, + "step": 13369 + }, + { + "epoch": 1.0790089581147608, + "grad_norm": 0.6775132417678833, + "learning_rate": 4.998177196846731e-05, + "loss": 2.4853, + "step": 13370 + }, + { + "epoch": 1.0790896618513437, + "grad_norm": 0.719860851764679, + "learning_rate": 4.996810239912277e-05, + "loss": 2.4018, + "step": 13371 + }, + { + "epoch": 1.0791703655879268, + "grad_norm": 0.7316389083862305, + "learning_rate": 4.9954434076703946e-05, + "loss": 2.424, + "step": 13372 + }, + { + "epoch": 1.0792510693245096, + "grad_norm": 0.6779622435569763, + "learning_rate": 4.99407670015514e-05, + "loss": 2.4743, + "step": 13373 + }, + { + "epoch": 1.0793317730610927, + "grad_norm": 0.7357139587402344, + "learning_rate": 4.992710117400581e-05, + "loss": 2.4385, + "step": 13374 + }, + { + "epoch": 1.0794124767976758, + "grad_norm": 0.671441912651062, + "learning_rate": 4.9913436594407784e-05, + "loss": 2.3988, + "step": 13375 + }, + { + "epoch": 1.0794931805342587, + "grad_norm": 0.7205149531364441, + "learning_rate": 4.9899773263097804e-05, + "loss": 2.4594, + "step": 13376 + }, + { + "epoch": 1.0795738842708418, + "grad_norm": 0.702910840511322, + "learning_rate": 4.988611118041644e-05, + "loss": 2.4831, + "step": 13377 + }, + { + "epoch": 1.0796545880074246, + "grad_norm": 0.6977962255477905, + "learning_rate": 4.987245034670418e-05, + "loss": 2.422, + "step": 13378 + }, + { + "epoch": 1.0797352917440077, + "grad_norm": 0.7106757760047913, + "learning_rate": 4.985879076230149e-05, + "loss": 2.4073, + "step": 13379 + }, + { + "epoch": 1.0798159954805908, + "grad_norm": 0.7046806812286377, + "learning_rate": 4.9845132427548814e-05, + "loss": 2.4065, + "step": 13380 + }, + { + "epoch": 1.0798966992171737, + "grad_norm": 0.7476605772972107, + "learning_rate": 4.9831475342786574e-05, + "loss": 2.4886, + "step": 13381 + }, + { + "epoch": 
1.0799774029537568, + "grad_norm": 0.696977972984314, + "learning_rate": 4.981781950835508e-05, + "loss": 2.4732, + "step": 13382 + }, + { + "epoch": 1.0800581066903399, + "grad_norm": 0.6596804857254028, + "learning_rate": 4.98041649245947e-05, + "loss": 2.4497, + "step": 13383 + }, + { + "epoch": 1.0801388104269227, + "grad_norm": 0.7216050028800964, + "learning_rate": 4.979051159184573e-05, + "loss": 2.4745, + "step": 13384 + }, + { + "epoch": 1.0802195141635058, + "grad_norm": 0.6636630296707153, + "learning_rate": 4.977685951044852e-05, + "loss": 2.4904, + "step": 13385 + }, + { + "epoch": 1.0803002179000887, + "grad_norm": 0.7030208110809326, + "learning_rate": 4.97632086807432e-05, + "loss": 2.4302, + "step": 13386 + }, + { + "epoch": 1.0803809216366718, + "grad_norm": 0.7158327102661133, + "learning_rate": 4.974955910307004e-05, + "loss": 2.4735, + "step": 13387 + }, + { + "epoch": 1.0804616253732549, + "grad_norm": 0.6736464500427246, + "learning_rate": 4.9735910777769234e-05, + "loss": 2.4334, + "step": 13388 + }, + { + "epoch": 1.0805423291098377, + "grad_norm": 0.6913403272628784, + "learning_rate": 4.972226370518092e-05, + "loss": 2.468, + "step": 13389 + }, + { + "epoch": 1.0806230328464208, + "grad_norm": 0.7006524205207825, + "learning_rate": 4.970861788564522e-05, + "loss": 2.4598, + "step": 13390 + }, + { + "epoch": 1.080703736583004, + "grad_norm": 0.6892947554588318, + "learning_rate": 4.969497331950227e-05, + "loss": 2.4297, + "step": 13391 + }, + { + "epoch": 1.0807844403195868, + "grad_norm": 0.7270283699035645, + "learning_rate": 4.968133000709203e-05, + "loss": 2.5344, + "step": 13392 + }, + { + "epoch": 1.0808651440561698, + "grad_norm": 0.735342264175415, + "learning_rate": 4.9667687948754594e-05, + "loss": 2.4431, + "step": 13393 + }, + { + "epoch": 1.0809458477927527, + "grad_norm": 0.6869279146194458, + "learning_rate": 4.9654047144829974e-05, + "loss": 2.5581, + "step": 13394 + }, + { + "epoch": 1.0810265515293358, + "grad_norm": 
0.6975715160369873, + "learning_rate": 4.964040759565808e-05, + "loss": 2.4328, + "step": 13395 + }, + { + "epoch": 1.0811072552659189, + "grad_norm": 0.7312532067298889, + "learning_rate": 4.9626769301578856e-05, + "loss": 2.4686, + "step": 13396 + }, + { + "epoch": 1.0811879590025018, + "grad_norm": 0.7824496626853943, + "learning_rate": 4.9613132262932215e-05, + "loss": 2.4564, + "step": 13397 + }, + { + "epoch": 1.0812686627390848, + "grad_norm": 0.7337941527366638, + "learning_rate": 4.959949648005805e-05, + "loss": 2.4752, + "step": 13398 + }, + { + "epoch": 1.081349366475668, + "grad_norm": 0.7450836300849915, + "learning_rate": 4.958586195329617e-05, + "loss": 2.4457, + "step": 13399 + }, + { + "epoch": 1.0814300702122508, + "grad_norm": 0.6990504860877991, + "learning_rate": 4.9572228682986385e-05, + "loss": 2.4172, + "step": 13400 + }, + { + "epoch": 1.0815107739488339, + "grad_norm": 0.7293999791145325, + "learning_rate": 4.955859666946853e-05, + "loss": 2.5295, + "step": 13401 + }, + { + "epoch": 1.0815914776854167, + "grad_norm": 0.6872537136077881, + "learning_rate": 4.9544965913082264e-05, + "loss": 2.5029, + "step": 13402 + }, + { + "epoch": 1.0816721814219998, + "grad_norm": 0.6821706891059875, + "learning_rate": 4.953133641416733e-05, + "loss": 2.4738, + "step": 13403 + }, + { + "epoch": 1.081752885158583, + "grad_norm": 0.6811527609825134, + "learning_rate": 4.951770817306346e-05, + "loss": 2.4323, + "step": 13404 + }, + { + "epoch": 1.0818335888951658, + "grad_norm": 0.7138943076133728, + "learning_rate": 4.950408119011023e-05, + "loss": 2.5155, + "step": 13405 + }, + { + "epoch": 1.0819142926317489, + "grad_norm": 0.6777952909469604, + "learning_rate": 4.949045546564729e-05, + "loss": 2.4414, + "step": 13406 + }, + { + "epoch": 1.0819949963683317, + "grad_norm": 0.7065548896789551, + "learning_rate": 4.9476831000014276e-05, + "loss": 2.4913, + "step": 13407 + }, + { + "epoch": 1.0820757001049148, + "grad_norm": 0.7286355495452881, + 
"learning_rate": 4.9463207793550626e-05, + "loss": 2.4171, + "step": 13408 + }, + { + "epoch": 1.082156403841498, + "grad_norm": 0.6703049540519714, + "learning_rate": 4.944958584659597e-05, + "loss": 2.4387, + "step": 13409 + }, + { + "epoch": 1.0822371075780808, + "grad_norm": 0.6572019457817078, + "learning_rate": 4.943596515948983e-05, + "loss": 2.4324, + "step": 13410 + }, + { + "epoch": 1.0823178113146639, + "grad_norm": 0.6722360849380493, + "learning_rate": 4.942234573257156e-05, + "loss": 2.4802, + "step": 13411 + }, + { + "epoch": 1.082398515051247, + "grad_norm": 0.7122535109519958, + "learning_rate": 4.9408727566180655e-05, + "loss": 2.4531, + "step": 13412 + }, + { + "epoch": 1.0824792187878298, + "grad_norm": 0.6769903898239136, + "learning_rate": 4.9395110660656505e-05, + "loss": 2.4549, + "step": 13413 + }, + { + "epoch": 1.082559922524413, + "grad_norm": 0.766251266002655, + "learning_rate": 4.938149501633852e-05, + "loss": 2.4416, + "step": 13414 + }, + { + "epoch": 1.082640626260996, + "grad_norm": 0.6677987575531006, + "learning_rate": 4.936788063356596e-05, + "loss": 2.4578, + "step": 13415 + }, + { + "epoch": 1.0827213299975789, + "grad_norm": 0.7461380362510681, + "learning_rate": 4.9354267512678156e-05, + "loss": 2.4776, + "step": 13416 + }, + { + "epoch": 1.082802033734162, + "grad_norm": 0.6681976914405823, + "learning_rate": 4.934065565401443e-05, + "loss": 2.5044, + "step": 13417 + }, + { + "epoch": 1.0828827374707448, + "grad_norm": 0.6809324622154236, + "learning_rate": 4.932704505791397e-05, + "loss": 2.4651, + "step": 13418 + }, + { + "epoch": 1.082963441207328, + "grad_norm": 0.6926563382148743, + "learning_rate": 4.931343572471596e-05, + "loss": 2.4633, + "step": 13419 + }, + { + "epoch": 1.083044144943911, + "grad_norm": 0.6451820135116577, + "learning_rate": 4.929982765475971e-05, + "loss": 2.474, + "step": 13420 + }, + { + "epoch": 1.0831248486804939, + "grad_norm": 0.7088493704795837, + "learning_rate": 4.9286220848384247e-05, 
+ "loss": 2.462, + "step": 13421 + }, + { + "epoch": 1.083205552417077, + "grad_norm": 0.7819172739982605, + "learning_rate": 4.9272615305928725e-05, + "loss": 2.4534, + "step": 13422 + }, + { + "epoch": 1.0832862561536598, + "grad_norm": 0.6579666137695312, + "learning_rate": 4.925901102773227e-05, + "loss": 2.4101, + "step": 13423 + }, + { + "epoch": 1.083366959890243, + "grad_norm": 0.6999555230140686, + "learning_rate": 4.924540801413385e-05, + "loss": 2.4534, + "step": 13424 + }, + { + "epoch": 1.083447663626826, + "grad_norm": 0.7034400105476379, + "learning_rate": 4.9231806265472555e-05, + "loss": 2.4741, + "step": 13425 + }, + { + "epoch": 1.0835283673634089, + "grad_norm": 0.6595034599304199, + "learning_rate": 4.921820578208739e-05, + "loss": 2.4011, + "step": 13426 + }, + { + "epoch": 1.083609071099992, + "grad_norm": 0.666419267654419, + "learning_rate": 4.920460656431723e-05, + "loss": 2.4399, + "step": 13427 + }, + { + "epoch": 1.083689774836575, + "grad_norm": 0.7058294415473938, + "learning_rate": 4.919100861250108e-05, + "loss": 2.434, + "step": 13428 + }, + { + "epoch": 1.083770478573158, + "grad_norm": 0.7045806050300598, + "learning_rate": 4.917741192697779e-05, + "loss": 2.4616, + "step": 13429 + }, + { + "epoch": 1.083851182309741, + "grad_norm": 0.6565639972686768, + "learning_rate": 4.916381650808626e-05, + "loss": 2.3864, + "step": 13430 + }, + { + "epoch": 1.0839318860463238, + "grad_norm": 0.6939674615859985, + "learning_rate": 4.9150222356165295e-05, + "loss": 2.4217, + "step": 13431 + }, + { + "epoch": 1.084012589782907, + "grad_norm": 0.7240599989891052, + "learning_rate": 4.913662947155373e-05, + "loss": 2.447, + "step": 13432 + }, + { + "epoch": 1.08409329351949, + "grad_norm": 0.7369012832641602, + "learning_rate": 4.9123037854590336e-05, + "loss": 2.4588, + "step": 13433 + }, + { + "epoch": 1.0841739972560729, + "grad_norm": 0.714269757270813, + "learning_rate": 4.9109447505613803e-05, + "loss": 2.4921, + "step": 13434 + }, + { + 
"epoch": 1.084254700992656, + "grad_norm": 0.7541659474372864, + "learning_rate": 4.909585842496287e-05, + "loss": 2.4191, + "step": 13435 + }, + { + "epoch": 1.084335404729239, + "grad_norm": 0.7245596051216125, + "learning_rate": 4.9082270612976243e-05, + "loss": 2.4904, + "step": 13436 + }, + { + "epoch": 1.084416108465822, + "grad_norm": 0.7301090359687805, + "learning_rate": 4.90686840699925e-05, + "loss": 2.4461, + "step": 13437 + }, + { + "epoch": 1.084496812202405, + "grad_norm": 0.7404102683067322, + "learning_rate": 4.905509879635028e-05, + "loss": 2.4826, + "step": 13438 + }, + { + "epoch": 1.0845775159389879, + "grad_norm": 0.7053710222244263, + "learning_rate": 4.9041514792388175e-05, + "loss": 2.4231, + "step": 13439 + }, + { + "epoch": 1.084658219675571, + "grad_norm": 0.6171362400054932, + "learning_rate": 4.9027932058444724e-05, + "loss": 2.4472, + "step": 13440 + }, + { + "epoch": 1.084738923412154, + "grad_norm": 0.7367038130760193, + "learning_rate": 4.901435059485845e-05, + "loss": 2.4847, + "step": 13441 + }, + { + "epoch": 1.084819627148737, + "grad_norm": 0.754828691482544, + "learning_rate": 4.900077040196788e-05, + "loss": 2.4731, + "step": 13442 + }, + { + "epoch": 1.08490033088532, + "grad_norm": 0.7380684018135071, + "learning_rate": 4.8987191480111386e-05, + "loss": 2.4227, + "step": 13443 + }, + { + "epoch": 1.084981034621903, + "grad_norm": 0.6711444854736328, + "learning_rate": 4.897361382962742e-05, + "loss": 2.4744, + "step": 13444 + }, + { + "epoch": 1.085061738358486, + "grad_norm": 0.7709227204322815, + "learning_rate": 4.896003745085438e-05, + "loss": 2.5422, + "step": 13445 + }, + { + "epoch": 1.085142442095069, + "grad_norm": 0.6778519153594971, + "learning_rate": 4.8946462344130675e-05, + "loss": 2.4757, + "step": 13446 + }, + { + "epoch": 1.085223145831652, + "grad_norm": 0.7390698194503784, + "learning_rate": 4.893288850979454e-05, + "loss": 2.4214, + "step": 13447 + }, + { + "epoch": 1.085303849568235, + "grad_norm": 
0.6632684469223022, + "learning_rate": 4.891931594818432e-05, + "loss": 2.4689, + "step": 13448 + }, + { + "epoch": 1.085384553304818, + "grad_norm": 0.68693608045578, + "learning_rate": 4.890574465963827e-05, + "loss": 2.4788, + "step": 13449 + }, + { + "epoch": 1.085465257041401, + "grad_norm": 0.6910344362258911, + "learning_rate": 4.8892174644494625e-05, + "loss": 2.4611, + "step": 13450 + }, + { + "epoch": 1.085545960777984, + "grad_norm": 0.6935380101203918, + "learning_rate": 4.887860590309158e-05, + "loss": 2.4481, + "step": 13451 + }, + { + "epoch": 1.085626664514567, + "grad_norm": 0.7086954712867737, + "learning_rate": 4.886503843576735e-05, + "loss": 2.4583, + "step": 13452 + }, + { + "epoch": 1.08570736825115, + "grad_norm": 0.7447777986526489, + "learning_rate": 4.8851472242859994e-05, + "loss": 2.5035, + "step": 13453 + }, + { + "epoch": 1.085788071987733, + "grad_norm": 0.6896036267280579, + "learning_rate": 4.8837907324707656e-05, + "loss": 2.4622, + "step": 13454 + }, + { + "epoch": 1.085868775724316, + "grad_norm": 0.7261155247688293, + "learning_rate": 4.882434368164843e-05, + "loss": 2.4958, + "step": 13455 + }, + { + "epoch": 1.085949479460899, + "grad_norm": 0.6868197321891785, + "learning_rate": 4.881078131402031e-05, + "loss": 2.4952, + "step": 13456 + }, + { + "epoch": 1.0860301831974821, + "grad_norm": 0.6338867545127869, + "learning_rate": 4.879722022216132e-05, + "loss": 2.4553, + "step": 13457 + }, + { + "epoch": 1.086110886934065, + "grad_norm": 0.7214454412460327, + "learning_rate": 4.878366040640946e-05, + "loss": 2.4433, + "step": 13458 + }, + { + "epoch": 1.086191590670648, + "grad_norm": 0.6871301531791687, + "learning_rate": 4.877010186710266e-05, + "loss": 2.4118, + "step": 13459 + }, + { + "epoch": 1.0862722944072312, + "grad_norm": 0.6845650672912598, + "learning_rate": 4.875654460457883e-05, + "loss": 2.4684, + "step": 13460 + }, + { + "epoch": 1.086352998143814, + "grad_norm": 0.7027513980865479, + "learning_rate": 
4.8742988619175865e-05, + "loss": 2.4569, + "step": 13461 + }, + { + "epoch": 1.0864337018803971, + "grad_norm": 0.6428621411323547, + "learning_rate": 4.8729433911231646e-05, + "loss": 2.4211, + "step": 13462 + }, + { + "epoch": 1.08651440561698, + "grad_norm": 0.6921488046646118, + "learning_rate": 4.8715880481083934e-05, + "loss": 2.4668, + "step": 13463 + }, + { + "epoch": 1.086595109353563, + "grad_norm": 0.7001025676727295, + "learning_rate": 4.870232832907051e-05, + "loss": 2.4685, + "step": 13464 + }, + { + "epoch": 1.0866758130901462, + "grad_norm": 0.7460644245147705, + "learning_rate": 4.868877745552922e-05, + "loss": 2.3922, + "step": 13465 + }, + { + "epoch": 1.086756516826729, + "grad_norm": 0.7418891191482544, + "learning_rate": 4.867522786079768e-05, + "loss": 2.3777, + "step": 13466 + }, + { + "epoch": 1.0868372205633121, + "grad_norm": 0.6430083513259888, + "learning_rate": 4.8661679545213625e-05, + "loss": 2.4385, + "step": 13467 + }, + { + "epoch": 1.086917924299895, + "grad_norm": 0.6963593363761902, + "learning_rate": 4.864813250911475e-05, + "loss": 2.4083, + "step": 13468 + }, + { + "epoch": 1.086998628036478, + "grad_norm": 0.6796097159385681, + "learning_rate": 4.8634586752838606e-05, + "loss": 2.4984, + "step": 13469 + }, + { + "epoch": 1.0870793317730612, + "grad_norm": 0.6845307946205139, + "learning_rate": 4.862104227672281e-05, + "loss": 2.4168, + "step": 13470 + }, + { + "epoch": 1.087160035509644, + "grad_norm": 0.705348014831543, + "learning_rate": 4.8607499081105e-05, + "loss": 2.4216, + "step": 13471 + }, + { + "epoch": 1.087240739246227, + "grad_norm": 0.6906474828720093, + "learning_rate": 4.8593957166322636e-05, + "loss": 2.4955, + "step": 13472 + }, + { + "epoch": 1.0873214429828102, + "grad_norm": 0.696489691734314, + "learning_rate": 4.858041653271323e-05, + "loss": 2.4186, + "step": 13473 + }, + { + "epoch": 1.087402146719393, + "grad_norm": 0.6997761726379395, + "learning_rate": 4.856687718061429e-05, + "loss": 2.441, + 
"step": 13474 + }, + { + "epoch": 1.0874828504559761, + "grad_norm": 0.6515649557113647, + "learning_rate": 4.8553339110363184e-05, + "loss": 2.3997, + "step": 13475 + }, + { + "epoch": 1.087563554192559, + "grad_norm": 0.6902725696563721, + "learning_rate": 4.853980232229734e-05, + "loss": 2.4765, + "step": 13476 + }, + { + "epoch": 1.087644257929142, + "grad_norm": 0.6832055449485779, + "learning_rate": 4.852626681675415e-05, + "loss": 2.411, + "step": 13477 + }, + { + "epoch": 1.0877249616657252, + "grad_norm": 0.668520987033844, + "learning_rate": 4.8512732594070984e-05, + "loss": 2.4742, + "step": 13478 + }, + { + "epoch": 1.087805665402308, + "grad_norm": 0.7019832134246826, + "learning_rate": 4.849919965458507e-05, + "loss": 2.4638, + "step": 13479 + }, + { + "epoch": 1.0878863691388911, + "grad_norm": 0.6986027359962463, + "learning_rate": 4.8485667998633724e-05, + "loss": 2.4866, + "step": 13480 + }, + { + "epoch": 1.0879670728754742, + "grad_norm": 0.659037709236145, + "learning_rate": 4.8472137626554195e-05, + "loss": 2.4821, + "step": 13481 + }, + { + "epoch": 1.088047776612057, + "grad_norm": 0.6506801247596741, + "learning_rate": 4.8458608538683694e-05, + "loss": 2.4686, + "step": 13482 + }, + { + "epoch": 1.0881284803486402, + "grad_norm": 0.7136878967285156, + "learning_rate": 4.844508073535939e-05, + "loss": 2.4523, + "step": 13483 + }, + { + "epoch": 1.088209184085223, + "grad_norm": 0.6663414239883423, + "learning_rate": 4.843155421691848e-05, + "loss": 2.4287, + "step": 13484 + }, + { + "epoch": 1.0882898878218061, + "grad_norm": 0.7192783355712891, + "learning_rate": 4.8418028983698006e-05, + "loss": 2.4433, + "step": 13485 + }, + { + "epoch": 1.0883705915583892, + "grad_norm": 0.6620980501174927, + "learning_rate": 4.8404505036035086e-05, + "loss": 2.4823, + "step": 13486 + }, + { + "epoch": 1.088451295294972, + "grad_norm": 0.6282123327255249, + "learning_rate": 4.83909823742668e-05, + "loss": 2.4641, + "step": 13487 + }, + { + "epoch": 
1.0885319990315552, + "grad_norm": 0.6384354829788208, + "learning_rate": 4.837746099873012e-05, + "loss": 2.4234, + "step": 13488 + }, + { + "epoch": 1.0886127027681383, + "grad_norm": 0.6550076603889465, + "learning_rate": 4.836394090976204e-05, + "loss": 2.4743, + "step": 13489 + }, + { + "epoch": 1.0886934065047211, + "grad_norm": 0.6987888216972351, + "learning_rate": 4.8350422107699545e-05, + "loss": 2.4263, + "step": 13490 + }, + { + "epoch": 1.0887741102413042, + "grad_norm": 0.7012613415718079, + "learning_rate": 4.833690459287953e-05, + "loss": 2.4801, + "step": 13491 + }, + { + "epoch": 1.088854813977887, + "grad_norm": 0.6986923217773438, + "learning_rate": 4.832338836563891e-05, + "loss": 2.426, + "step": 13492 + }, + { + "epoch": 1.0889355177144702, + "grad_norm": 0.6936241984367371, + "learning_rate": 4.830987342631453e-05, + "loss": 2.4361, + "step": 13493 + }, + { + "epoch": 1.0890162214510533, + "grad_norm": 0.6612359881401062, + "learning_rate": 4.8296359775243275e-05, + "loss": 2.4385, + "step": 13494 + }, + { + "epoch": 1.0890969251876361, + "grad_norm": 0.6927692294120789, + "learning_rate": 4.828284741276183e-05, + "loss": 2.4692, + "step": 13495 + }, + { + "epoch": 1.0891776289242192, + "grad_norm": 0.6710225343704224, + "learning_rate": 4.8269336339207036e-05, + "loss": 2.4078, + "step": 13496 + }, + { + "epoch": 1.0892583326608023, + "grad_norm": 0.639076828956604, + "learning_rate": 4.825582655491564e-05, + "loss": 2.4368, + "step": 13497 + }, + { + "epoch": 1.0893390363973852, + "grad_norm": 0.7050483226776123, + "learning_rate": 4.824231806022426e-05, + "loss": 2.4308, + "step": 13498 + }, + { + "epoch": 1.0894197401339683, + "grad_norm": 0.7097769975662231, + "learning_rate": 4.822881085546962e-05, + "loss": 2.4378, + "step": 13499 + }, + { + "epoch": 1.0895004438705511, + "grad_norm": 0.6939458847045898, + "learning_rate": 4.821530494098834e-05, + "loss": 2.4678, + "step": 13500 + }, + { + "epoch": 1.0895811476071342, + "grad_norm": 
0.6797441840171814, + "learning_rate": 4.8201800317117016e-05, + "loss": 2.4837, + "step": 13501 + }, + { + "epoch": 1.0896618513437173, + "grad_norm": 0.7451521158218384, + "learning_rate": 4.818829698419225e-05, + "loss": 2.4651, + "step": 13502 + }, + { + "epoch": 1.0897425550803002, + "grad_norm": 0.6749109625816345, + "learning_rate": 4.8174794942550585e-05, + "loss": 2.4569, + "step": 13503 + }, + { + "epoch": 1.0898232588168832, + "grad_norm": 0.6321636438369751, + "learning_rate": 4.8161294192528474e-05, + "loss": 2.4049, + "step": 13504 + }, + { + "epoch": 1.0899039625534663, + "grad_norm": 0.7002367377281189, + "learning_rate": 4.8147794734462415e-05, + "loss": 2.4489, + "step": 13505 + }, + { + "epoch": 1.0899846662900492, + "grad_norm": 0.758057713508606, + "learning_rate": 4.813429656868889e-05, + "loss": 2.436, + "step": 13506 + }, + { + "epoch": 1.0900653700266323, + "grad_norm": 0.6665529012680054, + "learning_rate": 4.812079969554424e-05, + "loss": 2.3805, + "step": 13507 + }, + { + "epoch": 1.0901460737632152, + "grad_norm": 0.6962547898292542, + "learning_rate": 4.810730411536487e-05, + "loss": 2.4203, + "step": 13508 + }, + { + "epoch": 1.0902267774997982, + "grad_norm": 0.6860647201538086, + "learning_rate": 4.809380982848712e-05, + "loss": 2.4482, + "step": 13509 + }, + { + "epoch": 1.0903074812363813, + "grad_norm": 0.7045090198516846, + "learning_rate": 4.808031683524733e-05, + "loss": 2.4155, + "step": 13510 + }, + { + "epoch": 1.0903881849729642, + "grad_norm": 0.6609304547309875, + "learning_rate": 4.806682513598176e-05, + "loss": 2.4295, + "step": 13511 + }, + { + "epoch": 1.0904688887095473, + "grad_norm": 0.7647323608398438, + "learning_rate": 4.8053334731026665e-05, + "loss": 2.4704, + "step": 13512 + }, + { + "epoch": 1.0905495924461301, + "grad_norm": 0.677449643611908, + "learning_rate": 4.803984562071829e-05, + "loss": 2.4501, + "step": 13513 + }, + { + "epoch": 1.0906302961827132, + "grad_norm": 0.645866334438324, + 
"learning_rate": 4.8026357805392754e-05, + "loss": 2.427, + "step": 13514 + }, + { + "epoch": 1.0907109999192963, + "grad_norm": 0.6968488097190857, + "learning_rate": 4.801287128538624e-05, + "loss": 2.3933, + "step": 13515 + }, + { + "epoch": 1.0907917036558792, + "grad_norm": 0.7137444615364075, + "learning_rate": 4.799938606103491e-05, + "loss": 2.4611, + "step": 13516 + }, + { + "epoch": 1.0908724073924623, + "grad_norm": 0.6860007047653198, + "learning_rate": 4.7985902132674765e-05, + "loss": 2.4252, + "step": 13517 + }, + { + "epoch": 1.0909531111290454, + "grad_norm": 0.726290762424469, + "learning_rate": 4.797241950064192e-05, + "loss": 2.44, + "step": 13518 + }, + { + "epoch": 1.0910338148656282, + "grad_norm": 0.6833362579345703, + "learning_rate": 4.795893816527241e-05, + "loss": 2.4199, + "step": 13519 + }, + { + "epoch": 1.0911145186022113, + "grad_norm": 0.7412242293357849, + "learning_rate": 4.794545812690212e-05, + "loss": 2.5412, + "step": 13520 + }, + { + "epoch": 1.0911952223387944, + "grad_norm": 0.6882274150848389, + "learning_rate": 4.793197938586712e-05, + "loss": 2.473, + "step": 13521 + }, + { + "epoch": 1.0912759260753773, + "grad_norm": 0.7334007024765015, + "learning_rate": 4.791850194250335e-05, + "loss": 2.4357, + "step": 13522 + }, + { + "epoch": 1.0913566298119604, + "grad_norm": 0.6564081311225891, + "learning_rate": 4.790502579714661e-05, + "loss": 2.4425, + "step": 13523 + }, + { + "epoch": 1.0914373335485432, + "grad_norm": 0.7045762538909912, + "learning_rate": 4.78915509501328e-05, + "loss": 2.4929, + "step": 13524 + }, + { + "epoch": 1.0915180372851263, + "grad_norm": 0.7512505650520325, + "learning_rate": 4.787807740179776e-05, + "loss": 2.4187, + "step": 13525 + }, + { + "epoch": 1.0915987410217094, + "grad_norm": 0.6592997908592224, + "learning_rate": 4.786460515247732e-05, + "loss": 2.4344, + "step": 13526 + }, + { + "epoch": 1.0916794447582923, + "grad_norm": 0.6721770763397217, + "learning_rate": 4.785113420250715e-05, 
+ "loss": 2.4415, + "step": 13527 + }, + { + "epoch": 1.0917601484948753, + "grad_norm": 0.7544431686401367, + "learning_rate": 4.783766455222305e-05, + "loss": 2.4831, + "step": 13528 + }, + { + "epoch": 1.0918408522314582, + "grad_norm": 0.7226355671882629, + "learning_rate": 4.782419620196073e-05, + "loss": 2.4807, + "step": 13529 + }, + { + "epoch": 1.0919215559680413, + "grad_norm": 0.6386340260505676, + "learning_rate": 4.78107291520558e-05, + "loss": 2.4062, + "step": 13530 + }, + { + "epoch": 1.0920022597046244, + "grad_norm": 0.6670595407485962, + "learning_rate": 4.7797263402843926e-05, + "loss": 2.4009, + "step": 13531 + }, + { + "epoch": 1.0920829634412073, + "grad_norm": 0.6600756049156189, + "learning_rate": 4.778379895466071e-05, + "loss": 2.4321, + "step": 13532 + }, + { + "epoch": 1.0921636671777903, + "grad_norm": 0.7190701961517334, + "learning_rate": 4.77703358078417e-05, + "loss": 2.4229, + "step": 13533 + }, + { + "epoch": 1.0922443709143734, + "grad_norm": 0.6554828882217407, + "learning_rate": 4.775687396272247e-05, + "loss": 2.442, + "step": 13534 + }, + { + "epoch": 1.0923250746509563, + "grad_norm": 0.6720205545425415, + "learning_rate": 4.774341341963853e-05, + "loss": 2.4994, + "step": 13535 + }, + { + "epoch": 1.0924057783875394, + "grad_norm": 0.7161003947257996, + "learning_rate": 4.7729954178925295e-05, + "loss": 2.4666, + "step": 13536 + }, + { + "epoch": 1.0924864821241222, + "grad_norm": 0.6817156672477722, + "learning_rate": 4.771649624091824e-05, + "loss": 2.4203, + "step": 13537 + }, + { + "epoch": 1.0925671858607053, + "grad_norm": 0.7167035937309265, + "learning_rate": 4.770303960595277e-05, + "loss": 2.4214, + "step": 13538 + }, + { + "epoch": 1.0926478895972884, + "grad_norm": 0.6373945474624634, + "learning_rate": 4.768958427436429e-05, + "loss": 2.485, + "step": 13539 + }, + { + "epoch": 1.0927285933338713, + "grad_norm": 0.7361387014389038, + "learning_rate": 4.767613024648808e-05, + "loss": 2.5192, + "step": 13540 + }, 
+ { + "epoch": 1.0928092970704544, + "grad_norm": 0.7034375667572021, + "learning_rate": 4.766267752265947e-05, + "loss": 2.4324, + "step": 13541 + }, + { + "epoch": 1.0928900008070375, + "grad_norm": 0.7355689406394958, + "learning_rate": 4.7649226103213765e-05, + "loss": 2.5048, + "step": 13542 + }, + { + "epoch": 1.0929707045436203, + "grad_norm": 0.7120445966720581, + "learning_rate": 4.7635775988486176e-05, + "loss": 2.449, + "step": 13543 + }, + { + "epoch": 1.0930514082802034, + "grad_norm": 0.695888876914978, + "learning_rate": 4.7622327178811935e-05, + "loss": 2.4974, + "step": 13544 + }, + { + "epoch": 1.0931321120167863, + "grad_norm": 0.6953639984130859, + "learning_rate": 4.760887967452625e-05, + "loss": 2.3927, + "step": 13545 + }, + { + "epoch": 1.0932128157533694, + "grad_norm": 0.6457183957099915, + "learning_rate": 4.759543347596421e-05, + "loss": 2.4501, + "step": 13546 + }, + { + "epoch": 1.0932935194899525, + "grad_norm": 0.7259296774864197, + "learning_rate": 4.7581988583460946e-05, + "loss": 2.4896, + "step": 13547 + }, + { + "epoch": 1.0933742232265353, + "grad_norm": 0.6897724270820618, + "learning_rate": 4.7568544997351586e-05, + "loss": 2.4181, + "step": 13548 + }, + { + "epoch": 1.0934549269631184, + "grad_norm": 0.6723688840866089, + "learning_rate": 4.755510271797111e-05, + "loss": 2.5097, + "step": 13549 + }, + { + "epoch": 1.0935356306997015, + "grad_norm": 0.7353307604789734, + "learning_rate": 4.754166174565456e-05, + "loss": 2.4548, + "step": 13550 + }, + { + "epoch": 1.0936163344362844, + "grad_norm": 0.7334069013595581, + "learning_rate": 4.752822208073693e-05, + "loss": 2.5113, + "step": 13551 + }, + { + "epoch": 1.0936970381728675, + "grad_norm": 0.6581420302391052, + "learning_rate": 4.751478372355317e-05, + "loss": 2.4546, + "step": 13552 + }, + { + "epoch": 1.0937777419094503, + "grad_norm": 0.7890802621841431, + "learning_rate": 4.75013466744382e-05, + "loss": 2.4092, + "step": 13553 + }, + { + "epoch": 1.0938584456460334, 
+ "grad_norm": 0.7226595282554626, + "learning_rate": 4.7487910933726895e-05, + "loss": 2.457, + "step": 13554 + }, + { + "epoch": 1.0939391493826165, + "grad_norm": 0.7108014225959778, + "learning_rate": 4.7474476501754165e-05, + "loss": 2.471, + "step": 13555 + }, + { + "epoch": 1.0940198531191994, + "grad_norm": 0.6864863038063049, + "learning_rate": 4.746104337885473e-05, + "loss": 2.4778, + "step": 13556 + }, + { + "epoch": 1.0941005568557824, + "grad_norm": 0.6890624165534973, + "learning_rate": 4.744761156536345e-05, + "loss": 2.456, + "step": 13557 + }, + { + "epoch": 1.0941812605923653, + "grad_norm": 0.7052781581878662, + "learning_rate": 4.743418106161509e-05, + "loss": 2.4796, + "step": 13558 + }, + { + "epoch": 1.0942619643289484, + "grad_norm": 0.6569164991378784, + "learning_rate": 4.742075186794431e-05, + "loss": 2.469, + "step": 13559 + }, + { + "epoch": 1.0943426680655315, + "grad_norm": 0.7302874326705933, + "learning_rate": 4.7407323984685836e-05, + "loss": 2.4543, + "step": 13560 + }, + { + "epoch": 1.0944233718021144, + "grad_norm": 0.6499345898628235, + "learning_rate": 4.7393897412174335e-05, + "loss": 2.4037, + "step": 13561 + }, + { + "epoch": 1.0945040755386974, + "grad_norm": 0.6643944382667542, + "learning_rate": 4.7380472150744416e-05, + "loss": 2.4067, + "step": 13562 + }, + { + "epoch": 1.0945847792752805, + "grad_norm": 0.7491872906684875, + "learning_rate": 4.736704820073069e-05, + "loss": 2.4277, + "step": 13563 + }, + { + "epoch": 1.0946654830118634, + "grad_norm": 0.7319512367248535, + "learning_rate": 4.735362556246773e-05, + "loss": 2.4588, + "step": 13564 + }, + { + "epoch": 1.0947461867484465, + "grad_norm": 0.7404350638389587, + "learning_rate": 4.734020423629001e-05, + "loss": 2.432, + "step": 13565 + }, + { + "epoch": 1.0948268904850296, + "grad_norm": 0.6462193727493286, + "learning_rate": 4.732678422253206e-05, + "loss": 2.4417, + "step": 13566 + }, + { + "epoch": 1.0949075942216124, + "grad_norm": 0.6711323857307434, + 
"learning_rate": 4.731336552152836e-05, + "loss": 2.4023, + "step": 13567 + }, + { + "epoch": 1.0949882979581955, + "grad_norm": 0.658261239528656, + "learning_rate": 4.729994813361329e-05, + "loss": 2.4132, + "step": 13568 + }, + { + "epoch": 1.0950690016947784, + "grad_norm": 0.8081904053688049, + "learning_rate": 4.728653205912127e-05, + "loss": 2.4412, + "step": 13569 + }, + { + "epoch": 1.0951497054313615, + "grad_norm": 0.6620786786079407, + "learning_rate": 4.727311729838666e-05, + "loss": 2.4357, + "step": 13570 + }, + { + "epoch": 1.0952304091679446, + "grad_norm": 0.7026848793029785, + "learning_rate": 4.725970385174381e-05, + "loss": 2.4159, + "step": 13571 + }, + { + "epoch": 1.0953111129045274, + "grad_norm": 0.7017392516136169, + "learning_rate": 4.7246291719526995e-05, + "loss": 2.4253, + "step": 13572 + }, + { + "epoch": 1.0953918166411105, + "grad_norm": 0.710172712802887, + "learning_rate": 4.7232880902070483e-05, + "loss": 2.4057, + "step": 13573 + }, + { + "epoch": 1.0954725203776934, + "grad_norm": 0.7208876013755798, + "learning_rate": 4.721947139970856e-05, + "loss": 2.4803, + "step": 13574 + }, + { + "epoch": 1.0955532241142765, + "grad_norm": 0.693219006061554, + "learning_rate": 4.720606321277534e-05, + "loss": 2.3611, + "step": 13575 + }, + { + "epoch": 1.0956339278508596, + "grad_norm": 0.737206757068634, + "learning_rate": 4.7192656341605026e-05, + "loss": 2.3873, + "step": 13576 + }, + { + "epoch": 1.0957146315874424, + "grad_norm": 0.6605268120765686, + "learning_rate": 4.717925078653179e-05, + "loss": 2.4155, + "step": 13577 + }, + { + "epoch": 1.0957953353240255, + "grad_norm": 0.7143047451972961, + "learning_rate": 4.716584654788967e-05, + "loss": 2.4526, + "step": 13578 + }, + { + "epoch": 1.0958760390606086, + "grad_norm": 0.6980953216552734, + "learning_rate": 4.715244362601277e-05, + "loss": 2.4422, + "step": 13579 + }, + { + "epoch": 1.0959567427971915, + "grad_norm": 0.6852009892463684, + "learning_rate": 
4.713904202123515e-05, + "loss": 2.4599, + "step": 13580 + }, + { + "epoch": 1.0960374465337746, + "grad_norm": 0.7436656355857849, + "learning_rate": 4.712564173389074e-05, + "loss": 2.4441, + "step": 13581 + }, + { + "epoch": 1.0961181502703574, + "grad_norm": 0.7090624570846558, + "learning_rate": 4.711224276431352e-05, + "loss": 2.4741, + "step": 13582 + }, + { + "epoch": 1.0961988540069405, + "grad_norm": 0.6611043810844421, + "learning_rate": 4.709884511283753e-05, + "loss": 2.4589, + "step": 13583 + }, + { + "epoch": 1.0962795577435236, + "grad_norm": 0.6932426691055298, + "learning_rate": 4.708544877979658e-05, + "loss": 2.4199, + "step": 13584 + }, + { + "epoch": 1.0963602614801065, + "grad_norm": 0.7629422545433044, + "learning_rate": 4.707205376552456e-05, + "loss": 2.4588, + "step": 13585 + }, + { + "epoch": 1.0964409652166895, + "grad_norm": 0.8116739392280579, + "learning_rate": 4.705866007035531e-05, + "loss": 2.472, + "step": 13586 + }, + { + "epoch": 1.0965216689532726, + "grad_norm": 0.6711297631263733, + "learning_rate": 4.704526769462269e-05, + "loss": 2.4086, + "step": 13587 + }, + { + "epoch": 1.0966023726898555, + "grad_norm": 0.716015636920929, + "learning_rate": 4.703187663866037e-05, + "loss": 2.4411, + "step": 13588 + }, + { + "epoch": 1.0966830764264386, + "grad_norm": 0.6982430219650269, + "learning_rate": 4.701848690280215e-05, + "loss": 2.4438, + "step": 13589 + }, + { + "epoch": 1.0967637801630215, + "grad_norm": 0.7183159589767456, + "learning_rate": 4.7005098487381785e-05, + "loss": 2.4464, + "step": 13590 + }, + { + "epoch": 1.0968444838996045, + "grad_norm": 0.6983399391174316, + "learning_rate": 4.699171139273284e-05, + "loss": 2.4354, + "step": 13591 + }, + { + "epoch": 1.0969251876361876, + "grad_norm": 0.7157938480377197, + "learning_rate": 4.697832561918901e-05, + "loss": 2.4393, + "step": 13592 + }, + { + "epoch": 1.0970058913727705, + "grad_norm": 0.6991363763809204, + "learning_rate": 4.696494116708392e-05, + "loss": 
2.4723, + "step": 13593 + }, + { + "epoch": 1.0970865951093536, + "grad_norm": 0.6722309589385986, + "learning_rate": 4.695155803675112e-05, + "loss": 2.447, + "step": 13594 + }, + { + "epoch": 1.0971672988459367, + "grad_norm": 0.6492688655853271, + "learning_rate": 4.6938176228524175e-05, + "loss": 2.4213, + "step": 13595 + }, + { + "epoch": 1.0972480025825195, + "grad_norm": 0.6941642165184021, + "learning_rate": 4.6924795742736616e-05, + "loss": 2.4714, + "step": 13596 + }, + { + "epoch": 1.0973287063191026, + "grad_norm": 0.7506042122840881, + "learning_rate": 4.691141657972185e-05, + "loss": 2.4563, + "step": 13597 + }, + { + "epoch": 1.0974094100556855, + "grad_norm": 0.7032836675643921, + "learning_rate": 4.6898038739813356e-05, + "loss": 2.4824, + "step": 13598 + }, + { + "epoch": 1.0974901137922686, + "grad_norm": 0.6908734440803528, + "learning_rate": 4.6884662223344575e-05, + "loss": 2.4486, + "step": 13599 + }, + { + "epoch": 1.0975708175288517, + "grad_norm": 0.714971661567688, + "learning_rate": 4.687128703064883e-05, + "loss": 2.4372, + "step": 13600 + }, + { + "epoch": 1.0976515212654345, + "grad_norm": 0.6989198327064514, + "learning_rate": 4.6857913162059486e-05, + "loss": 2.395, + "step": 13601 + }, + { + "epoch": 1.0977322250020176, + "grad_norm": 0.7163406014442444, + "learning_rate": 4.684454061790987e-05, + "loss": 2.4868, + "step": 13602 + }, + { + "epoch": 1.0978129287386005, + "grad_norm": 0.6600626707077026, + "learning_rate": 4.6831169398533245e-05, + "loss": 2.5134, + "step": 13603 + }, + { + "epoch": 1.0978936324751836, + "grad_norm": 0.6657080054283142, + "learning_rate": 4.681779950426286e-05, + "loss": 2.4701, + "step": 13604 + }, + { + "epoch": 1.0979743362117667, + "grad_norm": 0.665860116481781, + "learning_rate": 4.680443093543194e-05, + "loss": 2.4593, + "step": 13605 + }, + { + "epoch": 1.0980550399483495, + "grad_norm": 0.7000327110290527, + "learning_rate": 4.679106369237368e-05, + "loss": 2.4523, + "step": 13606 + }, + { + 
"epoch": 1.0981357436849326, + "grad_norm": 0.6969157457351685, + "learning_rate": 4.677769777542118e-05, + "loss": 2.4935, + "step": 13607 + }, + { + "epoch": 1.0982164474215157, + "grad_norm": 0.6864836812019348, + "learning_rate": 4.676433318490757e-05, + "loss": 2.457, + "step": 13608 + }, + { + "epoch": 1.0982971511580986, + "grad_norm": 0.7331364750862122, + "learning_rate": 4.675096992116598e-05, + "loss": 2.4253, + "step": 13609 + }, + { + "epoch": 1.0983778548946816, + "grad_norm": 0.75, + "learning_rate": 4.673760798452936e-05, + "loss": 2.4147, + "step": 13610 + }, + { + "epoch": 1.0984585586312647, + "grad_norm": 0.6589440703392029, + "learning_rate": 4.6724247375330786e-05, + "loss": 2.4718, + "step": 13611 + }, + { + "epoch": 1.0985392623678476, + "grad_norm": 0.7032667994499207, + "learning_rate": 4.671088809390324e-05, + "loss": 2.4724, + "step": 13612 + }, + { + "epoch": 1.0986199661044307, + "grad_norm": 0.7544135451316833, + "learning_rate": 4.6697530140579646e-05, + "loss": 2.4804, + "step": 13613 + }, + { + "epoch": 1.0987006698410136, + "grad_norm": 0.6503081917762756, + "learning_rate": 4.668417351569295e-05, + "loss": 2.3829, + "step": 13614 + }, + { + "epoch": 1.0987813735775966, + "grad_norm": 0.6928786039352417, + "learning_rate": 4.667081821957605e-05, + "loss": 2.5678, + "step": 13615 + }, + { + "epoch": 1.0988620773141797, + "grad_norm": 0.6652864217758179, + "learning_rate": 4.665746425256173e-05, + "loss": 2.4585, + "step": 13616 + }, + { + "epoch": 1.0989427810507626, + "grad_norm": 0.700265109539032, + "learning_rate": 4.664411161498283e-05, + "loss": 2.4785, + "step": 13617 + }, + { + "epoch": 1.0990234847873457, + "grad_norm": 0.7443608045578003, + "learning_rate": 4.663076030717216e-05, + "loss": 2.4869, + "step": 13618 + }, + { + "epoch": 1.0991041885239285, + "grad_norm": 0.7037705779075623, + "learning_rate": 4.6617410329462477e-05, + "loss": 2.4518, + "step": 13619 + }, + { + "epoch": 1.0991848922605116, + "grad_norm": 
0.7528365850448608, + "learning_rate": 4.660406168218643e-05, + "loss": 2.4616, + "step": 13620 + }, + { + "epoch": 1.0992655959970947, + "grad_norm": 0.7149221301078796, + "learning_rate": 4.659071436567676e-05, + "loss": 2.4661, + "step": 13621 + }, + { + "epoch": 1.0993462997336776, + "grad_norm": 0.7212862968444824, + "learning_rate": 4.657736838026608e-05, + "loss": 2.4424, + "step": 13622 + }, + { + "epoch": 1.0994270034702607, + "grad_norm": 0.6934216022491455, + "learning_rate": 4.6564023726287045e-05, + "loss": 2.4633, + "step": 13623 + }, + { + "epoch": 1.0995077072068438, + "grad_norm": 0.7244036793708801, + "learning_rate": 4.655068040407221e-05, + "loss": 2.409, + "step": 13624 + }, + { + "epoch": 1.0995884109434266, + "grad_norm": 0.6911318898200989, + "learning_rate": 4.653733841395419e-05, + "loss": 2.5117, + "step": 13625 + }, + { + "epoch": 1.0996691146800097, + "grad_norm": 0.7579816579818726, + "learning_rate": 4.65239977562654e-05, + "loss": 2.4927, + "step": 13626 + }, + { + "epoch": 1.0997498184165928, + "grad_norm": 0.7699651122093201, + "learning_rate": 4.651065843133837e-05, + "loss": 2.4083, + "step": 13627 + }, + { + "epoch": 1.0998305221531757, + "grad_norm": 0.6669431328773499, + "learning_rate": 4.649732043950561e-05, + "loss": 2.4402, + "step": 13628 + }, + { + "epoch": 1.0999112258897588, + "grad_norm": 0.7134940028190613, + "learning_rate": 4.6483983781099426e-05, + "loss": 2.4275, + "step": 13629 + }, + { + "epoch": 1.0999919296263416, + "grad_norm": 0.7107651233673096, + "learning_rate": 4.647064845645227e-05, + "loss": 2.4654, + "step": 13630 + }, + { + "epoch": 1.1000726333629247, + "grad_norm": 0.7101391553878784, + "learning_rate": 4.645731446589652e-05, + "loss": 2.4357, + "step": 13631 + }, + { + "epoch": 1.1001533370995078, + "grad_norm": 0.7511606216430664, + "learning_rate": 4.6443981809764405e-05, + "loss": 2.5016, + "step": 13632 + }, + { + "epoch": 1.1002340408360907, + "grad_norm": 0.7315953373908997, + 
"learning_rate": 4.6430650488388226e-05, + "loss": 2.4541, + "step": 13633 + }, + { + "epoch": 1.1003147445726738, + "grad_norm": 0.6701769232749939, + "learning_rate": 4.6417320502100316e-05, + "loss": 2.4071, + "step": 13634 + }, + { + "epoch": 1.1003954483092566, + "grad_norm": 0.7164294123649597, + "learning_rate": 4.6403991851232876e-05, + "loss": 2.478, + "step": 13635 + }, + { + "epoch": 1.1004761520458397, + "grad_norm": 0.7003894448280334, + "learning_rate": 4.639066453611802e-05, + "loss": 2.4686, + "step": 13636 + }, + { + "epoch": 1.1005568557824228, + "grad_norm": 0.6855250000953674, + "learning_rate": 4.6377338557087957e-05, + "loss": 2.4531, + "step": 13637 + }, + { + "epoch": 1.1006375595190057, + "grad_norm": 0.6581299901008606, + "learning_rate": 4.6364013914474816e-05, + "loss": 2.4511, + "step": 13638 + }, + { + "epoch": 1.1007182632555887, + "grad_norm": 0.7599080204963684, + "learning_rate": 4.6350690608610604e-05, + "loss": 2.5143, + "step": 13639 + }, + { + "epoch": 1.1007989669921718, + "grad_norm": 0.7029981017112732, + "learning_rate": 4.633736863982744e-05, + "loss": 2.4541, + "step": 13640 + }, + { + "epoch": 1.1008796707287547, + "grad_norm": 0.7378708720207214, + "learning_rate": 4.6324048008457357e-05, + "loss": 2.4319, + "step": 13641 + }, + { + "epoch": 1.1009603744653378, + "grad_norm": 0.7087826728820801, + "learning_rate": 4.631072871483226e-05, + "loss": 2.4148, + "step": 13642 + }, + { + "epoch": 1.1010410782019207, + "grad_norm": 0.7000819444656372, + "learning_rate": 4.629741075928415e-05, + "loss": 2.4692, + "step": 13643 + }, + { + "epoch": 1.1011217819385037, + "grad_norm": 0.7363965511322021, + "learning_rate": 4.628409414214496e-05, + "loss": 2.4584, + "step": 13644 + }, + { + "epoch": 1.1012024856750868, + "grad_norm": 0.6691753268241882, + "learning_rate": 4.627077886374656e-05, + "loss": 2.4356, + "step": 13645 + }, + { + "epoch": 1.1012831894116697, + "grad_norm": 0.6864185929298401, + "learning_rate": 
4.625746492442078e-05, + "loss": 2.4713, + "step": 13646 + }, + { + "epoch": 1.1013638931482528, + "grad_norm": 0.714318573474884, + "learning_rate": 4.624415232449947e-05, + "loss": 2.4482, + "step": 13647 + }, + { + "epoch": 1.1014445968848359, + "grad_norm": 0.6383495330810547, + "learning_rate": 4.623084106431444e-05, + "loss": 2.4248, + "step": 13648 + }, + { + "epoch": 1.1015253006214187, + "grad_norm": 0.7014495730400085, + "learning_rate": 4.6217531144197365e-05, + "loss": 2.4393, + "step": 13649 + }, + { + "epoch": 1.1016060043580018, + "grad_norm": 0.8128634095191956, + "learning_rate": 4.620422256448e-05, + "loss": 2.4741, + "step": 13650 + }, + { + "epoch": 1.1016867080945847, + "grad_norm": 0.7333208322525024, + "learning_rate": 4.619091532549408e-05, + "loss": 2.4288, + "step": 13651 + }, + { + "epoch": 1.1017674118311678, + "grad_norm": 0.7023218274116516, + "learning_rate": 4.617760942757117e-05, + "loss": 2.5025, + "step": 13652 + }, + { + "epoch": 1.1018481155677509, + "grad_norm": 0.6420873403549194, + "learning_rate": 4.616430487104292e-05, + "loss": 2.4165, + "step": 13653 + }, + { + "epoch": 1.1019288193043337, + "grad_norm": 0.6767684817314148, + "learning_rate": 4.615100165624092e-05, + "loss": 2.4642, + "step": 13654 + }, + { + "epoch": 1.1020095230409168, + "grad_norm": 0.7361159920692444, + "learning_rate": 4.613769978349672e-05, + "loss": 2.5343, + "step": 13655 + }, + { + "epoch": 1.1020902267775, + "grad_norm": 0.6642624735832214, + "learning_rate": 4.6124399253141846e-05, + "loss": 2.3769, + "step": 13656 + }, + { + "epoch": 1.1021709305140828, + "grad_norm": 0.6912256479263306, + "learning_rate": 4.611110006550781e-05, + "loss": 2.455, + "step": 13657 + }, + { + "epoch": 1.1022516342506659, + "grad_norm": 0.7419310212135315, + "learning_rate": 4.609780222092599e-05, + "loss": 2.4171, + "step": 13658 + }, + { + "epoch": 1.1023323379872487, + "grad_norm": 0.718953549861908, + "learning_rate": 4.6084505719727835e-05, + "loss": 2.4791, + 
"step": 13659 + }, + { + "epoch": 1.1024130417238318, + "grad_norm": 0.7904248237609863, + "learning_rate": 4.607121056224477e-05, + "loss": 2.4429, + "step": 13660 + }, + { + "epoch": 1.102493745460415, + "grad_norm": 0.6743534803390503, + "learning_rate": 4.605791674880808e-05, + "loss": 2.4481, + "step": 13661 + }, + { + "epoch": 1.1025744491969978, + "grad_norm": 0.6829143166542053, + "learning_rate": 4.6044624279749106e-05, + "loss": 2.4078, + "step": 13662 + }, + { + "epoch": 1.1026551529335809, + "grad_norm": 0.6803167462348938, + "learning_rate": 4.6031333155399136e-05, + "loss": 2.4509, + "step": 13663 + }, + { + "epoch": 1.1027358566701637, + "grad_norm": 0.7474592328071594, + "learning_rate": 4.601804337608943e-05, + "loss": 2.4563, + "step": 13664 + }, + { + "epoch": 1.1028165604067468, + "grad_norm": 0.6753630042076111, + "learning_rate": 4.6004754942151174e-05, + "loss": 2.4285, + "step": 13665 + }, + { + "epoch": 1.10289726414333, + "grad_norm": 0.7990161180496216, + "learning_rate": 4.599146785391558e-05, + "loss": 2.4907, + "step": 13666 + }, + { + "epoch": 1.1029779678799128, + "grad_norm": 0.8161290287971497, + "learning_rate": 4.597818211171383e-05, + "loss": 2.4599, + "step": 13667 + }, + { + "epoch": 1.1030586716164958, + "grad_norm": 0.6813610792160034, + "learning_rate": 4.596489771587695e-05, + "loss": 2.4484, + "step": 13668 + }, + { + "epoch": 1.103139375353079, + "grad_norm": 0.6598966121673584, + "learning_rate": 4.5951614666736076e-05, + "loss": 2.4326, + "step": 13669 + }, + { + "epoch": 1.1032200790896618, + "grad_norm": 0.7084827423095703, + "learning_rate": 4.593833296462228e-05, + "loss": 2.4188, + "step": 13670 + }, + { + "epoch": 1.1033007828262449, + "grad_norm": 0.6876685619354248, + "learning_rate": 4.59250526098665e-05, + "loss": 2.4482, + "step": 13671 + }, + { + "epoch": 1.103381486562828, + "grad_norm": 0.7292699813842773, + "learning_rate": 4.591177360279978e-05, + "loss": 2.4452, + "step": 13672 + }, + { + "epoch": 
1.1034621902994108, + "grad_norm": 0.7057675123214722, + "learning_rate": 4.589849594375304e-05, + "loss": 2.4336, + "step": 13673 + }, + { + "epoch": 1.103542894035994, + "grad_norm": 0.7684180736541748, + "learning_rate": 4.5885219633057196e-05, + "loss": 2.4453, + "step": 13674 + }, + { + "epoch": 1.1036235977725768, + "grad_norm": 0.7107112407684326, + "learning_rate": 4.5871944671043154e-05, + "loss": 2.4116, + "step": 13675 + }, + { + "epoch": 1.1037043015091599, + "grad_norm": 0.659501314163208, + "learning_rate": 4.585867105804177e-05, + "loss": 2.4907, + "step": 13676 + }, + { + "epoch": 1.103785005245743, + "grad_norm": 0.7553967833518982, + "learning_rate": 4.5845398794383786e-05, + "loss": 2.3982, + "step": 13677 + }, + { + "epoch": 1.1038657089823258, + "grad_norm": 0.6861104965209961, + "learning_rate": 4.583212788040003e-05, + "loss": 2.416, + "step": 13678 + }, + { + "epoch": 1.103946412718909, + "grad_norm": 0.6546811461448669, + "learning_rate": 4.5818858316421254e-05, + "loss": 2.4506, + "step": 13679 + }, + { + "epoch": 1.1040271164554918, + "grad_norm": 0.7012909650802612, + "learning_rate": 4.58055901027782e-05, + "loss": 2.439, + "step": 13680 + }, + { + "epoch": 1.1041078201920749, + "grad_norm": 0.7594780325889587, + "learning_rate": 4.5792323239801446e-05, + "loss": 2.4437, + "step": 13681 + }, + { + "epoch": 1.104188523928658, + "grad_norm": 0.6576492190361023, + "learning_rate": 4.577905772782172e-05, + "loss": 2.443, + "step": 13682 + }, + { + "epoch": 1.1042692276652408, + "grad_norm": 0.6751925349235535, + "learning_rate": 4.576579356716963e-05, + "loss": 2.507, + "step": 13683 + }, + { + "epoch": 1.104349931401824, + "grad_norm": 0.7206710577011108, + "learning_rate": 4.575253075817567e-05, + "loss": 2.4236, + "step": 13684 + }, + { + "epoch": 1.104430635138407, + "grad_norm": 0.7736170291900635, + "learning_rate": 4.5739269301170485e-05, + "loss": 2.4095, + "step": 13685 + }, + { + "epoch": 1.1045113388749899, + "grad_norm": 
0.6901736855506897, + "learning_rate": 4.572600919648457e-05, + "loss": 2.4519, + "step": 13686 + }, + { + "epoch": 1.104592042611573, + "grad_norm": 0.7762539982795715, + "learning_rate": 4.571275044444836e-05, + "loss": 2.5018, + "step": 13687 + }, + { + "epoch": 1.1046727463481558, + "grad_norm": 0.7231423854827881, + "learning_rate": 4.569949304539232e-05, + "loss": 2.4553, + "step": 13688 + }, + { + "epoch": 1.104753450084739, + "grad_norm": 0.7713531255722046, + "learning_rate": 4.568623699964688e-05, + "loss": 2.49, + "step": 13689 + }, + { + "epoch": 1.104834153821322, + "grad_norm": 0.7355079650878906, + "learning_rate": 4.5672982307542354e-05, + "loss": 2.5191, + "step": 13690 + }, + { + "epoch": 1.1049148575579049, + "grad_norm": 0.6916452050209045, + "learning_rate": 4.565972896940913e-05, + "loss": 2.3867, + "step": 13691 + }, + { + "epoch": 1.104995561294488, + "grad_norm": 0.6622549295425415, + "learning_rate": 4.5646476985577544e-05, + "loss": 2.4364, + "step": 13692 + }, + { + "epoch": 1.105076265031071, + "grad_norm": 0.6683297157287598, + "learning_rate": 4.563322635637779e-05, + "loss": 2.43, + "step": 13693 + }, + { + "epoch": 1.105156968767654, + "grad_norm": 0.6857880353927612, + "learning_rate": 4.561997708214015e-05, + "loss": 2.4515, + "step": 13694 + }, + { + "epoch": 1.105237672504237, + "grad_norm": 0.7473817467689514, + "learning_rate": 4.5606729163194807e-05, + "loss": 2.442, + "step": 13695 + }, + { + "epoch": 1.1053183762408199, + "grad_norm": 0.6988846063613892, + "learning_rate": 4.559348259987203e-05, + "loss": 2.3886, + "step": 13696 + }, + { + "epoch": 1.105399079977403, + "grad_norm": 0.6450650691986084, + "learning_rate": 4.5580237392501836e-05, + "loss": 2.4647, + "step": 13697 + }, + { + "epoch": 1.105479783713986, + "grad_norm": 0.7669623494148254, + "learning_rate": 4.556699354141439e-05, + "loss": 2.4362, + "step": 13698 + }, + { + "epoch": 1.105560487450569, + "grad_norm": 0.7019730806350708, + "learning_rate": 
4.55537510469398e-05, + "loss": 2.49, + "step": 13699 + }, + { + "epoch": 1.105641191187152, + "grad_norm": 0.6736636757850647, + "learning_rate": 4.5540509909408e-05, + "loss": 2.43, + "step": 13700 + }, + { + "epoch": 1.105721894923735, + "grad_norm": 0.6872034668922424, + "learning_rate": 4.552727012914907e-05, + "loss": 2.4507, + "step": 13701 + }, + { + "epoch": 1.105802598660318, + "grad_norm": 0.6726621985435486, + "learning_rate": 4.5514031706492986e-05, + "loss": 2.4193, + "step": 13702 + }, + { + "epoch": 1.105883302396901, + "grad_norm": 0.7345453500747681, + "learning_rate": 4.550079464176963e-05, + "loss": 2.4257, + "step": 13703 + }, + { + "epoch": 1.105964006133484, + "grad_norm": 0.6764804124832153, + "learning_rate": 4.548755893530894e-05, + "loss": 2.4656, + "step": 13704 + }, + { + "epoch": 1.106044709870067, + "grad_norm": 0.6915058493614197, + "learning_rate": 4.5474324587440766e-05, + "loss": 2.4148, + "step": 13705 + }, + { + "epoch": 1.10612541360665, + "grad_norm": 0.7960236668586731, + "learning_rate": 4.5461091598494954e-05, + "loss": 2.4148, + "step": 13706 + }, + { + "epoch": 1.106206117343233, + "grad_norm": 0.7058970928192139, + "learning_rate": 4.544785996880131e-05, + "loss": 2.4795, + "step": 13707 + }, + { + "epoch": 1.106286821079816, + "grad_norm": 0.6979549527168274, + "learning_rate": 4.5434629698689634e-05, + "loss": 2.4329, + "step": 13708 + }, + { + "epoch": 1.1063675248163989, + "grad_norm": 0.6805241107940674, + "learning_rate": 4.5421400788489586e-05, + "loss": 2.4303, + "step": 13709 + }, + { + "epoch": 1.106448228552982, + "grad_norm": 0.7566354274749756, + "learning_rate": 4.5408173238530905e-05, + "loss": 2.4769, + "step": 13710 + }, + { + "epoch": 1.106528932289565, + "grad_norm": 0.647773802280426, + "learning_rate": 4.539494704914324e-05, + "loss": 2.4037, + "step": 13711 + }, + { + "epoch": 1.106609636026148, + "grad_norm": 0.7248135209083557, + "learning_rate": 4.538172222065628e-05, + "loss": 2.4366, + "step": 
13712 + }, + { + "epoch": 1.106690339762731, + "grad_norm": 0.6861057281494141, + "learning_rate": 4.536849875339953e-05, + "loss": 2.456, + "step": 13713 + }, + { + "epoch": 1.106771043499314, + "grad_norm": 0.7386166453361511, + "learning_rate": 4.5355276647702605e-05, + "loss": 2.4806, + "step": 13714 + }, + { + "epoch": 1.106851747235897, + "grad_norm": 0.664402961730957, + "learning_rate": 4.534205590389503e-05, + "loss": 2.4846, + "step": 13715 + }, + { + "epoch": 1.10693245097248, + "grad_norm": 0.8123969435691833, + "learning_rate": 4.5328836522306296e-05, + "loss": 2.4945, + "step": 13716 + }, + { + "epoch": 1.1070131547090631, + "grad_norm": 0.7375624775886536, + "learning_rate": 4.5315618503265865e-05, + "loss": 2.4533, + "step": 13717 + }, + { + "epoch": 1.107093858445646, + "grad_norm": 0.70960932970047, + "learning_rate": 4.53024018471032e-05, + "loss": 2.4351, + "step": 13718 + }, + { + "epoch": 1.107174562182229, + "grad_norm": 0.7170885801315308, + "learning_rate": 4.5289186554147645e-05, + "loss": 2.4654, + "step": 13719 + }, + { + "epoch": 1.107255265918812, + "grad_norm": 0.6986895203590393, + "learning_rate": 4.5275972624728556e-05, + "loss": 2.4079, + "step": 13720 + }, + { + "epoch": 1.107335969655395, + "grad_norm": 0.6948813796043396, + "learning_rate": 4.526276005917532e-05, + "loss": 2.4981, + "step": 13721 + }, + { + "epoch": 1.1074166733919781, + "grad_norm": 0.7719457149505615, + "learning_rate": 4.524954885781717e-05, + "loss": 2.4853, + "step": 13722 + }, + { + "epoch": 1.107497377128561, + "grad_norm": 0.652686357498169, + "learning_rate": 4.5236339020983363e-05, + "loss": 2.3672, + "step": 13723 + }, + { + "epoch": 1.107578080865144, + "grad_norm": 0.7517427802085876, + "learning_rate": 4.5223130549003144e-05, + "loss": 2.3947, + "step": 13724 + }, + { + "epoch": 1.107658784601727, + "grad_norm": 0.6755498647689819, + "learning_rate": 4.5209923442205705e-05, + "loss": 2.4173, + "step": 13725 + }, + { + "epoch": 1.10773948833831, + 
"grad_norm": 0.6801806688308716, + "learning_rate": 4.519671770092019e-05, + "loss": 2.4366, + "step": 13726 + }, + { + "epoch": 1.1078201920748931, + "grad_norm": 0.6665045619010925, + "learning_rate": 4.5183513325475724e-05, + "loss": 2.4797, + "step": 13727 + }, + { + "epoch": 1.107900895811476, + "grad_norm": 0.7303451299667358, + "learning_rate": 4.517031031620145e-05, + "loss": 2.4487, + "step": 13728 + }, + { + "epoch": 1.107981599548059, + "grad_norm": 0.7241206765174866, + "learning_rate": 4.515710867342632e-05, + "loss": 2.4632, + "step": 13729 + }, + { + "epoch": 1.1080623032846422, + "grad_norm": 0.738835334777832, + "learning_rate": 4.514390839747941e-05, + "loss": 2.3937, + "step": 13730 + }, + { + "epoch": 1.108143007021225, + "grad_norm": 0.7062843441963196, + "learning_rate": 4.5130709488689726e-05, + "loss": 2.4576, + "step": 13731 + }, + { + "epoch": 1.1082237107578081, + "grad_norm": 0.7074100971221924, + "learning_rate": 4.511751194738616e-05, + "loss": 2.4843, + "step": 13732 + }, + { + "epoch": 1.108304414494391, + "grad_norm": 0.751742959022522, + "learning_rate": 4.510431577389765e-05, + "loss": 2.4607, + "step": 13733 + }, + { + "epoch": 1.108385118230974, + "grad_norm": 0.7370054125785828, + "learning_rate": 4.50911209685531e-05, + "loss": 2.4877, + "step": 13734 + }, + { + "epoch": 1.1084658219675572, + "grad_norm": 0.6410251259803772, + "learning_rate": 4.507792753168135e-05, + "loss": 2.4254, + "step": 13735 + }, + { + "epoch": 1.10854652570414, + "grad_norm": 0.7141317129135132, + "learning_rate": 4.506473546361121e-05, + "loss": 2.4962, + "step": 13736 + }, + { + "epoch": 1.1086272294407231, + "grad_norm": 0.6903412342071533, + "learning_rate": 4.50515447646715e-05, + "loss": 2.4315, + "step": 13737 + }, + { + "epoch": 1.1087079331773062, + "grad_norm": 0.7068564891815186, + "learning_rate": 4.50383554351909e-05, + "loss": 2.5795, + "step": 13738 + }, + { + "epoch": 1.108788636913889, + "grad_norm": 0.6880627274513245, + 
"learning_rate": 4.5025167475498154e-05, + "loss": 2.4399, + "step": 13739 + }, + { + "epoch": 1.1088693406504722, + "grad_norm": 0.6721192598342896, + "learning_rate": 4.5011980885921965e-05, + "loss": 2.4651, + "step": 13740 + }, + { + "epoch": 1.108950044387055, + "grad_norm": 0.7084259986877441, + "learning_rate": 4.499879566679093e-05, + "loss": 2.4121, + "step": 13741 + }, + { + "epoch": 1.109030748123638, + "grad_norm": 0.6809335947036743, + "learning_rate": 4.498561181843368e-05, + "loss": 2.4714, + "step": 13742 + }, + { + "epoch": 1.1091114518602212, + "grad_norm": 0.690416693687439, + "learning_rate": 4.497242934117879e-05, + "loss": 2.4744, + "step": 13743 + }, + { + "epoch": 1.109192155596804, + "grad_norm": 0.728522002696991, + "learning_rate": 4.495924823535483e-05, + "loss": 2.4374, + "step": 13744 + }, + { + "epoch": 1.1092728593333872, + "grad_norm": 0.7000796794891357, + "learning_rate": 4.494606850129026e-05, + "loss": 2.4635, + "step": 13745 + }, + { + "epoch": 1.1093535630699702, + "grad_norm": 0.824645459651947, + "learning_rate": 4.493289013931353e-05, + "loss": 2.3724, + "step": 13746 + }, + { + "epoch": 1.109434266806553, + "grad_norm": 0.6561198830604553, + "learning_rate": 4.491971314975321e-05, + "loss": 2.3726, + "step": 13747 + }, + { + "epoch": 1.1095149705431362, + "grad_norm": 0.7067599892616272, + "learning_rate": 4.490653753293757e-05, + "loss": 2.4285, + "step": 13748 + }, + { + "epoch": 1.109595674279719, + "grad_norm": 0.6954898834228516, + "learning_rate": 4.489336328919503e-05, + "loss": 2.4252, + "step": 13749 + }, + { + "epoch": 1.1096763780163021, + "grad_norm": 0.6683667302131653, + "learning_rate": 4.4880190418853974e-05, + "loss": 2.4815, + "step": 13750 + }, + { + "epoch": 1.1097570817528852, + "grad_norm": 0.7554971575737, + "learning_rate": 4.486701892224261e-05, + "loss": 2.5036, + "step": 13751 + }, + { + "epoch": 1.109837785489468, + "grad_norm": 0.7043242454528809, + "learning_rate": 4.485384879968926e-05, + 
"loss": 2.3757, + "step": 13752 + }, + { + "epoch": 1.1099184892260512, + "grad_norm": 0.8016893863677979, + "learning_rate": 4.4840680051522186e-05, + "loss": 2.4655, + "step": 13753 + }, + { + "epoch": 1.1099991929626343, + "grad_norm": 0.7022131085395813, + "learning_rate": 4.4827512678069515e-05, + "loss": 2.475, + "step": 13754 + }, + { + "epoch": 1.1100798966992171, + "grad_norm": 0.6963247656822205, + "learning_rate": 4.4814346679659455e-05, + "loss": 2.4866, + "step": 13755 + }, + { + "epoch": 1.1101606004358002, + "grad_norm": 0.6980907917022705, + "learning_rate": 4.4801182056620125e-05, + "loss": 2.4322, + "step": 13756 + }, + { + "epoch": 1.110241304172383, + "grad_norm": 0.68063884973526, + "learning_rate": 4.478801880927964e-05, + "loss": 2.426, + "step": 13757 + }, + { + "epoch": 1.1103220079089662, + "grad_norm": 0.7454195618629456, + "learning_rate": 4.477485693796605e-05, + "loss": 2.5042, + "step": 13758 + }, + { + "epoch": 1.1104027116455493, + "grad_norm": 0.685975193977356, + "learning_rate": 4.476169644300737e-05, + "loss": 2.4874, + "step": 13759 + }, + { + "epoch": 1.1104834153821321, + "grad_norm": 0.7060961723327637, + "learning_rate": 4.4748537324731664e-05, + "loss": 2.4126, + "step": 13760 + }, + { + "epoch": 1.1105641191187152, + "grad_norm": 0.6794416904449463, + "learning_rate": 4.4735379583466795e-05, + "loss": 2.4112, + "step": 13761 + }, + { + "epoch": 1.1106448228552983, + "grad_norm": 0.6854961514472961, + "learning_rate": 4.472222321954073e-05, + "loss": 2.4909, + "step": 13762 + }, + { + "epoch": 1.1107255265918812, + "grad_norm": 0.7660776972770691, + "learning_rate": 4.470906823328139e-05, + "loss": 2.5021, + "step": 13763 + }, + { + "epoch": 1.1108062303284643, + "grad_norm": 0.7027743458747864, + "learning_rate": 4.4695914625016564e-05, + "loss": 2.4375, + "step": 13764 + }, + { + "epoch": 1.1108869340650471, + "grad_norm": 0.6896719336509705, + "learning_rate": 4.468276239507413e-05, + "loss": 2.4574, + "step": 13765 + 
}, + { + "epoch": 1.1109676378016302, + "grad_norm": 0.685141384601593, + "learning_rate": 4.4669611543781844e-05, + "loss": 2.4311, + "step": 13766 + }, + { + "epoch": 1.1110483415382133, + "grad_norm": 0.7108263373374939, + "learning_rate": 4.465646207146746e-05, + "loss": 2.4565, + "step": 13767 + }, + { + "epoch": 1.1111290452747962, + "grad_norm": 0.63578861951828, + "learning_rate": 4.464331397845873e-05, + "loss": 2.449, + "step": 13768 + }, + { + "epoch": 1.1112097490113793, + "grad_norm": 0.6917306780815125, + "learning_rate": 4.463016726508335e-05, + "loss": 2.4681, + "step": 13769 + }, + { + "epoch": 1.1112904527479621, + "grad_norm": 0.7328054308891296, + "learning_rate": 4.4617021931668914e-05, + "loss": 2.404, + "step": 13770 + }, + { + "epoch": 1.1113711564845452, + "grad_norm": 0.6501660943031311, + "learning_rate": 4.460387797854305e-05, + "loss": 2.4228, + "step": 13771 + }, + { + "epoch": 1.1114518602211283, + "grad_norm": 0.6656771302223206, + "learning_rate": 4.459073540603336e-05, + "loss": 2.4814, + "step": 13772 + }, + { + "epoch": 1.1115325639577112, + "grad_norm": 0.671017587184906, + "learning_rate": 4.457759421446742e-05, + "loss": 2.4605, + "step": 13773 + }, + { + "epoch": 1.1116132676942942, + "grad_norm": 0.6715343594551086, + "learning_rate": 4.456445440417267e-05, + "loss": 2.424, + "step": 13774 + }, + { + "epoch": 1.1116939714308773, + "grad_norm": 0.7051515579223633, + "learning_rate": 4.4551315975476626e-05, + "loss": 2.4358, + "step": 13775 + }, + { + "epoch": 1.1117746751674602, + "grad_norm": 0.7810437679290771, + "learning_rate": 4.453817892870673e-05, + "loss": 2.4718, + "step": 13776 + }, + { + "epoch": 1.1118553789040433, + "grad_norm": 0.7072561383247375, + "learning_rate": 4.4525043264190405e-05, + "loss": 2.4429, + "step": 13777 + }, + { + "epoch": 1.1119360826406264, + "grad_norm": 0.7949702143669128, + "learning_rate": 4.4511908982255e-05, + "loss": 2.4413, + "step": 13778 + }, + { + "epoch": 1.1120167863772092, + 
"grad_norm": 0.6716235876083374, + "learning_rate": 4.449877608322792e-05, + "loss": 2.427, + "step": 13779 + }, + { + "epoch": 1.1120974901137923, + "grad_norm": 0.7332563996315002, + "learning_rate": 4.448564456743638e-05, + "loss": 2.4567, + "step": 13780 + }, + { + "epoch": 1.1121781938503752, + "grad_norm": 0.7264607548713684, + "learning_rate": 4.447251443520769e-05, + "loss": 2.4844, + "step": 13781 + }, + { + "epoch": 1.1122588975869583, + "grad_norm": 0.7819967865943909, + "learning_rate": 4.4459385686869136e-05, + "loss": 2.5129, + "step": 13782 + }, + { + "epoch": 1.1123396013235414, + "grad_norm": 0.7587651610374451, + "learning_rate": 4.4446258322747824e-05, + "loss": 2.4714, + "step": 13783 + }, + { + "epoch": 1.1124203050601242, + "grad_norm": 0.6392871141433716, + "learning_rate": 4.443313234317099e-05, + "loss": 2.462, + "step": 13784 + }, + { + "epoch": 1.1125010087967073, + "grad_norm": 0.6609585881233215, + "learning_rate": 4.442000774846574e-05, + "loss": 2.4566, + "step": 13785 + }, + { + "epoch": 1.1125817125332902, + "grad_norm": 0.762924075126648, + "learning_rate": 4.440688453895919e-05, + "loss": 2.4613, + "step": 13786 + }, + { + "epoch": 1.1126624162698733, + "grad_norm": 0.7096089124679565, + "learning_rate": 4.4393762714978394e-05, + "loss": 2.4195, + "step": 13787 + }, + { + "epoch": 1.1127431200064564, + "grad_norm": 0.6663284301757812, + "learning_rate": 4.438064227685039e-05, + "loss": 2.422, + "step": 13788 + }, + { + "epoch": 1.1128238237430392, + "grad_norm": 0.6653628945350647, + "learning_rate": 4.436752322490221e-05, + "loss": 2.4477, + "step": 13789 + }, + { + "epoch": 1.1129045274796223, + "grad_norm": 0.6527605056762695, + "learning_rate": 4.435440555946073e-05, + "loss": 2.3874, + "step": 13790 + }, + { + "epoch": 1.1129852312162054, + "grad_norm": 0.6801275014877319, + "learning_rate": 4.4341289280852935e-05, + "loss": 2.4474, + "step": 13791 + }, + { + "epoch": 1.1130659349527883, + "grad_norm": 0.729905366897583, + 
"learning_rate": 4.432817438940574e-05, + "loss": 2.4711, + "step": 13792 + }, + { + "epoch": 1.1131466386893714, + "grad_norm": 0.7074751853942871, + "learning_rate": 4.431506088544593e-05, + "loss": 2.451, + "step": 13793 + }, + { + "epoch": 1.1132273424259542, + "grad_norm": 0.7241154313087463, + "learning_rate": 4.430194876930035e-05, + "loss": 2.4883, + "step": 13794 + }, + { + "epoch": 1.1133080461625373, + "grad_norm": 0.6549142003059387, + "learning_rate": 4.428883804129586e-05, + "loss": 2.4243, + "step": 13795 + }, + { + "epoch": 1.1133887498991204, + "grad_norm": 0.7046780586242676, + "learning_rate": 4.427572870175907e-05, + "loss": 2.4143, + "step": 13796 + }, + { + "epoch": 1.1134694536357033, + "grad_norm": 0.6563952565193176, + "learning_rate": 4.426262075101682e-05, + "loss": 2.416, + "step": 13797 + }, + { + "epoch": 1.1135501573722864, + "grad_norm": 0.7002081871032715, + "learning_rate": 4.4249514189395803e-05, + "loss": 2.3673, + "step": 13798 + }, + { + "epoch": 1.1136308611088694, + "grad_norm": 0.6766571998596191, + "learning_rate": 4.423640901722259e-05, + "loss": 2.4941, + "step": 13799 + }, + { + "epoch": 1.1137115648454523, + "grad_norm": 0.7404381632804871, + "learning_rate": 4.422330523482383e-05, + "loss": 2.4794, + "step": 13800 + }, + { + "epoch": 1.1137922685820354, + "grad_norm": 0.6670998930931091, + "learning_rate": 4.421020284252614e-05, + "loss": 2.5131, + "step": 13801 + }, + { + "epoch": 1.1138729723186183, + "grad_norm": 0.803720235824585, + "learning_rate": 4.4197101840655995e-05, + "loss": 2.4751, + "step": 13802 + }, + { + "epoch": 1.1139536760552013, + "grad_norm": 0.6532074809074402, + "learning_rate": 4.4184002229539947e-05, + "loss": 2.4147, + "step": 13803 + }, + { + "epoch": 1.1140343797917844, + "grad_norm": 0.6548035144805908, + "learning_rate": 4.417090400950447e-05, + "loss": 2.4601, + "step": 13804 + }, + { + "epoch": 1.1141150835283673, + "grad_norm": 0.6971763968467712, + "learning_rate": 
4.415780718087603e-05, + "loss": 2.4752, + "step": 13805 + }, + { + "epoch": 1.1141957872649504, + "grad_norm": 0.6624024510383606, + "learning_rate": 4.414471174398098e-05, + "loss": 2.4183, + "step": 13806 + }, + { + "epoch": 1.1142764910015335, + "grad_norm": 0.6571507453918457, + "learning_rate": 4.4131617699145714e-05, + "loss": 2.4747, + "step": 13807 + }, + { + "epoch": 1.1143571947381163, + "grad_norm": 0.7165808081626892, + "learning_rate": 4.411852504669658e-05, + "loss": 2.453, + "step": 13808 + }, + { + "epoch": 1.1144378984746994, + "grad_norm": 0.6708057522773743, + "learning_rate": 4.410543378695988e-05, + "loss": 2.4858, + "step": 13809 + }, + { + "epoch": 1.1145186022112823, + "grad_norm": 0.889302134513855, + "learning_rate": 4.409234392026187e-05, + "loss": 2.4333, + "step": 13810 + }, + { + "epoch": 1.1145993059478654, + "grad_norm": 0.7440677881240845, + "learning_rate": 4.407925544692884e-05, + "loss": 2.49, + "step": 13811 + }, + { + "epoch": 1.1146800096844485, + "grad_norm": 0.6688372492790222, + "learning_rate": 4.406616836728691e-05, + "loss": 2.4663, + "step": 13812 + }, + { + "epoch": 1.1147607134210313, + "grad_norm": 0.7108204364776611, + "learning_rate": 4.4053082681662264e-05, + "loss": 2.4843, + "step": 13813 + }, + { + "epoch": 1.1148414171576144, + "grad_norm": 0.7270475029945374, + "learning_rate": 4.4039998390381087e-05, + "loss": 2.4158, + "step": 13814 + }, + { + "epoch": 1.1149221208941973, + "grad_norm": 0.7243396639823914, + "learning_rate": 4.402691549376939e-05, + "loss": 2.3969, + "step": 13815 + }, + { + "epoch": 1.1150028246307804, + "grad_norm": 0.6687803268432617, + "learning_rate": 4.4013833992153285e-05, + "loss": 2.42, + "step": 13816 + }, + { + "epoch": 1.1150835283673635, + "grad_norm": 0.6892626285552979, + "learning_rate": 4.400075388585877e-05, + "loss": 2.4086, + "step": 13817 + }, + { + "epoch": 1.1151642321039463, + "grad_norm": 0.7556231021881104, + "learning_rate": 4.398767517521186e-05, + "loss": 
2.4201, + "step": 13818 + }, + { + "epoch": 1.1152449358405294, + "grad_norm": 0.6872838735580444, + "learning_rate": 4.397459786053851e-05, + "loss": 2.4143, + "step": 13819 + }, + { + "epoch": 1.1153256395771125, + "grad_norm": 0.6681817770004272, + "learning_rate": 4.396152194216463e-05, + "loss": 2.4404, + "step": 13820 + }, + { + "epoch": 1.1154063433136954, + "grad_norm": 0.7107201218605042, + "learning_rate": 4.394844742041614e-05, + "loss": 2.4503, + "step": 13821 + }, + { + "epoch": 1.1154870470502785, + "grad_norm": 0.706541121006012, + "learning_rate": 4.3935374295618824e-05, + "loss": 2.5106, + "step": 13822 + }, + { + "epoch": 1.1155677507868615, + "grad_norm": 0.6659905910491943, + "learning_rate": 4.392230256809854e-05, + "loss": 2.3839, + "step": 13823 + }, + { + "epoch": 1.1156484545234444, + "grad_norm": 0.7125810980796814, + "learning_rate": 4.3909232238181095e-05, + "loss": 2.4463, + "step": 13824 + }, + { + "epoch": 1.1157291582600275, + "grad_norm": 0.6581901907920837, + "learning_rate": 4.389616330619217e-05, + "loss": 2.4004, + "step": 13825 + }, + { + "epoch": 1.1158098619966104, + "grad_norm": 0.7660872340202332, + "learning_rate": 4.388309577245752e-05, + "loss": 2.4685, + "step": 13826 + }, + { + "epoch": 1.1158905657331935, + "grad_norm": 0.699526846408844, + "learning_rate": 4.387002963730281e-05, + "loss": 2.4131, + "step": 13827 + }, + { + "epoch": 1.1159712694697765, + "grad_norm": 0.7031015753746033, + "learning_rate": 4.3856964901053685e-05, + "loss": 2.4476, + "step": 13828 + }, + { + "epoch": 1.1160519732063594, + "grad_norm": 0.6876828074455261, + "learning_rate": 4.384390156403575e-05, + "loss": 2.4402, + "step": 13829 + }, + { + "epoch": 1.1161326769429425, + "grad_norm": 0.7188935279846191, + "learning_rate": 4.3830839626574626e-05, + "loss": 2.4473, + "step": 13830 + }, + { + "epoch": 1.1162133806795254, + "grad_norm": 0.6825287938117981, + "learning_rate": 4.381777908899577e-05, + "loss": 2.4757, + "step": 13831 + }, + { + 
"epoch": 1.1162940844161084, + "grad_norm": 0.718267560005188, + "learning_rate": 4.380471995162472e-05, + "loss": 2.483, + "step": 13832 + }, + { + "epoch": 1.1163747881526915, + "grad_norm": 0.6526767611503601, + "learning_rate": 4.379166221478697e-05, + "loss": 2.4161, + "step": 13833 + }, + { + "epoch": 1.1164554918892744, + "grad_norm": 0.7541480660438538, + "learning_rate": 4.37786058788079e-05, + "loss": 2.4876, + "step": 13834 + }, + { + "epoch": 1.1165361956258575, + "grad_norm": 0.7144232988357544, + "learning_rate": 4.376555094401294e-05, + "loss": 2.4153, + "step": 13835 + }, + { + "epoch": 1.1166168993624406, + "grad_norm": 0.7544882297515869, + "learning_rate": 4.3752497410727445e-05, + "loss": 2.4634, + "step": 13836 + }, + { + "epoch": 1.1166976030990234, + "grad_norm": 0.7263267040252686, + "learning_rate": 4.373944527927674e-05, + "loss": 2.5189, + "step": 13837 + }, + { + "epoch": 1.1167783068356065, + "grad_norm": 0.7709252834320068, + "learning_rate": 4.3726394549986135e-05, + "loss": 2.5036, + "step": 13838 + }, + { + "epoch": 1.1168590105721894, + "grad_norm": 0.6849128007888794, + "learning_rate": 4.3713345223180866e-05, + "loss": 2.414, + "step": 13839 + }, + { + "epoch": 1.1169397143087725, + "grad_norm": 0.6807512044906616, + "learning_rate": 4.3700297299186224e-05, + "loss": 2.4924, + "step": 13840 + }, + { + "epoch": 1.1170204180453556, + "grad_norm": 0.6894977688789368, + "learning_rate": 4.3687250778327294e-05, + "loss": 2.4183, + "step": 13841 + }, + { + "epoch": 1.1171011217819384, + "grad_norm": 0.6657617092132568, + "learning_rate": 4.367420566092928e-05, + "loss": 2.448, + "step": 13842 + }, + { + "epoch": 1.1171818255185215, + "grad_norm": 0.7104446291923523, + "learning_rate": 4.366116194731733e-05, + "loss": 2.4862, + "step": 13843 + }, + { + "epoch": 1.1172625292551046, + "grad_norm": 0.7485257387161255, + "learning_rate": 4.3648119637816465e-05, + "loss": 2.4253, + "step": 13844 + }, + { + "epoch": 1.1173432329916875, + 
"grad_norm": 0.7079899907112122, + "learning_rate": 4.363507873275177e-05, + "loss": 2.4235, + "step": 13845 + }, + { + "epoch": 1.1174239367282706, + "grad_norm": 0.6891573667526245, + "learning_rate": 4.3622039232448274e-05, + "loss": 2.4382, + "step": 13846 + }, + { + "epoch": 1.1175046404648534, + "grad_norm": 0.6886103749275208, + "learning_rate": 4.360900113723086e-05, + "loss": 2.5115, + "step": 13847 + }, + { + "epoch": 1.1175853442014365, + "grad_norm": 0.7511457800865173, + "learning_rate": 4.35959644474246e-05, + "loss": 2.4071, + "step": 13848 + }, + { + "epoch": 1.1176660479380196, + "grad_norm": 0.6526182293891907, + "learning_rate": 4.358292916335437e-05, + "loss": 2.4242, + "step": 13849 + }, + { + "epoch": 1.1177467516746025, + "grad_norm": 0.7385138273239136, + "learning_rate": 4.356989528534499e-05, + "loss": 2.4459, + "step": 13850 + }, + { + "epoch": 1.1178274554111856, + "grad_norm": 0.6668610572814941, + "learning_rate": 4.355686281372132e-05, + "loss": 2.4188, + "step": 13851 + }, + { + "epoch": 1.1179081591477686, + "grad_norm": 0.6950691342353821, + "learning_rate": 4.354383174880818e-05, + "loss": 2.4339, + "step": 13852 + }, + { + "epoch": 1.1179888628843515, + "grad_norm": 0.7017496824264526, + "learning_rate": 4.3530802090930375e-05, + "loss": 2.4733, + "step": 13853 + }, + { + "epoch": 1.1180695666209346, + "grad_norm": 0.8118221759796143, + "learning_rate": 4.351777384041254e-05, + "loss": 2.4826, + "step": 13854 + }, + { + "epoch": 1.1181502703575175, + "grad_norm": 0.7233164310455322, + "learning_rate": 4.350474699757945e-05, + "loss": 2.4637, + "step": 13855 + }, + { + "epoch": 1.1182309740941005, + "grad_norm": 0.6354575157165527, + "learning_rate": 4.349172156275576e-05, + "loss": 2.4487, + "step": 13856 + }, + { + "epoch": 1.1183116778306836, + "grad_norm": 0.6776937246322632, + "learning_rate": 4.347869753626606e-05, + "loss": 2.4292, + "step": 13857 + }, + { + "epoch": 1.1183923815672665, + "grad_norm": 0.6656864881515503, + 
"learning_rate": 4.3465674918434953e-05, + "loss": 2.484, + "step": 13858 + }, + { + "epoch": 1.1184730853038496, + "grad_norm": 0.7659650444984436, + "learning_rate": 4.345265370958702e-05, + "loss": 2.4181, + "step": 13859 + }, + { + "epoch": 1.1185537890404325, + "grad_norm": 0.6546063423156738, + "learning_rate": 4.3439633910046764e-05, + "loss": 2.4657, + "step": 13860 + }, + { + "epoch": 1.1186344927770155, + "grad_norm": 0.6869762539863586, + "learning_rate": 4.342661552013869e-05, + "loss": 2.513, + "step": 13861 + }, + { + "epoch": 1.1187151965135986, + "grad_norm": 0.6633490324020386, + "learning_rate": 4.3413598540187275e-05, + "loss": 2.4716, + "step": 13862 + }, + { + "epoch": 1.1187959002501815, + "grad_norm": 0.7238267660140991, + "learning_rate": 4.340058297051687e-05, + "loss": 2.4353, + "step": 13863 + }, + { + "epoch": 1.1188766039867646, + "grad_norm": 0.67429119348526, + "learning_rate": 4.3387568811451875e-05, + "loss": 2.4808, + "step": 13864 + }, + { + "epoch": 1.1189573077233477, + "grad_norm": 0.6901153326034546, + "learning_rate": 4.33745560633167e-05, + "loss": 2.4785, + "step": 13865 + }, + { + "epoch": 1.1190380114599305, + "grad_norm": 0.7227689027786255, + "learning_rate": 4.336154472643556e-05, + "loss": 2.4414, + "step": 13866 + }, + { + "epoch": 1.1191187151965136, + "grad_norm": 0.713793933391571, + "learning_rate": 4.33485348011328e-05, + "loss": 2.5136, + "step": 13867 + }, + { + "epoch": 1.1191994189330967, + "grad_norm": 0.6495655179023743, + "learning_rate": 4.333552628773263e-05, + "loss": 2.4267, + "step": 13868 + }, + { + "epoch": 1.1192801226696796, + "grad_norm": 0.7265790104866028, + "learning_rate": 4.3322519186559274e-05, + "loss": 2.4406, + "step": 13869 + }, + { + "epoch": 1.1193608264062627, + "grad_norm": 0.6700571179389954, + "learning_rate": 4.330951349793688e-05, + "loss": 2.4457, + "step": 13870 + }, + { + "epoch": 1.1194415301428455, + "grad_norm": 0.7112334966659546, + "learning_rate": 
4.3296509222189616e-05, + "loss": 2.4788, + "step": 13871 + }, + { + "epoch": 1.1195222338794286, + "grad_norm": 0.7056662440299988, + "learning_rate": 4.32835063596416e-05, + "loss": 2.5195, + "step": 13872 + }, + { + "epoch": 1.1196029376160117, + "grad_norm": 0.7198836207389832, + "learning_rate": 4.327050491061683e-05, + "loss": 2.4827, + "step": 13873 + }, + { + "epoch": 1.1196836413525946, + "grad_norm": 0.7384079694747925, + "learning_rate": 4.325750487543936e-05, + "loss": 2.4556, + "step": 13874 + }, + { + "epoch": 1.1197643450891777, + "grad_norm": 0.7315430641174316, + "learning_rate": 4.324450625443324e-05, + "loss": 2.4302, + "step": 13875 + }, + { + "epoch": 1.1198450488257605, + "grad_norm": 0.6692587733268738, + "learning_rate": 4.323150904792234e-05, + "loss": 2.5283, + "step": 13876 + }, + { + "epoch": 1.1199257525623436, + "grad_norm": 0.7407168745994568, + "learning_rate": 4.321851325623063e-05, + "loss": 2.4757, + "step": 13877 + }, + { + "epoch": 1.1200064562989267, + "grad_norm": 0.7387246489524841, + "learning_rate": 4.3205518879682e-05, + "loss": 2.5025, + "step": 13878 + }, + { + "epoch": 1.1200871600355096, + "grad_norm": 0.8058405518531799, + "learning_rate": 4.319252591860031e-05, + "loss": 2.4951, + "step": 13879 + }, + { + "epoch": 1.1201678637720927, + "grad_norm": 0.6964818835258484, + "learning_rate": 4.317953437330936e-05, + "loss": 2.4462, + "step": 13880 + }, + { + "epoch": 1.1202485675086757, + "grad_norm": 0.6904557347297668, + "learning_rate": 4.316654424413294e-05, + "loss": 2.3981, + "step": 13881 + }, + { + "epoch": 1.1203292712452586, + "grad_norm": 0.6555196046829224, + "learning_rate": 4.315355553139485e-05, + "loss": 2.418, + "step": 13882 + }, + { + "epoch": 1.1204099749818417, + "grad_norm": 0.7745094299316406, + "learning_rate": 4.3140568235418724e-05, + "loss": 2.4635, + "step": 13883 + }, + { + "epoch": 1.1204906787184246, + "grad_norm": 0.686676025390625, + "learning_rate": 4.312758235652825e-05, + "loss": 
2.4847, + "step": 13884 + }, + { + "epoch": 1.1205713824550076, + "grad_norm": 0.6937002539634705, + "learning_rate": 4.311459789504714e-05, + "loss": 2.4632, + "step": 13885 + }, + { + "epoch": 1.1206520861915907, + "grad_norm": 0.7024590373039246, + "learning_rate": 4.310161485129891e-05, + "loss": 2.4268, + "step": 13886 + }, + { + "epoch": 1.1207327899281736, + "grad_norm": 0.6848484873771667, + "learning_rate": 4.308863322560717e-05, + "loss": 2.4895, + "step": 13887 + }, + { + "epoch": 1.1208134936647567, + "grad_norm": 0.7071602940559387, + "learning_rate": 4.307565301829546e-05, + "loss": 2.4348, + "step": 13888 + }, + { + "epoch": 1.1208941974013398, + "grad_norm": 0.6868199706077576, + "learning_rate": 4.3062674229687274e-05, + "loss": 2.4613, + "step": 13889 + }, + { + "epoch": 1.1209749011379226, + "grad_norm": 0.7283496260643005, + "learning_rate": 4.304969686010608e-05, + "loss": 2.478, + "step": 13890 + }, + { + "epoch": 1.1210556048745057, + "grad_norm": 0.6907255053520203, + "learning_rate": 4.303672090987535e-05, + "loss": 2.4431, + "step": 13891 + }, + { + "epoch": 1.1211363086110886, + "grad_norm": 0.675089418888092, + "learning_rate": 4.302374637931841e-05, + "loss": 2.4398, + "step": 13892 + }, + { + "epoch": 1.1212170123476717, + "grad_norm": 0.6929863095283508, + "learning_rate": 4.301077326875863e-05, + "loss": 2.3909, + "step": 13893 + }, + { + "epoch": 1.1212977160842548, + "grad_norm": 0.6746132969856262, + "learning_rate": 4.29978015785194e-05, + "loss": 2.4726, + "step": 13894 + }, + { + "epoch": 1.1213784198208376, + "grad_norm": 0.720781147480011, + "learning_rate": 4.298483130892392e-05, + "loss": 2.4445, + "step": 13895 + }, + { + "epoch": 1.1214591235574207, + "grad_norm": 0.6624416708946228, + "learning_rate": 4.297186246029549e-05, + "loss": 2.3868, + "step": 13896 + }, + { + "epoch": 1.1215398272940038, + "grad_norm": 0.7849127054214478, + "learning_rate": 4.295889503295731e-05, + "loss": 2.4479, + "step": 13897 + }, + { + 
"epoch": 1.1216205310305867, + "grad_norm": 0.6655337810516357, + "learning_rate": 4.294592902723259e-05, + "loss": 2.5093, + "step": 13898 + }, + { + "epoch": 1.1217012347671698, + "grad_norm": 0.7055402398109436, + "learning_rate": 4.293296444344445e-05, + "loss": 2.4385, + "step": 13899 + }, + { + "epoch": 1.1217819385037526, + "grad_norm": 0.7388767600059509, + "learning_rate": 4.2920001281916e-05, + "loss": 2.4863, + "step": 13900 + }, + { + "epoch": 1.1218626422403357, + "grad_norm": 0.6915223002433777, + "learning_rate": 4.2907039542970373e-05, + "loss": 2.4218, + "step": 13901 + }, + { + "epoch": 1.1219433459769188, + "grad_norm": 0.7124893665313721, + "learning_rate": 4.289407922693053e-05, + "loss": 2.4514, + "step": 13902 + }, + { + "epoch": 1.1220240497135017, + "grad_norm": 0.6552406549453735, + "learning_rate": 4.28811203341195e-05, + "loss": 2.4558, + "step": 13903 + }, + { + "epoch": 1.1221047534500848, + "grad_norm": 0.6641791462898254, + "learning_rate": 4.286816286486031e-05, + "loss": 2.4277, + "step": 13904 + }, + { + "epoch": 1.1221854571866678, + "grad_norm": 0.677733838558197, + "learning_rate": 4.285520681947579e-05, + "loss": 2.4861, + "step": 13905 + }, + { + "epoch": 1.1222661609232507, + "grad_norm": 0.6572888493537903, + "learning_rate": 4.284225219828891e-05, + "loss": 2.4657, + "step": 13906 + }, + { + "epoch": 1.1223468646598338, + "grad_norm": 0.6923860907554626, + "learning_rate": 4.2829299001622546e-05, + "loss": 2.4857, + "step": 13907 + }, + { + "epoch": 1.1224275683964167, + "grad_norm": 0.6971977949142456, + "learning_rate": 4.281634722979947e-05, + "loss": 2.4434, + "step": 13908 + }, + { + "epoch": 1.1225082721329998, + "grad_norm": 0.6828060746192932, + "learning_rate": 4.2803396883142456e-05, + "loss": 2.4342, + "step": 13909 + }, + { + "epoch": 1.1225889758695828, + "grad_norm": 0.7001270651817322, + "learning_rate": 4.279044796197438e-05, + "loss": 2.5222, + "step": 13910 + }, + { + "epoch": 1.1226696796061657, + 
"grad_norm": 0.6425578594207764, + "learning_rate": 4.277750046661785e-05, + "loss": 2.42, + "step": 13911 + }, + { + "epoch": 1.1227503833427488, + "grad_norm": 0.6498209834098816, + "learning_rate": 4.2764554397395585e-05, + "loss": 2.4448, + "step": 13912 + }, + { + "epoch": 1.1228310870793319, + "grad_norm": 0.6894031763076782, + "learning_rate": 4.275160975463025e-05, + "loss": 2.4508, + "step": 13913 + }, + { + "epoch": 1.1229117908159147, + "grad_norm": 0.7286608219146729, + "learning_rate": 4.273866653864448e-05, + "loss": 2.4557, + "step": 13914 + }, + { + "epoch": 1.1229924945524978, + "grad_norm": 0.753826379776001, + "learning_rate": 4.272572474976079e-05, + "loss": 2.4635, + "step": 13915 + }, + { + "epoch": 1.1230731982890807, + "grad_norm": 0.6715937256813049, + "learning_rate": 4.271278438830174e-05, + "loss": 2.5107, + "step": 13916 + }, + { + "epoch": 1.1231539020256638, + "grad_norm": 0.6833200454711914, + "learning_rate": 4.26998454545899e-05, + "loss": 2.4883, + "step": 13917 + }, + { + "epoch": 1.1232346057622469, + "grad_norm": 0.6763597130775452, + "learning_rate": 4.2686907948947666e-05, + "loss": 2.4178, + "step": 13918 + }, + { + "epoch": 1.1233153094988297, + "grad_norm": 0.7336227297782898, + "learning_rate": 4.26739718716975e-05, + "loss": 2.4542, + "step": 13919 + }, + { + "epoch": 1.1233960132354128, + "grad_norm": 0.6583260297775269, + "learning_rate": 4.2661037223161806e-05, + "loss": 2.3998, + "step": 13920 + }, + { + "epoch": 1.1234767169719957, + "grad_norm": 0.6444356441497803, + "learning_rate": 4.264810400366295e-05, + "loss": 2.4354, + "step": 13921 + }, + { + "epoch": 1.1235574207085788, + "grad_norm": 0.6786002516746521, + "learning_rate": 4.2635172213523255e-05, + "loss": 2.3989, + "step": 13922 + }, + { + "epoch": 1.1236381244451619, + "grad_norm": 0.6838372349739075, + "learning_rate": 4.262224185306507e-05, + "loss": 2.4431, + "step": 13923 + }, + { + "epoch": 1.1237188281817447, + "grad_norm": 0.7516793012619019, + 
"learning_rate": 4.260931292261056e-05, + "loss": 2.4373, + "step": 13924 + }, + { + "epoch": 1.1237995319183278, + "grad_norm": 0.6860260367393494, + "learning_rate": 4.2596385422481985e-05, + "loss": 2.4457, + "step": 13925 + }, + { + "epoch": 1.123880235654911, + "grad_norm": 0.6556448936462402, + "learning_rate": 4.2583459353001595e-05, + "loss": 2.4165, + "step": 13926 + }, + { + "epoch": 1.1239609393914938, + "grad_norm": 0.729131281375885, + "learning_rate": 4.257053471449144e-05, + "loss": 2.4124, + "step": 13927 + }, + { + "epoch": 1.1240416431280769, + "grad_norm": 0.6941910982131958, + "learning_rate": 4.2557611507273684e-05, + "loss": 2.4095, + "step": 13928 + }, + { + "epoch": 1.12412234686466, + "grad_norm": 0.6390536427497864, + "learning_rate": 4.25446897316704e-05, + "loss": 2.4221, + "step": 13929 + }, + { + "epoch": 1.1242030506012428, + "grad_norm": 0.7034881114959717, + "learning_rate": 4.253176938800365e-05, + "loss": 2.4685, + "step": 13930 + }, + { + "epoch": 1.124283754337826, + "grad_norm": 0.6975526809692383, + "learning_rate": 4.251885047659542e-05, + "loss": 2.4771, + "step": 13931 + }, + { + "epoch": 1.1243644580744088, + "grad_norm": 0.7020023465156555, + "learning_rate": 4.2505932997767695e-05, + "loss": 2.4746, + "step": 13932 + }, + { + "epoch": 1.1244451618109919, + "grad_norm": 0.7207093238830566, + "learning_rate": 4.2493016951842444e-05, + "loss": 2.4707, + "step": 13933 + }, + { + "epoch": 1.124525865547575, + "grad_norm": 0.7711251974105835, + "learning_rate": 4.24801023391415e-05, + "loss": 2.5104, + "step": 13934 + }, + { + "epoch": 1.1246065692841578, + "grad_norm": 0.7324040532112122, + "learning_rate": 4.246718915998677e-05, + "loss": 2.4257, + "step": 13935 + }, + { + "epoch": 1.124687273020741, + "grad_norm": 0.6532757878303528, + "learning_rate": 4.2454277414700116e-05, + "loss": 2.3708, + "step": 13936 + }, + { + "epoch": 1.1247679767573238, + "grad_norm": 0.6933012008666992, + "learning_rate": 4.244136710360325e-05, 
+ "loss": 2.4985, + "step": 13937 + }, + { + "epoch": 1.1248486804939068, + "grad_norm": 0.6787589192390442, + "learning_rate": 4.242845822701798e-05, + "loss": 2.402, + "step": 13938 + }, + { + "epoch": 1.12492938423049, + "grad_norm": 0.6567786931991577, + "learning_rate": 4.241555078526602e-05, + "loss": 2.4295, + "step": 13939 + }, + { + "epoch": 1.1250100879670728, + "grad_norm": 0.6962547302246094, + "learning_rate": 4.2402644778669074e-05, + "loss": 2.4006, + "step": 13940 + }, + { + "epoch": 1.125090791703656, + "grad_norm": 0.7152721285820007, + "learning_rate": 4.238974020754877e-05, + "loss": 2.4757, + "step": 13941 + }, + { + "epoch": 1.125171495440239, + "grad_norm": 0.6869861483573914, + "learning_rate": 4.237683707222677e-05, + "loss": 2.3877, + "step": 13942 + }, + { + "epoch": 1.1252521991768218, + "grad_norm": 0.6951470971107483, + "learning_rate": 4.236393537302459e-05, + "loss": 2.3755, + "step": 13943 + }, + { + "epoch": 1.125332902913405, + "grad_norm": 0.6997567415237427, + "learning_rate": 4.2351035110263805e-05, + "loss": 2.4731, + "step": 13944 + }, + { + "epoch": 1.125413606649988, + "grad_norm": 0.6765854358673096, + "learning_rate": 4.23381362842659e-05, + "loss": 2.4004, + "step": 13945 + }, + { + "epoch": 1.1254943103865709, + "grad_norm": 0.7046722173690796, + "learning_rate": 4.2325238895352426e-05, + "loss": 2.4379, + "step": 13946 + }, + { + "epoch": 1.125575014123154, + "grad_norm": 0.6862985491752625, + "learning_rate": 4.231234294384472e-05, + "loss": 2.4614, + "step": 13947 + }, + { + "epoch": 1.1256557178597368, + "grad_norm": 0.6637778282165527, + "learning_rate": 4.229944843006422e-05, + "loss": 2.4412, + "step": 13948 + }, + { + "epoch": 1.12573642159632, + "grad_norm": 0.7042228579521179, + "learning_rate": 4.228655535433231e-05, + "loss": 2.4296, + "step": 13949 + }, + { + "epoch": 1.1258171253329028, + "grad_norm": 0.6767764687538147, + "learning_rate": 4.227366371697029e-05, + "loss": 2.409, + "step": 13950 + }, + { + 
"epoch": 1.1258978290694859, + "grad_norm": 0.6886798143386841, + "learning_rate": 4.226077351829948e-05, + "loss": 2.4786, + "step": 13951 + }, + { + "epoch": 1.125978532806069, + "grad_norm": 0.7723653316497803, + "learning_rate": 4.224788475864115e-05, + "loss": 2.4111, + "step": 13952 + }, + { + "epoch": 1.1260592365426518, + "grad_norm": 0.7614055275917053, + "learning_rate": 4.2234997438316473e-05, + "loss": 2.5055, + "step": 13953 + }, + { + "epoch": 1.126139940279235, + "grad_norm": 0.7195241451263428, + "learning_rate": 4.222211155764665e-05, + "loss": 2.411, + "step": 13954 + }, + { + "epoch": 1.126220644015818, + "grad_norm": 0.7130021452903748, + "learning_rate": 4.220922711695288e-05, + "loss": 2.4819, + "step": 13955 + }, + { + "epoch": 1.1263013477524009, + "grad_norm": 0.6972241401672363, + "learning_rate": 4.2196344116556194e-05, + "loss": 2.4611, + "step": 13956 + }, + { + "epoch": 1.126382051488984, + "grad_norm": 0.7023231387138367, + "learning_rate": 4.218346255677772e-05, + "loss": 2.4509, + "step": 13957 + }, + { + "epoch": 1.126462755225567, + "grad_norm": 0.6959301829338074, + "learning_rate": 4.2170582437938534e-05, + "loss": 2.4441, + "step": 13958 + }, + { + "epoch": 1.12654345896215, + "grad_norm": 0.7423149347305298, + "learning_rate": 4.2157703760359555e-05, + "loss": 2.4452, + "step": 13959 + }, + { + "epoch": 1.126624162698733, + "grad_norm": 0.6587820053100586, + "learning_rate": 4.214482652436177e-05, + "loss": 2.3936, + "step": 13960 + }, + { + "epoch": 1.1267048664353159, + "grad_norm": 0.6601768136024475, + "learning_rate": 4.213195073026618e-05, + "loss": 2.453, + "step": 13961 + }, + { + "epoch": 1.126785570171899, + "grad_norm": 0.6986891031265259, + "learning_rate": 4.2119076378393676e-05, + "loss": 2.452, + "step": 13962 + }, + { + "epoch": 1.126866273908482, + "grad_norm": 0.7207025289535522, + "learning_rate": 4.2106203469065055e-05, + "loss": 2.4048, + "step": 13963 + }, + { + "epoch": 1.126946977645065, + "grad_norm": 
0.6731177568435669, + "learning_rate": 4.2093332002601184e-05, + "loss": 2.4573, + "step": 13964 + }, + { + "epoch": 1.127027681381648, + "grad_norm": 0.7330070734024048, + "learning_rate": 4.208046197932288e-05, + "loss": 2.4274, + "step": 13965 + }, + { + "epoch": 1.1271083851182309, + "grad_norm": 0.7008770704269409, + "learning_rate": 4.206759339955084e-05, + "loss": 2.4933, + "step": 13966 + }, + { + "epoch": 1.127189088854814, + "grad_norm": 0.8309584259986877, + "learning_rate": 4.20547262636058e-05, + "loss": 2.3857, + "step": 13967 + }, + { + "epoch": 1.127269792591397, + "grad_norm": 0.6705843210220337, + "learning_rate": 4.204186057180849e-05, + "loss": 2.4303, + "step": 13968 + }, + { + "epoch": 1.12735049632798, + "grad_norm": 0.7526851296424866, + "learning_rate": 4.202899632447949e-05, + "loss": 2.455, + "step": 13969 + }, + { + "epoch": 1.127431200064563, + "grad_norm": 0.6690995097160339, + "learning_rate": 4.201613352193943e-05, + "loss": 2.4398, + "step": 13970 + }, + { + "epoch": 1.127511903801146, + "grad_norm": 0.6946840286254883, + "learning_rate": 4.20032721645089e-05, + "loss": 2.4032, + "step": 13971 + }, + { + "epoch": 1.127592607537729, + "grad_norm": 0.7438863515853882, + "learning_rate": 4.1990412252508426e-05, + "loss": 2.4644, + "step": 13972 + }, + { + "epoch": 1.127673311274312, + "grad_norm": 0.6975359916687012, + "learning_rate": 4.197755378625852e-05, + "loss": 2.3991, + "step": 13973 + }, + { + "epoch": 1.1277540150108951, + "grad_norm": 0.6799279451370239, + "learning_rate": 4.196469676607968e-05, + "loss": 2.4328, + "step": 13974 + }, + { + "epoch": 1.127834718747478, + "grad_norm": 0.7014481425285339, + "learning_rate": 4.1951841192292274e-05, + "loss": 2.5045, + "step": 13975 + }, + { + "epoch": 1.127915422484061, + "grad_norm": 0.7074011564254761, + "learning_rate": 4.1938987065216716e-05, + "loss": 2.4583, + "step": 13976 + }, + { + "epoch": 1.127996126220644, + "grad_norm": 0.7246339917182922, + "learning_rate": 
4.192613438517338e-05, + "loss": 2.447, + "step": 13977 + }, + { + "epoch": 1.128076829957227, + "grad_norm": 0.6757462620735168, + "learning_rate": 4.191328315248262e-05, + "loss": 2.4181, + "step": 13978 + }, + { + "epoch": 1.12815753369381, + "grad_norm": 0.6758493185043335, + "learning_rate": 4.1900433367464644e-05, + "loss": 2.4837, + "step": 13979 + }, + { + "epoch": 1.128238237430393, + "grad_norm": 0.6782165765762329, + "learning_rate": 4.1887585030439736e-05, + "loss": 2.3946, + "step": 13980 + }, + { + "epoch": 1.128318941166976, + "grad_norm": 0.7176415324211121, + "learning_rate": 4.187473814172812e-05, + "loss": 2.4538, + "step": 13981 + }, + { + "epoch": 1.128399644903559, + "grad_norm": 0.6636224985122681, + "learning_rate": 4.186189270164997e-05, + "loss": 2.4493, + "step": 13982 + }, + { + "epoch": 1.128480348640142, + "grad_norm": 0.6613143086433411, + "learning_rate": 4.184904871052544e-05, + "loss": 2.4994, + "step": 13983 + }, + { + "epoch": 1.128561052376725, + "grad_norm": 0.7148364186286926, + "learning_rate": 4.183620616867465e-05, + "loss": 2.4673, + "step": 13984 + }, + { + "epoch": 1.128641756113308, + "grad_norm": 0.6657952070236206, + "learning_rate": 4.1823365076417606e-05, + "loss": 2.3915, + "step": 13985 + }, + { + "epoch": 1.128722459849891, + "grad_norm": 0.7135687470436096, + "learning_rate": 4.181052543407439e-05, + "loss": 2.4961, + "step": 13986 + }, + { + "epoch": 1.1288031635864741, + "grad_norm": 0.7245377898216248, + "learning_rate": 4.179768724196501e-05, + "loss": 2.4519, + "step": 13987 + }, + { + "epoch": 1.128883867323057, + "grad_norm": 0.6832938194274902, + "learning_rate": 4.1784850500409376e-05, + "loss": 2.4471, + "step": 13988 + }, + { + "epoch": 1.12896457105964, + "grad_norm": 0.7303032279014587, + "learning_rate": 4.177201520972746e-05, + "loss": 2.3906, + "step": 13989 + }, + { + "epoch": 1.1290452747962232, + "grad_norm": 0.698581874370575, + "learning_rate": 4.175918137023911e-05, + "loss": 2.4667, + 
"step": 13990 + }, + { + "epoch": 1.129125978532806, + "grad_norm": 0.69133061170578, + "learning_rate": 4.174634898226422e-05, + "loss": 2.4285, + "step": 13991 + }, + { + "epoch": 1.1292066822693891, + "grad_norm": 0.7029501795768738, + "learning_rate": 4.1733518046122576e-05, + "loss": 2.4839, + "step": 13992 + }, + { + "epoch": 1.129287386005972, + "grad_norm": 0.7566521167755127, + "learning_rate": 4.172068856213398e-05, + "loss": 2.5019, + "step": 13993 + }, + { + "epoch": 1.129368089742555, + "grad_norm": 0.697998046875, + "learning_rate": 4.1707860530618204e-05, + "loss": 2.4305, + "step": 13994 + }, + { + "epoch": 1.1294487934791382, + "grad_norm": 0.674194872379303, + "learning_rate": 4.169503395189489e-05, + "loss": 2.4361, + "step": 13995 + }, + { + "epoch": 1.129529497215721, + "grad_norm": 0.6936436891555786, + "learning_rate": 4.168220882628373e-05, + "loss": 2.518, + "step": 13996 + }, + { + "epoch": 1.1296102009523041, + "grad_norm": 0.6831670999526978, + "learning_rate": 4.166938515410442e-05, + "loss": 2.4197, + "step": 13997 + }, + { + "epoch": 1.129690904688887, + "grad_norm": 0.7323662638664246, + "learning_rate": 4.165656293567647e-05, + "loss": 2.4555, + "step": 13998 + }, + { + "epoch": 1.12977160842547, + "grad_norm": 0.7699782848358154, + "learning_rate": 4.164374217131948e-05, + "loss": 2.4456, + "step": 13999 + }, + { + "epoch": 1.1298523121620532, + "grad_norm": 0.7009051442146301, + "learning_rate": 4.163092286135297e-05, + "loss": 2.4429, + "step": 14000 + }, + { + "epoch": 1.1298523121620532, + "eval_loss": 2.4034411907196045, + "eval_runtime": 771.1158, + "eval_samples_per_second": 3.398, + "eval_steps_per_second": 0.567, + "step": 14000 + }, + { + "epoch": 1.129933015898636, + "grad_norm": 0.674665093421936, + "learning_rate": 4.1618105006096456e-05, + "loss": 2.4127, + "step": 14001 + }, + { + "epoch": 1.1300137196352191, + "grad_norm": 0.7332403659820557, + "learning_rate": 4.1605288605869365e-05, + "loss": 2.4854, + "step": 
14002 + }, + { + "epoch": 1.1300944233718022, + "grad_norm": 0.70233553647995, + "learning_rate": 4.159247366099117e-05, + "loss": 2.4433, + "step": 14003 + }, + { + "epoch": 1.130175127108385, + "grad_norm": 0.6259445548057556, + "learning_rate": 4.157966017178118e-05, + "loss": 2.3605, + "step": 14004 + }, + { + "epoch": 1.1302558308449682, + "grad_norm": 0.717408299446106, + "learning_rate": 4.1566848138558755e-05, + "loss": 2.4378, + "step": 14005 + }, + { + "epoch": 1.130336534581551, + "grad_norm": 0.6973297595977783, + "learning_rate": 4.155403756164323e-05, + "loss": 2.4363, + "step": 14006 + }, + { + "epoch": 1.1304172383181341, + "grad_norm": 0.7204940915107727, + "learning_rate": 4.154122844135391e-05, + "loss": 2.4814, + "step": 14007 + }, + { + "epoch": 1.1304979420547172, + "grad_norm": 0.8976696133613586, + "learning_rate": 4.1528420778009935e-05, + "loss": 2.4654, + "step": 14008 + }, + { + "epoch": 1.1305786457913, + "grad_norm": 0.7270354628562927, + "learning_rate": 4.151561457193057e-05, + "loss": 2.4088, + "step": 14009 + }, + { + "epoch": 1.1306593495278832, + "grad_norm": 0.7200367450714111, + "learning_rate": 4.1502809823434985e-05, + "loss": 2.4412, + "step": 14010 + }, + { + "epoch": 1.130740053264466, + "grad_norm": 0.7593986392021179, + "learning_rate": 4.149000653284227e-05, + "loss": 2.5058, + "step": 14011 + }, + { + "epoch": 1.1308207570010491, + "grad_norm": 0.7322795987129211, + "learning_rate": 4.147720470047155e-05, + "loss": 2.4899, + "step": 14012 + }, + { + "epoch": 1.1309014607376322, + "grad_norm": 0.6649030447006226, + "learning_rate": 4.1464404326641905e-05, + "loss": 2.4358, + "step": 14013 + }, + { + "epoch": 1.130982164474215, + "grad_norm": 0.7258814573287964, + "learning_rate": 4.145160541167228e-05, + "loss": 2.4732, + "step": 14014 + }, + { + "epoch": 1.1310628682107982, + "grad_norm": 0.7414976358413696, + "learning_rate": 4.1438807955881695e-05, + "loss": 2.4157, + "step": 14015 + }, + { + "epoch": 
1.1311435719473812, + "grad_norm": 0.6813236474990845, + "learning_rate": 4.142601195958914e-05, + "loss": 2.3966, + "step": 14016 + }, + { + "epoch": 1.131224275683964, + "grad_norm": 0.6715923547744751, + "learning_rate": 4.141321742311344e-05, + "loss": 2.4358, + "step": 14017 + }, + { + "epoch": 1.1313049794205472, + "grad_norm": 0.7174912691116333, + "learning_rate": 4.14004243467735e-05, + "loss": 2.4838, + "step": 14018 + }, + { + "epoch": 1.1313856831571303, + "grad_norm": 0.6945109963417053, + "learning_rate": 4.138763273088821e-05, + "loss": 2.4674, + "step": 14019 + }, + { + "epoch": 1.1314663868937131, + "grad_norm": 0.6759494543075562, + "learning_rate": 4.137484257577629e-05, + "loss": 2.4659, + "step": 14020 + }, + { + "epoch": 1.1315470906302962, + "grad_norm": 0.7077876925468445, + "learning_rate": 4.1362053881756534e-05, + "loss": 2.4731, + "step": 14021 + }, + { + "epoch": 1.131627794366879, + "grad_norm": 0.6769500970840454, + "learning_rate": 4.1349266649147654e-05, + "loss": 2.3606, + "step": 14022 + }, + { + "epoch": 1.1317084981034622, + "grad_norm": 0.7104208469390869, + "learning_rate": 4.1336480878268424e-05, + "loss": 2.4626, + "step": 14023 + }, + { + "epoch": 1.1317892018400453, + "grad_norm": 0.7102686762809753, + "learning_rate": 4.132369656943741e-05, + "loss": 2.4545, + "step": 14024 + }, + { + "epoch": 1.1318699055766281, + "grad_norm": 0.7773897647857666, + "learning_rate": 4.1310913722973256e-05, + "loss": 2.5107, + "step": 14025 + }, + { + "epoch": 1.1319506093132112, + "grad_norm": 0.6427130103111267, + "learning_rate": 4.1298132339194585e-05, + "loss": 2.4349, + "step": 14026 + }, + { + "epoch": 1.132031313049794, + "grad_norm": 0.6725162863731384, + "learning_rate": 4.128535241841987e-05, + "loss": 2.4566, + "step": 14027 + }, + { + "epoch": 1.1321120167863772, + "grad_norm": 0.7182251214981079, + "learning_rate": 4.127257396096764e-05, + "loss": 2.4472, + "step": 14028 + }, + { + "epoch": 1.1321927205229603, + "grad_norm": 
0.6712302565574646, + "learning_rate": 4.1259796967156426e-05, + "loss": 2.4326, + "step": 14029 + }, + { + "epoch": 1.1322734242595431, + "grad_norm": 0.7726041078567505, + "learning_rate": 4.124702143730459e-05, + "loss": 2.4994, + "step": 14030 + }, + { + "epoch": 1.1323541279961262, + "grad_norm": 0.651899516582489, + "learning_rate": 4.123424737173056e-05, + "loss": 2.4244, + "step": 14031 + }, + { + "epoch": 1.1324348317327093, + "grad_norm": 0.6646261215209961, + "learning_rate": 4.12214747707527e-05, + "loss": 2.5027, + "step": 14032 + }, + { + "epoch": 1.1325155354692922, + "grad_norm": 0.729098916053772, + "learning_rate": 4.120870363468933e-05, + "loss": 2.5117, + "step": 14033 + }, + { + "epoch": 1.1325962392058753, + "grad_norm": 0.7056638598442078, + "learning_rate": 4.119593396385876e-05, + "loss": 2.4279, + "step": 14034 + }, + { + "epoch": 1.1326769429424584, + "grad_norm": 0.7051844000816345, + "learning_rate": 4.1183165758579255e-05, + "loss": 2.3844, + "step": 14035 + }, + { + "epoch": 1.1327576466790412, + "grad_norm": 0.6954311728477478, + "learning_rate": 4.1170399019168984e-05, + "loss": 2.4041, + "step": 14036 + }, + { + "epoch": 1.1328383504156243, + "grad_norm": 0.650044858455658, + "learning_rate": 4.1157633745946135e-05, + "loss": 2.4397, + "step": 14037 + }, + { + "epoch": 1.1329190541522072, + "grad_norm": 0.6974380016326904, + "learning_rate": 4.114486993922888e-05, + "loss": 2.4391, + "step": 14038 + }, + { + "epoch": 1.1329997578887903, + "grad_norm": 0.7252807021141052, + "learning_rate": 4.113210759933536e-05, + "loss": 2.4471, + "step": 14039 + }, + { + "epoch": 1.1330804616253733, + "grad_norm": 0.7001414895057678, + "learning_rate": 4.111934672658354e-05, + "loss": 2.402, + "step": 14040 + }, + { + "epoch": 1.1331611653619562, + "grad_norm": 0.7420533895492554, + "learning_rate": 4.110658732129153e-05, + "loss": 2.4987, + "step": 14041 + }, + { + "epoch": 1.1332418690985393, + "grad_norm": 0.6850644946098328, + 
"learning_rate": 4.1093829383777315e-05, + "loss": 2.4355, + "step": 14042 + }, + { + "epoch": 1.1333225728351222, + "grad_norm": 0.6905977725982666, + "learning_rate": 4.108107291435885e-05, + "loss": 2.4818, + "step": 14043 + }, + { + "epoch": 1.1334032765717053, + "grad_norm": 0.6555112600326538, + "learning_rate": 4.106831791335407e-05, + "loss": 2.425, + "step": 14044 + }, + { + "epoch": 1.1334839803082883, + "grad_norm": 0.6570355892181396, + "learning_rate": 4.105556438108089e-05, + "loss": 2.4232, + "step": 14045 + }, + { + "epoch": 1.1335646840448712, + "grad_norm": 0.7910747528076172, + "learning_rate": 4.104281231785708e-05, + "loss": 2.484, + "step": 14046 + }, + { + "epoch": 1.1336453877814543, + "grad_norm": 0.6581952571868896, + "learning_rate": 4.103006172400052e-05, + "loss": 2.4102, + "step": 14047 + }, + { + "epoch": 1.1337260915180374, + "grad_norm": 0.6834773421287537, + "learning_rate": 4.1017312599828994e-05, + "loss": 2.4602, + "step": 14048 + }, + { + "epoch": 1.1338067952546202, + "grad_norm": 0.7588350772857666, + "learning_rate": 4.1004564945660195e-05, + "loss": 2.5059, + "step": 14049 + }, + { + "epoch": 1.1338874989912033, + "grad_norm": 0.6604699492454529, + "learning_rate": 4.099181876181185e-05, + "loss": 2.4403, + "step": 14050 + }, + { + "epoch": 1.1339682027277862, + "grad_norm": 0.6957669258117676, + "learning_rate": 4.097907404860163e-05, + "loss": 2.4218, + "step": 14051 + }, + { + "epoch": 1.1340489064643693, + "grad_norm": 0.7091849446296692, + "learning_rate": 4.0966330806347166e-05, + "loss": 2.4396, + "step": 14052 + }, + { + "epoch": 1.1341296102009524, + "grad_norm": 0.6637482047080994, + "learning_rate": 4.095358903536605e-05, + "loss": 2.4514, + "step": 14053 + }, + { + "epoch": 1.1342103139375352, + "grad_norm": 0.7485960125923157, + "learning_rate": 4.0940848735975846e-05, + "loss": 2.4401, + "step": 14054 + }, + { + "epoch": 1.1342910176741183, + "grad_norm": 0.6509774327278137, + "learning_rate": 
4.092810990849411e-05, + "loss": 2.4575, + "step": 14055 + }, + { + "epoch": 1.1343717214107012, + "grad_norm": 0.7151626348495483, + "learning_rate": 4.091537255323825e-05, + "loss": 2.45, + "step": 14056 + }, + { + "epoch": 1.1344524251472843, + "grad_norm": 0.7536267042160034, + "learning_rate": 4.0902636670525764e-05, + "loss": 2.497, + "step": 14057 + }, + { + "epoch": 1.1345331288838674, + "grad_norm": 0.7779545783996582, + "learning_rate": 4.0889902260674086e-05, + "loss": 2.412, + "step": 14058 + }, + { + "epoch": 1.1346138326204502, + "grad_norm": 0.7211748957633972, + "learning_rate": 4.087716932400052e-05, + "loss": 2.4727, + "step": 14059 + }, + { + "epoch": 1.1346945363570333, + "grad_norm": 0.6710701584815979, + "learning_rate": 4.086443786082245e-05, + "loss": 2.4318, + "step": 14060 + }, + { + "epoch": 1.1347752400936164, + "grad_norm": 0.7072857022285461, + "learning_rate": 4.085170787145717e-05, + "loss": 2.4672, + "step": 14061 + }, + { + "epoch": 1.1348559438301993, + "grad_norm": 0.6475152969360352, + "learning_rate": 4.083897935622194e-05, + "loss": 2.4104, + "step": 14062 + }, + { + "epoch": 1.1349366475667824, + "grad_norm": 0.7408067584037781, + "learning_rate": 4.0826252315433986e-05, + "loss": 2.4129, + "step": 14063 + }, + { + "epoch": 1.1350173513033655, + "grad_norm": 0.732540488243103, + "learning_rate": 4.081352674941056e-05, + "loss": 2.4209, + "step": 14064 + }, + { + "epoch": 1.1350980550399483, + "grad_norm": 0.6933332681655884, + "learning_rate": 4.080080265846872e-05, + "loss": 2.3797, + "step": 14065 + }, + { + "epoch": 1.1351787587765314, + "grad_norm": 0.6507896780967712, + "learning_rate": 4.078808004292561e-05, + "loss": 2.4372, + "step": 14066 + }, + { + "epoch": 1.1352594625131143, + "grad_norm": 0.729292094707489, + "learning_rate": 4.0775358903098384e-05, + "loss": 2.5513, + "step": 14067 + }, + { + "epoch": 1.1353401662496974, + "grad_norm": 0.692757248878479, + "learning_rate": 4.076263923930398e-05, + "loss": 
2.4228, + "step": 14068 + }, + { + "epoch": 1.1354208699862804, + "grad_norm": 0.7028260231018066, + "learning_rate": 4.074992105185946e-05, + "loss": 2.4478, + "step": 14069 + }, + { + "epoch": 1.1355015737228633, + "grad_norm": 0.65067058801651, + "learning_rate": 4.073720434108179e-05, + "loss": 2.3729, + "step": 14070 + }, + { + "epoch": 1.1355822774594464, + "grad_norm": 0.6884061098098755, + "learning_rate": 4.0724489107287933e-05, + "loss": 2.3693, + "step": 14071 + }, + { + "epoch": 1.1356629811960293, + "grad_norm": 0.70686936378479, + "learning_rate": 4.071177535079472e-05, + "loss": 2.4989, + "step": 14072 + }, + { + "epoch": 1.1357436849326124, + "grad_norm": 0.6792482733726501, + "learning_rate": 4.0699063071919016e-05, + "loss": 2.393, + "step": 14073 + }, + { + "epoch": 1.1358243886691954, + "grad_norm": 0.7231085896492004, + "learning_rate": 4.0686352270977745e-05, + "loss": 2.4597, + "step": 14074 + }, + { + "epoch": 1.1359050924057783, + "grad_norm": 0.8024532198905945, + "learning_rate": 4.067364294828758e-05, + "loss": 2.4409, + "step": 14075 + }, + { + "epoch": 1.1359857961423614, + "grad_norm": 0.6761424541473389, + "learning_rate": 4.066093510416532e-05, + "loss": 2.4598, + "step": 14076 + }, + { + "epoch": 1.1360664998789445, + "grad_norm": 0.7075559496879578, + "learning_rate": 4.064822873892771e-05, + "loss": 2.4649, + "step": 14077 + }, + { + "epoch": 1.1361472036155273, + "grad_norm": 0.6292272806167603, + "learning_rate": 4.063552385289134e-05, + "loss": 2.445, + "step": 14078 + }, + { + "epoch": 1.1362279073521104, + "grad_norm": 0.6435273885726929, + "learning_rate": 4.06228204463729e-05, + "loss": 2.4105, + "step": 14079 + }, + { + "epoch": 1.1363086110886935, + "grad_norm": 0.7135637402534485, + "learning_rate": 4.061011851968903e-05, + "loss": 2.3907, + "step": 14080 + }, + { + "epoch": 1.1363893148252764, + "grad_norm": 0.7424013614654541, + "learning_rate": 4.059741807315621e-05, + "loss": 2.4405, + "step": 14081 + }, + { + 
"epoch": 1.1364700185618595, + "grad_norm": 0.6649916768074036, + "learning_rate": 4.0584719107091016e-05, + "loss": 2.4314, + "step": 14082 + }, + { + "epoch": 1.1365507222984423, + "grad_norm": 0.6700563430786133, + "learning_rate": 4.0572021621809944e-05, + "loss": 2.4093, + "step": 14083 + }, + { + "epoch": 1.1366314260350254, + "grad_norm": 0.6740709543228149, + "learning_rate": 4.055932561762942e-05, + "loss": 2.4301, + "step": 14084 + }, + { + "epoch": 1.1367121297716085, + "grad_norm": 0.7039555907249451, + "learning_rate": 4.0546631094865895e-05, + "loss": 2.4427, + "step": 14085 + }, + { + "epoch": 1.1367928335081914, + "grad_norm": 0.7461164593696594, + "learning_rate": 4.053393805383573e-05, + "loss": 2.3865, + "step": 14086 + }, + { + "epoch": 1.1368735372447745, + "grad_norm": 0.6808290481567383, + "learning_rate": 4.0521246494855316e-05, + "loss": 2.3738, + "step": 14087 + }, + { + "epoch": 1.1369542409813573, + "grad_norm": 0.6942760944366455, + "learning_rate": 4.0508556418240875e-05, + "loss": 2.4351, + "step": 14088 + }, + { + "epoch": 1.1370349447179404, + "grad_norm": 0.7615510821342468, + "learning_rate": 4.049586782430872e-05, + "loss": 2.3968, + "step": 14089 + }, + { + "epoch": 1.1371156484545235, + "grad_norm": 0.7240662574768066, + "learning_rate": 4.048318071337512e-05, + "loss": 2.4046, + "step": 14090 + }, + { + "epoch": 1.1371963521911064, + "grad_norm": 0.7286471128463745, + "learning_rate": 4.047049508575621e-05, + "loss": 2.4039, + "step": 14091 + }, + { + "epoch": 1.1372770559276895, + "grad_norm": 0.7031459212303162, + "learning_rate": 4.045781094176816e-05, + "loss": 2.4494, + "step": 14092 + }, + { + "epoch": 1.1373577596642725, + "grad_norm": 0.7116301655769348, + "learning_rate": 4.0445128281727116e-05, + "loss": 2.3991, + "step": 14093 + }, + { + "epoch": 1.1374384634008554, + "grad_norm": 0.6719788312911987, + "learning_rate": 4.043244710594914e-05, + "loss": 2.4823, + "step": 14094 + }, + { + "epoch": 1.1375191671374385, + 
"grad_norm": 0.6770508885383606, + "learning_rate": 4.041976741475031e-05, + "loss": 2.4362, + "step": 14095 + }, + { + "epoch": 1.1375998708740216, + "grad_norm": 0.6808609962463379, + "learning_rate": 4.040708920844666e-05, + "loss": 2.435, + "step": 14096 + }, + { + "epoch": 1.1376805746106045, + "grad_norm": 0.7445514798164368, + "learning_rate": 4.0394412487354074e-05, + "loss": 2.4749, + "step": 14097 + }, + { + "epoch": 1.1377612783471875, + "grad_norm": 0.7024775743484497, + "learning_rate": 4.038173725178854e-05, + "loss": 2.4354, + "step": 14098 + }, + { + "epoch": 1.1378419820837704, + "grad_norm": 0.6925685405731201, + "learning_rate": 4.0369063502066e-05, + "loss": 2.4462, + "step": 14099 + }, + { + "epoch": 1.1379226858203535, + "grad_norm": 0.6970539689064026, + "learning_rate": 4.035639123850223e-05, + "loss": 2.3842, + "step": 14100 + }, + { + "epoch": 1.1380033895569364, + "grad_norm": 0.6571836471557617, + "learning_rate": 4.0343720461413107e-05, + "loss": 2.4213, + "step": 14101 + }, + { + "epoch": 1.1380840932935194, + "grad_norm": 0.7264918684959412, + "learning_rate": 4.033105117111441e-05, + "loss": 2.4697, + "step": 14102 + }, + { + "epoch": 1.1381647970301025, + "grad_norm": 0.6929560899734497, + "learning_rate": 4.03183833679219e-05, + "loss": 2.461, + "step": 14103 + }, + { + "epoch": 1.1382455007666854, + "grad_norm": 0.6533559560775757, + "learning_rate": 4.030571705215128e-05, + "loss": 2.4336, + "step": 14104 + }, + { + "epoch": 1.1383262045032685, + "grad_norm": 0.7372364401817322, + "learning_rate": 4.0293052224118234e-05, + "loss": 2.4396, + "step": 14105 + }, + { + "epoch": 1.1384069082398516, + "grad_norm": 0.6736310720443726, + "learning_rate": 4.028038888413844e-05, + "loss": 2.4123, + "step": 14106 + }, + { + "epoch": 1.1384876119764344, + "grad_norm": 0.6898338794708252, + "learning_rate": 4.026772703252742e-05, + "loss": 2.431, + "step": 14107 + }, + { + "epoch": 1.1385683157130175, + "grad_norm": 0.7933369278907776, + 
"learning_rate": 4.02550666696008e-05, + "loss": 2.4669, + "step": 14108 + }, + { + "epoch": 1.1386490194496006, + "grad_norm": 0.7218122482299805, + "learning_rate": 4.024240779567412e-05, + "loss": 2.3761, + "step": 14109 + }, + { + "epoch": 1.1387297231861835, + "grad_norm": 0.7018248438835144, + "learning_rate": 4.022975041106281e-05, + "loss": 2.4011, + "step": 14110 + }, + { + "epoch": 1.1388104269227666, + "grad_norm": 0.6709668040275574, + "learning_rate": 4.0217094516082364e-05, + "loss": 2.426, + "step": 14111 + }, + { + "epoch": 1.1388911306593494, + "grad_norm": 0.7241504192352295, + "learning_rate": 4.0204440111048195e-05, + "loss": 2.4085, + "step": 14112 + }, + { + "epoch": 1.1389718343959325, + "grad_norm": 0.731347382068634, + "learning_rate": 4.0191787196275675e-05, + "loss": 2.502, + "step": 14113 + }, + { + "epoch": 1.1390525381325156, + "grad_norm": 0.6630167365074158, + "learning_rate": 4.0179135772080166e-05, + "loss": 2.3999, + "step": 14114 + }, + { + "epoch": 1.1391332418690985, + "grad_norm": 0.7094748616218567, + "learning_rate": 4.016648583877698e-05, + "loss": 2.4666, + "step": 14115 + }, + { + "epoch": 1.1392139456056816, + "grad_norm": 0.7262436151504517, + "learning_rate": 4.0153837396681395e-05, + "loss": 2.4369, + "step": 14116 + }, + { + "epoch": 1.1392946493422644, + "grad_norm": 0.6796039938926697, + "learning_rate": 4.014119044610859e-05, + "loss": 2.4607, + "step": 14117 + }, + { + "epoch": 1.1393753530788475, + "grad_norm": 0.6690036058425903, + "learning_rate": 4.0128544987373785e-05, + "loss": 2.4145, + "step": 14118 + }, + { + "epoch": 1.1394560568154306, + "grad_norm": 0.6987181305885315, + "learning_rate": 4.011590102079219e-05, + "loss": 2.4294, + "step": 14119 + }, + { + "epoch": 1.1395367605520135, + "grad_norm": 0.6756789684295654, + "learning_rate": 4.0103258546678836e-05, + "loss": 2.396, + "step": 14120 + }, + { + "epoch": 1.1396174642885966, + "grad_norm": 0.7027772068977356, + "learning_rate": 
4.009061756534885e-05, + "loss": 2.3971, + "step": 14121 + }, + { + "epoch": 1.1396981680251796, + "grad_norm": 0.6872174143791199, + "learning_rate": 4.007797807711732e-05, + "loss": 2.4297, + "step": 14122 + }, + { + "epoch": 1.1397788717617625, + "grad_norm": 0.7213007211685181, + "learning_rate": 4.006534008229914e-05, + "loss": 2.4792, + "step": 14123 + }, + { + "epoch": 1.1398595754983456, + "grad_norm": 0.6771649122238159, + "learning_rate": 4.0052703581209395e-05, + "loss": 2.4397, + "step": 14124 + }, + { + "epoch": 1.1399402792349287, + "grad_norm": 0.6577184796333313, + "learning_rate": 4.0040068574163013e-05, + "loss": 2.4113, + "step": 14125 + }, + { + "epoch": 1.1400209829715116, + "grad_norm": 0.7493160367012024, + "learning_rate": 4.002743506147483e-05, + "loss": 2.4454, + "step": 14126 + }, + { + "epoch": 1.1401016867080946, + "grad_norm": 0.6820357441902161, + "learning_rate": 4.0014803043459726e-05, + "loss": 2.4126, + "step": 14127 + }, + { + "epoch": 1.1401823904446775, + "grad_norm": 0.7177188992500305, + "learning_rate": 4.000217252043258e-05, + "loss": 2.4355, + "step": 14128 + }, + { + "epoch": 1.1402630941812606, + "grad_norm": 0.654371440410614, + "learning_rate": 3.998954349270808e-05, + "loss": 2.4932, + "step": 14129 + }, + { + "epoch": 1.1403437979178437, + "grad_norm": 0.7029837965965271, + "learning_rate": 3.997691596060104e-05, + "loss": 2.4341, + "step": 14130 + }, + { + "epoch": 1.1404245016544265, + "grad_norm": 0.7971171140670776, + "learning_rate": 3.996428992442615e-05, + "loss": 2.4466, + "step": 14131 + }, + { + "epoch": 1.1405052053910096, + "grad_norm": 0.6941849589347839, + "learning_rate": 3.9951665384498114e-05, + "loss": 2.4861, + "step": 14132 + }, + { + "epoch": 1.1405859091275925, + "grad_norm": 0.6657733917236328, + "learning_rate": 3.993904234113153e-05, + "loss": 2.4266, + "step": 14133 + }, + { + "epoch": 1.1406666128641756, + "grad_norm": 0.6780329346656799, + "learning_rate": 3.9926420794641e-05, + "loss": 
2.458, + "step": 14134 + }, + { + "epoch": 1.1407473166007587, + "grad_norm": 0.7070702910423279, + "learning_rate": 3.991380074534109e-05, + "loss": 2.368, + "step": 14135 + }, + { + "epoch": 1.1408280203373415, + "grad_norm": 0.7186575531959534, + "learning_rate": 3.990118219354635e-05, + "loss": 2.4611, + "step": 14136 + }, + { + "epoch": 1.1409087240739246, + "grad_norm": 0.7171763777732849, + "learning_rate": 3.988856513957123e-05, + "loss": 2.4315, + "step": 14137 + }, + { + "epoch": 1.1409894278105077, + "grad_norm": 0.7090228796005249, + "learning_rate": 3.987594958373025e-05, + "loss": 2.4668, + "step": 14138 + }, + { + "epoch": 1.1410701315470906, + "grad_norm": 0.6523951888084412, + "learning_rate": 3.986333552633773e-05, + "loss": 2.4392, + "step": 14139 + }, + { + "epoch": 1.1411508352836737, + "grad_norm": 0.706000804901123, + "learning_rate": 3.98507229677081e-05, + "loss": 2.4382, + "step": 14140 + }, + { + "epoch": 1.1412315390202568, + "grad_norm": 0.6537537574768066, + "learning_rate": 3.983811190815571e-05, + "loss": 2.456, + "step": 14141 + }, + { + "epoch": 1.1413122427568396, + "grad_norm": 0.7509549856185913, + "learning_rate": 3.982550234799479e-05, + "loss": 2.4744, + "step": 14142 + }, + { + "epoch": 1.1413929464934227, + "grad_norm": 0.7188650965690613, + "learning_rate": 3.981289428753967e-05, + "loss": 2.4632, + "step": 14143 + }, + { + "epoch": 1.1414736502300056, + "grad_norm": 0.7563674449920654, + "learning_rate": 3.9800287727104544e-05, + "loss": 2.5063, + "step": 14144 + }, + { + "epoch": 1.1415543539665887, + "grad_norm": 0.8374128341674805, + "learning_rate": 3.978768266700361e-05, + "loss": 2.4942, + "step": 14145 + }, + { + "epoch": 1.1416350577031718, + "grad_norm": 0.7020177841186523, + "learning_rate": 3.9775079107551027e-05, + "loss": 2.4404, + "step": 14146 + }, + { + "epoch": 1.1417157614397546, + "grad_norm": 0.7326170802116394, + "learning_rate": 3.9762477049060895e-05, + "loss": 2.4127, + "step": 14147 + }, + { + 
"epoch": 1.1417964651763377, + "grad_norm": 0.6661173105239868, + "learning_rate": 3.974987649184734e-05, + "loss": 2.4649, + "step": 14148 + }, + { + "epoch": 1.1418771689129206, + "grad_norm": 0.7186033129692078, + "learning_rate": 3.973727743622432e-05, + "loss": 2.4275, + "step": 14149 + }, + { + "epoch": 1.1419578726495037, + "grad_norm": 0.7193881869316101, + "learning_rate": 3.972467988250588e-05, + "loss": 2.4997, + "step": 14150 + }, + { + "epoch": 1.1420385763860867, + "grad_norm": 0.7139542102813721, + "learning_rate": 3.971208383100601e-05, + "loss": 2.4211, + "step": 14151 + }, + { + "epoch": 1.1421192801226696, + "grad_norm": 0.6840166449546814, + "learning_rate": 3.969948928203856e-05, + "loss": 2.4504, + "step": 14152 + }, + { + "epoch": 1.1421999838592527, + "grad_norm": 0.8261072039604187, + "learning_rate": 3.968689623591747e-05, + "loss": 2.4901, + "step": 14153 + }, + { + "epoch": 1.1422806875958358, + "grad_norm": 0.7636086940765381, + "learning_rate": 3.96743046929566e-05, + "loss": 2.4202, + "step": 14154 + }, + { + "epoch": 1.1423613913324187, + "grad_norm": 0.7477976679801941, + "learning_rate": 3.966171465346973e-05, + "loss": 2.492, + "step": 14155 + }, + { + "epoch": 1.1424420950690017, + "grad_norm": 0.7516389489173889, + "learning_rate": 3.9649126117770665e-05, + "loss": 2.4512, + "step": 14156 + }, + { + "epoch": 1.1425227988055846, + "grad_norm": 0.6987521648406982, + "learning_rate": 3.9636539086173174e-05, + "loss": 2.4005, + "step": 14157 + }, + { + "epoch": 1.1426035025421677, + "grad_norm": 0.7242532968521118, + "learning_rate": 3.962395355899088e-05, + "loss": 2.4414, + "step": 14158 + }, + { + "epoch": 1.1426842062787508, + "grad_norm": 0.6616180539131165, + "learning_rate": 3.961136953653749e-05, + "loss": 2.4442, + "step": 14159 + }, + { + "epoch": 1.1427649100153336, + "grad_norm": 0.7165415287017822, + "learning_rate": 3.959878701912667e-05, + "loss": 2.4658, + "step": 14160 + }, + { + "epoch": 1.1428456137519167, + 
"grad_norm": 0.6619318127632141, + "learning_rate": 3.9586206007071926e-05, + "loss": 2.3803, + "step": 14161 + }, + { + "epoch": 1.1429263174884996, + "grad_norm": 0.6654838919639587, + "learning_rate": 3.957362650068684e-05, + "loss": 2.4584, + "step": 14162 + }, + { + "epoch": 1.1430070212250827, + "grad_norm": 0.6947140097618103, + "learning_rate": 3.956104850028496e-05, + "loss": 2.4236, + "step": 14163 + }, + { + "epoch": 1.1430877249616658, + "grad_norm": 0.6510412096977234, + "learning_rate": 3.954847200617973e-05, + "loss": 2.3589, + "step": 14164 + }, + { + "epoch": 1.1431684286982486, + "grad_norm": 0.7550667524337769, + "learning_rate": 3.95358970186846e-05, + "loss": 2.419, + "step": 14165 + }, + { + "epoch": 1.1432491324348317, + "grad_norm": 0.7898361682891846, + "learning_rate": 3.9523323538112975e-05, + "loss": 2.4549, + "step": 14166 + }, + { + "epoch": 1.1433298361714148, + "grad_norm": 0.7162390947341919, + "learning_rate": 3.9510751564778246e-05, + "loss": 2.4493, + "step": 14167 + }, + { + "epoch": 1.1434105399079977, + "grad_norm": 0.8251990079879761, + "learning_rate": 3.949818109899367e-05, + "loss": 2.4474, + "step": 14168 + }, + { + "epoch": 1.1434912436445808, + "grad_norm": 0.6739209890365601, + "learning_rate": 3.948561214107258e-05, + "loss": 2.4564, + "step": 14169 + }, + { + "epoch": 1.1435719473811639, + "grad_norm": 0.6606340408325195, + "learning_rate": 3.9473044691328254e-05, + "loss": 2.3838, + "step": 14170 + }, + { + "epoch": 1.1436526511177467, + "grad_norm": 0.7297452092170715, + "learning_rate": 3.946047875007384e-05, + "loss": 2.4673, + "step": 14171 + }, + { + "epoch": 1.1437333548543298, + "grad_norm": 0.7382420301437378, + "learning_rate": 3.9447914317622546e-05, + "loss": 2.4279, + "step": 14172 + }, + { + "epoch": 1.1438140585909127, + "grad_norm": 0.6947354674339294, + "learning_rate": 3.9435351394287546e-05, + "loss": 2.4553, + "step": 14173 + }, + { + "epoch": 1.1438947623274958, + "grad_norm": 0.670369565486908, 
+ "learning_rate": 3.942278998038183e-05, + "loss": 2.4285, + "step": 14174 + }, + { + "epoch": 1.1439754660640788, + "grad_norm": 0.7097954154014587, + "learning_rate": 3.941023007621859e-05, + "loss": 2.477, + "step": 14175 + }, + { + "epoch": 1.1440561698006617, + "grad_norm": 0.6490213871002197, + "learning_rate": 3.9397671682110826e-05, + "loss": 2.3943, + "step": 14176 + }, + { + "epoch": 1.1441368735372448, + "grad_norm": 0.6505936980247498, + "learning_rate": 3.938511479837147e-05, + "loss": 2.4188, + "step": 14177 + }, + { + "epoch": 1.1442175772738277, + "grad_norm": 0.6696773767471313, + "learning_rate": 3.9372559425313496e-05, + "loss": 2.4377, + "step": 14178 + }, + { + "epoch": 1.1442982810104108, + "grad_norm": 0.6747034192085266, + "learning_rate": 3.936000556324982e-05, + "loss": 2.4111, + "step": 14179 + }, + { + "epoch": 1.1443789847469938, + "grad_norm": 0.7766546607017517, + "learning_rate": 3.934745321249336e-05, + "loss": 2.3873, + "step": 14180 + }, + { + "epoch": 1.1444596884835767, + "grad_norm": 0.7608100175857544, + "learning_rate": 3.933490237335688e-05, + "loss": 2.4567, + "step": 14181 + }, + { + "epoch": 1.1445403922201598, + "grad_norm": 0.7724356055259705, + "learning_rate": 3.9322353046153205e-05, + "loss": 2.4729, + "step": 14182 + }, + { + "epoch": 1.1446210959567429, + "grad_norm": 0.6908414363861084, + "learning_rate": 3.930980523119515e-05, + "loss": 2.41, + "step": 14183 + }, + { + "epoch": 1.1447017996933257, + "grad_norm": 0.7209733128547668, + "learning_rate": 3.9297258928795356e-05, + "loss": 2.4629, + "step": 14184 + }, + { + "epoch": 1.1447825034299088, + "grad_norm": 0.7116519212722778, + "learning_rate": 3.928471413926651e-05, + "loss": 2.5081, + "step": 14185 + }, + { + "epoch": 1.144863207166492, + "grad_norm": 0.6704578995704651, + "learning_rate": 3.9272170862921365e-05, + "loss": 2.494, + "step": 14186 + }, + { + "epoch": 1.1449439109030748, + "grad_norm": 0.6914607882499695, + "learning_rate": 
3.9259629100072435e-05, + "loss": 2.3979, + "step": 14187 + }, + { + "epoch": 1.1450246146396579, + "grad_norm": 0.7413245439529419, + "learning_rate": 3.924708885103233e-05, + "loss": 2.4534, + "step": 14188 + }, + { + "epoch": 1.1451053183762407, + "grad_norm": 0.7411661744117737, + "learning_rate": 3.923455011611362e-05, + "loss": 2.4191, + "step": 14189 + }, + { + "epoch": 1.1451860221128238, + "grad_norm": 0.6581972241401672, + "learning_rate": 3.9222012895628716e-05, + "loss": 2.4494, + "step": 14190 + }, + { + "epoch": 1.145266725849407, + "grad_norm": 0.6628647446632385, + "learning_rate": 3.920947718989013e-05, + "loss": 2.4483, + "step": 14191 + }, + { + "epoch": 1.1453474295859898, + "grad_norm": 0.7068151831626892, + "learning_rate": 3.9196942999210316e-05, + "loss": 2.4549, + "step": 14192 + }, + { + "epoch": 1.1454281333225729, + "grad_norm": 0.6727713942527771, + "learning_rate": 3.918441032390159e-05, + "loss": 2.4261, + "step": 14193 + }, + { + "epoch": 1.1455088370591557, + "grad_norm": 0.6680718660354614, + "learning_rate": 3.9171879164276334e-05, + "loss": 2.4705, + "step": 14194 + }, + { + "epoch": 1.1455895407957388, + "grad_norm": 0.710096538066864, + "learning_rate": 3.915934952064685e-05, + "loss": 2.474, + "step": 14195 + }, + { + "epoch": 1.145670244532322, + "grad_norm": 0.6927496790885925, + "learning_rate": 3.9146821393325414e-05, + "loss": 2.3979, + "step": 14196 + }, + { + "epoch": 1.1457509482689048, + "grad_norm": 0.6887550354003906, + "learning_rate": 3.913429478262427e-05, + "loss": 2.4588, + "step": 14197 + }, + { + "epoch": 1.1458316520054879, + "grad_norm": 0.6847062706947327, + "learning_rate": 3.912176968885559e-05, + "loss": 2.4602, + "step": 14198 + }, + { + "epoch": 1.145912355742071, + "grad_norm": 0.6832349300384521, + "learning_rate": 3.91092461123316e-05, + "loss": 2.4672, + "step": 14199 + }, + { + "epoch": 1.1459930594786538, + "grad_norm": 0.6789066791534424, + "learning_rate": 3.909672405336432e-05, + "loss": 
2.5029, + "step": 14200 + }, + { + "epoch": 1.146073763215237, + "grad_norm": 0.6953951120376587, + "learning_rate": 3.9084203512265885e-05, + "loss": 2.4223, + "step": 14201 + }, + { + "epoch": 1.1461544669518198, + "grad_norm": 0.6629688739776611, + "learning_rate": 3.907168448934836e-05, + "loss": 2.4028, + "step": 14202 + }, + { + "epoch": 1.1462351706884029, + "grad_norm": 0.6661216020584106, + "learning_rate": 3.90591669849237e-05, + "loss": 2.4668, + "step": 14203 + }, + { + "epoch": 1.146315874424986, + "grad_norm": 0.6814442276954651, + "learning_rate": 3.9046650999303894e-05, + "loss": 2.4273, + "step": 14204 + }, + { + "epoch": 1.1463965781615688, + "grad_norm": 0.6678626537322998, + "learning_rate": 3.903413653280088e-05, + "loss": 2.444, + "step": 14205 + }, + { + "epoch": 1.146477281898152, + "grad_norm": 0.6703703999519348, + "learning_rate": 3.902162358572655e-05, + "loss": 2.4273, + "step": 14206 + }, + { + "epoch": 1.1465579856347348, + "grad_norm": 0.7052578926086426, + "learning_rate": 3.900911215839276e-05, + "loss": 2.4397, + "step": 14207 + }, + { + "epoch": 1.1466386893713179, + "grad_norm": 0.6792036294937134, + "learning_rate": 3.899660225111136e-05, + "loss": 2.439, + "step": 14208 + }, + { + "epoch": 1.146719393107901, + "grad_norm": 0.6995401978492737, + "learning_rate": 3.898409386419407e-05, + "loss": 2.5002, + "step": 14209 + }, + { + "epoch": 1.1468000968444838, + "grad_norm": 0.6527338027954102, + "learning_rate": 3.897158699795265e-05, + "loss": 2.4523, + "step": 14210 + }, + { + "epoch": 1.146880800581067, + "grad_norm": 0.7509400248527527, + "learning_rate": 3.8959081652698814e-05, + "loss": 2.4193, + "step": 14211 + }, + { + "epoch": 1.14696150431765, + "grad_norm": 0.6985350251197815, + "learning_rate": 3.894657782874426e-05, + "loss": 2.4251, + "step": 14212 + }, + { + "epoch": 1.1470422080542328, + "grad_norm": 0.6831483840942383, + "learning_rate": 3.893407552640055e-05, + "loss": 2.4172, + "step": 14213 + }, + { + "epoch": 
1.147122911790816, + "grad_norm": 0.7281469702720642, + "learning_rate": 3.892157474597929e-05, + "loss": 2.4451, + "step": 14214 + }, + { + "epoch": 1.147203615527399, + "grad_norm": 0.7326027750968933, + "learning_rate": 3.8909075487792066e-05, + "loss": 2.3926, + "step": 14215 + }, + { + "epoch": 1.1472843192639819, + "grad_norm": 0.7030496597290039, + "learning_rate": 3.889657775215036e-05, + "loss": 2.435, + "step": 14216 + }, + { + "epoch": 1.147365023000565, + "grad_norm": 0.6915596127510071, + "learning_rate": 3.888408153936568e-05, + "loss": 2.4622, + "step": 14217 + }, + { + "epoch": 1.1474457267371478, + "grad_norm": 0.678600013256073, + "learning_rate": 3.8871586849749474e-05, + "loss": 2.4264, + "step": 14218 + }, + { + "epoch": 1.147526430473731, + "grad_norm": 0.7487786412239075, + "learning_rate": 3.885909368361308e-05, + "loss": 2.4038, + "step": 14219 + }, + { + "epoch": 1.147607134210314, + "grad_norm": 0.6658064723014832, + "learning_rate": 3.8846602041267886e-05, + "loss": 2.4079, + "step": 14220 + }, + { + "epoch": 1.1476878379468969, + "grad_norm": 0.6985111832618713, + "learning_rate": 3.883411192302527e-05, + "loss": 2.481, + "step": 14221 + }, + { + "epoch": 1.14776854168348, + "grad_norm": 0.7056208848953247, + "learning_rate": 3.8821623329196445e-05, + "loss": 2.4409, + "step": 14222 + }, + { + "epoch": 1.1478492454200628, + "grad_norm": 0.7107830047607422, + "learning_rate": 3.880913626009268e-05, + "loss": 2.4578, + "step": 14223 + }, + { + "epoch": 1.147929949156646, + "grad_norm": 0.6678555607795715, + "learning_rate": 3.87966507160252e-05, + "loss": 2.4548, + "step": 14224 + }, + { + "epoch": 1.148010652893229, + "grad_norm": 0.6699830293655396, + "learning_rate": 3.8784166697305157e-05, + "loss": 2.3763, + "step": 14225 + }, + { + "epoch": 1.1480913566298119, + "grad_norm": 0.7695464491844177, + "learning_rate": 3.8771684204243716e-05, + "loss": 2.4774, + "step": 14226 + }, + { + "epoch": 1.148172060366395, + "grad_norm": 
0.7801330089569092, + "learning_rate": 3.8759203237151954e-05, + "loss": 2.4598, + "step": 14227 + }, + { + "epoch": 1.148252764102978, + "grad_norm": 0.7029622793197632, + "learning_rate": 3.8746723796340955e-05, + "loss": 2.3901, + "step": 14228 + }, + { + "epoch": 1.148333467839561, + "grad_norm": 0.7472359538078308, + "learning_rate": 3.873424588212169e-05, + "loss": 2.4724, + "step": 14229 + }, + { + "epoch": 1.148414171576144, + "grad_norm": 0.6621725559234619, + "learning_rate": 3.872176949480517e-05, + "loss": 2.4523, + "step": 14230 + }, + { + "epoch": 1.148494875312727, + "grad_norm": 0.722658634185791, + "learning_rate": 3.8709294634702376e-05, + "loss": 2.4032, + "step": 14231 + }, + { + "epoch": 1.14857557904931, + "grad_norm": 0.7743202447891235, + "learning_rate": 3.869682130212413e-05, + "loss": 2.4373, + "step": 14232 + }, + { + "epoch": 1.148656282785893, + "grad_norm": 0.6906178593635559, + "learning_rate": 3.868434949738136e-05, + "loss": 2.4765, + "step": 14233 + }, + { + "epoch": 1.148736986522476, + "grad_norm": 0.6708275675773621, + "learning_rate": 3.86718792207849e-05, + "loss": 2.4263, + "step": 14234 + }, + { + "epoch": 1.148817690259059, + "grad_norm": 0.6992776989936829, + "learning_rate": 3.8659410472645494e-05, + "loss": 2.378, + "step": 14235 + }, + { + "epoch": 1.148898393995642, + "grad_norm": 0.7229011058807373, + "learning_rate": 3.864694325327389e-05, + "loss": 2.4075, + "step": 14236 + }, + { + "epoch": 1.148979097732225, + "grad_norm": 0.6622509956359863, + "learning_rate": 3.863447756298091e-05, + "loss": 2.3954, + "step": 14237 + }, + { + "epoch": 1.149059801468808, + "grad_norm": 0.7233534455299377, + "learning_rate": 3.862201340207712e-05, + "loss": 2.4506, + "step": 14238 + }, + { + "epoch": 1.149140505205391, + "grad_norm": 0.716869056224823, + "learning_rate": 3.860955077087321e-05, + "loss": 2.4304, + "step": 14239 + }, + { + "epoch": 1.149221208941974, + "grad_norm": 0.6550257205963135, + "learning_rate": 
3.8597089669679766e-05, + "loss": 2.4261, + "step": 14240 + }, + { + "epoch": 1.149301912678557, + "grad_norm": 0.6981741786003113, + "learning_rate": 3.858463009880738e-05, + "loss": 2.4115, + "step": 14241 + }, + { + "epoch": 1.14938261641514, + "grad_norm": 0.6792196035385132, + "learning_rate": 3.8572172058566534e-05, + "loss": 2.4195, + "step": 14242 + }, + { + "epoch": 1.149463320151723, + "grad_norm": 0.7278807163238525, + "learning_rate": 3.855971554926773e-05, + "loss": 2.418, + "step": 14243 + }, + { + "epoch": 1.1495440238883061, + "grad_norm": 0.6451076865196228, + "learning_rate": 3.8547260571221456e-05, + "loss": 2.4591, + "step": 14244 + }, + { + "epoch": 1.149624727624889, + "grad_norm": 0.7052451968193054, + "learning_rate": 3.853480712473805e-05, + "loss": 2.4023, + "step": 14245 + }, + { + "epoch": 1.149705431361472, + "grad_norm": 0.7016182541847229, + "learning_rate": 3.852235521012793e-05, + "loss": 2.4959, + "step": 14246 + }, + { + "epoch": 1.1497861350980552, + "grad_norm": 0.7287492156028748, + "learning_rate": 3.850990482770141e-05, + "loss": 2.3884, + "step": 14247 + }, + { + "epoch": 1.149866838834638, + "grad_norm": 0.6648508310317993, + "learning_rate": 3.84974559777688e-05, + "loss": 2.4632, + "step": 14248 + }, + { + "epoch": 1.1499475425712211, + "grad_norm": 0.7387828230857849, + "learning_rate": 3.848500866064036e-05, + "loss": 2.4053, + "step": 14249 + }, + { + "epoch": 1.150028246307804, + "grad_norm": 0.7230356931686401, + "learning_rate": 3.847256287662635e-05, + "loss": 2.5128, + "step": 14250 + }, + { + "epoch": 1.150108950044387, + "grad_norm": 0.7209547162055969, + "learning_rate": 3.846011862603686e-05, + "loss": 2.4626, + "step": 14251 + }, + { + "epoch": 1.1501896537809702, + "grad_norm": 0.7177916765213013, + "learning_rate": 3.844767590918209e-05, + "loss": 2.4469, + "step": 14252 + }, + { + "epoch": 1.150270357517553, + "grad_norm": 0.7850151658058167, + "learning_rate": 3.843523472637216e-05, + "loss": 2.4731, + 
"step": 14253 + }, + { + "epoch": 1.150351061254136, + "grad_norm": 0.7051519155502319, + "learning_rate": 3.8422795077917084e-05, + "loss": 2.3696, + "step": 14254 + }, + { + "epoch": 1.150431764990719, + "grad_norm": 0.7434025406837463, + "learning_rate": 3.841035696412692e-05, + "loss": 2.444, + "step": 14255 + }, + { + "epoch": 1.150512468727302, + "grad_norm": 0.7404719591140747, + "learning_rate": 3.839792038531166e-05, + "loss": 2.4415, + "step": 14256 + }, + { + "epoch": 1.1505931724638851, + "grad_norm": 0.6883764266967773, + "learning_rate": 3.838548534178125e-05, + "loss": 2.4887, + "step": 14257 + }, + { + "epoch": 1.150673876200468, + "grad_norm": 0.6697155237197876, + "learning_rate": 3.83730518338456e-05, + "loss": 2.3721, + "step": 14258 + }, + { + "epoch": 1.150754579937051, + "grad_norm": 0.68825763463974, + "learning_rate": 3.836061986181459e-05, + "loss": 2.4712, + "step": 14259 + }, + { + "epoch": 1.1508352836736342, + "grad_norm": 0.6810611486434937, + "learning_rate": 3.8348189425998114e-05, + "loss": 2.3995, + "step": 14260 + }, + { + "epoch": 1.150915987410217, + "grad_norm": 0.6718329787254333, + "learning_rate": 3.8335760526705866e-05, + "loss": 2.4068, + "step": 14261 + }, + { + "epoch": 1.1509966911468001, + "grad_norm": 0.694618284702301, + "learning_rate": 3.832333316424767e-05, + "loss": 2.458, + "step": 14262 + }, + { + "epoch": 1.151077394883383, + "grad_norm": 0.6824250817298889, + "learning_rate": 3.8310907338933266e-05, + "loss": 2.4623, + "step": 14263 + }, + { + "epoch": 1.151158098619966, + "grad_norm": 0.6875178217887878, + "learning_rate": 3.8298483051072264e-05, + "loss": 2.4827, + "step": 14264 + }, + { + "epoch": 1.1512388023565492, + "grad_norm": 0.7868281602859497, + "learning_rate": 3.828606030097437e-05, + "loss": 2.4638, + "step": 14265 + }, + { + "epoch": 1.151319506093132, + "grad_norm": 0.7003639936447144, + "learning_rate": 3.8273639088949165e-05, + "loss": 2.4885, + "step": 14266 + }, + { + "epoch": 
1.1514002098297151, + "grad_norm": 0.6965197920799255, + "learning_rate": 3.826121941530623e-05, + "loss": 2.3983, + "step": 14267 + }, + { + "epoch": 1.151480913566298, + "grad_norm": 0.7241101264953613, + "learning_rate": 3.824880128035509e-05, + "loss": 2.4598, + "step": 14268 + }, + { + "epoch": 1.151561617302881, + "grad_norm": 0.700764536857605, + "learning_rate": 3.823638468440528e-05, + "loss": 2.3627, + "step": 14269 + }, + { + "epoch": 1.1516423210394642, + "grad_norm": 0.6889846324920654, + "learning_rate": 3.822396962776619e-05, + "loss": 2.4442, + "step": 14270 + }, + { + "epoch": 1.151723024776047, + "grad_norm": 0.6660009026527405, + "learning_rate": 3.8211556110747245e-05, + "loss": 2.403, + "step": 14271 + }, + { + "epoch": 1.1518037285126301, + "grad_norm": 0.6537240743637085, + "learning_rate": 3.819914413365785e-05, + "loss": 2.4358, + "step": 14272 + }, + { + "epoch": 1.1518844322492132, + "grad_norm": 0.6852741837501526, + "learning_rate": 3.818673369680735e-05, + "loss": 2.4272, + "step": 14273 + }, + { + "epoch": 1.151965135985796, + "grad_norm": 0.701874852180481, + "learning_rate": 3.817432480050501e-05, + "loss": 2.4419, + "step": 14274 + }, + { + "epoch": 1.1520458397223792, + "grad_norm": 0.7089500427246094, + "learning_rate": 3.816191744506011e-05, + "loss": 2.4537, + "step": 14275 + }, + { + "epoch": 1.1521265434589623, + "grad_norm": 0.698564887046814, + "learning_rate": 3.8149511630781866e-05, + "loss": 2.3991, + "step": 14276 + }, + { + "epoch": 1.1522072471955451, + "grad_norm": 0.6940335035324097, + "learning_rate": 3.813710735797947e-05, + "loss": 2.5022, + "step": 14277 + }, + { + "epoch": 1.1522879509321282, + "grad_norm": 0.6916826367378235, + "learning_rate": 3.812470462696208e-05, + "loss": 2.4449, + "step": 14278 + }, + { + "epoch": 1.152368654668711, + "grad_norm": 0.7115256190299988, + "learning_rate": 3.811230343803882e-05, + "loss": 2.4371, + "step": 14279 + }, + { + "epoch": 1.1524493584052942, + "grad_norm": 
0.6857369542121887, + "learning_rate": 3.80999037915187e-05, + "loss": 2.4426, + "step": 14280 + }, + { + "epoch": 1.1525300621418773, + "grad_norm": 0.7605363130569458, + "learning_rate": 3.808750568771079e-05, + "loss": 2.4999, + "step": 14281 + }, + { + "epoch": 1.1526107658784601, + "grad_norm": 0.6604358553886414, + "learning_rate": 3.8075109126924115e-05, + "loss": 2.419, + "step": 14282 + }, + { + "epoch": 1.1526914696150432, + "grad_norm": 0.6945412755012512, + "learning_rate": 3.806271410946756e-05, + "loss": 2.4555, + "step": 14283 + }, + { + "epoch": 1.152772173351626, + "grad_norm": 0.7205908894538879, + "learning_rate": 3.805032063565007e-05, + "loss": 2.4745, + "step": 14284 + }, + { + "epoch": 1.1528528770882092, + "grad_norm": 0.7198025584220886, + "learning_rate": 3.8037928705780554e-05, + "loss": 2.4358, + "step": 14285 + }, + { + "epoch": 1.1529335808247922, + "grad_norm": 0.7231044769287109, + "learning_rate": 3.802553832016781e-05, + "loss": 2.4713, + "step": 14286 + }, + { + "epoch": 1.1530142845613751, + "grad_norm": 0.6878815293312073, + "learning_rate": 3.80131494791206e-05, + "loss": 2.4479, + "step": 14287 + }, + { + "epoch": 1.1530949882979582, + "grad_norm": 0.6930533647537231, + "learning_rate": 3.800076218294779e-05, + "loss": 2.3912, + "step": 14288 + }, + { + "epoch": 1.1531756920345413, + "grad_norm": 0.703521192073822, + "learning_rate": 3.798837643195808e-05, + "loss": 2.451, + "step": 14289 + }, + { + "epoch": 1.1532563957711242, + "grad_norm": 0.7099746465682983, + "learning_rate": 3.79759922264601e-05, + "loss": 2.4957, + "step": 14290 + }, + { + "epoch": 1.1533370995077072, + "grad_norm": 0.7268218398094177, + "learning_rate": 3.7963609566762527e-05, + "loss": 2.4242, + "step": 14291 + }, + { + "epoch": 1.1534178032442903, + "grad_norm": 0.7465239763259888, + "learning_rate": 3.7951228453174004e-05, + "loss": 2.3867, + "step": 14292 + }, + { + "epoch": 1.1534985069808732, + "grad_norm": 0.704584002494812, + "learning_rate": 
3.793884888600302e-05, + "loss": 2.5009, + "step": 14293 + }, + { + "epoch": 1.1535792107174563, + "grad_norm": 0.7057262063026428, + "learning_rate": 3.792647086555816e-05, + "loss": 2.4381, + "step": 14294 + }, + { + "epoch": 1.1536599144540391, + "grad_norm": 0.7045955061912537, + "learning_rate": 3.791409439214794e-05, + "loss": 2.4456, + "step": 14295 + }, + { + "epoch": 1.1537406181906222, + "grad_norm": 0.705476701259613, + "learning_rate": 3.790171946608074e-05, + "loss": 2.466, + "step": 14296 + }, + { + "epoch": 1.1538213219272053, + "grad_norm": 0.7128286957740784, + "learning_rate": 3.788934608766503e-05, + "loss": 2.4891, + "step": 14297 + }, + { + "epoch": 1.1539020256637882, + "grad_norm": 0.678144633769989, + "learning_rate": 3.787697425720918e-05, + "loss": 2.4453, + "step": 14298 + }, + { + "epoch": 1.1539827294003713, + "grad_norm": 0.754216730594635, + "learning_rate": 3.786460397502151e-05, + "loss": 2.4331, + "step": 14299 + }, + { + "epoch": 1.1540634331369541, + "grad_norm": 0.6881092190742493, + "learning_rate": 3.7852235241410325e-05, + "loss": 2.3692, + "step": 14300 + }, + { + "epoch": 1.1541441368735372, + "grad_norm": 0.7498507499694824, + "learning_rate": 3.783986805668395e-05, + "loss": 2.4556, + "step": 14301 + }, + { + "epoch": 1.1542248406101203, + "grad_norm": 0.6312216520309448, + "learning_rate": 3.7827502421150496e-05, + "loss": 2.4727, + "step": 14302 + }, + { + "epoch": 1.1543055443467032, + "grad_norm": 0.7156404256820679, + "learning_rate": 3.781513833511822e-05, + "loss": 2.4003, + "step": 14303 + }, + { + "epoch": 1.1543862480832863, + "grad_norm": 0.6589376926422119, + "learning_rate": 3.7802775798895226e-05, + "loss": 2.4461, + "step": 14304 + }, + { + "epoch": 1.1544669518198694, + "grad_norm": 0.7259865999221802, + "learning_rate": 3.77904148127897e-05, + "loss": 2.4021, + "step": 14305 + }, + { + "epoch": 1.1545476555564522, + "grad_norm": 0.7248456478118896, + "learning_rate": 3.777805537710961e-05, + "loss": 
2.4784, + "step": 14306 + }, + { + "epoch": 1.1546283592930353, + "grad_norm": 0.7085593342781067, + "learning_rate": 3.7765697492163034e-05, + "loss": 2.4394, + "step": 14307 + }, + { + "epoch": 1.1547090630296182, + "grad_norm": 0.7394313216209412, + "learning_rate": 3.775334115825796e-05, + "loss": 2.5055, + "step": 14308 + }, + { + "epoch": 1.1547897667662013, + "grad_norm": 0.7231999039649963, + "learning_rate": 3.7740986375702336e-05, + "loss": 2.4551, + "step": 14309 + }, + { + "epoch": 1.1548704705027844, + "grad_norm": 0.6875953078269958, + "learning_rate": 3.7728633144804084e-05, + "loss": 2.4641, + "step": 14310 + }, + { + "epoch": 1.1549511742393672, + "grad_norm": 0.7477203607559204, + "learning_rate": 3.7716281465871094e-05, + "loss": 2.4929, + "step": 14311 + }, + { + "epoch": 1.1550318779759503, + "grad_norm": 0.6653971076011658, + "learning_rate": 3.770393133921115e-05, + "loss": 2.4819, + "step": 14312 + }, + { + "epoch": 1.1551125817125332, + "grad_norm": 0.7267318964004517, + "learning_rate": 3.769158276513209e-05, + "loss": 2.4568, + "step": 14313 + }, + { + "epoch": 1.1551932854491163, + "grad_norm": 0.6675654053688049, + "learning_rate": 3.76792357439417e-05, + "loss": 2.4789, + "step": 14314 + }, + { + "epoch": 1.1552739891856993, + "grad_norm": 0.6847487688064575, + "learning_rate": 3.7666890275947616e-05, + "loss": 2.4034, + "step": 14315 + }, + { + "epoch": 1.1553546929222822, + "grad_norm": 0.811553418636322, + "learning_rate": 3.765454636145758e-05, + "loss": 2.5051, + "step": 14316 + }, + { + "epoch": 1.1554353966588653, + "grad_norm": 0.690026581287384, + "learning_rate": 3.7642204000779204e-05, + "loss": 2.4477, + "step": 14317 + }, + { + "epoch": 1.1555161003954484, + "grad_norm": 0.695810079574585, + "learning_rate": 3.762986319422013e-05, + "loss": 2.4516, + "step": 14318 + }, + { + "epoch": 1.1555968041320313, + "grad_norm": 0.6869217753410339, + "learning_rate": 3.7617523942087886e-05, + "loss": 2.3802, + "step": 14319 + }, + { 
+ "epoch": 1.1556775078686143, + "grad_norm": 0.7109078764915466, + "learning_rate": 3.7605186244690016e-05, + "loss": 2.4306, + "step": 14320 + }, + { + "epoch": 1.1557582116051974, + "grad_norm": 0.7385044693946838, + "learning_rate": 3.759285010233404e-05, + "loss": 2.4288, + "step": 14321 + }, + { + "epoch": 1.1558389153417803, + "grad_norm": 0.6775605082511902, + "learning_rate": 3.7580515515327355e-05, + "loss": 2.4155, + "step": 14322 + }, + { + "epoch": 1.1559196190783634, + "grad_norm": 0.7325694561004639, + "learning_rate": 3.7568182483977375e-05, + "loss": 2.5035, + "step": 14323 + }, + { + "epoch": 1.1560003228149462, + "grad_norm": 0.6896799206733704, + "learning_rate": 3.7555851008591526e-05, + "loss": 2.4739, + "step": 14324 + }, + { + "epoch": 1.1560810265515293, + "grad_norm": 0.7086506485939026, + "learning_rate": 3.7543521089477065e-05, + "loss": 2.4815, + "step": 14325 + }, + { + "epoch": 1.1561617302881124, + "grad_norm": 0.6886687874794006, + "learning_rate": 3.753119272694132e-05, + "loss": 2.4261, + "step": 14326 + }, + { + "epoch": 1.1562424340246953, + "grad_norm": 0.675136148929596, + "learning_rate": 3.751886592129155e-05, + "loss": 2.3946, + "step": 14327 + }, + { + "epoch": 1.1563231377612784, + "grad_norm": 0.706729531288147, + "learning_rate": 3.7506540672834964e-05, + "loss": 2.4199, + "step": 14328 + }, + { + "epoch": 1.1564038414978612, + "grad_norm": 0.6790904998779297, + "learning_rate": 3.749421698187875e-05, + "loss": 2.4419, + "step": 14329 + }, + { + "epoch": 1.1564845452344443, + "grad_norm": 0.6688171029090881, + "learning_rate": 3.748189484873007e-05, + "loss": 2.4516, + "step": 14330 + }, + { + "epoch": 1.1565652489710274, + "grad_norm": 0.6782420873641968, + "learning_rate": 3.746957427369596e-05, + "loss": 2.4586, + "step": 14331 + }, + { + "epoch": 1.1566459527076103, + "grad_norm": 0.7633399367332458, + "learning_rate": 3.7457255257083514e-05, + "loss": 2.3776, + "step": 14332 + }, + { + "epoch": 1.1567266564441934, 
+ "grad_norm": 0.680000364780426, + "learning_rate": 3.744493779919976e-05, + "loss": 2.4978, + "step": 14333 + }, + { + "epoch": 1.1568073601807765, + "grad_norm": 0.6993350386619568, + "learning_rate": 3.743262190035171e-05, + "loss": 2.3974, + "step": 14334 + }, + { + "epoch": 1.1568880639173593, + "grad_norm": 0.7316375374794006, + "learning_rate": 3.7420307560846234e-05, + "loss": 2.4423, + "step": 14335 + }, + { + "epoch": 1.1569687676539424, + "grad_norm": 0.7384842038154602, + "learning_rate": 3.7407994780990285e-05, + "loss": 2.4604, + "step": 14336 + }, + { + "epoch": 1.1570494713905255, + "grad_norm": 0.6980708837509155, + "learning_rate": 3.739568356109072e-05, + "loss": 2.4408, + "step": 14337 + }, + { + "epoch": 1.1571301751271084, + "grad_norm": 0.6510182619094849, + "learning_rate": 3.738337390145438e-05, + "loss": 2.4076, + "step": 14338 + }, + { + "epoch": 1.1572108788636915, + "grad_norm": 0.7458614706993103, + "learning_rate": 3.737106580238804e-05, + "loss": 2.4976, + "step": 14339 + }, + { + "epoch": 1.1572915826002743, + "grad_norm": 0.6663469672203064, + "learning_rate": 3.735875926419849e-05, + "loss": 2.4414, + "step": 14340 + }, + { + "epoch": 1.1573722863368574, + "grad_norm": 0.6611858606338501, + "learning_rate": 3.7346454287192355e-05, + "loss": 2.3783, + "step": 14341 + }, + { + "epoch": 1.1574529900734405, + "grad_norm": 0.6605291366577148, + "learning_rate": 3.7334150871676364e-05, + "loss": 2.4291, + "step": 14342 + }, + { + "epoch": 1.1575336938100234, + "grad_norm": 0.6879985928535461, + "learning_rate": 3.7321849017957186e-05, + "loss": 2.4229, + "step": 14343 + }, + { + "epoch": 1.1576143975466064, + "grad_norm": 0.7466493844985962, + "learning_rate": 3.7309548726341334e-05, + "loss": 2.4278, + "step": 14344 + }, + { + "epoch": 1.1576951012831893, + "grad_norm": 0.7476457357406616, + "learning_rate": 3.72972499971354e-05, + "loss": 2.4944, + "step": 14345 + }, + { + "epoch": 1.1577758050197724, + "grad_norm": 
0.6339364647865295, + "learning_rate": 3.728495283064594e-05, + "loss": 2.3753, + "step": 14346 + }, + { + "epoch": 1.1578565087563555, + "grad_norm": 0.6885230541229248, + "learning_rate": 3.7272657227179355e-05, + "loss": 2.4519, + "step": 14347 + }, + { + "epoch": 1.1579372124929384, + "grad_norm": 0.7561741471290588, + "learning_rate": 3.7260363187042126e-05, + "loss": 2.4808, + "step": 14348 + }, + { + "epoch": 1.1580179162295214, + "grad_norm": 0.8007705211639404, + "learning_rate": 3.724807071054062e-05, + "loss": 2.4649, + "step": 14349 + }, + { + "epoch": 1.1580986199661045, + "grad_norm": 0.6920937895774841, + "learning_rate": 3.72357797979813e-05, + "loss": 2.4145, + "step": 14350 + }, + { + "epoch": 1.1581793237026874, + "grad_norm": 0.7310675978660583, + "learning_rate": 3.7223490449670364e-05, + "loss": 2.4475, + "step": 14351 + }, + { + "epoch": 1.1582600274392705, + "grad_norm": 0.6600463390350342, + "learning_rate": 3.7211202665914155e-05, + "loss": 2.3938, + "step": 14352 + }, + { + "epoch": 1.1583407311758536, + "grad_norm": 0.690258800983429, + "learning_rate": 3.719891644701894e-05, + "loss": 2.3944, + "step": 14353 + }, + { + "epoch": 1.1584214349124364, + "grad_norm": 0.7075135111808777, + "learning_rate": 3.718663179329085e-05, + "loss": 2.3931, + "step": 14354 + }, + { + "epoch": 1.1585021386490195, + "grad_norm": 0.7416332960128784, + "learning_rate": 3.71743487050361e-05, + "loss": 2.4566, + "step": 14355 + }, + { + "epoch": 1.1585828423856024, + "grad_norm": 0.7459710836410522, + "learning_rate": 3.7162067182560846e-05, + "loss": 2.4232, + "step": 14356 + }, + { + "epoch": 1.1586635461221855, + "grad_norm": 0.7265400886535645, + "learning_rate": 3.71497872261711e-05, + "loss": 2.4798, + "step": 14357 + }, + { + "epoch": 1.1587442498587683, + "grad_norm": 0.7142636775970459, + "learning_rate": 3.713750883617294e-05, + "loss": 2.4576, + "step": 14358 + }, + { + "epoch": 1.1588249535953514, + "grad_norm": 0.7279871702194214, + 
"learning_rate": 3.712523201287239e-05, + "loss": 2.439, + "step": 14359 + }, + { + "epoch": 1.1589056573319345, + "grad_norm": 0.7151274681091309, + "learning_rate": 3.7112956756575414e-05, + "loss": 2.4684, + "step": 14360 + }, + { + "epoch": 1.1589863610685174, + "grad_norm": 0.7142657041549683, + "learning_rate": 3.7100683067587946e-05, + "loss": 2.4582, + "step": 14361 + }, + { + "epoch": 1.1590670648051005, + "grad_norm": 0.7716035842895508, + "learning_rate": 3.7088410946215914e-05, + "loss": 2.5038, + "step": 14362 + }, + { + "epoch": 1.1591477685416836, + "grad_norm": 0.7232338190078735, + "learning_rate": 3.707614039276509e-05, + "loss": 2.4558, + "step": 14363 + }, + { + "epoch": 1.1592284722782664, + "grad_norm": 0.7388719916343689, + "learning_rate": 3.706387140754134e-05, + "loss": 2.4535, + "step": 14364 + }, + { + "epoch": 1.1593091760148495, + "grad_norm": 0.7022652626037598, + "learning_rate": 3.7051603990850425e-05, + "loss": 2.4479, + "step": 14365 + }, + { + "epoch": 1.1593898797514326, + "grad_norm": 0.7861798405647278, + "learning_rate": 3.703933814299813e-05, + "loss": 2.4219, + "step": 14366 + }, + { + "epoch": 1.1594705834880155, + "grad_norm": 0.6928723454475403, + "learning_rate": 3.7027073864290074e-05, + "loss": 2.4401, + "step": 14367 + }, + { + "epoch": 1.1595512872245985, + "grad_norm": 0.6312821507453918, + "learning_rate": 3.701481115503194e-05, + "loss": 2.3975, + "step": 14368 + }, + { + "epoch": 1.1596319909611814, + "grad_norm": 0.7008257508277893, + "learning_rate": 3.700255001552937e-05, + "loss": 2.4988, + "step": 14369 + }, + { + "epoch": 1.1597126946977645, + "grad_norm": 0.6664693355560303, + "learning_rate": 3.699029044608792e-05, + "loss": 2.4123, + "step": 14370 + }, + { + "epoch": 1.1597933984343476, + "grad_norm": 0.6613842844963074, + "learning_rate": 3.6978032447013145e-05, + "loss": 2.4802, + "step": 14371 + }, + { + "epoch": 1.1598741021709305, + "grad_norm": 0.707788348197937, + "learning_rate": 
3.696577601861057e-05, + "loss": 2.4432, + "step": 14372 + }, + { + "epoch": 1.1599548059075135, + "grad_norm": 0.6547604203224182, + "learning_rate": 3.695352116118561e-05, + "loss": 2.412, + "step": 14373 + }, + { + "epoch": 1.1600355096440964, + "grad_norm": 0.7238109707832336, + "learning_rate": 3.69412678750437e-05, + "loss": 2.4858, + "step": 14374 + }, + { + "epoch": 1.1601162133806795, + "grad_norm": 0.8156580328941345, + "learning_rate": 3.692901616049026e-05, + "loss": 2.4063, + "step": 14375 + }, + { + "epoch": 1.1601969171172626, + "grad_norm": 0.7035481333732605, + "learning_rate": 3.6916766017830585e-05, + "loss": 2.4586, + "step": 14376 + }, + { + "epoch": 1.1602776208538454, + "grad_norm": 0.7523401379585266, + "learning_rate": 3.690451744736999e-05, + "loss": 2.4262, + "step": 14377 + }, + { + "epoch": 1.1603583245904285, + "grad_norm": 0.6740732192993164, + "learning_rate": 3.689227044941376e-05, + "loss": 2.5215, + "step": 14378 + }, + { + "epoch": 1.1604390283270116, + "grad_norm": 0.6502695083618164, + "learning_rate": 3.6880025024267115e-05, + "loss": 2.4292, + "step": 14379 + }, + { + "epoch": 1.1605197320635945, + "grad_norm": 0.7000409364700317, + "learning_rate": 3.686778117223524e-05, + "loss": 2.4323, + "step": 14380 + }, + { + "epoch": 1.1606004358001776, + "grad_norm": 0.7415478229522705, + "learning_rate": 3.68555388936233e-05, + "loss": 2.4515, + "step": 14381 + }, + { + "epoch": 1.1606811395367607, + "grad_norm": 0.6890547871589661, + "learning_rate": 3.684329818873641e-05, + "loss": 2.4115, + "step": 14382 + }, + { + "epoch": 1.1607618432733435, + "grad_norm": 0.8238685727119446, + "learning_rate": 3.68310590578796e-05, + "loss": 2.4666, + "step": 14383 + }, + { + "epoch": 1.1608425470099266, + "grad_norm": 0.8098889589309692, + "learning_rate": 3.681882150135791e-05, + "loss": 2.4667, + "step": 14384 + }, + { + "epoch": 1.1609232507465095, + "grad_norm": 0.6932713985443115, + "learning_rate": 3.680658551947639e-05, + "loss": 
2.4574, + "step": 14385 + }, + { + "epoch": 1.1610039544830926, + "grad_norm": 0.7062943577766418, + "learning_rate": 3.6794351112539915e-05, + "loss": 2.4408, + "step": 14386 + }, + { + "epoch": 1.1610846582196757, + "grad_norm": 0.7859255075454712, + "learning_rate": 3.678211828085343e-05, + "loss": 2.3946, + "step": 14387 + }, + { + "epoch": 1.1611653619562585, + "grad_norm": 0.674609899520874, + "learning_rate": 3.676988702472181e-05, + "loss": 2.4456, + "step": 14388 + }, + { + "epoch": 1.1612460656928416, + "grad_norm": 0.7068402171134949, + "learning_rate": 3.675765734444989e-05, + "loss": 2.4393, + "step": 14389 + }, + { + "epoch": 1.1613267694294245, + "grad_norm": 0.7276526689529419, + "learning_rate": 3.674542924034246e-05, + "loss": 2.456, + "step": 14390 + }, + { + "epoch": 1.1614074731660076, + "grad_norm": 0.7670585513114929, + "learning_rate": 3.673320271270433e-05, + "loss": 2.3774, + "step": 14391 + }, + { + "epoch": 1.1614881769025907, + "grad_norm": 0.702173113822937, + "learning_rate": 3.672097776184013e-05, + "loss": 2.3974, + "step": 14392 + }, + { + "epoch": 1.1615688806391735, + "grad_norm": 0.6922066807746887, + "learning_rate": 3.670875438805457e-05, + "loss": 2.4035, + "step": 14393 + }, + { + "epoch": 1.1616495843757566, + "grad_norm": 0.6675707697868347, + "learning_rate": 3.6696532591652335e-05, + "loss": 2.4369, + "step": 14394 + }, + { + "epoch": 1.1617302881123397, + "grad_norm": 0.6939712762832642, + "learning_rate": 3.668431237293796e-05, + "loss": 2.4265, + "step": 14395 + }, + { + "epoch": 1.1618109918489226, + "grad_norm": 0.719510018825531, + "learning_rate": 3.667209373221602e-05, + "loss": 2.4686, + "step": 14396 + }, + { + "epoch": 1.1618916955855056, + "grad_norm": 0.7167489528656006, + "learning_rate": 3.665987666979104e-05, + "loss": 2.5077, + "step": 14397 + }, + { + "epoch": 1.1619723993220887, + "grad_norm": 0.6539514064788818, + "learning_rate": 3.664766118596754e-05, + "loss": 2.4476, + "step": 14398 + }, + { + 
"epoch": 1.1620531030586716, + "grad_norm": 0.6926440596580505, + "learning_rate": 3.6635447281049876e-05, + "loss": 2.4336, + "step": 14399 + }, + { + "epoch": 1.1621338067952547, + "grad_norm": 0.7124993205070496, + "learning_rate": 3.662323495534252e-05, + "loss": 2.3938, + "step": 14400 + }, + { + "epoch": 1.1622145105318376, + "grad_norm": 0.7073954939842224, + "learning_rate": 3.661102420914986e-05, + "loss": 2.4232, + "step": 14401 + }, + { + "epoch": 1.1622952142684206, + "grad_norm": 0.7491076588630676, + "learning_rate": 3.659881504277613e-05, + "loss": 2.5047, + "step": 14402 + }, + { + "epoch": 1.1623759180050037, + "grad_norm": 0.6698675155639648, + "learning_rate": 3.658660745652568e-05, + "loss": 2.4164, + "step": 14403 + }, + { + "epoch": 1.1624566217415866, + "grad_norm": 0.6576815843582153, + "learning_rate": 3.657440145070276e-05, + "loss": 2.4368, + "step": 14404 + }, + { + "epoch": 1.1625373254781697, + "grad_norm": 0.8236953020095825, + "learning_rate": 3.6562197025611524e-05, + "loss": 2.5041, + "step": 14405 + }, + { + "epoch": 1.1626180292147525, + "grad_norm": 0.7391532063484192, + "learning_rate": 3.6549994181556157e-05, + "loss": 2.4556, + "step": 14406 + }, + { + "epoch": 1.1626987329513356, + "grad_norm": 0.6529936790466309, + "learning_rate": 3.653779291884084e-05, + "loss": 2.4559, + "step": 14407 + }, + { + "epoch": 1.1627794366879187, + "grad_norm": 0.7101796269416809, + "learning_rate": 3.652559323776957e-05, + "loss": 2.3937, + "step": 14408 + }, + { + "epoch": 1.1628601404245016, + "grad_norm": 0.6890308260917664, + "learning_rate": 3.651339513864645e-05, + "loss": 2.4694, + "step": 14409 + }, + { + "epoch": 1.1629408441610847, + "grad_norm": 0.6919918060302734, + "learning_rate": 3.650119862177548e-05, + "loss": 2.4793, + "step": 14410 + }, + { + "epoch": 1.1630215478976678, + "grad_norm": 0.6553575992584229, + "learning_rate": 3.6489003687460624e-05, + "loss": 2.454, + "step": 14411 + }, + { + "epoch": 1.1631022516342506, + 
"grad_norm": 0.7095460891723633, + "learning_rate": 3.6476810336005804e-05, + "loss": 2.4672, + "step": 14412 + }, + { + "epoch": 1.1631829553708337, + "grad_norm": 0.738301694393158, + "learning_rate": 3.6464618567714935e-05, + "loss": 2.4369, + "step": 14413 + }, + { + "epoch": 1.1632636591074166, + "grad_norm": 0.7574542760848999, + "learning_rate": 3.645242838289189e-05, + "loss": 2.4981, + "step": 14414 + }, + { + "epoch": 1.1633443628439997, + "grad_norm": 0.6780585646629333, + "learning_rate": 3.64402397818404e-05, + "loss": 2.4811, + "step": 14415 + }, + { + "epoch": 1.1634250665805828, + "grad_norm": 0.7050060629844666, + "learning_rate": 3.6428052764864287e-05, + "loss": 2.4607, + "step": 14416 + }, + { + "epoch": 1.1635057703171656, + "grad_norm": 0.6946923136711121, + "learning_rate": 3.6415867332267316e-05, + "loss": 2.4482, + "step": 14417 + }, + { + "epoch": 1.1635864740537487, + "grad_norm": 0.7202015519142151, + "learning_rate": 3.64036834843531e-05, + "loss": 2.4764, + "step": 14418 + }, + { + "epoch": 1.1636671777903316, + "grad_norm": 0.7845996618270874, + "learning_rate": 3.639150122142534e-05, + "loss": 2.4926, + "step": 14419 + }, + { + "epoch": 1.1637478815269147, + "grad_norm": 0.6924630403518677, + "learning_rate": 3.6379320543787645e-05, + "loss": 2.4664, + "step": 14420 + }, + { + "epoch": 1.1638285852634978, + "grad_norm": 0.7225920557975769, + "learning_rate": 3.636714145174358e-05, + "loss": 2.4638, + "step": 14421 + }, + { + "epoch": 1.1639092890000806, + "grad_norm": 0.6587103605270386, + "learning_rate": 3.63549639455967e-05, + "loss": 2.3629, + "step": 14422 + }, + { + "epoch": 1.1639899927366637, + "grad_norm": 0.7537658214569092, + "learning_rate": 3.634278802565051e-05, + "loss": 2.4971, + "step": 14423 + }, + { + "epoch": 1.1640706964732468, + "grad_norm": 0.6881381273269653, + "learning_rate": 3.633061369220841e-05, + "loss": 2.3737, + "step": 14424 + }, + { + "epoch": 1.1641514002098297, + "grad_norm": 0.693779468536377, + 
"learning_rate": 3.6318440945573864e-05, + "loss": 2.4346, + "step": 14425 + }, + { + "epoch": 1.1642321039464127, + "grad_norm": 0.777563750743866, + "learning_rate": 3.6306269786050265e-05, + "loss": 2.4288, + "step": 14426 + }, + { + "epoch": 1.1643128076829958, + "grad_norm": 0.6786738634109497, + "learning_rate": 3.629410021394087e-05, + "loss": 2.4094, + "step": 14427 + }, + { + "epoch": 1.1643935114195787, + "grad_norm": 0.7478442788124084, + "learning_rate": 3.628193222954904e-05, + "loss": 2.4163, + "step": 14428 + }, + { + "epoch": 1.1644742151561618, + "grad_norm": 0.6530766487121582, + "learning_rate": 3.626976583317803e-05, + "loss": 2.4328, + "step": 14429 + }, + { + "epoch": 1.1645549188927447, + "grad_norm": 0.6665371060371399, + "learning_rate": 3.6257601025131026e-05, + "loss": 2.4006, + "step": 14430 + }, + { + "epoch": 1.1646356226293277, + "grad_norm": 0.7184741497039795, + "learning_rate": 3.624543780571125e-05, + "loss": 2.462, + "step": 14431 + }, + { + "epoch": 1.1647163263659108, + "grad_norm": 0.7039462327957153, + "learning_rate": 3.6233276175221794e-05, + "loss": 2.4321, + "step": 14432 + }, + { + "epoch": 1.1647970301024937, + "grad_norm": 0.7039144039154053, + "learning_rate": 3.622111613396584e-05, + "loss": 2.4399, + "step": 14433 + }, + { + "epoch": 1.1648777338390768, + "grad_norm": 0.6690253615379333, + "learning_rate": 3.620895768224635e-05, + "loss": 2.3976, + "step": 14434 + }, + { + "epoch": 1.1649584375756596, + "grad_norm": 0.7048032879829407, + "learning_rate": 3.6196800820366384e-05, + "loss": 2.4848, + "step": 14435 + }, + { + "epoch": 1.1650391413122427, + "grad_norm": 0.668971836566925, + "learning_rate": 3.618464554862896e-05, + "loss": 2.4614, + "step": 14436 + }, + { + "epoch": 1.1651198450488258, + "grad_norm": 0.704858660697937, + "learning_rate": 3.617249186733695e-05, + "loss": 2.3962, + "step": 14437 + }, + { + "epoch": 1.1652005487854087, + "grad_norm": 0.692435085773468, + "learning_rate": 
3.6160339776793296e-05, + "loss": 2.4059, + "step": 14438 + }, + { + "epoch": 1.1652812525219918, + "grad_norm": 0.6774182319641113, + "learning_rate": 3.614818927730085e-05, + "loss": 2.4975, + "step": 14439 + }, + { + "epoch": 1.1653619562585749, + "grad_norm": 0.6507411003112793, + "learning_rate": 3.613604036916243e-05, + "loss": 2.5029, + "step": 14440 + }, + { + "epoch": 1.1654426599951577, + "grad_norm": 0.7223206162452698, + "learning_rate": 3.612389305268084e-05, + "loss": 2.4599, + "step": 14441 + }, + { + "epoch": 1.1655233637317408, + "grad_norm": 0.6523364186286926, + "learning_rate": 3.611174732815883e-05, + "loss": 2.4521, + "step": 14442 + }, + { + "epoch": 1.165604067468324, + "grad_norm": 0.6668452024459839, + "learning_rate": 3.6099603195899046e-05, + "loss": 2.4082, + "step": 14443 + }, + { + "epoch": 1.1656847712049068, + "grad_norm": 0.6878299117088318, + "learning_rate": 3.60874606562042e-05, + "loss": 2.4144, + "step": 14444 + }, + { + "epoch": 1.1657654749414899, + "grad_norm": 0.6662277579307556, + "learning_rate": 3.6075319709376895e-05, + "loss": 2.438, + "step": 14445 + }, + { + "epoch": 1.1658461786780727, + "grad_norm": 0.721422553062439, + "learning_rate": 3.606318035571976e-05, + "loss": 2.4414, + "step": 14446 + }, + { + "epoch": 1.1659268824146558, + "grad_norm": 0.6739782691001892, + "learning_rate": 3.6051042595535264e-05, + "loss": 2.4093, + "step": 14447 + }, + { + "epoch": 1.166007586151239, + "grad_norm": 0.6890884637832642, + "learning_rate": 3.603890642912596e-05, + "loss": 2.4385, + "step": 14448 + }, + { + "epoch": 1.1660882898878218, + "grad_norm": 0.6503998637199402, + "learning_rate": 3.602677185679433e-05, + "loss": 2.4498, + "step": 14449 + }, + { + "epoch": 1.1661689936244048, + "grad_norm": 0.6748046875, + "learning_rate": 3.601463887884271e-05, + "loss": 2.3739, + "step": 14450 + }, + { + "epoch": 1.1662496973609877, + "grad_norm": 0.6843422651290894, + "learning_rate": 3.600250749557358e-05, + "loss": 2.4323, + 
"step": 14451 + }, + { + "epoch": 1.1663304010975708, + "grad_norm": 0.7061208486557007, + "learning_rate": 3.599037770728929e-05, + "loss": 2.4611, + "step": 14452 + }, + { + "epoch": 1.166411104834154, + "grad_norm": 0.6614537239074707, + "learning_rate": 3.597824951429208e-05, + "loss": 2.4656, + "step": 14453 + }, + { + "epoch": 1.1664918085707368, + "grad_norm": 0.6620328426361084, + "learning_rate": 3.596612291688424e-05, + "loss": 2.415, + "step": 14454 + }, + { + "epoch": 1.1665725123073198, + "grad_norm": 0.6936565041542053, + "learning_rate": 3.595399791536804e-05, + "loss": 2.4655, + "step": 14455 + }, + { + "epoch": 1.166653216043903, + "grad_norm": 0.6766063570976257, + "learning_rate": 3.594187451004559e-05, + "loss": 2.4628, + "step": 14456 + }, + { + "epoch": 1.1667339197804858, + "grad_norm": 0.6588734984397888, + "learning_rate": 3.592975270121909e-05, + "loss": 2.4503, + "step": 14457 + }, + { + "epoch": 1.1668146235170689, + "grad_norm": 0.7290894985198975, + "learning_rate": 3.591763248919062e-05, + "loss": 2.5075, + "step": 14458 + }, + { + "epoch": 1.1668953272536517, + "grad_norm": 0.6952784657478333, + "learning_rate": 3.590551387426231e-05, + "loss": 2.4258, + "step": 14459 + }, + { + "epoch": 1.1669760309902348, + "grad_norm": 0.6737042665481567, + "learning_rate": 3.5893396856736096e-05, + "loss": 2.4459, + "step": 14460 + }, + { + "epoch": 1.167056734726818, + "grad_norm": 0.6616976857185364, + "learning_rate": 3.588128143691397e-05, + "loss": 2.4726, + "step": 14461 + }, + { + "epoch": 1.1671374384634008, + "grad_norm": 0.7017171382904053, + "learning_rate": 3.5869167615098e-05, + "loss": 2.375, + "step": 14462 + }, + { + "epoch": 1.1672181421999839, + "grad_norm": 0.7153809666633606, + "learning_rate": 3.585705539158997e-05, + "loss": 2.4271, + "step": 14463 + }, + { + "epoch": 1.1672988459365667, + "grad_norm": 0.749196469783783, + "learning_rate": 3.584494476669179e-05, + "loss": 2.4713, + "step": 14464 + }, + { + "epoch": 
1.1673795496731498, + "grad_norm": 0.6593676209449768, + "learning_rate": 3.583283574070533e-05, + "loss": 2.4276, + "step": 14465 + }, + { + "epoch": 1.167460253409733, + "grad_norm": 0.6949084401130676, + "learning_rate": 3.5820728313932295e-05, + "loss": 2.4128, + "step": 14466 + }, + { + "epoch": 1.1675409571463158, + "grad_norm": 0.6795482039451599, + "learning_rate": 3.5808622486674484e-05, + "loss": 2.485, + "step": 14467 + }, + { + "epoch": 1.1676216608828989, + "grad_norm": 0.6763483881950378, + "learning_rate": 3.5796518259233625e-05, + "loss": 2.4063, + "step": 14468 + }, + { + "epoch": 1.167702364619482, + "grad_norm": 0.665687620639801, + "learning_rate": 3.578441563191133e-05, + "loss": 2.437, + "step": 14469 + }, + { + "epoch": 1.1677830683560648, + "grad_norm": 0.6338435411453247, + "learning_rate": 3.577231460500926e-05, + "loss": 2.3747, + "step": 14470 + }, + { + "epoch": 1.167863772092648, + "grad_norm": 0.7031865119934082, + "learning_rate": 3.5760215178829e-05, + "loss": 2.3952, + "step": 14471 + }, + { + "epoch": 1.167944475829231, + "grad_norm": 0.7544599771499634, + "learning_rate": 3.5748117353672106e-05, + "loss": 2.3941, + "step": 14472 + }, + { + "epoch": 1.1680251795658139, + "grad_norm": 0.7271532416343689, + "learning_rate": 3.5736021129840083e-05, + "loss": 2.4371, + "step": 14473 + }, + { + "epoch": 1.168105883302397, + "grad_norm": 0.709048867225647, + "learning_rate": 3.572392650763441e-05, + "loss": 2.482, + "step": 14474 + }, + { + "epoch": 1.1681865870389798, + "grad_norm": 0.6894589066505432, + "learning_rate": 3.571183348735653e-05, + "loss": 2.4347, + "step": 14475 + }, + { + "epoch": 1.168267290775563, + "grad_norm": 0.6680620908737183, + "learning_rate": 3.5699742069307774e-05, + "loss": 2.3995, + "step": 14476 + }, + { + "epoch": 1.168347994512146, + "grad_norm": 0.701669454574585, + "learning_rate": 3.568765225378954e-05, + "loss": 2.4045, + "step": 14477 + }, + { + "epoch": 1.1684286982487289, + "grad_norm": 
0.7102392911911011, + "learning_rate": 3.567556404110315e-05, + "loss": 2.4695, + "step": 14478 + }, + { + "epoch": 1.168509401985312, + "grad_norm": 0.6820430755615234, + "learning_rate": 3.566347743154982e-05, + "loss": 2.4155, + "step": 14479 + }, + { + "epoch": 1.1685901057218948, + "grad_norm": 0.6611022353172302, + "learning_rate": 3.565139242543081e-05, + "loss": 2.3992, + "step": 14480 + }, + { + "epoch": 1.168670809458478, + "grad_norm": 0.6844382882118225, + "learning_rate": 3.5639309023047306e-05, + "loss": 2.4345, + "step": 14481 + }, + { + "epoch": 1.168751513195061, + "grad_norm": 0.7557988166809082, + "learning_rate": 3.5627227224700464e-05, + "loss": 2.4454, + "step": 14482 + }, + { + "epoch": 1.1688322169316439, + "grad_norm": 0.6652555465698242, + "learning_rate": 3.5615147030691384e-05, + "loss": 2.3749, + "step": 14483 + }, + { + "epoch": 1.168912920668227, + "grad_norm": 0.6912989020347595, + "learning_rate": 3.56030684413212e-05, + "loss": 2.4737, + "step": 14484 + }, + { + "epoch": 1.16899362440481, + "grad_norm": 0.735103964805603, + "learning_rate": 3.559099145689083e-05, + "loss": 2.4098, + "step": 14485 + }, + { + "epoch": 1.169074328141393, + "grad_norm": 0.6873028874397278, + "learning_rate": 3.557891607770133e-05, + "loss": 2.4247, + "step": 14486 + }, + { + "epoch": 1.169155031877976, + "grad_norm": 0.7364680171012878, + "learning_rate": 3.556684230405367e-05, + "loss": 2.4314, + "step": 14487 + }, + { + "epoch": 1.169235735614559, + "grad_norm": 0.679122269153595, + "learning_rate": 3.55547701362487e-05, + "loss": 2.4196, + "step": 14488 + }, + { + "epoch": 1.169316439351142, + "grad_norm": 0.6783872246742249, + "learning_rate": 3.554269957458731e-05, + "loss": 2.4212, + "step": 14489 + }, + { + "epoch": 1.169397143087725, + "grad_norm": 0.7434942126274109, + "learning_rate": 3.553063061937034e-05, + "loss": 2.4139, + "step": 14490 + }, + { + "epoch": 1.1694778468243079, + "grad_norm": 0.6799852252006531, + "learning_rate": 
3.55185632708986e-05, + "loss": 2.4252, + "step": 14491 + }, + { + "epoch": 1.169558550560891, + "grad_norm": 0.7040107250213623, + "learning_rate": 3.5506497529472795e-05, + "loss": 2.3937, + "step": 14492 + }, + { + "epoch": 1.169639254297474, + "grad_norm": 0.7350315451622009, + "learning_rate": 3.549443339539368e-05, + "loss": 2.4063, + "step": 14493 + }, + { + "epoch": 1.169719958034057, + "grad_norm": 0.694521963596344, + "learning_rate": 3.548237086896192e-05, + "loss": 2.4715, + "step": 14494 + }, + { + "epoch": 1.16980066177064, + "grad_norm": 0.6648221015930176, + "learning_rate": 3.5470309950478096e-05, + "loss": 2.4365, + "step": 14495 + }, + { + "epoch": 1.1698813655072229, + "grad_norm": 0.688024640083313, + "learning_rate": 3.545825064024284e-05, + "loss": 2.449, + "step": 14496 + }, + { + "epoch": 1.169962069243806, + "grad_norm": 0.6743311882019043, + "learning_rate": 3.544619293855672e-05, + "loss": 2.4283, + "step": 14497 + }, + { + "epoch": 1.170042772980389, + "grad_norm": 0.669119119644165, + "learning_rate": 3.543413684572019e-05, + "loss": 2.4363, + "step": 14498 + }, + { + "epoch": 1.170123476716972, + "grad_norm": 0.6998667120933533, + "learning_rate": 3.5422082362033745e-05, + "loss": 2.425, + "step": 14499 + }, + { + "epoch": 1.170204180453555, + "grad_norm": 0.7681630253791809, + "learning_rate": 3.5410029487797845e-05, + "loss": 2.4382, + "step": 14500 + }, + { + "epoch": 1.170284884190138, + "grad_norm": 0.6925049424171448, + "learning_rate": 3.539797822331279e-05, + "loss": 2.4261, + "step": 14501 + }, + { + "epoch": 1.170365587926721, + "grad_norm": 0.7145542502403259, + "learning_rate": 3.538592856887901e-05, + "loss": 2.4681, + "step": 14502 + }, + { + "epoch": 1.170446291663304, + "grad_norm": 0.6441611647605896, + "learning_rate": 3.537388052479684e-05, + "loss": 2.4187, + "step": 14503 + }, + { + "epoch": 1.1705269953998871, + "grad_norm": 0.6622560620307922, + "learning_rate": 3.5361834091366466e-05, + "loss": 2.4615, + 
"step": 14504 + }, + { + "epoch": 1.17060769913647, + "grad_norm": 0.6987677812576294, + "learning_rate": 3.5349789268888144e-05, + "loss": 2.413, + "step": 14505 + }, + { + "epoch": 1.170688402873053, + "grad_norm": 0.668358325958252, + "learning_rate": 3.533774605766207e-05, + "loss": 2.5146, + "step": 14506 + }, + { + "epoch": 1.170769106609636, + "grad_norm": 0.7514958381652832, + "learning_rate": 3.532570445798844e-05, + "loss": 2.4474, + "step": 14507 + }, + { + "epoch": 1.170849810346219, + "grad_norm": 0.6454465389251709, + "learning_rate": 3.5313664470167276e-05, + "loss": 2.3911, + "step": 14508 + }, + { + "epoch": 1.170930514082802, + "grad_norm": 0.6653602719306946, + "learning_rate": 3.5301626094498674e-05, + "loss": 2.4223, + "step": 14509 + }, + { + "epoch": 1.171011217819385, + "grad_norm": 0.6782815456390381, + "learning_rate": 3.5289589331282715e-05, + "loss": 2.457, + "step": 14510 + }, + { + "epoch": 1.171091921555968, + "grad_norm": 0.720973014831543, + "learning_rate": 3.527755418081932e-05, + "loss": 2.4541, + "step": 14511 + }, + { + "epoch": 1.171172625292551, + "grad_norm": 0.6300156712532043, + "learning_rate": 3.526552064340841e-05, + "loss": 2.4451, + "step": 14512 + }, + { + "epoch": 1.171253329029134, + "grad_norm": 0.7660964727401733, + "learning_rate": 3.5253488719350026e-05, + "loss": 2.5031, + "step": 14513 + }, + { + "epoch": 1.1713340327657171, + "grad_norm": 0.6931602358818054, + "learning_rate": 3.5241458408943905e-05, + "loss": 2.4249, + "step": 14514 + }, + { + "epoch": 1.1714147365023, + "grad_norm": 0.6863045692443848, + "learning_rate": 3.522942971248993e-05, + "loss": 2.4429, + "step": 14515 + }, + { + "epoch": 1.171495440238883, + "grad_norm": 0.6993531584739685, + "learning_rate": 3.521740263028791e-05, + "loss": 2.3864, + "step": 14516 + }, + { + "epoch": 1.1715761439754662, + "grad_norm": 0.807991087436676, + "learning_rate": 3.520537716263753e-05, + "loss": 2.459, + "step": 14517 + }, + { + "epoch": 
1.171656847712049, + "grad_norm": 0.6722908020019531, + "learning_rate": 3.519335330983852e-05, + "loss": 2.4426, + "step": 14518 + }, + { + "epoch": 1.1717375514486321, + "grad_norm": 0.6934377551078796, + "learning_rate": 3.5181331072190585e-05, + "loss": 2.4326, + "step": 14519 + }, + { + "epoch": 1.171818255185215, + "grad_norm": 0.6532938480377197, + "learning_rate": 3.516931044999329e-05, + "loss": 2.3778, + "step": 14520 + }, + { + "epoch": 1.171898958921798, + "grad_norm": 0.6779183745384216, + "learning_rate": 3.5157291443546247e-05, + "loss": 2.4089, + "step": 14521 + }, + { + "epoch": 1.1719796626583812, + "grad_norm": 0.687005877494812, + "learning_rate": 3.514527405314899e-05, + "loss": 2.4669, + "step": 14522 + }, + { + "epoch": 1.172060366394964, + "grad_norm": 0.6804830431938171, + "learning_rate": 3.5133258279101045e-05, + "loss": 2.4789, + "step": 14523 + }, + { + "epoch": 1.1721410701315471, + "grad_norm": 0.8345538973808289, + "learning_rate": 3.512124412170187e-05, + "loss": 2.4506, + "step": 14524 + }, + { + "epoch": 1.17222177386813, + "grad_norm": 0.6571901440620422, + "learning_rate": 3.510923158125088e-05, + "loss": 2.4911, + "step": 14525 + }, + { + "epoch": 1.172302477604713, + "grad_norm": 0.6607047915458679, + "learning_rate": 3.5097220658047504e-05, + "loss": 2.4882, + "step": 14526 + }, + { + "epoch": 1.1723831813412962, + "grad_norm": 0.6883669495582581, + "learning_rate": 3.508521135239101e-05, + "loss": 2.4083, + "step": 14527 + }, + { + "epoch": 1.172463885077879, + "grad_norm": 0.6792941689491272, + "learning_rate": 3.5073203664580746e-05, + "loss": 2.368, + "step": 14528 + }, + { + "epoch": 1.172544588814462, + "grad_norm": 0.6675198674201965, + "learning_rate": 3.506119759491598e-05, + "loss": 2.4193, + "step": 14529 + }, + { + "epoch": 1.1726252925510452, + "grad_norm": 0.7267464399337769, + "learning_rate": 3.504919314369591e-05, + "loss": 2.3906, + "step": 14530 + }, + { + "epoch": 1.172705996287628, + "grad_norm": 
0.6927710175514221, + "learning_rate": 3.503719031121973e-05, + "loss": 2.4082, + "step": 14531 + }, + { + "epoch": 1.1727867000242111, + "grad_norm": 0.7231000065803528, + "learning_rate": 3.502518909778656e-05, + "loss": 2.4845, + "step": 14532 + }, + { + "epoch": 1.1728674037607942, + "grad_norm": 0.7087520360946655, + "learning_rate": 3.5013189503695544e-05, + "loss": 2.4622, + "step": 14533 + }, + { + "epoch": 1.172948107497377, + "grad_norm": 0.6669846177101135, + "learning_rate": 3.5001191529245716e-05, + "loss": 2.4151, + "step": 14534 + }, + { + "epoch": 1.1730288112339602, + "grad_norm": 0.7338447570800781, + "learning_rate": 3.4989195174736134e-05, + "loss": 2.4274, + "step": 14535 + }, + { + "epoch": 1.173109514970543, + "grad_norm": 0.7032054662704468, + "learning_rate": 3.497720044046572e-05, + "loss": 2.4066, + "step": 14536 + }, + { + "epoch": 1.1731902187071261, + "grad_norm": 0.6571083068847656, + "learning_rate": 3.496520732673344e-05, + "loss": 2.4581, + "step": 14537 + }, + { + "epoch": 1.1732709224437092, + "grad_norm": 0.6618444919586182, + "learning_rate": 3.495321583383819e-05, + "loss": 2.3675, + "step": 14538 + }, + { + "epoch": 1.173351626180292, + "grad_norm": 0.6597652435302734, + "learning_rate": 3.4941225962078885e-05, + "loss": 2.416, + "step": 14539 + }, + { + "epoch": 1.1734323299168752, + "grad_norm": 0.682634711265564, + "learning_rate": 3.492923771175425e-05, + "loss": 2.5081, + "step": 14540 + }, + { + "epoch": 1.173513033653458, + "grad_norm": 0.7046132683753967, + "learning_rate": 3.49172510831631e-05, + "loss": 2.4439, + "step": 14541 + }, + { + "epoch": 1.1735937373900411, + "grad_norm": 0.6734833717346191, + "learning_rate": 3.4905266076604196e-05, + "loss": 2.4348, + "step": 14542 + }, + { + "epoch": 1.1736744411266242, + "grad_norm": 0.6624744534492493, + "learning_rate": 3.4893282692376214e-05, + "loss": 2.4364, + "step": 14543 + }, + { + "epoch": 1.173755144863207, + "grad_norm": 0.8425754308700562, + "learning_rate": 
3.4881300930777815e-05, + "loss": 2.4803, + "step": 14544 + }, + { + "epoch": 1.1738358485997902, + "grad_norm": 0.6438888311386108, + "learning_rate": 3.486932079210766e-05, + "loss": 2.3973, + "step": 14545 + }, + { + "epoch": 1.1739165523363733, + "grad_norm": 0.650399923324585, + "learning_rate": 3.485734227666424e-05, + "loss": 2.4183, + "step": 14546 + }, + { + "epoch": 1.1739972560729561, + "grad_norm": 0.6857002973556519, + "learning_rate": 3.4845365384746144e-05, + "loss": 2.4061, + "step": 14547 + }, + { + "epoch": 1.1740779598095392, + "grad_norm": 0.6680994629859924, + "learning_rate": 3.483339011665189e-05, + "loss": 2.421, + "step": 14548 + }, + { + "epoch": 1.1741586635461223, + "grad_norm": 0.6440950632095337, + "learning_rate": 3.482141647267987e-05, + "loss": 2.3914, + "step": 14549 + }, + { + "epoch": 1.1742393672827052, + "grad_norm": 0.7329740524291992, + "learning_rate": 3.480944445312853e-05, + "loss": 2.4805, + "step": 14550 + }, + { + "epoch": 1.1743200710192883, + "grad_norm": 0.6848189234733582, + "learning_rate": 3.4797474058296245e-05, + "loss": 2.3611, + "step": 14551 + }, + { + "epoch": 1.1744007747558711, + "grad_norm": 0.6994072794914246, + "learning_rate": 3.478550528848134e-05, + "loss": 2.5106, + "step": 14552 + }, + { + "epoch": 1.1744814784924542, + "grad_norm": 0.6826444268226624, + "learning_rate": 3.477353814398212e-05, + "loss": 2.467, + "step": 14553 + }, + { + "epoch": 1.1745621822290373, + "grad_norm": 0.6658408045768738, + "learning_rate": 3.476157262509683e-05, + "loss": 2.423, + "step": 14554 + }, + { + "epoch": 1.1746428859656202, + "grad_norm": 0.6963697075843811, + "learning_rate": 3.474960873212372e-05, + "loss": 2.457, + "step": 14555 + }, + { + "epoch": 1.1747235897022033, + "grad_norm": 0.7574479579925537, + "learning_rate": 3.4737646465360894e-05, + "loss": 2.4292, + "step": 14556 + }, + { + "epoch": 1.1748042934387861, + "grad_norm": 0.7494931817054749, + "learning_rate": 3.472568582510652e-05, + "loss": 
2.4395, + "step": 14557 + }, + { + "epoch": 1.1748849971753692, + "grad_norm": 0.7062687873840332, + "learning_rate": 3.471372681165872e-05, + "loss": 2.4561, + "step": 14558 + }, + { + "epoch": 1.1749657009119523, + "grad_norm": 0.6875349879264832, + "learning_rate": 3.4701769425315465e-05, + "loss": 2.4728, + "step": 14559 + }, + { + "epoch": 1.1750464046485352, + "grad_norm": 0.7009960412979126, + "learning_rate": 3.46898136663748e-05, + "loss": 2.5364, + "step": 14560 + }, + { + "epoch": 1.1751271083851182, + "grad_norm": 0.673791766166687, + "learning_rate": 3.467785953513475e-05, + "loss": 2.4611, + "step": 14561 + }, + { + "epoch": 1.1752078121217013, + "grad_norm": 0.7166882753372192, + "learning_rate": 3.4665907031893164e-05, + "loss": 2.4451, + "step": 14562 + }, + { + "epoch": 1.1752885158582842, + "grad_norm": 0.6868429780006409, + "learning_rate": 3.465395615694791e-05, + "loss": 2.4282, + "step": 14563 + }, + { + "epoch": 1.1753692195948673, + "grad_norm": 0.7212893962860107, + "learning_rate": 3.464200691059697e-05, + "loss": 2.4239, + "step": 14564 + }, + { + "epoch": 1.1754499233314502, + "grad_norm": 0.7213432192802429, + "learning_rate": 3.463005929313802e-05, + "loss": 2.4872, + "step": 14565 + }, + { + "epoch": 1.1755306270680332, + "grad_norm": 0.6805179119110107, + "learning_rate": 3.461811330486887e-05, + "loss": 2.4192, + "step": 14566 + }, + { + "epoch": 1.1756113308046163, + "grad_norm": 0.6746333241462708, + "learning_rate": 3.460616894608725e-05, + "loss": 2.3911, + "step": 14567 + }, + { + "epoch": 1.1756920345411992, + "grad_norm": 0.7388630509376526, + "learning_rate": 3.459422621709088e-05, + "loss": 2.4758, + "step": 14568 + }, + { + "epoch": 1.1757727382777823, + "grad_norm": 0.7730274200439453, + "learning_rate": 3.458228511817731e-05, + "loss": 2.4159, + "step": 14569 + }, + { + "epoch": 1.1758534420143651, + "grad_norm": 0.721075177192688, + "learning_rate": 3.457034564964422e-05, + "loss": 2.4673, + "step": 14570 + }, + { + 
"epoch": 1.1759341457509482, + "grad_norm": 0.6647645235061646, + "learning_rate": 3.4558407811789184e-05, + "loss": 2.395, + "step": 14571 + }, + { + "epoch": 1.1760148494875313, + "grad_norm": 0.7155466675758362, + "learning_rate": 3.454647160490965e-05, + "loss": 2.503, + "step": 14572 + }, + { + "epoch": 1.1760955532241142, + "grad_norm": 0.6789268851280212, + "learning_rate": 3.453453702930314e-05, + "loss": 2.401, + "step": 14573 + }, + { + "epoch": 1.1761762569606973, + "grad_norm": 0.7488093376159668, + "learning_rate": 3.4522604085267105e-05, + "loss": 2.4434, + "step": 14574 + }, + { + "epoch": 1.1762569606972804, + "grad_norm": 0.7954889535903931, + "learning_rate": 3.451067277309893e-05, + "loss": 2.5302, + "step": 14575 + }, + { + "epoch": 1.1763376644338632, + "grad_norm": 0.7008484601974487, + "learning_rate": 3.4498743093095975e-05, + "loss": 2.3935, + "step": 14576 + }, + { + "epoch": 1.1764183681704463, + "grad_norm": 0.6725437641143799, + "learning_rate": 3.448681504555561e-05, + "loss": 2.399, + "step": 14577 + }, + { + "epoch": 1.1764990719070294, + "grad_norm": 0.6778931617736816, + "learning_rate": 3.4474888630775026e-05, + "loss": 2.4178, + "step": 14578 + }, + { + "epoch": 1.1765797756436123, + "grad_norm": 0.7043762803077698, + "learning_rate": 3.44629638490515e-05, + "loss": 2.5581, + "step": 14579 + }, + { + "epoch": 1.1766604793801954, + "grad_norm": 0.6848085522651672, + "learning_rate": 3.445104070068227e-05, + "loss": 2.436, + "step": 14580 + }, + { + "epoch": 1.1767411831167782, + "grad_norm": 0.7504082322120667, + "learning_rate": 3.443911918596441e-05, + "loss": 2.4138, + "step": 14581 + }, + { + "epoch": 1.1768218868533613, + "grad_norm": 0.7441161870956421, + "learning_rate": 3.442719930519508e-05, + "loss": 2.4333, + "step": 14582 + }, + { + "epoch": 1.1769025905899444, + "grad_norm": 0.663894772529602, + "learning_rate": 3.4415281058671354e-05, + "loss": 2.4672, + "step": 14583 + }, + { + "epoch": 1.1769832943265273, + 
"grad_norm": 0.6814345121383667, + "learning_rate": 3.440336444669027e-05, + "loss": 2.4196, + "step": 14584 + }, + { + "epoch": 1.1770639980631104, + "grad_norm": 0.7566598057746887, + "learning_rate": 3.439144946954881e-05, + "loss": 2.4586, + "step": 14585 + }, + { + "epoch": 1.1771447017996932, + "grad_norm": 0.7324996590614319, + "learning_rate": 3.4379536127543934e-05, + "loss": 2.4286, + "step": 14586 + }, + { + "epoch": 1.1772254055362763, + "grad_norm": 0.6632608771324158, + "learning_rate": 3.436762442097259e-05, + "loss": 2.4713, + "step": 14587 + }, + { + "epoch": 1.1773061092728594, + "grad_norm": 0.7246156930923462, + "learning_rate": 3.4355714350131564e-05, + "loss": 2.4374, + "step": 14588 + }, + { + "epoch": 1.1773868130094423, + "grad_norm": 0.7096351981163025, + "learning_rate": 3.4343805915317737e-05, + "loss": 2.4649, + "step": 14589 + }, + { + "epoch": 1.1774675167460253, + "grad_norm": 0.7090620398521423, + "learning_rate": 3.433189911682793e-05, + "loss": 2.396, + "step": 14590 + }, + { + "epoch": 1.1775482204826084, + "grad_norm": 0.7782440185546875, + "learning_rate": 3.431999395495882e-05, + "loss": 2.4506, + "step": 14591 + }, + { + "epoch": 1.1776289242191913, + "grad_norm": 0.6933457851409912, + "learning_rate": 3.4308090430007155e-05, + "loss": 2.3985, + "step": 14592 + }, + { + "epoch": 1.1777096279557744, + "grad_norm": 0.6935414671897888, + "learning_rate": 3.429618854226959e-05, + "loss": 2.4372, + "step": 14593 + }, + { + "epoch": 1.1777903316923575, + "grad_norm": 0.6971156597137451, + "learning_rate": 3.428428829204276e-05, + "loss": 2.4837, + "step": 14594 + }, + { + "epoch": 1.1778710354289403, + "grad_norm": 0.6460022926330566, + "learning_rate": 3.427238967962325e-05, + "loss": 2.3742, + "step": 14595 + }, + { + "epoch": 1.1779517391655234, + "grad_norm": 0.6941941976547241, + "learning_rate": 3.426049270530763e-05, + "loss": 2.4706, + "step": 14596 + }, + { + "epoch": 1.1780324429021063, + "grad_norm": 0.7062166333198547, 
+ "learning_rate": 3.424859736939236e-05, + "loss": 2.3893, + "step": 14597 + }, + { + "epoch": 1.1781131466386894, + "grad_norm": 0.6586433053016663, + "learning_rate": 3.42367036721739e-05, + "loss": 2.4385, + "step": 14598 + }, + { + "epoch": 1.1781938503752725, + "grad_norm": 0.6781242489814758, + "learning_rate": 3.422481161394869e-05, + "loss": 2.3876, + "step": 14599 + }, + { + "epoch": 1.1782745541118553, + "grad_norm": 0.710127592086792, + "learning_rate": 3.421292119501316e-05, + "loss": 2.4067, + "step": 14600 + }, + { + "epoch": 1.1783552578484384, + "grad_norm": 0.6856096982955933, + "learning_rate": 3.420103241566357e-05, + "loss": 2.4855, + "step": 14601 + }, + { + "epoch": 1.1784359615850213, + "grad_norm": 0.7173380851745605, + "learning_rate": 3.4189145276196245e-05, + "loss": 2.4871, + "step": 14602 + }, + { + "epoch": 1.1785166653216044, + "grad_norm": 0.6895382404327393, + "learning_rate": 3.417725977690745e-05, + "loss": 2.4066, + "step": 14603 + }, + { + "epoch": 1.1785973690581875, + "grad_norm": 0.7417690753936768, + "learning_rate": 3.416537591809341e-05, + "loss": 2.3779, + "step": 14604 + }, + { + "epoch": 1.1786780727947703, + "grad_norm": 0.7258411049842834, + "learning_rate": 3.4153493700050286e-05, + "loss": 2.4334, + "step": 14605 + }, + { + "epoch": 1.1787587765313534, + "grad_norm": 0.65704345703125, + "learning_rate": 3.414161312307427e-05, + "loss": 2.4531, + "step": 14606 + }, + { + "epoch": 1.1788394802679365, + "grad_norm": 0.6937118172645569, + "learning_rate": 3.4129734187461374e-05, + "loss": 2.4562, + "step": 14607 + }, + { + "epoch": 1.1789201840045194, + "grad_norm": 0.7331998348236084, + "learning_rate": 3.411785689350768e-05, + "loss": 2.4418, + "step": 14608 + }, + { + "epoch": 1.1790008877411025, + "grad_norm": 0.666582465171814, + "learning_rate": 3.410598124150924e-05, + "loss": 2.4154, + "step": 14609 + }, + { + "epoch": 1.1790815914776853, + "grad_norm": 0.6684321165084839, + "learning_rate": 
3.409410723176197e-05, + "loss": 2.4155, + "step": 14610 + }, + { + "epoch": 1.1791622952142684, + "grad_norm": 0.6413382291793823, + "learning_rate": 3.408223486456184e-05, + "loss": 2.3924, + "step": 14611 + }, + { + "epoch": 1.1792429989508515, + "grad_norm": 0.7081305384635925, + "learning_rate": 3.407036414020475e-05, + "loss": 2.3811, + "step": 14612 + }, + { + "epoch": 1.1793237026874344, + "grad_norm": 0.7550063133239746, + "learning_rate": 3.405849505898645e-05, + "loss": 2.4425, + "step": 14613 + }, + { + "epoch": 1.1794044064240174, + "grad_norm": 0.677200198173523, + "learning_rate": 3.404662762120288e-05, + "loss": 2.5182, + "step": 14614 + }, + { + "epoch": 1.1794851101606003, + "grad_norm": 0.6829770803451538, + "learning_rate": 3.4034761827149745e-05, + "loss": 2.5068, + "step": 14615 + }, + { + "epoch": 1.1795658138971834, + "grad_norm": 0.7069409489631653, + "learning_rate": 3.4022897677122815e-05, + "loss": 2.4449, + "step": 14616 + }, + { + "epoch": 1.1796465176337665, + "grad_norm": 0.6604448556900024, + "learning_rate": 3.4011035171417696e-05, + "loss": 2.3996, + "step": 14617 + }, + { + "epoch": 1.1797272213703494, + "grad_norm": 0.6577324271202087, + "learning_rate": 3.3999174310330084e-05, + "loss": 2.4723, + "step": 14618 + }, + { + "epoch": 1.1798079251069324, + "grad_norm": 0.8159187436103821, + "learning_rate": 3.398731509415561e-05, + "loss": 2.4655, + "step": 14619 + }, + { + "epoch": 1.1798886288435155, + "grad_norm": 0.7170652747154236, + "learning_rate": 3.397545752318977e-05, + "loss": 2.5095, + "step": 14620 + }, + { + "epoch": 1.1799693325800984, + "grad_norm": 0.6865009665489197, + "learning_rate": 3.396360159772812e-05, + "loss": 2.4358, + "step": 14621 + }, + { + "epoch": 1.1800500363166815, + "grad_norm": 0.6485020518302917, + "learning_rate": 3.3951747318066175e-05, + "loss": 2.4576, + "step": 14622 + }, + { + "epoch": 1.1801307400532646, + "grad_norm": 0.6626582145690918, + "learning_rate": 3.39398946844993e-05, + "loss": 
2.4824, + "step": 14623 + }, + { + "epoch": 1.1802114437898474, + "grad_norm": 0.718588650226593, + "learning_rate": 3.392804369732293e-05, + "loss": 2.4211, + "step": 14624 + }, + { + "epoch": 1.1802921475264305, + "grad_norm": 0.7449582815170288, + "learning_rate": 3.391619435683243e-05, + "loss": 2.444, + "step": 14625 + }, + { + "epoch": 1.1803728512630134, + "grad_norm": 0.6988492012023926, + "learning_rate": 3.3904346663323115e-05, + "loss": 2.4262, + "step": 14626 + }, + { + "epoch": 1.1804535549995965, + "grad_norm": 0.6779490113258362, + "learning_rate": 3.389250061709025e-05, + "loss": 2.4751, + "step": 14627 + }, + { + "epoch": 1.1805342587361796, + "grad_norm": 0.6883673667907715, + "learning_rate": 3.388065621842912e-05, + "loss": 2.4995, + "step": 14628 + }, + { + "epoch": 1.1806149624727624, + "grad_norm": 0.7112017273902893, + "learning_rate": 3.386881346763483e-05, + "loss": 2.4181, + "step": 14629 + }, + { + "epoch": 1.1806956662093455, + "grad_norm": 0.6960459351539612, + "learning_rate": 3.385697236500258e-05, + "loss": 2.4888, + "step": 14630 + }, + { + "epoch": 1.1807763699459284, + "grad_norm": 0.6874156594276428, + "learning_rate": 3.3845132910827484e-05, + "loss": 2.4175, + "step": 14631 + }, + { + "epoch": 1.1808570736825115, + "grad_norm": 0.7075642347335815, + "learning_rate": 3.383329510540463e-05, + "loss": 2.4315, + "step": 14632 + }, + { + "epoch": 1.1809377774190946, + "grad_norm": 0.674907386302948, + "learning_rate": 3.3821458949028995e-05, + "loss": 2.4216, + "step": 14633 + }, + { + "epoch": 1.1810184811556774, + "grad_norm": 0.7008463740348816, + "learning_rate": 3.380962444199559e-05, + "loss": 2.4114, + "step": 14634 + }, + { + "epoch": 1.1810991848922605, + "grad_norm": 0.6784217953681946, + "learning_rate": 3.379779158459937e-05, + "loss": 2.3663, + "step": 14635 + }, + { + "epoch": 1.1811798886288436, + "grad_norm": 0.7174829244613647, + "learning_rate": 3.378596037713525e-05, + "loss": 2.4582, + "step": 14636 + }, + { + 
"epoch": 1.1812605923654265, + "grad_norm": 0.7106035947799683, + "learning_rate": 3.3774130819898065e-05, + "loss": 2.5095, + "step": 14637 + }, + { + "epoch": 1.1813412961020096, + "grad_norm": 0.809107780456543, + "learning_rate": 3.3762302913182696e-05, + "loss": 2.4942, + "step": 14638 + }, + { + "epoch": 1.1814219998385926, + "grad_norm": 0.7150272727012634, + "learning_rate": 3.375047665728386e-05, + "loss": 2.378, + "step": 14639 + }, + { + "epoch": 1.1815027035751755, + "grad_norm": 0.7016271352767944, + "learning_rate": 3.373865205249632e-05, + "loss": 2.4393, + "step": 14640 + }, + { + "epoch": 1.1815834073117586, + "grad_norm": 0.6387282013893127, + "learning_rate": 3.372682909911481e-05, + "loss": 2.4399, + "step": 14641 + }, + { + "epoch": 1.1816641110483415, + "grad_norm": 0.834181010723114, + "learning_rate": 3.371500779743393e-05, + "loss": 2.4312, + "step": 14642 + }, + { + "epoch": 1.1817448147849245, + "grad_norm": 0.6690472960472107, + "learning_rate": 3.370318814774832e-05, + "loss": 2.407, + "step": 14643 + }, + { + "epoch": 1.1818255185215076, + "grad_norm": 0.6594302654266357, + "learning_rate": 3.369137015035256e-05, + "loss": 2.4275, + "step": 14644 + }, + { + "epoch": 1.1819062222580905, + "grad_norm": 0.7284699082374573, + "learning_rate": 3.3679553805541194e-05, + "loss": 2.3981, + "step": 14645 + }, + { + "epoch": 1.1819869259946736, + "grad_norm": 0.7109572291374207, + "learning_rate": 3.366773911360871e-05, + "loss": 2.4345, + "step": 14646 + }, + { + "epoch": 1.1820676297312565, + "grad_norm": 0.6874241828918457, + "learning_rate": 3.3655926074849566e-05, + "loss": 2.4488, + "step": 14647 + }, + { + "epoch": 1.1821483334678395, + "grad_norm": 0.6698973178863525, + "learning_rate": 3.364411468955819e-05, + "loss": 2.42, + "step": 14648 + }, + { + "epoch": 1.1822290372044226, + "grad_norm": 0.7816089391708374, + "learning_rate": 3.3632304958028915e-05, + "loss": 2.4638, + "step": 14649 + }, + { + "epoch": 1.1823097409410055, + 
"grad_norm": 0.6718220710754395, + "learning_rate": 3.3620496880556075e-05, + "loss": 2.413, + "step": 14650 + }, + { + "epoch": 1.1823904446775886, + "grad_norm": 0.753463089466095, + "learning_rate": 3.360869045743401e-05, + "loss": 2.3772, + "step": 14651 + }, + { + "epoch": 1.1824711484141717, + "grad_norm": 0.7031456828117371, + "learning_rate": 3.359688568895689e-05, + "loss": 2.4198, + "step": 14652 + }, + { + "epoch": 1.1825518521507545, + "grad_norm": 0.7857323288917542, + "learning_rate": 3.358508257541897e-05, + "loss": 2.4223, + "step": 14653 + }, + { + "epoch": 1.1826325558873376, + "grad_norm": 0.7779297828674316, + "learning_rate": 3.357328111711439e-05, + "loss": 2.5266, + "step": 14654 + }, + { + "epoch": 1.1827132596239207, + "grad_norm": 0.7382386326789856, + "learning_rate": 3.356148131433728e-05, + "loss": 2.4673, + "step": 14655 + }, + { + "epoch": 1.1827939633605036, + "grad_norm": 0.7868054509162903, + "learning_rate": 3.354968316738174e-05, + "loss": 2.4285, + "step": 14656 + }, + { + "epoch": 1.1828746670970867, + "grad_norm": 0.7007591724395752, + "learning_rate": 3.353788667654183e-05, + "loss": 2.4054, + "step": 14657 + }, + { + "epoch": 1.1829553708336695, + "grad_norm": 0.6627741456031799, + "learning_rate": 3.352609184211148e-05, + "loss": 2.4224, + "step": 14658 + }, + { + "epoch": 1.1830360745702526, + "grad_norm": 0.6865360736846924, + "learning_rate": 3.351429866438469e-05, + "loss": 2.4084, + "step": 14659 + }, + { + "epoch": 1.1831167783068357, + "grad_norm": 0.7572095990180969, + "learning_rate": 3.3502507143655404e-05, + "loss": 2.4339, + "step": 14660 + }, + { + "epoch": 1.1831974820434186, + "grad_norm": 0.6907969117164612, + "learning_rate": 3.349071728021743e-05, + "loss": 2.4578, + "step": 14661 + }, + { + "epoch": 1.1832781857800017, + "grad_norm": 0.6618743538856506, + "learning_rate": 3.347892907436465e-05, + "loss": 2.4131, + "step": 14662 + }, + { + "epoch": 1.1833588895165845, + "grad_norm": 0.777159571647644, + 
"learning_rate": 3.346714252639084e-05, + "loss": 2.419, + "step": 14663 + }, + { + "epoch": 1.1834395932531676, + "grad_norm": 0.666344165802002, + "learning_rate": 3.345535763658975e-05, + "loss": 2.4155, + "step": 14664 + }, + { + "epoch": 1.1835202969897507, + "grad_norm": 0.708848774433136, + "learning_rate": 3.3443574405255095e-05, + "loss": 2.4794, + "step": 14665 + }, + { + "epoch": 1.1836010007263336, + "grad_norm": 0.7247438430786133, + "learning_rate": 3.3431792832680555e-05, + "loss": 2.4445, + "step": 14666 + }, + { + "epoch": 1.1836817044629167, + "grad_norm": 0.6870034337043762, + "learning_rate": 3.342001291915978e-05, + "loss": 2.4309, + "step": 14667 + }, + { + "epoch": 1.1837624081994997, + "grad_norm": 0.7088049650192261, + "learning_rate": 3.340823466498629e-05, + "loss": 2.4456, + "step": 14668 + }, + { + "epoch": 1.1838431119360826, + "grad_norm": 0.695148229598999, + "learning_rate": 3.3396458070453676e-05, + "loss": 2.4018, + "step": 14669 + }, + { + "epoch": 1.1839238156726657, + "grad_norm": 0.7947117686271667, + "learning_rate": 3.3384683135855444e-05, + "loss": 2.4099, + "step": 14670 + }, + { + "epoch": 1.1840045194092486, + "grad_norm": 0.7268195748329163, + "learning_rate": 3.337290986148502e-05, + "loss": 2.3955, + "step": 14671 + }, + { + "epoch": 1.1840852231458316, + "grad_norm": 0.6932024955749512, + "learning_rate": 3.336113824763585e-05, + "loss": 2.4046, + "step": 14672 + }, + { + "epoch": 1.1841659268824147, + "grad_norm": 0.7408114671707153, + "learning_rate": 3.3349368294601334e-05, + "loss": 2.4186, + "step": 14673 + }, + { + "epoch": 1.1842466306189976, + "grad_norm": 0.6678428053855896, + "learning_rate": 3.3337600002674765e-05, + "loss": 2.4324, + "step": 14674 + }, + { + "epoch": 1.1843273343555807, + "grad_norm": 0.7221381664276123, + "learning_rate": 3.3325833372149416e-05, + "loss": 2.4474, + "step": 14675 + }, + { + "epoch": 1.1844080380921636, + "grad_norm": 0.6971224546432495, + "learning_rate": 
3.3314068403318654e-05, + "loss": 2.4197, + "step": 14676 + }, + { + "epoch": 1.1844887418287466, + "grad_norm": 0.65053391456604, + "learning_rate": 3.3302305096475604e-05, + "loss": 2.4169, + "step": 14677 + }, + { + "epoch": 1.1845694455653297, + "grad_norm": 0.7231155633926392, + "learning_rate": 3.3290543451913457e-05, + "loss": 2.4222, + "step": 14678 + }, + { + "epoch": 1.1846501493019126, + "grad_norm": 0.6458824872970581, + "learning_rate": 3.3278783469925345e-05, + "loss": 2.422, + "step": 14679 + }, + { + "epoch": 1.1847308530384957, + "grad_norm": 0.6783488392829895, + "learning_rate": 3.32670251508044e-05, + "loss": 2.4231, + "step": 14680 + }, + { + "epoch": 1.1848115567750788, + "grad_norm": 0.6742293238639832, + "learning_rate": 3.3255268494843586e-05, + "loss": 2.409, + "step": 14681 + }, + { + "epoch": 1.1848922605116616, + "grad_norm": 0.7455186247825623, + "learning_rate": 3.3243513502335956e-05, + "loss": 2.4121, + "step": 14682 + }, + { + "epoch": 1.1849729642482447, + "grad_norm": 0.7042234539985657, + "learning_rate": 3.323176017357451e-05, + "loss": 2.4574, + "step": 14683 + }, + { + "epoch": 1.1850536679848278, + "grad_norm": 0.7897992134094238, + "learning_rate": 3.3220008508852094e-05, + "loss": 2.4796, + "step": 14684 + }, + { + "epoch": 1.1851343717214107, + "grad_norm": 0.6894058585166931, + "learning_rate": 3.3208258508461644e-05, + "loss": 2.4125, + "step": 14685 + }, + { + "epoch": 1.1852150754579938, + "grad_norm": 0.7574072480201721, + "learning_rate": 3.319651017269597e-05, + "loss": 2.4714, + "step": 14686 + }, + { + "epoch": 1.1852957791945766, + "grad_norm": 0.7457531094551086, + "learning_rate": 3.3184763501847905e-05, + "loss": 2.4793, + "step": 14687 + }, + { + "epoch": 1.1853764829311597, + "grad_norm": 0.6819709539413452, + "learning_rate": 3.317301849621018e-05, + "loss": 2.4563, + "step": 14688 + }, + { + "epoch": 1.1854571866677428, + "grad_norm": 0.6998026371002197, + "learning_rate": 3.316127515607555e-05, + "loss": 
2.4548, + "step": 14689 + }, + { + "epoch": 1.1855378904043257, + "grad_norm": 0.7148768305778503, + "learning_rate": 3.314953348173664e-05, + "loss": 2.4897, + "step": 14690 + }, + { + "epoch": 1.1856185941409088, + "grad_norm": 0.6581987738609314, + "learning_rate": 3.31377934734861e-05, + "loss": 2.4683, + "step": 14691 + }, + { + "epoch": 1.1856992978774916, + "grad_norm": 0.7493093609809875, + "learning_rate": 3.312605513161653e-05, + "loss": 2.4564, + "step": 14692 + }, + { + "epoch": 1.1857800016140747, + "grad_norm": 0.7095562219619751, + "learning_rate": 3.311431845642051e-05, + "loss": 2.4595, + "step": 14693 + }, + { + "epoch": 1.1858607053506578, + "grad_norm": 0.8045323491096497, + "learning_rate": 3.310258344819047e-05, + "loss": 2.5044, + "step": 14694 + }, + { + "epoch": 1.1859414090872407, + "grad_norm": 0.7381219267845154, + "learning_rate": 3.3090850107218943e-05, + "loss": 2.415, + "step": 14695 + }, + { + "epoch": 1.1860221128238237, + "grad_norm": 0.6859883069992065, + "learning_rate": 3.307911843379832e-05, + "loss": 2.4314, + "step": 14696 + }, + { + "epoch": 1.1861028165604068, + "grad_norm": 0.7084196209907532, + "learning_rate": 3.306738842822099e-05, + "loss": 2.4404, + "step": 14697 + }, + { + "epoch": 1.1861835202969897, + "grad_norm": 0.6964806318283081, + "learning_rate": 3.305566009077932e-05, + "loss": 2.4391, + "step": 14698 + }, + { + "epoch": 1.1862642240335728, + "grad_norm": 0.7272049188613892, + "learning_rate": 3.304393342176562e-05, + "loss": 2.4395, + "step": 14699 + }, + { + "epoch": 1.1863449277701559, + "grad_norm": 0.6651458144187927, + "learning_rate": 3.303220842147209e-05, + "loss": 2.4059, + "step": 14700 + }, + { + "epoch": 1.1864256315067387, + "grad_norm": 0.7599130868911743, + "learning_rate": 3.302048509019099e-05, + "loss": 2.5044, + "step": 14701 + }, + { + "epoch": 1.1865063352433218, + "grad_norm": 0.6694391965866089, + "learning_rate": 3.3008763428214505e-05, + "loss": 2.4817, + "step": 14702 + }, + { + 
"epoch": 1.1865870389799047, + "grad_norm": 0.7176856398582458, + "learning_rate": 3.299704343583473e-05, + "loss": 2.4702, + "step": 14703 + }, + { + "epoch": 1.1866677427164878, + "grad_norm": 0.7133145332336426, + "learning_rate": 3.298532511334378e-05, + "loss": 2.4685, + "step": 14704 + }, + { + "epoch": 1.1867484464530709, + "grad_norm": 0.7170277833938599, + "learning_rate": 3.297360846103371e-05, + "loss": 2.4203, + "step": 14705 + }, + { + "epoch": 1.1868291501896537, + "grad_norm": 0.6853376626968384, + "learning_rate": 3.296189347919652e-05, + "loss": 2.4067, + "step": 14706 + }, + { + "epoch": 1.1869098539262368, + "grad_norm": 0.7269156575202942, + "learning_rate": 3.2950180168124175e-05, + "loss": 2.4211, + "step": 14707 + }, + { + "epoch": 1.1869905576628197, + "grad_norm": 0.8649005889892578, + "learning_rate": 3.2938468528108626e-05, + "loss": 2.4611, + "step": 14708 + }, + { + "epoch": 1.1870712613994028, + "grad_norm": 0.7256221771240234, + "learning_rate": 3.292675855944177e-05, + "loss": 2.4618, + "step": 14709 + }, + { + "epoch": 1.1871519651359859, + "grad_norm": 0.6854279637336731, + "learning_rate": 3.291505026241539e-05, + "loss": 2.4466, + "step": 14710 + }, + { + "epoch": 1.1872326688725687, + "grad_norm": 0.7182712554931641, + "learning_rate": 3.2903343637321316e-05, + "loss": 2.4847, + "step": 14711 + }, + { + "epoch": 1.1873133726091518, + "grad_norm": 0.6795300841331482, + "learning_rate": 3.289163868445134e-05, + "loss": 2.4407, + "step": 14712 + }, + { + "epoch": 1.187394076345735, + "grad_norm": 0.685146689414978, + "learning_rate": 3.287993540409713e-05, + "loss": 2.4537, + "step": 14713 + }, + { + "epoch": 1.1874747800823178, + "grad_norm": 0.7891005873680115, + "learning_rate": 3.2868233796550375e-05, + "loss": 2.4085, + "step": 14714 + }, + { + "epoch": 1.1875554838189009, + "grad_norm": 0.6521769762039185, + "learning_rate": 3.2856533862102724e-05, + "loss": 2.4174, + "step": 14715 + }, + { + "epoch": 1.1876361875554837, + 
"grad_norm": 0.7486612200737, + "learning_rate": 3.284483560104575e-05, + "loss": 2.4072, + "step": 14716 + }, + { + "epoch": 1.1877168912920668, + "grad_norm": 0.6895913481712341, + "learning_rate": 3.283313901367103e-05, + "loss": 2.4398, + "step": 14717 + }, + { + "epoch": 1.18779759502865, + "grad_norm": 0.6595678329467773, + "learning_rate": 3.282144410027009e-05, + "loss": 2.4407, + "step": 14718 + }, + { + "epoch": 1.1878782987652328, + "grad_norm": 0.7724249958992004, + "learning_rate": 3.280975086113435e-05, + "loss": 2.464, + "step": 14719 + }, + { + "epoch": 1.1879590025018159, + "grad_norm": 0.659472644329071, + "learning_rate": 3.279805929655524e-05, + "loss": 2.4774, + "step": 14720 + }, + { + "epoch": 1.1880397062383987, + "grad_norm": 0.7187919020652771, + "learning_rate": 3.27863694068242e-05, + "loss": 2.4767, + "step": 14721 + }, + { + "epoch": 1.1881204099749818, + "grad_norm": 0.7740198373794556, + "learning_rate": 3.2774681192232506e-05, + "loss": 2.4762, + "step": 14722 + }, + { + "epoch": 1.188201113711565, + "grad_norm": 0.700591504573822, + "learning_rate": 3.2762994653071464e-05, + "loss": 2.448, + "step": 14723 + }, + { + "epoch": 1.1882818174481478, + "grad_norm": 0.7168558239936829, + "learning_rate": 3.275130978963237e-05, + "loss": 2.4084, + "step": 14724 + }, + { + "epoch": 1.1883625211847308, + "grad_norm": 0.8039551973342896, + "learning_rate": 3.273962660220646e-05, + "loss": 2.3849, + "step": 14725 + }, + { + "epoch": 1.188443224921314, + "grad_norm": 0.6453016400337219, + "learning_rate": 3.27279450910848e-05, + "loss": 2.3856, + "step": 14726 + }, + { + "epoch": 1.1885239286578968, + "grad_norm": 0.7194651365280151, + "learning_rate": 3.2716265256558644e-05, + "loss": 2.4337, + "step": 14727 + }, + { + "epoch": 1.1886046323944799, + "grad_norm": 0.7298597097396851, + "learning_rate": 3.270458709891906e-05, + "loss": 2.4491, + "step": 14728 + }, + { + "epoch": 1.188685336131063, + "grad_norm": 0.7127524614334106, + 
"learning_rate": 3.269291061845705e-05, + "loss": 2.4319, + "step": 14729 + }, + { + "epoch": 1.1887660398676458, + "grad_norm": 0.6782705783843994, + "learning_rate": 3.2681235815463654e-05, + "loss": 2.4375, + "step": 14730 + }, + { + "epoch": 1.188846743604229, + "grad_norm": 0.7418326735496521, + "learning_rate": 3.266956269022987e-05, + "loss": 2.4149, + "step": 14731 + }, + { + "epoch": 1.1889274473408118, + "grad_norm": 0.7442455291748047, + "learning_rate": 3.265789124304654e-05, + "loss": 2.3935, + "step": 14732 + }, + { + "epoch": 1.1890081510773949, + "grad_norm": 0.7238253951072693, + "learning_rate": 3.264622147420461e-05, + "loss": 2.4592, + "step": 14733 + }, + { + "epoch": 1.189088854813978, + "grad_norm": 0.6488127708435059, + "learning_rate": 3.2634553383994925e-05, + "loss": 2.3468, + "step": 14734 + }, + { + "epoch": 1.1891695585505608, + "grad_norm": 0.7182446718215942, + "learning_rate": 3.2622886972708246e-05, + "loss": 2.4457, + "step": 14735 + }, + { + "epoch": 1.189250262287144, + "grad_norm": 0.6885523796081543, + "learning_rate": 3.261122224063534e-05, + "loss": 2.3943, + "step": 14736 + }, + { + "epoch": 1.1893309660237268, + "grad_norm": 0.653367817401886, + "learning_rate": 3.259955918806693e-05, + "loss": 2.4188, + "step": 14737 + }, + { + "epoch": 1.1894116697603099, + "grad_norm": 0.6968675851821899, + "learning_rate": 3.2587897815293686e-05, + "loss": 2.4276, + "step": 14738 + }, + { + "epoch": 1.189492373496893, + "grad_norm": 0.6827409267425537, + "learning_rate": 3.257623812260626e-05, + "loss": 2.4417, + "step": 14739 + }, + { + "epoch": 1.1895730772334758, + "grad_norm": 0.6807438731193542, + "learning_rate": 3.256458011029523e-05, + "loss": 2.4495, + "step": 14740 + }, + { + "epoch": 1.189653780970059, + "grad_norm": 0.6692882180213928, + "learning_rate": 3.255292377865116e-05, + "loss": 2.3789, + "step": 14741 + }, + { + "epoch": 1.189734484706642, + "grad_norm": 0.6581685543060303, + "learning_rate": 
3.2541269127964515e-05, + "loss": 2.4073, + "step": 14742 + }, + { + "epoch": 1.1898151884432249, + "grad_norm": 0.6458544731140137, + "learning_rate": 3.252961615852578e-05, + "loss": 2.4657, + "step": 14743 + }, + { + "epoch": 1.189895892179808, + "grad_norm": 0.6971322298049927, + "learning_rate": 3.251796487062541e-05, + "loss": 2.4404, + "step": 14744 + }, + { + "epoch": 1.189976595916391, + "grad_norm": 0.6770374178886414, + "learning_rate": 3.2506315264553724e-05, + "loss": 2.4329, + "step": 14745 + }, + { + "epoch": 1.190057299652974, + "grad_norm": 0.7634715437889099, + "learning_rate": 3.2494667340601085e-05, + "loss": 2.4234, + "step": 14746 + }, + { + "epoch": 1.190138003389557, + "grad_norm": 0.7717967629432678, + "learning_rate": 3.24830210990578e-05, + "loss": 2.5009, + "step": 14747 + }, + { + "epoch": 1.1902187071261399, + "grad_norm": 0.7133559584617615, + "learning_rate": 3.2471376540214124e-05, + "loss": 2.4272, + "step": 14748 + }, + { + "epoch": 1.190299410862723, + "grad_norm": 0.7273291349411011, + "learning_rate": 3.245973366436027e-05, + "loss": 2.4174, + "step": 14749 + }, + { + "epoch": 1.190380114599306, + "grad_norm": 0.6955052614212036, + "learning_rate": 3.244809247178643e-05, + "loss": 2.3605, + "step": 14750 + }, + { + "epoch": 1.190460818335889, + "grad_norm": 0.7072615027427673, + "learning_rate": 3.2436452962782685e-05, + "loss": 2.4897, + "step": 14751 + }, + { + "epoch": 1.190541522072472, + "grad_norm": 0.7095344662666321, + "learning_rate": 3.242481513763913e-05, + "loss": 2.4172, + "step": 14752 + }, + { + "epoch": 1.1906222258090549, + "grad_norm": 0.7260944247245789, + "learning_rate": 3.2413178996645864e-05, + "loss": 2.4272, + "step": 14753 + }, + { + "epoch": 1.190702929545638, + "grad_norm": 0.6601141691207886, + "learning_rate": 3.2401544540092824e-05, + "loss": 2.4072, + "step": 14754 + }, + { + "epoch": 1.190783633282221, + "grad_norm": 0.6684936881065369, + "learning_rate": 3.238991176827e-05, + "loss": 2.3968, + 
"step": 14755 + }, + { + "epoch": 1.190864337018804, + "grad_norm": 0.7264483571052551, + "learning_rate": 3.23782806814673e-05, + "loss": 2.4263, + "step": 14756 + }, + { + "epoch": 1.190945040755387, + "grad_norm": 0.6927621960639954, + "learning_rate": 3.2366651279974614e-05, + "loss": 2.4495, + "step": 14757 + }, + { + "epoch": 1.19102574449197, + "grad_norm": 0.7007272243499756, + "learning_rate": 3.2355023564081775e-05, + "loss": 2.4373, + "step": 14758 + }, + { + "epoch": 1.191106448228553, + "grad_norm": 0.6756663918495178, + "learning_rate": 3.234339753407857e-05, + "loss": 2.4148, + "step": 14759 + }, + { + "epoch": 1.191187151965136, + "grad_norm": 0.6741094589233398, + "learning_rate": 3.233177319025479e-05, + "loss": 2.3976, + "step": 14760 + }, + { + "epoch": 1.1912678557017191, + "grad_norm": 0.7098578810691833, + "learning_rate": 3.2320150532900085e-05, + "loss": 2.4326, + "step": 14761 + }, + { + "epoch": 1.191348559438302, + "grad_norm": 0.750271737575531, + "learning_rate": 3.230852956230413e-05, + "loss": 2.4766, + "step": 14762 + }, + { + "epoch": 1.191429263174885, + "grad_norm": 0.68764728307724, + "learning_rate": 3.229691027875661e-05, + "loss": 2.4128, + "step": 14763 + }, + { + "epoch": 1.191509966911468, + "grad_norm": 0.656295657157898, + "learning_rate": 3.228529268254702e-05, + "loss": 2.3928, + "step": 14764 + }, + { + "epoch": 1.191590670648051, + "grad_norm": 0.6690353155136108, + "learning_rate": 3.2273676773964955e-05, + "loss": 2.408, + "step": 14765 + }, + { + "epoch": 1.1916713743846339, + "grad_norm": 0.8111640214920044, + "learning_rate": 3.22620625532999e-05, + "loss": 2.4644, + "step": 14766 + }, + { + "epoch": 1.191752078121217, + "grad_norm": 0.7329768538475037, + "learning_rate": 3.2250450020841316e-05, + "loss": 2.4235, + "step": 14767 + }, + { + "epoch": 1.1918327818578, + "grad_norm": 0.6902688145637512, + "learning_rate": 3.223883917687861e-05, + "loss": 2.3883, + "step": 14768 + }, + { + "epoch": 1.191913485594383, 
+ "grad_norm": 0.797249972820282, + "learning_rate": 3.2227230021701205e-05, + "loss": 2.523, + "step": 14769 + }, + { + "epoch": 1.191994189330966, + "grad_norm": 0.6294408440589905, + "learning_rate": 3.221562255559834e-05, + "loss": 2.4156, + "step": 14770 + }, + { + "epoch": 1.192074893067549, + "grad_norm": 0.7326164245605469, + "learning_rate": 3.220401677885936e-05, + "loss": 2.3828, + "step": 14771 + }, + { + "epoch": 1.192155596804132, + "grad_norm": 0.783747673034668, + "learning_rate": 3.219241269177351e-05, + "loss": 2.4321, + "step": 14772 + }, + { + "epoch": 1.192236300540715, + "grad_norm": 0.7415335178375244, + "learning_rate": 3.2180810294630005e-05, + "loss": 2.4446, + "step": 14773 + }, + { + "epoch": 1.1923170042772981, + "grad_norm": 0.7125591039657593, + "learning_rate": 3.2169209587717966e-05, + "loss": 2.3914, + "step": 14774 + }, + { + "epoch": 1.192397708013881, + "grad_norm": 0.6714075207710266, + "learning_rate": 3.215761057132652e-05, + "loss": 2.3918, + "step": 14775 + }, + { + "epoch": 1.192478411750464, + "grad_norm": 0.7147830724716187, + "learning_rate": 3.214601324574481e-05, + "loss": 2.4389, + "step": 14776 + }, + { + "epoch": 1.192559115487047, + "grad_norm": 0.6780480146408081, + "learning_rate": 3.2134417611261755e-05, + "loss": 2.4119, + "step": 14777 + }, + { + "epoch": 1.19263981922363, + "grad_norm": 0.7473881840705872, + "learning_rate": 3.212282366816645e-05, + "loss": 2.4547, + "step": 14778 + }, + { + "epoch": 1.1927205229602131, + "grad_norm": 0.7418377995491028, + "learning_rate": 3.211123141674784e-05, + "loss": 2.4156, + "step": 14779 + }, + { + "epoch": 1.192801226696796, + "grad_norm": 0.687524139881134, + "learning_rate": 3.209964085729477e-05, + "loss": 2.4309, + "step": 14780 + }, + { + "epoch": 1.192881930433379, + "grad_norm": 0.6965883374214172, + "learning_rate": 3.208805199009615e-05, + "loss": 2.4028, + "step": 14781 + }, + { + "epoch": 1.192962634169962, + "grad_norm": 0.7024682760238647, + 
"learning_rate": 3.207646481544082e-05, + "loss": 2.4482, + "step": 14782 + }, + { + "epoch": 1.193043337906545, + "grad_norm": 0.6835834383964539, + "learning_rate": 3.2064879333617514e-05, + "loss": 2.3898, + "step": 14783 + }, + { + "epoch": 1.1931240416431281, + "grad_norm": 0.7002003788948059, + "learning_rate": 3.2053295544915e-05, + "loss": 2.487, + "step": 14784 + }, + { + "epoch": 1.193204745379711, + "grad_norm": 0.7128168940544128, + "learning_rate": 3.2041713449622e-05, + "loss": 2.4591, + "step": 14785 + }, + { + "epoch": 1.193285449116294, + "grad_norm": 0.6897242665290833, + "learning_rate": 3.203013304802712e-05, + "loss": 2.4458, + "step": 14786 + }, + { + "epoch": 1.1933661528528772, + "grad_norm": 0.7281817197799683, + "learning_rate": 3.2018554340419004e-05, + "loss": 2.3772, + "step": 14787 + }, + { + "epoch": 1.19344685658946, + "grad_norm": 0.6956086754798889, + "learning_rate": 3.200697732708619e-05, + "loss": 2.4316, + "step": 14788 + }, + { + "epoch": 1.1935275603260431, + "grad_norm": 0.7679805159568787, + "learning_rate": 3.199540200831729e-05, + "loss": 2.4464, + "step": 14789 + }, + { + "epoch": 1.1936082640626262, + "grad_norm": 0.6993041634559631, + "learning_rate": 3.19838283844007e-05, + "loss": 2.3881, + "step": 14790 + }, + { + "epoch": 1.193688967799209, + "grad_norm": 0.689618706703186, + "learning_rate": 3.197225645562493e-05, + "loss": 2.4184, + "step": 14791 + }, + { + "epoch": 1.1937696715357922, + "grad_norm": 0.6896520853042603, + "learning_rate": 3.1960686222278354e-05, + "loss": 2.4484, + "step": 14792 + }, + { + "epoch": 1.193850375272375, + "grad_norm": 0.6743811368942261, + "learning_rate": 3.1949117684649334e-05, + "loss": 2.4636, + "step": 14793 + }, + { + "epoch": 1.1939310790089581, + "grad_norm": 0.7028046250343323, + "learning_rate": 3.1937550843026163e-05, + "loss": 2.4576, + "step": 14794 + }, + { + "epoch": 1.1940117827455412, + "grad_norm": 0.7219679951667786, + "learning_rate": 3.192598569769718e-05, + 
"loss": 2.4495, + "step": 14795 + }, + { + "epoch": 1.194092486482124, + "grad_norm": 0.731438159942627, + "learning_rate": 3.191442224895056e-05, + "loss": 2.4699, + "step": 14796 + }, + { + "epoch": 1.1941731902187072, + "grad_norm": 0.6731431484222412, + "learning_rate": 3.19028604970745e-05, + "loss": 2.4292, + "step": 14797 + }, + { + "epoch": 1.19425389395529, + "grad_norm": 0.6720147728919983, + "learning_rate": 3.1891300442357174e-05, + "loss": 2.4482, + "step": 14798 + }, + { + "epoch": 1.1943345976918731, + "grad_norm": 0.7504273653030396, + "learning_rate": 3.187974208508667e-05, + "loss": 2.4233, + "step": 14799 + }, + { + "epoch": 1.1944153014284562, + "grad_norm": 0.6882641315460205, + "learning_rate": 3.186818542555108e-05, + "loss": 2.4633, + "step": 14800 + }, + { + "epoch": 1.194496005165039, + "grad_norm": 0.7337899208068848, + "learning_rate": 3.1856630464038385e-05, + "loss": 2.4257, + "step": 14801 + }, + { + "epoch": 1.1945767089016222, + "grad_norm": 0.7026493549346924, + "learning_rate": 3.1845077200836636e-05, + "loss": 2.482, + "step": 14802 + }, + { + "epoch": 1.1946574126382052, + "grad_norm": 0.763351321220398, + "learning_rate": 3.1833525636233675e-05, + "loss": 2.4428, + "step": 14803 + }, + { + "epoch": 1.194738116374788, + "grad_norm": 0.6568076610565186, + "learning_rate": 3.182197577051745e-05, + "loss": 2.4373, + "step": 14804 + }, + { + "epoch": 1.1948188201113712, + "grad_norm": 0.6954717040061951, + "learning_rate": 3.1810427603975844e-05, + "loss": 2.4582, + "step": 14805 + }, + { + "epoch": 1.1948995238479543, + "grad_norm": 0.7130215167999268, + "learning_rate": 3.179888113689661e-05, + "loss": 2.443, + "step": 14806 + }, + { + "epoch": 1.1949802275845371, + "grad_norm": 0.6789865493774414, + "learning_rate": 3.178733636956752e-05, + "loss": 2.4138, + "step": 14807 + }, + { + "epoch": 1.1950609313211202, + "grad_norm": 0.7725361585617065, + "learning_rate": 3.177579330227633e-05, + "loss": 2.4783, + "step": 14808 + }, + { 
+ "epoch": 1.195141635057703, + "grad_norm": 0.6952371001243591, + "learning_rate": 3.17642519353107e-05, + "loss": 2.4571, + "step": 14809 + }, + { + "epoch": 1.1952223387942862, + "grad_norm": 0.7541885375976562, + "learning_rate": 3.1752712268958275e-05, + "loss": 2.4075, + "step": 14810 + }, + { + "epoch": 1.1953030425308693, + "grad_norm": 0.6974624395370483, + "learning_rate": 3.174117430350671e-05, + "loss": 2.4525, + "step": 14811 + }, + { + "epoch": 1.1953837462674521, + "grad_norm": 0.7293709516525269, + "learning_rate": 3.172963803924347e-05, + "loss": 2.4646, + "step": 14812 + }, + { + "epoch": 1.1954644500040352, + "grad_norm": 0.6944144368171692, + "learning_rate": 3.1718103476456106e-05, + "loss": 2.462, + "step": 14813 + }, + { + "epoch": 1.195545153740618, + "grad_norm": 0.6415363550186157, + "learning_rate": 3.170657061543214e-05, + "loss": 2.4086, + "step": 14814 + }, + { + "epoch": 1.1956258574772012, + "grad_norm": 0.6511349081993103, + "learning_rate": 3.169503945645892e-05, + "loss": 2.4376, + "step": 14815 + }, + { + "epoch": 1.1957065612137843, + "grad_norm": 0.7420210242271423, + "learning_rate": 3.1683509999823854e-05, + "loss": 2.4317, + "step": 14816 + }, + { + "epoch": 1.1957872649503671, + "grad_norm": 0.7291967272758484, + "learning_rate": 3.1671982245814316e-05, + "loss": 2.4369, + "step": 14817 + }, + { + "epoch": 1.1958679686869502, + "grad_norm": 0.685743510723114, + "learning_rate": 3.166045619471758e-05, + "loss": 2.465, + "step": 14818 + }, + { + "epoch": 1.1959486724235333, + "grad_norm": 0.7130060195922852, + "learning_rate": 3.164893184682093e-05, + "loss": 2.4305, + "step": 14819 + }, + { + "epoch": 1.1960293761601162, + "grad_norm": 0.694508969783783, + "learning_rate": 3.163740920241156e-05, + "loss": 2.4278, + "step": 14820 + }, + { + "epoch": 1.1961100798966993, + "grad_norm": 0.6478514075279236, + "learning_rate": 3.162588826177669e-05, + "loss": 2.4721, + "step": 14821 + }, + { + "epoch": 1.1961907836332821, + 
"grad_norm": 0.6586465835571289, + "learning_rate": 3.1614369025203386e-05, + "loss": 2.4716, + "step": 14822 + }, + { + "epoch": 1.1962714873698652, + "grad_norm": 0.7558106184005737, + "learning_rate": 3.160285149297876e-05, + "loss": 2.4656, + "step": 14823 + }, + { + "epoch": 1.1963521911064483, + "grad_norm": 0.7208340764045715, + "learning_rate": 3.1591335665389896e-05, + "loss": 2.4374, + "step": 14824 + }, + { + "epoch": 1.1964328948430312, + "grad_norm": 0.70301353931427, + "learning_rate": 3.157982154272375e-05, + "loss": 2.397, + "step": 14825 + }, + { + "epoch": 1.1965135985796143, + "grad_norm": 0.6857609152793884, + "learning_rate": 3.15683091252673e-05, + "loss": 2.4258, + "step": 14826 + }, + { + "epoch": 1.1965943023161971, + "grad_norm": 0.6954602003097534, + "learning_rate": 3.155679841330747e-05, + "loss": 2.4566, + "step": 14827 + }, + { + "epoch": 1.1966750060527802, + "grad_norm": 0.6923913955688477, + "learning_rate": 3.154528940713113e-05, + "loss": 2.4, + "step": 14828 + }, + { + "epoch": 1.1967557097893633, + "grad_norm": 0.6641134023666382, + "learning_rate": 3.1533782107025124e-05, + "loss": 2.4721, + "step": 14829 + }, + { + "epoch": 1.1968364135259462, + "grad_norm": 0.7470134496688843, + "learning_rate": 3.152227651327627e-05, + "loss": 2.4253, + "step": 14830 + }, + { + "epoch": 1.1969171172625293, + "grad_norm": 0.7234545350074768, + "learning_rate": 3.151077262617126e-05, + "loss": 2.4109, + "step": 14831 + }, + { + "epoch": 1.1969978209991123, + "grad_norm": 0.7814013957977295, + "learning_rate": 3.149927044599682e-05, + "loss": 2.4522, + "step": 14832 + }, + { + "epoch": 1.1970785247356952, + "grad_norm": 0.6825435161590576, + "learning_rate": 3.1487769973039624e-05, + "loss": 2.4728, + "step": 14833 + }, + { + "epoch": 1.1971592284722783, + "grad_norm": 0.7091361880302429, + "learning_rate": 3.147627120758634e-05, + "loss": 2.4615, + "step": 14834 + }, + { + "epoch": 1.1972399322088614, + "grad_norm": 0.7271433472633362, + 
"learning_rate": 3.146477414992346e-05, + "loss": 2.4154, + "step": 14835 + }, + { + "epoch": 1.1973206359454442, + "grad_norm": 0.6557306051254272, + "learning_rate": 3.145327880033756e-05, + "loss": 2.4348, + "step": 14836 + }, + { + "epoch": 1.1974013396820273, + "grad_norm": 0.6667891144752502, + "learning_rate": 3.1441785159115166e-05, + "loss": 2.4123, + "step": 14837 + }, + { + "epoch": 1.1974820434186102, + "grad_norm": 0.6755266189575195, + "learning_rate": 3.143029322654266e-05, + "loss": 2.4287, + "step": 14838 + }, + { + "epoch": 1.1975627471551933, + "grad_norm": 0.7647396922111511, + "learning_rate": 3.1418803002906475e-05, + "loss": 2.4343, + "step": 14839 + }, + { + "epoch": 1.1976434508917764, + "grad_norm": 0.7288243174552917, + "learning_rate": 3.140731448849305e-05, + "loss": 2.4536, + "step": 14840 + }, + { + "epoch": 1.1977241546283592, + "grad_norm": 0.6126244068145752, + "learning_rate": 3.1395827683588605e-05, + "loss": 2.4187, + "step": 14841 + }, + { + "epoch": 1.1978048583649423, + "grad_norm": 0.6773896217346191, + "learning_rate": 3.138434258847948e-05, + "loss": 2.3916, + "step": 14842 + }, + { + "epoch": 1.1978855621015252, + "grad_norm": 0.724413275718689, + "learning_rate": 3.1372859203451934e-05, + "loss": 2.4614, + "step": 14843 + }, + { + "epoch": 1.1979662658381083, + "grad_norm": 0.7043039798736572, + "learning_rate": 3.136137752879209e-05, + "loss": 2.4343, + "step": 14844 + }, + { + "epoch": 1.1980469695746914, + "grad_norm": 0.7543383240699768, + "learning_rate": 3.134989756478615e-05, + "loss": 2.4345, + "step": 14845 + }, + { + "epoch": 1.1981276733112742, + "grad_norm": 0.7193408608436584, + "learning_rate": 3.1338419311720244e-05, + "loss": 2.4728, + "step": 14846 + }, + { + "epoch": 1.1982083770478573, + "grad_norm": 0.8090186715126038, + "learning_rate": 3.132694276988038e-05, + "loss": 2.4246, + "step": 14847 + }, + { + "epoch": 1.1982890807844404, + "grad_norm": 0.7154600620269775, + "learning_rate": 
3.131546793955261e-05, + "loss": 2.4061, + "step": 14848 + }, + { + "epoch": 1.1983697845210233, + "grad_norm": 0.6987032890319824, + "learning_rate": 3.130399482102293e-05, + "loss": 2.4525, + "step": 14849 + }, + { + "epoch": 1.1984504882576064, + "grad_norm": 0.7123507261276245, + "learning_rate": 3.129252341457727e-05, + "loss": 2.4017, + "step": 14850 + }, + { + "epoch": 1.1985311919941894, + "grad_norm": 0.6475987434387207, + "learning_rate": 3.128105372050153e-05, + "loss": 2.4617, + "step": 14851 + }, + { + "epoch": 1.1986118957307723, + "grad_norm": 0.6799046993255615, + "learning_rate": 3.126958573908156e-05, + "loss": 2.4337, + "step": 14852 + }, + { + "epoch": 1.1986925994673554, + "grad_norm": 0.6910607218742371, + "learning_rate": 3.125811947060322e-05, + "loss": 2.415, + "step": 14853 + }, + { + "epoch": 1.1987733032039383, + "grad_norm": 0.6879963278770447, + "learning_rate": 3.124665491535219e-05, + "loss": 2.4912, + "step": 14854 + }, + { + "epoch": 1.1988540069405214, + "grad_norm": 0.7038810849189758, + "learning_rate": 3.123519207361425e-05, + "loss": 2.4528, + "step": 14855 + }, + { + "epoch": 1.1989347106771044, + "grad_norm": 0.6771957278251648, + "learning_rate": 3.1223730945675104e-05, + "loss": 2.4524, + "step": 14856 + }, + { + "epoch": 1.1990154144136873, + "grad_norm": 0.7529320120811462, + "learning_rate": 3.1212271531820336e-05, + "loss": 2.4667, + "step": 14857 + }, + { + "epoch": 1.1990961181502704, + "grad_norm": 0.6498474478721619, + "learning_rate": 3.1200813832335574e-05, + "loss": 2.3863, + "step": 14858 + }, + { + "epoch": 1.1991768218868533, + "grad_norm": 0.7587705850601196, + "learning_rate": 3.1189357847506383e-05, + "loss": 2.4962, + "step": 14859 + }, + { + "epoch": 1.1992575256234363, + "grad_norm": 0.674013078212738, + "learning_rate": 3.117790357761825e-05, + "loss": 2.3939, + "step": 14860 + }, + { + "epoch": 1.1993382293600194, + "grad_norm": 0.6546844840049744, + "learning_rate": 3.116645102295668e-05, + "loss": 
2.4775, + "step": 14861 + }, + { + "epoch": 1.1994189330966023, + "grad_norm": 0.7558320760726929, + "learning_rate": 3.11550001838071e-05, + "loss": 2.3918, + "step": 14862 + }, + { + "epoch": 1.1994996368331854, + "grad_norm": 0.7074883580207825, + "learning_rate": 3.114355106045486e-05, + "loss": 2.3969, + "step": 14863 + }, + { + "epoch": 1.1995803405697685, + "grad_norm": 0.706078290939331, + "learning_rate": 3.1132103653185305e-05, + "loss": 2.5028, + "step": 14864 + }, + { + "epoch": 1.1996610443063513, + "grad_norm": 0.6883544921875, + "learning_rate": 3.1120657962283764e-05, + "loss": 2.4407, + "step": 14865 + }, + { + "epoch": 1.1997417480429344, + "grad_norm": 0.6905466914176941, + "learning_rate": 3.110921398803551e-05, + "loss": 2.3893, + "step": 14866 + }, + { + "epoch": 1.1998224517795173, + "grad_norm": 0.6584910154342651, + "learning_rate": 3.109777173072569e-05, + "loss": 2.4515, + "step": 14867 + }, + { + "epoch": 1.1999031555161004, + "grad_norm": 0.6957471370697021, + "learning_rate": 3.108633119063951e-05, + "loss": 2.4483, + "step": 14868 + }, + { + "epoch": 1.1999838592526835, + "grad_norm": 0.6716276407241821, + "learning_rate": 3.1074892368062095e-05, + "loss": 2.4298, + "step": 14869 + }, + { + "epoch": 1.2000645629892663, + "grad_norm": 0.7350820302963257, + "learning_rate": 3.1063455263278543e-05, + "loss": 2.4088, + "step": 14870 + }, + { + "epoch": 1.2001452667258494, + "grad_norm": 0.7409771680831909, + "learning_rate": 3.105201987657388e-05, + "loss": 2.4089, + "step": 14871 + }, + { + "epoch": 1.2002259704624323, + "grad_norm": 0.7273266911506653, + "learning_rate": 3.104058620823315e-05, + "loss": 2.5149, + "step": 14872 + }, + { + "epoch": 1.2003066741990154, + "grad_norm": 0.6793962717056274, + "learning_rate": 3.102915425854124e-05, + "loss": 2.4422, + "step": 14873 + }, + { + "epoch": 1.2003873779355985, + "grad_norm": 0.72386234998703, + "learning_rate": 3.101772402778309e-05, + "loss": 2.4756, + "step": 14874 + }, + { + 
"epoch": 1.2004680816721813, + "grad_norm": 0.6530055999755859, + "learning_rate": 3.1006295516243625e-05, + "loss": 2.4145, + "step": 14875 + }, + { + "epoch": 1.2005487854087644, + "grad_norm": 0.7288365960121155, + "learning_rate": 3.099486872420758e-05, + "loss": 2.4565, + "step": 14876 + }, + { + "epoch": 1.2006294891453475, + "grad_norm": 0.6982102394104004, + "learning_rate": 3.09834436519598e-05, + "loss": 2.4788, + "step": 14877 + }, + { + "epoch": 1.2007101928819304, + "grad_norm": 0.7208256125450134, + "learning_rate": 3.0972020299785007e-05, + "loss": 2.4186, + "step": 14878 + }, + { + "epoch": 1.2007908966185135, + "grad_norm": 0.6928278803825378, + "learning_rate": 3.096059866796791e-05, + "loss": 2.4177, + "step": 14879 + }, + { + "epoch": 1.2008716003550965, + "grad_norm": 0.7145438194274902, + "learning_rate": 3.094917875679317e-05, + "loss": 2.4796, + "step": 14880 + }, + { + "epoch": 1.2009523040916794, + "grad_norm": 0.7126322388648987, + "learning_rate": 3.093776056654539e-05, + "loss": 2.4926, + "step": 14881 + }, + { + "epoch": 1.2010330078282625, + "grad_norm": 0.7775046825408936, + "learning_rate": 3.092634409750919e-05, + "loss": 2.4386, + "step": 14882 + }, + { + "epoch": 1.2011137115648454, + "grad_norm": 0.6387330889701843, + "learning_rate": 3.091492934996901e-05, + "loss": 2.4302, + "step": 14883 + }, + { + "epoch": 1.2011944153014285, + "grad_norm": 0.6883525252342224, + "learning_rate": 3.090351632420939e-05, + "loss": 2.4644, + "step": 14884 + }, + { + "epoch": 1.2012751190380115, + "grad_norm": 0.6698900461196899, + "learning_rate": 3.0892105020514795e-05, + "loss": 2.414, + "step": 14885 + }, + { + "epoch": 1.2013558227745944, + "grad_norm": 0.7124409079551697, + "learning_rate": 3.088069543916956e-05, + "loss": 2.4275, + "step": 14886 + }, + { + "epoch": 1.2014365265111775, + "grad_norm": 0.6996601223945618, + "learning_rate": 3.0869287580458076e-05, + "loss": 2.4725, + "step": 14887 + }, + { + "epoch": 1.2015172302477604, + 
"grad_norm": 0.653087317943573, + "learning_rate": 3.085788144466468e-05, + "loss": 2.383, + "step": 14888 + }, + { + "epoch": 1.2015979339843434, + "grad_norm": 0.7426899671554565, + "learning_rate": 3.0846477032073554e-05, + "loss": 2.4064, + "step": 14889 + }, + { + "epoch": 1.2016786377209265, + "grad_norm": 0.6417646408081055, + "learning_rate": 3.083507434296903e-05, + "loss": 2.3964, + "step": 14890 + }, + { + "epoch": 1.2017593414575094, + "grad_norm": 0.6301923394203186, + "learning_rate": 3.0823673377635274e-05, + "loss": 2.4285, + "step": 14891 + }, + { + "epoch": 1.2018400451940925, + "grad_norm": 0.7621259093284607, + "learning_rate": 3.081227413635638e-05, + "loss": 2.4731, + "step": 14892 + }, + { + "epoch": 1.2019207489306756, + "grad_norm": 0.6637598872184753, + "learning_rate": 3.080087661941648e-05, + "loss": 2.4126, + "step": 14893 + }, + { + "epoch": 1.2020014526672584, + "grad_norm": 0.6820287108421326, + "learning_rate": 3.078948082709964e-05, + "loss": 2.4108, + "step": 14894 + }, + { + "epoch": 1.2020821564038415, + "grad_norm": 0.7090989351272583, + "learning_rate": 3.077808675968983e-05, + "loss": 2.4678, + "step": 14895 + }, + { + "epoch": 1.2021628601404246, + "grad_norm": 0.7242181897163391, + "learning_rate": 3.076669441747105e-05, + "loss": 2.5346, + "step": 14896 + }, + { + "epoch": 1.2022435638770075, + "grad_norm": 0.7790088653564453, + "learning_rate": 3.075530380072722e-05, + "loss": 2.4436, + "step": 14897 + }, + { + "epoch": 1.2023242676135906, + "grad_norm": 0.6828821301460266, + "learning_rate": 3.074391490974225e-05, + "loss": 2.3767, + "step": 14898 + }, + { + "epoch": 1.2024049713501734, + "grad_norm": 0.709815502166748, + "learning_rate": 3.0732527744799945e-05, + "loss": 2.4139, + "step": 14899 + }, + { + "epoch": 1.2024856750867565, + "grad_norm": 0.6561180353164673, + "learning_rate": 3.07211423061841e-05, + "loss": 2.399, + "step": 14900 + }, + { + "epoch": 1.2025663788233396, + "grad_norm": 0.7122004628181458, + 
"learning_rate": 3.0709758594178495e-05, + "loss": 2.4314, + "step": 14901 + }, + { + "epoch": 1.2026470825599225, + "grad_norm": 0.6817516684532166, + "learning_rate": 3.0698376609066825e-05, + "loss": 2.4241, + "step": 14902 + }, + { + "epoch": 1.2027277862965056, + "grad_norm": 0.6848475337028503, + "learning_rate": 3.068699635113277e-05, + "loss": 2.4583, + "step": 14903 + }, + { + "epoch": 1.2028084900330884, + "grad_norm": 0.6567823886871338, + "learning_rate": 3.067561782065999e-05, + "loss": 2.3818, + "step": 14904 + }, + { + "epoch": 1.2028891937696715, + "grad_norm": 0.7373961806297302, + "learning_rate": 3.066424101793198e-05, + "loss": 2.4075, + "step": 14905 + }, + { + "epoch": 1.2029698975062546, + "grad_norm": 0.6968079209327698, + "learning_rate": 3.0652865943232346e-05, + "loss": 2.4701, + "step": 14906 + }, + { + "epoch": 1.2030506012428375, + "grad_norm": 0.7356292009353638, + "learning_rate": 3.064149259684459e-05, + "loss": 2.4188, + "step": 14907 + }, + { + "epoch": 1.2031313049794206, + "grad_norm": 0.7144857048988342, + "learning_rate": 3.063012097905211e-05, + "loss": 2.4411, + "step": 14908 + }, + { + "epoch": 1.2032120087160036, + "grad_norm": 0.734531044960022, + "learning_rate": 3.0618751090138365e-05, + "loss": 2.4595, + "step": 14909 + }, + { + "epoch": 1.2032927124525865, + "grad_norm": 0.6658234000205994, + "learning_rate": 3.060738293038669e-05, + "loss": 2.4206, + "step": 14910 + }, + { + "epoch": 1.2033734161891696, + "grad_norm": 0.678424596786499, + "learning_rate": 3.059601650008044e-05, + "loss": 2.4704, + "step": 14911 + }, + { + "epoch": 1.2034541199257527, + "grad_norm": 0.6852440237998962, + "learning_rate": 3.058465179950287e-05, + "loss": 2.46, + "step": 14912 + }, + { + "epoch": 1.2035348236623356, + "grad_norm": 0.702881395816803, + "learning_rate": 3.057328882893724e-05, + "loss": 2.4372, + "step": 14913 + }, + { + "epoch": 1.2036155273989186, + "grad_norm": 0.6978999972343445, + "learning_rate": 
3.056192758866676e-05, + "loss": 2.401, + "step": 14914 + }, + { + "epoch": 1.2036962311355015, + "grad_norm": 0.7070993185043335, + "learning_rate": 3.055056807897454e-05, + "loss": 2.3967, + "step": 14915 + }, + { + "epoch": 1.2037769348720846, + "grad_norm": 0.7159305810928345, + "learning_rate": 3.0539210300143693e-05, + "loss": 2.4388, + "step": 14916 + }, + { + "epoch": 1.2038576386086675, + "grad_norm": 0.6920869946479797, + "learning_rate": 3.0527854252457333e-05, + "loss": 2.441, + "step": 14917 + }, + { + "epoch": 1.2039383423452505, + "grad_norm": 0.7014884352684021, + "learning_rate": 3.0516499936198417e-05, + "loss": 2.4115, + "step": 14918 + }, + { + "epoch": 1.2040190460818336, + "grad_norm": 0.6754150986671448, + "learning_rate": 3.0505147351649955e-05, + "loss": 2.3722, + "step": 14919 + }, + { + "epoch": 1.2040997498184165, + "grad_norm": 0.7681791186332703, + "learning_rate": 3.0493796499094874e-05, + "loss": 2.4331, + "step": 14920 + }, + { + "epoch": 1.2041804535549996, + "grad_norm": 0.7265221476554871, + "learning_rate": 3.0482447378816082e-05, + "loss": 2.4806, + "step": 14921 + }, + { + "epoch": 1.2042611572915827, + "grad_norm": 0.6841520667076111, + "learning_rate": 3.047109999109642e-05, + "loss": 2.3896, + "step": 14922 + }, + { + "epoch": 1.2043418610281655, + "grad_norm": 0.746347963809967, + "learning_rate": 3.0459754336218737e-05, + "loss": 2.4081, + "step": 14923 + }, + { + "epoch": 1.2044225647647486, + "grad_norm": 0.6679818034172058, + "learning_rate": 3.0448410414465712e-05, + "loss": 2.4206, + "step": 14924 + }, + { + "epoch": 1.2045032685013317, + "grad_norm": 0.7122265100479126, + "learning_rate": 3.0437068226120114e-05, + "loss": 2.4217, + "step": 14925 + }, + { + "epoch": 1.2045839722379146, + "grad_norm": 0.7023499011993408, + "learning_rate": 3.0425727771464618e-05, + "loss": 2.4597, + "step": 14926 + }, + { + "epoch": 1.2046646759744977, + "grad_norm": 0.7304259538650513, + "learning_rate": 3.0414389050781876e-05, + 
"loss": 2.4915, + "step": 14927 + }, + { + "epoch": 1.2047453797110805, + "grad_norm": 0.7209908962249756, + "learning_rate": 3.0403052064354442e-05, + "loss": 2.4163, + "step": 14928 + }, + { + "epoch": 1.2048260834476636, + "grad_norm": 0.7367275953292847, + "learning_rate": 3.0391716812464865e-05, + "loss": 2.4192, + "step": 14929 + }, + { + "epoch": 1.2049067871842467, + "grad_norm": 0.6576591730117798, + "learning_rate": 3.0380383295395674e-05, + "loss": 2.4606, + "step": 14930 + }, + { + "epoch": 1.2049874909208296, + "grad_norm": 0.7082500457763672, + "learning_rate": 3.0369051513429315e-05, + "loss": 2.4079, + "step": 14931 + }, + { + "epoch": 1.2050681946574127, + "grad_norm": 0.6770346760749817, + "learning_rate": 3.03577214668482e-05, + "loss": 2.45, + "step": 14932 + }, + { + "epoch": 1.2051488983939955, + "grad_norm": 0.6979790925979614, + "learning_rate": 3.034639315593476e-05, + "loss": 2.3966, + "step": 14933 + }, + { + "epoch": 1.2052296021305786, + "grad_norm": 0.6863394975662231, + "learning_rate": 3.033506658097124e-05, + "loss": 2.4637, + "step": 14934 + }, + { + "epoch": 1.2053103058671617, + "grad_norm": 0.7522799372673035, + "learning_rate": 3.0323741742239963e-05, + "loss": 2.4585, + "step": 14935 + }, + { + "epoch": 1.2053910096037446, + "grad_norm": 0.7119878530502319, + "learning_rate": 3.031241864002321e-05, + "loss": 2.4473, + "step": 14936 + }, + { + "epoch": 1.2054717133403277, + "grad_norm": 0.690861701965332, + "learning_rate": 3.030109727460312e-05, + "loss": 2.4564, + "step": 14937 + }, + { + "epoch": 1.2055524170769107, + "grad_norm": 0.6825447082519531, + "learning_rate": 3.0289777646261886e-05, + "loss": 2.4511, + "step": 14938 + }, + { + "epoch": 1.2056331208134936, + "grad_norm": 0.7404600977897644, + "learning_rate": 3.027845975528164e-05, + "loss": 2.4461, + "step": 14939 + }, + { + "epoch": 1.2057138245500767, + "grad_norm": 0.6871766448020935, + "learning_rate": 3.026714360194437e-05, + "loss": 2.4486, + "step": 14940 + 
}, + { + "epoch": 1.2057945282866598, + "grad_norm": 0.6646476984024048, + "learning_rate": 3.02558291865322e-05, + "loss": 2.378, + "step": 14941 + }, + { + "epoch": 1.2058752320232426, + "grad_norm": 0.6998385787010193, + "learning_rate": 3.024451650932707e-05, + "loss": 2.4646, + "step": 14942 + }, + { + "epoch": 1.2059559357598257, + "grad_norm": 0.6763097047805786, + "learning_rate": 3.023320557061098e-05, + "loss": 2.3971, + "step": 14943 + }, + { + "epoch": 1.2060366394964086, + "grad_norm": 0.7409633994102478, + "learning_rate": 3.0221896370665736e-05, + "loss": 2.4405, + "step": 14944 + }, + { + "epoch": 1.2061173432329917, + "grad_norm": 0.6972076892852783, + "learning_rate": 3.0210588909773242e-05, + "loss": 2.3935, + "step": 14945 + }, + { + "epoch": 1.2061980469695748, + "grad_norm": 0.6898512840270996, + "learning_rate": 3.0199283188215333e-05, + "loss": 2.4173, + "step": 14946 + }, + { + "epoch": 1.2062787507061576, + "grad_norm": 0.6878097057342529, + "learning_rate": 3.0187979206273707e-05, + "loss": 2.44, + "step": 14947 + }, + { + "epoch": 1.2063594544427407, + "grad_norm": 0.6629695296287537, + "learning_rate": 3.0176676964230143e-05, + "loss": 2.3836, + "step": 14948 + }, + { + "epoch": 1.2064401581793236, + "grad_norm": 0.717654824256897, + "learning_rate": 3.0165376462366336e-05, + "loss": 2.415, + "step": 14949 + }, + { + "epoch": 1.2065208619159067, + "grad_norm": 0.7526129484176636, + "learning_rate": 3.0154077700963867e-05, + "loss": 2.4985, + "step": 14950 + }, + { + "epoch": 1.2066015656524898, + "grad_norm": 0.6867300271987915, + "learning_rate": 3.014278068030435e-05, + "loss": 2.395, + "step": 14951 + }, + { + "epoch": 1.2066822693890726, + "grad_norm": 0.7321466207504272, + "learning_rate": 3.0131485400669356e-05, + "loss": 2.4503, + "step": 14952 + }, + { + "epoch": 1.2067629731256557, + "grad_norm": 0.6915534734725952, + "learning_rate": 3.0120191862340387e-05, + "loss": 2.398, + "step": 14953 + }, + { + "epoch": 
1.2068436768622388, + "grad_norm": 0.7017377018928528, + "learning_rate": 3.01089000655989e-05, + "loss": 2.4367, + "step": 14954 + }, + { + "epoch": 1.2069243805988217, + "grad_norm": 0.7032245397567749, + "learning_rate": 3.0097610010726353e-05, + "loss": 2.4078, + "step": 14955 + }, + { + "epoch": 1.2070050843354048, + "grad_norm": 0.6795478463172913, + "learning_rate": 3.008632169800406e-05, + "loss": 2.3508, + "step": 14956 + }, + { + "epoch": 1.2070857880719879, + "grad_norm": 0.7149559855461121, + "learning_rate": 3.007503512771339e-05, + "loss": 2.4023, + "step": 14957 + }, + { + "epoch": 1.2071664918085707, + "grad_norm": 0.724756121635437, + "learning_rate": 3.006375030013563e-05, + "loss": 2.4439, + "step": 14958 + }, + { + "epoch": 1.2072471955451538, + "grad_norm": 0.7233348488807678, + "learning_rate": 3.005246721555205e-05, + "loss": 2.3819, + "step": 14959 + }, + { + "epoch": 1.2073278992817367, + "grad_norm": 0.700322151184082, + "learning_rate": 3.0041185874243815e-05, + "loss": 2.4222, + "step": 14960 + }, + { + "epoch": 1.2074086030183198, + "grad_norm": 0.7268145680427551, + "learning_rate": 3.002990627649209e-05, + "loss": 2.4698, + "step": 14961 + }, + { + "epoch": 1.2074893067549028, + "grad_norm": 0.6885111331939697, + "learning_rate": 3.001862842257801e-05, + "loss": 2.4505, + "step": 14962 + }, + { + "epoch": 1.2075700104914857, + "grad_norm": 0.7237974405288696, + "learning_rate": 3.0007352312782632e-05, + "loss": 2.422, + "step": 14963 + }, + { + "epoch": 1.2076507142280688, + "grad_norm": 0.7214741110801697, + "learning_rate": 2.9996077947387015e-05, + "loss": 2.4428, + "step": 14964 + }, + { + "epoch": 1.2077314179646517, + "grad_norm": 0.7264460921287537, + "learning_rate": 2.998480532667215e-05, + "loss": 2.4669, + "step": 14965 + }, + { + "epoch": 1.2078121217012348, + "grad_norm": 0.7055517435073853, + "learning_rate": 2.9973534450918928e-05, + "loss": 2.5082, + "step": 14966 + }, + { + "epoch": 1.2078928254378178, + "grad_norm": 
0.6886781454086304, + "learning_rate": 2.9962265320408268e-05, + "loss": 2.4697, + "step": 14967 + }, + { + "epoch": 1.2079735291744007, + "grad_norm": 0.6875878572463989, + "learning_rate": 2.9950997935421076e-05, + "loss": 2.4384, + "step": 14968 + }, + { + "epoch": 1.2080542329109838, + "grad_norm": 0.7586886882781982, + "learning_rate": 2.99397322962381e-05, + "loss": 2.4088, + "step": 14969 + }, + { + "epoch": 1.2081349366475669, + "grad_norm": 0.6744365096092224, + "learning_rate": 2.992846840314013e-05, + "loss": 2.4109, + "step": 14970 + }, + { + "epoch": 1.2082156403841497, + "grad_norm": 0.6589661240577698, + "learning_rate": 2.9917206256407893e-05, + "loss": 2.4386, + "step": 14971 + }, + { + "epoch": 1.2082963441207328, + "grad_norm": 0.6787264943122864, + "learning_rate": 2.990594585632208e-05, + "loss": 2.401, + "step": 14972 + }, + { + "epoch": 1.2083770478573157, + "grad_norm": 0.710517406463623, + "learning_rate": 2.9894687203163317e-05, + "loss": 2.4813, + "step": 14973 + }, + { + "epoch": 1.2084577515938988, + "grad_norm": 0.676110029220581, + "learning_rate": 2.988343029721221e-05, + "loss": 2.4654, + "step": 14974 + }, + { + "epoch": 1.2085384553304819, + "grad_norm": 0.6940518617630005, + "learning_rate": 2.9872175138749336e-05, + "loss": 2.4188, + "step": 14975 + }, + { + "epoch": 1.2086191590670647, + "grad_norm": 0.6849910020828247, + "learning_rate": 2.9860921728055147e-05, + "loss": 2.384, + "step": 14976 + }, + { + "epoch": 1.2086998628036478, + "grad_norm": 0.6902467608451843, + "learning_rate": 2.9849670065410128e-05, + "loss": 2.4364, + "step": 14977 + }, + { + "epoch": 1.2087805665402307, + "grad_norm": 0.6742224097251892, + "learning_rate": 2.9838420151094747e-05, + "loss": 2.5085, + "step": 14978 + }, + { + "epoch": 1.2088612702768138, + "grad_norm": 0.6635094285011292, + "learning_rate": 2.9827171985389303e-05, + "loss": 2.3635, + "step": 14979 + }, + { + "epoch": 1.2089419740133969, + "grad_norm": 0.7189158201217651, + 
"learning_rate": 2.9815925568574165e-05, + "loss": 2.458, + "step": 14980 + }, + { + "epoch": 1.2090226777499797, + "grad_norm": 0.7370143532752991, + "learning_rate": 2.9804680900929628e-05, + "loss": 2.4543, + "step": 14981 + }, + { + "epoch": 1.2091033814865628, + "grad_norm": 0.7410217523574829, + "learning_rate": 2.979343798273593e-05, + "loss": 2.4537, + "step": 14982 + }, + { + "epoch": 1.209184085223146, + "grad_norm": 0.7525770664215088, + "learning_rate": 2.9782196814273277e-05, + "loss": 2.5147, + "step": 14983 + }, + { + "epoch": 1.2092647889597288, + "grad_norm": 0.7302291393280029, + "learning_rate": 2.9770957395821863e-05, + "loss": 2.4711, + "step": 14984 + }, + { + "epoch": 1.2093454926963119, + "grad_norm": 0.7154920101165771, + "learning_rate": 2.975971972766175e-05, + "loss": 2.5224, + "step": 14985 + }, + { + "epoch": 1.209426196432895, + "grad_norm": 0.6827684640884399, + "learning_rate": 2.9748483810073025e-05, + "loss": 2.4477, + "step": 14986 + }, + { + "epoch": 1.2095069001694778, + "grad_norm": 0.7753484845161438, + "learning_rate": 2.973724964333575e-05, + "loss": 2.4257, + "step": 14987 + }, + { + "epoch": 1.209587603906061, + "grad_norm": 0.7146809101104736, + "learning_rate": 2.9726017227729862e-05, + "loss": 2.3953, + "step": 14988 + }, + { + "epoch": 1.2096683076426438, + "grad_norm": 0.7360730767250061, + "learning_rate": 2.9714786563535313e-05, + "loss": 2.3774, + "step": 14989 + }, + { + "epoch": 1.2097490113792269, + "grad_norm": 0.7159923911094666, + "learning_rate": 2.970355765103201e-05, + "loss": 2.4068, + "step": 14990 + }, + { + "epoch": 1.20982971511581, + "grad_norm": 0.6732171773910522, + "learning_rate": 2.969233049049982e-05, + "loss": 2.4215, + "step": 14991 + }, + { + "epoch": 1.2099104188523928, + "grad_norm": 0.749812126159668, + "learning_rate": 2.968110508221853e-05, + "loss": 2.4415, + "step": 14992 + }, + { + "epoch": 1.209991122588976, + "grad_norm": 0.7185530662536621, + "learning_rate": 
2.9669881426467916e-05, + "loss": 2.4536, + "step": 14993 + }, + { + "epoch": 1.2100718263255588, + "grad_norm": 0.6757143139839172, + "learning_rate": 2.9658659523527733e-05, + "loss": 2.3892, + "step": 14994 + }, + { + "epoch": 1.2101525300621419, + "grad_norm": 0.7187495231628418, + "learning_rate": 2.96474393736776e-05, + "loss": 2.434, + "step": 14995 + }, + { + "epoch": 1.210233233798725, + "grad_norm": 0.7016372680664062, + "learning_rate": 2.9636220977197182e-05, + "loss": 2.4903, + "step": 14996 + }, + { + "epoch": 1.2103139375353078, + "grad_norm": 0.7528983950614929, + "learning_rate": 2.9625004334366103e-05, + "loss": 2.3829, + "step": 14997 + }, + { + "epoch": 1.210394641271891, + "grad_norm": 0.6735692024230957, + "learning_rate": 2.9613789445463837e-05, + "loss": 2.3844, + "step": 14998 + }, + { + "epoch": 1.210475345008474, + "grad_norm": 0.6825322508811951, + "learning_rate": 2.9602576310769935e-05, + "loss": 2.4691, + "step": 14999 + }, + { + "epoch": 1.2105560487450568, + "grad_norm": 0.7507675290107727, + "learning_rate": 2.959136493056389e-05, + "loss": 2.4605, + "step": 15000 + }, + { + "epoch": 1.2105560487450568, + "eval_loss": 2.3882925510406494, + "eval_runtime": 1014.0781, + "eval_samples_per_second": 2.584, + "eval_steps_per_second": 0.431, + "step": 15000 + }, + { + "epoch": 1.21063675248164, + "grad_norm": 0.6937146782875061, + "learning_rate": 2.9580155305125044e-05, + "loss": 2.4444, + "step": 15001 + }, + { + "epoch": 1.210717456218223, + "grad_norm": 0.6572179794311523, + "learning_rate": 2.9568947434732775e-05, + "loss": 2.4373, + "step": 15002 + }, + { + "epoch": 1.2107981599548059, + "grad_norm": 0.7420738935470581, + "learning_rate": 2.955774131966651e-05, + "loss": 2.4046, + "step": 15003 + }, + { + "epoch": 1.210878863691389, + "grad_norm": 0.7952237129211426, + "learning_rate": 2.954653696020543e-05, + "loss": 2.4082, + "step": 15004 + }, + { + "epoch": 1.2109595674279718, + "grad_norm": 0.6640750765800476, + 
"learning_rate": 2.9535334356628817e-05, + "loss": 2.4109, + "step": 15005 + }, + { + "epoch": 1.211040271164555, + "grad_norm": 0.6968019008636475, + "learning_rate": 2.952413350921588e-05, + "loss": 2.3991, + "step": 15006 + }, + { + "epoch": 1.211120974901138, + "grad_norm": 0.7174221277236938, + "learning_rate": 2.9512934418245787e-05, + "loss": 2.3909, + "step": 15007 + }, + { + "epoch": 1.2112016786377209, + "grad_norm": 0.6854268908500671, + "learning_rate": 2.9501737083997595e-05, + "loss": 2.4321, + "step": 15008 + }, + { + "epoch": 1.211282382374304, + "grad_norm": 0.6705672740936279, + "learning_rate": 2.949054150675039e-05, + "loss": 2.4749, + "step": 15009 + }, + { + "epoch": 1.2113630861108868, + "grad_norm": 0.7871068716049194, + "learning_rate": 2.9479347686783244e-05, + "loss": 2.424, + "step": 15010 + }, + { + "epoch": 1.21144378984747, + "grad_norm": 0.8194620609283447, + "learning_rate": 2.946815562437506e-05, + "loss": 2.461, + "step": 15011 + }, + { + "epoch": 1.211524493584053, + "grad_norm": 0.673367977142334, + "learning_rate": 2.9456965319804818e-05, + "loss": 2.4212, + "step": 15012 + }, + { + "epoch": 1.2116051973206359, + "grad_norm": 0.6630001068115234, + "learning_rate": 2.9445776773351397e-05, + "loss": 2.4393, + "step": 15013 + }, + { + "epoch": 1.211685901057219, + "grad_norm": 0.676170825958252, + "learning_rate": 2.943458998529365e-05, + "loss": 2.3889, + "step": 15014 + }, + { + "epoch": 1.211766604793802, + "grad_norm": 0.6951417326927185, + "learning_rate": 2.942340495591037e-05, + "loss": 2.4088, + "step": 15015 + }, + { + "epoch": 1.211847308530385, + "grad_norm": 0.6909857988357544, + "learning_rate": 2.941222168548037e-05, + "loss": 2.4282, + "step": 15016 + }, + { + "epoch": 1.211928012266968, + "grad_norm": 0.653264045715332, + "learning_rate": 2.9401040174282292e-05, + "loss": 2.4369, + "step": 15017 + }, + { + "epoch": 1.2120087160035509, + "grad_norm": 0.6994543075561523, + "learning_rate": 2.938986042259484e-05, + 
"loss": 2.419, + "step": 15018 + }, + { + "epoch": 1.212089419740134, + "grad_norm": 0.709015965461731, + "learning_rate": 2.9378682430696668e-05, + "loss": 2.4747, + "step": 15019 + }, + { + "epoch": 1.212170123476717, + "grad_norm": 0.6899579167366028, + "learning_rate": 2.9367506198866313e-05, + "loss": 2.4134, + "step": 15020 + }, + { + "epoch": 1.2122508272133, + "grad_norm": 0.6811912059783936, + "learning_rate": 2.9356331727382337e-05, + "loss": 2.449, + "step": 15021 + }, + { + "epoch": 1.212331530949883, + "grad_norm": 0.8119748830795288, + "learning_rate": 2.9345159016523237e-05, + "loss": 2.4463, + "step": 15022 + }, + { + "epoch": 1.2124122346864659, + "grad_norm": 0.7323578000068665, + "learning_rate": 2.9333988066567463e-05, + "loss": 2.4305, + "step": 15023 + }, + { + "epoch": 1.212492938423049, + "grad_norm": 0.6639837622642517, + "learning_rate": 2.9322818877793436e-05, + "loss": 2.4237, + "step": 15024 + }, + { + "epoch": 1.212573642159632, + "grad_norm": 0.669623076915741, + "learning_rate": 2.9311651450479516e-05, + "loss": 2.4436, + "step": 15025 + }, + { + "epoch": 1.212654345896215, + "grad_norm": 0.7200437784194946, + "learning_rate": 2.9300485784904054e-05, + "loss": 2.4399, + "step": 15026 + }, + { + "epoch": 1.212735049632798, + "grad_norm": 0.7015525102615356, + "learning_rate": 2.9289321881345254e-05, + "loss": 2.4696, + "step": 15027 + }, + { + "epoch": 1.212815753369381, + "grad_norm": 0.74539715051651, + "learning_rate": 2.9278159740081402e-05, + "loss": 2.4204, + "step": 15028 + }, + { + "epoch": 1.212896457105964, + "grad_norm": 0.6373662352561951, + "learning_rate": 2.9266999361390713e-05, + "loss": 2.4273, + "step": 15029 + }, + { + "epoch": 1.212977160842547, + "grad_norm": 0.8213370442390442, + "learning_rate": 2.9255840745551256e-05, + "loss": 2.4166, + "step": 15030 + }, + { + "epoch": 1.2130578645791301, + "grad_norm": 0.7386181354522705, + "learning_rate": 2.9244683892841185e-05, + "loss": 2.3973, + "step": 15031 + }, + { + 
"epoch": 1.213138568315713, + "grad_norm": 0.7939273118972778, + "learning_rate": 2.9233528803538534e-05, + "loss": 2.5593, + "step": 15032 + }, + { + "epoch": 1.213219272052296, + "grad_norm": 0.7580689191818237, + "learning_rate": 2.9222375477921347e-05, + "loss": 2.4255, + "step": 15033 + }, + { + "epoch": 1.213299975788879, + "grad_norm": 0.7680409550666809, + "learning_rate": 2.9211223916267573e-05, + "loss": 2.4447, + "step": 15034 + }, + { + "epoch": 1.213380679525462, + "grad_norm": 0.6998565196990967, + "learning_rate": 2.9200074118855135e-05, + "loss": 2.4061, + "step": 15035 + }, + { + "epoch": 1.2134613832620451, + "grad_norm": 0.6673001050949097, + "learning_rate": 2.9188926085961954e-05, + "loss": 2.3989, + "step": 15036 + }, + { + "epoch": 1.213542086998628, + "grad_norm": 0.683215320110321, + "learning_rate": 2.9177779817865815e-05, + "loss": 2.4078, + "step": 15037 + }, + { + "epoch": 1.213622790735211, + "grad_norm": 0.696967363357544, + "learning_rate": 2.9166635314844527e-05, + "loss": 2.4224, + "step": 15038 + }, + { + "epoch": 1.213703494471794, + "grad_norm": 0.6930364370346069, + "learning_rate": 2.915549257717588e-05, + "loss": 2.4112, + "step": 15039 + }, + { + "epoch": 1.213784198208377, + "grad_norm": 0.7387405633926392, + "learning_rate": 2.914435160513752e-05, + "loss": 2.4458, + "step": 15040 + }, + { + "epoch": 1.21386490194496, + "grad_norm": 0.6615941524505615, + "learning_rate": 2.913321239900714e-05, + "loss": 2.4406, + "step": 15041 + }, + { + "epoch": 1.213945605681543, + "grad_norm": 0.7520569562911987, + "learning_rate": 2.912207495906235e-05, + "loss": 2.3991, + "step": 15042 + }, + { + "epoch": 1.214026309418126, + "grad_norm": 0.6952454447746277, + "learning_rate": 2.911093928558072e-05, + "loss": 2.4404, + "step": 15043 + }, + { + "epoch": 1.2141070131547091, + "grad_norm": 0.7595344185829163, + "learning_rate": 2.9099805378839794e-05, + "loss": 2.551, + "step": 15044 + }, + { + "epoch": 1.214187716891292, + "grad_norm": 
0.6645220518112183, + "learning_rate": 2.9088673239117094e-05, + "loss": 2.4167, + "step": 15045 + }, + { + "epoch": 1.214268420627875, + "grad_norm": 0.6433377861976624, + "learning_rate": 2.907754286668998e-05, + "loss": 2.3873, + "step": 15046 + }, + { + "epoch": 1.2143491243644582, + "grad_norm": 0.6806936860084534, + "learning_rate": 2.9066414261835894e-05, + "loss": 2.3868, + "step": 15047 + }, + { + "epoch": 1.214429828101041, + "grad_norm": 0.7261343598365784, + "learning_rate": 2.905528742483222e-05, + "loss": 2.4785, + "step": 15048 + }, + { + "epoch": 1.2145105318376241, + "grad_norm": 0.6495440602302551, + "learning_rate": 2.9044162355956196e-05, + "loss": 2.4167, + "step": 15049 + }, + { + "epoch": 1.214591235574207, + "grad_norm": 0.6816607117652893, + "learning_rate": 2.9033039055485135e-05, + "loss": 2.459, + "step": 15050 + }, + { + "epoch": 1.21467193931079, + "grad_norm": 0.6624214053153992, + "learning_rate": 2.902191752369624e-05, + "loss": 2.4498, + "step": 15051 + }, + { + "epoch": 1.2147526430473732, + "grad_norm": 0.6800024509429932, + "learning_rate": 2.9010797760866737e-05, + "loss": 2.4442, + "step": 15052 + }, + { + "epoch": 1.214833346783956, + "grad_norm": 0.711705207824707, + "learning_rate": 2.8999679767273667e-05, + "loss": 2.422, + "step": 15053 + }, + { + "epoch": 1.2149140505205391, + "grad_norm": 0.6854784488677979, + "learning_rate": 2.898856354319419e-05, + "loss": 2.4567, + "step": 15054 + }, + { + "epoch": 1.214994754257122, + "grad_norm": 0.6676114797592163, + "learning_rate": 2.8977449088905373e-05, + "loss": 2.3913, + "step": 15055 + }, + { + "epoch": 1.215075457993705, + "grad_norm": 0.6893348693847656, + "learning_rate": 2.8966336404684145e-05, + "loss": 2.4407, + "step": 15056 + }, + { + "epoch": 1.2151561617302882, + "grad_norm": 0.6749289035797119, + "learning_rate": 2.8955225490807514e-05, + "loss": 2.409, + "step": 15057 + }, + { + "epoch": 1.215236865466871, + "grad_norm": 0.6998956203460693, + "learning_rate": 
2.8944116347552387e-05, + "loss": 2.4297, + "step": 15058 + }, + { + "epoch": 1.2153175692034541, + "grad_norm": 0.7040024399757385, + "learning_rate": 2.8933008975195596e-05, + "loss": 2.4262, + "step": 15059 + }, + { + "epoch": 1.2153982729400372, + "grad_norm": 0.6638362407684326, + "learning_rate": 2.8921903374014005e-05, + "loss": 2.4355, + "step": 15060 + }, + { + "epoch": 1.21547897667662, + "grad_norm": 0.6864547729492188, + "learning_rate": 2.8910799544284407e-05, + "loss": 2.4493, + "step": 15061 + }, + { + "epoch": 1.2155596804132032, + "grad_norm": 0.707383394241333, + "learning_rate": 2.8899697486283474e-05, + "loss": 2.4604, + "step": 15062 + }, + { + "epoch": 1.2156403841497863, + "grad_norm": 0.7121397852897644, + "learning_rate": 2.888859720028795e-05, + "loss": 2.4272, + "step": 15063 + }, + { + "epoch": 1.2157210878863691, + "grad_norm": 0.7600439786911011, + "learning_rate": 2.8877498686574455e-05, + "loss": 2.4499, + "step": 15064 + }, + { + "epoch": 1.2158017916229522, + "grad_norm": 0.6654962301254272, + "learning_rate": 2.886640194541962e-05, + "loss": 2.4632, + "step": 15065 + }, + { + "epoch": 1.215882495359535, + "grad_norm": 0.7138063311576843, + "learning_rate": 2.8855306977099994e-05, + "loss": 2.4321, + "step": 15066 + }, + { + "epoch": 1.2159631990961182, + "grad_norm": 0.672604501247406, + "learning_rate": 2.884421378189208e-05, + "loss": 2.4026, + "step": 15067 + }, + { + "epoch": 1.2160439028327013, + "grad_norm": 0.6894693970680237, + "learning_rate": 2.8833122360072405e-05, + "loss": 2.4213, + "step": 15068 + }, + { + "epoch": 1.2161246065692841, + "grad_norm": 0.6784985065460205, + "learning_rate": 2.8822032711917325e-05, + "loss": 2.4207, + "step": 15069 + }, + { + "epoch": 1.2162053103058672, + "grad_norm": 0.6569294929504395, + "learning_rate": 2.8810944837703248e-05, + "loss": 2.4142, + "step": 15070 + }, + { + "epoch": 1.21628601404245, + "grad_norm": 0.7240702509880066, + "learning_rate": 2.879985873770654e-05, + "loss": 
2.4173, + "step": 15071 + }, + { + "epoch": 1.2163667177790332, + "grad_norm": 0.6935575604438782, + "learning_rate": 2.8788774412203444e-05, + "loss": 2.4487, + "step": 15072 + }, + { + "epoch": 1.2164474215156162, + "grad_norm": 0.6903246641159058, + "learning_rate": 2.8777691861470234e-05, + "loss": 2.4193, + "step": 15073 + }, + { + "epoch": 1.216528125252199, + "grad_norm": 0.7982182502746582, + "learning_rate": 2.8766611085783123e-05, + "loss": 2.492, + "step": 15074 + }, + { + "epoch": 1.2166088289887822, + "grad_norm": 0.6958058476448059, + "learning_rate": 2.875553208541827e-05, + "loss": 2.4198, + "step": 15075 + }, + { + "epoch": 1.2166895327253653, + "grad_norm": 0.6869969964027405, + "learning_rate": 2.8744454860651794e-05, + "loss": 2.3768, + "step": 15076 + }, + { + "epoch": 1.2167702364619482, + "grad_norm": 0.7263007760047913, + "learning_rate": 2.8733379411759796e-05, + "loss": 2.386, + "step": 15077 + }, + { + "epoch": 1.2168509401985312, + "grad_norm": 0.7010302543640137, + "learning_rate": 2.872230573901825e-05, + "loss": 2.4417, + "step": 15078 + }, + { + "epoch": 1.216931643935114, + "grad_norm": 0.818980872631073, + "learning_rate": 2.8711233842703156e-05, + "loss": 2.433, + "step": 15079 + }, + { + "epoch": 1.2170123476716972, + "grad_norm": 0.6937929391860962, + "learning_rate": 2.87001637230905e-05, + "loss": 2.379, + "step": 15080 + }, + { + "epoch": 1.2170930514082803, + "grad_norm": 0.6954175233840942, + "learning_rate": 2.868909538045612e-05, + "loss": 2.4296, + "step": 15081 + }, + { + "epoch": 1.2171737551448631, + "grad_norm": 0.7177354097366333, + "learning_rate": 2.8678028815075887e-05, + "loss": 2.3978, + "step": 15082 + }, + { + "epoch": 1.2172544588814462, + "grad_norm": 0.7100846171379089, + "learning_rate": 2.8666964027225607e-05, + "loss": 2.4566, + "step": 15083 + }, + { + "epoch": 1.217335162618029, + "grad_norm": 0.6909635066986084, + "learning_rate": 2.8655901017181064e-05, + "loss": 2.4772, + "step": 15084 + }, + { + 
"epoch": 1.2174158663546122, + "grad_norm": 0.7319501638412476, + "learning_rate": 2.8644839785217947e-05, + "loss": 2.4402, + "step": 15085 + }, + { + "epoch": 1.2174965700911953, + "grad_norm": 0.6691421270370483, + "learning_rate": 2.8633780331611958e-05, + "loss": 2.4465, + "step": 15086 + }, + { + "epoch": 1.2175772738277781, + "grad_norm": 0.7028824687004089, + "learning_rate": 2.8622722656638745e-05, + "loss": 2.4765, + "step": 15087 + }, + { + "epoch": 1.2176579775643612, + "grad_norm": 0.7428398728370667, + "learning_rate": 2.861166676057383e-05, + "loss": 2.441, + "step": 15088 + }, + { + "epoch": 1.2177386813009443, + "grad_norm": 0.6715269684791565, + "learning_rate": 2.8600612643692803e-05, + "loss": 2.4621, + "step": 15089 + }, + { + "epoch": 1.2178193850375272, + "grad_norm": 0.6768512725830078, + "learning_rate": 2.8589560306271168e-05, + "loss": 2.4257, + "step": 15090 + }, + { + "epoch": 1.2179000887741103, + "grad_norm": 0.7442535758018494, + "learning_rate": 2.8578509748584326e-05, + "loss": 2.424, + "step": 15091 + }, + { + "epoch": 1.2179807925106934, + "grad_norm": 0.7275974154472351, + "learning_rate": 2.8567460970907722e-05, + "loss": 2.4698, + "step": 15092 + }, + { + "epoch": 1.2180614962472762, + "grad_norm": 0.7050346732139587, + "learning_rate": 2.8556413973516727e-05, + "loss": 2.4734, + "step": 15093 + }, + { + "epoch": 1.2181421999838593, + "grad_norm": 0.7325939536094666, + "learning_rate": 2.854536875668664e-05, + "loss": 2.4166, + "step": 15094 + }, + { + "epoch": 1.2182229037204422, + "grad_norm": 0.6764184236526489, + "learning_rate": 2.8534325320692746e-05, + "loss": 2.4742, + "step": 15095 + }, + { + "epoch": 1.2183036074570253, + "grad_norm": 0.7405500411987305, + "learning_rate": 2.8523283665810318e-05, + "loss": 2.3959, + "step": 15096 + }, + { + "epoch": 1.2183843111936083, + "grad_norm": 0.6714199185371399, + "learning_rate": 2.8512243792314465e-05, + "loss": 2.4571, + "step": 15097 + }, + { + "epoch": 
1.2184650149301912, + "grad_norm": 0.6779391169548035, + "learning_rate": 2.8501205700480372e-05, + "loss": 2.3745, + "step": 15098 + }, + { + "epoch": 1.2185457186667743, + "grad_norm": 0.6876079440116882, + "learning_rate": 2.8490169390583134e-05, + "loss": 2.4432, + "step": 15099 + }, + { + "epoch": 1.2186264224033572, + "grad_norm": 0.7092362642288208, + "learning_rate": 2.8479134862897826e-05, + "loss": 2.4716, + "step": 15100 + }, + { + "epoch": 1.2187071261399403, + "grad_norm": 0.6901989579200745, + "learning_rate": 2.8468102117699414e-05, + "loss": 2.417, + "step": 15101 + }, + { + "epoch": 1.2187878298765233, + "grad_norm": 0.7011592984199524, + "learning_rate": 2.8457071155262884e-05, + "loss": 2.4439, + "step": 15102 + }, + { + "epoch": 1.2188685336131062, + "grad_norm": 0.6923472285270691, + "learning_rate": 2.8446041975863146e-05, + "loss": 2.4247, + "step": 15103 + }, + { + "epoch": 1.2189492373496893, + "grad_norm": 0.6948748230934143, + "learning_rate": 2.843501457977509e-05, + "loss": 2.3902, + "step": 15104 + }, + { + "epoch": 1.2190299410862724, + "grad_norm": 0.7034386396408081, + "learning_rate": 2.842398896727354e-05, + "loss": 2.4277, + "step": 15105 + }, + { + "epoch": 1.2191106448228552, + "grad_norm": 0.7965617775917053, + "learning_rate": 2.8412965138633318e-05, + "loss": 2.435, + "step": 15106 + }, + { + "epoch": 1.2191913485594383, + "grad_norm": 0.7371121644973755, + "learning_rate": 2.8401943094129112e-05, + "loss": 2.3928, + "step": 15107 + }, + { + "epoch": 1.2192720522960214, + "grad_norm": 0.7079561352729797, + "learning_rate": 2.839092283403564e-05, + "loss": 2.4706, + "step": 15108 + }, + { + "epoch": 1.2193527560326043, + "grad_norm": 0.6711337566375732, + "learning_rate": 2.8379904358627584e-05, + "loss": 2.4272, + "step": 15109 + }, + { + "epoch": 1.2194334597691874, + "grad_norm": 0.6840410828590393, + "learning_rate": 2.836888766817951e-05, + "loss": 2.4174, + "step": 15110 + }, + { + "epoch": 1.2195141635057702, + 
"grad_norm": 0.700366199016571, + "learning_rate": 2.8357872762965986e-05, + "loss": 2.4667, + "step": 15111 + }, + { + "epoch": 1.2195948672423533, + "grad_norm": 0.7090682983398438, + "learning_rate": 2.8346859643261593e-05, + "loss": 2.3748, + "step": 15112 + }, + { + "epoch": 1.2196755709789364, + "grad_norm": 0.7965148687362671, + "learning_rate": 2.8335848309340717e-05, + "loss": 2.5138, + "step": 15113 + }, + { + "epoch": 1.2197562747155193, + "grad_norm": 0.7845773696899414, + "learning_rate": 2.8324838761477833e-05, + "loss": 2.4274, + "step": 15114 + }, + { + "epoch": 1.2198369784521024, + "grad_norm": 0.6545087099075317, + "learning_rate": 2.831383099994731e-05, + "loss": 2.4311, + "step": 15115 + }, + { + "epoch": 1.2199176821886852, + "grad_norm": 0.6846331357955933, + "learning_rate": 2.830282502502356e-05, + "loss": 2.4239, + "step": 15116 + }, + { + "epoch": 1.2199983859252683, + "grad_norm": 0.7062236070632935, + "learning_rate": 2.8291820836980798e-05, + "loss": 2.4429, + "step": 15117 + }, + { + "epoch": 1.2200790896618514, + "grad_norm": 0.7526285648345947, + "learning_rate": 2.8280818436093315e-05, + "loss": 2.4882, + "step": 15118 + }, + { + "epoch": 1.2201597933984343, + "grad_norm": 0.6853364109992981, + "learning_rate": 2.8269817822635337e-05, + "loss": 2.3803, + "step": 15119 + }, + { + "epoch": 1.2202404971350174, + "grad_norm": 0.7796143293380737, + "learning_rate": 2.8258818996880964e-05, + "loss": 2.4157, + "step": 15120 + }, + { + "epoch": 1.2203212008716005, + "grad_norm": 0.7202157378196716, + "learning_rate": 2.824782195910437e-05, + "loss": 2.5101, + "step": 15121 + }, + { + "epoch": 1.2204019046081833, + "grad_norm": 0.6730707287788391, + "learning_rate": 2.8236826709579644e-05, + "loss": 2.4397, + "step": 15122 + }, + { + "epoch": 1.2204826083447664, + "grad_norm": 0.7840865850448608, + "learning_rate": 2.8225833248580745e-05, + "loss": 2.4452, + "step": 15123 + }, + { + "epoch": 1.2205633120813493, + "grad_norm": 
0.8323497772216797, + "learning_rate": 2.821484157638171e-05, + "loss": 2.4775, + "step": 15124 + }, + { + "epoch": 1.2206440158179324, + "grad_norm": 0.6699438691139221, + "learning_rate": 2.8203851693256466e-05, + "loss": 2.3958, + "step": 15125 + }, + { + "epoch": 1.2207247195545154, + "grad_norm": 0.6711557507514954, + "learning_rate": 2.8192863599478923e-05, + "loss": 2.477, + "step": 15126 + }, + { + "epoch": 1.2208054232910983, + "grad_norm": 0.6255797743797302, + "learning_rate": 2.8181877295322922e-05, + "loss": 2.4222, + "step": 15127 + }, + { + "epoch": 1.2208861270276814, + "grad_norm": 0.7313731908798218, + "learning_rate": 2.8170892781062297e-05, + "loss": 2.4343, + "step": 15128 + }, + { + "epoch": 1.2209668307642643, + "grad_norm": 0.6611476540565491, + "learning_rate": 2.815991005697076e-05, + "loss": 2.3844, + "step": 15129 + }, + { + "epoch": 1.2210475345008474, + "grad_norm": 0.7293661236763, + "learning_rate": 2.8148929123322065e-05, + "loss": 2.3912, + "step": 15130 + }, + { + "epoch": 1.2211282382374304, + "grad_norm": 0.7150777578353882, + "learning_rate": 2.8137949980389866e-05, + "loss": 2.4227, + "step": 15131 + }, + { + "epoch": 1.2212089419740133, + "grad_norm": 0.7001000642776489, + "learning_rate": 2.8126972628447845e-05, + "loss": 2.4751, + "step": 15132 + }, + { + "epoch": 1.2212896457105964, + "grad_norm": 0.7106043100357056, + "learning_rate": 2.8115997067769505e-05, + "loss": 2.4127, + "step": 15133 + }, + { + "epoch": 1.2213703494471795, + "grad_norm": 0.6969115138053894, + "learning_rate": 2.810502329862842e-05, + "loss": 2.4073, + "step": 15134 + }, + { + "epoch": 1.2214510531837623, + "grad_norm": 0.7493317127227783, + "learning_rate": 2.8094051321298098e-05, + "loss": 2.4541, + "step": 15135 + }, + { + "epoch": 1.2215317569203454, + "grad_norm": 0.6499322652816772, + "learning_rate": 2.808308113605198e-05, + "loss": 2.4057, + "step": 15136 + }, + { + "epoch": 1.2216124606569285, + "grad_norm": 0.6716788411140442, + 
"learning_rate": 2.807211274316347e-05, + "loss": 2.3856, + "step": 15137 + }, + { + "epoch": 1.2216931643935114, + "grad_norm": 0.7724741101264954, + "learning_rate": 2.8061146142905958e-05, + "loss": 2.4652, + "step": 15138 + }, + { + "epoch": 1.2217738681300945, + "grad_norm": 0.7014325261116028, + "learning_rate": 2.8050181335552718e-05, + "loss": 2.4506, + "step": 15139 + }, + { + "epoch": 1.2218545718666773, + "grad_norm": 0.6705317497253418, + "learning_rate": 2.8039218321377026e-05, + "loss": 2.4581, + "step": 15140 + }, + { + "epoch": 1.2219352756032604, + "grad_norm": 0.709973931312561, + "learning_rate": 2.8028257100652156e-05, + "loss": 2.427, + "step": 15141 + }, + { + "epoch": 1.2220159793398435, + "grad_norm": 0.7021297812461853, + "learning_rate": 2.801729767365122e-05, + "loss": 2.3784, + "step": 15142 + }, + { + "epoch": 1.2220966830764264, + "grad_norm": 0.7431899905204773, + "learning_rate": 2.8006340040647393e-05, + "loss": 2.4135, + "step": 15143 + }, + { + "epoch": 1.2221773868130095, + "grad_norm": 0.6724472045898438, + "learning_rate": 2.7995384201913765e-05, + "loss": 2.3966, + "step": 15144 + }, + { + "epoch": 1.2222580905495923, + "grad_norm": 0.7381375432014465, + "learning_rate": 2.7984430157723384e-05, + "loss": 2.4853, + "step": 15145 + }, + { + "epoch": 1.2223387942861754, + "grad_norm": 0.6809988617897034, + "learning_rate": 2.7973477908349255e-05, + "loss": 2.408, + "step": 15146 + }, + { + "epoch": 1.2224194980227585, + "grad_norm": 0.7042898535728455, + "learning_rate": 2.7962527454064337e-05, + "loss": 2.3981, + "step": 15147 + }, + { + "epoch": 1.2225002017593414, + "grad_norm": 0.7096118330955505, + "learning_rate": 2.7951578795141576e-05, + "loss": 2.4175, + "step": 15148 + }, + { + "epoch": 1.2225809054959245, + "grad_norm": 0.7271720767021179, + "learning_rate": 2.794063193185378e-05, + "loss": 2.4193, + "step": 15149 + }, + { + "epoch": 1.2226616092325076, + "grad_norm": 0.7000352740287781, + "learning_rate": 
2.7929686864473792e-05, + "loss": 2.422, + "step": 15150 + }, + { + "epoch": 1.2227423129690904, + "grad_norm": 0.6983076333999634, + "learning_rate": 2.791874359327443e-05, + "loss": 2.4613, + "step": 15151 + }, + { + "epoch": 1.2228230167056735, + "grad_norm": 0.7520100474357605, + "learning_rate": 2.7907802118528383e-05, + "loss": 2.4147, + "step": 15152 + }, + { + "epoch": 1.2229037204422566, + "grad_norm": 0.7056650519371033, + "learning_rate": 2.789686244050834e-05, + "loss": 2.4568, + "step": 15153 + }, + { + "epoch": 1.2229844241788395, + "grad_norm": 0.7092614769935608, + "learning_rate": 2.7885924559486975e-05, + "loss": 2.4758, + "step": 15154 + }, + { + "epoch": 1.2230651279154225, + "grad_norm": 0.702521562576294, + "learning_rate": 2.7874988475736885e-05, + "loss": 2.4893, + "step": 15155 + }, + { + "epoch": 1.2231458316520054, + "grad_norm": 0.7454921007156372, + "learning_rate": 2.786405418953061e-05, + "loss": 2.4277, + "step": 15156 + }, + { + "epoch": 1.2232265353885885, + "grad_norm": 0.659503161907196, + "learning_rate": 2.7853121701140694e-05, + "loss": 2.4664, + "step": 15157 + }, + { + "epoch": 1.2233072391251716, + "grad_norm": 0.6368914842605591, + "learning_rate": 2.7842191010839556e-05, + "loss": 2.3728, + "step": 15158 + }, + { + "epoch": 1.2233879428617545, + "grad_norm": 0.7076737880706787, + "learning_rate": 2.783126211889965e-05, + "loss": 2.4204, + "step": 15159 + }, + { + "epoch": 1.2234686465983375, + "grad_norm": 0.718100905418396, + "learning_rate": 2.7820335025593325e-05, + "loss": 2.478, + "step": 15160 + }, + { + "epoch": 1.2235493503349204, + "grad_norm": 0.6804678440093994, + "learning_rate": 2.7809409731192972e-05, + "loss": 2.3755, + "step": 15161 + }, + { + "epoch": 1.2236300540715035, + "grad_norm": 0.7068643569946289, + "learning_rate": 2.77984862359708e-05, + "loss": 2.3713, + "step": 15162 + }, + { + "epoch": 1.2237107578080866, + "grad_norm": 0.7047072052955627, + "learning_rate": 2.7787564540199097e-05, + "loss": 
2.4264, + "step": 15163 + }, + { + "epoch": 1.2237914615446694, + "grad_norm": 0.6985021829605103, + "learning_rate": 2.7776644644150076e-05, + "loss": 2.4101, + "step": 15164 + }, + { + "epoch": 1.2238721652812525, + "grad_norm": 0.7543687224388123, + "learning_rate": 2.776572654809583e-05, + "loss": 2.3722, + "step": 15165 + }, + { + "epoch": 1.2239528690178356, + "grad_norm": 0.7199926972389221, + "learning_rate": 2.7754810252308473e-05, + "loss": 2.3819, + "step": 15166 + }, + { + "epoch": 1.2240335727544185, + "grad_norm": 0.696756899356842, + "learning_rate": 2.7743895757060156e-05, + "loss": 2.4245, + "step": 15167 + }, + { + "epoch": 1.2241142764910016, + "grad_norm": 0.7848933339118958, + "learning_rate": 2.773298306262281e-05, + "loss": 2.4725, + "step": 15168 + }, + { + "epoch": 1.2241949802275847, + "grad_norm": 0.6819389462471008, + "learning_rate": 2.7722072169268432e-05, + "loss": 2.4338, + "step": 15169 + }, + { + "epoch": 1.2242756839641675, + "grad_norm": 0.7185801267623901, + "learning_rate": 2.7711163077268977e-05, + "loss": 2.4745, + "step": 15170 + }, + { + "epoch": 1.2243563877007506, + "grad_norm": 0.7645030617713928, + "learning_rate": 2.7700255786896278e-05, + "loss": 2.4677, + "step": 15171 + }, + { + "epoch": 1.2244370914373335, + "grad_norm": 0.6559275388717651, + "learning_rate": 2.7689350298422202e-05, + "loss": 2.386, + "step": 15172 + }, + { + "epoch": 1.2245177951739166, + "grad_norm": 0.6965066194534302, + "learning_rate": 2.767844661211856e-05, + "loss": 2.4022, + "step": 15173 + }, + { + "epoch": 1.2245984989104994, + "grad_norm": 0.6618858575820923, + "learning_rate": 2.7667544728257057e-05, + "loss": 2.3541, + "step": 15174 + }, + { + "epoch": 1.2246792026470825, + "grad_norm": 0.6635501980781555, + "learning_rate": 2.765664464710941e-05, + "loss": 2.3984, + "step": 15175 + }, + { + "epoch": 1.2247599063836656, + "grad_norm": 0.6987191438674927, + "learning_rate": 2.764574636894729e-05, + "loss": 2.4637, + "step": 15176 + }, + 
{ + "epoch": 1.2248406101202485, + "grad_norm": 0.7289232611656189, + "learning_rate": 2.7634849894042303e-05, + "loss": 2.4033, + "step": 15177 + }, + { + "epoch": 1.2249213138568316, + "grad_norm": 0.7245565056800842, + "learning_rate": 2.762395522266602e-05, + "loss": 2.4281, + "step": 15178 + }, + { + "epoch": 1.2250020175934146, + "grad_norm": 0.6946065425872803, + "learning_rate": 2.761306235508997e-05, + "loss": 2.3869, + "step": 15179 + }, + { + "epoch": 1.2250827213299975, + "grad_norm": 0.6381784677505493, + "learning_rate": 2.7602171291585666e-05, + "loss": 2.404, + "step": 15180 + }, + { + "epoch": 1.2251634250665806, + "grad_norm": 0.6893685460090637, + "learning_rate": 2.759128203242446e-05, + "loss": 2.4807, + "step": 15181 + }, + { + "epoch": 1.2252441288031637, + "grad_norm": 0.6640260815620422, + "learning_rate": 2.7580394577877787e-05, + "loss": 2.4036, + "step": 15182 + }, + { + "epoch": 1.2253248325397466, + "grad_norm": 0.7125177979469299, + "learning_rate": 2.7569508928217026e-05, + "loss": 2.3869, + "step": 15183 + }, + { + "epoch": 1.2254055362763296, + "grad_norm": 0.657865583896637, + "learning_rate": 2.7558625083713397e-05, + "loss": 2.3869, + "step": 15184 + }, + { + "epoch": 1.2254862400129125, + "grad_norm": 0.6776065230369568, + "learning_rate": 2.7547743044638197e-05, + "loss": 2.4128, + "step": 15185 + }, + { + "epoch": 1.2255669437494956, + "grad_norm": 0.7126299738883972, + "learning_rate": 2.753686281126263e-05, + "loss": 2.4465, + "step": 15186 + }, + { + "epoch": 1.2256476474860787, + "grad_norm": 0.6918273568153381, + "learning_rate": 2.7525984383857873e-05, + "loss": 2.428, + "step": 15187 + }, + { + "epoch": 1.2257283512226615, + "grad_norm": 0.7742759585380554, + "learning_rate": 2.7515107762695025e-05, + "loss": 2.4299, + "step": 15188 + }, + { + "epoch": 1.2258090549592446, + "grad_norm": 0.7194607853889465, + "learning_rate": 2.7504232948045205e-05, + "loss": 2.4315, + "step": 15189 + }, + { + "epoch": 
1.2258897586958275, + "grad_norm": 0.6962646245956421, + "learning_rate": 2.7493359940179363e-05, + "loss": 2.4494, + "step": 15190 + }, + { + "epoch": 1.2259704624324106, + "grad_norm": 0.6681686639785767, + "learning_rate": 2.7482488739368538e-05, + "loss": 2.427, + "step": 15191 + }, + { + "epoch": 1.2260511661689937, + "grad_norm": 0.6589877009391785, + "learning_rate": 2.747161934588366e-05, + "loss": 2.4333, + "step": 15192 + }, + { + "epoch": 1.2261318699055765, + "grad_norm": 0.7415218949317932, + "learning_rate": 2.746075175999564e-05, + "loss": 2.4203, + "step": 15193 + }, + { + "epoch": 1.2262125736421596, + "grad_norm": 0.7371910214424133, + "learning_rate": 2.7449885981975276e-05, + "loss": 2.4684, + "step": 15194 + }, + { + "epoch": 1.2262932773787427, + "grad_norm": 0.7010802626609802, + "learning_rate": 2.7439022012093407e-05, + "loss": 2.4625, + "step": 15195 + }, + { + "epoch": 1.2263739811153256, + "grad_norm": 0.7125125527381897, + "learning_rate": 2.7428159850620773e-05, + "loss": 2.4075, + "step": 15196 + }, + { + "epoch": 1.2264546848519087, + "grad_norm": 0.701133668422699, + "learning_rate": 2.7417299497828107e-05, + "loss": 2.4525, + "step": 15197 + }, + { + "epoch": 1.2265353885884918, + "grad_norm": 0.7543410658836365, + "learning_rate": 2.7406440953986078e-05, + "loss": 2.474, + "step": 15198 + }, + { + "epoch": 1.2266160923250746, + "grad_norm": 0.69012051820755, + "learning_rate": 2.7395584219365323e-05, + "loss": 2.4853, + "step": 15199 + }, + { + "epoch": 1.2266967960616577, + "grad_norm": 0.6559048295021057, + "learning_rate": 2.7384729294236378e-05, + "loss": 2.4252, + "step": 15200 + }, + { + "epoch": 1.2267774997982406, + "grad_norm": 0.6603518128395081, + "learning_rate": 2.7373876178869794e-05, + "loss": 2.4047, + "step": 15201 + }, + { + "epoch": 1.2268582035348237, + "grad_norm": 0.7159265279769897, + "learning_rate": 2.736302487353609e-05, + "loss": 2.4352, + "step": 15202 + }, + { + "epoch": 1.2269389072714068, + 
"grad_norm": 0.6784560084342957, + "learning_rate": 2.735217537850565e-05, + "loss": 2.3933, + "step": 15203 + }, + { + "epoch": 1.2270196110079896, + "grad_norm": 0.7341950535774231, + "learning_rate": 2.7341327694048903e-05, + "loss": 2.4514, + "step": 15204 + }, + { + "epoch": 1.2271003147445727, + "grad_norm": 0.726046621799469, + "learning_rate": 2.7330481820436204e-05, + "loss": 2.4427, + "step": 15205 + }, + { + "epoch": 1.2271810184811556, + "grad_norm": 0.6897192001342773, + "learning_rate": 2.7319637757937854e-05, + "loss": 2.4587, + "step": 15206 + }, + { + "epoch": 1.2272617222177387, + "grad_norm": 0.6981058716773987, + "learning_rate": 2.7308795506824124e-05, + "loss": 2.4297, + "step": 15207 + }, + { + "epoch": 1.2273424259543217, + "grad_norm": 0.694583535194397, + "learning_rate": 2.729795506736522e-05, + "loss": 2.3608, + "step": 15208 + }, + { + "epoch": 1.2274231296909046, + "grad_norm": 0.710192084312439, + "learning_rate": 2.728711643983136e-05, + "loss": 2.3733, + "step": 15209 + }, + { + "epoch": 1.2275038334274877, + "grad_norm": 0.7203633785247803, + "learning_rate": 2.7276279624492595e-05, + "loss": 2.389, + "step": 15210 + }, + { + "epoch": 1.2275845371640708, + "grad_norm": 0.7298668622970581, + "learning_rate": 2.726544462161905e-05, + "loss": 2.3981, + "step": 15211 + }, + { + "epoch": 1.2276652409006537, + "grad_norm": 0.6640039682388306, + "learning_rate": 2.725461143148078e-05, + "loss": 2.4073, + "step": 15212 + }, + { + "epoch": 1.2277459446372367, + "grad_norm": 0.7203015685081482, + "learning_rate": 2.724378005434772e-05, + "loss": 2.4901, + "step": 15213 + }, + { + "epoch": 1.2278266483738198, + "grad_norm": 0.6668895483016968, + "learning_rate": 2.723295049048985e-05, + "loss": 2.4482, + "step": 15214 + }, + { + "epoch": 1.2279073521104027, + "grad_norm": 0.7551584839820862, + "learning_rate": 2.7222122740177103e-05, + "loss": 2.4877, + "step": 15215 + }, + { + "epoch": 1.2279880558469858, + "grad_norm": 0.707202672958374, + 
"learning_rate": 2.721129680367923e-05, + "loss": 2.4577, + "step": 15216 + }, + { + "epoch": 1.2280687595835686, + "grad_norm": 0.685153603553772, + "learning_rate": 2.7200472681266155e-05, + "loss": 2.476, + "step": 15217 + }, + { + "epoch": 1.2281494633201517, + "grad_norm": 0.6843041181564331, + "learning_rate": 2.718965037320762e-05, + "loss": 2.4164, + "step": 15218 + }, + { + "epoch": 1.2282301670567348, + "grad_norm": 0.6548978686332703, + "learning_rate": 2.7178829879773306e-05, + "loss": 2.4187, + "step": 15219 + }, + { + "epoch": 1.2283108707933177, + "grad_norm": 0.7037245035171509, + "learning_rate": 2.7168011201232902e-05, + "loss": 2.3621, + "step": 15220 + }, + { + "epoch": 1.2283915745299008, + "grad_norm": 0.6540676951408386, + "learning_rate": 2.7157194337856074e-05, + "loss": 2.4542, + "step": 15221 + }, + { + "epoch": 1.2284722782664836, + "grad_norm": 0.7699899673461914, + "learning_rate": 2.7146379289912338e-05, + "loss": 2.4639, + "step": 15222 + }, + { + "epoch": 1.2285529820030667, + "grad_norm": 0.7178743481636047, + "learning_rate": 2.713556605767128e-05, + "loss": 2.4222, + "step": 15223 + }, + { + "epoch": 1.2286336857396498, + "grad_norm": 0.6749793887138367, + "learning_rate": 2.7124754641402383e-05, + "loss": 2.4323, + "step": 15224 + }, + { + "epoch": 1.2287143894762327, + "grad_norm": 0.7035594582557678, + "learning_rate": 2.711394504137513e-05, + "loss": 2.4466, + "step": 15225 + }, + { + "epoch": 1.2287950932128158, + "grad_norm": 0.6518487930297852, + "learning_rate": 2.7103137257858868e-05, + "loss": 2.4969, + "step": 15226 + }, + { + "epoch": 1.2288757969493989, + "grad_norm": 0.6739057898521423, + "learning_rate": 2.7092331291122974e-05, + "loss": 2.406, + "step": 15227 + }, + { + "epoch": 1.2289565006859817, + "grad_norm": 0.6584770083427429, + "learning_rate": 2.7081527141436767e-05, + "loss": 2.4304, + "step": 15228 + }, + { + "epoch": 1.2290372044225648, + "grad_norm": 0.6846301555633545, + "learning_rate": 
2.7070724809069514e-05, + "loss": 2.3995, + "step": 15229 + }, + { + "epoch": 1.2291179081591477, + "grad_norm": 0.6778364777565002, + "learning_rate": 2.705992429429044e-05, + "loss": 2.38, + "step": 15230 + }, + { + "epoch": 1.2291986118957308, + "grad_norm": 0.6957302689552307, + "learning_rate": 2.7049125597368753e-05, + "loss": 2.3973, + "step": 15231 + }, + { + "epoch": 1.2292793156323139, + "grad_norm": 0.730269193649292, + "learning_rate": 2.7038328718573514e-05, + "loss": 2.4829, + "step": 15232 + }, + { + "epoch": 1.2293600193688967, + "grad_norm": 0.7114049196243286, + "learning_rate": 2.702753365817384e-05, + "loss": 2.3902, + "step": 15233 + }, + { + "epoch": 1.2294407231054798, + "grad_norm": 0.7137531638145447, + "learning_rate": 2.7016740416438823e-05, + "loss": 2.3957, + "step": 15234 + }, + { + "epoch": 1.2295214268420627, + "grad_norm": 0.7178330421447754, + "learning_rate": 2.7005948993637386e-05, + "loss": 2.4429, + "step": 15235 + }, + { + "epoch": 1.2296021305786458, + "grad_norm": 0.6767767071723938, + "learning_rate": 2.6995159390038506e-05, + "loss": 2.4009, + "step": 15236 + }, + { + "epoch": 1.2296828343152288, + "grad_norm": 0.7713541984558105, + "learning_rate": 2.6984371605911086e-05, + "loss": 2.4326, + "step": 15237 + }, + { + "epoch": 1.2297635380518117, + "grad_norm": 0.7218228578567505, + "learning_rate": 2.6973585641523992e-05, + "loss": 2.4358, + "step": 15238 + }, + { + "epoch": 1.2298442417883948, + "grad_norm": 0.6782575249671936, + "learning_rate": 2.696280149714604e-05, + "loss": 2.3844, + "step": 15239 + }, + { + "epoch": 1.2299249455249779, + "grad_norm": 0.6825734972953796, + "learning_rate": 2.6952019173045982e-05, + "loss": 2.4621, + "step": 15240 + }, + { + "epoch": 1.2300056492615608, + "grad_norm": 0.6587522625923157, + "learning_rate": 2.6941238669492608e-05, + "loss": 2.4465, + "step": 15241 + }, + { + "epoch": 1.2300863529981438, + "grad_norm": 0.6898796558380127, + "learning_rate": 2.6930459986754498e-05, + 
"loss": 2.4469, + "step": 15242 + }, + { + "epoch": 1.230167056734727, + "grad_norm": 0.6764062643051147, + "learning_rate": 2.6919683125100338e-05, + "loss": 2.4476, + "step": 15243 + }, + { + "epoch": 1.2302477604713098, + "grad_norm": 0.6647047400474548, + "learning_rate": 2.6908908084798733e-05, + "loss": 2.3677, + "step": 15244 + }, + { + "epoch": 1.2303284642078929, + "grad_norm": 0.7091608047485352, + "learning_rate": 2.6898134866118174e-05, + "loss": 2.4605, + "step": 15245 + }, + { + "epoch": 1.2304091679444757, + "grad_norm": 0.691007137298584, + "learning_rate": 2.6887363469327188e-05, + "loss": 2.4397, + "step": 15246 + }, + { + "epoch": 1.2304898716810588, + "grad_norm": 0.6685532927513123, + "learning_rate": 2.6876593894694214e-05, + "loss": 2.4279, + "step": 15247 + }, + { + "epoch": 1.230570575417642, + "grad_norm": 0.684474766254425, + "learning_rate": 2.686582614248767e-05, + "loss": 2.4162, + "step": 15248 + }, + { + "epoch": 1.2306512791542248, + "grad_norm": 0.657293975353241, + "learning_rate": 2.6855060212975915e-05, + "loss": 2.4337, + "step": 15249 + }, + { + "epoch": 1.2307319828908079, + "grad_norm": 0.7136504650115967, + "learning_rate": 2.684429610642729e-05, + "loss": 2.4156, + "step": 15250 + }, + { + "epoch": 1.2308126866273907, + "grad_norm": 0.6564410924911499, + "learning_rate": 2.6833533823110013e-05, + "loss": 2.5101, + "step": 15251 + }, + { + "epoch": 1.2308933903639738, + "grad_norm": 0.6628747582435608, + "learning_rate": 2.682277336329233e-05, + "loss": 2.3933, + "step": 15252 + }, + { + "epoch": 1.230974094100557, + "grad_norm": 0.7362595796585083, + "learning_rate": 2.681201472724244e-05, + "loss": 2.4541, + "step": 15253 + }, + { + "epoch": 1.2310547978371398, + "grad_norm": 0.7604697346687317, + "learning_rate": 2.680125791522844e-05, + "loss": 2.4383, + "step": 15254 + }, + { + "epoch": 1.2311355015737229, + "grad_norm": 0.7128429412841797, + "learning_rate": 2.6790502927518434e-05, + "loss": 2.4492, + "step": 15255 + 
}, + { + "epoch": 1.231216205310306, + "grad_norm": 0.6761955618858337, + "learning_rate": 2.677974976438047e-05, + "loss": 2.4355, + "step": 15256 + }, + { + "epoch": 1.2312969090468888, + "grad_norm": 0.6687077879905701, + "learning_rate": 2.6768998426082538e-05, + "loss": 2.4317, + "step": 15257 + }, + { + "epoch": 1.231377612783472, + "grad_norm": 0.7423825860023499, + "learning_rate": 2.675824891289259e-05, + "loss": 2.4216, + "step": 15258 + }, + { + "epoch": 1.231458316520055, + "grad_norm": 0.671130359172821, + "learning_rate": 2.6747501225078542e-05, + "loss": 2.4775, + "step": 15259 + }, + { + "epoch": 1.2315390202566379, + "grad_norm": 0.7421461939811707, + "learning_rate": 2.6736755362908273e-05, + "loss": 2.4042, + "step": 15260 + }, + { + "epoch": 1.231619723993221, + "grad_norm": 0.7084131240844727, + "learning_rate": 2.6726011326649547e-05, + "loss": 2.4506, + "step": 15261 + }, + { + "epoch": 1.2317004277298038, + "grad_norm": 0.641852855682373, + "learning_rate": 2.671526911657015e-05, + "loss": 2.4261, + "step": 15262 + }, + { + "epoch": 1.231781131466387, + "grad_norm": 0.7627724409103394, + "learning_rate": 2.670452873293785e-05, + "loss": 2.4647, + "step": 15263 + }, + { + "epoch": 1.23186183520297, + "grad_norm": 0.6638163924217224, + "learning_rate": 2.669379017602026e-05, + "loss": 2.4208, + "step": 15264 + }, + { + "epoch": 1.2319425389395529, + "grad_norm": 0.6815361380577087, + "learning_rate": 2.668305344608505e-05, + "loss": 2.4404, + "step": 15265 + }, + { + "epoch": 1.232023242676136, + "grad_norm": 0.6466485857963562, + "learning_rate": 2.6672318543399823e-05, + "loss": 2.4327, + "step": 15266 + }, + { + "epoch": 1.2321039464127188, + "grad_norm": 0.7119305729866028, + "learning_rate": 2.6661585468232042e-05, + "loss": 2.4266, + "step": 15267 + }, + { + "epoch": 1.232184650149302, + "grad_norm": 0.7245718836784363, + "learning_rate": 2.6650854220849286e-05, + "loss": 2.4484, + "step": 15268 + }, + { + "epoch": 1.232265353885885, + 
"grad_norm": 0.7050287127494812, + "learning_rate": 2.6640124801518972e-05, + "loss": 2.4441, + "step": 15269 + }, + { + "epoch": 1.2323460576224678, + "grad_norm": 0.6906494498252869, + "learning_rate": 2.6629397210508556e-05, + "loss": 2.4297, + "step": 15270 + }, + { + "epoch": 1.232426761359051, + "grad_norm": 0.7224171757698059, + "learning_rate": 2.661867144808532e-05, + "loss": 2.4279, + "step": 15271 + }, + { + "epoch": 1.232507465095634, + "grad_norm": 0.688804030418396, + "learning_rate": 2.6607947514516606e-05, + "loss": 2.4741, + "step": 15272 + }, + { + "epoch": 1.232588168832217, + "grad_norm": 0.6462350487709045, + "learning_rate": 2.6597225410069726e-05, + "loss": 2.4499, + "step": 15273 + }, + { + "epoch": 1.2326688725688, + "grad_norm": 0.6860110759735107, + "learning_rate": 2.658650513501184e-05, + "loss": 2.4488, + "step": 15274 + }, + { + "epoch": 1.2327495763053828, + "grad_norm": 0.7158305644989014, + "learning_rate": 2.6575786689610138e-05, + "loss": 2.4318, + "step": 15275 + }, + { + "epoch": 1.232830280041966, + "grad_norm": 0.7740959525108337, + "learning_rate": 2.6565070074131804e-05, + "loss": 2.4824, + "step": 15276 + }, + { + "epoch": 1.232910983778549, + "grad_norm": 0.7573856711387634, + "learning_rate": 2.6554355288843847e-05, + "loss": 2.4034, + "step": 15277 + }, + { + "epoch": 1.2329916875151319, + "grad_norm": 0.6809369921684265, + "learning_rate": 2.654364233401332e-05, + "loss": 2.5085, + "step": 15278 + }, + { + "epoch": 1.233072391251715, + "grad_norm": 0.6695643067359924, + "learning_rate": 2.6532931209907307e-05, + "loss": 2.4697, + "step": 15279 + }, + { + "epoch": 1.2331530949882978, + "grad_norm": 0.7218750715255737, + "learning_rate": 2.6522221916792655e-05, + "loss": 2.4753, + "step": 15280 + }, + { + "epoch": 1.233233798724881, + "grad_norm": 0.8171822428703308, + "learning_rate": 2.6511514454936314e-05, + "loss": 2.45, + "step": 15281 + }, + { + "epoch": 1.233314502461464, + "grad_norm": 0.7234573364257812, + 
"learning_rate": 2.6500808824605162e-05, + "loss": 2.3963, + "step": 15282 + }, + { + "epoch": 1.2333952061980469, + "grad_norm": 0.6993409395217896, + "learning_rate": 2.6490105026065948e-05, + "loss": 2.4449, + "step": 15283 + }, + { + "epoch": 1.23347590993463, + "grad_norm": 0.7984449863433838, + "learning_rate": 2.6479403059585472e-05, + "loss": 2.4322, + "step": 15284 + }, + { + "epoch": 1.233556613671213, + "grad_norm": 0.683971107006073, + "learning_rate": 2.6468702925430466e-05, + "loss": 2.4125, + "step": 15285 + }, + { + "epoch": 1.233637317407796, + "grad_norm": 0.6739822626113892, + "learning_rate": 2.6458004623867617e-05, + "loss": 2.4487, + "step": 15286 + }, + { + "epoch": 1.233718021144379, + "grad_norm": 0.7003912925720215, + "learning_rate": 2.644730815516351e-05, + "loss": 2.4437, + "step": 15287 + }, + { + "epoch": 1.233798724880962, + "grad_norm": 0.7011744379997253, + "learning_rate": 2.643661351958474e-05, + "loss": 2.4798, + "step": 15288 + }, + { + "epoch": 1.233879428617545, + "grad_norm": 0.7003397941589355, + "learning_rate": 2.6425920717397867e-05, + "loss": 2.4554, + "step": 15289 + }, + { + "epoch": 1.233960132354128, + "grad_norm": 0.6682165265083313, + "learning_rate": 2.6415229748869374e-05, + "loss": 2.4252, + "step": 15290 + }, + { + "epoch": 1.234040836090711, + "grad_norm": 0.6712457537651062, + "learning_rate": 2.6404540614265715e-05, + "loss": 2.4225, + "step": 15291 + }, + { + "epoch": 1.234121539827294, + "grad_norm": 0.654464602470398, + "learning_rate": 2.63938533138533e-05, + "loss": 2.4462, + "step": 15292 + }, + { + "epoch": 1.234202243563877, + "grad_norm": 0.7311797738075256, + "learning_rate": 2.638316784789845e-05, + "loss": 2.502, + "step": 15293 + }, + { + "epoch": 1.23428294730046, + "grad_norm": 0.6836559176445007, + "learning_rate": 2.6372484216667492e-05, + "loss": 2.5134, + "step": 15294 + }, + { + "epoch": 1.234363651037043, + "grad_norm": 0.6961826086044312, + "learning_rate": 2.636180242042672e-05, + 
"loss": 2.4479, + "step": 15295 + }, + { + "epoch": 1.234444354773626, + "grad_norm": 0.6824259161949158, + "learning_rate": 2.635112245944229e-05, + "loss": 2.4299, + "step": 15296 + }, + { + "epoch": 1.234525058510209, + "grad_norm": 0.7594609260559082, + "learning_rate": 2.634044433398042e-05, + "loss": 2.4469, + "step": 15297 + }, + { + "epoch": 1.234605762246792, + "grad_norm": 0.7044653296470642, + "learning_rate": 2.632976804430721e-05, + "loss": 2.447, + "step": 15298 + }, + { + "epoch": 1.234686465983375, + "grad_norm": 0.6986916065216064, + "learning_rate": 2.631909359068876e-05, + "loss": 2.4705, + "step": 15299 + }, + { + "epoch": 1.234767169719958, + "grad_norm": 0.7025431990623474, + "learning_rate": 2.630842097339111e-05, + "loss": 2.3951, + "step": 15300 + }, + { + "epoch": 1.2348478734565411, + "grad_norm": 0.6533786058425903, + "learning_rate": 2.6297750192680237e-05, + "loss": 2.3769, + "step": 15301 + }, + { + "epoch": 1.234928577193124, + "grad_norm": 0.6575472354888916, + "learning_rate": 2.628708124882212e-05, + "loss": 2.4293, + "step": 15302 + }, + { + "epoch": 1.235009280929707, + "grad_norm": 0.6712046265602112, + "learning_rate": 2.6276414142082584e-05, + "loss": 2.4819, + "step": 15303 + }, + { + "epoch": 1.2350899846662902, + "grad_norm": 0.6947652101516724, + "learning_rate": 2.6265748872727535e-05, + "loss": 2.449, + "step": 15304 + }, + { + "epoch": 1.235170688402873, + "grad_norm": 0.6881443858146667, + "learning_rate": 2.62550854410228e-05, + "loss": 2.3991, + "step": 15305 + }, + { + "epoch": 1.2352513921394561, + "grad_norm": 0.6681519746780396, + "learning_rate": 2.624442384723407e-05, + "loss": 2.4005, + "step": 15306 + }, + { + "epoch": 1.235332095876039, + "grad_norm": 0.6728120446205139, + "learning_rate": 2.62337640916271e-05, + "loss": 2.4242, + "step": 15307 + }, + { + "epoch": 1.235412799612622, + "grad_norm": 0.707360029220581, + "learning_rate": 2.622310617446755e-05, + "loss": 2.4385, + "step": 15308 + }, + { + 
"epoch": 1.2354935033492052, + "grad_norm": 0.6890079975128174, + "learning_rate": 2.6212450096021058e-05, + "loss": 2.443, + "step": 15309 + }, + { + "epoch": 1.235574207085788, + "grad_norm": 0.7022379636764526, + "learning_rate": 2.620179585655318e-05, + "loss": 2.3982, + "step": 15310 + }, + { + "epoch": 1.235654910822371, + "grad_norm": 0.7283182740211487, + "learning_rate": 2.61911434563295e-05, + "loss": 2.4197, + "step": 15311 + }, + { + "epoch": 1.235735614558954, + "grad_norm": 0.6721852421760559, + "learning_rate": 2.6180492895615426e-05, + "loss": 2.4356, + "step": 15312 + }, + { + "epoch": 1.235816318295537, + "grad_norm": 0.6817916631698608, + "learning_rate": 2.616984417467645e-05, + "loss": 2.4325, + "step": 15313 + }, + { + "epoch": 1.2358970220321202, + "grad_norm": 0.6826596260070801, + "learning_rate": 2.6159197293777972e-05, + "loss": 2.4043, + "step": 15314 + }, + { + "epoch": 1.235977725768703, + "grad_norm": 0.7135530114173889, + "learning_rate": 2.6148552253185288e-05, + "loss": 2.4269, + "step": 15315 + }, + { + "epoch": 1.236058429505286, + "grad_norm": 0.7027753591537476, + "learning_rate": 2.6137909053163722e-05, + "loss": 2.4266, + "step": 15316 + }, + { + "epoch": 1.2361391332418692, + "grad_norm": 0.6597041487693787, + "learning_rate": 2.6127267693978552e-05, + "loss": 2.4073, + "step": 15317 + }, + { + "epoch": 1.236219836978452, + "grad_norm": 0.6450026631355286, + "learning_rate": 2.6116628175894974e-05, + "loss": 2.4299, + "step": 15318 + }, + { + "epoch": 1.2363005407150351, + "grad_norm": 0.7740476727485657, + "learning_rate": 2.6105990499178156e-05, + "loss": 2.4088, + "step": 15319 + }, + { + "epoch": 1.2363812444516182, + "grad_norm": 0.6460183262825012, + "learning_rate": 2.609535466409322e-05, + "loss": 2.4311, + "step": 15320 + }, + { + "epoch": 1.236461948188201, + "grad_norm": 0.6514838337898254, + "learning_rate": 2.608472067090525e-05, + "loss": 2.4069, + "step": 15321 + }, + { + "epoch": 1.2365426519247842, + 
"grad_norm": 0.7281234860420227, + "learning_rate": 2.6074088519879237e-05, + "loss": 2.4245, + "step": 15322 + }, + { + "epoch": 1.236623355661367, + "grad_norm": 0.752983570098877, + "learning_rate": 2.606345821128018e-05, + "loss": 2.4149, + "step": 15323 + }, + { + "epoch": 1.2367040593979501, + "grad_norm": 0.6912856101989746, + "learning_rate": 2.6052829745373054e-05, + "loss": 2.4489, + "step": 15324 + }, + { + "epoch": 1.236784763134533, + "grad_norm": 0.6719293594360352, + "learning_rate": 2.604220312242267e-05, + "loss": 2.457, + "step": 15325 + }, + { + "epoch": 1.236865466871116, + "grad_norm": 0.7440586090087891, + "learning_rate": 2.6031578342693918e-05, + "loss": 2.4657, + "step": 15326 + }, + { + "epoch": 1.2369461706076992, + "grad_norm": 0.694442629814148, + "learning_rate": 2.602095540645162e-05, + "loss": 2.4422, + "step": 15327 + }, + { + "epoch": 1.237026874344282, + "grad_norm": 0.7186843752861023, + "learning_rate": 2.601033431396046e-05, + "loss": 2.4229, + "step": 15328 + }, + { + "epoch": 1.2371075780808651, + "grad_norm": 0.7401825785636902, + "learning_rate": 2.5999715065485153e-05, + "loss": 2.45, + "step": 15329 + }, + { + "epoch": 1.2371882818174482, + "grad_norm": 0.6710138916969299, + "learning_rate": 2.598909766129045e-05, + "loss": 2.4074, + "step": 15330 + }, + { + "epoch": 1.237268985554031, + "grad_norm": 0.7867769598960876, + "learning_rate": 2.5978482101640867e-05, + "loss": 2.4709, + "step": 15331 + }, + { + "epoch": 1.2373496892906142, + "grad_norm": 0.7076219916343689, + "learning_rate": 2.5967868386801e-05, + "loss": 2.4887, + "step": 15332 + }, + { + "epoch": 1.2374303930271973, + "grad_norm": 0.7277626991271973, + "learning_rate": 2.5957256517035378e-05, + "loss": 2.4295, + "step": 15333 + }, + { + "epoch": 1.2375110967637801, + "grad_norm": 0.7339804768562317, + "learning_rate": 2.5946646492608506e-05, + "loss": 2.4624, + "step": 15334 + }, + { + "epoch": 1.2375918005003632, + "grad_norm": 0.6707656383514404, + 
"learning_rate": 2.593603831378475e-05, + "loss": 2.4159, + "step": 15335 + }, + { + "epoch": 1.237672504236946, + "grad_norm": 0.7118813991546631, + "learning_rate": 2.592543198082852e-05, + "loss": 2.4496, + "step": 15336 + }, + { + "epoch": 1.2377532079735292, + "grad_norm": 0.675167977809906, + "learning_rate": 2.591482749400419e-05, + "loss": 2.4519, + "step": 15337 + }, + { + "epoch": 1.2378339117101123, + "grad_norm": 0.8245306611061096, + "learning_rate": 2.5904224853575986e-05, + "loss": 2.4732, + "step": 15338 + }, + { + "epoch": 1.2379146154466951, + "grad_norm": 0.7411863207817078, + "learning_rate": 2.5893624059808184e-05, + "loss": 2.4458, + "step": 15339 + }, + { + "epoch": 1.2379953191832782, + "grad_norm": 0.6864522695541382, + "learning_rate": 2.5883025112964997e-05, + "loss": 2.4264, + "step": 15340 + }, + { + "epoch": 1.238076022919861, + "grad_norm": 0.6585919260978699, + "learning_rate": 2.5872428013310567e-05, + "loss": 2.3904, + "step": 15341 + }, + { + "epoch": 1.2381567266564442, + "grad_norm": 0.6605508327484131, + "learning_rate": 2.5861832761108995e-05, + "loss": 2.4828, + "step": 15342 + }, + { + "epoch": 1.2382374303930272, + "grad_norm": 0.7353223562240601, + "learning_rate": 2.5851239356624392e-05, + "loss": 2.4335, + "step": 15343 + }, + { + "epoch": 1.2383181341296101, + "grad_norm": 0.6907783150672913, + "learning_rate": 2.5840647800120688e-05, + "loss": 2.4394, + "step": 15344 + }, + { + "epoch": 1.2383988378661932, + "grad_norm": 0.7239590287208557, + "learning_rate": 2.5830058091861896e-05, + "loss": 2.4221, + "step": 15345 + }, + { + "epoch": 1.2384795416027763, + "grad_norm": 0.7001412510871887, + "learning_rate": 2.5819470232111975e-05, + "loss": 2.4521, + "step": 15346 + }, + { + "epoch": 1.2385602453393592, + "grad_norm": 0.6983658671379089, + "learning_rate": 2.580888422113473e-05, + "loss": 2.4839, + "step": 15347 + }, + { + "epoch": 1.2386409490759422, + "grad_norm": 0.7829005718231201, + "learning_rate": 
2.5798300059194037e-05, + "loss": 2.4546, + "step": 15348 + }, + { + "epoch": 1.2387216528125253, + "grad_norm": 0.7248061299324036, + "learning_rate": 2.5787717746553664e-05, + "loss": 2.4341, + "step": 15349 + }, + { + "epoch": 1.2388023565491082, + "grad_norm": 0.7921163439750671, + "learning_rate": 2.577713728347736e-05, + "loss": 2.475, + "step": 15350 + }, + { + "epoch": 1.2388830602856913, + "grad_norm": 0.6571238040924072, + "learning_rate": 2.5766558670228813e-05, + "loss": 2.4636, + "step": 15351 + }, + { + "epoch": 1.2389637640222741, + "grad_norm": 0.7436683177947998, + "learning_rate": 2.575598190707168e-05, + "loss": 2.4868, + "step": 15352 + }, + { + "epoch": 1.2390444677588572, + "grad_norm": 0.6471900939941406, + "learning_rate": 2.5745406994269573e-05, + "loss": 2.4349, + "step": 15353 + }, + { + "epoch": 1.2391251714954403, + "grad_norm": 0.6612011194229126, + "learning_rate": 2.5734833932086012e-05, + "loss": 2.4088, + "step": 15354 + }, + { + "epoch": 1.2392058752320232, + "grad_norm": 0.6882977485656738, + "learning_rate": 2.572426272078451e-05, + "loss": 2.4344, + "step": 15355 + }, + { + "epoch": 1.2392865789686063, + "grad_norm": 0.6836830973625183, + "learning_rate": 2.5713693360628565e-05, + "loss": 2.4325, + "step": 15356 + }, + { + "epoch": 1.2393672827051891, + "grad_norm": 0.712127149105072, + "learning_rate": 2.5703125851881536e-05, + "loss": 2.4505, + "step": 15357 + }, + { + "epoch": 1.2394479864417722, + "grad_norm": 0.7162468433380127, + "learning_rate": 2.5692560194806837e-05, + "loss": 2.4167, + "step": 15358 + }, + { + "epoch": 1.2395286901783553, + "grad_norm": 0.7770177125930786, + "learning_rate": 2.568199638966777e-05, + "loss": 2.4072, + "step": 15359 + }, + { + "epoch": 1.2396093939149382, + "grad_norm": 0.7049651741981506, + "learning_rate": 2.5671434436727636e-05, + "loss": 2.434, + "step": 15360 + }, + { + "epoch": 1.2396900976515213, + "grad_norm": 0.7793349027633667, + "learning_rate": 2.566087433624964e-05, + 
"loss": 2.4762, + "step": 15361 + }, + { + "epoch": 1.2397708013881044, + "grad_norm": 0.6776690483093262, + "learning_rate": 2.5650316088497018e-05, + "loss": 2.402, + "step": 15362 + }, + { + "epoch": 1.2398515051246872, + "grad_norm": 0.7207701802253723, + "learning_rate": 2.5639759693732834e-05, + "loss": 2.4398, + "step": 15363 + }, + { + "epoch": 1.2399322088612703, + "grad_norm": 0.759787917137146, + "learning_rate": 2.5629205152220215e-05, + "loss": 2.4268, + "step": 15364 + }, + { + "epoch": 1.2400129125978534, + "grad_norm": 0.6906142830848694, + "learning_rate": 2.5618652464222215e-05, + "loss": 2.4075, + "step": 15365 + }, + { + "epoch": 1.2400936163344363, + "grad_norm": 0.7002954483032227, + "learning_rate": 2.560810163000187e-05, + "loss": 2.4516, + "step": 15366 + }, + { + "epoch": 1.2401743200710194, + "grad_norm": 0.7287559509277344, + "learning_rate": 2.5597552649822053e-05, + "loss": 2.4975, + "step": 15367 + }, + { + "epoch": 1.2402550238076022, + "grad_norm": 0.6523926854133606, + "learning_rate": 2.558700552394572e-05, + "loss": 2.4085, + "step": 15368 + }, + { + "epoch": 1.2403357275441853, + "grad_norm": 0.7289387583732605, + "learning_rate": 2.5576460252635727e-05, + "loss": 2.4789, + "step": 15369 + }, + { + "epoch": 1.2404164312807684, + "grad_norm": 0.6613432765007019, + "learning_rate": 2.5565916836154878e-05, + "loss": 2.4263, + "step": 15370 + }, + { + "epoch": 1.2404971350173513, + "grad_norm": 0.7275245785713196, + "learning_rate": 2.555537527476597e-05, + "loss": 2.4652, + "step": 15371 + }, + { + "epoch": 1.2405778387539343, + "grad_norm": 0.6726976037025452, + "learning_rate": 2.554483556873173e-05, + "loss": 2.4092, + "step": 15372 + }, + { + "epoch": 1.2406585424905172, + "grad_norm": 0.6908233761787415, + "learning_rate": 2.5534297718314794e-05, + "loss": 2.3678, + "step": 15373 + }, + { + "epoch": 1.2407392462271003, + "grad_norm": 0.6893147826194763, + "learning_rate": 2.5523761723777806e-05, + "loss": 2.4625, + "step": 
15374 + }, + { + "epoch": 1.2408199499636834, + "grad_norm": 0.7640267014503479, + "learning_rate": 2.551322758538339e-05, + "loss": 2.446, + "step": 15375 + }, + { + "epoch": 1.2409006537002663, + "grad_norm": 0.7187458276748657, + "learning_rate": 2.550269530339402e-05, + "loss": 2.4215, + "step": 15376 + }, + { + "epoch": 1.2409813574368493, + "grad_norm": 0.8041789531707764, + "learning_rate": 2.5492164878072234e-05, + "loss": 2.5085, + "step": 15377 + }, + { + "epoch": 1.2410620611734324, + "grad_norm": 0.6582188010215759, + "learning_rate": 2.5481636309680445e-05, + "loss": 2.467, + "step": 15378 + }, + { + "epoch": 1.2411427649100153, + "grad_norm": 0.705731213092804, + "learning_rate": 2.5471109598481112e-05, + "loss": 2.3764, + "step": 15379 + }, + { + "epoch": 1.2412234686465984, + "grad_norm": 0.6918940544128418, + "learning_rate": 2.5460584744736495e-05, + "loss": 2.4513, + "step": 15380 + }, + { + "epoch": 1.2413041723831812, + "grad_norm": 0.7402673959732056, + "learning_rate": 2.5450061748708975e-05, + "loss": 2.5133, + "step": 15381 + }, + { + "epoch": 1.2413848761197643, + "grad_norm": 0.6740667223930359, + "learning_rate": 2.543954061066083e-05, + "loss": 2.4649, + "step": 15382 + }, + { + "epoch": 1.2414655798563474, + "grad_norm": 0.6665407419204712, + "learning_rate": 2.5429021330854197e-05, + "loss": 2.4321, + "step": 15383 + }, + { + "epoch": 1.2415462835929303, + "grad_norm": 0.7324530482292175, + "learning_rate": 2.5418503909551296e-05, + "loss": 2.3574, + "step": 15384 + }, + { + "epoch": 1.2416269873295134, + "grad_norm": 0.7117868661880493, + "learning_rate": 2.5407988347014255e-05, + "loss": 2.4552, + "step": 15385 + }, + { + "epoch": 1.2417076910660962, + "grad_norm": 0.7162930965423584, + "learning_rate": 2.5397474643505103e-05, + "loss": 2.4135, + "step": 15386 + }, + { + "epoch": 1.2417883948026793, + "grad_norm": 0.7301257848739624, + "learning_rate": 2.5386962799285895e-05, + "loss": 2.4277, + "step": 15387 + }, + { + "epoch": 
1.2418690985392624, + "grad_norm": 0.7404977679252625, + "learning_rate": 2.5376452814618645e-05, + "loss": 2.478, + "step": 15388 + }, + { + "epoch": 1.2419498022758453, + "grad_norm": 0.6546272039413452, + "learning_rate": 2.536594468976522e-05, + "loss": 2.4879, + "step": 15389 + }, + { + "epoch": 1.2420305060124284, + "grad_norm": 0.6501599550247192, + "learning_rate": 2.5355438424987565e-05, + "loss": 2.3964, + "step": 15390 + }, + { + "epoch": 1.2421112097490115, + "grad_norm": 0.6711748242378235, + "learning_rate": 2.5344934020547496e-05, + "loss": 2.4123, + "step": 15391 + }, + { + "epoch": 1.2421919134855943, + "grad_norm": 0.6803534030914307, + "learning_rate": 2.5334431476706823e-05, + "loss": 2.4271, + "step": 15392 + }, + { + "epoch": 1.2422726172221774, + "grad_norm": 0.7407296299934387, + "learning_rate": 2.5323930793727302e-05, + "loss": 2.49, + "step": 15393 + }, + { + "epoch": 1.2423533209587605, + "grad_norm": 0.701870858669281, + "learning_rate": 2.5313431971870617e-05, + "loss": 2.4534, + "step": 15394 + }, + { + "epoch": 1.2424340246953434, + "grad_norm": 0.6658090353012085, + "learning_rate": 2.5302935011398475e-05, + "loss": 2.4581, + "step": 15395 + }, + { + "epoch": 1.2425147284319265, + "grad_norm": 0.6616473197937012, + "learning_rate": 2.529243991257243e-05, + "loss": 2.4169, + "step": 15396 + }, + { + "epoch": 1.2425954321685093, + "grad_norm": 0.6714773178100586, + "learning_rate": 2.5281946675654067e-05, + "loss": 2.4159, + "step": 15397 + }, + { + "epoch": 1.2426761359050924, + "grad_norm": 0.6789337396621704, + "learning_rate": 2.5271455300904935e-05, + "loss": 2.4211, + "step": 15398 + }, + { + "epoch": 1.2427568396416755, + "grad_norm": 0.6793739795684814, + "learning_rate": 2.5260965788586456e-05, + "loss": 2.4337, + "step": 15399 + }, + { + "epoch": 1.2428375433782584, + "grad_norm": 0.6432294249534607, + "learning_rate": 2.5250478138960076e-05, + "loss": 2.4268, + "step": 15400 + }, + { + "epoch": 1.2429182471148414, + 
"grad_norm": 0.6960669159889221, + "learning_rate": 2.523999235228718e-05, + "loss": 2.3535, + "step": 15401 + }, + { + "epoch": 1.2429989508514243, + "grad_norm": 0.6724488735198975, + "learning_rate": 2.5229508428829096e-05, + "loss": 2.4294, + "step": 15402 + }, + { + "epoch": 1.2430796545880074, + "grad_norm": 0.636105477809906, + "learning_rate": 2.521902636884711e-05, + "loss": 2.4438, + "step": 15403 + }, + { + "epoch": 1.2431603583245905, + "grad_norm": 0.6865580677986145, + "learning_rate": 2.52085461726025e-05, + "loss": 2.4473, + "step": 15404 + }, + { + "epoch": 1.2432410620611734, + "grad_norm": 0.6740261316299438, + "learning_rate": 2.5198067840356398e-05, + "loss": 2.4642, + "step": 15405 + }, + { + "epoch": 1.2433217657977564, + "grad_norm": 0.7241789698600769, + "learning_rate": 2.518759137236998e-05, + "loss": 2.4294, + "step": 15406 + }, + { + "epoch": 1.2434024695343395, + "grad_norm": 0.6839794516563416, + "learning_rate": 2.5177116768904373e-05, + "loss": 2.4697, + "step": 15407 + }, + { + "epoch": 1.2434831732709224, + "grad_norm": 0.677390992641449, + "learning_rate": 2.5166644030220578e-05, + "loss": 2.4411, + "step": 15408 + }, + { + "epoch": 1.2435638770075055, + "grad_norm": 0.709065854549408, + "learning_rate": 2.515617315657962e-05, + "loss": 2.4392, + "step": 15409 + }, + { + "epoch": 1.2436445807440886, + "grad_norm": 0.6735498905181885, + "learning_rate": 2.514570414824249e-05, + "loss": 2.3924, + "step": 15410 + }, + { + "epoch": 1.2437252844806714, + "grad_norm": 0.6729374527931213, + "learning_rate": 2.513523700547007e-05, + "loss": 2.4464, + "step": 15411 + }, + { + "epoch": 1.2438059882172545, + "grad_norm": 0.7232720851898193, + "learning_rate": 2.5124771728523244e-05, + "loss": 2.3975, + "step": 15412 + }, + { + "epoch": 1.2438866919538374, + "grad_norm": 0.7467584609985352, + "learning_rate": 2.5114308317662837e-05, + "loss": 2.4191, + "step": 15413 + }, + { + "epoch": 1.2439673956904205, + "grad_norm": 0.6951141953468323, + 
"learning_rate": 2.5103846773149642e-05, + "loss": 2.4207, + "step": 15414 + }, + { + "epoch": 1.2440480994270036, + "grad_norm": 0.6427489519119263, + "learning_rate": 2.5093387095244336e-05, + "loss": 2.3539, + "step": 15415 + }, + { + "epoch": 1.2441288031635864, + "grad_norm": 0.729580283164978, + "learning_rate": 2.5082929284207644e-05, + "loss": 2.4464, + "step": 15416 + }, + { + "epoch": 1.2442095069001695, + "grad_norm": 0.7247009873390198, + "learning_rate": 2.5072473340300207e-05, + "loss": 2.4294, + "step": 15417 + }, + { + "epoch": 1.2442902106367524, + "grad_norm": 0.7037674784660339, + "learning_rate": 2.5062019263782577e-05, + "loss": 2.4294, + "step": 15418 + }, + { + "epoch": 1.2443709143733355, + "grad_norm": 0.6997841596603394, + "learning_rate": 2.5051567054915303e-05, + "loss": 2.4976, + "step": 15419 + }, + { + "epoch": 1.2444516181099186, + "grad_norm": 0.7001172304153442, + "learning_rate": 2.504111671395891e-05, + "loss": 2.371, + "step": 15420 + }, + { + "epoch": 1.2445323218465014, + "grad_norm": 0.6781473159790039, + "learning_rate": 2.5030668241173827e-05, + "loss": 2.4124, + "step": 15421 + }, + { + "epoch": 1.2446130255830845, + "grad_norm": 0.7053182125091553, + "learning_rate": 2.5020221636820463e-05, + "loss": 2.4109, + "step": 15422 + }, + { + "epoch": 1.2446937293196676, + "grad_norm": 0.68635493516922, + "learning_rate": 2.50097769011592e-05, + "loss": 2.4548, + "step": 15423 + }, + { + "epoch": 1.2447744330562505, + "grad_norm": 0.7015564441680908, + "learning_rate": 2.4999334034450293e-05, + "loss": 2.4537, + "step": 15424 + }, + { + "epoch": 1.2448551367928335, + "grad_norm": 0.694054901599884, + "learning_rate": 2.4988893036954043e-05, + "loss": 2.4396, + "step": 15425 + }, + { + "epoch": 1.2449358405294164, + "grad_norm": 0.702518880367279, + "learning_rate": 2.4978453908930665e-05, + "loss": 2.4015, + "step": 15426 + }, + { + "epoch": 1.2450165442659995, + "grad_norm": 0.7237387895584106, + "learning_rate": 
2.4968016650640348e-05, + "loss": 2.4257, + "step": 15427 + }, + { + "epoch": 1.2450972480025826, + "grad_norm": 0.7133163809776306, + "learning_rate": 2.4957581262343154e-05, + "loss": 2.4532, + "step": 15428 + }, + { + "epoch": 1.2451779517391655, + "grad_norm": 0.8339287042617798, + "learning_rate": 2.4947147744299203e-05, + "loss": 2.4621, + "step": 15429 + }, + { + "epoch": 1.2452586554757485, + "grad_norm": 0.7620034217834473, + "learning_rate": 2.493671609676852e-05, + "loss": 2.365, + "step": 15430 + }, + { + "epoch": 1.2453393592123314, + "grad_norm": 0.7445465922355652, + "learning_rate": 2.4926286320011094e-05, + "loss": 2.4764, + "step": 15431 + }, + { + "epoch": 1.2454200629489145, + "grad_norm": 0.7366160154342651, + "learning_rate": 2.4915858414286852e-05, + "loss": 2.4597, + "step": 15432 + }, + { + "epoch": 1.2455007666854976, + "grad_norm": 0.7098437547683716, + "learning_rate": 2.490543237985572e-05, + "loss": 2.4202, + "step": 15433 + }, + { + "epoch": 1.2455814704220805, + "grad_norm": 0.6483333706855774, + "learning_rate": 2.4895008216977478e-05, + "loss": 2.4108, + "step": 15434 + }, + { + "epoch": 1.2456621741586635, + "grad_norm": 0.6797904968261719, + "learning_rate": 2.4884585925911963e-05, + "loss": 2.4414, + "step": 15435 + }, + { + "epoch": 1.2457428778952466, + "grad_norm": 0.6853424310684204, + "learning_rate": 2.4874165506918957e-05, + "loss": 2.4226, + "step": 15436 + }, + { + "epoch": 1.2458235816318295, + "grad_norm": 0.6861590147018433, + "learning_rate": 2.4863746960258094e-05, + "loss": 2.3748, + "step": 15437 + }, + { + "epoch": 1.2459042853684126, + "grad_norm": 0.7360263466835022, + "learning_rate": 2.4853330286189058e-05, + "loss": 2.4441, + "step": 15438 + }, + { + "epoch": 1.2459849891049957, + "grad_norm": 0.6894183158874512, + "learning_rate": 2.4842915484971496e-05, + "loss": 2.3495, + "step": 15439 + }, + { + "epoch": 1.2460656928415785, + "grad_norm": 0.7570669651031494, + "learning_rate": 2.4832502556864923e-05, + 
"loss": 2.4622, + "step": 15440 + }, + { + "epoch": 1.2461463965781616, + "grad_norm": 0.6986069083213806, + "learning_rate": 2.4822091502128876e-05, + "loss": 2.3647, + "step": 15441 + }, + { + "epoch": 1.2462271003147445, + "grad_norm": 0.681450366973877, + "learning_rate": 2.481168232102279e-05, + "loss": 2.3872, + "step": 15442 + }, + { + "epoch": 1.2463078040513276, + "grad_norm": 0.7241837978363037, + "learning_rate": 2.480127501380618e-05, + "loss": 2.4692, + "step": 15443 + }, + { + "epoch": 1.2463885077879107, + "grad_norm": 0.6575295329093933, + "learning_rate": 2.479086958073834e-05, + "loss": 2.5057, + "step": 15444 + }, + { + "epoch": 1.2464692115244935, + "grad_norm": 0.7289770841598511, + "learning_rate": 2.478046602207864e-05, + "loss": 2.4164, + "step": 15445 + }, + { + "epoch": 1.2465499152610766, + "grad_norm": 0.6682024598121643, + "learning_rate": 2.4770064338086374e-05, + "loss": 2.4466, + "step": 15446 + }, + { + "epoch": 1.2466306189976595, + "grad_norm": 0.7238918542861938, + "learning_rate": 2.475966452902072e-05, + "loss": 2.4367, + "step": 15447 + }, + { + "epoch": 1.2467113227342426, + "grad_norm": 0.6825705170631409, + "learning_rate": 2.4749266595140918e-05, + "loss": 2.4337, + "step": 15448 + }, + { + "epoch": 1.2467920264708257, + "grad_norm": 0.7352269887924194, + "learning_rate": 2.4738870536706126e-05, + "loss": 2.4103, + "step": 15449 + }, + { + "epoch": 1.2468727302074085, + "grad_norm": 0.658930778503418, + "learning_rate": 2.4728476353975394e-05, + "loss": 2.4281, + "step": 15450 + }, + { + "epoch": 1.2469534339439916, + "grad_norm": 0.6933601498603821, + "learning_rate": 2.4718084047207778e-05, + "loss": 2.4502, + "step": 15451 + }, + { + "epoch": 1.2470341376805747, + "grad_norm": 0.6901879906654358, + "learning_rate": 2.4707693616662308e-05, + "loss": 2.4057, + "step": 15452 + }, + { + "epoch": 1.2471148414171576, + "grad_norm": 0.7648913860321045, + "learning_rate": 2.469730506259792e-05, + "loss": 2.4163, + "step": 15453 
+ }, + { + "epoch": 1.2471955451537406, + "grad_norm": 0.6496175527572632, + "learning_rate": 2.4686918385273537e-05, + "loss": 2.4373, + "step": 15454 + }, + { + "epoch": 1.2472762488903237, + "grad_norm": 0.6949105858802795, + "learning_rate": 2.4676533584948048e-05, + "loss": 2.4108, + "step": 15455 + }, + { + "epoch": 1.2473569526269066, + "grad_norm": 0.7018688321113586, + "learning_rate": 2.4666150661880206e-05, + "loss": 2.4589, + "step": 15456 + }, + { + "epoch": 1.2474376563634897, + "grad_norm": 0.7141219973564148, + "learning_rate": 2.4655769616328827e-05, + "loss": 2.4022, + "step": 15457 + }, + { + "epoch": 1.2475183601000726, + "grad_norm": 0.7276743054389954, + "learning_rate": 2.4645390448552608e-05, + "loss": 2.4443, + "step": 15458 + }, + { + "epoch": 1.2475990638366556, + "grad_norm": 0.6861153244972229, + "learning_rate": 2.463501315881027e-05, + "loss": 2.4478, + "step": 15459 + }, + { + "epoch": 1.2476797675732387, + "grad_norm": 0.7252256274223328, + "learning_rate": 2.462463774736038e-05, + "loss": 2.446, + "step": 15460 + }, + { + "epoch": 1.2477604713098216, + "grad_norm": 0.6914857625961304, + "learning_rate": 2.4614264214461557e-05, + "loss": 2.4294, + "step": 15461 + }, + { + "epoch": 1.2478411750464047, + "grad_norm": 0.6815036535263062, + "learning_rate": 2.460389256037232e-05, + "loss": 2.4389, + "step": 15462 + }, + { + "epoch": 1.2479218787829875, + "grad_norm": 0.7420194745063782, + "learning_rate": 2.4593522785351176e-05, + "loss": 2.4932, + "step": 15463 + }, + { + "epoch": 1.2480025825195706, + "grad_norm": 0.6622182130813599, + "learning_rate": 2.4583154889656556e-05, + "loss": 2.4327, + "step": 15464 + }, + { + "epoch": 1.2480832862561537, + "grad_norm": 0.6527934074401855, + "learning_rate": 2.457278887354689e-05, + "loss": 2.3857, + "step": 15465 + }, + { + "epoch": 1.2481639899927366, + "grad_norm": 0.6942344903945923, + "learning_rate": 2.4562424737280465e-05, + "loss": 2.4181, + "step": 15466 + }, + { + "epoch": 
1.2482446937293197, + "grad_norm": 0.7449823021888733, + "learning_rate": 2.45520624811156e-05, + "loss": 2.4575, + "step": 15467 + }, + { + "epoch": 1.2483253974659028, + "grad_norm": 0.6905208826065063, + "learning_rate": 2.4541702105310605e-05, + "loss": 2.3858, + "step": 15468 + }, + { + "epoch": 1.2484061012024856, + "grad_norm": 0.6928502917289734, + "learning_rate": 2.4531343610123603e-05, + "loss": 2.4212, + "step": 15469 + }, + { + "epoch": 1.2484868049390687, + "grad_norm": 0.7182145118713379, + "learning_rate": 2.45209869958128e-05, + "loss": 2.4063, + "step": 15470 + }, + { + "epoch": 1.2485675086756518, + "grad_norm": 0.7379452586174011, + "learning_rate": 2.4510632262636314e-05, + "loss": 2.4612, + "step": 15471 + }, + { + "epoch": 1.2486482124122347, + "grad_norm": 0.6663349270820618, + "learning_rate": 2.450027941085219e-05, + "loss": 2.4583, + "step": 15472 + }, + { + "epoch": 1.2487289161488178, + "grad_norm": 0.7266560792922974, + "learning_rate": 2.4489928440718467e-05, + "loss": 2.4483, + "step": 15473 + }, + { + "epoch": 1.2488096198854006, + "grad_norm": 0.7046550512313843, + "learning_rate": 2.447957935249311e-05, + "loss": 2.4087, + "step": 15474 + }, + { + "epoch": 1.2488903236219837, + "grad_norm": 0.684248685836792, + "learning_rate": 2.4469232146434084e-05, + "loss": 2.4352, + "step": 15475 + }, + { + "epoch": 1.2489710273585668, + "grad_norm": 0.6864973902702332, + "learning_rate": 2.4458886822799198e-05, + "loss": 2.3872, + "step": 15476 + }, + { + "epoch": 1.2490517310951497, + "grad_norm": 0.6964752674102783, + "learning_rate": 2.444854338184631e-05, + "loss": 2.437, + "step": 15477 + }, + { + "epoch": 1.2491324348317328, + "grad_norm": 0.6755973100662231, + "learning_rate": 2.4438201823833252e-05, + "loss": 2.4302, + "step": 15478 + }, + { + "epoch": 1.2492131385683156, + "grad_norm": 0.6434857249259949, + "learning_rate": 2.44278621490177e-05, + "loss": 2.406, + "step": 15479 + }, + { + "epoch": 1.2492938423048987, + "grad_norm": 
0.7342328429222107, + "learning_rate": 2.441752435765736e-05, + "loss": 2.451, + "step": 15480 + }, + { + "epoch": 1.2493745460414818, + "grad_norm": 0.7486860752105713, + "learning_rate": 2.44071884500099e-05, + "loss": 2.4536, + "step": 15481 + }, + { + "epoch": 1.2494552497780647, + "grad_norm": 0.7274537086486816, + "learning_rate": 2.4396854426332903e-05, + "loss": 2.4599, + "step": 15482 + }, + { + "epoch": 1.2495359535146477, + "grad_norm": 0.7580124735832214, + "learning_rate": 2.4386522286883918e-05, + "loss": 2.4038, + "step": 15483 + }, + { + "epoch": 1.2496166572512308, + "grad_norm": 0.6776975393295288, + "learning_rate": 2.4376192031920488e-05, + "loss": 2.4246, + "step": 15484 + }, + { + "epoch": 1.2496973609878137, + "grad_norm": 0.6899511814117432, + "learning_rate": 2.4365863661699996e-05, + "loss": 2.3922, + "step": 15485 + }, + { + "epoch": 1.2497780647243968, + "grad_norm": 0.7487930059432983, + "learning_rate": 2.4355537176479903e-05, + "loss": 2.4573, + "step": 15486 + }, + { + "epoch": 1.2498587684609797, + "grad_norm": 0.7306599617004395, + "learning_rate": 2.4345212576517575e-05, + "loss": 2.4745, + "step": 15487 + }, + { + "epoch": 1.2499394721975627, + "grad_norm": 0.7152543067932129, + "learning_rate": 2.43348898620703e-05, + "loss": 2.4768, + "step": 15488 + }, + { + "epoch": 1.2500201759341458, + "grad_norm": 0.6576277017593384, + "learning_rate": 2.432456903339535e-05, + "loss": 2.4289, + "step": 15489 + }, + { + "epoch": 1.2501008796707287, + "grad_norm": 0.6974572539329529, + "learning_rate": 2.4314250090749956e-05, + "loss": 2.4218, + "step": 15490 + }, + { + "epoch": 1.2501815834073118, + "grad_norm": 0.7869577407836914, + "learning_rate": 2.4303933034391323e-05, + "loss": 2.3899, + "step": 15491 + }, + { + "epoch": 1.2502622871438946, + "grad_norm": 0.6723129749298096, + "learning_rate": 2.42936178645765e-05, + "loss": 2.4238, + "step": 15492 + }, + { + "epoch": 1.2503429908804777, + "grad_norm": 0.6839526891708374, + 
"learning_rate": 2.428330458156265e-05, + "loss": 2.4037, + "step": 15493 + }, + { + "epoch": 1.2504236946170608, + "grad_norm": 0.6866093277931213, + "learning_rate": 2.4272993185606796e-05, + "loss": 2.4228, + "step": 15494 + }, + { + "epoch": 1.2505043983536437, + "grad_norm": 0.6992947459220886, + "learning_rate": 2.426268367696588e-05, + "loss": 2.4248, + "step": 15495 + }, + { + "epoch": 1.2505851020902268, + "grad_norm": 0.6836698651313782, + "learning_rate": 2.4252376055896862e-05, + "loss": 2.5387, + "step": 15496 + }, + { + "epoch": 1.2506658058268099, + "grad_norm": 0.6990752816200256, + "learning_rate": 2.4242070322656663e-05, + "loss": 2.4438, + "step": 15497 + }, + { + "epoch": 1.2507465095633927, + "grad_norm": 0.7143029570579529, + "learning_rate": 2.4231766477502082e-05, + "loss": 2.4, + "step": 15498 + }, + { + "epoch": 1.2508272132999758, + "grad_norm": 0.6585043668746948, + "learning_rate": 2.422146452068994e-05, + "loss": 2.4256, + "step": 15499 + }, + { + "epoch": 1.250907917036559, + "grad_norm": 0.739107072353363, + "learning_rate": 2.421116445247702e-05, + "loss": 2.428, + "step": 15500 + }, + { + "epoch": 1.2509886207731418, + "grad_norm": 0.6675287485122681, + "learning_rate": 2.420086627311997e-05, + "loss": 2.5095, + "step": 15501 + }, + { + "epoch": 1.2510693245097249, + "grad_norm": 0.7133405804634094, + "learning_rate": 2.4190569982875467e-05, + "loss": 2.4719, + "step": 15502 + }, + { + "epoch": 1.2511500282463077, + "grad_norm": 0.710904061794281, + "learning_rate": 2.4180275582000134e-05, + "loss": 2.4449, + "step": 15503 + }, + { + "epoch": 1.2512307319828908, + "grad_norm": 0.7088729739189148, + "learning_rate": 2.4169983070750525e-05, + "loss": 2.4059, + "step": 15504 + }, + { + "epoch": 1.2513114357194737, + "grad_norm": 0.7187358736991882, + "learning_rate": 2.4159692449383152e-05, + "loss": 2.4577, + "step": 15505 + }, + { + "epoch": 1.2513921394560568, + "grad_norm": 0.7531955242156982, + "learning_rate": 
2.4149403718154497e-05, + "loss": 2.4101, + "step": 15506 + }, + { + "epoch": 1.2514728431926398, + "grad_norm": 0.7565199136734009, + "learning_rate": 2.413911687732101e-05, + "loss": 2.4805, + "step": 15507 + }, + { + "epoch": 1.2515535469292227, + "grad_norm": 0.706471860408783, + "learning_rate": 2.4128831927139008e-05, + "loss": 2.4494, + "step": 15508 + }, + { + "epoch": 1.2516342506658058, + "grad_norm": 0.7022314667701721, + "learning_rate": 2.4118548867864832e-05, + "loss": 2.4442, + "step": 15509 + }, + { + "epoch": 1.251714954402389, + "grad_norm": 0.6885591745376587, + "learning_rate": 2.4108267699754806e-05, + "loss": 2.4186, + "step": 15510 + }, + { + "epoch": 1.2517956581389718, + "grad_norm": 0.6963610649108887, + "learning_rate": 2.409798842306511e-05, + "loss": 2.4209, + "step": 15511 + }, + { + "epoch": 1.2518763618755548, + "grad_norm": 0.7117185592651367, + "learning_rate": 2.4087711038051942e-05, + "loss": 2.4106, + "step": 15512 + }, + { + "epoch": 1.251957065612138, + "grad_norm": 0.6944519281387329, + "learning_rate": 2.407743554497146e-05, + "loss": 2.4493, + "step": 15513 + }, + { + "epoch": 1.2520377693487208, + "grad_norm": 0.689818263053894, + "learning_rate": 2.406716194407974e-05, + "loss": 2.4358, + "step": 15514 + }, + { + "epoch": 1.2521184730853039, + "grad_norm": 0.8132768273353577, + "learning_rate": 2.4056890235632846e-05, + "loss": 2.4574, + "step": 15515 + }, + { + "epoch": 1.252199176821887, + "grad_norm": 0.6855002045631409, + "learning_rate": 2.4046620419886777e-05, + "loss": 2.4118, + "step": 15516 + }, + { + "epoch": 1.2522798805584698, + "grad_norm": 0.6616373658180237, + "learning_rate": 2.4036352497097458e-05, + "loss": 2.4332, + "step": 15517 + }, + { + "epoch": 1.252360584295053, + "grad_norm": 0.6657225489616394, + "learning_rate": 2.4026086467520803e-05, + "loss": 2.3989, + "step": 15518 + }, + { + "epoch": 1.2524412880316358, + "grad_norm": 0.6796447038650513, + "learning_rate": 2.4015822331412664e-05, + "loss": 
2.4269, + "step": 15519 + }, + { + "epoch": 1.2525219917682189, + "grad_norm": 0.7168079614639282, + "learning_rate": 2.400556008902889e-05, + "loss": 2.4263, + "step": 15520 + }, + { + "epoch": 1.2526026955048017, + "grad_norm": 0.6985058188438416, + "learning_rate": 2.3995299740625186e-05, + "loss": 2.437, + "step": 15521 + }, + { + "epoch": 1.2526833992413848, + "grad_norm": 0.7078086137771606, + "learning_rate": 2.3985041286457287e-05, + "loss": 2.3996, + "step": 15522 + }, + { + "epoch": 1.252764102977968, + "grad_norm": 0.6989054083824158, + "learning_rate": 2.3974784726780865e-05, + "loss": 2.4717, + "step": 15523 + }, + { + "epoch": 1.2528448067145508, + "grad_norm": 0.747606098651886, + "learning_rate": 2.396453006185153e-05, + "loss": 2.4228, + "step": 15524 + }, + { + "epoch": 1.2529255104511339, + "grad_norm": 0.7500887513160706, + "learning_rate": 2.3954277291924876e-05, + "loss": 2.4636, + "step": 15525 + }, + { + "epoch": 1.253006214187717, + "grad_norm": 0.7710712552070618, + "learning_rate": 2.3944026417256437e-05, + "loss": 2.4405, + "step": 15526 + }, + { + "epoch": 1.2530869179242998, + "grad_norm": 0.7278285622596741, + "learning_rate": 2.3933777438101657e-05, + "loss": 2.4279, + "step": 15527 + }, + { + "epoch": 1.253167621660883, + "grad_norm": 0.6979010701179504, + "learning_rate": 2.3923530354715973e-05, + "loss": 2.4272, + "step": 15528 + }, + { + "epoch": 1.253248325397466, + "grad_norm": 0.7330336570739746, + "learning_rate": 2.3913285167354804e-05, + "loss": 2.3861, + "step": 15529 + }, + { + "epoch": 1.2533290291340489, + "grad_norm": 0.675499677658081, + "learning_rate": 2.3903041876273436e-05, + "loss": 2.3987, + "step": 15530 + }, + { + "epoch": 1.253409732870632, + "grad_norm": 0.6854682564735413, + "learning_rate": 2.3892800481727186e-05, + "loss": 2.4085, + "step": 15531 + }, + { + "epoch": 1.253490436607215, + "grad_norm": 0.713810384273529, + "learning_rate": 2.388256098397129e-05, + "loss": 2.3897, + "step": 15532 + }, + { + 
"epoch": 1.253571140343798, + "grad_norm": 0.683214545249939, + "learning_rate": 2.3872323383260953e-05, + "loss": 2.4526, + "step": 15533 + }, + { + "epoch": 1.253651844080381, + "grad_norm": 0.6718357801437378, + "learning_rate": 2.3862087679851318e-05, + "loss": 2.4612, + "step": 15534 + }, + { + "epoch": 1.2537325478169639, + "grad_norm": 0.722283124923706, + "learning_rate": 2.3851853873997488e-05, + "loss": 2.4163, + "step": 15535 + }, + { + "epoch": 1.253813251553547, + "grad_norm": 0.689393162727356, + "learning_rate": 2.384162196595453e-05, + "loss": 2.3984, + "step": 15536 + }, + { + "epoch": 1.2538939552901298, + "grad_norm": 0.7146410346031189, + "learning_rate": 2.3831391955977412e-05, + "loss": 2.4442, + "step": 15537 + }, + { + "epoch": 1.253974659026713, + "grad_norm": 0.6651021838188171, + "learning_rate": 2.3821163844321104e-05, + "loss": 2.4064, + "step": 15538 + }, + { + "epoch": 1.254055362763296, + "grad_norm": 0.7088985443115234, + "learning_rate": 2.381093763124056e-05, + "loss": 2.4831, + "step": 15539 + }, + { + "epoch": 1.2541360664998789, + "grad_norm": 0.661375105381012, + "learning_rate": 2.3800713316990588e-05, + "loss": 2.3657, + "step": 15540 + }, + { + "epoch": 1.254216770236462, + "grad_norm": 0.6870979070663452, + "learning_rate": 2.3790490901826012e-05, + "loss": 2.4208, + "step": 15541 + }, + { + "epoch": 1.254297473973045, + "grad_norm": 0.6256219148635864, + "learning_rate": 2.3780270386001657e-05, + "loss": 2.4182, + "step": 15542 + }, + { + "epoch": 1.254378177709628, + "grad_norm": 0.7070638537406921, + "learning_rate": 2.377005176977215e-05, + "loss": 2.3758, + "step": 15543 + }, + { + "epoch": 1.254458881446211, + "grad_norm": 0.6571370363235474, + "learning_rate": 2.3759835053392242e-05, + "loss": 2.3927, + "step": 15544 + }, + { + "epoch": 1.254539585182794, + "grad_norm": 0.644263744354248, + "learning_rate": 2.3749620237116565e-05, + "loss": 2.3992, + "step": 15545 + }, + { + "epoch": 1.254620288919377, + 
"grad_norm": 0.7127394676208496, + "learning_rate": 2.3739407321199648e-05, + "loss": 2.3942, + "step": 15546 + }, + { + "epoch": 1.25470099265596, + "grad_norm": 0.7274866104125977, + "learning_rate": 2.372919630589605e-05, + "loss": 2.5232, + "step": 15547 + }, + { + "epoch": 1.2547816963925431, + "grad_norm": 0.690138041973114, + "learning_rate": 2.3718987191460274e-05, + "loss": 2.4371, + "step": 15548 + }, + { + "epoch": 1.254862400129126, + "grad_norm": 0.6990681886672974, + "learning_rate": 2.3708779978146724e-05, + "loss": 2.4568, + "step": 15549 + }, + { + "epoch": 1.254943103865709, + "grad_norm": 0.7430790662765503, + "learning_rate": 2.3698574666209793e-05, + "loss": 2.423, + "step": 15550 + }, + { + "epoch": 1.255023807602292, + "grad_norm": 0.6991416215896606, + "learning_rate": 2.3688371255903828e-05, + "loss": 2.4529, + "step": 15551 + }, + { + "epoch": 1.255104511338875, + "grad_norm": 0.6733322739601135, + "learning_rate": 2.367816974748317e-05, + "loss": 2.4531, + "step": 15552 + }, + { + "epoch": 1.2551852150754579, + "grad_norm": 0.7460463047027588, + "learning_rate": 2.3667970141202e-05, + "loss": 2.4267, + "step": 15553 + }, + { + "epoch": 1.255265918812041, + "grad_norm": 0.6784021854400635, + "learning_rate": 2.3657772437314517e-05, + "loss": 2.4996, + "step": 15554 + }, + { + "epoch": 1.255346622548624, + "grad_norm": 0.7499529719352722, + "learning_rate": 2.3647576636074975e-05, + "loss": 2.4749, + "step": 15555 + }, + { + "epoch": 1.255427326285207, + "grad_norm": 0.6698335409164429, + "learning_rate": 2.3637382737737368e-05, + "loss": 2.4499, + "step": 15556 + }, + { + "epoch": 1.25550803002179, + "grad_norm": 0.6644846200942993, + "learning_rate": 2.3627190742555806e-05, + "loss": 2.397, + "step": 15557 + }, + { + "epoch": 1.255588733758373, + "grad_norm": 0.7041488289833069, + "learning_rate": 2.3617000650784315e-05, + "loss": 2.4012, + "step": 15558 + }, + { + "epoch": 1.255669437494956, + "grad_norm": 0.72523033618927, + 
"learning_rate": 2.3606812462676798e-05, + "loss": 2.4151, + "step": 15559 + }, + { + "epoch": 1.255750141231539, + "grad_norm": 0.77669757604599, + "learning_rate": 2.3596626178487225e-05, + "loss": 2.4478, + "step": 15560 + }, + { + "epoch": 1.2558308449681221, + "grad_norm": 0.6919559836387634, + "learning_rate": 2.3586441798469462e-05, + "loss": 2.4548, + "step": 15561 + }, + { + "epoch": 1.255911548704705, + "grad_norm": 0.7613349556922913, + "learning_rate": 2.3576259322877292e-05, + "loss": 2.4475, + "step": 15562 + }, + { + "epoch": 1.255992252441288, + "grad_norm": 0.6738333106040955, + "learning_rate": 2.3566078751964515e-05, + "loss": 2.4242, + "step": 15563 + }, + { + "epoch": 1.256072956177871, + "grad_norm": 0.7242118716239929, + "learning_rate": 2.355590008598486e-05, + "loss": 2.4047, + "step": 15564 + }, + { + "epoch": 1.256153659914454, + "grad_norm": 0.7117685675621033, + "learning_rate": 2.354572332519199e-05, + "loss": 2.4473, + "step": 15565 + }, + { + "epoch": 1.256234363651037, + "grad_norm": 0.7466531991958618, + "learning_rate": 2.3535548469839564e-05, + "loss": 2.453, + "step": 15566 + }, + { + "epoch": 1.25631506738762, + "grad_norm": 0.6750668883323669, + "learning_rate": 2.3525375520181136e-05, + "loss": 2.4367, + "step": 15567 + }, + { + "epoch": 1.256395771124203, + "grad_norm": 0.7640851736068726, + "learning_rate": 2.35152044764703e-05, + "loss": 2.5014, + "step": 15568 + }, + { + "epoch": 1.256476474860786, + "grad_norm": 0.7198928594589233, + "learning_rate": 2.3505035338960456e-05, + "loss": 2.5138, + "step": 15569 + }, + { + "epoch": 1.256557178597369, + "grad_norm": 0.7079946398735046, + "learning_rate": 2.349486810790511e-05, + "loss": 2.4172, + "step": 15570 + }, + { + "epoch": 1.2566378823339521, + "grad_norm": 0.7477186918258667, + "learning_rate": 2.3484702783557655e-05, + "loss": 2.4224, + "step": 15571 + }, + { + "epoch": 1.256718586070535, + "grad_norm": 0.6875394582748413, + "learning_rate": 2.3474539366171388e-05, + 
"loss": 2.4621, + "step": 15572 + }, + { + "epoch": 1.256799289807118, + "grad_norm": 0.7164824604988098, + "learning_rate": 2.346437785599964e-05, + "loss": 2.4416, + "step": 15573 + }, + { + "epoch": 1.2568799935437012, + "grad_norm": 0.7031935453414917, + "learning_rate": 2.3454218253295668e-05, + "loss": 2.3943, + "step": 15574 + }, + { + "epoch": 1.256960697280284, + "grad_norm": 0.6739614009857178, + "learning_rate": 2.3444060558312665e-05, + "loss": 2.4114, + "step": 15575 + }, + { + "epoch": 1.2570414010168671, + "grad_norm": 0.6710866689682007, + "learning_rate": 2.3433904771303794e-05, + "loss": 2.4077, + "step": 15576 + }, + { + "epoch": 1.2571221047534502, + "grad_norm": 0.6589750051498413, + "learning_rate": 2.342375089252219e-05, + "loss": 2.3494, + "step": 15577 + }, + { + "epoch": 1.257202808490033, + "grad_norm": 0.7018333077430725, + "learning_rate": 2.3413598922220857e-05, + "loss": 2.459, + "step": 15578 + }, + { + "epoch": 1.2572835122266162, + "grad_norm": 0.7735301852226257, + "learning_rate": 2.3403448860652842e-05, + "loss": 2.4524, + "step": 15579 + }, + { + "epoch": 1.257364215963199, + "grad_norm": 0.7009726762771606, + "learning_rate": 2.339330070807113e-05, + "loss": 2.4244, + "step": 15580 + }, + { + "epoch": 1.2574449196997821, + "grad_norm": 0.671521008014679, + "learning_rate": 2.3383154464728595e-05, + "loss": 2.3808, + "step": 15581 + }, + { + "epoch": 1.257525623436365, + "grad_norm": 0.7736711502075195, + "learning_rate": 2.3373010130878126e-05, + "loss": 2.4936, + "step": 15582 + }, + { + "epoch": 1.257606327172948, + "grad_norm": 0.6987056136131287, + "learning_rate": 2.336286770677255e-05, + "loss": 2.4484, + "step": 15583 + }, + { + "epoch": 1.2576870309095312, + "grad_norm": 0.6337067484855652, + "learning_rate": 2.3352727192664635e-05, + "loss": 2.4196, + "step": 15584 + }, + { + "epoch": 1.257767734646114, + "grad_norm": 0.6832795143127441, + "learning_rate": 2.3342588588807123e-05, + "loss": 2.3681, + "step": 15585 + }, 
+ { + "epoch": 1.257848438382697, + "grad_norm": 0.7208079695701599, + "learning_rate": 2.3332451895452688e-05, + "loss": 2.4436, + "step": 15586 + }, + { + "epoch": 1.2579291421192802, + "grad_norm": 0.6607621312141418, + "learning_rate": 2.3322317112853986e-05, + "loss": 2.4088, + "step": 15587 + }, + { + "epoch": 1.258009845855863, + "grad_norm": 0.7261247038841248, + "learning_rate": 2.331218424126356e-05, + "loss": 2.4389, + "step": 15588 + }, + { + "epoch": 1.2580905495924462, + "grad_norm": 0.6187729239463806, + "learning_rate": 2.3302053280933954e-05, + "loss": 2.3568, + "step": 15589 + }, + { + "epoch": 1.2581712533290292, + "grad_norm": 0.6196430921554565, + "learning_rate": 2.3291924232117713e-05, + "loss": 2.4285, + "step": 15590 + }, + { + "epoch": 1.258251957065612, + "grad_norm": 0.7271853685379028, + "learning_rate": 2.3281797095067193e-05, + "loss": 2.4058, + "step": 15591 + }, + { + "epoch": 1.2583326608021952, + "grad_norm": 0.7141130566596985, + "learning_rate": 2.327167187003484e-05, + "loss": 2.3971, + "step": 15592 + }, + { + "epoch": 1.2584133645387783, + "grad_norm": 0.680743932723999, + "learning_rate": 2.3261548557273027e-05, + "loss": 2.4387, + "step": 15593 + }, + { + "epoch": 1.2584940682753611, + "grad_norm": 0.718173086643219, + "learning_rate": 2.3251427157033955e-05, + "loss": 2.43, + "step": 15594 + }, + { + "epoch": 1.2585747720119442, + "grad_norm": 0.7600045800209045, + "learning_rate": 2.324130766956998e-05, + "loss": 2.4584, + "step": 15595 + }, + { + "epoch": 1.258655475748527, + "grad_norm": 0.7432500123977661, + "learning_rate": 2.3231190095133294e-05, + "loss": 2.4717, + "step": 15596 + }, + { + "epoch": 1.2587361794851102, + "grad_norm": 0.6603000164031982, + "learning_rate": 2.3221074433975988e-05, + "loss": 2.3952, + "step": 15597 + }, + { + "epoch": 1.258816883221693, + "grad_norm": 0.7020140290260315, + "learning_rate": 2.3210960686350213e-05, + "loss": 2.4064, + "step": 15598 + }, + { + "epoch": 1.2588975869582761, 
+ "grad_norm": 0.7434887290000916, + "learning_rate": 2.320084885250804e-05, + "loss": 2.4708, + "step": 15599 + }, + { + "epoch": 1.2589782906948592, + "grad_norm": 0.6626797318458557, + "learning_rate": 2.3190738932701482e-05, + "loss": 2.4503, + "step": 15600 + }, + { + "epoch": 1.259058994431442, + "grad_norm": 0.7880598902702332, + "learning_rate": 2.3180630927182466e-05, + "loss": 2.384, + "step": 15601 + }, + { + "epoch": 1.2591396981680252, + "grad_norm": 0.7766147255897522, + "learning_rate": 2.3170524836202933e-05, + "loss": 2.4019, + "step": 15602 + }, + { + "epoch": 1.2592204019046083, + "grad_norm": 0.7817980051040649, + "learning_rate": 2.3160420660014792e-05, + "loss": 2.4729, + "step": 15603 + }, + { + "epoch": 1.2593011056411911, + "grad_norm": 0.6915614604949951, + "learning_rate": 2.3150318398869787e-05, + "loss": 2.4028, + "step": 15604 + }, + { + "epoch": 1.2593818093777742, + "grad_norm": 0.690882682800293, + "learning_rate": 2.3140218053019714e-05, + "loss": 2.4386, + "step": 15605 + }, + { + "epoch": 1.2594625131143573, + "grad_norm": 0.6670350432395935, + "learning_rate": 2.3130119622716382e-05, + "loss": 2.4224, + "step": 15606 + }, + { + "epoch": 1.2595432168509402, + "grad_norm": 0.6680006980895996, + "learning_rate": 2.3120023108211375e-05, + "loss": 2.3475, + "step": 15607 + }, + { + "epoch": 1.2596239205875233, + "grad_norm": 0.7003577947616577, + "learning_rate": 2.310992850975636e-05, + "loss": 2.4198, + "step": 15608 + }, + { + "epoch": 1.2597046243241061, + "grad_norm": 0.7444167733192444, + "learning_rate": 2.3099835827602944e-05, + "loss": 2.3756, + "step": 15609 + }, + { + "epoch": 1.2597853280606892, + "grad_norm": 0.6757989525794983, + "learning_rate": 2.3089745062002612e-05, + "loss": 2.3955, + "step": 15610 + }, + { + "epoch": 1.259866031797272, + "grad_norm": 0.6955820322036743, + "learning_rate": 2.3079656213206878e-05, + "loss": 2.4031, + "step": 15611 + }, + { + "epoch": 1.2599467355338552, + "grad_norm": 
0.6646408438682556, + "learning_rate": 2.3069569281467184e-05, + "loss": 2.4246, + "step": 15612 + }, + { + "epoch": 1.2600274392704383, + "grad_norm": 0.6922882199287415, + "learning_rate": 2.3059484267034958e-05, + "loss": 2.4157, + "step": 15613 + }, + { + "epoch": 1.2601081430070211, + "grad_norm": 0.8092310428619385, + "learning_rate": 2.3049401170161468e-05, + "loss": 2.4137, + "step": 15614 + }, + { + "epoch": 1.2601888467436042, + "grad_norm": 0.7024559378623962, + "learning_rate": 2.3039319991098063e-05, + "loss": 2.4497, + "step": 15615 + }, + { + "epoch": 1.2602695504801873, + "grad_norm": 0.7096099853515625, + "learning_rate": 2.302924073009597e-05, + "loss": 2.4045, + "step": 15616 + }, + { + "epoch": 1.2603502542167702, + "grad_norm": 0.6777564287185669, + "learning_rate": 2.3019163387406406e-05, + "loss": 2.4607, + "step": 15617 + }, + { + "epoch": 1.2604309579533532, + "grad_norm": 0.7564159035682678, + "learning_rate": 2.300908796328052e-05, + "loss": 2.4985, + "step": 15618 + }, + { + "epoch": 1.2605116616899363, + "grad_norm": 0.7432986497879028, + "learning_rate": 2.2999014457969447e-05, + "loss": 2.4326, + "step": 15619 + }, + { + "epoch": 1.2605923654265192, + "grad_norm": 0.7178141474723816, + "learning_rate": 2.2988942871724182e-05, + "loss": 2.4118, + "step": 15620 + }, + { + "epoch": 1.2606730691631023, + "grad_norm": 0.7074497938156128, + "learning_rate": 2.2978873204795782e-05, + "loss": 2.4163, + "step": 15621 + }, + { + "epoch": 1.2607537728996854, + "grad_norm": 0.670200765132904, + "learning_rate": 2.2968805457435217e-05, + "loss": 2.4081, + "step": 15622 + }, + { + "epoch": 1.2608344766362682, + "grad_norm": 0.7258187532424927, + "learning_rate": 2.2958739629893355e-05, + "loss": 2.4889, + "step": 15623 + }, + { + "epoch": 1.2609151803728513, + "grad_norm": 0.6999781727790833, + "learning_rate": 2.2948675722421086e-05, + "loss": 2.3945, + "step": 15624 + }, + { + "epoch": 1.2609958841094342, + "grad_norm": 0.7030084133148193, + 
"learning_rate": 2.2938613735269243e-05, + "loss": 2.4509, + "step": 15625 + }, + { + "epoch": 1.2610765878460173, + "grad_norm": 0.6875420212745667, + "learning_rate": 2.292855366868858e-05, + "loss": 2.3658, + "step": 15626 + }, + { + "epoch": 1.2611572915826001, + "grad_norm": 0.7375235557556152, + "learning_rate": 2.2918495522929817e-05, + "loss": 2.4308, + "step": 15627 + }, + { + "epoch": 1.2612379953191832, + "grad_norm": 0.7021106481552124, + "learning_rate": 2.2908439298243644e-05, + "loss": 2.4046, + "step": 15628 + }, + { + "epoch": 1.2613186990557663, + "grad_norm": 0.76661616563797, + "learning_rate": 2.2898384994880716e-05, + "loss": 2.5156, + "step": 15629 + }, + { + "epoch": 1.2613994027923492, + "grad_norm": 0.6684869527816772, + "learning_rate": 2.2888332613091558e-05, + "loss": 2.4342, + "step": 15630 + }, + { + "epoch": 1.2614801065289323, + "grad_norm": 0.6878669261932373, + "learning_rate": 2.2878282153126706e-05, + "loss": 2.4544, + "step": 15631 + }, + { + "epoch": 1.2615608102655154, + "grad_norm": 0.6659132838249207, + "learning_rate": 2.2868233615236702e-05, + "loss": 2.4341, + "step": 15632 + }, + { + "epoch": 1.2616415140020982, + "grad_norm": 0.657474160194397, + "learning_rate": 2.2858186999671905e-05, + "loss": 2.3515, + "step": 15633 + }, + { + "epoch": 1.2617222177386813, + "grad_norm": 0.7245650291442871, + "learning_rate": 2.284814230668274e-05, + "loss": 2.3983, + "step": 15634 + }, + { + "epoch": 1.2618029214752644, + "grad_norm": 0.6400195360183716, + "learning_rate": 2.2838099536519554e-05, + "loss": 2.3535, + "step": 15635 + }, + { + "epoch": 1.2618836252118473, + "grad_norm": 0.6719450950622559, + "learning_rate": 2.282805868943262e-05, + "loss": 2.3906, + "step": 15636 + }, + { + "epoch": 1.2619643289484304, + "grad_norm": 0.682746946811676, + "learning_rate": 2.2818019765672207e-05, + "loss": 2.4045, + "step": 15637 + }, + { + "epoch": 1.2620450326850134, + "grad_norm": 0.6631760597229004, + "learning_rate": 
2.2807982765488513e-05, + "loss": 2.4896, + "step": 15638 + }, + { + "epoch": 1.2621257364215963, + "grad_norm": 0.782202422618866, + "learning_rate": 2.279794768913164e-05, + "loss": 2.4628, + "step": 15639 + }, + { + "epoch": 1.2622064401581794, + "grad_norm": 0.7579823732376099, + "learning_rate": 2.278791453685173e-05, + "loss": 2.4635, + "step": 15640 + }, + { + "epoch": 1.2622871438947623, + "grad_norm": 0.665096640586853, + "learning_rate": 2.277788330889884e-05, + "loss": 2.4899, + "step": 15641 + }, + { + "epoch": 1.2623678476313454, + "grad_norm": 0.7635685205459595, + "learning_rate": 2.2767854005522936e-05, + "loss": 2.4146, + "step": 15642 + }, + { + "epoch": 1.2624485513679282, + "grad_norm": 0.7579118609428406, + "learning_rate": 2.2757826626974e-05, + "loss": 2.3692, + "step": 15643 + }, + { + "epoch": 1.2625292551045113, + "grad_norm": 0.6772074699401855, + "learning_rate": 2.2747801173501938e-05, + "loss": 2.3954, + "step": 15644 + }, + { + "epoch": 1.2626099588410944, + "grad_norm": 0.7028382420539856, + "learning_rate": 2.2737777645356606e-05, + "loss": 2.4799, + "step": 15645 + }, + { + "epoch": 1.2626906625776773, + "grad_norm": 0.7152617573738098, + "learning_rate": 2.2727756042787818e-05, + "loss": 2.4095, + "step": 15646 + }, + { + "epoch": 1.2627713663142603, + "grad_norm": 0.7286608219146729, + "learning_rate": 2.271773636604535e-05, + "loss": 2.4496, + "step": 15647 + }, + { + "epoch": 1.2628520700508434, + "grad_norm": 0.7006896734237671, + "learning_rate": 2.2707718615378935e-05, + "loss": 2.4128, + "step": 15648 + }, + { + "epoch": 1.2629327737874263, + "grad_norm": 0.6856697797775269, + "learning_rate": 2.2697702791038177e-05, + "loss": 2.4169, + "step": 15649 + }, + { + "epoch": 1.2630134775240094, + "grad_norm": 0.7582918405532837, + "learning_rate": 2.268768889327275e-05, + "loss": 2.4007, + "step": 15650 + }, + { + "epoch": 1.2630941812605925, + "grad_norm": 0.664633572101593, + "learning_rate": 2.2677676922332237e-05, + "loss": 
2.3876, + "step": 15651 + }, + { + "epoch": 1.2631748849971753, + "grad_norm": 0.7283070087432861, + "learning_rate": 2.266766687846611e-05, + "loss": 2.4175, + "step": 15652 + }, + { + "epoch": 1.2632555887337584, + "grad_norm": 0.7309537529945374, + "learning_rate": 2.2657658761923863e-05, + "loss": 2.3998, + "step": 15653 + }, + { + "epoch": 1.2633362924703415, + "grad_norm": 0.6386510133743286, + "learning_rate": 2.2647652572954968e-05, + "loss": 2.3723, + "step": 15654 + }, + { + "epoch": 1.2634169962069244, + "grad_norm": 0.6805689930915833, + "learning_rate": 2.263764831180876e-05, + "loss": 2.3989, + "step": 15655 + }, + { + "epoch": 1.2634976999435072, + "grad_norm": 0.7147208452224731, + "learning_rate": 2.2627645978734536e-05, + "loss": 2.4748, + "step": 15656 + }, + { + "epoch": 1.2635784036800903, + "grad_norm": 0.6835155487060547, + "learning_rate": 2.2617645573981683e-05, + "loss": 2.4266, + "step": 15657 + }, + { + "epoch": 1.2636591074166734, + "grad_norm": 0.7631552219390869, + "learning_rate": 2.2607647097799368e-05, + "loss": 2.4152, + "step": 15658 + }, + { + "epoch": 1.2637398111532563, + "grad_norm": 0.6793624758720398, + "learning_rate": 2.2597650550436777e-05, + "loss": 2.3491, + "step": 15659 + }, + { + "epoch": 1.2638205148898394, + "grad_norm": 0.6465637683868408, + "learning_rate": 2.2587655932143083e-05, + "loss": 2.3774, + "step": 15660 + }, + { + "epoch": 1.2639012186264225, + "grad_norm": 0.6920284628868103, + "learning_rate": 2.2577663243167368e-05, + "loss": 2.4321, + "step": 15661 + }, + { + "epoch": 1.2639819223630053, + "grad_norm": 0.6922522783279419, + "learning_rate": 2.256767248375866e-05, + "loss": 2.4242, + "step": 15662 + }, + { + "epoch": 1.2640626260995884, + "grad_norm": 0.6811214089393616, + "learning_rate": 2.255768365416595e-05, + "loss": 2.4101, + "step": 15663 + }, + { + "epoch": 1.2641433298361715, + "grad_norm": 0.6704947352409363, + "learning_rate": 2.2547696754638238e-05, + "loss": 2.4792, + "step": 15664 + 
}, + { + "epoch": 1.2642240335727544, + "grad_norm": 0.6814701557159424, + "learning_rate": 2.2537711785424354e-05, + "loss": 2.4429, + "step": 15665 + }, + { + "epoch": 1.2643047373093375, + "grad_norm": 0.6778244972229004, + "learning_rate": 2.252772874677318e-05, + "loss": 2.3882, + "step": 15666 + }, + { + "epoch": 1.2643854410459205, + "grad_norm": 0.6570093035697937, + "learning_rate": 2.2517747638933518e-05, + "loss": 2.4162, + "step": 15667 + }, + { + "epoch": 1.2644661447825034, + "grad_norm": 0.6973466873168945, + "learning_rate": 2.2507768462154133e-05, + "loss": 2.3646, + "step": 15668 + }, + { + "epoch": 1.2645468485190865, + "grad_norm": 0.7258623242378235, + "learning_rate": 2.2497791216683715e-05, + "loss": 2.404, + "step": 15669 + }, + { + "epoch": 1.2646275522556694, + "grad_norm": 0.7462170124053955, + "learning_rate": 2.248781590277097e-05, + "loss": 2.5076, + "step": 15670 + }, + { + "epoch": 1.2647082559922525, + "grad_norm": 0.7070441246032715, + "learning_rate": 2.247784252066444e-05, + "loss": 2.3817, + "step": 15671 + }, + { + "epoch": 1.2647889597288353, + "grad_norm": 0.7150183916091919, + "learning_rate": 2.246787107061272e-05, + "loss": 2.461, + "step": 15672 + }, + { + "epoch": 1.2648696634654184, + "grad_norm": 0.668436586856842, + "learning_rate": 2.2457901552864347e-05, + "loss": 2.466, + "step": 15673 + }, + { + "epoch": 1.2649503672020015, + "grad_norm": 0.7011097073554993, + "learning_rate": 2.2447933967667745e-05, + "loss": 2.4582, + "step": 15674 + }, + { + "epoch": 1.2650310709385844, + "grad_norm": 0.7149096727371216, + "learning_rate": 2.243796831527134e-05, + "loss": 2.4461, + "step": 15675 + }, + { + "epoch": 1.2651117746751674, + "grad_norm": 0.6810914278030396, + "learning_rate": 2.2428004595923525e-05, + "loss": 2.4043, + "step": 15676 + }, + { + "epoch": 1.2651924784117505, + "grad_norm": 0.7700765132904053, + "learning_rate": 2.241804280987261e-05, + "loss": 2.4197, + "step": 15677 + }, + { + "epoch": 
1.2652731821483334, + "grad_norm": 0.6897448897361755, + "learning_rate": 2.240808295736686e-05, + "loss": 2.4052, + "step": 15678 + }, + { + "epoch": 1.2653538858849165, + "grad_norm": 0.7092932462692261, + "learning_rate": 2.2398125038654515e-05, + "loss": 2.4088, + "step": 15679 + }, + { + "epoch": 1.2654345896214996, + "grad_norm": 0.6930294632911682, + "learning_rate": 2.2388169053983777e-05, + "loss": 2.4504, + "step": 15680 + }, + { + "epoch": 1.2655152933580824, + "grad_norm": 0.7056782245635986, + "learning_rate": 2.237821500360271e-05, + "loss": 2.3975, + "step": 15681 + }, + { + "epoch": 1.2655959970946655, + "grad_norm": 0.651772141456604, + "learning_rate": 2.236826288775944e-05, + "loss": 2.3941, + "step": 15682 + }, + { + "epoch": 1.2656767008312486, + "grad_norm": 0.7254980206489563, + "learning_rate": 2.2358312706702012e-05, + "loss": 2.4149, + "step": 15683 + }, + { + "epoch": 1.2657574045678315, + "grad_norm": 0.6553635597229004, + "learning_rate": 2.2348364460678373e-05, + "loss": 2.4099, + "step": 15684 + }, + { + "epoch": 1.2658381083044146, + "grad_norm": 0.6952616572380066, + "learning_rate": 2.233841814993646e-05, + "loss": 2.384, + "step": 15685 + }, + { + "epoch": 1.2659188120409974, + "grad_norm": 0.72947096824646, + "learning_rate": 2.2328473774724178e-05, + "loss": 2.5033, + "step": 15686 + }, + { + "epoch": 1.2659995157775805, + "grad_norm": 0.7419683933258057, + "learning_rate": 2.231853133528937e-05, + "loss": 2.4881, + "step": 15687 + }, + { + "epoch": 1.2660802195141634, + "grad_norm": 0.7125211358070374, + "learning_rate": 2.2308590831879827e-05, + "loss": 2.4334, + "step": 15688 + }, + { + "epoch": 1.2661609232507465, + "grad_norm": 0.6668617129325867, + "learning_rate": 2.2298652264743315e-05, + "loss": 2.4144, + "step": 15689 + }, + { + "epoch": 1.2662416269873296, + "grad_norm": 0.8075512051582336, + "learning_rate": 2.2288715634127465e-05, + "loss": 2.421, + "step": 15690 + }, + { + "epoch": 1.2663223307239124, + 
"grad_norm": 0.6894629001617432, + "learning_rate": 2.2278780940279965e-05, + "loss": 2.4142, + "step": 15691 + }, + { + "epoch": 1.2664030344604955, + "grad_norm": 0.7418074011802673, + "learning_rate": 2.226884818344841e-05, + "loss": 2.4214, + "step": 15692 + }, + { + "epoch": 1.2664837381970786, + "grad_norm": 0.6724219918251038, + "learning_rate": 2.225891736388037e-05, + "loss": 2.4455, + "step": 15693 + }, + { + "epoch": 1.2665644419336615, + "grad_norm": 0.7202882766723633, + "learning_rate": 2.224898848182331e-05, + "loss": 2.4017, + "step": 15694 + }, + { + "epoch": 1.2666451456702446, + "grad_norm": 0.7671259641647339, + "learning_rate": 2.2239061537524698e-05, + "loss": 2.4386, + "step": 15695 + }, + { + "epoch": 1.2667258494068276, + "grad_norm": 0.7154317498207092, + "learning_rate": 2.222913653123194e-05, + "loss": 2.3754, + "step": 15696 + }, + { + "epoch": 1.2668065531434105, + "grad_norm": 0.7203264236450195, + "learning_rate": 2.221921346319239e-05, + "loss": 2.3926, + "step": 15697 + }, + { + "epoch": 1.2668872568799936, + "grad_norm": 0.7104187607765198, + "learning_rate": 2.2209292333653365e-05, + "loss": 2.4528, + "step": 15698 + }, + { + "epoch": 1.2669679606165767, + "grad_norm": 0.7650138139724731, + "learning_rate": 2.2199373142862158e-05, + "loss": 2.4372, + "step": 15699 + }, + { + "epoch": 1.2670486643531595, + "grad_norm": 0.6796044111251831, + "learning_rate": 2.2189455891065903e-05, + "loss": 2.415, + "step": 15700 + }, + { + "epoch": 1.2671293680897426, + "grad_norm": 0.6749297380447388, + "learning_rate": 2.2179540578511813e-05, + "loss": 2.4337, + "step": 15701 + }, + { + "epoch": 1.2672100718263255, + "grad_norm": 0.7330272793769836, + "learning_rate": 2.216962720544703e-05, + "loss": 2.4322, + "step": 15702 + }, + { + "epoch": 1.2672907755629086, + "grad_norm": 0.6793510913848877, + "learning_rate": 2.215971577211855e-05, + "loss": 2.4473, + "step": 15703 + }, + { + "epoch": 1.2673714792994915, + "grad_norm": 
0.7477267384529114, + "learning_rate": 2.2149806278773433e-05, + "loss": 2.4699, + "step": 15704 + }, + { + "epoch": 1.2674521830360745, + "grad_norm": 0.7048643827438354, + "learning_rate": 2.213989872565867e-05, + "loss": 2.4341, + "step": 15705 + }, + { + "epoch": 1.2675328867726576, + "grad_norm": 0.647433340549469, + "learning_rate": 2.2129993113021108e-05, + "loss": 2.423, + "step": 15706 + }, + { + "epoch": 1.2676135905092405, + "grad_norm": 0.6886507272720337, + "learning_rate": 2.2120089441107706e-05, + "loss": 2.4185, + "step": 15707 + }, + { + "epoch": 1.2676942942458236, + "grad_norm": 0.6720516085624695, + "learning_rate": 2.2110187710165242e-05, + "loss": 2.4587, + "step": 15708 + }, + { + "epoch": 1.2677749979824067, + "grad_norm": 0.676665723323822, + "learning_rate": 2.2100287920440543e-05, + "loss": 2.4241, + "step": 15709 + }, + { + "epoch": 1.2678557017189895, + "grad_norm": 0.6939559578895569, + "learning_rate": 2.209039007218028e-05, + "loss": 2.3974, + "step": 15710 + }, + { + "epoch": 1.2679364054555726, + "grad_norm": 0.6485786437988281, + "learning_rate": 2.2080494165631137e-05, + "loss": 2.4041, + "step": 15711 + }, + { + "epoch": 1.2680171091921557, + "grad_norm": 0.668319582939148, + "learning_rate": 2.2070600201039802e-05, + "loss": 2.4705, + "step": 15712 + }, + { + "epoch": 1.2680978129287386, + "grad_norm": 0.6837478280067444, + "learning_rate": 2.206070817865279e-05, + "loss": 2.4474, + "step": 15713 + }, + { + "epoch": 1.2681785166653217, + "grad_norm": 0.7000131011009216, + "learning_rate": 2.2050818098716664e-05, + "loss": 2.4463, + "step": 15714 + }, + { + "epoch": 1.2682592204019045, + "grad_norm": 0.7063068151473999, + "learning_rate": 2.204092996147794e-05, + "loss": 2.4226, + "step": 15715 + }, + { + "epoch": 1.2683399241384876, + "grad_norm": 0.6497172117233276, + "learning_rate": 2.2031043767183003e-05, + "loss": 2.3678, + "step": 15716 + }, + { + "epoch": 1.2684206278750705, + "grad_norm": 0.6558645963668823, + 
"learning_rate": 2.2021159516078262e-05, + "loss": 2.4021, + "step": 15717 + }, + { + "epoch": 1.2685013316116536, + "grad_norm": 0.7411713600158691, + "learning_rate": 2.2011277208410062e-05, + "loss": 2.4346, + "step": 15718 + }, + { + "epoch": 1.2685820353482367, + "grad_norm": 0.7275578379631042, + "learning_rate": 2.2001396844424714e-05, + "loss": 2.4262, + "step": 15719 + }, + { + "epoch": 1.2686627390848195, + "grad_norm": 0.7010936141014099, + "learning_rate": 2.199151842436844e-05, + "loss": 2.4774, + "step": 15720 + }, + { + "epoch": 1.2687434428214026, + "grad_norm": 0.7551137208938599, + "learning_rate": 2.1981641948487462e-05, + "loss": 2.5286, + "step": 15721 + }, + { + "epoch": 1.2688241465579857, + "grad_norm": 0.6510799527168274, + "learning_rate": 2.1971767417027888e-05, + "loss": 2.3813, + "step": 15722 + }, + { + "epoch": 1.2689048502945686, + "grad_norm": 0.636050283908844, + "learning_rate": 2.196189483023584e-05, + "loss": 2.4226, + "step": 15723 + }, + { + "epoch": 1.2689855540311517, + "grad_norm": 0.6939265131950378, + "learning_rate": 2.1952024188357368e-05, + "loss": 2.4516, + "step": 15724 + }, + { + "epoch": 1.2690662577677347, + "grad_norm": 0.6715239882469177, + "learning_rate": 2.1942155491638494e-05, + "loss": 2.4358, + "step": 15725 + }, + { + "epoch": 1.2691469615043176, + "grad_norm": 0.740680456161499, + "learning_rate": 2.1932288740325123e-05, + "loss": 2.4135, + "step": 15726 + }, + { + "epoch": 1.2692276652409007, + "grad_norm": 0.6969335079193115, + "learning_rate": 2.1922423934663193e-05, + "loss": 2.43, + "step": 15727 + }, + { + "epoch": 1.2693083689774838, + "grad_norm": 0.6390758156776428, + "learning_rate": 2.1912561074898554e-05, + "loss": 2.4492, + "step": 15728 + }, + { + "epoch": 1.2693890727140666, + "grad_norm": 0.7129701375961304, + "learning_rate": 2.190270016127701e-05, + "loss": 2.3799, + "step": 15729 + }, + { + "epoch": 1.2694697764506497, + "grad_norm": 0.7309553027153015, + "learning_rate": 
2.1892841194044332e-05, + "loss": 2.4955, + "step": 15730 + }, + { + "epoch": 1.2695504801872326, + "grad_norm": 0.7257225513458252, + "learning_rate": 2.1882984173446252e-05, + "loss": 2.4184, + "step": 15731 + }, + { + "epoch": 1.2696311839238157, + "grad_norm": 0.7434510588645935, + "learning_rate": 2.1873129099728384e-05, + "loss": 2.453, + "step": 15732 + }, + { + "epoch": 1.2697118876603986, + "grad_norm": 0.6643160581588745, + "learning_rate": 2.1863275973136356e-05, + "loss": 2.3619, + "step": 15733 + }, + { + "epoch": 1.2697925913969816, + "grad_norm": 0.6677344441413879, + "learning_rate": 2.1853424793915778e-05, + "loss": 2.406, + "step": 15734 + }, + { + "epoch": 1.2698732951335647, + "grad_norm": 0.760028064250946, + "learning_rate": 2.1843575562312092e-05, + "loss": 2.5479, + "step": 15735 + }, + { + "epoch": 1.2699539988701476, + "grad_norm": 0.6668389439582825, + "learning_rate": 2.183372827857082e-05, + "loss": 2.4104, + "step": 15736 + }, + { + "epoch": 1.2700347026067307, + "grad_norm": 0.651155412197113, + "learning_rate": 2.182388294293736e-05, + "loss": 2.3738, + "step": 15737 + }, + { + "epoch": 1.2701154063433138, + "grad_norm": 0.736907958984375, + "learning_rate": 2.1814039555657084e-05, + "loss": 2.4179, + "step": 15738 + }, + { + "epoch": 1.2701961100798966, + "grad_norm": 0.7068225741386414, + "learning_rate": 2.180419811697534e-05, + "loss": 2.3911, + "step": 15739 + }, + { + "epoch": 1.2702768138164797, + "grad_norm": 0.6959261894226074, + "learning_rate": 2.1794358627137368e-05, + "loss": 2.452, + "step": 15740 + }, + { + "epoch": 1.2703575175530628, + "grad_norm": 0.6886181235313416, + "learning_rate": 2.1784521086388442e-05, + "loss": 2.4166, + "step": 15741 + }, + { + "epoch": 1.2704382212896457, + "grad_norm": 0.6494541168212891, + "learning_rate": 2.177468549497369e-05, + "loss": 2.3589, + "step": 15742 + }, + { + "epoch": 1.2705189250262288, + "grad_norm": 0.7008326649665833, + "learning_rate": 2.1764851853138247e-05, + "loss": 
2.3697, + "step": 15743 + }, + { + "epoch": 1.2705996287628119, + "grad_norm": 0.6800456643104553, + "learning_rate": 2.1755020161127238e-05, + "loss": 2.4162, + "step": 15744 + }, + { + "epoch": 1.2706803324993947, + "grad_norm": 0.6836018562316895, + "learning_rate": 2.1745190419185634e-05, + "loss": 2.3977, + "step": 15745 + }, + { + "epoch": 1.2707610362359778, + "grad_norm": 0.6489691138267517, + "learning_rate": 2.173536262755844e-05, + "loss": 2.464, + "step": 15746 + }, + { + "epoch": 1.2708417399725607, + "grad_norm": 0.7309786677360535, + "learning_rate": 2.172553678649061e-05, + "loss": 2.4065, + "step": 15747 + }, + { + "epoch": 1.2709224437091438, + "grad_norm": 0.6752686500549316, + "learning_rate": 2.1715712896227004e-05, + "loss": 2.3935, + "step": 15748 + }, + { + "epoch": 1.2710031474457266, + "grad_norm": 0.7039850354194641, + "learning_rate": 2.1705890957012465e-05, + "loss": 2.4605, + "step": 15749 + }, + { + "epoch": 1.2710838511823097, + "grad_norm": 0.6904652714729309, + "learning_rate": 2.169607096909182e-05, + "loss": 2.4264, + "step": 15750 + }, + { + "epoch": 1.2711645549188928, + "grad_norm": 0.7104331254959106, + "learning_rate": 2.168625293270974e-05, + "loss": 2.378, + "step": 15751 + }, + { + "epoch": 1.2712452586554757, + "grad_norm": 0.6732800602912903, + "learning_rate": 2.167643684811096e-05, + "loss": 2.4216, + "step": 15752 + }, + { + "epoch": 1.2713259623920588, + "grad_norm": 0.7207335829734802, + "learning_rate": 2.166662271554011e-05, + "loss": 2.3861, + "step": 15753 + }, + { + "epoch": 1.2714066661286418, + "grad_norm": 0.7561055421829224, + "learning_rate": 2.1656810535241813e-05, + "loss": 2.4753, + "step": 15754 + }, + { + "epoch": 1.2714873698652247, + "grad_norm": 0.7018210887908936, + "learning_rate": 2.1647000307460564e-05, + "loss": 2.401, + "step": 15755 + }, + { + "epoch": 1.2715680736018078, + "grad_norm": 0.6908013224601746, + "learning_rate": 2.163719203244089e-05, + "loss": 2.4451, + "step": 15756 + }, + { 
+ "epoch": 1.2716487773383909, + "grad_norm": 0.734909176826477, + "learning_rate": 2.162738571042723e-05, + "loss": 2.4221, + "step": 15757 + }, + { + "epoch": 1.2717294810749737, + "grad_norm": 0.7047279477119446, + "learning_rate": 2.1617581341663973e-05, + "loss": 2.4149, + "step": 15758 + }, + { + "epoch": 1.2718101848115568, + "grad_norm": 0.6875640749931335, + "learning_rate": 2.1607778926395496e-05, + "loss": 2.3874, + "step": 15759 + }, + { + "epoch": 1.2718908885481397, + "grad_norm": 0.7300851345062256, + "learning_rate": 2.159797846486611e-05, + "loss": 2.4706, + "step": 15760 + }, + { + "epoch": 1.2719715922847228, + "grad_norm": 0.733775794506073, + "learning_rate": 2.1588179957320022e-05, + "loss": 2.4208, + "step": 15761 + }, + { + "epoch": 1.2720522960213057, + "grad_norm": 0.8375213742256165, + "learning_rate": 2.1578383404001458e-05, + "loss": 2.4672, + "step": 15762 + }, + { + "epoch": 1.2721329997578887, + "grad_norm": 0.7276780009269714, + "learning_rate": 2.15685888051546e-05, + "loss": 2.4536, + "step": 15763 + }, + { + "epoch": 1.2722137034944718, + "grad_norm": 0.7765224575996399, + "learning_rate": 2.1558796161023508e-05, + "loss": 2.3671, + "step": 15764 + }, + { + "epoch": 1.2722944072310547, + "grad_norm": 0.7225642204284668, + "learning_rate": 2.1549005471852256e-05, + "loss": 2.4316, + "step": 15765 + }, + { + "epoch": 1.2723751109676378, + "grad_norm": 0.6959484219551086, + "learning_rate": 2.1539216737884904e-05, + "loss": 2.4581, + "step": 15766 + }, + { + "epoch": 1.2724558147042209, + "grad_norm": 0.6943621039390564, + "learning_rate": 2.1529429959365332e-05, + "loss": 2.4372, + "step": 15767 + }, + { + "epoch": 1.2725365184408037, + "grad_norm": 0.7067148089408875, + "learning_rate": 2.151964513653746e-05, + "loss": 2.431, + "step": 15768 + }, + { + "epoch": 1.2726172221773868, + "grad_norm": 0.8317076563835144, + "learning_rate": 2.150986226964521e-05, + "loss": 2.4177, + "step": 15769 + }, + { + "epoch": 1.27269792591397, + 
"grad_norm": 0.7390087246894836, + "learning_rate": 2.150008135893239e-05, + "loss": 2.4711, + "step": 15770 + }, + { + "epoch": 1.2727786296505528, + "grad_norm": 0.6829150915145874, + "learning_rate": 2.1490302404642725e-05, + "loss": 2.4477, + "step": 15771 + }, + { + "epoch": 1.2728593333871359, + "grad_norm": 0.7355613708496094, + "learning_rate": 2.148052540701995e-05, + "loss": 2.493, + "step": 15772 + }, + { + "epoch": 1.272940037123719, + "grad_norm": 0.6872289776802063, + "learning_rate": 2.1470750366307747e-05, + "loss": 2.4363, + "step": 15773 + }, + { + "epoch": 1.2730207408603018, + "grad_norm": 0.7753220796585083, + "learning_rate": 2.1460977282749705e-05, + "loss": 2.4376, + "step": 15774 + }, + { + "epoch": 1.273101444596885, + "grad_norm": 0.6717056632041931, + "learning_rate": 2.145120615658942e-05, + "loss": 2.4383, + "step": 15775 + }, + { + "epoch": 1.2731821483334678, + "grad_norm": 0.7441569566726685, + "learning_rate": 2.1441436988070428e-05, + "loss": 2.462, + "step": 15776 + }, + { + "epoch": 1.2732628520700509, + "grad_norm": 0.6824371814727783, + "learning_rate": 2.143166977743615e-05, + "loss": 2.4173, + "step": 15777 + }, + { + "epoch": 1.2733435558066337, + "grad_norm": 0.7310225963592529, + "learning_rate": 2.1421904524930038e-05, + "loss": 2.4222, + "step": 15778 + }, + { + "epoch": 1.2734242595432168, + "grad_norm": 0.7198066115379333, + "learning_rate": 2.141214123079548e-05, + "loss": 2.4262, + "step": 15779 + }, + { + "epoch": 1.2735049632798, + "grad_norm": 0.7081776857376099, + "learning_rate": 2.1402379895275783e-05, + "loss": 2.4473, + "step": 15780 + }, + { + "epoch": 1.2735856670163828, + "grad_norm": 0.6909368634223938, + "learning_rate": 2.1392620518614235e-05, + "loss": 2.4528, + "step": 15781 + }, + { + "epoch": 1.2736663707529658, + "grad_norm": 0.7170675992965698, + "learning_rate": 2.1382863101054107e-05, + "loss": 2.4214, + "step": 15782 + }, + { + "epoch": 1.273747074489549, + "grad_norm": 0.6992846727371216, + 
"learning_rate": 2.1373107642838497e-05, + "loss": 2.4397, + "step": 15783 + }, + { + "epoch": 1.2738277782261318, + "grad_norm": 0.7245237231254578, + "learning_rate": 2.1363354144210578e-05, + "loss": 2.373, + "step": 15784 + }, + { + "epoch": 1.273908481962715, + "grad_norm": 0.6929232478141785, + "learning_rate": 2.1353602605413435e-05, + "loss": 2.4297, + "step": 15785 + }, + { + "epoch": 1.273989185699298, + "grad_norm": 0.7243950366973877, + "learning_rate": 2.134385302669013e-05, + "loss": 2.3856, + "step": 15786 + }, + { + "epoch": 1.2740698894358808, + "grad_norm": 0.6712679266929626, + "learning_rate": 2.133410540828359e-05, + "loss": 2.3818, + "step": 15787 + }, + { + "epoch": 1.274150593172464, + "grad_norm": 0.7433474063873291, + "learning_rate": 2.1324359750436774e-05, + "loss": 2.4148, + "step": 15788 + }, + { + "epoch": 1.274231296909047, + "grad_norm": 0.7225894927978516, + "learning_rate": 2.1314616053392577e-05, + "loss": 2.395, + "step": 15789 + }, + { + "epoch": 1.2743120006456299, + "grad_norm": 0.7026889324188232, + "learning_rate": 2.130487431739383e-05, + "loss": 2.4693, + "step": 15790 + }, + { + "epoch": 1.274392704382213, + "grad_norm": 0.6898565292358398, + "learning_rate": 2.1295134542683325e-05, + "loss": 2.3643, + "step": 15791 + }, + { + "epoch": 1.2744734081187958, + "grad_norm": 0.7212820649147034, + "learning_rate": 2.1285396729503826e-05, + "loss": 2.4178, + "step": 15792 + }, + { + "epoch": 1.274554111855379, + "grad_norm": 0.7149149179458618, + "learning_rate": 2.127566087809798e-05, + "loss": 2.4023, + "step": 15793 + }, + { + "epoch": 1.2746348155919618, + "grad_norm": 0.7039671540260315, + "learning_rate": 2.126592698870846e-05, + "loss": 2.4667, + "step": 15794 + }, + { + "epoch": 1.2747155193285449, + "grad_norm": 0.806849479675293, + "learning_rate": 2.1256195061577877e-05, + "loss": 2.4741, + "step": 15795 + }, + { + "epoch": 1.274796223065128, + "grad_norm": 0.7544776797294617, + "learning_rate": 
2.124646509694872e-05, + "loss": 2.4258, + "step": 15796 + }, + { + "epoch": 1.2748769268017108, + "grad_norm": 0.6946810483932495, + "learning_rate": 2.1236737095063518e-05, + "loss": 2.4088, + "step": 15797 + }, + { + "epoch": 1.274957630538294, + "grad_norm": 0.7714219093322754, + "learning_rate": 2.1227011056164714e-05, + "loss": 2.4705, + "step": 15798 + }, + { + "epoch": 1.275038334274877, + "grad_norm": 0.6789658665657043, + "learning_rate": 2.121728698049471e-05, + "loss": 2.4692, + "step": 15799 + }, + { + "epoch": 1.2751190380114599, + "grad_norm": 0.7003477215766907, + "learning_rate": 2.120756486829586e-05, + "loss": 2.4437, + "step": 15800 + }, + { + "epoch": 1.275199741748043, + "grad_norm": 0.6802948117256165, + "learning_rate": 2.1197844719810455e-05, + "loss": 2.4002, + "step": 15801 + }, + { + "epoch": 1.275280445484626, + "grad_norm": 0.67823326587677, + "learning_rate": 2.1188126535280773e-05, + "loss": 2.5119, + "step": 15802 + }, + { + "epoch": 1.275361149221209, + "grad_norm": 0.6580843925476074, + "learning_rate": 2.1178410314948972e-05, + "loss": 2.3814, + "step": 15803 + }, + { + "epoch": 1.275441852957792, + "grad_norm": 0.681642472743988, + "learning_rate": 2.1168696059057226e-05, + "loss": 2.4206, + "step": 15804 + }, + { + "epoch": 1.275522556694375, + "grad_norm": 0.7483543753623962, + "learning_rate": 2.1158983767847674e-05, + "loss": 2.4633, + "step": 15805 + }, + { + "epoch": 1.275603260430958, + "grad_norm": 0.6565235257148743, + "learning_rate": 2.11492734415623e-05, + "loss": 2.4145, + "step": 15806 + }, + { + "epoch": 1.275683964167541, + "grad_norm": 0.6606764793395996, + "learning_rate": 2.1139565080443157e-05, + "loss": 2.3935, + "step": 15807 + }, + { + "epoch": 1.275764667904124, + "grad_norm": 0.7915800213813782, + "learning_rate": 2.1129858684732206e-05, + "loss": 2.4288, + "step": 15808 + }, + { + "epoch": 1.275845371640707, + "grad_norm": 0.6763594746589661, + "learning_rate": 2.112015425467133e-05, + "loss": 2.4147, + 
"step": 15809 + }, + { + "epoch": 1.2759260753772899, + "grad_norm": 0.6886053085327148, + "learning_rate": 2.1110451790502405e-05, + "loss": 2.3798, + "step": 15810 + }, + { + "epoch": 1.276006779113873, + "grad_norm": 0.686122715473175, + "learning_rate": 2.110075129246728e-05, + "loss": 2.3896, + "step": 15811 + }, + { + "epoch": 1.276087482850456, + "grad_norm": 0.6989614367485046, + "learning_rate": 2.109105276080764e-05, + "loss": 2.4533, + "step": 15812 + }, + { + "epoch": 1.276168186587039, + "grad_norm": 0.6818450689315796, + "learning_rate": 2.1081356195765232e-05, + "loss": 2.4012, + "step": 15813 + }, + { + "epoch": 1.276248890323622, + "grad_norm": 0.7492663860321045, + "learning_rate": 2.107166159758176e-05, + "loss": 2.4269, + "step": 15814 + }, + { + "epoch": 1.276329594060205, + "grad_norm": 0.6752359867095947, + "learning_rate": 2.1061968966498767e-05, + "loss": 2.4478, + "step": 15815 + }, + { + "epoch": 1.276410297796788, + "grad_norm": 0.6784162521362305, + "learning_rate": 2.1052278302757854e-05, + "loss": 2.4853, + "step": 15816 + }, + { + "epoch": 1.276491001533371, + "grad_norm": 0.7273215651512146, + "learning_rate": 2.104258960660055e-05, + "loss": 2.4365, + "step": 15817 + }, + { + "epoch": 1.2765717052699541, + "grad_norm": 0.7021621465682983, + "learning_rate": 2.1032902878268323e-05, + "loss": 2.4665, + "step": 15818 + }, + { + "epoch": 1.276652409006537, + "grad_norm": 0.666828989982605, + "learning_rate": 2.102321811800253e-05, + "loss": 2.3922, + "step": 15819 + }, + { + "epoch": 1.27673311274312, + "grad_norm": 0.6780487298965454, + "learning_rate": 2.1013535326044608e-05, + "loss": 2.4072, + "step": 15820 + }, + { + "epoch": 1.276813816479703, + "grad_norm": 0.6474688053131104, + "learning_rate": 2.1003854502635888e-05, + "loss": 2.4145, + "step": 15821 + }, + { + "epoch": 1.276894520216286, + "grad_norm": 0.6712753772735596, + "learning_rate": 2.0994175648017587e-05, + "loss": 2.4349, + "step": 15822 + }, + { + "epoch": 
1.2769752239528689, + "grad_norm": 0.6705189943313599, + "learning_rate": 2.098449876243096e-05, + "loss": 2.4376, + "step": 15823 + }, + { + "epoch": 1.277055927689452, + "grad_norm": 0.6794685125350952, + "learning_rate": 2.0974823846117197e-05, + "loss": 2.3717, + "step": 15824 + }, + { + "epoch": 1.277136631426035, + "grad_norm": 0.7145677804946899, + "learning_rate": 2.0965150899317364e-05, + "loss": 2.3829, + "step": 15825 + }, + { + "epoch": 1.277217335162618, + "grad_norm": 0.7043245434761047, + "learning_rate": 2.095547992227257e-05, + "loss": 2.405, + "step": 15826 + }, + { + "epoch": 1.277298038899201, + "grad_norm": 0.7969205379486084, + "learning_rate": 2.0945810915223873e-05, + "loss": 2.4115, + "step": 15827 + }, + { + "epoch": 1.277378742635784, + "grad_norm": 0.657482385635376, + "learning_rate": 2.0936143878412186e-05, + "loss": 2.372, + "step": 15828 + }, + { + "epoch": 1.277459446372367, + "grad_norm": 0.7315167784690857, + "learning_rate": 2.0926478812078466e-05, + "loss": 2.4372, + "step": 15829 + }, + { + "epoch": 1.27754015010895, + "grad_norm": 0.6985061764717102, + "learning_rate": 2.09168157164636e-05, + "loss": 2.3901, + "step": 15830 + }, + { + "epoch": 1.2776208538455331, + "grad_norm": 0.6906184554100037, + "learning_rate": 2.0907154591808408e-05, + "loss": 2.4562, + "step": 15831 + }, + { + "epoch": 1.277701557582116, + "grad_norm": 0.655094563961029, + "learning_rate": 2.0897495438353676e-05, + "loss": 2.451, + "step": 15832 + }, + { + "epoch": 1.277782261318699, + "grad_norm": 0.7663134932518005, + "learning_rate": 2.0887838256340143e-05, + "loss": 2.4634, + "step": 15833 + }, + { + "epoch": 1.2778629650552822, + "grad_norm": 0.7164491415023804, + "learning_rate": 2.087818304600849e-05, + "loss": 2.4624, + "step": 15834 + }, + { + "epoch": 1.277943668791865, + "grad_norm": 0.6962822079658508, + "learning_rate": 2.0868529807599336e-05, + "loss": 2.4325, + "step": 15835 + }, + { + "epoch": 1.2780243725284481, + "grad_norm": 
0.702985405921936, + "learning_rate": 2.0858878541353255e-05, + "loss": 2.4219, + "step": 15836 + }, + { + "epoch": 1.278105076265031, + "grad_norm": 0.7605595588684082, + "learning_rate": 2.0849229247510826e-05, + "loss": 2.4201, + "step": 15837 + }, + { + "epoch": 1.278185780001614, + "grad_norm": 0.8479344248771667, + "learning_rate": 2.083958192631249e-05, + "loss": 2.4689, + "step": 15838 + }, + { + "epoch": 1.278266483738197, + "grad_norm": 0.7241235375404358, + "learning_rate": 2.082993657799869e-05, + "loss": 2.4861, + "step": 15839 + }, + { + "epoch": 1.27834718747478, + "grad_norm": 0.7069835066795349, + "learning_rate": 2.0820293202809827e-05, + "loss": 2.3759, + "step": 15840 + }, + { + "epoch": 1.2784278912113631, + "grad_norm": 0.6606370210647583, + "learning_rate": 2.0810651800986237e-05, + "loss": 2.4444, + "step": 15841 + }, + { + "epoch": 1.278508594947946, + "grad_norm": 0.6608174443244934, + "learning_rate": 2.08010123727682e-05, + "loss": 2.4339, + "step": 15842 + }, + { + "epoch": 1.278589298684529, + "grad_norm": 0.751000702381134, + "learning_rate": 2.0791374918396e-05, + "loss": 2.4327, + "step": 15843 + }, + { + "epoch": 1.2786700024211122, + "grad_norm": 0.7223808765411377, + "learning_rate": 2.0781739438109748e-05, + "loss": 2.3573, + "step": 15844 + }, + { + "epoch": 1.278750706157695, + "grad_norm": 0.6872109770774841, + "learning_rate": 2.0772105932149642e-05, + "loss": 2.3973, + "step": 15845 + }, + { + "epoch": 1.2788314098942781, + "grad_norm": 0.6967385411262512, + "learning_rate": 2.0762474400755762e-05, + "loss": 2.4622, + "step": 15846 + }, + { + "epoch": 1.2789121136308612, + "grad_norm": 0.7289159893989563, + "learning_rate": 2.0752844844168163e-05, + "loss": 2.4507, + "step": 15847 + }, + { + "epoch": 1.278992817367444, + "grad_norm": 0.7735978364944458, + "learning_rate": 2.0743217262626802e-05, + "loss": 2.4341, + "step": 15848 + }, + { + "epoch": 1.2790735211040272, + "grad_norm": 0.7209177017211914, + "learning_rate": 
2.0733591656371655e-05, + "loss": 2.4024, + "step": 15849 + }, + { + "epoch": 1.2791542248406103, + "grad_norm": 0.6789259314537048, + "learning_rate": 2.0723968025642604e-05, + "loss": 2.3809, + "step": 15850 + }, + { + "epoch": 1.2792349285771931, + "grad_norm": 0.6972812414169312, + "learning_rate": 2.0714346370679495e-05, + "loss": 2.3986, + "step": 15851 + }, + { + "epoch": 1.2793156323137762, + "grad_norm": 0.7144166827201843, + "learning_rate": 2.070472669172213e-05, + "loss": 2.4241, + "step": 15852 + }, + { + "epoch": 1.279396336050359, + "grad_norm": 0.7325223088264465, + "learning_rate": 2.0695108989010282e-05, + "loss": 2.452, + "step": 15853 + }, + { + "epoch": 1.2794770397869422, + "grad_norm": 0.6900116205215454, + "learning_rate": 2.0685493262783608e-05, + "loss": 2.4091, + "step": 15854 + }, + { + "epoch": 1.279557743523525, + "grad_norm": 0.6846197843551636, + "learning_rate": 2.0675879513281758e-05, + "loss": 2.4337, + "step": 15855 + }, + { + "epoch": 1.2796384472601081, + "grad_norm": 0.6901541352272034, + "learning_rate": 2.0666267740744372e-05, + "loss": 2.4586, + "step": 15856 + }, + { + "epoch": 1.2797191509966912, + "grad_norm": 0.6842665672302246, + "learning_rate": 2.0656657945410953e-05, + "loss": 2.4383, + "step": 15857 + }, + { + "epoch": 1.279799854733274, + "grad_norm": 0.7450493574142456, + "learning_rate": 2.0647050127521028e-05, + "loss": 2.4308, + "step": 15858 + }, + { + "epoch": 1.2798805584698572, + "grad_norm": 0.6928436160087585, + "learning_rate": 2.0637444287314033e-05, + "loss": 2.4726, + "step": 15859 + }, + { + "epoch": 1.2799612622064402, + "grad_norm": 0.6539968252182007, + "learning_rate": 2.06278404250294e-05, + "loss": 2.3983, + "step": 15860 + }, + { + "epoch": 1.280041965943023, + "grad_norm": 0.7183163166046143, + "learning_rate": 2.0618238540906444e-05, + "loss": 2.4172, + "step": 15861 + }, + { + "epoch": 1.2801226696796062, + "grad_norm": 0.7070814371109009, + "learning_rate": 2.0608638635184507e-05, + 
"loss": 2.4018, + "step": 15862 + }, + { + "epoch": 1.2802033734161893, + "grad_norm": 0.7589142918586731, + "learning_rate": 2.0599040708102847e-05, + "loss": 2.4175, + "step": 15863 + }, + { + "epoch": 1.2802840771527721, + "grad_norm": 0.6945414543151855, + "learning_rate": 2.0589444759900613e-05, + "loss": 2.4093, + "step": 15864 + }, + { + "epoch": 1.2803647808893552, + "grad_norm": 0.685482919216156, + "learning_rate": 2.0579850790817003e-05, + "loss": 2.4388, + "step": 15865 + }, + { + "epoch": 1.280445484625938, + "grad_norm": 0.7089706063270569, + "learning_rate": 2.0570258801091148e-05, + "loss": 2.3779, + "step": 15866 + }, + { + "epoch": 1.2805261883625212, + "grad_norm": 0.6994217038154602, + "learning_rate": 2.0560668790962046e-05, + "loss": 2.3757, + "step": 15867 + }, + { + "epoch": 1.280606892099104, + "grad_norm": 0.7170232534408569, + "learning_rate": 2.055108076066874e-05, + "loss": 2.4087, + "step": 15868 + }, + { + "epoch": 1.2806875958356871, + "grad_norm": 0.7008751034736633, + "learning_rate": 2.0541494710450206e-05, + "loss": 2.4384, + "step": 15869 + }, + { + "epoch": 1.2807682995722702, + "grad_norm": 0.6795800924301147, + "learning_rate": 2.053191064054527e-05, + "loss": 2.415, + "step": 15870 + }, + { + "epoch": 1.280849003308853, + "grad_norm": 0.6650210022926331, + "learning_rate": 2.0522328551192882e-05, + "loss": 2.4421, + "step": 15871 + }, + { + "epoch": 1.2809297070454362, + "grad_norm": 0.7045374512672424, + "learning_rate": 2.0512748442631858e-05, + "loss": 2.4285, + "step": 15872 + }, + { + "epoch": 1.2810104107820193, + "grad_norm": 0.6585350632667542, + "learning_rate": 2.0503170315100883e-05, + "loss": 2.3806, + "step": 15873 + }, + { + "epoch": 1.2810911145186021, + "grad_norm": 0.7833496332168579, + "learning_rate": 2.0493594168838725e-05, + "loss": 2.4557, + "step": 15874 + }, + { + "epoch": 1.2811718182551852, + "grad_norm": 0.7237457036972046, + "learning_rate": 2.0484020004084048e-05, + "loss": 2.3966, + "step": 
15875 + }, + { + "epoch": 1.2812525219917683, + "grad_norm": 0.7416609525680542, + "learning_rate": 2.0474447821075426e-05, + "loss": 2.3729, + "step": 15876 + }, + { + "epoch": 1.2813332257283512, + "grad_norm": 0.7148095369338989, + "learning_rate": 2.046487762005146e-05, + "loss": 2.4163, + "step": 15877 + }, + { + "epoch": 1.2814139294649343, + "grad_norm": 0.670281171798706, + "learning_rate": 2.0455309401250632e-05, + "loss": 2.383, + "step": 15878 + }, + { + "epoch": 1.2814946332015174, + "grad_norm": 0.6968950629234314, + "learning_rate": 2.0445743164911457e-05, + "loss": 2.3967, + "step": 15879 + }, + { + "epoch": 1.2815753369381002, + "grad_norm": 0.783441960811615, + "learning_rate": 2.0436178911272298e-05, + "loss": 2.455, + "step": 15880 + }, + { + "epoch": 1.2816560406746833, + "grad_norm": 0.709032416343689, + "learning_rate": 2.0426616640571518e-05, + "loss": 2.4207, + "step": 15881 + }, + { + "epoch": 1.2817367444112662, + "grad_norm": 0.6727990508079529, + "learning_rate": 2.0417056353047504e-05, + "loss": 2.4115, + "step": 15882 + }, + { + "epoch": 1.2818174481478493, + "grad_norm": 0.7336034774780273, + "learning_rate": 2.0407498048938445e-05, + "loss": 2.43, + "step": 15883 + }, + { + "epoch": 1.2818981518844321, + "grad_norm": 0.7649042010307312, + "learning_rate": 2.0397941728482604e-05, + "loss": 2.4655, + "step": 15884 + }, + { + "epoch": 1.2819788556210152, + "grad_norm": 0.7218052744865417, + "learning_rate": 2.038838739191816e-05, + "loss": 2.4872, + "step": 15885 + }, + { + "epoch": 1.2820595593575983, + "grad_norm": 0.7192350625991821, + "learning_rate": 2.0378835039483178e-05, + "loss": 2.4751, + "step": 15886 + }, + { + "epoch": 1.2821402630941812, + "grad_norm": 0.7059212923049927, + "learning_rate": 2.0369284671415768e-05, + "loss": 2.43, + "step": 15887 + }, + { + "epoch": 1.2822209668307643, + "grad_norm": 0.7387098073959351, + "learning_rate": 2.0359736287953956e-05, + "loss": 2.4281, + "step": 15888 + }, + { + "epoch": 
1.2823016705673473, + "grad_norm": 0.7454321980476379, + "learning_rate": 2.035018988933568e-05, + "loss": 2.4372, + "step": 15889 + }, + { + "epoch": 1.2823823743039302, + "grad_norm": 0.6822765469551086, + "learning_rate": 2.034064547579888e-05, + "loss": 2.3728, + "step": 15890 + }, + { + "epoch": 1.2824630780405133, + "grad_norm": 0.6917527914047241, + "learning_rate": 2.0331103047581412e-05, + "loss": 2.3997, + "step": 15891 + }, + { + "epoch": 1.2825437817770964, + "grad_norm": 0.6734376549720764, + "learning_rate": 2.032156260492113e-05, + "loss": 2.4495, + "step": 15892 + }, + { + "epoch": 1.2826244855136792, + "grad_norm": 0.7222443222999573, + "learning_rate": 2.0312024148055776e-05, + "loss": 2.3466, + "step": 15893 + }, + { + "epoch": 1.2827051892502623, + "grad_norm": 0.703714907169342, + "learning_rate": 2.030248767722309e-05, + "loss": 2.4599, + "step": 15894 + }, + { + "epoch": 1.2827858929868454, + "grad_norm": 0.655161440372467, + "learning_rate": 2.029295319266078e-05, + "loss": 2.3896, + "step": 15895 + }, + { + "epoch": 1.2828665967234283, + "grad_norm": 0.6449242234230042, + "learning_rate": 2.028342069460639e-05, + "loss": 2.3511, + "step": 15896 + }, + { + "epoch": 1.2829473004600114, + "grad_norm": 0.6578382849693298, + "learning_rate": 2.027389018329755e-05, + "loss": 2.3678, + "step": 15897 + }, + { + "epoch": 1.2830280041965942, + "grad_norm": 0.7047572731971741, + "learning_rate": 2.0264361658971797e-05, + "loss": 2.4522, + "step": 15898 + }, + { + "epoch": 1.2831087079331773, + "grad_norm": 0.7310267090797424, + "learning_rate": 2.0254835121866554e-05, + "loss": 2.4117, + "step": 15899 + }, + { + "epoch": 1.2831894116697602, + "grad_norm": 0.7020776867866516, + "learning_rate": 2.024531057221927e-05, + "loss": 2.4033, + "step": 15900 + }, + { + "epoch": 1.2832701154063433, + "grad_norm": 0.6967746615409851, + "learning_rate": 2.023578801026733e-05, + "loss": 2.3491, + "step": 15901 + }, + { + "epoch": 1.2833508191429264, + "grad_norm": 
0.7062339782714844, + "learning_rate": 2.022626743624807e-05, + "loss": 2.4598, + "step": 15902 + }, + { + "epoch": 1.2834315228795092, + "grad_norm": 0.730625331401825, + "learning_rate": 2.0216748850398748e-05, + "loss": 2.4995, + "step": 15903 + }, + { + "epoch": 1.2835122266160923, + "grad_norm": 0.6634403467178345, + "learning_rate": 2.020723225295662e-05, + "loss": 2.3843, + "step": 15904 + }, + { + "epoch": 1.2835929303526754, + "grad_norm": 0.6924816966056824, + "learning_rate": 2.019771764415883e-05, + "loss": 2.4258, + "step": 15905 + }, + { + "epoch": 1.2836736340892583, + "grad_norm": 0.7127227187156677, + "learning_rate": 2.018820502424251e-05, + "loss": 2.4038, + "step": 15906 + }, + { + "epoch": 1.2837543378258414, + "grad_norm": 0.7108431458473206, + "learning_rate": 2.0178694393444785e-05, + "loss": 2.4571, + "step": 15907 + }, + { + "epoch": 1.2838350415624245, + "grad_norm": 0.7478229999542236, + "learning_rate": 2.016918575200262e-05, + "loss": 2.4526, + "step": 15908 + }, + { + "epoch": 1.2839157452990073, + "grad_norm": 0.65651935338974, + "learning_rate": 2.015967910015303e-05, + "loss": 2.434, + "step": 15909 + }, + { + "epoch": 1.2839964490355904, + "grad_norm": 0.7285312414169312, + "learning_rate": 2.015017443813294e-05, + "loss": 2.3857, + "step": 15910 + }, + { + "epoch": 1.2840771527721733, + "grad_norm": 0.6947231292724609, + "learning_rate": 2.014067176617923e-05, + "loss": 2.4294, + "step": 15911 + }, + { + "epoch": 1.2841578565087564, + "grad_norm": 0.6965867877006531, + "learning_rate": 2.0131171084528744e-05, + "loss": 2.4514, + "step": 15912 + }, + { + "epoch": 1.2842385602453392, + "grad_norm": 0.6962311863899231, + "learning_rate": 2.0121672393418246e-05, + "loss": 2.4391, + "step": 15913 + }, + { + "epoch": 1.2843192639819223, + "grad_norm": 0.6687992215156555, + "learning_rate": 2.01121756930845e-05, + "loss": 2.4266, + "step": 15914 + }, + { + "epoch": 1.2843999677185054, + "grad_norm": 0.7118954658508301, + 
"learning_rate": 2.0102680983764145e-05, + "loss": 2.3436, + "step": 15915 + }, + { + "epoch": 1.2844806714550883, + "grad_norm": 0.6866199970245361, + "learning_rate": 2.009318826569382e-05, + "loss": 2.3719, + "step": 15916 + }, + { + "epoch": 1.2845613751916714, + "grad_norm": 0.6701404452323914, + "learning_rate": 2.008369753911016e-05, + "loss": 2.4875, + "step": 15917 + }, + { + "epoch": 1.2846420789282544, + "grad_norm": 0.7020917534828186, + "learning_rate": 2.007420880424963e-05, + "loss": 2.3871, + "step": 15918 + }, + { + "epoch": 1.2847227826648373, + "grad_norm": 0.6865704655647278, + "learning_rate": 2.006472206134875e-05, + "loss": 2.3815, + "step": 15919 + }, + { + "epoch": 1.2848034864014204, + "grad_norm": 0.7106871008872986, + "learning_rate": 2.0055237310643948e-05, + "loss": 2.4276, + "step": 15920 + }, + { + "epoch": 1.2848841901380035, + "grad_norm": 0.6891976594924927, + "learning_rate": 2.004575455237161e-05, + "loss": 2.3641, + "step": 15921 + }, + { + "epoch": 1.2849648938745863, + "grad_norm": 0.6385056972503662, + "learning_rate": 2.0036273786768067e-05, + "loss": 2.3898, + "step": 15922 + }, + { + "epoch": 1.2850455976111694, + "grad_norm": 0.7038321495056152, + "learning_rate": 2.0026795014069633e-05, + "loss": 2.4688, + "step": 15923 + }, + { + "epoch": 1.2851263013477525, + "grad_norm": 0.6310208439826965, + "learning_rate": 2.0017318234512494e-05, + "loss": 2.3821, + "step": 15924 + }, + { + "epoch": 1.2852070050843354, + "grad_norm": 0.6989426016807556, + "learning_rate": 2.0007843448332865e-05, + "loss": 2.434, + "step": 15925 + }, + { + "epoch": 1.2852877088209185, + "grad_norm": 0.6666426658630371, + "learning_rate": 1.9998370655766886e-05, + "loss": 2.4687, + "step": 15926 + }, + { + "epoch": 1.2853684125575013, + "grad_norm": 0.6421633958816528, + "learning_rate": 1.9988899857050648e-05, + "loss": 2.4269, + "step": 15927 + }, + { + "epoch": 1.2854491162940844, + "grad_norm": 0.7229343056678772, + "learning_rate": 
1.997943105242016e-05, + "loss": 2.4139, + "step": 15928 + }, + { + "epoch": 1.2855298200306673, + "grad_norm": 0.7168964743614197, + "learning_rate": 1.9969964242111427e-05, + "loss": 2.405, + "step": 15929 + }, + { + "epoch": 1.2856105237672504, + "grad_norm": 0.6824480891227722, + "learning_rate": 1.99604994263604e-05, + "loss": 2.3955, + "step": 15930 + }, + { + "epoch": 1.2856912275038335, + "grad_norm": 0.670956552028656, + "learning_rate": 1.995103660540294e-05, + "loss": 2.3743, + "step": 15931 + }, + { + "epoch": 1.2857719312404163, + "grad_norm": 0.7057971954345703, + "learning_rate": 1.9941575779474864e-05, + "loss": 2.4496, + "step": 15932 + }, + { + "epoch": 1.2858526349769994, + "grad_norm": 0.7802264094352722, + "learning_rate": 1.9932116948812052e-05, + "loss": 2.4231, + "step": 15933 + }, + { + "epoch": 1.2859333387135825, + "grad_norm": 0.7151160836219788, + "learning_rate": 1.992266011365016e-05, + "loss": 2.4319, + "step": 15934 + }, + { + "epoch": 1.2860140424501654, + "grad_norm": 0.7078769207000732, + "learning_rate": 1.991320527422489e-05, + "loss": 2.4037, + "step": 15935 + }, + { + "epoch": 1.2860947461867485, + "grad_norm": 0.7483938336372375, + "learning_rate": 1.9903752430771927e-05, + "loss": 2.4946, + "step": 15936 + }, + { + "epoch": 1.2861754499233315, + "grad_norm": 0.7774620056152344, + "learning_rate": 1.9894301583526808e-05, + "loss": 2.4536, + "step": 15937 + }, + { + "epoch": 1.2862561536599144, + "grad_norm": 0.7311348915100098, + "learning_rate": 1.988485273272509e-05, + "loss": 2.4178, + "step": 15938 + }, + { + "epoch": 1.2863368573964975, + "grad_norm": 0.6821309328079224, + "learning_rate": 1.9875405878602282e-05, + "loss": 2.4851, + "step": 15939 + }, + { + "epoch": 1.2864175611330806, + "grad_norm": 0.7081651091575623, + "learning_rate": 1.9865961021393785e-05, + "loss": 2.4377, + "step": 15940 + }, + { + "epoch": 1.2864982648696635, + "grad_norm": 0.8093439340591431, + "learning_rate": 1.9856518161335014e-05, + 
"loss": 2.4681, + "step": 15941 + }, + { + "epoch": 1.2865789686062465, + "grad_norm": 0.6769521832466125, + "learning_rate": 1.984707729866131e-05, + "loss": 2.4231, + "step": 15942 + }, + { + "epoch": 1.2866596723428294, + "grad_norm": 0.6973356604576111, + "learning_rate": 1.983763843360795e-05, + "loss": 2.4144, + "step": 15943 + }, + { + "epoch": 1.2867403760794125, + "grad_norm": 0.7814682722091675, + "learning_rate": 1.9828201566410197e-05, + "loss": 2.3935, + "step": 15944 + }, + { + "epoch": 1.2868210798159954, + "grad_norm": 0.7545498609542847, + "learning_rate": 1.9818766697303236e-05, + "loss": 2.4136, + "step": 15945 + }, + { + "epoch": 1.2869017835525784, + "grad_norm": 0.7165581583976746, + "learning_rate": 1.9809333826522225e-05, + "loss": 2.3757, + "step": 15946 + }, + { + "epoch": 1.2869824872891615, + "grad_norm": 0.6812456846237183, + "learning_rate": 1.9799902954302208e-05, + "loss": 2.4143, + "step": 15947 + }, + { + "epoch": 1.2870631910257444, + "grad_norm": 0.7231366634368896, + "learning_rate": 1.9790474080878262e-05, + "loss": 2.4837, + "step": 15948 + }, + { + "epoch": 1.2871438947623275, + "grad_norm": 0.690916121006012, + "learning_rate": 1.9781047206485393e-05, + "loss": 2.4513, + "step": 15949 + }, + { + "epoch": 1.2872245984989106, + "grad_norm": 0.6608129143714905, + "learning_rate": 1.9771622331358485e-05, + "loss": 2.3908, + "step": 15950 + }, + { + "epoch": 1.2873053022354934, + "grad_norm": 0.7194501161575317, + "learning_rate": 1.976219945573249e-05, + "loss": 2.38, + "step": 15951 + }, + { + "epoch": 1.2873860059720765, + "grad_norm": 0.7315083146095276, + "learning_rate": 1.9752778579842213e-05, + "loss": 2.4351, + "step": 15952 + }, + { + "epoch": 1.2874667097086596, + "grad_norm": 0.7313492298126221, + "learning_rate": 1.974335970392246e-05, + "loss": 2.3531, + "step": 15953 + }, + { + "epoch": 1.2875474134452425, + "grad_norm": 0.6982418894767761, + "learning_rate": 1.9733942828207985e-05, + "loss": 2.4319, + "step": 
15954 + }, + { + "epoch": 1.2876281171818256, + "grad_norm": 0.6664792895317078, + "learning_rate": 1.972452795293347e-05, + "loss": 2.3981, + "step": 15955 + }, + { + "epoch": 1.2877088209184087, + "grad_norm": 0.6849696040153503, + "learning_rate": 1.9715115078333578e-05, + "loss": 2.3952, + "step": 15956 + }, + { + "epoch": 1.2877895246549915, + "grad_norm": 0.7355225086212158, + "learning_rate": 1.9705704204642873e-05, + "loss": 2.4556, + "step": 15957 + }, + { + "epoch": 1.2878702283915746, + "grad_norm": 0.6850876808166504, + "learning_rate": 1.9696295332095906e-05, + "loss": 2.3873, + "step": 15958 + }, + { + "epoch": 1.2879509321281575, + "grad_norm": 0.6449069976806641, + "learning_rate": 1.9686888460927198e-05, + "loss": 2.4226, + "step": 15959 + }, + { + "epoch": 1.2880316358647406, + "grad_norm": 0.7517794966697693, + "learning_rate": 1.967748359137114e-05, + "loss": 2.377, + "step": 15960 + }, + { + "epoch": 1.2881123396013234, + "grad_norm": 0.6861303448677063, + "learning_rate": 1.9668080723662162e-05, + "loss": 2.4451, + "step": 15961 + }, + { + "epoch": 1.2881930433379065, + "grad_norm": 0.7025154829025269, + "learning_rate": 1.9658679858034602e-05, + "loss": 2.3856, + "step": 15962 + }, + { + "epoch": 1.2882737470744896, + "grad_norm": 0.6775577068328857, + "learning_rate": 1.964928099472275e-05, + "loss": 2.4383, + "step": 15963 + }, + { + "epoch": 1.2883544508110725, + "grad_norm": 0.6889605522155762, + "learning_rate": 1.963988413396086e-05, + "loss": 2.3766, + "step": 15964 + }, + { + "epoch": 1.2884351545476556, + "grad_norm": 0.6697166562080383, + "learning_rate": 1.9630489275983156e-05, + "loss": 2.44, + "step": 15965 + }, + { + "epoch": 1.2885158582842386, + "grad_norm": 0.6895437836647034, + "learning_rate": 1.96210964210237e-05, + "loss": 2.4242, + "step": 15966 + }, + { + "epoch": 1.2885965620208215, + "grad_norm": 0.6955164670944214, + "learning_rate": 1.9611705569316652e-05, + "loss": 2.3915, + "step": 15967 + }, + { + "epoch": 
1.2886772657574046, + "grad_norm": 0.7133461236953735, + "learning_rate": 1.960231672109605e-05, + "loss": 2.4307, + "step": 15968 + }, + { + "epoch": 1.2887579694939877, + "grad_norm": 0.6874761581420898, + "learning_rate": 1.9592929876595857e-05, + "loss": 2.4371, + "step": 15969 + }, + { + "epoch": 1.2888386732305706, + "grad_norm": 0.7168406248092651, + "learning_rate": 1.9583545036050044e-05, + "loss": 2.4681, + "step": 15970 + }, + { + "epoch": 1.2889193769671536, + "grad_norm": 0.701874852180481, + "learning_rate": 1.9574162199692492e-05, + "loss": 2.4746, + "step": 15971 + }, + { + "epoch": 1.2890000807037365, + "grad_norm": 0.7118390202522278, + "learning_rate": 1.9564781367757058e-05, + "loss": 2.4139, + "step": 15972 + }, + { + "epoch": 1.2890807844403196, + "grad_norm": 0.6597239971160889, + "learning_rate": 1.955540254047753e-05, + "loss": 2.4346, + "step": 15973 + }, + { + "epoch": 1.2891614881769025, + "grad_norm": 0.7461068630218506, + "learning_rate": 1.9546025718087645e-05, + "loss": 2.4331, + "step": 15974 + }, + { + "epoch": 1.2892421919134855, + "grad_norm": 0.6992977857589722, + "learning_rate": 1.953665090082115e-05, + "loss": 2.424, + "step": 15975 + }, + { + "epoch": 1.2893228956500686, + "grad_norm": 0.6674031615257263, + "learning_rate": 1.9527278088911617e-05, + "loss": 2.4545, + "step": 15976 + }, + { + "epoch": 1.2894035993866515, + "grad_norm": 0.7377402782440186, + "learning_rate": 1.9517907282592662e-05, + "loss": 2.4625, + "step": 15977 + }, + { + "epoch": 1.2894843031232346, + "grad_norm": 0.720579206943512, + "learning_rate": 1.950853848209788e-05, + "loss": 2.4073, + "step": 15978 + }, + { + "epoch": 1.2895650068598177, + "grad_norm": 0.7221893668174744, + "learning_rate": 1.9499171687660688e-05, + "loss": 2.4056, + "step": 15979 + }, + { + "epoch": 1.2896457105964005, + "grad_norm": 0.7409725189208984, + "learning_rate": 1.9489806899514574e-05, + "loss": 2.3899, + "step": 15980 + }, + { + "epoch": 1.2897264143329836, + 
"grad_norm": 0.6946583986282349, + "learning_rate": 1.948044411789296e-05, + "loss": 2.4832, + "step": 15981 + }, + { + "epoch": 1.2898071180695667, + "grad_norm": 0.7031306028366089, + "learning_rate": 1.9471083343029096e-05, + "loss": 2.4265, + "step": 15982 + }, + { + "epoch": 1.2898878218061496, + "grad_norm": 0.660093367099762, + "learning_rate": 1.946172457515637e-05, + "loss": 2.4883, + "step": 15983 + }, + { + "epoch": 1.2899685255427327, + "grad_norm": 0.700641930103302, + "learning_rate": 1.945236781450802e-05, + "loss": 2.4096, + "step": 15984 + }, + { + "epoch": 1.2900492292793158, + "grad_norm": 0.7350760698318481, + "learning_rate": 1.9443013061317205e-05, + "loss": 2.4161, + "step": 15985 + }, + { + "epoch": 1.2901299330158986, + "grad_norm": 0.7567386031150818, + "learning_rate": 1.9433660315817072e-05, + "loss": 2.3978, + "step": 15986 + }, + { + "epoch": 1.2902106367524817, + "grad_norm": 0.7471369504928589, + "learning_rate": 1.9424309578240717e-05, + "loss": 2.4079, + "step": 15987 + }, + { + "epoch": 1.2902913404890646, + "grad_norm": 0.6630815267562866, + "learning_rate": 1.941496084882124e-05, + "loss": 2.4223, + "step": 15988 + }, + { + "epoch": 1.2903720442256477, + "grad_norm": 0.687224268913269, + "learning_rate": 1.940561412779155e-05, + "loss": 2.4413, + "step": 15989 + }, + { + "epoch": 1.2904527479622305, + "grad_norm": 0.6989685297012329, + "learning_rate": 1.9396269415384637e-05, + "loss": 2.3651, + "step": 15990 + }, + { + "epoch": 1.2905334516988136, + "grad_norm": 0.7256720066070557, + "learning_rate": 1.938692671183342e-05, + "loss": 2.4526, + "step": 15991 + }, + { + "epoch": 1.2906141554353967, + "grad_norm": 0.692032516002655, + "learning_rate": 1.9377586017370685e-05, + "loss": 2.3936, + "step": 15992 + }, + { + "epoch": 1.2906948591719796, + "grad_norm": 0.6733511686325073, + "learning_rate": 1.936824733222925e-05, + "loss": 2.4691, + "step": 15993 + }, + { + "epoch": 1.2907755629085627, + "grad_norm": 0.6698563098907471, + 
"learning_rate": 1.935891065664187e-05, + "loss": 2.3904, + "step": 15994 + }, + { + "epoch": 1.2908562666451457, + "grad_norm": 0.660521388053894, + "learning_rate": 1.934957599084123e-05, + "loss": 2.4647, + "step": 15995 + }, + { + "epoch": 1.2909369703817286, + "grad_norm": 0.6714615821838379, + "learning_rate": 1.9340243335059982e-05, + "loss": 2.403, + "step": 15996 + }, + { + "epoch": 1.2910176741183117, + "grad_norm": 0.726099967956543, + "learning_rate": 1.9330912689530746e-05, + "loss": 2.4101, + "step": 15997 + }, + { + "epoch": 1.2910983778548948, + "grad_norm": 0.6585896015167236, + "learning_rate": 1.932158405448601e-05, + "loss": 2.3813, + "step": 15998 + }, + { + "epoch": 1.2911790815914777, + "grad_norm": 0.7967908382415771, + "learning_rate": 1.9312257430158286e-05, + "loss": 2.4188, + "step": 15999 + }, + { + "epoch": 1.2912597853280607, + "grad_norm": 0.7340367436408997, + "learning_rate": 1.9302932816780063e-05, + "loss": 2.4642, + "step": 16000 + }, + { + "epoch": 1.2912597853280607, + "eval_loss": 2.3791537284851074, + "eval_runtime": 780.6124, + "eval_samples_per_second": 3.356, + "eval_steps_per_second": 0.56, + "step": 16000 + }, + { + "epoch": 1.2913404890646438, + "grad_norm": 0.6778663992881775, + "learning_rate": 1.929361021458367e-05, + "loss": 2.4057, + "step": 16001 + }, + { + "epoch": 1.2914211928012267, + "grad_norm": 0.6982381343841553, + "learning_rate": 1.9284289623801477e-05, + "loss": 2.4376, + "step": 16002 + }, + { + "epoch": 1.2915018965378098, + "grad_norm": 0.6956612467765808, + "learning_rate": 1.927497104466578e-05, + "loss": 2.4485, + "step": 16003 + }, + { + "epoch": 1.2915826002743926, + "grad_norm": 0.6780211925506592, + "learning_rate": 1.9265654477408825e-05, + "loss": 2.4233, + "step": 16004 + }, + { + "epoch": 1.2916633040109757, + "grad_norm": 0.6869028806686401, + "learning_rate": 1.92563399222628e-05, + "loss": 2.4156, + "step": 16005 + }, + { + "epoch": 1.2917440077475586, + "grad_norm": 0.6402696967124939, 
+ "learning_rate": 1.9247027379459848e-05, + "loss": 2.4208, + "step": 16006 + }, + { + "epoch": 1.2918247114841417, + "grad_norm": 0.6868177652359009, + "learning_rate": 1.92377168492321e-05, + "loss": 2.4067, + "step": 16007 + }, + { + "epoch": 1.2919054152207248, + "grad_norm": 0.7152438759803772, + "learning_rate": 1.922840833181152e-05, + "loss": 2.3944, + "step": 16008 + }, + { + "epoch": 1.2919861189573076, + "grad_norm": 0.6467335820198059, + "learning_rate": 1.921910182743015e-05, + "loss": 2.4064, + "step": 16009 + }, + { + "epoch": 1.2920668226938907, + "grad_norm": 0.6918551325798035, + "learning_rate": 1.9209797336319956e-05, + "loss": 2.4457, + "step": 16010 + }, + { + "epoch": 1.2921475264304738, + "grad_norm": 0.7308588027954102, + "learning_rate": 1.920049485871278e-05, + "loss": 2.3785, + "step": 16011 + }, + { + "epoch": 1.2922282301670567, + "grad_norm": 0.6918718814849854, + "learning_rate": 1.9191194394840472e-05, + "loss": 2.4645, + "step": 16012 + }, + { + "epoch": 1.2923089339036398, + "grad_norm": 0.7048078775405884, + "learning_rate": 1.9181895944934848e-05, + "loss": 2.4082, + "step": 16013 + }, + { + "epoch": 1.2923896376402229, + "grad_norm": 0.7175794839859009, + "learning_rate": 1.917259950922763e-05, + "loss": 2.4521, + "step": 16014 + }, + { + "epoch": 1.2924703413768057, + "grad_norm": 0.6895543932914734, + "learning_rate": 1.916330508795051e-05, + "loss": 2.4058, + "step": 16015 + }, + { + "epoch": 1.2925510451133888, + "grad_norm": 0.6951895952224731, + "learning_rate": 1.9154012681335176e-05, + "loss": 2.4274, + "step": 16016 + }, + { + "epoch": 1.2926317488499717, + "grad_norm": 0.6807428598403931, + "learning_rate": 1.9144722289613148e-05, + "loss": 2.4008, + "step": 16017 + }, + { + "epoch": 1.2927124525865548, + "grad_norm": 0.6643410325050354, + "learning_rate": 1.9135433913015997e-05, + "loss": 2.4036, + "step": 16018 + }, + { + "epoch": 1.2927931563231376, + "grad_norm": 0.7283294796943665, + "learning_rate": 
1.912614755177522e-05, + "loss": 2.4118, + "step": 16019 + }, + { + "epoch": 1.2928738600597207, + "grad_norm": 0.7516021132469177, + "learning_rate": 1.911686320612227e-05, + "loss": 2.3983, + "step": 16020 + }, + { + "epoch": 1.2929545637963038, + "grad_norm": 0.7314203381538391, + "learning_rate": 1.91075808762885e-05, + "loss": 2.4352, + "step": 16021 + }, + { + "epoch": 1.2930352675328867, + "grad_norm": 0.6904106736183167, + "learning_rate": 1.9098300562505266e-05, + "loss": 2.3734, + "step": 16022 + }, + { + "epoch": 1.2931159712694698, + "grad_norm": 0.6936709880828857, + "learning_rate": 1.9089022265003863e-05, + "loss": 2.4356, + "step": 16023 + }, + { + "epoch": 1.2931966750060528, + "grad_norm": 0.6753442883491516, + "learning_rate": 1.9079745984015528e-05, + "loss": 2.4713, + "step": 16024 + }, + { + "epoch": 1.2932773787426357, + "grad_norm": 0.7185340523719788, + "learning_rate": 1.9070471719771445e-05, + "loss": 2.4021, + "step": 16025 + }, + { + "epoch": 1.2933580824792188, + "grad_norm": 0.7486871480941772, + "learning_rate": 1.9061199472502798e-05, + "loss": 2.4144, + "step": 16026 + }, + { + "epoch": 1.2934387862158019, + "grad_norm": 0.6790735721588135, + "learning_rate": 1.90519292424406e-05, + "loss": 2.413, + "step": 16027 + }, + { + "epoch": 1.2935194899523847, + "grad_norm": 0.7104402780532837, + "learning_rate": 1.9042661029815922e-05, + "loss": 2.452, + "step": 16028 + }, + { + "epoch": 1.2936001936889678, + "grad_norm": 0.6975364685058594, + "learning_rate": 1.9033394834859796e-05, + "loss": 2.4169, + "step": 16029 + }, + { + "epoch": 1.293680897425551, + "grad_norm": 0.7619667649269104, + "learning_rate": 1.9024130657803085e-05, + "loss": 2.4106, + "step": 16030 + }, + { + "epoch": 1.2937616011621338, + "grad_norm": 0.6600254774093628, + "learning_rate": 1.9014868498876716e-05, + "loss": 2.3955, + "step": 16031 + }, + { + "epoch": 1.2938423048987169, + "grad_norm": 0.6790784597396851, + "learning_rate": 1.9005608358311533e-05, + 
"loss": 2.437, + "step": 16032 + }, + { + "epoch": 1.2939230086352997, + "grad_norm": 0.7085568308830261, + "learning_rate": 1.899635023633828e-05, + "loss": 2.4729, + "step": 16033 + }, + { + "epoch": 1.2940037123718828, + "grad_norm": 0.6940603256225586, + "learning_rate": 1.8987094133187732e-05, + "loss": 2.4099, + "step": 16034 + }, + { + "epoch": 1.2940844161084657, + "grad_norm": 0.7387171387672424, + "learning_rate": 1.897784004909058e-05, + "loss": 2.4509, + "step": 16035 + }, + { + "epoch": 1.2941651198450488, + "grad_norm": 0.8263981938362122, + "learning_rate": 1.8968587984277463e-05, + "loss": 2.4208, + "step": 16036 + }, + { + "epoch": 1.2942458235816319, + "grad_norm": 0.7393552660942078, + "learning_rate": 1.8959337938978937e-05, + "loss": 2.4458, + "step": 16037 + }, + { + "epoch": 1.2943265273182147, + "grad_norm": 0.652787983417511, + "learning_rate": 1.895008991342555e-05, + "loss": 2.3593, + "step": 16038 + }, + { + "epoch": 1.2944072310547978, + "grad_norm": 0.6533015370368958, + "learning_rate": 1.8940843907847817e-05, + "loss": 2.4538, + "step": 16039 + }, + { + "epoch": 1.294487934791381, + "grad_norm": 0.6723785400390625, + "learning_rate": 1.8931599922476106e-05, + "loss": 2.4528, + "step": 16040 + }, + { + "epoch": 1.2945686385279638, + "grad_norm": 0.693242073059082, + "learning_rate": 1.892235795754085e-05, + "loss": 2.4006, + "step": 16041 + }, + { + "epoch": 1.2946493422645469, + "grad_norm": 0.6849604845046997, + "learning_rate": 1.8913118013272403e-05, + "loss": 2.3758, + "step": 16042 + }, + { + "epoch": 1.29473004600113, + "grad_norm": 0.7252739667892456, + "learning_rate": 1.8903880089900983e-05, + "loss": 2.4101, + "step": 16043 + }, + { + "epoch": 1.2948107497377128, + "grad_norm": 0.720431923866272, + "learning_rate": 1.8894644187656864e-05, + "loss": 2.4241, + "step": 16044 + }, + { + "epoch": 1.294891453474296, + "grad_norm": 0.6936169862747192, + "learning_rate": 1.8885410306770225e-05, + "loss": 2.4225, + "step": 16045 + 
}, + { + "epoch": 1.294972157210879, + "grad_norm": 0.7698646187782288, + "learning_rate": 1.8876178447471193e-05, + "loss": 2.4031, + "step": 16046 + }, + { + "epoch": 1.2950528609474619, + "grad_norm": 0.6800495982170105, + "learning_rate": 1.8866948609989854e-05, + "loss": 2.3679, + "step": 16047 + }, + { + "epoch": 1.295133564684045, + "grad_norm": 0.7348111867904663, + "learning_rate": 1.8857720794556267e-05, + "loss": 2.4263, + "step": 16048 + }, + { + "epoch": 1.2952142684206278, + "grad_norm": 0.6614782214164734, + "learning_rate": 1.8848495001400356e-05, + "loss": 2.4396, + "step": 16049 + }, + { + "epoch": 1.295294972157211, + "grad_norm": 0.6683650612831116, + "learning_rate": 1.8839271230752075e-05, + "loss": 2.4189, + "step": 16050 + }, + { + "epoch": 1.2953756758937938, + "grad_norm": 0.711040198802948, + "learning_rate": 1.8830049482841328e-05, + "loss": 2.3974, + "step": 16051 + }, + { + "epoch": 1.2954563796303769, + "grad_norm": 0.6663193702697754, + "learning_rate": 1.882082975789795e-05, + "loss": 2.4196, + "step": 16052 + }, + { + "epoch": 1.29553708336696, + "grad_norm": 0.6551210284233093, + "learning_rate": 1.881161205615166e-05, + "loss": 2.3793, + "step": 16053 + }, + { + "epoch": 1.2956177871035428, + "grad_norm": 0.6849039793014526, + "learning_rate": 1.8802396377832243e-05, + "loss": 2.3941, + "step": 16054 + }, + { + "epoch": 1.295698490840126, + "grad_norm": 0.7642949223518372, + "learning_rate": 1.8793182723169357e-05, + "loss": 2.4296, + "step": 16055 + }, + { + "epoch": 1.295779194576709, + "grad_norm": 0.7104716897010803, + "learning_rate": 1.878397109239263e-05, + "loss": 2.4124, + "step": 16056 + }, + { + "epoch": 1.2958598983132918, + "grad_norm": 0.6822344064712524, + "learning_rate": 1.877476148573164e-05, + "loss": 2.4072, + "step": 16057 + }, + { + "epoch": 1.295940602049875, + "grad_norm": 0.6824066042900085, + "learning_rate": 1.8765553903415956e-05, + "loss": 2.4137, + "step": 16058 + }, + { + "epoch": 1.296021305786458, 
+ "grad_norm": 0.7083307504653931, + "learning_rate": 1.875634834567498e-05, + "loss": 2.4423, + "step": 16059 + }, + { + "epoch": 1.2961020095230409, + "grad_norm": 0.7301077246665955, + "learning_rate": 1.874714481273818e-05, + "loss": 2.3926, + "step": 16060 + }, + { + "epoch": 1.296182713259624, + "grad_norm": 0.685656726360321, + "learning_rate": 1.873794330483496e-05, + "loss": 2.4409, + "step": 16061 + }, + { + "epoch": 1.296263416996207, + "grad_norm": 0.6916719675064087, + "learning_rate": 1.8728743822194584e-05, + "loss": 2.4141, + "step": 16062 + }, + { + "epoch": 1.29634412073279, + "grad_norm": 0.7188845276832581, + "learning_rate": 1.871954636504636e-05, + "loss": 2.4186, + "step": 16063 + }, + { + "epoch": 1.2964248244693728, + "grad_norm": 0.6637440919876099, + "learning_rate": 1.8710350933619504e-05, + "loss": 2.4526, + "step": 16064 + }, + { + "epoch": 1.2965055282059559, + "grad_norm": 0.7000349760055542, + "learning_rate": 1.87011575281432e-05, + "loss": 2.4096, + "step": 16065 + }, + { + "epoch": 1.296586231942539, + "grad_norm": 0.693513810634613, + "learning_rate": 1.8691966148846573e-05, + "loss": 2.3931, + "step": 16066 + }, + { + "epoch": 1.2966669356791218, + "grad_norm": 0.6928985118865967, + "learning_rate": 1.8682776795958678e-05, + "loss": 2.4384, + "step": 16067 + }, + { + "epoch": 1.296747639415705, + "grad_norm": 0.6474096179008484, + "learning_rate": 1.8673589469708585e-05, + "loss": 2.3985, + "step": 16068 + }, + { + "epoch": 1.296828343152288, + "grad_norm": 0.6827313899993896, + "learning_rate": 1.866440417032521e-05, + "loss": 2.4607, + "step": 16069 + }, + { + "epoch": 1.2969090468888709, + "grad_norm": 0.7183445692062378, + "learning_rate": 1.8655220898037485e-05, + "loss": 2.4396, + "step": 16070 + }, + { + "epoch": 1.296989750625454, + "grad_norm": 0.6997376680374146, + "learning_rate": 1.8646039653074333e-05, + "loss": 2.4627, + "step": 16071 + }, + { + "epoch": 1.297070454362037, + "grad_norm": 0.7358444333076477, + 
"learning_rate": 1.8636860435664493e-05, + "loss": 2.4165, + "step": 16072 + }, + { + "epoch": 1.29715115809862, + "grad_norm": 0.8126270771026611, + "learning_rate": 1.8627683246036787e-05, + "loss": 2.4681, + "step": 16073 + }, + { + "epoch": 1.297231861835203, + "grad_norm": 0.7364177107810974, + "learning_rate": 1.8618508084419918e-05, + "loss": 2.44, + "step": 16074 + }, + { + "epoch": 1.297312565571786, + "grad_norm": 0.7480010390281677, + "learning_rate": 1.8609334951042567e-05, + "loss": 2.4759, + "step": 16075 + }, + { + "epoch": 1.297393269308369, + "grad_norm": 0.6563693284988403, + "learning_rate": 1.8600163846133335e-05, + "loss": 2.3865, + "step": 16076 + }, + { + "epoch": 1.297473973044952, + "grad_norm": 0.6961230039596558, + "learning_rate": 1.8590994769920832e-05, + "loss": 2.3851, + "step": 16077 + }, + { + "epoch": 1.297554676781535, + "grad_norm": 0.7137415409088135, + "learning_rate": 1.8581827722633527e-05, + "loss": 2.4115, + "step": 16078 + }, + { + "epoch": 1.297635380518118, + "grad_norm": 0.6579335331916809, + "learning_rate": 1.85726627044999e-05, + "loss": 2.4464, + "step": 16079 + }, + { + "epoch": 1.2977160842547009, + "grad_norm": 0.7069905400276184, + "learning_rate": 1.8563499715748366e-05, + "loss": 2.4057, + "step": 16080 + }, + { + "epoch": 1.297796787991284, + "grad_norm": 0.771925687789917, + "learning_rate": 1.8554338756607325e-05, + "loss": 2.4696, + "step": 16081 + }, + { + "epoch": 1.297877491727867, + "grad_norm": 0.7268456816673279, + "learning_rate": 1.8545179827305048e-05, + "loss": 2.3949, + "step": 16082 + }, + { + "epoch": 1.29795819546445, + "grad_norm": 0.7049130797386169, + "learning_rate": 1.8536022928069796e-05, + "loss": 2.4448, + "step": 16083 + }, + { + "epoch": 1.298038899201033, + "grad_norm": 0.6716888546943665, + "learning_rate": 1.852686805912982e-05, + "loss": 2.3356, + "step": 16084 + }, + { + "epoch": 1.298119602937616, + "grad_norm": 0.666386604309082, + "learning_rate": 1.851771522071325e-05, + 
"loss": 2.4226, + "step": 16085 + }, + { + "epoch": 1.298200306674199, + "grad_norm": 0.7084901332855225, + "learning_rate": 1.8508564413048223e-05, + "loss": 2.4452, + "step": 16086 + }, + { + "epoch": 1.298281010410782, + "grad_norm": 0.6615412831306458, + "learning_rate": 1.8499415636362815e-05, + "loss": 2.4193, + "step": 16087 + }, + { + "epoch": 1.2983617141473651, + "grad_norm": 0.7143606543540955, + "learning_rate": 1.849026889088499e-05, + "loss": 2.4513, + "step": 16088 + }, + { + "epoch": 1.298442417883948, + "grad_norm": 0.7241482734680176, + "learning_rate": 1.8481124176842723e-05, + "loss": 2.458, + "step": 16089 + }, + { + "epoch": 1.298523121620531, + "grad_norm": 0.6762149930000305, + "learning_rate": 1.8471981494463963e-05, + "loss": 2.4386, + "step": 16090 + }, + { + "epoch": 1.2986038253571142, + "grad_norm": 0.6672768592834473, + "learning_rate": 1.8462840843976525e-05, + "loss": 2.375, + "step": 16091 + }, + { + "epoch": 1.298684529093697, + "grad_norm": 0.6871693134307861, + "learning_rate": 1.8453702225608226e-05, + "loss": 2.4342, + "step": 16092 + }, + { + "epoch": 1.2987652328302801, + "grad_norm": 0.6771275401115417, + "learning_rate": 1.8444565639586864e-05, + "loss": 2.402, + "step": 16093 + }, + { + "epoch": 1.298845936566863, + "grad_norm": 0.6627403497695923, + "learning_rate": 1.8435431086140077e-05, + "loss": 2.4667, + "step": 16094 + }, + { + "epoch": 1.298926640303446, + "grad_norm": 0.7001610398292542, + "learning_rate": 1.8426298565495538e-05, + "loss": 2.4396, + "step": 16095 + }, + { + "epoch": 1.299007344040029, + "grad_norm": 0.7574489712715149, + "learning_rate": 1.8417168077880908e-05, + "loss": 2.4601, + "step": 16096 + }, + { + "epoch": 1.299088047776612, + "grad_norm": 0.7771055698394775, + "learning_rate": 1.840803962352372e-05, + "loss": 2.4371, + "step": 16097 + }, + { + "epoch": 1.299168751513195, + "grad_norm": 0.6738649606704712, + "learning_rate": 1.8398913202651457e-05, + "loss": 2.3921, + "step": 16098 + }, + 
{ + "epoch": 1.299249455249778, + "grad_norm": 0.7014862895011902, + "learning_rate": 1.8389788815491583e-05, + "loss": 2.451, + "step": 16099 + }, + { + "epoch": 1.299330158986361, + "grad_norm": 0.7026070952415466, + "learning_rate": 1.8380666462271523e-05, + "loss": 2.4583, + "step": 16100 + }, + { + "epoch": 1.2994108627229441, + "grad_norm": 0.6904535293579102, + "learning_rate": 1.8371546143218588e-05, + "loss": 2.4453, + "step": 16101 + }, + { + "epoch": 1.299491566459527, + "grad_norm": 0.6974804997444153, + "learning_rate": 1.8362427858560093e-05, + "loss": 2.4291, + "step": 16102 + }, + { + "epoch": 1.29957227019611, + "grad_norm": 0.6826989650726318, + "learning_rate": 1.8353311608523326e-05, + "loss": 2.4183, + "step": 16103 + }, + { + "epoch": 1.2996529739326932, + "grad_norm": 0.6804787516593933, + "learning_rate": 1.8344197393335448e-05, + "loss": 2.434, + "step": 16104 + }, + { + "epoch": 1.299733677669276, + "grad_norm": 0.7144587635993958, + "learning_rate": 1.8335085213223613e-05, + "loss": 2.4296, + "step": 16105 + }, + { + "epoch": 1.2998143814058591, + "grad_norm": 0.7228755354881287, + "learning_rate": 1.8325975068414924e-05, + "loss": 2.3987, + "step": 16106 + }, + { + "epoch": 1.2998950851424422, + "grad_norm": 0.7417716383934021, + "learning_rate": 1.8316866959136438e-05, + "loss": 2.4076, + "step": 16107 + }, + { + "epoch": 1.299975788879025, + "grad_norm": 0.6737387776374817, + "learning_rate": 1.8307760885615154e-05, + "loss": 2.4175, + "step": 16108 + }, + { + "epoch": 1.3000564926156082, + "grad_norm": 0.7294918298721313, + "learning_rate": 1.8298656848078035e-05, + "loss": 2.4022, + "step": 16109 + }, + { + "epoch": 1.300137196352191, + "grad_norm": 0.7200861573219299, + "learning_rate": 1.828955484675193e-05, + "loss": 2.4018, + "step": 16110 + }, + { + "epoch": 1.3002179000887741, + "grad_norm": 0.7704176306724548, + "learning_rate": 1.8280454881863718e-05, + "loss": 2.4539, + "step": 16111 + }, + { + "epoch": 1.300298603825357, + 
"grad_norm": 0.6790730953216553, + "learning_rate": 1.8271356953640184e-05, + "loss": 2.4196, + "step": 16112 + }, + { + "epoch": 1.30037930756194, + "grad_norm": 0.7165740132331848, + "learning_rate": 1.8262261062308096e-05, + "loss": 2.4234, + "step": 16113 + }, + { + "epoch": 1.3004600112985232, + "grad_norm": 0.7716830372810364, + "learning_rate": 1.82531672080941e-05, + "loss": 2.4255, + "step": 16114 + }, + { + "epoch": 1.300540715035106, + "grad_norm": 0.6525317430496216, + "learning_rate": 1.824407539122488e-05, + "loss": 2.4482, + "step": 16115 + }, + { + "epoch": 1.3006214187716891, + "grad_norm": 0.7397769093513489, + "learning_rate": 1.8234985611927003e-05, + "loss": 2.33, + "step": 16116 + }, + { + "epoch": 1.3007021225082722, + "grad_norm": 0.7106032967567444, + "learning_rate": 1.822589787042702e-05, + "loss": 2.485, + "step": 16117 + }, + { + "epoch": 1.300782826244855, + "grad_norm": 0.7030045390129089, + "learning_rate": 1.8216812166951425e-05, + "loss": 2.454, + "step": 16118 + }, + { + "epoch": 1.3008635299814382, + "grad_norm": 0.7075662612915039, + "learning_rate": 1.8207728501726683e-05, + "loss": 2.4589, + "step": 16119 + }, + { + "epoch": 1.3009442337180213, + "grad_norm": 0.6700533032417297, + "learning_rate": 1.819864687497912e-05, + "loss": 2.4398, + "step": 16120 + }, + { + "epoch": 1.3010249374546041, + "grad_norm": 0.6951712369918823, + "learning_rate": 1.8189567286935117e-05, + "loss": 2.3998, + "step": 16121 + }, + { + "epoch": 1.3011056411911872, + "grad_norm": 0.708344578742981, + "learning_rate": 1.818048973782097e-05, + "loss": 2.4142, + "step": 16122 + }, + { + "epoch": 1.30118634492777, + "grad_norm": 0.7078592777252197, + "learning_rate": 1.817141422786287e-05, + "loss": 2.451, + "step": 16123 + }, + { + "epoch": 1.3012670486643532, + "grad_norm": 0.7111849784851074, + "learning_rate": 1.816234075728703e-05, + "loss": 2.4762, + "step": 16124 + }, + { + "epoch": 1.301347752400936, + "grad_norm": 0.6716348528862, + 
"learning_rate": 1.8153269326319588e-05, + "loss": 2.4373, + "step": 16125 + }, + { + "epoch": 1.3014284561375191, + "grad_norm": 0.6592512130737305, + "learning_rate": 1.8144199935186623e-05, + "loss": 2.412, + "step": 16126 + }, + { + "epoch": 1.3015091598741022, + "grad_norm": 0.6958334445953369, + "learning_rate": 1.8135132584114167e-05, + "loss": 2.4077, + "step": 16127 + }, + { + "epoch": 1.301589863610685, + "grad_norm": 0.6911341547966003, + "learning_rate": 1.8126067273328207e-05, + "loss": 2.409, + "step": 16128 + }, + { + "epoch": 1.3016705673472682, + "grad_norm": 0.676114022731781, + "learning_rate": 1.8117004003054693e-05, + "loss": 2.4463, + "step": 16129 + }, + { + "epoch": 1.3017512710838512, + "grad_norm": 0.6493322849273682, + "learning_rate": 1.810794277351947e-05, + "loss": 2.4377, + "step": 16130 + }, + { + "epoch": 1.3018319748204341, + "grad_norm": 0.6938454508781433, + "learning_rate": 1.8098883584948367e-05, + "loss": 2.4298, + "step": 16131 + }, + { + "epoch": 1.3019126785570172, + "grad_norm": 0.69407719373703, + "learning_rate": 1.8089826437567214e-05, + "loss": 2.4107, + "step": 16132 + }, + { + "epoch": 1.3019933822936003, + "grad_norm": 0.6898862719535828, + "learning_rate": 1.8080771331601664e-05, + "loss": 2.4182, + "step": 16133 + }, + { + "epoch": 1.3020740860301832, + "grad_norm": 0.7377758026123047, + "learning_rate": 1.807171826727744e-05, + "loss": 2.4112, + "step": 16134 + }, + { + "epoch": 1.3021547897667662, + "grad_norm": 0.674057126045227, + "learning_rate": 1.8062667244820154e-05, + "loss": 2.4276, + "step": 16135 + }, + { + "epoch": 1.3022354935033493, + "grad_norm": 0.7087522745132446, + "learning_rate": 1.8053618264455384e-05, + "loss": 2.4338, + "step": 16136 + }, + { + "epoch": 1.3023161972399322, + "grad_norm": 0.70958411693573, + "learning_rate": 1.8044571326408667e-05, + "loss": 2.4369, + "step": 16137 + }, + { + "epoch": 1.3023969009765153, + "grad_norm": 0.7023837566375732, + "learning_rate": 
1.803552643090548e-05, + "loss": 2.4185, + "step": 16138 + }, + { + "epoch": 1.3024776047130981, + "grad_norm": 0.708543598651886, + "learning_rate": 1.8026483578171216e-05, + "loss": 2.4053, + "step": 16139 + }, + { + "epoch": 1.3025583084496812, + "grad_norm": 0.748601496219635, + "learning_rate": 1.8017442768431257e-05, + "loss": 2.3948, + "step": 16140 + }, + { + "epoch": 1.302639012186264, + "grad_norm": 0.6626949310302734, + "learning_rate": 1.800840400191096e-05, + "loss": 2.4636, + "step": 16141 + }, + { + "epoch": 1.3027197159228472, + "grad_norm": 0.7079617977142334, + "learning_rate": 1.7999367278835534e-05, + "loss": 2.4091, + "step": 16142 + }, + { + "epoch": 1.3028004196594303, + "grad_norm": 0.7025624513626099, + "learning_rate": 1.7990332599430225e-05, + "loss": 2.3732, + "step": 16143 + }, + { + "epoch": 1.3028811233960131, + "grad_norm": 0.7365758419036865, + "learning_rate": 1.7981299963920205e-05, + "loss": 2.4725, + "step": 16144 + }, + { + "epoch": 1.3029618271325962, + "grad_norm": 0.7511963248252869, + "learning_rate": 1.7972269372530615e-05, + "loss": 2.4304, + "step": 16145 + }, + { + "epoch": 1.3030425308691793, + "grad_norm": 0.7055985331535339, + "learning_rate": 1.796324082548644e-05, + "loss": 2.4259, + "step": 16146 + }, + { + "epoch": 1.3031232346057622, + "grad_norm": 0.691162645816803, + "learning_rate": 1.7954214323012775e-05, + "loss": 2.4262, + "step": 16147 + }, + { + "epoch": 1.3032039383423453, + "grad_norm": 0.7179710268974304, + "learning_rate": 1.7945189865334587e-05, + "loss": 2.4301, + "step": 16148 + }, + { + "epoch": 1.3032846420789284, + "grad_norm": 0.7391623258590698, + "learning_rate": 1.7936167452676744e-05, + "loss": 2.4302, + "step": 16149 + }, + { + "epoch": 1.3033653458155112, + "grad_norm": 0.7297981381416321, + "learning_rate": 1.7927147085264117e-05, + "loss": 2.3911, + "step": 16150 + }, + { + "epoch": 1.3034460495520943, + "grad_norm": 0.7571932673454285, + "learning_rate": 1.7918128763321552e-05, + 
"loss": 2.4348, + "step": 16151 + }, + { + "epoch": 1.3035267532886774, + "grad_norm": 0.7074765563011169, + "learning_rate": 1.7909112487073754e-05, + "loss": 2.4164, + "step": 16152 + }, + { + "epoch": 1.3036074570252603, + "grad_norm": 0.7534131407737732, + "learning_rate": 1.7900098256745467e-05, + "loss": 2.3784, + "step": 16153 + }, + { + "epoch": 1.3036881607618434, + "grad_norm": 0.675398588180542, + "learning_rate": 1.789108607256136e-05, + "loss": 2.4305, + "step": 16154 + }, + { + "epoch": 1.3037688644984262, + "grad_norm": 0.7099249362945557, + "learning_rate": 1.7882075934746002e-05, + "loss": 2.4053, + "step": 16155 + }, + { + "epoch": 1.3038495682350093, + "grad_norm": 0.6914681196212769, + "learning_rate": 1.787306784352397e-05, + "loss": 2.3902, + "step": 16156 + }, + { + "epoch": 1.3039302719715922, + "grad_norm": 0.6956958770751953, + "learning_rate": 1.786406179911977e-05, + "loss": 2.4026, + "step": 16157 + }, + { + "epoch": 1.3040109757081753, + "grad_norm": 0.6873000860214233, + "learning_rate": 1.7855057801757857e-05, + "loss": 2.4082, + "step": 16158 + }, + { + "epoch": 1.3040916794447583, + "grad_norm": 0.7340587377548218, + "learning_rate": 1.7846055851662625e-05, + "loss": 2.4894, + "step": 16159 + }, + { + "epoch": 1.3041723831813412, + "grad_norm": 0.6956963539123535, + "learning_rate": 1.7837055949058444e-05, + "loss": 2.3976, + "step": 16160 + }, + { + "epoch": 1.3042530869179243, + "grad_norm": 0.7654300332069397, + "learning_rate": 1.782805809416962e-05, + "loss": 2.4272, + "step": 16161 + }, + { + "epoch": 1.3043337906545074, + "grad_norm": 0.7735971212387085, + "learning_rate": 1.7819062287220368e-05, + "loss": 2.4513, + "step": 16162 + }, + { + "epoch": 1.3044144943910903, + "grad_norm": 0.6897203326225281, + "learning_rate": 1.7810068528434908e-05, + "loss": 2.3974, + "step": 16163 + }, + { + "epoch": 1.3044951981276733, + "grad_norm": 0.7328432202339172, + "learning_rate": 1.780107681803741e-05, + "loss": 2.4455, + "step": 
16164 + }, + { + "epoch": 1.3045759018642564, + "grad_norm": 0.7098489999771118, + "learning_rate": 1.7792087156251924e-05, + "loss": 2.4173, + "step": 16165 + }, + { + "epoch": 1.3046566056008393, + "grad_norm": 0.6593194007873535, + "learning_rate": 1.7783099543302518e-05, + "loss": 2.4102, + "step": 16166 + }, + { + "epoch": 1.3047373093374224, + "grad_norm": 0.7329291105270386, + "learning_rate": 1.7774113979413188e-05, + "loss": 2.4856, + "step": 16167 + }, + { + "epoch": 1.3048180130740052, + "grad_norm": 0.7033355236053467, + "learning_rate": 1.776513046480788e-05, + "loss": 2.4503, + "step": 16168 + }, + { + "epoch": 1.3048987168105883, + "grad_norm": 0.7063608765602112, + "learning_rate": 1.7756148999710486e-05, + "loss": 2.4523, + "step": 16169 + }, + { + "epoch": 1.3049794205471712, + "grad_norm": 0.6905883550643921, + "learning_rate": 1.774716958434487e-05, + "loss": 2.4149, + "step": 16170 + }, + { + "epoch": 1.3050601242837543, + "grad_norm": 0.694551408290863, + "learning_rate": 1.7738192218934778e-05, + "loss": 2.437, + "step": 16171 + }, + { + "epoch": 1.3051408280203374, + "grad_norm": 0.7173176407814026, + "learning_rate": 1.772921690370396e-05, + "loss": 2.4817, + "step": 16172 + }, + { + "epoch": 1.3052215317569202, + "grad_norm": 0.7197130918502808, + "learning_rate": 1.7720243638876153e-05, + "loss": 2.4481, + "step": 16173 + }, + { + "epoch": 1.3053022354935033, + "grad_norm": 0.710811197757721, + "learning_rate": 1.771127242467493e-05, + "loss": 2.397, + "step": 16174 + }, + { + "epoch": 1.3053829392300864, + "grad_norm": 0.9194550514221191, + "learning_rate": 1.7702303261323894e-05, + "loss": 2.5206, + "step": 16175 + }, + { + "epoch": 1.3054636429666693, + "grad_norm": 0.7003832459449768, + "learning_rate": 1.769333614904659e-05, + "loss": 2.4175, + "step": 16176 + }, + { + "epoch": 1.3055443467032524, + "grad_norm": 0.7161554098129272, + "learning_rate": 1.768437108806651e-05, + "loss": 2.3892, + "step": 16177 + }, + { + "epoch": 
1.3056250504398355, + "grad_norm": 0.6516181826591492, + "learning_rate": 1.767540807860707e-05, + "loss": 2.4361, + "step": 16178 + }, + { + "epoch": 1.3057057541764183, + "grad_norm": 0.7518061399459839, + "learning_rate": 1.7666447120891662e-05, + "loss": 2.4572, + "step": 16179 + }, + { + "epoch": 1.3057864579130014, + "grad_norm": 0.735388994216919, + "learning_rate": 1.7657488215143637e-05, + "loss": 2.3965, + "step": 16180 + }, + { + "epoch": 1.3058671616495845, + "grad_norm": 0.6994282007217407, + "learning_rate": 1.764853136158622e-05, + "loss": 2.4052, + "step": 16181 + }, + { + "epoch": 1.3059478653861674, + "grad_norm": 0.7095311880111694, + "learning_rate": 1.7639576560442684e-05, + "loss": 2.4818, + "step": 16182 + }, + { + "epoch": 1.3060285691227504, + "grad_norm": 0.6527207493782043, + "learning_rate": 1.7630623811936208e-05, + "loss": 2.3962, + "step": 16183 + }, + { + "epoch": 1.3061092728593333, + "grad_norm": 0.6668451428413391, + "learning_rate": 1.7621673116289882e-05, + "loss": 2.4514, + "step": 16184 + }, + { + "epoch": 1.3061899765959164, + "grad_norm": 0.7119911909103394, + "learning_rate": 1.7612724473726795e-05, + "loss": 2.4313, + "step": 16185 + }, + { + "epoch": 1.3062706803324993, + "grad_norm": 0.706249475479126, + "learning_rate": 1.7603777884469984e-05, + "loss": 2.4131, + "step": 16186 + }, + { + "epoch": 1.3063513840690824, + "grad_norm": 0.6634086966514587, + "learning_rate": 1.759483334874241e-05, + "loss": 2.3532, + "step": 16187 + }, + { + "epoch": 1.3064320878056654, + "grad_norm": 0.8096393942832947, + "learning_rate": 1.7585890866766995e-05, + "loss": 2.4485, + "step": 16188 + }, + { + "epoch": 1.3065127915422483, + "grad_norm": 0.675308883190155, + "learning_rate": 1.7576950438766615e-05, + "loss": 2.388, + "step": 16189 + }, + { + "epoch": 1.3065934952788314, + "grad_norm": 0.738275408744812, + "learning_rate": 1.756801206496411e-05, + "loss": 2.4485, + "step": 16190 + }, + { + "epoch": 1.3066741990154145, + 
"grad_norm": 0.7045620083808899, + "learning_rate": 1.755907574558221e-05, + "loss": 2.3985, + "step": 16191 + }, + { + "epoch": 1.3067549027519973, + "grad_norm": 0.6499879360198975, + "learning_rate": 1.755014148084363e-05, + "loss": 2.3992, + "step": 16192 + }, + { + "epoch": 1.3068356064885804, + "grad_norm": 0.7101179361343384, + "learning_rate": 1.7541209270971083e-05, + "loss": 2.4217, + "step": 16193 + }, + { + "epoch": 1.3069163102251635, + "grad_norm": 0.6865181922912598, + "learning_rate": 1.7532279116187124e-05, + "loss": 2.4805, + "step": 16194 + }, + { + "epoch": 1.3069970139617464, + "grad_norm": 0.7710141539573669, + "learning_rate": 1.752335101671434e-05, + "loss": 2.3654, + "step": 16195 + }, + { + "epoch": 1.3070777176983295, + "grad_norm": 0.695936381816864, + "learning_rate": 1.7514424972775244e-05, + "loss": 2.4315, + "step": 16196 + }, + { + "epoch": 1.3071584214349126, + "grad_norm": 0.6781535148620605, + "learning_rate": 1.7505500984592304e-05, + "loss": 2.4238, + "step": 16197 + }, + { + "epoch": 1.3072391251714954, + "grad_norm": 0.6549252271652222, + "learning_rate": 1.7496579052387918e-05, + "loss": 2.3766, + "step": 16198 + }, + { + "epoch": 1.3073198289080785, + "grad_norm": 0.6599059700965881, + "learning_rate": 1.7487659176384474e-05, + "loss": 2.4613, + "step": 16199 + }, + { + "epoch": 1.3074005326446614, + "grad_norm": 0.6742514967918396, + "learning_rate": 1.7478741356804228e-05, + "loss": 2.3917, + "step": 16200 + }, + { + "epoch": 1.3074812363812445, + "grad_norm": 0.6542397141456604, + "learning_rate": 1.746982559386946e-05, + "loss": 2.44, + "step": 16201 + }, + { + "epoch": 1.3075619401178273, + "grad_norm": 0.7200478315353394, + "learning_rate": 1.74609118878024e-05, + "loss": 2.4324, + "step": 16202 + }, + { + "epoch": 1.3076426438544104, + "grad_norm": 0.717628002166748, + "learning_rate": 1.745200023882515e-05, + "loss": 2.3996, + "step": 16203 + }, + { + "epoch": 1.3077233475909935, + "grad_norm": 0.7350025177001953, + 
"learning_rate": 1.744309064715983e-05, + "loss": 2.4812, + "step": 16204 + }, + { + "epoch": 1.3078040513275764, + "grad_norm": 0.7253599762916565, + "learning_rate": 1.74341831130285e-05, + "loss": 2.4454, + "step": 16205 + }, + { + "epoch": 1.3078847550641595, + "grad_norm": 0.7537909746170044, + "learning_rate": 1.7425277636653193e-05, + "loss": 2.4247, + "step": 16206 + }, + { + "epoch": 1.3079654588007426, + "grad_norm": 0.7563284039497375, + "learning_rate": 1.7416374218255783e-05, + "loss": 2.3893, + "step": 16207 + }, + { + "epoch": 1.3080461625373254, + "grad_norm": 0.7118926048278809, + "learning_rate": 1.740747285805818e-05, + "loss": 2.4146, + "step": 16208 + }, + { + "epoch": 1.3081268662739085, + "grad_norm": 0.7805569171905518, + "learning_rate": 1.7398573556282304e-05, + "loss": 2.396, + "step": 16209 + }, + { + "epoch": 1.3082075700104916, + "grad_norm": 0.7357630133628845, + "learning_rate": 1.738967631314987e-05, + "loss": 2.5405, + "step": 16210 + }, + { + "epoch": 1.3082882737470745, + "grad_norm": 0.6670438647270203, + "learning_rate": 1.7380781128882652e-05, + "loss": 2.4452, + "step": 16211 + }, + { + "epoch": 1.3083689774836575, + "grad_norm": 0.7374427318572998, + "learning_rate": 1.7371888003702353e-05, + "loss": 2.5143, + "step": 16212 + }, + { + "epoch": 1.3084496812202406, + "grad_norm": 0.672207236289978, + "learning_rate": 1.736299693783058e-05, + "loss": 2.4178, + "step": 16213 + }, + { + "epoch": 1.3085303849568235, + "grad_norm": 0.6926576495170593, + "learning_rate": 1.735410793148894e-05, + "loss": 2.3466, + "step": 16214 + }, + { + "epoch": 1.3086110886934066, + "grad_norm": 0.6928917169570923, + "learning_rate": 1.734522098489899e-05, + "loss": 2.4654, + "step": 16215 + }, + { + "epoch": 1.3086917924299895, + "grad_norm": 0.6536242961883545, + "learning_rate": 1.733633609828217e-05, + "loss": 2.3761, + "step": 16216 + }, + { + "epoch": 1.3087724961665725, + "grad_norm": 0.6993953585624695, + "learning_rate": 
1.732745327185994e-05, + "loss": 2.3963, + "step": 16217 + }, + { + "epoch": 1.3088531999031554, + "grad_norm": 0.6851957440376282, + "learning_rate": 1.731857250585368e-05, + "loss": 2.4253, + "step": 16218 + }, + { + "epoch": 1.3089339036397385, + "grad_norm": 0.6620005965232849, + "learning_rate": 1.7309693800484728e-05, + "loss": 2.4302, + "step": 16219 + }, + { + "epoch": 1.3090146073763216, + "grad_norm": 0.6704410314559937, + "learning_rate": 1.7300817155974356e-05, + "loss": 2.4065, + "step": 16220 + }, + { + "epoch": 1.3090953111129044, + "grad_norm": 0.6882327198982239, + "learning_rate": 1.7291942572543807e-05, + "loss": 2.4526, + "step": 16221 + }, + { + "epoch": 1.3091760148494875, + "grad_norm": 0.6971533298492432, + "learning_rate": 1.7283070050414275e-05, + "loss": 2.4076, + "step": 16222 + }, + { + "epoch": 1.3092567185860706, + "grad_norm": 0.6662544012069702, + "learning_rate": 1.7274199589806827e-05, + "loss": 2.3678, + "step": 16223 + }, + { + "epoch": 1.3093374223226535, + "grad_norm": 0.6342894434928894, + "learning_rate": 1.726533119094258e-05, + "loss": 2.3424, + "step": 16224 + }, + { + "epoch": 1.3094181260592366, + "grad_norm": 0.6808488965034485, + "learning_rate": 1.7256464854042577e-05, + "loss": 2.4286, + "step": 16225 + }, + { + "epoch": 1.3094988297958197, + "grad_norm": 0.6417922973632812, + "learning_rate": 1.7247600579327738e-05, + "loss": 2.3677, + "step": 16226 + }, + { + "epoch": 1.3095795335324025, + "grad_norm": 0.7267102599143982, + "learning_rate": 1.7238738367019002e-05, + "loss": 2.3974, + "step": 16227 + }, + { + "epoch": 1.3096602372689856, + "grad_norm": 0.6915002465248108, + "learning_rate": 1.722987821733725e-05, + "loss": 2.4429, + "step": 16228 + }, + { + "epoch": 1.3097409410055685, + "grad_norm": 0.6930112242698669, + "learning_rate": 1.7221020130503296e-05, + "loss": 2.4272, + "step": 16229 + }, + { + "epoch": 1.3098216447421516, + "grad_norm": 0.7049465179443359, + "learning_rate": 1.7212164106737904e-05, + 
"loss": 2.4089, + "step": 16230 + }, + { + "epoch": 1.3099023484787344, + "grad_norm": 0.7230044603347778, + "learning_rate": 1.720331014626182e-05, + "loss": 2.4313, + "step": 16231 + }, + { + "epoch": 1.3099830522153175, + "grad_norm": 0.6513530015945435, + "learning_rate": 1.7194458249295665e-05, + "loss": 2.3293, + "step": 16232 + }, + { + "epoch": 1.3100637559519006, + "grad_norm": 0.6880534291267395, + "learning_rate": 1.718560841606005e-05, + "loss": 2.4556, + "step": 16233 + }, + { + "epoch": 1.3101444596884835, + "grad_norm": 0.7075292468070984, + "learning_rate": 1.717676064677559e-05, + "loss": 2.4747, + "step": 16234 + }, + { + "epoch": 1.3102251634250666, + "grad_norm": 0.7713594436645508, + "learning_rate": 1.7167914941662723e-05, + "loss": 2.4135, + "step": 16235 + }, + { + "epoch": 1.3103058671616497, + "grad_norm": 0.7883979082107544, + "learning_rate": 1.7159071300941943e-05, + "loss": 2.418, + "step": 16236 + }, + { + "epoch": 1.3103865708982325, + "grad_norm": 0.6588975787162781, + "learning_rate": 1.7150229724833655e-05, + "loss": 2.3295, + "step": 16237 + }, + { + "epoch": 1.3104672746348156, + "grad_norm": 0.679086446762085, + "learning_rate": 1.7141390213558217e-05, + "loss": 2.413, + "step": 16238 + }, + { + "epoch": 1.3105479783713987, + "grad_norm": 0.6803067326545715, + "learning_rate": 1.713255276733592e-05, + "loss": 2.4338, + "step": 16239 + }, + { + "epoch": 1.3106286821079816, + "grad_norm": 0.7041650414466858, + "learning_rate": 1.712371738638704e-05, + "loss": 2.469, + "step": 16240 + }, + { + "epoch": 1.3107093858445646, + "grad_norm": 0.6560962796211243, + "learning_rate": 1.711488407093178e-05, + "loss": 2.4353, + "step": 16241 + }, + { + "epoch": 1.3107900895811477, + "grad_norm": 0.6637921333312988, + "learning_rate": 1.7106052821190244e-05, + "loss": 2.3996, + "step": 16242 + }, + { + "epoch": 1.3108707933177306, + "grad_norm": 0.8131709098815918, + "learning_rate": 1.7097223637382565e-05, + "loss": 2.466, + "step": 16243 + 
}, + { + "epoch": 1.3109514970543137, + "grad_norm": 0.6637253165245056, + "learning_rate": 1.708839651972881e-05, + "loss": 2.3811, + "step": 16244 + }, + { + "epoch": 1.3110322007908966, + "grad_norm": 0.71912682056427, + "learning_rate": 1.7079571468448917e-05, + "loss": 2.4175, + "step": 16245 + }, + { + "epoch": 1.3111129045274796, + "grad_norm": 0.7028010487556458, + "learning_rate": 1.7070748483762854e-05, + "loss": 2.41, + "step": 16246 + }, + { + "epoch": 1.3111936082640625, + "grad_norm": 0.7241945862770081, + "learning_rate": 1.7061927565890522e-05, + "loss": 2.4171, + "step": 16247 + }, + { + "epoch": 1.3112743120006456, + "grad_norm": 0.7039221525192261, + "learning_rate": 1.705310871505177e-05, + "loss": 2.4154, + "step": 16248 + }, + { + "epoch": 1.3113550157372287, + "grad_norm": 0.672444760799408, + "learning_rate": 1.704429193146636e-05, + "loss": 2.4025, + "step": 16249 + }, + { + "epoch": 1.3114357194738115, + "grad_norm": 0.7240859866142273, + "learning_rate": 1.7035477215354068e-05, + "loss": 2.3864, + "step": 16250 + }, + { + "epoch": 1.3115164232103946, + "grad_norm": 0.7379294633865356, + "learning_rate": 1.7026664566934536e-05, + "loss": 2.4663, + "step": 16251 + }, + { + "epoch": 1.3115971269469777, + "grad_norm": 0.6928708553314209, + "learning_rate": 1.7017853986427425e-05, + "loss": 2.4407, + "step": 16252 + }, + { + "epoch": 1.3116778306835606, + "grad_norm": 0.6304093599319458, + "learning_rate": 1.7009045474052298e-05, + "loss": 2.4755, + "step": 16253 + }, + { + "epoch": 1.3117585344201437, + "grad_norm": 0.6945829391479492, + "learning_rate": 1.700023903002872e-05, + "loss": 2.3817, + "step": 16254 + }, + { + "epoch": 1.3118392381567268, + "grad_norm": 0.6899009346961975, + "learning_rate": 1.6991434654576133e-05, + "loss": 2.3989, + "step": 16255 + }, + { + "epoch": 1.3119199418933096, + "grad_norm": 0.7359157204627991, + "learning_rate": 1.6982632347913985e-05, + "loss": 2.3788, + "step": 16256 + }, + { + "epoch": 
1.3120006456298927, + "grad_norm": 0.6562486886978149, + "learning_rate": 1.6973832110261658e-05, + "loss": 2.3955, + "step": 16257 + }, + { + "epoch": 1.3120813493664758, + "grad_norm": 0.6772989630699158, + "learning_rate": 1.696503394183846e-05, + "loss": 2.4788, + "step": 16258 + }, + { + "epoch": 1.3121620531030587, + "grad_norm": 0.7214391231536865, + "learning_rate": 1.695623784286363e-05, + "loss": 2.3836, + "step": 16259 + }, + { + "epoch": 1.3122427568396418, + "grad_norm": 0.7041679620742798, + "learning_rate": 1.6947443813556495e-05, + "loss": 2.4547, + "step": 16260 + }, + { + "epoch": 1.3123234605762246, + "grad_norm": 0.6819555163383484, + "learning_rate": 1.6938651854136135e-05, + "loss": 2.468, + "step": 16261 + }, + { + "epoch": 1.3124041643128077, + "grad_norm": 0.6466858983039856, + "learning_rate": 1.6929861964821693e-05, + "loss": 2.4572, + "step": 16262 + }, + { + "epoch": 1.3124848680493906, + "grad_norm": 0.688709557056427, + "learning_rate": 1.6921074145832248e-05, + "loss": 2.3891, + "step": 16263 + }, + { + "epoch": 1.3125655717859737, + "grad_norm": 0.6896470785140991, + "learning_rate": 1.69122883973868e-05, + "loss": 2.3825, + "step": 16264 + }, + { + "epoch": 1.3126462755225567, + "grad_norm": 0.8242524266242981, + "learning_rate": 1.690350471970431e-05, + "loss": 2.4804, + "step": 16265 + }, + { + "epoch": 1.3127269792591396, + "grad_norm": 0.7506044507026672, + "learning_rate": 1.689472311300373e-05, + "loss": 2.4671, + "step": 16266 + }, + { + "epoch": 1.3128076829957227, + "grad_norm": 0.6776263117790222, + "learning_rate": 1.688594357750386e-05, + "loss": 2.4646, + "step": 16267 + }, + { + "epoch": 1.3128883867323058, + "grad_norm": 0.6843759417533875, + "learning_rate": 1.6877166113423548e-05, + "loss": 2.4147, + "step": 16268 + }, + { + "epoch": 1.3129690904688887, + "grad_norm": 0.6650474667549133, + "learning_rate": 1.686839072098153e-05, + "loss": 2.4379, + "step": 16269 + }, + { + "epoch": 1.3130497942054717, + 
"grad_norm": 0.6636466383934021, + "learning_rate": 1.6859617400396533e-05, + "loss": 2.4334, + "step": 16270 + }, + { + "epoch": 1.3131304979420548, + "grad_norm": 0.649217963218689, + "learning_rate": 1.685084615188719e-05, + "loss": 2.319, + "step": 16271 + }, + { + "epoch": 1.3132112016786377, + "grad_norm": 0.7343039512634277, + "learning_rate": 1.6842076975672126e-05, + "loss": 2.3844, + "step": 16272 + }, + { + "epoch": 1.3132919054152208, + "grad_norm": 0.6916847825050354, + "learning_rate": 1.6833309871969894e-05, + "loss": 2.4544, + "step": 16273 + }, + { + "epoch": 1.3133726091518036, + "grad_norm": 0.6762102842330933, + "learning_rate": 1.6824544840998967e-05, + "loss": 2.3912, + "step": 16274 + }, + { + "epoch": 1.3134533128883867, + "grad_norm": 0.7327221035957336, + "learning_rate": 1.68157818829778e-05, + "loss": 2.4403, + "step": 16275 + }, + { + "epoch": 1.3135340166249696, + "grad_norm": 0.7362363338470459, + "learning_rate": 1.6807020998124812e-05, + "loss": 2.5169, + "step": 16276 + }, + { + "epoch": 1.3136147203615527, + "grad_norm": 0.6882300972938538, + "learning_rate": 1.679826218665832e-05, + "loss": 2.4139, + "step": 16277 + }, + { + "epoch": 1.3136954240981358, + "grad_norm": 0.7146984934806824, + "learning_rate": 1.6789505448796615e-05, + "loss": 2.4738, + "step": 16278 + }, + { + "epoch": 1.3137761278347186, + "grad_norm": 0.6581223607063293, + "learning_rate": 1.6780750784757947e-05, + "loss": 2.4617, + "step": 16279 + }, + { + "epoch": 1.3138568315713017, + "grad_norm": 0.7729318141937256, + "learning_rate": 1.6771998194760518e-05, + "loss": 2.4541, + "step": 16280 + }, + { + "epoch": 1.3139375353078848, + "grad_norm": 0.7617159485816956, + "learning_rate": 1.6763247679022442e-05, + "loss": 2.4727, + "step": 16281 + }, + { + "epoch": 1.3140182390444677, + "grad_norm": 0.6640555262565613, + "learning_rate": 1.6754499237761844e-05, + "loss": 2.4717, + "step": 16282 + }, + { + "epoch": 1.3140989427810508, + "grad_norm": 
0.7289882898330688, + "learning_rate": 1.6745752871196707e-05, + "loss": 2.4515, + "step": 16283 + }, + { + "epoch": 1.3141796465176339, + "grad_norm": 0.7075887322425842, + "learning_rate": 1.6737008579545043e-05, + "loss": 2.4586, + "step": 16284 + }, + { + "epoch": 1.3142603502542167, + "grad_norm": 0.7152252197265625, + "learning_rate": 1.672826636302477e-05, + "loss": 2.512, + "step": 16285 + }, + { + "epoch": 1.3143410539907998, + "grad_norm": 0.6875295639038086, + "learning_rate": 1.6719526221853808e-05, + "loss": 2.4049, + "step": 16286 + }, + { + "epoch": 1.314421757727383, + "grad_norm": 0.6812484860420227, + "learning_rate": 1.671078815624991e-05, + "loss": 2.3705, + "step": 16287 + }, + { + "epoch": 1.3145024614639658, + "grad_norm": 0.664282500743866, + "learning_rate": 1.6702052166430904e-05, + "loss": 2.3776, + "step": 16288 + }, + { + "epoch": 1.3145831652005489, + "grad_norm": 0.7460842728614807, + "learning_rate": 1.66933182526145e-05, + "loss": 2.4525, + "step": 16289 + }, + { + "epoch": 1.3146638689371317, + "grad_norm": 0.6555477380752563, + "learning_rate": 1.6684586415018366e-05, + "loss": 2.3902, + "step": 16290 + }, + { + "epoch": 1.3147445726737148, + "grad_norm": 0.7191921472549438, + "learning_rate": 1.6675856653860135e-05, + "loss": 2.4957, + "step": 16291 + }, + { + "epoch": 1.3148252764102977, + "grad_norm": 0.738667368888855, + "learning_rate": 1.666712896935738e-05, + "loss": 2.4182, + "step": 16292 + }, + { + "epoch": 1.3149059801468808, + "grad_norm": 0.6764421463012695, + "learning_rate": 1.6658403361727593e-05, + "loss": 2.4179, + "step": 16293 + }, + { + "epoch": 1.3149866838834638, + "grad_norm": 0.6981594562530518, + "learning_rate": 1.6649679831188247e-05, + "loss": 2.4288, + "step": 16294 + }, + { + "epoch": 1.3150673876200467, + "grad_norm": 0.6657801866531372, + "learning_rate": 1.6640958377956784e-05, + "loss": 2.3716, + "step": 16295 + }, + { + "epoch": 1.3151480913566298, + "grad_norm": 0.7238973379135132, + 
"learning_rate": 1.6632239002250505e-05, + "loss": 2.438, + "step": 16296 + }, + { + "epoch": 1.3152287950932129, + "grad_norm": 0.6727766990661621, + "learning_rate": 1.6623521704286772e-05, + "loss": 2.4406, + "step": 16297 + }, + { + "epoch": 1.3153094988297958, + "grad_norm": 0.6741603016853333, + "learning_rate": 1.661480648428282e-05, + "loss": 2.4379, + "step": 16298 + }, + { + "epoch": 1.3153902025663788, + "grad_norm": 0.7174610495567322, + "learning_rate": 1.6606093342455865e-05, + "loss": 2.4368, + "step": 16299 + }, + { + "epoch": 1.315470906302962, + "grad_norm": 0.6604920029640198, + "learning_rate": 1.6597382279023057e-05, + "loss": 2.4431, + "step": 16300 + }, + { + "epoch": 1.3155516100395448, + "grad_norm": 0.6930821537971497, + "learning_rate": 1.6588673294201494e-05, + "loss": 2.4064, + "step": 16301 + }, + { + "epoch": 1.3156323137761279, + "grad_norm": 0.6489799618721008, + "learning_rate": 1.657996638820826e-05, + "loss": 2.4256, + "step": 16302 + }, + { + "epoch": 1.315713017512711, + "grad_norm": 0.6781083345413208, + "learning_rate": 1.65712615612603e-05, + "loss": 2.4731, + "step": 16303 + }, + { + "epoch": 1.3157937212492938, + "grad_norm": 0.6710748076438904, + "learning_rate": 1.656255881357458e-05, + "loss": 2.4065, + "step": 16304 + }, + { + "epoch": 1.315874424985877, + "grad_norm": 0.7099822163581848, + "learning_rate": 1.655385814536804e-05, + "loss": 2.3978, + "step": 16305 + }, + { + "epoch": 1.3159551287224598, + "grad_norm": 0.7215133905410767, + "learning_rate": 1.6545159556857447e-05, + "loss": 2.4655, + "step": 16306 + }, + { + "epoch": 1.3160358324590429, + "grad_norm": 0.7705253958702087, + "learning_rate": 1.6536463048259643e-05, + "loss": 2.4576, + "step": 16307 + }, + { + "epoch": 1.3161165361956257, + "grad_norm": 0.6232311725616455, + "learning_rate": 1.6527768619791372e-05, + "loss": 2.3923, + "step": 16308 + }, + { + "epoch": 1.3161972399322088, + "grad_norm": 0.6599528789520264, + "learning_rate": 
1.6519076271669264e-05, + "loss": 2.4236, + "step": 16309 + }, + { + "epoch": 1.316277943668792, + "grad_norm": 0.6598034501075745, + "learning_rate": 1.6510386004110023e-05, + "loss": 2.368, + "step": 16310 + }, + { + "epoch": 1.3163586474053748, + "grad_norm": 0.6949655413627625, + "learning_rate": 1.650169781733022e-05, + "loss": 2.4277, + "step": 16311 + }, + { + "epoch": 1.3164393511419579, + "grad_norm": 0.6838186383247375, + "learning_rate": 1.6493011711546358e-05, + "loss": 2.4413, + "step": 16312 + }, + { + "epoch": 1.316520054878541, + "grad_norm": 0.7026765942573547, + "learning_rate": 1.6484327686974933e-05, + "loss": 2.4628, + "step": 16313 + }, + { + "epoch": 1.3166007586151238, + "grad_norm": 0.745360791683197, + "learning_rate": 1.647564574383237e-05, + "loss": 2.4358, + "step": 16314 + }, + { + "epoch": 1.316681462351707, + "grad_norm": 0.676225483417511, + "learning_rate": 1.6466965882335083e-05, + "loss": 2.4119, + "step": 16315 + }, + { + "epoch": 1.31676216608829, + "grad_norm": 0.6767755150794983, + "learning_rate": 1.6458288102699325e-05, + "loss": 2.4322, + "step": 16316 + }, + { + "epoch": 1.3168428698248729, + "grad_norm": 0.6957309246063232, + "learning_rate": 1.6449612405141424e-05, + "loss": 2.4327, + "step": 16317 + }, + { + "epoch": 1.316923573561456, + "grad_norm": 0.6773050427436829, + "learning_rate": 1.64409387898776e-05, + "loss": 2.4207, + "step": 16318 + }, + { + "epoch": 1.3170042772980388, + "grad_norm": 0.7319278717041016, + "learning_rate": 1.6432267257123978e-05, + "loss": 2.445, + "step": 16319 + }, + { + "epoch": 1.317084981034622, + "grad_norm": 0.7531326413154602, + "learning_rate": 1.6423597807096714e-05, + "loss": 2.3948, + "step": 16320 + }, + { + "epoch": 1.3171656847712048, + "grad_norm": 0.6741669178009033, + "learning_rate": 1.6414930440011854e-05, + "loss": 2.4177, + "step": 16321 + }, + { + "epoch": 1.3172463885077879, + "grad_norm": 0.6814963221549988, + "learning_rate": 1.640626515608543e-05, + "loss": 
2.4419, + "step": 16322 + }, + { + "epoch": 1.317327092244371, + "grad_norm": 0.6740893721580505, + "learning_rate": 1.6397601955533392e-05, + "loss": 2.3516, + "step": 16323 + }, + { + "epoch": 1.3174077959809538, + "grad_norm": 0.7172163724899292, + "learning_rate": 1.6388940838571675e-05, + "loss": 2.4665, + "step": 16324 + }, + { + "epoch": 1.317488499717537, + "grad_norm": 0.6690489053726196, + "learning_rate": 1.6380281805416085e-05, + "loss": 2.3957, + "step": 16325 + }, + { + "epoch": 1.31756920345412, + "grad_norm": 0.7182994484901428, + "learning_rate": 1.6371624856282462e-05, + "loss": 2.4456, + "step": 16326 + }, + { + "epoch": 1.3176499071907029, + "grad_norm": 0.6324366927146912, + "learning_rate": 1.636296999138659e-05, + "loss": 2.4111, + "step": 16327 + }, + { + "epoch": 1.317730610927286, + "grad_norm": 0.6740162372589111, + "learning_rate": 1.6354317210944093e-05, + "loss": 2.451, + "step": 16328 + }, + { + "epoch": 1.317811314663869, + "grad_norm": 0.6964122653007507, + "learning_rate": 1.6345666515170665e-05, + "loss": 2.4269, + "step": 16329 + }, + { + "epoch": 1.317892018400452, + "grad_norm": 0.7093058824539185, + "learning_rate": 1.6337017904281915e-05, + "loss": 2.4686, + "step": 16330 + }, + { + "epoch": 1.317972722137035, + "grad_norm": 0.693233072757721, + "learning_rate": 1.6328371378493367e-05, + "loss": 2.4149, + "step": 16331 + }, + { + "epoch": 1.318053425873618, + "grad_norm": 0.6418019533157349, + "learning_rate": 1.631972693802052e-05, + "loss": 2.4268, + "step": 16332 + }, + { + "epoch": 1.318134129610201, + "grad_norm": 0.6815310120582581, + "learning_rate": 1.631108458307883e-05, + "loss": 2.4274, + "step": 16333 + }, + { + "epoch": 1.318214833346784, + "grad_norm": 0.6774280071258545, + "learning_rate": 1.630244431388369e-05, + "loss": 2.3927, + "step": 16334 + }, + { + "epoch": 1.3182955370833669, + "grad_norm": 0.688090443611145, + "learning_rate": 1.6293806130650413e-05, + "loss": 2.4013, + "step": 16335 + }, + { + 
"epoch": 1.31837624081995, + "grad_norm": 0.7300553321838379, + "learning_rate": 1.6285170033594288e-05, + "loss": 2.4716, + "step": 16336 + }, + { + "epoch": 1.3184569445565328, + "grad_norm": 0.6798286437988281, + "learning_rate": 1.627653602293059e-05, + "loss": 2.3893, + "step": 16337 + }, + { + "epoch": 1.318537648293116, + "grad_norm": 0.6699275970458984, + "learning_rate": 1.6267904098874442e-05, + "loss": 2.4446, + "step": 16338 + }, + { + "epoch": 1.318618352029699, + "grad_norm": 0.7632322311401367, + "learning_rate": 1.6259274261641e-05, + "loss": 2.4434, + "step": 16339 + }, + { + "epoch": 1.3186990557662819, + "grad_norm": 0.7156099677085876, + "learning_rate": 1.6250646511445343e-05, + "loss": 2.4142, + "step": 16340 + }, + { + "epoch": 1.318779759502865, + "grad_norm": 0.7525599598884583, + "learning_rate": 1.6242020848502505e-05, + "loss": 2.3543, + "step": 16341 + }, + { + "epoch": 1.318860463239448, + "grad_norm": 0.7063113451004028, + "learning_rate": 1.623339727302745e-05, + "loss": 2.4754, + "step": 16342 + }, + { + "epoch": 1.318941166976031, + "grad_norm": 0.7138137221336365, + "learning_rate": 1.6224775785235123e-05, + "loss": 2.4223, + "step": 16343 + }, + { + "epoch": 1.319021870712614, + "grad_norm": 0.6976706981658936, + "learning_rate": 1.6216156385340352e-05, + "loss": 2.4878, + "step": 16344 + }, + { + "epoch": 1.319102574449197, + "grad_norm": 0.6931003332138062, + "learning_rate": 1.6207539073557974e-05, + "loss": 2.39, + "step": 16345 + }, + { + "epoch": 1.31918327818578, + "grad_norm": 0.6919357180595398, + "learning_rate": 1.6198923850102765e-05, + "loss": 2.4197, + "step": 16346 + }, + { + "epoch": 1.319263981922363, + "grad_norm": 0.7453805804252625, + "learning_rate": 1.619031071518945e-05, + "loss": 2.4226, + "step": 16347 + }, + { + "epoch": 1.3193446856589461, + "grad_norm": 0.6990562677383423, + "learning_rate": 1.6181699669032658e-05, + "loss": 2.3925, + "step": 16348 + }, + { + "epoch": 1.319425389395529, + "grad_norm": 
0.6974303126335144, + "learning_rate": 1.6173090711847006e-05, + "loss": 2.445, + "step": 16349 + }, + { + "epoch": 1.319506093132112, + "grad_norm": 0.7278286814689636, + "learning_rate": 1.6164483843847057e-05, + "loss": 2.3869, + "step": 16350 + }, + { + "epoch": 1.319586796868695, + "grad_norm": 0.7282646298408508, + "learning_rate": 1.6155879065247326e-05, + "loss": 2.3694, + "step": 16351 + }, + { + "epoch": 1.319667500605278, + "grad_norm": 0.7329844832420349, + "learning_rate": 1.6147276376262255e-05, + "loss": 2.4369, + "step": 16352 + }, + { + "epoch": 1.319748204341861, + "grad_norm": 0.6499385833740234, + "learning_rate": 1.613867577710627e-05, + "loss": 2.441, + "step": 16353 + }, + { + "epoch": 1.319828908078444, + "grad_norm": 0.7026061415672302, + "learning_rate": 1.6130077267993683e-05, + "loss": 2.4117, + "step": 16354 + }, + { + "epoch": 1.319909611815027, + "grad_norm": 0.7007814049720764, + "learning_rate": 1.6121480849138803e-05, + "loss": 2.4287, + "step": 16355 + }, + { + "epoch": 1.31999031555161, + "grad_norm": 0.6525697708129883, + "learning_rate": 1.611288652075591e-05, + "loss": 2.3969, + "step": 16356 + }, + { + "epoch": 1.320071019288193, + "grad_norm": 0.7268216609954834, + "learning_rate": 1.610429428305914e-05, + "loss": 2.4227, + "step": 16357 + }, + { + "epoch": 1.3201517230247761, + "grad_norm": 0.6665107011795044, + "learning_rate": 1.6095704136262668e-05, + "loss": 2.3694, + "step": 16358 + }, + { + "epoch": 1.320232426761359, + "grad_norm": 0.6832399368286133, + "learning_rate": 1.60871160805806e-05, + "loss": 2.4001, + "step": 16359 + }, + { + "epoch": 1.320313130497942, + "grad_norm": 0.6788592338562012, + "learning_rate": 1.6078530116226897e-05, + "loss": 2.4294, + "step": 16360 + }, + { + "epoch": 1.3203938342345252, + "grad_norm": 0.7147449254989624, + "learning_rate": 1.6069946243415625e-05, + "loss": 2.3904, + "step": 16361 + }, + { + "epoch": 1.320474537971108, + "grad_norm": 0.7014418840408325, + "learning_rate": 
1.6061364462360683e-05, + "loss": 2.4026, + "step": 16362 + }, + { + "epoch": 1.3205552417076911, + "grad_norm": 0.6867612600326538, + "learning_rate": 1.6052784773275987e-05, + "loss": 2.4092, + "step": 16363 + }, + { + "epoch": 1.3206359454442742, + "grad_norm": 0.6588961482048035, + "learning_rate": 1.6044207176375303e-05, + "loss": 2.4588, + "step": 16364 + }, + { + "epoch": 1.320716649180857, + "grad_norm": 0.688671350479126, + "learning_rate": 1.6035631671872444e-05, + "loss": 2.3957, + "step": 16365 + }, + { + "epoch": 1.3207973529174402, + "grad_norm": 0.7548064589500427, + "learning_rate": 1.6027058259981154e-05, + "loss": 2.4168, + "step": 16366 + }, + { + "epoch": 1.320878056654023, + "grad_norm": 0.7251972556114197, + "learning_rate": 1.6018486940915044e-05, + "loss": 2.4704, + "step": 16367 + }, + { + "epoch": 1.3209587603906061, + "grad_norm": 0.73149174451828, + "learning_rate": 1.6009917714887778e-05, + "loss": 2.4597, + "step": 16368 + }, + { + "epoch": 1.321039464127189, + "grad_norm": 0.6741003394126892, + "learning_rate": 1.600135058211294e-05, + "loss": 2.3876, + "step": 16369 + }, + { + "epoch": 1.321120167863772, + "grad_norm": 0.6891310214996338, + "learning_rate": 1.5992785542804e-05, + "loss": 2.4229, + "step": 16370 + }, + { + "epoch": 1.3212008716003552, + "grad_norm": 0.7529458403587341, + "learning_rate": 1.5984222597174415e-05, + "loss": 2.45, + "step": 16371 + }, + { + "epoch": 1.321281575336938, + "grad_norm": 0.708134651184082, + "learning_rate": 1.5975661745437664e-05, + "loss": 2.454, + "step": 16372 + }, + { + "epoch": 1.321362279073521, + "grad_norm": 0.7511130571365356, + "learning_rate": 1.596710298780705e-05, + "loss": 2.4201, + "step": 16373 + }, + { + "epoch": 1.3214429828101042, + "grad_norm": 0.6599537134170532, + "learning_rate": 1.595854632449588e-05, + "loss": 2.3982, + "step": 16374 + }, + { + "epoch": 1.321523686546687, + "grad_norm": 0.6821228861808777, + "learning_rate": 1.5949991755717453e-05, + "loss": 2.4525, + 
"step": 16375 + }, + { + "epoch": 1.3216043902832701, + "grad_norm": 0.6872302293777466, + "learning_rate": 1.5941439281684923e-05, + "loss": 2.3631, + "step": 16376 + }, + { + "epoch": 1.3216850940198532, + "grad_norm": 0.6650066375732422, + "learning_rate": 1.5932888902611453e-05, + "loss": 2.3718, + "step": 16377 + }, + { + "epoch": 1.321765797756436, + "grad_norm": 0.6620016694068909, + "learning_rate": 1.5924340618710143e-05, + "loss": 2.4076, + "step": 16378 + }, + { + "epoch": 1.3218465014930192, + "grad_norm": 0.694807231426239, + "learning_rate": 1.5915794430194066e-05, + "loss": 2.4369, + "step": 16379 + }, + { + "epoch": 1.321927205229602, + "grad_norm": 0.6810131669044495, + "learning_rate": 1.590725033727616e-05, + "loss": 2.4151, + "step": 16380 + }, + { + "epoch": 1.3220079089661851, + "grad_norm": 0.768846333026886, + "learning_rate": 1.58987083401694e-05, + "loss": 2.4991, + "step": 16381 + }, + { + "epoch": 1.322088612702768, + "grad_norm": 0.6581698656082153, + "learning_rate": 1.5890168439086672e-05, + "loss": 2.4263, + "step": 16382 + }, + { + "epoch": 1.322169316439351, + "grad_norm": 0.7267034649848938, + "learning_rate": 1.5881630634240818e-05, + "loss": 2.4219, + "step": 16383 + }, + { + "epoch": 1.3222500201759342, + "grad_norm": 0.7391555905342102, + "learning_rate": 1.5873094925844612e-05, + "loss": 2.427, + "step": 16384 + }, + { + "epoch": 1.322330723912517, + "grad_norm": 0.6612021923065186, + "learning_rate": 1.5864561314110815e-05, + "loss": 2.4108, + "step": 16385 + }, + { + "epoch": 1.3224114276491001, + "grad_norm": 0.7118437886238098, + "learning_rate": 1.585602979925206e-05, + "loss": 2.3839, + "step": 16386 + }, + { + "epoch": 1.3224921313856832, + "grad_norm": 0.6663616299629211, + "learning_rate": 1.5847500381480997e-05, + "loss": 2.4302, + "step": 16387 + }, + { + "epoch": 1.322572835122266, + "grad_norm": 0.6848715543746948, + "learning_rate": 1.583897306101022e-05, + "loss": 2.4228, + "step": 16388 + }, + { + "epoch": 
1.3226535388588492, + "grad_norm": 0.680895209312439, + "learning_rate": 1.5830447838052208e-05, + "loss": 2.4457, + "step": 16389 + }, + { + "epoch": 1.3227342425954323, + "grad_norm": 0.683276891708374, + "learning_rate": 1.582192471281946e-05, + "loss": 2.4412, + "step": 16390 + }, + { + "epoch": 1.3228149463320151, + "grad_norm": 0.7311880588531494, + "learning_rate": 1.5813403685524396e-05, + "loss": 2.4604, + "step": 16391 + }, + { + "epoch": 1.3228956500685982, + "grad_norm": 0.6769095659255981, + "learning_rate": 1.580488475637937e-05, + "loss": 2.4311, + "step": 16392 + }, + { + "epoch": 1.3229763538051813, + "grad_norm": 0.6683096289634705, + "learning_rate": 1.579636792559671e-05, + "loss": 2.445, + "step": 16393 + }, + { + "epoch": 1.3230570575417642, + "grad_norm": 0.7268782258033752, + "learning_rate": 1.5787853193388667e-05, + "loss": 2.4176, + "step": 16394 + }, + { + "epoch": 1.3231377612783473, + "grad_norm": 0.6878541707992554, + "learning_rate": 1.5779340559967494e-05, + "loss": 2.4615, + "step": 16395 + }, + { + "epoch": 1.3232184650149301, + "grad_norm": 0.7031291127204895, + "learning_rate": 1.577083002554527e-05, + "loss": 2.3726, + "step": 16396 + }, + { + "epoch": 1.3232991687515132, + "grad_norm": 0.7738708853721619, + "learning_rate": 1.5762321590334138e-05, + "loss": 2.5046, + "step": 16397 + }, + { + "epoch": 1.323379872488096, + "grad_norm": 0.6660913228988647, + "learning_rate": 1.575381525454619e-05, + "loss": 2.3759, + "step": 16398 + }, + { + "epoch": 1.3234605762246792, + "grad_norm": 0.6534021496772766, + "learning_rate": 1.574531101839335e-05, + "loss": 2.3983, + "step": 16399 + }, + { + "epoch": 1.3235412799612623, + "grad_norm": 0.6645511388778687, + "learning_rate": 1.5736808882087606e-05, + "loss": 2.3958, + "step": 16400 + }, + { + "epoch": 1.3236219836978451, + "grad_norm": 0.6723225712776184, + "learning_rate": 1.5728308845840855e-05, + "loss": 2.4248, + "step": 16401 + }, + { + "epoch": 1.3237026874344282, + 
"grad_norm": 0.6609976887702942, + "learning_rate": 1.5719810909864942e-05, + "loss": 2.3888, + "step": 16402 + }, + { + "epoch": 1.3237833911710113, + "grad_norm": 0.6713845729827881, + "learning_rate": 1.5711315074371635e-05, + "loss": 2.4474, + "step": 16403 + }, + { + "epoch": 1.3238640949075942, + "grad_norm": 0.701438307762146, + "learning_rate": 1.5702821339572726e-05, + "loss": 2.4673, + "step": 16404 + }, + { + "epoch": 1.3239447986441772, + "grad_norm": 0.7235428094863892, + "learning_rate": 1.5694329705679834e-05, + "loss": 2.3825, + "step": 16405 + }, + { + "epoch": 1.3240255023807603, + "grad_norm": 0.6785053610801697, + "learning_rate": 1.568584017290462e-05, + "loss": 2.4668, + "step": 16406 + }, + { + "epoch": 1.3241062061173432, + "grad_norm": 0.6918929815292358, + "learning_rate": 1.5677352741458705e-05, + "loss": 2.4329, + "step": 16407 + }, + { + "epoch": 1.3241869098539263, + "grad_norm": 0.7194826006889343, + "learning_rate": 1.5668867411553544e-05, + "loss": 2.3717, + "step": 16408 + }, + { + "epoch": 1.3242676135905094, + "grad_norm": 0.7299134731292725, + "learning_rate": 1.5660384183400658e-05, + "loss": 2.4695, + "step": 16409 + }, + { + "epoch": 1.3243483173270922, + "grad_norm": 0.7047600746154785, + "learning_rate": 1.565190305721147e-05, + "loss": 2.4525, + "step": 16410 + }, + { + "epoch": 1.3244290210636753, + "grad_norm": 0.685001015663147, + "learning_rate": 1.5643424033197328e-05, + "loss": 2.322, + "step": 16411 + }, + { + "epoch": 1.3245097248002582, + "grad_norm": 0.7696635127067566, + "learning_rate": 1.5634947111569588e-05, + "loss": 2.4464, + "step": 16412 + }, + { + "epoch": 1.3245904285368413, + "grad_norm": 0.7066066265106201, + "learning_rate": 1.5626472292539485e-05, + "loss": 2.4315, + "step": 16413 + }, + { + "epoch": 1.3246711322734241, + "grad_norm": 0.6553033590316772, + "learning_rate": 1.5617999576318276e-05, + "loss": 2.4296, + "step": 16414 + }, + { + "epoch": 1.3247518360100072, + "grad_norm": 
0.7031354308128357, + "learning_rate": 1.560952896311707e-05, + "loss": 2.4565, + "step": 16415 + }, + { + "epoch": 1.3248325397465903, + "grad_norm": 0.7826353311538696, + "learning_rate": 1.560106045314701e-05, + "loss": 2.4275, + "step": 16416 + }, + { + "epoch": 1.3249132434831732, + "grad_norm": 0.6408981084823608, + "learning_rate": 1.559259404661916e-05, + "loss": 2.3869, + "step": 16417 + }, + { + "epoch": 1.3249939472197563, + "grad_norm": 0.7487547993659973, + "learning_rate": 1.558412974374448e-05, + "loss": 2.3678, + "step": 16418 + }, + { + "epoch": 1.3250746509563394, + "grad_norm": 0.7163991332054138, + "learning_rate": 1.5575667544733963e-05, + "loss": 2.397, + "step": 16419 + }, + { + "epoch": 1.3251553546929222, + "grad_norm": 0.6933553814888, + "learning_rate": 1.5567207449798515e-05, + "loss": 2.424, + "step": 16420 + }, + { + "epoch": 1.3252360584295053, + "grad_norm": 0.687406063079834, + "learning_rate": 1.5558749459148945e-05, + "loss": 2.4346, + "step": 16421 + }, + { + "epoch": 1.3253167621660884, + "grad_norm": 0.6781243681907654, + "learning_rate": 1.5550293572996054e-05, + "loss": 2.4526, + "step": 16422 + }, + { + "epoch": 1.3253974659026713, + "grad_norm": 0.6632506847381592, + "learning_rate": 1.5541839791550616e-05, + "loss": 2.4559, + "step": 16423 + }, + { + "epoch": 1.3254781696392544, + "grad_norm": 0.668396532535553, + "learning_rate": 1.5533388115023327e-05, + "loss": 2.4463, + "step": 16424 + }, + { + "epoch": 1.3255588733758372, + "grad_norm": 0.6853309869766235, + "learning_rate": 1.552493854362479e-05, + "loss": 2.429, + "step": 16425 + }, + { + "epoch": 1.3256395771124203, + "grad_norm": 0.7443413138389587, + "learning_rate": 1.5516491077565597e-05, + "loss": 2.4091, + "step": 16426 + }, + { + "epoch": 1.3257202808490032, + "grad_norm": 0.690170168876648, + "learning_rate": 1.550804571705632e-05, + "loss": 2.3942, + "step": 16427 + }, + { + "epoch": 1.3258009845855863, + "grad_norm": NaN, + "learning_rate": 
1.550804571705632e-05, + "loss": 2.3788, + "step": 16428 + }, + { + "epoch": 1.3258816883221693, + "grad_norm": 0.6901132464408875, + "learning_rate": 1.5499602462307373e-05, + "loss": 2.3859, + "step": 16429 + }, + { + "epoch": 1.3259623920587522, + "grad_norm": 0.6639334559440613, + "learning_rate": 1.5491161313529223e-05, + "loss": 2.4271, + "step": 16430 + }, + { + "epoch": 1.3260430957953353, + "grad_norm": 0.7121936678886414, + "learning_rate": 1.548272227093227e-05, + "loss": 2.3818, + "step": 16431 + }, + { + "epoch": 1.3261237995319184, + "grad_norm": 0.6863218545913696, + "learning_rate": 1.5474285334726778e-05, + "loss": 2.3744, + "step": 16432 + }, + { + "epoch": 1.3262045032685013, + "grad_norm": 0.6697081327438354, + "learning_rate": 1.5465850505123057e-05, + "loss": 2.4001, + "step": 16433 + }, + { + "epoch": 1.3262852070050843, + "grad_norm": 0.7258912324905396, + "learning_rate": 1.5457417782331308e-05, + "loss": 2.4556, + "step": 16434 + }, + { + "epoch": 1.3263659107416674, + "grad_norm": 0.6930057406425476, + "learning_rate": 1.5448987166561712e-05, + "loss": 2.4979, + "step": 16435 + }, + { + "epoch": 1.3264466144782503, + "grad_norm": 0.6475574970245361, + "learning_rate": 1.5440558658024363e-05, + "loss": 2.3821, + "step": 16436 + }, + { + "epoch": 1.3265273182148334, + "grad_norm": 0.7489237785339355, + "learning_rate": 1.5432132256929367e-05, + "loss": 2.465, + "step": 16437 + }, + { + "epoch": 1.3266080219514165, + "grad_norm": 0.704391360282898, + "learning_rate": 1.5423707963486667e-05, + "loss": 2.433, + "step": 16438 + }, + { + "epoch": 1.3266887256879993, + "grad_norm": 0.669452965259552, + "learning_rate": 1.5415285777906253e-05, + "loss": 2.3981, + "step": 16439 + }, + { + "epoch": 1.3267694294245824, + "grad_norm": 0.6961604356765747, + "learning_rate": 1.540686570039802e-05, + "loss": 2.4684, + "step": 16440 + }, + { + "epoch": 1.3268501331611653, + "grad_norm": 0.6613924503326416, + "learning_rate": 1.539844773117185e-05, + 
"loss": 2.3711, + "step": 16441 + }, + { + "epoch": 1.3269308368977484, + "grad_norm": 0.7019763588905334, + "learning_rate": 1.5390031870437492e-05, + "loss": 2.3716, + "step": 16442 + }, + { + "epoch": 1.3270115406343312, + "grad_norm": 0.700176477432251, + "learning_rate": 1.5381618118404707e-05, + "loss": 2.4305, + "step": 16443 + }, + { + "epoch": 1.3270922443709143, + "grad_norm": 0.6716598272323608, + "learning_rate": 1.5373206475283197e-05, + "loss": 2.3835, + "step": 16444 + }, + { + "epoch": 1.3271729481074974, + "grad_norm": 0.6449697017669678, + "learning_rate": 1.53647969412826e-05, + "loss": 2.3707, + "step": 16445 + }, + { + "epoch": 1.3272536518440803, + "grad_norm": 0.7276685237884521, + "learning_rate": 1.535638951661249e-05, + "loss": 2.4313, + "step": 16446 + }, + { + "epoch": 1.3273343555806634, + "grad_norm": 0.7144705057144165, + "learning_rate": 1.5347984201482456e-05, + "loss": 2.4122, + "step": 16447 + }, + { + "epoch": 1.3274150593172465, + "grad_norm": 0.660225510597229, + "learning_rate": 1.53395809961019e-05, + "loss": 2.4282, + "step": 16448 + }, + { + "epoch": 1.3274957630538293, + "grad_norm": 0.7431676983833313, + "learning_rate": 1.5331179900680293e-05, + "loss": 2.3863, + "step": 16449 + }, + { + "epoch": 1.3275764667904124, + "grad_norm": 0.6670290231704712, + "learning_rate": 1.5322780915427036e-05, + "loss": 2.4266, + "step": 16450 + }, + { + "epoch": 1.3276571705269955, + "grad_norm": 0.711098313331604, + "learning_rate": 1.531438404055141e-05, + "loss": 2.4431, + "step": 16451 + }, + { + "epoch": 1.3277378742635784, + "grad_norm": 0.6908091902732849, + "learning_rate": 1.5305989276262688e-05, + "loss": 2.4153, + "step": 16452 + }, + { + "epoch": 1.3278185780001615, + "grad_norm": 0.7458107471466064, + "learning_rate": 1.5297596622770115e-05, + "loss": 2.4076, + "step": 16453 + }, + { + "epoch": 1.3278992817367445, + "grad_norm": 0.7406951189041138, + "learning_rate": 1.528920608028285e-05, + "loss": 2.3585, + "step": 16454 + 
}, + { + "epoch": 1.3279799854733274, + "grad_norm": 0.718824565410614, + "learning_rate": 1.5280817649010005e-05, + "loss": 2.4092, + "step": 16455 + }, + { + "epoch": 1.3280606892099105, + "grad_norm": 0.7163959741592407, + "learning_rate": 1.527243132916064e-05, + "loss": 2.4344, + "step": 16456 + }, + { + "epoch": 1.3281413929464934, + "grad_norm": 0.6695916652679443, + "learning_rate": 1.5264047120943793e-05, + "loss": 2.4144, + "step": 16457 + }, + { + "epoch": 1.3282220966830764, + "grad_norm": 0.6858509182929993, + "learning_rate": 1.5255665024568366e-05, + "loss": 2.4345, + "step": 16458 + }, + { + "epoch": 1.3283028004196593, + "grad_norm": 0.7277235388755798, + "learning_rate": 1.5247285040243297e-05, + "loss": 2.4219, + "step": 16459 + }, + { + "epoch": 1.3283835041562424, + "grad_norm": 0.6481949090957642, + "learning_rate": 1.5238907168177441e-05, + "loss": 2.4483, + "step": 16460 + }, + { + "epoch": 1.3284642078928255, + "grad_norm": 0.6956833600997925, + "learning_rate": 1.5230531408579574e-05, + "loss": 2.4241, + "step": 16461 + }, + { + "epoch": 1.3285449116294084, + "grad_norm": 0.7266185879707336, + "learning_rate": 1.522215776165845e-05, + "loss": 2.4577, + "step": 16462 + }, + { + "epoch": 1.3286256153659914, + "grad_norm": 0.725574254989624, + "learning_rate": 1.5213786227622773e-05, + "loss": 2.4451, + "step": 16463 + }, + { + "epoch": 1.3287063191025745, + "grad_norm": 0.7550850510597229, + "learning_rate": 1.5205416806681172e-05, + "loss": 2.4262, + "step": 16464 + }, + { + "epoch": 1.3287870228391574, + "grad_norm": 0.6391028761863708, + "learning_rate": 1.5197049499042237e-05, + "loss": 2.4116, + "step": 16465 + }, + { + "epoch": 1.3288677265757405, + "grad_norm": 0.6899027824401855, + "learning_rate": 1.5188684304914524e-05, + "loss": 2.3754, + "step": 16466 + }, + { + "epoch": 1.3289484303123236, + "grad_norm": 0.696681022644043, + "learning_rate": 1.518032122450649e-05, + "loss": 2.471, + "step": 16467 + }, + { + "epoch": 
1.3290291340489064, + "grad_norm": 0.7090939283370972, + "learning_rate": 1.5171960258026551e-05, + "loss": 2.4153, + "step": 16468 + }, + { + "epoch": 1.3291098377854895, + "grad_norm": 0.7125746607780457, + "learning_rate": 1.5163601405683148e-05, + "loss": 2.4102, + "step": 16469 + }, + { + "epoch": 1.3291905415220726, + "grad_norm": 0.7407518029212952, + "learning_rate": 1.5155244667684531e-05, + "loss": 2.429, + "step": 16470 + }, + { + "epoch": 1.3292712452586555, + "grad_norm": 0.7401885390281677, + "learning_rate": 1.5146890044239004e-05, + "loss": 2.4577, + "step": 16471 + }, + { + "epoch": 1.3293519489952383, + "grad_norm": 0.7625757455825806, + "learning_rate": 1.5138537535554786e-05, + "loss": 2.3813, + "step": 16472 + }, + { + "epoch": 1.3294326527318214, + "grad_norm": 0.7423396706581116, + "learning_rate": 1.5130187141840057e-05, + "loss": 2.3797, + "step": 16473 + }, + { + "epoch": 1.3295133564684045, + "grad_norm": 0.7029228806495667, + "learning_rate": 1.5121838863302884e-05, + "loss": 2.4203, + "step": 16474 + }, + { + "epoch": 1.3295940602049874, + "grad_norm": 0.8062863349914551, + "learning_rate": 1.5113492700151378e-05, + "loss": 2.3743, + "step": 16475 + }, + { + "epoch": 1.3296747639415705, + "grad_norm": 0.7113343477249146, + "learning_rate": 1.5105148652593548e-05, + "loss": 2.3837, + "step": 16476 + }, + { + "epoch": 1.3297554676781536, + "grad_norm": 0.6733126044273376, + "learning_rate": 1.5096806720837309e-05, + "loss": 2.4677, + "step": 16477 + }, + { + "epoch": 1.3298361714147364, + "grad_norm": 0.6936657428741455, + "learning_rate": 1.5088466905090593e-05, + "loss": 2.3677, + "step": 16478 + }, + { + "epoch": 1.3299168751513195, + "grad_norm": 0.746746301651001, + "learning_rate": 1.5080129205561255e-05, + "loss": 2.423, + "step": 16479 + }, + { + "epoch": 1.3299975788879026, + "grad_norm": 0.6879116296768188, + "learning_rate": 1.5071793622457065e-05, + "loss": 2.4867, + "step": 16480 + }, + { + "epoch": 1.3300782826244855, + 
"grad_norm": 0.6841214299201965, + "learning_rate": 1.5063460155985776e-05, + "loss": 2.5015, + "step": 16481 + }, + { + "epoch": 1.3301589863610686, + "grad_norm": 0.6955111622810364, + "learning_rate": 1.5055128806355123e-05, + "loss": 2.3975, + "step": 16482 + }, + { + "epoch": 1.3302396900976516, + "grad_norm": 0.7084987163543701, + "learning_rate": 1.5046799573772673e-05, + "loss": 2.4511, + "step": 16483 + }, + { + "epoch": 1.3303203938342345, + "grad_norm": 0.6905840039253235, + "learning_rate": 1.5038472458446051e-05, + "loss": 2.3542, + "step": 16484 + }, + { + "epoch": 1.3304010975708176, + "grad_norm": 0.7182672023773193, + "learning_rate": 1.5030147460582788e-05, + "loss": 2.3673, + "step": 16485 + }, + { + "epoch": 1.3304818013074005, + "grad_norm": 0.6805183291435242, + "learning_rate": 1.5021824580390353e-05, + "loss": 2.3751, + "step": 16486 + }, + { + "epoch": 1.3305625050439835, + "grad_norm": 0.6278836727142334, + "learning_rate": 1.5013503818076202e-05, + "loss": 2.3508, + "step": 16487 + }, + { + "epoch": 1.3306432087805664, + "grad_norm": 0.664000391960144, + "learning_rate": 1.500518517384768e-05, + "loss": 2.4039, + "step": 16488 + }, + { + "epoch": 1.3307239125171495, + "grad_norm": 0.6906681060791016, + "learning_rate": 1.4996868647912155e-05, + "loss": 2.4068, + "step": 16489 + }, + { + "epoch": 1.3308046162537326, + "grad_norm": 0.6756102442741394, + "learning_rate": 1.4988554240476826e-05, + "loss": 2.4423, + "step": 16490 + }, + { + "epoch": 1.3308853199903155, + "grad_norm": 0.7013095021247864, + "learning_rate": 1.4980241951748964e-05, + "loss": 2.3536, + "step": 16491 + }, + { + "epoch": 1.3309660237268985, + "grad_norm": 0.6689851880073547, + "learning_rate": 1.4971931781935732e-05, + "loss": 2.4192, + "step": 16492 + }, + { + "epoch": 1.3310467274634816, + "grad_norm": 0.6411572694778442, + "learning_rate": 1.4963623731244202e-05, + "loss": 2.4012, + "step": 16493 + }, + { + "epoch": 1.3311274312000645, + "grad_norm": 
0.7209812998771667, + "learning_rate": 1.4955317799881453e-05, + "loss": 2.378, + "step": 16494 + }, + { + "epoch": 1.3312081349366476, + "grad_norm": 0.7041119933128357, + "learning_rate": 1.4947013988054504e-05, + "loss": 2.4047, + "step": 16495 + }, + { + "epoch": 1.3312888386732307, + "grad_norm": 0.6928852796554565, + "learning_rate": 1.4938712295970292e-05, + "loss": 2.4489, + "step": 16496 + }, + { + "epoch": 1.3313695424098135, + "grad_norm": 0.6923524141311646, + "learning_rate": 1.4930412723835718e-05, + "loss": 2.3752, + "step": 16497 + }, + { + "epoch": 1.3314502461463966, + "grad_norm": 0.7034686803817749, + "learning_rate": 1.4922115271857662e-05, + "loss": 2.3898, + "step": 16498 + }, + { + "epoch": 1.3315309498829797, + "grad_norm": 0.6717320084571838, + "learning_rate": 1.4913819940242856e-05, + "loss": 2.3629, + "step": 16499 + }, + { + "epoch": 1.3316116536195626, + "grad_norm": 0.6885079741477966, + "learning_rate": 1.4905526729198083e-05, + "loss": 2.4321, + "step": 16500 + }, + { + "epoch": 1.3316923573561457, + "grad_norm": 0.662452757358551, + "learning_rate": 1.489723563893004e-05, + "loss": 2.4532, + "step": 16501 + }, + { + "epoch": 1.3317730610927285, + "grad_norm": 0.6650903224945068, + "learning_rate": 1.4888946669645332e-05, + "loss": 2.4347, + "step": 16502 + }, + { + "epoch": 1.3318537648293116, + "grad_norm": 0.7217590808868408, + "learning_rate": 1.4880659821550546e-05, + "loss": 2.4641, + "step": 16503 + }, + { + "epoch": 1.3319344685658945, + "grad_norm": 0.7063763737678528, + "learning_rate": 1.4872375094852232e-05, + "loss": 2.4365, + "step": 16504 + }, + { + "epoch": 1.3320151723024776, + "grad_norm": 0.7366454005241394, + "learning_rate": 1.4864092489756853e-05, + "loss": 2.4223, + "step": 16505 + }, + { + "epoch": 1.3320958760390607, + "grad_norm": 0.7132206559181213, + "learning_rate": 1.4855812006470838e-05, + "loss": 2.4404, + "step": 16506 + }, + { + "epoch": 1.3321765797756435, + "grad_norm": 0.665553867816925, + 
"learning_rate": 1.484753364520055e-05, + "loss": 2.3818, + "step": 16507 + }, + { + "epoch": 1.3322572835122266, + "grad_norm": 0.7854028344154358, + "learning_rate": 1.483925740615234e-05, + "loss": 2.4111, + "step": 16508 + }, + { + "epoch": 1.3323379872488097, + "grad_norm": 0.7331317663192749, + "learning_rate": 1.4830983289532418e-05, + "loss": 2.4446, + "step": 16509 + }, + { + "epoch": 1.3324186909853926, + "grad_norm": 0.670315146446228, + "learning_rate": 1.4822711295547042e-05, + "loss": 2.4017, + "step": 16510 + }, + { + "epoch": 1.3324993947219756, + "grad_norm": 0.7242144346237183, + "learning_rate": 1.481444142440237e-05, + "loss": 2.4281, + "step": 16511 + }, + { + "epoch": 1.3325800984585587, + "grad_norm": 0.7108538746833801, + "learning_rate": 1.4806173676304468e-05, + "loss": 2.4331, + "step": 16512 + }, + { + "epoch": 1.3326608021951416, + "grad_norm": 0.658989667892456, + "learning_rate": 1.479790805145943e-05, + "loss": 2.4321, + "step": 16513 + }, + { + "epoch": 1.3327415059317247, + "grad_norm": 0.6596404314041138, + "learning_rate": 1.4789644550073233e-05, + "loss": 2.3817, + "step": 16514 + }, + { + "epoch": 1.3328222096683078, + "grad_norm": 0.6922028064727783, + "learning_rate": 1.4781383172351837e-05, + "loss": 2.399, + "step": 16515 + }, + { + "epoch": 1.3329029134048906, + "grad_norm": 0.750747799873352, + "learning_rate": 1.4773123918501141e-05, + "loss": 2.4502, + "step": 16516 + }, + { + "epoch": 1.3329836171414737, + "grad_norm": 0.6887632608413696, + "learning_rate": 1.4764866788727006e-05, + "loss": 2.3636, + "step": 16517 + }, + { + "epoch": 1.3330643208780566, + "grad_norm": 0.6751166582107544, + "learning_rate": 1.4756611783235163e-05, + "loss": 2.3956, + "step": 16518 + }, + { + "epoch": 1.3331450246146397, + "grad_norm": 0.679040253162384, + "learning_rate": 1.4748358902231395e-05, + "loss": 2.4044, + "step": 16519 + }, + { + "epoch": 1.3332257283512225, + "grad_norm": 0.6396780610084534, + "learning_rate": 
1.4740108145921373e-05, + "loss": 2.4114, + "step": 16520 + }, + { + "epoch": 1.3333064320878056, + "grad_norm": 0.6686230301856995, + "learning_rate": 1.4731859514510738e-05, + "loss": 2.4535, + "step": 16521 + }, + { + "epoch": 1.3333871358243887, + "grad_norm": 0.6693681478500366, + "learning_rate": 1.472361300820505e-05, + "loss": 2.3885, + "step": 16522 + }, + { + "epoch": 1.3334678395609716, + "grad_norm": 0.7700718641281128, + "learning_rate": 1.4715368627209836e-05, + "loss": 2.3939, + "step": 16523 + }, + { + "epoch": 1.3335485432975547, + "grad_norm": 0.7203121781349182, + "learning_rate": 1.4707126371730561e-05, + "loss": 2.4644, + "step": 16524 + }, + { + "epoch": 1.3336292470341378, + "grad_norm": 0.7798308730125427, + "learning_rate": 1.4698886241972665e-05, + "loss": 2.4293, + "step": 16525 + }, + { + "epoch": 1.3337099507707206, + "grad_norm": 0.7017160654067993, + "learning_rate": 1.4690648238141503e-05, + "loss": 2.4327, + "step": 16526 + }, + { + "epoch": 1.3337906545073037, + "grad_norm": 0.6522603631019592, + "learning_rate": 1.468241236044241e-05, + "loss": 2.3955, + "step": 16527 + }, + { + "epoch": 1.3338713582438868, + "grad_norm": 0.766222357749939, + "learning_rate": 1.4674178609080602e-05, + "loss": 2.4652, + "step": 16528 + }, + { + "epoch": 1.3339520619804697, + "grad_norm": 0.7351565361022949, + "learning_rate": 1.4665946984261303e-05, + "loss": 2.4607, + "step": 16529 + }, + { + "epoch": 1.3340327657170528, + "grad_norm": 0.6817728281021118, + "learning_rate": 1.4657717486189693e-05, + "loss": 2.3687, + "step": 16530 + }, + { + "epoch": 1.3341134694536356, + "grad_norm": 0.7401643395423889, + "learning_rate": 1.464949011507083e-05, + "loss": 2.4179, + "step": 16531 + }, + { + "epoch": 1.3341941731902187, + "grad_norm": 0.7783530354499817, + "learning_rate": 1.4641264871109784e-05, + "loss": 2.4088, + "step": 16532 + }, + { + "epoch": 1.3342748769268016, + "grad_norm": 0.6761943697929382, + "learning_rate": 1.4633041754511534e-05, + 
"loss": 2.4141, + "step": 16533 + }, + { + "epoch": 1.3343555806633847, + "grad_norm": 0.6842260360717773, + "learning_rate": 1.4624820765481073e-05, + "loss": 2.4918, + "step": 16534 + }, + { + "epoch": 1.3344362843999678, + "grad_norm": 0.6906094551086426, + "learning_rate": 1.4616601904223225e-05, + "loss": 2.4576, + "step": 16535 + }, + { + "epoch": 1.3345169881365506, + "grad_norm": 0.6549125909805298, + "learning_rate": 1.4608385170942829e-05, + "loss": 2.3748, + "step": 16536 + }, + { + "epoch": 1.3345976918731337, + "grad_norm": 0.6603896617889404, + "learning_rate": 1.4600170565844728e-05, + "loss": 2.3739, + "step": 16537 + }, + { + "epoch": 1.3346783956097168, + "grad_norm": 0.6413096189498901, + "learning_rate": 1.4591958089133606e-05, + "loss": 2.3979, + "step": 16538 + }, + { + "epoch": 1.3347590993462997, + "grad_norm": 0.7085204720497131, + "learning_rate": 1.4583747741014142e-05, + "loss": 2.4185, + "step": 16539 + }, + { + "epoch": 1.3348398030828827, + "grad_norm": 0.6517937183380127, + "learning_rate": 1.4575539521690983e-05, + "loss": 2.3938, + "step": 16540 + }, + { + "epoch": 1.3349205068194658, + "grad_norm": 0.6326449513435364, + "learning_rate": 1.4567333431368658e-05, + "loss": 2.4613, + "step": 16541 + }, + { + "epoch": 1.3350012105560487, + "grad_norm": 0.8046317100524902, + "learning_rate": 1.4559129470251708e-05, + "loss": 2.4547, + "step": 16542 + }, + { + "epoch": 1.3350819142926318, + "grad_norm": 0.6661570072174072, + "learning_rate": 1.455092763854462e-05, + "loss": 2.3636, + "step": 16543 + }, + { + "epoch": 1.3351626180292149, + "grad_norm": 0.6806541085243225, + "learning_rate": 1.454272793645176e-05, + "loss": 2.4309, + "step": 16544 + }, + { + "epoch": 1.3352433217657977, + "grad_norm": 0.651836097240448, + "learning_rate": 1.45345303641775e-05, + "loss": 2.3862, + "step": 16545 + }, + { + "epoch": 1.3353240255023808, + "grad_norm": 0.7448983192443848, + "learning_rate": 1.4526334921926165e-05, + "loss": 2.4654, + "step": 
16546 + }, + { + "epoch": 1.3354047292389637, + "grad_norm": 0.6885285973548889, + "learning_rate": 1.4518141609901992e-05, + "loss": 2.3943, + "step": 16547 + }, + { + "epoch": 1.3354854329755468, + "grad_norm": 0.7204004526138306, + "learning_rate": 1.450995042830917e-05, + "loss": 2.4117, + "step": 16548 + }, + { + "epoch": 1.3355661367121296, + "grad_norm": 0.6551961898803711, + "learning_rate": 1.4501761377351864e-05, + "loss": 2.4269, + "step": 16549 + }, + { + "epoch": 1.3356468404487127, + "grad_norm": 0.7191253304481506, + "learning_rate": 1.4493574457234182e-05, + "loss": 2.3472, + "step": 16550 + }, + { + "epoch": 1.3357275441852958, + "grad_norm": 0.6793580651283264, + "learning_rate": 1.4485389668160121e-05, + "loss": 2.4264, + "step": 16551 + }, + { + "epoch": 1.3358082479218787, + "grad_norm": 0.704250693321228, + "learning_rate": 1.4477207010333682e-05, + "loss": 2.5236, + "step": 16552 + }, + { + "epoch": 1.3358889516584618, + "grad_norm": 0.6826470494270325, + "learning_rate": 1.4469026483958837e-05, + "loss": 2.4473, + "step": 16553 + }, + { + "epoch": 1.3359696553950449, + "grad_norm": 0.6646167039871216, + "learning_rate": 1.4460848089239399e-05, + "loss": 2.4232, + "step": 16554 + }, + { + "epoch": 1.3360503591316277, + "grad_norm": 0.7604451179504395, + "learning_rate": 1.4452671826379227e-05, + "loss": 2.4208, + "step": 16555 + }, + { + "epoch": 1.3361310628682108, + "grad_norm": 0.7129300236701965, + "learning_rate": 1.4444497695582093e-05, + "loss": 2.4304, + "step": 16556 + }, + { + "epoch": 1.336211766604794, + "grad_norm": 0.6769927740097046, + "learning_rate": 1.4436325697051733e-05, + "loss": 2.3467, + "step": 16557 + }, + { + "epoch": 1.3362924703413768, + "grad_norm": 0.6568608283996582, + "learning_rate": 1.4428155830991797e-05, + "loss": 2.4285, + "step": 16558 + }, + { + "epoch": 1.3363731740779599, + "grad_norm": 0.7687276005744934, + "learning_rate": 1.4419988097605919e-05, + "loss": 2.4815, + "step": 16559 + }, + { + "epoch": 
1.336453877814543, + "grad_norm": 0.7001463770866394, + "learning_rate": 1.4411822497097638e-05, + "loss": 2.4629, + "step": 16560 + }, + { + "epoch": 1.3365345815511258, + "grad_norm": 0.7211995720863342, + "learning_rate": 1.4403659029670458e-05, + "loss": 2.4323, + "step": 16561 + }, + { + "epoch": 1.336615285287709, + "grad_norm": 0.7371769547462463, + "learning_rate": 1.439549769552787e-05, + "loss": 2.3962, + "step": 16562 + }, + { + "epoch": 1.3366959890242918, + "grad_norm": 0.7475463151931763, + "learning_rate": 1.4387338494873237e-05, + "loss": 2.3593, + "step": 16563 + }, + { + "epoch": 1.3367766927608749, + "grad_norm": 0.7215834856033325, + "learning_rate": 1.4379181427909916e-05, + "loss": 2.3687, + "step": 16564 + }, + { + "epoch": 1.3368573964974577, + "grad_norm": 0.7160200476646423, + "learning_rate": 1.4371026494841211e-05, + "loss": 2.3652, + "step": 16565 + }, + { + "epoch": 1.3369381002340408, + "grad_norm": 0.6636231541633606, + "learning_rate": 1.436287369587036e-05, + "loss": 2.4628, + "step": 16566 + }, + { + "epoch": 1.337018803970624, + "grad_norm": 0.657774806022644, + "learning_rate": 1.4354723031200556e-05, + "loss": 2.4082, + "step": 16567 + }, + { + "epoch": 1.3370995077072068, + "grad_norm": 0.7020300626754761, + "learning_rate": 1.4346574501034936e-05, + "loss": 2.3821, + "step": 16568 + }, + { + "epoch": 1.3371802114437898, + "grad_norm": 0.6800786256790161, + "learning_rate": 1.4338428105576595e-05, + "loss": 2.3839, + "step": 16569 + }, + { + "epoch": 1.337260915180373, + "grad_norm": 0.7176932692527771, + "learning_rate": 1.4330283845028536e-05, + "loss": 2.4614, + "step": 16570 + }, + { + "epoch": 1.3373416189169558, + "grad_norm": 0.7233355641365051, + "learning_rate": 1.432214171959374e-05, + "loss": 2.4048, + "step": 16571 + }, + { + "epoch": 1.3374223226535389, + "grad_norm": 0.7721874117851257, + "learning_rate": 1.4314001729475157e-05, + "loss": 2.4169, + "step": 16572 + }, + { + "epoch": 1.337503026390122, + 
"grad_norm": 0.7123380303382874, + "learning_rate": 1.4305863874875613e-05, + "loss": 2.3799, + "step": 16573 + }, + { + "epoch": 1.3375837301267048, + "grad_norm": 0.7297765016555786, + "learning_rate": 1.4297728155997958e-05, + "loss": 2.4655, + "step": 16574 + }, + { + "epoch": 1.337664433863288, + "grad_norm": 0.6806401610374451, + "learning_rate": 1.428959457304493e-05, + "loss": 2.4102, + "step": 16575 + }, + { + "epoch": 1.3377451375998708, + "grad_norm": 0.6811275482177734, + "learning_rate": 1.4281463126219264e-05, + "loss": 2.4298, + "step": 16576 + }, + { + "epoch": 1.3378258413364539, + "grad_norm": 0.6900678277015686, + "learning_rate": 1.427333381572361e-05, + "loss": 2.4745, + "step": 16577 + }, + { + "epoch": 1.3379065450730367, + "grad_norm": 0.7815307974815369, + "learning_rate": 1.4265206641760587e-05, + "loss": 2.3624, + "step": 16578 + }, + { + "epoch": 1.3379872488096198, + "grad_norm": 0.6948800683021545, + "learning_rate": 1.4257081604532708e-05, + "loss": 2.4142, + "step": 16579 + }, + { + "epoch": 1.338067952546203, + "grad_norm": 0.7387657165527344, + "learning_rate": 1.4248958704242488e-05, + "loss": 2.4241, + "step": 16580 + }, + { + "epoch": 1.3381486562827858, + "grad_norm": 0.7158597111701965, + "learning_rate": 1.4240837941092367e-05, + "loss": 2.4473, + "step": 16581 + }, + { + "epoch": 1.3382293600193689, + "grad_norm": 0.758674144744873, + "learning_rate": 1.423271931528477e-05, + "loss": 2.4504, + "step": 16582 + }, + { + "epoch": 1.338310063755952, + "grad_norm": 0.6904417872428894, + "learning_rate": 1.4224602827021982e-05, + "loss": 2.4288, + "step": 16583 + }, + { + "epoch": 1.3383907674925348, + "grad_norm": 0.6988760828971863, + "learning_rate": 1.4216488476506307e-05, + "loss": 2.3874, + "step": 16584 + }, + { + "epoch": 1.338471471229118, + "grad_norm": 0.6969872117042542, + "learning_rate": 1.4208376263940003e-05, + "loss": 2.3388, + "step": 16585 + }, + { + "epoch": 1.338552174965701, + "grad_norm": 0.687179684638977, 
+ "learning_rate": 1.420026618952518e-05, + "loss": 2.431, + "step": 16586 + }, + { + "epoch": 1.3386328787022839, + "grad_norm": 0.6319810152053833, + "learning_rate": 1.4192158253464038e-05, + "loss": 2.4415, + "step": 16587 + }, + { + "epoch": 1.338713582438867, + "grad_norm": 0.7554977536201477, + "learning_rate": 1.4184052455958629e-05, + "loss": 2.3863, + "step": 16588 + }, + { + "epoch": 1.33879428617545, + "grad_norm": 0.7025974988937378, + "learning_rate": 1.4175948797210936e-05, + "loss": 2.3957, + "step": 16589 + }, + { + "epoch": 1.338874989912033, + "grad_norm": 0.7270370721817017, + "learning_rate": 1.4167847277422952e-05, + "loss": 2.4309, + "step": 16590 + }, + { + "epoch": 1.338955693648616, + "grad_norm": 0.7017608284950256, + "learning_rate": 1.4159747896796593e-05, + "loss": 2.4142, + "step": 16591 + }, + { + "epoch": 1.3390363973851989, + "grad_norm": 0.7114055156707764, + "learning_rate": 1.4151650655533687e-05, + "loss": 2.473, + "step": 16592 + }, + { + "epoch": 1.339117101121782, + "grad_norm": 0.6420357823371887, + "learning_rate": 1.4143555553836063e-05, + "loss": 2.3671, + "step": 16593 + }, + { + "epoch": 1.3391978048583648, + "grad_norm": 0.7067350745201111, + "learning_rate": 1.413546259190548e-05, + "loss": 2.4422, + "step": 16594 + }, + { + "epoch": 1.339278508594948, + "grad_norm": 0.7376763224601746, + "learning_rate": 1.4127371769943598e-05, + "loss": 2.4443, + "step": 16595 + }, + { + "epoch": 1.339359212331531, + "grad_norm": 0.646515965461731, + "learning_rate": 1.4119283088152092e-05, + "loss": 2.3949, + "step": 16596 + }, + { + "epoch": 1.3394399160681139, + "grad_norm": 0.6896061301231384, + "learning_rate": 1.411119654673254e-05, + "loss": 2.4535, + "step": 16597 + }, + { + "epoch": 1.339520619804697, + "grad_norm": 0.6992611289024353, + "learning_rate": 1.4103112145886489e-05, + "loss": 2.3983, + "step": 16598 + }, + { + "epoch": 1.33960132354128, + "grad_norm": 0.7176348567008972, + "learning_rate": 
1.4095029885815426e-05, + "loss": 2.4671, + "step": 16599 + }, + { + "epoch": 1.339682027277863, + "grad_norm": 0.6635856628417969, + "learning_rate": 1.4086949766720759e-05, + "loss": 2.4235, + "step": 16600 + }, + { + "epoch": 1.339762731014446, + "grad_norm": 0.673332154750824, + "learning_rate": 1.4078871788803915e-05, + "loss": 2.4328, + "step": 16601 + }, + { + "epoch": 1.339843434751029, + "grad_norm": 0.6738821864128113, + "learning_rate": 1.407079595226617e-05, + "loss": 2.4786, + "step": 16602 + }, + { + "epoch": 1.339924138487612, + "grad_norm": 0.690605103969574, + "learning_rate": 1.4062722257308803e-05, + "loss": 2.4025, + "step": 16603 + }, + { + "epoch": 1.340004842224195, + "grad_norm": 0.7186758518218994, + "learning_rate": 1.4054650704133066e-05, + "loss": 2.4793, + "step": 16604 + }, + { + "epoch": 1.3400855459607781, + "grad_norm": 0.6484951376914978, + "learning_rate": 1.4046581292940075e-05, + "loss": 2.3855, + "step": 16605 + }, + { + "epoch": 1.340166249697361, + "grad_norm": 0.6993771195411682, + "learning_rate": 1.403851402393096e-05, + "loss": 2.3872, + "step": 16606 + }, + { + "epoch": 1.340246953433944, + "grad_norm": 0.7446531653404236, + "learning_rate": 1.403044889730678e-05, + "loss": 2.4253, + "step": 16607 + }, + { + "epoch": 1.340327657170527, + "grad_norm": 0.6873160004615784, + "learning_rate": 1.4022385913268542e-05, + "loss": 2.464, + "step": 16608 + }, + { + "epoch": 1.34040836090711, + "grad_norm": 0.6570948362350464, + "learning_rate": 1.4014325072017198e-05, + "loss": 2.4063, + "step": 16609 + }, + { + "epoch": 1.3404890646436929, + "grad_norm": 0.7209224104881287, + "learning_rate": 1.4006266373753651e-05, + "loss": 2.4827, + "step": 16610 + }, + { + "epoch": 1.340569768380276, + "grad_norm": 0.7283413410186768, + "learning_rate": 1.3998209818678732e-05, + "loss": 2.4009, + "step": 16611 + }, + { + "epoch": 1.340650472116859, + "grad_norm": 0.6650960445404053, + "learning_rate": 1.3990155406993221e-05, + "loss": 2.3576, 
+ "step": 16612 + }, + { + "epoch": 1.340731175853442, + "grad_norm": 0.6857860088348389, + "learning_rate": 1.3982103138897873e-05, + "loss": 2.4686, + "step": 16613 + }, + { + "epoch": 1.340811879590025, + "grad_norm": 0.7065873146057129, + "learning_rate": 1.3974053014593402e-05, + "loss": 2.3999, + "step": 16614 + }, + { + "epoch": 1.340892583326608, + "grad_norm": 0.8093010783195496, + "learning_rate": 1.3966005034280372e-05, + "loss": 2.4273, + "step": 16615 + }, + { + "epoch": 1.340973287063191, + "grad_norm": 0.649132251739502, + "learning_rate": 1.3957959198159387e-05, + "loss": 2.3418, + "step": 16616 + }, + { + "epoch": 1.341053990799774, + "grad_norm": 0.7114978432655334, + "learning_rate": 1.3949915506430976e-05, + "loss": 2.4393, + "step": 16617 + }, + { + "epoch": 1.3411346945363571, + "grad_norm": 0.7989282608032227, + "learning_rate": 1.3941873959295615e-05, + "loss": 2.4044, + "step": 16618 + }, + { + "epoch": 1.34121539827294, + "grad_norm": 0.7373676896095276, + "learning_rate": 1.3933834556953707e-05, + "loss": 2.4758, + "step": 16619 + }, + { + "epoch": 1.341296102009523, + "grad_norm": 0.7076435089111328, + "learning_rate": 1.3925797299605647e-05, + "loss": 2.4429, + "step": 16620 + }, + { + "epoch": 1.3413768057461062, + "grad_norm": 0.6739028692245483, + "learning_rate": 1.39177621874517e-05, + "loss": 2.4275, + "step": 16621 + }, + { + "epoch": 1.341457509482689, + "grad_norm": 0.7134198546409607, + "learning_rate": 1.3909729220692125e-05, + "loss": 2.4541, + "step": 16622 + }, + { + "epoch": 1.3415382132192721, + "grad_norm": 0.6770301461219788, + "learning_rate": 1.3901698399527175e-05, + "loss": 2.4143, + "step": 16623 + }, + { + "epoch": 1.341618916955855, + "grad_norm": 0.7146373987197876, + "learning_rate": 1.3893669724156943e-05, + "loss": 2.4886, + "step": 16624 + }, + { + "epoch": 1.341699620692438, + "grad_norm": 0.6801536083221436, + "learning_rate": 1.3885643194781539e-05, + "loss": 2.4154, + "step": 16625 + }, + { + "epoch": 
1.341780324429021, + "grad_norm": 0.7350363731384277, + "learning_rate": 1.3877618811601024e-05, + "loss": 2.3918, + "step": 16626 + }, + { + "epoch": 1.341861028165604, + "grad_norm": 0.7088882327079773, + "learning_rate": 1.3869596574815358e-05, + "loss": 2.412, + "step": 16627 + }, + { + "epoch": 1.3419417319021871, + "grad_norm": 0.7199791669845581, + "learning_rate": 1.3861576484624506e-05, + "loss": 2.3912, + "step": 16628 + }, + { + "epoch": 1.34202243563877, + "grad_norm": 0.692971408367157, + "learning_rate": 1.3853558541228328e-05, + "loss": 2.3826, + "step": 16629 + }, + { + "epoch": 1.342103139375353, + "grad_norm": 0.7524722814559937, + "learning_rate": 1.3845542744826679e-05, + "loss": 2.4227, + "step": 16630 + }, + { + "epoch": 1.3421838431119362, + "grad_norm": 0.6624585390090942, + "learning_rate": 1.3837529095619307e-05, + "loss": 2.3649, + "step": 16631 + }, + { + "epoch": 1.342264546848519, + "grad_norm": 0.6884489059448242, + "learning_rate": 1.3829517593805929e-05, + "loss": 2.3687, + "step": 16632 + }, + { + "epoch": 1.3423452505851021, + "grad_norm": 0.6766197085380554, + "learning_rate": 1.3821508239586246e-05, + "loss": 2.4191, + "step": 16633 + }, + { + "epoch": 1.3424259543216852, + "grad_norm": 0.6744453310966492, + "learning_rate": 1.3813501033159837e-05, + "loss": 2.4254, + "step": 16634 + }, + { + "epoch": 1.342506658058268, + "grad_norm": 0.6906216144561768, + "learning_rate": 1.3805495974726267e-05, + "loss": 2.4763, + "step": 16635 + }, + { + "epoch": 1.3425873617948512, + "grad_norm": 0.7052608132362366, + "learning_rate": 1.3797493064485078e-05, + "loss": 2.4307, + "step": 16636 + }, + { + "epoch": 1.342668065531434, + "grad_norm": 0.6701127290725708, + "learning_rate": 1.3789492302635653e-05, + "loss": 2.4529, + "step": 16637 + }, + { + "epoch": 1.3427487692680171, + "grad_norm": 0.7440397143363953, + "learning_rate": 1.3781493689377455e-05, + "loss": 2.4471, + "step": 16638 + }, + { + "epoch": 1.3428294730046, + "grad_norm": 
0.7340207695960999, + "learning_rate": 1.3773497224909848e-05, + "loss": 2.4434, + "step": 16639 + }, + { + "epoch": 1.342910176741183, + "grad_norm": 0.6836793422698975, + "learning_rate": 1.376550290943205e-05, + "loss": 2.4072, + "step": 16640 + }, + { + "epoch": 1.3429908804777662, + "grad_norm": 0.6820472478866577, + "learning_rate": 1.3757510743143342e-05, + "loss": 2.4078, + "step": 16641 + }, + { + "epoch": 1.343071584214349, + "grad_norm": 0.6608061194419861, + "learning_rate": 1.3749520726242938e-05, + "loss": 2.3995, + "step": 16642 + }, + { + "epoch": 1.3431522879509321, + "grad_norm": 0.6582421064376831, + "learning_rate": 1.3741532858929906e-05, + "loss": 2.3768, + "step": 16643 + }, + { + "epoch": 1.3432329916875152, + "grad_norm": 0.7032744288444519, + "learning_rate": 1.3733547141403358e-05, + "loss": 2.4367, + "step": 16644 + }, + { + "epoch": 1.343313695424098, + "grad_norm": 0.7149307727813721, + "learning_rate": 1.3725563573862321e-05, + "loss": 2.4425, + "step": 16645 + }, + { + "epoch": 1.3433943991606812, + "grad_norm": 0.7375392913818359, + "learning_rate": 1.3717582156505793e-05, + "loss": 2.409, + "step": 16646 + }, + { + "epoch": 1.3434751028972642, + "grad_norm": 0.8422170877456665, + "learning_rate": 1.3709602889532624e-05, + "loss": 2.4758, + "step": 16647 + }, + { + "epoch": 1.343555806633847, + "grad_norm": 0.6542177796363831, + "learning_rate": 1.3701625773141712e-05, + "loss": 2.4199, + "step": 16648 + }, + { + "epoch": 1.3436365103704302, + "grad_norm": 0.6639342904090881, + "learning_rate": 1.3693650807531898e-05, + "loss": 2.4366, + "step": 16649 + }, + { + "epoch": 1.3437172141070133, + "grad_norm": 0.7270925045013428, + "learning_rate": 1.3685677992901901e-05, + "loss": 2.3745, + "step": 16650 + }, + { + "epoch": 1.3437979178435961, + "grad_norm": 0.7325547337532043, + "learning_rate": 1.367770732945044e-05, + "loss": 2.5053, + "step": 16651 + }, + { + "epoch": 1.3438786215801792, + "grad_norm": 0.7752320766448975, + 
"learning_rate": 1.3669738817376177e-05, + "loss": 2.4505, + "step": 16652 + }, + { + "epoch": 1.343959325316762, + "grad_norm": 0.6538182497024536, + "learning_rate": 1.3661772456877675e-05, + "loss": 2.4164, + "step": 16653 + }, + { + "epoch": 1.3440400290533452, + "grad_norm": 0.6886051297187805, + "learning_rate": 1.3653808248153487e-05, + "loss": 2.4156, + "step": 16654 + }, + { + "epoch": 1.344120732789928, + "grad_norm": 0.6990679502487183, + "learning_rate": 1.3645846191402134e-05, + "loss": 2.418, + "step": 16655 + }, + { + "epoch": 1.3442014365265111, + "grad_norm": 0.7006608247756958, + "learning_rate": 1.3637886286821999e-05, + "loss": 2.3987, + "step": 16656 + }, + { + "epoch": 1.3442821402630942, + "grad_norm": 0.6858758926391602, + "learning_rate": 1.3629928534611502e-05, + "loss": 2.3571, + "step": 16657 + }, + { + "epoch": 1.344362843999677, + "grad_norm": 0.7273774147033691, + "learning_rate": 1.3621972934968951e-05, + "loss": 2.4141, + "step": 16658 + }, + { + "epoch": 1.3444435477362602, + "grad_norm": 0.6770352721214294, + "learning_rate": 1.3614019488092633e-05, + "loss": 2.4602, + "step": 16659 + }, + { + "epoch": 1.3445242514728433, + "grad_norm": 0.7473095655441284, + "learning_rate": 1.3606068194180766e-05, + "loss": 2.3884, + "step": 16660 + }, + { + "epoch": 1.3446049552094261, + "grad_norm": 0.7271387577056885, + "learning_rate": 1.3598119053431512e-05, + "loss": 2.4705, + "step": 16661 + }, + { + "epoch": 1.3446856589460092, + "grad_norm": 0.658349335193634, + "learning_rate": 1.3590172066043006e-05, + "loss": 2.4271, + "step": 16662 + }, + { + "epoch": 1.3447663626825923, + "grad_norm": 0.6479319930076599, + "learning_rate": 1.3582227232213273e-05, + "loss": 2.3428, + "step": 16663 + }, + { + "epoch": 1.3448470664191752, + "grad_norm": 0.700951874256134, + "learning_rate": 1.3574284552140337e-05, + "loss": 2.4926, + "step": 16664 + }, + { + "epoch": 1.3449277701557583, + "grad_norm": 0.6699960231781006, + "learning_rate": 
1.3566344026022171e-05, + "loss": 2.4372, + "step": 16665 + }, + { + "epoch": 1.3450084738923413, + "grad_norm": 0.6743033528327942, + "learning_rate": 1.3558405654056617e-05, + "loss": 2.4142, + "step": 16666 + }, + { + "epoch": 1.3450891776289242, + "grad_norm": 0.6619464755058289, + "learning_rate": 1.355046943644157e-05, + "loss": 2.4099, + "step": 16667 + }, + { + "epoch": 1.3451698813655073, + "grad_norm": 0.668084442615509, + "learning_rate": 1.3542535373374798e-05, + "loss": 2.3895, + "step": 16668 + }, + { + "epoch": 1.3452505851020902, + "grad_norm": 0.7954626679420471, + "learning_rate": 1.3534603465054052e-05, + "loss": 2.479, + "step": 16669 + }, + { + "epoch": 1.3453312888386733, + "grad_norm": 0.6742919683456421, + "learning_rate": 1.3526673711677008e-05, + "loss": 2.4289, + "step": 16670 + }, + { + "epoch": 1.3454119925752561, + "grad_norm": 0.6564723253250122, + "learning_rate": 1.3518746113441316e-05, + "loss": 2.404, + "step": 16671 + }, + { + "epoch": 1.3454926963118392, + "grad_norm": 0.6955705881118774, + "learning_rate": 1.3510820670544521e-05, + "loss": 2.4274, + "step": 16672 + }, + { + "epoch": 1.3455734000484223, + "grad_norm": 0.6687749028205872, + "learning_rate": 1.3502897383184154e-05, + "loss": 2.4564, + "step": 16673 + }, + { + "epoch": 1.3456541037850052, + "grad_norm": 0.7984250783920288, + "learning_rate": 1.34949762515577e-05, + "loss": 2.3426, + "step": 16674 + }, + { + "epoch": 1.3457348075215882, + "grad_norm": 0.7334223389625549, + "learning_rate": 1.348705727586258e-05, + "loss": 2.4712, + "step": 16675 + }, + { + "epoch": 1.3458155112581713, + "grad_norm": 0.6732765436172485, + "learning_rate": 1.3479140456296114e-05, + "loss": 2.424, + "step": 16676 + }, + { + "epoch": 1.3458962149947542, + "grad_norm": 0.7944334149360657, + "learning_rate": 1.3471225793055641e-05, + "loss": 2.3951, + "step": 16677 + }, + { + "epoch": 1.3459769187313373, + "grad_norm": 0.6829007863998413, + "learning_rate": 1.3463313286338408e-05, + 
"loss": 2.4158, + "step": 16678 + }, + { + "epoch": 1.3460576224679204, + "grad_norm": 0.7019640207290649, + "learning_rate": 1.345540293634161e-05, + "loss": 2.4093, + "step": 16679 + }, + { + "epoch": 1.3461383262045032, + "grad_norm": 0.6839374303817749, + "learning_rate": 1.3447494743262412e-05, + "loss": 2.3959, + "step": 16680 + }, + { + "epoch": 1.3462190299410863, + "grad_norm": 0.7211155295372009, + "learning_rate": 1.3439588707297911e-05, + "loss": 2.4052, + "step": 16681 + }, + { + "epoch": 1.3462997336776692, + "grad_norm": 0.73811274766922, + "learning_rate": 1.3431684828645109e-05, + "loss": 2.4179, + "step": 16682 + }, + { + "epoch": 1.3463804374142523, + "grad_norm": 0.6634721159934998, + "learning_rate": 1.3423783107501009e-05, + "loss": 2.379, + "step": 16683 + }, + { + "epoch": 1.3464611411508352, + "grad_norm": 0.6884057521820068, + "learning_rate": 1.3415883544062579e-05, + "loss": 2.4144, + "step": 16684 + }, + { + "epoch": 1.3465418448874182, + "grad_norm": 0.7239587306976318, + "learning_rate": 1.340798613852664e-05, + "loss": 2.3856, + "step": 16685 + }, + { + "epoch": 1.3466225486240013, + "grad_norm": 0.7201077342033386, + "learning_rate": 1.3400090891090033e-05, + "loss": 2.4552, + "step": 16686 + }, + { + "epoch": 1.3467032523605842, + "grad_norm": 0.7049584984779358, + "learning_rate": 1.3392197801949558e-05, + "loss": 2.4424, + "step": 16687 + }, + { + "epoch": 1.3467839560971673, + "grad_norm": 0.7240790128707886, + "learning_rate": 1.3384306871301877e-05, + "loss": 2.4156, + "step": 16688 + }, + { + "epoch": 1.3468646598337504, + "grad_norm": 0.7276458740234375, + "learning_rate": 1.337641809934369e-05, + "loss": 2.3882, + "step": 16689 + }, + { + "epoch": 1.3469453635703332, + "grad_norm": 0.6650896072387695, + "learning_rate": 1.3368531486271607e-05, + "loss": 2.396, + "step": 16690 + }, + { + "epoch": 1.3470260673069163, + "grad_norm": 0.6946447491645813, + "learning_rate": 1.3360647032282203e-05, + "loss": 2.3779, + "step": 
16691 + }, + { + "epoch": 1.3471067710434994, + "grad_norm": 0.7507699728012085, + "learning_rate": 1.3352764737571932e-05, + "loss": 2.4378, + "step": 16692 + }, + { + "epoch": 1.3471874747800823, + "grad_norm": 0.6548876762390137, + "learning_rate": 1.334488460233725e-05, + "loss": 2.4181, + "step": 16693 + }, + { + "epoch": 1.3472681785166654, + "grad_norm": 0.7000874280929565, + "learning_rate": 1.3337006626774595e-05, + "loss": 2.4463, + "step": 16694 + }, + { + "epoch": 1.3473488822532484, + "grad_norm": 0.6487517356872559, + "learning_rate": 1.3329130811080249e-05, + "loss": 2.3703, + "step": 16695 + }, + { + "epoch": 1.3474295859898313, + "grad_norm": 0.6447827219963074, + "learning_rate": 1.3321257155450517e-05, + "loss": 2.3779, + "step": 16696 + }, + { + "epoch": 1.3475102897264144, + "grad_norm": 0.6309572458267212, + "learning_rate": 1.3313385660081667e-05, + "loss": 2.4443, + "step": 16697 + }, + { + "epoch": 1.3475909934629973, + "grad_norm": 0.6366227865219116, + "learning_rate": 1.330551632516982e-05, + "loss": 2.3418, + "step": 16698 + }, + { + "epoch": 1.3476716971995804, + "grad_norm": 0.6864019632339478, + "learning_rate": 1.3297649150911117e-05, + "loss": 2.4416, + "step": 16699 + }, + { + "epoch": 1.3477524009361632, + "grad_norm": 0.6807940006256104, + "learning_rate": 1.3289784137501671e-05, + "loss": 2.4465, + "step": 16700 + }, + { + "epoch": 1.3478331046727463, + "grad_norm": 0.6991185545921326, + "learning_rate": 1.3281921285137455e-05, + "loss": 2.3929, + "step": 16701 + }, + { + "epoch": 1.3479138084093294, + "grad_norm": 0.691908061504364, + "learning_rate": 1.3274060594014437e-05, + "loss": 2.4237, + "step": 16702 + }, + { + "epoch": 1.3479945121459123, + "grad_norm": 0.6909685730934143, + "learning_rate": 1.3266202064328548e-05, + "loss": 2.3695, + "step": 16703 + }, + { + "epoch": 1.3480752158824953, + "grad_norm": 0.6473715901374817, + "learning_rate": 1.325834569627562e-05, + "loss": 2.384, + "step": 16704 + }, + { + "epoch": 
1.3481559196190784, + "grad_norm": 0.7433453798294067, + "learning_rate": 1.3250491490051454e-05, + "loss": 2.4546, + "step": 16705 + }, + { + "epoch": 1.3482366233556613, + "grad_norm": 0.7432501316070557, + "learning_rate": 1.3242639445851812e-05, + "loss": 2.4204, + "step": 16706 + }, + { + "epoch": 1.3483173270922444, + "grad_norm": 0.6661228537559509, + "learning_rate": 1.3234789563872397e-05, + "loss": 2.4454, + "step": 16707 + }, + { + "epoch": 1.3483980308288275, + "grad_norm": 0.7481260895729065, + "learning_rate": 1.3226941844308816e-05, + "loss": 2.4348, + "step": 16708 + }, + { + "epoch": 1.3484787345654103, + "grad_norm": 0.6986531019210815, + "learning_rate": 1.3219096287356669e-05, + "loss": 2.3622, + "step": 16709 + }, + { + "epoch": 1.3485594383019934, + "grad_norm": 0.7457645535469055, + "learning_rate": 1.321125289321149e-05, + "loss": 2.4399, + "step": 16710 + }, + { + "epoch": 1.3486401420385765, + "grad_norm": 0.6710307598114014, + "learning_rate": 1.3203411662068754e-05, + "loss": 2.3857, + "step": 16711 + }, + { + "epoch": 1.3487208457751594, + "grad_norm": 0.767304539680481, + "learning_rate": 1.3195572594123884e-05, + "loss": 2.4666, + "step": 16712 + }, + { + "epoch": 1.3488015495117425, + "grad_norm": 0.6720963716506958, + "learning_rate": 1.3187735689572289e-05, + "loss": 2.3952, + "step": 16713 + }, + { + "epoch": 1.3488822532483253, + "grad_norm": 0.6381734609603882, + "learning_rate": 1.3179900948609213e-05, + "loss": 2.3632, + "step": 16714 + }, + { + "epoch": 1.3489629569849084, + "grad_norm": 0.6697315573692322, + "learning_rate": 1.317206837142997e-05, + "loss": 2.4117, + "step": 16715 + }, + { + "epoch": 1.3490436607214913, + "grad_norm": 0.723676323890686, + "learning_rate": 1.3164237958229764e-05, + "loss": 2.3772, + "step": 16716 + }, + { + "epoch": 1.3491243644580744, + "grad_norm": 0.7021055817604065, + "learning_rate": 1.3156409709203732e-05, + "loss": 2.3808, + "step": 16717 + }, + { + "epoch": 1.3492050681946575, + 
"grad_norm": 0.7128920555114746, + "learning_rate": 1.3148583624546962e-05, + "loss": 2.3854, + "step": 16718 + }, + { + "epoch": 1.3492857719312403, + "grad_norm": 0.6684797406196594, + "learning_rate": 1.314075970445453e-05, + "loss": 2.3722, + "step": 16719 + }, + { + "epoch": 1.3493664756678234, + "grad_norm": 0.6710386276245117, + "learning_rate": 1.3132937949121426e-05, + "loss": 2.412, + "step": 16720 + }, + { + "epoch": 1.3494471794044065, + "grad_norm": 0.7207252979278564, + "learning_rate": 1.3125118358742572e-05, + "loss": 2.4506, + "step": 16721 + }, + { + "epoch": 1.3495278831409894, + "grad_norm": 0.685516893863678, + "learning_rate": 1.3117300933512865e-05, + "loss": 2.435, + "step": 16722 + }, + { + "epoch": 1.3496085868775725, + "grad_norm": 0.71708744764328, + "learning_rate": 1.3109485673627154e-05, + "loss": 2.4735, + "step": 16723 + }, + { + "epoch": 1.3496892906141555, + "grad_norm": 0.7293861508369446, + "learning_rate": 1.3101672579280166e-05, + "loss": 2.4545, + "step": 16724 + }, + { + "epoch": 1.3497699943507384, + "grad_norm": 0.6448976993560791, + "learning_rate": 1.3093861650666661e-05, + "loss": 2.386, + "step": 16725 + }, + { + "epoch": 1.3498506980873215, + "grad_norm": 0.8111226558685303, + "learning_rate": 1.3086052887981315e-05, + "loss": 2.4733, + "step": 16726 + }, + { + "epoch": 1.3499314018239044, + "grad_norm": 0.7673875093460083, + "learning_rate": 1.3078246291418706e-05, + "loss": 2.4119, + "step": 16727 + }, + { + "epoch": 1.3500121055604875, + "grad_norm": 0.7296731472015381, + "learning_rate": 1.307044186117341e-05, + "loss": 2.3724, + "step": 16728 + }, + { + "epoch": 1.3500928092970703, + "grad_norm": 0.6947155594825745, + "learning_rate": 1.306263959743994e-05, + "loss": 2.3989, + "step": 16729 + }, + { + "epoch": 1.3501735130336534, + "grad_norm": 0.6781659722328186, + "learning_rate": 1.3054839500412753e-05, + "loss": 2.429, + "step": 16730 + }, + { + "epoch": 1.3502542167702365, + "grad_norm": 0.7498819231987, + 
"learning_rate": 1.3047041570286244e-05, + "loss": 2.459, + "step": 16731 + }, + { + "epoch": 1.3503349205068194, + "grad_norm": 0.6651057004928589, + "learning_rate": 1.3039245807254774e-05, + "loss": 2.4049, + "step": 16732 + }, + { + "epoch": 1.3504156242434024, + "grad_norm": 0.6998507380485535, + "learning_rate": 1.3031452211512596e-05, + "loss": 2.4083, + "step": 16733 + }, + { + "epoch": 1.3504963279799855, + "grad_norm": 0.6522402167320251, + "learning_rate": 1.3023660783253966e-05, + "loss": 2.3987, + "step": 16734 + }, + { + "epoch": 1.3505770317165684, + "grad_norm": 0.6618130207061768, + "learning_rate": 1.3015871522673096e-05, + "loss": 2.4514, + "step": 16735 + }, + { + "epoch": 1.3506577354531515, + "grad_norm": 0.7139489650726318, + "learning_rate": 1.300808442996405e-05, + "loss": 2.484, + "step": 16736 + }, + { + "epoch": 1.3507384391897346, + "grad_norm": 0.6582522988319397, + "learning_rate": 1.3000299505320956e-05, + "loss": 2.4463, + "step": 16737 + }, + { + "epoch": 1.3508191429263174, + "grad_norm": 0.7115446329116821, + "learning_rate": 1.2992516748937811e-05, + "loss": 2.4795, + "step": 16738 + }, + { + "epoch": 1.3508998466629005, + "grad_norm": 0.7243752479553223, + "learning_rate": 1.2984736161008581e-05, + "loss": 2.4151, + "step": 16739 + }, + { + "epoch": 1.3509805503994836, + "grad_norm": 0.758084774017334, + "learning_rate": 1.297695774172719e-05, + "loss": 2.4028, + "step": 16740 + }, + { + "epoch": 1.3510612541360665, + "grad_norm": 0.6555618643760681, + "learning_rate": 1.2969181491287496e-05, + "loss": 2.4184, + "step": 16741 + }, + { + "epoch": 1.3511419578726496, + "grad_norm": 0.6657842993736267, + "learning_rate": 1.2961407409883331e-05, + "loss": 2.375, + "step": 16742 + }, + { + "epoch": 1.3512226616092324, + "grad_norm": 0.6355723142623901, + "learning_rate": 1.2953635497708382e-05, + "loss": 2.4202, + "step": 16743 + }, + { + "epoch": 1.3513033653458155, + "grad_norm": 0.7384408116340637, + "learning_rate": 
1.2945865754956377e-05, + "loss": 2.4298, + "step": 16744 + }, + { + "epoch": 1.3513840690823984, + "grad_norm": 0.7300455570220947, + "learning_rate": 1.2938098181820979e-05, + "loss": 2.3842, + "step": 16745 + }, + { + "epoch": 1.3514647728189815, + "grad_norm": 0.7378895282745361, + "learning_rate": 1.2930332778495735e-05, + "loss": 2.4025, + "step": 16746 + }, + { + "epoch": 1.3515454765555646, + "grad_norm": 0.6542565822601318, + "learning_rate": 1.2922569545174212e-05, + "loss": 2.3995, + "step": 16747 + }, + { + "epoch": 1.3516261802921474, + "grad_norm": 0.669829249382019, + "learning_rate": 1.291480848204989e-05, + "loss": 2.3843, + "step": 16748 + }, + { + "epoch": 1.3517068840287305, + "grad_norm": 0.6747604608535767, + "learning_rate": 1.2907049589316167e-05, + "loss": 2.4108, + "step": 16749 + }, + { + "epoch": 1.3517875877653136, + "grad_norm": 0.7003559470176697, + "learning_rate": 1.2899292867166402e-05, + "loss": 2.4233, + "step": 16750 + }, + { + "epoch": 1.3518682915018965, + "grad_norm": 0.7365099191665649, + "learning_rate": 1.2891538315793994e-05, + "loss": 2.3592, + "step": 16751 + }, + { + "epoch": 1.3519489952384796, + "grad_norm": 0.6849377751350403, + "learning_rate": 1.2883785935392123e-05, + "loss": 2.3943, + "step": 16752 + }, + { + "epoch": 1.3520296989750626, + "grad_norm": 0.7263002395629883, + "learning_rate": 1.2876035726154045e-05, + "loss": 2.4078, + "step": 16753 + }, + { + "epoch": 1.3521104027116455, + "grad_norm": 0.7341182827949524, + "learning_rate": 1.2868287688272884e-05, + "loss": 2.3568, + "step": 16754 + }, + { + "epoch": 1.3521911064482286, + "grad_norm": 0.7281078100204468, + "learning_rate": 1.2860541821941796e-05, + "loss": 2.4073, + "step": 16755 + }, + { + "epoch": 1.3522718101848117, + "grad_norm": 0.6302868127822876, + "learning_rate": 1.285279812735376e-05, + "loss": 2.3946, + "step": 16756 + }, + { + "epoch": 1.3523525139213946, + "grad_norm": 0.7333062887191772, + "learning_rate": 1.28450566047018e-05, + 
"loss": 2.3892, + "step": 16757 + }, + { + "epoch": 1.3524332176579776, + "grad_norm": 0.74838787317276, + "learning_rate": 1.2837317254178882e-05, + "loss": 2.4844, + "step": 16758 + }, + { + "epoch": 1.3525139213945605, + "grad_norm": 0.7085757255554199, + "learning_rate": 1.2829580075977843e-05, + "loss": 2.3583, + "step": 16759 + }, + { + "epoch": 1.3525946251311436, + "grad_norm": 0.7182579040527344, + "learning_rate": 1.2821845070291527e-05, + "loss": 2.4326, + "step": 16760 + }, + { + "epoch": 1.3526753288677265, + "grad_norm": 0.6857885718345642, + "learning_rate": 1.2814112237312714e-05, + "loss": 2.4406, + "step": 16761 + }, + { + "epoch": 1.3527560326043095, + "grad_norm": 0.7629652619361877, + "learning_rate": 1.2806381577234139e-05, + "loss": 2.4839, + "step": 16762 + }, + { + "epoch": 1.3528367363408926, + "grad_norm": 0.6940319538116455, + "learning_rate": 1.2798653090248458e-05, + "loss": 2.3918, + "step": 16763 + }, + { + "epoch": 1.3529174400774755, + "grad_norm": 0.6825633645057678, + "learning_rate": 1.2790926776548318e-05, + "loss": 2.3828, + "step": 16764 + }, + { + "epoch": 1.3529981438140586, + "grad_norm": 0.6830280423164368, + "learning_rate": 1.278320263632622e-05, + "loss": 2.3727, + "step": 16765 + }, + { + "epoch": 1.3530788475506417, + "grad_norm": 0.6782984733581543, + "learning_rate": 1.2775480669774698e-05, + "loss": 2.3984, + "step": 16766 + }, + { + "epoch": 1.3531595512872245, + "grad_norm": 0.6939808130264282, + "learning_rate": 1.276776087708621e-05, + "loss": 2.3724, + "step": 16767 + }, + { + "epoch": 1.3532402550238076, + "grad_norm": 0.7562546133995056, + "learning_rate": 1.276004325845317e-05, + "loss": 2.4178, + "step": 16768 + }, + { + "epoch": 1.3533209587603907, + "grad_norm": 0.6692922115325928, + "learning_rate": 1.2752327814067877e-05, + "loss": 2.4072, + "step": 16769 + }, + { + "epoch": 1.3534016624969736, + "grad_norm": 0.6783415079116821, + "learning_rate": 1.2744614544122635e-05, + "loss": 2.3993, + "step": 
16770 + }, + { + "epoch": 1.3534823662335567, + "grad_norm": 0.6608997583389282, + "learning_rate": 1.27369034488097e-05, + "loss": 2.3883, + "step": 16771 + }, + { + "epoch": 1.3535630699701398, + "grad_norm": 0.6849228739738464, + "learning_rate": 1.2729194528321231e-05, + "loss": 2.4009, + "step": 16772 + }, + { + "epoch": 1.3536437737067226, + "grad_norm": 0.7059305906295776, + "learning_rate": 1.2721487782849362e-05, + "loss": 2.508, + "step": 16773 + }, + { + "epoch": 1.3537244774433057, + "grad_norm": 0.6471492052078247, + "learning_rate": 1.2713783212586183e-05, + "loss": 2.3813, + "step": 16774 + }, + { + "epoch": 1.3538051811798886, + "grad_norm": 0.7108949422836304, + "learning_rate": 1.2706080817723687e-05, + "loss": 2.4189, + "step": 16775 + }, + { + "epoch": 1.3538858849164717, + "grad_norm": 0.6623945236206055, + "learning_rate": 1.269838059845383e-05, + "loss": 2.4128, + "step": 16776 + }, + { + "epoch": 1.3539665886530545, + "grad_norm": 0.6595518589019775, + "learning_rate": 1.269068255496857e-05, + "loss": 2.3984, + "step": 16777 + }, + { + "epoch": 1.3540472923896376, + "grad_norm": 0.6932248473167419, + "learning_rate": 1.2682986687459708e-05, + "loss": 2.3951, + "step": 16778 + }, + { + "epoch": 1.3541279961262207, + "grad_norm": 0.6914867162704468, + "learning_rate": 1.2675292996119059e-05, + "loss": 2.4602, + "step": 16779 + }, + { + "epoch": 1.3542086998628036, + "grad_norm": 0.6633034348487854, + "learning_rate": 1.266760148113838e-05, + "loss": 2.43, + "step": 16780 + }, + { + "epoch": 1.3542894035993867, + "grad_norm": 0.6987594366073608, + "learning_rate": 1.2659912142709363e-05, + "loss": 2.3962, + "step": 16781 + }, + { + "epoch": 1.3543701073359697, + "grad_norm": 0.7429597973823547, + "learning_rate": 1.2652224981023652e-05, + "loss": 2.4838, + "step": 16782 + }, + { + "epoch": 1.3544508110725526, + "grad_norm": 0.6402504444122314, + "learning_rate": 1.2644539996272808e-05, + "loss": 2.43, + "step": 16783 + }, + { + "epoch": 
1.3545315148091357, + "grad_norm": 0.6763156652450562, + "learning_rate": 1.263685718864841e-05, + "loss": 2.4911, + "step": 16784 + }, + { + "epoch": 1.3546122185457188, + "grad_norm": 0.8133900165557861, + "learning_rate": 1.2629176558341881e-05, + "loss": 2.45, + "step": 16785 + }, + { + "epoch": 1.3546929222823016, + "grad_norm": 0.6946277022361755, + "learning_rate": 1.262149810554465e-05, + "loss": 2.43, + "step": 16786 + }, + { + "epoch": 1.3547736260188847, + "grad_norm": 0.7667170166969299, + "learning_rate": 1.2613821830448125e-05, + "loss": 2.4464, + "step": 16787 + }, + { + "epoch": 1.3548543297554676, + "grad_norm": 0.672662615776062, + "learning_rate": 1.2606147733243567e-05, + "loss": 2.3653, + "step": 16788 + }, + { + "epoch": 1.3549350334920507, + "grad_norm": 0.6856412291526794, + "learning_rate": 1.2598475814122258e-05, + "loss": 2.3924, + "step": 16789 + }, + { + "epoch": 1.3550157372286336, + "grad_norm": 0.6966650485992432, + "learning_rate": 1.2590806073275407e-05, + "loss": 2.4039, + "step": 16790 + }, + { + "epoch": 1.3550964409652166, + "grad_norm": 0.7397874593734741, + "learning_rate": 1.2583138510894143e-05, + "loss": 2.4769, + "step": 16791 + }, + { + "epoch": 1.3551771447017997, + "grad_norm": 0.6960996985435486, + "learning_rate": 1.2575473127169591e-05, + "loss": 2.4342, + "step": 16792 + }, + { + "epoch": 1.3552578484383826, + "grad_norm": 0.7324376702308655, + "learning_rate": 1.2567809922292795e-05, + "loss": 2.4779, + "step": 16793 + }, + { + "epoch": 1.3553385521749657, + "grad_norm": 0.6891930103302002, + "learning_rate": 1.2560148896454704e-05, + "loss": 2.4228, + "step": 16794 + }, + { + "epoch": 1.3554192559115488, + "grad_norm": 0.6919474601745605, + "learning_rate": 1.2552490049846278e-05, + "loss": 2.4178, + "step": 16795 + }, + { + "epoch": 1.3554999596481316, + "grad_norm": 0.7067604660987854, + "learning_rate": 1.2544833382658405e-05, + "loss": 2.457, + "step": 16796 + }, + { + "epoch": 1.3555806633847147, + 
"grad_norm": 0.7667992115020752, + "learning_rate": 1.253717889508188e-05, + "loss": 2.3951, + "step": 16797 + }, + { + "epoch": 1.3556613671212978, + "grad_norm": 0.6337998509407043, + "learning_rate": 1.2529526587307482e-05, + "loss": 2.3788, + "step": 16798 + }, + { + "epoch": 1.3557420708578807, + "grad_norm": 0.6591900587081909, + "learning_rate": 1.2521876459525927e-05, + "loss": 2.4101, + "step": 16799 + }, + { + "epoch": 1.3558227745944638, + "grad_norm": 0.7115298509597778, + "learning_rate": 1.2514228511927895e-05, + "loss": 2.4417, + "step": 16800 + }, + { + "epoch": 1.3559034783310469, + "grad_norm": 0.6851321458816528, + "learning_rate": 1.2506582744703965e-05, + "loss": 2.4081, + "step": 16801 + }, + { + "epoch": 1.3559841820676297, + "grad_norm": 0.7469603419303894, + "learning_rate": 1.249893915804471e-05, + "loss": 2.3703, + "step": 16802 + }, + { + "epoch": 1.3560648858042128, + "grad_norm": 0.6972614526748657, + "learning_rate": 1.2491297752140641e-05, + "loss": 2.3549, + "step": 16803 + }, + { + "epoch": 1.3561455895407957, + "grad_norm": 0.6669485569000244, + "learning_rate": 1.2483658527182151e-05, + "loss": 2.4261, + "step": 16804 + }, + { + "epoch": 1.3562262932773788, + "grad_norm": 0.7516919374465942, + "learning_rate": 1.247602148335968e-05, + "loss": 2.4323, + "step": 16805 + }, + { + "epoch": 1.3563069970139616, + "grad_norm": 0.7191836833953857, + "learning_rate": 1.2468386620863548e-05, + "loss": 2.4242, + "step": 16806 + }, + { + "epoch": 1.3563877007505447, + "grad_norm": 0.660237729549408, + "learning_rate": 1.2460753939884017e-05, + "loss": 2.4154, + "step": 16807 + }, + { + "epoch": 1.3564684044871278, + "grad_norm": 0.749531626701355, + "learning_rate": 1.2453123440611325e-05, + "loss": 2.4138, + "step": 16808 + }, + { + "epoch": 1.3565491082237107, + "grad_norm": 0.6808986067771912, + "learning_rate": 1.2445495123235673e-05, + "loss": 2.3918, + "step": 16809 + }, + { + "epoch": 1.3566298119602938, + "grad_norm": 
0.686183750629425, + "learning_rate": 1.2437868987947133e-05, + "loss": 2.4172, + "step": 16810 + }, + { + "epoch": 1.3567105156968768, + "grad_norm": 0.6487868428230286, + "learning_rate": 1.2430245034935784e-05, + "loss": 2.4199, + "step": 16811 + }, + { + "epoch": 1.3567912194334597, + "grad_norm": 0.7352244257926941, + "learning_rate": 1.242262326439163e-05, + "loss": 2.3779, + "step": 16812 + }, + { + "epoch": 1.3568719231700428, + "grad_norm": 0.7250565886497498, + "learning_rate": 1.2415003676504644e-05, + "loss": 2.4106, + "step": 16813 + }, + { + "epoch": 1.3569526269066259, + "grad_norm": 0.6843926906585693, + "learning_rate": 1.2407386271464716e-05, + "loss": 2.3725, + "step": 16814 + }, + { + "epoch": 1.3570333306432087, + "grad_norm": 0.686326801776886, + "learning_rate": 1.2399771049461684e-05, + "loss": 2.3709, + "step": 16815 + }, + { + "epoch": 1.3571140343797918, + "grad_norm": 0.6796969771385193, + "learning_rate": 1.2392158010685373e-05, + "loss": 2.4545, + "step": 16816 + }, + { + "epoch": 1.357194738116375, + "grad_norm": 0.6469466090202332, + "learning_rate": 1.2384547155325466e-05, + "loss": 2.4263, + "step": 16817 + }, + { + "epoch": 1.3572754418529578, + "grad_norm": 0.7089909911155701, + "learning_rate": 1.2376938483571688e-05, + "loss": 2.378, + "step": 16818 + }, + { + "epoch": 1.3573561455895409, + "grad_norm": 0.7313235402107239, + "learning_rate": 1.2369331995613665e-05, + "loss": 2.46, + "step": 16819 + }, + { + "epoch": 1.3574368493261237, + "grad_norm": 0.7555651664733887, + "learning_rate": 1.2361727691640934e-05, + "loss": 2.531, + "step": 16820 + }, + { + "epoch": 1.3575175530627068, + "grad_norm": 0.7563485503196716, + "learning_rate": 1.2354125571843033e-05, + "loss": 2.4205, + "step": 16821 + }, + { + "epoch": 1.3575982567992897, + "grad_norm": 0.7996519804000854, + "learning_rate": 1.2346525636409434e-05, + "loss": 2.4223, + "step": 16822 + }, + { + "epoch": 1.3576789605358728, + "grad_norm": 0.7141731977462769, + 
"learning_rate": 1.233892788552955e-05, + "loss": 2.4554, + "step": 16823 + }, + { + "epoch": 1.3577596642724559, + "grad_norm": 0.6715070605278015, + "learning_rate": 1.233133231939273e-05, + "loss": 2.4386, + "step": 16824 + }, + { + "epoch": 1.3578403680090387, + "grad_norm": 0.6893020272254944, + "learning_rate": 1.2323738938188301e-05, + "loss": 2.4065, + "step": 16825 + }, + { + "epoch": 1.3579210717456218, + "grad_norm": 0.7542821764945984, + "learning_rate": 1.2316147742105454e-05, + "loss": 2.3974, + "step": 16826 + }, + { + "epoch": 1.358001775482205, + "grad_norm": 0.7177664041519165, + "learning_rate": 1.230855873133343e-05, + "loss": 2.4306, + "step": 16827 + }, + { + "epoch": 1.3580824792187878, + "grad_norm": 0.7056576013565063, + "learning_rate": 1.2300971906061354e-05, + "loss": 2.4238, + "step": 16828 + }, + { + "epoch": 1.3581631829553709, + "grad_norm": 0.686903715133667, + "learning_rate": 1.2293387266478296e-05, + "loss": 2.3902, + "step": 16829 + }, + { + "epoch": 1.358243886691954, + "grad_norm": 0.7377725839614868, + "learning_rate": 1.2285804812773293e-05, + "loss": 2.4294, + "step": 16830 + }, + { + "epoch": 1.3583245904285368, + "grad_norm": 0.6537891030311584, + "learning_rate": 1.227822454513532e-05, + "loss": 2.374, + "step": 16831 + }, + { + "epoch": 1.35840529416512, + "grad_norm": 0.684699296951294, + "learning_rate": 1.2270646463753288e-05, + "loss": 2.4105, + "step": 16832 + }, + { + "epoch": 1.3584859979017028, + "grad_norm": 0.7042316794395447, + "learning_rate": 1.2263070568816081e-05, + "loss": 2.4246, + "step": 16833 + }, + { + "epoch": 1.3585667016382859, + "grad_norm": 0.7610476613044739, + "learning_rate": 1.2255496860512505e-05, + "loss": 2.4581, + "step": 16834 + }, + { + "epoch": 1.3586474053748687, + "grad_norm": 0.6620839834213257, + "learning_rate": 1.224792533903134e-05, + "loss": 2.4138, + "step": 16835 + }, + { + "epoch": 1.3587281091114518, + "grad_norm": 0.6861035823822021, + "learning_rate": 
1.2240356004561227e-05, + "loss": 2.4195, + "step": 16836 + }, + { + "epoch": 1.358808812848035, + "grad_norm": 0.7186882495880127, + "learning_rate": 1.2232788857290855e-05, + "loss": 2.404, + "step": 16837 + }, + { + "epoch": 1.3588895165846178, + "grad_norm": 0.7219386696815491, + "learning_rate": 1.2225223897408833e-05, + "loss": 2.3778, + "step": 16838 + }, + { + "epoch": 1.3589702203212009, + "grad_norm": 0.6935911774635315, + "learning_rate": 1.2217661125103663e-05, + "loss": 2.4617, + "step": 16839 + }, + { + "epoch": 1.359050924057784, + "grad_norm": 0.7885910272598267, + "learning_rate": 1.2210100540563828e-05, + "loss": 2.4467, + "step": 16840 + }, + { + "epoch": 1.3591316277943668, + "grad_norm": 0.6690255403518677, + "learning_rate": 1.220254214397778e-05, + "loss": 2.381, + "step": 16841 + }, + { + "epoch": 1.35921233153095, + "grad_norm": 0.7592741847038269, + "learning_rate": 1.2194985935533887e-05, + "loss": 2.4459, + "step": 16842 + }, + { + "epoch": 1.359293035267533, + "grad_norm": 0.827460527420044, + "learning_rate": 1.2187431915420466e-05, + "loss": 2.3842, + "step": 16843 + }, + { + "epoch": 1.3593737390041158, + "grad_norm": 0.7313764691352844, + "learning_rate": 1.2179880083825811e-05, + "loss": 2.3938, + "step": 16844 + }, + { + "epoch": 1.359454442740699, + "grad_norm": 0.7093486189842224, + "learning_rate": 1.2172330440938084e-05, + "loss": 2.4316, + "step": 16845 + }, + { + "epoch": 1.359535146477282, + "grad_norm": 0.6805742383003235, + "learning_rate": 1.2164782986945467e-05, + "loss": 2.4372, + "step": 16846 + }, + { + "epoch": 1.3596158502138649, + "grad_norm": 0.7525961399078369, + "learning_rate": 1.2157237722036064e-05, + "loss": 2.3867, + "step": 16847 + }, + { + "epoch": 1.359696553950448, + "grad_norm": 0.723896861076355, + "learning_rate": 1.2149694646397947e-05, + "loss": 2.4685, + "step": 16848 + }, + { + "epoch": 1.3597772576870308, + "grad_norm": 0.704448938369751, + "learning_rate": 1.2142153760219055e-05, + "loss": 
2.4463, + "step": 16849 + }, + { + "epoch": 1.359857961423614, + "grad_norm": 0.7207927703857422, + "learning_rate": 1.2134615063687349e-05, + "loss": 2.3549, + "step": 16850 + }, + { + "epoch": 1.3599386651601968, + "grad_norm": 0.7106234431266785, + "learning_rate": 1.2127078556990724e-05, + "loss": 2.4145, + "step": 16851 + }, + { + "epoch": 1.3600193688967799, + "grad_norm": 0.7740694284439087, + "learning_rate": 1.2119544240316993e-05, + "loss": 2.3999, + "step": 16852 + }, + { + "epoch": 1.360100072633363, + "grad_norm": 0.6696181297302246, + "learning_rate": 1.2112012113853954e-05, + "loss": 2.4046, + "step": 16853 + }, + { + "epoch": 1.3601807763699458, + "grad_norm": 0.6758043169975281, + "learning_rate": 1.2104482177789334e-05, + "loss": 2.4021, + "step": 16854 + }, + { + "epoch": 1.360261480106529, + "grad_norm": 0.6659380793571472, + "learning_rate": 1.2096954432310758e-05, + "loss": 2.4145, + "step": 16855 + }, + { + "epoch": 1.360342183843112, + "grad_norm": 0.6889290809631348, + "learning_rate": 1.2089428877605858e-05, + "loss": 2.3486, + "step": 16856 + }, + { + "epoch": 1.3604228875796949, + "grad_norm": 0.6755563020706177, + "learning_rate": 1.2081905513862201e-05, + "loss": 2.4294, + "step": 16857 + }, + { + "epoch": 1.360503591316278, + "grad_norm": 0.7662243843078613, + "learning_rate": 1.2074384341267276e-05, + "loss": 2.414, + "step": 16858 + }, + { + "epoch": 1.360584295052861, + "grad_norm": 0.7432721853256226, + "learning_rate": 1.2066865360008517e-05, + "loss": 2.4314, + "step": 16859 + }, + { + "epoch": 1.360664998789444, + "grad_norm": 0.6465074419975281, + "learning_rate": 1.2059348570273366e-05, + "loss": 2.3349, + "step": 16860 + }, + { + "epoch": 1.360745702526027, + "grad_norm": 0.6940968632698059, + "learning_rate": 1.2051833972249105e-05, + "loss": 2.4539, + "step": 16861 + }, + { + "epoch": 1.36082640626261, + "grad_norm": 0.7211138010025024, + "learning_rate": 1.2044321566123019e-05, + "loss": 2.4041, + "step": 16862 + }, + { + 
"epoch": 1.360907109999193, + "grad_norm": 0.6746649146080017, + "learning_rate": 1.2036811352082367e-05, + "loss": 2.4329, + "step": 16863 + }, + { + "epoch": 1.360987813735776, + "grad_norm": 0.7502184510231018, + "learning_rate": 1.2029303330314345e-05, + "loss": 2.407, + "step": 16864 + }, + { + "epoch": 1.361068517472359, + "grad_norm": 0.7192596793174744, + "learning_rate": 1.2021797501006027e-05, + "loss": 2.3907, + "step": 16865 + }, + { + "epoch": 1.361149221208942, + "grad_norm": 0.6682254672050476, + "learning_rate": 1.2014293864344483e-05, + "loss": 2.391, + "step": 16866 + }, + { + "epoch": 1.3612299249455249, + "grad_norm": 0.680969774723053, + "learning_rate": 1.2006792420516755e-05, + "loss": 2.3479, + "step": 16867 + }, + { + "epoch": 1.361310628682108, + "grad_norm": 0.682671308517456, + "learning_rate": 1.1999293169709757e-05, + "loss": 2.4097, + "step": 16868 + }, + { + "epoch": 1.361391332418691, + "grad_norm": 0.7030573487281799, + "learning_rate": 1.199179611211041e-05, + "loss": 2.4514, + "step": 16869 + }, + { + "epoch": 1.361472036155274, + "grad_norm": 0.670630693435669, + "learning_rate": 1.1984301247905582e-05, + "loss": 2.3982, + "step": 16870 + }, + { + "epoch": 1.361552739891857, + "grad_norm": 0.6993644833564758, + "learning_rate": 1.1976808577282017e-05, + "loss": 2.4297, + "step": 16871 + }, + { + "epoch": 1.36163344362844, + "grad_norm": 0.7448122501373291, + "learning_rate": 1.1969318100426486e-05, + "loss": 2.3612, + "step": 16872 + }, + { + "epoch": 1.361714147365023, + "grad_norm": 0.7014498114585876, + "learning_rate": 1.1961829817525649e-05, + "loss": 2.3451, + "step": 16873 + }, + { + "epoch": 1.361794851101606, + "grad_norm": 0.7140750885009766, + "learning_rate": 1.195434372876616e-05, + "loss": 2.4231, + "step": 16874 + }, + { + "epoch": 1.3618755548381891, + "grad_norm": 0.7377427816390991, + "learning_rate": 1.1946859834334567e-05, + "loss": 2.4055, + "step": 16875 + }, + { + "epoch": 1.361956258574772, + "grad_norm": 
0.7969191670417786, + "learning_rate": 1.1939378134417433e-05, + "loss": 2.3503, + "step": 16876 + }, + { + "epoch": 1.362036962311355, + "grad_norm": 0.6821554899215698, + "learning_rate": 1.1931898629201155e-05, + "loss": 2.4259, + "step": 16877 + }, + { + "epoch": 1.3621176660479382, + "grad_norm": 0.6598221659660339, + "learning_rate": 1.1924421318872182e-05, + "loss": 2.3833, + "step": 16878 + }, + { + "epoch": 1.362198369784521, + "grad_norm": 0.8031432628631592, + "learning_rate": 1.1916946203616863e-05, + "loss": 2.5077, + "step": 16879 + }, + { + "epoch": 1.362279073521104, + "grad_norm": 0.7247405648231506, + "learning_rate": 1.190947328362152e-05, + "loss": 2.426, + "step": 16880 + }, + { + "epoch": 1.362359777257687, + "grad_norm": 0.7256691455841064, + "learning_rate": 1.1902002559072344e-05, + "loss": 2.474, + "step": 16881 + }, + { + "epoch": 1.36244048099427, + "grad_norm": 0.7382180094718933, + "learning_rate": 1.1894534030155558e-05, + "loss": 2.4487, + "step": 16882 + }, + { + "epoch": 1.362521184730853, + "grad_norm": 0.700179398059845, + "learning_rate": 1.1887067697057297e-05, + "loss": 2.3836, + "step": 16883 + }, + { + "epoch": 1.362601888467436, + "grad_norm": 0.706106424331665, + "learning_rate": 1.1879603559963638e-05, + "loss": 2.4304, + "step": 16884 + }, + { + "epoch": 1.362682592204019, + "grad_norm": 0.7514815926551819, + "learning_rate": 1.1872141619060606e-05, + "loss": 2.4895, + "step": 16885 + }, + { + "epoch": 1.362763295940602, + "grad_norm": 0.6605612635612488, + "learning_rate": 1.1864681874534201e-05, + "loss": 2.3569, + "step": 16886 + }, + { + "epoch": 1.362843999677185, + "grad_norm": 0.6366496682167053, + "learning_rate": 1.1857224326570283e-05, + "loss": 2.3919, + "step": 16887 + }, + { + "epoch": 1.3629247034137681, + "grad_norm": 0.8100820183753967, + "learning_rate": 1.1849768975354736e-05, + "loss": 2.5063, + "step": 16888 + }, + { + "epoch": 1.363005407150351, + "grad_norm": 0.685127854347229, + "learning_rate": 
1.1842315821073403e-05, + "loss": 2.4647, + "step": 16889 + }, + { + "epoch": 1.363086110886934, + "grad_norm": 0.696172833442688, + "learning_rate": 1.1834864863911987e-05, + "loss": 2.4224, + "step": 16890 + }, + { + "epoch": 1.3631668146235172, + "grad_norm": 0.6558032035827637, + "learning_rate": 1.1827416104056199e-05, + "loss": 2.3619, + "step": 16891 + }, + { + "epoch": 1.3632475183601, + "grad_norm": 0.744687020778656, + "learning_rate": 1.1819969541691689e-05, + "loss": 2.4669, + "step": 16892 + }, + { + "epoch": 1.3633282220966831, + "grad_norm": 0.6925212740898132, + "learning_rate": 1.1812525177004052e-05, + "loss": 2.3967, + "step": 16893 + }, + { + "epoch": 1.363408925833266, + "grad_norm": 0.6861244440078735, + "learning_rate": 1.1805083010178797e-05, + "loss": 2.3979, + "step": 16894 + }, + { + "epoch": 1.363489629569849, + "grad_norm": 0.6987108588218689, + "learning_rate": 1.179764304140143e-05, + "loss": 2.4263, + "step": 16895 + }, + { + "epoch": 1.363570333306432, + "grad_norm": 0.6940091848373413, + "learning_rate": 1.179020527085738e-05, + "loss": 2.4328, + "step": 16896 + }, + { + "epoch": 1.363651037043015, + "grad_norm": 0.6831968426704407, + "learning_rate": 1.1782769698731966e-05, + "loss": 2.427, + "step": 16897 + }, + { + "epoch": 1.3637317407795981, + "grad_norm": 0.7370985746383667, + "learning_rate": 1.177533632521054e-05, + "loss": 2.3711, + "step": 16898 + }, + { + "epoch": 1.363812444516181, + "grad_norm": 0.8176774978637695, + "learning_rate": 1.1767905150478376e-05, + "loss": 2.4337, + "step": 16899 + }, + { + "epoch": 1.363893148252764, + "grad_norm": 0.786318302154541, + "learning_rate": 1.1760476174720637e-05, + "loss": 2.5099, + "step": 16900 + }, + { + "epoch": 1.3639738519893472, + "grad_norm": 0.7309854626655579, + "learning_rate": 1.1753049398122495e-05, + "loss": 2.46, + "step": 16901 + }, + { + "epoch": 1.36405455572593, + "grad_norm": 0.7410863637924194, + "learning_rate": 1.1745624820869039e-05, + "loss": 2.4249, + 
"step": 16902 + }, + { + "epoch": 1.3641352594625131, + "grad_norm": 0.7059988379478455, + "learning_rate": 1.1738202443145308e-05, + "loss": 2.4964, + "step": 16903 + }, + { + "epoch": 1.3642159631990962, + "grad_norm": 0.7351845502853394, + "learning_rate": 1.1730782265136287e-05, + "loss": 2.4694, + "step": 16904 + }, + { + "epoch": 1.364296666935679, + "grad_norm": 0.6928153038024902, + "learning_rate": 1.1723364287026938e-05, + "loss": 2.426, + "step": 16905 + }, + { + "epoch": 1.3643773706722622, + "grad_norm": 0.759920060634613, + "learning_rate": 1.1715948509002083e-05, + "loss": 2.4359, + "step": 16906 + }, + { + "epoch": 1.3644580744088453, + "grad_norm": 0.6655696630477905, + "learning_rate": 1.1708534931246573e-05, + "loss": 2.4118, + "step": 16907 + }, + { + "epoch": 1.3645387781454281, + "grad_norm": 0.6912528872489929, + "learning_rate": 1.170112355394517e-05, + "loss": 2.4257, + "step": 16908 + }, + { + "epoch": 1.3646194818820112, + "grad_norm": 0.6612871289253235, + "learning_rate": 1.1693714377282604e-05, + "loss": 2.4192, + "step": 16909 + }, + { + "epoch": 1.364700185618594, + "grad_norm": 0.6548018455505371, + "learning_rate": 1.1686307401443486e-05, + "loss": 2.4054, + "step": 16910 + }, + { + "epoch": 1.3647808893551772, + "grad_norm": 0.7749961018562317, + "learning_rate": 1.1678902626612443e-05, + "loss": 2.44, + "step": 16911 + }, + { + "epoch": 1.36486159309176, + "grad_norm": 0.7187496423721313, + "learning_rate": 1.1671500052974039e-05, + "loss": 2.4033, + "step": 16912 + }, + { + "epoch": 1.3649422968283431, + "grad_norm": 0.7002814412117004, + "learning_rate": 1.1664099680712715e-05, + "loss": 2.4442, + "step": 16913 + }, + { + "epoch": 1.3650230005649262, + "grad_norm": 0.6852529644966125, + "learning_rate": 1.1656701510012946e-05, + "loss": 2.4253, + "step": 16914 + }, + { + "epoch": 1.365103704301509, + "grad_norm": 0.6922035813331604, + "learning_rate": 1.1649305541059142e-05, + "loss": 2.4406, + "step": 16915 + }, + { + "epoch": 
1.3651844080380922, + "grad_norm": 0.6883397698402405, + "learning_rate": 1.1641911774035563e-05, + "loss": 2.4064, + "step": 16916 + }, + { + "epoch": 1.3652651117746752, + "grad_norm": 0.7101531624794006, + "learning_rate": 1.163452020912652e-05, + "loss": 2.4068, + "step": 16917 + }, + { + "epoch": 1.365345815511258, + "grad_norm": 0.728369951248169, + "learning_rate": 1.1627130846516231e-05, + "loss": 2.4319, + "step": 16918 + }, + { + "epoch": 1.3654265192478412, + "grad_norm": 0.6765053272247314, + "learning_rate": 1.161974368638884e-05, + "loss": 2.3922, + "step": 16919 + }, + { + "epoch": 1.3655072229844243, + "grad_norm": 0.6909242868423462, + "learning_rate": 1.1612358728928475e-05, + "loss": 2.4124, + "step": 16920 + }, + { + "epoch": 1.3655879267210072, + "grad_norm": 0.735650897026062, + "learning_rate": 1.1604975974319177e-05, + "loss": 2.5137, + "step": 16921 + }, + { + "epoch": 1.3656686304575902, + "grad_norm": 0.6587653756141663, + "learning_rate": 1.1597595422744934e-05, + "loss": 2.4163, + "step": 16922 + }, + { + "epoch": 1.3657493341941733, + "grad_norm": 0.700282096862793, + "learning_rate": 1.159021707438971e-05, + "loss": 2.4272, + "step": 16923 + }, + { + "epoch": 1.3658300379307562, + "grad_norm": 0.7175682783126831, + "learning_rate": 1.1582840929437365e-05, + "loss": 2.4598, + "step": 16924 + }, + { + "epoch": 1.3659107416673393, + "grad_norm": 0.6725881695747375, + "learning_rate": 1.157546698807176e-05, + "loss": 2.4064, + "step": 16925 + }, + { + "epoch": 1.3659914454039221, + "grad_norm": 0.7130467295646667, + "learning_rate": 1.1568095250476651e-05, + "loss": 2.3851, + "step": 16926 + }, + { + "epoch": 1.3660721491405052, + "grad_norm": 0.6859269142150879, + "learning_rate": 1.1560725716835785e-05, + "loss": 2.3577, + "step": 16927 + }, + { + "epoch": 1.366152852877088, + "grad_norm": 0.7037541270256042, + "learning_rate": 1.1553358387332824e-05, + "loss": 2.4402, + "step": 16928 + }, + { + "epoch": 1.3662335566136712, + 
"grad_norm": 0.7094031572341919, + "learning_rate": 1.1545993262151366e-05, + "loss": 2.4036, + "step": 16929 + }, + { + "epoch": 1.3663142603502543, + "grad_norm": 0.6953302025794983, + "learning_rate": 1.1538630341474965e-05, + "loss": 2.4192, + "step": 16930 + }, + { + "epoch": 1.3663949640868371, + "grad_norm": 0.7012252807617188, + "learning_rate": 1.1531269625487163e-05, + "loss": 2.4207, + "step": 16931 + }, + { + "epoch": 1.3664756678234202, + "grad_norm": 0.6616495847702026, + "learning_rate": 1.1523911114371366e-05, + "loss": 2.4187, + "step": 16932 + }, + { + "epoch": 1.3665563715600033, + "grad_norm": 0.6819868087768555, + "learning_rate": 1.1516554808310975e-05, + "loss": 2.448, + "step": 16933 + }, + { + "epoch": 1.3666370752965862, + "grad_norm": 0.6869969964027405, + "learning_rate": 1.1509200707489343e-05, + "loss": 2.4134, + "step": 16934 + }, + { + "epoch": 1.3667177790331693, + "grad_norm": 0.6600778698921204, + "learning_rate": 1.1501848812089733e-05, + "loss": 2.4159, + "step": 16935 + }, + { + "epoch": 1.3667984827697524, + "grad_norm": 0.668712317943573, + "learning_rate": 1.1494499122295398e-05, + "loss": 2.41, + "step": 16936 + }, + { + "epoch": 1.3668791865063352, + "grad_norm": 0.767365574836731, + "learning_rate": 1.1487151638289518e-05, + "loss": 2.3856, + "step": 16937 + }, + { + "epoch": 1.3669598902429183, + "grad_norm": 0.721546471118927, + "learning_rate": 1.1479806360255174e-05, + "loss": 2.4038, + "step": 16938 + }, + { + "epoch": 1.3670405939795012, + "grad_norm": 0.6796963810920715, + "learning_rate": 1.1472463288375456e-05, + "loss": 2.3698, + "step": 16939 + }, + { + "epoch": 1.3671212977160843, + "grad_norm": 0.7340671420097351, + "learning_rate": 1.1465122422833363e-05, + "loss": 2.4296, + "step": 16940 + }, + { + "epoch": 1.3672020014526671, + "grad_norm": 0.7173369526863098, + "learning_rate": 1.145778376381187e-05, + "loss": 2.3923, + "step": 16941 + }, + { + "epoch": 1.3672827051892502, + "grad_norm": 
0.6683956980705261, + "learning_rate": 1.1450447311493839e-05, + "loss": 2.4092, + "step": 16942 + }, + { + "epoch": 1.3673634089258333, + "grad_norm": 0.6457851529121399, + "learning_rate": 1.1443113066062129e-05, + "loss": 2.3467, + "step": 16943 + }, + { + "epoch": 1.3674441126624162, + "grad_norm": 0.6870608925819397, + "learning_rate": 1.1435781027699532e-05, + "loss": 2.3766, + "step": 16944 + }, + { + "epoch": 1.3675248163989993, + "grad_norm": 0.6496049165725708, + "learning_rate": 1.1428451196588775e-05, + "loss": 2.4464, + "step": 16945 + }, + { + "epoch": 1.3676055201355823, + "grad_norm": 0.7554739117622375, + "learning_rate": 1.1421123572912551e-05, + "loss": 2.4243, + "step": 16946 + }, + { + "epoch": 1.3676862238721652, + "grad_norm": 0.7208122611045837, + "learning_rate": 1.1413798156853495e-05, + "loss": 2.3699, + "step": 16947 + }, + { + "epoch": 1.3677669276087483, + "grad_norm": 0.7072176337242126, + "learning_rate": 1.1406474948594126e-05, + "loss": 2.4011, + "step": 16948 + }, + { + "epoch": 1.3678476313453314, + "grad_norm": 0.7316476106643677, + "learning_rate": 1.1399153948316999e-05, + "loss": 2.4508, + "step": 16949 + }, + { + "epoch": 1.3679283350819142, + "grad_norm": 0.8518069386482239, + "learning_rate": 1.1391835156204577e-05, + "loss": 2.4197, + "step": 16950 + }, + { + "epoch": 1.3680090388184973, + "grad_norm": 0.6700364947319031, + "learning_rate": 1.1384518572439228e-05, + "loss": 2.4272, + "step": 16951 + }, + { + "epoch": 1.3680897425550804, + "grad_norm": 0.7007749676704407, + "learning_rate": 1.1377204197203317e-05, + "loss": 2.3777, + "step": 16952 + }, + { + "epoch": 1.3681704462916633, + "grad_norm": 0.6792053580284119, + "learning_rate": 1.1369892030679141e-05, + "loss": 2.4487, + "step": 16953 + }, + { + "epoch": 1.3682511500282464, + "grad_norm": 0.6913022398948669, + "learning_rate": 1.1362582073048932e-05, + "loss": 2.3757, + "step": 16954 + }, + { + "epoch": 1.3683318537648292, + "grad_norm": 0.648248016834259, + 
"learning_rate": 1.135527432449488e-05, + "loss": 2.3482, + "step": 16955 + }, + { + "epoch": 1.3684125575014123, + "grad_norm": 0.6711798906326294, + "learning_rate": 1.1347968785199115e-05, + "loss": 2.4096, + "step": 16956 + }, + { + "epoch": 1.3684932612379952, + "grad_norm": 0.6932381987571716, + "learning_rate": 1.1340665455343724e-05, + "loss": 2.3834, + "step": 16957 + }, + { + "epoch": 1.3685739649745783, + "grad_norm": 0.6890178918838501, + "learning_rate": 1.1333364335110697e-05, + "loss": 2.4182, + "step": 16958 + }, + { + "epoch": 1.3686546687111614, + "grad_norm": 0.6612519025802612, + "learning_rate": 1.1326065424681997e-05, + "loss": 2.3691, + "step": 16959 + }, + { + "epoch": 1.3687353724477442, + "grad_norm": 0.7123190760612488, + "learning_rate": 1.131876872423957e-05, + "loss": 2.3919, + "step": 16960 + }, + { + "epoch": 1.3688160761843273, + "grad_norm": 0.6615463495254517, + "learning_rate": 1.1311474233965214e-05, + "loss": 2.4266, + "step": 16961 + }, + { + "epoch": 1.3688967799209104, + "grad_norm": 0.7320190668106079, + "learning_rate": 1.130418195404076e-05, + "loss": 2.4268, + "step": 16962 + }, + { + "epoch": 1.3689774836574933, + "grad_norm": 0.6845116019248962, + "learning_rate": 1.1296891884647965e-05, + "loss": 2.3972, + "step": 16963 + }, + { + "epoch": 1.3690581873940764, + "grad_norm": 0.70455002784729, + "learning_rate": 1.1289604025968448e-05, + "loss": 2.4183, + "step": 16964 + }, + { + "epoch": 1.3691388911306595, + "grad_norm": 0.6952407956123352, + "learning_rate": 1.128231837818392e-05, + "loss": 2.4276, + "step": 16965 + }, + { + "epoch": 1.3692195948672423, + "grad_norm": 0.7939464449882507, + "learning_rate": 1.1275034941475938e-05, + "loss": 2.4072, + "step": 16966 + }, + { + "epoch": 1.3693002986038254, + "grad_norm": 0.6974930763244629, + "learning_rate": 1.1267753716026007e-05, + "loss": 2.4133, + "step": 16967 + }, + { + "epoch": 1.3693810023404085, + "grad_norm": 0.7187508344650269, + "learning_rate": 
1.126047470201559e-05, + "loss": 2.3588, + "step": 16968 + }, + { + "epoch": 1.3694617060769914, + "grad_norm": 0.6887609958648682, + "learning_rate": 1.1253197899626134e-05, + "loss": 2.4322, + "step": 16969 + }, + { + "epoch": 1.3695424098135744, + "grad_norm": 0.679957389831543, + "learning_rate": 1.1245923309038964e-05, + "loss": 2.3907, + "step": 16970 + }, + { + "epoch": 1.3696231135501573, + "grad_norm": 0.7540870308876038, + "learning_rate": 1.1238650930435378e-05, + "loss": 2.4752, + "step": 16971 + }, + { + "epoch": 1.3697038172867404, + "grad_norm": 0.7697634100914001, + "learning_rate": 1.1231380763996635e-05, + "loss": 2.4366, + "step": 16972 + }, + { + "epoch": 1.3697845210233233, + "grad_norm": 0.6836850643157959, + "learning_rate": 1.1224112809903954e-05, + "loss": 2.3511, + "step": 16973 + }, + { + "epoch": 1.3698652247599064, + "grad_norm": 0.6904506683349609, + "learning_rate": 1.1216847068338421e-05, + "loss": 2.4109, + "step": 16974 + }, + { + "epoch": 1.3699459284964894, + "grad_norm": 0.6579318046569824, + "learning_rate": 1.1209583539481127e-05, + "loss": 2.4391, + "step": 16975 + }, + { + "epoch": 1.3700266322330723, + "grad_norm": 0.7107192277908325, + "learning_rate": 1.120232222351314e-05, + "loss": 2.399, + "step": 16976 + }, + { + "epoch": 1.3701073359696554, + "grad_norm": 0.7581583261489868, + "learning_rate": 1.119506312061539e-05, + "loss": 2.4817, + "step": 16977 + }, + { + "epoch": 1.3701880397062385, + "grad_norm": 0.6836642622947693, + "learning_rate": 1.11878062309688e-05, + "loss": 2.4415, + "step": 16978 + }, + { + "epoch": 1.3702687434428213, + "grad_norm": 0.6842699646949768, + "learning_rate": 1.118055155475426e-05, + "loss": 2.4045, + "step": 16979 + }, + { + "epoch": 1.3703494471794044, + "grad_norm": 0.7630519270896912, + "learning_rate": 1.1173299092152534e-05, + "loss": 2.4314, + "step": 16980 + }, + { + "epoch": 1.3704301509159875, + "grad_norm": 0.7334303259849548, + "learning_rate": 1.116604884334439e-05, + 
"loss": 2.3564, + "step": 16981 + }, + { + "epoch": 1.3705108546525704, + "grad_norm": 0.6929439306259155, + "learning_rate": 1.1158800808510538e-05, + "loss": 2.4258, + "step": 16982 + }, + { + "epoch": 1.3705915583891535, + "grad_norm": 0.6387187838554382, + "learning_rate": 1.1151554987831591e-05, + "loss": 2.3263, + "step": 16983 + }, + { + "epoch": 1.3706722621257363, + "grad_norm": 0.7279032468795776, + "learning_rate": 1.1144311381488136e-05, + "loss": 2.4074, + "step": 16984 + }, + { + "epoch": 1.3707529658623194, + "grad_norm": 0.7066916227340698, + "learning_rate": 1.113706998966072e-05, + "loss": 2.4358, + "step": 16985 + }, + { + "epoch": 1.3708336695989023, + "grad_norm": 0.6753098964691162, + "learning_rate": 1.1129830812529807e-05, + "loss": 2.4195, + "step": 16986 + }, + { + "epoch": 1.3709143733354854, + "grad_norm": 0.6728894114494324, + "learning_rate": 1.112259385027582e-05, + "loss": 2.3712, + "step": 16987 + }, + { + "epoch": 1.3709950770720685, + "grad_norm": 0.7251775860786438, + "learning_rate": 1.1115359103079115e-05, + "loss": 2.4063, + "step": 16988 + }, + { + "epoch": 1.3710757808086513, + "grad_norm": 0.6797254085540771, + "learning_rate": 1.1108126571120036e-05, + "loss": 2.395, + "step": 16989 + }, + { + "epoch": 1.3711564845452344, + "grad_norm": 0.7505605220794678, + "learning_rate": 1.1100896254578786e-05, + "loss": 2.4044, + "step": 16990 + }, + { + "epoch": 1.3712371882818175, + "grad_norm": 0.7126416563987732, + "learning_rate": 1.1093668153635594e-05, + "loss": 2.4043, + "step": 16991 + }, + { + "epoch": 1.3713178920184004, + "grad_norm": 0.6550771594047546, + "learning_rate": 1.1086442268470609e-05, + "loss": 2.3515, + "step": 16992 + }, + { + "epoch": 1.3713985957549835, + "grad_norm": 0.7253621816635132, + "learning_rate": 1.1079218599263874e-05, + "loss": 2.4109, + "step": 16993 + }, + { + "epoch": 1.3714792994915666, + "grad_norm": 0.7272186875343323, + "learning_rate": 1.1071997146195468e-05, + "loss": 2.3531, + "step": 
16994 + }, + { + "epoch": 1.3715600032281494, + "grad_norm": 0.6841129660606384, + "learning_rate": 1.1064777909445345e-05, + "loss": 2.4031, + "step": 16995 + }, + { + "epoch": 1.3716407069647325, + "grad_norm": 0.692945659160614, + "learning_rate": 1.1057560889193441e-05, + "loss": 2.3858, + "step": 16996 + }, + { + "epoch": 1.3717214107013156, + "grad_norm": 0.721182644367218, + "learning_rate": 1.1050346085619612e-05, + "loss": 2.3871, + "step": 16997 + }, + { + "epoch": 1.3718021144378985, + "grad_norm": 0.722960889339447, + "learning_rate": 1.1043133498903702e-05, + "loss": 2.3452, + "step": 16998 + }, + { + "epoch": 1.3718828181744815, + "grad_norm": 0.7148451805114746, + "learning_rate": 1.1035923129225412e-05, + "loss": 2.3905, + "step": 16999 + }, + { + "epoch": 1.3719635219110644, + "grad_norm": 0.7118532061576843, + "learning_rate": 1.1028714976764486e-05, + "loss": 2.3894, + "step": 17000 + }, + { + "epoch": 1.3719635219110644, + "eval_loss": 2.3730249404907227, + "eval_runtime": 769.4165, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.568, + "step": 17000 + }, + { + "epoch": 1.3720442256476475, + "grad_norm": 0.6933719515800476, + "learning_rate": 1.1021509041700539e-05, + "loss": 2.394, + "step": 17001 + }, + { + "epoch": 1.3721249293842304, + "grad_norm": 0.7330136895179749, + "learning_rate": 1.1014305324213215e-05, + "loss": 2.4466, + "step": 17002 + }, + { + "epoch": 1.3722056331208135, + "grad_norm": 0.6614598631858826, + "learning_rate": 1.1007103824481979e-05, + "loss": 2.4441, + "step": 17003 + }, + { + "epoch": 1.3722863368573965, + "grad_norm": 0.8030059933662415, + "learning_rate": 1.0999904542686356e-05, + "loss": 2.4284, + "step": 17004 + }, + { + "epoch": 1.3723670405939794, + "grad_norm": 0.6881710886955261, + "learning_rate": 1.099270747900576e-05, + "loss": 2.4433, + "step": 17005 + }, + { + "epoch": 1.3724477443305625, + "grad_norm": 0.661325216293335, + "learning_rate": 1.0985512633619555e-05, + "loss": 2.4144, + 
"step": 17006 + }, + { + "epoch": 1.3725284480671456, + "grad_norm": 0.6896070241928101, + "learning_rate": 1.0978320006707065e-05, + "loss": 2.3972, + "step": 17007 + }, + { + "epoch": 1.3726091518037284, + "grad_norm": 0.7043858766555786, + "learning_rate": 1.0971129598447561e-05, + "loss": 2.4082, + "step": 17008 + }, + { + "epoch": 1.3726898555403115, + "grad_norm": 0.7162652611732483, + "learning_rate": 1.0963941409020217e-05, + "loss": 2.3696, + "step": 17009 + }, + { + "epoch": 1.3727705592768946, + "grad_norm": 0.6809261441230774, + "learning_rate": 1.0956755438604194e-05, + "loss": 2.4392, + "step": 17010 + }, + { + "epoch": 1.3728512630134775, + "grad_norm": 0.6897100806236267, + "learning_rate": 1.0949571687378602e-05, + "loss": 2.4942, + "step": 17011 + }, + { + "epoch": 1.3729319667500606, + "grad_norm": 0.6903488039970398, + "learning_rate": 1.0942390155522442e-05, + "loss": 2.3936, + "step": 17012 + }, + { + "epoch": 1.3730126704866437, + "grad_norm": 0.676643431186676, + "learning_rate": 1.0935210843214727e-05, + "loss": 2.3972, + "step": 17013 + }, + { + "epoch": 1.3730933742232265, + "grad_norm": 0.6523454189300537, + "learning_rate": 1.092803375063437e-05, + "loss": 2.4914, + "step": 17014 + }, + { + "epoch": 1.3731740779598096, + "grad_norm": 0.7250776886940002, + "learning_rate": 1.092085887796026e-05, + "loss": 2.4493, + "step": 17015 + }, + { + "epoch": 1.3732547816963925, + "grad_norm": 0.6791245937347412, + "learning_rate": 1.091368622537119e-05, + "loss": 2.4553, + "step": 17016 + }, + { + "epoch": 1.3733354854329756, + "grad_norm": 0.8086698651313782, + "learning_rate": 1.0906515793045934e-05, + "loss": 2.457, + "step": 17017 + }, + { + "epoch": 1.3734161891695584, + "grad_norm": 0.6653520464897156, + "learning_rate": 1.0899347581163221e-05, + "loss": 2.3974, + "step": 17018 + }, + { + "epoch": 1.3734968929061415, + "grad_norm": 0.6596232056617737, + "learning_rate": 1.0892181589901651e-05, + "loss": 2.3771, + "step": 17019 + }, + { + 
"epoch": 1.3735775966427246, + "grad_norm": 0.7042080760002136, + "learning_rate": 1.0885017819439858e-05, + "loss": 2.4493, + "step": 17020 + }, + { + "epoch": 1.3736583003793075, + "grad_norm": 0.6882427930831909, + "learning_rate": 1.0877856269956377e-05, + "loss": 2.4293, + "step": 17021 + }, + { + "epoch": 1.3737390041158906, + "grad_norm": 0.6881027221679688, + "learning_rate": 1.0870696941629676e-05, + "loss": 2.4503, + "step": 17022 + }, + { + "epoch": 1.3738197078524736, + "grad_norm": 0.7282640337944031, + "learning_rate": 1.086353983463818e-05, + "loss": 2.4173, + "step": 17023 + }, + { + "epoch": 1.3739004115890565, + "grad_norm": 0.7281018495559692, + "learning_rate": 1.0856384949160314e-05, + "loss": 2.4514, + "step": 17024 + }, + { + "epoch": 1.3739811153256396, + "grad_norm": 0.7185690402984619, + "learning_rate": 1.0849232285374323e-05, + "loss": 2.4244, + "step": 17025 + }, + { + "epoch": 1.3740618190622227, + "grad_norm": 0.7732044458389282, + "learning_rate": 1.0842081843458496e-05, + "loss": 2.4855, + "step": 17026 + }, + { + "epoch": 1.3741425227988056, + "grad_norm": 0.6599788665771484, + "learning_rate": 1.0834933623591093e-05, + "loss": 2.4339, + "step": 17027 + }, + { + "epoch": 1.3742232265353886, + "grad_norm": 0.7193527817726135, + "learning_rate": 1.0827787625950192e-05, + "loss": 2.4284, + "step": 17028 + }, + { + "epoch": 1.3743039302719717, + "grad_norm": 0.7255674004554749, + "learning_rate": 1.082064385071393e-05, + "loss": 2.4056, + "step": 17029 + }, + { + "epoch": 1.3743846340085546, + "grad_norm": 0.7823398113250732, + "learning_rate": 1.0813502298060363e-05, + "loss": 2.4268, + "step": 17030 + }, + { + "epoch": 1.3744653377451377, + "grad_norm": 0.6839333176612854, + "learning_rate": 1.0806362968167427e-05, + "loss": 2.4415, + "step": 17031 + }, + { + "epoch": 1.3745460414817205, + "grad_norm": 0.798973560333252, + "learning_rate": 1.079922586121308e-05, + "loss": 2.4251, + "step": 17032 + }, + { + "epoch": 
1.3746267452183036, + "grad_norm": 0.7234559655189514, + "learning_rate": 1.0792090977375203e-05, + "loss": 2.3821, + "step": 17033 + }, + { + "epoch": 1.3747074489548865, + "grad_norm": 0.6686646938323975, + "learning_rate": 1.0784958316831628e-05, + "loss": 2.4123, + "step": 17034 + }, + { + "epoch": 1.3747881526914696, + "grad_norm": 0.6656081676483154, + "learning_rate": 1.0777827879760084e-05, + "loss": 2.3527, + "step": 17035 + }, + { + "epoch": 1.3748688564280527, + "grad_norm": 0.6609933972358704, + "learning_rate": 1.0770699666338303e-05, + "loss": 2.4128, + "step": 17036 + }, + { + "epoch": 1.3749495601646355, + "grad_norm": 0.710719108581543, + "learning_rate": 1.0763573676743921e-05, + "loss": 2.4634, + "step": 17037 + }, + { + "epoch": 1.3750302639012186, + "grad_norm": 0.6638451814651489, + "learning_rate": 1.0756449911154554e-05, + "loss": 2.3828, + "step": 17038 + }, + { + "epoch": 1.3751109676378017, + "grad_norm": 0.7525094151496887, + "learning_rate": 1.0749328369747746e-05, + "loss": 2.4078, + "step": 17039 + }, + { + "epoch": 1.3751916713743846, + "grad_norm": 0.7343288064002991, + "learning_rate": 1.0742209052701002e-05, + "loss": 2.4731, + "step": 17040 + }, + { + "epoch": 1.3752723751109677, + "grad_norm": 0.7966243624687195, + "learning_rate": 1.0735091960191701e-05, + "loss": 2.3501, + "step": 17041 + }, + { + "epoch": 1.3753530788475508, + "grad_norm": 0.6693055033683777, + "learning_rate": 1.0727977092397256e-05, + "loss": 2.4214, + "step": 17042 + }, + { + "epoch": 1.3754337825841336, + "grad_norm": 0.6831601858139038, + "learning_rate": 1.0720864449494994e-05, + "loss": 2.4029, + "step": 17043 + }, + { + "epoch": 1.3755144863207167, + "grad_norm": 0.7081588506698608, + "learning_rate": 1.0713754031662149e-05, + "loss": 2.4532, + "step": 17044 + }, + { + "epoch": 1.3755951900572996, + "grad_norm": 0.698469877243042, + "learning_rate": 1.0706645839075957e-05, + "loss": 2.4181, + "step": 17045 + }, + { + "epoch": 1.3756758937938827, + 
"grad_norm": 0.652568519115448, + "learning_rate": 1.0699539871913556e-05, + "loss": 2.4761, + "step": 17046 + }, + { + "epoch": 1.3757565975304655, + "grad_norm": 0.7698256969451904, + "learning_rate": 1.0692436130352068e-05, + "loss": 2.4742, + "step": 17047 + }, + { + "epoch": 1.3758373012670486, + "grad_norm": 0.7192606329917908, + "learning_rate": 1.068533461456851e-05, + "loss": 2.401, + "step": 17048 + }, + { + "epoch": 1.3759180050036317, + "grad_norm": 0.6296666860580444, + "learning_rate": 1.0678235324739894e-05, + "loss": 2.4628, + "step": 17049 + }, + { + "epoch": 1.3759987087402146, + "grad_norm": 0.7048724293708801, + "learning_rate": 1.0671138261043156e-05, + "loss": 2.4799, + "step": 17050 + }, + { + "epoch": 1.3760794124767977, + "grad_norm": 0.6724091172218323, + "learning_rate": 1.0664043423655146e-05, + "loss": 2.4108, + "step": 17051 + }, + { + "epoch": 1.3761601162133807, + "grad_norm": 0.6380212306976318, + "learning_rate": 1.0656950812752709e-05, + "loss": 2.3943, + "step": 17052 + }, + { + "epoch": 1.3762408199499636, + "grad_norm": 0.7005279660224915, + "learning_rate": 1.0649860428512604e-05, + "loss": 2.3623, + "step": 17053 + }, + { + "epoch": 1.3763215236865467, + "grad_norm": 0.719219982624054, + "learning_rate": 1.0642772271111534e-05, + "loss": 2.3873, + "step": 17054 + }, + { + "epoch": 1.3764022274231298, + "grad_norm": 0.7318363785743713, + "learning_rate": 1.063568634072616e-05, + "loss": 2.4335, + "step": 17055 + }, + { + "epoch": 1.3764829311597127, + "grad_norm": 0.7131830453872681, + "learning_rate": 1.062860263753308e-05, + "loss": 2.3829, + "step": 17056 + }, + { + "epoch": 1.3765636348962957, + "grad_norm": 0.7030664086341858, + "learning_rate": 1.0621521161708836e-05, + "loss": 2.3216, + "step": 17057 + }, + { + "epoch": 1.3766443386328788, + "grad_norm": 0.738999605178833, + "learning_rate": 1.0614441913429929e-05, + "loss": 2.4951, + "step": 17058 + }, + { + "epoch": 1.3767250423694617, + "grad_norm": 
0.6926800012588501, + "learning_rate": 1.0607364892872806e-05, + "loss": 2.3977, + "step": 17059 + }, + { + "epoch": 1.3768057461060448, + "grad_norm": 0.6439639925956726, + "learning_rate": 1.0600290100213805e-05, + "loss": 2.4049, + "step": 17060 + }, + { + "epoch": 1.3768864498426276, + "grad_norm": 0.7035220265388489, + "learning_rate": 1.0593217535629264e-05, + "loss": 2.4212, + "step": 17061 + }, + { + "epoch": 1.3769671535792107, + "grad_norm": 0.705183207988739, + "learning_rate": 1.0586147199295482e-05, + "loss": 2.4244, + "step": 17062 + }, + { + "epoch": 1.3770478573157936, + "grad_norm": 0.7036949396133423, + "learning_rate": 1.057907909138861e-05, + "loss": 2.4254, + "step": 17063 + }, + { + "epoch": 1.3771285610523767, + "grad_norm": 0.7137075066566467, + "learning_rate": 1.0572013212084841e-05, + "loss": 2.4135, + "step": 17064 + }, + { + "epoch": 1.3772092647889598, + "grad_norm": 0.6973327398300171, + "learning_rate": 1.0564949561560267e-05, + "loss": 2.4568, + "step": 17065 + }, + { + "epoch": 1.3772899685255426, + "grad_norm": 0.7157370448112488, + "learning_rate": 1.0557888139990946e-05, + "loss": 2.3877, + "step": 17066 + }, + { + "epoch": 1.3773706722621257, + "grad_norm": 0.6622396111488342, + "learning_rate": 1.0550828947552848e-05, + "loss": 2.3636, + "step": 17067 + }, + { + "epoch": 1.3774513759987088, + "grad_norm": 0.7295750975608826, + "learning_rate": 1.0543771984421913e-05, + "loss": 2.4192, + "step": 17068 + }, + { + "epoch": 1.3775320797352917, + "grad_norm": 0.7245587110519409, + "learning_rate": 1.0536717250774053e-05, + "loss": 2.3575, + "step": 17069 + }, + { + "epoch": 1.3776127834718748, + "grad_norm": 0.6923871040344238, + "learning_rate": 1.052966474678503e-05, + "loss": 2.4547, + "step": 17070 + }, + { + "epoch": 1.3776934872084579, + "grad_norm": 0.6754410862922668, + "learning_rate": 1.0522614472630632e-05, + "loss": 2.4469, + "step": 17071 + }, + { + "epoch": 1.3777741909450407, + "grad_norm": 0.6979227662086487, + 
"learning_rate": 1.0515566428486612e-05, + "loss": 2.407, + "step": 17072 + }, + { + "epoch": 1.3778548946816238, + "grad_norm": 0.7050029635429382, + "learning_rate": 1.050852061452856e-05, + "loss": 2.3937, + "step": 17073 + }, + { + "epoch": 1.377935598418207, + "grad_norm": 0.676030158996582, + "learning_rate": 1.0501477030932117e-05, + "loss": 2.4144, + "step": 17074 + }, + { + "epoch": 1.3780163021547898, + "grad_norm": 0.6984726786613464, + "learning_rate": 1.0494435677872827e-05, + "loss": 2.4541, + "step": 17075 + }, + { + "epoch": 1.3780970058913729, + "grad_norm": 0.6987836956977844, + "learning_rate": 1.0487396555526141e-05, + "loss": 2.3984, + "step": 17076 + }, + { + "epoch": 1.3781777096279557, + "grad_norm": 0.7071307897567749, + "learning_rate": 1.0480359664067529e-05, + "loss": 2.3861, + "step": 17077 + }, + { + "epoch": 1.3782584133645388, + "grad_norm": 0.6713467836380005, + "learning_rate": 1.0473325003672384e-05, + "loss": 2.4029, + "step": 17078 + }, + { + "epoch": 1.3783391171011217, + "grad_norm": 0.7389634847640991, + "learning_rate": 1.046629257451599e-05, + "loss": 2.415, + "step": 17079 + }, + { + "epoch": 1.3784198208377048, + "grad_norm": 0.7122809886932373, + "learning_rate": 1.0459262376773627e-05, + "loss": 2.4278, + "step": 17080 + }, + { + "epoch": 1.3785005245742878, + "grad_norm": 0.7036066651344299, + "learning_rate": 1.045223441062051e-05, + "loss": 2.4276, + "step": 17081 + }, + { + "epoch": 1.3785812283108707, + "grad_norm": 0.7709795236587524, + "learning_rate": 1.0445208676231811e-05, + "loss": 2.4398, + "step": 17082 + }, + { + "epoch": 1.3786619320474538, + "grad_norm": 0.7131057977676392, + "learning_rate": 1.0438185173782589e-05, + "loss": 2.4414, + "step": 17083 + }, + { + "epoch": 1.3787426357840369, + "grad_norm": 0.7172132730484009, + "learning_rate": 1.0431163903447904e-05, + "loss": 2.4574, + "step": 17084 + }, + { + "epoch": 1.3788233395206198, + "grad_norm": 0.6760988831520081, + "learning_rate": 
1.0424144865402774e-05, + "loss": 2.442, + "step": 17085 + }, + { + "epoch": 1.3789040432572028, + "grad_norm": 0.701665997505188, + "learning_rate": 1.041712805982209e-05, + "loss": 2.4012, + "step": 17086 + }, + { + "epoch": 1.378984746993786, + "grad_norm": 0.661851167678833, + "learning_rate": 1.0410113486880746e-05, + "loss": 2.3591, + "step": 17087 + }, + { + "epoch": 1.3790654507303688, + "grad_norm": 0.6929948925971985, + "learning_rate": 1.0403101146753569e-05, + "loss": 2.4285, + "step": 17088 + }, + { + "epoch": 1.3791461544669519, + "grad_norm": 0.703576922416687, + "learning_rate": 1.0396091039615308e-05, + "loss": 2.4643, + "step": 17089 + }, + { + "epoch": 1.3792268582035347, + "grad_norm": 0.6697961688041687, + "learning_rate": 1.038908316564069e-05, + "loss": 2.4046, + "step": 17090 + }, + { + "epoch": 1.3793075619401178, + "grad_norm": 0.7338510155677795, + "learning_rate": 1.0382077525004396e-05, + "loss": 2.3507, + "step": 17091 + }, + { + "epoch": 1.3793882656767007, + "grad_norm": 0.6967883110046387, + "learning_rate": 1.0375074117880956e-05, + "loss": 2.4458, + "step": 17092 + }, + { + "epoch": 1.3794689694132838, + "grad_norm": 0.7204736471176147, + "learning_rate": 1.0368072944444962e-05, + "loss": 2.427, + "step": 17093 + }, + { + "epoch": 1.3795496731498669, + "grad_norm": 0.7665053606033325, + "learning_rate": 1.0361074004870907e-05, + "loss": 2.3985, + "step": 17094 + }, + { + "epoch": 1.3796303768864497, + "grad_norm": 0.7157881855964661, + "learning_rate": 1.0354077299333187e-05, + "loss": 2.4229, + "step": 17095 + }, + { + "epoch": 1.3797110806230328, + "grad_norm": 0.6643819808959961, + "learning_rate": 1.0347082828006194e-05, + "loss": 2.357, + "step": 17096 + }, + { + "epoch": 1.379791784359616, + "grad_norm": 0.6965252757072449, + "learning_rate": 1.0340090591064255e-05, + "loss": 2.42, + "step": 17097 + }, + { + "epoch": 1.3798724880961988, + "grad_norm": 0.767876923084259, + "learning_rate": 1.0333100588681633e-05, + "loss": 
2.4019, + "step": 17098 + }, + { + "epoch": 1.3799531918327819, + "grad_norm": 0.6687513589859009, + "learning_rate": 1.0326112821032541e-05, + "loss": 2.3515, + "step": 17099 + }, + { + "epoch": 1.380033895569365, + "grad_norm": 0.674007773399353, + "learning_rate": 1.031912728829112e-05, + "loss": 2.4281, + "step": 17100 + }, + { + "epoch": 1.3801145993059478, + "grad_norm": 0.6486735939979553, + "learning_rate": 1.0312143990631495e-05, + "loss": 2.4324, + "step": 17101 + }, + { + "epoch": 1.380195303042531, + "grad_norm": 0.7174487709999084, + "learning_rate": 1.0305162928227674e-05, + "loss": 2.4445, + "step": 17102 + }, + { + "epoch": 1.380276006779114, + "grad_norm": 0.6515870690345764, + "learning_rate": 1.029818410125365e-05, + "loss": 2.4078, + "step": 17103 + }, + { + "epoch": 1.3803567105156969, + "grad_norm": 0.697830080986023, + "learning_rate": 1.0291207509883383e-05, + "loss": 2.4024, + "step": 17104 + }, + { + "epoch": 1.38043741425228, + "grad_norm": 0.7636575102806091, + "learning_rate": 1.0284233154290711e-05, + "loss": 2.3912, + "step": 17105 + }, + { + "epoch": 1.3805181179888628, + "grad_norm": 0.6910358667373657, + "learning_rate": 1.0277261034649466e-05, + "loss": 2.4099, + "step": 17106 + }, + { + "epoch": 1.380598821725446, + "grad_norm": 0.6778038740158081, + "learning_rate": 1.0270291151133415e-05, + "loss": 2.4111, + "step": 17107 + }, + { + "epoch": 1.3806795254620288, + "grad_norm": 0.6927553415298462, + "learning_rate": 1.0263323503916255e-05, + "loss": 2.4239, + "step": 17108 + }, + { + "epoch": 1.3807602291986119, + "grad_norm": 0.6654019355773926, + "learning_rate": 1.0256358093171658e-05, + "loss": 2.4374, + "step": 17109 + }, + { + "epoch": 1.380840932935195, + "grad_norm": 0.7174705266952515, + "learning_rate": 1.0249394919073219e-05, + "loss": 2.4142, + "step": 17110 + }, + { + "epoch": 1.3809216366717778, + "grad_norm": 0.7386046648025513, + "learning_rate": 1.0242433981794463e-05, + "loss": 2.4453, + "step": 17111 + }, + { + 
"epoch": 1.381002340408361, + "grad_norm": 0.6723792552947998, + "learning_rate": 1.0235475281508866e-05, + "loss": 2.4595, + "step": 17112 + }, + { + "epoch": 1.381083044144944, + "grad_norm": 0.7069140672683716, + "learning_rate": 1.0228518818389887e-05, + "loss": 2.4434, + "step": 17113 + }, + { + "epoch": 1.3811637478815268, + "grad_norm": 0.7239270210266113, + "learning_rate": 1.0221564592610888e-05, + "loss": 2.5121, + "step": 17114 + }, + { + "epoch": 1.38124445161811, + "grad_norm": 0.6907179951667786, + "learning_rate": 1.0214612604345175e-05, + "loss": 2.3673, + "step": 17115 + }, + { + "epoch": 1.381325155354693, + "grad_norm": 0.6908708810806274, + "learning_rate": 1.020766285376602e-05, + "loss": 2.4419, + "step": 17116 + }, + { + "epoch": 1.381405859091276, + "grad_norm": 0.6947401165962219, + "learning_rate": 1.0200715341046618e-05, + "loss": 2.4566, + "step": 17117 + }, + { + "epoch": 1.381486562827859, + "grad_norm": 0.687776505947113, + "learning_rate": 1.019377006636012e-05, + "loss": 2.4631, + "step": 17118 + }, + { + "epoch": 1.381567266564442, + "grad_norm": 0.7059805989265442, + "learning_rate": 1.0186827029879642e-05, + "loss": 2.3892, + "step": 17119 + }, + { + "epoch": 1.381647970301025, + "grad_norm": 0.685351550579071, + "learning_rate": 1.0179886231778224e-05, + "loss": 2.4041, + "step": 17120 + }, + { + "epoch": 1.381728674037608, + "grad_norm": 0.6662759184837341, + "learning_rate": 1.0172947672228817e-05, + "loss": 2.4254, + "step": 17121 + }, + { + "epoch": 1.3818093777741909, + "grad_norm": 0.6769386529922485, + "learning_rate": 1.0166011351404358e-05, + "loss": 2.5057, + "step": 17122 + }, + { + "epoch": 1.381890081510774, + "grad_norm": 0.8168340921401978, + "learning_rate": 1.0159077269477746e-05, + "loss": 2.4936, + "step": 17123 + }, + { + "epoch": 1.3819707852473568, + "grad_norm": 0.6659611463546753, + "learning_rate": 1.0152145426621751e-05, + "loss": 2.4062, + "step": 17124 + }, + { + "epoch": 1.38205148898394, + 
"grad_norm": 0.7131680846214294, + "learning_rate": 1.0145215823009158e-05, + "loss": 2.3767, + "step": 17125 + }, + { + "epoch": 1.382132192720523, + "grad_norm": 0.7241190075874329, + "learning_rate": 1.0138288458812673e-05, + "loss": 2.4082, + "step": 17126 + }, + { + "epoch": 1.3822128964571059, + "grad_norm": 0.6905619502067566, + "learning_rate": 1.0131363334204947e-05, + "loss": 2.3859, + "step": 17127 + }, + { + "epoch": 1.382293600193689, + "grad_norm": 0.7163190245628357, + "learning_rate": 1.0124440449358551e-05, + "loss": 2.4238, + "step": 17128 + }, + { + "epoch": 1.382374303930272, + "grad_norm": 0.6857485175132751, + "learning_rate": 1.0117519804446041e-05, + "loss": 2.4076, + "step": 17129 + }, + { + "epoch": 1.382455007666855, + "grad_norm": 0.6817807555198669, + "learning_rate": 1.0110601399639918e-05, + "loss": 2.4226, + "step": 17130 + }, + { + "epoch": 1.382535711403438, + "grad_norm": 0.714421808719635, + "learning_rate": 1.0103685235112558e-05, + "loss": 2.3581, + "step": 17131 + }, + { + "epoch": 1.382616415140021, + "grad_norm": 0.7885473370552063, + "learning_rate": 1.0096771311036357e-05, + "loss": 2.3821, + "step": 17132 + }, + { + "epoch": 1.382697118876604, + "grad_norm": 0.6432569026947021, + "learning_rate": 1.0089859627583642e-05, + "loss": 2.3899, + "step": 17133 + }, + { + "epoch": 1.382777822613187, + "grad_norm": 0.6620168089866638, + "learning_rate": 1.0082950184926632e-05, + "loss": 2.4503, + "step": 17134 + }, + { + "epoch": 1.38285852634977, + "grad_norm": 0.6495606303215027, + "learning_rate": 1.0076042983237544e-05, + "loss": 2.3606, + "step": 17135 + }, + { + "epoch": 1.382939230086353, + "grad_norm": 0.7192469835281372, + "learning_rate": 1.006913802268855e-05, + "loss": 2.425, + "step": 17136 + }, + { + "epoch": 1.3830199338229359, + "grad_norm": 0.6835115551948547, + "learning_rate": 1.0062235303451706e-05, + "loss": 2.3605, + "step": 17137 + }, + { + "epoch": 1.383100637559519, + "grad_norm": 0.7469161748886108, + 
"learning_rate": 1.0055334825699059e-05, + "loss": 2.4811, + "step": 17138 + }, + { + "epoch": 1.383181341296102, + "grad_norm": 0.7641372084617615, + "learning_rate": 1.0048436589602572e-05, + "loss": 2.4317, + "step": 17139 + }, + { + "epoch": 1.383262045032685, + "grad_norm": 0.7059566378593445, + "learning_rate": 1.0041540595334186e-05, + "loss": 2.4677, + "step": 17140 + }, + { + "epoch": 1.383342748769268, + "grad_norm": 0.7218295931816101, + "learning_rate": 1.0034646843065777e-05, + "loss": 2.3889, + "step": 17141 + }, + { + "epoch": 1.383423452505851, + "grad_norm": 0.7059688568115234, + "learning_rate": 1.0027755332969124e-05, + "loss": 2.4276, + "step": 17142 + }, + { + "epoch": 1.383504156242434, + "grad_norm": 0.7444838285446167, + "learning_rate": 1.0020866065216017e-05, + "loss": 2.4647, + "step": 17143 + }, + { + "epoch": 1.383584859979017, + "grad_norm": 0.662229597568512, + "learning_rate": 1.0013979039978127e-05, + "loss": 2.3913, + "step": 17144 + }, + { + "epoch": 1.3836655637156001, + "grad_norm": 0.6696064472198486, + "learning_rate": 1.0007094257427097e-05, + "loss": 2.3904, + "step": 17145 + }, + { + "epoch": 1.383746267452183, + "grad_norm": 0.7516316175460815, + "learning_rate": 1.0000211717734541e-05, + "loss": 2.3621, + "step": 17146 + }, + { + "epoch": 1.383826971188766, + "grad_norm": 0.6833345293998718, + "learning_rate": 9.993331421071961e-06, + "loss": 2.4113, + "step": 17147 + }, + { + "epoch": 1.3839076749253492, + "grad_norm": 0.675074577331543, + "learning_rate": 9.986453367610827e-06, + "loss": 2.398, + "step": 17148 + }, + { + "epoch": 1.383988378661932, + "grad_norm": 0.7046546936035156, + "learning_rate": 9.979577557522579e-06, + "loss": 2.4441, + "step": 17149 + }, + { + "epoch": 1.3840690823985151, + "grad_norm": 0.7228004336357117, + "learning_rate": 9.972703990978582e-06, + "loss": 2.4451, + "step": 17150 + }, + { + "epoch": 1.384149786135098, + "grad_norm": 0.6642273664474487, + "learning_rate": 9.965832668150132e-06, 
+ "loss": 2.3809, + "step": 17151 + }, + { + "epoch": 1.384230489871681, + "grad_norm": 0.7238738536834717, + "learning_rate": 9.958963589208493e-06, + "loss": 2.4283, + "step": 17152 + }, + { + "epoch": 1.384311193608264, + "grad_norm": 0.7356482744216919, + "learning_rate": 9.952096754324847e-06, + "loss": 2.4666, + "step": 17153 + }, + { + "epoch": 1.384391897344847, + "grad_norm": 0.7092667818069458, + "learning_rate": 9.945232163670327e-06, + "loss": 2.5028, + "step": 17154 + }, + { + "epoch": 1.38447260108143, + "grad_norm": 0.6972974538803101, + "learning_rate": 9.938369817416049e-06, + "loss": 2.4223, + "step": 17155 + }, + { + "epoch": 1.384553304818013, + "grad_norm": 0.7163854837417603, + "learning_rate": 9.931509715733e-06, + "loss": 2.4256, + "step": 17156 + }, + { + "epoch": 1.384634008554596, + "grad_norm": 0.7319930195808411, + "learning_rate": 9.924651858792166e-06, + "loss": 2.4208, + "step": 17157 + }, + { + "epoch": 1.3847147122911792, + "grad_norm": 0.6813424825668335, + "learning_rate": 9.917796246764466e-06, + "loss": 2.3794, + "step": 17158 + }, + { + "epoch": 1.384795416027762, + "grad_norm": 0.7059821486473083, + "learning_rate": 9.910942879820761e-06, + "loss": 2.4462, + "step": 17159 + }, + { + "epoch": 1.384876119764345, + "grad_norm": 0.726754903793335, + "learning_rate": 9.904091758131862e-06, + "loss": 2.4037, + "step": 17160 + }, + { + "epoch": 1.3849568235009282, + "grad_norm": 0.6972840428352356, + "learning_rate": 9.897242881868508e-06, + "loss": 2.4275, + "step": 17161 + }, + { + "epoch": 1.385037527237511, + "grad_norm": 0.6906942129135132, + "learning_rate": 9.890396251201405e-06, + "loss": 2.4547, + "step": 17162 + }, + { + "epoch": 1.3851182309740941, + "grad_norm": 0.6928840279579163, + "learning_rate": 9.883551866301165e-06, + "loss": 2.4622, + "step": 17163 + }, + { + "epoch": 1.3851989347106772, + "grad_norm": 0.6840118169784546, + "learning_rate": 9.876709727338374e-06, + "loss": 2.4546, + "step": 17164 + }, + { + 
"epoch": 1.38527963844726, + "grad_norm": 0.6800721287727356, + "learning_rate": 9.86986983448358e-06, + "loss": 2.508, + "step": 17165 + }, + { + "epoch": 1.3853603421838432, + "grad_norm": 0.678666353225708, + "learning_rate": 9.863032187907217e-06, + "loss": 2.383, + "step": 17166 + }, + { + "epoch": 1.385441045920426, + "grad_norm": 0.7311298251152039, + "learning_rate": 9.856196787779714e-06, + "loss": 2.4111, + "step": 17167 + }, + { + "epoch": 1.3855217496570091, + "grad_norm": 0.6527237296104431, + "learning_rate": 9.849363634271425e-06, + "loss": 2.3592, + "step": 17168 + }, + { + "epoch": 1.385602453393592, + "grad_norm": 0.7478907108306885, + "learning_rate": 9.842532727552645e-06, + "loss": 2.4321, + "step": 17169 + }, + { + "epoch": 1.385683157130175, + "grad_norm": 0.6855963468551636, + "learning_rate": 9.835704067793628e-06, + "loss": 2.3966, + "step": 17170 + }, + { + "epoch": 1.3857638608667582, + "grad_norm": 0.7468744516372681, + "learning_rate": 9.828877655164571e-06, + "loss": 2.3695, + "step": 17171 + }, + { + "epoch": 1.385844564603341, + "grad_norm": 0.7127626538276672, + "learning_rate": 9.82205348983558e-06, + "loss": 2.4718, + "step": 17172 + }, + { + "epoch": 1.3859252683399241, + "grad_norm": 0.6831564903259277, + "learning_rate": 9.815231571976735e-06, + "loss": 2.373, + "step": 17173 + }, + { + "epoch": 1.3860059720765072, + "grad_norm": 0.7020923495292664, + "learning_rate": 9.808411901758075e-06, + "loss": 2.4516, + "step": 17174 + }, + { + "epoch": 1.38608667581309, + "grad_norm": 0.8129574060440063, + "learning_rate": 9.801594479349563e-06, + "loss": 2.4157, + "step": 17175 + }, + { + "epoch": 1.3861673795496732, + "grad_norm": 0.6603944301605225, + "learning_rate": 9.794779304921087e-06, + "loss": 2.386, + "step": 17176 + }, + { + "epoch": 1.3862480832862563, + "grad_norm": 0.669863224029541, + "learning_rate": 9.78796637864251e-06, + "loss": 2.4273, + "step": 17177 + }, + { + "epoch": 1.3863287870228391, + "grad_norm": 
0.7654524445533752, + "learning_rate": 9.78115570068362e-06, + "loss": 2.4868, + "step": 17178 + }, + { + "epoch": 1.3864094907594222, + "grad_norm": 0.7104062438011169, + "learning_rate": 9.774347271214169e-06, + "loss": 2.4684, + "step": 17179 + }, + { + "epoch": 1.3864901944960053, + "grad_norm": 0.6499059796333313, + "learning_rate": 9.767541090403831e-06, + "loss": 2.4131, + "step": 17180 + }, + { + "epoch": 1.3865708982325882, + "grad_norm": 0.7515703439712524, + "learning_rate": 9.760737158422262e-06, + "loss": 2.4484, + "step": 17181 + }, + { + "epoch": 1.3866516019691713, + "grad_norm": 0.7019369006156921, + "learning_rate": 9.753935475438991e-06, + "loss": 2.4393, + "step": 17182 + }, + { + "epoch": 1.3867323057057541, + "grad_norm": 0.7191709280014038, + "learning_rate": 9.747136041623562e-06, + "loss": 2.4533, + "step": 17183 + }, + { + "epoch": 1.3868130094423372, + "grad_norm": 0.6970816254615784, + "learning_rate": 9.740338857145438e-06, + "loss": 2.4886, + "step": 17184 + }, + { + "epoch": 1.38689371317892, + "grad_norm": 0.6682983636856079, + "learning_rate": 9.733543922173982e-06, + "loss": 2.3896, + "step": 17185 + }, + { + "epoch": 1.3869744169155032, + "grad_norm": 0.735559344291687, + "learning_rate": 9.726751236878584e-06, + "loss": 2.4777, + "step": 17186 + }, + { + "epoch": 1.3870551206520862, + "grad_norm": 0.790460467338562, + "learning_rate": 9.71996080142854e-06, + "loss": 2.3773, + "step": 17187 + }, + { + "epoch": 1.3871358243886691, + "grad_norm": 0.6593269109725952, + "learning_rate": 9.713172615993038e-06, + "loss": 2.461, + "step": 17188 + }, + { + "epoch": 1.3872165281252522, + "grad_norm": 0.7211339473724365, + "learning_rate": 9.706386680741275e-06, + "loss": 2.4155, + "step": 17189 + }, + { + "epoch": 1.3872972318618353, + "grad_norm": 0.7158735990524292, + "learning_rate": 9.699602995842406e-06, + "loss": 2.4214, + "step": 17190 + }, + { + "epoch": 1.3873779355984182, + "grad_norm": 0.7172560095787048, + "learning_rate": 
9.692821561465493e-06, + "loss": 2.3617, + "step": 17191 + }, + { + "epoch": 1.3874586393350012, + "grad_norm": 0.721144437789917, + "learning_rate": 9.686042377779513e-06, + "loss": 2.3984, + "step": 17192 + }, + { + "epoch": 1.3875393430715843, + "grad_norm": 0.7066751718521118, + "learning_rate": 9.679265444953444e-06, + "loss": 2.4735, + "step": 17193 + }, + { + "epoch": 1.3876200468081672, + "grad_norm": 0.7111334204673767, + "learning_rate": 9.672490763156194e-06, + "loss": 2.4336, + "step": 17194 + }, + { + "epoch": 1.3877007505447503, + "grad_norm": 0.6845266222953796, + "learning_rate": 9.665718332556584e-06, + "loss": 2.466, + "step": 17195 + }, + { + "epoch": 1.3877814542813331, + "grad_norm": 0.6982793807983398, + "learning_rate": 9.6589481533234e-06, + "loss": 2.3819, + "step": 17196 + }, + { + "epoch": 1.3878621580179162, + "grad_norm": 0.8404912352561951, + "learning_rate": 9.652180225625407e-06, + "loss": 2.4329, + "step": 17197 + }, + { + "epoch": 1.387942861754499, + "grad_norm": 0.7335420250892639, + "learning_rate": 9.645414549631227e-06, + "loss": 2.4368, + "step": 17198 + }, + { + "epoch": 1.3880235654910822, + "grad_norm": 0.7425113916397095, + "learning_rate": 9.638651125509513e-06, + "loss": 2.41, + "step": 17199 + }, + { + "epoch": 1.3881042692276653, + "grad_norm": 0.6818472146987915, + "learning_rate": 9.631889953428818e-06, + "loss": 2.4227, + "step": 17200 + }, + { + "epoch": 1.3881849729642481, + "grad_norm": 0.6991598010063171, + "learning_rate": 9.625131033557655e-06, + "loss": 2.422, + "step": 17201 + }, + { + "epoch": 1.3882656767008312, + "grad_norm": 0.6927391886711121, + "learning_rate": 9.618374366064465e-06, + "loss": 2.4092, + "step": 17202 + }, + { + "epoch": 1.3883463804374143, + "grad_norm": 0.6987093687057495, + "learning_rate": 9.611619951117657e-06, + "loss": 2.419, + "step": 17203 + }, + { + "epoch": 1.3884270841739972, + "grad_norm": 0.7766227722167969, + "learning_rate": 9.604867788885552e-06, + "loss": 2.4174, + 
"step": 17204 + }, + { + "epoch": 1.3885077879105803, + "grad_norm": 0.77024245262146, + "learning_rate": 9.598117879536427e-06, + "loss": 2.3851, + "step": 17205 + }, + { + "epoch": 1.3885884916471634, + "grad_norm": 0.7106937170028687, + "learning_rate": 9.591370223238515e-06, + "loss": 2.3322, + "step": 17206 + }, + { + "epoch": 1.3886691953837462, + "grad_norm": 0.7056468725204468, + "learning_rate": 9.584624820160016e-06, + "loss": 2.4496, + "step": 17207 + }, + { + "epoch": 1.3887498991203293, + "grad_norm": 0.6738306879997253, + "learning_rate": 9.57788167046899e-06, + "loss": 2.3853, + "step": 17208 + }, + { + "epoch": 1.3888306028569124, + "grad_norm": 0.6830081343650818, + "learning_rate": 9.57114077433352e-06, + "loss": 2.3974, + "step": 17209 + }, + { + "epoch": 1.3889113065934953, + "grad_norm": 0.6968281865119934, + "learning_rate": 9.564402131921612e-06, + "loss": 2.4349, + "step": 17210 + }, + { + "epoch": 1.3889920103300784, + "grad_norm": 0.720506489276886, + "learning_rate": 9.55766574340119e-06, + "loss": 2.386, + "step": 17211 + }, + { + "epoch": 1.3890727140666612, + "grad_norm": 0.7361373901367188, + "learning_rate": 9.550931608940161e-06, + "loss": 2.4303, + "step": 17212 + }, + { + "epoch": 1.3891534178032443, + "grad_norm": 0.6967737674713135, + "learning_rate": 9.544199728706383e-06, + "loss": 2.4073, + "step": 17213 + }, + { + "epoch": 1.3892341215398272, + "grad_norm": 0.6645474433898926, + "learning_rate": 9.537470102867573e-06, + "loss": 2.4236, + "step": 17214 + }, + { + "epoch": 1.3893148252764103, + "grad_norm": 0.7314795851707458, + "learning_rate": 9.53074273159148e-06, + "loss": 2.4362, + "step": 17215 + }, + { + "epoch": 1.3893955290129933, + "grad_norm": 0.7935917377471924, + "learning_rate": 9.524017615045789e-06, + "loss": 2.3982, + "step": 17216 + }, + { + "epoch": 1.3894762327495762, + "grad_norm": 0.7083787322044373, + "learning_rate": 9.517294753398064e-06, + "loss": 2.4095, + "step": 17217 + }, + { + "epoch": 
1.3895569364861593, + "grad_norm": 0.6737664937973022, + "learning_rate": 9.510574146815876e-06, + "loss": 2.457, + "step": 17218 + }, + { + "epoch": 1.3896376402227424, + "grad_norm": 0.6705507040023804, + "learning_rate": 9.50385579546672e-06, + "loss": 2.3893, + "step": 17219 + }, + { + "epoch": 1.3897183439593253, + "grad_norm": 0.6711611151695251, + "learning_rate": 9.497139699518042e-06, + "loss": 2.3982, + "step": 17220 + }, + { + "epoch": 1.3897990476959083, + "grad_norm": 0.7133504748344421, + "learning_rate": 9.490425859137219e-06, + "loss": 2.4178, + "step": 17221 + }, + { + "epoch": 1.3898797514324914, + "grad_norm": 0.6962296366691589, + "learning_rate": 9.483714274491572e-06, + "loss": 2.4126, + "step": 17222 + }, + { + "epoch": 1.3899604551690743, + "grad_norm": 0.7658503651618958, + "learning_rate": 9.477004945748402e-06, + "loss": 2.3047, + "step": 17223 + }, + { + "epoch": 1.3900411589056574, + "grad_norm": 0.706066370010376, + "learning_rate": 9.470297873074885e-06, + "loss": 2.4055, + "step": 17224 + }, + { + "epoch": 1.3901218626422405, + "grad_norm": 0.6563149094581604, + "learning_rate": 9.463593056638187e-06, + "loss": 2.4425, + "step": 17225 + }, + { + "epoch": 1.3902025663788233, + "grad_norm": 0.7133740782737732, + "learning_rate": 9.45689049660543e-06, + "loss": 2.3917, + "step": 17226 + }, + { + "epoch": 1.3902832701154064, + "grad_norm": 0.6759207248687744, + "learning_rate": 9.450190193143626e-06, + "loss": 2.4261, + "step": 17227 + }, + { + "epoch": 1.3903639738519893, + "grad_norm": 0.7461724877357483, + "learning_rate": 9.443492146419786e-06, + "loss": 2.4121, + "step": 17228 + }, + { + "epoch": 1.3904446775885724, + "grad_norm": 0.6825011372566223, + "learning_rate": 9.436796356600842e-06, + "loss": 2.3746, + "step": 17229 + }, + { + "epoch": 1.3905253813251552, + "grad_norm": 0.7314637303352356, + "learning_rate": 9.430102823853659e-06, + "loss": 2.4246, + "step": 17230 + }, + { + "epoch": 1.3906060850617383, + "grad_norm": 
0.6963483095169067, + "learning_rate": 9.423411548345063e-06, + "loss": 2.3504, + "step": 17231 + }, + { + "epoch": 1.3906867887983214, + "grad_norm": 0.7879536747932434, + "learning_rate": 9.41672253024185e-06, + "loss": 2.4454, + "step": 17232 + }, + { + "epoch": 1.3907674925349043, + "grad_norm": 0.6961038708686829, + "learning_rate": 9.410035769710668e-06, + "loss": 2.4107, + "step": 17233 + }, + { + "epoch": 1.3908481962714874, + "grad_norm": 0.6528958082199097, + "learning_rate": 9.403351266918215e-06, + "loss": 2.4131, + "step": 17234 + }, + { + "epoch": 1.3909289000080705, + "grad_norm": 0.8091046810150146, + "learning_rate": 9.396669022031057e-06, + "loss": 2.4143, + "step": 17235 + }, + { + "epoch": 1.3910096037446533, + "grad_norm": 0.7430968880653381, + "learning_rate": 9.389989035215774e-06, + "loss": 2.4197, + "step": 17236 + }, + { + "epoch": 1.3910903074812364, + "grad_norm": 0.7089489102363586, + "learning_rate": 9.383311306638797e-06, + "loss": 2.4179, + "step": 17237 + }, + { + "epoch": 1.3911710112178195, + "grad_norm": 0.7121657729148865, + "learning_rate": 9.376635836466574e-06, + "loss": 2.4136, + "step": 17238 + }, + { + "epoch": 1.3912517149544024, + "grad_norm": 0.6793569326400757, + "learning_rate": 9.369962624865503e-06, + "loss": 2.4029, + "step": 17239 + }, + { + "epoch": 1.3913324186909855, + "grad_norm": 0.7534452080726624, + "learning_rate": 9.363291672001828e-06, + "loss": 2.421, + "step": 17240 + }, + { + "epoch": 1.3914131224275683, + "grad_norm": 0.6758937239646912, + "learning_rate": 9.356622978041873e-06, + "loss": 2.378, + "step": 17241 + }, + { + "epoch": 1.3914938261641514, + "grad_norm": 0.7330620288848877, + "learning_rate": 9.349956543151839e-06, + "loss": 2.3983, + "step": 17242 + }, + { + "epoch": 1.3915745299007343, + "grad_norm": 0.7044413089752197, + "learning_rate": 9.343292367497835e-06, + "loss": 2.4204, + "step": 17243 + }, + { + "epoch": 1.3916552336373174, + "grad_norm": 0.7051666975021362, + "learning_rate": 
9.336630451245954e-06, + "loss": 2.3994, + "step": 17244 + }, + { + "epoch": 1.3917359373739004, + "grad_norm": 0.721764326095581, + "learning_rate": 9.32997079456227e-06, + "loss": 2.4127, + "step": 17245 + }, + { + "epoch": 1.3918166411104833, + "grad_norm": 0.7074810862541199, + "learning_rate": 9.323313397612698e-06, + "loss": 2.4449, + "step": 17246 + }, + { + "epoch": 1.3918973448470664, + "grad_norm": 0.7203366160392761, + "learning_rate": 9.316658260563193e-06, + "loss": 2.3564, + "step": 17247 + }, + { + "epoch": 1.3919780485836495, + "grad_norm": 0.6879156827926636, + "learning_rate": 9.310005383579623e-06, + "loss": 2.3568, + "step": 17248 + }, + { + "epoch": 1.3920587523202324, + "grad_norm": 0.6491550803184509, + "learning_rate": 9.303354766827776e-06, + "loss": 2.421, + "step": 17249 + }, + { + "epoch": 1.3921394560568154, + "grad_norm": 0.683704674243927, + "learning_rate": 9.29670641047341e-06, + "loss": 2.4633, + "step": 17250 + }, + { + "epoch": 1.3922201597933985, + "grad_norm": 0.6716236472129822, + "learning_rate": 9.290060314682203e-06, + "loss": 2.4423, + "step": 17251 + }, + { + "epoch": 1.3923008635299814, + "grad_norm": 0.7086344957351685, + "learning_rate": 9.283416479619844e-06, + "loss": 2.3877, + "step": 17252 + }, + { + "epoch": 1.3923815672665645, + "grad_norm": 0.6638349294662476, + "learning_rate": 9.276774905451869e-06, + "loss": 2.4499, + "step": 17253 + }, + { + "epoch": 1.3924622710031476, + "grad_norm": 0.7091326713562012, + "learning_rate": 9.27013559234381e-06, + "loss": 2.4659, + "step": 17254 + }, + { + "epoch": 1.3925429747397304, + "grad_norm": 0.6906822323799133, + "learning_rate": 9.263498540461157e-06, + "loss": 2.4195, + "step": 17255 + }, + { + "epoch": 1.3926236784763135, + "grad_norm": 0.7003819942474365, + "learning_rate": 9.256863749969302e-06, + "loss": 2.4156, + "step": 17256 + }, + { + "epoch": 1.3927043822128964, + "grad_norm": 0.7270472645759583, + "learning_rate": 9.250231221033601e-06, + "loss": 2.4197, + 
"step": 17257 + }, + { + "epoch": 1.3927850859494795, + "grad_norm": 0.7070592641830444, + "learning_rate": 9.243600953819376e-06, + "loss": 2.4296, + "step": 17258 + }, + { + "epoch": 1.3928657896860623, + "grad_norm": 0.6560600996017456, + "learning_rate": 9.23697294849184e-06, + "loss": 2.4441, + "step": 17259 + }, + { + "epoch": 1.3929464934226454, + "grad_norm": 0.6654617190361023, + "learning_rate": 9.230347205216194e-06, + "loss": 2.3406, + "step": 17260 + }, + { + "epoch": 1.3930271971592285, + "grad_norm": 0.7147239446640015, + "learning_rate": 9.223723724157563e-06, + "loss": 2.4203, + "step": 17261 + }, + { + "epoch": 1.3931079008958114, + "grad_norm": 0.7148180603981018, + "learning_rate": 9.217102505481046e-06, + "loss": 2.4525, + "step": 17262 + }, + { + "epoch": 1.3931886046323945, + "grad_norm": 0.6779814958572388, + "learning_rate": 9.210483549351623e-06, + "loss": 2.4051, + "step": 17263 + }, + { + "epoch": 1.3932693083689776, + "grad_norm": 0.6880484223365784, + "learning_rate": 9.203866855934307e-06, + "loss": 2.4492, + "step": 17264 + }, + { + "epoch": 1.3933500121055604, + "grad_norm": 0.7845660448074341, + "learning_rate": 9.197252425393954e-06, + "loss": 2.4448, + "step": 17265 + }, + { + "epoch": 1.3934307158421435, + "grad_norm": 0.7001363635063171, + "learning_rate": 9.190640257895433e-06, + "loss": 2.4226, + "step": 17266 + }, + { + "epoch": 1.3935114195787266, + "grad_norm": 0.7282695770263672, + "learning_rate": 9.184030353603524e-06, + "loss": 2.4354, + "step": 17267 + }, + { + "epoch": 1.3935921233153095, + "grad_norm": 0.7547619342803955, + "learning_rate": 9.177422712683003e-06, + "loss": 2.456, + "step": 17268 + }, + { + "epoch": 1.3936728270518925, + "grad_norm": 0.7191921472549438, + "learning_rate": 9.170817335298499e-06, + "loss": 2.3923, + "step": 17269 + }, + { + "epoch": 1.3937535307884756, + "grad_norm": 0.6578717827796936, + "learning_rate": 9.164214221614654e-06, + "loss": 2.4354, + "step": 17270 + }, + { + "epoch": 
1.3938342345250585, + "grad_norm": 0.7156858444213867, + "learning_rate": 9.157613371796036e-06, + "loss": 2.3983, + "step": 17271 + }, + { + "epoch": 1.3939149382616416, + "grad_norm": 0.6779402494430542, + "learning_rate": 9.151014786007162e-06, + "loss": 2.435, + "step": 17272 + }, + { + "epoch": 1.3939956419982245, + "grad_norm": 0.7038381099700928, + "learning_rate": 9.144418464412486e-06, + "loss": 2.3848, + "step": 17273 + }, + { + "epoch": 1.3940763457348075, + "grad_norm": 0.7381990551948547, + "learning_rate": 9.13782440717641e-06, + "loss": 2.3693, + "step": 17274 + }, + { + "epoch": 1.3941570494713904, + "grad_norm": 0.6982381939888, + "learning_rate": 9.131232614463247e-06, + "loss": 2.4095, + "step": 17275 + }, + { + "epoch": 1.3942377532079735, + "grad_norm": 0.6968829035758972, + "learning_rate": 9.124643086437312e-06, + "loss": 2.3802, + "step": 17276 + }, + { + "epoch": 1.3943184569445566, + "grad_norm": 0.7584258317947388, + "learning_rate": 9.118055823262828e-06, + "loss": 2.4153, + "step": 17277 + }, + { + "epoch": 1.3943991606811394, + "grad_norm": 0.7331502437591553, + "learning_rate": 9.11147082510395e-06, + "loss": 2.4404, + "step": 17278 + }, + { + "epoch": 1.3944798644177225, + "grad_norm": 0.7939555048942566, + "learning_rate": 9.104888092124796e-06, + "loss": 2.4568, + "step": 17279 + }, + { + "epoch": 1.3945605681543056, + "grad_norm": 0.6752094626426697, + "learning_rate": 9.098307624489443e-06, + "loss": 2.3298, + "step": 17280 + }, + { + "epoch": 1.3946412718908885, + "grad_norm": 0.682428240776062, + "learning_rate": 9.091729422361872e-06, + "loss": 2.4449, + "step": 17281 + }, + { + "epoch": 1.3947219756274716, + "grad_norm": 0.7422902584075928, + "learning_rate": 9.085153485906051e-06, + "loss": 2.4, + "step": 17282 + }, + { + "epoch": 1.3948026793640547, + "grad_norm": 0.7528017163276672, + "learning_rate": 9.07857981528586e-06, + "loss": 2.4045, + "step": 17283 + }, + { + "epoch": 1.3948833831006375, + "grad_norm": 
0.622075080871582, + "learning_rate": 9.072008410665133e-06, + "loss": 2.3865, + "step": 17284 + }, + { + "epoch": 1.3949640868372206, + "grad_norm": 0.7127060890197754, + "learning_rate": 9.065439272207642e-06, + "loss": 2.4108, + "step": 17285 + }, + { + "epoch": 1.3950447905738037, + "grad_norm": 0.7381206750869751, + "learning_rate": 9.0588724000771e-06, + "loss": 2.4459, + "step": 17286 + }, + { + "epoch": 1.3951254943103866, + "grad_norm": 0.7453467845916748, + "learning_rate": 9.05230779443721e-06, + "loss": 2.4144, + "step": 17287 + }, + { + "epoch": 1.3952061980469694, + "grad_norm": 0.6772522330284119, + "learning_rate": 9.045745455451527e-06, + "loss": 2.4373, + "step": 17288 + }, + { + "epoch": 1.3952869017835525, + "grad_norm": 0.7005482316017151, + "learning_rate": 9.039185383283622e-06, + "loss": 2.3991, + "step": 17289 + }, + { + "epoch": 1.3953676055201356, + "grad_norm": 0.7172494530677795, + "learning_rate": 9.032627578096986e-06, + "loss": 2.4535, + "step": 17290 + }, + { + "epoch": 1.3954483092567185, + "grad_norm": 0.6911814212799072, + "learning_rate": 9.026072040055067e-06, + "loss": 2.3586, + "step": 17291 + }, + { + "epoch": 1.3955290129933016, + "grad_norm": 0.6708523035049438, + "learning_rate": 9.019518769321245e-06, + "loss": 2.4189, + "step": 17292 + }, + { + "epoch": 1.3956097167298847, + "grad_norm": 0.6716340780258179, + "learning_rate": 9.012967766058855e-06, + "loss": 2.3982, + "step": 17293 + }, + { + "epoch": 1.3956904204664675, + "grad_norm": 0.7001132965087891, + "learning_rate": 9.006419030431135e-06, + "loss": 2.3722, + "step": 17294 + }, + { + "epoch": 1.3957711242030506, + "grad_norm": 0.6912658214569092, + "learning_rate": 8.999872562601308e-06, + "loss": 2.371, + "step": 17295 + }, + { + "epoch": 1.3958518279396337, + "grad_norm": 0.7627947330474854, + "learning_rate": 8.993328362732545e-06, + "loss": 2.4123, + "step": 17296 + }, + { + "epoch": 1.3959325316762166, + "grad_norm": 0.6897323131561279, + "learning_rate": 
8.986786430987926e-06, + "loss": 2.4466, + "step": 17297 + }, + { + "epoch": 1.3960132354127996, + "grad_norm": 0.7040663361549377, + "learning_rate": 8.980246767530498e-06, + "loss": 2.4008, + "step": 17298 + }, + { + "epoch": 1.3960939391493827, + "grad_norm": 0.7423021197319031, + "learning_rate": 8.973709372523254e-06, + "loss": 2.421, + "step": 17299 + }, + { + "epoch": 1.3961746428859656, + "grad_norm": 0.7053872346878052, + "learning_rate": 8.967174246129128e-06, + "loss": 2.4217, + "step": 17300 + }, + { + "epoch": 1.3962553466225487, + "grad_norm": 0.7772163152694702, + "learning_rate": 8.960641388510959e-06, + "loss": 2.3686, + "step": 17301 + }, + { + "epoch": 1.3963360503591316, + "grad_norm": 0.7254317402839661, + "learning_rate": 8.954110799831582e-06, + "loss": 2.3974, + "step": 17302 + }, + { + "epoch": 1.3964167540957146, + "grad_norm": 0.6462311744689941, + "learning_rate": 8.94758248025378e-06, + "loss": 2.3506, + "step": 17303 + }, + { + "epoch": 1.3964974578322975, + "grad_norm": 0.693526029586792, + "learning_rate": 8.94105642994023e-06, + "loss": 2.3774, + "step": 17304 + }, + { + "epoch": 1.3965781615688806, + "grad_norm": 0.6220893263816833, + "learning_rate": 8.934532649053585e-06, + "loss": 2.3588, + "step": 17305 + }, + { + "epoch": 1.3966588653054637, + "grad_norm": 0.6866275668144226, + "learning_rate": 8.928011137756443e-06, + "loss": 2.4001, + "step": 17306 + }, + { + "epoch": 1.3967395690420465, + "grad_norm": 0.7290368676185608, + "learning_rate": 8.92149189621132e-06, + "loss": 2.3936, + "step": 17307 + }, + { + "epoch": 1.3968202727786296, + "grad_norm": 0.6699230670928955, + "learning_rate": 8.914974924580688e-06, + "loss": 2.3656, + "step": 17308 + }, + { + "epoch": 1.3969009765152127, + "grad_norm": 0.6863143444061279, + "learning_rate": 8.908460223027016e-06, + "loss": 2.4157, + "step": 17309 + }, + { + "epoch": 1.3969816802517956, + "grad_norm": 0.7856658697128296, + "learning_rate": 8.901947791712594e-06, + "loss": 2.3927, 
+ "step": 17310 + }, + { + "epoch": 1.3970623839883787, + "grad_norm": 0.692934513092041, + "learning_rate": 8.895437630799775e-06, + "loss": 2.4089, + "step": 17311 + }, + { + "epoch": 1.3971430877249618, + "grad_norm": 0.6908941268920898, + "learning_rate": 8.888929740450802e-06, + "loss": 2.3907, + "step": 17312 + }, + { + "epoch": 1.3972237914615446, + "grad_norm": 0.662405788898468, + "learning_rate": 8.88242412082786e-06, + "loss": 2.4287, + "step": 17313 + }, + { + "epoch": 1.3973044951981277, + "grad_norm": 0.6889618635177612, + "learning_rate": 8.875920772093094e-06, + "loss": 2.3815, + "step": 17314 + }, + { + "epoch": 1.3973851989347108, + "grad_norm": 0.6734819412231445, + "learning_rate": 8.869419694408586e-06, + "loss": 2.4046, + "step": 17315 + }, + { + "epoch": 1.3974659026712937, + "grad_norm": 0.6958059668540955, + "learning_rate": 8.862920887936378e-06, + "loss": 2.4449, + "step": 17316 + }, + { + "epoch": 1.3975466064078768, + "grad_norm": 0.6793306469917297, + "learning_rate": 8.856424352838389e-06, + "loss": 2.4023, + "step": 17317 + }, + { + "epoch": 1.3976273101444596, + "grad_norm": 0.6622069478034973, + "learning_rate": 8.84993008927656e-06, + "loss": 2.4098, + "step": 17318 + }, + { + "epoch": 1.3977080138810427, + "grad_norm": 0.6999792456626892, + "learning_rate": 8.843438097412771e-06, + "loss": 2.4205, + "step": 17319 + }, + { + "epoch": 1.3977887176176256, + "grad_norm": 0.693848192691803, + "learning_rate": 8.83694837740876e-06, + "loss": 2.4284, + "step": 17320 + }, + { + "epoch": 1.3978694213542087, + "grad_norm": 0.6813297271728516, + "learning_rate": 8.830460929426299e-06, + "loss": 2.3887, + "step": 17321 + }, + { + "epoch": 1.3979501250907918, + "grad_norm": 0.6795780658721924, + "learning_rate": 8.823975753627079e-06, + "loss": 2.4428, + "step": 17322 + }, + { + "epoch": 1.3980308288273746, + "grad_norm": 0.7395818829536438, + "learning_rate": 8.817492850172703e-06, + "loss": 2.4842, + "step": 17323 + }, + { + "epoch": 
1.3981115325639577, + "grad_norm": 0.6772391200065613, + "learning_rate": 8.811012219224778e-06, + "loss": 2.4555, + "step": 17324 + }, + { + "epoch": 1.3981922363005408, + "grad_norm": 0.66059809923172, + "learning_rate": 8.804533860944808e-06, + "loss": 2.3565, + "step": 17325 + }, + { + "epoch": 1.3982729400371237, + "grad_norm": 0.7336263656616211, + "learning_rate": 8.798057775494229e-06, + "loss": 2.4575, + "step": 17326 + }, + { + "epoch": 1.3983536437737067, + "grad_norm": 0.7758119702339172, + "learning_rate": 8.791583963034444e-06, + "loss": 2.4239, + "step": 17327 + }, + { + "epoch": 1.3984343475102898, + "grad_norm": 0.7417536377906799, + "learning_rate": 8.785112423726827e-06, + "loss": 2.4547, + "step": 17328 + }, + { + "epoch": 1.3985150512468727, + "grad_norm": 0.6901140213012695, + "learning_rate": 8.778643157732636e-06, + "loss": 2.4253, + "step": 17329 + }, + { + "epoch": 1.3985957549834558, + "grad_norm": 0.6766345500946045, + "learning_rate": 8.772176165213109e-06, + "loss": 2.4312, + "step": 17330 + }, + { + "epoch": 1.3986764587200389, + "grad_norm": 0.7406117916107178, + "learning_rate": 8.765711446329427e-06, + "loss": 2.4223, + "step": 17331 + }, + { + "epoch": 1.3987571624566217, + "grad_norm": 0.7236598134040833, + "learning_rate": 8.759249001242697e-06, + "loss": 2.4078, + "step": 17332 + }, + { + "epoch": 1.3988378661932048, + "grad_norm": 0.7009963393211365, + "learning_rate": 8.752788830114e-06, + "loss": 2.3573, + "step": 17333 + }, + { + "epoch": 1.3989185699297877, + "grad_norm": 0.7128826975822449, + "learning_rate": 8.746330933104319e-06, + "loss": 2.4039, + "step": 17334 + }, + { + "epoch": 1.3989992736663708, + "grad_norm": 0.6832678914070129, + "learning_rate": 8.739875310374635e-06, + "loss": 2.3917, + "step": 17335 + }, + { + "epoch": 1.3990799774029536, + "grad_norm": 0.6790578961372375, + "learning_rate": 8.733421962085786e-06, + "loss": 2.3908, + "step": 17336 + }, + { + "epoch": 1.3991606811395367, + "grad_norm": 
0.7215133905410767, + "learning_rate": 8.726970888398644e-06, + "loss": 2.3494, + "step": 17337 + }, + { + "epoch": 1.3992413848761198, + "grad_norm": 0.677761435508728, + "learning_rate": 8.720522089473992e-06, + "loss": 2.3747, + "step": 17338 + }, + { + "epoch": 1.3993220886127027, + "grad_norm": 0.6423436403274536, + "learning_rate": 8.714075565472513e-06, + "loss": 2.3386, + "step": 17339 + }, + { + "epoch": 1.3994027923492858, + "grad_norm": 0.798370897769928, + "learning_rate": 8.707631316554909e-06, + "loss": 2.3901, + "step": 17340 + }, + { + "epoch": 1.3994834960858689, + "grad_norm": 0.6572564840316772, + "learning_rate": 8.701189342881767e-06, + "loss": 2.4311, + "step": 17341 + }, + { + "epoch": 1.3995641998224517, + "grad_norm": 0.721610426902771, + "learning_rate": 8.694749644613642e-06, + "loss": 2.4158, + "step": 17342 + }, + { + "epoch": 1.3996449035590348, + "grad_norm": 0.8007451891899109, + "learning_rate": 8.688312221911022e-06, + "loss": 2.3931, + "step": 17343 + }, + { + "epoch": 1.399725607295618, + "grad_norm": 0.7181806564331055, + "learning_rate": 8.681877074934363e-06, + "loss": 2.4062, + "step": 17344 + }, + { + "epoch": 1.3998063110322008, + "grad_norm": 0.6630976796150208, + "learning_rate": 8.675444203844053e-06, + "loss": 2.3936, + "step": 17345 + }, + { + "epoch": 1.3998870147687839, + "grad_norm": 0.7093006372451782, + "learning_rate": 8.66901360880038e-06, + "loss": 2.4065, + "step": 17346 + }, + { + "epoch": 1.3999677185053667, + "grad_norm": 0.6685216426849365, + "learning_rate": 8.662585289963621e-06, + "loss": 2.4589, + "step": 17347 + }, + { + "epoch": 1.4000484222419498, + "grad_norm": 0.7227702140808105, + "learning_rate": 8.656159247494023e-06, + "loss": 2.3946, + "step": 17348 + }, + { + "epoch": 1.4001291259785327, + "grad_norm": 0.7459855079650879, + "learning_rate": 8.64973548155169e-06, + "loss": 2.4766, + "step": 17349 + }, + { + "epoch": 1.4002098297151158, + "grad_norm": 0.713190495967865, + "learning_rate": 
8.643313992296743e-06, + "loss": 2.3974, + "step": 17350 + }, + { + "epoch": 1.4002905334516988, + "grad_norm": 0.6921802759170532, + "learning_rate": 8.636894779889237e-06, + "loss": 2.4483, + "step": 17351 + }, + { + "epoch": 1.4003712371882817, + "grad_norm": 0.7517138719558716, + "learning_rate": 8.630477844489116e-06, + "loss": 2.402, + "step": 17352 + }, + { + "epoch": 1.4004519409248648, + "grad_norm": 0.728131115436554, + "learning_rate": 8.624063186256326e-06, + "loss": 2.4363, + "step": 17353 + }, + { + "epoch": 1.400532644661448, + "grad_norm": 0.6918095350265503, + "learning_rate": 8.617650805350763e-06, + "loss": 2.4424, + "step": 17354 + }, + { + "epoch": 1.4006133483980308, + "grad_norm": 0.6802886128425598, + "learning_rate": 8.6112407019322e-06, + "loss": 2.4133, + "step": 17355 + }, + { + "epoch": 1.4006940521346138, + "grad_norm": 0.6760320663452148, + "learning_rate": 8.604832876160418e-06, + "loss": 2.4187, + "step": 17356 + }, + { + "epoch": 1.400774755871197, + "grad_norm": 0.7422602772712708, + "learning_rate": 8.598427328195124e-06, + "loss": 2.4051, + "step": 17357 + }, + { + "epoch": 1.4008554596077798, + "grad_norm": 0.7278845906257629, + "learning_rate": 8.592024058195925e-06, + "loss": 2.4256, + "step": 17358 + }, + { + "epoch": 1.4009361633443629, + "grad_norm": 0.7399848699569702, + "learning_rate": 8.585623066322435e-06, + "loss": 2.4045, + "step": 17359 + }, + { + "epoch": 1.401016867080946, + "grad_norm": 0.703372061252594, + "learning_rate": 8.579224352734184e-06, + "loss": 2.404, + "step": 17360 + }, + { + "epoch": 1.4010975708175288, + "grad_norm": 0.6849603056907654, + "learning_rate": 8.572827917590642e-06, + "loss": 2.3808, + "step": 17361 + }, + { + "epoch": 1.401178274554112, + "grad_norm": 0.6907341480255127, + "learning_rate": 8.566433761051207e-06, + "loss": 2.3777, + "step": 17362 + }, + { + "epoch": 1.4012589782906948, + "grad_norm": 0.7436221837997437, + "learning_rate": 8.560041883275261e-06, + "loss": 2.4027, + 
"step": 17363 + }, + { + "epoch": 1.4013396820272779, + "grad_norm": 0.6975259780883789, + "learning_rate": 8.553652284422088e-06, + "loss": 2.4235, + "step": 17364 + }, + { + "epoch": 1.4014203857638607, + "grad_norm": 0.7692399024963379, + "learning_rate": 8.547264964650948e-06, + "loss": 2.4615, + "step": 17365 + }, + { + "epoch": 1.4015010895004438, + "grad_norm": 0.7096135020256042, + "learning_rate": 8.540879924121025e-06, + "loss": 2.3972, + "step": 17366 + }, + { + "epoch": 1.401581793237027, + "grad_norm": 0.6851587891578674, + "learning_rate": 8.534497162991473e-06, + "loss": 2.3697, + "step": 17367 + }, + { + "epoch": 1.4016624969736098, + "grad_norm": 0.6977655291557312, + "learning_rate": 8.528116681421317e-06, + "loss": 2.4413, + "step": 17368 + }, + { + "epoch": 1.4017432007101929, + "grad_norm": 0.715307354927063, + "learning_rate": 8.521738479569618e-06, + "loss": 2.4006, + "step": 17369 + }, + { + "epoch": 1.401823904446776, + "grad_norm": 0.7282734513282776, + "learning_rate": 8.51536255759533e-06, + "loss": 2.4418, + "step": 17370 + }, + { + "epoch": 1.4019046081833588, + "grad_norm": 0.6996017098426819, + "learning_rate": 8.508988915657334e-06, + "loss": 2.435, + "step": 17371 + }, + { + "epoch": 1.401985311919942, + "grad_norm": 0.7084866762161255, + "learning_rate": 8.502617553914494e-06, + "loss": 2.4314, + "step": 17372 + }, + { + "epoch": 1.402066015656525, + "grad_norm": 0.7217462658882141, + "learning_rate": 8.496248472525603e-06, + "loss": 2.4811, + "step": 17373 + }, + { + "epoch": 1.4021467193931079, + "grad_norm": 0.7414960265159607, + "learning_rate": 8.489881671649391e-06, + "loss": 2.4016, + "step": 17374 + }, + { + "epoch": 1.402227423129691, + "grad_norm": 0.7439210414886475, + "learning_rate": 8.483517151444532e-06, + "loss": 2.4711, + "step": 17375 + }, + { + "epoch": 1.402308126866274, + "grad_norm": 0.7277424335479736, + "learning_rate": 8.477154912069663e-06, + "loss": 2.4095, + "step": 17376 + }, + { + "epoch": 
1.402388830602857, + "grad_norm": 0.7506297826766968, + "learning_rate": 8.470794953683347e-06, + "loss": 2.4187, + "step": 17377 + }, + { + "epoch": 1.40246953433944, + "grad_norm": 0.7137917280197144, + "learning_rate": 8.464437276444059e-06, + "loss": 2.4069, + "step": 17378 + }, + { + "epoch": 1.4025502380760229, + "grad_norm": 0.6610304117202759, + "learning_rate": 8.458081880510282e-06, + "loss": 2.4709, + "step": 17379 + }, + { + "epoch": 1.402630941812606, + "grad_norm": 0.7147911190986633, + "learning_rate": 8.451728766040411e-06, + "loss": 2.4147, + "step": 17380 + }, + { + "epoch": 1.4027116455491888, + "grad_norm": 0.7196649312973022, + "learning_rate": 8.445377933192745e-06, + "loss": 2.4611, + "step": 17381 + }, + { + "epoch": 1.402792349285772, + "grad_norm": 0.6550390124320984, + "learning_rate": 8.439029382125596e-06, + "loss": 2.4229, + "step": 17382 + }, + { + "epoch": 1.402873053022355, + "grad_norm": 0.6517959833145142, + "learning_rate": 8.432683112997175e-06, + "loss": 2.421, + "step": 17383 + }, + { + "epoch": 1.4029537567589379, + "grad_norm": 0.6660284399986267, + "learning_rate": 8.426339125965643e-06, + "loss": 2.3918, + "step": 17384 + }, + { + "epoch": 1.403034460495521, + "grad_norm": 0.696163535118103, + "learning_rate": 8.41999742118913e-06, + "loss": 2.4334, + "step": 17385 + }, + { + "epoch": 1.403115164232104, + "grad_norm": 0.7146298885345459, + "learning_rate": 8.413657998825674e-06, + "loss": 2.3984, + "step": 17386 + }, + { + "epoch": 1.403195867968687, + "grad_norm": 0.7084376215934753, + "learning_rate": 8.407320859033262e-06, + "loss": 2.4098, + "step": 17387 + }, + { + "epoch": 1.40327657170527, + "grad_norm": 0.7499445080757141, + "learning_rate": 8.400986001969846e-06, + "loss": 2.4315, + "step": 17388 + }, + { + "epoch": 1.403357275441853, + "grad_norm": 0.6822247505187988, + "learning_rate": 8.394653427793308e-06, + "loss": 2.3816, + "step": 17389 + }, + { + "epoch": 1.403437979178436, + "grad_norm": 
0.6859664916992188, + "learning_rate": 8.388323136661458e-06, + "loss": 2.3772, + "step": 17390 + }, + { + "epoch": 1.403518682915019, + "grad_norm": 0.6771109104156494, + "learning_rate": 8.381995128732057e-06, + "loss": 2.4295, + "step": 17391 + }, + { + "epoch": 1.4035993866516019, + "grad_norm": 0.7589800357818604, + "learning_rate": 8.375669404162845e-06, + "loss": 2.3806, + "step": 17392 + }, + { + "epoch": 1.403680090388185, + "grad_norm": 0.665472149848938, + "learning_rate": 8.369345963111453e-06, + "loss": 2.383, + "step": 17393 + }, + { + "epoch": 1.4037607941247678, + "grad_norm": 0.6658698916435242, + "learning_rate": 8.363024805735475e-06, + "loss": 2.3682, + "step": 17394 + }, + { + "epoch": 1.403841497861351, + "grad_norm": 0.7445670366287231, + "learning_rate": 8.356705932192477e-06, + "loss": 2.5224, + "step": 17395 + }, + { + "epoch": 1.403922201597934, + "grad_norm": 0.6812258362770081, + "learning_rate": 8.35038934263993e-06, + "loss": 2.426, + "step": 17396 + }, + { + "epoch": 1.4040029053345169, + "grad_norm": 0.6613782644271851, + "learning_rate": 8.344075037235243e-06, + "loss": 2.3756, + "step": 17397 + }, + { + "epoch": 1.4040836090711, + "grad_norm": 0.6314469575881958, + "learning_rate": 8.337763016135792e-06, + "loss": 2.3703, + "step": 17398 + }, + { + "epoch": 1.404164312807683, + "grad_norm": 0.6611869931221008, + "learning_rate": 8.331453279498914e-06, + "loss": 2.3951, + "step": 17399 + }, + { + "epoch": 1.404245016544266, + "grad_norm": 0.6668544411659241, + "learning_rate": 8.325145827481828e-06, + "loss": 2.4732, + "step": 17400 + }, + { + "epoch": 1.404325720280849, + "grad_norm": 0.7428251504898071, + "learning_rate": 8.318840660241755e-06, + "loss": 2.391, + "step": 17401 + }, + { + "epoch": 1.404406424017432, + "grad_norm": 0.7163440585136414, + "learning_rate": 8.312537777935836e-06, + "loss": 2.4379, + "step": 17402 + }, + { + "epoch": 1.404487127754015, + "grad_norm": 0.7152317762374878, + "learning_rate": 
8.306237180721121e-06, + "loss": 2.426, + "step": 17403 + }, + { + "epoch": 1.404567831490598, + "grad_norm": 0.7675083875656128, + "learning_rate": 8.299938868754686e-06, + "loss": 2.4014, + "step": 17404 + }, + { + "epoch": 1.4046485352271811, + "grad_norm": 0.7118947505950928, + "learning_rate": 8.293642842193494e-06, + "loss": 2.3998, + "step": 17405 + }, + { + "epoch": 1.404729238963764, + "grad_norm": 0.713556706905365, + "learning_rate": 8.28734910119442e-06, + "loss": 2.4134, + "step": 17406 + }, + { + "epoch": 1.404809942700347, + "grad_norm": 0.7631849646568298, + "learning_rate": 8.281057645914359e-06, + "loss": 2.4866, + "step": 17407 + }, + { + "epoch": 1.40489064643693, + "grad_norm": 0.7348508834838867, + "learning_rate": 8.274768476510087e-06, + "loss": 2.4067, + "step": 17408 + }, + { + "epoch": 1.404971350173513, + "grad_norm": 0.7371857762336731, + "learning_rate": 8.268481593138377e-06, + "loss": 2.429, + "step": 17409 + }, + { + "epoch": 1.405052053910096, + "grad_norm": 0.674980640411377, + "learning_rate": 8.262196995955874e-06, + "loss": 2.3897, + "step": 17410 + }, + { + "epoch": 1.405132757646679, + "grad_norm": 0.6975973844528198, + "learning_rate": 8.255914685119237e-06, + "loss": 2.445, + "step": 17411 + }, + { + "epoch": 1.405213461383262, + "grad_norm": 0.6854067444801331, + "learning_rate": 8.249634660785033e-06, + "loss": 2.3528, + "step": 17412 + }, + { + "epoch": 1.405294165119845, + "grad_norm": 0.6678418517112732, + "learning_rate": 8.243356923109768e-06, + "loss": 2.4078, + "step": 17413 + }, + { + "epoch": 1.405374868856428, + "grad_norm": 0.6600239276885986, + "learning_rate": 8.237081472249885e-06, + "loss": 2.3719, + "step": 17414 + }, + { + "epoch": 1.4054555725930111, + "grad_norm": 0.7209253907203674, + "learning_rate": 8.230808308361815e-06, + "loss": 2.4203, + "step": 17415 + }, + { + "epoch": 1.405536276329594, + "grad_norm": 0.6849339604377747, + "learning_rate": 8.224537431601886e-06, + "loss": 2.3898, + "step": 
17416 + }, + { + "epoch": 1.405616980066177, + "grad_norm": 0.718558132648468, + "learning_rate": 8.218268842126387e-06, + "loss": 2.4063, + "step": 17417 + }, + { + "epoch": 1.4056976838027602, + "grad_norm": 0.7118551731109619, + "learning_rate": 8.212002540091567e-06, + "loss": 2.3942, + "step": 17418 + }, + { + "epoch": 1.405778387539343, + "grad_norm": 0.7138789892196655, + "learning_rate": 8.205738525653562e-06, + "loss": 2.4614, + "step": 17419 + }, + { + "epoch": 1.4058590912759261, + "grad_norm": 0.7254295349121094, + "learning_rate": 8.199476798968508e-06, + "loss": 2.4126, + "step": 17420 + }, + { + "epoch": 1.4059397950125092, + "grad_norm": 0.691965639591217, + "learning_rate": 8.193217360192473e-06, + "loss": 2.4233, + "step": 17421 + }, + { + "epoch": 1.406020498749092, + "grad_norm": 0.7132619619369507, + "learning_rate": 8.186960209481431e-06, + "loss": 2.3764, + "step": 17422 + }, + { + "epoch": 1.4061012024856752, + "grad_norm": 0.6838160753250122, + "learning_rate": 8.180705346991346e-06, + "loss": 2.3927, + "step": 17423 + }, + { + "epoch": 1.406181906222258, + "grad_norm": 0.6755721569061279, + "learning_rate": 8.174452772878094e-06, + "loss": 2.435, + "step": 17424 + }, + { + "epoch": 1.4062626099588411, + "grad_norm": 0.774718701839447, + "learning_rate": 8.168202487297527e-06, + "loss": 2.4811, + "step": 17425 + }, + { + "epoch": 1.406343313695424, + "grad_norm": 0.6601200699806213, + "learning_rate": 8.161954490405388e-06, + "loss": 2.3494, + "step": 17426 + }, + { + "epoch": 1.406424017432007, + "grad_norm": 0.6854710578918457, + "learning_rate": 8.155708782357419e-06, + "loss": 2.4214, + "step": 17427 + }, + { + "epoch": 1.4065047211685902, + "grad_norm": 0.7471936345100403, + "learning_rate": 8.149465363309294e-06, + "loss": 2.3702, + "step": 17428 + }, + { + "epoch": 1.406585424905173, + "grad_norm": 0.7129673957824707, + "learning_rate": 8.143224233416569e-06, + "loss": 2.4078, + "step": 17429 + }, + { + "epoch": 1.406666128641756, + 
"grad_norm": 0.7168975472450256, + "learning_rate": 8.136985392834807e-06, + "loss": 2.4265, + "step": 17430 + }, + { + "epoch": 1.4067468323783392, + "grad_norm": 0.709699809551239, + "learning_rate": 8.130748841719526e-06, + "loss": 2.4069, + "step": 17431 + }, + { + "epoch": 1.406827536114922, + "grad_norm": 0.7571663856506348, + "learning_rate": 8.124514580226105e-06, + "loss": 2.3949, + "step": 17432 + }, + { + "epoch": 1.4069082398515051, + "grad_norm": 0.6844212412834167, + "learning_rate": 8.118282608509952e-06, + "loss": 2.4156, + "step": 17433 + }, + { + "epoch": 1.4069889435880882, + "grad_norm": 0.6632293462753296, + "learning_rate": 8.112052926726376e-06, + "loss": 2.3973, + "step": 17434 + }, + { + "epoch": 1.407069647324671, + "grad_norm": 0.6375966668128967, + "learning_rate": 8.105825535030643e-06, + "loss": 2.4168, + "step": 17435 + }, + { + "epoch": 1.4071503510612542, + "grad_norm": 0.6997824907302856, + "learning_rate": 8.099600433577947e-06, + "loss": 2.3279, + "step": 17436 + }, + { + "epoch": 1.4072310547978373, + "grad_norm": 0.7491862177848816, + "learning_rate": 8.093377622523458e-06, + "loss": 2.403, + "step": 17437 + }, + { + "epoch": 1.4073117585344201, + "grad_norm": 0.6938888430595398, + "learning_rate": 8.087157102022235e-06, + "loss": 2.3965, + "step": 17438 + }, + { + "epoch": 1.4073924622710032, + "grad_norm": 0.708043098449707, + "learning_rate": 8.080938872229304e-06, + "loss": 2.4429, + "step": 17439 + }, + { + "epoch": 1.407473166007586, + "grad_norm": 0.6587165594100952, + "learning_rate": 8.074722933299673e-06, + "loss": 2.3951, + "step": 17440 + }, + { + "epoch": 1.4075538697441692, + "grad_norm": 0.6987459659576416, + "learning_rate": 8.068509285388248e-06, + "loss": 2.41, + "step": 17441 + }, + { + "epoch": 1.407634573480752, + "grad_norm": 0.6864002346992493, + "learning_rate": 8.062297928649865e-06, + "loss": 2.3867, + "step": 17442 + }, + { + "epoch": 1.4077152772173351, + "grad_norm": 0.6478279829025269, + 
"learning_rate": 8.056088863239342e-06, + "loss": 2.391, + "step": 17443 + }, + { + "epoch": 1.4077959809539182, + "grad_norm": 0.658235490322113, + "learning_rate": 8.049882089311433e-06, + "loss": 2.3646, + "step": 17444 + }, + { + "epoch": 1.407876684690501, + "grad_norm": 0.6664391160011292, + "learning_rate": 8.043677607020828e-06, + "loss": 2.4101, + "step": 17445 + }, + { + "epoch": 1.4079573884270842, + "grad_norm": 0.6662336587905884, + "learning_rate": 8.037475416522144e-06, + "loss": 2.4461, + "step": 17446 + }, + { + "epoch": 1.4080380921636673, + "grad_norm": 0.6629661321640015, + "learning_rate": 8.031275517969982e-06, + "loss": 2.4191, + "step": 17447 + }, + { + "epoch": 1.4081187959002501, + "grad_norm": 0.6586340665817261, + "learning_rate": 8.02507791151883e-06, + "loss": 2.4213, + "step": 17448 + }, + { + "epoch": 1.4081994996368332, + "grad_norm": 0.692555844783783, + "learning_rate": 8.018882597323163e-06, + "loss": 2.4148, + "step": 17449 + }, + { + "epoch": 1.4082802033734163, + "grad_norm": 0.6890958547592163, + "learning_rate": 8.012689575537402e-06, + "loss": 2.4121, + "step": 17450 + }, + { + "epoch": 1.4083609071099992, + "grad_norm": 0.7425588965415955, + "learning_rate": 8.006498846315846e-06, + "loss": 2.4426, + "step": 17451 + }, + { + "epoch": 1.4084416108465823, + "grad_norm": 0.6801562309265137, + "learning_rate": 8.000310409812828e-06, + "loss": 2.3786, + "step": 17452 + }, + { + "epoch": 1.4085223145831651, + "grad_norm": 0.7273206114768982, + "learning_rate": 7.994124266182568e-06, + "loss": 2.3635, + "step": 17453 + }, + { + "epoch": 1.4086030183197482, + "grad_norm": 0.6684201955795288, + "learning_rate": 7.987940415579209e-06, + "loss": 2.4565, + "step": 17454 + }, + { + "epoch": 1.408683722056331, + "grad_norm": 0.7803860902786255, + "learning_rate": 7.981758858156908e-06, + "loss": 2.3957, + "step": 17455 + }, + { + "epoch": 1.4087644257929142, + "grad_norm": 0.7033873200416565, + "learning_rate": 7.975579594069727e-06, + 
"loss": 2.3273, + "step": 17456 + }, + { + "epoch": 1.4088451295294973, + "grad_norm": 0.7338894009590149, + "learning_rate": 7.969402623471656e-06, + "loss": 2.4657, + "step": 17457 + }, + { + "epoch": 1.4089258332660801, + "grad_norm": 0.6912354230880737, + "learning_rate": 7.963227946516637e-06, + "loss": 2.4329, + "step": 17458 + }, + { + "epoch": 1.4090065370026632, + "grad_norm": 0.7227259278297424, + "learning_rate": 7.957055563358561e-06, + "loss": 2.4043, + "step": 17459 + }, + { + "epoch": 1.4090872407392463, + "grad_norm": 0.7320930361747742, + "learning_rate": 7.950885474151281e-06, + "loss": 2.3889, + "step": 17460 + }, + { + "epoch": 1.4091679444758292, + "grad_norm": 0.6754814982414246, + "learning_rate": 7.944717679048542e-06, + "loss": 2.4199, + "step": 17461 + }, + { + "epoch": 1.4092486482124122, + "grad_norm": 0.6574978828430176, + "learning_rate": 7.938552178204061e-06, + "loss": 2.3846, + "step": 17462 + }, + { + "epoch": 1.4093293519489953, + "grad_norm": 0.6976850628852844, + "learning_rate": 7.932388971771543e-06, + "loss": 2.4647, + "step": 17463 + }, + { + "epoch": 1.4094100556855782, + "grad_norm": 0.7376202344894409, + "learning_rate": 7.926228059904529e-06, + "loss": 2.4279, + "step": 17464 + }, + { + "epoch": 1.4094907594221613, + "grad_norm": 0.6907104253768921, + "learning_rate": 7.920069442756584e-06, + "loss": 2.4238, + "step": 17465 + }, + { + "epoch": 1.4095714631587444, + "grad_norm": 0.7079440951347351, + "learning_rate": 7.913913120481243e-06, + "loss": 2.4173, + "step": 17466 + }, + { + "epoch": 1.4096521668953272, + "grad_norm": 0.7188387513160706, + "learning_rate": 7.907759093231882e-06, + "loss": 2.4134, + "step": 17467 + }, + { + "epoch": 1.4097328706319103, + "grad_norm": 0.6877745389938354, + "learning_rate": 7.901607361161889e-06, + "loss": 2.4098, + "step": 17468 + }, + { + "epoch": 1.4098135743684932, + "grad_norm": 0.6914156079292297, + "learning_rate": 7.8954579244246e-06, + "loss": 2.4244, + "step": 17469 + }, + 
{ + "epoch": 1.4098942781050763, + "grad_norm": 0.6616036295890808, + "learning_rate": 7.889310783173277e-06, + "loss": 2.4617, + "step": 17470 + }, + { + "epoch": 1.4099749818416591, + "grad_norm": 0.7090594172477722, + "learning_rate": 7.883165937561088e-06, + "loss": 2.4234, + "step": 17471 + }, + { + "epoch": 1.4100556855782422, + "grad_norm": 0.7596384286880493, + "learning_rate": 7.8770233877412e-06, + "loss": 2.39, + "step": 17472 + }, + { + "epoch": 1.4101363893148253, + "grad_norm": 0.7311475872993469, + "learning_rate": 7.870883133866725e-06, + "loss": 2.418, + "step": 17473 + }, + { + "epoch": 1.4102170930514082, + "grad_norm": 0.6628947854042053, + "learning_rate": 7.86474517609065e-06, + "loss": 2.4177, + "step": 17474 + }, + { + "epoch": 1.4102977967879913, + "grad_norm": 0.7169137597084045, + "learning_rate": 7.858609514565974e-06, + "loss": 2.4359, + "step": 17475 + }, + { + "epoch": 1.4103785005245744, + "grad_norm": 0.7364529371261597, + "learning_rate": 7.852476149445598e-06, + "loss": 2.45, + "step": 17476 + }, + { + "epoch": 1.4104592042611572, + "grad_norm": 0.7494707703590393, + "learning_rate": 7.8463450808824e-06, + "loss": 2.403, + "step": 17477 + }, + { + "epoch": 1.4105399079977403, + "grad_norm": 0.6723065376281738, + "learning_rate": 7.84021630902917e-06, + "loss": 2.4089, + "step": 17478 + }, + { + "epoch": 1.4106206117343234, + "grad_norm": 0.7032917141914368, + "learning_rate": 7.83408983403867e-06, + "loss": 2.4285, + "step": 17479 + }, + { + "epoch": 1.4107013154709063, + "grad_norm": 0.6634184718132019, + "learning_rate": 7.827965656063573e-06, + "loss": 2.3701, + "step": 17480 + }, + { + "epoch": 1.4107820192074894, + "grad_norm": 0.6645818948745728, + "learning_rate": 7.821843775256498e-06, + "loss": 2.3891, + "step": 17481 + }, + { + "epoch": 1.4108627229440724, + "grad_norm": 0.6750596165657043, + "learning_rate": 7.815724191770058e-06, + "loss": 2.4043, + "step": 17482 + }, + { + "epoch": 1.4109434266806553, + "grad_norm": 
0.7519060969352722, + "learning_rate": 7.809606905756727e-06, + "loss": 2.4287, + "step": 17483 + }, + { + "epoch": 1.4110241304172384, + "grad_norm": 0.69886714220047, + "learning_rate": 7.803491917368977e-06, + "loss": 2.4565, + "step": 17484 + }, + { + "epoch": 1.4111048341538213, + "grad_norm": 0.6600854992866516, + "learning_rate": 7.797379226759216e-06, + "loss": 2.3743, + "step": 17485 + }, + { + "epoch": 1.4111855378904044, + "grad_norm": 0.65254807472229, + "learning_rate": 7.791268834079779e-06, + "loss": 2.435, + "step": 17486 + }, + { + "epoch": 1.4112662416269872, + "grad_norm": 0.6900071501731873, + "learning_rate": 7.785160739482955e-06, + "loss": 2.4073, + "step": 17487 + }, + { + "epoch": 1.4113469453635703, + "grad_norm": 0.6831900477409363, + "learning_rate": 7.779054943120989e-06, + "loss": 2.4325, + "step": 17488 + }, + { + "epoch": 1.4114276491001534, + "grad_norm": 0.7446292042732239, + "learning_rate": 7.772951445146049e-06, + "loss": 2.4693, + "step": 17489 + }, + { + "epoch": 1.4115083528367363, + "grad_norm": 0.6620200872421265, + "learning_rate": 7.766850245710233e-06, + "loss": 2.4345, + "step": 17490 + }, + { + "epoch": 1.4115890565733193, + "grad_norm": 0.7509312629699707, + "learning_rate": 7.76075134496561e-06, + "loss": 2.3596, + "step": 17491 + }, + { + "epoch": 1.4116697603099024, + "grad_norm": 0.7003920078277588, + "learning_rate": 7.754654743064194e-06, + "loss": 2.4016, + "step": 17492 + }, + { + "epoch": 1.4117504640464853, + "grad_norm": 0.6603164076805115, + "learning_rate": 7.748560440157892e-06, + "loss": 2.4031, + "step": 17493 + }, + { + "epoch": 1.4118311677830684, + "grad_norm": 0.7125976085662842, + "learning_rate": 7.742468436398608e-06, + "loss": 2.4199, + "step": 17494 + }, + { + "epoch": 1.4119118715196515, + "grad_norm": 0.7279991507530212, + "learning_rate": 7.736378731938187e-06, + "loss": 2.4263, + "step": 17495 + }, + { + "epoch": 1.4119925752562343, + "grad_norm": 0.7445220351219177, + "learning_rate": 
7.730291326928385e-06, + "loss": 2.4256, + "step": 17496 + }, + { + "epoch": 1.4120732789928174, + "grad_norm": 0.7625001072883606, + "learning_rate": 7.724206221520913e-06, + "loss": 2.4307, + "step": 17497 + }, + { + "epoch": 1.4121539827294003, + "grad_norm": 0.7109429240226746, + "learning_rate": 7.71812341586745e-06, + "loss": 2.4157, + "step": 17498 + }, + { + "epoch": 1.4122346864659834, + "grad_norm": 0.7360411882400513, + "learning_rate": 7.712042910119566e-06, + "loss": 2.3855, + "step": 17499 + }, + { + "epoch": 1.4123153902025662, + "grad_norm": 0.6878146529197693, + "learning_rate": 7.705964704428815e-06, + "loss": 2.4059, + "step": 17500 + }, + { + "epoch": 1.4123960939391493, + "grad_norm": 0.7399710416793823, + "learning_rate": 7.699888798946674e-06, + "loss": 2.4234, + "step": 17501 + }, + { + "epoch": 1.4124767976757324, + "grad_norm": 0.6825466156005859, + "learning_rate": 7.693815193824605e-06, + "loss": 2.4428, + "step": 17502 + }, + { + "epoch": 1.4125575014123153, + "grad_norm": 0.6567744016647339, + "learning_rate": 7.687743889213938e-06, + "loss": 2.3609, + "step": 17503 + }, + { + "epoch": 1.4126382051488984, + "grad_norm": 0.7361522316932678, + "learning_rate": 7.681674885265989e-06, + "loss": 2.4006, + "step": 17504 + }, + { + "epoch": 1.4127189088854815, + "grad_norm": 0.7350279688835144, + "learning_rate": 7.675608182132033e-06, + "loss": 2.4395, + "step": 17505 + }, + { + "epoch": 1.4127996126220643, + "grad_norm": 0.6630931496620178, + "learning_rate": 7.669543779963262e-06, + "loss": 2.4451, + "step": 17506 + }, + { + "epoch": 1.4128803163586474, + "grad_norm": 0.6845518350601196, + "learning_rate": 7.6634816789108e-06, + "loss": 2.436, + "step": 17507 + }, + { + "epoch": 1.4129610200952305, + "grad_norm": 0.6736167073249817, + "learning_rate": 7.657421879125782e-06, + "loss": 2.3628, + "step": 17508 + }, + { + "epoch": 1.4130417238318134, + "grad_norm": 0.6932296752929688, + "learning_rate": 7.651364380759163e-06, + "loss": 2.4353, 
+ "step": 17509 + }, + { + "epoch": 1.4131224275683965, + "grad_norm": 0.7034411430358887, + "learning_rate": 7.645309183961947e-06, + "loss": 2.3853, + "step": 17510 + }, + { + "epoch": 1.4132031313049795, + "grad_norm": 0.6912705898284912, + "learning_rate": 7.639256288885065e-06, + "loss": 2.2978, + "step": 17511 + }, + { + "epoch": 1.4132838350415624, + "grad_norm": 0.6716031432151794, + "learning_rate": 7.633205695679336e-06, + "loss": 2.3602, + "step": 17512 + }, + { + "epoch": 1.4133645387781455, + "grad_norm": 0.707477331161499, + "learning_rate": 7.6271574044955664e-06, + "loss": 2.434, + "step": 17513 + }, + { + "epoch": 1.4134452425147284, + "grad_norm": 0.7031993269920349, + "learning_rate": 7.621111415484517e-06, + "loss": 2.3718, + "step": 17514 + }, + { + "epoch": 1.4135259462513114, + "grad_norm": 0.6708939671516418, + "learning_rate": 7.615067728796832e-06, + "loss": 2.4218, + "step": 17515 + }, + { + "epoch": 1.4136066499878943, + "grad_norm": 0.7508932948112488, + "learning_rate": 7.609026344583148e-06, + "loss": 2.4273, + "step": 17516 + }, + { + "epoch": 1.4136873537244774, + "grad_norm": 0.6981049180030823, + "learning_rate": 7.602987262994055e-06, + "loss": 2.3941, + "step": 17517 + }, + { + "epoch": 1.4137680574610605, + "grad_norm": 0.7662717700004578, + "learning_rate": 7.5969504841800544e-06, + "loss": 2.3875, + "step": 17518 + }, + { + "epoch": 1.4138487611976434, + "grad_norm": 0.688423752784729, + "learning_rate": 7.590916008291582e-06, + "loss": 2.4091, + "step": 17519 + }, + { + "epoch": 1.4139294649342264, + "grad_norm": 0.6867286562919617, + "learning_rate": 7.584883835479039e-06, + "loss": 2.3983, + "step": 17520 + }, + { + "epoch": 1.4140101686708095, + "grad_norm": 0.7491776943206787, + "learning_rate": 7.578853965892785e-06, + "loss": 2.4151, + "step": 17521 + }, + { + "epoch": 1.4140908724073924, + "grad_norm": 0.6946732997894287, + "learning_rate": 7.572826399683064e-06, + "loss": 2.4196, + "step": 17522 + }, + { + "epoch": 
1.4141715761439755, + "grad_norm": 0.6638106107711792, + "learning_rate": 7.566801137000123e-06, + "loss": 2.441, + "step": 17523 + }, + { + "epoch": 1.4142522798805586, + "grad_norm": 0.7190408110618591, + "learning_rate": 7.5607781779941325e-06, + "loss": 2.4026, + "step": 17524 + }, + { + "epoch": 1.4143329836171414, + "grad_norm": 0.708963930606842, + "learning_rate": 7.55475752281517e-06, + "loss": 2.3842, + "step": 17525 + }, + { + "epoch": 1.4144136873537245, + "grad_norm": 0.6763237118721008, + "learning_rate": 7.548739171613306e-06, + "loss": 2.4259, + "step": 17526 + }, + { + "epoch": 1.4144943910903076, + "grad_norm": 0.7374435067176819, + "learning_rate": 7.542723124538531e-06, + "loss": 2.4603, + "step": 17527 + }, + { + "epoch": 1.4145750948268905, + "grad_norm": 0.7165411114692688, + "learning_rate": 7.5367093817407805e-06, + "loss": 2.4103, + "step": 17528 + }, + { + "epoch": 1.4146557985634736, + "grad_norm": 0.7794588804244995, + "learning_rate": 7.530697943369935e-06, + "loss": 2.3912, + "step": 17529 + }, + { + "epoch": 1.4147365023000564, + "grad_norm": 0.691405713558197, + "learning_rate": 7.5246888095758305e-06, + "loss": 2.4357, + "step": 17530 + }, + { + "epoch": 1.4148172060366395, + "grad_norm": 0.6955364346504211, + "learning_rate": 7.518681980508191e-06, + "loss": 2.3645, + "step": 17531 + }, + { + "epoch": 1.4148979097732224, + "grad_norm": 0.6848856210708618, + "learning_rate": 7.512677456316753e-06, + "loss": 2.4145, + "step": 17532 + }, + { + "epoch": 1.4149786135098055, + "grad_norm": 0.668624997138977, + "learning_rate": 7.506675237151151e-06, + "loss": 2.4367, + "step": 17533 + }, + { + "epoch": 1.4150593172463886, + "grad_norm": 0.7547643780708313, + "learning_rate": 7.50067532316101e-06, + "loss": 2.437, + "step": 17534 + }, + { + "epoch": 1.4151400209829714, + "grad_norm": 0.6710182428359985, + "learning_rate": 7.494677714495812e-06, + "loss": 2.3596, + "step": 17535 + }, + { + "epoch": 1.4152207247195545, + "grad_norm": 
0.7603517770767212, + "learning_rate": 7.488682411305048e-06, + "loss": 2.4277, + "step": 17536 + }, + { + "epoch": 1.4153014284561376, + "grad_norm": 0.7142195105552673, + "learning_rate": 7.482689413738153e-06, + "loss": 2.386, + "step": 17537 + }, + { + "epoch": 1.4153821321927205, + "grad_norm": 0.6910836100578308, + "learning_rate": 7.4766987219444865e-06, + "loss": 2.4394, + "step": 17538 + }, + { + "epoch": 1.4154628359293036, + "grad_norm": 0.7568751573562622, + "learning_rate": 7.470710336073339e-06, + "loss": 2.4621, + "step": 17539 + }, + { + "epoch": 1.4155435396658866, + "grad_norm": 0.7378259301185608, + "learning_rate": 7.46472425627398e-06, + "loss": 2.3677, + "step": 17540 + }, + { + "epoch": 1.4156242434024695, + "grad_norm": 0.7365754842758179, + "learning_rate": 7.458740482695569e-06, + "loss": 2.3881, + "step": 17541 + }, + { + "epoch": 1.4157049471390526, + "grad_norm": 0.6753227114677429, + "learning_rate": 7.452759015487254e-06, + "loss": 2.3997, + "step": 17542 + }, + { + "epoch": 1.4157856508756355, + "grad_norm": 0.6384701728820801, + "learning_rate": 7.446779854798114e-06, + "loss": 2.4029, + "step": 17543 + }, + { + "epoch": 1.4158663546122185, + "grad_norm": 0.6766810417175293, + "learning_rate": 7.4408030007771416e-06, + "loss": 2.4083, + "step": 17544 + }, + { + "epoch": 1.4159470583488014, + "grad_norm": 0.6948650479316711, + "learning_rate": 7.434828453573317e-06, + "loss": 2.3521, + "step": 17545 + }, + { + "epoch": 1.4160277620853845, + "grad_norm": 0.7690626978874207, + "learning_rate": 7.428856213335533e-06, + "loss": 2.4318, + "step": 17546 + }, + { + "epoch": 1.4161084658219676, + "grad_norm": 0.7151117920875549, + "learning_rate": 7.422886280212626e-06, + "loss": 2.4261, + "step": 17547 + }, + { + "epoch": 1.4161891695585505, + "grad_norm": 0.6966549754142761, + "learning_rate": 7.4169186543534e-06, + "loss": 2.4112, + "step": 17548 + }, + { + "epoch": 1.4162698732951335, + "grad_norm": 0.6930578947067261, + "learning_rate": 
7.410953335906578e-06, + "loss": 2.4155, + "step": 17549 + }, + { + "epoch": 1.4163505770317166, + "grad_norm": 0.7319084405899048, + "learning_rate": 7.404990325020844e-06, + "loss": 2.4015, + "step": 17550 + }, + { + "epoch": 1.4164312807682995, + "grad_norm": 0.6913621425628662, + "learning_rate": 7.399029621844778e-06, + "loss": 2.4474, + "step": 17551 + }, + { + "epoch": 1.4165119845048826, + "grad_norm": 0.7726523280143738, + "learning_rate": 7.3930712265269595e-06, + "loss": 2.4815, + "step": 17552 + }, + { + "epoch": 1.4165926882414657, + "grad_norm": 0.6549103856086731, + "learning_rate": 7.387115139215894e-06, + "loss": 2.378, + "step": 17553 + }, + { + "epoch": 1.4166733919780485, + "grad_norm": 0.6902545094490051, + "learning_rate": 7.381161360059996e-06, + "loss": 2.3993, + "step": 17554 + }, + { + "epoch": 1.4167540957146316, + "grad_norm": 0.6871094107627869, + "learning_rate": 7.375209889207668e-06, + "loss": 2.4211, + "step": 17555 + }, + { + "epoch": 1.4168347994512147, + "grad_norm": 0.7043696641921997, + "learning_rate": 7.369260726807226e-06, + "loss": 2.4395, + "step": 17556 + }, + { + "epoch": 1.4169155031877976, + "grad_norm": 0.6889273524284363, + "learning_rate": 7.363313873006949e-06, + "loss": 2.4014, + "step": 17557 + }, + { + "epoch": 1.4169962069243807, + "grad_norm": 0.6670657992362976, + "learning_rate": 7.3573693279550545e-06, + "loss": 2.3943, + "step": 17558 + }, + { + "epoch": 1.4170769106609635, + "grad_norm": 0.7316192984580994, + "learning_rate": 7.3514270917996895e-06, + "loss": 2.3763, + "step": 17559 + }, + { + "epoch": 1.4171576143975466, + "grad_norm": 0.6922768950462341, + "learning_rate": 7.345487164688947e-06, + "loss": 2.4102, + "step": 17560 + }, + { + "epoch": 1.4172383181341295, + "grad_norm": 0.7255418300628662, + "learning_rate": 7.339549546770852e-06, + "loss": 2.4874, + "step": 17561 + }, + { + "epoch": 1.4173190218707126, + "grad_norm": 0.7474549412727356, + "learning_rate": 7.3336142381934206e-06, + "loss": 
2.4817, + "step": 17562 + }, + { + "epoch": 1.4173997256072957, + "grad_norm": 0.6574866771697998, + "learning_rate": 7.327681239104534e-06, + "loss": 2.4504, + "step": 17563 + }, + { + "epoch": 1.4174804293438785, + "grad_norm": 0.751109778881073, + "learning_rate": 7.321750549652084e-06, + "loss": 2.482, + "step": 17564 + }, + { + "epoch": 1.4175611330804616, + "grad_norm": 0.6917319297790527, + "learning_rate": 7.315822169983866e-06, + "loss": 2.426, + "step": 17565 + }, + { + "epoch": 1.4176418368170447, + "grad_norm": 0.7236911058425903, + "learning_rate": 7.309896100247671e-06, + "loss": 2.4222, + "step": 17566 + }, + { + "epoch": 1.4177225405536276, + "grad_norm": 0.7382739186286926, + "learning_rate": 7.3039723405911145e-06, + "loss": 2.4673, + "step": 17567 + }, + { + "epoch": 1.4178032442902107, + "grad_norm": 0.6394448280334473, + "learning_rate": 7.2980508911618895e-06, + "loss": 2.4301, + "step": 17568 + }, + { + "epoch": 1.4178839480267937, + "grad_norm": 0.7402171492576599, + "learning_rate": 7.292131752107589e-06, + "loss": 2.4345, + "step": 17569 + }, + { + "epoch": 1.4179646517633766, + "grad_norm": 0.6540209054946899, + "learning_rate": 7.286214923575685e-06, + "loss": 2.4025, + "step": 17570 + }, + { + "epoch": 1.4180453554999597, + "grad_norm": 0.7361408472061157, + "learning_rate": 7.280300405713658e-06, + "loss": 2.4383, + "step": 17571 + }, + { + "epoch": 1.4181260592365428, + "grad_norm": 0.7483302354812622, + "learning_rate": 7.274388198668936e-06, + "loss": 2.3909, + "step": 17572 + }, + { + "epoch": 1.4182067629731256, + "grad_norm": 0.7666492462158203, + "learning_rate": 7.268478302588833e-06, + "loss": 2.3646, + "step": 17573 + }, + { + "epoch": 1.4182874667097087, + "grad_norm": 0.7461634278297424, + "learning_rate": 7.262570717620642e-06, + "loss": 2.4247, + "step": 17574 + }, + { + "epoch": 1.4183681704462916, + "grad_norm": 0.6593511700630188, + "learning_rate": 7.256665443911637e-06, + "loss": 2.4373, + "step": 17575 + }, + { + 
"epoch": 1.4184488741828747, + "grad_norm": 0.6628448963165283, + "learning_rate": 7.250762481608941e-06, + "loss": 2.4028, + "step": 17576 + }, + { + "epoch": 1.4185295779194576, + "grad_norm": 0.7371554970741272, + "learning_rate": 7.244861830859695e-06, + "loss": 2.3893, + "step": 17577 + }, + { + "epoch": 1.4186102816560406, + "grad_norm": 0.6896550059318542, + "learning_rate": 7.238963491810935e-06, + "loss": 2.4039, + "step": 17578 + }, + { + "epoch": 1.4186909853926237, + "grad_norm": 0.6840630173683167, + "learning_rate": 7.233067464609722e-06, + "loss": 2.3658, + "step": 17579 + }, + { + "epoch": 1.4187716891292066, + "grad_norm": 0.7413774728775024, + "learning_rate": 7.227173749402949e-06, + "loss": 2.4429, + "step": 17580 + }, + { + "epoch": 1.4188523928657897, + "grad_norm": 0.7088857889175415, + "learning_rate": 7.22128234633751e-06, + "loss": 2.4487, + "step": 17581 + }, + { + "epoch": 1.4189330966023728, + "grad_norm": 0.7451753616333008, + "learning_rate": 7.215393255560265e-06, + "loss": 2.43, + "step": 17582 + }, + { + "epoch": 1.4190138003389556, + "grad_norm": 0.7113354802131653, + "learning_rate": 7.209506477217942e-06, + "loss": 2.4079, + "step": 17583 + }, + { + "epoch": 1.4190945040755387, + "grad_norm": 0.6877462863922119, + "learning_rate": 7.203622011457268e-06, + "loss": 2.4638, + "step": 17584 + }, + { + "epoch": 1.4191752078121218, + "grad_norm": 0.6908687353134155, + "learning_rate": 7.1977398584249345e-06, + "loss": 2.4117, + "step": 17585 + }, + { + "epoch": 1.4192559115487047, + "grad_norm": 0.7053657174110413, + "learning_rate": 7.191860018267482e-06, + "loss": 2.4128, + "step": 17586 + }, + { + "epoch": 1.4193366152852878, + "grad_norm": 0.6886352896690369, + "learning_rate": 7.185982491131493e-06, + "loss": 2.4201, + "step": 17587 + }, + { + "epoch": 1.4194173190218708, + "grad_norm": 0.7148453593254089, + "learning_rate": 7.180107277163428e-06, + "loss": 2.456, + "step": 17588 + }, + { + "epoch": 1.4194980227584537, + 
"grad_norm": 0.7405968904495239, + "learning_rate": 7.174234376509725e-06, + "loss": 2.371, + "step": 17589 + }, + { + "epoch": 1.4195787264950368, + "grad_norm": 0.6733896136283875, + "learning_rate": 7.168363789316757e-06, + "loss": 2.439, + "step": 17590 + }, + { + "epoch": 1.4196594302316197, + "grad_norm": 0.7196522355079651, + "learning_rate": 7.162495515730838e-06, + "loss": 2.4666, + "step": 17591 + }, + { + "epoch": 1.4197401339682028, + "grad_norm": 0.7885043025016785, + "learning_rate": 7.156629555898198e-06, + "loss": 2.3704, + "step": 17592 + }, + { + "epoch": 1.4198208377047856, + "grad_norm": 0.7290148735046387, + "learning_rate": 7.15076590996504e-06, + "loss": 2.4693, + "step": 17593 + }, + { + "epoch": 1.4199015414413687, + "grad_norm": 0.7527376413345337, + "learning_rate": 7.144904578077505e-06, + "loss": 2.5135, + "step": 17594 + }, + { + "epoch": 1.4199822451779518, + "grad_norm": 0.740208625793457, + "learning_rate": 7.139045560381697e-06, + "loss": 2.4153, + "step": 17595 + }, + { + "epoch": 1.4200629489145347, + "grad_norm": 0.7285439968109131, + "learning_rate": 7.133188857023599e-06, + "loss": 2.391, + "step": 17596 + }, + { + "epoch": 1.4201436526511177, + "grad_norm": 0.6705127358436584, + "learning_rate": 7.1273344681491824e-06, + "loss": 2.4037, + "step": 17597 + }, + { + "epoch": 1.4202243563877008, + "grad_norm": 0.7113380432128906, + "learning_rate": 7.121482393904366e-06, + "loss": 2.4395, + "step": 17598 + }, + { + "epoch": 1.4203050601242837, + "grad_norm": 0.6606113314628601, + "learning_rate": 7.1156326344349985e-06, + "loss": 2.4618, + "step": 17599 + }, + { + "epoch": 1.4203857638608668, + "grad_norm": 0.6471076607704163, + "learning_rate": 7.109785189886864e-06, + "loss": 2.4263, + "step": 17600 + }, + { + "epoch": 1.4204664675974499, + "grad_norm": 0.7686622142791748, + "learning_rate": 7.103940060405712e-06, + "loss": 2.3989, + "step": 17601 + }, + { + "epoch": 1.4205471713340327, + "grad_norm": 0.6636856198310852, + 
"learning_rate": 7.0980972461372035e-06, + "loss": 2.4012, + "step": 17602 + }, + { + "epoch": 1.4206278750706158, + "grad_norm": 0.719194769859314, + "learning_rate": 7.0922567472269444e-06, + "loss": 2.4121, + "step": 17603 + }, + { + "epoch": 1.4207085788071987, + "grad_norm": 0.6569145321846008, + "learning_rate": 7.0864185638205404e-06, + "loss": 2.368, + "step": 17604 + }, + { + "epoch": 1.4207892825437818, + "grad_norm": 0.6548880338668823, + "learning_rate": 7.080582696063442e-06, + "loss": 2.4081, + "step": 17605 + }, + { + "epoch": 1.4208699862803646, + "grad_norm": 0.6192221641540527, + "learning_rate": 7.074749144101112e-06, + "loss": 2.3765, + "step": 17606 + }, + { + "epoch": 1.4209506900169477, + "grad_norm": 0.733065128326416, + "learning_rate": 7.068917908078942e-06, + "loss": 2.4429, + "step": 17607 + }, + { + "epoch": 1.4210313937535308, + "grad_norm": 0.7430265545845032, + "learning_rate": 7.063088988142275e-06, + "loss": 2.4041, + "step": 17608 + }, + { + "epoch": 1.4211120974901137, + "grad_norm": 0.7140394449234009, + "learning_rate": 7.0572623844363584e-06, + "loss": 2.3897, + "step": 17609 + }, + { + "epoch": 1.4211928012266968, + "grad_norm": 0.7149982452392578, + "learning_rate": 7.051438097106422e-06, + "loss": 2.4124, + "step": 17610 + }, + { + "epoch": 1.4212735049632799, + "grad_norm": 0.7337482571601868, + "learning_rate": 7.045616126297638e-06, + "loss": 2.4636, + "step": 17611 + }, + { + "epoch": 1.4213542086998627, + "grad_norm": 0.6936220526695251, + "learning_rate": 7.039796472155058e-06, + "loss": 2.4287, + "step": 17612 + }, + { + "epoch": 1.4214349124364458, + "grad_norm": 0.7598823308944702, + "learning_rate": 7.033979134823765e-06, + "loss": 2.3592, + "step": 17613 + }, + { + "epoch": 1.421515616173029, + "grad_norm": 0.7291054725646973, + "learning_rate": 7.028164114448732e-06, + "loss": 2.4433, + "step": 17614 + }, + { + "epoch": 1.4215963199096118, + "grad_norm": 0.7178683876991272, + "learning_rate": 
7.022351411174866e-06, + "loss": 2.4615, + "step": 17615 + }, + { + "epoch": 1.4216770236461949, + "grad_norm": 0.6711047887802124, + "learning_rate": 7.01654102514705e-06, + "loss": 2.3828, + "step": 17616 + }, + { + "epoch": 1.421757727382778, + "grad_norm": 0.7782542705535889, + "learning_rate": 7.010732956510091e-06, + "loss": 2.3609, + "step": 17617 + }, + { + "epoch": 1.4218384311193608, + "grad_norm": 0.7100348472595215, + "learning_rate": 7.004927205408751e-06, + "loss": 2.4107, + "step": 17618 + }, + { + "epoch": 1.421919134855944, + "grad_norm": 0.7031453251838684, + "learning_rate": 6.9991237719877145e-06, + "loss": 2.3806, + "step": 17619 + }, + { + "epoch": 1.4219998385925268, + "grad_norm": 0.6231544613838196, + "learning_rate": 6.993322656391632e-06, + "loss": 2.3515, + "step": 17620 + }, + { + "epoch": 1.4220805423291099, + "grad_norm": 0.7339803576469421, + "learning_rate": 6.987523858765055e-06, + "loss": 2.4218, + "step": 17621 + }, + { + "epoch": 1.4221612460656927, + "grad_norm": 0.6874008774757385, + "learning_rate": 6.9817273792525224e-06, + "loss": 2.4308, + "step": 17622 + }, + { + "epoch": 1.4222419498022758, + "grad_norm": 0.692850649356842, + "learning_rate": 6.97593321799851e-06, + "loss": 2.4159, + "step": 17623 + }, + { + "epoch": 1.422322653538859, + "grad_norm": 0.7120705842971802, + "learning_rate": 6.970141375147398e-06, + "loss": 2.4639, + "step": 17624 + }, + { + "epoch": 1.4224033572754418, + "grad_norm": 0.6556580662727356, + "learning_rate": 6.9643518508435425e-06, + "loss": 2.425, + "step": 17625 + }, + { + "epoch": 1.4224840610120248, + "grad_norm": 0.6515032052993774, + "learning_rate": 6.958564645231225e-06, + "loss": 2.3712, + "step": 17626 + }, + { + "epoch": 1.422564764748608, + "grad_norm": 0.6835498213768005, + "learning_rate": 6.95277975845472e-06, + "loss": 2.4274, + "step": 17627 + }, + { + "epoch": 1.4226454684851908, + "grad_norm": 0.7465600967407227, + "learning_rate": 6.9469971906581555e-06, + "loss": 2.4905, 
+ "step": 17628 + }, + { + "epoch": 1.4227261722217739, + "grad_norm": 0.7540421485900879, + "learning_rate": 6.94121694198564e-06, + "loss": 2.4636, + "step": 17629 + }, + { + "epoch": 1.422806875958357, + "grad_norm": 0.8491081595420837, + "learning_rate": 6.935439012581291e-06, + "loss": 2.345, + "step": 17630 + }, + { + "epoch": 1.4228875796949398, + "grad_norm": 0.6806172728538513, + "learning_rate": 6.92966340258906e-06, + "loss": 2.3937, + "step": 17631 + }, + { + "epoch": 1.422968283431523, + "grad_norm": 0.7586994171142578, + "learning_rate": 6.9238901121529085e-06, + "loss": 2.3645, + "step": 17632 + }, + { + "epoch": 1.423048987168106, + "grad_norm": 0.6934102773666382, + "learning_rate": 6.918119141416735e-06, + "loss": 2.3861, + "step": 17633 + }, + { + "epoch": 1.4231296909046889, + "grad_norm": 0.7167627215385437, + "learning_rate": 6.912350490524322e-06, + "loss": 2.4044, + "step": 17634 + }, + { + "epoch": 1.423210394641272, + "grad_norm": 0.6630876660346985, + "learning_rate": 6.906584159619478e-06, + "loss": 2.4214, + "step": 17635 + }, + { + "epoch": 1.4232910983778548, + "grad_norm": 0.7125325798988342, + "learning_rate": 6.9008201488459325e-06, + "loss": 2.4516, + "step": 17636 + }, + { + "epoch": 1.423371802114438, + "grad_norm": 0.6531164050102234, + "learning_rate": 6.895058458347281e-06, + "loss": 2.4223, + "step": 17637 + }, + { + "epoch": 1.4234525058510208, + "grad_norm": 0.727008581161499, + "learning_rate": 6.889299088267154e-06, + "loss": 2.446, + "step": 17638 + }, + { + "epoch": 1.4235332095876039, + "grad_norm": 0.7188040614128113, + "learning_rate": 6.883542038749091e-06, + "loss": 2.4109, + "step": 17639 + }, + { + "epoch": 1.423613913324187, + "grad_norm": 0.73248291015625, + "learning_rate": 6.877787309936568e-06, + "loss": 2.4398, + "step": 17640 + }, + { + "epoch": 1.4236946170607698, + "grad_norm": 0.7350964546203613, + "learning_rate": 6.872034901973012e-06, + "loss": 2.4766, + "step": 17641 + }, + { + "epoch": 
1.423775320797353, + "grad_norm": 0.7280460596084595, + "learning_rate": 6.866284815001777e-06, + "loss": 2.4588, + "step": 17642 + }, + { + "epoch": 1.423856024533936, + "grad_norm": 0.68912672996521, + "learning_rate": 6.860537049166205e-06, + "loss": 2.353, + "step": 17643 + }, + { + "epoch": 1.4239367282705189, + "grad_norm": 0.6742156147956848, + "learning_rate": 6.85479160460949e-06, + "loss": 2.4123, + "step": 17644 + }, + { + "epoch": 1.424017432007102, + "grad_norm": 0.6858388185501099, + "learning_rate": 6.849048481474863e-06, + "loss": 2.4243, + "step": 17645 + }, + { + "epoch": 1.424098135743685, + "grad_norm": 0.7317911386489868, + "learning_rate": 6.8433076799054644e-06, + "loss": 2.3713, + "step": 17646 + }, + { + "epoch": 1.424178839480268, + "grad_norm": 0.6934579014778137, + "learning_rate": 6.837569200044325e-06, + "loss": 2.4667, + "step": 17647 + }, + { + "epoch": 1.424259543216851, + "grad_norm": 0.7017713189125061, + "learning_rate": 6.831833042034497e-06, + "loss": 2.3543, + "step": 17648 + }, + { + "epoch": 1.4243402469534339, + "grad_norm": 0.7379886507987976, + "learning_rate": 6.8260992060189325e-06, + "loss": 2.4392, + "step": 17649 + }, + { + "epoch": 1.424420950690017, + "grad_norm": 0.6645724177360535, + "learning_rate": 6.820367692140539e-06, + "loss": 2.4329, + "step": 17650 + }, + { + "epoch": 1.4245016544265998, + "grad_norm": 0.642423689365387, + "learning_rate": 6.814638500542159e-06, + "loss": 2.4157, + "step": 17651 + }, + { + "epoch": 1.424582358163183, + "grad_norm": 0.6720073819160461, + "learning_rate": 6.808911631366588e-06, + "loss": 2.44, + "step": 17652 + }, + { + "epoch": 1.424663061899766, + "grad_norm": 0.6966024041175842, + "learning_rate": 6.803187084756524e-06, + "loss": 2.4087, + "step": 17653 + }, + { + "epoch": 1.4247437656363489, + "grad_norm": 0.6998239755630493, + "learning_rate": 6.797464860854652e-06, + "loss": 2.4335, + "step": 17654 + }, + { + "epoch": 1.424824469372932, + "grad_norm": 
0.6885339617729187, + "learning_rate": 6.791744959803614e-06, + "loss": 2.4327, + "step": 17655 + }, + { + "epoch": 1.424905173109515, + "grad_norm": 0.6395631432533264, + "learning_rate": 6.7860273817459294e-06, + "loss": 2.3941, + "step": 17656 + }, + { + "epoch": 1.424985876846098, + "grad_norm": 0.7010350823402405, + "learning_rate": 6.7803121268240956e-06, + "loss": 2.4118, + "step": 17657 + }, + { + "epoch": 1.425066580582681, + "grad_norm": 0.6954346895217896, + "learning_rate": 6.774599195180565e-06, + "loss": 2.416, + "step": 17658 + }, + { + "epoch": 1.425147284319264, + "grad_norm": 0.6685010194778442, + "learning_rate": 6.768888586957722e-06, + "loss": 2.4246, + "step": 17659 + }, + { + "epoch": 1.425227988055847, + "grad_norm": 0.7244373559951782, + "learning_rate": 6.7631803022978776e-06, + "loss": 2.4385, + "step": 17660 + }, + { + "epoch": 1.42530869179243, + "grad_norm": 0.6633989810943604, + "learning_rate": 6.757474341343306e-06, + "loss": 2.413, + "step": 17661 + }, + { + "epoch": 1.4253893955290131, + "grad_norm": 0.6696286797523499, + "learning_rate": 6.751770704236226e-06, + "loss": 2.4586, + "step": 17662 + }, + { + "epoch": 1.425470099265596, + "grad_norm": 0.7322936654090881, + "learning_rate": 6.746069391118759e-06, + "loss": 2.414, + "step": 17663 + }, + { + "epoch": 1.425550803002179, + "grad_norm": 0.6786227226257324, + "learning_rate": 6.740370402133012e-06, + "loss": 2.3964, + "step": 17664 + }, + { + "epoch": 1.425631506738762, + "grad_norm": 0.6408207416534424, + "learning_rate": 6.734673737421027e-06, + "loss": 2.4064, + "step": 17665 + }, + { + "epoch": 1.425712210475345, + "grad_norm": 0.7589663863182068, + "learning_rate": 6.728979397124768e-06, + "loss": 2.3765, + "step": 17666 + }, + { + "epoch": 1.4257929142119279, + "grad_norm": 0.6696135401725769, + "learning_rate": 6.723287381386145e-06, + "loss": 2.4317, + "step": 17667 + }, + { + "epoch": 1.425873617948511, + "grad_norm": 0.6599292159080505, + "learning_rate": 
6.7175976903470325e-06, + "loss": 2.3867, + "step": 17668 + }, + { + "epoch": 1.425954321685094, + "grad_norm": 0.692328929901123, + "learning_rate": 6.711910324149228e-06, + "loss": 2.3996, + "step": 17669 + }, + { + "epoch": 1.426035025421677, + "grad_norm": 0.7615126371383667, + "learning_rate": 6.706225282934475e-06, + "loss": 2.4436, + "step": 17670 + }, + { + "epoch": 1.42611572915826, + "grad_norm": 0.7187603712081909, + "learning_rate": 6.70054256684447e-06, + "loss": 2.4128, + "step": 17671 + }, + { + "epoch": 1.426196432894843, + "grad_norm": 0.6679204702377319, + "learning_rate": 6.694862176020822e-06, + "loss": 2.423, + "step": 17672 + }, + { + "epoch": 1.426277136631426, + "grad_norm": 0.759952962398529, + "learning_rate": 6.689184110605106e-06, + "loss": 2.4279, + "step": 17673 + }, + { + "epoch": 1.426357840368009, + "grad_norm": 0.6619845628738403, + "learning_rate": 6.683508370738845e-06, + "loss": 2.4219, + "step": 17674 + }, + { + "epoch": 1.4264385441045921, + "grad_norm": 0.6806942224502563, + "learning_rate": 6.6778349565635005e-06, + "loss": 2.4214, + "step": 17675 + }, + { + "epoch": 1.426519247841175, + "grad_norm": 0.6780219674110413, + "learning_rate": 6.672163868220449e-06, + "loss": 2.4404, + "step": 17676 + }, + { + "epoch": 1.426599951577758, + "grad_norm": 0.7276327013969421, + "learning_rate": 6.6664951058510224e-06, + "loss": 2.4088, + "step": 17677 + }, + { + "epoch": 1.4266806553143412, + "grad_norm": 0.7608953714370728, + "learning_rate": 6.66082866959653e-06, + "loss": 2.4102, + "step": 17678 + }, + { + "epoch": 1.426761359050924, + "grad_norm": 0.6784111261367798, + "learning_rate": 6.6551645595981485e-06, + "loss": 2.4823, + "step": 17679 + }, + { + "epoch": 1.4268420627875071, + "grad_norm": 0.6937912106513977, + "learning_rate": 6.649502775997096e-06, + "loss": 2.4118, + "step": 17680 + }, + { + "epoch": 1.42692276652409, + "grad_norm": 0.7426064014434814, + "learning_rate": 6.643843318934462e-06, + "loss": 2.4407, + 
"step": 17681 + }, + { + "epoch": 1.427003470260673, + "grad_norm": 0.6722440719604492, + "learning_rate": 6.638186188551277e-06, + "loss": 2.3981, + "step": 17682 + }, + { + "epoch": 1.427084173997256, + "grad_norm": 0.6830718517303467, + "learning_rate": 6.632531384988538e-06, + "loss": 2.4076, + "step": 17683 + }, + { + "epoch": 1.427164877733839, + "grad_norm": 0.6521410942077637, + "learning_rate": 6.626878908387202e-06, + "loss": 2.4311, + "step": 17684 + }, + { + "epoch": 1.4272455814704221, + "grad_norm": 0.7150115966796875, + "learning_rate": 6.6212287588880985e-06, + "loss": 2.4776, + "step": 17685 + }, + { + "epoch": 1.427326285207005, + "grad_norm": 0.6741146445274353, + "learning_rate": 6.615580936632082e-06, + "loss": 2.4134, + "step": 17686 + }, + { + "epoch": 1.427406988943588, + "grad_norm": 0.6979508996009827, + "learning_rate": 6.6099354417599064e-06, + "loss": 2.4022, + "step": 17687 + }, + { + "epoch": 1.4274876926801712, + "grad_norm": 0.7078632712364197, + "learning_rate": 6.604292274412249e-06, + "loss": 2.4259, + "step": 17688 + }, + { + "epoch": 1.427568396416754, + "grad_norm": 0.6485830545425415, + "learning_rate": 6.598651434729764e-06, + "loss": 2.3641, + "step": 17689 + }, + { + "epoch": 1.4276491001533371, + "grad_norm": 0.7130312919616699, + "learning_rate": 6.593012922853048e-06, + "loss": 2.3965, + "step": 17690 + }, + { + "epoch": 1.4277298038899202, + "grad_norm": 0.6736258268356323, + "learning_rate": 6.587376738922613e-06, + "loss": 2.3729, + "step": 17691 + }, + { + "epoch": 1.427810507626503, + "grad_norm": 0.6798346638679504, + "learning_rate": 6.581742883078923e-06, + "loss": 2.4479, + "step": 17692 + }, + { + "epoch": 1.4278912113630862, + "grad_norm": 0.6962637901306152, + "learning_rate": 6.576111355462411e-06, + "loss": 2.4433, + "step": 17693 + }, + { + "epoch": 1.4279719150996693, + "grad_norm": 0.6981319785118103, + "learning_rate": 6.570482156213431e-06, + "loss": 2.4564, + "step": 17694 + }, + { + "epoch": 
1.4280526188362521, + "grad_norm": 0.6484888195991516, + "learning_rate": 6.564855285472238e-06, + "loss": 2.3709, + "step": 17695 + }, + { + "epoch": 1.428133322572835, + "grad_norm": 0.6646093726158142, + "learning_rate": 6.5592307433791074e-06, + "loss": 2.3716, + "step": 17696 + }, + { + "epoch": 1.428214026309418, + "grad_norm": 0.7607010006904602, + "learning_rate": 6.5536085300742065e-06, + "loss": 2.4029, + "step": 17697 + }, + { + "epoch": 1.4282947300460012, + "grad_norm": 0.7242185473442078, + "learning_rate": 6.547988645697644e-06, + "loss": 2.4091, + "step": 17698 + }, + { + "epoch": 1.428375433782584, + "grad_norm": 0.7394922375679016, + "learning_rate": 6.542371090389487e-06, + "loss": 2.4288, + "step": 17699 + }, + { + "epoch": 1.4284561375191671, + "grad_norm": 0.6763161420822144, + "learning_rate": 6.536755864289745e-06, + "loss": 2.3556, + "step": 17700 + }, + { + "epoch": 1.4285368412557502, + "grad_norm": 0.6837669610977173, + "learning_rate": 6.531142967538362e-06, + "loss": 2.4312, + "step": 17701 + }, + { + "epoch": 1.428617544992333, + "grad_norm": 0.6702602505683899, + "learning_rate": 6.525532400275225e-06, + "loss": 2.4144, + "step": 17702 + }, + { + "epoch": 1.4286982487289162, + "grad_norm": 0.7338566780090332, + "learning_rate": 6.519924162640167e-06, + "loss": 2.4536, + "step": 17703 + }, + { + "epoch": 1.4287789524654992, + "grad_norm": 0.7169400453567505, + "learning_rate": 6.514318254772967e-06, + "loss": 2.4236, + "step": 17704 + }, + { + "epoch": 1.428859656202082, + "grad_norm": 0.7129381895065308, + "learning_rate": 6.508714676813321e-06, + "loss": 2.393, + "step": 17705 + }, + { + "epoch": 1.4289403599386652, + "grad_norm": 0.7212249636650085, + "learning_rate": 6.503113428900898e-06, + "loss": 2.3907, + "step": 17706 + }, + { + "epoch": 1.4290210636752483, + "grad_norm": 0.7539047002792358, + "learning_rate": 6.497514511175296e-06, + "loss": 2.434, + "step": 17707 + }, + { + "epoch": 1.4291017674118311, + "grad_norm": 
0.6876792907714844, + "learning_rate": 6.491917923776048e-06, + "loss": 2.4172, + "step": 17708 + }, + { + "epoch": 1.4291824711484142, + "grad_norm": 0.6665194034576416, + "learning_rate": 6.486323666842631e-06, + "loss": 2.4277, + "step": 17709 + }, + { + "epoch": 1.429263174884997, + "grad_norm": 0.7311907410621643, + "learning_rate": 6.4807317405144675e-06, + "loss": 2.4201, + "step": 17710 + }, + { + "epoch": 1.4293438786215802, + "grad_norm": 0.6492041349411011, + "learning_rate": 6.475142144930946e-06, + "loss": 2.425, + "step": 17711 + }, + { + "epoch": 1.429424582358163, + "grad_norm": 0.7610225677490234, + "learning_rate": 6.469554880231343e-06, + "loss": 2.4694, + "step": 17712 + }, + { + "epoch": 1.4295052860947461, + "grad_norm": 0.7112852931022644, + "learning_rate": 6.463969946554948e-06, + "loss": 2.4431, + "step": 17713 + }, + { + "epoch": 1.4295859898313292, + "grad_norm": 0.6712578535079956, + "learning_rate": 6.458387344040917e-06, + "loss": 2.4067, + "step": 17714 + }, + { + "epoch": 1.429666693567912, + "grad_norm": 0.6936217546463013, + "learning_rate": 6.452807072828393e-06, + "loss": 2.4229, + "step": 17715 + }, + { + "epoch": 1.4297473973044952, + "grad_norm": 0.6615330576896667, + "learning_rate": 6.4472291330564535e-06, + "loss": 2.3567, + "step": 17716 + }, + { + "epoch": 1.4298281010410783, + "grad_norm": 0.7209796905517578, + "learning_rate": 6.441653524864111e-06, + "loss": 2.3577, + "step": 17717 + }, + { + "epoch": 1.4299088047776611, + "grad_norm": 0.7022082805633545, + "learning_rate": 6.436080248390319e-06, + "loss": 2.3681, + "step": 17718 + }, + { + "epoch": 1.4299895085142442, + "grad_norm": 0.6859815120697021, + "learning_rate": 6.430509303773991e-06, + "loss": 2.4193, + "step": 17719 + }, + { + "epoch": 1.4300702122508273, + "grad_norm": 0.7126015424728394, + "learning_rate": 6.424940691153969e-06, + "loss": 2.3746, + "step": 17720 + }, + { + "epoch": 1.4301509159874102, + "grad_norm": 0.6499980092048645, + "learning_rate": 
6.419374410669021e-06, + "loss": 2.445, + "step": 17721 + }, + { + "epoch": 1.4302316197239933, + "grad_norm": 0.6867473125457764, + "learning_rate": 6.413810462457892e-06, + "loss": 2.3323, + "step": 17722 + }, + { + "epoch": 1.4303123234605764, + "grad_norm": 0.7272062301635742, + "learning_rate": 6.4082488466592596e-06, + "loss": 2.4058, + "step": 17723 + }, + { + "epoch": 1.4303930271971592, + "grad_norm": 0.7681101560592651, + "learning_rate": 6.40268956341169e-06, + "loss": 2.4534, + "step": 17724 + }, + { + "epoch": 1.4304737309337423, + "grad_norm": 0.8149757981300354, + "learning_rate": 6.397132612853773e-06, + "loss": 2.4165, + "step": 17725 + }, + { + "epoch": 1.4305544346703252, + "grad_norm": 0.6749057769775391, + "learning_rate": 6.39157799512401e-06, + "loss": 2.364, + "step": 17726 + }, + { + "epoch": 1.4306351384069083, + "grad_norm": 0.716894268989563, + "learning_rate": 6.386025710360799e-06, + "loss": 2.4379, + "step": 17727 + }, + { + "epoch": 1.4307158421434911, + "grad_norm": 0.738310694694519, + "learning_rate": 6.380475758702531e-06, + "loss": 2.3938, + "step": 17728 + }, + { + "epoch": 1.4307965458800742, + "grad_norm": 0.7101424336433411, + "learning_rate": 6.3749281402875505e-06, + "loss": 2.4629, + "step": 17729 + }, + { + "epoch": 1.4308772496166573, + "grad_norm": 0.6945566534996033, + "learning_rate": 6.369382855254069e-06, + "loss": 2.4235, + "step": 17730 + }, + { + "epoch": 1.4309579533532402, + "grad_norm": 0.7886360287666321, + "learning_rate": 6.363839903740332e-06, + "loss": 2.4284, + "step": 17731 + }, + { + "epoch": 1.4310386570898233, + "grad_norm": 0.7391656637191772, + "learning_rate": 6.358299285884495e-06, + "loss": 2.379, + "step": 17732 + }, + { + "epoch": 1.4311193608264063, + "grad_norm": 0.6601181626319885, + "learning_rate": 6.352761001824603e-06, + "loss": 2.3646, + "step": 17733 + }, + { + "epoch": 1.4312000645629892, + "grad_norm": 0.7043817043304443, + "learning_rate": 6.347225051698702e-06, + "loss": 2.4055, 
+ "step": 17734 + }, + { + "epoch": 1.4312807682995723, + "grad_norm": 0.7078529000282288, + "learning_rate": 6.341691435644759e-06, + "loss": 2.3811, + "step": 17735 + }, + { + "epoch": 1.4313614720361554, + "grad_norm": 0.7172150015830994, + "learning_rate": 6.336160153800707e-06, + "loss": 2.3854, + "step": 17736 + }, + { + "epoch": 1.4314421757727382, + "grad_norm": 0.6997926235198975, + "learning_rate": 6.330631206304383e-06, + "loss": 2.3534, + "step": 17737 + }, + { + "epoch": 1.4315228795093213, + "grad_norm": 0.7089913487434387, + "learning_rate": 6.325104593293563e-06, + "loss": 2.4508, + "step": 17738 + }, + { + "epoch": 1.4316035832459044, + "grad_norm": 0.7183980345726013, + "learning_rate": 6.319580314906037e-06, + "loss": 2.3972, + "step": 17739 + }, + { + "epoch": 1.4316842869824873, + "grad_norm": 0.6621310710906982, + "learning_rate": 6.3140583712794295e-06, + "loss": 2.3512, + "step": 17740 + }, + { + "epoch": 1.4317649907190704, + "grad_norm": 0.7076746821403503, + "learning_rate": 6.308538762551386e-06, + "loss": 2.4544, + "step": 17741 + }, + { + "epoch": 1.4318456944556532, + "grad_norm": 0.7050352692604065, + "learning_rate": 6.303021488859462e-06, + "loss": 2.3314, + "step": 17742 + }, + { + "epoch": 1.4319263981922363, + "grad_norm": 0.7305126190185547, + "learning_rate": 6.297506550341181e-06, + "loss": 2.4232, + "step": 17743 + }, + { + "epoch": 1.4320071019288192, + "grad_norm": 0.7779221534729004, + "learning_rate": 6.291993947133967e-06, + "loss": 2.4861, + "step": 17744 + }, + { + "epoch": 1.4320878056654023, + "grad_norm": 0.7207643389701843, + "learning_rate": 6.286483679375244e-06, + "loss": 2.4184, + "step": 17745 + }, + { + "epoch": 1.4321685094019854, + "grad_norm": 0.7540406584739685, + "learning_rate": 6.280975747202289e-06, + "loss": 2.4741, + "step": 17746 + }, + { + "epoch": 1.4322492131385682, + "grad_norm": 0.7011128067970276, + "learning_rate": 6.275470150752416e-06, + "loss": 2.3661, + "step": 17747 + }, + { + "epoch": 
1.4323299168751513, + "grad_norm": 0.666495680809021, + "learning_rate": 6.269966890162837e-06, + "loss": 2.4294, + "step": 17748 + }, + { + "epoch": 1.4324106206117344, + "grad_norm": 0.7928789854049683, + "learning_rate": 6.264465965570676e-06, + "loss": 2.3722, + "step": 17749 + }, + { + "epoch": 1.4324913243483173, + "grad_norm": 0.778322160243988, + "learning_rate": 6.258967377113056e-06, + "loss": 2.4365, + "step": 17750 + }, + { + "epoch": 1.4325720280849004, + "grad_norm": 0.7157254815101624, + "learning_rate": 6.2534711249270015e-06, + "loss": 2.4222, + "step": 17751 + }, + { + "epoch": 1.4326527318214834, + "grad_norm": 0.752855122089386, + "learning_rate": 6.247977209149514e-06, + "loss": 2.4195, + "step": 17752 + }, + { + "epoch": 1.4327334355580663, + "grad_norm": 0.6898384690284729, + "learning_rate": 6.242485629917494e-06, + "loss": 2.372, + "step": 17753 + }, + { + "epoch": 1.4328141392946494, + "grad_norm": 0.6400893330574036, + "learning_rate": 6.236996387367822e-06, + "loss": 2.3678, + "step": 17754 + }, + { + "epoch": 1.4328948430312323, + "grad_norm": 0.6957802176475525, + "learning_rate": 6.23150948163731e-06, + "loss": 2.4423, + "step": 17755 + }, + { + "epoch": 1.4329755467678154, + "grad_norm": 0.6983963251113892, + "learning_rate": 6.226024912862683e-06, + "loss": 2.3467, + "step": 17756 + }, + { + "epoch": 1.4330562505043982, + "grad_norm": 0.697910487651825, + "learning_rate": 6.220542681180652e-06, + "loss": 2.3676, + "step": 17757 + }, + { + "epoch": 1.4331369542409813, + "grad_norm": 0.6732818484306335, + "learning_rate": 6.215062786727843e-06, + "loss": 2.4259, + "step": 17758 + }, + { + "epoch": 1.4332176579775644, + "grad_norm": 0.6379408240318298, + "learning_rate": 6.209585229640813e-06, + "loss": 2.409, + "step": 17759 + }, + { + "epoch": 1.4332983617141473, + "grad_norm": 0.6726407408714294, + "learning_rate": 6.2041100100560856e-06, + "loss": 2.3732, + "step": 17760 + }, + { + "epoch": 1.4333790654507303, + "grad_norm": 
0.7126357555389404, + "learning_rate": 6.19863712811013e-06, + "loss": 2.4324, + "step": 17761 + }, + { + "epoch": 1.4334597691873134, + "grad_norm": 0.7055345773696899, + "learning_rate": 6.193166583939336e-06, + "loss": 2.463, + "step": 17762 + }, + { + "epoch": 1.4335404729238963, + "grad_norm": 0.6864510774612427, + "learning_rate": 6.18769837768004e-06, + "loss": 2.4155, + "step": 17763 + }, + { + "epoch": 1.4336211766604794, + "grad_norm": 0.7269968390464783, + "learning_rate": 6.182232509468544e-06, + "loss": 2.4197, + "step": 17764 + }, + { + "epoch": 1.4337018803970625, + "grad_norm": 0.7829548716545105, + "learning_rate": 6.176768979441039e-06, + "loss": 2.4054, + "step": 17765 + }, + { + "epoch": 1.4337825841336453, + "grad_norm": 0.6840609312057495, + "learning_rate": 6.171307787733704e-06, + "loss": 2.4177, + "step": 17766 + }, + { + "epoch": 1.4338632878702284, + "grad_norm": 0.7106159925460815, + "learning_rate": 6.165848934482654e-06, + "loss": 2.4039, + "step": 17767 + }, + { + "epoch": 1.4339439916068115, + "grad_norm": 0.6945303082466125, + "learning_rate": 6.160392419823957e-06, + "loss": 2.45, + "step": 17768 + }, + { + "epoch": 1.4340246953433944, + "grad_norm": 0.6924156546592712, + "learning_rate": 6.15493824389356e-06, + "loss": 2.4059, + "step": 17769 + }, + { + "epoch": 1.4341053990799775, + "grad_norm": 0.6932214498519897, + "learning_rate": 6.149486406827409e-06, + "loss": 2.4046, + "step": 17770 + }, + { + "epoch": 1.4341861028165603, + "grad_norm": 0.6683449149131775, + "learning_rate": 6.144036908761386e-06, + "loss": 2.4074, + "step": 17771 + }, + { + "epoch": 1.4342668065531434, + "grad_norm": 0.7230218052864075, + "learning_rate": 6.138589749831314e-06, + "loss": 2.3718, + "step": 17772 + }, + { + "epoch": 1.4343475102897263, + "grad_norm": 0.68938809633255, + "learning_rate": 6.133144930172929e-06, + "loss": 2.3776, + "step": 17773 + }, + { + "epoch": 1.4344282140263094, + "grad_norm": 0.6659870743751526, + "learning_rate": 
6.127702449921968e-06, + "loss": 2.3779, + "step": 17774 + }, + { + "epoch": 1.4345089177628925, + "grad_norm": 0.7351429462432861, + "learning_rate": 6.122262309214033e-06, + "loss": 2.334, + "step": 17775 + }, + { + "epoch": 1.4345896214994753, + "grad_norm": 0.6995889544487, + "learning_rate": 6.116824508184715e-06, + "loss": 2.4139, + "step": 17776 + }, + { + "epoch": 1.4346703252360584, + "grad_norm": 0.6568582653999329, + "learning_rate": 6.111389046969551e-06, + "loss": 2.4348, + "step": 17777 + }, + { + "epoch": 1.4347510289726415, + "grad_norm": 0.7047903537750244, + "learning_rate": 6.1059559257039985e-06, + "loss": 2.3877, + "step": 17778 + }, + { + "epoch": 1.4348317327092244, + "grad_norm": 0.7299826145172119, + "learning_rate": 6.10052514452345e-06, + "loss": 2.4533, + "step": 17779 + }, + { + "epoch": 1.4349124364458075, + "grad_norm": 0.6617172956466675, + "learning_rate": 6.095096703563296e-06, + "loss": 2.4276, + "step": 17780 + }, + { + "epoch": 1.4349931401823905, + "grad_norm": 0.7248536944389343, + "learning_rate": 6.089670602958775e-06, + "loss": 2.4145, + "step": 17781 + }, + { + "epoch": 1.4350738439189734, + "grad_norm": 0.7404766082763672, + "learning_rate": 6.084246842845154e-06, + "loss": 2.4556, + "step": 17782 + }, + { + "epoch": 1.4351545476555565, + "grad_norm": 0.6808308362960815, + "learning_rate": 6.0788254233576035e-06, + "loss": 2.3648, + "step": 17783 + }, + { + "epoch": 1.4352352513921396, + "grad_norm": 0.6631487011909485, + "learning_rate": 6.073406344631249e-06, + "loss": 2.4064, + "step": 17784 + }, + { + "epoch": 1.4353159551287225, + "grad_norm": 0.6690654158592224, + "learning_rate": 6.067989606801128e-06, + "loss": 2.4749, + "step": 17785 + }, + { + "epoch": 1.4353966588653055, + "grad_norm": 0.6438129544258118, + "learning_rate": 6.062575210002241e-06, + "loss": 2.424, + "step": 17786 + }, + { + "epoch": 1.4354773626018884, + "grad_norm": 0.710590124130249, + "learning_rate": 6.05716315436955e-06, + "loss": 2.4419, + 
"step": 17787 + }, + { + "epoch": 1.4355580663384715, + "grad_norm": 0.72870272397995, + "learning_rate": 6.0517534400378995e-06, + "loss": 2.4341, + "step": 17788 + }, + { + "epoch": 1.4356387700750544, + "grad_norm": 0.6548538208007812, + "learning_rate": 6.04634606714215e-06, + "loss": 2.3721, + "step": 17789 + }, + { + "epoch": 1.4357194738116374, + "grad_norm": 0.7368030548095703, + "learning_rate": 6.040941035817061e-06, + "loss": 2.461, + "step": 17790 + }, + { + "epoch": 1.4358001775482205, + "grad_norm": 0.7763129472732544, + "learning_rate": 6.035538346197311e-06, + "loss": 2.4701, + "step": 17791 + }, + { + "epoch": 1.4358808812848034, + "grad_norm": 0.7631728649139404, + "learning_rate": 6.030137998417573e-06, + "loss": 2.4796, + "step": 17792 + }, + { + "epoch": 1.4359615850213865, + "grad_norm": 0.7032707929611206, + "learning_rate": 6.024739992612449e-06, + "loss": 2.4119, + "step": 17793 + }, + { + "epoch": 1.4360422887579696, + "grad_norm": 0.701252818107605, + "learning_rate": 6.019344328916454e-06, + "loss": 2.4501, + "step": 17794 + }, + { + "epoch": 1.4361229924945524, + "grad_norm": 0.7271695733070374, + "learning_rate": 6.013951007464058e-06, + "loss": 2.4136, + "step": 17795 + }, + { + "epoch": 1.4362036962311355, + "grad_norm": 0.6560700535774231, + "learning_rate": 6.0085600283897095e-06, + "loss": 2.3737, + "step": 17796 + }, + { + "epoch": 1.4362843999677186, + "grad_norm": 0.6831890344619751, + "learning_rate": 6.003171391827722e-06, + "loss": 2.3986, + "step": 17797 + }, + { + "epoch": 1.4363651037043015, + "grad_norm": 0.6875705718994141, + "learning_rate": 5.997785097912412e-06, + "loss": 2.4159, + "step": 17798 + }, + { + "epoch": 1.4364458074408846, + "grad_norm": 0.704727053642273, + "learning_rate": 5.992401146778026e-06, + "loss": 2.3833, + "step": 17799 + }, + { + "epoch": 1.4365265111774674, + "grad_norm": 0.6632246971130371, + "learning_rate": 5.987019538558758e-06, + "loss": 2.3907, + "step": 17800 + }, + { + "epoch": 
1.4366072149140505, + "grad_norm": 0.7065477967262268, + "learning_rate": 5.981640273388689e-06, + "loss": 2.3473, + "step": 17801 + }, + { + "epoch": 1.4366879186506334, + "grad_norm": 0.6765400171279907, + "learning_rate": 5.976263351401923e-06, + "loss": 2.4051, + "step": 17802 + }, + { + "epoch": 1.4367686223872165, + "grad_norm": 0.6867364645004272, + "learning_rate": 5.9708887727324525e-06, + "loss": 2.3452, + "step": 17803 + }, + { + "epoch": 1.4368493261237996, + "grad_norm": 0.644715428352356, + "learning_rate": 5.965516537514215e-06, + "loss": 2.3826, + "step": 17804 + }, + { + "epoch": 1.4369300298603824, + "grad_norm": 0.7649596333503723, + "learning_rate": 5.9601466458811265e-06, + "loss": 2.436, + "step": 17805 + }, + { + "epoch": 1.4370107335969655, + "grad_norm": 0.699653148651123, + "learning_rate": 5.954779097967023e-06, + "loss": 2.3694, + "step": 17806 + }, + { + "epoch": 1.4370914373335486, + "grad_norm": 0.7054964900016785, + "learning_rate": 5.949413893905642e-06, + "loss": 2.4194, + "step": 17807 + }, + { + "epoch": 1.4371721410701315, + "grad_norm": 0.7534568309783936, + "learning_rate": 5.944051033830722e-06, + "loss": 2.4175, + "step": 17808 + }, + { + "epoch": 1.4372528448067146, + "grad_norm": 0.7056108117103577, + "learning_rate": 5.9386905178759225e-06, + "loss": 2.4232, + "step": 17809 + }, + { + "epoch": 1.4373335485432976, + "grad_norm": 0.6868974566459656, + "learning_rate": 5.933332346174825e-06, + "loss": 2.3799, + "step": 17810 + }, + { + "epoch": 1.4374142522798805, + "grad_norm": 0.7155748009681702, + "learning_rate": 5.927976518860978e-06, + "loss": 2.4151, + "step": 17811 + }, + { + "epoch": 1.4374949560164636, + "grad_norm": 0.7482681274414062, + "learning_rate": 5.922623036067853e-06, + "loss": 2.4568, + "step": 17812 + }, + { + "epoch": 1.4375756597530467, + "grad_norm": 0.6348850727081299, + "learning_rate": 5.917271897928889e-06, + "loss": 2.4202, + "step": 17813 + }, + { + "epoch": 1.4376563634896296, + "grad_norm": 
0.7463829517364502, + "learning_rate": 5.911923104577455e-06, + "loss": 2.4288, + "step": 17814 + }, + { + "epoch": 1.4377370672262126, + "grad_norm": 0.7019917964935303, + "learning_rate": 5.9065766561468335e-06, + "loss": 2.475, + "step": 17815 + }, + { + "epoch": 1.4378177709627955, + "grad_norm": 0.7005626559257507, + "learning_rate": 5.9012325527702975e-06, + "loss": 2.3869, + "step": 17816 + }, + { + "epoch": 1.4378984746993786, + "grad_norm": 0.7216863632202148, + "learning_rate": 5.895890794581016e-06, + "loss": 2.4224, + "step": 17817 + }, + { + "epoch": 1.4379791784359615, + "grad_norm": 0.7037425637245178, + "learning_rate": 5.890551381712128e-06, + "loss": 2.4347, + "step": 17818 + }, + { + "epoch": 1.4380598821725445, + "grad_norm": 0.7240646481513977, + "learning_rate": 5.8852143142967055e-06, + "loss": 2.4275, + "step": 17819 + }, + { + "epoch": 1.4381405859091276, + "grad_norm": 0.6970441937446594, + "learning_rate": 5.879879592467763e-06, + "loss": 2.4526, + "step": 17820 + }, + { + "epoch": 1.4382212896457105, + "grad_norm": 0.6941537857055664, + "learning_rate": 5.8745472163582395e-06, + "loss": 2.4882, + "step": 17821 + }, + { + "epoch": 1.4383019933822936, + "grad_norm": 0.668228030204773, + "learning_rate": 5.86921718610105e-06, + "loss": 2.3824, + "step": 17822 + }, + { + "epoch": 1.4383826971188767, + "grad_norm": 0.6851341128349304, + "learning_rate": 5.863889501829034e-06, + "loss": 2.3931, + "step": 17823 + }, + { + "epoch": 1.4384634008554595, + "grad_norm": 0.6785841584205627, + "learning_rate": 5.858564163674962e-06, + "loss": 2.4268, + "step": 17824 + }, + { + "epoch": 1.4385441045920426, + "grad_norm": 0.7137345671653748, + "learning_rate": 5.853241171771573e-06, + "loss": 2.3509, + "step": 17825 + }, + { + "epoch": 1.4386248083286257, + "grad_norm": 0.7188790440559387, + "learning_rate": 5.847920526251505e-06, + "loss": 2.422, + "step": 17826 + }, + { + "epoch": 1.4387055120652086, + "grad_norm": 0.6798515915870667, + 
"learning_rate": 5.842602227247374e-06, + "loss": 2.3917, + "step": 17827 + }, + { + "epoch": 1.4387862158017917, + "grad_norm": 0.7113839387893677, + "learning_rate": 5.837286274891718e-06, + "loss": 2.4119, + "step": 17828 + }, + { + "epoch": 1.4388669195383748, + "grad_norm": 0.6735878586769104, + "learning_rate": 5.831972669317054e-06, + "loss": 2.3973, + "step": 17829 + }, + { + "epoch": 1.4389476232749576, + "grad_norm": 0.6665332913398743, + "learning_rate": 5.8266614106557645e-06, + "loss": 2.3567, + "step": 17830 + }, + { + "epoch": 1.4390283270115407, + "grad_norm": 0.6652774214744568, + "learning_rate": 5.821352499040256e-06, + "loss": 2.4022, + "step": 17831 + }, + { + "epoch": 1.4391090307481236, + "grad_norm": 0.672563910484314, + "learning_rate": 5.8160459346028205e-06, + "loss": 2.4142, + "step": 17832 + }, + { + "epoch": 1.4391897344847067, + "grad_norm": 0.6333127021789551, + "learning_rate": 5.8107417174757205e-06, + "loss": 2.3679, + "step": 17833 + }, + { + "epoch": 1.4392704382212895, + "grad_norm": 0.7484139204025269, + "learning_rate": 5.80543984779115e-06, + "loss": 2.408, + "step": 17834 + }, + { + "epoch": 1.4393511419578726, + "grad_norm": 0.687872052192688, + "learning_rate": 5.800140325681269e-06, + "loss": 2.3956, + "step": 17835 + }, + { + "epoch": 1.4394318456944557, + "grad_norm": 0.716371476650238, + "learning_rate": 5.794843151278107e-06, + "loss": 2.4134, + "step": 17836 + }, + { + "epoch": 1.4395125494310386, + "grad_norm": 0.7058377265930176, + "learning_rate": 5.789548324713711e-06, + "loss": 2.3758, + "step": 17837 + }, + { + "epoch": 1.4395932531676217, + "grad_norm": 0.6678213477134705, + "learning_rate": 5.784255846120057e-06, + "loss": 2.437, + "step": 17838 + }, + { + "epoch": 1.4396739569042047, + "grad_norm": 0.659657895565033, + "learning_rate": 5.778965715629015e-06, + "loss": 2.4551, + "step": 17839 + }, + { + "epoch": 1.4397546606407876, + "grad_norm": 0.7233473062515259, + "learning_rate": 5.773677933372445e-06, 
+ "loss": 2.422, + "step": 17840 + }, + { + "epoch": 1.4398353643773707, + "grad_norm": 0.6661399006843567, + "learning_rate": 5.768392499482144e-06, + "loss": 2.4354, + "step": 17841 + }, + { + "epoch": 1.4399160681139538, + "grad_norm": 0.700758695602417, + "learning_rate": 5.763109414089807e-06, + "loss": 2.4248, + "step": 17842 + }, + { + "epoch": 1.4399967718505366, + "grad_norm": 0.7119004130363464, + "learning_rate": 5.757828677327104e-06, + "loss": 2.4281, + "step": 17843 + }, + { + "epoch": 1.4400774755871197, + "grad_norm": 0.6928756237030029, + "learning_rate": 5.752550289325687e-06, + "loss": 2.431, + "step": 17844 + }, + { + "epoch": 1.4401581793237028, + "grad_norm": 0.7062112092971802, + "learning_rate": 5.747274250217094e-06, + "loss": 2.3986, + "step": 17845 + }, + { + "epoch": 1.4402388830602857, + "grad_norm": 0.7257757782936096, + "learning_rate": 5.742000560132787e-06, + "loss": 2.398, + "step": 17846 + }, + { + "epoch": 1.4403195867968688, + "grad_norm": 0.7206892371177673, + "learning_rate": 5.736729219204218e-06, + "loss": 2.4126, + "step": 17847 + }, + { + "epoch": 1.4404002905334516, + "grad_norm": 0.6752306818962097, + "learning_rate": 5.73146022756278e-06, + "loss": 2.3732, + "step": 17848 + }, + { + "epoch": 1.4404809942700347, + "grad_norm": 0.6507758498191833, + "learning_rate": 5.726193585339756e-06, + "loss": 2.42, + "step": 17849 + }, + { + "epoch": 1.4405616980066176, + "grad_norm": 0.6858177781105042, + "learning_rate": 5.7209292926664325e-06, + "loss": 2.3956, + "step": 17850 + }, + { + "epoch": 1.4406424017432007, + "grad_norm": 0.7283064723014832, + "learning_rate": 5.715667349674003e-06, + "loss": 2.4295, + "step": 17851 + }, + { + "epoch": 1.4407231054797838, + "grad_norm": 0.7306254506111145, + "learning_rate": 5.710407756493597e-06, + "loss": 2.4017, + "step": 17852 + }, + { + "epoch": 1.4408038092163666, + "grad_norm": 0.6728531122207642, + "learning_rate": 5.7051505132562965e-06, + "loss": 2.3767, + "step": 17853 + }, + 
{ + "epoch": 1.4408845129529497, + "grad_norm": 0.6739331483840942, + "learning_rate": 5.699895620093143e-06, + "loss": 2.4215, + "step": 17854 + }, + { + "epoch": 1.4409652166895328, + "grad_norm": 0.6646329760551453, + "learning_rate": 5.6946430771350975e-06, + "loss": 2.3565, + "step": 17855 + }, + { + "epoch": 1.4410459204261157, + "grad_norm": 0.7297715544700623, + "learning_rate": 5.6893928845130565e-06, + "loss": 2.4182, + "step": 17856 + }, + { + "epoch": 1.4411266241626988, + "grad_norm": 0.7202762961387634, + "learning_rate": 5.684145042357891e-06, + "loss": 2.4061, + "step": 17857 + }, + { + "epoch": 1.4412073278992819, + "grad_norm": 0.6860011219978333, + "learning_rate": 5.678899550800354e-06, + "loss": 2.4116, + "step": 17858 + }, + { + "epoch": 1.4412880316358647, + "grad_norm": 0.8249632120132446, + "learning_rate": 5.6736564099712064e-06, + "loss": 2.44, + "step": 17859 + }, + { + "epoch": 1.4413687353724478, + "grad_norm": 0.6403428912162781, + "learning_rate": 5.668415620001111e-06, + "loss": 2.4067, + "step": 17860 + }, + { + "epoch": 1.4414494391090307, + "grad_norm": 0.7119578123092651, + "learning_rate": 5.663177181020696e-06, + "loss": 2.4161, + "step": 17861 + }, + { + "epoch": 1.4415301428456138, + "grad_norm": 0.6670625805854797, + "learning_rate": 5.65794109316049e-06, + "loss": 2.4548, + "step": 17862 + }, + { + "epoch": 1.4416108465821966, + "grad_norm": 0.7028807997703552, + "learning_rate": 5.652707356551001e-06, + "loss": 2.4008, + "step": 17863 + }, + { + "epoch": 1.4416915503187797, + "grad_norm": 0.7150121331214905, + "learning_rate": 5.64747597132268e-06, + "loss": 2.3776, + "step": 17864 + }, + { + "epoch": 1.4417722540553628, + "grad_norm": 0.6778405904769897, + "learning_rate": 5.642246937605888e-06, + "loss": 2.4485, + "step": 17865 + }, + { + "epoch": 1.4418529577919457, + "grad_norm": 0.7118825316429138, + "learning_rate": 5.637020255530967e-06, + "loss": 2.3808, + "step": 17866 + }, + { + "epoch": 1.4419336615285288, + 
"grad_norm": 0.7020435929298401, + "learning_rate": 5.631795925228178e-06, + "loss": 2.3947, + "step": 17867 + }, + { + "epoch": 1.4420143652651118, + "grad_norm": 0.6727933287620544, + "learning_rate": 5.626573946827696e-06, + "loss": 2.3789, + "step": 17868 + }, + { + "epoch": 1.4420950690016947, + "grad_norm": 0.7938553690910339, + "learning_rate": 5.621354320459693e-06, + "loss": 2.4262, + "step": 17869 + }, + { + "epoch": 1.4421757727382778, + "grad_norm": 0.6903455853462219, + "learning_rate": 5.616137046254255e-06, + "loss": 2.3382, + "step": 17870 + }, + { + "epoch": 1.4422564764748609, + "grad_norm": 0.6873618960380554, + "learning_rate": 5.6109221243414e-06, + "loss": 2.3795, + "step": 17871 + }, + { + "epoch": 1.4423371802114437, + "grad_norm": 0.667328953742981, + "learning_rate": 5.60570955485109e-06, + "loss": 2.4353, + "step": 17872 + }, + { + "epoch": 1.4424178839480268, + "grad_norm": 0.7091758847236633, + "learning_rate": 5.600499337913256e-06, + "loss": 2.3897, + "step": 17873 + }, + { + "epoch": 1.44249858768461, + "grad_norm": 0.6954033374786377, + "learning_rate": 5.5952914736577375e-06, + "loss": 2.4334, + "step": 17874 + }, + { + "epoch": 1.4425792914211928, + "grad_norm": 0.692724347114563, + "learning_rate": 5.590085962214331e-06, + "loss": 2.3355, + "step": 17875 + }, + { + "epoch": 1.4426599951577759, + "grad_norm": 0.7159389853477478, + "learning_rate": 5.584882803712777e-06, + "loss": 2.4425, + "step": 17876 + }, + { + "epoch": 1.4427406988943587, + "grad_norm": 0.7154572606086731, + "learning_rate": 5.579681998282759e-06, + "loss": 2.4353, + "step": 17877 + }, + { + "epoch": 1.4428214026309418, + "grad_norm": 0.6575120687484741, + "learning_rate": 5.574483546053866e-06, + "loss": 2.4038, + "step": 17878 + }, + { + "epoch": 1.4429021063675247, + "grad_norm": 0.7108171582221985, + "learning_rate": 5.56928744715568e-06, + "loss": 2.3661, + "step": 17879 + }, + { + "epoch": 1.4429828101041078, + "grad_norm": 0.7755489349365234, + 
"learning_rate": 5.564093701717698e-06, + "loss": 2.4026, + "step": 17880 + }, + { + "epoch": 1.4430635138406909, + "grad_norm": 0.7044881582260132, + "learning_rate": 5.5589023098693625e-06, + "loss": 2.433, + "step": 17881 + }, + { + "epoch": 1.4431442175772737, + "grad_norm": 0.6959014534950256, + "learning_rate": 5.553713271740035e-06, + "loss": 2.3399, + "step": 17882 + }, + { + "epoch": 1.4432249213138568, + "grad_norm": 0.6273486614227295, + "learning_rate": 5.5485265874590685e-06, + "loss": 2.4085, + "step": 17883 + }, + { + "epoch": 1.44330562505044, + "grad_norm": 0.711344301700592, + "learning_rate": 5.5433422571557145e-06, + "loss": 2.5058, + "step": 17884 + }, + { + "epoch": 1.4433863287870228, + "grad_norm": 0.7118481397628784, + "learning_rate": 5.5381602809591815e-06, + "loss": 2.4213, + "step": 17885 + }, + { + "epoch": 1.4434670325236059, + "grad_norm": 0.6486421227455139, + "learning_rate": 5.5329806589986435e-06, + "loss": 2.4225, + "step": 17886 + }, + { + "epoch": 1.443547736260189, + "grad_norm": 0.6768030524253845, + "learning_rate": 5.527803391403141e-06, + "loss": 2.4155, + "step": 17887 + }, + { + "epoch": 1.4436284399967718, + "grad_norm": 0.6921476721763611, + "learning_rate": 5.522628478301739e-06, + "loss": 2.4487, + "step": 17888 + }, + { + "epoch": 1.443709143733355, + "grad_norm": 0.6598425507545471, + "learning_rate": 5.517455919823411e-06, + "loss": 2.3929, + "step": 17889 + }, + { + "epoch": 1.443789847469938, + "grad_norm": 0.6784876585006714, + "learning_rate": 5.512285716097043e-06, + "loss": 2.4357, + "step": 17890 + }, + { + "epoch": 1.4438705512065209, + "grad_norm": 0.6828306913375854, + "learning_rate": 5.507117867251521e-06, + "loss": 2.3931, + "step": 17891 + }, + { + "epoch": 1.443951254943104, + "grad_norm": 0.708244800567627, + "learning_rate": 5.5019523734156195e-06, + "loss": 2.3955, + "step": 17892 + }, + { + "epoch": 1.4440319586796868, + "grad_norm": 0.7499315142631531, + "learning_rate": 5.496789234718081e-06, 
+ "loss": 2.4862, + "step": 17893 + }, + { + "epoch": 1.44411266241627, + "grad_norm": 0.6969838738441467, + "learning_rate": 5.491628451287601e-06, + "loss": 2.4367, + "step": 17894 + }, + { + "epoch": 1.4441933661528528, + "grad_norm": 0.6904775500297546, + "learning_rate": 5.486470023252777e-06, + "loss": 2.4772, + "step": 17895 + }, + { + "epoch": 1.4442740698894359, + "grad_norm": 0.7058213949203491, + "learning_rate": 5.481313950742195e-06, + "loss": 2.4059, + "step": 17896 + }, + { + "epoch": 1.444354773626019, + "grad_norm": 0.6824650764465332, + "learning_rate": 5.4761602338843425e-06, + "loss": 2.4058, + "step": 17897 + }, + { + "epoch": 1.4444354773626018, + "grad_norm": 0.6874315738677979, + "learning_rate": 5.471008872807648e-06, + "loss": 2.4055, + "step": 17898 + }, + { + "epoch": 1.444516181099185, + "grad_norm": 0.7096625566482544, + "learning_rate": 5.465859867640544e-06, + "loss": 2.4319, + "step": 17899 + }, + { + "epoch": 1.444596884835768, + "grad_norm": 0.6456719636917114, + "learning_rate": 5.460713218511304e-06, + "loss": 2.3403, + "step": 17900 + }, + { + "epoch": 1.4446775885723508, + "grad_norm": 0.6711640357971191, + "learning_rate": 5.4555689255482156e-06, + "loss": 2.4333, + "step": 17901 + }, + { + "epoch": 1.444758292308934, + "grad_norm": 0.6594802737236023, + "learning_rate": 5.450426988879509e-06, + "loss": 2.4027, + "step": 17902 + }, + { + "epoch": 1.444838996045517, + "grad_norm": 0.6931496858596802, + "learning_rate": 5.445287408633304e-06, + "loss": 2.4085, + "step": 17903 + }, + { + "epoch": 1.4449196997820999, + "grad_norm": 0.6932462453842163, + "learning_rate": 5.440150184937709e-06, + "loss": 2.3989, + "step": 17904 + }, + { + "epoch": 1.445000403518683, + "grad_norm": 0.7502899765968323, + "learning_rate": 5.435015317920744e-06, + "loss": 2.4083, + "step": 17905 + }, + { + "epoch": 1.4450811072552658, + "grad_norm": 0.6513844132423401, + "learning_rate": 5.429882807710396e-06, + "loss": 2.3895, + "step": 17906 + }, + { 
+ "epoch": 1.445161810991849, + "grad_norm": 0.6809015274047852, + "learning_rate": 5.4247526544345835e-06, + "loss": 2.3957, + "step": 17907 + }, + { + "epoch": 1.4452425147284318, + "grad_norm": 0.6784202456474304, + "learning_rate": 5.419624858221151e-06, + "loss": 2.3735, + "step": 17908 + }, + { + "epoch": 1.4453232184650149, + "grad_norm": 0.8005407452583313, + "learning_rate": 5.414499419197916e-06, + "loss": 2.3888, + "step": 17909 + }, + { + "epoch": 1.445403922201598, + "grad_norm": 0.7133296728134155, + "learning_rate": 5.409376337492589e-06, + "loss": 2.4347, + "step": 17910 + }, + { + "epoch": 1.4454846259381808, + "grad_norm": 0.6852008104324341, + "learning_rate": 5.404255613232867e-06, + "loss": 2.4154, + "step": 17911 + }, + { + "epoch": 1.445565329674764, + "grad_norm": 0.7864294648170471, + "learning_rate": 5.399137246546393e-06, + "loss": 2.4104, + "step": 17912 + }, + { + "epoch": 1.445646033411347, + "grad_norm": 0.7150406837463379, + "learning_rate": 5.394021237560687e-06, + "loss": 2.4423, + "step": 17913 + }, + { + "epoch": 1.4457267371479299, + "grad_norm": 0.6756410598754883, + "learning_rate": 5.388907586403269e-06, + "loss": 2.4038, + "step": 17914 + }, + { + "epoch": 1.445807440884513, + "grad_norm": 0.662440836429596, + "learning_rate": 5.383796293201604e-06, + "loss": 2.3529, + "step": 17915 + }, + { + "epoch": 1.445888144621096, + "grad_norm": 0.7391942739486694, + "learning_rate": 5.378687358083057e-06, + "loss": 2.4062, + "step": 17916 + }, + { + "epoch": 1.445968848357679, + "grad_norm": 0.762143611907959, + "learning_rate": 5.373580781174958e-06, + "loss": 2.4344, + "step": 17917 + }, + { + "epoch": 1.446049552094262, + "grad_norm": 0.7365298867225647, + "learning_rate": 5.368476562604608e-06, + "loss": 2.4144, + "step": 17918 + }, + { + "epoch": 1.446130255830845, + "grad_norm": 0.7313491702079773, + "learning_rate": 5.3633747024991685e-06, + "loss": 2.3671, + "step": 17919 + }, + { + "epoch": 1.446210959567428, + "grad_norm": 
0.7121514081954956, + "learning_rate": 5.358275200985818e-06, + "loss": 2.3573, + "step": 17920 + }, + { + "epoch": 1.446291663304011, + "grad_norm": 0.6716858744621277, + "learning_rate": 5.353178058191643e-06, + "loss": 2.4398, + "step": 17921 + }, + { + "epoch": 1.446372367040594, + "grad_norm": 0.7036706805229187, + "learning_rate": 5.348083274243687e-06, + "loss": 2.3913, + "step": 17922 + }, + { + "epoch": 1.446453070777177, + "grad_norm": 0.7855868935585022, + "learning_rate": 5.342990849268914e-06, + "loss": 2.4195, + "step": 17923 + }, + { + "epoch": 1.4465337745137599, + "grad_norm": 0.627890408039093, + "learning_rate": 5.337900783394245e-06, + "loss": 2.3954, + "step": 17924 + }, + { + "epoch": 1.446614478250343, + "grad_norm": 0.7047661542892456, + "learning_rate": 5.332813076746535e-06, + "loss": 2.5015, + "step": 17925 + }, + { + "epoch": 1.446695181986926, + "grad_norm": 0.6752549409866333, + "learning_rate": 5.327727729452592e-06, + "loss": 2.4384, + "step": 17926 + }, + { + "epoch": 1.446775885723509, + "grad_norm": 0.8034621477127075, + "learning_rate": 5.322644741639138e-06, + "loss": 2.444, + "step": 17927 + }, + { + "epoch": 1.446856589460092, + "grad_norm": 0.7055982947349548, + "learning_rate": 5.317564113432882e-06, + "loss": 2.4228, + "step": 17928 + }, + { + "epoch": 1.446937293196675, + "grad_norm": 0.7311068177223206, + "learning_rate": 5.312485844960424e-06, + "loss": 2.3979, + "step": 17929 + }, + { + "epoch": 1.447017996933258, + "grad_norm": 0.7067704796791077, + "learning_rate": 5.307409936348329e-06, + "loss": 2.3724, + "step": 17930 + }, + { + "epoch": 1.447098700669841, + "grad_norm": 0.7303062677383423, + "learning_rate": 5.302336387723128e-06, + "loss": 2.444, + "step": 17931 + }, + { + "epoch": 1.4471794044064241, + "grad_norm": 0.7445392608642578, + "learning_rate": 5.297265199211232e-06, + "loss": 2.4629, + "step": 17932 + }, + { + "epoch": 1.447260108143007, + "grad_norm": 0.6778857707977295, + "learning_rate": 
5.2921963709390394e-06, + "loss": 2.3836, + "step": 17933 + }, + { + "epoch": 1.44734081187959, + "grad_norm": 0.6575925350189209, + "learning_rate": 5.287129903032873e-06, + "loss": 2.3851, + "step": 17934 + }, + { + "epoch": 1.4474215156161732, + "grad_norm": 0.736710250377655, + "learning_rate": 5.282065795619029e-06, + "loss": 2.4644, + "step": 17935 + }, + { + "epoch": 1.447502219352756, + "grad_norm": 0.6607224941253662, + "learning_rate": 5.277004048823686e-06, + "loss": 2.3838, + "step": 17936 + }, + { + "epoch": 1.4475829230893391, + "grad_norm": 0.6364536881446838, + "learning_rate": 5.271944662773021e-06, + "loss": 2.3929, + "step": 17937 + }, + { + "epoch": 1.447663626825922, + "grad_norm": 0.7810595631599426, + "learning_rate": 5.266887637593121e-06, + "loss": 2.3823, + "step": 17938 + }, + { + "epoch": 1.447744330562505, + "grad_norm": 0.6959996819496155, + "learning_rate": 5.261832973410008e-06, + "loss": 2.4392, + "step": 17939 + }, + { + "epoch": 1.447825034299088, + "grad_norm": 0.7112187147140503, + "learning_rate": 5.256780670349659e-06, + "loss": 2.356, + "step": 17940 + }, + { + "epoch": 1.447905738035671, + "grad_norm": 0.7003504633903503, + "learning_rate": 5.251730728538018e-06, + "loss": 2.4182, + "step": 17941 + }, + { + "epoch": 1.447986441772254, + "grad_norm": 0.7685346603393555, + "learning_rate": 5.246683148100906e-06, + "loss": 2.3814, + "step": 17942 + }, + { + "epoch": 1.448067145508837, + "grad_norm": 0.6874574422836304, + "learning_rate": 5.2416379291641336e-06, + "loss": 2.5082, + "step": 17943 + }, + { + "epoch": 1.44814784924542, + "grad_norm": 0.6901064515113831, + "learning_rate": 5.236595071853456e-06, + "loss": 2.484, + "step": 17944 + }, + { + "epoch": 1.4482285529820031, + "grad_norm": 0.7325465083122253, + "learning_rate": 5.231554576294528e-06, + "loss": 2.3479, + "step": 17945 + }, + { + "epoch": 1.448309256718586, + "grad_norm": 0.6547845005989075, + "learning_rate": 5.226516442612994e-06, + "loss": 2.4001, + 
"step": 17946 + }, + { + "epoch": 1.448389960455169, + "grad_norm": 0.7091573476791382, + "learning_rate": 5.221480670934431e-06, + "loss": 2.3743, + "step": 17947 + }, + { + "epoch": 1.4484706641917522, + "grad_norm": 0.6750717163085938, + "learning_rate": 5.216447261384306e-06, + "loss": 2.3841, + "step": 17948 + }, + { + "epoch": 1.448551367928335, + "grad_norm": 0.682778537273407, + "learning_rate": 5.2114162140880715e-06, + "loss": 2.3735, + "step": 17949 + }, + { + "epoch": 1.4486320716649181, + "grad_norm": 0.702796995639801, + "learning_rate": 5.206387529171153e-06, + "loss": 2.397, + "step": 17950 + }, + { + "epoch": 1.448712775401501, + "grad_norm": 0.7154842615127563, + "learning_rate": 5.2013612067588254e-06, + "loss": 2.4072, + "step": 17951 + }, + { + "epoch": 1.448793479138084, + "grad_norm": 0.7017061710357666, + "learning_rate": 5.1963372469763905e-06, + "loss": 2.3638, + "step": 17952 + }, + { + "epoch": 1.448874182874667, + "grad_norm": 0.7153539657592773, + "learning_rate": 5.191315649949047e-06, + "loss": 2.4159, + "step": 17953 + }, + { + "epoch": 1.44895488661125, + "grad_norm": 0.7425200939178467, + "learning_rate": 5.1862964158019615e-06, + "loss": 2.3536, + "step": 17954 + }, + { + "epoch": 1.4490355903478331, + "grad_norm": 0.6961267590522766, + "learning_rate": 5.1812795446602115e-06, + "loss": 2.4257, + "step": 17955 + }, + { + "epoch": 1.449116294084416, + "grad_norm": 0.6912462115287781, + "learning_rate": 5.176265036648808e-06, + "loss": 2.4573, + "step": 17956 + }, + { + "epoch": 1.449196997820999, + "grad_norm": 0.7435596585273743, + "learning_rate": 5.171252891892786e-06, + "loss": 2.4134, + "step": 17957 + }, + { + "epoch": 1.4492777015575822, + "grad_norm": 0.7270591259002686, + "learning_rate": 5.166243110517011e-06, + "loss": 2.3162, + "step": 17958 + }, + { + "epoch": 1.449358405294165, + "grad_norm": 0.6728709936141968, + "learning_rate": 5.161235692646349e-06, + "loss": 2.3991, + "step": 17959 + }, + { + "epoch": 
1.4494391090307481, + "grad_norm": 0.6676486134529114, + "learning_rate": 5.156230638405624e-06, + "loss": 2.4215, + "step": 17960 + }, + { + "epoch": 1.4495198127673312, + "grad_norm": 0.7242336869239807, + "learning_rate": 5.1512279479195455e-06, + "loss": 2.4144, + "step": 17961 + }, + { + "epoch": 1.449600516503914, + "grad_norm": 0.6936756372451782, + "learning_rate": 5.146227621312804e-06, + "loss": 2.3752, + "step": 17962 + }, + { + "epoch": 1.4496812202404972, + "grad_norm": 0.7574671506881714, + "learning_rate": 5.141229658710034e-06, + "loss": 2.4536, + "step": 17963 + }, + { + "epoch": 1.4497619239770803, + "grad_norm": 0.6585906147956848, + "learning_rate": 5.136234060235767e-06, + "loss": 2.4192, + "step": 17964 + }, + { + "epoch": 1.4498426277136631, + "grad_norm": 0.7344881296157837, + "learning_rate": 5.131240826014516e-06, + "loss": 2.375, + "step": 17965 + }, + { + "epoch": 1.4499233314502462, + "grad_norm": 0.6896358132362366, + "learning_rate": 5.126249956170748e-06, + "loss": 2.3417, + "step": 17966 + }, + { + "epoch": 1.450004035186829, + "grad_norm": 0.7076104283332825, + "learning_rate": 5.1212614508288185e-06, + "loss": 2.4131, + "step": 17967 + }, + { + "epoch": 1.4500847389234122, + "grad_norm": 0.6901896595954895, + "learning_rate": 5.116275310113083e-06, + "loss": 2.4232, + "step": 17968 + }, + { + "epoch": 1.450165442659995, + "grad_norm": 0.7986876964569092, + "learning_rate": 5.111291534147788e-06, + "loss": 2.4545, + "step": 17969 + }, + { + "epoch": 1.4502461463965781, + "grad_norm": 0.723733127117157, + "learning_rate": 5.106310123057167e-06, + "loss": 2.3816, + "step": 17970 + }, + { + "epoch": 1.4503268501331612, + "grad_norm": 0.6440990567207336, + "learning_rate": 5.101331076965332e-06, + "loss": 2.3819, + "step": 17971 + }, + { + "epoch": 1.450407553869744, + "grad_norm": 0.718396782875061, + "learning_rate": 5.096354395996405e-06, + "loss": 2.406, + "step": 17972 + }, + { + "epoch": 1.4504882576063272, + "grad_norm": 
0.6515427231788635, + "learning_rate": 5.0913800802744105e-06, + "loss": 2.4555, + "step": 17973 + }, + { + "epoch": 1.4505689613429102, + "grad_norm": 0.7006518244743347, + "learning_rate": 5.0864081299233035e-06, + "loss": 2.3532, + "step": 17974 + }, + { + "epoch": 1.4506496650794931, + "grad_norm": 0.6596084237098694, + "learning_rate": 5.081438545067019e-06, + "loss": 2.3521, + "step": 17975 + }, + { + "epoch": 1.4507303688160762, + "grad_norm": 0.7091804146766663, + "learning_rate": 5.076471325829413e-06, + "loss": 2.397, + "step": 17976 + }, + { + "epoch": 1.4508110725526593, + "grad_norm": 0.6768068671226501, + "learning_rate": 5.071506472334264e-06, + "loss": 2.3692, + "step": 17977 + }, + { + "epoch": 1.4508917762892422, + "grad_norm": 0.6937921643257141, + "learning_rate": 5.066543984705318e-06, + "loss": 2.4674, + "step": 17978 + }, + { + "epoch": 1.4509724800258252, + "grad_norm": 0.6987953186035156, + "learning_rate": 5.061583863066266e-06, + "loss": 2.388, + "step": 17979 + }, + { + "epoch": 1.4510531837624083, + "grad_norm": 0.7390346527099609, + "learning_rate": 5.056626107540708e-06, + "loss": 2.4279, + "step": 17980 + }, + { + "epoch": 1.4511338874989912, + "grad_norm": 0.6433011889457703, + "learning_rate": 5.05167071825221e-06, + "loss": 2.3897, + "step": 17981 + }, + { + "epoch": 1.4512145912355743, + "grad_norm": 0.6530279517173767, + "learning_rate": 5.046717695324288e-06, + "loss": 2.3794, + "step": 17982 + }, + { + "epoch": 1.4512952949721571, + "grad_norm": 0.7322575449943542, + "learning_rate": 5.041767038880363e-06, + "loss": 2.3391, + "step": 17983 + }, + { + "epoch": 1.4513759987087402, + "grad_norm": 0.7013799548149109, + "learning_rate": 5.036818749043825e-06, + "loss": 2.417, + "step": 17984 + }, + { + "epoch": 1.451456702445323, + "grad_norm": 0.6833368539810181, + "learning_rate": 5.031872825937989e-06, + "loss": 2.4109, + "step": 17985 + }, + { + "epoch": 1.4515374061819062, + "grad_norm": 0.6758227348327637, + "learning_rate": 
5.026929269686143e-06, + "loss": 2.3913, + "step": 17986 + }, + { + "epoch": 1.4516181099184893, + "grad_norm": 0.6799556016921997, + "learning_rate": 5.021988080411477e-06, + "loss": 2.3963, + "step": 17987 + }, + { + "epoch": 1.4516988136550721, + "grad_norm": 0.670512318611145, + "learning_rate": 5.01704925823715e-06, + "loss": 2.4372, + "step": 17988 + }, + { + "epoch": 1.4517795173916552, + "grad_norm": 0.7226561903953552, + "learning_rate": 5.01211280328625e-06, + "loss": 2.3723, + "step": 17989 + }, + { + "epoch": 1.4518602211282383, + "grad_norm": 0.7119970917701721, + "learning_rate": 5.007178715681793e-06, + "loss": 2.454, + "step": 17990 + }, + { + "epoch": 1.4519409248648212, + "grad_norm": 0.670310378074646, + "learning_rate": 5.002246995546744e-06, + "loss": 2.4751, + "step": 17991 + }, + { + "epoch": 1.4520216286014043, + "grad_norm": 0.6663460731506348, + "learning_rate": 4.9973176430040515e-06, + "loss": 2.4779, + "step": 17992 + }, + { + "epoch": 1.4521023323379874, + "grad_norm": 0.72465980052948, + "learning_rate": 4.992390658176526e-06, + "loss": 2.429, + "step": 17993 + }, + { + "epoch": 1.4521830360745702, + "grad_norm": 0.7189087867736816, + "learning_rate": 4.987466041186972e-06, + "loss": 2.4086, + "step": 17994 + }, + { + "epoch": 1.4522637398111533, + "grad_norm": 0.6699924468994141, + "learning_rate": 4.982543792158134e-06, + "loss": 2.3932, + "step": 17995 + }, + { + "epoch": 1.4523444435477364, + "grad_norm": 0.6420440077781677, + "learning_rate": 4.977623911212681e-06, + "loss": 2.4164, + "step": 17996 + }, + { + "epoch": 1.4524251472843193, + "grad_norm": 0.6452329754829407, + "learning_rate": 4.972706398473237e-06, + "loss": 2.3391, + "step": 17997 + }, + { + "epoch": 1.4525058510209023, + "grad_norm": 0.6906129121780396, + "learning_rate": 4.967791254062359e-06, + "loss": 2.4345, + "step": 17998 + }, + { + "epoch": 1.4525865547574852, + "grad_norm": 0.6918602585792542, + "learning_rate": 4.96287847810254e-06, + "loss": 2.3304, + 
"step": 17999 + }, + { + "epoch": 1.4526672584940683, + "grad_norm": 0.727873682975769, + "learning_rate": 4.957968070716201e-06, + "loss": 2.417, + "step": 18000 + }, + { + "epoch": 1.4526672584940683, + "eval_loss": 2.3678998947143555, + "eval_runtime": 764.534, + "eval_samples_per_second": 3.427, + "eval_steps_per_second": 0.572, + "step": 18000 + }, + { + "epoch": 1.4527479622306512, + "grad_norm": 0.6551083922386169, + "learning_rate": 4.953060032025747e-06, + "loss": 2.3777, + "step": 18001 + }, + { + "epoch": 1.4528286659672343, + "grad_norm": 0.6975324153900146, + "learning_rate": 4.948154362153512e-06, + "loss": 2.4277, + "step": 18002 + }, + { + "epoch": 1.4529093697038173, + "grad_norm": 0.6673024892807007, + "learning_rate": 4.943251061221721e-06, + "loss": 2.3652, + "step": 18003 + }, + { + "epoch": 1.4529900734404002, + "grad_norm": 0.713287889957428, + "learning_rate": 4.938350129352587e-06, + "loss": 2.3868, + "step": 18004 + }, + { + "epoch": 1.4530707771769833, + "grad_norm": 0.6872570514678955, + "learning_rate": 4.9334515666682905e-06, + "loss": 2.3639, + "step": 18005 + }, + { + "epoch": 1.4531514809135664, + "grad_norm": 0.7270746827125549, + "learning_rate": 4.928555373290844e-06, + "loss": 2.4394, + "step": 18006 + }, + { + "epoch": 1.4532321846501493, + "grad_norm": 0.7313820123672485, + "learning_rate": 4.9236615493423395e-06, + "loss": 2.4312, + "step": 18007 + }, + { + "epoch": 1.4533128883867323, + "grad_norm": 0.7104899287223816, + "learning_rate": 4.918770094944736e-06, + "loss": 2.4121, + "step": 18008 + }, + { + "epoch": 1.4533935921233154, + "grad_norm": 0.6785389184951782, + "learning_rate": 4.913881010219912e-06, + "loss": 2.4871, + "step": 18009 + }, + { + "epoch": 1.4534742958598983, + "grad_norm": 0.71209716796875, + "learning_rate": 4.908994295289726e-06, + "loss": 2.4822, + "step": 18010 + }, + { + "epoch": 1.4535549995964814, + "grad_norm": 0.7160407900810242, + "learning_rate": 4.904109950275992e-06, + "loss": 2.4656, + 
"step": 18011 + }, + { + "epoch": 1.4536357033330642, + "grad_norm": 0.7023136615753174, + "learning_rate": 4.899227975300402e-06, + "loss": 2.4387, + "step": 18012 + }, + { + "epoch": 1.4537164070696473, + "grad_norm": 0.7554822564125061, + "learning_rate": 4.8943483704846475e-06, + "loss": 2.4355, + "step": 18013 + }, + { + "epoch": 1.4537971108062302, + "grad_norm": 0.685516893863678, + "learning_rate": 4.889471135950352e-06, + "loss": 2.4362, + "step": 18014 + }, + { + "epoch": 1.4538778145428133, + "grad_norm": 0.6651094555854797, + "learning_rate": 4.884596271819053e-06, + "loss": 2.4479, + "step": 18015 + }, + { + "epoch": 1.4539585182793964, + "grad_norm": 0.7710262537002563, + "learning_rate": 4.879723778212242e-06, + "loss": 2.4509, + "step": 18016 + }, + { + "epoch": 1.4540392220159792, + "grad_norm": 0.7243364453315735, + "learning_rate": 4.874853655251365e-06, + "loss": 2.4253, + "step": 18017 + }, + { + "epoch": 1.4541199257525623, + "grad_norm": 0.7639968395233154, + "learning_rate": 4.869985903057783e-06, + "loss": 2.3748, + "step": 18018 + }, + { + "epoch": 1.4542006294891454, + "grad_norm": 0.7307243347167969, + "learning_rate": 4.865120521752842e-06, + "loss": 2.4043, + "step": 18019 + }, + { + "epoch": 1.4542813332257283, + "grad_norm": 0.6940774321556091, + "learning_rate": 4.860257511457767e-06, + "loss": 2.3836, + "step": 18020 + }, + { + "epoch": 1.4543620369623114, + "grad_norm": 0.6808940172195435, + "learning_rate": 4.855396872293794e-06, + "loss": 2.4482, + "step": 18021 + }, + { + "epoch": 1.4544427406988945, + "grad_norm": 0.6618911027908325, + "learning_rate": 4.8505386043820265e-06, + "loss": 2.4141, + "step": 18022 + }, + { + "epoch": 1.4545234444354773, + "grad_norm": 0.7657433748245239, + "learning_rate": 4.845682707843569e-06, + "loss": 2.3576, + "step": 18023 + }, + { + "epoch": 1.4546041481720604, + "grad_norm": 0.7346564531326294, + "learning_rate": 4.840829182799434e-06, + "loss": 2.4335, + "step": 18024 + }, + { + "epoch": 
1.4546848519086435, + "grad_norm": 0.6671693325042725, + "learning_rate": 4.83597802937058e-06, + "loss": 2.3965, + "step": 18025 + }, + { + "epoch": 1.4547655556452264, + "grad_norm": 0.7164655327796936, + "learning_rate": 4.831129247677913e-06, + "loss": 2.3631, + "step": 18026 + }, + { + "epoch": 1.4548462593818094, + "grad_norm": 0.6799946427345276, + "learning_rate": 4.826282837842278e-06, + "loss": 2.4018, + "step": 18027 + }, + { + "epoch": 1.4549269631183923, + "grad_norm": 0.6891220211982727, + "learning_rate": 4.821438799984457e-06, + "loss": 2.3942, + "step": 18028 + }, + { + "epoch": 1.4550076668549754, + "grad_norm": 0.6948480010032654, + "learning_rate": 4.816597134225187e-06, + "loss": 2.4359, + "step": 18029 + }, + { + "epoch": 1.4550883705915583, + "grad_norm": 0.7973241209983826, + "learning_rate": 4.8117578406851385e-06, + "loss": 2.4464, + "step": 18030 + }, + { + "epoch": 1.4551690743281414, + "grad_norm": 0.7553974390029907, + "learning_rate": 4.806920919484903e-06, + "loss": 2.3943, + "step": 18031 + }, + { + "epoch": 1.4552497780647244, + "grad_norm": 0.6626315116882324, + "learning_rate": 4.8020863707450185e-06, + "loss": 2.3603, + "step": 18032 + }, + { + "epoch": 1.4553304818013073, + "grad_norm": 0.6878045797348022, + "learning_rate": 4.79725419458601e-06, + "loss": 2.4646, + "step": 18033 + }, + { + "epoch": 1.4554111855378904, + "grad_norm": 0.7127307057380676, + "learning_rate": 4.792424391128292e-06, + "loss": 2.3914, + "step": 18034 + }, + { + "epoch": 1.4554918892744735, + "grad_norm": 0.6839823722839355, + "learning_rate": 4.787596960492224e-06, + "loss": 2.4282, + "step": 18035 + }, + { + "epoch": 1.4555725930110563, + "grad_norm": 0.6685464978218079, + "learning_rate": 4.782771902798122e-06, + "loss": 2.427, + "step": 18036 + }, + { + "epoch": 1.4556532967476394, + "grad_norm": 0.7302927374839783, + "learning_rate": 4.777949218166256e-06, + "loss": 2.4019, + "step": 18037 + }, + { + "epoch": 1.4557340004842225, + "grad_norm": 
0.6756429672241211, + "learning_rate": 4.773128906716795e-06, + "loss": 2.4271, + "step": 18038 + }, + { + "epoch": 1.4558147042208054, + "grad_norm": 0.6744102835655212, + "learning_rate": 4.768310968569889e-06, + "loss": 2.4165, + "step": 18039 + }, + { + "epoch": 1.4558954079573885, + "grad_norm": 0.7034773826599121, + "learning_rate": 4.76349540384563e-06, + "loss": 2.4079, + "step": 18040 + }, + { + "epoch": 1.4559761116939716, + "grad_norm": 0.6483279466629028, + "learning_rate": 4.758682212664012e-06, + "loss": 2.3873, + "step": 18041 + }, + { + "epoch": 1.4560568154305544, + "grad_norm": 0.6655837893486023, + "learning_rate": 4.753871395144982e-06, + "loss": 2.4022, + "step": 18042 + }, + { + "epoch": 1.4561375191671375, + "grad_norm": 0.7327212691307068, + "learning_rate": 4.749062951408467e-06, + "loss": 2.4068, + "step": 18043 + }, + { + "epoch": 1.4562182229037204, + "grad_norm": 0.6827791333198547, + "learning_rate": 4.744256881574283e-06, + "loss": 2.4941, + "step": 18044 + }, + { + "epoch": 1.4562989266403035, + "grad_norm": 0.7078829407691956, + "learning_rate": 4.739453185762221e-06, + "loss": 2.4065, + "step": 18045 + }, + { + "epoch": 1.4563796303768863, + "grad_norm": 0.7201517820358276, + "learning_rate": 4.734651864091999e-06, + "loss": 2.3617, + "step": 18046 + }, + { + "epoch": 1.4564603341134694, + "grad_norm": 0.6765565872192383, + "learning_rate": 4.729852916683275e-06, + "loss": 2.4026, + "step": 18047 + }, + { + "epoch": 1.4565410378500525, + "grad_norm": 0.6781981587409973, + "learning_rate": 4.725056343655654e-06, + "loss": 2.4638, + "step": 18048 + }, + { + "epoch": 1.4566217415866354, + "grad_norm": 0.7230713367462158, + "learning_rate": 4.720262145128684e-06, + "loss": 2.382, + "step": 18049 + }, + { + "epoch": 1.4567024453232185, + "grad_norm": 0.918341338634491, + "learning_rate": 4.71547032122186e-06, + "loss": 2.447, + "step": 18050 + }, + { + "epoch": 1.4567831490598016, + "grad_norm": 0.683489978313446, + "learning_rate": 
4.710680872054574e-06, + "loss": 2.4175, + "step": 18051 + }, + { + "epoch": 1.4568638527963844, + "grad_norm": 0.6769242882728577, + "learning_rate": 4.7058937977462085e-06, + "loss": 2.4192, + "step": 18052 + }, + { + "epoch": 1.4569445565329675, + "grad_norm": 0.681427001953125, + "learning_rate": 4.701109098416079e-06, + "loss": 2.4194, + "step": 18053 + }, + { + "epoch": 1.4570252602695506, + "grad_norm": 0.8209199905395508, + "learning_rate": 4.6963267741834235e-06, + "loss": 2.4703, + "step": 18054 + }, + { + "epoch": 1.4571059640061335, + "grad_norm": 0.6629942059516907, + "learning_rate": 4.691546825167425e-06, + "loss": 2.4278, + "step": 18055 + }, + { + "epoch": 1.4571866677427165, + "grad_norm": 0.6706543564796448, + "learning_rate": 4.686769251487233e-06, + "loss": 2.4137, + "step": 18056 + }, + { + "epoch": 1.4572673714792994, + "grad_norm": 0.6950179934501648, + "learning_rate": 4.6819940532618735e-06, + "loss": 2.4491, + "step": 18057 + }, + { + "epoch": 1.4573480752158825, + "grad_norm": 0.6982719898223877, + "learning_rate": 4.677221230610407e-06, + "loss": 2.3487, + "step": 18058 + }, + { + "epoch": 1.4574287789524654, + "grad_norm": 0.7230788469314575, + "learning_rate": 4.672450783651772e-06, + "loss": 2.433, + "step": 18059 + }, + { + "epoch": 1.4575094826890485, + "grad_norm": 0.6349153518676758, + "learning_rate": 4.6676827125048394e-06, + "loss": 2.4531, + "step": 18060 + }, + { + "epoch": 1.4575901864256315, + "grad_norm": 0.6164267659187317, + "learning_rate": 4.662917017288449e-06, + "loss": 2.3774, + "step": 18061 + }, + { + "epoch": 1.4576708901622144, + "grad_norm": 0.660593569278717, + "learning_rate": 4.658153698121382e-06, + "loss": 2.4419, + "step": 18062 + }, + { + "epoch": 1.4577515938987975, + "grad_norm": 0.7083500027656555, + "learning_rate": 4.653392755122365e-06, + "loss": 2.453, + "step": 18063 + }, + { + "epoch": 1.4578322976353806, + "grad_norm": 0.6704061627388, + "learning_rate": 4.648634188410028e-06, + "loss": 
2.3893, + "step": 18064 + }, + { + "epoch": 1.4579130013719634, + "grad_norm": 0.6892523765563965, + "learning_rate": 4.643877998102985e-06, + "loss": 2.344, + "step": 18065 + }, + { + "epoch": 1.4579937051085465, + "grad_norm": NaN, + "learning_rate": 4.643877998102985e-06, + "loss": 2.4214, + "step": 18066 + }, + { + "epoch": 1.4580744088451296, + "grad_norm": 0.6861626505851746, + "learning_rate": 4.639124184319765e-06, + "loss": 2.4126, + "step": 18067 + }, + { + "epoch": 1.4581551125817125, + "grad_norm": 0.7208431363105774, + "learning_rate": 4.63437274717885e-06, + "loss": 2.4176, + "step": 18068 + }, + { + "epoch": 1.4582358163182956, + "grad_norm": 0.692640483379364, + "learning_rate": 4.629623686798623e-06, + "loss": 2.4041, + "step": 18069 + }, + { + "epoch": 1.4583165200548787, + "grad_norm": 0.7293663620948792, + "learning_rate": 4.624877003297512e-06, + "loss": 2.4739, + "step": 18070 + }, + { + "epoch": 1.4583972237914615, + "grad_norm": 0.7625227570533752, + "learning_rate": 4.6201326967937665e-06, + "loss": 2.44, + "step": 18071 + }, + { + "epoch": 1.4584779275280446, + "grad_norm": 0.6759201884269714, + "learning_rate": 4.615390767405636e-06, + "loss": 2.4204, + "step": 18072 + }, + { + "epoch": 1.4585586312646275, + "grad_norm": 0.6490656137466431, + "learning_rate": 4.610651215251316e-06, + "loss": 2.3858, + "step": 18073 + }, + { + "epoch": 1.4586393350012106, + "grad_norm": 0.7280056476593018, + "learning_rate": 4.605914040448911e-06, + "loss": 2.4262, + "step": 18074 + }, + { + "epoch": 1.4587200387377934, + "grad_norm": 0.78135746717453, + "learning_rate": 4.6011792431164826e-06, + "loss": 2.4533, + "step": 18075 + }, + { + "epoch": 1.4588007424743765, + "grad_norm": 0.7509358525276184, + "learning_rate": 4.596446823372058e-06, + "loss": 2.4183, + "step": 18076 + }, + { + "epoch": 1.4588814462109596, + "grad_norm": 0.7389116883277893, + "learning_rate": 4.591716781333555e-06, + "loss": 2.4201, + "step": 18077 + }, + { + "epoch": 
1.4589621499475425, + "grad_norm": 0.7294317483901978, + "learning_rate": 4.586989117118867e-06, + "loss": 2.4412, + "step": 18078 + }, + { + "epoch": 1.4590428536841256, + "grad_norm": 0.8043732047080994, + "learning_rate": 4.582263830845834e-06, + "loss": 2.4385, + "step": 18079 + }, + { + "epoch": 1.4591235574207087, + "grad_norm": 0.6626152396202087, + "learning_rate": 4.5775409226321955e-06, + "loss": 2.3706, + "step": 18080 + }, + { + "epoch": 1.4592042611572915, + "grad_norm": 0.7048769593238831, + "learning_rate": 4.572820392595678e-06, + "loss": 2.3855, + "step": 18081 + }, + { + "epoch": 1.4592849648938746, + "grad_norm": 0.6663374304771423, + "learning_rate": 4.568102240853933e-06, + "loss": 2.4205, + "step": 18082 + }, + { + "epoch": 1.4593656686304577, + "grad_norm": 0.7204031944274902, + "learning_rate": 4.563386467524544e-06, + "loss": 2.4484, + "step": 18083 + }, + { + "epoch": 1.4594463723670406, + "grad_norm": 0.7225900888442993, + "learning_rate": 4.55867307272504e-06, + "loss": 2.3677, + "step": 18084 + }, + { + "epoch": 1.4595270761036236, + "grad_norm": 0.7384055852890015, + "learning_rate": 4.55396205657288e-06, + "loss": 2.414, + "step": 18085 + }, + { + "epoch": 1.4596077798402067, + "grad_norm": 0.7159018516540527, + "learning_rate": 4.5492534191854955e-06, + "loss": 2.4265, + "step": 18086 + }, + { + "epoch": 1.4596884835767896, + "grad_norm": 0.7001106142997742, + "learning_rate": 4.544547160680213e-06, + "loss": 2.407, + "step": 18087 + }, + { + "epoch": 1.4597691873133727, + "grad_norm": 0.7521629929542542, + "learning_rate": 4.539843281174339e-06, + "loss": 2.42, + "step": 18088 + }, + { + "epoch": 1.4598498910499556, + "grad_norm": 0.6956350207328796, + "learning_rate": 4.535141780785102e-06, + "loss": 2.4639, + "step": 18089 + }, + { + "epoch": 1.4599305947865386, + "grad_norm": 0.7860763072967529, + "learning_rate": 4.530442659629686e-06, + "loss": 2.3979, + "step": 18090 + }, + { + "epoch": 1.4600112985231215, + "grad_norm": 
0.69307541847229, + "learning_rate": 4.5257459178251974e-06, + "loss": 2.3511, + "step": 18091 + }, + { + "epoch": 1.4600920022597046, + "grad_norm": 0.6837919354438782, + "learning_rate": 4.521051555488709e-06, + "loss": 2.3985, + "step": 18092 + }, + { + "epoch": 1.4601727059962877, + "grad_norm": 0.7990331053733826, + "learning_rate": 4.516359572737183e-06, + "loss": 2.4066, + "step": 18093 + }, + { + "epoch": 1.4602534097328705, + "grad_norm": 0.6431984901428223, + "learning_rate": 4.511669969687571e-06, + "loss": 2.4111, + "step": 18094 + }, + { + "epoch": 1.4603341134694536, + "grad_norm": 0.6853081583976746, + "learning_rate": 4.506982746456756e-06, + "loss": 2.3837, + "step": 18095 + }, + { + "epoch": 1.4604148172060367, + "grad_norm": 0.6754196882247925, + "learning_rate": 4.502297903161568e-06, + "loss": 2.357, + "step": 18096 + }, + { + "epoch": 1.4604955209426196, + "grad_norm": 0.7235881686210632, + "learning_rate": 4.497615439918734e-06, + "loss": 2.4749, + "step": 18097 + }, + { + "epoch": 1.4605762246792027, + "grad_norm": 0.7340710163116455, + "learning_rate": 4.4929353568449735e-06, + "loss": 2.4776, + "step": 18098 + }, + { + "epoch": 1.4606569284157858, + "grad_norm": 0.7013822793960571, + "learning_rate": 4.488257654056915e-06, + "loss": 2.4716, + "step": 18099 + }, + { + "epoch": 1.4607376321523686, + "grad_norm": 0.7052991986274719, + "learning_rate": 4.483582331671143e-06, + "loss": 2.4296, + "step": 18100 + }, + { + "epoch": 1.4608183358889517, + "grad_norm": 0.710962176322937, + "learning_rate": 4.478909389804187e-06, + "loss": 2.401, + "step": 18101 + }, + { + "epoch": 1.4608990396255348, + "grad_norm": 0.670494019985199, + "learning_rate": 4.474238828572519e-06, + "loss": 2.4259, + "step": 18102 + }, + { + "epoch": 1.4609797433621177, + "grad_norm": 0.7328322529792786, + "learning_rate": 4.4695706480925136e-06, + "loss": 2.4196, + "step": 18103 + }, + { + "epoch": 1.4610604470987005, + "grad_norm": 0.6856482028961182, + "learning_rate": 
4.464904848480523e-06, + "loss": 2.3896, + "step": 18104 + }, + { + "epoch": 1.4611411508352836, + "grad_norm": 0.6747605204582214, + "learning_rate": 4.4602414298528405e-06, + "loss": 2.3924, + "step": 18105 + }, + { + "epoch": 1.4612218545718667, + "grad_norm": 0.7371439337730408, + "learning_rate": 4.455580392325687e-06, + "loss": 2.3831, + "step": 18106 + }, + { + "epoch": 1.4613025583084496, + "grad_norm": 0.6863524317741394, + "learning_rate": 4.450921736015212e-06, + "loss": 2.4224, + "step": 18107 + }, + { + "epoch": 1.4613832620450327, + "grad_norm": 0.6699609160423279, + "learning_rate": 4.4462654610375465e-06, + "loss": 2.4119, + "step": 18108 + }, + { + "epoch": 1.4614639657816157, + "grad_norm": 0.6912252306938171, + "learning_rate": 4.441611567508719e-06, + "loss": 2.3899, + "step": 18109 + }, + { + "epoch": 1.4615446695181986, + "grad_norm": 0.7110146284103394, + "learning_rate": 4.436960055544726e-06, + "loss": 2.4768, + "step": 18110 + }, + { + "epoch": 1.4616253732547817, + "grad_norm": 0.7201465368270874, + "learning_rate": 4.432310925261496e-06, + "loss": 2.3887, + "step": 18111 + }, + { + "epoch": 1.4617060769913648, + "grad_norm": 0.6860183477401733, + "learning_rate": 4.4276641767749035e-06, + "loss": 2.37, + "step": 18112 + }, + { + "epoch": 1.4617867807279477, + "grad_norm": 0.6903096437454224, + "learning_rate": 4.4230198102007344e-06, + "loss": 2.4226, + "step": 18113 + }, + { + "epoch": 1.4618674844645307, + "grad_norm": 0.72129225730896, + "learning_rate": 4.418377825654752e-06, + "loss": 2.4313, + "step": 18114 + }, + { + "epoch": 1.4619481882011138, + "grad_norm": 0.686478316783905, + "learning_rate": 4.4137382232526615e-06, + "loss": 2.4766, + "step": 18115 + }, + { + "epoch": 1.4620288919376967, + "grad_norm": 0.69380784034729, + "learning_rate": 4.409101003110061e-06, + "loss": 2.4182, + "step": 18116 + }, + { + "epoch": 1.4621095956742798, + "grad_norm": 0.7099065184593201, + "learning_rate": 4.404466165342547e-06, + "loss": 
2.4172, + "step": 18117 + }, + { + "epoch": 1.4621902994108626, + "grad_norm": 0.7571132779121399, + "learning_rate": 4.399833710065637e-06, + "loss": 2.4231, + "step": 18118 + }, + { + "epoch": 1.4622710031474457, + "grad_norm": 0.7232388854026794, + "learning_rate": 4.3952036373947625e-06, + "loss": 2.4282, + "step": 18119 + }, + { + "epoch": 1.4623517068840286, + "grad_norm": 0.6481829881668091, + "learning_rate": 4.390575947445308e-06, + "loss": 2.3985, + "step": 18120 + }, + { + "epoch": 1.4624324106206117, + "grad_norm": 0.6784008741378784, + "learning_rate": 4.385950640332659e-06, + "loss": 2.4314, + "step": 18121 + }, + { + "epoch": 1.4625131143571948, + "grad_norm": 0.6858715415000916, + "learning_rate": 4.381327716172046e-06, + "loss": 2.4649, + "step": 18122 + }, + { + "epoch": 1.4625938180937776, + "grad_norm": 0.6565954089164734, + "learning_rate": 4.376707175078687e-06, + "loss": 2.377, + "step": 18123 + }, + { + "epoch": 1.4626745218303607, + "grad_norm": 0.6645387411117554, + "learning_rate": 4.372089017167769e-06, + "loss": 2.4133, + "step": 18124 + }, + { + "epoch": 1.4627552255669438, + "grad_norm": 0.7109405398368835, + "learning_rate": 4.367473242554343e-06, + "loss": 2.4048, + "step": 18125 + }, + { + "epoch": 1.4628359293035267, + "grad_norm": 0.6737244129180908, + "learning_rate": 4.362859851353473e-06, + "loss": 2.4009, + "step": 18126 + }, + { + "epoch": 1.4629166330401098, + "grad_norm": 0.7147111892700195, + "learning_rate": 4.358248843680135e-06, + "loss": 2.402, + "step": 18127 + }, + { + "epoch": 1.4629973367766929, + "grad_norm": 0.7494312524795532, + "learning_rate": 4.353640219649269e-06, + "loss": 2.3841, + "step": 18128 + }, + { + "epoch": 1.4630780405132757, + "grad_norm": 0.6915758848190308, + "learning_rate": 4.349033979375683e-06, + "loss": 2.4388, + "step": 18129 + }, + { + "epoch": 1.4631587442498588, + "grad_norm": 0.7709435820579529, + "learning_rate": 4.344430122974208e-06, + "loss": 2.5079, + "step": 18130 + }, + { + 
"epoch": 1.463239447986442, + "grad_norm": 0.6913777589797974, + "learning_rate": 4.3398286505595854e-06, + "loss": 2.4108, + "step": 18131 + }, + { + "epoch": 1.4633201517230248, + "grad_norm": 0.7236559987068176, + "learning_rate": 4.33522956224649e-06, + "loss": 2.4486, + "step": 18132 + }, + { + "epoch": 1.4634008554596079, + "grad_norm": 0.7122974395751953, + "learning_rate": 4.330632858149541e-06, + "loss": 2.377, + "step": 18133 + }, + { + "epoch": 1.4634815591961907, + "grad_norm": 0.713534951210022, + "learning_rate": 4.326038538383315e-06, + "loss": 2.4272, + "step": 18134 + }, + { + "epoch": 1.4635622629327738, + "grad_norm": 0.7163103222846985, + "learning_rate": 4.3214466030622955e-06, + "loss": 2.4787, + "step": 18135 + }, + { + "epoch": 1.4636429666693567, + "grad_norm": 0.6943918466567993, + "learning_rate": 4.316857052300927e-06, + "loss": 2.3893, + "step": 18136 + }, + { + "epoch": 1.4637236704059398, + "grad_norm": 0.6980963945388794, + "learning_rate": 4.312269886213615e-06, + "loss": 2.3745, + "step": 18137 + }, + { + "epoch": 1.4638043741425228, + "grad_norm": 0.6529614925384521, + "learning_rate": 4.3076851049146605e-06, + "loss": 2.4438, + "step": 18138 + }, + { + "epoch": 1.4638850778791057, + "grad_norm": 0.7353845238685608, + "learning_rate": 4.303102708518325e-06, + "loss": 2.4655, + "step": 18139 + }, + { + "epoch": 1.4639657816156888, + "grad_norm": 0.6540514826774597, + "learning_rate": 4.29852269713883e-06, + "loss": 2.389, + "step": 18140 + }, + { + "epoch": 1.4640464853522719, + "grad_norm": 0.6866925954818726, + "learning_rate": 4.293945070890315e-06, + "loss": 2.4197, + "step": 18141 + }, + { + "epoch": 1.4641271890888548, + "grad_norm": 0.701850175857544, + "learning_rate": 4.289369829886869e-06, + "loss": 2.4213, + "step": 18142 + }, + { + "epoch": 1.4642078928254378, + "grad_norm": 0.700334906578064, + "learning_rate": 4.284796974242511e-06, + "loss": 2.3587, + "step": 18143 + }, + { + "epoch": 1.464288596562021, + 
"grad_norm": 0.7060009241104126, + "learning_rate": 4.2802265040712275e-06, + "loss": 2.4579, + "step": 18144 + }, + { + "epoch": 1.4643693002986038, + "grad_norm": 0.6994202136993408, + "learning_rate": 4.2756584194869055e-06, + "loss": 2.4344, + "step": 18145 + }, + { + "epoch": 1.4644500040351869, + "grad_norm": 0.6504814624786377, + "learning_rate": 4.271092720603409e-06, + "loss": 2.3715, + "step": 18146 + }, + { + "epoch": 1.46453070777177, + "grad_norm": 0.6882978677749634, + "learning_rate": 4.266529407534514e-06, + "loss": 2.4387, + "step": 18147 + }, + { + "epoch": 1.4646114115083528, + "grad_norm": 0.6723669767379761, + "learning_rate": 4.261968480393963e-06, + "loss": 2.4423, + "step": 18148 + }, + { + "epoch": 1.464692115244936, + "grad_norm": 0.6500051021575928, + "learning_rate": 4.257409939295409e-06, + "loss": 2.4027, + "step": 18149 + }, + { + "epoch": 1.4647728189815188, + "grad_norm": 0.7253198623657227, + "learning_rate": 4.252853784352473e-06, + "loss": 2.4454, + "step": 18150 + }, + { + "epoch": 1.4648535227181019, + "grad_norm": 0.6945883631706238, + "learning_rate": 4.248300015678696e-06, + "loss": 2.4018, + "step": 18151 + }, + { + "epoch": 1.4649342264546847, + "grad_norm": 0.6615251302719116, + "learning_rate": 4.243748633387601e-06, + "loss": 2.367, + "step": 18152 + }, + { + "epoch": 1.4650149301912678, + "grad_norm": 0.7132222056388855, + "learning_rate": 4.239199637592595e-06, + "loss": 2.3724, + "step": 18153 + }, + { + "epoch": 1.465095633927851, + "grad_norm": 0.7064909338951111, + "learning_rate": 4.234653028407054e-06, + "loss": 2.3697, + "step": 18154 + }, + { + "epoch": 1.4651763376644338, + "grad_norm": 0.6656587719917297, + "learning_rate": 4.2301088059442884e-06, + "loss": 2.358, + "step": 18155 + }, + { + "epoch": 1.4652570414010169, + "grad_norm": 0.6481126546859741, + "learning_rate": 4.225566970317552e-06, + "loss": 2.4053, + "step": 18156 + }, + { + "epoch": 1.4653377451376, + "grad_norm": 0.7085857391357422, + 
"learning_rate": 4.221027521640064e-06, + "loss": 2.4376, + "step": 18157 + }, + { + "epoch": 1.4654184488741828, + "grad_norm": 0.6920461058616638, + "learning_rate": 4.216490460024914e-06, + "loss": 2.4671, + "step": 18158 + }, + { + "epoch": 1.465499152610766, + "grad_norm": 0.7046825885772705, + "learning_rate": 4.21195578558522e-06, + "loss": 2.4248, + "step": 18159 + }, + { + "epoch": 1.465579856347349, + "grad_norm": 0.7101480960845947, + "learning_rate": 4.2074234984339715e-06, + "loss": 2.4433, + "step": 18160 + }, + { + "epoch": 1.4656605600839319, + "grad_norm": 0.7143067121505737, + "learning_rate": 4.202893598684132e-06, + "loss": 2.4073, + "step": 18161 + }, + { + "epoch": 1.465741263820515, + "grad_norm": 0.7557536959648132, + "learning_rate": 4.198366086448602e-06, + "loss": 2.4053, + "step": 18162 + }, + { + "epoch": 1.4658219675570978, + "grad_norm": 0.6909283399581909, + "learning_rate": 4.193840961840223e-06, + "loss": 2.3831, + "step": 18163 + }, + { + "epoch": 1.465902671293681, + "grad_norm": 0.7262178659439087, + "learning_rate": 4.189318224971761e-06, + "loss": 2.4886, + "step": 18164 + }, + { + "epoch": 1.4659833750302638, + "grad_norm": 0.699925422668457, + "learning_rate": 4.184797875955937e-06, + "loss": 2.4073, + "step": 18165 + }, + { + "epoch": 1.4660640787668469, + "grad_norm": 0.6438626050949097, + "learning_rate": 4.180279914905439e-06, + "loss": 2.3531, + "step": 18166 + }, + { + "epoch": 1.46614478250343, + "grad_norm": 0.729622483253479, + "learning_rate": 4.175764341932809e-06, + "loss": 2.4312, + "step": 18167 + }, + { + "epoch": 1.4662254862400128, + "grad_norm": 0.6617357730865479, + "learning_rate": 4.1712511571506354e-06, + "loss": 2.3947, + "step": 18168 + }, + { + "epoch": 1.466306189976596, + "grad_norm": 0.7361389994621277, + "learning_rate": 4.166740360671384e-06, + "loss": 2.3975, + "step": 18169 + }, + { + "epoch": 1.466386893713179, + "grad_norm": 0.711264967918396, + "learning_rate": 4.1622319526074645e-06, + 
"loss": 2.4527, + "step": 18170 + }, + { + "epoch": 1.4664675974497619, + "grad_norm": 0.71773362159729, + "learning_rate": 4.157725933071233e-06, + "loss": 2.4529, + "step": 18171 + }, + { + "epoch": 1.466548301186345, + "grad_norm": 0.7069514393806458, + "learning_rate": 4.153222302175019e-06, + "loss": 2.4674, + "step": 18172 + }, + { + "epoch": 1.466629004922928, + "grad_norm": 0.795305073261261, + "learning_rate": 4.148721060031069e-06, + "loss": 2.4234, + "step": 18173 + }, + { + "epoch": 1.466709708659511, + "grad_norm": 0.6819591522216797, + "learning_rate": 4.144222206751524e-06, + "loss": 2.3764, + "step": 18174 + }, + { + "epoch": 1.466790412396094, + "grad_norm": 0.6816638112068176, + "learning_rate": 4.139725742448541e-06, + "loss": 2.447, + "step": 18175 + }, + { + "epoch": 1.466871116132677, + "grad_norm": 0.7039487361907959, + "learning_rate": 4.135231667234185e-06, + "loss": 2.3506, + "step": 18176 + }, + { + "epoch": 1.46695181986926, + "grad_norm": 0.6754382252693176, + "learning_rate": 4.130739981220433e-06, + "loss": 2.409, + "step": 18177 + }, + { + "epoch": 1.467032523605843, + "grad_norm": 0.7245250344276428, + "learning_rate": 4.12625068451924e-06, + "loss": 2.4222, + "step": 18178 + }, + { + "epoch": 1.4671132273424259, + "grad_norm": 0.7069350481033325, + "learning_rate": 4.121763777242515e-06, + "loss": 2.4346, + "step": 18179 + }, + { + "epoch": 1.467193931079009, + "grad_norm": 0.7400095462799072, + "learning_rate": 4.117279259502061e-06, + "loss": 2.4172, + "step": 18180 + }, + { + "epoch": 1.4672746348155918, + "grad_norm": 0.7178627252578735, + "learning_rate": 4.11279713140964e-06, + "loss": 2.3841, + "step": 18181 + }, + { + "epoch": 1.467355338552175, + "grad_norm": 0.6641840934753418, + "learning_rate": 4.108317393076966e-06, + "loss": 2.4728, + "step": 18182 + }, + { + "epoch": 1.467436042288758, + "grad_norm": 0.6809187531471252, + "learning_rate": 4.103840044615681e-06, + "loss": 2.4372, + "step": 18183 + }, + { + "epoch": 
1.4675167460253409, + "grad_norm": 0.6674811244010925, + "learning_rate": 4.099365086137385e-06, + "loss": 2.3998, + "step": 18184 + }, + { + "epoch": 1.467597449761924, + "grad_norm": 0.7920583486557007, + "learning_rate": 4.094892517753601e-06, + "loss": 2.4203, + "step": 18185 + }, + { + "epoch": 1.467678153498507, + "grad_norm": 0.6881268620491028, + "learning_rate": 4.090422339575795e-06, + "loss": 2.3943, + "step": 18186 + }, + { + "epoch": 1.46775885723509, + "grad_norm": 0.6778728365898132, + "learning_rate": 4.085954551715365e-06, + "loss": 2.4208, + "step": 18187 + }, + { + "epoch": 1.467839560971673, + "grad_norm": 0.6784557104110718, + "learning_rate": 4.081489154283669e-06, + "loss": 2.4067, + "step": 18188 + }, + { + "epoch": 1.467920264708256, + "grad_norm": 0.6981526017189026, + "learning_rate": 4.0770261473920155e-06, + "loss": 2.42, + "step": 18189 + }, + { + "epoch": 1.468000968444839, + "grad_norm": 0.6901406645774841, + "learning_rate": 4.072565531151595e-06, + "loss": 2.4133, + "step": 18190 + }, + { + "epoch": 1.468081672181422, + "grad_norm": 0.6496356129646301, + "learning_rate": 4.068107305673608e-06, + "loss": 2.38, + "step": 18191 + }, + { + "epoch": 1.4681623759180051, + "grad_norm": 0.7348635792732239, + "learning_rate": 4.063651471069152e-06, + "loss": 2.4665, + "step": 18192 + }, + { + "epoch": 1.468243079654588, + "grad_norm": 0.8344720005989075, + "learning_rate": 4.059198027449274e-06, + "loss": 2.3849, + "step": 18193 + }, + { + "epoch": 1.468323783391171, + "grad_norm": 0.7210039496421814, + "learning_rate": 4.0547469749249835e-06, + "loss": 2.4604, + "step": 18194 + }, + { + "epoch": 1.468404487127754, + "grad_norm": 0.7330215573310852, + "learning_rate": 4.050298313607203e-06, + "loss": 2.4768, + "step": 18195 + }, + { + "epoch": 1.468485190864337, + "grad_norm": 0.759384274482727, + "learning_rate": 4.045852043606801e-06, + "loss": 2.3686, + "step": 18196 + }, + { + "epoch": 1.46856589460092, + "grad_norm": 0.7119100689888, + 
"learning_rate": 4.041408165034588e-06, + "loss": 2.401, + "step": 18197 + }, + { + "epoch": 1.468646598337503, + "grad_norm": 0.7030404210090637, + "learning_rate": 4.036966678001342e-06, + "loss": 2.4242, + "step": 18198 + }, + { + "epoch": 1.468727302074086, + "grad_norm": 0.6559282541275024, + "learning_rate": 4.032527582617718e-06, + "loss": 2.4567, + "step": 18199 + }, + { + "epoch": 1.468808005810669, + "grad_norm": 0.7369895577430725, + "learning_rate": 4.028090878994361e-06, + "loss": 2.3795, + "step": 18200 + }, + { + "epoch": 1.468888709547252, + "grad_norm": 0.6997527480125427, + "learning_rate": 4.0236565672418624e-06, + "loss": 2.4247, + "step": 18201 + }, + { + "epoch": 1.4689694132838351, + "grad_norm": 0.6773854494094849, + "learning_rate": 4.0192246474707205e-06, + "loss": 2.4022, + "step": 18202 + }, + { + "epoch": 1.469050117020418, + "grad_norm": 0.6483170390129089, + "learning_rate": 4.014795119791404e-06, + "loss": 2.4022, + "step": 18203 + }, + { + "epoch": 1.469130820757001, + "grad_norm": 0.6568546295166016, + "learning_rate": 4.0103679843142895e-06, + "loss": 2.3802, + "step": 18204 + }, + { + "epoch": 1.4692115244935842, + "grad_norm": 0.6876521706581116, + "learning_rate": 4.005943241149746e-06, + "loss": 2.4521, + "step": 18205 + }, + { + "epoch": 1.469292228230167, + "grad_norm": 0.7165477275848389, + "learning_rate": 4.001520890408017e-06, + "loss": 2.3683, + "step": 18206 + }, + { + "epoch": 1.4693729319667501, + "grad_norm": 0.7466868162155151, + "learning_rate": 3.997100932199327e-06, + "loss": 2.4073, + "step": 18207 + }, + { + "epoch": 1.469453635703333, + "grad_norm": 0.6731385588645935, + "learning_rate": 3.992683366633842e-06, + "loss": 2.4025, + "step": 18208 + }, + { + "epoch": 1.469534339439916, + "grad_norm": 0.7291627526283264, + "learning_rate": 3.988268193821654e-06, + "loss": 2.4205, + "step": 18209 + }, + { + "epoch": 1.469615043176499, + "grad_norm": 0.6596493721008301, + "learning_rate": 3.983855413872795e-06, + 
"loss": 2.3996, + "step": 18210 + }, + { + "epoch": 1.469695746913082, + "grad_norm": 0.7010817527770996, + "learning_rate": 3.979445026897244e-06, + "loss": 2.4094, + "step": 18211 + }, + { + "epoch": 1.4697764506496651, + "grad_norm": 0.715941309928894, + "learning_rate": 3.975037033004925e-06, + "loss": 2.4256, + "step": 18212 + }, + { + "epoch": 1.469857154386248, + "grad_norm": 0.728072464466095, + "learning_rate": 3.970631432305694e-06, + "loss": 2.4084, + "step": 18213 + }, + { + "epoch": 1.469937858122831, + "grad_norm": 0.7201817035675049, + "learning_rate": 3.966228224909363e-06, + "loss": 2.3945, + "step": 18214 + }, + { + "epoch": 1.4700185618594142, + "grad_norm": 0.70964115858078, + "learning_rate": 3.961827410925644e-06, + "loss": 2.3664, + "step": 18215 + }, + { + "epoch": 1.470099265595997, + "grad_norm": 0.692813515663147, + "learning_rate": 3.957428990464229e-06, + "loss": 2.3622, + "step": 18216 + }, + { + "epoch": 1.47017996933258, + "grad_norm": 0.6732754707336426, + "learning_rate": 3.953032963634762e-06, + "loss": 2.3618, + "step": 18217 + }, + { + "epoch": 1.4702606730691632, + "grad_norm": 0.726357638835907, + "learning_rate": 3.9486393305467775e-06, + "loss": 2.4024, + "step": 18218 + }, + { + "epoch": 1.470341376805746, + "grad_norm": 0.7013699412345886, + "learning_rate": 3.944248091309765e-06, + "loss": 2.4343, + "step": 18219 + }, + { + "epoch": 1.4704220805423291, + "grad_norm": 0.6978548169136047, + "learning_rate": 3.939859246033195e-06, + "loss": 2.4206, + "step": 18220 + }, + { + "epoch": 1.4705027842789122, + "grad_norm": 0.8108847141265869, + "learning_rate": 3.935472794826434e-06, + "loss": 2.3756, + "step": 18221 + }, + { + "epoch": 1.470583488015495, + "grad_norm": 0.6821001768112183, + "learning_rate": 3.931088737798805e-06, + "loss": 2.4323, + "step": 18222 + }, + { + "epoch": 1.4706641917520782, + "grad_norm": 0.688704252243042, + "learning_rate": 3.9267070750595654e-06, + "loss": 2.412, + "step": 18223 + }, + { + 
"epoch": 1.470744895488661, + "grad_norm": 0.7279560565948486, + "learning_rate": 3.92232780671794e-06, + "loss": 2.3503, + "step": 18224 + }, + { + "epoch": 1.4708255992252441, + "grad_norm": 0.6519368886947632, + "learning_rate": 3.917950932883052e-06, + "loss": 2.4106, + "step": 18225 + }, + { + "epoch": 1.470906302961827, + "grad_norm": 0.7112751603126526, + "learning_rate": 3.91357645366397e-06, + "loss": 2.4313, + "step": 18226 + }, + { + "epoch": 1.47098700669841, + "grad_norm": 0.7301532626152039, + "learning_rate": 3.909204369169761e-06, + "loss": 2.4218, + "step": 18227 + }, + { + "epoch": 1.4710677104349932, + "grad_norm": 0.7091543078422546, + "learning_rate": 3.90483467950935e-06, + "loss": 2.4016, + "step": 18228 + }, + { + "epoch": 1.471148414171576, + "grad_norm": 0.6589071750640869, + "learning_rate": 3.900467384791651e-06, + "loss": 2.4202, + "step": 18229 + }, + { + "epoch": 1.4712291179081591, + "grad_norm": 0.6887986063957214, + "learning_rate": 3.896102485125519e-06, + "loss": 2.3689, + "step": 18230 + }, + { + "epoch": 1.4713098216447422, + "grad_norm": 0.6951364278793335, + "learning_rate": 3.891739980619724e-06, + "loss": 2.4067, + "step": 18231 + }, + { + "epoch": 1.471390525381325, + "grad_norm": 0.6578256487846375, + "learning_rate": 3.887379871383001e-06, + "loss": 2.4535, + "step": 18232 + }, + { + "epoch": 1.4714712291179082, + "grad_norm": 0.6622738838195801, + "learning_rate": 3.883022157524008e-06, + "loss": 2.4566, + "step": 18233 + }, + { + "epoch": 1.4715519328544913, + "grad_norm": 0.699840784072876, + "learning_rate": 3.878666839151357e-06, + "loss": 2.4145, + "step": 18234 + }, + { + "epoch": 1.4716326365910741, + "grad_norm": 0.7405043840408325, + "learning_rate": 3.874313916373595e-06, + "loss": 2.3819, + "step": 18235 + }, + { + "epoch": 1.4717133403276572, + "grad_norm": 0.6740127801895142, + "learning_rate": 3.869963389299203e-06, + "loss": 2.4152, + "step": 18236 + }, + { + "epoch": 1.4717940440642403, + "grad_norm": 
0.6735069155693054, + "learning_rate": 3.865615258036615e-06, + "loss": 2.4111, + "step": 18237 + }, + { + "epoch": 1.4718747478008232, + "grad_norm": 0.654137134552002, + "learning_rate": 3.861269522694188e-06, + "loss": 2.4427, + "step": 18238 + }, + { + "epoch": 1.4719554515374063, + "grad_norm": 0.6844269633293152, + "learning_rate": 3.856926183380227e-06, + "loss": 2.4553, + "step": 18239 + }, + { + "epoch": 1.4720361552739891, + "grad_norm": 0.6604157090187073, + "learning_rate": 3.85258524020301e-06, + "loss": 2.3749, + "step": 18240 + }, + { + "epoch": 1.4721168590105722, + "grad_norm": 0.6837483048439026, + "learning_rate": 3.848246693270674e-06, + "loss": 2.384, + "step": 18241 + }, + { + "epoch": 1.472197562747155, + "grad_norm": 0.6852267384529114, + "learning_rate": 3.8439105426913865e-06, + "loss": 2.4112, + "step": 18242 + }, + { + "epoch": 1.4722782664837382, + "grad_norm": 0.6974645256996155, + "learning_rate": 3.839576788573196e-06, + "loss": 2.3884, + "step": 18243 + }, + { + "epoch": 1.4723589702203213, + "grad_norm": 0.6737220287322998, + "learning_rate": 3.835245431024126e-06, + "loss": 2.424, + "step": 18244 + }, + { + "epoch": 1.4724396739569041, + "grad_norm": 0.695035457611084, + "learning_rate": 3.8309164701521016e-06, + "loss": 2.3358, + "step": 18245 + }, + { + "epoch": 1.4725203776934872, + "grad_norm": 0.6795023679733276, + "learning_rate": 3.826589906065048e-06, + "loss": 2.3569, + "step": 18246 + }, + { + "epoch": 1.4726010814300703, + "grad_norm": 0.6965143084526062, + "learning_rate": 3.8222657388707675e-06, + "loss": 2.4078, + "step": 18247 + }, + { + "epoch": 1.4726817851666532, + "grad_norm": 0.6551299095153809, + "learning_rate": 3.817943968677029e-06, + "loss": 2.3622, + "step": 18248 + }, + { + "epoch": 1.4727624889032362, + "grad_norm": 0.7963354587554932, + "learning_rate": 3.8136245955915582e-06, + "loss": 2.4108, + "step": 18249 + }, + { + "epoch": 1.4728431926398193, + "grad_norm": 0.6898682117462158, + "learning_rate": 
3.8093076197219913e-06, + "loss": 2.405, + "step": 18250 + }, + { + "epoch": 1.4729238963764022, + "grad_norm": 0.7282465100288391, + "learning_rate": 3.8049930411759195e-06, + "loss": 2.3696, + "step": 18251 + }, + { + "epoch": 1.4730046001129853, + "grad_norm": 0.7880160212516785, + "learning_rate": 3.800680860060879e-06, + "loss": 2.4156, + "step": 18252 + }, + { + "epoch": 1.4730853038495684, + "grad_norm": 0.7149094343185425, + "learning_rate": 3.7963710764843397e-06, + "loss": 2.415, + "step": 18253 + }, + { + "epoch": 1.4731660075861512, + "grad_norm": 0.7015249133110046, + "learning_rate": 3.7920636905537155e-06, + "loss": 2.3672, + "step": 18254 + }, + { + "epoch": 1.4732467113227343, + "grad_norm": 0.6848294138908386, + "learning_rate": 3.787758702376343e-06, + "loss": 2.4225, + "step": 18255 + }, + { + "epoch": 1.4733274150593172, + "grad_norm": 0.6866233348846436, + "learning_rate": 3.7834561120595467e-06, + "loss": 2.4119, + "step": 18256 + }, + { + "epoch": 1.4734081187959003, + "grad_norm": 0.7697205543518066, + "learning_rate": 3.7791559197105197e-06, + "loss": 2.3943, + "step": 18257 + }, + { + "epoch": 1.4734888225324831, + "grad_norm": 0.6798329949378967, + "learning_rate": 3.7748581254364533e-06, + "loss": 2.4329, + "step": 18258 + }, + { + "epoch": 1.4735695262690662, + "grad_norm": 0.6995163559913635, + "learning_rate": 3.7705627293444732e-06, + "loss": 2.3561, + "step": 18259 + }, + { + "epoch": 1.4736502300056493, + "grad_norm": 0.6825453042984009, + "learning_rate": 3.766269731541594e-06, + "loss": 2.3887, + "step": 18260 + }, + { + "epoch": 1.4737309337422322, + "grad_norm": 0.7159842848777771, + "learning_rate": 3.7619791321348407e-06, + "loss": 2.4096, + "step": 18261 + }, + { + "epoch": 1.4738116374788153, + "grad_norm": 0.6697775721549988, + "learning_rate": 3.757690931231139e-06, + "loss": 2.4095, + "step": 18262 + }, + { + "epoch": 1.4738923412153984, + "grad_norm": 0.71161949634552, + "learning_rate": 3.7534051289373486e-06, + 
"loss": 2.4444, + "step": 18263 + }, + { + "epoch": 1.4739730449519812, + "grad_norm": 0.6437444090843201, + "learning_rate": 3.749121725360294e-06, + "loss": 2.386, + "step": 18264 + }, + { + "epoch": 1.4740537486885643, + "grad_norm": 0.7008254528045654, + "learning_rate": 3.744840720606746e-06, + "loss": 2.417, + "step": 18265 + }, + { + "epoch": 1.4741344524251474, + "grad_norm": 0.6326326727867126, + "learning_rate": 3.7405621147833634e-06, + "loss": 2.3346, + "step": 18266 + }, + { + "epoch": 1.4742151561617303, + "grad_norm": 0.6802831888198853, + "learning_rate": 3.736285907996806e-06, + "loss": 2.3794, + "step": 18267 + }, + { + "epoch": 1.4742958598983134, + "grad_norm": 0.6425875425338745, + "learning_rate": 3.7320121003536323e-06, + "loss": 2.3931, + "step": 18268 + }, + { + "epoch": 1.4743765636348962, + "grad_norm": 0.6619433760643005, + "learning_rate": 3.7277406919603797e-06, + "loss": 2.4162, + "step": 18269 + }, + { + "epoch": 1.4744572673714793, + "grad_norm": 0.6600280404090881, + "learning_rate": 3.723471682923474e-06, + "loss": 2.4334, + "step": 18270 + }, + { + "epoch": 1.4745379711080622, + "grad_norm": 0.7603200078010559, + "learning_rate": 3.719205073349319e-06, + "loss": 2.4413, + "step": 18271 + }, + { + "epoch": 1.4746186748446453, + "grad_norm": 0.6581423878669739, + "learning_rate": 3.714940863344263e-06, + "loss": 2.4117, + "step": 18272 + }, + { + "epoch": 1.4746993785812283, + "grad_norm": 0.6989814043045044, + "learning_rate": 3.710679053014565e-06, + "loss": 2.377, + "step": 18273 + }, + { + "epoch": 1.4747800823178112, + "grad_norm": 0.6707834005355835, + "learning_rate": 3.7064196424664522e-06, + "loss": 2.3407, + "step": 18274 + }, + { + "epoch": 1.4748607860543943, + "grad_norm": 0.7205011248588562, + "learning_rate": 3.702162631806083e-06, + "loss": 2.4182, + "step": 18275 + }, + { + "epoch": 1.4749414897909774, + "grad_norm": 0.7529718279838562, + "learning_rate": 3.69790802113954e-06, + "loss": 2.3434, + "step": 18276 + }, 
+ { + "epoch": 1.4750221935275603, + "grad_norm": 0.6794082522392273, + "learning_rate": 3.69365581057286e-06, + "loss": 2.4157, + "step": 18277 + }, + { + "epoch": 1.4751028972641433, + "grad_norm": 0.7068135738372803, + "learning_rate": 3.689406000212037e-06, + "loss": 2.3516, + "step": 18278 + }, + { + "epoch": 1.4751836010007264, + "grad_norm": 0.7128797769546509, + "learning_rate": 3.6851585901629736e-06, + "loss": 2.3809, + "step": 18279 + }, + { + "epoch": 1.4752643047373093, + "grad_norm": 0.7014521956443787, + "learning_rate": 3.68091358053152e-06, + "loss": 2.4091, + "step": 18280 + }, + { + "epoch": 1.4753450084738924, + "grad_norm": 0.7495442628860474, + "learning_rate": 3.6766709714234793e-06, + "loss": 2.3977, + "step": 18281 + }, + { + "epoch": 1.4754257122104755, + "grad_norm": 0.6657838225364685, + "learning_rate": 3.6724307629446007e-06, + "loss": 2.3892, + "step": 18282 + }, + { + "epoch": 1.4755064159470583, + "grad_norm": 0.688546895980835, + "learning_rate": 3.668192955200522e-06, + "loss": 2.4159, + "step": 18283 + }, + { + "epoch": 1.4755871196836414, + "grad_norm": 0.6888083219528198, + "learning_rate": 3.6639575482969034e-06, + "loss": 2.3221, + "step": 18284 + }, + { + "epoch": 1.4756678234202243, + "grad_norm": 0.8717848658561707, + "learning_rate": 3.6597245423393046e-06, + "loss": 2.4453, + "step": 18285 + }, + { + "epoch": 1.4757485271568074, + "grad_norm": 0.6860103011131287, + "learning_rate": 3.6554939374331963e-06, + "loss": 2.4251, + "step": 18286 + }, + { + "epoch": 1.4758292308933902, + "grad_norm": 0.6638378500938416, + "learning_rate": 3.6512657336840174e-06, + "loss": 2.4506, + "step": 18287 + }, + { + "epoch": 1.4759099346299733, + "grad_norm": 0.6854584813117981, + "learning_rate": 3.6470399311971716e-06, + "loss": 2.3474, + "step": 18288 + }, + { + "epoch": 1.4759906383665564, + "grad_norm": 0.6957666873931885, + "learning_rate": 3.6428165300779526e-06, + "loss": 2.3452, + "step": 18289 + }, + { + "epoch": 
1.4760713421031393, + "grad_norm": 0.646803081035614, + "learning_rate": 3.638595530431621e-06, + "loss": 2.3617, + "step": 18290 + }, + { + "epoch": 1.4761520458397224, + "grad_norm": 0.6761566996574402, + "learning_rate": 3.6343769323633924e-06, + "loss": 2.4115, + "step": 18291 + }, + { + "epoch": 1.4762327495763055, + "grad_norm": 0.7071232795715332, + "learning_rate": 3.6301607359783827e-06, + "loss": 2.4088, + "step": 18292 + }, + { + "epoch": 1.4763134533128883, + "grad_norm": 0.6781535148620605, + "learning_rate": 3.625946941381675e-06, + "loss": 2.3733, + "step": 18293 + }, + { + "epoch": 1.4763941570494714, + "grad_norm": 0.6833710670471191, + "learning_rate": 3.6217355486782957e-06, + "loss": 2.4711, + "step": 18294 + }, + { + "epoch": 1.4764748607860545, + "grad_norm": 0.7589881420135498, + "learning_rate": 3.6175265579732055e-06, + "loss": 2.3845, + "step": 18295 + }, + { + "epoch": 1.4765555645226374, + "grad_norm": 0.6896101236343384, + "learning_rate": 3.6133199693712983e-06, + "loss": 2.3758, + "step": 18296 + }, + { + "epoch": 1.4766362682592205, + "grad_norm": 0.6634401082992554, + "learning_rate": 3.6091157829774127e-06, + "loss": 2.369, + "step": 18297 + }, + { + "epoch": 1.4767169719958035, + "grad_norm": 0.6652467846870422, + "learning_rate": 3.604913998896342e-06, + "loss": 2.4098, + "step": 18298 + }, + { + "epoch": 1.4767976757323864, + "grad_norm": 0.7705509662628174, + "learning_rate": 3.600714617232781e-06, + "loss": 2.395, + "step": 18299 + }, + { + "epoch": 1.4768783794689695, + "grad_norm": 0.6642572283744812, + "learning_rate": 3.5965176380914122e-06, + "loss": 2.4144, + "step": 18300 + }, + { + "epoch": 1.4769590832055524, + "grad_norm": 0.7557141184806824, + "learning_rate": 3.59232306157683e-06, + "loss": 2.3329, + "step": 18301 + }, + { + "epoch": 1.4770397869421354, + "grad_norm": 0.715446949005127, + "learning_rate": 3.5881308877935504e-06, + "loss": 2.4349, + "step": 18302 + }, + { + "epoch": 1.4771204906787183, + 
"grad_norm": 0.7579060196876526, + "learning_rate": 3.583941116846079e-06, + "loss": 2.4296, + "step": 18303 + }, + { + "epoch": 1.4772011944153014, + "grad_norm": 0.6764013767242432, + "learning_rate": 3.5797537488388323e-06, + "loss": 2.4128, + "step": 18304 + }, + { + "epoch": 1.4772818981518845, + "grad_norm": 0.7495453953742981, + "learning_rate": 3.57556878387616e-06, + "loss": 2.5065, + "step": 18305 + }, + { + "epoch": 1.4773626018884674, + "grad_norm": 0.7046003341674805, + "learning_rate": 3.5713862220623785e-06, + "loss": 2.4498, + "step": 18306 + }, + { + "epoch": 1.4774433056250504, + "grad_norm": 0.6819034814834595, + "learning_rate": 3.567206063501727e-06, + "loss": 2.4052, + "step": 18307 + }, + { + "epoch": 1.4775240093616335, + "grad_norm": 0.6607410907745361, + "learning_rate": 3.5630283082983663e-06, + "loss": 2.396, + "step": 18308 + }, + { + "epoch": 1.4776047130982164, + "grad_norm": 0.7284536957740784, + "learning_rate": 3.5588529565564244e-06, + "loss": 2.4311, + "step": 18309 + }, + { + "epoch": 1.4776854168347995, + "grad_norm": 0.7704942226409912, + "learning_rate": 3.554680008379985e-06, + "loss": 2.4481, + "step": 18310 + }, + { + "epoch": 1.4777661205713826, + "grad_norm": 0.7008868455886841, + "learning_rate": 3.5505094638730083e-06, + "loss": 2.4954, + "step": 18311 + }, + { + "epoch": 1.4778468243079654, + "grad_norm": 0.6746332049369812, + "learning_rate": 3.546341323139468e-06, + "loss": 2.3946, + "step": 18312 + }, + { + "epoch": 1.4779275280445485, + "grad_norm": 0.6415507197380066, + "learning_rate": 3.5421755862832253e-06, + "loss": 2.3786, + "step": 18313 + }, + { + "epoch": 1.4780082317811314, + "grad_norm": 0.7158175110816956, + "learning_rate": 3.5380122534081184e-06, + "loss": 2.4348, + "step": 18314 + }, + { + "epoch": 1.4780889355177145, + "grad_norm": 0.7158238887786865, + "learning_rate": 3.5338513246178985e-06, + "loss": 2.385, + "step": 18315 + }, + { + "epoch": 1.4781696392542973, + "grad_norm": 
0.6766643524169922, + "learning_rate": 3.529692800016271e-06, + "loss": 2.4401, + "step": 18316 + }, + { + "epoch": 1.4782503429908804, + "grad_norm": 0.7073598504066467, + "learning_rate": 3.525536679706887e-06, + "loss": 2.4669, + "step": 18317 + }, + { + "epoch": 1.4783310467274635, + "grad_norm": 0.7213411927223206, + "learning_rate": 3.521382963793296e-06, + "loss": 2.4186, + "step": 18318 + }, + { + "epoch": 1.4784117504640464, + "grad_norm": 0.7676820755004883, + "learning_rate": 3.5172316523790384e-06, + "loss": 2.4653, + "step": 18319 + }, + { + "epoch": 1.4784924542006295, + "grad_norm": 0.8283714056015015, + "learning_rate": 3.5130827455675975e-06, + "loss": 2.3896, + "step": 18320 + }, + { + "epoch": 1.4785731579372126, + "grad_norm": 0.685022234916687, + "learning_rate": 3.508936243462335e-06, + "loss": 2.3726, + "step": 18321 + }, + { + "epoch": 1.4786538616737954, + "grad_norm": 0.6866634488105774, + "learning_rate": 3.5047921461666135e-06, + "loss": 2.4511, + "step": 18322 + }, + { + "epoch": 1.4787345654103785, + "grad_norm": 0.6487671732902527, + "learning_rate": 3.500650453783716e-06, + "loss": 2.4113, + "step": 18323 + }, + { + "epoch": 1.4788152691469616, + "grad_norm": 0.6886214017868042, + "learning_rate": 3.4965111664168604e-06, + "loss": 2.4272, + "step": 18324 + }, + { + "epoch": 1.4788959728835445, + "grad_norm": 0.6808422207832336, + "learning_rate": 3.4923742841692085e-06, + "loss": 2.3936, + "step": 18325 + }, + { + "epoch": 1.4789766766201276, + "grad_norm": 0.713890552520752, + "learning_rate": 3.4882398071438783e-06, + "loss": 2.3742, + "step": 18326 + }, + { + "epoch": 1.4790573803567106, + "grad_norm": 0.6884218454360962, + "learning_rate": 3.4841077354438758e-06, + "loss": 2.3663, + "step": 18327 + }, + { + "epoch": 1.4791380840932935, + "grad_norm": 0.6903060674667358, + "learning_rate": 3.4799780691722074e-06, + "loss": 2.4586, + "step": 18328 + }, + { + "epoch": 1.4792187878298766, + "grad_norm": 0.7081164717674255, + 
"learning_rate": 3.475850808431791e-06, + "loss": 2.3848, + "step": 18329 + }, + { + "epoch": 1.4792994915664595, + "grad_norm": 0.7136076092720032, + "learning_rate": 3.4717259533254997e-06, + "loss": 2.4092, + "step": 18330 + }, + { + "epoch": 1.4793801953030425, + "grad_norm": 0.6860584616661072, + "learning_rate": 3.4676035039561182e-06, + "loss": 2.4348, + "step": 18331 + }, + { + "epoch": 1.4794608990396254, + "grad_norm": 0.6885141730308533, + "learning_rate": 3.4634834604263978e-06, + "loss": 2.4029, + "step": 18332 + }, + { + "epoch": 1.4795416027762085, + "grad_norm": 0.6577363610267639, + "learning_rate": 3.4593658228390223e-06, + "loss": 2.3659, + "step": 18333 + }, + { + "epoch": 1.4796223065127916, + "grad_norm": 0.6664844155311584, + "learning_rate": 3.4552505912965884e-06, + "loss": 2.3532, + "step": 18334 + }, + { + "epoch": 1.4797030102493745, + "grad_norm": 0.7257712483406067, + "learning_rate": 3.451137765901702e-06, + "loss": 2.4117, + "step": 18335 + }, + { + "epoch": 1.4797837139859575, + "grad_norm": 0.7410221099853516, + "learning_rate": 3.447027346756837e-06, + "loss": 2.4092, + "step": 18336 + }, + { + "epoch": 1.4798644177225406, + "grad_norm": 0.7233858108520508, + "learning_rate": 3.442919333964445e-06, + "loss": 2.3718, + "step": 18337 + }, + { + "epoch": 1.4799451214591235, + "grad_norm": 0.704576849937439, + "learning_rate": 3.4388137276268996e-06, + "loss": 2.4513, + "step": 18338 + }, + { + "epoch": 1.4800258251957066, + "grad_norm": 0.662105143070221, + "learning_rate": 3.434710527846552e-06, + "loss": 2.3803, + "step": 18339 + }, + { + "epoch": 1.4801065289322897, + "grad_norm": 0.6548754572868347, + "learning_rate": 3.4306097347256207e-06, + "loss": 2.3922, + "step": 18340 + }, + { + "epoch": 1.4801872326688725, + "grad_norm": 0.6719009280204773, + "learning_rate": 3.4265113483663238e-06, + "loss": 2.3943, + "step": 18341 + }, + { + "epoch": 1.4802679364054556, + "grad_norm": 0.7208795547485352, + "learning_rate": 
3.422415368870835e-06, + "loss": 2.362, + "step": 18342 + }, + { + "epoch": 1.4803486401420387, + "grad_norm": 0.7121373414993286, + "learning_rate": 3.4183217963411953e-06, + "loss": 2.4508, + "step": 18343 + }, + { + "epoch": 1.4804293438786216, + "grad_norm": 0.651792585849762, + "learning_rate": 3.4142306308794334e-06, + "loss": 2.3923, + "step": 18344 + }, + { + "epoch": 1.4805100476152047, + "grad_norm": 0.6823711395263672, + "learning_rate": 3.4101418725875245e-06, + "loss": 2.4156, + "step": 18345 + }, + { + "epoch": 1.4805907513517875, + "grad_norm": 0.6949301362037659, + "learning_rate": 3.406055521567386e-06, + "loss": 2.3666, + "step": 18346 + }, + { + "epoch": 1.4806714550883706, + "grad_norm": 0.723517894744873, + "learning_rate": 3.401971577920826e-06, + "loss": 2.4534, + "step": 18347 + }, + { + "epoch": 1.4807521588249535, + "grad_norm": 0.6967771053314209, + "learning_rate": 3.3978900417496516e-06, + "loss": 2.4345, + "step": 18348 + }, + { + "epoch": 1.4808328625615366, + "grad_norm": 0.6820134520530701, + "learning_rate": 3.393810913155593e-06, + "loss": 2.3905, + "step": 18349 + }, + { + "epoch": 1.4809135662981197, + "grad_norm": 0.7566741704940796, + "learning_rate": 3.3897341922402794e-06, + "loss": 2.457, + "step": 18350 + }, + { + "epoch": 1.4809942700347025, + "grad_norm": 0.732586145401001, + "learning_rate": 3.3856598791053297e-06, + "loss": 2.395, + "step": 18351 + }, + { + "epoch": 1.4810749737712856, + "grad_norm": 0.7377440333366394, + "learning_rate": 3.3815879738523073e-06, + "loss": 2.412, + "step": 18352 + }, + { + "epoch": 1.4811556775078687, + "grad_norm": 0.6709005832672119, + "learning_rate": 3.3775184765826527e-06, + "loss": 2.4048, + "step": 18353 + }, + { + "epoch": 1.4812363812444516, + "grad_norm": 0.6626690030097961, + "learning_rate": 3.373451387397819e-06, + "loss": 2.3663, + "step": 18354 + }, + { + "epoch": 1.4813170849810346, + "grad_norm": 0.671341598033905, + "learning_rate": 3.369386706399158e-06, + "loss": 
2.4147, + "step": 18355 + }, + { + "epoch": 1.4813977887176177, + "grad_norm": 0.7172929048538208, + "learning_rate": 3.3653244336879773e-06, + "loss": 2.4042, + "step": 18356 + }, + { + "epoch": 1.4814784924542006, + "grad_norm": 0.6489603519439697, + "learning_rate": 3.361264569365519e-06, + "loss": 2.3811, + "step": 18357 + }, + { + "epoch": 1.4815591961907837, + "grad_norm": 0.7350562214851379, + "learning_rate": 3.3572071135329786e-06, + "loss": 2.4428, + "step": 18358 + }, + { + "epoch": 1.4816398999273666, + "grad_norm": 0.6472034454345703, + "learning_rate": 3.3531520662914428e-06, + "loss": 2.3613, + "step": 18359 + }, + { + "epoch": 1.4817206036639496, + "grad_norm": 0.6974912285804749, + "learning_rate": 3.3490994277419975e-06, + "loss": 2.3932, + "step": 18360 + }, + { + "epoch": 1.4818013074005325, + "grad_norm": 0.7560031414031982, + "learning_rate": 3.34504919798565e-06, + "loss": 2.3633, + "step": 18361 + }, + { + "epoch": 1.4818820111371156, + "grad_norm": 0.6837224364280701, + "learning_rate": 3.341001377123343e-06, + "loss": 2.4298, + "step": 18362 + }, + { + "epoch": 1.4819627148736987, + "grad_norm": 0.6952646970748901, + "learning_rate": 3.336955965255939e-06, + "loss": 2.4155, + "step": 18363 + }, + { + "epoch": 1.4820434186102815, + "grad_norm": 0.6897403597831726, + "learning_rate": 3.332912962484269e-06, + "loss": 2.3911, + "step": 18364 + }, + { + "epoch": 1.4821241223468646, + "grad_norm": 0.7033999562263489, + "learning_rate": 3.3288723689090973e-06, + "loss": 2.503, + "step": 18365 + }, + { + "epoch": 1.4822048260834477, + "grad_norm": 0.6422268152236938, + "learning_rate": 3.3248341846311317e-06, + "loss": 2.4095, + "step": 18366 + }, + { + "epoch": 1.4822855298200306, + "grad_norm": 0.7891619205474854, + "learning_rate": 3.3207984097510024e-06, + "loss": 2.4561, + "step": 18367 + }, + { + "epoch": 1.4823662335566137, + "grad_norm": 0.8084300756454468, + "learning_rate": 3.3167650443693186e-06, + "loss": 2.3867, + "step": 18368 + }, + 
{ + "epoch": 1.4824469372931968, + "grad_norm": 0.6958054900169373, + "learning_rate": 3.3127340885865666e-06, + "loss": 2.4479, + "step": 18369 + }, + { + "epoch": 1.4825276410297796, + "grad_norm": 0.6672516465187073, + "learning_rate": 3.308705542503232e-06, + "loss": 2.3908, + "step": 18370 + }, + { + "epoch": 1.4826083447663627, + "grad_norm": 0.6914852261543274, + "learning_rate": 3.3046794062197127e-06, + "loss": 2.3808, + "step": 18371 + }, + { + "epoch": 1.4826890485029458, + "grad_norm": 0.661186695098877, + "learning_rate": 3.3006556798363284e-06, + "loss": 2.4348, + "step": 18372 + }, + { + "epoch": 1.4827697522395287, + "grad_norm": 0.6800875067710876, + "learning_rate": 3.296634363453388e-06, + "loss": 2.4277, + "step": 18373 + }, + { + "epoch": 1.4828504559761118, + "grad_norm": 0.7118602395057678, + "learning_rate": 3.292615457171111e-06, + "loss": 2.3708, + "step": 18374 + }, + { + "epoch": 1.4829311597126946, + "grad_norm": 0.8216844797134399, + "learning_rate": 3.2885989610896395e-06, + "loss": 2.4083, + "step": 18375 + }, + { + "epoch": 1.4830118634492777, + "grad_norm": 0.6696308851242065, + "learning_rate": 3.2845848753090935e-06, + "loss": 2.4329, + "step": 18376 + }, + { + "epoch": 1.4830925671858606, + "grad_norm": 0.7084461450576782, + "learning_rate": 3.280573199929515e-06, + "loss": 2.4084, + "step": 18377 + }, + { + "epoch": 1.4831732709224437, + "grad_norm": 0.6815770268440247, + "learning_rate": 3.2765639350508802e-06, + "loss": 2.4622, + "step": 18378 + }, + { + "epoch": 1.4832539746590268, + "grad_norm": 0.7094982862472534, + "learning_rate": 3.2725570807730975e-06, + "loss": 2.4213, + "step": 18379 + }, + { + "epoch": 1.4833346783956096, + "grad_norm": 0.6778813004493713, + "learning_rate": 3.2685526371960538e-06, + "loss": 2.4003, + "step": 18380 + }, + { + "epoch": 1.4834153821321927, + "grad_norm": 0.6944702863693237, + "learning_rate": 3.2645506044195363e-06, + "loss": 2.3931, + "step": 18381 + }, + { + "epoch": 
1.4834960858687758, + "grad_norm": 0.7213063836097717, + "learning_rate": 3.2605509825432755e-06, + "loss": 2.465, + "step": 18382 + }, + { + "epoch": 1.4835767896053587, + "grad_norm": 0.6559615731239319, + "learning_rate": 3.2565537716669703e-06, + "loss": 2.4149, + "step": 18383 + }, + { + "epoch": 1.4836574933419417, + "grad_norm": 0.7576823830604553, + "learning_rate": 3.2525589718902515e-06, + "loss": 2.4355, + "step": 18384 + }, + { + "epoch": 1.4837381970785248, + "grad_norm": 0.6799216866493225, + "learning_rate": 3.248566583312629e-06, + "loss": 2.3853, + "step": 18385 + }, + { + "epoch": 1.4838189008151077, + "grad_norm": 0.6761351823806763, + "learning_rate": 3.244576606033656e-06, + "loss": 2.4577, + "step": 18386 + }, + { + "epoch": 1.4838996045516908, + "grad_norm": 0.6876667141914368, + "learning_rate": 3.240589040152764e-06, + "loss": 2.3898, + "step": 18387 + }, + { + "epoch": 1.4839803082882739, + "grad_norm": 0.6555415987968445, + "learning_rate": 3.236603885769307e-06, + "loss": 2.3773, + "step": 18388 + }, + { + "epoch": 1.4840610120248567, + "grad_norm": 0.782966673374176, + "learning_rate": 3.232621142982628e-06, + "loss": 2.3987, + "step": 18389 + }, + { + "epoch": 1.4841417157614398, + "grad_norm": 0.6703657507896423, + "learning_rate": 3.228640811891992e-06, + "loss": 2.3617, + "step": 18390 + }, + { + "epoch": 1.4842224194980227, + "grad_norm": 0.7010387778282166, + "learning_rate": 3.224662892596586e-06, + "loss": 2.396, + "step": 18391 + }, + { + "epoch": 1.4843031232346058, + "grad_norm": 0.7821521162986755, + "learning_rate": 3.2206873851955535e-06, + "loss": 2.4362, + "step": 18392 + }, + { + "epoch": 1.4843838269711886, + "grad_norm": 0.7236925959587097, + "learning_rate": 3.21671428978797e-06, + "loss": 2.4813, + "step": 18393 + }, + { + "epoch": 1.4844645307077717, + "grad_norm": 0.6522866487503052, + "learning_rate": 3.2127436064728788e-06, + "loss": 2.3885, + "step": 18394 + }, + { + "epoch": 1.4845452344443548, + "grad_norm": 
0.7148615121841431, + "learning_rate": 3.2087753353492013e-06, + "loss": 2.4546, + "step": 18395 + }, + { + "epoch": 1.4846259381809377, + "grad_norm": 0.6313709020614624, + "learning_rate": 3.2048094765158463e-06, + "loss": 2.3596, + "step": 18396 + }, + { + "epoch": 1.4847066419175208, + "grad_norm": 0.7160886526107788, + "learning_rate": 3.2008460300716914e-06, + "loss": 2.3852, + "step": 18397 + }, + { + "epoch": 1.4847873456541039, + "grad_norm": 0.6922785043716431, + "learning_rate": 3.196884996115479e-06, + "loss": 2.4601, + "step": 18398 + }, + { + "epoch": 1.4848680493906867, + "grad_norm": 0.7803853154182434, + "learning_rate": 3.1929263747459414e-06, + "loss": 2.3588, + "step": 18399 + }, + { + "epoch": 1.4849487531272698, + "grad_norm": 0.7317460179328918, + "learning_rate": 3.1889701660617333e-06, + "loss": 2.4515, + "step": 18400 + }, + { + "epoch": 1.485029456863853, + "grad_norm": 0.6729404330253601, + "learning_rate": 3.1850163701614533e-06, + "loss": 2.4158, + "step": 18401 + }, + { + "epoch": 1.4851101606004358, + "grad_norm": 0.7266910672187805, + "learning_rate": 3.181064987143645e-06, + "loss": 2.4365, + "step": 18402 + }, + { + "epoch": 1.4851908643370189, + "grad_norm": 0.6553283333778381, + "learning_rate": 3.177116017106785e-06, + "loss": 2.4125, + "step": 18403 + }, + { + "epoch": 1.485271568073602, + "grad_norm": 0.7099964618682861, + "learning_rate": 3.1731694601492833e-06, + "loss": 2.3994, + "step": 18404 + }, + { + "epoch": 1.4853522718101848, + "grad_norm": 0.7573987245559692, + "learning_rate": 3.1692253163695173e-06, + "loss": 2.4295, + "step": 18405 + }, + { + "epoch": 1.485432975546768, + "grad_norm": 0.6570815443992615, + "learning_rate": 3.165283585865764e-06, + "loss": 2.4129, + "step": 18406 + }, + { + "epoch": 1.4855136792833508, + "grad_norm": 0.6884456276893616, + "learning_rate": 3.1613442687362772e-06, + "loss": 2.4729, + "step": 18407 + }, + { + "epoch": 1.4855943830199339, + "grad_norm": 0.6423753499984741, + 
"learning_rate": 3.1574073650792234e-06, + "loss": 2.433, + "step": 18408 + }, + { + "epoch": 1.4856750867565167, + "grad_norm": 0.7291930913925171, + "learning_rate": 3.1534728749927358e-06, + "loss": 2.4329, + "step": 18409 + }, + { + "epoch": 1.4857557904930998, + "grad_norm": 0.6597060561180115, + "learning_rate": 3.149540798574868e-06, + "loss": 2.4525, + "step": 18410 + }, + { + "epoch": 1.485836494229683, + "grad_norm": 0.6662060618400574, + "learning_rate": 3.1456111359235986e-06, + "loss": 2.3624, + "step": 18411 + }, + { + "epoch": 1.4859171979662658, + "grad_norm": 0.710584282875061, + "learning_rate": 3.1416838871368924e-06, + "loss": 2.4229, + "step": 18412 + }, + { + "epoch": 1.4859979017028488, + "grad_norm": 0.7081347107887268, + "learning_rate": 3.1377590523126165e-06, + "loss": 2.3889, + "step": 18413 + }, + { + "epoch": 1.486078605439432, + "grad_norm": 0.6779326796531677, + "learning_rate": 3.1338366315485703e-06, + "loss": 2.4074, + "step": 18414 + }, + { + "epoch": 1.4861593091760148, + "grad_norm": 0.6911298036575317, + "learning_rate": 3.1299166249425305e-06, + "loss": 2.4258, + "step": 18415 + }, + { + "epoch": 1.4862400129125979, + "grad_norm": 0.670421302318573, + "learning_rate": 3.1259990325921973e-06, + "loss": 2.4211, + "step": 18416 + }, + { + "epoch": 1.486320716649181, + "grad_norm": 0.6860554814338684, + "learning_rate": 3.1220838545951925e-06, + "loss": 2.3699, + "step": 18417 + }, + { + "epoch": 1.4864014203857638, + "grad_norm": 0.7171792984008789, + "learning_rate": 3.1181710910490935e-06, + "loss": 2.395, + "step": 18418 + }, + { + "epoch": 1.486482124122347, + "grad_norm": 0.6713120341300964, + "learning_rate": 3.1142607420514446e-06, + "loss": 2.4065, + "step": 18419 + }, + { + "epoch": 1.4865628278589298, + "grad_norm": 0.6774618625640869, + "learning_rate": 3.1103528076996568e-06, + "loss": 2.3923, + "step": 18420 + }, + { + "epoch": 1.4866435315955129, + "grad_norm": 0.6554906368255615, + "learning_rate": 
3.1064472880911632e-06, + "loss": 2.4161, + "step": 18421 + }, + { + "epoch": 1.4867242353320957, + "grad_norm": 0.6858103275299072, + "learning_rate": 3.102544183323275e-06, + "loss": 2.4297, + "step": 18422 + }, + { + "epoch": 1.4868049390686788, + "grad_norm": 0.727878212928772, + "learning_rate": 3.0986434934932916e-06, + "loss": 2.3525, + "step": 18423 + }, + { + "epoch": 1.486885642805262, + "grad_norm": 0.6654942035675049, + "learning_rate": 3.094745218698403e-06, + "loss": 2.4297, + "step": 18424 + }, + { + "epoch": 1.4869663465418448, + "grad_norm": 0.658942461013794, + "learning_rate": 3.0908493590357856e-06, + "loss": 2.3723, + "step": 18425 + }, + { + "epoch": 1.4870470502784279, + "grad_norm": 0.6851345896720886, + "learning_rate": 3.0869559146025185e-06, + "loss": 2.4382, + "step": 18426 + }, + { + "epoch": 1.487127754015011, + "grad_norm": 0.6994932889938354, + "learning_rate": 3.0830648854956347e-06, + "loss": 2.3655, + "step": 18427 + }, + { + "epoch": 1.4872084577515938, + "grad_norm": 0.6469771862030029, + "learning_rate": 3.079176271812134e-06, + "loss": 2.4389, + "step": 18428 + }, + { + "epoch": 1.487289161488177, + "grad_norm": 0.7069564461708069, + "learning_rate": 3.0752900736489178e-06, + "loss": 2.3458, + "step": 18429 + }, + { + "epoch": 1.48736986522476, + "grad_norm": 0.7221277952194214, + "learning_rate": 3.0714062911028184e-06, + "loss": 2.4314, + "step": 18430 + }, + { + "epoch": 1.4874505689613429, + "grad_norm": 0.6999499201774597, + "learning_rate": 3.0675249242706593e-06, + "loss": 2.4113, + "step": 18431 + }, + { + "epoch": 1.487531272697926, + "grad_norm": 0.7012192606925964, + "learning_rate": 3.0636459732491628e-06, + "loss": 2.4281, + "step": 18432 + }, + { + "epoch": 1.487611976434509, + "grad_norm": 0.6578752994537354, + "learning_rate": 3.059769438135007e-06, + "loss": 2.391, + "step": 18433 + }, + { + "epoch": 1.487692680171092, + "grad_norm": 0.6541566848754883, + "learning_rate": 3.055895319024782e-06, + "loss": 
2.4021, + "step": 18434 + }, + { + "epoch": 1.487773383907675, + "grad_norm": 0.6928902864456177, + "learning_rate": 3.052023616015076e-06, + "loss": 2.3543, + "step": 18435 + }, + { + "epoch": 1.4878540876442579, + "grad_norm": 0.6487705111503601, + "learning_rate": 3.048154329202357e-06, + "loss": 2.4147, + "step": 18436 + }, + { + "epoch": 1.487934791380841, + "grad_norm": 0.6711629629135132, + "learning_rate": 3.0442874586830705e-06, + "loss": 2.4273, + "step": 18437 + }, + { + "epoch": 1.4880154951174238, + "grad_norm": 0.6932334899902344, + "learning_rate": 3.0404230045535942e-06, + "loss": 2.3515, + "step": 18438 + }, + { + "epoch": 1.488096198854007, + "grad_norm": 0.7008633017539978, + "learning_rate": 3.036560966910229e-06, + "loss": 2.4667, + "step": 18439 + }, + { + "epoch": 1.48817690259059, + "grad_norm": 0.6920375823974609, + "learning_rate": 3.0327013458492203e-06, + "loss": 2.3934, + "step": 18440 + }, + { + "epoch": 1.4882576063271729, + "grad_norm": 0.7152913808822632, + "learning_rate": 3.028844141466769e-06, + "loss": 2.4049, + "step": 18441 + }, + { + "epoch": 1.488338310063756, + "grad_norm": 0.7209664583206177, + "learning_rate": 3.0249893538590202e-06, + "loss": 2.3956, + "step": 18442 + }, + { + "epoch": 1.488419013800339, + "grad_norm": 0.7767702341079712, + "learning_rate": 3.0211369831220305e-06, + "loss": 2.449, + "step": 18443 + }, + { + "epoch": 1.488499717536922, + "grad_norm": 0.7306828498840332, + "learning_rate": 3.017287029351801e-06, + "loss": 2.4244, + "step": 18444 + }, + { + "epoch": 1.488580421273505, + "grad_norm": 0.7171465158462524, + "learning_rate": 3.01343949264431e-06, + "loss": 2.4145, + "step": 18445 + }, + { + "epoch": 1.488661125010088, + "grad_norm": 0.6547496914863586, + "learning_rate": 3.0095943730954146e-06, + "loss": 2.3829, + "step": 18446 + }, + { + "epoch": 1.488741828746671, + "grad_norm": 0.68947833776474, + "learning_rate": 3.00575167080096e-06, + "loss": 2.3469, + "step": 18447 + }, + { + "epoch": 
1.488822532483254, + "grad_norm": 0.6359937191009521, + "learning_rate": 3.001911385856737e-06, + "loss": 2.419, + "step": 18448 + }, + { + "epoch": 1.4889032362198371, + "grad_norm": 0.7035027146339417, + "learning_rate": 2.998073518358424e-06, + "loss": 2.4082, + "step": 18449 + }, + { + "epoch": 1.48898393995642, + "grad_norm": 0.7352398037910461, + "learning_rate": 2.994238068401689e-06, + "loss": 2.3677, + "step": 18450 + }, + { + "epoch": 1.489064643693003, + "grad_norm": 0.6598670482635498, + "learning_rate": 2.9904050360821222e-06, + "loss": 2.3775, + "step": 18451 + }, + { + "epoch": 1.489145347429586, + "grad_norm": 0.698826014995575, + "learning_rate": 2.9865744214952472e-06, + "loss": 2.4086, + "step": 18452 + }, + { + "epoch": 1.489226051166169, + "grad_norm": 0.6918448209762573, + "learning_rate": 2.982746224736521e-06, + "loss": 2.4418, + "step": 18453 + }, + { + "epoch": 1.4893067549027519, + "grad_norm": 0.7679443359375, + "learning_rate": 2.9789204459013785e-06, + "loss": 2.4279, + "step": 18454 + }, + { + "epoch": 1.489387458639335, + "grad_norm": 0.6985172033309937, + "learning_rate": 2.9750970850851544e-06, + "loss": 2.3943, + "step": 18455 + }, + { + "epoch": 1.489468162375918, + "grad_norm": 0.705737829208374, + "learning_rate": 2.971276142383128e-06, + "loss": 2.3632, + "step": 18456 + }, + { + "epoch": 1.489548866112501, + "grad_norm": 0.68868488073349, + "learning_rate": 2.9674576178905343e-06, + "loss": 2.4607, + "step": 18457 + }, + { + "epoch": 1.489629569849084, + "grad_norm": 0.6910532712936401, + "learning_rate": 2.9636415117025416e-06, + "loss": 2.3732, + "step": 18458 + }, + { + "epoch": 1.489710273585667, + "grad_norm": 0.6957756280899048, + "learning_rate": 2.959827823914263e-06, + "loss": 2.3696, + "step": 18459 + }, + { + "epoch": 1.48979097732225, + "grad_norm": 0.698004961013794, + "learning_rate": 2.956016554620744e-06, + "loss": 2.3999, + "step": 18460 + }, + { + "epoch": 1.489871681058833, + "grad_norm": 
0.6441684365272522, + "learning_rate": 2.952207703916965e-06, + "loss": 2.3946, + "step": 18461 + }, + { + "epoch": 1.4899523847954161, + "grad_norm": 0.68703693151474, + "learning_rate": 2.9484012718978605e-06, + "loss": 2.4102, + "step": 18462 + }, + { + "epoch": 1.490033088531999, + "grad_norm": 0.6793025732040405, + "learning_rate": 2.944597258658277e-06, + "loss": 2.4356, + "step": 18463 + }, + { + "epoch": 1.490113792268582, + "grad_norm": 0.6771492958068848, + "learning_rate": 2.9407956642930613e-06, + "loss": 2.3779, + "step": 18464 + }, + { + "epoch": 1.490194496005165, + "grad_norm": 0.8017939925193787, + "learning_rate": 2.9369964888969147e-06, + "loss": 2.4128, + "step": 18465 + }, + { + "epoch": 1.490275199741748, + "grad_norm": 0.7499281764030457, + "learning_rate": 2.93319973256454e-06, + "loss": 2.4646, + "step": 18466 + }, + { + "epoch": 1.490355903478331, + "grad_norm": 0.7264615297317505, + "learning_rate": 2.929405395390561e-06, + "loss": 2.42, + "step": 18467 + }, + { + "epoch": 1.490436607214914, + "grad_norm": 0.6842880845069885, + "learning_rate": 2.9256134774695464e-06, + "loss": 2.3864, + "step": 18468 + }, + { + "epoch": 1.490517310951497, + "grad_norm": 0.7287806272506714, + "learning_rate": 2.9218239788959987e-06, + "loss": 2.4208, + "step": 18469 + }, + { + "epoch": 1.49059801468808, + "grad_norm": 0.683708906173706, + "learning_rate": 2.9180368997643646e-06, + "loss": 2.379, + "step": 18470 + }, + { + "epoch": 1.490678718424663, + "grad_norm": 0.7012128233909607, + "learning_rate": 2.9142522401690353e-06, + "loss": 2.4046, + "step": 18471 + }, + { + "epoch": 1.4907594221612461, + "grad_norm": 0.7036008834838867, + "learning_rate": 2.9104700002043128e-06, + "loss": 2.4177, + "step": 18472 + }, + { + "epoch": 1.490840125897829, + "grad_norm": 0.6707095503807068, + "learning_rate": 2.9066901799644776e-06, + "loss": 2.4333, + "step": 18473 + }, + { + "epoch": 1.490920829634412, + "grad_norm": 0.6534161567687988, + "learning_rate": 
2.9029127795437317e-06, + "loss": 2.4293, + "step": 18474 + }, + { + "epoch": 1.4910015333709952, + "grad_norm": 0.7266476154327393, + "learning_rate": 2.8991377990362e-06, + "loss": 2.4023, + "step": 18475 + }, + { + "epoch": 1.491082237107578, + "grad_norm": 0.68699049949646, + "learning_rate": 2.8953652385359852e-06, + "loss": 2.4531, + "step": 18476 + }, + { + "epoch": 1.4911629408441611, + "grad_norm": 0.710686206817627, + "learning_rate": 2.891595098137101e-06, + "loss": 2.3729, + "step": 18477 + }, + { + "epoch": 1.4912436445807442, + "grad_norm": 0.7585535049438477, + "learning_rate": 2.8878273779335165e-06, + "loss": 2.4254, + "step": 18478 + }, + { + "epoch": 1.491324348317327, + "grad_norm": 0.7347260117530823, + "learning_rate": 2.884062078019123e-06, + "loss": 2.3753, + "step": 18479 + }, + { + "epoch": 1.4914050520539102, + "grad_norm": 0.662326455116272, + "learning_rate": 2.880299198487779e-06, + "loss": 2.4229, + "step": 18480 + }, + { + "epoch": 1.491485755790493, + "grad_norm": 0.7223392128944397, + "learning_rate": 2.8765387394332323e-06, + "loss": 2.4101, + "step": 18481 + }, + { + "epoch": 1.4915664595270761, + "grad_norm": 0.6733242869377136, + "learning_rate": 2.8727807009492293e-06, + "loss": 2.4009, + "step": 18482 + }, + { + "epoch": 1.491647163263659, + "grad_norm": 0.6901989579200745, + "learning_rate": 2.8690250831294398e-06, + "loss": 2.3742, + "step": 18483 + }, + { + "epoch": 1.491727867000242, + "grad_norm": 0.734670877456665, + "learning_rate": 2.8652718860674333e-06, + "loss": 2.374, + "step": 18484 + }, + { + "epoch": 1.4918085707368252, + "grad_norm": 0.6870261430740356, + "learning_rate": 2.8615211098567686e-06, + "loss": 2.4386, + "step": 18485 + }, + { + "epoch": 1.491889274473408, + "grad_norm": 0.7317399382591248, + "learning_rate": 2.8577727545909148e-06, + "loss": 2.3601, + "step": 18486 + }, + { + "epoch": 1.491969978209991, + "grad_norm": 0.7105548977851868, + "learning_rate": 2.854026820363298e-06, + "loss": 2.4112, + 
"step": 18487 + }, + { + "epoch": 1.4920506819465742, + "grad_norm": 0.7378930449485779, + "learning_rate": 2.8502833072672763e-06, + "loss": 2.4487, + "step": 18488 + }, + { + "epoch": 1.492131385683157, + "grad_norm": 0.69692462682724, + "learning_rate": 2.8465422153961418e-06, + "loss": 2.4672, + "step": 18489 + }, + { + "epoch": 1.4922120894197402, + "grad_norm": 0.6905173063278198, + "learning_rate": 2.8428035448431534e-06, + "loss": 2.3586, + "step": 18490 + }, + { + "epoch": 1.4922927931563232, + "grad_norm": 0.6969714760780334, + "learning_rate": 2.8390672957014586e-06, + "loss": 2.4488, + "step": 18491 + }, + { + "epoch": 1.492373496892906, + "grad_norm": 0.6935562491416931, + "learning_rate": 2.835333468064183e-06, + "loss": 2.4342, + "step": 18492 + }, + { + "epoch": 1.4924542006294892, + "grad_norm": 0.7018017768859863, + "learning_rate": 2.831602062024408e-06, + "loss": 2.3931, + "step": 18493 + }, + { + "epoch": 1.4925349043660723, + "grad_norm": 0.7257668375968933, + "learning_rate": 2.8278730776750917e-06, + "loss": 2.4752, + "step": 18494 + }, + { + "epoch": 1.4926156081026551, + "grad_norm": 0.7172815799713135, + "learning_rate": 2.824146515109194e-06, + "loss": 2.4264, + "step": 18495 + }, + { + "epoch": 1.4926963118392382, + "grad_norm": 0.6975371241569519, + "learning_rate": 2.8204223744195958e-06, + "loss": 2.4833, + "step": 18496 + }, + { + "epoch": 1.492777015575821, + "grad_norm": 0.741058886051178, + "learning_rate": 2.8167006556990893e-06, + "loss": 2.4557, + "step": 18497 + }, + { + "epoch": 1.4928577193124042, + "grad_norm": 0.7467125654220581, + "learning_rate": 2.8129813590404342e-06, + "loss": 2.4586, + "step": 18498 + }, + { + "epoch": 1.492938423048987, + "grad_norm": 0.7192440032958984, + "learning_rate": 2.809264484536356e-06, + "loss": 2.3789, + "step": 18499 + }, + { + "epoch": 1.4930191267855701, + "grad_norm": 0.7029628753662109, + "learning_rate": 2.805550032279458e-06, + "loss": 2.3833, + "step": 18500 + }, + { + "epoch": 
1.4930998305221532, + "grad_norm": 0.8207079172134399, + "learning_rate": 2.8018380023623116e-06, + "loss": 2.4767, + "step": 18501 + }, + { + "epoch": 1.493180534258736, + "grad_norm": 0.6775376796722412, + "learning_rate": 2.7981283948774527e-06, + "loss": 2.4166, + "step": 18502 + }, + { + "epoch": 1.4932612379953192, + "grad_norm": 0.7079663276672363, + "learning_rate": 2.7944212099173194e-06, + "loss": 2.4247, + "step": 18503 + }, + { + "epoch": 1.4933419417319023, + "grad_norm": 0.7320355772972107, + "learning_rate": 2.7907164475743043e-06, + "loss": 2.4352, + "step": 18504 + }, + { + "epoch": 1.4934226454684851, + "grad_norm": 0.6638190150260925, + "learning_rate": 2.7870141079407442e-06, + "loss": 2.4045, + "step": 18505 + }, + { + "epoch": 1.4935033492050682, + "grad_norm": 0.6977740526199341, + "learning_rate": 2.7833141911089213e-06, + "loss": 2.3973, + "step": 18506 + }, + { + "epoch": 1.4935840529416513, + "grad_norm": 0.6586610078811646, + "learning_rate": 2.7796166971710167e-06, + "loss": 2.4308, + "step": 18507 + }, + { + "epoch": 1.4936647566782342, + "grad_norm": 0.6625449657440186, + "learning_rate": 2.7759216262192133e-06, + "loss": 2.4498, + "step": 18508 + }, + { + "epoch": 1.4937454604148173, + "grad_norm": 0.760132908821106, + "learning_rate": 2.772228978345581e-06, + "loss": 2.4554, + "step": 18509 + }, + { + "epoch": 1.4938261641514003, + "grad_norm": 0.7072888612747192, + "learning_rate": 2.7685387536421582e-06, + "loss": 2.3822, + "step": 18510 + }, + { + "epoch": 1.4939068678879832, + "grad_norm": 0.7946352362632751, + "learning_rate": 2.764850952200915e-06, + "loss": 2.3972, + "step": 18511 + }, + { + "epoch": 1.493987571624566, + "grad_norm": 0.6885955929756165, + "learning_rate": 2.7611655741137775e-06, + "loss": 2.4101, + "step": 18512 + }, + { + "epoch": 1.4940682753611492, + "grad_norm": 0.7515766620635986, + "learning_rate": 2.7574826194725622e-06, + "loss": 2.4282, + "step": 18513 + }, + { + "epoch": 1.4941489790977323, + 
"grad_norm": 0.6854525804519653, + "learning_rate": 2.7538020883690727e-06, + "loss": 2.3898, + "step": 18514 + }, + { + "epoch": 1.4942296828343151, + "grad_norm": 0.6416916251182556, + "learning_rate": 2.7501239808950473e-06, + "loss": 2.3419, + "step": 18515 + }, + { + "epoch": 1.4943103865708982, + "grad_norm": 0.6626073122024536, + "learning_rate": 2.746448297142157e-06, + "loss": 2.4021, + "step": 18516 + }, + { + "epoch": 1.4943910903074813, + "grad_norm": 0.6947335004806519, + "learning_rate": 2.7427750372019833e-06, + "loss": 2.4233, + "step": 18517 + }, + { + "epoch": 1.4944717940440642, + "grad_norm": 0.7005210518836975, + "learning_rate": 2.739104201166087e-06, + "loss": 2.3649, + "step": 18518 + }, + { + "epoch": 1.4945524977806472, + "grad_norm": 0.7207785248756409, + "learning_rate": 2.735435789125962e-06, + "loss": 2.4612, + "step": 18519 + }, + { + "epoch": 1.4946332015172303, + "grad_norm": 0.6695407629013062, + "learning_rate": 2.731769801173023e-06, + "loss": 2.4302, + "step": 18520 + }, + { + "epoch": 1.4947139052538132, + "grad_norm": 0.6625963449478149, + "learning_rate": 2.728106237398642e-06, + "loss": 2.4016, + "step": 18521 + }, + { + "epoch": 1.4947946089903963, + "grad_norm": 0.6939513087272644, + "learning_rate": 2.724445097894135e-06, + "loss": 2.3906, + "step": 18522 + }, + { + "epoch": 1.4948753127269794, + "grad_norm": 0.6693980097770691, + "learning_rate": 2.7207863827507395e-06, + "loss": 2.3769, + "step": 18523 + }, + { + "epoch": 1.4949560164635622, + "grad_norm": 0.7011690735816956, + "learning_rate": 2.717130092059628e-06, + "loss": 2.3497, + "step": 18524 + }, + { + "epoch": 1.4950367202001453, + "grad_norm": 0.7054407596588135, + "learning_rate": 2.7134762259119373e-06, + "loss": 2.4087, + "step": 18525 + }, + { + "epoch": 1.4951174239367282, + "grad_norm": 0.7248849272727966, + "learning_rate": 2.709824784398729e-06, + "loss": 2.4658, + "step": 18526 + }, + { + "epoch": 1.4951981276733113, + "grad_norm": 
0.6783565282821655, + "learning_rate": 2.706175767611008e-06, + "loss": 2.4486, + "step": 18527 + }, + { + "epoch": 1.4952788314098941, + "grad_norm": 0.7590169310569763, + "learning_rate": 2.702529175639712e-06, + "loss": 2.415, + "step": 18528 + }, + { + "epoch": 1.4953595351464772, + "grad_norm": 0.6909342408180237, + "learning_rate": 2.6988850085757244e-06, + "loss": 2.4161, + "step": 18529 + }, + { + "epoch": 1.4954402388830603, + "grad_norm": 0.7009775638580322, + "learning_rate": 2.6952432665098724e-06, + "loss": 2.4345, + "step": 18530 + }, + { + "epoch": 1.4955209426196432, + "grad_norm": 0.6565183997154236, + "learning_rate": 2.691603949532917e-06, + "loss": 2.4248, + "step": 18531 + }, + { + "epoch": 1.4956016463562263, + "grad_norm": 0.6656069755554199, + "learning_rate": 2.687967057735563e-06, + "loss": 2.3897, + "step": 18532 + }, + { + "epoch": 1.4956823500928094, + "grad_norm": 0.6860701441764832, + "learning_rate": 2.6843325912084383e-06, + "loss": 2.435, + "step": 18533 + }, + { + "epoch": 1.4957630538293922, + "grad_norm": 0.7380251288414001, + "learning_rate": 2.6807005500421256e-06, + "loss": 2.4544, + "step": 18534 + }, + { + "epoch": 1.4958437575659753, + "grad_norm": 0.7232703566551208, + "learning_rate": 2.677070934327175e-06, + "loss": 2.4701, + "step": 18535 + }, + { + "epoch": 1.4959244613025584, + "grad_norm": 0.6819149851799011, + "learning_rate": 2.673443744154003e-06, + "loss": 2.3664, + "step": 18536 + }, + { + "epoch": 1.4960051650391413, + "grad_norm": 0.7755081057548523, + "learning_rate": 2.669818979613026e-06, + "loss": 2.4371, + "step": 18537 + }, + { + "epoch": 1.4960858687757244, + "grad_norm": 0.7655733823776245, + "learning_rate": 2.6661966407945826e-06, + "loss": 2.4068, + "step": 18538 + }, + { + "epoch": 1.4961665725123074, + "grad_norm": 0.711729884147644, + "learning_rate": 2.6625767277889567e-06, + "loss": 2.4384, + "step": 18539 + }, + { + "epoch": 1.4962472762488903, + "grad_norm": 0.7411779761314392, + 
"learning_rate": 2.658959240686354e-06, + "loss": 2.3928, + "step": 18540 + }, + { + "epoch": 1.4963279799854734, + "grad_norm": 0.7470163106918335, + "learning_rate": 2.6553441795769574e-06, + "loss": 2.4121, + "step": 18541 + }, + { + "epoch": 1.4964086837220563, + "grad_norm": 0.6805182695388794, + "learning_rate": 2.6517315445508285e-06, + "loss": 2.4439, + "step": 18542 + }, + { + "epoch": 1.4964893874586394, + "grad_norm": 0.6465758085250854, + "learning_rate": 2.6481213356980285e-06, + "loss": 2.3996, + "step": 18543 + }, + { + "epoch": 1.4965700911952222, + "grad_norm": 0.7103277444839478, + "learning_rate": 2.6445135531085297e-06, + "loss": 2.4107, + "step": 18544 + }, + { + "epoch": 1.4966507949318053, + "grad_norm": 0.7064812779426575, + "learning_rate": 2.640908196872227e-06, + "loss": 2.437, + "step": 18545 + }, + { + "epoch": 1.4967314986683884, + "grad_norm": 0.7219479084014893, + "learning_rate": 2.6373052670790043e-06, + "loss": 2.3647, + "step": 18546 + }, + { + "epoch": 1.4968122024049713, + "grad_norm": 0.655364453792572, + "learning_rate": 2.633704763818634e-06, + "loss": 2.4055, + "step": 18547 + }, + { + "epoch": 1.4968929061415543, + "grad_norm": 0.7051714658737183, + "learning_rate": 2.6301066871808668e-06, + "loss": 2.4221, + "step": 18548 + }, + { + "epoch": 1.4969736098781374, + "grad_norm": 0.6792117953300476, + "learning_rate": 2.626511037255364e-06, + "loss": 2.4437, + "step": 18549 + }, + { + "epoch": 1.4970543136147203, + "grad_norm": 0.7968631982803345, + "learning_rate": 2.6229178141317314e-06, + "loss": 2.3948, + "step": 18550 + }, + { + "epoch": 1.4971350173513034, + "grad_norm": 0.8141141533851624, + "learning_rate": 2.6193270178995644e-06, + "loss": 2.4079, + "step": 18551 + }, + { + "epoch": 1.4972157210878865, + "grad_norm": 0.7343787550926208, + "learning_rate": 2.6157386486483027e-06, + "loss": 2.3716, + "step": 18552 + }, + { + "epoch": 1.4972964248244693, + "grad_norm": 0.7314772009849548, + "learning_rate": 
2.612152706467397e-06, + "loss": 2.4201, + "step": 18553 + }, + { + "epoch": 1.4973771285610524, + "grad_norm": 0.6845466494560242, + "learning_rate": 2.6085691914462306e-06, + "loss": 2.4698, + "step": 18554 + }, + { + "epoch": 1.4974578322976355, + "grad_norm": 0.7247948050498962, + "learning_rate": 2.6049881036741e-06, + "loss": 2.4039, + "step": 18555 + }, + { + "epoch": 1.4975385360342184, + "grad_norm": 0.6975938081741333, + "learning_rate": 2.601409443240255e-06, + "loss": 2.4121, + "step": 18556 + }, + { + "epoch": 1.4976192397708015, + "grad_norm": 0.7096135020256042, + "learning_rate": 2.597833210233891e-06, + "loss": 2.3661, + "step": 18557 + }, + { + "epoch": 1.4976999435073843, + "grad_norm": 0.7084534168243408, + "learning_rate": 2.594259404744137e-06, + "loss": 2.4388, + "step": 18558 + }, + { + "epoch": 1.4977806472439674, + "grad_norm": 0.7675961852073669, + "learning_rate": 2.5906880268600442e-06, + "loss": 2.4495, + "step": 18559 + }, + { + "epoch": 1.4978613509805503, + "grad_norm": 0.6656114459037781, + "learning_rate": 2.5871190766706632e-06, + "loss": 2.3662, + "step": 18560 + }, + { + "epoch": 1.4979420547171334, + "grad_norm": 0.7376806139945984, + "learning_rate": 2.583552554264901e-06, + "loss": 2.4522, + "step": 18561 + }, + { + "epoch": 1.4980227584537165, + "grad_norm": 0.6656897664070129, + "learning_rate": 2.5799884597316527e-06, + "loss": 2.3719, + "step": 18562 + }, + { + "epoch": 1.4981034621902993, + "grad_norm": 0.686014711856842, + "learning_rate": 2.5764267931597586e-06, + "loss": 2.3807, + "step": 18563 + }, + { + "epoch": 1.4981841659268824, + "grad_norm": 0.739297091960907, + "learning_rate": 2.572867554637981e-06, + "loss": 2.4135, + "step": 18564 + }, + { + "epoch": 1.4982648696634655, + "grad_norm": 0.6836863160133362, + "learning_rate": 2.569310744255016e-06, + "loss": 2.4243, + "step": 18565 + }, + { + "epoch": 1.4983455734000484, + "grad_norm": 0.6839776039123535, + "learning_rate": 2.565756362099503e-06, + "loss": 
2.3698, + "step": 18566 + }, + { + "epoch": 1.4984262771366315, + "grad_norm": 0.717965841293335, + "learning_rate": 2.5622044082600604e-06, + "loss": 2.4255, + "step": 18567 + }, + { + "epoch": 1.4985069808732145, + "grad_norm": 0.7073249816894531, + "learning_rate": 2.5586548828251733e-06, + "loss": 2.3958, + "step": 18568 + }, + { + "epoch": 1.4985876846097974, + "grad_norm": 0.6807124018669128, + "learning_rate": 2.555107785883315e-06, + "loss": 2.3746, + "step": 18569 + }, + { + "epoch": 1.4986683883463805, + "grad_norm": 0.6823258996009827, + "learning_rate": 2.5515631175229037e-06, + "loss": 2.4117, + "step": 18570 + }, + { + "epoch": 1.4987490920829634, + "grad_norm": 0.6415054202079773, + "learning_rate": 2.548020877832269e-06, + "loss": 2.3362, + "step": 18571 + }, + { + "epoch": 1.4988297958195465, + "grad_norm": 0.6377396583557129, + "learning_rate": 2.5444810668996956e-06, + "loss": 2.3808, + "step": 18572 + }, + { + "epoch": 1.4989104995561293, + "grad_norm": 0.6864121556282043, + "learning_rate": 2.5409436848134127e-06, + "loss": 2.4115, + "step": 18573 + }, + { + "epoch": 1.4989912032927124, + "grad_norm": 0.6817963719367981, + "learning_rate": 2.5374087316615726e-06, + "loss": 2.4278, + "step": 18574 + }, + { + "epoch": 1.4990719070292955, + "grad_norm": 0.7278866171836853, + "learning_rate": 2.533876207532271e-06, + "loss": 2.3838, + "step": 18575 + }, + { + "epoch": 1.4991526107658784, + "grad_norm": 0.6872361898422241, + "learning_rate": 2.5303461125135596e-06, + "loss": 2.3583, + "step": 18576 + }, + { + "epoch": 1.4992333145024614, + "grad_norm": 0.7112752795219421, + "learning_rate": 2.526818446693402e-06, + "loss": 2.3556, + "step": 18577 + }, + { + "epoch": 1.4993140182390445, + "grad_norm": 0.6485861539840698, + "learning_rate": 2.5232932101597273e-06, + "loss": 2.4051, + "step": 18578 + }, + { + "epoch": 1.4993947219756274, + "grad_norm": 0.796795129776001, + "learning_rate": 2.519770403000399e-06, + "loss": 2.4487, + "step": 18579 + }, + 
{ + "epoch": 1.4994754257122105, + "grad_norm": 0.6965582370758057, + "learning_rate": 2.5162500253032016e-06, + "loss": 2.4096, + "step": 18580 + }, + { + "epoch": 1.4995561294487936, + "grad_norm": 0.6711980104446411, + "learning_rate": 2.5127320771558772e-06, + "loss": 2.3684, + "step": 18581 + }, + { + "epoch": 1.4996368331853764, + "grad_norm": 0.6734749674797058, + "learning_rate": 2.50921655864611e-06, + "loss": 2.4111, + "step": 18582 + }, + { + "epoch": 1.4997175369219595, + "grad_norm": 0.6705273389816284, + "learning_rate": 2.505703469861509e-06, + "loss": 2.4486, + "step": 18583 + }, + { + "epoch": 1.4997982406585426, + "grad_norm": 0.6863572597503662, + "learning_rate": 2.5021928108896365e-06, + "loss": 2.3861, + "step": 18584 + }, + { + "epoch": 1.4998789443951255, + "grad_norm": 0.7196049094200134, + "learning_rate": 2.498684581817967e-06, + "loss": 2.4723, + "step": 18585 + }, + { + "epoch": 1.4999596481317086, + "grad_norm": 0.6990470290184021, + "learning_rate": 2.4951787827339644e-06, + "loss": 2.4122, + "step": 18586 + }, + { + "epoch": 1.5000403518682917, + "grad_norm": 0.6765878796577454, + "learning_rate": 2.49167541372497e-06, + "loss": 2.4416, + "step": 18587 + }, + { + "epoch": 1.5001210556048745, + "grad_norm": 0.695720911026001, + "learning_rate": 2.488174474878324e-06, + "loss": 2.4378, + "step": 18588 + }, + { + "epoch": 1.5002017593414574, + "grad_norm": 0.6874660849571228, + "learning_rate": 2.484675966281269e-06, + "loss": 2.4061, + "step": 18589 + }, + { + "epoch": 1.5002824630780405, + "grad_norm": 0.7196346521377563, + "learning_rate": 2.4811798880209903e-06, + "loss": 2.4147, + "step": 18590 + }, + { + "epoch": 1.5003631668146236, + "grad_norm": 0.7235828042030334, + "learning_rate": 2.477686240184629e-06, + "loss": 2.3971, + "step": 18591 + }, + { + "epoch": 1.5004438705512064, + "grad_norm": 0.690998911857605, + "learning_rate": 2.47419502285926e-06, + "loss": 2.4617, + "step": 18592 + }, + { + "epoch": 1.5005245742877895, + 
"grad_norm": 0.704179048538208, + "learning_rate": 2.47070623613187e-06, + "loss": 2.3694, + "step": 18593 + }, + { + "epoch": 1.5006052780243726, + "grad_norm": 0.6459659934043884, + "learning_rate": 2.467219880089433e-06, + "loss": 2.3735, + "step": 18594 + }, + { + "epoch": 1.5006859817609555, + "grad_norm": 0.6891184449195862, + "learning_rate": 2.463735954818824e-06, + "loss": 2.4479, + "step": 18595 + }, + { + "epoch": 1.5007666854975386, + "grad_norm": 0.7227807641029358, + "learning_rate": 2.460254460406897e-06, + "loss": 2.3642, + "step": 18596 + }, + { + "epoch": 1.5008473892341216, + "grad_norm": 0.7072375416755676, + "learning_rate": 2.4567753969403807e-06, + "loss": 2.385, + "step": 18597 + }, + { + "epoch": 1.5009280929707045, + "grad_norm": 0.7210230231285095, + "learning_rate": 2.453298764506007e-06, + "loss": 2.4116, + "step": 18598 + }, + { + "epoch": 1.5010087967072876, + "grad_norm": 0.7932078242301941, + "learning_rate": 2.449824563190417e-06, + "loss": 2.4631, + "step": 18599 + }, + { + "epoch": 1.5010895004438707, + "grad_norm": 0.6900286078453064, + "learning_rate": 2.4463527930801977e-06, + "loss": 2.4342, + "step": 18600 + }, + { + "epoch": 1.5011702041804535, + "grad_norm": 0.6741199493408203, + "learning_rate": 2.4428834542618796e-06, + "loss": 2.4389, + "step": 18601 + }, + { + "epoch": 1.5012509079170364, + "grad_norm": 0.6513713002204895, + "learning_rate": 2.4394165468219264e-06, + "loss": 2.3851, + "step": 18602 + }, + { + "epoch": 1.5013316116536197, + "grad_norm": 0.7287545204162598, + "learning_rate": 2.4359520708467255e-06, + "loss": 2.4199, + "step": 18603 + }, + { + "epoch": 1.5014123153902026, + "grad_norm": 0.6606385111808777, + "learning_rate": 2.4324900264226403e-06, + "loss": 2.4127, + "step": 18604 + }, + { + "epoch": 1.5014930191267855, + "grad_norm": 0.6798221468925476, + "learning_rate": 2.4290304136359575e-06, + "loss": 2.429, + "step": 18605 + }, + { + "epoch": 1.5015737228633685, + "grad_norm": 0.6801900863647461, 
+ "learning_rate": 2.425573232572875e-06, + "loss": 2.4403, + "step": 18606 + }, + { + "epoch": 1.5016544265999516, + "grad_norm": 0.6709669232368469, + "learning_rate": 2.422118483319569e-06, + "loss": 2.4102, + "step": 18607 + }, + { + "epoch": 1.5017351303365345, + "grad_norm": 0.6942405700683594, + "learning_rate": 2.418666165962158e-06, + "loss": 2.3717, + "step": 18608 + }, + { + "epoch": 1.5018158340731176, + "grad_norm": 0.7532398700714111, + "learning_rate": 2.415216280586652e-06, + "loss": 2.3848, + "step": 18609 + }, + { + "epoch": 1.5018965378097007, + "grad_norm": 0.7056287527084351, + "learning_rate": 2.4117688272790373e-06, + "loss": 2.4101, + "step": 18610 + }, + { + "epoch": 1.5019772415462835, + "grad_norm": 0.7303447723388672, + "learning_rate": 2.4083238061252567e-06, + "loss": 2.4206, + "step": 18611 + }, + { + "epoch": 1.5020579452828666, + "grad_norm": 0.7364635467529297, + "learning_rate": 2.404881217211152e-06, + "loss": 2.4063, + "step": 18612 + }, + { + "epoch": 1.5021386490194497, + "grad_norm": 0.6893425583839417, + "learning_rate": 2.4014410606225225e-06, + "loss": 2.4183, + "step": 18613 + }, + { + "epoch": 1.5022193527560326, + "grad_norm": 0.6890718936920166, + "learning_rate": 2.3980033364451094e-06, + "loss": 2.4023, + "step": 18614 + }, + { + "epoch": 1.5023000564926154, + "grad_norm": 0.6982435584068298, + "learning_rate": 2.394568044764589e-06, + "loss": 2.3832, + "step": 18615 + }, + { + "epoch": 1.5023807602291988, + "grad_norm": 0.7023438811302185, + "learning_rate": 2.391135185666571e-06, + "loss": 2.401, + "step": 18616 + }, + { + "epoch": 1.5024614639657816, + "grad_norm": 0.7713298201560974, + "learning_rate": 2.3877047592366195e-06, + "loss": 2.3814, + "step": 18617 + }, + { + "epoch": 1.5025421677023645, + "grad_norm": 0.6758377552032471, + "learning_rate": 2.384276765560234e-06, + "loss": 2.3654, + "step": 18618 + }, + { + "epoch": 1.5026228714389476, + "grad_norm": 0.7223884463310242, + "learning_rate": 
2.3808512047228227e-06, + "loss": 2.4036, + "step": 18619 + }, + { + "epoch": 1.5027035751755307, + "grad_norm": 0.6677948832511902, + "learning_rate": 2.3774280768097843e-06, + "loss": 2.454, + "step": 18620 + }, + { + "epoch": 1.5027842789121135, + "grad_norm": 0.6792545914649963, + "learning_rate": 2.374007381906429e-06, + "loss": 2.4515, + "step": 18621 + }, + { + "epoch": 1.5028649826486966, + "grad_norm": 0.6737624406814575, + "learning_rate": 2.3705891200980103e-06, + "loss": 2.3978, + "step": 18622 + }, + { + "epoch": 1.5029456863852797, + "grad_norm": 0.6470539569854736, + "learning_rate": 2.367173291469704e-06, + "loss": 2.4051, + "step": 18623 + }, + { + "epoch": 1.5030263901218626, + "grad_norm": 0.6720410585403442, + "learning_rate": 2.3637598961066655e-06, + "loss": 2.3405, + "step": 18624 + }, + { + "epoch": 1.5031070938584457, + "grad_norm": 0.6465243101119995, + "learning_rate": 2.3603489340939588e-06, + "loss": 2.3998, + "step": 18625 + }, + { + "epoch": 1.5031877975950287, + "grad_norm": 0.7025001645088196, + "learning_rate": 2.3569404055165836e-06, + "loss": 2.4181, + "step": 18626 + }, + { + "epoch": 1.5032685013316116, + "grad_norm": 0.72223961353302, + "learning_rate": 2.353534310459493e-06, + "loss": 2.3888, + "step": 18627 + }, + { + "epoch": 1.5033492050681947, + "grad_norm": 0.7461752891540527, + "learning_rate": 2.350130649007587e-06, + "loss": 2.3983, + "step": 18628 + }, + { + "epoch": 1.5034299088047778, + "grad_norm": 0.7365756034851074, + "learning_rate": 2.346729421245675e-06, + "loss": 2.4019, + "step": 18629 + }, + { + "epoch": 1.5035106125413606, + "grad_norm": 0.6703508496284485, + "learning_rate": 2.343330627258533e-06, + "loss": 2.3518, + "step": 18630 + }, + { + "epoch": 1.5035913162779435, + "grad_norm": 0.7155243158340454, + "learning_rate": 2.3399342671308722e-06, + "loss": 2.4097, + "step": 18631 + }, + { + "epoch": 1.5036720200145268, + "grad_norm": 0.7172690629959106, + "learning_rate": 2.336540340947324e-06, + "loss": 
2.4041, + "step": 18632 + }, + { + "epoch": 1.5037527237511097, + "grad_norm": 0.7039667367935181, + "learning_rate": 2.333148848792499e-06, + "loss": 2.3767, + "step": 18633 + }, + { + "epoch": 1.5038334274876926, + "grad_norm": 0.6833097338676453, + "learning_rate": 2.329759790750907e-06, + "loss": 2.4188, + "step": 18634 + }, + { + "epoch": 1.5039141312242756, + "grad_norm": 0.6812809109687805, + "learning_rate": 2.3263731669070145e-06, + "loss": 2.443, + "step": 18635 + }, + { + "epoch": 1.5039948349608587, + "grad_norm": 0.6669073104858398, + "learning_rate": 2.3229889773452195e-06, + "loss": 2.4097, + "step": 18636 + }, + { + "epoch": 1.5040755386974416, + "grad_norm": 0.6794682145118713, + "learning_rate": 2.3196072221498778e-06, + "loss": 2.4558, + "step": 18637 + }, + { + "epoch": 1.5041562424340247, + "grad_norm": 0.6677505970001221, + "learning_rate": 2.3162279014052547e-06, + "loss": 2.4204, + "step": 18638 + }, + { + "epoch": 1.5042369461706078, + "grad_norm": 0.6727068424224854, + "learning_rate": 2.312851015195583e-06, + "loss": 2.3996, + "step": 18639 + }, + { + "epoch": 1.5043176499071906, + "grad_norm": 0.6639944911003113, + "learning_rate": 2.3094765636050177e-06, + "loss": 2.384, + "step": 18640 + }, + { + "epoch": 1.5043983536437737, + "grad_norm": 0.7160700559616089, + "learning_rate": 2.306104546717658e-06, + "loss": 2.4036, + "step": 18641 + }, + { + "epoch": 1.5044790573803568, + "grad_norm": 0.7650535702705383, + "learning_rate": 2.3027349646175588e-06, + "loss": 2.4178, + "step": 18642 + }, + { + "epoch": 1.5045597611169397, + "grad_norm": 0.7348201870918274, + "learning_rate": 2.299367817388676e-06, + "loss": 2.4216, + "step": 18643 + }, + { + "epoch": 1.5046404648535228, + "grad_norm": 0.7645912170410156, + "learning_rate": 2.2960031051149524e-06, + "loss": 2.4465, + "step": 18644 + }, + { + "epoch": 1.5047211685901059, + "grad_norm": 0.7808031439781189, + "learning_rate": 2.2926408278802327e-06, + "loss": 2.4039, + "step": 18645 + }, + 
{ + "epoch": 1.5048018723266887, + "grad_norm": 0.8323469161987305, + "learning_rate": 2.2892809857683053e-06, + "loss": 2.4223, + "step": 18646 + }, + { + "epoch": 1.5048825760632716, + "grad_norm": 0.7380712032318115, + "learning_rate": 2.285923578862914e-06, + "loss": 2.3822, + "step": 18647 + }, + { + "epoch": 1.504963279799855, + "grad_norm": 0.734913170337677, + "learning_rate": 2.282568607247737e-06, + "loss": 2.4136, + "step": 18648 + }, + { + "epoch": 1.5050439835364378, + "grad_norm": 0.6847864389419556, + "learning_rate": 2.2792160710063846e-06, + "loss": 2.4458, + "step": 18649 + }, + { + "epoch": 1.5051246872730206, + "grad_norm": 0.7042723298072815, + "learning_rate": 2.2758659702224127e-06, + "loss": 2.4205, + "step": 18650 + }, + { + "epoch": 1.5052053910096037, + "grad_norm": 0.7443733811378479, + "learning_rate": 2.2725183049793096e-06, + "loss": 2.4135, + "step": 18651 + }, + { + "epoch": 1.5052860947461868, + "grad_norm": 0.6596884727478027, + "learning_rate": 2.26917307536052e-06, + "loss": 2.4134, + "step": 18652 + }, + { + "epoch": 1.5053667984827697, + "grad_norm": 0.6547135710716248, + "learning_rate": 2.2658302814494103e-06, + "loss": 2.3842, + "step": 18653 + }, + { + "epoch": 1.5054475022193528, + "grad_norm": 0.7708645462989807, + "learning_rate": 2.2624899233292806e-06, + "loss": 2.4263, + "step": 18654 + }, + { + "epoch": 1.5055282059559358, + "grad_norm": 0.7285633087158203, + "learning_rate": 2.2591520010833978e-06, + "loss": 2.4192, + "step": 18655 + }, + { + "epoch": 1.5056089096925187, + "grad_norm": 0.7440153956413269, + "learning_rate": 2.255816514794928e-06, + "loss": 2.4419, + "step": 18656 + }, + { + "epoch": 1.5056896134291018, + "grad_norm": 0.7068066596984863, + "learning_rate": 2.2524834645470395e-06, + "loss": 2.4174, + "step": 18657 + }, + { + "epoch": 1.5057703171656849, + "grad_norm": 0.7280914187431335, + "learning_rate": 2.249152850422764e-06, + "loss": 2.4275, + "step": 18658 + }, + { + "epoch": 
1.5058510209022677, + "grad_norm": 0.6725744009017944, + "learning_rate": 2.245824672505126e-06, + "loss": 2.3799, + "step": 18659 + }, + { + "epoch": 1.5059317246388508, + "grad_norm": 0.6966879367828369, + "learning_rate": 2.2424989308770796e-06, + "loss": 2.4448, + "step": 18660 + }, + { + "epoch": 1.506012428375434, + "grad_norm": 0.6617816090583801, + "learning_rate": 2.2391756256214813e-06, + "loss": 2.3881, + "step": 18661 + }, + { + "epoch": 1.5060931321120168, + "grad_norm": 0.6595850586891174, + "learning_rate": 2.2358547568211873e-06, + "loss": 2.3878, + "step": 18662 + }, + { + "epoch": 1.5061738358485997, + "grad_norm": 0.769210696220398, + "learning_rate": 2.2325363245589535e-06, + "loss": 2.3398, + "step": 18663 + }, + { + "epoch": 1.5062545395851827, + "grad_norm": 0.6378950476646423, + "learning_rate": 2.2292203289174695e-06, + "loss": 2.3622, + "step": 18664 + }, + { + "epoch": 1.5063352433217658, + "grad_norm": 0.7006397843360901, + "learning_rate": 2.225906769979402e-06, + "loss": 2.454, + "step": 18665 + }, + { + "epoch": 1.5064159470583487, + "grad_norm": 0.7044196128845215, + "learning_rate": 2.222595647827319e-06, + "loss": 2.4629, + "step": 18666 + }, + { + "epoch": 1.5064966507949318, + "grad_norm": 0.7604904770851135, + "learning_rate": 2.219286962543743e-06, + "loss": 2.4704, + "step": 18667 + }, + { + "epoch": 1.5065773545315149, + "grad_norm": 0.6727971434593201, + "learning_rate": 2.215980714211141e-06, + "loss": 2.4113, + "step": 18668 + }, + { + "epoch": 1.5066580582680977, + "grad_norm": 0.7251582741737366, + "learning_rate": 2.2126769029119143e-06, + "loss": 2.441, + "step": 18669 + }, + { + "epoch": 1.5067387620046808, + "grad_norm": 0.7177818417549133, + "learning_rate": 2.209375528728386e-06, + "loss": 2.3668, + "step": 18670 + }, + { + "epoch": 1.506819465741264, + "grad_norm": 0.7172769904136658, + "learning_rate": 2.206076591742845e-06, + "loss": 2.4247, + "step": 18671 + }, + { + "epoch": 1.5069001694778468, + "grad_norm": 
0.6539075374603271, + "learning_rate": 2.202780092037504e-06, + "loss": 2.3896, + "step": 18672 + }, + { + "epoch": 1.5069808732144299, + "grad_norm": 0.7096640467643738, + "learning_rate": 2.199486029694553e-06, + "loss": 2.4369, + "step": 18673 + }, + { + "epoch": 1.507061576951013, + "grad_norm": 0.64681476354599, + "learning_rate": 2.196194404796048e-06, + "loss": 2.3674, + "step": 18674 + }, + { + "epoch": 1.5071422806875958, + "grad_norm": 0.6609311699867249, + "learning_rate": 2.192905217424035e-06, + "loss": 2.4007, + "step": 18675 + }, + { + "epoch": 1.5072229844241787, + "grad_norm": 0.7324950098991394, + "learning_rate": 2.1896184676605145e-06, + "loss": 2.42, + "step": 18676 + }, + { + "epoch": 1.507303688160762, + "grad_norm": 0.686190128326416, + "learning_rate": 2.186334155587366e-06, + "loss": 2.4413, + "step": 18677 + }, + { + "epoch": 1.5073843918973449, + "grad_norm": 0.7591853141784668, + "learning_rate": 2.183052281286457e-06, + "loss": 2.408, + "step": 18678 + }, + { + "epoch": 1.5074650956339277, + "grad_norm": 0.681408703327179, + "learning_rate": 2.1797728448395893e-06, + "loss": 2.4814, + "step": 18679 + }, + { + "epoch": 1.5075457993705108, + "grad_norm": 0.695336639881134, + "learning_rate": 2.1764958463284855e-06, + "loss": 2.3995, + "step": 18680 + }, + { + "epoch": 1.507626503107094, + "grad_norm": 0.7404937148094177, + "learning_rate": 2.1732212858348143e-06, + "loss": 2.4041, + "step": 18681 + }, + { + "epoch": 1.5077072068436768, + "grad_norm": 0.7484709620475769, + "learning_rate": 2.169949163440188e-06, + "loss": 2.4133, + "step": 18682 + }, + { + "epoch": 1.5077879105802598, + "grad_norm": 0.6750720143318176, + "learning_rate": 2.1666794792261524e-06, + "loss": 2.387, + "step": 18683 + }, + { + "epoch": 1.507868614316843, + "grad_norm": 0.6828570365905762, + "learning_rate": 2.1634122332742093e-06, + "loss": 2.3908, + "step": 18684 + }, + { + "epoch": 1.5079493180534258, + "grad_norm": 0.7603326439857483, + "learning_rate": 
2.1601474256657927e-06, + "loss": 2.4337, + "step": 18685 + }, + { + "epoch": 1.508030021790009, + "grad_norm": 0.7744943499565125, + "learning_rate": 2.15688505648225e-06, + "loss": 2.4279, + "step": 18686 + }, + { + "epoch": 1.508110725526592, + "grad_norm": 0.6829258799552917, + "learning_rate": 2.153625125804892e-06, + "loss": 2.4895, + "step": 18687 + }, + { + "epoch": 1.5081914292631748, + "grad_norm": 0.6903569102287292, + "learning_rate": 2.150367633714978e-06, + "loss": 2.4086, + "step": 18688 + }, + { + "epoch": 1.508272132999758, + "grad_norm": 0.6580927968025208, + "learning_rate": 2.1471125802936863e-06, + "loss": 2.3969, + "step": 18689 + }, + { + "epoch": 1.508352836736341, + "grad_norm": 0.7075905203819275, + "learning_rate": 2.1438599656221303e-06, + "loss": 2.4096, + "step": 18690 + }, + { + "epoch": 1.5084335404729239, + "grad_norm": 0.6775155067443848, + "learning_rate": 2.1406097897813783e-06, + "loss": 2.4142, + "step": 18691 + }, + { + "epoch": 1.5085142442095067, + "grad_norm": 0.6592757701873779, + "learning_rate": 2.137362052852443e-06, + "loss": 2.4354, + "step": 18692 + }, + { + "epoch": 1.50859494794609, + "grad_norm": 0.6985810399055481, + "learning_rate": 2.13411675491626e-06, + "loss": 2.403, + "step": 18693 + }, + { + "epoch": 1.508675651682673, + "grad_norm": 0.6725364327430725, + "learning_rate": 2.130873896053709e-06, + "loss": 2.3974, + "step": 18694 + }, + { + "epoch": 1.5087563554192558, + "grad_norm": 0.8433510661125183, + "learning_rate": 2.127633476345625e-06, + "loss": 2.499, + "step": 18695 + }, + { + "epoch": 1.5088370591558389, + "grad_norm": 0.7117698788642883, + "learning_rate": 2.124395495872744e-06, + "loss": 2.4069, + "step": 18696 + }, + { + "epoch": 1.508917762892422, + "grad_norm": 0.6914052367210388, + "learning_rate": 2.121159954715779e-06, + "loss": 2.414, + "step": 18697 + }, + { + "epoch": 1.5089984666290048, + "grad_norm": 0.6826418042182922, + "learning_rate": 2.117926852955365e-06, + "loss": 2.3616, + 
"step": 18698 + }, + { + "epoch": 1.509079170365588, + "grad_norm": 0.687097430229187, + "learning_rate": 2.114696190672083e-06, + "loss": 2.4434, + "step": 18699 + }, + { + "epoch": 1.509159874102171, + "grad_norm": 0.7137446403503418, + "learning_rate": 2.1114679679464454e-06, + "loss": 2.4431, + "step": 18700 + }, + { + "epoch": 1.5092405778387539, + "grad_norm": 0.7330455780029297, + "learning_rate": 2.1082421848588996e-06, + "loss": 2.4451, + "step": 18701 + }, + { + "epoch": 1.509321281575337, + "grad_norm": 0.701392650604248, + "learning_rate": 2.1050188414898584e-06, + "loss": 2.4038, + "step": 18702 + }, + { + "epoch": 1.50940198531192, + "grad_norm": 0.6891985535621643, + "learning_rate": 2.1017979379196474e-06, + "loss": 2.3863, + "step": 18703 + }, + { + "epoch": 1.509482689048503, + "grad_norm": 0.6793761849403381, + "learning_rate": 2.098579474228546e-06, + "loss": 2.4171, + "step": 18704 + }, + { + "epoch": 1.509563392785086, + "grad_norm": 0.7276668548583984, + "learning_rate": 2.095363450496757e-06, + "loss": 2.4207, + "step": 18705 + }, + { + "epoch": 1.509644096521669, + "grad_norm": 0.6547731757164001, + "learning_rate": 2.0921498668044383e-06, + "loss": 2.4113, + "step": 18706 + }, + { + "epoch": 1.509724800258252, + "grad_norm": 0.6921097636222839, + "learning_rate": 2.0889387232316703e-06, + "loss": 2.4162, + "step": 18707 + }, + { + "epoch": 1.5098055039948348, + "grad_norm": 0.7069120407104492, + "learning_rate": 2.085730019858512e-06, + "loss": 2.3696, + "step": 18708 + }, + { + "epoch": 1.5098862077314181, + "grad_norm": 0.6641648411750793, + "learning_rate": 2.082523756764898e-06, + "loss": 2.3926, + "step": 18709 + }, + { + "epoch": 1.509966911468001, + "grad_norm": 0.658637523651123, + "learning_rate": 2.0793199340307433e-06, + "loss": 2.3748, + "step": 18710 + }, + { + "epoch": 1.5100476152045839, + "grad_norm": 0.695314884185791, + "learning_rate": 2.076118551735906e-06, + "loss": 2.4386, + "step": 18711 + }, + { + "epoch": 
1.510128318941167, + "grad_norm": 0.8113142848014832, + "learning_rate": 2.072919609960178e-06, + "loss": 2.4162, + "step": 18712 + }, + { + "epoch": 1.51020902267775, + "grad_norm": 0.677663266658783, + "learning_rate": 2.0697231087832724e-06, + "loss": 2.4099, + "step": 18713 + }, + { + "epoch": 1.510289726414333, + "grad_norm": 0.8038804531097412, + "learning_rate": 2.0665290482848597e-06, + "loss": 2.4721, + "step": 18714 + }, + { + "epoch": 1.510370430150916, + "grad_norm": 0.7014409303665161, + "learning_rate": 2.0633374285445427e-06, + "loss": 2.3641, + "step": 18715 + }, + { + "epoch": 1.510451133887499, + "grad_norm": 0.7066230773925781, + "learning_rate": 2.060148249641869e-06, + "loss": 2.4361, + "step": 18716 + }, + { + "epoch": 1.510531837624082, + "grad_norm": 0.6830186247825623, + "learning_rate": 2.056961511656319e-06, + "loss": 2.3958, + "step": 18717 + }, + { + "epoch": 1.510612541360665, + "grad_norm": 0.7098764181137085, + "learning_rate": 2.0537772146673182e-06, + "loss": 2.4474, + "step": 18718 + }, + { + "epoch": 1.5106932450972481, + "grad_norm": 0.6630643010139465, + "learning_rate": 2.050595358754215e-06, + "loss": 2.3363, + "step": 18719 + }, + { + "epoch": 1.510773948833831, + "grad_norm": 0.7090222835540771, + "learning_rate": 2.0474159439963115e-06, + "loss": 2.3895, + "step": 18720 + }, + { + "epoch": 1.5108546525704138, + "grad_norm": 0.6796701550483704, + "learning_rate": 2.044238970472867e-06, + "loss": 2.3925, + "step": 18721 + }, + { + "epoch": 1.5109353563069972, + "grad_norm": 0.7596279978752136, + "learning_rate": 2.0410644382630408e-06, + "loss": 2.4606, + "step": 18722 + }, + { + "epoch": 1.51101606004358, + "grad_norm": 0.6724212765693665, + "learning_rate": 2.0378923474459466e-06, + "loss": 2.4033, + "step": 18723 + }, + { + "epoch": 1.5110967637801629, + "grad_norm": 0.6791815161705017, + "learning_rate": 2.034722698100666e-06, + "loss": 2.4433, + "step": 18724 + }, + { + "epoch": 1.511177467516746, + "grad_norm": 
0.686861515045166, + "learning_rate": 2.0315554903061697e-06, + "loss": 2.3319, + "step": 18725 + }, + { + "epoch": 1.511258171253329, + "grad_norm": 0.671930730342865, + "learning_rate": 2.0283907241414047e-06, + "loss": 2.3423, + "step": 18726 + }, + { + "epoch": 1.511338874989912, + "grad_norm": 0.6657836437225342, + "learning_rate": 2.025228399685253e-06, + "loss": 2.3696, + "step": 18727 + }, + { + "epoch": 1.511419578726495, + "grad_norm": 0.7551192045211792, + "learning_rate": 2.0220685170165067e-06, + "loss": 2.3879, + "step": 18728 + }, + { + "epoch": 1.511500282463078, + "grad_norm": 0.7677510380744934, + "learning_rate": 2.018911076213936e-06, + "loss": 2.4264, + "step": 18729 + }, + { + "epoch": 1.511580986199661, + "grad_norm": 0.7070802450180054, + "learning_rate": 2.0157560773562346e-06, + "loss": 2.4055, + "step": 18730 + }, + { + "epoch": 1.511661689936244, + "grad_norm": 0.7047102451324463, + "learning_rate": 2.012603520522005e-06, + "loss": 2.4127, + "step": 18731 + }, + { + "epoch": 1.5117423936728271, + "grad_norm": 0.7608091235160828, + "learning_rate": 2.0094534057898517e-06, + "loss": 2.4461, + "step": 18732 + }, + { + "epoch": 1.51182309740941, + "grad_norm": 0.69472336769104, + "learning_rate": 2.006305733238256e-06, + "loss": 2.3927, + "step": 18733 + }, + { + "epoch": 1.511903801145993, + "grad_norm": 0.7638588547706604, + "learning_rate": 2.0031605029456892e-06, + "loss": 2.4585, + "step": 18734 + }, + { + "epoch": 1.5119845048825762, + "grad_norm": 0.7421556711196899, + "learning_rate": 2.0000177149905208e-06, + "loss": 2.4123, + "step": 18735 + }, + { + "epoch": 1.512065208619159, + "grad_norm": 0.7327919602394104, + "learning_rate": 1.9968773694511e-06, + "loss": 2.416, + "step": 18736 + }, + { + "epoch": 1.512145912355742, + "grad_norm": 0.6789775490760803, + "learning_rate": 1.9937394664056753e-06, + "loss": 2.4116, + "step": 18737 + }, + { + "epoch": 1.5122266160923252, + "grad_norm": 0.8124228715896606, + "learning_rate": 
1.9906040059324504e-06, + "loss": 2.3691, + "step": 18738 + }, + { + "epoch": 1.512307319828908, + "grad_norm": 0.7483124136924744, + "learning_rate": 1.987470988109563e-06, + "loss": 2.3636, + "step": 18739 + }, + { + "epoch": 1.512388023565491, + "grad_norm": 0.7223673462867737, + "learning_rate": 1.9843404130151176e-06, + "loss": 2.3638, + "step": 18740 + }, + { + "epoch": 1.512468727302074, + "grad_norm": 0.6911413669586182, + "learning_rate": 1.9812122807271293e-06, + "loss": 2.3337, + "step": 18741 + }, + { + "epoch": 1.5125494310386571, + "grad_norm": 0.7634989619255066, + "learning_rate": 1.978086591323536e-06, + "loss": 2.393, + "step": 18742 + }, + { + "epoch": 1.51263013477524, + "grad_norm": 0.747278094291687, + "learning_rate": 1.9749633448822748e-06, + "loss": 2.4688, + "step": 18743 + }, + { + "epoch": 1.512710838511823, + "grad_norm": 0.6391082406044006, + "learning_rate": 1.9718425414811502e-06, + "loss": 2.3856, + "step": 18744 + }, + { + "epoch": 1.5127915422484062, + "grad_norm": 0.7871484756469727, + "learning_rate": 1.968724181197967e-06, + "loss": 2.3737, + "step": 18745 + }, + { + "epoch": 1.512872245984989, + "grad_norm": 0.6946254968643188, + "learning_rate": 1.965608264110441e-06, + "loss": 2.3711, + "step": 18746 + }, + { + "epoch": 1.5129529497215721, + "grad_norm": 0.6642282009124756, + "learning_rate": 1.9624947902962098e-06, + "loss": 2.4034, + "step": 18747 + }, + { + "epoch": 1.5130336534581552, + "grad_norm": 0.6511447429656982, + "learning_rate": 1.959383759832889e-06, + "loss": 2.4114, + "step": 18748 + }, + { + "epoch": 1.513114357194738, + "grad_norm": 0.6886571049690247, + "learning_rate": 1.9562751727979943e-06, + "loss": 2.3954, + "step": 18749 + }, + { + "epoch": 1.5131950609313212, + "grad_norm": 0.7461123466491699, + "learning_rate": 1.9531690292690308e-06, + "loss": 2.4607, + "step": 18750 + }, + { + "epoch": 1.5132757646679043, + "grad_norm": 0.6922837495803833, + "learning_rate": 1.9500653293233808e-06, + "loss": 
2.4126, + "step": 18751 + }, + { + "epoch": 1.5133564684044871, + "grad_norm": 0.736294150352478, + "learning_rate": 1.9469640730384042e-06, + "loss": 2.4562, + "step": 18752 + }, + { + "epoch": 1.51343717214107, + "grad_norm": 0.6553577780723572, + "learning_rate": 1.9438652604913955e-06, + "loss": 2.3973, + "step": 18753 + }, + { + "epoch": 1.5135178758776533, + "grad_norm": 0.7067225575447083, + "learning_rate": 1.9407688917595925e-06, + "loss": 2.4333, + "step": 18754 + }, + { + "epoch": 1.5135985796142362, + "grad_norm": 0.7250834107398987, + "learning_rate": 1.9376749669201553e-06, + "loss": 2.4195, + "step": 18755 + }, + { + "epoch": 1.513679283350819, + "grad_norm": 0.7244740724563599, + "learning_rate": 1.934583486050201e-06, + "loss": 2.4422, + "step": 18756 + }, + { + "epoch": 1.5137599870874021, + "grad_norm": 0.6884569525718689, + "learning_rate": 1.931494449226756e-06, + "loss": 2.3681, + "step": 18757 + }, + { + "epoch": 1.5138406908239852, + "grad_norm": 0.7152425646781921, + "learning_rate": 1.9284078565268373e-06, + "loss": 2.4023, + "step": 18758 + }, + { + "epoch": 1.513921394560568, + "grad_norm": 0.6469550132751465, + "learning_rate": 1.92532370802736e-06, + "loss": 2.4102, + "step": 18759 + }, + { + "epoch": 1.5140020982971512, + "grad_norm": 0.6262938380241394, + "learning_rate": 1.9222420038051747e-06, + "loss": 2.3668, + "step": 18760 + }, + { + "epoch": 1.5140828020337342, + "grad_norm": 0.6930738091468811, + "learning_rate": 1.9191627439370974e-06, + "loss": 2.4345, + "step": 18761 + }, + { + "epoch": 1.514163505770317, + "grad_norm": 0.6779739260673523, + "learning_rate": 1.9160859284998777e-06, + "loss": 2.4353, + "step": 18762 + }, + { + "epoch": 1.5142442095069002, + "grad_norm": 0.7086219191551208, + "learning_rate": 1.913011557570177e-06, + "loss": 2.3804, + "step": 18763 + }, + { + "epoch": 1.5143249132434833, + "grad_norm": 0.6894867420196533, + "learning_rate": 1.909939631224644e-06, + "loss": 2.3749, + "step": 18764 + }, + { + 
"epoch": 1.5144056169800661, + "grad_norm": 0.6909998059272766, + "learning_rate": 1.906870149539819e-06, + "loss": 2.4083, + "step": 18765 + }, + { + "epoch": 1.514486320716649, + "grad_norm": 0.6844708323478699, + "learning_rate": 1.9038031125922174e-06, + "loss": 2.4039, + "step": 18766 + }, + { + "epoch": 1.5145670244532323, + "grad_norm": 0.6927101016044617, + "learning_rate": 1.900738520458256e-06, + "loss": 2.3549, + "step": 18767 + }, + { + "epoch": 1.5146477281898152, + "grad_norm": 0.6853668093681335, + "learning_rate": 1.8976763732143298e-06, + "loss": 2.4001, + "step": 18768 + }, + { + "epoch": 1.514728431926398, + "grad_norm": 0.7288877367973328, + "learning_rate": 1.8946166709367553e-06, + "loss": 2.4295, + "step": 18769 + }, + { + "epoch": 1.5148091356629811, + "grad_norm": 0.6837958097457886, + "learning_rate": 1.891559413701771e-06, + "loss": 2.3687, + "step": 18770 + }, + { + "epoch": 1.5148898393995642, + "grad_norm": 0.7109480500221252, + "learning_rate": 1.8885046015855946e-06, + "loss": 2.4561, + "step": 18771 + }, + { + "epoch": 1.514970543136147, + "grad_norm": 0.6929563283920288, + "learning_rate": 1.8854522346643533e-06, + "loss": 2.3597, + "step": 18772 + }, + { + "epoch": 1.5150512468727302, + "grad_norm": 0.6835468411445618, + "learning_rate": 1.8824023130140978e-06, + "loss": 2.4212, + "step": 18773 + }, + { + "epoch": 1.5151319506093133, + "grad_norm": 0.6762038469314575, + "learning_rate": 1.8793548367108671e-06, + "loss": 2.3742, + "step": 18774 + }, + { + "epoch": 1.5152126543458961, + "grad_norm": 0.6824073195457458, + "learning_rate": 1.8763098058306118e-06, + "loss": 2.4822, + "step": 18775 + }, + { + "epoch": 1.5152933580824792, + "grad_norm": 0.7239061594009399, + "learning_rate": 1.873267220449204e-06, + "loss": 2.4036, + "step": 18776 + }, + { + "epoch": 1.5153740618190623, + "grad_norm": 0.6647765040397644, + "learning_rate": 1.8702270806424837e-06, + "loss": 2.4164, + "step": 18777 + }, + { + "epoch": 1.5154547655556452, + 
"grad_norm": 0.6472916007041931, + "learning_rate": 1.8671893864862345e-06, + "loss": 2.3915, + "step": 18778 + }, + { + "epoch": 1.5155354692922283, + "grad_norm": 0.7041392922401428, + "learning_rate": 1.864154138056129e-06, + "loss": 2.4124, + "step": 18779 + }, + { + "epoch": 1.5156161730288114, + "grad_norm": 0.6630376577377319, + "learning_rate": 1.86112133542784e-06, + "loss": 2.36, + "step": 18780 + }, + { + "epoch": 1.5156968767653942, + "grad_norm": 0.6880913972854614, + "learning_rate": 1.8580909786769406e-06, + "loss": 2.3711, + "step": 18781 + }, + { + "epoch": 1.515777580501977, + "grad_norm": 0.6794038414955139, + "learning_rate": 1.8550630678789705e-06, + "loss": 2.4399, + "step": 18782 + }, + { + "epoch": 1.5158582842385604, + "grad_norm": 0.7231845259666443, + "learning_rate": 1.8520376031093688e-06, + "loss": 2.4661, + "step": 18783 + }, + { + "epoch": 1.5159389879751433, + "grad_norm": 0.640635073184967, + "learning_rate": 1.8490145844435646e-06, + "loss": 2.3447, + "step": 18784 + }, + { + "epoch": 1.5160196917117261, + "grad_norm": 0.6949231624603271, + "learning_rate": 1.8459940119568753e-06, + "loss": 2.413, + "step": 18785 + }, + { + "epoch": 1.5161003954483092, + "grad_norm": 0.7331423759460449, + "learning_rate": 1.8429758857245849e-06, + "loss": 2.3968, + "step": 18786 + }, + { + "epoch": 1.5161810991848923, + "grad_norm": 0.7337766289710999, + "learning_rate": 1.8399602058219334e-06, + "loss": 2.3721, + "step": 18787 + }, + { + "epoch": 1.5162618029214752, + "grad_norm": 0.6949995160102844, + "learning_rate": 1.8369469723240717e-06, + "loss": 2.3815, + "step": 18788 + }, + { + "epoch": 1.5163425066580583, + "grad_norm": 0.6975441575050354, + "learning_rate": 1.8339361853060843e-06, + "loss": 2.4681, + "step": 18789 + }, + { + "epoch": 1.5164232103946413, + "grad_norm": 0.682364284992218, + "learning_rate": 1.8309278448430111e-06, + "loss": 2.3789, + "step": 18790 + }, + { + "epoch": 1.5165039141312242, + "grad_norm": 0.795218288898468, 
+ "learning_rate": 1.8279219510098478e-06, + "loss": 2.4204, + "step": 18791 + }, + { + "epoch": 1.5165846178678073, + "grad_norm": 0.6837748885154724, + "learning_rate": 1.8249185038814786e-06, + "loss": 2.4165, + "step": 18792 + }, + { + "epoch": 1.5166653216043904, + "grad_norm": 0.7043229341506958, + "learning_rate": 1.8219175035327773e-06, + "loss": 2.4357, + "step": 18793 + }, + { + "epoch": 1.5167460253409732, + "grad_norm": 0.7295538187026978, + "learning_rate": 1.8189189500385283e-06, + "loss": 2.4108, + "step": 18794 + }, + { + "epoch": 1.5168267290775563, + "grad_norm": 0.7195125222206116, + "learning_rate": 1.8159228434734722e-06, + "loss": 2.4056, + "step": 18795 + }, + { + "epoch": 1.5169074328141394, + "grad_norm": 0.679076075553894, + "learning_rate": 1.812929183912271e-06, + "loss": 2.3591, + "step": 18796 + }, + { + "epoch": 1.5169881365507223, + "grad_norm": 0.7039214372634888, + "learning_rate": 1.8099379714295427e-06, + "loss": 2.4075, + "step": 18797 + }, + { + "epoch": 1.5170688402873052, + "grad_norm": 0.7246118783950806, + "learning_rate": 1.8069492060998393e-06, + "loss": 2.3952, + "step": 18798 + }, + { + "epoch": 1.5171495440238885, + "grad_norm": 0.740473747253418, + "learning_rate": 1.8039628879976233e-06, + "loss": 2.3529, + "step": 18799 + }, + { + "epoch": 1.5172302477604713, + "grad_norm": 0.8230307102203369, + "learning_rate": 1.8009790171973462e-06, + "loss": 2.3789, + "step": 18800 + }, + { + "epoch": 1.5173109514970542, + "grad_norm": 0.6905292868614197, + "learning_rate": 1.7979975937733706e-06, + "loss": 2.3314, + "step": 18801 + }, + { + "epoch": 1.5173916552336373, + "grad_norm": 0.7145891189575195, + "learning_rate": 1.7950186177999928e-06, + "loss": 2.3905, + "step": 18802 + }, + { + "epoch": 1.5174723589702204, + "grad_norm": 0.7292607426643372, + "learning_rate": 1.7920420893514645e-06, + "loss": 2.4806, + "step": 18803 + }, + { + "epoch": 1.5175530627068032, + "grad_norm": 0.6705700159072876, + "learning_rate": 
1.7890680085019595e-06, + "loss": 2.4328, + "step": 18804 + }, + { + "epoch": 1.5176337664433863, + "grad_norm": 0.7559483051300049, + "learning_rate": 1.7860963753256077e-06, + "loss": 2.3555, + "step": 18805 + }, + { + "epoch": 1.5177144701799694, + "grad_norm": 0.703779399394989, + "learning_rate": 1.783127189896472e-06, + "loss": 2.4989, + "step": 18806 + }, + { + "epoch": 1.5177951739165523, + "grad_norm": 0.6725503206253052, + "learning_rate": 1.7801604522885596e-06, + "loss": 2.4035, + "step": 18807 + }, + { + "epoch": 1.5178758776531354, + "grad_norm": 0.7030585408210754, + "learning_rate": 1.7771961625757782e-06, + "loss": 2.4594, + "step": 18808 + }, + { + "epoch": 1.5179565813897185, + "grad_norm": 0.7017019987106323, + "learning_rate": 1.7742343208320355e-06, + "loss": 2.4053, + "step": 18809 + }, + { + "epoch": 1.5180372851263013, + "grad_norm": 0.6798418760299683, + "learning_rate": 1.771274927131139e-06, + "loss": 2.3945, + "step": 18810 + }, + { + "epoch": 1.5181179888628844, + "grad_norm": 0.7820610404014587, + "learning_rate": 1.7683179815468408e-06, + "loss": 2.4243, + "step": 18811 + }, + { + "epoch": 1.5181986925994675, + "grad_norm": 0.780927300453186, + "learning_rate": 1.7653634841528377e-06, + "loss": 2.3786, + "step": 18812 + }, + { + "epoch": 1.5182793963360504, + "grad_norm": 0.6910156011581421, + "learning_rate": 1.7624114350227595e-06, + "loss": 2.3687, + "step": 18813 + }, + { + "epoch": 1.5183601000726332, + "grad_norm": 0.74334716796875, + "learning_rate": 1.7594618342301917e-06, + "loss": 2.4245, + "step": 18814 + }, + { + "epoch": 1.5184408038092165, + "grad_norm": 0.7189802527427673, + "learning_rate": 1.7565146818486311e-06, + "loss": 2.4617, + "step": 18815 + }, + { + "epoch": 1.5185215075457994, + "grad_norm": 0.6682239770889282, + "learning_rate": 1.7535699779515412e-06, + "loss": 2.3924, + "step": 18816 + }, + { + "epoch": 1.5186022112823823, + "grad_norm": 0.7187373638153076, + "learning_rate": 1.750627722612308e-06, + 
"loss": 2.3686, + "step": 18817 + }, + { + "epoch": 1.5186829150189654, + "grad_norm": 0.6907529830932617, + "learning_rate": 1.7476879159042503e-06, + "loss": 2.3942, + "step": 18818 + }, + { + "epoch": 1.5187636187555484, + "grad_norm": 0.7133082747459412, + "learning_rate": 1.744750557900654e-06, + "loss": 2.495, + "step": 18819 + }, + { + "epoch": 1.5188443224921313, + "grad_norm": 0.666289210319519, + "learning_rate": 1.7418156486747162e-06, + "loss": 2.3726, + "step": 18820 + }, + { + "epoch": 1.5189250262287144, + "grad_norm": 0.7055099010467529, + "learning_rate": 1.7388831882995782e-06, + "loss": 2.4071, + "step": 18821 + }, + { + "epoch": 1.5190057299652975, + "grad_norm": 0.6810482740402222, + "learning_rate": 1.7359531768483261e-06, + "loss": 2.4183, + "step": 18822 + }, + { + "epoch": 1.5190864337018803, + "grad_norm": 0.7321486473083496, + "learning_rate": 1.7330256143939905e-06, + "loss": 2.4529, + "step": 18823 + }, + { + "epoch": 1.5191671374384634, + "grad_norm": 0.7226361036300659, + "learning_rate": 1.7301005010095128e-06, + "loss": 2.4364, + "step": 18824 + }, + { + "epoch": 1.5192478411750465, + "grad_norm": 0.6732020974159241, + "learning_rate": 1.7271778367678237e-06, + "loss": 2.4198, + "step": 18825 + }, + { + "epoch": 1.5193285449116294, + "grad_norm": 0.6751465201377869, + "learning_rate": 1.7242576217417538e-06, + "loss": 2.4273, + "step": 18826 + }, + { + "epoch": 1.5194092486482123, + "grad_norm": 0.7088303565979004, + "learning_rate": 1.7213398560040783e-06, + "loss": 2.3857, + "step": 18827 + }, + { + "epoch": 1.5194899523847956, + "grad_norm": 0.7239326238632202, + "learning_rate": 1.7184245396275056e-06, + "loss": 2.3681, + "step": 18828 + }, + { + "epoch": 1.5195706561213784, + "grad_norm": 0.7118703722953796, + "learning_rate": 1.7155116726847109e-06, + "loss": 2.4401, + "step": 18829 + }, + { + "epoch": 1.5196513598579613, + "grad_norm": 0.6479594111442566, + "learning_rate": 1.7126012552482917e-06, + "loss": 2.3794, + "step": 
18830 + }, + { + "epoch": 1.5197320635945444, + "grad_norm": 0.6913226842880249, + "learning_rate": 1.7096932873907679e-06, + "loss": 2.3875, + "step": 18831 + }, + { + "epoch": 1.5198127673311275, + "grad_norm": 0.6577833890914917, + "learning_rate": 1.7067877691846258e-06, + "loss": 2.4328, + "step": 18832 + }, + { + "epoch": 1.5198934710677103, + "grad_norm": 0.7346724271774292, + "learning_rate": 1.703884700702274e-06, + "loss": 2.4161, + "step": 18833 + }, + { + "epoch": 1.5199741748042934, + "grad_norm": 0.7034791111946106, + "learning_rate": 1.700984082016055e-06, + "loss": 2.4166, + "step": 18834 + }, + { + "epoch": 1.5200548785408765, + "grad_norm": 0.69721919298172, + "learning_rate": 1.6980859131982662e-06, + "loss": 2.3892, + "step": 18835 + }, + { + "epoch": 1.5201355822774594, + "grad_norm": 0.77543705701828, + "learning_rate": 1.69519019432115e-06, + "loss": 2.4424, + "step": 18836 + }, + { + "epoch": 1.5202162860140425, + "grad_norm": 0.6738883852958679, + "learning_rate": 1.69229692545686e-06, + "loss": 2.4521, + "step": 18837 + }, + { + "epoch": 1.5202969897506255, + "grad_norm": 0.7213564515113831, + "learning_rate": 1.6894061066775158e-06, + "loss": 2.3824, + "step": 18838 + }, + { + "epoch": 1.5203776934872084, + "grad_norm": 0.6511073112487793, + "learning_rate": 1.68651773805516e-06, + "loss": 2.4027, + "step": 18839 + }, + { + "epoch": 1.5204583972237915, + "grad_norm": 0.707277774810791, + "learning_rate": 1.6836318196617684e-06, + "loss": 2.4513, + "step": 18840 + }, + { + "epoch": 1.5205391009603746, + "grad_norm": 0.7205690741539001, + "learning_rate": 1.6807483515692724e-06, + "loss": 2.3609, + "step": 18841 + }, + { + "epoch": 1.5206198046969575, + "grad_norm": 0.7299683690071106, + "learning_rate": 1.6778673338495476e-06, + "loss": 2.4653, + "step": 18842 + }, + { + "epoch": 1.5207005084335403, + "grad_norm": 0.6780205368995667, + "learning_rate": 1.6749887665743703e-06, + "loss": 2.4108, + "step": 18843 + }, + { + "epoch": 
1.5207812121701236, + "grad_norm": 0.6702545285224915, + "learning_rate": 1.6721126498155048e-06, + "loss": 2.3838, + "step": 18844 + }, + { + "epoch": 1.5208619159067065, + "grad_norm": 0.7097615003585815, + "learning_rate": 1.6692389836446165e-06, + "loss": 2.4273, + "step": 18845 + }, + { + "epoch": 1.5209426196432894, + "grad_norm": 0.6766102910041809, + "learning_rate": 1.6663677681333368e-06, + "loss": 2.4357, + "step": 18846 + }, + { + "epoch": 1.5210233233798724, + "grad_norm": 0.7652571797370911, + "learning_rate": 1.6634990033532194e-06, + "loss": 2.4562, + "step": 18847 + }, + { + "epoch": 1.5211040271164555, + "grad_norm": 0.6772809624671936, + "learning_rate": 1.6606326893757628e-06, + "loss": 2.4173, + "step": 18848 + }, + { + "epoch": 1.5211847308530384, + "grad_norm": 0.7474905848503113, + "learning_rate": 1.65776882627241e-06, + "loss": 2.3759, + "step": 18849 + }, + { + "epoch": 1.5212654345896215, + "grad_norm": 0.7467244267463684, + "learning_rate": 1.6549074141145149e-06, + "loss": 2.3935, + "step": 18850 + }, + { + "epoch": 1.5213461383262046, + "grad_norm": 0.7091644406318665, + "learning_rate": 1.6520484529734092e-06, + "loss": 2.3507, + "step": 18851 + }, + { + "epoch": 1.5214268420627874, + "grad_norm": 0.7161739468574524, + "learning_rate": 1.6491919429203473e-06, + "loss": 2.4125, + "step": 18852 + }, + { + "epoch": 1.5215075457993705, + "grad_norm": 0.6733263731002808, + "learning_rate": 1.6463378840264941e-06, + "loss": 2.4026, + "step": 18853 + }, + { + "epoch": 1.5215882495359536, + "grad_norm": 0.6848629713058472, + "learning_rate": 1.6434862763630155e-06, + "loss": 2.3753, + "step": 18854 + }, + { + "epoch": 1.5216689532725365, + "grad_norm": 0.840535044670105, + "learning_rate": 1.640637120000954e-06, + "loss": 2.4067, + "step": 18855 + }, + { + "epoch": 1.5217496570091196, + "grad_norm": 0.7456166744232178, + "learning_rate": 1.637790415011342e-06, + "loss": 2.384, + "step": 18856 + }, + { + "epoch": 1.5218303607457027, + 
"grad_norm": 0.7038760781288147, + "learning_rate": 1.6349461614651008e-06, + "loss": 2.3857, + "step": 18857 + }, + { + "epoch": 1.5219110644822855, + "grad_norm": 0.6688199639320374, + "learning_rate": 1.6321043594331399e-06, + "loss": 2.4, + "step": 18858 + }, + { + "epoch": 1.5219917682188684, + "grad_norm": 0.7367751598358154, + "learning_rate": 1.6292650089862694e-06, + "loss": 2.458, + "step": 18859 + }, + { + "epoch": 1.5220724719554517, + "grad_norm": 0.7959186434745789, + "learning_rate": 1.626428110195266e-06, + "loss": 2.463, + "step": 18860 + }, + { + "epoch": 1.5221531756920346, + "grad_norm": 0.6830917596817017, + "learning_rate": 1.6235936631308179e-06, + "loss": 2.3843, + "step": 18861 + }, + { + "epoch": 1.5222338794286174, + "grad_norm": 0.6762063503265381, + "learning_rate": 1.6207616678635795e-06, + "loss": 2.4006, + "step": 18862 + }, + { + "epoch": 1.5223145831652005, + "grad_norm": 0.7410191893577576, + "learning_rate": 1.6179321244641277e-06, + "loss": 2.3894, + "step": 18863 + }, + { + "epoch": 1.5223952869017836, + "grad_norm": 0.6335217952728271, + "learning_rate": 1.6151050330029726e-06, + "loss": 2.3622, + "step": 18864 + }, + { + "epoch": 1.5224759906383665, + "grad_norm": 0.6569252014160156, + "learning_rate": 1.6122803935505804e-06, + "loss": 2.4683, + "step": 18865 + }, + { + "epoch": 1.5225566943749496, + "grad_norm": 0.755725085735321, + "learning_rate": 1.60945820617735e-06, + "loss": 2.3681, + "step": 18866 + }, + { + "epoch": 1.5226373981115326, + "grad_norm": 0.7522092461585999, + "learning_rate": 1.6066384709536253e-06, + "loss": 2.4316, + "step": 18867 + }, + { + "epoch": 1.5227181018481155, + "grad_norm": 0.7349351048469543, + "learning_rate": 1.6038211879496723e-06, + "loss": 2.4419, + "step": 18868 + }, + { + "epoch": 1.5227988055846986, + "grad_norm": 0.7310368418693542, + "learning_rate": 1.6010063572357014e-06, + "loss": 2.3956, + "step": 18869 + }, + { + "epoch": 1.5228795093212817, + "grad_norm": 0.7016099691390991, 
+ "learning_rate": 1.5981939788818678e-06, + "loss": 2.3434, + "step": 18870 + }, + { + "epoch": 1.5229602130578646, + "grad_norm": 0.7399678230285645, + "learning_rate": 1.5953840529582708e-06, + "loss": 2.4468, + "step": 18871 + }, + { + "epoch": 1.5230409167944474, + "grad_norm": 0.7483804225921631, + "learning_rate": 1.5925765795349213e-06, + "loss": 2.4589, + "step": 18872 + }, + { + "epoch": 1.5231216205310307, + "grad_norm": 0.7376934885978699, + "learning_rate": 1.5897715586818185e-06, + "loss": 2.4414, + "step": 18873 + }, + { + "epoch": 1.5232023242676136, + "grad_norm": 0.6889188289642334, + "learning_rate": 1.5869689904688401e-06, + "loss": 2.3904, + "step": 18874 + }, + { + "epoch": 1.5232830280041965, + "grad_norm": 0.7198030948638916, + "learning_rate": 1.5841688749658634e-06, + "loss": 2.3654, + "step": 18875 + }, + { + "epoch": 1.5233637317407795, + "grad_norm": 0.7398289442062378, + "learning_rate": 1.581371212242655e-06, + "loss": 2.3903, + "step": 18876 + }, + { + "epoch": 1.5234444354773626, + "grad_norm": 0.6917053461074829, + "learning_rate": 1.5785760023689366e-06, + "loss": 2.4462, + "step": 18877 + }, + { + "epoch": 1.5235251392139455, + "grad_norm": 0.707867443561554, + "learning_rate": 1.5757832454143972e-06, + "loss": 2.4399, + "step": 18878 + }, + { + "epoch": 1.5236058429505286, + "grad_norm": 0.6719911098480225, + "learning_rate": 1.5729929414486144e-06, + "loss": 2.3984, + "step": 18879 + }, + { + "epoch": 1.5236865466871117, + "grad_norm": 0.7843443155288696, + "learning_rate": 1.5702050905411326e-06, + "loss": 2.3631, + "step": 18880 + }, + { + "epoch": 1.5237672504236945, + "grad_norm": 0.7120097279548645, + "learning_rate": 1.5674196927614516e-06, + "loss": 2.3608, + "step": 18881 + }, + { + "epoch": 1.5238479541602776, + "grad_norm": 0.7455726861953735, + "learning_rate": 1.5646367481789604e-06, + "loss": 2.4499, + "step": 18882 + }, + { + "epoch": 1.5239286578968607, + "grad_norm": 0.720418393611908, + "learning_rate": 
1.561856256863048e-06, + "loss": 2.421, + "step": 18883 + }, + { + "epoch": 1.5240093616334436, + "grad_norm": 0.6765218377113342, + "learning_rate": 1.5590782188829923e-06, + "loss": 2.3552, + "step": 18884 + }, + { + "epoch": 1.5240900653700267, + "grad_norm": 0.6665711402893066, + "learning_rate": 1.5563026343080378e-06, + "loss": 2.4116, + "step": 18885 + }, + { + "epoch": 1.5241707691066098, + "grad_norm": 0.6785176992416382, + "learning_rate": 1.5535295032073405e-06, + "loss": 2.3543, + "step": 18886 + }, + { + "epoch": 1.5242514728431926, + "grad_norm": 0.692261278629303, + "learning_rate": 1.550758825650045e-06, + "loss": 2.4613, + "step": 18887 + }, + { + "epoch": 1.5243321765797755, + "grad_norm": 0.7043518424034119, + "learning_rate": 1.547990601705185e-06, + "loss": 2.3802, + "step": 18888 + }, + { + "epoch": 1.5244128803163588, + "grad_norm": 0.677109956741333, + "learning_rate": 1.5452248314417605e-06, + "loss": 2.4045, + "step": 18889 + }, + { + "epoch": 1.5244935840529417, + "grad_norm": 0.7338987588882446, + "learning_rate": 1.5424615149286835e-06, + "loss": 2.3944, + "step": 18890 + }, + { + "epoch": 1.5245742877895245, + "grad_norm": 0.7003028392791748, + "learning_rate": 1.5397006522348546e-06, + "loss": 2.4482, + "step": 18891 + }, + { + "epoch": 1.5246549915261076, + "grad_norm": 0.679331362247467, + "learning_rate": 1.5369422434290515e-06, + "loss": 2.435, + "step": 18892 + }, + { + "epoch": 1.5247356952626907, + "grad_norm": 0.7156202793121338, + "learning_rate": 1.5341862885800307e-06, + "loss": 2.4535, + "step": 18893 + }, + { + "epoch": 1.5248163989992736, + "grad_norm": 0.6846185922622681, + "learning_rate": 1.5314327877564926e-06, + "loss": 2.4047, + "step": 18894 + }, + { + "epoch": 1.5248971027358567, + "grad_norm": 0.7099572420120239, + "learning_rate": 1.5286817410270382e-06, + "loss": 2.4283, + "step": 18895 + }, + { + "epoch": 1.5249778064724397, + "grad_norm": 0.7120501399040222, + "learning_rate": 1.5259331484602345e-06, + 
"loss": 2.4255, + "step": 18896 + }, + { + "epoch": 1.5250585102090226, + "grad_norm": 0.7055281400680542, + "learning_rate": 1.5231870101245937e-06, + "loss": 2.3463, + "step": 18897 + }, + { + "epoch": 1.5251392139456057, + "grad_norm": 0.6632781624794006, + "learning_rate": 1.5204433260885608e-06, + "loss": 2.3487, + "step": 18898 + }, + { + "epoch": 1.5252199176821888, + "grad_norm": 0.6453731656074524, + "learning_rate": 1.5177020964205034e-06, + "loss": 2.3545, + "step": 18899 + }, + { + "epoch": 1.5253006214187717, + "grad_norm": 0.8149442672729492, + "learning_rate": 1.514963321188756e-06, + "loss": 2.431, + "step": 18900 + }, + { + "epoch": 1.5253813251553547, + "grad_norm": 0.730827271938324, + "learning_rate": 1.5122270004615525e-06, + "loss": 2.3812, + "step": 18901 + }, + { + "epoch": 1.5254620288919378, + "grad_norm": 0.6867875456809998, + "learning_rate": 1.5094931343071051e-06, + "loss": 2.4262, + "step": 18902 + }, + { + "epoch": 1.5255427326285207, + "grad_norm": 0.7112615704536438, + "learning_rate": 1.5067617227935593e-06, + "loss": 2.4221, + "step": 18903 + }, + { + "epoch": 1.5256234363651036, + "grad_norm": 0.7412725687026978, + "learning_rate": 1.5040327659889608e-06, + "loss": 2.3338, + "step": 18904 + }, + { + "epoch": 1.5257041401016869, + "grad_norm": 0.7514991164207458, + "learning_rate": 1.501306263961333e-06, + "loss": 2.45, + "step": 18905 + }, + { + "epoch": 1.5257848438382697, + "grad_norm": 0.7420109510421753, + "learning_rate": 1.4985822167786323e-06, + "loss": 2.342, + "step": 18906 + }, + { + "epoch": 1.5258655475748526, + "grad_norm": 0.6807692050933838, + "learning_rate": 1.4958606245087602e-06, + "loss": 2.4438, + "step": 18907 + }, + { + "epoch": 1.5259462513114357, + "grad_norm": 0.6926922798156738, + "learning_rate": 1.493141487219518e-06, + "loss": 2.3726, + "step": 18908 + }, + { + "epoch": 1.5260269550480188, + "grad_norm": 0.7947930693626404, + "learning_rate": 1.490424804978696e-06, + "loss": 2.3887, + "step": 18909 
+ }, + { + "epoch": 1.5261076587846016, + "grad_norm": 0.6710916757583618, + "learning_rate": 1.4877105778540069e-06, + "loss": 2.3674, + "step": 18910 + }, + { + "epoch": 1.5261883625211847, + "grad_norm": 0.7039839029312134, + "learning_rate": 1.4849988059130738e-06, + "loss": 2.4165, + "step": 18911 + }, + { + "epoch": 1.5262690662577678, + "grad_norm": 0.7044761180877686, + "learning_rate": 1.4822894892234874e-06, + "loss": 2.4431, + "step": 18912 + }, + { + "epoch": 1.5263497699943507, + "grad_norm": 0.7750450372695923, + "learning_rate": 1.4795826278527824e-06, + "loss": 2.3867, + "step": 18913 + }, + { + "epoch": 1.5264304737309338, + "grad_norm": 0.6689462661743164, + "learning_rate": 1.4768782218684052e-06, + "loss": 2.4665, + "step": 18914 + }, + { + "epoch": 1.5265111774675169, + "grad_norm": 0.7244156002998352, + "learning_rate": 1.4741762713377682e-06, + "loss": 2.4075, + "step": 18915 + }, + { + "epoch": 1.5265918812040997, + "grad_norm": 0.659988284111023, + "learning_rate": 1.4714767763282067e-06, + "loss": 2.3702, + "step": 18916 + }, + { + "epoch": 1.5266725849406828, + "grad_norm": 0.6512012481689453, + "learning_rate": 1.468779736907e-06, + "loss": 2.447, + "step": 18917 + }, + { + "epoch": 1.526753288677266, + "grad_norm": 0.7002681493759155, + "learning_rate": 1.4660851531413722e-06, + "loss": 2.3993, + "step": 18918 + }, + { + "epoch": 1.5268339924138488, + "grad_norm": 0.7057614922523499, + "learning_rate": 1.4633930250984695e-06, + "loss": 2.4794, + "step": 18919 + }, + { + "epoch": 1.5269146961504316, + "grad_norm": 0.6431131362915039, + "learning_rate": 1.4607033528453829e-06, + "loss": 2.4572, + "step": 18920 + }, + { + "epoch": 1.5269953998870147, + "grad_norm": 0.7665689587593079, + "learning_rate": 1.4580161364491584e-06, + "loss": 2.3644, + "step": 18921 + }, + { + "epoch": 1.5270761036235978, + "grad_norm": 0.7558016180992126, + "learning_rate": 1.455331375976765e-06, + "loss": 2.4114, + "step": 18922 + }, + { + "epoch": 
1.5271568073601807, + "grad_norm": 0.738858699798584, + "learning_rate": 1.4526490714951158e-06, + "loss": 2.4036, + "step": 18923 + }, + { + "epoch": 1.5272375110967638, + "grad_norm": 0.6631876230239868, + "learning_rate": 1.4499692230710459e-06, + "loss": 2.3717, + "step": 18924 + }, + { + "epoch": 1.5273182148333468, + "grad_norm": 0.661270022392273, + "learning_rate": 1.4472918307713579e-06, + "loss": 2.3438, + "step": 18925 + }, + { + "epoch": 1.5273989185699297, + "grad_norm": 0.6621153354644775, + "learning_rate": 1.4446168946627757e-06, + "loss": 2.3787, + "step": 18926 + }, + { + "epoch": 1.5274796223065128, + "grad_norm": 0.7466804385185242, + "learning_rate": 1.4419444148119798e-06, + "loss": 2.4215, + "step": 18927 + }, + { + "epoch": 1.5275603260430959, + "grad_norm": 0.6980069279670715, + "learning_rate": 1.43927439128555e-06, + "loss": 2.3845, + "step": 18928 + }, + { + "epoch": 1.5276410297796787, + "grad_norm": 0.716249406337738, + "learning_rate": 1.4366068241500442e-06, + "loss": 2.4502, + "step": 18929 + }, + { + "epoch": 1.5277217335162618, + "grad_norm": 0.754284679889679, + "learning_rate": 1.4339417134719536e-06, + "loss": 2.3767, + "step": 18930 + }, + { + "epoch": 1.527802437252845, + "grad_norm": 0.6864803433418274, + "learning_rate": 1.4312790593176807e-06, + "loss": 2.3783, + "step": 18931 + }, + { + "epoch": 1.5278831409894278, + "grad_norm": 0.7305008769035339, + "learning_rate": 1.4286188617535945e-06, + "loss": 2.4186, + "step": 18932 + }, + { + "epoch": 1.5279638447260107, + "grad_norm": 0.7028940320014954, + "learning_rate": 1.4259611208459979e-06, + "loss": 2.4659, + "step": 18933 + }, + { + "epoch": 1.528044548462594, + "grad_norm": 0.7353081703186035, + "learning_rate": 1.4233058366611151e-06, + "loss": 2.4355, + "step": 18934 + }, + { + "epoch": 1.5281252521991768, + "grad_norm": 0.6228030323982239, + "learning_rate": 1.4206530092651494e-06, + "loss": 2.3496, + "step": 18935 + }, + { + "epoch": 1.5282059559357597, + 
"grad_norm": 0.7117124795913696, + "learning_rate": 1.4180026387241918e-06, + "loss": 2.4108, + "step": 18936 + }, + { + "epoch": 1.5282866596723428, + "grad_norm": 0.7654587030410767, + "learning_rate": 1.415354725104301e-06, + "loss": 2.3717, + "step": 18937 + }, + { + "epoch": 1.5283673634089259, + "grad_norm": 0.6835399866104126, + "learning_rate": 1.4127092684714683e-06, + "loss": 2.3403, + "step": 18938 + }, + { + "epoch": 1.5284480671455087, + "grad_norm": 0.7172822952270508, + "learning_rate": 1.410066268891641e-06, + "loss": 2.3928, + "step": 18939 + }, + { + "epoch": 1.5285287708820918, + "grad_norm": 0.6987513303756714, + "learning_rate": 1.407425726430678e-06, + "loss": 2.3965, + "step": 18940 + }, + { + "epoch": 1.528609474618675, + "grad_norm": 0.7663477063179016, + "learning_rate": 1.4047876411543925e-06, + "loss": 2.4411, + "step": 18941 + }, + { + "epoch": 1.5286901783552578, + "grad_norm": 0.6900299191474915, + "learning_rate": 1.4021520131285216e-06, + "loss": 2.4464, + "step": 18942 + }, + { + "epoch": 1.5287708820918409, + "grad_norm": 0.6860430836677551, + "learning_rate": 1.3995188424187676e-06, + "loss": 2.3512, + "step": 18943 + }, + { + "epoch": 1.528851585828424, + "grad_norm": 0.6658843755722046, + "learning_rate": 1.3968881290907453e-06, + "loss": 2.4124, + "step": 18944 + }, + { + "epoch": 1.5289322895650068, + "grad_norm": 0.6960515975952148, + "learning_rate": 1.3942598732100243e-06, + "loss": 2.3591, + "step": 18945 + }, + { + "epoch": 1.52901299330159, + "grad_norm": 0.7546302676200867, + "learning_rate": 1.3916340748420963e-06, + "loss": 2.407, + "step": 18946 + }, + { + "epoch": 1.529093697038173, + "grad_norm": 0.7384806871414185, + "learning_rate": 1.3890107340524205e-06, + "loss": 2.3563, + "step": 18947 + }, + { + "epoch": 1.5291744007747559, + "grad_norm": 0.6989250779151917, + "learning_rate": 1.3863898509063555e-06, + "loss": 2.4044, + "step": 18948 + }, + { + "epoch": 1.5292551045113387, + "grad_norm": 0.6974141597747803, 
+ "learning_rate": 1.383771425469249e-06, + "loss": 2.4159, + "step": 18949 + }, + { + "epoch": 1.529335808247922, + "grad_norm": 0.7042572498321533, + "learning_rate": 1.381155457806338e-06, + "loss": 2.3826, + "step": 18950 + }, + { + "epoch": 1.529416511984505, + "grad_norm": 0.737964391708374, + "learning_rate": 1.3785419479828255e-06, + "loss": 2.4146, + "step": 18951 + }, + { + "epoch": 1.5294972157210878, + "grad_norm": 0.698883593082428, + "learning_rate": 1.3759308960638484e-06, + "loss": 2.4203, + "step": 18952 + }, + { + "epoch": 1.5295779194576709, + "grad_norm": 0.6545951962471008, + "learning_rate": 1.373322302114477e-06, + "loss": 2.3445, + "step": 18953 + }, + { + "epoch": 1.529658623194254, + "grad_norm": 0.663454532623291, + "learning_rate": 1.370716166199726e-06, + "loss": 2.3787, + "step": 18954 + }, + { + "epoch": 1.5297393269308368, + "grad_norm": 0.7036040425300598, + "learning_rate": 1.3681124883845543e-06, + "loss": 2.4202, + "step": 18955 + }, + { + "epoch": 1.52982003066742, + "grad_norm": 0.6702279448509216, + "learning_rate": 1.3655112687338434e-06, + "loss": 2.4357, + "step": 18956 + }, + { + "epoch": 1.529900734404003, + "grad_norm": 0.722159206867218, + "learning_rate": 1.3629125073124193e-06, + "loss": 2.4469, + "step": 18957 + }, + { + "epoch": 1.5299814381405858, + "grad_norm": 0.6717368364334106, + "learning_rate": 1.3603162041850636e-06, + "loss": 2.3563, + "step": 18958 + }, + { + "epoch": 1.530062141877169, + "grad_norm": 0.6880894899368286, + "learning_rate": 1.357722359416469e-06, + "loss": 2.3829, + "step": 18959 + }, + { + "epoch": 1.530142845613752, + "grad_norm": 0.6776503920555115, + "learning_rate": 1.3551309730712835e-06, + "loss": 2.3516, + "step": 18960 + }, + { + "epoch": 1.5302235493503349, + "grad_norm": 0.6807117462158203, + "learning_rate": 1.3525420452141002e-06, + "loss": 2.4042, + "step": 18961 + }, + { + "epoch": 1.530304253086918, + "grad_norm": 0.7218049764633179, + "learning_rate": 1.349955575909434e-06, 
+ "loss": 2.4208, + "step": 18962 + }, + { + "epoch": 1.530384956823501, + "grad_norm": 0.6765930652618408, + "learning_rate": 1.3473715652217556e-06, + "loss": 2.4686, + "step": 18963 + }, + { + "epoch": 1.530465660560084, + "grad_norm": 0.7073772549629211, + "learning_rate": 1.3447900132154578e-06, + "loss": 2.3915, + "step": 18964 + }, + { + "epoch": 1.5305463642966668, + "grad_norm": 0.7247893810272217, + "learning_rate": 1.3422109199548672e-06, + "loss": 2.4182, + "step": 18965 + }, + { + "epoch": 1.53062706803325, + "grad_norm": 0.6569304466247559, + "learning_rate": 1.3396342855042876e-06, + "loss": 2.4686, + "step": 18966 + }, + { + "epoch": 1.530707771769833, + "grad_norm": 0.7075461745262146, + "learning_rate": 1.3370601099279122e-06, + "loss": 2.4054, + "step": 18967 + }, + { + "epoch": 1.5307884755064158, + "grad_norm": 0.6850137114524841, + "learning_rate": 1.334488393289912e-06, + "loss": 2.4359, + "step": 18968 + }, + { + "epoch": 1.530869179242999, + "grad_norm": 0.7391964793205261, + "learning_rate": 1.3319191356543691e-06, + "loss": 2.4276, + "step": 18969 + }, + { + "epoch": 1.530949882979582, + "grad_norm": 0.7017062902450562, + "learning_rate": 1.3293523370853211e-06, + "loss": 2.3984, + "step": 18970 + }, + { + "epoch": 1.5310305867161649, + "grad_norm": 0.7009238600730896, + "learning_rate": 1.3267879976467612e-06, + "loss": 2.4359, + "step": 18971 + }, + { + "epoch": 1.531111290452748, + "grad_norm": 0.6929598450660706, + "learning_rate": 1.3242261174025606e-06, + "loss": 2.4326, + "step": 18972 + }, + { + "epoch": 1.531191994189331, + "grad_norm": 0.7422237992286682, + "learning_rate": 1.3216666964165902e-06, + "loss": 2.3896, + "step": 18973 + }, + { + "epoch": 1.531272697925914, + "grad_norm": 0.7049415111541748, + "learning_rate": 1.3191097347526328e-06, + "loss": 2.4069, + "step": 18974 + }, + { + "epoch": 1.531353401662497, + "grad_norm": 0.7242603302001953, + "learning_rate": 1.3165552324744145e-06, + "loss": 2.3738, + "step": 18975 + 
}, + { + "epoch": 1.53143410539908, + "grad_norm": 0.6795815825462341, + "learning_rate": 1.3140031896456073e-06, + "loss": 2.4512, + "step": 18976 + }, + { + "epoch": 1.531514809135663, + "grad_norm": 0.6888797283172607, + "learning_rate": 1.3114536063297932e-06, + "loss": 2.4532, + "step": 18977 + }, + { + "epoch": 1.5315955128722458, + "grad_norm": 0.6484637260437012, + "learning_rate": 1.3089064825905438e-06, + "loss": 2.3946, + "step": 18978 + }, + { + "epoch": 1.5316762166088291, + "grad_norm": 0.7018564939498901, + "learning_rate": 1.3063618184913196e-06, + "loss": 2.3645, + "step": 18979 + }, + { + "epoch": 1.531756920345412, + "grad_norm": 0.673145055770874, + "learning_rate": 1.3038196140955584e-06, + "loss": 2.395, + "step": 18980 + }, + { + "epoch": 1.5318376240819949, + "grad_norm": 0.7300434112548828, + "learning_rate": 1.3012798694665873e-06, + "loss": 2.4325, + "step": 18981 + }, + { + "epoch": 1.531918327818578, + "grad_norm": 0.706119954586029, + "learning_rate": 1.2987425846677337e-06, + "loss": 2.4204, + "step": 18982 + }, + { + "epoch": 1.531999031555161, + "grad_norm": 0.7130329608917236, + "learning_rate": 1.2962077597622247e-06, + "loss": 2.4483, + "step": 18983 + }, + { + "epoch": 1.532079735291744, + "grad_norm": 0.716433584690094, + "learning_rate": 1.2936753948132318e-06, + "loss": 2.4296, + "step": 18984 + }, + { + "epoch": 1.532160439028327, + "grad_norm": 0.7647578120231628, + "learning_rate": 1.2911454898838714e-06, + "loss": 2.4262, + "step": 18985 + }, + { + "epoch": 1.53224114276491, + "grad_norm": 0.6844768524169922, + "learning_rate": 1.2886180450371822e-06, + "loss": 2.3896, + "step": 18986 + }, + { + "epoch": 1.532321846501493, + "grad_norm": 0.6975526809692383, + "learning_rate": 1.2860930603361686e-06, + "loss": 2.4362, + "step": 18987 + }, + { + "epoch": 1.532402550238076, + "grad_norm": 0.7288907170295715, + "learning_rate": 1.2835705358437588e-06, + "loss": 2.3775, + "step": 18988 + }, + { + "epoch": 1.5324832539746591, + 
"grad_norm": 0.718291163444519, + "learning_rate": 1.2810504716228245e-06, + "loss": 2.3863, + "step": 18989 + }, + { + "epoch": 1.532563957711242, + "grad_norm": 0.7351683974266052, + "learning_rate": 1.2785328677361597e-06, + "loss": 2.4136, + "step": 18990 + }, + { + "epoch": 1.532644661447825, + "grad_norm": 0.665600061416626, + "learning_rate": 1.2760177242465254e-06, + "loss": 2.3741, + "step": 18991 + }, + { + "epoch": 1.5327253651844082, + "grad_norm": 0.7038269639015198, + "learning_rate": 1.2735050412165827e-06, + "loss": 2.3613, + "step": 18992 + }, + { + "epoch": 1.532806068920991, + "grad_norm": 0.6893567442893982, + "learning_rate": 1.2709948187089814e-06, + "loss": 2.3785, + "step": 18993 + }, + { + "epoch": 1.532886772657574, + "grad_norm": 0.7487246990203857, + "learning_rate": 1.2684870567862605e-06, + "loss": 2.414, + "step": 18994 + }, + { + "epoch": 1.5329674763941572, + "grad_norm": 0.6581461429595947, + "learning_rate": 1.2659817555109367e-06, + "loss": 2.3777, + "step": 18995 + }, + { + "epoch": 1.53304818013074, + "grad_norm": 0.7202548384666443, + "learning_rate": 1.2634789149454374e-06, + "loss": 2.4328, + "step": 18996 + }, + { + "epoch": 1.533128883867323, + "grad_norm": 0.7678282260894775, + "learning_rate": 1.2609785351521352e-06, + "loss": 2.452, + "step": 18997 + }, + { + "epoch": 1.533209587603906, + "grad_norm": 0.7092801332473755, + "learning_rate": 1.2584806161933582e-06, + "loss": 2.3806, + "step": 18998 + }, + { + "epoch": 1.533290291340489, + "grad_norm": 0.6543184518814087, + "learning_rate": 1.2559851581313565e-06, + "loss": 2.4002, + "step": 18999 + }, + { + "epoch": 1.533370995077072, + "grad_norm": 0.7272716164588928, + "learning_rate": 1.2534921610283356e-06, + "loss": 2.4519, + "step": 19000 + }, + { + "epoch": 1.533370995077072, + "eval_loss": 2.36470365524292, + "eval_runtime": 766.3392, + "eval_samples_per_second": 3.419, + "eval_steps_per_second": 0.57, + "step": 19000 + } + ], + "logging_steps": 1, + "max_steps": 
20000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.18397521613312e+17, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/out/checkpoint-19000/training_args.bin b/out/checkpoint-19000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ae4a8b118e2a671c30e37a5d24a42d8090b49055 --- /dev/null +++ b/out/checkpoint-19000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c2928f4418c9a306cbe65ca0c1b156ae660c125ec9122008a9f527a50891704 +size 5112 diff --git a/out/checkpoint-20000/config.json b/out/checkpoint-20000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..16f06bb1cdbf882eb90d57ea1906b3790e298a3f --- /dev/null +++ b/out/checkpoint-20000/config.json @@ -0,0 +1,40 @@ +{ + "_name_or_path": "./models/checkpoint-10000", + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.1, + "bos_token_id": 50256, + "embd_pdrop": 0.1, + "eos_token_id": 50256, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 768, + "n_head": 12, + "n_inner": null, + "n_layer": 12, + "n_positions": 1877, + "pad_token_id": 1026, + "reorder_and_upcast_attn": false, + "resid_pdrop": 0.1, + "scale_attn_by_inverse_layer_idx": false, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 50 + } + }, + "torch_dtype": "float32", + "transformers_version": "4.41.2", + 
"use_cache": true, + "vocab_size": 6027 +} diff --git a/out/checkpoint-20000/generation_config.json b/out/checkpoint-20000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..51f4dbe1c89cfa9da69401685604ff16254d9d20 --- /dev/null +++ b/out/checkpoint-20000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 50256, + "eos_token_id": 50256, + "pad_token_id": 1026, + "transformers_version": "4.41.2" +} diff --git a/out/checkpoint-20000/model.safetensors b/out/checkpoint-20000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..feae506b707aab24866bac3e02dd4a2224c12799 --- /dev/null +++ b/out/checkpoint-20000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d31b3734b3cffed46c557d3becb7e65e90b4ab55ebc18eff64ff3aad4999d24 +size 364520064 diff --git a/out/checkpoint-20000/optimizer.pt b/out/checkpoint-20000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..76953bedb96729b16a7b7bcb97718639ac8f734f --- /dev/null +++ b/out/checkpoint-20000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:894f39d8501ad12d19c12229534224ad092f71d7fad8682f34f0d8b975b60eb8 +size 729134010 diff --git a/out/checkpoint-20000/rng_state.pth b/out/checkpoint-20000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fc0ba13df0d6bdf50353c71f54585c695319149b --- /dev/null +++ b/out/checkpoint-20000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c870fe7abda53c93097dd06715dc0179f87f4864ac260a67d000db16ffe7298d +size 14244 diff --git a/out/checkpoint-20000/scheduler.pt b/out/checkpoint-20000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6290f9a1e68923b1c1d5fc7c219cd069d8cab8b6 --- /dev/null +++ b/out/checkpoint-20000/scheduler.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:12bf461c5e4bed04e168cc27aed066d45328f7ff5de3945fde4b51483e103707 +size 1064 diff --git a/out/checkpoint-20000/special_tokens_map.json b/out/checkpoint-20000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1b9fa6207c25267215ce16bfacdcb9089df3e897 --- /dev/null +++ b/out/checkpoint-20000/special_tokens_map.json @@ -0,0 +1,9 @@ +{ + "pad_token": { + "content": "<|padding|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/out/checkpoint-20000/tokenizer.json b/out/checkpoint-20000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..2bf66a33fda75b69f9b1a9597987f418f5acfb49 --- /dev/null +++ b/out/checkpoint-20000/tokenizer.json @@ -0,0 +1,20279 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "content": "<|audio:0|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1, + "content": "<|audio:1|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 2, + "content": "<|audio:2|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 3, + "content": "<|audio:3|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 4, + "content": "<|audio:4|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 5, + "content": "<|audio:5|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 6, + "content": "<|audio:6|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 7, + "content": 
"<|audio:7|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 8, + "content": "<|audio:8|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 9, + "content": "<|audio:9|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 10, + "content": "<|audio:10|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 11, + "content": "<|audio:11|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 12, + "content": "<|audio:12|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 13, + "content": "<|audio:13|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 14, + "content": "<|audio:14|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 15, + "content": "<|audio:15|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 16, + "content": "<|audio:16|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 17, + "content": "<|audio:17|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 18, + "content": "<|audio:18|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 19, + "content": "<|audio:19|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 20, + "content": "<|audio:20|>", 
+ "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 21, + "content": "<|audio:21|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 22, + "content": "<|audio:22|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 23, + "content": "<|audio:23|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 24, + "content": "<|audio:24|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 25, + "content": "<|audio:25|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 26, + "content": "<|audio:26|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 27, + "content": "<|audio:27|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 28, + "content": "<|audio:28|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 29, + "content": "<|audio:29|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 30, + "content": "<|audio:30|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 31, + "content": "<|audio:31|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 32, + "content": "<|audio:32|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 33, + "content": "<|audio:33|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 34, + "content": "<|audio:34|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 35, + "content": "<|audio:35|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 36, + "content": "<|audio:36|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 37, + "content": "<|audio:37|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 38, + "content": "<|audio:38|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 39, + "content": "<|audio:39|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 40, + "content": "<|audio:40|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 41, + "content": "<|audio:41|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 42, + "content": "<|audio:42|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 43, + "content": "<|audio:43|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 44, + "content": "<|audio:44|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 45, + "content": "<|audio:45|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 46, + "content": "<|audio:46|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 47, + "content": "<|audio:47|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 48, + "content": "<|audio:48|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 49, + "content": "<|audio:49|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 50, + "content": "<|audio:50|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 51, + "content": "<|audio:51|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 52, + "content": "<|audio:52|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 53, + "content": "<|audio:53|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 54, + "content": "<|audio:54|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 55, + "content": "<|audio:55|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 56, + "content": "<|audio:56|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 57, + "content": "<|audio:57|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 58, + "content": "<|audio:58|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 59, + "content": "<|audio:59|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 60, + "content": "<|audio:60|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 61, + "content": "<|audio:61|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 62, + "content": "<|audio:62|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 63, + "content": "<|audio:63|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 64, + "content": "<|audio:64|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 65, + "content": "<|audio:65|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 66, + "content": "<|audio:66|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 67, + "content": "<|audio:67|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 68, + "content": "<|audio:68|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 69, + "content": "<|audio:69|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 70, + "content": "<|audio:70|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 71, + "content": "<|audio:71|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 72, + "content": "<|audio:72|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 73, + "content": "<|audio:73|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 74, + "content": "<|audio:74|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 75, + "content": "<|audio:75|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 76, + "content": "<|audio:76|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 77, + "content": "<|audio:77|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 78, + "content": "<|audio:78|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 79, + "content": "<|audio:79|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 80, + "content": "<|audio:80|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 81, + "content": "<|audio:81|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 82, + "content": "<|audio:82|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 83, + "content": "<|audio:83|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 84, + "content": "<|audio:84|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 85, + "content": "<|audio:85|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 86, + "content": "<|audio:86|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 87, + "content": "<|audio:87|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 88, + "content": "<|audio:88|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 89, + "content": "<|audio:89|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 90, + "content": "<|audio:90|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 91, + "content": "<|audio:91|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 92, + "content": "<|audio:92|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 93, + "content": "<|audio:93|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 94, + "content": "<|audio:94|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 95, + "content": "<|audio:95|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 96, + "content": "<|audio:96|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 97, + "content": "<|audio:97|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 98, + "content": "<|audio:98|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 99, + "content": "<|audio:99|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 100, + "content": "<|audio:100|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 101, + "content": "<|audio:101|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 102, + "content": "<|audio:102|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 103, + "content": "<|audio:103|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 104, + "content": "<|audio:104|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 105, + "content": "<|audio:105|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 106, + "content": "<|audio:106|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 107, + "content": "<|audio:107|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 108, + "content": "<|audio:108|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 109, + "content": "<|audio:109|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 110, + "content": "<|audio:110|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 111, + "content": 
"<|audio:111|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 112, + "content": "<|audio:112|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 113, + "content": "<|audio:113|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 114, + "content": "<|audio:114|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 115, + "content": "<|audio:115|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 116, + "content": "<|audio:116|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 117, + "content": "<|audio:117|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 118, + "content": "<|audio:118|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 119, + "content": "<|audio:119|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 120, + "content": "<|audio:120|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 121, + "content": "<|audio:121|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 122, + "content": "<|audio:122|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 123, + "content": "<|audio:123|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
124, + "content": "<|audio:124|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 125, + "content": "<|audio:125|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 126, + "content": "<|audio:126|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 127, + "content": "<|audio:127|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 128, + "content": "<|audio:128|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 129, + "content": "<|audio:129|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 130, + "content": "<|audio:130|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 131, + "content": "<|audio:131|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 132, + "content": "<|audio:132|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 133, + "content": "<|audio:133|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 134, + "content": "<|audio:134|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 135, + "content": "<|audio:135|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 136, + "content": "<|audio:136|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 137, + "content": "<|audio:137|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 138, + "content": "<|audio:138|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 139, + "content": "<|audio:139|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 140, + "content": "<|audio:140|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 141, + "content": "<|audio:141|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 142, + "content": "<|audio:142|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 143, + "content": "<|audio:143|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 144, + "content": "<|audio:144|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 145, + "content": "<|audio:145|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 146, + "content": "<|audio:146|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 147, + "content": "<|audio:147|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 148, + "content": "<|audio:148|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 149, + "content": "<|audio:149|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 150, + "content": "<|audio:150|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 151, + "content": "<|audio:151|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 152, + "content": "<|audio:152|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 153, + "content": "<|audio:153|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 154, + "content": "<|audio:154|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 155, + "content": "<|audio:155|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 156, + "content": "<|audio:156|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 157, + "content": "<|audio:157|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 158, + "content": "<|audio:158|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 159, + "content": "<|audio:159|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 160, + "content": "<|audio:160|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 161, + "content": "<|audio:161|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 162, + "content": "<|audio:162|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 163, + "content": "<|audio:163|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 164, + "content": "<|audio:164|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 165, + "content": "<|audio:165|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 166, + "content": "<|audio:166|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 167, + "content": "<|audio:167|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 168, + "content": "<|audio:168|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 169, + "content": "<|audio:169|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 170, + "content": "<|audio:170|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 171, + "content": "<|audio:171|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 172, + "content": "<|audio:172|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 173, + "content": "<|audio:173|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 174, + "content": "<|audio:174|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 175, + "content": "<|audio:175|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 176, + "content": "<|audio:176|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 177, + "content": "<|audio:177|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 178, + "content": "<|audio:178|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 179, + "content": "<|audio:179|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 180, + "content": "<|audio:180|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 181, + "content": "<|audio:181|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 182, + "content": "<|audio:182|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 183, + "content": "<|audio:183|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 184, + "content": "<|audio:184|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 185, + "content": "<|audio:185|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 186, + "content": "<|audio:186|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 187, + "content": "<|audio:187|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 188, + "content": "<|audio:188|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 189, + "content": "<|audio:189|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 190, + "content": "<|audio:190|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 191, + "content": "<|audio:191|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 192, + "content": "<|audio:192|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 193, + "content": "<|audio:193|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 194, + "content": "<|audio:194|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 195, + "content": "<|audio:195|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 196, + "content": "<|audio:196|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 197, + "content": "<|audio:197|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 198, + "content": "<|audio:198|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 199, + "content": "<|audio:199|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 200, + "content": "<|audio:200|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 201, + "content": "<|audio:201|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 202, + "content": "<|audio:202|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 203, + "content": "<|audio:203|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 204, + "content": "<|audio:204|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 205, + "content": "<|audio:205|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 206, + "content": "<|audio:206|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 207, + "content": "<|audio:207|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 208, + "content": "<|audio:208|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 209, + "content": "<|audio:209|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 210, + "content": "<|audio:210|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 211, + "content": "<|audio:211|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 212, + "content": "<|audio:212|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 213, + "content": "<|audio:213|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 214, + "content": 
"<|audio:214|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 215, + "content": "<|audio:215|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 216, + "content": "<|audio:216|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 217, + "content": "<|audio:217|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 218, + "content": "<|audio:218|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 219, + "content": "<|audio:219|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 220, + "content": "<|audio:220|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 221, + "content": "<|audio:221|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 222, + "content": "<|audio:222|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 223, + "content": "<|audio:223|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 224, + "content": "<|audio:224|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 225, + "content": "<|audio:225|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 226, + "content": "<|audio:226|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
227, + "content": "<|audio:227|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 228, + "content": "<|audio:228|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 229, + "content": "<|audio:229|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 230, + "content": "<|audio:230|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 231, + "content": "<|audio:231|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 232, + "content": "<|audio:232|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 233, + "content": "<|audio:233|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 234, + "content": "<|audio:234|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 235, + "content": "<|audio:235|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 236, + "content": "<|audio:236|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 237, + "content": "<|audio:237|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 238, + "content": "<|audio:238|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 239, + "content": "<|audio:239|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 240, + "content": "<|audio:240|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 241, + "content": "<|audio:241|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 242, + "content": "<|audio:242|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 243, + "content": "<|audio:243|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 244, + "content": "<|audio:244|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 245, + "content": "<|audio:245|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 246, + "content": "<|audio:246|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 247, + "content": "<|audio:247|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 248, + "content": "<|audio:248|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 249, + "content": "<|audio:249|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 250, + "content": "<|audio:250|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 251, + "content": "<|audio:251|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 252, + "content": "<|audio:252|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 253, + "content": "<|audio:253|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 254, + "content": "<|audio:254|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 255, + "content": "<|audio:255|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 256, + "content": "<|audio:256|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 257, + "content": "<|audio:257|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 258, + "content": "<|audio:258|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 259, + "content": "<|audio:259|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 260, + "content": "<|audio:260|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 261, + "content": "<|audio:261|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 262, + "content": "<|audio:262|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 263, + "content": "<|audio:263|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 264, + "content": "<|audio:264|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 265, + "content": "<|audio:265|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 266, + "content": "<|audio:266|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 267, + "content": "<|audio:267|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 268, + "content": "<|audio:268|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 269, + "content": "<|audio:269|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 270, + "content": "<|audio:270|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 271, + "content": "<|audio:271|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 272, + "content": "<|audio:272|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 273, + "content": "<|audio:273|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 274, + "content": "<|audio:274|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 275, + "content": "<|audio:275|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 276, + "content": "<|audio:276|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 277, + "content": "<|audio:277|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 278, + "content": "<|audio:278|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 279, + "content": "<|audio:279|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 280, + "content": "<|audio:280|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 281, + "content": "<|audio:281|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 282, + "content": "<|audio:282|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 283, + "content": "<|audio:283|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 284, + "content": "<|audio:284|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 285, + "content": "<|audio:285|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 286, + "content": "<|audio:286|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 287, + "content": "<|audio:287|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 288, + "content": "<|audio:288|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 289, + "content": "<|audio:289|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 290, + "content": "<|audio:290|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 291, + "content": "<|audio:291|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 292, + "content": "<|audio:292|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 293, + "content": "<|audio:293|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 294, + "content": "<|audio:294|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 295, + "content": "<|audio:295|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 296, + "content": "<|audio:296|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 297, + "content": "<|audio:297|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 298, + "content": "<|audio:298|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 299, + "content": "<|audio:299|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 300, + "content": "<|audio:300|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 301, + "content": "<|audio:301|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 302, + "content": "<|audio:302|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 303, + "content": "<|audio:303|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 304, + "content": "<|audio:304|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 305, + "content": "<|audio:305|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 306, + "content": "<|audio:306|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 307, + "content": "<|audio:307|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 308, + "content": "<|audio:308|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 309, + "content": "<|audio:309|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 310, + "content": "<|audio:310|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 311, + "content": "<|audio:311|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 312, + "content": "<|audio:312|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 313, + "content": "<|audio:313|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 314, + "content": "<|audio:314|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 315, + "content": "<|audio:315|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 316, + "content": "<|audio:316|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 317, + "content": 
"<|audio:317|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 318, + "content": "<|audio:318|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 319, + "content": "<|audio:319|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 320, + "content": "<|audio:320|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 321, + "content": "<|audio:321|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 322, + "content": "<|audio:322|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 323, + "content": "<|audio:323|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 324, + "content": "<|audio:324|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 325, + "content": "<|audio:325|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 326, + "content": "<|audio:326|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 327, + "content": "<|audio:327|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 328, + "content": "<|audio:328|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 329, + "content": "<|audio:329|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
330, + "content": "<|audio:330|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 331, + "content": "<|audio:331|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 332, + "content": "<|audio:332|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 333, + "content": "<|audio:333|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 334, + "content": "<|audio:334|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 335, + "content": "<|audio:335|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 336, + "content": "<|audio:336|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 337, + "content": "<|audio:337|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 338, + "content": "<|audio:338|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 339, + "content": "<|audio:339|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 340, + "content": "<|audio:340|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 341, + "content": "<|audio:341|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 342, + "content": "<|audio:342|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 343, + "content": "<|audio:343|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 344, + "content": "<|audio:344|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 345, + "content": "<|audio:345|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 346, + "content": "<|audio:346|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 347, + "content": "<|audio:347|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 348, + "content": "<|audio:348|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 349, + "content": "<|audio:349|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 350, + "content": "<|audio:350|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 351, + "content": "<|audio:351|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 352, + "content": "<|audio:352|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 353, + "content": "<|audio:353|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 354, + "content": "<|audio:354|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 355, + "content": "<|audio:355|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 356, + "content": "<|audio:356|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 357, + "content": "<|audio:357|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 358, + "content": "<|audio:358|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 359, + "content": "<|audio:359|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 360, + "content": "<|audio:360|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 361, + "content": "<|audio:361|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 362, + "content": "<|audio:362|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 363, + "content": "<|audio:363|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 364, + "content": "<|audio:364|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 365, + "content": "<|audio:365|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 366, + "content": "<|audio:366|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 367, + "content": "<|audio:367|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 368, + "content": "<|audio:368|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 369, + "content": "<|audio:369|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 370, + "content": "<|audio:370|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 371, + "content": "<|audio:371|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 372, + "content": "<|audio:372|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 373, + "content": "<|audio:373|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 374, + "content": "<|audio:374|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 375, + "content": "<|audio:375|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 376, + "content": "<|audio:376|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 377, + "content": "<|audio:377|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 378, + "content": "<|audio:378|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 379, + "content": "<|audio:379|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 380, + "content": "<|audio:380|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 381, + "content": "<|audio:381|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 382, + "content": "<|audio:382|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 383, + "content": "<|audio:383|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 384, + "content": "<|audio:384|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 385, + "content": "<|audio:385|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 386, + "content": "<|audio:386|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 387, + "content": "<|audio:387|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 388, + "content": "<|audio:388|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 389, + "content": "<|audio:389|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 390, + "content": "<|audio:390|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 391, + "content": "<|audio:391|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 392, + "content": "<|audio:392|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 393, + "content": "<|audio:393|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 394, + "content": "<|audio:394|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 395, + "content": "<|audio:395|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 396, + "content": "<|audio:396|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 397, + "content": "<|audio:397|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 398, + "content": "<|audio:398|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 399, + "content": "<|audio:399|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 400, + "content": "<|audio:400|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 401, + "content": "<|audio:401|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 402, + "content": "<|audio:402|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 403, + "content": "<|audio:403|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 404, + "content": "<|audio:404|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 405, + "content": "<|audio:405|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 406, + "content": "<|audio:406|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 407, + "content": "<|audio:407|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 408, + "content": "<|audio:408|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 409, + "content": "<|audio:409|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 410, + "content": "<|audio:410|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 411, + "content": "<|audio:411|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 412, + "content": "<|audio:412|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 413, + "content": "<|audio:413|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 414, + "content": "<|audio:414|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 415, + "content": "<|audio:415|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 416, + "content": "<|audio:416|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 417, + "content": "<|audio:417|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 418, + "content": "<|audio:418|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 419, + "content": "<|audio:419|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 420, + "content": 
"<|audio:420|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 421, + "content": "<|audio:421|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 422, + "content": "<|audio:422|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 423, + "content": "<|audio:423|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 424, + "content": "<|audio:424|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 425, + "content": "<|audio:425|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 426, + "content": "<|audio:426|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 427, + "content": "<|audio:427|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 428, + "content": "<|audio:428|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 429, + "content": "<|audio:429|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 430, + "content": "<|audio:430|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 431, + "content": "<|audio:431|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 432, + "content": "<|audio:432|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
433, + "content": "<|audio:433|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 434, + "content": "<|audio:434|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 435, + "content": "<|audio:435|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 436, + "content": "<|audio:436|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 437, + "content": "<|audio:437|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 438, + "content": "<|audio:438|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 439, + "content": "<|audio:439|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 440, + "content": "<|audio:440|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 441, + "content": "<|audio:441|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 442, + "content": "<|audio:442|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 443, + "content": "<|audio:443|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 444, + "content": "<|audio:444|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 445, + "content": "<|audio:445|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 446, + "content": "<|audio:446|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 447, + "content": "<|audio:447|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 448, + "content": "<|audio:448|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 449, + "content": "<|audio:449|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 450, + "content": "<|audio:450|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 451, + "content": "<|audio:451|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 452, + "content": "<|audio:452|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 453, + "content": "<|audio:453|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 454, + "content": "<|audio:454|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 455, + "content": "<|audio:455|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 456, + "content": "<|audio:456|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 457, + "content": "<|audio:457|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 458, + "content": "<|audio:458|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 459, + "content": "<|audio:459|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 460, + "content": "<|audio:460|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 461, + "content": "<|audio:461|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 462, + "content": "<|audio:462|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 463, + "content": "<|audio:463|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 464, + "content": "<|audio:464|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 465, + "content": "<|audio:465|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 466, + "content": "<|audio:466|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 467, + "content": "<|audio:467|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 468, + "content": "<|audio:468|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 469, + "content": "<|audio:469|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 470, + "content": "<|audio:470|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 471, + "content": "<|audio:471|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 472, + "content": "<|audio:472|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 473, + "content": "<|audio:473|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 474, + "content": "<|audio:474|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 475, + "content": "<|audio:475|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 476, + "content": "<|audio:476|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 477, + "content": "<|audio:477|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 478, + "content": "<|audio:478|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 479, + "content": "<|audio:479|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 480, + "content": "<|audio:480|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 481, + "content": "<|audio:481|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 482, + "content": "<|audio:482|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 483, + "content": "<|audio:483|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 484, + "content": "<|audio:484|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 485, + "content": "<|audio:485|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 486, + "content": "<|audio:486|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 487, + "content": "<|audio:487|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 488, + "content": "<|audio:488|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 489, + "content": "<|audio:489|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 490, + "content": "<|audio:490|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 491, + "content": "<|audio:491|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 492, + "content": "<|audio:492|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 493, + "content": "<|audio:493|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 494, + "content": "<|audio:494|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 495, + "content": "<|audio:495|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 496, + "content": "<|audio:496|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 497, + "content": "<|audio:497|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 498, + "content": "<|audio:498|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 499, + "content": "<|audio:499|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 500, + "content": "<|audio:500|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 501, + "content": "<|audio:501|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 502, + "content": "<|audio:502|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 503, + "content": "<|audio:503|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 504, + "content": "<|audio:504|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 505, + "content": "<|audio:505|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 506, + "content": "<|audio:506|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 507, + "content": "<|audio:507|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 508, + "content": "<|audio:508|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 509, + "content": "<|audio:509|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 510, + "content": "<|audio:510|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 511, + "content": "<|audio:511|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 512, + "content": "<|audio:512|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 513, + "content": "<|audio:513|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 514, + "content": "<|audio:514|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 515, + "content": "<|audio:515|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 516, + "content": "<|audio:516|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 517, + "content": "<|audio:517|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 518, + "content": "<|audio:518|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 519, + "content": "<|audio:519|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 520, + "content": "<|audio:520|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 521, + "content": "<|audio:521|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 522, + "content": "<|audio:522|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 523, + "content": 
"<|audio:523|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 524, + "content": "<|audio:524|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 525, + "content": "<|audio:525|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 526, + "content": "<|audio:526|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 527, + "content": "<|audio:527|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 528, + "content": "<|audio:528|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 529, + "content": "<|audio:529|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 530, + "content": "<|audio:530|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 531, + "content": "<|audio:531|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 532, + "content": "<|audio:532|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 533, + "content": "<|audio:533|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 534, + "content": "<|audio:534|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 535, + "content": "<|audio:535|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
536, + "content": "<|audio:536|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 537, + "content": "<|audio:537|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 538, + "content": "<|audio:538|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 539, + "content": "<|audio:539|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 540, + "content": "<|audio:540|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 541, + "content": "<|audio:541|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 542, + "content": "<|audio:542|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 543, + "content": "<|audio:543|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 544, + "content": "<|audio:544|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 545, + "content": "<|audio:545|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 546, + "content": "<|audio:546|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 547, + "content": "<|audio:547|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 548, + "content": "<|audio:548|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 549, + "content": "<|audio:549|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 550, + "content": "<|audio:550|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 551, + "content": "<|audio:551|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 552, + "content": "<|audio:552|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 553, + "content": "<|audio:553|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 554, + "content": "<|audio:554|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 555, + "content": "<|audio:555|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 556, + "content": "<|audio:556|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 557, + "content": "<|audio:557|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 558, + "content": "<|audio:558|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 559, + "content": "<|audio:559|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 560, + "content": "<|audio:560|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 561, + "content": "<|audio:561|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 562, + "content": "<|audio:562|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 563, + "content": "<|audio:563|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 564, + "content": "<|audio:564|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 565, + "content": "<|audio:565|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 566, + "content": "<|audio:566|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 567, + "content": "<|audio:567|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 568, + "content": "<|audio:568|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 569, + "content": "<|audio:569|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 570, + "content": "<|audio:570|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 571, + "content": "<|audio:571|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 572, + "content": "<|audio:572|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 573, + "content": "<|audio:573|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 574, + "content": "<|audio:574|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 575, + "content": "<|audio:575|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 576, + "content": "<|audio:576|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 577, + "content": "<|audio:577|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 578, + "content": "<|audio:578|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 579, + "content": "<|audio:579|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 580, + "content": "<|audio:580|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 581, + "content": "<|audio:581|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 582, + "content": "<|audio:582|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 583, + "content": "<|audio:583|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 584, + "content": "<|audio:584|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 585, + "content": "<|audio:585|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 586, + "content": "<|audio:586|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 587, + "content": "<|audio:587|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 588, + "content": "<|audio:588|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 589, + "content": "<|audio:589|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 590, + "content": "<|audio:590|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 591, + "content": "<|audio:591|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 592, + "content": "<|audio:592|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 593, + "content": "<|audio:593|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 594, + "content": "<|audio:594|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 595, + "content": "<|audio:595|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 596, + "content": "<|audio:596|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 597, + "content": "<|audio:597|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 598, + "content": "<|audio:598|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 599, + "content": "<|audio:599|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 600, + "content": "<|audio:600|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 601, + "content": "<|audio:601|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 602, + "content": "<|audio:602|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 603, + "content": "<|audio:603|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 604, + "content": "<|audio:604|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 605, + "content": "<|audio:605|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 606, + "content": "<|audio:606|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 607, + "content": "<|audio:607|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 608, + "content": "<|audio:608|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 609, + "content": "<|audio:609|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 610, + "content": "<|audio:610|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 611, + "content": "<|audio:611|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 612, + "content": "<|audio:612|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 613, + "content": "<|audio:613|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 614, + "content": "<|audio:614|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 615, + "content": "<|audio:615|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 616, + "content": "<|audio:616|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 617, + "content": "<|audio:617|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 618, + "content": "<|audio:618|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 619, + "content": "<|audio:619|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 620, + "content": "<|audio:620|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 621, + "content": "<|audio:621|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 622, + "content": "<|audio:622|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 623, + "content": "<|audio:623|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 624, + "content": "<|audio:624|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 625, + "content": "<|audio:625|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 626, + "content": 
"<|audio:626|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 627, + "content": "<|audio:627|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 628, + "content": "<|audio:628|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 629, + "content": "<|audio:629|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 630, + "content": "<|audio:630|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 631, + "content": "<|audio:631|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 632, + "content": "<|audio:632|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 633, + "content": "<|audio:633|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 634, + "content": "<|audio:634|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 635, + "content": "<|audio:635|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 636, + "content": "<|audio:636|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 637, + "content": "<|audio:637|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 638, + "content": "<|audio:638|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
639, + "content": "<|audio:639|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 640, + "content": "<|audio:640|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 641, + "content": "<|audio:641|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 642, + "content": "<|audio:642|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 643, + "content": "<|audio:643|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 644, + "content": "<|audio:644|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 645, + "content": "<|audio:645|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 646, + "content": "<|audio:646|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 647, + "content": "<|audio:647|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 648, + "content": "<|audio:648|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 649, + "content": "<|audio:649|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 650, + "content": "<|audio:650|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 651, + "content": "<|audio:651|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 652, + "content": "<|audio:652|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 653, + "content": "<|audio:653|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 654, + "content": "<|audio:654|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 655, + "content": "<|audio:655|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 656, + "content": "<|audio:656|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 657, + "content": "<|audio:657|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 658, + "content": "<|audio:658|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 659, + "content": "<|audio:659|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 660, + "content": "<|audio:660|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 661, + "content": "<|audio:661|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 662, + "content": "<|audio:662|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 663, + "content": "<|audio:663|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 664, + "content": "<|audio:664|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 665, + "content": "<|audio:665|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 666, + "content": "<|audio:666|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 667, + "content": "<|audio:667|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 668, + "content": "<|audio:668|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 669, + "content": "<|audio:669|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 670, + "content": "<|audio:670|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 671, + "content": "<|audio:671|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 672, + "content": "<|audio:672|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 673, + "content": "<|audio:673|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 674, + "content": "<|audio:674|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 675, + "content": "<|audio:675|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 676, + "content": "<|audio:676|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 677, + "content": "<|audio:677|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 678, + "content": "<|audio:678|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 679, + "content": "<|audio:679|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 680, + "content": "<|audio:680|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 681, + "content": "<|audio:681|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 682, + "content": "<|audio:682|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 683, + "content": "<|audio:683|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 684, + "content": "<|audio:684|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 685, + "content": "<|audio:685|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 686, + "content": "<|audio:686|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 687, + "content": "<|audio:687|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 688, + "content": "<|audio:688|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 689, + "content": "<|audio:689|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 690, + "content": "<|audio:690|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 691, + "content": "<|audio:691|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 692, + "content": "<|audio:692|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 693, + "content": "<|audio:693|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 694, + "content": "<|audio:694|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 695, + "content": "<|audio:695|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 696, + "content": "<|audio:696|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 697, + "content": "<|audio:697|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 698, + "content": "<|audio:698|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 699, + "content": "<|audio:699|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 700, + "content": "<|audio:700|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 701, + "content": "<|audio:701|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 702, + "content": "<|audio:702|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 703, + "content": "<|audio:703|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 704, + "content": "<|audio:704|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 705, + "content": "<|audio:705|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 706, + "content": "<|audio:706|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 707, + "content": "<|audio:707|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 708, + "content": "<|audio:708|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 709, + "content": "<|audio:709|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 710, + "content": "<|audio:710|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 711, + "content": "<|audio:711|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 712, + "content": "<|audio:712|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 713, + "content": "<|audio:713|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 714, + "content": "<|audio:714|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 715, + "content": "<|audio:715|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 716, + "content": "<|audio:716|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 717, + "content": "<|audio:717|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 718, + "content": "<|audio:718|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 719, + "content": "<|audio:719|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 720, + "content": "<|audio:720|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 721, + "content": "<|audio:721|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 722, + "content": "<|audio:722|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 723, + "content": "<|audio:723|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 724, + "content": "<|audio:724|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 725, + "content": "<|audio:725|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 726, + "content": "<|audio:726|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 727, + "content": "<|audio:727|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 728, + "content": "<|audio:728|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 729, + "content": 
"<|audio:729|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 730, + "content": "<|audio:730|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 731, + "content": "<|audio:731|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 732, + "content": "<|audio:732|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 733, + "content": "<|audio:733|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 734, + "content": "<|audio:734|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 735, + "content": "<|audio:735|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 736, + "content": "<|audio:736|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 737, + "content": "<|audio:737|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 738, + "content": "<|audio:738|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 739, + "content": "<|audio:739|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 740, + "content": "<|audio:740|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 741, + "content": "<|audio:741|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
742, + "content": "<|audio:742|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 743, + "content": "<|audio:743|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 744, + "content": "<|audio:744|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 745, + "content": "<|audio:745|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 746, + "content": "<|audio:746|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 747, + "content": "<|audio:747|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 748, + "content": "<|audio:748|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 749, + "content": "<|audio:749|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 750, + "content": "<|audio:750|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 751, + "content": "<|audio:751|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 752, + "content": "<|audio:752|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 753, + "content": "<|audio:753|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 754, + "content": "<|audio:754|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 755, + "content": "<|audio:755|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 756, + "content": "<|audio:756|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 757, + "content": "<|audio:757|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 758, + "content": "<|audio:758|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 759, + "content": "<|audio:759|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 760, + "content": "<|audio:760|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 761, + "content": "<|audio:761|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 762, + "content": "<|audio:762|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 763, + "content": "<|audio:763|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 764, + "content": "<|audio:764|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 765, + "content": "<|audio:765|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 766, + "content": "<|audio:766|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 767, + "content": "<|audio:767|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 768, + "content": "<|audio:768|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 769, + "content": "<|audio:769|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 770, + "content": "<|audio:770|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 771, + "content": "<|audio:771|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 772, + "content": "<|audio:772|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 773, + "content": "<|audio:773|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 774, + "content": "<|audio:774|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 775, + "content": "<|audio:775|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 776, + "content": "<|audio:776|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 777, + "content": "<|audio:777|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 778, + "content": "<|audio:778|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 779, + "content": "<|audio:779|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 780, + "content": "<|audio:780|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 781, + "content": "<|audio:781|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 782, + "content": "<|audio:782|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 783, + "content": "<|audio:783|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 784, + "content": "<|audio:784|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 785, + "content": "<|audio:785|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 786, + "content": "<|audio:786|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 787, + "content": "<|audio:787|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 788, + "content": "<|audio:788|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 789, + "content": "<|audio:789|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 790, + "content": "<|audio:790|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 791, + "content": "<|audio:791|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 792, + "content": "<|audio:792|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 793, + "content": "<|audio:793|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 794, + "content": "<|audio:794|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 795, + "content": "<|audio:795|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 796, + "content": "<|audio:796|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 797, + "content": "<|audio:797|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 798, + "content": "<|audio:798|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 799, + "content": "<|audio:799|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 800, + "content": "<|audio:800|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 801, + "content": "<|audio:801|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 802, + "content": "<|audio:802|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 803, + "content": "<|audio:803|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 804, + "content": "<|audio:804|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 805, + "content": "<|audio:805|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 806, + "content": "<|audio:806|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 807, + "content": "<|audio:807|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 808, + "content": "<|audio:808|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 809, + "content": "<|audio:809|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 810, + "content": "<|audio:810|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 811, + "content": "<|audio:811|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 812, + "content": "<|audio:812|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 813, + "content": "<|audio:813|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 814, + "content": "<|audio:814|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 815, + "content": "<|audio:815|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 816, + "content": "<|audio:816|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 817, + "content": "<|audio:817|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 818, + "content": "<|audio:818|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 819, + "content": "<|audio:819|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 820, + "content": "<|audio:820|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 821, + "content": "<|audio:821|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 822, + "content": "<|audio:822|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 823, + "content": "<|audio:823|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 824, + "content": "<|audio:824|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 825, + "content": "<|audio:825|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 826, + "content": "<|audio:826|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 827, + "content": "<|audio:827|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 828, + "content": "<|audio:828|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 829, + "content": "<|audio:829|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 830, + "content": "<|audio:830|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 831, + "content": "<|audio:831|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 832, + "content": 
"<|audio:832|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 833, + "content": "<|audio:833|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 834, + "content": "<|audio:834|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 835, + "content": "<|audio:835|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 836, + "content": "<|audio:836|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 837, + "content": "<|audio:837|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 838, + "content": "<|audio:838|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 839, + "content": "<|audio:839|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 840, + "content": "<|audio:840|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 841, + "content": "<|audio:841|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 842, + "content": "<|audio:842|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 843, + "content": "<|audio:843|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 844, + "content": "<|audio:844|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
845, + "content": "<|audio:845|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 846, + "content": "<|audio:846|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 847, + "content": "<|audio:847|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 848, + "content": "<|audio:848|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 849, + "content": "<|audio:849|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 850, + "content": "<|audio:850|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 851, + "content": "<|audio:851|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 852, + "content": "<|audio:852|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 853, + "content": "<|audio:853|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 854, + "content": "<|audio:854|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 855, + "content": "<|audio:855|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 856, + "content": "<|audio:856|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 857, + "content": "<|audio:857|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 858, + "content": "<|audio:858|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 859, + "content": "<|audio:859|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 860, + "content": "<|audio:860|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 861, + "content": "<|audio:861|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 862, + "content": "<|audio:862|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 863, + "content": "<|audio:863|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 864, + "content": "<|audio:864|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 865, + "content": "<|audio:865|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 866, + "content": "<|audio:866|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 867, + "content": "<|audio:867|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 868, + "content": "<|audio:868|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 869, + "content": "<|audio:869|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 870, + "content": "<|audio:870|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 871, + "content": "<|audio:871|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 872, + "content": "<|audio:872|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 873, + "content": "<|audio:873|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 874, + "content": "<|audio:874|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 875, + "content": "<|audio:875|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 876, + "content": "<|audio:876|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 877, + "content": "<|audio:877|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 878, + "content": "<|audio:878|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 879, + "content": "<|audio:879|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 880, + "content": "<|audio:880|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 881, + "content": "<|audio:881|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 882, + "content": "<|audio:882|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 883, + "content": "<|audio:883|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 884, + "content": "<|audio:884|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 885, + "content": "<|audio:885|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 886, + "content": "<|audio:886|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 887, + "content": "<|audio:887|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 888, + "content": "<|audio:888|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 889, + "content": "<|audio:889|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 890, + "content": "<|audio:890|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 891, + "content": "<|audio:891|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 892, + "content": "<|audio:892|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 893, + "content": "<|audio:893|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 894, + "content": "<|audio:894|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 895, + "content": "<|audio:895|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 896, + "content": "<|audio:896|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 897, + "content": "<|audio:897|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 898, + "content": "<|audio:898|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 899, + "content": "<|audio:899|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 900, + "content": "<|audio:900|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 901, + "content": "<|audio:901|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 902, + "content": "<|audio:902|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 903, + "content": "<|audio:903|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 904, + "content": "<|audio:904|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 905, + "content": "<|audio:905|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 906, + "content": "<|audio:906|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 907, + "content": "<|audio:907|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 908, + "content": "<|audio:908|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 909, + "content": "<|audio:909|>", + "single_word": false, + 
"lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 910, + "content": "<|audio:910|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 911, + "content": "<|audio:911|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 912, + "content": "<|audio:912|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 913, + "content": "<|audio:913|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 914, + "content": "<|audio:914|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 915, + "content": "<|audio:915|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 916, + "content": "<|audio:916|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 917, + "content": "<|audio:917|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 918, + "content": "<|audio:918|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 919, + "content": "<|audio:919|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 920, + "content": "<|audio:920|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 921, + "content": "<|audio:921|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 922, + "content": "<|audio:922|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 923, + "content": "<|audio:923|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 924, + "content": "<|audio:924|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 925, + "content": "<|audio:925|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 926, + "content": "<|audio:926|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 927, + "content": "<|audio:927|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 928, + "content": "<|audio:928|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 929, + "content": "<|audio:929|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 930, + "content": "<|audio:930|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 931, + "content": "<|audio:931|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 932, + "content": "<|audio:932|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 933, + "content": "<|audio:933|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 934, + "content": "<|audio:934|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 935, + "content": 
"<|audio:935|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 936, + "content": "<|audio:936|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 937, + "content": "<|audio:937|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 938, + "content": "<|audio:938|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 939, + "content": "<|audio:939|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 940, + "content": "<|audio:940|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 941, + "content": "<|audio:941|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 942, + "content": "<|audio:942|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 943, + "content": "<|audio:943|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 944, + "content": "<|audio:944|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 945, + "content": "<|audio:945|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 946, + "content": "<|audio:946|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 947, + "content": "<|audio:947|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 
948, + "content": "<|audio:948|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 949, + "content": "<|audio:949|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 950, + "content": "<|audio:950|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 951, + "content": "<|audio:951|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 952, + "content": "<|audio:952|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 953, + "content": "<|audio:953|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 954, + "content": "<|audio:954|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 955, + "content": "<|audio:955|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 956, + "content": "<|audio:956|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 957, + "content": "<|audio:957|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 958, + "content": "<|audio:958|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 959, + "content": "<|audio:959|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 960, + "content": "<|audio:960|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + 
}, + { + "id": 961, + "content": "<|audio:961|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 962, + "content": "<|audio:962|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 963, + "content": "<|audio:963|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 964, + "content": "<|audio:964|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 965, + "content": "<|audio:965|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 966, + "content": "<|audio:966|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 967, + "content": "<|audio:967|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 968, + "content": "<|audio:968|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 969, + "content": "<|audio:969|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 970, + "content": "<|audio:970|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 971, + "content": "<|audio:971|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 972, + "content": "<|audio:972|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 973, + "content": "<|audio:973|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + 
"special": true + }, + { + "id": 974, + "content": "<|audio:974|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 975, + "content": "<|audio:975|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 976, + "content": "<|audio:976|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 977, + "content": "<|audio:977|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 978, + "content": "<|audio:978|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 979, + "content": "<|audio:979|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 980, + "content": "<|audio:980|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 981, + "content": "<|audio:981|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 982, + "content": "<|audio:982|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 983, + "content": "<|audio:983|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 984, + "content": "<|audio:984|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 985, + "content": "<|audio:985|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 986, + "content": "<|audio:986|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + 
"normalized": false, + "special": true + }, + { + "id": 987, + "content": "<|audio:987|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 988, + "content": "<|audio:988|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 989, + "content": "<|audio:989|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 990, + "content": "<|audio:990|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 991, + "content": "<|audio:991|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 992, + "content": "<|audio:992|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 993, + "content": "<|audio:993|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 994, + "content": "<|audio:994|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 995, + "content": "<|audio:995|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 996, + "content": "<|audio:996|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 997, + "content": "<|audio:997|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 998, + "content": "<|audio:998|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 999, + "content": "<|audio:999|>", + "single_word": false, + "lstrip": false, + 
"rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1000, + "content": "<|audio:1000|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1001, + "content": "<|audio:1001|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1002, + "content": "<|audio:1002|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1003, + "content": "<|audio:1003|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1004, + "content": "<|audio:1004|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1005, + "content": "<|audio:1005|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1006, + "content": "<|audio:1006|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1007, + "content": "<|audio:1007|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1008, + "content": "<|audio:1008|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1009, + "content": "<|audio:1009|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1010, + "content": "<|audio:1010|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1011, + "content": "<|audio:1011|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1012, + "content": "<|audio:1012|>", + 
"single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1013, + "content": "<|audio:1013|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1014, + "content": "<|audio:1014|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1015, + "content": "<|audio:1015|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1016, + "content": "<|audio:1016|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1017, + "content": "<|audio:1017|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1018, + "content": "<|audio:1018|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1019, + "content": "<|audio:1019|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1020, + "content": "<|audio:1020|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1021, + "content": "<|audio:1021|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1022, + "content": "<|audio:1022|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1023, + "content": "<|audio:1023|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1024, + "content": "<|startoftranscript|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + 
{ + "id": 1025, + "content": "<|endoftranscript|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 1026, + "content": "<|padding|>", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": { + "type": "NFKC" + }, + "pre_tokenizer": { + "type": "Metaspace", + "replacement": "▁", + "prepend_scheme": "always", + "split": true + }, + "post_processor": null, + "decoder": { + "type": "Metaspace", + "replacement": "▁", + "prepend_scheme": "always", + "split": true + }, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "byte_fallback": false, + "ignore_merges": false, + "vocab": { + "<|audio:0|>": 0, + "<|audio:1|>": 1, + "<|audio:2|>": 2, + "<|audio:3|>": 3, + "<|audio:4|>": 4, + "<|audio:5|>": 5, + "<|audio:6|>": 6, + "<|audio:7|>": 7, + "<|audio:8|>": 8, + "<|audio:9|>": 9, + "<|audio:10|>": 10, + "<|audio:11|>": 11, + "<|audio:12|>": 12, + "<|audio:13|>": 13, + "<|audio:14|>": 14, + "<|audio:15|>": 15, + "<|audio:16|>": 16, + "<|audio:17|>": 17, + "<|audio:18|>": 18, + "<|audio:19|>": 19, + "<|audio:20|>": 20, + "<|audio:21|>": 21, + "<|audio:22|>": 22, + "<|audio:23|>": 23, + "<|audio:24|>": 24, + "<|audio:25|>": 25, + "<|audio:26|>": 26, + "<|audio:27|>": 27, + "<|audio:28|>": 28, + "<|audio:29|>": 29, + "<|audio:30|>": 30, + "<|audio:31|>": 31, + "<|audio:32|>": 32, + "<|audio:33|>": 33, + "<|audio:34|>": 34, + "<|audio:35|>": 35, + "<|audio:36|>": 36, + "<|audio:37|>": 37, + "<|audio:38|>": 38, + "<|audio:39|>": 39, + "<|audio:40|>": 40, + "<|audio:41|>": 41, + "<|audio:42|>": 42, + "<|audio:43|>": 43, + "<|audio:44|>": 44, + "<|audio:45|>": 45, + "<|audio:46|>": 46, + "<|audio:47|>": 47, + "<|audio:48|>": 48, + "<|audio:49|>": 49, + "<|audio:50|>": 50, + "<|audio:51|>": 51, + "<|audio:52|>": 52, + "<|audio:53|>": 
53, + "<|audio:54|>": 54, + "<|audio:55|>": 55, + "<|audio:56|>": 56, + "<|audio:57|>": 57, + "<|audio:58|>": 58, + "<|audio:59|>": 59, + "<|audio:60|>": 60, + "<|audio:61|>": 61, + "<|audio:62|>": 62, + "<|audio:63|>": 63, + "<|audio:64|>": 64, + "<|audio:65|>": 65, + "<|audio:66|>": 66, + "<|audio:67|>": 67, + "<|audio:68|>": 68, + "<|audio:69|>": 69, + "<|audio:70|>": 70, + "<|audio:71|>": 71, + "<|audio:72|>": 72, + "<|audio:73|>": 73, + "<|audio:74|>": 74, + "<|audio:75|>": 75, + "<|audio:76|>": 76, + "<|audio:77|>": 77, + "<|audio:78|>": 78, + "<|audio:79|>": 79, + "<|audio:80|>": 80, + "<|audio:81|>": 81, + "<|audio:82|>": 82, + "<|audio:83|>": 83, + "<|audio:84|>": 84, + "<|audio:85|>": 85, + "<|audio:86|>": 86, + "<|audio:87|>": 87, + "<|audio:88|>": 88, + "<|audio:89|>": 89, + "<|audio:90|>": 90, + "<|audio:91|>": 91, + "<|audio:92|>": 92, + "<|audio:93|>": 93, + "<|audio:94|>": 94, + "<|audio:95|>": 95, + "<|audio:96|>": 96, + "<|audio:97|>": 97, + "<|audio:98|>": 98, + "<|audio:99|>": 99, + "<|audio:100|>": 100, + "<|audio:101|>": 101, + "<|audio:102|>": 102, + "<|audio:103|>": 103, + "<|audio:104|>": 104, + "<|audio:105|>": 105, + "<|audio:106|>": 106, + "<|audio:107|>": 107, + "<|audio:108|>": 108, + "<|audio:109|>": 109, + "<|audio:110|>": 110, + "<|audio:111|>": 111, + "<|audio:112|>": 112, + "<|audio:113|>": 113, + "<|audio:114|>": 114, + "<|audio:115|>": 115, + "<|audio:116|>": 116, + "<|audio:117|>": 117, + "<|audio:118|>": 118, + "<|audio:119|>": 119, + "<|audio:120|>": 120, + "<|audio:121|>": 121, + "<|audio:122|>": 122, + "<|audio:123|>": 123, + "<|audio:124|>": 124, + "<|audio:125|>": 125, + "<|audio:126|>": 126, + "<|audio:127|>": 127, + "<|audio:128|>": 128, + "<|audio:129|>": 129, + "<|audio:130|>": 130, + "<|audio:131|>": 131, + "<|audio:132|>": 132, + "<|audio:133|>": 133, + "<|audio:134|>": 134, + "<|audio:135|>": 135, + "<|audio:136|>": 136, + "<|audio:137|>": 137, + "<|audio:138|>": 138, + "<|audio:139|>": 139, + "<|audio:140|>": 140, 
+ "<|audio:141|>": 141, + "<|audio:142|>": 142, + "<|audio:143|>": 143, + "<|audio:144|>": 144, + "<|audio:145|>": 145, + "<|audio:146|>": 146, + "<|audio:147|>": 147, + "<|audio:148|>": 148, + "<|audio:149|>": 149, + "<|audio:150|>": 150, + "<|audio:151|>": 151, + "<|audio:152|>": 152, + "<|audio:153|>": 153, + "<|audio:154|>": 154, + "<|audio:155|>": 155, + "<|audio:156|>": 156, + "<|audio:157|>": 157, + "<|audio:158|>": 158, + "<|audio:159|>": 159, + "<|audio:160|>": 160, + "<|audio:161|>": 161, + "<|audio:162|>": 162, + "<|audio:163|>": 163, + "<|audio:164|>": 164, + "<|audio:165|>": 165, + "<|audio:166|>": 166, + "<|audio:167|>": 167, + "<|audio:168|>": 168, + "<|audio:169|>": 169, + "<|audio:170|>": 170, + "<|audio:171|>": 171, + "<|audio:172|>": 172, + "<|audio:173|>": 173, + "<|audio:174|>": 174, + "<|audio:175|>": 175, + "<|audio:176|>": 176, + "<|audio:177|>": 177, + "<|audio:178|>": 178, + "<|audio:179|>": 179, + "<|audio:180|>": 180, + "<|audio:181|>": 181, + "<|audio:182|>": 182, + "<|audio:183|>": 183, + "<|audio:184|>": 184, + "<|audio:185|>": 185, + "<|audio:186|>": 186, + "<|audio:187|>": 187, + "<|audio:188|>": 188, + "<|audio:189|>": 189, + "<|audio:190|>": 190, + "<|audio:191|>": 191, + "<|audio:192|>": 192, + "<|audio:193|>": 193, + "<|audio:194|>": 194, + "<|audio:195|>": 195, + "<|audio:196|>": 196, + "<|audio:197|>": 197, + "<|audio:198|>": 198, + "<|audio:199|>": 199, + "<|audio:200|>": 200, + "<|audio:201|>": 201, + "<|audio:202|>": 202, + "<|audio:203|>": 203, + "<|audio:204|>": 204, + "<|audio:205|>": 205, + "<|audio:206|>": 206, + "<|audio:207|>": 207, + "<|audio:208|>": 208, + "<|audio:209|>": 209, + "<|audio:210|>": 210, + "<|audio:211|>": 211, + "<|audio:212|>": 212, + "<|audio:213|>": 213, + "<|audio:214|>": 214, + "<|audio:215|>": 215, + "<|audio:216|>": 216, + "<|audio:217|>": 217, + "<|audio:218|>": 218, + "<|audio:219|>": 219, + "<|audio:220|>": 220, + "<|audio:221|>": 221, + "<|audio:222|>": 222, + "<|audio:223|>": 223, + 
"<|audio:224|>": 224, + "<|audio:225|>": 225, + "<|audio:226|>": 226, + "<|audio:227|>": 227, + "<|audio:228|>": 228, + "<|audio:229|>": 229, + "<|audio:230|>": 230, + "<|audio:231|>": 231, + "<|audio:232|>": 232, + "<|audio:233|>": 233, + "<|audio:234|>": 234, + "<|audio:235|>": 235, + "<|audio:236|>": 236, + "<|audio:237|>": 237, + "<|audio:238|>": 238, + "<|audio:239|>": 239, + "<|audio:240|>": 240, + "<|audio:241|>": 241, + "<|audio:242|>": 242, + "<|audio:243|>": 243, + "<|audio:244|>": 244, + "<|audio:245|>": 245, + "<|audio:246|>": 246, + "<|audio:247|>": 247, + "<|audio:248|>": 248, + "<|audio:249|>": 249, + "<|audio:250|>": 250, + "<|audio:251|>": 251, + "<|audio:252|>": 252, + "<|audio:253|>": 253, + "<|audio:254|>": 254, + "<|audio:255|>": 255, + "<|audio:256|>": 256, + "<|audio:257|>": 257, + "<|audio:258|>": 258, + "<|audio:259|>": 259, + "<|audio:260|>": 260, + "<|audio:261|>": 261, + "<|audio:262|>": 262, + "<|audio:263|>": 263, + "<|audio:264|>": 264, + "<|audio:265|>": 265, + "<|audio:266|>": 266, + "<|audio:267|>": 267, + "<|audio:268|>": 268, + "<|audio:269|>": 269, + "<|audio:270|>": 270, + "<|audio:271|>": 271, + "<|audio:272|>": 272, + "<|audio:273|>": 273, + "<|audio:274|>": 274, + "<|audio:275|>": 275, + "<|audio:276|>": 276, + "<|audio:277|>": 277, + "<|audio:278|>": 278, + "<|audio:279|>": 279, + "<|audio:280|>": 280, + "<|audio:281|>": 281, + "<|audio:282|>": 282, + "<|audio:283|>": 283, + "<|audio:284|>": 284, + "<|audio:285|>": 285, + "<|audio:286|>": 286, + "<|audio:287|>": 287, + "<|audio:288|>": 288, + "<|audio:289|>": 289, + "<|audio:290|>": 290, + "<|audio:291|>": 291, + "<|audio:292|>": 292, + "<|audio:293|>": 293, + "<|audio:294|>": 294, + "<|audio:295|>": 295, + "<|audio:296|>": 296, + "<|audio:297|>": 297, + "<|audio:298|>": 298, + "<|audio:299|>": 299, + "<|audio:300|>": 300, + "<|audio:301|>": 301, + "<|audio:302|>": 302, + "<|audio:303|>": 303, + "<|audio:304|>": 304, + "<|audio:305|>": 305, + "<|audio:306|>": 306, + 
"<|audio:307|>": 307, + "<|audio:308|>": 308, + "<|audio:309|>": 309, + "<|audio:310|>": 310, + "<|audio:311|>": 311, + "<|audio:312|>": 312, + "<|audio:313|>": 313, + "<|audio:314|>": 314, + "<|audio:315|>": 315, + "<|audio:316|>": 316, + "<|audio:317|>": 317, + "<|audio:318|>": 318, + "<|audio:319|>": 319, + "<|audio:320|>": 320, + "<|audio:321|>": 321, + "<|audio:322|>": 322, + "<|audio:323|>": 323, + "<|audio:324|>": 324, + "<|audio:325|>": 325, + "<|audio:326|>": 326, + "<|audio:327|>": 327, + "<|audio:328|>": 328, + "<|audio:329|>": 329, + "<|audio:330|>": 330, + "<|audio:331|>": 331, + "<|audio:332|>": 332, + "<|audio:333|>": 333, + "<|audio:334|>": 334, + "<|audio:335|>": 335, + "<|audio:336|>": 336, + "<|audio:337|>": 337, + "<|audio:338|>": 338, + "<|audio:339|>": 339, + "<|audio:340|>": 340, + "<|audio:341|>": 341, + "<|audio:342|>": 342, + "<|audio:343|>": 343, + "<|audio:344|>": 344, + "<|audio:345|>": 345, + "<|audio:346|>": 346, + "<|audio:347|>": 347, + "<|audio:348|>": 348, + "<|audio:349|>": 349, + "<|audio:350|>": 350, + "<|audio:351|>": 351, + "<|audio:352|>": 352, + "<|audio:353|>": 353, + "<|audio:354|>": 354, + "<|audio:355|>": 355, + "<|audio:356|>": 356, + "<|audio:357|>": 357, + "<|audio:358|>": 358, + "<|audio:359|>": 359, + "<|audio:360|>": 360, + "<|audio:361|>": 361, + "<|audio:362|>": 362, + "<|audio:363|>": 363, + "<|audio:364|>": 364, + "<|audio:365|>": 365, + "<|audio:366|>": 366, + "<|audio:367|>": 367, + "<|audio:368|>": 368, + "<|audio:369|>": 369, + "<|audio:370|>": 370, + "<|audio:371|>": 371, + "<|audio:372|>": 372, + "<|audio:373|>": 373, + "<|audio:374|>": 374, + "<|audio:375|>": 375, + "<|audio:376|>": 376, + "<|audio:377|>": 377, + "<|audio:378|>": 378, + "<|audio:379|>": 379, + "<|audio:380|>": 380, + "<|audio:381|>": 381, + "<|audio:382|>": 382, + "<|audio:383|>": 383, + "<|audio:384|>": 384, + "<|audio:385|>": 385, + "<|audio:386|>": 386, + "<|audio:387|>": 387, + "<|audio:388|>": 388, + "<|audio:389|>": 389, + 
"<|audio:390|>": 390, + "<|audio:391|>": 391, + "<|audio:392|>": 392, + "<|audio:393|>": 393, + "<|audio:394|>": 394, + "<|audio:395|>": 395, + "<|audio:396|>": 396, + "<|audio:397|>": 397, + "<|audio:398|>": 398, + "<|audio:399|>": 399, + "<|audio:400|>": 400, + "<|audio:401|>": 401, + "<|audio:402|>": 402, + "<|audio:403|>": 403, + "<|audio:404|>": 404, + "<|audio:405|>": 405, + "<|audio:406|>": 406, + "<|audio:407|>": 407, + "<|audio:408|>": 408, + "<|audio:409|>": 409, + "<|audio:410|>": 410, + "<|audio:411|>": 411, + "<|audio:412|>": 412, + "<|audio:413|>": 413, + "<|audio:414|>": 414, + "<|audio:415|>": 415, + "<|audio:416|>": 416, + "<|audio:417|>": 417, + "<|audio:418|>": 418, + "<|audio:419|>": 419, + "<|audio:420|>": 420, + "<|audio:421|>": 421, + "<|audio:422|>": 422, + "<|audio:423|>": 423, + "<|audio:424|>": 424, + "<|audio:425|>": 425, + "<|audio:426|>": 426, + "<|audio:427|>": 427, + "<|audio:428|>": 428, + "<|audio:429|>": 429, + "<|audio:430|>": 430, + "<|audio:431|>": 431, + "<|audio:432|>": 432, + "<|audio:433|>": 433, + "<|audio:434|>": 434, + "<|audio:435|>": 435, + "<|audio:436|>": 436, + "<|audio:437|>": 437, + "<|audio:438|>": 438, + "<|audio:439|>": 439, + "<|audio:440|>": 440, + "<|audio:441|>": 441, + "<|audio:442|>": 442, + "<|audio:443|>": 443, + "<|audio:444|>": 444, + "<|audio:445|>": 445, + "<|audio:446|>": 446, + "<|audio:447|>": 447, + "<|audio:448|>": 448, + "<|audio:449|>": 449, + "<|audio:450|>": 450, + "<|audio:451|>": 451, + "<|audio:452|>": 452, + "<|audio:453|>": 453, + "<|audio:454|>": 454, + "<|audio:455|>": 455, + "<|audio:456|>": 456, + "<|audio:457|>": 457, + "<|audio:458|>": 458, + "<|audio:459|>": 459, + "<|audio:460|>": 460, + "<|audio:461|>": 461, + "<|audio:462|>": 462, + "<|audio:463|>": 463, + "<|audio:464|>": 464, + "<|audio:465|>": 465, + "<|audio:466|>": 466, + "<|audio:467|>": 467, + "<|audio:468|>": 468, + "<|audio:469|>": 469, + "<|audio:470|>": 470, + "<|audio:471|>": 471, + "<|audio:472|>": 472, + 
"<|audio:473|>": 473, + "<|audio:474|>": 474, + "<|audio:475|>": 475, + "<|audio:476|>": 476, + "<|audio:477|>": 477, + "<|audio:478|>": 478, + "<|audio:479|>": 479, + "<|audio:480|>": 480, + "<|audio:481|>": 481, + "<|audio:482|>": 482, + "<|audio:483|>": 483, + "<|audio:484|>": 484, + "<|audio:485|>": 485, + "<|audio:486|>": 486, + "<|audio:487|>": 487, + "<|audio:488|>": 488, + "<|audio:489|>": 489, + "<|audio:490|>": 490, + "<|audio:491|>": 491, + "<|audio:492|>": 492, + "<|audio:493|>": 493, + "<|audio:494|>": 494, + "<|audio:495|>": 495, + "<|audio:496|>": 496, + "<|audio:497|>": 497, + "<|audio:498|>": 498, + "<|audio:499|>": 499, + "<|audio:500|>": 500, + "<|audio:501|>": 501, + "<|audio:502|>": 502, + "<|audio:503|>": 503, + "<|audio:504|>": 504, + "<|audio:505|>": 505, + "<|audio:506|>": 506, + "<|audio:507|>": 507, + "<|audio:508|>": 508, + "<|audio:509|>": 509, + "<|audio:510|>": 510, + "<|audio:511|>": 511, + "<|audio:512|>": 512, + "<|audio:513|>": 513, + "<|audio:514|>": 514, + "<|audio:515|>": 515, + "<|audio:516|>": 516, + "<|audio:517|>": 517, + "<|audio:518|>": 518, + "<|audio:519|>": 519, + "<|audio:520|>": 520, + "<|audio:521|>": 521, + "<|audio:522|>": 522, + "<|audio:523|>": 523, + "<|audio:524|>": 524, + "<|audio:525|>": 525, + "<|audio:526|>": 526, + "<|audio:527|>": 527, + "<|audio:528|>": 528, + "<|audio:529|>": 529, + "<|audio:530|>": 530, + "<|audio:531|>": 531, + "<|audio:532|>": 532, + "<|audio:533|>": 533, + "<|audio:534|>": 534, + "<|audio:535|>": 535, + "<|audio:536|>": 536, + "<|audio:537|>": 537, + "<|audio:538|>": 538, + "<|audio:539|>": 539, + "<|audio:540|>": 540, + "<|audio:541|>": 541, + "<|audio:542|>": 542, + "<|audio:543|>": 543, + "<|audio:544|>": 544, + "<|audio:545|>": 545, + "<|audio:546|>": 546, + "<|audio:547|>": 547, + "<|audio:548|>": 548, + "<|audio:549|>": 549, + "<|audio:550|>": 550, + "<|audio:551|>": 551, + "<|audio:552|>": 552, + "<|audio:553|>": 553, + "<|audio:554|>": 554, + "<|audio:555|>": 555, + 
"<|audio:556|>": 556, + "<|audio:557|>": 557, + "<|audio:558|>": 558, + "<|audio:559|>": 559, + "<|audio:560|>": 560, + "<|audio:561|>": 561, + "<|audio:562|>": 562, + "<|audio:563|>": 563, + "<|audio:564|>": 564, + "<|audio:565|>": 565, + "<|audio:566|>": 566, + "<|audio:567|>": 567, + "<|audio:568|>": 568, + "<|audio:569|>": 569, + "<|audio:570|>": 570, + "<|audio:571|>": 571, + "<|audio:572|>": 572, + "<|audio:573|>": 573, + "<|audio:574|>": 574, + "<|audio:575|>": 575, + "<|audio:576|>": 576, + "<|audio:577|>": 577, + "<|audio:578|>": 578, + "<|audio:579|>": 579, + "<|audio:580|>": 580, + "<|audio:581|>": 581, + "<|audio:582|>": 582, + "<|audio:583|>": 583, + "<|audio:584|>": 584, + "<|audio:585|>": 585, + "<|audio:586|>": 586, + "<|audio:587|>": 587, + "<|audio:588|>": 588, + "<|audio:589|>": 589, + "<|audio:590|>": 590, + "<|audio:591|>": 591, + "<|audio:592|>": 592, + "<|audio:593|>": 593, + "<|audio:594|>": 594, + "<|audio:595|>": 595, + "<|audio:596|>": 596, + "<|audio:597|>": 597, + "<|audio:598|>": 598, + "<|audio:599|>": 599, + "<|audio:600|>": 600, + "<|audio:601|>": 601, + "<|audio:602|>": 602, + "<|audio:603|>": 603, + "<|audio:604|>": 604, + "<|audio:605|>": 605, + "<|audio:606|>": 606, + "<|audio:607|>": 607, + "<|audio:608|>": 608, + "<|audio:609|>": 609, + "<|audio:610|>": 610, + "<|audio:611|>": 611, + "<|audio:612|>": 612, + "<|audio:613|>": 613, + "<|audio:614|>": 614, + "<|audio:615|>": 615, + "<|audio:616|>": 616, + "<|audio:617|>": 617, + "<|audio:618|>": 618, + "<|audio:619|>": 619, + "<|audio:620|>": 620, + "<|audio:621|>": 621, + "<|audio:622|>": 622, + "<|audio:623|>": 623, + "<|audio:624|>": 624, + "<|audio:625|>": 625, + "<|audio:626|>": 626, + "<|audio:627|>": 627, + "<|audio:628|>": 628, + "<|audio:629|>": 629, + "<|audio:630|>": 630, + "<|audio:631|>": 631, + "<|audio:632|>": 632, + "<|audio:633|>": 633, + "<|audio:634|>": 634, + "<|audio:635|>": 635, + "<|audio:636|>": 636, + "<|audio:637|>": 637, + "<|audio:638|>": 638, + 
"<|audio:639|>": 639, + "<|audio:640|>": 640, + "<|audio:641|>": 641, + "<|audio:642|>": 642, + "<|audio:643|>": 643, + "<|audio:644|>": 644, + "<|audio:645|>": 645, + "<|audio:646|>": 646, + "<|audio:647|>": 647, + "<|audio:648|>": 648, + "<|audio:649|>": 649, + "<|audio:650|>": 650, + "<|audio:651|>": 651, + "<|audio:652|>": 652, + "<|audio:653|>": 653, + "<|audio:654|>": 654, + "<|audio:655|>": 655, + "<|audio:656|>": 656, + "<|audio:657|>": 657, + "<|audio:658|>": 658, + "<|audio:659|>": 659, + "<|audio:660|>": 660, + "<|audio:661|>": 661, + "<|audio:662|>": 662, + "<|audio:663|>": 663, + "<|audio:664|>": 664, + "<|audio:665|>": 665, + "<|audio:666|>": 666, + "<|audio:667|>": 667, + "<|audio:668|>": 668, + "<|audio:669|>": 669, + "<|audio:670|>": 670, + "<|audio:671|>": 671, + "<|audio:672|>": 672, + "<|audio:673|>": 673, + "<|audio:674|>": 674, + "<|audio:675|>": 675, + "<|audio:676|>": 676, + "<|audio:677|>": 677, + "<|audio:678|>": 678, + "<|audio:679|>": 679, + "<|audio:680|>": 680, + "<|audio:681|>": 681, + "<|audio:682|>": 682, + "<|audio:683|>": 683, + "<|audio:684|>": 684, + "<|audio:685|>": 685, + "<|audio:686|>": 686, + "<|audio:687|>": 687, + "<|audio:688|>": 688, + "<|audio:689|>": 689, + "<|audio:690|>": 690, + "<|audio:691|>": 691, + "<|audio:692|>": 692, + "<|audio:693|>": 693, + "<|audio:694|>": 694, + "<|audio:695|>": 695, + "<|audio:696|>": 696, + "<|audio:697|>": 697, + "<|audio:698|>": 698, + "<|audio:699|>": 699, + "<|audio:700|>": 700, + "<|audio:701|>": 701, + "<|audio:702|>": 702, + "<|audio:703|>": 703, + "<|audio:704|>": 704, + "<|audio:705|>": 705, + "<|audio:706|>": 706, + "<|audio:707|>": 707, + "<|audio:708|>": 708, + "<|audio:709|>": 709, + "<|audio:710|>": 710, + "<|audio:711|>": 711, + "<|audio:712|>": 712, + "<|audio:713|>": 713, + "<|audio:714|>": 714, + "<|audio:715|>": 715, + "<|audio:716|>": 716, + "<|audio:717|>": 717, + "<|audio:718|>": 718, + "<|audio:719|>": 719, + "<|audio:720|>": 720, + "<|audio:721|>": 721, + 
"<|audio:722|>": 722, + "<|audio:723|>": 723, + "<|audio:724|>": 724, + "<|audio:725|>": 725, + "<|audio:726|>": 726, + "<|audio:727|>": 727, + "<|audio:728|>": 728, + "<|audio:729|>": 729, + "<|audio:730|>": 730, + "<|audio:731|>": 731, + "<|audio:732|>": 732, + "<|audio:733|>": 733, + "<|audio:734|>": 734, + "<|audio:735|>": 735, + "<|audio:736|>": 736, + "<|audio:737|>": 737, + "<|audio:738|>": 738, + "<|audio:739|>": 739, + "<|audio:740|>": 740, + "<|audio:741|>": 741, + "<|audio:742|>": 742, + "<|audio:743|>": 743, + "<|audio:744|>": 744, + "<|audio:745|>": 745, + "<|audio:746|>": 746, + "<|audio:747|>": 747, + "<|audio:748|>": 748, + "<|audio:749|>": 749, + "<|audio:750|>": 750, + "<|audio:751|>": 751, + "<|audio:752|>": 752, + "<|audio:753|>": 753, + "<|audio:754|>": 754, + "<|audio:755|>": 755, + "<|audio:756|>": 756, + "<|audio:757|>": 757, + "<|audio:758|>": 758, + "<|audio:759|>": 759, + "<|audio:760|>": 760, + "<|audio:761|>": 761, + "<|audio:762|>": 762, + "<|audio:763|>": 763, + "<|audio:764|>": 764, + "<|audio:765|>": 765, + "<|audio:766|>": 766, + "<|audio:767|>": 767, + "<|audio:768|>": 768, + "<|audio:769|>": 769, + "<|audio:770|>": 770, + "<|audio:771|>": 771, + "<|audio:772|>": 772, + "<|audio:773|>": 773, + "<|audio:774|>": 774, + "<|audio:775|>": 775, + "<|audio:776|>": 776, + "<|audio:777|>": 777, + "<|audio:778|>": 778, + "<|audio:779|>": 779, + "<|audio:780|>": 780, + "<|audio:781|>": 781, + "<|audio:782|>": 782, + "<|audio:783|>": 783, + "<|audio:784|>": 784, + "<|audio:785|>": 785, + "<|audio:786|>": 786, + "<|audio:787|>": 787, + "<|audio:788|>": 788, + "<|audio:789|>": 789, + "<|audio:790|>": 790, + "<|audio:791|>": 791, + "<|audio:792|>": 792, + "<|audio:793|>": 793, + "<|audio:794|>": 794, + "<|audio:795|>": 795, + "<|audio:796|>": 796, + "<|audio:797|>": 797, + "<|audio:798|>": 798, + "<|audio:799|>": 799, + "<|audio:800|>": 800, + "<|audio:801|>": 801, + "<|audio:802|>": 802, + "<|audio:803|>": 803, + "<|audio:804|>": 804, + 
"<|audio:805|>": 805, + "<|audio:806|>": 806, + "<|audio:807|>": 807, + "<|audio:808|>": 808, + "<|audio:809|>": 809, + "<|audio:810|>": 810, + "<|audio:811|>": 811, + "<|audio:812|>": 812, + "<|audio:813|>": 813, + "<|audio:814|>": 814, + "<|audio:815|>": 815, + "<|audio:816|>": 816, + "<|audio:817|>": 817, + "<|audio:818|>": 818, + "<|audio:819|>": 819, + "<|audio:820|>": 820, + "<|audio:821|>": 821, + "<|audio:822|>": 822, + "<|audio:823|>": 823, + "<|audio:824|>": 824, + "<|audio:825|>": 825, + "<|audio:826|>": 826, + "<|audio:827|>": 827, + "<|audio:828|>": 828, + "<|audio:829|>": 829, + "<|audio:830|>": 830, + "<|audio:831|>": 831, + "<|audio:832|>": 832, + "<|audio:833|>": 833, + "<|audio:834|>": 834, + "<|audio:835|>": 835, + "<|audio:836|>": 836, + "<|audio:837|>": 837, + "<|audio:838|>": 838, + "<|audio:839|>": 839, + "<|audio:840|>": 840, + "<|audio:841|>": 841, + "<|audio:842|>": 842, + "<|audio:843|>": 843, + "<|audio:844|>": 844, + "<|audio:845|>": 845, + "<|audio:846|>": 846, + "<|audio:847|>": 847, + "<|audio:848|>": 848, + "<|audio:849|>": 849, + "<|audio:850|>": 850, + "<|audio:851|>": 851, + "<|audio:852|>": 852, + "<|audio:853|>": 853, + "<|audio:854|>": 854, + "<|audio:855|>": 855, + "<|audio:856|>": 856, + "<|audio:857|>": 857, + "<|audio:858|>": 858, + "<|audio:859|>": 859, + "<|audio:860|>": 860, + "<|audio:861|>": 861, + "<|audio:862|>": 862, + "<|audio:863|>": 863, + "<|audio:864|>": 864, + "<|audio:865|>": 865, + "<|audio:866|>": 866, + "<|audio:867|>": 867, + "<|audio:868|>": 868, + "<|audio:869|>": 869, + "<|audio:870|>": 870, + "<|audio:871|>": 871, + "<|audio:872|>": 872, + "<|audio:873|>": 873, + "<|audio:874|>": 874, + "<|audio:875|>": 875, + "<|audio:876|>": 876, + "<|audio:877|>": 877, + "<|audio:878|>": 878, + "<|audio:879|>": 879, + "<|audio:880|>": 880, + "<|audio:881|>": 881, + "<|audio:882|>": 882, + "<|audio:883|>": 883, + "<|audio:884|>": 884, + "<|audio:885|>": 885, + "<|audio:886|>": 886, + "<|audio:887|>": 887, + 
"<|audio:888|>": 888, + "<|audio:889|>": 889, + "<|audio:890|>": 890, + "<|audio:891|>": 891, + "<|audio:892|>": 892, + "<|audio:893|>": 893, + "<|audio:894|>": 894, + "<|audio:895|>": 895, + "<|audio:896|>": 896, + "<|audio:897|>": 897, + "<|audio:898|>": 898, + "<|audio:899|>": 899, + "<|audio:900|>": 900, + "<|audio:901|>": 901, + "<|audio:902|>": 902, + "<|audio:903|>": 903, + "<|audio:904|>": 904, + "<|audio:905|>": 905, + "<|audio:906|>": 906, + "<|audio:907|>": 907, + "<|audio:908|>": 908, + "<|audio:909|>": 909, + "<|audio:910|>": 910, + "<|audio:911|>": 911, + "<|audio:912|>": 912, + "<|audio:913|>": 913, + "<|audio:914|>": 914, + "<|audio:915|>": 915, + "<|audio:916|>": 916, + "<|audio:917|>": 917, + "<|audio:918|>": 918, + "<|audio:919|>": 919, + "<|audio:920|>": 920, + "<|audio:921|>": 921, + "<|audio:922|>": 922, + "<|audio:923|>": 923, + "<|audio:924|>": 924, + "<|audio:925|>": 925, + "<|audio:926|>": 926, + "<|audio:927|>": 927, + "<|audio:928|>": 928, + "<|audio:929|>": 929, + "<|audio:930|>": 930, + "<|audio:931|>": 931, + "<|audio:932|>": 932, + "<|audio:933|>": 933, + "<|audio:934|>": 934, + "<|audio:935|>": 935, + "<|audio:936|>": 936, + "<|audio:937|>": 937, + "<|audio:938|>": 938, + "<|audio:939|>": 939, + "<|audio:940|>": 940, + "<|audio:941|>": 941, + "<|audio:942|>": 942, + "<|audio:943|>": 943, + "<|audio:944|>": 944, + "<|audio:945|>": 945, + "<|audio:946|>": 946, + "<|audio:947|>": 947, + "<|audio:948|>": 948, + "<|audio:949|>": 949, + "<|audio:950|>": 950, + "<|audio:951|>": 951, + "<|audio:952|>": 952, + "<|audio:953|>": 953, + "<|audio:954|>": 954, + "<|audio:955|>": 955, + "<|audio:956|>": 956, + "<|audio:957|>": 957, + "<|audio:958|>": 958, + "<|audio:959|>": 959, + "<|audio:960|>": 960, + "<|audio:961|>": 961, + "<|audio:962|>": 962, + "<|audio:963|>": 963, + "<|audio:964|>": 964, + "<|audio:965|>": 965, + "<|audio:966|>": 966, + "<|audio:967|>": 967, + "<|audio:968|>": 968, + "<|audio:969|>": 969, + "<|audio:970|>": 970, + 
"<|audio:971|>": 971, + "<|audio:972|>": 972, + "<|audio:973|>": 973, + "<|audio:974|>": 974, + "<|audio:975|>": 975, + "<|audio:976|>": 976, + "<|audio:977|>": 977, + "<|audio:978|>": 978, + "<|audio:979|>": 979, + "<|audio:980|>": 980, + "<|audio:981|>": 981, + "<|audio:982|>": 982, + "<|audio:983|>": 983, + "<|audio:984|>": 984, + "<|audio:985|>": 985, + "<|audio:986|>": 986, + "<|audio:987|>": 987, + "<|audio:988|>": 988, + "<|audio:989|>": 989, + "<|audio:990|>": 990, + "<|audio:991|>": 991, + "<|audio:992|>": 992, + "<|audio:993|>": 993, + "<|audio:994|>": 994, + "<|audio:995|>": 995, + "<|audio:996|>": 996, + "<|audio:997|>": 997, + "<|audio:998|>": 998, + "<|audio:999|>": 999, + "<|audio:1000|>": 1000, + "<|audio:1001|>": 1001, + "<|audio:1002|>": 1002, + "<|audio:1003|>": 1003, + "<|audio:1004|>": 1004, + "<|audio:1005|>": 1005, + "<|audio:1006|>": 1006, + "<|audio:1007|>": 1007, + "<|audio:1008|>": 1008, + "<|audio:1009|>": 1009, + "<|audio:1010|>": 1010, + "<|audio:1011|>": 1011, + "<|audio:1012|>": 1012, + "<|audio:1013|>": 1013, + "<|audio:1014|>": 1014, + "<|audio:1015|>": 1015, + "<|audio:1016|>": 1016, + "<|audio:1017|>": 1017, + "<|audio:1018|>": 1018, + "<|audio:1019|>": 1019, + "<|audio:1020|>": 1020, + "<|audio:1021|>": 1021, + "<|audio:1022|>": 1022, + "<|audio:1023|>": 1023, + "<|startoftranscript|>": 1024, + "<|endoftranscript|>": 1025, + "<|padding|>": 1026, + "'": 1027, + "a": 1028, + "b": 1029, + "c": 1030, + "d": 1031, + "e": 1032, + "f": 1033, + "g": 1034, + "h": 1035, + "i": 1036, + "j": 1037, + "k": 1038, + "l": 1039, + "m": 1040, + "n": 1041, + "o": 1042, + "p": 1043, + "q": 1044, + "r": 1045, + "s": 1046, + "t": 1047, + "u": 1048, + "v": 1049, + "w": 1050, + "x": 1051, + "y": 1052, + "z": 1053, + "▁": 1054, + "▁t": 1055, + "he": 1056, + "▁a": 1057, + "▁the": 1058, + "in": 1059, + "▁s": 1060, + "▁w": 1061, + "▁o": 1062, + "re": 1063, + "nd": 1064, + "▁b": 1065, + "▁h": 1066, + "er": 1067, + "▁m": 1068, + "▁i": 1069, + "ou": 1070, + 
"▁c": 1071, + "▁f": 1072, + "at": 1073, + "ed": 1074, + "▁and": 1075, + "en": 1076, + "▁to": 1077, + "▁of": 1078, + "on": 1079, + "is": 1080, + "▁d": 1081, + "ing": 1082, + "▁th": 1083, + "▁p": 1084, + "▁he": 1085, + "or": 1086, + "▁l": 1087, + "es": 1088, + "▁in": 1089, + "ll": 1090, + "it": 1091, + "ar": 1092, + "as": 1093, + "an": 1094, + "▁n": 1095, + "▁g": 1096, + "om": 1097, + "▁be": 1098, + "▁ha": 1099, + "▁e": 1100, + "le": 1101, + "ot": 1102, + "▁y": 1103, + "ut": 1104, + "ow": 1105, + "ic": 1106, + "▁wh": 1107, + "▁it": 1108, + "ld": 1109, + "ve": 1110, + "▁that": 1111, + "ly": 1112, + "▁was": 1113, + "id": 1114, + "se": 1115, + "st": 1116, + "▁on": 1117, + "gh": 1118, + "ent": 1119, + "▁re": 1120, + "▁you": 1121, + "im": 1122, + "ce": 1123, + "▁u": 1124, + "ver": 1125, + "ion": 1126, + "▁as": 1127, + "et": 1128, + "▁for": 1129, + "ay": 1130, + "▁his": 1131, + "▁we": 1132, + "ith": 1133, + "al": 1134, + "ir": 1135, + "▁r": 1136, + "▁with": 1137, + "▁st": 1138, + "ad": 1139, + "ur": 1140, + "ght": 1141, + "▁an": 1142, + "▁her": 1143, + "▁not": 1144, + "▁is": 1145, + "▁had": 1146, + "ter": 1147, + "her": 1148, + "ac": 1149, + "am": 1150, + "▁at": 1151, + "oo": 1152, + "▁but": 1153, + "ould": 1154, + "▁she": 1155, + "▁k": 1156, + "▁se": 1157, + "▁sa": 1158, + "▁sh": 1159, + "▁fr": 1160, + "▁him": 1161, + "▁so": 1162, + "▁me": 1163, + "ill": 1164, + "ain": 1165, + "▁su": 1166, + "ight": 1167, + "ch": 1168, + "red": 1169, + "ct": 1170, + "all": 1171, + "ro": 1172, + "ke": 1173, + "ess": 1174, + "il": 1175, + "'s": 1176, + "ore": 1177, + "▁de": 1178, + "▁my": 1179, + "▁they": 1180, + "▁whe": 1181, + "▁all": 1182, + "ich": 1183, + "▁ne": 1184, + "ri": 1185, + "▁by": 1186, + "▁have": 1187, + "ome": 1188, + "pp": 1189, + "▁this": 1190, + "▁li": 1191, + "▁do": 1192, + "▁con": 1193, + "us": 1194, + "▁which": 1195, + "▁ch": 1196, + "ul": 1197, + "qu": 1198, + "▁j": 1199, + "▁up": 1200, + "▁said": 1201, + "▁from": 1202, + "ard": 1203, + "ge": 1204, + "▁or": 1205, + 
"▁v": 1206, + "▁one": 1207, + "▁no": 1208, + "th": 1209, + "▁ex": 1210, + "▁were": 1211, + "▁there": 1212, + "pe": 1213, + "and": 1214, + "est": 1215, + "▁man": 1216, + "▁who": 1217, + "ble": 1218, + "ie": 1219, + "▁al": 1220, + "ant": 1221, + "res": 1222, + "ous": 1223, + "ust": 1224, + "very": 1225, + "ation": 1226, + "▁fe": 1227, + "▁them": 1228, + "lf": 1229, + "▁when": 1230, + "nt": 1231, + "ame": 1232, + "ind": 1233, + "ra": 1234, + "▁go": 1235, + "ers": 1236, + "ast": 1237, + "fe": 1238, + "ood": 1239, + "▁kn": 1240, + "▁int": 1241, + "ist": 1242, + "▁are": 1243, + "art": 1244, + "out": 1245, + "▁would": 1246, + "▁le": 1247, + "▁what": 1248, + "os": 1249, + "▁their": 1250, + "ong": 1251, + "our": 1252, + "▁if": 1253, + "▁com": 1254, + "ound": 1255, + "▁ab": 1256, + "▁out": 1257, + "▁wor": 1258, + "em": 1259, + "▁will": 1260, + "ak": 1261, + "▁mis": 1262, + "ate": 1263, + "ol": 1264, + "um": 1265, + "un": 1266, + "itt": 1267, + "ough": 1268, + "ked": 1269, + "ig": 1270, + "ap": 1271, + "one": 1272, + "▁been": 1273, + "own": 1274, + "ive": 1275, + "▁then": 1276, + "▁br": 1277, + "ven": 1278, + "if": 1279, + "▁ar": 1280, + "'t": 1281, + "self": 1282, + "▁tr": 1283, + "▁pl": 1284, + "▁ro": 1285, + "▁pr": 1286, + "ther": 1287, + "reat": 1288, + "▁un": 1289, + "▁af": 1290, + "▁sp": 1291, + "▁qu": 1292, + "▁pro": 1293, + "ity": 1294, + "hed": 1295, + "▁tw": 1296, + "▁ag": 1297, + "▁could": 1298, + "ost": 1299, + "ace": 1300, + "ort": 1301, + "ure": 1302, + "ake": 1303, + "▁am": 1304, + "ack": 1305, + "▁any": 1306, + "▁some": 1307, + "▁your": 1308, + "▁more": 1309, + "▁can": 1310, + "au": 1311, + "▁tim": 1312, + "ep": 1313, + "ag": 1314, + "▁en": 1315, + "ck": 1316, + "▁into": 1317, + "▁cl": 1318, + "ry": 1319, + "▁now": 1320, + "hing": 1321, + "nder": 1322, + "are": 1323, + "▁very": 1324, + "▁gr": 1325, + "el": 1326, + "ose": 1327, + "▁loo": 1328, + "▁bo": 1329, + "ved": 1330, + "op": 1331, + "▁other": 1332, + "▁did": 1333, + "ance": 1334, + "▁than": 1335, + 
"ittle": 1336, + "▁little": 1337, + "ine": 1338, + "ies": 1339, + "way": 1340, + "ite": 1341, + "▁like": 1342, + "ide": 1343, + "▁lo": 1344, + "ass": 1345, + "▁bl": 1346, + "able": 1347, + "urn": 1348, + "ought": 1349, + "▁know": 1350, + "other": 1351, + "▁time": 1352, + "▁im": 1353, + "▁dis": 1354, + "▁us": 1355, + "▁co": 1356, + "fore": 1357, + "▁how": 1358, + "▁te": 1359, + "ence": 1360, + "▁day": 1361, + "▁ad": 1362, + "ade": 1363, + "ice": 1364, + "▁about": 1365, + "▁see": 1366, + "▁over": 1367, + "pt": 1368, + "cc": 1369, + "▁too": 1370, + "ink": 1371, + "▁fl": 1372, + "wn": 1373, + "▁great": 1374, + "▁after": 1375, + "pl": 1376, + "de": 1377, + "▁per": 1378, + "ment": 1379, + "▁again": 1380, + "▁upon": 1381, + "▁hand": 1382, + "ab": 1383, + "▁has": 1384, + "ree": 1385, + "ish": 1386, + "ci": 1387, + "▁only": 1388, + "ally": 1389, + "▁well": 1390, + "▁should": 1391, + "▁po": 1392, + "▁mar": 1393, + "ress": 1394, + "▁say": 1395, + "▁good": 1396, + "ather": 1397, + "▁two": 1398, + "ings": 1399, + "▁pe": 1400, + "ount": 1401, + "▁our": 1402, + "ire": 1403, + "ving": 1404, + "▁down": 1405, + "ars": 1406, + "ert": 1407, + "we": 1408, + "▁before": 1409, + "ile": 1410, + "ves": 1411, + "▁app": 1412, + "▁every": 1413, + "▁its": 1414, + "▁old": 1415, + "▁thr": 1416, + "▁mu": 1417, + "▁made": 1418, + "ied": 1419, + "ick": 1420, + "▁long": 1421, + "age": 1422, + "te": 1423, + "ft": 1424, + "▁where": 1425, + "ang": 1426, + "▁never": 1427, + "▁must": 1428, + "▁pre": 1429, + "▁sm": 1430, + "ful": 1431, + "▁such": 1432, + "ull": 1433, + "▁str": 1434, + "ions": 1435, + "▁off": 1436, + "▁sc": 1437, + "▁came": 1438, + "ious": 1439, + "ue": 1440, + "▁miss": 1441, + "ward": 1442, + "ild": 1443, + "▁fir": 1444, + "▁even": 1445, + "▁under": 1446, + "act": 1447, + "▁these": 1448, + "▁come": 1449, + "▁part": 1450, + "▁fo": 1451, + "ated": 1452, + "ness": 1453, + "▁rem": 1454, + "ord": 1455, + "▁bec": 1456, + "ty": 1457, + "▁may": 1458, + "▁much": 1459, + "▁think": 1460, + "per": 
1461, + "▁way": 1462, + "▁mister": 1463, + "led": 1464, + "▁let": 1465, + "orn": 1466, + "▁ey": 1467, + "▁gl": 1468, + "▁cont": 1469, + "▁thought": 1470, + "▁look": 1471, + "ect": 1472, + "▁spe": 1473, + "ise": 1474, + "▁back": 1475, + "▁bet": 1476, + "ady": 1477, + "▁ye": 1478, + "ans": 1479, + "ach": 1480, + "▁here": 1481, + "▁just": 1482, + "ren": 1483, + "▁first": 1484, + "▁ho": 1485, + "▁own": 1486, + "▁des": 1487, + "▁ob": 1488, + "ried": 1489, + "ud": 1490, + "ary": 1491, + "▁went": 1492, + "▁mo": 1493, + "▁himself": 1494, + "▁men": 1495, + "air": 1496, + "cl": 1497, + "ave": 1498, + "ath": 1499, + "ff": 1500, + "▁sl": 1501, + "co": 1502, + "on't": 1503, + "llow": 1504, + "▁cr": 1505, + "▁res": 1506, + "▁i'": 1507, + "▁might": 1508, + "ily": 1509, + "▁seem": 1510, + "int": 1511, + "ip": 1512, + "▁beg": 1513, + "ouse": 1514, + "anc": 1515, + "n't": 1516, + "▁wat": 1517, + "▁through": 1518, + "▁comp": 1519, + "ber": 1520, + "▁away": 1521, + "▁car": 1522, + "▁em": 1523, + "▁get": 1524, + "▁imp": 1525, + "▁head": 1526, + "oss": 1527, + "▁life": 1528, + "▁bel": 1529, + "▁without": 1530, + "▁most": 1531, + "▁pass": 1532, + "▁make": 1533, + "▁cons": 1534, + "ened": 1535, + "▁som": 1536, + "▁turn": 1537, + "av": 1538, + "ng": 1539, + "▁shall": 1540, + "▁acc": 1541, + "▁those": 1542, + "▁pres": 1543, + "▁eyes": 1544, + "▁house": 1545, + "iz": 1546, + "▁somet": 1547, + "▁jo": 1548, + "▁still": 1549, + "▁call": 1550, + "▁night": 1551, + "hes": 1552, + "▁op": 1553, + "ause": 1554, + "▁wom": 1555, + "▁last": 1556, + "ks": 1557, + "less": 1558, + "ared": 1559, + "▁comm": 1560, + "▁don't": 1561, + "▁tell": 1562, + "▁ent": 1563, + "▁nothing": 1564, + "▁new": 1565, + "ign": 1566, + "▁take": 1567, + "▁being": 1568, + "▁many": 1569, + "▁word": 1570, + "ons": 1571, + "▁found": 1572, + "▁ret": 1573, + "ase": 1574, + "▁ear": 1575, + "▁while": 1576, + "▁att": 1577, + "ory": 1578, + "ix": 1579, + "▁ser": 1580, + "▁saw": 1581, + "▁put": 1582, + "ne": 1583, + "oth": 1584, + "iend": 
1585, + "▁peop": 1586, + "▁wr": 1587, + "▁young": 1588, + "ark": 1589, + "dy": 1590, + "aking": 1591, + "les": 1592, + "▁count": 1593, + "▁once": 1594, + "▁friend": 1595, + "▁la": 1596, + "ens": 1597, + "▁people": 1598, + "pect": 1599, + "ors": 1600, + "fect": 1601, + "▁mat": 1602, + "ince": 1603, + "ible": 1604, + "ered": 1605, + "▁room": 1606, + "▁three": 1607, + "▁yet": 1608, + "ail": 1609, + "▁same": 1610, + "▁father": 1611, + "▁right": 1612, + "▁child": 1613, + "▁cour": 1614, + "igh": 1615, + "▁place": 1616, + "▁another": 1617, + "ult": 1618, + "iv": 1619, + "ition": 1620, + "▁ind": 1621, + "▁want": 1622, + "▁though": 1623, + "▁nor": 1624, + "▁far": 1625, + "▁king": 1626, + "▁happ": 1627, + "▁heart": 1628, + "▁face": 1629, + "▁end": 1630, + "▁ever": 1631, + "▁nat": 1632, + "thing": 1633, + "▁love": 1634, + "get": 1635, + "▁took": 1636, + "▁dist": 1637, + "ever": 1638, + "ian": 1639, + "▁hu": 1640, + "ew": 1641, + "▁arm": 1642, + "▁inst": 1643, + "man": 1644, + "▁work": 1645, + "▁light": 1646, + "▁char": 1647, + "▁ple": 1648, + "ict": 1649, + "▁set": 1650, + "▁ac": 1651, + "▁looked": 1652, + "▁missus": 1653, + "▁asked": 1654, + "▁mind": 1655, + "▁yes": 1656, + "▁supp": 1657, + "▁inte": 1658, + "▁rep": 1659, + "cess": 1660, + "ently": 1661, + "▁left": 1662, + "gg": 1663, + "ertain": 1664, + "▁ke": 1665, + "ished": 1666, + "ub": 1667, + "▁pers": 1668, + "ways": 1669, + "▁things": 1670, + "alk": 1671, + "irl": 1672, + "▁mom": 1673, + "▁sir": 1674, + "▁wa": 1675, + "▁moment": 1676, + "ations": 1677, + "▁sat": 1678, + "sel": 1679, + "▁find": 1680, + "ower": 1681, + "ia": 1682, + "vent": 1683, + "rew": 1684, + "▁world": 1685, + "ject": 1686, + "▁give": 1687, + "▁cap": 1688, + "▁why": 1689, + "so": 1690, + "▁gu": 1691, + "▁mother": 1692, + "▁gen": 1693, + "▁sw": 1694, + "▁always": 1695, + "der": 1696, + "lt": 1697, + "ling": 1698, + "▁ans": 1699, + "pped": 1700, + "▁soon": 1701, + "▁act": 1702, + "▁form": 1703, + "▁el": 1704, + "dd": 1705, + "▁heard": 1706, + "ret": 
1707, + "▁thing": 1708, + "▁something": 1709, + "▁seemed": 1710, + "▁sub": 1711, + "▁door": 1712, + "ange": 1713, + "▁girl": 1714, + "ced": 1715, + "▁appe": 1716, + "ither": 1717, + "▁wind": 1718, + "▁because": 1719, + "▁dif": 1720, + "▁mon": 1721, + "ss": 1722, + "▁going": 1723, + "▁told": 1724, + "orm": 1725, + "▁home": 1726, + "ained": 1727, + "▁got": 1728, + "▁war": 1729, + "▁god": 1730, + "aught": 1731, + "▁gi": 1732, + "▁eng": 1733, + "▁sur": 1734, + "ning": 1735, + "▁hands": 1736, + "▁woman": 1737, + "▁follow": 1738, + "land": 1739, + "aut": 1740, + "▁vo": 1741, + "▁feel": 1742, + "▁rel": 1743, + "▁poss": 1744, + "ched": 1745, + "ical": 1746, + "ple": 1747, + "ph": 1748, + "▁boy": 1749, + "▁return": 1750, + "▁reg": 1751, + "▁rest": 1752, + "ook": 1753, + "▁knew": 1754, + "ner": 1755, + "▁each": 1756, + "▁oh": 1757, + "▁sil": 1758, + "▁kind": 1759, + "▁exp": 1760, + "▁ma": 1761, + "▁cle": 1762, + "▁hel": 1763, + "iver": 1764, + "ting": 1765, + "▁del": 1766, + "ual": 1767, + "▁inf": 1768, + "▁ass": 1769, + "▁water": 1770, + "▁conf": 1771, + "▁bre": 1772, + "▁wo": 1773, + "cept": 1774, + "▁belie": 1775, + "▁certain": 1776, + "▁against": 1777, + "▁hard": 1778, + "▁ph": 1779, + "row": 1780, + "▁unt": 1781, + "▁years": 1782, + "▁quite": 1783, + "▁side": 1784, + "iness": 1785, + "ined": 1786, + "▁near": 1787, + "▁hor": 1788, + "ters": 1789, + "ired": 1790, + "ool": 1791, + "▁four": 1792, + "▁few": 1793, + "▁done": 1794, + "ier": 1795, + "▁che": 1796, + "rest": 1797, + "ited": 1798, + "most": 1799, + "▁better": 1800, + "▁half": 1801, + "▁min": 1802, + "▁tre": 1803, + "ps": 1804, + "▁also": 1805, + "▁care": 1806, + "ock": 1807, + "uck": 1808, + "oub": 1809, + "▁began": 1810, + "ully": 1811, + "▁enough": 1812, + "ised": 1813, + "ru": 1814, + "▁having": 1815, + "▁seen": 1816, + "▁gener": 1817, + "▁lady": 1818, + "▁dra": 1819, + "▁hum": 1820, + "aps": 1821, + "ott": 1822, + "▁pur": 1823, + "aken": 1824, + "ross": 1825, + "ying": 1826, + "▁ter": 1827, + "▁hour": 1828, + 
"▁inde": 1829, + "ank": 1830, + "▁called": 1831, + "ial": 1832, + "ason": 1833, + "▁beh": 1834, + "▁does": 1835, + "▁whole": 1836, + "▁morn": 1837, + "▁turned": 1838, + "▁pleas": 1839, + "▁ste": 1840, + "▁ref": 1841, + "▁gave": 1842, + "ense": 1843, + "▁occ": 1844, + "ib": 1845, + "▁course": 1846, + "▁ins": 1847, + "ream": 1848, + "gether": 1849, + "uth": 1850, + "▁both": 1851, + "▁sou": 1852, + "▁cur": 1853, + "▁add": 1854, + "een": 1855, + "▁col": 1856, + "▁read": 1857, + "ween": 1858, + "selves": 1859, + "▁among": 1860, + "▁between": 1861, + "▁inc": 1862, + "▁keep": 1863, + "▁beaut": 1864, + "ular": 1865, + "▁poor": 1866, + "▁it's": 1867, + "▁sure": 1868, + "▁morning": 1869, + "▁white": 1870, + "ged": 1871, + "▁name": 1872, + "▁dear": 1873, + "▁toward": 1874, + "ute": 1875, + "▁small": 1876, + "▁whom": 1877, + "▁repl": 1878, + "▁sk": 1879, + "▁lar": 1880, + "▁felt": 1881, + "bo": 1882, + "osed": 1883, + "ating": 1884, + "▁myself": 1885, + "▁open": 1886, + "▁six": 1887, + "▁herself": 1888, + "▁however": 1889, + "▁bu": 1890, + "ond": 1891, + "aint": 1892, + "xt": 1893, + "▁fore": 1894, + "▁inter": 1895, + "▁ev": 1896, + "▁high": 1897, + "ction": 1898, + "▁hund": 1899, + "▁stood": 1900, + "▁hundred": 1901, + "aster": 1902, + "▁tra": 1903, + "▁show": 1904, + "▁sent": 1905, + "ife": 1906, + "▁round": 1907, + "▁sim": 1908, + "▁dr": 1909, + "▁gra": 1910, + "▁words": 1911, + "▁days": 1912, + "▁almost": 1913, + "ale": 1914, + "vel": 1915, + "▁point": 1916, + "ents": 1917, + "▁gre": 1918, + "▁eight": 1919, + "ces": 1920, + "ates": 1921, + "dden": 1922, + "▁fam": 1923, + "▁stand": 1924, + "▁bus": 1925, + "▁land": 1926, + "▁ed": 1927, + "▁mean": 1928, + "ung": 1929, + "haps": 1930, + "▁sun": 1931, + "ures": 1932, + "▁since": 1933, + "iet": 1934, + "ird": 1935, + "▁perhaps": 1936, + "ned": 1937, + "▁sle": 1938, + "iss": 1939, + "▁best": 1940, + "▁sudden": 1941, + "▁dark": 1942, + "▁replied": 1943, + "▁voice": 1944, + "▁met": 1945, + "▁anything": 1946, + "▁till": 1947, + 
"▁underst": 1948, + "▁bar": 1949, + "its": 1950, + "▁until": 1951, + "ins": 1952, + "oud": 1953, + "▁black": 1954, + "▁bro": 1955, + "▁hear": 1956, + "▁looking": 1957, + "▁cried": 1958, + "▁you'": 1959, + "▁fact": 1960, + "amp": 1961, + "▁prin": 1962, + "▁less": 1963, + "▁lay": 1964, + "▁next": 1965, + "▁law": 1966, + "up": 1967, + "▁power": 1968, + "▁prop": 1969, + "not": 1970, + "rent": 1971, + "▁brought": 1972, + "ately": 1973, + "enty": 1974, + "▁country": 1975, + "▁help": 1976, + "als": 1977, + "▁quest": 1978, + "med": 1979, + "▁use": 1980, + "▁vis": 1981, + "▁sn": 1982, + "▁i'm": 1983, + "fully": 1984, + "▁spo": 1985, + "▁together": 1986, + "▁need": 1987, + "▁air": 1988, + "▁adv": 1989, + "▁person": 1990, + "▁indeed": 1991, + "▁contin": 1992, + "▁unc": 1993, + "oney": 1994, + "▁gent": 1995, + "▁present": 1996, + "▁aw": 1997, + "▁par": 1998, + "ows": 1999, + "ured": 2000, + "▁full": 2001, + "tain": 2002, + "▁run": 2003, + "▁rather": 2004, + "▁ide": 2005, + "▁cond": 2006, + "nded": 2007, + "▁lat": 2008, + "▁sy": 2009, + "be": 2010, + "du": 2011, + "▁har": 2012, + "▁feet": 2013, + "▁fin": 2014, + "eter": 2015, + "▁fall": 2016, + "cei": 2017, + "▁five": 2018, + "▁mil": 2019, + "▁bed": 2020, + "oc": 2021, + "▁doct": 2022, + "▁interest": 2023, + "ressed": 2024, + "▁matter": 2025, + "▁lord": 2026, + "▁gone": 2027, + "▁es": 2028, + "fort": 2029, + "▁death": 2030, + "▁wife": 2031, + "▁serv": 2032, + "▁pat": 2033, + "ering": 2034, + "oubt": 2035, + "▁adm": 2036, + "▁talk": 2037, + "▁taken": 2038, + "▁art": 2039, + "▁tri": 2040, + "▁others": 2041, + "▁hope": 2042, + "ash": 2043, + "az": 2044, + "▁ext": 2045, + "▁cannot": 2046, + "ief": 2047, + "▁speak": 2048, + "▁lau": 2049, + "▁themselves": 2050, + "▁along": 2051, + "▁dire": 2052, + "ove": 2053, + "mb": 2054, + "pr": 2055, + "▁bes": 2056, + "▁cou": 2057, + "▁mor": 2058, + "ten": 2059, + "▁gentle": 2060, + "uring": 2061, + "▁fire": 2062, + "▁large": 2063, + "▁pol": 2064, + "▁cat": 2065, + "▁swe": 2066, + "ention": 2067, 
+ "vers": 2068, + "▁thus": 2069, + "app": 2070, + "▁sec": 2071, + "▁play": 2072, + "▁real": 2073, + "▁prom": 2074, + "ments": 2075, + "wered": 2076, + "ield": 2077, + "ains": 2078, + "ison": 2079, + "ached": 2080, + "▁thou": 2081, + "▁reason": 2082, + "▁thous": 2083, + "iting": 2084, + "▁brother": 2085, + "akes": 2086, + "▁thousand": 2087, + "ont": 2088, + "▁money": 2089, + "▁remem": 2090, + "▁dep": 2091, + "▁answered": 2092, + "▁true": 2093, + "▁children": 2094, + "▁behind": 2095, + "oy": 2096, + "▁sound": 2097, + "ants": 2098, + "ably": 2099, + "▁wood": 2100, + "used": 2101, + "▁dec": 2102, + "▁whose": 2103, + "od": 2104, + "▁ele": 2105, + "▁twenty": 2106, + "▁ra": 2107, + "itu": 2108, + "▁believe": 2109, + "▁wonder": 2110, + "ene": 2111, + "▁inv": 2112, + "▁hon": 2113, + "aring": 2114, + "sh": 2115, + "ued": 2116, + "▁suff": 2117, + "▁opp": 2118, + "▁doubt": 2119, + "▁rec": 2120, + "ton": 2121, + "▁hold": 2122, + "▁diffe": 2123, + "▁passed": 2124, + "▁cor": 2125, + "me": 2126, + "ided": 2127, + "ities": 2128, + "▁mer": 2129, + "▁sing": 2130, + "▁nature": 2131, + "▁alone": 2132, + "▁dead": 2133, + "▁pri": 2134, + "ken": 2135, + "lic": 2136, + "▁red": 2137, + "▁bur": 2138, + "aces": 2139, + "▁close": 2140, + "▁gold": 2141, + "▁start": 2142, + "▁hur": 2143, + "▁fur": 2144, + "og": 2145, + "ances": 2146, + "▁ask": 2147, + "▁doctor": 2148, + "▁son": 2149, + "▁ground": 2150, + "wer": 2151, + "ets": 2152, + "▁sea": 2153, + "▁strong": 2154, + "▁leave": 2155, + "▁compan": 2156, + "▁i'll": 2157, + "ery": 2158, + "cy": 2159, + "illed": 2160, + "ept": 2161, + "ides": 2162, + "tle": 2163, + "▁ce": 2164, + "▁obs": 2165, + "body": 2166, + "▁fell": 2167, + "▁sign": 2168, + "cond": 2169, + "▁mount": 2170, + "▁fair": 2171, + "▁given": 2172, + "▁therefore": 2173, + "ane": 2174, + "▁ir": 2175, + "▁deep": 2176, + "iful": 2177, + "fic": 2178, + "ys": 2179, + "▁often": 2180, + "▁body": 2181, + "unt": 2182, + "▁short": 2183, + "▁tem": 2184, + "▁fa": 2185, + "▁master": 2186, + "▁earth": 
2187, + "▁pap": 2188, + "ceed": 2189, + "▁stre": 2190, + "▁second": 2191, + "▁fort": 2192, + "bed": 2193, + "gth": 2194, + "owed": 2195, + "▁horse": 2196, + "idd": 2197, + "▁mad": 2198, + "ually": 2199, + "▁pa": 2200, + "▁chr": 2201, + "▁order": 2202, + "▁ten": 2203, + "vered": 2204, + "▁const": 2205, + "▁wish": 2206, + "▁fif": 2207, + "▁eas": 2208, + "▁cir": 2209, + "▁dro": 2210, + "aim": 2211, + "hen": 2212, + "▁ca": 2213, + "▁really": 2214, + "read": 2215, + "ceived": 2216, + "▁ill": 2217, + "▁fear": 2218, + "osition": 2219, + "▁understand": 2220, + "▁spir": 2221, + "▁list": 2222, + "▁abs": 2223, + "▁spr": 2224, + "aced": 2225, + "▁question": 2226, + "anger": 2227, + "▁everything": 2228, + "aughter": 2229, + "▁aff": 2230, + "▁wall": 2231, + "▁coming": 2232, + "ching": 2233, + "ready": 2234, + "ider": 2235, + "▁above": 2236, + "▁prince": 2237, + "▁already": 2238, + "▁least": 2239, + "▁reco": 2240, + "▁expl": 2241, + "▁step": 2242, + "▁used": 2243, + "▁ru": 2244, + "▁itself": 2245, + "ister": 2246, + "▁necess": 2247, + "▁case": 2248, + "▁around": 2249, + "hn": 2250, + "▁soul": 2251, + "▁suddenly": 2252, + "ger": 2253, + "▁lad": 2254, + "▁evening": 2255, + "▁mag": 2256, + "▁general": 2257, + "▁num": 2258, + "imes": 2259, + "▁known": 2260, + "▁wal": 2261, + "▁quick": 2262, + "ized": 2263, + "▁mus": 2264, + "▁sch": 2265, + "▁captain": 2266, + "▁that's": 2267, + "ific": 2268, + "▁whether": 2269, + "▁lear": 2270, + "gn": 2271, + "▁within": 2272, + "men": 2273, + "▁live": 2274, + "vern": 2275, + "▁times": 2276, + "▁expect": 2277, + "▁state": 2278, + "▁friends": 2279, + "▁bring": 2280, + "▁sort": 2281, + "▁women": 2282, + "▁table": 2283, + "▁meet": 2284, + "▁john": 2285, + "▁circ": 2286, + "▁sum": 2287, + "▁returned": 2288, + "iled": 2289, + "▁dri": 2290, + "▁held": 2291, + "▁exc": 2292, + "▁big": 2293, + "▁says": 2294, + "▁perfect": 2295, + "▁lea": 2296, + "▁obser": 2297, + "▁else": 2298, + "▁during": 2299, + "ident": 2300, + "▁hus": 2301, + "ted": 2302, + "▁beautiful": 
2303, + "▁clear": 2304, + "▁either": 2305, + "▁town": 2306, + "▁sight": 2307, + "▁lost": 2308, + "▁sleep": 2309, + "▁means": 2310, + "▁foot": 2311, + "▁cut": 2312, + "▁cal": 2313, + "▁kept": 2314, + "▁ran": 2315, + "ience": 2316, + "▁prof": 2317, + "tered": 2318, + "here": 2319, + "ety": 2320, + "▁fellow": 2321, + "▁can't": 2322, + "▁mist": 2323, + "▁past": 2324, + "▁dream": 2325, + "ages": 2326, + "▁became": 2327, + "▁pret": 2328, + "▁disc": 2329, + "▁bad": 2330, + "▁making": 2331, + "ution": 2332, + "▁object": 2333, + "▁towards": 2334, + "▁low": 2335, + "ught": 2336, + "▁dev": 2337, + "▁human": 2338, + "▁manner": 2339, + "▁strange": 2340, + "▁year": 2341, + "old": 2342, + "ient": 2343, + "ines": 2344, + "▁sever": 2345, + "mon": 2346, + "▁ann": 2347, + "airs": 2348, + "ches": 2349, + "▁city": 2350, + "▁sometimes": 2351, + "'d": 2352, + "▁rose": 2353, + "▁est": 2354, + "ility": 2355, + "▁walk": 2356, + "▁ready": 2357, + "▁pal": 2358, + "▁leg": 2359, + "▁road": 2360, + "ians": 2361, + "cious": 2362, + "▁corn": 2363, + "▁thy": 2364, + "▁cold": 2365, + "lly": 2366, + "iously": 2367, + "lish": 2368, + "▁stra": 2369, + "mer": 2370, + "▁bat": 2371, + "owing": 2372, + "iew": 2373, + "▁christ": 2374, + "▁squ": 2375, + "▁truth": 2376, + "cri": 2377, + "lled": 2378, + "▁thir": 2379, + "▁didn't": 2380, + "bert": 2381, + "▁soci": 2382, + "br": 2383, + "▁bit": 2384, + "▁subject": 2385, + "▁ship": 2386, + "▁mur": 2387, + "▁appro": 2388, + "▁pie": 2389, + "▁answer": 2390, + "▁free": 2391, + "▁business": 2392, + "▁ut": 2393, + "ape": 2394, + "▁appear": 2395, + "▁river": 2396, + "▁sto": 2397, + "▁cast": 2398, + "▁family": 2399, + "▁jud": 2400, + "▁excl": 2401, + "▁letter": 2402, + "ingly": 2403, + "rie": 2404, + "▁hair": 2405, + "ote": 2406, + "▁arms": 2407, + "▁become": 2408, + "ern": 2409, + "ouble": 2410, + "▁different": 2411, + "▁val": 2412, + "ffect": 2413, + "▁natur": 2414, + "▁possible": 2415, + "▁several": 2416, + "▁fine": 2417, + "ah": 2418, + "▁lead": 2419, + "▁forg": 
2420, + "▁express": 2421, + "li": 2422, + "▁sus": 2423, + "▁glad": 2424, + "oon": 2425, + "▁arri": 2426, + "▁blood": 2427, + "itting": 2428, + "▁quiet": 2429, + "rence": 2430, + "▁idea": 2431, + "▁able": 2432, + "itted": 2433, + "ster": 2434, + "▁charac": 2435, + "▁begin": 2436, + "▁chur": 2437, + "▁tou": 2438, + "▁story": 2439, + "▁eye": 2440, + "band": 2441, + "ative": 2442, + "▁grand": 2443, + "▁consider": 2444, + "▁across": 2445, + "▁pen": 2446, + "▁except": 2447, + "▁fre": 2448, + "▁win": 2449, + "▁equ": 2450, + "eth": 2451, + "▁cent": 2452, + "isf": 2453, + "▁partic": 2454, + "▁diffic": 2455, + "▁window": 2456, + "▁surpr": 2457, + "llect": 2458, + "▁prov": 2459, + "▁direct": 2460, + "▁conc": 2461, + "ey": 2462, + "aw": 2463, + "▁govern": 2464, + "▁disco": 2465, + "▁wild": 2466, + "▁dog": 2467, + "▁flo": 2468, + "▁soft": 2469, + "teen": 2470, + "▁cross": 2471, + "ased": 2472, + "▁effect": 2473, + "▁sor": 2474, + "▁longer": 2475, + "▁hen": 2476, + "▁followed": 2477, + "▁sold": 2478, + "▁thee": 2479, + "▁pub": 2480, + "▁husband": 2481, + "ards": 2482, + "antly": 2483, + "by": 2484, + "▁ap": 2485, + "▁suppose": 2486, + "▁respect": 2487, + "ts": 2488, + "▁hast": 2489, + "▁sal": 2490, + "▁comple": 2491, + "▁heav": 2492, + "▁happy": 2493, + "▁rich": 2494, + "▁creat": 2495, + "une": 2496, + "▁taking": 2497, + "▁requ": 2498, + "▁stay": 2499, + "▁spoke": 2500, + "▁daughter": 2501, + "▁wee": 2502, + "▁ve": 2503, + "▁du": 2504, + "▁green": 2505, + "▁anim": 2506, + "▁din": 2507, + "'ll": 2508, + "▁bird": 2509, + "alth": 2510, + "▁mere": 2511, + "▁gard": 2512, + "ny": 2513, + "ley": 2514, + "▁possess": 2515, + "empt": 2516, + "▁reached": 2517, + "▁appeared": 2518, + "ov": 2519, + "▁exist": 2520, + "ination": 2521, + "▁pretty": 2522, + "▁remember": 2523, + "▁hea": 2524, + "▁opened": 2525, + "▁tom": 2526, + "anged": 2527, + "▁slow": 2528, + "▁imag": 2529, + "▁i've": 2530, + "ract": 2531, + "▁saying": 2532, + "king": 2533, + "utes": 2534, + "▁common": 2535, + "▁occas": 2536, 
+ "▁book": 2537, + "▁rus": 2538, + "ames": 2539, + "ices": 2540, + "▁bright": 2541, + "ms": 2542, + "▁satisf": 2543, + "▁sense": 2544, + "▁fav": 2545, + "▁succ": 2546, + "ump": 2547, + "ising": 2548, + "▁lu": 2549, + "▁accord": 2550, + "tern": 2551, + "▁break": 2552, + "▁exper": 2553, + "▁month": 2554, + "use": 2555, + "▁dem": 2556, + "▁scar": 2557, + "▁continued": 2558, + "▁secret": 2559, + "▁church": 2560, + "▁tree": 2561, + "▁stri": 2562, + "▁carried": 2563, + "▁cry": 2564, + "nding": 2565, + "▁spirit": 2566, + "▁wanted": 2567, + "eric": 2568, + "▁certainly": 2569, + "▁command": 2570, + "▁dest": 2571, + "▁move": 2572, + "oun": 2573, + "▁sweet": 2574, + "▁street": 2575, + "▁ought": 2576, + "▁account": 2577, + "▁def": 2578, + "ham": 2579, + "▁prep": 2580, + "▁sens": 2581, + "▁esc": 2582, + "▁rock": 2583, + "ots": 2584, + "▁decl": 2585, + "▁purp": 2586, + "riage": 2587, + "outh": 2588, + "owers": 2589, + "▁draw": 2590, + "▁eat": 2591, + "▁breat": 2592, + "▁character": 2593, + "ime": 2594, + "cul": 2595, + "medi": 2596, + "▁stud": 2597, + "▁school": 2598, + "itude": 2599, + "▁heaven": 2600, + "▁feeling": 2601, + "▁sad": 2602, + "▁regard": 2603, + "ement": 2604, + "▁pain": 2605, + "▁worth": 2606, + "▁bra": 2607, + "ney": 2608, + "▁dut": 2609, + "▁smo": 2610, + "aimed": 2611, + "▁trans": 2612, + "▁delight": 2613, + "▁quar": 2614, + "▁hung": 2615, + "▁mot": 2616, + "▁blue": 2617, + "▁hot": 2618, + "▁hill": 2619, + "▁div": 2620, + "umb": 2621, + "▁disapp": 2622, + "▁marg": 2623, + "▁laugh": 2624, + "idence": 2625, + "▁produ": 2626, + "▁success": 2627, + "ury": 2628, + "son": 2629, + "▁fast": 2630, + "▁english": 2631, + "▁dress": 2632, + "▁hat": 2633, + "▁terri": 2634, + "▁port": 2635, + "▁neither": 2636, + "▁court": 2637, + "▁seven": 2638, + "▁fight": 2639, + "▁princess": 2640, + "▁lived": 2641, + "▁view": 2642, + "▁immedi": 2643, + "▁self": 2644, + "▁var": 2645, + "▁hours": 2646, + "▁mill": 2647, + "▁sol": 2648, + "▁exam": 2649, + "▁tried": 2650, + "▁won't": 2651, + 
"▁entered": 2652, + "▁disp": 2653, + "to": 2654, + "ric": 2655, + "▁carry": 2656, + "▁import": 2657, + "▁ang": 2658, + "ze": 2659, + "ony": 2660, + "▁danger": 2661, + "ledge": 2662, + "▁offic": 2663, + "▁cause": 2664, + "▁none": 2665, + "▁forward": 2666, + "▁uncle": 2667, + "▁tor": 2668, + "▁det": 2669, + "ask": 2670, + "▁len": 2671, + "▁further": 2672, + "▁pay": 2673, + "▁added": 2674, + "▁front": 2675, + "ror": 2676, + "▁ge": 2677, + "▁particular": 2678, + "▁deal": 2679, + "▁prot": 2680, + "▁led": 2681, + "▁acqu": 2682, + "▁pray": 2683, + "▁eff": 2684, + "▁happened": 2685, + "▁chief": 2686, + "lect": 2687, + "▁walked": 2688, + "▁later": 2689, + "▁joy": 2690, + "iar": 2691, + "day": 2692, + "▁ord": 2693, + "▁alth": 2694, + "▁comfort": 2695, + "▁prob": 2696, + "▁maj": 2697, + "▁affect": 2698, + "▁public": 2699, + "▁bene": 2700, + "ening": 2701, + "▁although": 2702, + "gr": 2703, + "▁sho": 2704, + "▁fig": 2705, + "resh": 2706, + "▁fail": 2707, + "uct": 2708, + "ug": 2709, + "ality": 2710, + "▁mem": 2711, + "▁seems": 2712, + "▁yourself": 2713, + "ship": 2714, + "ead": 2715, + "iam": 2716, + "▁number": 2717, + "side": 2718, + "▁ah": 2719, + "▁doing": 2720, + "▁living": 2721, + "arent": 2722, + "▁desp": 2723, + "ize": 2724, + "oof": 2725, + "▁field": 2726, + "▁received": 2727, + "▁shad": 2728, + "▁bey": 2729, + "▁beyond": 2730, + "▁phil": 2731, + "▁line": 2732, + "▁visit": 2733, + "inct": 2734, + "rig": 2735, + "▁party": 2736, + "▁garden": 2737, + "▁je": 2738, + "▁mouth": 2739, + "▁hall": 2740, + "▁queen": 2741, + "▁boat": 2742, + "▁bear": 2743, + "▁americ": 2744, + "ism": 2745, + "▁gentleman": 2746, + "▁vi": 2747, + "irt": 2748, + "uff": 2749, + "▁laid": 2750, + "raid": 2751, + "▁occasion": 2752, + "▁entire": 2753, + "▁age": 2754, + "▁sister": 2755, + "▁clot": 2756, + "▁repe": 2757, + "ously": 2758, + "▁prison": 2759, + "▁accom": 2760, + "▁whis": 2761, + "▁nearly": 2762, + "▁trees": 2763, + "iling": 2764, + "iff": 2765, + "▁eighteen": 2766, + "bit": 2767, + "wards": 
2768, + "▁early": 2769, + "▁tal": 2770, + "▁lab": 2771, + "▁forth": 2772, + "ming": 2773, + "ones": 2774, + "▁med": 2775, + "▁try": 2776, + "▁da": 2777, + "ilt": 2778, + "anced": 2779, + "▁princi": 2780, + "▁enem": 2781, + "▁thinking": 2782, + "▁chance": 2783, + "where": 2784, + "▁cre": 2785, + "▁minutes": 2786, + "▁anx": 2787, + "▁mary": 2788, + "▁pict": 2789, + "▁wait": 2790, + "▁vill": 2791, + "▁stren": 2792, + "▁afraid": 2793, + "▁crow": 2794, + "▁smile": 2795, + "▁late": 2796, + "▁england": 2797, + "▁pleasure": 2798, + "▁aunt": 2799, + "▁news": 2800, + "▁wis": 2801, + "▁fle": 2802, + "▁seeing": 2803, + "▁super": 2804, + "▁faith": 2805, + "▁rob": 2806, + "iment": 2807, + "oint": 2808, + "▁bill": 2809, + "lling": 2810, + "▁neigh": 2811, + "▁trouble": 2812, + "▁silence": 2813, + "▁plain": 2814, + "▁there's": 2815, + "aret": 2816, + "pend": 2817, + "▁exclaimed": 2818, + "rench": 2819, + "gy": 2820, + "▁miles": 2821, + "ply": 2822, + "▁glass": 2823, + "▁drew": 2824, + "▁neighb": 2825, + "els": 2826, + "▁mine": 2827, + "▁pract": 2828, + "▁heavy": 2829, + "▁standing": 2830, + "▁sevent": 2831, + "▁shar": 2832, + "▁change": 2833, + "▁necessary": 2834, + "▁chap": 2835, + "▁purpose": 2836, + "▁inqu": 2837, + "▁natural": 2838, + "▁deter": 2839, + "icked": 2840, + "▁bott": 2841, + "▁hardly": 2842, + "▁bell": 2843, + "▁top": 2844, + "▁caught": 2845, + "fered": 2846, + "wh": 2847, + "ives": 2848, + "ounded": 2849, + "▁auth": 2850, + "▁circum": 2851, + "▁fing": 2852, + "▁stopped": 2853, + "uc": 2854, + "▁wit": 2855, + "ament": 2856, + "▁opin": 2857, + "▁av": 2858, + "▁priv": 2859, + "aining": 2860, + "▁instead": 2861, + "rupt": 2862, + "▁grew": 2863, + "▁loved": 2864, + "▁island": 2865, + "▁knight": 2866, + "▁ago": 2867, + "▁length": 2868, + "▁inn": 2869, + "▁peace": 2870, + "ls": 2871, + "inary": 2872, + "ior": 2873, + "ues": 2874, + "▁third": 2875, + "ush": 2876, + "▁beauty": 2877, + "▁hig": 2878, + "▁he's": 2879, + "the": 2880, + "form": 2881, + "head": 2882, + "ically": 
2883, + "asp": 2884, + "ancy": 2885, + "▁determ": 2886, + "▁straight": 2887, + "▁cra": 2888, + "ining": 2889, + "pper": 2890, + "ler": 2891, + "▁infl": 2892, + "▁thor": 2893, + "▁convers": 2894, + "▁besides": 2895, + "▁position": 2896, + "▁thirty": 2897, + "▁den": 2898, + "rage": 2899, + "▁attention": 2900, + "ma": 2901, + "▁conv": 2902, + "ager": 2903, + "▁hist": 2904, + "ored": 2905, + "▁comes": 2906, + "aged": 2907, + "▁force": 2908, + "▁sitting": 2909, + "▁please": 2910, + "tend": 2911, + "iter": 2912, + "▁whatever": 2913, + "▁inform": 2914, + "▁hop": 2915, + "▁chair": 2916, + "▁build": 2917, + "▁bab": 2918, + "ustom": 2919, + "▁girls": 2920, + "▁rom": 2921, + "▁french": 2922, + "▁struck": 2923, + "▁pull": 2924, + "▁ast": 2925, + "▁lie": 2926, + "▁wrong": 2927, + "▁knowledge": 2928, + "▁grace": 2929, + "▁scarce": 2930, + "ghed": 2931, + "▁resol": 2932, + "▁watch": 2933, + "▁thoughts": 2934, + "▁rid": 2935, + "▁attempt": 2936, + "▁fifty": 2937, + "▁rap": 2938, + "▁box": 2939, + "hood": 2940, + "▁getting": 2941, + "▁ver": 2942, + "▁fat": 2943, + "▁company": 2944, + "▁arr": 2945, + "▁crowd": 2946, + "▁burn": 2947, + "▁slight": 2948, + "▁class": 2949, + "▁south": 2950, + "▁die": 2951, + "▁exact": 2952, + "▁drink": 2953, + "▁enj": 2954, + "▁thick": 2955, + "▁dinner": 2956, + "▁save": 2957, + "▁maid": 2958, + "▁plan": 2959, + "▁saint": 2960, + "▁immediately": 2961, + "iers": 2962, + "▁born": 2963, + "ius": 2964, + "▁rev": 2965, + "▁tears": 2966, + "ists": 2967, + "▁treat": 2968, + "usion": 2969, + "▁meant": 2970, + "▁boys": 2971, + "pping": 2972, + "▁slowly": 2973, + "▁incl": 2974, + "▁lim": 2975, + "▁died": 2976, + "iced": 2977, + "▁compl": 2978, + "▁fool": 2979, + "▁forest": 2980, + "▁sugg": 2981, + "▁post": 2982, + "▁accept": 2983, + "▁result": 2984, + "▁author": 2985, + "ndon": 2986, + "ceive": 2987, + "▁suggest": 2988, + "cient": 2989, + "▁stone": 2990, + "▁fright": 2991, + "▁paper": 2992, + "▁conse": 2993, + "▁jour": 2994, + "▁ty": 2995, + "▁enc": 2996, + 
"▁quickly": 2997, + "▁contr": 2998, + "▁youth": 2999, + "▁send": 3000, + "▁vict": 3001, + "ified": 3002, + "▁belong": 3003, + "▁warm": 3004, + "▁fix": 3005, + "▁imposs": 3006, + "▁beside": 3007, + "▁er": 3008, + "▁tone": 3009, + "▁camp": 3010, + "▁desire": 3011, + "▁bound": 3012, + "▁makes": 3013, + "▁margaret": 3014, + "▁north": 3015, + "▁brown": 3016, + "▁moon": 3017, + "▁lips": 3018, + "▁placed": 3019, + "val": 3020, + "▁circumst": 3021, + "▁food": 3022, + "▁filled": 3023, + "ics": 3024, + "ift": 3025, + "ann": 3026, + "▁london": 3027, + "▁distance": 3028, + "ging": 3029, + "▁strength": 3030, + "▁id": 3031, + "▁floor": 3032, + "▁forget": 3033, + "▁obl": 3034, + "▁mid": 3035, + "ries": 3036, + "itions": 3037, + "bs": 3038, + "▁spring": 3039, + "▁you're": 3040, + "▁viol": 3041, + "▁jack": 3042, + "▁pock": 3043, + "ooks": 3044, + "▁following": 3045, + "▁sac": 3046, + "▁remained": 3047, + "arch": 3048, + "▁grow": 3049, + "▁snow": 3050, + "▁government": 3051, + "▁ball": 3052, + "▁hors": 3053, + "▁nar": 3054, + "aded": 3055, + "▁broken": 3056, + "▁laughed": 3057, + "▁descri": 3058, + "▁safe": 3059, + "itten": 3060, + "ively": 3061, + "▁profess": 3062, + "▁o'": 3063, + "amed": 3064, + "▁depart": 3065, + "▁easy": 3066, + "oured": 3067, + "▁und": 3068, + "▁coun": 3069, + "▁thank": 3070, + "▁knows": 3071, + "▁waiting": 3072, + "dom": 3073, + "ats": 3074, + "▁ger": 3075, + "▁van": 3076, + "▁anne": 3077, + "▁horses": 3078, + "ugg": 3079, + "▁dread": 3080, + "▁une": 3081, + "ges": 3082, + "acy": 3083, + "▁proceed": 3084, + "▁gaz": 3085, + "▁shout": 3086, + "▁started": 3087, + "ented": 3088, + "▁complete": 3089, + "ope": 3090, + "▁gall": 3091, + "dered": 3092, + "▁wide": 3093, + "ires": 3094, + "▁neck": 3095, + "asure": 3096, + "isted": 3097, + "▁service": 3098, + "▁piece": 3099, + "cially": 3100, + "ences": 3101, + "▁sail": 3102, + "▁palace": 3103, + "erv": 3104, + "▁guard": 3105, + "▁doll": 3106, + "▁talking": 3107, + "▁man's": 3108, + "▁lift": 3109, + "▁grave": 3110, + 
"▁week": 3111, + "let": 3112, + "▁impossible": 3113, + "▁effort": 3114, + "▁imm": 3115, + "▁army": 3116, + "well": 3117, + "▁difficult": 3118, + "und": 3119, + "▁fresh": 3120, + "▁fun": 3121, + "reme": 3122, + "▁stop": 3123, + "▁mess": 3124, + "▁gar": 3125, + "▁deg": 3126, + "▁incre": 3127, + "▁corner": 3128, + "▁society": 3129, + "▁weak": 3130, + "▁shut": 3131, + "▁hy": 3132, + "▁proper": 3133, + "aching": 3134, + "▁cloud": 3135, + "iddle": 3136, + "ivid": 3137, + "▁demand": 3138, + "▁nine": 3139, + "▁sit": 3140, + "▁recogn": 3141, + "▁beat": 3142, + "uss": 3143, + "▁turning": 3144, + "▁sky": 3145, + "▁opinion": 3146, + "▁single": 3147, + "pic": 3148, + "▁fly": 3149, + "▁lang": 3150, + "▁mass": 3151, + "cell": 3152, + "▁outside": 3153, + "▁kiss": 3154, + "▁trust": 3155, + "▁occup": 3156, + "▁evil": 3157, + "▁below": 3158, + "▁appearance": 3159, + "uit": 3160, + "▁aftern": 3161, + "▁glo": 3162, + "▁gun": 3163, + "▁west": 3164, + "ency": 3165, + "par": 3166, + "▁showed": 3167, + "▁conversation": 3168, + "ises": 3169, + "▁conn": 3170, + "▁couldn't": 3171, + "▁running": 3172, + "▁mention": 3173, + "▁greater": 3174, + "▁music": 3175, + "▁breath": 3176, + "ases": 3177, + "▁nin": 3178, + "▁ant": 3179, + "arer": 3180, + "▁morrow": 3181, + "▁bank": 3182, + "▁espe": 3183, + "▁peter": 3184, + "ork": 3185, + "cial": 3186, + "▁presence": 3187, + "▁battle": 3188, + "▁winter": 3189, + "hered": 3190, + "▁probably": 3191, + "▁clothes": 3192, + "▁fash": 3193, + "▁mark": 3194, + "▁wished": 3195, + "vere": 3196, + "▁coll": 3197, + "▁emb": 3198, + "▁kne": 3199, + "▁married": 3200, + "▁arrived": 3201, + "▁pun": 3202, + "▁event": 3203, + "ushed": 3204, + "▁suffic": 3205, + "▁eager": 3206, + "▁former": 3207, + "▁giving": 3208, + "▁pop": 3209, + "▁sand": 3210, + "▁neg": 3211, + "▁usual": 3212, + "▁relig": 3213, + "▁simple": 3214, + "▁sym": 3215, + "itation": 3216, + "▁gro": 3217, + "ories": 3218, + "▁moved": 3219, + "▁months": 3220, + "▁speaking": 3221, + "▁pet": 3222, + "▁silent": 3223, 
+ "▁cab": 3224, + "▁mountain": 3225, + "▁expression": 3226, + "gar": 3227, + "▁covered": 3228, + "▁hunt": 3229, + "▁afternoon": 3230, + "aped": 3231, + "▁occur": 3232, + "rief": 3233, + "▁states": 3234, + "▁z": 3235, + "str": 3236, + "▁loc": 3237, + "light": 3238, + "▁shore": 3239, + "che": 3240, + "▁easily": 3241, + "▁pale": 3242, + "unity": 3243, + "▁remark": 3244, + "▁phys": 3245, + "▁beginning": 3246, + "▁duty": 3247, + "▁chapter": 3248, + "▁influ": 3249, + "cho": 3250, + "▁concl": 3251, + "amb": 3252, + "▁instant": 3253, + "▁polit": 3254, + "zz": 3255, + "▁enjoy": 3256, + "▁sick": 3257, + "▁remain": 3258, + "uel": 3259, + "▁stream": 3260, + "▁figure": 3261, + "ald": 3262, + "▁tur": 3263, + "▁path": 3264, + "▁vol": 3265, + "▁minute": 3266, + "▁pleasant": 3267, + "▁scarcely": 3268, + "▁conscious": 3269, + "▁terrible": 3270, + "▁kill": 3271, + "▁raised": 3272, + "▁fashion": 3273, + "▁twel": 3274, + "yal": 3275, + "▁leaving": 3276, + "▁twelve": 3277, + "ature": 3278, + "▁fut": 3279, + "▁threw": 3280, + "▁star": 3281, + "▁flowers": 3282, + "olog": 3283, + "▁trying": 3284, + "rib": 3285, + "▁sword": 3286, + "▁tall": 3287, + "▁marry": 3288, + "▁ben": 3289, + "▁expected": 3290, + "▁according": 3291, + "▁forty": 3292, + "▁stick": 3293, + "inal": 3294, + "▁guess": 3295, + "▁silver": 3296, + "▁iron": 3297, + "▁oblig": 3298, + "▁office": 3299, + "▁rapid": 3300, + "▁ladies": 3301, + "▁especially": 3302, + "ipped": 3303, + "orted": 3304, + "▁bread": 3305, + "ech": 3306, + "▁tender": 3307, + "orth": 3308, + "▁learned": 3309, + "▁books": 3310, + "▁isn't": 3311, + "▁surprise": 3312, + "▁write": 3313, + "▁purs": 3314, + "pered": 3315, + "▁written": 3316, + "▁killed": 3317, + "▁consequ": 3318, + "▁exh": 3319, + "▁places": 3320, + "▁condition": 3321, + "▁direction": 3322, + "▁cho": 3323, + "ulty": 3324, + "jo": 3325, + "mit": 3326, + "▁entirely": 3327, + "tering": 3328, + "▁enter": 3329, + "▁action": 3330, + "wise": 3331, + "▁suc": 3332, + "ibly": 3333, + "▁happiness": 3334, + 
"▁decided": 3335, + "▁golden": 3336, + "▁langu": 3337, + "eness": 3338, + "▁note": 3339, + "▁unless": 3340, + "uous": 3341, + "▁fal": 3342, + "aled": 3343, + "▁you'll": 3344, + "▁wonderful": 3345, + "ounds": 3346, + "ume": 3347, + "'re": 3348, + "▁shook": 3349, + "er's": 3350, + "oop": 3351, + "onel": 3352, + "▁perfectly": 3353, + "▁geor": 3354, + "ndered": 3355, + "▁broad": 3356, + "atic": 3357, + "▁closed": 3358, + "a's": 3359, + "▁spot": 3360, + "tended": 3361, + "▁latter": 3362, + "▁steps": 3363, + "▁merely": 3364, + "▁history": 3365, + "fer": 3366, + "▁wise": 3367, + "ishing": 3368, + "osing": 3369, + "▁middle": 3370, + "idered": 3371, + "▁understood": 3372, + "▁enemy": 3373, + "▁sole": 3374, + "llig": 3375, + "▁jew": 3376, + "▁simply": 3377, + "gan": 3378, + "▁conduct": 3379, + "▁tast": 3380, + "▁board": 3381, + "▁sav": 3382, + "▁wouldn't": 3383, + "▁shot": 3384, + "▁reply": 3385, + "▁changed": 3386, + "mn": 3387, + "▁grass": 3388, + "▁finally": 3389, + "▁admir": 3390, + "ital": 3391, + "▁sharp": 3392, + "itch": 3393, + "▁fortune": 3394, + "▁summer": 3395, + "▁experience": 3396, + "▁succeed": 3397, + "gress": 3398, + "uted": 3399, + "▁orig": 3400, + "retched": 3401, + "▁journey": 3402, + "▁excell": 3403, + "▁observed": 3404, + "ax": 3405, + "▁afterwards": 3406, + "fast": 3407, + "sy": 3408, + "▁bow": 3409, + "▁flat": 3410, + "▁persons": 3411, + "▁lean": 3412, + "▁earn": 3413, + "▁broke": 3414, + "▁mir": 3415, + "▁fit": 3416, + "osp": 3417, + "▁marriage": 3418, + "▁repres": 3419, + "io": 3420, + "▁lying": 3421, + "unk": 3422, + "▁trave": 3423, + "▁situ": 3424, + "▁listen": 3425, + "▁acquaint": 3426, + "▁ring": 3427, + "cience": 3428, + "▁faint": 3429, + "olute": 3430, + "▁calm": 3431, + "bered": 3432, + "▁lives": 3433, + "▁escape": 3434, + "▁beneath": 3435, + "ouses": 3436, + "▁clim": 3437, + "▁bless": 3438, + "▁repeated": 3439, + "▁pocket": 3440, + "ests": 3441, + "▁tail": 3442, + "▁passion": 3443, + "▁dick": 3444, + "▁ven": 3445, + "oses": 3446, + "clock": 
3447, + "▁mut": 3448, + "▁becom": 3449, + "▁oper": 3450, + "▁o'clock": 3451, + "▁fish": 3452, + "▁lou": 3453, + "semb": 3454, + "▁prev": 3455, + "▁allowed": 3456, + "▁famil": 3457, + "hel": 3458, + "▁gate": 3459, + "▁spite": 3460, + "ivers": 3461, + "▁health": 3462, + "ission": 3463, + "▁ign": 3464, + "▁reach": 3465, + "▁cand": 3466, + "▁rain": 3467, + "▁empl": 3468, + "▁ban": 3469, + "▁strugg": 3470, + "▁firm": 3471, + "▁bitter": 3472, + "▁sorry": 3473, + "bing": 3474, + "▁father's": 3475, + "▁temper": 3476, + "▁madame": 3477, + "ples": 3478, + "▁furn": 3479, + "▁future": 3480, + "umed": 3481, + "▁nice": 3482, + "▁separ": 3483, + "▁presently": 3484, + "▁circumstances": 3485, + "▁connect": 3486, + "iding": 3487, + "▁sett": 3488, + "kes": 3489, + "▁loud": 3490, + "▁worse": 3491, + "▁wand": 3492, + "▁spread": 3493, + "▁i'd": 3494, + "▁letters": 3495, + "▁yellow": 3496, + "▁magn": 3497, + "▁passing": 3498, + "▁kit": 3499, + "▁pleased": 3500, + "▁darkness": 3501, + "▁remar": 3502, + "idden": 3503, + "come": 3504, + "▁tea": 3505, + "▁civ": 3506, + "▁apart": 3507, + "▁disappe": 3508, + "▁important": 3509, + "▁legs": 3510, + "▁nation": 3511, + "▁delic": 3512, + "▁dressed": 3513, + "▁game": 3514, + "▁walls": 3515, + "ec": 3516, + "▁dry": 3517, + "▁virt": 3518, + "▁dim": 3519, + "idently": 3520, + "rel": 3521, + "▁rub": 3522, + "▁absolute": 3523, + "▁blind": 3524, + "▁discovered": 3525, + "▁exactly": 3526, + "▁dam": 3527, + "otten": 3528, + "▁sorrow": 3529, + "my": 3530, + "▁cost": 3531, + "ference": 3532, + "▁employ": 3533, + "velop": 3534, + "▁cous": 3535, + "▁beast": 3536, + "▁spec": 3537, + "▁opport": 3538, + "▁ears": 3539, + "▁dropped": 3540, + "▁subst": 3541, + "▁chee": 3542, + "▁protect": 3543, + "ils": 3544, + "▁smiled": 3545, + "ina": 3546, + "▁resp": 3547, + "▁promise": 3548, + "▁bag": 3549, + "▁host": 3550, + "urs": 3551, + "▁creature": 3552, + "▁notice": 3553, + "▁knowing": 3554, + "▁heads": 3555, + "▁concer": 3556, + "▁seat": 3557, + "ishment": 3558, + 
"▁individ": 3559, + "▁existence": 3560, + "▁determined": 3561, + "lend": 3562, + "▁storm": 3563, + "roy": 3564, + "ours": 3565, + "▁conce": 3566, + "anging": 3567, + "▁fixed": 3568, + "▁press": 3569, + "▁major": 3570, + "oved": 3571, + "▁ves": 3572, + "iod": 3573, + "▁learn": 3574, + "▁motion": 3575, + "▁empt": 3576, + "▁leaves": 3577, + "▁bottom": 3578, + "▁arg": 3579, + "iety": 3580, + "▁nobody": 3581, + "▁pros": 3582, + "que": 3583, + "▁utter": 3584, + "▁pick": 3585, + "acked": 3586, + "▁intellig": 3587, + "▁hes": 3588, + "▁stir": 3589, + "▁prevent": 3590, + "▁assist": 3591, + "▁dom": 3592, + "▁disg": 3593, + "▁advant": 3594, + "erable": 3595, + "▁vent": 3596, + "ument": 3597, + "▁tired": 3598, + "rect": 3599, + "ashed": 3600, + "action": 3601, + "▁considered": 3602, + "▁wrote": 3603, + "▁houses": 3604, + "▁suit": 3605, + "▁cheer": 3606, + "▁castle": 3607, + "▁pra": 3608, + "▁perform": 3609, + "ancing": 3610, + "▁clean": 3611, + "ruct": 3612, + "▁stro": 3613, + "▁frequ": 3614, + "▁drawing": 3615, + "▁luck": 3616, + "▁habit": 3617, + "idge": 3618, + "ell": 3619, + "▁ones": 3620, + "▁noble": 3621, + "▁splend": 3622, + "▁honor": 3623, + "zen": 3624, + "▁paid": 3625, + "▁speech": 3626, + "▁estab": 3627, + "▁ur": 3628, + "istr": 3629, + "▁individual": 3630, + "inite": 3631, + "▁vall": 3632, + "▁birds": 3633, + "rodu": 3634, + "▁dar": 3635, + "▁allow": 3636, + "▁confess": 3637, + "▁impress": 3638, + "▁propert": 3639, + "▁jane": 3640, + "▁song": 3641, + "▁various": 3642, + "▁narrow": 3643, + "▁moder": 3644, + "▁believed": 3645, + "ays": 3646, + "▁extra": 3647, + "▁pure": 3648, + "arily": 3649, + "▁period": 3650, + "▁shadow": 3651, + "▁somewh": 3652, + "▁mal": 3653, + "▁cott": 3654, + "▁extreme": 3655, + "▁judge": 3656, + "▁village": 3657, + "▁royal": 3658, + "▁somewhat": 3659, + "▁lower": 3660, + "▁ham": 3661, + "▁agree": 3662, + "▁remembered": 3663, + "▁aston": 3664, + "enth": 3665, + "▁declared": 3666, + "pan": 3667, + "▁train": 3668, + "▁parts": 3669, + "▁colonel": 
3670, + "amber": 3671, + "▁breakfast": 3672, + "▁surely": 3673, + "▁sin": 3674, + "ayed": 3675, + "▁scene": 3676, + "go": 3677, + "▁greatest": 3678, + "▁influence": 3679, + "▁custom": 3680, + "itary": 3681, + "▁animal": 3682, + "▁sake": 3683, + "▁mod": 3684, + "▁soldiers": 3685, + "iny": 3686, + "▁ancient": 3687, + "▁drawn": 3688, + "▁evidently": 3689, + "▁ways": 3690, + "▁looks": 3691, + "▁revol": 3692, + "ator": 3693, + "anted": 3694, + "▁reflect": 3695, + "▁picture": 3696, + "▁likely": 3697, + "▁shr": 3698, + "▁laws": 3699, + "▁holding": 3700, + "▁difficulty": 3701, + "▁inj": 3702, + "▁mel": 3703, + "▁courage": 3704, + "nes": 3705, + "▁mort": 3706, + "▁troub": 3707, + "▁burst": 3708, + "▁angry": 3709, + "▁proud": 3710, + "gged": 3711, + "▁spoken": 3712, + "ision": 3713, + "▁desert": 3714, + "ption": 3715, + "▁comb": 3716, + "▁apparent": 3717, + "ring": 3718, + "▁watched": 3719, + "na": 3720, + "▁east": 3721, + "▁shop": 3722, + "▁agre": 3723, + "▁private": 3724, + "esty": 3725, + "▁jul": 3726, + "▁finished": 3727, + "▁anxious": 3728, + "otion": 3729, + "▁fifteen": 3730, + "▁social": 3731, + "under": 3732, + "▁dism": 3733, + "▁touch": 3734, + "▁wine": 3735, + "▁attack": 3736, + "▁ideas": 3737, + "▁george": 3738, + "af": 3739, + "rer": 3740, + "oose": 3741, + "▁space": 3742, + "▁scr": 3743, + "▁inside": 3744, + "▁gentlemen": 3745, + "▁civil": 3746, + "iently": 3747, + "▁formed": 3748, + "▁fol": 3749, + "▁goes": 3750, + "▁you've": 3751, + "▁thin": 3752, + "▁surf": 3753, + "▁servant": 3754, + "▁bal": 3755, + "▁cover": 3756, + "▁ourselves": 3757, + "▁fallen": 3758, + "▁henry": 3759, + "▁lot": 3760, + "ium": 3761, + "▁advent": 3762, + "▁carriage": 3763, + "▁baby": 3764, + "▁elect": 3765, + "▁tong": 3766, + "▁appre": 3767, + "▁everybody": 3768, + "uded": 3769, + "▁commun": 3770, + "▁ine": 3771, + "itive": 3772, + "▁waited": 3773, + "cise": 3774, + "▁grou": 3775, + "het": 3776, + "▁vain": 3777, + "▁impro": 3778, + "▁favor": 3779, + "erial": 3780, + "▁speed": 3781, + 
"▁windows": 3782, + "▁carefully": 3783, + "▁ice": 3784, + "▁noise": 3785, + "▁hero": 3786, + "▁jim": 3787, + "▁william": 3788, + "▁pecul": 3789, + "▁promised": 3790, + "▁walking": 3791, + "▁forgotten": 3792, + "▁obliged": 3793, + "▁earnest": 3794, + "▁main": 3795, + "▁lose": 3796, + "▁glance": 3797, + "▁vessel": 3798, + "▁grad": 3799, + "▁thro": 3800, + "▁bod": 3801, + "▁shoulder": 3802, + "▁meth": 3803, + "▁animals": 3804, + "▁noticed": 3805, + "ables": 3806, + "▁peculiar": 3807, + "▁fier": 3808, + "▁pot": 3809, + "▁quietly": 3810, + "▁cup": 3811, + "▁serious": 3812, + "▁tremb": 3813, + "▁generally": 3814, + "▁american": 3815, + "▁symp": 3816, + "ral": 3817, + "▁don": 3818, + "▁france": 3819, + "iction": 3820, + "▁property": 3821, + "▁shoulders": 3822, + "▁stranger": 3823, + "▁san": 3824, + "▁cow": 3825, + "▁what's": 3826, + "▁dust": 3827, + "▁affection": 3828, + "▁handsome": 3829, + "▁higher": 3830, + "iant": 3831, + "nday": 3832, + "▁wel": 3833, + "▁poet": 3834, + "▁sla": 3835, + "▁distinct": 3836, + "▁mam": 3837, + "▁pier": 3838, + "acing": 3839, + "ague": 3840, + "▁grown": 3841, + "uly": 3842, + "▁d'": 3843, + "▁chamber": 3844, + "▁desce": 3845, + "▁murm": 3846, + "stem": 3847, + "▁personal": 3848, + "▁fancy": 3849, + "▁offered": 3850, + "osite": 3851, + "onsie": 3852, + "▁built": 3853, + "▁edge": 3854, + "▁whispered": 3855, + "▁skin": 3856, + "▁pieces": 3857, + "itated": 3858, + "cher": 3859, + "osity": 3860, + "▁pit": 3861, + "▁contro": 3862, + "▁faces": 3863, + "▁spent": 3864, + "▁interrupt": 3865, + "how": 3866, + "isters": 3867, + "▁butter": 3868, + "▁develop": 3869, + "▁unk": 3870, + "hip": 3871, + "▁heat": 3872, + "▁fond": 3873, + "▁coat": 3874, + "▁touched": 3875, + "▁hol": 3876, + "ingu": 3877, + "▁pi": 3878, + "▁race": 3879, + "▁jump": 3880, + "▁surprised": 3881, + "oted": 3882, + "▁defe": 3883, + "enced": 3884, + "▁wasn't": 3885, + "▁wear": 3886, + "andon": 3887, + "▁fan": 3888, + "acher": 3889, + "▁arch": 3890, + "▁educ": 3891, + "▁brave": 3892, + 
"athered": 3893, + "▁eld": 3894, + "▁wealth": 3895, + "▁system": 3896, + "▁german": 3897, + "▁false": 3898, + "wood": 3899, + "▁dare": 3900, + "aked": 3901, + "▁cousin": 3902, + "▁fer": 3903, + "key": 3904, + "▁lin": 3905, + "▁intellect": 3906, + "▁prepared": 3907, + "▁fingers": 3908, + "▁surr": 3909, + "▁mountains": 3910, + "ipp": 3911, + "▁opportunity": 3912, + "aff": 3913, + "▁bare": 3914, + "▁dor": 3915, + "▁introdu": 3916, + "▁collect": 3917, + "▁lovely": 3918, + "▁rag": 3919, + "▁crown": 3920, + "▁matters": 3921, + "▁companion": 3922, + "▁weather": 3923, + "▁alar": 3924, + "▁innoc": 3925, + "▁ris": 3926, + "▁mix": 3927, + "▁lake": 3928, + "▁store": 3929, + "▁unh": 3930, + "▁meaning": 3931, + "▁memory": 3932, + "over": 3933, + "▁band": 3934, + "leep": 3935, + "▁finding": 3936, + "ee": 3937, + "▁charge": 3938, + "▁grat": 3939, + "▁attract": 3940, + "▁gray": 3941, + "▁quarter": 3942, + "▁avo": 3943, + "▁greatly": 3944, + "▁mach": 3945, + "▁inh": 3946, + "▁asleep": 3947, + "▁paris": 3948, + "▁dav": 3949, + "▁alto": 3950, + "▁offer": 3951, + "▁opposite": 3952, + "ounced": 3953, + "erve": 3954, + "▁breast": 3955, + "nown": 3956, + "▁reading": 3957, + "▁altogether": 3958, + "▁writing": 3959, + "pected": 3960, + "▁degree": 3961, + "cing": 3962, + "night": 3963, + "▁exec": 3964, + "fortun": 3965, + "▁stat": 3966, + "▁feelings": 3967, + "▁hath": 3968, + "▁cook": 3969, + "▁rail": 3970, + "▁honour": 3971, + "ding": 3972, + "▁fate": 3973, + "▁por": 3974, + "▁frank": 3975, + "▁meeting": 3976, + "▁rough": 3977, + "▁alive": 3978, + "▁hide": 3979, + "ites": 3980, + "ilar": 3981, + "▁blow": 3982, + "▁cruel": 3983, + "raph": 3984, + "▁hurt": 3985, + "▁loss": 3986, + "▁thrown": 3987, + "▁caused": 3988, + "▁we'll": 3989, + "▁serve": 3990, + "▁duke": 3991, + "▁bent": 3992, + "▁united": 3993, + "▁seek": 3994, + "▁kingdom": 3995, + "▁situation": 3996, + "▁empty": 3997, + "ners": 3998, + "▁due": 3999, + "▁liked": 4000, + "▁swift": 4001, + "▁opening": 4002, + "▁servants": 4003, + 
"chen": 4004, + "oura": 4005, + "▁gh": 4006, + "▁suspic": 4007, + "▁freed": 4008, + "ointed": 4009, + "▁surface": 4010, + "cil": 4011, + "▁questions": 4012, + "▁ess": 4013, + "▁curious": 4014, + "▁constit": 4015, + "▁accompan": 4016, + "▁christian": 4017, + "▁fill": 4018, + "arest": 4019, + "▁satisfied": 4020, + "ron": 4021, + "▁sides": 4022, + "▁pity": 4023, + "▁reve": 4024, + "▁equal": 4025, + "▁height": 4026, + "▁ordered": 4027, + "osop": 4028, + "▁grey": 4029, + "▁listened": 4030, + "pet": 4031, + "▁rejo": 4032, + "▁capt": 4033, + "ibility": 4034, + "ob": 4035, + "▁mart": 4036, + "▁happen": 4037, + "▁hurried": 4038, + "▁dollars": 4039, + "▁language": 4040, + "▁ange": 4041, + "▁yours": 4042, + "▁supposed": 4043, + "▁laughing": 4044, + "▁settled": 4045, + "▁rode": 4046, + "▁perm": 4047, + "▁distingu": 4048, + "▁hurry": 4049, + "▁destroy": 4050, + "▁talked": 4051, + "▁lifted": 4052, + "ocr": 4053, + "▁square": 4054, + "▁value": 4055, + "▁taste": 4056, + "▁vast": 4057, + "▁king's": 4058, + "▁rul": 4059, + "▁roof": 4060, + "▁telling": 4061, + "▁study": 4062, + "▁ow": 4063, + "▁pan": 4064, + "▁bas": 4065, + "▁rising": 4066, + "▁sufficient": 4067, + "▁forced": 4068, + "▁rise": 4069, + "▁attend": 4070, + "▁philosop": 4071, + "▁nose": 4072, + "▁sixty": 4073, + "hest": 4074, + "▁pin": 4075, + "▁egg": 4076, + "▁amb": 4077, + "▁fault": 4078, + "bur": 4079, + "▁station": 4080, + "▁distur": 4081, + "▁regular": 4082, + "ille": 4083, + "▁pack": 4084, + "▁special": 4085, + "▁honest": 4086, + "▁building": 4087, + "▁season": 4088, + "▁shape": 4089, + "▁pride": 4090, + "▁smiling": 4091, + "like": 4092, + "▁orders": 4093, + "yn": 4094, + "▁woods": 4095, + "▁accompl": 4096, + "con": 4097, + "▁sam": 4098, + "▁usually": 4099, + "▁watching": 4100, + "▁sacri": 4101, + "erved": 4102, + "▁passage": 4103, + "▁material": 4104, + "▁valley": 4105, + "yr": 4106, + "▁stairs": 4107, + "▁libert": 4108, + "▁frightened": 4109, + "▁remarked": 4110, + "▁tit": 4111, + "▁wed": 4112, + "▁mistress": 
4113, + "▁directly": 4114, + "▁suffer": 4115, + "▁gloom": 4116, + "▁lines": 4117, + "▁stock": 4118, + "▁justice": 4119, + "▁diam": 4120, + "ested": 4121, + "▁growing": 4122, + "▁doesn't": 4123, + "▁gathered": 4124, + "▁ordinary": 4125, + "uce": 4126, + "▁eur": 4127, + "▁unf": 4128, + "▁kitchen": 4129, + "▁threat": 4130, + "▁depend": 4131, + "▁weeks": 4132, + "▁despair": 4133, + "▁method": 4134, + "▁seized": 4135, + "▁discuss": 4136, + "▁exer": 4137, + "ify": 4138, + "▁flower": 4139, + "▁ignor": 4140, + "eer": 4141, + "ades": 4142, + "▁deb": 4143, + "eping": 4144, + "▁ale": 4145, + "▁yo": 4146, + "chief": 4147, + "▁supper": 4148, + "ik": 4149, + "▁bold": 4150, + "▁putting": 4151, + "▁nearer": 4152, + "uses": 4153, + "▁one's": 4154, + "▁ble": 4155, + "▁york": 4156, + "▁ende": 4157, + "▁affairs": 4158, + "▁soldier": 4159, + "▁contrary": 4160, + "▁moving": 4161, + "▁streets": 4162, + "▁bir": 4163, + "rance": 4164, + "hens": 4165, + "▁cit": 4166, + "icated": 4167, + "▁catch": 4168, + "▁imagine": 4169, + "eds": 4170, + "▁march": 4171, + "▁search": 4172, + "ara": 4173, + "▁receive": 4174, + "imate": 4175, + "▁monsie": 4176, + "▁twice": 4177, + "▁papa": 4178, + "▁monsieur": 4179, + "▁reck": 4180, + "min": 4181, + "ude": 4182, + "▁process": 4183, + "▁hole": 4184, + "aly": 4185, + "lin": 4186, + "▁cro": 4187, + "▁favour": 4188, + "▁dign": 4189, + "▁working": 4190, + "▁harm": 4191, + "▁europe": 4192, + "antic": 4193, + "▁proved": 4194, + "ocked": 4195, + "▁prove": 4196, + "▁cler": 4197, + "▁lod": 4198, + "ception": 4199, + "▁pulled": 4200, + "▁arth": 4201, + "▁authority": 4202, + "▁haven": 4203, + "▁jer": 4204, + "▁uns": 4205, + "▁movement": 4206, + "usted": 4207, + "▁engaged": 4208, + "▁brothers": 4209, + "▁advantage": 4210, + "lished": 4211, + "ole": 4212, + "▁arthur": 4213, + "▁aut": 4214, + "▁stones": 4215, + "▁farm": 4216, + "▁difference": 4217, + "▁fart": 4218, + "▁aside": 4219, + "▁mas": 4220, + "▁observ": 4221, + "▁hence": 4222, + "▁possession": 4223, + "▁hills": 
4224, + "▁fortun": 4225, + "uls": 4226, + "ails": 4227, + "▁instance": 4228, + "▁she's": 4229, + "▁ol": 4230, + "▁holy": 4231, + "▁flew": 4232, + "ky": 4233, + "▁color": 4234, + "▁rate": 4235, + "▁doors": 4236, + "▁busy": 4237, + "set": 4238, + "▁address": 4239, + "▁familiar": 4240, + "▁weight": 4241, + "▁aware": 4242, + "▁played": 4243, + "▁sympath": 4244, + "lls": 4245, + "▁solemn": 4246, + "▁liter": 4247, + "▁test": 4248, + "▁emper": 4249, + "▁indian": 4250, + "▁distant": 4251, + "▁interesting": 4252, + "▁bull": 4253, + "▁thorough": 4254, + "▁wore": 4255, + "▁worked": 4256, + "▁explained": 4257, + "▁excellent": 4258, + "▁splendid": 4259, + "▁tongue": 4260, + "▁di": 4261, + "▁pard": 4262, + "▁named": 4263, + "▁shame": 4264, + "▁franc": 4265, + "▁spect": 4266, + "▁moments": 4267, + "bers": 4268, + "▁wil": 4269, + "▁myster": 4270, + "▁seated": 4271, + "▁instantly": 4272, + "▁similar": 4273, + "▁endeav": 4274, + "▁measure": 4275, + "▁naturally": 4276, + "nds": 4277, + "▁suf": 4278, + "▁amount": 4279, + "▁imper": 4280, + "▁dogs": 4281, + "itable": 4282, + "▁brit": 4283, + "▁necessity": 4284, + "rid": 4285, + "ulous": 4286, + "▁confidence": 4287, + "den": 4288, + "▁parent": 4289, + "▁wid": 4290, + "▁vir": 4291, + "▁neverthe": 4292, + "▁agreed": 4293, + "▁nevertheless": 4294, + "unch": 4295, + "▁hearing": 4296, + "▁takes": 4297, + "▁aug": 4298, + "▁univers": 4299, + "enance": 4300, + "▁unw": 4301, + "▁earl": 4302, + "▁keeping": 4303, + "▁drive": 4304, + "▁produced": 4305, + "▁aud": 4306, + "on's": 4307, + "▁names": 4308, + "agn": 4309, + "▁disappeared": 4310, + "▁throw": 4311, + "▁president": 4312, + "▁gods": 4313, + "▁magic": 4314, + "▁represent": 4315, + "▁unknown": 4316, + "por": 4317, + "▁terror": 4318, + "▁haven't": 4319, + "asc": 4320, + "▁support": 4321, + "▁smoke": 4322, + "▁wicked": 4323, + "ker": 4324, + "▁works": 4325, + "▁artic": 4326, + "▁dull": 4327, + "▁yester": 4328, + "▁falling": 4329, + "▁worthy": 4330, + "▁liberty": 4331, + "ulation": 4332, + 
"▁design": 4333, + "▁wants": 4334, + "▁evidence": 4335, + "▁companions": 4336, + "▁spirits": 4337, + "▁coast": 4338, + "▁mighty": 4339, + "▁particularly": 4340, + "▁witness": 4341, + "▁discover": 4342, + "▁sought": 4343, + "▁span": 4344, + "'ve": 4345, + "▁rare": 4346, + "▁officers": 4347, + "lv": 4348, + "zy": 4349, + "▁yesterday": 4350, + "vey": 4351, + "cent": 4352, + "▁powers": 4353, + "▁yield": 4354, + "▁cool": 4355, + "▁organ": 4356, + "▁amaz": 4357, + "▁pointed": 4358, + "ford": 4359, + "▁claim": 4360, + "▁content": 4361, + "▁possibly": 4362, + "▁terms": 4363, + "▁trium": 4364, + "▁officer": 4365, + "▁persu": 4366, + "▁ceased": 4367, + "▁drove": 4368, + "▁occurred": 4369, + "▁gree": 4370, + "▁lies": 4371, + "▁otherwise": 4372, + "▁emperor": 4373, + "▁hom": 4374, + "▁stars": 4375, + "▁knees": 4376, + "▁triumph": 4377, + "ruction": 4378, + "▁paused": 4379, + "oms": 4380, + "▁required": 4381, + "▁failed": 4382, + "▁unhapp": 4383, + "▁diamond": 4384, + "▁rat": 4385, + "▁ali": 4386, + "▁double": 4387, + "▁forms": 4388, + "▁gives": 4389, + "▁finger": 4390, + "race": 4391, + "▁pair": 4392, + "alous": 4393, + "illa": 4394, + "▁bob": 4395, + "▁eliz": 4396, + "▁travel": 4397, + "▁carrying": 4398, + "▁gle": 4399, + "iles": 4400, + "▁teeth": 4401, + "esh": 4402, + "▁shown": 4403, + "▁fruit": 4404, + "▁waters": 4405, + "▁entertain": 4406, + "▁hearts": 4407, + "umn": 4408, + "▁labor": 4409, + "in't": 4410, + "▁pill": 4411, + "▁ener": 4412, + "soci": 4413, + "▁example": 4414, + "▁upper": 4415, + "▁foreign": 4416, + "▁moral": 4417, + "▁softly": 4418, + "rose": 4419, + "▁huge": 4420, + "▁charles": 4421, + "▁priest": 4422, + "▁excit": 4423, + "▁fet": 4424, + "▁mother's": 4425, + "▁possessed": 4426, + "▁cases": 4427, + "▁report": 4428, + "▁milk": 4429, + "▁affair": 4430, + "▁principle": 4431, + "▁inhab": 4432, + "▁freedom": 4433, + "▁proof": 4434, + "▁intended": 4435, + "▁satisfaction": 4436, + "▁shouted": 4437, + "isc": 4438, + "▁plat": 4439, + "▁bask": 4440, + "ental": 4441, 
+ "▁group": 4442, + "▁farther": 4443, + "asm": 4444, + "▁unfortun": 4445, + "▁unto": 4446, + "▁singing": 4447, + "▁arrange": 4448, + "▁religion": 4449, + "▁ber": 4450, + "▁rocks": 4451, + "▁seventeen": 4452, + "▁der": 4453, + "▁james": 4454, + "▁buy": 4455, + "▁succeeded": 4456, + "▁rooms": 4457, + "▁leading": 4458, + "▁majesty": 4459, + "▁events": 4460, + "▁dance": 4461, + "▁paint": 4462, + "▁gently": 4463, + "acle": 4464, + "▁tele": 4465, + "▁pardon": 4466, + "using": 4467, + "▁drop": 4468, + "father": 4469, + "▁invent": 4470, + "▁key": 4471, + "▁mentioned": 4472, + "▁seventy": 4473, + "▁ros": 4474, + "▁suffering": 4475, + "▁record": 4476, + "▁cabin": 4477, + "road": 4478, + "▁diss": 4479, + "ival": 4480, + "▁demanded": 4481, + "▁excitement": 4482, + "▁associ": 4483, + "▁progress": 4484, + "angers": 4485, + "▁curi": 4486, + "▁america": 4487, + "▁rule": 4488, + "▁bor": 4489, + "▁vig": 4490, + "lessly": 4491, + "▁clearly": 4492, + "▁bore": 4493, + "▁sheep": 4494, + "▁regret": 4495, + "▁neighbour": 4496, + "bly": 4497, + "iance": 4498, + "▁instinct": 4499, + "▁advice": 4500, + "▁awful": 4501, + "▁sen": 4502, + "▁fully": 4503, + "▁gather": 4504, + "▁papers": 4505, + "▁hidden": 4506, + "▁chest": 4507, + "▁birth": 4508, + "hy": 4509, + "pap": 4510, + "▁hither": 4511, + "▁stuff": 4512, + "▁impat": 4513, + "▁calling": 4514, + "▁fourth": 4515, + "▁dreadful": 4516, + "▁pos": 4517, + "▁grief": 4518, + "▁brill": 4519, + "▁powerful": 4520, + "▁presented": 4521, + "▁fairy": 4522, + "▁explain": 4523, + "▁shoot": 4524, + "▁prisoner": 4525, + "▁joined": 4526, + "▁afford": 4527, + "mond": 4528, + "attered": 4529, + "▁ing": 4530, + "iments": 4531, + "▁shel": 4532, + "▁prefer": 4533, + "▁considerable": 4534, + "▁obey": 4535, + "▁voices": 4536, + "▁interv": 4537, + "▁interested": 4538, + "▁virg": 4539, + "▁cred": 4540, + "▁card": 4541, + "▁ep": 4542, + "▁needed": 4543, + "▁pounds": 4544, + "▁conqu": 4545, + "▁clever": 4546, + "▁advanced": 4547, + "▁cord": 4548, + "ighed": 4549, + 
"▁undert": 4550, + "▁resolved": 4551, + "▁wag": 4552, + "istic": 4553, + "▁paul": 4554, + "▁excited": 4555, + "▁conditions": 4556, + "▁pictures": 4557, + "acious": 4558, + "▁shining": 4559, + "▁sunday": 4560, + "▁served": 4561, + "▁steam": 4562, + "▁police": 4563, + "▁sprang": 4564, + "sie": 4565, + "ora": 4566, + "ese": 4567, + "▁jes": 4568, + "▁nodd": 4569, + "▁salt": 4570, + "▁fields": 4571, + "▁cart": 4572, + "▁indians": 4573, + "▁fierce": 4574, + "dle": 4575, + "▁ride": 4576, + "▁desired": 4577, + "▁edward": 4578, + "▁importance": 4579, + "▁information": 4580, + "ture": 4581, + "▁hosp": 4582, + "▁memb": 4583, + "▁perceived": 4584, + "▁yard": 4585, + "▁crit": 4586, + "ternal": 4587, + "▁task": 4588, + "▁fold": 4589, + "rant": 4590, + "▁sooner": 4591, + "▁merch": 4592, + "▁absolutely": 4593, + "▁citiz": 4594, + "▁suffered": 4595, + "▁tight": 4596, + "▁dur": 4597, + "▁iss": 4598, + "illy": 4599, + "▁log": 4600, + "▁completely": 4601, + "hold": 4602, + "▁rad": 4603, + "▁share": 4604, + "▁willing": 4605, + "▁devil": 4606, + "▁ships": 4607, + "▁imagination": 4608, + "▁superior": 4609, + "com": 4610, + "ams": 4611, + "▁anybody": 4612, + "▁env": 4613, + "▁appl": 4614, + "▁drag": 4615, + "▁dawn": 4616, + "asped": 4617, + "▁occupied": 4618, + "▁curiosity": 4619, + "iest": 4620, + "▁sigh": 4621, + "▁fox": 4622, + "asant": 4623, + "▁myst": 4624, + "▁stead": 4625, + "ett": 4626, + "▁couple": 4627, + "▁type": 4628, + "▁extraord": 4629, + "▁apparently": 4630, + "▁welcome": 4631, + "▁daily": 4632, + "▁modern": 4633, + "iot": 4634, + "▁ain't": 4635, + "▁dying": 4636, + "llen": 4637, + "▁feat": 4638, + "▁accident": 4639, + "▁countenance": 4640, + "▁abandon": 4641, + "ortion": 4642, + "▁lock": 4643, + "▁crime": 4644, + "pir": 4645, + "▁mult": 4646, + "▁alas": 4647, + "▁refused": 4648, + "▁hate": 4649, + "▁dw": 4650, + "▁whenever": 4651, + "▁thanks": 4652, + "▁slave": 4653, + "▁regarded": 4654, + "▁suggested": 4655, + "ulf": 4656, + "▁actually": 4657, + "gment": 4658, + "▁size": 
4659, + "reg": 4660, + "▁cult": 4661, + "▁kat": 4662, + "▁bodies": 4663, + "hus": 4664, + "▁bay": 4665, + "▁truly": 4666, + "▁flesh": 4667, + "ishop": 4668, + "▁smith": 4669, + "▁betr": 4670, + "with": 4671, + "▁wet": 4672, + "▁rapidly": 4673, + "gers": 4674, + "▁odd": 4675, + "asons": 4676, + "ette": 4677, + "▁club": 4678, + "abel": 4679, + "▁horror": 4680, + "▁mile": 4681, + "▁flight": 4682, + "▁crossed": 4683, + "▁professor": 4684, + "▁oce": 4685, + "▁worst": 4686, + "ization": 4687, + "▁rushed": 4688, + "▁science": 4689, + "▁brief": 4690, + "▁stepped": 4691, + "▁midst": 4692, + "ha": 4693, + "▁sour": 4694, + "▁maint": 4695, + "▁brain": 4696, + "▁cottage": 4697, + "▁expressed": 4698, + "▁equally": 4699, + "▁education": 4700, + "▁august": 4701, + "▁buck": 4702, + "▁nay": 4703, + "ids": 4704, + "▁tempt": 4705, + "▁inquir": 4706, + "▁foolish": 4707, + "▁taught": 4708, + "▁cop": 4709, + "▁dun": 4710, + "▁picked": 4711, + "▁elsie": 4712, + "▁lands": 4713, + "▁driven": 4714, + "▁political": 4715, + "mas": 4716, + "▁deck": 4717, + "▁resist": 4718, + "▁instr": 4719, + "▁bon": 4720, + "▁ken": 4721, + "ips": 4722, + "▁hotel": 4723, + "▁dangerous": 4724, + "ially": 4725, + "now": 4726, + "▁dozen": 4727, + "▁trade": 4728, + "▁points": 4729, + "▁ninet": 4730, + "ability": 4731, + "▁crim": 4732, + "▁relations": 4733, + "▁interp": 4734, + "▁barb": 4735, + "▁delighted": 4736, + "▁members": 4737, + "▁sisters": 4738, + "▁sty": 4739, + "▁anger": 4740, + "▁belief": 4741, + "▁asking": 4742, + "▁meat": 4743, + "▁displ": 4744, + "▁relief": 4745, + "ification": 4746, + "▁hunting": 4747, + "▁alex": 4748, + "aries": 4749, + "▁obst": 4750, + "▁behold": 4751, + "▁mistake": 4752, + "▁inquired": 4753, + "▁remarkable": 4754, + "▁origin": 4755, + "cked": 4756, + "▁nerv": 4757, + "acks": 4758, + "vert": 4759, + "rop": 4760, + "▁careful": 4761, + "▁wounded": 4762, + "ading": 4763, + "▁cere": 4764, + "▁enemies": 4765, + "▁gradually": 4766, + "▁interrupted": 4767, + "▁fis": 4768, + "▁stup": 4769, 
+ "▁severe": 4770, + "▁keen": 4771, + "▁sixteen": 4772, + "kins": 4773, + "resp": 4774, + "▁worn": 4775, + "▁flour": 4776, + "▁sylv": 4777, + "▁control": 4778, + "kin": 4779, + "▁lone": 4780, + "asing": 4781, + "▁nap": 4782, + "▁assert": 4783, + "▁depth": 4784, + "▁kindly": 4785, + "▁murder": 4786, + "acity": 4787, + "▁eleven": 4788, + "▁invol": 4789, + "▁d'art": 4790, + "▁wings": 4791, + "▁oak": 4792, + "▁et": 4793, + "▁begun": 4794, + "▁dreams": 4795, + "while": 4796, + "▁moreover": 4797, + "▁exped": 4798, + "▁independ": 4799, + "▁buried": 4800, + "▁approached": 4801, + "agnan": 4802, + "▁d'artagnan": 4803, + "▁sex": 4804, + "▁saved": 4805, + "▁harry": 4806, + "▁physical": 4807, + "▁species": 4808, + "cer": 4809, + "oe": 4810, + "▁glory": 4811, + "▁creatures": 4812, + "▁newspap": 4813, + "▁sang": 4814, + "▁plenty": 4815, + "▁useful": 4816, + "▁shoes": 4817, + "▁hoped": 4818, + "▁frequently": 4819, + "▁saf": 4820, + "▁distr": 4821, + "▁princip": 4822, + "▁pu": 4823, + "y's": 4824, + "aunt": 4825, + "▁lover": 4826, + "▁famous": 4827, + "▁recollect": 4828, + "▁nur": 4829, + "▁grim": 4830, + "▁indif": 4831, + "▁charming": 4832, + "▁aim": 4833, + "▁loose": 4834, + "▁consciousness": 4835, + "▁mamma": 4836, + "▁enthus": 4837, + "▁slept": 4838, + "▁smooth": 4839, + "▁fighting": 4840, + "▁hyp": 4841, + "▁enthusi": 4842, + "▁dig": 4843, + "aling": 4844, + "▁stage": 4845, + "▁anyone": 4846, + "▁thrust": 4847, + "▁desper": 4848, + "▁tar": 4849, + "▁lamp": 4850, + "stone": 4851, + "▁stern": 4852, + "▁evident": 4853, + "▁meanwhile": 4854, + "▁forgive": 4855, + "▁accepted": 4856, + "▁ocean": 4857, + "▁tot": 4858, + "▁they're": 4859, + "▁wondered": 4860, + "▁playing": 4861, + "▁detect": 4862, + "▁hale": 4863, + "▁knife": 4864, + "ailed": 4865, + "▁closely": 4866, + "▁meas": 4867, + "▁proceeded": 4868, + "▁message": 4869, + "▁mour": 4870, + "▁fac": 4871, + "▁union": 4872, + "ustomed": 4873, + "hem": 4874, + "aming": 4875, + "▁exceed": 4876, + "▁feather": 4877, + "▁precious": 
4878, + "▁century": 4879, + "▁unex": 4880, + "▁park": 4881, + "ication": 4882, + "▁everywhere": 4883, + "▁minds": 4884, + "▁extraordinary": 4885, + "▁arose": 4886, + "▁entrance": 4887, + "▁capital": 4888, + "▁recall": 4889, + "▁burning": 4890, + "▁magnific": 4891, + "oes": 4892, + "orious": 4893, + "stand": 4894, + "▁assemb": 4895, + "▁plant": 4896, + "▁neighbor": 4897, + "▁lest": 4898, + "uments": 4899, + "▁colle": 4900, + "▁virtue": 4901, + "▁bew": 4902, + "▁forb": 4903, + "▁retreat": 4904, + "▁capable": 4905, + "▁assured": 4906, + "▁constant": 4907, + "▁governor": 4908, + "▁increased": 4909, + "▁horn": 4910, + "▁removed": 4911, + "▁facts": 4912, + "▁absence": 4913, + "▁explan": 4914, + "▁ack": 4915, + "▁somebody": 4916, + "▁awa": 4917, + "▁admit": 4918, + "▁correct": 4919, + "▁forgot": 4920, + "▁jealous": 4921, + "▁kissed": 4922, + "▁popular": 4923, + "▁hut": 4924, + "▁ug": 4925, + "pelled": 4926, + "▁grant": 4927, + "▁friendship": 4928, + "▁indign": 4929, + "▁sympathy": 4930, + "iable": 4931, + "erous": 4932, + "▁thom": 4933, + "▁alice": 4934, + "▁level": 4935, + "▁objects": 4936, + "▁pressed": 4937, + "▁sha": 4938, + "room": 4939, + "▁qual": 4940, + "▁begged": 4941, + "▁emp": 4942, + "▁hind": 4943, + "▁highest": 4944, + "▁clouds": 4945, + "▁ghost": 4946, + "▁acknow": 4947, + "oused": 4948, + "▁strike": 4949, + "▁wishes": 4950, + "▁becomes": 4951, + "▁trembling": 4952, + "▁nob": 4953, + "▁kindness": 4954, + "▁accordingly": 4955, + "▁throat": 4956, + "ration": 4957, + "▁fare": 4958, + "▁we're": 4959, + "▁stretched": 4960, + "▁frag": 4961, + "▁wheel": 4962, + "▁queer": 4963, + "▁grandfather": 4964, + "for": 4965, + "▁choose": 4966, + "▁helen": 4967, + "▁eighty": 4968, + "▁ly": 4969, + "▁miserable": 4970, + "▁contempt": 4971, + "igned": 4972, + "▁military": 4973, + "▁russ": 4974, + "▁basket": 4975, + "▁ahead": 4976, + "oops": 4977, + "ivered": 4978, + "▁listening": 4979, + "▁fro": 4980, + "▁larger": 4981, + "▁divine": 4982, + "iber": 4983, + "▁stories": 4984, + 
"anches": 4985, + "ushing": 4986, + "izing": 4987, + "▁treasure": 4988, + "▁excuse": 4989, + "▁innocent": 4990, + "▁aid": 4991, + "▁remind": 4992, + "▁slaves": 4993, + "rit": 4994, + "stairs": 4995, + "▁reward": 4996, + "ograph": 4997, + "▁manage": 4998, + "▁dish": 4999, + "▁throughout": 5000, + "▁waves": 5001, + "▁judgment": 5002, + "▁arrival": 5003, + "▁choice": 5004, + "▁unhappy": 5005, + "astic": 5006, + "▁blank": 5007, + "▁advance": 5008, + "▁informed": 5009, + "▁acquaintance": 5010, + "▁impression": 5011, + "▁mysterious": 5012, + "bb": 5013, + "▁ara": 5014, + "▁notes": 5015, + "▁hadn't": 5016, + "▁sell": 5017, + "▁comr": 5018, + "▁impl": 5019, + "▁indust": 5020, + "▁ended": 5021, + "▁lights": 5022, + "▁nurse": 5023, + "▁sout": 5024, + "▁bought": 5025, + "▁fred": 5026, + "▁marked": 5027, + "▁scream": 5028, + "mend": 5029, + "▁uneas": 5030, + "▁delicate": 5031, + "▁weary": 5032, + "estic": 5033, + "▁prompt": 5034, + "▁experi": 5035, + "▁hungry": 5036, + "▁flying": 5037, + "▁pow": 5038, + "▁bridge": 5039, + "▁join": 5040, + "▁visible": 5041, + "▁understanding": 5042, + "▁crying": 5043, + "▁avoid": 5044, + "▁tis": 5045, + "▁stiff": 5046, + "aches": 5047, + "▁restr": 5048, + "▁sounds": 5049, + "▁bowed": 5050, + "▁caut": 5051, + "▁goods": 5052, + "▁david": 5053, + "▁unable": 5054, + "▁you'd": 5055, + "hamed": 5056, + "▁bos": 5057, + "eral": 5058, + "▁ashamed": 5059, + "▁somewhere": 5060, + "▁infinite": 5061, + "ocks": 5062, + "▁dignity": 5063, + "▁gay": 5064, + "▁vic": 5065, + "▁amid": 5066, + "▁hollow": 5067, + "▁emotion": 5068, + "▁admitted": 5069, + "▁parents": 5070, + "▁wra": 5071, + "▁hint": 5072, + "▁temple": 5073, + "▁comfortable": 5074, + "▁intelligence": 5075, + "orous": 5076, + "▁bearing": 5077, + "▁hers": 5078, + "abeth": 5079, + "▁remains": 5080, + "▁contem": 5081, + "▁settle": 5082, + "▁immense": 5083, + "ffe": 5084, + "pher": 5085, + "▁cher": 5086, + "ldom": 5087, + "▁weap": 5088, + "ulated": 5089, + "▁lighted": 5090, + "gypt": 5091, + "▁adventure": 
5092, + "▁thoroughly": 5093, + "▁egypt": 5094, + "ilst": 5095, + "anges": 5096, + "▁obt": 5097, + "▁friendly": 5098, + "▁reckon": 5099, + "▁stupid": 5100, + "▁fed": 5101, + "▁rome": 5102, + "▁meal": 5103, + "▁intention": 5104, + "▁returning": 5105, + "▁convin": 5106, + "▁coo": 5107, + "lection": 5108, + "▁ash": 5109, + "achel": 5110, + "▁rope": 5111, + "▁price": 5112, + "▁project": 5113, + "elt": 5114, + "rows": 5115, + "▁secure": 5116, + "▁escaped": 5117, + "▁hopes": 5118, + "▁elizabeth": 5119, + "▁safety": 5120, + "▁wound": 5121, + "▁sup": 5122, + "▁unus": 5123, + "onscious": 5124, + "▁horri": 5125, + "▁minister": 5126, + "▁ox": 5127, + "lla": 5128, + "ensive": 5129, + "▁helped": 5130, + "▁plainly": 5131, + "▁seldom": 5132, + "▁thinks": 5133, + "▁fellows": 5134, + "▁mood": 5135, + "▁pushed": 5136, + "▁exhib": 5137, + "inging": 5138, + "▁thunder": 5139, + "aud": 5140, + "iana": 5141, + "▁fairly": 5142, + "▁elder": 5143, + "▁eggs": 5144, + "irm": 5145, + "▁maiden": 5146, + "mother": 5147, + "▁appears": 5148, + "▁cheeks": 5149, + "▁won": 5150, + "▁ease": 5151, + "▁redu": 5152, + "▁skill": 5153, + "▁extent": 5154, + "▁practice": 5155, + "▁religious": 5156, + "▁becoming": 5157, + "▁virgin": 5158, + "▁features": 5159, + "▁tied": 5160, + "▁whence": 5161, + "▁somehow": 5162, + "▁greet": 5163, + "▁faithful": 5164, + "▁concerned": 5165, + "▁theat": 5166, + "▁bishop": 5167, + "▁pink": 5168, + "▁eagerly": 5169, + "rees": 5170, + "▁eating": 5171, + "▁waste": 5172, + "▁rank": 5173, + "▁fem": 5174, + "▁bride": 5175, + "▁unl": 5176, + "otted": 5177, + "ceiving": 5178, + "▁trib": 5179, + "▁original": 5180, + "▁concerning": 5181, + "▁hab": 5182, + "▁accustomed": 5183, + "▁patient": 5184, + "▁recom": 5185, + "▁cell": 5186, + "ointment": 5187, + "▁arranged": 5188, + "ville": 5189, + "iture": 5190, + "▁wholly": 5191, + "▁older": 5192, + "▁colour": 5193, + "▁provided": 5194, + "▁ate": 5195, + "▁partly": 5196, + "▁mont": 5197, + "ology": 5198, + "▁prospect": 5199, + "▁ceremon": 5200, + 
"▁ze": 5201, + "▁laughter": 5202, + "▁fee": 5203, + "▁branches": 5204, + "▁fled": 5205, + "right": 5206, + "▁whilst": 5207, + "▁slipped": 5208, + "▁violent": 5209, + "▁inhabit": 5210, + "▁sons": 5211, + "▁engage": 5212, + "▁uncom": 5213, + "▁deeply": 5214, + "▁substance": 5215, + "▁tale": 5216, + "▁tiny": 5217, + "▁dan": 5218, + "▁ga": 5219, + "▁bee": 5220, + "▁yards": 5221, + "icks": 5222, + "▁hastily": 5223, + "held": 5224, + "▁wes": 5225, + "▁vague": 5226, + "▁amuse": 5227, + "▁mud": 5228, + "▁wolf": 5229, + "▁hans": 5230, + "illing": 5231, + "▁supply": 5232, + "▁silk": 5233, + "▁constantly": 5234, + "▁christmas": 5235, + "▁million": 5236, + "▁whisper": 5237, + "▁mental": 5238, + "▁washing": 5239, + "verse": 5240, + "▁cloth": 5241, + "▁baron": 5242, + "▁corresp": 5243, + "▁nodded": 5244, + "▁correspond": 5245, + "ka": 5246, + "▁hell": 5247, + "▁gain": 5248, + "▁rust": 5249, + "▁obtain": 5250, + "▁unconscious": 5251, + "▁struggle": 5252, + "▁established": 5253, + "▁lawy": 5254, + "ols": 5255, + "▁signs": 5256, + "▁uttered": 5257, + "▁roman": 5258, + "▁constitution": 5259, + "pes": 5260, + "▁cave": 5261, + "▁spare": 5262, + "▁quant": 5263, + "▁image": 5264, + "▁merry": 5265, + "▁treated": 5266, + "▁efforts": 5267, + "▁lonely": 5268, + "rated": 5269, + "▁nut": 5270, + "▁glanced": 5271, + "▁portion": 5272, + "itor": 5273, + "▁resemb": 5274, + "▁withd": 5275, + "▁mead": 5276, + "▁feast": 5277, + "▁prim": 5278, + "▁cliff": 5279, + "▁emer": 5280, + "▁proportion": 5281, + "▁consideration": 5282, + "▁haste": 5283, + "▁gaze": 5284, + "▁savage": 5285, + "▁crew": 5286, + "▁tower": 5287, + "▁lack": 5288, + "▁conscience": 5289, + "▁mercy": 5290, + "▁exha": 5291, + "▁consent": 5292, + "ators": 5293, + "urd": 5294, + "▁outl": 5295, + "▁clo": 5296, + "▁adop": 5297, + "▁amongst": 5298, + "▁hanging": 5299, + "▁circle": 5300, + "▁prepar": 5301, + "▁brilliant": 5302, + "fl": 5303, + "▁gained": 5304, + "▁row": 5305, + "▁troops": 5306, + "▁repro": 5307, + "▁ming": 5308, + "oul": 5309, 
+ "▁dared": 5310, + "▁lion": 5311, + "▁joe": 5312, + "▁winds": 5313, + "▁bringing": 5314, + "▁anxiety": 5315, + "▁billy": 5316, + "▁consequence": 5317, + "fice": 5318, + "pse": 5319, + "▁fought": 5320, + "▁pred": 5321, + "▁scra": 5322, + "▁glim": 5323, + "▁victory": 5324, + "ped": 5325, + "▁rab": 5326, + "▁scot": 5327, + "▁obv": 5328, + "▁shock": 5329, + "chan": 5330, + "▁knock": 5331, + "ourse": 5332, + "▁handed": 5333, + "▁indul": 5334, + "▁patience": 5335, + "▁souther": 5336, + "▁jose": 5337, + "▁fever": 5338, + "▁rolled": 5339, + "icted": 5340, + "▁setting": 5341, + "▁profession": 5342, + "▁sylvia": 5343, + "▁hun": 5344, + "utions": 5345, + "▁feared": 5346, + "▁brand": 5347, + "▁boots": 5348, + "▁forehead": 5349, + "▁principles": 5350, + "▁sink": 5351, + "▁rig": 5352, + "aval": 5353, + "▁purch": 5354, + "▁gazed": 5355, + "▁employed": 5356, + "▁murmured": 5357, + "more": 5358, + "▁sar": 5359, + "ashing": 5360, + "ural": 5361, + "acles": 5362, + "▁trad": 5363, + "▁active": 5364, + "▁benef": 5365, + "▁bottle": 5366, + "▁rage": 5367, + "▁invest": 5368, + "▁lux": 5369, + "▁sank": 5370, + "▁hang": 5371, + "▁beard": 5372, + "ential": 5373, + "▁loving": 5374, + "▁native": 5375, + "▁instruct": 5376, + "▁waist": 5377, + "▁relation": 5378, + "▁discovery": 5379, + "▁melan": 5380, + "▁nervous": 5381, + "▁obtained": 5382, + "▁pig": 5383, + "▁sear": 5384, + "▁flag": 5385, + "▁trail": 5386, + "▁distinguished": 5387, + "▁stared": 5388, + "▁misery": 5389, + "▁print": 5390, + "▁guil": 5391, + "▁jumped": 5392, + "▁swim": 5393, + "▁approaching": 5394, + "▁suspicion": 5395, + "▁iv": 5396, + "▁managed": 5397, + "aker": 5398, + "▁teach": 5399, + "▁match": 5400, + "▁guilty": 5401, + "▁wretched": 5402, + "▁rum": 5403, + "▁compar": 5404, + "▁theory": 5405, + "▁sher": 5406, + "▁bree": 5407, + "▁kings": 5408, + "▁shone": 5409, + "atherine": 5410, + "▁throne": 5411, + "▁showing": 5412, + "aws": 5413, + "▁robin": 5414, + "▁embar": 5415, + "utation": 5416, + "▁woman's": 5417, + "▁addressed": 
5418, + "▁protest": 5419, + "▁admiration": 5420, + "▁troubled": 5421, + "▁ugly": 5422, + "oom": 5423, + "erves": 5424, + "▁flung": 5425, + "▁subs": 5426, + "▁relie": 5427, + "▁thousands": 5428, + "nce": 5429, + "▁od": 5430, + "▁current": 5431, + "▁wooden": 5432, + "▁sacrifice": 5433, + "urity": 5434, + "cip": 5435, + "▁pear": 5436, + "▁farmer": 5437, + "▁needs": 5438, + "▁condem": 5439, + "▁member": 5440, + "▁bade": 5441, + "▁dancing": 5442, + "▁reasons": 5443, + "▁consult": 5444, + "▁swall": 5445, + "▁shadows": 5446, + "▁angel": 5447, + "▁nineteen": 5448, + "▁style": 5449, + "field": 5450, + "▁lan": 5451, + "▁manif": 5452, + "▁robert": 5453, + "▁grate": 5454, + "▁engine": 5455, + "▁wisdom": 5456, + "▁jesus": 5457, + "▁convent": 5458, + "▁preced": 5459, + "▁interests": 5460, + "▁trial": 5461, + "bor": 5462, + "iven": 5463, + "▁nest": 5464, + "▁exch": 5465, + "▁voy": 5466, + "▁illust": 5467, + "▁worship": 5468, + "▁adam": 5469, + "▁phr": 5470, + "▁principal": 5471, + "▁hit": 5472, + "▁spend": 5473, + "▁stands": 5474, + "▁respons": 5475, + "▁ay": 5476, + "▁haw": 5477, + "▁whist": 5478, + "▁arrest": 5479, + "▁kinds": 5480, + "▁require": 5481, + "▁described": 5482, + "▁lit": 5483, + "▁precise": 5484, + "▁proposed": 5485, + "▁produce": 5486, + "▁utterly": 5487, + "ulse": 5488, + "▁novel": 5489, + "▁blame": 5490, + "▁credit": 5491, + "▁pause": 5492, + "osen": 5493, + "▁household": 5494, + "▁armed": 5495, + "▁follows": 5496, + "upon": 5497, + "▁approach": 5498, + "▁ninety": 5499, + "▁pir": 5500, + "▁flore": 5501, + "ivity": 5502, + "▁refuse": 5503, + "▁sensible": 5504, + "choly": 5505, + "▁national": 5506, + "▁grie": 5507, + "▁reven": 5508, + "▁let's": 5509, + "▁delightful": 5510, + "▁extremely": 5511, + "▁melancholy": 5512, + "uing": 5513, + "▁enorm": 5514, + "cles": 5515, + "▁slightly": 5516, + "▁sacred": 5517, + "▁recognized": 5518, + "▁mystery": 5519, + "▁gri": 5520, + "▁compre": 5521, + "▁distress": 5522, + "▁warri": 5523, + "▁useless": 5524, + "▁trif": 5525, + 
"▁mounted": 5526, + "▁philip": 5527, + "▁energy": 5528, + "▁explanation": 5529, + "▁cas": 5530, + "atory": 5531, + "▁pour": 5532, + "▁ric": 5533, + "▁chosen": 5534, + "▁everyone": 5535, + "umbled": 5536, + "▁apr": 5537, + "▁cam": 5538, + "▁proc": 5539, + "▁resumed": 5540, + "▁appreci": 5541, + "▁alexand": 5542, + "▁aven": 5543, + "▁wing": 5544, + "▁intense": 5545, + "▁highly": 5546, + "▁lucy": 5547, + "▁solid": 5548, + "▁departure": 5549, + "▁agreeable": 5550, + "▁exercise": 5551, + "apped": 5552, + "▁ward": 5553, + "▁bud": 5554, + "▁dwell": 5555, + "icate": 5556, + "▁dece": 5557, + "▁teacher": 5558, + "tending": 5559, + "▁max": 5560, + "▁request": 5561, + "▁unexpected": 5562, + "▁joseph": 5563, + "col": 5564, + "▁leap": 5565, + "▁victim": 5566, + "▁sighed": 5567, + "▁forces": 5568, + "chie": 5569, + "▁feed": 5570, + "▁sport": 5571, + "▁drift": 5572, + "▁wedding": 5573, + "▁british": 5574, + "sec": 5575, + "▁attitude": 5576, + "▁vision": 5577, + "▁pipe": 5578, + "▁tow": 5579, + "▁halt": 5580, + "▁manners": 5581, + "▁tend": 5582, + "▁flood": 5583, + "▁commission": 5584, + "▁guide": 5585, + "▁observe": 5586, + "▁concern": 5587, + "▁rush": 5588, + "▁affected": 5589, + "fall": 5590, + "▁stret": 5591, + "▁coach": 5592, + "▁poison": 5593, + "▁directed": 5594, + "▁medic": 5595, + "▁gest": 5596, + "▁echo": 5597, + "▁younger": 5598, + "▁confusion": 5599, + "▁continue": 5600, + "▁parli": 5601, + "▁absor": 5602, + "▁centre": 5603, + "conom": 5604, + "▁horrible": 5605, + "rison": 5606, + "▁bol": 5607, + "▁bath": 5608, + "▁gown": 5609, + "▁bye": 5610, + "▁aloud": 5611, + "▁suppl": 5612, + "▁profound": 5613, + "▁err": 5614, + "▁cheerful": 5615, + "worth": 5616, + "▁sentence": 5617, + "▁mistaken": 5618, + "▁torn": 5619, + "▁figures": 5620, + "▁accompanied": 5621, + "▁catherine": 5622, + "▁econom": 5623, + "▁atm": 5624, + "▁shaking": 5625, + "umber": 5626, + "▁council": 5627, + "lot": 5628, + "▁asce": 5629, + "ilities": 5630, + "▁spar": 5631, + "▁ends": 5632, + "▁straw": 5633, + 
"▁knights": 5634, + "▁atmosp": 5635, + "▁shade": 5636, + "▁brow": 5637, + "▁spark": 5638, + "▁rested": 5639, + "▁sentiment": 5640, + "▁recovered": 5641, + "▁subjects": 5642, + "▁duties": 5643, + "▁composed": 5644, + "▁swept": 5645, + "▁reality": 5646, + "▁singular": 5647, + "▁transp": 5648, + "▁locked": 5649, + "▁louis": 5650, + "▁assistance": 5651, + "▁wake": 5652, + "rem": 5653, + "▁sovere": 5654, + "▁unp": 5655, + "▁loves": 5656, + "▁absurd": 5657, + "▁souls": 5658, + "▁immediate": 5659, + "▁riding": 5660, + "▁connection": 5661, + "▁cheek": 5662, + "▁magnificent": 5663, + "▁ere": 5664, + "▁sugar": 5665, + "▁plans": 5666, + "▁prud": 5667, + "▁dise": 5668, + "▁adj": 5669, + "▁leaning": 5670, + "▁surrounded": 5671, + "▁we've": 5672, + "▁orn": 5673, + "▁roll": 5674, + "▁proble": 5675, + "▁strict": 5676, + "▁awake": 5677, + "▁praise": 5678, + "▁convinced": 5679, + "▁rele": 5680, + "▁frame": 5681, + "▁breaking": 5682, + "▁curtain": 5683, + "▁stayed": 5684, + "▁divided": 5685, + "▁craw": 5686, + "▁inclined": 5687, + "▁previous": 5688, + "ault": 5689, + "omen": 5690, + "▁stair": 5691, + "▁sees": 5692, + "▁pron": 5693, + "board": 5694, + "▁complex": 5695, + "▁prayer": 5696, + "▁pierre": 5697, + "▁unfortunate": 5698, + "gs": 5699, + "▁genius": 5700, + "▁increase": 5701, + "▁sufficiently": 5702, + "▁banks": 5703, + "▁revolution": 5704, + "▁southern": 5705, + "ki": 5706, + "oke": 5707, + "▁aust": 5708, + "edy": 5709, + "▁ling": 5710, + "▁countess": 5711, + "▁sleeping": 5712, + "▁devoted": 5713, + "▁utmost": 5714, + "▁market": 5715, + "▁bosom": 5716, + "▁bark": 5717, + "▁cath": 5718, + "alt": 5719, + "char": 5720, + "▁clock": 5721, + "▁handker": 5722, + "▁admin": 5723, + "▁senses": 5724, + "▁ident": 5725, + "▁midnight": 5726, + "▁connected": 5727, + "▁permitted": 5728, + "▁hid": 5729, + "▁fil": 5730, + "▁faced": 5731, + "▁gift": 5732, + "▁chat": 5733, + "▁brid": 5734, + "▁norther": 5735, + "▁horiz": 5736, + "▁college": 5737, + "▁handkerchief": 5738, + "isions": 5739, + 
"▁rebe": 5740, + "▁polic": 5741, + "▁announced": 5742, + "ounce": 5743, + "▁nons": 5744, + "▁nurs": 5745, + "ales": 5746, + "▁fleet": 5747, + "▁ragged": 5748, + "▁coffe": 5749, + "▁parties": 5750, + "▁delay": 5751, + "▁sounded": 5752, + "▁cities": 5753, + "▁wash": 5754, + "▁appointed": 5755, + "▁nights": 5756, + "▁instit": 5757, + "▁god's": 5758, + "▁striking": 5759, + "▁guns": 5760, + "▁astonishment": 5761, + "▁merchant": 5762, + "▁parliament": 5763, + "nal": 5764, + "▁ax": 5765, + "atched": 5766, + "▁pil": 5767, + "▁page": 5768, + "iform": 5769, + "▁plate": 5770, + "▁thirst": 5771, + "▁negro": 5772, + "▁ruin": 5773, + "▁inhabitants": 5774, + "win": 5775, + "arf": 5776, + "▁rib": 5777, + "▁addition": 5778, + "▁argument": 5779, + "bour": 5780, + "▁tad": 5781, + "▁scen": 5782, + "▁guests": 5783, + "▁wondering": 5784, + "▁acquainted": 5785, + "▁intent": 5786, + "pless": 5787, + "▁destroyed": 5788, + "▁coffee": 5789, + "inent": 5790, + "lebr": 5791, + "▁render": 5792, + "▁sob": 5793, + "▁demon": 5794, + "▁desir": 5795, + "uding": 5796, + "▁gets": 5797, + "▁assure": 5798, + "▁raise": 5799, + "▁sharply": 5800, + "▁privile": 5801, + "▁alarm": 5802, + "▁machine": 5803, + "fied": 5804, + "▁contract": 5805, + "▁deliber": 5806, + "▁drown": 5807, + "▁afterward": 5808, + "▁guest": 5809, + "▁conclusion": 5810, + "▁risk": 5811, + "▁ignorant": 5812, + "bury": 5813, + "kind": 5814, + "▁pian": 5815, + "an's": 5816, + "uries": 5817, + "▁soil": 5818, + "▁refer": 5819, + "▁commanded": 5820, + "▁practical": 5821, + "▁toss": 5822, + "▁offe": 5823, + "▁beheld": 5824, + "▁arist": 5825, + "▁quarters": 5826, + "▁degrees": 5827, + "▁fisher": 5828, + "▁nonsense": 5829, + "▁mc": 5830, + "isp": 5831, + "▁mechan": 5832, + "keep": 5833, + "▁doubtless": 5834, + "▁violence": 5835, + "▁neglect": 5836, + "▁folk": 5837, + "liness": 5838, + "▁bul": 5839, + "▁easter": 5840, + "▁loft": 5841, + "▁contained": 5842, + "▁reflection": 5843, + "▁celebr": 5844, + "▁leaf": 5845, + "▁concluded": 5846, + 
"▁district": 5847, + "iation": 5848, + "rs": 5849, + "▁scient": 5850, + "▁he'd": 5851, + "▁scorn": 5852, + "▁crack": 5853, + "▁steep": 5854, + "▁muttered": 5855, + "▁establish": 5856, + "▁darling": 5857, + "▁andrew": 5858, + "▁chim": 5859, + "quis": 5860, + "▁quality": 5861, + "▁polly": 5862, + "▁check": 5863, + "▁craft": 5864, + "▁travell": 5865, + "▁universal": 5866, + "inate": 5867, + "▁cig": 5868, + "atives": 5869, + "omp": 5870, + "uten": 5871, + "▁jac": 5872, + "▁job": 5873, + "▁subm": 5874, + "▁reader": 5875, + "▁leis": 5876, + "▁emph": 5877, + "▁surround": 5878, + "ox": 5879, + "pent": 5880, + "itate": 5881, + "▁extended": 5882, + "▁lev": 5883, + "▁overt": 5884, + "▁retired": 5885, + "▁puzz": 5886, + "uable": 5887, + "▁libr": 5888, + "▁chin": 5889, + "▁spl": 5890, + "▁realized": 5891, + "▁causes": 5892, + "▁punishment": 5893, + "▁physic": 5894, + "▁leisure": 5895, + "can": 5896, + "▁wave": 5897, + "▁shake": 5898, + "▁charm": 5899, + "▁belonged": 5900, + "mber": 5901, + "▁bones": 5902, + "▁gas": 5903, + "▁range": 5904, + "▁prec": 5905, + "▁smell": 5906, + "▁maybe": 5907, + "▁invited": 5908, + "▁troubles": 5909, + "▁tables": 5910, + "anch": 5911, + "icip": 5912, + "▁june": 5913, + "▁abo": 5914, + "▁ages": 5915, + "▁anywhere": 5916, + "ffin": 5917, + "▁drunk": 5918, + "▁properly": 5919, + "▁local": 5920, + "▁improve": 5921, + "▁atmosphere": 5922, + "▁dir": 5923, + "▁he'll": 5924, + "▁reb": 5925, + "▁rang": 5926, + "▁compass": 5927, + "▁lieuten": 5928, + "▁leaned": 5929, + "▁firmly": 5930, + "▁nations": 5931, + "▁hay": 5932, + "▁wept": 5933, + "▁ral": 5934, + "▁conven": 5935, + "▁uniform": 5936, + "▁julia": 5937, + "eem": 5938, + "rass": 5939, + "▁track": 5940, + "▁commer": 5941, + "▁bushes": 5942, + "▁obsc": 5943, + "▁sorts": 5944, + "▁difficulties": 5945, + "▁intellectual": 5946, + "▁introduced": 5947, + "mith": 5948, + "▁tro": 5949, + "iday": 5950, + "▁rendered": 5951, + "▁rout": 5952, + "add": 5953, + "▁plun": 5954, + "▁throwing": 5955, + "▁humble": 5956, + 
"▁polite": 5957, + "▁numerous": 5958, + "▁movements": 5959, + "▁successful": 5960, + "▁candle": 5961, + "▁separate": 5962, + "▁protection": 5963, + "▁thomas": 5964, + "▁enormous": 5965, + "▁unb": 5966, + "▁repub": 5967, + "▁sunsh": 5968, + "▁descended": 5969, + "▁unusual": 5970, + "ived": 5971, + "▁blaz": 5972, + "▁shows": 5973, + "▁simpl": 5974, + "▁cattle": 5975, + "▁crept": 5976, + "▁astonished": 5977, + "▁deserted": 5978, + "▁lap": 5979, + "arse": 5980, + "▁nearest": 5981, + "udes": 5982, + "▁entering": 5983, + "▁ideal": 5984, + "standing": 5985, + "nders": 5986, + "▁sore": 5987, + "aine": 5988, + "▁clos": 5989, + "▁ours": 5990, + "▁wherever": 5991, + "▁term": 5992, + "▁visited": 5993, + "▁calcul": 5994, + "ds": 5995, + "▁base": 5996, + "▁gates": 5997, + "▁stamp": 5998, + "▁liber": 5999, + "▁official": 6000, + "▁erect": 6001, + "▁alt": 6002, + "elia": 6003, + "▁harmon": 6004, + "▁painful": 6005, + "▁burned": 6006, + "▁republic": 6007, + "uer": 6008, + "▁lately": 6009, + "▁ital": 6010, + "amm": 6011, + "▁tear": 6012, + "▁actions": 6013, + "▁final": 6014, + "▁startled": 6015, + "▁sensation": 6016, + "▁fatal": 6017, + "olic": 6018, + "▁flash": 6019, + "▁appet": 6020, + "▁stronger": 6021, + "▁numbers": 6022, + "▁gratitude": 6023, + "▁female": 6024, + "▁western": 6025, + "lest": 6026 + }, + "merges": [ + "▁ t", + "h e", + "▁ a", + "▁t he", + "i n", + "▁ s", + "▁ w", + "▁ o", + "r e", + "n d", + "▁ b", + "▁ h", + "e r", + "▁ m", + "▁ i", + "o u", + "▁ c", + "▁ f", + "a t", + "e d", + "▁a nd", + "e n", + "▁t o", + "▁o f", + "o n", + "i s", + "▁ d", + "in g", + "▁t h", + "▁ p", + "▁ he", + "o r", + "▁ l", + "e s", + "▁ in", + "l l", + "i t", + "a r", + "a s", + "a n", + "▁ n", + "▁ g", + "o m", + "▁b e", + "▁h a", + "▁ e", + "l e", + "o t", + "▁ y", + "u t", + "o w", + "i c", + "▁w h", + "▁i t", + "l d", + "v e", + "▁th at", + "l y", + "▁w as", + "i d", + "s e", + "s t", + "▁o n", + "g h", + "en t", + "▁ re", + "▁y ou", + "i m", + "c e", + "▁ u", + "v er", + "i on", + 
"▁a s", + "e t", + "▁f or", + "a y", + "▁h is", + "▁w e", + "it h", + "a l", + "i r", + "▁ r", + "▁w ith", + "▁s t", + "a d", + "u r", + "gh t", + "▁a n", + "▁he r", + "▁n ot", + "▁i s", + "▁ha d", + "t er", + "he r", + "a c", + "a m", + "▁a t", + "o o", + "▁b ut", + "ou ld", + "▁s he", + "▁ k", + "▁s e", + "▁s a", + "▁s h", + "▁f r", + "▁h im", + "▁s o", + "▁m e", + "i ll", + "a in", + "▁s u", + "i ght", + "c h", + "re d", + "c t", + "a ll", + "r o", + "k e", + "es s", + "i l", + "' s", + "o re", + "▁d e", + "▁m y", + "▁the y", + "▁w he", + "▁a ll", + "ic h", + "▁n e", + "r i", + "▁b y", + "▁ha ve", + "om e", + "p p", + "▁th is", + "▁l i", + "▁d o", + "▁c on", + "u s", + "▁wh ich", + "▁c h", + "u l", + "q u", + "▁ j", + "▁u p", + "▁sa id", + "▁fr om", + "ar d", + "g e", + "▁o r", + "▁ v", + "▁on e", + "▁n o", + "t h", + "▁e x", + "▁we re", + "▁the re", + "p e", + "a nd", + "es t", + "▁m an", + "▁wh o", + "b le", + "i e", + "▁a l", + "an t", + "re s", + "ou s", + "u st", + "ver y", + "at ion", + "▁f e", + "▁the m", + "l f", + "▁whe n", + "n t", + "am e", + "in d", + "r a", + "▁g o", + "er s", + "as t", + "f e", + "oo d", + "▁k n", + "▁in t", + "is t", + "▁a re", + "ar t", + "ou t", + "▁w ould", + "▁l e", + "▁wh at", + "o s", + "▁the ir", + "on g", + "ou r", + "▁i f", + "▁c om", + "ou nd", + "▁a b", + "▁o ut", + "▁w or", + "e m", + "▁w ill", + "a k", + "▁m is", + "at e", + "o l", + "u m", + "u n", + "it t", + "ou gh", + "k ed", + "i g", + "a p", + "on e", + "▁be en", + "ow n", + "i ve", + "▁the n", + "▁b r", + "v en", + "i f", + "▁a r", + "' t", + "se lf", + "▁t r", + "▁p l", + "▁r o", + "▁p r", + "t her", + "re at", + "▁u n", + "▁a f", + "▁s p", + "▁ qu", + "▁p ro", + "it y", + "he d", + "▁t w", + "▁a g", + "▁c ould", + "o st", + "a ce", + "or t", + "u re", + "a ke", + "▁a m", + "ac k", + "▁an y", + "▁s ome", + "▁you r", + "▁m ore", + "▁c an", + "a u", + "▁t im", + "e p", + "a g", + "▁ en", + "c k", + "▁int o", + "▁c l", + "r y", + "▁n ow", + "h ing", + "nd er", + 
"a re", + "▁ very", + "▁g r", + "e l", + "o se", + "▁l oo", + "▁b o", + "v ed", + "o p", + "▁o ther", + "▁d id", + "an ce", + "▁th an", + "itt le", + "▁l ittle", + "in e", + "i es", + "w ay", + "it e", + "▁li ke", + "id e", + "▁l o", + "as s", + "▁b l", + "a ble", + "ur n", + "ou ght", + "▁kn ow", + "ot her", + "▁tim e", + "▁i m", + "▁d is", + "▁u s", + "▁c o", + "f ore", + "▁h ow", + "▁t e", + "en ce", + "▁d ay", + "▁a d", + "ad e", + "ic e", + "▁ab out", + "▁se e", + "▁o ver", + "p t", + "c c", + "▁to o", + "in k", + "▁f l", + "w n", + "▁g reat", + "▁af ter", + "p l", + "d e", + "▁p er", + "m ent", + "▁ag ain", + "▁up on", + "▁ha nd", + "a b", + "▁h as", + "re e", + "is h", + "c i", + "▁on ly", + "all y", + "▁we ll", + "▁sh ould", + "▁p o", + "▁m ar", + "res s", + "▁s ay", + "▁g ood", + "at her", + "▁tw o", + "ing s", + "▁p e", + "ou nt", + "▁o ur", + "i re", + "v ing", + "▁d own", + "ar s", + "er t", + "w e", + "▁be fore", + "i le", + "v es", + "▁a pp", + "▁e very", + "▁it s", + "▁o ld", + "▁th r", + "▁m u", + "▁m ade", + "i ed", + "ic k", + "▁l ong", + "a ge", + "t e", + "f t", + "▁whe re", + "an g", + "▁ne ver", + "▁m ust", + "▁p re", + "▁s m", + "f ul", + "▁su ch", + "u ll", + "▁st r", + "ion s", + "▁of f", + "▁s c", + "▁c ame", + "i ous", + "u e", + "▁mis s", + "w ard", + "i ld", + "▁f ir", + "▁e ven", + "▁u nder", + "ac t", + "▁the se", + "▁c ome", + "▁p art", + "▁f o", + "at ed", + "n ess", + "▁re m", + "or d", + "▁be c", + "t y", + "▁m ay", + "▁mu ch", + "▁th ink", + "p er", + "▁w ay", + "▁mis ter", + "l ed", + "▁l et", + "or n", + "▁e y", + "▁g l", + "▁con t", + "▁th ought", + "▁loo k", + "e ct", + "▁s pe", + "is e", + "▁b ack", + "▁be t", + "ad y", + "▁y e", + "an s", + "ac h", + "▁he re", + "▁j ust", + "re n", + "▁fir st", + "▁h o", + "▁o wn", + "▁d es", + "▁o b", + "ri ed", + "u d", + "ar y", + "▁w ent", + "▁m o", + "▁him self", + "▁m en", + "a ir", + "c l", + "a ve", + "at h", + "f f", + "▁s l", + "c o", + "on 't", + "ll ow", + "▁c r", + "▁re s", + 
"▁i '", + "▁m ight", + "i ly", + "▁se em", + "in t", + "i p", + "▁be g", + "ou se", + "an c", + "n 't", + "▁w at", + "▁thr ough", + "▁com p", + "b er", + "▁a way", + "▁c ar", + "▁e m", + "▁g et", + "▁im p", + "▁he ad", + "os s", + "▁li fe", + "▁be l", + "▁with out", + "▁m ost", + "▁p ass", + "▁m ake", + "▁con s", + "en ed", + "▁s om", + "▁t urn", + "a v", + "n g", + "▁sh all", + "▁a cc", + "▁th ose", + "▁p res", + "▁ey es", + "▁h ouse", + "i z", + "▁som et", + "▁j o", + "▁st ill", + "▁c all", + "▁n ight", + "he s", + "▁o p", + "au se", + "▁w om", + "▁l ast", + "k s", + "l ess", + "a red", + "▁com m", + "▁d on't", + "▁te ll", + "▁ ent", + "▁not hing", + "▁ne w", + "ig n", + "▁t ake", + "▁be ing", + "▁man y", + "▁wor d", + "on s", + "▁f ound", + "▁re t", + "as e", + "▁e ar", + "▁wh ile", + "▁at t", + "or y", + "i x", + "▁s er", + "▁sa w", + "▁p ut", + "n e", + "ot h", + "ie nd", + "▁pe op", + "▁w r", + "▁you ng", + "ar k", + "d y", + "ak ing", + "l es", + "▁c ount", + "▁on ce", + "▁fr iend", + "▁l a", + "en s", + "▁peop le", + "pe ct", + "or s", + "fe ct", + "▁m at", + "in ce", + "i ble", + "e red", + "▁ro om", + "▁th ree", + "▁y et", + "a il", + "▁s ame", + "▁f ather", + "▁r ight", + "▁ch ild", + "▁c our", + "i gh", + "▁pl ace", + "▁an other", + "ul t", + "i v", + "it ion", + "▁in d", + "▁w ant", + "▁th ough", + "▁n or", + "▁f ar", + "▁k ing", + "▁ha pp", + "▁he art", + "▁f ace", + "▁e nd", + "▁e ver", + "▁n at", + "th ing", + "▁lo ve", + "g et", + "▁too k", + "▁d ist", + "e ver", + "i an", + "▁h u", + "e w", + "▁ar m", + "▁in st", + "m an", + "▁wor k", + "▁l ight", + "▁ch ar", + "▁p le", + "ic t", + "▁s et", + "▁a c", + "▁loo ked", + "▁miss us", + "▁as ked", + "▁m ind", + "▁y es", + "▁su pp", + "▁int e", + "▁re p", + "c ess", + "ent ly", + "▁le ft", + "g g", + "ert ain", + "▁k e", + "is hed", + "u b", + "▁p ers", + "way s", + "▁th ings", + "al k", + "ir l", + "▁m om", + "▁s ir", + "▁w a", + "▁mom ent", + "ation s", + "▁s at", + "se l", + "▁f ind", + "ow er", + "i 
a", + "v ent", + "re w", + "▁wor ld", + "j ect", + "▁g ive", + "▁c ap", + "▁wh y", + "s o", + "▁g u", + "▁m other", + "▁g en", + "▁s w", + "▁al ways", + "d er", + "l t", + "l ing", + "▁an s", + "pp ed", + "▁so on", + "▁a ct", + "▁for m", + "▁e l", + "d d", + "▁he ard", + "re t", + "▁th ing", + "▁somet hing", + "▁seem ed", + "▁su b", + "▁do or", + "an ge", + "▁g irl", + "c ed", + "▁app e", + "it her", + "▁w ind", + "▁bec ause", + "▁d if", + "▁m on", + "s s", + "▁go ing", + "▁to ld", + "or m", + "▁h ome", + "ain ed", + "▁g ot", + "▁w ar", + "▁go d", + "au ght", + "▁g i", + "▁en g", + "▁s ur", + "n ing", + "▁hand s", + "▁wom an", + "▁fo llow", + "l and", + "a ut", + "▁v o", + "▁fe el", + "▁re l", + "▁p oss", + "c hed", + "ic al", + "p le", + "p h", + "▁bo y", + "▁ret urn", + "▁re g", + "▁re st", + "oo k", + "▁kn ew", + "n er", + "▁e ach", + "▁o h", + "▁s il", + "▁k ind", + "▁ex p", + "▁m a", + "▁c le", + "▁he l", + "i ver", + "t ing", + "▁de l", + "u al", + "▁in f", + "▁as s", + "▁wat er", + "▁con f", + "▁b re", + "▁w o", + "ce pt", + "▁bel ie", + "▁c ertain", + "▁again st", + "▁h ard", + "▁p h", + "r ow", + "▁u nt", + "▁ye ars", + "▁qu ite", + "▁s ide", + "in ess", + "in ed", + "▁ne ar", + "▁h or", + "ter s", + "i red", + "oo l", + "▁f our", + "▁fe w", + "▁d one", + "i er", + "▁c he", + "re st", + "it ed", + "m ost", + "▁bet ter", + "▁ha lf", + "▁m in", + "▁t re", + "p s", + "▁al so", + "▁c are", + "o ck", + "u ck", + "ou b", + "▁beg an", + "ull y", + "▁en ough", + "is ed", + "r u", + "▁ha ving", + "▁se en", + "▁gen er", + "▁l ady", + "▁d ra", + "▁h um", + "ap s", + "ot t", + "▁p ur", + "ak en", + "ro ss", + "y ing", + "▁t er", + "▁h our", + "▁in de", + "an k", + "▁call ed", + "i al", + "as on", + "▁be h", + "▁do es", + "▁who le", + "▁m orn", + "▁turn ed", + "▁ple as", + "▁st e", + "▁re f", + "▁g ave", + "en se", + "▁o cc", + "i b", + "▁cour se", + "▁in s", + "re am", + "get her", + "ut h", + "▁b oth", + "▁s ou", + "▁c ur", + "▁ad d", + "e en", + "▁c ol", + "▁re ad", 
+ "we en", + "sel ves", + "▁am ong", + "▁bet ween", + "▁in c", + "▁ke ep", + "▁be aut", + "ul ar", + "▁po or", + "▁it 's", + "▁su re", + "▁morn ing", + "▁wh ite", + "g ed", + "▁n ame", + "▁de ar", + "▁to ward", + "ut e", + "▁sm all", + "▁wh om", + "▁re pl", + "▁s k", + "▁l ar", + "▁fe lt", + "b o", + "os ed", + "at ing", + "▁my self", + "▁op en", + "▁s ix", + "▁her self", + "▁how ever", + "▁b u", + "o nd", + "ain t", + "x t", + "▁f ore", + "▁in ter", + "▁e v", + "▁h igh", + "ct ion", + "▁hu nd", + "▁st ood", + "▁hund red", + "as ter", + "▁t ra", + "▁sh ow", + "▁s ent", + "i fe", + "▁r ound", + "▁s im", + "▁d r", + "▁g ra", + "▁word s", + "▁day s", + "▁al most", + "a le", + "ve l", + "▁po int", + "ent s", + "▁g re", + "▁e ight", + "c es", + "at es", + "dd en", + "▁f am", + "▁st and", + "▁b us", + "▁l and", + "▁ ed", + "▁me an", + "un g", + "h aps", + "▁su n", + "u res", + "▁s ince", + "i et", + "ir d", + "▁per haps", + "n ed", + "▁s le", + "is s", + "▁b est", + "▁su dden", + "▁d ark", + "▁repl ied", + "▁vo ice", + "▁m et", + "▁any thing", + "▁t ill", + "▁under st", + "▁b ar", + "it s", + "▁unt il", + "in s", + "ou d", + "▁bl ack", + "▁b ro", + "▁he ar", + "▁look ing", + "▁c ried", + "▁you '", + "▁f act", + "am p", + "▁pr in", + "▁l ess", + "▁l ay", + "▁ne xt", + "▁la w", + "u p", + "▁p ower", + "▁pro p", + "n ot", + "re nt", + "▁br ought", + "ate ly", + "ent y", + "▁count ry", + "▁hel p", + "al s", + "▁qu est", + "m ed", + "▁u se", + "▁v is", + "▁s n", + "▁i' m", + "f ully", + "▁sp o", + "▁to gether", + "▁ne ed", + "▁a ir", + "▁ad v", + "▁pers on", + "▁inde ed", + "▁cont in", + "▁un c", + "one y", + "▁g ent", + "▁pres ent", + "▁a w", + "▁p ar", + "ow s", + "u red", + "▁f ull", + "t ain", + "▁r un", + "▁r ather", + "▁i de", + "▁co nd", + "nd ed", + "▁l at", + "▁s y", + "b e", + "d u", + "▁h ar", + "▁fe et", + "▁f in", + "et er", + "▁f all", + "ce i", + "▁f ive", + "▁m il", + "▁b ed", + "o c", + "▁do ct", + "▁inte rest", + "ress ed", + "▁mat ter", + "▁l ord", + "▁g 
one", + "▁ es", + "f ort", + "▁de ath", + "▁w ife", + "▁ser v", + "▁p at", + "er ing", + "oub t", + "▁ad m", + "▁t alk", + "▁t aken", + "▁ar t", + "▁t ri", + "▁other s", + "▁ho pe", + "as h", + "a z", + "▁ex t", + "▁can not", + "ie f", + "▁spe ak", + "▁l au", + "▁them selves", + "▁al ong", + "▁d ire", + "o ve", + "m b", + "p r", + "▁b es", + "▁c ou", + "▁m or", + "t en", + "▁gent le", + "ur ing", + "▁f ire", + "▁lar ge", + "▁p ol", + "▁c at", + "▁s we", + "ent ion", + "ver s", + "▁th us", + "a pp", + "▁se c", + "▁pl ay", + "▁re al", + "▁pr om", + "ment s", + "we red", + "ie ld", + "ain s", + "is on", + "ac hed", + "▁th ou", + "▁re ason", + "▁th ous", + "it ing", + "▁br other", + "ak es", + "▁thous and", + "on t", + "▁m oney", + "▁rem em", + "▁de p", + "▁ans wered", + "▁tr ue", + "▁child ren", + "▁beh ind", + "o y", + "▁s ound", + "ant s", + "ab ly", + "▁w ood", + "us ed", + "▁de c", + "▁who se", + "o d", + "▁e le", + "▁tw enty", + "▁r a", + "it u", + "▁belie ve", + "▁wo nder", + "en e", + "▁in v", + "▁h on", + "ar ing", + "s h", + "u ed", + "▁su ff", + "▁o pp", + "▁d oubt", + "▁re c", + "t on", + "▁ho ld", + "▁dif fe", + "▁pass ed", + "▁c or", + "m e", + "id ed", + "it ies", + "▁m er", + "▁s ing", + "▁nat ure", + "▁al one", + "▁de ad", + "▁p ri", + "k en", + "l ic", + "▁re d", + "▁b ur", + "ac es", + "▁cl ose", + "▁go ld", + "▁st art", + "▁h ur", + "▁f ur", + "o g", + "anc es", + "▁as k", + "▁doct or", + "▁s on", + "▁gr ound", + "w er", + "et s", + "▁se a", + "▁str ong", + "▁le ave", + "▁comp an", + "▁i' ll", + "er y", + "c y", + "ill ed", + "ep t", + "id es", + "t le", + "▁c e", + "▁ob s", + "bo dy", + "▁fe ll", + "▁s ign", + "co nd", + "▁m ount", + "▁f air", + "▁gi ven", + "▁there fore", + "an e", + "▁i r", + "▁de ep", + "if ul", + "f ic", + "y s", + "▁of ten", + "▁bo dy", + "u nt", + "▁sh ort", + "▁t em", + "▁f a", + "▁m aster", + "▁ear th", + "▁p ap", + "ce ed", + "▁st re", + "▁se cond", + "▁for t", + "b ed", + "g th", + "ow ed", + "▁hor se", + "id d", + "▁m 
ad", + "u ally", + "▁p a", + "▁ch r", + "▁or der", + "▁t en", + "ve red", + "▁con st", + "▁w ish", + "▁f if", + "▁e as", + "▁c ir", + "▁d ro", + "a im", + "he n", + "▁c a", + "▁re ally", + "re ad", + "cei ved", + "▁i ll", + "▁fe ar", + "os ition", + "▁underst and", + "▁sp ir", + "▁l ist", + "▁ab s", + "▁sp r", + "ac ed", + "▁quest ion", + "ang er", + "▁every thing", + "aught er", + "▁af f", + "▁w all", + "▁com ing", + "ch ing", + "re ady", + "id er", + "▁ab ove", + "▁pr ince", + "▁al ready", + "▁le ast", + "▁re co", + "▁ex pl", + "▁st ep", + "▁us ed", + "▁r u", + "▁it self", + "is ter", + "▁ne cess", + "▁c ase", + "▁ar ound", + "h n", + "▁sou l", + "▁sudden ly", + "g er", + "▁l ad", + "▁even ing", + "▁m ag", + "▁gener al", + "▁n um", + "im es", + "▁kn own", + "▁w al", + "▁qu ick", + "iz ed", + "▁m us", + "▁s ch", + "▁cap tain", + "▁that 's", + "if ic", + "▁whe ther", + "▁le ar", + "g n", + "▁with in", + "m en", + "▁li ve", + "ver n", + "▁tim es", + "▁ex pect", + "▁st ate", + "▁friend s", + "▁br ing", + "▁s ort", + "▁wom en", + "▁t able", + "▁me et", + "▁jo hn", + "▁cir c", + "▁su m", + "▁return ed", + "il ed", + "▁d ri", + "▁he ld", + "▁ex c", + "▁b ig", + "▁say s", + "▁per fect", + "▁le a", + "▁obs er", + "▁el se", + "▁d uring", + "id ent", + "▁h us", + "t ed", + "▁beaut iful", + "▁cle ar", + "▁e ither", + "▁to wn", + "▁s ight", + "▁l ost", + "▁sle ep", + "▁me ans", + "▁fo ot", + "▁c ut", + "▁c al", + "▁k ept", + "▁r an", + "i ence", + "▁pro f", + "te red", + "he re", + "et y", + "▁fe llow", + "▁can 't", + "▁m ist", + "▁p ast", + "▁d ream", + "ag es", + "▁bec ame", + "▁pre t", + "▁dis c", + "▁b ad", + "▁m aking", + "ut ion", + "▁ob ject", + "▁toward s", + "▁l ow", + "u ght", + "▁de v", + "▁hum an", + "▁man ner", + "▁str ange", + "▁ye ar", + "o ld", + "i ent", + "in es", + "▁se ver", + "m on", + "▁an n", + "air s", + "c hes", + "▁c ity", + "▁somet imes", + "' d", + "▁ro se", + "▁ est", + "il ity", + "▁w alk", + "▁re ady", + "▁p al", + "▁le g", + "▁ro ad", + "i 
ans", + "ci ous", + "▁c orn", + "▁th y", + "▁co ld", + "ll y", + "ious ly", + "l ish", + "▁st ra", + "m er", + "▁b at", + "ow ing", + "ie w", + "▁chr ist", + "▁s qu", + "▁tr uth", + "c ri", + "ll ed", + "▁th ir", + "▁did n't", + "b ert", + "▁so ci", + "b r", + "▁b it", + "▁sub ject", + "▁sh ip", + "▁m ur", + "▁app ro", + "▁p ie", + "▁ans wer", + "▁f ree", + "▁bus iness", + "▁ ut", + "a pe", + "▁appe ar", + "▁r iver", + "▁st o", + "▁c ast", + "▁fam ily", + "▁j ud", + "▁ex cl", + "▁let ter", + "ing ly", + "ri e", + "▁ha ir", + "ot e", + "▁arm s", + "▁bec ome", + "er n", + "ou ble", + "▁diffe rent", + "▁v al", + "f fect", + "▁nat ur", + "▁poss ible", + "▁sever al", + "▁f ine", + "a h", + "▁le ad", + "▁for g", + "▁exp ress", + "l i", + "▁su s", + "▁gl ad", + "o on", + "▁ar ri", + "▁bl ood", + "itt ing", + "▁qu iet", + "ren ce", + "▁ide a", + "▁a ble", + "itt ed", + "st er", + "▁char ac", + "▁beg in", + "▁ch ur", + "▁t ou", + "▁st ory", + "▁ey e", + "b and", + "at ive", + "▁gr and", + "▁cons ider", + "▁ac ross", + "▁p en", + "▁ex cept", + "▁f re", + "▁w in", + "▁e qu", + "et h", + "▁c ent", + "is f", + "▁part ic", + "▁dif fic", + "▁wind ow", + "▁sur pr", + "ll ect", + "▁pro v", + "▁dire ct", + "▁con c", + "e y", + "a w", + "▁go vern", + "▁dis co", + "▁w ild", + "▁do g", + "▁fl o", + "▁so ft", + "te en", + "▁c ross", + "as ed", + "▁e ffect", + "▁s or", + "▁long er", + "▁he n", + "▁follow ed", + "▁so ld", + "▁the e", + "▁p ub", + "▁hus band", + "ard s", + "ant ly", + "b y", + "▁a p", + "▁supp ose", + "▁res pect", + "t s", + "▁h ast", + "▁s al", + "▁comp le", + "▁he av", + "▁happ y", + "▁r ich", + "▁c reat", + "un e", + "▁t aking", + "▁re qu", + "▁st ay", + "▁spo ke", + "▁d aughter", + "▁we e", + "▁ ve", + "▁d u", + "▁gre en", + "▁an im", + "▁d in", + "' ll", + "▁b ird", + "al th", + "▁me re", + "▁g ard", + "n y", + "le y", + "▁poss ess", + "em pt", + "▁re ached", + "▁appe ared", + "o v", + "▁ex ist", + "in ation", + "▁pret ty", + "▁remem ber", + "▁he a", + "▁op ened", + 
"▁to m", + "ang ed", + "▁sl ow", + "▁im ag", + "▁i' ve", + "r act", + "▁say ing", + "k ing", + "ut es", + "▁comm on", + "▁occ as", + "▁b ook", + "▁r us", + "am es", + "ic es", + "▁br ight", + "m s", + "▁sat isf", + "▁s ense", + "▁f av", + "▁su cc", + "um p", + "is ing", + "▁l u", + "▁acc ord", + "ter n", + "▁bre ak", + "▁ex per", + "▁mon th", + "u se", + "▁de m", + "▁sc ar", + "▁contin ued", + "▁sec ret", + "▁chur ch", + "▁t ree", + "▁st ri", + "▁car ried", + "▁c ry", + "nd ing", + "▁spir it", + "▁want ed", + "er ic", + "▁certain ly", + "▁comm and", + "▁d est", + "▁mo ve", + "ou n", + "▁swe et", + "▁stre et", + "▁o ught", + "▁acc ount", + "▁de f", + "h am", + "▁pre p", + "▁s ens", + "▁es c", + "▁ro ck", + "ot s", + "▁de cl", + "▁pur p", + "ri age", + "ou th", + "ow ers", + "▁dra w", + "▁e at", + "▁b reat", + "▁charac ter", + "im e", + "c ul", + "med i", + "▁st ud", + "▁sch ool", + "itu de", + "▁hea ven", + "▁feel ing", + "▁s ad", + "▁reg ard", + "em ent", + "▁p ain", + "▁wor th", + "▁b ra", + "ne y", + "▁d ut", + "▁sm o", + "aim ed", + "▁tr ans", + "▁del ight", + "▁qu ar", + "▁h ung", + "▁m ot", + "▁bl ue", + "▁h ot", + "▁h ill", + "▁d iv", + "um b", + "▁dis app", + "▁mar g", + "▁lau gh", + "id ence", + "▁pro du", + "▁succ ess", + "ur y", + "s on", + "▁f ast", + "▁eng lish", + "▁d ress", + "▁h at", + "▁ter ri", + "▁p ort", + "▁ne ither", + "▁cour t", + "▁se ven", + "▁f ight", + "▁prin cess", + "▁li ved", + "▁v iew", + "▁im medi", + "▁se lf", + "▁v ar", + "▁hour s", + "▁m ill", + "▁so l", + "▁ex am", + "▁t ried", + "▁w on't", + "▁ent ered", + "▁dis p", + "t o", + "r ic", + "▁car ry", + "▁imp ort", + "▁an g", + "z e", + "on y", + "▁d anger", + "led ge", + "▁off ic", + "▁c ause", + "▁n one", + "▁for ward", + "▁unc le", + "▁to r", + "▁d et", + "as k", + "▁l en", + "▁fur ther", + "▁p ay", + "▁add ed", + "▁fr ont", + "r or", + "▁g e", + "▁partic ular", + "▁de al", + "▁pr ot", + "▁l ed", + "▁ac qu", + "▁pr ay", + "▁e ff", + "▁happ ened", + "▁ch ief", + "le ct", + "▁wal 
ked", + "▁lat er", + "▁jo y", + "i ar", + "d ay", + "▁or d", + "▁al th", + "▁com fort", + "▁pro b", + "▁ma j", + "▁af fect", + "▁pub lic", + "▁b ene", + "en ing", + "▁alth ough", + "g r", + "▁sh o", + "▁f ig", + "res h", + "▁f ail", + "u ct", + "u g", + "al ity", + "▁me m", + "▁seem s", + "▁your self", + "sh ip", + "e ad", + "i am", + "▁num ber", + "s ide", + "▁a h", + "▁do ing", + "▁li ving", + "are nt", + "▁des p", + "iz e", + "oo f", + "▁f ield", + "▁re ceived", + "▁sh ad", + "▁be y", + "▁bey ond", + "▁ph il", + "▁l ine", + "▁vis it", + "in ct", + "ri g", + "▁part y", + "▁gard en", + "▁j e", + "▁m outh", + "▁ha ll", + "▁qu een", + "▁bo at", + "▁be ar", + "▁am eric", + "is m", + "▁gentle man", + "▁v i", + "ir t", + "u ff", + "▁la id", + "ra id", + "▁occas ion", + "▁ent ire", + "▁a ge", + "▁s ister", + "▁cl ot", + "▁re pe", + "ous ly", + "▁pr ison", + "▁acc om", + "▁wh is", + "▁near ly", + "▁tre es", + "il ing", + "if f", + "▁eight een", + "b it", + "ward s", + "▁ear ly", + "▁t al", + "▁l ab", + "▁for th", + "m ing", + "on es", + "▁m ed", + "▁tr y", + "▁d a", + "il t", + "anc ed", + "▁prin ci", + "▁en em", + "▁think ing", + "▁ch ance", + "w here", + "▁c re", + "▁min utes", + "▁an x", + "▁mar y", + "▁p ict", + "▁wa it", + "▁v ill", + "▁st ren", + "▁af raid", + "▁cr ow", + "▁sm ile", + "▁l ate", + "▁eng land", + "▁pleas ure", + "▁a unt", + "▁new s", + "▁w is", + "▁f le", + "▁see ing", + "▁su per", + "▁fa ith", + "▁ro b", + "im ent", + "o int", + "▁b ill", + "ll ing", + "▁ne igh", + "▁tr ouble", + "▁sil ence", + "▁pl ain", + "▁there 's", + "are t", + "pe nd", + "▁excl aimed", + "ren ch", + "g y", + "▁mil es", + "p ly", + "▁gl ass", + "▁d rew", + "▁neigh b", + "el s", + "▁m ine", + "▁pr act", + "▁heav y", + "▁stand ing", + "▁se vent", + "▁sh ar", + "▁ch ange", + "▁necess ary", + "▁ch ap", + "▁purp ose", + "▁in qu", + "▁natur al", + "▁d eter", + "ic ked", + "▁b ott", + "▁hard ly", + "▁be ll", + "▁to p", + "▁c aught", + "fe red", + "w h", + "i ves", + "ound ed", + "▁a 
uth", + "▁circ um", + "▁f ing", + "▁sto pped", + "u c", + "▁w it", + "am ent", + "▁op in", + "▁a v", + "▁pri v", + "ain ing", + "▁inst ead", + "ru pt", + "▁g rew", + "▁lo ved", + "▁is land", + "▁kn ight", + "▁ag o", + "▁len gth", + "▁in n", + "▁pe ace", + "l s", + "in ary", + "i or", + "u es", + "▁th ird", + "us h", + "▁beaut y", + "▁h ig", + "▁he 's", + "t he", + "f orm", + "he ad", + "ic ally", + "as p", + "anc y", + "▁deter m", + "▁stra ight", + "▁c ra", + "in ing", + "pp er", + "l er", + "▁inf l", + "▁th or", + "▁con vers", + "▁bes ides", + "▁p osition", + "▁thir ty", + "▁d en", + "ra ge", + "▁att ention", + "m a", + "▁con v", + "ag er", + "▁his t", + "o red", + "▁com es", + "ag ed", + "▁for ce", + "▁s itting", + "▁ple ase", + "te nd", + "it er", + "▁what ever", + "▁inf orm", + "▁h op", + "▁ch air", + "▁bu ild", + "▁b ab", + "ust om", + "▁girl s", + "▁r om", + "▁f rench", + "▁str uck", + "▁p ull", + "▁a st", + "▁li e", + "▁wr ong", + "▁know ledge", + "▁gra ce", + "▁scar ce", + "g hed", + "▁res ol", + "▁wat ch", + "▁thought s", + "▁r id", + "▁att empt", + "▁fif ty", + "▁r ap", + "▁bo x", + "h ood", + "▁get ting", + "▁ ver", + "▁f at", + "▁compan y", + "▁ar r", + "▁crow d", + "▁b urn", + "▁sl ight", + "▁cl ass", + "▁sou th", + "▁d ie", + "▁ex act", + "▁dr ink", + "▁en j", + "▁th ick", + "▁din ner", + "▁sa ve", + "▁ma id", + "▁pl an", + "▁sa int", + "▁immedi ately", + "i ers", + "▁b orn", + "i us", + "▁re v", + "▁te ars", + "ist s", + "▁t reat", + "us ion", + "▁me ant", + "▁boy s", + "pp ing", + "▁slow ly", + "▁in cl", + "▁l im", + "▁d ied", + "ic ed", + "▁com pl", + "▁f ool", + "▁fore st", + "▁su gg", + "▁p ost", + "▁ac cept", + "▁res ult", + "▁auth or", + "nd on", + "ce ive", + "▁sugg est", + "ci ent", + "▁st one", + "▁fr ight", + "▁pap er", + "▁con se", + "▁j our", + "▁t y", + "▁en c", + "▁quick ly", + "▁cont r", + "▁you th", + "▁se nd", + "▁v ict", + "if ied", + "▁bel ong", + "▁war m", + "▁f ix", + "▁imp oss", + "▁bes ide", + "▁ er", + "▁to ne", + "▁c amp", + 
"▁des ire", + "▁b ound", + "▁m akes", + "▁marg aret", + "▁nor th", + "▁br own", + "▁mo on", + "▁li ps", + "▁pl aced", + "v al", + "▁circum st", + "▁f ood", + "▁f illed", + "ic s", + "if t", + "an n", + "▁lo ndon", + "▁dist ance", + "g ing", + "▁stren gth", + "▁i d", + "▁flo or", + "▁for get", + "▁ob l", + "▁m id", + "ri es", + "it ions", + "b s", + "▁spr ing", + "▁you' re", + "▁vi ol", + "▁j ack", + "▁po ck", + "oo ks", + "▁follow ing", + "▁s ac", + "▁rem ained", + "ar ch", + "▁gr ow", + "▁sn ow", + "▁govern ment", + "▁b all", + "▁h ors", + "▁n ar", + "ad ed", + "▁bro ken", + "▁lau ghed", + "▁des cri", + "▁sa fe", + "itt en", + "ive ly", + "▁prof ess", + "▁o '", + "am ed", + "▁dep art", + "▁eas y", + "ou red", + "▁u nd", + "▁cou n", + "▁than k", + "▁know s", + "▁wa iting", + "d om", + "at s", + "▁g er", + "▁v an", + "▁an ne", + "▁hors es", + "u gg", + "▁d read", + "▁un e", + "g es", + "ac y", + "▁pro ceed", + "▁g az", + "▁sh out", + "▁start ed", + "ent ed", + "▁comple te", + "o pe", + "▁g all", + "de red", + "▁w ide", + "i res", + "▁ne ck", + "as ure", + "ist ed", + "▁serv ice", + "▁pie ce", + "ci ally", + "en ces", + "▁sa il", + "▁pal ace", + "er v", + "▁gu ard", + "▁do ll", + "▁talk ing", + "▁man 's", + "▁li ft", + "▁gra ve", + "▁wee k", + "le t", + "▁imposs ible", + "▁eff ort", + "▁im m", + "▁arm y", + "we ll", + "▁diffic ult", + "u nd", + "▁f resh", + "▁f un", + "re me", + "▁st op", + "▁m ess", + "▁g ar", + "▁de g", + "▁inc re", + "▁corn er", + "▁soci ety", + "▁we ak", + "▁sh ut", + "▁h y", + "▁pro per", + "ac hing", + "▁cl oud", + "idd le", + "iv id", + "▁dem and", + "▁n ine", + "▁s it", + "▁reco gn", + "▁be at", + "us s", + "▁turn ing", + "▁sk y", + "▁opin ion", + "▁sing le", + "p ic", + "▁f ly", + "▁l ang", + "▁m ass", + "ce ll", + "▁out side", + "▁k iss", + "▁tr ust", + "▁occ up", + "▁ev il", + "▁bel ow", + "▁appear ance", + "u it", + "▁after n", + "▁gl o", + "▁g un", + "▁w est", + "en cy", + "p ar", + "▁show ed", + "▁convers ation", + "is es", + "▁con n", 
+ "▁could n't", + "▁run ning", + "▁m ention", + "▁great er", + "▁mus ic", + "▁breat h", + "as es", + "▁n in", + "▁an t", + "are r", + "▁mor row", + "▁b ank", + "▁es pe", + "▁p eter", + "or k", + "ci al", + "▁pres ence", + "▁bat tle", + "▁win ter", + "he red", + "▁prob ably", + "▁clot hes", + "▁f ash", + "▁mar k", + "▁w ished", + "ve re", + "▁co ll", + "▁em b", + "▁kn e", + "▁mar ried", + "▁arri ved", + "▁p un", + "▁e vent", + "us hed", + "▁suff ic", + "▁e ager", + "▁form er", + "▁gi ving", + "▁p op", + "▁sa nd", + "▁ne g", + "▁us ual", + "▁rel ig", + "▁sim ple", + "▁sy m", + "it ation", + "▁g ro", + "or ies", + "▁mo ved", + "▁month s", + "▁spe aking", + "▁p et", + "▁sil ent", + "▁c ab", + "▁mount ain", + "▁express ion", + "g ar", + "▁co vered", + "▁hu nt", + "▁aftern oon", + "ap ed", + "▁occ ur", + "rie f", + "▁st ates", + "▁ z", + "st r", + "▁lo c", + "l ight", + "▁sh ore", + "c he", + "▁eas ily", + "▁p ale", + "un ity", + "▁rem ark", + "▁ph ys", + "▁begin ning", + "▁dut y", + "▁chap ter", + "▁infl u", + "ch o", + "▁con cl", + "am b", + "▁inst ant", + "▁pol it", + "z z", + "▁enj oy", + "▁s ick", + "▁rem ain", + "u el", + "▁st ream", + "▁fig ure", + "a ld", + "▁t ur", + "▁p ath", + "▁v ol", + "▁min ute", + "▁pleas ant", + "▁scarce ly", + "▁cons cious", + "▁terri ble", + "▁k ill", + "▁ra ised", + "▁fash ion", + "▁tw el", + "y al", + "▁lea ving", + "▁twel ve", + "at ure", + "▁f ut", + "▁th rew", + "▁st ar", + "▁fl owers", + "ol og", + "▁tr ying", + "ri b", + "▁sw ord", + "▁t all", + "▁mar ry", + "▁b en", + "▁expect ed", + "▁accord ing", + "▁for ty", + "▁st ick", + "in al", + "▁gu ess", + "▁sil ver", + "▁ir on", + "▁obl ig", + "▁off ice", + "▁rap id", + "▁lad ies", + "▁espe cially", + "i pped", + "ort ed", + "▁bre ad", + "e ch", + "▁te nder", + "or th", + "▁lear ned", + "▁b ooks", + "▁is n't", + "▁surpr ise", + "▁wr ite", + "▁pur s", + "pe red", + "▁wr itten", + "▁k illed", + "▁conse qu", + "▁ex h", + "▁pl aces", + "▁cond ition", + "▁dire ction", + "▁ch o", + "ul ty", 
+ "j o", + "m it", + "▁entire ly", + "ter ing", + "▁ent er", + "▁act ion", + "w ise", + "▁su c", + "ib ly", + "▁happ iness", + "▁dec ided", + "▁gold en", + "▁lang u", + "en ess", + "▁not e", + "▁un less", + "u ous", + "▁f al", + "al ed", + "▁you' ll", + "▁wonder ful", + "ound s", + "um e", + "' re", + "▁sh ook", + "er 's", + "oo p", + "one l", + "▁perfect ly", + "▁ge or", + "nd ered", + "▁bro ad", + "at ic", + "▁cl osed", + "a 's", + "▁sp ot", + "te nded", + "▁lat ter", + "▁step s", + "▁mere ly", + "▁hist ory", + "f er", + "▁w ise", + "is hing", + "os ing", + "▁m iddle", + "ide red", + "▁underst ood", + "▁enem y", + "▁so le", + "ll ig", + "▁j ew", + "▁sim ply", + "g an", + "▁cond uct", + "▁t ast", + "▁bo ard", + "▁sa v", + "▁would n't", + "▁sh ot", + "▁rep ly", + "▁ch anged", + "m n", + "▁gr ass", + "▁fin ally", + "▁adm ir", + "it al", + "▁shar p", + "it ch", + "▁fort une", + "▁sum mer", + "▁exper ience", + "▁suc ceed", + "g ress", + "ut ed", + "▁o rig", + "ret ched", + "▁jour ney", + "▁ex cell", + "▁obser ved", + "a x", + "▁after wards", + "f ast", + "s y", + "▁b ow", + "▁fl at", + "▁pers ons", + "▁le an", + "▁ear n", + "▁bro ke", + "▁m ir", + "▁f it", + "os p", + "▁mar riage", + "▁rep res", + "i o", + "▁l ying", + "un k", + "▁tra ve", + "▁s itu", + "▁list en", + "▁acqu aint", + "▁r ing", + "ci ence", + "▁f aint", + "ol ute", + "▁cal m", + "b ered", + "▁li ves", + "▁esc ape", + "▁bene ath", + "ous es", + "▁cl im", + "▁bl ess", + "▁repe ated", + "▁pock et", + "est s", + "▁t ail", + "▁pass ion", + "▁d ick", + "▁v en", + "os es", + "cl ock", + "▁m ut", + "▁bec om", + "▁o per", + "▁o' clock", + "▁f ish", + "▁l ou", + "se mb", + "▁pre v", + "▁all owed", + "▁fam il", + "he l", + "▁g ate", + "▁sp ite", + "iver s", + "▁he alth", + "iss ion", + "▁i gn", + "▁re ach", + "▁c and", + "▁r ain", + "▁em pl", + "▁b an", + "▁str ugg", + "▁fir m", + "▁bit ter", + "▁sor ry", + "b ing", + "▁father 's", + "▁tem per", + "▁mad ame", + "pl es", + "▁f urn", + "▁fut ure", + "um ed", + "▁n 
ice", + "▁se par", + "▁pres ently", + "▁circumst ances", + "▁conn ect", + "id ing", + "▁set t", + "k es", + "▁l oud", + "▁wor se", + "▁w and", + "▁sp read", + "▁i' d", + "▁let ters", + "▁ye llow", + "▁mag n", + "▁pass ing", + "▁k it", + "▁pleas ed", + "▁dark ness", + "▁rem ar", + "idd en", + "c ome", + "▁te a", + "▁c iv", + "▁ap art", + "▁disapp e", + "▁import ant", + "▁leg s", + "▁n ation", + "▁del ic", + "▁d ressed", + "▁g ame", + "▁wall s", + "e c", + "▁d ry", + "▁v irt", + "▁d im", + "id ently", + "re l", + "▁r ub", + "▁abs olute", + "▁bl ind", + "▁disco vered", + "▁exact ly", + "▁d am", + "ott en", + "▁sor row", + "m y", + "▁c ost", + "fe rence", + "▁empl oy", + "vel op", + "▁c ous", + "▁be ast", + "▁spe c", + "▁opp ort", + "▁e ars", + "▁dro pped", + "▁sub st", + "▁che e", + "▁prot ect", + "il s", + "▁sm iled", + "in a", + "▁res p", + "▁prom ise", + "▁b ag", + "▁h ost", + "ur s", + "▁creat ure", + "▁not ice", + "▁know ing", + "▁head s", + "▁conc er", + "▁se at", + "ish ment", + "▁ind ivid", + "▁exist ence", + "▁determ ined", + "le nd", + "▁st orm", + "ro y", + "our s", + "▁con ce", + "ang ing", + "▁fix ed", + "▁p ress", + "▁maj or", + "o ved", + "▁v es", + "i od", + "▁lear n", + "▁mot ion", + "▁em pt", + "▁lea ves", + "▁bott om", + "▁ar g", + "iet y", + "▁no body", + "▁pro s", + "qu e", + "▁ut ter", + "▁p ick", + "ac ked", + "▁inte llig", + "▁he s", + "▁st ir", + "▁pre vent", + "▁ass ist", + "▁d om", + "▁dis g", + "▁adv ant", + "er able", + "▁v ent", + "um ent", + "▁t ired", + "re ct", + "as hed", + "act ion", + "▁cons idered", + "▁wr ote", + "▁h ouses", + "▁su it", + "▁che er", + "▁cast le", + "▁p ra", + "▁per form", + "anc ing", + "▁cle an", + "ru ct", + "▁st ro", + "▁fre qu", + "▁draw ing", + "▁l uck", + "▁ha bit", + "id ge", + "e ll", + "▁on es", + "▁no ble", + "▁sp lend", + "▁hon or", + "z en", + "▁pa id", + "▁spe ech", + "▁est ab", + "▁u r", + "ist r", + "▁individ ual", + "in ite", + "▁v all", + "▁bird s", + "ro du", + "▁d ar", + "▁all ow", + "▁conf 
ess", + "▁imp ress", + "▁prop ert", + "▁j ane", + "▁s ong", + "▁var ious", + "▁nar row", + "▁mo der", + "▁belie ved", + "ay s", + "▁ext ra", + "▁p ure", + "ar ily", + "▁per iod", + "▁shad ow", + "▁some wh", + "▁m al", + "▁c ott", + "▁ext reme", + "▁jud ge", + "▁vill age", + "▁ro yal", + "▁somewh at", + "▁l ower", + "▁ha m", + "▁ag ree", + "▁remem bered", + "▁ast on", + "ent h", + "▁decl ared", + "p an", + "▁tr ain", + "▁part s", + "▁col onel", + "am ber", + "▁break fast", + "▁sure ly", + "▁s in", + "ay ed", + "▁sc ene", + "g o", + "▁great est", + "▁influ ence", + "▁c ustom", + "it ary", + "▁anim al", + "▁sa ke", + "▁mo d", + "▁sold iers", + "in y", + "▁an cient", + "▁dra wn", + "▁ev idently", + "▁way s", + "▁look s", + "▁rev ol", + "at or", + "ant ed", + "▁ref lect", + "▁pict ure", + "▁like ly", + "▁sh r", + "▁law s", + "▁hold ing", + "▁diffic ulty", + "▁in j", + "▁me l", + "▁cou rage", + "n es", + "▁m ort", + "▁tr oub", + "▁bur st", + "▁ang ry", + "▁pr oud", + "gg ed", + "▁spo ken", + "is ion", + "▁des ert", + "pt ion", + "▁com b", + "▁app arent", + "r ing", + "▁wat ched", + "n a", + "▁e ast", + "▁sh op", + "▁ag re", + "▁priv ate", + "est y", + "▁j ul", + "▁fin ished", + "▁anx ious", + "ot ion", + "▁fif teen", + "▁soci al", + "u nder", + "▁dis m", + "▁tou ch", + "▁w ine", + "▁att ack", + "▁ide as", + "▁geor ge", + "a f", + "re r", + "oo se", + "▁sp ace", + "▁sc r", + "▁ins ide", + "▁gentle men", + "▁civ il", + "i ently", + "▁form ed", + "▁f ol", + "▁go es", + "▁you' ve", + "▁th in", + "▁sur f", + "▁serv ant", + "▁b al", + "▁co ver", + "▁our selves", + "▁fall en", + "▁hen ry", + "▁l ot", + "i um", + "▁ad vent", + "▁car riage", + "▁bab y", + "▁ele ct", + "▁to ng", + "▁app re", + "▁every body", + "ud ed", + "▁comm un", + "▁in e", + "it ive", + "▁wa ited", + "c ise", + "▁gr ou", + "he t", + "▁v ain", + "▁imp ro", + "▁fav or", + "er ial", + "▁spe ed", + "▁wind ows", + "▁care fully", + "▁i ce", + "▁no ise", + "▁her o", + "▁j im", + "▁will iam", + "▁pe cul", + "▁prom 
ised", + "▁walk ing", + "▁forg otten", + "▁oblig ed", + "▁earn est", + "▁m ain", + "▁l ose", + "▁gl ance", + "▁ves sel", + "▁gr ad", + "▁th ro", + "▁bo d", + "▁should er", + "▁met h", + "▁anim als", + "▁not iced", + "ab les", + "▁pecul iar", + "▁f ier", + "▁p ot", + "▁quiet ly", + "▁c up", + "▁ser ious", + "▁tre mb", + "▁gener ally", + "▁americ an", + "▁sym p", + "r al", + "▁d on", + "▁fr ance", + "ict ion", + "▁propert y", + "▁should ers", + "▁str anger", + "▁s an", + "▁c ow", + "▁what 's", + "▁d ust", + "▁affect ion", + "▁hands ome", + "▁hig her", + "i ant", + "nd ay", + "▁we l", + "▁po et", + "▁sl a", + "▁dist inct", + "▁m am", + "▁p ier", + "ac ing", + "ag ue", + "▁gr own", + "u ly", + "▁d '", + "▁ch amber", + "▁des ce", + "▁mur m", + "st em", + "▁person al", + "▁f ancy", + "▁of fered", + "os ite", + "ons ie", + "▁bu ilt", + "▁ed ge", + "▁whis pered", + "▁sk in", + "▁pie ces", + "it ated", + "c her", + "os ity", + "▁p it", + "▁cont ro", + "▁f aces", + "▁sp ent", + "▁inter rupt", + "h ow", + "is ters", + "▁but ter", + "▁de velop", + "▁un k", + "h ip", + "▁he at", + "▁fo nd", + "▁co at", + "▁tou ched", + "▁h ol", + "ing u", + "▁p i", + "▁r ace", + "▁j ump", + "▁surpr ised", + "ot ed", + "▁de fe", + "en ced", + "▁was n't", + "▁we ar", + "and on", + "▁f an", + "ac her", + "▁ar ch", + "▁ed uc", + "▁bra ve", + "at hered", + "▁e ld", + "▁we alth", + "▁sy stem", + "▁ger man", + "▁fal se", + "w ood", + "▁d are", + "ak ed", + "▁cous in", + "▁f er", + "ke y", + "▁l in", + "▁inte llect", + "▁prep ared", + "▁fing ers", + "▁sur r", + "▁mount ains", + "i pp", + "▁opport unity", + "a ff", + "▁b are", + "▁d or", + "▁int rodu", + "▁co llect", + "▁love ly", + "▁r ag", + "▁cr own", + "▁mat ters", + "▁compan ion", + "▁we ather", + "▁al ar", + "▁inn oc", + "▁r is", + "▁m ix", + "▁l ake", + "▁st ore", + "▁un h", + "▁mean ing", + "▁mem ory", + "o ver", + "▁b and", + "le ep", + "▁find ing", + "e e", + "▁char ge", + "▁gr at", + "▁att ract", + "▁gr ay", + "▁quar ter", + "▁av o", + 
"▁great ly", + "▁m ach", + "▁in h", + "▁as leep", + "▁par is", + "▁d av", + "▁al to", + "▁off er", + "▁opp osite", + "oun ced", + "er ve", + "▁bre ast", + "n own", + "▁read ing", + "▁alto gether", + "▁wr iting", + "pect ed", + "▁deg ree", + "c ing", + "n ight", + "▁ex ec", + "fort un", + "▁st at", + "▁feel ings", + "▁h ath", + "▁c ook", + "▁r ail", + "▁hon our", + "d ing", + "▁f ate", + "▁p or", + "▁fr ank", + "▁meet ing", + "▁r ough", + "▁al ive", + "▁h ide", + "it es", + "il ar", + "▁bl ow", + "▁cr uel", + "ra ph", + "▁hur t", + "▁l oss", + "▁thr own", + "▁ca used", + "▁we 'll", + "▁ser ve", + "▁du ke", + "▁b ent", + "▁un ited", + "▁see k", + "▁king dom", + "▁situ ation", + "▁empt y", + "n ers", + "▁d ue", + "▁li ked", + "▁sw ift", + "▁open ing", + "▁serv ants", + "c hen", + "ou ra", + "▁g h", + "▁sus pic", + "▁fre ed", + "oint ed", + "▁surf ace", + "c il", + "▁quest ions", + "▁ ess", + "▁cur ious", + "▁const it", + "▁accom pan", + "▁christ ian", + "▁f ill", + "are st", + "▁satisf ied", + "r on", + "▁s ides", + "▁p ity", + "▁re ve", + "▁equ al", + "▁he ight", + "▁or dered", + "os op", + "▁gre y", + "▁list ened", + "p et", + "▁re jo", + "▁cap t", + "ib ility", + "o b", + "▁m art", + "▁happ en", + "▁hur ried", + "▁doll ars", + "▁langu age", + "▁an ge", + "▁your s", + "▁supp osed", + "▁laugh ing", + "▁sett led", + "▁ro de", + "▁per m", + "▁dist ingu", + "▁hur ry", + "▁dest roy", + "▁tal ked", + "▁lift ed", + "oc r", + "▁squ are", + "▁val ue", + "▁tast e", + "▁v ast", + "▁king 's", + "▁r ul", + "▁r oof", + "▁tell ing", + "▁stud y", + "▁o w", + "▁p an", + "▁b as", + "▁r ising", + "▁suffic ient", + "▁for ced", + "▁r ise", + "▁at tend", + "▁phil osop", + "▁no se", + "▁six ty", + "he st", + "▁p in", + "▁e gg", + "▁am b", + "▁fa ult", + "b ur", + "▁st ation", + "▁dist ur", + "▁reg ular", + "ill e", + "▁p ack", + "▁spe cial", + "▁hon est", + "▁build ing", + "▁se ason", + "▁sh ape", + "▁pr ide", + "▁sm iling", + "li ke", + "▁ord ers", + "y n", + "▁wood s", + "▁accom pl", + 
"c on", + "▁s am", + "▁us ually", + "▁wat ching", + "▁sac ri", + "er ved", + "▁pass age", + "▁mat erial", + "▁vall ey", + "y r", + "▁st airs", + "▁li bert", + "▁fright ened", + "▁remar ked", + "▁t it", + "▁w ed", + "▁mist ress", + "▁direct ly", + "▁suff er", + "▁glo om", + "▁l ines", + "▁st ock", + "▁just ice", + "▁d iam", + "est ed", + "▁gr owing", + "▁does n't", + "▁g athered", + "▁ord inary", + "u ce", + "▁e ur", + "▁un f", + "▁kit chen", + "▁th reat", + "▁de pend", + "▁wee ks", + "▁desp air", + "▁meth od", + "▁se ized", + "▁disc uss", + "▁ex er", + "if y", + "▁fl ower", + "▁ign or", + "e er", + "ad es", + "▁de b", + "ep ing", + "▁a le", + "▁y o", + "ch ief", + "▁supp er", + "i k", + "▁bo ld", + "▁put ting", + "▁ne arer", + "us es", + "▁one 's", + "▁b le", + "▁y ork", + "▁end e", + "▁aff airs", + "▁sold ier", + "▁contr ary", + "▁mo ving", + "▁stre ets", + "▁b ir", + "r ance", + "hen s", + "▁c it", + "ic ated", + "▁cat ch", + "▁imag ine", + "ed s", + "▁mar ch", + "▁se arch", + "ar a", + "▁re ceive", + "im ate", + "▁m onsie", + "▁tw ice", + "▁pap a", + "▁monsie ur", + "▁re ck", + "m in", + "u de", + "▁pro cess", + "▁ho le", + "a ly", + "l in", + "▁c ro", + "▁fav our", + "▁d ign", + "▁work ing", + "▁har m", + "▁eur ope", + "ant ic", + "▁pro ved", + "oc ked", + "▁pro ve", + "▁cl er", + "▁lo d", + "cept ion", + "▁pull ed", + "▁ar th", + "▁author ity", + "▁ha ven", + "▁j er", + "▁un s", + "▁move ment", + "ust ed", + "▁eng aged", + "▁brother s", + "▁advant age", + "l ished", + "o le", + "▁arth ur", + "▁a ut", + "▁st ones", + "▁far m", + "▁diffe rence", + "▁f art", + "▁as ide", + "▁m as", + "▁obser v", + "▁hen ce", + "▁possess ion", + "▁hill s", + "▁fort un", + "ul s", + "ail s", + "▁inst ance", + "▁she 's", + "▁o l", + "▁ho ly", + "▁fle w", + "k y", + "▁col or", + "▁r ate", + "▁do ors", + "▁bus y", + "se t", + "▁add ress", + "▁famil iar", + "▁we ight", + "▁aw are", + "▁play ed", + "▁symp ath", + "ll s", + "▁sole mn", + "▁l iter", + "▁t est", + "▁em per", + "▁ind ian", 
+ "▁dist ant", + "▁interest ing", + "▁b ull", + "▁thor ough", + "▁w ore", + "▁wor ked", + "▁expl ained", + "▁excell ent", + "▁splend id", + "▁tong ue", + "▁d i", + "▁p ard", + "▁n amed", + "▁sh ame", + "▁fr anc", + "▁spe ct", + "▁moment s", + "b ers", + "▁w il", + "▁my ster", + "▁se ated", + "▁inst antly", + "▁sim ilar", + "▁ende av", + "▁me asure", + "▁natur ally", + "nd s", + "▁su f", + "▁am ount", + "▁im per", + "▁dog s", + "it able", + "▁br it", + "▁necess ity", + "r id", + "ul ous", + "▁conf idence", + "d en", + "▁p arent", + "▁w id", + "▁v ir", + "▁never the", + "▁agre ed", + "▁neverthe less", + "un ch", + "▁hear ing", + "▁t akes", + "▁a ug", + "▁un ivers", + "en ance", + "▁un w", + "▁ear l", + "▁keep ing", + "▁dri ve", + "▁produ ced", + "▁a ud", + "on 's", + "▁n ames", + "ag n", + "▁disappe ared", + "▁thr ow", + "▁pres ident", + "▁god s", + "▁mag ic", + "▁repres ent", + "▁unk nown", + "p or", + "▁ter ror", + "▁haven 't", + "as c", + "▁supp ort", + "▁smo ke", + "▁w icked", + "k er", + "▁wor ks", + "▁art ic", + "▁d ull", + "▁yes ter", + "▁fall ing", + "▁worth y", + "▁libert y", + "ul ation", + "▁des ign", + "▁want s", + "▁ev idence", + "▁compan ions", + "▁spir its", + "▁co ast", + "▁might y", + "▁particular ly", + "▁wit ness", + "▁disco ver", + "▁s ought", + "▁sp an", + "' ve", + "▁r are", + "▁offic ers", + "l v", + "z y", + "▁yester day", + "ve y", + "c ent", + "▁p owers", + "▁y ield", + "▁c ool", + "▁or gan", + "▁am az", + "▁point ed", + "f ord", + "▁cl aim", + "▁cont ent", + "▁poss ibly", + "▁ter ms", + "▁tri um", + "▁offic er", + "▁pers u", + "▁ce ased", + "▁dro ve", + "▁occur red", + "▁g ree", + "▁li es", + "▁other wise", + "▁emper or", + "▁h om", + "▁st ars", + "▁kne es", + "▁trium ph", + "ru ction", + "▁pa used", + "om s", + "▁requ ired", + "▁fail ed", + "▁unh app", + "▁diam ond", + "▁r at", + "▁al i", + "▁d ouble", + "▁form s", + "▁gi ves", + "▁fing er", + "ra ce", + "▁p air", + "al ous", + "ill a", + "▁bo b", + "▁el iz", + "▁tra vel", + "▁carry ing", 
+ "▁g le", + "il es", + "▁te eth", + "es h", + "▁sh own", + "▁fr uit", + "▁wat ers", + "▁ent ertain", + "▁heart s", + "um n", + "▁lab or", + "in 't", + "▁p ill", + "▁en er", + "so ci", + "▁exam ple", + "▁u pper", + "▁fore ign", + "▁mor al", + "▁soft ly", + "ro se", + "▁hu ge", + "▁char les", + "▁pri est", + "▁exc it", + "▁f et", + "▁mother 's", + "▁possess ed", + "▁c ases", + "▁rep ort", + "▁mil k", + "▁aff air", + "▁princi ple", + "▁inh ab", + "▁freed om", + "▁pr oof", + "▁inte nded", + "▁satisf action", + "▁shout ed", + "is c", + "▁pl at", + "▁b ask", + "ent al", + "▁grou p", + "▁fart her", + "as m", + "▁un fortun", + "▁unt o", + "▁sing ing", + "▁arr ange", + "▁relig ion", + "▁b er", + "▁rock s", + "▁sevent een", + "▁d er", + "▁j ames", + "▁bu y", + "▁succeed ed", + "▁room s", + "▁lead ing", + "▁maj esty", + "▁event s", + "▁d ance", + "▁p aint", + "▁g ently", + "ac le", + "▁te le", + "▁pard on", + "us ing", + "▁dro p", + "f ather", + "▁in vent", + "▁ke y", + "▁mention ed", + "▁sevent y", + "▁r os", + "▁suff ering", + "▁rec ord", + "▁cab in", + "ro ad", + "▁dis s", + "iv al", + "▁demand ed", + "▁excit ement", + "▁as soci", + "▁pro gress", + "ang ers", + "▁cur i", + "▁americ a", + "▁ru le", + "▁b or", + "▁v ig", + "less ly", + "▁clear ly", + "▁b ore", + "▁she ep", + "▁reg ret", + "▁neighb our", + "b ly", + "i ance", + "▁inst inct", + "▁adv ice", + "▁aw ful", + "▁s en", + "▁f ully", + "▁g ather", + "▁pap ers", + "▁h idden", + "▁che st", + "▁bir th", + "h y", + "p ap", + "▁h ither", + "▁st uff", + "▁imp at", + "▁call ing", + "▁four th", + "▁dread ful", + "▁p os", + "▁g rief", + "▁br ill", + "▁power ful", + "▁present ed", + "▁fair y", + "▁expl ain", + "▁sho ot", + "▁prison er", + "▁jo ined", + "▁aff ord", + "m ond", + "at tered", + "▁ ing", + "im ents", + "▁she l", + "▁pre fer", + "▁consider able", + "▁ob ey", + "▁vo ices", + "▁inter v", + "▁interest ed", + "▁vir g", + "▁c red", + "▁c ard", + "▁e p", + "▁need ed", + "▁p ounds", + "▁con qu", + "▁cle ver", + "▁adv 
anced", + "▁c ord", + "ig hed", + "▁under t", + "▁resol ved", + "▁w ag", + "ist ic", + "▁pa ul", + "▁exc ited", + "▁cond itions", + "▁pict ures", + "ac ious", + "▁sh ining", + "▁su nday", + "▁ser ved", + "▁ste am", + "▁pol ice", + "▁spr ang", + "s ie", + "or a", + "es e", + "▁j es", + "▁no dd", + "▁sal t", + "▁field s", + "▁c art", + "▁ind ians", + "▁fier ce", + "d le", + "▁r ide", + "▁des ired", + "▁ed ward", + "▁import ance", + "▁inform ation", + "t ure", + "▁h osp", + "▁me mb", + "▁per ceived", + "▁y ard", + "▁cr it", + "tern al", + "▁t ask", + "▁fo ld", + "r ant", + "▁soon er", + "▁mer ch", + "▁absolute ly", + "▁cit iz", + "▁suf fered", + "▁t ight", + "▁d ur", + "▁is s", + "ill y", + "▁lo g", + "▁complete ly", + "h old", + "▁r ad", + "▁sh are", + "▁will ing", + "▁dev il", + "▁ship s", + "▁imag ination", + "▁super ior", + "c om", + "am s", + "▁any body", + "▁en v", + "▁app l", + "▁dra g", + "▁da wn", + "asp ed", + "▁occup ied", + "▁curi osity", + "i est", + "▁s igh", + "▁fo x", + "as ant", + "▁my st", + "▁ste ad", + "et t", + "▁cou ple", + "▁ty pe", + "▁extra ord", + "▁apparent ly", + "▁wel come", + "▁da ily", + "▁moder n", + "i ot", + "▁a in't", + "▁d ying", + "ll en", + "▁fe at", + "▁acc ident", + "▁count enance", + "▁ab andon", + "ort ion", + "▁lo ck", + "▁cr ime", + "p ir", + "▁m ult", + "▁al as", + "▁ref used", + "▁h ate", + "▁d w", + "▁when ever", + "▁than ks", + "▁sl ave", + "▁regard ed", + "▁suggest ed", + "ul f", + "▁act ually", + "g ment", + "▁s ize", + "re g", + "▁c ult", + "▁k at", + "▁bod ies", + "h us", + "▁b ay", + "▁tr uly", + "▁fl esh", + "ish op", + "▁sm ith", + "▁bet r", + "w ith", + "▁w et", + "▁rapid ly", + "g ers", + "▁o dd", + "as ons", + "et te", + "▁cl ub", + "ab el", + "▁hor ror", + "▁m ile", + "▁fl ight", + "▁cross ed", + "▁profess or", + "▁o ce", + "▁wor st", + "iz ation", + "▁rus hed", + "▁s cience", + "▁b rief", + "▁ste pped", + "▁mid st", + "h a", + "▁s our", + "▁m aint", + "▁br ain", + "▁cott age", + "▁exp ressed", + "▁equ ally", 
+ "▁educ ation", + "▁aug ust", + "▁b uck", + "▁n ay", + "id s", + "▁tem pt", + "▁inqu ir", + "▁fool ish", + "▁t aught", + "▁c op", + "▁d un", + "▁p icked", + "▁el sie", + "▁land s", + "▁dri ven", + "▁polit ical", + "m as", + "▁de ck", + "▁res ist", + "▁inst r", + "▁b on", + "▁k en", + "ip s", + "▁hot el", + "▁danger ous", + "i ally", + "n ow", + "▁do zen", + "▁tr ade", + "▁point s", + "▁nin et", + "ab ility", + "▁cr im", + "▁rel ations", + "▁inter p", + "▁bar b", + "▁delight ed", + "▁memb ers", + "▁s isters", + "▁st y", + "▁an ger", + "▁belie f", + "▁ask ing", + "▁me at", + "▁dis pl", + "▁rel ief", + "ific ation", + "▁hunt ing", + "▁ale x", + "ar ies", + "▁ob st", + "▁beh old", + "▁mist ake", + "▁inqu ired", + "▁remark able", + "▁orig in", + "c ked", + "▁n erv", + "ack s", + "ver t", + "ro p", + "▁care ful", + "▁w ounded", + "ad ing", + "▁ce re", + "▁enem ies", + "▁grad ually", + "▁interrupt ed", + "▁f is", + "▁st up", + "▁se vere", + "▁ke en", + "▁six teen", + "k ins", + "res p", + "▁wor n", + "▁fl our", + "▁sy lv", + "▁contro l", + "k in", + "▁l one", + "as ing", + "▁n ap", + "▁ass ert", + "▁dep th", + "▁kind ly", + "▁mur der", + "ac ity", + "▁ele ven", + "▁inv ol", + "▁d' art", + "▁w ings", + "▁o ak", + "▁e t", + "▁beg un", + "▁dream s", + "wh ile", + "▁more over", + "▁exp ed", + "▁inde pend", + "▁bur ied", + "▁appro ached", + "agn an", + "▁d'art agnan", + "▁se x", + "▁sa ved", + "▁har ry", + "▁phys ical", + "▁spec ies", + "c er", + "o e", + "▁gl ory", + "▁creat ures", + "▁news pap", + "▁s ang", + "▁pl enty", + "▁use ful", + "▁sho es", + "▁hop ed", + "▁frequ ently", + "▁sa f", + "▁dist r", + "▁princi p", + "▁p u", + "y 's", + "au nt", + "▁lo ver", + "▁fam ous", + "▁reco llect", + "▁n ur", + "▁gr im", + "▁ind if", + "▁char ming", + "▁a im", + "▁loo se", + "▁conscious ness", + "▁mam ma", + "▁ent hus", + "▁sle pt", + "▁smo oth", + "▁fight ing", + "▁hy p", + "▁enthus i", + "▁d ig", + "al ing", + "▁st age", + "▁any one", + "▁thr ust", + "▁des per", + "▁t ar", + "▁l 
amp", + "st one", + "▁st ern", + "▁ev ident", + "▁mean while", + "▁forg ive", + "▁accept ed", + "▁oce an", + "▁to t", + "▁they 're", + "▁wo ndered", + "▁play ing", + "▁det ect", + "▁ha le", + "▁kn ife", + "ail ed", + "▁close ly", + "▁me as", + "▁proceed ed", + "▁mess age", + "▁m our", + "▁f ac", + "▁un ion", + "ustom ed", + "he m", + "am ing", + "▁ex ceed", + "▁fe ather", + "▁pre cious", + "▁cent ury", + "▁une x", + "▁p ark", + "ic ation", + "▁every where", + "▁mind s", + "▁extraord inary", + "▁a rose", + "▁ent rance", + "▁cap ital", + "▁rec all", + "▁burn ing", + "▁magn ific", + "o es", + "or ious", + "st and", + "▁as semb", + "▁pl ant", + "▁neighb or", + "▁l est", + "um ents", + "▁coll e", + "▁virt ue", + "▁be w", + "▁for b", + "▁ret reat", + "▁cap able", + "▁ass ured", + "▁const ant", + "▁govern or", + "▁incre ased", + "▁h orn", + "▁rem oved", + "▁fact s", + "▁abs ence", + "▁expl an", + "▁a ck", + "▁some body", + "▁aw a", + "▁adm it", + "▁cor rect", + "▁forg ot", + "▁je alous", + "▁kiss ed", + "▁pop ular", + "▁h ut", + "▁u g", + "pe lled", + "▁gr ant", + "▁friend ship", + "▁ind ign", + "▁sympath y", + "i able", + "er ous", + "▁th om", + "▁al ice", + "▁le vel", + "▁object s", + "▁p ressed", + "▁sh a", + "ro om", + "▁qu al", + "▁beg ged", + "▁em p", + "▁h ind", + "▁hig hest", + "▁cloud s", + "▁gh ost", + "▁ack now", + "ous ed", + "▁stri ke", + "▁wis hes", + "▁becom es", + "▁tremb ling", + "▁no b", + "▁kind ness", + "▁accord ingly", + "▁thro at", + "r ation", + "▁f are", + "▁we 're", + "▁st retched", + "▁fr ag", + "▁whe el", + "▁qu eer", + "▁grand father", + "f or", + "▁ch oose", + "▁hel en", + "▁eight y", + "▁l y", + "▁mis erable", + "▁cont empt", + "ign ed", + "▁mil itary", + "▁rus s", + "▁bask et", + "▁a head", + "oo ps", + "ive red", + "▁list ening", + "▁fr o", + "▁lar ger", + "▁div ine", + "i ber", + "▁st ories", + "anc hes", + "us hing", + "iz ing", + "▁tre asure", + "▁exc use", + "▁innoc ent", + "▁a id", + "▁rem ind", + "▁sla ves", + "r it", + "st airs", + 
"▁re ward", + "og raph", + "▁man age", + "▁dis h", + "▁through out", + "▁wa ves", + "▁jud gment", + "▁arri val", + "▁cho ice", + "▁unhapp y", + "ast ic", + "▁bl ank", + "▁adv ance", + "▁inform ed", + "▁acquaint ance", + "▁impress ion", + "▁myster ious", + "b b", + "▁a ra", + "▁not es", + "▁had n't", + "▁se ll", + "▁com r", + "▁im pl", + "▁ind ust", + "▁end ed", + "▁light s", + "▁nur se", + "▁s out", + "▁b ought", + "▁f red", + "▁mar ked", + "▁sc ream", + "me nd", + "▁une as", + "▁delic ate", + "▁we ary", + "est ic", + "▁prom pt", + "▁exper i", + "▁hung ry", + "▁fly ing", + "▁p ow", + "▁br idge", + "▁jo in", + "▁vis ible", + "▁understand ing", + "▁cry ing", + "▁avo id", + "▁t is", + "▁st iff", + "ac hes", + "▁rest r", + "▁sound s", + "▁b owed", + "▁c aut", + "▁good s", + "▁dav id", + "▁un able", + "▁you' d", + "ham ed", + "▁b os", + "er al", + "▁as hamed", + "▁some where", + "▁inf inite", + "ock s", + "▁dign ity", + "▁g ay", + "▁v ic", + "▁am id", + "▁ho llow", + "▁em otion", + "▁adm itted", + "▁parent s", + "▁w ra", + "▁h int", + "▁tem ple", + "▁comfort able", + "▁intellig ence", + "or ous", + "▁be aring", + "▁her s", + "ab eth", + "▁rem ains", + "▁cont em", + "▁set tle", + "▁imm ense", + "f fe", + "p her", + "▁c her", + "ld om", + "▁we ap", + "ul ated", + "▁light ed", + "gy pt", + "▁advent ure", + "▁thorough ly", + "▁e gypt", + "il st", + "ang es", + "▁ob t", + "▁friend ly", + "▁reck on", + "▁stup id", + "▁f ed", + "▁r ome", + "▁me al", + "▁int ention", + "▁return ing", + "▁conv in", + "▁c oo", + "le ction", + "▁as h", + "ac hel", + "▁ro pe", + "▁pr ice", + "▁pro ject", + "el t", + "row s", + "▁sec ure", + "▁esc aped", + "▁hop es", + "▁eliz abeth", + "▁saf ety", + "▁w ound", + "▁su p", + "▁un us", + "ons cious", + "▁hor ri", + "▁min ister", + "▁o x", + "ll a", + "ens ive", + "▁help ed", + "▁plain ly", + "▁se ldom", + "▁think s", + "▁fellow s", + "▁m ood", + "▁p ushed", + "▁exh ib", + "ing ing", + "▁th under", + "au d", + "ian a", + "▁fair ly", + "▁eld er", + "▁egg 
s", + "ir m", + "▁maid en", + "m other", + "▁appe ars", + "▁chee ks", + "▁w on", + "▁e ase", + "▁re du", + "▁sk ill", + "▁ext ent", + "▁pract ice", + "▁relig ious", + "▁becom ing", + "▁virg in", + "▁feat ures", + "▁t ied", + "▁when ce", + "▁some how", + "▁gre et", + "▁faith ful", + "▁concer ned", + "▁the at", + "▁b ishop", + "▁p ink", + "▁eager ly", + "re es", + "▁e ating", + "▁was te", + "▁r ank", + "▁fe m", + "▁br ide", + "▁un l", + "ott ed", + "cei ving", + "▁tri b", + "▁orig inal", + "▁concer ning", + "▁ha b", + "▁acc ustomed", + "▁pat ient", + "▁rec om", + "▁ce ll", + "oint ment", + "▁arr anged", + "v ille", + "it ure", + "▁who lly", + "▁old er", + "▁col our", + "▁prov ided", + "▁at e", + "▁part ly", + "▁mon t", + "olog y", + "▁pros pect", + "▁cere mon", + "▁ ze", + "▁l aughter", + "▁fe e", + "▁br anches", + "▁fl ed", + "r ight", + "▁wh ilst", + "▁sl ipped", + "▁viol ent", + "▁inhab it", + "▁s ons", + "▁eng age", + "▁unc om", + "▁deep ly", + "▁subst ance", + "▁t ale", + "▁t iny", + "▁d an", + "▁g a", + "▁be e", + "▁y ards", + "ick s", + "▁hast ily", + "he ld", + "▁w es", + "▁v ague", + "▁am use", + "▁mu d", + "▁wo lf", + "▁h ans", + "ill ing", + "▁supp ly", + "▁sil k", + "▁const antly", + "▁christ mas", + "▁mill ion", + "▁whis per", + "▁m ental", + "▁was hing", + "ver se", + "▁cl oth", + "▁bar on", + "▁cor resp", + "▁nodd ed", + "▁corresp ond", + "k a", + "▁he ll", + "▁g ain", + "▁r ust", + "▁ob tain", + "▁unc onscious", + "▁strugg le", + "▁estab lished", + "▁law y", + "ol s", + "▁sign s", + "▁ut tered", + "▁rom an", + "▁constit ution", + "p es", + "▁c ave", + "▁sp are", + "▁qu ant", + "▁im age", + "▁mer ry", + "▁treat ed", + "▁effort s", + "▁lone ly", + "r ated", + "▁n ut", + "▁gl anced", + "▁port ion", + "it or", + "▁re semb", + "▁with d", + "▁me ad", + "▁fe ast", + "▁pr im", + "▁cl iff", + "▁em er", + "▁prop ortion", + "▁consider ation", + "▁hast e", + "▁gaz e", + "▁sav age", + "▁c rew", + "▁to wer", + "▁l ack", + "▁cons cience", + "▁mer cy", + "▁exh a", + 
"▁cons ent", + "at ors", + "ur d", + "▁out l", + "▁cl o", + "▁ad op", + "▁among st", + "▁h anging", + "▁circ le", + "▁prep ar", + "▁brill iant", + "f l", + "▁g ained", + "▁r ow", + "▁tr oops", + "▁rep ro", + "▁m ing", + "ou l", + "▁d ared", + "▁l ion", + "▁jo e", + "▁wind s", + "▁bring ing", + "▁anx iety", + "▁bill y", + "▁consequ ence", + "f ice", + "p se", + "▁f ought", + "▁p red", + "▁sc ra", + "▁gl im", + "▁vict ory", + "p ed", + "▁r ab", + "▁sc ot", + "▁ob v", + "▁sh ock", + "ch an", + "▁kn ock", + "our se", + "▁hand ed", + "▁ind ul", + "▁pat ience", + "▁sout her", + "▁j ose", + "▁fe ver", + "▁ro lled", + "ict ed", + "▁set ting", + "▁profess ion", + "▁sylv ia", + "▁h un", + "ut ions", + "▁fe ared", + "▁br and", + "▁bo ots", + "▁fore head", + "▁princi ples", + "▁s ink", + "▁r ig", + "av al", + "▁pur ch", + "▁gaz ed", + "▁employ ed", + "▁murm ured", + "m ore", + "▁s ar", + "as hing", + "ur al", + "ac les", + "▁tr ad", + "▁act ive", + "▁bene f", + "▁bott le", + "▁r age", + "▁inv est", + "▁lu x", + "▁s ank", + "▁h ang", + "▁be ard", + "ent ial", + "▁lo ving", + "▁nat ive", + "▁inst ruct", + "▁wa ist", + "▁rel ation", + "▁disco very", + "▁mel an", + "▁nerv ous", + "▁obt ained", + "▁p ig", + "▁se ar", + "▁fl ag", + "▁tra il", + "▁distingu ished", + "▁st ared", + "▁mis ery", + "▁pr int", + "▁gu il", + "▁jump ed", + "▁sw im", + "▁appro aching", + "▁suspic ion", + "▁i v", + "▁man aged", + "ak er", + "▁te ach", + "▁mat ch", + "▁guil ty", + "▁w retched", + "▁r um", + "▁comp ar", + "▁the ory", + "▁s her", + "▁b ree", + "▁k ings", + "▁sh one", + "ather ine", + "▁thr one", + "▁show ing", + "aw s", + "▁rob in", + "▁emb ar", + "ut ation", + "▁woman 's", + "▁add ressed", + "▁prot est", + "▁admir ation", + "▁troub led", + "▁ug ly", + "o om", + "er ves", + "▁fl ung", + "▁sub s", + "▁rel ie", + "▁thousand s", + "n ce", + "▁o d", + "▁cur rent", + "▁wood en", + "▁sacri fice", + "ur ity", + "ci p", + "▁pe ar", + "▁far mer", + "▁need s", + "▁cond em", + "▁mem ber", + "▁b ade", + "▁d 
ancing", + "▁re asons", + "▁cons ult", + "▁sw all", + "▁shad ows", + "▁ange l", + "▁ninet een", + "▁sty le", + "f ield", + "▁l an", + "▁man if", + "▁ro bert", + "▁gr ate", + "▁eng ine", + "▁wis dom", + "▁jes us", + "▁con vent", + "▁pre ced", + "▁interest s", + "▁tri al", + "b or", + "i ven", + "▁n est", + "▁ex ch", + "▁vo y", + "▁ill ust", + "▁wor ship", + "▁ad am", + "▁ph r", + "▁princip al", + "▁h it", + "▁spe nd", + "▁stand s", + "▁resp ons", + "▁a y", + "▁ha w", + "▁wh ist", + "▁ar rest", + "▁kind s", + "▁requ ire", + "▁descri bed", + "▁l it", + "▁pre cise", + "▁prop osed", + "▁produ ce", + "▁utter ly", + "ul se", + "▁no vel", + "▁bl ame", + "▁cred it", + "▁p ause", + "os en", + "▁house hold", + "▁arm ed", + "▁follow s", + "up on", + "▁appro ach", + "▁nin ety", + "▁p ir", + "▁fl ore", + "iv ity", + "▁ref use", + "▁sens ible", + "cho ly", + "▁nation al", + "▁g rie", + "▁re ven", + "▁let 's", + "▁delight ful", + "▁extreme ly", + "▁melan choly", + "u ing", + "▁en orm", + "cl es", + "▁slight ly", + "▁sac red", + "▁recogn ized", + "▁myst ery", + "▁g ri", + "▁comp re", + "▁dist ress", + "▁war ri", + "▁use less", + "▁tri f", + "▁mount ed", + "▁phil ip", + "▁ener gy", + "▁explan ation", + "▁c as", + "at ory", + "▁p our", + "▁r ic", + "▁ch osen", + "▁every one", + "umb led", + "▁a pr", + "▁c am", + "▁pro c", + "▁res umed", + "▁appre ci", + "▁alex and", + "▁a ven", + "▁w ing", + "▁int ense", + "▁high ly", + "▁lu cy", + "▁sol id", + "▁depart ure", + "▁agree able", + "▁exer cise", + "a pped", + "▁w ard", + "▁b ud", + "▁d well", + "ic ate", + "▁de ce", + "▁te acher", + "te nding", + "▁ma x", + "▁requ est", + "▁unex pected", + "▁jose ph", + "c ol", + "▁le ap", + "▁vict im", + "▁s ighed", + "▁for ces", + "ch ie", + "▁fe ed", + "▁sp ort", + "▁dri ft", + "▁wed ding", + "▁brit ish", + "se c", + "▁att itude", + "▁vis ion", + "▁pi pe", + "▁to w", + "▁ha lt", + "▁man ners", + "▁te nd", + "▁fl ood", + "▁comm ission", + "▁gu ide", + "▁obser ve", + "▁conc ern", + "▁rus h", + "▁affect 
ed", + "f all", + "▁st ret", + "▁co ach", + "▁po ison", + "▁direct ed", + "▁med ic", + "▁g est", + "▁e cho", + "▁young er", + "▁conf usion", + "▁contin ue", + "▁par li", + "▁abs or", + "▁cent re", + "con om", + "▁horri ble", + "r ison", + "▁b ol", + "▁b ath", + "▁g own", + "▁by e", + "▁al oud", + "▁supp l", + "▁prof ound", + "▁er r", + "▁cheer ful", + "w orth", + "▁sent ence", + "▁mist aken", + "▁tor n", + "▁fig ures", + "▁accompan ied", + "▁c atherine", + "▁e conom", + "▁at m", + "▁sh aking", + "um ber", + "▁coun cil", + "l ot", + "▁as ce", + "il ities", + "▁sp ar", + "▁end s", + "▁stra w", + "▁knight s", + "▁atm osp", + "▁sh ade", + "▁br ow", + "▁sp ark", + "▁rest ed", + "▁sent iment", + "▁reco vered", + "▁subject s", + "▁dut ies", + "▁comp osed", + "▁sw ept", + "▁real ity", + "▁sing ular", + "▁trans p", + "▁loc ked", + "▁lou is", + "▁assist ance", + "▁w ake", + "re m", + "▁so vere", + "▁un p", + "▁lo ves", + "▁abs urd", + "▁soul s", + "▁immedi ate", + "▁rid ing", + "▁connect ion", + "▁chee k", + "▁magnific ent", + "▁e re", + "▁su gar", + "▁pl ans", + "▁pr ud", + "▁dis e", + "▁ad j", + "▁lean ing", + "▁surr ounded", + "▁we 've", + "▁or n", + "▁ro ll", + "▁pro ble", + "▁str ict", + "▁aw ake", + "▁pra ise", + "▁convin ced", + "▁re le", + "▁fr ame", + "▁bre aking", + "▁cur tain", + "▁stay ed", + "▁div ided", + "▁cra w", + "▁incl ined", + "▁prev ious", + "a ult", + "om en", + "▁st air", + "▁se es", + "▁pr on", + "bo ard", + "▁comple x", + "▁pray er", + "▁pier re", + "▁unfortun ate", + "g s", + "▁gen ius", + "▁incre ase", + "▁suffic iently", + "▁ban ks", + "▁revol ution", + "▁souther n", + "k i", + "o ke", + "▁a ust", + "ed y", + "▁l ing", + "▁count ess", + "▁sleep ing", + "▁dev oted", + "▁ut most", + "▁mark et", + "▁bos om", + "▁b ark", + "▁c ath", + "al t", + "ch ar", + "▁cl ock", + "▁hand ker", + "▁adm in", + "▁sens es", + "▁id ent", + "▁mid night", + "▁connect ed", + "▁perm itted", + "▁h id", + "▁f il", + "▁f aced", + "▁g ift", + "▁ch at", + "▁br id", + "▁nor 
ther", + "▁hor iz", + "▁colle ge", + "▁handker chief", + "is ions", + "▁re be", + "▁pol ic", + "▁ann ounced", + "oun ce", + "▁n ons", + "▁n urs", + "al es", + "▁fle et", + "▁rag ged", + "▁co ffe", + "▁part ies", + "▁del ay", + "▁sound ed", + "▁c ities", + "▁was h", + "▁app ointed", + "▁night s", + "▁inst it", + "▁god 's", + "▁stri king", + "▁gun s", + "▁aston ishment", + "▁merch ant", + "▁parli ament", + "n al", + "▁a x", + "at ched", + "▁p il", + "▁p age", + "if orm", + "▁pl ate", + "▁thir st", + "▁neg ro", + "▁ru in", + "▁inhabit ants", + "w in", + "ar f", + "▁r ib", + "▁add ition", + "▁arg ument", + "b our", + "▁t ad", + "▁sc en", + "▁gu ests", + "▁wonder ing", + "▁acquaint ed", + "▁int ent", + "pl ess", + "▁destroy ed", + "▁coffe e", + "in ent", + "le br", + "▁re nder", + "▁so b", + "▁de mon", + "▁des ir", + "ud ing", + "▁get s", + "▁ass ure", + "▁ra ise", + "▁shar ply", + "▁priv ile", + "▁alar m", + "▁mach ine", + "f ied", + "▁cont ract", + "▁del iber", + "▁dr own", + "▁after ward", + "▁gu est", + "▁concl usion", + "▁ris k", + "▁ignor ant", + "b ury", + "k ind", + "▁p ian", + "an 's", + "ur ies", + "▁so il", + "▁ref er", + "▁command ed", + "▁pract ical", + "▁to ss", + "▁of fe", + "▁be held", + "▁ar ist", + "▁quar ters", + "▁deg rees", + "▁fis her", + "▁nons ense", + "▁m c", + "is p", + "▁me chan", + "ke ep", + "▁doubt less", + "▁viol ence", + "▁neg lect", + "▁fol k", + "l iness", + "▁b ul", + "▁e aster", + "▁lo ft", + "▁cont ained", + "▁ref lection", + "▁ce lebr", + "▁lea f", + "▁concl uded", + "▁distr ict", + "i ation", + "r s", + "▁s cient", + "▁he 'd", + "▁sc orn", + "▁cr ack", + "▁ste ep", + "▁mut tered", + "▁estab lish", + "▁dar ling", + "▁and rew", + "▁ch im", + "qu is", + "▁qu ality", + "▁po lly", + "▁che ck", + "▁cra ft", + "▁trave ll", + "▁univers al", + "in ate", + "▁c ig", + "at ives", + "om p", + "ut en", + "▁j ac", + "▁jo b", + "▁sub m", + "▁read er", + "▁le is", + "▁em ph", + "▁surr ound", + "o x", + "p ent", + "it ate", + "▁ex tended", + "▁le 
v", + "▁over t", + "▁ret ired", + "▁pu zz", + "u able", + "▁li br", + "▁ch in", + "▁sp l", + "▁real ized", + "▁ca uses", + "▁pun ishment", + "▁phys ic", + "▁leis ure", + "c an", + "▁w ave", + "▁sh ake", + "▁char m", + "▁belong ed", + "m ber", + "▁b ones", + "▁g as", + "▁r ange", + "▁pre c", + "▁sm ell", + "▁may be", + "▁inv ited", + "▁troub les", + "▁t ables", + "an ch", + "ic ip", + "▁j une", + "▁ab o", + "▁ag es", + "▁any where", + "ff in", + "▁dr unk", + "▁proper ly", + "▁loc al", + "▁impro ve", + "▁atmosp here", + "▁d ir", + "▁he 'll", + "▁re b", + "▁r ang", + "▁comp ass", + "▁lie uten", + "▁lean ed", + "▁firm ly", + "▁n ations", + "▁ha y", + "▁we pt", + "▁r al", + "▁con ven", + "▁un iform", + "▁jul ia", + "e em", + "r ass", + "▁tr ack", + "▁comm er", + "▁bus hes", + "▁obs c", + "▁sort s", + "▁difficult ies", + "▁intellect ual", + "▁introdu ced", + "m ith", + "▁t ro", + "id ay", + "▁re ndered", + "▁r out", + "ad d", + "▁pl un", + "▁thr owing", + "▁hum ble", + "▁pol ite", + "▁num erous", + "▁move ments", + "▁success ful", + "▁cand le", + "▁separ ate", + "▁protect ion", + "▁thom as", + "▁enorm ous", + "▁un b", + "▁rep ub", + "▁sun sh", + "▁desce nded", + "▁unus ual", + "i ved", + "▁bl az", + "▁show s", + "▁sim pl", + "▁cat tle", + "▁cre pt", + "▁aston ished", + "▁desert ed", + "▁l ap", + "ar se", + "▁ne arest", + "ud es", + "▁ent ering", + "▁ide al", + "stand ing", + "nd ers", + "▁so re", + "ain e", + "▁cl os", + "▁our s", + "▁where ver", + "▁ter m", + "▁vis ited", + "▁cal cul", + "d s", + "▁b ase", + "▁g ates", + "▁st amp", + "▁li ber", + "▁offic ial", + "▁e rect", + "▁al t", + "el ia", + "▁har mon", + "▁pain ful", + "▁burn ed", + "▁repub lic", + "u er", + "▁l ately", + "▁it al", + "am m", + "▁te ar", + "▁act ions", + "▁fin al", + "▁start led", + "▁sens ation", + "▁fat al", + "ol ic", + "▁fl ash", + "▁app et", + "▁strong er", + "▁num bers", + "▁grat itude", + "▁fem ale", + "▁wes tern", + "l est" + ] + } +} \ No newline at end of file diff --git 
a/out/checkpoint-20000/tokenizer_config.json b/out/checkpoint-20000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..0073e6415da746fc5c44a52e02785cb94510efa4 --- /dev/null +++ b/out/checkpoint-20000/tokenizer_config.json @@ -0,0 +1,9253 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "<|audio:0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|audio:1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|audio:2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "<|audio:3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "<|audio:4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "5": { + "content": "<|audio:5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "6": { + "content": "<|audio:6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "7": { + "content": "<|audio:7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "8": { + "content": "<|audio:8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "9": { + "content": "<|audio:9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "10": { + "content": "<|audio:10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "11": { + "content": "<|audio:11|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "12": { + "content": "<|audio:12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "13": { + "content": "<|audio:13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "14": { + "content": "<|audio:14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "15": { + "content": "<|audio:15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "16": { + "content": "<|audio:16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "17": { + "content": "<|audio:17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "18": { + "content": "<|audio:18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "19": { + "content": "<|audio:19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "20": { + "content": "<|audio:20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21": { + "content": "<|audio:21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "22": { + "content": "<|audio:22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "23": { + "content": "<|audio:23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "24": { + "content": "<|audio:24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "25": { + "content": 
"<|audio:25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "26": { + "content": "<|audio:26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "27": { + "content": "<|audio:27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "28": { + "content": "<|audio:28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "29": { + "content": "<|audio:29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "30": { + "content": "<|audio:30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "31": { + "content": "<|audio:31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32": { + "content": "<|audio:32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "33": { + "content": "<|audio:33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "34": { + "content": "<|audio:34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "35": { + "content": "<|audio:35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "36": { + "content": "<|audio:36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "37": { + "content": "<|audio:37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "38": { + "content": "<|audio:38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "39": { + "content": "<|audio:39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "40": { + "content": "<|audio:40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "41": { + "content": "<|audio:41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "42": { + "content": "<|audio:42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "43": { + "content": "<|audio:43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "44": { + "content": "<|audio:44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "45": { + "content": "<|audio:45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "46": { + "content": "<|audio:46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "47": { + "content": "<|audio:47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "48": { + "content": "<|audio:48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "49": { + "content": "<|audio:49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "50": { + "content": "<|audio:50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "51": { + "content": "<|audio:51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "52": { + "content": "<|audio:52|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "53": { + "content": "<|audio:53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "54": { + "content": "<|audio:54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "55": { + "content": "<|audio:55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "56": { + "content": "<|audio:56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "57": { + "content": "<|audio:57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "58": { + "content": "<|audio:58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "59": { + "content": "<|audio:59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "60": { + "content": "<|audio:60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "61": { + "content": "<|audio:61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "62": { + "content": "<|audio:62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "63": { + "content": "<|audio:63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "64": { + "content": "<|audio:64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "65": { + "content": "<|audio:65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "66": { + "content": "<|audio:66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "67": { + "content": "<|audio:67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "68": { + "content": "<|audio:68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "69": { + "content": "<|audio:69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "70": { + "content": "<|audio:70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "71": { + "content": "<|audio:71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "72": { + "content": "<|audio:72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "73": { + "content": "<|audio:73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "74": { + "content": "<|audio:74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "75": { + "content": "<|audio:75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "76": { + "content": "<|audio:76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "77": { + "content": "<|audio:77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "78": { + "content": "<|audio:78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "79": { + "content": "<|audio:79|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "80": { + "content": "<|audio:80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "81": { + "content": "<|audio:81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "82": { + "content": "<|audio:82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "83": { + "content": "<|audio:83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "84": { + "content": "<|audio:84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "85": { + "content": "<|audio:85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "86": { + "content": "<|audio:86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "87": { + "content": "<|audio:87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "88": { + "content": "<|audio:88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "89": { + "content": "<|audio:89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "90": { + "content": "<|audio:90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "91": { + "content": "<|audio:91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "92": { + "content": "<|audio:92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"93": { + "content": "<|audio:93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "94": { + "content": "<|audio:94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "95": { + "content": "<|audio:95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "96": { + "content": "<|audio:96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "97": { + "content": "<|audio:97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "98": { + "content": "<|audio:98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "99": { + "content": "<|audio:99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100": { + "content": "<|audio:100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101": { + "content": "<|audio:101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "102": { + "content": "<|audio:102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "103": { + "content": "<|audio:103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "104": { + "content": "<|audio:104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "105": { + "content": "<|audio:105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "106": { + "content": "<|audio:106|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "<|audio:107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "<|audio:108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "109": { + "content": "<|audio:109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "110": { + "content": "<|audio:110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "111": { + "content": "<|audio:111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "112": { + "content": "<|audio:112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "113": { + "content": "<|audio:113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "114": { + "content": "<|audio:114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "115": { + "content": "<|audio:115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "116": { + "content": "<|audio:116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "117": { + "content": "<|audio:117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "118": { + "content": "<|audio:118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "119": { + "content": "<|audio:119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "120": { + "content": "<|audio:120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "121": { + "content": "<|audio:121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "122": { + "content": "<|audio:122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "123": { + "content": "<|audio:123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "124": { + "content": "<|audio:124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "125": { + "content": "<|audio:125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "126": { + "content": "<|audio:126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "127": { + "content": "<|audio:127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128": { + "content": "<|audio:128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "129": { + "content": "<|audio:129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "130": { + "content": "<|audio:130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "131": { + "content": "<|audio:131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "132": { + "content": "<|audio:132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "133": { + "content": "<|audio:133|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "134": { + "content": "<|audio:134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "135": { + "content": "<|audio:135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "136": { + "content": "<|audio:136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "137": { + "content": "<|audio:137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "138": { + "content": "<|audio:138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "139": { + "content": "<|audio:139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "140": { + "content": "<|audio:140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "141": { + "content": "<|audio:141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "142": { + "content": "<|audio:142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "143": { + "content": "<|audio:143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "144": { + "content": "<|audio:144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "145": { + "content": "<|audio:145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "146": { + "content": "<|audio:146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "147": { + "content": "<|audio:147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "148": { + "content": "<|audio:148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "149": { + "content": "<|audio:149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "150": { + "content": "<|audio:150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151": { + "content": "<|audio:151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "152": { + "content": "<|audio:152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "153": { + "content": "<|audio:153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "154": { + "content": "<|audio:154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "155": { + "content": "<|audio:155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "156": { + "content": "<|audio:156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "157": { + "content": "<|audio:157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "158": { + "content": "<|audio:158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "159": { + "content": "<|audio:159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "160": { + 
"content": "<|audio:160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "161": { + "content": "<|audio:161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "162": { + "content": "<|audio:162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "163": { + "content": "<|audio:163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "164": { + "content": "<|audio:164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "165": { + "content": "<|audio:165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "166": { + "content": "<|audio:166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "167": { + "content": "<|audio:167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "168": { + "content": "<|audio:168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "169": { + "content": "<|audio:169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "170": { + "content": "<|audio:170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "171": { + "content": "<|audio:171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "172": { + "content": "<|audio:172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "173": { + "content": "<|audio:173|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "174": { + "content": "<|audio:174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "175": { + "content": "<|audio:175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "176": { + "content": "<|audio:176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "177": { + "content": "<|audio:177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "178": { + "content": "<|audio:178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "179": { + "content": "<|audio:179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "180": { + "content": "<|audio:180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "181": { + "content": "<|audio:181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "182": { + "content": "<|audio:182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "183": { + "content": "<|audio:183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "184": { + "content": "<|audio:184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "185": { + "content": "<|audio:185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "186": { + "content": "<|audio:186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "187": { + "content": "<|audio:187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "188": { + "content": "<|audio:188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "189": { + "content": "<|audio:189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "190": { + "content": "<|audio:190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "191": { + "content": "<|audio:191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "192": { + "content": "<|audio:192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "193": { + "content": "<|audio:193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "194": { + "content": "<|audio:194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "195": { + "content": "<|audio:195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "196": { + "content": "<|audio:196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "197": { + "content": "<|audio:197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "198": { + "content": "<|audio:198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "199": { + "content": "<|audio:199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200": { + "content": "<|audio:200|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "201": { + "content": "<|audio:201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "202": { + "content": "<|audio:202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "203": { + "content": "<|audio:203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "204": { + "content": "<|audio:204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "205": { + "content": "<|audio:205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "206": { + "content": "<|audio:206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "207": { + "content": "<|audio:207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "208": { + "content": "<|audio:208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "209": { + "content": "<|audio:209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "210": { + "content": "<|audio:210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "211": { + "content": "<|audio:211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "212": { + "content": "<|audio:212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "213": { + "content": "<|audio:213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "214": { + "content": "<|audio:214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "215": { + "content": "<|audio:215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "216": { + "content": "<|audio:216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "217": { + "content": "<|audio:217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "218": { + "content": "<|audio:218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "219": { + "content": "<|audio:219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "220": { + "content": "<|audio:220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "221": { + "content": "<|audio:221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "222": { + "content": "<|audio:222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "223": { + "content": "<|audio:223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "224": { + "content": "<|audio:224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "225": { + "content": "<|audio:225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "226": { + "content": "<|audio:226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "227": { + 
"content": "<|audio:227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "228": { + "content": "<|audio:228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "229": { + "content": "<|audio:229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "230": { + "content": "<|audio:230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "231": { + "content": "<|audio:231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "232": { + "content": "<|audio:232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "233": { + "content": "<|audio:233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "234": { + "content": "<|audio:234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "235": { + "content": "<|audio:235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "236": { + "content": "<|audio:236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "237": { + "content": "<|audio:237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "238": { + "content": "<|audio:238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "239": { + "content": "<|audio:239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "240": { + "content": "<|audio:240|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "241": { + "content": "<|audio:241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "242": { + "content": "<|audio:242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "243": { + "content": "<|audio:243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "244": { + "content": "<|audio:244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "245": { + "content": "<|audio:245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "246": { + "content": "<|audio:246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "247": { + "content": "<|audio:247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "248": { + "content": "<|audio:248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "249": { + "content": "<|audio:249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250": { + "content": "<|audio:250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "251": { + "content": "<|audio:251|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "252": { + "content": "<|audio:252|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "253": { + "content": "<|audio:253|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "254": { + "content": "<|audio:254|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255": { + "content": "<|audio:255|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256": { + "content": "<|audio:256|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "257": { + "content": "<|audio:257|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "258": { + "content": "<|audio:258|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "259": { + "content": "<|audio:259|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "260": { + "content": "<|audio:260|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "261": { + "content": "<|audio:261|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "262": { + "content": "<|audio:262|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "263": { + "content": "<|audio:263|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "264": { + "content": "<|audio:264|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "265": { + "content": "<|audio:265|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "266": { + "content": "<|audio:266|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "267": { + "content": "<|audio:267|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "268": { + "content": "<|audio:268|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "269": { + "content": "<|audio:269|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "270": { + "content": "<|audio:270|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "271": { + "content": "<|audio:271|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "272": { + "content": "<|audio:272|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "273": { + "content": "<|audio:273|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "274": { + "content": "<|audio:274|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "275": { + "content": "<|audio:275|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "276": { + "content": "<|audio:276|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "277": { + "content": "<|audio:277|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "278": { + "content": "<|audio:278|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "279": { + "content": "<|audio:279|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "280": { + "content": "<|audio:280|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "281": { + "content": "<|audio:281|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "282": { + "content": "<|audio:282|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "283": { + "content": "<|audio:283|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "284": { + "content": "<|audio:284|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "285": { + "content": "<|audio:285|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "286": { + "content": "<|audio:286|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "287": { + "content": "<|audio:287|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "288": { + "content": "<|audio:288|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "289": { + "content": "<|audio:289|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "290": { + "content": "<|audio:290|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "291": { + "content": "<|audio:291|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "292": { + "content": "<|audio:292|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "293": { + "content": "<|audio:293|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "294": { + 
"content": "<|audio:294|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "295": { + "content": "<|audio:295|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "296": { + "content": "<|audio:296|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "297": { + "content": "<|audio:297|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "298": { + "content": "<|audio:298|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "299": { + "content": "<|audio:299|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "300": { + "content": "<|audio:300|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "301": { + "content": "<|audio:301|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "302": { + "content": "<|audio:302|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "303": { + "content": "<|audio:303|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "304": { + "content": "<|audio:304|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "305": { + "content": "<|audio:305|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "306": { + "content": "<|audio:306|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "307": { + "content": "<|audio:307|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "308": { + "content": "<|audio:308|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "309": { + "content": "<|audio:309|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "310": { + "content": "<|audio:310|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "311": { + "content": "<|audio:311|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "312": { + "content": "<|audio:312|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "313": { + "content": "<|audio:313|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "314": { + "content": "<|audio:314|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "315": { + "content": "<|audio:315|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "316": { + "content": "<|audio:316|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "317": { + "content": "<|audio:317|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "318": { + "content": "<|audio:318|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "319": { + "content": "<|audio:319|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "320": { + "content": "<|audio:320|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "321": { + "content": "<|audio:321|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "322": { + "content": "<|audio:322|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "323": { + "content": "<|audio:323|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "324": { + "content": "<|audio:324|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "325": { + "content": "<|audio:325|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "326": { + "content": "<|audio:326|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "327": { + "content": "<|audio:327|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "328": { + "content": "<|audio:328|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "329": { + "content": "<|audio:329|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "330": { + "content": "<|audio:330|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "331": { + "content": "<|audio:331|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "332": { + "content": "<|audio:332|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "333": { + "content": "<|audio:333|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "334": { + "content": "<|audio:334|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "335": { + "content": "<|audio:335|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "336": { + "content": "<|audio:336|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "337": { + "content": "<|audio:337|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "338": { + "content": "<|audio:338|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "339": { + "content": "<|audio:339|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "340": { + "content": "<|audio:340|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "341": { + "content": "<|audio:341|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "342": { + "content": "<|audio:342|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "343": { + "content": "<|audio:343|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "344": { + "content": "<|audio:344|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "345": { + "content": "<|audio:345|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "346": { + "content": "<|audio:346|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "347": { + "content": "<|audio:347|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "348": { + "content": "<|audio:348|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "349": { + "content": "<|audio:349|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "350": { + "content": "<|audio:350|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "351": { + "content": "<|audio:351|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "352": { + "content": "<|audio:352|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "353": { + "content": "<|audio:353|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "354": { + "content": "<|audio:354|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "355": { + "content": "<|audio:355|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "356": { + "content": "<|audio:356|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "357": { + "content": "<|audio:357|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "358": { + "content": "<|audio:358|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "359": { + "content": "<|audio:359|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "360": { + "content": "<|audio:360|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "361": { + 
"content": "<|audio:361|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "362": { + "content": "<|audio:362|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "363": { + "content": "<|audio:363|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "364": { + "content": "<|audio:364|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "365": { + "content": "<|audio:365|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "366": { + "content": "<|audio:366|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "367": { + "content": "<|audio:367|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "368": { + "content": "<|audio:368|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "369": { + "content": "<|audio:369|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "370": { + "content": "<|audio:370|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "371": { + "content": "<|audio:371|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "372": { + "content": "<|audio:372|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "373": { + "content": "<|audio:373|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "374": { + "content": "<|audio:374|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "375": { + "content": "<|audio:375|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "376": { + "content": "<|audio:376|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "377": { + "content": "<|audio:377|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "378": { + "content": "<|audio:378|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "379": { + "content": "<|audio:379|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "380": { + "content": "<|audio:380|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "381": { + "content": "<|audio:381|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "382": { + "content": "<|audio:382|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "383": { + "content": "<|audio:383|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "384": { + "content": "<|audio:384|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "385": { + "content": "<|audio:385|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "386": { + "content": "<|audio:386|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "387": { + "content": "<|audio:387|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "388": { + "content": "<|audio:388|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "389": { + "content": "<|audio:389|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "390": { + "content": "<|audio:390|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "391": { + "content": "<|audio:391|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "392": { + "content": "<|audio:392|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "393": { + "content": "<|audio:393|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "394": { + "content": "<|audio:394|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "395": { + "content": "<|audio:395|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "396": { + "content": "<|audio:396|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "397": { + "content": "<|audio:397|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "398": { + "content": "<|audio:398|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "399": { + "content": "<|audio:399|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "400": { + "content": "<|audio:400|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "401": { + "content": "<|audio:401|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "402": { + "content": "<|audio:402|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "403": { + "content": "<|audio:403|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "404": { + "content": "<|audio:404|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "405": { + "content": "<|audio:405|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "406": { + "content": "<|audio:406|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "407": { + "content": "<|audio:407|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "408": { + "content": "<|audio:408|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "409": { + "content": "<|audio:409|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "410": { + "content": "<|audio:410|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "411": { + "content": "<|audio:411|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "412": { + "content": "<|audio:412|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "413": { + "content": "<|audio:413|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "414": { + "content": "<|audio:414|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "415": { + "content": "<|audio:415|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "416": { + "content": "<|audio:416|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "417": { + "content": "<|audio:417|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "418": { + "content": "<|audio:418|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "419": { + "content": "<|audio:419|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "420": { + "content": "<|audio:420|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "421": { + "content": "<|audio:421|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "422": { + "content": "<|audio:422|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "423": { + "content": "<|audio:423|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "424": { + "content": "<|audio:424|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "425": { + "content": "<|audio:425|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "426": { + "content": "<|audio:426|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "427": { + "content": "<|audio:427|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "428": { + 
"content": "<|audio:428|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "429": { + "content": "<|audio:429|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "430": { + "content": "<|audio:430|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "431": { + "content": "<|audio:431|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "432": { + "content": "<|audio:432|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "433": { + "content": "<|audio:433|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "434": { + "content": "<|audio:434|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "435": { + "content": "<|audio:435|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "436": { + "content": "<|audio:436|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "437": { + "content": "<|audio:437|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "438": { + "content": "<|audio:438|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "439": { + "content": "<|audio:439|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "440": { + "content": "<|audio:440|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "441": { + "content": "<|audio:441|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "442": { + "content": "<|audio:442|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "443": { + "content": "<|audio:443|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "444": { + "content": "<|audio:444|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "445": { + "content": "<|audio:445|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "446": { + "content": "<|audio:446|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "447": { + "content": "<|audio:447|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "448": { + "content": "<|audio:448|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "449": { + "content": "<|audio:449|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "450": { + "content": "<|audio:450|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "451": { + "content": "<|audio:451|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "452": { + "content": "<|audio:452|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "453": { + "content": "<|audio:453|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "454": { + "content": "<|audio:454|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "455": { + "content": "<|audio:455|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "456": { + "content": "<|audio:456|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "457": { + "content": "<|audio:457|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "458": { + "content": "<|audio:458|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "459": { + "content": "<|audio:459|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "460": { + "content": "<|audio:460|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "461": { + "content": "<|audio:461|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "462": { + "content": "<|audio:462|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "463": { + "content": "<|audio:463|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "464": { + "content": "<|audio:464|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "465": { + "content": "<|audio:465|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "466": { + "content": "<|audio:466|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "467": { + "content": "<|audio:467|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "468": { + "content": "<|audio:468|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "469": { + "content": "<|audio:469|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "470": { + "content": "<|audio:470|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "471": { + "content": "<|audio:471|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "472": { + "content": "<|audio:472|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "473": { + "content": "<|audio:473|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "474": { + "content": "<|audio:474|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "475": { + "content": "<|audio:475|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "476": { + "content": "<|audio:476|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "477": { + "content": "<|audio:477|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "478": { + "content": "<|audio:478|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "479": { + "content": "<|audio:479|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "480": { + "content": "<|audio:480|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "481": { + "content": "<|audio:481|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "482": { + "content": "<|audio:482|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "483": { + "content": "<|audio:483|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "484": { + "content": "<|audio:484|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "485": { + "content": "<|audio:485|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "486": { + "content": "<|audio:486|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "487": { + "content": "<|audio:487|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "488": { + "content": "<|audio:488|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "489": { + "content": "<|audio:489|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "490": { + "content": "<|audio:490|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "491": { + "content": "<|audio:491|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "492": { + "content": "<|audio:492|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "493": { + "content": "<|audio:493|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "494": { + "content": "<|audio:494|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "495": { + 
"content": "<|audio:495|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "496": { + "content": "<|audio:496|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "497": { + "content": "<|audio:497|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "498": { + "content": "<|audio:498|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "499": { + "content": "<|audio:499|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "500": { + "content": "<|audio:500|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "501": { + "content": "<|audio:501|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "502": { + "content": "<|audio:502|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "503": { + "content": "<|audio:503|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "504": { + "content": "<|audio:504|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "505": { + "content": "<|audio:505|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "506": { + "content": "<|audio:506|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "507": { + "content": "<|audio:507|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "508": { + "content": "<|audio:508|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "509": { + "content": "<|audio:509|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "510": { + "content": "<|audio:510|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "511": { + "content": "<|audio:511|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "512": { + "content": "<|audio:512|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "513": { + "content": "<|audio:513|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "514": { + "content": "<|audio:514|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "515": { + "content": "<|audio:515|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "516": { + "content": "<|audio:516|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "517": { + "content": "<|audio:517|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "518": { + "content": "<|audio:518|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "519": { + "content": "<|audio:519|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "520": { + "content": "<|audio:520|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "521": { + "content": "<|audio:521|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "522": { + "content": "<|audio:522|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "523": { + "content": "<|audio:523|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "524": { + "content": "<|audio:524|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "525": { + "content": "<|audio:525|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "526": { + "content": "<|audio:526|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "527": { + "content": "<|audio:527|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "528": { + "content": "<|audio:528|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "529": { + "content": "<|audio:529|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "530": { + "content": "<|audio:530|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "531": { + "content": "<|audio:531|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "532": { + "content": "<|audio:532|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "533": { + "content": "<|audio:533|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "534": { + "content": "<|audio:534|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "535": { + "content": "<|audio:535|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "536": { + "content": "<|audio:536|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "537": { + "content": "<|audio:537|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "538": { + "content": "<|audio:538|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "539": { + "content": "<|audio:539|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "540": { + "content": "<|audio:540|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "541": { + "content": "<|audio:541|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "542": { + "content": "<|audio:542|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "543": { + "content": "<|audio:543|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "544": { + "content": "<|audio:544|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "545": { + "content": "<|audio:545|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "546": { + "content": "<|audio:546|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "547": { + "content": "<|audio:547|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "548": { + "content": "<|audio:548|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "549": { + "content": "<|audio:549|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "550": { + "content": "<|audio:550|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "551": { + "content": "<|audio:551|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "552": { + "content": "<|audio:552|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "553": { + "content": "<|audio:553|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "554": { + "content": "<|audio:554|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "555": { + "content": "<|audio:555|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "556": { + "content": "<|audio:556|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "557": { + "content": "<|audio:557|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "558": { + "content": "<|audio:558|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "559": { + "content": "<|audio:559|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "560": { + "content": "<|audio:560|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "561": { + "content": "<|audio:561|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "562": { + 
"content": "<|audio:562|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "563": { + "content": "<|audio:563|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "564": { + "content": "<|audio:564|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "565": { + "content": "<|audio:565|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "566": { + "content": "<|audio:566|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "567": { + "content": "<|audio:567|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "568": { + "content": "<|audio:568|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "569": { + "content": "<|audio:569|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "570": { + "content": "<|audio:570|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "571": { + "content": "<|audio:571|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "572": { + "content": "<|audio:572|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "573": { + "content": "<|audio:573|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "574": { + "content": "<|audio:574|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "575": { + "content": "<|audio:575|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "576": { + "content": "<|audio:576|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "577": { + "content": "<|audio:577|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "578": { + "content": "<|audio:578|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "579": { + "content": "<|audio:579|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "580": { + "content": "<|audio:580|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "581": { + "content": "<|audio:581|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "582": { + "content": "<|audio:582|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "583": { + "content": "<|audio:583|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "584": { + "content": "<|audio:584|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "585": { + "content": "<|audio:585|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "586": { + "content": "<|audio:586|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "587": { + "content": "<|audio:587|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "588": { + "content": "<|audio:588|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "589": { + "content": "<|audio:589|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "590": { + "content": "<|audio:590|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "591": { + "content": "<|audio:591|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "592": { + "content": "<|audio:592|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "593": { + "content": "<|audio:593|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "594": { + "content": "<|audio:594|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "595": { + "content": "<|audio:595|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "596": { + "content": "<|audio:596|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "597": { + "content": "<|audio:597|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "598": { + "content": "<|audio:598|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "599": { + "content": "<|audio:599|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "600": { + "content": "<|audio:600|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "601": { + "content": "<|audio:601|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "602": { + "content": "<|audio:602|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "603": { + "content": "<|audio:603|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "604": { + "content": "<|audio:604|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "605": { + "content": "<|audio:605|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "606": { + "content": "<|audio:606|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "607": { + "content": "<|audio:607|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "608": { + "content": "<|audio:608|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "609": { + "content": "<|audio:609|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "610": { + "content": "<|audio:610|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "611": { + "content": "<|audio:611|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "612": { + "content": "<|audio:612|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "613": { + "content": "<|audio:613|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "614": { + "content": "<|audio:614|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "615": { + "content": "<|audio:615|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "616": { + "content": "<|audio:616|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "617": { + "content": "<|audio:617|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "618": { + "content": "<|audio:618|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "619": { + "content": "<|audio:619|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "620": { + "content": "<|audio:620|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "621": { + "content": "<|audio:621|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "622": { + "content": "<|audio:622|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "623": { + "content": "<|audio:623|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "624": { + "content": "<|audio:624|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "625": { + "content": "<|audio:625|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "626": { + "content": "<|audio:626|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "627": { + "content": "<|audio:627|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "628": { + "content": "<|audio:628|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "629": { + 
"content": "<|audio:629|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "630": { + "content": "<|audio:630|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "631": { + "content": "<|audio:631|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "632": { + "content": "<|audio:632|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "633": { + "content": "<|audio:633|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "634": { + "content": "<|audio:634|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "635": { + "content": "<|audio:635|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "636": { + "content": "<|audio:636|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "637": { + "content": "<|audio:637|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "638": { + "content": "<|audio:638|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "639": { + "content": "<|audio:639|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "640": { + "content": "<|audio:640|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "641": { + "content": "<|audio:641|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "642": { + "content": "<|audio:642|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "643": { + "content": "<|audio:643|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "644": { + "content": "<|audio:644|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "645": { + "content": "<|audio:645|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "646": { + "content": "<|audio:646|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "647": { + "content": "<|audio:647|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "648": { + "content": "<|audio:648|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "649": { + "content": "<|audio:649|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "650": { + "content": "<|audio:650|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "651": { + "content": "<|audio:651|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "652": { + "content": "<|audio:652|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "653": { + "content": "<|audio:653|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "654": { + "content": "<|audio:654|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "655": { + "content": "<|audio:655|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "656": { + "content": "<|audio:656|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "657": { + "content": "<|audio:657|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "658": { + "content": "<|audio:658|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "659": { + "content": "<|audio:659|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "660": { + "content": "<|audio:660|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "661": { + "content": "<|audio:661|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "662": { + "content": "<|audio:662|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "663": { + "content": "<|audio:663|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "664": { + "content": "<|audio:664|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "665": { + "content": "<|audio:665|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "666": { + "content": "<|audio:666|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "667": { + "content": "<|audio:667|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "668": { + "content": "<|audio:668|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "669": { + "content": "<|audio:669|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "670": { + "content": "<|audio:670|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "671": { + "content": "<|audio:671|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "672": { + "content": "<|audio:672|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "673": { + "content": "<|audio:673|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "674": { + "content": "<|audio:674|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "675": { + "content": "<|audio:675|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "676": { + "content": "<|audio:676|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "677": { + "content": "<|audio:677|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "678": { + "content": "<|audio:678|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "679": { + "content": "<|audio:679|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "680": { + "content": "<|audio:680|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "681": { + "content": "<|audio:681|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "682": { + "content": "<|audio:682|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "683": { + "content": "<|audio:683|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "684": { + "content": "<|audio:684|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "685": { + "content": "<|audio:685|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "686": { + "content": "<|audio:686|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "687": { + "content": "<|audio:687|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "688": { + "content": "<|audio:688|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "689": { + "content": "<|audio:689|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "690": { + "content": "<|audio:690|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "691": { + "content": "<|audio:691|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "692": { + "content": "<|audio:692|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "693": { + "content": "<|audio:693|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "694": { + "content": "<|audio:694|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "695": { + "content": "<|audio:695|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "696": { + 
"content": "<|audio:696|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "697": { + "content": "<|audio:697|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "698": { + "content": "<|audio:698|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "699": { + "content": "<|audio:699|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "700": { + "content": "<|audio:700|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "701": { + "content": "<|audio:701|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "702": { + "content": "<|audio:702|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "703": { + "content": "<|audio:703|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "704": { + "content": "<|audio:704|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "705": { + "content": "<|audio:705|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "706": { + "content": "<|audio:706|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "707": { + "content": "<|audio:707|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "708": { + "content": "<|audio:708|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "709": { + "content": "<|audio:709|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "710": { + "content": "<|audio:710|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "711": { + "content": "<|audio:711|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "712": { + "content": "<|audio:712|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "713": { + "content": "<|audio:713|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "714": { + "content": "<|audio:714|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "715": { + "content": "<|audio:715|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "716": { + "content": "<|audio:716|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "717": { + "content": "<|audio:717|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "718": { + "content": "<|audio:718|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "719": { + "content": "<|audio:719|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "720": { + "content": "<|audio:720|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "721": { + "content": "<|audio:721|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "722": { + "content": "<|audio:722|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "723": { + "content": "<|audio:723|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "724": { + "content": "<|audio:724|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "725": { + "content": "<|audio:725|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "726": { + "content": "<|audio:726|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "727": { + "content": "<|audio:727|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "728": { + "content": "<|audio:728|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "729": { + "content": "<|audio:729|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "730": { + "content": "<|audio:730|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "731": { + "content": "<|audio:731|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "732": { + "content": "<|audio:732|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "733": { + "content": "<|audio:733|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "734": { + "content": "<|audio:734|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "735": { + "content": "<|audio:735|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "736": { + "content": "<|audio:736|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "737": { + "content": "<|audio:737|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "738": { + "content": "<|audio:738|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "739": { + "content": "<|audio:739|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "740": { + "content": "<|audio:740|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "741": { + "content": "<|audio:741|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "742": { + "content": "<|audio:742|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "743": { + "content": "<|audio:743|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "744": { + "content": "<|audio:744|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "745": { + "content": "<|audio:745|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "746": { + "content": "<|audio:746|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "747": { + "content": "<|audio:747|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "748": { + "content": "<|audio:748|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "749": { + "content": "<|audio:749|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "750": { + "content": "<|audio:750|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "751": { + "content": "<|audio:751|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "752": { + "content": "<|audio:752|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "753": { + "content": "<|audio:753|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "754": { + "content": "<|audio:754|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "755": { + "content": "<|audio:755|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "756": { + "content": "<|audio:756|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "757": { + "content": "<|audio:757|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "758": { + "content": "<|audio:758|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "759": { + "content": "<|audio:759|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "760": { + "content": "<|audio:760|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "761": { + "content": "<|audio:761|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "762": { + "content": "<|audio:762|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "763": { + 
"content": "<|audio:763|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "764": { + "content": "<|audio:764|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "765": { + "content": "<|audio:765|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "766": { + "content": "<|audio:766|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "767": { + "content": "<|audio:767|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "768": { + "content": "<|audio:768|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "769": { + "content": "<|audio:769|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "770": { + "content": "<|audio:770|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "771": { + "content": "<|audio:771|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "772": { + "content": "<|audio:772|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "773": { + "content": "<|audio:773|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "774": { + "content": "<|audio:774|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "775": { + "content": "<|audio:775|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "776": { + "content": "<|audio:776|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "777": { + "content": "<|audio:777|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "778": { + "content": "<|audio:778|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "779": { + "content": "<|audio:779|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "780": { + "content": "<|audio:780|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "781": { + "content": "<|audio:781|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "782": { + "content": "<|audio:782|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "783": { + "content": "<|audio:783|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "784": { + "content": "<|audio:784|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "785": { + "content": "<|audio:785|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "786": { + "content": "<|audio:786|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "787": { + "content": "<|audio:787|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "788": { + "content": "<|audio:788|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "789": { + "content": "<|audio:789|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "790": { + "content": "<|audio:790|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "791": { + "content": "<|audio:791|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "792": { + "content": "<|audio:792|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "793": { + "content": "<|audio:793|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "794": { + "content": "<|audio:794|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "795": { + "content": "<|audio:795|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "796": { + "content": "<|audio:796|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "797": { + "content": "<|audio:797|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "798": { + "content": "<|audio:798|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "799": { + "content": "<|audio:799|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "800": { + "content": "<|audio:800|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "801": { + "content": "<|audio:801|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "802": { + "content": "<|audio:802|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "803": { + "content": "<|audio:803|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "804": { + "content": "<|audio:804|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "805": { + "content": "<|audio:805|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "806": { + "content": "<|audio:806|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "807": { + "content": "<|audio:807|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "808": { + "content": "<|audio:808|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "809": { + "content": "<|audio:809|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "810": { + "content": "<|audio:810|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "811": { + "content": "<|audio:811|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "812": { + "content": "<|audio:812|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "813": { + "content": "<|audio:813|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "814": { + "content": "<|audio:814|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "815": { + "content": "<|audio:815|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "816": { + "content": "<|audio:816|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "817": { + "content": "<|audio:817|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "818": { + "content": "<|audio:818|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "819": { + "content": "<|audio:819|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "820": { + "content": "<|audio:820|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "821": { + "content": "<|audio:821|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "822": { + "content": "<|audio:822|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "823": { + "content": "<|audio:823|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "824": { + "content": "<|audio:824|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "825": { + "content": "<|audio:825|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "826": { + "content": "<|audio:826|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "827": { + "content": "<|audio:827|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "828": { + "content": "<|audio:828|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "829": { + "content": "<|audio:829|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "830": { + 
"content": "<|audio:830|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "831": { + "content": "<|audio:831|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "832": { + "content": "<|audio:832|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "833": { + "content": "<|audio:833|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "834": { + "content": "<|audio:834|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "835": { + "content": "<|audio:835|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "836": { + "content": "<|audio:836|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "837": { + "content": "<|audio:837|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "838": { + "content": "<|audio:838|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "839": { + "content": "<|audio:839|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "840": { + "content": "<|audio:840|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "841": { + "content": "<|audio:841|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "842": { + "content": "<|audio:842|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "843": { + "content": "<|audio:843|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "844": { + "content": "<|audio:844|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "845": { + "content": "<|audio:845|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "846": { + "content": "<|audio:846|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "847": { + "content": "<|audio:847|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "848": { + "content": "<|audio:848|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "849": { + "content": "<|audio:849|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "850": { + "content": "<|audio:850|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "851": { + "content": "<|audio:851|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "852": { + "content": "<|audio:852|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "853": { + "content": "<|audio:853|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "854": { + "content": "<|audio:854|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "855": { + "content": "<|audio:855|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "856": { + "content": "<|audio:856|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "857": { + "content": "<|audio:857|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "858": { + "content": "<|audio:858|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "859": { + "content": "<|audio:859|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "860": { + "content": "<|audio:860|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "861": { + "content": "<|audio:861|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "862": { + "content": "<|audio:862|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "863": { + "content": "<|audio:863|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "864": { + "content": "<|audio:864|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "865": { + "content": "<|audio:865|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "866": { + "content": "<|audio:866|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "867": { + "content": "<|audio:867|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "868": { + "content": "<|audio:868|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "869": { + "content": "<|audio:869|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "870": { + "content": "<|audio:870|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "871": { + "content": "<|audio:871|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "872": { + "content": "<|audio:872|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "873": { + "content": "<|audio:873|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "874": { + "content": "<|audio:874|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "875": { + "content": "<|audio:875|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "876": { + "content": "<|audio:876|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "877": { + "content": "<|audio:877|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "878": { + "content": "<|audio:878|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "879": { + "content": "<|audio:879|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "880": { + "content": "<|audio:880|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "881": { + "content": "<|audio:881|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "882": { + "content": "<|audio:882|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "883": { + "content": "<|audio:883|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "884": { + "content": "<|audio:884|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "885": { + "content": "<|audio:885|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "886": { + "content": "<|audio:886|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "887": { + "content": "<|audio:887|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "888": { + "content": "<|audio:888|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "889": { + "content": "<|audio:889|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "890": { + "content": "<|audio:890|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "891": { + "content": "<|audio:891|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "892": { + "content": "<|audio:892|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "893": { + "content": "<|audio:893|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "894": { + "content": "<|audio:894|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "895": { + "content": "<|audio:895|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "896": { + "content": "<|audio:896|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "897": { + 
"content": "<|audio:897|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "898": { + "content": "<|audio:898|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "899": { + "content": "<|audio:899|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "900": { + "content": "<|audio:900|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "901": { + "content": "<|audio:901|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "902": { + "content": "<|audio:902|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "903": { + "content": "<|audio:903|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "904": { + "content": "<|audio:904|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "905": { + "content": "<|audio:905|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "906": { + "content": "<|audio:906|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "907": { + "content": "<|audio:907|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "908": { + "content": "<|audio:908|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "909": { + "content": "<|audio:909|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "910": { + "content": "<|audio:910|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "911": { + "content": "<|audio:911|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "912": { + "content": "<|audio:912|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "913": { + "content": "<|audio:913|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "914": { + "content": "<|audio:914|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "915": { + "content": "<|audio:915|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "916": { + "content": "<|audio:916|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "917": { + "content": "<|audio:917|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "918": { + "content": "<|audio:918|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "919": { + "content": "<|audio:919|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "920": { + "content": "<|audio:920|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "921": { + "content": "<|audio:921|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "922": { + "content": "<|audio:922|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "923": { + "content": "<|audio:923|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "924": { + "content": "<|audio:924|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "925": { + "content": "<|audio:925|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "926": { + "content": "<|audio:926|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "927": { + "content": "<|audio:927|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "928": { + "content": "<|audio:928|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "929": { + "content": "<|audio:929|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "930": { + "content": "<|audio:930|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "931": { + "content": "<|audio:931|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "932": { + "content": "<|audio:932|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "933": { + "content": "<|audio:933|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "934": { + "content": "<|audio:934|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "935": { + "content": "<|audio:935|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "936": { + "content": "<|audio:936|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "937": { + "content": "<|audio:937|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "938": { + "content": "<|audio:938|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "939": { + "content": "<|audio:939|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "940": { + "content": "<|audio:940|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "941": { + "content": "<|audio:941|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "942": { + "content": "<|audio:942|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "943": { + "content": "<|audio:943|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "944": { + "content": "<|audio:944|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "945": { + "content": "<|audio:945|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "946": { + "content": "<|audio:946|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "947": { + "content": "<|audio:947|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "948": { + "content": "<|audio:948|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "949": { + "content": "<|audio:949|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "950": { + "content": "<|audio:950|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "951": { + "content": "<|audio:951|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "952": { + "content": "<|audio:952|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "953": { + "content": "<|audio:953|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "954": { + "content": "<|audio:954|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "955": { + "content": "<|audio:955|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "956": { + "content": "<|audio:956|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "957": { + "content": "<|audio:957|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "958": { + "content": "<|audio:958|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "959": { + "content": "<|audio:959|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "960": { + "content": "<|audio:960|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "961": { + "content": "<|audio:961|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "962": { + "content": "<|audio:962|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "963": { + "content": "<|audio:963|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "964": { + 
"content": "<|audio:964|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "965": { + "content": "<|audio:965|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "966": { + "content": "<|audio:966|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "967": { + "content": "<|audio:967|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "968": { + "content": "<|audio:968|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "969": { + "content": "<|audio:969|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "970": { + "content": "<|audio:970|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "971": { + "content": "<|audio:971|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "972": { + "content": "<|audio:972|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "973": { + "content": "<|audio:973|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "974": { + "content": "<|audio:974|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "975": { + "content": "<|audio:975|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "976": { + "content": "<|audio:976|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "977": { + "content": "<|audio:977|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "978": { + "content": "<|audio:978|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "979": { + "content": "<|audio:979|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "980": { + "content": "<|audio:980|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "981": { + "content": "<|audio:981|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "982": { + "content": "<|audio:982|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "983": { + "content": "<|audio:983|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "984": { + "content": "<|audio:984|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "985": { + "content": "<|audio:985|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "986": { + "content": "<|audio:986|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "987": { + "content": "<|audio:987|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "988": { + "content": "<|audio:988|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "989": { + "content": "<|audio:989|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "990": { + "content": "<|audio:990|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": 
true + }, + "991": { + "content": "<|audio:991|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "992": { + "content": "<|audio:992|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "993": { + "content": "<|audio:993|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "994": { + "content": "<|audio:994|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "995": { + "content": "<|audio:995|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "996": { + "content": "<|audio:996|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "997": { + "content": "<|audio:997|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "998": { + "content": "<|audio:998|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "999": { + "content": "<|audio:999|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1000": { + "content": "<|audio:1000|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1001": { + "content": "<|audio:1001|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1002": { + "content": "<|audio:1002|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1003": { + "content": "<|audio:1003|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1004": { + "content": "<|audio:1004|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1005": { + "content": "<|audio:1005|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1006": { + "content": "<|audio:1006|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1007": { + "content": "<|audio:1007|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1008": { + "content": "<|audio:1008|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1009": { + "content": "<|audio:1009|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1010": { + "content": "<|audio:1010|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1011": { + "content": "<|audio:1011|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1012": { + "content": "<|audio:1012|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1013": { + "content": "<|audio:1013|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1014": { + "content": "<|audio:1014|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1015": { + "content": "<|audio:1015|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1016": { + "content": "<|audio:1016|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1017": { + "content": "<|audio:1017|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1018": { + "content": "<|audio:1018|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1019": { + "content": "<|audio:1019|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1020": { + "content": "<|audio:1020|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1021": { + "content": "<|audio:1021|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1022": { + "content": "<|audio:1022|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1023": { + "content": "<|audio:1023|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1024": { + "content": "<|startoftranscript|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1025": { + "content": "<|endoftranscript|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1026": { + "content": "<|padding|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "clean_up_tokenization_spaces": true, + "model_max_length": 1877, + "pad_token": "<|padding|>", + "special_tokens": [ + "<|audio:0|>", + "<|audio:1|>", + "<|audio:2|>", + "<|audio:3|>", + "<|audio:4|>", + "<|audio:5|>", + "<|audio:6|>", + "<|audio:7|>", + "<|audio:8|>", + "<|audio:9|>", + "<|audio:10|>", + "<|audio:11|>", + "<|audio:12|>", + "<|audio:13|>", + "<|audio:14|>", + "<|audio:15|>", + "<|audio:16|>", + "<|audio:17|>", + "<|audio:18|>", + "<|audio:19|>", + "<|audio:20|>", + "<|audio:21|>", + "<|audio:22|>", + "<|audio:23|>", + 
"<|audio:24|>", + "<|audio:25|>", + "<|audio:26|>", + "<|audio:27|>", + "<|audio:28|>", + "<|audio:29|>", + "<|audio:30|>", + "<|audio:31|>", + "<|audio:32|>", + "<|audio:33|>", + "<|audio:34|>", + "<|audio:35|>", + "<|audio:36|>", + "<|audio:37|>", + "<|audio:38|>", + "<|audio:39|>", + "<|audio:40|>", + "<|audio:41|>", + "<|audio:42|>", + "<|audio:43|>", + "<|audio:44|>", + "<|audio:45|>", + "<|audio:46|>", + "<|audio:47|>", + "<|audio:48|>", + "<|audio:49|>", + "<|audio:50|>", + "<|audio:51|>", + "<|audio:52|>", + "<|audio:53|>", + "<|audio:54|>", + "<|audio:55|>", + "<|audio:56|>", + "<|audio:57|>", + "<|audio:58|>", + "<|audio:59|>", + "<|audio:60|>", + "<|audio:61|>", + "<|audio:62|>", + "<|audio:63|>", + "<|audio:64|>", + "<|audio:65|>", + "<|audio:66|>", + "<|audio:67|>", + "<|audio:68|>", + "<|audio:69|>", + "<|audio:70|>", + "<|audio:71|>", + "<|audio:72|>", + "<|audio:73|>", + "<|audio:74|>", + "<|audio:75|>", + "<|audio:76|>", + "<|audio:77|>", + "<|audio:78|>", + "<|audio:79|>", + "<|audio:80|>", + "<|audio:81|>", + "<|audio:82|>", + "<|audio:83|>", + "<|audio:84|>", + "<|audio:85|>", + "<|audio:86|>", + "<|audio:87|>", + "<|audio:88|>", + "<|audio:89|>", + "<|audio:90|>", + "<|audio:91|>", + "<|audio:92|>", + "<|audio:93|>", + "<|audio:94|>", + "<|audio:95|>", + "<|audio:96|>", + "<|audio:97|>", + "<|audio:98|>", + "<|audio:99|>", + "<|audio:100|>", + "<|audio:101|>", + "<|audio:102|>", + "<|audio:103|>", + "<|audio:104|>", + "<|audio:105|>", + "<|audio:106|>", + "<|audio:107|>", + "<|audio:108|>", + "<|audio:109|>", + "<|audio:110|>", + "<|audio:111|>", + "<|audio:112|>", + "<|audio:113|>", + "<|audio:114|>", + "<|audio:115|>", + "<|audio:116|>", + "<|audio:117|>", + "<|audio:118|>", + "<|audio:119|>", + "<|audio:120|>", + "<|audio:121|>", + "<|audio:122|>", + "<|audio:123|>", + "<|audio:124|>", + "<|audio:125|>", + "<|audio:126|>", + "<|audio:127|>", + "<|audio:128|>", + "<|audio:129|>", + "<|audio:130|>", + "<|audio:131|>", + "<|audio:132|>", + 
"<|audio:133|>", + "<|audio:134|>", + "<|audio:135|>", + "<|audio:136|>", + "<|audio:137|>", + "<|audio:138|>", + "<|audio:139|>", + "<|audio:140|>", + "<|audio:141|>", + "<|audio:142|>", + "<|audio:143|>", + "<|audio:144|>", + "<|audio:145|>", + "<|audio:146|>", + "<|audio:147|>", + "<|audio:148|>", + "<|audio:149|>", + "<|audio:150|>", + "<|audio:151|>", + "<|audio:152|>", + "<|audio:153|>", + "<|audio:154|>", + "<|audio:155|>", + "<|audio:156|>", + "<|audio:157|>", + "<|audio:158|>", + "<|audio:159|>", + "<|audio:160|>", + "<|audio:161|>", + "<|audio:162|>", + "<|audio:163|>", + "<|audio:164|>", + "<|audio:165|>", + "<|audio:166|>", + "<|audio:167|>", + "<|audio:168|>", + "<|audio:169|>", + "<|audio:170|>", + "<|audio:171|>", + "<|audio:172|>", + "<|audio:173|>", + "<|audio:174|>", + "<|audio:175|>", + "<|audio:176|>", + "<|audio:177|>", + "<|audio:178|>", + "<|audio:179|>", + "<|audio:180|>", + "<|audio:181|>", + "<|audio:182|>", + "<|audio:183|>", + "<|audio:184|>", + "<|audio:185|>", + "<|audio:186|>", + "<|audio:187|>", + "<|audio:188|>", + "<|audio:189|>", + "<|audio:190|>", + "<|audio:191|>", + "<|audio:192|>", + "<|audio:193|>", + "<|audio:194|>", + "<|audio:195|>", + "<|audio:196|>", + "<|audio:197|>", + "<|audio:198|>", + "<|audio:199|>", + "<|audio:200|>", + "<|audio:201|>", + "<|audio:202|>", + "<|audio:203|>", + "<|audio:204|>", + "<|audio:205|>", + "<|audio:206|>", + "<|audio:207|>", + "<|audio:208|>", + "<|audio:209|>", + "<|audio:210|>", + "<|audio:211|>", + "<|audio:212|>", + "<|audio:213|>", + "<|audio:214|>", + "<|audio:215|>", + "<|audio:216|>", + "<|audio:217|>", + "<|audio:218|>", + "<|audio:219|>", + "<|audio:220|>", + "<|audio:221|>", + "<|audio:222|>", + "<|audio:223|>", + "<|audio:224|>", + "<|audio:225|>", + "<|audio:226|>", + "<|audio:227|>", + "<|audio:228|>", + "<|audio:229|>", + "<|audio:230|>", + "<|audio:231|>", + "<|audio:232|>", + "<|audio:233|>", + "<|audio:234|>", + "<|audio:235|>", + "<|audio:236|>", + "<|audio:237|>", + 
"<|audio:238|>", + "<|audio:239|>", + "<|audio:240|>", + "<|audio:241|>", + "<|audio:242|>", + "<|audio:243|>", + "<|audio:244|>", + "<|audio:245|>", + "<|audio:246|>", + "<|audio:247|>", + "<|audio:248|>", + "<|audio:249|>", + "<|audio:250|>", + "<|audio:251|>", + "<|audio:252|>", + "<|audio:253|>", + "<|audio:254|>", + "<|audio:255|>", + "<|audio:256|>", + "<|audio:257|>", + "<|audio:258|>", + "<|audio:259|>", + "<|audio:260|>", + "<|audio:261|>", + "<|audio:262|>", + "<|audio:263|>", + "<|audio:264|>", + "<|audio:265|>", + "<|audio:266|>", + "<|audio:267|>", + "<|audio:268|>", + "<|audio:269|>", + "<|audio:270|>", + "<|audio:271|>", + "<|audio:272|>", + "<|audio:273|>", + "<|audio:274|>", + "<|audio:275|>", + "<|audio:276|>", + "<|audio:277|>", + "<|audio:278|>", + "<|audio:279|>", + "<|audio:280|>", + "<|audio:281|>", + "<|audio:282|>", + "<|audio:283|>", + "<|audio:284|>", + "<|audio:285|>", + "<|audio:286|>", + "<|audio:287|>", + "<|audio:288|>", + "<|audio:289|>", + "<|audio:290|>", + "<|audio:291|>", + "<|audio:292|>", + "<|audio:293|>", + "<|audio:294|>", + "<|audio:295|>", + "<|audio:296|>", + "<|audio:297|>", + "<|audio:298|>", + "<|audio:299|>", + "<|audio:300|>", + "<|audio:301|>", + "<|audio:302|>", + "<|audio:303|>", + "<|audio:304|>", + "<|audio:305|>", + "<|audio:306|>", + "<|audio:307|>", + "<|audio:308|>", + "<|audio:309|>", + "<|audio:310|>", + "<|audio:311|>", + "<|audio:312|>", + "<|audio:313|>", + "<|audio:314|>", + "<|audio:315|>", + "<|audio:316|>", + "<|audio:317|>", + "<|audio:318|>", + "<|audio:319|>", + "<|audio:320|>", + "<|audio:321|>", + "<|audio:322|>", + "<|audio:323|>", + "<|audio:324|>", + "<|audio:325|>", + "<|audio:326|>", + "<|audio:327|>", + "<|audio:328|>", + "<|audio:329|>", + "<|audio:330|>", + "<|audio:331|>", + "<|audio:332|>", + "<|audio:333|>", + "<|audio:334|>", + "<|audio:335|>", + "<|audio:336|>", + "<|audio:337|>", + "<|audio:338|>", + "<|audio:339|>", + "<|audio:340|>", + "<|audio:341|>", + "<|audio:342|>", + 
"<|audio:343|>", + "<|audio:344|>", + "<|audio:345|>", + "<|audio:346|>", + "<|audio:347|>", + "<|audio:348|>", + "<|audio:349|>", + "<|audio:350|>", + "<|audio:351|>", + "<|audio:352|>", + "<|audio:353|>", + "<|audio:354|>", + "<|audio:355|>", + "<|audio:356|>", + "<|audio:357|>", + "<|audio:358|>", + "<|audio:359|>", + "<|audio:360|>", + "<|audio:361|>", + "<|audio:362|>", + "<|audio:363|>", + "<|audio:364|>", + "<|audio:365|>", + "<|audio:366|>", + "<|audio:367|>", + "<|audio:368|>", + "<|audio:369|>", + "<|audio:370|>", + "<|audio:371|>", + "<|audio:372|>", + "<|audio:373|>", + "<|audio:374|>", + "<|audio:375|>", + "<|audio:376|>", + "<|audio:377|>", + "<|audio:378|>", + "<|audio:379|>", + "<|audio:380|>", + "<|audio:381|>", + "<|audio:382|>", + "<|audio:383|>", + "<|audio:384|>", + "<|audio:385|>", + "<|audio:386|>", + "<|audio:387|>", + "<|audio:388|>", + "<|audio:389|>", + "<|audio:390|>", + "<|audio:391|>", + "<|audio:392|>", + "<|audio:393|>", + "<|audio:394|>", + "<|audio:395|>", + "<|audio:396|>", + "<|audio:397|>", + "<|audio:398|>", + "<|audio:399|>", + "<|audio:400|>", + "<|audio:401|>", + "<|audio:402|>", + "<|audio:403|>", + "<|audio:404|>", + "<|audio:405|>", + "<|audio:406|>", + "<|audio:407|>", + "<|audio:408|>", + "<|audio:409|>", + "<|audio:410|>", + "<|audio:411|>", + "<|audio:412|>", + "<|audio:413|>", + "<|audio:414|>", + "<|audio:415|>", + "<|audio:416|>", + "<|audio:417|>", + "<|audio:418|>", + "<|audio:419|>", + "<|audio:420|>", + "<|audio:421|>", + "<|audio:422|>", + "<|audio:423|>", + "<|audio:424|>", + "<|audio:425|>", + "<|audio:426|>", + "<|audio:427|>", + "<|audio:428|>", + "<|audio:429|>", + "<|audio:430|>", + "<|audio:431|>", + "<|audio:432|>", + "<|audio:433|>", + "<|audio:434|>", + "<|audio:435|>", + "<|audio:436|>", + "<|audio:437|>", + "<|audio:438|>", + "<|audio:439|>", + "<|audio:440|>", + "<|audio:441|>", + "<|audio:442|>", + "<|audio:443|>", + "<|audio:444|>", + "<|audio:445|>", + "<|audio:446|>", + "<|audio:447|>", + 
"<|audio:448|>", + "<|audio:449|>", + "<|audio:450|>", + "<|audio:451|>", + "<|audio:452|>", + "<|audio:453|>", + "<|audio:454|>", + "<|audio:455|>", + "<|audio:456|>", + "<|audio:457|>", + "<|audio:458|>", + "<|audio:459|>", + "<|audio:460|>", + "<|audio:461|>", + "<|audio:462|>", + "<|audio:463|>", + "<|audio:464|>", + "<|audio:465|>", + "<|audio:466|>", + "<|audio:467|>", + "<|audio:468|>", + "<|audio:469|>", + "<|audio:470|>", + "<|audio:471|>", + "<|audio:472|>", + "<|audio:473|>", + "<|audio:474|>", + "<|audio:475|>", + "<|audio:476|>", + "<|audio:477|>", + "<|audio:478|>", + "<|audio:479|>", + "<|audio:480|>", + "<|audio:481|>", + "<|audio:482|>", + "<|audio:483|>", + "<|audio:484|>", + "<|audio:485|>", + "<|audio:486|>", + "<|audio:487|>", + "<|audio:488|>", + "<|audio:489|>", + "<|audio:490|>", + "<|audio:491|>", + "<|audio:492|>", + "<|audio:493|>", + "<|audio:494|>", + "<|audio:495|>", + "<|audio:496|>", + "<|audio:497|>", + "<|audio:498|>", + "<|audio:499|>", + "<|audio:500|>", + "<|audio:501|>", + "<|audio:502|>", + "<|audio:503|>", + "<|audio:504|>", + "<|audio:505|>", + "<|audio:506|>", + "<|audio:507|>", + "<|audio:508|>", + "<|audio:509|>", + "<|audio:510|>", + "<|audio:511|>", + "<|audio:512|>", + "<|audio:513|>", + "<|audio:514|>", + "<|audio:515|>", + "<|audio:516|>", + "<|audio:517|>", + "<|audio:518|>", + "<|audio:519|>", + "<|audio:520|>", + "<|audio:521|>", + "<|audio:522|>", + "<|audio:523|>", + "<|audio:524|>", + "<|audio:525|>", + "<|audio:526|>", + "<|audio:527|>", + "<|audio:528|>", + "<|audio:529|>", + "<|audio:530|>", + "<|audio:531|>", + "<|audio:532|>", + "<|audio:533|>", + "<|audio:534|>", + "<|audio:535|>", + "<|audio:536|>", + "<|audio:537|>", + "<|audio:538|>", + "<|audio:539|>", + "<|audio:540|>", + "<|audio:541|>", + "<|audio:542|>", + "<|audio:543|>", + "<|audio:544|>", + "<|audio:545|>", + "<|audio:546|>", + "<|audio:547|>", + "<|audio:548|>", + "<|audio:549|>", + "<|audio:550|>", + "<|audio:551|>", + "<|audio:552|>", + 
"<|audio:553|>", + "<|audio:554|>", + "<|audio:555|>", + "<|audio:556|>", + "<|audio:557|>", + "<|audio:558|>", + "<|audio:559|>", + "<|audio:560|>", + "<|audio:561|>", + "<|audio:562|>", + "<|audio:563|>", + "<|audio:564|>", + "<|audio:565|>", + "<|audio:566|>", + "<|audio:567|>", + "<|audio:568|>", + "<|audio:569|>", + "<|audio:570|>", + "<|audio:571|>", + "<|audio:572|>", + "<|audio:573|>", + "<|audio:574|>", + "<|audio:575|>", + "<|audio:576|>", + "<|audio:577|>", + "<|audio:578|>", + "<|audio:579|>", + "<|audio:580|>", + "<|audio:581|>", + "<|audio:582|>", + "<|audio:583|>", + "<|audio:584|>", + "<|audio:585|>", + "<|audio:586|>", + "<|audio:587|>", + "<|audio:588|>", + "<|audio:589|>", + "<|audio:590|>", + "<|audio:591|>", + "<|audio:592|>", + "<|audio:593|>", + "<|audio:594|>", + "<|audio:595|>", + "<|audio:596|>", + "<|audio:597|>", + "<|audio:598|>", + "<|audio:599|>", + "<|audio:600|>", + "<|audio:601|>", + "<|audio:602|>", + "<|audio:603|>", + "<|audio:604|>", + "<|audio:605|>", + "<|audio:606|>", + "<|audio:607|>", + "<|audio:608|>", + "<|audio:609|>", + "<|audio:610|>", + "<|audio:611|>", + "<|audio:612|>", + "<|audio:613|>", + "<|audio:614|>", + "<|audio:615|>", + "<|audio:616|>", + "<|audio:617|>", + "<|audio:618|>", + "<|audio:619|>", + "<|audio:620|>", + "<|audio:621|>", + "<|audio:622|>", + "<|audio:623|>", + "<|audio:624|>", + "<|audio:625|>", + "<|audio:626|>", + "<|audio:627|>", + "<|audio:628|>", + "<|audio:629|>", + "<|audio:630|>", + "<|audio:631|>", + "<|audio:632|>", + "<|audio:633|>", + "<|audio:634|>", + "<|audio:635|>", + "<|audio:636|>", + "<|audio:637|>", + "<|audio:638|>", + "<|audio:639|>", + "<|audio:640|>", + "<|audio:641|>", + "<|audio:642|>", + "<|audio:643|>", + "<|audio:644|>", + "<|audio:645|>", + "<|audio:646|>", + "<|audio:647|>", + "<|audio:648|>", + "<|audio:649|>", + "<|audio:650|>", + "<|audio:651|>", + "<|audio:652|>", + "<|audio:653|>", + "<|audio:654|>", + "<|audio:655|>", + "<|audio:656|>", + "<|audio:657|>", + 
"<|audio:658|>", + "<|audio:659|>", + "<|audio:660|>", + "<|audio:661|>", + "<|audio:662|>", + "<|audio:663|>", + "<|audio:664|>", + "<|audio:665|>", + "<|audio:666|>", + "<|audio:667|>", + "<|audio:668|>", + "<|audio:669|>", + "<|audio:670|>", + "<|audio:671|>", + "<|audio:672|>", + "<|audio:673|>", + "<|audio:674|>", + "<|audio:675|>", + "<|audio:676|>", + "<|audio:677|>", + "<|audio:678|>", + "<|audio:679|>", + "<|audio:680|>", + "<|audio:681|>", + "<|audio:682|>", + "<|audio:683|>", + "<|audio:684|>", + "<|audio:685|>", + "<|audio:686|>", + "<|audio:687|>", + "<|audio:688|>", + "<|audio:689|>", + "<|audio:690|>", + "<|audio:691|>", + "<|audio:692|>", + "<|audio:693|>", + "<|audio:694|>", + "<|audio:695|>", + "<|audio:696|>", + "<|audio:697|>", + "<|audio:698|>", + "<|audio:699|>", + "<|audio:700|>", + "<|audio:701|>", + "<|audio:702|>", + "<|audio:703|>", + "<|audio:704|>", + "<|audio:705|>", + "<|audio:706|>", + "<|audio:707|>", + "<|audio:708|>", + "<|audio:709|>", + "<|audio:710|>", + "<|audio:711|>", + "<|audio:712|>", + "<|audio:713|>", + "<|audio:714|>", + "<|audio:715|>", + "<|audio:716|>", + "<|audio:717|>", + "<|audio:718|>", + "<|audio:719|>", + "<|audio:720|>", + "<|audio:721|>", + "<|audio:722|>", + "<|audio:723|>", + "<|audio:724|>", + "<|audio:725|>", + "<|audio:726|>", + "<|audio:727|>", + "<|audio:728|>", + "<|audio:729|>", + "<|audio:730|>", + "<|audio:731|>", + "<|audio:732|>", + "<|audio:733|>", + "<|audio:734|>", + "<|audio:735|>", + "<|audio:736|>", + "<|audio:737|>", + "<|audio:738|>", + "<|audio:739|>", + "<|audio:740|>", + "<|audio:741|>", + "<|audio:742|>", + "<|audio:743|>", + "<|audio:744|>", + "<|audio:745|>", + "<|audio:746|>", + "<|audio:747|>", + "<|audio:748|>", + "<|audio:749|>", + "<|audio:750|>", + "<|audio:751|>", + "<|audio:752|>", + "<|audio:753|>", + "<|audio:754|>", + "<|audio:755|>", + "<|audio:756|>", + "<|audio:757|>", + "<|audio:758|>", + "<|audio:759|>", + "<|audio:760|>", + "<|audio:761|>", + "<|audio:762|>", + 
"<|audio:763|>", + "<|audio:764|>", + "<|audio:765|>", + "<|audio:766|>", + "<|audio:767|>", + "<|audio:768|>", + "<|audio:769|>", + "<|audio:770|>", + "<|audio:771|>", + "<|audio:772|>", + "<|audio:773|>", + "<|audio:774|>", + "<|audio:775|>", + "<|audio:776|>", + "<|audio:777|>", + "<|audio:778|>", + "<|audio:779|>", + "<|audio:780|>", + "<|audio:781|>", + "<|audio:782|>", + "<|audio:783|>", + "<|audio:784|>", + "<|audio:785|>", + "<|audio:786|>", + "<|audio:787|>", + "<|audio:788|>", + "<|audio:789|>", + "<|audio:790|>", + "<|audio:791|>", + "<|audio:792|>", + "<|audio:793|>", + "<|audio:794|>", + "<|audio:795|>", + "<|audio:796|>", + "<|audio:797|>", + "<|audio:798|>", + "<|audio:799|>", + "<|audio:800|>", + "<|audio:801|>", + "<|audio:802|>", + "<|audio:803|>", + "<|audio:804|>", + "<|audio:805|>", + "<|audio:806|>", + "<|audio:807|>", + "<|audio:808|>", + "<|audio:809|>", + "<|audio:810|>", + "<|audio:811|>", + "<|audio:812|>", + "<|audio:813|>", + "<|audio:814|>", + "<|audio:815|>", + "<|audio:816|>", + "<|audio:817|>", + "<|audio:818|>", + "<|audio:819|>", + "<|audio:820|>", + "<|audio:821|>", + "<|audio:822|>", + "<|audio:823|>", + "<|audio:824|>", + "<|audio:825|>", + "<|audio:826|>", + "<|audio:827|>", + "<|audio:828|>", + "<|audio:829|>", + "<|audio:830|>", + "<|audio:831|>", + "<|audio:832|>", + "<|audio:833|>", + "<|audio:834|>", + "<|audio:835|>", + "<|audio:836|>", + "<|audio:837|>", + "<|audio:838|>", + "<|audio:839|>", + "<|audio:840|>", + "<|audio:841|>", + "<|audio:842|>", + "<|audio:843|>", + "<|audio:844|>", + "<|audio:845|>", + "<|audio:846|>", + "<|audio:847|>", + "<|audio:848|>", + "<|audio:849|>", + "<|audio:850|>", + "<|audio:851|>", + "<|audio:852|>", + "<|audio:853|>", + "<|audio:854|>", + "<|audio:855|>", + "<|audio:856|>", + "<|audio:857|>", + "<|audio:858|>", + "<|audio:859|>", + "<|audio:860|>", + "<|audio:861|>", + "<|audio:862|>", + "<|audio:863|>", + "<|audio:864|>", + "<|audio:865|>", + "<|audio:866|>", + "<|audio:867|>", + 
"<|audio:868|>", + "<|audio:869|>", + "<|audio:870|>", + "<|audio:871|>", + "<|audio:872|>", + "<|audio:873|>", + "<|audio:874|>", + "<|audio:875|>", + "<|audio:876|>", + "<|audio:877|>", + "<|audio:878|>", + "<|audio:879|>", + "<|audio:880|>", + "<|audio:881|>", + "<|audio:882|>", + "<|audio:883|>", + "<|audio:884|>", + "<|audio:885|>", + "<|audio:886|>", + "<|audio:887|>", + "<|audio:888|>", + "<|audio:889|>", + "<|audio:890|>", + "<|audio:891|>", + "<|audio:892|>", + "<|audio:893|>", + "<|audio:894|>", + "<|audio:895|>", + "<|audio:896|>", + "<|audio:897|>", + "<|audio:898|>", + "<|audio:899|>", + "<|audio:900|>", + "<|audio:901|>", + "<|audio:902|>", + "<|audio:903|>", + "<|audio:904|>", + "<|audio:905|>", + "<|audio:906|>", + "<|audio:907|>", + "<|audio:908|>", + "<|audio:909|>", + "<|audio:910|>", + "<|audio:911|>", + "<|audio:912|>", + "<|audio:913|>", + "<|audio:914|>", + "<|audio:915|>", + "<|audio:916|>", + "<|audio:917|>", + "<|audio:918|>", + "<|audio:919|>", + "<|audio:920|>", + "<|audio:921|>", + "<|audio:922|>", + "<|audio:923|>", + "<|audio:924|>", + "<|audio:925|>", + "<|audio:926|>", + "<|audio:927|>", + "<|audio:928|>", + "<|audio:929|>", + "<|audio:930|>", + "<|audio:931|>", + "<|audio:932|>", + "<|audio:933|>", + "<|audio:934|>", + "<|audio:935|>", + "<|audio:936|>", + "<|audio:937|>", + "<|audio:938|>", + "<|audio:939|>", + "<|audio:940|>", + "<|audio:941|>", + "<|audio:942|>", + "<|audio:943|>", + "<|audio:944|>", + "<|audio:945|>", + "<|audio:946|>", + "<|audio:947|>", + "<|audio:948|>", + "<|audio:949|>", + "<|audio:950|>", + "<|audio:951|>", + "<|audio:952|>", + "<|audio:953|>", + "<|audio:954|>", + "<|audio:955|>", + "<|audio:956|>", + "<|audio:957|>", + "<|audio:958|>", + "<|audio:959|>", + "<|audio:960|>", + "<|audio:961|>", + "<|audio:962|>", + "<|audio:963|>", + "<|audio:964|>", + "<|audio:965|>", + "<|audio:966|>", + "<|audio:967|>", + "<|audio:968|>", + "<|audio:969|>", + "<|audio:970|>", + "<|audio:971|>", + "<|audio:972|>", + 
"<|audio:973|>", + "<|audio:974|>", + "<|audio:975|>", + "<|audio:976|>", + "<|audio:977|>", + "<|audio:978|>", + "<|audio:979|>", + "<|audio:980|>", + "<|audio:981|>", + "<|audio:982|>", + "<|audio:983|>", + "<|audio:984|>", + "<|audio:985|>", + "<|audio:986|>", + "<|audio:987|>", + "<|audio:988|>", + "<|audio:989|>", + "<|audio:990|>", + "<|audio:991|>", + "<|audio:992|>", + "<|audio:993|>", + "<|audio:994|>", + "<|audio:995|>", + "<|audio:996|>", + "<|audio:997|>", + "<|audio:998|>", + "<|audio:999|>", + "<|audio:1000|>", + "<|audio:1001|>", + "<|audio:1002|>", + "<|audio:1003|>", + "<|audio:1004|>", + "<|audio:1005|>", + "<|audio:1006|>", + "<|audio:1007|>", + "<|audio:1008|>", + "<|audio:1009|>", + "<|audio:1010|>", + "<|audio:1011|>", + "<|audio:1012|>", + "<|audio:1013|>", + "<|audio:1014|>", + "<|audio:1015|>", + "<|audio:1016|>", + "<|audio:1017|>", + "<|audio:1018|>", + "<|audio:1019|>", + "<|audio:1020|>", + "<|audio:1021|>", + "<|audio:1022|>", + "<|audio:1023|>", + "<|startoftranscript|>", + "<|endoftranscript|>", + "<|padding|>" + ], + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/out/checkpoint-20000/trainer_state.json b/out/checkpoint-20000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1dff689e0664d965db0932decff2f298916fdd23 --- /dev/null +++ b/out/checkpoint-20000/trainer_state.json @@ -0,0 +1,140193 @@ +{ + "best_metric": 2.3642282485961914, + "best_model_checkpoint": "./out/checkpoint-20000", + "epoch": 1.6140747316600759, + "eval_steps": 1000, + "global_step": 20000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 8.07037365830038e-05, + "grad_norm": 0.8911969065666199, + "learning_rate": 2.0000000000000003e-06, + "loss": 2.6759, + "step": 1 + }, + { + "epoch": 0.0001614074731660076, + "grad_norm": 0.8724873661994934, + "learning_rate": 4.000000000000001e-06, + "loss": 2.7001, + "step": 2 + }, + { + 
"epoch": 0.00024211120974901139, + "grad_norm": 0.9050428867340088, + "learning_rate": 6e-06, + "loss": 2.6291, + "step": 3 + }, + { + "epoch": 0.0003228149463320152, + "grad_norm": 0.9249712824821472, + "learning_rate": 8.000000000000001e-06, + "loss": 2.7174, + "step": 4 + }, + { + "epoch": 0.000403518682915019, + "grad_norm": 0.9102846384048462, + "learning_rate": 1e-05, + "loss": 2.6831, + "step": 5 + }, + { + "epoch": 0.00048422241949802277, + "grad_norm": 0.9129141569137573, + "learning_rate": 1.2e-05, + "loss": 2.684, + "step": 6 + }, + { + "epoch": 0.0005649261560810266, + "grad_norm": 0.8648065328598022, + "learning_rate": 1.4000000000000001e-05, + "loss": 2.6488, + "step": 7 + }, + { + "epoch": 0.0006456298926640304, + "grad_norm": 0.8677545785903931, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.7143, + "step": 8 + }, + { + "epoch": 0.0007263336292470342, + "grad_norm": 0.919029712677002, + "learning_rate": 1.8e-05, + "loss": 2.631, + "step": 9 + }, + { + "epoch": 0.000807037365830038, + "grad_norm": 0.9289683103561401, + "learning_rate": 2e-05, + "loss": 2.6564, + "step": 10 + }, + { + "epoch": 0.0008877411024130417, + "grad_norm": 0.8810267448425293, + "learning_rate": 2.2000000000000003e-05, + "loss": 2.6395, + "step": 11 + }, + { + "epoch": 0.0009684448389960455, + "grad_norm": 0.8185754418373108, + "learning_rate": 2.4e-05, + "loss": 2.6871, + "step": 12 + }, + { + "epoch": 0.0010491485755790492, + "grad_norm": 0.9476913213729858, + "learning_rate": 2.6000000000000002e-05, + "loss": 2.7011, + "step": 13 + }, + { + "epoch": 0.0011298523121620531, + "grad_norm": 0.9616057872772217, + "learning_rate": 2.8000000000000003e-05, + "loss": 2.7373, + "step": 14 + }, + { + "epoch": 0.0012105560487450568, + "grad_norm": 0.9429686665534973, + "learning_rate": 3e-05, + "loss": 2.7556, + "step": 15 + }, + { + "epoch": 0.0012912597853280607, + "grad_norm": 1.0331422090530396, + "learning_rate": 3.2000000000000005e-05, + "loss": 2.7756, + "step": 16 + }, + 
{ + "epoch": 0.0013719635219110644, + "grad_norm": 0.906057596206665, + "learning_rate": 3.4000000000000007e-05, + "loss": 2.7053, + "step": 17 + }, + { + "epoch": 0.0014526672584940683, + "grad_norm": 0.8677626252174377, + "learning_rate": 3.6e-05, + "loss": 2.7012, + "step": 18 + }, + { + "epoch": 0.001533370995077072, + "grad_norm": 0.9378079175949097, + "learning_rate": 3.8e-05, + "loss": 2.6786, + "step": 19 + }, + { + "epoch": 0.001614074731660076, + "grad_norm": 1.0333882570266724, + "learning_rate": 4e-05, + "loss": 2.689, + "step": 20 + }, + { + "epoch": 0.0016947784682430796, + "grad_norm": 0.9435378909111023, + "learning_rate": 4.2e-05, + "loss": 2.7084, + "step": 21 + }, + { + "epoch": 0.0017754822048260835, + "grad_norm": 0.9530225396156311, + "learning_rate": 4.4000000000000006e-05, + "loss": 2.7039, + "step": 22 + }, + { + "epoch": 0.0018561859414090872, + "grad_norm": 1.0154749155044556, + "learning_rate": 4.600000000000001e-05, + "loss": 2.6623, + "step": 23 + }, + { + "epoch": 0.001936889677992091, + "grad_norm": 1.0341671705245972, + "learning_rate": 4.8e-05, + "loss": 2.7072, + "step": 24 + }, + { + "epoch": 0.002017593414575095, + "grad_norm": 0.9185739159584045, + "learning_rate": 5e-05, + "loss": 2.6595, + "step": 25 + }, + { + "epoch": 0.0020982971511580985, + "grad_norm": 1.060390591621399, + "learning_rate": 5.2000000000000004e-05, + "loss": 2.7045, + "step": 26 + }, + { + "epoch": 0.0021790008877411024, + "grad_norm": 0.9720118641853333, + "learning_rate": 5.4000000000000005e-05, + "loss": 2.6513, + "step": 27 + }, + { + "epoch": 0.0022597046243241063, + "grad_norm": 0.9426784515380859, + "learning_rate": 5.6000000000000006e-05, + "loss": 2.6541, + "step": 28 + }, + { + "epoch": 0.00234040836090711, + "grad_norm": 0.9736170768737793, + "learning_rate": 5.8e-05, + "loss": 2.7324, + "step": 29 + }, + { + "epoch": 0.0024211120974901136, + "grad_norm": 0.9831354022026062, + "learning_rate": 6e-05, + "loss": 2.6651, + "step": 30 + }, + { + 
"epoch": 0.0025018158340731175, + "grad_norm": 1.0222605466842651, + "learning_rate": 6.2e-05, + "loss": 2.7375, + "step": 31 + }, + { + "epoch": 0.0025825195706561214, + "grad_norm": 0.9182235598564148, + "learning_rate": 6.400000000000001e-05, + "loss": 2.7142, + "step": 32 + }, + { + "epoch": 0.0026632233072391254, + "grad_norm": 1.0200958251953125, + "learning_rate": 6.6e-05, + "loss": 2.6785, + "step": 33 + }, + { + "epoch": 0.002743927043822129, + "grad_norm": 1.0153381824493408, + "learning_rate": 6.800000000000001e-05, + "loss": 2.6737, + "step": 34 + }, + { + "epoch": 0.0028246307804051327, + "grad_norm": 0.8998087644577026, + "learning_rate": 7e-05, + "loss": 2.7594, + "step": 35 + }, + { + "epoch": 0.0029053345169881366, + "grad_norm": 0.9005621671676636, + "learning_rate": 7.2e-05, + "loss": 2.713, + "step": 36 + }, + { + "epoch": 0.0029860382535711405, + "grad_norm": 1.0165663957595825, + "learning_rate": 7.4e-05, + "loss": 2.7197, + "step": 37 + }, + { + "epoch": 0.003066741990154144, + "grad_norm": 1.0011894702911377, + "learning_rate": 7.6e-05, + "loss": 2.6315, + "step": 38 + }, + { + "epoch": 0.003147445726737148, + "grad_norm": 1.141209602355957, + "learning_rate": 7.800000000000001e-05, + "loss": 2.7249, + "step": 39 + }, + { + "epoch": 0.003228149463320152, + "grad_norm": 0.9114719033241272, + "learning_rate": 8e-05, + "loss": 2.7039, + "step": 40 + }, + { + "epoch": 0.0033088531999031557, + "grad_norm": 1.0193392038345337, + "learning_rate": 8.2e-05, + "loss": 2.6501, + "step": 41 + }, + { + "epoch": 0.003389556936486159, + "grad_norm": 0.9458270072937012, + "learning_rate": 8.4e-05, + "loss": 2.725, + "step": 42 + }, + { + "epoch": 0.003470260673069163, + "grad_norm": 0.9667492508888245, + "learning_rate": 8.6e-05, + "loss": 2.7232, + "step": 43 + }, + { + "epoch": 0.003550964409652167, + "grad_norm": 0.9987972378730774, + "learning_rate": 8.800000000000001e-05, + "loss": 2.6554, + "step": 44 + }, + { + "epoch": 0.003631668146235171, + 
"grad_norm": 1.0166393518447876, + "learning_rate": 9e-05, + "loss": 2.7291, + "step": 45 + }, + { + "epoch": 0.0037123718828181744, + "grad_norm": 0.9557009935379028, + "learning_rate": 9.200000000000001e-05, + "loss": 2.7194, + "step": 46 + }, + { + "epoch": 0.0037930756194011783, + "grad_norm": 0.9575492143630981, + "learning_rate": 9.4e-05, + "loss": 2.6671, + "step": 47 + }, + { + "epoch": 0.003873779355984182, + "grad_norm": 0.9614555239677429, + "learning_rate": 9.6e-05, + "loss": 2.6865, + "step": 48 + }, + { + "epoch": 0.003954483092567186, + "grad_norm": 0.9245515465736389, + "learning_rate": 9.8e-05, + "loss": 2.7821, + "step": 49 + }, + { + "epoch": 0.00403518682915019, + "grad_norm": 0.9756044745445251, + "learning_rate": 0.0001, + "loss": 2.7608, + "step": 50 + }, + { + "epoch": 0.0041158905657331935, + "grad_norm": 0.95787513256073, + "learning_rate": 0.00010200000000000001, + "loss": 2.6458, + "step": 51 + }, + { + "epoch": 0.004196594302316197, + "grad_norm": 1.0102490186691284, + "learning_rate": 0.00010400000000000001, + "loss": 2.7835, + "step": 52 + }, + { + "epoch": 0.004277298038899201, + "grad_norm": 0.9676176309585571, + "learning_rate": 0.00010600000000000002, + "loss": 2.702, + "step": 53 + }, + { + "epoch": 0.004358001775482205, + "grad_norm": 0.9724096655845642, + "learning_rate": 0.00010800000000000001, + "loss": 2.714, + "step": 54 + }, + { + "epoch": 0.004438705512065208, + "grad_norm": 0.9482994675636292, + "learning_rate": 0.00011000000000000002, + "loss": 2.8069, + "step": 55 + }, + { + "epoch": 0.0045194092486482125, + "grad_norm": 0.9886480569839478, + "learning_rate": 0.00011200000000000001, + "loss": 2.7468, + "step": 56 + }, + { + "epoch": 0.004600112985231216, + "grad_norm": 0.9696247577667236, + "learning_rate": 0.00011399999999999999, + "loss": 2.7486, + "step": 57 + }, + { + "epoch": 0.00468081672181422, + "grad_norm": 1.0638912916183472, + "learning_rate": 0.000116, + "loss": 2.7747, + "step": 58 + }, + { + "epoch": 
0.004761520458397224, + "grad_norm": 1.016483187675476, + "learning_rate": 0.000118, + "loss": 2.6925, + "step": 59 + }, + { + "epoch": 0.004842224194980227, + "grad_norm": 1.0298779010772705, + "learning_rate": 0.00012, + "loss": 2.7487, + "step": 60 + }, + { + "epoch": 0.004922927931563232, + "grad_norm": 1.1082268953323364, + "learning_rate": 0.000122, + "loss": 2.7697, + "step": 61 + }, + { + "epoch": 0.005003631668146235, + "grad_norm": 0.9202101826667786, + "learning_rate": 0.000124, + "loss": 2.7429, + "step": 62 + }, + { + "epoch": 0.0050843354047292386, + "grad_norm": 1.0140503644943237, + "learning_rate": 0.000126, + "loss": 2.7492, + "step": 63 + }, + { + "epoch": 0.005165039141312243, + "grad_norm": 1.0689163208007812, + "learning_rate": 0.00012800000000000002, + "loss": 2.7353, + "step": 64 + }, + { + "epoch": 0.005245742877895246, + "grad_norm": 0.9947141408920288, + "learning_rate": 0.00013000000000000002, + "loss": 2.7385, + "step": 65 + }, + { + "epoch": 0.005326446614478251, + "grad_norm": 1.2034410238265991, + "learning_rate": 0.000132, + "loss": 2.7632, + "step": 66 + }, + { + "epoch": 0.005407150351061254, + "grad_norm": 0.9450412392616272, + "learning_rate": 0.000134, + "loss": 2.7547, + "step": 67 + }, + { + "epoch": 0.005487854087644258, + "grad_norm": 1.1818269491195679, + "learning_rate": 0.00013600000000000003, + "loss": 2.7663, + "step": 68 + }, + { + "epoch": 0.005568557824227262, + "grad_norm": 1.003347396850586, + "learning_rate": 0.000138, + "loss": 2.7299, + "step": 69 + }, + { + "epoch": 0.0056492615608102655, + "grad_norm": 1.0105760097503662, + "learning_rate": 0.00014, + "loss": 2.7261, + "step": 70 + }, + { + "epoch": 0.005729965297393269, + "grad_norm": 0.9459090232849121, + "learning_rate": 0.000142, + "loss": 2.7237, + "step": 71 + }, + { + "epoch": 0.005810669033976273, + "grad_norm": 0.9716219305992126, + "learning_rate": 0.000144, + "loss": 2.8175, + "step": 72 + }, + { + "epoch": 0.005891372770559277, + "grad_norm": 
0.9968419075012207, + "learning_rate": 0.000146, + "loss": 2.7828, + "step": 73 + }, + { + "epoch": 0.005972076507142281, + "grad_norm": 1.099680781364441, + "learning_rate": 0.000148, + "loss": 2.7111, + "step": 74 + }, + { + "epoch": 0.0060527802437252845, + "grad_norm": 1.004846453666687, + "learning_rate": 0.00015000000000000001, + "loss": 2.7508, + "step": 75 + }, + { + "epoch": 0.006133483980308288, + "grad_norm": 1.0568128824234009, + "learning_rate": 0.000152, + "loss": 2.7341, + "step": 76 + }, + { + "epoch": 0.006214187716891292, + "grad_norm": 0.9871000051498413, + "learning_rate": 0.000154, + "loss": 2.7831, + "step": 77 + }, + { + "epoch": 0.006294891453474296, + "grad_norm": 1.005947232246399, + "learning_rate": 0.00015600000000000002, + "loss": 2.6798, + "step": 78 + }, + { + "epoch": 0.006375595190057299, + "grad_norm": 0.9984713792800903, + "learning_rate": 0.00015800000000000002, + "loss": 2.8126, + "step": 79 + }, + { + "epoch": 0.006456298926640304, + "grad_norm": 0.9805751442909241, + "learning_rate": 0.00016, + "loss": 2.7826, + "step": 80 + }, + { + "epoch": 0.006537002663223307, + "grad_norm": 1.02998685836792, + "learning_rate": 0.000162, + "loss": 2.7636, + "step": 81 + }, + { + "epoch": 0.006617706399806311, + "grad_norm": 1.0790135860443115, + "learning_rate": 0.000164, + "loss": 2.7809, + "step": 82 + }, + { + "epoch": 0.006698410136389315, + "grad_norm": 1.1058307886123657, + "learning_rate": 0.000166, + "loss": 2.787, + "step": 83 + }, + { + "epoch": 0.006779113872972318, + "grad_norm": 1.0199624300003052, + "learning_rate": 0.000168, + "loss": 2.7171, + "step": 84 + }, + { + "epoch": 0.006859817609555323, + "grad_norm": 1.006494402885437, + "learning_rate": 0.00017, + "loss": 2.7791, + "step": 85 + }, + { + "epoch": 0.006940521346138326, + "grad_norm": 0.9672449827194214, + "learning_rate": 0.000172, + "loss": 2.6929, + "step": 86 + }, + { + "epoch": 0.00702122508272133, + "grad_norm": 0.9747781157493591, + "learning_rate": 0.000174, 
+ "loss": 2.7676, + "step": 87 + }, + { + "epoch": 0.007101928819304334, + "grad_norm": 0.9193839430809021, + "learning_rate": 0.00017600000000000002, + "loss": 2.7124, + "step": 88 + }, + { + "epoch": 0.0071826325558873375, + "grad_norm": 1.078499436378479, + "learning_rate": 0.00017800000000000002, + "loss": 2.8018, + "step": 89 + }, + { + "epoch": 0.007263336292470342, + "grad_norm": 1.070957899093628, + "learning_rate": 0.00018, + "loss": 2.7889, + "step": 90 + }, + { + "epoch": 0.007344040029053345, + "grad_norm": 1.160942554473877, + "learning_rate": 0.000182, + "loss": 2.8026, + "step": 91 + }, + { + "epoch": 0.007424743765636349, + "grad_norm": 0.9988501071929932, + "learning_rate": 0.00018400000000000003, + "loss": 2.7746, + "step": 92 + }, + { + "epoch": 0.007505447502219353, + "grad_norm": 1.0882319211959839, + "learning_rate": 0.00018600000000000002, + "loss": 2.8105, + "step": 93 + }, + { + "epoch": 0.0075861512388023565, + "grad_norm": 1.1882357597351074, + "learning_rate": 0.000188, + "loss": 2.8294, + "step": 94 + }, + { + "epoch": 0.00766685497538536, + "grad_norm": 1.0761829614639282, + "learning_rate": 0.00019, + "loss": 2.7846, + "step": 95 + }, + { + "epoch": 0.007747558711968364, + "grad_norm": 1.0665982961654663, + "learning_rate": 0.000192, + "loss": 2.8542, + "step": 96 + }, + { + "epoch": 0.007828262448551369, + "grad_norm": 1.206127405166626, + "learning_rate": 0.000194, + "loss": 2.7711, + "step": 97 + }, + { + "epoch": 0.007908966185134371, + "grad_norm": 1.095150113105774, + "learning_rate": 0.000196, + "loss": 2.732, + "step": 98 + }, + { + "epoch": 0.007989669921717376, + "grad_norm": 1.118348240852356, + "learning_rate": 0.00019800000000000002, + "loss": 2.7736, + "step": 99 + }, + { + "epoch": 0.00807037365830038, + "grad_norm": 1.0646461248397827, + "learning_rate": 0.0002, + "loss": 2.8584, + "step": 100 + }, + { + "epoch": 0.008151077394883383, + "grad_norm": 1.0387661457061768, + "learning_rate": 0.0001999999987538693, + 
"loss": 2.7961, + "step": 101 + }, + { + "epoch": 0.008231781131466387, + "grad_norm": 1.1905474662780762, + "learning_rate": 0.00019999999501547723, + "loss": 2.8615, + "step": 102 + }, + { + "epoch": 0.008312484868049391, + "grad_norm": 0.9630722999572754, + "learning_rate": 0.0001999999887848239, + "loss": 2.8076, + "step": 103 + }, + { + "epoch": 0.008393188604632394, + "grad_norm": 1.1034537553787231, + "learning_rate": 0.00019999998006190942, + "loss": 2.8402, + "step": 104 + }, + { + "epoch": 0.008473892341215398, + "grad_norm": 1.0679295063018799, + "learning_rate": 0.00019999996884673403, + "loss": 2.7948, + "step": 105 + }, + { + "epoch": 0.008554596077798403, + "grad_norm": 1.0108860731124878, + "learning_rate": 0.00019999995513929802, + "loss": 2.7996, + "step": 106 + }, + { + "epoch": 0.008635299814381405, + "grad_norm": 1.3762084245681763, + "learning_rate": 0.0001999999389396017, + "loss": 2.8023, + "step": 107 + }, + { + "epoch": 0.00871600355096441, + "grad_norm": 1.1320533752441406, + "learning_rate": 0.00019999992024764555, + "loss": 2.793, + "step": 108 + }, + { + "epoch": 0.008796707287547414, + "grad_norm": 1.1752389669418335, + "learning_rate": 0.00019999989906342998, + "loss": 2.8274, + "step": 109 + }, + { + "epoch": 0.008877411024130416, + "grad_norm": 1.2734956741333008, + "learning_rate": 0.00019999987538695552, + "loss": 2.8017, + "step": 110 + }, + { + "epoch": 0.00895811476071342, + "grad_norm": 1.3703055381774902, + "learning_rate": 0.00019999984921822273, + "loss": 2.8699, + "step": 111 + }, + { + "epoch": 0.009038818497296425, + "grad_norm": 1.0079127550125122, + "learning_rate": 0.0001999998205572323, + "loss": 2.8845, + "step": 112 + }, + { + "epoch": 0.00911952223387943, + "grad_norm": 1.28025484085083, + "learning_rate": 0.000199999789403985, + "loss": 2.8636, + "step": 113 + }, + { + "epoch": 0.009200225970462432, + "grad_norm": 1.1057093143463135, + "learning_rate": 0.00019999975575848148, + "loss": 2.8484, + "step": 114 + }, 
+ { + "epoch": 0.009280929707045436, + "grad_norm": 1.0874677896499634, + "learning_rate": 0.00019999971962072265, + "loss": 2.7314, + "step": 115 + }, + { + "epoch": 0.00936163344362844, + "grad_norm": 1.0909658670425415, + "learning_rate": 0.00019999968099070943, + "loss": 2.7827, + "step": 116 + }, + { + "epoch": 0.009442337180211443, + "grad_norm": 1.0881624221801758, + "learning_rate": 0.00019999963986844273, + "loss": 2.827, + "step": 117 + }, + { + "epoch": 0.009523040916794448, + "grad_norm": 1.2498180866241455, + "learning_rate": 0.00019999959625392362, + "loss": 2.8695, + "step": 118 + }, + { + "epoch": 0.009603744653377452, + "grad_norm": 1.1344549655914307, + "learning_rate": 0.00019999955014715317, + "loss": 2.8079, + "step": 119 + }, + { + "epoch": 0.009684448389960455, + "grad_norm": 1.032563328742981, + "learning_rate": 0.00019999950154813253, + "loss": 2.7787, + "step": 120 + }, + { + "epoch": 0.009765152126543459, + "grad_norm": 0.9630110263824463, + "learning_rate": 0.0001999994504568629, + "loss": 2.8103, + "step": 121 + }, + { + "epoch": 0.009845855863126463, + "grad_norm": 1.0418641567230225, + "learning_rate": 0.0001999993968733456, + "loss": 2.8679, + "step": 122 + }, + { + "epoch": 0.009926559599709466, + "grad_norm": 0.9797310829162598, + "learning_rate": 0.00019999934079758188, + "loss": 2.7792, + "step": 123 + }, + { + "epoch": 0.01000726333629247, + "grad_norm": 1.0494028329849243, + "learning_rate": 0.00019999928222957323, + "loss": 2.8007, + "step": 124 + }, + { + "epoch": 0.010087967072875475, + "grad_norm": 1.1570640802383423, + "learning_rate": 0.00019999922116932105, + "loss": 2.8331, + "step": 125 + }, + { + "epoch": 0.010168670809458477, + "grad_norm": 1.2753098011016846, + "learning_rate": 0.00019999915761682684, + "loss": 2.8533, + "step": 126 + }, + { + "epoch": 0.010249374546041481, + "grad_norm": 0.9804013967514038, + "learning_rate": 0.00019999909157209227, + "loss": 2.841, + "step": 127 + }, + { + "epoch": 
0.010330078282624486, + "grad_norm": 1.320839285850525, + "learning_rate": 0.00019999902303511892, + "loss": 2.8738, + "step": 128 + }, + { + "epoch": 0.01041078201920749, + "grad_norm": 1.1105059385299683, + "learning_rate": 0.0001999989520059085, + "loss": 2.8458, + "step": 129 + }, + { + "epoch": 0.010491485755790493, + "grad_norm": 1.2869762182235718, + "learning_rate": 0.0001999988784844628, + "loss": 2.7951, + "step": 130 + }, + { + "epoch": 0.010572189492373497, + "grad_norm": 1.1609153747558594, + "learning_rate": 0.00019999880247078368, + "loss": 2.8147, + "step": 131 + }, + { + "epoch": 0.010652893228956501, + "grad_norm": 1.066728115081787, + "learning_rate": 0.00019999872396487297, + "loss": 2.863, + "step": 132 + }, + { + "epoch": 0.010733596965539504, + "grad_norm": 1.2868720293045044, + "learning_rate": 0.0001999986429667327, + "loss": 2.7765, + "step": 133 + }, + { + "epoch": 0.010814300702122508, + "grad_norm": 1.0064955949783325, + "learning_rate": 0.00019999855947636485, + "loss": 2.7834, + "step": 134 + }, + { + "epoch": 0.010895004438705513, + "grad_norm": 1.146589756011963, + "learning_rate": 0.00019999847349377143, + "loss": 2.7966, + "step": 135 + }, + { + "epoch": 0.010975708175288515, + "grad_norm": 0.9831073880195618, + "learning_rate": 0.0001999983850189547, + "loss": 2.8877, + "step": 136 + }, + { + "epoch": 0.01105641191187152, + "grad_norm": 1.1690322160720825, + "learning_rate": 0.0001999982940519168, + "loss": 2.8514, + "step": 137 + }, + { + "epoch": 0.011137115648454524, + "grad_norm": 1.0014944076538086, + "learning_rate": 0.00019999820059266003, + "loss": 2.7846, + "step": 138 + }, + { + "epoch": 0.011217819385037527, + "grad_norm": 0.9581566452980042, + "learning_rate": 0.0001999981046411867, + "loss": 2.7907, + "step": 139 + }, + { + "epoch": 0.011298523121620531, + "grad_norm": 1.1300675868988037, + "learning_rate": 0.00019999800619749922, + "loss": 2.8099, + "step": 140 + }, + { + "epoch": 0.011379226858203535, + 
"grad_norm": 0.9845526814460754, + "learning_rate": 0.0001999979052616, + "loss": 2.8607, + "step": 141 + }, + { + "epoch": 0.011459930594786538, + "grad_norm": 1.0781387090682983, + "learning_rate": 0.0001999978018334916, + "loss": 2.831, + "step": 142 + }, + { + "epoch": 0.011540634331369542, + "grad_norm": 1.1142648458480835, + "learning_rate": 0.00019999769591317658, + "loss": 2.9194, + "step": 143 + }, + { + "epoch": 0.011621338067952547, + "grad_norm": 0.9972650408744812, + "learning_rate": 0.00019999758750065757, + "loss": 2.8253, + "step": 144 + }, + { + "epoch": 0.01170204180453555, + "grad_norm": 1.040738582611084, + "learning_rate": 0.0001999974765959373, + "loss": 2.7378, + "step": 145 + }, + { + "epoch": 0.011782745541118553, + "grad_norm": 0.9824327826499939, + "learning_rate": 0.00019999736319901848, + "loss": 2.8263, + "step": 146 + }, + { + "epoch": 0.011863449277701558, + "grad_norm": 1.0531679391860962, + "learning_rate": 0.00019999724730990402, + "loss": 2.7975, + "step": 147 + }, + { + "epoch": 0.011944153014284562, + "grad_norm": 1.0699561834335327, + "learning_rate": 0.0001999971289285967, + "loss": 2.8199, + "step": 148 + }, + { + "epoch": 0.012024856750867565, + "grad_norm": 1.0203633308410645, + "learning_rate": 0.0001999970080550996, + "loss": 2.8479, + "step": 149 + }, + { + "epoch": 0.012105560487450569, + "grad_norm": 1.035589575767517, + "learning_rate": 0.00019999688468941564, + "loss": 2.8263, + "step": 150 + }, + { + "epoch": 0.012186264224033573, + "grad_norm": 0.9706670641899109, + "learning_rate": 0.00019999675883154792, + "loss": 2.8324, + "step": 151 + }, + { + "epoch": 0.012266967960616576, + "grad_norm": 1.1565446853637695, + "learning_rate": 0.00019999663048149958, + "loss": 2.8098, + "step": 152 + }, + { + "epoch": 0.01234767169719958, + "grad_norm": 1.025796890258789, + "learning_rate": 0.0001999964996392738, + "loss": 2.7906, + "step": 153 + }, + { + "epoch": 0.012428375433782585, + "grad_norm": 1.117438554763794, + 
"learning_rate": 0.00019999636630487386, + "loss": 2.8276, + "step": 154 + }, + { + "epoch": 0.012509079170365587, + "grad_norm": 1.025159478187561, + "learning_rate": 0.00019999623047830308, + "loss": 2.8089, + "step": 155 + }, + { + "epoch": 0.012589782906948592, + "grad_norm": 1.007582664489746, + "learning_rate": 0.00019999609215956487, + "loss": 2.8147, + "step": 156 + }, + { + "epoch": 0.012670486643531596, + "grad_norm": 1.0504885911941528, + "learning_rate": 0.0001999959513486626, + "loss": 2.8329, + "step": 157 + }, + { + "epoch": 0.012751190380114599, + "grad_norm": 0.918382465839386, + "learning_rate": 0.00019999580804559987, + "loss": 2.878, + "step": 158 + }, + { + "epoch": 0.012831894116697603, + "grad_norm": 0.9397236704826355, + "learning_rate": 0.0001999956622503802, + "loss": 2.8254, + "step": 159 + }, + { + "epoch": 0.012912597853280607, + "grad_norm": 0.9985697269439697, + "learning_rate": 0.00019999551396300723, + "loss": 2.8417, + "step": 160 + }, + { + "epoch": 0.01299330158986361, + "grad_norm": 0.9866878390312195, + "learning_rate": 0.00019999536318348465, + "loss": 2.7524, + "step": 161 + }, + { + "epoch": 0.013074005326446614, + "grad_norm": 1.0707440376281738, + "learning_rate": 0.00019999520991181627, + "loss": 2.8171, + "step": 162 + }, + { + "epoch": 0.013154709063029619, + "grad_norm": 0.9359755516052246, + "learning_rate": 0.00019999505414800583, + "loss": 2.8463, + "step": 163 + }, + { + "epoch": 0.013235412799612623, + "grad_norm": 1.056647777557373, + "learning_rate": 0.00019999489589205726, + "loss": 2.8602, + "step": 164 + }, + { + "epoch": 0.013316116536195625, + "grad_norm": 0.975370466709137, + "learning_rate": 0.0001999947351439745, + "loss": 2.8292, + "step": 165 + }, + { + "epoch": 0.01339682027277863, + "grad_norm": 0.9241237044334412, + "learning_rate": 0.00019999457190376157, + "loss": 2.7827, + "step": 166 + }, + { + "epoch": 0.013477524009361634, + "grad_norm": 0.9478302001953125, + "learning_rate": 
0.00019999440617142247, + "loss": 2.7708, + "step": 167 + }, + { + "epoch": 0.013558227745944637, + "grad_norm": 0.9804863333702087, + "learning_rate": 0.00019999423794696142, + "loss": 2.7696, + "step": 168 + }, + { + "epoch": 0.013638931482527641, + "grad_norm": 0.9764013886451721, + "learning_rate": 0.00019999406723038255, + "loss": 2.8521, + "step": 169 + }, + { + "epoch": 0.013719635219110645, + "grad_norm": 1.026532769203186, + "learning_rate": 0.00019999389402169016, + "loss": 2.8507, + "step": 170 + }, + { + "epoch": 0.013800338955693648, + "grad_norm": 0.9983204007148743, + "learning_rate": 0.00019999371832088854, + "loss": 2.8761, + "step": 171 + }, + { + "epoch": 0.013881042692276652, + "grad_norm": 0.9914593696594238, + "learning_rate": 0.00019999354012798206, + "loss": 2.8723, + "step": 172 + }, + { + "epoch": 0.013961746428859657, + "grad_norm": 1.066962718963623, + "learning_rate": 0.00019999335944297517, + "loss": 2.8635, + "step": 173 + }, + { + "epoch": 0.01404245016544266, + "grad_norm": 1.0848973989486694, + "learning_rate": 0.0001999931762658724, + "loss": 2.8645, + "step": 174 + }, + { + "epoch": 0.014123153902025664, + "grad_norm": 1.0245702266693115, + "learning_rate": 0.0001999929905966783, + "loss": 2.8463, + "step": 175 + }, + { + "epoch": 0.014203857638608668, + "grad_norm": 1.2363669872283936, + "learning_rate": 0.00019999280243539747, + "loss": 2.8345, + "step": 176 + }, + { + "epoch": 0.01428456137519167, + "grad_norm": 1.0224756002426147, + "learning_rate": 0.0001999926117820346, + "loss": 2.8309, + "step": 177 + }, + { + "epoch": 0.014365265111774675, + "grad_norm": 1.0882402658462524, + "learning_rate": 0.0001999924186365945, + "loss": 2.8619, + "step": 178 + }, + { + "epoch": 0.01444596884835768, + "grad_norm": 1.0384254455566406, + "learning_rate": 0.00019999222299908192, + "loss": 2.8477, + "step": 179 + }, + { + "epoch": 0.014526672584940684, + "grad_norm": 0.9662587642669678, + "learning_rate": 0.00019999202486950177, + 
"loss": 2.8087, + "step": 180 + }, + { + "epoch": 0.014607376321523686, + "grad_norm": 0.9086892604827881, + "learning_rate": 0.000199991824247859, + "loss": 2.7688, + "step": 181 + }, + { + "epoch": 0.01468808005810669, + "grad_norm": 1.004185676574707, + "learning_rate": 0.00019999162113415854, + "loss": 2.8237, + "step": 182 + }, + { + "epoch": 0.014768783794689695, + "grad_norm": 0.997965395450592, + "learning_rate": 0.00019999141552840552, + "loss": 2.8228, + "step": 183 + }, + { + "epoch": 0.014849487531272697, + "grad_norm": 0.9844975471496582, + "learning_rate": 0.00019999120743060503, + "loss": 2.8582, + "step": 184 + }, + { + "epoch": 0.014930191267855702, + "grad_norm": 1.0531272888183594, + "learning_rate": 0.00019999099684076232, + "loss": 2.8571, + "step": 185 + }, + { + "epoch": 0.015010895004438706, + "grad_norm": 1.1178920269012451, + "learning_rate": 0.00019999078375888257, + "loss": 2.85, + "step": 186 + }, + { + "epoch": 0.015091598741021709, + "grad_norm": 1.0773903131484985, + "learning_rate": 0.0001999905681849711, + "loss": 2.826, + "step": 187 + }, + { + "epoch": 0.015172302477604713, + "grad_norm": 1.1573486328125, + "learning_rate": 0.00019999035011903325, + "loss": 2.8866, + "step": 188 + }, + { + "epoch": 0.015253006214187717, + "grad_norm": 1.0401980876922607, + "learning_rate": 0.00019999012956107456, + "loss": 2.788, + "step": 189 + }, + { + "epoch": 0.01533370995077072, + "grad_norm": 1.0150686502456665, + "learning_rate": 0.00019998990651110045, + "loss": 2.8542, + "step": 190 + }, + { + "epoch": 0.015414413687353724, + "grad_norm": 1.1902797222137451, + "learning_rate": 0.0001999896809691165, + "loss": 2.9209, + "step": 191 + }, + { + "epoch": 0.015495117423936729, + "grad_norm": 1.0177555084228516, + "learning_rate": 0.0001999894529351283, + "loss": 2.7852, + "step": 192 + }, + { + "epoch": 0.015575821160519731, + "grad_norm": 1.062322974205017, + "learning_rate": 0.00019998922240914159, + "loss": 2.8328, + "step": 193 + }, + { + 
"epoch": 0.015656524897102737, + "grad_norm": 1.0937334299087524, + "learning_rate": 0.00019998898939116205, + "loss": 2.8069, + "step": 194 + }, + { + "epoch": 0.015737228633685738, + "grad_norm": 0.9553198218345642, + "learning_rate": 0.00019998875388119554, + "loss": 2.8402, + "step": 195 + }, + { + "epoch": 0.015817932370268743, + "grad_norm": 1.1802356243133545, + "learning_rate": 0.0001999885158792479, + "loss": 2.945, + "step": 196 + }, + { + "epoch": 0.015898636106851747, + "grad_norm": 1.160346269607544, + "learning_rate": 0.0001999882753853251, + "loss": 2.8341, + "step": 197 + }, + { + "epoch": 0.01597933984343475, + "grad_norm": 1.0379278659820557, + "learning_rate": 0.00019998803239943305, + "loss": 2.898, + "step": 198 + }, + { + "epoch": 0.016060043580017756, + "grad_norm": 1.2022395133972168, + "learning_rate": 0.00019998778692157792, + "loss": 2.8302, + "step": 199 + }, + { + "epoch": 0.01614074731660076, + "grad_norm": 1.057017207145691, + "learning_rate": 0.00019998753895176575, + "loss": 2.8474, + "step": 200 + }, + { + "epoch": 0.01622145105318376, + "grad_norm": 0.9299072027206421, + "learning_rate": 0.00019998728849000271, + "loss": 2.8266, + "step": 201 + }, + { + "epoch": 0.016302154789766765, + "grad_norm": 1.0296592712402344, + "learning_rate": 0.00019998703553629512, + "loss": 2.8106, + "step": 202 + }, + { + "epoch": 0.01638285852634977, + "grad_norm": 0.9641671180725098, + "learning_rate": 0.0001999867800906492, + "loss": 2.8089, + "step": 203 + }, + { + "epoch": 0.016463562262932774, + "grad_norm": 0.9951125383377075, + "learning_rate": 0.00019998652215307136, + "loss": 2.813, + "step": 204 + }, + { + "epoch": 0.016544265999515778, + "grad_norm": 1.0089969635009766, + "learning_rate": 0.00019998626172356804, + "loss": 2.8021, + "step": 205 + }, + { + "epoch": 0.016624969736098782, + "grad_norm": 0.9916231632232666, + "learning_rate": 0.00019998599880214566, + "loss": 2.8455, + "step": 206 + }, + { + "epoch": 0.016705673472681787, + 
"grad_norm": 0.9612492322921753, + "learning_rate": 0.00019998573338881088, + "loss": 2.8653, + "step": 207 + }, + { + "epoch": 0.016786377209264788, + "grad_norm": 0.984578013420105, + "learning_rate": 0.00019998546548357022, + "loss": 2.8359, + "step": 208 + }, + { + "epoch": 0.016867080945847792, + "grad_norm": 0.9457565546035767, + "learning_rate": 0.0001999851950864304, + "loss": 2.8507, + "step": 209 + }, + { + "epoch": 0.016947784682430796, + "grad_norm": 1.0219026803970337, + "learning_rate": 0.00019998492219739817, + "loss": 2.8326, + "step": 210 + }, + { + "epoch": 0.0170284884190138, + "grad_norm": 0.971570611000061, + "learning_rate": 0.00019998464681648032, + "loss": 2.8079, + "step": 211 + }, + { + "epoch": 0.017109192155596805, + "grad_norm": 0.9731320738792419, + "learning_rate": 0.00019998436894368368, + "loss": 2.8536, + "step": 212 + }, + { + "epoch": 0.01718989589217981, + "grad_norm": 1.0519105195999146, + "learning_rate": 0.00019998408857901525, + "loss": 2.8589, + "step": 213 + }, + { + "epoch": 0.01727059962876281, + "grad_norm": 0.9725883603096008, + "learning_rate": 0.00019998380572248194, + "loss": 2.7937, + "step": 214 + }, + { + "epoch": 0.017351303365345815, + "grad_norm": 1.0397064685821533, + "learning_rate": 0.00019998352037409084, + "loss": 2.9145, + "step": 215 + }, + { + "epoch": 0.01743200710192882, + "grad_norm": 0.9094852209091187, + "learning_rate": 0.00019998323253384904, + "loss": 2.7692, + "step": 216 + }, + { + "epoch": 0.017512710838511823, + "grad_norm": 0.941646158695221, + "learning_rate": 0.00019998294220176374, + "loss": 2.7975, + "step": 217 + }, + { + "epoch": 0.017593414575094828, + "grad_norm": 0.9939892888069153, + "learning_rate": 0.00019998264937784216, + "loss": 2.8421, + "step": 218 + }, + { + "epoch": 0.017674118311677832, + "grad_norm": 0.8985795378684998, + "learning_rate": 0.0001999823540620916, + "loss": 2.8146, + "step": 219 + }, + { + "epoch": 0.017754822048260833, + "grad_norm": 1.0436078310012817, 
+ "learning_rate": 0.00019998205625451943, + "loss": 2.8416, + "step": 220 + }, + { + "epoch": 0.017835525784843837, + "grad_norm": 0.9941675066947937, + "learning_rate": 0.00019998175595513305, + "loss": 2.8723, + "step": 221 + }, + { + "epoch": 0.01791622952142684, + "grad_norm": 0.9203903675079346, + "learning_rate": 0.00019998145316393995, + "loss": 2.7791, + "step": 222 + }, + { + "epoch": 0.017996933258009846, + "grad_norm": 0.9325969815254211, + "learning_rate": 0.00019998114788094768, + "loss": 2.8664, + "step": 223 + }, + { + "epoch": 0.01807763699459285, + "grad_norm": 0.9483599662780762, + "learning_rate": 0.00019998084010616388, + "loss": 2.7782, + "step": 224 + }, + { + "epoch": 0.018158340731175854, + "grad_norm": 0.9555078744888306, + "learning_rate": 0.00019998052983959615, + "loss": 2.7771, + "step": 225 + }, + { + "epoch": 0.01823904446775886, + "grad_norm": 0.9452421069145203, + "learning_rate": 0.00019998021708125233, + "loss": 2.8878, + "step": 226 + }, + { + "epoch": 0.01831974820434186, + "grad_norm": 0.9784894585609436, + "learning_rate": 0.00019997990183114007, + "loss": 2.8382, + "step": 227 + }, + { + "epoch": 0.018400451940924864, + "grad_norm": 1.0844931602478027, + "learning_rate": 0.00019997958408926735, + "loss": 2.8015, + "step": 228 + }, + { + "epoch": 0.01848115567750787, + "grad_norm": 1.0416710376739502, + "learning_rate": 0.00019997926385564207, + "loss": 2.8364, + "step": 229 + }, + { + "epoch": 0.018561859414090873, + "grad_norm": 0.9213813543319702, + "learning_rate": 0.00019997894113027215, + "loss": 2.8489, + "step": 230 + }, + { + "epoch": 0.018642563150673877, + "grad_norm": 1.0186388492584229, + "learning_rate": 0.00019997861591316567, + "loss": 2.914, + "step": 231 + }, + { + "epoch": 0.01872326688725688, + "grad_norm": 1.0032236576080322, + "learning_rate": 0.00019997828820433072, + "loss": 2.8733, + "step": 232 + }, + { + "epoch": 0.018803970623839882, + "grad_norm": 0.9783569574356079, + "learning_rate": 
0.0001999779580037755, + "loss": 2.851, + "step": 233 + }, + { + "epoch": 0.018884674360422887, + "grad_norm": 0.8471441268920898, + "learning_rate": 0.00019997762531150825, + "loss": 2.7923, + "step": 234 + }, + { + "epoch": 0.01896537809700589, + "grad_norm": 0.8912937641143799, + "learning_rate": 0.00019997729012753717, + "loss": 2.8725, + "step": 235 + }, + { + "epoch": 0.019046081833588895, + "grad_norm": 1.2453325986862183, + "learning_rate": 0.00019997695245187075, + "loss": 2.9292, + "step": 236 + }, + { + "epoch": 0.0191267855701719, + "grad_norm": 0.8870908617973328, + "learning_rate": 0.0001999766122845173, + "loss": 2.8008, + "step": 237 + }, + { + "epoch": 0.019207489306754904, + "grad_norm": 1.0679768323898315, + "learning_rate": 0.0001999762696254853, + "loss": 2.8919, + "step": 238 + }, + { + "epoch": 0.01928819304333791, + "grad_norm": 0.9769917130470276, + "learning_rate": 0.00019997592447478337, + "loss": 2.7937, + "step": 239 + }, + { + "epoch": 0.01936889677992091, + "grad_norm": 1.066183090209961, + "learning_rate": 0.00019997557683242004, + "loss": 2.8375, + "step": 240 + }, + { + "epoch": 0.019449600516503913, + "grad_norm": 0.9834103584289551, + "learning_rate": 0.000199975226698404, + "loss": 2.8577, + "step": 241 + }, + { + "epoch": 0.019530304253086918, + "grad_norm": 1.102211833000183, + "learning_rate": 0.00019997487407274396, + "loss": 2.8466, + "step": 242 + }, + { + "epoch": 0.019611007989669922, + "grad_norm": 0.9936226606369019, + "learning_rate": 0.00019997451895544872, + "loss": 2.7729, + "step": 243 + }, + { + "epoch": 0.019691711726252926, + "grad_norm": 1.0995992422103882, + "learning_rate": 0.00019997416134652713, + "loss": 2.8425, + "step": 244 + }, + { + "epoch": 0.01977241546283593, + "grad_norm": 0.94181889295578, + "learning_rate": 0.00019997380124598814, + "loss": 2.8495, + "step": 245 + }, + { + "epoch": 0.01985311919941893, + "grad_norm": 0.9791487455368042, + "learning_rate": 0.00019997343865384067, + "loss": 
2.8919, + "step": 246 + }, + { + "epoch": 0.019933822936001936, + "grad_norm": 0.9173399209976196, + "learning_rate": 0.00019997307357009375, + "loss": 2.8593, + "step": 247 + }, + { + "epoch": 0.02001452667258494, + "grad_norm": 0.9675281047821045, + "learning_rate": 0.00019997270599475653, + "loss": 2.8226, + "step": 248 + }, + { + "epoch": 0.020095230409167945, + "grad_norm": 0.8928244113922119, + "learning_rate": 0.00019997233592783812, + "loss": 2.8296, + "step": 249 + }, + { + "epoch": 0.02017593414575095, + "grad_norm": 0.928601861000061, + "learning_rate": 0.0001999719633693478, + "loss": 2.8399, + "step": 250 + }, + { + "epoch": 0.020256637882333953, + "grad_norm": 0.9378123879432678, + "learning_rate": 0.00019997158831929482, + "loss": 2.8711, + "step": 251 + }, + { + "epoch": 0.020337341618916954, + "grad_norm": 0.9041047692298889, + "learning_rate": 0.00019997121077768853, + "loss": 2.8338, + "step": 252 + }, + { + "epoch": 0.02041804535549996, + "grad_norm": 0.9673274755477905, + "learning_rate": 0.00019997083074453832, + "loss": 2.8556, + "step": 253 + }, + { + "epoch": 0.020498749092082963, + "grad_norm": 0.9204083681106567, + "learning_rate": 0.0001999704482198537, + "loss": 2.7954, + "step": 254 + }, + { + "epoch": 0.020579452828665967, + "grad_norm": 0.9267606735229492, + "learning_rate": 0.00019997006320364417, + "loss": 2.8656, + "step": 255 + }, + { + "epoch": 0.02066015656524897, + "grad_norm": 0.9562919735908508, + "learning_rate": 0.00019996967569591936, + "loss": 2.8406, + "step": 256 + }, + { + "epoch": 0.020740860301831976, + "grad_norm": 0.9065950512886047, + "learning_rate": 0.0001999692856966889, + "loss": 2.7856, + "step": 257 + }, + { + "epoch": 0.02082156403841498, + "grad_norm": 0.9136463403701782, + "learning_rate": 0.0001999688932059625, + "loss": 2.8083, + "step": 258 + }, + { + "epoch": 0.02090226777499798, + "grad_norm": 0.9785570502281189, + "learning_rate": 0.00019996849822374998, + "loss": 2.7984, + "step": 259 + }, + { + 
"epoch": 0.020982971511580985, + "grad_norm": 0.9549168348312378, + "learning_rate": 0.00019996810075006117, + "loss": 2.8048, + "step": 260 + }, + { + "epoch": 0.02106367524816399, + "grad_norm": 0.8923975825309753, + "learning_rate": 0.00019996770078490594, + "loss": 2.8559, + "step": 261 + }, + { + "epoch": 0.021144378984746994, + "grad_norm": 0.9516206383705139, + "learning_rate": 0.0001999672983282943, + "loss": 2.9171, + "step": 262 + }, + { + "epoch": 0.02122508272133, + "grad_norm": 0.9101666808128357, + "learning_rate": 0.0001999668933802363, + "loss": 2.8746, + "step": 263 + }, + { + "epoch": 0.021305786457913003, + "grad_norm": 0.9081267714500427, + "learning_rate": 0.00019996648594074195, + "loss": 2.8637, + "step": 264 + }, + { + "epoch": 0.021386490194496004, + "grad_norm": 1.0048178434371948, + "learning_rate": 0.0001999660760098215, + "loss": 2.8783, + "step": 265 + }, + { + "epoch": 0.021467193931079008, + "grad_norm": 0.9625924229621887, + "learning_rate": 0.0001999656635874851, + "loss": 2.8226, + "step": 266 + }, + { + "epoch": 0.021547897667662012, + "grad_norm": 0.9911805391311646, + "learning_rate": 0.00019996524867374306, + "loss": 2.8135, + "step": 267 + }, + { + "epoch": 0.021628601404245017, + "grad_norm": 0.8920134902000427, + "learning_rate": 0.00019996483126860572, + "loss": 2.7934, + "step": 268 + }, + { + "epoch": 0.02170930514082802, + "grad_norm": 1.0806514024734497, + "learning_rate": 0.00019996441137208346, + "loss": 2.8435, + "step": 269 + }, + { + "epoch": 0.021790008877411025, + "grad_norm": 0.9426547884941101, + "learning_rate": 0.00019996398898418675, + "loss": 2.7919, + "step": 270 + }, + { + "epoch": 0.021870712613994026, + "grad_norm": 0.9893020987510681, + "learning_rate": 0.00019996356410492615, + "loss": 2.8616, + "step": 271 + }, + { + "epoch": 0.02195141635057703, + "grad_norm": 1.0196046829223633, + "learning_rate": 0.00019996313673431218, + "loss": 2.8101, + "step": 272 + }, + { + "epoch": 0.022032120087160035, + 
"grad_norm": 0.9556699991226196, + "learning_rate": 0.00019996270687235558, + "loss": 2.8669, + "step": 273 + }, + { + "epoch": 0.02211282382374304, + "grad_norm": 0.8985902667045593, + "learning_rate": 0.00019996227451906702, + "loss": 2.8078, + "step": 274 + }, + { + "epoch": 0.022193527560326044, + "grad_norm": 1.0198246240615845, + "learning_rate": 0.00019996183967445726, + "loss": 2.8314, + "step": 275 + }, + { + "epoch": 0.022274231296909048, + "grad_norm": 0.9360179901123047, + "learning_rate": 0.00019996140233853715, + "loss": 2.7969, + "step": 276 + }, + { + "epoch": 0.022354935033492052, + "grad_norm": 1.0250160694122314, + "learning_rate": 0.00019996096251131759, + "loss": 2.7897, + "step": 277 + }, + { + "epoch": 0.022435638770075053, + "grad_norm": 0.934582531452179, + "learning_rate": 0.00019996052019280954, + "loss": 2.8667, + "step": 278 + }, + { + "epoch": 0.022516342506658057, + "grad_norm": 0.9394461512565613, + "learning_rate": 0.00019996007538302407, + "loss": 2.7681, + "step": 279 + }, + { + "epoch": 0.022597046243241062, + "grad_norm": 0.9468861222267151, + "learning_rate": 0.00019995962808197216, + "loss": 2.7709, + "step": 280 + }, + { + "epoch": 0.022677749979824066, + "grad_norm": 0.9798515439033508, + "learning_rate": 0.00019995917828966506, + "loss": 2.8274, + "step": 281 + }, + { + "epoch": 0.02275845371640707, + "grad_norm": 1.0403941869735718, + "learning_rate": 0.00019995872600611395, + "loss": 2.8897, + "step": 282 + }, + { + "epoch": 0.022839157452990075, + "grad_norm": 0.9795030951499939, + "learning_rate": 0.00019995827123133006, + "loss": 2.8792, + "step": 283 + }, + { + "epoch": 0.022919861189573076, + "grad_norm": 0.9162538647651672, + "learning_rate": 0.00019995781396532479, + "loss": 2.8339, + "step": 284 + }, + { + "epoch": 0.02300056492615608, + "grad_norm": 1.0864707231521606, + "learning_rate": 0.00019995735420810947, + "loss": 2.8599, + "step": 285 + }, + { + "epoch": 0.023081268662739084, + "grad_norm": 
0.9181776642799377, + "learning_rate": 0.0001999568919596956, + "loss": 2.8736, + "step": 286 + }, + { + "epoch": 0.02316197239932209, + "grad_norm": 0.8880531191825867, + "learning_rate": 0.00019995642722009472, + "loss": 2.8215, + "step": 287 + }, + { + "epoch": 0.023242676135905093, + "grad_norm": 0.9287240505218506, + "learning_rate": 0.00019995595998931835, + "loss": 2.844, + "step": 288 + }, + { + "epoch": 0.023323379872488097, + "grad_norm": 0.886894941329956, + "learning_rate": 0.0001999554902673782, + "loss": 2.8319, + "step": 289 + }, + { + "epoch": 0.0234040836090711, + "grad_norm": 0.9564458131790161, + "learning_rate": 0.0001999550180542859, + "loss": 2.8126, + "step": 290 + }, + { + "epoch": 0.023484787345654103, + "grad_norm": 0.8745970726013184, + "learning_rate": 0.00019995454335005334, + "loss": 2.8344, + "step": 291 + }, + { + "epoch": 0.023565491082237107, + "grad_norm": 1.0343137979507446, + "learning_rate": 0.00019995406615469217, + "loss": 2.8498, + "step": 292 + }, + { + "epoch": 0.02364619481882011, + "grad_norm": 0.9951575994491577, + "learning_rate": 0.0001999535864682145, + "loss": 2.8655, + "step": 293 + }, + { + "epoch": 0.023726898555403116, + "grad_norm": 0.8457592725753784, + "learning_rate": 0.0001999531042906321, + "loss": 2.8189, + "step": 294 + }, + { + "epoch": 0.02380760229198612, + "grad_norm": 0.9126954674720764, + "learning_rate": 0.00019995261962195708, + "loss": 2.8272, + "step": 295 + }, + { + "epoch": 0.023888306028569124, + "grad_norm": 1.0171937942504883, + "learning_rate": 0.0001999521324622015, + "loss": 2.869, + "step": 296 + }, + { + "epoch": 0.023969009765152125, + "grad_norm": 0.9887226223945618, + "learning_rate": 0.00019995164281137753, + "loss": 2.7643, + "step": 297 + }, + { + "epoch": 0.02404971350173513, + "grad_norm": 1.4240798950195312, + "learning_rate": 0.00019995115066949733, + "loss": 2.8332, + "step": 298 + }, + { + "epoch": 0.024130417238318134, + "grad_norm": 0.9856921434402466, + "learning_rate": 
0.00019995065603657316, + "loss": 2.8283, + "step": 299 + }, + { + "epoch": 0.024211120974901138, + "grad_norm": 0.997164785861969, + "learning_rate": 0.0001999501589126174, + "loss": 2.9164, + "step": 300 + }, + { + "epoch": 0.024291824711484142, + "grad_norm": 1.6480412483215332, + "learning_rate": 0.00019994965929764238, + "loss": 2.8941, + "step": 301 + }, + { + "epoch": 0.024372528448067147, + "grad_norm": 1.1590758562088013, + "learning_rate": 0.0001999491571916606, + "loss": 2.8127, + "step": 302 + }, + { + "epoch": 0.024453232184650148, + "grad_norm": 1.1228376626968384, + "learning_rate": 0.00019994865259468454, + "loss": 2.8439, + "step": 303 + }, + { + "epoch": 0.024533935921233152, + "grad_norm": 1.0426349639892578, + "learning_rate": 0.0001999481455067268, + "loss": 2.8671, + "step": 304 + }, + { + "epoch": 0.024614639657816156, + "grad_norm": 1.0911917686462402, + "learning_rate": 0.00019994763592779996, + "loss": 2.8297, + "step": 305 + }, + { + "epoch": 0.02469534339439916, + "grad_norm": 1.0493195056915283, + "learning_rate": 0.00019994712385791683, + "loss": 2.7996, + "step": 306 + }, + { + "epoch": 0.024776047130982165, + "grad_norm": 0.9275023341178894, + "learning_rate": 0.00019994660929709008, + "loss": 2.7949, + "step": 307 + }, + { + "epoch": 0.02485675086756517, + "grad_norm": 1.1074799299240112, + "learning_rate": 0.00019994609224533255, + "loss": 2.8364, + "step": 308 + }, + { + "epoch": 0.024937454604148174, + "grad_norm": 0.9189429879188538, + "learning_rate": 0.00019994557270265717, + "loss": 2.8293, + "step": 309 + }, + { + "epoch": 0.025018158340731175, + "grad_norm": 0.9577780961990356, + "learning_rate": 0.00019994505066907683, + "loss": 2.8295, + "step": 310 + }, + { + "epoch": 0.02509886207731418, + "grad_norm": 1.0707277059555054, + "learning_rate": 0.0001999445261446046, + "loss": 2.795, + "step": 311 + }, + { + "epoch": 0.025179565813897183, + "grad_norm": 0.9211257696151733, + "learning_rate": 0.0001999439991292535, + "loss": 
2.8355, + "step": 312 + }, + { + "epoch": 0.025260269550480188, + "grad_norm": 0.987779438495636, + "learning_rate": 0.00019994346962303667, + "loss": 2.8175, + "step": 313 + }, + { + "epoch": 0.025340973287063192, + "grad_norm": 0.9317128658294678, + "learning_rate": 0.00019994293762596734, + "loss": 2.8205, + "step": 314 + }, + { + "epoch": 0.025421677023646196, + "grad_norm": 0.8989154100418091, + "learning_rate": 0.00019994240313805873, + "loss": 2.8257, + "step": 315 + }, + { + "epoch": 0.025502380760229197, + "grad_norm": 0.8391042351722717, + "learning_rate": 0.00019994186615932423, + "loss": 2.8105, + "step": 316 + }, + { + "epoch": 0.0255830844968122, + "grad_norm": 0.8908089995384216, + "learning_rate": 0.00019994132668977715, + "loss": 2.7894, + "step": 317 + }, + { + "epoch": 0.025663788233395206, + "grad_norm": 0.8666881322860718, + "learning_rate": 0.00019994078472943097, + "loss": 2.7934, + "step": 318 + }, + { + "epoch": 0.02574449196997821, + "grad_norm": 0.8834616541862488, + "learning_rate": 0.00019994024027829914, + "loss": 2.8166, + "step": 319 + }, + { + "epoch": 0.025825195706561214, + "grad_norm": 0.9831370115280151, + "learning_rate": 0.00019993969333639532, + "loss": 2.889, + "step": 320 + }, + { + "epoch": 0.02590589944314422, + "grad_norm": 0.9171644449234009, + "learning_rate": 0.00019993914390373308, + "loss": 2.8582, + "step": 321 + }, + { + "epoch": 0.02598660317972722, + "grad_norm": 0.9624861478805542, + "learning_rate": 0.00019993859198032615, + "loss": 2.8574, + "step": 322 + }, + { + "epoch": 0.026067306916310224, + "grad_norm": 0.8826586008071899, + "learning_rate": 0.00019993803756618826, + "loss": 2.8544, + "step": 323 + }, + { + "epoch": 0.02614801065289323, + "grad_norm": 0.9286447763442993, + "learning_rate": 0.0001999374806613332, + "loss": 2.7937, + "step": 324 + }, + { + "epoch": 0.026228714389476233, + "grad_norm": 0.9901685118675232, + "learning_rate": 0.00019993692126577493, + "loss": 2.7654, + "step": 325 + }, + { + 
"epoch": 0.026309418126059237, + "grad_norm": 0.9624341130256653, + "learning_rate": 0.00019993635937952734, + "loss": 2.8804, + "step": 326 + }, + { + "epoch": 0.02639012186264224, + "grad_norm": 0.8867596387863159, + "learning_rate": 0.0001999357950026044, + "loss": 2.8254, + "step": 327 + }, + { + "epoch": 0.026470825599225246, + "grad_norm": 0.9243817925453186, + "learning_rate": 0.00019993522813502022, + "loss": 2.8177, + "step": 328 + }, + { + "epoch": 0.026551529335808247, + "grad_norm": 0.9322247505187988, + "learning_rate": 0.00019993465877678895, + "loss": 2.9023, + "step": 329 + }, + { + "epoch": 0.02663223307239125, + "grad_norm": 0.8768174648284912, + "learning_rate": 0.00019993408692792474, + "loss": 2.8184, + "step": 330 + }, + { + "epoch": 0.026712936808974255, + "grad_norm": 0.9436870813369751, + "learning_rate": 0.00019993351258844184, + "loss": 2.8319, + "step": 331 + }, + { + "epoch": 0.02679364054555726, + "grad_norm": 0.9970327019691467, + "learning_rate": 0.0001999329357583546, + "loss": 2.7946, + "step": 332 + }, + { + "epoch": 0.026874344282140264, + "grad_norm": 0.9100088477134705, + "learning_rate": 0.00019993235643767736, + "loss": 2.782, + "step": 333 + }, + { + "epoch": 0.02695504801872327, + "grad_norm": 0.9693402051925659, + "learning_rate": 0.00019993177462642456, + "loss": 2.8182, + "step": 334 + }, + { + "epoch": 0.02703575175530627, + "grad_norm": 0.8761965036392212, + "learning_rate": 0.00019993119032461073, + "loss": 2.8058, + "step": 335 + }, + { + "epoch": 0.027116455491889273, + "grad_norm": 1.0699270963668823, + "learning_rate": 0.00019993060353225043, + "loss": 2.9211, + "step": 336 + }, + { + "epoch": 0.027197159228472278, + "grad_norm": 1.0094172954559326, + "learning_rate": 0.00019993001424935822, + "loss": 2.8837, + "step": 337 + }, + { + "epoch": 0.027277862965055282, + "grad_norm": 0.9683573842048645, + "learning_rate": 0.00019992942247594887, + "loss": 2.8523, + "step": 338 + }, + { + "epoch": 0.027358566701638286, 
+ "grad_norm": 1.3243813514709473, + "learning_rate": 0.00019992882821203708, + "loss": 2.7891, + "step": 339 + }, + { + "epoch": 0.02743927043822129, + "grad_norm": 1.0227056741714478, + "learning_rate": 0.0001999282314576377, + "loss": 2.8396, + "step": 340 + }, + { + "epoch": 0.027519974174804295, + "grad_norm": 1.03257417678833, + "learning_rate": 0.00019992763221276556, + "loss": 2.824, + "step": 341 + }, + { + "epoch": 0.027600677911387296, + "grad_norm": 0.86456698179245, + "learning_rate": 0.00019992703047743562, + "loss": 2.8006, + "step": 342 + }, + { + "epoch": 0.0276813816479703, + "grad_norm": 0.965339720249176, + "learning_rate": 0.00019992642625166286, + "loss": 2.8658, + "step": 343 + }, + { + "epoch": 0.027762085384553305, + "grad_norm": 1.0028942823410034, + "learning_rate": 0.00019992581953546236, + "loss": 2.8311, + "step": 344 + }, + { + "epoch": 0.02784278912113631, + "grad_norm": 0.984307050704956, + "learning_rate": 0.0001999252103288492, + "loss": 2.8748, + "step": 345 + }, + { + "epoch": 0.027923492857719313, + "grad_norm": 0.9405032396316528, + "learning_rate": 0.00019992459863183858, + "loss": 2.8371, + "step": 346 + }, + { + "epoch": 0.028004196594302318, + "grad_norm": 0.9867002367973328, + "learning_rate": 0.0001999239844444458, + "loss": 2.7914, + "step": 347 + }, + { + "epoch": 0.02808490033088532, + "grad_norm": 0.9224951267242432, + "learning_rate": 0.00019992336776668613, + "loss": 2.7986, + "step": 348 + }, + { + "epoch": 0.028165604067468323, + "grad_norm": 1.002838134765625, + "learning_rate": 0.0001999227485985749, + "loss": 2.8207, + "step": 349 + }, + { + "epoch": 0.028246307804051327, + "grad_norm": 0.8922045826911926, + "learning_rate": 0.00019992212694012757, + "loss": 2.8264, + "step": 350 + }, + { + "epoch": 0.02832701154063433, + "grad_norm": 1.0860323905944824, + "learning_rate": 0.00019992150279135964, + "loss": 2.8778, + "step": 351 + }, + { + "epoch": 0.028407715277217336, + "grad_norm": 1.0995604991912842, + 
"learning_rate": 0.0001999208761522867, + "loss": 2.8599, + "step": 352 + }, + { + "epoch": 0.02848841901380034, + "grad_norm": 0.8741658926010132, + "learning_rate": 0.0001999202470229243, + "loss": 2.7757, + "step": 353 + }, + { + "epoch": 0.02856912275038334, + "grad_norm": 0.9142587184906006, + "learning_rate": 0.00019991961540328815, + "loss": 2.8235, + "step": 354 + }, + { + "epoch": 0.028649826486966345, + "grad_norm": 1.0000953674316406, + "learning_rate": 0.000199918981293394, + "loss": 2.8, + "step": 355 + }, + { + "epoch": 0.02873053022354935, + "grad_norm": 0.9416046738624573, + "learning_rate": 0.00019991834469325763, + "loss": 2.7941, + "step": 356 + }, + { + "epoch": 0.028811233960132354, + "grad_norm": 0.9135935306549072, + "learning_rate": 0.00019991770560289496, + "loss": 2.8315, + "step": 357 + }, + { + "epoch": 0.02889193769671536, + "grad_norm": 0.8867244124412537, + "learning_rate": 0.00019991706402232184, + "loss": 2.8649, + "step": 358 + }, + { + "epoch": 0.028972641433298363, + "grad_norm": 0.9360243678092957, + "learning_rate": 0.00019991641995155431, + "loss": 2.7556, + "step": 359 + }, + { + "epoch": 0.029053345169881367, + "grad_norm": 0.8903766870498657, + "learning_rate": 0.00019991577339060842, + "loss": 2.8379, + "step": 360 + }, + { + "epoch": 0.029134048906464368, + "grad_norm": 1.0178784132003784, + "learning_rate": 0.00019991512433950023, + "loss": 2.8045, + "step": 361 + }, + { + "epoch": 0.029214752643047372, + "grad_norm": 0.9318631887435913, + "learning_rate": 0.000199914472798246, + "loss": 2.823, + "step": 362 + }, + { + "epoch": 0.029295456379630377, + "grad_norm": 0.9384647011756897, + "learning_rate": 0.00019991381876686195, + "loss": 2.9379, + "step": 363 + }, + { + "epoch": 0.02937616011621338, + "grad_norm": 0.9318633675575256, + "learning_rate": 0.00019991316224536433, + "loss": 2.8222, + "step": 364 + }, + { + "epoch": 0.029456863852796385, + "grad_norm": 0.8653938174247742, + "learning_rate": 
0.00019991250323376952, + "loss": 2.8447, + "step": 365 + }, + { + "epoch": 0.02953756758937939, + "grad_norm": 0.8997991681098938, + "learning_rate": 0.00019991184173209398, + "loss": 2.8523, + "step": 366 + }, + { + "epoch": 0.02961827132596239, + "grad_norm": 0.8587092161178589, + "learning_rate": 0.00019991117774035416, + "loss": 2.8141, + "step": 367 + }, + { + "epoch": 0.029698975062545395, + "grad_norm": 0.8740741014480591, + "learning_rate": 0.00019991051125856663, + "loss": 2.7487, + "step": 368 + }, + { + "epoch": 0.0297796787991284, + "grad_norm": 0.9099416732788086, + "learning_rate": 0.00019990984228674798, + "loss": 2.834, + "step": 369 + }, + { + "epoch": 0.029860382535711404, + "grad_norm": 0.8675365447998047, + "learning_rate": 0.0001999091708249149, + "loss": 2.8259, + "step": 370 + }, + { + "epoch": 0.029941086272294408, + "grad_norm": 1.0141092538833618, + "learning_rate": 0.00019990849687308412, + "loss": 2.8369, + "step": 371 + }, + { + "epoch": 0.030021790008877412, + "grad_norm": 0.849155604839325, + "learning_rate": 0.00019990782043127243, + "loss": 2.7505, + "step": 372 + }, + { + "epoch": 0.030102493745460413, + "grad_norm": 1.073754072189331, + "learning_rate": 0.0001999071414994967, + "loss": 2.8939, + "step": 373 + }, + { + "epoch": 0.030183197482043417, + "grad_norm": 0.8615279197692871, + "learning_rate": 0.00019990646007777383, + "loss": 2.7662, + "step": 374 + }, + { + "epoch": 0.030263901218626422, + "grad_norm": 0.8803398609161377, + "learning_rate": 0.0001999057761661208, + "loss": 2.7992, + "step": 375 + }, + { + "epoch": 0.030344604955209426, + "grad_norm": 0.8901834487915039, + "learning_rate": 0.00019990508976455473, + "loss": 2.8222, + "step": 376 + }, + { + "epoch": 0.03042530869179243, + "grad_norm": 0.9443284869194031, + "learning_rate": 0.00019990440087309263, + "loss": 2.8326, + "step": 377 + }, + { + "epoch": 0.030506012428375435, + "grad_norm": 0.9122868180274963, + "learning_rate": 0.0001999037094917517, + "loss": 
2.7653, + "step": 378 + }, + { + "epoch": 0.03058671616495844, + "grad_norm": 0.8764635920524597, + "learning_rate": 0.0001999030156205492, + "loss": 2.7813, + "step": 379 + }, + { + "epoch": 0.03066741990154144, + "grad_norm": 0.8466865420341492, + "learning_rate": 0.0001999023192595024, + "loss": 2.8338, + "step": 380 + }, + { + "epoch": 0.030748123638124444, + "grad_norm": 0.8833961486816406, + "learning_rate": 0.00019990162040862863, + "loss": 2.78, + "step": 381 + }, + { + "epoch": 0.03082882737470745, + "grad_norm": 1.0298357009887695, + "learning_rate": 0.00019990091906794537, + "loss": 2.8059, + "step": 382 + }, + { + "epoch": 0.030909531111290453, + "grad_norm": 0.8651318550109863, + "learning_rate": 0.00019990021523747005, + "loss": 2.8608, + "step": 383 + }, + { + "epoch": 0.030990234847873457, + "grad_norm": 1.0262864828109741, + "learning_rate": 0.0001998995089172202, + "loss": 2.8226, + "step": 384 + }, + { + "epoch": 0.03107093858445646, + "grad_norm": 0.9266276955604553, + "learning_rate": 0.00019989880010721348, + "loss": 2.9414, + "step": 385 + }, + { + "epoch": 0.031151642321039463, + "grad_norm": 0.8762117028236389, + "learning_rate": 0.00019989808880746749, + "loss": 2.8023, + "step": 386 + }, + { + "epoch": 0.031232346057622467, + "grad_norm": 0.8531816601753235, + "learning_rate": 0.00019989737501800004, + "loss": 2.777, + "step": 387 + }, + { + "epoch": 0.031313049794205475, + "grad_norm": 0.8999545574188232, + "learning_rate": 0.0001998966587388288, + "loss": 2.8656, + "step": 388 + }, + { + "epoch": 0.03139375353078847, + "grad_norm": 0.932248055934906, + "learning_rate": 0.00019989593996997177, + "loss": 2.8212, + "step": 389 + }, + { + "epoch": 0.031474457267371476, + "grad_norm": 0.9059134125709534, + "learning_rate": 0.00019989521871144672, + "loss": 2.7945, + "step": 390 + }, + { + "epoch": 0.03155516100395448, + "grad_norm": 0.9323028922080994, + "learning_rate": 0.00019989449496327172, + "loss": 2.8338, + "step": 391 + }, + { + 
"epoch": 0.031635864740537485, + "grad_norm": 0.9141251444816589, + "learning_rate": 0.0001998937687254648, + "loss": 2.7935, + "step": 392 + }, + { + "epoch": 0.03171656847712049, + "grad_norm": 1.0026880502700806, + "learning_rate": 0.000199893039998044, + "loss": 2.8811, + "step": 393 + }, + { + "epoch": 0.031797272213703494, + "grad_norm": 1.0178622007369995, + "learning_rate": 0.00019989230878102756, + "loss": 2.9003, + "step": 394 + }, + { + "epoch": 0.0318779759502865, + "grad_norm": 0.9111912846565247, + "learning_rate": 0.00019989157507443363, + "loss": 2.8399, + "step": 395 + }, + { + "epoch": 0.0319586796868695, + "grad_norm": 1.054563283920288, + "learning_rate": 0.00019989083887828052, + "loss": 2.9088, + "step": 396 + }, + { + "epoch": 0.03203938342345251, + "grad_norm": 0.9459816217422485, + "learning_rate": 0.00019989010019258663, + "loss": 2.805, + "step": 397 + }, + { + "epoch": 0.03212008716003551, + "grad_norm": 1.0139873027801514, + "learning_rate": 0.00019988935901737033, + "loss": 2.8452, + "step": 398 + }, + { + "epoch": 0.032200790896618516, + "grad_norm": 0.986325204372406, + "learning_rate": 0.00019988861535265006, + "loss": 2.8311, + "step": 399 + }, + { + "epoch": 0.03228149463320152, + "grad_norm": 0.9565223455429077, + "learning_rate": 0.00019988786919844436, + "loss": 2.7766, + "step": 400 + }, + { + "epoch": 0.032362198369784524, + "grad_norm": 0.8901559710502625, + "learning_rate": 0.0001998871205547719, + "loss": 2.7966, + "step": 401 + }, + { + "epoch": 0.03244290210636752, + "grad_norm": 1.0959528684616089, + "learning_rate": 0.00019988636942165123, + "loss": 2.8377, + "step": 402 + }, + { + "epoch": 0.032523605842950526, + "grad_norm": 1.0768988132476807, + "learning_rate": 0.00019988561579910118, + "loss": 2.8267, + "step": 403 + }, + { + "epoch": 0.03260430957953353, + "grad_norm": 0.9563855528831482, + "learning_rate": 0.00019988485968714048, + "loss": 2.8459, + "step": 404 + }, + { + "epoch": 0.032685013316116535, + 
"grad_norm": 0.930927038192749, + "learning_rate": 0.00019988410108578796, + "loss": 2.8053, + "step": 405 + }, + { + "epoch": 0.03276571705269954, + "grad_norm": 1.0658363103866577, + "learning_rate": 0.00019988333999506255, + "loss": 2.8512, + "step": 406 + }, + { + "epoch": 0.03284642078928254, + "grad_norm": 0.9258090257644653, + "learning_rate": 0.0001998825764149832, + "loss": 2.8541, + "step": 407 + }, + { + "epoch": 0.03292712452586555, + "grad_norm": 1.18158757686615, + "learning_rate": 0.00019988181034556895, + "loss": 2.8838, + "step": 408 + }, + { + "epoch": 0.03300782826244855, + "grad_norm": 0.9506754875183105, + "learning_rate": 0.00019988104178683891, + "loss": 2.7733, + "step": 409 + }, + { + "epoch": 0.033088531999031556, + "grad_norm": 0.9559460282325745, + "learning_rate": 0.0001998802707388122, + "loss": 2.9259, + "step": 410 + }, + { + "epoch": 0.03316923573561456, + "grad_norm": 0.9322298765182495, + "learning_rate": 0.00019987949720150808, + "loss": 2.8318, + "step": 411 + }, + { + "epoch": 0.033249939472197565, + "grad_norm": 0.9226691722869873, + "learning_rate": 0.00019987872117494576, + "loss": 2.9063, + "step": 412 + }, + { + "epoch": 0.03333064320878057, + "grad_norm": 1.0543674230575562, + "learning_rate": 0.00019987794265914464, + "loss": 2.7877, + "step": 413 + }, + { + "epoch": 0.033411346945363574, + "grad_norm": 0.989986002445221, + "learning_rate": 0.00019987716165412408, + "loss": 2.8354, + "step": 414 + }, + { + "epoch": 0.03349205068194657, + "grad_norm": 0.8703451752662659, + "learning_rate": 0.0001998763781599036, + "loss": 2.8127, + "step": 415 + }, + { + "epoch": 0.033572754418529575, + "grad_norm": 0.974943220615387, + "learning_rate": 0.0001998755921765027, + "loss": 2.9272, + "step": 416 + }, + { + "epoch": 0.03365345815511258, + "grad_norm": 0.8714169859886169, + "learning_rate": 0.000199874803703941, + "loss": 2.8027, + "step": 417 + }, + { + "epoch": 0.033734161891695584, + "grad_norm": 0.9251161217689514, + 
"learning_rate": 0.00019987401274223804, + "loss": 2.8186, + "step": 418 + }, + { + "epoch": 0.03381486562827859, + "grad_norm": 0.9657236933708191, + "learning_rate": 0.00019987321929141366, + "loss": 2.8297, + "step": 419 + }, + { + "epoch": 0.03389556936486159, + "grad_norm": 0.9022002816200256, + "learning_rate": 0.00019987242335148757, + "loss": 2.881, + "step": 420 + }, + { + "epoch": 0.0339762731014446, + "grad_norm": 0.9479621052742004, + "learning_rate": 0.0001998716249224796, + "loss": 2.8288, + "step": 421 + }, + { + "epoch": 0.0340569768380276, + "grad_norm": 0.9458955526351929, + "learning_rate": 0.00019987082400440968, + "loss": 2.8861, + "step": 422 + }, + { + "epoch": 0.034137680574610606, + "grad_norm": 0.9444572329521179, + "learning_rate": 0.0001998700205972978, + "loss": 2.8877, + "step": 423 + }, + { + "epoch": 0.03421838431119361, + "grad_norm": 0.9263925552368164, + "learning_rate": 0.00019986921470116392, + "loss": 2.8028, + "step": 424 + }, + { + "epoch": 0.034299088047776614, + "grad_norm": 1.0690566301345825, + "learning_rate": 0.00019986840631602812, + "loss": 2.882, + "step": 425 + }, + { + "epoch": 0.03437979178435962, + "grad_norm": 0.8999007940292358, + "learning_rate": 0.0001998675954419106, + "loss": 2.8179, + "step": 426 + }, + { + "epoch": 0.03446049552094262, + "grad_norm": 0.894395112991333, + "learning_rate": 0.00019986678207883153, + "loss": 2.814, + "step": 427 + }, + { + "epoch": 0.03454119925752562, + "grad_norm": 0.8621550798416138, + "learning_rate": 0.00019986596622681123, + "loss": 2.7584, + "step": 428 + }, + { + "epoch": 0.034621902994108625, + "grad_norm": 0.9452527165412903, + "learning_rate": 0.00019986514788587, + "loss": 2.8949, + "step": 429 + }, + { + "epoch": 0.03470260673069163, + "grad_norm": 0.8973272442817688, + "learning_rate": 0.0001998643270560282, + "loss": 2.868, + "step": 430 + }, + { + "epoch": 0.034783310467274633, + "grad_norm": 0.9887418150901794, + "learning_rate": 0.00019986350373730634, + 
"loss": 2.8009, + "step": 431 + }, + { + "epoch": 0.03486401420385764, + "grad_norm": 0.9449994564056396, + "learning_rate": 0.0001998626779297249, + "loss": 2.8305, + "step": 432 + }, + { + "epoch": 0.03494471794044064, + "grad_norm": 1.052871823310852, + "learning_rate": 0.0001998618496333045, + "loss": 2.8136, + "step": 433 + }, + { + "epoch": 0.035025421677023647, + "grad_norm": 0.9600724577903748, + "learning_rate": 0.00019986101884806576, + "loss": 2.7857, + "step": 434 + }, + { + "epoch": 0.03510612541360665, + "grad_norm": 0.874043345451355, + "learning_rate": 0.00019986018557402942, + "loss": 2.8524, + "step": 435 + }, + { + "epoch": 0.035186829150189655, + "grad_norm": 0.9810616374015808, + "learning_rate": 0.0001998593498112162, + "loss": 2.7506, + "step": 436 + }, + { + "epoch": 0.03526753288677266, + "grad_norm": 0.9163016080856323, + "learning_rate": 0.00019985851155964693, + "loss": 2.798, + "step": 437 + }, + { + "epoch": 0.035348236623355664, + "grad_norm": 1.0688380002975464, + "learning_rate": 0.00019985767081934252, + "loss": 2.8916, + "step": 438 + }, + { + "epoch": 0.03542894035993867, + "grad_norm": 0.925020158290863, + "learning_rate": 0.00019985682759032393, + "loss": 2.8017, + "step": 439 + }, + { + "epoch": 0.035509644096521666, + "grad_norm": 0.9429430961608887, + "learning_rate": 0.0001998559818726122, + "loss": 2.837, + "step": 440 + }, + { + "epoch": 0.03559034783310467, + "grad_norm": 0.9135627150535583, + "learning_rate": 0.00019985513366622832, + "loss": 2.8423, + "step": 441 + }, + { + "epoch": 0.035671051569687674, + "grad_norm": 0.9218924045562744, + "learning_rate": 0.00019985428297119353, + "loss": 2.854, + "step": 442 + }, + { + "epoch": 0.03575175530627068, + "grad_norm": 0.9307878613471985, + "learning_rate": 0.00019985342978752897, + "loss": 2.8591, + "step": 443 + }, + { + "epoch": 0.03583245904285368, + "grad_norm": 0.935394287109375, + "learning_rate": 0.00019985257411525592, + "loss": 2.8388, + "step": 444 + }, + { + 
"epoch": 0.03591316277943669, + "grad_norm": 0.890959620475769, + "learning_rate": 0.0001998517159543957, + "loss": 2.78, + "step": 445 + }, + { + "epoch": 0.03599386651601969, + "grad_norm": 1.110924482345581, + "learning_rate": 0.0001998508553049697, + "loss": 2.8117, + "step": 446 + }, + { + "epoch": 0.036074570252602696, + "grad_norm": 0.8774176239967346, + "learning_rate": 0.0001998499921669994, + "loss": 2.8368, + "step": 447 + }, + { + "epoch": 0.0361552739891857, + "grad_norm": 0.9766948819160461, + "learning_rate": 0.00019984912654050625, + "loss": 2.764, + "step": 448 + }, + { + "epoch": 0.036235977725768705, + "grad_norm": 1.1439398527145386, + "learning_rate": 0.00019984825842551187, + "loss": 2.84, + "step": 449 + }, + { + "epoch": 0.03631668146235171, + "grad_norm": 0.8995118737220764, + "learning_rate": 0.0001998473878220379, + "loss": 2.834, + "step": 450 + }, + { + "epoch": 0.03639738519893471, + "grad_norm": 0.9810060858726501, + "learning_rate": 0.000199846514730106, + "loss": 2.9338, + "step": 451 + }, + { + "epoch": 0.03647808893551772, + "grad_norm": 1.0862053632736206, + "learning_rate": 0.00019984563914973795, + "loss": 2.837, + "step": 452 + }, + { + "epoch": 0.036558792672100715, + "grad_norm": 0.9456702470779419, + "learning_rate": 0.0001998447610809556, + "loss": 2.7664, + "step": 453 + }, + { + "epoch": 0.03663949640868372, + "grad_norm": 1.0714432001113892, + "learning_rate": 0.0001998438805237808, + "loss": 2.8339, + "step": 454 + }, + { + "epoch": 0.036720200145266724, + "grad_norm": 0.89134281873703, + "learning_rate": 0.00019984299747823547, + "loss": 2.7818, + "step": 455 + }, + { + "epoch": 0.03680090388184973, + "grad_norm": 0.869742214679718, + "learning_rate": 0.0001998421119443417, + "loss": 2.7916, + "step": 456 + }, + { + "epoch": 0.03688160761843273, + "grad_norm": 0.9307265281677246, + "learning_rate": 0.00019984122392212149, + "loss": 2.8485, + "step": 457 + }, + { + "epoch": 0.03696231135501574, + "grad_norm": 
0.900215744972229, + "learning_rate": 0.00019984033341159698, + "loss": 2.8536, + "step": 458 + }, + { + "epoch": 0.03704301509159874, + "grad_norm": 0.8679699897766113, + "learning_rate": 0.00019983944041279038, + "loss": 2.8344, + "step": 459 + }, + { + "epoch": 0.037123718828181745, + "grad_norm": 0.9540488719940186, + "learning_rate": 0.00019983854492572394, + "loss": 2.873, + "step": 460 + }, + { + "epoch": 0.03720442256476475, + "grad_norm": 0.8697962760925293, + "learning_rate": 0.00019983764695042, + "loss": 2.8122, + "step": 461 + }, + { + "epoch": 0.037285126301347754, + "grad_norm": 0.9534483551979065, + "learning_rate": 0.0001998367464869009, + "loss": 2.8842, + "step": 462 + }, + { + "epoch": 0.03736583003793076, + "grad_norm": 0.8402275443077087, + "learning_rate": 0.00019983584353518911, + "loss": 2.8135, + "step": 463 + }, + { + "epoch": 0.03744653377451376, + "grad_norm": 0.8226146697998047, + "learning_rate": 0.0001998349380953071, + "loss": 2.8036, + "step": 464 + }, + { + "epoch": 0.03752723751109677, + "grad_norm": 0.9292199611663818, + "learning_rate": 0.0001998340301672775, + "loss": 2.7887, + "step": 465 + }, + { + "epoch": 0.037607941247679764, + "grad_norm": 0.9035555124282837, + "learning_rate": 0.0001998331197511229, + "loss": 2.7851, + "step": 466 + }, + { + "epoch": 0.03768864498426277, + "grad_norm": 0.9411706328392029, + "learning_rate": 0.00019983220684686596, + "loss": 2.7782, + "step": 467 + }, + { + "epoch": 0.03776934872084577, + "grad_norm": 0.9867696166038513, + "learning_rate": 0.0001998312914545295, + "loss": 2.8125, + "step": 468 + }, + { + "epoch": 0.03785005245742878, + "grad_norm": 0.9683675169944763, + "learning_rate": 0.00019983037357413624, + "loss": 2.8325, + "step": 469 + }, + { + "epoch": 0.03793075619401178, + "grad_norm": 0.963941752910614, + "learning_rate": 0.00019982945320570913, + "loss": 2.8281, + "step": 470 + }, + { + "epoch": 0.038011459930594786, + "grad_norm": 0.9812459349632263, + "learning_rate": 
0.0001998285303492711, + "loss": 2.765, + "step": 471 + }, + { + "epoch": 0.03809216366717779, + "grad_norm": 0.9681405425071716, + "learning_rate": 0.00019982760500484516, + "loss": 2.8882, + "step": 472 + }, + { + "epoch": 0.038172867403760795, + "grad_norm": 0.8983948826789856, + "learning_rate": 0.00019982667717245432, + "loss": 2.8182, + "step": 473 + }, + { + "epoch": 0.0382535711403438, + "grad_norm": 0.9875261783599854, + "learning_rate": 0.00019982574685212178, + "loss": 2.8072, + "step": 474 + }, + { + "epoch": 0.038334274876926804, + "grad_norm": 0.8889442086219788, + "learning_rate": 0.00019982481404387064, + "loss": 2.8635, + "step": 475 + }, + { + "epoch": 0.03841497861350981, + "grad_norm": 0.8904242515563965, + "learning_rate": 0.00019982387874772418, + "loss": 2.829, + "step": 476 + }, + { + "epoch": 0.03849568235009281, + "grad_norm": 1.0182000398635864, + "learning_rate": 0.00019982294096370574, + "loss": 2.8552, + "step": 477 + }, + { + "epoch": 0.03857638608667582, + "grad_norm": 0.9867151975631714, + "learning_rate": 0.00019982200069183867, + "loss": 2.8201, + "step": 478 + }, + { + "epoch": 0.038657089823258814, + "grad_norm": 0.9785345196723938, + "learning_rate": 0.0001998210579321464, + "loss": 2.8652, + "step": 479 + }, + { + "epoch": 0.03873779355984182, + "grad_norm": 0.9696915149688721, + "learning_rate": 0.00019982011268465243, + "loss": 2.8276, + "step": 480 + }, + { + "epoch": 0.03881849729642482, + "grad_norm": 0.9257470965385437, + "learning_rate": 0.00019981916494938033, + "loss": 2.8321, + "step": 481 + }, + { + "epoch": 0.03889920103300783, + "grad_norm": 0.9394895434379578, + "learning_rate": 0.00019981821472635369, + "loss": 2.8747, + "step": 482 + }, + { + "epoch": 0.03897990476959083, + "grad_norm": 0.9888504147529602, + "learning_rate": 0.00019981726201559626, + "loss": 2.8201, + "step": 483 + }, + { + "epoch": 0.039060608506173836, + "grad_norm": 0.8957003951072693, + "learning_rate": 0.0001998163068171317, + "loss": 
2.8255, + "step": 484 + }, + { + "epoch": 0.03914131224275684, + "grad_norm": 0.9792008996009827, + "learning_rate": 0.00019981534913098383, + "loss": 2.7985, + "step": 485 + }, + { + "epoch": 0.039222015979339844, + "grad_norm": 0.8689060211181641, + "learning_rate": 0.00019981438895717656, + "loss": 2.7945, + "step": 486 + }, + { + "epoch": 0.03930271971592285, + "grad_norm": 0.9932593703269958, + "learning_rate": 0.0001998134262957338, + "loss": 2.9041, + "step": 487 + }, + { + "epoch": 0.03938342345250585, + "grad_norm": 0.8496069312095642, + "learning_rate": 0.00019981246114667955, + "loss": 2.8433, + "step": 488 + }, + { + "epoch": 0.03946412718908886, + "grad_norm": 0.8484126925468445, + "learning_rate": 0.00019981149351003786, + "loss": 2.7872, + "step": 489 + }, + { + "epoch": 0.03954483092567186, + "grad_norm": 0.9208858013153076, + "learning_rate": 0.00019981052338583283, + "loss": 2.7776, + "step": 490 + }, + { + "epoch": 0.03962553466225486, + "grad_norm": 0.9305418729782104, + "learning_rate": 0.00019980955077408865, + "loss": 2.7851, + "step": 491 + }, + { + "epoch": 0.03970623839883786, + "grad_norm": 0.9803212881088257, + "learning_rate": 0.00019980857567482955, + "loss": 2.8469, + "step": 492 + }, + { + "epoch": 0.03978694213542087, + "grad_norm": 0.9165790677070618, + "learning_rate": 0.00019980759808807985, + "loss": 2.8513, + "step": 493 + }, + { + "epoch": 0.03986764587200387, + "grad_norm": 0.9153794050216675, + "learning_rate": 0.00019980661801386393, + "loss": 2.8322, + "step": 494 + }, + { + "epoch": 0.039948349608586876, + "grad_norm": 0.89347904920578, + "learning_rate": 0.00019980563545220616, + "loss": 2.8316, + "step": 495 + }, + { + "epoch": 0.04002905334516988, + "grad_norm": 0.9882236123085022, + "learning_rate": 0.00019980465040313105, + "loss": 2.7471, + "step": 496 + }, + { + "epoch": 0.040109757081752885, + "grad_norm": 0.9391099810600281, + "learning_rate": 0.00019980366286666322, + "loss": 2.8182, + "step": 497 + }, + { + 
"epoch": 0.04019046081833589, + "grad_norm": 1.0155293941497803, + "learning_rate": 0.00019980267284282717, + "loss": 2.8721, + "step": 498 + }, + { + "epoch": 0.040271164554918894, + "grad_norm": 0.9952930212020874, + "learning_rate": 0.00019980168033164765, + "loss": 2.8538, + "step": 499 + }, + { + "epoch": 0.0403518682915019, + "grad_norm": 0.8385666608810425, + "learning_rate": 0.00019980068533314934, + "loss": 2.8242, + "step": 500 + }, + { + "epoch": 0.0404325720280849, + "grad_norm": 0.8747559785842896, + "learning_rate": 0.0001997996878473571, + "loss": 2.7908, + "step": 501 + }, + { + "epoch": 0.04051327576466791, + "grad_norm": 0.9267926216125488, + "learning_rate": 0.00019979868787429575, + "loss": 2.8359, + "step": 502 + }, + { + "epoch": 0.04059397950125091, + "grad_norm": 0.8194155693054199, + "learning_rate": 0.00019979768541399022, + "loss": 2.8161, + "step": 503 + }, + { + "epoch": 0.04067468323783391, + "grad_norm": 0.8923258185386658, + "learning_rate": 0.00019979668046646548, + "loss": 2.7547, + "step": 504 + }, + { + "epoch": 0.04075538697441691, + "grad_norm": 0.8965646028518677, + "learning_rate": 0.00019979567303174663, + "loss": 2.8432, + "step": 505 + }, + { + "epoch": 0.04083609071099992, + "grad_norm": 0.814481794834137, + "learning_rate": 0.0001997946631098587, + "loss": 2.8327, + "step": 506 + }, + { + "epoch": 0.04091679444758292, + "grad_norm": 0.8806928396224976, + "learning_rate": 0.00019979365070082694, + "loss": 2.8573, + "step": 507 + }, + { + "epoch": 0.040997498184165926, + "grad_norm": 0.8546919822692871, + "learning_rate": 0.00019979263580467653, + "loss": 2.8618, + "step": 508 + }, + { + "epoch": 0.04107820192074893, + "grad_norm": 0.8557277321815491, + "learning_rate": 0.00019979161842143274, + "loss": 2.8454, + "step": 509 + }, + { + "epoch": 0.041158905657331935, + "grad_norm": 0.9153180122375488, + "learning_rate": 0.00019979059855112098, + "loss": 2.8027, + "step": 510 + }, + { + "epoch": 0.04123960939391494, + 
"grad_norm": 0.8616741895675659, + "learning_rate": 0.00019978957619376666, + "loss": 2.7628, + "step": 511 + }, + { + "epoch": 0.04132031313049794, + "grad_norm": 0.8777137398719788, + "learning_rate": 0.00019978855134939524, + "loss": 2.8443, + "step": 512 + }, + { + "epoch": 0.04140101686708095, + "grad_norm": 0.852100133895874, + "learning_rate": 0.0001997875240180323, + "loss": 2.8125, + "step": 513 + }, + { + "epoch": 0.04148172060366395, + "grad_norm": 0.8470742702484131, + "learning_rate": 0.00019978649419970338, + "loss": 2.8139, + "step": 514 + }, + { + "epoch": 0.041562424340246956, + "grad_norm": 0.8890305161476135, + "learning_rate": 0.0001997854618944342, + "loss": 2.8633, + "step": 515 + }, + { + "epoch": 0.04164312807682996, + "grad_norm": 0.8893599510192871, + "learning_rate": 0.00019978442710225043, + "loss": 2.8066, + "step": 516 + }, + { + "epoch": 0.04172383181341296, + "grad_norm": 0.9093891382217407, + "learning_rate": 0.00019978338982317792, + "loss": 2.8026, + "step": 517 + }, + { + "epoch": 0.04180453554999596, + "grad_norm": 0.9775434136390686, + "learning_rate": 0.00019978235005724252, + "loss": 2.849, + "step": 518 + }, + { + "epoch": 0.04188523928657897, + "grad_norm": 1.0014091730117798, + "learning_rate": 0.00019978130780447012, + "loss": 2.8572, + "step": 519 + }, + { + "epoch": 0.04196594302316197, + "grad_norm": 0.8487632870674133, + "learning_rate": 0.00019978026306488668, + "loss": 2.7611, + "step": 520 + }, + { + "epoch": 0.042046646759744975, + "grad_norm": 0.86592698097229, + "learning_rate": 0.00019977921583851825, + "loss": 2.7616, + "step": 521 + }, + { + "epoch": 0.04212735049632798, + "grad_norm": 1.0285916328430176, + "learning_rate": 0.00019977816612539093, + "loss": 2.8049, + "step": 522 + }, + { + "epoch": 0.042208054232910984, + "grad_norm": 0.9716495871543884, + "learning_rate": 0.00019977711392553092, + "loss": 2.8459, + "step": 523 + }, + { + "epoch": 0.04228875796949399, + "grad_norm": 0.8842264413833618, + 
"learning_rate": 0.0001997760592389644, + "loss": 2.7934, + "step": 524 + }, + { + "epoch": 0.04236946170607699, + "grad_norm": 0.8839964866638184, + "learning_rate": 0.00019977500206571765, + "loss": 2.8135, + "step": 525 + }, + { + "epoch": 0.04245016544266, + "grad_norm": 0.870331346988678, + "learning_rate": 0.00019977394240581705, + "loss": 2.8684, + "step": 526 + }, + { + "epoch": 0.042530869179243, + "grad_norm": 0.8844720125198364, + "learning_rate": 0.000199772880259289, + "loss": 2.7867, + "step": 527 + }, + { + "epoch": 0.042611572915826006, + "grad_norm": 0.9353455901145935, + "learning_rate": 0.00019977181562615994, + "loss": 2.8051, + "step": 528 + }, + { + "epoch": 0.04269227665240901, + "grad_norm": 0.9530816078186035, + "learning_rate": 0.00019977074850645646, + "loss": 2.7915, + "step": 529 + }, + { + "epoch": 0.04277298038899201, + "grad_norm": 0.8984190821647644, + "learning_rate": 0.00019976967890020507, + "loss": 2.7957, + "step": 530 + }, + { + "epoch": 0.04285368412557501, + "grad_norm": 0.9146613478660583, + "learning_rate": 0.00019976860680743252, + "loss": 2.9053, + "step": 531 + }, + { + "epoch": 0.042934387862158016, + "grad_norm": 0.9228026866912842, + "learning_rate": 0.0001997675322281655, + "loss": 2.8578, + "step": 532 + }, + { + "epoch": 0.04301509159874102, + "grad_norm": 0.8266343474388123, + "learning_rate": 0.0001997664551624308, + "loss": 2.7393, + "step": 533 + }, + { + "epoch": 0.043095795335324025, + "grad_norm": 0.9197628498077393, + "learning_rate": 0.0001997653756102552, + "loss": 2.8828, + "step": 534 + }, + { + "epoch": 0.04317649907190703, + "grad_norm": 0.9145991802215576, + "learning_rate": 0.00019976429357166566, + "loss": 2.7767, + "step": 535 + }, + { + "epoch": 0.04325720280849003, + "grad_norm": 0.9123281240463257, + "learning_rate": 0.00019976320904668913, + "loss": 2.7993, + "step": 536 + }, + { + "epoch": 0.04333790654507304, + "grad_norm": 0.8597636818885803, + "learning_rate": 0.00019976212203535266, + 
"loss": 2.8148, + "step": 537 + }, + { + "epoch": 0.04341861028165604, + "grad_norm": 0.8963296413421631, + "learning_rate": 0.00019976103253768334, + "loss": 2.7722, + "step": 538 + }, + { + "epoch": 0.043499314018239046, + "grad_norm": 0.9480688571929932, + "learning_rate": 0.0001997599405537083, + "loss": 2.8038, + "step": 539 + }, + { + "epoch": 0.04358001775482205, + "grad_norm": 0.8115736842155457, + "learning_rate": 0.00019975884608345476, + "loss": 2.8069, + "step": 540 + }, + { + "epoch": 0.043660721491405055, + "grad_norm": 0.9642506837844849, + "learning_rate": 0.00019975774912695, + "loss": 2.8703, + "step": 541 + }, + { + "epoch": 0.04374142522798805, + "grad_norm": 0.9638697504997253, + "learning_rate": 0.0001997566496842214, + "loss": 2.8223, + "step": 542 + }, + { + "epoch": 0.04382212896457106, + "grad_norm": 0.9478490352630615, + "learning_rate": 0.00019975554775529628, + "loss": 2.8164, + "step": 543 + }, + { + "epoch": 0.04390283270115406, + "grad_norm": 1.1771583557128906, + "learning_rate": 0.00019975444334020215, + "loss": 2.7969, + "step": 544 + }, + { + "epoch": 0.043983536437737066, + "grad_norm": 0.9597339034080505, + "learning_rate": 0.00019975333643896655, + "loss": 2.8025, + "step": 545 + }, + { + "epoch": 0.04406424017432007, + "grad_norm": 0.981595516204834, + "learning_rate": 0.00019975222705161704, + "loss": 2.7994, + "step": 546 + }, + { + "epoch": 0.044144943910903074, + "grad_norm": 0.9581133723258972, + "learning_rate": 0.00019975111517818127, + "loss": 2.802, + "step": 547 + }, + { + "epoch": 0.04422564764748608, + "grad_norm": 0.8643878698348999, + "learning_rate": 0.00019975000081868697, + "loss": 2.7958, + "step": 548 + }, + { + "epoch": 0.04430635138406908, + "grad_norm": 1.2188652753829956, + "learning_rate": 0.0001997488839731619, + "loss": 2.8786, + "step": 549 + }, + { + "epoch": 0.04438705512065209, + "grad_norm": 0.9138071537017822, + "learning_rate": 0.00019974776464163387, + "loss": 2.809, + "step": 550 + }, + { + 
"epoch": 0.04446775885723509, + "grad_norm": 0.9604587554931641, + "learning_rate": 0.00019974664282413083, + "loss": 2.8009, + "step": 551 + }, + { + "epoch": 0.044548462593818096, + "grad_norm": 1.0271116495132446, + "learning_rate": 0.00019974551852068072, + "loss": 2.8689, + "step": 552 + }, + { + "epoch": 0.0446291663304011, + "grad_norm": 0.9330877065658569, + "learning_rate": 0.00019974439173131155, + "loss": 2.7613, + "step": 553 + }, + { + "epoch": 0.044709870066984105, + "grad_norm": 0.9549325108528137, + "learning_rate": 0.00019974326245605136, + "loss": 2.8314, + "step": 554 + }, + { + "epoch": 0.0447905738035671, + "grad_norm": 0.8928439021110535, + "learning_rate": 0.00019974213069492836, + "loss": 2.8097, + "step": 555 + }, + { + "epoch": 0.044871277540150106, + "grad_norm": 0.8705076575279236, + "learning_rate": 0.00019974099644797075, + "loss": 2.8112, + "step": 556 + }, + { + "epoch": 0.04495198127673311, + "grad_norm": 0.988345742225647, + "learning_rate": 0.00019973985971520676, + "loss": 2.7648, + "step": 557 + }, + { + "epoch": 0.045032685013316115, + "grad_norm": 0.9161957502365112, + "learning_rate": 0.00019973872049666475, + "loss": 2.8691, + "step": 558 + }, + { + "epoch": 0.04511338874989912, + "grad_norm": 0.8404076099395752, + "learning_rate": 0.00019973757879237312, + "loss": 2.7708, + "step": 559 + }, + { + "epoch": 0.045194092486482124, + "grad_norm": 1.05247962474823, + "learning_rate": 0.0001997364346023603, + "loss": 2.8638, + "step": 560 + }, + { + "epoch": 0.04527479622306513, + "grad_norm": 0.9235066175460815, + "learning_rate": 0.00019973528792665483, + "loss": 2.7876, + "step": 561 + }, + { + "epoch": 0.04535549995964813, + "grad_norm": 1.220075249671936, + "learning_rate": 0.00019973413876528526, + "loss": 2.8563, + "step": 562 + }, + { + "epoch": 0.04543620369623114, + "grad_norm": 0.9098384976387024, + "learning_rate": 0.00019973298711828025, + "loss": 2.8427, + "step": 563 + }, + { + "epoch": 0.04551690743281414, + 
"grad_norm": 0.8792217969894409, + "learning_rate": 0.00019973183298566848, + "loss": 2.8673, + "step": 564 + }, + { + "epoch": 0.045597611169397145, + "grad_norm": 0.9895235896110535, + "learning_rate": 0.00019973067636747875, + "loss": 2.8262, + "step": 565 + }, + { + "epoch": 0.04567831490598015, + "grad_norm": 0.9191479086875916, + "learning_rate": 0.00019972951726373984, + "loss": 2.8005, + "step": 566 + }, + { + "epoch": 0.045759018642563154, + "grad_norm": 0.9631491899490356, + "learning_rate": 0.0001997283556744807, + "loss": 2.8438, + "step": 567 + }, + { + "epoch": 0.04583972237914615, + "grad_norm": 0.8302746415138245, + "learning_rate": 0.00019972719159973024, + "loss": 2.8221, + "step": 568 + }, + { + "epoch": 0.045920426115729156, + "grad_norm": 0.8238534927368164, + "learning_rate": 0.00019972602503951748, + "loss": 2.7674, + "step": 569 + }, + { + "epoch": 0.04600112985231216, + "grad_norm": 0.9675811529159546, + "learning_rate": 0.00019972485599387146, + "loss": 2.8457, + "step": 570 + }, + { + "epoch": 0.046081833588895164, + "grad_norm": 0.8663914203643799, + "learning_rate": 0.00019972368446282134, + "loss": 2.7851, + "step": 571 + }, + { + "epoch": 0.04616253732547817, + "grad_norm": 0.9904592633247375, + "learning_rate": 0.00019972251044639636, + "loss": 2.8792, + "step": 572 + }, + { + "epoch": 0.04624324106206117, + "grad_norm": 0.907600462436676, + "learning_rate": 0.0001997213339446257, + "loss": 2.7991, + "step": 573 + }, + { + "epoch": 0.04632394479864418, + "grad_norm": 0.871362566947937, + "learning_rate": 0.00019972015495753876, + "loss": 2.7959, + "step": 574 + }, + { + "epoch": 0.04640464853522718, + "grad_norm": 0.9664937853813171, + "learning_rate": 0.00019971897348516486, + "loss": 2.7847, + "step": 575 + }, + { + "epoch": 0.046485352271810186, + "grad_norm": 1.0670619010925293, + "learning_rate": 0.0001997177895275335, + "loss": 2.8864, + "step": 576 + }, + { + "epoch": 0.04656605600839319, + "grad_norm": 0.9281025528907776, + 
"learning_rate": 0.00019971660308467414, + "loss": 2.8568, + "step": 577 + }, + { + "epoch": 0.046646759744976195, + "grad_norm": 0.8964822888374329, + "learning_rate": 0.00019971541415661639, + "loss": 2.7246, + "step": 578 + }, + { + "epoch": 0.0467274634815592, + "grad_norm": 0.8921917676925659, + "learning_rate": 0.00019971422274338985, + "loss": 2.8513, + "step": 579 + }, + { + "epoch": 0.0468081672181422, + "grad_norm": 0.9550159573554993, + "learning_rate": 0.0001997130288450242, + "loss": 2.7615, + "step": 580 + }, + { + "epoch": 0.0468888709547252, + "grad_norm": 0.9330170154571533, + "learning_rate": 0.00019971183246154925, + "loss": 2.9017, + "step": 581 + }, + { + "epoch": 0.046969574691308205, + "grad_norm": 0.9125271439552307, + "learning_rate": 0.00019971063359299477, + "loss": 2.8263, + "step": 582 + }, + { + "epoch": 0.04705027842789121, + "grad_norm": 1.0005927085876465, + "learning_rate": 0.00019970943223939066, + "loss": 2.8371, + "step": 583 + }, + { + "epoch": 0.047130982164474214, + "grad_norm": 1.0333613157272339, + "learning_rate": 0.00019970822840076685, + "loss": 2.8275, + "step": 584 + }, + { + "epoch": 0.04721168590105722, + "grad_norm": 0.8684708476066589, + "learning_rate": 0.00019970702207715334, + "loss": 2.8343, + "step": 585 + }, + { + "epoch": 0.04729238963764022, + "grad_norm": 1.1112761497497559, + "learning_rate": 0.00019970581326858025, + "loss": 2.9012, + "step": 586 + }, + { + "epoch": 0.04737309337422323, + "grad_norm": 1.0187962055206299, + "learning_rate": 0.00019970460197507763, + "loss": 2.8423, + "step": 587 + }, + { + "epoch": 0.04745379711080623, + "grad_norm": 0.9802024960517883, + "learning_rate": 0.00019970338819667567, + "loss": 2.867, + "step": 588 + }, + { + "epoch": 0.047534500847389236, + "grad_norm": 0.9825551509857178, + "learning_rate": 0.00019970217193340467, + "loss": 2.8359, + "step": 589 + }, + { + "epoch": 0.04761520458397224, + "grad_norm": 1.1399210691452026, + "learning_rate": 
0.00019970095318529494, + "loss": 2.8356, + "step": 590 + }, + { + "epoch": 0.047695908320555244, + "grad_norm": 1.0373995304107666, + "learning_rate": 0.00019969973195237684, + "loss": 2.8005, + "step": 591 + }, + { + "epoch": 0.04777661205713825, + "grad_norm": 1.133596420288086, + "learning_rate": 0.00019969850823468077, + "loss": 2.8778, + "step": 592 + }, + { + "epoch": 0.047857315793721246, + "grad_norm": 1.0187327861785889, + "learning_rate": 0.00019969728203223728, + "loss": 2.8291, + "step": 593 + }, + { + "epoch": 0.04793801953030425, + "grad_norm": 1.0588128566741943, + "learning_rate": 0.00019969605334507688, + "loss": 2.9396, + "step": 594 + }, + { + "epoch": 0.048018723266887255, + "grad_norm": 0.8783230781555176, + "learning_rate": 0.00019969482217323026, + "loss": 2.8076, + "step": 595 + }, + { + "epoch": 0.04809942700347026, + "grad_norm": 1.0500195026397705, + "learning_rate": 0.00019969358851672805, + "loss": 2.9099, + "step": 596 + }, + { + "epoch": 0.04818013074005326, + "grad_norm": 0.9523593187332153, + "learning_rate": 0.000199692352375601, + "loss": 2.7448, + "step": 597 + }, + { + "epoch": 0.04826083447663627, + "grad_norm": 1.0008500814437866, + "learning_rate": 0.00019969111374987995, + "loss": 2.8212, + "step": 598 + }, + { + "epoch": 0.04834153821321927, + "grad_norm": 0.8992626070976257, + "learning_rate": 0.00019968987263959575, + "loss": 2.8698, + "step": 599 + }, + { + "epoch": 0.048422241949802276, + "grad_norm": 0.9914852380752563, + "learning_rate": 0.00019968862904477935, + "loss": 2.8221, + "step": 600 + }, + { + "epoch": 0.04850294568638528, + "grad_norm": 0.9633241295814514, + "learning_rate": 0.00019968738296546168, + "loss": 2.8835, + "step": 601 + }, + { + "epoch": 0.048583649422968285, + "grad_norm": 1.055831789970398, + "learning_rate": 0.00019968613440167387, + "loss": 2.8781, + "step": 602 + }, + { + "epoch": 0.04866435315955129, + "grad_norm": 0.913856029510498, + "learning_rate": 0.000199684883353447, + "loss": 
2.7863, + "step": 603 + }, + { + "epoch": 0.048745056896134294, + "grad_norm": 0.8429243564605713, + "learning_rate": 0.00019968362982081226, + "loss": 2.7753, + "step": 604 + }, + { + "epoch": 0.0488257606327173, + "grad_norm": 0.9324761629104614, + "learning_rate": 0.0001996823738038009, + "loss": 2.8058, + "step": 605 + }, + { + "epoch": 0.048906464369300295, + "grad_norm": 1.0004981756210327, + "learning_rate": 0.0001996811153024442, + "loss": 2.8537, + "step": 606 + }, + { + "epoch": 0.0489871681058833, + "grad_norm": 0.9438043236732483, + "learning_rate": 0.00019967985431677354, + "loss": 2.8828, + "step": 607 + }, + { + "epoch": 0.049067871842466304, + "grad_norm": 0.9359340071678162, + "learning_rate": 0.00019967859084682034, + "loss": 2.8149, + "step": 608 + }, + { + "epoch": 0.04914857557904931, + "grad_norm": 1.0400227308273315, + "learning_rate": 0.00019967732489261609, + "loss": 2.8489, + "step": 609 + }, + { + "epoch": 0.04922927931563231, + "grad_norm": 0.8978031277656555, + "learning_rate": 0.00019967605645419237, + "loss": 2.8599, + "step": 610 + }, + { + "epoch": 0.04930998305221532, + "grad_norm": 0.9982689619064331, + "learning_rate": 0.00019967478553158073, + "loss": 2.9024, + "step": 611 + }, + { + "epoch": 0.04939068678879832, + "grad_norm": 1.0695222616195679, + "learning_rate": 0.00019967351212481292, + "loss": 2.8483, + "step": 612 + }, + { + "epoch": 0.049471390525381326, + "grad_norm": 1.0615525245666504, + "learning_rate": 0.0001996722362339206, + "loss": 2.806, + "step": 613 + }, + { + "epoch": 0.04955209426196433, + "grad_norm": 0.9624890089035034, + "learning_rate": 0.0001996709578589356, + "loss": 2.8641, + "step": 614 + }, + { + "epoch": 0.049632797998547334, + "grad_norm": 0.9156595468521118, + "learning_rate": 0.00019966967699988985, + "loss": 2.7991, + "step": 615 + }, + { + "epoch": 0.04971350173513034, + "grad_norm": 0.8687645196914673, + "learning_rate": 0.00019966839365681517, + "loss": 2.774, + "step": 616 + }, + { + 
"epoch": 0.04979420547171334, + "grad_norm": 0.9175437688827515, + "learning_rate": 0.00019966710782974359, + "loss": 2.8064, + "step": 617 + }, + { + "epoch": 0.04987490920829635, + "grad_norm": 0.8897463083267212, + "learning_rate": 0.00019966581951870715, + "loss": 2.8487, + "step": 618 + }, + { + "epoch": 0.049955612944879345, + "grad_norm": 0.8908397555351257, + "learning_rate": 0.00019966452872373795, + "loss": 2.8523, + "step": 619 + }, + { + "epoch": 0.05003631668146235, + "grad_norm": 0.95484858751297, + "learning_rate": 0.00019966323544486818, + "loss": 2.8471, + "step": 620 + }, + { + "epoch": 0.050117020418045354, + "grad_norm": 0.9995831251144409, + "learning_rate": 0.00019966193968213008, + "loss": 2.8341, + "step": 621 + }, + { + "epoch": 0.05019772415462836, + "grad_norm": 0.8731706142425537, + "learning_rate": 0.00019966064143555587, + "loss": 2.8491, + "step": 622 + }, + { + "epoch": 0.05027842789121136, + "grad_norm": 0.9213298559188843, + "learning_rate": 0.000199659340705178, + "loss": 2.8256, + "step": 623 + }, + { + "epoch": 0.050359131627794367, + "grad_norm": 0.9565179347991943, + "learning_rate": 0.00019965803749102885, + "loss": 2.8177, + "step": 624 + }, + { + "epoch": 0.05043983536437737, + "grad_norm": 1.0076881647109985, + "learning_rate": 0.00019965673179314086, + "loss": 2.7812, + "step": 625 + }, + { + "epoch": 0.050520539100960375, + "grad_norm": 0.989647388458252, + "learning_rate": 0.00019965542361154666, + "loss": 2.9226, + "step": 626 + }, + { + "epoch": 0.05060124283754338, + "grad_norm": 0.9671580791473389, + "learning_rate": 0.00019965411294627878, + "loss": 2.8204, + "step": 627 + }, + { + "epoch": 0.050681946574126384, + "grad_norm": 0.9275986552238464, + "learning_rate": 0.00019965279979736989, + "loss": 2.8481, + "step": 628 + }, + { + "epoch": 0.05076265031070939, + "grad_norm": 0.9949543476104736, + "learning_rate": 0.00019965148416485273, + "loss": 2.8606, + "step": 629 + }, + { + "epoch": 0.05084335404729239, + 
"grad_norm": 0.9506482481956482, + "learning_rate": 0.0001996501660487601, + "loss": 2.8088, + "step": 630 + }, + { + "epoch": 0.0509240577838754, + "grad_norm": 0.9147887229919434, + "learning_rate": 0.00019964884544912488, + "loss": 2.7997, + "step": 631 + }, + { + "epoch": 0.051004761520458394, + "grad_norm": 0.8964840769767761, + "learning_rate": 0.00019964752236597993, + "loss": 2.8342, + "step": 632 + }, + { + "epoch": 0.0510854652570414, + "grad_norm": 0.931811511516571, + "learning_rate": 0.00019964619679935824, + "loss": 2.8229, + "step": 633 + }, + { + "epoch": 0.0511661689936244, + "grad_norm": 0.8634423017501831, + "learning_rate": 0.00019964486874929282, + "loss": 2.803, + "step": 634 + }, + { + "epoch": 0.05124687273020741, + "grad_norm": 0.892223596572876, + "learning_rate": 0.00019964353821581683, + "loss": 2.802, + "step": 635 + }, + { + "epoch": 0.05132757646679041, + "grad_norm": 0.8373630046844482, + "learning_rate": 0.00019964220519896338, + "loss": 2.7693, + "step": 636 + }, + { + "epoch": 0.051408280203373416, + "grad_norm": 0.8729730248451233, + "learning_rate": 0.0001996408696987657, + "loss": 2.8467, + "step": 637 + }, + { + "epoch": 0.05148898393995642, + "grad_norm": 0.8994413614273071, + "learning_rate": 0.0001996395317152571, + "loss": 2.8837, + "step": 638 + }, + { + "epoch": 0.051569687676539425, + "grad_norm": 0.9146113395690918, + "learning_rate": 0.0001996381912484709, + "loss": 2.8189, + "step": 639 + }, + { + "epoch": 0.05165039141312243, + "grad_norm": 0.9330562353134155, + "learning_rate": 0.00019963684829844052, + "loss": 2.7873, + "step": 640 + }, + { + "epoch": 0.05173109514970543, + "grad_norm": 0.9076224565505981, + "learning_rate": 0.00019963550286519944, + "loss": 2.802, + "step": 641 + }, + { + "epoch": 0.05181179888628844, + "grad_norm": 0.9580704569816589, + "learning_rate": 0.00019963415494878115, + "loss": 2.8173, + "step": 642 + }, + { + "epoch": 0.05189250262287144, + "grad_norm": 0.9291248917579651, + 
"learning_rate": 0.00019963280454921928, + "loss": 2.7866, + "step": 643 + }, + { + "epoch": 0.05197320635945444, + "grad_norm": 0.9815296530723572, + "learning_rate": 0.0001996314516665475, + "loss": 2.7903, + "step": 644 + }, + { + "epoch": 0.052053910096037444, + "grad_norm": 0.9461820721626282, + "learning_rate": 0.00019963009630079949, + "loss": 2.7854, + "step": 645 + }, + { + "epoch": 0.05213461383262045, + "grad_norm": 0.9660771489143372, + "learning_rate": 0.00019962873845200908, + "loss": 2.9187, + "step": 646 + }, + { + "epoch": 0.05221531756920345, + "grad_norm": 0.8987802863121033, + "learning_rate": 0.00019962737812021002, + "loss": 2.8854, + "step": 647 + }, + { + "epoch": 0.05229602130578646, + "grad_norm": 0.9810429215431213, + "learning_rate": 0.0001996260153054363, + "loss": 2.8974, + "step": 648 + }, + { + "epoch": 0.05237672504236946, + "grad_norm": 0.8185738325119019, + "learning_rate": 0.00019962465000772183, + "loss": 2.797, + "step": 649 + }, + { + "epoch": 0.052457428778952465, + "grad_norm": 0.8976237773895264, + "learning_rate": 0.0001996232822271007, + "loss": 2.8557, + "step": 650 + }, + { + "epoch": 0.05253813251553547, + "grad_norm": 0.8591496348381042, + "learning_rate": 0.0001996219119636069, + "loss": 2.8521, + "step": 651 + }, + { + "epoch": 0.052618836252118474, + "grad_norm": 0.8907031416893005, + "learning_rate": 0.00019962053921727472, + "loss": 2.8117, + "step": 652 + }, + { + "epoch": 0.05269953998870148, + "grad_norm": 0.9034241437911987, + "learning_rate": 0.00019961916398813823, + "loss": 2.741, + "step": 653 + }, + { + "epoch": 0.05278024372528448, + "grad_norm": 0.8284802436828613, + "learning_rate": 0.00019961778627623176, + "loss": 2.776, + "step": 654 + }, + { + "epoch": 0.05286094746186749, + "grad_norm": 0.8459529876708984, + "learning_rate": 0.00019961640608158967, + "loss": 2.8027, + "step": 655 + }, + { + "epoch": 0.05294165119845049, + "grad_norm": 0.9720042943954468, + "learning_rate": 0.00019961502340424636, 
+ "loss": 2.9086, + "step": 656 + }, + { + "epoch": 0.05302235493503349, + "grad_norm": 0.8581427335739136, + "learning_rate": 0.00019961363824423626, + "loss": 2.8347, + "step": 657 + }, + { + "epoch": 0.05310305867161649, + "grad_norm": 0.9545331597328186, + "learning_rate": 0.00019961225060159386, + "loss": 2.828, + "step": 658 + }, + { + "epoch": 0.0531837624081995, + "grad_norm": 1.0303562879562378, + "learning_rate": 0.00019961086047635385, + "loss": 2.8461, + "step": 659 + }, + { + "epoch": 0.0532644661447825, + "grad_norm": 0.86605304479599, + "learning_rate": 0.0001996094678685508, + "loss": 2.8355, + "step": 660 + }, + { + "epoch": 0.053345169881365506, + "grad_norm": 0.8146334886550903, + "learning_rate": 0.0001996080727782194, + "loss": 2.8638, + "step": 661 + }, + { + "epoch": 0.05342587361794851, + "grad_norm": 0.9434560537338257, + "learning_rate": 0.00019960667520539446, + "loss": 2.8196, + "step": 662 + }, + { + "epoch": 0.053506577354531515, + "grad_norm": 0.9362602829933167, + "learning_rate": 0.00019960527515011084, + "loss": 2.8452, + "step": 663 + }, + { + "epoch": 0.05358728109111452, + "grad_norm": 0.828713059425354, + "learning_rate": 0.00019960387261240334, + "loss": 2.8079, + "step": 664 + }, + { + "epoch": 0.053667984827697524, + "grad_norm": 0.8610214591026306, + "learning_rate": 0.00019960246759230697, + "loss": 2.8197, + "step": 665 + }, + { + "epoch": 0.05374868856428053, + "grad_norm": 0.8913124799728394, + "learning_rate": 0.00019960106008985674, + "loss": 2.8392, + "step": 666 + }, + { + "epoch": 0.05382939230086353, + "grad_norm": 0.8109759092330933, + "learning_rate": 0.00019959965010508778, + "loss": 2.7961, + "step": 667 + }, + { + "epoch": 0.05391009603744654, + "grad_norm": 0.8714832663536072, + "learning_rate": 0.00019959823763803514, + "loss": 2.7984, + "step": 668 + }, + { + "epoch": 0.05399079977402954, + "grad_norm": 0.9008125066757202, + "learning_rate": 0.00019959682268873408, + "loss": 2.8319, + "step": 669 + }, + { 
+ "epoch": 0.05407150351061254, + "grad_norm": 0.8718584775924683, + "learning_rate": 0.00019959540525721985, + "loss": 2.7973, + "step": 670 + }, + { + "epoch": 0.05415220724719554, + "grad_norm": 0.8666327595710754, + "learning_rate": 0.00019959398534352774, + "loss": 2.8296, + "step": 671 + }, + { + "epoch": 0.05423291098377855, + "grad_norm": 0.9755229949951172, + "learning_rate": 0.00019959256294769322, + "loss": 2.8358, + "step": 672 + }, + { + "epoch": 0.05431361472036155, + "grad_norm": 1.193708062171936, + "learning_rate": 0.0001995911380697517, + "loss": 2.7672, + "step": 673 + }, + { + "epoch": 0.054394318456944556, + "grad_norm": 0.9104088544845581, + "learning_rate": 0.00019958971070973866, + "loss": 2.8389, + "step": 674 + }, + { + "epoch": 0.05447502219352756, + "grad_norm": 0.9266251921653748, + "learning_rate": 0.0001995882808676897, + "loss": 2.8226, + "step": 675 + }, + { + "epoch": 0.054555725930110564, + "grad_norm": 1.1161282062530518, + "learning_rate": 0.00019958684854364046, + "loss": 2.8236, + "step": 676 + }, + { + "epoch": 0.05463642966669357, + "grad_norm": 0.9200586080551147, + "learning_rate": 0.00019958541373762666, + "loss": 2.8074, + "step": 677 + }, + { + "epoch": 0.05471713340327657, + "grad_norm": 1.0372560024261475, + "learning_rate": 0.000199583976449684, + "loss": 2.815, + "step": 678 + }, + { + "epoch": 0.05479783713985958, + "grad_norm": 0.8822301030158997, + "learning_rate": 0.0001995825366798483, + "loss": 2.7985, + "step": 679 + }, + { + "epoch": 0.05487854087644258, + "grad_norm": 0.9226076006889343, + "learning_rate": 0.00019958109442815553, + "loss": 2.7649, + "step": 680 + }, + { + "epoch": 0.054959244613025586, + "grad_norm": 0.8769479990005493, + "learning_rate": 0.00019957964969464156, + "loss": 2.8483, + "step": 681 + }, + { + "epoch": 0.05503994834960859, + "grad_norm": 0.8601027727127075, + "learning_rate": 0.0001995782024793424, + "loss": 2.8072, + "step": 682 + }, + { + "epoch": 0.05512065208619159, + 
"grad_norm": 0.9684911370277405, + "learning_rate": 0.00019957675278229416, + "loss": 2.8693, + "step": 683 + }, + { + "epoch": 0.05520135582277459, + "grad_norm": 0.9119890928268433, + "learning_rate": 0.00019957530060353294, + "loss": 2.853, + "step": 684 + }, + { + "epoch": 0.055282059559357596, + "grad_norm": 0.9588247537612915, + "learning_rate": 0.0001995738459430949, + "loss": 2.8435, + "step": 685 + }, + { + "epoch": 0.0553627632959406, + "grad_norm": 0.8317441940307617, + "learning_rate": 0.00019957238880101636, + "loss": 2.8208, + "step": 686 + }, + { + "epoch": 0.055443467032523605, + "grad_norm": 0.92695152759552, + "learning_rate": 0.00019957092917733361, + "loss": 2.8378, + "step": 687 + }, + { + "epoch": 0.05552417076910661, + "grad_norm": 0.8908315300941467, + "learning_rate": 0.00019956946707208305, + "loss": 2.8041, + "step": 688 + }, + { + "epoch": 0.055604874505689614, + "grad_norm": 0.9787055253982544, + "learning_rate": 0.00019956800248530107, + "loss": 2.8604, + "step": 689 + }, + { + "epoch": 0.05568557824227262, + "grad_norm": 0.8707631826400757, + "learning_rate": 0.00019956653541702415, + "loss": 2.7763, + "step": 690 + }, + { + "epoch": 0.05576628197885562, + "grad_norm": 1.0059715509414673, + "learning_rate": 0.00019956506586728896, + "loss": 2.8267, + "step": 691 + }, + { + "epoch": 0.05584698571543863, + "grad_norm": 0.88490891456604, + "learning_rate": 0.00019956359383613203, + "loss": 2.8278, + "step": 692 + }, + { + "epoch": 0.05592768945202163, + "grad_norm": 0.9527923464775085, + "learning_rate": 0.00019956211932359007, + "loss": 2.8251, + "step": 693 + }, + { + "epoch": 0.056008393188604635, + "grad_norm": 0.9612617492675781, + "learning_rate": 0.00019956064232969987, + "loss": 2.8148, + "step": 694 + }, + { + "epoch": 0.05608909692518763, + "grad_norm": 0.9261285066604614, + "learning_rate": 0.0001995591628544982, + "loss": 2.8176, + "step": 695 + }, + { + "epoch": 0.05616980066177064, + "grad_norm": 0.9766250252723694, + 
"learning_rate": 0.0001995576808980219, + "loss": 2.7968, + "step": 696 + }, + { + "epoch": 0.05625050439835364, + "grad_norm": 0.9287495017051697, + "learning_rate": 0.00019955619646030802, + "loss": 2.7679, + "step": 697 + }, + { + "epoch": 0.056331208134936646, + "grad_norm": 0.9182924032211304, + "learning_rate": 0.00019955470954139345, + "loss": 2.8295, + "step": 698 + }, + { + "epoch": 0.05641191187151965, + "grad_norm": 0.8650663495063782, + "learning_rate": 0.00019955322014131524, + "loss": 2.7928, + "step": 699 + }, + { + "epoch": 0.056492615608102655, + "grad_norm": 0.9543934464454651, + "learning_rate": 0.00019955172826011062, + "loss": 2.8049, + "step": 700 + }, + { + "epoch": 0.05657331934468566, + "grad_norm": 0.9060636162757874, + "learning_rate": 0.00019955023389781664, + "loss": 2.871, + "step": 701 + }, + { + "epoch": 0.05665402308126866, + "grad_norm": 0.9824137091636658, + "learning_rate": 0.00019954873705447065, + "loss": 2.816, + "step": 702 + }, + { + "epoch": 0.05673472681785167, + "grad_norm": 0.8831053972244263, + "learning_rate": 0.00019954723773010988, + "loss": 2.8207, + "step": 703 + }, + { + "epoch": 0.05681543055443467, + "grad_norm": 0.9603390693664551, + "learning_rate": 0.00019954573592477173, + "loss": 2.831, + "step": 704 + }, + { + "epoch": 0.056896134291017676, + "grad_norm": 0.911556601524353, + "learning_rate": 0.00019954423163849364, + "loss": 2.7679, + "step": 705 + }, + { + "epoch": 0.05697683802760068, + "grad_norm": 0.8558745384216309, + "learning_rate": 0.00019954272487131305, + "loss": 2.7934, + "step": 706 + }, + { + "epoch": 0.057057541764183685, + "grad_norm": 1.0175282955169678, + "learning_rate": 0.00019954121562326758, + "loss": 2.905, + "step": 707 + }, + { + "epoch": 0.05713824550076668, + "grad_norm": 0.9480875730514526, + "learning_rate": 0.00019953970389439483, + "loss": 2.85, + "step": 708 + }, + { + "epoch": 0.05721894923734969, + "grad_norm": 0.9271003603935242, + "learning_rate": 0.0001995381896847324, 
+ "loss": 2.8237, + "step": 709 + }, + { + "epoch": 0.05729965297393269, + "grad_norm": 0.8439653515815735, + "learning_rate": 0.00019953667299431815, + "loss": 2.821, + "step": 710 + }, + { + "epoch": 0.057380356710515695, + "grad_norm": 0.9750552177429199, + "learning_rate": 0.0001995351538231898, + "loss": 2.8613, + "step": 711 + }, + { + "epoch": 0.0574610604470987, + "grad_norm": 0.9409266710281372, + "learning_rate": 0.0001995336321713852, + "loss": 2.7876, + "step": 712 + }, + { + "epoch": 0.057541764183681704, + "grad_norm": 0.811138927936554, + "learning_rate": 0.00019953210803894233, + "loss": 2.7957, + "step": 713 + }, + { + "epoch": 0.05762246792026471, + "grad_norm": 0.9504825472831726, + "learning_rate": 0.00019953058142589916, + "loss": 2.8536, + "step": 714 + }, + { + "epoch": 0.05770317165684771, + "grad_norm": 0.8183554410934448, + "learning_rate": 0.00019952905233229368, + "loss": 2.7697, + "step": 715 + }, + { + "epoch": 0.05778387539343072, + "grad_norm": 1.1146113872528076, + "learning_rate": 0.0001995275207581641, + "loss": 2.8629, + "step": 716 + }, + { + "epoch": 0.05786457913001372, + "grad_norm": 0.8797986507415771, + "learning_rate": 0.00019952598670354852, + "loss": 2.7962, + "step": 717 + }, + { + "epoch": 0.057945282866596726, + "grad_norm": 0.8771101832389832, + "learning_rate": 0.00019952445016848517, + "loss": 2.8323, + "step": 718 + }, + { + "epoch": 0.05802598660317973, + "grad_norm": 0.9003355503082275, + "learning_rate": 0.00019952291115301235, + "loss": 2.777, + "step": 719 + }, + { + "epoch": 0.058106690339762734, + "grad_norm": 0.846125602722168, + "learning_rate": 0.00019952136965716846, + "loss": 2.7875, + "step": 720 + }, + { + "epoch": 0.05818739407634573, + "grad_norm": 0.908833920955658, + "learning_rate": 0.00019951982568099187, + "loss": 2.7975, + "step": 721 + }, + { + "epoch": 0.058268097812928736, + "grad_norm": 0.8616230487823486, + "learning_rate": 0.00019951827922452106, + "loss": 2.7486, + "step": 722 + }, + { 
+ "epoch": 0.05834880154951174, + "grad_norm": 0.8791850805282593, + "learning_rate": 0.00019951673028779462, + "loss": 2.8301, + "step": 723 + }, + { + "epoch": 0.058429505286094745, + "grad_norm": 0.9437321424484253, + "learning_rate": 0.00019951517887085112, + "loss": 2.7956, + "step": 724 + }, + { + "epoch": 0.05851020902267775, + "grad_norm": 0.9263394474983215, + "learning_rate": 0.00019951362497372922, + "loss": 2.867, + "step": 725 + }, + { + "epoch": 0.05859091275926075, + "grad_norm": 0.9442462921142578, + "learning_rate": 0.00019951206859646764, + "loss": 2.8447, + "step": 726 + }, + { + "epoch": 0.05867161649584376, + "grad_norm": 0.9286711812019348, + "learning_rate": 0.0001995105097391052, + "loss": 2.7588, + "step": 727 + }, + { + "epoch": 0.05875232023242676, + "grad_norm": 0.9338774085044861, + "learning_rate": 0.00019950894840168072, + "loss": 2.7394, + "step": 728 + }, + { + "epoch": 0.058833023969009766, + "grad_norm": 0.8880760073661804, + "learning_rate": 0.00019950738458423314, + "loss": 2.7949, + "step": 729 + }, + { + "epoch": 0.05891372770559277, + "grad_norm": 1.0091183185577393, + "learning_rate": 0.00019950581828680143, + "loss": 2.8633, + "step": 730 + }, + { + "epoch": 0.058994431442175775, + "grad_norm": 0.8657729625701904, + "learning_rate": 0.0001995042495094246, + "loss": 2.8649, + "step": 731 + }, + { + "epoch": 0.05907513517875878, + "grad_norm": 1.0084047317504883, + "learning_rate": 0.00019950267825214176, + "loss": 2.8422, + "step": 732 + }, + { + "epoch": 0.059155838915341784, + "grad_norm": 0.9096506237983704, + "learning_rate": 0.00019950110451499208, + "loss": 2.7908, + "step": 733 + }, + { + "epoch": 0.05923654265192478, + "grad_norm": 1.1338937282562256, + "learning_rate": 0.0001994995282980148, + "loss": 2.8093, + "step": 734 + }, + { + "epoch": 0.059317246388507786, + "grad_norm": 0.8813811540603638, + "learning_rate": 0.00019949794960124915, + "loss": 2.8866, + "step": 735 + }, + { + "epoch": 0.05939795012509079, + 
"grad_norm": 0.8457592129707336, + "learning_rate": 0.00019949636842473453, + "loss": 2.7744, + "step": 736 + }, + { + "epoch": 0.059478653861673794, + "grad_norm": 0.8731856346130371, + "learning_rate": 0.0001994947847685103, + "loss": 2.7822, + "step": 737 + }, + { + "epoch": 0.0595593575982568, + "grad_norm": 0.8915185332298279, + "learning_rate": 0.00019949319863261597, + "loss": 2.773, + "step": 738 + }, + { + "epoch": 0.0596400613348398, + "grad_norm": 0.9478987455368042, + "learning_rate": 0.00019949161001709106, + "loss": 2.8462, + "step": 739 + }, + { + "epoch": 0.05972076507142281, + "grad_norm": 0.8903716206550598, + "learning_rate": 0.00019949001892197515, + "loss": 2.7741, + "step": 740 + }, + { + "epoch": 0.05980146880800581, + "grad_norm": 0.8870117664337158, + "learning_rate": 0.00019948842534730786, + "loss": 2.8255, + "step": 741 + }, + { + "epoch": 0.059882172544588816, + "grad_norm": 1.0766080617904663, + "learning_rate": 0.00019948682929312898, + "loss": 2.8865, + "step": 742 + }, + { + "epoch": 0.05996287628117182, + "grad_norm": 0.846447229385376, + "learning_rate": 0.00019948523075947824, + "loss": 2.8441, + "step": 743 + }, + { + "epoch": 0.060043580017754825, + "grad_norm": 0.9847991466522217, + "learning_rate": 0.00019948362974639552, + "loss": 2.8099, + "step": 744 + }, + { + "epoch": 0.06012428375433783, + "grad_norm": 0.9170514941215515, + "learning_rate": 0.00019948202625392068, + "loss": 2.8797, + "step": 745 + }, + { + "epoch": 0.060204987490920826, + "grad_norm": 0.8564898371696472, + "learning_rate": 0.0001994804202820937, + "loss": 2.7993, + "step": 746 + }, + { + "epoch": 0.06028569122750383, + "grad_norm": 0.8527392148971558, + "learning_rate": 0.00019947881183095457, + "loss": 2.7816, + "step": 747 + }, + { + "epoch": 0.060366394964086835, + "grad_norm": 0.9170876145362854, + "learning_rate": 0.00019947720090054342, + "loss": 2.8031, + "step": 748 + }, + { + "epoch": 0.06044709870066984, + "grad_norm": 0.8891414403915405, + 
"learning_rate": 0.0001994755874909004, + "loss": 2.8072, + "step": 749 + }, + { + "epoch": 0.060527802437252844, + "grad_norm": 0.8853670358657837, + "learning_rate": 0.0001994739716020657, + "loss": 2.8857, + "step": 750 + }, + { + "epoch": 0.06060850617383585, + "grad_norm": 0.9011211395263672, + "learning_rate": 0.0001994723532340796, + "loss": 2.8519, + "step": 751 + }, + { + "epoch": 0.06068920991041885, + "grad_norm": 0.8843330144882202, + "learning_rate": 0.00019947073238698243, + "loss": 2.7882, + "step": 752 + }, + { + "epoch": 0.06076991364700186, + "grad_norm": 0.8712944984436035, + "learning_rate": 0.00019946910906081463, + "loss": 2.791, + "step": 753 + }, + { + "epoch": 0.06085061738358486, + "grad_norm": 0.8296090364456177, + "learning_rate": 0.00019946748325561656, + "loss": 2.8073, + "step": 754 + }, + { + "epoch": 0.060931321120167865, + "grad_norm": 0.9239117503166199, + "learning_rate": 0.00019946585497142885, + "loss": 2.8209, + "step": 755 + }, + { + "epoch": 0.06101202485675087, + "grad_norm": 0.8885170221328735, + "learning_rate": 0.000199464224208292, + "loss": 2.8391, + "step": 756 + }, + { + "epoch": 0.061092728593333874, + "grad_norm": 0.933720588684082, + "learning_rate": 0.0001994625909662467, + "loss": 2.7635, + "step": 757 + }, + { + "epoch": 0.06117343232991688, + "grad_norm": 0.9751253724098206, + "learning_rate": 0.00019946095524533362, + "loss": 2.7933, + "step": 758 + }, + { + "epoch": 0.061254136066499876, + "grad_norm": 0.9469670057296753, + "learning_rate": 0.00019945931704559353, + "loss": 2.7652, + "step": 759 + }, + { + "epoch": 0.06133483980308288, + "grad_norm": 0.8559684157371521, + "learning_rate": 0.00019945767636706728, + "loss": 2.8258, + "step": 760 + }, + { + "epoch": 0.061415543539665884, + "grad_norm": 1.021478295326233, + "learning_rate": 0.00019945603320979574, + "loss": 2.8047, + "step": 761 + }, + { + "epoch": 0.06149624727624889, + "grad_norm": 0.8421681523323059, + "learning_rate": 0.00019945438757381986, 
+ "loss": 2.8233, + "step": 762 + }, + { + "epoch": 0.06157695101283189, + "grad_norm": 0.900654137134552, + "learning_rate": 0.0001994527394591807, + "loss": 2.7591, + "step": 763 + }, + { + "epoch": 0.0616576547494149, + "grad_norm": 0.878300666809082, + "learning_rate": 0.0001994510888659193, + "loss": 2.715, + "step": 764 + }, + { + "epoch": 0.0617383584859979, + "grad_norm": 0.9170855283737183, + "learning_rate": 0.00019944943579407678, + "loss": 2.8604, + "step": 765 + }, + { + "epoch": 0.061819062222580906, + "grad_norm": 0.8532859683036804, + "learning_rate": 0.00019944778024369434, + "loss": 2.8124, + "step": 766 + }, + { + "epoch": 0.06189976595916391, + "grad_norm": 0.8549049496650696, + "learning_rate": 0.00019944612221481332, + "loss": 2.8066, + "step": 767 + }, + { + "epoch": 0.061980469695746915, + "grad_norm": 0.9602857828140259, + "learning_rate": 0.00019944446170747492, + "loss": 2.8424, + "step": 768 + }, + { + "epoch": 0.06206117343232992, + "grad_norm": 0.910953164100647, + "learning_rate": 0.0001994427987217206, + "loss": 2.8093, + "step": 769 + }, + { + "epoch": 0.06214187716891292, + "grad_norm": 0.8536386489868164, + "learning_rate": 0.0001994411332575918, + "loss": 2.802, + "step": 770 + }, + { + "epoch": 0.06222258090549593, + "grad_norm": 0.9166232347488403, + "learning_rate": 0.00019943946531513, + "loss": 2.783, + "step": 771 + }, + { + "epoch": 0.062303284642078925, + "grad_norm": 0.9954056739807129, + "learning_rate": 0.00019943779489437678, + "loss": 2.8198, + "step": 772 + }, + { + "epoch": 0.06238398837866193, + "grad_norm": 0.8527171015739441, + "learning_rate": 0.0001994361219953738, + "loss": 2.8159, + "step": 773 + }, + { + "epoch": 0.062464692115244934, + "grad_norm": 0.8951592445373535, + "learning_rate": 0.00019943444661816274, + "loss": 2.7969, + "step": 774 + }, + { + "epoch": 0.06254539585182795, + "grad_norm": 0.9348207116127014, + "learning_rate": 0.00019943276876278532, + "loss": 2.8403, + "step": 775 + }, + { + 
"epoch": 0.06262609958841095, + "grad_norm": 0.866318941116333, + "learning_rate": 0.00019943108842928342, + "loss": 2.7886, + "step": 776 + }, + { + "epoch": 0.06270680332499395, + "grad_norm": 0.8571285605430603, + "learning_rate": 0.00019942940561769884, + "loss": 2.771, + "step": 777 + }, + { + "epoch": 0.06278750706157694, + "grad_norm": 0.8384295105934143, + "learning_rate": 0.00019942772032807357, + "loss": 2.7885, + "step": 778 + }, + { + "epoch": 0.06286821079815995, + "grad_norm": 0.9934808611869812, + "learning_rate": 0.00019942603256044961, + "loss": 2.8399, + "step": 779 + }, + { + "epoch": 0.06294891453474295, + "grad_norm": 0.8275915384292603, + "learning_rate": 0.00019942434231486902, + "loss": 2.8983, + "step": 780 + }, + { + "epoch": 0.06302961827132596, + "grad_norm": 0.9073596000671387, + "learning_rate": 0.0001994226495913739, + "loss": 2.7886, + "step": 781 + }, + { + "epoch": 0.06311032200790896, + "grad_norm": 0.9091461300849915, + "learning_rate": 0.00019942095439000646, + "loss": 2.814, + "step": 782 + }, + { + "epoch": 0.06319102574449197, + "grad_norm": 0.9356934428215027, + "learning_rate": 0.000199419256710809, + "loss": 2.8238, + "step": 783 + }, + { + "epoch": 0.06327172948107497, + "grad_norm": 0.883514940738678, + "learning_rate": 0.00019941755655382374, + "loss": 2.7912, + "step": 784 + }, + { + "epoch": 0.06335243321765797, + "grad_norm": 0.8770506381988525, + "learning_rate": 0.00019941585391909308, + "loss": 2.7774, + "step": 785 + }, + { + "epoch": 0.06343313695424098, + "grad_norm": 0.8891726136207581, + "learning_rate": 0.00019941414880665948, + "loss": 2.7975, + "step": 786 + }, + { + "epoch": 0.06351384069082398, + "grad_norm": 0.9280585050582886, + "learning_rate": 0.00019941244121656545, + "loss": 2.9468, + "step": 787 + }, + { + "epoch": 0.06359454442740699, + "grad_norm": 0.8545510768890381, + "learning_rate": 0.00019941073114885347, + "loss": 2.8165, + "step": 788 + }, + { + "epoch": 0.06367524816398999, + 
"grad_norm": 0.8631312847137451, + "learning_rate": 0.0001994090186035662, + "loss": 2.7955, + "step": 789 + }, + { + "epoch": 0.063755951900573, + "grad_norm": 0.8883851170539856, + "learning_rate": 0.00019940730358074634, + "loss": 2.7828, + "step": 790 + }, + { + "epoch": 0.063836655637156, + "grad_norm": 0.8421074748039246, + "learning_rate": 0.00019940558608043664, + "loss": 2.7999, + "step": 791 + }, + { + "epoch": 0.063917359373739, + "grad_norm": 0.918134868144989, + "learning_rate": 0.0001994038661026799, + "loss": 2.7888, + "step": 792 + }, + { + "epoch": 0.06399806311032201, + "grad_norm": 0.8513637781143188, + "learning_rate": 0.00019940214364751896, + "loss": 2.7719, + "step": 793 + }, + { + "epoch": 0.06407876684690501, + "grad_norm": 0.9181898236274719, + "learning_rate": 0.00019940041871499675, + "loss": 2.8345, + "step": 794 + }, + { + "epoch": 0.06415947058348802, + "grad_norm": 0.8129134774208069, + "learning_rate": 0.00019939869130515626, + "loss": 2.7316, + "step": 795 + }, + { + "epoch": 0.06424017432007102, + "grad_norm": 0.8782191872596741, + "learning_rate": 0.00019939696141804057, + "loss": 2.7852, + "step": 796 + }, + { + "epoch": 0.06432087805665403, + "grad_norm": 0.9064851403236389, + "learning_rate": 0.00019939522905369276, + "loss": 2.8105, + "step": 797 + }, + { + "epoch": 0.06440158179323703, + "grad_norm": 0.9888454675674438, + "learning_rate": 0.00019939349421215603, + "loss": 2.8496, + "step": 798 + }, + { + "epoch": 0.06448228552982004, + "grad_norm": 0.8717427253723145, + "learning_rate": 0.0001993917568934736, + "loss": 2.8227, + "step": 799 + }, + { + "epoch": 0.06456298926640304, + "grad_norm": 0.922980010509491, + "learning_rate": 0.0001993900170976888, + "loss": 2.8571, + "step": 800 + }, + { + "epoch": 0.06464369300298604, + "grad_norm": 0.8311850428581238, + "learning_rate": 0.00019938827482484492, + "loss": 2.7905, + "step": 801 + }, + { + "epoch": 0.06472439673956905, + "grad_norm": 0.9274900555610657, + 
"learning_rate": 0.0001993865300749855, + "loss": 2.8526, + "step": 802 + }, + { + "epoch": 0.06480510047615205, + "grad_norm": 0.9072165489196777, + "learning_rate": 0.00019938478284815388, + "loss": 2.8384, + "step": 803 + }, + { + "epoch": 0.06488580421273504, + "grad_norm": 0.854099452495575, + "learning_rate": 0.0001993830331443937, + "loss": 2.8459, + "step": 804 + }, + { + "epoch": 0.06496650794931805, + "grad_norm": 0.824126660823822, + "learning_rate": 0.00019938128096374854, + "loss": 2.7845, + "step": 805 + }, + { + "epoch": 0.06504721168590105, + "grad_norm": 0.8570442795753479, + "learning_rate": 0.0001993795263062621, + "loss": 2.8446, + "step": 806 + }, + { + "epoch": 0.06512791542248406, + "grad_norm": 0.8998628854751587, + "learning_rate": 0.00019937776917197805, + "loss": 2.8604, + "step": 807 + }, + { + "epoch": 0.06520861915906706, + "grad_norm": 0.9189189076423645, + "learning_rate": 0.00019937600956094023, + "loss": 2.7866, + "step": 808 + }, + { + "epoch": 0.06528932289565006, + "grad_norm": 0.9471604824066162, + "learning_rate": 0.00019937424747319248, + "loss": 2.7619, + "step": 809 + }, + { + "epoch": 0.06537002663223307, + "grad_norm": 0.8507755994796753, + "learning_rate": 0.00019937248290877874, + "loss": 2.8259, + "step": 810 + }, + { + "epoch": 0.06545073036881607, + "grad_norm": 0.8800963759422302, + "learning_rate": 0.00019937071586774292, + "loss": 2.827, + "step": 811 + }, + { + "epoch": 0.06553143410539908, + "grad_norm": 0.8851124048233032, + "learning_rate": 0.00019936894635012915, + "loss": 2.793, + "step": 812 + }, + { + "epoch": 0.06561213784198208, + "grad_norm": 0.88127601146698, + "learning_rate": 0.00019936717435598144, + "loss": 2.8885, + "step": 813 + }, + { + "epoch": 0.06569284157856509, + "grad_norm": 0.9115073084831238, + "learning_rate": 0.000199365399885344, + "loss": 2.8278, + "step": 814 + }, + { + "epoch": 0.06577354531514809, + "grad_norm": 0.8722662925720215, + "learning_rate": 0.00019936362293826107, + 
"loss": 2.8125, + "step": 815 + }, + { + "epoch": 0.0658542490517311, + "grad_norm": 0.8332365155220032, + "learning_rate": 0.0001993618435147769, + "loss": 2.7682, + "step": 816 + }, + { + "epoch": 0.0659349527883141, + "grad_norm": 0.9524003863334656, + "learning_rate": 0.0001993600616149359, + "loss": 2.8166, + "step": 817 + }, + { + "epoch": 0.0660156565248971, + "grad_norm": 0.8402767181396484, + "learning_rate": 0.0001993582772387824, + "loss": 2.8192, + "step": 818 + }, + { + "epoch": 0.06609636026148011, + "grad_norm": 0.8589913249015808, + "learning_rate": 0.0001993564903863609, + "loss": 2.7785, + "step": 819 + }, + { + "epoch": 0.06617706399806311, + "grad_norm": 1.034550428390503, + "learning_rate": 0.00019935470105771598, + "loss": 2.8407, + "step": 820 + }, + { + "epoch": 0.06625776773464612, + "grad_norm": 0.856490969657898, + "learning_rate": 0.0001993529092528921, + "loss": 2.794, + "step": 821 + }, + { + "epoch": 0.06633847147122912, + "grad_norm": 0.897498369216919, + "learning_rate": 0.0001993511149719341, + "loss": 2.7959, + "step": 822 + }, + { + "epoch": 0.06641917520781213, + "grad_norm": 0.8495277166366577, + "learning_rate": 0.00019934931821488658, + "loss": 2.783, + "step": 823 + }, + { + "epoch": 0.06649987894439513, + "grad_norm": 0.8362239599227905, + "learning_rate": 0.00019934751898179436, + "loss": 2.8628, + "step": 824 + }, + { + "epoch": 0.06658058268097813, + "grad_norm": 0.8702061176300049, + "learning_rate": 0.00019934571727270225, + "loss": 2.7878, + "step": 825 + }, + { + "epoch": 0.06666128641756114, + "grad_norm": 0.8341560363769531, + "learning_rate": 0.0001993439130876552, + "loss": 2.7345, + "step": 826 + }, + { + "epoch": 0.06674199015414414, + "grad_norm": 0.880181074142456, + "learning_rate": 0.00019934210642669813, + "loss": 2.7789, + "step": 827 + }, + { + "epoch": 0.06682269389072715, + "grad_norm": 0.9088126420974731, + "learning_rate": 0.00019934029728987607, + "loss": 2.7893, + "step": 828 + }, + { + "epoch": 
0.06690339762731014, + "grad_norm": 0.8087106347084045, + "learning_rate": 0.00019933848567723416, + "loss": 2.7967, + "step": 829 + }, + { + "epoch": 0.06698410136389314, + "grad_norm": 0.8970876336097717, + "learning_rate": 0.00019933667158881745, + "loss": 2.8837, + "step": 830 + }, + { + "epoch": 0.06706480510047615, + "grad_norm": 0.9344804883003235, + "learning_rate": 0.00019933485502467128, + "loss": 2.7754, + "step": 831 + }, + { + "epoch": 0.06714550883705915, + "grad_norm": 0.8119301795959473, + "learning_rate": 0.00019933303598484084, + "loss": 2.7919, + "step": 832 + }, + { + "epoch": 0.06722621257364216, + "grad_norm": 0.9370681047439575, + "learning_rate": 0.00019933121446937148, + "loss": 2.8011, + "step": 833 + }, + { + "epoch": 0.06730691631022516, + "grad_norm": 0.8358973264694214, + "learning_rate": 0.00019932939047830858, + "loss": 2.8339, + "step": 834 + }, + { + "epoch": 0.06738762004680816, + "grad_norm": 0.8565972447395325, + "learning_rate": 0.00019932756401169765, + "loss": 2.8269, + "step": 835 + }, + { + "epoch": 0.06746832378339117, + "grad_norm": 0.8405514359474182, + "learning_rate": 0.00019932573506958417, + "loss": 2.7621, + "step": 836 + }, + { + "epoch": 0.06754902751997417, + "grad_norm": 0.8217617869377136, + "learning_rate": 0.00019932390365201373, + "loss": 2.8363, + "step": 837 + }, + { + "epoch": 0.06762973125655718, + "grad_norm": 0.9121438264846802, + "learning_rate": 0.00019932206975903198, + "loss": 2.8033, + "step": 838 + }, + { + "epoch": 0.06771043499314018, + "grad_norm": 0.9113054871559143, + "learning_rate": 0.00019932023339068464, + "loss": 2.8696, + "step": 839 + }, + { + "epoch": 0.06779113872972319, + "grad_norm": 0.8638293743133545, + "learning_rate": 0.00019931839454701743, + "loss": 2.8008, + "step": 840 + }, + { + "epoch": 0.06787184246630619, + "grad_norm": 0.862932562828064, + "learning_rate": 0.0001993165532280762, + "loss": 2.8092, + "step": 841 + }, + { + "epoch": 0.0679525462028892, + "grad_norm": 
0.9089607000350952, + "learning_rate": 0.00019931470943390685, + "loss": 2.8921, + "step": 842 + }, + { + "epoch": 0.0680332499394722, + "grad_norm": 0.9233555793762207, + "learning_rate": 0.00019931286316455537, + "loss": 2.9025, + "step": 843 + }, + { + "epoch": 0.0681139536760552, + "grad_norm": 0.9403017163276672, + "learning_rate": 0.0001993110144200677, + "loss": 2.7875, + "step": 844 + }, + { + "epoch": 0.06819465741263821, + "grad_norm": 0.9194290637969971, + "learning_rate": 0.00019930916320048996, + "loss": 2.8254, + "step": 845 + }, + { + "epoch": 0.06827536114922121, + "grad_norm": 0.8238688111305237, + "learning_rate": 0.00019930730950586828, + "loss": 2.82, + "step": 846 + }, + { + "epoch": 0.06835606488580422, + "grad_norm": 0.8560660481452942, + "learning_rate": 0.00019930545333624885, + "loss": 2.8516, + "step": 847 + }, + { + "epoch": 0.06843676862238722, + "grad_norm": 0.9127222895622253, + "learning_rate": 0.0001993035946916779, + "loss": 2.7674, + "step": 848 + }, + { + "epoch": 0.06851747235897022, + "grad_norm": 0.8679420948028564, + "learning_rate": 0.00019930173357220182, + "loss": 2.777, + "step": 849 + }, + { + "epoch": 0.06859817609555323, + "grad_norm": 0.9686945676803589, + "learning_rate": 0.00019929986997786699, + "loss": 2.7841, + "step": 850 + }, + { + "epoch": 0.06867887983213623, + "grad_norm": 0.8366333246231079, + "learning_rate": 0.00019929800390871977, + "loss": 2.7993, + "step": 851 + }, + { + "epoch": 0.06875958356871924, + "grad_norm": 0.8374585509300232, + "learning_rate": 0.00019929613536480675, + "loss": 2.7545, + "step": 852 + }, + { + "epoch": 0.06884028730530224, + "grad_norm": 0.9843763709068298, + "learning_rate": 0.00019929426434617451, + "loss": 2.8118, + "step": 853 + }, + { + "epoch": 0.06892099104188525, + "grad_norm": 0.8093454241752625, + "learning_rate": 0.0001992923908528696, + "loss": 2.7301, + "step": 854 + }, + { + "epoch": 0.06900169477846824, + "grad_norm": 0.8374418020248413, + "learning_rate": 
0.00019929051488493877, + "loss": 2.7745, + "step": 855 + }, + { + "epoch": 0.06908239851505124, + "grad_norm": 0.869965136051178, + "learning_rate": 0.00019928863644242875, + "loss": 2.7637, + "step": 856 + }, + { + "epoch": 0.06916310225163425, + "grad_norm": 0.9280590415000916, + "learning_rate": 0.00019928675552538638, + "loss": 2.7792, + "step": 857 + }, + { + "epoch": 0.06924380598821725, + "grad_norm": 0.8624193668365479, + "learning_rate": 0.00019928487213385852, + "loss": 2.7755, + "step": 858 + }, + { + "epoch": 0.06932450972480025, + "grad_norm": 0.8379972577095032, + "learning_rate": 0.00019928298626789212, + "loss": 2.8563, + "step": 859 + }, + { + "epoch": 0.06940521346138326, + "grad_norm": 0.9272914528846741, + "learning_rate": 0.00019928109792753418, + "loss": 2.836, + "step": 860 + }, + { + "epoch": 0.06948591719796626, + "grad_norm": 0.9239040613174438, + "learning_rate": 0.00019927920711283175, + "loss": 2.7999, + "step": 861 + }, + { + "epoch": 0.06956662093454927, + "grad_norm": 0.9125113487243652, + "learning_rate": 0.00019927731382383195, + "loss": 2.8494, + "step": 862 + }, + { + "epoch": 0.06964732467113227, + "grad_norm": 0.8782855868339539, + "learning_rate": 0.00019927541806058198, + "loss": 2.767, + "step": 863 + }, + { + "epoch": 0.06972802840771528, + "grad_norm": 0.8815447092056274, + "learning_rate": 0.00019927351982312907, + "loss": 2.7877, + "step": 864 + }, + { + "epoch": 0.06980873214429828, + "grad_norm": 0.8555476069450378, + "learning_rate": 0.00019927161911152056, + "loss": 2.8057, + "step": 865 + }, + { + "epoch": 0.06988943588088128, + "grad_norm": 0.8562924265861511, + "learning_rate": 0.00019926971592580382, + "loss": 2.8049, + "step": 866 + }, + { + "epoch": 0.06997013961746429, + "grad_norm": 0.846503734588623, + "learning_rate": 0.00019926781026602625, + "loss": 2.8545, + "step": 867 + }, + { + "epoch": 0.07005084335404729, + "grad_norm": 0.8439623713493347, + "learning_rate": 0.00019926590213223535, + "loss": 
2.7451, + "step": 868 + }, + { + "epoch": 0.0701315470906303, + "grad_norm": 0.8471730351448059, + "learning_rate": 0.00019926399152447868, + "loss": 2.7879, + "step": 869 + }, + { + "epoch": 0.0702122508272133, + "grad_norm": 0.8721400499343872, + "learning_rate": 0.00019926207844280387, + "loss": 2.8594, + "step": 870 + }, + { + "epoch": 0.0702929545637963, + "grad_norm": 0.8110925555229187, + "learning_rate": 0.0001992601628872586, + "loss": 2.7789, + "step": 871 + }, + { + "epoch": 0.07037365830037931, + "grad_norm": 0.9593119025230408, + "learning_rate": 0.0001992582448578906, + "loss": 2.8792, + "step": 872 + }, + { + "epoch": 0.07045436203696231, + "grad_norm": 0.8553354144096375, + "learning_rate": 0.00019925632435474765, + "loss": 2.8056, + "step": 873 + }, + { + "epoch": 0.07053506577354532, + "grad_norm": 0.8062612414360046, + "learning_rate": 0.00019925440137787768, + "loss": 2.7762, + "step": 874 + }, + { + "epoch": 0.07061576951012832, + "grad_norm": 0.8264921307563782, + "learning_rate": 0.00019925247592732858, + "loss": 2.8435, + "step": 875 + }, + { + "epoch": 0.07069647324671133, + "grad_norm": 0.7770401835441589, + "learning_rate": 0.00019925054800314828, + "loss": 2.7846, + "step": 876 + }, + { + "epoch": 0.07077717698329433, + "grad_norm": 0.8426765203475952, + "learning_rate": 0.0001992486176053849, + "loss": 2.782, + "step": 877 + }, + { + "epoch": 0.07085788071987734, + "grad_norm": 0.855330228805542, + "learning_rate": 0.00019924668473408655, + "loss": 2.8051, + "step": 878 + }, + { + "epoch": 0.07093858445646034, + "grad_norm": 0.8762049674987793, + "learning_rate": 0.00019924474938930135, + "loss": 2.7634, + "step": 879 + }, + { + "epoch": 0.07101928819304333, + "grad_norm": 0.9226812124252319, + "learning_rate": 0.0001992428115710776, + "loss": 2.8342, + "step": 880 + }, + { + "epoch": 0.07109999192962634, + "grad_norm": 0.9031660556793213, + "learning_rate": 0.00019924087127946353, + "loss": 2.7953, + "step": 881 + }, + { + "epoch": 
0.07118069566620934, + "grad_norm": 1.0151792764663696, + "learning_rate": 0.00019923892851450757, + "loss": 2.8225, + "step": 882 + }, + { + "epoch": 0.07126139940279234, + "grad_norm": 0.9805678725242615, + "learning_rate": 0.00019923698327625806, + "loss": 2.7727, + "step": 883 + }, + { + "epoch": 0.07134210313937535, + "grad_norm": 0.8831729888916016, + "learning_rate": 0.00019923503556476356, + "loss": 2.7682, + "step": 884 + }, + { + "epoch": 0.07142280687595835, + "grad_norm": 1.0311404466629028, + "learning_rate": 0.00019923308538007253, + "loss": 2.8422, + "step": 885 + }, + { + "epoch": 0.07150351061254136, + "grad_norm": 0.8143388628959656, + "learning_rate": 0.0001992311327222336, + "loss": 2.7876, + "step": 886 + }, + { + "epoch": 0.07158421434912436, + "grad_norm": 0.877017617225647, + "learning_rate": 0.00019922917759129552, + "loss": 2.7486, + "step": 887 + }, + { + "epoch": 0.07166491808570737, + "grad_norm": 0.930646538734436, + "learning_rate": 0.0001992272199873069, + "loss": 2.8022, + "step": 888 + }, + { + "epoch": 0.07174562182229037, + "grad_norm": 0.934753954410553, + "learning_rate": 0.00019922525991031655, + "loss": 2.8485, + "step": 889 + }, + { + "epoch": 0.07182632555887337, + "grad_norm": 0.9564220905303955, + "learning_rate": 0.00019922329736037339, + "loss": 2.761, + "step": 890 + }, + { + "epoch": 0.07190702929545638, + "grad_norm": 0.9457311630249023, + "learning_rate": 0.00019922133233752626, + "loss": 2.8279, + "step": 891 + }, + { + "epoch": 0.07198773303203938, + "grad_norm": 0.9385658502578735, + "learning_rate": 0.0001992193648418242, + "loss": 2.8222, + "step": 892 + }, + { + "epoch": 0.07206843676862239, + "grad_norm": 1.0157524347305298, + "learning_rate": 0.00019921739487331616, + "loss": 2.9166, + "step": 893 + }, + { + "epoch": 0.07214914050520539, + "grad_norm": 0.9143860340118408, + "learning_rate": 0.00019921542243205132, + "loss": 2.8139, + "step": 894 + }, + { + "epoch": 0.0722298442417884, + "grad_norm": 
0.8769320249557495, + "learning_rate": 0.00019921344751807878, + "loss": 2.8023, + "step": 895 + }, + { + "epoch": 0.0723105479783714, + "grad_norm": 0.9647517204284668, + "learning_rate": 0.0001992114701314478, + "loss": 2.8872, + "step": 896 + }, + { + "epoch": 0.0723912517149544, + "grad_norm": 1.025978446006775, + "learning_rate": 0.00019920949027220762, + "loss": 2.837, + "step": 897 + }, + { + "epoch": 0.07247195545153741, + "grad_norm": 0.8848521113395691, + "learning_rate": 0.0001992075079404076, + "loss": 2.7498, + "step": 898 + }, + { + "epoch": 0.07255265918812041, + "grad_norm": 0.9395595788955688, + "learning_rate": 0.0001992055231360972, + "loss": 2.8752, + "step": 899 + }, + { + "epoch": 0.07263336292470342, + "grad_norm": 0.8711572885513306, + "learning_rate": 0.00019920353585932578, + "loss": 2.8608, + "step": 900 + }, + { + "epoch": 0.07271406666128642, + "grad_norm": 0.8606846332550049, + "learning_rate": 0.00019920154611014295, + "loss": 2.829, + "step": 901 + }, + { + "epoch": 0.07279477039786943, + "grad_norm": 0.859354555606842, + "learning_rate": 0.0001991995538885983, + "loss": 2.8102, + "step": 902 + }, + { + "epoch": 0.07287547413445243, + "grad_norm": 0.9063243865966797, + "learning_rate": 0.00019919755919474143, + "loss": 2.8509, + "step": 903 + }, + { + "epoch": 0.07295617787103544, + "grad_norm": 0.8321940898895264, + "learning_rate": 0.00019919556202862207, + "loss": 2.796, + "step": 904 + }, + { + "epoch": 0.07303688160761844, + "grad_norm": 0.8875191807746887, + "learning_rate": 0.00019919356239029003, + "loss": 2.8672, + "step": 905 + }, + { + "epoch": 0.07311758534420143, + "grad_norm": 0.9028071165084839, + "learning_rate": 0.0001991915602797951, + "loss": 2.8926, + "step": 906 + }, + { + "epoch": 0.07319828908078443, + "grad_norm": 0.9449291825294495, + "learning_rate": 0.0001991895556971872, + "loss": 2.8159, + "step": 907 + }, + { + "epoch": 0.07327899281736744, + "grad_norm": 0.871576189994812, + "learning_rate": 
0.0001991875486425163, + "loss": 2.8162, + "step": 908 + }, + { + "epoch": 0.07335969655395044, + "grad_norm": 0.818423330783844, + "learning_rate": 0.0001991855391158324, + "loss": 2.8882, + "step": 909 + }, + { + "epoch": 0.07344040029053345, + "grad_norm": 0.8802343606948853, + "learning_rate": 0.0001991835271171856, + "loss": 2.8245, + "step": 910 + }, + { + "epoch": 0.07352110402711645, + "grad_norm": 0.916023313999176, + "learning_rate": 0.000199181512646626, + "loss": 2.8966, + "step": 911 + }, + { + "epoch": 0.07360180776369946, + "grad_norm": 1.0663317441940308, + "learning_rate": 0.0001991794957042039, + "loss": 2.7736, + "step": 912 + }, + { + "epoch": 0.07368251150028246, + "grad_norm": 0.9212445616722107, + "learning_rate": 0.00019917747628996947, + "loss": 2.7924, + "step": 913 + }, + { + "epoch": 0.07376321523686546, + "grad_norm": 0.9785256385803223, + "learning_rate": 0.00019917545440397308, + "loss": 2.8021, + "step": 914 + }, + { + "epoch": 0.07384391897344847, + "grad_norm": 0.8510444760322571, + "learning_rate": 0.00019917343004626514, + "loss": 2.7991, + "step": 915 + }, + { + "epoch": 0.07392462271003147, + "grad_norm": 0.8967106342315674, + "learning_rate": 0.0001991714032168961, + "loss": 2.8838, + "step": 916 + }, + { + "epoch": 0.07400532644661448, + "grad_norm": 0.8940563797950745, + "learning_rate": 0.0001991693739159164, + "loss": 2.8124, + "step": 917 + }, + { + "epoch": 0.07408603018319748, + "grad_norm": 0.9270479679107666, + "learning_rate": 0.0001991673421433767, + "loss": 2.7627, + "step": 918 + }, + { + "epoch": 0.07416673391978049, + "grad_norm": 0.905805230140686, + "learning_rate": 0.0001991653078993276, + "loss": 2.781, + "step": 919 + }, + { + "epoch": 0.07424743765636349, + "grad_norm": 0.9295129179954529, + "learning_rate": 0.00019916327118381982, + "loss": 2.8332, + "step": 920 + }, + { + "epoch": 0.0743281413929465, + "grad_norm": 0.863331139087677, + "learning_rate": 0.00019916123199690408, + "loss": 2.8489, + "step": 
921 + }, + { + "epoch": 0.0744088451295295, + "grad_norm": 0.9966896772384644, + "learning_rate": 0.00019915919033863127, + "loss": 2.9107, + "step": 922 + }, + { + "epoch": 0.0744895488661125, + "grad_norm": 0.8921390771865845, + "learning_rate": 0.00019915714620905218, + "loss": 2.7668, + "step": 923 + }, + { + "epoch": 0.07457025260269551, + "grad_norm": 0.9378434419631958, + "learning_rate": 0.00019915509960821782, + "loss": 2.8305, + "step": 924 + }, + { + "epoch": 0.07465095633927851, + "grad_norm": 1.0351817607879639, + "learning_rate": 0.0001991530505361792, + "loss": 2.9412, + "step": 925 + }, + { + "epoch": 0.07473166007586152, + "grad_norm": 0.7995476722717285, + "learning_rate": 0.0001991509989929874, + "loss": 2.7872, + "step": 926 + }, + { + "epoch": 0.07481236381244452, + "grad_norm": 0.858830988407135, + "learning_rate": 0.0001991489449786935, + "loss": 2.7775, + "step": 927 + }, + { + "epoch": 0.07489306754902753, + "grad_norm": 1.1254682540893555, + "learning_rate": 0.00019914688849334867, + "loss": 2.7913, + "step": 928 + }, + { + "epoch": 0.07497377128561053, + "grad_norm": 0.9475330710411072, + "learning_rate": 0.00019914482953700428, + "loss": 2.7945, + "step": 929 + }, + { + "epoch": 0.07505447502219353, + "grad_norm": 0.8427290916442871, + "learning_rate": 0.00019914276810971152, + "loss": 2.8297, + "step": 930 + }, + { + "epoch": 0.07513517875877652, + "grad_norm": 0.9308956265449524, + "learning_rate": 0.00019914070421152183, + "loss": 2.8534, + "step": 931 + }, + { + "epoch": 0.07521588249535953, + "grad_norm": 0.9264787435531616, + "learning_rate": 0.00019913863784248664, + "loss": 2.7959, + "step": 932 + }, + { + "epoch": 0.07529658623194253, + "grad_norm": 0.8432087302207947, + "learning_rate": 0.00019913656900265742, + "loss": 2.8479, + "step": 933 + }, + { + "epoch": 0.07537728996852554, + "grad_norm": 0.8237274885177612, + "learning_rate": 0.0001991344976920858, + "loss": 2.782, + "step": 934 + }, + { + "epoch": 0.07545799370510854, 
+ "grad_norm": 0.8143243789672852, + "learning_rate": 0.0001991324239108233, + "loss": 2.7567, + "step": 935 + }, + { + "epoch": 0.07553869744169155, + "grad_norm": 0.8824434280395508, + "learning_rate": 0.0001991303476589217, + "loss": 2.7971, + "step": 936 + }, + { + "epoch": 0.07561940117827455, + "grad_norm": 0.8202407360076904, + "learning_rate": 0.00019912826893643272, + "loss": 2.7825, + "step": 937 + }, + { + "epoch": 0.07570010491485755, + "grad_norm": 0.8001337647438049, + "learning_rate": 0.00019912618774340813, + "loss": 2.8294, + "step": 938 + }, + { + "epoch": 0.07578080865144056, + "grad_norm": 0.8875572085380554, + "learning_rate": 0.00019912410407989982, + "loss": 2.8013, + "step": 939 + }, + { + "epoch": 0.07586151238802356, + "grad_norm": 0.8676280379295349, + "learning_rate": 0.0001991220179459597, + "loss": 2.767, + "step": 940 + }, + { + "epoch": 0.07594221612460657, + "grad_norm": 0.9767136573791504, + "learning_rate": 0.00019911992934163982, + "loss": 2.8315, + "step": 941 + }, + { + "epoch": 0.07602291986118957, + "grad_norm": 0.8690733909606934, + "learning_rate": 0.0001991178382669922, + "loss": 2.8042, + "step": 942 + }, + { + "epoch": 0.07610362359777258, + "grad_norm": 0.862978458404541, + "learning_rate": 0.00019911574472206893, + "loss": 2.8243, + "step": 943 + }, + { + "epoch": 0.07618432733435558, + "grad_norm": 0.9116127490997314, + "learning_rate": 0.00019911364870692225, + "loss": 2.7377, + "step": 944 + }, + { + "epoch": 0.07626503107093859, + "grad_norm": 0.8765420317649841, + "learning_rate": 0.00019911155022160433, + "loss": 2.7673, + "step": 945 + }, + { + "epoch": 0.07634573480752159, + "grad_norm": 0.8229342699050903, + "learning_rate": 0.0001991094492661675, + "loss": 2.7749, + "step": 946 + }, + { + "epoch": 0.0764264385441046, + "grad_norm": 0.8340098261833191, + "learning_rate": 0.00019910734584066412, + "loss": 2.7871, + "step": 947 + }, + { + "epoch": 0.0765071422806876, + "grad_norm": 0.8116940259933472, + 
"learning_rate": 0.0001991052399451466, + "loss": 2.8202, + "step": 948 + }, + { + "epoch": 0.0765878460172706, + "grad_norm": 0.8730412721633911, + "learning_rate": 0.00019910313157966747, + "loss": 2.8661, + "step": 949 + }, + { + "epoch": 0.07666854975385361, + "grad_norm": 0.8272213339805603, + "learning_rate": 0.0001991010207442792, + "loss": 2.8352, + "step": 950 + }, + { + "epoch": 0.07674925349043661, + "grad_norm": 0.8586944937705994, + "learning_rate": 0.0001990989074390345, + "loss": 2.8018, + "step": 951 + }, + { + "epoch": 0.07682995722701962, + "grad_norm": 0.81830894947052, + "learning_rate": 0.00019909679166398592, + "loss": 2.8154, + "step": 952 + }, + { + "epoch": 0.07691066096360262, + "grad_norm": 0.8158484101295471, + "learning_rate": 0.00019909467341918627, + "loss": 2.7618, + "step": 953 + }, + { + "epoch": 0.07699136470018562, + "grad_norm": 0.816834032535553, + "learning_rate": 0.00019909255270468833, + "loss": 2.8125, + "step": 954 + }, + { + "epoch": 0.07707206843676863, + "grad_norm": 0.944790780544281, + "learning_rate": 0.00019909042952054496, + "loss": 2.8054, + "step": 955 + }, + { + "epoch": 0.07715277217335163, + "grad_norm": 0.9281302690505981, + "learning_rate": 0.00019908830386680904, + "loss": 2.8724, + "step": 956 + }, + { + "epoch": 0.07723347590993462, + "grad_norm": 0.8850300908088684, + "learning_rate": 0.00019908617574353356, + "loss": 2.7906, + "step": 957 + }, + { + "epoch": 0.07731417964651763, + "grad_norm": 0.8997938632965088, + "learning_rate": 0.00019908404515077158, + "loss": 2.7814, + "step": 958 + }, + { + "epoch": 0.07739488338310063, + "grad_norm": 0.8814194798469543, + "learning_rate": 0.0001990819120885762, + "loss": 2.7423, + "step": 959 + }, + { + "epoch": 0.07747558711968364, + "grad_norm": 0.8759928345680237, + "learning_rate": 0.00019907977655700054, + "loss": 2.7803, + "step": 960 + }, + { + "epoch": 0.07755629085626664, + "grad_norm": 0.8439476490020752, + "learning_rate": 0.00019907763855609787, + 
"loss": 2.8277, + "step": 961 + }, + { + "epoch": 0.07763699459284965, + "grad_norm": 0.8745121955871582, + "learning_rate": 0.00019907549808592144, + "loss": 2.8152, + "step": 962 + }, + { + "epoch": 0.07771769832943265, + "grad_norm": 1.0439598560333252, + "learning_rate": 0.00019907335514652465, + "loss": 2.7882, + "step": 963 + }, + { + "epoch": 0.07779840206601565, + "grad_norm": 0.9516503810882568, + "learning_rate": 0.00019907120973796082, + "loss": 2.8555, + "step": 964 + }, + { + "epoch": 0.07787910580259866, + "grad_norm": 0.928717315196991, + "learning_rate": 0.0001990690618602835, + "loss": 2.8214, + "step": 965 + }, + { + "epoch": 0.07795980953918166, + "grad_norm": 0.7923071384429932, + "learning_rate": 0.00019906691151354617, + "loss": 2.8153, + "step": 966 + }, + { + "epoch": 0.07804051327576467, + "grad_norm": 0.8783324956893921, + "learning_rate": 0.00019906475869780246, + "loss": 2.7691, + "step": 967 + }, + { + "epoch": 0.07812121701234767, + "grad_norm": 0.8974801301956177, + "learning_rate": 0.000199062603413106, + "loss": 2.8156, + "step": 968 + }, + { + "epoch": 0.07820192074893068, + "grad_norm": 0.9304391741752625, + "learning_rate": 0.00019906044565951052, + "loss": 2.8489, + "step": 969 + }, + { + "epoch": 0.07828262448551368, + "grad_norm": 0.8351098895072937, + "learning_rate": 0.00019905828543706976, + "loss": 2.7744, + "step": 970 + }, + { + "epoch": 0.07836332822209668, + "grad_norm": 0.8634265065193176, + "learning_rate": 0.0001990561227458376, + "loss": 2.8193, + "step": 971 + }, + { + "epoch": 0.07844403195867969, + "grad_norm": 0.8969653248786926, + "learning_rate": 0.00019905395758586792, + "loss": 2.7548, + "step": 972 + }, + { + "epoch": 0.07852473569526269, + "grad_norm": 0.8964852094650269, + "learning_rate": 0.0001990517899572147, + "loss": 2.8037, + "step": 973 + }, + { + "epoch": 0.0786054394318457, + "grad_norm": 0.8567596077919006, + "learning_rate": 0.00019904961985993196, + "loss": 2.7942, + "step": 974 + }, + { + 
"epoch": 0.0786861431684287, + "grad_norm": 0.8275273442268372, + "learning_rate": 0.00019904744729407374, + "loss": 2.8359, + "step": 975 + }, + { + "epoch": 0.0787668469050117, + "grad_norm": 0.9458810091018677, + "learning_rate": 0.00019904527225969424, + "loss": 2.8354, + "step": 976 + }, + { + "epoch": 0.07884755064159471, + "grad_norm": 0.8690593838691711, + "learning_rate": 0.00019904309475684767, + "loss": 2.7894, + "step": 977 + }, + { + "epoch": 0.07892825437817771, + "grad_norm": 0.810279130935669, + "learning_rate": 0.00019904091478558823, + "loss": 2.7939, + "step": 978 + }, + { + "epoch": 0.07900895811476072, + "grad_norm": 0.8779012560844421, + "learning_rate": 0.0001990387323459703, + "loss": 2.7551, + "step": 979 + }, + { + "epoch": 0.07908966185134372, + "grad_norm": 0.7936381101608276, + "learning_rate": 0.00019903654743804833, + "loss": 2.814, + "step": 980 + }, + { + "epoch": 0.07917036558792673, + "grad_norm": 0.9567989110946655, + "learning_rate": 0.00019903436006187667, + "loss": 2.7715, + "step": 981 + }, + { + "epoch": 0.07925106932450972, + "grad_norm": 0.9250255823135376, + "learning_rate": 0.00019903217021750987, + "loss": 2.8967, + "step": 982 + }, + { + "epoch": 0.07933177306109272, + "grad_norm": 0.8342804312705994, + "learning_rate": 0.00019902997790500256, + "loss": 2.7728, + "step": 983 + }, + { + "epoch": 0.07941247679767573, + "grad_norm": 0.8321473598480225, + "learning_rate": 0.00019902778312440932, + "loss": 2.8479, + "step": 984 + }, + { + "epoch": 0.07949318053425873, + "grad_norm": 0.894727885723114, + "learning_rate": 0.00019902558587578484, + "loss": 2.8211, + "step": 985 + }, + { + "epoch": 0.07957388427084174, + "grad_norm": 0.8093457221984863, + "learning_rate": 0.0001990233861591839, + "loss": 2.7481, + "step": 986 + }, + { + "epoch": 0.07965458800742474, + "grad_norm": 0.8626284599304199, + "learning_rate": 0.00019902118397466132, + "loss": 2.8368, + "step": 987 + }, + { + "epoch": 0.07973529174400774, + 
"grad_norm": 0.799648642539978, + "learning_rate": 0.00019901897932227204, + "loss": 2.8713, + "step": 988 + }, + { + "epoch": 0.07981599548059075, + "grad_norm": 0.9658265709877014, + "learning_rate": 0.00019901677220207092, + "loss": 2.7284, + "step": 989 + }, + { + "epoch": 0.07989669921717375, + "grad_norm": 0.877299427986145, + "learning_rate": 0.00019901456261411303, + "loss": 2.7916, + "step": 990 + }, + { + "epoch": 0.07997740295375676, + "grad_norm": 0.926450252532959, + "learning_rate": 0.00019901235055845337, + "loss": 2.8207, + "step": 991 + }, + { + "epoch": 0.08005810669033976, + "grad_norm": 0.8858455419540405, + "learning_rate": 0.00019901013603514716, + "loss": 2.795, + "step": 992 + }, + { + "epoch": 0.08013881042692277, + "grad_norm": 0.8619922995567322, + "learning_rate": 0.0001990079190442495, + "loss": 2.8163, + "step": 993 + }, + { + "epoch": 0.08021951416350577, + "grad_norm": 0.859200656414032, + "learning_rate": 0.00019900569958581572, + "loss": 2.7715, + "step": 994 + }, + { + "epoch": 0.08030021790008877, + "grad_norm": 0.8346282839775085, + "learning_rate": 0.0001990034776599011, + "loss": 2.8312, + "step": 995 + }, + { + "epoch": 0.08038092163667178, + "grad_norm": 0.9188725352287292, + "learning_rate": 0.00019900125326656102, + "loss": 2.799, + "step": 996 + }, + { + "epoch": 0.08046162537325478, + "grad_norm": 0.8548648953437805, + "learning_rate": 0.00019899902640585092, + "loss": 2.7778, + "step": 997 + }, + { + "epoch": 0.08054232910983779, + "grad_norm": 0.8883183002471924, + "learning_rate": 0.00019899679707782624, + "loss": 2.809, + "step": 998 + }, + { + "epoch": 0.08062303284642079, + "grad_norm": 0.8915852308273315, + "learning_rate": 0.00019899456528254267, + "loss": 2.8309, + "step": 999 + }, + { + "epoch": 0.0807037365830038, + "grad_norm": 0.8092094659805298, + "learning_rate": 0.00019899233102005573, + "loss": 2.7753, + "step": 1000 + }, + { + "epoch": 0.0807037365830038, + "eval_loss": 2.7104671001434326, + 
"eval_runtime": 773.7354, + "eval_samples_per_second": 3.386, + "eval_steps_per_second": 0.565, + "step": 1000 + }, + { + "epoch": 0.0807844403195868, + "grad_norm": 0.8744900226593018, + "learning_rate": 0.00019899009429042114, + "loss": 2.7948, + "step": 1001 + }, + { + "epoch": 0.0808651440561698, + "grad_norm": 0.8749974370002747, + "learning_rate": 0.0001989878550936946, + "loss": 2.7609, + "step": 1002 + }, + { + "epoch": 0.08094584779275281, + "grad_norm": 0.8622820377349854, + "learning_rate": 0.000198985613429932, + "loss": 2.8023, + "step": 1003 + }, + { + "epoch": 0.08102655152933581, + "grad_norm": 0.9404367208480835, + "learning_rate": 0.00019898336929918915, + "loss": 2.7992, + "step": 1004 + }, + { + "epoch": 0.08110725526591882, + "grad_norm": 0.8846708536148071, + "learning_rate": 0.000198981122701522, + "loss": 2.8084, + "step": 1005 + }, + { + "epoch": 0.08118795900250182, + "grad_norm": 0.8105908036231995, + "learning_rate": 0.0001989788736369865, + "loss": 2.8504, + "step": 1006 + }, + { + "epoch": 0.08126866273908483, + "grad_norm": 1.0107187032699585, + "learning_rate": 0.0001989766221056388, + "loss": 2.7935, + "step": 1007 + }, + { + "epoch": 0.08134936647566782, + "grad_norm": 0.7825451493263245, + "learning_rate": 0.0001989743681075349, + "loss": 2.8024, + "step": 1008 + }, + { + "epoch": 0.08143007021225082, + "grad_norm": 0.8478613495826721, + "learning_rate": 0.000198972111642731, + "loss": 2.8645, + "step": 1009 + }, + { + "epoch": 0.08151077394883383, + "grad_norm": 0.8432144522666931, + "learning_rate": 0.0001989698527112834, + "loss": 2.8469, + "step": 1010 + }, + { + "epoch": 0.08159147768541683, + "grad_norm": 0.8147936463356018, + "learning_rate": 0.00019896759131324835, + "loss": 2.7799, + "step": 1011 + }, + { + "epoch": 0.08167218142199983, + "grad_norm": 0.8446993827819824, + "learning_rate": 0.00019896532744868224, + "loss": 2.7685, + "step": 1012 + }, + { + "epoch": 0.08175288515858284, + "grad_norm": 0.7635807394981384, + 
"learning_rate": 0.00019896306111764146, + "loss": 2.7823, + "step": 1013 + }, + { + "epoch": 0.08183358889516584, + "grad_norm": 0.8272855877876282, + "learning_rate": 0.00019896079232018253, + "loss": 2.7877, + "step": 1014 + }, + { + "epoch": 0.08191429263174885, + "grad_norm": 0.8079700469970703, + "learning_rate": 0.00019895852105636193, + "loss": 2.7849, + "step": 1015 + }, + { + "epoch": 0.08199499636833185, + "grad_norm": 0.8518063426017761, + "learning_rate": 0.0001989562473262363, + "loss": 2.8622, + "step": 1016 + }, + { + "epoch": 0.08207570010491486, + "grad_norm": 0.8646622896194458, + "learning_rate": 0.00019895397112986235, + "loss": 2.8224, + "step": 1017 + }, + { + "epoch": 0.08215640384149786, + "grad_norm": 0.8764398097991943, + "learning_rate": 0.00019895169246729672, + "loss": 2.938, + "step": 1018 + }, + { + "epoch": 0.08223710757808086, + "grad_norm": 0.8304057717323303, + "learning_rate": 0.0001989494113385963, + "loss": 2.7586, + "step": 1019 + }, + { + "epoch": 0.08231781131466387, + "grad_norm": 0.8569272756576538, + "learning_rate": 0.00019894712774381787, + "loss": 2.7803, + "step": 1020 + }, + { + "epoch": 0.08239851505124687, + "grad_norm": 0.8788578510284424, + "learning_rate": 0.00019894484168301836, + "loss": 2.8138, + "step": 1021 + }, + { + "epoch": 0.08247921878782988, + "grad_norm": 0.9113569855690002, + "learning_rate": 0.0001989425531562548, + "loss": 2.8023, + "step": 1022 + }, + { + "epoch": 0.08255992252441288, + "grad_norm": 0.8630590438842773, + "learning_rate": 0.00019894026216358413, + "loss": 2.791, + "step": 1023 + }, + { + "epoch": 0.08264062626099589, + "grad_norm": 0.8691157698631287, + "learning_rate": 0.00019893796870506348, + "loss": 2.811, + "step": 1024 + }, + { + "epoch": 0.08272132999757889, + "grad_norm": 0.9078284502029419, + "learning_rate": 0.00019893567278075007, + "loss": 2.8282, + "step": 1025 + }, + { + "epoch": 0.0828020337341619, + "grad_norm": 0.867511510848999, + "learning_rate": 
0.00019893337439070105, + "loss": 2.7862, + "step": 1026 + }, + { + "epoch": 0.0828827374707449, + "grad_norm": 0.8016698360443115, + "learning_rate": 0.00019893107353497372, + "loss": 2.8083, + "step": 1027 + }, + { + "epoch": 0.0829634412073279, + "grad_norm": 0.8583545684814453, + "learning_rate": 0.00019892877021362543, + "loss": 2.8041, + "step": 1028 + }, + { + "epoch": 0.08304414494391091, + "grad_norm": 0.8302493691444397, + "learning_rate": 0.0001989264644267136, + "loss": 2.7866, + "step": 1029 + }, + { + "epoch": 0.08312484868049391, + "grad_norm": 0.9628411531448364, + "learning_rate": 0.00019892415617429567, + "loss": 2.8187, + "step": 1030 + }, + { + "epoch": 0.08320555241707692, + "grad_norm": 0.874840259552002, + "learning_rate": 0.0001989218454564292, + "loss": 2.7475, + "step": 1031 + }, + { + "epoch": 0.08328625615365992, + "grad_norm": 0.8641294836997986, + "learning_rate": 0.0001989195322731717, + "loss": 2.7795, + "step": 1032 + }, + { + "epoch": 0.08336695989024291, + "grad_norm": 0.8219757080078125, + "learning_rate": 0.0001989172166245809, + "loss": 2.7683, + "step": 1033 + }, + { + "epoch": 0.08344766362682592, + "grad_norm": 0.7905694246292114, + "learning_rate": 0.00019891489851071455, + "loss": 2.7668, + "step": 1034 + }, + { + "epoch": 0.08352836736340892, + "grad_norm": 0.8180816173553467, + "learning_rate": 0.0001989125779316303, + "loss": 2.7661, + "step": 1035 + }, + { + "epoch": 0.08360907109999192, + "grad_norm": 0.8337293267250061, + "learning_rate": 0.00019891025488738605, + "loss": 2.7823, + "step": 1036 + }, + { + "epoch": 0.08368977483657493, + "grad_norm": 0.9673140048980713, + "learning_rate": 0.00019890792937803973, + "loss": 2.8164, + "step": 1037 + }, + { + "epoch": 0.08377047857315793, + "grad_norm": 0.8810501098632812, + "learning_rate": 0.00019890560140364922, + "loss": 2.7904, + "step": 1038 + }, + { + "epoch": 0.08385118230974094, + "grad_norm": 0.9507614374160767, + "learning_rate": 0.0001989032709642726, + 
"loss": 2.7928, + "step": 1039 + }, + { + "epoch": 0.08393188604632394, + "grad_norm": 0.953738808631897, + "learning_rate": 0.00019890093805996793, + "loss": 2.7922, + "step": 1040 + }, + { + "epoch": 0.08401258978290695, + "grad_norm": 0.8079931139945984, + "learning_rate": 0.00019889860269079336, + "loss": 2.7909, + "step": 1041 + }, + { + "epoch": 0.08409329351948995, + "grad_norm": 1.0330647230148315, + "learning_rate": 0.0001988962648568071, + "loss": 2.7526, + "step": 1042 + }, + { + "epoch": 0.08417399725607295, + "grad_norm": 0.8988988399505615, + "learning_rate": 0.00019889392455806738, + "loss": 2.7471, + "step": 1043 + }, + { + "epoch": 0.08425470099265596, + "grad_norm": 0.7986348271369934, + "learning_rate": 0.00019889158179463255, + "loss": 2.7208, + "step": 1044 + }, + { + "epoch": 0.08433540472923896, + "grad_norm": 0.9231631755828857, + "learning_rate": 0.000198889236566561, + "loss": 2.7953, + "step": 1045 + }, + { + "epoch": 0.08441610846582197, + "grad_norm": 0.8438155055046082, + "learning_rate": 0.00019888688887391117, + "loss": 2.8006, + "step": 1046 + }, + { + "epoch": 0.08449681220240497, + "grad_norm": 0.8915219306945801, + "learning_rate": 0.0001988845387167416, + "loss": 2.8184, + "step": 1047 + }, + { + "epoch": 0.08457751593898798, + "grad_norm": 0.924401581287384, + "learning_rate": 0.0001988821860951108, + "loss": 2.8411, + "step": 1048 + }, + { + "epoch": 0.08465821967557098, + "grad_norm": 0.8144630193710327, + "learning_rate": 0.00019887983100907745, + "loss": 2.8258, + "step": 1049 + }, + { + "epoch": 0.08473892341215399, + "grad_norm": 0.9974459409713745, + "learning_rate": 0.00019887747345870028, + "loss": 2.7567, + "step": 1050 + }, + { + "epoch": 0.08481962714873699, + "grad_norm": 0.944526195526123, + "learning_rate": 0.00019887511344403796, + "loss": 2.8657, + "step": 1051 + }, + { + "epoch": 0.08490033088532, + "grad_norm": 0.8204831480979919, + "learning_rate": 0.00019887275096514936, + "loss": 2.8054, + "step": 1052 + 
}, + { + "epoch": 0.084981034621903, + "grad_norm": 0.8855900168418884, + "learning_rate": 0.00019887038602209336, + "loss": 2.8019, + "step": 1053 + }, + { + "epoch": 0.085061738358486, + "grad_norm": 0.9025108814239502, + "learning_rate": 0.0001988680186149289, + "loss": 2.7934, + "step": 1054 + }, + { + "epoch": 0.08514244209506901, + "grad_norm": 0.8486441373825073, + "learning_rate": 0.00019886564874371494, + "loss": 2.809, + "step": 1055 + }, + { + "epoch": 0.08522314583165201, + "grad_norm": 0.778364896774292, + "learning_rate": 0.00019886327640851058, + "loss": 2.7783, + "step": 1056 + }, + { + "epoch": 0.08530384956823502, + "grad_norm": 0.8515299558639526, + "learning_rate": 0.00019886090160937497, + "loss": 2.8122, + "step": 1057 + }, + { + "epoch": 0.08538455330481802, + "grad_norm": 0.8466131091117859, + "learning_rate": 0.00019885852434636724, + "loss": 2.7798, + "step": 1058 + }, + { + "epoch": 0.08546525704140101, + "grad_norm": 0.8856541514396667, + "learning_rate": 0.00019885614461954667, + "loss": 2.8033, + "step": 1059 + }, + { + "epoch": 0.08554596077798401, + "grad_norm": 0.8853924870491028, + "learning_rate": 0.00019885376242897258, + "loss": 2.8368, + "step": 1060 + }, + { + "epoch": 0.08562666451456702, + "grad_norm": 0.7858660221099854, + "learning_rate": 0.0001988513777747043, + "loss": 2.7806, + "step": 1061 + }, + { + "epoch": 0.08570736825115002, + "grad_norm": 0.8601513504981995, + "learning_rate": 0.0001988489906568013, + "loss": 2.8434, + "step": 1062 + }, + { + "epoch": 0.08578807198773303, + "grad_norm": 0.9126001596450806, + "learning_rate": 0.00019884660107532306, + "loss": 2.8469, + "step": 1063 + }, + { + "epoch": 0.08586877572431603, + "grad_norm": 0.9016061425209045, + "learning_rate": 0.00019884420903032912, + "loss": 2.7907, + "step": 1064 + }, + { + "epoch": 0.08594947946089904, + "grad_norm": 0.9134494066238403, + "learning_rate": 0.00019884181452187915, + "loss": 2.8426, + "step": 1065 + }, + { + "epoch": 
0.08603018319748204, + "grad_norm": 0.8891138434410095, + "learning_rate": 0.00019883941755003272, + "loss": 2.8092, + "step": 1066 + }, + { + "epoch": 0.08611088693406505, + "grad_norm": 0.822884202003479, + "learning_rate": 0.0001988370181148497, + "loss": 2.8454, + "step": 1067 + }, + { + "epoch": 0.08619159067064805, + "grad_norm": 0.8341901898384094, + "learning_rate": 0.0001988346162163898, + "loss": 2.8027, + "step": 1068 + }, + { + "epoch": 0.08627229440723105, + "grad_norm": 0.8653229475021362, + "learning_rate": 0.00019883221185471291, + "loss": 2.7487, + "step": 1069 + }, + { + "epoch": 0.08635299814381406, + "grad_norm": 0.8065966367721558, + "learning_rate": 0.00019882980502987894, + "loss": 2.7847, + "step": 1070 + }, + { + "epoch": 0.08643370188039706, + "grad_norm": 0.9106903076171875, + "learning_rate": 0.0001988273957419479, + "loss": 2.7962, + "step": 1071 + }, + { + "epoch": 0.08651440561698007, + "grad_norm": 0.953815221786499, + "learning_rate": 0.0001988249839909798, + "loss": 2.8168, + "step": 1072 + }, + { + "epoch": 0.08659510935356307, + "grad_norm": 0.8642842173576355, + "learning_rate": 0.00019882256977703477, + "loss": 2.8205, + "step": 1073 + }, + { + "epoch": 0.08667581309014608, + "grad_norm": 0.8500350117683411, + "learning_rate": 0.000198820153100173, + "loss": 2.8798, + "step": 1074 + }, + { + "epoch": 0.08675651682672908, + "grad_norm": 0.9212989807128906, + "learning_rate": 0.00019881773396045467, + "loss": 2.8088, + "step": 1075 + }, + { + "epoch": 0.08683722056331208, + "grad_norm": 0.8897970914840698, + "learning_rate": 0.0001988153123579401, + "loss": 2.7983, + "step": 1076 + }, + { + "epoch": 0.08691792429989509, + "grad_norm": 0.7942636609077454, + "learning_rate": 0.00019881288829268968, + "loss": 2.7711, + "step": 1077 + }, + { + "epoch": 0.08699862803647809, + "grad_norm": 0.8286700248718262, + "learning_rate": 0.00019881046176476374, + "loss": 2.7995, + "step": 1078 + }, + { + "epoch": 0.0870793317730611, + 
"grad_norm": 0.9436343908309937, + "learning_rate": 0.00019880803277422281, + "loss": 2.8399, + "step": 1079 + }, + { + "epoch": 0.0871600355096441, + "grad_norm": 0.9592518210411072, + "learning_rate": 0.00019880560132112742, + "loss": 2.7888, + "step": 1080 + }, + { + "epoch": 0.0872407392462271, + "grad_norm": 0.8956589698791504, + "learning_rate": 0.00019880316740553816, + "loss": 2.7635, + "step": 1081 + }, + { + "epoch": 0.08732144298281011, + "grad_norm": 1.055312156677246, + "learning_rate": 0.00019880073102751574, + "loss": 2.7778, + "step": 1082 + }, + { + "epoch": 0.08740214671939311, + "grad_norm": 0.783273458480835, + "learning_rate": 0.00019879829218712075, + "loss": 2.735, + "step": 1083 + }, + { + "epoch": 0.0874828504559761, + "grad_norm": 0.8315421938896179, + "learning_rate": 0.00019879585088441413, + "loss": 2.7973, + "step": 1084 + }, + { + "epoch": 0.08756355419255911, + "grad_norm": 0.9550945162773132, + "learning_rate": 0.00019879340711945662, + "loss": 2.8083, + "step": 1085 + }, + { + "epoch": 0.08764425792914211, + "grad_norm": 0.9579277634620667, + "learning_rate": 0.00019879096089230915, + "loss": 2.7411, + "step": 1086 + }, + { + "epoch": 0.08772496166572512, + "grad_norm": 0.8602219223976135, + "learning_rate": 0.0001987885122030327, + "loss": 2.7461, + "step": 1087 + }, + { + "epoch": 0.08780566540230812, + "grad_norm": 0.9749068021774292, + "learning_rate": 0.00019878606105168829, + "loss": 2.7701, + "step": 1088 + }, + { + "epoch": 0.08788636913889113, + "grad_norm": 0.8128982186317444, + "learning_rate": 0.00019878360743833703, + "loss": 2.7949, + "step": 1089 + }, + { + "epoch": 0.08796707287547413, + "grad_norm": 0.9177080988883972, + "learning_rate": 0.00019878115136304003, + "loss": 2.7471, + "step": 1090 + }, + { + "epoch": 0.08804777661205714, + "grad_norm": 0.9052132368087769, + "learning_rate": 0.0001987786928258585, + "loss": 2.8356, + "step": 1091 + }, + { + "epoch": 0.08812848034864014, + "grad_norm": 
0.8972994089126587, + "learning_rate": 0.00019877623182685378, + "loss": 2.8304, + "step": 1092 + }, + { + "epoch": 0.08820918408522314, + "grad_norm": 0.861251950263977, + "learning_rate": 0.0001987737683660871, + "loss": 2.8436, + "step": 1093 + }, + { + "epoch": 0.08828988782180615, + "grad_norm": 0.9139869809150696, + "learning_rate": 0.00019877130244361996, + "loss": 2.7583, + "step": 1094 + }, + { + "epoch": 0.08837059155838915, + "grad_norm": 0.8441170454025269, + "learning_rate": 0.00019876883405951377, + "loss": 2.7508, + "step": 1095 + }, + { + "epoch": 0.08845129529497216, + "grad_norm": 0.8624769449234009, + "learning_rate": 0.00019876636321383004, + "loss": 2.8003, + "step": 1096 + }, + { + "epoch": 0.08853199903155516, + "grad_norm": 0.9033877849578857, + "learning_rate": 0.00019876388990663037, + "loss": 2.7934, + "step": 1097 + }, + { + "epoch": 0.08861270276813817, + "grad_norm": 0.9492632746696472, + "learning_rate": 0.0001987614141379764, + "loss": 2.7852, + "step": 1098 + }, + { + "epoch": 0.08869340650472117, + "grad_norm": 0.9004682302474976, + "learning_rate": 0.00019875893590792982, + "loss": 2.7518, + "step": 1099 + }, + { + "epoch": 0.08877411024130417, + "grad_norm": 0.8352272510528564, + "learning_rate": 0.0001987564552165524, + "loss": 2.8035, + "step": 1100 + }, + { + "epoch": 0.08885481397788718, + "grad_norm": 0.8488562107086182, + "learning_rate": 0.00019875397206390593, + "loss": 2.7672, + "step": 1101 + }, + { + "epoch": 0.08893551771447018, + "grad_norm": 0.9450985193252563, + "learning_rate": 0.00019875148645005238, + "loss": 2.7558, + "step": 1102 + }, + { + "epoch": 0.08901622145105319, + "grad_norm": 0.9203561544418335, + "learning_rate": 0.0001987489983750536, + "loss": 2.7983, + "step": 1103 + }, + { + "epoch": 0.08909692518763619, + "grad_norm": 0.8761897087097168, + "learning_rate": 0.0001987465078389717, + "loss": 2.7536, + "step": 1104 + }, + { + "epoch": 0.0891776289242192, + "grad_norm": 0.9064637422561646, + 
"learning_rate": 0.00019874401484186867, + "loss": 2.8104, + "step": 1105 + }, + { + "epoch": 0.0892583326608022, + "grad_norm": 0.8394999504089355, + "learning_rate": 0.00019874151938380666, + "loss": 2.7459, + "step": 1106 + }, + { + "epoch": 0.0893390363973852, + "grad_norm": 0.8782099485397339, + "learning_rate": 0.00019873902146484785, + "loss": 2.8675, + "step": 1107 + }, + { + "epoch": 0.08941974013396821, + "grad_norm": 0.8564850091934204, + "learning_rate": 0.00019873652108505458, + "loss": 2.8561, + "step": 1108 + }, + { + "epoch": 0.08950044387055121, + "grad_norm": 0.8343809843063354, + "learning_rate": 0.0001987340182444891, + "loss": 2.8406, + "step": 1109 + }, + { + "epoch": 0.0895811476071342, + "grad_norm": 1.096273422241211, + "learning_rate": 0.00019873151294321376, + "loss": 2.8264, + "step": 1110 + }, + { + "epoch": 0.08966185134371721, + "grad_norm": 0.8654618263244629, + "learning_rate": 0.00019872900518129103, + "loss": 2.7956, + "step": 1111 + }, + { + "epoch": 0.08974255508030021, + "grad_norm": 0.8868138194084167, + "learning_rate": 0.00019872649495878344, + "loss": 2.8028, + "step": 1112 + }, + { + "epoch": 0.08982325881688322, + "grad_norm": 0.8139104843139648, + "learning_rate": 0.00019872398227575348, + "loss": 2.7502, + "step": 1113 + }, + { + "epoch": 0.08990396255346622, + "grad_norm": 0.8277762532234192, + "learning_rate": 0.00019872146713226384, + "loss": 2.7913, + "step": 1114 + }, + { + "epoch": 0.08998466629004923, + "grad_norm": 0.8470397591590881, + "learning_rate": 0.00019871894952837717, + "loss": 2.7982, + "step": 1115 + }, + { + "epoch": 0.09006537002663223, + "grad_norm": 0.8424760103225708, + "learning_rate": 0.00019871642946415625, + "loss": 2.8067, + "step": 1116 + }, + { + "epoch": 0.09014607376321523, + "grad_norm": 0.8253894448280334, + "learning_rate": 0.00019871390693966382, + "loss": 2.8339, + "step": 1117 + }, + { + "epoch": 0.09022677749979824, + "grad_norm": 0.8120691776275635, + "learning_rate": 
0.00019871138195496282, + "loss": 2.7938, + "step": 1118 + }, + { + "epoch": 0.09030748123638124, + "grad_norm": 0.920189619064331, + "learning_rate": 0.00019870885451011617, + "loss": 2.8083, + "step": 1119 + }, + { + "epoch": 0.09038818497296425, + "grad_norm": 0.8990969657897949, + "learning_rate": 0.0001987063246051868, + "loss": 2.7481, + "step": 1120 + }, + { + "epoch": 0.09046888870954725, + "grad_norm": 0.8280801773071289, + "learning_rate": 0.0001987037922402378, + "loss": 2.8536, + "step": 1121 + }, + { + "epoch": 0.09054959244613026, + "grad_norm": 0.8510503768920898, + "learning_rate": 0.0001987012574153323, + "loss": 2.758, + "step": 1122 + }, + { + "epoch": 0.09063029618271326, + "grad_norm": 0.9103946685791016, + "learning_rate": 0.00019869872013053344, + "loss": 2.7594, + "step": 1123 + }, + { + "epoch": 0.09071099991929626, + "grad_norm": 0.804916262626648, + "learning_rate": 0.00019869618038590448, + "loss": 2.7489, + "step": 1124 + }, + { + "epoch": 0.09079170365587927, + "grad_norm": 0.7542802095413208, + "learning_rate": 0.00019869363818150867, + "loss": 2.76, + "step": 1125 + }, + { + "epoch": 0.09087240739246227, + "grad_norm": 0.7725108861923218, + "learning_rate": 0.00019869109351740947, + "loss": 2.8124, + "step": 1126 + }, + { + "epoch": 0.09095311112904528, + "grad_norm": 0.8533692955970764, + "learning_rate": 0.0001986885463936702, + "loss": 2.8499, + "step": 1127 + }, + { + "epoch": 0.09103381486562828, + "grad_norm": 0.8351541757583618, + "learning_rate": 0.0001986859968103544, + "loss": 2.8075, + "step": 1128 + }, + { + "epoch": 0.09111451860221129, + "grad_norm": 0.8780044913291931, + "learning_rate": 0.0001986834447675256, + "loss": 2.7587, + "step": 1129 + }, + { + "epoch": 0.09119522233879429, + "grad_norm": 0.9587519764900208, + "learning_rate": 0.00019868089026524736, + "loss": 2.8069, + "step": 1130 + }, + { + "epoch": 0.0912759260753773, + "grad_norm": 0.8285651206970215, + "learning_rate": 0.00019867833330358342, + "loss": 
2.8209, + "step": 1131 + }, + { + "epoch": 0.0913566298119603, + "grad_norm": 0.8589211106300354, + "learning_rate": 0.00019867577388259745, + "loss": 2.8144, + "step": 1132 + }, + { + "epoch": 0.0914373335485433, + "grad_norm": 0.8740364909172058, + "learning_rate": 0.00019867321200235324, + "loss": 2.858, + "step": 1133 + }, + { + "epoch": 0.09151803728512631, + "grad_norm": 0.8368108868598938, + "learning_rate": 0.00019867064766291467, + "loss": 2.7997, + "step": 1134 + }, + { + "epoch": 0.0915987410217093, + "grad_norm": 0.8243690133094788, + "learning_rate": 0.00019866808086434564, + "loss": 2.7925, + "step": 1135 + }, + { + "epoch": 0.0916794447582923, + "grad_norm": 0.8296996355056763, + "learning_rate": 0.0001986655116067101, + "loss": 2.7953, + "step": 1136 + }, + { + "epoch": 0.09176014849487531, + "grad_norm": 0.9255942702293396, + "learning_rate": 0.0001986629398900721, + "loss": 2.844, + "step": 1137 + }, + { + "epoch": 0.09184085223145831, + "grad_norm": 0.7498174905776978, + "learning_rate": 0.00019866036571449574, + "loss": 2.7372, + "step": 1138 + }, + { + "epoch": 0.09192155596804132, + "grad_norm": 0.8170139193534851, + "learning_rate": 0.00019865778908004513, + "loss": 2.7656, + "step": 1139 + }, + { + "epoch": 0.09200225970462432, + "grad_norm": 0.8858106732368469, + "learning_rate": 0.00019865520998678458, + "loss": 2.7657, + "step": 1140 + }, + { + "epoch": 0.09208296344120732, + "grad_norm": 0.8789847493171692, + "learning_rate": 0.00019865262843477826, + "loss": 2.8419, + "step": 1141 + }, + { + "epoch": 0.09216366717779033, + "grad_norm": 0.8433314561843872, + "learning_rate": 0.00019865004442409058, + "loss": 2.7981, + "step": 1142 + }, + { + "epoch": 0.09224437091437333, + "grad_norm": 0.8822595477104187, + "learning_rate": 0.0001986474579547859, + "loss": 2.8368, + "step": 1143 + }, + { + "epoch": 0.09232507465095634, + "grad_norm": 0.9067013263702393, + "learning_rate": 0.00019864486902692872, + "loss": 2.7807, + "step": 1144 + }, + { 
+ "epoch": 0.09240577838753934, + "grad_norm": 0.9551558494567871, + "learning_rate": 0.00019864227764058355, + "loss": 2.7617, + "step": 1145 + }, + { + "epoch": 0.09248648212412235, + "grad_norm": 0.8337206244468689, + "learning_rate": 0.00019863968379581494, + "loss": 2.8289, + "step": 1146 + }, + { + "epoch": 0.09256718586070535, + "grad_norm": 0.952702522277832, + "learning_rate": 0.0001986370874926876, + "loss": 2.8508, + "step": 1147 + }, + { + "epoch": 0.09264788959728835, + "grad_norm": 0.8586699366569519, + "learning_rate": 0.00019863448873126615, + "loss": 2.8784, + "step": 1148 + }, + { + "epoch": 0.09272859333387136, + "grad_norm": 0.7625309228897095, + "learning_rate": 0.00019863188751161544, + "loss": 2.7936, + "step": 1149 + }, + { + "epoch": 0.09280929707045436, + "grad_norm": 0.8912700414657593, + "learning_rate": 0.0001986292838338003, + "loss": 2.8745, + "step": 1150 + }, + { + "epoch": 0.09289000080703737, + "grad_norm": 0.8618904948234558, + "learning_rate": 0.00019862667769788553, + "loss": 2.8086, + "step": 1151 + }, + { + "epoch": 0.09297070454362037, + "grad_norm": 1.0013352632522583, + "learning_rate": 0.00019862406910393617, + "loss": 2.8211, + "step": 1152 + }, + { + "epoch": 0.09305140828020338, + "grad_norm": 0.7922475337982178, + "learning_rate": 0.0001986214580520172, + "loss": 2.7668, + "step": 1153 + }, + { + "epoch": 0.09313211201678638, + "grad_norm": 0.9490330815315247, + "learning_rate": 0.00019861884454219365, + "loss": 2.7571, + "step": 1154 + }, + { + "epoch": 0.09321281575336939, + "grad_norm": 0.8780270218849182, + "learning_rate": 0.00019861622857453076, + "loss": 2.7598, + "step": 1155 + }, + { + "epoch": 0.09329351948995239, + "grad_norm": 0.9220066070556641, + "learning_rate": 0.00019861361014909365, + "loss": 2.7609, + "step": 1156 + }, + { + "epoch": 0.0933742232265354, + "grad_norm": 0.8299020528793335, + "learning_rate": 0.0001986109892659476, + "loss": 2.8655, + "step": 1157 + }, + { + "epoch": 
0.0934549269631184, + "grad_norm": 0.9700348377227783, + "learning_rate": 0.0001986083659251579, + "loss": 2.8597, + "step": 1158 + }, + { + "epoch": 0.0935356306997014, + "grad_norm": 0.8820784687995911, + "learning_rate": 0.00019860574012679001, + "loss": 2.8776, + "step": 1159 + }, + { + "epoch": 0.0936163344362844, + "grad_norm": 0.8134172558784485, + "learning_rate": 0.0001986031118709093, + "loss": 2.8163, + "step": 1160 + }, + { + "epoch": 0.0936970381728674, + "grad_norm": 0.885974109172821, + "learning_rate": 0.00019860048115758123, + "loss": 2.752, + "step": 1161 + }, + { + "epoch": 0.0937777419094504, + "grad_norm": 0.9650186896324158, + "learning_rate": 0.0001985978479868715, + "loss": 2.7587, + "step": 1162 + }, + { + "epoch": 0.0938584456460334, + "grad_norm": 0.8550445437431335, + "learning_rate": 0.00019859521235884563, + "loss": 2.7887, + "step": 1163 + }, + { + "epoch": 0.09393914938261641, + "grad_norm": 0.9686560034751892, + "learning_rate": 0.00019859257427356933, + "loss": 2.7974, + "step": 1164 + }, + { + "epoch": 0.09401985311919941, + "grad_norm": 0.9185387492179871, + "learning_rate": 0.00019858993373110837, + "loss": 2.7933, + "step": 1165 + }, + { + "epoch": 0.09410055685578242, + "grad_norm": 0.9549610018730164, + "learning_rate": 0.00019858729073152852, + "loss": 2.7698, + "step": 1166 + }, + { + "epoch": 0.09418126059236542, + "grad_norm": 1.0523492097854614, + "learning_rate": 0.0001985846452748957, + "loss": 2.7215, + "step": 1167 + }, + { + "epoch": 0.09426196432894843, + "grad_norm": 0.8551118969917297, + "learning_rate": 0.00019858199736127582, + "loss": 2.805, + "step": 1168 + }, + { + "epoch": 0.09434266806553143, + "grad_norm": 1.021374225616455, + "learning_rate": 0.0001985793469907349, + "loss": 2.794, + "step": 1169 + }, + { + "epoch": 0.09442337180211444, + "grad_norm": 0.8745501041412354, + "learning_rate": 0.0001985766941633389, + "loss": 2.7793, + "step": 1170 + }, + { + "epoch": 0.09450407553869744, + "grad_norm": 
0.7426434755325317, + "learning_rate": 0.00019857403887915402, + "loss": 2.7808, + "step": 1171 + }, + { + "epoch": 0.09458477927528045, + "grad_norm": 0.9183726906776428, + "learning_rate": 0.0001985713811382464, + "loss": 2.8001, + "step": 1172 + }, + { + "epoch": 0.09466548301186345, + "grad_norm": 0.8136709928512573, + "learning_rate": 0.00019856872094068233, + "loss": 2.7394, + "step": 1173 + }, + { + "epoch": 0.09474618674844645, + "grad_norm": 0.9399348497390747, + "learning_rate": 0.00019856605828652807, + "loss": 2.7733, + "step": 1174 + }, + { + "epoch": 0.09482689048502946, + "grad_norm": 0.8233176469802856, + "learning_rate": 0.00019856339317584997, + "loss": 2.7672, + "step": 1175 + }, + { + "epoch": 0.09490759422161246, + "grad_norm": 0.9157048463821411, + "learning_rate": 0.00019856072560871447, + "loss": 2.7992, + "step": 1176 + }, + { + "epoch": 0.09498829795819547, + "grad_norm": 0.8729545474052429, + "learning_rate": 0.00019855805558518803, + "loss": 2.749, + "step": 1177 + }, + { + "epoch": 0.09506900169477847, + "grad_norm": 0.8592300415039062, + "learning_rate": 0.00019855538310533722, + "loss": 2.7257, + "step": 1178 + }, + { + "epoch": 0.09514970543136148, + "grad_norm": 0.8470803499221802, + "learning_rate": 0.00019855270816922867, + "loss": 2.7479, + "step": 1179 + }, + { + "epoch": 0.09523040916794448, + "grad_norm": 0.8538667559623718, + "learning_rate": 0.00019855003077692897, + "loss": 2.7576, + "step": 1180 + }, + { + "epoch": 0.09531111290452748, + "grad_norm": 0.8890984654426575, + "learning_rate": 0.0001985473509285049, + "loss": 2.7961, + "step": 1181 + }, + { + "epoch": 0.09539181664111049, + "grad_norm": 0.7769411206245422, + "learning_rate": 0.00019854466862402324, + "loss": 2.8087, + "step": 1182 + }, + { + "epoch": 0.09547252037769349, + "grad_norm": 0.8892520666122437, + "learning_rate": 0.00019854198386355085, + "loss": 2.7935, + "step": 1183 + }, + { + "epoch": 0.0955532241142765, + "grad_norm": 0.8675585389137268, + 
"learning_rate": 0.00019853929664715464, + "loss": 2.833, + "step": 1184 + }, + { + "epoch": 0.0956339278508595, + "grad_norm": 0.8053853511810303, + "learning_rate": 0.00019853660697490154, + "loss": 2.8002, + "step": 1185 + }, + { + "epoch": 0.09571463158744249, + "grad_norm": 0.9237198829650879, + "learning_rate": 0.00019853391484685865, + "loss": 2.8281, + "step": 1186 + }, + { + "epoch": 0.0957953353240255, + "grad_norm": 0.8432926535606384, + "learning_rate": 0.000198531220263093, + "loss": 2.8131, + "step": 1187 + }, + { + "epoch": 0.0958760390606085, + "grad_norm": 0.796380341053009, + "learning_rate": 0.0001985285232236718, + "loss": 2.753, + "step": 1188 + }, + { + "epoch": 0.0959567427971915, + "grad_norm": 0.9183037281036377, + "learning_rate": 0.00019852582372866225, + "loss": 2.7625, + "step": 1189 + }, + { + "epoch": 0.09603744653377451, + "grad_norm": 0.8194435238838196, + "learning_rate": 0.0001985231217781316, + "loss": 2.7906, + "step": 1190 + }, + { + "epoch": 0.09611815027035751, + "grad_norm": 0.8430871367454529, + "learning_rate": 0.00019852041737214725, + "loss": 2.8457, + "step": 1191 + }, + { + "epoch": 0.09619885400694052, + "grad_norm": 1.0237345695495605, + "learning_rate": 0.0001985177105107765, + "loss": 2.789, + "step": 1192 + }, + { + "epoch": 0.09627955774352352, + "grad_norm": 0.8721581101417542, + "learning_rate": 0.00019851500119408692, + "loss": 2.7187, + "step": 1193 + }, + { + "epoch": 0.09636026148010653, + "grad_norm": 0.8089142441749573, + "learning_rate": 0.00019851228942214603, + "loss": 2.7544, + "step": 1194 + }, + { + "epoch": 0.09644096521668953, + "grad_norm": 1.1076842546463013, + "learning_rate": 0.0001985095751950213, + "loss": 2.7859, + "step": 1195 + }, + { + "epoch": 0.09652166895327254, + "grad_norm": 0.84585040807724, + "learning_rate": 0.0001985068585127805, + "loss": 2.8005, + "step": 1196 + }, + { + "epoch": 0.09660237268985554, + "grad_norm": 0.8231167197227478, + "learning_rate": 0.00019850413937549127, 
+ "loss": 2.8561, + "step": 1197 + }, + { + "epoch": 0.09668307642643854, + "grad_norm": 1.0028103590011597, + "learning_rate": 0.00019850141778322136, + "loss": 2.8049, + "step": 1198 + }, + { + "epoch": 0.09676378016302155, + "grad_norm": 0.8575148582458496, + "learning_rate": 0.0001984986937360387, + "loss": 2.7723, + "step": 1199 + }, + { + "epoch": 0.09684448389960455, + "grad_norm": 0.8567116260528564, + "learning_rate": 0.00019849596723401107, + "loss": 2.7418, + "step": 1200 + }, + { + "epoch": 0.09692518763618756, + "grad_norm": 1.1159218549728394, + "learning_rate": 0.00019849323827720645, + "loss": 2.8352, + "step": 1201 + }, + { + "epoch": 0.09700589137277056, + "grad_norm": 0.849656879901886, + "learning_rate": 0.0001984905068656929, + "loss": 2.7875, + "step": 1202 + }, + { + "epoch": 0.09708659510935357, + "grad_norm": 0.8479150533676147, + "learning_rate": 0.00019848777299953847, + "loss": 2.7828, + "step": 1203 + }, + { + "epoch": 0.09716729884593657, + "grad_norm": 0.9143954515457153, + "learning_rate": 0.00019848503667881125, + "loss": 2.7978, + "step": 1204 + }, + { + "epoch": 0.09724800258251957, + "grad_norm": 0.8162297010421753, + "learning_rate": 0.0001984822979035795, + "loss": 2.7621, + "step": 1205 + }, + { + "epoch": 0.09732870631910258, + "grad_norm": 0.8625509142875671, + "learning_rate": 0.00019847955667391144, + "loss": 2.7484, + "step": 1206 + }, + { + "epoch": 0.09740941005568558, + "grad_norm": 0.8485168218612671, + "learning_rate": 0.00019847681298987543, + "loss": 2.7599, + "step": 1207 + }, + { + "epoch": 0.09749011379226859, + "grad_norm": 0.8962678909301758, + "learning_rate": 0.00019847406685153976, + "loss": 2.7753, + "step": 1208 + }, + { + "epoch": 0.09757081752885159, + "grad_norm": 0.8890791535377502, + "learning_rate": 0.00019847131825897297, + "loss": 2.7635, + "step": 1209 + }, + { + "epoch": 0.0976515212654346, + "grad_norm": 0.8461710810661316, + "learning_rate": 0.00019846856721224355, + "loss": 2.796, + "step": 
1210 + }, + { + "epoch": 0.0977322250020176, + "grad_norm": 0.912738025188446, + "learning_rate": 0.00019846581371141996, + "loss": 2.7889, + "step": 1211 + }, + { + "epoch": 0.09781292873860059, + "grad_norm": 0.8530749082565308, + "learning_rate": 0.00019846305775657097, + "loss": 2.8298, + "step": 1212 + }, + { + "epoch": 0.0978936324751836, + "grad_norm": 0.8890148401260376, + "learning_rate": 0.00019846029934776516, + "loss": 2.7491, + "step": 1213 + }, + { + "epoch": 0.0979743362117666, + "grad_norm": 0.8936887979507446, + "learning_rate": 0.0001984575384850713, + "loss": 2.7759, + "step": 1214 + }, + { + "epoch": 0.0980550399483496, + "grad_norm": 0.7811321020126343, + "learning_rate": 0.00019845477516855823, + "loss": 2.8126, + "step": 1215 + }, + { + "epoch": 0.09813574368493261, + "grad_norm": 0.8751768469810486, + "learning_rate": 0.00019845200939829484, + "loss": 2.792, + "step": 1216 + }, + { + "epoch": 0.09821644742151561, + "grad_norm": 0.8749501705169678, + "learning_rate": 0.00019844924117434998, + "loss": 2.7818, + "step": 1217 + }, + { + "epoch": 0.09829715115809862, + "grad_norm": 0.8130955100059509, + "learning_rate": 0.0001984464704967927, + "loss": 2.8581, + "step": 1218 + }, + { + "epoch": 0.09837785489468162, + "grad_norm": 0.8158220648765564, + "learning_rate": 0.00019844369736569196, + "loss": 2.7704, + "step": 1219 + }, + { + "epoch": 0.09845855863126463, + "grad_norm": 0.9351849555969238, + "learning_rate": 0.00019844092178111702, + "loss": 2.7857, + "step": 1220 + }, + { + "epoch": 0.09853926236784763, + "grad_norm": 0.8373914957046509, + "learning_rate": 0.00019843814374313697, + "loss": 2.8217, + "step": 1221 + }, + { + "epoch": 0.09861996610443063, + "grad_norm": 0.8919960856437683, + "learning_rate": 0.00019843536325182104, + "loss": 2.7914, + "step": 1222 + }, + { + "epoch": 0.09870066984101364, + "grad_norm": 0.9994316697120667, + "learning_rate": 0.00019843258030723858, + "loss": 2.7981, + "step": 1223 + }, + { + "epoch": 
0.09878137357759664, + "grad_norm": 0.8144915699958801, + "learning_rate": 0.0001984297949094589, + "loss": 2.811, + "step": 1224 + }, + { + "epoch": 0.09886207731417965, + "grad_norm": 0.8957876563072205, + "learning_rate": 0.0001984270070585514, + "loss": 2.7752, + "step": 1225 + }, + { + "epoch": 0.09894278105076265, + "grad_norm": 0.9426520466804504, + "learning_rate": 0.0001984242167545856, + "loss": 2.8139, + "step": 1226 + }, + { + "epoch": 0.09902348478734566, + "grad_norm": 0.888769268989563, + "learning_rate": 0.00019842142399763106, + "loss": 2.8305, + "step": 1227 + }, + { + "epoch": 0.09910418852392866, + "grad_norm": 0.9497748613357544, + "learning_rate": 0.00019841862878775736, + "loss": 2.748, + "step": 1228 + }, + { + "epoch": 0.09918489226051166, + "grad_norm": 0.8715065717697144, + "learning_rate": 0.00019841583112503416, + "loss": 2.7794, + "step": 1229 + }, + { + "epoch": 0.09926559599709467, + "grad_norm": 0.875599205493927, + "learning_rate": 0.00019841303100953116, + "loss": 2.8016, + "step": 1230 + }, + { + "epoch": 0.09934629973367767, + "grad_norm": 0.8631919622421265, + "learning_rate": 0.0001984102284413182, + "loss": 2.8239, + "step": 1231 + }, + { + "epoch": 0.09942700347026068, + "grad_norm": 0.9028074741363525, + "learning_rate": 0.0001984074234204651, + "loss": 2.8372, + "step": 1232 + }, + { + "epoch": 0.09950770720684368, + "grad_norm": 0.890933096408844, + "learning_rate": 0.00019840461594704175, + "loss": 2.799, + "step": 1233 + }, + { + "epoch": 0.09958841094342669, + "grad_norm": 0.9626480340957642, + "learning_rate": 0.00019840180602111816, + "loss": 2.8207, + "step": 1234 + }, + { + "epoch": 0.09966911468000969, + "grad_norm": 0.798394501209259, + "learning_rate": 0.00019839899364276433, + "loss": 2.7784, + "step": 1235 + }, + { + "epoch": 0.0997498184165927, + "grad_norm": 0.8246447443962097, + "learning_rate": 0.00019839617881205036, + "loss": 2.8193, + "step": 1236 + }, + { + "epoch": 0.09983052215317569, + "grad_norm": 
0.8315989375114441, + "learning_rate": 0.0001983933615290464, + "loss": 2.8036, + "step": 1237 + }, + { + "epoch": 0.09991122588975869, + "grad_norm": 0.8889075517654419, + "learning_rate": 0.00019839054179382267, + "loss": 2.7606, + "step": 1238 + }, + { + "epoch": 0.0999919296263417, + "grad_norm": 0.7558645009994507, + "learning_rate": 0.00019838771960644942, + "loss": 2.7666, + "step": 1239 + }, + { + "epoch": 0.1000726333629247, + "grad_norm": 0.8876601457595825, + "learning_rate": 0.00019838489496699704, + "loss": 2.8778, + "step": 1240 + }, + { + "epoch": 0.1001533370995077, + "grad_norm": 0.8609516620635986, + "learning_rate": 0.00019838206787553588, + "loss": 2.8189, + "step": 1241 + }, + { + "epoch": 0.10023404083609071, + "grad_norm": 0.8521148562431335, + "learning_rate": 0.00019837923833213644, + "loss": 2.8159, + "step": 1242 + }, + { + "epoch": 0.10031474457267371, + "grad_norm": 0.9155359268188477, + "learning_rate": 0.0001983764063368692, + "loss": 2.8351, + "step": 1243 + }, + { + "epoch": 0.10039544830925672, + "grad_norm": 0.8595378398895264, + "learning_rate": 0.00019837357188980475, + "loss": 2.8447, + "step": 1244 + }, + { + "epoch": 0.10047615204583972, + "grad_norm": 0.900244951248169, + "learning_rate": 0.00019837073499101373, + "loss": 2.8646, + "step": 1245 + }, + { + "epoch": 0.10055685578242272, + "grad_norm": 0.8404260277748108, + "learning_rate": 0.00019836789564056689, + "loss": 2.7824, + "step": 1246 + }, + { + "epoch": 0.10063755951900573, + "grad_norm": 0.8776196241378784, + "learning_rate": 0.0001983650538385349, + "loss": 2.8045, + "step": 1247 + }, + { + "epoch": 0.10071826325558873, + "grad_norm": 0.8889327049255371, + "learning_rate": 0.00019836220958498868, + "loss": 2.7967, + "step": 1248 + }, + { + "epoch": 0.10079896699217174, + "grad_norm": 0.8905191421508789, + "learning_rate": 0.00019835936287999906, + "loss": 2.8167, + "step": 1249 + }, + { + "epoch": 0.10087967072875474, + "grad_norm": 0.839970052242279, + 
"learning_rate": 0.000198356513723637, + "loss": 2.8643, + "step": 1250 + }, + { + "epoch": 0.10096037446533775, + "grad_norm": 0.7989531755447388, + "learning_rate": 0.00019835366211597353, + "loss": 2.8493, + "step": 1251 + }, + { + "epoch": 0.10104107820192075, + "grad_norm": 0.7960095405578613, + "learning_rate": 0.0001983508080570797, + "loss": 2.7377, + "step": 1252 + }, + { + "epoch": 0.10112178193850375, + "grad_norm": 0.7989903092384338, + "learning_rate": 0.00019834795154702661, + "loss": 2.7409, + "step": 1253 + }, + { + "epoch": 0.10120248567508676, + "grad_norm": 0.8557813167572021, + "learning_rate": 0.0001983450925858855, + "loss": 2.7945, + "step": 1254 + }, + { + "epoch": 0.10128318941166976, + "grad_norm": 0.948357880115509, + "learning_rate": 0.0001983422311737276, + "loss": 2.826, + "step": 1255 + }, + { + "epoch": 0.10136389314825277, + "grad_norm": 0.8356020450592041, + "learning_rate": 0.00019833936731062423, + "loss": 2.8157, + "step": 1256 + }, + { + "epoch": 0.10144459688483577, + "grad_norm": 0.8199872970581055, + "learning_rate": 0.00019833650099664678, + "loss": 2.7273, + "step": 1257 + }, + { + "epoch": 0.10152530062141878, + "grad_norm": 0.8178466558456421, + "learning_rate": 0.00019833363223186669, + "loss": 2.7513, + "step": 1258 + }, + { + "epoch": 0.10160600435800178, + "grad_norm": 0.8165889978408813, + "learning_rate": 0.00019833076101635538, + "loss": 2.7689, + "step": 1259 + }, + { + "epoch": 0.10168670809458479, + "grad_norm": 0.8240275979042053, + "learning_rate": 0.0001983278873501845, + "loss": 2.7477, + "step": 1260 + }, + { + "epoch": 0.10176741183116779, + "grad_norm": 0.8470584750175476, + "learning_rate": 0.00019832501123342563, + "loss": 2.7414, + "step": 1261 + }, + { + "epoch": 0.1018481155677508, + "grad_norm": 0.819063663482666, + "learning_rate": 0.00019832213266615046, + "loss": 2.7335, + "step": 1262 + }, + { + "epoch": 0.10192881930433378, + "grad_norm": 0.8045673370361328, + "learning_rate": 
0.00019831925164843071, + "loss": 2.8141, + "step": 1263 + }, + { + "epoch": 0.10200952304091679, + "grad_norm": 0.7827214598655701, + "learning_rate": 0.00019831636818033824, + "loss": 2.7549, + "step": 1264 + }, + { + "epoch": 0.10209022677749979, + "grad_norm": 0.9596436619758606, + "learning_rate": 0.00019831348226194485, + "loss": 2.7327, + "step": 1265 + }, + { + "epoch": 0.1021709305140828, + "grad_norm": 0.826909601688385, + "learning_rate": 0.0001983105938933225, + "loss": 2.7166, + "step": 1266 + }, + { + "epoch": 0.1022516342506658, + "grad_norm": 0.8060985207557678, + "learning_rate": 0.00019830770307454313, + "loss": 2.7514, + "step": 1267 + }, + { + "epoch": 0.1023323379872488, + "grad_norm": 0.8257390856742859, + "learning_rate": 0.00019830480980567887, + "loss": 2.77, + "step": 1268 + }, + { + "epoch": 0.10241304172383181, + "grad_norm": 0.844406008720398, + "learning_rate": 0.00019830191408680173, + "loss": 2.8548, + "step": 1269 + }, + { + "epoch": 0.10249374546041481, + "grad_norm": 0.84171462059021, + "learning_rate": 0.00019829901591798398, + "loss": 2.7404, + "step": 1270 + }, + { + "epoch": 0.10257444919699782, + "grad_norm": 0.8084118962287903, + "learning_rate": 0.00019829611529929774, + "loss": 2.8078, + "step": 1271 + }, + { + "epoch": 0.10265515293358082, + "grad_norm": 0.8273561000823975, + "learning_rate": 0.00019829321223081538, + "loss": 2.787, + "step": 1272 + }, + { + "epoch": 0.10273585667016383, + "grad_norm": 0.799098551273346, + "learning_rate": 0.00019829030671260925, + "loss": 2.7563, + "step": 1273 + }, + { + "epoch": 0.10281656040674683, + "grad_norm": 0.885866105556488, + "learning_rate": 0.00019828739874475172, + "loss": 2.7313, + "step": 1274 + }, + { + "epoch": 0.10289726414332984, + "grad_norm": 0.7702760696411133, + "learning_rate": 0.00019828448832731529, + "loss": 2.7919, + "step": 1275 + }, + { + "epoch": 0.10297796787991284, + "grad_norm": 0.7577444911003113, + "learning_rate": 0.0001982815754603725, + "loss": 
2.7149, + "step": 1276 + }, + { + "epoch": 0.10305867161649584, + "grad_norm": 0.8439713716506958, + "learning_rate": 0.00019827866014399592, + "loss": 2.7881, + "step": 1277 + }, + { + "epoch": 0.10313937535307885, + "grad_norm": 0.8504937291145325, + "learning_rate": 0.00019827574237825827, + "loss": 2.7611, + "step": 1278 + }, + { + "epoch": 0.10322007908966185, + "grad_norm": 0.7775665521621704, + "learning_rate": 0.00019827282216323218, + "loss": 2.7312, + "step": 1279 + }, + { + "epoch": 0.10330078282624486, + "grad_norm": 0.8671591281890869, + "learning_rate": 0.00019826989949899048, + "loss": 2.836, + "step": 1280 + }, + { + "epoch": 0.10338148656282786, + "grad_norm": 0.9308713674545288, + "learning_rate": 0.00019826697438560603, + "loss": 2.7494, + "step": 1281 + }, + { + "epoch": 0.10346219029941087, + "grad_norm": 0.9145268797874451, + "learning_rate": 0.0001982640468231517, + "loss": 2.8054, + "step": 1282 + }, + { + "epoch": 0.10354289403599387, + "grad_norm": 0.8150805234909058, + "learning_rate": 0.00019826111681170043, + "loss": 2.7879, + "step": 1283 + }, + { + "epoch": 0.10362359777257688, + "grad_norm": 0.8576685786247253, + "learning_rate": 0.00019825818435132531, + "loss": 2.8184, + "step": 1284 + }, + { + "epoch": 0.10370430150915988, + "grad_norm": 0.8838599920272827, + "learning_rate": 0.00019825524944209937, + "loss": 2.7838, + "step": 1285 + }, + { + "epoch": 0.10378500524574288, + "grad_norm": 0.9119304418563843, + "learning_rate": 0.00019825231208409576, + "loss": 2.8392, + "step": 1286 + }, + { + "epoch": 0.10386570898232589, + "grad_norm": 0.8112398982048035, + "learning_rate": 0.00019824937227738771, + "loss": 2.7844, + "step": 1287 + }, + { + "epoch": 0.10394641271890888, + "grad_norm": 0.8714308738708496, + "learning_rate": 0.00019824643002204847, + "loss": 2.7765, + "step": 1288 + }, + { + "epoch": 0.10402711645549188, + "grad_norm": 0.8733358979225159, + "learning_rate": 0.00019824348531815138, + "loss": 2.771, + "step": 1289 + 
}, + { + "epoch": 0.10410782019207489, + "grad_norm": 0.8218281269073486, + "learning_rate": 0.00019824053816576981, + "loss": 2.8099, + "step": 1290 + }, + { + "epoch": 0.10418852392865789, + "grad_norm": 0.8647308945655823, + "learning_rate": 0.00019823758856497725, + "loss": 2.7738, + "step": 1291 + }, + { + "epoch": 0.1042692276652409, + "grad_norm": 0.8358582854270935, + "learning_rate": 0.00019823463651584718, + "loss": 2.8021, + "step": 1292 + }, + { + "epoch": 0.1043499314018239, + "grad_norm": 0.7943673133850098, + "learning_rate": 0.00019823168201845318, + "loss": 2.8293, + "step": 1293 + }, + { + "epoch": 0.1044306351384069, + "grad_norm": 0.8501425981521606, + "learning_rate": 0.0001982287250728689, + "loss": 2.7701, + "step": 1294 + }, + { + "epoch": 0.10451133887498991, + "grad_norm": 0.8503665328025818, + "learning_rate": 0.00019822576567916797, + "loss": 2.7881, + "step": 1295 + }, + { + "epoch": 0.10459204261157291, + "grad_norm": 0.9687628149986267, + "learning_rate": 0.0001982228038374242, + "loss": 2.7623, + "step": 1296 + }, + { + "epoch": 0.10467274634815592, + "grad_norm": 0.8034376502037048, + "learning_rate": 0.00019821983954771146, + "loss": 2.8072, + "step": 1297 + }, + { + "epoch": 0.10475345008473892, + "grad_norm": 0.817135214805603, + "learning_rate": 0.00019821687281010352, + "loss": 2.7572, + "step": 1298 + }, + { + "epoch": 0.10483415382132193, + "grad_norm": 0.7961457371711731, + "learning_rate": 0.0001982139036246744, + "loss": 2.8405, + "step": 1299 + }, + { + "epoch": 0.10491485755790493, + "grad_norm": 0.7572407722473145, + "learning_rate": 0.00019821093199149804, + "loss": 2.7495, + "step": 1300 + }, + { + "epoch": 0.10499556129448794, + "grad_norm": 0.7990664839744568, + "learning_rate": 0.00019820795791064856, + "loss": 2.7567, + "step": 1301 + }, + { + "epoch": 0.10507626503107094, + "grad_norm": 0.8197236061096191, + "learning_rate": 0.0001982049813822, + "loss": 2.7807, + "step": 1302 + }, + { + "epoch": 
0.10515696876765394, + "grad_norm": 0.9491304159164429, + "learning_rate": 0.00019820200240622664, + "loss": 2.8531, + "step": 1303 + }, + { + "epoch": 0.10523767250423695, + "grad_norm": 0.8143845200538635, + "learning_rate": 0.00019819902098280268, + "loss": 2.7542, + "step": 1304 + }, + { + "epoch": 0.10531837624081995, + "grad_norm": 0.9055941104888916, + "learning_rate": 0.0001981960371120024, + "loss": 2.863, + "step": 1305 + }, + { + "epoch": 0.10539907997740296, + "grad_norm": 0.7804721593856812, + "learning_rate": 0.0001981930507939002, + "loss": 2.8213, + "step": 1306 + }, + { + "epoch": 0.10547978371398596, + "grad_norm": 0.8375318050384521, + "learning_rate": 0.00019819006202857046, + "loss": 2.8222, + "step": 1307 + }, + { + "epoch": 0.10556048745056897, + "grad_norm": 0.9145569801330566, + "learning_rate": 0.00019818707081608773, + "loss": 2.805, + "step": 1308 + }, + { + "epoch": 0.10564119118715197, + "grad_norm": 0.7899324893951416, + "learning_rate": 0.00019818407715652654, + "loss": 2.8246, + "step": 1309 + }, + { + "epoch": 0.10572189492373497, + "grad_norm": 0.7843480110168457, + "learning_rate": 0.0001981810810499615, + "loss": 2.7909, + "step": 1310 + }, + { + "epoch": 0.10580259866031798, + "grad_norm": 0.8071008920669556, + "learning_rate": 0.00019817808249646723, + "loss": 2.7434, + "step": 1311 + }, + { + "epoch": 0.10588330239690098, + "grad_norm": 0.8682011961936951, + "learning_rate": 0.0001981750814961185, + "loss": 2.8387, + "step": 1312 + }, + { + "epoch": 0.10596400613348399, + "grad_norm": 0.7501091361045837, + "learning_rate": 0.0001981720780489902, + "loss": 2.7633, + "step": 1313 + }, + { + "epoch": 0.10604470987006698, + "grad_norm": 0.9259567856788635, + "learning_rate": 0.000198169072155157, + "loss": 2.8309, + "step": 1314 + }, + { + "epoch": 0.10612541360664998, + "grad_norm": 0.8018674254417419, + "learning_rate": 0.00019816606381469393, + "loss": 2.8647, + "step": 1315 + }, + { + "epoch": 0.10620611734323299, + 
"grad_norm": 0.8218088746070862, + "learning_rate": 0.00019816305302767595, + "loss": 2.823, + "step": 1316 + }, + { + "epoch": 0.10628682107981599, + "grad_norm": 0.812125027179718, + "learning_rate": 0.00019816003979417808, + "loss": 2.7216, + "step": 1317 + }, + { + "epoch": 0.106367524816399, + "grad_norm": 0.787407636642456, + "learning_rate": 0.0001981570241142754, + "loss": 2.7639, + "step": 1318 + }, + { + "epoch": 0.106448228552982, + "grad_norm": 0.7982528805732727, + "learning_rate": 0.00019815400598804312, + "loss": 2.8597, + "step": 1319 + }, + { + "epoch": 0.106528932289565, + "grad_norm": 0.8490404486656189, + "learning_rate": 0.00019815098541555646, + "loss": 2.7947, + "step": 1320 + }, + { + "epoch": 0.10660963602614801, + "grad_norm": 0.8743172883987427, + "learning_rate": 0.00019814796239689064, + "loss": 2.8674, + "step": 1321 + }, + { + "epoch": 0.10669033976273101, + "grad_norm": 0.8338125348091125, + "learning_rate": 0.00019814493693212106, + "loss": 2.781, + "step": 1322 + }, + { + "epoch": 0.10677104349931402, + "grad_norm": 0.871516764163971, + "learning_rate": 0.00019814190902132307, + "loss": 2.8742, + "step": 1323 + }, + { + "epoch": 0.10685174723589702, + "grad_norm": 0.8935555815696716, + "learning_rate": 0.00019813887866457216, + "loss": 2.7991, + "step": 1324 + }, + { + "epoch": 0.10693245097248003, + "grad_norm": 0.840067446231842, + "learning_rate": 0.00019813584586194388, + "loss": 2.7922, + "step": 1325 + }, + { + "epoch": 0.10701315470906303, + "grad_norm": 0.7919262647628784, + "learning_rate": 0.0001981328106135138, + "loss": 2.7912, + "step": 1326 + }, + { + "epoch": 0.10709385844564603, + "grad_norm": 0.7974550127983093, + "learning_rate": 0.00019812977291935752, + "loss": 2.8497, + "step": 1327 + }, + { + "epoch": 0.10717456218222904, + "grad_norm": 0.9126157164573669, + "learning_rate": 0.00019812673277955082, + "loss": 2.7698, + "step": 1328 + }, + { + "epoch": 0.10725526591881204, + "grad_norm": 0.8329752683639526, + 
"learning_rate": 0.0001981236901941694, + "loss": 2.8366, + "step": 1329 + }, + { + "epoch": 0.10733596965539505, + "grad_norm": 0.8313524127006531, + "learning_rate": 0.00019812064516328915, + "loss": 2.6863, + "step": 1330 + }, + { + "epoch": 0.10741667339197805, + "grad_norm": 0.8917783498764038, + "learning_rate": 0.0001981175976869859, + "loss": 2.7817, + "step": 1331 + }, + { + "epoch": 0.10749737712856106, + "grad_norm": 0.8370450735092163, + "learning_rate": 0.00019811454776533566, + "loss": 2.837, + "step": 1332 + }, + { + "epoch": 0.10757808086514406, + "grad_norm": 0.8415676355361938, + "learning_rate": 0.00019811149539841443, + "loss": 2.7399, + "step": 1333 + }, + { + "epoch": 0.10765878460172706, + "grad_norm": 0.8576632142066956, + "learning_rate": 0.00019810844058629825, + "loss": 2.7747, + "step": 1334 + }, + { + "epoch": 0.10773948833831007, + "grad_norm": 0.8943549394607544, + "learning_rate": 0.00019810538332906328, + "loss": 2.7368, + "step": 1335 + }, + { + "epoch": 0.10782019207489307, + "grad_norm": 0.8878718018531799, + "learning_rate": 0.00019810232362678568, + "loss": 2.7907, + "step": 1336 + }, + { + "epoch": 0.10790089581147608, + "grad_norm": 0.8131409287452698, + "learning_rate": 0.00019809926147954174, + "loss": 2.7782, + "step": 1337 + }, + { + "epoch": 0.10798159954805908, + "grad_norm": 0.8733747005462646, + "learning_rate": 0.0001980961968874078, + "loss": 2.8552, + "step": 1338 + }, + { + "epoch": 0.10806230328464207, + "grad_norm": 0.8997320532798767, + "learning_rate": 0.0001980931298504602, + "loss": 2.8452, + "step": 1339 + }, + { + "epoch": 0.10814300702122508, + "grad_norm": 0.8400282263755798, + "learning_rate": 0.00019809006036877538, + "loss": 2.786, + "step": 1340 + }, + { + "epoch": 0.10822371075780808, + "grad_norm": 0.8173925280570984, + "learning_rate": 0.00019808698844242983, + "loss": 2.8363, + "step": 1341 + }, + { + "epoch": 0.10830441449439109, + "grad_norm": 0.872278094291687, + "learning_rate": 
0.00019808391407150015, + "loss": 2.7789, + "step": 1342 + }, + { + "epoch": 0.10838511823097409, + "grad_norm": 0.8939952254295349, + "learning_rate": 0.00019808083725606293, + "loss": 2.7453, + "step": 1343 + }, + { + "epoch": 0.1084658219675571, + "grad_norm": 0.8351218104362488, + "learning_rate": 0.00019807775799619484, + "loss": 2.8004, + "step": 1344 + }, + { + "epoch": 0.1085465257041401, + "grad_norm": 0.8381102681159973, + "learning_rate": 0.00019807467629197266, + "loss": 2.8155, + "step": 1345 + }, + { + "epoch": 0.1086272294407231, + "grad_norm": 0.869458019733429, + "learning_rate": 0.00019807159214347317, + "loss": 2.8219, + "step": 1346 + }, + { + "epoch": 0.10870793317730611, + "grad_norm": 0.8251017928123474, + "learning_rate": 0.00019806850555077326, + "loss": 2.7978, + "step": 1347 + }, + { + "epoch": 0.10878863691388911, + "grad_norm": 0.8056492209434509, + "learning_rate": 0.0001980654165139498, + "loss": 2.7994, + "step": 1348 + }, + { + "epoch": 0.10886934065047212, + "grad_norm": 0.9566174745559692, + "learning_rate": 0.00019806232503307984, + "loss": 2.794, + "step": 1349 + }, + { + "epoch": 0.10895004438705512, + "grad_norm": 0.7891408801078796, + "learning_rate": 0.0001980592311082404, + "loss": 2.7134, + "step": 1350 + }, + { + "epoch": 0.10903074812363812, + "grad_norm": 0.8894741535186768, + "learning_rate": 0.00019805613473950862, + "loss": 2.7829, + "step": 1351 + }, + { + "epoch": 0.10911145186022113, + "grad_norm": 0.893086850643158, + "learning_rate": 0.0001980530359269616, + "loss": 2.7475, + "step": 1352 + }, + { + "epoch": 0.10919215559680413, + "grad_norm": 0.8758537173271179, + "learning_rate": 0.00019804993467067666, + "loss": 2.8715, + "step": 1353 + }, + { + "epoch": 0.10927285933338714, + "grad_norm": 0.9304648041725159, + "learning_rate": 0.00019804683097073098, + "loss": 2.8051, + "step": 1354 + }, + { + "epoch": 0.10935356306997014, + "grad_norm": 0.8465876579284668, + "learning_rate": 0.00019804372482720202, + 
"loss": 2.7879, + "step": 1355 + }, + { + "epoch": 0.10943426680655315, + "grad_norm": 0.8485612273216248, + "learning_rate": 0.00019804061624016713, + "loss": 2.7783, + "step": 1356 + }, + { + "epoch": 0.10951497054313615, + "grad_norm": 0.835630476474762, + "learning_rate": 0.0001980375052097038, + "loss": 2.8116, + "step": 1357 + }, + { + "epoch": 0.10959567427971915, + "grad_norm": 0.8404836058616638, + "learning_rate": 0.00019803439173588956, + "loss": 2.8257, + "step": 1358 + }, + { + "epoch": 0.10967637801630216, + "grad_norm": 0.8048505783081055, + "learning_rate": 0.00019803127581880206, + "loss": 2.7762, + "step": 1359 + }, + { + "epoch": 0.10975708175288516, + "grad_norm": 0.8481776118278503, + "learning_rate": 0.00019802815745851885, + "loss": 2.8243, + "step": 1360 + }, + { + "epoch": 0.10983778548946817, + "grad_norm": 0.8565996885299683, + "learning_rate": 0.00019802503665511775, + "loss": 2.7958, + "step": 1361 + }, + { + "epoch": 0.10991848922605117, + "grad_norm": 0.8867515921592712, + "learning_rate": 0.0001980219134086765, + "loss": 2.7973, + "step": 1362 + }, + { + "epoch": 0.10999919296263418, + "grad_norm": 0.8459765911102295, + "learning_rate": 0.0001980187877192729, + "loss": 2.848, + "step": 1363 + }, + { + "epoch": 0.11007989669921718, + "grad_norm": 0.7929832339286804, + "learning_rate": 0.0001980156595869849, + "loss": 2.8583, + "step": 1364 + }, + { + "epoch": 0.11016060043580017, + "grad_norm": 0.8475651741027832, + "learning_rate": 0.00019801252901189043, + "loss": 2.8436, + "step": 1365 + }, + { + "epoch": 0.11024130417238318, + "grad_norm": 0.8545576333999634, + "learning_rate": 0.00019800939599406755, + "loss": 2.7457, + "step": 1366 + }, + { + "epoch": 0.11032200790896618, + "grad_norm": 1.0093715190887451, + "learning_rate": 0.00019800626053359435, + "loss": 2.8198, + "step": 1367 + }, + { + "epoch": 0.11040271164554918, + "grad_norm": 0.8728145956993103, + "learning_rate": 0.0001980031226305489, + "loss": 2.7794, + "step": 1368 
+ }, + { + "epoch": 0.11048341538213219, + "grad_norm": 0.8538581728935242, + "learning_rate": 0.00019799998228500946, + "loss": 2.8018, + "step": 1369 + }, + { + "epoch": 0.11056411911871519, + "grad_norm": 0.9452785849571228, + "learning_rate": 0.00019799683949705432, + "loss": 2.8173, + "step": 1370 + }, + { + "epoch": 0.1106448228552982, + "grad_norm": 0.806508481502533, + "learning_rate": 0.00019799369426676174, + "loss": 2.8192, + "step": 1371 + }, + { + "epoch": 0.1107255265918812, + "grad_norm": 0.8952856063842773, + "learning_rate": 0.00019799054659421018, + "loss": 2.8072, + "step": 1372 + }, + { + "epoch": 0.1108062303284642, + "grad_norm": 0.8863561749458313, + "learning_rate": 0.00019798739647947802, + "loss": 2.7836, + "step": 1373 + }, + { + "epoch": 0.11088693406504721, + "grad_norm": 0.8544357419013977, + "learning_rate": 0.00019798424392264378, + "loss": 2.7714, + "step": 1374 + }, + { + "epoch": 0.11096763780163021, + "grad_norm": 0.807546854019165, + "learning_rate": 0.00019798108892378607, + "loss": 2.7635, + "step": 1375 + }, + { + "epoch": 0.11104834153821322, + "grad_norm": 0.8198233246803284, + "learning_rate": 0.0001979779314829835, + "loss": 2.8253, + "step": 1376 + }, + { + "epoch": 0.11112904527479622, + "grad_norm": 0.9268671870231628, + "learning_rate": 0.00019797477160031477, + "loss": 2.8007, + "step": 1377 + }, + { + "epoch": 0.11120974901137923, + "grad_norm": 0.8547680974006653, + "learning_rate": 0.0001979716092758586, + "loss": 2.7749, + "step": 1378 + }, + { + "epoch": 0.11129045274796223, + "grad_norm": 0.8052394390106201, + "learning_rate": 0.00019796844450969384, + "loss": 2.763, + "step": 1379 + }, + { + "epoch": 0.11137115648454524, + "grad_norm": 0.8291144371032715, + "learning_rate": 0.00019796527730189936, + "loss": 2.8053, + "step": 1380 + }, + { + "epoch": 0.11145186022112824, + "grad_norm": 0.8114006519317627, + "learning_rate": 0.00019796210765255404, + "loss": 2.8047, + "step": 1381 + }, + { + "epoch": 
0.11153256395771124, + "grad_norm": 0.9326293468475342, + "learning_rate": 0.00019795893556173697, + "loss": 2.8199, + "step": 1382 + }, + { + "epoch": 0.11161326769429425, + "grad_norm": 0.7702555656433105, + "learning_rate": 0.00019795576102952714, + "loss": 2.7909, + "step": 1383 + }, + { + "epoch": 0.11169397143087725, + "grad_norm": 0.8115492463111877, + "learning_rate": 0.0001979525840560037, + "loss": 2.748, + "step": 1384 + }, + { + "epoch": 0.11177467516746026, + "grad_norm": 0.8926187753677368, + "learning_rate": 0.0001979494046412458, + "loss": 2.7791, + "step": 1385 + }, + { + "epoch": 0.11185537890404326, + "grad_norm": 0.8549754023551941, + "learning_rate": 0.0001979462227853327, + "loss": 2.7989, + "step": 1386 + }, + { + "epoch": 0.11193608264062627, + "grad_norm": 0.8625262975692749, + "learning_rate": 0.0001979430384883437, + "loss": 2.7202, + "step": 1387 + }, + { + "epoch": 0.11201678637720927, + "grad_norm": 0.8134698867797852, + "learning_rate": 0.00019793985175035813, + "loss": 2.8008, + "step": 1388 + }, + { + "epoch": 0.11209749011379228, + "grad_norm": 0.8546617031097412, + "learning_rate": 0.00019793666257145547, + "loss": 2.8076, + "step": 1389 + }, + { + "epoch": 0.11217819385037527, + "grad_norm": 0.8003748059272766, + "learning_rate": 0.00019793347095171514, + "loss": 2.826, + "step": 1390 + }, + { + "epoch": 0.11225889758695827, + "grad_norm": 0.8116614818572998, + "learning_rate": 0.00019793027689121674, + "loss": 2.7096, + "step": 1391 + }, + { + "epoch": 0.11233960132354127, + "grad_norm": 0.7785829901695251, + "learning_rate": 0.00019792708039003984, + "loss": 2.748, + "step": 1392 + }, + { + "epoch": 0.11242030506012428, + "grad_norm": 0.7999277710914612, + "learning_rate": 0.0001979238814482641, + "loss": 2.7671, + "step": 1393 + }, + { + "epoch": 0.11250100879670728, + "grad_norm": 0.8862190842628479, + "learning_rate": 0.00019792068006596925, + "loss": 2.8484, + "step": 1394 + }, + { + "epoch": 0.11258171253329029, + 
"grad_norm": 0.8747627139091492, + "learning_rate": 0.00019791747624323512, + "loss": 2.7477, + "step": 1395 + }, + { + "epoch": 0.11266241626987329, + "grad_norm": 0.8280831575393677, + "learning_rate": 0.0001979142699801415, + "loss": 2.87, + "step": 1396 + }, + { + "epoch": 0.1127431200064563, + "grad_norm": 0.8069074153900146, + "learning_rate": 0.00019791106127676832, + "loss": 2.7724, + "step": 1397 + }, + { + "epoch": 0.1128238237430393, + "grad_norm": 0.8253301382064819, + "learning_rate": 0.00019790785013319557, + "loss": 2.7351, + "step": 1398 + }, + { + "epoch": 0.1129045274796223, + "grad_norm": 0.8298853635787964, + "learning_rate": 0.00019790463654950323, + "loss": 2.7709, + "step": 1399 + }, + { + "epoch": 0.11298523121620531, + "grad_norm": 0.7796407341957092, + "learning_rate": 0.0001979014205257715, + "loss": 2.7766, + "step": 1400 + }, + { + "epoch": 0.11306593495278831, + "grad_norm": 0.8922166228294373, + "learning_rate": 0.00019789820206208037, + "loss": 2.8473, + "step": 1401 + }, + { + "epoch": 0.11314663868937132, + "grad_norm": 0.7763219475746155, + "learning_rate": 0.00019789498115851015, + "loss": 2.8629, + "step": 1402 + }, + { + "epoch": 0.11322734242595432, + "grad_norm": 0.8679928779602051, + "learning_rate": 0.0001978917578151411, + "loss": 2.8017, + "step": 1403 + }, + { + "epoch": 0.11330804616253733, + "grad_norm": 0.8491933941841125, + "learning_rate": 0.00019788853203205357, + "loss": 2.7156, + "step": 1404 + }, + { + "epoch": 0.11338874989912033, + "grad_norm": 0.8271194696426392, + "learning_rate": 0.00019788530380932792, + "loss": 2.7892, + "step": 1405 + }, + { + "epoch": 0.11346945363570334, + "grad_norm": 0.9224163293838501, + "learning_rate": 0.00019788207314704463, + "loss": 2.7824, + "step": 1406 + }, + { + "epoch": 0.11355015737228634, + "grad_norm": 0.7662777900695801, + "learning_rate": 0.00019787884004528422, + "loss": 2.7364, + "step": 1407 + }, + { + "epoch": 0.11363086110886934, + "grad_norm": 
0.8750362396240234, + "learning_rate": 0.00019787560450412728, + "loss": 2.7546, + "step": 1408 + }, + { + "epoch": 0.11371156484545235, + "grad_norm": 0.9158821105957031, + "learning_rate": 0.0001978723665236544, + "loss": 2.8304, + "step": 1409 + }, + { + "epoch": 0.11379226858203535, + "grad_norm": 0.8291050791740417, + "learning_rate": 0.0001978691261039463, + "loss": 2.758, + "step": 1410 + }, + { + "epoch": 0.11387297231861836, + "grad_norm": 0.801886796951294, + "learning_rate": 0.00019786588324508374, + "loss": 2.7805, + "step": 1411 + }, + { + "epoch": 0.11395367605520136, + "grad_norm": 0.8140222430229187, + "learning_rate": 0.00019786263794714757, + "loss": 2.8155, + "step": 1412 + }, + { + "epoch": 0.11403437979178437, + "grad_norm": 0.7747580409049988, + "learning_rate": 0.00019785939021021865, + "loss": 2.778, + "step": 1413 + }, + { + "epoch": 0.11411508352836737, + "grad_norm": 0.8954138159751892, + "learning_rate": 0.0001978561400343779, + "loss": 2.7756, + "step": 1414 + }, + { + "epoch": 0.11419578726495037, + "grad_norm": 0.9038921594619751, + "learning_rate": 0.00019785288741970634, + "loss": 2.7181, + "step": 1415 + }, + { + "epoch": 0.11427649100153336, + "grad_norm": 0.8284393548965454, + "learning_rate": 0.000197849632366285, + "loss": 2.7467, + "step": 1416 + }, + { + "epoch": 0.11435719473811637, + "grad_norm": 0.8996441960334778, + "learning_rate": 0.00019784637487419514, + "loss": 2.7918, + "step": 1417 + }, + { + "epoch": 0.11443789847469937, + "grad_norm": 0.9868448376655579, + "learning_rate": 0.00019784311494351777, + "loss": 2.7687, + "step": 1418 + }, + { + "epoch": 0.11451860221128238, + "grad_norm": 0.8491402864456177, + "learning_rate": 0.0001978398525743342, + "loss": 2.8492, + "step": 1419 + }, + { + "epoch": 0.11459930594786538, + "grad_norm": 1.06125807762146, + "learning_rate": 0.0001978365877667258, + "loss": 2.8041, + "step": 1420 + }, + { + "epoch": 0.11468000968444839, + "grad_norm": 0.8194011449813843, + 
"learning_rate": 0.00019783332052077386, + "loss": 2.7109, + "step": 1421 + }, + { + "epoch": 0.11476071342103139, + "grad_norm": 0.972620964050293, + "learning_rate": 0.00019783005083655984, + "loss": 2.8107, + "step": 1422 + }, + { + "epoch": 0.1148414171576144, + "grad_norm": 0.925410270690918, + "learning_rate": 0.0001978267787141652, + "loss": 2.7603, + "step": 1423 + }, + { + "epoch": 0.1149221208941974, + "grad_norm": 0.920156717300415, + "learning_rate": 0.00019782350415367152, + "loss": 2.7644, + "step": 1424 + }, + { + "epoch": 0.1150028246307804, + "grad_norm": 0.8617576360702515, + "learning_rate": 0.00019782022715516043, + "loss": 2.769, + "step": 1425 + }, + { + "epoch": 0.11508352836736341, + "grad_norm": 1.0987342596054077, + "learning_rate": 0.00019781694771871356, + "loss": 2.8224, + "step": 1426 + }, + { + "epoch": 0.11516423210394641, + "grad_norm": 0.8418076634407043, + "learning_rate": 0.00019781366584441264, + "loss": 2.7947, + "step": 1427 + }, + { + "epoch": 0.11524493584052942, + "grad_norm": 0.8010901808738708, + "learning_rate": 0.0001978103815323395, + "loss": 2.733, + "step": 1428 + }, + { + "epoch": 0.11532563957711242, + "grad_norm": 0.8649042844772339, + "learning_rate": 0.00019780709478257598, + "loss": 2.7681, + "step": 1429 + }, + { + "epoch": 0.11540634331369543, + "grad_norm": 0.7728127837181091, + "learning_rate": 0.00019780380559520397, + "loss": 2.7795, + "step": 1430 + }, + { + "epoch": 0.11548704705027843, + "grad_norm": 0.7770940065383911, + "learning_rate": 0.00019780051397030545, + "loss": 2.743, + "step": 1431 + }, + { + "epoch": 0.11556775078686143, + "grad_norm": 0.8341890573501587, + "learning_rate": 0.0001977972199079625, + "loss": 2.8047, + "step": 1432 + }, + { + "epoch": 0.11564845452344444, + "grad_norm": 0.7894187569618225, + "learning_rate": 0.00019779392340825717, + "loss": 2.7757, + "step": 1433 + }, + { + "epoch": 0.11572915826002744, + "grad_norm": 0.8002873063087463, + "learning_rate": 
0.00019779062447127164, + "loss": 2.7816, + "step": 1434 + }, + { + "epoch": 0.11580986199661045, + "grad_norm": 0.8256075978279114, + "learning_rate": 0.0001977873230970881, + "loss": 2.7839, + "step": 1435 + }, + { + "epoch": 0.11589056573319345, + "grad_norm": 0.8695322871208191, + "learning_rate": 0.0001977840192857889, + "loss": 2.746, + "step": 1436 + }, + { + "epoch": 0.11597126946977646, + "grad_norm": 0.767425537109375, + "learning_rate": 0.00019778071303745628, + "loss": 2.797, + "step": 1437 + }, + { + "epoch": 0.11605197320635946, + "grad_norm": 0.8263241052627563, + "learning_rate": 0.0001977774043521727, + "loss": 2.7702, + "step": 1438 + }, + { + "epoch": 0.11613267694294246, + "grad_norm": 0.8108638525009155, + "learning_rate": 0.0001977740932300206, + "loss": 2.6981, + "step": 1439 + }, + { + "epoch": 0.11621338067952547, + "grad_norm": 0.7945007681846619, + "learning_rate": 0.00019777077967108255, + "loss": 2.7357, + "step": 1440 + }, + { + "epoch": 0.11629408441610846, + "grad_norm": 0.8480326533317566, + "learning_rate": 0.00019776746367544107, + "loss": 2.8563, + "step": 1441 + }, + { + "epoch": 0.11637478815269146, + "grad_norm": 0.8202071785926819, + "learning_rate": 0.00019776414524317882, + "loss": 2.7955, + "step": 1442 + }, + { + "epoch": 0.11645549188927447, + "grad_norm": 0.8202874660491943, + "learning_rate": 0.00019776082437437852, + "loss": 2.765, + "step": 1443 + }, + { + "epoch": 0.11653619562585747, + "grad_norm": 0.8053051829338074, + "learning_rate": 0.00019775750106912294, + "loss": 2.6866, + "step": 1444 + }, + { + "epoch": 0.11661689936244048, + "grad_norm": 0.831968367099762, + "learning_rate": 0.00019775417532749486, + "loss": 2.7022, + "step": 1445 + }, + { + "epoch": 0.11669760309902348, + "grad_norm": 0.8903129696846008, + "learning_rate": 0.00019775084714957725, + "loss": 2.7308, + "step": 1446 + }, + { + "epoch": 0.11677830683560649, + "grad_norm": 0.8178622722625732, + "learning_rate": 0.000197747516535453, + "loss": 
2.7446, + "step": 1447 + }, + { + "epoch": 0.11685901057218949, + "grad_norm": 0.8270576596260071, + "learning_rate": 0.00019774418348520508, + "loss": 2.7716, + "step": 1448 + }, + { + "epoch": 0.1169397143087725, + "grad_norm": 0.7965807914733887, + "learning_rate": 0.00019774084799891662, + "loss": 2.7305, + "step": 1449 + }, + { + "epoch": 0.1170204180453555, + "grad_norm": 0.8499472737312317, + "learning_rate": 0.00019773751007667073, + "loss": 2.7584, + "step": 1450 + }, + { + "epoch": 0.1171011217819385, + "grad_norm": 0.8961663842201233, + "learning_rate": 0.0001977341697185506, + "loss": 2.7729, + "step": 1451 + }, + { + "epoch": 0.1171818255185215, + "grad_norm": 1.0203527212142944, + "learning_rate": 0.0001977308269246395, + "loss": 2.727, + "step": 1452 + }, + { + "epoch": 0.11726252925510451, + "grad_norm": 0.953289806842804, + "learning_rate": 0.0001977274816950207, + "loss": 2.8158, + "step": 1453 + }, + { + "epoch": 0.11734323299168752, + "grad_norm": 1.0064597129821777, + "learning_rate": 0.0001977241340297776, + "loss": 2.8743, + "step": 1454 + }, + { + "epoch": 0.11742393672827052, + "grad_norm": 0.8541988730430603, + "learning_rate": 0.00019772078392899363, + "loss": 2.8532, + "step": 1455 + }, + { + "epoch": 0.11750464046485352, + "grad_norm": 0.8351433873176575, + "learning_rate": 0.00019771743139275228, + "loss": 2.7749, + "step": 1456 + }, + { + "epoch": 0.11758534420143653, + "grad_norm": 0.9555812478065491, + "learning_rate": 0.00019771407642113712, + "loss": 2.7408, + "step": 1457 + }, + { + "epoch": 0.11766604793801953, + "grad_norm": 0.7943894267082214, + "learning_rate": 0.0001977107190142317, + "loss": 2.7265, + "step": 1458 + }, + { + "epoch": 0.11774675167460254, + "grad_norm": 0.8636460900306702, + "learning_rate": 0.0001977073591721198, + "loss": 2.8178, + "step": 1459 + }, + { + "epoch": 0.11782745541118554, + "grad_norm": 0.8673834800720215, + "learning_rate": 0.00019770399689488506, + "loss": 2.7928, + "step": 1460 + }, + { + 
"epoch": 0.11790815914776855, + "grad_norm": 0.9463722705841064, + "learning_rate": 0.00019770063218261133, + "loss": 2.7448, + "step": 1461 + }, + { + "epoch": 0.11798886288435155, + "grad_norm": 0.8429726362228394, + "learning_rate": 0.00019769726503538246, + "loss": 2.7564, + "step": 1462 + }, + { + "epoch": 0.11806956662093455, + "grad_norm": 0.9412201642990112, + "learning_rate": 0.00019769389545328236, + "loss": 2.793, + "step": 1463 + }, + { + "epoch": 0.11815027035751756, + "grad_norm": 0.9112111926078796, + "learning_rate": 0.000197690523436395, + "loss": 2.7787, + "step": 1464 + }, + { + "epoch": 0.11823097409410056, + "grad_norm": 0.8417023420333862, + "learning_rate": 0.00019768714898480444, + "loss": 2.7654, + "step": 1465 + }, + { + "epoch": 0.11831167783068357, + "grad_norm": 0.8275290727615356, + "learning_rate": 0.00019768377209859476, + "loss": 2.7914, + "step": 1466 + }, + { + "epoch": 0.11839238156726656, + "grad_norm": 0.8113142848014832, + "learning_rate": 0.00019768039277785017, + "loss": 2.7516, + "step": 1467 + }, + { + "epoch": 0.11847308530384956, + "grad_norm": 0.8655288219451904, + "learning_rate": 0.0001976770110226548, + "loss": 2.8158, + "step": 1468 + }, + { + "epoch": 0.11855378904043257, + "grad_norm": 0.8063547611236572, + "learning_rate": 0.000197673626833093, + "loss": 2.7624, + "step": 1469 + }, + { + "epoch": 0.11863449277701557, + "grad_norm": 0.843772292137146, + "learning_rate": 0.00019767024020924908, + "loss": 2.86, + "step": 1470 + }, + { + "epoch": 0.11871519651359858, + "grad_norm": 0.7942481637001038, + "learning_rate": 0.0001976668511512075, + "loss": 2.758, + "step": 1471 + }, + { + "epoch": 0.11879590025018158, + "grad_norm": 0.841275155544281, + "learning_rate": 0.00019766345965905268, + "loss": 2.8014, + "step": 1472 + }, + { + "epoch": 0.11887660398676458, + "grad_norm": 0.8003600835800171, + "learning_rate": 0.00019766006573286915, + "loss": 2.7829, + "step": 1473 + }, + { + "epoch": 0.11895730772334759, + 
"grad_norm": 0.8437239527702332, + "learning_rate": 0.00019765666937274147, + "loss": 2.7706, + "step": 1474 + }, + { + "epoch": 0.11903801145993059, + "grad_norm": 0.8118240833282471, + "learning_rate": 0.00019765327057875433, + "loss": 2.8185, + "step": 1475 + }, + { + "epoch": 0.1191187151965136, + "grad_norm": 0.8051649928092957, + "learning_rate": 0.00019764986935099244, + "loss": 2.7676, + "step": 1476 + }, + { + "epoch": 0.1191994189330966, + "grad_norm": 0.7786862850189209, + "learning_rate": 0.00019764646568954053, + "loss": 2.8069, + "step": 1477 + }, + { + "epoch": 0.1192801226696796, + "grad_norm": 0.8199592232704163, + "learning_rate": 0.0001976430595944834, + "loss": 2.7718, + "step": 1478 + }, + { + "epoch": 0.11936082640626261, + "grad_norm": 0.8696652054786682, + "learning_rate": 0.00019763965106590604, + "loss": 2.7682, + "step": 1479 + }, + { + "epoch": 0.11944153014284561, + "grad_norm": 0.7993931174278259, + "learning_rate": 0.00019763624010389334, + "loss": 2.7607, + "step": 1480 + }, + { + "epoch": 0.11952223387942862, + "grad_norm": 0.8107055425643921, + "learning_rate": 0.0001976328267085303, + "loss": 2.7885, + "step": 1481 + }, + { + "epoch": 0.11960293761601162, + "grad_norm": 0.8189423084259033, + "learning_rate": 0.000197629410879902, + "loss": 2.7332, + "step": 1482 + }, + { + "epoch": 0.11968364135259463, + "grad_norm": 0.9134814143180847, + "learning_rate": 0.0001976259926180936, + "loss": 2.7691, + "step": 1483 + }, + { + "epoch": 0.11976434508917763, + "grad_norm": 0.8642883896827698, + "learning_rate": 0.00019762257192319023, + "loss": 2.7876, + "step": 1484 + }, + { + "epoch": 0.11984504882576064, + "grad_norm": 0.7411352396011353, + "learning_rate": 0.0001976191487952772, + "loss": 2.7577, + "step": 1485 + }, + { + "epoch": 0.11992575256234364, + "grad_norm": 0.7741669416427612, + "learning_rate": 0.00019761572323443978, + "loss": 2.8005, + "step": 1486 + }, + { + "epoch": 0.12000645629892664, + "grad_norm": 0.8195405602455139, 
+ "learning_rate": 0.0001976122952407634, + "loss": 2.7421, + "step": 1487 + }, + { + "epoch": 0.12008716003550965, + "grad_norm": 0.8355886936187744, + "learning_rate": 0.00019760886481433345, + "loss": 2.8156, + "step": 1488 + }, + { + "epoch": 0.12016786377209265, + "grad_norm": 0.8321093916893005, + "learning_rate": 0.00019760543195523542, + "loss": 2.7261, + "step": 1489 + }, + { + "epoch": 0.12024856750867566, + "grad_norm": 0.7792446613311768, + "learning_rate": 0.0001976019966635549, + "loss": 2.7319, + "step": 1490 + }, + { + "epoch": 0.12032927124525866, + "grad_norm": 0.770535409450531, + "learning_rate": 0.00019759855893937748, + "loss": 2.7727, + "step": 1491 + }, + { + "epoch": 0.12040997498184165, + "grad_norm": 0.8168532252311707, + "learning_rate": 0.00019759511878278887, + "loss": 2.7763, + "step": 1492 + }, + { + "epoch": 0.12049067871842466, + "grad_norm": 0.8395755290985107, + "learning_rate": 0.00019759167619387476, + "loss": 2.8382, + "step": 1493 + }, + { + "epoch": 0.12057138245500766, + "grad_norm": 0.8682762384414673, + "learning_rate": 0.00019758823117272097, + "loss": 2.8056, + "step": 1494 + }, + { + "epoch": 0.12065208619159067, + "grad_norm": 0.815192699432373, + "learning_rate": 0.00019758478371941337, + "loss": 2.7602, + "step": 1495 + }, + { + "epoch": 0.12073278992817367, + "grad_norm": 0.7919273376464844, + "learning_rate": 0.00019758133383403786, + "loss": 2.7989, + "step": 1496 + }, + { + "epoch": 0.12081349366475667, + "grad_norm": 1.004387378692627, + "learning_rate": 0.00019757788151668045, + "loss": 2.7765, + "step": 1497 + }, + { + "epoch": 0.12089419740133968, + "grad_norm": 1.0032062530517578, + "learning_rate": 0.00019757442676742715, + "loss": 2.7751, + "step": 1498 + }, + { + "epoch": 0.12097490113792268, + "grad_norm": 0.8797723054885864, + "learning_rate": 0.00019757096958636407, + "loss": 2.7798, + "step": 1499 + }, + { + "epoch": 0.12105560487450569, + "grad_norm": 0.9239820241928101, + "learning_rate": 
0.0001975675099735774, + "loss": 2.7976, + "step": 1500 + }, + { + "epoch": 0.12113630861108869, + "grad_norm": 0.9903601408004761, + "learning_rate": 0.00019756404792915328, + "loss": 2.7891, + "step": 1501 + }, + { + "epoch": 0.1212170123476717, + "grad_norm": 0.8402895331382751, + "learning_rate": 0.0001975605834531781, + "loss": 2.8037, + "step": 1502 + }, + { + "epoch": 0.1212977160842547, + "grad_norm": 0.8986102342605591, + "learning_rate": 0.00019755711654573813, + "loss": 2.8375, + "step": 1503 + }, + { + "epoch": 0.1213784198208377, + "grad_norm": 0.8795471787452698, + "learning_rate": 0.0001975536472069198, + "loss": 2.7916, + "step": 1504 + }, + { + "epoch": 0.12145912355742071, + "grad_norm": 0.866278350353241, + "learning_rate": 0.00019755017543680962, + "loss": 2.7884, + "step": 1505 + }, + { + "epoch": 0.12153982729400371, + "grad_norm": 0.7877952456474304, + "learning_rate": 0.00019754670123549398, + "loss": 2.7659, + "step": 1506 + }, + { + "epoch": 0.12162053103058672, + "grad_norm": 0.857155978679657, + "learning_rate": 0.00019754322460305962, + "loss": 2.8029, + "step": 1507 + }, + { + "epoch": 0.12170123476716972, + "grad_norm": 0.8323284387588501, + "learning_rate": 0.00019753974553959314, + "loss": 2.7764, + "step": 1508 + }, + { + "epoch": 0.12178193850375273, + "grad_norm": 0.8557485938072205, + "learning_rate": 0.00019753626404518117, + "loss": 2.7448, + "step": 1509 + }, + { + "epoch": 0.12186264224033573, + "grad_norm": 0.8026818037033081, + "learning_rate": 0.00019753278011991058, + "loss": 2.7323, + "step": 1510 + }, + { + "epoch": 0.12194334597691874, + "grad_norm": 0.8578904271125793, + "learning_rate": 0.00019752929376386816, + "loss": 2.759, + "step": 1511 + }, + { + "epoch": 0.12202404971350174, + "grad_norm": 0.8617175221443176, + "learning_rate": 0.00019752580497714076, + "loss": 2.7641, + "step": 1512 + }, + { + "epoch": 0.12210475345008474, + "grad_norm": 0.8261943459510803, + "learning_rate": 0.00019752231375981538, + 
"loss": 2.7554, + "step": 1513 + }, + { + "epoch": 0.12218545718666775, + "grad_norm": 0.9984099268913269, + "learning_rate": 0.00019751882011197902, + "loss": 2.763, + "step": 1514 + }, + { + "epoch": 0.12226616092325075, + "grad_norm": 0.8014064431190491, + "learning_rate": 0.00019751532403371874, + "loss": 2.8083, + "step": 1515 + }, + { + "epoch": 0.12234686465983376, + "grad_norm": 0.9276653528213501, + "learning_rate": 0.0001975118255251217, + "loss": 2.8055, + "step": 1516 + }, + { + "epoch": 0.12242756839641676, + "grad_norm": 0.9365193843841553, + "learning_rate": 0.00019750832458627503, + "loss": 2.7397, + "step": 1517 + }, + { + "epoch": 0.12250827213299975, + "grad_norm": 0.8952646851539612, + "learning_rate": 0.00019750482121726605, + "loss": 2.8305, + "step": 1518 + }, + { + "epoch": 0.12258897586958276, + "grad_norm": 0.8395531177520752, + "learning_rate": 0.00019750131541818204, + "loss": 2.7852, + "step": 1519 + }, + { + "epoch": 0.12266967960616576, + "grad_norm": 0.8123572468757629, + "learning_rate": 0.0001974978071891104, + "loss": 2.831, + "step": 1520 + }, + { + "epoch": 0.12275038334274876, + "grad_norm": 0.8716141581535339, + "learning_rate": 0.00019749429653013851, + "loss": 2.8012, + "step": 1521 + }, + { + "epoch": 0.12283108707933177, + "grad_norm": 0.7848379611968994, + "learning_rate": 0.0001974907834413539, + "loss": 2.7812, + "step": 1522 + }, + { + "epoch": 0.12291179081591477, + "grad_norm": 0.834072470664978, + "learning_rate": 0.00019748726792284414, + "loss": 2.7442, + "step": 1523 + }, + { + "epoch": 0.12299249455249778, + "grad_norm": 0.8377225399017334, + "learning_rate": 0.0001974837499746968, + "loss": 2.7967, + "step": 1524 + }, + { + "epoch": 0.12307319828908078, + "grad_norm": 0.8809494376182556, + "learning_rate": 0.0001974802295969996, + "loss": 2.8042, + "step": 1525 + }, + { + "epoch": 0.12315390202566379, + "grad_norm": 0.8504741787910461, + "learning_rate": 0.00019747670678984028, + "loss": 2.7909, + "step": 1526 
+ }, + { + "epoch": 0.12323460576224679, + "grad_norm": 0.9444355368614197, + "learning_rate": 0.00019747318155330663, + "loss": 2.8567, + "step": 1527 + }, + { + "epoch": 0.1233153094988298, + "grad_norm": 0.859166145324707, + "learning_rate": 0.00019746965388748645, + "loss": 2.8305, + "step": 1528 + }, + { + "epoch": 0.1233960132354128, + "grad_norm": 0.8431086540222168, + "learning_rate": 0.00019746612379246777, + "loss": 2.7799, + "step": 1529 + }, + { + "epoch": 0.1234767169719958, + "grad_norm": 0.8872438669204712, + "learning_rate": 0.00019746259126833846, + "loss": 2.8413, + "step": 1530 + }, + { + "epoch": 0.12355742070857881, + "grad_norm": 0.8698925375938416, + "learning_rate": 0.0001974590563151866, + "loss": 2.8446, + "step": 1531 + }, + { + "epoch": 0.12363812444516181, + "grad_norm": 0.8926429152488708, + "learning_rate": 0.0001974555189331003, + "loss": 2.7859, + "step": 1532 + }, + { + "epoch": 0.12371882818174482, + "grad_norm": 0.8089048862457275, + "learning_rate": 0.00019745197912216775, + "loss": 2.7985, + "step": 1533 + }, + { + "epoch": 0.12379953191832782, + "grad_norm": 0.8180400729179382, + "learning_rate": 0.0001974484368824771, + "loss": 2.7587, + "step": 1534 + }, + { + "epoch": 0.12388023565491083, + "grad_norm": 0.9584212303161621, + "learning_rate": 0.00019744489221411668, + "loss": 2.766, + "step": 1535 + }, + { + "epoch": 0.12396093939149383, + "grad_norm": 0.8425920009613037, + "learning_rate": 0.00019744134511717485, + "loss": 2.8125, + "step": 1536 + }, + { + "epoch": 0.12404164312807683, + "grad_norm": 0.9109299182891846, + "learning_rate": 0.00019743779559173996, + "loss": 2.8613, + "step": 1537 + }, + { + "epoch": 0.12412234686465984, + "grad_norm": 0.8840214610099792, + "learning_rate": 0.0001974342436379005, + "loss": 2.7603, + "step": 1538 + }, + { + "epoch": 0.12420305060124284, + "grad_norm": 0.8128962516784668, + "learning_rate": 0.00019743068925574502, + "loss": 2.7593, + "step": 1539 + }, + { + "epoch": 
0.12428375433782585, + "grad_norm": 0.8150052428245544, + "learning_rate": 0.00019742713244536204, + "loss": 2.8099, + "step": 1540 + }, + { + "epoch": 0.12436445807440885, + "grad_norm": 0.8442968130111694, + "learning_rate": 0.00019742357320684027, + "loss": 2.7746, + "step": 1541 + }, + { + "epoch": 0.12444516181099186, + "grad_norm": 0.9347402453422546, + "learning_rate": 0.00019742001154026838, + "loss": 2.8247, + "step": 1542 + }, + { + "epoch": 0.12452586554757485, + "grad_norm": 0.8305966854095459, + "learning_rate": 0.00019741644744573512, + "loss": 2.7398, + "step": 1543 + }, + { + "epoch": 0.12460656928415785, + "grad_norm": 0.8811129927635193, + "learning_rate": 0.00019741288092332935, + "loss": 2.8014, + "step": 1544 + }, + { + "epoch": 0.12468727302074085, + "grad_norm": 1.0287303924560547, + "learning_rate": 0.00019740931197313996, + "loss": 2.8449, + "step": 1545 + }, + { + "epoch": 0.12476797675732386, + "grad_norm": 0.8499771356582642, + "learning_rate": 0.00019740574059525588, + "loss": 2.7845, + "step": 1546 + }, + { + "epoch": 0.12484868049390686, + "grad_norm": 0.8110969066619873, + "learning_rate": 0.00019740216678976614, + "loss": 2.7565, + "step": 1547 + }, + { + "epoch": 0.12492938423048987, + "grad_norm": 0.8530771136283875, + "learning_rate": 0.00019739859055675977, + "loss": 2.8098, + "step": 1548 + }, + { + "epoch": 0.12501008796707289, + "grad_norm": 0.8483901619911194, + "learning_rate": 0.00019739501189632591, + "loss": 2.812, + "step": 1549 + }, + { + "epoch": 0.1250907917036559, + "grad_norm": 0.7894467711448669, + "learning_rate": 0.00019739143080855378, + "loss": 2.8576, + "step": 1550 + }, + { + "epoch": 0.1251714954402389, + "grad_norm": 0.8270247578620911, + "learning_rate": 0.0001973878472935326, + "loss": 2.7613, + "step": 1551 + }, + { + "epoch": 0.1252521991768219, + "grad_norm": 0.8496212959289551, + "learning_rate": 0.00019738426135135174, + "loss": 2.8375, + "step": 1552 + }, + { + "epoch": 0.1253329029134049, + 
"grad_norm": 0.8465524911880493, + "learning_rate": 0.00019738067298210045, + "loss": 2.8023, + "step": 1553 + }, + { + "epoch": 0.1254136066499879, + "grad_norm": 0.7843824028968811, + "learning_rate": 0.00019737708218586826, + "loss": 2.7424, + "step": 1554 + }, + { + "epoch": 0.1254943103865709, + "grad_norm": 0.8310040235519409, + "learning_rate": 0.00019737348896274462, + "loss": 2.7608, + "step": 1555 + }, + { + "epoch": 0.1255750141231539, + "grad_norm": 0.7895017266273499, + "learning_rate": 0.00019736989331281914, + "loss": 2.7549, + "step": 1556 + }, + { + "epoch": 0.1256557178597369, + "grad_norm": 0.8140431642532349, + "learning_rate": 0.00019736629523618138, + "loss": 2.802, + "step": 1557 + }, + { + "epoch": 0.1257364215963199, + "grad_norm": 0.8026889562606812, + "learning_rate": 0.000197362694732921, + "loss": 2.7758, + "step": 1558 + }, + { + "epoch": 0.1258171253329029, + "grad_norm": 0.8018048405647278, + "learning_rate": 0.0001973590918031278, + "loss": 2.7729, + "step": 1559 + }, + { + "epoch": 0.1258978290694859, + "grad_norm": 0.8394612073898315, + "learning_rate": 0.00019735548644689147, + "loss": 2.7692, + "step": 1560 + }, + { + "epoch": 0.1259785328060689, + "grad_norm": 0.819804310798645, + "learning_rate": 0.00019735187866430198, + "loss": 2.6933, + "step": 1561 + }, + { + "epoch": 0.12605923654265191, + "grad_norm": 0.8094257116317749, + "learning_rate": 0.0001973482684554492, + "loss": 2.7722, + "step": 1562 + }, + { + "epoch": 0.12613994027923492, + "grad_norm": 0.8647315502166748, + "learning_rate": 0.00019734465582042305, + "loss": 2.787, + "step": 1563 + }, + { + "epoch": 0.12622064401581792, + "grad_norm": 0.8439335823059082, + "learning_rate": 0.00019734104075931367, + "loss": 2.8, + "step": 1564 + }, + { + "epoch": 0.12630134775240093, + "grad_norm": 0.852480947971344, + "learning_rate": 0.00019733742327221105, + "loss": 2.8656, + "step": 1565 + }, + { + "epoch": 0.12638205148898393, + "grad_norm": 0.813846230506897, + 
"learning_rate": 0.00019733380335920542, + "loss": 2.7733, + "step": 1566 + }, + { + "epoch": 0.12646275522556694, + "grad_norm": 0.7860896587371826, + "learning_rate": 0.00019733018102038698, + "loss": 2.8201, + "step": 1567 + }, + { + "epoch": 0.12654345896214994, + "grad_norm": 0.7857748866081238, + "learning_rate": 0.00019732655625584602, + "loss": 2.8726, + "step": 1568 + }, + { + "epoch": 0.12662416269873294, + "grad_norm": 0.8152899146080017, + "learning_rate": 0.00019732292906567286, + "loss": 2.7738, + "step": 1569 + }, + { + "epoch": 0.12670486643531595, + "grad_norm": 0.8281696438789368, + "learning_rate": 0.00019731929944995788, + "loss": 2.7966, + "step": 1570 + }, + { + "epoch": 0.12678557017189895, + "grad_norm": 0.8070773482322693, + "learning_rate": 0.00019731566740879158, + "loss": 2.6988, + "step": 1571 + }, + { + "epoch": 0.12686627390848196, + "grad_norm": 0.7859680652618408, + "learning_rate": 0.00019731203294226445, + "loss": 2.7241, + "step": 1572 + }, + { + "epoch": 0.12694697764506496, + "grad_norm": 0.7753982543945312, + "learning_rate": 0.0001973083960504671, + "loss": 2.7621, + "step": 1573 + }, + { + "epoch": 0.12702768138164797, + "grad_norm": 0.8063471913337708, + "learning_rate": 0.00019730475673349014, + "loss": 2.7298, + "step": 1574 + }, + { + "epoch": 0.12710838511823097, + "grad_norm": 0.7943962812423706, + "learning_rate": 0.0001973011149914243, + "loss": 2.7714, + "step": 1575 + }, + { + "epoch": 0.12718908885481398, + "grad_norm": 0.8297483325004578, + "learning_rate": 0.00019729747082436033, + "loss": 2.7743, + "step": 1576 + }, + { + "epoch": 0.12726979259139698, + "grad_norm": 0.8728111386299133, + "learning_rate": 0.000197293824232389, + "loss": 2.8251, + "step": 1577 + }, + { + "epoch": 0.12735049632797998, + "grad_norm": 0.8762480020523071, + "learning_rate": 0.00019729017521560128, + "loss": 2.8036, + "step": 1578 + }, + { + "epoch": 0.127431200064563, + "grad_norm": 0.9266185164451599, + "learning_rate": 
0.00019728652377408806, + "loss": 2.7335, + "step": 1579 + }, + { + "epoch": 0.127511903801146, + "grad_norm": 0.9289839267730713, + "learning_rate": 0.00019728286990794037, + "loss": 2.7715, + "step": 1580 + }, + { + "epoch": 0.127592607537729, + "grad_norm": 0.8811823725700378, + "learning_rate": 0.0001972792136172493, + "loss": 2.7389, + "step": 1581 + }, + { + "epoch": 0.127673311274312, + "grad_norm": 0.8174294233322144, + "learning_rate": 0.00019727555490210588, + "loss": 2.7483, + "step": 1582 + }, + { + "epoch": 0.127754015010895, + "grad_norm": 0.8254107236862183, + "learning_rate": 0.00019727189376260137, + "loss": 2.7897, + "step": 1583 + }, + { + "epoch": 0.127834718747478, + "grad_norm": 0.8478763699531555, + "learning_rate": 0.000197268230198827, + "loss": 2.7394, + "step": 1584 + }, + { + "epoch": 0.12791542248406101, + "grad_norm": 0.8356192111968994, + "learning_rate": 0.00019726456421087404, + "loss": 2.7518, + "step": 1585 + }, + { + "epoch": 0.12799612622064402, + "grad_norm": 0.8523107767105103, + "learning_rate": 0.00019726089579883392, + "loss": 2.7893, + "step": 1586 + }, + { + "epoch": 0.12807682995722702, + "grad_norm": 0.9048579931259155, + "learning_rate": 0.00019725722496279804, + "loss": 2.7488, + "step": 1587 + }, + { + "epoch": 0.12815753369381003, + "grad_norm": 0.8242251873016357, + "learning_rate": 0.00019725355170285787, + "loss": 2.7544, + "step": 1588 + }, + { + "epoch": 0.12823823743039303, + "grad_norm": 0.8343983888626099, + "learning_rate": 0.00019724987601910497, + "loss": 2.7317, + "step": 1589 + }, + { + "epoch": 0.12831894116697604, + "grad_norm": 0.8084509372711182, + "learning_rate": 0.00019724619791163095, + "loss": 2.7822, + "step": 1590 + }, + { + "epoch": 0.12839964490355904, + "grad_norm": 0.8397380113601685, + "learning_rate": 0.00019724251738052745, + "loss": 2.8188, + "step": 1591 + }, + { + "epoch": 0.12848034864014204, + "grad_norm": 0.8558558821678162, + "learning_rate": 0.00019723883442588624, + "loss": 
2.7623, + "step": 1592 + }, + { + "epoch": 0.12856105237672505, + "grad_norm": 0.7602639198303223, + "learning_rate": 0.0001972351490477991, + "loss": 2.7932, + "step": 1593 + }, + { + "epoch": 0.12864175611330805, + "grad_norm": 0.8379851579666138, + "learning_rate": 0.00019723146124635786, + "loss": 2.8296, + "step": 1594 + }, + { + "epoch": 0.12872245984989106, + "grad_norm": 0.8454548716545105, + "learning_rate": 0.00019722777102165444, + "loss": 2.8192, + "step": 1595 + }, + { + "epoch": 0.12880316358647406, + "grad_norm": 0.8344082832336426, + "learning_rate": 0.0001972240783737808, + "loss": 2.7628, + "step": 1596 + }, + { + "epoch": 0.12888386732305707, + "grad_norm": 0.809093713760376, + "learning_rate": 0.000197220383302829, + "loss": 2.8055, + "step": 1597 + }, + { + "epoch": 0.12896457105964007, + "grad_norm": 0.7909694910049438, + "learning_rate": 0.0001972166858088911, + "loss": 2.7292, + "step": 1598 + }, + { + "epoch": 0.12904527479622308, + "grad_norm": 0.8350280523300171, + "learning_rate": 0.00019721298589205928, + "loss": 2.7671, + "step": 1599 + }, + { + "epoch": 0.12912597853280608, + "grad_norm": 0.7857616543769836, + "learning_rate": 0.00019720928355242568, + "loss": 2.729, + "step": 1600 + }, + { + "epoch": 0.12920668226938908, + "grad_norm": 0.7899746298789978, + "learning_rate": 0.0001972055787900827, + "loss": 2.8023, + "step": 1601 + }, + { + "epoch": 0.1292873860059721, + "grad_norm": 0.8604246377944946, + "learning_rate": 0.00019720187160512256, + "loss": 2.749, + "step": 1602 + }, + { + "epoch": 0.1293680897425551, + "grad_norm": 0.8517864942550659, + "learning_rate": 0.0001971981619976377, + "loss": 2.7203, + "step": 1603 + }, + { + "epoch": 0.1294487934791381, + "grad_norm": 0.8860471248626709, + "learning_rate": 0.00019719444996772056, + "loss": 2.7372, + "step": 1604 + }, + { + "epoch": 0.1295294972157211, + "grad_norm": 0.8355888724327087, + "learning_rate": 0.00019719073551546367, + "loss": 2.7284, + "step": 1605 + }, + { + 
"epoch": 0.1296102009523041, + "grad_norm": 0.7998479604721069, + "learning_rate": 0.00019718701864095955, + "loss": 2.7726, + "step": 1606 + }, + { + "epoch": 0.12969090468888708, + "grad_norm": 0.8564549088478088, + "learning_rate": 0.00019718329934430092, + "loss": 2.7334, + "step": 1607 + }, + { + "epoch": 0.1297716084254701, + "grad_norm": 0.8594443798065186, + "learning_rate": 0.00019717957762558044, + "loss": 2.7865, + "step": 1608 + }, + { + "epoch": 0.1298523121620531, + "grad_norm": 0.804553210735321, + "learning_rate": 0.00019717585348489082, + "loss": 2.8094, + "step": 1609 + }, + { + "epoch": 0.1299330158986361, + "grad_norm": 0.7892553806304932, + "learning_rate": 0.0001971721269223249, + "loss": 2.7969, + "step": 1610 + }, + { + "epoch": 0.1300137196352191, + "grad_norm": 0.8703331351280212, + "learning_rate": 0.0001971683979379756, + "loss": 2.8192, + "step": 1611 + }, + { + "epoch": 0.1300944233718021, + "grad_norm": 0.8176589012145996, + "learning_rate": 0.00019716466653193582, + "loss": 2.7902, + "step": 1612 + }, + { + "epoch": 0.1301751271083851, + "grad_norm": 0.8305137157440186, + "learning_rate": 0.00019716093270429855, + "loss": 2.8202, + "step": 1613 + }, + { + "epoch": 0.1302558308449681, + "grad_norm": 0.8261505365371704, + "learning_rate": 0.00019715719645515688, + "loss": 2.7905, + "step": 1614 + }, + { + "epoch": 0.13033653458155112, + "grad_norm": 0.9465535879135132, + "learning_rate": 0.00019715345778460389, + "loss": 2.7965, + "step": 1615 + }, + { + "epoch": 0.13041723831813412, + "grad_norm": 0.8847100138664246, + "learning_rate": 0.00019714971669273275, + "loss": 2.8177, + "step": 1616 + }, + { + "epoch": 0.13049794205471713, + "grad_norm": 0.9768328666687012, + "learning_rate": 0.0001971459731796367, + "loss": 2.7668, + "step": 1617 + }, + { + "epoch": 0.13057864579130013, + "grad_norm": 0.7498586177825928, + "learning_rate": 0.0001971422272454091, + "loss": 2.761, + "step": 1618 + }, + { + "epoch": 0.13065934952788313, + 
"grad_norm": 1.0455373525619507, + "learning_rate": 0.00019713847889014325, + "loss": 2.7652, + "step": 1619 + }, + { + "epoch": 0.13074005326446614, + "grad_norm": 0.8484631180763245, + "learning_rate": 0.00019713472811393258, + "loss": 2.7858, + "step": 1620 + }, + { + "epoch": 0.13082075700104914, + "grad_norm": 0.8190686702728271, + "learning_rate": 0.00019713097491687057, + "loss": 2.7217, + "step": 1621 + }, + { + "epoch": 0.13090146073763215, + "grad_norm": 0.8866000175476074, + "learning_rate": 0.00019712721929905077, + "loss": 2.7868, + "step": 1622 + }, + { + "epoch": 0.13098216447421515, + "grad_norm": 0.8026713132858276, + "learning_rate": 0.00019712346126056677, + "loss": 2.7276, + "step": 1623 + }, + { + "epoch": 0.13106286821079816, + "grad_norm": 0.8306462168693542, + "learning_rate": 0.00019711970080151225, + "loss": 2.7747, + "step": 1624 + }, + { + "epoch": 0.13114357194738116, + "grad_norm": 0.8276618123054504, + "learning_rate": 0.0001971159379219809, + "loss": 2.7146, + "step": 1625 + }, + { + "epoch": 0.13122427568396416, + "grad_norm": 0.9749011993408203, + "learning_rate": 0.00019711217262206648, + "loss": 2.8731, + "step": 1626 + }, + { + "epoch": 0.13130497942054717, + "grad_norm": 0.828484058380127, + "learning_rate": 0.00019710840490186292, + "loss": 2.803, + "step": 1627 + }, + { + "epoch": 0.13138568315713017, + "grad_norm": 0.8095957636833191, + "learning_rate": 0.00019710463476146402, + "loss": 2.7751, + "step": 1628 + }, + { + "epoch": 0.13146638689371318, + "grad_norm": 0.8731853365898132, + "learning_rate": 0.0001971008622009638, + "loss": 2.8274, + "step": 1629 + }, + { + "epoch": 0.13154709063029618, + "grad_norm": 0.8180200457572937, + "learning_rate": 0.00019709708722045628, + "loss": 2.813, + "step": 1630 + }, + { + "epoch": 0.13162779436687919, + "grad_norm": 0.7740067839622498, + "learning_rate": 0.00019709330982003553, + "loss": 2.7319, + "step": 1631 + }, + { + "epoch": 0.1317084981034622, + "grad_norm": 
0.8439326882362366, + "learning_rate": 0.0001970895299997957, + "loss": 2.8182, + "step": 1632 + }, + { + "epoch": 0.1317892018400452, + "grad_norm": 0.8254802823066711, + "learning_rate": 0.000197085747759831, + "loss": 2.7874, + "step": 1633 + }, + { + "epoch": 0.1318699055766282, + "grad_norm": 0.8128175139427185, + "learning_rate": 0.00019708196310023562, + "loss": 2.8125, + "step": 1634 + }, + { + "epoch": 0.1319506093132112, + "grad_norm": 0.8664820790290833, + "learning_rate": 0.00019707817602110402, + "loss": 2.8446, + "step": 1635 + }, + { + "epoch": 0.1320313130497942, + "grad_norm": 0.8101332783699036, + "learning_rate": 0.00019707438652253044, + "loss": 2.8027, + "step": 1636 + }, + { + "epoch": 0.1321120167863772, + "grad_norm": 0.8296725153923035, + "learning_rate": 0.00019707059460460945, + "loss": 2.7677, + "step": 1637 + }, + { + "epoch": 0.13219272052296022, + "grad_norm": 0.7321150898933411, + "learning_rate": 0.0001970668002674355, + "loss": 2.6991, + "step": 1638 + }, + { + "epoch": 0.13227342425954322, + "grad_norm": 0.8321375250816345, + "learning_rate": 0.0001970630035111031, + "loss": 2.6948, + "step": 1639 + }, + { + "epoch": 0.13235412799612623, + "grad_norm": 0.7622714042663574, + "learning_rate": 0.00019705920433570694, + "loss": 2.6957, + "step": 1640 + }, + { + "epoch": 0.13243483173270923, + "grad_norm": 0.8413416147232056, + "learning_rate": 0.00019705540274134173, + "loss": 2.7277, + "step": 1641 + }, + { + "epoch": 0.13251553546929223, + "grad_norm": 0.8798941373825073, + "learning_rate": 0.00019705159872810218, + "loss": 2.7699, + "step": 1642 + }, + { + "epoch": 0.13259623920587524, + "grad_norm": 0.788287341594696, + "learning_rate": 0.00019704779229608304, + "loss": 2.7933, + "step": 1643 + }, + { + "epoch": 0.13267694294245824, + "grad_norm": 0.8547430634498596, + "learning_rate": 0.00019704398344537927, + "loss": 2.7706, + "step": 1644 + }, + { + "epoch": 0.13275764667904125, + "grad_norm": 0.8474008440971375, + 
"learning_rate": 0.00019704017217608575, + "loss": 2.8005, + "step": 1645 + }, + { + "epoch": 0.13283835041562425, + "grad_norm": 0.8636945486068726, + "learning_rate": 0.00019703635848829747, + "loss": 2.8241, + "step": 1646 + }, + { + "epoch": 0.13291905415220726, + "grad_norm": 0.8158168792724609, + "learning_rate": 0.00019703254238210947, + "loss": 2.7576, + "step": 1647 + }, + { + "epoch": 0.13299975788879026, + "grad_norm": 0.8420887589454651, + "learning_rate": 0.0001970287238576169, + "loss": 2.7677, + "step": 1648 + }, + { + "epoch": 0.13308046162537326, + "grad_norm": 0.7910059690475464, + "learning_rate": 0.00019702490291491486, + "loss": 2.7807, + "step": 1649 + }, + { + "epoch": 0.13316116536195627, + "grad_norm": 0.8308143615722656, + "learning_rate": 0.00019702107955409863, + "loss": 2.7698, + "step": 1650 + }, + { + "epoch": 0.13324186909853927, + "grad_norm": 0.8215764760971069, + "learning_rate": 0.00019701725377526349, + "loss": 2.8263, + "step": 1651 + }, + { + "epoch": 0.13332257283512228, + "grad_norm": 0.8780504465103149, + "learning_rate": 0.00019701342557850476, + "loss": 2.8032, + "step": 1652 + }, + { + "epoch": 0.13340327657170528, + "grad_norm": 0.8125136494636536, + "learning_rate": 0.0001970095949639179, + "loss": 2.8317, + "step": 1653 + }, + { + "epoch": 0.13348398030828829, + "grad_norm": 0.8170902132987976, + "learning_rate": 0.00019700576193159831, + "loss": 2.7528, + "step": 1654 + }, + { + "epoch": 0.1335646840448713, + "grad_norm": 0.8318637013435364, + "learning_rate": 0.00019700192648164157, + "loss": 2.7963, + "step": 1655 + }, + { + "epoch": 0.1336453877814543, + "grad_norm": 0.8445270657539368, + "learning_rate": 0.00019699808861414327, + "loss": 2.772, + "step": 1656 + }, + { + "epoch": 0.1337260915180373, + "grad_norm": 0.7908959984779358, + "learning_rate": 0.00019699424832919906, + "loss": 2.7528, + "step": 1657 + }, + { + "epoch": 0.13380679525462028, + "grad_norm": 0.8153900504112244, + "learning_rate": 
0.00019699040562690462, + "loss": 2.7643, + "step": 1658 + }, + { + "epoch": 0.13388749899120328, + "grad_norm": 0.86302250623703, + "learning_rate": 0.0001969865605073557, + "loss": 2.8037, + "step": 1659 + }, + { + "epoch": 0.13396820272778628, + "grad_norm": 0.8373419046401978, + "learning_rate": 0.0001969827129706482, + "loss": 2.7647, + "step": 1660 + }, + { + "epoch": 0.1340489064643693, + "grad_norm": 0.8166481852531433, + "learning_rate": 0.00019697886301687798, + "loss": 2.8333, + "step": 1661 + }, + { + "epoch": 0.1341296102009523, + "grad_norm": 0.7807812094688416, + "learning_rate": 0.00019697501064614098, + "loss": 2.7495, + "step": 1662 + }, + { + "epoch": 0.1342103139375353, + "grad_norm": 0.8375338315963745, + "learning_rate": 0.00019697115585853324, + "loss": 2.7518, + "step": 1663 + }, + { + "epoch": 0.1342910176741183, + "grad_norm": 0.7392182350158691, + "learning_rate": 0.00019696729865415077, + "loss": 2.758, + "step": 1664 + }, + { + "epoch": 0.1343717214107013, + "grad_norm": 0.8041971921920776, + "learning_rate": 0.00019696343903308978, + "loss": 2.7485, + "step": 1665 + }, + { + "epoch": 0.1344524251472843, + "grad_norm": 0.789310097694397, + "learning_rate": 0.00019695957699544643, + "loss": 2.8179, + "step": 1666 + }, + { + "epoch": 0.13453312888386731, + "grad_norm": 0.7643609642982483, + "learning_rate": 0.00019695571254131693, + "loss": 2.7791, + "step": 1667 + }, + { + "epoch": 0.13461383262045032, + "grad_norm": 0.8284661769866943, + "learning_rate": 0.00019695184567079766, + "loss": 2.717, + "step": 1668 + }, + { + "epoch": 0.13469453635703332, + "grad_norm": 0.7620903253555298, + "learning_rate": 0.00019694797638398494, + "loss": 2.7808, + "step": 1669 + }, + { + "epoch": 0.13477524009361633, + "grad_norm": 0.9123913645744324, + "learning_rate": 0.00019694410468097524, + "loss": 2.7648, + "step": 1670 + }, + { + "epoch": 0.13485594383019933, + "grad_norm": 0.735518217086792, + "learning_rate": 0.000196940230561865, + "loss": 
2.7653, + "step": 1671 + }, + { + "epoch": 0.13493664756678234, + "grad_norm": 0.8363413214683533, + "learning_rate": 0.00019693635402675085, + "loss": 2.766, + "step": 1672 + }, + { + "epoch": 0.13501735130336534, + "grad_norm": 0.8206491470336914, + "learning_rate": 0.00019693247507572936, + "loss": 2.7829, + "step": 1673 + }, + { + "epoch": 0.13509805503994834, + "grad_norm": 0.7726099491119385, + "learning_rate": 0.0001969285937088972, + "loss": 2.7381, + "step": 1674 + }, + { + "epoch": 0.13517875877653135, + "grad_norm": 0.8970316052436829, + "learning_rate": 0.0001969247099263511, + "loss": 2.7836, + "step": 1675 + }, + { + "epoch": 0.13525946251311435, + "grad_norm": 0.7966172099113464, + "learning_rate": 0.00019692082372818788, + "loss": 2.7135, + "step": 1676 + }, + { + "epoch": 0.13534016624969736, + "grad_norm": 0.8583024740219116, + "learning_rate": 0.00019691693511450438, + "loss": 2.7908, + "step": 1677 + }, + { + "epoch": 0.13542086998628036, + "grad_norm": 0.9430457353591919, + "learning_rate": 0.0001969130440853975, + "loss": 2.7311, + "step": 1678 + }, + { + "epoch": 0.13550157372286337, + "grad_norm": 0.8066009879112244, + "learning_rate": 0.00019690915064096424, + "loss": 2.7039, + "step": 1679 + }, + { + "epoch": 0.13558227745944637, + "grad_norm": 1.0169655084609985, + "learning_rate": 0.0001969052547813016, + "loss": 2.7832, + "step": 1680 + }, + { + "epoch": 0.13566298119602938, + "grad_norm": 0.8606080412864685, + "learning_rate": 0.00019690135650650672, + "loss": 2.751, + "step": 1681 + }, + { + "epoch": 0.13574368493261238, + "grad_norm": 0.8625333905220032, + "learning_rate": 0.00019689745581667674, + "loss": 2.761, + "step": 1682 + }, + { + "epoch": 0.13582438866919538, + "grad_norm": 0.9304285645484924, + "learning_rate": 0.00019689355271190886, + "loss": 2.7566, + "step": 1683 + }, + { + "epoch": 0.1359050924057784, + "grad_norm": 0.793397068977356, + "learning_rate": 0.00019688964719230035, + "loss": 2.7648, + "step": 1684 + }, + { 
+ "epoch": 0.1359857961423614, + "grad_norm": 0.8496749401092529, + "learning_rate": 0.00019688573925794858, + "loss": 2.7461, + "step": 1685 + }, + { + "epoch": 0.1360664998789444, + "grad_norm": 0.7807914018630981, + "learning_rate": 0.0001968818289089509, + "loss": 2.8266, + "step": 1686 + }, + { + "epoch": 0.1361472036155274, + "grad_norm": 0.8186607956886292, + "learning_rate": 0.0001968779161454048, + "loss": 2.8447, + "step": 1687 + }, + { + "epoch": 0.1362279073521104, + "grad_norm": 0.8007118701934814, + "learning_rate": 0.0001968740009674078, + "loss": 2.7888, + "step": 1688 + }, + { + "epoch": 0.1363086110886934, + "grad_norm": 0.8735570311546326, + "learning_rate": 0.00019687008337505749, + "loss": 2.7152, + "step": 1689 + }, + { + "epoch": 0.13638931482527641, + "grad_norm": 0.8546476364135742, + "learning_rate": 0.00019686616336845144, + "loss": 2.8113, + "step": 1690 + }, + { + "epoch": 0.13647001856185942, + "grad_norm": 0.9156736135482788, + "learning_rate": 0.0001968622409476874, + "loss": 2.7561, + "step": 1691 + }, + { + "epoch": 0.13655072229844242, + "grad_norm": 0.8091925382614136, + "learning_rate": 0.0001968583161128631, + "loss": 2.7384, + "step": 1692 + }, + { + "epoch": 0.13663142603502543, + "grad_norm": 0.7871039509773254, + "learning_rate": 0.0001968543888640764, + "loss": 2.7138, + "step": 1693 + }, + { + "epoch": 0.13671212977160843, + "grad_norm": 0.9537062048912048, + "learning_rate": 0.00019685045920142516, + "loss": 2.7726, + "step": 1694 + }, + { + "epoch": 0.13679283350819144, + "grad_norm": 0.8663280010223389, + "learning_rate": 0.00019684652712500728, + "loss": 2.7509, + "step": 1695 + }, + { + "epoch": 0.13687353724477444, + "grad_norm": 0.8717214465141296, + "learning_rate": 0.0001968425926349208, + "loss": 2.791, + "step": 1696 + }, + { + "epoch": 0.13695424098135744, + "grad_norm": 0.8942584991455078, + "learning_rate": 0.00019683865573126374, + "loss": 2.77, + "step": 1697 + }, + { + "epoch": 0.13703494471794045, + 
"grad_norm": 0.8243421316146851, + "learning_rate": 0.00019683471641413424, + "loss": 2.8063, + "step": 1698 + }, + { + "epoch": 0.13711564845452345, + "grad_norm": 0.8618699908256531, + "learning_rate": 0.0001968307746836305, + "loss": 2.6872, + "step": 1699 + }, + { + "epoch": 0.13719635219110646, + "grad_norm": 0.7931695580482483, + "learning_rate": 0.00019682683053985072, + "loss": 2.7495, + "step": 1700 + }, + { + "epoch": 0.13727705592768946, + "grad_norm": 0.7549482583999634, + "learning_rate": 0.00019682288398289324, + "loss": 2.7543, + "step": 1701 + }, + { + "epoch": 0.13735775966427247, + "grad_norm": 0.7953789234161377, + "learning_rate": 0.00019681893501285636, + "loss": 2.6895, + "step": 1702 + }, + { + "epoch": 0.13743846340085547, + "grad_norm": 0.7916574478149414, + "learning_rate": 0.00019681498362983857, + "loss": 2.819, + "step": 1703 + }, + { + "epoch": 0.13751916713743847, + "grad_norm": 0.7986735105514526, + "learning_rate": 0.0001968110298339383, + "loss": 2.8062, + "step": 1704 + }, + { + "epoch": 0.13759987087402148, + "grad_norm": 0.8601658940315247, + "learning_rate": 0.00019680707362525407, + "loss": 2.7625, + "step": 1705 + }, + { + "epoch": 0.13768057461060448, + "grad_norm": 0.8888362050056458, + "learning_rate": 0.00019680311500388454, + "loss": 2.7747, + "step": 1706 + }, + { + "epoch": 0.1377612783471875, + "grad_norm": 0.7762896418571472, + "learning_rate": 0.00019679915396992833, + "loss": 2.7959, + "step": 1707 + }, + { + "epoch": 0.1378419820837705, + "grad_norm": 0.8942253589630127, + "learning_rate": 0.00019679519052348416, + "loss": 2.7717, + "step": 1708 + }, + { + "epoch": 0.13792268582035347, + "grad_norm": 0.8388909697532654, + "learning_rate": 0.00019679122466465082, + "loss": 2.7448, + "step": 1709 + }, + { + "epoch": 0.13800338955693647, + "grad_norm": 0.8826024532318115, + "learning_rate": 0.00019678725639352712, + "loss": 2.7307, + "step": 1710 + }, + { + "epoch": 0.13808409329351948, + "grad_norm": 
0.8972313404083252, + "learning_rate": 0.00019678328571021204, + "loss": 2.7619, + "step": 1711 + }, + { + "epoch": 0.13816479703010248, + "grad_norm": 0.9373044371604919, + "learning_rate": 0.00019677931261480444, + "loss": 2.7664, + "step": 1712 + }, + { + "epoch": 0.1382455007666855, + "grad_norm": 0.8060994148254395, + "learning_rate": 0.00019677533710740343, + "loss": 2.7707, + "step": 1713 + }, + { + "epoch": 0.1383262045032685, + "grad_norm": 0.8324100971221924, + "learning_rate": 0.000196771359188108, + "loss": 2.8249, + "step": 1714 + }, + { + "epoch": 0.1384069082398515, + "grad_norm": 0.879176676273346, + "learning_rate": 0.00019676737885701738, + "loss": 2.7767, + "step": 1715 + }, + { + "epoch": 0.1384876119764345, + "grad_norm": 0.8823966979980469, + "learning_rate": 0.0001967633961142307, + "loss": 2.791, + "step": 1716 + }, + { + "epoch": 0.1385683157130175, + "grad_norm": 0.8176039457321167, + "learning_rate": 0.00019675941095984728, + "loss": 2.8225, + "step": 1717 + }, + { + "epoch": 0.1386490194496005, + "grad_norm": 0.8005076050758362, + "learning_rate": 0.00019675542339396635, + "loss": 2.8175, + "step": 1718 + }, + { + "epoch": 0.1387297231861835, + "grad_norm": 0.800854504108429, + "learning_rate": 0.0001967514334166874, + "loss": 2.8226, + "step": 1719 + }, + { + "epoch": 0.13881042692276652, + "grad_norm": 0.7941261529922485, + "learning_rate": 0.00019674744102810978, + "loss": 2.7488, + "step": 1720 + }, + { + "epoch": 0.13889113065934952, + "grad_norm": 0.7955947518348694, + "learning_rate": 0.00019674344622833302, + "loss": 2.7749, + "step": 1721 + }, + { + "epoch": 0.13897183439593253, + "grad_norm": 0.8353856205940247, + "learning_rate": 0.00019673944901745674, + "loss": 2.7982, + "step": 1722 + }, + { + "epoch": 0.13905253813251553, + "grad_norm": 0.8711503744125366, + "learning_rate": 0.00019673544939558047, + "loss": 2.8007, + "step": 1723 + }, + { + "epoch": 0.13913324186909853, + "grad_norm": 0.8525274991989136, + 
"learning_rate": 0.00019673144736280396, + "loss": 2.7423, + "step": 1724 + }, + { + "epoch": 0.13921394560568154, + "grad_norm": 0.8143991231918335, + "learning_rate": 0.0001967274429192269, + "loss": 2.7752, + "step": 1725 + }, + { + "epoch": 0.13929464934226454, + "grad_norm": 0.8508228063583374, + "learning_rate": 0.00019672343606494912, + "loss": 2.7422, + "step": 1726 + }, + { + "epoch": 0.13937535307884755, + "grad_norm": 0.8320932984352112, + "learning_rate": 0.0001967194268000705, + "loss": 2.7598, + "step": 1727 + }, + { + "epoch": 0.13945605681543055, + "grad_norm": 0.8233908414840698, + "learning_rate": 0.00019671541512469092, + "loss": 2.7834, + "step": 1728 + }, + { + "epoch": 0.13953676055201356, + "grad_norm": 0.8097162246704102, + "learning_rate": 0.00019671140103891038, + "loss": 2.7856, + "step": 1729 + }, + { + "epoch": 0.13961746428859656, + "grad_norm": 0.9043141007423401, + "learning_rate": 0.0001967073845428289, + "loss": 2.8047, + "step": 1730 + }, + { + "epoch": 0.13969816802517956, + "grad_norm": 0.9118517637252808, + "learning_rate": 0.00019670336563654662, + "loss": 2.789, + "step": 1731 + }, + { + "epoch": 0.13977887176176257, + "grad_norm": 0.8016074895858765, + "learning_rate": 0.00019669934432016368, + "loss": 2.7506, + "step": 1732 + }, + { + "epoch": 0.13985957549834557, + "grad_norm": 0.8376848697662354, + "learning_rate": 0.0001966953205937803, + "loss": 2.7832, + "step": 1733 + }, + { + "epoch": 0.13994027923492858, + "grad_norm": 0.8511834144592285, + "learning_rate": 0.0001966912944574968, + "loss": 2.7564, + "step": 1734 + }, + { + "epoch": 0.14002098297151158, + "grad_norm": 0.7796351909637451, + "learning_rate": 0.00019668726591141344, + "loss": 2.7489, + "step": 1735 + }, + { + "epoch": 0.14010168670809459, + "grad_norm": 0.8204767107963562, + "learning_rate": 0.00019668323495563068, + "loss": 2.7634, + "step": 1736 + }, + { + "epoch": 0.1401823904446776, + "grad_norm": 0.9049975872039795, + "learning_rate": 
0.000196679201590249, + "loss": 2.7863, + "step": 1737 + }, + { + "epoch": 0.1402630941812606, + "grad_norm": 0.7473673224449158, + "learning_rate": 0.0001966751658153689, + "loss": 2.7557, + "step": 1738 + }, + { + "epoch": 0.1403437979178436, + "grad_norm": 0.7765525579452515, + "learning_rate": 0.0001966711276310909, + "loss": 2.7865, + "step": 1739 + }, + { + "epoch": 0.1404245016544266, + "grad_norm": 0.8766517043113708, + "learning_rate": 0.00019666708703751576, + "loss": 2.7873, + "step": 1740 + }, + { + "epoch": 0.1405052053910096, + "grad_norm": 0.8351505994796753, + "learning_rate": 0.00019666304403474408, + "loss": 2.7355, + "step": 1741 + }, + { + "epoch": 0.1405859091275926, + "grad_norm": 0.7612324953079224, + "learning_rate": 0.00019665899862287667, + "loss": 2.7608, + "step": 1742 + }, + { + "epoch": 0.14066661286417562, + "grad_norm": 0.894249439239502, + "learning_rate": 0.00019665495080201434, + "loss": 2.7469, + "step": 1743 + }, + { + "epoch": 0.14074731660075862, + "grad_norm": 0.8528907895088196, + "learning_rate": 0.00019665090057225803, + "loss": 2.773, + "step": 1744 + }, + { + "epoch": 0.14082802033734163, + "grad_norm": 0.7718498706817627, + "learning_rate": 0.00019664684793370855, + "loss": 2.8045, + "step": 1745 + }, + { + "epoch": 0.14090872407392463, + "grad_norm": 0.8013718128204346, + "learning_rate": 0.00019664279288646706, + "loss": 2.7665, + "step": 1746 + }, + { + "epoch": 0.14098942781050763, + "grad_norm": 0.828803539276123, + "learning_rate": 0.00019663873543063448, + "loss": 2.7846, + "step": 1747 + }, + { + "epoch": 0.14107013154709064, + "grad_norm": 0.8349393606185913, + "learning_rate": 0.00019663467556631204, + "loss": 2.7405, + "step": 1748 + }, + { + "epoch": 0.14115083528367364, + "grad_norm": 0.8273345232009888, + "learning_rate": 0.00019663061329360085, + "loss": 2.7578, + "step": 1749 + }, + { + "epoch": 0.14123153902025665, + "grad_norm": 0.7989444136619568, + "learning_rate": 0.0001966265486126022, + "loss": 
2.739, + "step": 1750 + }, + { + "epoch": 0.14131224275683965, + "grad_norm": 0.8690519332885742, + "learning_rate": 0.00019662248152341736, + "loss": 2.7566, + "step": 1751 + }, + { + "epoch": 0.14139294649342266, + "grad_norm": 0.8453623056411743, + "learning_rate": 0.0001966184120261477, + "loss": 2.8572, + "step": 1752 + }, + { + "epoch": 0.14147365023000566, + "grad_norm": 0.8396254777908325, + "learning_rate": 0.00019661434012089468, + "loss": 2.786, + "step": 1753 + }, + { + "epoch": 0.14155435396658866, + "grad_norm": 0.7643738389015198, + "learning_rate": 0.00019661026580775973, + "loss": 2.8193, + "step": 1754 + }, + { + "epoch": 0.14163505770317167, + "grad_norm": 0.8124154806137085, + "learning_rate": 0.00019660618908684443, + "loss": 2.7754, + "step": 1755 + }, + { + "epoch": 0.14171576143975467, + "grad_norm": 0.8620683550834656, + "learning_rate": 0.00019660210995825036, + "loss": 2.7827, + "step": 1756 + }, + { + "epoch": 0.14179646517633768, + "grad_norm": 0.8241196274757385, + "learning_rate": 0.0001965980284220792, + "loss": 2.7573, + "step": 1757 + }, + { + "epoch": 0.14187716891292068, + "grad_norm": 0.8264089822769165, + "learning_rate": 0.00019659394447843262, + "loss": 2.8214, + "step": 1758 + }, + { + "epoch": 0.14195787264950369, + "grad_norm": 0.9129722118377686, + "learning_rate": 0.00019658985812741247, + "loss": 2.7962, + "step": 1759 + }, + { + "epoch": 0.14203857638608666, + "grad_norm": 0.7976365089416504, + "learning_rate": 0.00019658576936912057, + "loss": 2.7534, + "step": 1760 + }, + { + "epoch": 0.14211928012266967, + "grad_norm": 0.7587228417396545, + "learning_rate": 0.00019658167820365882, + "loss": 2.7083, + "step": 1761 + }, + { + "epoch": 0.14219998385925267, + "grad_norm": 0.757882833480835, + "learning_rate": 0.00019657758463112918, + "loss": 2.7135, + "step": 1762 + }, + { + "epoch": 0.14228068759583568, + "grad_norm": 0.8541501760482788, + "learning_rate": 0.00019657348865163369, + "loss": 2.7833, + "step": 1763 + }, 
+ { + "epoch": 0.14236139133241868, + "grad_norm": 0.7708966135978699, + "learning_rate": 0.00019656939026527442, + "loss": 2.7128, + "step": 1764 + }, + { + "epoch": 0.14244209506900168, + "grad_norm": 0.8733000159263611, + "learning_rate": 0.00019656528947215347, + "loss": 2.7597, + "step": 1765 + }, + { + "epoch": 0.1425227988055847, + "grad_norm": 0.7913360595703125, + "learning_rate": 0.0001965611862723731, + "loss": 2.7681, + "step": 1766 + }, + { + "epoch": 0.1426035025421677, + "grad_norm": 0.8692380785942078, + "learning_rate": 0.00019655708066603555, + "loss": 2.7587, + "step": 1767 + }, + { + "epoch": 0.1426842062787507, + "grad_norm": 0.8231006860733032, + "learning_rate": 0.00019655297265324317, + "loss": 2.772, + "step": 1768 + }, + { + "epoch": 0.1427649100153337, + "grad_norm": 0.7373722791671753, + "learning_rate": 0.0001965488622340983, + "loss": 2.7875, + "step": 1769 + }, + { + "epoch": 0.1428456137519167, + "grad_norm": 0.8614751696586609, + "learning_rate": 0.0001965447494087034, + "loss": 2.7962, + "step": 1770 + }, + { + "epoch": 0.1429263174884997, + "grad_norm": 0.8336494565010071, + "learning_rate": 0.000196540634177161, + "loss": 2.7072, + "step": 1771 + }, + { + "epoch": 0.14300702122508271, + "grad_norm": 0.844292163848877, + "learning_rate": 0.00019653651653957362, + "loss": 2.8043, + "step": 1772 + }, + { + "epoch": 0.14308772496166572, + "grad_norm": 0.7366824150085449, + "learning_rate": 0.0001965323964960439, + "loss": 2.7296, + "step": 1773 + }, + { + "epoch": 0.14316842869824872, + "grad_norm": 0.75767982006073, + "learning_rate": 0.0001965282740466745, + "loss": 2.7946, + "step": 1774 + }, + { + "epoch": 0.14324913243483173, + "grad_norm": 0.8361382484436035, + "learning_rate": 0.00019652414919156823, + "loss": 2.7232, + "step": 1775 + }, + { + "epoch": 0.14332983617141473, + "grad_norm": 0.8473719358444214, + "learning_rate": 0.0001965200219308278, + "loss": 2.774, + "step": 1776 + }, + { + "epoch": 0.14341053990799774, + 
"grad_norm": 0.7446423172950745, + "learning_rate": 0.00019651589226455613, + "loss": 2.7439, + "step": 1777 + }, + { + "epoch": 0.14349124364458074, + "grad_norm": 0.8332851529121399, + "learning_rate": 0.00019651176019285616, + "loss": 2.7891, + "step": 1778 + }, + { + "epoch": 0.14357194738116374, + "grad_norm": 0.885313868522644, + "learning_rate": 0.0001965076257158308, + "loss": 2.7677, + "step": 1779 + }, + { + "epoch": 0.14365265111774675, + "grad_norm": 0.8506965637207031, + "learning_rate": 0.00019650348883358315, + "loss": 2.8112, + "step": 1780 + }, + { + "epoch": 0.14373335485432975, + "grad_norm": 0.8415799736976624, + "learning_rate": 0.0001964993495462163, + "loss": 2.8242, + "step": 1781 + }, + { + "epoch": 0.14381405859091276, + "grad_norm": 0.8501513004302979, + "learning_rate": 0.00019649520785383338, + "loss": 2.8352, + "step": 1782 + }, + { + "epoch": 0.14389476232749576, + "grad_norm": 0.7839778065681458, + "learning_rate": 0.00019649106375653767, + "loss": 2.7194, + "step": 1783 + }, + { + "epoch": 0.14397546606407877, + "grad_norm": 0.8013346195220947, + "learning_rate": 0.00019648691725443243, + "loss": 2.7665, + "step": 1784 + }, + { + "epoch": 0.14405616980066177, + "grad_norm": 1.0338317155838013, + "learning_rate": 0.00019648276834762095, + "loss": 2.8599, + "step": 1785 + }, + { + "epoch": 0.14413687353724478, + "grad_norm": 0.898417592048645, + "learning_rate": 0.0001964786170362067, + "loss": 2.7192, + "step": 1786 + }, + { + "epoch": 0.14421757727382778, + "grad_norm": 0.8876320123672485, + "learning_rate": 0.00019647446332029313, + "loss": 2.7722, + "step": 1787 + }, + { + "epoch": 0.14429828101041078, + "grad_norm": 0.819461464881897, + "learning_rate": 0.00019647030719998373, + "loss": 2.7698, + "step": 1788 + }, + { + "epoch": 0.1443789847469938, + "grad_norm": 0.848380446434021, + "learning_rate": 0.0001964661486753821, + "loss": 2.7894, + "step": 1789 + }, + { + "epoch": 0.1444596884835768, + "grad_norm": 0.8343753814697266, 
+ "learning_rate": 0.0001964619877465919, + "loss": 2.699, + "step": 1790 + }, + { + "epoch": 0.1445403922201598, + "grad_norm": 0.8718340396881104, + "learning_rate": 0.0001964578244137168, + "loss": 2.7313, + "step": 1791 + }, + { + "epoch": 0.1446210959567428, + "grad_norm": 0.866122841835022, + "learning_rate": 0.00019645365867686056, + "loss": 2.7112, + "step": 1792 + }, + { + "epoch": 0.1447017996933258, + "grad_norm": 0.8351789712905884, + "learning_rate": 0.000196449490536127, + "loss": 2.7765, + "step": 1793 + }, + { + "epoch": 0.1447825034299088, + "grad_norm": 0.8628408312797546, + "learning_rate": 0.00019644531999162004, + "loss": 2.7375, + "step": 1794 + }, + { + "epoch": 0.14486320716649181, + "grad_norm": 0.8414484858512878, + "learning_rate": 0.00019644114704344358, + "loss": 2.7502, + "step": 1795 + }, + { + "epoch": 0.14494391090307482, + "grad_norm": 0.9092586636543274, + "learning_rate": 0.00019643697169170166, + "loss": 2.7714, + "step": 1796 + }, + { + "epoch": 0.14502461463965782, + "grad_norm": 0.8458060622215271, + "learning_rate": 0.0001964327939364983, + "loss": 2.8376, + "step": 1797 + }, + { + "epoch": 0.14510531837624083, + "grad_norm": 0.8150759935379028, + "learning_rate": 0.00019642861377793764, + "loss": 2.7147, + "step": 1798 + }, + { + "epoch": 0.14518602211282383, + "grad_norm": 0.9008790850639343, + "learning_rate": 0.00019642443121612387, + "loss": 2.7786, + "step": 1799 + }, + { + "epoch": 0.14526672584940684, + "grad_norm": 0.848671555519104, + "learning_rate": 0.00019642024625116117, + "loss": 2.7813, + "step": 1800 + }, + { + "epoch": 0.14534742958598984, + "grad_norm": 0.8035007119178772, + "learning_rate": 0.00019641605888315393, + "loss": 2.7988, + "step": 1801 + }, + { + "epoch": 0.14542813332257284, + "grad_norm": 0.8210242390632629, + "learning_rate": 0.00019641186911220645, + "loss": 2.8451, + "step": 1802 + }, + { + "epoch": 0.14550883705915585, + "grad_norm": 0.8852066397666931, + "learning_rate": 
0.00019640767693842318, + "loss": 2.7492, + "step": 1803 + }, + { + "epoch": 0.14558954079573885, + "grad_norm": 0.8421196937561035, + "learning_rate": 0.0001964034823619086, + "loss": 2.759, + "step": 1804 + }, + { + "epoch": 0.14567024453232186, + "grad_norm": 0.8166298866271973, + "learning_rate": 0.00019639928538276724, + "loss": 2.7942, + "step": 1805 + }, + { + "epoch": 0.14575094826890486, + "grad_norm": 0.8502809405326843, + "learning_rate": 0.00019639508600110368, + "loss": 2.7829, + "step": 1806 + }, + { + "epoch": 0.14583165200548787, + "grad_norm": 0.8371078372001648, + "learning_rate": 0.0001963908842170226, + "loss": 2.7168, + "step": 1807 + }, + { + "epoch": 0.14591235574207087, + "grad_norm": 0.8148230910301208, + "learning_rate": 0.0001963866800306287, + "loss": 2.7706, + "step": 1808 + }, + { + "epoch": 0.14599305947865387, + "grad_norm": 0.8984564542770386, + "learning_rate": 0.0001963824734420268, + "loss": 2.7761, + "step": 1809 + }, + { + "epoch": 0.14607376321523688, + "grad_norm": 0.9357183575630188, + "learning_rate": 0.00019637826445132172, + "loss": 2.7738, + "step": 1810 + }, + { + "epoch": 0.14615446695181986, + "grad_norm": 0.8545449376106262, + "learning_rate": 0.00019637405305861834, + "loss": 2.772, + "step": 1811 + }, + { + "epoch": 0.14623517068840286, + "grad_norm": 1.1674948930740356, + "learning_rate": 0.00019636983926402165, + "loss": 2.8988, + "step": 1812 + }, + { + "epoch": 0.14631587442498586, + "grad_norm": 0.7875451445579529, + "learning_rate": 0.00019636562306763665, + "loss": 2.7053, + "step": 1813 + }, + { + "epoch": 0.14639657816156887, + "grad_norm": 0.8980962038040161, + "learning_rate": 0.0001963614044695684, + "loss": 2.7731, + "step": 1814 + }, + { + "epoch": 0.14647728189815187, + "grad_norm": 0.8403381705284119, + "learning_rate": 0.00019635718346992207, + "loss": 2.8555, + "step": 1815 + }, + { + "epoch": 0.14655798563473488, + "grad_norm": 0.8736433982849121, + "learning_rate": 0.00019635296006880284, + 
"loss": 2.7918, + "step": 1816 + }, + { + "epoch": 0.14663868937131788, + "grad_norm": 0.8604151606559753, + "learning_rate": 0.000196348734266316, + "loss": 2.7493, + "step": 1817 + }, + { + "epoch": 0.1467193931079009, + "grad_norm": 0.8329424262046814, + "learning_rate": 0.00019634450606256681, + "loss": 2.7348, + "step": 1818 + }, + { + "epoch": 0.1468000968444839, + "grad_norm": 0.9835913181304932, + "learning_rate": 0.0001963402754576607, + "loss": 2.7651, + "step": 1819 + }, + { + "epoch": 0.1468808005810669, + "grad_norm": 0.7968378067016602, + "learning_rate": 0.0001963360424517031, + "loss": 2.7672, + "step": 1820 + }, + { + "epoch": 0.1469615043176499, + "grad_norm": 0.8012512922286987, + "learning_rate": 0.00019633180704479948, + "loss": 2.8022, + "step": 1821 + }, + { + "epoch": 0.1470422080542329, + "grad_norm": 0.7656376957893372, + "learning_rate": 0.0001963275692370554, + "loss": 2.7561, + "step": 1822 + }, + { + "epoch": 0.1471229117908159, + "grad_norm": 0.8030453324317932, + "learning_rate": 0.00019632332902857656, + "loss": 2.8048, + "step": 1823 + }, + { + "epoch": 0.1472036155273989, + "grad_norm": 0.8050903677940369, + "learning_rate": 0.0001963190864194685, + "loss": 2.7846, + "step": 1824 + }, + { + "epoch": 0.14728431926398192, + "grad_norm": 0.8001886606216431, + "learning_rate": 0.00019631484140983705, + "loss": 2.7382, + "step": 1825 + }, + { + "epoch": 0.14736502300056492, + "grad_norm": 0.8589862585067749, + "learning_rate": 0.00019631059399978796, + "loss": 2.8376, + "step": 1826 + }, + { + "epoch": 0.14744572673714793, + "grad_norm": 0.86325603723526, + "learning_rate": 0.00019630634418942714, + "loss": 2.7643, + "step": 1827 + }, + { + "epoch": 0.14752643047373093, + "grad_norm": 0.7893280386924744, + "learning_rate": 0.00019630209197886046, + "loss": 2.713, + "step": 1828 + }, + { + "epoch": 0.14760713421031393, + "grad_norm": 0.8890528082847595, + "learning_rate": 0.00019629783736819394, + "loss": 2.7435, + "step": 1829 + }, + { 
+ "epoch": 0.14768783794689694, + "grad_norm": 0.794924795627594, + "learning_rate": 0.00019629358035753357, + "loss": 2.7703, + "step": 1830 + }, + { + "epoch": 0.14776854168347994, + "grad_norm": 0.7712973952293396, + "learning_rate": 0.00019628932094698545, + "loss": 2.7487, + "step": 1831 + }, + { + "epoch": 0.14784924542006295, + "grad_norm": 0.7810670137405396, + "learning_rate": 0.00019628505913665576, + "loss": 2.7687, + "step": 1832 + }, + { + "epoch": 0.14792994915664595, + "grad_norm": 0.8331059813499451, + "learning_rate": 0.0001962807949266507, + "loss": 2.7166, + "step": 1833 + }, + { + "epoch": 0.14801065289322896, + "grad_norm": 0.8983452916145325, + "learning_rate": 0.00019627652831707656, + "loss": 2.8096, + "step": 1834 + }, + { + "epoch": 0.14809135662981196, + "grad_norm": 0.8387179374694824, + "learning_rate": 0.00019627225930803963, + "loss": 2.8252, + "step": 1835 + }, + { + "epoch": 0.14817206036639496, + "grad_norm": 0.8619294762611389, + "learning_rate": 0.0001962679878996464, + "loss": 2.7623, + "step": 1836 + }, + { + "epoch": 0.14825276410297797, + "grad_norm": 0.8195026516914368, + "learning_rate": 0.0001962637140920032, + "loss": 2.7295, + "step": 1837 + }, + { + "epoch": 0.14833346783956097, + "grad_norm": 0.806216835975647, + "learning_rate": 0.00019625943788521664, + "loss": 2.7184, + "step": 1838 + }, + { + "epoch": 0.14841417157614398, + "grad_norm": 0.7758379578590393, + "learning_rate": 0.00019625515927939327, + "loss": 2.7675, + "step": 1839 + }, + { + "epoch": 0.14849487531272698, + "grad_norm": 0.7617168426513672, + "learning_rate": 0.0001962508782746397, + "loss": 2.8041, + "step": 1840 + }, + { + "epoch": 0.14857557904930999, + "grad_norm": 0.9630066156387329, + "learning_rate": 0.00019624659487106264, + "loss": 2.814, + "step": 1841 + }, + { + "epoch": 0.148656282785893, + "grad_norm": 0.7656112313270569, + "learning_rate": 0.00019624230906876888, + "loss": 2.7564, + "step": 1842 + }, + { + "epoch": 0.148736986522476, + 
"grad_norm": 0.9394779801368713, + "learning_rate": 0.0001962380208678652, + "loss": 2.7958, + "step": 1843 + }, + { + "epoch": 0.148817690259059, + "grad_norm": 0.7647004127502441, + "learning_rate": 0.00019623373026845842, + "loss": 2.72, + "step": 1844 + }, + { + "epoch": 0.148898393995642, + "grad_norm": 0.809079647064209, + "learning_rate": 0.00019622943727065555, + "loss": 2.7732, + "step": 1845 + }, + { + "epoch": 0.148979097732225, + "grad_norm": 0.8241337537765503, + "learning_rate": 0.00019622514187456357, + "loss": 2.759, + "step": 1846 + }, + { + "epoch": 0.149059801468808, + "grad_norm": 0.8979619145393372, + "learning_rate": 0.00019622084408028948, + "loss": 2.8307, + "step": 1847 + }, + { + "epoch": 0.14914050520539102, + "grad_norm": 0.8058865666389465, + "learning_rate": 0.00019621654388794047, + "loss": 2.807, + "step": 1848 + }, + { + "epoch": 0.14922120894197402, + "grad_norm": 0.81967693567276, + "learning_rate": 0.00019621224129762364, + "loss": 2.7762, + "step": 1849 + }, + { + "epoch": 0.14930191267855702, + "grad_norm": 0.7385755777359009, + "learning_rate": 0.0001962079363094463, + "loss": 2.7854, + "step": 1850 + }, + { + "epoch": 0.14938261641514003, + "grad_norm": 0.8585657477378845, + "learning_rate": 0.00019620362892351566, + "loss": 2.7781, + "step": 1851 + }, + { + "epoch": 0.14946332015172303, + "grad_norm": 0.8328986763954163, + "learning_rate": 0.00019619931913993912, + "loss": 2.8245, + "step": 1852 + }, + { + "epoch": 0.14954402388830604, + "grad_norm": 0.749727189540863, + "learning_rate": 0.0001961950069588241, + "loss": 2.8049, + "step": 1853 + }, + { + "epoch": 0.14962472762488904, + "grad_norm": 0.7886502742767334, + "learning_rate": 0.00019619069238027803, + "loss": 2.7521, + "step": 1854 + }, + { + "epoch": 0.14970543136147205, + "grad_norm": 0.816137433052063, + "learning_rate": 0.00019618637540440848, + "loss": 2.8383, + "step": 1855 + }, + { + "epoch": 0.14978613509805505, + "grad_norm": 0.80442214012146, + 
"learning_rate": 0.000196182056031323, + "loss": 2.7227, + "step": 1856 + }, + { + "epoch": 0.14986683883463806, + "grad_norm": 0.7605221271514893, + "learning_rate": 0.00019617773426112924, + "loss": 2.7494, + "step": 1857 + }, + { + "epoch": 0.14994754257122106, + "grad_norm": 0.8745137453079224, + "learning_rate": 0.00019617341009393497, + "loss": 2.6978, + "step": 1858 + }, + { + "epoch": 0.15002824630780406, + "grad_norm": 0.8151741623878479, + "learning_rate": 0.00019616908352984789, + "loss": 2.7817, + "step": 1859 + }, + { + "epoch": 0.15010895004438707, + "grad_norm": 0.773876428604126, + "learning_rate": 0.0001961647545689759, + "loss": 2.812, + "step": 1860 + }, + { + "epoch": 0.15018965378097007, + "grad_norm": 0.8216966390609741, + "learning_rate": 0.00019616042321142683, + "loss": 2.8181, + "step": 1861 + }, + { + "epoch": 0.15027035751755305, + "grad_norm": 0.8097409605979919, + "learning_rate": 0.00019615608945730862, + "loss": 2.8336, + "step": 1862 + }, + { + "epoch": 0.15035106125413605, + "grad_norm": 0.8085697293281555, + "learning_rate": 0.00019615175330672932, + "loss": 2.8176, + "step": 1863 + }, + { + "epoch": 0.15043176499071906, + "grad_norm": 0.7658133506774902, + "learning_rate": 0.00019614741475979701, + "loss": 2.7543, + "step": 1864 + }, + { + "epoch": 0.15051246872730206, + "grad_norm": 0.7193909883499146, + "learning_rate": 0.00019614307381661978, + "loss": 2.7475, + "step": 1865 + }, + { + "epoch": 0.15059317246388507, + "grad_norm": 0.835608959197998, + "learning_rate": 0.0001961387304773058, + "loss": 2.8017, + "step": 1866 + }, + { + "epoch": 0.15067387620046807, + "grad_norm": 0.7898489832878113, + "learning_rate": 0.0001961343847419634, + "loss": 2.7613, + "step": 1867 + }, + { + "epoch": 0.15075457993705108, + "grad_norm": 0.8031982183456421, + "learning_rate": 0.0001961300366107008, + "loss": 2.7442, + "step": 1868 + }, + { + "epoch": 0.15083528367363408, + "grad_norm": 0.8427363634109497, + "learning_rate": 
0.00019612568608362642, + "loss": 2.8095, + "step": 1869 + }, + { + "epoch": 0.15091598741021708, + "grad_norm": 0.8282802700996399, + "learning_rate": 0.00019612133316084863, + "loss": 2.7216, + "step": 1870 + }, + { + "epoch": 0.1509966911468001, + "grad_norm": 0.7799758911132812, + "learning_rate": 0.000196116977842476, + "loss": 2.793, + "step": 1871 + }, + { + "epoch": 0.1510773948833831, + "grad_norm": 0.8151525259017944, + "learning_rate": 0.00019611262012861702, + "loss": 2.7641, + "step": 1872 + }, + { + "epoch": 0.1511580986199661, + "grad_norm": 0.7926812767982483, + "learning_rate": 0.0001961082600193803, + "loss": 2.7523, + "step": 1873 + }, + { + "epoch": 0.1512388023565491, + "grad_norm": 0.8737135529518127, + "learning_rate": 0.0001961038975148745, + "loss": 2.7965, + "step": 1874 + }, + { + "epoch": 0.1513195060931321, + "grad_norm": 0.7948090434074402, + "learning_rate": 0.00019609953261520837, + "loss": 2.7737, + "step": 1875 + }, + { + "epoch": 0.1514002098297151, + "grad_norm": 0.8161277770996094, + "learning_rate": 0.0001960951653204907, + "loss": 2.7423, + "step": 1876 + }, + { + "epoch": 0.15148091356629811, + "grad_norm": 0.8904973864555359, + "learning_rate": 0.00019609079563083026, + "loss": 2.7066, + "step": 1877 + }, + { + "epoch": 0.15156161730288112, + "grad_norm": 0.8107061982154846, + "learning_rate": 0.00019608642354633604, + "loss": 2.7939, + "step": 1878 + }, + { + "epoch": 0.15164232103946412, + "grad_norm": 0.8410987854003906, + "learning_rate": 0.00019608204906711694, + "loss": 2.7521, + "step": 1879 + }, + { + "epoch": 0.15172302477604713, + "grad_norm": 0.8336483836174011, + "learning_rate": 0.0001960776721932821, + "loss": 2.7613, + "step": 1880 + }, + { + "epoch": 0.15180372851263013, + "grad_norm": 0.730549156665802, + "learning_rate": 0.00019607329292494044, + "loss": 2.8019, + "step": 1881 + }, + { + "epoch": 0.15188443224921314, + "grad_norm": 0.7543070912361145, + "learning_rate": 0.0001960689112622012, + "loss": 
2.6907, + "step": 1882 + }, + { + "epoch": 0.15196513598579614, + "grad_norm": 0.848414421081543, + "learning_rate": 0.00019606452720517359, + "loss": 2.7278, + "step": 1883 + }, + { + "epoch": 0.15204583972237914, + "grad_norm": 0.8331718444824219, + "learning_rate": 0.00019606014075396682, + "loss": 2.6994, + "step": 1884 + }, + { + "epoch": 0.15212654345896215, + "grad_norm": 0.9192764759063721, + "learning_rate": 0.00019605575190869025, + "loss": 2.7095, + "step": 1885 + }, + { + "epoch": 0.15220724719554515, + "grad_norm": 0.8377116322517395, + "learning_rate": 0.00019605136066945324, + "loss": 2.7925, + "step": 1886 + }, + { + "epoch": 0.15228795093212816, + "grad_norm": 0.7302869558334351, + "learning_rate": 0.00019604696703636525, + "loss": 2.7286, + "step": 1887 + }, + { + "epoch": 0.15236865466871116, + "grad_norm": 0.7972438335418701, + "learning_rate": 0.00019604257100953577, + "loss": 2.7732, + "step": 1888 + }, + { + "epoch": 0.15244935840529417, + "grad_norm": 1.0350826978683472, + "learning_rate": 0.00019603817258907435, + "loss": 2.8211, + "step": 1889 + }, + { + "epoch": 0.15253006214187717, + "grad_norm": 0.782755970954895, + "learning_rate": 0.00019603377177509067, + "loss": 2.8489, + "step": 1890 + }, + { + "epoch": 0.15261076587846018, + "grad_norm": 0.9072603583335876, + "learning_rate": 0.0001960293685676943, + "loss": 2.7764, + "step": 1891 + }, + { + "epoch": 0.15269146961504318, + "grad_norm": 0.7878704071044922, + "learning_rate": 0.0001960249629669951, + "loss": 2.7494, + "step": 1892 + }, + { + "epoch": 0.15277217335162618, + "grad_norm": 0.8770418167114258, + "learning_rate": 0.00019602055497310278, + "loss": 2.7318, + "step": 1893 + }, + { + "epoch": 0.1528528770882092, + "grad_norm": 0.8004975914955139, + "learning_rate": 0.00019601614458612723, + "loss": 2.7272, + "step": 1894 + }, + { + "epoch": 0.1529335808247922, + "grad_norm": 0.8511070013046265, + "learning_rate": 0.00019601173180617835, + "loss": 2.7876, + "step": 1895 + }, + 
{ + "epoch": 0.1530142845613752, + "grad_norm": 0.7946128845214844, + "learning_rate": 0.00019600731663336617, + "loss": 2.7435, + "step": 1896 + }, + { + "epoch": 0.1530949882979582, + "grad_norm": 0.8155317902565002, + "learning_rate": 0.00019600289906780067, + "loss": 2.7642, + "step": 1897 + }, + { + "epoch": 0.1531756920345412, + "grad_norm": 0.8086098432540894, + "learning_rate": 0.000195998479109592, + "loss": 2.7358, + "step": 1898 + }, + { + "epoch": 0.1532563957711242, + "grad_norm": 0.8698278665542603, + "learning_rate": 0.00019599405675885026, + "loss": 2.725, + "step": 1899 + }, + { + "epoch": 0.15333709950770721, + "grad_norm": 0.8756006360054016, + "learning_rate": 0.00019598963201568573, + "loss": 2.7209, + "step": 1900 + }, + { + "epoch": 0.15341780324429022, + "grad_norm": 0.7984628081321716, + "learning_rate": 0.0001959852048802086, + "loss": 2.7685, + "step": 1901 + }, + { + "epoch": 0.15349850698087322, + "grad_norm": 0.8244056105613708, + "learning_rate": 0.0001959807753525293, + "loss": 2.7692, + "step": 1902 + }, + { + "epoch": 0.15357921071745623, + "grad_norm": 0.8577731251716614, + "learning_rate": 0.00019597634343275814, + "loss": 2.7571, + "step": 1903 + }, + { + "epoch": 0.15365991445403923, + "grad_norm": 0.8410975933074951, + "learning_rate": 0.00019597190912100566, + "loss": 2.7862, + "step": 1904 + }, + { + "epoch": 0.15374061819062224, + "grad_norm": 0.9094158411026001, + "learning_rate": 0.0001959674724173823, + "loss": 2.7655, + "step": 1905 + }, + { + "epoch": 0.15382132192720524, + "grad_norm": 0.8375208973884583, + "learning_rate": 0.00019596303332199868, + "loss": 2.8129, + "step": 1906 + }, + { + "epoch": 0.15390202566378824, + "grad_norm": 0.8335977792739868, + "learning_rate": 0.00019595859183496543, + "loss": 2.7835, + "step": 1907 + }, + { + "epoch": 0.15398272940037125, + "grad_norm": 0.7973531484603882, + "learning_rate": 0.0001959541479563932, + "loss": 2.7785, + "step": 1908 + }, + { + "epoch": 0.15406343313695425, 
+ "grad_norm": 0.7808824181556702, + "learning_rate": 0.0001959497016863928, + "loss": 2.7862, + "step": 1909 + }, + { + "epoch": 0.15414413687353726, + "grad_norm": 0.853824257850647, + "learning_rate": 0.00019594525302507504, + "loss": 2.6721, + "step": 1910 + }, + { + "epoch": 0.15422484061012026, + "grad_norm": 0.8589324355125427, + "learning_rate": 0.00019594080197255073, + "loss": 2.7948, + "step": 1911 + }, + { + "epoch": 0.15430554434670327, + "grad_norm": 0.7951898574829102, + "learning_rate": 0.00019593634852893086, + "loss": 2.7903, + "step": 1912 + }, + { + "epoch": 0.15438624808328624, + "grad_norm": 0.8333349227905273, + "learning_rate": 0.0001959318926943264, + "loss": 2.8073, + "step": 1913 + }, + { + "epoch": 0.15446695181986925, + "grad_norm": 0.8552380800247192, + "learning_rate": 0.0001959274344688484, + "loss": 2.8199, + "step": 1914 + }, + { + "epoch": 0.15454765555645225, + "grad_norm": 0.8356214165687561, + "learning_rate": 0.000195922973852608, + "loss": 2.7985, + "step": 1915 + }, + { + "epoch": 0.15462835929303526, + "grad_norm": 0.7167248725891113, + "learning_rate": 0.00019591851084571634, + "loss": 2.6802, + "step": 1916 + }, + { + "epoch": 0.15470906302961826, + "grad_norm": 0.7980726361274719, + "learning_rate": 0.00019591404544828464, + "loss": 2.692, + "step": 1917 + }, + { + "epoch": 0.15478976676620126, + "grad_norm": 0.7766004800796509, + "learning_rate": 0.00019590957766042424, + "loss": 2.7219, + "step": 1918 + }, + { + "epoch": 0.15487047050278427, + "grad_norm": 0.828852653503418, + "learning_rate": 0.0001959051074822464, + "loss": 2.7369, + "step": 1919 + }, + { + "epoch": 0.15495117423936727, + "grad_norm": 0.7818129062652588, + "learning_rate": 0.0001959006349138626, + "loss": 2.7778, + "step": 1920 + }, + { + "epoch": 0.15503187797595028, + "grad_norm": 0.8428593873977661, + "learning_rate": 0.00019589615995538432, + "loss": 2.8257, + "step": 1921 + }, + { + "epoch": 0.15511258171253328, + "grad_norm": 
0.8756616115570068, + "learning_rate": 0.00019589168260692307, + "loss": 2.7692, + "step": 1922 + }, + { + "epoch": 0.15519328544911629, + "grad_norm": 0.7802519202232361, + "learning_rate": 0.0001958872028685904, + "loss": 2.7811, + "step": 1923 + }, + { + "epoch": 0.1552739891856993, + "grad_norm": 0.7787032723426819, + "learning_rate": 0.00019588272074049797, + "loss": 2.7546, + "step": 1924 + }, + { + "epoch": 0.1553546929222823, + "grad_norm": 0.848479151725769, + "learning_rate": 0.0001958782362227575, + "loss": 2.7759, + "step": 1925 + }, + { + "epoch": 0.1554353966588653, + "grad_norm": 0.8331353664398193, + "learning_rate": 0.00019587374931548076, + "loss": 2.7881, + "step": 1926 + }, + { + "epoch": 0.1555161003954483, + "grad_norm": 0.8646424412727356, + "learning_rate": 0.00019586926001877958, + "loss": 2.8059, + "step": 1927 + }, + { + "epoch": 0.1555968041320313, + "grad_norm": 0.912253737449646, + "learning_rate": 0.00019586476833276584, + "loss": 2.7446, + "step": 1928 + }, + { + "epoch": 0.1556775078686143, + "grad_norm": 0.9256471395492554, + "learning_rate": 0.00019586027425755147, + "loss": 2.8, + "step": 1929 + }, + { + "epoch": 0.15575821160519732, + "grad_norm": 1.0984607934951782, + "learning_rate": 0.0001958557777932485, + "loss": 2.7759, + "step": 1930 + }, + { + "epoch": 0.15583891534178032, + "grad_norm": 0.8736081123352051, + "learning_rate": 0.00019585127893996895, + "loss": 2.7464, + "step": 1931 + }, + { + "epoch": 0.15591961907836333, + "grad_norm": 0.932538628578186, + "learning_rate": 0.00019584677769782498, + "loss": 2.7874, + "step": 1932 + }, + { + "epoch": 0.15600032281494633, + "grad_norm": 0.9742087125778198, + "learning_rate": 0.0001958422740669288, + "loss": 2.7727, + "step": 1933 + }, + { + "epoch": 0.15608102655152933, + "grad_norm": 0.8975874781608582, + "learning_rate": 0.00019583776804739256, + "loss": 2.7812, + "step": 1934 + }, + { + "epoch": 0.15616173028811234, + "grad_norm": 0.9380232691764832, + "learning_rate": 
0.00019583325963932864, + "loss": 2.7284, + "step": 1935 + }, + { + "epoch": 0.15624243402469534, + "grad_norm": 0.8332872986793518, + "learning_rate": 0.00019582874884284938, + "loss": 2.7792, + "step": 1936 + }, + { + "epoch": 0.15632313776127835, + "grad_norm": 1.0017194747924805, + "learning_rate": 0.0001958242356580672, + "loss": 2.7187, + "step": 1937 + }, + { + "epoch": 0.15640384149786135, + "grad_norm": 0.9433515667915344, + "learning_rate": 0.0001958197200850946, + "loss": 2.8394, + "step": 1938 + }, + { + "epoch": 0.15648454523444436, + "grad_norm": 0.8781030178070068, + "learning_rate": 0.00019581520212404407, + "loss": 2.7667, + "step": 1939 + }, + { + "epoch": 0.15656524897102736, + "grad_norm": 0.895656168460846, + "learning_rate": 0.00019581068177502826, + "loss": 2.799, + "step": 1940 + }, + { + "epoch": 0.15664595270761036, + "grad_norm": 0.8336960673332214, + "learning_rate": 0.0001958061590381598, + "loss": 2.8152, + "step": 1941 + }, + { + "epoch": 0.15672665644419337, + "grad_norm": 0.9184536337852478, + "learning_rate": 0.00019580163391355143, + "loss": 2.7746, + "step": 1942 + }, + { + "epoch": 0.15680736018077637, + "grad_norm": 0.8564908504486084, + "learning_rate": 0.00019579710640131587, + "loss": 2.7674, + "step": 1943 + }, + { + "epoch": 0.15688806391735938, + "grad_norm": 0.7491608262062073, + "learning_rate": 0.00019579257650156605, + "loss": 2.7665, + "step": 1944 + }, + { + "epoch": 0.15696876765394238, + "grad_norm": 0.9165031313896179, + "learning_rate": 0.00019578804421441478, + "loss": 2.7343, + "step": 1945 + }, + { + "epoch": 0.15704947139052539, + "grad_norm": 0.8413978815078735, + "learning_rate": 0.00019578350953997512, + "loss": 2.7503, + "step": 1946 + }, + { + "epoch": 0.1571301751271084, + "grad_norm": 0.7820419073104858, + "learning_rate": 0.00019577897247835993, + "loss": 2.7535, + "step": 1947 + }, + { + "epoch": 0.1572108788636914, + "grad_norm": 0.8134996294975281, + "learning_rate": 0.00019577443302968246, + 
"loss": 2.7504, + "step": 1948 + }, + { + "epoch": 0.1572915826002744, + "grad_norm": 0.8201301097869873, + "learning_rate": 0.00019576989119405574, + "loss": 2.6927, + "step": 1949 + }, + { + "epoch": 0.1573722863368574, + "grad_norm": 0.8343217372894287, + "learning_rate": 0.00019576534697159296, + "loss": 2.7742, + "step": 1950 + }, + { + "epoch": 0.1574529900734404, + "grad_norm": 0.8161751627922058, + "learning_rate": 0.0001957608003624074, + "loss": 2.8236, + "step": 1951 + }, + { + "epoch": 0.1575336938100234, + "grad_norm": 0.8626808524131775, + "learning_rate": 0.00019575625136661242, + "loss": 2.7305, + "step": 1952 + }, + { + "epoch": 0.15761439754660642, + "grad_norm": 0.8238986730575562, + "learning_rate": 0.0001957516999843213, + "loss": 2.7641, + "step": 1953 + }, + { + "epoch": 0.15769510128318942, + "grad_norm": 0.7806095480918884, + "learning_rate": 0.00019574714621564755, + "loss": 2.7155, + "step": 1954 + }, + { + "epoch": 0.15777580501977242, + "grad_norm": 0.8137761950492859, + "learning_rate": 0.0001957425900607046, + "loss": 2.7529, + "step": 1955 + }, + { + "epoch": 0.15785650875635543, + "grad_norm": 0.8383988738059998, + "learning_rate": 0.00019573803151960606, + "loss": 2.7726, + "step": 1956 + }, + { + "epoch": 0.15793721249293843, + "grad_norm": 0.8734413385391235, + "learning_rate": 0.00019573347059246549, + "loss": 2.8563, + "step": 1957 + }, + { + "epoch": 0.15801791622952144, + "grad_norm": 0.8018438816070557, + "learning_rate": 0.0001957289072793966, + "loss": 2.8031, + "step": 1958 + }, + { + "epoch": 0.15809861996610444, + "grad_norm": 0.8175764083862305, + "learning_rate": 0.0001957243415805131, + "loss": 2.7824, + "step": 1959 + }, + { + "epoch": 0.15817932370268745, + "grad_norm": 0.7642164826393127, + "learning_rate": 0.00019571977349592878, + "loss": 2.7666, + "step": 1960 + }, + { + "epoch": 0.15826002743927045, + "grad_norm": 0.7584841847419739, + "learning_rate": 0.0001957152030257575, + "loss": 2.7211, + "step": 1961 + 
}, + { + "epoch": 0.15834073117585346, + "grad_norm": 0.822610080242157, + "learning_rate": 0.00019571063017011312, + "loss": 2.7025, + "step": 1962 + }, + { + "epoch": 0.15842143491243646, + "grad_norm": 0.7553817629814148, + "learning_rate": 0.00019570605492910968, + "loss": 2.8122, + "step": 1963 + }, + { + "epoch": 0.15850213864901944, + "grad_norm": 0.7224497199058533, + "learning_rate": 0.0001957014773028612, + "loss": 2.7613, + "step": 1964 + }, + { + "epoch": 0.15858284238560244, + "grad_norm": 0.8563623428344727, + "learning_rate": 0.00019569689729148168, + "loss": 2.8005, + "step": 1965 + }, + { + "epoch": 0.15866354612218544, + "grad_norm": 0.7665508389472961, + "learning_rate": 0.00019569231489508537, + "loss": 2.7387, + "step": 1966 + }, + { + "epoch": 0.15874424985876845, + "grad_norm": 0.7788479328155518, + "learning_rate": 0.0001956877301137864, + "loss": 2.7229, + "step": 1967 + }, + { + "epoch": 0.15882495359535145, + "grad_norm": 0.7326748371124268, + "learning_rate": 0.00019568314294769908, + "loss": 2.7728, + "step": 1968 + }, + { + "epoch": 0.15890565733193446, + "grad_norm": 0.790492594242096, + "learning_rate": 0.00019567855339693772, + "loss": 2.7809, + "step": 1969 + }, + { + "epoch": 0.15898636106851746, + "grad_norm": 0.8026898503303528, + "learning_rate": 0.0001956739614616167, + "loss": 2.7267, + "step": 1970 + }, + { + "epoch": 0.15906706480510047, + "grad_norm": 0.7963770627975464, + "learning_rate": 0.00019566936714185046, + "loss": 2.7161, + "step": 1971 + }, + { + "epoch": 0.15914776854168347, + "grad_norm": 0.7708200216293335, + "learning_rate": 0.00019566477043775354, + "loss": 2.7223, + "step": 1972 + }, + { + "epoch": 0.15922847227826648, + "grad_norm": 0.8036624789237976, + "learning_rate": 0.00019566017134944042, + "loss": 2.7644, + "step": 1973 + }, + { + "epoch": 0.15930917601484948, + "grad_norm": 0.8221341967582703, + "learning_rate": 0.00019565556987702581, + "loss": 2.7629, + "step": 1974 + }, + { + "epoch": 
0.15938987975143248, + "grad_norm": 0.7685462832450867, + "learning_rate": 0.00019565096602062435, + "loss": 2.8016, + "step": 1975 + }, + { + "epoch": 0.1594705834880155, + "grad_norm": 0.8173574209213257, + "learning_rate": 0.00019564635978035075, + "loss": 2.761, + "step": 1976 + }, + { + "epoch": 0.1595512872245985, + "grad_norm": 0.7567519545555115, + "learning_rate": 0.00019564175115631988, + "loss": 2.7794, + "step": 1977 + }, + { + "epoch": 0.1596319909611815, + "grad_norm": 0.8754587173461914, + "learning_rate": 0.00019563714014864654, + "loss": 2.7769, + "step": 1978 + }, + { + "epoch": 0.1597126946977645, + "grad_norm": 0.753871738910675, + "learning_rate": 0.00019563252675744569, + "loss": 2.7489, + "step": 1979 + }, + { + "epoch": 0.1597933984343475, + "grad_norm": 0.777103841304779, + "learning_rate": 0.00019562791098283225, + "loss": 2.7667, + "step": 1980 + }, + { + "epoch": 0.1598741021709305, + "grad_norm": 0.8227293491363525, + "learning_rate": 0.00019562329282492131, + "loss": 2.7904, + "step": 1981 + }, + { + "epoch": 0.15995480590751351, + "grad_norm": 0.7595541477203369, + "learning_rate": 0.00019561867228382797, + "loss": 2.7654, + "step": 1982 + }, + { + "epoch": 0.16003550964409652, + "grad_norm": 0.8330550789833069, + "learning_rate": 0.00019561404935966733, + "loss": 2.7533, + "step": 1983 + }, + { + "epoch": 0.16011621338067952, + "grad_norm": 0.8213297128677368, + "learning_rate": 0.0001956094240525547, + "loss": 2.8103, + "step": 1984 + }, + { + "epoch": 0.16019691711726253, + "grad_norm": 0.8046056628227234, + "learning_rate": 0.00019560479636260527, + "loss": 2.7666, + "step": 1985 + }, + { + "epoch": 0.16027762085384553, + "grad_norm": 0.7886037230491638, + "learning_rate": 0.0001956001662899344, + "loss": 2.7066, + "step": 1986 + }, + { + "epoch": 0.16035832459042854, + "grad_norm": 0.8300043940544128, + "learning_rate": 0.00019559553383465748, + "loss": 2.7617, + "step": 1987 + }, + { + "epoch": 0.16043902832701154, + 
"grad_norm": 0.7963815331459045, + "learning_rate": 0.00019559089899688994, + "loss": 2.6891, + "step": 1988 + }, + { + "epoch": 0.16051973206359454, + "grad_norm": 0.7794002294540405, + "learning_rate": 0.00019558626177674734, + "loss": 2.8012, + "step": 1989 + }, + { + "epoch": 0.16060043580017755, + "grad_norm": 0.8345863819122314, + "learning_rate": 0.00019558162217434526, + "loss": 2.7715, + "step": 1990 + }, + { + "epoch": 0.16068113953676055, + "grad_norm": 0.8883393406867981, + "learning_rate": 0.00019557698018979927, + "loss": 2.7863, + "step": 1991 + }, + { + "epoch": 0.16076184327334356, + "grad_norm": 0.8069450259208679, + "learning_rate": 0.0001955723358232251, + "loss": 2.759, + "step": 1992 + }, + { + "epoch": 0.16084254700992656, + "grad_norm": 0.9014191031455994, + "learning_rate": 0.00019556768907473852, + "loss": 2.711, + "step": 1993 + }, + { + "epoch": 0.16092325074650957, + "grad_norm": 0.8429470658302307, + "learning_rate": 0.0001955630399444553, + "loss": 2.6936, + "step": 1994 + }, + { + "epoch": 0.16100395448309257, + "grad_norm": 0.7859500050544739, + "learning_rate": 0.00019555838843249128, + "loss": 2.7343, + "step": 1995 + }, + { + "epoch": 0.16108465821967557, + "grad_norm": 0.8068249821662903, + "learning_rate": 0.00019555373453896245, + "loss": 2.7492, + "step": 1996 + }, + { + "epoch": 0.16116536195625858, + "grad_norm": 0.8194023370742798, + "learning_rate": 0.00019554907826398478, + "loss": 2.7265, + "step": 1997 + }, + { + "epoch": 0.16124606569284158, + "grad_norm": 0.8139404654502869, + "learning_rate": 0.00019554441960767434, + "loss": 2.7311, + "step": 1998 + }, + { + "epoch": 0.1613267694294246, + "grad_norm": 0.8210673928260803, + "learning_rate": 0.00019553975857014718, + "loss": 2.7095, + "step": 1999 + }, + { + "epoch": 0.1614074731660076, + "grad_norm": 0.8615561723709106, + "learning_rate": 0.0001955350951515195, + "loss": 2.7458, + "step": 2000 + }, + { + "epoch": 0.1614074731660076, + "eval_loss": 
2.6739437580108643, + "eval_runtime": 813.8274, + "eval_samples_per_second": 3.219, + "eval_steps_per_second": 0.537, + "step": 2000 + }, + { + "epoch": 0.1614881769025906, + "grad_norm": 0.8945594429969788, + "learning_rate": 0.0001955304293519075, + "loss": 2.776, + "step": 2001 + }, + { + "epoch": 0.1615688806391736, + "grad_norm": 0.7943438291549683, + "learning_rate": 0.00019552576117142748, + "loss": 2.7484, + "step": 2002 + }, + { + "epoch": 0.1616495843757566, + "grad_norm": 0.8264374136924744, + "learning_rate": 0.00019552109061019582, + "loss": 2.7725, + "step": 2003 + }, + { + "epoch": 0.1617302881123396, + "grad_norm": 0.7591681480407715, + "learning_rate": 0.00019551641766832887, + "loss": 2.7217, + "step": 2004 + }, + { + "epoch": 0.16181099184892261, + "grad_norm": 0.8275293707847595, + "learning_rate": 0.0001955117423459431, + "loss": 2.7279, + "step": 2005 + }, + { + "epoch": 0.16189169558550562, + "grad_norm": 0.8109650611877441, + "learning_rate": 0.00019550706464315504, + "loss": 2.8111, + "step": 2006 + }, + { + "epoch": 0.16197239932208862, + "grad_norm": 0.8710397481918335, + "learning_rate": 0.00019550238456008127, + "loss": 2.7166, + "step": 2007 + }, + { + "epoch": 0.16205310305867163, + "grad_norm": 0.8569270968437195, + "learning_rate": 0.00019549770209683845, + "loss": 2.7739, + "step": 2008 + }, + { + "epoch": 0.16213380679525463, + "grad_norm": 0.7927817702293396, + "learning_rate": 0.00019549301725354325, + "loss": 2.7154, + "step": 2009 + }, + { + "epoch": 0.16221451053183764, + "grad_norm": 0.7576590776443481, + "learning_rate": 0.00019548833003031244, + "loss": 2.7276, + "step": 2010 + }, + { + "epoch": 0.16229521426842064, + "grad_norm": 0.8092780709266663, + "learning_rate": 0.00019548364042726283, + "loss": 2.7494, + "step": 2011 + }, + { + "epoch": 0.16237591800500364, + "grad_norm": 0.7643424868583679, + "learning_rate": 0.0001954789484445113, + "loss": 2.7877, + "step": 2012 + }, + { + "epoch": 0.16245662174158665, + 
"grad_norm": 0.8235166072845459, + "learning_rate": 0.0001954742540821748, + "loss": 2.7884, + "step": 2013 + }, + { + "epoch": 0.16253732547816965, + "grad_norm": 0.9297853708267212, + "learning_rate": 0.00019546955734037034, + "loss": 2.765, + "step": 2014 + }, + { + "epoch": 0.16261802921475263, + "grad_norm": 0.7778275609016418, + "learning_rate": 0.0001954648582192149, + "loss": 2.7178, + "step": 2015 + }, + { + "epoch": 0.16269873295133563, + "grad_norm": 0.8767017126083374, + "learning_rate": 0.00019546015671882566, + "loss": 2.8254, + "step": 2016 + }, + { + "epoch": 0.16277943668791864, + "grad_norm": 0.7870603203773499, + "learning_rate": 0.0001954554528393198, + "loss": 2.797, + "step": 2017 + }, + { + "epoch": 0.16286014042450164, + "grad_norm": 0.8112391233444214, + "learning_rate": 0.00019545074658081454, + "loss": 2.8562, + "step": 2018 + }, + { + "epoch": 0.16294084416108465, + "grad_norm": 0.8216677308082581, + "learning_rate": 0.00019544603794342713, + "loss": 2.7894, + "step": 2019 + }, + { + "epoch": 0.16302154789766765, + "grad_norm": 0.8445515632629395, + "learning_rate": 0.00019544132692727497, + "loss": 2.8618, + "step": 2020 + }, + { + "epoch": 0.16310225163425066, + "grad_norm": 0.8275444507598877, + "learning_rate": 0.00019543661353247548, + "loss": 2.8087, + "step": 2021 + }, + { + "epoch": 0.16318295537083366, + "grad_norm": 0.8142833709716797, + "learning_rate": 0.00019543189775914608, + "loss": 2.8075, + "step": 2022 + }, + { + "epoch": 0.16326365910741666, + "grad_norm": 0.8182976245880127, + "learning_rate": 0.0001954271796074043, + "loss": 2.8312, + "step": 2023 + }, + { + "epoch": 0.16334436284399967, + "grad_norm": 0.7629228234291077, + "learning_rate": 0.0001954224590773678, + "loss": 2.7191, + "step": 2024 + }, + { + "epoch": 0.16342506658058267, + "grad_norm": 0.8630000948905945, + "learning_rate": 0.00019541773616915418, + "loss": 2.8013, + "step": 2025 + }, + { + "epoch": 0.16350577031716568, + "grad_norm": 
0.8917906880378723, + "learning_rate": 0.00019541301088288115, + "loss": 2.7573, + "step": 2026 + }, + { + "epoch": 0.16358647405374868, + "grad_norm": 0.8641694188117981, + "learning_rate": 0.00019540828321866648, + "loss": 2.7509, + "step": 2027 + }, + { + "epoch": 0.16366717779033169, + "grad_norm": 0.7687639594078064, + "learning_rate": 0.00019540355317662798, + "loss": 2.7266, + "step": 2028 + }, + { + "epoch": 0.1637478815269147, + "grad_norm": 0.7870400547981262, + "learning_rate": 0.00019539882075688355, + "loss": 2.8217, + "step": 2029 + }, + { + "epoch": 0.1638285852634977, + "grad_norm": 0.9373054504394531, + "learning_rate": 0.0001953940859595511, + "loss": 2.7562, + "step": 2030 + }, + { + "epoch": 0.1639092890000807, + "grad_norm": 0.7941255569458008, + "learning_rate": 0.00019538934878474872, + "loss": 2.7553, + "step": 2031 + }, + { + "epoch": 0.1639899927366637, + "grad_norm": 0.735977053642273, + "learning_rate": 0.00019538460923259438, + "loss": 2.7058, + "step": 2032 + }, + { + "epoch": 0.1640706964732467, + "grad_norm": 0.7812782526016235, + "learning_rate": 0.00019537986730320625, + "loss": 2.7885, + "step": 2033 + }, + { + "epoch": 0.1641514002098297, + "grad_norm": 1.1534128189086914, + "learning_rate": 0.0001953751229967025, + "loss": 2.7139, + "step": 2034 + }, + { + "epoch": 0.16423210394641272, + "grad_norm": 0.9139814972877502, + "learning_rate": 0.00019537037631320135, + "loss": 2.7869, + "step": 2035 + }, + { + "epoch": 0.16431280768299572, + "grad_norm": 0.8330421447753906, + "learning_rate": 0.00019536562725282116, + "loss": 2.7491, + "step": 2036 + }, + { + "epoch": 0.16439351141957873, + "grad_norm": 0.9040594696998596, + "learning_rate": 0.00019536087581568026, + "loss": 2.7637, + "step": 2037 + }, + { + "epoch": 0.16447421515616173, + "grad_norm": 0.9158666729927063, + "learning_rate": 0.00019535612200189705, + "loss": 2.7709, + "step": 2038 + }, + { + "epoch": 0.16455491889274473, + "grad_norm": 0.8668088912963867, + 
"learning_rate": 0.00019535136581158997, + "loss": 2.7994, + "step": 2039 + }, + { + "epoch": 0.16463562262932774, + "grad_norm": 0.9179345369338989, + "learning_rate": 0.00019534660724487764, + "loss": 2.747, + "step": 2040 + }, + { + "epoch": 0.16471632636591074, + "grad_norm": 0.9690881967544556, + "learning_rate": 0.00019534184630187862, + "loss": 2.742, + "step": 2041 + }, + { + "epoch": 0.16479703010249375, + "grad_norm": 0.8478729724884033, + "learning_rate": 0.00019533708298271157, + "loss": 2.7824, + "step": 2042 + }, + { + "epoch": 0.16487773383907675, + "grad_norm": 0.8286584615707397, + "learning_rate": 0.00019533231728749518, + "loss": 2.7263, + "step": 2043 + }, + { + "epoch": 0.16495843757565976, + "grad_norm": 0.8095324039459229, + "learning_rate": 0.00019532754921634826, + "loss": 2.7845, + "step": 2044 + }, + { + "epoch": 0.16503914131224276, + "grad_norm": 0.9552872776985168, + "learning_rate": 0.0001953227787693896, + "loss": 2.7676, + "step": 2045 + }, + { + "epoch": 0.16511984504882576, + "grad_norm": 1.021515130996704, + "learning_rate": 0.00019531800594673815, + "loss": 2.784, + "step": 2046 + }, + { + "epoch": 0.16520054878540877, + "grad_norm": 0.7847293019294739, + "learning_rate": 0.00019531323074851276, + "loss": 2.7319, + "step": 2047 + }, + { + "epoch": 0.16528125252199177, + "grad_norm": 0.7803899049758911, + "learning_rate": 0.0001953084531748326, + "loss": 2.8321, + "step": 2048 + }, + { + "epoch": 0.16536195625857478, + "grad_norm": 0.8687692880630493, + "learning_rate": 0.0001953036732258166, + "loss": 2.763, + "step": 2049 + }, + { + "epoch": 0.16544265999515778, + "grad_norm": 0.8212031126022339, + "learning_rate": 0.00019529889090158392, + "loss": 2.7262, + "step": 2050 + }, + { + "epoch": 0.16552336373174079, + "grad_norm": 0.8460689187049866, + "learning_rate": 0.0001952941062022538, + "loss": 2.8018, + "step": 2051 + }, + { + "epoch": 0.1656040674683238, + "grad_norm": 0.9189361929893494, + "learning_rate": 
0.00019528931912794547, + "loss": 2.8079, + "step": 2052 + }, + { + "epoch": 0.1656847712049068, + "grad_norm": 0.9529987573623657, + "learning_rate": 0.00019528452967877816, + "loss": 2.8015, + "step": 2053 + }, + { + "epoch": 0.1657654749414898, + "grad_norm": 0.8468493223190308, + "learning_rate": 0.00019527973785487133, + "loss": 2.8013, + "step": 2054 + }, + { + "epoch": 0.1658461786780728, + "grad_norm": 0.8150945901870728, + "learning_rate": 0.00019527494365634436, + "loss": 2.7975, + "step": 2055 + }, + { + "epoch": 0.1659268824146558, + "grad_norm": 0.814942479133606, + "learning_rate": 0.00019527014708331674, + "loss": 2.7503, + "step": 2056 + }, + { + "epoch": 0.1660075861512388, + "grad_norm": 0.7841517329216003, + "learning_rate": 0.000195265348135908, + "loss": 2.7921, + "step": 2057 + }, + { + "epoch": 0.16608828988782182, + "grad_norm": 0.7603738903999329, + "learning_rate": 0.0001952605468142378, + "loss": 2.7658, + "step": 2058 + }, + { + "epoch": 0.16616899362440482, + "grad_norm": 0.8460882902145386, + "learning_rate": 0.00019525574311842574, + "loss": 2.7644, + "step": 2059 + }, + { + "epoch": 0.16624969736098782, + "grad_norm": 0.8633555173873901, + "learning_rate": 0.00019525093704859156, + "loss": 2.7956, + "step": 2060 + }, + { + "epoch": 0.16633040109757083, + "grad_norm": 0.7700977325439453, + "learning_rate": 0.00019524612860485503, + "loss": 2.7103, + "step": 2061 + }, + { + "epoch": 0.16641110483415383, + "grad_norm": 0.888770580291748, + "learning_rate": 0.00019524131778733602, + "loss": 2.7325, + "step": 2062 + }, + { + "epoch": 0.16649180857073684, + "grad_norm": 0.8338149189949036, + "learning_rate": 0.00019523650459615438, + "loss": 2.7533, + "step": 2063 + }, + { + "epoch": 0.16657251230731984, + "grad_norm": 0.7723987698554993, + "learning_rate": 0.0001952316890314301, + "loss": 2.7316, + "step": 2064 + }, + { + "epoch": 0.16665321604390285, + "grad_norm": 0.8952934145927429, + "learning_rate": 0.0001952268710932832, + "loss": 
2.7825, + "step": 2065 + }, + { + "epoch": 0.16673391978048582, + "grad_norm": 0.8201496601104736, + "learning_rate": 0.00019522205078183378, + "loss": 2.7162, + "step": 2066 + }, + { + "epoch": 0.16681462351706883, + "grad_norm": 0.7733781337738037, + "learning_rate": 0.00019521722809720188, + "loss": 2.7834, + "step": 2067 + }, + { + "epoch": 0.16689532725365183, + "grad_norm": 0.8285118937492371, + "learning_rate": 0.0001952124030395078, + "loss": 2.8475, + "step": 2068 + }, + { + "epoch": 0.16697603099023484, + "grad_norm": 0.84097820520401, + "learning_rate": 0.00019520757560887174, + "loss": 2.784, + "step": 2069 + }, + { + "epoch": 0.16705673472681784, + "grad_norm": 0.7336563467979431, + "learning_rate": 0.000195202745805414, + "loss": 2.7663, + "step": 2070 + }, + { + "epoch": 0.16713743846340084, + "grad_norm": 0.8359388113021851, + "learning_rate": 0.000195197913629255, + "loss": 2.7931, + "step": 2071 + }, + { + "epoch": 0.16721814219998385, + "grad_norm": 0.8272559642791748, + "learning_rate": 0.0001951930790805151, + "loss": 2.8578, + "step": 2072 + }, + { + "epoch": 0.16729884593656685, + "grad_norm": 0.7970743179321289, + "learning_rate": 0.00019518824215931487, + "loss": 2.8148, + "step": 2073 + }, + { + "epoch": 0.16737954967314986, + "grad_norm": 0.856200098991394, + "learning_rate": 0.00019518340286577482, + "loss": 2.8067, + "step": 2074 + }, + { + "epoch": 0.16746025340973286, + "grad_norm": 0.7581893801689148, + "learning_rate": 0.00019517856120001556, + "loss": 2.7339, + "step": 2075 + }, + { + "epoch": 0.16754095714631587, + "grad_norm": 0.8488386869430542, + "learning_rate": 0.00019517371716215774, + "loss": 2.7332, + "step": 2076 + }, + { + "epoch": 0.16762166088289887, + "grad_norm": 0.7488275170326233, + "learning_rate": 0.00019516887075232212, + "loss": 2.7734, + "step": 2077 + }, + { + "epoch": 0.16770236461948188, + "grad_norm": 0.9173932075500488, + "learning_rate": 0.00019516402197062945, + "loss": 2.7792, + "step": 2078 + }, + { + 
"epoch": 0.16778306835606488, + "grad_norm": 0.8200702667236328, + "learning_rate": 0.0001951591708172006, + "loss": 2.8046, + "step": 2079 + }, + { + "epoch": 0.16786377209264788, + "grad_norm": 0.8270781636238098, + "learning_rate": 0.00019515431729215642, + "loss": 2.7467, + "step": 2080 + }, + { + "epoch": 0.1679444758292309, + "grad_norm": 0.8660609722137451, + "learning_rate": 0.00019514946139561799, + "loss": 2.8169, + "step": 2081 + }, + { + "epoch": 0.1680251795658139, + "grad_norm": 0.78753262758255, + "learning_rate": 0.0001951446031277062, + "loss": 2.7388, + "step": 2082 + }, + { + "epoch": 0.1681058833023969, + "grad_norm": 0.791593074798584, + "learning_rate": 0.00019513974248854224, + "loss": 2.8776, + "step": 2083 + }, + { + "epoch": 0.1681865870389799, + "grad_norm": 0.7883535623550415, + "learning_rate": 0.0001951348794782472, + "loss": 2.78, + "step": 2084 + }, + { + "epoch": 0.1682672907755629, + "grad_norm": 0.7877013087272644, + "learning_rate": 0.00019513001409694224, + "loss": 2.7559, + "step": 2085 + }, + { + "epoch": 0.1683479945121459, + "grad_norm": 0.8838450908660889, + "learning_rate": 0.00019512514634474864, + "loss": 2.7489, + "step": 2086 + }, + { + "epoch": 0.16842869824872891, + "grad_norm": 0.7751588821411133, + "learning_rate": 0.00019512027622178775, + "loss": 2.6832, + "step": 2087 + }, + { + "epoch": 0.16850940198531192, + "grad_norm": 0.90345299243927, + "learning_rate": 0.00019511540372818095, + "loss": 2.8189, + "step": 2088 + }, + { + "epoch": 0.16859010572189492, + "grad_norm": 0.7820938229560852, + "learning_rate": 0.00019511052886404966, + "loss": 2.7655, + "step": 2089 + }, + { + "epoch": 0.16867080945847793, + "grad_norm": 0.8250375986099243, + "learning_rate": 0.00019510565162951537, + "loss": 2.7866, + "step": 2090 + }, + { + "epoch": 0.16875151319506093, + "grad_norm": 0.8063845634460449, + "learning_rate": 0.00019510077202469962, + "loss": 2.7774, + "step": 2091 + }, + { + "epoch": 0.16883221693164394, + 
"grad_norm": 0.7627965807914734, + "learning_rate": 0.00019509589004972403, + "loss": 2.7201, + "step": 2092 + }, + { + "epoch": 0.16891292066822694, + "grad_norm": 0.8392470479011536, + "learning_rate": 0.00019509100570471027, + "loss": 2.7613, + "step": 2093 + }, + { + "epoch": 0.16899362440480994, + "grad_norm": 0.7807552814483643, + "learning_rate": 0.0001950861189897801, + "loss": 2.7451, + "step": 2094 + }, + { + "epoch": 0.16907432814139295, + "grad_norm": 0.7829259634017944, + "learning_rate": 0.00019508122990505528, + "loss": 2.7128, + "step": 2095 + }, + { + "epoch": 0.16915503187797595, + "grad_norm": 0.7793046832084656, + "learning_rate": 0.00019507633845065766, + "loss": 2.7849, + "step": 2096 + }, + { + "epoch": 0.16923573561455896, + "grad_norm": 0.869752824306488, + "learning_rate": 0.00019507144462670915, + "loss": 2.7882, + "step": 2097 + }, + { + "epoch": 0.16931643935114196, + "grad_norm": 0.7550783753395081, + "learning_rate": 0.00019506654843333174, + "loss": 2.7211, + "step": 2098 + }, + { + "epoch": 0.16939714308772497, + "grad_norm": 0.8364891409873962, + "learning_rate": 0.0001950616498706474, + "loss": 2.7171, + "step": 2099 + }, + { + "epoch": 0.16947784682430797, + "grad_norm": 0.8026537299156189, + "learning_rate": 0.0001950567489387783, + "loss": 2.8362, + "step": 2100 + }, + { + "epoch": 0.16955855056089097, + "grad_norm": 0.8073398470878601, + "learning_rate": 0.00019505184563784652, + "loss": 2.7635, + "step": 2101 + }, + { + "epoch": 0.16963925429747398, + "grad_norm": 0.8168368935585022, + "learning_rate": 0.00019504693996797424, + "loss": 2.7553, + "step": 2102 + }, + { + "epoch": 0.16971995803405698, + "grad_norm": 0.7933681011199951, + "learning_rate": 0.0001950420319292838, + "loss": 2.7887, + "step": 2103 + }, + { + "epoch": 0.16980066177064, + "grad_norm": 0.8326540589332581, + "learning_rate": 0.00019503712152189748, + "loss": 2.7844, + "step": 2104 + }, + { + "epoch": 0.169881365507223, + "grad_norm": 0.8357202410697937, 
+ "learning_rate": 0.00019503220874593765, + "loss": 2.7744, + "step": 2105 + }, + { + "epoch": 0.169962069243806, + "grad_norm": 0.8541022539138794, + "learning_rate": 0.00019502729360152676, + "loss": 2.7867, + "step": 2106 + }, + { + "epoch": 0.170042772980389, + "grad_norm": 0.8338841795921326, + "learning_rate": 0.0001950223760887873, + "loss": 2.7208, + "step": 2107 + }, + { + "epoch": 0.170123476716972, + "grad_norm": 0.8824255466461182, + "learning_rate": 0.00019501745620784187, + "loss": 2.7658, + "step": 2108 + }, + { + "epoch": 0.170204180453555, + "grad_norm": 0.7710463404655457, + "learning_rate": 0.00019501253395881306, + "loss": 2.7167, + "step": 2109 + }, + { + "epoch": 0.17028488419013801, + "grad_norm": 0.7740076184272766, + "learning_rate": 0.0001950076093418235, + "loss": 2.7251, + "step": 2110 + }, + { + "epoch": 0.17036558792672102, + "grad_norm": 0.8258434534072876, + "learning_rate": 0.00019500268235699597, + "loss": 2.7533, + "step": 2111 + }, + { + "epoch": 0.17044629166330402, + "grad_norm": 0.8347997069358826, + "learning_rate": 0.00019499775300445326, + "loss": 2.7372, + "step": 2112 + }, + { + "epoch": 0.17052699539988703, + "grad_norm": 0.8246529698371887, + "learning_rate": 0.00019499282128431823, + "loss": 2.7458, + "step": 2113 + }, + { + "epoch": 0.17060769913647003, + "grad_norm": 0.8510704040527344, + "learning_rate": 0.00019498788719671378, + "loss": 2.8144, + "step": 2114 + }, + { + "epoch": 0.17068840287305304, + "grad_norm": 0.7793454527854919, + "learning_rate": 0.00019498295074176286, + "loss": 2.7927, + "step": 2115 + }, + { + "epoch": 0.17076910660963604, + "grad_norm": 0.7888665199279785, + "learning_rate": 0.00019497801191958853, + "loss": 2.7156, + "step": 2116 + }, + { + "epoch": 0.17084981034621902, + "grad_norm": 0.8502812385559082, + "learning_rate": 0.00019497307073031386, + "loss": 2.7906, + "step": 2117 + }, + { + "epoch": 0.17093051408280202, + "grad_norm": 0.8376502990722656, + "learning_rate": 
0.00019496812717406203, + "loss": 2.7354, + "step": 2118 + }, + { + "epoch": 0.17101121781938503, + "grad_norm": 0.7974401116371155, + "learning_rate": 0.0001949631812509562, + "loss": 2.7755, + "step": 2119 + }, + { + "epoch": 0.17109192155596803, + "grad_norm": 0.7760190963745117, + "learning_rate": 0.00019495823296111965, + "loss": 2.7694, + "step": 2120 + }, + { + "epoch": 0.17117262529255103, + "grad_norm": 0.7721701860427856, + "learning_rate": 0.00019495328230467575, + "loss": 2.7474, + "step": 2121 + }, + { + "epoch": 0.17125332902913404, + "grad_norm": 0.7360577583312988, + "learning_rate": 0.0001949483292817478, + "loss": 2.8044, + "step": 2122 + }, + { + "epoch": 0.17133403276571704, + "grad_norm": 0.7536107301712036, + "learning_rate": 0.0001949433738924593, + "loss": 2.8165, + "step": 2123 + }, + { + "epoch": 0.17141473650230005, + "grad_norm": 0.7668276429176331, + "learning_rate": 0.00019493841613693375, + "loss": 2.7964, + "step": 2124 + }, + { + "epoch": 0.17149544023888305, + "grad_norm": 0.8323161602020264, + "learning_rate": 0.0001949334560152947, + "loss": 2.7395, + "step": 2125 + }, + { + "epoch": 0.17157614397546606, + "grad_norm": 0.8132179975509644, + "learning_rate": 0.00019492849352766576, + "loss": 2.7511, + "step": 2126 + }, + { + "epoch": 0.17165684771204906, + "grad_norm": 0.7806998491287231, + "learning_rate": 0.0001949235286741706, + "loss": 2.7649, + "step": 2127 + }, + { + "epoch": 0.17173755144863206, + "grad_norm": 0.8315939903259277, + "learning_rate": 0.00019491856145493298, + "loss": 2.7742, + "step": 2128 + }, + { + "epoch": 0.17181825518521507, + "grad_norm": 0.8368063569068909, + "learning_rate": 0.00019491359187007672, + "loss": 2.7667, + "step": 2129 + }, + { + "epoch": 0.17189895892179807, + "grad_norm": 0.9183431267738342, + "learning_rate": 0.0001949086199197256, + "loss": 2.7444, + "step": 2130 + }, + { + "epoch": 0.17197966265838108, + "grad_norm": 0.7824065089225769, + "learning_rate": 0.0001949036456040036, + 
"loss": 2.7455, + "step": 2131 + }, + { + "epoch": 0.17206036639496408, + "grad_norm": 0.777974009513855, + "learning_rate": 0.00019489866892303468, + "loss": 2.7466, + "step": 2132 + }, + { + "epoch": 0.17214107013154709, + "grad_norm": 0.8068816065788269, + "learning_rate": 0.00019489368987694286, + "loss": 2.7081, + "step": 2133 + }, + { + "epoch": 0.1722217738681301, + "grad_norm": 0.8757622838020325, + "learning_rate": 0.00019488870846585222, + "loss": 2.8005, + "step": 2134 + }, + { + "epoch": 0.1723024776047131, + "grad_norm": 0.7967162728309631, + "learning_rate": 0.00019488372468988693, + "loss": 2.7737, + "step": 2135 + }, + { + "epoch": 0.1723831813412961, + "grad_norm": 0.7700283527374268, + "learning_rate": 0.00019487873854917117, + "loss": 2.7431, + "step": 2136 + }, + { + "epoch": 0.1724638850778791, + "grad_norm": 0.8259130716323853, + "learning_rate": 0.00019487375004382927, + "loss": 2.7635, + "step": 2137 + }, + { + "epoch": 0.1725445888144621, + "grad_norm": 0.8253815770149231, + "learning_rate": 0.0001948687591739855, + "loss": 2.7046, + "step": 2138 + }, + { + "epoch": 0.1726252925510451, + "grad_norm": 0.8087987303733826, + "learning_rate": 0.00019486376593976426, + "loss": 2.7728, + "step": 2139 + }, + { + "epoch": 0.17270599628762812, + "grad_norm": 0.8437588214874268, + "learning_rate": 0.00019485877034128998, + "loss": 2.7606, + "step": 2140 + }, + { + "epoch": 0.17278670002421112, + "grad_norm": 0.8416075110435486, + "learning_rate": 0.00019485377237868723, + "loss": 2.7396, + "step": 2141 + }, + { + "epoch": 0.17286740376079412, + "grad_norm": 0.784275472164154, + "learning_rate": 0.00019484877205208046, + "loss": 2.766, + "step": 2142 + }, + { + "epoch": 0.17294810749737713, + "grad_norm": 0.8082472681999207, + "learning_rate": 0.0001948437693615944, + "loss": 2.8, + "step": 2143 + }, + { + "epoch": 0.17302881123396013, + "grad_norm": 0.8904329538345337, + "learning_rate": 0.00019483876430735365, + "loss": 2.6579, + "step": 2144 + }, + 
{ + "epoch": 0.17310951497054314, + "grad_norm": 0.7864851355552673, + "learning_rate": 0.000194833756889483, + "loss": 2.8231, + "step": 2145 + }, + { + "epoch": 0.17319021870712614, + "grad_norm": 0.7445049285888672, + "learning_rate": 0.00019482874710810723, + "loss": 2.7498, + "step": 2146 + }, + { + "epoch": 0.17327092244370915, + "grad_norm": 0.8266116380691528, + "learning_rate": 0.00019482373496335117, + "loss": 2.7152, + "step": 2147 + }, + { + "epoch": 0.17335162618029215, + "grad_norm": 0.7712300419807434, + "learning_rate": 0.0001948187204553398, + "loss": 2.7751, + "step": 2148 + }, + { + "epoch": 0.17343232991687516, + "grad_norm": 0.7472708225250244, + "learning_rate": 0.00019481370358419807, + "loss": 2.7397, + "step": 2149 + }, + { + "epoch": 0.17351303365345816, + "grad_norm": 0.763454020023346, + "learning_rate": 0.00019480868435005095, + "loss": 2.7174, + "step": 2150 + }, + { + "epoch": 0.17359373739004116, + "grad_norm": 0.8187674283981323, + "learning_rate": 0.00019480366275302362, + "loss": 2.8424, + "step": 2151 + }, + { + "epoch": 0.17367444112662417, + "grad_norm": 0.8183228373527527, + "learning_rate": 0.0001947986387932412, + "loss": 2.7351, + "step": 2152 + }, + { + "epoch": 0.17375514486320717, + "grad_norm": 0.807231605052948, + "learning_rate": 0.00019479361247082884, + "loss": 2.8054, + "step": 2153 + }, + { + "epoch": 0.17383584859979018, + "grad_norm": 0.8383626341819763, + "learning_rate": 0.00019478858378591194, + "loss": 2.7181, + "step": 2154 + }, + { + "epoch": 0.17391655233637318, + "grad_norm": 0.8330298662185669, + "learning_rate": 0.0001947835527386157, + "loss": 2.748, + "step": 2155 + }, + { + "epoch": 0.17399725607295619, + "grad_norm": 0.8433073163032532, + "learning_rate": 0.0001947785193290656, + "loss": 2.8115, + "step": 2156 + }, + { + "epoch": 0.1740779598095392, + "grad_norm": 0.8873384594917297, + "learning_rate": 0.000194773483557387, + "loss": 2.8288, + "step": 2157 + }, + { + "epoch": 0.1741586635461222, + 
"grad_norm": 0.8399423360824585, + "learning_rate": 0.00019476844542370546, + "loss": 2.7514, + "step": 2158 + }, + { + "epoch": 0.1742393672827052, + "grad_norm": 0.7808830738067627, + "learning_rate": 0.00019476340492814655, + "loss": 2.7003, + "step": 2159 + }, + { + "epoch": 0.1743200710192882, + "grad_norm": 0.8268750905990601, + "learning_rate": 0.00019475836207083589, + "loss": 2.7961, + "step": 2160 + }, + { + "epoch": 0.1744007747558712, + "grad_norm": 0.9144260883331299, + "learning_rate": 0.0001947533168518991, + "loss": 2.769, + "step": 2161 + }, + { + "epoch": 0.1744814784924542, + "grad_norm": 0.8409113883972168, + "learning_rate": 0.000194748269271462, + "loss": 2.8004, + "step": 2162 + }, + { + "epoch": 0.17456218222903722, + "grad_norm": 0.8747037649154663, + "learning_rate": 0.00019474321932965035, + "loss": 2.7602, + "step": 2163 + }, + { + "epoch": 0.17464288596562022, + "grad_norm": 0.8582575917243958, + "learning_rate": 0.00019473816702659, + "loss": 2.7292, + "step": 2164 + }, + { + "epoch": 0.17472358970220322, + "grad_norm": 0.7402843832969666, + "learning_rate": 0.0001947331123624069, + "loss": 2.7287, + "step": 2165 + }, + { + "epoch": 0.17480429343878623, + "grad_norm": 0.8019410967826843, + "learning_rate": 0.000194728055337227, + "loss": 2.7451, + "step": 2166 + }, + { + "epoch": 0.17488499717536923, + "grad_norm": 0.9137046337127686, + "learning_rate": 0.0001947229959511763, + "loss": 2.808, + "step": 2167 + }, + { + "epoch": 0.1749657009119522, + "grad_norm": 0.7539177536964417, + "learning_rate": 0.000194717934204381, + "loss": 2.7031, + "step": 2168 + }, + { + "epoch": 0.17504640464853521, + "grad_norm": 0.8611089587211609, + "learning_rate": 0.00019471287009696715, + "loss": 2.8751, + "step": 2169 + }, + { + "epoch": 0.17512710838511822, + "grad_norm": 0.906134843826294, + "learning_rate": 0.000194707803629061, + "loss": 2.9163, + "step": 2170 + }, + { + "epoch": 0.17520781212170122, + "grad_norm": 0.8066667318344116, + 
"learning_rate": 0.00019470273480078879, + "loss": 2.7549, + "step": 2171 + }, + { + "epoch": 0.17528851585828423, + "grad_norm": 0.7962325215339661, + "learning_rate": 0.00019469766361227692, + "loss": 2.7964, + "step": 2172 + }, + { + "epoch": 0.17536921959486723, + "grad_norm": 0.7802287340164185, + "learning_rate": 0.0001946925900636517, + "loss": 2.7022, + "step": 2173 + }, + { + "epoch": 0.17544992333145024, + "grad_norm": 0.783478319644928, + "learning_rate": 0.0001946875141550396, + "loss": 2.7798, + "step": 2174 + }, + { + "epoch": 0.17553062706803324, + "grad_norm": 0.8006815314292908, + "learning_rate": 0.00019468243588656713, + "loss": 2.7345, + "step": 2175 + }, + { + "epoch": 0.17561133080461624, + "grad_norm": 0.7566428184509277, + "learning_rate": 0.00019467735525836085, + "loss": 2.7822, + "step": 2176 + }, + { + "epoch": 0.17569203454119925, + "grad_norm": 0.772282600402832, + "learning_rate": 0.0001946722722705474, + "loss": 2.7346, + "step": 2177 + }, + { + "epoch": 0.17577273827778225, + "grad_norm": 0.7808345556259155, + "learning_rate": 0.00019466718692325347, + "loss": 2.755, + "step": 2178 + }, + { + "epoch": 0.17585344201436526, + "grad_norm": 0.8150362372398376, + "learning_rate": 0.00019466209921660576, + "loss": 2.7691, + "step": 2179 + }, + { + "epoch": 0.17593414575094826, + "grad_norm": 0.7952939867973328, + "learning_rate": 0.0001946570091507311, + "loss": 2.8175, + "step": 2180 + }, + { + "epoch": 0.17601484948753127, + "grad_norm": 0.8211334347724915, + "learning_rate": 0.00019465191672575634, + "loss": 2.7561, + "step": 2181 + }, + { + "epoch": 0.17609555322411427, + "grad_norm": 0.7726178765296936, + "learning_rate": 0.00019464682194180838, + "loss": 2.7435, + "step": 2182 + }, + { + "epoch": 0.17617625696069728, + "grad_norm": 0.7614372372627258, + "learning_rate": 0.00019464172479901422, + "loss": 2.7301, + "step": 2183 + }, + { + "epoch": 0.17625696069728028, + "grad_norm": 0.7818898558616638, + "learning_rate": 
0.00019463662529750083, + "loss": 2.6964, + "step": 2184 + }, + { + "epoch": 0.17633766443386328, + "grad_norm": 0.7849796414375305, + "learning_rate": 0.0001946315234373954, + "loss": 2.7431, + "step": 2185 + }, + { + "epoch": 0.1764183681704463, + "grad_norm": 0.7939459085464478, + "learning_rate": 0.00019462641921882506, + "loss": 2.7126, + "step": 2186 + }, + { + "epoch": 0.1764990719070293, + "grad_norm": 0.8391629457473755, + "learning_rate": 0.00019462131264191696, + "loss": 2.8394, + "step": 2187 + }, + { + "epoch": 0.1765797756436123, + "grad_norm": 0.7548067569732666, + "learning_rate": 0.0001946162037067984, + "loss": 2.7315, + "step": 2188 + }, + { + "epoch": 0.1766604793801953, + "grad_norm": 0.8278634548187256, + "learning_rate": 0.00019461109241359674, + "loss": 2.8298, + "step": 2189 + }, + { + "epoch": 0.1767411831167783, + "grad_norm": 0.8275949954986572, + "learning_rate": 0.00019460597876243933, + "loss": 2.8072, + "step": 2190 + }, + { + "epoch": 0.1768218868533613, + "grad_norm": 0.7720363140106201, + "learning_rate": 0.00019460086275345363, + "loss": 2.7478, + "step": 2191 + }, + { + "epoch": 0.17690259058994431, + "grad_norm": 0.7795925140380859, + "learning_rate": 0.00019459574438676714, + "loss": 2.7633, + "step": 2192 + }, + { + "epoch": 0.17698329432652732, + "grad_norm": 0.7722043991088867, + "learning_rate": 0.00019459062366250743, + "loss": 2.8001, + "step": 2193 + }, + { + "epoch": 0.17706399806311032, + "grad_norm": 0.8560587763786316, + "learning_rate": 0.00019458550058080212, + "loss": 2.7494, + "step": 2194 + }, + { + "epoch": 0.17714470179969333, + "grad_norm": 0.7473754286766052, + "learning_rate": 0.00019458037514177886, + "loss": 2.7112, + "step": 2195 + }, + { + "epoch": 0.17722540553627633, + "grad_norm": 0.7625827789306641, + "learning_rate": 0.00019457524734556542, + "loss": 2.7496, + "step": 2196 + }, + { + "epoch": 0.17730610927285934, + "grad_norm": 0.7809351682662964, + "learning_rate": 0.00019457011719228962, + 
"loss": 2.7764, + "step": 2197 + }, + { + "epoch": 0.17738681300944234, + "grad_norm": 0.7846190333366394, + "learning_rate": 0.00019456498468207927, + "loss": 2.7189, + "step": 2198 + }, + { + "epoch": 0.17746751674602534, + "grad_norm": 0.7919551134109497, + "learning_rate": 0.0001945598498150623, + "loss": 2.7798, + "step": 2199 + }, + { + "epoch": 0.17754822048260835, + "grad_norm": 0.796183705329895, + "learning_rate": 0.0001945547125913667, + "loss": 2.7498, + "step": 2200 + }, + { + "epoch": 0.17762892421919135, + "grad_norm": 0.791668176651001, + "learning_rate": 0.0001945495730111205, + "loss": 2.7638, + "step": 2201 + }, + { + "epoch": 0.17770962795577436, + "grad_norm": 0.8303191661834717, + "learning_rate": 0.0001945444310744518, + "loss": 2.8079, + "step": 2202 + }, + { + "epoch": 0.17779033169235736, + "grad_norm": 0.8245917558670044, + "learning_rate": 0.00019453928678148872, + "loss": 2.7222, + "step": 2203 + }, + { + "epoch": 0.17787103542894037, + "grad_norm": 0.793456494808197, + "learning_rate": 0.0001945341401323595, + "loss": 2.8532, + "step": 2204 + }, + { + "epoch": 0.17795173916552337, + "grad_norm": 0.7574856877326965, + "learning_rate": 0.00019452899112719235, + "loss": 2.7361, + "step": 2205 + }, + { + "epoch": 0.17803244290210637, + "grad_norm": 0.7748556733131409, + "learning_rate": 0.0001945238397661157, + "loss": 2.7423, + "step": 2206 + }, + { + "epoch": 0.17811314663868938, + "grad_norm": 0.8973588347434998, + "learning_rate": 0.00019451868604925782, + "loss": 2.7604, + "step": 2207 + }, + { + "epoch": 0.17819385037527238, + "grad_norm": 0.7613589763641357, + "learning_rate": 0.00019451352997674722, + "loss": 2.7168, + "step": 2208 + }, + { + "epoch": 0.1782745541118554, + "grad_norm": 0.8152763247489929, + "learning_rate": 0.00019450837154871243, + "loss": 2.7904, + "step": 2209 + }, + { + "epoch": 0.1783552578484384, + "grad_norm": 0.8115083575248718, + "learning_rate": 0.00019450321076528194, + "loss": 2.7595, + "step": 2210 + 
}, + { + "epoch": 0.1784359615850214, + "grad_norm": 0.772665798664093, + "learning_rate": 0.00019449804762658438, + "loss": 2.7125, + "step": 2211 + }, + { + "epoch": 0.1785166653216044, + "grad_norm": 0.8002723455429077, + "learning_rate": 0.0001944928821327485, + "loss": 2.8121, + "step": 2212 + }, + { + "epoch": 0.1785973690581874, + "grad_norm": 0.8354858160018921, + "learning_rate": 0.00019448771428390296, + "loss": 2.8662, + "step": 2213 + }, + { + "epoch": 0.1786780727947704, + "grad_norm": 0.7799130082130432, + "learning_rate": 0.0001944825440801766, + "loss": 2.7247, + "step": 2214 + }, + { + "epoch": 0.1787587765313534, + "grad_norm": 0.810265302658081, + "learning_rate": 0.00019447737152169828, + "loss": 2.7095, + "step": 2215 + }, + { + "epoch": 0.17883948026793642, + "grad_norm": 0.8305599093437195, + "learning_rate": 0.00019447219660859687, + "loss": 2.7448, + "step": 2216 + }, + { + "epoch": 0.17892018400451942, + "grad_norm": 0.7899554371833801, + "learning_rate": 0.00019446701934100138, + "loss": 2.7295, + "step": 2217 + }, + { + "epoch": 0.17900088774110243, + "grad_norm": 0.7675672173500061, + "learning_rate": 0.00019446183971904082, + "loss": 2.7236, + "step": 2218 + }, + { + "epoch": 0.1790815914776854, + "grad_norm": 0.8717279434204102, + "learning_rate": 0.0001944566577428443, + "loss": 2.8044, + "step": 2219 + }, + { + "epoch": 0.1791622952142684, + "grad_norm": 0.8151431679725647, + "learning_rate": 0.00019445147341254094, + "loss": 2.7753, + "step": 2220 + }, + { + "epoch": 0.1792429989508514, + "grad_norm": 0.8481619358062744, + "learning_rate": 0.00019444628672825998, + "loss": 2.7954, + "step": 2221 + }, + { + "epoch": 0.17932370268743442, + "grad_norm": 0.8133199214935303, + "learning_rate": 0.00019444109769013065, + "loss": 2.7235, + "step": 2222 + }, + { + "epoch": 0.17940440642401742, + "grad_norm": 0.8250097036361694, + "learning_rate": 0.00019443590629828232, + "loss": 2.8352, + "step": 2223 + }, + { + "epoch": 
0.17948511016060043, + "grad_norm": 0.8279787302017212, + "learning_rate": 0.00019443071255284433, + "loss": 2.7513, + "step": 2224 + }, + { + "epoch": 0.17956581389718343, + "grad_norm": 0.7781538963317871, + "learning_rate": 0.00019442551645394612, + "loss": 2.7239, + "step": 2225 + }, + { + "epoch": 0.17964651763376643, + "grad_norm": 0.7718615531921387, + "learning_rate": 0.00019442031800171727, + "loss": 2.7387, + "step": 2226 + }, + { + "epoch": 0.17972722137034944, + "grad_norm": 0.7704512476921082, + "learning_rate": 0.00019441511719628724, + "loss": 2.792, + "step": 2227 + }, + { + "epoch": 0.17980792510693244, + "grad_norm": 0.8290835618972778, + "learning_rate": 0.00019440991403778566, + "loss": 2.7745, + "step": 2228 + }, + { + "epoch": 0.17988862884351545, + "grad_norm": 0.8408392667770386, + "learning_rate": 0.00019440470852634227, + "loss": 2.7688, + "step": 2229 + }, + { + "epoch": 0.17996933258009845, + "grad_norm": 0.8503465056419373, + "learning_rate": 0.00019439950066208676, + "loss": 2.6747, + "step": 2230 + }, + { + "epoch": 0.18005003631668146, + "grad_norm": 0.8213364481925964, + "learning_rate": 0.0001943942904451489, + "loss": 2.7212, + "step": 2231 + }, + { + "epoch": 0.18013074005326446, + "grad_norm": 0.8511209487915039, + "learning_rate": 0.0001943890778756586, + "loss": 2.701, + "step": 2232 + }, + { + "epoch": 0.18021144378984746, + "grad_norm": 0.8034417033195496, + "learning_rate": 0.00019438386295374577, + "loss": 2.7029, + "step": 2233 + }, + { + "epoch": 0.18029214752643047, + "grad_norm": 0.7603715658187866, + "learning_rate": 0.0001943786456795403, + "loss": 2.7201, + "step": 2234 + }, + { + "epoch": 0.18037285126301347, + "grad_norm": 0.9210647940635681, + "learning_rate": 0.0001943734260531723, + "loss": 2.7847, + "step": 2235 + }, + { + "epoch": 0.18045355499959648, + "grad_norm": 0.7429665923118591, + "learning_rate": 0.00019436820407477186, + "loss": 2.7493, + "step": 2236 + }, + { + "epoch": 0.18053425873617948, + 
"grad_norm": 0.8290510773658752, + "learning_rate": 0.00019436297974446905, + "loss": 2.7711, + "step": 2237 + }, + { + "epoch": 0.18061496247276249, + "grad_norm": 0.7593570947647095, + "learning_rate": 0.0001943577530623941, + "loss": 2.7539, + "step": 2238 + }, + { + "epoch": 0.1806956662093455, + "grad_norm": 0.8222225308418274, + "learning_rate": 0.00019435252402867734, + "loss": 2.7703, + "step": 2239 + }, + { + "epoch": 0.1807763699459285, + "grad_norm": 0.8280842900276184, + "learning_rate": 0.00019434729264344898, + "loss": 2.7966, + "step": 2240 + }, + { + "epoch": 0.1808570736825115, + "grad_norm": 0.8258495926856995, + "learning_rate": 0.00019434205890683952, + "loss": 2.759, + "step": 2241 + }, + { + "epoch": 0.1809377774190945, + "grad_norm": 0.8294420838356018, + "learning_rate": 0.00019433682281897932, + "loss": 2.6996, + "step": 2242 + }, + { + "epoch": 0.1810184811556775, + "grad_norm": 0.8258811235427856, + "learning_rate": 0.0001943315843799989, + "loss": 2.774, + "step": 2243 + }, + { + "epoch": 0.1810991848922605, + "grad_norm": 0.8035838007926941, + "learning_rate": 0.0001943263435900288, + "loss": 2.7806, + "step": 2244 + }, + { + "epoch": 0.18117988862884352, + "grad_norm": 0.7900332808494568, + "learning_rate": 0.00019432110044919964, + "loss": 2.7462, + "step": 2245 + }, + { + "epoch": 0.18126059236542652, + "grad_norm": 0.8126730918884277, + "learning_rate": 0.00019431585495764212, + "loss": 2.6913, + "step": 2246 + }, + { + "epoch": 0.18134129610200952, + "grad_norm": 0.8411321043968201, + "learning_rate": 0.00019431060711548695, + "loss": 2.7503, + "step": 2247 + }, + { + "epoch": 0.18142199983859253, + "grad_norm": 0.7712867856025696, + "learning_rate": 0.0001943053569228649, + "loss": 2.7703, + "step": 2248 + }, + { + "epoch": 0.18150270357517553, + "grad_norm": 0.9093566536903381, + "learning_rate": 0.00019430010437990688, + "loss": 2.7838, + "step": 2249 + }, + { + "epoch": 0.18158340731175854, + "grad_norm": 0.8184913396835327, + 
"learning_rate": 0.00019429484948674372, + "loss": 2.8167, + "step": 2250 + }, + { + "epoch": 0.18166411104834154, + "grad_norm": 0.7215915322303772, + "learning_rate": 0.00019428959224350643, + "loss": 2.739, + "step": 2251 + }, + { + "epoch": 0.18174481478492455, + "grad_norm": 0.7842726111412048, + "learning_rate": 0.000194284332650326, + "loss": 2.8547, + "step": 2252 + }, + { + "epoch": 0.18182551852150755, + "grad_norm": 0.7758263349533081, + "learning_rate": 0.00019427907070733357, + "loss": 2.7746, + "step": 2253 + }, + { + "epoch": 0.18190622225809056, + "grad_norm": 0.7710500359535217, + "learning_rate": 0.00019427380641466027, + "loss": 2.7415, + "step": 2254 + }, + { + "epoch": 0.18198692599467356, + "grad_norm": 0.8233851194381714, + "learning_rate": 0.00019426853977243724, + "loss": 2.7471, + "step": 2255 + }, + { + "epoch": 0.18206762973125656, + "grad_norm": 0.7856284379959106, + "learning_rate": 0.00019426327078079578, + "loss": 2.6892, + "step": 2256 + }, + { + "epoch": 0.18214833346783957, + "grad_norm": 0.7978290915489197, + "learning_rate": 0.00019425799943986722, + "loss": 2.7346, + "step": 2257 + }, + { + "epoch": 0.18222903720442257, + "grad_norm": 0.8339362740516663, + "learning_rate": 0.00019425272574978293, + "loss": 2.7403, + "step": 2258 + }, + { + "epoch": 0.18230974094100558, + "grad_norm": 0.8035171031951904, + "learning_rate": 0.0001942474497106743, + "loss": 2.7444, + "step": 2259 + }, + { + "epoch": 0.18239044467758858, + "grad_norm": 0.7950475811958313, + "learning_rate": 0.0001942421713226729, + "loss": 2.7218, + "step": 2260 + }, + { + "epoch": 0.18247114841417159, + "grad_norm": 0.8439741730690002, + "learning_rate": 0.00019423689058591022, + "loss": 2.7498, + "step": 2261 + }, + { + "epoch": 0.1825518521507546, + "grad_norm": 0.8585919737815857, + "learning_rate": 0.00019423160750051789, + "loss": 2.7459, + "step": 2262 + }, + { + "epoch": 0.1826325558873376, + "grad_norm": 0.857276201248169, + "learning_rate": 
0.00019422632206662755, + "loss": 2.8404, + "step": 2263 + }, + { + "epoch": 0.1827132596239206, + "grad_norm": 0.7692707777023315, + "learning_rate": 0.000194221034284371, + "loss": 2.8069, + "step": 2264 + }, + { + "epoch": 0.1827939633605036, + "grad_norm": 0.9107782244682312, + "learning_rate": 0.00019421574415387998, + "loss": 2.7554, + "step": 2265 + }, + { + "epoch": 0.1828746670970866, + "grad_norm": 0.763300895690918, + "learning_rate": 0.00019421045167528628, + "loss": 2.8031, + "step": 2266 + }, + { + "epoch": 0.1829553708336696, + "grad_norm": 0.8625530004501343, + "learning_rate": 0.0001942051568487219, + "loss": 2.7622, + "step": 2267 + }, + { + "epoch": 0.18303607457025262, + "grad_norm": 0.8483080863952637, + "learning_rate": 0.00019419985967431875, + "loss": 2.7726, + "step": 2268 + }, + { + "epoch": 0.18311677830683562, + "grad_norm": 0.8295309543609619, + "learning_rate": 0.00019419456015220884, + "loss": 2.7676, + "step": 2269 + }, + { + "epoch": 0.1831974820434186, + "grad_norm": 0.812976062297821, + "learning_rate": 0.0001941892582825243, + "loss": 2.745, + "step": 2270 + }, + { + "epoch": 0.1832781857800016, + "grad_norm": 0.799846351146698, + "learning_rate": 0.00019418395406539717, + "loss": 2.7474, + "step": 2271 + }, + { + "epoch": 0.1833588895165846, + "grad_norm": 0.7825174331665039, + "learning_rate": 0.00019417864750095976, + "loss": 2.7982, + "step": 2272 + }, + { + "epoch": 0.1834395932531676, + "grad_norm": 0.8331060409545898, + "learning_rate": 0.00019417333858934424, + "loss": 2.7279, + "step": 2273 + }, + { + "epoch": 0.18352029698975061, + "grad_norm": 0.8579809665679932, + "learning_rate": 0.00019416802733068295, + "loss": 2.7425, + "step": 2274 + }, + { + "epoch": 0.18360100072633362, + "grad_norm": 0.8643589019775391, + "learning_rate": 0.0001941627137251083, + "loss": 2.7369, + "step": 2275 + }, + { + "epoch": 0.18368170446291662, + "grad_norm": 0.9086846113204956, + "learning_rate": 0.00019415739777275265, + "loss": 
2.7681, + "step": 2276 + }, + { + "epoch": 0.18376240819949963, + "grad_norm": 0.8442896604537964, + "learning_rate": 0.00019415207947374853, + "loss": 2.7733, + "step": 2277 + }, + { + "epoch": 0.18384311193608263, + "grad_norm": 0.7858592867851257, + "learning_rate": 0.00019414675882822846, + "loss": 2.7726, + "step": 2278 + }, + { + "epoch": 0.18392381567266564, + "grad_norm": 0.8191118240356445, + "learning_rate": 0.00019414143583632503, + "loss": 2.8142, + "step": 2279 + }, + { + "epoch": 0.18400451940924864, + "grad_norm": 0.8093815445899963, + "learning_rate": 0.00019413611049817097, + "loss": 2.7068, + "step": 2280 + }, + { + "epoch": 0.18408522314583164, + "grad_norm": 0.80247563123703, + "learning_rate": 0.00019413078281389895, + "loss": 2.7459, + "step": 2281 + }, + { + "epoch": 0.18416592688241465, + "grad_norm": 0.8200877904891968, + "learning_rate": 0.00019412545278364176, + "loss": 2.6963, + "step": 2282 + }, + { + "epoch": 0.18424663061899765, + "grad_norm": 0.870662271976471, + "learning_rate": 0.00019412012040753224, + "loss": 2.8636, + "step": 2283 + }, + { + "epoch": 0.18432733435558066, + "grad_norm": 0.7626601457595825, + "learning_rate": 0.00019411478568570332, + "loss": 2.8082, + "step": 2284 + }, + { + "epoch": 0.18440803809216366, + "grad_norm": 0.7492787837982178, + "learning_rate": 0.00019410944861828787, + "loss": 2.7231, + "step": 2285 + }, + { + "epoch": 0.18448874182874667, + "grad_norm": 0.8172419667243958, + "learning_rate": 0.000194104109205419, + "loss": 2.7054, + "step": 2286 + }, + { + "epoch": 0.18456944556532967, + "grad_norm": 0.7749670147895813, + "learning_rate": 0.0001940987674472297, + "loss": 2.6907, + "step": 2287 + }, + { + "epoch": 0.18465014930191267, + "grad_norm": 0.8855465054512024, + "learning_rate": 0.00019409342334385316, + "loss": 2.7439, + "step": 2288 + }, + { + "epoch": 0.18473085303849568, + "grad_norm": 0.8066419363021851, + "learning_rate": 0.00019408807689542257, + "loss": 2.7126, + "step": 2289 + }, + 
{ + "epoch": 0.18481155677507868, + "grad_norm": 0.7759004235267639, + "learning_rate": 0.00019408272810207114, + "loss": 2.7207, + "step": 2290 + }, + { + "epoch": 0.1848922605116617, + "grad_norm": 0.8593513369560242, + "learning_rate": 0.00019407737696393215, + "loss": 2.7375, + "step": 2291 + }, + { + "epoch": 0.1849729642482447, + "grad_norm": 0.8154759407043457, + "learning_rate": 0.00019407202348113904, + "loss": 2.7608, + "step": 2292 + }, + { + "epoch": 0.1850536679848277, + "grad_norm": 0.7912892699241638, + "learning_rate": 0.0001940666676538252, + "loss": 2.7886, + "step": 2293 + }, + { + "epoch": 0.1851343717214107, + "grad_norm": 0.9184576272964478, + "learning_rate": 0.0001940613094821241, + "loss": 2.7867, + "step": 2294 + }, + { + "epoch": 0.1852150754579937, + "grad_norm": 0.8114588856697083, + "learning_rate": 0.0001940559489661693, + "loss": 2.8105, + "step": 2295 + }, + { + "epoch": 0.1852957791945767, + "grad_norm": 0.7681595683097839, + "learning_rate": 0.00019405058610609438, + "loss": 2.7707, + "step": 2296 + }, + { + "epoch": 0.18537648293115971, + "grad_norm": 0.7719643712043762, + "learning_rate": 0.000194045220902033, + "loss": 2.6767, + "step": 2297 + }, + { + "epoch": 0.18545718666774272, + "grad_norm": 0.7602487206459045, + "learning_rate": 0.00019403985335411888, + "loss": 2.7698, + "step": 2298 + }, + { + "epoch": 0.18553789040432572, + "grad_norm": 0.8044554591178894, + "learning_rate": 0.00019403448346248578, + "loss": 2.7578, + "step": 2299 + }, + { + "epoch": 0.18561859414090873, + "grad_norm": 0.7830328345298767, + "learning_rate": 0.00019402911122726757, + "loss": 2.7113, + "step": 2300 + }, + { + "epoch": 0.18569929787749173, + "grad_norm": 0.7793100476264954, + "learning_rate": 0.0001940237366485981, + "loss": 2.7388, + "step": 2301 + }, + { + "epoch": 0.18578000161407474, + "grad_norm": 0.9127374887466431, + "learning_rate": 0.00019401835972661133, + "loss": 2.7459, + "step": 2302 + }, + { + "epoch": 0.18586070535065774, + 
"grad_norm": 0.8007177114486694, + "learning_rate": 0.00019401298046144128, + "loss": 2.776, + "step": 2303 + }, + { + "epoch": 0.18594140908724074, + "grad_norm": 0.7384614944458008, + "learning_rate": 0.000194007598853222, + "loss": 2.6819, + "step": 2304 + }, + { + "epoch": 0.18602211282382375, + "grad_norm": 0.798909068107605, + "learning_rate": 0.0001940022149020876, + "loss": 2.7218, + "step": 2305 + }, + { + "epoch": 0.18610281656040675, + "grad_norm": 0.8388963341712952, + "learning_rate": 0.0001939968286081723, + "loss": 2.8248, + "step": 2306 + }, + { + "epoch": 0.18618352029698976, + "grad_norm": 0.8411754369735718, + "learning_rate": 0.0001939914399716103, + "loss": 2.7575, + "step": 2307 + }, + { + "epoch": 0.18626422403357276, + "grad_norm": 0.7936103343963623, + "learning_rate": 0.00019398604899253594, + "loss": 2.7488, + "step": 2308 + }, + { + "epoch": 0.18634492777015577, + "grad_norm": 0.7913734912872314, + "learning_rate": 0.00019398065567108357, + "loss": 2.7963, + "step": 2309 + }, + { + "epoch": 0.18642563150673877, + "grad_norm": 0.8341575860977173, + "learning_rate": 0.00019397526000738754, + "loss": 2.7698, + "step": 2310 + }, + { + "epoch": 0.18650633524332177, + "grad_norm": 0.8323128819465637, + "learning_rate": 0.00019396986200158244, + "loss": 2.7218, + "step": 2311 + }, + { + "epoch": 0.18658703897990478, + "grad_norm": 0.748073160648346, + "learning_rate": 0.0001939644616538027, + "loss": 2.7798, + "step": 2312 + }, + { + "epoch": 0.18666774271648778, + "grad_norm": 0.8166958689689636, + "learning_rate": 0.00019395905896418296, + "loss": 2.661, + "step": 2313 + }, + { + "epoch": 0.1867484464530708, + "grad_norm": 0.796791672706604, + "learning_rate": 0.00019395365393285786, + "loss": 2.7297, + "step": 2314 + }, + { + "epoch": 0.1868291501896538, + "grad_norm": 0.7851170897483826, + "learning_rate": 0.0001939482465599621, + "loss": 2.7798, + "step": 2315 + }, + { + "epoch": 0.1869098539262368, + "grad_norm": 0.7545836567878723, + 
"learning_rate": 0.00019394283684563045, + "loss": 2.7327, + "step": 2316 + }, + { + "epoch": 0.1869905576628198, + "grad_norm": 0.8100360631942749, + "learning_rate": 0.00019393742478999776, + "loss": 2.7901, + "step": 2317 + }, + { + "epoch": 0.1870712613994028, + "grad_norm": 0.7874314785003662, + "learning_rate": 0.00019393201039319887, + "loss": 2.7597, + "step": 2318 + }, + { + "epoch": 0.1871519651359858, + "grad_norm": 0.7698730826377869, + "learning_rate": 0.00019392659365536876, + "loss": 2.7327, + "step": 2319 + }, + { + "epoch": 0.1872326688725688, + "grad_norm": 0.7417994141578674, + "learning_rate": 0.0001939211745766424, + "loss": 2.7413, + "step": 2320 + }, + { + "epoch": 0.1873133726091518, + "grad_norm": 0.7823258638381958, + "learning_rate": 0.00019391575315715485, + "loss": 2.7577, + "step": 2321 + }, + { + "epoch": 0.1873940763457348, + "grad_norm": 0.82382732629776, + "learning_rate": 0.00019391032939704124, + "loss": 2.7769, + "step": 2322 + }, + { + "epoch": 0.1874747800823178, + "grad_norm": 0.8405026197433472, + "learning_rate": 0.0001939049032964367, + "loss": 2.8402, + "step": 2323 + }, + { + "epoch": 0.1875554838189008, + "grad_norm": 0.8307906985282898, + "learning_rate": 0.00019389947485547654, + "loss": 2.7642, + "step": 2324 + }, + { + "epoch": 0.1876361875554838, + "grad_norm": 0.8618248701095581, + "learning_rate": 0.000193894044074296, + "loss": 2.7853, + "step": 2325 + }, + { + "epoch": 0.1877168912920668, + "grad_norm": 0.8040831685066223, + "learning_rate": 0.00019388861095303046, + "loss": 2.7467, + "step": 2326 + }, + { + "epoch": 0.18779759502864982, + "grad_norm": 0.7723637223243713, + "learning_rate": 0.0001938831754918153, + "loss": 2.7222, + "step": 2327 + }, + { + "epoch": 0.18787829876523282, + "grad_norm": 0.8189084529876709, + "learning_rate": 0.000193877737690786, + "loss": 2.7857, + "step": 2328 + }, + { + "epoch": 0.18795900250181583, + "grad_norm": 0.8335791826248169, + "learning_rate": 0.00019387229755007805, + 
"loss": 2.6997, + "step": 2329 + }, + { + "epoch": 0.18803970623839883, + "grad_norm": 0.7732782959938049, + "learning_rate": 0.00019386685506982707, + "loss": 2.7155, + "step": 2330 + }, + { + "epoch": 0.18812040997498183, + "grad_norm": 0.8262906670570374, + "learning_rate": 0.0001938614102501687, + "loss": 2.7638, + "step": 2331 + }, + { + "epoch": 0.18820111371156484, + "grad_norm": 0.7969058156013489, + "learning_rate": 0.00019385596309123862, + "loss": 2.7363, + "step": 2332 + }, + { + "epoch": 0.18828181744814784, + "grad_norm": 0.7834853529930115, + "learning_rate": 0.0001938505135931726, + "loss": 2.7205, + "step": 2333 + }, + { + "epoch": 0.18836252118473085, + "grad_norm": 0.748481810092926, + "learning_rate": 0.00019384506175610647, + "loss": 2.7759, + "step": 2334 + }, + { + "epoch": 0.18844322492131385, + "grad_norm": 0.8137786984443665, + "learning_rate": 0.00019383960758017604, + "loss": 2.828, + "step": 2335 + }, + { + "epoch": 0.18852392865789686, + "grad_norm": 0.8065745234489441, + "learning_rate": 0.00019383415106551734, + "loss": 2.7408, + "step": 2336 + }, + { + "epoch": 0.18860463239447986, + "grad_norm": 0.768643856048584, + "learning_rate": 0.0001938286922122663, + "loss": 2.6503, + "step": 2337 + }, + { + "epoch": 0.18868533613106286, + "grad_norm": 0.7677921652793884, + "learning_rate": 0.00019382323102055897, + "loss": 2.7088, + "step": 2338 + }, + { + "epoch": 0.18876603986764587, + "grad_norm": 0.7648717164993286, + "learning_rate": 0.0001938177674905315, + "loss": 2.7015, + "step": 2339 + }, + { + "epoch": 0.18884674360422887, + "grad_norm": 0.7517116665840149, + "learning_rate": 0.00019381230162231997, + "loss": 2.7095, + "step": 2340 + }, + { + "epoch": 0.18892744734081188, + "grad_norm": 0.8147841691970825, + "learning_rate": 0.00019380683341606067, + "loss": 2.8563, + "step": 2341 + }, + { + "epoch": 0.18900815107739488, + "grad_norm": 0.7849822640419006, + "learning_rate": 0.00019380136287188988, + "loss": 2.7432, + "step": 2342 
+ }, + { + "epoch": 0.18908885481397789, + "grad_norm": 0.813811719417572, + "learning_rate": 0.0001937958899899439, + "loss": 2.7419, + "step": 2343 + }, + { + "epoch": 0.1891695585505609, + "grad_norm": 0.8142707943916321, + "learning_rate": 0.00019379041477035923, + "loss": 2.7658, + "step": 2344 + }, + { + "epoch": 0.1892502622871439, + "grad_norm": 0.7594506740570068, + "learning_rate": 0.00019378493721327217, + "loss": 2.7298, + "step": 2345 + }, + { + "epoch": 0.1893309660237269, + "grad_norm": 0.8374232053756714, + "learning_rate": 0.00019377945731881936, + "loss": 2.8112, + "step": 2346 + }, + { + "epoch": 0.1894116697603099, + "grad_norm": 0.783608615398407, + "learning_rate": 0.00019377397508713734, + "loss": 2.8168, + "step": 2347 + }, + { + "epoch": 0.1894923734968929, + "grad_norm": 0.720214307308197, + "learning_rate": 0.0001937684905183627, + "loss": 2.7516, + "step": 2348 + }, + { + "epoch": 0.1895730772334759, + "grad_norm": 0.7939600944519043, + "learning_rate": 0.0001937630036126322, + "loss": 2.7609, + "step": 2349 + }, + { + "epoch": 0.18965378097005892, + "grad_norm": 0.787315309047699, + "learning_rate": 0.00019375751437008252, + "loss": 2.758, + "step": 2350 + }, + { + "epoch": 0.18973448470664192, + "grad_norm": 0.7862411141395569, + "learning_rate": 0.00019375202279085053, + "loss": 2.6866, + "step": 2351 + }, + { + "epoch": 0.18981518844322492, + "grad_norm": 0.8651136159896851, + "learning_rate": 0.000193746528875073, + "loss": 2.7488, + "step": 2352 + }, + { + "epoch": 0.18989589217980793, + "grad_norm": 0.8150602579116821, + "learning_rate": 0.00019374103262288696, + "loss": 2.7417, + "step": 2353 + }, + { + "epoch": 0.18997659591639093, + "grad_norm": 0.9053540229797363, + "learning_rate": 0.00019373553403442934, + "loss": 2.7587, + "step": 2354 + }, + { + "epoch": 0.19005729965297394, + "grad_norm": 0.8775703310966492, + "learning_rate": 0.0001937300331098372, + "loss": 2.733, + "step": 2355 + }, + { + "epoch": 0.19013800338955694, 
+ "grad_norm": 0.7714357972145081, + "learning_rate": 0.0001937245298492476, + "loss": 2.7595, + "step": 2356 + }, + { + "epoch": 0.19021870712613995, + "grad_norm": 0.8648017048835754, + "learning_rate": 0.0001937190242527977, + "loss": 2.7944, + "step": 2357 + }, + { + "epoch": 0.19029941086272295, + "grad_norm": 0.9367388486862183, + "learning_rate": 0.00019371351632062477, + "loss": 2.7902, + "step": 2358 + }, + { + "epoch": 0.19038011459930596, + "grad_norm": 0.8116368651390076, + "learning_rate": 0.00019370800605286604, + "loss": 2.7291, + "step": 2359 + }, + { + "epoch": 0.19046081833588896, + "grad_norm": 0.7892753481864929, + "learning_rate": 0.00019370249344965882, + "loss": 2.8192, + "step": 2360 + }, + { + "epoch": 0.19054152207247196, + "grad_norm": 0.8109372854232788, + "learning_rate": 0.00019369697851114056, + "loss": 2.6982, + "step": 2361 + }, + { + "epoch": 0.19062222580905497, + "grad_norm": 0.8756314516067505, + "learning_rate": 0.00019369146123744864, + "loss": 2.744, + "step": 2362 + }, + { + "epoch": 0.19070292954563797, + "grad_norm": 0.7400399446487427, + "learning_rate": 0.00019368594162872058, + "loss": 2.7328, + "step": 2363 + }, + { + "epoch": 0.19078363328222098, + "grad_norm": 0.8223158717155457, + "learning_rate": 0.000193680419685094, + "loss": 2.7614, + "step": 2364 + }, + { + "epoch": 0.19086433701880398, + "grad_norm": 0.7350139617919922, + "learning_rate": 0.00019367489540670645, + "loss": 2.7074, + "step": 2365 + }, + { + "epoch": 0.19094504075538699, + "grad_norm": 0.7915631532669067, + "learning_rate": 0.00019366936879369563, + "loss": 2.7835, + "step": 2366 + }, + { + "epoch": 0.19102574449197, + "grad_norm": 0.7765628099441528, + "learning_rate": 0.00019366383984619932, + "loss": 2.765, + "step": 2367 + }, + { + "epoch": 0.191106448228553, + "grad_norm": 0.8127059936523438, + "learning_rate": 0.00019365830856435525, + "loss": 2.7753, + "step": 2368 + }, + { + "epoch": 0.191187151965136, + "grad_norm": 0.8652897477149963, + 
"learning_rate": 0.0001936527749483013, + "loss": 2.7137, + "step": 2369 + }, + { + "epoch": 0.191267855701719, + "grad_norm": 0.8086774945259094, + "learning_rate": 0.00019364723899817541, + "loss": 2.7209, + "step": 2370 + }, + { + "epoch": 0.191348559438302, + "grad_norm": 0.7965098023414612, + "learning_rate": 0.00019364170071411554, + "loss": 2.786, + "step": 2371 + }, + { + "epoch": 0.19142926317488498, + "grad_norm": 0.7954064607620239, + "learning_rate": 0.00019363616009625967, + "loss": 2.7508, + "step": 2372 + }, + { + "epoch": 0.191509966911468, + "grad_norm": 0.7835928201675415, + "learning_rate": 0.00019363061714474595, + "loss": 2.7423, + "step": 2373 + }, + { + "epoch": 0.191590670648051, + "grad_norm": 0.8720580339431763, + "learning_rate": 0.0001936250718597125, + "loss": 2.7877, + "step": 2374 + }, + { + "epoch": 0.191671374384634, + "grad_norm": 0.836066484451294, + "learning_rate": 0.00019361952424129747, + "loss": 2.8456, + "step": 2375 + }, + { + "epoch": 0.191752078121217, + "grad_norm": 0.793666660785675, + "learning_rate": 0.00019361397428963923, + "loss": 2.786, + "step": 2376 + }, + { + "epoch": 0.1918327818578, + "grad_norm": 0.8573217391967773, + "learning_rate": 0.000193608422004876, + "loss": 2.7569, + "step": 2377 + }, + { + "epoch": 0.191913485594383, + "grad_norm": 0.81243896484375, + "learning_rate": 0.00019360286738714623, + "loss": 2.771, + "step": 2378 + }, + { + "epoch": 0.19199418933096601, + "grad_norm": 0.7449626326560974, + "learning_rate": 0.00019359731043658832, + "loss": 2.7479, + "step": 2379 + }, + { + "epoch": 0.19207489306754902, + "grad_norm": 0.8124165534973145, + "learning_rate": 0.00019359175115334076, + "loss": 2.7602, + "step": 2380 + }, + { + "epoch": 0.19215559680413202, + "grad_norm": 0.7786986827850342, + "learning_rate": 0.00019358618953754211, + "loss": 2.6926, + "step": 2381 + }, + { + "epoch": 0.19223630054071503, + "grad_norm": 0.7987258434295654, + "learning_rate": 0.000193580625589331, + "loss": 
2.7573, + "step": 2382 + }, + { + "epoch": 0.19231700427729803, + "grad_norm": 0.8236463665962219, + "learning_rate": 0.00019357505930884606, + "loss": 2.6755, + "step": 2383 + }, + { + "epoch": 0.19239770801388104, + "grad_norm": 0.8285779356956482, + "learning_rate": 0.00019356949069622602, + "loss": 2.7658, + "step": 2384 + }, + { + "epoch": 0.19247841175046404, + "grad_norm": 0.7823960781097412, + "learning_rate": 0.0001935639197516097, + "loss": 2.7404, + "step": 2385 + }, + { + "epoch": 0.19255911548704704, + "grad_norm": 0.968638002872467, + "learning_rate": 0.00019355834647513591, + "loss": 2.7836, + "step": 2386 + }, + { + "epoch": 0.19263981922363005, + "grad_norm": 0.8170328736305237, + "learning_rate": 0.00019355277086694357, + "loss": 2.7816, + "step": 2387 + }, + { + "epoch": 0.19272052296021305, + "grad_norm": 0.8342583179473877, + "learning_rate": 0.00019354719292717163, + "loss": 2.8204, + "step": 2388 + }, + { + "epoch": 0.19280122669679606, + "grad_norm": 0.8160435557365417, + "learning_rate": 0.0001935416126559591, + "loss": 2.6938, + "step": 2389 + }, + { + "epoch": 0.19288193043337906, + "grad_norm": 0.7888174653053284, + "learning_rate": 0.00019353603005344504, + "loss": 2.6804, + "step": 2390 + }, + { + "epoch": 0.19296263416996207, + "grad_norm": 0.8389205932617188, + "learning_rate": 0.00019353044511976865, + "loss": 2.7571, + "step": 2391 + }, + { + "epoch": 0.19304333790654507, + "grad_norm": 0.7920562028884888, + "learning_rate": 0.00019352485785506906, + "loss": 2.7174, + "step": 2392 + }, + { + "epoch": 0.19312404164312807, + "grad_norm": 0.7853459715843201, + "learning_rate": 0.00019351926825948555, + "loss": 2.7626, + "step": 2393 + }, + { + "epoch": 0.19320474537971108, + "grad_norm": 0.9109459519386292, + "learning_rate": 0.0001935136763331574, + "loss": 2.7568, + "step": 2394 + }, + { + "epoch": 0.19328544911629408, + "grad_norm": 0.7983853816986084, + "learning_rate": 0.00019350808207622397, + "loss": 2.7412, + "step": 2395 + }, 
+ { + "epoch": 0.1933661528528771, + "grad_norm": 0.7416854500770569, + "learning_rate": 0.00019350248548882472, + "loss": 2.7335, + "step": 2396 + }, + { + "epoch": 0.1934468565894601, + "grad_norm": 0.7305171489715576, + "learning_rate": 0.0001934968865710991, + "loss": 2.7295, + "step": 2397 + }, + { + "epoch": 0.1935275603260431, + "grad_norm": 0.7717033624649048, + "learning_rate": 0.0001934912853231867, + "loss": 2.7568, + "step": 2398 + }, + { + "epoch": 0.1936082640626261, + "grad_norm": 0.7833831906318665, + "learning_rate": 0.00019348568174522705, + "loss": 2.736, + "step": 2399 + }, + { + "epoch": 0.1936889677992091, + "grad_norm": 0.872831404209137, + "learning_rate": 0.00019348007583735983, + "loss": 2.7719, + "step": 2400 + }, + { + "epoch": 0.1937696715357921, + "grad_norm": 0.8389193415641785, + "learning_rate": 0.0001934744675997248, + "loss": 2.7572, + "step": 2401 + }, + { + "epoch": 0.19385037527237511, + "grad_norm": 0.8442249298095703, + "learning_rate": 0.00019346885703246165, + "loss": 2.8117, + "step": 2402 + }, + { + "epoch": 0.19393107900895812, + "grad_norm": 0.8451170325279236, + "learning_rate": 0.00019346324413571027, + "loss": 2.7216, + "step": 2403 + }, + { + "epoch": 0.19401178274554112, + "grad_norm": 0.898529052734375, + "learning_rate": 0.00019345762890961052, + "loss": 2.8119, + "step": 2404 + }, + { + "epoch": 0.19409248648212413, + "grad_norm": 0.8302313685417175, + "learning_rate": 0.00019345201135430236, + "loss": 2.76, + "step": 2405 + }, + { + "epoch": 0.19417319021870713, + "grad_norm": 0.8975207209587097, + "learning_rate": 0.00019344639146992582, + "loss": 2.8043, + "step": 2406 + }, + { + "epoch": 0.19425389395529014, + "grad_norm": 0.8972581028938293, + "learning_rate": 0.0001934407692566209, + "loss": 2.7487, + "step": 2407 + }, + { + "epoch": 0.19433459769187314, + "grad_norm": 0.8311447501182556, + "learning_rate": 0.00019343514471452776, + "loss": 2.7653, + "step": 2408 + }, + { + "epoch": 0.19441530142845614, + 
"grad_norm": 0.8336243033409119, + "learning_rate": 0.0001934295178437866, + "loss": 2.753, + "step": 2409 + }, + { + "epoch": 0.19449600516503915, + "grad_norm": 0.8339207172393799, + "learning_rate": 0.0001934238886445376, + "loss": 2.7643, + "step": 2410 + }, + { + "epoch": 0.19457670890162215, + "grad_norm": 0.906074583530426, + "learning_rate": 0.0001934182571169211, + "loss": 2.7777, + "step": 2411 + }, + { + "epoch": 0.19465741263820516, + "grad_norm": 0.8759943246841431, + "learning_rate": 0.00019341262326107742, + "loss": 2.77, + "step": 2412 + }, + { + "epoch": 0.19473811637478816, + "grad_norm": 0.8399369716644287, + "learning_rate": 0.00019340698707714699, + "loss": 2.752, + "step": 2413 + }, + { + "epoch": 0.19481882011137117, + "grad_norm": 0.8551808595657349, + "learning_rate": 0.00019340134856527026, + "loss": 2.6727, + "step": 2414 + }, + { + "epoch": 0.19489952384795417, + "grad_norm": 0.7660732865333557, + "learning_rate": 0.00019339570772558778, + "loss": 2.7491, + "step": 2415 + }, + { + "epoch": 0.19498022758453717, + "grad_norm": 0.8257685303688049, + "learning_rate": 0.00019339006455824015, + "loss": 2.7584, + "step": 2416 + }, + { + "epoch": 0.19506093132112018, + "grad_norm": 0.797275960445404, + "learning_rate": 0.00019338441906336794, + "loss": 2.7051, + "step": 2417 + }, + { + "epoch": 0.19514163505770318, + "grad_norm": 0.8311913013458252, + "learning_rate": 0.00019337877124111193, + "loss": 2.8084, + "step": 2418 + }, + { + "epoch": 0.1952223387942862, + "grad_norm": 0.7995893359184265, + "learning_rate": 0.0001933731210916128, + "loss": 2.7556, + "step": 2419 + }, + { + "epoch": 0.1953030425308692, + "grad_norm": 0.792850136756897, + "learning_rate": 0.00019336746861501147, + "loss": 2.7289, + "step": 2420 + }, + { + "epoch": 0.1953837462674522, + "grad_norm": 0.8058848977088928, + "learning_rate": 0.00019336181381144873, + "loss": 2.7394, + "step": 2421 + }, + { + "epoch": 0.1954644500040352, + "grad_norm": 0.8267124891281128, + 
"learning_rate": 0.00019335615668106555, + "loss": 2.771, + "step": 2422 + }, + { + "epoch": 0.19554515374061818, + "grad_norm": 0.7641060948371887, + "learning_rate": 0.00019335049722400292, + "loss": 2.7311, + "step": 2423 + }, + { + "epoch": 0.19562585747720118, + "grad_norm": 0.8023245930671692, + "learning_rate": 0.00019334483544040186, + "loss": 2.7658, + "step": 2424 + }, + { + "epoch": 0.19570656121378419, + "grad_norm": 0.8341927528381348, + "learning_rate": 0.00019333917133040348, + "loss": 2.7476, + "step": 2425 + }, + { + "epoch": 0.1957872649503672, + "grad_norm": 0.7985726594924927, + "learning_rate": 0.000193333504894149, + "loss": 2.7362, + "step": 2426 + }, + { + "epoch": 0.1958679686869502, + "grad_norm": 0.7267594933509827, + "learning_rate": 0.0001933278361317796, + "loss": 2.6875, + "step": 2427 + }, + { + "epoch": 0.1959486724235332, + "grad_norm": 0.8292990326881409, + "learning_rate": 0.00019332216504343652, + "loss": 2.7619, + "step": 2428 + }, + { + "epoch": 0.1960293761601162, + "grad_norm": 0.7549588680267334, + "learning_rate": 0.00019331649162926116, + "loss": 2.7385, + "step": 2429 + }, + { + "epoch": 0.1961100798966992, + "grad_norm": 0.7688446640968323, + "learning_rate": 0.0001933108158893949, + "loss": 2.7544, + "step": 2430 + }, + { + "epoch": 0.1961907836332822, + "grad_norm": 0.8168436884880066, + "learning_rate": 0.00019330513782397918, + "loss": 2.8013, + "step": 2431 + }, + { + "epoch": 0.19627148736986522, + "grad_norm": 0.8405759334564209, + "learning_rate": 0.00019329945743315556, + "loss": 2.7299, + "step": 2432 + }, + { + "epoch": 0.19635219110644822, + "grad_norm": 0.79430091381073, + "learning_rate": 0.00019329377471706554, + "loss": 2.7293, + "step": 2433 + }, + { + "epoch": 0.19643289484303122, + "grad_norm": 0.8428656458854675, + "learning_rate": 0.0001932880896758508, + "loss": 2.8211, + "step": 2434 + }, + { + "epoch": 0.19651359857961423, + "grad_norm": 0.7883139252662659, + "learning_rate": 
0.00019328240230965298, + "loss": 2.6943, + "step": 2435 + }, + { + "epoch": 0.19659430231619723, + "grad_norm": 0.7539335489273071, + "learning_rate": 0.00019327671261861387, + "loss": 2.6926, + "step": 2436 + }, + { + "epoch": 0.19667500605278024, + "grad_norm": 0.9986057281494141, + "learning_rate": 0.00019327102060287524, + "loss": 2.7851, + "step": 2437 + }, + { + "epoch": 0.19675570978936324, + "grad_norm": 0.7716113924980164, + "learning_rate": 0.000193265326262579, + "loss": 2.752, + "step": 2438 + }, + { + "epoch": 0.19683641352594625, + "grad_norm": 0.9134296774864197, + "learning_rate": 0.000193259629597867, + "loss": 2.7698, + "step": 2439 + }, + { + "epoch": 0.19691711726252925, + "grad_norm": 0.7966345548629761, + "learning_rate": 0.00019325393060888124, + "loss": 2.7839, + "step": 2440 + }, + { + "epoch": 0.19699782099911226, + "grad_norm": 0.8051251173019409, + "learning_rate": 0.0001932482292957638, + "loss": 2.7322, + "step": 2441 + }, + { + "epoch": 0.19707852473569526, + "grad_norm": 0.843169629573822, + "learning_rate": 0.0001932425256586567, + "loss": 2.8263, + "step": 2442 + }, + { + "epoch": 0.19715922847227826, + "grad_norm": 0.7552370429039001, + "learning_rate": 0.00019323681969770213, + "loss": 2.7342, + "step": 2443 + }, + { + "epoch": 0.19723993220886127, + "grad_norm": 0.844473123550415, + "learning_rate": 0.0001932311114130423, + "loss": 2.776, + "step": 2444 + }, + { + "epoch": 0.19732063594544427, + "grad_norm": 0.8002473711967468, + "learning_rate": 0.00019322540080481945, + "loss": 2.7382, + "step": 2445 + }, + { + "epoch": 0.19740133968202728, + "grad_norm": 0.8564329147338867, + "learning_rate": 0.00019321968787317594, + "loss": 2.7592, + "step": 2446 + }, + { + "epoch": 0.19748204341861028, + "grad_norm": 0.7853825688362122, + "learning_rate": 0.00019321397261825408, + "loss": 2.7101, + "step": 2447 + }, + { + "epoch": 0.19756274715519329, + "grad_norm": 0.8482939004898071, + "learning_rate": 0.0001932082550401964, + "loss": 
2.7891, + "step": 2448 + }, + { + "epoch": 0.1976434508917763, + "grad_norm": 0.8361770510673523, + "learning_rate": 0.00019320253513914536, + "loss": 2.7341, + "step": 2449 + }, + { + "epoch": 0.1977241546283593, + "grad_norm": 0.7814618945121765, + "learning_rate": 0.0001931968129152435, + "loss": 2.771, + "step": 2450 + }, + { + "epoch": 0.1978048583649423, + "grad_norm": 0.7588146924972534, + "learning_rate": 0.00019319108836863343, + "loss": 2.7577, + "step": 2451 + }, + { + "epoch": 0.1978855621015253, + "grad_norm": 0.9184895157814026, + "learning_rate": 0.00019318536149945785, + "loss": 2.7711, + "step": 2452 + }, + { + "epoch": 0.1979662658381083, + "grad_norm": 0.8454298973083496, + "learning_rate": 0.00019317963230785947, + "loss": 2.7748, + "step": 2453 + }, + { + "epoch": 0.1980469695746913, + "grad_norm": 0.7662420868873596, + "learning_rate": 0.0001931739007939811, + "loss": 2.7704, + "step": 2454 + }, + { + "epoch": 0.19812767331127432, + "grad_norm": 0.837888777256012, + "learning_rate": 0.0001931681669579655, + "loss": 2.7613, + "step": 2455 + }, + { + "epoch": 0.19820837704785732, + "grad_norm": 0.7835226058959961, + "learning_rate": 0.0001931624307999557, + "loss": 2.6888, + "step": 2456 + }, + { + "epoch": 0.19828908078444032, + "grad_norm": 0.8491464257240295, + "learning_rate": 0.00019315669232009456, + "loss": 2.7521, + "step": 2457 + }, + { + "epoch": 0.19836978452102333, + "grad_norm": 0.7590088248252869, + "learning_rate": 0.00019315095151852516, + "loss": 2.7441, + "step": 2458 + }, + { + "epoch": 0.19845048825760633, + "grad_norm": 0.9316127300262451, + "learning_rate": 0.00019314520839539052, + "loss": 2.786, + "step": 2459 + }, + { + "epoch": 0.19853119199418934, + "grad_norm": 0.7819615006446838, + "learning_rate": 0.0001931394629508338, + "loss": 2.7003, + "step": 2460 + }, + { + "epoch": 0.19861189573077234, + "grad_norm": 0.7675932049751282, + "learning_rate": 0.0001931337151849982, + "loss": 2.7065, + "step": 2461 + }, + { + 
"epoch": 0.19869259946735535, + "grad_norm": 0.7797678112983704, + "learning_rate": 0.000193127965098027, + "loss": 2.7605, + "step": 2462 + }, + { + "epoch": 0.19877330320393835, + "grad_norm": 0.789544403553009, + "learning_rate": 0.00019312221269006345, + "loss": 2.7913, + "step": 2463 + }, + { + "epoch": 0.19885400694052136, + "grad_norm": 0.9594957232475281, + "learning_rate": 0.00019311645796125094, + "loss": 2.785, + "step": 2464 + }, + { + "epoch": 0.19893471067710436, + "grad_norm": 0.8154739141464233, + "learning_rate": 0.00019311070091173287, + "loss": 2.6716, + "step": 2465 + }, + { + "epoch": 0.19901541441368736, + "grad_norm": 0.9042142629623413, + "learning_rate": 0.00019310494154165274, + "loss": 2.734, + "step": 2466 + }, + { + "epoch": 0.19909611815027037, + "grad_norm": 0.7803483605384827, + "learning_rate": 0.0001930991798511541, + "loss": 2.7052, + "step": 2467 + }, + { + "epoch": 0.19917682188685337, + "grad_norm": 0.7917614579200745, + "learning_rate": 0.00019309341584038055, + "loss": 2.728, + "step": 2468 + }, + { + "epoch": 0.19925752562343638, + "grad_norm": 0.8295063376426697, + "learning_rate": 0.00019308764950947568, + "loss": 2.7496, + "step": 2469 + }, + { + "epoch": 0.19933822936001938, + "grad_norm": 0.790831983089447, + "learning_rate": 0.0001930818808585833, + "loss": 2.7356, + "step": 2470 + }, + { + "epoch": 0.19941893309660239, + "grad_norm": 0.8527843952178955, + "learning_rate": 0.0001930761098878471, + "loss": 2.718, + "step": 2471 + }, + { + "epoch": 0.1994996368331854, + "grad_norm": 0.8518494367599487, + "learning_rate": 0.00019307033659741096, + "loss": 2.7189, + "step": 2472 + }, + { + "epoch": 0.1995803405697684, + "grad_norm": 0.8027220368385315, + "learning_rate": 0.00019306456098741872, + "loss": 2.7272, + "step": 2473 + }, + { + "epoch": 0.19966104430635137, + "grad_norm": 0.7516468167304993, + "learning_rate": 0.00019305878305801434, + "loss": 2.798, + "step": 2474 + }, + { + "epoch": 0.19974174804293438, + 
"grad_norm": 0.7676397562026978, + "learning_rate": 0.00019305300280934187, + "loss": 2.8076, + "step": 2475 + }, + { + "epoch": 0.19982245177951738, + "grad_norm": 0.8237762451171875, + "learning_rate": 0.00019304722024154528, + "loss": 2.6998, + "step": 2476 + }, + { + "epoch": 0.19990315551610038, + "grad_norm": 0.8397759199142456, + "learning_rate": 0.0001930414353547688, + "loss": 2.806, + "step": 2477 + }, + { + "epoch": 0.1999838592526834, + "grad_norm": 0.8911117911338806, + "learning_rate": 0.00019303564814915645, + "loss": 2.7566, + "step": 2478 + }, + { + "epoch": 0.2000645629892664, + "grad_norm": 0.765404999256134, + "learning_rate": 0.00019302985862485264, + "loss": 2.7363, + "step": 2479 + }, + { + "epoch": 0.2001452667258494, + "grad_norm": 0.7898589372634888, + "learning_rate": 0.0001930240667820015, + "loss": 2.7007, + "step": 2480 + }, + { + "epoch": 0.2002259704624324, + "grad_norm": 0.7581521272659302, + "learning_rate": 0.0001930182726207475, + "loss": 2.7508, + "step": 2481 + }, + { + "epoch": 0.2003066741990154, + "grad_norm": 0.8179795742034912, + "learning_rate": 0.00019301247614123495, + "loss": 2.7327, + "step": 2482 + }, + { + "epoch": 0.2003873779355984, + "grad_norm": 0.8103611469268799, + "learning_rate": 0.00019300667734360838, + "loss": 2.7869, + "step": 2483 + }, + { + "epoch": 0.20046808167218141, + "grad_norm": 0.7368054389953613, + "learning_rate": 0.0001930008762280123, + "loss": 2.73, + "step": 2484 + }, + { + "epoch": 0.20054878540876442, + "grad_norm": 0.7679662108421326, + "learning_rate": 0.00019299507279459127, + "loss": 2.7905, + "step": 2485 + }, + { + "epoch": 0.20062948914534742, + "grad_norm": 0.7783839702606201, + "learning_rate": 0.0001929892670434899, + "loss": 2.6816, + "step": 2486 + }, + { + "epoch": 0.20071019288193043, + "grad_norm": 0.7575809359550476, + "learning_rate": 0.00019298345897485298, + "loss": 2.7351, + "step": 2487 + }, + { + "epoch": 0.20079089661851343, + "grad_norm": 0.7674959301948547, + 
"learning_rate": 0.00019297764858882514, + "loss": 2.7682, + "step": 2488 + }, + { + "epoch": 0.20087160035509644, + "grad_norm": 0.7972592115402222, + "learning_rate": 0.00019297183588555127, + "loss": 2.782, + "step": 2489 + }, + { + "epoch": 0.20095230409167944, + "grad_norm": 0.8417105674743652, + "learning_rate": 0.00019296602086517624, + "loss": 2.8173, + "step": 2490 + }, + { + "epoch": 0.20103300782826244, + "grad_norm": 0.7194239497184753, + "learning_rate": 0.00019296020352784496, + "loss": 2.7735, + "step": 2491 + }, + { + "epoch": 0.20111371156484545, + "grad_norm": 0.801895022392273, + "learning_rate": 0.00019295438387370237, + "loss": 2.7018, + "step": 2492 + }, + { + "epoch": 0.20119441530142845, + "grad_norm": 0.900943398475647, + "learning_rate": 0.0001929485619028936, + "loss": 2.77, + "step": 2493 + }, + { + "epoch": 0.20127511903801146, + "grad_norm": 0.7882106304168701, + "learning_rate": 0.00019294273761556366, + "loss": 2.7195, + "step": 2494 + }, + { + "epoch": 0.20135582277459446, + "grad_norm": 0.7471950054168701, + "learning_rate": 0.00019293691101185775, + "loss": 2.7346, + "step": 2495 + }, + { + "epoch": 0.20143652651117747, + "grad_norm": 0.7498352527618408, + "learning_rate": 0.00019293108209192104, + "loss": 2.7255, + "step": 2496 + }, + { + "epoch": 0.20151723024776047, + "grad_norm": 0.8233164548873901, + "learning_rate": 0.0001929252508558989, + "loss": 2.8253, + "step": 2497 + }, + { + "epoch": 0.20159793398434347, + "grad_norm": 0.7533289790153503, + "learning_rate": 0.00019291941730393658, + "loss": 2.7487, + "step": 2498 + }, + { + "epoch": 0.20167863772092648, + "grad_norm": 0.7372691035270691, + "learning_rate": 0.0001929135814361795, + "loss": 2.6799, + "step": 2499 + }, + { + "epoch": 0.20175934145750948, + "grad_norm": 0.7760890126228333, + "learning_rate": 0.00019290774325277305, + "loss": 2.8366, + "step": 2500 + }, + { + "epoch": 0.2018400451940925, + "grad_norm": 0.7653746008872986, + "learning_rate": 
0.0001929019027538628, + "loss": 2.7413, + "step": 2501 + }, + { + "epoch": 0.2019207489306755, + "grad_norm": 0.7364951372146606, + "learning_rate": 0.0001928960599395943, + "loss": 2.7405, + "step": 2502 + }, + { + "epoch": 0.2020014526672585, + "grad_norm": 0.8317872285842896, + "learning_rate": 0.00019289021481011314, + "loss": 2.7186, + "step": 2503 + }, + { + "epoch": 0.2020821564038415, + "grad_norm": 0.8325691223144531, + "learning_rate": 0.00019288436736556502, + "loss": 2.7305, + "step": 2504 + }, + { + "epoch": 0.2021628601404245, + "grad_norm": 0.7674683332443237, + "learning_rate": 0.00019287851760609566, + "loss": 2.7171, + "step": 2505 + }, + { + "epoch": 0.2022435638770075, + "grad_norm": 0.8043155074119568, + "learning_rate": 0.00019287266553185084, + "loss": 2.7425, + "step": 2506 + }, + { + "epoch": 0.2023242676135905, + "grad_norm": 0.8522058725357056, + "learning_rate": 0.00019286681114297642, + "loss": 2.7764, + "step": 2507 + }, + { + "epoch": 0.20240497135017352, + "grad_norm": 0.7700086236000061, + "learning_rate": 0.00019286095443961832, + "loss": 2.7499, + "step": 2508 + }, + { + "epoch": 0.20248567508675652, + "grad_norm": 0.8078013062477112, + "learning_rate": 0.0001928550954219225, + "loss": 2.7863, + "step": 2509 + }, + { + "epoch": 0.20256637882333953, + "grad_norm": 0.7431712746620178, + "learning_rate": 0.00019284923409003496, + "loss": 2.8296, + "step": 2510 + }, + { + "epoch": 0.20264708255992253, + "grad_norm": 0.753754734992981, + "learning_rate": 0.00019284337044410182, + "loss": 2.722, + "step": 2511 + }, + { + "epoch": 0.20272778629650554, + "grad_norm": 0.8117631077766418, + "learning_rate": 0.00019283750448426918, + "loss": 2.7718, + "step": 2512 + }, + { + "epoch": 0.20280849003308854, + "grad_norm": 0.9149020910263062, + "learning_rate": 0.00019283163621068325, + "loss": 2.7416, + "step": 2513 + }, + { + "epoch": 0.20288919376967154, + "grad_norm": 0.8240262866020203, + "learning_rate": 0.0001928257656234903, + "loss": 
2.811, + "step": 2514 + }, + { + "epoch": 0.20296989750625455, + "grad_norm": 0.7394035458564758, + "learning_rate": 0.00019281989272283657, + "loss": 2.7345, + "step": 2515 + }, + { + "epoch": 0.20305060124283755, + "grad_norm": 0.7827345132827759, + "learning_rate": 0.00019281401750886854, + "loss": 2.7955, + "step": 2516 + }, + { + "epoch": 0.20313130497942056, + "grad_norm": 0.7482333183288574, + "learning_rate": 0.00019280813998173252, + "loss": 2.6963, + "step": 2517 + }, + { + "epoch": 0.20321200871600356, + "grad_norm": 0.8187180757522583, + "learning_rate": 0.00019280226014157509, + "loss": 2.7413, + "step": 2518 + }, + { + "epoch": 0.20329271245258657, + "grad_norm": 0.7708666920661926, + "learning_rate": 0.00019279637798854274, + "loss": 2.7636, + "step": 2519 + }, + { + "epoch": 0.20337341618916957, + "grad_norm": 0.7414180040359497, + "learning_rate": 0.00019279049352278208, + "loss": 2.7321, + "step": 2520 + }, + { + "epoch": 0.20345411992575257, + "grad_norm": 0.8172248601913452, + "learning_rate": 0.00019278460674443975, + "loss": 2.8026, + "step": 2521 + }, + { + "epoch": 0.20353482366233558, + "grad_norm": 0.7463089227676392, + "learning_rate": 0.0001927787176536625, + "loss": 2.74, + "step": 2522 + }, + { + "epoch": 0.20361552739891858, + "grad_norm": 0.7684210538864136, + "learning_rate": 0.00019277282625059704, + "loss": 2.782, + "step": 2523 + }, + { + "epoch": 0.2036962311355016, + "grad_norm": 0.9246797561645508, + "learning_rate": 0.00019276693253539027, + "loss": 2.8546, + "step": 2524 + }, + { + "epoch": 0.20377693487208456, + "grad_norm": 0.753753125667572, + "learning_rate": 0.00019276103650818906, + "loss": 2.7422, + "step": 2525 + }, + { + "epoch": 0.20385763860866757, + "grad_norm": 0.7461897134780884, + "learning_rate": 0.00019275513816914032, + "loss": 2.7575, + "step": 2526 + }, + { + "epoch": 0.20393834234525057, + "grad_norm": 0.7555257081985474, + "learning_rate": 0.00019274923751839106, + "loss": 2.7423, + "step": 2527 + }, + 
{ + "epoch": 0.20401904608183358, + "grad_norm": 0.7628511786460876, + "learning_rate": 0.00019274333455608837, + "loss": 2.7386, + "step": 2528 + }, + { + "epoch": 0.20409974981841658, + "grad_norm": 0.7529371976852417, + "learning_rate": 0.00019273742928237937, + "loss": 2.6852, + "step": 2529 + }, + { + "epoch": 0.20418045355499959, + "grad_norm": 0.7466779351234436, + "learning_rate": 0.00019273152169741118, + "loss": 2.6996, + "step": 2530 + }, + { + "epoch": 0.2042611572915826, + "grad_norm": 0.7916153073310852, + "learning_rate": 0.0001927256118013311, + "loss": 2.7644, + "step": 2531 + }, + { + "epoch": 0.2043418610281656, + "grad_norm": 0.7662972211837769, + "learning_rate": 0.00019271969959428636, + "loss": 2.7497, + "step": 2532 + }, + { + "epoch": 0.2044225647647486, + "grad_norm": 0.8244680166244507, + "learning_rate": 0.00019271378507642432, + "loss": 2.7598, + "step": 2533 + }, + { + "epoch": 0.2045032685013316, + "grad_norm": 0.7721532583236694, + "learning_rate": 0.00019270786824789244, + "loss": 2.7303, + "step": 2534 + }, + { + "epoch": 0.2045839722379146, + "grad_norm": 0.7598209381103516, + "learning_rate": 0.0001927019491088381, + "loss": 2.734, + "step": 2535 + }, + { + "epoch": 0.2046646759744976, + "grad_norm": 0.7778685092926025, + "learning_rate": 0.00019269602765940887, + "loss": 2.7113, + "step": 2536 + }, + { + "epoch": 0.20474537971108062, + "grad_norm": 0.7447141408920288, + "learning_rate": 0.00019269010389975235, + "loss": 2.7205, + "step": 2537 + }, + { + "epoch": 0.20482608344766362, + "grad_norm": 0.8066664338111877, + "learning_rate": 0.00019268417783001613, + "loss": 2.7637, + "step": 2538 + }, + { + "epoch": 0.20490678718424662, + "grad_norm": 0.7055318355560303, + "learning_rate": 0.00019267824945034794, + "loss": 2.6936, + "step": 2539 + }, + { + "epoch": 0.20498749092082963, + "grad_norm": 0.832647979259491, + "learning_rate": 0.0001926723187608955, + "loss": 2.7423, + "step": 2540 + }, + { + "epoch": 0.20506819465741263, 
+ "grad_norm": 0.7316983938217163, + "learning_rate": 0.0001926663857618066, + "loss": 2.7136, + "step": 2541 + }, + { + "epoch": 0.20514889839399564, + "grad_norm": 0.8115554451942444, + "learning_rate": 0.00019266045045322915, + "loss": 2.6964, + "step": 2542 + }, + { + "epoch": 0.20522960213057864, + "grad_norm": 0.802573025226593, + "learning_rate": 0.00019265451283531108, + "loss": 2.7989, + "step": 2543 + }, + { + "epoch": 0.20531030586716165, + "grad_norm": 0.7073348164558411, + "learning_rate": 0.00019264857290820033, + "loss": 2.7399, + "step": 2544 + }, + { + "epoch": 0.20539100960374465, + "grad_norm": 0.7749258279800415, + "learning_rate": 0.00019264263067204495, + "loss": 2.7321, + "step": 2545 + }, + { + "epoch": 0.20547171334032766, + "grad_norm": 0.7473557591438293, + "learning_rate": 0.00019263668612699305, + "loss": 2.7774, + "step": 2546 + }, + { + "epoch": 0.20555241707691066, + "grad_norm": 0.8073423504829407, + "learning_rate": 0.0001926307392731928, + "loss": 2.7429, + "step": 2547 + }, + { + "epoch": 0.20563312081349366, + "grad_norm": 0.9106586575508118, + "learning_rate": 0.00019262479011079235, + "loss": 2.7972, + "step": 2548 + }, + { + "epoch": 0.20571382455007667, + "grad_norm": 0.7975970506668091, + "learning_rate": 0.00019261883863994002, + "loss": 2.7561, + "step": 2549 + }, + { + "epoch": 0.20579452828665967, + "grad_norm": 0.8967030048370361, + "learning_rate": 0.00019261288486078414, + "loss": 2.7368, + "step": 2550 + }, + { + "epoch": 0.20587523202324268, + "grad_norm": 0.7157345414161682, + "learning_rate": 0.00019260692877347304, + "loss": 2.7329, + "step": 2551 + }, + { + "epoch": 0.20595593575982568, + "grad_norm": 0.8758620619773865, + "learning_rate": 0.00019260097037815524, + "loss": 2.7522, + "step": 2552 + }, + { + "epoch": 0.20603663949640869, + "grad_norm": 0.7948124408721924, + "learning_rate": 0.00019259500967497916, + "loss": 2.7675, + "step": 2553 + }, + { + "epoch": 0.2061173432329917, + "grad_norm": 
0.8233941197395325, + "learning_rate": 0.00019258904666409344, + "loss": 2.7728, + "step": 2554 + }, + { + "epoch": 0.2061980469695747, + "grad_norm": 0.8084299564361572, + "learning_rate": 0.0001925830813456466, + "loss": 2.7728, + "step": 2555 + }, + { + "epoch": 0.2062787507061577, + "grad_norm": 0.8004557490348816, + "learning_rate": 0.00019257711371978737, + "loss": 2.7783, + "step": 2556 + }, + { + "epoch": 0.2063594544427407, + "grad_norm": 0.7999755144119263, + "learning_rate": 0.0001925711437866645, + "loss": 2.7632, + "step": 2557 + }, + { + "epoch": 0.2064401581793237, + "grad_norm": 0.7317264080047607, + "learning_rate": 0.0001925651715464267, + "loss": 2.7101, + "step": 2558 + }, + { + "epoch": 0.2065208619159067, + "grad_norm": 0.7906385660171509, + "learning_rate": 0.00019255919699922287, + "loss": 2.7258, + "step": 2559 + }, + { + "epoch": 0.20660156565248972, + "grad_norm": 0.7932917475700378, + "learning_rate": 0.0001925532201452019, + "loss": 2.7714, + "step": 2560 + }, + { + "epoch": 0.20668226938907272, + "grad_norm": 0.8039286732673645, + "learning_rate": 0.00019254724098451275, + "loss": 2.7469, + "step": 2561 + }, + { + "epoch": 0.20676297312565572, + "grad_norm": 0.79400634765625, + "learning_rate": 0.00019254125951730444, + "loss": 2.7499, + "step": 2562 + }, + { + "epoch": 0.20684367686223873, + "grad_norm": 0.8072263598442078, + "learning_rate": 0.00019253527574372603, + "loss": 2.7805, + "step": 2563 + }, + { + "epoch": 0.20692438059882173, + "grad_norm": 0.7117579579353333, + "learning_rate": 0.00019252928966392667, + "loss": 2.7321, + "step": 2564 + }, + { + "epoch": 0.20700508433540474, + "grad_norm": 0.7080324292182922, + "learning_rate": 0.00019252330127805554, + "loss": 2.7225, + "step": 2565 + }, + { + "epoch": 0.20708578807198774, + "grad_norm": 0.7276670336723328, + "learning_rate": 0.00019251731058626186, + "loss": 2.7592, + "step": 2566 + }, + { + "epoch": 0.20716649180857075, + "grad_norm": 0.8030811548233032, + 
"learning_rate": 0.00019251131758869495, + "loss": 2.7184, + "step": 2567 + }, + { + "epoch": 0.20724719554515375, + "grad_norm": 0.7808283567428589, + "learning_rate": 0.0001925053222855042, + "loss": 2.7504, + "step": 2568 + }, + { + "epoch": 0.20732789928173675, + "grad_norm": 0.783225953578949, + "learning_rate": 0.00019249932467683902, + "loss": 2.7125, + "step": 2569 + }, + { + "epoch": 0.20740860301831976, + "grad_norm": 0.7440134286880493, + "learning_rate": 0.00019249332476284887, + "loss": 2.7938, + "step": 2570 + }, + { + "epoch": 0.20748930675490276, + "grad_norm": 0.8729553818702698, + "learning_rate": 0.00019248732254368328, + "loss": 2.8338, + "step": 2571 + }, + { + "epoch": 0.20757001049148577, + "grad_norm": 0.8170497417449951, + "learning_rate": 0.0001924813180194918, + "loss": 2.7254, + "step": 2572 + }, + { + "epoch": 0.20765071422806877, + "grad_norm": 0.733220100402832, + "learning_rate": 0.00019247531119042418, + "loss": 2.6401, + "step": 2573 + }, + { + "epoch": 0.20773141796465178, + "grad_norm": 0.7247937917709351, + "learning_rate": 0.00019246930205663008, + "loss": 2.736, + "step": 2574 + }, + { + "epoch": 0.20781212170123478, + "grad_norm": 0.7880212068557739, + "learning_rate": 0.00019246329061825925, + "loss": 2.7173, + "step": 2575 + }, + { + "epoch": 0.20789282543781776, + "grad_norm": 0.820808470249176, + "learning_rate": 0.00019245727687546149, + "loss": 2.7331, + "step": 2576 + }, + { + "epoch": 0.20797352917440076, + "grad_norm": 0.8605412840843201, + "learning_rate": 0.00019245126082838673, + "loss": 2.761, + "step": 2577 + }, + { + "epoch": 0.20805423291098377, + "grad_norm": 0.763506293296814, + "learning_rate": 0.00019244524247718486, + "loss": 2.7053, + "step": 2578 + }, + { + "epoch": 0.20813493664756677, + "grad_norm": 0.8428114652633667, + "learning_rate": 0.00019243922182200592, + "loss": 2.724, + "step": 2579 + }, + { + "epoch": 0.20821564038414977, + "grad_norm": 0.821986734867096, + "learning_rate": 
0.0001924331988629999, + "loss": 2.7615, + "step": 2580 + }, + { + "epoch": 0.20829634412073278, + "grad_norm": 0.8177430629730225, + "learning_rate": 0.00019242717360031693, + "loss": 2.7012, + "step": 2581 + }, + { + "epoch": 0.20837704785731578, + "grad_norm": 0.7584180235862732, + "learning_rate": 0.00019242114603410724, + "loss": 2.7372, + "step": 2582 + }, + { + "epoch": 0.2084577515938988, + "grad_norm": 0.9384645223617554, + "learning_rate": 0.00019241511616452096, + "loss": 2.695, + "step": 2583 + }, + { + "epoch": 0.2085384553304818, + "grad_norm": 0.8518964648246765, + "learning_rate": 0.00019240908399170844, + "loss": 2.8216, + "step": 2584 + }, + { + "epoch": 0.2086191590670648, + "grad_norm": 0.9082949161529541, + "learning_rate": 0.00019240304951581995, + "loss": 2.777, + "step": 2585 + }, + { + "epoch": 0.2086998628036478, + "grad_norm": 0.7906371355056763, + "learning_rate": 0.00019239701273700597, + "loss": 2.7083, + "step": 2586 + }, + { + "epoch": 0.2087805665402308, + "grad_norm": 0.7711954712867737, + "learning_rate": 0.00019239097365541686, + "loss": 2.6907, + "step": 2587 + }, + { + "epoch": 0.2088612702768138, + "grad_norm": 0.8155506253242493, + "learning_rate": 0.0001923849322712032, + "loss": 2.7602, + "step": 2588 + }, + { + "epoch": 0.20894197401339681, + "grad_norm": 0.8843441009521484, + "learning_rate": 0.0001923788885845155, + "loss": 2.7525, + "step": 2589 + }, + { + "epoch": 0.20902267774997982, + "grad_norm": 0.7336379289627075, + "learning_rate": 0.00019237284259550444, + "loss": 2.731, + "step": 2590 + }, + { + "epoch": 0.20910338148656282, + "grad_norm": 0.8261263370513916, + "learning_rate": 0.00019236679430432066, + "loss": 2.6493, + "step": 2591 + }, + { + "epoch": 0.20918408522314583, + "grad_norm": 0.7716216444969177, + "learning_rate": 0.00019236074371111497, + "loss": 2.7775, + "step": 2592 + }, + { + "epoch": 0.20926478895972883, + "grad_norm": 0.8390100598335266, + "learning_rate": 0.00019235469081603808, + "loss": 
2.7532, + "step": 2593 + }, + { + "epoch": 0.20934549269631184, + "grad_norm": 0.8388446569442749, + "learning_rate": 0.00019234863561924087, + "loss": 2.8171, + "step": 2594 + }, + { + "epoch": 0.20942619643289484, + "grad_norm": 0.8003209829330444, + "learning_rate": 0.00019234257812087425, + "loss": 2.7385, + "step": 2595 + }, + { + "epoch": 0.20950690016947784, + "grad_norm": 0.8008458018302917, + "learning_rate": 0.00019233651832108918, + "loss": 2.7366, + "step": 2596 + }, + { + "epoch": 0.20958760390606085, + "grad_norm": 0.7701897025108337, + "learning_rate": 0.00019233045622003676, + "loss": 2.69, + "step": 2597 + }, + { + "epoch": 0.20966830764264385, + "grad_norm": 0.8106730580329895, + "learning_rate": 0.00019232439181786796, + "loss": 2.6911, + "step": 2598 + }, + { + "epoch": 0.20974901137922686, + "grad_norm": 0.9580766558647156, + "learning_rate": 0.00019231832511473401, + "loss": 2.7663, + "step": 2599 + }, + { + "epoch": 0.20982971511580986, + "grad_norm": 0.7851876616477966, + "learning_rate": 0.0001923122561107861, + "loss": 2.7632, + "step": 2600 + }, + { + "epoch": 0.20991041885239287, + "grad_norm": 0.8160942196846008, + "learning_rate": 0.0001923061848061754, + "loss": 2.8533, + "step": 2601 + }, + { + "epoch": 0.20999112258897587, + "grad_norm": 0.8540663719177246, + "learning_rate": 0.00019230011120105334, + "loss": 2.7083, + "step": 2602 + }, + { + "epoch": 0.21007182632555887, + "grad_norm": 0.8273833394050598, + "learning_rate": 0.0001922940352955712, + "loss": 2.7916, + "step": 2603 + }, + { + "epoch": 0.21015253006214188, + "grad_norm": 0.8394255638122559, + "learning_rate": 0.00019228795708988046, + "loss": 2.8561, + "step": 2604 + }, + { + "epoch": 0.21023323379872488, + "grad_norm": 0.8291410803794861, + "learning_rate": 0.00019228187658413258, + "loss": 2.7462, + "step": 2605 + }, + { + "epoch": 0.2103139375353079, + "grad_norm": 0.7984235286712646, + "learning_rate": 0.00019227579377847912, + "loss": 2.7459, + "step": 2606 + }, + 
{ + "epoch": 0.2103946412718909, + "grad_norm": 0.8343340158462524, + "learning_rate": 0.00019226970867307163, + "loss": 2.6963, + "step": 2607 + }, + { + "epoch": 0.2104753450084739, + "grad_norm": 0.6982808709144592, + "learning_rate": 0.00019226362126806184, + "loss": 2.7333, + "step": 2608 + }, + { + "epoch": 0.2105560487450569, + "grad_norm": 0.8039572834968567, + "learning_rate": 0.0001922575315636014, + "loss": 2.7253, + "step": 2609 + }, + { + "epoch": 0.2106367524816399, + "grad_norm": 0.8708705902099609, + "learning_rate": 0.00019225143955984214, + "loss": 2.7555, + "step": 2610 + }, + { + "epoch": 0.2107174562182229, + "grad_norm": 0.8773347735404968, + "learning_rate": 0.00019224534525693585, + "loss": 2.7598, + "step": 2611 + }, + { + "epoch": 0.2107981599548059, + "grad_norm": 0.8151054978370667, + "learning_rate": 0.0001922392486550344, + "loss": 2.7398, + "step": 2612 + }, + { + "epoch": 0.21087886369138892, + "grad_norm": 0.7922329306602478, + "learning_rate": 0.0001922331497542898, + "loss": 2.7296, + "step": 2613 + }, + { + "epoch": 0.21095956742797192, + "grad_norm": 0.7536506652832031, + "learning_rate": 0.00019222704855485396, + "loss": 2.7897, + "step": 2614 + }, + { + "epoch": 0.21104027116455493, + "grad_norm": 0.7539274096488953, + "learning_rate": 0.000192220945056879, + "loss": 2.7809, + "step": 2615 + }, + { + "epoch": 0.21112097490113793, + "grad_norm": 0.7737646698951721, + "learning_rate": 0.00019221483926051705, + "loss": 2.7195, + "step": 2616 + }, + { + "epoch": 0.21120167863772094, + "grad_norm": 0.7421913743019104, + "learning_rate": 0.00019220873116592024, + "loss": 2.6817, + "step": 2617 + }, + { + "epoch": 0.21128238237430394, + "grad_norm": 0.7872927784919739, + "learning_rate": 0.0001922026207732408, + "loss": 2.7379, + "step": 2618 + }, + { + "epoch": 0.21136308611088694, + "grad_norm": 0.7950671315193176, + "learning_rate": 0.00019219650808263104, + "loss": 2.7135, + "step": 2619 + }, + { + "epoch": 0.21144378984746995, + 
"grad_norm": 0.7711792588233948, + "learning_rate": 0.0001921903930942433, + "loss": 2.7021, + "step": 2620 + }, + { + "epoch": 0.21152449358405295, + "grad_norm": 0.9030743837356567, + "learning_rate": 0.00019218427580822996, + "loss": 2.8083, + "step": 2621 + }, + { + "epoch": 0.21160519732063596, + "grad_norm": 0.8191907405853271, + "learning_rate": 0.0001921781562247435, + "loss": 2.6998, + "step": 2622 + }, + { + "epoch": 0.21168590105721896, + "grad_norm": 0.7883538603782654, + "learning_rate": 0.00019217203434393644, + "loss": 2.7573, + "step": 2623 + }, + { + "epoch": 0.21176660479380197, + "grad_norm": 0.7565868496894836, + "learning_rate": 0.00019216591016596134, + "loss": 2.7725, + "step": 2624 + }, + { + "epoch": 0.21184730853038497, + "grad_norm": 0.8579828143119812, + "learning_rate": 0.00019215978369097086, + "loss": 2.7529, + "step": 2625 + }, + { + "epoch": 0.21192801226696797, + "grad_norm": 0.7835422158241272, + "learning_rate": 0.0001921536549191176, + "loss": 2.6926, + "step": 2626 + }, + { + "epoch": 0.21200871600355095, + "grad_norm": 0.8041907548904419, + "learning_rate": 0.00019214752385055442, + "loss": 2.7541, + "step": 2627 + }, + { + "epoch": 0.21208941974013396, + "grad_norm": 0.7754014730453491, + "learning_rate": 0.00019214139048543406, + "loss": 2.6807, + "step": 2628 + }, + { + "epoch": 0.21217012347671696, + "grad_norm": 0.8222344517707825, + "learning_rate": 0.00019213525482390936, + "loss": 2.7339, + "step": 2629 + }, + { + "epoch": 0.21225082721329996, + "grad_norm": 0.8083673715591431, + "learning_rate": 0.0001921291168661333, + "loss": 2.739, + "step": 2630 + }, + { + "epoch": 0.21233153094988297, + "grad_norm": 0.8039100766181946, + "learning_rate": 0.0001921229766122588, + "loss": 2.7372, + "step": 2631 + }, + { + "epoch": 0.21241223468646597, + "grad_norm": 0.7513072490692139, + "learning_rate": 0.00019211683406243892, + "loss": 2.7284, + "step": 2632 + }, + { + "epoch": 0.21249293842304898, + "grad_norm": 
0.7653890252113342, + "learning_rate": 0.00019211068921682673, + "loss": 2.6911, + "step": 2633 + }, + { + "epoch": 0.21257364215963198, + "grad_norm": 0.7210217714309692, + "learning_rate": 0.00019210454207557542, + "loss": 2.6989, + "step": 2634 + }, + { + "epoch": 0.21265434589621499, + "grad_norm": 0.7389202117919922, + "learning_rate": 0.00019209839263883814, + "loss": 2.7016, + "step": 2635 + }, + { + "epoch": 0.212735049632798, + "grad_norm": 0.8069031238555908, + "learning_rate": 0.00019209224090676813, + "loss": 2.8213, + "step": 2636 + }, + { + "epoch": 0.212815753369381, + "grad_norm": 0.8019161224365234, + "learning_rate": 0.00019208608687951877, + "loss": 2.7413, + "step": 2637 + }, + { + "epoch": 0.212896457105964, + "grad_norm": 0.775572657585144, + "learning_rate": 0.00019207993055724343, + "loss": 2.7016, + "step": 2638 + }, + { + "epoch": 0.212977160842547, + "grad_norm": 0.7482941746711731, + "learning_rate": 0.0001920737719400955, + "loss": 2.7991, + "step": 2639 + }, + { + "epoch": 0.21305786457913, + "grad_norm": 0.8467636704444885, + "learning_rate": 0.0001920676110282285, + "loss": 2.7401, + "step": 2640 + }, + { + "epoch": 0.213138568315713, + "grad_norm": 0.8726305365562439, + "learning_rate": 0.00019206144782179597, + "loss": 2.7599, + "step": 2641 + }, + { + "epoch": 0.21321927205229602, + "grad_norm": 0.740527868270874, + "learning_rate": 0.00019205528232095148, + "loss": 2.7326, + "step": 2642 + }, + { + "epoch": 0.21329997578887902, + "grad_norm": 0.7932354211807251, + "learning_rate": 0.00019204911452584873, + "loss": 2.7873, + "step": 2643 + }, + { + "epoch": 0.21338067952546202, + "grad_norm": 0.7994125485420227, + "learning_rate": 0.00019204294443664143, + "loss": 2.7305, + "step": 2644 + }, + { + "epoch": 0.21346138326204503, + "grad_norm": 0.880557656288147, + "learning_rate": 0.00019203677205348338, + "loss": 2.7295, + "step": 2645 + }, + { + "epoch": 0.21354208699862803, + "grad_norm": 0.8269557952880859, + "learning_rate": 
0.00019203059737652836, + "loss": 2.765, + "step": 2646 + }, + { + "epoch": 0.21362279073521104, + "grad_norm": 0.8732784986495972, + "learning_rate": 0.00019202442040593026, + "loss": 2.6742, + "step": 2647 + }, + { + "epoch": 0.21370349447179404, + "grad_norm": 0.7921704649925232, + "learning_rate": 0.0001920182411418431, + "loss": 2.7144, + "step": 2648 + }, + { + "epoch": 0.21378419820837705, + "grad_norm": 0.8097628355026245, + "learning_rate": 0.00019201205958442082, + "loss": 2.7513, + "step": 2649 + }, + { + "epoch": 0.21386490194496005, + "grad_norm": 0.8230542540550232, + "learning_rate": 0.00019200587573381744, + "loss": 2.7648, + "step": 2650 + }, + { + "epoch": 0.21394560568154306, + "grad_norm": 0.7719153761863708, + "learning_rate": 0.0001919996895901872, + "loss": 2.7637, + "step": 2651 + }, + { + "epoch": 0.21402630941812606, + "grad_norm": 0.9022669792175293, + "learning_rate": 0.00019199350115368415, + "loss": 2.7707, + "step": 2652 + }, + { + "epoch": 0.21410701315470906, + "grad_norm": 0.8111257553100586, + "learning_rate": 0.00019198731042446263, + "loss": 2.7423, + "step": 2653 + }, + { + "epoch": 0.21418771689129207, + "grad_norm": 0.7534981966018677, + "learning_rate": 0.00019198111740267683, + "loss": 2.7474, + "step": 2654 + }, + { + "epoch": 0.21426842062787507, + "grad_norm": 0.761411190032959, + "learning_rate": 0.00019197492208848117, + "loss": 2.7541, + "step": 2655 + }, + { + "epoch": 0.21434912436445808, + "grad_norm": 0.8076324462890625, + "learning_rate": 0.00019196872448203002, + "loss": 2.7198, + "step": 2656 + }, + { + "epoch": 0.21442982810104108, + "grad_norm": 0.7987746000289917, + "learning_rate": 0.00019196252458347784, + "loss": 2.7164, + "step": 2657 + }, + { + "epoch": 0.21451053183762409, + "grad_norm": 0.7581545114517212, + "learning_rate": 0.0001919563223929792, + "loss": 2.6837, + "step": 2658 + }, + { + "epoch": 0.2145912355742071, + "grad_norm": 0.8773601055145264, + "learning_rate": 0.00019195011791068857, + 
"loss": 2.8248, + "step": 2659 + }, + { + "epoch": 0.2146719393107901, + "grad_norm": 0.7027503252029419, + "learning_rate": 0.00019194391113676066, + "loss": 2.6726, + "step": 2660 + }, + { + "epoch": 0.2147526430473731, + "grad_norm": 0.8650866746902466, + "learning_rate": 0.00019193770207135015, + "loss": 2.7348, + "step": 2661 + }, + { + "epoch": 0.2148333467839561, + "grad_norm": 0.8521862030029297, + "learning_rate": 0.0001919314907146118, + "loss": 2.7409, + "step": 2662 + }, + { + "epoch": 0.2149140505205391, + "grad_norm": 0.8098535537719727, + "learning_rate": 0.00019192527706670033, + "loss": 2.7615, + "step": 2663 + }, + { + "epoch": 0.2149947542571221, + "grad_norm": 0.7396193146705627, + "learning_rate": 0.0001919190611277707, + "loss": 2.7191, + "step": 2664 + }, + { + "epoch": 0.21507545799370512, + "grad_norm": 0.8245799541473389, + "learning_rate": 0.00019191284289797776, + "loss": 2.7429, + "step": 2665 + }, + { + "epoch": 0.21515616173028812, + "grad_norm": 0.791646420955658, + "learning_rate": 0.00019190662237747656, + "loss": 2.7197, + "step": 2666 + }, + { + "epoch": 0.21523686546687112, + "grad_norm": 0.7850802540779114, + "learning_rate": 0.00019190039956642205, + "loss": 2.7353, + "step": 2667 + }, + { + "epoch": 0.21531756920345413, + "grad_norm": 0.7657971978187561, + "learning_rate": 0.00019189417446496937, + "loss": 2.7083, + "step": 2668 + }, + { + "epoch": 0.21539827294003713, + "grad_norm": 0.7704403400421143, + "learning_rate": 0.00019188794707327363, + "loss": 2.7813, + "step": 2669 + }, + { + "epoch": 0.21547897667662014, + "grad_norm": 0.7345917224884033, + "learning_rate": 0.00019188171739149005, + "loss": 2.7098, + "step": 2670 + }, + { + "epoch": 0.21555968041320314, + "grad_norm": 0.728831946849823, + "learning_rate": 0.00019187548541977392, + "loss": 2.6745, + "step": 2671 + }, + { + "epoch": 0.21564038414978615, + "grad_norm": 0.8079627156257629, + "learning_rate": 0.0001918692511582805, + "loss": 2.6427, + "step": 2672 + 
}, + { + "epoch": 0.21572108788636915, + "grad_norm": 0.766808032989502, + "learning_rate": 0.0001918630146071652, + "loss": 2.6956, + "step": 2673 + }, + { + "epoch": 0.21580179162295215, + "grad_norm": 0.7555391192436218, + "learning_rate": 0.00019185677576658345, + "loss": 2.6499, + "step": 2674 + }, + { + "epoch": 0.21588249535953516, + "grad_norm": 0.7740229964256287, + "learning_rate": 0.00019185053463669074, + "loss": 2.7685, + "step": 2675 + }, + { + "epoch": 0.21596319909611816, + "grad_norm": 0.8272803425788879, + "learning_rate": 0.00019184429121764257, + "loss": 2.7272, + "step": 2676 + }, + { + "epoch": 0.21604390283270117, + "grad_norm": 0.870625376701355, + "learning_rate": 0.00019183804550959463, + "loss": 2.7509, + "step": 2677 + }, + { + "epoch": 0.21612460656928414, + "grad_norm": 0.8021238446235657, + "learning_rate": 0.0001918317975127025, + "loss": 2.7058, + "step": 2678 + }, + { + "epoch": 0.21620531030586715, + "grad_norm": 0.729918897151947, + "learning_rate": 0.00019182554722712192, + "loss": 2.6145, + "step": 2679 + }, + { + "epoch": 0.21628601404245015, + "grad_norm": 0.7658380270004272, + "learning_rate": 0.00019181929465300867, + "loss": 2.712, + "step": 2680 + }, + { + "epoch": 0.21636671777903316, + "grad_norm": 0.7702174186706543, + "learning_rate": 0.00019181303979051858, + "loss": 2.8257, + "step": 2681 + }, + { + "epoch": 0.21644742151561616, + "grad_norm": 0.7782231569290161, + "learning_rate": 0.00019180678263980755, + "loss": 2.8226, + "step": 2682 + }, + { + "epoch": 0.21652812525219917, + "grad_norm": 0.7448495626449585, + "learning_rate": 0.0001918005232010315, + "loss": 2.7877, + "step": 2683 + }, + { + "epoch": 0.21660882898878217, + "grad_norm": 0.7273527979850769, + "learning_rate": 0.00019179426147434647, + "loss": 2.7169, + "step": 2684 + }, + { + "epoch": 0.21668953272536517, + "grad_norm": 0.7730992436408997, + "learning_rate": 0.00019178799745990846, + "loss": 2.717, + "step": 2685 + }, + { + "epoch": 
0.21677023646194818, + "grad_norm": 0.7709231376647949, + "learning_rate": 0.0001917817311578736, + "loss": 2.7676, + "step": 2686 + }, + { + "epoch": 0.21685094019853118, + "grad_norm": 0.7825181484222412, + "learning_rate": 0.00019177546256839812, + "loss": 2.7473, + "step": 2687 + }, + { + "epoch": 0.2169316439351142, + "grad_norm": 0.8133581280708313, + "learning_rate": 0.0001917691916916382, + "loss": 2.7242, + "step": 2688 + }, + { + "epoch": 0.2170123476716972, + "grad_norm": 0.7833015322685242, + "learning_rate": 0.00019176291852775011, + "loss": 2.8128, + "step": 2689 + }, + { + "epoch": 0.2170930514082802, + "grad_norm": 0.7423487305641174, + "learning_rate": 0.00019175664307689028, + "loss": 2.6999, + "step": 2690 + }, + { + "epoch": 0.2171737551448632, + "grad_norm": 0.7881289124488831, + "learning_rate": 0.000191750365339215, + "loss": 2.7349, + "step": 2691 + }, + { + "epoch": 0.2172544588814462, + "grad_norm": 0.8316197395324707, + "learning_rate": 0.00019174408531488077, + "loss": 2.7654, + "step": 2692 + }, + { + "epoch": 0.2173351626180292, + "grad_norm": 0.7589917778968811, + "learning_rate": 0.00019173780300404413, + "loss": 2.6815, + "step": 2693 + }, + { + "epoch": 0.21741586635461221, + "grad_norm": 0.7752439975738525, + "learning_rate": 0.00019173151840686163, + "loss": 2.7804, + "step": 2694 + }, + { + "epoch": 0.21749657009119522, + "grad_norm": 0.8156552910804749, + "learning_rate": 0.0001917252315234899, + "loss": 2.7325, + "step": 2695 + }, + { + "epoch": 0.21757727382777822, + "grad_norm": 0.8886982798576355, + "learning_rate": 0.00019171894235408564, + "loss": 2.7257, + "step": 2696 + }, + { + "epoch": 0.21765797756436123, + "grad_norm": 0.8270704746246338, + "learning_rate": 0.00019171265089880558, + "loss": 2.7357, + "step": 2697 + }, + { + "epoch": 0.21773868130094423, + "grad_norm": 0.807700514793396, + "learning_rate": 0.00019170635715780651, + "loss": 2.7488, + "step": 2698 + }, + { + "epoch": 0.21781938503752724, + "grad_norm": 
0.8195288181304932, + "learning_rate": 0.00019170006113124533, + "loss": 2.7048, + "step": 2699 + }, + { + "epoch": 0.21790008877411024, + "grad_norm": 0.817097008228302, + "learning_rate": 0.00019169376281927888, + "loss": 2.7148, + "step": 2700 + }, + { + "epoch": 0.21798079251069324, + "grad_norm": 0.8415588140487671, + "learning_rate": 0.0001916874622220642, + "loss": 2.7376, + "step": 2701 + }, + { + "epoch": 0.21806149624727625, + "grad_norm": 0.8004198670387268, + "learning_rate": 0.00019168115933975826, + "loss": 2.7145, + "step": 2702 + }, + { + "epoch": 0.21814219998385925, + "grad_norm": 0.8167368769645691, + "learning_rate": 0.0001916748541725182, + "loss": 2.6923, + "step": 2703 + }, + { + "epoch": 0.21822290372044226, + "grad_norm": 0.8877980709075928, + "learning_rate": 0.0001916685467205011, + "loss": 2.8232, + "step": 2704 + }, + { + "epoch": 0.21830360745702526, + "grad_norm": 0.7835622429847717, + "learning_rate": 0.00019166223698386422, + "loss": 2.7797, + "step": 2705 + }, + { + "epoch": 0.21838431119360827, + "grad_norm": 0.8023552894592285, + "learning_rate": 0.00019165592496276477, + "loss": 2.6697, + "step": 2706 + }, + { + "epoch": 0.21846501493019127, + "grad_norm": 0.8549069166183472, + "learning_rate": 0.00019164961065736008, + "loss": 2.729, + "step": 2707 + }, + { + "epoch": 0.21854571866677427, + "grad_norm": 0.8561950325965881, + "learning_rate": 0.00019164329406780753, + "loss": 2.772, + "step": 2708 + }, + { + "epoch": 0.21862642240335728, + "grad_norm": 0.6979276537895203, + "learning_rate": 0.00019163697519426453, + "loss": 2.7195, + "step": 2709 + }, + { + "epoch": 0.21870712613994028, + "grad_norm": 0.7659175395965576, + "learning_rate": 0.00019163065403688856, + "loss": 2.7742, + "step": 2710 + }, + { + "epoch": 0.2187878298765233, + "grad_norm": 0.8621466755867004, + "learning_rate": 0.00019162433059583718, + "loss": 2.721, + "step": 2711 + }, + { + "epoch": 0.2188685336131063, + "grad_norm": 0.8086833357810974, + 
"learning_rate": 0.00019161800487126795, + "loss": 2.7356, + "step": 2712 + }, + { + "epoch": 0.2189492373496893, + "grad_norm": 0.816215455532074, + "learning_rate": 0.00019161167686333855, + "loss": 2.7159, + "step": 2713 + }, + { + "epoch": 0.2190299410862723, + "grad_norm": 0.9180822968482971, + "learning_rate": 0.0001916053465722067, + "loss": 2.7162, + "step": 2714 + }, + { + "epoch": 0.2191106448228553, + "grad_norm": 0.7547199130058289, + "learning_rate": 0.00019159901399803014, + "loss": 2.7338, + "step": 2715 + }, + { + "epoch": 0.2191913485594383, + "grad_norm": 0.7380769848823547, + "learning_rate": 0.00019159267914096675, + "loss": 2.7149, + "step": 2716 + }, + { + "epoch": 0.2192720522960213, + "grad_norm": 0.7242285013198853, + "learning_rate": 0.00019158634200117433, + "loss": 2.724, + "step": 2717 + }, + { + "epoch": 0.21935275603260432, + "grad_norm": 0.8400316834449768, + "learning_rate": 0.00019158000257881087, + "loss": 2.7528, + "step": 2718 + }, + { + "epoch": 0.21943345976918732, + "grad_norm": 0.8437172770500183, + "learning_rate": 0.00019157366087403435, + "loss": 2.7872, + "step": 2719 + }, + { + "epoch": 0.21951416350577033, + "grad_norm": 0.7428301572799683, + "learning_rate": 0.00019156731688700282, + "loss": 2.6831, + "step": 2720 + }, + { + "epoch": 0.21959486724235333, + "grad_norm": 0.7589641213417053, + "learning_rate": 0.00019156097061787445, + "loss": 2.7105, + "step": 2721 + }, + { + "epoch": 0.21967557097893634, + "grad_norm": 0.7607305645942688, + "learning_rate": 0.00019155462206680727, + "loss": 2.7913, + "step": 2722 + }, + { + "epoch": 0.21975627471551934, + "grad_norm": 0.7455689311027527, + "learning_rate": 0.00019154827123395963, + "loss": 2.6321, + "step": 2723 + }, + { + "epoch": 0.21983697845210234, + "grad_norm": 0.7860318422317505, + "learning_rate": 0.00019154191811948974, + "loss": 2.7907, + "step": 2724 + }, + { + "epoch": 0.21991768218868535, + "grad_norm": 0.8101385235786438, + "learning_rate": 
0.00019153556272355596, + "loss": 2.7682, + "step": 2725 + }, + { + "epoch": 0.21999838592526835, + "grad_norm": 0.7437283396720886, + "learning_rate": 0.00019152920504631667, + "loss": 2.7271, + "step": 2726 + }, + { + "epoch": 0.22007908966185136, + "grad_norm": 0.7390851974487305, + "learning_rate": 0.00019152284508793034, + "loss": 2.7492, + "step": 2727 + }, + { + "epoch": 0.22015979339843436, + "grad_norm": 0.9074966311454773, + "learning_rate": 0.0001915164828485555, + "loss": 2.8076, + "step": 2728 + }, + { + "epoch": 0.22024049713501734, + "grad_norm": 0.7644218802452087, + "learning_rate": 0.00019151011832835063, + "loss": 2.7238, + "step": 2729 + }, + { + "epoch": 0.22032120087160034, + "grad_norm": 0.823567807674408, + "learning_rate": 0.0001915037515274744, + "loss": 2.7701, + "step": 2730 + }, + { + "epoch": 0.22040190460818335, + "grad_norm": 0.7601858377456665, + "learning_rate": 0.00019149738244608552, + "loss": 2.6981, + "step": 2731 + }, + { + "epoch": 0.22048260834476635, + "grad_norm": 0.8242961764335632, + "learning_rate": 0.00019149101108434269, + "loss": 2.6916, + "step": 2732 + }, + { + "epoch": 0.22056331208134936, + "grad_norm": 0.7970656156539917, + "learning_rate": 0.0001914846374424047, + "loss": 2.7858, + "step": 2733 + }, + { + "epoch": 0.22064401581793236, + "grad_norm": 0.7844050526618958, + "learning_rate": 0.0001914782615204304, + "loss": 2.6782, + "step": 2734 + }, + { + "epoch": 0.22072471955451536, + "grad_norm": 0.7965044975280762, + "learning_rate": 0.00019147188331857868, + "loss": 2.7563, + "step": 2735 + }, + { + "epoch": 0.22080542329109837, + "grad_norm": 0.8189071416854858, + "learning_rate": 0.00019146550283700856, + "loss": 2.7587, + "step": 2736 + }, + { + "epoch": 0.22088612702768137, + "grad_norm": 0.7610960602760315, + "learning_rate": 0.00019145912007587898, + "loss": 2.663, + "step": 2737 + }, + { + "epoch": 0.22096683076426438, + "grad_norm": 0.7642313838005066, + "learning_rate": 0.00019145273503534907, + 
"loss": 2.78, + "step": 2738 + }, + { + "epoch": 0.22104753450084738, + "grad_norm": 0.7699539065361023, + "learning_rate": 0.0001914463477155779, + "loss": 2.7429, + "step": 2739 + }, + { + "epoch": 0.22112823823743039, + "grad_norm": 0.7674413919448853, + "learning_rate": 0.00019143995811672477, + "loss": 2.7048, + "step": 2740 + }, + { + "epoch": 0.2212089419740134, + "grad_norm": 0.7871866226196289, + "learning_rate": 0.00019143356623894882, + "loss": 2.7769, + "step": 2741 + }, + { + "epoch": 0.2212896457105964, + "grad_norm": 0.8453468680381775, + "learning_rate": 0.00019142717208240937, + "loss": 2.7677, + "step": 2742 + }, + { + "epoch": 0.2213703494471794, + "grad_norm": 0.8050780892372131, + "learning_rate": 0.00019142077564726582, + "loss": 2.7809, + "step": 2743 + }, + { + "epoch": 0.2214510531837624, + "grad_norm": 0.811287522315979, + "learning_rate": 0.0001914143769336776, + "loss": 2.7201, + "step": 2744 + }, + { + "epoch": 0.2215317569203454, + "grad_norm": 0.823106050491333, + "learning_rate": 0.00019140797594180412, + "loss": 2.7371, + "step": 2745 + }, + { + "epoch": 0.2216124606569284, + "grad_norm": 0.778126060962677, + "learning_rate": 0.0001914015726718049, + "loss": 2.6925, + "step": 2746 + }, + { + "epoch": 0.22169316439351142, + "grad_norm": 0.8240278959274292, + "learning_rate": 0.0001913951671238396, + "loss": 2.7227, + "step": 2747 + }, + { + "epoch": 0.22177386813009442, + "grad_norm": 0.8061805963516235, + "learning_rate": 0.0001913887592980678, + "loss": 2.7092, + "step": 2748 + }, + { + "epoch": 0.22185457186667742, + "grad_norm": 0.9111800789833069, + "learning_rate": 0.00019138234919464925, + "loss": 2.7364, + "step": 2749 + }, + { + "epoch": 0.22193527560326043, + "grad_norm": 0.8154863715171814, + "learning_rate": 0.0001913759368137437, + "loss": 2.6983, + "step": 2750 + }, + { + "epoch": 0.22201597933984343, + "grad_norm": 0.8547734022140503, + "learning_rate": 0.0001913695221555109, + "loss": 2.7016, + "step": 2751 + }, + { + 
"epoch": 0.22209668307642644, + "grad_norm": 0.7488531470298767, + "learning_rate": 0.00019136310522011079, + "loss": 2.6641, + "step": 2752 + }, + { + "epoch": 0.22217738681300944, + "grad_norm": 0.9118027091026306, + "learning_rate": 0.00019135668600770326, + "loss": 2.6965, + "step": 2753 + }, + { + "epoch": 0.22225809054959245, + "grad_norm": 0.7629117369651794, + "learning_rate": 0.00019135026451844834, + "loss": 2.7836, + "step": 2754 + }, + { + "epoch": 0.22233879428617545, + "grad_norm": 0.8081222176551819, + "learning_rate": 0.000191343840752506, + "loss": 2.7339, + "step": 2755 + }, + { + "epoch": 0.22241949802275846, + "grad_norm": 0.9143899083137512, + "learning_rate": 0.00019133741471003636, + "loss": 2.7051, + "step": 2756 + }, + { + "epoch": 0.22250020175934146, + "grad_norm": 0.8096790909767151, + "learning_rate": 0.00019133098639119962, + "loss": 2.6884, + "step": 2757 + }, + { + "epoch": 0.22258090549592446, + "grad_norm": 0.7959297895431519, + "learning_rate": 0.00019132455579615597, + "loss": 2.7127, + "step": 2758 + }, + { + "epoch": 0.22266160923250747, + "grad_norm": 0.7111356854438782, + "learning_rate": 0.00019131812292506563, + "loss": 2.7418, + "step": 2759 + }, + { + "epoch": 0.22274231296909047, + "grad_norm": 0.7584012150764465, + "learning_rate": 0.00019131168777808898, + "loss": 2.6705, + "step": 2760 + }, + { + "epoch": 0.22282301670567348, + "grad_norm": 0.7646663784980774, + "learning_rate": 0.0001913052503553864, + "loss": 2.7166, + "step": 2761 + }, + { + "epoch": 0.22290372044225648, + "grad_norm": 0.7643954157829285, + "learning_rate": 0.00019129881065711827, + "loss": 2.7967, + "step": 2762 + }, + { + "epoch": 0.22298442417883949, + "grad_norm": 0.7591429948806763, + "learning_rate": 0.0001912923686834451, + "loss": 2.6611, + "step": 2763 + }, + { + "epoch": 0.2230651279154225, + "grad_norm": 0.7182386517524719, + "learning_rate": 0.00019128592443452749, + "loss": 2.6808, + "step": 2764 + }, + { + "epoch": 0.2231458316520055, 
+ "grad_norm": 0.7689648270606995, + "learning_rate": 0.00019127947791052602, + "loss": 2.7288, + "step": 2765 + }, + { + "epoch": 0.2232265353885885, + "grad_norm": 0.7851321697235107, + "learning_rate": 0.00019127302911160136, + "loss": 2.7227, + "step": 2766 + }, + { + "epoch": 0.2233072391251715, + "grad_norm": 0.8419411182403564, + "learning_rate": 0.00019126657803791424, + "loss": 2.7397, + "step": 2767 + }, + { + "epoch": 0.2233879428617545, + "grad_norm": 0.7657596468925476, + "learning_rate": 0.0001912601246896254, + "loss": 2.7223, + "step": 2768 + }, + { + "epoch": 0.2234686465983375, + "grad_norm": 0.8033619523048401, + "learning_rate": 0.00019125366906689567, + "loss": 2.7256, + "step": 2769 + }, + { + "epoch": 0.22354935033492052, + "grad_norm": 0.7784682512283325, + "learning_rate": 0.00019124721116988601, + "loss": 2.7692, + "step": 2770 + }, + { + "epoch": 0.22363005407150352, + "grad_norm": 0.7842707633972168, + "learning_rate": 0.00019124075099875731, + "loss": 2.7707, + "step": 2771 + }, + { + "epoch": 0.22371075780808652, + "grad_norm": 0.7864845395088196, + "learning_rate": 0.0001912342885536706, + "loss": 2.6912, + "step": 2772 + }, + { + "epoch": 0.22379146154466953, + "grad_norm": 0.8544312715530396, + "learning_rate": 0.0001912278238347869, + "loss": 2.8345, + "step": 2773 + }, + { + "epoch": 0.22387216528125253, + "grad_norm": 0.7210882306098938, + "learning_rate": 0.0001912213568422674, + "loss": 2.6933, + "step": 2774 + }, + { + "epoch": 0.22395286901783554, + "grad_norm": 0.8877022862434387, + "learning_rate": 0.00019121488757627318, + "loss": 2.7583, + "step": 2775 + }, + { + "epoch": 0.22403357275441854, + "grad_norm": 0.902886688709259, + "learning_rate": 0.00019120841603696554, + "loss": 2.8, + "step": 2776 + }, + { + "epoch": 0.22411427649100155, + "grad_norm": 0.771294355392456, + "learning_rate": 0.0001912019422245058, + "loss": 2.7712, + "step": 2777 + }, + { + "epoch": 0.22419498022758455, + "grad_norm": 0.7973463535308838, + 
"learning_rate": 0.0001911954661390552, + "loss": 2.7368, + "step": 2778 + }, + { + "epoch": 0.22427568396416755, + "grad_norm": 0.776836633682251, + "learning_rate": 0.00019118898778077524, + "loss": 2.7126, + "step": 2779 + }, + { + "epoch": 0.22435638770075053, + "grad_norm": 0.8286641240119934, + "learning_rate": 0.00019118250714982731, + "loss": 2.7148, + "step": 2780 + }, + { + "epoch": 0.22443709143733354, + "grad_norm": 0.7848700284957886, + "learning_rate": 0.00019117602424637294, + "loss": 2.7284, + "step": 2781 + }, + { + "epoch": 0.22451779517391654, + "grad_norm": 0.7658216953277588, + "learning_rate": 0.0001911695390705737, + "loss": 2.7186, + "step": 2782 + }, + { + "epoch": 0.22459849891049954, + "grad_norm": 0.7596792578697205, + "learning_rate": 0.00019116305162259124, + "loss": 2.6854, + "step": 2783 + }, + { + "epoch": 0.22467920264708255, + "grad_norm": 0.7901157140731812, + "learning_rate": 0.00019115656190258726, + "loss": 2.7347, + "step": 2784 + }, + { + "epoch": 0.22475990638366555, + "grad_norm": 0.7499287128448486, + "learning_rate": 0.00019115006991072346, + "loss": 2.7219, + "step": 2785 + }, + { + "epoch": 0.22484061012024856, + "grad_norm": 0.7427374124526978, + "learning_rate": 0.00019114357564716162, + "loss": 2.7147, + "step": 2786 + }, + { + "epoch": 0.22492131385683156, + "grad_norm": 0.8305855393409729, + "learning_rate": 0.00019113707911206363, + "loss": 2.7587, + "step": 2787 + }, + { + "epoch": 0.22500201759341457, + "grad_norm": 0.8266459703445435, + "learning_rate": 0.00019113058030559142, + "loss": 2.7275, + "step": 2788 + }, + { + "epoch": 0.22508272132999757, + "grad_norm": 0.7338323593139648, + "learning_rate": 0.0001911240792279069, + "loss": 2.762, + "step": 2789 + }, + { + "epoch": 0.22516342506658057, + "grad_norm": 0.7653434872627258, + "learning_rate": 0.00019111757587917216, + "loss": 2.6715, + "step": 2790 + }, + { + "epoch": 0.22524412880316358, + "grad_norm": 0.76301509141922, + "learning_rate": 
0.00019111107025954923, + "loss": 2.698, + "step": 2791 + }, + { + "epoch": 0.22532483253974658, + "grad_norm": 0.7810547947883606, + "learning_rate": 0.00019110456236920024, + "loss": 2.7295, + "step": 2792 + }, + { + "epoch": 0.2254055362763296, + "grad_norm": 0.7885214686393738, + "learning_rate": 0.00019109805220828742, + "loss": 2.7724, + "step": 2793 + }, + { + "epoch": 0.2254862400129126, + "grad_norm": 0.8087031841278076, + "learning_rate": 0.00019109153977697301, + "loss": 2.7888, + "step": 2794 + }, + { + "epoch": 0.2255669437494956, + "grad_norm": 0.795101523399353, + "learning_rate": 0.00019108502507541933, + "loss": 2.6815, + "step": 2795 + }, + { + "epoch": 0.2256476474860786, + "grad_norm": 0.8337482213973999, + "learning_rate": 0.0001910785081037887, + "loss": 2.8192, + "step": 2796 + }, + { + "epoch": 0.2257283512226616, + "grad_norm": 0.8357288241386414, + "learning_rate": 0.00019107198886224357, + "loss": 2.7867, + "step": 2797 + }, + { + "epoch": 0.2258090549592446, + "grad_norm": 0.80678391456604, + "learning_rate": 0.00019106546735094644, + "loss": 2.7313, + "step": 2798 + }, + { + "epoch": 0.2258897586958276, + "grad_norm": 0.7481401562690735, + "learning_rate": 0.00019105894357005979, + "loss": 2.7073, + "step": 2799 + }, + { + "epoch": 0.22597046243241062, + "grad_norm": 0.8025074005126953, + "learning_rate": 0.00019105241751974622, + "loss": 2.6922, + "step": 2800 + }, + { + "epoch": 0.22605116616899362, + "grad_norm": 0.7308986186981201, + "learning_rate": 0.00019104588920016842, + "loss": 2.7511, + "step": 2801 + }, + { + "epoch": 0.22613186990557663, + "grad_norm": 0.7727689146995544, + "learning_rate": 0.00019103935861148905, + "loss": 2.707, + "step": 2802 + }, + { + "epoch": 0.22621257364215963, + "grad_norm": 0.8611076474189758, + "learning_rate": 0.0001910328257538709, + "loss": 2.8494, + "step": 2803 + }, + { + "epoch": 0.22629327737874264, + "grad_norm": 0.8487605452537537, + "learning_rate": 0.00019102629062747677, + "loss": 
2.7698, + "step": 2804 + }, + { + "epoch": 0.22637398111532564, + "grad_norm": 0.7495502233505249, + "learning_rate": 0.00019101975323246952, + "loss": 2.7091, + "step": 2805 + }, + { + "epoch": 0.22645468485190864, + "grad_norm": 0.7334234118461609, + "learning_rate": 0.0001910132135690121, + "loss": 2.7375, + "step": 2806 + }, + { + "epoch": 0.22653538858849165, + "grad_norm": 0.879912257194519, + "learning_rate": 0.00019100667163726747, + "loss": 2.7278, + "step": 2807 + }, + { + "epoch": 0.22661609232507465, + "grad_norm": 0.8087306618690491, + "learning_rate": 0.0001910001274373987, + "loss": 2.8065, + "step": 2808 + }, + { + "epoch": 0.22669679606165766, + "grad_norm": 0.7548169493675232, + "learning_rate": 0.00019099358096956887, + "loss": 2.7235, + "step": 2809 + }, + { + "epoch": 0.22677749979824066, + "grad_norm": 0.7505785822868347, + "learning_rate": 0.00019098703223394118, + "loss": 2.6633, + "step": 2810 + }, + { + "epoch": 0.22685820353482367, + "grad_norm": 0.829075813293457, + "learning_rate": 0.00019098048123067875, + "loss": 2.7389, + "step": 2811 + }, + { + "epoch": 0.22693890727140667, + "grad_norm": 0.7731673121452332, + "learning_rate": 0.00019097392795994493, + "loss": 2.7639, + "step": 2812 + }, + { + "epoch": 0.22701961100798967, + "grad_norm": 0.7389004826545715, + "learning_rate": 0.00019096737242190303, + "loss": 2.717, + "step": 2813 + }, + { + "epoch": 0.22710031474457268, + "grad_norm": 0.7520460486412048, + "learning_rate": 0.0001909608146167164, + "loss": 2.7203, + "step": 2814 + }, + { + "epoch": 0.22718101848115568, + "grad_norm": 0.7272354364395142, + "learning_rate": 0.00019095425454454849, + "loss": 2.7306, + "step": 2815 + }, + { + "epoch": 0.2272617222177387, + "grad_norm": 0.7593528032302856, + "learning_rate": 0.00019094769220556282, + "loss": 2.7565, + "step": 2816 + }, + { + "epoch": 0.2273424259543217, + "grad_norm": 0.7312695384025574, + "learning_rate": 0.0001909411275999229, + "loss": 2.744, + "step": 2817 + }, + { + 
"epoch": 0.2274231296909047, + "grad_norm": 0.7483308911323547, + "learning_rate": 0.00019093456072779238, + "loss": 2.7938, + "step": 2818 + }, + { + "epoch": 0.2275038334274877, + "grad_norm": 0.8515620231628418, + "learning_rate": 0.00019092799158933486, + "loss": 2.7392, + "step": 2819 + }, + { + "epoch": 0.2275845371640707, + "grad_norm": 0.7119776606559753, + "learning_rate": 0.00019092142018471415, + "loss": 2.6985, + "step": 2820 + }, + { + "epoch": 0.2276652409006537, + "grad_norm": 0.7549445033073425, + "learning_rate": 0.00019091484651409394, + "loss": 2.7621, + "step": 2821 + }, + { + "epoch": 0.2277459446372367, + "grad_norm": 0.8728097081184387, + "learning_rate": 0.00019090827057763814, + "loss": 2.8321, + "step": 2822 + }, + { + "epoch": 0.22782664837381972, + "grad_norm": 0.755043089389801, + "learning_rate": 0.00019090169237551057, + "loss": 2.7341, + "step": 2823 + }, + { + "epoch": 0.22790735211040272, + "grad_norm": 0.7949401140213013, + "learning_rate": 0.00019089511190787523, + "loss": 2.7646, + "step": 2824 + }, + { + "epoch": 0.22798805584698573, + "grad_norm": 0.8027622103691101, + "learning_rate": 0.00019088852917489607, + "loss": 2.7606, + "step": 2825 + }, + { + "epoch": 0.22806875958356873, + "grad_norm": 0.8609418869018555, + "learning_rate": 0.0001908819441767372, + "loss": 2.7433, + "step": 2826 + }, + { + "epoch": 0.22814946332015174, + "grad_norm": 0.8021805882453918, + "learning_rate": 0.00019087535691356271, + "loss": 2.7723, + "step": 2827 + }, + { + "epoch": 0.22823016705673474, + "grad_norm": 0.8104252219200134, + "learning_rate": 0.00019086876738553675, + "loss": 2.7229, + "step": 2828 + }, + { + "epoch": 0.22831087079331774, + "grad_norm": 0.8714433908462524, + "learning_rate": 0.00019086217559282362, + "loss": 2.75, + "step": 2829 + }, + { + "epoch": 0.22839157452990075, + "grad_norm": 0.7598714828491211, + "learning_rate": 0.0001908555815355875, + "loss": 2.6979, + "step": 2830 + }, + { + "epoch": 0.22847227826648372, + 
"grad_norm": 0.859708309173584, + "learning_rate": 0.00019084898521399283, + "loss": 2.7863, + "step": 2831 + }, + { + "epoch": 0.22855298200306673, + "grad_norm": 0.7798011302947998, + "learning_rate": 0.00019084238662820397, + "loss": 2.7623, + "step": 2832 + }, + { + "epoch": 0.22863368573964973, + "grad_norm": 0.7869576811790466, + "learning_rate": 0.00019083578577838535, + "loss": 2.7341, + "step": 2833 + }, + { + "epoch": 0.22871438947623274, + "grad_norm": 0.7486738562583923, + "learning_rate": 0.0001908291826647015, + "loss": 2.7615, + "step": 2834 + }, + { + "epoch": 0.22879509321281574, + "grad_norm": 0.8270190954208374, + "learning_rate": 0.00019082257728731704, + "loss": 2.7515, + "step": 2835 + }, + { + "epoch": 0.22887579694939875, + "grad_norm": 0.9060254693031311, + "learning_rate": 0.00019081596964639648, + "loss": 2.874, + "step": 2836 + }, + { + "epoch": 0.22895650068598175, + "grad_norm": 0.7802320122718811, + "learning_rate": 0.00019080935974210458, + "loss": 2.7224, + "step": 2837 + }, + { + "epoch": 0.22903720442256476, + "grad_norm": 0.9513018131256104, + "learning_rate": 0.00019080274757460607, + "loss": 2.7168, + "step": 2838 + }, + { + "epoch": 0.22911790815914776, + "grad_norm": 0.7139711976051331, + "learning_rate": 0.0001907961331440657, + "loss": 2.676, + "step": 2839 + }, + { + "epoch": 0.22919861189573076, + "grad_norm": 0.8635632395744324, + "learning_rate": 0.00019078951645064838, + "loss": 2.6979, + "step": 2840 + }, + { + "epoch": 0.22927931563231377, + "grad_norm": 0.8823218941688538, + "learning_rate": 0.000190782897494519, + "loss": 2.7345, + "step": 2841 + }, + { + "epoch": 0.22936001936889677, + "grad_norm": 0.8139359354972839, + "learning_rate": 0.00019077627627584246, + "loss": 2.6988, + "step": 2842 + }, + { + "epoch": 0.22944072310547978, + "grad_norm": 0.8935994505882263, + "learning_rate": 0.00019076965279478383, + "loss": 2.7706, + "step": 2843 + }, + { + "epoch": 0.22952142684206278, + "grad_norm": 
0.8362705111503601, + "learning_rate": 0.00019076302705150816, + "loss": 2.7593, + "step": 2844 + }, + { + "epoch": 0.22960213057864579, + "grad_norm": 0.7534157633781433, + "learning_rate": 0.00019075639904618066, + "loss": 2.7501, + "step": 2845 + }, + { + "epoch": 0.2296828343152288, + "grad_norm": 0.8826640248298645, + "learning_rate": 0.00019074976877896642, + "loss": 2.7758, + "step": 2846 + }, + { + "epoch": 0.2297635380518118, + "grad_norm": 0.8395571112632751, + "learning_rate": 0.0001907431362500307, + "loss": 2.7625, + "step": 2847 + }, + { + "epoch": 0.2298442417883948, + "grad_norm": 0.7927684783935547, + "learning_rate": 0.00019073650145953885, + "loss": 2.7392, + "step": 2848 + }, + { + "epoch": 0.2299249455249778, + "grad_norm": 0.823208749294281, + "learning_rate": 0.00019072986440765618, + "loss": 2.7259, + "step": 2849 + }, + { + "epoch": 0.2300056492615608, + "grad_norm": 0.889416515827179, + "learning_rate": 0.00019072322509454815, + "loss": 2.7539, + "step": 2850 + }, + { + "epoch": 0.2300863529981438, + "grad_norm": 0.7957748770713806, + "learning_rate": 0.0001907165835203802, + "loss": 2.7756, + "step": 2851 + }, + { + "epoch": 0.23016705673472682, + "grad_norm": 0.7924029231071472, + "learning_rate": 0.00019070993968531782, + "loss": 2.7439, + "step": 2852 + }, + { + "epoch": 0.23024776047130982, + "grad_norm": 0.7811052799224854, + "learning_rate": 0.0001907032935895266, + "loss": 2.7479, + "step": 2853 + }, + { + "epoch": 0.23032846420789282, + "grad_norm": 0.7973877191543579, + "learning_rate": 0.00019069664523317225, + "loss": 2.7502, + "step": 2854 + }, + { + "epoch": 0.23040916794447583, + "grad_norm": 0.7524267435073853, + "learning_rate": 0.0001906899946164204, + "loss": 2.75, + "step": 2855 + }, + { + "epoch": 0.23048987168105883, + "grad_norm": 0.7594791054725647, + "learning_rate": 0.00019068334173943683, + "loss": 2.6534, + "step": 2856 + }, + { + "epoch": 0.23057057541764184, + "grad_norm": 0.7253785729408264, + 
"learning_rate": 0.00019067668660238733, + "loss": 2.7246, + "step": 2857 + }, + { + "epoch": 0.23065127915422484, + "grad_norm": 0.788737416267395, + "learning_rate": 0.00019067002920543775, + "loss": 2.757, + "step": 2858 + }, + { + "epoch": 0.23073198289080785, + "grad_norm": 0.7577618956565857, + "learning_rate": 0.00019066336954875403, + "loss": 2.674, + "step": 2859 + }, + { + "epoch": 0.23081268662739085, + "grad_norm": 0.7682929635047913, + "learning_rate": 0.0001906567076325022, + "loss": 2.8193, + "step": 2860 + }, + { + "epoch": 0.23089339036397385, + "grad_norm": 0.7742112874984741, + "learning_rate": 0.00019065004345684817, + "loss": 2.6969, + "step": 2861 + }, + { + "epoch": 0.23097409410055686, + "grad_norm": 0.7981678247451782, + "learning_rate": 0.00019064337702195814, + "loss": 2.7681, + "step": 2862 + }, + { + "epoch": 0.23105479783713986, + "grad_norm": 0.7608500123023987, + "learning_rate": 0.00019063670832799817, + "loss": 2.7459, + "step": 2863 + }, + { + "epoch": 0.23113550157372287, + "grad_norm": 0.7563463449478149, + "learning_rate": 0.00019063003737513455, + "loss": 2.7678, + "step": 2864 + }, + { + "epoch": 0.23121620531030587, + "grad_norm": 0.7915034890174866, + "learning_rate": 0.00019062336416353343, + "loss": 2.7577, + "step": 2865 + }, + { + "epoch": 0.23129690904688888, + "grad_norm": 0.7229592204093933, + "learning_rate": 0.00019061668869336122, + "loss": 2.7308, + "step": 2866 + }, + { + "epoch": 0.23137761278347188, + "grad_norm": 0.7910905480384827, + "learning_rate": 0.00019061001096478425, + "loss": 2.7571, + "step": 2867 + }, + { + "epoch": 0.23145831652005489, + "grad_norm": 0.8474656939506531, + "learning_rate": 0.00019060333097796895, + "loss": 2.7011, + "step": 2868 + }, + { + "epoch": 0.2315390202566379, + "grad_norm": 0.8005419373512268, + "learning_rate": 0.00019059664873308178, + "loss": 2.7441, + "step": 2869 + }, + { + "epoch": 0.2316197239932209, + "grad_norm": 0.7728021740913391, + "learning_rate": 
0.00019058996423028935, + "loss": 2.7753, + "step": 2870 + }, + { + "epoch": 0.2317004277298039, + "grad_norm": 0.7338094115257263, + "learning_rate": 0.00019058327746975816, + "loss": 2.7009, + "step": 2871 + }, + { + "epoch": 0.2317811314663869, + "grad_norm": 0.7746245265007019, + "learning_rate": 0.00019057658845165494, + "loss": 2.6938, + "step": 2872 + }, + { + "epoch": 0.2318618352029699, + "grad_norm": 0.7474356293678284, + "learning_rate": 0.00019056989717614636, + "loss": 2.7161, + "step": 2873 + }, + { + "epoch": 0.2319425389395529, + "grad_norm": 0.9540585279464722, + "learning_rate": 0.00019056320364339917, + "loss": 2.7753, + "step": 2874 + }, + { + "epoch": 0.23202324267613592, + "grad_norm": 0.799726665019989, + "learning_rate": 0.00019055650785358024, + "loss": 2.7301, + "step": 2875 + }, + { + "epoch": 0.23210394641271892, + "grad_norm": 0.8087828159332275, + "learning_rate": 0.0001905498098068564, + "loss": 2.7305, + "step": 2876 + }, + { + "epoch": 0.23218465014930192, + "grad_norm": 0.8177600502967834, + "learning_rate": 0.00019054310950339457, + "loss": 2.7462, + "step": 2877 + }, + { + "epoch": 0.23226535388588493, + "grad_norm": 0.7106238603591919, + "learning_rate": 0.00019053640694336181, + "loss": 2.7183, + "step": 2878 + }, + { + "epoch": 0.23234605762246793, + "grad_norm": 0.884185791015625, + "learning_rate": 0.00019052970212692514, + "loss": 2.7549, + "step": 2879 + }, + { + "epoch": 0.23242676135905094, + "grad_norm": 0.7532132267951965, + "learning_rate": 0.00019052299505425163, + "loss": 2.7524, + "step": 2880 + }, + { + "epoch": 0.23250746509563394, + "grad_norm": 0.7295021414756775, + "learning_rate": 0.00019051628572550842, + "loss": 2.6928, + "step": 2881 + }, + { + "epoch": 0.23258816883221692, + "grad_norm": 0.8475896716117859, + "learning_rate": 0.00019050957414086278, + "loss": 2.7138, + "step": 2882 + }, + { + "epoch": 0.23266887256879992, + "grad_norm": 0.7219378352165222, + "learning_rate": 0.00019050286030048198, + 
"loss": 2.7034, + "step": 2883 + }, + { + "epoch": 0.23274957630538293, + "grad_norm": 0.8410176634788513, + "learning_rate": 0.0001904961442045333, + "loss": 2.7413, + "step": 2884 + }, + { + "epoch": 0.23283028004196593, + "grad_norm": 0.7792301177978516, + "learning_rate": 0.00019048942585318414, + "loss": 2.6771, + "step": 2885 + }, + { + "epoch": 0.23291098377854894, + "grad_norm": 0.7457073926925659, + "learning_rate": 0.00019048270524660196, + "loss": 2.7325, + "step": 2886 + }, + { + "epoch": 0.23299168751513194, + "grad_norm": 0.8258858323097229, + "learning_rate": 0.00019047598238495424, + "loss": 2.7434, + "step": 2887 + }, + { + "epoch": 0.23307239125171494, + "grad_norm": 0.8188657164573669, + "learning_rate": 0.00019046925726840853, + "loss": 2.732, + "step": 2888 + }, + { + "epoch": 0.23315309498829795, + "grad_norm": 0.8084142208099365, + "learning_rate": 0.00019046252989713246, + "loss": 2.7537, + "step": 2889 + }, + { + "epoch": 0.23323379872488095, + "grad_norm": 0.75553297996521, + "learning_rate": 0.00019045580027129364, + "loss": 2.6685, + "step": 2890 + }, + { + "epoch": 0.23331450246146396, + "grad_norm": 0.8145995736122131, + "learning_rate": 0.00019044906839105986, + "loss": 2.7654, + "step": 2891 + }, + { + "epoch": 0.23339520619804696, + "grad_norm": 0.8433949947357178, + "learning_rate": 0.0001904423342565988, + "loss": 2.7713, + "step": 2892 + }, + { + "epoch": 0.23347590993462997, + "grad_norm": 0.7826054096221924, + "learning_rate": 0.0001904355978680784, + "loss": 2.7108, + "step": 2893 + }, + { + "epoch": 0.23355661367121297, + "grad_norm": 0.7281686663627625, + "learning_rate": 0.0001904288592256665, + "loss": 2.7606, + "step": 2894 + }, + { + "epoch": 0.23363731740779597, + "grad_norm": 0.8282813429832458, + "learning_rate": 0.00019042211832953103, + "loss": 2.6662, + "step": 2895 + }, + { + "epoch": 0.23371802114437898, + "grad_norm": 0.8227263689041138, + "learning_rate": 0.00019041537517984, + "loss": 2.7493, + "step": 2896 + 
}, + { + "epoch": 0.23379872488096198, + "grad_norm": 0.839350700378418, + "learning_rate": 0.0001904086297767615, + "loss": 2.7258, + "step": 2897 + }, + { + "epoch": 0.233879428617545, + "grad_norm": 0.713231086730957, + "learning_rate": 0.00019040188212046357, + "loss": 2.6722, + "step": 2898 + }, + { + "epoch": 0.233960132354128, + "grad_norm": 0.8314552903175354, + "learning_rate": 0.00019039513221111447, + "loss": 2.8509, + "step": 2899 + }, + { + "epoch": 0.234040836090711, + "grad_norm": 0.8885688781738281, + "learning_rate": 0.0001903883800488824, + "loss": 2.7608, + "step": 2900 + }, + { + "epoch": 0.234121539827294, + "grad_norm": 0.755308210849762, + "learning_rate": 0.00019038162563393555, + "loss": 2.7065, + "step": 2901 + }, + { + "epoch": 0.234202243563877, + "grad_norm": 0.7436641454696655, + "learning_rate": 0.00019037486896644236, + "loss": 2.6865, + "step": 2902 + }, + { + "epoch": 0.23428294730046, + "grad_norm": 0.7861987948417664, + "learning_rate": 0.0001903681100465712, + "loss": 2.7238, + "step": 2903 + }, + { + "epoch": 0.234363651037043, + "grad_norm": 0.7481045126914978, + "learning_rate": 0.0001903613488744905, + "loss": 2.7038, + "step": 2904 + }, + { + "epoch": 0.23444435477362602, + "grad_norm": 0.790765106678009, + "learning_rate": 0.0001903545854503688, + "loss": 2.6865, + "step": 2905 + }, + { + "epoch": 0.23452505851020902, + "grad_norm": 0.8594793677330017, + "learning_rate": 0.0001903478197743746, + "loss": 2.7324, + "step": 2906 + }, + { + "epoch": 0.23460576224679203, + "grad_norm": 0.7504310011863708, + "learning_rate": 0.00019034105184667662, + "loss": 2.6535, + "step": 2907 + }, + { + "epoch": 0.23468646598337503, + "grad_norm": 0.7824578881263733, + "learning_rate": 0.00019033428166744342, + "loss": 2.7113, + "step": 2908 + }, + { + "epoch": 0.23476716971995804, + "grad_norm": 0.7766899466514587, + "learning_rate": 0.0001903275092368438, + "loss": 2.6907, + "step": 2909 + }, + { + "epoch": 0.23484787345654104, + 
"grad_norm": 0.8082600235939026, + "learning_rate": 0.00019032073455504657, + "loss": 2.6781, + "step": 2910 + }, + { + "epoch": 0.23492857719312404, + "grad_norm": 0.7790517210960388, + "learning_rate": 0.0001903139576222205, + "loss": 2.7277, + "step": 2911 + }, + { + "epoch": 0.23500928092970705, + "grad_norm": 0.7449578046798706, + "learning_rate": 0.00019030717843853453, + "loss": 2.7078, + "step": 2912 + }, + { + "epoch": 0.23508998466629005, + "grad_norm": 0.7931632399559021, + "learning_rate": 0.0001903003970041576, + "loss": 2.7165, + "step": 2913 + }, + { + "epoch": 0.23517068840287306, + "grad_norm": 0.7970653176307678, + "learning_rate": 0.00019029361331925873, + "loss": 2.7993, + "step": 2914 + }, + { + "epoch": 0.23525139213945606, + "grad_norm": 0.8497335314750671, + "learning_rate": 0.00019028682738400697, + "loss": 2.7564, + "step": 2915 + }, + { + "epoch": 0.23533209587603907, + "grad_norm": 0.7840128540992737, + "learning_rate": 0.0001902800391985715, + "loss": 2.7546, + "step": 2916 + }, + { + "epoch": 0.23541279961262207, + "grad_norm": 0.8237372636795044, + "learning_rate": 0.00019027324876312146, + "loss": 2.7507, + "step": 2917 + }, + { + "epoch": 0.23549350334920507, + "grad_norm": 0.8445321917533875, + "learning_rate": 0.00019026645607782603, + "loss": 2.7287, + "step": 2918 + }, + { + "epoch": 0.23557420708578808, + "grad_norm": 0.8380417227745056, + "learning_rate": 0.0001902596611428546, + "loss": 2.7778, + "step": 2919 + }, + { + "epoch": 0.23565491082237108, + "grad_norm": 0.7989064455032349, + "learning_rate": 0.00019025286395837646, + "loss": 2.7254, + "step": 2920 + }, + { + "epoch": 0.2357356145589541, + "grad_norm": 0.8223496079444885, + "learning_rate": 0.00019024606452456102, + "loss": 2.7028, + "step": 2921 + }, + { + "epoch": 0.2358163182955371, + "grad_norm": 0.8090229630470276, + "learning_rate": 0.00019023926284157775, + "loss": 2.6911, + "step": 2922 + }, + { + "epoch": 0.2358970220321201, + "grad_norm": 
0.7556560635566711, + "learning_rate": 0.00019023245890959615, + "loss": 2.7183, + "step": 2923 + }, + { + "epoch": 0.2359777257687031, + "grad_norm": 0.7907983660697937, + "learning_rate": 0.00019022565272878582, + "loss": 2.6805, + "step": 2924 + }, + { + "epoch": 0.2360584295052861, + "grad_norm": 0.9404142498970032, + "learning_rate": 0.0001902188442993164, + "loss": 2.8081, + "step": 2925 + }, + { + "epoch": 0.2361391332418691, + "grad_norm": 0.8349069952964783, + "learning_rate": 0.0001902120336213575, + "loss": 2.8329, + "step": 2926 + }, + { + "epoch": 0.2362198369784521, + "grad_norm": 0.8557522892951965, + "learning_rate": 0.00019020522069507892, + "loss": 2.704, + "step": 2927 + }, + { + "epoch": 0.23630054071503512, + "grad_norm": 0.7557278275489807, + "learning_rate": 0.00019019840552065044, + "loss": 2.7071, + "step": 2928 + }, + { + "epoch": 0.23638124445161812, + "grad_norm": 0.8810723423957825, + "learning_rate": 0.00019019158809824193, + "loss": 2.7535, + "step": 2929 + }, + { + "epoch": 0.23646194818820113, + "grad_norm": 0.7845562100410461, + "learning_rate": 0.00019018476842802326, + "loss": 2.7254, + "step": 2930 + }, + { + "epoch": 0.23654265192478413, + "grad_norm": 0.7566044926643372, + "learning_rate": 0.00019017794651016444, + "loss": 2.7295, + "step": 2931 + }, + { + "epoch": 0.23662335566136714, + "grad_norm": 0.8083382248878479, + "learning_rate": 0.00019017112234483545, + "loss": 2.7305, + "step": 2932 + }, + { + "epoch": 0.2367040593979501, + "grad_norm": 0.7924187183380127, + "learning_rate": 0.00019016429593220638, + "loss": 2.7659, + "step": 2933 + }, + { + "epoch": 0.23678476313453312, + "grad_norm": 0.8400307297706604, + "learning_rate": 0.00019015746727244737, + "loss": 2.7293, + "step": 2934 + }, + { + "epoch": 0.23686546687111612, + "grad_norm": 0.6931199431419373, + "learning_rate": 0.0001901506363657286, + "loss": 2.7189, + "step": 2935 + }, + { + "epoch": 0.23694617060769912, + "grad_norm": 0.8263585567474365, + 
"learning_rate": 0.0001901438032122203, + "loss": 2.7368, + "step": 2936 + }, + { + "epoch": 0.23702687434428213, + "grad_norm": 0.8001893162727356, + "learning_rate": 0.0001901369678120928, + "loss": 2.7793, + "step": 2937 + }, + { + "epoch": 0.23710757808086513, + "grad_norm": 0.7724235653877258, + "learning_rate": 0.00019013013016551644, + "loss": 2.717, + "step": 2938 + }, + { + "epoch": 0.23718828181744814, + "grad_norm": 0.7617147564888, + "learning_rate": 0.00019012329027266164, + "loss": 2.7275, + "step": 2939 + }, + { + "epoch": 0.23726898555403114, + "grad_norm": 0.80738765001297, + "learning_rate": 0.00019011644813369884, + "loss": 2.7444, + "step": 2940 + }, + { + "epoch": 0.23734968929061415, + "grad_norm": 0.7885528802871704, + "learning_rate": 0.00019010960374879861, + "loss": 2.7377, + "step": 2941 + }, + { + "epoch": 0.23743039302719715, + "grad_norm": 0.720268964767456, + "learning_rate": 0.00019010275711813147, + "loss": 2.6897, + "step": 2942 + }, + { + "epoch": 0.23751109676378016, + "grad_norm": 0.7532111406326294, + "learning_rate": 0.00019009590824186815, + "loss": 2.8117, + "step": 2943 + }, + { + "epoch": 0.23759180050036316, + "grad_norm": 0.780777633190155, + "learning_rate": 0.00019008905712017925, + "loss": 2.7565, + "step": 2944 + }, + { + "epoch": 0.23767250423694616, + "grad_norm": 0.8721919059753418, + "learning_rate": 0.00019008220375323553, + "loss": 2.801, + "step": 2945 + }, + { + "epoch": 0.23775320797352917, + "grad_norm": 0.8258914947509766, + "learning_rate": 0.00019007534814120786, + "loss": 2.7696, + "step": 2946 + }, + { + "epoch": 0.23783391171011217, + "grad_norm": 0.7292730808258057, + "learning_rate": 0.00019006849028426704, + "loss": 2.7512, + "step": 2947 + }, + { + "epoch": 0.23791461544669518, + "grad_norm": 0.7789164185523987, + "learning_rate": 0.00019006163018258398, + "loss": 2.7489, + "step": 2948 + }, + { + "epoch": 0.23799531918327818, + "grad_norm": 0.8049725294113159, + "learning_rate": 
0.00019005476783632967, + "loss": 2.672, + "step": 2949 + }, + { + "epoch": 0.23807602291986119, + "grad_norm": 0.7440119981765747, + "learning_rate": 0.00019004790324567519, + "loss": 2.7208, + "step": 2950 + }, + { + "epoch": 0.2381567266564442, + "grad_norm": 0.7695925235748291, + "learning_rate": 0.00019004103641079154, + "loss": 2.7816, + "step": 2951 + }, + { + "epoch": 0.2382374303930272, + "grad_norm": 0.7623234391212463, + "learning_rate": 0.00019003416733184988, + "loss": 2.7034, + "step": 2952 + }, + { + "epoch": 0.2383181341296102, + "grad_norm": 0.8136502504348755, + "learning_rate": 0.00019002729600902141, + "loss": 2.7638, + "step": 2953 + }, + { + "epoch": 0.2383988378661932, + "grad_norm": 0.7813066840171814, + "learning_rate": 0.00019002042244247743, + "loss": 2.7606, + "step": 2954 + }, + { + "epoch": 0.2384795416027762, + "grad_norm": 0.7863059043884277, + "learning_rate": 0.0001900135466323892, + "loss": 2.7219, + "step": 2955 + }, + { + "epoch": 0.2385602453393592, + "grad_norm": 0.8712359070777893, + "learning_rate": 0.00019000666857892806, + "loss": 2.7485, + "step": 2956 + }, + { + "epoch": 0.23864094907594222, + "grad_norm": 0.8130611777305603, + "learning_rate": 0.00018999978828226547, + "loss": 2.7195, + "step": 2957 + }, + { + "epoch": 0.23872165281252522, + "grad_norm": 0.759503960609436, + "learning_rate": 0.00018999290574257292, + "loss": 2.6856, + "step": 2958 + }, + { + "epoch": 0.23880235654910822, + "grad_norm": 0.7490882277488708, + "learning_rate": 0.0001899860209600219, + "loss": 2.7587, + "step": 2959 + }, + { + "epoch": 0.23888306028569123, + "grad_norm": 0.8111297488212585, + "learning_rate": 0.000189979133934784, + "loss": 2.7688, + "step": 2960 + }, + { + "epoch": 0.23896376402227423, + "grad_norm": 0.844894289970398, + "learning_rate": 0.0001899722446670309, + "loss": 2.7706, + "step": 2961 + }, + { + "epoch": 0.23904446775885724, + "grad_norm": 0.7875459790229797, + "learning_rate": 0.00018996535315693423, + "loss": 
2.7535, + "step": 2962 + }, + { + "epoch": 0.23912517149544024, + "grad_norm": 0.7768518328666687, + "learning_rate": 0.0001899584594046658, + "loss": 2.7268, + "step": 2963 + }, + { + "epoch": 0.23920587523202325, + "grad_norm": 0.8645716309547424, + "learning_rate": 0.00018995156341039744, + "loss": 2.7856, + "step": 2964 + }, + { + "epoch": 0.23928657896860625, + "grad_norm": 0.7816600799560547, + "learning_rate": 0.00018994466517430097, + "loss": 2.757, + "step": 2965 + }, + { + "epoch": 0.23936728270518925, + "grad_norm": 0.7967644333839417, + "learning_rate": 0.00018993776469654832, + "loss": 2.7021, + "step": 2966 + }, + { + "epoch": 0.23944798644177226, + "grad_norm": 0.800589919090271, + "learning_rate": 0.00018993086197731146, + "loss": 2.6838, + "step": 2967 + }, + { + "epoch": 0.23952869017835526, + "grad_norm": 0.7658529281616211, + "learning_rate": 0.00018992395701676246, + "loss": 2.6992, + "step": 2968 + }, + { + "epoch": 0.23960939391493827, + "grad_norm": 0.848456621170044, + "learning_rate": 0.00018991704981507338, + "loss": 2.7249, + "step": 2969 + }, + { + "epoch": 0.23969009765152127, + "grad_norm": 0.7365427017211914, + "learning_rate": 0.00018991014037241638, + "loss": 2.7044, + "step": 2970 + }, + { + "epoch": 0.23977080138810428, + "grad_norm": 0.8026351928710938, + "learning_rate": 0.00018990322868896365, + "loss": 2.7409, + "step": 2971 + }, + { + "epoch": 0.23985150512468728, + "grad_norm": 0.788646936416626, + "learning_rate": 0.00018989631476488744, + "loss": 2.7331, + "step": 2972 + }, + { + "epoch": 0.23993220886127029, + "grad_norm": 0.8388644456863403, + "learning_rate": 0.00018988939860036007, + "loss": 2.7478, + "step": 2973 + }, + { + "epoch": 0.2400129125978533, + "grad_norm": 0.7479026913642883, + "learning_rate": 0.00018988248019555394, + "loss": 2.7248, + "step": 2974 + }, + { + "epoch": 0.2400936163344363, + "grad_norm": 0.7313364744186401, + "learning_rate": 0.00018987555955064144, + "loss": 2.7323, + "step": 2975 + }, + 
{ + "epoch": 0.2401743200710193, + "grad_norm": 0.7858260273933411, + "learning_rate": 0.00018986863666579505, + "loss": 2.6845, + "step": 2976 + }, + { + "epoch": 0.2402550238076023, + "grad_norm": 0.8090949654579163, + "learning_rate": 0.00018986171154118732, + "loss": 2.8094, + "step": 2977 + }, + { + "epoch": 0.2403357275441853, + "grad_norm": 0.7917135953903198, + "learning_rate": 0.00018985478417699085, + "loss": 2.7106, + "step": 2978 + }, + { + "epoch": 0.2404164312807683, + "grad_norm": 0.8192126154899597, + "learning_rate": 0.00018984785457337825, + "loss": 2.7729, + "step": 2979 + }, + { + "epoch": 0.24049713501735132, + "grad_norm": 0.797922670841217, + "learning_rate": 0.00018984092273052226, + "loss": 2.7747, + "step": 2980 + }, + { + "epoch": 0.24057783875393432, + "grad_norm": 0.9050948023796082, + "learning_rate": 0.00018983398864859564, + "loss": 2.7453, + "step": 2981 + }, + { + "epoch": 0.24065854249051732, + "grad_norm": 0.7827617526054382, + "learning_rate": 0.0001898270523277712, + "loss": 2.7371, + "step": 2982 + }, + { + "epoch": 0.24073924622710033, + "grad_norm": 0.7530156373977661, + "learning_rate": 0.0001898201137682218, + "loss": 2.7397, + "step": 2983 + }, + { + "epoch": 0.2408199499636833, + "grad_norm": 0.7989545464515686, + "learning_rate": 0.00018981317297012034, + "loss": 2.7532, + "step": 2984 + }, + { + "epoch": 0.2409006537002663, + "grad_norm": 0.7501168847084045, + "learning_rate": 0.00018980622993363988, + "loss": 2.7395, + "step": 2985 + }, + { + "epoch": 0.2409813574368493, + "grad_norm": 0.8073468208312988, + "learning_rate": 0.0001897992846589534, + "loss": 2.7673, + "step": 2986 + }, + { + "epoch": 0.24106206117343232, + "grad_norm": 0.9155512452125549, + "learning_rate": 0.00018979233714623401, + "loss": 2.6608, + "step": 2987 + }, + { + "epoch": 0.24114276491001532, + "grad_norm": 0.7461311221122742, + "learning_rate": 0.00018978538739565485, + "loss": 2.7657, + "step": 2988 + }, + { + "epoch": 0.24122346864659833, 
+ "grad_norm": 0.8011443018913269, + "learning_rate": 0.00018977843540738914, + "loss": 2.7363, + "step": 2989 + }, + { + "epoch": 0.24130417238318133, + "grad_norm": 0.7602998614311218, + "learning_rate": 0.0001897714811816101, + "loss": 2.7285, + "step": 2990 + }, + { + "epoch": 0.24138487611976434, + "grad_norm": 0.8283531069755554, + "learning_rate": 0.00018976452471849116, + "loss": 2.7614, + "step": 2991 + }, + { + "epoch": 0.24146557985634734, + "grad_norm": 0.7358889579772949, + "learning_rate": 0.00018975756601820556, + "loss": 2.7429, + "step": 2992 + }, + { + "epoch": 0.24154628359293034, + "grad_norm": 0.7749240398406982, + "learning_rate": 0.0001897506050809268, + "loss": 2.6884, + "step": 2993 + }, + { + "epoch": 0.24162698732951335, + "grad_norm": 0.7529963254928589, + "learning_rate": 0.00018974364190682837, + "loss": 2.7619, + "step": 2994 + }, + { + "epoch": 0.24170769106609635, + "grad_norm": 0.7946054935455322, + "learning_rate": 0.00018973667649608376, + "loss": 2.7403, + "step": 2995 + }, + { + "epoch": 0.24178839480267936, + "grad_norm": 0.735870897769928, + "learning_rate": 0.0001897297088488666, + "loss": 2.7158, + "step": 2996 + }, + { + "epoch": 0.24186909853926236, + "grad_norm": 0.8409188985824585, + "learning_rate": 0.00018972273896535055, + "loss": 2.768, + "step": 2997 + }, + { + "epoch": 0.24194980227584537, + "grad_norm": 0.8351938724517822, + "learning_rate": 0.0001897157668457093, + "loss": 2.7548, + "step": 2998 + }, + { + "epoch": 0.24203050601242837, + "grad_norm": 0.8339046239852905, + "learning_rate": 0.00018970879249011663, + "loss": 2.7842, + "step": 2999 + }, + { + "epoch": 0.24211120974901137, + "grad_norm": 0.8092730641365051, + "learning_rate": 0.00018970181589874637, + "loss": 2.7141, + "step": 3000 + }, + { + "epoch": 0.24211120974901137, + "eval_loss": 2.643277406692505, + "eval_runtime": 784.7512, + "eval_samples_per_second": 3.339, + "eval_steps_per_second": 0.557, + "step": 3000 + }, + { + "epoch": 
0.24219191348559438, + "grad_norm": 0.8014447093009949, + "learning_rate": 0.00018969483707177235, + "loss": 2.7341, + "step": 3001 + }, + { + "epoch": 0.24227261722217738, + "grad_norm": 0.744153618812561, + "learning_rate": 0.00018968785600936855, + "loss": 2.678, + "step": 3002 + }, + { + "epoch": 0.2423533209587604, + "grad_norm": 0.7264240384101868, + "learning_rate": 0.0001896808727117089, + "loss": 2.7321, + "step": 3003 + }, + { + "epoch": 0.2424340246953434, + "grad_norm": 0.8214067220687866, + "learning_rate": 0.00018967388717896748, + "loss": 2.7311, + "step": 3004 + }, + { + "epoch": 0.2425147284319264, + "grad_norm": 0.7871330976486206, + "learning_rate": 0.00018966689941131838, + "loss": 2.7184, + "step": 3005 + }, + { + "epoch": 0.2425954321685094, + "grad_norm": 0.7301360964775085, + "learning_rate": 0.00018965990940893575, + "loss": 2.7039, + "step": 3006 + }, + { + "epoch": 0.2426761359050924, + "grad_norm": 0.8290385603904724, + "learning_rate": 0.00018965291717199382, + "loss": 2.7848, + "step": 3007 + }, + { + "epoch": 0.2427568396416754, + "grad_norm": 0.7465909123420715, + "learning_rate": 0.00018964592270066683, + "loss": 2.7271, + "step": 3008 + }, + { + "epoch": 0.2428375433782584, + "grad_norm": 0.7992933988571167, + "learning_rate": 0.00018963892599512913, + "loss": 2.7749, + "step": 3009 + }, + { + "epoch": 0.24291824711484142, + "grad_norm": 0.7879100441932678, + "learning_rate": 0.00018963192705555507, + "loss": 2.6844, + "step": 3010 + }, + { + "epoch": 0.24299895085142442, + "grad_norm": 0.7895401120185852, + "learning_rate": 0.00018962492588211905, + "loss": 2.725, + "step": 3011 + }, + { + "epoch": 0.24307965458800743, + "grad_norm": 0.7699374556541443, + "learning_rate": 0.00018961792247499564, + "loss": 2.7408, + "step": 3012 + }, + { + "epoch": 0.24316035832459043, + "grad_norm": 0.828372597694397, + "learning_rate": 0.0001896109168343593, + "loss": 2.7527, + "step": 3013 + }, + { + "epoch": 0.24324106206117344, + "grad_norm": 
0.7611951231956482, + "learning_rate": 0.0001896039089603847, + "loss": 2.7294, + "step": 3014 + }, + { + "epoch": 0.24332176579775644, + "grad_norm": 0.8214892148971558, + "learning_rate": 0.00018959689885324646, + "loss": 2.6931, + "step": 3015 + }, + { + "epoch": 0.24340246953433944, + "grad_norm": 0.7472538352012634, + "learning_rate": 0.00018958988651311928, + "loss": 2.7316, + "step": 3016 + }, + { + "epoch": 0.24348317327092245, + "grad_norm": 0.7574933171272278, + "learning_rate": 0.00018958287194017795, + "loss": 2.7764, + "step": 3017 + }, + { + "epoch": 0.24356387700750545, + "grad_norm": 0.739152729511261, + "learning_rate": 0.00018957585513459723, + "loss": 2.7949, + "step": 3018 + }, + { + "epoch": 0.24364458074408846, + "grad_norm": 0.824097752571106, + "learning_rate": 0.00018956883609655208, + "loss": 2.6612, + "step": 3019 + }, + { + "epoch": 0.24372528448067146, + "grad_norm": 0.7891144156455994, + "learning_rate": 0.00018956181482621744, + "loss": 2.7139, + "step": 3020 + }, + { + "epoch": 0.24380598821725447, + "grad_norm": 0.7364415526390076, + "learning_rate": 0.0001895547913237682, + "loss": 2.6984, + "step": 3021 + }, + { + "epoch": 0.24388669195383747, + "grad_norm": 0.7631362080574036, + "learning_rate": 0.0001895477655893795, + "loss": 2.7015, + "step": 3022 + }, + { + "epoch": 0.24396739569042047, + "grad_norm": 0.780541181564331, + "learning_rate": 0.00018954073762322637, + "loss": 2.7716, + "step": 3023 + }, + { + "epoch": 0.24404809942700348, + "grad_norm": 0.7877349853515625, + "learning_rate": 0.00018953370742548403, + "loss": 2.6654, + "step": 3024 + }, + { + "epoch": 0.24412880316358648, + "grad_norm": 0.7786216139793396, + "learning_rate": 0.00018952667499632763, + "loss": 2.7491, + "step": 3025 + }, + { + "epoch": 0.2442095069001695, + "grad_norm": 0.8207663893699646, + "learning_rate": 0.00018951964033593247, + "loss": 2.7212, + "step": 3026 + }, + { + "epoch": 0.2442902106367525, + "grad_norm": 0.8271831274032593, + 
"learning_rate": 0.00018951260344447386, + "loss": 2.7456, + "step": 3027 + }, + { + "epoch": 0.2443709143733355, + "grad_norm": 0.7610505819320679, + "learning_rate": 0.00018950556432212722, + "loss": 2.7472, + "step": 3028 + }, + { + "epoch": 0.2444516181099185, + "grad_norm": 0.7521701455116272, + "learning_rate": 0.00018949852296906792, + "loss": 2.7263, + "step": 3029 + }, + { + "epoch": 0.2445323218465015, + "grad_norm": 0.7518337965011597, + "learning_rate": 0.00018949147938547144, + "loss": 2.7069, + "step": 3030 + }, + { + "epoch": 0.2446130255830845, + "grad_norm": 0.7823107838630676, + "learning_rate": 0.00018948443357151343, + "loss": 2.7858, + "step": 3031 + }, + { + "epoch": 0.2446937293196675, + "grad_norm": 0.733132004737854, + "learning_rate": 0.00018947738552736938, + "loss": 2.7194, + "step": 3032 + }, + { + "epoch": 0.24477443305625052, + "grad_norm": 0.7756488919258118, + "learning_rate": 0.00018947033525321501, + "loss": 2.7299, + "step": 3033 + }, + { + "epoch": 0.24485513679283352, + "grad_norm": 0.7971112728118896, + "learning_rate": 0.00018946328274922598, + "loss": 2.7474, + "step": 3034 + }, + { + "epoch": 0.2449358405294165, + "grad_norm": 0.7871260643005371, + "learning_rate": 0.0001894562280155781, + "loss": 2.6994, + "step": 3035 + }, + { + "epoch": 0.2450165442659995, + "grad_norm": 0.7431116104125977, + "learning_rate": 0.00018944917105244717, + "loss": 2.6834, + "step": 3036 + }, + { + "epoch": 0.2450972480025825, + "grad_norm": 0.7372273206710815, + "learning_rate": 0.00018944211186000906, + "loss": 2.6988, + "step": 3037 + }, + { + "epoch": 0.2451779517391655, + "grad_norm": 0.8161508440971375, + "learning_rate": 0.00018943505043843975, + "loss": 2.7595, + "step": 3038 + }, + { + "epoch": 0.24525865547574852, + "grad_norm": 0.8062586784362793, + "learning_rate": 0.00018942798678791518, + "loss": 2.6893, + "step": 3039 + }, + { + "epoch": 0.24533935921233152, + "grad_norm": 0.824023425579071, + "learning_rate": 
0.0001894209209086114, + "loss": 2.7188, + "step": 3040 + }, + { + "epoch": 0.24542006294891452, + "grad_norm": 0.740466833114624, + "learning_rate": 0.00018941385280070455, + "loss": 2.674, + "step": 3041 + }, + { + "epoch": 0.24550076668549753, + "grad_norm": 0.8543577194213867, + "learning_rate": 0.00018940678246437073, + "loss": 2.7423, + "step": 3042 + }, + { + "epoch": 0.24558147042208053, + "grad_norm": 0.7059324979782104, + "learning_rate": 0.0001893997098997862, + "loss": 2.6669, + "step": 3043 + }, + { + "epoch": 0.24566217415866354, + "grad_norm": 0.7739956974983215, + "learning_rate": 0.00018939263510712721, + "loss": 2.7118, + "step": 3044 + }, + { + "epoch": 0.24574287789524654, + "grad_norm": 0.7701205611228943, + "learning_rate": 0.00018938555808657007, + "loss": 2.7653, + "step": 3045 + }, + { + "epoch": 0.24582358163182955, + "grad_norm": 0.7243000864982605, + "learning_rate": 0.00018937847883829115, + "loss": 2.6789, + "step": 3046 + }, + { + "epoch": 0.24590428536841255, + "grad_norm": 0.7645598649978638, + "learning_rate": 0.00018937139736246693, + "loss": 2.7108, + "step": 3047 + }, + { + "epoch": 0.24598498910499556, + "grad_norm": 0.7544745802879333, + "learning_rate": 0.00018936431365927385, + "loss": 2.6958, + "step": 3048 + }, + { + "epoch": 0.24606569284157856, + "grad_norm": 0.709282398223877, + "learning_rate": 0.00018935722772888848, + "loss": 2.6728, + "step": 3049 + }, + { + "epoch": 0.24614639657816156, + "grad_norm": 0.7524243593215942, + "learning_rate": 0.00018935013957148742, + "loss": 2.7283, + "step": 3050 + }, + { + "epoch": 0.24622710031474457, + "grad_norm": 0.7959655523300171, + "learning_rate": 0.0001893430491872473, + "loss": 2.7384, + "step": 3051 + }, + { + "epoch": 0.24630780405132757, + "grad_norm": 0.7252553105354309, + "learning_rate": 0.00018933595657634486, + "loss": 2.7226, + "step": 3052 + }, + { + "epoch": 0.24638850778791058, + "grad_norm": 0.7387316226959229, + "learning_rate": 0.00018932886173895686, + 
"loss": 2.7546, + "step": 3053 + }, + { + "epoch": 0.24646921152449358, + "grad_norm": 0.804856538772583, + "learning_rate": 0.0001893217646752601, + "loss": 2.7321, + "step": 3054 + }, + { + "epoch": 0.24654991526107659, + "grad_norm": 0.6929069757461548, + "learning_rate": 0.0001893146653854315, + "loss": 2.6735, + "step": 3055 + }, + { + "epoch": 0.2466306189976596, + "grad_norm": 0.7076159715652466, + "learning_rate": 0.00018930756386964794, + "loss": 2.7368, + "step": 3056 + }, + { + "epoch": 0.2467113227342426, + "grad_norm": 0.7522851228713989, + "learning_rate": 0.00018930046012808648, + "loss": 2.7448, + "step": 3057 + }, + { + "epoch": 0.2467920264708256, + "grad_norm": 0.8347200155258179, + "learning_rate": 0.00018929335416092408, + "loss": 2.6837, + "step": 3058 + }, + { + "epoch": 0.2468727302074086, + "grad_norm": 0.737503707408905, + "learning_rate": 0.00018928624596833786, + "loss": 2.693, + "step": 3059 + }, + { + "epoch": 0.2469534339439916, + "grad_norm": 0.7836787104606628, + "learning_rate": 0.00018927913555050503, + "loss": 2.7335, + "step": 3060 + }, + { + "epoch": 0.2470341376805746, + "grad_norm": 0.7823840975761414, + "learning_rate": 0.00018927202290760278, + "loss": 2.6736, + "step": 3061 + }, + { + "epoch": 0.24711484141715762, + "grad_norm": 0.7894529700279236, + "learning_rate": 0.00018926490803980833, + "loss": 2.7112, + "step": 3062 + }, + { + "epoch": 0.24719554515374062, + "grad_norm": 0.8289024829864502, + "learning_rate": 0.000189257790947299, + "loss": 2.7667, + "step": 3063 + }, + { + "epoch": 0.24727624889032362, + "grad_norm": 0.70560222864151, + "learning_rate": 0.00018925067163025227, + "loss": 2.6946, + "step": 3064 + }, + { + "epoch": 0.24735695262690663, + "grad_norm": 0.6954196095466614, + "learning_rate": 0.00018924355008884548, + "loss": 2.7237, + "step": 3065 + }, + { + "epoch": 0.24743765636348963, + "grad_norm": 0.7975121736526489, + "learning_rate": 0.0001892364263232561, + "loss": 2.6392, + "step": 3066 + }, + { 
+ "epoch": 0.24751836010007264, + "grad_norm": 0.777350902557373, + "learning_rate": 0.00018922930033366174, + "loss": 2.7284, + "step": 3067 + }, + { + "epoch": 0.24759906383665564, + "grad_norm": 0.738240659236908, + "learning_rate": 0.00018922217212023995, + "loss": 2.6884, + "step": 3068 + }, + { + "epoch": 0.24767976757323865, + "grad_norm": 0.8077268600463867, + "learning_rate": 0.0001892150416831684, + "loss": 2.7205, + "step": 3069 + }, + { + "epoch": 0.24776047130982165, + "grad_norm": 0.8108188509941101, + "learning_rate": 0.00018920790902262483, + "loss": 2.7592, + "step": 3070 + }, + { + "epoch": 0.24784117504640465, + "grad_norm": 0.7842642664909363, + "learning_rate": 0.00018920077413878695, + "loss": 2.7474, + "step": 3071 + }, + { + "epoch": 0.24792187878298766, + "grad_norm": 0.7644543051719666, + "learning_rate": 0.0001891936370318326, + "loss": 2.7179, + "step": 3072 + }, + { + "epoch": 0.24800258251957066, + "grad_norm": 0.7761854529380798, + "learning_rate": 0.00018918649770193965, + "loss": 2.71, + "step": 3073 + }, + { + "epoch": 0.24808328625615367, + "grad_norm": 0.7724074125289917, + "learning_rate": 0.00018917935614928607, + "loss": 2.7359, + "step": 3074 + }, + { + "epoch": 0.24816398999273667, + "grad_norm": 0.7360609173774719, + "learning_rate": 0.0001891722123740498, + "loss": 2.7342, + "step": 3075 + }, + { + "epoch": 0.24824469372931968, + "grad_norm": 0.757561206817627, + "learning_rate": 0.00018916506637640894, + "loss": 2.7647, + "step": 3076 + }, + { + "epoch": 0.24832539746590268, + "grad_norm": 0.7180947065353394, + "learning_rate": 0.00018915791815654148, + "loss": 2.6771, + "step": 3077 + }, + { + "epoch": 0.24840610120248569, + "grad_norm": 0.7219653129577637, + "learning_rate": 0.0001891507677146257, + "loss": 2.7772, + "step": 3078 + }, + { + "epoch": 0.2484868049390687, + "grad_norm": 0.749113917350769, + "learning_rate": 0.0001891436150508397, + "loss": 2.6996, + "step": 3079 + }, + { + "epoch": 0.2485675086756517, + 
"grad_norm": 0.766180157661438, + "learning_rate": 0.00018913646016536183, + "loss": 2.7896, + "step": 3080 + }, + { + "epoch": 0.2486482124122347, + "grad_norm": 0.7672411799430847, + "learning_rate": 0.00018912930305837032, + "loss": 2.7307, + "step": 3081 + }, + { + "epoch": 0.2487289161488177, + "grad_norm": 0.7639018297195435, + "learning_rate": 0.00018912214373004364, + "loss": 2.6569, + "step": 3082 + }, + { + "epoch": 0.2488096198854007, + "grad_norm": 0.8935483694076538, + "learning_rate": 0.00018911498218056013, + "loss": 2.6897, + "step": 3083 + }, + { + "epoch": 0.2488903236219837, + "grad_norm": 0.8506368398666382, + "learning_rate": 0.00018910781841009836, + "loss": 2.778, + "step": 3084 + }, + { + "epoch": 0.24897102735856672, + "grad_norm": 0.8026999235153198, + "learning_rate": 0.0001891006524188368, + "loss": 2.7799, + "step": 3085 + }, + { + "epoch": 0.2490517310951497, + "grad_norm": 0.784637987613678, + "learning_rate": 0.00018909348420695406, + "loss": 2.673, + "step": 3086 + }, + { + "epoch": 0.2491324348317327, + "grad_norm": 0.8949337601661682, + "learning_rate": 0.00018908631377462882, + "loss": 2.7726, + "step": 3087 + }, + { + "epoch": 0.2492131385683157, + "grad_norm": 0.73841792345047, + "learning_rate": 0.00018907914112203974, + "loss": 2.7403, + "step": 3088 + }, + { + "epoch": 0.2492938423048987, + "grad_norm": 0.7305924296379089, + "learning_rate": 0.00018907196624936564, + "loss": 2.6713, + "step": 3089 + }, + { + "epoch": 0.2493745460414817, + "grad_norm": 0.7707394361495972, + "learning_rate": 0.0001890647891567853, + "loss": 2.7306, + "step": 3090 + }, + { + "epoch": 0.2494552497780647, + "grad_norm": 0.8691473603248596, + "learning_rate": 0.00018905760984447759, + "loss": 2.6775, + "step": 3091 + }, + { + "epoch": 0.24953595351464772, + "grad_norm": 0.7466028332710266, + "learning_rate": 0.00018905042831262144, + "loss": 2.7196, + "step": 3092 + }, + { + "epoch": 0.24961665725123072, + "grad_norm": 0.7785150408744812, + 
"learning_rate": 0.0001890432445613958, + "loss": 2.7099, + "step": 3093 + }, + { + "epoch": 0.24969736098781373, + "grad_norm": 0.7775028347969055, + "learning_rate": 0.0001890360585909798, + "loss": 2.698, + "step": 3094 + }, + { + "epoch": 0.24977806472439673, + "grad_norm": 0.829257071018219, + "learning_rate": 0.00018902887040155245, + "loss": 2.711, + "step": 3095 + }, + { + "epoch": 0.24985876846097974, + "grad_norm": 0.8492234945297241, + "learning_rate": 0.00018902167999329295, + "loss": 2.7164, + "step": 3096 + }, + { + "epoch": 0.24993947219756274, + "grad_norm": 0.7332174777984619, + "learning_rate": 0.00018901448736638045, + "loss": 2.6925, + "step": 3097 + }, + { + "epoch": 0.25002017593414577, + "grad_norm": 0.7494251728057861, + "learning_rate": 0.00018900729252099426, + "loss": 2.6899, + "step": 3098 + }, + { + "epoch": 0.25010087967072875, + "grad_norm": 0.7760747075080872, + "learning_rate": 0.00018900009545731367, + "loss": 2.6626, + "step": 3099 + }, + { + "epoch": 0.2501815834073118, + "grad_norm": 0.7270001173019409, + "learning_rate": 0.00018899289617551804, + "loss": 2.7338, + "step": 3100 + }, + { + "epoch": 0.25026228714389476, + "grad_norm": 0.7832693457603455, + "learning_rate": 0.0001889856946757868, + "loss": 2.6668, + "step": 3101 + }, + { + "epoch": 0.2503429908804778, + "grad_norm": 0.8833239674568176, + "learning_rate": 0.00018897849095829945, + "loss": 2.7219, + "step": 3102 + }, + { + "epoch": 0.25042369461706077, + "grad_norm": 0.8144814372062683, + "learning_rate": 0.0001889712850232355, + "loss": 2.724, + "step": 3103 + }, + { + "epoch": 0.2505043983536438, + "grad_norm": 0.9466180801391602, + "learning_rate": 0.0001889640768707746, + "loss": 2.7499, + "step": 3104 + }, + { + "epoch": 0.2505851020902268, + "grad_norm": 0.926292359828949, + "learning_rate": 0.00018895686650109632, + "loss": 2.7391, + "step": 3105 + }, + { + "epoch": 0.2506658058268098, + "grad_norm": 0.8214002251625061, + "learning_rate": 
0.00018894965391438038, + "loss": 2.7546, + "step": 3106 + }, + { + "epoch": 0.2507465095633928, + "grad_norm": 0.9021030068397522, + "learning_rate": 0.00018894243911080655, + "loss": 2.7188, + "step": 3107 + }, + { + "epoch": 0.2508272132999758, + "grad_norm": 0.778366208076477, + "learning_rate": 0.00018893522209055465, + "loss": 2.7852, + "step": 3108 + }, + { + "epoch": 0.2509079170365588, + "grad_norm": 0.8780209422111511, + "learning_rate": 0.00018892800285380456, + "loss": 2.7344, + "step": 3109 + }, + { + "epoch": 0.2509886207731418, + "grad_norm": 0.7581839561462402, + "learning_rate": 0.00018892078140073614, + "loss": 2.6697, + "step": 3110 + }, + { + "epoch": 0.2510693245097248, + "grad_norm": 0.7818635702133179, + "learning_rate": 0.00018891355773152944, + "loss": 2.6969, + "step": 3111 + }, + { + "epoch": 0.2511500282463078, + "grad_norm": 0.7528424859046936, + "learning_rate": 0.0001889063318463644, + "loss": 2.7359, + "step": 3112 + }, + { + "epoch": 0.2512307319828908, + "grad_norm": 0.8274288773536682, + "learning_rate": 0.0001888991037454212, + "loss": 2.7124, + "step": 3113 + }, + { + "epoch": 0.2513114357194738, + "grad_norm": 0.7186813354492188, + "learning_rate": 0.00018889187342888, + "loss": 2.7037, + "step": 3114 + }, + { + "epoch": 0.2513921394560568, + "grad_norm": 0.7458071112632751, + "learning_rate": 0.00018888464089692088, + "loss": 2.7178, + "step": 3115 + }, + { + "epoch": 0.2514728431926398, + "grad_norm": 0.7814257740974426, + "learning_rate": 0.00018887740614972418, + "loss": 2.7554, + "step": 3116 + }, + { + "epoch": 0.2515535469292228, + "grad_norm": 0.7706831097602844, + "learning_rate": 0.0001888701691874702, + "loss": 2.7441, + "step": 3117 + }, + { + "epoch": 0.2516342506658058, + "grad_norm": 0.8177775740623474, + "learning_rate": 0.0001888629300103393, + "loss": 2.7257, + "step": 3118 + }, + { + "epoch": 0.25171495440238884, + "grad_norm": 0.791097104549408, + "learning_rate": 0.00018885568861851188, + "loss": 2.6937, + 
"step": 3119 + }, + { + "epoch": 0.2517956581389718, + "grad_norm": 0.7521430850028992, + "learning_rate": 0.00018884844501216845, + "loss": 2.7723, + "step": 3120 + }, + { + "epoch": 0.25187636187555484, + "grad_norm": 0.8119359016418457, + "learning_rate": 0.00018884119919148948, + "loss": 2.7573, + "step": 3121 + }, + { + "epoch": 0.2519570656121378, + "grad_norm": 0.7579830288887024, + "learning_rate": 0.00018883395115665562, + "loss": 2.6943, + "step": 3122 + }, + { + "epoch": 0.25203776934872085, + "grad_norm": 0.7718791365623474, + "learning_rate": 0.00018882670090784748, + "loss": 2.6911, + "step": 3123 + }, + { + "epoch": 0.25211847308530383, + "grad_norm": 0.7718087434768677, + "learning_rate": 0.00018881944844524576, + "loss": 2.7505, + "step": 3124 + }, + { + "epoch": 0.25219917682188686, + "grad_norm": 0.7696875333786011, + "learning_rate": 0.0001888121937690312, + "loss": 2.7272, + "step": 3125 + }, + { + "epoch": 0.25227988055846984, + "grad_norm": 0.8082131743431091, + "learning_rate": 0.00018880493687938464, + "loss": 2.6677, + "step": 3126 + }, + { + "epoch": 0.25236058429505287, + "grad_norm": 0.857224702835083, + "learning_rate": 0.00018879767777648686, + "loss": 2.7237, + "step": 3127 + }, + { + "epoch": 0.25244128803163585, + "grad_norm": 0.8135749697685242, + "learning_rate": 0.00018879041646051886, + "loss": 2.7298, + "step": 3128 + }, + { + "epoch": 0.2525219917682189, + "grad_norm": 0.7772457003593445, + "learning_rate": 0.0001887831529316616, + "loss": 2.7723, + "step": 3129 + }, + { + "epoch": 0.25260269550480186, + "grad_norm": 0.795555055141449, + "learning_rate": 0.00018877588719009607, + "loss": 2.7207, + "step": 3130 + }, + { + "epoch": 0.2526833992413849, + "grad_norm": 0.7677939534187317, + "learning_rate": 0.00018876861923600337, + "loss": 2.6649, + "step": 3131 + }, + { + "epoch": 0.25276410297796786, + "grad_norm": 0.7706151008605957, + "learning_rate": 0.00018876134906956464, + "loss": 2.7154, + "step": 3132 + }, + { + 
"epoch": 0.2528448067145509, + "grad_norm": 0.8230584859848022, + "learning_rate": 0.00018875407669096105, + "loss": 2.7871, + "step": 3133 + }, + { + "epoch": 0.2529255104511339, + "grad_norm": 0.7037158608436584, + "learning_rate": 0.0001887468021003739, + "loss": 2.669, + "step": 3134 + }, + { + "epoch": 0.2530062141877169, + "grad_norm": 0.8485400080680847, + "learning_rate": 0.00018873952529798441, + "loss": 2.7517, + "step": 3135 + }, + { + "epoch": 0.2530869179242999, + "grad_norm": 0.7803399562835693, + "learning_rate": 0.000188732246283974, + "loss": 2.6987, + "step": 3136 + }, + { + "epoch": 0.2531676216608829, + "grad_norm": 0.7884016633033752, + "learning_rate": 0.0001887249650585241, + "loss": 2.7348, + "step": 3137 + }, + { + "epoch": 0.2532483253974659, + "grad_norm": 0.7794530987739563, + "learning_rate": 0.0001887176816218161, + "loss": 2.6934, + "step": 3138 + }, + { + "epoch": 0.2533290291340489, + "grad_norm": 0.7905173301696777, + "learning_rate": 0.00018871039597403156, + "loss": 2.714, + "step": 3139 + }, + { + "epoch": 0.2534097328706319, + "grad_norm": 0.7857949137687683, + "learning_rate": 0.0001887031081153521, + "loss": 2.7591, + "step": 3140 + }, + { + "epoch": 0.25349043660721493, + "grad_norm": 0.8602419495582581, + "learning_rate": 0.00018869581804595927, + "loss": 2.7819, + "step": 3141 + }, + { + "epoch": 0.2535711403437979, + "grad_norm": 0.7845202088356018, + "learning_rate": 0.00018868852576603483, + "loss": 2.6796, + "step": 3142 + }, + { + "epoch": 0.25365184408038094, + "grad_norm": 0.7600612640380859, + "learning_rate": 0.00018868123127576048, + "loss": 2.6785, + "step": 3143 + }, + { + "epoch": 0.2537325478169639, + "grad_norm": 0.7731521725654602, + "learning_rate": 0.000188673934575318, + "loss": 2.7435, + "step": 3144 + }, + { + "epoch": 0.25381325155354695, + "grad_norm": 0.8214225172996521, + "learning_rate": 0.0001886666356648893, + "loss": 2.7264, + "step": 3145 + }, + { + "epoch": 0.2538939552901299, + "grad_norm": 
0.7623010277748108, + "learning_rate": 0.00018865933454465628, + "loss": 2.73, + "step": 3146 + }, + { + "epoch": 0.25397465902671296, + "grad_norm": 0.7864633798599243, + "learning_rate": 0.00018865203121480088, + "loss": 2.7654, + "step": 3147 + }, + { + "epoch": 0.25405536276329593, + "grad_norm": 0.7654051780700684, + "learning_rate": 0.0001886447256755051, + "loss": 2.7171, + "step": 3148 + }, + { + "epoch": 0.25413606649987897, + "grad_norm": 0.8045486211776733, + "learning_rate": 0.0001886374179269511, + "loss": 2.7385, + "step": 3149 + }, + { + "epoch": 0.25421677023646194, + "grad_norm": 0.8504971861839294, + "learning_rate": 0.0001886301079693209, + "loss": 2.6719, + "step": 3150 + }, + { + "epoch": 0.254297473973045, + "grad_norm": 0.771538496017456, + "learning_rate": 0.0001886227958027967, + "loss": 2.6707, + "step": 3151 + }, + { + "epoch": 0.25437817770962795, + "grad_norm": 0.8472220301628113, + "learning_rate": 0.0001886154814275608, + "loss": 2.7201, + "step": 3152 + }, + { + "epoch": 0.254458881446211, + "grad_norm": 0.7639158368110657, + "learning_rate": 0.00018860816484379545, + "loss": 2.76, + "step": 3153 + }, + { + "epoch": 0.25453958518279396, + "grad_norm": 0.8042064905166626, + "learning_rate": 0.000188600846051683, + "loss": 2.6862, + "step": 3154 + }, + { + "epoch": 0.254620288919377, + "grad_norm": 0.7481087446212769, + "learning_rate": 0.0001885935250514059, + "loss": 2.7394, + "step": 3155 + }, + { + "epoch": 0.25470099265595997, + "grad_norm": 0.7826097011566162, + "learning_rate": 0.00018858620184314653, + "loss": 2.596, + "step": 3156 + }, + { + "epoch": 0.254781696392543, + "grad_norm": 0.7477610111236572, + "learning_rate": 0.00018857887642708743, + "loss": 2.7385, + "step": 3157 + }, + { + "epoch": 0.254862400129126, + "grad_norm": 0.7347466945648193, + "learning_rate": 0.00018857154880341122, + "loss": 2.722, + "step": 3158 + }, + { + "epoch": 0.254943103865709, + "grad_norm": 0.7853806018829346, + "learning_rate": 
0.00018856421897230048, + "loss": 2.7675, + "step": 3159 + }, + { + "epoch": 0.255023807602292, + "grad_norm": 0.7497034072875977, + "learning_rate": 0.0001885568869339379, + "loss": 2.6882, + "step": 3160 + }, + { + "epoch": 0.255104511338875, + "grad_norm": 0.7932263612747192, + "learning_rate": 0.0001885495526885062, + "loss": 2.7938, + "step": 3161 + }, + { + "epoch": 0.255185215075458, + "grad_norm": 0.7776823043823242, + "learning_rate": 0.00018854221623618815, + "loss": 2.6955, + "step": 3162 + }, + { + "epoch": 0.25526591881204097, + "grad_norm": 0.7564878463745117, + "learning_rate": 0.00018853487757716666, + "loss": 2.7644, + "step": 3163 + }, + { + "epoch": 0.255346622548624, + "grad_norm": 0.836270809173584, + "learning_rate": 0.00018852753671162454, + "loss": 2.7119, + "step": 3164 + }, + { + "epoch": 0.255427326285207, + "grad_norm": 0.7540388703346252, + "learning_rate": 0.00018852019363974485, + "loss": 2.797, + "step": 3165 + }, + { + "epoch": 0.25550803002179, + "grad_norm": 0.7943860292434692, + "learning_rate": 0.0001885128483617105, + "loss": 2.7973, + "step": 3166 + }, + { + "epoch": 0.255588733758373, + "grad_norm": 0.7743831276893616, + "learning_rate": 0.00018850550087770463, + "loss": 2.7403, + "step": 3167 + }, + { + "epoch": 0.255669437494956, + "grad_norm": 0.7593801021575928, + "learning_rate": 0.00018849815118791028, + "loss": 2.7203, + "step": 3168 + }, + { + "epoch": 0.255750141231539, + "grad_norm": 0.7663586139678955, + "learning_rate": 0.00018849079929251068, + "loss": 2.7481, + "step": 3169 + }, + { + "epoch": 0.25583084496812203, + "grad_norm": 0.7218170166015625, + "learning_rate": 0.00018848344519168905, + "loss": 2.6698, + "step": 3170 + }, + { + "epoch": 0.255911548704705, + "grad_norm": 0.8374441266059875, + "learning_rate": 0.00018847608888562868, + "loss": 2.8121, + "step": 3171 + }, + { + "epoch": 0.25599225244128804, + "grad_norm": 0.7488373517990112, + "learning_rate": 0.00018846873037451286, + "loss": 2.6871, + 
"step": 3172 + }, + { + "epoch": 0.256072956177871, + "grad_norm": 0.7513325810432434, + "learning_rate": 0.00018846136965852505, + "loss": 2.6924, + "step": 3173 + }, + { + "epoch": 0.25615365991445405, + "grad_norm": 0.7467690706253052, + "learning_rate": 0.00018845400673784865, + "loss": 2.714, + "step": 3174 + }, + { + "epoch": 0.256234363651037, + "grad_norm": 0.7717954516410828, + "learning_rate": 0.0001884466416126672, + "loss": 2.6679, + "step": 3175 + }, + { + "epoch": 0.25631506738762005, + "grad_norm": 0.7086547613143921, + "learning_rate": 0.0001884392742831642, + "loss": 2.7046, + "step": 3176 + }, + { + "epoch": 0.25639577112420303, + "grad_norm": 0.7024885416030884, + "learning_rate": 0.00018843190474952337, + "loss": 2.6724, + "step": 3177 + }, + { + "epoch": 0.25647647486078606, + "grad_norm": 0.8376390933990479, + "learning_rate": 0.00018842453301192827, + "loss": 2.7818, + "step": 3178 + }, + { + "epoch": 0.25655717859736904, + "grad_norm": 0.8190221190452576, + "learning_rate": 0.00018841715907056265, + "loss": 2.7455, + "step": 3179 + }, + { + "epoch": 0.25663788233395207, + "grad_norm": 0.8029047846794128, + "learning_rate": 0.0001884097829256103, + "loss": 2.7102, + "step": 3180 + }, + { + "epoch": 0.25671858607053505, + "grad_norm": 0.7467923760414124, + "learning_rate": 0.00018840240457725508, + "loss": 2.7051, + "step": 3181 + }, + { + "epoch": 0.2567992898071181, + "grad_norm": 0.7850394248962402, + "learning_rate": 0.00018839502402568086, + "loss": 2.6826, + "step": 3182 + }, + { + "epoch": 0.25687999354370106, + "grad_norm": 0.7144927978515625, + "learning_rate": 0.00018838764127107155, + "loss": 2.6694, + "step": 3183 + }, + { + "epoch": 0.2569606972802841, + "grad_norm": 0.7580311894416809, + "learning_rate": 0.0001883802563136112, + "loss": 2.7191, + "step": 3184 + }, + { + "epoch": 0.25704140101686707, + "grad_norm": 0.7366482615470886, + "learning_rate": 0.0001883728691534838, + "loss": 2.7175, + "step": 3185 + }, + { + "epoch": 
0.2571221047534501, + "grad_norm": 0.6961715817451477, + "learning_rate": 0.0001883654797908735, + "loss": 2.7705, + "step": 3186 + }, + { + "epoch": 0.2572028084900331, + "grad_norm": 0.7473716735839844, + "learning_rate": 0.00018835808822596445, + "loss": 2.707, + "step": 3187 + }, + { + "epoch": 0.2572835122266161, + "grad_norm": 0.8376151919364929, + "learning_rate": 0.00018835069445894087, + "loss": 2.7424, + "step": 3188 + }, + { + "epoch": 0.2573642159631991, + "grad_norm": 0.7950237393379211, + "learning_rate": 0.00018834329848998706, + "loss": 2.7593, + "step": 3189 + }, + { + "epoch": 0.2574449196997821, + "grad_norm": 0.7637122869491577, + "learning_rate": 0.0001883359003192873, + "loss": 2.6708, + "step": 3190 + }, + { + "epoch": 0.2575256234363651, + "grad_norm": 0.709516704082489, + "learning_rate": 0.00018832849994702597, + "loss": 2.6988, + "step": 3191 + }, + { + "epoch": 0.2576063271729481, + "grad_norm": 0.7465435266494751, + "learning_rate": 0.00018832109737338757, + "loss": 2.7183, + "step": 3192 + }, + { + "epoch": 0.2576870309095311, + "grad_norm": 0.7619186043739319, + "learning_rate": 0.00018831369259855653, + "loss": 2.6833, + "step": 3193 + }, + { + "epoch": 0.25776773464611413, + "grad_norm": 0.7501961588859558, + "learning_rate": 0.0001883062856227174, + "loss": 2.725, + "step": 3194 + }, + { + "epoch": 0.2578484383826971, + "grad_norm": 0.7720133066177368, + "learning_rate": 0.00018829887644605483, + "loss": 2.7988, + "step": 3195 + }, + { + "epoch": 0.25792914211928014, + "grad_norm": 0.7253942489624023, + "learning_rate": 0.00018829146506875344, + "loss": 2.6999, + "step": 3196 + }, + { + "epoch": 0.2580098458558631, + "grad_norm": 0.7759599685668945, + "learning_rate": 0.00018828405149099792, + "loss": 2.6831, + "step": 3197 + }, + { + "epoch": 0.25809054959244615, + "grad_norm": 0.7250547409057617, + "learning_rate": 0.0001882766357129731, + "loss": 2.6742, + "step": 3198 + }, + { + "epoch": 0.2581712533290291, + "grad_norm": 
0.7565183043479919, + "learning_rate": 0.00018826921773486372, + "loss": 2.6777, + "step": 3199 + }, + { + "epoch": 0.25825195706561216, + "grad_norm": 0.7183675169944763, + "learning_rate": 0.0001882617975568547, + "loss": 2.6743, + "step": 3200 + }, + { + "epoch": 0.25833266080219514, + "grad_norm": 0.7021663784980774, + "learning_rate": 0.00018825437517913098, + "loss": 2.727, + "step": 3201 + }, + { + "epoch": 0.25841336453877817, + "grad_norm": 0.7406932711601257, + "learning_rate": 0.00018824695060187753, + "loss": 2.7448, + "step": 3202 + }, + { + "epoch": 0.25849406827536114, + "grad_norm": 0.7766773104667664, + "learning_rate": 0.0001882395238252794, + "loss": 2.69, + "step": 3203 + }, + { + "epoch": 0.2585747720119442, + "grad_norm": 0.7483372688293457, + "learning_rate": 0.00018823209484952164, + "loss": 2.6611, + "step": 3204 + }, + { + "epoch": 0.25865547574852715, + "grad_norm": 0.781831681728363, + "learning_rate": 0.0001882246636747895, + "loss": 2.7292, + "step": 3205 + }, + { + "epoch": 0.2587361794851102, + "grad_norm": 0.7188203930854797, + "learning_rate": 0.00018821723030126806, + "loss": 2.718, + "step": 3206 + }, + { + "epoch": 0.25881688322169316, + "grad_norm": 0.7332054972648621, + "learning_rate": 0.00018820979472914263, + "loss": 2.6492, + "step": 3207 + }, + { + "epoch": 0.2588975869582762, + "grad_norm": 0.7044041156768799, + "learning_rate": 0.00018820235695859858, + "loss": 2.7047, + "step": 3208 + }, + { + "epoch": 0.25897829069485917, + "grad_norm": 0.8651862740516663, + "learning_rate": 0.00018819491698982121, + "loss": 2.6301, + "step": 3209 + }, + { + "epoch": 0.2590589944314422, + "grad_norm": 0.8118106126785278, + "learning_rate": 0.00018818747482299598, + "loss": 2.6522, + "step": 3210 + }, + { + "epoch": 0.2591396981680252, + "grad_norm": 0.7239218354225159, + "learning_rate": 0.00018818003045830832, + "loss": 2.7058, + "step": 3211 + }, + { + "epoch": 0.2592204019046082, + "grad_norm": 0.8557687997817993, + 
"learning_rate": 0.00018817258389594382, + "loss": 2.7125, + "step": 3212 + }, + { + "epoch": 0.2593011056411912, + "grad_norm": 0.7685148119926453, + "learning_rate": 0.00018816513513608801, + "loss": 2.7516, + "step": 3213 + }, + { + "epoch": 0.25938180937777416, + "grad_norm": 0.7497698664665222, + "learning_rate": 0.00018815768417892664, + "loss": 2.6536, + "step": 3214 + }, + { + "epoch": 0.2594625131143572, + "grad_norm": 0.7041923403739929, + "learning_rate": 0.0001881502310246453, + "loss": 2.7031, + "step": 3215 + }, + { + "epoch": 0.2595432168509402, + "grad_norm": 0.7815428376197815, + "learning_rate": 0.00018814277567342976, + "loss": 2.7291, + "step": 3216 + }, + { + "epoch": 0.2596239205875232, + "grad_norm": 0.7285065650939941, + "learning_rate": 0.00018813531812546583, + "loss": 2.7712, + "step": 3217 + }, + { + "epoch": 0.2597046243241062, + "grad_norm": 0.7606547474861145, + "learning_rate": 0.0001881278583809394, + "loss": 2.6714, + "step": 3218 + }, + { + "epoch": 0.2597853280606892, + "grad_norm": 0.7166680097579956, + "learning_rate": 0.00018812039644003638, + "loss": 2.7147, + "step": 3219 + }, + { + "epoch": 0.2598660317972722, + "grad_norm": 0.8977978229522705, + "learning_rate": 0.0001881129323029427, + "loss": 2.7743, + "step": 3220 + }, + { + "epoch": 0.2599467355338552, + "grad_norm": 0.7447277307510376, + "learning_rate": 0.00018810546596984446, + "loss": 2.7049, + "step": 3221 + }, + { + "epoch": 0.2600274392704382, + "grad_norm": 0.7343515157699585, + "learning_rate": 0.00018809799744092768, + "loss": 2.6999, + "step": 3222 + }, + { + "epoch": 0.26010814300702123, + "grad_norm": 0.7303341627120972, + "learning_rate": 0.00018809052671637852, + "loss": 2.7222, + "step": 3223 + }, + { + "epoch": 0.2601888467436042, + "grad_norm": 0.7412950396537781, + "learning_rate": 0.00018808305379638314, + "loss": 2.6957, + "step": 3224 + }, + { + "epoch": 0.26026955048018724, + "grad_norm": 0.7495343089103699, + "learning_rate": 
0.00018807557868112781, + "loss": 2.7123, + "step": 3225 + }, + { + "epoch": 0.2603502542167702, + "grad_norm": 0.8137524724006653, + "learning_rate": 0.00018806810137079886, + "loss": 2.7191, + "step": 3226 + }, + { + "epoch": 0.26043095795335325, + "grad_norm": 0.786374568939209, + "learning_rate": 0.0001880606218655826, + "loss": 2.7237, + "step": 3227 + }, + { + "epoch": 0.2605116616899362, + "grad_norm": 0.9969484806060791, + "learning_rate": 0.00018805314016566543, + "loss": 2.7603, + "step": 3228 + }, + { + "epoch": 0.26059236542651926, + "grad_norm": 0.8132432103157043, + "learning_rate": 0.00018804565627123386, + "loss": 2.6807, + "step": 3229 + }, + { + "epoch": 0.26067306916310223, + "grad_norm": 0.7604904174804688, + "learning_rate": 0.00018803817018247436, + "loss": 2.7105, + "step": 3230 + }, + { + "epoch": 0.26075377289968527, + "grad_norm": 0.743505597114563, + "learning_rate": 0.00018803068189957354, + "loss": 2.7152, + "step": 3231 + }, + { + "epoch": 0.26083447663626824, + "grad_norm": 0.7780006527900696, + "learning_rate": 0.000188023191422718, + "loss": 2.7043, + "step": 3232 + }, + { + "epoch": 0.2609151803728513, + "grad_norm": 0.7683089375495911, + "learning_rate": 0.00018801569875209447, + "loss": 2.7033, + "step": 3233 + }, + { + "epoch": 0.26099588410943425, + "grad_norm": 0.7540118098258972, + "learning_rate": 0.0001880082038878896, + "loss": 2.7121, + "step": 3234 + }, + { + "epoch": 0.2610765878460173, + "grad_norm": 0.7509592771530151, + "learning_rate": 0.00018800070683029025, + "loss": 2.6575, + "step": 3235 + }, + { + "epoch": 0.26115729158260026, + "grad_norm": 0.8015461564064026, + "learning_rate": 0.00018799320757948327, + "loss": 2.6956, + "step": 3236 + }, + { + "epoch": 0.2612379953191833, + "grad_norm": 0.7586383819580078, + "learning_rate": 0.00018798570613565553, + "loss": 2.6719, + "step": 3237 + }, + { + "epoch": 0.26131869905576627, + "grad_norm": 0.7833155989646912, + "learning_rate": 0.000187978202498994, + "loss": 
2.7317, + "step": 3238 + }, + { + "epoch": 0.2613994027923493, + "grad_norm": 0.7976018786430359, + "learning_rate": 0.00018797069666968565, + "loss": 2.7514, + "step": 3239 + }, + { + "epoch": 0.2614801065289323, + "grad_norm": 0.8388968706130981, + "learning_rate": 0.00018796318864791763, + "loss": 2.6845, + "step": 3240 + }, + { + "epoch": 0.2615608102655153, + "grad_norm": 0.8082842230796814, + "learning_rate": 0.00018795567843387701, + "loss": 2.7204, + "step": 3241 + }, + { + "epoch": 0.2616415140020983, + "grad_norm": 0.7514800429344177, + "learning_rate": 0.00018794816602775094, + "loss": 2.7117, + "step": 3242 + }, + { + "epoch": 0.2617222177386813, + "grad_norm": 0.8676564693450928, + "learning_rate": 0.00018794065142972664, + "loss": 2.6596, + "step": 3243 + }, + { + "epoch": 0.2618029214752643, + "grad_norm": 0.7449865341186523, + "learning_rate": 0.0001879331346399915, + "loss": 2.7089, + "step": 3244 + }, + { + "epoch": 0.2618836252118473, + "grad_norm": 0.8020811676979065, + "learning_rate": 0.00018792561565873274, + "loss": 2.7293, + "step": 3245 + }, + { + "epoch": 0.2619643289484303, + "grad_norm": 0.7961642146110535, + "learning_rate": 0.00018791809448613783, + "loss": 2.7269, + "step": 3246 + }, + { + "epoch": 0.26204503268501333, + "grad_norm": 0.7842351198196411, + "learning_rate": 0.00018791057112239415, + "loss": 2.6773, + "step": 3247 + }, + { + "epoch": 0.2621257364215963, + "grad_norm": 0.7494246959686279, + "learning_rate": 0.00018790304556768925, + "loss": 2.7317, + "step": 3248 + }, + { + "epoch": 0.26220644015817934, + "grad_norm": 0.7822836637496948, + "learning_rate": 0.0001878955178222107, + "loss": 2.6834, + "step": 3249 + }, + { + "epoch": 0.2622871438947623, + "grad_norm": 0.8432494401931763, + "learning_rate": 0.00018788798788614607, + "loss": 2.7048, + "step": 3250 + }, + { + "epoch": 0.26236784763134535, + "grad_norm": 0.9599446058273315, + "learning_rate": 0.000187880455759683, + "loss": 2.7793, + "step": 3251 + }, + { + 
"epoch": 0.26244855136792833, + "grad_norm": 0.8097226023674011, + "learning_rate": 0.00018787292144300928, + "loss": 2.7177, + "step": 3252 + }, + { + "epoch": 0.26252925510451136, + "grad_norm": 0.8423499464988708, + "learning_rate": 0.00018786538493631265, + "loss": 2.7265, + "step": 3253 + }, + { + "epoch": 0.26260995884109434, + "grad_norm": 0.7388847470283508, + "learning_rate": 0.00018785784623978095, + "loss": 2.6778, + "step": 3254 + }, + { + "epoch": 0.26269066257767737, + "grad_norm": 0.766368567943573, + "learning_rate": 0.0001878503053536021, + "loss": 2.654, + "step": 3255 + }, + { + "epoch": 0.26277136631426035, + "grad_norm": 0.8181266188621521, + "learning_rate": 0.00018784276227796394, + "loss": 2.7568, + "step": 3256 + }, + { + "epoch": 0.2628520700508434, + "grad_norm": 0.8235312104225159, + "learning_rate": 0.00018783521701305452, + "loss": 2.7317, + "step": 3257 + }, + { + "epoch": 0.26293277378742635, + "grad_norm": 0.7103183269500732, + "learning_rate": 0.00018782766955906195, + "loss": 2.6919, + "step": 3258 + }, + { + "epoch": 0.2630134775240094, + "grad_norm": 0.7202538251876831, + "learning_rate": 0.0001878201199161742, + "loss": 2.7179, + "step": 3259 + }, + { + "epoch": 0.26309418126059236, + "grad_norm": 0.8402286171913147, + "learning_rate": 0.00018781256808457952, + "loss": 2.7789, + "step": 3260 + }, + { + "epoch": 0.2631748849971754, + "grad_norm": 0.8136829137802124, + "learning_rate": 0.00018780501406446613, + "loss": 2.6872, + "step": 3261 + }, + { + "epoch": 0.26325558873375837, + "grad_norm": 0.8017000555992126, + "learning_rate": 0.00018779745785602224, + "loss": 2.7527, + "step": 3262 + }, + { + "epoch": 0.2633362924703414, + "grad_norm": 0.7880774140357971, + "learning_rate": 0.00018778989945943619, + "loss": 2.7348, + "step": 3263 + }, + { + "epoch": 0.2634169962069244, + "grad_norm": 0.7402438521385193, + "learning_rate": 0.00018778233887489635, + "loss": 2.6946, + "step": 3264 + }, + { + "epoch": 0.26349769994350736, + 
"grad_norm": 0.7450907230377197, + "learning_rate": 0.0001877747761025912, + "loss": 2.7502, + "step": 3265 + }, + { + "epoch": 0.2635784036800904, + "grad_norm": 0.7504056692123413, + "learning_rate": 0.00018776721114270917, + "loss": 2.832, + "step": 3266 + }, + { + "epoch": 0.26365910741667337, + "grad_norm": 0.7710226774215698, + "learning_rate": 0.00018775964399543878, + "loss": 2.6895, + "step": 3267 + }, + { + "epoch": 0.2637398111532564, + "grad_norm": 0.769927978515625, + "learning_rate": 0.00018775207466096867, + "loss": 2.6801, + "step": 3268 + }, + { + "epoch": 0.2638205148898394, + "grad_norm": 0.7210869193077087, + "learning_rate": 0.0001877445031394875, + "loss": 2.6966, + "step": 3269 + }, + { + "epoch": 0.2639012186264224, + "grad_norm": 0.7731119990348816, + "learning_rate": 0.00018773692943118393, + "loss": 2.6965, + "step": 3270 + }, + { + "epoch": 0.2639819223630054, + "grad_norm": 0.7539728283882141, + "learning_rate": 0.00018772935353624672, + "loss": 2.753, + "step": 3271 + }, + { + "epoch": 0.2640626260995884, + "grad_norm": 0.7993821501731873, + "learning_rate": 0.00018772177545486472, + "loss": 2.7177, + "step": 3272 + }, + { + "epoch": 0.2641433298361714, + "grad_norm": 0.7880005240440369, + "learning_rate": 0.00018771419518722672, + "loss": 2.6854, + "step": 3273 + }, + { + "epoch": 0.2642240335727544, + "grad_norm": 0.8079188466072083, + "learning_rate": 0.0001877066127335217, + "loss": 2.734, + "step": 3274 + }, + { + "epoch": 0.2643047373093374, + "grad_norm": 0.8241428732872009, + "learning_rate": 0.00018769902809393865, + "loss": 2.7156, + "step": 3275 + }, + { + "epoch": 0.26438544104592043, + "grad_norm": 0.8007158041000366, + "learning_rate": 0.00018769144126866657, + "loss": 2.693, + "step": 3276 + }, + { + "epoch": 0.2644661447825034, + "grad_norm": 0.8360451459884644, + "learning_rate": 0.00018768385225789456, + "loss": 2.6919, + "step": 3277 + }, + { + "epoch": 0.26454684851908644, + "grad_norm": 0.7596627473831177, + 
"learning_rate": 0.00018767626106181172, + "loss": 2.7861, + "step": 3278 + }, + { + "epoch": 0.2646275522556694, + "grad_norm": 0.7469248175621033, + "learning_rate": 0.00018766866768060727, + "loss": 2.7305, + "step": 3279 + }, + { + "epoch": 0.26470825599225245, + "grad_norm": 0.7103936076164246, + "learning_rate": 0.00018766107211447045, + "loss": 2.6456, + "step": 3280 + }, + { + "epoch": 0.2647889597288354, + "grad_norm": 0.7595266103744507, + "learning_rate": 0.00018765347436359056, + "loss": 2.7235, + "step": 3281 + }, + { + "epoch": 0.26486966346541846, + "grad_norm": 0.786648154258728, + "learning_rate": 0.00018764587442815698, + "loss": 2.7182, + "step": 3282 + }, + { + "epoch": 0.26495036720200144, + "grad_norm": 0.7152618169784546, + "learning_rate": 0.00018763827230835908, + "loss": 2.6842, + "step": 3283 + }, + { + "epoch": 0.26503107093858447, + "grad_norm": 0.89169842004776, + "learning_rate": 0.00018763066800438636, + "loss": 2.7661, + "step": 3284 + }, + { + "epoch": 0.26511177467516744, + "grad_norm": 0.8148171305656433, + "learning_rate": 0.00018762306151642833, + "loss": 2.7264, + "step": 3285 + }, + { + "epoch": 0.2651924784117505, + "grad_norm": 0.8070533871650696, + "learning_rate": 0.00018761545284467454, + "loss": 2.7425, + "step": 3286 + }, + { + "epoch": 0.26527318214833345, + "grad_norm": 0.8536118268966675, + "learning_rate": 0.00018760784198931465, + "loss": 2.702, + "step": 3287 + }, + { + "epoch": 0.2653538858849165, + "grad_norm": 0.7422329783439636, + "learning_rate": 0.00018760022895053833, + "loss": 2.6913, + "step": 3288 + }, + { + "epoch": 0.26543458962149946, + "grad_norm": 0.7415527105331421, + "learning_rate": 0.0001875926137285353, + "loss": 2.6472, + "step": 3289 + }, + { + "epoch": 0.2655152933580825, + "grad_norm": 0.8432031273841858, + "learning_rate": 0.00018758499632349538, + "loss": 2.7506, + "step": 3290 + }, + { + "epoch": 0.26559599709466547, + "grad_norm": 0.8113259077072144, + "learning_rate": 
0.0001875773767356084, + "loss": 2.6866, + "step": 3291 + }, + { + "epoch": 0.2656767008312485, + "grad_norm": 0.7898122668266296, + "learning_rate": 0.00018756975496506424, + "loss": 2.6516, + "step": 3292 + }, + { + "epoch": 0.2657574045678315, + "grad_norm": 0.7627275586128235, + "learning_rate": 0.0001875621310120529, + "loss": 2.7065, + "step": 3293 + }, + { + "epoch": 0.2658381083044145, + "grad_norm": 0.8227291107177734, + "learning_rate": 0.00018755450487676435, + "loss": 2.7614, + "step": 3294 + }, + { + "epoch": 0.2659188120409975, + "grad_norm": 0.8162109851837158, + "learning_rate": 0.00018754687655938868, + "loss": 2.7924, + "step": 3295 + }, + { + "epoch": 0.2659995157775805, + "grad_norm": 0.7231846451759338, + "learning_rate": 0.00018753924606011602, + "loss": 2.7505, + "step": 3296 + }, + { + "epoch": 0.2660802195141635, + "grad_norm": 0.8635944724082947, + "learning_rate": 0.00018753161337913647, + "loss": 2.7505, + "step": 3297 + }, + { + "epoch": 0.26616092325074653, + "grad_norm": 0.8131890892982483, + "learning_rate": 0.00018752397851664031, + "loss": 2.7872, + "step": 3298 + }, + { + "epoch": 0.2662416269873295, + "grad_norm": 0.7336695790290833, + "learning_rate": 0.00018751634147281786, + "loss": 2.7517, + "step": 3299 + }, + { + "epoch": 0.26632233072391254, + "grad_norm": 0.7541754841804504, + "learning_rate": 0.00018750870224785939, + "loss": 2.7807, + "step": 3300 + }, + { + "epoch": 0.2664030344604955, + "grad_norm": 0.9347110390663147, + "learning_rate": 0.0001875010608419553, + "loss": 2.6954, + "step": 3301 + }, + { + "epoch": 0.26648373819707855, + "grad_norm": 0.7591213583946228, + "learning_rate": 0.00018749341725529604, + "loss": 2.7019, + "step": 3302 + }, + { + "epoch": 0.2665644419336615, + "grad_norm": 0.811527669429779, + "learning_rate": 0.00018748577148807211, + "loss": 2.7123, + "step": 3303 + }, + { + "epoch": 0.26664514567024455, + "grad_norm": 0.7419980764389038, + "learning_rate": 0.00018747812354047408, + "loss": 
2.7383, + "step": 3304 + }, + { + "epoch": 0.26672584940682753, + "grad_norm": 0.7801192402839661, + "learning_rate": 0.00018747047341269256, + "loss": 2.7245, + "step": 3305 + }, + { + "epoch": 0.26680655314341056, + "grad_norm": 0.7392756938934326, + "learning_rate": 0.00018746282110491816, + "loss": 2.6992, + "step": 3306 + }, + { + "epoch": 0.26688725687999354, + "grad_norm": 0.7085927724838257, + "learning_rate": 0.00018745516661734161, + "loss": 2.739, + "step": 3307 + }, + { + "epoch": 0.26696796061657657, + "grad_norm": 0.7218676209449768, + "learning_rate": 0.00018744750995015373, + "loss": 2.7091, + "step": 3308 + }, + { + "epoch": 0.26704866435315955, + "grad_norm": 0.847872257232666, + "learning_rate": 0.0001874398511035453, + "loss": 2.699, + "step": 3309 + }, + { + "epoch": 0.2671293680897426, + "grad_norm": 0.8280770778656006, + "learning_rate": 0.00018743219007770723, + "loss": 2.763, + "step": 3310 + }, + { + "epoch": 0.26721007182632556, + "grad_norm": 0.7271165251731873, + "learning_rate": 0.0001874245268728304, + "loss": 2.7219, + "step": 3311 + }, + { + "epoch": 0.2672907755629086, + "grad_norm": 0.7342363595962524, + "learning_rate": 0.00018741686148910586, + "loss": 2.6765, + "step": 3312 + }, + { + "epoch": 0.26737147929949157, + "grad_norm": 0.7260174751281738, + "learning_rate": 0.0001874091939267246, + "loss": 2.7003, + "step": 3313 + }, + { + "epoch": 0.2674521830360746, + "grad_norm": 0.742494523525238, + "learning_rate": 0.00018740152418587775, + "loss": 2.7371, + "step": 3314 + }, + { + "epoch": 0.2675328867726576, + "grad_norm": 0.7238131165504456, + "learning_rate": 0.00018739385226675646, + "loss": 2.7486, + "step": 3315 + }, + { + "epoch": 0.26761359050924055, + "grad_norm": 0.7329363226890564, + "learning_rate": 0.0001873861781695519, + "loss": 2.6414, + "step": 3316 + }, + { + "epoch": 0.2676942942458236, + "grad_norm": 0.7078117728233337, + "learning_rate": 0.00018737850189445534, + "loss": 2.7271, + "step": 3317 + }, + { + 
"epoch": 0.26777499798240656, + "grad_norm": 0.7945309281349182, + "learning_rate": 0.00018737082344165814, + "loss": 2.7323, + "step": 3318 + }, + { + "epoch": 0.2678557017189896, + "grad_norm": 0.7510890364646912, + "learning_rate": 0.0001873631428113516, + "loss": 2.6563, + "step": 3319 + }, + { + "epoch": 0.26793640545557257, + "grad_norm": 0.7790820002555847, + "learning_rate": 0.0001873554600037272, + "loss": 2.7445, + "step": 3320 + }, + { + "epoch": 0.2680171091921556, + "grad_norm": 0.7689393162727356, + "learning_rate": 0.00018734777501897636, + "loss": 2.669, + "step": 3321 + }, + { + "epoch": 0.2680978129287386, + "grad_norm": 0.8227118253707886, + "learning_rate": 0.00018734008785729065, + "loss": 2.7279, + "step": 3322 + }, + { + "epoch": 0.2681785166653216, + "grad_norm": 0.7551290392875671, + "learning_rate": 0.00018733239851886162, + "loss": 2.6864, + "step": 3323 + }, + { + "epoch": 0.2682592204019046, + "grad_norm": 0.8572004437446594, + "learning_rate": 0.00018732470700388097, + "loss": 2.8159, + "step": 3324 + }, + { + "epoch": 0.2683399241384876, + "grad_norm": 0.7509044408798218, + "learning_rate": 0.00018731701331254033, + "loss": 2.7698, + "step": 3325 + }, + { + "epoch": 0.2684206278750706, + "grad_norm": 0.8474129438400269, + "learning_rate": 0.00018730931744503148, + "loss": 2.6745, + "step": 3326 + }, + { + "epoch": 0.2685013316116536, + "grad_norm": 0.8310953378677368, + "learning_rate": 0.00018730161940154618, + "loss": 2.712, + "step": 3327 + }, + { + "epoch": 0.2685820353482366, + "grad_norm": 0.8820717334747314, + "learning_rate": 0.00018729391918227632, + "loss": 2.7776, + "step": 3328 + }, + { + "epoch": 0.26866273908481964, + "grad_norm": 0.8827663064002991, + "learning_rate": 0.00018728621678741384, + "loss": 2.7115, + "step": 3329 + }, + { + "epoch": 0.2687434428214026, + "grad_norm": 0.7896323800086975, + "learning_rate": 0.00018727851221715064, + "loss": 2.6799, + "step": 3330 + }, + { + "epoch": 0.26882414655798564, + 
"grad_norm": 0.7775614261627197, + "learning_rate": 0.0001872708054716788, + "loss": 2.7021, + "step": 3331 + }, + { + "epoch": 0.2689048502945686, + "grad_norm": 0.8150187134742737, + "learning_rate": 0.0001872630965511903, + "loss": 2.679, + "step": 3332 + }, + { + "epoch": 0.26898555403115165, + "grad_norm": 0.7821844220161438, + "learning_rate": 0.00018725538545587736, + "loss": 2.7067, + "step": 3333 + }, + { + "epoch": 0.26906625776773463, + "grad_norm": 0.8390234112739563, + "learning_rate": 0.00018724767218593216, + "loss": 2.7133, + "step": 3334 + }, + { + "epoch": 0.26914696150431766, + "grad_norm": 0.8150694370269775, + "learning_rate": 0.00018723995674154687, + "loss": 2.7022, + "step": 3335 + }, + { + "epoch": 0.26922766524090064, + "grad_norm": 0.7473872900009155, + "learning_rate": 0.0001872322391229138, + "loss": 2.7268, + "step": 3336 + }, + { + "epoch": 0.26930836897748367, + "grad_norm": 0.7591951489448547, + "learning_rate": 0.0001872245193302253, + "loss": 2.7516, + "step": 3337 + }, + { + "epoch": 0.26938907271406665, + "grad_norm": 0.7914662957191467, + "learning_rate": 0.00018721679736367382, + "loss": 2.6613, + "step": 3338 + }, + { + "epoch": 0.2694697764506497, + "grad_norm": 0.7823428511619568, + "learning_rate": 0.00018720907322345172, + "loss": 2.6661, + "step": 3339 + }, + { + "epoch": 0.26955048018723266, + "grad_norm": 0.8428264260292053, + "learning_rate": 0.00018720134690975156, + "loss": 2.672, + "step": 3340 + }, + { + "epoch": 0.2696311839238157, + "grad_norm": 0.71320641040802, + "learning_rate": 0.00018719361842276587, + "loss": 2.7326, + "step": 3341 + }, + { + "epoch": 0.26971188766039866, + "grad_norm": 0.7972821593284607, + "learning_rate": 0.00018718588776268731, + "loss": 2.7182, + "step": 3342 + }, + { + "epoch": 0.2697925913969817, + "grad_norm": 0.7924500107765198, + "learning_rate": 0.0001871781549297085, + "loss": 2.7308, + "step": 3343 + }, + { + "epoch": 0.2698732951335647, + "grad_norm": 0.7668356895446777, + 
"learning_rate": 0.0001871704199240222, + "loss": 2.678, + "step": 3344 + }, + { + "epoch": 0.2699539988701477, + "grad_norm": 0.866973876953125, + "learning_rate": 0.00018716268274582114, + "loss": 2.7802, + "step": 3345 + }, + { + "epoch": 0.2700347026067307, + "grad_norm": 0.7709557414054871, + "learning_rate": 0.0001871549433952982, + "loss": 2.7418, + "step": 3346 + }, + { + "epoch": 0.2701154063433137, + "grad_norm": 0.7707573771476746, + "learning_rate": 0.00018714720187264626, + "loss": 2.7486, + "step": 3347 + }, + { + "epoch": 0.2701961100798967, + "grad_norm": 0.8007768392562866, + "learning_rate": 0.00018713945817805822, + "loss": 2.7106, + "step": 3348 + }, + { + "epoch": 0.2702768138164797, + "grad_norm": 0.7239583134651184, + "learning_rate": 0.0001871317123117271, + "loss": 2.7209, + "step": 3349 + }, + { + "epoch": 0.2703575175530627, + "grad_norm": 0.775104820728302, + "learning_rate": 0.00018712396427384594, + "loss": 2.6503, + "step": 3350 + }, + { + "epoch": 0.27043822128964573, + "grad_norm": 0.7492741346359253, + "learning_rate": 0.0001871162140646079, + "loss": 2.699, + "step": 3351 + }, + { + "epoch": 0.2705189250262287, + "grad_norm": 0.7550846338272095, + "learning_rate": 0.00018710846168420604, + "loss": 2.7458, + "step": 3352 + }, + { + "epoch": 0.27059962876281174, + "grad_norm": 0.807996928691864, + "learning_rate": 0.0001871007071328336, + "loss": 2.7604, + "step": 3353 + }, + { + "epoch": 0.2706803324993947, + "grad_norm": 0.7381845116615295, + "learning_rate": 0.00018709295041068386, + "loss": 2.6833, + "step": 3354 + }, + { + "epoch": 0.27076103623597775, + "grad_norm": 0.7542420625686646, + "learning_rate": 0.00018708519151795016, + "loss": 2.6462, + "step": 3355 + }, + { + "epoch": 0.2708417399725607, + "grad_norm": 0.7675846219062805, + "learning_rate": 0.00018707743045482582, + "loss": 2.7068, + "step": 3356 + }, + { + "epoch": 0.27092244370914376, + "grad_norm": 0.7437357902526855, + "learning_rate": 0.0001870696672215043, + 
"loss": 2.73, + "step": 3357 + }, + { + "epoch": 0.27100314744572673, + "grad_norm": 0.7880852222442627, + "learning_rate": 0.00018706190181817903, + "loss": 2.759, + "step": 3358 + }, + { + "epoch": 0.27108385118230977, + "grad_norm": 0.7403178811073303, + "learning_rate": 0.00018705413424504363, + "loss": 2.7538, + "step": 3359 + }, + { + "epoch": 0.27116455491889274, + "grad_norm": 0.7601225972175598, + "learning_rate": 0.00018704636450229164, + "loss": 2.7331, + "step": 3360 + }, + { + "epoch": 0.2712452586554758, + "grad_norm": 0.7810701727867126, + "learning_rate": 0.0001870385925901167, + "loss": 2.7736, + "step": 3361 + }, + { + "epoch": 0.27132596239205875, + "grad_norm": 0.8934530019760132, + "learning_rate": 0.0001870308185087125, + "loss": 2.7214, + "step": 3362 + }, + { + "epoch": 0.2714066661286418, + "grad_norm": 0.7468441128730774, + "learning_rate": 0.0001870230422582728, + "loss": 2.6957, + "step": 3363 + }, + { + "epoch": 0.27148736986522476, + "grad_norm": 0.7643293142318726, + "learning_rate": 0.00018701526383899144, + "loss": 2.6773, + "step": 3364 + }, + { + "epoch": 0.2715680736018078, + "grad_norm": 0.7602033615112305, + "learning_rate": 0.0001870074832510622, + "loss": 2.7095, + "step": 3365 + }, + { + "epoch": 0.27164877733839077, + "grad_norm": 0.772065281867981, + "learning_rate": 0.00018699970049467908, + "loss": 2.6753, + "step": 3366 + }, + { + "epoch": 0.27172948107497374, + "grad_norm": 0.7718359231948853, + "learning_rate": 0.00018699191557003598, + "loss": 2.6857, + "step": 3367 + }, + { + "epoch": 0.2718101848115568, + "grad_norm": 0.8207093477249146, + "learning_rate": 0.00018698412847732693, + "loss": 2.7549, + "step": 3368 + }, + { + "epoch": 0.27189088854813975, + "grad_norm": 0.7393590807914734, + "learning_rate": 0.00018697633921674605, + "loss": 2.6884, + "step": 3369 + }, + { + "epoch": 0.2719715922847228, + "grad_norm": 0.7955869436264038, + "learning_rate": 0.0001869685477884874, + "loss": 2.708, + "step": 3370 + }, + 
{ + "epoch": 0.27205229602130576, + "grad_norm": 0.7392188906669617, + "learning_rate": 0.00018696075419274527, + "loss": 2.717, + "step": 3371 + }, + { + "epoch": 0.2721329997578888, + "grad_norm": 0.800204873085022, + "learning_rate": 0.00018695295842971376, + "loss": 2.7184, + "step": 3372 + }, + { + "epoch": 0.27221370349447177, + "grad_norm": 0.8195740580558777, + "learning_rate": 0.00018694516049958725, + "loss": 2.6865, + "step": 3373 + }, + { + "epoch": 0.2722944072310548, + "grad_norm": 0.8617578148841858, + "learning_rate": 0.00018693736040256007, + "loss": 2.7098, + "step": 3374 + }, + { + "epoch": 0.2723751109676378, + "grad_norm": 0.8184413909912109, + "learning_rate": 0.00018692955813882662, + "loss": 2.7449, + "step": 3375 + }, + { + "epoch": 0.2724558147042208, + "grad_norm": 0.990275502204895, + "learning_rate": 0.00018692175370858133, + "loss": 2.7891, + "step": 3376 + }, + { + "epoch": 0.2725365184408038, + "grad_norm": 0.7857810854911804, + "learning_rate": 0.0001869139471120187, + "loss": 2.6884, + "step": 3377 + }, + { + "epoch": 0.2726172221773868, + "grad_norm": 0.8040915131568909, + "learning_rate": 0.00018690613834933335, + "loss": 2.7047, + "step": 3378 + }, + { + "epoch": 0.2726979259139698, + "grad_norm": 0.7512348294258118, + "learning_rate": 0.00018689832742071983, + "loss": 2.6898, + "step": 3379 + }, + { + "epoch": 0.27277862965055283, + "grad_norm": 0.6781859397888184, + "learning_rate": 0.00018689051432637288, + "loss": 2.6396, + "step": 3380 + }, + { + "epoch": 0.2728593333871358, + "grad_norm": 0.7858247756958008, + "learning_rate": 0.00018688269906648716, + "loss": 2.6785, + "step": 3381 + }, + { + "epoch": 0.27294003712371884, + "grad_norm": 0.7342140674591064, + "learning_rate": 0.00018687488164125744, + "loss": 2.6778, + "step": 3382 + }, + { + "epoch": 0.2730207408603018, + "grad_norm": 0.8113372921943665, + "learning_rate": 0.00018686706205087858, + "loss": 2.6982, + "step": 3383 + }, + { + "epoch": 0.27310144459688485, + 
"grad_norm": 0.7904205918312073, + "learning_rate": 0.0001868592402955455, + "loss": 2.7891, + "step": 3384 + }, + { + "epoch": 0.2731821483334678, + "grad_norm": 0.7274135947227478, + "learning_rate": 0.00018685141637545308, + "loss": 2.6908, + "step": 3385 + }, + { + "epoch": 0.27326285207005085, + "grad_norm": 0.7675744295120239, + "learning_rate": 0.0001868435902907963, + "loss": 2.6987, + "step": 3386 + }, + { + "epoch": 0.27334355580663383, + "grad_norm": 0.8085030913352966, + "learning_rate": 0.00018683576204177026, + "loss": 2.7798, + "step": 3387 + }, + { + "epoch": 0.27342425954321686, + "grad_norm": 0.7498135566711426, + "learning_rate": 0.00018682793162857006, + "loss": 2.7216, + "step": 3388 + }, + { + "epoch": 0.27350496327979984, + "grad_norm": 0.900741696357727, + "learning_rate": 0.0001868200990513908, + "loss": 2.6871, + "step": 3389 + }, + { + "epoch": 0.27358566701638287, + "grad_norm": 0.7948571443557739, + "learning_rate": 0.00018681226431042772, + "loss": 2.6985, + "step": 3390 + }, + { + "epoch": 0.27366637075296585, + "grad_norm": 0.8739100098609924, + "learning_rate": 0.00018680442740587612, + "loss": 2.6922, + "step": 3391 + }, + { + "epoch": 0.2737470744895489, + "grad_norm": 0.730084240436554, + "learning_rate": 0.00018679658833793125, + "loss": 2.7029, + "step": 3392 + }, + { + "epoch": 0.27382777822613186, + "grad_norm": 0.7560603022575378, + "learning_rate": 0.00018678874710678853, + "loss": 2.7429, + "step": 3393 + }, + { + "epoch": 0.2739084819627149, + "grad_norm": 0.8331460356712341, + "learning_rate": 0.00018678090371264334, + "loss": 2.7157, + "step": 3394 + }, + { + "epoch": 0.27398918569929787, + "grad_norm": 0.8070168495178223, + "learning_rate": 0.00018677305815569122, + "loss": 2.7629, + "step": 3395 + }, + { + "epoch": 0.2740698894358809, + "grad_norm": 0.7922534346580505, + "learning_rate": 0.00018676521043612762, + "loss": 2.7159, + "step": 3396 + }, + { + "epoch": 0.2741505931724639, + "grad_norm": 0.7838901281356812, 
+ "learning_rate": 0.0001867573605541482, + "loss": 2.6721, + "step": 3397 + }, + { + "epoch": 0.2742312969090469, + "grad_norm": 0.8912512063980103, + "learning_rate": 0.00018674950850994856, + "loss": 2.7243, + "step": 3398 + }, + { + "epoch": 0.2743120006456299, + "grad_norm": 0.7205448150634766, + "learning_rate": 0.0001867416543037244, + "loss": 2.7152, + "step": 3399 + }, + { + "epoch": 0.2743927043822129, + "grad_norm": 0.6992877721786499, + "learning_rate": 0.00018673379793567146, + "loss": 2.7183, + "step": 3400 + }, + { + "epoch": 0.2744734081187959, + "grad_norm": 0.8009448051452637, + "learning_rate": 0.00018672593940598556, + "loss": 2.715, + "step": 3401 + }, + { + "epoch": 0.2745541118553789, + "grad_norm": 0.7812647819519043, + "learning_rate": 0.0001867180787148626, + "loss": 2.7579, + "step": 3402 + }, + { + "epoch": 0.2746348155919619, + "grad_norm": 0.7300555109977722, + "learning_rate": 0.00018671021586249835, + "loss": 2.694, + "step": 3403 + }, + { + "epoch": 0.27471551932854493, + "grad_norm": 0.8082736134529114, + "learning_rate": 0.00018670235084908887, + "loss": 2.768, + "step": 3404 + }, + { + "epoch": 0.2747962230651279, + "grad_norm": 0.7729581594467163, + "learning_rate": 0.0001866944836748302, + "loss": 2.7256, + "step": 3405 + }, + { + "epoch": 0.27487692680171094, + "grad_norm": 0.8113458752632141, + "learning_rate": 0.00018668661433991835, + "loss": 2.6692, + "step": 3406 + }, + { + "epoch": 0.2749576305382939, + "grad_norm": 0.7757337689399719, + "learning_rate": 0.00018667874284454948, + "loss": 2.6769, + "step": 3407 + }, + { + "epoch": 0.27503833427487695, + "grad_norm": 0.7896093726158142, + "learning_rate": 0.00018667086918891976, + "loss": 2.7118, + "step": 3408 + }, + { + "epoch": 0.2751190380114599, + "grad_norm": 0.7764071822166443, + "learning_rate": 0.00018666299337322543, + "loss": 2.7284, + "step": 3409 + }, + { + "epoch": 0.27519974174804296, + "grad_norm": 0.794815182685852, + "learning_rate": 
0.00018665511539766273, + "loss": 2.7232, + "step": 3410 + }, + { + "epoch": 0.27528044548462594, + "grad_norm": 0.8134122490882874, + "learning_rate": 0.0001866472352624281, + "loss": 2.7023, + "step": 3411 + }, + { + "epoch": 0.27536114922120897, + "grad_norm": 0.7654025554656982, + "learning_rate": 0.00018663935296771782, + "loss": 2.7002, + "step": 3412 + }, + { + "epoch": 0.27544185295779194, + "grad_norm": 0.6930806636810303, + "learning_rate": 0.0001866314685137284, + "loss": 2.6764, + "step": 3413 + }, + { + "epoch": 0.275522556694375, + "grad_norm": 0.7535184621810913, + "learning_rate": 0.00018662358190065631, + "loss": 2.6657, + "step": 3414 + }, + { + "epoch": 0.27560326043095795, + "grad_norm": 0.7775620818138123, + "learning_rate": 0.00018661569312869816, + "loss": 2.6931, + "step": 3415 + }, + { + "epoch": 0.275683964167541, + "grad_norm": 0.7209072113037109, + "learning_rate": 0.00018660780219805048, + "loss": 2.7293, + "step": 3416 + }, + { + "epoch": 0.27576466790412396, + "grad_norm": 0.7182055711746216, + "learning_rate": 0.00018659990910891, + "loss": 2.6561, + "step": 3417 + }, + { + "epoch": 0.27584537164070694, + "grad_norm": 0.7130969166755676, + "learning_rate": 0.00018659201386147338, + "loss": 2.7156, + "step": 3418 + }, + { + "epoch": 0.27592607537728997, + "grad_norm": 0.7296265959739685, + "learning_rate": 0.00018658411645593745, + "loss": 2.6894, + "step": 3419 + }, + { + "epoch": 0.27600677911387295, + "grad_norm": 0.7707972526550293, + "learning_rate": 0.000186576216892499, + "loss": 2.7528, + "step": 3420 + }, + { + "epoch": 0.276087482850456, + "grad_norm": 0.6945170164108276, + "learning_rate": 0.0001865683151713549, + "loss": 2.6762, + "step": 3421 + }, + { + "epoch": 0.27616818658703896, + "grad_norm": 0.7664114236831665, + "learning_rate": 0.0001865604112927021, + "loss": 2.7212, + "step": 3422 + }, + { + "epoch": 0.276248890323622, + "grad_norm": 0.6950399875640869, + "learning_rate": 0.0001865525052567376, + "loss": 2.7035, 
+ "step": 3423 + }, + { + "epoch": 0.27632959406020496, + "grad_norm": 0.7307506799697876, + "learning_rate": 0.00018654459706365838, + "loss": 2.7296, + "step": 3424 + }, + { + "epoch": 0.276410297796788, + "grad_norm": 0.720912516117096, + "learning_rate": 0.0001865366867136616, + "loss": 2.6884, + "step": 3425 + }, + { + "epoch": 0.276491001533371, + "grad_norm": 0.7581072449684143, + "learning_rate": 0.00018652877420694436, + "loss": 2.705, + "step": 3426 + }, + { + "epoch": 0.276571705269954, + "grad_norm": 0.7473136186599731, + "learning_rate": 0.0001865208595437039, + "loss": 2.7316, + "step": 3427 + }, + { + "epoch": 0.276652409006537, + "grad_norm": 0.7272855639457703, + "learning_rate": 0.00018651294272413745, + "loss": 2.6834, + "step": 3428 + }, + { + "epoch": 0.27673311274312, + "grad_norm": 0.7046366930007935, + "learning_rate": 0.0001865050237484423, + "loss": 2.6491, + "step": 3429 + }, + { + "epoch": 0.276813816479703, + "grad_norm": 0.7521376609802246, + "learning_rate": 0.00018649710261681586, + "loss": 2.708, + "step": 3430 + }, + { + "epoch": 0.276894520216286, + "grad_norm": 0.7372453808784485, + "learning_rate": 0.0001864891793294555, + "loss": 2.682, + "step": 3431 + }, + { + "epoch": 0.276975223952869, + "grad_norm": 0.7381749749183655, + "learning_rate": 0.0001864812538865587, + "loss": 2.7526, + "step": 3432 + }, + { + "epoch": 0.27705592768945203, + "grad_norm": 0.7891514301300049, + "learning_rate": 0.00018647332628832298, + "loss": 2.6904, + "step": 3433 + }, + { + "epoch": 0.277136631426035, + "grad_norm": 0.7942724823951721, + "learning_rate": 0.00018646539653494596, + "loss": 2.7873, + "step": 3434 + }, + { + "epoch": 0.27721733516261804, + "grad_norm": 0.7365398406982422, + "learning_rate": 0.0001864574646266252, + "loss": 2.6684, + "step": 3435 + }, + { + "epoch": 0.277298038899201, + "grad_norm": 0.7802249193191528, + "learning_rate": 0.00018644953056355846, + "loss": 2.7152, + "step": 3436 + }, + { + "epoch": 
0.27737874263578405, + "grad_norm": 0.7801448106765747, + "learning_rate": 0.0001864415943459434, + "loss": 2.7034, + "step": 3437 + }, + { + "epoch": 0.277459446372367, + "grad_norm": 0.7722738981246948, + "learning_rate": 0.00018643365597397786, + "loss": 2.7135, + "step": 3438 + }, + { + "epoch": 0.27754015010895006, + "grad_norm": 0.7847445011138916, + "learning_rate": 0.00018642571544785967, + "loss": 2.6999, + "step": 3439 + }, + { + "epoch": 0.27762085384553303, + "grad_norm": 0.7226125597953796, + "learning_rate": 0.00018641777276778675, + "loss": 2.7613, + "step": 3440 + }, + { + "epoch": 0.27770155758211607, + "grad_norm": 0.713188111782074, + "learning_rate": 0.000186409827933957, + "loss": 2.6953, + "step": 3441 + }, + { + "epoch": 0.27778226131869904, + "grad_norm": 0.7308298349380493, + "learning_rate": 0.0001864018809465685, + "loss": 2.7045, + "step": 3442 + }, + { + "epoch": 0.2778629650552821, + "grad_norm": 0.7606719732284546, + "learning_rate": 0.00018639393180581925, + "loss": 2.7883, + "step": 3443 + }, + { + "epoch": 0.27794366879186505, + "grad_norm": 0.7583296895027161, + "learning_rate": 0.00018638598051190738, + "loss": 2.6734, + "step": 3444 + }, + { + "epoch": 0.2780243725284481, + "grad_norm": 0.7147012948989868, + "learning_rate": 0.00018637802706503108, + "loss": 2.7223, + "step": 3445 + }, + { + "epoch": 0.27810507626503106, + "grad_norm": 0.7812997102737427, + "learning_rate": 0.00018637007146538853, + "loss": 2.7277, + "step": 3446 + }, + { + "epoch": 0.2781857800016141, + "grad_norm": 0.7460772395133972, + "learning_rate": 0.000186362113713178, + "loss": 2.6875, + "step": 3447 + }, + { + "epoch": 0.27826648373819707, + "grad_norm": 0.7359143495559692, + "learning_rate": 0.0001863541538085979, + "loss": 2.7122, + "step": 3448 + }, + { + "epoch": 0.2783471874747801, + "grad_norm": 0.7122978568077087, + "learning_rate": 0.00018634619175184655, + "loss": 2.6381, + "step": 3449 + }, + { + "epoch": 0.2784278912113631, + "grad_norm": 
0.6965885758399963, + "learning_rate": 0.00018633822754312234, + "loss": 2.6957, + "step": 3450 + }, + { + "epoch": 0.2785085949479461, + "grad_norm": 0.7737082242965698, + "learning_rate": 0.00018633026118262385, + "loss": 2.7579, + "step": 3451 + }, + { + "epoch": 0.2785892986845291, + "grad_norm": 0.6925420165061951, + "learning_rate": 0.00018632229267054958, + "loss": 2.6226, + "step": 3452 + }, + { + "epoch": 0.2786700024211121, + "grad_norm": 0.7496356964111328, + "learning_rate": 0.0001863143220070981, + "loss": 2.7059, + "step": 3453 + }, + { + "epoch": 0.2787507061576951, + "grad_norm": 0.7066817283630371, + "learning_rate": 0.0001863063491924681, + "loss": 2.681, + "step": 3454 + }, + { + "epoch": 0.2788314098942781, + "grad_norm": 0.8143237829208374, + "learning_rate": 0.0001862983742268583, + "loss": 2.6698, + "step": 3455 + }, + { + "epoch": 0.2789121136308611, + "grad_norm": 0.7518483996391296, + "learning_rate": 0.00018629039711046737, + "loss": 2.7041, + "step": 3456 + }, + { + "epoch": 0.27899281736744413, + "grad_norm": 0.8756366968154907, + "learning_rate": 0.00018628241784349422, + "loss": 2.7547, + "step": 3457 + }, + { + "epoch": 0.2790735211040271, + "grad_norm": 0.8709446787834167, + "learning_rate": 0.0001862744364261377, + "loss": 2.7068, + "step": 3458 + }, + { + "epoch": 0.27915422484061014, + "grad_norm": 0.8121913075447083, + "learning_rate": 0.00018626645285859666, + "loss": 2.673, + "step": 3459 + }, + { + "epoch": 0.2792349285771931, + "grad_norm": 0.7685909271240234, + "learning_rate": 0.00018625846714107012, + "loss": 2.7389, + "step": 3460 + }, + { + "epoch": 0.27931563231377615, + "grad_norm": 0.7098073363304138, + "learning_rate": 0.0001862504792737571, + "loss": 2.6942, + "step": 3461 + }, + { + "epoch": 0.27939633605035913, + "grad_norm": 0.7718049883842468, + "learning_rate": 0.00018624248925685666, + "loss": 2.7359, + "step": 3462 + }, + { + "epoch": 0.27947703978694216, + "grad_norm": 0.7912909984588623, + "learning_rate": 
0.00018623449709056797, + "loss": 2.6658, + "step": 3463 + }, + { + "epoch": 0.27955774352352514, + "grad_norm": 0.7255454659461975, + "learning_rate": 0.0001862265027750902, + "loss": 2.771, + "step": 3464 + }, + { + "epoch": 0.27963844726010817, + "grad_norm": 0.7542218565940857, + "learning_rate": 0.00018621850631062254, + "loss": 2.6741, + "step": 3465 + }, + { + "epoch": 0.27971915099669115, + "grad_norm": 0.8386052846908569, + "learning_rate": 0.00018621050769736437, + "loss": 2.67, + "step": 3466 + }, + { + "epoch": 0.2797998547332742, + "grad_norm": 0.8563781976699829, + "learning_rate": 0.00018620250693551495, + "loss": 2.7461, + "step": 3467 + }, + { + "epoch": 0.27988055846985715, + "grad_norm": 0.7490699291229248, + "learning_rate": 0.00018619450402527376, + "loss": 2.6863, + "step": 3468 + }, + { + "epoch": 0.27996126220644013, + "grad_norm": 0.8008999824523926, + "learning_rate": 0.00018618649896684017, + "loss": 2.7769, + "step": 3469 + }, + { + "epoch": 0.28004196594302316, + "grad_norm": 0.7678235769271851, + "learning_rate": 0.00018617849176041378, + "loss": 2.7237, + "step": 3470 + }, + { + "epoch": 0.28012266967960614, + "grad_norm": 0.8774877786636353, + "learning_rate": 0.00018617048240619408, + "loss": 2.7502, + "step": 3471 + }, + { + "epoch": 0.28020337341618917, + "grad_norm": 0.8150283098220825, + "learning_rate": 0.00018616247090438073, + "loss": 2.6941, + "step": 3472 + }, + { + "epoch": 0.28028407715277215, + "grad_norm": 0.7330089807510376, + "learning_rate": 0.00018615445725517332, + "loss": 2.7002, + "step": 3473 + }, + { + "epoch": 0.2803647808893552, + "grad_norm": 0.748275101184845, + "learning_rate": 0.00018614644145877168, + "loss": 2.6996, + "step": 3474 + }, + { + "epoch": 0.28044548462593816, + "grad_norm": 0.7718296647071838, + "learning_rate": 0.0001861384235153755, + "loss": 2.7333, + "step": 3475 + }, + { + "epoch": 0.2805261883625212, + "grad_norm": 0.7751123309135437, + "learning_rate": 0.00018613040342518465, + 
"loss": 2.7362, + "step": 3476 + }, + { + "epoch": 0.28060689209910417, + "grad_norm": 0.70979243516922, + "learning_rate": 0.000186122381188399, + "loss": 2.6651, + "step": 3477 + }, + { + "epoch": 0.2806875958356872, + "grad_norm": 0.9607138633728027, + "learning_rate": 0.00018611435680521848, + "loss": 2.7779, + "step": 3478 + }, + { + "epoch": 0.2807682995722702, + "grad_norm": 0.709671676158905, + "learning_rate": 0.0001861063302758431, + "loss": 2.6994, + "step": 3479 + }, + { + "epoch": 0.2808490033088532, + "grad_norm": 0.8765757083892822, + "learning_rate": 0.00018609830160047283, + "loss": 2.7107, + "step": 3480 + }, + { + "epoch": 0.2809297070454362, + "grad_norm": 0.7996764183044434, + "learning_rate": 0.0001860902707793079, + "loss": 2.7921, + "step": 3481 + }, + { + "epoch": 0.2810104107820192, + "grad_norm": 0.7094513177871704, + "learning_rate": 0.0001860822378125483, + "loss": 2.7211, + "step": 3482 + }, + { + "epoch": 0.2810911145186022, + "grad_norm": 0.8068607449531555, + "learning_rate": 0.0001860742027003944, + "loss": 2.675, + "step": 3483 + }, + { + "epoch": 0.2811718182551852, + "grad_norm": 0.7737938165664673, + "learning_rate": 0.00018606616544304628, + "loss": 2.7538, + "step": 3484 + }, + { + "epoch": 0.2812525219917682, + "grad_norm": 0.7979975342750549, + "learning_rate": 0.0001860581260407044, + "loss": 2.7894, + "step": 3485 + }, + { + "epoch": 0.28133322572835123, + "grad_norm": 0.7671655416488647, + "learning_rate": 0.00018605008449356904, + "loss": 2.7097, + "step": 3486 + }, + { + "epoch": 0.2814139294649342, + "grad_norm": 0.7284159064292908, + "learning_rate": 0.00018604204080184062, + "loss": 2.7447, + "step": 3487 + }, + { + "epoch": 0.28149463320151724, + "grad_norm": 0.7425351142883301, + "learning_rate": 0.00018603399496571968, + "loss": 2.7302, + "step": 3488 + }, + { + "epoch": 0.2815753369381002, + "grad_norm": 0.7709810733795166, + "learning_rate": 0.00018602594698540663, + "loss": 2.6979, + "step": 3489 + }, + { + 
"epoch": 0.28165604067468325, + "grad_norm": 0.744628369808197, + "learning_rate": 0.00018601789686110214, + "loss": 2.7279, + "step": 3490 + }, + { + "epoch": 0.2817367444112662, + "grad_norm": 0.7679976224899292, + "learning_rate": 0.00018600984459300678, + "loss": 2.6862, + "step": 3491 + }, + { + "epoch": 0.28181744814784926, + "grad_norm": 0.7923497557640076, + "learning_rate": 0.0001860017901813213, + "loss": 2.6975, + "step": 3492 + }, + { + "epoch": 0.28189815188443224, + "grad_norm": 0.7896692156791687, + "learning_rate": 0.00018599373362624636, + "loss": 2.7052, + "step": 3493 + }, + { + "epoch": 0.28197885562101527, + "grad_norm": 0.7913276553153992, + "learning_rate": 0.00018598567492798284, + "loss": 2.7233, + "step": 3494 + }, + { + "epoch": 0.28205955935759824, + "grad_norm": 0.7385257482528687, + "learning_rate": 0.00018597761408673146, + "loss": 2.7616, + "step": 3495 + }, + { + "epoch": 0.2821402630941813, + "grad_norm": 0.7181909084320068, + "learning_rate": 0.00018596955110269323, + "loss": 2.718, + "step": 3496 + }, + { + "epoch": 0.28222096683076425, + "grad_norm": 0.8313151597976685, + "learning_rate": 0.00018596148597606907, + "loss": 2.6775, + "step": 3497 + }, + { + "epoch": 0.2823016705673473, + "grad_norm": 0.7235481142997742, + "learning_rate": 0.00018595341870705995, + "loss": 2.7085, + "step": 3498 + }, + { + "epoch": 0.28238237430393026, + "grad_norm": 0.7092145085334778, + "learning_rate": 0.00018594534929586697, + "loss": 2.7167, + "step": 3499 + }, + { + "epoch": 0.2824630780405133, + "grad_norm": 0.7929207682609558, + "learning_rate": 0.0001859372777426912, + "loss": 2.663, + "step": 3500 + }, + { + "epoch": 0.28254378177709627, + "grad_norm": 0.7488871216773987, + "learning_rate": 0.00018592920404773383, + "loss": 2.7911, + "step": 3501 + }, + { + "epoch": 0.2826244855136793, + "grad_norm": 0.8230419158935547, + "learning_rate": 0.0001859211282111961, + "loss": 2.754, + "step": 3502 + }, + { + "epoch": 0.2827051892502623, + 
"grad_norm": 0.731971025466919, + "learning_rate": 0.00018591305023327924, + "loss": 2.7142, + "step": 3503 + }, + { + "epoch": 0.2827858929868453, + "grad_norm": 0.8159881234169006, + "learning_rate": 0.00018590497011418457, + "loss": 2.7046, + "step": 3504 + }, + { + "epoch": 0.2828665967234283, + "grad_norm": 0.750266432762146, + "learning_rate": 0.0001858968878541135, + "loss": 2.6951, + "step": 3505 + }, + { + "epoch": 0.2829473004600113, + "grad_norm": 0.7750049233436584, + "learning_rate": 0.00018588880345326748, + "loss": 2.6958, + "step": 3506 + }, + { + "epoch": 0.2830280041965943, + "grad_norm": 0.8559218049049377, + "learning_rate": 0.00018588071691184795, + "loss": 2.7205, + "step": 3507 + }, + { + "epoch": 0.28310870793317733, + "grad_norm": 0.7334830164909363, + "learning_rate": 0.00018587262823005642, + "loss": 2.7134, + "step": 3508 + }, + { + "epoch": 0.2831894116697603, + "grad_norm": 0.8749497532844543, + "learning_rate": 0.00018586453740809456, + "loss": 2.6811, + "step": 3509 + }, + { + "epoch": 0.28327011540634334, + "grad_norm": 0.8800753355026245, + "learning_rate": 0.00018585644444616396, + "loss": 2.7427, + "step": 3510 + }, + { + "epoch": 0.2833508191429263, + "grad_norm": 0.8666185736656189, + "learning_rate": 0.00018584834934446632, + "loss": 2.6828, + "step": 3511 + }, + { + "epoch": 0.28343152287950935, + "grad_norm": 0.7451635003089905, + "learning_rate": 0.00018584025210320343, + "loss": 2.6784, + "step": 3512 + }, + { + "epoch": 0.2835122266160923, + "grad_norm": 0.8512656688690186, + "learning_rate": 0.00018583215272257708, + "loss": 2.7762, + "step": 3513 + }, + { + "epoch": 0.28359293035267535, + "grad_norm": 0.9298297166824341, + "learning_rate": 0.00018582405120278907, + "loss": 2.7714, + "step": 3514 + }, + { + "epoch": 0.28367363408925833, + "grad_norm": 0.7968065738677979, + "learning_rate": 0.0001858159475440414, + "loss": 2.7286, + "step": 3515 + }, + { + "epoch": 0.28375433782584136, + "grad_norm": 0.7381564378738403, + 
"learning_rate": 0.00018580784174653596, + "loss": 2.6697, + "step": 3516 + }, + { + "epoch": 0.28383504156242434, + "grad_norm": 0.8199222683906555, + "learning_rate": 0.00018579973381047481, + "loss": 2.7463, + "step": 3517 + }, + { + "epoch": 0.28391574529900737, + "grad_norm": 0.8022071123123169, + "learning_rate": 0.00018579162373606002, + "loss": 2.6898, + "step": 3518 + }, + { + "epoch": 0.28399644903559035, + "grad_norm": 0.7899700999259949, + "learning_rate": 0.0001857835115234937, + "loss": 2.7074, + "step": 3519 + }, + { + "epoch": 0.2840771527721733, + "grad_norm": 0.7237183451652527, + "learning_rate": 0.00018577539717297805, + "loss": 2.6699, + "step": 3520 + }, + { + "epoch": 0.28415785650875636, + "grad_norm": 0.7627314329147339, + "learning_rate": 0.00018576728068471526, + "loss": 2.7745, + "step": 3521 + }, + { + "epoch": 0.28423856024533933, + "grad_norm": 0.7301654815673828, + "learning_rate": 0.00018575916205890766, + "loss": 2.7191, + "step": 3522 + }, + { + "epoch": 0.28431926398192237, + "grad_norm": 0.7441647052764893, + "learning_rate": 0.00018575104129575753, + "loss": 2.7529, + "step": 3523 + }, + { + "epoch": 0.28439996771850534, + "grad_norm": 0.7715914249420166, + "learning_rate": 0.0001857429183954673, + "loss": 2.6893, + "step": 3524 + }, + { + "epoch": 0.2844806714550884, + "grad_norm": 0.7464057207107544, + "learning_rate": 0.00018573479335823944, + "loss": 2.7169, + "step": 3525 + }, + { + "epoch": 0.28456137519167135, + "grad_norm": 0.753198504447937, + "learning_rate": 0.00018572666618427638, + "loss": 2.7144, + "step": 3526 + }, + { + "epoch": 0.2846420789282544, + "grad_norm": 0.7681953310966492, + "learning_rate": 0.00018571853687378073, + "loss": 2.709, + "step": 3527 + }, + { + "epoch": 0.28472278266483736, + "grad_norm": 0.7591876983642578, + "learning_rate": 0.0001857104054269551, + "loss": 2.7519, + "step": 3528 + }, + { + "epoch": 0.2848034864014204, + "grad_norm": 0.7417709827423096, + "learning_rate": 
0.00018570227184400205, + "loss": 2.6756, + "step": 3529 + }, + { + "epoch": 0.28488419013800337, + "grad_norm": 0.7641329169273376, + "learning_rate": 0.0001856941361251244, + "loss": 2.6614, + "step": 3530 + }, + { + "epoch": 0.2849648938745864, + "grad_norm": 0.7813490033149719, + "learning_rate": 0.0001856859982705249, + "loss": 2.7145, + "step": 3531 + }, + { + "epoch": 0.2850455976111694, + "grad_norm": 0.7777202129364014, + "learning_rate": 0.00018567785828040628, + "loss": 2.7015, + "step": 3532 + }, + { + "epoch": 0.2851263013477524, + "grad_norm": 0.7647144794464111, + "learning_rate": 0.0001856697161549715, + "loss": 2.7311, + "step": 3533 + }, + { + "epoch": 0.2852070050843354, + "grad_norm": 0.7477256655693054, + "learning_rate": 0.00018566157189442342, + "loss": 2.6832, + "step": 3534 + }, + { + "epoch": 0.2852877088209184, + "grad_norm": 0.7037049531936646, + "learning_rate": 0.00018565342549896506, + "loss": 2.6942, + "step": 3535 + }, + { + "epoch": 0.2853684125575014, + "grad_norm": 0.7309197783470154, + "learning_rate": 0.00018564527696879945, + "loss": 2.6797, + "step": 3536 + }, + { + "epoch": 0.2854491162940844, + "grad_norm": 0.798075795173645, + "learning_rate": 0.00018563712630412967, + "loss": 2.6926, + "step": 3537 + }, + { + "epoch": 0.2855298200306674, + "grad_norm": 0.7831682562828064, + "learning_rate": 0.0001856289735051588, + "loss": 2.7537, + "step": 3538 + }, + { + "epoch": 0.28561052376725043, + "grad_norm": 0.7983096241950989, + "learning_rate": 0.0001856208185720901, + "loss": 2.7037, + "step": 3539 + }, + { + "epoch": 0.2856912275038334, + "grad_norm": 0.7250573635101318, + "learning_rate": 0.00018561266150512678, + "loss": 2.7282, + "step": 3540 + }, + { + "epoch": 0.28577193124041644, + "grad_norm": 0.7800211906433105, + "learning_rate": 0.00018560450230447218, + "loss": 2.6541, + "step": 3541 + }, + { + "epoch": 0.2858526349769994, + "grad_norm": 0.7624209523200989, + "learning_rate": 0.00018559634097032953, + "loss": 
2.7041, + "step": 3542 + }, + { + "epoch": 0.28593333871358245, + "grad_norm": 0.7212036848068237, + "learning_rate": 0.0001855881775029024, + "loss": 2.7287, + "step": 3543 + }, + { + "epoch": 0.28601404245016543, + "grad_norm": 0.7774164080619812, + "learning_rate": 0.00018558001190239408, + "loss": 2.6515, + "step": 3544 + }, + { + "epoch": 0.28609474618674846, + "grad_norm": 0.7169588208198547, + "learning_rate": 0.0001855718441690082, + "loss": 2.7111, + "step": 3545 + }, + { + "epoch": 0.28617544992333144, + "grad_norm": 0.7473909258842468, + "learning_rate": 0.00018556367430294827, + "loss": 2.7405, + "step": 3546 + }, + { + "epoch": 0.28625615365991447, + "grad_norm": 0.7213929295539856, + "learning_rate": 0.0001855555023044179, + "loss": 2.7336, + "step": 3547 + }, + { + "epoch": 0.28633685739649745, + "grad_norm": 0.701816201210022, + "learning_rate": 0.00018554732817362078, + "loss": 2.721, + "step": 3548 + }, + { + "epoch": 0.2864175611330805, + "grad_norm": 0.8158134818077087, + "learning_rate": 0.00018553915191076064, + "loss": 2.6979, + "step": 3549 + }, + { + "epoch": 0.28649826486966345, + "grad_norm": 0.7303084135055542, + "learning_rate": 0.00018553097351604118, + "loss": 2.6734, + "step": 3550 + }, + { + "epoch": 0.2865789686062465, + "grad_norm": 0.8140435814857483, + "learning_rate": 0.00018552279298966634, + "loss": 2.6832, + "step": 3551 + }, + { + "epoch": 0.28665967234282946, + "grad_norm": 0.7024678587913513, + "learning_rate": 0.00018551461033183988, + "loss": 2.7118, + "step": 3552 + }, + { + "epoch": 0.2867403760794125, + "grad_norm": 0.7277806401252747, + "learning_rate": 0.00018550642554276582, + "loss": 2.6362, + "step": 3553 + }, + { + "epoch": 0.28682107981599547, + "grad_norm": 0.8376575112342834, + "learning_rate": 0.00018549823862264812, + "loss": 2.744, + "step": 3554 + }, + { + "epoch": 0.2869017835525785, + "grad_norm": 0.712195098400116, + "learning_rate": 0.00018549004957169082, + "loss": 2.6715, + "step": 3555 + }, + { + 
"epoch": 0.2869824872891615, + "grad_norm": 0.7511523962020874, + "learning_rate": 0.00018548185839009805, + "loss": 2.7655, + "step": 3556 + }, + { + "epoch": 0.2870631910257445, + "grad_norm": 0.7397211790084839, + "learning_rate": 0.00018547366507807388, + "loss": 2.6813, + "step": 3557 + }, + { + "epoch": 0.2871438947623275, + "grad_norm": 0.6926341652870178, + "learning_rate": 0.00018546546963582253, + "loss": 2.6477, + "step": 3558 + }, + { + "epoch": 0.2872245984989105, + "grad_norm": 0.7776244878768921, + "learning_rate": 0.00018545727206354827, + "loss": 2.6979, + "step": 3559 + }, + { + "epoch": 0.2873053022354935, + "grad_norm": 0.7639400959014893, + "learning_rate": 0.00018544907236145542, + "loss": 2.6913, + "step": 3560 + }, + { + "epoch": 0.28738600597207653, + "grad_norm": 0.7738329768180847, + "learning_rate": 0.0001854408705297483, + "loss": 2.7231, + "step": 3561 + }, + { + "epoch": 0.2874667097086595, + "grad_norm": 0.7182422876358032, + "learning_rate": 0.00018543266656863137, + "loss": 2.718, + "step": 3562 + }, + { + "epoch": 0.28754741344524254, + "grad_norm": 0.7257261276245117, + "learning_rate": 0.00018542446047830903, + "loss": 2.7354, + "step": 3563 + }, + { + "epoch": 0.2876281171818255, + "grad_norm": 0.7761391997337341, + "learning_rate": 0.00018541625225898588, + "loss": 2.705, + "step": 3564 + }, + { + "epoch": 0.28770882091840855, + "grad_norm": 0.9272314310073853, + "learning_rate": 0.0001854080419108664, + "loss": 2.7278, + "step": 3565 + }, + { + "epoch": 0.2877895246549915, + "grad_norm": 0.7622589468955994, + "learning_rate": 0.00018539982943415527, + "loss": 2.7224, + "step": 3566 + }, + { + "epoch": 0.28787022839157456, + "grad_norm": 0.725349485874176, + "learning_rate": 0.0001853916148290572, + "loss": 2.6782, + "step": 3567 + }, + { + "epoch": 0.28795093212815753, + "grad_norm": 0.776242733001709, + "learning_rate": 0.0001853833980957768, + "loss": 2.6467, + "step": 3568 + }, + { + "epoch": 0.28803163586474057, + 
"grad_norm": 0.8461112976074219, + "learning_rate": 0.00018537517923451896, + "loss": 2.6763, + "step": 3569 + }, + { + "epoch": 0.28811233960132354, + "grad_norm": 0.8161221742630005, + "learning_rate": 0.00018536695824548848, + "loss": 2.7057, + "step": 3570 + }, + { + "epoch": 0.2881930433379065, + "grad_norm": 0.7404211759567261, + "learning_rate": 0.00018535873512889024, + "loss": 2.7083, + "step": 3571 + }, + { + "epoch": 0.28827374707448955, + "grad_norm": 0.831042468547821, + "learning_rate": 0.00018535050988492918, + "loss": 2.6121, + "step": 3572 + }, + { + "epoch": 0.2883544508110725, + "grad_norm": 0.7286352515220642, + "learning_rate": 0.00018534228251381035, + "loss": 2.7165, + "step": 3573 + }, + { + "epoch": 0.28843515454765556, + "grad_norm": 0.7951883673667908, + "learning_rate": 0.00018533405301573872, + "loss": 2.6794, + "step": 3574 + }, + { + "epoch": 0.28851585828423854, + "grad_norm": 0.7431079149246216, + "learning_rate": 0.00018532582139091944, + "loss": 2.6758, + "step": 3575 + }, + { + "epoch": 0.28859656202082157, + "grad_norm": 0.7408809065818787, + "learning_rate": 0.0001853175876395576, + "loss": 2.6901, + "step": 3576 + }, + { + "epoch": 0.28867726575740454, + "grad_norm": 0.7428708672523499, + "learning_rate": 0.00018530935176185848, + "loss": 2.6679, + "step": 3577 + }, + { + "epoch": 0.2887579694939876, + "grad_norm": 0.7670302987098694, + "learning_rate": 0.00018530111375802735, + "loss": 2.7306, + "step": 3578 + }, + { + "epoch": 0.28883867323057055, + "grad_norm": 0.7582474946975708, + "learning_rate": 0.00018529287362826943, + "loss": 2.7715, + "step": 3579 + }, + { + "epoch": 0.2889193769671536, + "grad_norm": 0.750973105430603, + "learning_rate": 0.0001852846313727902, + "loss": 2.7147, + "step": 3580 + }, + { + "epoch": 0.28900008070373656, + "grad_norm": 0.771854043006897, + "learning_rate": 0.00018527638699179498, + "loss": 2.6874, + "step": 3581 + }, + { + "epoch": 0.2890807844403196, + "grad_norm": 0.785469651222229, + 
"learning_rate": 0.00018526814048548928, + "loss": 2.6858, + "step": 3582 + }, + { + "epoch": 0.28916148817690257, + "grad_norm": 0.7601101398468018, + "learning_rate": 0.00018525989185407864, + "loss": 2.6927, + "step": 3583 + }, + { + "epoch": 0.2892421919134856, + "grad_norm": 0.7313411831855774, + "learning_rate": 0.00018525164109776861, + "loss": 2.6813, + "step": 3584 + }, + { + "epoch": 0.2893228956500686, + "grad_norm": 0.7471718192100525, + "learning_rate": 0.00018524338821676483, + "loss": 2.6791, + "step": 3585 + }, + { + "epoch": 0.2894035993866516, + "grad_norm": 0.7615204453468323, + "learning_rate": 0.00018523513321127302, + "loss": 2.7767, + "step": 3586 + }, + { + "epoch": 0.2894843031232346, + "grad_norm": 0.766793966293335, + "learning_rate": 0.00018522687608149886, + "loss": 2.664, + "step": 3587 + }, + { + "epoch": 0.2895650068598176, + "grad_norm": 0.7897932529449463, + "learning_rate": 0.00018521861682764816, + "loss": 2.7148, + "step": 3588 + }, + { + "epoch": 0.2896457105964006, + "grad_norm": 0.7366818785667419, + "learning_rate": 0.00018521035544992679, + "loss": 2.69, + "step": 3589 + }, + { + "epoch": 0.28972641433298363, + "grad_norm": 0.7503829598426819, + "learning_rate": 0.00018520209194854058, + "loss": 2.7141, + "step": 3590 + }, + { + "epoch": 0.2898071180695666, + "grad_norm": 0.8064351081848145, + "learning_rate": 0.00018519382632369556, + "loss": 2.6738, + "step": 3591 + }, + { + "epoch": 0.28988782180614964, + "grad_norm": 0.7364048361778259, + "learning_rate": 0.00018518555857559768, + "loss": 2.6731, + "step": 3592 + }, + { + "epoch": 0.2899685255427326, + "grad_norm": 0.7065430283546448, + "learning_rate": 0.00018517728870445297, + "loss": 2.7314, + "step": 3593 + }, + { + "epoch": 0.29004922927931565, + "grad_norm": 0.8233428001403809, + "learning_rate": 0.0001851690167104676, + "loss": 2.727, + "step": 3594 + }, + { + "epoch": 0.2901299330158986, + "grad_norm": 0.7563758492469788, + "learning_rate": 
0.00018516074259384768, + "loss": 2.665, + "step": 3595 + }, + { + "epoch": 0.29021063675248165, + "grad_norm": 0.7451249361038208, + "learning_rate": 0.00018515246635479943, + "loss": 2.7686, + "step": 3596 + }, + { + "epoch": 0.29029134048906463, + "grad_norm": 0.7374305725097656, + "learning_rate": 0.00018514418799352918, + "loss": 2.6466, + "step": 3597 + }, + { + "epoch": 0.29037204422564766, + "grad_norm": 0.7596983909606934, + "learning_rate": 0.00018513590751024315, + "loss": 2.6763, + "step": 3598 + }, + { + "epoch": 0.29045274796223064, + "grad_norm": 0.7808190584182739, + "learning_rate": 0.0001851276249051478, + "loss": 2.7362, + "step": 3599 + }, + { + "epoch": 0.29053345169881367, + "grad_norm": 0.765785276889801, + "learning_rate": 0.00018511934017844948, + "loss": 2.7049, + "step": 3600 + }, + { + "epoch": 0.29061415543539665, + "grad_norm": 0.7503563165664673, + "learning_rate": 0.0001851110533303547, + "loss": 2.6262, + "step": 3601 + }, + { + "epoch": 0.2906948591719797, + "grad_norm": 0.7287782430648804, + "learning_rate": 0.00018510276436107, + "loss": 2.7076, + "step": 3602 + }, + { + "epoch": 0.29077556290856266, + "grad_norm": 0.7748721837997437, + "learning_rate": 0.00018509447327080193, + "loss": 2.6945, + "step": 3603 + }, + { + "epoch": 0.2908562666451457, + "grad_norm": 0.7482423186302185, + "learning_rate": 0.00018508618005975714, + "loss": 2.7326, + "step": 3604 + }, + { + "epoch": 0.29093697038172867, + "grad_norm": 0.7708765864372253, + "learning_rate": 0.00018507788472814238, + "loss": 2.7602, + "step": 3605 + }, + { + "epoch": 0.2910176741183117, + "grad_norm": 0.7308060526847839, + "learning_rate": 0.0001850695872761643, + "loss": 2.6735, + "step": 3606 + }, + { + "epoch": 0.2910983778548947, + "grad_norm": 0.7512951493263245, + "learning_rate": 0.00018506128770402972, + "loss": 2.6877, + "step": 3607 + }, + { + "epoch": 0.2911790815914777, + "grad_norm": 0.6806616187095642, + "learning_rate": 0.00018505298601194552, + "loss": 
2.6689, + "step": 3608 + }, + { + "epoch": 0.2912597853280607, + "grad_norm": 0.7825661301612854, + "learning_rate": 0.00018504468220011857, + "loss": 2.7108, + "step": 3609 + }, + { + "epoch": 0.2913404890646437, + "grad_norm": 0.8243381977081299, + "learning_rate": 0.00018503637626875584, + "loss": 2.6789, + "step": 3610 + }, + { + "epoch": 0.2914211928012267, + "grad_norm": 0.745012640953064, + "learning_rate": 0.00018502806821806429, + "loss": 2.7658, + "step": 3611 + }, + { + "epoch": 0.2915018965378097, + "grad_norm": 0.7091341018676758, + "learning_rate": 0.00018501975804825104, + "loss": 2.7046, + "step": 3612 + }, + { + "epoch": 0.2915826002743927, + "grad_norm": 0.729026734828949, + "learning_rate": 0.0001850114457595232, + "loss": 2.6692, + "step": 3613 + }, + { + "epoch": 0.29166330401097573, + "grad_norm": 0.8098071813583374, + "learning_rate": 0.00018500313135208786, + "loss": 2.712, + "step": 3614 + }, + { + "epoch": 0.2917440077475587, + "grad_norm": 0.7387483716011047, + "learning_rate": 0.0001849948148261523, + "loss": 2.6705, + "step": 3615 + }, + { + "epoch": 0.29182471148414174, + "grad_norm": 0.7904576659202576, + "learning_rate": 0.0001849864961819238, + "loss": 2.5969, + "step": 3616 + }, + { + "epoch": 0.2919054152207247, + "grad_norm": 0.7560681700706482, + "learning_rate": 0.00018497817541960964, + "loss": 2.6971, + "step": 3617 + }, + { + "epoch": 0.29198611895730775, + "grad_norm": 0.8488430976867676, + "learning_rate": 0.00018496985253941723, + "loss": 2.7367, + "step": 3618 + }, + { + "epoch": 0.2920668226938907, + "grad_norm": 0.7641268372535706, + "learning_rate": 0.00018496152754155399, + "loss": 2.6948, + "step": 3619 + }, + { + "epoch": 0.29214752643047376, + "grad_norm": 0.7219721674919128, + "learning_rate": 0.00018495320042622736, + "loss": 2.7225, + "step": 3620 + }, + { + "epoch": 0.29222823016705674, + "grad_norm": 0.7583872675895691, + "learning_rate": 0.00018494487119364493, + "loss": 2.7335, + "step": 3621 + }, + { + 
"epoch": 0.2923089339036397, + "grad_norm": 0.7771418690681458, + "learning_rate": 0.00018493653984401424, + "loss": 2.6712, + "step": 3622 + }, + { + "epoch": 0.29238963764022274, + "grad_norm": 0.7537891268730164, + "learning_rate": 0.00018492820637754296, + "loss": 2.7282, + "step": 3623 + }, + { + "epoch": 0.2924703413768057, + "grad_norm": 0.7334226965904236, + "learning_rate": 0.00018491987079443875, + "loss": 2.7072, + "step": 3624 + }, + { + "epoch": 0.29255104511338875, + "grad_norm": 0.7768076658248901, + "learning_rate": 0.00018491153309490942, + "loss": 2.7176, + "step": 3625 + }, + { + "epoch": 0.29263174884997173, + "grad_norm": 0.6831281185150146, + "learning_rate": 0.0001849031932791627, + "loss": 2.6982, + "step": 3626 + }, + { + "epoch": 0.29271245258655476, + "grad_norm": 0.7150557637214661, + "learning_rate": 0.00018489485134740648, + "loss": 2.7325, + "step": 3627 + }, + { + "epoch": 0.29279315632313774, + "grad_norm": 0.782667338848114, + "learning_rate": 0.00018488650729984863, + "loss": 2.7146, + "step": 3628 + }, + { + "epoch": 0.29287386005972077, + "grad_norm": 0.7718524932861328, + "learning_rate": 0.0001848781611366971, + "loss": 2.746, + "step": 3629 + }, + { + "epoch": 0.29295456379630375, + "grad_norm": 0.7066439390182495, + "learning_rate": 0.00018486981285815998, + "loss": 2.7497, + "step": 3630 + }, + { + "epoch": 0.2930352675328868, + "grad_norm": 0.7705665826797485, + "learning_rate": 0.00018486146246444522, + "loss": 2.6448, + "step": 3631 + }, + { + "epoch": 0.29311597126946976, + "grad_norm": 0.7334863543510437, + "learning_rate": 0.000184853109955761, + "loss": 2.6931, + "step": 3632 + }, + { + "epoch": 0.2931966750060528, + "grad_norm": 0.7903133630752563, + "learning_rate": 0.0001848447553323155, + "loss": 2.6954, + "step": 3633 + }, + { + "epoch": 0.29327737874263576, + "grad_norm": 0.6821191310882568, + "learning_rate": 0.00018483639859431689, + "loss": 2.6165, + "step": 3634 + }, + { + "epoch": 0.2933580824792188, + 
"grad_norm": 0.7187811136245728, + "learning_rate": 0.00018482803974197344, + "loss": 2.6387, + "step": 3635 + }, + { + "epoch": 0.2934387862158018, + "grad_norm": 0.7429843544960022, + "learning_rate": 0.00018481967877549354, + "loss": 2.6848, + "step": 3636 + }, + { + "epoch": 0.2935194899523848, + "grad_norm": 0.7431524395942688, + "learning_rate": 0.0001848113156950855, + "loss": 2.7044, + "step": 3637 + }, + { + "epoch": 0.2936001936889678, + "grad_norm": 0.7008687853813171, + "learning_rate": 0.00018480295050095778, + "loss": 2.6922, + "step": 3638 + }, + { + "epoch": 0.2936808974255508, + "grad_norm": 0.7106652855873108, + "learning_rate": 0.00018479458319331884, + "loss": 2.6845, + "step": 3639 + }, + { + "epoch": 0.2937616011621338, + "grad_norm": 0.7288951873779297, + "learning_rate": 0.00018478621377237723, + "loss": 2.7017, + "step": 3640 + }, + { + "epoch": 0.2938423048987168, + "grad_norm": 0.7228607535362244, + "learning_rate": 0.00018477784223834155, + "loss": 2.7449, + "step": 3641 + }, + { + "epoch": 0.2939230086352998, + "grad_norm": 0.7180825471878052, + "learning_rate": 0.00018476946859142043, + "loss": 2.7291, + "step": 3642 + }, + { + "epoch": 0.29400371237188283, + "grad_norm": 0.7854947447776794, + "learning_rate": 0.00018476109283182258, + "loss": 2.7619, + "step": 3643 + }, + { + "epoch": 0.2940844161084658, + "grad_norm": 0.7871318459510803, + "learning_rate": 0.00018475271495975673, + "loss": 2.6695, + "step": 3644 + }, + { + "epoch": 0.29416511984504884, + "grad_norm": 0.7813127636909485, + "learning_rate": 0.00018474433497543165, + "loss": 2.735, + "step": 3645 + }, + { + "epoch": 0.2942458235816318, + "grad_norm": 0.7835291028022766, + "learning_rate": 0.00018473595287905623, + "loss": 2.7336, + "step": 3646 + }, + { + "epoch": 0.29432652731821485, + "grad_norm": 0.6970148682594299, + "learning_rate": 0.00018472756867083935, + "loss": 2.6912, + "step": 3647 + }, + { + "epoch": 0.2944072310547978, + "grad_norm": 0.7968462109565735, + 
"learning_rate": 0.00018471918235098998, + "loss": 2.6889, + "step": 3648 + }, + { + "epoch": 0.29448793479138086, + "grad_norm": 0.7011313438415527, + "learning_rate": 0.00018471079391971714, + "loss": 2.6989, + "step": 3649 + }, + { + "epoch": 0.29456863852796383, + "grad_norm": 0.8047335743904114, + "learning_rate": 0.00018470240337722991, + "loss": 2.6827, + "step": 3650 + }, + { + "epoch": 0.29464934226454687, + "grad_norm": 0.7446332573890686, + "learning_rate": 0.00018469401072373733, + "loss": 2.7089, + "step": 3651 + }, + { + "epoch": 0.29473004600112984, + "grad_norm": 0.7610359191894531, + "learning_rate": 0.00018468561595944862, + "loss": 2.6766, + "step": 3652 + }, + { + "epoch": 0.2948107497377129, + "grad_norm": 0.7705755233764648, + "learning_rate": 0.000184677219084573, + "loss": 2.7445, + "step": 3653 + }, + { + "epoch": 0.29489145347429585, + "grad_norm": 0.7466446757316589, + "learning_rate": 0.00018466882009931973, + "loss": 2.726, + "step": 3654 + }, + { + "epoch": 0.2949721572108789, + "grad_norm": 0.7912059426307678, + "learning_rate": 0.00018466041900389813, + "loss": 2.6865, + "step": 3655 + }, + { + "epoch": 0.29505286094746186, + "grad_norm": 0.722588837146759, + "learning_rate": 0.00018465201579851757, + "loss": 2.7039, + "step": 3656 + }, + { + "epoch": 0.2951335646840449, + "grad_norm": 0.739311933517456, + "learning_rate": 0.00018464361048338752, + "loss": 2.6991, + "step": 3657 + }, + { + "epoch": 0.29521426842062787, + "grad_norm": 0.7784128785133362, + "learning_rate": 0.00018463520305871743, + "loss": 2.753, + "step": 3658 + }, + { + "epoch": 0.2952949721572109, + "grad_norm": 0.8261777758598328, + "learning_rate": 0.00018462679352471682, + "loss": 2.7257, + "step": 3659 + }, + { + "epoch": 0.2953756758937939, + "grad_norm": 0.7510927319526672, + "learning_rate": 0.0001846183818815953, + "loss": 2.6981, + "step": 3660 + }, + { + "epoch": 0.2954563796303769, + "grad_norm": 0.7403035163879395, + "learning_rate": 
0.00018460996812956254, + "loss": 2.744, + "step": 3661 + }, + { + "epoch": 0.2955370833669599, + "grad_norm": 0.7927733063697815, + "learning_rate": 0.00018460155226882817, + "loss": 2.6304, + "step": 3662 + }, + { + "epoch": 0.2956177871035429, + "grad_norm": 0.7923495769500732, + "learning_rate": 0.000184593134299602, + "loss": 2.7882, + "step": 3663 + }, + { + "epoch": 0.2956984908401259, + "grad_norm": 0.7639210224151611, + "learning_rate": 0.00018458471422209377, + "loss": 2.7171, + "step": 3664 + }, + { + "epoch": 0.2957791945767089, + "grad_norm": 0.736652672290802, + "learning_rate": 0.00018457629203651337, + "loss": 2.7479, + "step": 3665 + }, + { + "epoch": 0.2958598983132919, + "grad_norm": 0.7718610763549805, + "learning_rate": 0.00018456786774307066, + "loss": 2.7135, + "step": 3666 + }, + { + "epoch": 0.29594060204987493, + "grad_norm": 0.7711780071258545, + "learning_rate": 0.00018455944134197565, + "loss": 2.6867, + "step": 3667 + }, + { + "epoch": 0.2960213057864579, + "grad_norm": 0.7202491760253906, + "learning_rate": 0.0001845510128334383, + "loss": 2.6657, + "step": 3668 + }, + { + "epoch": 0.29610200952304094, + "grad_norm": 0.8155657649040222, + "learning_rate": 0.00018454258221766869, + "loss": 2.7342, + "step": 3669 + }, + { + "epoch": 0.2961827132596239, + "grad_norm": 0.7972069382667542, + "learning_rate": 0.00018453414949487696, + "loss": 2.7351, + "step": 3670 + }, + { + "epoch": 0.29626341699620695, + "grad_norm": 0.8645625710487366, + "learning_rate": 0.00018452571466527325, + "loss": 2.6778, + "step": 3671 + }, + { + "epoch": 0.29634412073278993, + "grad_norm": 0.7410334944725037, + "learning_rate": 0.00018451727772906775, + "loss": 2.7228, + "step": 3672 + }, + { + "epoch": 0.2964248244693729, + "grad_norm": 0.7845733165740967, + "learning_rate": 0.0001845088386864708, + "loss": 2.7068, + "step": 3673 + }, + { + "epoch": 0.29650552820595594, + "grad_norm": 0.7709881067276001, + "learning_rate": 0.00018450039753769266, + "loss": 
2.676, + "step": 3674 + }, + { + "epoch": 0.2965862319425389, + "grad_norm": 0.7214749455451965, + "learning_rate": 0.00018449195428294371, + "loss": 2.6488, + "step": 3675 + }, + { + "epoch": 0.29666693567912195, + "grad_norm": 0.7467561960220337, + "learning_rate": 0.00018448350892243443, + "loss": 2.7262, + "step": 3676 + }, + { + "epoch": 0.2967476394157049, + "grad_norm": 0.8412678241729736, + "learning_rate": 0.00018447506145637522, + "loss": 2.7898, + "step": 3677 + }, + { + "epoch": 0.29682834315228795, + "grad_norm": 0.7130109071731567, + "learning_rate": 0.00018446661188497668, + "loss": 2.7344, + "step": 3678 + }, + { + "epoch": 0.29690904688887093, + "grad_norm": 0.7807374000549316, + "learning_rate": 0.00018445816020844937, + "loss": 2.7198, + "step": 3679 + }, + { + "epoch": 0.29698975062545396, + "grad_norm": 0.8497760891914368, + "learning_rate": 0.00018444970642700394, + "loss": 2.7479, + "step": 3680 + }, + { + "epoch": 0.29707045436203694, + "grad_norm": 0.6827178001403809, + "learning_rate": 0.0001844412505408511, + "loss": 2.727, + "step": 3681 + }, + { + "epoch": 0.29715115809861997, + "grad_norm": 0.8063304424285889, + "learning_rate": 0.00018443279255020152, + "loss": 2.7896, + "step": 3682 + }, + { + "epoch": 0.29723186183520295, + "grad_norm": 0.7759353518486023, + "learning_rate": 0.00018442433245526604, + "loss": 2.7014, + "step": 3683 + }, + { + "epoch": 0.297312565571786, + "grad_norm": 0.7380958199501038, + "learning_rate": 0.00018441587025625554, + "loss": 2.6665, + "step": 3684 + }, + { + "epoch": 0.29739326930836896, + "grad_norm": 0.7623556852340698, + "learning_rate": 0.00018440740595338087, + "loss": 2.6955, + "step": 3685 + }, + { + "epoch": 0.297473973044952, + "grad_norm": 0.8204537630081177, + "learning_rate": 0.000184398939546853, + "loss": 2.6854, + "step": 3686 + }, + { + "epoch": 0.29755467678153497, + "grad_norm": 0.7346726655960083, + "learning_rate": 0.00018439047103688293, + "loss": 2.6664, + "step": 3687 + }, + { + 
"epoch": 0.297635380518118, + "grad_norm": 0.777860701084137, + "learning_rate": 0.00018438200042368173, + "loss": 2.6423, + "step": 3688 + }, + { + "epoch": 0.297716084254701, + "grad_norm": 0.7331553101539612, + "learning_rate": 0.00018437352770746054, + "loss": 2.6137, + "step": 3689 + }, + { + "epoch": 0.297796787991284, + "grad_norm": 0.7634466290473938, + "learning_rate": 0.00018436505288843043, + "loss": 2.7266, + "step": 3690 + }, + { + "epoch": 0.297877491727867, + "grad_norm": 0.8151016235351562, + "learning_rate": 0.00018435657596680268, + "loss": 2.7373, + "step": 3691 + }, + { + "epoch": 0.29795819546445, + "grad_norm": 0.7806773781776428, + "learning_rate": 0.00018434809694278857, + "loss": 2.7011, + "step": 3692 + }, + { + "epoch": 0.298038899201033, + "grad_norm": 0.7575243711471558, + "learning_rate": 0.00018433961581659935, + "loss": 2.6601, + "step": 3693 + }, + { + "epoch": 0.298119602937616, + "grad_norm": 0.7527276873588562, + "learning_rate": 0.00018433113258844647, + "loss": 2.6864, + "step": 3694 + }, + { + "epoch": 0.298200306674199, + "grad_norm": 0.8024318218231201, + "learning_rate": 0.0001843226472585413, + "loss": 2.728, + "step": 3695 + }, + { + "epoch": 0.29828101041078203, + "grad_norm": 0.7549982666969299, + "learning_rate": 0.0001843141598270954, + "loss": 2.6834, + "step": 3696 + }, + { + "epoch": 0.298361714147365, + "grad_norm": 0.7699971199035645, + "learning_rate": 0.0001843056702943202, + "loss": 2.7209, + "step": 3697 + }, + { + "epoch": 0.29844241788394804, + "grad_norm": 0.823842465877533, + "learning_rate": 0.0001842971786604273, + "loss": 2.6924, + "step": 3698 + }, + { + "epoch": 0.298523121620531, + "grad_norm": 0.7645791172981262, + "learning_rate": 0.00018428868492562837, + "loss": 2.6821, + "step": 3699 + }, + { + "epoch": 0.29860382535711405, + "grad_norm": 0.7530989050865173, + "learning_rate": 0.00018428018909013506, + "loss": 2.7592, + "step": 3700 + }, + { + "epoch": 0.298684529093697, + "grad_norm": 
0.7958168387413025, + "learning_rate": 0.00018427169115415914, + "loss": 2.6925, + "step": 3701 + }, + { + "epoch": 0.29876523283028006, + "grad_norm": 0.7777522802352905, + "learning_rate": 0.00018426319111791242, + "loss": 2.6757, + "step": 3702 + }, + { + "epoch": 0.29884593656686304, + "grad_norm": 0.7418079972267151, + "learning_rate": 0.00018425468898160667, + "loss": 2.6445, + "step": 3703 + }, + { + "epoch": 0.29892664030344607, + "grad_norm": 0.7591132521629333, + "learning_rate": 0.00018424618474545382, + "loss": 2.7157, + "step": 3704 + }, + { + "epoch": 0.29900734404002904, + "grad_norm": 0.7591627836227417, + "learning_rate": 0.00018423767840966586, + "loss": 2.6691, + "step": 3705 + }, + { + "epoch": 0.2990880477766121, + "grad_norm": 0.7934779524803162, + "learning_rate": 0.00018422916997445476, + "loss": 2.7262, + "step": 3706 + }, + { + "epoch": 0.29916875151319505, + "grad_norm": 0.7964254021644592, + "learning_rate": 0.00018422065944003252, + "loss": 2.6196, + "step": 3707 + }, + { + "epoch": 0.2992494552497781, + "grad_norm": 0.7448374032974243, + "learning_rate": 0.0001842121468066113, + "loss": 2.6732, + "step": 3708 + }, + { + "epoch": 0.29933015898636106, + "grad_norm": 0.7813000679016113, + "learning_rate": 0.00018420363207440329, + "loss": 2.6978, + "step": 3709 + }, + { + "epoch": 0.2994108627229441, + "grad_norm": 0.7760851979255676, + "learning_rate": 0.00018419511524362064, + "loss": 2.7466, + "step": 3710 + }, + { + "epoch": 0.29949156645952707, + "grad_norm": 0.7786797881126404, + "learning_rate": 0.00018418659631447564, + "loss": 2.7044, + "step": 3711 + }, + { + "epoch": 0.2995722701961101, + "grad_norm": 0.7860158085823059, + "learning_rate": 0.00018417807528718055, + "loss": 2.6587, + "step": 3712 + }, + { + "epoch": 0.2996529739326931, + "grad_norm": 0.8327339291572571, + "learning_rate": 0.0001841695521619478, + "loss": 2.7112, + "step": 3713 + }, + { + "epoch": 0.2997336776692761, + "grad_norm": 0.7535735368728638, + 
"learning_rate": 0.00018416102693898982, + "loss": 2.726, + "step": 3714 + }, + { + "epoch": 0.2998143814058591, + "grad_norm": 0.7781090140342712, + "learning_rate": 0.000184152499618519, + "loss": 2.7238, + "step": 3715 + }, + { + "epoch": 0.2998950851424421, + "grad_norm": 0.7700545191764832, + "learning_rate": 0.00018414397020074795, + "loss": 2.7081, + "step": 3716 + }, + { + "epoch": 0.2999757888790251, + "grad_norm": 0.7578303217887878, + "learning_rate": 0.0001841354386858892, + "loss": 2.6591, + "step": 3717 + }, + { + "epoch": 0.30005649261560813, + "grad_norm": 0.7506501078605652, + "learning_rate": 0.00018412690507415538, + "loss": 2.6551, + "step": 3718 + }, + { + "epoch": 0.3001371963521911, + "grad_norm": 0.7869547009468079, + "learning_rate": 0.00018411836936575918, + "loss": 2.7169, + "step": 3719 + }, + { + "epoch": 0.30021790008877414, + "grad_norm": 0.7547428607940674, + "learning_rate": 0.00018410983156091332, + "loss": 2.7498, + "step": 3720 + }, + { + "epoch": 0.3002986038253571, + "grad_norm": 0.7829383015632629, + "learning_rate": 0.0001841012916598306, + "loss": 2.6885, + "step": 3721 + }, + { + "epoch": 0.30037930756194015, + "grad_norm": 0.8469082117080688, + "learning_rate": 0.00018409274966272386, + "loss": 2.7594, + "step": 3722 + }, + { + "epoch": 0.3004600112985231, + "grad_norm": 0.7690171599388123, + "learning_rate": 0.00018408420556980596, + "loss": 2.7892, + "step": 3723 + }, + { + "epoch": 0.3005407150351061, + "grad_norm": 0.7295899987220764, + "learning_rate": 0.00018407565938128987, + "loss": 2.7023, + "step": 3724 + }, + { + "epoch": 0.30062141877168913, + "grad_norm": 0.7249528169631958, + "learning_rate": 0.00018406711109738856, + "loss": 2.7135, + "step": 3725 + }, + { + "epoch": 0.3007021225082721, + "grad_norm": 0.7237234711647034, + "learning_rate": 0.0001840585607183151, + "loss": 2.6117, + "step": 3726 + }, + { + "epoch": 0.30078282624485514, + "grad_norm": 0.7426557540893555, + "learning_rate": 
0.00018405000824428256, + "loss": 2.7202, + "step": 3727 + }, + { + "epoch": 0.3008635299814381, + "grad_norm": 0.7572938799858093, + "learning_rate": 0.00018404145367550414, + "loss": 2.7373, + "step": 3728 + }, + { + "epoch": 0.30094423371802115, + "grad_norm": 0.7198675274848938, + "learning_rate": 0.00018403289701219295, + "loss": 2.6675, + "step": 3729 + }, + { + "epoch": 0.3010249374546041, + "grad_norm": 0.722532331943512, + "learning_rate": 0.00018402433825456235, + "loss": 2.6933, + "step": 3730 + }, + { + "epoch": 0.30110564119118716, + "grad_norm": 0.7621530890464783, + "learning_rate": 0.0001840157774028256, + "loss": 2.6951, + "step": 3731 + }, + { + "epoch": 0.30118634492777013, + "grad_norm": 0.7435615062713623, + "learning_rate": 0.00018400721445719604, + "loss": 2.7323, + "step": 3732 + }, + { + "epoch": 0.30126704866435317, + "grad_norm": 0.7233619689941406, + "learning_rate": 0.00018399864941788708, + "loss": 2.6789, + "step": 3733 + }, + { + "epoch": 0.30134775240093614, + "grad_norm": 0.7421496510505676, + "learning_rate": 0.00018399008228511224, + "loss": 2.72, + "step": 3734 + }, + { + "epoch": 0.3014284561375192, + "grad_norm": 0.7250909805297852, + "learning_rate": 0.000183981513059085, + "loss": 2.6717, + "step": 3735 + }, + { + "epoch": 0.30150915987410215, + "grad_norm": 0.7642899751663208, + "learning_rate": 0.0001839729417400189, + "loss": 2.6823, + "step": 3736 + }, + { + "epoch": 0.3015898636106852, + "grad_norm": 0.7434508204460144, + "learning_rate": 0.00018396436832812758, + "loss": 2.6441, + "step": 3737 + }, + { + "epoch": 0.30167056734726816, + "grad_norm": 0.7163311839103699, + "learning_rate": 0.00018395579282362473, + "loss": 2.6736, + "step": 3738 + }, + { + "epoch": 0.3017512710838512, + "grad_norm": 0.6936792731285095, + "learning_rate": 0.00018394721522672404, + "loss": 2.6792, + "step": 3739 + }, + { + "epoch": 0.30183197482043417, + "grad_norm": 0.7791975736618042, + "learning_rate": 0.0001839386355376393, + "loss": 
2.653, + "step": 3740 + }, + { + "epoch": 0.3019126785570172, + "grad_norm": 0.7902694940567017, + "learning_rate": 0.00018393005375658437, + "loss": 2.7448, + "step": 3741 + }, + { + "epoch": 0.3019933822936002, + "grad_norm": 0.7405624389648438, + "learning_rate": 0.0001839214698837731, + "loss": 2.6977, + "step": 3742 + }, + { + "epoch": 0.3020740860301832, + "grad_norm": 0.8033632040023804, + "learning_rate": 0.00018391288391941943, + "loss": 2.7468, + "step": 3743 + }, + { + "epoch": 0.3021547897667662, + "grad_norm": 0.8148884177207947, + "learning_rate": 0.00018390429586373735, + "loss": 2.6992, + "step": 3744 + }, + { + "epoch": 0.3022354935033492, + "grad_norm": 0.7633625268936157, + "learning_rate": 0.00018389570571694089, + "loss": 2.6604, + "step": 3745 + }, + { + "epoch": 0.3023161972399322, + "grad_norm": 0.8687180876731873, + "learning_rate": 0.00018388711347924413, + "loss": 2.6808, + "step": 3746 + }, + { + "epoch": 0.3023969009765152, + "grad_norm": 0.6974104046821594, + "learning_rate": 0.0001838785191508612, + "loss": 2.7613, + "step": 3747 + }, + { + "epoch": 0.3024776047130982, + "grad_norm": 0.7919288873672485, + "learning_rate": 0.00018386992273200633, + "loss": 2.664, + "step": 3748 + }, + { + "epoch": 0.30255830844968123, + "grad_norm": 0.7708829045295715, + "learning_rate": 0.00018386132422289374, + "loss": 2.7703, + "step": 3749 + }, + { + "epoch": 0.3026390121862642, + "grad_norm": 0.7099813222885132, + "learning_rate": 0.00018385272362373775, + "loss": 2.6485, + "step": 3750 + }, + { + "epoch": 0.30271971592284724, + "grad_norm": 0.7629622220993042, + "learning_rate": 0.0001838441209347527, + "loss": 2.7339, + "step": 3751 + }, + { + "epoch": 0.3028004196594302, + "grad_norm": 0.727275550365448, + "learning_rate": 0.00018383551615615295, + "loss": 2.7194, + "step": 3752 + }, + { + "epoch": 0.30288112339601325, + "grad_norm": 0.7158832550048828, + "learning_rate": 0.00018382690928815302, + "loss": 2.6698, + "step": 3753 + }, + { + 
"epoch": 0.30296182713259623, + "grad_norm": 0.8075565099716187, + "learning_rate": 0.00018381830033096735, + "loss": 2.7198, + "step": 3754 + }, + { + "epoch": 0.30304253086917926, + "grad_norm": 0.7949094176292419, + "learning_rate": 0.00018380968928481057, + "loss": 2.7048, + "step": 3755 + }, + { + "epoch": 0.30312323460576224, + "grad_norm": 0.7009503841400146, + "learning_rate": 0.00018380107614989724, + "loss": 2.709, + "step": 3756 + }, + { + "epoch": 0.30320393834234527, + "grad_norm": 0.668574869632721, + "learning_rate": 0.00018379246092644204, + "loss": 2.6515, + "step": 3757 + }, + { + "epoch": 0.30328464207892825, + "grad_norm": 0.7470806241035461, + "learning_rate": 0.00018378384361465968, + "loss": 2.7577, + "step": 3758 + }, + { + "epoch": 0.3033653458155113, + "grad_norm": 0.7529913783073425, + "learning_rate": 0.0001837752242147649, + "loss": 2.7189, + "step": 3759 + }, + { + "epoch": 0.30344604955209425, + "grad_norm": 0.7373302578926086, + "learning_rate": 0.00018376660272697258, + "loss": 2.7197, + "step": 3760 + }, + { + "epoch": 0.3035267532886773, + "grad_norm": 0.7650466561317444, + "learning_rate": 0.0001837579791514975, + "loss": 2.6613, + "step": 3761 + }, + { + "epoch": 0.30360745702526026, + "grad_norm": 0.775209903717041, + "learning_rate": 0.00018374935348855468, + "loss": 2.6454, + "step": 3762 + }, + { + "epoch": 0.3036881607618433, + "grad_norm": 0.7049290537834167, + "learning_rate": 0.00018374072573835903, + "loss": 2.6663, + "step": 3763 + }, + { + "epoch": 0.30376886449842627, + "grad_norm": 0.7060630917549133, + "learning_rate": 0.0001837320959011256, + "loss": 2.6908, + "step": 3764 + }, + { + "epoch": 0.3038495682350093, + "grad_norm": 0.7561464905738831, + "learning_rate": 0.00018372346397706944, + "loss": 2.673, + "step": 3765 + }, + { + "epoch": 0.3039302719715923, + "grad_norm": 0.7293568849563599, + "learning_rate": 0.0001837148299664057, + "loss": 2.6431, + "step": 3766 + }, + { + "epoch": 0.3040109757081753, + 
"grad_norm": 0.8460379838943481, + "learning_rate": 0.00018370619386934962, + "loss": 2.7493, + "step": 3767 + }, + { + "epoch": 0.3040916794447583, + "grad_norm": 0.8136082291603088, + "learning_rate": 0.00018369755568611632, + "loss": 2.7298, + "step": 3768 + }, + { + "epoch": 0.3041723831813413, + "grad_norm": 0.6916636824607849, + "learning_rate": 0.00018368891541692116, + "loss": 2.7173, + "step": 3769 + }, + { + "epoch": 0.3042530869179243, + "grad_norm": 0.7547643780708313, + "learning_rate": 0.0001836802730619795, + "loss": 2.6343, + "step": 3770 + }, + { + "epoch": 0.30433379065450733, + "grad_norm": 0.7439205050468445, + "learning_rate": 0.00018367162862150665, + "loss": 2.6627, + "step": 3771 + }, + { + "epoch": 0.3044144943910903, + "grad_norm": 0.7781087756156921, + "learning_rate": 0.0001836629820957181, + "loss": 2.7223, + "step": 3772 + }, + { + "epoch": 0.30449519812767334, + "grad_norm": 0.7876880764961243, + "learning_rate": 0.00018365433348482935, + "loss": 2.7139, + "step": 3773 + }, + { + "epoch": 0.3045759018642563, + "grad_norm": 0.7571346163749695, + "learning_rate": 0.00018364568278905595, + "loss": 2.6939, + "step": 3774 + }, + { + "epoch": 0.3046566056008393, + "grad_norm": 0.9011813402175903, + "learning_rate": 0.00018363703000861346, + "loss": 2.7516, + "step": 3775 + }, + { + "epoch": 0.3047373093374223, + "grad_norm": 0.7809761762619019, + "learning_rate": 0.00018362837514371755, + "loss": 2.7587, + "step": 3776 + }, + { + "epoch": 0.3048180130740053, + "grad_norm": 0.7486867308616638, + "learning_rate": 0.00018361971819458393, + "loss": 2.6617, + "step": 3777 + }, + { + "epoch": 0.30489871681058833, + "grad_norm": 0.7434267401695251, + "learning_rate": 0.00018361105916142836, + "loss": 2.7328, + "step": 3778 + }, + { + "epoch": 0.3049794205471713, + "grad_norm": 0.7895822525024414, + "learning_rate": 0.0001836023980444666, + "loss": 2.7038, + "step": 3779 + }, + { + "epoch": 0.30506012428375434, + "grad_norm": 0.7329267263412476, + 
"learning_rate": 0.00018359373484391458, + "loss": 2.6533, + "step": 3780 + }, + { + "epoch": 0.3051408280203373, + "grad_norm": 0.7578477263450623, + "learning_rate": 0.00018358506955998817, + "loss": 2.723, + "step": 3781 + }, + { + "epoch": 0.30522153175692035, + "grad_norm": 0.7174215316772461, + "learning_rate": 0.0001835764021929033, + "loss": 2.7665, + "step": 3782 + }, + { + "epoch": 0.3053022354935033, + "grad_norm": 0.7261673808097839, + "learning_rate": 0.00018356773274287605, + "loss": 2.7239, + "step": 3783 + }, + { + "epoch": 0.30538293923008636, + "grad_norm": 0.7550768852233887, + "learning_rate": 0.00018355906121012244, + "loss": 2.6952, + "step": 3784 + }, + { + "epoch": 0.30546364296666934, + "grad_norm": 0.7805373668670654, + "learning_rate": 0.0001835503875948586, + "loss": 2.6453, + "step": 3785 + }, + { + "epoch": 0.30554434670325237, + "grad_norm": 0.7753674983978271, + "learning_rate": 0.0001835417118973007, + "loss": 2.7188, + "step": 3786 + }, + { + "epoch": 0.30562505043983534, + "grad_norm": 0.719774603843689, + "learning_rate": 0.00018353303411766496, + "loss": 2.69, + "step": 3787 + }, + { + "epoch": 0.3057057541764184, + "grad_norm": 0.786780059337616, + "learning_rate": 0.00018352435425616763, + "loss": 2.7015, + "step": 3788 + }, + { + "epoch": 0.30578645791300135, + "grad_norm": 0.7481613159179688, + "learning_rate": 0.00018351567231302508, + "loss": 2.6267, + "step": 3789 + }, + { + "epoch": 0.3058671616495844, + "grad_norm": 0.8138384222984314, + "learning_rate": 0.00018350698828845365, + "loss": 2.7301, + "step": 3790 + }, + { + "epoch": 0.30594786538616736, + "grad_norm": 0.7911081314086914, + "learning_rate": 0.00018349830218266982, + "loss": 2.6661, + "step": 3791 + }, + { + "epoch": 0.3060285691227504, + "grad_norm": 0.763179361820221, + "learning_rate": 0.00018348961399588997, + "loss": 2.6509, + "step": 3792 + }, + { + "epoch": 0.30610927285933337, + "grad_norm": 0.8214982748031616, + "learning_rate": 
0.00018348092372833072, + "loss": 2.6951, + "step": 3793 + }, + { + "epoch": 0.3061899765959164, + "grad_norm": 0.7271003127098083, + "learning_rate": 0.00018347223138020865, + "loss": 2.7227, + "step": 3794 + }, + { + "epoch": 0.3062706803324994, + "grad_norm": 0.7727730870246887, + "learning_rate": 0.00018346353695174037, + "loss": 2.721, + "step": 3795 + }, + { + "epoch": 0.3063513840690824, + "grad_norm": 0.844895601272583, + "learning_rate": 0.00018345484044314257, + "loss": 2.6757, + "step": 3796 + }, + { + "epoch": 0.3064320878056654, + "grad_norm": 0.7409898638725281, + "learning_rate": 0.00018344614185463197, + "loss": 2.6798, + "step": 3797 + }, + { + "epoch": 0.3065127915422484, + "grad_norm": 0.8284425139427185, + "learning_rate": 0.00018343744118642542, + "loss": 2.7573, + "step": 3798 + }, + { + "epoch": 0.3065934952788314, + "grad_norm": 0.7535427808761597, + "learning_rate": 0.00018342873843873973, + "loss": 2.7026, + "step": 3799 + }, + { + "epoch": 0.30667419901541443, + "grad_norm": 0.8013898730278015, + "learning_rate": 0.00018342003361179176, + "loss": 2.7331, + "step": 3800 + }, + { + "epoch": 0.3067549027519974, + "grad_norm": 0.7458386421203613, + "learning_rate": 0.0001834113267057985, + "loss": 2.6976, + "step": 3801 + }, + { + "epoch": 0.30683560648858044, + "grad_norm": 0.8333673477172852, + "learning_rate": 0.00018340261772097695, + "loss": 2.7064, + "step": 3802 + }, + { + "epoch": 0.3069163102251634, + "grad_norm": 0.7273485064506531, + "learning_rate": 0.00018339390665754414, + "loss": 2.6619, + "step": 3803 + }, + { + "epoch": 0.30699701396174645, + "grad_norm": 0.8199014067649841, + "learning_rate": 0.0001833851935157172, + "loss": 2.654, + "step": 3804 + }, + { + "epoch": 0.3070777176983294, + "grad_norm": 0.780197024345398, + "learning_rate": 0.00018337647829571324, + "loss": 2.6814, + "step": 3805 + }, + { + "epoch": 0.30715842143491245, + "grad_norm": 0.7214049100875854, + "learning_rate": 0.0001833677609977495, + "loss": 
2.709, + "step": 3806 + }, + { + "epoch": 0.30723912517149543, + "grad_norm": 0.7680457830429077, + "learning_rate": 0.00018335904162204326, + "loss": 2.6628, + "step": 3807 + }, + { + "epoch": 0.30731982890807846, + "grad_norm": 0.760728120803833, + "learning_rate": 0.00018335032016881178, + "loss": 2.7005, + "step": 3808 + }, + { + "epoch": 0.30740053264466144, + "grad_norm": 0.7631687521934509, + "learning_rate": 0.00018334159663827243, + "loss": 2.7012, + "step": 3809 + }, + { + "epoch": 0.30748123638124447, + "grad_norm": 0.7515785694122314, + "learning_rate": 0.00018333287103064266, + "loss": 2.7062, + "step": 3810 + }, + { + "epoch": 0.30756194011782745, + "grad_norm": 0.804500162601471, + "learning_rate": 0.00018332414334613987, + "loss": 2.7888, + "step": 3811 + }, + { + "epoch": 0.3076426438544105, + "grad_norm": 0.7551451325416565, + "learning_rate": 0.00018331541358498164, + "loss": 2.6345, + "step": 3812 + }, + { + "epoch": 0.30772334759099346, + "grad_norm": 0.7342958450317383, + "learning_rate": 0.0001833066817473855, + "loss": 2.6601, + "step": 3813 + }, + { + "epoch": 0.3078040513275765, + "grad_norm": 0.8059296607971191, + "learning_rate": 0.0001832979478335691, + "loss": 2.7694, + "step": 3814 + }, + { + "epoch": 0.30788475506415947, + "grad_norm": 0.7037352919578552, + "learning_rate": 0.0001832892118437501, + "loss": 2.6788, + "step": 3815 + }, + { + "epoch": 0.3079654588007425, + "grad_norm": 0.759509801864624, + "learning_rate": 0.0001832804737781462, + "loss": 2.7115, + "step": 3816 + }, + { + "epoch": 0.3080461625373255, + "grad_norm": 0.7911720871925354, + "learning_rate": 0.00018327173363697524, + "loss": 2.6676, + "step": 3817 + }, + { + "epoch": 0.3081268662739085, + "grad_norm": 0.7592991590499878, + "learning_rate": 0.00018326299142045496, + "loss": 2.7245, + "step": 3818 + }, + { + "epoch": 0.3082075700104915, + "grad_norm": 0.7620227932929993, + "learning_rate": 0.00018325424712880333, + "loss": 2.7224, + "step": 3819 + }, + { + 
"epoch": 0.3082882737470745, + "grad_norm": 0.7834638953208923, + "learning_rate": 0.0001832455007622382, + "loss": 2.7469, + "step": 3820 + }, + { + "epoch": 0.3083689774836575, + "grad_norm": 0.7765992879867554, + "learning_rate": 0.00018323675232097757, + "loss": 2.7193, + "step": 3821 + }, + { + "epoch": 0.3084496812202405, + "grad_norm": 0.7334728837013245, + "learning_rate": 0.00018322800180523949, + "loss": 2.667, + "step": 3822 + }, + { + "epoch": 0.3085303849568235, + "grad_norm": 0.7674607634544373, + "learning_rate": 0.00018321924921524207, + "loss": 2.6479, + "step": 3823 + }, + { + "epoch": 0.30861108869340653, + "grad_norm": 0.7616469860076904, + "learning_rate": 0.0001832104945512034, + "loss": 2.6535, + "step": 3824 + }, + { + "epoch": 0.3086917924299895, + "grad_norm": 0.7693164944648743, + "learning_rate": 0.00018320173781334172, + "loss": 2.7616, + "step": 3825 + }, + { + "epoch": 0.3087724961665725, + "grad_norm": 0.7099221348762512, + "learning_rate": 0.0001831929790018752, + "loss": 2.6729, + "step": 3826 + }, + { + "epoch": 0.3088531999031555, + "grad_norm": 0.7389346957206726, + "learning_rate": 0.00018318421811702222, + "loss": 2.6396, + "step": 3827 + }, + { + "epoch": 0.3089339036397385, + "grad_norm": 0.8302628397941589, + "learning_rate": 0.00018317545515900106, + "loss": 2.6786, + "step": 3828 + }, + { + "epoch": 0.3090146073763215, + "grad_norm": 0.7441998720169067, + "learning_rate": 0.00018316669012803015, + "loss": 2.6769, + "step": 3829 + }, + { + "epoch": 0.3090953111129045, + "grad_norm": 0.8454675674438477, + "learning_rate": 0.00018315792302432788, + "loss": 2.7275, + "step": 3830 + }, + { + "epoch": 0.30917601484948753, + "grad_norm": 0.8129739761352539, + "learning_rate": 0.00018314915384811282, + "loss": 2.7603, + "step": 3831 + }, + { + "epoch": 0.3092567185860705, + "grad_norm": 0.7525617480278015, + "learning_rate": 0.00018314038259960349, + "loss": 2.7156, + "step": 3832 + }, + { + "epoch": 0.30933742232265354, + 
"grad_norm": 0.7319022417068481, + "learning_rate": 0.0001831316092790185, + "loss": 2.676, + "step": 3833 + }, + { + "epoch": 0.3094181260592365, + "grad_norm": 0.7767768502235413, + "learning_rate": 0.00018312283388657646, + "loss": 2.7022, + "step": 3834 + }, + { + "epoch": 0.30949882979581955, + "grad_norm": 0.709293007850647, + "learning_rate": 0.00018311405642249616, + "loss": 2.6241, + "step": 3835 + }, + { + "epoch": 0.30957953353240253, + "grad_norm": 0.715360701084137, + "learning_rate": 0.0001831052768869963, + "loss": 2.6777, + "step": 3836 + }, + { + "epoch": 0.30966023726898556, + "grad_norm": 0.7361319065093994, + "learning_rate": 0.0001830964952802957, + "loss": 2.6539, + "step": 3837 + }, + { + "epoch": 0.30974094100556854, + "grad_norm": 0.7243087291717529, + "learning_rate": 0.0001830877116026132, + "loss": 2.7506, + "step": 3838 + }, + { + "epoch": 0.30982164474215157, + "grad_norm": 0.7361106872558594, + "learning_rate": 0.00018307892585416776, + "loss": 2.697, + "step": 3839 + }, + { + "epoch": 0.30990234847873455, + "grad_norm": 0.7541893720626831, + "learning_rate": 0.00018307013803517833, + "loss": 2.694, + "step": 3840 + }, + { + "epoch": 0.3099830522153176, + "grad_norm": 0.7235575914382935, + "learning_rate": 0.00018306134814586388, + "loss": 2.6711, + "step": 3841 + }, + { + "epoch": 0.31006375595190055, + "grad_norm": 0.7868196368217468, + "learning_rate": 0.00018305255618644354, + "loss": 2.7177, + "step": 3842 + }, + { + "epoch": 0.3101444596884836, + "grad_norm": 0.8074443340301514, + "learning_rate": 0.00018304376215713637, + "loss": 2.7293, + "step": 3843 + }, + { + "epoch": 0.31022516342506656, + "grad_norm": 0.6993385553359985, + "learning_rate": 0.00018303496605816158, + "loss": 2.6942, + "step": 3844 + }, + { + "epoch": 0.3103058671616496, + "grad_norm": 0.7272824645042419, + "learning_rate": 0.00018302616788973839, + "loss": 2.7093, + "step": 3845 + }, + { + "epoch": 0.31038657089823257, + "grad_norm": 0.7496963143348694, + 
"learning_rate": 0.00018301736765208605, + "loss": 2.7096, + "step": 3846 + }, + { + "epoch": 0.3104672746348156, + "grad_norm": 0.7407644987106323, + "learning_rate": 0.00018300856534542387, + "loss": 2.6956, + "step": 3847 + }, + { + "epoch": 0.3105479783713986, + "grad_norm": 0.742382287979126, + "learning_rate": 0.00018299976096997132, + "loss": 2.6744, + "step": 3848 + }, + { + "epoch": 0.3106286821079816, + "grad_norm": 0.7314567565917969, + "learning_rate": 0.0001829909545259477, + "loss": 2.7544, + "step": 3849 + }, + { + "epoch": 0.3107093858445646, + "grad_norm": 0.7550896406173706, + "learning_rate": 0.0001829821460135726, + "loss": 2.714, + "step": 3850 + }, + { + "epoch": 0.3107900895811476, + "grad_norm": 0.7496031522750854, + "learning_rate": 0.00018297333543306548, + "loss": 2.6718, + "step": 3851 + }, + { + "epoch": 0.3108707933177306, + "grad_norm": 0.7600073218345642, + "learning_rate": 0.00018296452278464596, + "loss": 2.7141, + "step": 3852 + }, + { + "epoch": 0.31095149705431363, + "grad_norm": 0.7242388129234314, + "learning_rate": 0.00018295570806853366, + "loss": 2.7407, + "step": 3853 + }, + { + "epoch": 0.3110322007908966, + "grad_norm": 0.723874568939209, + "learning_rate": 0.00018294689128494824, + "loss": 2.7253, + "step": 3854 + }, + { + "epoch": 0.31111290452747964, + "grad_norm": 0.7902834415435791, + "learning_rate": 0.00018293807243410947, + "loss": 2.7118, + "step": 3855 + }, + { + "epoch": 0.3111936082640626, + "grad_norm": 0.7676794528961182, + "learning_rate": 0.00018292925151623717, + "loss": 2.684, + "step": 3856 + }, + { + "epoch": 0.31127431200064565, + "grad_norm": 0.767431378364563, + "learning_rate": 0.0001829204285315511, + "loss": 2.6936, + "step": 3857 + }, + { + "epoch": 0.3113550157372286, + "grad_norm": 0.7802234888076782, + "learning_rate": 0.00018291160348027122, + "loss": 2.7181, + "step": 3858 + }, + { + "epoch": 0.31143571947381166, + "grad_norm": 0.7823610305786133, + "learning_rate": 0.00018290277636261743, 
+ "loss": 2.7014, + "step": 3859 + }, + { + "epoch": 0.31151642321039463, + "grad_norm": 0.8199869394302368, + "learning_rate": 0.00018289394717880978, + "loss": 2.73, + "step": 3860 + }, + { + "epoch": 0.31159712694697766, + "grad_norm": 0.7725761532783508, + "learning_rate": 0.00018288511592906822, + "loss": 2.6978, + "step": 3861 + }, + { + "epoch": 0.31167783068356064, + "grad_norm": 0.752034068107605, + "learning_rate": 0.00018287628261361296, + "loss": 2.6635, + "step": 3862 + }, + { + "epoch": 0.3117585344201437, + "grad_norm": 0.7961714267730713, + "learning_rate": 0.0001828674472326641, + "loss": 2.7047, + "step": 3863 + }, + { + "epoch": 0.31183923815672665, + "grad_norm": 0.7413069605827332, + "learning_rate": 0.00018285860978644182, + "loss": 2.6872, + "step": 3864 + }, + { + "epoch": 0.3119199418933097, + "grad_norm": 0.8943146467208862, + "learning_rate": 0.00018284977027516636, + "loss": 2.7611, + "step": 3865 + }, + { + "epoch": 0.31200064562989266, + "grad_norm": 0.7663856744766235, + "learning_rate": 0.0001828409286990581, + "loss": 2.7541, + "step": 3866 + }, + { + "epoch": 0.3120813493664757, + "grad_norm": 0.7557348608970642, + "learning_rate": 0.00018283208505833731, + "loss": 2.6633, + "step": 3867 + }, + { + "epoch": 0.31216205310305867, + "grad_norm": 0.7690094113349915, + "learning_rate": 0.00018282323935322445, + "loss": 2.7117, + "step": 3868 + }, + { + "epoch": 0.3122427568396417, + "grad_norm": 0.8059033751487732, + "learning_rate": 0.00018281439158393997, + "loss": 2.6743, + "step": 3869 + }, + { + "epoch": 0.3123234605762247, + "grad_norm": 0.7877150774002075, + "learning_rate": 0.00018280554175070438, + "loss": 2.6546, + "step": 3870 + }, + { + "epoch": 0.3124041643128077, + "grad_norm": 0.799670934677124, + "learning_rate": 0.0001827966898537382, + "loss": 2.7184, + "step": 3871 + }, + { + "epoch": 0.3124848680493907, + "grad_norm": 0.8353915214538574, + "learning_rate": 0.0001827878358932621, + "loss": 2.7235, + "step": 3872 + }, 
+ { + "epoch": 0.3125655717859737, + "grad_norm": 0.7954776883125305, + "learning_rate": 0.00018277897986949672, + "loss": 2.5992, + "step": 3873 + }, + { + "epoch": 0.3126462755225567, + "grad_norm": 0.7959856986999512, + "learning_rate": 0.00018277012178266277, + "loss": 2.6877, + "step": 3874 + }, + { + "epoch": 0.3127269792591397, + "grad_norm": 0.8220208883285522, + "learning_rate": 0.00018276126163298102, + "loss": 2.6891, + "step": 3875 + }, + { + "epoch": 0.3128076829957227, + "grad_norm": 0.7827965021133423, + "learning_rate": 0.0001827523994206723, + "loss": 2.7271, + "step": 3876 + }, + { + "epoch": 0.3128883867323057, + "grad_norm": 0.764369010925293, + "learning_rate": 0.00018274353514595746, + "loss": 2.6661, + "step": 3877 + }, + { + "epoch": 0.3129690904688887, + "grad_norm": 0.7440944314002991, + "learning_rate": 0.00018273466880905744, + "loss": 2.6621, + "step": 3878 + }, + { + "epoch": 0.3130497942054717, + "grad_norm": 0.8544813394546509, + "learning_rate": 0.00018272580041019319, + "loss": 2.7168, + "step": 3879 + }, + { + "epoch": 0.3131304979420547, + "grad_norm": 0.7232592701911926, + "learning_rate": 0.00018271692994958577, + "loss": 2.6666, + "step": 3880 + }, + { + "epoch": 0.3132112016786377, + "grad_norm": 0.750525712966919, + "learning_rate": 0.00018270805742745617, + "loss": 2.6984, + "step": 3881 + }, + { + "epoch": 0.31329190541522073, + "grad_norm": 0.8195550441741943, + "learning_rate": 0.00018269918284402565, + "loss": 2.7183, + "step": 3882 + }, + { + "epoch": 0.3133726091518037, + "grad_norm": 0.7695632576942444, + "learning_rate": 0.0001826903061995153, + "loss": 2.7092, + "step": 3883 + }, + { + "epoch": 0.31345331288838674, + "grad_norm": 0.7631582617759705, + "learning_rate": 0.0001826814274941463, + "loss": 2.7061, + "step": 3884 + }, + { + "epoch": 0.3135340166249697, + "grad_norm": 0.8318471908569336, + "learning_rate": 0.0001826725467281401, + "loss": 2.694, + "step": 3885 + }, + { + "epoch": 0.31361472036155275, + 
"grad_norm": 0.7313492298126221, + "learning_rate": 0.00018266366390171784, + "loss": 2.6729, + "step": 3886 + }, + { + "epoch": 0.3136954240981357, + "grad_norm": 0.7508631944656372, + "learning_rate": 0.00018265477901510105, + "loss": 2.731, + "step": 3887 + }, + { + "epoch": 0.31377612783471875, + "grad_norm": 0.8106402158737183, + "learning_rate": 0.00018264589206851107, + "loss": 2.7113, + "step": 3888 + }, + { + "epoch": 0.31385683157130173, + "grad_norm": 0.771542489528656, + "learning_rate": 0.00018263700306216945, + "loss": 2.644, + "step": 3889 + }, + { + "epoch": 0.31393753530788476, + "grad_norm": 0.812441885471344, + "learning_rate": 0.00018262811199629768, + "loss": 2.6889, + "step": 3890 + }, + { + "epoch": 0.31401823904446774, + "grad_norm": 0.8231199979782104, + "learning_rate": 0.00018261921887111738, + "loss": 2.6466, + "step": 3891 + }, + { + "epoch": 0.31409894278105077, + "grad_norm": 0.7492454051971436, + "learning_rate": 0.00018261032368685012, + "loss": 2.6693, + "step": 3892 + }, + { + "epoch": 0.31417964651763375, + "grad_norm": 0.7651814222335815, + "learning_rate": 0.00018260142644371772, + "loss": 2.6569, + "step": 3893 + }, + { + "epoch": 0.3142603502542168, + "grad_norm": 0.7504465579986572, + "learning_rate": 0.0001825925271419418, + "loss": 2.684, + "step": 3894 + }, + { + "epoch": 0.31434105399079976, + "grad_norm": 0.749650239944458, + "learning_rate": 0.00018258362578174424, + "loss": 2.6482, + "step": 3895 + }, + { + "epoch": 0.3144217577273828, + "grad_norm": 0.8445256352424622, + "learning_rate": 0.00018257472236334686, + "loss": 2.727, + "step": 3896 + }, + { + "epoch": 0.31450246146396577, + "grad_norm": 0.7628257870674133, + "learning_rate": 0.0001825658168869715, + "loss": 2.7314, + "step": 3897 + }, + { + "epoch": 0.3145831652005488, + "grad_norm": 0.7738446593284607, + "learning_rate": 0.00018255690935284019, + "loss": 2.7478, + "step": 3898 + }, + { + "epoch": 0.3146638689371318, + "grad_norm": 0.7578958868980408, + 
"learning_rate": 0.00018254799976117486, + "loss": 2.6922, + "step": 3899 + }, + { + "epoch": 0.3147445726737148, + "grad_norm": 0.8367362022399902, + "learning_rate": 0.00018253908811219764, + "loss": 2.7347, + "step": 3900 + }, + { + "epoch": 0.3148252764102978, + "grad_norm": 0.7530354857444763, + "learning_rate": 0.00018253017440613057, + "loss": 2.7151, + "step": 3901 + }, + { + "epoch": 0.3149059801468808, + "grad_norm": 0.7168053388595581, + "learning_rate": 0.00018252125864319578, + "loss": 2.7072, + "step": 3902 + }, + { + "epoch": 0.3149866838834638, + "grad_norm": 0.7480056285858154, + "learning_rate": 0.00018251234082361555, + "loss": 2.6489, + "step": 3903 + }, + { + "epoch": 0.3150673876200468, + "grad_norm": 0.8563880324363708, + "learning_rate": 0.0001825034209476121, + "loss": 2.7384, + "step": 3904 + }, + { + "epoch": 0.3151480913566298, + "grad_norm": 0.7959346771240234, + "learning_rate": 0.0001824944990154077, + "loss": 2.631, + "step": 3905 + }, + { + "epoch": 0.31522879509321283, + "grad_norm": 0.7385980486869812, + "learning_rate": 0.00018248557502722476, + "loss": 2.7394, + "step": 3906 + }, + { + "epoch": 0.3153094988297958, + "grad_norm": 0.7682650685310364, + "learning_rate": 0.00018247664898328567, + "loss": 2.7327, + "step": 3907 + }, + { + "epoch": 0.31539020256637884, + "grad_norm": 0.7720316648483276, + "learning_rate": 0.0001824677208838129, + "loss": 2.6442, + "step": 3908 + }, + { + "epoch": 0.3154709063029618, + "grad_norm": 0.7927379608154297, + "learning_rate": 0.00018245879072902895, + "loss": 2.7738, + "step": 3909 + }, + { + "epoch": 0.31555161003954485, + "grad_norm": 0.7506012916564941, + "learning_rate": 0.00018244985851915637, + "loss": 2.6825, + "step": 3910 + }, + { + "epoch": 0.3156323137761278, + "grad_norm": 0.6996353268623352, + "learning_rate": 0.00018244092425441781, + "loss": 2.6783, + "step": 3911 + }, + { + "epoch": 0.31571301751271086, + "grad_norm": 0.8039344549179077, + "learning_rate": 
0.00018243198793503588, + "loss": 2.7628, + "step": 3912 + }, + { + "epoch": 0.31579372124929384, + "grad_norm": 0.7890963554382324, + "learning_rate": 0.0001824230495612334, + "loss": 2.7512, + "step": 3913 + }, + { + "epoch": 0.31587442498587687, + "grad_norm": 0.7470870614051819, + "learning_rate": 0.00018241410913323301, + "loss": 2.7058, + "step": 3914 + }, + { + "epoch": 0.31595512872245984, + "grad_norm": 0.7056336402893066, + "learning_rate": 0.0001824051666512576, + "loss": 2.6091, + "step": 3915 + }, + { + "epoch": 0.3160358324590429, + "grad_norm": 0.7818490862846375, + "learning_rate": 0.00018239622211553002, + "loss": 2.7509, + "step": 3916 + }, + { + "epoch": 0.31611653619562585, + "grad_norm": 0.7590607404708862, + "learning_rate": 0.0001823872755262732, + "loss": 2.7238, + "step": 3917 + }, + { + "epoch": 0.3161972399322089, + "grad_norm": 0.7157841920852661, + "learning_rate": 0.00018237832688371014, + "loss": 2.6639, + "step": 3918 + }, + { + "epoch": 0.31627794366879186, + "grad_norm": 0.7515804171562195, + "learning_rate": 0.00018236937618806382, + "loss": 2.6973, + "step": 3919 + }, + { + "epoch": 0.3163586474053749, + "grad_norm": 0.6691949963569641, + "learning_rate": 0.00018236042343955733, + "loss": 2.727, + "step": 3920 + }, + { + "epoch": 0.31643935114195787, + "grad_norm": 0.8122327327728271, + "learning_rate": 0.0001823514686384138, + "loss": 2.7513, + "step": 3921 + }, + { + "epoch": 0.3165200548785409, + "grad_norm": 0.7813653349876404, + "learning_rate": 0.0001823425117848564, + "loss": 2.7037, + "step": 3922 + }, + { + "epoch": 0.3166007586151239, + "grad_norm": 0.6869354844093323, + "learning_rate": 0.00018233355287910834, + "loss": 2.693, + "step": 3923 + }, + { + "epoch": 0.3166814623517069, + "grad_norm": 0.7773037552833557, + "learning_rate": 0.00018232459192139296, + "loss": 2.687, + "step": 3924 + }, + { + "epoch": 0.3167621660882899, + "grad_norm": 0.7644256949424744, + "learning_rate": 0.00018231562891193352, + "loss": 
2.6753, + "step": 3925 + }, + { + "epoch": 0.3168428698248729, + "grad_norm": 0.8427005410194397, + "learning_rate": 0.00018230666385095343, + "loss": 2.6641, + "step": 3926 + }, + { + "epoch": 0.3169235735614559, + "grad_norm": 0.7194599509239197, + "learning_rate": 0.0001822976967386761, + "loss": 2.7091, + "step": 3927 + }, + { + "epoch": 0.3170042772980389, + "grad_norm": 0.7710655331611633, + "learning_rate": 0.00018228872757532512, + "loss": 2.6938, + "step": 3928 + }, + { + "epoch": 0.3170849810346219, + "grad_norm": 0.8003759980201721, + "learning_rate": 0.0001822797563611239, + "loss": 2.7019, + "step": 3929 + }, + { + "epoch": 0.3171656847712049, + "grad_norm": 0.7960470914840698, + "learning_rate": 0.00018227078309629606, + "loss": 2.661, + "step": 3930 + }, + { + "epoch": 0.3172463885077879, + "grad_norm": 0.7731126546859741, + "learning_rate": 0.00018226180778106526, + "loss": 2.7023, + "step": 3931 + }, + { + "epoch": 0.3173270922443709, + "grad_norm": 0.7561383843421936, + "learning_rate": 0.00018225283041565515, + "loss": 2.6768, + "step": 3932 + }, + { + "epoch": 0.3174077959809539, + "grad_norm": 0.7578409910202026, + "learning_rate": 0.0001822438510002895, + "loss": 2.7145, + "step": 3933 + }, + { + "epoch": 0.3174884997175369, + "grad_norm": 0.7901952862739563, + "learning_rate": 0.00018223486953519214, + "loss": 2.7121, + "step": 3934 + }, + { + "epoch": 0.31756920345411993, + "grad_norm": 0.82305908203125, + "learning_rate": 0.0001822258860205868, + "loss": 2.7553, + "step": 3935 + }, + { + "epoch": 0.3176499071907029, + "grad_norm": 0.748055636882782, + "learning_rate": 0.0001822169004566975, + "loss": 2.7236, + "step": 3936 + }, + { + "epoch": 0.31773061092728594, + "grad_norm": 0.7981358766555786, + "learning_rate": 0.0001822079128437481, + "loss": 2.7444, + "step": 3937 + }, + { + "epoch": 0.3178113146638689, + "grad_norm": 0.7938945889472961, + "learning_rate": 0.0001821989231819626, + "loss": 2.7512, + "step": 3938 + }, + { + "epoch": 
0.31789201840045195, + "grad_norm": 0.7250397205352783, + "learning_rate": 0.0001821899314715651, + "loss": 2.6843, + "step": 3939 + }, + { + "epoch": 0.3179727221370349, + "grad_norm": 0.8844723701477051, + "learning_rate": 0.00018218093771277965, + "loss": 2.6295, + "step": 3940 + }, + { + "epoch": 0.31805342587361796, + "grad_norm": 0.7545698881149292, + "learning_rate": 0.0001821719419058304, + "loss": 2.7478, + "step": 3941 + }, + { + "epoch": 0.31813412961020093, + "grad_norm": 0.7254738807678223, + "learning_rate": 0.00018216294405094157, + "loss": 2.665, + "step": 3942 + }, + { + "epoch": 0.31821483334678397, + "grad_norm": 0.7664754390716553, + "learning_rate": 0.00018215394414833737, + "loss": 2.7431, + "step": 3943 + }, + { + "epoch": 0.31829553708336694, + "grad_norm": 0.8250303864479065, + "learning_rate": 0.00018214494219824217, + "loss": 2.6957, + "step": 3944 + }, + { + "epoch": 0.31837624081995, + "grad_norm": 0.7425532341003418, + "learning_rate": 0.00018213593820088026, + "loss": 2.666, + "step": 3945 + }, + { + "epoch": 0.31845694455653295, + "grad_norm": 0.6943121552467346, + "learning_rate": 0.00018212693215647604, + "loss": 2.716, + "step": 3946 + }, + { + "epoch": 0.318537648293116, + "grad_norm": 0.732829213142395, + "learning_rate": 0.00018211792406525403, + "loss": 2.6557, + "step": 3947 + }, + { + "epoch": 0.31861835202969896, + "grad_norm": 0.7666537165641785, + "learning_rate": 0.00018210891392743866, + "loss": 2.7275, + "step": 3948 + }, + { + "epoch": 0.318699055766282, + "grad_norm": 0.7652621865272522, + "learning_rate": 0.00018209990174325455, + "loss": 2.6372, + "step": 3949 + }, + { + "epoch": 0.31877975950286497, + "grad_norm": 0.7416055202484131, + "learning_rate": 0.00018209088751292626, + "loss": 2.6688, + "step": 3950 + }, + { + "epoch": 0.318860463239448, + "grad_norm": 0.7504609227180481, + "learning_rate": 0.00018208187123667848, + "loss": 2.6912, + "step": 3951 + }, + { + "epoch": 0.318941166976031, + "grad_norm": 
0.7308809757232666, + "learning_rate": 0.00018207285291473588, + "loss": 2.7272, + "step": 3952 + }, + { + "epoch": 0.319021870712614, + "grad_norm": 0.8031618595123291, + "learning_rate": 0.00018206383254732326, + "loss": 2.7354, + "step": 3953 + }, + { + "epoch": 0.319102574449197, + "grad_norm": 0.81386798620224, + "learning_rate": 0.00018205481013466542, + "loss": 2.676, + "step": 3954 + }, + { + "epoch": 0.31918327818578, + "grad_norm": 0.7845911383628845, + "learning_rate": 0.0001820457856769872, + "loss": 2.7094, + "step": 3955 + }, + { + "epoch": 0.319263981922363, + "grad_norm": 0.7189298272132874, + "learning_rate": 0.00018203675917451357, + "loss": 2.6764, + "step": 3956 + }, + { + "epoch": 0.319344685658946, + "grad_norm": 0.8253228664398193, + "learning_rate": 0.00018202773062746944, + "loss": 2.6805, + "step": 3957 + }, + { + "epoch": 0.319425389395529, + "grad_norm": 0.7965289950370789, + "learning_rate": 0.0001820187000360798, + "loss": 2.7148, + "step": 3958 + }, + { + "epoch": 0.31950609313211203, + "grad_norm": 0.7505398988723755, + "learning_rate": 0.0001820096674005698, + "loss": 2.6732, + "step": 3959 + }, + { + "epoch": 0.319586796868695, + "grad_norm": 0.7554877400398254, + "learning_rate": 0.0001820006327211645, + "loss": 2.7467, + "step": 3960 + }, + { + "epoch": 0.31966750060527804, + "grad_norm": 0.7836194038391113, + "learning_rate": 0.00018199159599808907, + "loss": 2.7252, + "step": 3961 + }, + { + "epoch": 0.319748204341861, + "grad_norm": 0.7967261672019958, + "learning_rate": 0.00018198255723156877, + "loss": 2.6814, + "step": 3962 + }, + { + "epoch": 0.31982890807844405, + "grad_norm": 0.7411713600158691, + "learning_rate": 0.00018197351642182882, + "loss": 2.6928, + "step": 3963 + }, + { + "epoch": 0.31990961181502703, + "grad_norm": 0.6961422562599182, + "learning_rate": 0.00018196447356909454, + "loss": 2.6651, + "step": 3964 + }, + { + "epoch": 0.31999031555161006, + "grad_norm": 0.7245771884918213, + "learning_rate": 
0.00018195542867359134, + "loss": 2.6726, + "step": 3965 + }, + { + "epoch": 0.32007101928819304, + "grad_norm": 0.784654974937439, + "learning_rate": 0.00018194638173554462, + "loss": 2.6829, + "step": 3966 + }, + { + "epoch": 0.32015172302477607, + "grad_norm": 0.7373329997062683, + "learning_rate": 0.00018193733275517985, + "loss": 2.6481, + "step": 3967 + }, + { + "epoch": 0.32023242676135905, + "grad_norm": 0.7878682613372803, + "learning_rate": 0.00018192828173272258, + "loss": 2.6701, + "step": 3968 + }, + { + "epoch": 0.3203131304979421, + "grad_norm": 0.759676992893219, + "learning_rate": 0.00018191922866839835, + "loss": 2.7218, + "step": 3969 + }, + { + "epoch": 0.32039383423452505, + "grad_norm": 0.7923088669776917, + "learning_rate": 0.00018191017356243282, + "loss": 2.6841, + "step": 3970 + }, + { + "epoch": 0.3204745379711081, + "grad_norm": 0.7084882855415344, + "learning_rate": 0.00018190111641505164, + "loss": 2.7167, + "step": 3971 + }, + { + "epoch": 0.32055524170769106, + "grad_norm": 0.7166235446929932, + "learning_rate": 0.00018189205722648054, + "loss": 2.6647, + "step": 3972 + }, + { + "epoch": 0.3206359454442741, + "grad_norm": 0.7997722029685974, + "learning_rate": 0.0001818829959969453, + "loss": 2.7199, + "step": 3973 + }, + { + "epoch": 0.32071664918085707, + "grad_norm": 0.8309516310691833, + "learning_rate": 0.0001818739327266718, + "loss": 2.8006, + "step": 3974 + }, + { + "epoch": 0.3207973529174401, + "grad_norm": 0.7164002656936646, + "learning_rate": 0.00018186486741588582, + "loss": 2.6258, + "step": 3975 + }, + { + "epoch": 0.3208780566540231, + "grad_norm": 0.7715865969657898, + "learning_rate": 0.0001818558000648134, + "loss": 2.7034, + "step": 3976 + }, + { + "epoch": 0.3209587603906061, + "grad_norm": 0.7806593775749207, + "learning_rate": 0.0001818467306736804, + "loss": 2.6758, + "step": 3977 + }, + { + "epoch": 0.3210394641271891, + "grad_norm": 0.8026594519615173, + "learning_rate": 0.00018183765924271298, + "loss": 
2.6976, + "step": 3978 + }, + { + "epoch": 0.32112016786377207, + "grad_norm": 0.7971245050430298, + "learning_rate": 0.00018182858577213716, + "loss": 2.7312, + "step": 3979 + }, + { + "epoch": 0.3212008716003551, + "grad_norm": 0.7347297072410583, + "learning_rate": 0.00018181951026217908, + "loss": 2.6664, + "step": 3980 + }, + { + "epoch": 0.3212815753369381, + "grad_norm": 0.7929779291152954, + "learning_rate": 0.0001818104327130649, + "loss": 2.6603, + "step": 3981 + }, + { + "epoch": 0.3213622790735211, + "grad_norm": 0.7465224862098694, + "learning_rate": 0.00018180135312502089, + "loss": 2.6566, + "step": 3982 + }, + { + "epoch": 0.3214429828101041, + "grad_norm": 0.7114695906639099, + "learning_rate": 0.00018179227149827334, + "loss": 2.6492, + "step": 3983 + }, + { + "epoch": 0.3215236865466871, + "grad_norm": 0.7179337739944458, + "learning_rate": 0.00018178318783304857, + "loss": 2.6778, + "step": 3984 + }, + { + "epoch": 0.3216043902832701, + "grad_norm": 0.7182629704475403, + "learning_rate": 0.000181774102129573, + "loss": 2.7057, + "step": 3985 + }, + { + "epoch": 0.3216850940198531, + "grad_norm": 0.7383119463920593, + "learning_rate": 0.000181765014388073, + "loss": 2.6633, + "step": 3986 + }, + { + "epoch": 0.3217657977564361, + "grad_norm": 0.7340527176856995, + "learning_rate": 0.00018175592460877512, + "loss": 2.6838, + "step": 3987 + }, + { + "epoch": 0.32184650149301913, + "grad_norm": 0.7934359312057495, + "learning_rate": 0.00018174683279190593, + "loss": 2.6795, + "step": 3988 + }, + { + "epoch": 0.3219272052296021, + "grad_norm": 0.6960840821266174, + "learning_rate": 0.00018173773893769192, + "loss": 2.6669, + "step": 3989 + }, + { + "epoch": 0.32200790896618514, + "grad_norm": 0.7513574361801147, + "learning_rate": 0.00018172864304635985, + "loss": 2.6744, + "step": 3990 + }, + { + "epoch": 0.3220886127027681, + "grad_norm": 0.7516636848449707, + "learning_rate": 0.00018171954511813629, + "loss": 2.6652, + "step": 3991 + }, + { + 
"epoch": 0.32216931643935115, + "grad_norm": 0.7817716002464294, + "learning_rate": 0.00018171044515324808, + "loss": 2.6671, + "step": 3992 + }, + { + "epoch": 0.3222500201759341, + "grad_norm": 0.6859925389289856, + "learning_rate": 0.000181701343151922, + "loss": 2.6984, + "step": 3993 + }, + { + "epoch": 0.32233072391251716, + "grad_norm": 0.7669627666473389, + "learning_rate": 0.00018169223911438485, + "loss": 2.7102, + "step": 3994 + }, + { + "epoch": 0.32241142764910014, + "grad_norm": 0.784724235534668, + "learning_rate": 0.00018168313304086357, + "loss": 2.7413, + "step": 3995 + }, + { + "epoch": 0.32249213138568317, + "grad_norm": 0.7341497540473938, + "learning_rate": 0.00018167402493158509, + "loss": 2.706, + "step": 3996 + }, + { + "epoch": 0.32257283512226614, + "grad_norm": 0.7975730299949646, + "learning_rate": 0.00018166491478677641, + "loss": 2.6896, + "step": 3997 + }, + { + "epoch": 0.3226535388588492, + "grad_norm": 0.8138537406921387, + "learning_rate": 0.00018165580260666458, + "loss": 2.6986, + "step": 3998 + }, + { + "epoch": 0.32273424259543215, + "grad_norm": 0.6734997034072876, + "learning_rate": 0.0001816466883914767, + "loss": 2.6686, + "step": 3999 + }, + { + "epoch": 0.3228149463320152, + "grad_norm": 0.7742779850959778, + "learning_rate": 0.00018163757214143992, + "loss": 2.7222, + "step": 4000 + }, + { + "epoch": 0.3228149463320152, + "eval_loss": 2.615234375, + "eval_runtime": 783.0394, + "eval_samples_per_second": 3.346, + "eval_steps_per_second": 0.558, + "step": 4000 + }, + { + "epoch": 0.32289565006859816, + "grad_norm": 0.7654715180397034, + "learning_rate": 0.00018162845385678145, + "loss": 2.7016, + "step": 4001 + }, + { + "epoch": 0.3229763538051812, + "grad_norm": 0.8698763251304626, + "learning_rate": 0.0001816193335377285, + "loss": 2.6709, + "step": 4002 + }, + { + "epoch": 0.32305705754176417, + "grad_norm": 0.758056640625, + "learning_rate": 0.00018161021118450843, + "loss": 2.7277, + "step": 4003 + }, + { + "epoch": 
0.3231377612783472, + "grad_norm": 0.7462654113769531, + "learning_rate": 0.00018160108679734856, + "loss": 2.623, + "step": 4004 + }, + { + "epoch": 0.3232184650149302, + "grad_norm": 0.7274953722953796, + "learning_rate": 0.00018159196037647628, + "loss": 2.6875, + "step": 4005 + }, + { + "epoch": 0.3232991687515132, + "grad_norm": 0.7737346887588501, + "learning_rate": 0.0001815828319221191, + "loss": 2.6967, + "step": 4006 + }, + { + "epoch": 0.3233798724880962, + "grad_norm": 0.7793172001838684, + "learning_rate": 0.00018157370143450448, + "loss": 2.724, + "step": 4007 + }, + { + "epoch": 0.3234605762246792, + "grad_norm": 0.7791805863380432, + "learning_rate": 0.00018156456891385995, + "loss": 2.6653, + "step": 4008 + }, + { + "epoch": 0.3235412799612622, + "grad_norm": 0.7225624918937683, + "learning_rate": 0.0001815554343604132, + "loss": 2.745, + "step": 4009 + }, + { + "epoch": 0.32362198369784523, + "grad_norm": 0.6958494782447815, + "learning_rate": 0.0001815462977743918, + "loss": 2.6856, + "step": 4010 + }, + { + "epoch": 0.3237026874344282, + "grad_norm": 0.7572030425071716, + "learning_rate": 0.0001815371591560235, + "loss": 2.7053, + "step": 4011 + }, + { + "epoch": 0.32378339117101124, + "grad_norm": 0.7133952975273132, + "learning_rate": 0.00018152801850553605, + "loss": 2.6984, + "step": 4012 + }, + { + "epoch": 0.3238640949075942, + "grad_norm": 0.7598705291748047, + "learning_rate": 0.00018151887582315728, + "loss": 2.6632, + "step": 4013 + }, + { + "epoch": 0.32394479864417725, + "grad_norm": 0.7670698165893555, + "learning_rate": 0.00018150973110911503, + "loss": 2.7035, + "step": 4014 + }, + { + "epoch": 0.3240255023807602, + "grad_norm": 0.7547060251235962, + "learning_rate": 0.00018150058436363723, + "loss": 2.6531, + "step": 4015 + }, + { + "epoch": 0.32410620611734325, + "grad_norm": 0.7943035364151001, + "learning_rate": 0.00018149143558695178, + "loss": 2.766, + "step": 4016 + }, + { + "epoch": 0.32418690985392623, + "grad_norm": 
0.864356517791748, + "learning_rate": 0.00018148228477928675, + "loss": 2.7134, + "step": 4017 + }, + { + "epoch": 0.32426761359050926, + "grad_norm": 0.7773902416229248, + "learning_rate": 0.00018147313194087018, + "loss": 2.6948, + "step": 4018 + }, + { + "epoch": 0.32434831732709224, + "grad_norm": 0.839131772518158, + "learning_rate": 0.0001814639770719302, + "loss": 2.7393, + "step": 4019 + }, + { + "epoch": 0.32442902106367527, + "grad_norm": 0.807837963104248, + "learning_rate": 0.00018145482017269498, + "loss": 2.7835, + "step": 4020 + }, + { + "epoch": 0.32450972480025825, + "grad_norm": 0.7133228182792664, + "learning_rate": 0.00018144566124339272, + "loss": 2.6859, + "step": 4021 + }, + { + "epoch": 0.3245904285368413, + "grad_norm": 0.8450621962547302, + "learning_rate": 0.00018143650028425162, + "loss": 2.7548, + "step": 4022 + }, + { + "epoch": 0.32467113227342426, + "grad_norm": 0.8594980835914612, + "learning_rate": 0.00018142733729550013, + "loss": 2.6636, + "step": 4023 + }, + { + "epoch": 0.3247518360100073, + "grad_norm": 0.7134621739387512, + "learning_rate": 0.0001814181722773665, + "loss": 2.6501, + "step": 4024 + }, + { + "epoch": 0.32483253974659027, + "grad_norm": 0.8630430698394775, + "learning_rate": 0.0001814090052300792, + "loss": 2.6994, + "step": 4025 + }, + { + "epoch": 0.3249132434831733, + "grad_norm": 0.7044873237609863, + "learning_rate": 0.00018139983615386666, + "loss": 2.6603, + "step": 4026 + }, + { + "epoch": 0.3249939472197563, + "grad_norm": 0.6896052360534668, + "learning_rate": 0.00018139066504895744, + "loss": 2.6649, + "step": 4027 + }, + { + "epoch": 0.3250746509563393, + "grad_norm": 0.802855372428894, + "learning_rate": 0.00018138149191558012, + "loss": 2.7067, + "step": 4028 + }, + { + "epoch": 0.3251553546929223, + "grad_norm": 0.7555437088012695, + "learning_rate": 0.00018137231675396324, + "loss": 2.6471, + "step": 4029 + }, + { + "epoch": 0.32523605842950526, + "grad_norm": 0.6846967339515686, + 
"learning_rate": 0.00018136313956433552, + "loss": 2.6774, + "step": 4030 + }, + { + "epoch": 0.3253167621660883, + "grad_norm": 0.7435858249664307, + "learning_rate": 0.0001813539603469257, + "loss": 2.7135, + "step": 4031 + }, + { + "epoch": 0.32539746590267127, + "grad_norm": 0.7669098377227783, + "learning_rate": 0.00018134477910196253, + "loss": 2.7014, + "step": 4032 + }, + { + "epoch": 0.3254781696392543, + "grad_norm": 0.7797521352767944, + "learning_rate": 0.00018133559582967482, + "loss": 2.7229, + "step": 4033 + }, + { + "epoch": 0.3255588733758373, + "grad_norm": 0.7377886176109314, + "learning_rate": 0.00018132641053029142, + "loss": 2.7196, + "step": 4034 + }, + { + "epoch": 0.3256395771124203, + "grad_norm": 0.7387986779212952, + "learning_rate": 0.0001813172232040413, + "loss": 2.687, + "step": 4035 + }, + { + "epoch": 0.3257202808490033, + "grad_norm": 0.7276624441146851, + "learning_rate": 0.0001813080338511534, + "loss": 2.6954, + "step": 4036 + }, + { + "epoch": 0.3258009845855863, + "grad_norm": 0.7929670214653015, + "learning_rate": 0.00018129884247185683, + "loss": 2.7431, + "step": 4037 + }, + { + "epoch": 0.3258816883221693, + "grad_norm": 0.7896441221237183, + "learning_rate": 0.0001812896490663805, + "loss": 2.6823, + "step": 4038 + }, + { + "epoch": 0.3259623920587523, + "grad_norm": 0.8642957210540771, + "learning_rate": 0.00018128045363495368, + "loss": 2.7334, + "step": 4039 + }, + { + "epoch": 0.3260430957953353, + "grad_norm": 0.7156081795692444, + "learning_rate": 0.00018127125617780542, + "loss": 2.6886, + "step": 4040 + }, + { + "epoch": 0.32612379953191833, + "grad_norm": 0.8260853290557861, + "learning_rate": 0.00018126205669516507, + "loss": 2.6802, + "step": 4041 + }, + { + "epoch": 0.3262045032685013, + "grad_norm": 0.6853542327880859, + "learning_rate": 0.00018125285518726182, + "loss": 2.6392, + "step": 4042 + }, + { + "epoch": 0.32628520700508434, + "grad_norm": 0.7574017643928528, + "learning_rate": 
0.00018124365165432505, + "loss": 2.7412, + "step": 4043 + }, + { + "epoch": 0.3263659107416673, + "grad_norm": 0.8656191825866699, + "learning_rate": 0.00018123444609658408, + "loss": 2.6903, + "step": 4044 + }, + { + "epoch": 0.32644661447825035, + "grad_norm": 0.7443257570266724, + "learning_rate": 0.00018122523851426837, + "loss": 2.682, + "step": 4045 + }, + { + "epoch": 0.32652731821483333, + "grad_norm": 0.7222229242324829, + "learning_rate": 0.0001812160289076074, + "loss": 2.6196, + "step": 4046 + }, + { + "epoch": 0.32660802195141636, + "grad_norm": 0.8531985878944397, + "learning_rate": 0.00018120681727683066, + "loss": 2.6777, + "step": 4047 + }, + { + "epoch": 0.32668872568799934, + "grad_norm": 0.7380290627479553, + "learning_rate": 0.0001811976036221678, + "loss": 2.6847, + "step": 4048 + }, + { + "epoch": 0.32676942942458237, + "grad_norm": 0.7250707149505615, + "learning_rate": 0.00018118838794384837, + "loss": 2.6846, + "step": 4049 + }, + { + "epoch": 0.32685013316116535, + "grad_norm": 0.763504147529602, + "learning_rate": 0.00018117917024210208, + "loss": 2.69, + "step": 4050 + }, + { + "epoch": 0.3269308368977484, + "grad_norm": 0.7740737795829773, + "learning_rate": 0.00018116995051715867, + "loss": 2.6945, + "step": 4051 + }, + { + "epoch": 0.32701154063433135, + "grad_norm": 0.7777624726295471, + "learning_rate": 0.00018116072876924792, + "loss": 2.6918, + "step": 4052 + }, + { + "epoch": 0.3270922443709144, + "grad_norm": 0.7957910895347595, + "learning_rate": 0.0001811515049985997, + "loss": 2.7237, + "step": 4053 + }, + { + "epoch": 0.32717294810749736, + "grad_norm": 0.7828991413116455, + "learning_rate": 0.00018114227920544375, + "loss": 2.7008, + "step": 4054 + }, + { + "epoch": 0.3272536518440804, + "grad_norm": 0.6695161461830139, + "learning_rate": 0.00018113305139001016, + "loss": 2.7311, + "step": 4055 + }, + { + "epoch": 0.32733435558066337, + "grad_norm": 0.7693436145782471, + "learning_rate": 0.00018112382155252883, + "loss": 
2.7102, + "step": 4056 + }, + { + "epoch": 0.3274150593172464, + "grad_norm": 0.7520042657852173, + "learning_rate": 0.0001811145896932298, + "loss": 2.6455, + "step": 4057 + }, + { + "epoch": 0.3274957630538294, + "grad_norm": 0.786834716796875, + "learning_rate": 0.00018110535581234317, + "loss": 2.6965, + "step": 4058 + }, + { + "epoch": 0.3275764667904124, + "grad_norm": 0.742001473903656, + "learning_rate": 0.00018109611991009905, + "loss": 2.7341, + "step": 4059 + }, + { + "epoch": 0.3276571705269954, + "grad_norm": 0.813522219657898, + "learning_rate": 0.00018108688198672766, + "loss": 2.8116, + "step": 4060 + }, + { + "epoch": 0.3277378742635784, + "grad_norm": 0.7611314058303833, + "learning_rate": 0.00018107764204245916, + "loss": 2.6741, + "step": 4061 + }, + { + "epoch": 0.3278185780001614, + "grad_norm": 0.7285993695259094, + "learning_rate": 0.00018106840007752392, + "loss": 2.671, + "step": 4062 + }, + { + "epoch": 0.32789928173674443, + "grad_norm": 0.773151695728302, + "learning_rate": 0.0001810591560921522, + "loss": 2.7106, + "step": 4063 + }, + { + "epoch": 0.3279799854733274, + "grad_norm": 0.7448920011520386, + "learning_rate": 0.00018104991008657445, + "loss": 2.7176, + "step": 4064 + }, + { + "epoch": 0.32806068920991044, + "grad_norm": 0.7088467478752136, + "learning_rate": 0.0001810406620610211, + "loss": 2.7085, + "step": 4065 + }, + { + "epoch": 0.3281413929464934, + "grad_norm": 0.7507789731025696, + "learning_rate": 0.00018103141201572255, + "loss": 2.7361, + "step": 4066 + }, + { + "epoch": 0.32822209668307645, + "grad_norm": 0.7065643072128296, + "learning_rate": 0.00018102215995090943, + "loss": 2.6573, + "step": 4067 + }, + { + "epoch": 0.3283028004196594, + "grad_norm": 0.6888713836669922, + "learning_rate": 0.0001810129058668123, + "loss": 2.6699, + "step": 4068 + }, + { + "epoch": 0.32838350415624246, + "grad_norm": 0.736347496509552, + "learning_rate": 0.00018100364976366174, + "loss": 2.7089, + "step": 4069 + }, + { + "epoch": 
0.32846420789282543, + "grad_norm": 0.6854562759399414, + "learning_rate": 0.0001809943916416885, + "loss": 2.7051, + "step": 4070 + }, + { + "epoch": 0.32854491162940846, + "grad_norm": 0.7481048107147217, + "learning_rate": 0.0001809851315011233, + "loss": 2.7428, + "step": 4071 + }, + { + "epoch": 0.32862561536599144, + "grad_norm": 0.7600961923599243, + "learning_rate": 0.0001809758693421969, + "loss": 2.7153, + "step": 4072 + }, + { + "epoch": 0.3287063191025745, + "grad_norm": 0.7545063495635986, + "learning_rate": 0.00018096660516514024, + "loss": 2.6736, + "step": 4073 + }, + { + "epoch": 0.32878702283915745, + "grad_norm": 0.7967175841331482, + "learning_rate": 0.0001809573389701841, + "loss": 2.6711, + "step": 4074 + }, + { + "epoch": 0.3288677265757405, + "grad_norm": 0.7115446925163269, + "learning_rate": 0.00018094807075755943, + "loss": 2.6761, + "step": 4075 + }, + { + "epoch": 0.32894843031232346, + "grad_norm": 0.8230876326560974, + "learning_rate": 0.00018093880052749725, + "loss": 2.6749, + "step": 4076 + }, + { + "epoch": 0.3290291340489065, + "grad_norm": 0.8549706935882568, + "learning_rate": 0.00018092952828022856, + "loss": 2.7084, + "step": 4077 + }, + { + "epoch": 0.32910983778548947, + "grad_norm": 0.7379534244537354, + "learning_rate": 0.00018092025401598448, + "loss": 2.7241, + "step": 4078 + }, + { + "epoch": 0.3291905415220725, + "grad_norm": 0.7659998536109924, + "learning_rate": 0.00018091097773499616, + "loss": 2.7108, + "step": 4079 + }, + { + "epoch": 0.3292712452586555, + "grad_norm": 0.8074536323547363, + "learning_rate": 0.00018090169943749476, + "loss": 2.676, + "step": 4080 + }, + { + "epoch": 0.32935194899523845, + "grad_norm": 0.7588536143302917, + "learning_rate": 0.00018089241912371153, + "loss": 2.639, + "step": 4081 + }, + { + "epoch": 0.3294326527318215, + "grad_norm": 0.7510811686515808, + "learning_rate": 0.00018088313679387775, + "loss": 2.6722, + "step": 4082 + }, + { + "epoch": 0.32951335646840446, + "grad_norm": 
0.7538900971412659, + "learning_rate": 0.0001808738524482248, + "loss": 2.6917, + "step": 4083 + }, + { + "epoch": 0.3295940602049875, + "grad_norm": 0.8071155548095703, + "learning_rate": 0.00018086456608698402, + "loss": 2.6964, + "step": 4084 + }, + { + "epoch": 0.32967476394157047, + "grad_norm": 0.7778098583221436, + "learning_rate": 0.00018085527771038686, + "loss": 2.7301, + "step": 4085 + }, + { + "epoch": 0.3297554676781535, + "grad_norm": 0.7717564702033997, + "learning_rate": 0.00018084598731866485, + "loss": 2.7484, + "step": 4086 + }, + { + "epoch": 0.3298361714147365, + "grad_norm": 0.7361736297607422, + "learning_rate": 0.00018083669491204948, + "loss": 2.6299, + "step": 4087 + }, + { + "epoch": 0.3299168751513195, + "grad_norm": 0.736681342124939, + "learning_rate": 0.00018082740049077238, + "loss": 2.7521, + "step": 4088 + }, + { + "epoch": 0.3299975788879025, + "grad_norm": 0.8011857867240906, + "learning_rate": 0.00018081810405506517, + "loss": 2.724, + "step": 4089 + }, + { + "epoch": 0.3300782826244855, + "grad_norm": 0.7741932272911072, + "learning_rate": 0.00018080880560515956, + "loss": 2.6766, + "step": 4090 + }, + { + "epoch": 0.3301589863610685, + "grad_norm": 0.7321778535842896, + "learning_rate": 0.00018079950514128724, + "loss": 2.6614, + "step": 4091 + }, + { + "epoch": 0.33023969009765153, + "grad_norm": 0.7916514277458191, + "learning_rate": 0.00018079020266368006, + "loss": 2.7177, + "step": 4092 + }, + { + "epoch": 0.3303203938342345, + "grad_norm": 0.7961388826370239, + "learning_rate": 0.00018078089817256986, + "loss": 2.6671, + "step": 4093 + }, + { + "epoch": 0.33040109757081754, + "grad_norm": 0.7167038321495056, + "learning_rate": 0.0001807715916681885, + "loss": 2.6989, + "step": 4094 + }, + { + "epoch": 0.3304818013074005, + "grad_norm": 0.6924864649772644, + "learning_rate": 0.00018076228315076794, + "loss": 2.6484, + "step": 4095 + }, + { + "epoch": 0.33056250504398355, + "grad_norm": 0.777881383895874, + 
"learning_rate": 0.00018075297262054013, + "loss": 2.6498, + "step": 4096 + }, + { + "epoch": 0.3306432087805665, + "grad_norm": 0.7878376841545105, + "learning_rate": 0.0001807436600777372, + "loss": 2.7745, + "step": 4097 + }, + { + "epoch": 0.33072391251714955, + "grad_norm": 0.8418465256690979, + "learning_rate": 0.0001807343455225912, + "loss": 2.7195, + "step": 4098 + }, + { + "epoch": 0.33080461625373253, + "grad_norm": 0.7780830264091492, + "learning_rate": 0.00018072502895533424, + "loss": 2.6652, + "step": 4099 + }, + { + "epoch": 0.33088531999031556, + "grad_norm": 0.7102445960044861, + "learning_rate": 0.00018071571037619853, + "loss": 2.6618, + "step": 4100 + }, + { + "epoch": 0.33096602372689854, + "grad_norm": 0.7028098106384277, + "learning_rate": 0.00018070638978541633, + "loss": 2.7114, + "step": 4101 + }, + { + "epoch": 0.33104672746348157, + "grad_norm": 0.7529525756835938, + "learning_rate": 0.00018069706718321996, + "loss": 2.7231, + "step": 4102 + }, + { + "epoch": 0.33112743120006455, + "grad_norm": 0.7404564023017883, + "learning_rate": 0.0001806877425698417, + "loss": 2.6564, + "step": 4103 + }, + { + "epoch": 0.3312081349366476, + "grad_norm": 0.7725130319595337, + "learning_rate": 0.00018067841594551401, + "loss": 2.677, + "step": 4104 + }, + { + "epoch": 0.33128883867323056, + "grad_norm": 0.7616425156593323, + "learning_rate": 0.00018066908731046927, + "loss": 2.6586, + "step": 4105 + }, + { + "epoch": 0.3313695424098136, + "grad_norm": 0.7318183779716492, + "learning_rate": 0.00018065975666494002, + "loss": 2.6624, + "step": 4106 + }, + { + "epoch": 0.33145024614639657, + "grad_norm": 0.7012802958488464, + "learning_rate": 0.00018065042400915878, + "loss": 2.6663, + "step": 4107 + }, + { + "epoch": 0.3315309498829796, + "grad_norm": 0.815226674079895, + "learning_rate": 0.00018064108934335814, + "loss": 2.7248, + "step": 4108 + }, + { + "epoch": 0.3316116536195626, + "grad_norm": 0.68972247838974, + "learning_rate": 
0.00018063175266777077, + "loss": 2.6961, + "step": 4109 + }, + { + "epoch": 0.3316923573561456, + "grad_norm": 0.7563794255256653, + "learning_rate": 0.00018062241398262937, + "loss": 2.6526, + "step": 4110 + }, + { + "epoch": 0.3317730610927286, + "grad_norm": 0.7878836989402771, + "learning_rate": 0.00018061307328816662, + "loss": 2.7316, + "step": 4111 + }, + { + "epoch": 0.3318537648293116, + "grad_norm": 0.7189129590988159, + "learning_rate": 0.00018060373058461537, + "loss": 2.6577, + "step": 4112 + }, + { + "epoch": 0.3319344685658946, + "grad_norm": 0.7517561912536621, + "learning_rate": 0.00018059438587220847, + "loss": 2.668, + "step": 4113 + }, + { + "epoch": 0.3320151723024776, + "grad_norm": 0.7602595686912537, + "learning_rate": 0.00018058503915117878, + "loss": 2.6741, + "step": 4114 + }, + { + "epoch": 0.3320958760390606, + "grad_norm": 0.7702187299728394, + "learning_rate": 0.00018057569042175927, + "loss": 2.7082, + "step": 4115 + }, + { + "epoch": 0.33217657977564363, + "grad_norm": 0.7289660573005676, + "learning_rate": 0.00018056633968418294, + "loss": 2.6728, + "step": 4116 + }, + { + "epoch": 0.3322572835122266, + "grad_norm": 0.6936683654785156, + "learning_rate": 0.0001805569869386828, + "loss": 2.6735, + "step": 4117 + }, + { + "epoch": 0.33233798724880964, + "grad_norm": 0.7128138542175293, + "learning_rate": 0.000180547632185492, + "loss": 2.646, + "step": 4118 + }, + { + "epoch": 0.3324186909853926, + "grad_norm": 0.7234248518943787, + "learning_rate": 0.00018053827542484363, + "loss": 2.6497, + "step": 4119 + }, + { + "epoch": 0.33249939472197565, + "grad_norm": 0.7084202170372009, + "learning_rate": 0.0001805289166569709, + "loss": 2.6328, + "step": 4120 + }, + { + "epoch": 0.3325800984585586, + "grad_norm": 0.8068051934242249, + "learning_rate": 0.00018051955588210708, + "loss": 2.6576, + "step": 4121 + }, + { + "epoch": 0.33266080219514166, + "grad_norm": 0.787680447101593, + "learning_rate": 0.00018051019310048544, + "loss": 
2.7091, + "step": 4122 + }, + { + "epoch": 0.33274150593172463, + "grad_norm": 0.698946475982666, + "learning_rate": 0.00018050082831233931, + "loss": 2.6657, + "step": 4123 + }, + { + "epoch": 0.33282220966830767, + "grad_norm": 0.7946122288703918, + "learning_rate": 0.00018049146151790215, + "loss": 2.6981, + "step": 4124 + }, + { + "epoch": 0.33290291340489064, + "grad_norm": 0.8025123476982117, + "learning_rate": 0.00018048209271740736, + "loss": 2.6878, + "step": 4125 + }, + { + "epoch": 0.3329836171414737, + "grad_norm": 0.7493376135826111, + "learning_rate": 0.0001804727219110884, + "loss": 2.6556, + "step": 4126 + }, + { + "epoch": 0.33306432087805665, + "grad_norm": 0.7143186926841736, + "learning_rate": 0.00018046334909917886, + "loss": 2.6879, + "step": 4127 + }, + { + "epoch": 0.3331450246146397, + "grad_norm": 0.7375641465187073, + "learning_rate": 0.00018045397428191235, + "loss": 2.6817, + "step": 4128 + }, + { + "epoch": 0.33322572835122266, + "grad_norm": 0.7201291918754578, + "learning_rate": 0.00018044459745952248, + "loss": 2.6765, + "step": 4129 + }, + { + "epoch": 0.3333064320878057, + "grad_norm": 0.7924519777297974, + "learning_rate": 0.00018043521863224296, + "loss": 2.7748, + "step": 4130 + }, + { + "epoch": 0.33338713582438867, + "grad_norm": 0.7773354053497314, + "learning_rate": 0.00018042583780030752, + "loss": 2.6839, + "step": 4131 + }, + { + "epoch": 0.33346783956097165, + "grad_norm": 0.7527397274971008, + "learning_rate": 0.00018041645496394998, + "loss": 2.6749, + "step": 4132 + }, + { + "epoch": 0.3335485432975547, + "grad_norm": 0.7329208254814148, + "learning_rate": 0.00018040707012340418, + "loss": 2.7535, + "step": 4133 + }, + { + "epoch": 0.33362924703413765, + "grad_norm": 0.7637773752212524, + "learning_rate": 0.00018039768327890397, + "loss": 2.632, + "step": 4134 + }, + { + "epoch": 0.3337099507707207, + "grad_norm": 0.823623776435852, + "learning_rate": 0.00018038829443068333, + "loss": 2.7122, + "step": 4135 + }, + { 
+ "epoch": 0.33379065450730366, + "grad_norm": 0.8040826916694641, + "learning_rate": 0.00018037890357897632, + "loss": 2.7197, + "step": 4136 + }, + { + "epoch": 0.3338713582438867, + "grad_norm": 0.7483998537063599, + "learning_rate": 0.00018036951072401686, + "loss": 2.6535, + "step": 4137 + }, + { + "epoch": 0.33395206198046967, + "grad_norm": 0.8141106367111206, + "learning_rate": 0.00018036011586603914, + "loss": 2.7127, + "step": 4138 + }, + { + "epoch": 0.3340327657170527, + "grad_norm": 0.7226041555404663, + "learning_rate": 0.00018035071900527724, + "loss": 2.6846, + "step": 4139 + }, + { + "epoch": 0.3341134694536357, + "grad_norm": 0.7624794840812683, + "learning_rate": 0.00018034132014196541, + "loss": 2.6725, + "step": 4140 + }, + { + "epoch": 0.3341941731902187, + "grad_norm": 0.7299962043762207, + "learning_rate": 0.00018033191927633785, + "loss": 2.6728, + "step": 4141 + }, + { + "epoch": 0.3342748769268017, + "grad_norm": 0.7920462489128113, + "learning_rate": 0.0001803225164086289, + "loss": 2.6544, + "step": 4142 + }, + { + "epoch": 0.3343555806633847, + "grad_norm": 0.7469778656959534, + "learning_rate": 0.00018031311153907282, + "loss": 2.7356, + "step": 4143 + }, + { + "epoch": 0.3344362843999677, + "grad_norm": 0.8831696510314941, + "learning_rate": 0.0001803037046679041, + "loss": 2.6584, + "step": 4144 + }, + { + "epoch": 0.33451698813655073, + "grad_norm": 0.8047679662704468, + "learning_rate": 0.00018029429579535715, + "loss": 2.6213, + "step": 4145 + }, + { + "epoch": 0.3345976918731337, + "grad_norm": 0.7109517455101013, + "learning_rate": 0.00018028488492166645, + "loss": 2.6622, + "step": 4146 + }, + { + "epoch": 0.33467839560971674, + "grad_norm": 0.7240141034126282, + "learning_rate": 0.0001802754720470665, + "loss": 2.6794, + "step": 4147 + }, + { + "epoch": 0.3347590993462997, + "grad_norm": 0.7292990684509277, + "learning_rate": 0.000180266057171792, + "loss": 2.6079, + "step": 4148 + }, + { + "epoch": 0.33483980308288275, + 
"grad_norm": 0.8055328130722046, + "learning_rate": 0.00018025664029607756, + "loss": 2.7044, + "step": 4149 + }, + { + "epoch": 0.3349205068194657, + "grad_norm": 0.8348979949951172, + "learning_rate": 0.00018024722142015781, + "loss": 2.6757, + "step": 4150 + }, + { + "epoch": 0.33500121055604876, + "grad_norm": 0.7797044515609741, + "learning_rate": 0.00018023780054426754, + "loss": 2.7125, + "step": 4151 + }, + { + "epoch": 0.33508191429263173, + "grad_norm": 0.802442729473114, + "learning_rate": 0.00018022837766864153, + "loss": 2.7121, + "step": 4152 + }, + { + "epoch": 0.33516261802921476, + "grad_norm": 0.7248829007148743, + "learning_rate": 0.00018021895279351463, + "loss": 2.7344, + "step": 4153 + }, + { + "epoch": 0.33524332176579774, + "grad_norm": 0.7458582520484924, + "learning_rate": 0.00018020952591912175, + "loss": 2.665, + "step": 4154 + }, + { + "epoch": 0.3353240255023808, + "grad_norm": 0.8153703808784485, + "learning_rate": 0.0001802000970456978, + "loss": 2.7416, + "step": 4155 + }, + { + "epoch": 0.33540472923896375, + "grad_norm": 0.7583708763122559, + "learning_rate": 0.00018019066617347779, + "loss": 2.7002, + "step": 4156 + }, + { + "epoch": 0.3354854329755468, + "grad_norm": 0.7522469162940979, + "learning_rate": 0.00018018123330269678, + "loss": 2.7196, + "step": 4157 + }, + { + "epoch": 0.33556613671212976, + "grad_norm": 0.7386923432350159, + "learning_rate": 0.00018017179843358983, + "loss": 2.6947, + "step": 4158 + }, + { + "epoch": 0.3356468404487128, + "grad_norm": 0.7366231083869934, + "learning_rate": 0.00018016236156639205, + "loss": 2.7377, + "step": 4159 + }, + { + "epoch": 0.33572754418529577, + "grad_norm": 0.7727232575416565, + "learning_rate": 0.00018015292270133872, + "loss": 2.7566, + "step": 4160 + }, + { + "epoch": 0.3358082479218788, + "grad_norm": 0.6781843304634094, + "learning_rate": 0.000180143481838665, + "loss": 2.6796, + "step": 4161 + }, + { + "epoch": 0.3358889516584618, + "grad_norm": 0.7036039233207703, + 
"learning_rate": 0.00018013403897860624, + "loss": 2.7012, + "step": 4162 + }, + { + "epoch": 0.3359696553950448, + "grad_norm": 0.8252625465393066, + "learning_rate": 0.00018012459412139776, + "loss": 2.6613, + "step": 4163 + }, + { + "epoch": 0.3360503591316278, + "grad_norm": 0.6924486756324768, + "learning_rate": 0.00018011514726727493, + "loss": 2.6425, + "step": 4164 + }, + { + "epoch": 0.3361310628682108, + "grad_norm": 0.7735962271690369, + "learning_rate": 0.0001801056984164732, + "loss": 2.7235, + "step": 4165 + }, + { + "epoch": 0.3362117666047938, + "grad_norm": 0.7439951300621033, + "learning_rate": 0.0001800962475692281, + "loss": 2.7428, + "step": 4166 + }, + { + "epoch": 0.3362924703413768, + "grad_norm": 0.6830539107322693, + "learning_rate": 0.0001800867947257751, + "loss": 2.5907, + "step": 4167 + }, + { + "epoch": 0.3363731740779598, + "grad_norm": 0.8355144262313843, + "learning_rate": 0.00018007733988634986, + "loss": 2.6978, + "step": 4168 + }, + { + "epoch": 0.33645387781454283, + "grad_norm": 0.6880978941917419, + "learning_rate": 0.00018006788305118798, + "loss": 2.6934, + "step": 4169 + }, + { + "epoch": 0.3365345815511258, + "grad_norm": 0.762709379196167, + "learning_rate": 0.0001800584242205251, + "loss": 2.684, + "step": 4170 + }, + { + "epoch": 0.33661528528770884, + "grad_norm": 0.7543070912361145, + "learning_rate": 0.0001800489633945971, + "loss": 2.6857, + "step": 4171 + }, + { + "epoch": 0.3366959890242918, + "grad_norm": 0.787651777267456, + "learning_rate": 0.00018003950057363964, + "loss": 2.6979, + "step": 4172 + }, + { + "epoch": 0.33677669276087485, + "grad_norm": 0.7831481099128723, + "learning_rate": 0.00018003003575788856, + "loss": 2.7158, + "step": 4173 + }, + { + "epoch": 0.33685739649745783, + "grad_norm": 0.844904363155365, + "learning_rate": 0.00018002056894757986, + "loss": 2.6459, + "step": 4174 + }, + { + "epoch": 0.33693810023404086, + "grad_norm": 0.7529420852661133, + "learning_rate": 0.00018001110014294937, 
+ "loss": 2.685, + "step": 4175 + }, + { + "epoch": 0.33701880397062384, + "grad_norm": 0.776719868183136, + "learning_rate": 0.0001800016293442331, + "loss": 2.6353, + "step": 4176 + }, + { + "epoch": 0.33709950770720687, + "grad_norm": 0.7988671660423279, + "learning_rate": 0.00017999215655166716, + "loss": 2.7241, + "step": 4177 + }, + { + "epoch": 0.33718021144378985, + "grad_norm": 0.7190617918968201, + "learning_rate": 0.00017998268176548752, + "loss": 2.7278, + "step": 4178 + }, + { + "epoch": 0.3372609151803729, + "grad_norm": 0.8337060809135437, + "learning_rate": 0.0001799732049859304, + "loss": 2.7059, + "step": 4179 + }, + { + "epoch": 0.33734161891695585, + "grad_norm": 0.7547435164451599, + "learning_rate": 0.0001799637262132319, + "loss": 2.7782, + "step": 4180 + }, + { + "epoch": 0.3374223226535389, + "grad_norm": 0.8067883253097534, + "learning_rate": 0.0001799542454476284, + "loss": 2.7978, + "step": 4181 + }, + { + "epoch": 0.33750302639012186, + "grad_norm": 0.7451581358909607, + "learning_rate": 0.00017994476268935609, + "loss": 2.6931, + "step": 4182 + }, + { + "epoch": 0.33758373012670484, + "grad_norm": 0.7521898746490479, + "learning_rate": 0.00017993527793865125, + "loss": 2.6939, + "step": 4183 + }, + { + "epoch": 0.33766443386328787, + "grad_norm": 0.7608996033668518, + "learning_rate": 0.0001799257911957504, + "loss": 2.715, + "step": 4184 + }, + { + "epoch": 0.33774513759987085, + "grad_norm": 0.7459948658943176, + "learning_rate": 0.00017991630246088987, + "loss": 2.6951, + "step": 4185 + }, + { + "epoch": 0.3378258413364539, + "grad_norm": 0.7549717426300049, + "learning_rate": 0.00017990681173430618, + "loss": 2.7353, + "step": 4186 + }, + { + "epoch": 0.33790654507303686, + "grad_norm": 0.7234344482421875, + "learning_rate": 0.0001798973190162359, + "loss": 2.6491, + "step": 4187 + }, + { + "epoch": 0.3379872488096199, + "grad_norm": 0.7652330994606018, + "learning_rate": 0.00017988782430691553, + "loss": 2.765, + "step": 4188 + }, 
+ { + "epoch": 0.33806795254620287, + "grad_norm": 0.742953360080719, + "learning_rate": 0.00017987832760658177, + "loss": 2.7079, + "step": 4189 + }, + { + "epoch": 0.3381486562827859, + "grad_norm": 0.7440767288208008, + "learning_rate": 0.00017986882891547125, + "loss": 2.6751, + "step": 4190 + }, + { + "epoch": 0.3382293600193689, + "grad_norm": 0.7141925096511841, + "learning_rate": 0.00017985932823382078, + "loss": 2.6249, + "step": 4191 + }, + { + "epoch": 0.3383100637559519, + "grad_norm": 0.7200489044189453, + "learning_rate": 0.00017984982556186707, + "loss": 2.6811, + "step": 4192 + }, + { + "epoch": 0.3383907674925349, + "grad_norm": 0.7677409648895264, + "learning_rate": 0.00017984032089984696, + "loss": 2.6641, + "step": 4193 + }, + { + "epoch": 0.3384714712291179, + "grad_norm": 0.7386545538902283, + "learning_rate": 0.00017983081424799741, + "loss": 2.6504, + "step": 4194 + }, + { + "epoch": 0.3385521749657009, + "grad_norm": 0.7528583407402039, + "learning_rate": 0.00017982130560655526, + "loss": 2.6422, + "step": 4195 + }, + { + "epoch": 0.3386328787022839, + "grad_norm": 0.7339407801628113, + "learning_rate": 0.0001798117949757575, + "loss": 2.7047, + "step": 4196 + }, + { + "epoch": 0.3387135824388669, + "grad_norm": 0.7655882239341736, + "learning_rate": 0.00017980228235584117, + "loss": 2.7644, + "step": 4197 + }, + { + "epoch": 0.33879428617544993, + "grad_norm": 0.7602109909057617, + "learning_rate": 0.00017979276774704342, + "loss": 2.697, + "step": 4198 + }, + { + "epoch": 0.3388749899120329, + "grad_norm": 0.7188911437988281, + "learning_rate": 0.00017978325114960126, + "loss": 2.7147, + "step": 4199 + }, + { + "epoch": 0.33895569364861594, + "grad_norm": 0.7672597765922546, + "learning_rate": 0.00017977373256375194, + "loss": 2.6558, + "step": 4200 + }, + { + "epoch": 0.3390363973851989, + "grad_norm": 0.784187912940979, + "learning_rate": 0.0001797642119897327, + "loss": 2.7005, + "step": 4201 + }, + { + "epoch": 0.33911710112178195, + 
"grad_norm": 0.7359703779220581, + "learning_rate": 0.00017975468942778075, + "loss": 2.6578, + "step": 4202 + }, + { + "epoch": 0.3391978048583649, + "grad_norm": 0.7776080965995789, + "learning_rate": 0.00017974516487813345, + "loss": 2.6747, + "step": 4203 + }, + { + "epoch": 0.33927850859494796, + "grad_norm": 0.6934135556221008, + "learning_rate": 0.00017973563834102824, + "loss": 2.6335, + "step": 4204 + }, + { + "epoch": 0.33935921233153094, + "grad_norm": 0.7715818881988525, + "learning_rate": 0.00017972610981670245, + "loss": 2.6062, + "step": 4205 + }, + { + "epoch": 0.33943991606811397, + "grad_norm": 0.7466367483139038, + "learning_rate": 0.0001797165793053936, + "loss": 2.7243, + "step": 4206 + }, + { + "epoch": 0.33952061980469694, + "grad_norm": 0.7485085129737854, + "learning_rate": 0.00017970704680733926, + "loss": 2.6603, + "step": 4207 + }, + { + "epoch": 0.33960132354128, + "grad_norm": 0.7365782856941223, + "learning_rate": 0.0001796975123227769, + "loss": 2.7179, + "step": 4208 + }, + { + "epoch": 0.33968202727786295, + "grad_norm": 0.8405506014823914, + "learning_rate": 0.00017968797585194422, + "loss": 2.7413, + "step": 4209 + }, + { + "epoch": 0.339762731014446, + "grad_norm": 0.8227888941764832, + "learning_rate": 0.00017967843739507888, + "loss": 2.6814, + "step": 4210 + }, + { + "epoch": 0.33984343475102896, + "grad_norm": 0.8247283697128296, + "learning_rate": 0.0001796688969524186, + "loss": 2.6802, + "step": 4211 + }, + { + "epoch": 0.339924138487612, + "grad_norm": 0.7639476656913757, + "learning_rate": 0.00017965935452420116, + "loss": 2.7422, + "step": 4212 + }, + { + "epoch": 0.34000484222419497, + "grad_norm": 0.7846776247024536, + "learning_rate": 0.00017964981011066436, + "loss": 2.7443, + "step": 4213 + }, + { + "epoch": 0.340085545960778, + "grad_norm": 0.7593334913253784, + "learning_rate": 0.00017964026371204608, + "loss": 2.7179, + "step": 4214 + }, + { + "epoch": 0.340166249697361, + "grad_norm": 0.7878177165985107, + 
"learning_rate": 0.00017963071532858425, + "loss": 2.7118, + "step": 4215 + }, + { + "epoch": 0.340246953433944, + "grad_norm": 0.7728220224380493, + "learning_rate": 0.00017962116496051685, + "loss": 2.6646, + "step": 4216 + }, + { + "epoch": 0.340327657170527, + "grad_norm": 0.8419308066368103, + "learning_rate": 0.00017961161260808187, + "loss": 2.7829, + "step": 4217 + }, + { + "epoch": 0.34040836090711, + "grad_norm": 0.7066153883934021, + "learning_rate": 0.0001796020582715174, + "loss": 2.6498, + "step": 4218 + }, + { + "epoch": 0.340489064643693, + "grad_norm": 0.7976264953613281, + "learning_rate": 0.00017959250195106156, + "loss": 2.7496, + "step": 4219 + }, + { + "epoch": 0.34056976838027603, + "grad_norm": 0.736595630645752, + "learning_rate": 0.0001795829436469525, + "loss": 2.6497, + "step": 4220 + }, + { + "epoch": 0.340650472116859, + "grad_norm": 0.818550705909729, + "learning_rate": 0.0001795733833594285, + "loss": 2.6793, + "step": 4221 + }, + { + "epoch": 0.34073117585344204, + "grad_norm": 0.7712778449058533, + "learning_rate": 0.00017956382108872773, + "loss": 2.6215, + "step": 4222 + }, + { + "epoch": 0.340811879590025, + "grad_norm": 0.746306300163269, + "learning_rate": 0.00017955425683508858, + "loss": 2.7372, + "step": 4223 + }, + { + "epoch": 0.34089258332660805, + "grad_norm": 0.7269306778907776, + "learning_rate": 0.00017954469059874937, + "loss": 2.6438, + "step": 4224 + }, + { + "epoch": 0.340973287063191, + "grad_norm": 0.7426211833953857, + "learning_rate": 0.00017953512237994855, + "loss": 2.6539, + "step": 4225 + }, + { + "epoch": 0.34105399079977405, + "grad_norm": 0.7269948124885559, + "learning_rate": 0.0001795255521789246, + "loss": 2.6833, + "step": 4226 + }, + { + "epoch": 0.34113469453635703, + "grad_norm": 0.7279343605041504, + "learning_rate": 0.00017951597999591598, + "loss": 2.7011, + "step": 4227 + }, + { + "epoch": 0.34121539827294006, + "grad_norm": 0.7554663419723511, + "learning_rate": 0.0001795064058311613, + 
"loss": 2.7036, + "step": 4228 + }, + { + "epoch": 0.34129610200952304, + "grad_norm": 0.7516502141952515, + "learning_rate": 0.00017949682968489912, + "loss": 2.6699, + "step": 4229 + }, + { + "epoch": 0.34137680574610607, + "grad_norm": 0.7931745052337646, + "learning_rate": 0.00017948725155736818, + "loss": 2.6655, + "step": 4230 + }, + { + "epoch": 0.34145750948268905, + "grad_norm": 0.6981344223022461, + "learning_rate": 0.0001794776714488071, + "loss": 2.6987, + "step": 4231 + }, + { + "epoch": 0.3415382132192721, + "grad_norm": 0.7513911724090576, + "learning_rate": 0.00017946808935945474, + "loss": 2.6985, + "step": 4232 + }, + { + "epoch": 0.34161891695585506, + "grad_norm": 0.7373185753822327, + "learning_rate": 0.00017945850528954983, + "loss": 2.7269, + "step": 4233 + }, + { + "epoch": 0.34169962069243803, + "grad_norm": 0.6990259289741516, + "learning_rate": 0.0001794489192393313, + "loss": 2.6763, + "step": 4234 + }, + { + "epoch": 0.34178032442902107, + "grad_norm": 0.7661817669868469, + "learning_rate": 0.00017943933120903797, + "loss": 2.7057, + "step": 4235 + }, + { + "epoch": 0.34186102816560404, + "grad_norm": 0.7570027112960815, + "learning_rate": 0.0001794297411989089, + "loss": 2.7358, + "step": 4236 + }, + { + "epoch": 0.3419417319021871, + "grad_norm": 0.7751824855804443, + "learning_rate": 0.000179420149209183, + "loss": 2.6771, + "step": 4237 + }, + { + "epoch": 0.34202243563877005, + "grad_norm": 0.8028360605239868, + "learning_rate": 0.0001794105552400994, + "loss": 2.6399, + "step": 4238 + }, + { + "epoch": 0.3421031393753531, + "grad_norm": 0.7398171424865723, + "learning_rate": 0.00017940095929189716, + "loss": 2.6532, + "step": 4239 + }, + { + "epoch": 0.34218384311193606, + "grad_norm": 0.8300225138664246, + "learning_rate": 0.0001793913613648155, + "loss": 2.6798, + "step": 4240 + }, + { + "epoch": 0.3422645468485191, + "grad_norm": 0.7501145005226135, + "learning_rate": 0.00017938176145909356, + "loss": 2.7132, + "step": 4241 + 
}, + { + "epoch": 0.34234525058510207, + "grad_norm": 0.7178483605384827, + "learning_rate": 0.00017937215957497063, + "loss": 2.7172, + "step": 4242 + }, + { + "epoch": 0.3424259543216851, + "grad_norm": 0.7207306027412415, + "learning_rate": 0.00017936255571268599, + "loss": 2.629, + "step": 4243 + }, + { + "epoch": 0.3425066580582681, + "grad_norm": 0.7339839935302734, + "learning_rate": 0.00017935294987247899, + "loss": 2.6262, + "step": 4244 + }, + { + "epoch": 0.3425873617948511, + "grad_norm": 0.6977292895317078, + "learning_rate": 0.00017934334205458907, + "loss": 2.6949, + "step": 4245 + }, + { + "epoch": 0.3426680655314341, + "grad_norm": 0.7368096113204956, + "learning_rate": 0.00017933373225925564, + "loss": 2.681, + "step": 4246 + }, + { + "epoch": 0.3427487692680171, + "grad_norm": 0.7234459519386292, + "learning_rate": 0.00017932412048671825, + "loss": 2.6891, + "step": 4247 + }, + { + "epoch": 0.3428294730046001, + "grad_norm": 0.7659995555877686, + "learning_rate": 0.00017931450673721642, + "loss": 2.7394, + "step": 4248 + }, + { + "epoch": 0.3429101767411831, + "grad_norm": 0.7799893617630005, + "learning_rate": 0.00017930489101098974, + "loss": 2.7707, + "step": 4249 + }, + { + "epoch": 0.3429908804777661, + "grad_norm": 0.7063946723937988, + "learning_rate": 0.00017929527330827786, + "loss": 2.6573, + "step": 4250 + }, + { + "epoch": 0.34307158421434913, + "grad_norm": 0.7090561389923096, + "learning_rate": 0.0001792856536293205, + "loss": 2.7095, + "step": 4251 + }, + { + "epoch": 0.3431522879509321, + "grad_norm": 0.8020029067993164, + "learning_rate": 0.0001792760319743574, + "loss": 2.6905, + "step": 4252 + }, + { + "epoch": 0.34323299168751514, + "grad_norm": 0.7221484780311584, + "learning_rate": 0.00017926640834362836, + "loss": 2.6853, + "step": 4253 + }, + { + "epoch": 0.3433136954240981, + "grad_norm": 0.7102623581886292, + "learning_rate": 0.00017925678273737324, + "loss": 2.6821, + "step": 4254 + }, + { + "epoch": 
0.34339439916068115, + "grad_norm": 0.7702807784080505, + "learning_rate": 0.00017924715515583187, + "loss": 2.6986, + "step": 4255 + }, + { + "epoch": 0.34347510289726413, + "grad_norm": 0.7938152551651001, + "learning_rate": 0.00017923752559924425, + "loss": 2.7162, + "step": 4256 + }, + { + "epoch": 0.34355580663384716, + "grad_norm": 0.7340937852859497, + "learning_rate": 0.00017922789406785036, + "loss": 2.6904, + "step": 4257 + }, + { + "epoch": 0.34363651037043014, + "grad_norm": 0.7010839581489563, + "learning_rate": 0.00017921826056189026, + "loss": 2.6969, + "step": 4258 + }, + { + "epoch": 0.34371721410701317, + "grad_norm": 0.758178174495697, + "learning_rate": 0.00017920862508160403, + "loss": 2.6391, + "step": 4259 + }, + { + "epoch": 0.34379791784359615, + "grad_norm": 0.7861726880073547, + "learning_rate": 0.0001791989876272318, + "loss": 2.7088, + "step": 4260 + }, + { + "epoch": 0.3438786215801792, + "grad_norm": 0.6764364242553711, + "learning_rate": 0.00017918934819901377, + "loss": 2.6221, + "step": 4261 + }, + { + "epoch": 0.34395932531676215, + "grad_norm": 0.76728355884552, + "learning_rate": 0.00017917970679719018, + "loss": 2.6854, + "step": 4262 + }, + { + "epoch": 0.3440400290533452, + "grad_norm": 0.7161166071891785, + "learning_rate": 0.00017917006342200133, + "loss": 2.7048, + "step": 4263 + }, + { + "epoch": 0.34412073278992816, + "grad_norm": 0.7182073593139648, + "learning_rate": 0.00017916041807368753, + "loss": 2.7559, + "step": 4264 + }, + { + "epoch": 0.3442014365265112, + "grad_norm": 0.832258403301239, + "learning_rate": 0.0001791507707524892, + "loss": 2.6743, + "step": 4265 + }, + { + "epoch": 0.34428214026309417, + "grad_norm": 0.7048495411872864, + "learning_rate": 0.00017914112145864675, + "loss": 2.693, + "step": 4266 + }, + { + "epoch": 0.3443628439996772, + "grad_norm": 0.7475518584251404, + "learning_rate": 0.00017913147019240068, + "loss": 2.6881, + "step": 4267 + }, + { + "epoch": 0.3444435477362602, + "grad_norm": 
0.72830730676651, + "learning_rate": 0.00017912181695399154, + "loss": 2.659, + "step": 4268 + }, + { + "epoch": 0.3445242514728432, + "grad_norm": 0.7183662056922913, + "learning_rate": 0.00017911216174365988, + "loss": 2.6611, + "step": 4269 + }, + { + "epoch": 0.3446049552094262, + "grad_norm": 0.7487103343009949, + "learning_rate": 0.0001791025045616463, + "loss": 2.6518, + "step": 4270 + }, + { + "epoch": 0.3446856589460092, + "grad_norm": 0.7733812928199768, + "learning_rate": 0.0001790928454081916, + "loss": 2.6359, + "step": 4271 + }, + { + "epoch": 0.3447663626825922, + "grad_norm": 0.7774991393089294, + "learning_rate": 0.00017908318428353642, + "loss": 2.6654, + "step": 4272 + }, + { + "epoch": 0.34484706641917523, + "grad_norm": 0.6882895827293396, + "learning_rate": 0.00017907352118792157, + "loss": 2.686, + "step": 4273 + }, + { + "epoch": 0.3449277701557582, + "grad_norm": 0.7571535110473633, + "learning_rate": 0.00017906385612158785, + "loss": 2.7108, + "step": 4274 + }, + { + "epoch": 0.34500847389234124, + "grad_norm": 0.7324517369270325, + "learning_rate": 0.00017905418908477615, + "loss": 2.6663, + "step": 4275 + }, + { + "epoch": 0.3450891776289242, + "grad_norm": 0.7476221919059753, + "learning_rate": 0.00017904452007772744, + "loss": 2.7202, + "step": 4276 + }, + { + "epoch": 0.34516988136550725, + "grad_norm": 0.7648386359214783, + "learning_rate": 0.00017903484910068268, + "loss": 2.6759, + "step": 4277 + }, + { + "epoch": 0.3452505851020902, + "grad_norm": 0.7375434637069702, + "learning_rate": 0.00017902517615388282, + "loss": 2.6603, + "step": 4278 + }, + { + "epoch": 0.34533128883867326, + "grad_norm": 0.7248519062995911, + "learning_rate": 0.00017901550123756906, + "loss": 2.7147, + "step": 4279 + }, + { + "epoch": 0.34541199257525623, + "grad_norm": 0.7264916896820068, + "learning_rate": 0.0001790058243519824, + "loss": 2.6992, + "step": 4280 + }, + { + "epoch": 0.34549269631183926, + "grad_norm": 0.8370026350021362, + 
"learning_rate": 0.0001789961454973641, + "loss": 2.7114, + "step": 4281 + }, + { + "epoch": 0.34557340004842224, + "grad_norm": 0.72071373462677, + "learning_rate": 0.00017898646467395538, + "loss": 2.6957, + "step": 4282 + }, + { + "epoch": 0.3456541037850053, + "grad_norm": 0.7355397343635559, + "learning_rate": 0.0001789767818819975, + "loss": 2.6744, + "step": 4283 + }, + { + "epoch": 0.34573480752158825, + "grad_norm": 0.734756588935852, + "learning_rate": 0.00017896709712173173, + "loss": 2.726, + "step": 4284 + }, + { + "epoch": 0.3458155112581712, + "grad_norm": 0.7890543341636658, + "learning_rate": 0.00017895741039339945, + "loss": 2.6726, + "step": 4285 + }, + { + "epoch": 0.34589621499475426, + "grad_norm": 0.7768735885620117, + "learning_rate": 0.00017894772169724216, + "loss": 2.7617, + "step": 4286 + }, + { + "epoch": 0.34597691873133724, + "grad_norm": 0.7306547164916992, + "learning_rate": 0.00017893803103350125, + "loss": 2.6253, + "step": 4287 + }, + { + "epoch": 0.34605762246792027, + "grad_norm": 0.767066478729248, + "learning_rate": 0.00017892833840241828, + "loss": 2.6522, + "step": 4288 + }, + { + "epoch": 0.34613832620450324, + "grad_norm": 0.7018097639083862, + "learning_rate": 0.00017891864380423477, + "loss": 2.7111, + "step": 4289 + }, + { + "epoch": 0.3462190299410863, + "grad_norm": 0.7305615544319153, + "learning_rate": 0.00017890894723919236, + "loss": 2.6924, + "step": 4290 + }, + { + "epoch": 0.34629973367766925, + "grad_norm": 0.7588002681732178, + "learning_rate": 0.00017889924870753275, + "loss": 2.6952, + "step": 4291 + }, + { + "epoch": 0.3463804374142523, + "grad_norm": 0.7162861824035645, + "learning_rate": 0.0001788895482094976, + "loss": 2.6239, + "step": 4292 + }, + { + "epoch": 0.34646114115083526, + "grad_norm": 0.7494024634361267, + "learning_rate": 0.00017887984574532868, + "loss": 2.6763, + "step": 4293 + }, + { + "epoch": 0.3465418448874183, + "grad_norm": 0.7100037336349487, + "learning_rate": 
0.0001788701413152678, + "loss": 2.6378, + "step": 4294 + }, + { + "epoch": 0.34662254862400127, + "grad_norm": 0.7316900491714478, + "learning_rate": 0.00017886043491955684, + "loss": 2.7001, + "step": 4295 + }, + { + "epoch": 0.3467032523605843, + "grad_norm": 0.8467028737068176, + "learning_rate": 0.00017885072655843772, + "loss": 2.7536, + "step": 4296 + }, + { + "epoch": 0.3467839560971673, + "grad_norm": 0.7248796820640564, + "learning_rate": 0.00017884101623215237, + "loss": 2.6956, + "step": 4297 + }, + { + "epoch": 0.3468646598337503, + "grad_norm": 0.7183107137680054, + "learning_rate": 0.0001788313039409428, + "loss": 2.743, + "step": 4298 + }, + { + "epoch": 0.3469453635703333, + "grad_norm": 0.6835163831710815, + "learning_rate": 0.00017882158968505105, + "loss": 2.7016, + "step": 4299 + }, + { + "epoch": 0.3470260673069163, + "grad_norm": 0.7973365783691406, + "learning_rate": 0.00017881187346471925, + "loss": 2.6927, + "step": 4300 + }, + { + "epoch": 0.3471067710434993, + "grad_norm": 0.700040876865387, + "learning_rate": 0.00017880215528018954, + "loss": 2.6961, + "step": 4301 + }, + { + "epoch": 0.34718747478008233, + "grad_norm": 0.8180583119392395, + "learning_rate": 0.00017879243513170415, + "loss": 2.642, + "step": 4302 + }, + { + "epoch": 0.3472681785166653, + "grad_norm": 0.7134599685668945, + "learning_rate": 0.0001787827130195053, + "loss": 2.6901, + "step": 4303 + }, + { + "epoch": 0.34734888225324834, + "grad_norm": 0.767998218536377, + "learning_rate": 0.0001787729889438353, + "loss": 2.6472, + "step": 4304 + }, + { + "epoch": 0.3474295859898313, + "grad_norm": 0.7260780930519104, + "learning_rate": 0.0001787632629049365, + "loss": 2.6791, + "step": 4305 + }, + { + "epoch": 0.34751028972641435, + "grad_norm": 0.6918236613273621, + "learning_rate": 0.00017875353490305132, + "loss": 2.6596, + "step": 4306 + }, + { + "epoch": 0.3475909934629973, + "grad_norm": 0.7734197974205017, + "learning_rate": 0.00017874380493842216, + "loss": 2.6402, 
+ "step": 4307 + }, + { + "epoch": 0.34767169719958035, + "grad_norm": 0.7051037549972534, + "learning_rate": 0.00017873407301129154, + "loss": 2.7517, + "step": 4308 + }, + { + "epoch": 0.34775240093616333, + "grad_norm": 0.7026919722557068, + "learning_rate": 0.00017872433912190203, + "loss": 2.7058, + "step": 4309 + }, + { + "epoch": 0.34783310467274636, + "grad_norm": 0.7248546481132507, + "learning_rate": 0.00017871460327049618, + "loss": 2.666, + "step": 4310 + }, + { + "epoch": 0.34791380840932934, + "grad_norm": 0.7348842620849609, + "learning_rate": 0.0001787048654573167, + "loss": 2.7712, + "step": 4311 + }, + { + "epoch": 0.34799451214591237, + "grad_norm": 0.7923693656921387, + "learning_rate": 0.00017869512568260618, + "loss": 2.6469, + "step": 4312 + }, + { + "epoch": 0.34807521588249535, + "grad_norm": 0.7604066729545593, + "learning_rate": 0.00017868538394660743, + "loss": 2.7152, + "step": 4313 + }, + { + "epoch": 0.3481559196190784, + "grad_norm": 0.6811137795448303, + "learning_rate": 0.00017867564024956324, + "loss": 2.715, + "step": 4314 + }, + { + "epoch": 0.34823662335566136, + "grad_norm": 0.7292799353599548, + "learning_rate": 0.00017866589459171643, + "loss": 2.6374, + "step": 4315 + }, + { + "epoch": 0.3483173270922444, + "grad_norm": 0.6961250901222229, + "learning_rate": 0.0001786561469733099, + "loss": 2.6592, + "step": 4316 + }, + { + "epoch": 0.34839803082882737, + "grad_norm": 0.7447086572647095, + "learning_rate": 0.00017864639739458658, + "loss": 2.6965, + "step": 4317 + }, + { + "epoch": 0.3484787345654104, + "grad_norm": 0.7107378244400024, + "learning_rate": 0.00017863664585578942, + "loss": 2.7057, + "step": 4318 + }, + { + "epoch": 0.3485594383019934, + "grad_norm": 0.7372235655784607, + "learning_rate": 0.00017862689235716153, + "loss": 2.6289, + "step": 4319 + }, + { + "epoch": 0.3486401420385764, + "grad_norm": 0.7360481023788452, + "learning_rate": 0.00017861713689894593, + "loss": 2.7208, + "step": 4320 + }, + { + 
"epoch": 0.3487208457751594, + "grad_norm": 0.7378106713294983, + "learning_rate": 0.00017860737948138575, + "loss": 2.6836, + "step": 4321 + }, + { + "epoch": 0.3488015495117424, + "grad_norm": 0.7110548615455627, + "learning_rate": 0.00017859762010472423, + "loss": 2.6941, + "step": 4322 + }, + { + "epoch": 0.3488822532483254, + "grad_norm": 0.7419706583023071, + "learning_rate": 0.00017858785876920455, + "loss": 2.6591, + "step": 4323 + }, + { + "epoch": 0.3489629569849084, + "grad_norm": 0.7759542465209961, + "learning_rate": 0.00017857809547506997, + "loss": 2.6966, + "step": 4324 + }, + { + "epoch": 0.3490436607214914, + "grad_norm": 0.7894207239151001, + "learning_rate": 0.0001785683302225639, + "loss": 2.7298, + "step": 4325 + }, + { + "epoch": 0.34912436445807443, + "grad_norm": 0.7342399954795837, + "learning_rate": 0.0001785585630119296, + "loss": 2.6998, + "step": 4326 + }, + { + "epoch": 0.3492050681946574, + "grad_norm": 0.8684173822402954, + "learning_rate": 0.0001785487938434106, + "loss": 2.7179, + "step": 4327 + }, + { + "epoch": 0.34928577193124044, + "grad_norm": 0.7557523846626282, + "learning_rate": 0.00017853902271725033, + "loss": 2.7081, + "step": 4328 + }, + { + "epoch": 0.3493664756678234, + "grad_norm": 0.7910173535346985, + "learning_rate": 0.0001785292496336923, + "loss": 2.718, + "step": 4329 + }, + { + "epoch": 0.34944717940440645, + "grad_norm": 0.7878917455673218, + "learning_rate": 0.00017851947459298007, + "loss": 2.674, + "step": 4330 + }, + { + "epoch": 0.3495278831409894, + "grad_norm": 0.7290656566619873, + "learning_rate": 0.0001785096975953573, + "loss": 2.6962, + "step": 4331 + }, + { + "epoch": 0.34960858687757246, + "grad_norm": 0.8465737104415894, + "learning_rate": 0.00017849991864106763, + "loss": 2.6793, + "step": 4332 + }, + { + "epoch": 0.34968929061415543, + "grad_norm": 0.7183132171630859, + "learning_rate": 0.0001784901377303548, + "loss": 2.6902, + "step": 4333 + }, + { + "epoch": 0.34976999435073847, + 
"grad_norm": 0.7535461783409119, + "learning_rate": 0.00017848035486346255, + "loss": 2.7153, + "step": 4334 + }, + { + "epoch": 0.34985069808732144, + "grad_norm": 0.778734028339386, + "learning_rate": 0.0001784705700406347, + "loss": 2.6316, + "step": 4335 + }, + { + "epoch": 0.3499314018239044, + "grad_norm": 0.6937401294708252, + "learning_rate": 0.00017846078326211516, + "loss": 2.6902, + "step": 4336 + }, + { + "epoch": 0.35001210556048745, + "grad_norm": 0.7450751066207886, + "learning_rate": 0.00017845099452814774, + "loss": 2.6898, + "step": 4337 + }, + { + "epoch": 0.35009280929707043, + "grad_norm": 0.7535614967346191, + "learning_rate": 0.0001784412038389765, + "loss": 2.6969, + "step": 4338 + }, + { + "epoch": 0.35017351303365346, + "grad_norm": 0.6971385478973389, + "learning_rate": 0.00017843141119484543, + "loss": 2.6517, + "step": 4339 + }, + { + "epoch": 0.35025421677023644, + "grad_norm": 0.7233202457427979, + "learning_rate": 0.00017842161659599858, + "loss": 2.7332, + "step": 4340 + }, + { + "epoch": 0.35033492050681947, + "grad_norm": 0.7870340347290039, + "learning_rate": 0.00017841182004268, + "loss": 2.6485, + "step": 4341 + }, + { + "epoch": 0.35041562424340245, + "grad_norm": 0.7387053966522217, + "learning_rate": 0.0001784020215351339, + "loss": 2.6945, + "step": 4342 + }, + { + "epoch": 0.3504963279799855, + "grad_norm": 0.8357887268066406, + "learning_rate": 0.00017839222107360453, + "loss": 2.703, + "step": 4343 + }, + { + "epoch": 0.35057703171656845, + "grad_norm": 0.7197332978248596, + "learning_rate": 0.000178382418658336, + "loss": 2.6649, + "step": 4344 + }, + { + "epoch": 0.3506577354531515, + "grad_norm": 0.7416980862617493, + "learning_rate": 0.0001783726142895728, + "loss": 2.7393, + "step": 4345 + }, + { + "epoch": 0.35073843918973446, + "grad_norm": 0.6807832717895508, + "learning_rate": 0.00017836280796755912, + "loss": 2.6619, + "step": 4346 + }, + { + "epoch": 0.3508191429263175, + "grad_norm": 0.6858795285224915, + 
"learning_rate": 0.00017835299969253945, + "loss": 2.6266, + "step": 4347 + }, + { + "epoch": 0.35089984666290047, + "grad_norm": 0.8432363867759705, + "learning_rate": 0.0001783431894647582, + "loss": 2.6534, + "step": 4348 + }, + { + "epoch": 0.3509805503994835, + "grad_norm": 0.7240749001502991, + "learning_rate": 0.0001783333772844599, + "loss": 2.6851, + "step": 4349 + }, + { + "epoch": 0.3510612541360665, + "grad_norm": 0.7814531326293945, + "learning_rate": 0.00017832356315188906, + "loss": 2.7085, + "step": 4350 + }, + { + "epoch": 0.3511419578726495, + "grad_norm": 0.6989716291427612, + "learning_rate": 0.00017831374706729026, + "loss": 2.6674, + "step": 4351 + }, + { + "epoch": 0.3512226616092325, + "grad_norm": 0.7118446230888367, + "learning_rate": 0.0001783039290309082, + "loss": 2.6837, + "step": 4352 + }, + { + "epoch": 0.3513033653458155, + "grad_norm": 0.7641892433166504, + "learning_rate": 0.00017829410904298754, + "loss": 2.6415, + "step": 4353 + }, + { + "epoch": 0.3513840690823985, + "grad_norm": 0.6975794434547424, + "learning_rate": 0.000178284287103773, + "loss": 2.6679, + "step": 4354 + }, + { + "epoch": 0.35146477281898153, + "grad_norm": 0.7192546725273132, + "learning_rate": 0.00017827446321350943, + "loss": 2.6539, + "step": 4355 + }, + { + "epoch": 0.3515454765555645, + "grad_norm": 0.8749549388885498, + "learning_rate": 0.00017826463737244155, + "loss": 2.7254, + "step": 4356 + }, + { + "epoch": 0.35162618029214754, + "grad_norm": 0.8509732484817505, + "learning_rate": 0.0001782548095808144, + "loss": 2.7679, + "step": 4357 + }, + { + "epoch": 0.3517068840287305, + "grad_norm": 0.7647901773452759, + "learning_rate": 0.00017824497983887278, + "loss": 2.7049, + "step": 4358 + }, + { + "epoch": 0.35178758776531355, + "grad_norm": 0.7551973462104797, + "learning_rate": 0.00017823514814686178, + "loss": 2.7086, + "step": 4359 + }, + { + "epoch": 0.3518682915018965, + "grad_norm": 0.730140209197998, + "learning_rate": 
0.00017822531450502633, + "loss": 2.6334, + "step": 4360 + }, + { + "epoch": 0.35194899523847956, + "grad_norm": 0.8210160136222839, + "learning_rate": 0.00017821547891361158, + "loss": 2.7248, + "step": 4361 + }, + { + "epoch": 0.35202969897506253, + "grad_norm": 0.761972963809967, + "learning_rate": 0.00017820564137286264, + "loss": 2.6502, + "step": 4362 + }, + { + "epoch": 0.35211040271164556, + "grad_norm": 0.7564061284065247, + "learning_rate": 0.00017819580188302466, + "loss": 2.6795, + "step": 4363 + }, + { + "epoch": 0.35219110644822854, + "grad_norm": 0.7382947206497192, + "learning_rate": 0.00017818596044434293, + "loss": 2.6754, + "step": 4364 + }, + { + "epoch": 0.3522718101848116, + "grad_norm": 0.737194836139679, + "learning_rate": 0.00017817611705706266, + "loss": 2.7098, + "step": 4365 + }, + { + "epoch": 0.35235251392139455, + "grad_norm": 0.7183281779289246, + "learning_rate": 0.0001781662717214292, + "loss": 2.6528, + "step": 4366 + }, + { + "epoch": 0.3524332176579776, + "grad_norm": 0.7785990238189697, + "learning_rate": 0.00017815642443768794, + "loss": 2.6419, + "step": 4367 + }, + { + "epoch": 0.35251392139456056, + "grad_norm": 0.7114452719688416, + "learning_rate": 0.00017814657520608427, + "loss": 2.7088, + "step": 4368 + }, + { + "epoch": 0.3525946251311436, + "grad_norm": 0.746969997882843, + "learning_rate": 0.00017813672402686365, + "loss": 2.7199, + "step": 4369 + }, + { + "epoch": 0.35267532886772657, + "grad_norm": 0.7700605988502502, + "learning_rate": 0.00017812687090027165, + "loss": 2.6713, + "step": 4370 + }, + { + "epoch": 0.3527560326043096, + "grad_norm": 0.7733504772186279, + "learning_rate": 0.0001781170158265538, + "loss": 2.6916, + "step": 4371 + }, + { + "epoch": 0.3528367363408926, + "grad_norm": 0.7769689559936523, + "learning_rate": 0.00017810715880595566, + "loss": 2.7787, + "step": 4372 + }, + { + "epoch": 0.3529174400774756, + "grad_norm": 0.7538996934890747, + "learning_rate": 0.000178097299838723, + "loss": 
2.6964, + "step": 4373 + }, + { + "epoch": 0.3529981438140586, + "grad_norm": 0.7777890563011169, + "learning_rate": 0.00017808743892510146, + "loss": 2.6882, + "step": 4374 + }, + { + "epoch": 0.3530788475506416, + "grad_norm": 0.8331751823425293, + "learning_rate": 0.00017807757606533683, + "loss": 2.7113, + "step": 4375 + }, + { + "epoch": 0.3531595512872246, + "grad_norm": 0.8039207458496094, + "learning_rate": 0.00017806771125967492, + "loss": 2.6694, + "step": 4376 + }, + { + "epoch": 0.3532402550238076, + "grad_norm": 0.7727575898170471, + "learning_rate": 0.00017805784450836154, + "loss": 2.6639, + "step": 4377 + }, + { + "epoch": 0.3533209587603906, + "grad_norm": 0.8247967958450317, + "learning_rate": 0.00017804797581164264, + "loss": 2.6539, + "step": 4378 + }, + { + "epoch": 0.35340166249697363, + "grad_norm": 0.7574009299278259, + "learning_rate": 0.0001780381051697642, + "loss": 2.7163, + "step": 4379 + }, + { + "epoch": 0.3534823662335566, + "grad_norm": 0.7304368615150452, + "learning_rate": 0.0001780282325829721, + "loss": 2.5759, + "step": 4380 + }, + { + "epoch": 0.35356306997013964, + "grad_norm": 0.7133963704109192, + "learning_rate": 0.00017801835805151257, + "loss": 2.7008, + "step": 4381 + }, + { + "epoch": 0.3536437737067226, + "grad_norm": 0.7525407075881958, + "learning_rate": 0.00017800848157563157, + "loss": 2.6785, + "step": 4382 + }, + { + "epoch": 0.35372447744330565, + "grad_norm": 0.7306779623031616, + "learning_rate": 0.00017799860315557528, + "loss": 2.6454, + "step": 4383 + }, + { + "epoch": 0.35380518117988863, + "grad_norm": 0.6657043695449829, + "learning_rate": 0.00017798872279158994, + "loss": 2.708, + "step": 4384 + }, + { + "epoch": 0.35388588491647166, + "grad_norm": 0.7655978202819824, + "learning_rate": 0.00017797884048392177, + "loss": 2.727, + "step": 4385 + }, + { + "epoch": 0.35396658865305464, + "grad_norm": 0.6802939176559448, + "learning_rate": 0.00017796895623281702, + "loss": 2.659, + "step": 4386 + }, + { + 
"epoch": 0.3540472923896376, + "grad_norm": 0.7191160917282104, + "learning_rate": 0.00017795907003852207, + "loss": 2.6335, + "step": 4387 + }, + { + "epoch": 0.35412799612622065, + "grad_norm": 0.7771886587142944, + "learning_rate": 0.00017794918190128337, + "loss": 2.6658, + "step": 4388 + }, + { + "epoch": 0.3542086998628036, + "grad_norm": 0.7133512496948242, + "learning_rate": 0.00017793929182134723, + "loss": 2.6701, + "step": 4389 + }, + { + "epoch": 0.35428940359938665, + "grad_norm": 0.7795221209526062, + "learning_rate": 0.00017792939979896022, + "loss": 2.6932, + "step": 4390 + }, + { + "epoch": 0.35437010733596963, + "grad_norm": 0.726767897605896, + "learning_rate": 0.00017791950583436887, + "loss": 2.676, + "step": 4391 + }, + { + "epoch": 0.35445081107255266, + "grad_norm": 0.7447288632392883, + "learning_rate": 0.00017790960992781972, + "loss": 2.7195, + "step": 4392 + }, + { + "epoch": 0.35453151480913564, + "grad_norm": 0.8053649663925171, + "learning_rate": 0.0001778997120795595, + "loss": 2.6851, + "step": 4393 + }, + { + "epoch": 0.35461221854571867, + "grad_norm": 0.7258884906768799, + "learning_rate": 0.00017788981228983474, + "loss": 2.6819, + "step": 4394 + }, + { + "epoch": 0.35469292228230165, + "grad_norm": 0.7279395461082458, + "learning_rate": 0.0001778799105588923, + "loss": 2.6954, + "step": 4395 + }, + { + "epoch": 0.3547736260188847, + "grad_norm": 0.7372962236404419, + "learning_rate": 0.0001778700068869789, + "loss": 2.7049, + "step": 4396 + }, + { + "epoch": 0.35485432975546766, + "grad_norm": 0.712003767490387, + "learning_rate": 0.00017786010127434135, + "loss": 2.7413, + "step": 4397 + }, + { + "epoch": 0.3549350334920507, + "grad_norm": 0.7487424612045288, + "learning_rate": 0.0001778501937212266, + "loss": 2.7231, + "step": 4398 + }, + { + "epoch": 0.35501573722863367, + "grad_norm": 0.73053377866745, + "learning_rate": 0.00017784028422788146, + "loss": 2.7029, + "step": 4399 + }, + { + "epoch": 0.3550964409652167, + 
"grad_norm": 0.697062611579895, + "learning_rate": 0.00017783037279455298, + "loss": 2.7139, + "step": 4400 + }, + { + "epoch": 0.3551771447017997, + "grad_norm": 0.7750880718231201, + "learning_rate": 0.00017782045942148819, + "loss": 2.6601, + "step": 4401 + }, + { + "epoch": 0.3552578484383827, + "grad_norm": 0.7124977111816406, + "learning_rate": 0.00017781054410893413, + "loss": 2.6119, + "step": 4402 + }, + { + "epoch": 0.3553385521749657, + "grad_norm": 0.7773111462593079, + "learning_rate": 0.00017780062685713785, + "loss": 2.7181, + "step": 4403 + }, + { + "epoch": 0.3554192559115487, + "grad_norm": 0.7282142639160156, + "learning_rate": 0.00017779070766634663, + "loss": 2.7141, + "step": 4404 + }, + { + "epoch": 0.3554999596481317, + "grad_norm": 0.8578598499298096, + "learning_rate": 0.0001777807865368076, + "loss": 2.7628, + "step": 4405 + }, + { + "epoch": 0.3555806633847147, + "grad_norm": 0.7126399874687195, + "learning_rate": 0.00017777086346876809, + "loss": 2.6914, + "step": 4406 + }, + { + "epoch": 0.3556613671212977, + "grad_norm": 0.8026365637779236, + "learning_rate": 0.00017776093846247533, + "loss": 2.7059, + "step": 4407 + }, + { + "epoch": 0.35574207085788073, + "grad_norm": 0.7839884161949158, + "learning_rate": 0.0001777510115181767, + "loss": 2.7265, + "step": 4408 + }, + { + "epoch": 0.3558227745944637, + "grad_norm": 0.7498767971992493, + "learning_rate": 0.00017774108263611966, + "loss": 2.7201, + "step": 4409 + }, + { + "epoch": 0.35590347833104674, + "grad_norm": 0.6996301412582397, + "learning_rate": 0.0001777311518165516, + "loss": 2.6271, + "step": 4410 + }, + { + "epoch": 0.3559841820676297, + "grad_norm": 0.7721461057662964, + "learning_rate": 0.00017772121905972003, + "loss": 2.6739, + "step": 4411 + }, + { + "epoch": 0.35606488580421275, + "grad_norm": 0.8018803000450134, + "learning_rate": 0.00017771128436587256, + "loss": 2.7092, + "step": 4412 + }, + { + "epoch": 0.3561455895407957, + "grad_norm": 0.7185639142990112, + 
"learning_rate": 0.0001777013477352567, + "loss": 2.6996, + "step": 4413 + }, + { + "epoch": 0.35622629327737876, + "grad_norm": 0.7218519449234009, + "learning_rate": 0.0001776914091681202, + "loss": 2.6555, + "step": 4414 + }, + { + "epoch": 0.35630699701396173, + "grad_norm": 0.7234479188919067, + "learning_rate": 0.00017768146866471062, + "loss": 2.6762, + "step": 4415 + }, + { + "epoch": 0.35638770075054477, + "grad_norm": 0.6723350286483765, + "learning_rate": 0.00017767152622527582, + "loss": 2.6272, + "step": 4416 + }, + { + "epoch": 0.35646840448712774, + "grad_norm": 0.7281947731971741, + "learning_rate": 0.00017766158185006356, + "loss": 2.7216, + "step": 4417 + }, + { + "epoch": 0.3565491082237108, + "grad_norm": 0.8350874781608582, + "learning_rate": 0.00017765163553932166, + "loss": 2.6619, + "step": 4418 + }, + { + "epoch": 0.35662981196029375, + "grad_norm": 0.7454007267951965, + "learning_rate": 0.00017764168729329801, + "loss": 2.6623, + "step": 4419 + }, + { + "epoch": 0.3567105156968768, + "grad_norm": 0.7419041395187378, + "learning_rate": 0.00017763173711224058, + "loss": 2.6773, + "step": 4420 + }, + { + "epoch": 0.35679121943345976, + "grad_norm": 0.7965987920761108, + "learning_rate": 0.0001776217849963973, + "loss": 2.6426, + "step": 4421 + }, + { + "epoch": 0.3568719231700428, + "grad_norm": 0.7093302607536316, + "learning_rate": 0.00017761183094601622, + "loss": 2.6745, + "step": 4422 + }, + { + "epoch": 0.35695262690662577, + "grad_norm": 0.7937216758728027, + "learning_rate": 0.00017760187496134548, + "loss": 2.7275, + "step": 4423 + }, + { + "epoch": 0.3570333306432088, + "grad_norm": 0.9185259938240051, + "learning_rate": 0.00017759191704263313, + "loss": 2.7055, + "step": 4424 + }, + { + "epoch": 0.3571140343797918, + "grad_norm": 0.7365124821662903, + "learning_rate": 0.00017758195719012743, + "loss": 2.6504, + "step": 4425 + }, + { + "epoch": 0.3571947381163748, + "grad_norm": 0.6992416977882385, + "learning_rate": 
0.0001775719954040765, + "loss": 2.6684, + "step": 4426 + }, + { + "epoch": 0.3572754418529578, + "grad_norm": 0.7742372751235962, + "learning_rate": 0.00017756203168472866, + "loss": 2.6877, + "step": 4427 + }, + { + "epoch": 0.3573561455895408, + "grad_norm": 0.7448472380638123, + "learning_rate": 0.0001775520660323323, + "loss": 2.7027, + "step": 4428 + }, + { + "epoch": 0.3574368493261238, + "grad_norm": 0.7201915979385376, + "learning_rate": 0.00017754209844713569, + "loss": 2.7046, + "step": 4429 + }, + { + "epoch": 0.3575175530627068, + "grad_norm": 0.6675081253051758, + "learning_rate": 0.0001775321289293873, + "loss": 2.6503, + "step": 4430 + }, + { + "epoch": 0.3575982567992898, + "grad_norm": 0.7252706289291382, + "learning_rate": 0.0001775221574793356, + "loss": 2.6053, + "step": 4431 + }, + { + "epoch": 0.35767896053587284, + "grad_norm": 0.7134702801704407, + "learning_rate": 0.00017751218409722906, + "loss": 2.6857, + "step": 4432 + }, + { + "epoch": 0.3577596642724558, + "grad_norm": 0.7074102163314819, + "learning_rate": 0.0001775022087833163, + "loss": 2.6871, + "step": 4433 + }, + { + "epoch": 0.35784036800903885, + "grad_norm": 0.693520724773407, + "learning_rate": 0.00017749223153784588, + "loss": 2.6629, + "step": 4434 + }, + { + "epoch": 0.3579210717456218, + "grad_norm": 0.6933221817016602, + "learning_rate": 0.0001774822523610665, + "loss": 2.6793, + "step": 4435 + }, + { + "epoch": 0.35800177548220485, + "grad_norm": 0.75307297706604, + "learning_rate": 0.00017747227125322685, + "loss": 2.7012, + "step": 4436 + }, + { + "epoch": 0.35808247921878783, + "grad_norm": 0.7732915282249451, + "learning_rate": 0.0001774622882145757, + "loss": 2.6908, + "step": 4437 + }, + { + "epoch": 0.3581631829553708, + "grad_norm": 0.7067054510116577, + "learning_rate": 0.0001774523032453618, + "loss": 2.7494, + "step": 4438 + }, + { + "epoch": 0.35824388669195384, + "grad_norm": 0.7412838935852051, + "learning_rate": 0.00017744231634583406, + "loss": 2.6734, 
+ "step": 4439 + }, + { + "epoch": 0.3583245904285368, + "grad_norm": 0.7663930654525757, + "learning_rate": 0.00017743232751624136, + "loss": 2.6952, + "step": 4440 + }, + { + "epoch": 0.35840529416511985, + "grad_norm": 0.70650714635849, + "learning_rate": 0.00017742233675683268, + "loss": 2.6806, + "step": 4441 + }, + { + "epoch": 0.3584859979017028, + "grad_norm": 0.698310375213623, + "learning_rate": 0.00017741234406785692, + "loss": 2.6471, + "step": 4442 + }, + { + "epoch": 0.35856670163828586, + "grad_norm": 0.7274026274681091, + "learning_rate": 0.00017740234944956323, + "loss": 2.6688, + "step": 4443 + }, + { + "epoch": 0.35864740537486883, + "grad_norm": 0.6944074034690857, + "learning_rate": 0.00017739235290220067, + "loss": 2.6954, + "step": 4444 + }, + { + "epoch": 0.35872810911145186, + "grad_norm": 0.841995358467102, + "learning_rate": 0.00017738235442601834, + "loss": 2.7169, + "step": 4445 + }, + { + "epoch": 0.35880881284803484, + "grad_norm": 0.74863201379776, + "learning_rate": 0.00017737235402126545, + "loss": 2.6534, + "step": 4446 + }, + { + "epoch": 0.3588895165846179, + "grad_norm": 0.7260422110557556, + "learning_rate": 0.00017736235168819126, + "loss": 2.6266, + "step": 4447 + }, + { + "epoch": 0.35897022032120085, + "grad_norm": 0.7450951337814331, + "learning_rate": 0.00017735234742704504, + "loss": 2.7328, + "step": 4448 + }, + { + "epoch": 0.3590509240577839, + "grad_norm": 0.6942493319511414, + "learning_rate": 0.00017734234123807614, + "loss": 2.7219, + "step": 4449 + }, + { + "epoch": 0.35913162779436686, + "grad_norm": 0.7676761746406555, + "learning_rate": 0.00017733233312153393, + "loss": 2.6594, + "step": 4450 + }, + { + "epoch": 0.3592123315309499, + "grad_norm": 0.7446104288101196, + "learning_rate": 0.00017732232307766778, + "loss": 2.6877, + "step": 4451 + }, + { + "epoch": 0.35929303526753287, + "grad_norm": 0.7551130056381226, + "learning_rate": 0.00017731231110672727, + "loss": 2.672, + "step": 4452 + }, + { + "epoch": 
0.3593737390041159, + "grad_norm": 0.6876464486122131, + "learning_rate": 0.00017730229720896182, + "loss": 2.6658, + "step": 4453 + }, + { + "epoch": 0.3594544427406989, + "grad_norm": 0.6992844343185425, + "learning_rate": 0.00017729228138462107, + "loss": 2.6805, + "step": 4454 + }, + { + "epoch": 0.3595351464772819, + "grad_norm": 0.8437497615814209, + "learning_rate": 0.00017728226363395466, + "loss": 2.6884, + "step": 4455 + }, + { + "epoch": 0.3596158502138649, + "grad_norm": 0.7669322490692139, + "learning_rate": 0.00017727224395721217, + "loss": 2.6432, + "step": 4456 + }, + { + "epoch": 0.3596965539504479, + "grad_norm": 0.7613428831100464, + "learning_rate": 0.0001772622223546434, + "loss": 2.6124, + "step": 4457 + }, + { + "epoch": 0.3597772576870309, + "grad_norm": 0.719932496547699, + "learning_rate": 0.00017725219882649807, + "loss": 2.6623, + "step": 4458 + }, + { + "epoch": 0.3598579614236139, + "grad_norm": 0.7650800347328186, + "learning_rate": 0.000177242173373026, + "loss": 2.7551, + "step": 4459 + }, + { + "epoch": 0.3599386651601969, + "grad_norm": 0.7423754930496216, + "learning_rate": 0.0001772321459944771, + "loss": 2.7375, + "step": 4460 + }, + { + "epoch": 0.36001936889677993, + "grad_norm": 0.7602835297584534, + "learning_rate": 0.0001772221166911012, + "loss": 2.7086, + "step": 4461 + }, + { + "epoch": 0.3601000726333629, + "grad_norm": 0.7246943712234497, + "learning_rate": 0.00017721208546314827, + "loss": 2.7068, + "step": 4462 + }, + { + "epoch": 0.36018077636994594, + "grad_norm": 0.715965211391449, + "learning_rate": 0.00017720205231086837, + "loss": 2.689, + "step": 4463 + }, + { + "epoch": 0.3602614801065289, + "grad_norm": 0.7696218490600586, + "learning_rate": 0.00017719201723451151, + "loss": 2.611, + "step": 4464 + }, + { + "epoch": 0.36034218384311195, + "grad_norm": 0.7599236369132996, + "learning_rate": 0.00017718198023432779, + "loss": 2.6504, + "step": 4465 + }, + { + "epoch": 0.36042288757969493, + "grad_norm": 
0.7674956321716309, + "learning_rate": 0.0001771719413105674, + "loss": 2.7559, + "step": 4466 + }, + { + "epoch": 0.36050359131627796, + "grad_norm": 0.7263289093971252, + "learning_rate": 0.00017716190046348045, + "loss": 2.6822, + "step": 4467 + }, + { + "epoch": 0.36058429505286094, + "grad_norm": 0.7564195990562439, + "learning_rate": 0.0001771518576933173, + "loss": 2.7319, + "step": 4468 + }, + { + "epoch": 0.36066499878944397, + "grad_norm": 0.7291253805160522, + "learning_rate": 0.00017714181300032813, + "loss": 2.704, + "step": 4469 + }, + { + "epoch": 0.36074570252602695, + "grad_norm": 0.7354169487953186, + "learning_rate": 0.00017713176638476332, + "loss": 2.6344, + "step": 4470 + }, + { + "epoch": 0.36082640626261, + "grad_norm": 0.7104110717773438, + "learning_rate": 0.0001771217178468733, + "loss": 2.665, + "step": 4471 + }, + { + "epoch": 0.36090710999919295, + "grad_norm": 0.6913934350013733, + "learning_rate": 0.00017711166738690847, + "loss": 2.6674, + "step": 4472 + }, + { + "epoch": 0.360987813735776, + "grad_norm": 0.7999634742736816, + "learning_rate": 0.0001771016150051193, + "loss": 2.6847, + "step": 4473 + }, + { + "epoch": 0.36106851747235896, + "grad_norm": 0.7878915667533875, + "learning_rate": 0.00017709156070175634, + "loss": 2.7125, + "step": 4474 + }, + { + "epoch": 0.361149221208942, + "grad_norm": 0.7145688533782959, + "learning_rate": 0.00017708150447707017, + "loss": 2.6863, + "step": 4475 + }, + { + "epoch": 0.36122992494552497, + "grad_norm": 0.7518604397773743, + "learning_rate": 0.00017707144633131143, + "loss": 2.6616, + "step": 4476 + }, + { + "epoch": 0.361310628682108, + "grad_norm": 0.735634982585907, + "learning_rate": 0.0001770613862647308, + "loss": 2.6315, + "step": 4477 + }, + { + "epoch": 0.361391332418691, + "grad_norm": 0.7925180196762085, + "learning_rate": 0.00017705132427757895, + "loss": 2.6951, + "step": 4478 + }, + { + "epoch": 0.361472036155274, + "grad_norm": 0.6949547529220581, + "learning_rate": 
0.00017704126037010667, + "loss": 2.6934, + "step": 4479 + }, + { + "epoch": 0.361552739891857, + "grad_norm": 0.7233577966690063, + "learning_rate": 0.00017703119454256483, + "loss": 2.6773, + "step": 4480 + }, + { + "epoch": 0.36163344362844, + "grad_norm": 0.7303269505500793, + "learning_rate": 0.00017702112679520424, + "loss": 2.6351, + "step": 4481 + }, + { + "epoch": 0.361714147365023, + "grad_norm": 0.7620660066604614, + "learning_rate": 0.00017701105712827583, + "loss": 2.6748, + "step": 4482 + }, + { + "epoch": 0.36179485110160603, + "grad_norm": 0.7744965553283691, + "learning_rate": 0.00017700098554203057, + "loss": 2.7013, + "step": 4483 + }, + { + "epoch": 0.361875554838189, + "grad_norm": 0.8017357587814331, + "learning_rate": 0.00017699091203671947, + "loss": 2.7273, + "step": 4484 + }, + { + "epoch": 0.36195625857477204, + "grad_norm": 0.8014432191848755, + "learning_rate": 0.0001769808366125936, + "loss": 2.6864, + "step": 4485 + }, + { + "epoch": 0.362036962311355, + "grad_norm": 0.6914888620376587, + "learning_rate": 0.00017697075926990406, + "loss": 2.6851, + "step": 4486 + }, + { + "epoch": 0.36211766604793805, + "grad_norm": 0.7472698092460632, + "learning_rate": 0.00017696068000890196, + "loss": 2.695, + "step": 4487 + }, + { + "epoch": 0.362198369784521, + "grad_norm": 0.7506285309791565, + "learning_rate": 0.00017695059882983855, + "loss": 2.7055, + "step": 4488 + }, + { + "epoch": 0.362279073521104, + "grad_norm": 0.7501141428947449, + "learning_rate": 0.00017694051573296507, + "loss": 2.7109, + "step": 4489 + }, + { + "epoch": 0.36235977725768703, + "grad_norm": 0.6654670834541321, + "learning_rate": 0.00017693043071853284, + "loss": 2.6165, + "step": 4490 + }, + { + "epoch": 0.36244048099427, + "grad_norm": 0.7894664406776428, + "learning_rate": 0.00017692034378679315, + "loss": 2.7274, + "step": 4491 + }, + { + "epoch": 0.36252118473085304, + "grad_norm": 0.7206711173057556, + "learning_rate": 0.00017691025493799743, + "loss": 2.7047, + 
"step": 4492 + }, + { + "epoch": 0.362601888467436, + "grad_norm": 0.7656282186508179, + "learning_rate": 0.00017690016417239708, + "loss": 2.696, + "step": 4493 + }, + { + "epoch": 0.36268259220401905, + "grad_norm": 0.7357437610626221, + "learning_rate": 0.00017689007149024362, + "loss": 2.7279, + "step": 4494 + }, + { + "epoch": 0.362763295940602, + "grad_norm": 0.7262146472930908, + "learning_rate": 0.00017687997689178864, + "loss": 2.6964, + "step": 4495 + }, + { + "epoch": 0.36284399967718506, + "grad_norm": 0.7839891910552979, + "learning_rate": 0.00017686988037728365, + "loss": 2.651, + "step": 4496 + }, + { + "epoch": 0.36292470341376803, + "grad_norm": 0.7150306105613708, + "learning_rate": 0.00017685978194698028, + "loss": 2.6481, + "step": 4497 + }, + { + "epoch": 0.36300540715035107, + "grad_norm": 0.7144685387611389, + "learning_rate": 0.00017684968160113025, + "loss": 2.7169, + "step": 4498 + }, + { + "epoch": 0.36308611088693404, + "grad_norm": 0.7593061327934265, + "learning_rate": 0.00017683957933998525, + "loss": 2.7543, + "step": 4499 + }, + { + "epoch": 0.3631668146235171, + "grad_norm": 0.7301446199417114, + "learning_rate": 0.00017682947516379707, + "loss": 2.6806, + "step": 4500 + }, + { + "epoch": 0.36324751836010005, + "grad_norm": 0.7314243316650391, + "learning_rate": 0.00017681936907281757, + "loss": 2.7227, + "step": 4501 + }, + { + "epoch": 0.3633282220966831, + "grad_norm": 0.7695817351341248, + "learning_rate": 0.00017680926106729852, + "loss": 2.7229, + "step": 4502 + }, + { + "epoch": 0.36340892583326606, + "grad_norm": 0.6885762810707092, + "learning_rate": 0.00017679915114749198, + "loss": 2.7246, + "step": 4503 + }, + { + "epoch": 0.3634896295698491, + "grad_norm": 0.6893608570098877, + "learning_rate": 0.0001767890393136498, + "loss": 2.6572, + "step": 4504 + }, + { + "epoch": 0.36357033330643207, + "grad_norm": 0.7011978626251221, + "learning_rate": 0.00017677892556602402, + "loss": 2.6775, + "step": 4505 + }, + { + "epoch": 
0.3636510370430151, + "grad_norm": 0.6693406105041504, + "learning_rate": 0.00017676880990486672, + "loss": 2.6183, + "step": 4506 + }, + { + "epoch": 0.3637317407795981, + "grad_norm": 0.7023048996925354, + "learning_rate": 0.00017675869233043002, + "loss": 2.6772, + "step": 4507 + }, + { + "epoch": 0.3638124445161811, + "grad_norm": 0.6903806328773499, + "learning_rate": 0.00017674857284296605, + "loss": 2.6486, + "step": 4508 + }, + { + "epoch": 0.3638931482527641, + "grad_norm": 0.6799258589744568, + "learning_rate": 0.000176738451442727, + "loss": 2.6305, + "step": 4509 + }, + { + "epoch": 0.3639738519893471, + "grad_norm": 0.7935682535171509, + "learning_rate": 0.00017672832812996517, + "loss": 2.7365, + "step": 4510 + }, + { + "epoch": 0.3640545557259301, + "grad_norm": 0.7593684196472168, + "learning_rate": 0.00017671820290493284, + "loss": 2.7029, + "step": 4511 + }, + { + "epoch": 0.36413525946251313, + "grad_norm": 0.7185288667678833, + "learning_rate": 0.00017670807576788234, + "loss": 2.6646, + "step": 4512 + }, + { + "epoch": 0.3642159631990961, + "grad_norm": 0.7260291576385498, + "learning_rate": 0.00017669794671906606, + "loss": 2.6615, + "step": 4513 + }, + { + "epoch": 0.36429666693567914, + "grad_norm": 0.6933417916297913, + "learning_rate": 0.00017668781575873646, + "loss": 2.6678, + "step": 4514 + }, + { + "epoch": 0.3643773706722621, + "grad_norm": 0.7657343149185181, + "learning_rate": 0.00017667768288714603, + "loss": 2.7155, + "step": 4515 + }, + { + "epoch": 0.36445807440884515, + "grad_norm": 0.7326949834823608, + "learning_rate": 0.0001766675481045473, + "loss": 2.732, + "step": 4516 + }, + { + "epoch": 0.3645387781454281, + "grad_norm": 0.7370324730873108, + "learning_rate": 0.0001766574114111929, + "loss": 2.6124, + "step": 4517 + }, + { + "epoch": 0.36461948188201115, + "grad_norm": 0.7280072569847107, + "learning_rate": 0.00017664727280733536, + "loss": 2.6793, + "step": 4518 + }, + { + "epoch": 0.36470018561859413, + "grad_norm": 
0.7174237370491028, + "learning_rate": 0.00017663713229322748, + "loss": 2.629, + "step": 4519 + }, + { + "epoch": 0.36478088935517716, + "grad_norm": 0.6660771369934082, + "learning_rate": 0.0001766269898691219, + "loss": 2.6862, + "step": 4520 + }, + { + "epoch": 0.36486159309176014, + "grad_norm": 0.7024446725845337, + "learning_rate": 0.00017661684553527143, + "loss": 2.6602, + "step": 4521 + }, + { + "epoch": 0.36494229682834317, + "grad_norm": 0.7419618964195251, + "learning_rate": 0.0001766066992919289, + "loss": 2.6904, + "step": 4522 + }, + { + "epoch": 0.36502300056492615, + "grad_norm": 0.7425804138183594, + "learning_rate": 0.00017659655113934716, + "loss": 2.7312, + "step": 4523 + }, + { + "epoch": 0.3651037043015092, + "grad_norm": 0.7117013931274414, + "learning_rate": 0.00017658640107777915, + "loss": 2.6411, + "step": 4524 + }, + { + "epoch": 0.36518440803809216, + "grad_norm": 0.719613254070282, + "learning_rate": 0.00017657624910747782, + "loss": 2.6799, + "step": 4525 + }, + { + "epoch": 0.3652651117746752, + "grad_norm": 0.7654159665107727, + "learning_rate": 0.0001765660952286962, + "loss": 2.6675, + "step": 4526 + }, + { + "epoch": 0.36534581551125817, + "grad_norm": 0.7111814022064209, + "learning_rate": 0.00017655593944168734, + "loss": 2.6717, + "step": 4527 + }, + { + "epoch": 0.3654265192478412, + "grad_norm": 0.7494712471961975, + "learning_rate": 0.00017654578174670436, + "loss": 2.7181, + "step": 4528 + }, + { + "epoch": 0.3655072229844242, + "grad_norm": 0.8062291145324707, + "learning_rate": 0.0001765356221440004, + "loss": 2.6563, + "step": 4529 + }, + { + "epoch": 0.3655879267210072, + "grad_norm": 0.7923303842544556, + "learning_rate": 0.00017652546063382866, + "loss": 2.6295, + "step": 4530 + }, + { + "epoch": 0.3656686304575902, + "grad_norm": 0.7417340278625488, + "learning_rate": 0.00017651529721644238, + "loss": 2.6727, + "step": 4531 + }, + { + "epoch": 0.3657493341941732, + "grad_norm": 0.7326166033744812, + 
"learning_rate": 0.0001765051318920949, + "loss": 2.702, + "step": 4532 + }, + { + "epoch": 0.3658300379307562, + "grad_norm": 0.8133745193481445, + "learning_rate": 0.00017649496466103957, + "loss": 2.7157, + "step": 4533 + }, + { + "epoch": 0.3659107416673392, + "grad_norm": 0.710502564907074, + "learning_rate": 0.00017648479552352973, + "loss": 2.6668, + "step": 4534 + }, + { + "epoch": 0.3659914454039222, + "grad_norm": 0.6947012543678284, + "learning_rate": 0.00017647462447981885, + "loss": 2.6865, + "step": 4535 + }, + { + "epoch": 0.36607214914050523, + "grad_norm": 0.8432720899581909, + "learning_rate": 0.0001764644515301604, + "loss": 2.6226, + "step": 4536 + }, + { + "epoch": 0.3661528528770882, + "grad_norm": 0.7321269512176514, + "learning_rate": 0.00017645427667480802, + "loss": 2.662, + "step": 4537 + }, + { + "epoch": 0.36623355661367124, + "grad_norm": 0.8099743723869324, + "learning_rate": 0.00017644409991401515, + "loss": 2.6853, + "step": 4538 + }, + { + "epoch": 0.3663142603502542, + "grad_norm": 0.6885355114936829, + "learning_rate": 0.0001764339212480355, + "loss": 2.6672, + "step": 4539 + }, + { + "epoch": 0.3663949640868372, + "grad_norm": 0.911396324634552, + "learning_rate": 0.00017642374067712276, + "loss": 2.5778, + "step": 4540 + }, + { + "epoch": 0.3664756678234202, + "grad_norm": 0.7461941838264465, + "learning_rate": 0.0001764135582015306, + "loss": 2.6629, + "step": 4541 + }, + { + "epoch": 0.3665563715600032, + "grad_norm": 0.772741436958313, + "learning_rate": 0.0001764033738215128, + "loss": 2.725, + "step": 4542 + }, + { + "epoch": 0.36663707529658623, + "grad_norm": 0.7256152629852295, + "learning_rate": 0.0001763931875373232, + "loss": 2.6439, + "step": 4543 + }, + { + "epoch": 0.3667177790331692, + "grad_norm": 0.8089167475700378, + "learning_rate": 0.0001763829993492157, + "loss": 2.5972, + "step": 4544 + }, + { + "epoch": 0.36679848276975224, + "grad_norm": 0.7115232944488525, + "learning_rate": 0.0001763728092574442, + 
"loss": 2.633, + "step": 4545 + }, + { + "epoch": 0.3668791865063352, + "grad_norm": 0.7189347147941589, + "learning_rate": 0.00017636261726226266, + "loss": 2.619, + "step": 4546 + }, + { + "epoch": 0.36695989024291825, + "grad_norm": 0.7667742967605591, + "learning_rate": 0.00017635242336392506, + "loss": 2.667, + "step": 4547 + }, + { + "epoch": 0.36704059397950123, + "grad_norm": 0.7982457876205444, + "learning_rate": 0.00017634222756268545, + "loss": 2.6667, + "step": 4548 + }, + { + "epoch": 0.36712129771608426, + "grad_norm": 0.7465574145317078, + "learning_rate": 0.00017633202985879804, + "loss": 2.6436, + "step": 4549 + }, + { + "epoch": 0.36720200145266724, + "grad_norm": 0.7297804951667786, + "learning_rate": 0.00017632183025251686, + "loss": 2.6464, + "step": 4550 + }, + { + "epoch": 0.36728270518925027, + "grad_norm": 0.6885054111480713, + "learning_rate": 0.0001763116287440962, + "loss": 2.6742, + "step": 4551 + }, + { + "epoch": 0.36736340892583325, + "grad_norm": 0.7341574430465698, + "learning_rate": 0.00017630142533379023, + "loss": 2.6688, + "step": 4552 + }, + { + "epoch": 0.3674441126624163, + "grad_norm": 0.8565430045127869, + "learning_rate": 0.0001762912200218533, + "loss": 2.6889, + "step": 4553 + }, + { + "epoch": 0.36752481639899925, + "grad_norm": 0.7509489059448242, + "learning_rate": 0.00017628101280853974, + "loss": 2.6177, + "step": 4554 + }, + { + "epoch": 0.3676055201355823, + "grad_norm": 0.8128334879875183, + "learning_rate": 0.00017627080369410396, + "loss": 2.7301, + "step": 4555 + }, + { + "epoch": 0.36768622387216526, + "grad_norm": 0.7511637210845947, + "learning_rate": 0.00017626059267880035, + "loss": 2.7327, + "step": 4556 + }, + { + "epoch": 0.3677669276087483, + "grad_norm": 0.8350822925567627, + "learning_rate": 0.00017625037976288347, + "loss": 2.6073, + "step": 4557 + }, + { + "epoch": 0.36784763134533127, + "grad_norm": 0.7743313312530518, + "learning_rate": 0.00017624016494660776, + "loss": 2.7055, + "step": 4558 + 
}, + { + "epoch": 0.3679283350819143, + "grad_norm": 0.8196439146995544, + "learning_rate": 0.00017622994823022787, + "loss": 2.6565, + "step": 4559 + }, + { + "epoch": 0.3680090388184973, + "grad_norm": 0.7223393321037292, + "learning_rate": 0.00017621972961399837, + "loss": 2.68, + "step": 4560 + }, + { + "epoch": 0.3680897425550803, + "grad_norm": 0.7215418219566345, + "learning_rate": 0.000176209509098174, + "loss": 2.6627, + "step": 4561 + }, + { + "epoch": 0.3681704462916633, + "grad_norm": 0.8050473928451538, + "learning_rate": 0.00017619928668300946, + "loss": 2.5802, + "step": 4562 + }, + { + "epoch": 0.3682511500282463, + "grad_norm": 0.7452750205993652, + "learning_rate": 0.00017618906236875948, + "loss": 2.6524, + "step": 4563 + }, + { + "epoch": 0.3683318537648293, + "grad_norm": 0.7950742244720459, + "learning_rate": 0.00017617883615567888, + "loss": 2.6371, + "step": 4564 + }, + { + "epoch": 0.36841255750141233, + "grad_norm": 0.7185397744178772, + "learning_rate": 0.00017616860804402261, + "loss": 2.6531, + "step": 4565 + }, + { + "epoch": 0.3684932612379953, + "grad_norm": 0.7480553388595581, + "learning_rate": 0.0001761583780340455, + "loss": 2.6727, + "step": 4566 + }, + { + "epoch": 0.36857396497457834, + "grad_norm": 0.7740724086761475, + "learning_rate": 0.00017614814612600251, + "loss": 2.6095, + "step": 4567 + }, + { + "epoch": 0.3686546687111613, + "grad_norm": 0.9159810543060303, + "learning_rate": 0.00017613791232014866, + "loss": 2.7039, + "step": 4568 + }, + { + "epoch": 0.36873537244774435, + "grad_norm": 0.7478305697441101, + "learning_rate": 0.00017612767661673905, + "loss": 2.6307, + "step": 4569 + }, + { + "epoch": 0.3688160761843273, + "grad_norm": 0.9154726266860962, + "learning_rate": 0.00017611743901602874, + "loss": 2.675, + "step": 4570 + }, + { + "epoch": 0.36889677992091036, + "grad_norm": 0.7903287410736084, + "learning_rate": 0.0001761071995182728, + "loss": 2.6938, + "step": 4571 + }, + { + "epoch": 0.36897748365749333, 
+ "grad_norm": 0.7919119596481323, + "learning_rate": 0.0001760969581237266, + "loss": 2.7092, + "step": 4572 + }, + { + "epoch": 0.36905818739407636, + "grad_norm": 0.8052253723144531, + "learning_rate": 0.00017608671483264522, + "loss": 2.6914, + "step": 4573 + }, + { + "epoch": 0.36913889113065934, + "grad_norm": 0.7660435438156128, + "learning_rate": 0.00017607646964528403, + "loss": 2.674, + "step": 4574 + }, + { + "epoch": 0.3692195948672424, + "grad_norm": 0.8554383516311646, + "learning_rate": 0.00017606622256189836, + "loss": 2.6792, + "step": 4575 + }, + { + "epoch": 0.36930029860382535, + "grad_norm": 0.7719140648841858, + "learning_rate": 0.00017605597358274358, + "loss": 2.6836, + "step": 4576 + }, + { + "epoch": 0.3693810023404084, + "grad_norm": 0.733068585395813, + "learning_rate": 0.00017604572270807513, + "loss": 2.6496, + "step": 4577 + }, + { + "epoch": 0.36946170607699136, + "grad_norm": 0.7622445225715637, + "learning_rate": 0.00017603546993814849, + "loss": 2.7097, + "step": 4578 + }, + { + "epoch": 0.3695424098135744, + "grad_norm": 0.7326679825782776, + "learning_rate": 0.00017602521527321913, + "loss": 2.6786, + "step": 4579 + }, + { + "epoch": 0.36962311355015737, + "grad_norm": 0.7579432129859924, + "learning_rate": 0.00017601495871354272, + "loss": 2.6618, + "step": 4580 + }, + { + "epoch": 0.3697038172867404, + "grad_norm": 0.8812715411186218, + "learning_rate": 0.00017600470025937485, + "loss": 2.6942, + "step": 4581 + }, + { + "epoch": 0.3697845210233234, + "grad_norm": 0.7230449318885803, + "learning_rate": 0.00017599443991097116, + "loss": 2.6374, + "step": 4582 + }, + { + "epoch": 0.3698652247599064, + "grad_norm": 0.8347739577293396, + "learning_rate": 0.00017598417766858735, + "loss": 2.6653, + "step": 4583 + }, + { + "epoch": 0.3699459284964894, + "grad_norm": 0.7826598882675171, + "learning_rate": 0.0001759739135324792, + "loss": 2.6342, + "step": 4584 + }, + { + "epoch": 0.3700266322330724, + "grad_norm": 0.749060332775116, + 
"learning_rate": 0.00017596364750290254, + "loss": 2.7256, + "step": 4585 + }, + { + "epoch": 0.3701073359696554, + "grad_norm": 0.7470815181732178, + "learning_rate": 0.00017595337958011323, + "loss": 2.6485, + "step": 4586 + }, + { + "epoch": 0.3701880397062384, + "grad_norm": 0.7251530289649963, + "learning_rate": 0.00017594310976436716, + "loss": 2.6613, + "step": 4587 + }, + { + "epoch": 0.3702687434428214, + "grad_norm": 0.7143718004226685, + "learning_rate": 0.00017593283805592027, + "loss": 2.6101, + "step": 4588 + }, + { + "epoch": 0.37034944717940443, + "grad_norm": 0.7378203272819519, + "learning_rate": 0.00017592256445502855, + "loss": 2.6735, + "step": 4589 + }, + { + "epoch": 0.3704301509159874, + "grad_norm": 0.7193629741668701, + "learning_rate": 0.00017591228896194808, + "loss": 2.719, + "step": 4590 + }, + { + "epoch": 0.3705108546525704, + "grad_norm": 0.7377258539199829, + "learning_rate": 0.00017590201157693494, + "loss": 2.6789, + "step": 4591 + }, + { + "epoch": 0.3705915583891534, + "grad_norm": 0.7468351721763611, + "learning_rate": 0.00017589173230024522, + "loss": 2.6389, + "step": 4592 + }, + { + "epoch": 0.3706722621257364, + "grad_norm": 0.7612246870994568, + "learning_rate": 0.0001758814511321352, + "loss": 2.7045, + "step": 4593 + }, + { + "epoch": 0.37075296586231943, + "grad_norm": 0.7603838443756104, + "learning_rate": 0.00017587116807286102, + "loss": 2.7323, + "step": 4594 + }, + { + "epoch": 0.3708336695989024, + "grad_norm": 0.7436477541923523, + "learning_rate": 0.000175860883122679, + "loss": 2.7331, + "step": 4595 + }, + { + "epoch": 0.37091437333548544, + "grad_norm": 0.7004369497299194, + "learning_rate": 0.0001758505962818455, + "loss": 2.6418, + "step": 4596 + }, + { + "epoch": 0.3709950770720684, + "grad_norm": 0.711980938911438, + "learning_rate": 0.00017584030755061683, + "loss": 2.6184, + "step": 4597 + }, + { + "epoch": 0.37107578080865145, + "grad_norm": 0.6999367475509644, + "learning_rate": 
0.0001758300169292495, + "loss": 2.6584, + "step": 4598 + }, + { + "epoch": 0.3711564845452344, + "grad_norm": 0.6755785942077637, + "learning_rate": 0.0001758197244179999, + "loss": 2.664, + "step": 4599 + }, + { + "epoch": 0.37123718828181745, + "grad_norm": 0.7174055576324463, + "learning_rate": 0.00017580943001712455, + "loss": 2.6821, + "step": 4600 + }, + { + "epoch": 0.37131789201840043, + "grad_norm": 0.8218933343887329, + "learning_rate": 0.00017579913372688005, + "loss": 2.6355, + "step": 4601 + }, + { + "epoch": 0.37139859575498346, + "grad_norm": 0.7417960166931152, + "learning_rate": 0.000175788835547523, + "loss": 2.7226, + "step": 4602 + }, + { + "epoch": 0.37147929949156644, + "grad_norm": 0.824421763420105, + "learning_rate": 0.00017577853547931006, + "loss": 2.6526, + "step": 4603 + }, + { + "epoch": 0.37156000322814947, + "grad_norm": 0.7391949892044067, + "learning_rate": 0.00017576823352249794, + "loss": 2.6702, + "step": 4604 + }, + { + "epoch": 0.37164070696473245, + "grad_norm": 0.7890247106552124, + "learning_rate": 0.00017575792967734337, + "loss": 2.7281, + "step": 4605 + }, + { + "epoch": 0.3717214107013155, + "grad_norm": 0.785527765750885, + "learning_rate": 0.00017574762394410317, + "loss": 2.6728, + "step": 4606 + }, + { + "epoch": 0.37180211443789846, + "grad_norm": 0.7195863127708435, + "learning_rate": 0.00017573731632303415, + "loss": 2.6329, + "step": 4607 + }, + { + "epoch": 0.3718828181744815, + "grad_norm": 0.7896780371665955, + "learning_rate": 0.0001757270068143932, + "loss": 2.6776, + "step": 4608 + }, + { + "epoch": 0.37196352191106447, + "grad_norm": 0.7568275332450867, + "learning_rate": 0.00017571669541843735, + "loss": 2.6668, + "step": 4609 + }, + { + "epoch": 0.3720442256476475, + "grad_norm": 0.7923939228057861, + "learning_rate": 0.00017570638213542348, + "loss": 2.7033, + "step": 4610 + }, + { + "epoch": 0.3721249293842305, + "grad_norm": 0.7586569786071777, + "learning_rate": 0.00017569606696560868, + "loss": 
2.7286, + "step": 4611 + }, + { + "epoch": 0.3722056331208135, + "grad_norm": 0.8222009539604187, + "learning_rate": 0.00017568574990925004, + "loss": 2.6448, + "step": 4612 + }, + { + "epoch": 0.3722863368573965, + "grad_norm": 0.7144019603729248, + "learning_rate": 0.00017567543096660466, + "loss": 2.6671, + "step": 4613 + }, + { + "epoch": 0.3723670405939795, + "grad_norm": 0.7602240443229675, + "learning_rate": 0.00017566511013792973, + "loss": 2.6492, + "step": 4614 + }, + { + "epoch": 0.3724477443305625, + "grad_norm": 0.7949689626693726, + "learning_rate": 0.00017565478742348245, + "loss": 2.7002, + "step": 4615 + }, + { + "epoch": 0.3725284480671455, + "grad_norm": 0.6922519207000732, + "learning_rate": 0.00017564446282352012, + "loss": 2.6917, + "step": 4616 + }, + { + "epoch": 0.3726091518037285, + "grad_norm": 0.7382915616035461, + "learning_rate": 0.0001756341363383, + "loss": 2.6375, + "step": 4617 + }, + { + "epoch": 0.37268985554031153, + "grad_norm": 0.7511888742446899, + "learning_rate": 0.00017562380796807956, + "loss": 2.6823, + "step": 4618 + }, + { + "epoch": 0.3727705592768945, + "grad_norm": 0.7273457646369934, + "learning_rate": 0.00017561347771311608, + "loss": 2.6124, + "step": 4619 + }, + { + "epoch": 0.37285126301347754, + "grad_norm": 0.689440131187439, + "learning_rate": 0.0001756031455736671, + "loss": 2.6931, + "step": 4620 + }, + { + "epoch": 0.3729319667500605, + "grad_norm": 0.7755659222602844, + "learning_rate": 0.00017559281154999013, + "loss": 2.6273, + "step": 4621 + }, + { + "epoch": 0.37301267048664355, + "grad_norm": 0.6940193176269531, + "learning_rate": 0.00017558247564234265, + "loss": 2.641, + "step": 4622 + }, + { + "epoch": 0.3730933742232265, + "grad_norm": 0.7387529015541077, + "learning_rate": 0.00017557213785098232, + "loss": 2.7229, + "step": 4623 + }, + { + "epoch": 0.37317407795980956, + "grad_norm": 0.6807727217674255, + "learning_rate": 0.00017556179817616678, + "loss": 2.6469, + "step": 4624 + }, + { + 
"epoch": 0.37325478169639253, + "grad_norm": 0.7203819751739502, + "learning_rate": 0.0001755514566181537, + "loss": 2.6239, + "step": 4625 + }, + { + "epoch": 0.37333548543297557, + "grad_norm": 0.9345876574516296, + "learning_rate": 0.0001755411131772008, + "loss": 2.7154, + "step": 4626 + }, + { + "epoch": 0.37341618916955854, + "grad_norm": 0.6787357330322266, + "learning_rate": 0.00017553076785356594, + "loss": 2.6374, + "step": 4627 + }, + { + "epoch": 0.3734968929061416, + "grad_norm": 0.7153670191764832, + "learning_rate": 0.0001755204206475069, + "loss": 2.6734, + "step": 4628 + }, + { + "epoch": 0.37357759664272455, + "grad_norm": 0.736464262008667, + "learning_rate": 0.00017551007155928154, + "loss": 2.7241, + "step": 4629 + }, + { + "epoch": 0.3736583003793076, + "grad_norm": 0.7134939432144165, + "learning_rate": 0.0001754997205891478, + "loss": 2.682, + "step": 4630 + }, + { + "epoch": 0.37373900411589056, + "grad_norm": 0.7071199417114258, + "learning_rate": 0.0001754893677373637, + "loss": 2.7361, + "step": 4631 + }, + { + "epoch": 0.3738197078524736, + "grad_norm": 0.7040621638298035, + "learning_rate": 0.00017547901300418722, + "loss": 2.7031, + "step": 4632 + }, + { + "epoch": 0.37390041158905657, + "grad_norm": 0.7179287075996399, + "learning_rate": 0.00017546865638987642, + "loss": 2.6755, + "step": 4633 + }, + { + "epoch": 0.3739811153256396, + "grad_norm": 0.7579259276390076, + "learning_rate": 0.00017545829789468944, + "loss": 2.6514, + "step": 4634 + }, + { + "epoch": 0.3740618190622226, + "grad_norm": 0.7825835347175598, + "learning_rate": 0.0001754479375188844, + "loss": 2.6876, + "step": 4635 + }, + { + "epoch": 0.3741425227988056, + "grad_norm": 0.7913421988487244, + "learning_rate": 0.00017543757526271956, + "loss": 2.7153, + "step": 4636 + }, + { + "epoch": 0.3742232265353886, + "grad_norm": 0.7766042947769165, + "learning_rate": 0.00017542721112645313, + "loss": 2.645, + "step": 4637 + }, + { + "epoch": 0.3743039302719716, + 
"grad_norm": 0.7363953590393066, + "learning_rate": 0.00017541684511034343, + "loss": 2.6376, + "step": 4638 + }, + { + "epoch": 0.3743846340085546, + "grad_norm": 0.6928617358207703, + "learning_rate": 0.00017540647721464881, + "loss": 2.6882, + "step": 4639 + }, + { + "epoch": 0.3744653377451376, + "grad_norm": 0.7832257747650146, + "learning_rate": 0.0001753961074396277, + "loss": 2.7305, + "step": 4640 + }, + { + "epoch": 0.3745460414817206, + "grad_norm": 0.7180350422859192, + "learning_rate": 0.00017538573578553844, + "loss": 2.6783, + "step": 4641 + }, + { + "epoch": 0.3746267452183036, + "grad_norm": 0.718209981918335, + "learning_rate": 0.00017537536225263964, + "loss": 2.6961, + "step": 4642 + }, + { + "epoch": 0.3747074489548866, + "grad_norm": 0.7056655287742615, + "learning_rate": 0.00017536498684118975, + "loss": 2.7096, + "step": 4643 + }, + { + "epoch": 0.3747881526914696, + "grad_norm": 0.8004828691482544, + "learning_rate": 0.0001753546095514474, + "loss": 2.7168, + "step": 4644 + }, + { + "epoch": 0.3748688564280526, + "grad_norm": 0.7630821466445923, + "learning_rate": 0.0001753442303836712, + "loss": 2.7091, + "step": 4645 + }, + { + "epoch": 0.3749495601646356, + "grad_norm": 0.7539668083190918, + "learning_rate": 0.0001753338493381198, + "loss": 2.651, + "step": 4646 + }, + { + "epoch": 0.37503026390121863, + "grad_norm": 0.7243319749832153, + "learning_rate": 0.000175323466415052, + "loss": 2.6765, + "step": 4647 + }, + { + "epoch": 0.3751109676378016, + "grad_norm": 0.8906281590461731, + "learning_rate": 0.00017531308161472647, + "loss": 2.5938, + "step": 4648 + }, + { + "epoch": 0.37519167137438464, + "grad_norm": 0.787966251373291, + "learning_rate": 0.0001753026949374021, + "loss": 2.6011, + "step": 4649 + }, + { + "epoch": 0.3752723751109676, + "grad_norm": 0.7763915061950684, + "learning_rate": 0.00017529230638333772, + "loss": 2.7197, + "step": 4650 + }, + { + "epoch": 0.37535307884755065, + "grad_norm": 0.7717103362083435, + 
"learning_rate": 0.00017528191595279224, + "loss": 2.6605, + "step": 4651 + }, + { + "epoch": 0.3754337825841336, + "grad_norm": 0.7340055108070374, + "learning_rate": 0.00017527152364602464, + "loss": 2.6856, + "step": 4652 + }, + { + "epoch": 0.37551448632071666, + "grad_norm": 0.7805169820785522, + "learning_rate": 0.0001752611294632939, + "loss": 2.7088, + "step": 4653 + }, + { + "epoch": 0.37559519005729963, + "grad_norm": 0.7894891500473022, + "learning_rate": 0.00017525073340485912, + "loss": 2.6691, + "step": 4654 + }, + { + "epoch": 0.37567589379388266, + "grad_norm": 0.7627872824668884, + "learning_rate": 0.0001752403354709793, + "loss": 2.6536, + "step": 4655 + }, + { + "epoch": 0.37575659753046564, + "grad_norm": 0.8097225427627563, + "learning_rate": 0.00017522993566191367, + "loss": 2.7108, + "step": 4656 + }, + { + "epoch": 0.3758373012670487, + "grad_norm": 0.834449827671051, + "learning_rate": 0.00017521953397792137, + "loss": 2.7565, + "step": 4657 + }, + { + "epoch": 0.37591800500363165, + "grad_norm": 0.7924147844314575, + "learning_rate": 0.00017520913041926166, + "loss": 2.7101, + "step": 4658 + }, + { + "epoch": 0.3759987087402147, + "grad_norm": 0.7407249808311462, + "learning_rate": 0.00017519872498619385, + "loss": 2.6501, + "step": 4659 + }, + { + "epoch": 0.37607941247679766, + "grad_norm": 0.7251791954040527, + "learning_rate": 0.0001751883176789772, + "loss": 2.6786, + "step": 4660 + }, + { + "epoch": 0.3761601162133807, + "grad_norm": 0.7120431661605835, + "learning_rate": 0.00017517790849787116, + "loss": 2.7244, + "step": 4661 + }, + { + "epoch": 0.37624081994996367, + "grad_norm": 0.724836528301239, + "learning_rate": 0.00017516749744313513, + "loss": 2.7099, + "step": 4662 + }, + { + "epoch": 0.3763215236865467, + "grad_norm": 0.7788939476013184, + "learning_rate": 0.00017515708451502855, + "loss": 2.6206, + "step": 4663 + }, + { + "epoch": 0.3764022274231297, + "grad_norm": 0.7518914341926575, + "learning_rate": 
0.00017514666971381099, + "loss": 2.7505, + "step": 4664 + }, + { + "epoch": 0.3764829311597127, + "grad_norm": 0.8004730939865112, + "learning_rate": 0.00017513625303974194, + "loss": 2.6119, + "step": 4665 + }, + { + "epoch": 0.3765636348962957, + "grad_norm": 0.7661109566688538, + "learning_rate": 0.00017512583449308107, + "loss": 2.724, + "step": 4666 + }, + { + "epoch": 0.3766443386328787, + "grad_norm": 0.7669692635536194, + "learning_rate": 0.00017511541407408805, + "loss": 2.7109, + "step": 4667 + }, + { + "epoch": 0.3767250423694617, + "grad_norm": 0.738608181476593, + "learning_rate": 0.00017510499178302253, + "loss": 2.6642, + "step": 4668 + }, + { + "epoch": 0.3768057461060447, + "grad_norm": 0.7194661498069763, + "learning_rate": 0.00017509456762014432, + "loss": 2.6906, + "step": 4669 + }, + { + "epoch": 0.3768864498426277, + "grad_norm": 0.7025040984153748, + "learning_rate": 0.00017508414158571314, + "loss": 2.6596, + "step": 4670 + }, + { + "epoch": 0.37696715357921073, + "grad_norm": 0.7756575345993042, + "learning_rate": 0.00017507371367998892, + "loss": 2.7114, + "step": 4671 + }, + { + "epoch": 0.3770478573157937, + "grad_norm": 0.834966778755188, + "learning_rate": 0.00017506328390323148, + "loss": 2.7554, + "step": 4672 + }, + { + "epoch": 0.37712856105237674, + "grad_norm": 0.6997280120849609, + "learning_rate": 0.0001750528522557008, + "loss": 2.6285, + "step": 4673 + }, + { + "epoch": 0.3772092647889597, + "grad_norm": 0.7101716995239258, + "learning_rate": 0.0001750424187376569, + "loss": 2.6465, + "step": 4674 + }, + { + "epoch": 0.37728996852554275, + "grad_norm": 0.6577222347259521, + "learning_rate": 0.0001750319833493597, + "loss": 2.6372, + "step": 4675 + }, + { + "epoch": 0.37737067226212573, + "grad_norm": 0.7402529120445251, + "learning_rate": 0.00017502154609106937, + "loss": 2.6464, + "step": 4676 + }, + { + "epoch": 0.37745137599870876, + "grad_norm": 0.6858490705490112, + "learning_rate": 0.00017501110696304596, + "loss": 
2.6141, + "step": 4677 + }, + { + "epoch": 0.37753207973529174, + "grad_norm": 0.729468822479248, + "learning_rate": 0.0001750006659655497, + "loss": 2.6671, + "step": 4678 + }, + { + "epoch": 0.37761278347187477, + "grad_norm": 0.7197559475898743, + "learning_rate": 0.0001749902230988408, + "loss": 2.6462, + "step": 4679 + }, + { + "epoch": 0.37769348720845775, + "grad_norm": 0.7171144485473633, + "learning_rate": 0.00017497977836317957, + "loss": 2.6427, + "step": 4680 + }, + { + "epoch": 0.3777741909450408, + "grad_norm": 0.7423805594444275, + "learning_rate": 0.00017496933175882617, + "loss": 2.662, + "step": 4681 + }, + { + "epoch": 0.37785489468162375, + "grad_norm": 0.7498061060905457, + "learning_rate": 0.0001749588832860411, + "loss": 2.6243, + "step": 4682 + }, + { + "epoch": 0.3779355984182068, + "grad_norm": 0.7706165909767151, + "learning_rate": 0.0001749484329450847, + "loss": 2.6928, + "step": 4683 + }, + { + "epoch": 0.37801630215478976, + "grad_norm": 0.723363995552063, + "learning_rate": 0.00017493798073621745, + "loss": 2.6787, + "step": 4684 + }, + { + "epoch": 0.3780970058913728, + "grad_norm": 0.7444875836372375, + "learning_rate": 0.00017492752665969983, + "loss": 2.6789, + "step": 4685 + }, + { + "epoch": 0.37817770962795577, + "grad_norm": 0.6946491599082947, + "learning_rate": 0.00017491707071579237, + "loss": 2.6761, + "step": 4686 + }, + { + "epoch": 0.3782584133645388, + "grad_norm": 0.7171412706375122, + "learning_rate": 0.00017490661290475568, + "loss": 2.6788, + "step": 4687 + }, + { + "epoch": 0.3783391171011218, + "grad_norm": 0.7503272891044617, + "learning_rate": 0.00017489615322685038, + "loss": 2.7057, + "step": 4688 + }, + { + "epoch": 0.3784198208377048, + "grad_norm": 0.7458747625350952, + "learning_rate": 0.00017488569168233714, + "loss": 2.6857, + "step": 4689 + }, + { + "epoch": 0.3785005245742878, + "grad_norm": 0.7030516266822815, + "learning_rate": 0.0001748752282714768, + "loss": 2.6522, + "step": 4690 + }, + { + 
"epoch": 0.3785812283108708, + "grad_norm": 0.7717545628547668, + "learning_rate": 0.00017486476299452994, + "loss": 2.6527, + "step": 4691 + }, + { + "epoch": 0.3786619320474538, + "grad_norm": 0.6788322925567627, + "learning_rate": 0.0001748542958517575, + "loss": 2.6362, + "step": 4692 + }, + { + "epoch": 0.3787426357840368, + "grad_norm": 0.8518630266189575, + "learning_rate": 0.0001748438268434204, + "loss": 2.6812, + "step": 4693 + }, + { + "epoch": 0.3788233395206198, + "grad_norm": 0.7167141437530518, + "learning_rate": 0.00017483335596977945, + "loss": 2.6414, + "step": 4694 + }, + { + "epoch": 0.3789040432572028, + "grad_norm": 0.7748053073883057, + "learning_rate": 0.00017482288323109567, + "loss": 2.7291, + "step": 4695 + }, + { + "epoch": 0.3789847469937858, + "grad_norm": 0.7203041911125183, + "learning_rate": 0.00017481240862763002, + "loss": 2.6957, + "step": 4696 + }, + { + "epoch": 0.3790654507303688, + "grad_norm": 0.7973119020462036, + "learning_rate": 0.00017480193215964362, + "loss": 2.7456, + "step": 4697 + }, + { + "epoch": 0.3791461544669518, + "grad_norm": 0.7851223945617676, + "learning_rate": 0.00017479145382739755, + "loss": 2.6525, + "step": 4698 + }, + { + "epoch": 0.3792268582035348, + "grad_norm": 0.7012068629264832, + "learning_rate": 0.0001747809736311529, + "loss": 2.6662, + "step": 4699 + }, + { + "epoch": 0.37930756194011783, + "grad_norm": 0.7266128659248352, + "learning_rate": 0.00017477049157117093, + "loss": 2.5853, + "step": 4700 + }, + { + "epoch": 0.3793882656767008, + "grad_norm": 0.7264416217803955, + "learning_rate": 0.00017476000764771285, + "loss": 2.6972, + "step": 4701 + }, + { + "epoch": 0.37946896941328384, + "grad_norm": 0.797709047794342, + "learning_rate": 0.00017474952186103995, + "loss": 2.6997, + "step": 4702 + }, + { + "epoch": 0.3795496731498668, + "grad_norm": 0.7552568912506104, + "learning_rate": 0.00017473903421141358, + "loss": 2.7178, + "step": 4703 + }, + { + "epoch": 0.37963037688644985, + 
"grad_norm": 0.7611108422279358, + "learning_rate": 0.0001747285446990951, + "loss": 2.6997, + "step": 4704 + }, + { + "epoch": 0.3797110806230328, + "grad_norm": 0.8081753253936768, + "learning_rate": 0.00017471805332434595, + "loss": 2.7242, + "step": 4705 + }, + { + "epoch": 0.37979178435961586, + "grad_norm": 0.728301465511322, + "learning_rate": 0.0001747075600874276, + "loss": 2.5885, + "step": 4706 + }, + { + "epoch": 0.37987248809619883, + "grad_norm": 0.7548539638519287, + "learning_rate": 0.00017469706498860155, + "loss": 2.7038, + "step": 4707 + }, + { + "epoch": 0.37995319183278187, + "grad_norm": 0.7054354548454285, + "learning_rate": 0.00017468656802812938, + "loss": 2.6566, + "step": 4708 + }, + { + "epoch": 0.38003389556936484, + "grad_norm": 0.7231585383415222, + "learning_rate": 0.0001746760692062727, + "loss": 2.6564, + "step": 4709 + }, + { + "epoch": 0.3801145993059479, + "grad_norm": 0.6931934952735901, + "learning_rate": 0.00017466556852329318, + "loss": 2.6403, + "step": 4710 + }, + { + "epoch": 0.38019530304253085, + "grad_norm": 0.7882393598556519, + "learning_rate": 0.00017465506597945255, + "loss": 2.6337, + "step": 4711 + }, + { + "epoch": 0.3802760067791139, + "grad_norm": 0.7015109658241272, + "learning_rate": 0.0001746445615750125, + "loss": 2.6742, + "step": 4712 + }, + { + "epoch": 0.38035671051569686, + "grad_norm": 0.7653505802154541, + "learning_rate": 0.0001746340553102348, + "loss": 2.6742, + "step": 4713 + }, + { + "epoch": 0.3804374142522799, + "grad_norm": 0.7166270613670349, + "learning_rate": 0.0001746235471853814, + "loss": 2.5995, + "step": 4714 + }, + { + "epoch": 0.38051811798886287, + "grad_norm": 0.7612236738204956, + "learning_rate": 0.0001746130372007141, + "loss": 2.7595, + "step": 4715 + }, + { + "epoch": 0.3805988217254459, + "grad_norm": 0.6783852577209473, + "learning_rate": 0.00017460252535649493, + "loss": 2.6156, + "step": 4716 + }, + { + "epoch": 0.3806795254620289, + "grad_norm": 0.7495827078819275, + 
"learning_rate": 0.00017459201165298578, + "loss": 2.6847, + "step": 4717 + }, + { + "epoch": 0.3807602291986119, + "grad_norm": 0.814798891544342, + "learning_rate": 0.0001745814960904487, + "loss": 2.6211, + "step": 4718 + }, + { + "epoch": 0.3808409329351949, + "grad_norm": 0.7541367411613464, + "learning_rate": 0.0001745709786691458, + "loss": 2.6214, + "step": 4719 + }, + { + "epoch": 0.3809216366717779, + "grad_norm": 0.7065702676773071, + "learning_rate": 0.00017456045938933921, + "loss": 2.6699, + "step": 4720 + }, + { + "epoch": 0.3810023404083609, + "grad_norm": 0.751960813999176, + "learning_rate": 0.000174549938251291, + "loss": 2.6085, + "step": 4721 + }, + { + "epoch": 0.3810830441449439, + "grad_norm": 0.72068190574646, + "learning_rate": 0.00017453941525526353, + "loss": 2.6201, + "step": 4722 + }, + { + "epoch": 0.3811637478815269, + "grad_norm": 0.7201167941093445, + "learning_rate": 0.00017452889040151892, + "loss": 2.6775, + "step": 4723 + }, + { + "epoch": 0.38124445161810994, + "grad_norm": 0.7904958128929138, + "learning_rate": 0.00017451836369031956, + "loss": 2.7217, + "step": 4724 + }, + { + "epoch": 0.3813251553546929, + "grad_norm": 0.7096366882324219, + "learning_rate": 0.0001745078351219278, + "loss": 2.7004, + "step": 4725 + }, + { + "epoch": 0.38140585909127594, + "grad_norm": 0.6812441945075989, + "learning_rate": 0.00017449730469660602, + "loss": 2.6555, + "step": 4726 + }, + { + "epoch": 0.3814865628278589, + "grad_norm": 0.8037428855895996, + "learning_rate": 0.00017448677241461665, + "loss": 2.7094, + "step": 4727 + }, + { + "epoch": 0.38156726656444195, + "grad_norm": 0.7282679677009583, + "learning_rate": 0.00017447623827622223, + "loss": 2.6699, + "step": 4728 + }, + { + "epoch": 0.38164797030102493, + "grad_norm": 0.745705783367157, + "learning_rate": 0.00017446570228168523, + "loss": 2.6098, + "step": 4729 + }, + { + "epoch": 0.38172867403760796, + "grad_norm": 0.7098714113235474, + "learning_rate": 0.00017445516443126828, 
+ "loss": 2.6628, + "step": 4730 + }, + { + "epoch": 0.38180937777419094, + "grad_norm": 0.7376620769500732, + "learning_rate": 0.00017444462472523405, + "loss": 2.7086, + "step": 4731 + }, + { + "epoch": 0.38189008151077397, + "grad_norm": 0.717800498008728, + "learning_rate": 0.00017443408316384512, + "loss": 2.6582, + "step": 4732 + }, + { + "epoch": 0.38197078524735695, + "grad_norm": 0.7061530947685242, + "learning_rate": 0.00017442353974736428, + "loss": 2.6817, + "step": 4733 + }, + { + "epoch": 0.38205148898394, + "grad_norm": 0.744667112827301, + "learning_rate": 0.0001744129944760543, + "loss": 2.6649, + "step": 4734 + }, + { + "epoch": 0.38213219272052296, + "grad_norm": 0.7302529215812683, + "learning_rate": 0.00017440244735017797, + "loss": 2.7313, + "step": 4735 + }, + { + "epoch": 0.382212896457106, + "grad_norm": 0.6845258474349976, + "learning_rate": 0.00017439189836999816, + "loss": 2.637, + "step": 4736 + }, + { + "epoch": 0.38229360019368896, + "grad_norm": 0.7060490250587463, + "learning_rate": 0.0001743813475357778, + "loss": 2.6674, + "step": 4737 + }, + { + "epoch": 0.382374303930272, + "grad_norm": 0.7146841287612915, + "learning_rate": 0.00017437079484777977, + "loss": 2.6607, + "step": 4738 + }, + { + "epoch": 0.382455007666855, + "grad_norm": 0.7107662558555603, + "learning_rate": 0.00017436024030626719, + "loss": 2.6777, + "step": 4739 + }, + { + "epoch": 0.382535711403438, + "grad_norm": 0.7356777191162109, + "learning_rate": 0.00017434968391150303, + "loss": 2.5801, + "step": 4740 + }, + { + "epoch": 0.382616415140021, + "grad_norm": 0.6839054226875305, + "learning_rate": 0.00017433912566375037, + "loss": 2.6319, + "step": 4741 + }, + { + "epoch": 0.382697118876604, + "grad_norm": 0.7049627900123596, + "learning_rate": 0.00017432856556327236, + "loss": 2.741, + "step": 4742 + }, + { + "epoch": 0.382777822613187, + "grad_norm": 0.7926551103591919, + "learning_rate": 0.00017431800361033224, + "loss": 2.64, + "step": 4743 + }, + { + 
"epoch": 0.38285852634976997, + "grad_norm": 0.734272301197052, + "learning_rate": 0.0001743074398051932, + "loss": 2.6575, + "step": 4744 + }, + { + "epoch": 0.382939230086353, + "grad_norm": 0.6959543824195862, + "learning_rate": 0.00017429687414811847, + "loss": 2.664, + "step": 4745 + }, + { + "epoch": 0.383019933822936, + "grad_norm": 0.7258255481719971, + "learning_rate": 0.00017428630663937148, + "loss": 2.6597, + "step": 4746 + }, + { + "epoch": 0.383100637559519, + "grad_norm": 0.8067473769187927, + "learning_rate": 0.0001742757372792155, + "loss": 2.6798, + "step": 4747 + }, + { + "epoch": 0.383181341296102, + "grad_norm": 0.7000626921653748, + "learning_rate": 0.000174265166067914, + "loss": 2.6561, + "step": 4748 + }, + { + "epoch": 0.383262045032685, + "grad_norm": 0.818914532661438, + "learning_rate": 0.00017425459300573045, + "loss": 2.6491, + "step": 4749 + }, + { + "epoch": 0.383342748769268, + "grad_norm": 0.7060543298721313, + "learning_rate": 0.00017424401809292833, + "loss": 2.6825, + "step": 4750 + }, + { + "epoch": 0.383423452505851, + "grad_norm": 0.893488883972168, + "learning_rate": 0.0001742334413297712, + "loss": 2.7201, + "step": 4751 + }, + { + "epoch": 0.383504156242434, + "grad_norm": 0.8131078481674194, + "learning_rate": 0.00017422286271652265, + "loss": 2.7828, + "step": 4752 + }, + { + "epoch": 0.38358485997901703, + "grad_norm": 0.7735587954521179, + "learning_rate": 0.00017421228225344634, + "loss": 2.6489, + "step": 4753 + }, + { + "epoch": 0.3836655637156, + "grad_norm": 0.713800311088562, + "learning_rate": 0.000174201699940806, + "loss": 2.6686, + "step": 4754 + }, + { + "epoch": 0.38374626745218304, + "grad_norm": 0.8246580362319946, + "learning_rate": 0.00017419111577886528, + "loss": 2.6771, + "step": 4755 + }, + { + "epoch": 0.383826971188766, + "grad_norm": 0.694542646408081, + "learning_rate": 0.00017418052976788805, + "loss": 2.6632, + "step": 4756 + }, + { + "epoch": 0.38390767492534905, + "grad_norm": 
0.7200453281402588, + "learning_rate": 0.0001741699419081381, + "loss": 2.6386, + "step": 4757 + }, + { + "epoch": 0.38398837866193203, + "grad_norm": 0.7002073526382446, + "learning_rate": 0.00017415935219987933, + "loss": 2.6399, + "step": 4758 + }, + { + "epoch": 0.38406908239851506, + "grad_norm": 0.7056967616081238, + "learning_rate": 0.00017414876064337565, + "loss": 2.7048, + "step": 4759 + }, + { + "epoch": 0.38414978613509804, + "grad_norm": 0.7406448721885681, + "learning_rate": 0.000174138167238891, + "loss": 2.6256, + "step": 4760 + }, + { + "epoch": 0.38423048987168107, + "grad_norm": 0.7280529737472534, + "learning_rate": 0.00017412757198668945, + "loss": 2.6393, + "step": 4761 + }, + { + "epoch": 0.38431119360826405, + "grad_norm": 0.7626908421516418, + "learning_rate": 0.00017411697488703502, + "loss": 2.6717, + "step": 4762 + }, + { + "epoch": 0.3843918973448471, + "grad_norm": 0.716345489025116, + "learning_rate": 0.00017410637594019184, + "loss": 2.6457, + "step": 4763 + }, + { + "epoch": 0.38447260108143005, + "grad_norm": 0.8825077414512634, + "learning_rate": 0.00017409577514642405, + "loss": 2.7042, + "step": 4764 + }, + { + "epoch": 0.3845533048180131, + "grad_norm": 0.7301186919212341, + "learning_rate": 0.00017408517250599585, + "loss": 2.7065, + "step": 4765 + }, + { + "epoch": 0.38463400855459606, + "grad_norm": 0.8235788345336914, + "learning_rate": 0.0001740745680191715, + "loss": 2.6315, + "step": 4766 + }, + { + "epoch": 0.3847147122911791, + "grad_norm": 0.7355515956878662, + "learning_rate": 0.00017406396168621527, + "loss": 2.6939, + "step": 4767 + }, + { + "epoch": 0.38479541602776207, + "grad_norm": 0.6781682372093201, + "learning_rate": 0.0001740533535073915, + "loss": 2.6071, + "step": 4768 + }, + { + "epoch": 0.3848761197643451, + "grad_norm": 0.801191508769989, + "learning_rate": 0.0001740427434829646, + "loss": 2.6635, + "step": 4769 + }, + { + "epoch": 0.3849568235009281, + "grad_norm": 0.759682297706604, + 
"learning_rate": 0.00017403213161319903, + "loss": 2.6823, + "step": 4770 + }, + { + "epoch": 0.3850375272375111, + "grad_norm": 0.806498110294342, + "learning_rate": 0.00017402151789835916, + "loss": 2.7111, + "step": 4771 + }, + { + "epoch": 0.3851182309740941, + "grad_norm": 0.7677996158599854, + "learning_rate": 0.00017401090233870958, + "loss": 2.6701, + "step": 4772 + }, + { + "epoch": 0.3851989347106771, + "grad_norm": 0.7449933290481567, + "learning_rate": 0.00017400028493451487, + "loss": 2.7037, + "step": 4773 + }, + { + "epoch": 0.3852796384472601, + "grad_norm": 0.7506107091903687, + "learning_rate": 0.0001739896656860396, + "loss": 2.6587, + "step": 4774 + }, + { + "epoch": 0.38536034218384313, + "grad_norm": 0.8781036734580994, + "learning_rate": 0.00017397904459354844, + "loss": 2.7634, + "step": 4775 + }, + { + "epoch": 0.3854410459204261, + "grad_norm": 0.7067514657974243, + "learning_rate": 0.0001739684216573061, + "loss": 2.638, + "step": 4776 + }, + { + "epoch": 0.38552174965700914, + "grad_norm": 0.7742886543273926, + "learning_rate": 0.00017395779687757735, + "loss": 2.7043, + "step": 4777 + }, + { + "epoch": 0.3856024533935921, + "grad_norm": 0.7348291277885437, + "learning_rate": 0.00017394717025462697, + "loss": 2.7404, + "step": 4778 + }, + { + "epoch": 0.38568315713017515, + "grad_norm": 0.7449346780776978, + "learning_rate": 0.00017393654178871984, + "loss": 2.631, + "step": 4779 + }, + { + "epoch": 0.3857638608667581, + "grad_norm": 0.7191200256347656, + "learning_rate": 0.00017392591148012078, + "loss": 2.6776, + "step": 4780 + }, + { + "epoch": 0.38584456460334116, + "grad_norm": 0.7055533528327942, + "learning_rate": 0.00017391527932909476, + "loss": 2.6219, + "step": 4781 + }, + { + "epoch": 0.38592526833992413, + "grad_norm": 0.73755943775177, + "learning_rate": 0.0001739046453359068, + "loss": 2.6692, + "step": 4782 + }, + { + "epoch": 0.38600597207650716, + "grad_norm": 0.7469369769096375, + "learning_rate": 
0.00017389400950082185, + "loss": 2.6572, + "step": 4783 + }, + { + "epoch": 0.38608667581309014, + "grad_norm": 0.7552534341812134, + "learning_rate": 0.00017388337182410504, + "loss": 2.6853, + "step": 4784 + }, + { + "epoch": 0.3861673795496732, + "grad_norm": 0.7453532814979553, + "learning_rate": 0.00017387273230602145, + "loss": 2.6601, + "step": 4785 + }, + { + "epoch": 0.38624808328625615, + "grad_norm": 0.7259301543235779, + "learning_rate": 0.0001738620909468363, + "loss": 2.6997, + "step": 4786 + }, + { + "epoch": 0.3863287870228392, + "grad_norm": 0.6970019936561584, + "learning_rate": 0.00017385144774681476, + "loss": 2.7497, + "step": 4787 + }, + { + "epoch": 0.38640949075942216, + "grad_norm": 0.7172032594680786, + "learning_rate": 0.00017384080270622208, + "loss": 2.7182, + "step": 4788 + }, + { + "epoch": 0.3864901944960052, + "grad_norm": 0.7184371948242188, + "learning_rate": 0.00017383015582532357, + "loss": 2.6358, + "step": 4789 + }, + { + "epoch": 0.38657089823258817, + "grad_norm": 0.7302096486091614, + "learning_rate": 0.00017381950710438458, + "loss": 2.6066, + "step": 4790 + }, + { + "epoch": 0.3866516019691712, + "grad_norm": 0.7043540477752686, + "learning_rate": 0.00017380885654367053, + "loss": 2.699, + "step": 4791 + }, + { + "epoch": 0.3867323057057542, + "grad_norm": 0.6919732689857483, + "learning_rate": 0.0001737982041434468, + "loss": 2.6025, + "step": 4792 + }, + { + "epoch": 0.3868130094423372, + "grad_norm": 0.7277705669403076, + "learning_rate": 0.00017378754990397894, + "loss": 2.6764, + "step": 4793 + }, + { + "epoch": 0.3868937131789202, + "grad_norm": 0.7546190619468689, + "learning_rate": 0.00017377689382553247, + "loss": 2.5865, + "step": 4794 + }, + { + "epoch": 0.38697441691550316, + "grad_norm": 0.7636401653289795, + "learning_rate": 0.00017376623590837294, + "loss": 2.6488, + "step": 4795 + }, + { + "epoch": 0.3870551206520862, + "grad_norm": 0.6945658922195435, + "learning_rate": 0.00017375557615276595, + "loss": 
2.6739, + "step": 4796 + }, + { + "epoch": 0.38713582438866917, + "grad_norm": 0.7503637075424194, + "learning_rate": 0.00017374491455897722, + "loss": 2.6854, + "step": 4797 + }, + { + "epoch": 0.3872165281252522, + "grad_norm": 0.7457373142242432, + "learning_rate": 0.00017373425112727247, + "loss": 2.6659, + "step": 4798 + }, + { + "epoch": 0.3872972318618352, + "grad_norm": 0.7742534875869751, + "learning_rate": 0.0001737235858579174, + "loss": 2.6461, + "step": 4799 + }, + { + "epoch": 0.3873779355984182, + "grad_norm": 0.7397909760475159, + "learning_rate": 0.0001737129187511779, + "loss": 2.6779, + "step": 4800 + }, + { + "epoch": 0.3874586393350012, + "grad_norm": 0.7922031879425049, + "learning_rate": 0.00017370224980731974, + "loss": 2.6417, + "step": 4801 + }, + { + "epoch": 0.3875393430715842, + "grad_norm": 0.8503968715667725, + "learning_rate": 0.00017369157902660887, + "loss": 2.7063, + "step": 4802 + }, + { + "epoch": 0.3876200468081672, + "grad_norm": 0.7143701314926147, + "learning_rate": 0.00017368090640931125, + "loss": 2.6152, + "step": 4803 + }, + { + "epoch": 0.38770075054475023, + "grad_norm": 0.8016753196716309, + "learning_rate": 0.0001736702319556928, + "loss": 2.6005, + "step": 4804 + }, + { + "epoch": 0.3877814542813332, + "grad_norm": 0.7329538464546204, + "learning_rate": 0.00017365955566601962, + "loss": 2.6027, + "step": 4805 + }, + { + "epoch": 0.38786215801791624, + "grad_norm": 0.7005148530006409, + "learning_rate": 0.00017364887754055773, + "loss": 2.6585, + "step": 4806 + }, + { + "epoch": 0.3879428617544992, + "grad_norm": 0.7092769145965576, + "learning_rate": 0.00017363819757957333, + "loss": 2.6763, + "step": 4807 + }, + { + "epoch": 0.38802356549108225, + "grad_norm": 0.7475202679634094, + "learning_rate": 0.0001736275157833325, + "loss": 2.5969, + "step": 4808 + }, + { + "epoch": 0.3881042692276652, + "grad_norm": 0.822496235370636, + "learning_rate": 0.0001736168321521016, + "loss": 2.6758, + "step": 4809 + }, + { + 
"epoch": 0.38818497296424825, + "grad_norm": 0.7756842374801636, + "learning_rate": 0.0001736061466861467, + "loss": 2.6676, + "step": 4810 + }, + { + "epoch": 0.38826567670083123, + "grad_norm": 0.7192497849464417, + "learning_rate": 0.00017359545938573428, + "loss": 2.7045, + "step": 4811 + }, + { + "epoch": 0.38834638043741426, + "grad_norm": 0.7064149379730225, + "learning_rate": 0.00017358477025113063, + "loss": 2.6169, + "step": 4812 + }, + { + "epoch": 0.38842708417399724, + "grad_norm": 0.7297258973121643, + "learning_rate": 0.00017357407928260215, + "loss": 2.612, + "step": 4813 + }, + { + "epoch": 0.38850778791058027, + "grad_norm": 0.7011935114860535, + "learning_rate": 0.00017356338648041528, + "loss": 2.6507, + "step": 4814 + }, + { + "epoch": 0.38858849164716325, + "grad_norm": 0.7647256255149841, + "learning_rate": 0.00017355269184483651, + "loss": 2.6838, + "step": 4815 + }, + { + "epoch": 0.3886691953837463, + "grad_norm": 0.690182089805603, + "learning_rate": 0.0001735419953761324, + "loss": 2.6996, + "step": 4816 + }, + { + "epoch": 0.38874989912032926, + "grad_norm": 0.7142173647880554, + "learning_rate": 0.00017353129707456955, + "loss": 2.6705, + "step": 4817 + }, + { + "epoch": 0.3888306028569123, + "grad_norm": 0.801369309425354, + "learning_rate": 0.00017352059694041456, + "loss": 2.7002, + "step": 4818 + }, + { + "epoch": 0.38891130659349527, + "grad_norm": 0.7021649479866028, + "learning_rate": 0.0001735098949739341, + "loss": 2.7042, + "step": 4819 + }, + { + "epoch": 0.3889920103300783, + "grad_norm": 0.6802586317062378, + "learning_rate": 0.00017349919117539488, + "loss": 2.7186, + "step": 4820 + }, + { + "epoch": 0.3890727140666613, + "grad_norm": 0.7723212838172913, + "learning_rate": 0.0001734884855450637, + "loss": 2.608, + "step": 4821 + }, + { + "epoch": 0.3891534178032443, + "grad_norm": 0.7037193179130554, + "learning_rate": 0.00017347777808320735, + "loss": 2.6198, + "step": 4822 + }, + { + "epoch": 0.3892341215398273, + 
"grad_norm": 0.7172731161117554, + "learning_rate": 0.00017346706879009272, + "loss": 2.7037, + "step": 4823 + }, + { + "epoch": 0.3893148252764103, + "grad_norm": 0.7421539425849915, + "learning_rate": 0.00017345635766598667, + "loss": 2.6619, + "step": 4824 + }, + { + "epoch": 0.3893955290129933, + "grad_norm": 0.7587071061134338, + "learning_rate": 0.0001734456447111562, + "loss": 2.6229, + "step": 4825 + }, + { + "epoch": 0.3894762327495763, + "grad_norm": 0.6981459259986877, + "learning_rate": 0.00017343492992586822, + "loss": 2.5927, + "step": 4826 + }, + { + "epoch": 0.3895569364861593, + "grad_norm": 0.7628491520881653, + "learning_rate": 0.00017342421331038987, + "loss": 2.7047, + "step": 4827 + }, + { + "epoch": 0.38963764022274233, + "grad_norm": 0.8005064129829407, + "learning_rate": 0.00017341349486498818, + "loss": 2.6918, + "step": 4828 + }, + { + "epoch": 0.3897183439593253, + "grad_norm": 0.7756431102752686, + "learning_rate": 0.0001734027745899303, + "loss": 2.6621, + "step": 4829 + }, + { + "epoch": 0.38979904769590834, + "grad_norm": 0.7317833304405212, + "learning_rate": 0.00017339205248548338, + "loss": 2.7134, + "step": 4830 + }, + { + "epoch": 0.3898797514324913, + "grad_norm": 0.7293959259986877, + "learning_rate": 0.0001733813285519147, + "loss": 2.6865, + "step": 4831 + }, + { + "epoch": 0.38996045516907435, + "grad_norm": 0.7120299935340881, + "learning_rate": 0.00017337060278949147, + "loss": 2.6915, + "step": 4832 + }, + { + "epoch": 0.3900411589056573, + "grad_norm": 0.7255397439002991, + "learning_rate": 0.00017335987519848103, + "loss": 2.6671, + "step": 4833 + }, + { + "epoch": 0.39012186264224036, + "grad_norm": 0.7849408388137817, + "learning_rate": 0.0001733491457791507, + "loss": 2.6301, + "step": 4834 + }, + { + "epoch": 0.39020256637882333, + "grad_norm": 0.6998472809791565, + "learning_rate": 0.00017333841453176797, + "loss": 2.6587, + "step": 4835 + }, + { + "epoch": 0.39028327011540637, + "grad_norm": 0.7530023455619812, + 
"learning_rate": 0.00017332768145660024, + "loss": 2.7011, + "step": 4836 + }, + { + "epoch": 0.39036397385198934, + "grad_norm": 0.7251207828521729, + "learning_rate": 0.00017331694655391497, + "loss": 2.6416, + "step": 4837 + }, + { + "epoch": 0.3904446775885724, + "grad_norm": 0.7016854882240295, + "learning_rate": 0.00017330620982397975, + "loss": 2.7224, + "step": 4838 + }, + { + "epoch": 0.39052538132515535, + "grad_norm": 0.7253310084342957, + "learning_rate": 0.00017329547126706217, + "loss": 2.6747, + "step": 4839 + }, + { + "epoch": 0.3906060850617384, + "grad_norm": 0.7114601731300354, + "learning_rate": 0.00017328473088342987, + "loss": 2.6654, + "step": 4840 + }, + { + "epoch": 0.39068678879832136, + "grad_norm": 0.7773289680480957, + "learning_rate": 0.00017327398867335048, + "loss": 2.6625, + "step": 4841 + }, + { + "epoch": 0.3907674925349044, + "grad_norm": 0.7541868686676025, + "learning_rate": 0.00017326324463709175, + "loss": 2.667, + "step": 4842 + }, + { + "epoch": 0.39084819627148737, + "grad_norm": 0.8095890283584595, + "learning_rate": 0.00017325249877492147, + "loss": 2.706, + "step": 4843 + }, + { + "epoch": 0.3909289000080704, + "grad_norm": 0.7019474506378174, + "learning_rate": 0.00017324175108710742, + "loss": 2.6125, + "step": 4844 + }, + { + "epoch": 0.3910096037446534, + "grad_norm": 0.7055396437644958, + "learning_rate": 0.00017323100157391746, + "loss": 2.6373, + "step": 4845 + }, + { + "epoch": 0.39109030748123635, + "grad_norm": 0.7332476377487183, + "learning_rate": 0.00017322025023561955, + "loss": 2.6559, + "step": 4846 + }, + { + "epoch": 0.3911710112178194, + "grad_norm": 0.7740387916564941, + "learning_rate": 0.00017320949707248158, + "loss": 2.7341, + "step": 4847 + }, + { + "epoch": 0.39125171495440236, + "grad_norm": 0.7371044754981995, + "learning_rate": 0.0001731987420847716, + "loss": 2.7318, + "step": 4848 + }, + { + "epoch": 0.3913324186909854, + "grad_norm": 0.7897786498069763, + "learning_rate": 
0.00017318798527275758, + "loss": 2.6759, + "step": 4849 + }, + { + "epoch": 0.39141312242756837, + "grad_norm": 0.7149896621704102, + "learning_rate": 0.0001731772266367077, + "loss": 2.7097, + "step": 4850 + }, + { + "epoch": 0.3914938261641514, + "grad_norm": 0.7824358344078064, + "learning_rate": 0.00017316646617689002, + "loss": 2.6376, + "step": 4851 + }, + { + "epoch": 0.3915745299007344, + "grad_norm": 0.7704496383666992, + "learning_rate": 0.00017315570389357272, + "loss": 2.6539, + "step": 4852 + }, + { + "epoch": 0.3916552336373174, + "grad_norm": 0.7489706873893738, + "learning_rate": 0.00017314493978702407, + "loss": 2.6716, + "step": 4853 + }, + { + "epoch": 0.3917359373739004, + "grad_norm": 0.7368690967559814, + "learning_rate": 0.00017313417385751234, + "loss": 2.7171, + "step": 4854 + }, + { + "epoch": 0.3918166411104834, + "grad_norm": 0.7215858697891235, + "learning_rate": 0.00017312340610530579, + "loss": 2.6306, + "step": 4855 + }, + { + "epoch": 0.3918973448470664, + "grad_norm": 0.7622217535972595, + "learning_rate": 0.00017311263653067285, + "loss": 2.6089, + "step": 4856 + }, + { + "epoch": 0.39197804858364943, + "grad_norm": 0.7317889332771301, + "learning_rate": 0.00017310186513388185, + "loss": 2.6831, + "step": 4857 + }, + { + "epoch": 0.3920587523202324, + "grad_norm": 0.894185483455658, + "learning_rate": 0.0001730910919152013, + "loss": 2.684, + "step": 4858 + }, + { + "epoch": 0.39213945605681544, + "grad_norm": 0.7313157916069031, + "learning_rate": 0.00017308031687489968, + "loss": 2.6465, + "step": 4859 + }, + { + "epoch": 0.3922201597933984, + "grad_norm": 0.7765825390815735, + "learning_rate": 0.00017306954001324552, + "loss": 2.6526, + "step": 4860 + }, + { + "epoch": 0.39230086352998145, + "grad_norm": 0.7171424031257629, + "learning_rate": 0.00017305876133050742, + "loss": 2.6212, + "step": 4861 + }, + { + "epoch": 0.3923815672665644, + "grad_norm": 0.7215112447738647, + "learning_rate": 0.000173047980826954, + "loss": 
2.6329, + "step": 4862 + }, + { + "epoch": 0.39246227100314746, + "grad_norm": 0.7393578886985779, + "learning_rate": 0.00017303719850285396, + "loss": 2.7264, + "step": 4863 + }, + { + "epoch": 0.39254297473973043, + "grad_norm": 0.7620136737823486, + "learning_rate": 0.00017302641435847603, + "loss": 2.6686, + "step": 4864 + }, + { + "epoch": 0.39262367847631346, + "grad_norm": 0.7290963530540466, + "learning_rate": 0.00017301562839408893, + "loss": 2.578, + "step": 4865 + }, + { + "epoch": 0.39270438221289644, + "grad_norm": 0.6978541612625122, + "learning_rate": 0.00017300484060996153, + "loss": 2.6783, + "step": 4866 + }, + { + "epoch": 0.3927850859494795, + "grad_norm": 0.7212007641792297, + "learning_rate": 0.00017299405100636264, + "loss": 2.6282, + "step": 4867 + }, + { + "epoch": 0.39286578968606245, + "grad_norm": 0.757324755191803, + "learning_rate": 0.0001729832595835612, + "loss": 2.6933, + "step": 4868 + }, + { + "epoch": 0.3929464934226455, + "grad_norm": 0.7052869200706482, + "learning_rate": 0.00017297246634182618, + "loss": 2.7152, + "step": 4869 + }, + { + "epoch": 0.39302719715922846, + "grad_norm": 0.7326259016990662, + "learning_rate": 0.0001729616712814265, + "loss": 2.6792, + "step": 4870 + }, + { + "epoch": 0.3931079008958115, + "grad_norm": 0.7540302276611328, + "learning_rate": 0.00017295087440263128, + "loss": 2.6621, + "step": 4871 + }, + { + "epoch": 0.39318860463239447, + "grad_norm": 0.765454888343811, + "learning_rate": 0.00017294007570570956, + "loss": 2.7049, + "step": 4872 + }, + { + "epoch": 0.3932693083689775, + "grad_norm": 0.7303065061569214, + "learning_rate": 0.0001729292751909305, + "loss": 2.6867, + "step": 4873 + }, + { + "epoch": 0.3933500121055605, + "grad_norm": 0.7049854397773743, + "learning_rate": 0.00017291847285856325, + "loss": 2.7052, + "step": 4874 + }, + { + "epoch": 0.3934307158421435, + "grad_norm": 0.7199053764343262, + "learning_rate": 0.00017290766870887704, + "loss": 2.7195, + "step": 4875 + }, + { + 
"epoch": 0.3935114195787265, + "grad_norm": 0.7536180019378662, + "learning_rate": 0.00017289686274214118, + "loss": 2.6861, + "step": 4876 + }, + { + "epoch": 0.3935921233153095, + "grad_norm": 0.7295238971710205, + "learning_rate": 0.00017288605495862492, + "loss": 2.6684, + "step": 4877 + }, + { + "epoch": 0.3936728270518925, + "grad_norm": 0.7575719952583313, + "learning_rate": 0.00017287524535859763, + "loss": 2.6439, + "step": 4878 + }, + { + "epoch": 0.3937535307884755, + "grad_norm": 0.678909182548523, + "learning_rate": 0.00017286443394232874, + "loss": 2.6562, + "step": 4879 + }, + { + "epoch": 0.3938342345250585, + "grad_norm": 0.6908892393112183, + "learning_rate": 0.00017285362071008768, + "loss": 2.6364, + "step": 4880 + }, + { + "epoch": 0.39391493826164153, + "grad_norm": 0.7414079904556274, + "learning_rate": 0.00017284280566214397, + "loss": 2.5872, + "step": 4881 + }, + { + "epoch": 0.3939956419982245, + "grad_norm": 0.6824749112129211, + "learning_rate": 0.0001728319887987671, + "loss": 2.641, + "step": 4882 + }, + { + "epoch": 0.39407634573480754, + "grad_norm": 0.6908513903617859, + "learning_rate": 0.0001728211701202267, + "loss": 2.6977, + "step": 4883 + }, + { + "epoch": 0.3941570494713905, + "grad_norm": 0.7214735746383667, + "learning_rate": 0.0001728103496267924, + "loss": 2.5826, + "step": 4884 + }, + { + "epoch": 0.39423775320797355, + "grad_norm": 0.812781572341919, + "learning_rate": 0.00017279952731873385, + "loss": 2.6806, + "step": 4885 + }, + { + "epoch": 0.39431845694455653, + "grad_norm": 0.7610746026039124, + "learning_rate": 0.00017278870319632078, + "loss": 2.6046, + "step": 4886 + }, + { + "epoch": 0.39439916068113956, + "grad_norm": 0.7151652574539185, + "learning_rate": 0.00017277787725982293, + "loss": 2.6543, + "step": 4887 + }, + { + "epoch": 0.39447986441772254, + "grad_norm": 0.7293612360954285, + "learning_rate": 0.00017276704950951017, + "loss": 2.6384, + "step": 4888 + }, + { + "epoch": 0.39456056815430557, + 
"grad_norm": 0.8138254284858704, + "learning_rate": 0.00017275621994565233, + "loss": 2.7208, + "step": 4889 + }, + { + "epoch": 0.39464127189088855, + "grad_norm": 0.7557196021080017, + "learning_rate": 0.00017274538856851924, + "loss": 2.6571, + "step": 4890 + }, + { + "epoch": 0.3947219756274716, + "grad_norm": 0.7297266721725464, + "learning_rate": 0.00017273455537838097, + "loss": 2.6222, + "step": 4891 + }, + { + "epoch": 0.39480267936405455, + "grad_norm": 0.7838431596755981, + "learning_rate": 0.00017272372037550743, + "loss": 2.782, + "step": 4892 + }, + { + "epoch": 0.3948833831006376, + "grad_norm": 0.7799673676490784, + "learning_rate": 0.00017271288356016866, + "loss": 2.6658, + "step": 4893 + }, + { + "epoch": 0.39496408683722056, + "grad_norm": 0.8495545387268066, + "learning_rate": 0.0001727020449326348, + "loss": 2.6552, + "step": 4894 + }, + { + "epoch": 0.3950447905738036, + "grad_norm": 0.7317770719528198, + "learning_rate": 0.00017269120449317588, + "loss": 2.6616, + "step": 4895 + }, + { + "epoch": 0.39512549431038657, + "grad_norm": 0.7518885731697083, + "learning_rate": 0.00017268036224206217, + "loss": 2.6864, + "step": 4896 + }, + { + "epoch": 0.39520619804696955, + "grad_norm": 0.83487468957901, + "learning_rate": 0.00017266951817956382, + "loss": 2.7535, + "step": 4897 + }, + { + "epoch": 0.3952869017835526, + "grad_norm": 0.7440658211708069, + "learning_rate": 0.00017265867230595113, + "loss": 2.6584, + "step": 4898 + }, + { + "epoch": 0.39536760552013556, + "grad_norm": 0.7060485482215881, + "learning_rate": 0.00017264782462149438, + "loss": 2.6892, + "step": 4899 + }, + { + "epoch": 0.3954483092567186, + "grad_norm": 0.8410428166389465, + "learning_rate": 0.00017263697512646394, + "loss": 2.6425, + "step": 4900 + }, + { + "epoch": 0.39552901299330157, + "grad_norm": 0.757046639919281, + "learning_rate": 0.0001726261238211302, + "loss": 2.6159, + "step": 4901 + }, + { + "epoch": 0.3956097167298846, + "grad_norm": 0.7288908958435059, + 
"learning_rate": 0.00017261527070576365, + "loss": 2.6753, + "step": 4902 + }, + { + "epoch": 0.3956904204664676, + "grad_norm": 0.8194541335105896, + "learning_rate": 0.0001726044157806347, + "loss": 2.6673, + "step": 4903 + }, + { + "epoch": 0.3957711242030506, + "grad_norm": 0.7957740426063538, + "learning_rate": 0.00017259355904601393, + "loss": 2.6662, + "step": 4904 + }, + { + "epoch": 0.3958518279396336, + "grad_norm": 0.8790122270584106, + "learning_rate": 0.0001725827005021719, + "loss": 2.7513, + "step": 4905 + }, + { + "epoch": 0.3959325316762166, + "grad_norm": 0.7674984335899353, + "learning_rate": 0.00017257184014937924, + "loss": 2.6375, + "step": 4906 + }, + { + "epoch": 0.3960132354127996, + "grad_norm": 0.7250992655754089, + "learning_rate": 0.00017256097798790663, + "loss": 2.63, + "step": 4907 + }, + { + "epoch": 0.3960939391493826, + "grad_norm": 0.8578312397003174, + "learning_rate": 0.00017255011401802475, + "loss": 2.702, + "step": 4908 + }, + { + "epoch": 0.3961746428859656, + "grad_norm": 0.7365253567695618, + "learning_rate": 0.00017253924824000438, + "loss": 2.6156, + "step": 4909 + }, + { + "epoch": 0.39625534662254863, + "grad_norm": 0.7148925065994263, + "learning_rate": 0.00017252838065411633, + "loss": 2.6658, + "step": 4910 + }, + { + "epoch": 0.3963360503591316, + "grad_norm": 0.7517829537391663, + "learning_rate": 0.00017251751126063148, + "loss": 2.6347, + "step": 4911 + }, + { + "epoch": 0.39641675409571464, + "grad_norm": 0.7880864143371582, + "learning_rate": 0.00017250664005982066, + "loss": 2.7045, + "step": 4912 + }, + { + "epoch": 0.3964974578322976, + "grad_norm": 0.7460693120956421, + "learning_rate": 0.00017249576705195482, + "loss": 2.6976, + "step": 4913 + }, + { + "epoch": 0.39657816156888065, + "grad_norm": 0.7179895043373108, + "learning_rate": 0.00017248489223730496, + "loss": 2.6366, + "step": 4914 + }, + { + "epoch": 0.3966588653054636, + "grad_norm": 0.7737421989440918, + "learning_rate": 
0.00017247401561614213, + "loss": 2.7116, + "step": 4915 + }, + { + "epoch": 0.39673956904204666, + "grad_norm": 0.8561483025550842, + "learning_rate": 0.0001724631371887374, + "loss": 2.6591, + "step": 4916 + }, + { + "epoch": 0.39682027277862963, + "grad_norm": 0.7616356611251831, + "learning_rate": 0.00017245225695536182, + "loss": 2.6436, + "step": 4917 + }, + { + "epoch": 0.39690097651521267, + "grad_norm": 0.7754645943641663, + "learning_rate": 0.0001724413749162866, + "loss": 2.6699, + "step": 4918 + }, + { + "epoch": 0.39698168025179564, + "grad_norm": 0.800165593624115, + "learning_rate": 0.000172430491071783, + "loss": 2.7155, + "step": 4919 + }, + { + "epoch": 0.3970623839883787, + "grad_norm": 0.8448799848556519, + "learning_rate": 0.00017241960542212223, + "loss": 2.6991, + "step": 4920 + }, + { + "epoch": 0.39714308772496165, + "grad_norm": 0.7106496095657349, + "learning_rate": 0.00017240871796757556, + "loss": 2.628, + "step": 4921 + }, + { + "epoch": 0.3972237914615447, + "grad_norm": 0.7332959175109863, + "learning_rate": 0.00017239782870841436, + "loss": 2.6159, + "step": 4922 + }, + { + "epoch": 0.39730449519812766, + "grad_norm": 0.7573551535606384, + "learning_rate": 0.00017238693764491002, + "loss": 2.67, + "step": 4923 + }, + { + "epoch": 0.3973851989347107, + "grad_norm": 0.7833136320114136, + "learning_rate": 0.00017237604477733399, + "loss": 2.7276, + "step": 4924 + }, + { + "epoch": 0.39746590267129367, + "grad_norm": 0.7233073711395264, + "learning_rate": 0.00017236515010595773, + "loss": 2.6654, + "step": 4925 + }, + { + "epoch": 0.3975466064078767, + "grad_norm": 0.7920324206352234, + "learning_rate": 0.00017235425363105273, + "loss": 2.7611, + "step": 4926 + }, + { + "epoch": 0.3976273101444597, + "grad_norm": 0.7096883058547974, + "learning_rate": 0.00017234335535289063, + "loss": 2.687, + "step": 4927 + }, + { + "epoch": 0.3977080138810427, + "grad_norm": 0.7231960296630859, + "learning_rate": 0.000172332455271743, + "loss": 
2.6441, + "step": 4928 + }, + { + "epoch": 0.3977887176176257, + "grad_norm": 0.7852105498313904, + "learning_rate": 0.00017232155338788146, + "loss": 2.5948, + "step": 4929 + }, + { + "epoch": 0.3978694213542087, + "grad_norm": 0.788789689540863, + "learning_rate": 0.0001723106497015778, + "loss": 2.6797, + "step": 4930 + }, + { + "epoch": 0.3979501250907917, + "grad_norm": 0.7082793116569519, + "learning_rate": 0.00017229974421310377, + "loss": 2.6787, + "step": 4931 + }, + { + "epoch": 0.3980308288273747, + "grad_norm": 0.8157992362976074, + "learning_rate": 0.00017228883692273106, + "loss": 2.6367, + "step": 4932 + }, + { + "epoch": 0.3981115325639577, + "grad_norm": 0.7576673030853271, + "learning_rate": 0.00017227792783073157, + "loss": 2.6826, + "step": 4933 + }, + { + "epoch": 0.39819223630054074, + "grad_norm": 0.7225388884544373, + "learning_rate": 0.00017226701693737718, + "loss": 2.668, + "step": 4934 + }, + { + "epoch": 0.3982729400371237, + "grad_norm": 0.7029562592506409, + "learning_rate": 0.00017225610424293985, + "loss": 2.6613, + "step": 4935 + }, + { + "epoch": 0.39835364377370674, + "grad_norm": 0.73081374168396, + "learning_rate": 0.0001722451897476915, + "loss": 2.6378, + "step": 4936 + }, + { + "epoch": 0.3984343475102897, + "grad_norm": 0.744008481502533, + "learning_rate": 0.0001722342734519042, + "loss": 2.6501, + "step": 4937 + }, + { + "epoch": 0.39851505124687275, + "grad_norm": 0.7482618093490601, + "learning_rate": 0.00017222335535584996, + "loss": 2.7287, + "step": 4938 + }, + { + "epoch": 0.39859575498345573, + "grad_norm": 0.6487892866134644, + "learning_rate": 0.00017221243545980093, + "loss": 2.6417, + "step": 4939 + }, + { + "epoch": 0.39867645872003876, + "grad_norm": 0.7894789576530457, + "learning_rate": 0.00017220151376402923, + "loss": 2.7431, + "step": 4940 + }, + { + "epoch": 0.39875716245662174, + "grad_norm": 0.8232294321060181, + "learning_rate": 0.00017219059026880708, + "loss": 2.6824, + "step": 4941 + }, + { + 
"epoch": 0.39883786619320477, + "grad_norm": 0.6844691634178162, + "learning_rate": 0.00017217966497440668, + "loss": 2.6294, + "step": 4942 + }, + { + "epoch": 0.39891856992978775, + "grad_norm": 0.7245259881019592, + "learning_rate": 0.00017216873788110037, + "loss": 2.6815, + "step": 4943 + }, + { + "epoch": 0.3989992736663708, + "grad_norm": 0.7197226881980896, + "learning_rate": 0.00017215780898916045, + "loss": 2.725, + "step": 4944 + }, + { + "epoch": 0.39907997740295376, + "grad_norm": 0.8391285538673401, + "learning_rate": 0.00017214687829885934, + "loss": 2.6724, + "step": 4945 + }, + { + "epoch": 0.3991606811395368, + "grad_norm": 0.7357564568519592, + "learning_rate": 0.00017213594581046938, + "loss": 2.7052, + "step": 4946 + }, + { + "epoch": 0.39924138487611976, + "grad_norm": 0.7611483931541443, + "learning_rate": 0.00017212501152426312, + "loss": 2.7214, + "step": 4947 + }, + { + "epoch": 0.39932208861270274, + "grad_norm": 0.7314950227737427, + "learning_rate": 0.00017211407544051306, + "loss": 2.6594, + "step": 4948 + }, + { + "epoch": 0.3994027923492858, + "grad_norm": 0.774131178855896, + "learning_rate": 0.00017210313755949169, + "loss": 2.6812, + "step": 4949 + }, + { + "epoch": 0.39948349608586875, + "grad_norm": 0.707003116607666, + "learning_rate": 0.00017209219788147167, + "loss": 2.7334, + "step": 4950 + }, + { + "epoch": 0.3995641998224518, + "grad_norm": 0.8179643154144287, + "learning_rate": 0.0001720812564067256, + "loss": 2.6554, + "step": 4951 + }, + { + "epoch": 0.39964490355903476, + "grad_norm": 0.6572005152702332, + "learning_rate": 0.00017207031313552621, + "loss": 2.6423, + "step": 4952 + }, + { + "epoch": 0.3997256072956178, + "grad_norm": 0.7663072943687439, + "learning_rate": 0.00017205936806814623, + "loss": 2.689, + "step": 4953 + }, + { + "epoch": 0.39980631103220077, + "grad_norm": 0.7351107001304626, + "learning_rate": 0.00017204842120485846, + "loss": 2.631, + "step": 4954 + }, + { + "epoch": 0.3998870147687838, + 
"grad_norm": 0.7754253149032593, + "learning_rate": 0.00017203747254593564, + "loss": 2.6371, + "step": 4955 + }, + { + "epoch": 0.3999677185053668, + "grad_norm": 0.7471042275428772, + "learning_rate": 0.00017202652209165074, + "loss": 2.6542, + "step": 4956 + }, + { + "epoch": 0.4000484222419498, + "grad_norm": 0.7357343435287476, + "learning_rate": 0.00017201556984227664, + "loss": 2.6226, + "step": 4957 + }, + { + "epoch": 0.4001291259785328, + "grad_norm": 0.8096252679824829, + "learning_rate": 0.00017200461579808626, + "loss": 2.6458, + "step": 4958 + }, + { + "epoch": 0.4002098297151158, + "grad_norm": 0.7622970938682556, + "learning_rate": 0.0001719936599593526, + "loss": 2.7129, + "step": 4959 + }, + { + "epoch": 0.4002905334516988, + "grad_norm": 0.7374953627586365, + "learning_rate": 0.00017198270232634882, + "loss": 2.696, + "step": 4960 + }, + { + "epoch": 0.4003712371882818, + "grad_norm": 0.7897924184799194, + "learning_rate": 0.00017197174289934787, + "loss": 2.7508, + "step": 4961 + }, + { + "epoch": 0.4004519409248648, + "grad_norm": 0.7047984004020691, + "learning_rate": 0.00017196078167862298, + "loss": 2.6733, + "step": 4962 + }, + { + "epoch": 0.40053264466144783, + "grad_norm": 0.7866294980049133, + "learning_rate": 0.0001719498186644473, + "loss": 2.694, + "step": 4963 + }, + { + "epoch": 0.4006133483980308, + "grad_norm": 0.739923894405365, + "learning_rate": 0.00017193885385709409, + "loss": 2.7125, + "step": 4964 + }, + { + "epoch": 0.40069405213461384, + "grad_norm": 0.7506374716758728, + "learning_rate": 0.00017192788725683652, + "loss": 2.627, + "step": 4965 + }, + { + "epoch": 0.4007747558711968, + "grad_norm": 0.6591607928276062, + "learning_rate": 0.00017191691886394802, + "loss": 2.6723, + "step": 4966 + }, + { + "epoch": 0.40085545960777985, + "grad_norm": 0.7748788595199585, + "learning_rate": 0.00017190594867870192, + "loss": 2.6486, + "step": 4967 + }, + { + "epoch": 0.40093616334436283, + "grad_norm": 0.7518232464790344, + 
"learning_rate": 0.0001718949767013716, + "loss": 2.6879, + "step": 4968 + }, + { + "epoch": 0.40101686708094586, + "grad_norm": 0.7360039949417114, + "learning_rate": 0.00017188400293223052, + "loss": 2.6506, + "step": 4969 + }, + { + "epoch": 0.40109757081752884, + "grad_norm": 0.7217130064964294, + "learning_rate": 0.0001718730273715522, + "loss": 2.6263, + "step": 4970 + }, + { + "epoch": 0.40117827455411187, + "grad_norm": 0.7246078252792358, + "learning_rate": 0.00017186205001961015, + "loss": 2.6222, + "step": 4971 + }, + { + "epoch": 0.40125897829069485, + "grad_norm": 0.7566879391670227, + "learning_rate": 0.00017185107087667794, + "loss": 2.7003, + "step": 4972 + }, + { + "epoch": 0.4013396820272779, + "grad_norm": 0.7881271243095398, + "learning_rate": 0.00017184008994302924, + "loss": 2.6463, + "step": 4973 + }, + { + "epoch": 0.40142038576386085, + "grad_norm": 0.7307420372962952, + "learning_rate": 0.00017182910721893775, + "loss": 2.667, + "step": 4974 + }, + { + "epoch": 0.4015010895004439, + "grad_norm": 0.7088132500648499, + "learning_rate": 0.00017181812270467708, + "loss": 2.6073, + "step": 4975 + }, + { + "epoch": 0.40158179323702686, + "grad_norm": 0.7839647531509399, + "learning_rate": 0.0001718071364005211, + "loss": 2.6594, + "step": 4976 + }, + { + "epoch": 0.4016624969736099, + "grad_norm": 0.7472013235092163, + "learning_rate": 0.00017179614830674353, + "loss": 2.737, + "step": 4977 + }, + { + "epoch": 0.40174320071019287, + "grad_norm": 0.7241616249084473, + "learning_rate": 0.0001717851584236183, + "loss": 2.6615, + "step": 4978 + }, + { + "epoch": 0.4018239044467759, + "grad_norm": 0.7918941378593445, + "learning_rate": 0.00017177416675141929, + "loss": 2.6774, + "step": 4979 + }, + { + "epoch": 0.4019046081833589, + "grad_norm": 0.801003098487854, + "learning_rate": 0.00017176317329042039, + "loss": 2.6749, + "step": 4980 + }, + { + "epoch": 0.4019853119199419, + "grad_norm": 0.7556802034378052, + "learning_rate": 
0.00017175217804089564, + "loss": 2.6197, + "step": 4981 + }, + { + "epoch": 0.4020660156565249, + "grad_norm": 0.7539604902267456, + "learning_rate": 0.00017174118100311904, + "loss": 2.6222, + "step": 4982 + }, + { + "epoch": 0.4021467193931079, + "grad_norm": 0.741436243057251, + "learning_rate": 0.0001717301821773647, + "loss": 2.6471, + "step": 4983 + }, + { + "epoch": 0.4022274231296909, + "grad_norm": 0.7449339628219604, + "learning_rate": 0.0001717191815639067, + "loss": 2.6448, + "step": 4984 + }, + { + "epoch": 0.40230812686627393, + "grad_norm": 0.7771497964859009, + "learning_rate": 0.0001717081791630192, + "loss": 2.673, + "step": 4985 + }, + { + "epoch": 0.4023888306028569, + "grad_norm": 0.6916669607162476, + "learning_rate": 0.00017169717497497646, + "loss": 2.6025, + "step": 4986 + }, + { + "epoch": 0.40246953433943994, + "grad_norm": 0.7373276948928833, + "learning_rate": 0.0001716861690000527, + "loss": 2.6783, + "step": 4987 + }, + { + "epoch": 0.4025502380760229, + "grad_norm": 0.7756158709526062, + "learning_rate": 0.0001716751612385222, + "loss": 2.7296, + "step": 4988 + }, + { + "epoch": 0.40263094181260595, + "grad_norm": 0.7725681066513062, + "learning_rate": 0.00017166415169065933, + "loss": 2.7169, + "step": 4989 + }, + { + "epoch": 0.4027116455491889, + "grad_norm": 0.7165024280548096, + "learning_rate": 0.00017165314035673846, + "loss": 2.677, + "step": 4990 + }, + { + "epoch": 0.40279234928577196, + "grad_norm": 0.8888981938362122, + "learning_rate": 0.00017164212723703404, + "loss": 2.7694, + "step": 4991 + }, + { + "epoch": 0.40287305302235493, + "grad_norm": 0.7439224720001221, + "learning_rate": 0.00017163111233182052, + "loss": 2.674, + "step": 4992 + }, + { + "epoch": 0.40295375675893796, + "grad_norm": 0.6948431730270386, + "learning_rate": 0.00017162009564137244, + "loss": 2.6595, + "step": 4993 + }, + { + "epoch": 0.40303446049552094, + "grad_norm": 0.7274380922317505, + "learning_rate": 0.00017160907716596438, + "loss": 
2.649, + "step": 4994 + }, + { + "epoch": 0.403115164232104, + "grad_norm": 0.7127148509025574, + "learning_rate": 0.0001715980569058709, + "loss": 2.6883, + "step": 4995 + }, + { + "epoch": 0.40319586796868695, + "grad_norm": 0.7129155993461609, + "learning_rate": 0.00017158703486136668, + "loss": 2.6516, + "step": 4996 + }, + { + "epoch": 0.40327657170527, + "grad_norm": 0.7848126292228699, + "learning_rate": 0.00017157601103272646, + "loss": 2.6778, + "step": 4997 + }, + { + "epoch": 0.40335727544185296, + "grad_norm": 0.752268373966217, + "learning_rate": 0.0001715649854202249, + "loss": 2.7228, + "step": 4998 + }, + { + "epoch": 0.40343797917843593, + "grad_norm": 0.7750338912010193, + "learning_rate": 0.00017155395802413684, + "loss": 2.6338, + "step": 4999 + }, + { + "epoch": 0.40351868291501897, + "grad_norm": 0.7165457010269165, + "learning_rate": 0.00017154292884473713, + "loss": 2.6195, + "step": 5000 + }, + { + "epoch": 0.40351868291501897, + "eval_loss": 2.585501194000244, + "eval_runtime": 901.8519, + "eval_samples_per_second": 2.905, + "eval_steps_per_second": 0.485, + "step": 5000 + }, + { + "epoch": 0.40359938665160194, + "grad_norm": 0.8118943572044373, + "learning_rate": 0.00017153189788230062, + "loss": 2.6649, + "step": 5001 + }, + { + "epoch": 0.403680090388185, + "grad_norm": 0.722984790802002, + "learning_rate": 0.00017152086513710221, + "loss": 2.6929, + "step": 5002 + }, + { + "epoch": 0.40376079412476795, + "grad_norm": 0.700690507888794, + "learning_rate": 0.00017150983060941686, + "loss": 2.6368, + "step": 5003 + }, + { + "epoch": 0.403841497861351, + "grad_norm": 0.7331504225730896, + "learning_rate": 0.00017149879429951965, + "loss": 2.6826, + "step": 5004 + }, + { + "epoch": 0.40392220159793396, + "grad_norm": 0.7312643527984619, + "learning_rate": 0.00017148775620768553, + "loss": 2.6279, + "step": 5005 + }, + { + "epoch": 0.404002905334517, + "grad_norm": 0.7488462924957275, + "learning_rate": 0.00017147671633418972, + "loss": 
2.6711, + "step": 5006 + }, + { + "epoch": 0.40408360907109997, + "grad_norm": 0.8620340824127197, + "learning_rate": 0.00017146567467930725, + "loss": 2.6637, + "step": 5007 + }, + { + "epoch": 0.404164312807683, + "grad_norm": 0.683907151222229, + "learning_rate": 0.00017145463124331335, + "loss": 2.6331, + "step": 5008 + }, + { + "epoch": 0.404245016544266, + "grad_norm": 0.7389389276504517, + "learning_rate": 0.0001714435860264833, + "loss": 2.7232, + "step": 5009 + }, + { + "epoch": 0.404325720280849, + "grad_norm": 0.7456515431404114, + "learning_rate": 0.00017143253902909228, + "loss": 2.6363, + "step": 5010 + }, + { + "epoch": 0.404406424017432, + "grad_norm": 0.7044962644577026, + "learning_rate": 0.0001714214902514157, + "loss": 2.6672, + "step": 5011 + }, + { + "epoch": 0.404487127754015, + "grad_norm": 0.7410328984260559, + "learning_rate": 0.00017141043969372887, + "loss": 2.6059, + "step": 5012 + }, + { + "epoch": 0.404567831490598, + "grad_norm": 0.6697140336036682, + "learning_rate": 0.00017139938735630722, + "loss": 2.7151, + "step": 5013 + }, + { + "epoch": 0.404648535227181, + "grad_norm": 0.746675431728363, + "learning_rate": 0.00017138833323942617, + "loss": 2.6792, + "step": 5014 + }, + { + "epoch": 0.404729238963764, + "grad_norm": 0.7724997401237488, + "learning_rate": 0.00017137727734336129, + "loss": 2.6234, + "step": 5015 + }, + { + "epoch": 0.40480994270034704, + "grad_norm": 0.8014429211616516, + "learning_rate": 0.00017136621966838805, + "loss": 2.6795, + "step": 5016 + }, + { + "epoch": 0.40489064643693, + "grad_norm": 0.6900430917739868, + "learning_rate": 0.00017135516021478205, + "loss": 2.7127, + "step": 5017 + }, + { + "epoch": 0.40497135017351304, + "grad_norm": 0.6648666858673096, + "learning_rate": 0.00017134409898281896, + "loss": 2.6564, + "step": 5018 + }, + { + "epoch": 0.405052053910096, + "grad_norm": 0.7054181098937988, + "learning_rate": 0.00017133303597277442, + "loss": 2.6652, + "step": 5019 + }, + { + "epoch": 
0.40513275764667905, + "grad_norm": 0.6847733855247498, + "learning_rate": 0.00017132197118492414, + "loss": 2.6997, + "step": 5020 + }, + { + "epoch": 0.40521346138326203, + "grad_norm": 0.7047749757766724, + "learning_rate": 0.00017131090461954392, + "loss": 2.6752, + "step": 5021 + }, + { + "epoch": 0.40529416511984506, + "grad_norm": 0.7549976706504822, + "learning_rate": 0.00017129983627690957, + "loss": 2.6736, + "step": 5022 + }, + { + "epoch": 0.40537486885642804, + "grad_norm": 0.7436367273330688, + "learning_rate": 0.00017128876615729686, + "loss": 2.7189, + "step": 5023 + }, + { + "epoch": 0.40545557259301107, + "grad_norm": 0.6515071988105774, + "learning_rate": 0.00017127769426098177, + "loss": 2.6422, + "step": 5024 + }, + { + "epoch": 0.40553627632959405, + "grad_norm": 0.6960858702659607, + "learning_rate": 0.00017126662058824024, + "loss": 2.6619, + "step": 5025 + }, + { + "epoch": 0.4056169800661771, + "grad_norm": 0.8075968623161316, + "learning_rate": 0.0001712555451393482, + "loss": 2.6678, + "step": 5026 + }, + { + "epoch": 0.40569768380276006, + "grad_norm": 0.6864624619483948, + "learning_rate": 0.00017124446791458176, + "loss": 2.6331, + "step": 5027 + }, + { + "epoch": 0.4057783875393431, + "grad_norm": 0.7218763828277588, + "learning_rate": 0.0001712333889142169, + "loss": 2.6316, + "step": 5028 + }, + { + "epoch": 0.40585909127592606, + "grad_norm": 0.7024715542793274, + "learning_rate": 0.0001712223081385298, + "loss": 2.623, + "step": 5029 + }, + { + "epoch": 0.4059397950125091, + "grad_norm": 0.6681575775146484, + "learning_rate": 0.0001712112255877966, + "loss": 2.6786, + "step": 5030 + }, + { + "epoch": 0.4060204987490921, + "grad_norm": 0.7249817848205566, + "learning_rate": 0.0001712001412622935, + "loss": 2.6179, + "step": 5031 + }, + { + "epoch": 0.4061012024856751, + "grad_norm": 0.7178316116333008, + "learning_rate": 0.00017118905516229677, + "loss": 2.696, + "step": 5032 + }, + { + "epoch": 0.4061819062222581, + "grad_norm": 
0.7838767766952515, + "learning_rate": 0.0001711779672880827, + "loss": 2.6881, + "step": 5033 + }, + { + "epoch": 0.4062626099588411, + "grad_norm": 0.799937903881073, + "learning_rate": 0.0001711668776399276, + "loss": 2.7587, + "step": 5034 + }, + { + "epoch": 0.4063433136954241, + "grad_norm": 0.7622246146202087, + "learning_rate": 0.0001711557862181079, + "loss": 2.6621, + "step": 5035 + }, + { + "epoch": 0.4064240174320071, + "grad_norm": 0.7158814072608948, + "learning_rate": 0.00017114469302290003, + "loss": 2.6421, + "step": 5036 + }, + { + "epoch": 0.4065047211685901, + "grad_norm": 0.7913404107093811, + "learning_rate": 0.0001711335980545804, + "loss": 2.6323, + "step": 5037 + }, + { + "epoch": 0.40658542490517313, + "grad_norm": 0.718325138092041, + "learning_rate": 0.00017112250131342556, + "loss": 2.6171, + "step": 5038 + }, + { + "epoch": 0.4066661286417561, + "grad_norm": 0.7793646454811096, + "learning_rate": 0.0001711114027997121, + "loss": 2.7494, + "step": 5039 + }, + { + "epoch": 0.40674683237833914, + "grad_norm": 0.7774816155433655, + "learning_rate": 0.00017110030251371656, + "loss": 2.5534, + "step": 5040 + }, + { + "epoch": 0.4068275361149221, + "grad_norm": 0.8547549247741699, + "learning_rate": 0.00017108920045571564, + "loss": 2.7155, + "step": 5041 + }, + { + "epoch": 0.40690823985150515, + "grad_norm": 0.7685851454734802, + "learning_rate": 0.000171078096625986, + "loss": 2.6109, + "step": 5042 + }, + { + "epoch": 0.4069889435880881, + "grad_norm": 0.7953611016273499, + "learning_rate": 0.00017106699102480445, + "loss": 2.7034, + "step": 5043 + }, + { + "epoch": 0.40706964732467116, + "grad_norm": 0.7550730109214783, + "learning_rate": 0.00017105588365244764, + "loss": 2.7026, + "step": 5044 + }, + { + "epoch": 0.40715035106125413, + "grad_norm": 0.7036548256874084, + "learning_rate": 0.0001710447745091925, + "loss": 2.6246, + "step": 5045 + }, + { + "epoch": 0.40723105479783717, + "grad_norm": 0.7154512405395508, + "learning_rate": 
0.00017103366359531586, + "loss": 2.6592, + "step": 5046 + }, + { + "epoch": 0.40731175853442014, + "grad_norm": 0.7773932218551636, + "learning_rate": 0.00017102255091109463, + "loss": 2.6458, + "step": 5047 + }, + { + "epoch": 0.4073924622710032, + "grad_norm": 0.7458996176719666, + "learning_rate": 0.0001710114364568058, + "loss": 2.643, + "step": 5048 + }, + { + "epoch": 0.40747316600758615, + "grad_norm": 0.7465376257896423, + "learning_rate": 0.00017100032023272633, + "loss": 2.6677, + "step": 5049 + }, + { + "epoch": 0.40755386974416913, + "grad_norm": 0.7340850830078125, + "learning_rate": 0.0001709892022391333, + "loss": 2.6372, + "step": 5050 + }, + { + "epoch": 0.40763457348075216, + "grad_norm": 0.7189164757728577, + "learning_rate": 0.00017097808247630377, + "loss": 2.6524, + "step": 5051 + }, + { + "epoch": 0.40771527721733514, + "grad_norm": 0.6954184174537659, + "learning_rate": 0.0001709669609445149, + "loss": 2.7383, + "step": 5052 + }, + { + "epoch": 0.40779598095391817, + "grad_norm": 0.736409604549408, + "learning_rate": 0.00017095583764404384, + "loss": 2.6424, + "step": 5053 + }, + { + "epoch": 0.40787668469050115, + "grad_norm": 0.6773545742034912, + "learning_rate": 0.0001709447125751678, + "loss": 2.6557, + "step": 5054 + }, + { + "epoch": 0.4079573884270842, + "grad_norm": 0.718748927116394, + "learning_rate": 0.00017093358573816412, + "loss": 2.6884, + "step": 5055 + }, + { + "epoch": 0.40803809216366715, + "grad_norm": 0.8276848793029785, + "learning_rate": 0.00017092245713331002, + "loss": 2.6642, + "step": 5056 + }, + { + "epoch": 0.4081187959002502, + "grad_norm": 0.7694761157035828, + "learning_rate": 0.00017091132676088294, + "loss": 2.644, + "step": 5057 + }, + { + "epoch": 0.40819949963683316, + "grad_norm": 0.766724705696106, + "learning_rate": 0.0001709001946211602, + "loss": 2.6918, + "step": 5058 + }, + { + "epoch": 0.4082802033734162, + "grad_norm": 0.7067074775695801, + "learning_rate": 0.00017088906071441927, + "loss": 
2.7228, + "step": 5059 + }, + { + "epoch": 0.40836090710999917, + "grad_norm": 0.7216899991035461, + "learning_rate": 0.00017087792504093767, + "loss": 2.7068, + "step": 5060 + }, + { + "epoch": 0.4084416108465822, + "grad_norm": 0.6728984713554382, + "learning_rate": 0.00017086678760099287, + "loss": 2.686, + "step": 5061 + }, + { + "epoch": 0.4085223145831652, + "grad_norm": 0.7546882033348083, + "learning_rate": 0.0001708556483948625, + "loss": 2.6907, + "step": 5062 + }, + { + "epoch": 0.4086030183197482, + "grad_norm": 0.7471179962158203, + "learning_rate": 0.00017084450742282416, + "loss": 2.6857, + "step": 5063 + }, + { + "epoch": 0.4086837220563312, + "grad_norm": 0.7879743576049805, + "learning_rate": 0.00017083336468515548, + "loss": 2.7224, + "step": 5064 + }, + { + "epoch": 0.4087644257929142, + "grad_norm": 0.691343367099762, + "learning_rate": 0.00017082222018213422, + "loss": 2.6561, + "step": 5065 + }, + { + "epoch": 0.4088451295294972, + "grad_norm": 0.7497386336326599, + "learning_rate": 0.00017081107391403805, + "loss": 2.6317, + "step": 5066 + }, + { + "epoch": 0.40892583326608023, + "grad_norm": 0.6846269965171814, + "learning_rate": 0.00017079992588114485, + "loss": 2.6522, + "step": 5067 + }, + { + "epoch": 0.4090065370026632, + "grad_norm": 0.7312905192375183, + "learning_rate": 0.0001707887760837324, + "loss": 2.588, + "step": 5068 + }, + { + "epoch": 0.40908724073924624, + "grad_norm": 0.6966867446899414, + "learning_rate": 0.00017077762452207866, + "loss": 2.6316, + "step": 5069 + }, + { + "epoch": 0.4091679444758292, + "grad_norm": 0.6882073283195496, + "learning_rate": 0.00017076647119646147, + "loss": 2.6977, + "step": 5070 + }, + { + "epoch": 0.40924864821241225, + "grad_norm": 0.7392483949661255, + "learning_rate": 0.00017075531610715884, + "loss": 2.6768, + "step": 5071 + }, + { + "epoch": 0.4093293519489952, + "grad_norm": 0.7311073541641235, + "learning_rate": 0.00017074415925444876, + "loss": 2.6628, + "step": 5072 + }, + { + 
"epoch": 0.40941005568557826, + "grad_norm": 0.6769934296607971, + "learning_rate": 0.00017073300063860934, + "loss": 2.6438, + "step": 5073 + }, + { + "epoch": 0.40949075942216123, + "grad_norm": 0.736456573009491, + "learning_rate": 0.00017072184025991862, + "loss": 2.6151, + "step": 5074 + }, + { + "epoch": 0.40957146315874426, + "grad_norm": 0.7026283740997314, + "learning_rate": 0.00017071067811865476, + "loss": 2.6726, + "step": 5075 + }, + { + "epoch": 0.40965216689532724, + "grad_norm": 0.6825234293937683, + "learning_rate": 0.00017069951421509597, + "loss": 2.6795, + "step": 5076 + }, + { + "epoch": 0.4097328706319103, + "grad_norm": 0.7243828773498535, + "learning_rate": 0.0001706883485495205, + "loss": 2.687, + "step": 5077 + }, + { + "epoch": 0.40981357436849325, + "grad_norm": 0.7300469875335693, + "learning_rate": 0.00017067718112220658, + "loss": 2.6268, + "step": 5078 + }, + { + "epoch": 0.4098942781050763, + "grad_norm": 0.698095440864563, + "learning_rate": 0.00017066601193343255, + "loss": 2.6461, + "step": 5079 + }, + { + "epoch": 0.40997498184165926, + "grad_norm": 0.7318777441978455, + "learning_rate": 0.00017065484098347677, + "loss": 2.6817, + "step": 5080 + }, + { + "epoch": 0.4100556855782423, + "grad_norm": 0.7681582570075989, + "learning_rate": 0.00017064366827261772, + "loss": 2.7309, + "step": 5081 + }, + { + "epoch": 0.41013638931482527, + "grad_norm": 0.7690179944038391, + "learning_rate": 0.0001706324938011337, + "loss": 2.6292, + "step": 5082 + }, + { + "epoch": 0.4102170930514083, + "grad_norm": 0.6745284199714661, + "learning_rate": 0.00017062131756930338, + "loss": 2.7133, + "step": 5083 + }, + { + "epoch": 0.4102977967879913, + "grad_norm": 0.7524279952049255, + "learning_rate": 0.00017061013957740518, + "loss": 2.6237, + "step": 5084 + }, + { + "epoch": 0.4103785005245743, + "grad_norm": 0.7813692092895508, + "learning_rate": 0.00017059895982571773, + "loss": 2.6953, + "step": 5085 + }, + { + "epoch": 0.4104592042611573, + 
"grad_norm": 0.7128829956054688, + "learning_rate": 0.00017058777831451967, + "loss": 2.6771, + "step": 5086 + }, + { + "epoch": 0.4105399079977403, + "grad_norm": 0.7249834537506104, + "learning_rate": 0.00017057659504408963, + "loss": 2.6376, + "step": 5087 + }, + { + "epoch": 0.4106206117343233, + "grad_norm": 0.7742593288421631, + "learning_rate": 0.00017056541001470637, + "loss": 2.6227, + "step": 5088 + }, + { + "epoch": 0.4107013154709063, + "grad_norm": 0.6994228959083557, + "learning_rate": 0.00017055422322664863, + "loss": 2.6573, + "step": 5089 + }, + { + "epoch": 0.4107820192074893, + "grad_norm": 0.7144249081611633, + "learning_rate": 0.00017054303468019518, + "loss": 2.6602, + "step": 5090 + }, + { + "epoch": 0.41086272294407233, + "grad_norm": 0.7695099711418152, + "learning_rate": 0.00017053184437562497, + "loss": 2.6516, + "step": 5091 + }, + { + "epoch": 0.4109434266806553, + "grad_norm": 0.7610031962394714, + "learning_rate": 0.00017052065231321678, + "loss": 2.6963, + "step": 5092 + }, + { + "epoch": 0.41102413041723834, + "grad_norm": 0.7117859721183777, + "learning_rate": 0.0001705094584932496, + "loss": 2.6954, + "step": 5093 + }, + { + "epoch": 0.4111048341538213, + "grad_norm": 0.7891486287117004, + "learning_rate": 0.00017049826291600244, + "loss": 2.7265, + "step": 5094 + }, + { + "epoch": 0.41118553789040435, + "grad_norm": 0.7347370386123657, + "learning_rate": 0.00017048706558175423, + "loss": 2.658, + "step": 5095 + }, + { + "epoch": 0.41126624162698733, + "grad_norm": 0.7541289925575256, + "learning_rate": 0.00017047586649078414, + "loss": 2.6596, + "step": 5096 + }, + { + "epoch": 0.41134694536357036, + "grad_norm": 0.7471255660057068, + "learning_rate": 0.00017046466564337118, + "loss": 2.7008, + "step": 5097 + }, + { + "epoch": 0.41142764910015334, + "grad_norm": 0.7566937208175659, + "learning_rate": 0.00017045346303979457, + "loss": 2.7006, + "step": 5098 + }, + { + "epoch": 0.41150835283673637, + "grad_norm": 
0.6991304159164429, + "learning_rate": 0.00017044225868033353, + "loss": 2.6846, + "step": 5099 + }, + { + "epoch": 0.41158905657331935, + "grad_norm": 0.7286314368247986, + "learning_rate": 0.00017043105256526724, + "loss": 2.6219, + "step": 5100 + }, + { + "epoch": 0.4116697603099023, + "grad_norm": 0.6953727006912231, + "learning_rate": 0.000170419844694875, + "loss": 2.6093, + "step": 5101 + }, + { + "epoch": 0.41175046404648535, + "grad_norm": 0.6942756772041321, + "learning_rate": 0.00017040863506943615, + "loss": 2.6399, + "step": 5102 + }, + { + "epoch": 0.41183116778306833, + "grad_norm": 0.7513531446456909, + "learning_rate": 0.00017039742368923005, + "loss": 2.6187, + "step": 5103 + }, + { + "epoch": 0.41191187151965136, + "grad_norm": 0.7530633211135864, + "learning_rate": 0.00017038621055453617, + "loss": 2.6124, + "step": 5104 + }, + { + "epoch": 0.41199257525623434, + "grad_norm": 0.7487555146217346, + "learning_rate": 0.00017037499566563392, + "loss": 2.6331, + "step": 5105 + }, + { + "epoch": 0.41207327899281737, + "grad_norm": 0.7641858458518982, + "learning_rate": 0.00017036377902280282, + "loss": 2.6875, + "step": 5106 + }, + { + "epoch": 0.41215398272940035, + "grad_norm": 0.6962767839431763, + "learning_rate": 0.0001703525606263224, + "loss": 2.6538, + "step": 5107 + }, + { + "epoch": 0.4122346864659834, + "grad_norm": 0.8183409571647644, + "learning_rate": 0.0001703413404764723, + "loss": 2.6204, + "step": 5108 + }, + { + "epoch": 0.41231539020256636, + "grad_norm": 0.7029808759689331, + "learning_rate": 0.00017033011857353207, + "loss": 2.6369, + "step": 5109 + }, + { + "epoch": 0.4123960939391494, + "grad_norm": 0.7171663045883179, + "learning_rate": 0.00017031889491778149, + "loss": 2.6211, + "step": 5110 + }, + { + "epoch": 0.41247679767573237, + "grad_norm": 0.7456090450286865, + "learning_rate": 0.0001703076695095002, + "loss": 2.6574, + "step": 5111 + }, + { + "epoch": 0.4125575014123154, + "grad_norm": 0.7468575239181519, + 
"learning_rate": 0.000170296442348968, + "loss": 2.598, + "step": 5112 + }, + { + "epoch": 0.4126382051488984, + "grad_norm": 0.7106603384017944, + "learning_rate": 0.0001702852134364647, + "loss": 2.6577, + "step": 5113 + }, + { + "epoch": 0.4127189088854814, + "grad_norm": 0.7788330912590027, + "learning_rate": 0.00017027398277227017, + "loss": 2.6797, + "step": 5114 + }, + { + "epoch": 0.4127996126220644, + "grad_norm": 0.7794120907783508, + "learning_rate": 0.00017026275035666427, + "loss": 2.5834, + "step": 5115 + }, + { + "epoch": 0.4128803163586474, + "grad_norm": 0.7270684838294983, + "learning_rate": 0.00017025151618992702, + "loss": 2.7153, + "step": 5116 + }, + { + "epoch": 0.4129610200952304, + "grad_norm": 0.8169006109237671, + "learning_rate": 0.00017024028027233827, + "loss": 2.6786, + "step": 5117 + }, + { + "epoch": 0.4130417238318134, + "grad_norm": 0.8053112626075745, + "learning_rate": 0.00017022904260417815, + "loss": 2.6456, + "step": 5118 + }, + { + "epoch": 0.4131224275683964, + "grad_norm": 0.7646365165710449, + "learning_rate": 0.0001702178031857267, + "loss": 2.6784, + "step": 5119 + }, + { + "epoch": 0.41320313130497943, + "grad_norm": 0.7878902554512024, + "learning_rate": 0.00017020656201726406, + "loss": 2.66, + "step": 5120 + }, + { + "epoch": 0.4132838350415624, + "grad_norm": 0.8602383732795715, + "learning_rate": 0.00017019531909907037, + "loss": 2.7018, + "step": 5121 + }, + { + "epoch": 0.41336453877814544, + "grad_norm": 0.801092267036438, + "learning_rate": 0.00017018407443142585, + "loss": 2.7728, + "step": 5122 + }, + { + "epoch": 0.4134452425147284, + "grad_norm": 0.7372604012489319, + "learning_rate": 0.00017017282801461074, + "loss": 2.6588, + "step": 5123 + }, + { + "epoch": 0.41352594625131145, + "grad_norm": 0.7553830146789551, + "learning_rate": 0.0001701615798489053, + "loss": 2.6844, + "step": 5124 + }, + { + "epoch": 0.4136066499878944, + "grad_norm": 0.7699872255325317, + "learning_rate": 0.0001701503299345899, + 
"loss": 2.6523, + "step": 5125 + }, + { + "epoch": 0.41368735372447746, + "grad_norm": 0.7087047696113586, + "learning_rate": 0.0001701390782719449, + "loss": 2.6785, + "step": 5126 + }, + { + "epoch": 0.41376805746106043, + "grad_norm": 0.7835792303085327, + "learning_rate": 0.0001701278248612507, + "loss": 2.7064, + "step": 5127 + }, + { + "epoch": 0.41384876119764347, + "grad_norm": 0.7833154201507568, + "learning_rate": 0.0001701165697027878, + "loss": 2.6552, + "step": 5128 + }, + { + "epoch": 0.41392946493422644, + "grad_norm": 0.8240615725517273, + "learning_rate": 0.0001701053127968367, + "loss": 2.7074, + "step": 5129 + }, + { + "epoch": 0.4140101686708095, + "grad_norm": 0.7612149119377136, + "learning_rate": 0.0001700940541436779, + "loss": 2.7484, + "step": 5130 + }, + { + "epoch": 0.41409087240739245, + "grad_norm": 0.7795391082763672, + "learning_rate": 0.00017008279374359212, + "loss": 2.6022, + "step": 5131 + }, + { + "epoch": 0.4141715761439755, + "grad_norm": 0.7714587450027466, + "learning_rate": 0.00017007153159685992, + "loss": 2.6529, + "step": 5132 + }, + { + "epoch": 0.41425227988055846, + "grad_norm": 0.7821317911148071, + "learning_rate": 0.00017006026770376194, + "loss": 2.6356, + "step": 5133 + }, + { + "epoch": 0.4143329836171415, + "grad_norm": 0.7300596833229065, + "learning_rate": 0.00017004900206457897, + "loss": 2.6552, + "step": 5134 + }, + { + "epoch": 0.41441368735372447, + "grad_norm": 0.780505359172821, + "learning_rate": 0.00017003773467959174, + "loss": 2.675, + "step": 5135 + }, + { + "epoch": 0.4144943910903075, + "grad_norm": 0.7107391357421875, + "learning_rate": 0.00017002646554908107, + "loss": 2.7096, + "step": 5136 + }, + { + "epoch": 0.4145750948268905, + "grad_norm": 0.7358834743499756, + "learning_rate": 0.0001700151946733279, + "loss": 2.6619, + "step": 5137 + }, + { + "epoch": 0.4146557985634735, + "grad_norm": 0.7573859095573425, + "learning_rate": 0.00017000392205261298, + "loss": 2.6234, + "step": 5138 + }, + 
{ + "epoch": 0.4147365023000565, + "grad_norm": 0.7032024264335632, + "learning_rate": 0.00016999264768721738, + "loss": 2.6096, + "step": 5139 + }, + { + "epoch": 0.4148172060366395, + "grad_norm": 0.743813693523407, + "learning_rate": 0.00016998137157742203, + "loss": 2.6782, + "step": 5140 + }, + { + "epoch": 0.4148979097732225, + "grad_norm": 0.8861347436904907, + "learning_rate": 0.00016997009372350793, + "loss": 2.6645, + "step": 5141 + }, + { + "epoch": 0.4149786135098055, + "grad_norm": 0.7598684430122375, + "learning_rate": 0.00016995881412575623, + "loss": 2.649, + "step": 5142 + }, + { + "epoch": 0.4150593172463885, + "grad_norm": 0.7535565495491028, + "learning_rate": 0.00016994753278444798, + "loss": 2.6449, + "step": 5143 + }, + { + "epoch": 0.41514002098297154, + "grad_norm": 0.7073138356208801, + "learning_rate": 0.0001699362496998644, + "loss": 2.6253, + "step": 5144 + }, + { + "epoch": 0.4152207247195545, + "grad_norm": 0.7161526679992676, + "learning_rate": 0.00016992496487228662, + "loss": 2.6623, + "step": 5145 + }, + { + "epoch": 0.41530142845613754, + "grad_norm": 0.8284714818000793, + "learning_rate": 0.00016991367830199595, + "loss": 2.7363, + "step": 5146 + }, + { + "epoch": 0.4153821321927205, + "grad_norm": 0.7127673625946045, + "learning_rate": 0.0001699023899892737, + "loss": 2.6274, + "step": 5147 + }, + { + "epoch": 0.41546283592930355, + "grad_norm": 0.7496370673179626, + "learning_rate": 0.00016989109993440112, + "loss": 2.6364, + "step": 5148 + }, + { + "epoch": 0.41554353966588653, + "grad_norm": 0.7616143822669983, + "learning_rate": 0.00016987980813765963, + "loss": 2.7225, + "step": 5149 + }, + { + "epoch": 0.41562424340246956, + "grad_norm": 0.6935909986495972, + "learning_rate": 0.00016986851459933067, + "loss": 2.6109, + "step": 5150 + }, + { + "epoch": 0.41570494713905254, + "grad_norm": 0.721023678779602, + "learning_rate": 0.00016985721931969566, + "loss": 2.6993, + "step": 5151 + }, + { + "epoch": 0.4157856508756355, + 
"grad_norm": 0.8216699361801147, + "learning_rate": 0.00016984592229903617, + "loss": 2.6512, + "step": 5152 + }, + { + "epoch": 0.41586635461221855, + "grad_norm": 0.7425234913825989, + "learning_rate": 0.00016983462353763372, + "loss": 2.5903, + "step": 5153 + }, + { + "epoch": 0.4159470583488015, + "grad_norm": 0.7292542457580566, + "learning_rate": 0.00016982332303576986, + "loss": 2.692, + "step": 5154 + }, + { + "epoch": 0.41602776208538456, + "grad_norm": 0.7466831803321838, + "learning_rate": 0.0001698120207937263, + "loss": 2.7145, + "step": 5155 + }, + { + "epoch": 0.41610846582196753, + "grad_norm": 0.7271949648857117, + "learning_rate": 0.00016980071681178471, + "loss": 2.655, + "step": 5156 + }, + { + "epoch": 0.41618916955855056, + "grad_norm": 0.7505547404289246, + "learning_rate": 0.00016978941109022677, + "loss": 2.7167, + "step": 5157 + }, + { + "epoch": 0.41626987329513354, + "grad_norm": 0.7307172417640686, + "learning_rate": 0.00016977810362933427, + "loss": 2.6735, + "step": 5158 + }, + { + "epoch": 0.4163505770317166, + "grad_norm": 0.7839170098304749, + "learning_rate": 0.00016976679442938904, + "loss": 2.6818, + "step": 5159 + }, + { + "epoch": 0.41643128076829955, + "grad_norm": 0.7131803631782532, + "learning_rate": 0.00016975548349067293, + "loss": 2.6921, + "step": 5160 + }, + { + "epoch": 0.4165119845048826, + "grad_norm": 0.8129798173904419, + "learning_rate": 0.0001697441708134678, + "loss": 2.6682, + "step": 5161 + }, + { + "epoch": 0.41659268824146556, + "grad_norm": 0.7634746432304382, + "learning_rate": 0.00016973285639805563, + "loss": 2.6684, + "step": 5162 + }, + { + "epoch": 0.4166733919780486, + "grad_norm": 0.7367348074913025, + "learning_rate": 0.0001697215402447184, + "loss": 2.6424, + "step": 5163 + }, + { + "epoch": 0.41675409571463157, + "grad_norm": 0.7235338687896729, + "learning_rate": 0.00016971022235373815, + "loss": 2.6817, + "step": 5164 + }, + { + "epoch": 0.4168347994512146, + "grad_norm": 0.7764291763305664, 
+ "learning_rate": 0.0001696989027253969, + "loss": 2.6477, + "step": 5165 + }, + { + "epoch": 0.4169155031877976, + "grad_norm": 0.8207562565803528, + "learning_rate": 0.00016968758135997683, + "loss": 2.6408, + "step": 5166 + }, + { + "epoch": 0.4169962069243806, + "grad_norm": 0.7291484475135803, + "learning_rate": 0.00016967625825776005, + "loss": 2.6233, + "step": 5167 + }, + { + "epoch": 0.4170769106609636, + "grad_norm": 0.7060603499412537, + "learning_rate": 0.0001696649334190288, + "loss": 2.6204, + "step": 5168 + }, + { + "epoch": 0.4171576143975466, + "grad_norm": 0.7058241963386536, + "learning_rate": 0.00016965360684406528, + "loss": 2.6212, + "step": 5169 + }, + { + "epoch": 0.4172383181341296, + "grad_norm": 0.8248410224914551, + "learning_rate": 0.00016964227853315177, + "loss": 2.6688, + "step": 5170 + }, + { + "epoch": 0.4173190218707126, + "grad_norm": 0.7287606596946716, + "learning_rate": 0.0001696309484865707, + "loss": 2.6201, + "step": 5171 + }, + { + "epoch": 0.4173997256072956, + "grad_norm": 0.7214288115501404, + "learning_rate": 0.00016961961670460433, + "loss": 2.682, + "step": 5172 + }, + { + "epoch": 0.41748042934387863, + "grad_norm": 0.7133594155311584, + "learning_rate": 0.00016960828318753516, + "loss": 2.7167, + "step": 5173 + }, + { + "epoch": 0.4175611330804616, + "grad_norm": 0.6935842633247375, + "learning_rate": 0.00016959694793564558, + "loss": 2.6134, + "step": 5174 + }, + { + "epoch": 0.41764183681704464, + "grad_norm": 0.6863382458686829, + "learning_rate": 0.00016958561094921815, + "loss": 2.6396, + "step": 5175 + }, + { + "epoch": 0.4177225405536276, + "grad_norm": 0.7659433484077454, + "learning_rate": 0.0001695742722285354, + "loss": 2.6926, + "step": 5176 + }, + { + "epoch": 0.41780324429021065, + "grad_norm": 0.6997129917144775, + "learning_rate": 0.00016956293177387992, + "loss": 2.6983, + "step": 5177 + }, + { + "epoch": 0.41788394802679363, + "grad_norm": 0.6784526705741882, + "learning_rate": 
0.00016955158958553433, + "loss": 2.6961, + "step": 5178 + }, + { + "epoch": 0.41796465176337666, + "grad_norm": 0.8227884769439697, + "learning_rate": 0.00016954024566378132, + "loss": 2.7008, + "step": 5179 + }, + { + "epoch": 0.41804535549995964, + "grad_norm": 0.7733054757118225, + "learning_rate": 0.0001695289000089036, + "loss": 2.6615, + "step": 5180 + }, + { + "epoch": 0.41812605923654267, + "grad_norm": 0.7077545523643494, + "learning_rate": 0.00016951755262118394, + "loss": 2.6388, + "step": 5181 + }, + { + "epoch": 0.41820676297312565, + "grad_norm": 0.7962050437927246, + "learning_rate": 0.00016950620350090513, + "loss": 2.7063, + "step": 5182 + }, + { + "epoch": 0.4182874667097087, + "grad_norm": 0.6950554847717285, + "learning_rate": 0.00016949485264835005, + "loss": 2.7076, + "step": 5183 + }, + { + "epoch": 0.41836817044629165, + "grad_norm": 0.8546960949897766, + "learning_rate": 0.00016948350006380162, + "loss": 2.6533, + "step": 5184 + }, + { + "epoch": 0.4184488741828747, + "grad_norm": 0.7469324469566345, + "learning_rate": 0.00016947214574754272, + "loss": 2.5884, + "step": 5185 + }, + { + "epoch": 0.41852957791945766, + "grad_norm": 0.7125554084777832, + "learning_rate": 0.0001694607896998563, + "loss": 2.6448, + "step": 5186 + }, + { + "epoch": 0.4186102816560407, + "grad_norm": 0.6998329758644104, + "learning_rate": 0.00016944943192102549, + "loss": 2.5569, + "step": 5187 + }, + { + "epoch": 0.41869098539262367, + "grad_norm": 0.9046749472618103, + "learning_rate": 0.00016943807241133328, + "loss": 2.7701, + "step": 5188 + }, + { + "epoch": 0.4187716891292067, + "grad_norm": 0.7842074036598206, + "learning_rate": 0.00016942671117106274, + "loss": 2.7124, + "step": 5189 + }, + { + "epoch": 0.4188523928657897, + "grad_norm": 0.7625874280929565, + "learning_rate": 0.00016941534820049713, + "loss": 2.6626, + "step": 5190 + }, + { + "epoch": 0.4189330966023727, + "grad_norm": 0.7006461024284363, + "learning_rate": 0.00016940398349991957, + 
"loss": 2.6283, + "step": 5191 + }, + { + "epoch": 0.4190138003389557, + "grad_norm": 0.7081875205039978, + "learning_rate": 0.00016939261706961332, + "loss": 2.69, + "step": 5192 + }, + { + "epoch": 0.4190945040755387, + "grad_norm": 0.7554503083229065, + "learning_rate": 0.00016938124890986166, + "loss": 2.641, + "step": 5193 + }, + { + "epoch": 0.4191752078121217, + "grad_norm": 0.7478535175323486, + "learning_rate": 0.0001693698790209479, + "loss": 2.7035, + "step": 5194 + }, + { + "epoch": 0.41925591154870473, + "grad_norm": 0.7323064208030701, + "learning_rate": 0.00016935850740315545, + "loss": 2.6713, + "step": 5195 + }, + { + "epoch": 0.4193366152852877, + "grad_norm": 0.8011505007743835, + "learning_rate": 0.00016934713405676764, + "loss": 2.6413, + "step": 5196 + }, + { + "epoch": 0.41941731902187074, + "grad_norm": 0.768851637840271, + "learning_rate": 0.00016933575898206804, + "loss": 2.6147, + "step": 5197 + }, + { + "epoch": 0.4194980227584537, + "grad_norm": 0.7255160808563232, + "learning_rate": 0.00016932438217934006, + "loss": 2.6093, + "step": 5198 + }, + { + "epoch": 0.41957872649503675, + "grad_norm": 0.7431769967079163, + "learning_rate": 0.00016931300364886722, + "loss": 2.6658, + "step": 5199 + }, + { + "epoch": 0.4196594302316197, + "grad_norm": 0.7532122731208801, + "learning_rate": 0.00016930162339093318, + "loss": 2.6371, + "step": 5200 + }, + { + "epoch": 0.41974013396820276, + "grad_norm": 0.7253943681716919, + "learning_rate": 0.00016929024140582152, + "loss": 2.6365, + "step": 5201 + }, + { + "epoch": 0.41982083770478573, + "grad_norm": 0.7323265075683594, + "learning_rate": 0.00016927885769381593, + "loss": 2.7096, + "step": 5202 + }, + { + "epoch": 0.4199015414413687, + "grad_norm": 0.7340009808540344, + "learning_rate": 0.00016926747225520008, + "loss": 2.6983, + "step": 5203 + }, + { + "epoch": 0.41998224517795174, + "grad_norm": 0.838706374168396, + "learning_rate": 0.00016925608509025776, + "loss": 2.7098, + "step": 5204 + }, 
+ { + "epoch": 0.4200629489145347, + "grad_norm": 0.7320838570594788, + "learning_rate": 0.0001692446961992728, + "loss": 2.6767, + "step": 5205 + }, + { + "epoch": 0.42014365265111775, + "grad_norm": 0.7275335192680359, + "learning_rate": 0.00016923330558252898, + "loss": 2.6754, + "step": 5206 + }, + { + "epoch": 0.4202243563877007, + "grad_norm": 0.7572353482246399, + "learning_rate": 0.00016922191324031017, + "loss": 2.7076, + "step": 5207 + }, + { + "epoch": 0.42030506012428376, + "grad_norm": 0.7991098165512085, + "learning_rate": 0.0001692105191729004, + "loss": 2.7281, + "step": 5208 + }, + { + "epoch": 0.42038576386086673, + "grad_norm": 0.70769202709198, + "learning_rate": 0.00016919912338058356, + "loss": 2.684, + "step": 5209 + }, + { + "epoch": 0.42046646759744977, + "grad_norm": 0.6895349621772766, + "learning_rate": 0.0001691877258636436, + "loss": 2.6723, + "step": 5210 + }, + { + "epoch": 0.42054717133403274, + "grad_norm": 0.7368944883346558, + "learning_rate": 0.00016917632662236476, + "loss": 2.601, + "step": 5211 + }, + { + "epoch": 0.4206278750706158, + "grad_norm": 0.7122060060501099, + "learning_rate": 0.00016916492565703097, + "loss": 2.703, + "step": 5212 + }, + { + "epoch": 0.42070857880719875, + "grad_norm": 0.735251784324646, + "learning_rate": 0.00016915352296792646, + "loss": 2.7715, + "step": 5213 + }, + { + "epoch": 0.4207892825437818, + "grad_norm": 0.7686039805412292, + "learning_rate": 0.00016914211855533536, + "loss": 2.6935, + "step": 5214 + }, + { + "epoch": 0.42086998628036476, + "grad_norm": 0.8457472920417786, + "learning_rate": 0.00016913071241954195, + "loss": 2.6535, + "step": 5215 + }, + { + "epoch": 0.4209506900169478, + "grad_norm": 0.6913465261459351, + "learning_rate": 0.00016911930456083046, + "loss": 2.6453, + "step": 5216 + }, + { + "epoch": 0.42103139375353077, + "grad_norm": 0.6939878463745117, + "learning_rate": 0.00016910789497948524, + "loss": 2.6483, + "step": 5217 + }, + { + "epoch": 0.4211120974901138, + 
"grad_norm": 0.7240888476371765, + "learning_rate": 0.00016909648367579062, + "loss": 2.6649, + "step": 5218 + }, + { + "epoch": 0.4211928012266968, + "grad_norm": 0.7570972442626953, + "learning_rate": 0.00016908507065003102, + "loss": 2.6633, + "step": 5219 + }, + { + "epoch": 0.4212735049632798, + "grad_norm": 0.72161465883255, + "learning_rate": 0.00016907365590249082, + "loss": 2.6999, + "step": 5220 + }, + { + "epoch": 0.4213542086998628, + "grad_norm": 0.7818038463592529, + "learning_rate": 0.00016906223943345458, + "loss": 2.6478, + "step": 5221 + }, + { + "epoch": 0.4214349124364458, + "grad_norm": 0.7292464971542358, + "learning_rate": 0.00016905082124320684, + "loss": 2.6725, + "step": 5222 + }, + { + "epoch": 0.4215156161730288, + "grad_norm": 0.7612937092781067, + "learning_rate": 0.0001690394013320321, + "loss": 2.6474, + "step": 5223 + }, + { + "epoch": 0.4215963199096118, + "grad_norm": 0.7325131297111511, + "learning_rate": 0.000169027979700215, + "loss": 2.6525, + "step": 5224 + }, + { + "epoch": 0.4216770236461948, + "grad_norm": 0.7736644148826599, + "learning_rate": 0.00016901655634804022, + "loss": 2.662, + "step": 5225 + }, + { + "epoch": 0.42175772738277784, + "grad_norm": 0.758522629737854, + "learning_rate": 0.00016900513127579244, + "loss": 2.6558, + "step": 5226 + }, + { + "epoch": 0.4218384311193608, + "grad_norm": 0.7559491991996765, + "learning_rate": 0.00016899370448375642, + "loss": 2.7361, + "step": 5227 + }, + { + "epoch": 0.42191913485594384, + "grad_norm": 0.7791146039962769, + "learning_rate": 0.00016898227597221692, + "loss": 2.6739, + "step": 5228 + }, + { + "epoch": 0.4219998385925268, + "grad_norm": 0.7280717492103577, + "learning_rate": 0.00016897084574145878, + "loss": 2.6316, + "step": 5229 + }, + { + "epoch": 0.42208054232910985, + "grad_norm": 0.7455596327781677, + "learning_rate": 0.0001689594137917669, + "loss": 2.7244, + "step": 5230 + }, + { + "epoch": 0.42216124606569283, + "grad_norm": 0.7965813875198364, + 
"learning_rate": 0.00016894798012342613, + "loss": 2.6757, + "step": 5231 + }, + { + "epoch": 0.42224194980227586, + "grad_norm": 0.6740596294403076, + "learning_rate": 0.00016893654473672148, + "loss": 2.631, + "step": 5232 + }, + { + "epoch": 0.42232265353885884, + "grad_norm": 0.695105254650116, + "learning_rate": 0.00016892510763193795, + "loss": 2.6563, + "step": 5233 + }, + { + "epoch": 0.42240335727544187, + "grad_norm": 0.7623865008354187, + "learning_rate": 0.00016891366880936051, + "loss": 2.6738, + "step": 5234 + }, + { + "epoch": 0.42248406101202485, + "grad_norm": 0.7545912265777588, + "learning_rate": 0.00016890222826927435, + "loss": 2.6949, + "step": 5235 + }, + { + "epoch": 0.4225647647486079, + "grad_norm": 0.7280749678611755, + "learning_rate": 0.00016889078601196452, + "loss": 2.6571, + "step": 5236 + }, + { + "epoch": 0.42264546848519086, + "grad_norm": 0.6624523401260376, + "learning_rate": 0.00016887934203771625, + "loss": 2.6854, + "step": 5237 + }, + { + "epoch": 0.4227261722217739, + "grad_norm": 0.7835487127304077, + "learning_rate": 0.0001688678963468147, + "loss": 2.6437, + "step": 5238 + }, + { + "epoch": 0.42280687595835686, + "grad_norm": 0.7384940981864929, + "learning_rate": 0.00016885644893954518, + "loss": 2.6584, + "step": 5239 + }, + { + "epoch": 0.4228875796949399, + "grad_norm": 0.8227531313896179, + "learning_rate": 0.00016884499981619292, + "loss": 2.673, + "step": 5240 + }, + { + "epoch": 0.4229682834315229, + "grad_norm": 0.7442220449447632, + "learning_rate": 0.00016883354897704334, + "loss": 2.6729, + "step": 5241 + }, + { + "epoch": 0.4230489871681059, + "grad_norm": 0.7182636857032776, + "learning_rate": 0.00016882209642238175, + "loss": 2.6833, + "step": 5242 + }, + { + "epoch": 0.4231296909046889, + "grad_norm": 0.7061870098114014, + "learning_rate": 0.00016881064215249362, + "loss": 2.6696, + "step": 5243 + }, + { + "epoch": 0.4232103946412719, + "grad_norm": 0.6792885065078735, + "learning_rate": 
0.00016879918616766445, + "loss": 2.6805, + "step": 5244 + }, + { + "epoch": 0.4232910983778549, + "grad_norm": 0.7439807057380676, + "learning_rate": 0.00016878772846817968, + "loss": 2.6522, + "step": 5245 + }, + { + "epoch": 0.4233718021144379, + "grad_norm": 0.7078969478607178, + "learning_rate": 0.00016877626905432492, + "loss": 2.6549, + "step": 5246 + }, + { + "epoch": 0.4234525058510209, + "grad_norm": 0.7103868126869202, + "learning_rate": 0.00016876480792638577, + "loss": 2.6812, + "step": 5247 + }, + { + "epoch": 0.42353320958760393, + "grad_norm": 0.7224452495574951, + "learning_rate": 0.00016875334508464782, + "loss": 2.6657, + "step": 5248 + }, + { + "epoch": 0.4236139133241869, + "grad_norm": 0.6885106563568115, + "learning_rate": 0.00016874188052939682, + "loss": 2.6421, + "step": 5249 + }, + { + "epoch": 0.42369461706076994, + "grad_norm": 0.6736720204353333, + "learning_rate": 0.00016873041426091845, + "loss": 2.6717, + "step": 5250 + }, + { + "epoch": 0.4237753207973529, + "grad_norm": 0.7597963809967041, + "learning_rate": 0.00016871894627949846, + "loss": 2.6231, + "step": 5251 + }, + { + "epoch": 0.42385602453393595, + "grad_norm": 0.8295687437057495, + "learning_rate": 0.00016870747658542275, + "loss": 2.6631, + "step": 5252 + }, + { + "epoch": 0.4239367282705189, + "grad_norm": 0.6750548481941223, + "learning_rate": 0.0001686960051789771, + "loss": 2.6997, + "step": 5253 + }, + { + "epoch": 0.4240174320071019, + "grad_norm": 0.7229160666465759, + "learning_rate": 0.0001686845320604474, + "loss": 2.6525, + "step": 5254 + }, + { + "epoch": 0.42409813574368493, + "grad_norm": 0.8318623900413513, + "learning_rate": 0.00016867305723011967, + "loss": 2.7774, + "step": 5255 + }, + { + "epoch": 0.4241788394802679, + "grad_norm": 0.8391026854515076, + "learning_rate": 0.00016866158068827979, + "loss": 2.6712, + "step": 5256 + }, + { + "epoch": 0.42425954321685094, + "grad_norm": 0.691146969795227, + "learning_rate": 0.00016865010243521388, + "loss": 
2.6459, + "step": 5257 + }, + { + "epoch": 0.4243402469534339, + "grad_norm": 0.7223602533340454, + "learning_rate": 0.00016863862247120794, + "loss": 2.6675, + "step": 5258 + }, + { + "epoch": 0.42442095069001695, + "grad_norm": 0.8400631546974182, + "learning_rate": 0.0001686271407965481, + "loss": 2.6978, + "step": 5259 + }, + { + "epoch": 0.42450165442659993, + "grad_norm": 0.737684965133667, + "learning_rate": 0.0001686156574115205, + "loss": 2.6992, + "step": 5260 + }, + { + "epoch": 0.42458235816318296, + "grad_norm": 0.7511717677116394, + "learning_rate": 0.0001686041723164114, + "loss": 2.6947, + "step": 5261 + }, + { + "epoch": 0.42466306189976594, + "grad_norm": 0.7434492707252502, + "learning_rate": 0.00016859268551150698, + "loss": 2.7353, + "step": 5262 + }, + { + "epoch": 0.42474376563634897, + "grad_norm": 0.746609628200531, + "learning_rate": 0.00016858119699709353, + "loss": 2.7519, + "step": 5263 + }, + { + "epoch": 0.42482446937293195, + "grad_norm": 0.7709949612617493, + "learning_rate": 0.0001685697067734574, + "loss": 2.7018, + "step": 5264 + }, + { + "epoch": 0.424905173109515, + "grad_norm": 0.7496309876441956, + "learning_rate": 0.00016855821484088488, + "loss": 2.6761, + "step": 5265 + }, + { + "epoch": 0.42498587684609795, + "grad_norm": 0.7071252465248108, + "learning_rate": 0.00016854672119966243, + "loss": 2.6762, + "step": 5266 + }, + { + "epoch": 0.425066580582681, + "grad_norm": 0.7991356253623962, + "learning_rate": 0.00016853522585007658, + "loss": 2.6134, + "step": 5267 + }, + { + "epoch": 0.42514728431926396, + "grad_norm": 0.8194605708122253, + "learning_rate": 0.0001685237287924137, + "loss": 2.6601, + "step": 5268 + }, + { + "epoch": 0.425227988055847, + "grad_norm": 0.7451688051223755, + "learning_rate": 0.00016851223002696037, + "loss": 2.6631, + "step": 5269 + }, + { + "epoch": 0.42530869179242997, + "grad_norm": 0.7220263481140137, + "learning_rate": 0.0001685007295540032, + "loss": 2.6631, + "step": 5270 + }, + { + 
"epoch": 0.425389395529013, + "grad_norm": 0.7268854975700378, + "learning_rate": 0.00016848922737382874, + "loss": 2.6752, + "step": 5271 + }, + { + "epoch": 0.425470099265596, + "grad_norm": 0.8841642141342163, + "learning_rate": 0.00016847772348672378, + "loss": 2.7153, + "step": 5272 + }, + { + "epoch": 0.425550803002179, + "grad_norm": 0.7725942134857178, + "learning_rate": 0.00016846621789297489, + "loss": 2.6726, + "step": 5273 + }, + { + "epoch": 0.425631506738762, + "grad_norm": 0.7179448008537292, + "learning_rate": 0.00016845471059286887, + "loss": 2.6659, + "step": 5274 + }, + { + "epoch": 0.425712210475345, + "grad_norm": 0.7630325555801392, + "learning_rate": 0.00016844320158669257, + "loss": 2.7133, + "step": 5275 + }, + { + "epoch": 0.425792914211928, + "grad_norm": 0.7349739670753479, + "learning_rate": 0.00016843169087473272, + "loss": 2.6397, + "step": 5276 + }, + { + "epoch": 0.42587361794851103, + "grad_norm": 0.7670298218727112, + "learning_rate": 0.00016842017845727626, + "loss": 2.6485, + "step": 5277 + }, + { + "epoch": 0.425954321685094, + "grad_norm": 0.692095160484314, + "learning_rate": 0.00016840866433461013, + "loss": 2.6058, + "step": 5278 + }, + { + "epoch": 0.42603502542167704, + "grad_norm": 0.6888624429702759, + "learning_rate": 0.00016839714850702125, + "loss": 2.5757, + "step": 5279 + }, + { + "epoch": 0.42611572915826, + "grad_norm": 0.6816484332084656, + "learning_rate": 0.00016838563097479664, + "loss": 2.6656, + "step": 5280 + }, + { + "epoch": 0.42619643289484305, + "grad_norm": 0.7778486609458923, + "learning_rate": 0.00016837411173822333, + "loss": 2.6738, + "step": 5281 + }, + { + "epoch": 0.426277136631426, + "grad_norm": 0.73436439037323, + "learning_rate": 0.00016836259079758845, + "loss": 2.6346, + "step": 5282 + }, + { + "epoch": 0.42635784036800906, + "grad_norm": 0.673528254032135, + "learning_rate": 0.00016835106815317908, + "loss": 2.6636, + "step": 5283 + }, + { + "epoch": 0.42643854410459203, + "grad_norm": 
0.6892737150192261, + "learning_rate": 0.00016833954380528242, + "loss": 2.6723, + "step": 5284 + }, + { + "epoch": 0.42651924784117506, + "grad_norm": 0.7404607534408569, + "learning_rate": 0.00016832801775418571, + "loss": 2.6751, + "step": 5285 + }, + { + "epoch": 0.42659995157775804, + "grad_norm": 0.7040587663650513, + "learning_rate": 0.00016831649000017618, + "loss": 2.6079, + "step": 5286 + }, + { + "epoch": 0.4266806553143411, + "grad_norm": 0.7295164465904236, + "learning_rate": 0.00016830496054354112, + "loss": 2.5928, + "step": 5287 + }, + { + "epoch": 0.42676135905092405, + "grad_norm": 0.7269962430000305, + "learning_rate": 0.00016829342938456788, + "loss": 2.6648, + "step": 5288 + }, + { + "epoch": 0.4268420627875071, + "grad_norm": 0.7296550273895264, + "learning_rate": 0.0001682818965235439, + "loss": 2.6814, + "step": 5289 + }, + { + "epoch": 0.42692276652409006, + "grad_norm": 0.8376085758209229, + "learning_rate": 0.00016827036196075655, + "loss": 2.702, + "step": 5290 + }, + { + "epoch": 0.4270034702606731, + "grad_norm": 0.7461032271385193, + "learning_rate": 0.00016825882569649332, + "loss": 2.6959, + "step": 5291 + }, + { + "epoch": 0.42708417399725607, + "grad_norm": 0.7218661308288574, + "learning_rate": 0.00016824728773104171, + "loss": 2.7182, + "step": 5292 + }, + { + "epoch": 0.4271648777338391, + "grad_norm": 0.7012860774993896, + "learning_rate": 0.00016823574806468933, + "loss": 2.6989, + "step": 5293 + }, + { + "epoch": 0.4272455814704221, + "grad_norm": 0.7039482593536377, + "learning_rate": 0.0001682242066977237, + "loss": 2.6153, + "step": 5294 + }, + { + "epoch": 0.4273262852070051, + "grad_norm": 0.8783851861953735, + "learning_rate": 0.0001682126636304325, + "loss": 2.7174, + "step": 5295 + }, + { + "epoch": 0.4274069889435881, + "grad_norm": 0.7266566157341003, + "learning_rate": 0.00016820111886310343, + "loss": 2.6571, + "step": 5296 + }, + { + "epoch": 0.4274876926801711, + "grad_norm": 0.7512212991714478, + 
"learning_rate": 0.0001681895723960242, + "loss": 2.6802, + "step": 5297 + }, + { + "epoch": 0.4275683964167541, + "grad_norm": 0.7786974310874939, + "learning_rate": 0.00016817802422948254, + "loss": 2.6514, + "step": 5298 + }, + { + "epoch": 0.4276491001533371, + "grad_norm": 0.7454531788825989, + "learning_rate": 0.00016816647436376634, + "loss": 2.6508, + "step": 5299 + }, + { + "epoch": 0.4277298038899201, + "grad_norm": 0.7542992830276489, + "learning_rate": 0.0001681549227991634, + "loss": 2.6455, + "step": 5300 + }, + { + "epoch": 0.42781050762650313, + "grad_norm": 0.7405722141265869, + "learning_rate": 0.0001681433695359616, + "loss": 2.6505, + "step": 5301 + }, + { + "epoch": 0.4278912113630861, + "grad_norm": 0.7120002508163452, + "learning_rate": 0.00016813181457444896, + "loss": 2.6652, + "step": 5302 + }, + { + "epoch": 0.42797191509966914, + "grad_norm": 0.7645997405052185, + "learning_rate": 0.00016812025791491334, + "loss": 2.6456, + "step": 5303 + }, + { + "epoch": 0.4280526188362521, + "grad_norm": 0.7214465141296387, + "learning_rate": 0.00016810869955764286, + "loss": 2.6261, + "step": 5304 + }, + { + "epoch": 0.4281333225728351, + "grad_norm": 0.7653367519378662, + "learning_rate": 0.00016809713950292551, + "loss": 2.7295, + "step": 5305 + }, + { + "epoch": 0.4282140263094181, + "grad_norm": 0.6798970103263855, + "learning_rate": 0.0001680855777510495, + "loss": 2.6549, + "step": 5306 + }, + { + "epoch": 0.4282947300460011, + "grad_norm": 0.7693684101104736, + "learning_rate": 0.00016807401430230288, + "loss": 2.7001, + "step": 5307 + }, + { + "epoch": 0.42837543378258414, + "grad_norm": 0.6962063312530518, + "learning_rate": 0.00016806244915697384, + "loss": 2.6582, + "step": 5308 + }, + { + "epoch": 0.4284561375191671, + "grad_norm": 0.7526959776878357, + "learning_rate": 0.00016805088231535068, + "loss": 2.7204, + "step": 5309 + }, + { + "epoch": 0.42853684125575014, + "grad_norm": 0.7403820753097534, + "learning_rate": 
0.0001680393137777217, + "loss": 2.6505, + "step": 5310 + }, + { + "epoch": 0.4286175449923331, + "grad_norm": 0.7056909799575806, + "learning_rate": 0.00016802774354437506, + "loss": 2.5981, + "step": 5311 + }, + { + "epoch": 0.42869824872891615, + "grad_norm": 0.6756439805030823, + "learning_rate": 0.0001680161716155993, + "loss": 2.6845, + "step": 5312 + }, + { + "epoch": 0.42877895246549913, + "grad_norm": 0.7634297013282776, + "learning_rate": 0.0001680045979916827, + "loss": 2.6399, + "step": 5313 + }, + { + "epoch": 0.42885965620208216, + "grad_norm": 0.6793022751808167, + "learning_rate": 0.0001679930226729138, + "loss": 2.6808, + "step": 5314 + }, + { + "epoch": 0.42894035993866514, + "grad_norm": 0.7692369222640991, + "learning_rate": 0.00016798144565958103, + "loss": 2.673, + "step": 5315 + }, + { + "epoch": 0.42902106367524817, + "grad_norm": 0.668798565864563, + "learning_rate": 0.00016796986695197293, + "loss": 2.6465, + "step": 5316 + }, + { + "epoch": 0.42910176741183115, + "grad_norm": 0.719160795211792, + "learning_rate": 0.00016795828655037805, + "loss": 2.5876, + "step": 5317 + }, + { + "epoch": 0.4291824711484142, + "grad_norm": 0.7352864742279053, + "learning_rate": 0.000167946704455085, + "loss": 2.625, + "step": 5318 + }, + { + "epoch": 0.42926317488499716, + "grad_norm": 0.7103392481803894, + "learning_rate": 0.00016793512066638254, + "loss": 2.602, + "step": 5319 + }, + { + "epoch": 0.4293438786215802, + "grad_norm": 0.7005727291107178, + "learning_rate": 0.0001679235351845592, + "loss": 2.6723, + "step": 5320 + }, + { + "epoch": 0.42942458235816316, + "grad_norm": 0.7686243653297424, + "learning_rate": 0.00016791194800990387, + "loss": 2.693, + "step": 5321 + }, + { + "epoch": 0.4295052860947462, + "grad_norm": 0.7026933431625366, + "learning_rate": 0.00016790035914270526, + "loss": 2.6334, + "step": 5322 + }, + { + "epoch": 0.4295859898313292, + "grad_norm": 0.748938262462616, + "learning_rate": 0.0001678887685832522, + "loss": 2.6757, + 
"step": 5323 + }, + { + "epoch": 0.4296666935679122, + "grad_norm": 0.7753568887710571, + "learning_rate": 0.00016787717633183355, + "loss": 2.6782, + "step": 5324 + }, + { + "epoch": 0.4297473973044952, + "grad_norm": 0.7605767846107483, + "learning_rate": 0.00016786558238873823, + "loss": 2.6822, + "step": 5325 + }, + { + "epoch": 0.4298281010410782, + "grad_norm": 0.7516531348228455, + "learning_rate": 0.00016785398675425524, + "loss": 2.6802, + "step": 5326 + }, + { + "epoch": 0.4299088047776612, + "grad_norm": 0.7551677227020264, + "learning_rate": 0.0001678423894286735, + "loss": 2.6509, + "step": 5327 + }, + { + "epoch": 0.4299895085142442, + "grad_norm": 0.765364944934845, + "learning_rate": 0.00016783079041228206, + "loss": 2.6552, + "step": 5328 + }, + { + "epoch": 0.4300702122508272, + "grad_norm": 0.7016649842262268, + "learning_rate": 0.00016781918970537002, + "loss": 2.6861, + "step": 5329 + }, + { + "epoch": 0.43015091598741023, + "grad_norm": 0.7266311645507812, + "learning_rate": 0.0001678075873082265, + "loss": 2.7064, + "step": 5330 + }, + { + "epoch": 0.4302316197239932, + "grad_norm": 0.7414532899856567, + "learning_rate": 0.00016779598322114064, + "loss": 2.6273, + "step": 5331 + }, + { + "epoch": 0.43031232346057624, + "grad_norm": 0.7032443881034851, + "learning_rate": 0.00016778437744440167, + "loss": 2.6577, + "step": 5332 + }, + { + "epoch": 0.4303930271971592, + "grad_norm": 0.7150338888168335, + "learning_rate": 0.00016777276997829882, + "loss": 2.6586, + "step": 5333 + }, + { + "epoch": 0.43047373093374225, + "grad_norm": 0.6893971562385559, + "learning_rate": 0.0001677611608231214, + "loss": 2.6713, + "step": 5334 + }, + { + "epoch": 0.4305544346703252, + "grad_norm": 0.861935555934906, + "learning_rate": 0.00016774954997915867, + "loss": 2.7037, + "step": 5335 + }, + { + "epoch": 0.43063513840690826, + "grad_norm": 0.7140138745307922, + "learning_rate": 0.00016773793744670012, + "loss": 2.6684, + "step": 5336 + }, + { + "epoch": 
0.43071584214349123, + "grad_norm": 0.7245929837226868, + "learning_rate": 0.00016772632322603506, + "loss": 2.6349, + "step": 5337 + }, + { + "epoch": 0.43079654588007427, + "grad_norm": 0.7216203808784485, + "learning_rate": 0.000167714707317453, + "loss": 2.6338, + "step": 5338 + }, + { + "epoch": 0.43087724961665724, + "grad_norm": 0.7076452374458313, + "learning_rate": 0.00016770308972124343, + "loss": 2.6614, + "step": 5339 + }, + { + "epoch": 0.4309579533532403, + "grad_norm": 0.7392035722732544, + "learning_rate": 0.00016769147043769586, + "loss": 2.6697, + "step": 5340 + }, + { + "epoch": 0.43103865708982325, + "grad_norm": 0.7235357761383057, + "learning_rate": 0.00016767984946709994, + "loss": 2.6664, + "step": 5341 + }, + { + "epoch": 0.4311193608264063, + "grad_norm": 0.6985526084899902, + "learning_rate": 0.00016766822680974524, + "loss": 2.6157, + "step": 5342 + }, + { + "epoch": 0.43120006456298926, + "grad_norm": 0.769963264465332, + "learning_rate": 0.0001676566024659214, + "loss": 2.6096, + "step": 5343 + }, + { + "epoch": 0.4312807682995723, + "grad_norm": 0.7504093050956726, + "learning_rate": 0.00016764497643591823, + "loss": 2.5795, + "step": 5344 + }, + { + "epoch": 0.43136147203615527, + "grad_norm": 0.7193379402160645, + "learning_rate": 0.0001676333487200254, + "loss": 2.6158, + "step": 5345 + }, + { + "epoch": 0.4314421757727383, + "grad_norm": 0.777357280254364, + "learning_rate": 0.00016762171931853273, + "loss": 2.6388, + "step": 5346 + }, + { + "epoch": 0.4315228795093213, + "grad_norm": 0.8590179085731506, + "learning_rate": 0.00016761008823173003, + "loss": 2.6597, + "step": 5347 + }, + { + "epoch": 0.4316035832459043, + "grad_norm": 0.7040170431137085, + "learning_rate": 0.0001675984554599072, + "loss": 2.6447, + "step": 5348 + }, + { + "epoch": 0.4316842869824873, + "grad_norm": 0.7682301998138428, + "learning_rate": 0.00016758682100335417, + "loss": 2.6738, + "step": 5349 + }, + { + "epoch": 0.4317649907190703, + "grad_norm": 
0.8342414498329163, + "learning_rate": 0.00016757518486236087, + "loss": 2.7058, + "step": 5350 + }, + { + "epoch": 0.4318456944556533, + "grad_norm": 0.7410600781440735, + "learning_rate": 0.00016756354703721736, + "loss": 2.6597, + "step": 5351 + }, + { + "epoch": 0.4319263981922363, + "grad_norm": 0.7633174061775208, + "learning_rate": 0.00016755190752821363, + "loss": 2.6461, + "step": 5352 + }, + { + "epoch": 0.4320071019288193, + "grad_norm": 0.7855150103569031, + "learning_rate": 0.00016754026633563973, + "loss": 2.6556, + "step": 5353 + }, + { + "epoch": 0.43208780566540234, + "grad_norm": 0.7197602391242981, + "learning_rate": 0.00016752862345978587, + "loss": 2.6511, + "step": 5354 + }, + { + "epoch": 0.4321685094019853, + "grad_norm": 0.7748876810073853, + "learning_rate": 0.00016751697890094223, + "loss": 2.7, + "step": 5355 + }, + { + "epoch": 0.4322492131385683, + "grad_norm": 0.7457308173179626, + "learning_rate": 0.00016750533265939895, + "loss": 2.6934, + "step": 5356 + }, + { + "epoch": 0.4323299168751513, + "grad_norm": 0.8003394603729248, + "learning_rate": 0.00016749368473544633, + "loss": 2.6273, + "step": 5357 + }, + { + "epoch": 0.4324106206117343, + "grad_norm": 0.7163615822792053, + "learning_rate": 0.00016748203512937464, + "loss": 2.6605, + "step": 5358 + }, + { + "epoch": 0.43249132434831733, + "grad_norm": 0.6859120726585388, + "learning_rate": 0.00016747038384147422, + "loss": 2.6748, + "step": 5359 + }, + { + "epoch": 0.4325720280849003, + "grad_norm": 0.7169440984725952, + "learning_rate": 0.0001674587308720355, + "loss": 2.6674, + "step": 5360 + }, + { + "epoch": 0.43265273182148334, + "grad_norm": 0.7762351036071777, + "learning_rate": 0.00016744707622134888, + "loss": 2.6673, + "step": 5361 + }, + { + "epoch": 0.4327334355580663, + "grad_norm": 0.7169542908668518, + "learning_rate": 0.0001674354198897048, + "loss": 2.7341, + "step": 5362 + }, + { + "epoch": 0.43281413929464935, + "grad_norm": 0.7903403043746948, + 
"learning_rate": 0.00016742376187739376, + "loss": 2.6019, + "step": 5363 + }, + { + "epoch": 0.4328948430312323, + "grad_norm": 0.8395403027534485, + "learning_rate": 0.00016741210218470634, + "loss": 2.6519, + "step": 5364 + }, + { + "epoch": 0.43297554676781536, + "grad_norm": 0.7521546483039856, + "learning_rate": 0.0001674004408119331, + "loss": 2.6067, + "step": 5365 + }, + { + "epoch": 0.43305625050439833, + "grad_norm": 0.7186779975891113, + "learning_rate": 0.0001673887777593647, + "loss": 2.6435, + "step": 5366 + }, + { + "epoch": 0.43313695424098136, + "grad_norm": 0.7362968921661377, + "learning_rate": 0.0001673771130272918, + "loss": 2.6031, + "step": 5367 + }, + { + "epoch": 0.43321765797756434, + "grad_norm": 0.8033537864685059, + "learning_rate": 0.0001673654466160051, + "loss": 2.7234, + "step": 5368 + }, + { + "epoch": 0.4332983617141474, + "grad_norm": 0.7109711766242981, + "learning_rate": 0.0001673537785257954, + "loss": 2.6621, + "step": 5369 + }, + { + "epoch": 0.43337906545073035, + "grad_norm": 0.7499226927757263, + "learning_rate": 0.0001673421087569535, + "loss": 2.706, + "step": 5370 + }, + { + "epoch": 0.4334597691873134, + "grad_norm": 0.7192875146865845, + "learning_rate": 0.00016733043730977017, + "loss": 2.6053, + "step": 5371 + }, + { + "epoch": 0.43354047292389636, + "grad_norm": 0.6939374208450317, + "learning_rate": 0.00016731876418453636, + "loss": 2.6621, + "step": 5372 + }, + { + "epoch": 0.4336211766604794, + "grad_norm": 0.720741331577301, + "learning_rate": 0.00016730708938154297, + "loss": 2.6358, + "step": 5373 + }, + { + "epoch": 0.43370188039706237, + "grad_norm": 0.6979780793190002, + "learning_rate": 0.00016729541290108095, + "loss": 2.6162, + "step": 5374 + }, + { + "epoch": 0.4337825841336454, + "grad_norm": 0.8014200925827026, + "learning_rate": 0.00016728373474344136, + "loss": 2.6255, + "step": 5375 + }, + { + "epoch": 0.4338632878702284, + "grad_norm": 0.7780057787895203, + "learning_rate": 
0.0001672720549089152, + "loss": 2.6257, + "step": 5376 + }, + { + "epoch": 0.4339439916068114, + "grad_norm": 0.7111102938652039, + "learning_rate": 0.00016726037339779358, + "loss": 2.6384, + "step": 5377 + }, + { + "epoch": 0.4340246953433944, + "grad_norm": 0.7077106833457947, + "learning_rate": 0.00016724869021036764, + "loss": 2.6293, + "step": 5378 + }, + { + "epoch": 0.4341053990799774, + "grad_norm": 0.8328250646591187, + "learning_rate": 0.00016723700534692853, + "loss": 2.6186, + "step": 5379 + }, + { + "epoch": 0.4341861028165604, + "grad_norm": 0.6942149996757507, + "learning_rate": 0.00016722531880776752, + "loss": 2.6032, + "step": 5380 + }, + { + "epoch": 0.4342668065531434, + "grad_norm": 0.7180305123329163, + "learning_rate": 0.00016721363059317583, + "loss": 2.6166, + "step": 5381 + }, + { + "epoch": 0.4343475102897264, + "grad_norm": 0.8093443512916565, + "learning_rate": 0.00016720194070344476, + "loss": 2.6596, + "step": 5382 + }, + { + "epoch": 0.43442821402630943, + "grad_norm": 0.7337743043899536, + "learning_rate": 0.00016719024913886568, + "loss": 2.6137, + "step": 5383 + }, + { + "epoch": 0.4345089177628924, + "grad_norm": 0.7590384483337402, + "learning_rate": 0.00016717855589972993, + "loss": 2.6541, + "step": 5384 + }, + { + "epoch": 0.43458962149947544, + "grad_norm": 0.6945257186889648, + "learning_rate": 0.00016716686098632898, + "loss": 2.686, + "step": 5385 + }, + { + "epoch": 0.4346703252360584, + "grad_norm": 0.7175764441490173, + "learning_rate": 0.00016715516439895424, + "loss": 2.6081, + "step": 5386 + }, + { + "epoch": 0.43475102897264145, + "grad_norm": 0.7287259697914124, + "learning_rate": 0.00016714346613789732, + "loss": 2.6462, + "step": 5387 + }, + { + "epoch": 0.43483173270922443, + "grad_norm": 0.6864096522331238, + "learning_rate": 0.00016713176620344964, + "loss": 2.7104, + "step": 5388 + }, + { + "epoch": 0.43491243644580746, + "grad_norm": 0.6554383039474487, + "learning_rate": 0.00016712006459590289, + "loss": 
2.6153, + "step": 5389 + }, + { + "epoch": 0.43499314018239044, + "grad_norm": 0.6415165662765503, + "learning_rate": 0.00016710836131554867, + "loss": 2.6198, + "step": 5390 + }, + { + "epoch": 0.43507384391897347, + "grad_norm": 0.6998475193977356, + "learning_rate": 0.00016709665636267869, + "loss": 2.6774, + "step": 5391 + }, + { + "epoch": 0.43515454765555645, + "grad_norm": 0.7437679171562195, + "learning_rate": 0.00016708494973758465, + "loss": 2.6176, + "step": 5392 + }, + { + "epoch": 0.4352352513921395, + "grad_norm": 0.6898311376571655, + "learning_rate": 0.00016707324144055825, + "loss": 2.6194, + "step": 5393 + }, + { + "epoch": 0.43531595512872245, + "grad_norm": 0.7536425590515137, + "learning_rate": 0.00016706153147189138, + "loss": 2.672, + "step": 5394 + }, + { + "epoch": 0.4353966588653055, + "grad_norm": 0.7576118111610413, + "learning_rate": 0.00016704981983187581, + "loss": 2.6473, + "step": 5395 + }, + { + "epoch": 0.43547736260188846, + "grad_norm": 0.7452495098114014, + "learning_rate": 0.00016703810652080349, + "loss": 2.6487, + "step": 5396 + }, + { + "epoch": 0.4355580663384715, + "grad_norm": 0.7817744612693787, + "learning_rate": 0.0001670263915389663, + "loss": 2.61, + "step": 5397 + }, + { + "epoch": 0.43563877007505447, + "grad_norm": 0.7195492386817932, + "learning_rate": 0.00016701467488665624, + "loss": 2.6745, + "step": 5398 + }, + { + "epoch": 0.4357194738116375, + "grad_norm": 0.7703930735588074, + "learning_rate": 0.0001670029565641653, + "loss": 2.7196, + "step": 5399 + }, + { + "epoch": 0.4358001775482205, + "grad_norm": 0.6859520673751831, + "learning_rate": 0.00016699123657178553, + "loss": 2.6317, + "step": 5400 + }, + { + "epoch": 0.4358808812848035, + "grad_norm": 0.7380268573760986, + "learning_rate": 0.00016697951490980903, + "loss": 2.6008, + "step": 5401 + }, + { + "epoch": 0.4359615850213865, + "grad_norm": 0.7903439402580261, + "learning_rate": 0.00016696779157852792, + "loss": 2.6411, + "step": 5402 + }, + { + 
"epoch": 0.4360422887579695, + "grad_norm": 0.7022606134414673, + "learning_rate": 0.0001669560665782344, + "loss": 2.6153, + "step": 5403 + }, + { + "epoch": 0.4361229924945525, + "grad_norm": 0.8196203112602234, + "learning_rate": 0.00016694433990922068, + "loss": 2.6128, + "step": 5404 + }, + { + "epoch": 0.43620369623113553, + "grad_norm": 0.7342696189880371, + "learning_rate": 0.000166932611571779, + "loss": 2.6802, + "step": 5405 + }, + { + "epoch": 0.4362843999677185, + "grad_norm": 0.7475131154060364, + "learning_rate": 0.0001669208815662017, + "loss": 2.6106, + "step": 5406 + }, + { + "epoch": 0.4363651037043015, + "grad_norm": 0.7067655324935913, + "learning_rate": 0.00016690914989278107, + "loss": 2.6362, + "step": 5407 + }, + { + "epoch": 0.4364458074408845, + "grad_norm": 0.7550163865089417, + "learning_rate": 0.00016689741655180956, + "loss": 2.6256, + "step": 5408 + }, + { + "epoch": 0.4365265111774675, + "grad_norm": 0.7341828346252441, + "learning_rate": 0.00016688568154357952, + "loss": 2.6912, + "step": 5409 + }, + { + "epoch": 0.4366072149140505, + "grad_norm": 0.7501869201660156, + "learning_rate": 0.00016687394486838349, + "loss": 2.7122, + "step": 5410 + }, + { + "epoch": 0.4366879186506335, + "grad_norm": 0.7041562795639038, + "learning_rate": 0.00016686220652651392, + "loss": 2.6755, + "step": 5411 + }, + { + "epoch": 0.43676862238721653, + "grad_norm": 0.7218217253684998, + "learning_rate": 0.00016685046651826338, + "loss": 2.693, + "step": 5412 + }, + { + "epoch": 0.4368493261237995, + "grad_norm": 0.6880577206611633, + "learning_rate": 0.00016683872484392448, + "loss": 2.638, + "step": 5413 + }, + { + "epoch": 0.43693002986038254, + "grad_norm": 0.6864475607872009, + "learning_rate": 0.0001668269815037898, + "loss": 2.6497, + "step": 5414 + }, + { + "epoch": 0.4370107335969655, + "grad_norm": 0.7326167821884155, + "learning_rate": 0.00016681523649815212, + "loss": 2.6858, + "step": 5415 + }, + { + "epoch": 0.43709143733354855, + 
"grad_norm": 0.6773428320884705, + "learning_rate": 0.00016680348982730405, + "loss": 2.6489, + "step": 5416 + }, + { + "epoch": 0.4371721410701315, + "grad_norm": 0.7117835283279419, + "learning_rate": 0.00016679174149153837, + "loss": 2.6607, + "step": 5417 + }, + { + "epoch": 0.43725284480671456, + "grad_norm": 0.7268334031105042, + "learning_rate": 0.00016677999149114793, + "loss": 2.703, + "step": 5418 + }, + { + "epoch": 0.43733354854329753, + "grad_norm": 0.7672972679138184, + "learning_rate": 0.00016676823982642554, + "loss": 2.5803, + "step": 5419 + }, + { + "epoch": 0.43741425227988057, + "grad_norm": 0.6966733932495117, + "learning_rate": 0.00016675648649766407, + "loss": 2.6149, + "step": 5420 + }, + { + "epoch": 0.43749495601646354, + "grad_norm": 0.752896249294281, + "learning_rate": 0.00016674473150515644, + "loss": 2.7108, + "step": 5421 + }, + { + "epoch": 0.4375756597530466, + "grad_norm": 0.7094796895980835, + "learning_rate": 0.00016673297484919565, + "loss": 2.6989, + "step": 5422 + }, + { + "epoch": 0.43765636348962955, + "grad_norm": 0.7631612420082092, + "learning_rate": 0.00016672121653007465, + "loss": 2.6673, + "step": 5423 + }, + { + "epoch": 0.4377370672262126, + "grad_norm": 0.7083843946456909, + "learning_rate": 0.00016670945654808655, + "loss": 2.6529, + "step": 5424 + }, + { + "epoch": 0.43781777096279556, + "grad_norm": 0.7291569709777832, + "learning_rate": 0.0001666976949035244, + "loss": 2.633, + "step": 5425 + }, + { + "epoch": 0.4378984746993786, + "grad_norm": 0.8351448774337769, + "learning_rate": 0.00016668593159668138, + "loss": 2.5993, + "step": 5426 + }, + { + "epoch": 0.43797917843596157, + "grad_norm": 0.7339642643928528, + "learning_rate": 0.00016667416662785058, + "loss": 2.6486, + "step": 5427 + }, + { + "epoch": 0.4380598821725446, + "grad_norm": 0.7257512211799622, + "learning_rate": 0.00016666239999732526, + "loss": 2.6453, + "step": 5428 + }, + { + "epoch": 0.4381405859091276, + "grad_norm": 0.7282476425170898, 
+ "learning_rate": 0.00016665063170539872, + "loss": 2.6654, + "step": 5429 + }, + { + "epoch": 0.4382212896457106, + "grad_norm": 0.726685643196106, + "learning_rate": 0.00016663886175236417, + "loss": 2.65, + "step": 5430 + }, + { + "epoch": 0.4383019933822936, + "grad_norm": 0.7478880286216736, + "learning_rate": 0.000166627090138515, + "loss": 2.623, + "step": 5431 + }, + { + "epoch": 0.4383826971188766, + "grad_norm": 0.7624948024749756, + "learning_rate": 0.00016661531686414457, + "loss": 2.6438, + "step": 5432 + }, + { + "epoch": 0.4384634008554596, + "grad_norm": 0.8098936676979065, + "learning_rate": 0.00016660354192954633, + "loss": 2.6226, + "step": 5433 + }, + { + "epoch": 0.4385441045920426, + "grad_norm": 0.7305725812911987, + "learning_rate": 0.0001665917653350137, + "loss": 2.6425, + "step": 5434 + }, + { + "epoch": 0.4386248083286256, + "grad_norm": 0.7064421772956848, + "learning_rate": 0.00016657998708084027, + "loss": 2.6069, + "step": 5435 + }, + { + "epoch": 0.43870551206520864, + "grad_norm": 0.8279524445533752, + "learning_rate": 0.00016656820716731945, + "loss": 2.6609, + "step": 5436 + }, + { + "epoch": 0.4387862158017916, + "grad_norm": 0.742659866809845, + "learning_rate": 0.00016655642559474488, + "loss": 2.64, + "step": 5437 + }, + { + "epoch": 0.43886691953837464, + "grad_norm": 0.757780909538269, + "learning_rate": 0.00016654464236341026, + "loss": 2.6546, + "step": 5438 + }, + { + "epoch": 0.4389476232749576, + "grad_norm": 0.7439742684364319, + "learning_rate": 0.00016653285747360918, + "loss": 2.6717, + "step": 5439 + }, + { + "epoch": 0.43902832701154065, + "grad_norm": 0.7529581189155579, + "learning_rate": 0.0001665210709256354, + "loss": 2.6204, + "step": 5440 + }, + { + "epoch": 0.43910903074812363, + "grad_norm": 0.7224153876304626, + "learning_rate": 0.00016650928271978258, + "loss": 2.6417, + "step": 5441 + }, + { + "epoch": 0.43918973448470666, + "grad_norm": 0.6792185306549072, + "learning_rate": 0.00016649749285634462, 
+ "loss": 2.6382, + "step": 5442 + }, + { + "epoch": 0.43927043822128964, + "grad_norm": 0.6887058019638062, + "learning_rate": 0.00016648570133561533, + "loss": 2.6302, + "step": 5443 + }, + { + "epoch": 0.43935114195787267, + "grad_norm": 0.7373671531677246, + "learning_rate": 0.00016647390815788853, + "loss": 2.625, + "step": 5444 + }, + { + "epoch": 0.43943184569445565, + "grad_norm": 0.7595719695091248, + "learning_rate": 0.0001664621133234582, + "loss": 2.6444, + "step": 5445 + }, + { + "epoch": 0.4395125494310387, + "grad_norm": 0.7331473231315613, + "learning_rate": 0.00016645031683261825, + "loss": 2.6308, + "step": 5446 + }, + { + "epoch": 0.43959325316762166, + "grad_norm": 0.7724922895431519, + "learning_rate": 0.0001664385186856627, + "loss": 2.6646, + "step": 5447 + }, + { + "epoch": 0.4396739569042047, + "grad_norm": 0.6960163712501526, + "learning_rate": 0.00016642671888288563, + "loss": 2.6196, + "step": 5448 + }, + { + "epoch": 0.43975466064078766, + "grad_norm": 0.6769189834594727, + "learning_rate": 0.00016641491742458103, + "loss": 2.6558, + "step": 5449 + }, + { + "epoch": 0.4398353643773707, + "grad_norm": 0.7435783743858337, + "learning_rate": 0.0001664031143110431, + "loss": 2.6717, + "step": 5450 + }, + { + "epoch": 0.4399160681139537, + "grad_norm": 0.7234118580818176, + "learning_rate": 0.00016639130954256603, + "loss": 2.6549, + "step": 5451 + }, + { + "epoch": 0.4399967718505367, + "grad_norm": 0.720825731754303, + "learning_rate": 0.00016637950311944392, + "loss": 2.6098, + "step": 5452 + }, + { + "epoch": 0.4400774755871197, + "grad_norm": 0.6977505087852478, + "learning_rate": 0.0001663676950419711, + "loss": 2.6351, + "step": 5453 + }, + { + "epoch": 0.4401581793237027, + "grad_norm": 0.6959076523780823, + "learning_rate": 0.00016635588531044185, + "loss": 2.6918, + "step": 5454 + }, + { + "epoch": 0.4402388830602857, + "grad_norm": 0.7022189497947693, + "learning_rate": 0.00016634407392515044, + "loss": 2.6218, + "step": 5455 + }, 
+ { + "epoch": 0.4403195867968687, + "grad_norm": 0.7147775292396545, + "learning_rate": 0.0001663322608863913, + "loss": 2.6966, + "step": 5456 + }, + { + "epoch": 0.4404002905334517, + "grad_norm": 0.7592755556106567, + "learning_rate": 0.00016632044619445882, + "loss": 2.6326, + "step": 5457 + }, + { + "epoch": 0.4404809942700347, + "grad_norm": 0.6914302110671997, + "learning_rate": 0.00016630862984964745, + "loss": 2.603, + "step": 5458 + }, + { + "epoch": 0.4405616980066177, + "grad_norm": 0.7735368609428406, + "learning_rate": 0.0001662968118522517, + "loss": 2.6666, + "step": 5459 + }, + { + "epoch": 0.4406424017432007, + "grad_norm": 0.7175899744033813, + "learning_rate": 0.00016628499220256612, + "loss": 2.666, + "step": 5460 + }, + { + "epoch": 0.4407231054797837, + "grad_norm": 0.6735796332359314, + "learning_rate": 0.00016627317090088523, + "loss": 2.6451, + "step": 5461 + }, + { + "epoch": 0.4408038092163667, + "grad_norm": 0.72022545337677, + "learning_rate": 0.0001662613479475037, + "loss": 2.6295, + "step": 5462 + }, + { + "epoch": 0.4408845129529497, + "grad_norm": 0.7084751725196838, + "learning_rate": 0.00016624952334271616, + "loss": 2.6633, + "step": 5463 + }, + { + "epoch": 0.4409652166895327, + "grad_norm": 0.7399250864982605, + "learning_rate": 0.00016623769708681735, + "loss": 2.6076, + "step": 5464 + }, + { + "epoch": 0.44104592042611573, + "grad_norm": 0.6904892325401306, + "learning_rate": 0.00016622586918010193, + "loss": 2.6799, + "step": 5465 + }, + { + "epoch": 0.4411266241626987, + "grad_norm": 0.7419006824493408, + "learning_rate": 0.00016621403962286478, + "loss": 2.65, + "step": 5466 + }, + { + "epoch": 0.44120732789928174, + "grad_norm": 0.7201282978057861, + "learning_rate": 0.00016620220841540064, + "loss": 2.6769, + "step": 5467 + }, + { + "epoch": 0.4412880316358647, + "grad_norm": 0.7223218679428101, + "learning_rate": 0.00016619037555800443, + "loss": 2.6342, + "step": 5468 + }, + { + "epoch": 0.44136873537244775, + 
"grad_norm": 0.7517585754394531, + "learning_rate": 0.00016617854105097104, + "loss": 2.6103, + "step": 5469 + }, + { + "epoch": 0.44144943910903073, + "grad_norm": 0.6765139698982239, + "learning_rate": 0.0001661667048945954, + "loss": 2.624, + "step": 5470 + }, + { + "epoch": 0.44153014284561376, + "grad_norm": 0.7197677493095398, + "learning_rate": 0.00016615486708917255, + "loss": 2.5786, + "step": 5471 + }, + { + "epoch": 0.44161084658219674, + "grad_norm": 0.7196774482727051, + "learning_rate": 0.00016614302763499742, + "loss": 2.6147, + "step": 5472 + }, + { + "epoch": 0.44169155031877977, + "grad_norm": 0.7210293412208557, + "learning_rate": 0.00016613118653236518, + "loss": 2.6526, + "step": 5473 + }, + { + "epoch": 0.44177225405536275, + "grad_norm": 0.6870129108428955, + "learning_rate": 0.00016611934378157092, + "loss": 2.665, + "step": 5474 + }, + { + "epoch": 0.4418529577919458, + "grad_norm": 0.6925365328788757, + "learning_rate": 0.00016610749938290975, + "loss": 2.5734, + "step": 5475 + }, + { + "epoch": 0.44193366152852875, + "grad_norm": 0.7399131655693054, + "learning_rate": 0.0001660956533366769, + "loss": 2.6935, + "step": 5476 + }, + { + "epoch": 0.4420143652651118, + "grad_norm": 0.7348966002464294, + "learning_rate": 0.00016608380564316758, + "loss": 2.6788, + "step": 5477 + }, + { + "epoch": 0.44209506900169476, + "grad_norm": 0.7597334980964661, + "learning_rate": 0.00016607195630267708, + "loss": 2.6732, + "step": 5478 + }, + { + "epoch": 0.4421757727382778, + "grad_norm": 0.6847043037414551, + "learning_rate": 0.00016606010531550072, + "loss": 2.6475, + "step": 5479 + }, + { + "epoch": 0.44225647647486077, + "grad_norm": 0.7065151929855347, + "learning_rate": 0.00016604825268193388, + "loss": 2.6674, + "step": 5480 + }, + { + "epoch": 0.4423371802114438, + "grad_norm": 0.7102208137512207, + "learning_rate": 0.0001660363984022719, + "loss": 2.6723, + "step": 5481 + }, + { + "epoch": 0.4424178839480268, + "grad_norm": 0.6912767887115479, 
+ "learning_rate": 0.00016602454247681024, + "loss": 2.628, + "step": 5482 + }, + { + "epoch": 0.4424985876846098, + "grad_norm": 0.7265123128890991, + "learning_rate": 0.0001660126849058444, + "loss": 2.5935, + "step": 5483 + }, + { + "epoch": 0.4425792914211928, + "grad_norm": 0.8177923560142517, + "learning_rate": 0.0001660008256896699, + "loss": 2.6402, + "step": 5484 + }, + { + "epoch": 0.4426599951577758, + "grad_norm": 0.7196556925773621, + "learning_rate": 0.00016598896482858231, + "loss": 2.6939, + "step": 5485 + }, + { + "epoch": 0.4427406988943588, + "grad_norm": 0.7459850907325745, + "learning_rate": 0.0001659771023228772, + "loss": 2.6343, + "step": 5486 + }, + { + "epoch": 0.44282140263094183, + "grad_norm": 0.7399095892906189, + "learning_rate": 0.00016596523817285024, + "loss": 2.6139, + "step": 5487 + }, + { + "epoch": 0.4429021063675248, + "grad_norm": 0.7517558336257935, + "learning_rate": 0.0001659533723787971, + "loss": 2.6609, + "step": 5488 + }, + { + "epoch": 0.44298281010410784, + "grad_norm": 0.7073537707328796, + "learning_rate": 0.00016594150494101355, + "loss": 2.6326, + "step": 5489 + }, + { + "epoch": 0.4430635138406908, + "grad_norm": 0.7414752244949341, + "learning_rate": 0.0001659296358597953, + "loss": 2.6759, + "step": 5490 + }, + { + "epoch": 0.44314421757727385, + "grad_norm": 0.7636380195617676, + "learning_rate": 0.0001659177651354382, + "loss": 2.5743, + "step": 5491 + }, + { + "epoch": 0.4432249213138568, + "grad_norm": 0.6839539408683777, + "learning_rate": 0.00016590589276823804, + "loss": 2.631, + "step": 5492 + }, + { + "epoch": 0.44330562505043986, + "grad_norm": 0.8057516813278198, + "learning_rate": 0.0001658940187584908, + "loss": 2.6916, + "step": 5493 + }, + { + "epoch": 0.44338632878702283, + "grad_norm": 0.7479767799377441, + "learning_rate": 0.00016588214310649232, + "loss": 2.6811, + "step": 5494 + }, + { + "epoch": 0.44346703252360586, + "grad_norm": 0.7854729294776917, + "learning_rate": 
0.00016587026581253866, + "loss": 2.6746, + "step": 5495 + }, + { + "epoch": 0.44354773626018884, + "grad_norm": 0.7782836556434631, + "learning_rate": 0.00016585838687692577, + "loss": 2.61, + "step": 5496 + }, + { + "epoch": 0.4436284399967719, + "grad_norm": 0.7047034502029419, + "learning_rate": 0.00016584650629994968, + "loss": 2.6573, + "step": 5497 + }, + { + "epoch": 0.44370914373335485, + "grad_norm": 0.7398735880851746, + "learning_rate": 0.0001658346240819066, + "loss": 2.6338, + "step": 5498 + }, + { + "epoch": 0.4437898474699379, + "grad_norm": 0.7243468165397644, + "learning_rate": 0.00016582274022309258, + "loss": 2.5898, + "step": 5499 + }, + { + "epoch": 0.44387055120652086, + "grad_norm": 0.7415906190872192, + "learning_rate": 0.00016581085472380376, + "loss": 2.5893, + "step": 5500 + }, + { + "epoch": 0.4439512549431039, + "grad_norm": 0.6935107707977295, + "learning_rate": 0.00016579896758433645, + "loss": 2.6704, + "step": 5501 + }, + { + "epoch": 0.44403195867968687, + "grad_norm": 0.7188034653663635, + "learning_rate": 0.00016578707880498685, + "loss": 2.643, + "step": 5502 + }, + { + "epoch": 0.4441126624162699, + "grad_norm": 0.6697022914886475, + "learning_rate": 0.0001657751883860513, + "loss": 2.6313, + "step": 5503 + }, + { + "epoch": 0.4441933661528529, + "grad_norm": 0.760154664516449, + "learning_rate": 0.00016576329632782613, + "loss": 2.6604, + "step": 5504 + }, + { + "epoch": 0.4442740698894359, + "grad_norm": 0.6883447170257568, + "learning_rate": 0.00016575140263060765, + "loss": 2.64, + "step": 5505 + }, + { + "epoch": 0.4443547736260189, + "grad_norm": 0.8628804683685303, + "learning_rate": 0.0001657395072946924, + "loss": 2.6651, + "step": 5506 + }, + { + "epoch": 0.4444354773626019, + "grad_norm": 0.7125170230865479, + "learning_rate": 0.0001657276103203768, + "loss": 2.7132, + "step": 5507 + }, + { + "epoch": 0.4445161810991849, + "grad_norm": 0.6965304613113403, + "learning_rate": 0.00016571571170795725, + "loss": 2.7109, 
+ "step": 5508 + }, + { + "epoch": 0.44459688483576787, + "grad_norm": 0.720327615737915, + "learning_rate": 0.00016570381145773042, + "loss": 2.6323, + "step": 5509 + }, + { + "epoch": 0.4446775885723509, + "grad_norm": 0.7097898125648499, + "learning_rate": 0.00016569190956999287, + "loss": 2.6461, + "step": 5510 + }, + { + "epoch": 0.4447582923089339, + "grad_norm": 0.7142884731292725, + "learning_rate": 0.0001656800060450412, + "loss": 2.6894, + "step": 5511 + }, + { + "epoch": 0.4448389960455169, + "grad_norm": 0.6992002725601196, + "learning_rate": 0.0001656681008831721, + "loss": 2.6116, + "step": 5512 + }, + { + "epoch": 0.4449196997820999, + "grad_norm": 0.763841450214386, + "learning_rate": 0.00016565619408468227, + "loss": 2.6441, + "step": 5513 + }, + { + "epoch": 0.4450004035186829, + "grad_norm": 0.6958404183387756, + "learning_rate": 0.00016564428564986848, + "loss": 2.5751, + "step": 5514 + }, + { + "epoch": 0.4450811072552659, + "grad_norm": 0.8804046511650085, + "learning_rate": 0.00016563237557902744, + "loss": 2.6353, + "step": 5515 + }, + { + "epoch": 0.4451618109918489, + "grad_norm": 0.744864821434021, + "learning_rate": 0.00016562046387245608, + "loss": 2.6887, + "step": 5516 + }, + { + "epoch": 0.4452425147284319, + "grad_norm": 0.7627978920936584, + "learning_rate": 0.0001656085505304512, + "loss": 2.6347, + "step": 5517 + }, + { + "epoch": 0.44532321846501494, + "grad_norm": 0.7728918194770813, + "learning_rate": 0.00016559663555330975, + "loss": 2.6344, + "step": 5518 + }, + { + "epoch": 0.4454039222015979, + "grad_norm": 0.7853842377662659, + "learning_rate": 0.00016558471894132865, + "loss": 2.7239, + "step": 5519 + }, + { + "epoch": 0.44548462593818094, + "grad_norm": 0.7981860041618347, + "learning_rate": 0.00016557280069480495, + "loss": 2.66, + "step": 5520 + }, + { + "epoch": 0.4455653296747639, + "grad_norm": 0.7555295825004578, + "learning_rate": 0.0001655608808140356, + "loss": 2.6636, + "step": 5521 + }, + { + "epoch": 
0.44564603341134695, + "grad_norm": 0.6893854141235352, + "learning_rate": 0.00016554895929931778, + "loss": 2.5999, + "step": 5522 + }, + { + "epoch": 0.44572673714792993, + "grad_norm": 0.7740506529808044, + "learning_rate": 0.0001655370361509485, + "loss": 2.6308, + "step": 5523 + }, + { + "epoch": 0.44580744088451296, + "grad_norm": 0.6956021785736084, + "learning_rate": 0.00016552511136922498, + "loss": 2.6376, + "step": 5524 + }, + { + "epoch": 0.44588814462109594, + "grad_norm": 0.7408841252326965, + "learning_rate": 0.00016551318495444445, + "loss": 2.6644, + "step": 5525 + }, + { + "epoch": 0.44596884835767897, + "grad_norm": 0.7715663313865662, + "learning_rate": 0.000165501256906904, + "loss": 2.6791, + "step": 5526 + }, + { + "epoch": 0.44604955209426195, + "grad_norm": 0.6880629062652588, + "learning_rate": 0.0001654893272269011, + "loss": 2.7209, + "step": 5527 + }, + { + "epoch": 0.446130255830845, + "grad_norm": 0.6765853762626648, + "learning_rate": 0.0001654773959147329, + "loss": 2.6548, + "step": 5528 + }, + { + "epoch": 0.44621095956742796, + "grad_norm": 0.739248514175415, + "learning_rate": 0.00016546546297069688, + "loss": 2.69, + "step": 5529 + }, + { + "epoch": 0.446291663304011, + "grad_norm": 0.7655714750289917, + "learning_rate": 0.00016545352839509038, + "loss": 2.6238, + "step": 5530 + }, + { + "epoch": 0.44637236704059396, + "grad_norm": 0.706068217754364, + "learning_rate": 0.00016544159218821088, + "loss": 2.6528, + "step": 5531 + }, + { + "epoch": 0.446453070777177, + "grad_norm": 0.7411316633224487, + "learning_rate": 0.00016542965435035578, + "loss": 2.7034, + "step": 5532 + }, + { + "epoch": 0.44653377451376, + "grad_norm": 0.6550690531730652, + "learning_rate": 0.0001654177148818227, + "loss": 2.6388, + "step": 5533 + }, + { + "epoch": 0.446614478250343, + "grad_norm": 0.7151147127151489, + "learning_rate": 0.00016540577378290915, + "loss": 2.7382, + "step": 5534 + }, + { + "epoch": 0.446695181986926, + "grad_norm": 
0.7343939542770386, + "learning_rate": 0.00016539383105391276, + "loss": 2.6316, + "step": 5535 + }, + { + "epoch": 0.446775885723509, + "grad_norm": 0.702036440372467, + "learning_rate": 0.00016538188669513115, + "loss": 2.6465, + "step": 5536 + }, + { + "epoch": 0.446856589460092, + "grad_norm": 0.7212840914726257, + "learning_rate": 0.00016536994070686197, + "loss": 2.6471, + "step": 5537 + }, + { + "epoch": 0.446937293196675, + "grad_norm": 0.7345479130744934, + "learning_rate": 0.00016535799308940304, + "loss": 2.6746, + "step": 5538 + }, + { + "epoch": 0.447017996933258, + "grad_norm": 0.7447341084480286, + "learning_rate": 0.00016534604384305207, + "loss": 2.6487, + "step": 5539 + }, + { + "epoch": 0.44709870066984103, + "grad_norm": 0.6865687370300293, + "learning_rate": 0.00016533409296810687, + "loss": 2.6202, + "step": 5540 + }, + { + "epoch": 0.447179404406424, + "grad_norm": 0.8210769891738892, + "learning_rate": 0.0001653221404648653, + "loss": 2.7155, + "step": 5541 + }, + { + "epoch": 0.44726010814300704, + "grad_norm": 0.7768925428390503, + "learning_rate": 0.0001653101863336252, + "loss": 2.6011, + "step": 5542 + }, + { + "epoch": 0.44734081187959, + "grad_norm": 0.7160049080848694, + "learning_rate": 0.00016529823057468456, + "loss": 2.6541, + "step": 5543 + }, + { + "epoch": 0.44742151561617305, + "grad_norm": 0.7386900782585144, + "learning_rate": 0.00016528627318834134, + "loss": 2.6586, + "step": 5544 + }, + { + "epoch": 0.447502219352756, + "grad_norm": 0.7415460348129272, + "learning_rate": 0.0001652743141748935, + "loss": 2.7032, + "step": 5545 + }, + { + "epoch": 0.44758292308933906, + "grad_norm": 0.8483054637908936, + "learning_rate": 0.00016526235353463912, + "loss": 2.6145, + "step": 5546 + }, + { + "epoch": 0.44766362682592203, + "grad_norm": 0.7428778409957886, + "learning_rate": 0.00016525039126787629, + "loss": 2.7005, + "step": 5547 + }, + { + "epoch": 0.44774433056250507, + "grad_norm": 0.7214285731315613, + "learning_rate": 
0.00016523842737490316, + "loss": 2.6267, + "step": 5548 + }, + { + "epoch": 0.44782503429908804, + "grad_norm": 0.6753950715065002, + "learning_rate": 0.0001652264618560179, + "loss": 2.6732, + "step": 5549 + }, + { + "epoch": 0.4479057380356711, + "grad_norm": 0.6969403028488159, + "learning_rate": 0.00016521449471151867, + "loss": 2.6218, + "step": 5550 + }, + { + "epoch": 0.44798644177225405, + "grad_norm": 0.7562664151191711, + "learning_rate": 0.00016520252594170377, + "loss": 2.69, + "step": 5551 + }, + { + "epoch": 0.4480671455088371, + "grad_norm": 0.6831937432289124, + "learning_rate": 0.0001651905555468715, + "loss": 2.709, + "step": 5552 + }, + { + "epoch": 0.44814784924542006, + "grad_norm": 0.6753427386283875, + "learning_rate": 0.00016517858352732017, + "loss": 2.5852, + "step": 5553 + }, + { + "epoch": 0.4482285529820031, + "grad_norm": 0.7573871612548828, + "learning_rate": 0.00016516660988334815, + "loss": 2.6187, + "step": 5554 + }, + { + "epoch": 0.44830925671858607, + "grad_norm": 0.6424254775047302, + "learning_rate": 0.00016515463461525383, + "loss": 2.6411, + "step": 5555 + }, + { + "epoch": 0.4483899604551691, + "grad_norm": 0.7460073232650757, + "learning_rate": 0.0001651426577233358, + "loss": 2.6239, + "step": 5556 + }, + { + "epoch": 0.4484706641917521, + "grad_norm": 0.6980866193771362, + "learning_rate": 0.0001651306792078924, + "loss": 2.605, + "step": 5557 + }, + { + "epoch": 0.4485513679283351, + "grad_norm": 0.7376009225845337, + "learning_rate": 0.00016511869906922217, + "loss": 2.7114, + "step": 5558 + }, + { + "epoch": 0.4486320716649181, + "grad_norm": 0.7227364778518677, + "learning_rate": 0.0001651067173076238, + "loss": 2.6212, + "step": 5559 + }, + { + "epoch": 0.44871277540150106, + "grad_norm": 0.8989635705947876, + "learning_rate": 0.00016509473392339584, + "loss": 2.671, + "step": 5560 + }, + { + "epoch": 0.4487934791380841, + "grad_norm": 0.7273553609848022, + "learning_rate": 0.0001650827489168369, + "loss": 2.6556, 
+ "step": 5561 + }, + { + "epoch": 0.44887418287466707, + "grad_norm": 0.839439868927002, + "learning_rate": 0.00016507076228824578, + "loss": 2.6959, + "step": 5562 + }, + { + "epoch": 0.4489548866112501, + "grad_norm": 0.6912770867347717, + "learning_rate": 0.00016505877403792115, + "loss": 2.6709, + "step": 5563 + }, + { + "epoch": 0.4490355903478331, + "grad_norm": 0.7850949168205261, + "learning_rate": 0.00016504678416616182, + "loss": 2.7257, + "step": 5564 + }, + { + "epoch": 0.4491162940844161, + "grad_norm": 0.7768355011940002, + "learning_rate": 0.0001650347926732666, + "loss": 2.5939, + "step": 5565 + }, + { + "epoch": 0.4491969978209991, + "grad_norm": 0.6518398523330688, + "learning_rate": 0.0001650227995595343, + "loss": 2.6589, + "step": 5566 + }, + { + "epoch": 0.4492777015575821, + "grad_norm": 0.6855975389480591, + "learning_rate": 0.0001650108048252639, + "loss": 2.6372, + "step": 5567 + }, + { + "epoch": 0.4493584052941651, + "grad_norm": 0.7176938056945801, + "learning_rate": 0.0001649988084707543, + "loss": 2.6506, + "step": 5568 + }, + { + "epoch": 0.44943910903074813, + "grad_norm": 0.735335648059845, + "learning_rate": 0.00016498681049630448, + "loss": 2.608, + "step": 5569 + }, + { + "epoch": 0.4495198127673311, + "grad_norm": 0.6862306594848633, + "learning_rate": 0.00016497481090221346, + "loss": 2.5982, + "step": 5570 + }, + { + "epoch": 0.44960051650391414, + "grad_norm": 0.7213380336761475, + "learning_rate": 0.0001649628096887803, + "loss": 2.6457, + "step": 5571 + }, + { + "epoch": 0.4496812202404971, + "grad_norm": 0.7118985652923584, + "learning_rate": 0.0001649508068563041, + "loss": 2.6321, + "step": 5572 + }, + { + "epoch": 0.44976192397708015, + "grad_norm": 0.7663396596908569, + "learning_rate": 0.00016493880240508405, + "loss": 2.5865, + "step": 5573 + }, + { + "epoch": 0.4498426277136631, + "grad_norm": 0.6854543089866638, + "learning_rate": 0.00016492679633541926, + "loss": 2.6536, + "step": 5574 + }, + { + "epoch": 
0.44992333145024616, + "grad_norm": 0.7071701884269714, + "learning_rate": 0.000164914788647609, + "loss": 2.6149, + "step": 5575 + }, + { + "epoch": 0.45000403518682913, + "grad_norm": 0.7610478401184082, + "learning_rate": 0.00016490277934195252, + "loss": 2.6326, + "step": 5576 + }, + { + "epoch": 0.45008473892341216, + "grad_norm": 0.7117596864700317, + "learning_rate": 0.0001648907684187491, + "loss": 2.6938, + "step": 5577 + }, + { + "epoch": 0.45016544265999514, + "grad_norm": 0.6980494856834412, + "learning_rate": 0.00016487875587829813, + "loss": 2.6798, + "step": 5578 + }, + { + "epoch": 0.4502461463965782, + "grad_norm": 0.7957972288131714, + "learning_rate": 0.00016486674172089898, + "loss": 2.6029, + "step": 5579 + }, + { + "epoch": 0.45032685013316115, + "grad_norm": 0.7258082032203674, + "learning_rate": 0.00016485472594685103, + "loss": 2.6785, + "step": 5580 + }, + { + "epoch": 0.4504075538697442, + "grad_norm": 0.7402041554450989, + "learning_rate": 0.0001648427085564538, + "loss": 2.6263, + "step": 5581 + }, + { + "epoch": 0.45048825760632716, + "grad_norm": 0.6943814158439636, + "learning_rate": 0.00016483068955000673, + "loss": 2.6761, + "step": 5582 + }, + { + "epoch": 0.4505689613429102, + "grad_norm": 0.8021644353866577, + "learning_rate": 0.00016481866892780947, + "loss": 2.6376, + "step": 5583 + }, + { + "epoch": 0.45064966507949317, + "grad_norm": 0.7748533487319946, + "learning_rate": 0.0001648066466901615, + "loss": 2.7465, + "step": 5584 + }, + { + "epoch": 0.4507303688160762, + "grad_norm": 0.7432222366333008, + "learning_rate": 0.00016479462283736248, + "loss": 2.6368, + "step": 5585 + }, + { + "epoch": 0.4508110725526592, + "grad_norm": 0.7835286259651184, + "learning_rate": 0.00016478259736971214, + "loss": 2.6449, + "step": 5586 + }, + { + "epoch": 0.4508917762892422, + "grad_norm": 0.7372995018959045, + "learning_rate": 0.00016477057028751007, + "loss": 2.6091, + "step": 5587 + }, + { + "epoch": 0.4509724800258252, + "grad_norm": 
0.8230665326118469, + "learning_rate": 0.0001647585415910561, + "loss": 2.6345, + "step": 5588 + }, + { + "epoch": 0.4510531837624082, + "grad_norm": 0.7490825057029724, + "learning_rate": 0.00016474651128065002, + "loss": 2.5996, + "step": 5589 + }, + { + "epoch": 0.4511338874989912, + "grad_norm": 0.7950569987297058, + "learning_rate": 0.00016473447935659157, + "loss": 2.7109, + "step": 5590 + }, + { + "epoch": 0.4512145912355742, + "grad_norm": 0.7648342251777649, + "learning_rate": 0.00016472244581918074, + "loss": 2.6268, + "step": 5591 + }, + { + "epoch": 0.4512952949721572, + "grad_norm": 0.726828396320343, + "learning_rate": 0.00016471041066871733, + "loss": 2.5959, + "step": 5592 + }, + { + "epoch": 0.45137599870874023, + "grad_norm": 0.7855841517448425, + "learning_rate": 0.00016469837390550133, + "loss": 2.6671, + "step": 5593 + }, + { + "epoch": 0.4514567024453232, + "grad_norm": 0.6858882904052734, + "learning_rate": 0.00016468633552983275, + "loss": 2.6003, + "step": 5594 + }, + { + "epoch": 0.45153740618190624, + "grad_norm": 0.710926353931427, + "learning_rate": 0.0001646742955420116, + "loss": 2.6049, + "step": 5595 + }, + { + "epoch": 0.4516181099184892, + "grad_norm": 0.8359978199005127, + "learning_rate": 0.0001646622539423379, + "loss": 2.6636, + "step": 5596 + }, + { + "epoch": 0.45169881365507225, + "grad_norm": 0.7628041505813599, + "learning_rate": 0.00016465021073111186, + "loss": 2.6586, + "step": 5597 + }, + { + "epoch": 0.4517795173916552, + "grad_norm": 0.7723419666290283, + "learning_rate": 0.00016463816590863356, + "loss": 2.6213, + "step": 5598 + }, + { + "epoch": 0.45186022112823826, + "grad_norm": 0.7210986018180847, + "learning_rate": 0.0001646261194752032, + "loss": 2.6674, + "step": 5599 + }, + { + "epoch": 0.45194092486482124, + "grad_norm": 0.7665949463844299, + "learning_rate": 0.00016461407143112097, + "loss": 2.68, + "step": 5600 + }, + { + "epoch": 0.45202162860140427, + "grad_norm": 0.7225117087364197, + "learning_rate": 
0.00016460202177668722, + "loss": 2.6473, + "step": 5601 + }, + { + "epoch": 0.45210233233798724, + "grad_norm": 0.6831738948822021, + "learning_rate": 0.0001645899705122022, + "loss": 2.6863, + "step": 5602 + }, + { + "epoch": 0.4521830360745703, + "grad_norm": 0.7006321549415588, + "learning_rate": 0.00016457791763796627, + "loss": 2.6242, + "step": 5603 + }, + { + "epoch": 0.45226373981115325, + "grad_norm": 0.7245663404464722, + "learning_rate": 0.00016456586315427983, + "loss": 2.6201, + "step": 5604 + }, + { + "epoch": 0.4523444435477363, + "grad_norm": 0.7444287538528442, + "learning_rate": 0.00016455380706144332, + "loss": 2.6684, + "step": 5605 + }, + { + "epoch": 0.45242514728431926, + "grad_norm": 0.6562673449516296, + "learning_rate": 0.00016454174935975714, + "loss": 2.5912, + "step": 5606 + }, + { + "epoch": 0.4525058510209023, + "grad_norm": 0.6494336724281311, + "learning_rate": 0.0001645296900495219, + "loss": 2.6245, + "step": 5607 + }, + { + "epoch": 0.45258655475748527, + "grad_norm": 0.6968161463737488, + "learning_rate": 0.0001645176291310381, + "loss": 2.6494, + "step": 5608 + }, + { + "epoch": 0.4526672584940683, + "grad_norm": 0.7351142764091492, + "learning_rate": 0.00016450556660460632, + "loss": 2.574, + "step": 5609 + }, + { + "epoch": 0.4527479622306513, + "grad_norm": 0.7522323131561279, + "learning_rate": 0.0001644935024705272, + "loss": 2.6512, + "step": 5610 + }, + { + "epoch": 0.45282866596723426, + "grad_norm": 0.6744225025177002, + "learning_rate": 0.0001644814367291014, + "loss": 2.6288, + "step": 5611 + }, + { + "epoch": 0.4529093697038173, + "grad_norm": 0.6933234333992004, + "learning_rate": 0.00016446936938062967, + "loss": 2.6076, + "step": 5612 + }, + { + "epoch": 0.45299007344040026, + "grad_norm": 0.7101204991340637, + "learning_rate": 0.00016445730042541272, + "loss": 2.6322, + "step": 5613 + }, + { + "epoch": 0.4530707771769833, + "grad_norm": 0.7647581696510315, + "learning_rate": 0.00016444522986375134, + "loss": 
2.7021, + "step": 5614 + }, + { + "epoch": 0.4531514809135663, + "grad_norm": 0.7028820514678955, + "learning_rate": 0.00016443315769594635, + "loss": 2.6171, + "step": 5615 + }, + { + "epoch": 0.4532321846501493, + "grad_norm": 0.6933851838111877, + "learning_rate": 0.00016442108392229868, + "loss": 2.6119, + "step": 5616 + }, + { + "epoch": 0.4533128883867323, + "grad_norm": 0.7218462824821472, + "learning_rate": 0.0001644090085431092, + "loss": 2.6661, + "step": 5617 + }, + { + "epoch": 0.4533935921233153, + "grad_norm": 0.7390525341033936, + "learning_rate": 0.00016439693155867883, + "loss": 2.7084, + "step": 5618 + }, + { + "epoch": 0.4534742958598983, + "grad_norm": 0.734136164188385, + "learning_rate": 0.0001643848529693086, + "loss": 2.6896, + "step": 5619 + }, + { + "epoch": 0.4535549995964813, + "grad_norm": 0.8082060813903809, + "learning_rate": 0.00016437277277529954, + "loss": 2.5828, + "step": 5620 + }, + { + "epoch": 0.4536357033330643, + "grad_norm": 0.695988655090332, + "learning_rate": 0.0001643606909769527, + "loss": 2.6383, + "step": 5621 + }, + { + "epoch": 0.45371640706964733, + "grad_norm": 0.7415786385536194, + "learning_rate": 0.00016434860757456922, + "loss": 2.6388, + "step": 5622 + }, + { + "epoch": 0.4537971108062303, + "grad_norm": 0.7378649115562439, + "learning_rate": 0.0001643365225684502, + "loss": 2.6534, + "step": 5623 + }, + { + "epoch": 0.45387781454281334, + "grad_norm": 0.7686129808425903, + "learning_rate": 0.0001643244359588969, + "loss": 2.6637, + "step": 5624 + }, + { + "epoch": 0.4539585182793963, + "grad_norm": 0.7305558323860168, + "learning_rate": 0.00016431234774621047, + "loss": 2.6525, + "step": 5625 + }, + { + "epoch": 0.45403922201597935, + "grad_norm": 0.7994235157966614, + "learning_rate": 0.00016430025793069225, + "loss": 2.6316, + "step": 5626 + }, + { + "epoch": 0.4541199257525623, + "grad_norm": 0.6945801377296448, + "learning_rate": 0.0001642881665126435, + "loss": 2.6367, + "step": 5627 + }, + { + 
"epoch": 0.45420062948914536, + "grad_norm": 0.6855447292327881, + "learning_rate": 0.00016427607349236558, + "loss": 2.6317, + "step": 5628 + }, + { + "epoch": 0.45428133322572833, + "grad_norm": 0.6961888670921326, + "learning_rate": 0.00016426397887015992, + "loss": 2.6477, + "step": 5629 + }, + { + "epoch": 0.45436203696231137, + "grad_norm": 0.7531994581222534, + "learning_rate": 0.0001642518826463279, + "loss": 2.7219, + "step": 5630 + }, + { + "epoch": 0.45444274069889434, + "grad_norm": 0.7442335486412048, + "learning_rate": 0.00016423978482117102, + "loss": 2.706, + "step": 5631 + }, + { + "epoch": 0.4545234444354774, + "grad_norm": 0.7075700759887695, + "learning_rate": 0.00016422768539499076, + "loss": 2.6481, + "step": 5632 + }, + { + "epoch": 0.45460414817206035, + "grad_norm": 0.7831876873970032, + "learning_rate": 0.0001642155843680887, + "loss": 2.616, + "step": 5633 + }, + { + "epoch": 0.4546848519086434, + "grad_norm": 0.7514604926109314, + "learning_rate": 0.00016420348174076642, + "loss": 2.6282, + "step": 5634 + }, + { + "epoch": 0.45476555564522636, + "grad_norm": 0.7136685252189636, + "learning_rate": 0.0001641913775133255, + "loss": 2.6764, + "step": 5635 + }, + { + "epoch": 0.4548462593818094, + "grad_norm": 0.7406740784645081, + "learning_rate": 0.00016417927168606771, + "loss": 2.6126, + "step": 5636 + }, + { + "epoch": 0.45492696311839237, + "grad_norm": 0.7257869839668274, + "learning_rate": 0.0001641671642592947, + "loss": 2.6035, + "step": 5637 + }, + { + "epoch": 0.4550076668549754, + "grad_norm": 0.8378798961639404, + "learning_rate": 0.00016415505523330822, + "loss": 2.6657, + "step": 5638 + }, + { + "epoch": 0.4550883705915584, + "grad_norm": 0.7218836545944214, + "learning_rate": 0.00016414294460841003, + "loss": 2.6209, + "step": 5639 + }, + { + "epoch": 0.4551690743281414, + "grad_norm": 0.7792766690254211, + "learning_rate": 0.00016413083238490204, + "loss": 2.7208, + "step": 5640 + }, + { + "epoch": 0.4552497780647244, + 
"grad_norm": 0.7800823450088501, + "learning_rate": 0.000164118718563086, + "loss": 2.6351, + "step": 5641 + }, + { + "epoch": 0.4553304818013074, + "grad_norm": 0.7593275904655457, + "learning_rate": 0.00016410660314326395, + "loss": 2.7025, + "step": 5642 + }, + { + "epoch": 0.4554111855378904, + "grad_norm": 0.7561587691307068, + "learning_rate": 0.00016409448612573772, + "loss": 2.6188, + "step": 5643 + }, + { + "epoch": 0.4554918892744734, + "grad_norm": 0.7674516439437866, + "learning_rate": 0.00016408236751080937, + "loss": 2.629, + "step": 5644 + }, + { + "epoch": 0.4555725930110564, + "grad_norm": 0.7112495303153992, + "learning_rate": 0.00016407024729878095, + "loss": 2.6261, + "step": 5645 + }, + { + "epoch": 0.45565329674763944, + "grad_norm": 0.6861695647239685, + "learning_rate": 0.00016405812548995444, + "loss": 2.6984, + "step": 5646 + }, + { + "epoch": 0.4557340004842224, + "grad_norm": 0.7711648941040039, + "learning_rate": 0.000164046002084632, + "loss": 2.6839, + "step": 5647 + }, + { + "epoch": 0.45581470422080544, + "grad_norm": 0.6862967014312744, + "learning_rate": 0.00016403387708311578, + "loss": 2.5964, + "step": 5648 + }, + { + "epoch": 0.4558954079573884, + "grad_norm": 0.707374632358551, + "learning_rate": 0.00016402175048570793, + "loss": 2.6191, + "step": 5649 + }, + { + "epoch": 0.45597611169397145, + "grad_norm": 0.7980892658233643, + "learning_rate": 0.00016400962229271072, + "loss": 2.6288, + "step": 5650 + }, + { + "epoch": 0.45605681543055443, + "grad_norm": 0.686187744140625, + "learning_rate": 0.0001639974925044264, + "loss": 2.6277, + "step": 5651 + }, + { + "epoch": 0.45613751916713746, + "grad_norm": 0.6970425844192505, + "learning_rate": 0.0001639853611211573, + "loss": 2.5726, + "step": 5652 + }, + { + "epoch": 0.45621822290372044, + "grad_norm": 0.701500415802002, + "learning_rate": 0.00016397322814320573, + "loss": 2.6275, + "step": 5653 + }, + { + "epoch": 0.45629892664030347, + "grad_norm": 0.8432207107543945, + 
"learning_rate": 0.00016396109357087407, + "loss": 2.6185, + "step": 5654 + }, + { + "epoch": 0.45637963037688645, + "grad_norm": 0.7049770951271057, + "learning_rate": 0.00016394895740446476, + "loss": 2.674, + "step": 5655 + }, + { + "epoch": 0.4564603341134695, + "grad_norm": 0.7068646550178528, + "learning_rate": 0.00016393681964428026, + "loss": 2.6072, + "step": 5656 + }, + { + "epoch": 0.45654103785005246, + "grad_norm": 0.7698760032653809, + "learning_rate": 0.00016392468029062312, + "loss": 2.6547, + "step": 5657 + }, + { + "epoch": 0.4566217415866355, + "grad_norm": 0.7381031513214111, + "learning_rate": 0.00016391253934379583, + "loss": 2.6125, + "step": 5658 + }, + { + "epoch": 0.45670244532321846, + "grad_norm": 0.7367781400680542, + "learning_rate": 0.00016390039680410097, + "loss": 2.6763, + "step": 5659 + }, + { + "epoch": 0.4567831490598015, + "grad_norm": 0.7416272759437561, + "learning_rate": 0.00016388825267184121, + "loss": 2.7059, + "step": 5660 + }, + { + "epoch": 0.4568638527963845, + "grad_norm": 0.6933416724205017, + "learning_rate": 0.0001638761069473192, + "loss": 2.6028, + "step": 5661 + }, + { + "epoch": 0.45694455653296745, + "grad_norm": 0.7311314940452576, + "learning_rate": 0.00016386395963083756, + "loss": 2.6266, + "step": 5662 + }, + { + "epoch": 0.4570252602695505, + "grad_norm": 0.7172734141349792, + "learning_rate": 0.00016385181072269917, + "loss": 2.6754, + "step": 5663 + }, + { + "epoch": 0.45710596400613346, + "grad_norm": 0.7286428213119507, + "learning_rate": 0.00016383966022320671, + "loss": 2.6637, + "step": 5664 + }, + { + "epoch": 0.4571866677427165, + "grad_norm": 0.7296474575996399, + "learning_rate": 0.00016382750813266308, + "loss": 2.6655, + "step": 5665 + }, + { + "epoch": 0.45726737147929947, + "grad_norm": 0.6929224133491516, + "learning_rate": 0.00016381535445137105, + "loss": 2.6376, + "step": 5666 + }, + { + "epoch": 0.4573480752158825, + "grad_norm": 0.7012765407562256, + "learning_rate": 
0.0001638031991796336, + "loss": 2.6222, + "step": 5667 + }, + { + "epoch": 0.4574287789524655, + "grad_norm": 0.7360745668411255, + "learning_rate": 0.00016379104231775368, + "loss": 2.6304, + "step": 5668 + }, + { + "epoch": 0.4575094826890485, + "grad_norm": 0.7276801466941833, + "learning_rate": 0.00016377888386603419, + "loss": 2.7046, + "step": 5669 + }, + { + "epoch": 0.4575901864256315, + "grad_norm": 0.688432514667511, + "learning_rate": 0.0001637667238247782, + "loss": 2.6598, + "step": 5670 + }, + { + "epoch": 0.4576708901622145, + "grad_norm": 0.6874414682388306, + "learning_rate": 0.00016375456219428877, + "loss": 2.7, + "step": 5671 + }, + { + "epoch": 0.4577515938987975, + "grad_norm": 0.711091160774231, + "learning_rate": 0.000163742398974869, + "loss": 2.6063, + "step": 5672 + }, + { + "epoch": 0.4578322976353805, + "grad_norm": 0.7131791710853577, + "learning_rate": 0.000163730234166822, + "loss": 2.5948, + "step": 5673 + }, + { + "epoch": 0.4579130013719635, + "grad_norm": 0.7166630625724792, + "learning_rate": 0.000163718067770451, + "loss": 2.6488, + "step": 5674 + }, + { + "epoch": 0.45799370510854653, + "grad_norm": 0.7285952568054199, + "learning_rate": 0.00016370589978605916, + "loss": 2.6445, + "step": 5675 + }, + { + "epoch": 0.4580744088451295, + "grad_norm": 0.728050172328949, + "learning_rate": 0.0001636937302139498, + "loss": 2.5425, + "step": 5676 + }, + { + "epoch": 0.45815511258171254, + "grad_norm": 0.7196047902107239, + "learning_rate": 0.00016368155905442615, + "loss": 2.7426, + "step": 5677 + }, + { + "epoch": 0.4582358163182955, + "grad_norm": 0.6844602823257446, + "learning_rate": 0.0001636693863077916, + "loss": 2.6157, + "step": 5678 + }, + { + "epoch": 0.45831652005487855, + "grad_norm": 0.7375781536102295, + "learning_rate": 0.0001636572119743495, + "loss": 2.7069, + "step": 5679 + }, + { + "epoch": 0.4583972237914615, + "grad_norm": 0.7667750120162964, + "learning_rate": 0.0001636450360544033, + "loss": 2.6589, + "step": 
5680 + }, + { + "epoch": 0.45847792752804456, + "grad_norm": 0.6569861173629761, + "learning_rate": 0.00016363285854825642, + "loss": 2.6197, + "step": 5681 + }, + { + "epoch": 0.45855863126462754, + "grad_norm": 0.7177335023880005, + "learning_rate": 0.00016362067945621239, + "loss": 2.6104, + "step": 5682 + }, + { + "epoch": 0.45863933500121057, + "grad_norm": 0.7260481715202332, + "learning_rate": 0.00016360849877857469, + "loss": 2.6435, + "step": 5683 + }, + { + "epoch": 0.45872003873779355, + "grad_norm": 0.7083989381790161, + "learning_rate": 0.00016359631651564693, + "loss": 2.6366, + "step": 5684 + }, + { + "epoch": 0.4588007424743766, + "grad_norm": 0.6417020559310913, + "learning_rate": 0.00016358413266773271, + "loss": 2.6311, + "step": 5685 + }, + { + "epoch": 0.45888144621095955, + "grad_norm": 0.737856924533844, + "learning_rate": 0.0001635719472351357, + "loss": 2.6647, + "step": 5686 + }, + { + "epoch": 0.4589621499475426, + "grad_norm": 0.6774190068244934, + "learning_rate": 0.0001635597602181596, + "loss": 2.6366, + "step": 5687 + }, + { + "epoch": 0.45904285368412556, + "grad_norm": 0.6480480432510376, + "learning_rate": 0.0001635475716171081, + "loss": 2.6501, + "step": 5688 + }, + { + "epoch": 0.4591235574207086, + "grad_norm": 0.7886860370635986, + "learning_rate": 0.0001635353814322851, + "loss": 2.7239, + "step": 5689 + }, + { + "epoch": 0.45920426115729157, + "grad_norm": 0.7579021453857422, + "learning_rate": 0.0001635231896639942, + "loss": 2.6155, + "step": 5690 + }, + { + "epoch": 0.4592849648938746, + "grad_norm": 0.6853809356689453, + "learning_rate": 0.0001635109963125394, + "loss": 2.5933, + "step": 5691 + }, + { + "epoch": 0.4593656686304576, + "grad_norm": 0.661342978477478, + "learning_rate": 0.00016349880137822456, + "loss": 2.6277, + "step": 5692 + }, + { + "epoch": 0.4594463723670406, + "grad_norm": 0.6795682311058044, + "learning_rate": 0.0001634866048613536, + "loss": 2.6221, + "step": 5693 + }, + { + "epoch": 
0.4595270761036236, + "grad_norm": 0.7375383377075195, + "learning_rate": 0.00016347440676223047, + "loss": 2.6082, + "step": 5694 + }, + { + "epoch": 0.4596077798402066, + "grad_norm": 0.7565153241157532, + "learning_rate": 0.0001634622070811592, + "loss": 2.6615, + "step": 5695 + }, + { + "epoch": 0.4596884835767896, + "grad_norm": 0.6869745254516602, + "learning_rate": 0.00016345000581844386, + "loss": 2.6172, + "step": 5696 + }, + { + "epoch": 0.45976918731337263, + "grad_norm": 0.7192853689193726, + "learning_rate": 0.0001634378029743885, + "loss": 2.6324, + "step": 5697 + }, + { + "epoch": 0.4598498910499556, + "grad_norm": 0.6919218301773071, + "learning_rate": 0.00016342559854929726, + "loss": 2.5965, + "step": 5698 + }, + { + "epoch": 0.45993059478653864, + "grad_norm": 0.6715282797813416, + "learning_rate": 0.00016341339254347432, + "loss": 2.6225, + "step": 5699 + }, + { + "epoch": 0.4600112985231216, + "grad_norm": 0.6768380999565125, + "learning_rate": 0.00016340118495722388, + "loss": 2.6376, + "step": 5700 + }, + { + "epoch": 0.46009200225970465, + "grad_norm": 0.6898325681686401, + "learning_rate": 0.00016338897579085018, + "loss": 2.667, + "step": 5701 + }, + { + "epoch": 0.4601727059962876, + "grad_norm": 0.7171810865402222, + "learning_rate": 0.00016337676504465747, + "loss": 2.678, + "step": 5702 + }, + { + "epoch": 0.46025340973287066, + "grad_norm": 0.7050724029541016, + "learning_rate": 0.00016336455271895016, + "loss": 2.619, + "step": 5703 + }, + { + "epoch": 0.46033411346945363, + "grad_norm": 0.8287240862846375, + "learning_rate": 0.00016335233881403248, + "loss": 2.71, + "step": 5704 + }, + { + "epoch": 0.46041481720603666, + "grad_norm": 0.6880568861961365, + "learning_rate": 0.000163340123330209, + "loss": 2.6516, + "step": 5705 + }, + { + "epoch": 0.46049552094261964, + "grad_norm": 0.7222896218299866, + "learning_rate": 0.00016332790626778402, + "loss": 2.5899, + "step": 5706 + }, + { + "epoch": 0.4605762246792027, + "grad_norm": 
0.7707448601722717, + "learning_rate": 0.00016331568762706207, + "loss": 2.6116, + "step": 5707 + }, + { + "epoch": 0.46065692841578565, + "grad_norm": 0.7780653834342957, + "learning_rate": 0.0001633034674083477, + "loss": 2.6072, + "step": 5708 + }, + { + "epoch": 0.4607376321523687, + "grad_norm": 0.7551524639129639, + "learning_rate": 0.00016329124561194545, + "loss": 2.548, + "step": 5709 + }, + { + "epoch": 0.46081833588895166, + "grad_norm": 0.9312284588813782, + "learning_rate": 0.0001632790222381599, + "loss": 2.6557, + "step": 5710 + }, + { + "epoch": 0.4608990396255347, + "grad_norm": 0.7404753565788269, + "learning_rate": 0.0001632667972872957, + "loss": 2.6889, + "step": 5711 + }, + { + "epoch": 0.46097974336211767, + "grad_norm": 0.7423726916313171, + "learning_rate": 0.00016325457075965752, + "loss": 2.6265, + "step": 5712 + }, + { + "epoch": 0.46106044709870064, + "grad_norm": 1.0683187246322632, + "learning_rate": 0.0001632423426555501, + "loss": 2.6827, + "step": 5713 + }, + { + "epoch": 0.4611411508352837, + "grad_norm": 0.7204160094261169, + "learning_rate": 0.0001632301129752782, + "loss": 2.702, + "step": 5714 + }, + { + "epoch": 0.46122185457186665, + "grad_norm": 0.7591153383255005, + "learning_rate": 0.0001632178817191466, + "loss": 2.6031, + "step": 5715 + }, + { + "epoch": 0.4613025583084497, + "grad_norm": 0.8147456645965576, + "learning_rate": 0.00016320564888746013, + "loss": 2.6117, + "step": 5716 + }, + { + "epoch": 0.46138326204503266, + "grad_norm": 0.7880246639251709, + "learning_rate": 0.00016319341448052364, + "loss": 2.5896, + "step": 5717 + }, + { + "epoch": 0.4614639657816157, + "grad_norm": 0.6875137686729431, + "learning_rate": 0.00016318117849864206, + "loss": 2.6258, + "step": 5718 + }, + { + "epoch": 0.46154466951819867, + "grad_norm": 0.7197960615158081, + "learning_rate": 0.00016316894094212044, + "loss": 2.6656, + "step": 5719 + }, + { + "epoch": 0.4616253732547817, + "grad_norm": 0.7049540281295776, + 
"learning_rate": 0.0001631567018112636, + "loss": 2.6698, + "step": 5720 + }, + { + "epoch": 0.4617060769913647, + "grad_norm": 0.7128825783729553, + "learning_rate": 0.00016314446110637668, + "loss": 2.6552, + "step": 5721 + }, + { + "epoch": 0.4617867807279477, + "grad_norm": 0.7956201434135437, + "learning_rate": 0.00016313221882776477, + "loss": 2.6747, + "step": 5722 + }, + { + "epoch": 0.4618674844645307, + "grad_norm": 0.7598347663879395, + "learning_rate": 0.0001631199749757329, + "loss": 2.6187, + "step": 5723 + }, + { + "epoch": 0.4619481882011137, + "grad_norm": 0.6587582230567932, + "learning_rate": 0.00016310772955058627, + "loss": 2.596, + "step": 5724 + }, + { + "epoch": 0.4620288919376967, + "grad_norm": 0.700136125087738, + "learning_rate": 0.00016309548255263003, + "loss": 2.6527, + "step": 5725 + }, + { + "epoch": 0.4621095956742797, + "grad_norm": 0.7246582508087158, + "learning_rate": 0.00016308323398216945, + "loss": 2.6577, + "step": 5726 + }, + { + "epoch": 0.4621902994108627, + "grad_norm": 0.6951557993888855, + "learning_rate": 0.00016307098383950977, + "loss": 2.5816, + "step": 5727 + }, + { + "epoch": 0.46227100314744574, + "grad_norm": 0.7109191417694092, + "learning_rate": 0.0001630587321249563, + "loss": 2.6586, + "step": 5728 + }, + { + "epoch": 0.4623517068840287, + "grad_norm": 0.7357863783836365, + "learning_rate": 0.0001630464788388144, + "loss": 2.691, + "step": 5729 + }, + { + "epoch": 0.46243241062061174, + "grad_norm": 0.7916350960731506, + "learning_rate": 0.00016303422398138945, + "loss": 2.6584, + "step": 5730 + }, + { + "epoch": 0.4625131143571947, + "grad_norm": 0.6543231010437012, + "learning_rate": 0.00016302196755298685, + "loss": 2.6482, + "step": 5731 + }, + { + "epoch": 0.46259381809377775, + "grad_norm": 0.6978787183761597, + "learning_rate": 0.00016300970955391208, + "loss": 2.5956, + "step": 5732 + }, + { + "epoch": 0.46267452183036073, + "grad_norm": 0.7301886677742004, + "learning_rate": 
0.00016299744998447065, + "loss": 2.6178, + "step": 5733 + }, + { + "epoch": 0.46275522556694376, + "grad_norm": 0.7381030321121216, + "learning_rate": 0.00016298518884496808, + "loss": 2.6712, + "step": 5734 + }, + { + "epoch": 0.46283592930352674, + "grad_norm": 0.7769027948379517, + "learning_rate": 0.00016297292613570995, + "loss": 2.6082, + "step": 5735 + }, + { + "epoch": 0.46291663304010977, + "grad_norm": 0.7698354721069336, + "learning_rate": 0.0001629606618570019, + "loss": 2.6543, + "step": 5736 + }, + { + "epoch": 0.46299733677669275, + "grad_norm": 0.7001554369926453, + "learning_rate": 0.00016294839600914957, + "loss": 2.6174, + "step": 5737 + }, + { + "epoch": 0.4630780405132758, + "grad_norm": 0.7589300274848938, + "learning_rate": 0.00016293612859245868, + "loss": 2.6338, + "step": 5738 + }, + { + "epoch": 0.46315874424985876, + "grad_norm": 0.7083945274353027, + "learning_rate": 0.00016292385960723493, + "loss": 2.6793, + "step": 5739 + }, + { + "epoch": 0.4632394479864418, + "grad_norm": 0.739439845085144, + "learning_rate": 0.00016291158905378412, + "loss": 2.7335, + "step": 5740 + }, + { + "epoch": 0.46332015172302476, + "grad_norm": 0.6868166923522949, + "learning_rate": 0.00016289931693241205, + "loss": 2.6139, + "step": 5741 + }, + { + "epoch": 0.4634008554596078, + "grad_norm": 0.7385871410369873, + "learning_rate": 0.0001628870432434246, + "loss": 2.6783, + "step": 5742 + }, + { + "epoch": 0.4634815591961908, + "grad_norm": 0.7227835655212402, + "learning_rate": 0.00016287476798712764, + "loss": 2.6732, + "step": 5743 + }, + { + "epoch": 0.4635622629327738, + "grad_norm": 0.6662411689758301, + "learning_rate": 0.00016286249116382709, + "loss": 2.6645, + "step": 5744 + }, + { + "epoch": 0.4636429666693568, + "grad_norm": 0.8110263347625732, + "learning_rate": 0.00016285021277382894, + "loss": 2.6448, + "step": 5745 + }, + { + "epoch": 0.4637236704059398, + "grad_norm": 0.7419269680976868, + "learning_rate": 0.0001628379328174392, + "loss": 
2.7286, + "step": 5746 + }, + { + "epoch": 0.4638043741425228, + "grad_norm": 0.6518125534057617, + "learning_rate": 0.0001628256512949639, + "loss": 2.6545, + "step": 5747 + }, + { + "epoch": 0.4638850778791058, + "grad_norm": 0.6816060543060303, + "learning_rate": 0.00016281336820670917, + "loss": 2.6167, + "step": 5748 + }, + { + "epoch": 0.4639657816156888, + "grad_norm": 0.6537362337112427, + "learning_rate": 0.0001628010835529811, + "loss": 2.6522, + "step": 5749 + }, + { + "epoch": 0.46404648535227183, + "grad_norm": 0.6720992922782898, + "learning_rate": 0.00016278879733408585, + "loss": 2.6028, + "step": 5750 + }, + { + "epoch": 0.4641271890888548, + "grad_norm": 0.6778908371925354, + "learning_rate": 0.00016277650955032967, + "loss": 2.5591, + "step": 5751 + }, + { + "epoch": 0.46420789282543784, + "grad_norm": 0.6908471584320068, + "learning_rate": 0.0001627642202020187, + "loss": 2.6574, + "step": 5752 + }, + { + "epoch": 0.4642885965620208, + "grad_norm": 0.7034298181533813, + "learning_rate": 0.00016275192928945936, + "loss": 2.657, + "step": 5753 + }, + { + "epoch": 0.46436930029860385, + "grad_norm": 0.7245952486991882, + "learning_rate": 0.0001627396368129579, + "loss": 2.6572, + "step": 5754 + }, + { + "epoch": 0.4644500040351868, + "grad_norm": 0.6764482855796814, + "learning_rate": 0.0001627273427728207, + "loss": 2.6576, + "step": 5755 + }, + { + "epoch": 0.46453070777176986, + "grad_norm": 0.7074379920959473, + "learning_rate": 0.0001627150471693541, + "loss": 2.614, + "step": 5756 + }, + { + "epoch": 0.46461141150835283, + "grad_norm": 0.7292052507400513, + "learning_rate": 0.0001627027500028646, + "loss": 2.673, + "step": 5757 + }, + { + "epoch": 0.46469211524493587, + "grad_norm": 0.7554025650024414, + "learning_rate": 0.0001626904512736587, + "loss": 2.5919, + "step": 5758 + }, + { + "epoch": 0.46477281898151884, + "grad_norm": 0.6829606890678406, + "learning_rate": 0.00016267815098204284, + "loss": 2.7206, + "step": 5759 + }, + { + 
"epoch": 0.4648535227181019, + "grad_norm": 0.7201548218727112, + "learning_rate": 0.00016266584912832363, + "loss": 2.6651, + "step": 5760 + }, + { + "epoch": 0.46493422645468485, + "grad_norm": 0.6889227628707886, + "learning_rate": 0.00016265354571280764, + "loss": 2.6776, + "step": 5761 + }, + { + "epoch": 0.4650149301912679, + "grad_norm": 0.7286190986633301, + "learning_rate": 0.00016264124073580156, + "loss": 2.591, + "step": 5762 + }, + { + "epoch": 0.46509563392785086, + "grad_norm": 0.7222036123275757, + "learning_rate": 0.00016262893419761196, + "loss": 2.6422, + "step": 5763 + }, + { + "epoch": 0.46517633766443384, + "grad_norm": 0.6822768449783325, + "learning_rate": 0.00016261662609854562, + "loss": 2.6126, + "step": 5764 + }, + { + "epoch": 0.46525704140101687, + "grad_norm": 0.7263356447219849, + "learning_rate": 0.00016260431643890929, + "loss": 2.6304, + "step": 5765 + }, + { + "epoch": 0.46533774513759985, + "grad_norm": 0.7152180075645447, + "learning_rate": 0.00016259200521900972, + "loss": 2.6489, + "step": 5766 + }, + { + "epoch": 0.4654184488741829, + "grad_norm": 0.6988116502761841, + "learning_rate": 0.00016257969243915378, + "loss": 2.6151, + "step": 5767 + }, + { + "epoch": 0.46549915261076585, + "grad_norm": 0.7131790518760681, + "learning_rate": 0.00016256737809964831, + "loss": 2.6284, + "step": 5768 + }, + { + "epoch": 0.4655798563473489, + "grad_norm": 0.674196183681488, + "learning_rate": 0.00016255506220080025, + "loss": 2.5815, + "step": 5769 + }, + { + "epoch": 0.46566056008393186, + "grad_norm": 0.7166198492050171, + "learning_rate": 0.0001625427447429165, + "loss": 2.6594, + "step": 5770 + }, + { + "epoch": 0.4657412638205149, + "grad_norm": 0.6997127532958984, + "learning_rate": 0.00016253042572630407, + "loss": 2.6502, + "step": 5771 + }, + { + "epoch": 0.46582196755709787, + "grad_norm": 0.7761591076850891, + "learning_rate": 0.00016251810515126994, + "loss": 2.624, + "step": 5772 + }, + { + "epoch": 0.4659026712936809, + 
"grad_norm": 0.7038728594779968, + "learning_rate": 0.00016250578301812125, + "loss": 2.6096, + "step": 5773 + }, + { + "epoch": 0.4659833750302639, + "grad_norm": 0.7080080509185791, + "learning_rate": 0.00016249345932716505, + "loss": 2.6196, + "step": 5774 + }, + { + "epoch": 0.4660640787668469, + "grad_norm": 0.7461444735527039, + "learning_rate": 0.00016248113407870847, + "loss": 2.65, + "step": 5775 + }, + { + "epoch": 0.4661447825034299, + "grad_norm": 0.7914463877677917, + "learning_rate": 0.00016246880727305868, + "loss": 2.6539, + "step": 5776 + }, + { + "epoch": 0.4662254862400129, + "grad_norm": 0.7067776918411255, + "learning_rate": 0.00016245647891052295, + "loss": 2.72, + "step": 5777 + }, + { + "epoch": 0.4663061899765959, + "grad_norm": 0.7190818190574646, + "learning_rate": 0.00016244414899140852, + "loss": 2.7029, + "step": 5778 + }, + { + "epoch": 0.46638689371317893, + "grad_norm": 0.6740003824234009, + "learning_rate": 0.00016243181751602261, + "loss": 2.6404, + "step": 5779 + }, + { + "epoch": 0.4664675974497619, + "grad_norm": 0.7942661643028259, + "learning_rate": 0.00016241948448467267, + "loss": 2.6333, + "step": 5780 + }, + { + "epoch": 0.46654830118634494, + "grad_norm": 0.6415690183639526, + "learning_rate": 0.00016240714989766597, + "loss": 2.6354, + "step": 5781 + }, + { + "epoch": 0.4666290049229279, + "grad_norm": 0.7287769913673401, + "learning_rate": 0.00016239481375530997, + "loss": 2.6721, + "step": 5782 + }, + { + "epoch": 0.46670970865951095, + "grad_norm": 0.8197699189186096, + "learning_rate": 0.00016238247605791212, + "loss": 2.7577, + "step": 5783 + }, + { + "epoch": 0.4667904123960939, + "grad_norm": 0.8182012438774109, + "learning_rate": 0.0001623701368057799, + "loss": 2.6475, + "step": 5784 + }, + { + "epoch": 0.46687111613267696, + "grad_norm": 0.6974665522575378, + "learning_rate": 0.00016235779599922082, + "loss": 2.5897, + "step": 5785 + }, + { + "epoch": 0.46695181986925993, + "grad_norm": 0.7156379222869873, + 
"learning_rate": 0.00016234545363854247, + "loss": 2.5981, + "step": 5786 + }, + { + "epoch": 0.46703252360584296, + "grad_norm": 0.6875364780426025, + "learning_rate": 0.0001623331097240524, + "loss": 2.6333, + "step": 5787 + }, + { + "epoch": 0.46711322734242594, + "grad_norm": 0.7222917675971985, + "learning_rate": 0.00016232076425605835, + "loss": 2.5865, + "step": 5788 + }, + { + "epoch": 0.467193931079009, + "grad_norm": 0.7224915027618408, + "learning_rate": 0.00016230841723486792, + "loss": 2.667, + "step": 5789 + }, + { + "epoch": 0.46727463481559195, + "grad_norm": 0.7125402688980103, + "learning_rate": 0.00016229606866078887, + "loss": 2.6548, + "step": 5790 + }, + { + "epoch": 0.467355338552175, + "grad_norm": 0.6866132616996765, + "learning_rate": 0.00016228371853412894, + "loss": 2.6381, + "step": 5791 + }, + { + "epoch": 0.46743604228875796, + "grad_norm": 0.7573552131652832, + "learning_rate": 0.00016227136685519593, + "loss": 2.6766, + "step": 5792 + }, + { + "epoch": 0.467516746025341, + "grad_norm": 0.7565932273864746, + "learning_rate": 0.00016225901362429767, + "loss": 2.5965, + "step": 5793 + }, + { + "epoch": 0.46759744976192397, + "grad_norm": 0.7279250621795654, + "learning_rate": 0.00016224665884174207, + "loss": 2.6599, + "step": 5794 + }, + { + "epoch": 0.467678153498507, + "grad_norm": 0.7501276731491089, + "learning_rate": 0.000162234302507837, + "loss": 2.636, + "step": 5795 + }, + { + "epoch": 0.46775885723509, + "grad_norm": 0.7823930978775024, + "learning_rate": 0.00016222194462289042, + "loss": 2.6277, + "step": 5796 + }, + { + "epoch": 0.467839560971673, + "grad_norm": 0.7168415784835815, + "learning_rate": 0.00016220958518721034, + "loss": 2.6868, + "step": 5797 + }, + { + "epoch": 0.467920264708256, + "grad_norm": 0.7468454241752625, + "learning_rate": 0.00016219722420110478, + "loss": 2.7209, + "step": 5798 + }, + { + "epoch": 0.468000968444839, + "grad_norm": 0.6915228962898254, + "learning_rate": 0.0001621848616648818, + 
"loss": 2.6356, + "step": 5799 + }, + { + "epoch": 0.468081672181422, + "grad_norm": 0.7731573581695557, + "learning_rate": 0.00016217249757884955, + "loss": 2.6396, + "step": 5800 + }, + { + "epoch": 0.468162375918005, + "grad_norm": 0.6579388380050659, + "learning_rate": 0.0001621601319433161, + "loss": 2.6077, + "step": 5801 + }, + { + "epoch": 0.468243079654588, + "grad_norm": 0.7136246562004089, + "learning_rate": 0.00016214776475858967, + "loss": 2.6602, + "step": 5802 + }, + { + "epoch": 0.46832378339117103, + "grad_norm": 0.6929461359977722, + "learning_rate": 0.0001621353960249785, + "loss": 2.6851, + "step": 5803 + }, + { + "epoch": 0.468404487127754, + "grad_norm": 0.8001779913902283, + "learning_rate": 0.00016212302574279087, + "loss": 2.6577, + "step": 5804 + }, + { + "epoch": 0.46848519086433704, + "grad_norm": 0.7637671828269958, + "learning_rate": 0.00016211065391233498, + "loss": 2.6923, + "step": 5805 + }, + { + "epoch": 0.46856589460092, + "grad_norm": 0.6879906058311462, + "learning_rate": 0.0001620982805339193, + "loss": 2.6555, + "step": 5806 + }, + { + "epoch": 0.46864659833750305, + "grad_norm": 0.7731223702430725, + "learning_rate": 0.0001620859056078521, + "loss": 2.6301, + "step": 5807 + }, + { + "epoch": 0.468727302074086, + "grad_norm": 0.7351491451263428, + "learning_rate": 0.00016207352913444185, + "loss": 2.6154, + "step": 5808 + }, + { + "epoch": 0.46880800581066906, + "grad_norm": 0.716314435005188, + "learning_rate": 0.000162061151113997, + "loss": 2.6294, + "step": 5809 + }, + { + "epoch": 0.46888870954725204, + "grad_norm": 0.6974702477455139, + "learning_rate": 0.00016204877154682605, + "loss": 2.6046, + "step": 5810 + }, + { + "epoch": 0.46896941328383507, + "grad_norm": 0.7456035614013672, + "learning_rate": 0.00016203639043323745, + "loss": 2.6308, + "step": 5811 + }, + { + "epoch": 0.46905011702041804, + "grad_norm": 0.7198047637939453, + "learning_rate": 0.0001620240077735399, + "loss": 2.6303, + "step": 5812 + }, + { + 
"epoch": 0.4691308207570011, + "grad_norm": 0.7098269462585449, + "learning_rate": 0.00016201162356804192, + "loss": 2.6352, + "step": 5813 + }, + { + "epoch": 0.46921152449358405, + "grad_norm": 0.7060410976409912, + "learning_rate": 0.0001619992378170522, + "loss": 2.6489, + "step": 5814 + }, + { + "epoch": 0.46929222823016703, + "grad_norm": 0.7126092314720154, + "learning_rate": 0.0001619868505208794, + "loss": 2.66, + "step": 5815 + }, + { + "epoch": 0.46937293196675006, + "grad_norm": 0.7391123175621033, + "learning_rate": 0.00016197446167983223, + "loss": 2.6066, + "step": 5816 + }, + { + "epoch": 0.46945363570333304, + "grad_norm": 0.7282211780548096, + "learning_rate": 0.0001619620712942195, + "loss": 2.6422, + "step": 5817 + }, + { + "epoch": 0.46953433943991607, + "grad_norm": 0.7581801414489746, + "learning_rate": 0.00016194967936434998, + "loss": 2.702, + "step": 5818 + }, + { + "epoch": 0.46961504317649905, + "grad_norm": 0.6649011373519897, + "learning_rate": 0.00016193728589053248, + "loss": 2.6235, + "step": 5819 + }, + { + "epoch": 0.4696957469130821, + "grad_norm": 0.720312237739563, + "learning_rate": 0.00016192489087307592, + "loss": 2.5961, + "step": 5820 + }, + { + "epoch": 0.46977645064966506, + "grad_norm": 0.72076016664505, + "learning_rate": 0.0001619124943122892, + "loss": 2.6793, + "step": 5821 + }, + { + "epoch": 0.4698571543862481, + "grad_norm": 0.6695740818977356, + "learning_rate": 0.0001619000962084813, + "loss": 2.6325, + "step": 5822 + }, + { + "epoch": 0.46993785812283106, + "grad_norm": 0.7678804993629456, + "learning_rate": 0.0001618876965619612, + "loss": 2.7473, + "step": 5823 + }, + { + "epoch": 0.4700185618594141, + "grad_norm": 0.782349169254303, + "learning_rate": 0.00016187529537303792, + "loss": 2.6139, + "step": 5824 + }, + { + "epoch": 0.4700992655959971, + "grad_norm": 0.6906631588935852, + "learning_rate": 0.00016186289264202052, + "loss": 2.6529, + "step": 5825 + }, + { + "epoch": 0.4701799693325801, + 
"grad_norm": 0.732947051525116, + "learning_rate": 0.00016185048836921814, + "loss": 2.6416, + "step": 5826 + }, + { + "epoch": 0.4702606730691631, + "grad_norm": 0.8306718468666077, + "learning_rate": 0.0001618380825549399, + "loss": 2.6566, + "step": 5827 + }, + { + "epoch": 0.4703413768057461, + "grad_norm": 0.725764811038971, + "learning_rate": 0.00016182567519949502, + "loss": 2.6664, + "step": 5828 + }, + { + "epoch": 0.4704220805423291, + "grad_norm": 0.7301872372627258, + "learning_rate": 0.00016181326630319268, + "loss": 2.6666, + "step": 5829 + }, + { + "epoch": 0.4705027842789121, + "grad_norm": 0.7297122478485107, + "learning_rate": 0.00016180085586634216, + "loss": 2.6415, + "step": 5830 + }, + { + "epoch": 0.4705834880154951, + "grad_norm": 0.7445664405822754, + "learning_rate": 0.00016178844388925278, + "loss": 2.6112, + "step": 5831 + }, + { + "epoch": 0.47066419175207813, + "grad_norm": 0.7787267565727234, + "learning_rate": 0.00016177603037223384, + "loss": 2.6452, + "step": 5832 + }, + { + "epoch": 0.4707448954886611, + "grad_norm": 0.7386903762817383, + "learning_rate": 0.00016176361531559474, + "loss": 2.6919, + "step": 5833 + }, + { + "epoch": 0.47082559922524414, + "grad_norm": 0.7991776466369629, + "learning_rate": 0.0001617511987196449, + "loss": 2.6728, + "step": 5834 + }, + { + "epoch": 0.4709063029618271, + "grad_norm": 0.7196263670921326, + "learning_rate": 0.00016173878058469375, + "loss": 2.6008, + "step": 5835 + }, + { + "epoch": 0.47098700669841015, + "grad_norm": 0.6773477792739868, + "learning_rate": 0.00016172636091105086, + "loss": 2.6184, + "step": 5836 + }, + { + "epoch": 0.4710677104349931, + "grad_norm": 0.7238345742225647, + "learning_rate": 0.00016171393969902567, + "loss": 2.6221, + "step": 5837 + }, + { + "epoch": 0.47114841417157616, + "grad_norm": 0.702104926109314, + "learning_rate": 0.00016170151694892777, + "loss": 2.5909, + "step": 5838 + }, + { + "epoch": 0.47122911790815913, + "grad_norm": 0.7571590542793274, + 
"learning_rate": 0.00016168909266106677, + "loss": 2.6044, + "step": 5839 + }, + { + "epoch": 0.47130982164474217, + "grad_norm": 0.7408227324485779, + "learning_rate": 0.00016167666683575234, + "loss": 2.5771, + "step": 5840 + }, + { + "epoch": 0.47139052538132514, + "grad_norm": 0.6760764122009277, + "learning_rate": 0.00016166423947329414, + "loss": 2.6202, + "step": 5841 + }, + { + "epoch": 0.4714712291179082, + "grad_norm": 0.7085632681846619, + "learning_rate": 0.00016165181057400192, + "loss": 2.5887, + "step": 5842 + }, + { + "epoch": 0.47155193285449115, + "grad_norm": 0.7298943400382996, + "learning_rate": 0.00016163938013818538, + "loss": 2.609, + "step": 5843 + }, + { + "epoch": 0.4716326365910742, + "grad_norm": 0.7591157555580139, + "learning_rate": 0.0001616269481661544, + "loss": 2.6582, + "step": 5844 + }, + { + "epoch": 0.47171334032765716, + "grad_norm": 0.6727088093757629, + "learning_rate": 0.00016161451465821877, + "loss": 2.6289, + "step": 5845 + }, + { + "epoch": 0.4717940440642402, + "grad_norm": 0.6782706379890442, + "learning_rate": 0.00016160207961468835, + "loss": 2.6875, + "step": 5846 + }, + { + "epoch": 0.47187474780082317, + "grad_norm": 0.6839444041252136, + "learning_rate": 0.00016158964303587313, + "loss": 2.5687, + "step": 5847 + }, + { + "epoch": 0.4719554515374062, + "grad_norm": 0.7565997838973999, + "learning_rate": 0.00016157720492208295, + "loss": 2.6855, + "step": 5848 + }, + { + "epoch": 0.4720361552739892, + "grad_norm": 0.7286611199378967, + "learning_rate": 0.0001615647652736279, + "loss": 2.5906, + "step": 5849 + }, + { + "epoch": 0.4721168590105722, + "grad_norm": 0.7503396272659302, + "learning_rate": 0.00016155232409081793, + "loss": 2.6419, + "step": 5850 + }, + { + "epoch": 0.4721975627471552, + "grad_norm": 0.6924198865890503, + "learning_rate": 0.00016153988137396317, + "loss": 2.661, + "step": 5851 + }, + { + "epoch": 0.4722782664837382, + "grad_norm": 0.7731672525405884, + "learning_rate": 
0.0001615274371233737, + "loss": 2.6993, + "step": 5852 + }, + { + "epoch": 0.4723589702203212, + "grad_norm": 0.7422799468040466, + "learning_rate": 0.00016151499133935964, + "loss": 2.6134, + "step": 5853 + }, + { + "epoch": 0.4724396739569042, + "grad_norm": 0.6924546957015991, + "learning_rate": 0.0001615025440222312, + "loss": 2.672, + "step": 5854 + }, + { + "epoch": 0.4725203776934872, + "grad_norm": 0.7205976843833923, + "learning_rate": 0.00016149009517229862, + "loss": 2.6722, + "step": 5855 + }, + { + "epoch": 0.47260108143007024, + "grad_norm": 0.6898519992828369, + "learning_rate": 0.0001614776447898721, + "loss": 2.6474, + "step": 5856 + }, + { + "epoch": 0.4726817851666532, + "grad_norm": 0.7512481212615967, + "learning_rate": 0.00016146519287526197, + "loss": 2.7413, + "step": 5857 + }, + { + "epoch": 0.47276248890323624, + "grad_norm": 0.6734220385551453, + "learning_rate": 0.0001614527394287786, + "loss": 2.6114, + "step": 5858 + }, + { + "epoch": 0.4728431926398192, + "grad_norm": 0.6745339632034302, + "learning_rate": 0.00016144028445073228, + "loss": 2.6039, + "step": 5859 + }, + { + "epoch": 0.47292389637640225, + "grad_norm": 0.7463086843490601, + "learning_rate": 0.0001614278279414335, + "loss": 2.6109, + "step": 5860 + }, + { + "epoch": 0.47300460011298523, + "grad_norm": 0.7203261256217957, + "learning_rate": 0.00016141536990119264, + "loss": 2.651, + "step": 5861 + }, + { + "epoch": 0.47308530384956826, + "grad_norm": 0.7718746066093445, + "learning_rate": 0.00016140291033032024, + "loss": 2.6953, + "step": 5862 + }, + { + "epoch": 0.47316600758615124, + "grad_norm": 0.7854858040809631, + "learning_rate": 0.0001613904492291268, + "loss": 2.5941, + "step": 5863 + }, + { + "epoch": 0.47324671132273427, + "grad_norm": 0.7218664288520813, + "learning_rate": 0.0001613779865979229, + "loss": 2.6447, + "step": 5864 + }, + { + "epoch": 0.47332741505931725, + "grad_norm": 0.7479045987129211, + "learning_rate": 0.0001613655224370191, + "loss": 
2.6662, + "step": 5865 + }, + { + "epoch": 0.4734081187959002, + "grad_norm": 0.7335021495819092, + "learning_rate": 0.00016135305674672612, + "loss": 2.6283, + "step": 5866 + }, + { + "epoch": 0.47348882253248326, + "grad_norm": 0.7650331258773804, + "learning_rate": 0.00016134058952735453, + "loss": 2.7168, + "step": 5867 + }, + { + "epoch": 0.47356952626906623, + "grad_norm": 0.733383297920227, + "learning_rate": 0.00016132812077921513, + "loss": 2.6352, + "step": 5868 + }, + { + "epoch": 0.47365023000564926, + "grad_norm": 1.3944146633148193, + "learning_rate": 0.00016131565050261866, + "loss": 2.7518, + "step": 5869 + }, + { + "epoch": 0.47373093374223224, + "grad_norm": 0.746112585067749, + "learning_rate": 0.0001613031786978759, + "loss": 2.6253, + "step": 5870 + }, + { + "epoch": 0.4738116374788153, + "grad_norm": 0.9859737753868103, + "learning_rate": 0.00016129070536529766, + "loss": 2.6682, + "step": 5871 + }, + { + "epoch": 0.47389234121539825, + "grad_norm": 0.7358877062797546, + "learning_rate": 0.00016127823050519484, + "loss": 2.6712, + "step": 5872 + }, + { + "epoch": 0.4739730449519813, + "grad_norm": 0.7379923462867737, + "learning_rate": 0.0001612657541178783, + "loss": 2.6268, + "step": 5873 + }, + { + "epoch": 0.47405374868856426, + "grad_norm": 0.7671005725860596, + "learning_rate": 0.00016125327620365907, + "loss": 2.6127, + "step": 5874 + }, + { + "epoch": 0.4741344524251473, + "grad_norm": 0.8007156252861023, + "learning_rate": 0.00016124079676284805, + "loss": 2.6173, + "step": 5875 + }, + { + "epoch": 0.47421515616173027, + "grad_norm": 0.7930500507354736, + "learning_rate": 0.00016122831579575627, + "loss": 2.589, + "step": 5876 + }, + { + "epoch": 0.4742958598983133, + "grad_norm": 0.788006603717804, + "learning_rate": 0.00016121583330269484, + "loss": 2.6731, + "step": 5877 + }, + { + "epoch": 0.4743765636348963, + "grad_norm": 0.742148220539093, + "learning_rate": 0.00016120334928397483, + "loss": 2.674, + "step": 5878 + }, + { + 
"epoch": 0.4744572673714793, + "grad_norm": 0.6823038458824158, + "learning_rate": 0.00016119086373990736, + "loss": 2.6153, + "step": 5879 + }, + { + "epoch": 0.4745379711080623, + "grad_norm": 0.7542331218719482, + "learning_rate": 0.00016117837667080356, + "loss": 2.6739, + "step": 5880 + }, + { + "epoch": 0.4746186748446453, + "grad_norm": 0.8163543343544006, + "learning_rate": 0.00016116588807697476, + "loss": 2.6558, + "step": 5881 + }, + { + "epoch": 0.4746993785812283, + "grad_norm": 0.7528213858604431, + "learning_rate": 0.0001611533979587321, + "loss": 2.6243, + "step": 5882 + }, + { + "epoch": 0.4747800823178113, + "grad_norm": 0.7476626038551331, + "learning_rate": 0.00016114090631638695, + "loss": 2.5984, + "step": 5883 + }, + { + "epoch": 0.4748607860543943, + "grad_norm": 0.7436621785163879, + "learning_rate": 0.00016112841315025055, + "loss": 2.6118, + "step": 5884 + }, + { + "epoch": 0.47494148979097733, + "grad_norm": 0.8024004101753235, + "learning_rate": 0.0001611159184606343, + "loss": 2.6926, + "step": 5885 + }, + { + "epoch": 0.4750221935275603, + "grad_norm": 0.7475626468658447, + "learning_rate": 0.00016110342224784962, + "loss": 2.6175, + "step": 5886 + }, + { + "epoch": 0.47510289726414334, + "grad_norm": 0.7900637984275818, + "learning_rate": 0.00016109092451220796, + "loss": 2.6503, + "step": 5887 + }, + { + "epoch": 0.4751836010007263, + "grad_norm": 0.6988356113433838, + "learning_rate": 0.00016107842525402074, + "loss": 2.6494, + "step": 5888 + }, + { + "epoch": 0.47526430473730935, + "grad_norm": 1.0214186906814575, + "learning_rate": 0.00016106592447359948, + "loss": 2.6476, + "step": 5889 + }, + { + "epoch": 0.4753450084738923, + "grad_norm": 0.741527795791626, + "learning_rate": 0.00016105342217125578, + "loss": 2.6054, + "step": 5890 + }, + { + "epoch": 0.47542571221047536, + "grad_norm": 0.7196603417396545, + "learning_rate": 0.0001610409183473012, + "loss": 2.6146, + "step": 5891 + }, + { + "epoch": 0.47550641594705834, + 
"grad_norm": 0.8130923509597778, + "learning_rate": 0.00016102841300204737, + "loss": 2.6505, + "step": 5892 + }, + { + "epoch": 0.47558711968364137, + "grad_norm": 0.7929537892341614, + "learning_rate": 0.00016101590613580596, + "loss": 2.6725, + "step": 5893 + }, + { + "epoch": 0.47566782342022434, + "grad_norm": 0.7149303555488586, + "learning_rate": 0.00016100339774888865, + "loss": 2.6272, + "step": 5894 + }, + { + "epoch": 0.4757485271568074, + "grad_norm": 0.7242792248725891, + "learning_rate": 0.00016099088784160724, + "loss": 2.5948, + "step": 5895 + }, + { + "epoch": 0.47582923089339035, + "grad_norm": 0.7571540474891663, + "learning_rate": 0.00016097837641427346, + "loss": 2.689, + "step": 5896 + }, + { + "epoch": 0.4759099346299734, + "grad_norm": 0.7402021288871765, + "learning_rate": 0.00016096586346719916, + "loss": 2.7035, + "step": 5897 + }, + { + "epoch": 0.47599063836655636, + "grad_norm": 0.7195574045181274, + "learning_rate": 0.00016095334900069613, + "loss": 2.5862, + "step": 5898 + }, + { + "epoch": 0.4760713421031394, + "grad_norm": 0.7677412033081055, + "learning_rate": 0.00016094083301507634, + "loss": 2.6715, + "step": 5899 + }, + { + "epoch": 0.47615204583972237, + "grad_norm": 0.7131708860397339, + "learning_rate": 0.0001609283155106517, + "loss": 2.6555, + "step": 5900 + }, + { + "epoch": 0.4762327495763054, + "grad_norm": 0.6774055361747742, + "learning_rate": 0.00016091579648773414, + "loss": 2.621, + "step": 5901 + }, + { + "epoch": 0.4763134533128884, + "grad_norm": 0.6873257160186768, + "learning_rate": 0.00016090327594663571, + "loss": 2.6719, + "step": 5902 + }, + { + "epoch": 0.4763941570494714, + "grad_norm": 0.8004229068756104, + "learning_rate": 0.00016089075388766845, + "loss": 2.6926, + "step": 5903 + }, + { + "epoch": 0.4764748607860544, + "grad_norm": 0.7196173667907715, + "learning_rate": 0.00016087823031114438, + "loss": 2.6032, + "step": 5904 + }, + { + "epoch": 0.4765555645226374, + "grad_norm": 0.7665518522262573, + 
"learning_rate": 0.00016086570521737573, + "loss": 2.6359, + "step": 5905 + }, + { + "epoch": 0.4766362682592204, + "grad_norm": 0.7240240573883057, + "learning_rate": 0.0001608531786066746, + "loss": 2.6489, + "step": 5906 + }, + { + "epoch": 0.47671697199580343, + "grad_norm": 0.7603839039802551, + "learning_rate": 0.00016084065047935317, + "loss": 2.6064, + "step": 5907 + }, + { + "epoch": 0.4767976757323864, + "grad_norm": 0.7394058704376221, + "learning_rate": 0.0001608281208357237, + "loss": 2.6643, + "step": 5908 + }, + { + "epoch": 0.47687837946896944, + "grad_norm": 0.7183148860931396, + "learning_rate": 0.00016081558967609845, + "loss": 2.56, + "step": 5909 + }, + { + "epoch": 0.4769590832055524, + "grad_norm": 0.7181926965713501, + "learning_rate": 0.00016080305700078972, + "loss": 2.6665, + "step": 5910 + }, + { + "epoch": 0.47703978694213545, + "grad_norm": 0.7634081840515137, + "learning_rate": 0.00016079052281010988, + "loss": 2.7076, + "step": 5911 + }, + { + "epoch": 0.4771204906787184, + "grad_norm": 0.7928739190101624, + "learning_rate": 0.0001607779871043713, + "loss": 2.6512, + "step": 5912 + }, + { + "epoch": 0.47720119441530146, + "grad_norm": 0.7192893028259277, + "learning_rate": 0.00016076544988388643, + "loss": 2.6453, + "step": 5913 + }, + { + "epoch": 0.47728189815188443, + "grad_norm": 0.7171720862388611, + "learning_rate": 0.00016075291114896767, + "loss": 2.6501, + "step": 5914 + }, + { + "epoch": 0.47736260188846746, + "grad_norm": 0.6787160038948059, + "learning_rate": 0.00016074037089992756, + "loss": 2.6566, + "step": 5915 + }, + { + "epoch": 0.47744330562505044, + "grad_norm": 0.8118634819984436, + "learning_rate": 0.00016072782913707868, + "loss": 2.6635, + "step": 5916 + }, + { + "epoch": 0.4775240093616334, + "grad_norm": 0.7188509702682495, + "learning_rate": 0.0001607152858607335, + "loss": 2.6899, + "step": 5917 + }, + { + "epoch": 0.47760471309821645, + "grad_norm": 0.6742647290229797, + "learning_rate": 
0.00016070274107120468, + "loss": 2.6221, + "step": 5918 + }, + { + "epoch": 0.4776854168347994, + "grad_norm": 0.7274083495140076, + "learning_rate": 0.00016069019476880488, + "loss": 2.6588, + "step": 5919 + }, + { + "epoch": 0.47776612057138246, + "grad_norm": 0.6984386444091797, + "learning_rate": 0.00016067764695384682, + "loss": 2.6376, + "step": 5920 + }, + { + "epoch": 0.47784682430796543, + "grad_norm": 0.7260883450508118, + "learning_rate": 0.00016066509762664315, + "loss": 2.6623, + "step": 5921 + }, + { + "epoch": 0.47792752804454847, + "grad_norm": 0.7540579438209534, + "learning_rate": 0.00016065254678750666, + "loss": 2.695, + "step": 5922 + }, + { + "epoch": 0.47800823178113144, + "grad_norm": 0.7032651305198669, + "learning_rate": 0.00016063999443675017, + "loss": 2.6791, + "step": 5923 + }, + { + "epoch": 0.4780889355177145, + "grad_norm": 0.682842493057251, + "learning_rate": 0.0001606274405746865, + "loss": 2.6198, + "step": 5924 + }, + { + "epoch": 0.47816963925429745, + "grad_norm": 0.6843859553337097, + "learning_rate": 0.00016061488520162853, + "loss": 2.6432, + "step": 5925 + }, + { + "epoch": 0.4782503429908805, + "grad_norm": 0.652119517326355, + "learning_rate": 0.00016060232831788918, + "loss": 2.6461, + "step": 5926 + }, + { + "epoch": 0.47833104672746346, + "grad_norm": 0.6986887454986572, + "learning_rate": 0.0001605897699237814, + "loss": 2.5885, + "step": 5927 + }, + { + "epoch": 0.4784117504640465, + "grad_norm": 0.7156725525856018, + "learning_rate": 0.00016057721001961817, + "loss": 2.6526, + "step": 5928 + }, + { + "epoch": 0.47849245420062947, + "grad_norm": 0.7367579936981201, + "learning_rate": 0.0001605646486057125, + "loss": 2.5842, + "step": 5929 + }, + { + "epoch": 0.4785731579372125, + "grad_norm": 0.7059770822525024, + "learning_rate": 0.00016055208568237746, + "loss": 2.617, + "step": 5930 + }, + { + "epoch": 0.4786538616737955, + "grad_norm": 0.7225117087364197, + "learning_rate": 0.00016053952124992619, + "loss": 
2.6499, + "step": 5931 + }, + { + "epoch": 0.4787345654103785, + "grad_norm": 0.7027475237846375, + "learning_rate": 0.00016052695530867177, + "loss": 2.5934, + "step": 5932 + }, + { + "epoch": 0.4788152691469615, + "grad_norm": 0.7031852602958679, + "learning_rate": 0.00016051438785892743, + "loss": 2.5947, + "step": 5933 + }, + { + "epoch": 0.4788959728835445, + "grad_norm": 0.6731768846511841, + "learning_rate": 0.00016050181890100635, + "loss": 2.6811, + "step": 5934 + }, + { + "epoch": 0.4789766766201275, + "grad_norm": 0.7120038866996765, + "learning_rate": 0.0001604892484352218, + "loss": 2.6625, + "step": 5935 + }, + { + "epoch": 0.4790573803567105, + "grad_norm": 0.6895150542259216, + "learning_rate": 0.00016047667646188702, + "loss": 2.6784, + "step": 5936 + }, + { + "epoch": 0.4791380840932935, + "grad_norm": 0.7080708742141724, + "learning_rate": 0.0001604641029813154, + "loss": 2.6491, + "step": 5937 + }, + { + "epoch": 0.47921878782987654, + "grad_norm": 0.6522819399833679, + "learning_rate": 0.00016045152799382025, + "loss": 2.6113, + "step": 5938 + }, + { + "epoch": 0.4792994915664595, + "grad_norm": 0.6988112926483154, + "learning_rate": 0.00016043895149971506, + "loss": 2.6892, + "step": 5939 + }, + { + "epoch": 0.47938019530304254, + "grad_norm": 0.7545368671417236, + "learning_rate": 0.00016042637349931318, + "loss": 2.6872, + "step": 5940 + }, + { + "epoch": 0.4794608990396255, + "grad_norm": 0.7083707451820374, + "learning_rate": 0.0001604137939929281, + "loss": 2.6726, + "step": 5941 + }, + { + "epoch": 0.47954160277620855, + "grad_norm": 0.8198027014732361, + "learning_rate": 0.00016040121298087337, + "loss": 2.647, + "step": 5942 + }, + { + "epoch": 0.47962230651279153, + "grad_norm": 0.7296201586723328, + "learning_rate": 0.00016038863046346252, + "loss": 2.7122, + "step": 5943 + }, + { + "epoch": 0.47970301024937456, + "grad_norm": 0.7262474298477173, + "learning_rate": 0.00016037604644100913, + "loss": 2.6903, + "step": 5944 + }, + { + 
"epoch": 0.47978371398595754, + "grad_norm": 0.8010182976722717, + "learning_rate": 0.00016036346091382686, + "loss": 2.6942, + "step": 5945 + }, + { + "epoch": 0.47986441772254057, + "grad_norm": 0.7227098345756531, + "learning_rate": 0.00016035087388222932, + "loss": 2.6661, + "step": 5946 + }, + { + "epoch": 0.47994512145912355, + "grad_norm": 0.7374662756919861, + "learning_rate": 0.00016033828534653028, + "loss": 2.6233, + "step": 5947 + }, + { + "epoch": 0.4800258251957066, + "grad_norm": 0.7139650583267212, + "learning_rate": 0.00016032569530704342, + "loss": 2.5859, + "step": 5948 + }, + { + "epoch": 0.48010652893228956, + "grad_norm": 0.7067660689353943, + "learning_rate": 0.00016031310376408254, + "loss": 2.6677, + "step": 5949 + }, + { + "epoch": 0.4801872326688726, + "grad_norm": 0.694715142250061, + "learning_rate": 0.00016030051071796146, + "loss": 2.6415, + "step": 5950 + }, + { + "epoch": 0.48026793640545556, + "grad_norm": 0.728918194770813, + "learning_rate": 0.00016028791616899403, + "loss": 2.6274, + "step": 5951 + }, + { + "epoch": 0.4803486401420386, + "grad_norm": 0.699846088886261, + "learning_rate": 0.00016027532011749412, + "loss": 2.6613, + "step": 5952 + }, + { + "epoch": 0.4804293438786216, + "grad_norm": 0.7177432179450989, + "learning_rate": 0.0001602627225637757, + "loss": 2.6107, + "step": 5953 + }, + { + "epoch": 0.4805100476152046, + "grad_norm": 0.7502370476722717, + "learning_rate": 0.00016025012350815267, + "loss": 2.6534, + "step": 5954 + }, + { + "epoch": 0.4805907513517876, + "grad_norm": 0.7730218172073364, + "learning_rate": 0.0001602375229509391, + "loss": 2.7037, + "step": 5955 + }, + { + "epoch": 0.4806714550883706, + "grad_norm": 0.7046666145324707, + "learning_rate": 0.00016022492089244898, + "loss": 2.6336, + "step": 5956 + }, + { + "epoch": 0.4807521588249536, + "grad_norm": 0.7991104125976562, + "learning_rate": 0.0001602123173329964, + "loss": 2.7024, + "step": 5957 + }, + { + "epoch": 0.4808328625615366, + 
"grad_norm": 0.7056288123130798, + "learning_rate": 0.00016019971227289548, + "loss": 2.6088, + "step": 5958 + }, + { + "epoch": 0.4809135662981196, + "grad_norm": 0.7277925610542297, + "learning_rate": 0.00016018710571246038, + "loss": 2.6245, + "step": 5959 + }, + { + "epoch": 0.48099427003470263, + "grad_norm": 0.7545790672302246, + "learning_rate": 0.00016017449765200526, + "loss": 2.6076, + "step": 5960 + }, + { + "epoch": 0.4810749737712856, + "grad_norm": 0.7106321454048157, + "learning_rate": 0.00016016188809184434, + "loss": 2.5561, + "step": 5961 + }, + { + "epoch": 0.48115567750786864, + "grad_norm": 0.7464704513549805, + "learning_rate": 0.0001601492770322919, + "loss": 2.6336, + "step": 5962 + }, + { + "epoch": 0.4812363812444516, + "grad_norm": 0.7531768083572388, + "learning_rate": 0.00016013666447366228, + "loss": 2.6236, + "step": 5963 + }, + { + "epoch": 0.48131708498103465, + "grad_norm": 0.7412876486778259, + "learning_rate": 0.00016012405041626978, + "loss": 2.6309, + "step": 5964 + }, + { + "epoch": 0.4813977887176176, + "grad_norm": 0.7030940055847168, + "learning_rate": 0.00016011143486042878, + "loss": 2.6252, + "step": 5965 + }, + { + "epoch": 0.48147849245420066, + "grad_norm": 0.7932302951812744, + "learning_rate": 0.00016009881780645367, + "loss": 2.6797, + "step": 5966 + }, + { + "epoch": 0.48155919619078363, + "grad_norm": 0.7366262078285217, + "learning_rate": 0.00016008619925465893, + "loss": 2.6616, + "step": 5967 + }, + { + "epoch": 0.4816398999273666, + "grad_norm": 0.6938421130180359, + "learning_rate": 0.00016007357920535902, + "loss": 2.6888, + "step": 5968 + }, + { + "epoch": 0.48172060366394964, + "grad_norm": 0.7560005784034729, + "learning_rate": 0.00016006095765886853, + "loss": 2.6044, + "step": 5969 + }, + { + "epoch": 0.4818013074005326, + "grad_norm": 0.7330430150032043, + "learning_rate": 0.0001600483346155019, + "loss": 2.7023, + "step": 5970 + }, + { + "epoch": 0.48188201113711565, + "grad_norm": 
0.7257955074310303, + "learning_rate": 0.00016003571007557388, + "loss": 2.6763, + "step": 5971 + }, + { + "epoch": 0.4819627148736986, + "grad_norm": 0.704187273979187, + "learning_rate": 0.000160023084039399, + "loss": 2.6229, + "step": 5972 + }, + { + "epoch": 0.48204341861028166, + "grad_norm": 0.7014813423156738, + "learning_rate": 0.00016001045650729196, + "loss": 2.6207, + "step": 5973 + }, + { + "epoch": 0.48212412234686464, + "grad_norm": 0.8039405941963196, + "learning_rate": 0.00015999782747956747, + "loss": 2.6198, + "step": 5974 + }, + { + "epoch": 0.48220482608344767, + "grad_norm": 0.7114945650100708, + "learning_rate": 0.0001599851969565403, + "loss": 2.6154, + "step": 5975 + }, + { + "epoch": 0.48228552982003065, + "grad_norm": 0.7603329420089722, + "learning_rate": 0.00015997256493852517, + "loss": 2.6217, + "step": 5976 + }, + { + "epoch": 0.4823662335566137, + "grad_norm": 0.7773346900939941, + "learning_rate": 0.000159959931425837, + "loss": 2.7054, + "step": 5977 + }, + { + "epoch": 0.48244693729319665, + "grad_norm": 0.8022029399871826, + "learning_rate": 0.0001599472964187906, + "loss": 2.6844, + "step": 5978 + }, + { + "epoch": 0.4825276410297797, + "grad_norm": 0.7384541630744934, + "learning_rate": 0.00015993465991770087, + "loss": 2.6516, + "step": 5979 + }, + { + "epoch": 0.48260834476636266, + "grad_norm": 0.6993509531021118, + "learning_rate": 0.00015992202192288273, + "loss": 2.6837, + "step": 5980 + }, + { + "epoch": 0.4826890485029457, + "grad_norm": 0.7430509328842163, + "learning_rate": 0.00015990938243465116, + "loss": 2.6717, + "step": 5981 + }, + { + "epoch": 0.48276975223952867, + "grad_norm": 0.7544847726821899, + "learning_rate": 0.0001598967414533212, + "loss": 2.6573, + "step": 5982 + }, + { + "epoch": 0.4828504559761117, + "grad_norm": 0.736955463886261, + "learning_rate": 0.00015988409897920786, + "loss": 2.6865, + "step": 5983 + }, + { + "epoch": 0.4829311597126947, + "grad_norm": 0.7771684527397156, + "learning_rate": 
0.00015987145501262622, + "loss": 2.6173, + "step": 5984 + }, + { + "epoch": 0.4830118634492777, + "grad_norm": 0.7504391670227051, + "learning_rate": 0.00015985880955389143, + "loss": 2.6218, + "step": 5985 + }, + { + "epoch": 0.4830925671858607, + "grad_norm": 0.7025442123413086, + "learning_rate": 0.00015984616260331861, + "loss": 2.6107, + "step": 5986 + }, + { + "epoch": 0.4831732709224437, + "grad_norm": 0.6906485557556152, + "learning_rate": 0.000159833514161223, + "loss": 2.633, + "step": 5987 + }, + { + "epoch": 0.4832539746590267, + "grad_norm": 0.7771004438400269, + "learning_rate": 0.00015982086422791983, + "loss": 2.5956, + "step": 5988 + }, + { + "epoch": 0.48333467839560973, + "grad_norm": 0.6927372813224792, + "learning_rate": 0.00015980821280372432, + "loss": 2.5984, + "step": 5989 + }, + { + "epoch": 0.4834153821321927, + "grad_norm": 0.7196357846260071, + "learning_rate": 0.00015979555988895184, + "loss": 2.6386, + "step": 5990 + }, + { + "epoch": 0.48349608586877574, + "grad_norm": 0.7601087689399719, + "learning_rate": 0.0001597829054839177, + "loss": 2.6707, + "step": 5991 + }, + { + "epoch": 0.4835767896053587, + "grad_norm": 0.7783588767051697, + "learning_rate": 0.00015977024958893722, + "loss": 2.5815, + "step": 5992 + }, + { + "epoch": 0.48365749334194175, + "grad_norm": 0.7651833891868591, + "learning_rate": 0.00015975759220432592, + "loss": 2.6235, + "step": 5993 + }, + { + "epoch": 0.4837381970785247, + "grad_norm": 0.7158511877059937, + "learning_rate": 0.0001597449333303992, + "loss": 2.6813, + "step": 5994 + }, + { + "epoch": 0.48381890081510776, + "grad_norm": 0.7411341667175293, + "learning_rate": 0.0001597322729674726, + "loss": 2.7231, + "step": 5995 + }, + { + "epoch": 0.48389960455169073, + "grad_norm": 0.7168158292770386, + "learning_rate": 0.0001597196111158616, + "loss": 2.6408, + "step": 5996 + }, + { + "epoch": 0.48398030828827376, + "grad_norm": 0.7603393793106079, + "learning_rate": 0.00015970694777588175, + "loss": 
2.7821, + "step": 5997 + }, + { + "epoch": 0.48406101202485674, + "grad_norm": 0.7298564910888672, + "learning_rate": 0.0001596942829478487, + "loss": 2.6828, + "step": 5998 + }, + { + "epoch": 0.4841417157614398, + "grad_norm": 0.7850572466850281, + "learning_rate": 0.0001596816166320781, + "loss": 2.6191, + "step": 5999 + }, + { + "epoch": 0.48422241949802275, + "grad_norm": 0.7697601914405823, + "learning_rate": 0.00015966894882888562, + "loss": 2.6768, + "step": 6000 + }, + { + "epoch": 0.48422241949802275, + "eval_loss": 2.5610127449035645, + "eval_runtime": 760.0481, + "eval_samples_per_second": 3.447, + "eval_steps_per_second": 0.575, + "step": 6000 + }, + { + "epoch": 0.4843031232346058, + "grad_norm": 0.7212432026863098, + "learning_rate": 0.00015965627953858693, + "loss": 2.5967, + "step": 6001 + }, + { + "epoch": 0.48438382697118876, + "grad_norm": 0.7629631757736206, + "learning_rate": 0.0001596436087614978, + "loss": 2.7005, + "step": 6002 + }, + { + "epoch": 0.4844645307077718, + "grad_norm": 0.7154754400253296, + "learning_rate": 0.00015963093649793404, + "loss": 2.6909, + "step": 6003 + }, + { + "epoch": 0.48454523444435477, + "grad_norm": 0.7365279793739319, + "learning_rate": 0.00015961826274821147, + "loss": 2.6268, + "step": 6004 + }, + { + "epoch": 0.4846259381809378, + "grad_norm": 0.8114632964134216, + "learning_rate": 0.00015960558751264596, + "loss": 2.6647, + "step": 6005 + }, + { + "epoch": 0.4847066419175208, + "grad_norm": 0.7411556243896484, + "learning_rate": 0.00015959291079155338, + "loss": 2.6378, + "step": 6006 + }, + { + "epoch": 0.4847873456541038, + "grad_norm": 0.7137390375137329, + "learning_rate": 0.00015958023258524968, + "loss": 2.6454, + "step": 6007 + }, + { + "epoch": 0.4848680493906868, + "grad_norm": 0.7477054595947266, + "learning_rate": 0.00015956755289405088, + "loss": 2.6463, + "step": 6008 + }, + { + "epoch": 0.4849487531272698, + "grad_norm": 0.7198071479797363, + "learning_rate": 0.0001595548717182729, + 
"loss": 2.6537, + "step": 6009 + }, + { + "epoch": 0.4850294568638528, + "grad_norm": 0.6697781085968018, + "learning_rate": 0.00015954218905823186, + "loss": 2.7018, + "step": 6010 + }, + { + "epoch": 0.4851101606004358, + "grad_norm": 0.7577201724052429, + "learning_rate": 0.00015952950491424382, + "loss": 2.6531, + "step": 6011 + }, + { + "epoch": 0.4851908643370188, + "grad_norm": 0.6852774024009705, + "learning_rate": 0.0001595168192866249, + "loss": 2.5819, + "step": 6012 + }, + { + "epoch": 0.48527156807360183, + "grad_norm": 0.7116097807884216, + "learning_rate": 0.0001595041321756913, + "loss": 2.5691, + "step": 6013 + }, + { + "epoch": 0.4853522718101848, + "grad_norm": 0.7478477954864502, + "learning_rate": 0.00015949144358175916, + "loss": 2.6658, + "step": 6014 + }, + { + "epoch": 0.48543297554676784, + "grad_norm": 0.816969633102417, + "learning_rate": 0.0001594787535051447, + "loss": 2.6709, + "step": 6015 + }, + { + "epoch": 0.4855136792833508, + "grad_norm": 0.6953164339065552, + "learning_rate": 0.00015946606194616427, + "loss": 2.6139, + "step": 6016 + }, + { + "epoch": 0.48559438301993385, + "grad_norm": 0.6698834300041199, + "learning_rate": 0.0001594533689051341, + "loss": 2.574, + "step": 6017 + }, + { + "epoch": 0.4856750867565168, + "grad_norm": 0.7686784267425537, + "learning_rate": 0.0001594406743823706, + "loss": 2.6271, + "step": 6018 + }, + { + "epoch": 0.4857557904930998, + "grad_norm": 0.7713280916213989, + "learning_rate": 0.00015942797837819009, + "loss": 2.6682, + "step": 6019 + }, + { + "epoch": 0.48583649422968284, + "grad_norm": 0.8102596998214722, + "learning_rate": 0.00015941528089290902, + "loss": 2.6771, + "step": 6020 + }, + { + "epoch": 0.4859171979662658, + "grad_norm": 0.7140331864356995, + "learning_rate": 0.00015940258192684382, + "loss": 2.6267, + "step": 6021 + }, + { + "epoch": 0.48599790170284884, + "grad_norm": 0.7057615518569946, + "learning_rate": 0.000159389881480311, + "loss": 2.6011, + "step": 6022 + }, + { 
+ "epoch": 0.4860786054394318, + "grad_norm": 0.7106850147247314, + "learning_rate": 0.0001593771795536271, + "loss": 2.6681, + "step": 6023 + }, + { + "epoch": 0.48615930917601485, + "grad_norm": 0.7618210315704346, + "learning_rate": 0.00015936447614710867, + "loss": 2.6545, + "step": 6024 + }, + { + "epoch": 0.48624001291259783, + "grad_norm": 0.7577608227729797, + "learning_rate": 0.00015935177126107233, + "loss": 2.6479, + "step": 6025 + }, + { + "epoch": 0.48632071664918086, + "grad_norm": 0.758745551109314, + "learning_rate": 0.00015933906489583468, + "loss": 2.7057, + "step": 6026 + }, + { + "epoch": 0.48640142038576384, + "grad_norm": 0.785906970500946, + "learning_rate": 0.00015932635705171241, + "loss": 2.7081, + "step": 6027 + }, + { + "epoch": 0.48648212412234687, + "grad_norm": 0.6744558215141296, + "learning_rate": 0.00015931364772902228, + "loss": 2.6438, + "step": 6028 + }, + { + "epoch": 0.48656282785892985, + "grad_norm": 0.7451377511024475, + "learning_rate": 0.00015930093692808099, + "loss": 2.6509, + "step": 6029 + }, + { + "epoch": 0.4866435315955129, + "grad_norm": 0.6590149402618408, + "learning_rate": 0.0001592882246492053, + "loss": 2.5683, + "step": 6030 + }, + { + "epoch": 0.48672423533209586, + "grad_norm": 0.7433840036392212, + "learning_rate": 0.0001592755108927121, + "loss": 2.6647, + "step": 6031 + }, + { + "epoch": 0.4868049390686789, + "grad_norm": 0.876806378364563, + "learning_rate": 0.00015926279565891822, + "loss": 2.6482, + "step": 6032 + }, + { + "epoch": 0.48688564280526186, + "grad_norm": 0.7495005130767822, + "learning_rate": 0.00015925007894814058, + "loss": 2.6346, + "step": 6033 + }, + { + "epoch": 0.4869663465418449, + "grad_norm": 0.7005730271339417, + "learning_rate": 0.00015923736076069604, + "loss": 2.6241, + "step": 6034 + }, + { + "epoch": 0.4870470502784279, + "grad_norm": 0.664098858833313, + "learning_rate": 0.00015922464109690166, + "loss": 2.6281, + "step": 6035 + }, + { + "epoch": 0.4871277540150109, + 
"grad_norm": 0.7482514977455139, + "learning_rate": 0.00015921191995707442, + "loss": 2.5764, + "step": 6036 + }, + { + "epoch": 0.4872084577515939, + "grad_norm": 0.7450351715087891, + "learning_rate": 0.0001591991973415313, + "loss": 2.6433, + "step": 6037 + }, + { + "epoch": 0.4872891614881769, + "grad_norm": 0.6738519072532654, + "learning_rate": 0.00015918647325058948, + "loss": 2.6688, + "step": 6038 + }, + { + "epoch": 0.4873698652247599, + "grad_norm": 0.7999960780143738, + "learning_rate": 0.000159173747684566, + "loss": 2.6309, + "step": 6039 + }, + { + "epoch": 0.4874505689613429, + "grad_norm": 0.7249687910079956, + "learning_rate": 0.00015916102064377806, + "loss": 2.5808, + "step": 6040 + }, + { + "epoch": 0.4875312726979259, + "grad_norm": 0.7014601826667786, + "learning_rate": 0.00015914829212854286, + "loss": 2.6646, + "step": 6041 + }, + { + "epoch": 0.48761197643450893, + "grad_norm": 0.7091174721717834, + "learning_rate": 0.00015913556213917757, + "loss": 2.6576, + "step": 6042 + }, + { + "epoch": 0.4876926801710919, + "grad_norm": 0.6949019432067871, + "learning_rate": 0.00015912283067599952, + "loss": 2.5883, + "step": 6043 + }, + { + "epoch": 0.48777338390767494, + "grad_norm": 0.6990448236465454, + "learning_rate": 0.00015911009773932598, + "loss": 2.6413, + "step": 6044 + }, + { + "epoch": 0.4878540876442579, + "grad_norm": 0.7106831073760986, + "learning_rate": 0.00015909736332947425, + "loss": 2.6122, + "step": 6045 + }, + { + "epoch": 0.48793479138084095, + "grad_norm": 0.7052395343780518, + "learning_rate": 0.00015908462744676177, + "loss": 2.572, + "step": 6046 + }, + { + "epoch": 0.4880154951174239, + "grad_norm": 0.7250158190727234, + "learning_rate": 0.00015907189009150592, + "loss": 2.6582, + "step": 6047 + }, + { + "epoch": 0.48809619885400696, + "grad_norm": 0.7213590145111084, + "learning_rate": 0.00015905915126402414, + "loss": 2.7025, + "step": 6048 + }, + { + "epoch": 0.48817690259058993, + "grad_norm": 0.7136254906654358, + 
"learning_rate": 0.00015904641096463394, + "loss": 2.6823, + "step": 6049 + }, + { + "epoch": 0.48825760632717297, + "grad_norm": 0.7163361310958862, + "learning_rate": 0.00015903366919365282, + "loss": 2.6642, + "step": 6050 + }, + { + "epoch": 0.48833831006375594, + "grad_norm": 0.6842724680900574, + "learning_rate": 0.00015902092595139838, + "loss": 2.6599, + "step": 6051 + }, + { + "epoch": 0.488419013800339, + "grad_norm": 0.7426519393920898, + "learning_rate": 0.0001590081812381882, + "loss": 2.6271, + "step": 6052 + }, + { + "epoch": 0.48849971753692195, + "grad_norm": 0.7415586709976196, + "learning_rate": 0.00015899543505433985, + "loss": 2.6105, + "step": 6053 + }, + { + "epoch": 0.488580421273505, + "grad_norm": 0.7286739945411682, + "learning_rate": 0.00015898268740017105, + "loss": 2.6304, + "step": 6054 + }, + { + "epoch": 0.48866112501008796, + "grad_norm": 0.6898483633995056, + "learning_rate": 0.00015896993827599947, + "loss": 2.6237, + "step": 6055 + }, + { + "epoch": 0.488741828746671, + "grad_norm": 0.7020056247711182, + "learning_rate": 0.00015895718768214293, + "loss": 2.6166, + "step": 6056 + }, + { + "epoch": 0.48882253248325397, + "grad_norm": 0.7145286798477173, + "learning_rate": 0.00015894443561891914, + "loss": 2.6729, + "step": 6057 + }, + { + "epoch": 0.488903236219837, + "grad_norm": 0.6888289451599121, + "learning_rate": 0.00015893168208664594, + "loss": 2.6154, + "step": 6058 + }, + { + "epoch": 0.48898393995642, + "grad_norm": 0.6929970383644104, + "learning_rate": 0.00015891892708564116, + "loss": 2.6748, + "step": 6059 + }, + { + "epoch": 0.489064643693003, + "grad_norm": 0.679853618144989, + "learning_rate": 0.0001589061706162227, + "loss": 2.605, + "step": 6060 + }, + { + "epoch": 0.489145347429586, + "grad_norm": 0.71812504529953, + "learning_rate": 0.0001588934126787085, + "loss": 2.7249, + "step": 6061 + }, + { + "epoch": 0.489226051166169, + "grad_norm": 0.7083466053009033, + "learning_rate": 0.00015888065327341648, + 
"loss": 2.5986, + "step": 6062 + }, + { + "epoch": 0.489306754902752, + "grad_norm": 0.7476792931556702, + "learning_rate": 0.00015886789240066466, + "loss": 2.5942, + "step": 6063 + }, + { + "epoch": 0.489387458639335, + "grad_norm": 0.7197855114936829, + "learning_rate": 0.00015885513006077114, + "loss": 2.6198, + "step": 6064 + }, + { + "epoch": 0.489468162375918, + "grad_norm": 0.6678233742713928, + "learning_rate": 0.00015884236625405385, + "loss": 2.5793, + "step": 6065 + }, + { + "epoch": 0.48954886611250104, + "grad_norm": 0.7371037602424622, + "learning_rate": 0.00015882960098083105, + "loss": 2.6231, + "step": 6066 + }, + { + "epoch": 0.489629569849084, + "grad_norm": 0.7087417244911194, + "learning_rate": 0.00015881683424142078, + "loss": 2.6483, + "step": 6067 + }, + { + "epoch": 0.48971027358566704, + "grad_norm": 0.7300292253494263, + "learning_rate": 0.00015880406603614126, + "loss": 2.6778, + "step": 6068 + }, + { + "epoch": 0.48979097732225, + "grad_norm": 0.8347866535186768, + "learning_rate": 0.0001587912963653107, + "loss": 2.554, + "step": 6069 + }, + { + "epoch": 0.489871681058833, + "grad_norm": 0.7717794179916382, + "learning_rate": 0.00015877852522924732, + "loss": 2.6904, + "step": 6070 + }, + { + "epoch": 0.48995238479541603, + "grad_norm": 0.6960952281951904, + "learning_rate": 0.00015876575262826944, + "loss": 2.6059, + "step": 6071 + }, + { + "epoch": 0.490033088531999, + "grad_norm": 0.7316592931747437, + "learning_rate": 0.00015875297856269543, + "loss": 2.6685, + "step": 6072 + }, + { + "epoch": 0.49011379226858204, + "grad_norm": 0.6775457859039307, + "learning_rate": 0.00015874020303284362, + "loss": 2.6232, + "step": 6073 + }, + { + "epoch": 0.490194496005165, + "grad_norm": 0.7741925120353699, + "learning_rate": 0.00015872742603903237, + "loss": 2.6767, + "step": 6074 + }, + { + "epoch": 0.49027519974174805, + "grad_norm": 0.857490599155426, + "learning_rate": 0.00015871464758158017, + "loss": 2.6649, + "step": 6075 + }, + { + 
"epoch": 0.490355903478331, + "grad_norm": 0.7474274039268494, + "learning_rate": 0.00015870186766080545, + "loss": 2.6926, + "step": 6076 + }, + { + "epoch": 0.49043660721491406, + "grad_norm": 0.7266567945480347, + "learning_rate": 0.00015868908627702675, + "loss": 2.5919, + "step": 6077 + }, + { + "epoch": 0.49051731095149703, + "grad_norm": 0.7247830629348755, + "learning_rate": 0.0001586763034305626, + "loss": 2.6158, + "step": 6078 + }, + { + "epoch": 0.49059801468808006, + "grad_norm": 0.7654951214790344, + "learning_rate": 0.00015866351912173157, + "loss": 2.7236, + "step": 6079 + }, + { + "epoch": 0.49067871842466304, + "grad_norm": 0.732431948184967, + "learning_rate": 0.00015865073335085236, + "loss": 2.6349, + "step": 6080 + }, + { + "epoch": 0.4907594221612461, + "grad_norm": 0.7240673303604126, + "learning_rate": 0.0001586379461182435, + "loss": 2.6282, + "step": 6081 + }, + { + "epoch": 0.49084012589782905, + "grad_norm": 0.767473042011261, + "learning_rate": 0.00015862515742422374, + "loss": 2.6939, + "step": 6082 + }, + { + "epoch": 0.4909208296344121, + "grad_norm": 0.6977359056472778, + "learning_rate": 0.00015861236726911183, + "loss": 2.6591, + "step": 6083 + }, + { + "epoch": 0.49100153337099506, + "grad_norm": 0.7676639556884766, + "learning_rate": 0.00015859957565322655, + "loss": 2.6189, + "step": 6084 + }, + { + "epoch": 0.4910822371075781, + "grad_norm": 0.7157976031303406, + "learning_rate": 0.0001585867825768866, + "loss": 2.644, + "step": 6085 + }, + { + "epoch": 0.49116294084416107, + "grad_norm": 0.7080803513526917, + "learning_rate": 0.0001585739880404109, + "loss": 2.6099, + "step": 6086 + }, + { + "epoch": 0.4912436445807441, + "grad_norm": 0.7109760046005249, + "learning_rate": 0.0001585611920441183, + "loss": 2.7087, + "step": 6087 + }, + { + "epoch": 0.4913243483173271, + "grad_norm": 0.7274255156517029, + "learning_rate": 0.00015854839458832772, + "loss": 2.6394, + "step": 6088 + }, + { + "epoch": 0.4914050520539101, + 
"grad_norm": 0.7407883405685425, + "learning_rate": 0.00015853559567335812, + "loss": 2.6729, + "step": 6089 + }, + { + "epoch": 0.4914857557904931, + "grad_norm": 0.6879885196685791, + "learning_rate": 0.00015852279529952843, + "loss": 2.5971, + "step": 6090 + }, + { + "epoch": 0.4915664595270761, + "grad_norm": 0.7678415179252625, + "learning_rate": 0.00015850999346715772, + "loss": 2.6606, + "step": 6091 + }, + { + "epoch": 0.4916471632636591, + "grad_norm": 0.7108608484268188, + "learning_rate": 0.00015849719017656504, + "loss": 2.6494, + "step": 6092 + }, + { + "epoch": 0.4917278670002421, + "grad_norm": 0.7238833904266357, + "learning_rate": 0.00015848438542806945, + "loss": 2.6742, + "step": 6093 + }, + { + "epoch": 0.4918085707368251, + "grad_norm": 0.7316902279853821, + "learning_rate": 0.0001584715792219901, + "loss": 2.6757, + "step": 6094 + }, + { + "epoch": 0.49188927447340813, + "grad_norm": 0.7339446544647217, + "learning_rate": 0.00015845877155864612, + "loss": 2.607, + "step": 6095 + }, + { + "epoch": 0.4919699782099911, + "grad_norm": 0.6931337714195251, + "learning_rate": 0.0001584459624383568, + "loss": 2.6203, + "step": 6096 + }, + { + "epoch": 0.49205068194657414, + "grad_norm": 0.734229326248169, + "learning_rate": 0.00015843315186144126, + "loss": 2.646, + "step": 6097 + }, + { + "epoch": 0.4921313856831571, + "grad_norm": 0.7764919400215149, + "learning_rate": 0.00015842033982821883, + "loss": 2.6698, + "step": 6098 + }, + { + "epoch": 0.49221208941974015, + "grad_norm": 0.7707986235618591, + "learning_rate": 0.00015840752633900887, + "loss": 2.6995, + "step": 6099 + }, + { + "epoch": 0.4922927931563231, + "grad_norm": 0.7321949601173401, + "learning_rate": 0.00015839471139413066, + "loss": 2.6517, + "step": 6100 + }, + { + "epoch": 0.49237349689290616, + "grad_norm": 0.7087488770484924, + "learning_rate": 0.00015838189499390353, + "loss": 2.6153, + "step": 6101 + }, + { + "epoch": 0.49245420062948914, + "grad_norm": 0.7300730347633362, + 
"learning_rate": 0.00015836907713864706, + "loss": 2.5868, + "step": 6102 + }, + { + "epoch": 0.49253490436607217, + "grad_norm": 0.8476536273956299, + "learning_rate": 0.00015835625782868054, + "loss": 2.7158, + "step": 6103 + }, + { + "epoch": 0.49261560810265514, + "grad_norm": 0.8062012791633606, + "learning_rate": 0.0001583434370643236, + "loss": 2.6896, + "step": 6104 + }, + { + "epoch": 0.4926963118392382, + "grad_norm": 0.7336686849594116, + "learning_rate": 0.00015833061484589562, + "loss": 2.6416, + "step": 6105 + }, + { + "epoch": 0.49277701557582115, + "grad_norm": 0.6976929306983948, + "learning_rate": 0.00015831779117371627, + "loss": 2.6279, + "step": 6106 + }, + { + "epoch": 0.4928577193124042, + "grad_norm": 0.7262609601020813, + "learning_rate": 0.00015830496604810513, + "loss": 2.6144, + "step": 6107 + }, + { + "epoch": 0.49293842304898716, + "grad_norm": 0.7274572253227234, + "learning_rate": 0.00015829213946938183, + "loss": 2.7409, + "step": 6108 + }, + { + "epoch": 0.4930191267855702, + "grad_norm": 0.7438454031944275, + "learning_rate": 0.000158279311437866, + "loss": 2.5928, + "step": 6109 + }, + { + "epoch": 0.49309983052215317, + "grad_norm": 0.6885421872138977, + "learning_rate": 0.00015826648195387742, + "loss": 2.6659, + "step": 6110 + }, + { + "epoch": 0.4931805342587362, + "grad_norm": 0.6781450510025024, + "learning_rate": 0.0001582536510177358, + "loss": 2.6068, + "step": 6111 + }, + { + "epoch": 0.4932612379953192, + "grad_norm": 0.7618128657341003, + "learning_rate": 0.0001582408186297609, + "loss": 2.6705, + "step": 6112 + }, + { + "epoch": 0.4933419417319022, + "grad_norm": 0.7011203765869141, + "learning_rate": 0.00015822798479027256, + "loss": 2.596, + "step": 6113 + }, + { + "epoch": 0.4934226454684852, + "grad_norm": 0.7727806568145752, + "learning_rate": 0.00015821514949959065, + "loss": 2.6458, + "step": 6114 + }, + { + "epoch": 0.4935033492050682, + "grad_norm": 0.7318129539489746, + "learning_rate": 
0.00015820231275803502, + "loss": 2.6009, + "step": 6115 + }, + { + "epoch": 0.4935840529416512, + "grad_norm": 0.6836227178573608, + "learning_rate": 0.00015818947456592563, + "loss": 2.6311, + "step": 6116 + }, + { + "epoch": 0.49366475667823423, + "grad_norm": 0.7657275199890137, + "learning_rate": 0.0001581766349235824, + "loss": 2.6079, + "step": 6117 + }, + { + "epoch": 0.4937454604148172, + "grad_norm": 0.74736487865448, + "learning_rate": 0.0001581637938313254, + "loss": 2.6752, + "step": 6118 + }, + { + "epoch": 0.49382616415140024, + "grad_norm": 0.716708242893219, + "learning_rate": 0.00015815095128947454, + "loss": 2.5896, + "step": 6119 + }, + { + "epoch": 0.4939068678879832, + "grad_norm": 0.740727424621582, + "learning_rate": 0.00015813810729835002, + "loss": 2.6528, + "step": 6120 + }, + { + "epoch": 0.4939875716245662, + "grad_norm": 0.6746687293052673, + "learning_rate": 0.0001581252618582719, + "loss": 2.6438, + "step": 6121 + }, + { + "epoch": 0.4940682753611492, + "grad_norm": 0.7547900080680847, + "learning_rate": 0.00015811241496956028, + "loss": 2.631, + "step": 6122 + }, + { + "epoch": 0.4941489790977322, + "grad_norm": 0.7500903606414795, + "learning_rate": 0.0001580995666325354, + "loss": 2.7039, + "step": 6123 + }, + { + "epoch": 0.49422968283431523, + "grad_norm": 0.7692849636077881, + "learning_rate": 0.00015808671684751743, + "loss": 2.5922, + "step": 6124 + }, + { + "epoch": 0.4943103865708982, + "grad_norm": 0.6964236497879028, + "learning_rate": 0.00015807386561482662, + "loss": 2.6239, + "step": 6125 + }, + { + "epoch": 0.49439109030748124, + "grad_norm": 0.7094165086746216, + "learning_rate": 0.0001580610129347833, + "loss": 2.6239, + "step": 6126 + }, + { + "epoch": 0.4944717940440642, + "grad_norm": 0.7579131126403809, + "learning_rate": 0.00015804815880770775, + "loss": 2.6654, + "step": 6127 + }, + { + "epoch": 0.49455249778064725, + "grad_norm": 0.7687693238258362, + "learning_rate": 0.00015803530323392034, + "loss": 2.6557, 
+ "step": 6128 + }, + { + "epoch": 0.4946332015172302, + "grad_norm": 0.6913540363311768, + "learning_rate": 0.0001580224462137415, + "loss": 2.6299, + "step": 6129 + }, + { + "epoch": 0.49471390525381326, + "grad_norm": 0.7574129700660706, + "learning_rate": 0.0001580095877474916, + "loss": 2.6327, + "step": 6130 + }, + { + "epoch": 0.49479460899039623, + "grad_norm": 0.6834598183631897, + "learning_rate": 0.0001579967278354911, + "loss": 2.6402, + "step": 6131 + }, + { + "epoch": 0.49487531272697927, + "grad_norm": 0.7872750163078308, + "learning_rate": 0.00015798386647806057, + "loss": 2.6647, + "step": 6132 + }, + { + "epoch": 0.49495601646356224, + "grad_norm": 0.705211341381073, + "learning_rate": 0.00015797100367552055, + "loss": 2.6288, + "step": 6133 + }, + { + "epoch": 0.4950367202001453, + "grad_norm": 0.7302640080451965, + "learning_rate": 0.00015795813942819155, + "loss": 2.6683, + "step": 6134 + }, + { + "epoch": 0.49511742393672825, + "grad_norm": 0.7522360682487488, + "learning_rate": 0.0001579452737363942, + "loss": 2.5885, + "step": 6135 + }, + { + "epoch": 0.4951981276733113, + "grad_norm": 0.657376229763031, + "learning_rate": 0.0001579324066004492, + "loss": 2.5775, + "step": 6136 + }, + { + "epoch": 0.49527883140989426, + "grad_norm": 0.7539556622505188, + "learning_rate": 0.00015791953802067715, + "loss": 2.6236, + "step": 6137 + }, + { + "epoch": 0.4953595351464773, + "grad_norm": 0.7090374827384949, + "learning_rate": 0.00015790666799739883, + "loss": 2.5845, + "step": 6138 + }, + { + "epoch": 0.49544023888306027, + "grad_norm": 0.6883948445320129, + "learning_rate": 0.00015789379653093497, + "loss": 2.6621, + "step": 6139 + }, + { + "epoch": 0.4955209426196433, + "grad_norm": 0.7466424107551575, + "learning_rate": 0.00015788092362160633, + "loss": 2.6289, + "step": 6140 + }, + { + "epoch": 0.4956016463562263, + "grad_norm": 0.7424437403678894, + "learning_rate": 0.00015786804926973383, + "loss": 2.6405, + "step": 6141 + }, + { + "epoch": 
0.4956823500928093, + "grad_norm": 0.7227851748466492, + "learning_rate": 0.00015785517347563822, + "loss": 2.6537, + "step": 6142 + }, + { + "epoch": 0.4957630538293923, + "grad_norm": 0.7548653483390808, + "learning_rate": 0.00015784229623964048, + "loss": 2.7377, + "step": 6143 + }, + { + "epoch": 0.4958437575659753, + "grad_norm": 0.7086976170539856, + "learning_rate": 0.00015782941756206152, + "loss": 2.6194, + "step": 6144 + }, + { + "epoch": 0.4959244613025583, + "grad_norm": 0.6605533957481384, + "learning_rate": 0.0001578165374432223, + "loss": 2.6265, + "step": 6145 + }, + { + "epoch": 0.4960051650391413, + "grad_norm": 0.7187899947166443, + "learning_rate": 0.00015780365588344384, + "loss": 2.5639, + "step": 6146 + }, + { + "epoch": 0.4960858687757243, + "grad_norm": 0.7014074921607971, + "learning_rate": 0.00015779077288304716, + "loss": 2.6011, + "step": 6147 + }, + { + "epoch": 0.49616657251230734, + "grad_norm": 0.7463840842247009, + "learning_rate": 0.00015777788844235335, + "loss": 2.6059, + "step": 6148 + }, + { + "epoch": 0.4962472762488903, + "grad_norm": 0.8022417426109314, + "learning_rate": 0.00015776500256168356, + "loss": 2.6011, + "step": 6149 + }, + { + "epoch": 0.49632797998547334, + "grad_norm": 0.7140083909034729, + "learning_rate": 0.0001577521152413589, + "loss": 2.6891, + "step": 6150 + }, + { + "epoch": 0.4964086837220563, + "grad_norm": 0.7266198992729187, + "learning_rate": 0.00015773922648170053, + "loss": 2.6561, + "step": 6151 + }, + { + "epoch": 0.49648938745863935, + "grad_norm": 0.7241406440734863, + "learning_rate": 0.0001577263362830297, + "loss": 2.6835, + "step": 6152 + }, + { + "epoch": 0.49657009119522233, + "grad_norm": 0.7422344088554382, + "learning_rate": 0.0001577134446456677, + "loss": 2.6039, + "step": 6153 + }, + { + "epoch": 0.49665079493180536, + "grad_norm": 0.8764764666557312, + "learning_rate": 0.0001577005515699358, + "loss": 2.68, + "step": 6154 + }, + { + "epoch": 0.49673149866838834, + "grad_norm": 
0.7224323749542236, + "learning_rate": 0.0001576876570561553, + "loss": 2.5824, + "step": 6155 + }, + { + "epoch": 0.49681220240497137, + "grad_norm": 0.7601075172424316, + "learning_rate": 0.00015767476110464758, + "loss": 2.7124, + "step": 6156 + }, + { + "epoch": 0.49689290614155435, + "grad_norm": 0.7425428628921509, + "learning_rate": 0.0001576618637157341, + "loss": 2.5913, + "step": 6157 + }, + { + "epoch": 0.4969736098781374, + "grad_norm": 0.721969723701477, + "learning_rate": 0.0001576489648897362, + "loss": 2.6482, + "step": 6158 + }, + { + "epoch": 0.49705431361472036, + "grad_norm": 0.8142126798629761, + "learning_rate": 0.00015763606462697544, + "loss": 2.6231, + "step": 6159 + }, + { + "epoch": 0.4971350173513034, + "grad_norm": 0.6636359691619873, + "learning_rate": 0.00015762316292777326, + "loss": 2.6388, + "step": 6160 + }, + { + "epoch": 0.49721572108788636, + "grad_norm": 0.7093132734298706, + "learning_rate": 0.00015761025979245123, + "loss": 2.6562, + "step": 6161 + }, + { + "epoch": 0.4972964248244694, + "grad_norm": 0.7130851745605469, + "learning_rate": 0.00015759735522133094, + "loss": 2.6856, + "step": 6162 + }, + { + "epoch": 0.4973771285610524, + "grad_norm": 0.7303292155265808, + "learning_rate": 0.000157584449214734, + "loss": 2.6077, + "step": 6163 + }, + { + "epoch": 0.4974578322976354, + "grad_norm": 0.6742258071899414, + "learning_rate": 0.00015757154177298204, + "loss": 2.6644, + "step": 6164 + }, + { + "epoch": 0.4975385360342184, + "grad_norm": 0.6882894039154053, + "learning_rate": 0.00015755863289639677, + "loss": 2.6462, + "step": 6165 + }, + { + "epoch": 0.4976192397708014, + "grad_norm": 0.7882276773452759, + "learning_rate": 0.00015754572258529993, + "loss": 2.6509, + "step": 6166 + }, + { + "epoch": 0.4976999435073844, + "grad_norm": 0.7163859009742737, + "learning_rate": 0.00015753281084001324, + "loss": 2.627, + "step": 6167 + }, + { + "epoch": 0.4977806472439674, + "grad_norm": 0.7194411158561707, + "learning_rate": 
0.0001575198976608585, + "loss": 2.6798, + "step": 6168 + }, + { + "epoch": 0.4978613509805504, + "grad_norm": 0.7233198881149292, + "learning_rate": 0.0001575069830481576, + "loss": 2.6616, + "step": 6169 + }, + { + "epoch": 0.49794205471713343, + "grad_norm": 0.7246997952461243, + "learning_rate": 0.00015749406700223231, + "loss": 2.6262, + "step": 6170 + }, + { + "epoch": 0.4980227584537164, + "grad_norm": 0.7509368658065796, + "learning_rate": 0.00015748114952340457, + "loss": 2.6148, + "step": 6171 + }, + { + "epoch": 0.4981034621902994, + "grad_norm": 0.7079075574874878, + "learning_rate": 0.00015746823061199637, + "loss": 2.6712, + "step": 6172 + }, + { + "epoch": 0.4981841659268824, + "grad_norm": 0.6821560859680176, + "learning_rate": 0.0001574553102683296, + "loss": 2.6253, + "step": 6173 + }, + { + "epoch": 0.4982648696634654, + "grad_norm": 0.7623000741004944, + "learning_rate": 0.00015744238849272634, + "loss": 2.6252, + "step": 6174 + }, + { + "epoch": 0.4983455734000484, + "grad_norm": 0.709434449672699, + "learning_rate": 0.00015742946528550858, + "loss": 2.555, + "step": 6175 + }, + { + "epoch": 0.4984262771366314, + "grad_norm": 0.7277799844741821, + "learning_rate": 0.00015741654064699846, + "loss": 2.6551, + "step": 6176 + }, + { + "epoch": 0.49850698087321443, + "grad_norm": 0.7208690643310547, + "learning_rate": 0.00015740361457751802, + "loss": 2.6747, + "step": 6177 + }, + { + "epoch": 0.4985876846097974, + "grad_norm": 0.8458136916160583, + "learning_rate": 0.00015739068707738946, + "loss": 2.6551, + "step": 6178 + }, + { + "epoch": 0.49866838834638044, + "grad_norm": 0.7718539834022522, + "learning_rate": 0.00015737775814693498, + "loss": 2.6246, + "step": 6179 + }, + { + "epoch": 0.4987490920829634, + "grad_norm": 0.6982735395431519, + "learning_rate": 0.00015736482778647674, + "loss": 2.5726, + "step": 6180 + }, + { + "epoch": 0.49882979581954645, + "grad_norm": 0.6759411692619324, + "learning_rate": 0.00015735189599633707, + "loss": 
2.6603, + "step": 6181 + }, + { + "epoch": 0.4989104995561294, + "grad_norm": 0.7016656994819641, + "learning_rate": 0.0001573389627768382, + "loss": 2.6045, + "step": 6182 + }, + { + "epoch": 0.49899120329271246, + "grad_norm": 0.7170618176460266, + "learning_rate": 0.00015732602812830253, + "loss": 2.6419, + "step": 6183 + }, + { + "epoch": 0.49907190702929544, + "grad_norm": 0.6963300704956055, + "learning_rate": 0.00015731309205105237, + "loss": 2.6377, + "step": 6184 + }, + { + "epoch": 0.49915261076587847, + "grad_norm": 0.7437995672225952, + "learning_rate": 0.00015730015454541014, + "loss": 2.7013, + "step": 6185 + }, + { + "epoch": 0.49923331450246144, + "grad_norm": 0.6846518516540527, + "learning_rate": 0.00015728721561169827, + "loss": 2.5526, + "step": 6186 + }, + { + "epoch": 0.4993140182390445, + "grad_norm": 0.7343618273735046, + "learning_rate": 0.00015727427525023924, + "loss": 2.6567, + "step": 6187 + }, + { + "epoch": 0.49939472197562745, + "grad_norm": 0.6947566270828247, + "learning_rate": 0.00015726133346135554, + "loss": 2.6642, + "step": 6188 + }, + { + "epoch": 0.4994754257122105, + "grad_norm": 0.7402610778808594, + "learning_rate": 0.00015724839024536976, + "loss": 2.6964, + "step": 6189 + }, + { + "epoch": 0.49955612944879346, + "grad_norm": 0.7318306565284729, + "learning_rate": 0.00015723544560260444, + "loss": 2.5864, + "step": 6190 + }, + { + "epoch": 0.4996368331853765, + "grad_norm": 0.752216100692749, + "learning_rate": 0.00015722249953338215, + "loss": 2.6357, + "step": 6191 + }, + { + "epoch": 0.49971753692195947, + "grad_norm": 0.70283442735672, + "learning_rate": 0.00015720955203802565, + "loss": 2.5892, + "step": 6192 + }, + { + "epoch": 0.4997982406585425, + "grad_norm": 0.7457823753356934, + "learning_rate": 0.00015719660311685755, + "loss": 2.6663, + "step": 6193 + }, + { + "epoch": 0.4998789443951255, + "grad_norm": 0.7296229600906372, + "learning_rate": 0.00015718365277020058, + "loss": 2.6238, + "step": 6194 + }, + { + 
"epoch": 0.4999596481317085, + "grad_norm": 0.6963346004486084, + "learning_rate": 0.0001571707009983775, + "loss": 2.6303, + "step": 6195 + }, + { + "epoch": 0.5000403518682915, + "grad_norm": 0.7074694633483887, + "learning_rate": 0.0001571577478017111, + "loss": 2.6077, + "step": 6196 + }, + { + "epoch": 0.5001210556048745, + "grad_norm": 0.7826260328292847, + "learning_rate": 0.00015714479318052423, + "loss": 2.6668, + "step": 6197 + }, + { + "epoch": 0.5002017593414575, + "grad_norm": 0.6908758282661438, + "learning_rate": 0.00015713183713513974, + "loss": 2.6195, + "step": 6198 + }, + { + "epoch": 0.5002824630780405, + "grad_norm": 0.7571602463722229, + "learning_rate": 0.0001571188796658805, + "loss": 2.6546, + "step": 6199 + }, + { + "epoch": 0.5003631668146236, + "grad_norm": 0.7359431385993958, + "learning_rate": 0.0001571059207730695, + "loss": 2.5792, + "step": 6200 + }, + { + "epoch": 0.5004438705512065, + "grad_norm": 0.6886340379714966, + "learning_rate": 0.00015709296045702967, + "loss": 2.6099, + "step": 6201 + }, + { + "epoch": 0.5005245742877895, + "grad_norm": 0.6900473833084106, + "learning_rate": 0.000157079998718084, + "loss": 2.6461, + "step": 6202 + }, + { + "epoch": 0.5006052780243725, + "grad_norm": 0.66212397813797, + "learning_rate": 0.00015706703555655555, + "loss": 2.6178, + "step": 6203 + }, + { + "epoch": 0.5006859817609556, + "grad_norm": 0.7666565179824829, + "learning_rate": 0.00015705407097276744, + "loss": 2.7097, + "step": 6204 + }, + { + "epoch": 0.5007666854975386, + "grad_norm": 0.7294591069221497, + "learning_rate": 0.0001570411049670427, + "loss": 2.5995, + "step": 6205 + }, + { + "epoch": 0.5008473892341215, + "grad_norm": 0.7279765009880066, + "learning_rate": 0.00015702813753970453, + "loss": 2.5554, + "step": 6206 + }, + { + "epoch": 0.5009280929707045, + "grad_norm": 0.7174742817878723, + "learning_rate": 0.0001570151686910761, + "loss": 2.6523, + "step": 6207 + }, + { + "epoch": 0.5010087967072876, + "grad_norm": 
0.67017662525177, + "learning_rate": 0.00015700219842148063, + "loss": 2.5613, + "step": 6208 + }, + { + "epoch": 0.5010895004438706, + "grad_norm": 0.7000258564949036, + "learning_rate": 0.00015698922673124138, + "loss": 2.5658, + "step": 6209 + }, + { + "epoch": 0.5011702041804535, + "grad_norm": 0.6894544363021851, + "learning_rate": 0.00015697625362068164, + "loss": 2.6925, + "step": 6210 + }, + { + "epoch": 0.5012509079170365, + "grad_norm": 0.6742957234382629, + "learning_rate": 0.00015696327909012466, + "loss": 2.6429, + "step": 6211 + }, + { + "epoch": 0.5013316116536196, + "grad_norm": 0.7039656639099121, + "learning_rate": 0.0001569503031398939, + "loss": 2.6313, + "step": 6212 + }, + { + "epoch": 0.5014123153902026, + "grad_norm": 0.720003604888916, + "learning_rate": 0.00015693732577031272, + "loss": 2.6207, + "step": 6213 + }, + { + "epoch": 0.5014930191267856, + "grad_norm": 0.8611499071121216, + "learning_rate": 0.00015692434698170456, + "loss": 2.6855, + "step": 6214 + }, + { + "epoch": 0.5015737228633685, + "grad_norm": 0.6664702296257019, + "learning_rate": 0.00015691136677439284, + "loss": 2.6174, + "step": 6215 + }, + { + "epoch": 0.5016544265999516, + "grad_norm": 0.7258509993553162, + "learning_rate": 0.00015689838514870111, + "loss": 2.6558, + "step": 6216 + }, + { + "epoch": 0.5017351303365346, + "grad_norm": 0.6972211599349976, + "learning_rate": 0.0001568854021049529, + "loss": 2.5913, + "step": 6217 + }, + { + "epoch": 0.5018158340731176, + "grad_norm": 0.7927280068397522, + "learning_rate": 0.00015687241764347177, + "loss": 2.6466, + "step": 6218 + }, + { + "epoch": 0.5018965378097006, + "grad_norm": 0.7044646143913269, + "learning_rate": 0.00015685943176458128, + "loss": 2.6195, + "step": 6219 + }, + { + "epoch": 0.5019772415462836, + "grad_norm": 0.6935598254203796, + "learning_rate": 0.00015684644446860516, + "loss": 2.6486, + "step": 6220 + }, + { + "epoch": 0.5020579452828666, + "grad_norm": 0.7965792417526245, + "learning_rate": 
0.00015683345575586704, + "loss": 2.6265, + "step": 6221 + }, + { + "epoch": 0.5021386490194496, + "grad_norm": 0.727053701877594, + "learning_rate": 0.00015682046562669064, + "loss": 2.6714, + "step": 6222 + }, + { + "epoch": 0.5022193527560326, + "grad_norm": 0.7919184565544128, + "learning_rate": 0.0001568074740813997, + "loss": 2.7115, + "step": 6223 + }, + { + "epoch": 0.5023000564926156, + "grad_norm": 0.7724714279174805, + "learning_rate": 0.00015679448112031801, + "loss": 2.6636, + "step": 6224 + }, + { + "epoch": 0.5023807602291986, + "grad_norm": 0.6893701553344727, + "learning_rate": 0.0001567814867437694, + "loss": 2.6562, + "step": 6225 + }, + { + "epoch": 0.5024614639657816, + "grad_norm": 0.7089633345603943, + "learning_rate": 0.00015676849095207769, + "loss": 2.6125, + "step": 6226 + }, + { + "epoch": 0.5025421677023646, + "grad_norm": 0.7620012760162354, + "learning_rate": 0.00015675549374556682, + "loss": 2.6935, + "step": 6227 + }, + { + "epoch": 0.5026228714389476, + "grad_norm": 0.7293741703033447, + "learning_rate": 0.00015674249512456065, + "loss": 2.66, + "step": 6228 + }, + { + "epoch": 0.5027035751755307, + "grad_norm": 0.7366519570350647, + "learning_rate": 0.00015672949508938318, + "loss": 2.5968, + "step": 6229 + }, + { + "epoch": 0.5027842789121136, + "grad_norm": 0.6646310091018677, + "learning_rate": 0.00015671649364035846, + "loss": 2.5751, + "step": 6230 + }, + { + "epoch": 0.5028649826486966, + "grad_norm": 0.6682632565498352, + "learning_rate": 0.00015670349077781038, + "loss": 2.5902, + "step": 6231 + }, + { + "epoch": 0.5029456863852796, + "grad_norm": 0.7327528595924377, + "learning_rate": 0.00015669048650206313, + "loss": 2.6487, + "step": 6232 + }, + { + "epoch": 0.5030263901218627, + "grad_norm": 0.7114281058311462, + "learning_rate": 0.00015667748081344074, + "loss": 2.5779, + "step": 6233 + }, + { + "epoch": 0.5031070938584457, + "grad_norm": 0.7908105850219727, + "learning_rate": 0.00015666447371226737, + "loss": 2.6099, 
+ "step": 6234 + }, + { + "epoch": 0.5031877975950286, + "grad_norm": 0.7823575139045715, + "learning_rate": 0.00015665146519886725, + "loss": 2.6339, + "step": 6235 + }, + { + "epoch": 0.5032685013316116, + "grad_norm": 0.7404836416244507, + "learning_rate": 0.00015663845527356447, + "loss": 2.6035, + "step": 6236 + }, + { + "epoch": 0.5033492050681947, + "grad_norm": 0.7448995113372803, + "learning_rate": 0.00015662544393668334, + "loss": 2.6566, + "step": 6237 + }, + { + "epoch": 0.5034299088047777, + "grad_norm": 0.7209747433662415, + "learning_rate": 0.00015661243118854815, + "loss": 2.682, + "step": 6238 + }, + { + "epoch": 0.5035106125413606, + "grad_norm": 0.691759467124939, + "learning_rate": 0.00015659941702948315, + "loss": 2.6435, + "step": 6239 + }, + { + "epoch": 0.5035913162779436, + "grad_norm": 0.7646063566207886, + "learning_rate": 0.00015658640145981275, + "loss": 2.591, + "step": 6240 + }, + { + "epoch": 0.5036720200145267, + "grad_norm": 0.8319387435913086, + "learning_rate": 0.00015657338447986133, + "loss": 2.5937, + "step": 6241 + }, + { + "epoch": 0.5037527237511097, + "grad_norm": 0.729193389415741, + "learning_rate": 0.00015656036608995323, + "loss": 2.651, + "step": 6242 + }, + { + "epoch": 0.5038334274876927, + "grad_norm": 0.720098614692688, + "learning_rate": 0.000156547346290413, + "loss": 2.681, + "step": 6243 + }, + { + "epoch": 0.5039141312242756, + "grad_norm": 0.7172541618347168, + "learning_rate": 0.00015653432508156508, + "loss": 2.5906, + "step": 6244 + }, + { + "epoch": 0.5039948349608587, + "grad_norm": 0.7352481484413147, + "learning_rate": 0.00015652130246373398, + "loss": 2.6376, + "step": 6245 + }, + { + "epoch": 0.5040755386974417, + "grad_norm": 0.6664925813674927, + "learning_rate": 0.0001565082784372443, + "loss": 2.706, + "step": 6246 + }, + { + "epoch": 0.5041562424340247, + "grad_norm": 0.7292987704277039, + "learning_rate": 0.0001564952530024206, + "loss": 2.6149, + "step": 6247 + }, + { + "epoch": 
0.5042369461706077, + "grad_norm": 0.6904531121253967, + "learning_rate": 0.00015648222615958747, + "loss": 2.579, + "step": 6248 + }, + { + "epoch": 0.5043176499071907, + "grad_norm": 0.7385311722755432, + "learning_rate": 0.00015646919790906965, + "loss": 2.6137, + "step": 6249 + }, + { + "epoch": 0.5043983536437737, + "grad_norm": 0.7869507074356079, + "learning_rate": 0.0001564561682511918, + "loss": 2.6831, + "step": 6250 + }, + { + "epoch": 0.5044790573803567, + "grad_norm": 0.723680317401886, + "learning_rate": 0.00015644313718627867, + "loss": 2.6083, + "step": 6251 + }, + { + "epoch": 0.5045597611169397, + "grad_norm": 0.7029969692230225, + "learning_rate": 0.00015643010471465502, + "loss": 2.6462, + "step": 6252 + }, + { + "epoch": 0.5046404648535228, + "grad_norm": 0.818975031375885, + "learning_rate": 0.00015641707083664566, + "loss": 2.6393, + "step": 6253 + }, + { + "epoch": 0.5047211685901057, + "grad_norm": 0.7237667441368103, + "learning_rate": 0.0001564040355525754, + "loss": 2.5995, + "step": 6254 + }, + { + "epoch": 0.5048018723266887, + "grad_norm": 0.8613824248313904, + "learning_rate": 0.00015639099886276912, + "loss": 2.748, + "step": 6255 + }, + { + "epoch": 0.5048825760632717, + "grad_norm": 0.6802194118499756, + "learning_rate": 0.00015637796076755178, + "loss": 2.6393, + "step": 6256 + }, + { + "epoch": 0.5049632797998548, + "grad_norm": 0.7816255688667297, + "learning_rate": 0.00015636492126724823, + "loss": 2.6218, + "step": 6257 + }, + { + "epoch": 0.5050439835364378, + "grad_norm": 0.7443990707397461, + "learning_rate": 0.00015635188036218356, + "loss": 2.6181, + "step": 6258 + }, + { + "epoch": 0.5051246872730207, + "grad_norm": 0.7869458794593811, + "learning_rate": 0.0001563388380526827, + "loss": 2.6641, + "step": 6259 + }, + { + "epoch": 0.5052053910096037, + "grad_norm": 0.7423158288002014, + "learning_rate": 0.00015632579433907072, + "loss": 2.5849, + "step": 6260 + }, + { + "epoch": 0.5052860947461868, + "grad_norm": 
0.7888280153274536, + "learning_rate": 0.00015631274922167272, + "loss": 2.7095, + "step": 6261 + }, + { + "epoch": 0.5053667984827698, + "grad_norm": 0.7053405046463013, + "learning_rate": 0.0001562997027008138, + "loss": 2.5747, + "step": 6262 + }, + { + "epoch": 0.5054475022193528, + "grad_norm": 0.7930825352668762, + "learning_rate": 0.0001562866547768191, + "loss": 2.6359, + "step": 6263 + }, + { + "epoch": 0.5055282059559357, + "grad_norm": 0.7431469559669495, + "learning_rate": 0.0001562736054500139, + "loss": 2.6167, + "step": 6264 + }, + { + "epoch": 0.5056089096925188, + "grad_norm": 0.8395694494247437, + "learning_rate": 0.00015626055472072324, + "loss": 2.7217, + "step": 6265 + }, + { + "epoch": 0.5056896134291018, + "grad_norm": 0.7318898439407349, + "learning_rate": 0.0001562475025892726, + "loss": 2.6866, + "step": 6266 + }, + { + "epoch": 0.5057703171656848, + "grad_norm": 0.7487025856971741, + "learning_rate": 0.0001562344490559871, + "loss": 2.7206, + "step": 6267 + }, + { + "epoch": 0.5058510209022677, + "grad_norm": 0.8187269568443298, + "learning_rate": 0.00015622139412119212, + "loss": 2.658, + "step": 6268 + }, + { + "epoch": 0.5059317246388508, + "grad_norm": 0.6714495420455933, + "learning_rate": 0.00015620833778521307, + "loss": 2.6182, + "step": 6269 + }, + { + "epoch": 0.5060124283754338, + "grad_norm": 0.7556246519088745, + "learning_rate": 0.00015619528004837528, + "loss": 2.6502, + "step": 6270 + }, + { + "epoch": 0.5060931321120168, + "grad_norm": 0.6989960074424744, + "learning_rate": 0.00015618222091100424, + "loss": 2.6031, + "step": 6271 + }, + { + "epoch": 0.5061738358485998, + "grad_norm": 0.7002139091491699, + "learning_rate": 0.0001561691603734254, + "loss": 2.6563, + "step": 6272 + }, + { + "epoch": 0.5062545395851827, + "grad_norm": 0.7064816355705261, + "learning_rate": 0.00015615609843596423, + "loss": 2.6482, + "step": 6273 + }, + { + "epoch": 0.5063352433217658, + "grad_norm": 0.6971433162689209, + "learning_rate": 
0.00015614303509894634, + "loss": 2.6522, + "step": 6274 + }, + { + "epoch": 0.5064159470583488, + "grad_norm": 0.6982942223548889, + "learning_rate": 0.0001561299703626972, + "loss": 2.6477, + "step": 6275 + }, + { + "epoch": 0.5064966507949318, + "grad_norm": 0.7219811081886292, + "learning_rate": 0.0001561169042275425, + "loss": 2.6514, + "step": 6276 + }, + { + "epoch": 0.5065773545315148, + "grad_norm": 0.7391932010650635, + "learning_rate": 0.00015610383669380787, + "loss": 2.698, + "step": 6277 + }, + { + "epoch": 0.5066580582680978, + "grad_norm": 0.7852853536605835, + "learning_rate": 0.00015609076776181894, + "loss": 2.6281, + "step": 6278 + }, + { + "epoch": 0.5067387620046808, + "grad_norm": 0.7435647249221802, + "learning_rate": 0.00015607769743190147, + "loss": 2.6403, + "step": 6279 + }, + { + "epoch": 0.5068194657412638, + "grad_norm": 0.7300949096679688, + "learning_rate": 0.00015606462570438119, + "loss": 2.6125, + "step": 6280 + }, + { + "epoch": 0.5069001694778468, + "grad_norm": 0.7081549167633057, + "learning_rate": 0.00015605155257958388, + "loss": 2.6192, + "step": 6281 + }, + { + "epoch": 0.5069808732144299, + "grad_norm": 0.709020733833313, + "learning_rate": 0.00015603847805783537, + "loss": 2.6745, + "step": 6282 + }, + { + "epoch": 0.5070615769510128, + "grad_norm": 0.691684901714325, + "learning_rate": 0.0001560254021394615, + "loss": 2.5638, + "step": 6283 + }, + { + "epoch": 0.5071422806875958, + "grad_norm": 0.8338537812232971, + "learning_rate": 0.00015601232482478813, + "loss": 2.5835, + "step": 6284 + }, + { + "epoch": 0.5072229844241788, + "grad_norm": 0.659436047077179, + "learning_rate": 0.00015599924611414126, + "loss": 2.601, + "step": 6285 + }, + { + "epoch": 0.5073036881607619, + "grad_norm": 0.72590172290802, + "learning_rate": 0.00015598616600784676, + "loss": 2.602, + "step": 6286 + }, + { + "epoch": 0.5073843918973449, + "grad_norm": 0.6704443693161011, + "learning_rate": 0.00015597308450623066, + "loss": 2.5703, + 
"step": 6287 + }, + { + "epoch": 0.5074650956339278, + "grad_norm": 0.7298632264137268, + "learning_rate": 0.00015596000160961898, + "loss": 2.6859, + "step": 6288 + }, + { + "epoch": 0.5075457993705108, + "grad_norm": 0.6900345087051392, + "learning_rate": 0.00015594691731833776, + "loss": 2.6264, + "step": 6289 + }, + { + "epoch": 0.5076265031070939, + "grad_norm": 0.6705992221832275, + "learning_rate": 0.0001559338316327131, + "loss": 2.6135, + "step": 6290 + }, + { + "epoch": 0.5077072068436769, + "grad_norm": 0.691545307636261, + "learning_rate": 0.0001559207445530712, + "loss": 2.6538, + "step": 6291 + }, + { + "epoch": 0.5077879105802598, + "grad_norm": 0.6579985618591309, + "learning_rate": 0.00015590765607973811, + "loss": 2.6224, + "step": 6292 + }, + { + "epoch": 0.5078686143168428, + "grad_norm": 0.6938790678977966, + "learning_rate": 0.00015589456621304014, + "loss": 2.5932, + "step": 6293 + }, + { + "epoch": 0.5079493180534259, + "grad_norm": 0.7421671748161316, + "learning_rate": 0.00015588147495330346, + "loss": 2.7098, + "step": 6294 + }, + { + "epoch": 0.5080300217900089, + "grad_norm": 0.7076674699783325, + "learning_rate": 0.0001558683823008543, + "loss": 2.664, + "step": 6295 + }, + { + "epoch": 0.5081107255265919, + "grad_norm": 0.6829726696014404, + "learning_rate": 0.00015585528825601906, + "loss": 2.6029, + "step": 6296 + }, + { + "epoch": 0.5081914292631748, + "grad_norm": 0.6968080401420593, + "learning_rate": 0.000155842192819124, + "loss": 2.6256, + "step": 6297 + }, + { + "epoch": 0.5082721329997579, + "grad_norm": 0.7453410625457764, + "learning_rate": 0.00015582909599049554, + "loss": 2.6577, + "step": 6298 + }, + { + "epoch": 0.5083528367363409, + "grad_norm": 0.6603519916534424, + "learning_rate": 0.00015581599777046007, + "loss": 2.6066, + "step": 6299 + }, + { + "epoch": 0.5084335404729239, + "grad_norm": 0.7096173763275146, + "learning_rate": 0.00015580289815934401, + "loss": 2.5488, + "step": 6300 + }, + { + "epoch": 
0.5085142442095069, + "grad_norm": 0.799298107624054, + "learning_rate": 0.0001557897971574739, + "loss": 2.6021, + "step": 6301 + }, + { + "epoch": 0.50859494794609, + "grad_norm": 0.6820314526557922, + "learning_rate": 0.00015577669476517618, + "loss": 2.6276, + "step": 6302 + }, + { + "epoch": 0.5086756516826729, + "grad_norm": 0.7119347453117371, + "learning_rate": 0.00015576359098277742, + "loss": 2.6627, + "step": 6303 + }, + { + "epoch": 0.5087563554192559, + "grad_norm": 0.7638720273971558, + "learning_rate": 0.00015575048581060422, + "loss": 2.6824, + "step": 6304 + }, + { + "epoch": 0.5088370591558389, + "grad_norm": 0.7360339164733887, + "learning_rate": 0.00015573737924898316, + "loss": 2.5805, + "step": 6305 + }, + { + "epoch": 0.508917762892422, + "grad_norm": 0.7220984697341919, + "learning_rate": 0.00015572427129824091, + "loss": 2.6374, + "step": 6306 + }, + { + "epoch": 0.5089984666290049, + "grad_norm": 0.670964777469635, + "learning_rate": 0.00015571116195870418, + "loss": 2.6371, + "step": 6307 + }, + { + "epoch": 0.5090791703655879, + "grad_norm": 0.7826075553894043, + "learning_rate": 0.00015569805123069968, + "loss": 2.7666, + "step": 6308 + }, + { + "epoch": 0.5091598741021709, + "grad_norm": 0.7691593766212463, + "learning_rate": 0.00015568493911455412, + "loss": 2.6242, + "step": 6309 + }, + { + "epoch": 0.509240577838754, + "grad_norm": 0.714500367641449, + "learning_rate": 0.0001556718256105943, + "loss": 2.6551, + "step": 6310 + }, + { + "epoch": 0.509321281575337, + "grad_norm": 0.7634009718894958, + "learning_rate": 0.00015565871071914706, + "loss": 2.7069, + "step": 6311 + }, + { + "epoch": 0.5094019853119199, + "grad_norm": 0.7134168148040771, + "learning_rate": 0.00015564559444053926, + "loss": 2.5816, + "step": 6312 + }, + { + "epoch": 0.5094826890485029, + "grad_norm": 0.6548121571540833, + "learning_rate": 0.0001556324767750978, + "loss": 2.6192, + "step": 6313 + }, + { + "epoch": 0.509563392785086, + "grad_norm": 
0.7244428992271423, + "learning_rate": 0.0001556193577231496, + "loss": 2.6072, + "step": 6314 + }, + { + "epoch": 0.509644096521669, + "grad_norm": 0.6976662278175354, + "learning_rate": 0.0001556062372850216, + "loss": 2.6148, + "step": 6315 + }, + { + "epoch": 0.509724800258252, + "grad_norm": 0.772726833820343, + "learning_rate": 0.00015559311546104083, + "loss": 2.6458, + "step": 6316 + }, + { + "epoch": 0.5098055039948349, + "grad_norm": 0.7976188659667969, + "learning_rate": 0.00015557999225153428, + "loss": 2.6772, + "step": 6317 + }, + { + "epoch": 0.509886207731418, + "grad_norm": 0.6458039283752441, + "learning_rate": 0.00015556686765682903, + "loss": 2.6143, + "step": 6318 + }, + { + "epoch": 0.509966911468001, + "grad_norm": 0.7295405268669128, + "learning_rate": 0.0001555537416772522, + "loss": 2.5919, + "step": 6319 + }, + { + "epoch": 0.510047615204584, + "grad_norm": 0.657978355884552, + "learning_rate": 0.00015554061431313093, + "loss": 2.6245, + "step": 6320 + }, + { + "epoch": 0.510128318941167, + "grad_norm": 0.6726922392845154, + "learning_rate": 0.00015552748556479232, + "loss": 2.6207, + "step": 6321 + }, + { + "epoch": 0.51020902267775, + "grad_norm": 0.7954673767089844, + "learning_rate": 0.00015551435543256363, + "loss": 2.7177, + "step": 6322 + }, + { + "epoch": 0.510289726414333, + "grad_norm": 0.7186735272407532, + "learning_rate": 0.00015550122391677211, + "loss": 2.5953, + "step": 6323 + }, + { + "epoch": 0.510370430150916, + "grad_norm": 0.7835420966148376, + "learning_rate": 0.00015548809101774498, + "loss": 2.7039, + "step": 6324 + }, + { + "epoch": 0.510451133887499, + "grad_norm": 0.6966592073440552, + "learning_rate": 0.00015547495673580962, + "loss": 2.6287, + "step": 6325 + }, + { + "epoch": 0.5105318376240819, + "grad_norm": 0.6676180362701416, + "learning_rate": 0.00015546182107129328, + "loss": 2.638, + "step": 6326 + }, + { + "epoch": 0.510612541360665, + "grad_norm": 0.7285657525062561, + "learning_rate": 
0.0001554486840245234, + "loss": 2.6661, + "step": 6327 + }, + { + "epoch": 0.510693245097248, + "grad_norm": 0.6453657150268555, + "learning_rate": 0.00015543554559582735, + "loss": 2.715, + "step": 6328 + }, + { + "epoch": 0.510773948833831, + "grad_norm": 0.7364684343338013, + "learning_rate": 0.0001554224057855326, + "loss": 2.6475, + "step": 6329 + }, + { + "epoch": 0.510854652570414, + "grad_norm": 0.670894980430603, + "learning_rate": 0.00015540926459396665, + "loss": 2.6091, + "step": 6330 + }, + { + "epoch": 0.510935356306997, + "grad_norm": 0.6750168204307556, + "learning_rate": 0.00015539612202145696, + "loss": 2.6473, + "step": 6331 + }, + { + "epoch": 0.51101606004358, + "grad_norm": 0.6552454233169556, + "learning_rate": 0.0001553829780683311, + "loss": 2.6158, + "step": 6332 + }, + { + "epoch": 0.511096763780163, + "grad_norm": 0.7387828230857849, + "learning_rate": 0.00015536983273491668, + "loss": 2.6219, + "step": 6333 + }, + { + "epoch": 0.511177467516746, + "grad_norm": 0.6993975639343262, + "learning_rate": 0.00015535668602154127, + "loss": 2.6446, + "step": 6334 + }, + { + "epoch": 0.5112581712533291, + "grad_norm": 0.6491217613220215, + "learning_rate": 0.00015534353792853254, + "loss": 2.6404, + "step": 6335 + }, + { + "epoch": 0.511338874989912, + "grad_norm": 0.7165521383285522, + "learning_rate": 0.0001553303884562182, + "loss": 2.6339, + "step": 6336 + }, + { + "epoch": 0.511419578726495, + "grad_norm": 0.7363756895065308, + "learning_rate": 0.0001553172376049259, + "loss": 2.6411, + "step": 6337 + }, + { + "epoch": 0.511500282463078, + "grad_norm": 0.7148438096046448, + "learning_rate": 0.00015530408537498347, + "loss": 2.5617, + "step": 6338 + }, + { + "epoch": 0.5115809861996611, + "grad_norm": 0.7140451669692993, + "learning_rate": 0.00015529093176671864, + "loss": 2.5898, + "step": 6339 + }, + { + "epoch": 0.5116616899362441, + "grad_norm": 0.7799252271652222, + "learning_rate": 0.00015527777678045926, + "loss": 2.6176, + "step": 
6340 + }, + { + "epoch": 0.511742393672827, + "grad_norm": 0.7292928099632263, + "learning_rate": 0.00015526462041653323, + "loss": 2.6722, + "step": 6341 + }, + { + "epoch": 0.51182309740941, + "grad_norm": 0.6986904740333557, + "learning_rate": 0.00015525146267526837, + "loss": 2.6154, + "step": 6342 + }, + { + "epoch": 0.5119038011459931, + "grad_norm": 0.7239612936973572, + "learning_rate": 0.00015523830355699262, + "loss": 2.5664, + "step": 6343 + }, + { + "epoch": 0.5119845048825761, + "grad_norm": 0.6805121898651123, + "learning_rate": 0.00015522514306203395, + "loss": 2.6204, + "step": 6344 + }, + { + "epoch": 0.512065208619159, + "grad_norm": 0.7036689519882202, + "learning_rate": 0.00015521198119072035, + "loss": 2.6211, + "step": 6345 + }, + { + "epoch": 0.512145912355742, + "grad_norm": 0.7155849933624268, + "learning_rate": 0.00015519881794337988, + "loss": 2.6074, + "step": 6346 + }, + { + "epoch": 0.5122266160923251, + "grad_norm": 0.7183938026428223, + "learning_rate": 0.00015518565332034057, + "loss": 2.6148, + "step": 6347 + }, + { + "epoch": 0.5123073198289081, + "grad_norm": 0.7053570747375488, + "learning_rate": 0.0001551724873219305, + "loss": 2.6476, + "step": 6348 + }, + { + "epoch": 0.5123880235654911, + "grad_norm": 0.714846670627594, + "learning_rate": 0.00015515931994847785, + "loss": 2.5728, + "step": 6349 + }, + { + "epoch": 0.512468727302074, + "grad_norm": 0.7504729628562927, + "learning_rate": 0.00015514615120031076, + "loss": 2.6415, + "step": 6350 + }, + { + "epoch": 0.5125494310386571, + "grad_norm": 0.6940335035324097, + "learning_rate": 0.0001551329810777574, + "loss": 2.6115, + "step": 6351 + }, + { + "epoch": 0.5126301347752401, + "grad_norm": 0.7166119813919067, + "learning_rate": 0.00015511980958114608, + "loss": 2.6284, + "step": 6352 + }, + { + "epoch": 0.5127108385118231, + "grad_norm": 0.7787839770317078, + "learning_rate": 0.00015510663671080497, + "loss": 2.6385, + "step": 6353 + }, + { + "epoch": 0.5127915422484061, 
+ "grad_norm": 0.7298412322998047, + "learning_rate": 0.00015509346246706245, + "loss": 2.629, + "step": 6354 + }, + { + "epoch": 0.5128722459849892, + "grad_norm": 0.7918897271156311, + "learning_rate": 0.00015508028685024683, + "loss": 2.6777, + "step": 6355 + }, + { + "epoch": 0.5129529497215721, + "grad_norm": 0.6867843866348267, + "learning_rate": 0.00015506710986068646, + "loss": 2.6101, + "step": 6356 + }, + { + "epoch": 0.5130336534581551, + "grad_norm": 0.716468870639801, + "learning_rate": 0.00015505393149870978, + "loss": 2.6558, + "step": 6357 + }, + { + "epoch": 0.5131143571947381, + "grad_norm": 0.6704092621803284, + "learning_rate": 0.0001550407517646452, + "loss": 2.6128, + "step": 6358 + }, + { + "epoch": 0.5131950609313212, + "grad_norm": 0.820716381072998, + "learning_rate": 0.00015502757065882124, + "loss": 2.6052, + "step": 6359 + }, + { + "epoch": 0.5132757646679041, + "grad_norm": 0.7328094840049744, + "learning_rate": 0.00015501438818156635, + "loss": 2.6399, + "step": 6360 + }, + { + "epoch": 0.5133564684044871, + "grad_norm": 0.6602808833122253, + "learning_rate": 0.00015500120433320911, + "loss": 2.5509, + "step": 6361 + }, + { + "epoch": 0.5134371721410701, + "grad_norm": 0.7013166546821594, + "learning_rate": 0.00015498801911407805, + "loss": 2.6439, + "step": 6362 + }, + { + "epoch": 0.5135178758776532, + "grad_norm": 0.7415499091148376, + "learning_rate": 0.00015497483252450186, + "loss": 2.575, + "step": 6363 + }, + { + "epoch": 0.5135985796142362, + "grad_norm": 0.7262336015701294, + "learning_rate": 0.00015496164456480912, + "loss": 2.6815, + "step": 6364 + }, + { + "epoch": 0.5136792833508191, + "grad_norm": 0.7353699803352356, + "learning_rate": 0.0001549484552353285, + "loss": 2.6172, + "step": 6365 + }, + { + "epoch": 0.5137599870874021, + "grad_norm": 0.7005086541175842, + "learning_rate": 0.00015493526453638879, + "loss": 2.5945, + "step": 6366 + }, + { + "epoch": 0.5138406908239852, + "grad_norm": 0.7469770908355713, + 
"learning_rate": 0.00015492207246831864, + "loss": 2.6797, + "step": 6367 + }, + { + "epoch": 0.5139213945605682, + "grad_norm": 0.6768934726715088, + "learning_rate": 0.00015490887903144693, + "loss": 2.6369, + "step": 6368 + }, + { + "epoch": 0.5140020982971512, + "grad_norm": 0.7625820636749268, + "learning_rate": 0.00015489568422610237, + "loss": 2.6182, + "step": 6369 + }, + { + "epoch": 0.5140828020337341, + "grad_norm": 0.749351978302002, + "learning_rate": 0.00015488248805261388, + "loss": 2.6066, + "step": 6370 + }, + { + "epoch": 0.5141635057703172, + "grad_norm": 0.8369480967521667, + "learning_rate": 0.00015486929051131032, + "loss": 2.7627, + "step": 6371 + }, + { + "epoch": 0.5142442095069002, + "grad_norm": 0.6482037305831909, + "learning_rate": 0.0001548560916025206, + "loss": 2.609, + "step": 6372 + }, + { + "epoch": 0.5143249132434832, + "grad_norm": 0.6801851391792297, + "learning_rate": 0.0001548428913265737, + "loss": 2.5878, + "step": 6373 + }, + { + "epoch": 0.5144056169800661, + "grad_norm": 0.744926929473877, + "learning_rate": 0.0001548296896837986, + "loss": 2.6569, + "step": 6374 + }, + { + "epoch": 0.5144863207166491, + "grad_norm": 0.6862614750862122, + "learning_rate": 0.00015481648667452425, + "loss": 2.5626, + "step": 6375 + }, + { + "epoch": 0.5145670244532322, + "grad_norm": 0.7186449766159058, + "learning_rate": 0.0001548032822990798, + "loss": 2.6783, + "step": 6376 + }, + { + "epoch": 0.5146477281898152, + "grad_norm": 0.699715256690979, + "learning_rate": 0.0001547900765577943, + "loss": 2.6709, + "step": 6377 + }, + { + "epoch": 0.5147284319263982, + "grad_norm": 0.7272205352783203, + "learning_rate": 0.00015477686945099687, + "loss": 2.6076, + "step": 6378 + }, + { + "epoch": 0.5148091356629811, + "grad_norm": 0.7667459845542908, + "learning_rate": 0.00015476366097901667, + "loss": 2.6541, + "step": 6379 + }, + { + "epoch": 0.5148898393995642, + "grad_norm": 0.6538121700286865, + "learning_rate": 0.00015475045114218285, + 
"loss": 2.5806, + "step": 6380 + }, + { + "epoch": 0.5149705431361472, + "grad_norm": 0.7388994097709656, + "learning_rate": 0.00015473723994082473, + "loss": 2.6293, + "step": 6381 + }, + { + "epoch": 0.5150512468727302, + "grad_norm": 0.7044215202331543, + "learning_rate": 0.00015472402737527142, + "loss": 2.5755, + "step": 6382 + }, + { + "epoch": 0.5151319506093132, + "grad_norm": 0.6807994246482849, + "learning_rate": 0.00015471081344585236, + "loss": 2.6493, + "step": 6383 + }, + { + "epoch": 0.5152126543458962, + "grad_norm": 0.676278293132782, + "learning_rate": 0.00015469759815289681, + "loss": 2.6319, + "step": 6384 + }, + { + "epoch": 0.5152933580824792, + "grad_norm": 0.7515453696250916, + "learning_rate": 0.00015468438149673412, + "loss": 2.6415, + "step": 6385 + }, + { + "epoch": 0.5153740618190622, + "grad_norm": 0.8694239854812622, + "learning_rate": 0.0001546711634776937, + "loss": 2.5818, + "step": 6386 + }, + { + "epoch": 0.5154547655556452, + "grad_norm": 0.717090368270874, + "learning_rate": 0.000154657944096105, + "loss": 2.7132, + "step": 6387 + }, + { + "epoch": 0.5155354692922283, + "grad_norm": 0.7098804116249084, + "learning_rate": 0.00015464472335229742, + "loss": 2.564, + "step": 6388 + }, + { + "epoch": 0.5156161730288112, + "grad_norm": 0.6879690289497375, + "learning_rate": 0.0001546315012466005, + "loss": 2.6094, + "step": 6389 + }, + { + "epoch": 0.5156968767653942, + "grad_norm": 0.7110763788223267, + "learning_rate": 0.00015461827777934377, + "loss": 2.5982, + "step": 6390 + }, + { + "epoch": 0.5157775805019772, + "grad_norm": 0.7168039679527283, + "learning_rate": 0.00015460505295085677, + "loss": 2.5451, + "step": 6391 + }, + { + "epoch": 0.5158582842385603, + "grad_norm": 0.7059877514839172, + "learning_rate": 0.00015459182676146914, + "loss": 2.6655, + "step": 6392 + }, + { + "epoch": 0.5159389879751433, + "grad_norm": 0.7278143763542175, + "learning_rate": 0.00015457859921151043, + "loss": 2.6587, + "step": 6393 + }, + { + 
"epoch": 0.5160196917117262, + "grad_norm": 0.7301023602485657, + "learning_rate": 0.0001545653703013104, + "loss": 2.7672, + "step": 6394 + }, + { + "epoch": 0.5161003954483092, + "grad_norm": 0.6933302283287048, + "learning_rate": 0.0001545521400311987, + "loss": 2.5924, + "step": 6395 + }, + { + "epoch": 0.5161810991848923, + "grad_norm": 0.7074775099754333, + "learning_rate": 0.00015453890840150508, + "loss": 2.6663, + "step": 6396 + }, + { + "epoch": 0.5162618029214753, + "grad_norm": 0.7069801092147827, + "learning_rate": 0.00015452567541255924, + "loss": 2.6791, + "step": 6397 + }, + { + "epoch": 0.5163425066580583, + "grad_norm": 0.6586462259292603, + "learning_rate": 0.00015451244106469108, + "loss": 2.6368, + "step": 6398 + }, + { + "epoch": 0.5164232103946412, + "grad_norm": 0.6862531900405884, + "learning_rate": 0.00015449920535823042, + "loss": 2.7099, + "step": 6399 + }, + { + "epoch": 0.5165039141312243, + "grad_norm": 0.7177795767784119, + "learning_rate": 0.00015448596829350706, + "loss": 2.5921, + "step": 6400 + }, + { + "epoch": 0.5165846178678073, + "grad_norm": 0.6936569213867188, + "learning_rate": 0.00015447272987085094, + "loss": 2.5739, + "step": 6401 + }, + { + "epoch": 0.5166653216043903, + "grad_norm": 0.7394363284111023, + "learning_rate": 0.00015445949009059202, + "loss": 2.5941, + "step": 6402 + }, + { + "epoch": 0.5167460253409732, + "grad_norm": 0.6713366508483887, + "learning_rate": 0.00015444624895306027, + "loss": 2.574, + "step": 6403 + }, + { + "epoch": 0.5168267290775563, + "grad_norm": 0.679128885269165, + "learning_rate": 0.0001544330064585856, + "loss": 2.6422, + "step": 6404 + }, + { + "epoch": 0.5169074328141393, + "grad_norm": 0.6803367137908936, + "learning_rate": 0.0001544197626074982, + "loss": 2.6503, + "step": 6405 + }, + { + "epoch": 0.5169881365507223, + "grad_norm": 0.8009794354438782, + "learning_rate": 0.000154406517400128, + "loss": 2.6434, + "step": 6406 + }, + { + "epoch": 0.5170688402873053, + "grad_norm": 
0.7292529344558716, + "learning_rate": 0.00015439327083680517, + "loss": 2.6333, + "step": 6407 + }, + { + "epoch": 0.5171495440238884, + "grad_norm": 0.67046719789505, + "learning_rate": 0.00015438002291785988, + "loss": 2.5791, + "step": 6408 + }, + { + "epoch": 0.5172302477604713, + "grad_norm": 0.755501925945282, + "learning_rate": 0.00015436677364362225, + "loss": 2.5558, + "step": 6409 + }, + { + "epoch": 0.5173109514970543, + "grad_norm": 0.6957115530967712, + "learning_rate": 0.0001543535230144225, + "loss": 2.5839, + "step": 6410 + }, + { + "epoch": 0.5173916552336373, + "grad_norm": 0.6629074215888977, + "learning_rate": 0.0001543402710305909, + "loss": 2.6529, + "step": 6411 + }, + { + "epoch": 0.5174723589702204, + "grad_norm": 0.6647019386291504, + "learning_rate": 0.00015432701769245766, + "loss": 2.589, + "step": 6412 + }, + { + "epoch": 0.5175530627068033, + "grad_norm": 0.6472512483596802, + "learning_rate": 0.00015431376300035316, + "loss": 2.6184, + "step": 6413 + }, + { + "epoch": 0.5176337664433863, + "grad_norm": 0.6900136470794678, + "learning_rate": 0.0001543005069546077, + "loss": 2.7029, + "step": 6414 + }, + { + "epoch": 0.5177144701799693, + "grad_norm": 0.7702177166938782, + "learning_rate": 0.00015428724955555165, + "loss": 2.6189, + "step": 6415 + }, + { + "epoch": 0.5177951739165524, + "grad_norm": 0.641655445098877, + "learning_rate": 0.00015427399080351545, + "loss": 2.6486, + "step": 6416 + }, + { + "epoch": 0.5178758776531354, + "grad_norm": 0.6826485991477966, + "learning_rate": 0.00015426073069882952, + "loss": 2.6105, + "step": 6417 + }, + { + "epoch": 0.5179565813897183, + "grad_norm": 0.749812662601471, + "learning_rate": 0.00015424746924182434, + "loss": 2.5644, + "step": 6418 + }, + { + "epoch": 0.5180372851263013, + "grad_norm": 0.6737890243530273, + "learning_rate": 0.0001542342064328304, + "loss": 2.686, + "step": 6419 + }, + { + "epoch": 0.5181179888628844, + "grad_norm": 0.7131822109222412, + "learning_rate": 
0.0001542209422721783, + "loss": 2.697, + "step": 6420 + }, + { + "epoch": 0.5181986925994674, + "grad_norm": 0.7543746829032898, + "learning_rate": 0.0001542076767601986, + "loss": 2.6349, + "step": 6421 + }, + { + "epoch": 0.5182793963360504, + "grad_norm": 0.7589309215545654, + "learning_rate": 0.00015419440989722184, + "loss": 2.63, + "step": 6422 + }, + { + "epoch": 0.5183601000726333, + "grad_norm": 0.7036365866661072, + "learning_rate": 0.00015418114168357872, + "loss": 2.605, + "step": 6423 + }, + { + "epoch": 0.5184408038092164, + "grad_norm": 0.733161985874176, + "learning_rate": 0.00015416787211959998, + "loss": 2.6708, + "step": 6424 + }, + { + "epoch": 0.5185215075457994, + "grad_norm": 0.6928101181983948, + "learning_rate": 0.00015415460120561623, + "loss": 2.6549, + "step": 6425 + }, + { + "epoch": 0.5186022112823824, + "grad_norm": 0.6557250022888184, + "learning_rate": 0.00015414132894195825, + "loss": 2.6185, + "step": 6426 + }, + { + "epoch": 0.5186829150189654, + "grad_norm": 0.7236297726631165, + "learning_rate": 0.00015412805532895684, + "loss": 2.6185, + "step": 6427 + }, + { + "epoch": 0.5187636187555483, + "grad_norm": 0.7194060683250427, + "learning_rate": 0.0001541147803669428, + "loss": 2.6123, + "step": 6428 + }, + { + "epoch": 0.5188443224921314, + "grad_norm": 0.7077342867851257, + "learning_rate": 0.00015410150405624696, + "loss": 2.6628, + "step": 6429 + }, + { + "epoch": 0.5189250262287144, + "grad_norm": 0.7036150693893433, + "learning_rate": 0.00015408822639720023, + "loss": 2.5966, + "step": 6430 + }, + { + "epoch": 0.5190057299652974, + "grad_norm": 0.7047349810600281, + "learning_rate": 0.00015407494739013352, + "loss": 2.6626, + "step": 6431 + }, + { + "epoch": 0.5190864337018803, + "grad_norm": 0.7537584900856018, + "learning_rate": 0.00015406166703537777, + "loss": 2.6452, + "step": 6432 + }, + { + "epoch": 0.5191671374384634, + "grad_norm": 0.7944707870483398, + "learning_rate": 0.00015404838533326394, + "loss": 2.6834, + 
"step": 6433 + }, + { + "epoch": 0.5192478411750464, + "grad_norm": 0.8602458238601685, + "learning_rate": 0.00015403510228412305, + "loss": 2.6238, + "step": 6434 + }, + { + "epoch": 0.5193285449116294, + "grad_norm": 0.7181896567344666, + "learning_rate": 0.0001540218178882862, + "loss": 2.652, + "step": 6435 + }, + { + "epoch": 0.5194092486482124, + "grad_norm": 0.7470960021018982, + "learning_rate": 0.0001540085321460844, + "loss": 2.6703, + "step": 6436 + }, + { + "epoch": 0.5194899523847955, + "grad_norm": 0.8249944448471069, + "learning_rate": 0.00015399524505784883, + "loss": 2.5945, + "step": 6437 + }, + { + "epoch": 0.5195706561213784, + "grad_norm": 0.7332444190979004, + "learning_rate": 0.00015398195662391057, + "loss": 2.6472, + "step": 6438 + }, + { + "epoch": 0.5196513598579614, + "grad_norm": 0.7727739810943604, + "learning_rate": 0.0001539686668446009, + "loss": 2.6276, + "step": 6439 + }, + { + "epoch": 0.5197320635945444, + "grad_norm": 0.7161617279052734, + "learning_rate": 0.00015395537572025094, + "loss": 2.624, + "step": 6440 + }, + { + "epoch": 0.5198127673311275, + "grad_norm": 0.7657529711723328, + "learning_rate": 0.00015394208325119198, + "loss": 2.6604, + "step": 6441 + }, + { + "epoch": 0.5198934710677104, + "grad_norm": 0.732904314994812, + "learning_rate": 0.00015392878943775527, + "loss": 2.6334, + "step": 6442 + }, + { + "epoch": 0.5199741748042934, + "grad_norm": 0.7058991193771362, + "learning_rate": 0.0001539154942802722, + "loss": 2.5936, + "step": 6443 + }, + { + "epoch": 0.5200548785408764, + "grad_norm": 0.7328821420669556, + "learning_rate": 0.00015390219777907405, + "loss": 2.5969, + "step": 6444 + }, + { + "epoch": 0.5201355822774595, + "grad_norm": 0.7899969220161438, + "learning_rate": 0.00015388889993449224, + "loss": 2.5856, + "step": 6445 + }, + { + "epoch": 0.5202162860140425, + "grad_norm": 0.6963860392570496, + "learning_rate": 0.00015387560074685817, + "loss": 2.6139, + "step": 6446 + }, + { + "epoch": 
0.5202969897506254, + "grad_norm": 0.812053918838501, + "learning_rate": 0.00015386230021650327, + "loss": 2.716, + "step": 6447 + }, + { + "epoch": 0.5203776934872084, + "grad_norm": 0.766781210899353, + "learning_rate": 0.0001538489983437591, + "loss": 2.6509, + "step": 6448 + }, + { + "epoch": 0.5204583972237915, + "grad_norm": 0.6877299547195435, + "learning_rate": 0.00015383569512895712, + "loss": 2.6076, + "step": 6449 + }, + { + "epoch": 0.5205391009603745, + "grad_norm": 0.7009176015853882, + "learning_rate": 0.00015382239057242888, + "loss": 2.608, + "step": 6450 + }, + { + "epoch": 0.5206198046969575, + "grad_norm": 0.7187578678131104, + "learning_rate": 0.000153809084674506, + "loss": 2.5946, + "step": 6451 + }, + { + "epoch": 0.5207005084335404, + "grad_norm": 0.7242687344551086, + "learning_rate": 0.00015379577743552001, + "loss": 2.6752, + "step": 6452 + }, + { + "epoch": 0.5207812121701235, + "grad_norm": 0.7668174505233765, + "learning_rate": 0.00015378246885580266, + "loss": 2.6694, + "step": 6453 + }, + { + "epoch": 0.5208619159067065, + "grad_norm": 0.7676039338111877, + "learning_rate": 0.00015376915893568557, + "loss": 2.6379, + "step": 6454 + }, + { + "epoch": 0.5209426196432895, + "grad_norm": 0.7394412159919739, + "learning_rate": 0.00015375584767550053, + "loss": 2.6046, + "step": 6455 + }, + { + "epoch": 0.5210233233798724, + "grad_norm": 0.7246636748313904, + "learning_rate": 0.00015374253507557923, + "loss": 2.592, + "step": 6456 + }, + { + "epoch": 0.5211040271164555, + "grad_norm": 0.7121255993843079, + "learning_rate": 0.00015372922113625345, + "loss": 2.634, + "step": 6457 + }, + { + "epoch": 0.5211847308530385, + "grad_norm": 0.7378345131874084, + "learning_rate": 0.00015371590585785505, + "loss": 2.5753, + "step": 6458 + }, + { + "epoch": 0.5212654345896215, + "grad_norm": 0.6682030558586121, + "learning_rate": 0.00015370258924071587, + "loss": 2.6305, + "step": 6459 + }, + { + "epoch": 0.5213461383262045, + "grad_norm": 
0.7164177894592285, + "learning_rate": 0.00015368927128516776, + "loss": 2.7188, + "step": 6460 + }, + { + "epoch": 0.5214268420627876, + "grad_norm": 0.7341115474700928, + "learning_rate": 0.00015367595199154273, + "loss": 2.6204, + "step": 6461 + }, + { + "epoch": 0.5215075457993705, + "grad_norm": 0.6781840920448303, + "learning_rate": 0.00015366263136017258, + "loss": 2.6104, + "step": 6462 + }, + { + "epoch": 0.5215882495359535, + "grad_norm": 0.7029077410697937, + "learning_rate": 0.0001536493093913894, + "loss": 2.6055, + "step": 6463 + }, + { + "epoch": 0.5216689532725365, + "grad_norm": 0.6958553194999695, + "learning_rate": 0.00015363598608552522, + "loss": 2.5991, + "step": 6464 + }, + { + "epoch": 0.5217496570091196, + "grad_norm": 0.6919750571250916, + "learning_rate": 0.00015362266144291207, + "loss": 2.6022, + "step": 6465 + }, + { + "epoch": 0.5218303607457025, + "grad_norm": 0.6980622410774231, + "learning_rate": 0.000153609335463882, + "loss": 2.6289, + "step": 6466 + }, + { + "epoch": 0.5219110644822855, + "grad_norm": 0.7468248009681702, + "learning_rate": 0.00015359600814876715, + "loss": 2.6327, + "step": 6467 + }, + { + "epoch": 0.5219917682188685, + "grad_norm": 0.7183729410171509, + "learning_rate": 0.00015358267949789966, + "loss": 2.6389, + "step": 6468 + }, + { + "epoch": 0.5220724719554516, + "grad_norm": 0.6558868885040283, + "learning_rate": 0.00015356934951161178, + "loss": 2.6261, + "step": 6469 + }, + { + "epoch": 0.5221531756920346, + "grad_norm": 0.8000216484069824, + "learning_rate": 0.00015355601819023562, + "loss": 2.6908, + "step": 6470 + }, + { + "epoch": 0.5222338794286175, + "grad_norm": 0.775056004524231, + "learning_rate": 0.00015354268553410355, + "loss": 2.6763, + "step": 6471 + }, + { + "epoch": 0.5223145831652005, + "grad_norm": 0.7345123291015625, + "learning_rate": 0.00015352935154354776, + "loss": 2.582, + "step": 6472 + }, + { + "epoch": 0.5223952869017836, + "grad_norm": 0.731311023235321, + "learning_rate": 
0.0001535160162189006, + "loss": 2.6519, + "step": 6473 + }, + { + "epoch": 0.5224759906383666, + "grad_norm": 0.6481007933616638, + "learning_rate": 0.00015350267956049443, + "loss": 2.5695, + "step": 6474 + }, + { + "epoch": 0.5225566943749496, + "grad_norm": 0.7698814868927002, + "learning_rate": 0.00015348934156866163, + "loss": 2.5732, + "step": 6475 + }, + { + "epoch": 0.5226373981115325, + "grad_norm": 0.7404680848121643, + "learning_rate": 0.00015347600224373462, + "loss": 2.5826, + "step": 6476 + }, + { + "epoch": 0.5227181018481155, + "grad_norm": 0.6965613961219788, + "learning_rate": 0.00015346266158604584, + "loss": 2.6069, + "step": 6477 + }, + { + "epoch": 0.5227988055846986, + "grad_norm": 0.6611152291297913, + "learning_rate": 0.00015344931959592777, + "loss": 2.4937, + "step": 6478 + }, + { + "epoch": 0.5228795093212816, + "grad_norm": 0.7418150305747986, + "learning_rate": 0.00015343597627371296, + "loss": 2.5747, + "step": 6479 + }, + { + "epoch": 0.5229602130578646, + "grad_norm": 0.6847610473632812, + "learning_rate": 0.00015342263161973393, + "loss": 2.5906, + "step": 6480 + }, + { + "epoch": 0.5230409167944475, + "grad_norm": 0.7054881453514099, + "learning_rate": 0.00015340928563432326, + "loss": 2.5914, + "step": 6481 + }, + { + "epoch": 0.5231216205310306, + "grad_norm": 0.6918888092041016, + "learning_rate": 0.0001533959383178136, + "loss": 2.6412, + "step": 6482 + }, + { + "epoch": 0.5232023242676136, + "grad_norm": 0.7232856154441833, + "learning_rate": 0.00015338258967053755, + "loss": 2.6364, + "step": 6483 + }, + { + "epoch": 0.5232830280041966, + "grad_norm": 0.7345031499862671, + "learning_rate": 0.00015336923969282786, + "loss": 2.6649, + "step": 6484 + }, + { + "epoch": 0.5233637317407795, + "grad_norm": 0.7644383907318115, + "learning_rate": 0.0001533558883850172, + "loss": 2.6949, + "step": 6485 + }, + { + "epoch": 0.5234444354773626, + "grad_norm": 0.6532372832298279, + "learning_rate": 0.0001533425357474383, + "loss": 
2.5915, + "step": 6486 + }, + { + "epoch": 0.5235251392139456, + "grad_norm": 0.7089118361473083, + "learning_rate": 0.000153329181780424, + "loss": 2.6446, + "step": 6487 + }, + { + "epoch": 0.5236058429505286, + "grad_norm": 0.6966068148612976, + "learning_rate": 0.00015331582648430705, + "loss": 2.6764, + "step": 6488 + }, + { + "epoch": 0.5236865466871116, + "grad_norm": 0.7130835056304932, + "learning_rate": 0.00015330246985942035, + "loss": 2.6279, + "step": 6489 + }, + { + "epoch": 0.5237672504236947, + "grad_norm": 0.729727029800415, + "learning_rate": 0.00015328911190609678, + "loss": 2.612, + "step": 6490 + }, + { + "epoch": 0.5238479541602776, + "grad_norm": 0.6804213523864746, + "learning_rate": 0.0001532757526246692, + "loss": 2.6113, + "step": 6491 + }, + { + "epoch": 0.5239286578968606, + "grad_norm": 0.7324437499046326, + "learning_rate": 0.0001532623920154707, + "loss": 2.6054, + "step": 6492 + }, + { + "epoch": 0.5240093616334436, + "grad_norm": 0.6166699528694153, + "learning_rate": 0.00015324903007883406, + "loss": 2.5822, + "step": 6493 + }, + { + "epoch": 0.5240900653700267, + "grad_norm": 0.7339944839477539, + "learning_rate": 0.00015323566681509242, + "loss": 2.6204, + "step": 6494 + }, + { + "epoch": 0.5241707691066096, + "grad_norm": 0.7267727255821228, + "learning_rate": 0.00015322230222457886, + "loss": 2.6094, + "step": 6495 + }, + { + "epoch": 0.5242514728431926, + "grad_norm": 0.6417120695114136, + "learning_rate": 0.00015320893630762635, + "loss": 2.6044, + "step": 6496 + }, + { + "epoch": 0.5243321765797756, + "grad_norm": 0.7092922329902649, + "learning_rate": 0.00015319556906456808, + "loss": 2.6428, + "step": 6497 + }, + { + "epoch": 0.5244128803163587, + "grad_norm": 0.7482922673225403, + "learning_rate": 0.00015318220049573714, + "loss": 2.6025, + "step": 6498 + }, + { + "epoch": 0.5244935840529417, + "grad_norm": 0.691925048828125, + "learning_rate": 0.00015316883060146675, + "loss": 2.6308, + "step": 6499 + }, + { + "epoch": 
0.5245742877895246, + "grad_norm": 0.7084488272666931, + "learning_rate": 0.00015315545938209015, + "loss": 2.6535, + "step": 6500 + }, + { + "epoch": 0.5246549915261076, + "grad_norm": 0.7182802557945251, + "learning_rate": 0.00015314208683794056, + "loss": 2.6045, + "step": 6501 + }, + { + "epoch": 0.5247356952626907, + "grad_norm": 0.7043096423149109, + "learning_rate": 0.00015312871296935122, + "loss": 2.6465, + "step": 6502 + }, + { + "epoch": 0.5248163989992737, + "grad_norm": 0.7679466009140015, + "learning_rate": 0.00015311533777665547, + "loss": 2.6624, + "step": 6503 + }, + { + "epoch": 0.5248971027358567, + "grad_norm": 0.6825870275497437, + "learning_rate": 0.00015310196126018668, + "loss": 2.5548, + "step": 6504 + }, + { + "epoch": 0.5249778064724396, + "grad_norm": 0.7364058494567871, + "learning_rate": 0.00015308858342027816, + "loss": 2.6495, + "step": 6505 + }, + { + "epoch": 0.5250585102090227, + "grad_norm": 0.7333239316940308, + "learning_rate": 0.00015307520425726341, + "loss": 2.5835, + "step": 6506 + }, + { + "epoch": 0.5251392139456057, + "grad_norm": 0.7479620575904846, + "learning_rate": 0.00015306182377147583, + "loss": 2.6065, + "step": 6507 + }, + { + "epoch": 0.5252199176821887, + "grad_norm": 0.7347591519355774, + "learning_rate": 0.00015304844196324888, + "loss": 2.6624, + "step": 6508 + }, + { + "epoch": 0.5253006214187717, + "grad_norm": 0.6879193782806396, + "learning_rate": 0.0001530350588329161, + "loss": 2.6598, + "step": 6509 + }, + { + "epoch": 0.5253813251553547, + "grad_norm": 0.7841597199440002, + "learning_rate": 0.000153021674380811, + "loss": 2.53, + "step": 6510 + }, + { + "epoch": 0.5254620288919377, + "grad_norm": 0.7916845679283142, + "learning_rate": 0.0001530082886072672, + "loss": 2.6995, + "step": 6511 + }, + { + "epoch": 0.5255427326285207, + "grad_norm": 0.7066318988800049, + "learning_rate": 0.0001529949015126183, + "loss": 2.58, + "step": 6512 + }, + { + "epoch": 0.5256234363651037, + "grad_norm": 
0.6871134638786316, + "learning_rate": 0.00015298151309719787, + "loss": 2.6095, + "step": 6513 + }, + { + "epoch": 0.5257041401016868, + "grad_norm": 0.7479702830314636, + "learning_rate": 0.00015296812336133963, + "loss": 2.608, + "step": 6514 + }, + { + "epoch": 0.5257848438382697, + "grad_norm": 0.6772119402885437, + "learning_rate": 0.00015295473230537735, + "loss": 2.5679, + "step": 6515 + }, + { + "epoch": 0.5258655475748527, + "grad_norm": 0.7365416884422302, + "learning_rate": 0.0001529413399296447, + "loss": 2.6722, + "step": 6516 + }, + { + "epoch": 0.5259462513114357, + "grad_norm": 0.7538040280342102, + "learning_rate": 0.00015292794623447545, + "loss": 2.5562, + "step": 6517 + }, + { + "epoch": 0.5260269550480188, + "grad_norm": 0.7471820712089539, + "learning_rate": 0.00015291455122020344, + "loss": 2.7079, + "step": 6518 + }, + { + "epoch": 0.5261076587846018, + "grad_norm": 0.7605932354927063, + "learning_rate": 0.00015290115488716247, + "loss": 2.6696, + "step": 6519 + }, + { + "epoch": 0.5261883625211847, + "grad_norm": 0.7081854939460754, + "learning_rate": 0.00015288775723568647, + "loss": 2.6502, + "step": 6520 + }, + { + "epoch": 0.5262690662577677, + "grad_norm": 0.7236372828483582, + "learning_rate": 0.0001528743582661093, + "loss": 2.662, + "step": 6521 + }, + { + "epoch": 0.5263497699943508, + "grad_norm": 0.6710047721862793, + "learning_rate": 0.0001528609579787649, + "loss": 2.5947, + "step": 6522 + }, + { + "epoch": 0.5264304737309338, + "grad_norm": 0.709381103515625, + "learning_rate": 0.00015284755637398726, + "loss": 2.5922, + "step": 6523 + }, + { + "epoch": 0.5265111774675167, + "grad_norm": 0.7029775381088257, + "learning_rate": 0.00015283415345211033, + "loss": 2.6777, + "step": 6524 + }, + { + "epoch": 0.5265918812040997, + "grad_norm": 0.7250857949256897, + "learning_rate": 0.00015282074921346825, + "loss": 2.6027, + "step": 6525 + }, + { + "epoch": 0.5266725849406828, + "grad_norm": 0.7192760705947876, + "learning_rate": 
0.00015280734365839498, + "loss": 2.6544, + "step": 6526 + }, + { + "epoch": 0.5267532886772658, + "grad_norm": 0.693583071231842, + "learning_rate": 0.0001527939367872247, + "loss": 2.6302, + "step": 6527 + }, + { + "epoch": 0.5268339924138488, + "grad_norm": 0.7031428217887878, + "learning_rate": 0.00015278052860029145, + "loss": 2.6944, + "step": 6528 + }, + { + "epoch": 0.5269146961504317, + "grad_norm": 0.6986895799636841, + "learning_rate": 0.00015276711909792949, + "loss": 2.6595, + "step": 6529 + }, + { + "epoch": 0.5269953998870147, + "grad_norm": 0.7375979423522949, + "learning_rate": 0.000152753708280473, + "loss": 2.6839, + "step": 6530 + }, + { + "epoch": 0.5270761036235978, + "grad_norm": 0.7126755714416504, + "learning_rate": 0.0001527402961482562, + "loss": 2.5597, + "step": 6531 + }, + { + "epoch": 0.5271568073601808, + "grad_norm": 0.6631070971488953, + "learning_rate": 0.00015272688270161338, + "loss": 2.5566, + "step": 6532 + }, + { + "epoch": 0.5272375110967638, + "grad_norm": 0.6896609663963318, + "learning_rate": 0.00015271346794087874, + "loss": 2.5801, + "step": 6533 + }, + { + "epoch": 0.5273182148333467, + "grad_norm": 0.7437502145767212, + "learning_rate": 0.00015270005186638673, + "loss": 2.6572, + "step": 6534 + }, + { + "epoch": 0.5273989185699298, + "grad_norm": 0.7013052701950073, + "learning_rate": 0.00015268663447847166, + "loss": 2.621, + "step": 6535 + }, + { + "epoch": 0.5274796223065128, + "grad_norm": 0.7161773443222046, + "learning_rate": 0.00015267321577746795, + "loss": 2.5989, + "step": 6536 + }, + { + "epoch": 0.5275603260430958, + "grad_norm": 0.7654534578323364, + "learning_rate": 0.00015265979576371, + "loss": 2.6338, + "step": 6537 + }, + { + "epoch": 0.5276410297796787, + "grad_norm": 0.694646954536438, + "learning_rate": 0.0001526463744375323, + "loss": 2.6036, + "step": 6538 + }, + { + "epoch": 0.5277217335162618, + "grad_norm": 0.6594679355621338, + "learning_rate": 0.0001526329517992693, + "loss": 2.6256, + 
"step": 6539 + }, + { + "epoch": 0.5278024372528448, + "grad_norm": 0.6424389481544495, + "learning_rate": 0.00015261952784925557, + "loss": 2.6389, + "step": 6540 + }, + { + "epoch": 0.5278831409894278, + "grad_norm": 0.7465235590934753, + "learning_rate": 0.0001526061025878257, + "loss": 2.5449, + "step": 6541 + }, + { + "epoch": 0.5279638447260108, + "grad_norm": 0.6900132298469543, + "learning_rate": 0.0001525926760153142, + "loss": 2.5597, + "step": 6542 + }, + { + "epoch": 0.5280445484625939, + "grad_norm": 0.7505282163619995, + "learning_rate": 0.00015257924813205572, + "loss": 2.6526, + "step": 6543 + }, + { + "epoch": 0.5281252521991768, + "grad_norm": 0.72642582654953, + "learning_rate": 0.00015256581893838495, + "loss": 2.6593, + "step": 6544 + }, + { + "epoch": 0.5282059559357598, + "grad_norm": 0.6901132464408875, + "learning_rate": 0.00015255238843463656, + "loss": 2.6726, + "step": 6545 + }, + { + "epoch": 0.5282866596723428, + "grad_norm": 0.7741395831108093, + "learning_rate": 0.0001525389566211453, + "loss": 2.5929, + "step": 6546 + }, + { + "epoch": 0.5283673634089259, + "grad_norm": 0.7282403111457825, + "learning_rate": 0.00015252552349824585, + "loss": 2.5696, + "step": 6547 + }, + { + "epoch": 0.5284480671455088, + "grad_norm": 0.7421764731407166, + "learning_rate": 0.0001525120890662731, + "loss": 2.5593, + "step": 6548 + }, + { + "epoch": 0.5285287708820918, + "grad_norm": 0.6830468773841858, + "learning_rate": 0.00015249865332556182, + "loss": 2.6396, + "step": 6549 + }, + { + "epoch": 0.5286094746186748, + "grad_norm": 0.6758440732955933, + "learning_rate": 0.00015248521627644684, + "loss": 2.5375, + "step": 6550 + }, + { + "epoch": 0.5286901783552579, + "grad_norm": 0.6897253394126892, + "learning_rate": 0.00015247177791926308, + "loss": 2.6148, + "step": 6551 + }, + { + "epoch": 0.5287708820918409, + "grad_norm": 0.6391426920890808, + "learning_rate": 0.00015245833825434547, + "loss": 2.5563, + "step": 6552 + }, + { + "epoch": 
0.5288515858284238, + "grad_norm": 0.7213610410690308, + "learning_rate": 0.00015244489728202893, + "loss": 2.6158, + "step": 6553 + }, + { + "epoch": 0.5289322895650068, + "grad_norm": 0.6678160429000854, + "learning_rate": 0.00015243145500264845, + "loss": 2.6177, + "step": 6554 + }, + { + "epoch": 0.5290129933015899, + "grad_norm": 0.7041724324226379, + "learning_rate": 0.00015241801141653905, + "loss": 2.6504, + "step": 6555 + }, + { + "epoch": 0.5290936970381729, + "grad_norm": 0.6551648378372192, + "learning_rate": 0.0001524045665240358, + "loss": 2.577, + "step": 6556 + }, + { + "epoch": 0.5291744007747559, + "grad_norm": 0.7190412878990173, + "learning_rate": 0.00015239112032547377, + "loss": 2.596, + "step": 6557 + }, + { + "epoch": 0.5292551045113388, + "grad_norm": 0.6936302781105042, + "learning_rate": 0.00015237767282118807, + "loss": 2.6551, + "step": 6558 + }, + { + "epoch": 0.5293358082479219, + "grad_norm": 0.6901839971542358, + "learning_rate": 0.0001523642240115138, + "loss": 2.6263, + "step": 6559 + }, + { + "epoch": 0.5294165119845049, + "grad_norm": 0.6905068159103394, + "learning_rate": 0.00015235077389678624, + "loss": 2.6323, + "step": 6560 + }, + { + "epoch": 0.5294972157210879, + "grad_norm": 0.7495188117027283, + "learning_rate": 0.00015233732247734057, + "loss": 2.6243, + "step": 6561 + }, + { + "epoch": 0.5295779194576709, + "grad_norm": 0.6758708357810974, + "learning_rate": 0.00015232386975351197, + "loss": 2.6184, + "step": 6562 + }, + { + "epoch": 0.5296586231942539, + "grad_norm": 0.6443266868591309, + "learning_rate": 0.00015231041572563573, + "loss": 2.6543, + "step": 6563 + }, + { + "epoch": 0.5297393269308369, + "grad_norm": 0.7384275794029236, + "learning_rate": 0.00015229696039404723, + "loss": 2.6117, + "step": 6564 + }, + { + "epoch": 0.5298200306674199, + "grad_norm": 0.6873897314071655, + "learning_rate": 0.00015228350375908178, + "loss": 2.5689, + "step": 6565 + }, + { + "epoch": 0.5299007344040029, + "grad_norm": 
0.6715645790100098, + "learning_rate": 0.00015227004582107472, + "loss": 2.5943, + "step": 6566 + }, + { + "epoch": 0.529981438140586, + "grad_norm": 0.6814208030700684, + "learning_rate": 0.00015225658658036151, + "loss": 2.5562, + "step": 6567 + }, + { + "epoch": 0.5300621418771689, + "grad_norm": 0.6942310929298401, + "learning_rate": 0.00015224312603727755, + "loss": 2.5902, + "step": 6568 + }, + { + "epoch": 0.5301428456137519, + "grad_norm": 0.6856299042701721, + "learning_rate": 0.0001522296641921583, + "loss": 2.6115, + "step": 6569 + }, + { + "epoch": 0.5302235493503349, + "grad_norm": 0.870833694934845, + "learning_rate": 0.0001522162010453393, + "loss": 2.7492, + "step": 6570 + }, + { + "epoch": 0.530304253086918, + "grad_norm": 0.6796989440917969, + "learning_rate": 0.0001522027365971561, + "loss": 2.6957, + "step": 6571 + }, + { + "epoch": 0.530384956823501, + "grad_norm": 0.7043026685714722, + "learning_rate": 0.00015218927084794423, + "loss": 2.604, + "step": 6572 + }, + { + "epoch": 0.5304656605600839, + "grad_norm": 0.7533933520317078, + "learning_rate": 0.00015217580379803933, + "loss": 2.6271, + "step": 6573 + }, + { + "epoch": 0.5305463642966669, + "grad_norm": 0.7526697516441345, + "learning_rate": 0.000152162335447777, + "loss": 2.553, + "step": 6574 + }, + { + "epoch": 0.53062706803325, + "grad_norm": 0.6942071318626404, + "learning_rate": 0.00015214886579749284, + "loss": 2.7206, + "step": 6575 + }, + { + "epoch": 0.530707771769833, + "grad_norm": 0.7133236527442932, + "learning_rate": 0.00015213539484752273, + "loss": 2.6545, + "step": 6576 + }, + { + "epoch": 0.530788475506416, + "grad_norm": 0.7229849696159363, + "learning_rate": 0.00015212192259820222, + "loss": 2.6647, + "step": 6577 + }, + { + "epoch": 0.5308691792429989, + "grad_norm": 0.7142449617385864, + "learning_rate": 0.0001521084490498672, + "loss": 2.5777, + "step": 6578 + }, + { + "epoch": 0.5309498829795819, + "grad_norm": 0.6950247287750244, + "learning_rate": 
0.00015209497420285342, + "loss": 2.6159, + "step": 6579 + }, + { + "epoch": 0.531030586716165, + "grad_norm": 0.7492622137069702, + "learning_rate": 0.00015208149805749668, + "loss": 2.6927, + "step": 6580 + }, + { + "epoch": 0.531111290452748, + "grad_norm": 0.7618215084075928, + "learning_rate": 0.00015206802061413287, + "loss": 2.5831, + "step": 6581 + }, + { + "epoch": 0.5311919941893309, + "grad_norm": 0.7448660731315613, + "learning_rate": 0.0001520545418730979, + "loss": 2.6123, + "step": 6582 + }, + { + "epoch": 0.5312726979259139, + "grad_norm": 0.7450618147850037, + "learning_rate": 0.00015204106183472766, + "loss": 2.5768, + "step": 6583 + }, + { + "epoch": 0.531353401662497, + "grad_norm": 0.7426019310951233, + "learning_rate": 0.0001520275804993581, + "loss": 2.603, + "step": 6584 + }, + { + "epoch": 0.53143410539908, + "grad_norm": 0.7503333687782288, + "learning_rate": 0.00015201409786732526, + "loss": 2.6159, + "step": 6585 + }, + { + "epoch": 0.531514809135663, + "grad_norm": 0.6944373846054077, + "learning_rate": 0.00015200061393896513, + "loss": 2.5201, + "step": 6586 + }, + { + "epoch": 0.5315955128722459, + "grad_norm": 0.6958110332489014, + "learning_rate": 0.00015198712871461375, + "loss": 2.5592, + "step": 6587 + }, + { + "epoch": 0.531676216608829, + "grad_norm": 0.7838244438171387, + "learning_rate": 0.00015197364219460727, + "loss": 2.6663, + "step": 6588 + }, + { + "epoch": 0.531756920345412, + "grad_norm": 0.754338800907135, + "learning_rate": 0.00015196015437928174, + "loss": 2.6183, + "step": 6589 + }, + { + "epoch": 0.531837624081995, + "grad_norm": 0.7394337058067322, + "learning_rate": 0.00015194666526897332, + "loss": 2.5622, + "step": 6590 + }, + { + "epoch": 0.531918327818578, + "grad_norm": 0.7352069020271301, + "learning_rate": 0.00015193317486401824, + "loss": 2.6173, + "step": 6591 + }, + { + "epoch": 0.531999031555161, + "grad_norm": 0.6318944096565247, + "learning_rate": 0.00015191968316475267, + "loss": 2.6159, + "step": 
6592 + }, + { + "epoch": 0.532079735291744, + "grad_norm": 0.7071281671524048, + "learning_rate": 0.00015190619017151291, + "loss": 2.633, + "step": 6593 + }, + { + "epoch": 0.532160439028327, + "grad_norm": 0.7762585282325745, + "learning_rate": 0.00015189269588463517, + "loss": 2.6445, + "step": 6594 + }, + { + "epoch": 0.53224114276491, + "grad_norm": 0.7979930639266968, + "learning_rate": 0.0001518792003044558, + "loss": 2.5825, + "step": 6595 + }, + { + "epoch": 0.5323218465014931, + "grad_norm": 0.7355580925941467, + "learning_rate": 0.00015186570343131114, + "loss": 2.6197, + "step": 6596 + }, + { + "epoch": 0.532402550238076, + "grad_norm": 0.7286938428878784, + "learning_rate": 0.0001518522052655376, + "loss": 2.6385, + "step": 6597 + }, + { + "epoch": 0.532483253974659, + "grad_norm": 0.689143180847168, + "learning_rate": 0.00015183870580747156, + "loss": 2.6593, + "step": 6598 + }, + { + "epoch": 0.532563957711242, + "grad_norm": 0.714746356010437, + "learning_rate": 0.00015182520505744945, + "loss": 2.6059, + "step": 6599 + }, + { + "epoch": 0.5326446614478251, + "grad_norm": 0.8055040240287781, + "learning_rate": 0.00015181170301580777, + "loss": 2.6983, + "step": 6600 + }, + { + "epoch": 0.532725365184408, + "grad_norm": 0.7104170918464661, + "learning_rate": 0.00015179819968288297, + "loss": 2.6578, + "step": 6601 + }, + { + "epoch": 0.532806068920991, + "grad_norm": 0.7175524830818176, + "learning_rate": 0.0001517846950590117, + "loss": 2.6263, + "step": 6602 + }, + { + "epoch": 0.532886772657574, + "grad_norm": 0.6755492091178894, + "learning_rate": 0.00015177118914453042, + "loss": 2.5752, + "step": 6603 + }, + { + "epoch": 0.5329674763941571, + "grad_norm": 0.7020289897918701, + "learning_rate": 0.00015175768193977578, + "loss": 2.6186, + "step": 6604 + }, + { + "epoch": 0.5330481801307401, + "grad_norm": 0.7550958395004272, + "learning_rate": 0.0001517441734450844, + "loss": 2.628, + "step": 6605 + }, + { + "epoch": 0.533128883867323, + 
"grad_norm": 0.6697603464126587, + "learning_rate": 0.00015173066366079297, + "loss": 2.6433, + "step": 6606 + }, + { + "epoch": 0.533209587603906, + "grad_norm": 0.715372622013092, + "learning_rate": 0.0001517171525872382, + "loss": 2.6022, + "step": 6607 + }, + { + "epoch": 0.5332902913404891, + "grad_norm": 0.7081933617591858, + "learning_rate": 0.00015170364022475675, + "loss": 2.675, + "step": 6608 + }, + { + "epoch": 0.5333709950770721, + "grad_norm": 0.7074152231216431, + "learning_rate": 0.00015169012657368546, + "loss": 2.6637, + "step": 6609 + }, + { + "epoch": 0.5334516988136551, + "grad_norm": 0.6692848801612854, + "learning_rate": 0.00015167661163436108, + "loss": 2.5855, + "step": 6610 + }, + { + "epoch": 0.533532402550238, + "grad_norm": 0.7307556867599487, + "learning_rate": 0.00015166309540712048, + "loss": 2.6105, + "step": 6611 + }, + { + "epoch": 0.5336131062868211, + "grad_norm": 0.7026669383049011, + "learning_rate": 0.00015164957789230048, + "loss": 2.6656, + "step": 6612 + }, + { + "epoch": 0.5336938100234041, + "grad_norm": 0.6579706072807312, + "learning_rate": 0.000151636059090238, + "loss": 2.6456, + "step": 6613 + }, + { + "epoch": 0.5337745137599871, + "grad_norm": 0.6854498386383057, + "learning_rate": 0.00015162253900126993, + "loss": 2.5969, + "step": 6614 + }, + { + "epoch": 0.5338552174965701, + "grad_norm": 0.7542434334754944, + "learning_rate": 0.00015160901762573323, + "loss": 2.6333, + "step": 6615 + }, + { + "epoch": 0.5339359212331531, + "grad_norm": 0.6795105934143066, + "learning_rate": 0.0001515954949639649, + "loss": 2.6268, + "step": 6616 + }, + { + "epoch": 0.5340166249697361, + "grad_norm": 0.6395254135131836, + "learning_rate": 0.000151581971016302, + "loss": 2.5684, + "step": 6617 + }, + { + "epoch": 0.5340973287063191, + "grad_norm": 0.7069850564002991, + "learning_rate": 0.00015156844578308155, + "loss": 2.64, + "step": 6618 + }, + { + "epoch": 0.5341780324429021, + "grad_norm": 0.6779203414916992, + 
"learning_rate": 0.0001515549192646406, + "loss": 2.6255, + "step": 6619 + }, + { + "epoch": 0.5342587361794852, + "grad_norm": 0.6403560638427734, + "learning_rate": 0.00015154139146131632, + "loss": 2.611, + "step": 6620 + }, + { + "epoch": 0.5343394399160681, + "grad_norm": 0.7532669901847839, + "learning_rate": 0.00015152786237344583, + "loss": 2.5641, + "step": 6621 + }, + { + "epoch": 0.5344201436526511, + "grad_norm": 0.6827573776245117, + "learning_rate": 0.00015151433200136629, + "loss": 2.6096, + "step": 6622 + }, + { + "epoch": 0.5345008473892341, + "grad_norm": 0.6691904067993164, + "learning_rate": 0.000151500800345415, + "loss": 2.6602, + "step": 6623 + }, + { + "epoch": 0.5345815511258172, + "grad_norm": 0.7288634777069092, + "learning_rate": 0.00015148726740592906, + "loss": 2.6468, + "step": 6624 + }, + { + "epoch": 0.5346622548624002, + "grad_norm": 0.7087839245796204, + "learning_rate": 0.00015147373318324586, + "loss": 2.5795, + "step": 6625 + }, + { + "epoch": 0.5347429585989831, + "grad_norm": 0.6618373394012451, + "learning_rate": 0.00015146019767770267, + "loss": 2.638, + "step": 6626 + }, + { + "epoch": 0.5348236623355661, + "grad_norm": 0.7384989857673645, + "learning_rate": 0.00015144666088963684, + "loss": 2.6104, + "step": 6627 + }, + { + "epoch": 0.5349043660721492, + "grad_norm": 0.6662275195121765, + "learning_rate": 0.00015143312281938576, + "loss": 2.6174, + "step": 6628 + }, + { + "epoch": 0.5349850698087322, + "grad_norm": 0.6617184281349182, + "learning_rate": 0.0001514195834672868, + "loss": 2.6154, + "step": 6629 + }, + { + "epoch": 0.5350657735453151, + "grad_norm": 0.7173622846603394, + "learning_rate": 0.0001514060428336774, + "loss": 2.5741, + "step": 6630 + }, + { + "epoch": 0.5351464772818981, + "grad_norm": 0.7773584127426147, + "learning_rate": 0.00015139250091889502, + "loss": 2.6333, + "step": 6631 + }, + { + "epoch": 0.5352271810184811, + "grad_norm": 0.7255204916000366, + "learning_rate": 0.0001513789577232772, + 
"loss": 2.5459, + "step": 6632 + }, + { + "epoch": 0.5353078847550642, + "grad_norm": 0.7308403849601746, + "learning_rate": 0.00015136541324716144, + "loss": 2.5934, + "step": 6633 + }, + { + "epoch": 0.5353885884916472, + "grad_norm": 0.699367880821228, + "learning_rate": 0.0001513518674908853, + "loss": 2.6797, + "step": 6634 + }, + { + "epoch": 0.5354692922282301, + "grad_norm": 0.7236449718475342, + "learning_rate": 0.0001513383204547864, + "loss": 2.6289, + "step": 6635 + }, + { + "epoch": 0.5355499959648131, + "grad_norm": 0.6860557794570923, + "learning_rate": 0.00015132477213920234, + "loss": 2.6736, + "step": 6636 + }, + { + "epoch": 0.5356306997013962, + "grad_norm": 0.6724153161048889, + "learning_rate": 0.00015131122254447084, + "loss": 2.5581, + "step": 6637 + }, + { + "epoch": 0.5357114034379792, + "grad_norm": 0.6818630695343018, + "learning_rate": 0.00015129767167092949, + "loss": 2.5979, + "step": 6638 + }, + { + "epoch": 0.5357921071745622, + "grad_norm": 0.6956631541252136, + "learning_rate": 0.00015128411951891607, + "loss": 2.6116, + "step": 6639 + }, + { + "epoch": 0.5358728109111451, + "grad_norm": 0.6698076128959656, + "learning_rate": 0.00015127056608876837, + "loss": 2.65, + "step": 6640 + }, + { + "epoch": 0.5359535146477282, + "grad_norm": 0.7763264179229736, + "learning_rate": 0.00015125701138082415, + "loss": 2.6164, + "step": 6641 + }, + { + "epoch": 0.5360342183843112, + "grad_norm": 0.7148340940475464, + "learning_rate": 0.00015124345539542118, + "loss": 2.6467, + "step": 6642 + }, + { + "epoch": 0.5361149221208942, + "grad_norm": 0.7350041270256042, + "learning_rate": 0.00015122989813289733, + "loss": 2.6477, + "step": 6643 + }, + { + "epoch": 0.5361956258574772, + "grad_norm": 0.6993441581726074, + "learning_rate": 0.00015121633959359055, + "loss": 2.7526, + "step": 6644 + }, + { + "epoch": 0.5362763295940602, + "grad_norm": 0.6828470826148987, + "learning_rate": 0.00015120277977783873, + "loss": 2.6439, + "step": 6645 + }, + { + 
"epoch": 0.5363570333306432, + "grad_norm": 0.7076796889305115, + "learning_rate": 0.0001511892186859797, + "loss": 2.6375, + "step": 6646 + }, + { + "epoch": 0.5364377370672262, + "grad_norm": 0.6830769777297974, + "learning_rate": 0.0001511756563183516, + "loss": 2.6052, + "step": 6647 + }, + { + "epoch": 0.5365184408038092, + "grad_norm": 0.6482179760932922, + "learning_rate": 0.00015116209267529237, + "loss": 2.6251, + "step": 6648 + }, + { + "epoch": 0.5365991445403923, + "grad_norm": 0.6687620878219604, + "learning_rate": 0.00015114852775714, + "loss": 2.659, + "step": 6649 + }, + { + "epoch": 0.5366798482769752, + "grad_norm": 0.734108030796051, + "learning_rate": 0.0001511349615642327, + "loss": 2.6542, + "step": 6650 + }, + { + "epoch": 0.5367605520135582, + "grad_norm": 0.7092111706733704, + "learning_rate": 0.00015112139409690842, + "loss": 2.6228, + "step": 6651 + }, + { + "epoch": 0.5368412557501412, + "grad_norm": 0.6544996500015259, + "learning_rate": 0.0001511078253555054, + "loss": 2.5661, + "step": 6652 + }, + { + "epoch": 0.5369219594867243, + "grad_norm": 0.7012531161308289, + "learning_rate": 0.00015109425534036176, + "loss": 2.6447, + "step": 6653 + }, + { + "epoch": 0.5370026632233073, + "grad_norm": 0.6813335418701172, + "learning_rate": 0.0001510806840518157, + "loss": 2.5723, + "step": 6654 + }, + { + "epoch": 0.5370833669598902, + "grad_norm": 0.6711288094520569, + "learning_rate": 0.0001510671114902055, + "loss": 2.6096, + "step": 6655 + }, + { + "epoch": 0.5371640706964732, + "grad_norm": 0.721866250038147, + "learning_rate": 0.00015105353765586935, + "loss": 2.6167, + "step": 6656 + }, + { + "epoch": 0.5372447744330563, + "grad_norm": 0.8140639066696167, + "learning_rate": 0.00015103996254914562, + "loss": 2.5768, + "step": 6657 + }, + { + "epoch": 0.5373254781696393, + "grad_norm": 0.6859177947044373, + "learning_rate": 0.0001510263861703726, + "loss": 2.5638, + "step": 6658 + }, + { + "epoch": 0.5374061819062222, + "grad_norm": 
0.7254204154014587, + "learning_rate": 0.00015101280851988864, + "loss": 2.5855, + "step": 6659 + }, + { + "epoch": 0.5374868856428052, + "grad_norm": 0.7181829810142517, + "learning_rate": 0.00015099922959803218, + "loss": 2.5358, + "step": 6660 + }, + { + "epoch": 0.5375675893793883, + "grad_norm": 0.7092663645744324, + "learning_rate": 0.00015098564940514155, + "loss": 2.679, + "step": 6661 + }, + { + "epoch": 0.5376482931159713, + "grad_norm": 0.7126225233078003, + "learning_rate": 0.00015097206794155527, + "loss": 2.6167, + "step": 6662 + }, + { + "epoch": 0.5377289968525543, + "grad_norm": 0.7469925880432129, + "learning_rate": 0.00015095848520761186, + "loss": 2.5906, + "step": 6663 + }, + { + "epoch": 0.5378097005891372, + "grad_norm": 0.6911186575889587, + "learning_rate": 0.00015094490120364973, + "loss": 2.6488, + "step": 6664 + }, + { + "epoch": 0.5378904043257203, + "grad_norm": 0.6579635143280029, + "learning_rate": 0.00015093131593000753, + "loss": 2.5894, + "step": 6665 + }, + { + "epoch": 0.5379711080623033, + "grad_norm": 0.7107242345809937, + "learning_rate": 0.00015091772938702377, + "loss": 2.6568, + "step": 6666 + }, + { + "epoch": 0.5380518117988863, + "grad_norm": 0.6845428943634033, + "learning_rate": 0.00015090414157503714, + "loss": 2.5697, + "step": 6667 + }, + { + "epoch": 0.5381325155354693, + "grad_norm": 0.6713212132453918, + "learning_rate": 0.00015089055249438622, + "loss": 2.5747, + "step": 6668 + }, + { + "epoch": 0.5382132192720523, + "grad_norm": 0.7091513276100159, + "learning_rate": 0.0001508769621454097, + "loss": 2.6765, + "step": 6669 + }, + { + "epoch": 0.5382939230086353, + "grad_norm": 0.7403436899185181, + "learning_rate": 0.00015086337052844627, + "loss": 2.6841, + "step": 6670 + }, + { + "epoch": 0.5383746267452183, + "grad_norm": 0.6745626330375671, + "learning_rate": 0.0001508497776438347, + "loss": 2.6436, + "step": 6671 + }, + { + "epoch": 0.5384553304818013, + "grad_norm": 0.7491294145584106, + "learning_rate": 
0.00015083618349191372, + "loss": 2.6376, + "step": 6672 + }, + { + "epoch": 0.5385360342183844, + "grad_norm": 0.719761848449707, + "learning_rate": 0.00015082258807302222, + "loss": 2.5885, + "step": 6673 + }, + { + "epoch": 0.5386167379549673, + "grad_norm": 0.7302667498588562, + "learning_rate": 0.00015080899138749895, + "loss": 2.7019, + "step": 6674 + }, + { + "epoch": 0.5386974416915503, + "grad_norm": 0.7640584111213684, + "learning_rate": 0.0001507953934356828, + "loss": 2.6404, + "step": 6675 + }, + { + "epoch": 0.5387781454281333, + "grad_norm": 0.699515700340271, + "learning_rate": 0.0001507817942179127, + "loss": 2.6407, + "step": 6676 + }, + { + "epoch": 0.5388588491647164, + "grad_norm": 0.7305224537849426, + "learning_rate": 0.00015076819373452746, + "loss": 2.5994, + "step": 6677 + }, + { + "epoch": 0.5389395529012994, + "grad_norm": 0.7125952243804932, + "learning_rate": 0.00015075459198586616, + "loss": 2.6472, + "step": 6678 + }, + { + "epoch": 0.5390202566378823, + "grad_norm": 0.7077293395996094, + "learning_rate": 0.00015074098897226778, + "loss": 2.6168, + "step": 6679 + }, + { + "epoch": 0.5391009603744653, + "grad_norm": 0.6713843941688538, + "learning_rate": 0.00015072738469407127, + "loss": 2.5736, + "step": 6680 + }, + { + "epoch": 0.5391816641110483, + "grad_norm": 0.7101294994354248, + "learning_rate": 0.00015071377915161578, + "loss": 2.6994, + "step": 6681 + }, + { + "epoch": 0.5392623678476314, + "grad_norm": 0.7132740020751953, + "learning_rate": 0.00015070017234524032, + "loss": 2.586, + "step": 6682 + }, + { + "epoch": 0.5393430715842144, + "grad_norm": 0.7043401598930359, + "learning_rate": 0.00015068656427528402, + "loss": 2.6025, + "step": 6683 + }, + { + "epoch": 0.5394237753207973, + "grad_norm": 0.6831551194190979, + "learning_rate": 0.00015067295494208607, + "loss": 2.6183, + "step": 6684 + }, + { + "epoch": 0.5395044790573803, + "grad_norm": 0.7066370844841003, + "learning_rate": 0.0001506593443459856, + "loss": 2.6467, 
+ "step": 6685 + }, + { + "epoch": 0.5395851827939634, + "grad_norm": 0.7908033132553101, + "learning_rate": 0.0001506457324873219, + "loss": 2.6929, + "step": 6686 + }, + { + "epoch": 0.5396658865305464, + "grad_norm": 0.7186774611473083, + "learning_rate": 0.00015063211936643407, + "loss": 2.5841, + "step": 6687 + }, + { + "epoch": 0.5397465902671293, + "grad_norm": 0.6634512543678284, + "learning_rate": 0.0001506185049836615, + "loss": 2.5517, + "step": 6688 + }, + { + "epoch": 0.5398272940037123, + "grad_norm": 0.734406590461731, + "learning_rate": 0.00015060488933934353, + "loss": 2.6317, + "step": 6689 + }, + { + "epoch": 0.5399079977402954, + "grad_norm": 0.7754772305488586, + "learning_rate": 0.00015059127243381937, + "loss": 2.6885, + "step": 6690 + }, + { + "epoch": 0.5399887014768784, + "grad_norm": 0.7636603713035583, + "learning_rate": 0.00015057765426742848, + "loss": 2.5767, + "step": 6691 + }, + { + "epoch": 0.5400694052134614, + "grad_norm": 0.6621577143669128, + "learning_rate": 0.00015056403484051017, + "loss": 2.5905, + "step": 6692 + }, + { + "epoch": 0.5401501089500443, + "grad_norm": 0.7605881094932556, + "learning_rate": 0.00015055041415340404, + "loss": 2.6166, + "step": 6693 + }, + { + "epoch": 0.5402308126866274, + "grad_norm": 0.7603485584259033, + "learning_rate": 0.0001505367922064494, + "loss": 2.6123, + "step": 6694 + }, + { + "epoch": 0.5403115164232104, + "grad_norm": 0.7021469473838806, + "learning_rate": 0.0001505231689999858, + "loss": 2.6754, + "step": 6695 + }, + { + "epoch": 0.5403922201597934, + "grad_norm": 0.7291955947875977, + "learning_rate": 0.00015050954453435273, + "loss": 2.6393, + "step": 6696 + }, + { + "epoch": 0.5404729238963764, + "grad_norm": 0.6658700704574585, + "learning_rate": 0.00015049591880988977, + "loss": 2.5888, + "step": 6697 + }, + { + "epoch": 0.5405536276329594, + "grad_norm": 0.7080146074295044, + "learning_rate": 0.00015048229182693657, + "loss": 2.6318, + "step": 6698 + }, + { + "epoch": 
0.5406343313695424, + "grad_norm": 0.7440849542617798, + "learning_rate": 0.00015046866358583267, + "loss": 2.596, + "step": 6699 + }, + { + "epoch": 0.5407150351061254, + "grad_norm": 0.886578381061554, + "learning_rate": 0.00015045503408691775, + "loss": 2.6479, + "step": 6700 + }, + { + "epoch": 0.5407957388427084, + "grad_norm": 0.7221408486366272, + "learning_rate": 0.00015044140333053148, + "loss": 2.625, + "step": 6701 + }, + { + "epoch": 0.5408764425792915, + "grad_norm": 0.7193209528923035, + "learning_rate": 0.0001504277713170136, + "loss": 2.6044, + "step": 6702 + }, + { + "epoch": 0.5409571463158744, + "grad_norm": 0.7139819860458374, + "learning_rate": 0.00015041413804670384, + "loss": 2.5572, + "step": 6703 + }, + { + "epoch": 0.5410378500524574, + "grad_norm": 0.728875994682312, + "learning_rate": 0.00015040050351994196, + "loss": 2.6373, + "step": 6704 + }, + { + "epoch": 0.5411185537890404, + "grad_norm": 0.6794858574867249, + "learning_rate": 0.0001503868677370678, + "loss": 2.6265, + "step": 6705 + }, + { + "epoch": 0.5411992575256235, + "grad_norm": 0.6874774098396301, + "learning_rate": 0.00015037323069842117, + "loss": 2.6146, + "step": 6706 + }, + { + "epoch": 0.5412799612622065, + "grad_norm": 0.7064409255981445, + "learning_rate": 0.00015035959240434197, + "loss": 2.6126, + "step": 6707 + }, + { + "epoch": 0.5413606649987894, + "grad_norm": 0.7212977409362793, + "learning_rate": 0.00015034595285517006, + "loss": 2.6836, + "step": 6708 + }, + { + "epoch": 0.5414413687353724, + "grad_norm": 0.7826492190361023, + "learning_rate": 0.0001503323120512454, + "loss": 2.6648, + "step": 6709 + }, + { + "epoch": 0.5415220724719555, + "grad_norm": 0.7228415608406067, + "learning_rate": 0.000150318669992908, + "loss": 2.5734, + "step": 6710 + }, + { + "epoch": 0.5416027762085385, + "grad_norm": 0.6929590702056885, + "learning_rate": 0.00015030502668049778, + "loss": 2.6023, + "step": 6711 + }, + { + "epoch": 0.5416834799451214, + "grad_norm": 
0.679990291595459, + "learning_rate": 0.0001502913821143548, + "loss": 2.5867, + "step": 6712 + }, + { + "epoch": 0.5417641836817044, + "grad_norm": 0.7324180603027344, + "learning_rate": 0.00015027773629481907, + "loss": 2.5722, + "step": 6713 + }, + { + "epoch": 0.5418448874182875, + "grad_norm": 0.686826765537262, + "learning_rate": 0.00015026408922223078, + "loss": 2.6138, + "step": 6714 + }, + { + "epoch": 0.5419255911548705, + "grad_norm": 0.7045193314552307, + "learning_rate": 0.00015025044089693, + "loss": 2.619, + "step": 6715 + }, + { + "epoch": 0.5420062948914535, + "grad_norm": 0.6839936375617981, + "learning_rate": 0.00015023679131925683, + "loss": 2.5778, + "step": 6716 + }, + { + "epoch": 0.5420869986280364, + "grad_norm": 0.7613961696624756, + "learning_rate": 0.00015022314048955153, + "loss": 2.6262, + "step": 6717 + }, + { + "epoch": 0.5421677023646195, + "grad_norm": 0.7867478728294373, + "learning_rate": 0.00015020948840815428, + "loss": 2.6576, + "step": 6718 + }, + { + "epoch": 0.5422484061012025, + "grad_norm": 0.7371038794517517, + "learning_rate": 0.0001501958350754053, + "loss": 2.6495, + "step": 6719 + }, + { + "epoch": 0.5423291098377855, + "grad_norm": 0.7146512269973755, + "learning_rate": 0.00015018218049164494, + "loss": 2.6514, + "step": 6720 + }, + { + "epoch": 0.5424098135743685, + "grad_norm": 0.7507650256156921, + "learning_rate": 0.00015016852465721346, + "loss": 2.6509, + "step": 6721 + }, + { + "epoch": 0.5424905173109515, + "grad_norm": 0.6786547303199768, + "learning_rate": 0.0001501548675724512, + "loss": 2.5983, + "step": 6722 + }, + { + "epoch": 0.5425712210475345, + "grad_norm": 0.7077932357788086, + "learning_rate": 0.0001501412092376985, + "loss": 2.622, + "step": 6723 + }, + { + "epoch": 0.5426519247841175, + "grad_norm": 0.7191271781921387, + "learning_rate": 0.00015012754965329584, + "loss": 2.6632, + "step": 6724 + }, + { + "epoch": 0.5427326285207005, + "grad_norm": 0.6785906553268433, + "learning_rate": 
0.00015011388881958356, + "loss": 2.6312, + "step": 6725 + }, + { + "epoch": 0.5428133322572836, + "grad_norm": 0.6880263090133667, + "learning_rate": 0.00015010022673690222, + "loss": 2.5951, + "step": 6726 + }, + { + "epoch": 0.5428940359938665, + "grad_norm": 0.7769095301628113, + "learning_rate": 0.0001500865634055923, + "loss": 2.5503, + "step": 6727 + }, + { + "epoch": 0.5429747397304495, + "grad_norm": 0.6847476959228516, + "learning_rate": 0.0001500728988259942, + "loss": 2.6824, + "step": 6728 + }, + { + "epoch": 0.5430554434670325, + "grad_norm": 0.6829310059547424, + "learning_rate": 0.00015005923299844863, + "loss": 2.5683, + "step": 6729 + }, + { + "epoch": 0.5431361472036156, + "grad_norm": 0.7436082363128662, + "learning_rate": 0.0001500455659232961, + "loss": 2.6165, + "step": 6730 + }, + { + "epoch": 0.5432168509401986, + "grad_norm": 0.7876375913619995, + "learning_rate": 0.00015003189760087724, + "loss": 2.6203, + "step": 6731 + }, + { + "epoch": 0.5432975546767815, + "grad_norm": 0.6869253516197205, + "learning_rate": 0.0001500182280315327, + "loss": 2.6136, + "step": 6732 + }, + { + "epoch": 0.5433782584133645, + "grad_norm": 0.7179432511329651, + "learning_rate": 0.00015000455721560316, + "loss": 2.6049, + "step": 6733 + }, + { + "epoch": 0.5434589621499475, + "grad_norm": 0.7286917567253113, + "learning_rate": 0.00014999088515342939, + "loss": 2.5704, + "step": 6734 + }, + { + "epoch": 0.5435396658865306, + "grad_norm": 0.6841779351234436, + "learning_rate": 0.00014997721184535206, + "loss": 2.6095, + "step": 6735 + }, + { + "epoch": 0.5436203696231136, + "grad_norm": 0.7661791443824768, + "learning_rate": 0.00014996353729171196, + "loss": 2.6193, + "step": 6736 + }, + { + "epoch": 0.5437010733596965, + "grad_norm": 0.7365885376930237, + "learning_rate": 0.0001499498614928499, + "loss": 2.586, + "step": 6737 + }, + { + "epoch": 0.5437817770962795, + "grad_norm": 0.7423815131187439, + "learning_rate": 0.00014993618444910674, + "loss": 2.6199, 
+ "step": 6738 + }, + { + "epoch": 0.5438624808328626, + "grad_norm": 0.7667781114578247, + "learning_rate": 0.0001499225061608233, + "loss": 2.6584, + "step": 6739 + }, + { + "epoch": 0.5439431845694456, + "grad_norm": 0.7148830890655518, + "learning_rate": 0.00014990882662834057, + "loss": 2.7172, + "step": 6740 + }, + { + "epoch": 0.5440238883060285, + "grad_norm": 0.7206205725669861, + "learning_rate": 0.00014989514585199936, + "loss": 2.5682, + "step": 6741 + }, + { + "epoch": 0.5441045920426115, + "grad_norm": 0.7306448221206665, + "learning_rate": 0.0001498814638321407, + "loss": 2.6724, + "step": 6742 + }, + { + "epoch": 0.5441852957791946, + "grad_norm": 0.7058824896812439, + "learning_rate": 0.00014986778056910556, + "loss": 2.6573, + "step": 6743 + }, + { + "epoch": 0.5442659995157776, + "grad_norm": 0.770588755607605, + "learning_rate": 0.000149854096063235, + "loss": 2.658, + "step": 6744 + }, + { + "epoch": 0.5443467032523606, + "grad_norm": 0.8283931612968445, + "learning_rate": 0.00014984041031487001, + "loss": 2.6624, + "step": 6745 + }, + { + "epoch": 0.5444274069889435, + "grad_norm": 0.6814693808555603, + "learning_rate": 0.00014982672332435176, + "loss": 2.5835, + "step": 6746 + }, + { + "epoch": 0.5445081107255266, + "grad_norm": 0.7059363722801208, + "learning_rate": 0.00014981303509202127, + "loss": 2.5977, + "step": 6747 + }, + { + "epoch": 0.5445888144621096, + "grad_norm": 0.6678106188774109, + "learning_rate": 0.00014979934561821975, + "loss": 2.6479, + "step": 6748 + }, + { + "epoch": 0.5446695181986926, + "grad_norm": 0.8167592883110046, + "learning_rate": 0.00014978565490328835, + "loss": 2.6529, + "step": 6749 + }, + { + "epoch": 0.5447502219352756, + "grad_norm": 0.807209849357605, + "learning_rate": 0.00014977196294756832, + "loss": 2.6546, + "step": 6750 + }, + { + "epoch": 0.5448309256718586, + "grad_norm": 0.7099517583847046, + "learning_rate": 0.00014975826975140085, + "loss": 2.6178, + "step": 6751 + }, + { + "epoch": 
0.5449116294084416, + "grad_norm": 0.7900758981704712, + "learning_rate": 0.0001497445753151272, + "loss": 2.586, + "step": 6752 + }, + { + "epoch": 0.5449923331450246, + "grad_norm": 0.6826134920120239, + "learning_rate": 0.00014973087963908875, + "loss": 2.5914, + "step": 6753 + }, + { + "epoch": 0.5450730368816076, + "grad_norm": 0.7383863925933838, + "learning_rate": 0.0001497171827236268, + "loss": 2.6357, + "step": 6754 + }, + { + "epoch": 0.5451537406181907, + "grad_norm": 0.7208051085472107, + "learning_rate": 0.0001497034845690826, + "loss": 2.5435, + "step": 6755 + }, + { + "epoch": 0.5452344443547736, + "grad_norm": 0.680794894695282, + "learning_rate": 0.00014968978517579772, + "loss": 2.5691, + "step": 6756 + }, + { + "epoch": 0.5453151480913566, + "grad_norm": 0.680759847164154, + "learning_rate": 0.00014967608454411347, + "loss": 2.5761, + "step": 6757 + }, + { + "epoch": 0.5453958518279396, + "grad_norm": 0.719634473323822, + "learning_rate": 0.00014966238267437134, + "loss": 2.637, + "step": 6758 + }, + { + "epoch": 0.5454765555645227, + "grad_norm": 0.777302086353302, + "learning_rate": 0.0001496486795669128, + "loss": 2.6457, + "step": 6759 + }, + { + "epoch": 0.5455572593011057, + "grad_norm": 0.6875059604644775, + "learning_rate": 0.0001496349752220794, + "loss": 2.6116, + "step": 6760 + }, + { + "epoch": 0.5456379630376886, + "grad_norm": 0.6884258985519409, + "learning_rate": 0.0001496212696402127, + "loss": 2.5863, + "step": 6761 + }, + { + "epoch": 0.5457186667742716, + "grad_norm": 0.6667922139167786, + "learning_rate": 0.00014960756282165422, + "loss": 2.5892, + "step": 6762 + }, + { + "epoch": 0.5457993705108547, + "grad_norm": 0.6712725162506104, + "learning_rate": 0.00014959385476674559, + "loss": 2.5478, + "step": 6763 + }, + { + "epoch": 0.5458800742474377, + "grad_norm": 0.6803874969482422, + "learning_rate": 0.00014958014547582845, + "loss": 2.5785, + "step": 6764 + }, + { + "epoch": 0.5459607779840207, + "grad_norm": 
0.6975811123847961, + "learning_rate": 0.0001495664349492445, + "loss": 2.5765, + "step": 6765 + }, + { + "epoch": 0.5460414817206036, + "grad_norm": 0.7676273584365845, + "learning_rate": 0.00014955272318733544, + "loss": 2.634, + "step": 6766 + }, + { + "epoch": 0.5461221854571867, + "grad_norm": 0.7044547200202942, + "learning_rate": 0.000149539010190443, + "loss": 2.646, + "step": 6767 + }, + { + "epoch": 0.5462028891937697, + "grad_norm": 0.7453166842460632, + "learning_rate": 0.00014952529595890887, + "loss": 2.6137, + "step": 6768 + }, + { + "epoch": 0.5462835929303527, + "grad_norm": 0.7281681299209595, + "learning_rate": 0.00014951158049307493, + "loss": 2.6558, + "step": 6769 + }, + { + "epoch": 0.5463642966669356, + "grad_norm": 0.7131047248840332, + "learning_rate": 0.00014949786379328298, + "loss": 2.6441, + "step": 6770 + }, + { + "epoch": 0.5464450004035187, + "grad_norm": 0.7072219848632812, + "learning_rate": 0.00014948414585987487, + "loss": 2.5861, + "step": 6771 + }, + { + "epoch": 0.5465257041401017, + "grad_norm": 0.7270335555076599, + "learning_rate": 0.00014947042669319252, + "loss": 2.6703, + "step": 6772 + }, + { + "epoch": 0.5466064078766847, + "grad_norm": 0.7314150929450989, + "learning_rate": 0.0001494567062935778, + "loss": 2.6101, + "step": 6773 + }, + { + "epoch": 0.5466871116132677, + "grad_norm": 0.8168460130691528, + "learning_rate": 0.00014944298466137266, + "loss": 2.662, + "step": 6774 + }, + { + "epoch": 0.5467678153498507, + "grad_norm": 0.7338390350341797, + "learning_rate": 0.00014942926179691913, + "loss": 2.6481, + "step": 6775 + }, + { + "epoch": 0.5468485190864337, + "grad_norm": 0.7065639495849609, + "learning_rate": 0.00014941553770055917, + "loss": 2.6192, + "step": 6776 + }, + { + "epoch": 0.5469292228230167, + "grad_norm": 0.7675396203994751, + "learning_rate": 0.00014940181237263483, + "loss": 2.5828, + "step": 6777 + }, + { + "epoch": 0.5470099265595997, + "grad_norm": 0.7085692286491394, + "learning_rate": 
0.0001493880858134882, + "loss": 2.5815, + "step": 6778 + }, + { + "epoch": 0.5470906302961828, + "grad_norm": 0.757591187953949, + "learning_rate": 0.00014937435802346135, + "loss": 2.691, + "step": 6779 + }, + { + "epoch": 0.5471713340327657, + "grad_norm": 0.7299168705940247, + "learning_rate": 0.00014936062900289647, + "loss": 2.6246, + "step": 6780 + }, + { + "epoch": 0.5472520377693487, + "grad_norm": 0.693692684173584, + "learning_rate": 0.00014934689875213564, + "loss": 2.6149, + "step": 6781 + }, + { + "epoch": 0.5473327415059317, + "grad_norm": 0.733657956123352, + "learning_rate": 0.00014933316727152113, + "loss": 2.582, + "step": 6782 + }, + { + "epoch": 0.5474134452425147, + "grad_norm": 0.6881953477859497, + "learning_rate": 0.00014931943456139514, + "loss": 2.6023, + "step": 6783 + }, + { + "epoch": 0.5474941489790978, + "grad_norm": 0.7102411985397339, + "learning_rate": 0.00014930570062209988, + "loss": 2.6296, + "step": 6784 + }, + { + "epoch": 0.5475748527156807, + "grad_norm": 0.7263364791870117, + "learning_rate": 0.00014929196545397771, + "loss": 2.6414, + "step": 6785 + }, + { + "epoch": 0.5476555564522637, + "grad_norm": 0.7239066958427429, + "learning_rate": 0.00014927822905737092, + "loss": 2.6174, + "step": 6786 + }, + { + "epoch": 0.5477362601888467, + "grad_norm": 0.6909911632537842, + "learning_rate": 0.0001492644914326218, + "loss": 2.6036, + "step": 6787 + }, + { + "epoch": 0.5478169639254298, + "grad_norm": 0.719693124294281, + "learning_rate": 0.00014925075258007283, + "loss": 2.6507, + "step": 6788 + }, + { + "epoch": 0.5478976676620128, + "grad_norm": 0.7722225785255432, + "learning_rate": 0.0001492370125000663, + "loss": 2.6268, + "step": 6789 + }, + { + "epoch": 0.5479783713985957, + "grad_norm": 0.7456568479537964, + "learning_rate": 0.00014922327119294476, + "loss": 2.6426, + "step": 6790 + }, + { + "epoch": 0.5480590751351787, + "grad_norm": 0.7430242300033569, + "learning_rate": 0.00014920952865905062, + "loss": 2.6632, + 
"step": 6791 + }, + { + "epoch": 0.5481397788717618, + "grad_norm": 0.7363260388374329, + "learning_rate": 0.0001491957848987264, + "loss": 2.6021, + "step": 6792 + }, + { + "epoch": 0.5482204826083448, + "grad_norm": 0.6903972029685974, + "learning_rate": 0.00014918203991231462, + "loss": 2.6086, + "step": 6793 + }, + { + "epoch": 0.5483011863449277, + "grad_norm": 0.6765161752700806, + "learning_rate": 0.00014916829370015781, + "loss": 2.5806, + "step": 6794 + }, + { + "epoch": 0.5483818900815107, + "grad_norm": 0.7533403635025024, + "learning_rate": 0.0001491545462625986, + "loss": 2.6351, + "step": 6795 + }, + { + "epoch": 0.5484625938180938, + "grad_norm": 0.6841829419136047, + "learning_rate": 0.00014914079759997963, + "loss": 2.606, + "step": 6796 + }, + { + "epoch": 0.5485432975546768, + "grad_norm": 0.7671411037445068, + "learning_rate": 0.00014912704771264353, + "loss": 2.6645, + "step": 6797 + }, + { + "epoch": 0.5486240012912598, + "grad_norm": 0.7218797206878662, + "learning_rate": 0.00014911329660093295, + "loss": 2.6302, + "step": 6798 + }, + { + "epoch": 0.5487047050278427, + "grad_norm": 0.7269994020462036, + "learning_rate": 0.00014909954426519067, + "loss": 2.6261, + "step": 6799 + }, + { + "epoch": 0.5487854087644258, + "grad_norm": 0.765353262424469, + "learning_rate": 0.00014908579070575936, + "loss": 2.5787, + "step": 6800 + }, + { + "epoch": 0.5488661125010088, + "grad_norm": 0.6503065228462219, + "learning_rate": 0.00014907203592298189, + "loss": 2.6404, + "step": 6801 + }, + { + "epoch": 0.5489468162375918, + "grad_norm": 0.6869633197784424, + "learning_rate": 0.00014905827991720097, + "loss": 2.6463, + "step": 6802 + }, + { + "epoch": 0.5490275199741748, + "grad_norm": 0.7221426963806152, + "learning_rate": 0.00014904452268875947, + "loss": 2.6686, + "step": 6803 + }, + { + "epoch": 0.5491082237107578, + "grad_norm": 0.6781399250030518, + "learning_rate": 0.00014903076423800028, + "loss": 2.6274, + "step": 6804 + }, + { + "epoch": 
0.5491889274473408, + "grad_norm": 0.7451084852218628, + "learning_rate": 0.00014901700456526626, + "loss": 2.6449, + "step": 6805 + }, + { + "epoch": 0.5492696311839238, + "grad_norm": 0.7159574627876282, + "learning_rate": 0.0001490032436709004, + "loss": 2.6664, + "step": 6806 + }, + { + "epoch": 0.5493503349205068, + "grad_norm": 0.724039614200592, + "learning_rate": 0.00014898948155524558, + "loss": 2.5816, + "step": 6807 + }, + { + "epoch": 0.5494310386570899, + "grad_norm": 0.7194633483886719, + "learning_rate": 0.0001489757182186448, + "loss": 2.5625, + "step": 6808 + }, + { + "epoch": 0.5495117423936728, + "grad_norm": 0.704133927822113, + "learning_rate": 0.0001489619536614411, + "loss": 2.6295, + "step": 6809 + }, + { + "epoch": 0.5495924461302558, + "grad_norm": 0.6717158555984497, + "learning_rate": 0.00014894818788397757, + "loss": 2.6168, + "step": 6810 + }, + { + "epoch": 0.5496731498668388, + "grad_norm": 0.7096573710441589, + "learning_rate": 0.0001489344208865972, + "loss": 2.6316, + "step": 6811 + }, + { + "epoch": 0.5497538536034219, + "grad_norm": 0.6383458375930786, + "learning_rate": 0.00014892065266964316, + "loss": 2.5577, + "step": 6812 + }, + { + "epoch": 0.5498345573400049, + "grad_norm": 0.7606377601623535, + "learning_rate": 0.0001489068832334586, + "loss": 2.7078, + "step": 6813 + }, + { + "epoch": 0.5499152610765878, + "grad_norm": 0.649162232875824, + "learning_rate": 0.00014889311257838665, + "loss": 2.6023, + "step": 6814 + }, + { + "epoch": 0.5499959648131708, + "grad_norm": 0.6445025205612183, + "learning_rate": 0.00014887934070477053, + "loss": 2.6, + "step": 6815 + }, + { + "epoch": 0.5500766685497539, + "grad_norm": 0.6873729825019836, + "learning_rate": 0.00014886556761295342, + "loss": 2.6398, + "step": 6816 + }, + { + "epoch": 0.5501573722863369, + "grad_norm": 0.7814947366714478, + "learning_rate": 0.0001488517933032787, + "loss": 2.5803, + "step": 6817 + }, + { + "epoch": 0.5502380760229199, + "grad_norm": 
0.7140909433364868, + "learning_rate": 0.00014883801777608953, + "loss": 2.6051, + "step": 6818 + }, + { + "epoch": 0.5503187797595028, + "grad_norm": 0.7326326370239258, + "learning_rate": 0.00014882424103172936, + "loss": 2.6123, + "step": 6819 + }, + { + "epoch": 0.5503994834960859, + "grad_norm": 0.7093667387962341, + "learning_rate": 0.00014881046307054142, + "loss": 2.6527, + "step": 6820 + }, + { + "epoch": 0.5504801872326689, + "grad_norm": 0.6877567768096924, + "learning_rate": 0.00014879668389286915, + "loss": 2.6086, + "step": 6821 + }, + { + "epoch": 0.5505608909692519, + "grad_norm": 0.7095615863800049, + "learning_rate": 0.000148782903499056, + "loss": 2.6469, + "step": 6822 + }, + { + "epoch": 0.5506415947058348, + "grad_norm": 0.6931191086769104, + "learning_rate": 0.00014876912188944535, + "loss": 2.6842, + "step": 6823 + }, + { + "epoch": 0.5507222984424179, + "grad_norm": 0.7016414403915405, + "learning_rate": 0.00014875533906438072, + "loss": 2.5753, + "step": 6824 + }, + { + "epoch": 0.5508030021790009, + "grad_norm": 0.6813814640045166, + "learning_rate": 0.00014874155502420558, + "loss": 2.5739, + "step": 6825 + }, + { + "epoch": 0.5508837059155839, + "grad_norm": 0.7068608403205872, + "learning_rate": 0.00014872776976926347, + "loss": 2.6325, + "step": 6826 + }, + { + "epoch": 0.5509644096521669, + "grad_norm": 0.6978127360343933, + "learning_rate": 0.00014871398329989796, + "loss": 2.5614, + "step": 6827 + }, + { + "epoch": 0.55104511338875, + "grad_norm": 0.6923051476478577, + "learning_rate": 0.00014870019561645265, + "loss": 2.6075, + "step": 6828 + }, + { + "epoch": 0.5511258171253329, + "grad_norm": 0.6708533763885498, + "learning_rate": 0.00014868640671927117, + "loss": 2.5883, + "step": 6829 + }, + { + "epoch": 0.5512065208619159, + "grad_norm": 0.7679650783538818, + "learning_rate": 0.00014867261660869713, + "loss": 2.6105, + "step": 6830 + }, + { + "epoch": 0.5512872245984989, + "grad_norm": 0.7080917358398438, + "learning_rate": 
0.0001486588252850743, + "loss": 2.5855, + "step": 6831 + }, + { + "epoch": 0.551367928335082, + "grad_norm": 0.7218755483627319, + "learning_rate": 0.00014864503274874635, + "loss": 2.5872, + "step": 6832 + }, + { + "epoch": 0.551448632071665, + "grad_norm": 0.689038872718811, + "learning_rate": 0.000148631239000057, + "loss": 2.5902, + "step": 6833 + }, + { + "epoch": 0.5515293358082479, + "grad_norm": 0.6810954213142395, + "learning_rate": 0.00014861744403935005, + "loss": 2.5938, + "step": 6834 + }, + { + "epoch": 0.5516100395448309, + "grad_norm": 0.7509457468986511, + "learning_rate": 0.00014860364786696933, + "loss": 2.593, + "step": 6835 + }, + { + "epoch": 0.5516907432814139, + "grad_norm": 0.739536702632904, + "learning_rate": 0.00014858985048325863, + "loss": 2.6668, + "step": 6836 + }, + { + "epoch": 0.551771447017997, + "grad_norm": 0.661829948425293, + "learning_rate": 0.00014857605188856184, + "loss": 2.6407, + "step": 6837 + }, + { + "epoch": 0.5518521507545799, + "grad_norm": 0.6869735717773438, + "learning_rate": 0.00014856225208322287, + "loss": 2.535, + "step": 6838 + }, + { + "epoch": 0.5519328544911629, + "grad_norm": 0.6724792122840881, + "learning_rate": 0.00014854845106758563, + "loss": 2.5629, + "step": 6839 + }, + { + "epoch": 0.5520135582277459, + "grad_norm": 0.7066503763198853, + "learning_rate": 0.00014853464884199407, + "loss": 2.6002, + "step": 6840 + }, + { + "epoch": 0.552094261964329, + "grad_norm": 0.7354215979576111, + "learning_rate": 0.0001485208454067922, + "loss": 2.6032, + "step": 6841 + }, + { + "epoch": 0.552174965700912, + "grad_norm": 0.8124571442604065, + "learning_rate": 0.00014850704076232405, + "loss": 2.5884, + "step": 6842 + }, + { + "epoch": 0.5522556694374949, + "grad_norm": 0.6941336393356323, + "learning_rate": 0.00014849323490893364, + "loss": 2.6461, + "step": 6843 + }, + { + "epoch": 0.5523363731740779, + "grad_norm": 0.6848790049552917, + "learning_rate": 0.00014847942784696505, + "loss": 2.6098, + 
"step": 6844 + }, + { + "epoch": 0.552417076910661, + "grad_norm": 0.6688000559806824, + "learning_rate": 0.00014846561957676237, + "loss": 2.6115, + "step": 6845 + }, + { + "epoch": 0.552497780647244, + "grad_norm": 0.6647306084632874, + "learning_rate": 0.00014845181009866975, + "loss": 2.597, + "step": 6846 + }, + { + "epoch": 0.552578484383827, + "grad_norm": 0.7277785539627075, + "learning_rate": 0.0001484379994130314, + "loss": 2.6223, + "step": 6847 + }, + { + "epoch": 0.5526591881204099, + "grad_norm": 0.6623761057853699, + "learning_rate": 0.00014842418752019146, + "loss": 2.5657, + "step": 6848 + }, + { + "epoch": 0.552739891856993, + "grad_norm": 0.7207754254341125, + "learning_rate": 0.00014841037442049423, + "loss": 2.5711, + "step": 6849 + }, + { + "epoch": 0.552820595593576, + "grad_norm": 0.6963560581207275, + "learning_rate": 0.00014839656011428389, + "loss": 2.6078, + "step": 6850 + }, + { + "epoch": 0.552901299330159, + "grad_norm": 0.6875078678131104, + "learning_rate": 0.00014838274460190475, + "loss": 2.6109, + "step": 6851 + }, + { + "epoch": 0.552982003066742, + "grad_norm": 0.7049943804740906, + "learning_rate": 0.00014836892788370118, + "loss": 2.5755, + "step": 6852 + }, + { + "epoch": 0.553062706803325, + "grad_norm": 0.6941191554069519, + "learning_rate": 0.00014835510996001744, + "loss": 2.6694, + "step": 6853 + }, + { + "epoch": 0.553143410539908, + "grad_norm": 0.7589484453201294, + "learning_rate": 0.000148341290831198, + "loss": 2.5677, + "step": 6854 + }, + { + "epoch": 0.553224114276491, + "grad_norm": 0.6594784259796143, + "learning_rate": 0.00014832747049758723, + "loss": 2.6209, + "step": 6855 + }, + { + "epoch": 0.553304818013074, + "grad_norm": 0.726598858833313, + "learning_rate": 0.00014831364895952952, + "loss": 2.6492, + "step": 6856 + }, + { + "epoch": 0.553385521749657, + "grad_norm": 0.6668030023574829, + "learning_rate": 0.0001482998262173694, + "loss": 2.6057, + "step": 6857 + }, + { + "epoch": 0.55346622548624, + 
"grad_norm": 0.7698997855186462, + "learning_rate": 0.0001482860022714514, + "loss": 2.6215, + "step": 6858 + }, + { + "epoch": 0.553546929222823, + "grad_norm": 0.6805251836776733, + "learning_rate": 0.00014827217712211997, + "loss": 2.5855, + "step": 6859 + }, + { + "epoch": 0.553627632959406, + "grad_norm": 0.8481020331382751, + "learning_rate": 0.00014825835076971968, + "loss": 2.6218, + "step": 6860 + }, + { + "epoch": 0.5537083366959891, + "grad_norm": 0.6801722645759583, + "learning_rate": 0.00014824452321459517, + "loss": 2.5998, + "step": 6861 + }, + { + "epoch": 0.553789040432572, + "grad_norm": 0.7174597978591919, + "learning_rate": 0.00014823069445709104, + "loss": 2.5782, + "step": 6862 + }, + { + "epoch": 0.553869744169155, + "grad_norm": 0.7607117891311646, + "learning_rate": 0.0001482168644975519, + "loss": 2.6492, + "step": 6863 + }, + { + "epoch": 0.553950447905738, + "grad_norm": 0.7554265856742859, + "learning_rate": 0.00014820303333632246, + "loss": 2.6511, + "step": 6864 + }, + { + "epoch": 0.5540311516423211, + "grad_norm": 0.7520260214805603, + "learning_rate": 0.00014818920097374745, + "loss": 2.6258, + "step": 6865 + }, + { + "epoch": 0.5541118553789041, + "grad_norm": 0.7897995114326477, + "learning_rate": 0.00014817536741017152, + "loss": 2.6153, + "step": 6866 + }, + { + "epoch": 0.554192559115487, + "grad_norm": 0.7444615960121155, + "learning_rate": 0.00014816153264593957, + "loss": 2.5892, + "step": 6867 + }, + { + "epoch": 0.55427326285207, + "grad_norm": 0.6593222618103027, + "learning_rate": 0.0001481476966813963, + "loss": 2.6048, + "step": 6868 + }, + { + "epoch": 0.5543539665886531, + "grad_norm": 0.7517102360725403, + "learning_rate": 0.0001481338595168866, + "loss": 2.6496, + "step": 6869 + }, + { + "epoch": 0.5544346703252361, + "grad_norm": 0.7314056754112244, + "learning_rate": 0.00014812002115275529, + "loss": 2.6009, + "step": 6870 + }, + { + "epoch": 0.554515374061819, + "grad_norm": 0.6718037724494934, + 
"learning_rate": 0.00014810618158934722, + "loss": 2.6279, + "step": 6871 + }, + { + "epoch": 0.554596077798402, + "grad_norm": 0.6853529810905457, + "learning_rate": 0.00014809234082700735, + "loss": 2.6562, + "step": 6872 + }, + { + "epoch": 0.5546767815349851, + "grad_norm": 0.713599443435669, + "learning_rate": 0.0001480784988660807, + "loss": 2.5783, + "step": 6873 + }, + { + "epoch": 0.5547574852715681, + "grad_norm": 0.6820243000984192, + "learning_rate": 0.00014806465570691213, + "loss": 2.5753, + "step": 6874 + }, + { + "epoch": 0.5548381890081511, + "grad_norm": 0.6999152302742004, + "learning_rate": 0.00014805081134984673, + "loss": 2.5839, + "step": 6875 + }, + { + "epoch": 0.554918892744734, + "grad_norm": 0.7145923376083374, + "learning_rate": 0.00014803696579522948, + "loss": 2.6153, + "step": 6876 + }, + { + "epoch": 0.5549995964813171, + "grad_norm": 0.7569223046302795, + "learning_rate": 0.00014802311904340548, + "loss": 2.5879, + "step": 6877 + }, + { + "epoch": 0.5550803002179001, + "grad_norm": 0.6977131962776184, + "learning_rate": 0.00014800927109471983, + "loss": 2.6587, + "step": 6878 + }, + { + "epoch": 0.5551610039544831, + "grad_norm": 0.6693562865257263, + "learning_rate": 0.00014799542194951764, + "loss": 2.6271, + "step": 6879 + }, + { + "epoch": 0.5552417076910661, + "grad_norm": 0.6937456130981445, + "learning_rate": 0.00014798157160814406, + "loss": 2.6213, + "step": 6880 + }, + { + "epoch": 0.5553224114276492, + "grad_norm": 0.761538565158844, + "learning_rate": 0.0001479677200709443, + "loss": 2.6053, + "step": 6881 + }, + { + "epoch": 0.5554031151642321, + "grad_norm": 0.707457959651947, + "learning_rate": 0.00014795386733826356, + "loss": 2.5763, + "step": 6882 + }, + { + "epoch": 0.5554838189008151, + "grad_norm": 0.7323198318481445, + "learning_rate": 0.0001479400134104471, + "loss": 2.6899, + "step": 6883 + }, + { + "epoch": 0.5555645226373981, + "grad_norm": 0.7181541323661804, + "learning_rate": 0.0001479261582878402, + 
"loss": 2.5743, + "step": 6884 + }, + { + "epoch": 0.5556452263739811, + "grad_norm": 0.7683241367340088, + "learning_rate": 0.00014791230197078813, + "loss": 2.5295, + "step": 6885 + }, + { + "epoch": 0.5557259301105641, + "grad_norm": 0.7248150706291199, + "learning_rate": 0.00014789844445963626, + "loss": 2.6131, + "step": 6886 + }, + { + "epoch": 0.5558066338471471, + "grad_norm": 0.6868402361869812, + "learning_rate": 0.00014788458575472997, + "loss": 2.6182, + "step": 6887 + }, + { + "epoch": 0.5558873375837301, + "grad_norm": 0.6995798945426941, + "learning_rate": 0.0001478707258564146, + "loss": 2.5969, + "step": 6888 + }, + { + "epoch": 0.5559680413203131, + "grad_norm": 0.6912558078765869, + "learning_rate": 0.00014785686476503565, + "loss": 2.6264, + "step": 6889 + }, + { + "epoch": 0.5560487450568962, + "grad_norm": 0.7485123872756958, + "learning_rate": 0.00014784300248093848, + "loss": 2.6036, + "step": 6890 + }, + { + "epoch": 0.5561294487934791, + "grad_norm": 0.7150819897651672, + "learning_rate": 0.00014782913900446864, + "loss": 2.5807, + "step": 6891 + }, + { + "epoch": 0.5562101525300621, + "grad_norm": 0.6715224385261536, + "learning_rate": 0.00014781527433597167, + "loss": 2.6164, + "step": 6892 + }, + { + "epoch": 0.5562908562666451, + "grad_norm": 0.6951256394386292, + "learning_rate": 0.000147801408475793, + "loss": 2.6106, + "step": 6893 + }, + { + "epoch": 0.5563715600032282, + "grad_norm": 0.7296997904777527, + "learning_rate": 0.00014778754142427832, + "loss": 2.6182, + "step": 6894 + }, + { + "epoch": 0.5564522637398112, + "grad_norm": 0.7484713196754456, + "learning_rate": 0.0001477736731817732, + "loss": 2.6384, + "step": 6895 + }, + { + "epoch": 0.5565329674763941, + "grad_norm": 0.6967526078224182, + "learning_rate": 0.00014775980374862326, + "loss": 2.5889, + "step": 6896 + }, + { + "epoch": 0.5566136712129771, + "grad_norm": 0.7004885077476501, + "learning_rate": 0.00014774593312517415, + "loss": 2.6549, + "step": 6897 + }, + { 
+ "epoch": 0.5566943749495602, + "grad_norm": 0.7069302201271057, + "learning_rate": 0.00014773206131177158, + "loss": 2.6408, + "step": 6898 + }, + { + "epoch": 0.5567750786861432, + "grad_norm": 0.7048566341400146, + "learning_rate": 0.00014771818830876127, + "loss": 2.5909, + "step": 6899 + }, + { + "epoch": 0.5568557824227262, + "grad_norm": 0.7386630773544312, + "learning_rate": 0.00014770431411648897, + "loss": 2.6402, + "step": 6900 + }, + { + "epoch": 0.5569364861593091, + "grad_norm": 0.7244876027107239, + "learning_rate": 0.00014769043873530047, + "loss": 2.5548, + "step": 6901 + }, + { + "epoch": 0.5570171898958922, + "grad_norm": 0.6820651888847351, + "learning_rate": 0.00014767656216554156, + "loss": 2.682, + "step": 6902 + }, + { + "epoch": 0.5570978936324752, + "grad_norm": 0.7281784415245056, + "learning_rate": 0.00014766268440755812, + "loss": 2.622, + "step": 6903 + }, + { + "epoch": 0.5571785973690582, + "grad_norm": 0.6525030136108398, + "learning_rate": 0.00014764880546169594, + "loss": 2.5809, + "step": 6904 + }, + { + "epoch": 0.5572593011056411, + "grad_norm": 0.6735210418701172, + "learning_rate": 0.00014763492532830102, + "loss": 2.6645, + "step": 6905 + }, + { + "epoch": 0.5573400048422242, + "grad_norm": 0.674700140953064, + "learning_rate": 0.00014762104400771922, + "loss": 2.6466, + "step": 6906 + }, + { + "epoch": 0.5574207085788072, + "grad_norm": 0.7570134401321411, + "learning_rate": 0.00014760716150029652, + "loss": 2.57, + "step": 6907 + }, + { + "epoch": 0.5575014123153902, + "grad_norm": 0.6532449722290039, + "learning_rate": 0.00014759327780637893, + "loss": 2.6207, + "step": 6908 + }, + { + "epoch": 0.5575821160519732, + "grad_norm": 0.7697737812995911, + "learning_rate": 0.00014757939292631242, + "loss": 2.5846, + "step": 6909 + }, + { + "epoch": 0.5576628197885563, + "grad_norm": 0.6750194430351257, + "learning_rate": 0.00014756550686044308, + "loss": 2.6421, + "step": 6910 + }, + { + "epoch": 0.5577435235251392, + 
"grad_norm": 0.7357683777809143, + "learning_rate": 0.00014755161960911697, + "loss": 2.6173, + "step": 6911 + }, + { + "epoch": 0.5578242272617222, + "grad_norm": 0.6812090277671814, + "learning_rate": 0.0001475377311726802, + "loss": 2.5556, + "step": 6912 + }, + { + "epoch": 0.5579049309983052, + "grad_norm": 0.7633040547370911, + "learning_rate": 0.00014752384155147888, + "loss": 2.6505, + "step": 6913 + }, + { + "epoch": 0.5579856347348883, + "grad_norm": 0.7426417469978333, + "learning_rate": 0.00014750995074585922, + "loss": 2.5575, + "step": 6914 + }, + { + "epoch": 0.5580663384714712, + "grad_norm": 0.6926711201667786, + "learning_rate": 0.00014749605875616744, + "loss": 2.5751, + "step": 6915 + }, + { + "epoch": 0.5581470422080542, + "grad_norm": 0.70630943775177, + "learning_rate": 0.00014748216558274966, + "loss": 2.6228, + "step": 6916 + }, + { + "epoch": 0.5582277459446372, + "grad_norm": 0.7183346748352051, + "learning_rate": 0.0001474682712259522, + "loss": 2.5704, + "step": 6917 + }, + { + "epoch": 0.5583084496812203, + "grad_norm": 0.7622792720794678, + "learning_rate": 0.00014745437568612136, + "loss": 2.6031, + "step": 6918 + }, + { + "epoch": 0.5583891534178033, + "grad_norm": 0.6967802047729492, + "learning_rate": 0.00014744047896360344, + "loss": 2.6031, + "step": 6919 + }, + { + "epoch": 0.5584698571543862, + "grad_norm": 0.7827191948890686, + "learning_rate": 0.00014742658105874475, + "loss": 2.5427, + "step": 6920 + }, + { + "epoch": 0.5585505608909692, + "grad_norm": 0.6865705847740173, + "learning_rate": 0.0001474126819718917, + "loss": 2.6514, + "step": 6921 + }, + { + "epoch": 0.5586312646275523, + "grad_norm": 0.7181665897369385, + "learning_rate": 0.0001473987817033906, + "loss": 2.613, + "step": 6922 + }, + { + "epoch": 0.5587119683641353, + "grad_norm": 0.7198463082313538, + "learning_rate": 0.00014738488025358806, + "loss": 2.6423, + "step": 6923 + }, + { + "epoch": 0.5587926721007183, + "grad_norm": 0.773078441619873, + 
"learning_rate": 0.00014737097762283042, + "loss": 2.5946, + "step": 6924 + }, + { + "epoch": 0.5588733758373012, + "grad_norm": 0.7732799649238586, + "learning_rate": 0.00014735707381146416, + "loss": 2.6778, + "step": 6925 + }, + { + "epoch": 0.5589540795738843, + "grad_norm": 0.7639997601509094, + "learning_rate": 0.00014734316881983585, + "loss": 2.6064, + "step": 6926 + }, + { + "epoch": 0.5590347833104673, + "grad_norm": 0.7912085652351379, + "learning_rate": 0.00014732926264829198, + "loss": 2.5765, + "step": 6927 + }, + { + "epoch": 0.5591154870470503, + "grad_norm": 0.7460121512413025, + "learning_rate": 0.0001473153552971792, + "loss": 2.6724, + "step": 6928 + }, + { + "epoch": 0.5591961907836333, + "grad_norm": 0.6853603720664978, + "learning_rate": 0.00014730144676684408, + "loss": 2.5846, + "step": 6929 + }, + { + "epoch": 0.5592768945202163, + "grad_norm": 0.7368159294128418, + "learning_rate": 0.00014728753705763324, + "loss": 2.6626, + "step": 6930 + }, + { + "epoch": 0.5593575982567993, + "grad_norm": 0.6888907551765442, + "learning_rate": 0.0001472736261698934, + "loss": 2.6169, + "step": 6931 + }, + { + "epoch": 0.5594383019933823, + "grad_norm": 0.6978163719177246, + "learning_rate": 0.0001472597141039712, + "loss": 2.6367, + "step": 6932 + }, + { + "epoch": 0.5595190057299653, + "grad_norm": 0.7829774618148804, + "learning_rate": 0.00014724580086021335, + "loss": 2.5983, + "step": 6933 + }, + { + "epoch": 0.5595997094665484, + "grad_norm": 0.7872018218040466, + "learning_rate": 0.0001472318864389667, + "loss": 2.5418, + "step": 6934 + }, + { + "epoch": 0.5596804132031313, + "grad_norm": 0.6994973421096802, + "learning_rate": 0.00014721797084057793, + "loss": 2.6062, + "step": 6935 + }, + { + "epoch": 0.5597611169397143, + "grad_norm": 0.7281144857406616, + "learning_rate": 0.00014720405406539394, + "loss": 2.573, + "step": 6936 + }, + { + "epoch": 0.5598418206762973, + "grad_norm": 0.713513970375061, + "learning_rate": 0.0001471901361137615, + 
"loss": 2.6589, + "step": 6937 + }, + { + "epoch": 0.5599225244128803, + "grad_norm": 0.7752750515937805, + "learning_rate": 0.00014717621698602754, + "loss": 2.6478, + "step": 6938 + }, + { + "epoch": 0.5600032281494634, + "grad_norm": 0.6876000165939331, + "learning_rate": 0.00014716229668253889, + "loss": 2.6092, + "step": 6939 + }, + { + "epoch": 0.5600839318860463, + "grad_norm": 0.6371028423309326, + "learning_rate": 0.00014714837520364256, + "loss": 2.606, + "step": 6940 + }, + { + "epoch": 0.5601646356226293, + "grad_norm": 0.6488915085792542, + "learning_rate": 0.00014713445254968546, + "loss": 2.5769, + "step": 6941 + }, + { + "epoch": 0.5602453393592123, + "grad_norm": 0.7286413908004761, + "learning_rate": 0.00014712052872101458, + "loss": 2.6267, + "step": 6942 + }, + { + "epoch": 0.5603260430957954, + "grad_norm": 0.6863759160041809, + "learning_rate": 0.00014710660371797696, + "loss": 2.641, + "step": 6943 + }, + { + "epoch": 0.5604067468323783, + "grad_norm": 0.706900417804718, + "learning_rate": 0.00014709267754091964, + "loss": 2.6344, + "step": 6944 + }, + { + "epoch": 0.5604874505689613, + "grad_norm": 0.6462892293930054, + "learning_rate": 0.0001470787501901897, + "loss": 2.5561, + "step": 6945 + }, + { + "epoch": 0.5605681543055443, + "grad_norm": 0.7342472076416016, + "learning_rate": 0.00014706482166613425, + "loss": 2.583, + "step": 6946 + }, + { + "epoch": 0.5606488580421274, + "grad_norm": 0.7132803797721863, + "learning_rate": 0.00014705089196910038, + "loss": 2.558, + "step": 6947 + }, + { + "epoch": 0.5607295617787104, + "grad_norm": 0.7709125876426697, + "learning_rate": 0.00014703696109943533, + "loss": 2.6165, + "step": 6948 + }, + { + "epoch": 0.5608102655152933, + "grad_norm": 0.7108885645866394, + "learning_rate": 0.00014702302905748619, + "loss": 2.5788, + "step": 6949 + }, + { + "epoch": 0.5608909692518763, + "grad_norm": 0.7295591235160828, + "learning_rate": 0.0001470090958436003, + "loss": 2.6526, + "step": 6950 + }, + { + 
"epoch": 0.5609716729884594, + "grad_norm": 0.7235364317893982, + "learning_rate": 0.00014699516145812486, + "loss": 2.604, + "step": 6951 + }, + { + "epoch": 0.5610523767250424, + "grad_norm": 0.6723269820213318, + "learning_rate": 0.00014698122590140714, + "loss": 2.5838, + "step": 6952 + }, + { + "epoch": 0.5611330804616254, + "grad_norm": 0.7022266983985901, + "learning_rate": 0.00014696728917379447, + "loss": 2.6086, + "step": 6953 + }, + { + "epoch": 0.5612137841982083, + "grad_norm": 0.6923824548721313, + "learning_rate": 0.00014695335127563414, + "loss": 2.6678, + "step": 6954 + }, + { + "epoch": 0.5612944879347914, + "grad_norm": 0.6909339427947998, + "learning_rate": 0.0001469394122072736, + "loss": 2.6397, + "step": 6955 + }, + { + "epoch": 0.5613751916713744, + "grad_norm": 0.710299015045166, + "learning_rate": 0.00014692547196906022, + "loss": 2.5973, + "step": 6956 + }, + { + "epoch": 0.5614558954079574, + "grad_norm": 0.7141178250312805, + "learning_rate": 0.00014691153056134136, + "loss": 2.6111, + "step": 6957 + }, + { + "epoch": 0.5615365991445403, + "grad_norm": 0.6994750499725342, + "learning_rate": 0.00014689758798446456, + "loss": 2.6498, + "step": 6958 + }, + { + "epoch": 0.5616173028811234, + "grad_norm": 0.6951611638069153, + "learning_rate": 0.00014688364423877726, + "loss": 2.6208, + "step": 6959 + }, + { + "epoch": 0.5616980066177064, + "grad_norm": 0.6610642075538635, + "learning_rate": 0.000146869699324627, + "loss": 2.5725, + "step": 6960 + }, + { + "epoch": 0.5617787103542894, + "grad_norm": 0.6771267056465149, + "learning_rate": 0.00014685575324236135, + "loss": 2.6336, + "step": 6961 + }, + { + "epoch": 0.5618594140908724, + "grad_norm": 0.7431008815765381, + "learning_rate": 0.0001468418059923278, + "loss": 2.6782, + "step": 6962 + }, + { + "epoch": 0.5619401178274555, + "grad_norm": 0.7399705648422241, + "learning_rate": 0.000146827857574874, + "loss": 2.6212, + "step": 6963 + }, + { + "epoch": 0.5620208215640384, + "grad_norm": 
0.7237067222595215, + "learning_rate": 0.00014681390799034763, + "loss": 2.6261, + "step": 6964 + }, + { + "epoch": 0.5621015253006214, + "grad_norm": 0.7033257484436035, + "learning_rate": 0.00014679995723909623, + "loss": 2.6912, + "step": 6965 + }, + { + "epoch": 0.5621822290372044, + "grad_norm": 0.6953759789466858, + "learning_rate": 0.00014678600532146762, + "loss": 2.6022, + "step": 6966 + }, + { + "epoch": 0.5622629327737875, + "grad_norm": 0.8338057994842529, + "learning_rate": 0.0001467720522378094, + "loss": 2.595, + "step": 6967 + }, + { + "epoch": 0.5623436365103704, + "grad_norm": 0.6506100296974182, + "learning_rate": 0.00014675809798846942, + "loss": 2.6033, + "step": 6968 + }, + { + "epoch": 0.5624243402469534, + "grad_norm": 0.7122468948364258, + "learning_rate": 0.0001467441425737954, + "loss": 2.56, + "step": 6969 + }, + { + "epoch": 0.5625050439835364, + "grad_norm": 0.7012680172920227, + "learning_rate": 0.00014673018599413516, + "loss": 2.6052, + "step": 6970 + }, + { + "epoch": 0.5625857477201195, + "grad_norm": 0.668187141418457, + "learning_rate": 0.00014671622824983653, + "loss": 2.6675, + "step": 6971 + }, + { + "epoch": 0.5626664514567025, + "grad_norm": 0.7259203791618347, + "learning_rate": 0.00014670226934124738, + "loss": 2.5977, + "step": 6972 + }, + { + "epoch": 0.5627471551932854, + "grad_norm": 0.6705875396728516, + "learning_rate": 0.00014668830926871555, + "loss": 2.649, + "step": 6973 + }, + { + "epoch": 0.5628278589298684, + "grad_norm": 0.682731568813324, + "learning_rate": 0.00014667434803258906, + "loss": 2.6084, + "step": 6974 + }, + { + "epoch": 0.5629085626664515, + "grad_norm": 0.7061700224876404, + "learning_rate": 0.00014666038563321577, + "loss": 2.6256, + "step": 6975 + }, + { + "epoch": 0.5629892664030345, + "grad_norm": 0.6839977502822876, + "learning_rate": 0.00014664642207094374, + "loss": 2.6342, + "step": 6976 + }, + { + "epoch": 0.5630699701396175, + "grad_norm": 0.7376503348350525, + "learning_rate": 
0.00014663245734612094, + "loss": 2.6001, + "step": 6977 + }, + { + "epoch": 0.5631506738762004, + "grad_norm": 0.6901546716690063, + "learning_rate": 0.0001466184914590954, + "loss": 2.6715, + "step": 6978 + }, + { + "epoch": 0.5632313776127835, + "grad_norm": 0.816223680973053, + "learning_rate": 0.00014660452441021512, + "loss": 2.6407, + "step": 6979 + }, + { + "epoch": 0.5633120813493665, + "grad_norm": 0.6904644966125488, + "learning_rate": 0.00014659055619982835, + "loss": 2.5543, + "step": 6980 + }, + { + "epoch": 0.5633927850859495, + "grad_norm": 0.6784235239028931, + "learning_rate": 0.0001465765868282831, + "loss": 2.6184, + "step": 6981 + }, + { + "epoch": 0.5634734888225325, + "grad_norm": 0.7689006328582764, + "learning_rate": 0.00014656261629592755, + "loss": 2.644, + "step": 6982 + }, + { + "epoch": 0.5635541925591155, + "grad_norm": 0.7608775496482849, + "learning_rate": 0.0001465486446031099, + "loss": 2.5952, + "step": 6983 + }, + { + "epoch": 0.5636348962956985, + "grad_norm": 0.7266525626182556, + "learning_rate": 0.00014653467175017833, + "loss": 2.6479, + "step": 6984 + }, + { + "epoch": 0.5637156000322815, + "grad_norm": 0.6907477974891663, + "learning_rate": 0.00014652069773748113, + "loss": 2.5825, + "step": 6985 + }, + { + "epoch": 0.5637963037688645, + "grad_norm": 0.7790403366088867, + "learning_rate": 0.00014650672256536648, + "loss": 2.5948, + "step": 6986 + }, + { + "epoch": 0.5638770075054474, + "grad_norm": 0.7072858214378357, + "learning_rate": 0.00014649274623418278, + "loss": 2.6017, + "step": 6987 + }, + { + "epoch": 0.5639577112420305, + "grad_norm": 0.7140414118766785, + "learning_rate": 0.0001464787687442783, + "loss": 2.5709, + "step": 6988 + }, + { + "epoch": 0.5640384149786135, + "grad_norm": 0.857783317565918, + "learning_rate": 0.00014646479009600139, + "loss": 2.7049, + "step": 6989 + }, + { + "epoch": 0.5641191187151965, + "grad_norm": 0.7599344253540039, + "learning_rate": 0.00014645081028970047, + "loss": 2.6369, + 
"step": 6990 + }, + { + "epoch": 0.5641998224517795, + "grad_norm": 0.7286150455474854, + "learning_rate": 0.00014643682932572393, + "loss": 2.6238, + "step": 6991 + }, + { + "epoch": 0.5642805261883626, + "grad_norm": 0.7095075249671936, + "learning_rate": 0.0001464228472044202, + "loss": 2.5924, + "step": 6992 + }, + { + "epoch": 0.5643612299249455, + "grad_norm": 0.7583668828010559, + "learning_rate": 0.0001464088639261378, + "loss": 2.6098, + "step": 6993 + }, + { + "epoch": 0.5644419336615285, + "grad_norm": 0.7393970489501953, + "learning_rate": 0.00014639487949122515, + "loss": 2.6036, + "step": 6994 + }, + { + "epoch": 0.5645226373981115, + "grad_norm": 0.6789388656616211, + "learning_rate": 0.00014638089390003086, + "loss": 2.642, + "step": 6995 + }, + { + "epoch": 0.5646033411346946, + "grad_norm": 0.8021289706230164, + "learning_rate": 0.00014636690715290346, + "loss": 2.6851, + "step": 6996 + }, + { + "epoch": 0.5646840448712775, + "grad_norm": 0.6931039094924927, + "learning_rate": 0.00014635291925019152, + "loss": 2.6358, + "step": 6997 + }, + { + "epoch": 0.5647647486078605, + "grad_norm": 0.7356590032577515, + "learning_rate": 0.00014633893019224366, + "loss": 2.5661, + "step": 6998 + }, + { + "epoch": 0.5648454523444435, + "grad_norm": 0.6777941584587097, + "learning_rate": 0.0001463249399794085, + "loss": 2.5578, + "step": 6999 + }, + { + "epoch": 0.5649261560810266, + "grad_norm": 0.7163615822792053, + "learning_rate": 0.0001463109486120348, + "loss": 2.5582, + "step": 7000 + }, + { + "epoch": 0.5649261560810266, + "eval_loss": 2.5298855304718018, + "eval_runtime": 757.774, + "eval_samples_per_second": 3.457, + "eval_steps_per_second": 0.577, + "step": 7000 + }, + { + "epoch": 0.5650068598176096, + "grad_norm": 0.7175148129463196, + "learning_rate": 0.0001462969560904712, + "loss": 2.568, + "step": 7001 + }, + { + "epoch": 0.5650875635541925, + "grad_norm": 0.6998937129974365, + "learning_rate": 0.00014628296241506636, + "loss": 2.6347, + "step": 
7002 + }, + { + "epoch": 0.5651682672907755, + "grad_norm": 0.8140312433242798, + "learning_rate": 0.00014626896758616916, + "loss": 2.6566, + "step": 7003 + }, + { + "epoch": 0.5652489710273586, + "grad_norm": 0.7218164205551147, + "learning_rate": 0.00014625497160412833, + "loss": 2.5693, + "step": 7004 + }, + { + "epoch": 0.5653296747639416, + "grad_norm": 0.6974074244499207, + "learning_rate": 0.0001462409744692927, + "loss": 2.6084, + "step": 7005 + }, + { + "epoch": 0.5654103785005246, + "grad_norm": 0.7475053071975708, + "learning_rate": 0.00014622697618201113, + "loss": 2.6534, + "step": 7006 + }, + { + "epoch": 0.5654910822371075, + "grad_norm": 0.6768492460250854, + "learning_rate": 0.00014621297674263247, + "loss": 2.585, + "step": 7007 + }, + { + "epoch": 0.5655717859736906, + "grad_norm": 0.7023029923439026, + "learning_rate": 0.0001461989761515056, + "loss": 2.6219, + "step": 7008 + }, + { + "epoch": 0.5656524897102736, + "grad_norm": 0.7248445749282837, + "learning_rate": 0.0001461849744089795, + "loss": 2.6382, + "step": 7009 + }, + { + "epoch": 0.5657331934468566, + "grad_norm": 0.6961148381233215, + "learning_rate": 0.00014617097151540308, + "loss": 2.7184, + "step": 7010 + }, + { + "epoch": 0.5658138971834396, + "grad_norm": 0.6649057269096375, + "learning_rate": 0.0001461569674711254, + "loss": 2.6059, + "step": 7011 + }, + { + "epoch": 0.5658946009200226, + "grad_norm": 0.7451788783073425, + "learning_rate": 0.00014614296227649542, + "loss": 2.5697, + "step": 7012 + }, + { + "epoch": 0.5659753046566056, + "grad_norm": 0.6880216598510742, + "learning_rate": 0.0001461289559318622, + "loss": 2.5785, + "step": 7013 + }, + { + "epoch": 0.5660560083931886, + "grad_norm": 0.7505971789360046, + "learning_rate": 0.00014611494843757482, + "loss": 2.5479, + "step": 7014 + }, + { + "epoch": 0.5661367121297716, + "grad_norm": 0.745914876461029, + "learning_rate": 0.00014610093979398235, + "loss": 2.6367, + "step": 7015 + }, + { + "epoch": 
0.5662174158663547, + "grad_norm": 0.6758660674095154, + "learning_rate": 0.000146086930001434, + "loss": 2.5673, + "step": 7016 + }, + { + "epoch": 0.5662981196029376, + "grad_norm": 0.7114273309707642, + "learning_rate": 0.00014607291906027886, + "loss": 2.6188, + "step": 7017 + }, + { + "epoch": 0.5663788233395206, + "grad_norm": 0.6791165471076965, + "learning_rate": 0.00014605890697086613, + "loss": 2.6197, + "step": 7018 + }, + { + "epoch": 0.5664595270761036, + "grad_norm": 0.6948217153549194, + "learning_rate": 0.00014604489373354503, + "loss": 2.5996, + "step": 7019 + }, + { + "epoch": 0.5665402308126867, + "grad_norm": 0.6993576884269714, + "learning_rate": 0.00014603087934866483, + "loss": 2.565, + "step": 7020 + }, + { + "epoch": 0.5666209345492697, + "grad_norm": 0.6936905384063721, + "learning_rate": 0.0001460168638165748, + "loss": 2.6524, + "step": 7021 + }, + { + "epoch": 0.5667016382858526, + "grad_norm": 0.6810741424560547, + "learning_rate": 0.00014600284713762424, + "loss": 2.6519, + "step": 7022 + }, + { + "epoch": 0.5667823420224356, + "grad_norm": 0.7540227770805359, + "learning_rate": 0.00014598882931216245, + "loss": 2.659, + "step": 7023 + }, + { + "epoch": 0.5668630457590187, + "grad_norm": 0.6520613431930542, + "learning_rate": 0.0001459748103405388, + "loss": 2.5341, + "step": 7024 + }, + { + "epoch": 0.5669437494956017, + "grad_norm": 0.7159109711647034, + "learning_rate": 0.00014596079022310277, + "loss": 2.6548, + "step": 7025 + }, + { + "epoch": 0.5670244532321846, + "grad_norm": 0.803284227848053, + "learning_rate": 0.00014594676896020366, + "loss": 2.705, + "step": 7026 + }, + { + "epoch": 0.5671051569687676, + "grad_norm": 0.7069976925849915, + "learning_rate": 0.00014593274655219095, + "loss": 2.5733, + "step": 7027 + }, + { + "epoch": 0.5671858607053507, + "grad_norm": 0.7085167169570923, + "learning_rate": 0.00014591872299941417, + "loss": 2.6247, + "step": 7028 + }, + { + "epoch": 0.5672665644419337, + "grad_norm": 
0.6748499274253845, + "learning_rate": 0.00014590469830222272, + "loss": 2.6446, + "step": 7029 + }, + { + "epoch": 0.5673472681785167, + "grad_norm": 0.6885821223258972, + "learning_rate": 0.00014589067246096623, + "loss": 2.5879, + "step": 7030 + }, + { + "epoch": 0.5674279719150996, + "grad_norm": 0.7220324277877808, + "learning_rate": 0.0001458766454759942, + "loss": 2.6249, + "step": 7031 + }, + { + "epoch": 0.5675086756516827, + "grad_norm": 0.6712783575057983, + "learning_rate": 0.00014586261734765628, + "loss": 2.5971, + "step": 7032 + }, + { + "epoch": 0.5675893793882657, + "grad_norm": 0.6582161784172058, + "learning_rate": 0.00014584858807630203, + "loss": 2.6224, + "step": 7033 + }, + { + "epoch": 0.5676700831248487, + "grad_norm": 0.6699219346046448, + "learning_rate": 0.0001458345576622811, + "loss": 2.5926, + "step": 7034 + }, + { + "epoch": 0.5677507868614317, + "grad_norm": 0.6508033871650696, + "learning_rate": 0.0001458205261059432, + "loss": 2.6311, + "step": 7035 + }, + { + "epoch": 0.5678314905980147, + "grad_norm": 0.7551338076591492, + "learning_rate": 0.00014580649340763802, + "loss": 2.5729, + "step": 7036 + }, + { + "epoch": 0.5679121943345977, + "grad_norm": 0.6875829100608826, + "learning_rate": 0.00014579245956771527, + "loss": 2.6253, + "step": 7037 + }, + { + "epoch": 0.5679928980711807, + "grad_norm": 0.698204517364502, + "learning_rate": 0.00014577842458652474, + "loss": 2.6218, + "step": 7038 + }, + { + "epoch": 0.5680736018077637, + "grad_norm": 0.8258630037307739, + "learning_rate": 0.00014576438846441615, + "loss": 2.6307, + "step": 7039 + }, + { + "epoch": 0.5681543055443466, + "grad_norm": 0.753105878829956, + "learning_rate": 0.00014575035120173942, + "loss": 2.5664, + "step": 7040 + }, + { + "epoch": 0.5682350092809297, + "grad_norm": 0.6999726295471191, + "learning_rate": 0.00014573631279884435, + "loss": 2.6857, + "step": 7041 + }, + { + "epoch": 0.5683157130175127, + "grad_norm": 0.6484847068786621, + "learning_rate": 
0.00014572227325608078, + "loss": 2.6068, + "step": 7042 + }, + { + "epoch": 0.5683964167540957, + "grad_norm": 0.7098011374473572, + "learning_rate": 0.00014570823257379866, + "loss": 2.6591, + "step": 7043 + }, + { + "epoch": 0.5684771204906787, + "grad_norm": 0.8304192423820496, + "learning_rate": 0.0001456941907523479, + "loss": 2.6582, + "step": 7044 + }, + { + "epoch": 0.5685578242272618, + "grad_norm": 0.763214111328125, + "learning_rate": 0.00014568014779207844, + "loss": 2.6605, + "step": 7045 + }, + { + "epoch": 0.5686385279638447, + "grad_norm": 0.6805880665779114, + "learning_rate": 0.00014566610369334032, + "loss": 2.6362, + "step": 7046 + }, + { + "epoch": 0.5687192317004277, + "grad_norm": 0.6753434538841248, + "learning_rate": 0.00014565205845648352, + "loss": 2.6352, + "step": 7047 + }, + { + "epoch": 0.5687999354370107, + "grad_norm": 0.7065438032150269, + "learning_rate": 0.00014563801208185807, + "loss": 2.5975, + "step": 7048 + }, + { + "epoch": 0.5688806391735938, + "grad_norm": 0.6863527894020081, + "learning_rate": 0.00014562396456981407, + "loss": 2.576, + "step": 7049 + }, + { + "epoch": 0.5689613429101767, + "grad_norm": 0.7344440817832947, + "learning_rate": 0.00014560991592070158, + "loss": 2.5933, + "step": 7050 + }, + { + "epoch": 0.5690420466467597, + "grad_norm": 0.699992835521698, + "learning_rate": 0.00014559586613487082, + "loss": 2.6161, + "step": 7051 + }, + { + "epoch": 0.5691227503833427, + "grad_norm": 0.7287258505821228, + "learning_rate": 0.00014558181521267185, + "loss": 2.665, + "step": 7052 + }, + { + "epoch": 0.5692034541199258, + "grad_norm": 0.7304692268371582, + "learning_rate": 0.0001455677631544549, + "loss": 2.5696, + "step": 7053 + }, + { + "epoch": 0.5692841578565088, + "grad_norm": 0.6556086540222168, + "learning_rate": 0.00014555370996057016, + "loss": 2.6405, + "step": 7054 + }, + { + "epoch": 0.5693648615930917, + "grad_norm": 0.6796221137046814, + "learning_rate": 0.0001455396556313679, + "loss": 2.6475, + 
"step": 7055 + }, + { + "epoch": 0.5694455653296747, + "grad_norm": 0.7067505717277527, + "learning_rate": 0.00014552560016719838, + "loss": 2.6344, + "step": 7056 + }, + { + "epoch": 0.5695262690662578, + "grad_norm": 0.7108997106552124, + "learning_rate": 0.00014551154356841193, + "loss": 2.6543, + "step": 7057 + }, + { + "epoch": 0.5696069728028408, + "grad_norm": 0.7296212911605835, + "learning_rate": 0.0001454974858353588, + "loss": 2.6152, + "step": 7058 + }, + { + "epoch": 0.5696876765394238, + "grad_norm": 0.7329154014587402, + "learning_rate": 0.00014548342696838943, + "loss": 2.6338, + "step": 7059 + }, + { + "epoch": 0.5697683802760067, + "grad_norm": 0.6880258321762085, + "learning_rate": 0.00014546936696785412, + "loss": 2.5834, + "step": 7060 + }, + { + "epoch": 0.5698490840125898, + "grad_norm": 0.7140741348266602, + "learning_rate": 0.00014545530583410336, + "loss": 2.6361, + "step": 7061 + }, + { + "epoch": 0.5699297877491728, + "grad_norm": 0.6419476866722107, + "learning_rate": 0.00014544124356748755, + "loss": 2.4982, + "step": 7062 + }, + { + "epoch": 0.5700104914857558, + "grad_norm": 0.6934036612510681, + "learning_rate": 0.00014542718016835718, + "loss": 2.5748, + "step": 7063 + }, + { + "epoch": 0.5700911952223388, + "grad_norm": 0.721663236618042, + "learning_rate": 0.0001454131156370627, + "loss": 2.5419, + "step": 7064 + }, + { + "epoch": 0.5701718989589218, + "grad_norm": 0.734062671661377, + "learning_rate": 0.00014539904997395468, + "loss": 2.6288, + "step": 7065 + }, + { + "epoch": 0.5702526026955048, + "grad_norm": 0.7927694320678711, + "learning_rate": 0.00014538498317938367, + "loss": 2.6331, + "step": 7066 + }, + { + "epoch": 0.5703333064320878, + "grad_norm": 0.715929388999939, + "learning_rate": 0.00014537091525370025, + "loss": 2.6333, + "step": 7067 + }, + { + "epoch": 0.5704140101686708, + "grad_norm": 0.772230327129364, + "learning_rate": 0.00014535684619725498, + "loss": 2.6019, + "step": 7068 + }, + { + "epoch": 
0.5704947139052539, + "grad_norm": 0.7277318239212036, + "learning_rate": 0.0001453427760103986, + "loss": 2.6062, + "step": 7069 + }, + { + "epoch": 0.5705754176418368, + "grad_norm": 0.6708227396011353, + "learning_rate": 0.00014532870469348164, + "loss": 2.6613, + "step": 7070 + }, + { + "epoch": 0.5706561213784198, + "grad_norm": 0.7507323622703552, + "learning_rate": 0.0001453146322468549, + "loss": 2.6456, + "step": 7071 + }, + { + "epoch": 0.5707368251150028, + "grad_norm": 0.6864063739776611, + "learning_rate": 0.00014530055867086912, + "loss": 2.6361, + "step": 7072 + }, + { + "epoch": 0.5708175288515859, + "grad_norm": 0.6805310249328613, + "learning_rate": 0.00014528648396587498, + "loss": 2.6088, + "step": 7073 + }, + { + "epoch": 0.5708982325881689, + "grad_norm": 0.7946523427963257, + "learning_rate": 0.00014527240813222325, + "loss": 2.6533, + "step": 7074 + }, + { + "epoch": 0.5709789363247518, + "grad_norm": 0.6814306974411011, + "learning_rate": 0.00014525833117026474, + "loss": 2.6478, + "step": 7075 + }, + { + "epoch": 0.5710596400613348, + "grad_norm": 0.749664843082428, + "learning_rate": 0.00014524425308035034, + "loss": 2.6296, + "step": 7076 + }, + { + "epoch": 0.5711403437979179, + "grad_norm": 0.6774656772613525, + "learning_rate": 0.00014523017386283091, + "loss": 2.5867, + "step": 7077 + }, + { + "epoch": 0.5712210475345009, + "grad_norm": 0.7331634163856506, + "learning_rate": 0.00014521609351805733, + "loss": 2.6484, + "step": 7078 + }, + { + "epoch": 0.5713017512710838, + "grad_norm": 0.7076910734176636, + "learning_rate": 0.00014520201204638045, + "loss": 2.6464, + "step": 7079 + }, + { + "epoch": 0.5713824550076668, + "grad_norm": 0.74099200963974, + "learning_rate": 0.00014518792944815127, + "loss": 2.6304, + "step": 7080 + }, + { + "epoch": 0.5714631587442499, + "grad_norm": 0.6673823595046997, + "learning_rate": 0.00014517384572372078, + "loss": 2.5903, + "step": 7081 + }, + { + "epoch": 0.5715438624808329, + "grad_norm": 
0.6872609257698059, + "learning_rate": 0.00014515976087343997, + "loss": 2.6189, + "step": 7082 + }, + { + "epoch": 0.5716245662174159, + "grad_norm": 0.7363224625587463, + "learning_rate": 0.0001451456748976599, + "loss": 2.5845, + "step": 7083 + }, + { + "epoch": 0.5717052699539988, + "grad_norm": 0.7672157287597656, + "learning_rate": 0.00014513158779673157, + "loss": 2.6331, + "step": 7084 + }, + { + "epoch": 0.5717859736905819, + "grad_norm": 0.661195695400238, + "learning_rate": 0.00014511749957100612, + "loss": 2.5827, + "step": 7085 + }, + { + "epoch": 0.5718666774271649, + "grad_norm": 0.8034788370132446, + "learning_rate": 0.0001451034102208346, + "loss": 2.6209, + "step": 7086 + }, + { + "epoch": 0.5719473811637479, + "grad_norm": 0.7318302392959595, + "learning_rate": 0.00014508931974656822, + "loss": 2.5898, + "step": 7087 + }, + { + "epoch": 0.5720280849003309, + "grad_norm": 0.7334744930267334, + "learning_rate": 0.00014507522814855814, + "loss": 2.5893, + "step": 7088 + }, + { + "epoch": 0.5721087886369138, + "grad_norm": 0.783051609992981, + "learning_rate": 0.00014506113542715553, + "loss": 2.6284, + "step": 7089 + }, + { + "epoch": 0.5721894923734969, + "grad_norm": 0.7319497466087341, + "learning_rate": 0.00014504704158271165, + "loss": 2.5705, + "step": 7090 + }, + { + "epoch": 0.5722701961100799, + "grad_norm": 0.7886925935745239, + "learning_rate": 0.00014503294661557772, + "loss": 2.641, + "step": 7091 + }, + { + "epoch": 0.5723508998466629, + "grad_norm": 0.6882795691490173, + "learning_rate": 0.00014501885052610502, + "loss": 2.5714, + "step": 7092 + }, + { + "epoch": 0.5724316035832459, + "grad_norm": 0.7089235186576843, + "learning_rate": 0.00014500475331464494, + "loss": 2.6073, + "step": 7093 + }, + { + "epoch": 0.5725123073198289, + "grad_norm": 0.7261029481887817, + "learning_rate": 0.00014499065498154874, + "loss": 2.5595, + "step": 7094 + }, + { + "epoch": 0.5725930110564119, + "grad_norm": 0.7625105977058411, + "learning_rate": 
0.0001449765555271678, + "loss": 2.5978, + "step": 7095 + }, + { + "epoch": 0.5726737147929949, + "grad_norm": 0.7853986024856567, + "learning_rate": 0.00014496245495185353, + "loss": 2.6378, + "step": 7096 + }, + { + "epoch": 0.5727544185295779, + "grad_norm": 0.8070923686027527, + "learning_rate": 0.00014494835325595736, + "loss": 2.7062, + "step": 7097 + }, + { + "epoch": 0.572835122266161, + "grad_norm": 0.7074965834617615, + "learning_rate": 0.00014493425043983073, + "loss": 2.5177, + "step": 7098 + }, + { + "epoch": 0.5729158260027439, + "grad_norm": 0.6890520453453064, + "learning_rate": 0.00014492014650382512, + "loss": 2.6058, + "step": 7099 + }, + { + "epoch": 0.5729965297393269, + "grad_norm": 0.6979860067367554, + "learning_rate": 0.00014490604144829202, + "loss": 2.5274, + "step": 7100 + }, + { + "epoch": 0.5730772334759099, + "grad_norm": 0.7972229719161987, + "learning_rate": 0.000144891935273583, + "loss": 2.6369, + "step": 7101 + }, + { + "epoch": 0.573157937212493, + "grad_norm": 0.6994345188140869, + "learning_rate": 0.0001448778279800496, + "loss": 2.5975, + "step": 7102 + }, + { + "epoch": 0.573238640949076, + "grad_norm": 0.7943929433822632, + "learning_rate": 0.0001448637195680434, + "loss": 2.6317, + "step": 7103 + }, + { + "epoch": 0.5733193446856589, + "grad_norm": 0.6975306272506714, + "learning_rate": 0.00014484961003791605, + "loss": 2.6264, + "step": 7104 + }, + { + "epoch": 0.5734000484222419, + "grad_norm": 0.6889060735702515, + "learning_rate": 0.00014483549939001917, + "loss": 2.5974, + "step": 7105 + }, + { + "epoch": 0.573480752158825, + "grad_norm": 0.7372777462005615, + "learning_rate": 0.00014482138762470444, + "loss": 2.5851, + "step": 7106 + }, + { + "epoch": 0.573561455895408, + "grad_norm": 0.7045157551765442, + "learning_rate": 0.00014480727474232362, + "loss": 2.6451, + "step": 7107 + }, + { + "epoch": 0.5736421596319909, + "grad_norm": 0.6974517107009888, + "learning_rate": 0.00014479316074322832, + "loss": 2.6796, + 
"step": 7108 + }, + { + "epoch": 0.5737228633685739, + "grad_norm": 0.7328097224235535, + "learning_rate": 0.00014477904562777038, + "loss": 2.5923, + "step": 7109 + }, + { + "epoch": 0.573803567105157, + "grad_norm": 0.7288877964019775, + "learning_rate": 0.0001447649293963016, + "loss": 2.6012, + "step": 7110 + }, + { + "epoch": 0.57388427084174, + "grad_norm": 0.7054389119148254, + "learning_rate": 0.00014475081204917372, + "loss": 2.6666, + "step": 7111 + }, + { + "epoch": 0.573964974578323, + "grad_norm": 0.7447949647903442, + "learning_rate": 0.00014473669358673865, + "loss": 2.6093, + "step": 7112 + }, + { + "epoch": 0.5740456783149059, + "grad_norm": 0.6431592106819153, + "learning_rate": 0.0001447225740093482, + "loss": 2.6242, + "step": 7113 + }, + { + "epoch": 0.574126382051489, + "grad_norm": 0.7096747756004333, + "learning_rate": 0.00014470845331735434, + "loss": 2.6297, + "step": 7114 + }, + { + "epoch": 0.574207085788072, + "grad_norm": 0.6918880939483643, + "learning_rate": 0.00014469433151110894, + "loss": 2.5849, + "step": 7115 + }, + { + "epoch": 0.574287789524655, + "grad_norm": 0.6617783308029175, + "learning_rate": 0.00014468020859096395, + "loss": 2.5972, + "step": 7116 + }, + { + "epoch": 0.574368493261238, + "grad_norm": 0.6525121927261353, + "learning_rate": 0.0001446660845572714, + "loss": 2.5888, + "step": 7117 + }, + { + "epoch": 0.574449196997821, + "grad_norm": 0.7024720907211304, + "learning_rate": 0.00014465195941038326, + "loss": 2.6135, + "step": 7118 + }, + { + "epoch": 0.574529900734404, + "grad_norm": 0.7660520672798157, + "learning_rate": 0.00014463783315065153, + "loss": 2.5837, + "step": 7119 + }, + { + "epoch": 0.574610604470987, + "grad_norm": 0.8206443190574646, + "learning_rate": 0.00014462370577842838, + "loss": 2.6749, + "step": 7120 + }, + { + "epoch": 0.57469130820757, + "grad_norm": 0.7176216840744019, + "learning_rate": 0.00014460957729406577, + "loss": 2.5814, + "step": 7121 + }, + { + "epoch": 0.5747720119441531, 
+ "grad_norm": 0.7867588400840759, + "learning_rate": 0.0001445954476979159, + "loss": 2.5697, + "step": 7122 + }, + { + "epoch": 0.574852715680736, + "grad_norm": 0.7150471806526184, + "learning_rate": 0.0001445813169903309, + "loss": 2.5689, + "step": 7123 + }, + { + "epoch": 0.574933419417319, + "grad_norm": 0.7082479596138, + "learning_rate": 0.00014456718517166296, + "loss": 2.6081, + "step": 7124 + }, + { + "epoch": 0.575014123153902, + "grad_norm": 0.7207253575325012, + "learning_rate": 0.00014455305224226426, + "loss": 2.6573, + "step": 7125 + }, + { + "epoch": 0.5750948268904851, + "grad_norm": 0.7451751232147217, + "learning_rate": 0.00014453891820248704, + "loss": 2.6057, + "step": 7126 + }, + { + "epoch": 0.575175530627068, + "grad_norm": 0.7030230164527893, + "learning_rate": 0.0001445247830526835, + "loss": 2.6122, + "step": 7127 + }, + { + "epoch": 0.575256234363651, + "grad_norm": 0.7233754396438599, + "learning_rate": 0.00014451064679320605, + "loss": 2.5937, + "step": 7128 + }, + { + "epoch": 0.575336938100234, + "grad_norm": 0.6943942904472351, + "learning_rate": 0.0001444965094244069, + "loss": 2.6327, + "step": 7129 + }, + { + "epoch": 0.5754176418368171, + "grad_norm": 0.682056725025177, + "learning_rate": 0.00014448237094663843, + "loss": 2.6212, + "step": 7130 + }, + { + "epoch": 0.5754983455734001, + "grad_norm": 0.7424136400222778, + "learning_rate": 0.00014446823136025298, + "loss": 2.6031, + "step": 7131 + }, + { + "epoch": 0.575579049309983, + "grad_norm": 0.7464002370834351, + "learning_rate": 0.00014445409066560298, + "loss": 2.6363, + "step": 7132 + }, + { + "epoch": 0.575659753046566, + "grad_norm": 0.7137650847434998, + "learning_rate": 0.00014443994886304085, + "loss": 2.5343, + "step": 7133 + }, + { + "epoch": 0.5757404567831491, + "grad_norm": 0.6744158864021301, + "learning_rate": 0.00014442580595291901, + "loss": 2.6463, + "step": 7134 + }, + { + "epoch": 0.5758211605197321, + "grad_norm": 0.6947084069252014, + 
"learning_rate": 0.00014441166193558991, + "loss": 2.6074, + "step": 7135 + }, + { + "epoch": 0.5759018642563151, + "grad_norm": 0.6981585621833801, + "learning_rate": 0.00014439751681140616, + "loss": 2.6257, + "step": 7136 + }, + { + "epoch": 0.575982567992898, + "grad_norm": 0.6800102591514587, + "learning_rate": 0.00014438337058072023, + "loss": 2.6447, + "step": 7137 + }, + { + "epoch": 0.5760632717294811, + "grad_norm": 0.6952316164970398, + "learning_rate": 0.00014436922324388465, + "loss": 2.5739, + "step": 7138 + }, + { + "epoch": 0.5761439754660641, + "grad_norm": 0.709170937538147, + "learning_rate": 0.0001443550748012521, + "loss": 2.5918, + "step": 7139 + }, + { + "epoch": 0.5762246792026471, + "grad_norm": 0.7677363157272339, + "learning_rate": 0.00014434092525317512, + "loss": 2.6322, + "step": 7140 + }, + { + "epoch": 0.5763053829392301, + "grad_norm": 0.6730263233184814, + "learning_rate": 0.00014432677460000636, + "loss": 2.6764, + "step": 7141 + }, + { + "epoch": 0.576386086675813, + "grad_norm": 0.6782239675521851, + "learning_rate": 0.0001443126228420985, + "loss": 2.5208, + "step": 7142 + }, + { + "epoch": 0.5764667904123961, + "grad_norm": 0.7737600207328796, + "learning_rate": 0.00014429846997980424, + "loss": 2.6964, + "step": 7143 + }, + { + "epoch": 0.5765474941489791, + "grad_norm": 0.7456403374671936, + "learning_rate": 0.00014428431601347635, + "loss": 2.6163, + "step": 7144 + }, + { + "epoch": 0.5766281978855621, + "grad_norm": 0.7824606895446777, + "learning_rate": 0.00014427016094346754, + "loss": 2.6499, + "step": 7145 + }, + { + "epoch": 0.576708901622145, + "grad_norm": 0.7233635187149048, + "learning_rate": 0.00014425600477013055, + "loss": 2.6064, + "step": 7146 + }, + { + "epoch": 0.5767896053587281, + "grad_norm": 0.7008275389671326, + "learning_rate": 0.00014424184749381824, + "loss": 2.5585, + "step": 7147 + }, + { + "epoch": 0.5768703090953111, + "grad_norm": 0.6817710995674133, + "learning_rate": 0.00014422768911488346, + 
"loss": 2.6215, + "step": 7148 + }, + { + "epoch": 0.5769510128318941, + "grad_norm": 0.6860779523849487, + "learning_rate": 0.00014421352963367906, + "loss": 2.5877, + "step": 7149 + }, + { + "epoch": 0.5770317165684771, + "grad_norm": 0.732865035533905, + "learning_rate": 0.00014419936905055793, + "loss": 2.5704, + "step": 7150 + }, + { + "epoch": 0.5771124203050602, + "grad_norm": 0.6992458701133728, + "learning_rate": 0.00014418520736587297, + "loss": 2.6654, + "step": 7151 + }, + { + "epoch": 0.5771931240416431, + "grad_norm": 0.6865053176879883, + "learning_rate": 0.00014417104457997715, + "loss": 2.6389, + "step": 7152 + }, + { + "epoch": 0.5772738277782261, + "grad_norm": 0.7652727365493774, + "learning_rate": 0.00014415688069322345, + "loss": 2.6478, + "step": 7153 + }, + { + "epoch": 0.5773545315148091, + "grad_norm": 0.708692193031311, + "learning_rate": 0.0001441427157059648, + "loss": 2.6065, + "step": 7154 + }, + { + "epoch": 0.5774352352513922, + "grad_norm": 0.7549232244491577, + "learning_rate": 0.00014412854961855435, + "loss": 2.6484, + "step": 7155 + }, + { + "epoch": 0.5775159389879752, + "grad_norm": 0.6410655975341797, + "learning_rate": 0.00014411438243134506, + "loss": 2.6061, + "step": 7156 + }, + { + "epoch": 0.5775966427245581, + "grad_norm": 0.7711724042892456, + "learning_rate": 0.00014410021414469005, + "loss": 2.628, + "step": 7157 + }, + { + "epoch": 0.5776773464611411, + "grad_norm": 0.6723695993423462, + "learning_rate": 0.0001440860447589424, + "loss": 2.6214, + "step": 7158 + }, + { + "epoch": 0.5777580501977242, + "grad_norm": 0.7359206676483154, + "learning_rate": 0.0001440718742744553, + "loss": 2.6157, + "step": 7159 + }, + { + "epoch": 0.5778387539343072, + "grad_norm": 0.7320525050163269, + "learning_rate": 0.0001440577026915819, + "loss": 2.6081, + "step": 7160 + }, + { + "epoch": 0.5779194576708901, + "grad_norm": 0.7728561162948608, + "learning_rate": 0.00014404353001067535, + "loss": 2.5989, + "step": 7161 + }, + { + 
"epoch": 0.5780001614074731, + "grad_norm": 0.7380329370498657, + "learning_rate": 0.0001440293562320889, + "loss": 2.6337, + "step": 7162 + }, + { + "epoch": 0.5780808651440562, + "grad_norm": 0.667789876461029, + "learning_rate": 0.00014401518135617581, + "loss": 2.6324, + "step": 7163 + }, + { + "epoch": 0.5781615688806392, + "grad_norm": 0.6907219886779785, + "learning_rate": 0.00014400100538328935, + "loss": 2.5897, + "step": 7164 + }, + { + "epoch": 0.5782422726172222, + "grad_norm": 0.9051530957221985, + "learning_rate": 0.00014398682831378283, + "loss": 2.6895, + "step": 7165 + }, + { + "epoch": 0.5783229763538051, + "grad_norm": 0.7189533114433289, + "learning_rate": 0.00014397265014800956, + "loss": 2.5948, + "step": 7166 + }, + { + "epoch": 0.5784036800903882, + "grad_norm": 0.7003059983253479, + "learning_rate": 0.00014395847088632285, + "loss": 2.5814, + "step": 7167 + }, + { + "epoch": 0.5784843838269712, + "grad_norm": 0.8083534240722656, + "learning_rate": 0.0001439442905290762, + "loss": 2.6131, + "step": 7168 + }, + { + "epoch": 0.5785650875635542, + "grad_norm": 0.7068585157394409, + "learning_rate": 0.0001439301090766229, + "loss": 2.6027, + "step": 7169 + }, + { + "epoch": 0.5786457913001372, + "grad_norm": 0.7010494470596313, + "learning_rate": 0.00014391592652931653, + "loss": 2.5296, + "step": 7170 + }, + { + "epoch": 0.5787264950367202, + "grad_norm": 0.7577467560768127, + "learning_rate": 0.00014390174288751045, + "loss": 2.6347, + "step": 7171 + }, + { + "epoch": 0.5788071987733032, + "grad_norm": 0.643799364566803, + "learning_rate": 0.00014388755815155813, + "loss": 2.6152, + "step": 7172 + }, + { + "epoch": 0.5788879025098862, + "grad_norm": 0.740352988243103, + "learning_rate": 0.00014387337232181315, + "loss": 2.6123, + "step": 7173 + }, + { + "epoch": 0.5789686062464692, + "grad_norm": 0.7309309840202332, + "learning_rate": 0.00014385918539862907, + "loss": 2.6072, + "step": 7174 + }, + { + "epoch": 0.5790493099830523, + 
"grad_norm": 0.7237016558647156, + "learning_rate": 0.00014384499738235941, + "loss": 2.6375, + "step": 7175 + }, + { + "epoch": 0.5791300137196352, + "grad_norm": 0.6600970029830933, + "learning_rate": 0.00014383080827335784, + "loss": 2.5285, + "step": 7176 + }, + { + "epoch": 0.5792107174562182, + "grad_norm": 0.6822233200073242, + "learning_rate": 0.00014381661807197794, + "loss": 2.5497, + "step": 7177 + }, + { + "epoch": 0.5792914211928012, + "grad_norm": 0.6990383863449097, + "learning_rate": 0.00014380242677857337, + "loss": 2.6283, + "step": 7178 + }, + { + "epoch": 0.5793721249293843, + "grad_norm": 0.64422208070755, + "learning_rate": 0.00014378823439349783, + "loss": 2.5762, + "step": 7179 + }, + { + "epoch": 0.5794528286659673, + "grad_norm": 0.63804692029953, + "learning_rate": 0.00014377404091710501, + "loss": 2.5523, + "step": 7180 + }, + { + "epoch": 0.5795335324025502, + "grad_norm": 0.6978863477706909, + "learning_rate": 0.0001437598463497487, + "loss": 2.5089, + "step": 7181 + }, + { + "epoch": 0.5796142361391332, + "grad_norm": 0.7091087698936462, + "learning_rate": 0.00014374565069178257, + "loss": 2.7005, + "step": 7182 + }, + { + "epoch": 0.5796949398757163, + "grad_norm": 0.683659553527832, + "learning_rate": 0.00014373145394356053, + "loss": 2.5988, + "step": 7183 + }, + { + "epoch": 0.5797756436122993, + "grad_norm": 0.7352960705757141, + "learning_rate": 0.00014371725610543633, + "loss": 2.5671, + "step": 7184 + }, + { + "epoch": 0.5798563473488823, + "grad_norm": 0.6951913237571716, + "learning_rate": 0.00014370305717776382, + "loss": 2.5917, + "step": 7185 + }, + { + "epoch": 0.5799370510854652, + "grad_norm": 0.6644465923309326, + "learning_rate": 0.0001436888571608969, + "loss": 2.5954, + "step": 7186 + }, + { + "epoch": 0.5800177548220483, + "grad_norm": 0.7406458258628845, + "learning_rate": 0.00014367465605518942, + "loss": 2.6369, + "step": 7187 + }, + { + "epoch": 0.5800984585586313, + "grad_norm": 0.6724697351455688, + 
"learning_rate": 0.00014366045386099535, + "loss": 2.6227, + "step": 7188 + }, + { + "epoch": 0.5801791622952143, + "grad_norm": 0.6804977059364319, + "learning_rate": 0.00014364625057866867, + "loss": 2.6445, + "step": 7189 + }, + { + "epoch": 0.5802598660317972, + "grad_norm": 0.7020019888877869, + "learning_rate": 0.00014363204620856335, + "loss": 2.6733, + "step": 7190 + }, + { + "epoch": 0.5803405697683802, + "grad_norm": 0.6458491086959839, + "learning_rate": 0.00014361784075103332, + "loss": 2.572, + "step": 7191 + }, + { + "epoch": 0.5804212735049633, + "grad_norm": 0.7078056335449219, + "learning_rate": 0.00014360363420643272, + "loss": 2.7032, + "step": 7192 + }, + { + "epoch": 0.5805019772415463, + "grad_norm": 0.6367471814155579, + "learning_rate": 0.00014358942657511557, + "loss": 2.5369, + "step": 7193 + }, + { + "epoch": 0.5805826809781293, + "grad_norm": 0.7311955094337463, + "learning_rate": 0.00014357521785743596, + "loss": 2.6513, + "step": 7194 + }, + { + "epoch": 0.5806633847147122, + "grad_norm": 0.6957442164421082, + "learning_rate": 0.00014356100805374805, + "loss": 2.6512, + "step": 7195 + }, + { + "epoch": 0.5807440884512953, + "grad_norm": 0.7026693224906921, + "learning_rate": 0.0001435467971644059, + "loss": 2.6049, + "step": 7196 + }, + { + "epoch": 0.5808247921878783, + "grad_norm": 0.7337697744369507, + "learning_rate": 0.00014353258518976376, + "loss": 2.5516, + "step": 7197 + }, + { + "epoch": 0.5809054959244613, + "grad_norm": 0.6891856789588928, + "learning_rate": 0.00014351837213017577, + "loss": 2.5894, + "step": 7198 + }, + { + "epoch": 0.5809861996610443, + "grad_norm": 0.6710659265518188, + "learning_rate": 0.0001435041579859962, + "loss": 2.596, + "step": 7199 + }, + { + "epoch": 0.5810669033976273, + "grad_norm": 0.7637245059013367, + "learning_rate": 0.00014348994275757931, + "loss": 2.6278, + "step": 7200 + }, + { + "epoch": 0.5811476071342103, + "grad_norm": 0.7558664679527283, + "learning_rate": 0.00014347572644527934, 
+ "loss": 2.6917, + "step": 7201 + }, + { + "epoch": 0.5812283108707933, + "grad_norm": 0.7254986763000488, + "learning_rate": 0.00014346150904945065, + "loss": 2.6161, + "step": 7202 + }, + { + "epoch": 0.5813090146073763, + "grad_norm": 0.7177211046218872, + "learning_rate": 0.00014344729057044753, + "loss": 2.555, + "step": 7203 + }, + { + "epoch": 0.5813897183439594, + "grad_norm": 0.6408729553222656, + "learning_rate": 0.00014343307100862432, + "loss": 2.6071, + "step": 7204 + }, + { + "epoch": 0.5814704220805423, + "grad_norm": 0.7399997711181641, + "learning_rate": 0.0001434188503643355, + "loss": 2.6013, + "step": 7205 + }, + { + "epoch": 0.5815511258171253, + "grad_norm": 0.7796236276626587, + "learning_rate": 0.00014340462863793543, + "loss": 2.603, + "step": 7206 + }, + { + "epoch": 0.5816318295537083, + "grad_norm": 0.7420137524604797, + "learning_rate": 0.00014339040582977855, + "loss": 2.5858, + "step": 7207 + }, + { + "epoch": 0.5817125332902914, + "grad_norm": 0.738042414188385, + "learning_rate": 0.00014337618194021928, + "loss": 2.592, + "step": 7208 + }, + { + "epoch": 0.5817932370268744, + "grad_norm": 0.6910614371299744, + "learning_rate": 0.00014336195696961222, + "loss": 2.6448, + "step": 7209 + }, + { + "epoch": 0.5818739407634573, + "grad_norm": 0.7838915586471558, + "learning_rate": 0.00014334773091831185, + "loss": 2.6257, + "step": 7210 + }, + { + "epoch": 0.5819546445000403, + "grad_norm": 0.7362141013145447, + "learning_rate": 0.0001433335037866727, + "loss": 2.6505, + "step": 7211 + }, + { + "epoch": 0.5820353482366234, + "grad_norm": 0.6892269253730774, + "learning_rate": 0.00014331927557504934, + "loss": 2.6518, + "step": 7212 + }, + { + "epoch": 0.5821160519732064, + "grad_norm": 0.7444556951522827, + "learning_rate": 0.0001433050462837964, + "loss": 2.6785, + "step": 7213 + }, + { + "epoch": 0.5821967557097893, + "grad_norm": 0.6948450207710266, + "learning_rate": 0.00014329081591326853, + "loss": 2.5753, + "step": 7214 + }, + { + 
"epoch": 0.5822774594463723, + "grad_norm": 0.713741660118103, + "learning_rate": 0.00014327658446382032, + "loss": 2.6425, + "step": 7215 + }, + { + "epoch": 0.5823581631829554, + "grad_norm": 0.7352245450019836, + "learning_rate": 0.00014326235193580657, + "loss": 2.6859, + "step": 7216 + }, + { + "epoch": 0.5824388669195384, + "grad_norm": 0.7151867151260376, + "learning_rate": 0.00014324811832958187, + "loss": 2.6106, + "step": 7217 + }, + { + "epoch": 0.5825195706561214, + "grad_norm": 0.7003469467163086, + "learning_rate": 0.000143233883645501, + "loss": 2.618, + "step": 7218 + }, + { + "epoch": 0.5826002743927043, + "grad_norm": 0.7139034867286682, + "learning_rate": 0.00014321964788391878, + "loss": 2.5772, + "step": 7219 + }, + { + "epoch": 0.5826809781292874, + "grad_norm": 0.6368305683135986, + "learning_rate": 0.00014320541104518992, + "loss": 2.5259, + "step": 7220 + }, + { + "epoch": 0.5827616818658704, + "grad_norm": 0.6921548247337341, + "learning_rate": 0.0001431911731296693, + "loss": 2.6403, + "step": 7221 + }, + { + "epoch": 0.5828423856024534, + "grad_norm": 0.6995570659637451, + "learning_rate": 0.00014317693413771175, + "loss": 2.6172, + "step": 7222 + }, + { + "epoch": 0.5829230893390364, + "grad_norm": 0.7557246088981628, + "learning_rate": 0.0001431626940696721, + "loss": 2.6347, + "step": 7223 + }, + { + "epoch": 0.5830037930756194, + "grad_norm": 0.6912205219268799, + "learning_rate": 0.00014314845292590528, + "loss": 2.5958, + "step": 7224 + }, + { + "epoch": 0.5830844968122024, + "grad_norm": 0.6896184682846069, + "learning_rate": 0.00014313421070676625, + "loss": 2.569, + "step": 7225 + }, + { + "epoch": 0.5831652005487854, + "grad_norm": 0.6900814771652222, + "learning_rate": 0.00014311996741260994, + "loss": 2.5466, + "step": 7226 + }, + { + "epoch": 0.5832459042853684, + "grad_norm": 0.7319771647453308, + "learning_rate": 0.00014310572304379132, + "loss": 2.6181, + "step": 7227 + }, + { + "epoch": 0.5833266080219515, + "grad_norm": 
0.728138267993927, + "learning_rate": 0.0001430914776006654, + "loss": 2.6644, + "step": 7228 + }, + { + "epoch": 0.5834073117585344, + "grad_norm": 0.7361802458763123, + "learning_rate": 0.0001430772310835872, + "loss": 2.6079, + "step": 7229 + }, + { + "epoch": 0.5834880154951174, + "grad_norm": 0.6893376708030701, + "learning_rate": 0.00014306298349291182, + "loss": 2.5615, + "step": 7230 + }, + { + "epoch": 0.5835687192317004, + "grad_norm": 0.6661401987075806, + "learning_rate": 0.00014304873482899431, + "loss": 2.6028, + "step": 7231 + }, + { + "epoch": 0.5836494229682835, + "grad_norm": 0.6571504473686218, + "learning_rate": 0.0001430344850921898, + "loss": 2.5553, + "step": 7232 + }, + { + "epoch": 0.5837301267048665, + "grad_norm": 0.6878423690795898, + "learning_rate": 0.00014302023428285342, + "loss": 2.5336, + "step": 7233 + }, + { + "epoch": 0.5838108304414494, + "grad_norm": 0.768117368221283, + "learning_rate": 0.00014300598240134035, + "loss": 2.6036, + "step": 7234 + }, + { + "epoch": 0.5838915341780324, + "grad_norm": 0.6876625418663025, + "learning_rate": 0.0001429917294480058, + "loss": 2.6314, + "step": 7235 + }, + { + "epoch": 0.5839722379146155, + "grad_norm": 0.7146790027618408, + "learning_rate": 0.00014297747542320495, + "loss": 2.6029, + "step": 7236 + }, + { + "epoch": 0.5840529416511985, + "grad_norm": 0.7032392024993896, + "learning_rate": 0.00014296322032729308, + "loss": 2.6163, + "step": 7237 + }, + { + "epoch": 0.5841336453877815, + "grad_norm": 0.7323551177978516, + "learning_rate": 0.00014294896416062544, + "loss": 2.6706, + "step": 7238 + }, + { + "epoch": 0.5842143491243644, + "grad_norm": 0.7647258639335632, + "learning_rate": 0.00014293470692355734, + "loss": 2.6744, + "step": 7239 + }, + { + "epoch": 0.5842950528609475, + "grad_norm": 0.6824506521224976, + "learning_rate": 0.00014292044861644414, + "loss": 2.579, + "step": 7240 + }, + { + "epoch": 0.5843757565975305, + "grad_norm": 0.7553619742393494, + "learning_rate": 
0.00014290618923964115, + "loss": 2.6196, + "step": 7241 + }, + { + "epoch": 0.5844564603341135, + "grad_norm": 0.6872109770774841, + "learning_rate": 0.00014289192879350375, + "loss": 2.555, + "step": 7242 + }, + { + "epoch": 0.5845371640706964, + "grad_norm": 0.664658784866333, + "learning_rate": 0.00014287766727838735, + "loss": 2.5781, + "step": 7243 + }, + { + "epoch": 0.5846178678072794, + "grad_norm": 0.6709543466567993, + "learning_rate": 0.00014286340469464744, + "loss": 2.6022, + "step": 7244 + }, + { + "epoch": 0.5846985715438625, + "grad_norm": 0.7236210107803345, + "learning_rate": 0.00014284914104263941, + "loss": 2.5609, + "step": 7245 + }, + { + "epoch": 0.5847792752804455, + "grad_norm": 0.6751740574836731, + "learning_rate": 0.0001428348763227188, + "loss": 2.5792, + "step": 7246 + }, + { + "epoch": 0.5848599790170285, + "grad_norm": 0.6684607267379761, + "learning_rate": 0.0001428206105352411, + "loss": 2.5705, + "step": 7247 + }, + { + "epoch": 0.5849406827536114, + "grad_norm": 0.6876732707023621, + "learning_rate": 0.00014280634368056186, + "loss": 2.6576, + "step": 7248 + }, + { + "epoch": 0.5850213864901945, + "grad_norm": 0.758637547492981, + "learning_rate": 0.0001427920757590366, + "loss": 2.6215, + "step": 7249 + }, + { + "epoch": 0.5851020902267775, + "grad_norm": 0.6839025020599365, + "learning_rate": 0.00014277780677102097, + "loss": 2.5898, + "step": 7250 + }, + { + "epoch": 0.5851827939633605, + "grad_norm": 0.6912671327590942, + "learning_rate": 0.00014276353671687056, + "loss": 2.5879, + "step": 7251 + }, + { + "epoch": 0.5852634976999435, + "grad_norm": 0.6727048754692078, + "learning_rate": 0.00014274926559694107, + "loss": 2.5501, + "step": 7252 + }, + { + "epoch": 0.5853442014365265, + "grad_norm": 0.7031945586204529, + "learning_rate": 0.00014273499341158812, + "loss": 2.625, + "step": 7253 + }, + { + "epoch": 0.5854249051731095, + "grad_norm": 0.6886943578720093, + "learning_rate": 0.0001427207201611674, + "loss": 2.6141, + 
"step": 7254 + }, + { + "epoch": 0.5855056089096925, + "grad_norm": 0.7906915545463562, + "learning_rate": 0.00014270644584603466, + "loss": 2.7189, + "step": 7255 + }, + { + "epoch": 0.5855863126462755, + "grad_norm": 0.6873704195022583, + "learning_rate": 0.00014269217046654567, + "loss": 2.6031, + "step": 7256 + }, + { + "epoch": 0.5856670163828586, + "grad_norm": 0.6655381321907043, + "learning_rate": 0.00014267789402305618, + "loss": 2.5747, + "step": 7257 + }, + { + "epoch": 0.5857477201194415, + "grad_norm": 0.6655673384666443, + "learning_rate": 0.00014266361651592204, + "loss": 2.625, + "step": 7258 + }, + { + "epoch": 0.5858284238560245, + "grad_norm": 0.6752866506576538, + "learning_rate": 0.00014264933794549901, + "loss": 2.5914, + "step": 7259 + }, + { + "epoch": 0.5859091275926075, + "grad_norm": 0.6680975556373596, + "learning_rate": 0.00014263505831214302, + "loss": 2.5572, + "step": 7260 + }, + { + "epoch": 0.5859898313291906, + "grad_norm": 0.6873607039451599, + "learning_rate": 0.00014262077761620994, + "loss": 2.6696, + "step": 7261 + }, + { + "epoch": 0.5860705350657736, + "grad_norm": 0.6745384335517883, + "learning_rate": 0.00014260649585805566, + "loss": 2.5738, + "step": 7262 + }, + { + "epoch": 0.5861512388023565, + "grad_norm": 0.6524637937545776, + "learning_rate": 0.0001425922130380361, + "loss": 2.6209, + "step": 7263 + }, + { + "epoch": 0.5862319425389395, + "grad_norm": 0.6729850172996521, + "learning_rate": 0.00014257792915650728, + "loss": 2.652, + "step": 7264 + }, + { + "epoch": 0.5863126462755226, + "grad_norm": 0.6713503003120422, + "learning_rate": 0.00014256364421382514, + "loss": 2.5658, + "step": 7265 + }, + { + "epoch": 0.5863933500121056, + "grad_norm": 0.6835616827011108, + "learning_rate": 0.00014254935821034575, + "loss": 2.5535, + "step": 7266 + }, + { + "epoch": 0.5864740537486886, + "grad_norm": 0.7425376176834106, + "learning_rate": 0.00014253507114642515, + "loss": 2.6369, + "step": 7267 + }, + { + "epoch": 
0.5865547574852715, + "grad_norm": 0.6788069605827332, + "learning_rate": 0.00014252078302241932, + "loss": 2.601, + "step": 7268 + }, + { + "epoch": 0.5866354612218546, + "grad_norm": 0.6828538179397583, + "learning_rate": 0.0001425064938386845, + "loss": 2.5861, + "step": 7269 + }, + { + "epoch": 0.5867161649584376, + "grad_norm": 0.6763372421264648, + "learning_rate": 0.0001424922035955767, + "loss": 2.6035, + "step": 7270 + }, + { + "epoch": 0.5867968686950206, + "grad_norm": 0.6517930626869202, + "learning_rate": 0.0001424779122934521, + "loss": 2.5564, + "step": 7271 + }, + { + "epoch": 0.5868775724316035, + "grad_norm": 0.6633113622665405, + "learning_rate": 0.00014246361993266692, + "loss": 2.6163, + "step": 7272 + }, + { + "epoch": 0.5869582761681866, + "grad_norm": 0.684822678565979, + "learning_rate": 0.00014244932651357733, + "loss": 2.6057, + "step": 7273 + }, + { + "epoch": 0.5870389799047696, + "grad_norm": 0.7679704427719116, + "learning_rate": 0.00014243503203653952, + "loss": 2.6522, + "step": 7274 + }, + { + "epoch": 0.5871196836413526, + "grad_norm": 0.6834188103675842, + "learning_rate": 0.00014242073650190984, + "loss": 2.652, + "step": 7275 + }, + { + "epoch": 0.5872003873779356, + "grad_norm": 0.6903846859931946, + "learning_rate": 0.00014240643991004449, + "loss": 2.5894, + "step": 7276 + }, + { + "epoch": 0.5872810911145186, + "grad_norm": 0.7060866951942444, + "learning_rate": 0.0001423921422612998, + "loss": 2.5994, + "step": 7277 + }, + { + "epoch": 0.5873617948511016, + "grad_norm": 0.6646741628646851, + "learning_rate": 0.0001423778435560321, + "loss": 2.6432, + "step": 7278 + }, + { + "epoch": 0.5874424985876846, + "grad_norm": 0.6930218935012817, + "learning_rate": 0.0001423635437945978, + "loss": 2.6233, + "step": 7279 + }, + { + "epoch": 0.5875232023242676, + "grad_norm": 0.6914143562316895, + "learning_rate": 0.00014234924297735322, + "loss": 2.6143, + "step": 7280 + }, + { + "epoch": 0.5876039060608507, + "grad_norm": 
0.7351366281509399, + "learning_rate": 0.0001423349411046548, + "loss": 2.6323, + "step": 7281 + }, + { + "epoch": 0.5876846097974336, + "grad_norm": 0.6813770532608032, + "learning_rate": 0.000142320638176859, + "loss": 2.5964, + "step": 7282 + }, + { + "epoch": 0.5877653135340166, + "grad_norm": 0.7049702405929565, + "learning_rate": 0.00014230633419432226, + "loss": 2.6284, + "step": 7283 + }, + { + "epoch": 0.5878460172705996, + "grad_norm": 0.7140446901321411, + "learning_rate": 0.00014229202915740107, + "loss": 2.6113, + "step": 7284 + }, + { + "epoch": 0.5879267210071827, + "grad_norm": 0.696588933467865, + "learning_rate": 0.00014227772306645196, + "loss": 2.6384, + "step": 7285 + }, + { + "epoch": 0.5880074247437657, + "grad_norm": 0.6800615787506104, + "learning_rate": 0.0001422634159218315, + "loss": 2.5743, + "step": 7286 + }, + { + "epoch": 0.5880881284803486, + "grad_norm": 0.7586596608161926, + "learning_rate": 0.00014224910772389624, + "loss": 2.6504, + "step": 7287 + }, + { + "epoch": 0.5881688322169316, + "grad_norm": 0.73286372423172, + "learning_rate": 0.00014223479847300278, + "loss": 2.6026, + "step": 7288 + }, + { + "epoch": 0.5882495359535147, + "grad_norm": 0.6808766722679138, + "learning_rate": 0.00014222048816950772, + "loss": 2.5822, + "step": 7289 + }, + { + "epoch": 0.5883302396900977, + "grad_norm": 0.7424919009208679, + "learning_rate": 0.0001422061768137677, + "loss": 2.6474, + "step": 7290 + }, + { + "epoch": 0.5884109434266807, + "grad_norm": 0.658183753490448, + "learning_rate": 0.00014219186440613948, + "loss": 2.6051, + "step": 7291 + }, + { + "epoch": 0.5884916471632636, + "grad_norm": 0.6693006157875061, + "learning_rate": 0.0001421775509469797, + "loss": 2.5774, + "step": 7292 + }, + { + "epoch": 0.5885723508998466, + "grad_norm": 0.7298646569252014, + "learning_rate": 0.00014216323643664508, + "loss": 2.5688, + "step": 7293 + }, + { + "epoch": 0.5886530546364297, + "grad_norm": 0.6665881276130676, + "learning_rate": 
0.00014214892087549238, + "loss": 2.608, + "step": 7294 + }, + { + "epoch": 0.5887337583730127, + "grad_norm": 0.7220060229301453, + "learning_rate": 0.00014213460426387841, + "loss": 2.6078, + "step": 7295 + }, + { + "epoch": 0.5888144621095956, + "grad_norm": 0.6693970561027527, + "learning_rate": 0.00014212028660215997, + "loss": 2.597, + "step": 7296 + }, + { + "epoch": 0.5888951658461786, + "grad_norm": 0.682331919670105, + "learning_rate": 0.00014210596789069387, + "loss": 2.5752, + "step": 7297 + }, + { + "epoch": 0.5889758695827617, + "grad_norm": 0.7586890459060669, + "learning_rate": 0.000142091648129837, + "loss": 2.6878, + "step": 7298 + }, + { + "epoch": 0.5890565733193447, + "grad_norm": 0.6740901470184326, + "learning_rate": 0.00014207732731994624, + "loss": 2.6083, + "step": 7299 + }, + { + "epoch": 0.5891372770559277, + "grad_norm": 0.6959021091461182, + "learning_rate": 0.00014206300546137842, + "loss": 2.5765, + "step": 7300 + }, + { + "epoch": 0.5892179807925106, + "grad_norm": 0.7446078658103943, + "learning_rate": 0.0001420486825544906, + "loss": 2.662, + "step": 7301 + }, + { + "epoch": 0.5892986845290937, + "grad_norm": 0.7418847680091858, + "learning_rate": 0.0001420343585996397, + "loss": 2.6606, + "step": 7302 + }, + { + "epoch": 0.5893793882656767, + "grad_norm": 0.7185709476470947, + "learning_rate": 0.00014202003359718273, + "loss": 2.563, + "step": 7303 + }, + { + "epoch": 0.5894600920022597, + "grad_norm": 0.6960515379905701, + "learning_rate": 0.00014200570754747664, + "loss": 2.6182, + "step": 7304 + }, + { + "epoch": 0.5895407957388427, + "grad_norm": 0.6589705348014832, + "learning_rate": 0.00014199138045087849, + "loss": 2.6714, + "step": 7305 + }, + { + "epoch": 0.5896214994754257, + "grad_norm": 0.7027507424354553, + "learning_rate": 0.00014197705230774543, + "loss": 2.6145, + "step": 7306 + }, + { + "epoch": 0.5897022032120087, + "grad_norm": 0.6761246919631958, + "learning_rate": 0.00014196272311843447, + "loss": 2.5688, + 
"step": 7307 + }, + { + "epoch": 0.5897829069485917, + "grad_norm": 0.6618059277534485, + "learning_rate": 0.00014194839288330277, + "loss": 2.6194, + "step": 7308 + }, + { + "epoch": 0.5898636106851747, + "grad_norm": 0.7182614803314209, + "learning_rate": 0.00014193406160270747, + "loss": 2.5452, + "step": 7309 + }, + { + "epoch": 0.5899443144217578, + "grad_norm": 0.6830565333366394, + "learning_rate": 0.0001419197292770057, + "loss": 2.5728, + "step": 7310 + }, + { + "epoch": 0.5900250181583407, + "grad_norm": 0.6744499802589417, + "learning_rate": 0.00014190539590655475, + "loss": 2.5736, + "step": 7311 + }, + { + "epoch": 0.5901057218949237, + "grad_norm": 0.7177874445915222, + "learning_rate": 0.00014189106149171176, + "loss": 2.6271, + "step": 7312 + }, + { + "epoch": 0.5901864256315067, + "grad_norm": 0.6770105361938477, + "learning_rate": 0.000141876726032834, + "loss": 2.5924, + "step": 7313 + }, + { + "epoch": 0.5902671293680898, + "grad_norm": 0.7295818328857422, + "learning_rate": 0.0001418623895302788, + "loss": 2.644, + "step": 7314 + }, + { + "epoch": 0.5903478331046728, + "grad_norm": 0.7244859933853149, + "learning_rate": 0.00014184805198440338, + "loss": 2.5892, + "step": 7315 + }, + { + "epoch": 0.5904285368412557, + "grad_norm": 0.7067728638648987, + "learning_rate": 0.00014183371339556512, + "loss": 2.5985, + "step": 7316 + }, + { + "epoch": 0.5905092405778387, + "grad_norm": 0.6732490062713623, + "learning_rate": 0.0001418193737641214, + "loss": 2.5771, + "step": 7317 + }, + { + "epoch": 0.5905899443144218, + "grad_norm": 0.7087544202804565, + "learning_rate": 0.00014180503309042957, + "loss": 2.6373, + "step": 7318 + }, + { + "epoch": 0.5906706480510048, + "grad_norm": 0.772174596786499, + "learning_rate": 0.00014179069137484703, + "loss": 2.6262, + "step": 7319 + }, + { + "epoch": 0.5907513517875878, + "grad_norm": 0.6855718493461609, + "learning_rate": 0.00014177634861773118, + "loss": 2.6268, + "step": 7320 + }, + { + "epoch": 
0.5908320555241707, + "grad_norm": 0.7168720364570618, + "learning_rate": 0.00014176200481943953, + "loss": 2.5892, + "step": 7321 + }, + { + "epoch": 0.5909127592607538, + "grad_norm": 0.7126333713531494, + "learning_rate": 0.0001417476599803296, + "loss": 2.6079, + "step": 7322 + }, + { + "epoch": 0.5909934629973368, + "grad_norm": 0.7451913952827454, + "learning_rate": 0.0001417333141007588, + "loss": 2.635, + "step": 7323 + }, + { + "epoch": 0.5910741667339198, + "grad_norm": 0.7405436038970947, + "learning_rate": 0.00014171896718108475, + "loss": 2.6014, + "step": 7324 + }, + { + "epoch": 0.5911548704705027, + "grad_norm": 0.7583999037742615, + "learning_rate": 0.00014170461922166498, + "loss": 2.6815, + "step": 7325 + }, + { + "epoch": 0.5912355742070858, + "grad_norm": 0.6653509140014648, + "learning_rate": 0.00014169027022285706, + "loss": 2.6153, + "step": 7326 + }, + { + "epoch": 0.5913162779436688, + "grad_norm": 0.7145548462867737, + "learning_rate": 0.00014167592018501864, + "loss": 2.6022, + "step": 7327 + }, + { + "epoch": 0.5913969816802518, + "grad_norm": 0.6996089816093445, + "learning_rate": 0.00014166156910850737, + "loss": 2.6586, + "step": 7328 + }, + { + "epoch": 0.5914776854168348, + "grad_norm": 0.735653281211853, + "learning_rate": 0.0001416472169936809, + "loss": 2.6084, + "step": 7329 + }, + { + "epoch": 0.5915583891534179, + "grad_norm": 0.695036768913269, + "learning_rate": 0.00014163286384089686, + "loss": 2.5058, + "step": 7330 + }, + { + "epoch": 0.5916390928900008, + "grad_norm": 0.9014756679534912, + "learning_rate": 0.00014161850965051307, + "loss": 2.5991, + "step": 7331 + }, + { + "epoch": 0.5917197966265838, + "grad_norm": 0.7079846858978271, + "learning_rate": 0.0001416041544228872, + "loss": 2.6067, + "step": 7332 + }, + { + "epoch": 0.5918005003631668, + "grad_norm": 0.7681204080581665, + "learning_rate": 0.00014158979815837705, + "loss": 2.5414, + "step": 7333 + }, + { + "epoch": 0.5918812040997499, + "grad_norm": 
0.6501670479774475, + "learning_rate": 0.00014157544085734042, + "loss": 2.617, + "step": 7334 + }, + { + "epoch": 0.5919619078363328, + "grad_norm": 0.7573496103286743, + "learning_rate": 0.00014156108252013513, + "loss": 2.6341, + "step": 7335 + }, + { + "epoch": 0.5920426115729158, + "grad_norm": 0.6865558624267578, + "learning_rate": 0.00014154672314711903, + "loss": 2.6229, + "step": 7336 + }, + { + "epoch": 0.5921233153094988, + "grad_norm": 0.6859166622161865, + "learning_rate": 0.00014153236273864995, + "loss": 2.6149, + "step": 7337 + }, + { + "epoch": 0.5922040190460819, + "grad_norm": 0.7603647112846375, + "learning_rate": 0.00014151800129508585, + "loss": 2.5645, + "step": 7338 + }, + { + "epoch": 0.5922847227826649, + "grad_norm": 0.6740217208862305, + "learning_rate": 0.00014150363881678464, + "loss": 2.5883, + "step": 7339 + }, + { + "epoch": 0.5923654265192478, + "grad_norm": 0.6412263512611389, + "learning_rate": 0.00014148927530410426, + "loss": 2.576, + "step": 7340 + }, + { + "epoch": 0.5924461302558308, + "grad_norm": 0.669834315776825, + "learning_rate": 0.00014147491075740265, + "loss": 2.542, + "step": 7341 + }, + { + "epoch": 0.5925268339924139, + "grad_norm": 0.720024049282074, + "learning_rate": 0.00014146054517703786, + "loss": 2.6491, + "step": 7342 + }, + { + "epoch": 0.5926075377289969, + "grad_norm": 0.7191612720489502, + "learning_rate": 0.00014144617856336794, + "loss": 2.5933, + "step": 7343 + }, + { + "epoch": 0.5926882414655799, + "grad_norm": 0.7012050747871399, + "learning_rate": 0.00014143181091675087, + "loss": 2.5253, + "step": 7344 + }, + { + "epoch": 0.5927689452021628, + "grad_norm": 0.7825081944465637, + "learning_rate": 0.00014141744223754478, + "loss": 2.6225, + "step": 7345 + }, + { + "epoch": 0.5928496489387458, + "grad_norm": 0.6699295043945312, + "learning_rate": 0.00014140307252610775, + "loss": 2.5893, + "step": 7346 + }, + { + "epoch": 0.5929303526753289, + "grad_norm": 0.6668846011161804, + "learning_rate": 
0.00014138870178279794, + "loss": 2.5944, + "step": 7347 + }, + { + "epoch": 0.5930110564119119, + "grad_norm": 0.7681072950363159, + "learning_rate": 0.0001413743300079735, + "loss": 2.5715, + "step": 7348 + }, + { + "epoch": 0.5930917601484949, + "grad_norm": 0.653075635433197, + "learning_rate": 0.00014135995720199258, + "loss": 2.5924, + "step": 7349 + }, + { + "epoch": 0.5931724638850778, + "grad_norm": 0.6807504892349243, + "learning_rate": 0.00014134558336521342, + "loss": 2.5395, + "step": 7350 + }, + { + "epoch": 0.5932531676216609, + "grad_norm": 0.681175708770752, + "learning_rate": 0.00014133120849799423, + "loss": 2.5401, + "step": 7351 + }, + { + "epoch": 0.5933338713582439, + "grad_norm": 0.7159900665283203, + "learning_rate": 0.0001413168326006933, + "loss": 2.5684, + "step": 7352 + }, + { + "epoch": 0.5934145750948269, + "grad_norm": 0.6517181992530823, + "learning_rate": 0.00014130245567366888, + "loss": 2.5887, + "step": 7353 + }, + { + "epoch": 0.5934952788314098, + "grad_norm": 0.6982731223106384, + "learning_rate": 0.00014128807771727936, + "loss": 2.5707, + "step": 7354 + }, + { + "epoch": 0.5935759825679929, + "grad_norm": 0.7003650069236755, + "learning_rate": 0.00014127369873188296, + "loss": 2.6415, + "step": 7355 + }, + { + "epoch": 0.5936566863045759, + "grad_norm": 0.7408339977264404, + "learning_rate": 0.0001412593187178381, + "loss": 2.5655, + "step": 7356 + }, + { + "epoch": 0.5937373900411589, + "grad_norm": 0.717218279838562, + "learning_rate": 0.00014124493767550317, + "loss": 2.586, + "step": 7357 + }, + { + "epoch": 0.5938180937777419, + "grad_norm": 0.6723458766937256, + "learning_rate": 0.00014123055560523657, + "loss": 2.593, + "step": 7358 + }, + { + "epoch": 0.593898797514325, + "grad_norm": 0.6861262321472168, + "learning_rate": 0.00014121617250739677, + "loss": 2.612, + "step": 7359 + }, + { + "epoch": 0.5939795012509079, + "grad_norm": 0.6811453104019165, + "learning_rate": 0.00014120178838234222, + "loss": 2.5708, + 
"step": 7360 + }, + { + "epoch": 0.5940602049874909, + "grad_norm": 0.6249656677246094, + "learning_rate": 0.00014118740323043136, + "loss": 2.5604, + "step": 7361 + }, + { + "epoch": 0.5941409087240739, + "grad_norm": 0.7671588659286499, + "learning_rate": 0.00014117301705202274, + "loss": 2.547, + "step": 7362 + }, + { + "epoch": 0.594221612460657, + "grad_norm": 0.6856057643890381, + "learning_rate": 0.00014115862984747496, + "loss": 2.6108, + "step": 7363 + }, + { + "epoch": 0.5943023161972399, + "grad_norm": 0.692331850528717, + "learning_rate": 0.0001411442416171465, + "loss": 2.6347, + "step": 7364 + }, + { + "epoch": 0.5943830199338229, + "grad_norm": 0.7256516814231873, + "learning_rate": 0.000141129852361396, + "loss": 2.6098, + "step": 7365 + }, + { + "epoch": 0.5944637236704059, + "grad_norm": 0.7522590160369873, + "learning_rate": 0.00014111546208058203, + "loss": 2.5688, + "step": 7366 + }, + { + "epoch": 0.594544427406989, + "grad_norm": 0.6915806531906128, + "learning_rate": 0.0001411010707750633, + "loss": 2.5899, + "step": 7367 + }, + { + "epoch": 0.594625131143572, + "grad_norm": 0.7355465292930603, + "learning_rate": 0.00014108667844519844, + "loss": 2.5212, + "step": 7368 + }, + { + "epoch": 0.5947058348801549, + "grad_norm": 0.731002926826477, + "learning_rate": 0.00014107228509134615, + "loss": 2.6369, + "step": 7369 + }, + { + "epoch": 0.5947865386167379, + "grad_norm": 0.6764423251152039, + "learning_rate": 0.0001410578907138652, + "loss": 2.6012, + "step": 7370 + }, + { + "epoch": 0.594867242353321, + "grad_norm": 0.7466071844100952, + "learning_rate": 0.0001410434953131142, + "loss": 2.5822, + "step": 7371 + }, + { + "epoch": 0.594947946089904, + "grad_norm": 0.7276137471199036, + "learning_rate": 0.00014102909888945205, + "loss": 2.6055, + "step": 7372 + }, + { + "epoch": 0.595028649826487, + "grad_norm": 0.7411746978759766, + "learning_rate": 0.00014101470144323752, + "loss": 2.6489, + "step": 7373 + }, + { + "epoch": 
0.5951093535630699, + "grad_norm": 0.7511908411979675, + "learning_rate": 0.0001410003029748294, + "loss": 2.6268, + "step": 7374 + }, + { + "epoch": 0.595190057299653, + "grad_norm": 0.6623562574386597, + "learning_rate": 0.0001409859034845866, + "loss": 2.58, + "step": 7375 + }, + { + "epoch": 0.595270761036236, + "grad_norm": 0.6948572397232056, + "learning_rate": 0.00014097150297286785, + "loss": 2.5811, + "step": 7376 + }, + { + "epoch": 0.595351464772819, + "grad_norm": 0.6836786270141602, + "learning_rate": 0.0001409571014400322, + "loss": 2.5861, + "step": 7377 + }, + { + "epoch": 0.595432168509402, + "grad_norm": 0.6644341945648193, + "learning_rate": 0.00014094269888643854, + "loss": 2.6339, + "step": 7378 + }, + { + "epoch": 0.595512872245985, + "grad_norm": 0.6434289813041687, + "learning_rate": 0.0001409282953124458, + "loss": 2.4897, + "step": 7379 + }, + { + "epoch": 0.595593575982568, + "grad_norm": 0.6745082139968872, + "learning_rate": 0.0001409138907184129, + "loss": 2.522, + "step": 7380 + }, + { + "epoch": 0.595674279719151, + "grad_norm": 0.725321352481842, + "learning_rate": 0.0001408994851046989, + "loss": 2.5711, + "step": 7381 + }, + { + "epoch": 0.595754983455734, + "grad_norm": 0.7485500574111938, + "learning_rate": 0.00014088507847166283, + "loss": 2.6095, + "step": 7382 + }, + { + "epoch": 0.595835687192317, + "grad_norm": 0.721125602722168, + "learning_rate": 0.00014087067081966376, + "loss": 2.6762, + "step": 7383 + }, + { + "epoch": 0.5959163909289, + "grad_norm": 0.7099901437759399, + "learning_rate": 0.00014085626214906073, + "loss": 2.5667, + "step": 7384 + }, + { + "epoch": 0.595997094665483, + "grad_norm": 0.6889060139656067, + "learning_rate": 0.00014084185246021283, + "loss": 2.6723, + "step": 7385 + }, + { + "epoch": 0.596077798402066, + "grad_norm": 0.735698938369751, + "learning_rate": 0.00014082744175347923, + "loss": 2.6434, + "step": 7386 + }, + { + "epoch": 0.5961585021386491, + "grad_norm": 0.7603070735931396, + 
"learning_rate": 0.00014081303002921902, + "loss": 2.665, + "step": 7387 + }, + { + "epoch": 0.596239205875232, + "grad_norm": 0.6786355376243591, + "learning_rate": 0.00014079861728779141, + "loss": 2.5842, + "step": 7388 + }, + { + "epoch": 0.596319909611815, + "grad_norm": 0.6693331003189087, + "learning_rate": 0.00014078420352955565, + "loss": 2.6211, + "step": 7389 + }, + { + "epoch": 0.596400613348398, + "grad_norm": 0.74013751745224, + "learning_rate": 0.0001407697887548709, + "loss": 2.5886, + "step": 7390 + }, + { + "epoch": 0.5964813170849811, + "grad_norm": 0.739507257938385, + "learning_rate": 0.00014075537296409646, + "loss": 2.607, + "step": 7391 + }, + { + "epoch": 0.5965620208215641, + "grad_norm": 0.7121848464012146, + "learning_rate": 0.00014074095615759156, + "loss": 2.6052, + "step": 7392 + }, + { + "epoch": 0.596642724558147, + "grad_norm": 0.7526760697364807, + "learning_rate": 0.00014072653833571556, + "loss": 2.6051, + "step": 7393 + }, + { + "epoch": 0.59672342829473, + "grad_norm": 0.7867496609687805, + "learning_rate": 0.00014071211949882777, + "loss": 2.6228, + "step": 7394 + }, + { + "epoch": 0.596804132031313, + "grad_norm": 0.7527757883071899, + "learning_rate": 0.00014069769964728752, + "loss": 2.6793, + "step": 7395 + }, + { + "epoch": 0.5968848357678961, + "grad_norm": 0.7096899747848511, + "learning_rate": 0.00014068327878145423, + "loss": 2.5207, + "step": 7396 + }, + { + "epoch": 0.5969655395044791, + "grad_norm": 0.6863983869552612, + "learning_rate": 0.00014066885690168726, + "loss": 2.7059, + "step": 7397 + }, + { + "epoch": 0.597046243241062, + "grad_norm": 0.7782251834869385, + "learning_rate": 0.0001406544340083461, + "loss": 2.6232, + "step": 7398 + }, + { + "epoch": 0.597126946977645, + "grad_norm": 0.6944136619567871, + "learning_rate": 0.00014064001010179013, + "loss": 2.6134, + "step": 7399 + }, + { + "epoch": 0.5972076507142281, + "grad_norm": 0.7629704475402832, + "learning_rate": 0.00014062558518237892, + "loss": 
2.5358, + "step": 7400 + }, + { + "epoch": 0.5972883544508111, + "grad_norm": 0.6922330260276794, + "learning_rate": 0.0001406111592504719, + "loss": 2.5457, + "step": 7401 + }, + { + "epoch": 0.597369058187394, + "grad_norm": 0.6992952227592468, + "learning_rate": 0.00014059673230642865, + "loss": 2.6241, + "step": 7402 + }, + { + "epoch": 0.597449761923977, + "grad_norm": 0.6587642431259155, + "learning_rate": 0.0001405823043506087, + "loss": 2.5867, + "step": 7403 + }, + { + "epoch": 0.5975304656605601, + "grad_norm": 0.6993013024330139, + "learning_rate": 0.00014056787538337164, + "loss": 2.6194, + "step": 7404 + }, + { + "epoch": 0.5976111693971431, + "grad_norm": 0.7605414986610413, + "learning_rate": 0.0001405534454050771, + "loss": 2.607, + "step": 7405 + }, + { + "epoch": 0.5976918731337261, + "grad_norm": 0.6624562740325928, + "learning_rate": 0.00014053901441608466, + "loss": 2.5962, + "step": 7406 + }, + { + "epoch": 0.597772576870309, + "grad_norm": 0.7432621717453003, + "learning_rate": 0.000140524582416754, + "loss": 2.6434, + "step": 7407 + }, + { + "epoch": 0.5978532806068921, + "grad_norm": 0.7184053659439087, + "learning_rate": 0.00014051014940744488, + "loss": 2.6139, + "step": 7408 + }, + { + "epoch": 0.5979339843434751, + "grad_norm": 0.7567455768585205, + "learning_rate": 0.00014049571538851687, + "loss": 2.5788, + "step": 7409 + }, + { + "epoch": 0.5980146880800581, + "grad_norm": 0.6759883761405945, + "learning_rate": 0.00014048128036032984, + "loss": 2.5584, + "step": 7410 + }, + { + "epoch": 0.5980953918166411, + "grad_norm": 0.7607424855232239, + "learning_rate": 0.00014046684432324343, + "loss": 2.5675, + "step": 7411 + }, + { + "epoch": 0.5981760955532242, + "grad_norm": 0.7134036421775818, + "learning_rate": 0.00014045240727761748, + "loss": 2.6805, + "step": 7412 + }, + { + "epoch": 0.5982567992898071, + "grad_norm": 0.6996984481811523, + "learning_rate": 0.00014043796922381184, + "loss": 2.5874, + "step": 7413 + }, + { + "epoch": 
0.5983375030263901, + "grad_norm": 0.7098252177238464, + "learning_rate": 0.00014042353016218627, + "loss": 2.5895, + "step": 7414 + }, + { + "epoch": 0.5984182067629731, + "grad_norm": 0.7160520553588867, + "learning_rate": 0.00014040909009310068, + "loss": 2.6042, + "step": 7415 + }, + { + "epoch": 0.5984989104995562, + "grad_norm": 0.6727281212806702, + "learning_rate": 0.00014039464901691493, + "loss": 2.5356, + "step": 7416 + }, + { + "epoch": 0.5985796142361391, + "grad_norm": 0.7052881717681885, + "learning_rate": 0.00014038020693398891, + "loss": 2.6093, + "step": 7417 + }, + { + "epoch": 0.5986603179727221, + "grad_norm": 0.7151781916618347, + "learning_rate": 0.00014036576384468262, + "loss": 2.5776, + "step": 7418 + }, + { + "epoch": 0.5987410217093051, + "grad_norm": 0.7376574873924255, + "learning_rate": 0.0001403513197493559, + "loss": 2.6246, + "step": 7419 + }, + { + "epoch": 0.5988217254458882, + "grad_norm": 0.6882135272026062, + "learning_rate": 0.00014033687464836892, + "loss": 2.6028, + "step": 7420 + }, + { + "epoch": 0.5989024291824712, + "grad_norm": 0.6603999137878418, + "learning_rate": 0.00014032242854208153, + "loss": 2.5897, + "step": 7421 + }, + { + "epoch": 0.5989831329190541, + "grad_norm": 0.7001559734344482, + "learning_rate": 0.0001403079814308538, + "loss": 2.6033, + "step": 7422 + }, + { + "epoch": 0.5990638366556371, + "grad_norm": 0.7184363603591919, + "learning_rate": 0.00014029353331504582, + "loss": 2.7464, + "step": 7423 + }, + { + "epoch": 0.5991445403922202, + "grad_norm": 0.6794769167900085, + "learning_rate": 0.00014027908419501767, + "loss": 2.569, + "step": 7424 + }, + { + "epoch": 0.5992252441288032, + "grad_norm": 0.6846041083335876, + "learning_rate": 0.00014026463407112942, + "loss": 2.5995, + "step": 7425 + }, + { + "epoch": 0.5993059478653862, + "grad_norm": 0.6539658308029175, + "learning_rate": 0.00014025018294374129, + "loss": 2.5749, + "step": 7426 + }, + { + "epoch": 0.5993866516019691, + "grad_norm": 
0.6572301983833313, + "learning_rate": 0.00014023573081321336, + "loss": 2.5312, + "step": 7427 + }, + { + "epoch": 0.5994673553385522, + "grad_norm": 0.7010765671730042, + "learning_rate": 0.00014022127767990581, + "loss": 2.5088, + "step": 7428 + }, + { + "epoch": 0.5995480590751352, + "grad_norm": 0.7193396091461182, + "learning_rate": 0.0001402068235441789, + "loss": 2.6193, + "step": 7429 + }, + { + "epoch": 0.5996287628117182, + "grad_norm": 0.6928533315658569, + "learning_rate": 0.00014019236840639288, + "loss": 2.6149, + "step": 7430 + }, + { + "epoch": 0.5997094665483012, + "grad_norm": 0.743658185005188, + "learning_rate": 0.00014017791226690794, + "loss": 2.5466, + "step": 7431 + }, + { + "epoch": 0.5997901702848842, + "grad_norm": 0.752082347869873, + "learning_rate": 0.0001401634551260844, + "loss": 2.6605, + "step": 7432 + }, + { + "epoch": 0.5998708740214672, + "grad_norm": 0.7280415296554565, + "learning_rate": 0.00014014899698428255, + "loss": 2.6128, + "step": 7433 + }, + { + "epoch": 0.5999515777580502, + "grad_norm": 0.7037710547447205, + "learning_rate": 0.0001401345378418628, + "loss": 2.6157, + "step": 7434 + }, + { + "epoch": 0.6000322814946332, + "grad_norm": 0.6984395980834961, + "learning_rate": 0.00014012007769918542, + "loss": 2.5579, + "step": 7435 + }, + { + "epoch": 0.6001129852312163, + "grad_norm": 0.6853601336479187, + "learning_rate": 0.00014010561655661085, + "loss": 2.6316, + "step": 7436 + }, + { + "epoch": 0.6001936889677992, + "grad_norm": 0.7551750540733337, + "learning_rate": 0.00014009115441449948, + "loss": 2.6671, + "step": 7437 + }, + { + "epoch": 0.6002743927043822, + "grad_norm": 0.7680155038833618, + "learning_rate": 0.0001400766912732117, + "loss": 2.6301, + "step": 7438 + }, + { + "epoch": 0.6003550964409652, + "grad_norm": 0.6757175922393799, + "learning_rate": 0.00014006222713310807, + "loss": 2.5584, + "step": 7439 + }, + { + "epoch": 0.6004358001775483, + "grad_norm": 0.6636163592338562, + "learning_rate": 
0.00014004776199454897, + "loss": 2.5437, + "step": 7440 + }, + { + "epoch": 0.6005165039141312, + "grad_norm": 0.7317774891853333, + "learning_rate": 0.00014003329585789498, + "loss": 2.594, + "step": 7441 + }, + { + "epoch": 0.6005972076507142, + "grad_norm": 0.6903451681137085, + "learning_rate": 0.0001400188287235066, + "loss": 2.6175, + "step": 7442 + }, + { + "epoch": 0.6006779113872972, + "grad_norm": 0.7137858867645264, + "learning_rate": 0.00014000436059174437, + "loss": 2.6411, + "step": 7443 + }, + { + "epoch": 0.6007586151238803, + "grad_norm": 0.7124149203300476, + "learning_rate": 0.00013998989146296893, + "loss": 2.6562, + "step": 7444 + }, + { + "epoch": 0.6008393188604633, + "grad_norm": 0.7518175840377808, + "learning_rate": 0.00013997542133754087, + "loss": 2.6213, + "step": 7445 + }, + { + "epoch": 0.6009200225970462, + "grad_norm": 0.6843053698539734, + "learning_rate": 0.0001399609502158208, + "loss": 2.6099, + "step": 7446 + }, + { + "epoch": 0.6010007263336292, + "grad_norm": 0.6668025255203247, + "learning_rate": 0.0001399464780981694, + "loss": 2.609, + "step": 7447 + }, + { + "epoch": 0.6010814300702122, + "grad_norm": 0.6849119067192078, + "learning_rate": 0.00013993200498494735, + "loss": 2.6097, + "step": 7448 + }, + { + "epoch": 0.6011621338067953, + "grad_norm": 0.7767381072044373, + "learning_rate": 0.0001399175308765153, + "loss": 2.6351, + "step": 7449 + }, + { + "epoch": 0.6012428375433783, + "grad_norm": 0.6630256772041321, + "learning_rate": 0.0001399030557732341, + "loss": 2.5924, + "step": 7450 + }, + { + "epoch": 0.6013235412799612, + "grad_norm": 0.6918755769729614, + "learning_rate": 0.00013988857967546444, + "loss": 2.6205, + "step": 7451 + }, + { + "epoch": 0.6014042450165442, + "grad_norm": 0.7179181575775146, + "learning_rate": 0.00013987410258356708, + "loss": 2.5971, + "step": 7452 + }, + { + "epoch": 0.6014849487531273, + "grad_norm": 0.7233672738075256, + "learning_rate": 0.00013985962449790284, + "loss": 2.595, + 
"step": 7453 + }, + { + "epoch": 0.6015656524897103, + "grad_norm": 0.6861593127250671, + "learning_rate": 0.0001398451454188326, + "loss": 2.6127, + "step": 7454 + }, + { + "epoch": 0.6016463562262933, + "grad_norm": 0.6818981170654297, + "learning_rate": 0.00013983066534671714, + "loss": 2.5923, + "step": 7455 + }, + { + "epoch": 0.6017270599628762, + "grad_norm": 0.700036346912384, + "learning_rate": 0.0001398161842819174, + "loss": 2.5474, + "step": 7456 + }, + { + "epoch": 0.6018077636994593, + "grad_norm": 0.6884824633598328, + "learning_rate": 0.00013980170222479426, + "loss": 2.6041, + "step": 7457 + }, + { + "epoch": 0.6018884674360423, + "grad_norm": 0.6745120286941528, + "learning_rate": 0.00013978721917570866, + "loss": 2.6638, + "step": 7458 + }, + { + "epoch": 0.6019691711726253, + "grad_norm": 0.6886256337165833, + "learning_rate": 0.00013977273513502157, + "loss": 2.5733, + "step": 7459 + }, + { + "epoch": 0.6020498749092082, + "grad_norm": 0.7220930457115173, + "learning_rate": 0.00013975825010309394, + "loss": 2.5739, + "step": 7460 + }, + { + "epoch": 0.6021305786457913, + "grad_norm": 0.7281780242919922, + "learning_rate": 0.0001397437640802868, + "loss": 2.5646, + "step": 7461 + }, + { + "epoch": 0.6022112823823743, + "grad_norm": 0.7316896915435791, + "learning_rate": 0.00013972927706696115, + "loss": 2.6532, + "step": 7462 + }, + { + "epoch": 0.6022919861189573, + "grad_norm": 0.6288646459579468, + "learning_rate": 0.00013971478906347806, + "loss": 2.5753, + "step": 7463 + }, + { + "epoch": 0.6023726898555403, + "grad_norm": 0.7110145688056946, + "learning_rate": 0.00013970030007019862, + "loss": 2.6421, + "step": 7464 + }, + { + "epoch": 0.6024533935921234, + "grad_norm": 0.7437754273414612, + "learning_rate": 0.00013968581008748393, + "loss": 2.585, + "step": 7465 + }, + { + "epoch": 0.6025340973287063, + "grad_norm": 0.6839718222618103, + "learning_rate": 0.00013967131911569514, + "loss": 2.6249, + "step": 7466 + }, + { + "epoch": 
0.6026148010652893, + "grad_norm": 0.7358397841453552, + "learning_rate": 0.00013965682715519332, + "loss": 2.597, + "step": 7467 + }, + { + "epoch": 0.6026955048018723, + "grad_norm": 0.673651397228241, + "learning_rate": 0.00013964233420633973, + "loss": 2.6111, + "step": 7468 + }, + { + "epoch": 0.6027762085384554, + "grad_norm": 0.7390083074569702, + "learning_rate": 0.00013962784026949553, + "loss": 2.6131, + "step": 7469 + }, + { + "epoch": 0.6028569122750383, + "grad_norm": 0.6902220249176025, + "learning_rate": 0.00013961334534502197, + "loss": 2.6116, + "step": 7470 + }, + { + "epoch": 0.6029376160116213, + "grad_norm": 0.6946651935577393, + "learning_rate": 0.00013959884943328033, + "loss": 2.6307, + "step": 7471 + }, + { + "epoch": 0.6030183197482043, + "grad_norm": 0.7277294993400574, + "learning_rate": 0.00013958435253463183, + "loss": 2.6065, + "step": 7472 + }, + { + "epoch": 0.6030990234847874, + "grad_norm": 0.743833601474762, + "learning_rate": 0.00013956985464943776, + "loss": 2.6644, + "step": 7473 + }, + { + "epoch": 0.6031797272213704, + "grad_norm": 0.6480288505554199, + "learning_rate": 0.0001395553557780595, + "loss": 2.5386, + "step": 7474 + }, + { + "epoch": 0.6032604309579533, + "grad_norm": 0.799443781375885, + "learning_rate": 0.00013954085592085834, + "loss": 2.5653, + "step": 7475 + }, + { + "epoch": 0.6033411346945363, + "grad_norm": 0.6790705323219299, + "learning_rate": 0.00013952635507819575, + "loss": 2.6229, + "step": 7476 + }, + { + "epoch": 0.6034218384311194, + "grad_norm": 0.6871588826179504, + "learning_rate": 0.00013951185325043302, + "loss": 2.6514, + "step": 7477 + }, + { + "epoch": 0.6035025421677024, + "grad_norm": 0.7236921787261963, + "learning_rate": 0.00013949735043793164, + "loss": 2.5931, + "step": 7478 + }, + { + "epoch": 0.6035832459042854, + "grad_norm": 0.6888518929481506, + "learning_rate": 0.00013948284664105305, + "loss": 2.6408, + "step": 7479 + }, + { + "epoch": 0.6036639496408683, + "grad_norm": 
0.7292625904083252, + "learning_rate": 0.00013946834186015868, + "loss": 2.5829, + "step": 7480 + }, + { + "epoch": 0.6037446533774514, + "grad_norm": 0.6755293607711792, + "learning_rate": 0.00013945383609561009, + "loss": 2.5917, + "step": 7481 + }, + { + "epoch": 0.6038253571140344, + "grad_norm": 0.6808032989501953, + "learning_rate": 0.00013943932934776877, + "loss": 2.6103, + "step": 7482 + }, + { + "epoch": 0.6039060608506174, + "grad_norm": 0.747173547744751, + "learning_rate": 0.00013942482161699625, + "loss": 2.624, + "step": 7483 + }, + { + "epoch": 0.6039867645872004, + "grad_norm": 0.7265594005584717, + "learning_rate": 0.00013941031290365413, + "loss": 2.5672, + "step": 7484 + }, + { + "epoch": 0.6040674683237834, + "grad_norm": 0.6434060335159302, + "learning_rate": 0.000139395803208104, + "loss": 2.5885, + "step": 7485 + }, + { + "epoch": 0.6041481720603664, + "grad_norm": 0.7148730754852295, + "learning_rate": 0.00013938129253070747, + "loss": 2.6466, + "step": 7486 + }, + { + "epoch": 0.6042288757969494, + "grad_norm": 0.7724708318710327, + "learning_rate": 0.00013936678087182616, + "loss": 2.6364, + "step": 7487 + }, + { + "epoch": 0.6043095795335324, + "grad_norm": 0.6886702179908752, + "learning_rate": 0.0001393522682318218, + "loss": 2.5844, + "step": 7488 + }, + { + "epoch": 0.6043902832701155, + "grad_norm": 0.6501082181930542, + "learning_rate": 0.00013933775461105603, + "loss": 2.5767, + "step": 7489 + }, + { + "epoch": 0.6044709870066984, + "grad_norm": 0.7333959341049194, + "learning_rate": 0.00013932324000989058, + "loss": 2.5735, + "step": 7490 + }, + { + "epoch": 0.6045516907432814, + "grad_norm": 0.7057361602783203, + "learning_rate": 0.00013930872442868722, + "loss": 2.627, + "step": 7491 + }, + { + "epoch": 0.6046323944798644, + "grad_norm": 0.705078661441803, + "learning_rate": 0.00013929420786780767, + "loss": 2.6012, + "step": 7492 + }, + { + "epoch": 0.6047130982164475, + "grad_norm": 0.7192156314849854, + "learning_rate": 
0.00013927969032761378, + "loss": 2.5594, + "step": 7493 + }, + { + "epoch": 0.6047938019530305, + "grad_norm": 0.703116774559021, + "learning_rate": 0.00013926517180846726, + "loss": 2.6099, + "step": 7494 + }, + { + "epoch": 0.6048745056896134, + "grad_norm": 0.6970264315605164, + "learning_rate": 0.00013925065231073006, + "loss": 2.5832, + "step": 7495 + }, + { + "epoch": 0.6049552094261964, + "grad_norm": 0.7308031320571899, + "learning_rate": 0.00013923613183476402, + "loss": 2.586, + "step": 7496 + }, + { + "epoch": 0.6050359131627794, + "grad_norm": 0.7212777137756348, + "learning_rate": 0.00013922161038093097, + "loss": 2.6374, + "step": 7497 + }, + { + "epoch": 0.6051166168993625, + "grad_norm": 0.6644641757011414, + "learning_rate": 0.0001392070879495929, + "loss": 2.5226, + "step": 7498 + }, + { + "epoch": 0.6051973206359454, + "grad_norm": 0.6683016419410706, + "learning_rate": 0.0001391925645411117, + "loss": 2.5279, + "step": 7499 + }, + { + "epoch": 0.6052780243725284, + "grad_norm": 0.7341439127922058, + "learning_rate": 0.00013917804015584932, + "loss": 2.5995, + "step": 7500 + }, + { + "epoch": 0.6053587281091114, + "grad_norm": 0.753942608833313, + "learning_rate": 0.0001391635147941678, + "loss": 2.5706, + "step": 7501 + }, + { + "epoch": 0.6054394318456945, + "grad_norm": 0.7541958093643188, + "learning_rate": 0.00013914898845642908, + "loss": 2.6365, + "step": 7502 + }, + { + "epoch": 0.6055201355822775, + "grad_norm": 0.6583349108695984, + "learning_rate": 0.00013913446114299528, + "loss": 2.534, + "step": 7503 + }, + { + "epoch": 0.6056008393188604, + "grad_norm": 0.6545756459236145, + "learning_rate": 0.00013911993285422835, + "loss": 2.5443, + "step": 7504 + }, + { + "epoch": 0.6056815430554434, + "grad_norm": 0.8290210366249084, + "learning_rate": 0.00013910540359049045, + "loss": 2.6196, + "step": 7505 + }, + { + "epoch": 0.6057622467920265, + "grad_norm": 0.7032577395439148, + "learning_rate": 0.0001390908733521437, + "loss": 2.6575, + 
"step": 7506 + }, + { + "epoch": 0.6058429505286095, + "grad_norm": 0.7018071413040161, + "learning_rate": 0.0001390763421395502, + "loss": 2.6272, + "step": 7507 + }, + { + "epoch": 0.6059236542651925, + "grad_norm": 0.6288552284240723, + "learning_rate": 0.00013906180995307206, + "loss": 2.5295, + "step": 7508 + }, + { + "epoch": 0.6060043580017754, + "grad_norm": 0.7013774514198303, + "learning_rate": 0.00013904727679307153, + "loss": 2.5669, + "step": 7509 + }, + { + "epoch": 0.6060850617383585, + "grad_norm": 0.6811630129814148, + "learning_rate": 0.00013903274265991082, + "loss": 2.5827, + "step": 7510 + }, + { + "epoch": 0.6061657654749415, + "grad_norm": 0.6690269112586975, + "learning_rate": 0.0001390182075539521, + "loss": 2.5947, + "step": 7511 + }, + { + "epoch": 0.6062464692115245, + "grad_norm": 0.6946289539337158, + "learning_rate": 0.00013900367147555768, + "loss": 2.59, + "step": 7512 + }, + { + "epoch": 0.6063271729481075, + "grad_norm": 0.7302843332290649, + "learning_rate": 0.0001389891344250898, + "loss": 2.5994, + "step": 7513 + }, + { + "epoch": 0.6064078766846905, + "grad_norm": 0.7462306022644043, + "learning_rate": 0.00013897459640291074, + "loss": 2.5983, + "step": 7514 + }, + { + "epoch": 0.6064885804212735, + "grad_norm": 0.6948123574256897, + "learning_rate": 0.0001389600574093829, + "loss": 2.5737, + "step": 7515 + }, + { + "epoch": 0.6065692841578565, + "grad_norm": 0.6897372007369995, + "learning_rate": 0.00013894551744486857, + "loss": 2.607, + "step": 7516 + }, + { + "epoch": 0.6066499878944395, + "grad_norm": 0.6808069348335266, + "learning_rate": 0.00013893097650973015, + "loss": 2.5712, + "step": 7517 + }, + { + "epoch": 0.6067306916310226, + "grad_norm": 0.7000731229782104, + "learning_rate": 0.00013891643460433, + "loss": 2.5654, + "step": 7518 + }, + { + "epoch": 0.6068113953676055, + "grad_norm": 0.7197545766830444, + "learning_rate": 0.0001389018917290306, + "loss": 2.5705, + "step": 7519 + }, + { + "epoch": 
0.6068920991041885, + "grad_norm": 0.7001069188117981, + "learning_rate": 0.00013888734788419433, + "loss": 2.5934, + "step": 7520 + }, + { + "epoch": 0.6069728028407715, + "grad_norm": 0.7480459213256836, + "learning_rate": 0.00013887280307018377, + "loss": 2.5211, + "step": 7521 + }, + { + "epoch": 0.6070535065773546, + "grad_norm": 0.6913945078849792, + "learning_rate": 0.00013885825728736132, + "loss": 2.6013, + "step": 7522 + }, + { + "epoch": 0.6071342103139376, + "grad_norm": 0.6527336239814758, + "learning_rate": 0.00013884371053608948, + "loss": 2.5901, + "step": 7523 + }, + { + "epoch": 0.6072149140505205, + "grad_norm": 0.6897335052490234, + "learning_rate": 0.00013882916281673086, + "loss": 2.5389, + "step": 7524 + }, + { + "epoch": 0.6072956177871035, + "grad_norm": 0.7159501910209656, + "learning_rate": 0.00013881461412964798, + "loss": 2.5399, + "step": 7525 + }, + { + "epoch": 0.6073763215236866, + "grad_norm": 0.6744364500045776, + "learning_rate": 0.00013880006447520346, + "loss": 2.5658, + "step": 7526 + }, + { + "epoch": 0.6074570252602696, + "grad_norm": 0.819950520992279, + "learning_rate": 0.00013878551385375994, + "loss": 2.6143, + "step": 7527 + }, + { + "epoch": 0.6075377289968525, + "grad_norm": 0.744293212890625, + "learning_rate": 0.00013877096226568, + "loss": 2.6565, + "step": 7528 + }, + { + "epoch": 0.6076184327334355, + "grad_norm": 0.7121254205703735, + "learning_rate": 0.00013875640971132636, + "loss": 2.6151, + "step": 7529 + }, + { + "epoch": 0.6076991364700186, + "grad_norm": 0.7616204023361206, + "learning_rate": 0.00013874185619106163, + "loss": 2.6395, + "step": 7530 + }, + { + "epoch": 0.6077798402066016, + "grad_norm": 0.7481076121330261, + "learning_rate": 0.0001387273017052486, + "loss": 2.597, + "step": 7531 + }, + { + "epoch": 0.6078605439431846, + "grad_norm": 0.6660816073417664, + "learning_rate": 0.00013871274625425, + "loss": 2.5696, + "step": 7532 + }, + { + "epoch": 0.6079412476797675, + "grad_norm": 
0.7491411566734314, + "learning_rate": 0.00013869818983842854, + "loss": 2.552, + "step": 7533 + }, + { + "epoch": 0.6080219514163506, + "grad_norm": 0.7130792140960693, + "learning_rate": 0.00013868363245814704, + "loss": 2.5959, + "step": 7534 + }, + { + "epoch": 0.6081026551529336, + "grad_norm": 0.7157341241836548, + "learning_rate": 0.00013866907411376827, + "loss": 2.5598, + "step": 7535 + }, + { + "epoch": 0.6081833588895166, + "grad_norm": 0.7750656008720398, + "learning_rate": 0.00013865451480565513, + "loss": 2.6217, + "step": 7536 + }, + { + "epoch": 0.6082640626260996, + "grad_norm": 0.6915080547332764, + "learning_rate": 0.00013863995453417043, + "loss": 2.6211, + "step": 7537 + }, + { + "epoch": 0.6083447663626826, + "grad_norm": 0.7245940566062927, + "learning_rate": 0.00013862539329967706, + "loss": 2.5619, + "step": 7538 + }, + { + "epoch": 0.6084254700992656, + "grad_norm": 0.8884119391441345, + "learning_rate": 0.0001386108311025379, + "loss": 2.6349, + "step": 7539 + }, + { + "epoch": 0.6085061738358486, + "grad_norm": 0.7889477610588074, + "learning_rate": 0.0001385962679431159, + "loss": 2.6169, + "step": 7540 + }, + { + "epoch": 0.6085868775724316, + "grad_norm": 0.7187505960464478, + "learning_rate": 0.00013858170382177403, + "loss": 2.5582, + "step": 7541 + }, + { + "epoch": 0.6086675813090147, + "grad_norm": 0.7502198219299316, + "learning_rate": 0.00013856713873887526, + "loss": 2.5418, + "step": 7542 + }, + { + "epoch": 0.6087482850455976, + "grad_norm": 0.797704815864563, + "learning_rate": 0.00013855257269478256, + "loss": 2.5764, + "step": 7543 + }, + { + "epoch": 0.6088289887821806, + "grad_norm": 0.7651431560516357, + "learning_rate": 0.00013853800568985896, + "loss": 2.5995, + "step": 7544 + }, + { + "epoch": 0.6089096925187636, + "grad_norm": 0.7048482298851013, + "learning_rate": 0.00013852343772446753, + "loss": 2.5656, + "step": 7545 + }, + { + "epoch": 0.6089903962553467, + "grad_norm": 0.7252251505851746, + "learning_rate": 
0.00013850886879897135, + "loss": 2.6509, + "step": 7546 + }, + { + "epoch": 0.6090710999919297, + "grad_norm": 0.7220067381858826, + "learning_rate": 0.00013849429891373344, + "loss": 2.5558, + "step": 7547 + }, + { + "epoch": 0.6091518037285126, + "grad_norm": 0.7672600746154785, + "learning_rate": 0.000138479728069117, + "loss": 2.5682, + "step": 7548 + }, + { + "epoch": 0.6092325074650956, + "grad_norm": 0.7753601670265198, + "learning_rate": 0.0001384651562654852, + "loss": 2.6459, + "step": 7549 + }, + { + "epoch": 0.6093132112016786, + "grad_norm": 0.7346559166908264, + "learning_rate": 0.00013845058350320108, + "loss": 2.5988, + "step": 7550 + }, + { + "epoch": 0.6093939149382617, + "grad_norm": 0.7386072874069214, + "learning_rate": 0.00013843600978262797, + "loss": 2.6366, + "step": 7551 + }, + { + "epoch": 0.6094746186748446, + "grad_norm": 0.7114188075065613, + "learning_rate": 0.00013842143510412898, + "loss": 2.5515, + "step": 7552 + }, + { + "epoch": 0.6095553224114276, + "grad_norm": 0.6836373209953308, + "learning_rate": 0.00013840685946806742, + "loss": 2.6301, + "step": 7553 + }, + { + "epoch": 0.6096360261480106, + "grad_norm": 0.7548927068710327, + "learning_rate": 0.00013839228287480652, + "loss": 2.6508, + "step": 7554 + }, + { + "epoch": 0.6097167298845937, + "grad_norm": 0.6931679248809814, + "learning_rate": 0.00013837770532470957, + "loss": 2.5535, + "step": 7555 + }, + { + "epoch": 0.6097974336211767, + "grad_norm": 0.7621145248413086, + "learning_rate": 0.00013836312681813988, + "loss": 2.6831, + "step": 7556 + }, + { + "epoch": 0.6098781373577596, + "grad_norm": 0.6735427975654602, + "learning_rate": 0.00013834854735546079, + "loss": 2.5338, + "step": 7557 + }, + { + "epoch": 0.6099588410943426, + "grad_norm": 0.7157600522041321, + "learning_rate": 0.00013833396693703565, + "loss": 2.5713, + "step": 7558 + }, + { + "epoch": 0.6100395448309257, + "grad_norm": 0.718032956123352, + "learning_rate": 0.00013831938556322789, + "loss": 
2.5625, + "step": 7559 + }, + { + "epoch": 0.6101202485675087, + "grad_norm": 0.7290309071540833, + "learning_rate": 0.0001383048032344008, + "loss": 2.5956, + "step": 7560 + }, + { + "epoch": 0.6102009523040917, + "grad_norm": 0.675470769405365, + "learning_rate": 0.00013829021995091792, + "loss": 2.6053, + "step": 7561 + }, + { + "epoch": 0.6102816560406746, + "grad_norm": 0.7348767518997192, + "learning_rate": 0.00013827563571314268, + "loss": 2.6174, + "step": 7562 + }, + { + "epoch": 0.6103623597772577, + "grad_norm": 0.64495849609375, + "learning_rate": 0.00013826105052143852, + "loss": 2.5923, + "step": 7563 + }, + { + "epoch": 0.6104430635138407, + "grad_norm": 0.7379264235496521, + "learning_rate": 0.000138246464376169, + "loss": 2.6438, + "step": 7564 + }, + { + "epoch": 0.6105237672504237, + "grad_norm": 0.7802134156227112, + "learning_rate": 0.00013823187727769756, + "loss": 2.5884, + "step": 7565 + }, + { + "epoch": 0.6106044709870067, + "grad_norm": 0.6907222867012024, + "learning_rate": 0.00013821728922638782, + "loss": 2.596, + "step": 7566 + }, + { + "epoch": 0.6106851747235897, + "grad_norm": 0.6924182176589966, + "learning_rate": 0.00013820270022260335, + "loss": 2.5631, + "step": 7567 + }, + { + "epoch": 0.6107658784601727, + "grad_norm": 0.729258120059967, + "learning_rate": 0.0001381881102667077, + "loss": 2.5761, + "step": 7568 + }, + { + "epoch": 0.6108465821967557, + "grad_norm": 0.7141425013542175, + "learning_rate": 0.00013817351935906455, + "loss": 2.6214, + "step": 7569 + }, + { + "epoch": 0.6109272859333387, + "grad_norm": 0.7564505338668823, + "learning_rate": 0.00013815892750003748, + "loss": 2.6338, + "step": 7570 + }, + { + "epoch": 0.6110079896699218, + "grad_norm": 0.674705982208252, + "learning_rate": 0.00013814433468999022, + "loss": 2.5604, + "step": 7571 + }, + { + "epoch": 0.6110886934065047, + "grad_norm": 0.6956657767295837, + "learning_rate": 0.00013812974092928642, + "loss": 2.5805, + "step": 7572 + }, + { + "epoch": 
0.6111693971430877, + "grad_norm": 0.7393823862075806, + "learning_rate": 0.0001381151462182898, + "loss": 2.6312, + "step": 7573 + }, + { + "epoch": 0.6112501008796707, + "grad_norm": 0.7048184275627136, + "learning_rate": 0.00013810055055736407, + "loss": 2.5948, + "step": 7574 + }, + { + "epoch": 0.6113308046162538, + "grad_norm": 0.748798668384552, + "learning_rate": 0.0001380859539468731, + "loss": 2.5815, + "step": 7575 + }, + { + "epoch": 0.6114115083528368, + "grad_norm": 0.7146531343460083, + "learning_rate": 0.00013807135638718048, + "loss": 2.5803, + "step": 7576 + }, + { + "epoch": 0.6114922120894197, + "grad_norm": 0.6883770823478699, + "learning_rate": 0.00013805675787865025, + "loss": 2.6005, + "step": 7577 + }, + { + "epoch": 0.6115729158260027, + "grad_norm": 0.7808375358581543, + "learning_rate": 0.0001380421584216461, + "loss": 2.6539, + "step": 7578 + }, + { + "epoch": 0.6116536195625858, + "grad_norm": 0.6919417977333069, + "learning_rate": 0.00013802755801653192, + "loss": 2.5812, + "step": 7579 + }, + { + "epoch": 0.6117343232991688, + "grad_norm": 0.6651085615158081, + "learning_rate": 0.0001380129566636716, + "loss": 2.5952, + "step": 7580 + }, + { + "epoch": 0.6118150270357517, + "grad_norm": 0.7806586623191833, + "learning_rate": 0.00013799835436342897, + "loss": 2.6509, + "step": 7581 + }, + { + "epoch": 0.6118957307723347, + "grad_norm": 0.6522969007492065, + "learning_rate": 0.0001379837511161681, + "loss": 2.606, + "step": 7582 + }, + { + "epoch": 0.6119764345089178, + "grad_norm": 0.7566540837287903, + "learning_rate": 0.0001379691469222528, + "loss": 2.6625, + "step": 7583 + }, + { + "epoch": 0.6120571382455008, + "grad_norm": 0.7126421928405762, + "learning_rate": 0.00013795454178204715, + "loss": 2.6396, + "step": 7584 + }, + { + "epoch": 0.6121378419820838, + "grad_norm": 0.6534276008605957, + "learning_rate": 0.0001379399356959151, + "loss": 2.5841, + "step": 7585 + }, + { + "epoch": 0.6122185457186667, + "grad_norm": 
0.7663385272026062, + "learning_rate": 0.00013792532866422065, + "loss": 2.6685, + "step": 7586 + }, + { + "epoch": 0.6122992494552498, + "grad_norm": 0.6971656084060669, + "learning_rate": 0.0001379107206873279, + "loss": 2.6036, + "step": 7587 + }, + { + "epoch": 0.6123799531918328, + "grad_norm": 0.6807122230529785, + "learning_rate": 0.00013789611176560088, + "loss": 2.6499, + "step": 7588 + }, + { + "epoch": 0.6124606569284158, + "grad_norm": 0.6712431311607361, + "learning_rate": 0.0001378815018994037, + "loss": 2.6725, + "step": 7589 + }, + { + "epoch": 0.6125413606649988, + "grad_norm": 0.6986604928970337, + "learning_rate": 0.00013786689108910045, + "loss": 2.6159, + "step": 7590 + }, + { + "epoch": 0.6126220644015818, + "grad_norm": 0.7004108428955078, + "learning_rate": 0.0001378522793350553, + "loss": 2.5743, + "step": 7591 + }, + { + "epoch": 0.6127027681381648, + "grad_norm": 0.6782098412513733, + "learning_rate": 0.00013783766663763239, + "loss": 2.5776, + "step": 7592 + }, + { + "epoch": 0.6127834718747478, + "grad_norm": 0.6697036027908325, + "learning_rate": 0.00013782305299719593, + "loss": 2.6195, + "step": 7593 + }, + { + "epoch": 0.6128641756113308, + "grad_norm": 0.6894395351409912, + "learning_rate": 0.00013780843841411014, + "loss": 2.662, + "step": 7594 + }, + { + "epoch": 0.6129448793479139, + "grad_norm": 0.6775636672973633, + "learning_rate": 0.00013779382288873918, + "loss": 2.6083, + "step": 7595 + }, + { + "epoch": 0.6130255830844968, + "grad_norm": 0.7143577337265015, + "learning_rate": 0.00013777920642144738, + "loss": 2.581, + "step": 7596 + }, + { + "epoch": 0.6131062868210798, + "grad_norm": 0.6143797636032104, + "learning_rate": 0.00013776458901259905, + "loss": 2.541, + "step": 7597 + }, + { + "epoch": 0.6131869905576628, + "grad_norm": 0.7003727555274963, + "learning_rate": 0.00013774997066255839, + "loss": 2.5748, + "step": 7598 + }, + { + "epoch": 0.6132676942942458, + "grad_norm": 0.6796504259109497, + "learning_rate": 
0.0001377353513716898, + "loss": 2.596, + "step": 7599 + }, + { + "epoch": 0.6133483980308289, + "grad_norm": 0.7011274695396423, + "learning_rate": 0.00013772073114035762, + "loss": 2.5318, + "step": 7600 + }, + { + "epoch": 0.6134291017674118, + "grad_norm": 0.6584382057189941, + "learning_rate": 0.0001377061099689262, + "loss": 2.5793, + "step": 7601 + }, + { + "epoch": 0.6135098055039948, + "grad_norm": 0.6586211919784546, + "learning_rate": 0.00013769148785775995, + "loss": 2.5969, + "step": 7602 + }, + { + "epoch": 0.6135905092405778, + "grad_norm": 0.7187132835388184, + "learning_rate": 0.0001376768648072233, + "loss": 2.6407, + "step": 7603 + }, + { + "epoch": 0.6136712129771609, + "grad_norm": 0.7394679188728333, + "learning_rate": 0.00013766224081768072, + "loss": 2.5959, + "step": 7604 + }, + { + "epoch": 0.6137519167137439, + "grad_norm": 0.6802375912666321, + "learning_rate": 0.00013764761588949665, + "loss": 2.5956, + "step": 7605 + }, + { + "epoch": 0.6138326204503268, + "grad_norm": 0.6949049234390259, + "learning_rate": 0.00013763299002303553, + "loss": 2.556, + "step": 7606 + }, + { + "epoch": 0.6139133241869098, + "grad_norm": 0.7406589388847351, + "learning_rate": 0.00013761836321866196, + "loss": 2.5495, + "step": 7607 + }, + { + "epoch": 0.6139940279234929, + "grad_norm": 0.742499053478241, + "learning_rate": 0.0001376037354767404, + "loss": 2.589, + "step": 7608 + }, + { + "epoch": 0.6140747316600759, + "grad_norm": 0.7669157385826111, + "learning_rate": 0.00013758910679763551, + "loss": 2.576, + "step": 7609 + }, + { + "epoch": 0.6141554353966588, + "grad_norm": 0.6506752967834473, + "learning_rate": 0.00013757447718171182, + "loss": 2.5792, + "step": 7610 + }, + { + "epoch": 0.6142361391332418, + "grad_norm": 0.698514461517334, + "learning_rate": 0.00013755984662933393, + "loss": 2.5809, + "step": 7611 + }, + { + "epoch": 0.6143168428698249, + "grad_norm": 0.6541082262992859, + "learning_rate": 0.00013754521514086645, + "loss": 2.5755, + 
"step": 7612 + }, + { + "epoch": 0.6143975466064079, + "grad_norm": 0.6619362235069275, + "learning_rate": 0.0001375305827166741, + "loss": 2.5886, + "step": 7613 + }, + { + "epoch": 0.6144782503429909, + "grad_norm": 0.7205569744110107, + "learning_rate": 0.00013751594935712148, + "loss": 2.6293, + "step": 7614 + }, + { + "epoch": 0.6145589540795738, + "grad_norm": 0.7382494211196899, + "learning_rate": 0.00013750131506257339, + "loss": 2.6977, + "step": 7615 + }, + { + "epoch": 0.6146396578161569, + "grad_norm": 0.7492627501487732, + "learning_rate": 0.00013748667983339444, + "loss": 2.6492, + "step": 7616 + }, + { + "epoch": 0.6147203615527399, + "grad_norm": 0.6627328991889954, + "learning_rate": 0.00013747204366994947, + "loss": 2.5458, + "step": 7617 + }, + { + "epoch": 0.6148010652893229, + "grad_norm": 0.7039626836776733, + "learning_rate": 0.00013745740657260323, + "loss": 2.6578, + "step": 7618 + }, + { + "epoch": 0.6148817690259059, + "grad_norm": 0.6999295353889465, + "learning_rate": 0.00013744276854172046, + "loss": 2.6189, + "step": 7619 + }, + { + "epoch": 0.6149624727624889, + "grad_norm": 0.7604365348815918, + "learning_rate": 0.00013742812957766607, + "loss": 2.5344, + "step": 7620 + }, + { + "epoch": 0.6150431764990719, + "grad_norm": 0.6860831379890442, + "learning_rate": 0.0001374134896808048, + "loss": 2.6309, + "step": 7621 + }, + { + "epoch": 0.6151238802356549, + "grad_norm": 0.6628854274749756, + "learning_rate": 0.0001373988488515016, + "loss": 2.6339, + "step": 7622 + }, + { + "epoch": 0.6152045839722379, + "grad_norm": 0.7112562656402588, + "learning_rate": 0.00013738420709012134, + "loss": 2.6064, + "step": 7623 + }, + { + "epoch": 0.615285287708821, + "grad_norm": 0.7068392634391785, + "learning_rate": 0.0001373695643970289, + "loss": 2.624, + "step": 7624 + }, + { + "epoch": 0.6153659914454039, + "grad_norm": 0.6534786224365234, + "learning_rate": 0.00013735492077258924, + "loss": 2.5582, + "step": 7625 + }, + { + "epoch": 
0.6154466951819869, + "grad_norm": 0.7433418035507202, + "learning_rate": 0.00013734027621716729, + "loss": 2.5803, + "step": 7626 + }, + { + "epoch": 0.6155273989185699, + "grad_norm": 0.7172532081604004, + "learning_rate": 0.00013732563073112804, + "loss": 2.5906, + "step": 7627 + }, + { + "epoch": 0.615608102655153, + "grad_norm": 0.6712297201156616, + "learning_rate": 0.00013731098431483653, + "loss": 2.5597, + "step": 7628 + }, + { + "epoch": 0.615688806391736, + "grad_norm": 0.7079061269760132, + "learning_rate": 0.00013729633696865775, + "loss": 2.5538, + "step": 7629 + }, + { + "epoch": 0.6157695101283189, + "grad_norm": 0.6968971490859985, + "learning_rate": 0.00013728168869295678, + "loss": 2.6429, + "step": 7630 + }, + { + "epoch": 0.6158502138649019, + "grad_norm": 0.7123236060142517, + "learning_rate": 0.00013726703948809864, + "loss": 2.5607, + "step": 7631 + }, + { + "epoch": 0.615930917601485, + "grad_norm": 0.6441208124160767, + "learning_rate": 0.00013725238935444843, + "loss": 2.6176, + "step": 7632 + }, + { + "epoch": 0.616011621338068, + "grad_norm": 0.7145917415618896, + "learning_rate": 0.00013723773829237137, + "loss": 2.5698, + "step": 7633 + }, + { + "epoch": 0.616092325074651, + "grad_norm": 0.6397334337234497, + "learning_rate": 0.00013722308630223252, + "loss": 2.596, + "step": 7634 + }, + { + "epoch": 0.6161730288112339, + "grad_norm": 0.6372843980789185, + "learning_rate": 0.00013720843338439702, + "loss": 2.5679, + "step": 7635 + }, + { + "epoch": 0.616253732547817, + "grad_norm": 0.707842230796814, + "learning_rate": 0.00013719377953923012, + "loss": 2.6296, + "step": 7636 + }, + { + "epoch": 0.6163344362844, + "grad_norm": 0.6629409193992615, + "learning_rate": 0.000137179124767097, + "loss": 2.542, + "step": 7637 + }, + { + "epoch": 0.616415140020983, + "grad_norm": 0.753646194934845, + "learning_rate": 0.00013716446906836288, + "loss": 2.5741, + "step": 7638 + }, + { + "epoch": 0.6164958437575659, + "grad_norm": 
0.6409948468208313, + "learning_rate": 0.0001371498124433931, + "loss": 2.6723, + "step": 7639 + }, + { + "epoch": 0.616576547494149, + "grad_norm": 0.6489264965057373, + "learning_rate": 0.0001371351548925528, + "loss": 2.5806, + "step": 7640 + }, + { + "epoch": 0.616657251230732, + "grad_norm": 0.6857934594154358, + "learning_rate": 0.00013712049641620745, + "loss": 2.6406, + "step": 7641 + }, + { + "epoch": 0.616737954967315, + "grad_norm": 0.6754183769226074, + "learning_rate": 0.00013710583701472226, + "loss": 2.5576, + "step": 7642 + }, + { + "epoch": 0.616818658703898, + "grad_norm": 0.7083800435066223, + "learning_rate": 0.0001370911766884626, + "loss": 2.5747, + "step": 7643 + }, + { + "epoch": 0.616899362440481, + "grad_norm": 0.7281948924064636, + "learning_rate": 0.0001370765154377939, + "loss": 2.5627, + "step": 7644 + }, + { + "epoch": 0.616980066177064, + "grad_norm": 0.655414342880249, + "learning_rate": 0.00013706185326308148, + "loss": 2.5897, + "step": 7645 + }, + { + "epoch": 0.617060769913647, + "grad_norm": 0.6771859526634216, + "learning_rate": 0.0001370471901646908, + "loss": 2.5761, + "step": 7646 + }, + { + "epoch": 0.61714147365023, + "grad_norm": 0.6813557147979736, + "learning_rate": 0.00013703252614298732, + "loss": 2.5807, + "step": 7647 + }, + { + "epoch": 0.6172221773868131, + "grad_norm": 0.6948046684265137, + "learning_rate": 0.00013701786119833646, + "loss": 2.586, + "step": 7648 + }, + { + "epoch": 0.617302881123396, + "grad_norm": 0.643455982208252, + "learning_rate": 0.00013700319533110377, + "loss": 2.592, + "step": 7649 + }, + { + "epoch": 0.617383584859979, + "grad_norm": 0.7292457818984985, + "learning_rate": 0.0001369885285416547, + "loss": 2.6396, + "step": 7650 + }, + { + "epoch": 0.617464288596562, + "grad_norm": 0.642902672290802, + "learning_rate": 0.00013697386083035478, + "loss": 2.6115, + "step": 7651 + }, + { + "epoch": 0.617544992333145, + "grad_norm": 0.6536445021629333, + "learning_rate": 
0.00013695919219756966, + "loss": 2.5406, + "step": 7652 + }, + { + "epoch": 0.6176256960697281, + "grad_norm": 0.6643723249435425, + "learning_rate": 0.0001369445226436648, + "loss": 2.6188, + "step": 7653 + }, + { + "epoch": 0.617706399806311, + "grad_norm": 0.6481621265411377, + "learning_rate": 0.00013692985216900592, + "loss": 2.5489, + "step": 7654 + }, + { + "epoch": 0.617787103542894, + "grad_norm": 0.6828036904335022, + "learning_rate": 0.00013691518077395856, + "loss": 2.5114, + "step": 7655 + }, + { + "epoch": 0.617867807279477, + "grad_norm": 0.6802895665168762, + "learning_rate": 0.00013690050845888838, + "loss": 2.5973, + "step": 7656 + }, + { + "epoch": 0.6179485110160601, + "grad_norm": 0.6980829238891602, + "learning_rate": 0.00013688583522416107, + "loss": 2.6032, + "step": 7657 + }, + { + "epoch": 0.618029214752643, + "grad_norm": 0.7157626748085022, + "learning_rate": 0.00013687116107014236, + "loss": 2.5552, + "step": 7658 + }, + { + "epoch": 0.618109918489226, + "grad_norm": 0.69700688123703, + "learning_rate": 0.00013685648599719792, + "loss": 2.5988, + "step": 7659 + }, + { + "epoch": 0.618190622225809, + "grad_norm": 0.6859539151191711, + "learning_rate": 0.0001368418100056935, + "loss": 2.6268, + "step": 7660 + }, + { + "epoch": 0.6182713259623921, + "grad_norm": 0.6812828183174133, + "learning_rate": 0.00013682713309599487, + "loss": 2.6002, + "step": 7661 + }, + { + "epoch": 0.6183520296989751, + "grad_norm": 0.6461766362190247, + "learning_rate": 0.00013681245526846783, + "loss": 2.6064, + "step": 7662 + }, + { + "epoch": 0.618432733435558, + "grad_norm": 0.7198306322097778, + "learning_rate": 0.00013679777652347814, + "loss": 2.6012, + "step": 7663 + }, + { + "epoch": 0.618513437172141, + "grad_norm": 0.7367191910743713, + "learning_rate": 0.00013678309686139168, + "loss": 2.6661, + "step": 7664 + }, + { + "epoch": 0.6185941409087241, + "grad_norm": 0.6975768804550171, + "learning_rate": 0.0001367684162825743, + "loss": 2.6394, + 
"step": 7665 + }, + { + "epoch": 0.6186748446453071, + "grad_norm": 0.7545140385627747, + "learning_rate": 0.0001367537347873919, + "loss": 2.624, + "step": 7666 + }, + { + "epoch": 0.6187555483818901, + "grad_norm": 0.6683520674705505, + "learning_rate": 0.0001367390523762103, + "loss": 2.6345, + "step": 7667 + }, + { + "epoch": 0.618836252118473, + "grad_norm": 0.6964975595474243, + "learning_rate": 0.00013672436904939552, + "loss": 2.591, + "step": 7668 + }, + { + "epoch": 0.6189169558550561, + "grad_norm": 0.7033975124359131, + "learning_rate": 0.00013670968480731344, + "loss": 2.566, + "step": 7669 + }, + { + "epoch": 0.6189976595916391, + "grad_norm": 0.706136167049408, + "learning_rate": 0.00013669499965033007, + "loss": 2.6073, + "step": 7670 + }, + { + "epoch": 0.6190783633282221, + "grad_norm": 0.7146300673484802, + "learning_rate": 0.0001366803135788114, + "loss": 2.6602, + "step": 7671 + }, + { + "epoch": 0.6191590670648051, + "grad_norm": 0.7603063583374023, + "learning_rate": 0.00013666562659312342, + "loss": 2.5286, + "step": 7672 + }, + { + "epoch": 0.6192397708013881, + "grad_norm": 0.744955837726593, + "learning_rate": 0.00013665093869363217, + "loss": 2.5678, + "step": 7673 + }, + { + "epoch": 0.6193204745379711, + "grad_norm": 0.7548620104789734, + "learning_rate": 0.00013663624988070373, + "loss": 2.6081, + "step": 7674 + }, + { + "epoch": 0.6194011782745541, + "grad_norm": 0.7367276549339294, + "learning_rate": 0.0001366215601547042, + "loss": 2.5559, + "step": 7675 + }, + { + "epoch": 0.6194818820111371, + "grad_norm": 0.7243839502334595, + "learning_rate": 0.00013660686951599962, + "loss": 2.5545, + "step": 7676 + }, + { + "epoch": 0.6195625857477202, + "grad_norm": 0.7595756649971008, + "learning_rate": 0.00013659217796495616, + "loss": 2.6547, + "step": 7677 + }, + { + "epoch": 0.6196432894843031, + "grad_norm": 0.7566717863082886, + "learning_rate": 0.00013657748550193998, + "loss": 2.6521, + "step": 7678 + }, + { + "epoch": 
0.6197239932208861, + "grad_norm": 0.8441942930221558, + "learning_rate": 0.00013656279212731728, + "loss": 2.6325, + "step": 7679 + }, + { + "epoch": 0.6198046969574691, + "grad_norm": 0.7481170296669006, + "learning_rate": 0.00013654809784145418, + "loss": 2.6037, + "step": 7680 + }, + { + "epoch": 0.6198854006940522, + "grad_norm": 0.6626241207122803, + "learning_rate": 0.00013653340264471695, + "loss": 2.6028, + "step": 7681 + }, + { + "epoch": 0.6199661044306352, + "grad_norm": 0.7658020853996277, + "learning_rate": 0.00013651870653747186, + "loss": 2.5553, + "step": 7682 + }, + { + "epoch": 0.6200468081672181, + "grad_norm": 0.8218126893043518, + "learning_rate": 0.0001365040095200851, + "loss": 2.5661, + "step": 7683 + }, + { + "epoch": 0.6201275119038011, + "grad_norm": 0.6481068134307861, + "learning_rate": 0.00013648931159292304, + "loss": 2.5675, + "step": 7684 + }, + { + "epoch": 0.6202082156403842, + "grad_norm": 0.7529950141906738, + "learning_rate": 0.0001364746127563519, + "loss": 2.6137, + "step": 7685 + }, + { + "epoch": 0.6202889193769672, + "grad_norm": 0.7133232355117798, + "learning_rate": 0.00013645991301073816, + "loss": 2.6004, + "step": 7686 + }, + { + "epoch": 0.6203696231135502, + "grad_norm": 0.7809340953826904, + "learning_rate": 0.000136445212356448, + "loss": 2.6317, + "step": 7687 + }, + { + "epoch": 0.6204503268501331, + "grad_norm": 0.7106895446777344, + "learning_rate": 0.00013643051079384789, + "loss": 2.6086, + "step": 7688 + }, + { + "epoch": 0.6205310305867162, + "grad_norm": 0.6960744261741638, + "learning_rate": 0.00013641580832330423, + "loss": 2.5554, + "step": 7689 + }, + { + "epoch": 0.6206117343232992, + "grad_norm": 0.7078820466995239, + "learning_rate": 0.00013640110494518343, + "loss": 2.5902, + "step": 7690 + }, + { + "epoch": 0.6206924380598822, + "grad_norm": 0.7150746583938599, + "learning_rate": 0.00013638640065985195, + "loss": 2.5947, + "step": 7691 + }, + { + "epoch": 0.6207731417964651, + "grad_norm": 
0.7507869601249695, + "learning_rate": 0.00013637169546767625, + "loss": 2.559, + "step": 7692 + }, + { + "epoch": 0.6208538455330482, + "grad_norm": 0.7453179359436035, + "learning_rate": 0.00013635698936902282, + "loss": 2.5612, + "step": 7693 + }, + { + "epoch": 0.6209345492696312, + "grad_norm": 0.7174177765846252, + "learning_rate": 0.00013634228236425816, + "loss": 2.6221, + "step": 7694 + }, + { + "epoch": 0.6210152530062142, + "grad_norm": 0.7394092679023743, + "learning_rate": 0.00013632757445374884, + "loss": 2.6045, + "step": 7695 + }, + { + "epoch": 0.6210959567427972, + "grad_norm": 0.7346367239952087, + "learning_rate": 0.0001363128656378614, + "loss": 2.677, + "step": 7696 + }, + { + "epoch": 0.6211766604793802, + "grad_norm": 0.6697696447372437, + "learning_rate": 0.00013629815591696245, + "loss": 2.5741, + "step": 7697 + }, + { + "epoch": 0.6212573642159632, + "grad_norm": 0.6993793845176697, + "learning_rate": 0.00013628344529141852, + "loss": 2.5206, + "step": 7698 + }, + { + "epoch": 0.6213380679525462, + "grad_norm": 0.6946697235107422, + "learning_rate": 0.00013626873376159631, + "loss": 2.6046, + "step": 7699 + }, + { + "epoch": 0.6214187716891292, + "grad_norm": 0.7641928195953369, + "learning_rate": 0.00013625402132786248, + "loss": 2.5459, + "step": 7700 + }, + { + "epoch": 0.6214994754257122, + "grad_norm": 0.6513504981994629, + "learning_rate": 0.00013623930799058363, + "loss": 2.6137, + "step": 7701 + }, + { + "epoch": 0.6215801791622952, + "grad_norm": 0.6745209097862244, + "learning_rate": 0.00013622459375012651, + "loss": 2.5285, + "step": 7702 + }, + { + "epoch": 0.6216608828988782, + "grad_norm": 0.7162348628044128, + "learning_rate": 0.0001362098786068578, + "loss": 2.6224, + "step": 7703 + }, + { + "epoch": 0.6217415866354612, + "grad_norm": 0.7387436032295227, + "learning_rate": 0.00013619516256114427, + "loss": 2.6216, + "step": 7704 + }, + { + "epoch": 0.6218222903720442, + "grad_norm": 0.764955461025238, + "learning_rate": 
0.00013618044561335268, + "loss": 2.612, + "step": 7705 + }, + { + "epoch": 0.6219029941086273, + "grad_norm": 0.6492719054222107, + "learning_rate": 0.00013616572776384983, + "loss": 2.5532, + "step": 7706 + }, + { + "epoch": 0.6219836978452102, + "grad_norm": 0.6870293617248535, + "learning_rate": 0.0001361510090130025, + "loss": 2.5705, + "step": 7707 + }, + { + "epoch": 0.6220644015817932, + "grad_norm": 0.6899540424346924, + "learning_rate": 0.0001361362893611775, + "loss": 2.5768, + "step": 7708 + }, + { + "epoch": 0.6221451053183762, + "grad_norm": 0.658941924571991, + "learning_rate": 0.0001361215688087417, + "loss": 2.5664, + "step": 7709 + }, + { + "epoch": 0.6222258090549593, + "grad_norm": 0.6875531673431396, + "learning_rate": 0.000136106847356062, + "loss": 2.6128, + "step": 7710 + }, + { + "epoch": 0.6223065127915423, + "grad_norm": 0.657073974609375, + "learning_rate": 0.0001360921250035053, + "loss": 2.6449, + "step": 7711 + }, + { + "epoch": 0.6223872165281252, + "grad_norm": 0.7051201462745667, + "learning_rate": 0.00013607740175143848, + "loss": 2.5925, + "step": 7712 + }, + { + "epoch": 0.6224679202647082, + "grad_norm": 0.702877938747406, + "learning_rate": 0.0001360626776002285, + "loss": 2.5338, + "step": 7713 + }, + { + "epoch": 0.6225486240012913, + "grad_norm": 0.650935709476471, + "learning_rate": 0.00013604795255024233, + "loss": 2.5799, + "step": 7714 + }, + { + "epoch": 0.6226293277378743, + "grad_norm": 0.7035139203071594, + "learning_rate": 0.00013603322660184694, + "loss": 2.5476, + "step": 7715 + }, + { + "epoch": 0.6227100314744572, + "grad_norm": 0.6549977660179138, + "learning_rate": 0.0001360184997554094, + "loss": 2.6117, + "step": 7716 + }, + { + "epoch": 0.6227907352110402, + "grad_norm": 0.6882792115211487, + "learning_rate": 0.00013600377201129662, + "loss": 2.53, + "step": 7717 + }, + { + "epoch": 0.6228714389476233, + "grad_norm": 0.7390840649604797, + "learning_rate": 0.0001359890433698758, + "loss": 2.6345, + "step": 
7718 + }, + { + "epoch": 0.6229521426842063, + "grad_norm": 0.7577612400054932, + "learning_rate": 0.00013597431383151386, + "loss": 2.6386, + "step": 7719 + }, + { + "epoch": 0.6230328464207893, + "grad_norm": 0.6818724870681763, + "learning_rate": 0.00013595958339657804, + "loss": 2.5806, + "step": 7720 + }, + { + "epoch": 0.6231135501573722, + "grad_norm": 0.6954349279403687, + "learning_rate": 0.0001359448520654354, + "loss": 2.5913, + "step": 7721 + }, + { + "epoch": 0.6231942538939553, + "grad_norm": 0.7976544499397278, + "learning_rate": 0.00013593011983845308, + "loss": 2.5686, + "step": 7722 + }, + { + "epoch": 0.6232749576305383, + "grad_norm": 0.7362754940986633, + "learning_rate": 0.00013591538671599824, + "loss": 2.5596, + "step": 7723 + }, + { + "epoch": 0.6233556613671213, + "grad_norm": 0.6842390298843384, + "learning_rate": 0.00013590065269843805, + "loss": 2.5793, + "step": 7724 + }, + { + "epoch": 0.6234363651037043, + "grad_norm": 0.6816275715827942, + "learning_rate": 0.0001358859177861398, + "loss": 2.5948, + "step": 7725 + }, + { + "epoch": 0.6235170688402873, + "grad_norm": 0.6892915964126587, + "learning_rate": 0.00013587118197947066, + "loss": 2.6287, + "step": 7726 + }, + { + "epoch": 0.6235977725768703, + "grad_norm": 0.6851752996444702, + "learning_rate": 0.00013585644527879792, + "loss": 2.5781, + "step": 7727 + }, + { + "epoch": 0.6236784763134533, + "grad_norm": 0.7022164463996887, + "learning_rate": 0.00013584170768448877, + "loss": 2.5856, + "step": 7728 + }, + { + "epoch": 0.6237591800500363, + "grad_norm": 0.6752299070358276, + "learning_rate": 0.0001358269691969106, + "loss": 2.6042, + "step": 7729 + }, + { + "epoch": 0.6238398837866194, + "grad_norm": 0.6861466765403748, + "learning_rate": 0.00013581222981643074, + "loss": 2.5887, + "step": 7730 + }, + { + "epoch": 0.6239205875232023, + "grad_norm": 0.7147940397262573, + "learning_rate": 0.00013579748954341647, + "loss": 2.5796, + "step": 7731 + }, + { + "epoch": 
0.6240012912597853, + "grad_norm": 0.6704726219177246, + "learning_rate": 0.0001357827483782352, + "loss": 2.6027, + "step": 7732 + }, + { + "epoch": 0.6240819949963683, + "grad_norm": 0.6984317898750305, + "learning_rate": 0.0001357680063212543, + "loss": 2.635, + "step": 7733 + }, + { + "epoch": 0.6241626987329514, + "grad_norm": 0.6205787658691406, + "learning_rate": 0.00013575326337284115, + "loss": 2.5715, + "step": 7734 + }, + { + "epoch": 0.6242434024695344, + "grad_norm": 0.7214726805686951, + "learning_rate": 0.00013573851953336326, + "loss": 2.5605, + "step": 7735 + }, + { + "epoch": 0.6243241062061173, + "grad_norm": 0.6716169714927673, + "learning_rate": 0.000135723774803188, + "loss": 2.6766, + "step": 7736 + }, + { + "epoch": 0.6244048099427003, + "grad_norm": 0.6446832418441772, + "learning_rate": 0.00013570902918268293, + "loss": 2.5629, + "step": 7737 + }, + { + "epoch": 0.6244855136792834, + "grad_norm": 0.6721374988555908, + "learning_rate": 0.0001356942826722155, + "loss": 2.6093, + "step": 7738 + }, + { + "epoch": 0.6245662174158664, + "grad_norm": 0.7430365681648254, + "learning_rate": 0.0001356795352721532, + "loss": 2.5966, + "step": 7739 + }, + { + "epoch": 0.6246469211524494, + "grad_norm": 0.6787518858909607, + "learning_rate": 0.00013566478698286366, + "loss": 2.5519, + "step": 7740 + }, + { + "epoch": 0.6247276248890323, + "grad_norm": 0.6340047121047974, + "learning_rate": 0.0001356500378047144, + "loss": 2.5181, + "step": 7741 + }, + { + "epoch": 0.6248083286256154, + "grad_norm": 0.7559040188789368, + "learning_rate": 0.000135635287738073, + "loss": 2.6068, + "step": 7742 + }, + { + "epoch": 0.6248890323621984, + "grad_norm": 0.6819902062416077, + "learning_rate": 0.00013562053678330707, + "loss": 2.5754, + "step": 7743 + }, + { + "epoch": 0.6249697360987814, + "grad_norm": 0.6463500261306763, + "learning_rate": 0.00013560578494078423, + "loss": 2.5915, + "step": 7744 + }, + { + "epoch": 0.6250504398353643, + "grad_norm": 
0.7510617971420288, + "learning_rate": 0.0001355910322108722, + "loss": 2.5738, + "step": 7745 + }, + { + "epoch": 0.6251311435719474, + "grad_norm": 0.75312739610672, + "learning_rate": 0.00013557627859393855, + "loss": 2.5938, + "step": 7746 + }, + { + "epoch": 0.6252118473085304, + "grad_norm": 0.7784396409988403, + "learning_rate": 0.0001355615240903511, + "loss": 2.6634, + "step": 7747 + }, + { + "epoch": 0.6252925510451134, + "grad_norm": 0.7174746990203857, + "learning_rate": 0.00013554676870047752, + "loss": 2.5973, + "step": 7748 + }, + { + "epoch": 0.6253732547816964, + "grad_norm": 0.6854952573776245, + "learning_rate": 0.0001355320124246855, + "loss": 2.5397, + "step": 7749 + }, + { + "epoch": 0.6254539585182795, + "grad_norm": 0.6584961414337158, + "learning_rate": 0.00013551725526334284, + "loss": 2.5574, + "step": 7750 + }, + { + "epoch": 0.6255346622548624, + "grad_norm": 0.7067389488220215, + "learning_rate": 0.00013550249721681738, + "loss": 2.5524, + "step": 7751 + }, + { + "epoch": 0.6256153659914454, + "grad_norm": 0.6923872232437134, + "learning_rate": 0.00013548773828547686, + "loss": 2.5651, + "step": 7752 + }, + { + "epoch": 0.6256960697280284, + "grad_norm": 0.6612355709075928, + "learning_rate": 0.00013547297846968915, + "loss": 2.6075, + "step": 7753 + }, + { + "epoch": 0.6257767734646114, + "grad_norm": 0.6762828826904297, + "learning_rate": 0.00013545821776982206, + "loss": 2.6136, + "step": 7754 + }, + { + "epoch": 0.6258574772011944, + "grad_norm": 0.6940783858299255, + "learning_rate": 0.0001354434561862435, + "loss": 2.5566, + "step": 7755 + }, + { + "epoch": 0.6259381809377774, + "grad_norm": 0.7874250411987305, + "learning_rate": 0.0001354286937193214, + "loss": 2.6732, + "step": 7756 + }, + { + "epoch": 0.6260188846743604, + "grad_norm": 0.6974111795425415, + "learning_rate": 0.0001354139303694236, + "loss": 2.5455, + "step": 7757 + }, + { + "epoch": 0.6260995884109434, + "grad_norm": 0.6710802316665649, + "learning_rate": 
0.0001353991661369181, + "loss": 2.5608, + "step": 7758 + }, + { + "epoch": 0.6261802921475265, + "grad_norm": 0.681635320186615, + "learning_rate": 0.00013538440102217286, + "loss": 2.6107, + "step": 7759 + }, + { + "epoch": 0.6262609958841094, + "grad_norm": 0.7229577898979187, + "learning_rate": 0.0001353696350255558, + "loss": 2.5936, + "step": 7760 + }, + { + "epoch": 0.6263416996206924, + "grad_norm": 0.6909681558609009, + "learning_rate": 0.00013535486814743504, + "loss": 2.5521, + "step": 7761 + }, + { + "epoch": 0.6264224033572754, + "grad_norm": 0.7003746032714844, + "learning_rate": 0.0001353401003881785, + "loss": 2.5606, + "step": 7762 + }, + { + "epoch": 0.6265031070938585, + "grad_norm": 0.6883233785629272, + "learning_rate": 0.0001353253317481543, + "loss": 2.5971, + "step": 7763 + }, + { + "epoch": 0.6265838108304415, + "grad_norm": 0.7382355332374573, + "learning_rate": 0.0001353105622277305, + "loss": 2.5449, + "step": 7764 + }, + { + "epoch": 0.6266645145670244, + "grad_norm": 0.7090556621551514, + "learning_rate": 0.00013529579182727515, + "loss": 2.5988, + "step": 7765 + }, + { + "epoch": 0.6267452183036074, + "grad_norm": 0.6842581629753113, + "learning_rate": 0.00013528102054715643, + "loss": 2.6214, + "step": 7766 + }, + { + "epoch": 0.6268259220401905, + "grad_norm": 0.6969670653343201, + "learning_rate": 0.00013526624838774246, + "loss": 2.5443, + "step": 7767 + }, + { + "epoch": 0.6269066257767735, + "grad_norm": 0.7244827151298523, + "learning_rate": 0.00013525147534940138, + "loss": 2.5967, + "step": 7768 + }, + { + "epoch": 0.6269873295133565, + "grad_norm": 0.7022162675857544, + "learning_rate": 0.0001352367014325014, + "loss": 2.599, + "step": 7769 + }, + { + "epoch": 0.6270680332499394, + "grad_norm": 0.7065250873565674, + "learning_rate": 0.00013522192663741067, + "loss": 2.6105, + "step": 7770 + }, + { + "epoch": 0.6271487369865225, + "grad_norm": 0.6690711975097656, + "learning_rate": 0.0001352071509644975, + "loss": 2.55, + 
"step": 7771 + }, + { + "epoch": 0.6272294407231055, + "grad_norm": 0.6405982971191406, + "learning_rate": 0.00013519237441413011, + "loss": 2.6078, + "step": 7772 + }, + { + "epoch": 0.6273101444596885, + "grad_norm": 0.7340127229690552, + "learning_rate": 0.00013517759698667672, + "loss": 2.6244, + "step": 7773 + }, + { + "epoch": 0.6273908481962714, + "grad_norm": 0.6609435677528381, + "learning_rate": 0.00013516281868250566, + "loss": 2.5746, + "step": 7774 + }, + { + "epoch": 0.6274715519328545, + "grad_norm": 0.6681997179985046, + "learning_rate": 0.00013514803950198523, + "loss": 2.6181, + "step": 7775 + }, + { + "epoch": 0.6275522556694375, + "grad_norm": 0.7120032906532288, + "learning_rate": 0.0001351332594454838, + "loss": 2.6018, + "step": 7776 + }, + { + "epoch": 0.6276329594060205, + "grad_norm": 0.6618601679801941, + "learning_rate": 0.0001351184785133697, + "loss": 2.5342, + "step": 7777 + }, + { + "epoch": 0.6277136631426035, + "grad_norm": 0.7250192165374756, + "learning_rate": 0.00013510369670601132, + "loss": 2.5795, + "step": 7778 + }, + { + "epoch": 0.6277943668791865, + "grad_norm": 0.7918543219566345, + "learning_rate": 0.00013508891402377708, + "loss": 2.6544, + "step": 7779 + }, + { + "epoch": 0.6278750706157695, + "grad_norm": 0.678895890712738, + "learning_rate": 0.00013507413046703534, + "loss": 2.5937, + "step": 7780 + }, + { + "epoch": 0.6279557743523525, + "grad_norm": 0.7336576581001282, + "learning_rate": 0.00013505934603615457, + "loss": 2.598, + "step": 7781 + }, + { + "epoch": 0.6280364780889355, + "grad_norm": 0.6891419291496277, + "learning_rate": 0.00013504456073150332, + "loss": 2.5063, + "step": 7782 + }, + { + "epoch": 0.6281171818255186, + "grad_norm": 0.7949386835098267, + "learning_rate": 0.00013502977455344997, + "loss": 2.5703, + "step": 7783 + }, + { + "epoch": 0.6281978855621015, + "grad_norm": 0.7917985320091248, + "learning_rate": 0.00013501498750236306, + "loss": 2.639, + "step": 7784 + }, + { + "epoch": 
0.6282785892986845, + "grad_norm": 0.7387086749076843, + "learning_rate": 0.00013500019957861113, + "loss": 2.5864, + "step": 7785 + }, + { + "epoch": 0.6283592930352675, + "grad_norm": 0.7189435958862305, + "learning_rate": 0.00013498541078256273, + "loss": 2.5627, + "step": 7786 + }, + { + "epoch": 0.6284399967718506, + "grad_norm": 0.6709900498390198, + "learning_rate": 0.00013497062111458646, + "loss": 2.5973, + "step": 7787 + }, + { + "epoch": 0.6285207005084336, + "grad_norm": 0.6925386190414429, + "learning_rate": 0.0001349558305750509, + "loss": 2.615, + "step": 7788 + }, + { + "epoch": 0.6286014042450165, + "grad_norm": 0.7191932201385498, + "learning_rate": 0.00013494103916432466, + "loss": 2.576, + "step": 7789 + }, + { + "epoch": 0.6286821079815995, + "grad_norm": 0.6798804402351379, + "learning_rate": 0.00013492624688277638, + "loss": 2.5661, + "step": 7790 + }, + { + "epoch": 0.6287628117181826, + "grad_norm": 0.6514562964439392, + "learning_rate": 0.00013491145373077475, + "loss": 2.6135, + "step": 7791 + }, + { + "epoch": 0.6288435154547656, + "grad_norm": 0.7345223426818848, + "learning_rate": 0.00013489665970868838, + "loss": 2.6015, + "step": 7792 + }, + { + "epoch": 0.6289242191913486, + "grad_norm": 0.7102675437927246, + "learning_rate": 0.0001348818648168861, + "loss": 2.5545, + "step": 7793 + }, + { + "epoch": 0.6290049229279315, + "grad_norm": 0.7151654362678528, + "learning_rate": 0.0001348670690557365, + "loss": 2.6464, + "step": 7794 + }, + { + "epoch": 0.6290856266645146, + "grad_norm": 0.7344057559967041, + "learning_rate": 0.00013485227242560844, + "loss": 2.6777, + "step": 7795 + }, + { + "epoch": 0.6291663304010976, + "grad_norm": 0.6622766852378845, + "learning_rate": 0.00013483747492687065, + "loss": 2.5713, + "step": 7796 + }, + { + "epoch": 0.6292470341376806, + "grad_norm": 0.6899346709251404, + "learning_rate": 0.0001348226765598919, + "loss": 2.5188, + "step": 7797 + }, + { + "epoch": 0.6293277378742635, + "grad_norm": 
0.6711421012878418, + "learning_rate": 0.000134807877325041, + "loss": 2.5603, + "step": 7798 + }, + { + "epoch": 0.6294084416108466, + "grad_norm": 0.6973204016685486, + "learning_rate": 0.00013479307722268687, + "loss": 2.6621, + "step": 7799 + }, + { + "epoch": 0.6294891453474296, + "grad_norm": 0.7782350778579712, + "learning_rate": 0.00013477827625319824, + "loss": 2.5929, + "step": 7800 + }, + { + "epoch": 0.6295698490840126, + "grad_norm": 0.8703733682632446, + "learning_rate": 0.0001347634744169441, + "loss": 2.6884, + "step": 7801 + }, + { + "epoch": 0.6296505528205956, + "grad_norm": 0.7196036577224731, + "learning_rate": 0.00013474867171429326, + "loss": 2.6002, + "step": 7802 + }, + { + "epoch": 0.6297312565571785, + "grad_norm": 0.7224054932594299, + "learning_rate": 0.00013473386814561475, + "loss": 2.6007, + "step": 7803 + }, + { + "epoch": 0.6298119602937616, + "grad_norm": 0.7615752816200256, + "learning_rate": 0.00013471906371127743, + "loss": 2.6459, + "step": 7804 + }, + { + "epoch": 0.6298926640303446, + "grad_norm": 0.7189914584159851, + "learning_rate": 0.00013470425841165024, + "loss": 2.5692, + "step": 7805 + }, + { + "epoch": 0.6299733677669276, + "grad_norm": 0.7101845741271973, + "learning_rate": 0.00013468945224710225, + "loss": 2.5776, + "step": 7806 + }, + { + "epoch": 0.6300540715035106, + "grad_norm": 0.6860305666923523, + "learning_rate": 0.00013467464521800244, + "loss": 2.5567, + "step": 7807 + }, + { + "epoch": 0.6301347752400936, + "grad_norm": 0.7003797292709351, + "learning_rate": 0.0001346598373247198, + "loss": 2.6444, + "step": 7808 + }, + { + "epoch": 0.6302154789766766, + "grad_norm": 0.6341832876205444, + "learning_rate": 0.00013464502856762344, + "loss": 2.5475, + "step": 7809 + }, + { + "epoch": 0.6302961827132596, + "grad_norm": 0.6255922317504883, + "learning_rate": 0.00013463021894708242, + "loss": 2.5875, + "step": 7810 + }, + { + "epoch": 0.6303768864498426, + "grad_norm": 0.7136420607566833, + "learning_rate": 
0.00013461540846346575, + "loss": 2.5708, + "step": 7811 + }, + { + "epoch": 0.6304575901864257, + "grad_norm": 0.7164542078971863, + "learning_rate": 0.00013460059711714267, + "loss": 2.4975, + "step": 7812 + }, + { + "epoch": 0.6305382939230086, + "grad_norm": 0.7667872905731201, + "learning_rate": 0.00013458578490848226, + "loss": 2.6124, + "step": 7813 + }, + { + "epoch": 0.6306189976595916, + "grad_norm": 0.6631812453269958, + "learning_rate": 0.0001345709718378537, + "loss": 2.5318, + "step": 7814 + }, + { + "epoch": 0.6306997013961746, + "grad_norm": 0.696864664554596, + "learning_rate": 0.0001345561579056261, + "loss": 2.6171, + "step": 7815 + }, + { + "epoch": 0.6307804051327577, + "grad_norm": 0.7368598580360413, + "learning_rate": 0.00013454134311216873, + "loss": 2.5734, + "step": 7816 + }, + { + "epoch": 0.6308611088693407, + "grad_norm": 0.7279712557792664, + "learning_rate": 0.00013452652745785083, + "loss": 2.6231, + "step": 7817 + }, + { + "epoch": 0.6309418126059236, + "grad_norm": 0.8070993423461914, + "learning_rate": 0.00013451171094304158, + "loss": 2.5486, + "step": 7818 + }, + { + "epoch": 0.6310225163425066, + "grad_norm": 0.7522621750831604, + "learning_rate": 0.0001344968935681103, + "loss": 2.5576, + "step": 7819 + }, + { + "epoch": 0.6311032200790897, + "grad_norm": 0.8185423612594604, + "learning_rate": 0.00013448207533342624, + "loss": 2.6068, + "step": 7820 + }, + { + "epoch": 0.6311839238156727, + "grad_norm": 0.7542584538459778, + "learning_rate": 0.0001344672562393587, + "loss": 2.643, + "step": 7821 + }, + { + "epoch": 0.6312646275522557, + "grad_norm": 0.7892276644706726, + "learning_rate": 0.00013445243628627712, + "loss": 2.6211, + "step": 7822 + }, + { + "epoch": 0.6313453312888386, + "grad_norm": 0.7216602563858032, + "learning_rate": 0.00013443761547455072, + "loss": 2.5725, + "step": 7823 + }, + { + "epoch": 0.6314260350254217, + "grad_norm": 0.6750743985176086, + "learning_rate": 0.0001344227938045489, + "loss": 2.5319, + 
"step": 7824 + }, + { + "epoch": 0.6315067387620047, + "grad_norm": 0.6711540222167969, + "learning_rate": 0.0001344079712766411, + "loss": 2.5957, + "step": 7825 + }, + { + "epoch": 0.6315874424985877, + "grad_norm": 0.6923524737358093, + "learning_rate": 0.00013439314789119667, + "loss": 2.6084, + "step": 7826 + }, + { + "epoch": 0.6316681462351706, + "grad_norm": 0.6859166026115417, + "learning_rate": 0.00013437832364858517, + "loss": 2.5608, + "step": 7827 + }, + { + "epoch": 0.6317488499717537, + "grad_norm": 0.7340966463088989, + "learning_rate": 0.0001343634985491759, + "loss": 2.531, + "step": 7828 + }, + { + "epoch": 0.6318295537083367, + "grad_norm": 0.7374520301818848, + "learning_rate": 0.00013434867259333848, + "loss": 2.5972, + "step": 7829 + }, + { + "epoch": 0.6319102574449197, + "grad_norm": 0.7252814769744873, + "learning_rate": 0.00013433384578144232, + "loss": 2.5874, + "step": 7830 + }, + { + "epoch": 0.6319909611815027, + "grad_norm": 0.7000489830970764, + "learning_rate": 0.000134319018113857, + "loss": 2.6137, + "step": 7831 + }, + { + "epoch": 0.6320716649180858, + "grad_norm": 0.805981457233429, + "learning_rate": 0.00013430418959095198, + "loss": 2.5581, + "step": 7832 + }, + { + "epoch": 0.6321523686546687, + "grad_norm": 0.7459721565246582, + "learning_rate": 0.00013428936021309693, + "loss": 2.5284, + "step": 7833 + }, + { + "epoch": 0.6322330723912517, + "grad_norm": 0.749794065952301, + "learning_rate": 0.00013427452998066136, + "loss": 2.5927, + "step": 7834 + }, + { + "epoch": 0.6323137761278347, + "grad_norm": 0.6925346255302429, + "learning_rate": 0.00013425969889401494, + "loss": 2.5703, + "step": 7835 + }, + { + "epoch": 0.6323944798644178, + "grad_norm": 0.6647117137908936, + "learning_rate": 0.00013424486695352728, + "loss": 2.5649, + "step": 7836 + }, + { + "epoch": 0.6324751836010007, + "grad_norm": 0.7358147501945496, + "learning_rate": 0.00013423003415956796, + "loss": 2.6122, + "step": 7837 + }, + { + "epoch": 
0.6325558873375837, + "grad_norm": 0.7798088788986206, + "learning_rate": 0.00013421520051250675, + "loss": 2.5805, + "step": 7838 + }, + { + "epoch": 0.6326365910741667, + "grad_norm": 0.7108271718025208, + "learning_rate": 0.00013420036601271334, + "loss": 2.5457, + "step": 7839 + }, + { + "epoch": 0.6327172948107498, + "grad_norm": 0.7108528017997742, + "learning_rate": 0.00013418553066055734, + "loss": 2.6313, + "step": 7840 + }, + { + "epoch": 0.6327979985473328, + "grad_norm": 0.7325249910354614, + "learning_rate": 0.00013417069445640858, + "loss": 2.5598, + "step": 7841 + }, + { + "epoch": 0.6328787022839157, + "grad_norm": 0.6861844062805176, + "learning_rate": 0.0001341558574006368, + "loss": 2.5899, + "step": 7842 + }, + { + "epoch": 0.6329594060204987, + "grad_norm": 0.7576130628585815, + "learning_rate": 0.00013414101949361175, + "loss": 2.6077, + "step": 7843 + }, + { + "epoch": 0.6330401097570818, + "grad_norm": 0.7756128907203674, + "learning_rate": 0.0001341261807357033, + "loss": 2.6111, + "step": 7844 + }, + { + "epoch": 0.6331208134936648, + "grad_norm": 0.7131127715110779, + "learning_rate": 0.00013411134112728114, + "loss": 2.5227, + "step": 7845 + }, + { + "epoch": 0.6332015172302478, + "grad_norm": 0.6517898440361023, + "learning_rate": 0.00013409650066871525, + "loss": 2.5825, + "step": 7846 + }, + { + "epoch": 0.6332822209668307, + "grad_norm": 0.8452722430229187, + "learning_rate": 0.0001340816593603754, + "loss": 2.6037, + "step": 7847 + }, + { + "epoch": 0.6333629247034138, + "grad_norm": 0.7421110272407532, + "learning_rate": 0.00013406681720263153, + "loss": 2.5684, + "step": 7848 + }, + { + "epoch": 0.6334436284399968, + "grad_norm": 0.695139467716217, + "learning_rate": 0.0001340519741958535, + "loss": 2.5648, + "step": 7849 + }, + { + "epoch": 0.6335243321765798, + "grad_norm": 0.7780016660690308, + "learning_rate": 0.0001340371303404113, + "loss": 2.6849, + "step": 7850 + }, + { + "epoch": 0.6336050359131628, + "grad_norm": 
0.7276864051818848, + "learning_rate": 0.00013402228563667482, + "loss": 2.6198, + "step": 7851 + }, + { + "epoch": 0.6336857396497458, + "grad_norm": 0.7566827535629272, + "learning_rate": 0.00013400744008501404, + "loss": 2.5803, + "step": 7852 + }, + { + "epoch": 0.6337664433863288, + "grad_norm": 0.7933458089828491, + "learning_rate": 0.00013399259368579894, + "loss": 2.6029, + "step": 7853 + }, + { + "epoch": 0.6338471471229118, + "grad_norm": 0.6849822402000427, + "learning_rate": 0.00013397774643939957, + "loss": 2.5454, + "step": 7854 + }, + { + "epoch": 0.6339278508594948, + "grad_norm": 0.7054651379585266, + "learning_rate": 0.00013396289834618594, + "loss": 2.5905, + "step": 7855 + }, + { + "epoch": 0.6340085545960777, + "grad_norm": 0.7036863565444946, + "learning_rate": 0.00013394804940652813, + "loss": 2.6342, + "step": 7856 + }, + { + "epoch": 0.6340892583326608, + "grad_norm": 0.7101735472679138, + "learning_rate": 0.00013393319962079614, + "loss": 2.6402, + "step": 7857 + }, + { + "epoch": 0.6341699620692438, + "grad_norm": 0.7053956389427185, + "learning_rate": 0.0001339183489893601, + "loss": 2.5841, + "step": 7858 + }, + { + "epoch": 0.6342506658058268, + "grad_norm": 0.7734887003898621, + "learning_rate": 0.0001339034975125902, + "loss": 2.652, + "step": 7859 + }, + { + "epoch": 0.6343313695424098, + "grad_norm": 0.6714119911193848, + "learning_rate": 0.0001338886451908565, + "loss": 2.5927, + "step": 7860 + }, + { + "epoch": 0.6344120732789928, + "grad_norm": 0.6580910682678223, + "learning_rate": 0.00013387379202452917, + "loss": 2.6114, + "step": 7861 + }, + { + "epoch": 0.6344927770155758, + "grad_norm": 0.6810200214385986, + "learning_rate": 0.00013385893801397836, + "loss": 2.5616, + "step": 7862 + }, + { + "epoch": 0.6345734807521588, + "grad_norm": 0.6989572048187256, + "learning_rate": 0.00013384408315957432, + "loss": 2.5954, + "step": 7863 + }, + { + "epoch": 0.6346541844887418, + "grad_norm": 0.7033671736717224, + "learning_rate": 
0.00013382922746168728, + "loss": 2.6015, + "step": 7864 + }, + { + "epoch": 0.6347348882253249, + "grad_norm": 0.6873033046722412, + "learning_rate": 0.0001338143709206875, + "loss": 2.562, + "step": 7865 + }, + { + "epoch": 0.6348155919619078, + "grad_norm": 0.7361463904380798, + "learning_rate": 0.00013379951353694513, + "loss": 2.6175, + "step": 7866 + }, + { + "epoch": 0.6348962956984908, + "grad_norm": 0.7623226046562195, + "learning_rate": 0.00013378465531083055, + "loss": 2.7342, + "step": 7867 + }, + { + "epoch": 0.6349769994350738, + "grad_norm": 0.7427035570144653, + "learning_rate": 0.0001337697962427141, + "loss": 2.5468, + "step": 7868 + }, + { + "epoch": 0.6350577031716569, + "grad_norm": 0.6865772008895874, + "learning_rate": 0.00013375493633296598, + "loss": 2.6112, + "step": 7869 + }, + { + "epoch": 0.6351384069082399, + "grad_norm": 0.663567304611206, + "learning_rate": 0.00013374007558195666, + "loss": 2.5896, + "step": 7870 + }, + { + "epoch": 0.6352191106448228, + "grad_norm": 0.6804360151290894, + "learning_rate": 0.00013372521399005643, + "loss": 2.58, + "step": 7871 + }, + { + "epoch": 0.6352998143814058, + "grad_norm": 0.6755216121673584, + "learning_rate": 0.0001337103515576357, + "loss": 2.5593, + "step": 7872 + }, + { + "epoch": 0.6353805181179889, + "grad_norm": 0.8148807883262634, + "learning_rate": 0.00013369548828506491, + "loss": 2.6473, + "step": 7873 + }, + { + "epoch": 0.6354612218545719, + "grad_norm": 0.713009774684906, + "learning_rate": 0.00013368062417271447, + "loss": 2.6002, + "step": 7874 + }, + { + "epoch": 0.6355419255911549, + "grad_norm": 0.6390172839164734, + "learning_rate": 0.00013366575922095484, + "loss": 2.5794, + "step": 7875 + }, + { + "epoch": 0.6356226293277378, + "grad_norm": 0.7228195667266846, + "learning_rate": 0.00013365089343015649, + "loss": 2.6051, + "step": 7876 + }, + { + "epoch": 0.6357033330643209, + "grad_norm": 0.7563474178314209, + "learning_rate": 0.00013363602680068986, + "loss": 2.6308, + 
"step": 7877 + }, + { + "epoch": 0.6357840368009039, + "grad_norm": 0.7366798520088196, + "learning_rate": 0.00013362115933292557, + "loss": 2.5589, + "step": 7878 + }, + { + "epoch": 0.6358647405374869, + "grad_norm": 0.7137070894241333, + "learning_rate": 0.00013360629102723409, + "loss": 2.6428, + "step": 7879 + }, + { + "epoch": 0.6359454442740698, + "grad_norm": 0.6799132823944092, + "learning_rate": 0.000133591421883986, + "loss": 2.5549, + "step": 7880 + }, + { + "epoch": 0.6360261480106529, + "grad_norm": 0.7031344771385193, + "learning_rate": 0.00013357655190355188, + "loss": 2.6298, + "step": 7881 + }, + { + "epoch": 0.6361068517472359, + "grad_norm": 0.7441670298576355, + "learning_rate": 0.00013356168108630227, + "loss": 2.5844, + "step": 7882 + }, + { + "epoch": 0.6361875554838189, + "grad_norm": 0.7281978726387024, + "learning_rate": 0.00013354680943260784, + "loss": 2.5773, + "step": 7883 + }, + { + "epoch": 0.6362682592204019, + "grad_norm": 0.6969650983810425, + "learning_rate": 0.00013353193694283928, + "loss": 2.6156, + "step": 7884 + }, + { + "epoch": 0.636348962956985, + "grad_norm": 0.6668435335159302, + "learning_rate": 0.00013351706361736714, + "loss": 2.6328, + "step": 7885 + }, + { + "epoch": 0.6364296666935679, + "grad_norm": 0.6909573078155518, + "learning_rate": 0.0001335021894565622, + "loss": 2.5772, + "step": 7886 + }, + { + "epoch": 0.6365103704301509, + "grad_norm": 0.6740022897720337, + "learning_rate": 0.0001334873144607951, + "loss": 2.6435, + "step": 7887 + }, + { + "epoch": 0.6365910741667339, + "grad_norm": 0.7203185558319092, + "learning_rate": 0.0001334724386304366, + "loss": 2.5401, + "step": 7888 + }, + { + "epoch": 0.636671777903317, + "grad_norm": 0.7343020439147949, + "learning_rate": 0.0001334575619658574, + "loss": 2.5811, + "step": 7889 + }, + { + "epoch": 0.6367524816399, + "grad_norm": 0.6941348314285278, + "learning_rate": 0.00013344268446742835, + "loss": 2.6267, + "step": 7890 + }, + { + "epoch": 
0.6368331853764829, + "grad_norm": 0.6983792185783386, + "learning_rate": 0.00013342780613552016, + "loss": 2.533, + "step": 7891 + }, + { + "epoch": 0.6369138891130659, + "grad_norm": 0.7093533277511597, + "learning_rate": 0.00013341292697050365, + "loss": 2.6616, + "step": 7892 + }, + { + "epoch": 0.636994592849649, + "grad_norm": 0.7377648949623108, + "learning_rate": 0.00013339804697274965, + "loss": 2.6032, + "step": 7893 + }, + { + "epoch": 0.637075296586232, + "grad_norm": 0.6669821739196777, + "learning_rate": 0.00013338316614262903, + "loss": 2.6082, + "step": 7894 + }, + { + "epoch": 0.6371560003228149, + "grad_norm": 0.6665576100349426, + "learning_rate": 0.00013336828448051263, + "loss": 2.6114, + "step": 7895 + }, + { + "epoch": 0.6372367040593979, + "grad_norm": 0.6893584132194519, + "learning_rate": 0.0001333534019867714, + "loss": 2.5886, + "step": 7896 + }, + { + "epoch": 0.637317407795981, + "grad_norm": 0.7651494741439819, + "learning_rate": 0.00013333851866177617, + "loss": 2.5622, + "step": 7897 + }, + { + "epoch": 0.637398111532564, + "grad_norm": 0.8124055862426758, + "learning_rate": 0.00013332363450589788, + "loss": 2.6036, + "step": 7898 + }, + { + "epoch": 0.637478815269147, + "grad_norm": 0.7394436597824097, + "learning_rate": 0.00013330874951950755, + "loss": 2.6214, + "step": 7899 + }, + { + "epoch": 0.6375595190057299, + "grad_norm": 0.6279659867286682, + "learning_rate": 0.00013329386370297615, + "loss": 2.5652, + "step": 7900 + }, + { + "epoch": 0.637640222742313, + "grad_norm": 0.7289649248123169, + "learning_rate": 0.00013327897705667455, + "loss": 2.5628, + "step": 7901 + }, + { + "epoch": 0.637720926478896, + "grad_norm": 0.7267701625823975, + "learning_rate": 0.0001332640895809739, + "loss": 2.5475, + "step": 7902 + }, + { + "epoch": 0.637801630215479, + "grad_norm": 0.7470490336418152, + "learning_rate": 0.00013324920127624515, + "loss": 2.5054, + "step": 7903 + }, + { + "epoch": 0.637882333952062, + "grad_norm": 
0.6963294148445129, + "learning_rate": 0.00013323431214285944, + "loss": 2.5992, + "step": 7904 + }, + { + "epoch": 0.6379630376886449, + "grad_norm": 0.6993808746337891, + "learning_rate": 0.00013321942218118778, + "loss": 2.6044, + "step": 7905 + }, + { + "epoch": 0.638043741425228, + "grad_norm": 0.6620917916297913, + "learning_rate": 0.00013320453139160126, + "loss": 2.5278, + "step": 7906 + }, + { + "epoch": 0.638124445161811, + "grad_norm": 0.6535444855690002, + "learning_rate": 0.00013318963977447106, + "loss": 2.6069, + "step": 7907 + }, + { + "epoch": 0.638205148898394, + "grad_norm": 0.6913008689880371, + "learning_rate": 0.00013317474733016824, + "loss": 2.5271, + "step": 7908 + }, + { + "epoch": 0.638285852634977, + "grad_norm": 0.6760269403457642, + "learning_rate": 0.000133159854059064, + "loss": 2.7029, + "step": 7909 + }, + { + "epoch": 0.63836655637156, + "grad_norm": 0.7026536464691162, + "learning_rate": 0.0001331449599615295, + "loss": 2.592, + "step": 7910 + }, + { + "epoch": 0.638447260108143, + "grad_norm": 0.7935923933982849, + "learning_rate": 0.000133130065037936, + "loss": 2.5674, + "step": 7911 + }, + { + "epoch": 0.638527963844726, + "grad_norm": 0.694675087928772, + "learning_rate": 0.00013311516928865466, + "loss": 2.6727, + "step": 7912 + }, + { + "epoch": 0.638608667581309, + "grad_norm": 0.7378186583518982, + "learning_rate": 0.00013310027271405672, + "loss": 2.5691, + "step": 7913 + }, + { + "epoch": 0.638689371317892, + "grad_norm": 0.7684193849563599, + "learning_rate": 0.00013308537531451345, + "loss": 2.5796, + "step": 7914 + }, + { + "epoch": 0.638770075054475, + "grad_norm": 0.6881510019302368, + "learning_rate": 0.00013307047709039619, + "loss": 2.6, + "step": 7915 + }, + { + "epoch": 0.638850778791058, + "grad_norm": 0.7341364026069641, + "learning_rate": 0.00013305557804207618, + "loss": 2.622, + "step": 7916 + }, + { + "epoch": 0.638931482527641, + "grad_norm": 0.7620663642883301, + "learning_rate": 
0.00013304067816992474, + "loss": 2.5571, + "step": 7917 + }, + { + "epoch": 0.6390121862642241, + "grad_norm": 0.6929789781570435, + "learning_rate": 0.00013302577747431322, + "loss": 2.6204, + "step": 7918 + }, + { + "epoch": 0.639092890000807, + "grad_norm": 0.6942943334579468, + "learning_rate": 0.000133010875955613, + "loss": 2.6737, + "step": 7919 + }, + { + "epoch": 0.63917359373739, + "grad_norm": 0.69537752866745, + "learning_rate": 0.0001329959736141955, + "loss": 2.6105, + "step": 7920 + }, + { + "epoch": 0.639254297473973, + "grad_norm": 0.6690821051597595, + "learning_rate": 0.00013298107045043203, + "loss": 2.6279, + "step": 7921 + }, + { + "epoch": 0.6393350012105561, + "grad_norm": 0.7748103141784668, + "learning_rate": 0.00013296616646469412, + "loss": 2.6307, + "step": 7922 + }, + { + "epoch": 0.6394157049471391, + "grad_norm": 0.7509558200836182, + "learning_rate": 0.00013295126165735311, + "loss": 2.6388, + "step": 7923 + }, + { + "epoch": 0.639496408683722, + "grad_norm": 0.7641764283180237, + "learning_rate": 0.0001329363560287806, + "loss": 2.5819, + "step": 7924 + }, + { + "epoch": 0.639577112420305, + "grad_norm": 0.6912327408790588, + "learning_rate": 0.00013292144957934794, + "loss": 2.5588, + "step": 7925 + }, + { + "epoch": 0.6396578161568881, + "grad_norm": 0.7568803429603577, + "learning_rate": 0.0001329065423094267, + "loss": 2.5627, + "step": 7926 + }, + { + "epoch": 0.6397385198934711, + "grad_norm": 0.7272306084632874, + "learning_rate": 0.00013289163421938843, + "loss": 2.6101, + "step": 7927 + }, + { + "epoch": 0.6398192236300541, + "grad_norm": 0.6965963840484619, + "learning_rate": 0.00013287672530960465, + "loss": 2.5967, + "step": 7928 + }, + { + "epoch": 0.639899927366637, + "grad_norm": 0.7729843854904175, + "learning_rate": 0.00013286181558044694, + "loss": 2.6222, + "step": 7929 + }, + { + "epoch": 0.6399806311032201, + "grad_norm": 0.6876606941223145, + "learning_rate": 0.00013284690503228687, + "loss": 2.6162, + 
"step": 7930 + }, + { + "epoch": 0.6400613348398031, + "grad_norm": 0.7555204629898071, + "learning_rate": 0.0001328319936654961, + "loss": 2.588, + "step": 7931 + }, + { + "epoch": 0.6401420385763861, + "grad_norm": 0.7324720621109009, + "learning_rate": 0.0001328170814804462, + "loss": 2.6111, + "step": 7932 + }, + { + "epoch": 0.640222742312969, + "grad_norm": 0.6802392601966858, + "learning_rate": 0.0001328021684775088, + "loss": 2.5955, + "step": 7933 + }, + { + "epoch": 0.6403034460495521, + "grad_norm": 0.7564330697059631, + "learning_rate": 0.00013278725465705568, + "loss": 2.5355, + "step": 7934 + }, + { + "epoch": 0.6403841497861351, + "grad_norm": 0.6916235089302063, + "learning_rate": 0.00013277234001945844, + "loss": 2.6037, + "step": 7935 + }, + { + "epoch": 0.6404648535227181, + "grad_norm": 0.688819169998169, + "learning_rate": 0.00013275742456508885, + "loss": 2.5626, + "step": 7936 + }, + { + "epoch": 0.6405455572593011, + "grad_norm": 0.6647922992706299, + "learning_rate": 0.0001327425082943186, + "loss": 2.6166, + "step": 7937 + }, + { + "epoch": 0.6406262609958842, + "grad_norm": 0.6792626976966858, + "learning_rate": 0.00013272759120751943, + "loss": 2.6206, + "step": 7938 + }, + { + "epoch": 0.6407069647324671, + "grad_norm": 0.6482827663421631, + "learning_rate": 0.00013271267330506312, + "loss": 2.5558, + "step": 7939 + }, + { + "epoch": 0.6407876684690501, + "grad_norm": 0.6628372073173523, + "learning_rate": 0.0001326977545873215, + "loss": 2.5904, + "step": 7940 + }, + { + "epoch": 0.6408683722056331, + "grad_norm": 0.7168916463851929, + "learning_rate": 0.00013268283505466635, + "loss": 2.5189, + "step": 7941 + }, + { + "epoch": 0.6409490759422162, + "grad_norm": 0.6691678762435913, + "learning_rate": 0.00013266791470746957, + "loss": 2.608, + "step": 7942 + }, + { + "epoch": 0.6410297796787991, + "grad_norm": 0.6850359439849854, + "learning_rate": 0.00013265299354610292, + "loss": 2.5929, + "step": 7943 + }, + { + "epoch": 
0.6411104834153821, + "grad_norm": 0.6807669401168823, + "learning_rate": 0.0001326380715709383, + "loss": 2.6016, + "step": 7944 + }, + { + "epoch": 0.6411911871519651, + "grad_norm": 0.6450446844100952, + "learning_rate": 0.00013262314878234767, + "loss": 2.6129, + "step": 7945 + }, + { + "epoch": 0.6412718908885482, + "grad_norm": 0.679115355014801, + "learning_rate": 0.00013260822518070285, + "loss": 2.6049, + "step": 7946 + }, + { + "epoch": 0.6413525946251312, + "grad_norm": 0.7082008123397827, + "learning_rate": 0.00013259330076637583, + "loss": 2.5673, + "step": 7947 + }, + { + "epoch": 0.6414332983617141, + "grad_norm": 0.7357851266860962, + "learning_rate": 0.00013257837553973855, + "loss": 2.6118, + "step": 7948 + }, + { + "epoch": 0.6415140020982971, + "grad_norm": 0.687035083770752, + "learning_rate": 0.000132563449501163, + "loss": 2.5359, + "step": 7949 + }, + { + "epoch": 0.6415947058348802, + "grad_norm": 0.6950698494911194, + "learning_rate": 0.00013254852265102117, + "loss": 2.5527, + "step": 7950 + }, + { + "epoch": 0.6416754095714632, + "grad_norm": 0.6878959536552429, + "learning_rate": 0.00013253359498968507, + "loss": 2.611, + "step": 7951 + }, + { + "epoch": 0.6417561133080462, + "grad_norm": 0.7224605083465576, + "learning_rate": 0.00013251866651752675, + "loss": 2.5459, + "step": 7952 + }, + { + "epoch": 0.6418368170446291, + "grad_norm": 0.7299731969833374, + "learning_rate": 0.00013250373723491826, + "loss": 2.5651, + "step": 7953 + }, + { + "epoch": 0.6419175207812122, + "grad_norm": 0.7663037776947021, + "learning_rate": 0.00013248880714223163, + "loss": 2.6073, + "step": 7954 + }, + { + "epoch": 0.6419982245177952, + "grad_norm": 0.6532007455825806, + "learning_rate": 0.00013247387623983902, + "loss": 2.6087, + "step": 7955 + }, + { + "epoch": 0.6420789282543782, + "grad_norm": 0.7520449757575989, + "learning_rate": 0.00013245894452811255, + "loss": 2.5998, + "step": 7956 + }, + { + "epoch": 0.6421596319909612, + "grad_norm": 
0.7196050882339478, + "learning_rate": 0.0001324440120074243, + "loss": 2.6448, + "step": 7957 + }, + { + "epoch": 0.6422403357275441, + "grad_norm": 0.7093806862831116, + "learning_rate": 0.0001324290786781465, + "loss": 2.5935, + "step": 7958 + }, + { + "epoch": 0.6423210394641272, + "grad_norm": 0.695541501045227, + "learning_rate": 0.00013241414454065125, + "loss": 2.5872, + "step": 7959 + }, + { + "epoch": 0.6424017432007102, + "grad_norm": 0.6763006448745728, + "learning_rate": 0.0001323992095953108, + "loss": 2.572, + "step": 7960 + }, + { + "epoch": 0.6424824469372932, + "grad_norm": 0.6403522491455078, + "learning_rate": 0.00013238427384249738, + "loss": 2.6137, + "step": 7961 + }, + { + "epoch": 0.6425631506738761, + "grad_norm": 0.6647571325302124, + "learning_rate": 0.00013236933728258315, + "loss": 2.5904, + "step": 7962 + }, + { + "epoch": 0.6426438544104592, + "grad_norm": 0.6931071877479553, + "learning_rate": 0.0001323543999159405, + "loss": 2.6085, + "step": 7963 + }, + { + "epoch": 0.6427245581470422, + "grad_norm": 0.6899439096450806, + "learning_rate": 0.00013233946174294155, + "loss": 2.5555, + "step": 7964 + }, + { + "epoch": 0.6428052618836252, + "grad_norm": 0.6564984321594238, + "learning_rate": 0.0001323245227639587, + "loss": 2.576, + "step": 7965 + }, + { + "epoch": 0.6428859656202082, + "grad_norm": 0.7427607774734497, + "learning_rate": 0.00013230958297936427, + "loss": 2.6178, + "step": 7966 + }, + { + "epoch": 0.6429666693567913, + "grad_norm": 0.6884508728981018, + "learning_rate": 0.00013229464238953054, + "loss": 2.6519, + "step": 7967 + }, + { + "epoch": 0.6430473730933742, + "grad_norm": 0.692442774772644, + "learning_rate": 0.00013227970099482993, + "loss": 2.5784, + "step": 7968 + }, + { + "epoch": 0.6431280768299572, + "grad_norm": 0.6637876629829407, + "learning_rate": 0.00013226475879563477, + "loss": 2.5785, + "step": 7969 + }, + { + "epoch": 0.6432087805665402, + "grad_norm": 0.6844972372055054, + "learning_rate": 
0.0001322498157923175, + "loss": 2.5745, + "step": 7970 + }, + { + "epoch": 0.6432894843031233, + "grad_norm": 0.7259756922721863, + "learning_rate": 0.0001322348719852505, + "loss": 2.5696, + "step": 7971 + }, + { + "epoch": 0.6433701880397062, + "grad_norm": 0.6719023585319519, + "learning_rate": 0.00013221992737480625, + "loss": 2.6049, + "step": 7972 + }, + { + "epoch": 0.6434508917762892, + "grad_norm": 0.7160155773162842, + "learning_rate": 0.00013220498196135717, + "loss": 2.572, + "step": 7973 + }, + { + "epoch": 0.6435315955128722, + "grad_norm": 0.6920225620269775, + "learning_rate": 0.00013219003574527576, + "loss": 2.6576, + "step": 7974 + }, + { + "epoch": 0.6436122992494553, + "grad_norm": 0.698518693447113, + "learning_rate": 0.0001321750887269345, + "loss": 2.6074, + "step": 7975 + }, + { + "epoch": 0.6436930029860383, + "grad_norm": 0.7607932090759277, + "learning_rate": 0.00013216014090670594, + "loss": 2.6173, + "step": 7976 + }, + { + "epoch": 0.6437737067226212, + "grad_norm": 0.8130847811698914, + "learning_rate": 0.0001321451922849626, + "loss": 2.6023, + "step": 7977 + }, + { + "epoch": 0.6438544104592042, + "grad_norm": 0.676675021648407, + "learning_rate": 0.00013213024286207702, + "loss": 2.6174, + "step": 7978 + }, + { + "epoch": 0.6439351141957873, + "grad_norm": 0.7018851041793823, + "learning_rate": 0.00013211529263842183, + "loss": 2.5713, + "step": 7979 + }, + { + "epoch": 0.6440158179323703, + "grad_norm": 0.796097457408905, + "learning_rate": 0.00013210034161436954, + "loss": 2.5937, + "step": 7980 + }, + { + "epoch": 0.6440965216689533, + "grad_norm": 0.7118527293205261, + "learning_rate": 0.0001320853897902929, + "loss": 2.5721, + "step": 7981 + }, + { + "epoch": 0.6441772254055362, + "grad_norm": 0.7282249331474304, + "learning_rate": 0.00013207043716656445, + "loss": 2.5975, + "step": 7982 + }, + { + "epoch": 0.6442579291421193, + "grad_norm": 0.6710900664329529, + "learning_rate": 0.00013205548374355686, + "loss": 2.5809, + 
"step": 7983 + }, + { + "epoch": 0.6443386328787023, + "grad_norm": 0.7045658230781555, + "learning_rate": 0.00013204052952164278, + "loss": 2.5715, + "step": 7984 + }, + { + "epoch": 0.6444193366152853, + "grad_norm": 0.719507098197937, + "learning_rate": 0.00013202557450119504, + "loss": 2.5948, + "step": 7985 + }, + { + "epoch": 0.6445000403518683, + "grad_norm": 0.7603922486305237, + "learning_rate": 0.0001320106186825862, + "loss": 2.6176, + "step": 7986 + }, + { + "epoch": 0.6445807440884513, + "grad_norm": 0.7057444453239441, + "learning_rate": 0.0001319956620661891, + "loss": 2.5905, + "step": 7987 + }, + { + "epoch": 0.6446614478250343, + "grad_norm": 0.7884874939918518, + "learning_rate": 0.00013198070465237645, + "loss": 2.5892, + "step": 7988 + }, + { + "epoch": 0.6447421515616173, + "grad_norm": 0.6932834386825562, + "learning_rate": 0.00013196574644152103, + "loss": 2.6032, + "step": 7989 + }, + { + "epoch": 0.6448228552982003, + "grad_norm": 0.7361180186271667, + "learning_rate": 0.00013195078743399568, + "loss": 2.5877, + "step": 7990 + }, + { + "epoch": 0.6449035590347834, + "grad_norm": 0.6843615174293518, + "learning_rate": 0.00013193582763017315, + "loss": 2.5804, + "step": 7991 + }, + { + "epoch": 0.6449842627713663, + "grad_norm": 0.7592078447341919, + "learning_rate": 0.00013192086703042635, + "loss": 2.6464, + "step": 7992 + }, + { + "epoch": 0.6450649665079493, + "grad_norm": 0.7362154126167297, + "learning_rate": 0.0001319059056351281, + "loss": 2.6154, + "step": 7993 + }, + { + "epoch": 0.6451456702445323, + "grad_norm": 0.6721758246421814, + "learning_rate": 0.00013189094344465125, + "loss": 2.5735, + "step": 7994 + }, + { + "epoch": 0.6452263739811154, + "grad_norm": 0.6221550107002258, + "learning_rate": 0.00013187598045936874, + "loss": 2.5612, + "step": 7995 + }, + { + "epoch": 0.6453070777176984, + "grad_norm": 0.7225528359413147, + "learning_rate": 0.00013186101667965344, + "loss": 2.6263, + "step": 7996 + }, + { + "epoch": 
0.6453877814542813, + "grad_norm": 0.7599418759346008, + "learning_rate": 0.00013184605210587837, + "loss": 2.5814, + "step": 7997 + }, + { + "epoch": 0.6454684851908643, + "grad_norm": 0.6778777837753296, + "learning_rate": 0.00013183108673841642, + "loss": 2.6158, + "step": 7998 + }, + { + "epoch": 0.6455491889274474, + "grad_norm": 0.6860963106155396, + "learning_rate": 0.00013181612057764058, + "loss": 2.6207, + "step": 7999 + }, + { + "epoch": 0.6456298926640304, + "grad_norm": 0.6615182757377625, + "learning_rate": 0.00013180115362392382, + "loss": 2.5571, + "step": 8000 + }, + { + "epoch": 0.6456298926640304, + "eval_loss": 2.5128066539764404, + "eval_runtime": 754.3655, + "eval_samples_per_second": 3.473, + "eval_steps_per_second": 0.579, + "step": 8000 + }, + { + "epoch": 0.6457105964006133, + "grad_norm": 0.688169538974762, + "learning_rate": 0.0001317861858776392, + "loss": 2.6513, + "step": 8001 + }, + { + "epoch": 0.6457913001371963, + "grad_norm": 0.6726182103157043, + "learning_rate": 0.00013177121733915975, + "loss": 2.5909, + "step": 8002 + }, + { + "epoch": 0.6458720038737794, + "grad_norm": 0.7348085641860962, + "learning_rate": 0.00013175624800885853, + "loss": 2.577, + "step": 8003 + }, + { + "epoch": 0.6459527076103624, + "grad_norm": 0.677435040473938, + "learning_rate": 0.00013174127788710856, + "loss": 2.5056, + "step": 8004 + }, + { + "epoch": 0.6460334113469454, + "grad_norm": 0.6864951848983765, + "learning_rate": 0.000131726306974283, + "loss": 2.5733, + "step": 8005 + }, + { + "epoch": 0.6461141150835283, + "grad_norm": 0.7070075869560242, + "learning_rate": 0.0001317113352707549, + "loss": 2.5359, + "step": 8006 + }, + { + "epoch": 0.6461948188201113, + "grad_norm": 0.7065049409866333, + "learning_rate": 0.00013169636277689746, + "loss": 2.6261, + "step": 8007 + }, + { + "epoch": 0.6462755225566944, + "grad_norm": 0.6691577434539795, + "learning_rate": 0.0001316813894930838, + "loss": 2.6015, + "step": 8008 + }, + { + "epoch": 
0.6463562262932774, + "grad_norm": 0.6754019260406494, + "learning_rate": 0.0001316664154196871, + "loss": 2.5954, + "step": 8009 + }, + { + "epoch": 0.6464369300298604, + "grad_norm": 0.6172776818275452, + "learning_rate": 0.00013165144055708055, + "loss": 2.5599, + "step": 8010 + }, + { + "epoch": 0.6465176337664433, + "grad_norm": 0.6778094172477722, + "learning_rate": 0.00013163646490563737, + "loss": 2.5407, + "step": 8011 + }, + { + "epoch": 0.6465983375030264, + "grad_norm": 0.7363924980163574, + "learning_rate": 0.00013162148846573076, + "loss": 2.6075, + "step": 8012 + }, + { + "epoch": 0.6466790412396094, + "grad_norm": 0.6662711501121521, + "learning_rate": 0.00013160651123773404, + "loss": 2.5611, + "step": 8013 + }, + { + "epoch": 0.6467597449761924, + "grad_norm": 0.699670135974884, + "learning_rate": 0.00013159153322202043, + "loss": 2.5612, + "step": 8014 + }, + { + "epoch": 0.6468404487127754, + "grad_norm": 0.7382899522781372, + "learning_rate": 0.0001315765544189632, + "loss": 2.6017, + "step": 8015 + }, + { + "epoch": 0.6469211524493584, + "grad_norm": 0.7624868154525757, + "learning_rate": 0.0001315615748289357, + "loss": 2.6174, + "step": 8016 + }, + { + "epoch": 0.6470018561859414, + "grad_norm": 0.704622745513916, + "learning_rate": 0.00013154659445231129, + "loss": 2.5367, + "step": 8017 + }, + { + "epoch": 0.6470825599225244, + "grad_norm": 0.7117413878440857, + "learning_rate": 0.00013153161328946324, + "loss": 2.5958, + "step": 8018 + }, + { + "epoch": 0.6471632636591074, + "grad_norm": 0.6825408339500427, + "learning_rate": 0.00013151663134076497, + "loss": 2.5118, + "step": 8019 + }, + { + "epoch": 0.6472439673956905, + "grad_norm": 0.6732384562492371, + "learning_rate": 0.00013150164860658986, + "loss": 2.6312, + "step": 8020 + }, + { + "epoch": 0.6473246711322734, + "grad_norm": 0.712812602519989, + "learning_rate": 0.00013148666508731134, + "loss": 2.576, + "step": 8021 + }, + { + "epoch": 0.6474053748688564, + "grad_norm": 
0.8128857612609863, + "learning_rate": 0.0001314716807833028, + "loss": 2.5333, + "step": 8022 + }, + { + "epoch": 0.6474860786054394, + "grad_norm": 0.7817162275314331, + "learning_rate": 0.00013145669569493773, + "loss": 2.6835, + "step": 8023 + }, + { + "epoch": 0.6475667823420225, + "grad_norm": 0.7164301872253418, + "learning_rate": 0.00013144170982258956, + "loss": 2.5573, + "step": 8024 + }, + { + "epoch": 0.6476474860786054, + "grad_norm": 0.67625892162323, + "learning_rate": 0.00013142672316663177, + "loss": 2.5976, + "step": 8025 + }, + { + "epoch": 0.6477281898151884, + "grad_norm": 0.6919494867324829, + "learning_rate": 0.0001314117357274379, + "loss": 2.6179, + "step": 8026 + }, + { + "epoch": 0.6478088935517714, + "grad_norm": 0.6787464618682861, + "learning_rate": 0.0001313967475053815, + "loss": 2.5405, + "step": 8027 + }, + { + "epoch": 0.6478895972883545, + "grad_norm": 0.6305621862411499, + "learning_rate": 0.00013138175850083605, + "loss": 2.6016, + "step": 8028 + }, + { + "epoch": 0.6479703010249375, + "grad_norm": 0.7456182837486267, + "learning_rate": 0.00013136676871417516, + "loss": 2.6091, + "step": 8029 + }, + { + "epoch": 0.6480510047615204, + "grad_norm": 0.7047890424728394, + "learning_rate": 0.00013135177814577238, + "loss": 2.6108, + "step": 8030 + }, + { + "epoch": 0.6481317084981034, + "grad_norm": 0.7509389519691467, + "learning_rate": 0.00013133678679600133, + "loss": 2.6396, + "step": 8031 + }, + { + "epoch": 0.6482124122346865, + "grad_norm": 0.63836270570755, + "learning_rate": 0.00013132179466523566, + "loss": 2.5759, + "step": 8032 + }, + { + "epoch": 0.6482931159712695, + "grad_norm": 0.6994885206222534, + "learning_rate": 0.000131306801753849, + "loss": 2.61, + "step": 8033 + }, + { + "epoch": 0.6483738197078525, + "grad_norm": 0.6762083768844604, + "learning_rate": 0.00013129180806221497, + "loss": 2.5431, + "step": 8034 + }, + { + "epoch": 0.6484545234444354, + "grad_norm": 0.6890944242477417, + "learning_rate": 
0.0001312768135907073, + "loss": 2.5922, + "step": 8035 + }, + { + "epoch": 0.6485352271810185, + "grad_norm": 0.7409473061561584, + "learning_rate": 0.0001312618183396997, + "loss": 2.6132, + "step": 8036 + }, + { + "epoch": 0.6486159309176015, + "grad_norm": 0.6660643815994263, + "learning_rate": 0.00013124682230956585, + "loss": 2.5816, + "step": 8037 + }, + { + "epoch": 0.6486966346541845, + "grad_norm": 0.714235246181488, + "learning_rate": 0.0001312318255006795, + "loss": 2.5613, + "step": 8038 + }, + { + "epoch": 0.6487773383907675, + "grad_norm": 0.6568472385406494, + "learning_rate": 0.00013121682791341442, + "loss": 2.6382, + "step": 8039 + }, + { + "epoch": 0.6488580421273505, + "grad_norm": 0.6874251961708069, + "learning_rate": 0.00013120182954814438, + "loss": 2.593, + "step": 8040 + }, + { + "epoch": 0.6489387458639335, + "grad_norm": 0.7620158791542053, + "learning_rate": 0.0001311868304052432, + "loss": 2.589, + "step": 8041 + }, + { + "epoch": 0.6490194496005165, + "grad_norm": 0.6755926609039307, + "learning_rate": 0.00013117183048508467, + "loss": 2.5876, + "step": 8042 + }, + { + "epoch": 0.6491001533370995, + "grad_norm": 0.6952808499336243, + "learning_rate": 0.00013115682978804264, + "loss": 2.5909, + "step": 8043 + }, + { + "epoch": 0.6491808570736826, + "grad_norm": 0.6599535346031189, + "learning_rate": 0.00013114182831449098, + "loss": 2.6031, + "step": 8044 + }, + { + "epoch": 0.6492615608102655, + "grad_norm": 0.7816598415374756, + "learning_rate": 0.00013112682606480355, + "loss": 2.5633, + "step": 8045 + }, + { + "epoch": 0.6493422645468485, + "grad_norm": 0.7188639640808105, + "learning_rate": 0.00013111182303935425, + "loss": 2.6292, + "step": 8046 + }, + { + "epoch": 0.6494229682834315, + "grad_norm": 0.7131505608558655, + "learning_rate": 0.00013109681923851698, + "loss": 2.5729, + "step": 8047 + }, + { + "epoch": 0.6495036720200146, + "grad_norm": 0.7466408014297485, + "learning_rate": 0.00013108181466266568, + "loss": 2.5742, + 
"step": 8048 + }, + { + "epoch": 0.6495843757565976, + "grad_norm": 0.6707943677902222, + "learning_rate": 0.00013106680931217437, + "loss": 2.5506, + "step": 8049 + }, + { + "epoch": 0.6496650794931805, + "grad_norm": 0.6913424730300903, + "learning_rate": 0.0001310518031874169, + "loss": 2.5639, + "step": 8050 + }, + { + "epoch": 0.6497457832297635, + "grad_norm": 0.8261755704879761, + "learning_rate": 0.00013103679628876733, + "loss": 2.601, + "step": 8051 + }, + { + "epoch": 0.6498264869663466, + "grad_norm": 0.7410566806793213, + "learning_rate": 0.0001310217886165997, + "loss": 2.5326, + "step": 8052 + }, + { + "epoch": 0.6499071907029296, + "grad_norm": 0.7032365202903748, + "learning_rate": 0.00013100678017128798, + "loss": 2.5907, + "step": 8053 + }, + { + "epoch": 0.6499878944395125, + "grad_norm": 0.7074568271636963, + "learning_rate": 0.00013099177095320626, + "loss": 2.6193, + "step": 8054 + }, + { + "epoch": 0.6500685981760955, + "grad_norm": 0.7754546999931335, + "learning_rate": 0.00013097676096272855, + "loss": 2.5832, + "step": 8055 + }, + { + "epoch": 0.6501493019126786, + "grad_norm": 0.7475717663764954, + "learning_rate": 0.00013096175020022903, + "loss": 2.6233, + "step": 8056 + }, + { + "epoch": 0.6502300056492616, + "grad_norm": 0.7863949537277222, + "learning_rate": 0.00013094673866608173, + "loss": 2.5745, + "step": 8057 + }, + { + "epoch": 0.6503107093858446, + "grad_norm": 0.69294673204422, + "learning_rate": 0.0001309317263606608, + "loss": 2.5982, + "step": 8058 + }, + { + "epoch": 0.6503914131224275, + "grad_norm": 0.7096135020256042, + "learning_rate": 0.00013091671328434046, + "loss": 2.5944, + "step": 8059 + }, + { + "epoch": 0.6504721168590105, + "grad_norm": 0.7001097202301025, + "learning_rate": 0.00013090169943749476, + "loss": 2.5435, + "step": 8060 + }, + { + "epoch": 0.6505528205955936, + "grad_norm": 0.7522539496421814, + "learning_rate": 0.00013088668482049792, + "loss": 2.5843, + "step": 8061 + }, + { + "epoch": 
0.6506335243321766, + "grad_norm": 0.6675420999526978, + "learning_rate": 0.00013087166943372418, + "loss": 2.5623, + "step": 8062 + }, + { + "epoch": 0.6507142280687596, + "grad_norm": 0.7779181599617004, + "learning_rate": 0.00013085665327754772, + "loss": 2.6087, + "step": 8063 + }, + { + "epoch": 0.6507949318053425, + "grad_norm": 0.7385239005088806, + "learning_rate": 0.00013084163635234284, + "loss": 2.5725, + "step": 8064 + }, + { + "epoch": 0.6508756355419256, + "grad_norm": 0.6966612339019775, + "learning_rate": 0.00013082661865848375, + "loss": 2.5745, + "step": 8065 + }, + { + "epoch": 0.6509563392785086, + "grad_norm": 0.7098337411880493, + "learning_rate": 0.00013081160019634468, + "loss": 2.5461, + "step": 8066 + }, + { + "epoch": 0.6510370430150916, + "grad_norm": 0.6514503359794617, + "learning_rate": 0.00013079658096630002, + "loss": 2.5869, + "step": 8067 + }, + { + "epoch": 0.6511177467516746, + "grad_norm": 0.680422306060791, + "learning_rate": 0.0001307815609687241, + "loss": 2.6316, + "step": 8068 + }, + { + "epoch": 0.6511984504882576, + "grad_norm": 0.6892665028572083, + "learning_rate": 0.00013076654020399117, + "loss": 2.5862, + "step": 8069 + }, + { + "epoch": 0.6512791542248406, + "grad_norm": 0.7605568170547485, + "learning_rate": 0.00013075151867247568, + "loss": 2.5342, + "step": 8070 + }, + { + "epoch": 0.6513598579614236, + "grad_norm": 0.7571204900741577, + "learning_rate": 0.00013073649637455192, + "loss": 2.5762, + "step": 8071 + }, + { + "epoch": 0.6514405616980066, + "grad_norm": 0.6910812258720398, + "learning_rate": 0.00013072147331059431, + "loss": 2.6635, + "step": 8072 + }, + { + "epoch": 0.6515212654345897, + "grad_norm": 0.765559196472168, + "learning_rate": 0.00013070644948097733, + "loss": 2.5885, + "step": 8073 + }, + { + "epoch": 0.6516019691711726, + "grad_norm": 0.7533665299415588, + "learning_rate": 0.00013069142488607532, + "loss": 2.6545, + "step": 8074 + }, + { + "epoch": 0.6516826729077556, + "grad_norm": 
0.685089647769928, + "learning_rate": 0.0001306763995262628, + "loss": 2.5955, + "step": 8075 + }, + { + "epoch": 0.6517633766443386, + "grad_norm": 0.7280653715133667, + "learning_rate": 0.00013066137340191422, + "loss": 2.5548, + "step": 8076 + }, + { + "epoch": 0.6518440803809217, + "grad_norm": 0.6881482601165771, + "learning_rate": 0.00013064634651340404, + "loss": 2.6143, + "step": 8077 + }, + { + "epoch": 0.6519247841175047, + "grad_norm": 0.6878265142440796, + "learning_rate": 0.0001306313188611068, + "loss": 2.5681, + "step": 8078 + }, + { + "epoch": 0.6520054878540876, + "grad_norm": 0.685238242149353, + "learning_rate": 0.00013061629044539702, + "loss": 2.5517, + "step": 8079 + }, + { + "epoch": 0.6520861915906706, + "grad_norm": 0.6689820885658264, + "learning_rate": 0.00013060126126664928, + "loss": 2.6201, + "step": 8080 + }, + { + "epoch": 0.6521668953272537, + "grad_norm": 0.7128999829292297, + "learning_rate": 0.00013058623132523807, + "loss": 2.5829, + "step": 8081 + }, + { + "epoch": 0.6522475990638367, + "grad_norm": 0.6835216879844666, + "learning_rate": 0.00013057120062153805, + "loss": 2.6312, + "step": 8082 + }, + { + "epoch": 0.6523283028004196, + "grad_norm": 0.7140012383460999, + "learning_rate": 0.00013055616915592382, + "loss": 2.6148, + "step": 8083 + }, + { + "epoch": 0.6524090065370026, + "grad_norm": 0.7378252148628235, + "learning_rate": 0.00013054113692876994, + "loss": 2.5805, + "step": 8084 + }, + { + "epoch": 0.6524897102735857, + "grad_norm": 0.7569258213043213, + "learning_rate": 0.0001305261039404511, + "loss": 2.6088, + "step": 8085 + }, + { + "epoch": 0.6525704140101687, + "grad_norm": 0.6909007430076599, + "learning_rate": 0.00013051107019134195, + "loss": 2.5285, + "step": 8086 + }, + { + "epoch": 0.6526511177467517, + "grad_norm": 0.6785587072372437, + "learning_rate": 0.0001304960356818172, + "loss": 2.5527, + "step": 8087 + }, + { + "epoch": 0.6527318214833346, + "grad_norm": 0.7058801054954529, + "learning_rate": 
0.0001304810004122515, + "loss": 2.6789, + "step": 8088 + }, + { + "epoch": 0.6528125252199177, + "grad_norm": 0.6920512318611145, + "learning_rate": 0.0001304659643830196, + "loss": 2.5748, + "step": 8089 + }, + { + "epoch": 0.6528932289565007, + "grad_norm": 0.6829244494438171, + "learning_rate": 0.00013045092759449625, + "loss": 2.5389, + "step": 8090 + }, + { + "epoch": 0.6529739326930837, + "grad_norm": 0.6942421793937683, + "learning_rate": 0.00013043589004705614, + "loss": 2.5851, + "step": 8091 + }, + { + "epoch": 0.6530546364296667, + "grad_norm": 0.6473072171211243, + "learning_rate": 0.0001304208517410741, + "loss": 2.56, + "step": 8092 + }, + { + "epoch": 0.6531353401662497, + "grad_norm": 0.6692056655883789, + "learning_rate": 0.00013040581267692494, + "loss": 2.5977, + "step": 8093 + }, + { + "epoch": 0.6532160439028327, + "grad_norm": 0.6918915510177612, + "learning_rate": 0.00013039077285498344, + "loss": 2.551, + "step": 8094 + }, + { + "epoch": 0.6532967476394157, + "grad_norm": 0.7432852387428284, + "learning_rate": 0.00013037573227562443, + "loss": 2.5537, + "step": 8095 + }, + { + "epoch": 0.6533774513759987, + "grad_norm": 0.6737081408500671, + "learning_rate": 0.0001303606909392228, + "loss": 2.5947, + "step": 8096 + }, + { + "epoch": 0.6534581551125818, + "grad_norm": 0.6810599565505981, + "learning_rate": 0.0001303456488461533, + "loss": 2.5704, + "step": 8097 + }, + { + "epoch": 0.6535388588491647, + "grad_norm": 0.675240159034729, + "learning_rate": 0.00013033060599679098, + "loss": 2.591, + "step": 8098 + }, + { + "epoch": 0.6536195625857477, + "grad_norm": 0.6888695359230042, + "learning_rate": 0.00013031556239151066, + "loss": 2.5403, + "step": 8099 + }, + { + "epoch": 0.6537002663223307, + "grad_norm": 0.7154796719551086, + "learning_rate": 0.00013030051803068727, + "loss": 2.5654, + "step": 8100 + }, + { + "epoch": 0.6537809700589138, + "grad_norm": 0.6655243635177612, + "learning_rate": 0.0001302854729146958, + "loss": 2.5867, + 
"step": 8101 + }, + { + "epoch": 0.6538616737954968, + "grad_norm": 0.7070788145065308, + "learning_rate": 0.00013027042704391115, + "loss": 2.5593, + "step": 8102 + }, + { + "epoch": 0.6539423775320797, + "grad_norm": 0.7071834206581116, + "learning_rate": 0.0001302553804187083, + "loss": 2.536, + "step": 8103 + }, + { + "epoch": 0.6540230812686627, + "grad_norm": 0.7086542248725891, + "learning_rate": 0.00013024033303946233, + "loss": 2.5644, + "step": 8104 + }, + { + "epoch": 0.6541037850052458, + "grad_norm": 0.6714556813240051, + "learning_rate": 0.00013022528490654818, + "loss": 2.5167, + "step": 8105 + }, + { + "epoch": 0.6541844887418288, + "grad_norm": 0.6905114054679871, + "learning_rate": 0.00013021023602034095, + "loss": 2.5227, + "step": 8106 + }, + { + "epoch": 0.6542651924784118, + "grad_norm": 0.7050586342811584, + "learning_rate": 0.00013019518638121563, + "loss": 2.5725, + "step": 8107 + }, + { + "epoch": 0.6543458962149947, + "grad_norm": 0.6940500736236572, + "learning_rate": 0.00013018013598954737, + "loss": 2.5912, + "step": 8108 + }, + { + "epoch": 0.6544265999515777, + "grad_norm": 0.7136965990066528, + "learning_rate": 0.00013016508484571122, + "loss": 2.6101, + "step": 8109 + }, + { + "epoch": 0.6545073036881608, + "grad_norm": 0.7205774188041687, + "learning_rate": 0.0001301500329500823, + "loss": 2.5869, + "step": 8110 + }, + { + "epoch": 0.6545880074247438, + "grad_norm": 0.6831154823303223, + "learning_rate": 0.00013013498030303575, + "loss": 2.5309, + "step": 8111 + }, + { + "epoch": 0.6546687111613267, + "grad_norm": 0.6778538823127747, + "learning_rate": 0.0001301199269049467, + "loss": 2.6297, + "step": 8112 + }, + { + "epoch": 0.6547494148979097, + "grad_norm": 0.705055832862854, + "learning_rate": 0.00013010487275619034, + "loss": 2.6188, + "step": 8113 + }, + { + "epoch": 0.6548301186344928, + "grad_norm": 0.6927980780601501, + "learning_rate": 0.00013008981785714188, + "loss": 2.5744, + "step": 8114 + }, + { + "epoch": 
0.6549108223710758, + "grad_norm": 0.7070884108543396, + "learning_rate": 0.0001300747622081765, + "loss": 2.618, + "step": 8115 + }, + { + "epoch": 0.6549915261076588, + "grad_norm": 0.723479688167572, + "learning_rate": 0.0001300597058096694, + "loss": 2.5928, + "step": 8116 + }, + { + "epoch": 0.6550722298442417, + "grad_norm": 0.6689562201499939, + "learning_rate": 0.00013004464866199587, + "loss": 2.5592, + "step": 8117 + }, + { + "epoch": 0.6551529335808248, + "grad_norm": 0.6685079336166382, + "learning_rate": 0.00013002959076553115, + "loss": 2.558, + "step": 8118 + }, + { + "epoch": 0.6552336373174078, + "grad_norm": 0.678105890750885, + "learning_rate": 0.00013001453212065057, + "loss": 2.6176, + "step": 8119 + }, + { + "epoch": 0.6553143410539908, + "grad_norm": 0.7355597019195557, + "learning_rate": 0.00012999947272772933, + "loss": 2.6293, + "step": 8120 + }, + { + "epoch": 0.6553950447905738, + "grad_norm": 0.735862672328949, + "learning_rate": 0.00012998441258714284, + "loss": 2.635, + "step": 8121 + }, + { + "epoch": 0.6554757485271568, + "grad_norm": 0.6766025424003601, + "learning_rate": 0.0001299693516992664, + "loss": 2.5829, + "step": 8122 + }, + { + "epoch": 0.6555564522637398, + "grad_norm": 0.6701885461807251, + "learning_rate": 0.00012995429006447542, + "loss": 2.5996, + "step": 8123 + }, + { + "epoch": 0.6556371560003228, + "grad_norm": 0.6814082264900208, + "learning_rate": 0.00012993922768314518, + "loss": 2.5906, + "step": 8124 + }, + { + "epoch": 0.6557178597369058, + "grad_norm": 0.7104958295822144, + "learning_rate": 0.00012992416455565113, + "loss": 2.6708, + "step": 8125 + }, + { + "epoch": 0.6557985634734889, + "grad_norm": 0.6451221108436584, + "learning_rate": 0.0001299091006823687, + "loss": 2.5512, + "step": 8126 + }, + { + "epoch": 0.6558792672100718, + "grad_norm": 0.6736068725585938, + "learning_rate": 0.0001298940360636733, + "loss": 2.5839, + "step": 8127 + }, + { + "epoch": 0.6559599709466548, + "grad_norm": 
0.6873149871826172, + "learning_rate": 0.00012987897069994031, + "loss": 2.5804, + "step": 8128 + }, + { + "epoch": 0.6560406746832378, + "grad_norm": 0.6937728524208069, + "learning_rate": 0.00012986390459154533, + "loss": 2.5648, + "step": 8129 + }, + { + "epoch": 0.6561213784198209, + "grad_norm": 0.7109464406967163, + "learning_rate": 0.00012984883773886377, + "loss": 2.6132, + "step": 8130 + }, + { + "epoch": 0.6562020821564039, + "grad_norm": 0.7134159803390503, + "learning_rate": 0.00012983377014227115, + "loss": 2.6029, + "step": 8131 + }, + { + "epoch": 0.6562827858929868, + "grad_norm": 0.6788110733032227, + "learning_rate": 0.000129818701802143, + "loss": 2.6344, + "step": 8132 + }, + { + "epoch": 0.6563634896295698, + "grad_norm": 0.6798231601715088, + "learning_rate": 0.00012980363271885483, + "loss": 2.5758, + "step": 8133 + }, + { + "epoch": 0.6564441933661529, + "grad_norm": 0.6586930155754089, + "learning_rate": 0.00012978856289278226, + "loss": 2.5918, + "step": 8134 + }, + { + "epoch": 0.6565248971027359, + "grad_norm": 0.6614218950271606, + "learning_rate": 0.0001297734923243008, + "loss": 2.5777, + "step": 8135 + }, + { + "epoch": 0.6566056008393188, + "grad_norm": 0.6874340176582336, + "learning_rate": 0.0001297584210137861, + "loss": 2.5528, + "step": 8136 + }, + { + "epoch": 0.6566863045759018, + "grad_norm": 0.6972174048423767, + "learning_rate": 0.00012974334896161376, + "loss": 2.6551, + "step": 8137 + }, + { + "epoch": 0.6567670083124849, + "grad_norm": 0.7414106726646423, + "learning_rate": 0.0001297282761681594, + "loss": 2.5719, + "step": 8138 + }, + { + "epoch": 0.6568477120490679, + "grad_norm": 0.6678279042243958, + "learning_rate": 0.00012971320263379868, + "loss": 2.555, + "step": 8139 + }, + { + "epoch": 0.6569284157856509, + "grad_norm": 0.692149817943573, + "learning_rate": 0.0001296981283589073, + "loss": 2.5991, + "step": 8140 + }, + { + "epoch": 0.6570091195222338, + "grad_norm": 0.6937025189399719, + "learning_rate": 
0.00012968305334386094, + "loss": 2.5635, + "step": 8141 + }, + { + "epoch": 0.6570898232588169, + "grad_norm": 0.6250358819961548, + "learning_rate": 0.00012966797758903528, + "loss": 2.55, + "step": 8142 + }, + { + "epoch": 0.6571705269953999, + "grad_norm": 0.7388221025466919, + "learning_rate": 0.00012965290109480607, + "loss": 2.5307, + "step": 8143 + }, + { + "epoch": 0.6572512307319829, + "grad_norm": 0.7165891528129578, + "learning_rate": 0.00012963782386154904, + "loss": 2.5482, + "step": 8144 + }, + { + "epoch": 0.6573319344685659, + "grad_norm": 0.7605282068252563, + "learning_rate": 0.00012962274588963996, + "loss": 2.5839, + "step": 8145 + }, + { + "epoch": 0.657412638205149, + "grad_norm": 0.7259613275527954, + "learning_rate": 0.00012960766717945465, + "loss": 2.5612, + "step": 8146 + }, + { + "epoch": 0.6574933419417319, + "grad_norm": 0.7301480770111084, + "learning_rate": 0.00012959258773136885, + "loss": 2.5365, + "step": 8147 + }, + { + "epoch": 0.6575740456783149, + "grad_norm": 0.6800966262817383, + "learning_rate": 0.0001295775075457584, + "loss": 2.5663, + "step": 8148 + }, + { + "epoch": 0.6576547494148979, + "grad_norm": 0.6968960165977478, + "learning_rate": 0.0001295624266229992, + "loss": 2.5626, + "step": 8149 + }, + { + "epoch": 0.657735453151481, + "grad_norm": 0.9044952392578125, + "learning_rate": 0.00012954734496346704, + "loss": 2.6479, + "step": 8150 + }, + { + "epoch": 0.6578161568880639, + "grad_norm": 0.6955156922340393, + "learning_rate": 0.00012953226256753777, + "loss": 2.5879, + "step": 8151 + }, + { + "epoch": 0.6578968606246469, + "grad_norm": 0.6535033583641052, + "learning_rate": 0.00012951717943558735, + "loss": 2.5372, + "step": 8152 + }, + { + "epoch": 0.6579775643612299, + "grad_norm": 0.720730721950531, + "learning_rate": 0.0001295020955679916, + "loss": 2.5813, + "step": 8153 + }, + { + "epoch": 0.658058268097813, + "grad_norm": 0.7190384268760681, + "learning_rate": 0.00012948701096512655, + "loss": 2.5923, + 
"step": 8154 + }, + { + "epoch": 0.658138971834396, + "grad_norm": 0.6624464988708496, + "learning_rate": 0.0001294719256273681, + "loss": 2.5548, + "step": 8155 + }, + { + "epoch": 0.6582196755709789, + "grad_norm": 0.7839831709861755, + "learning_rate": 0.00012945683955509224, + "loss": 2.531, + "step": 8156 + }, + { + "epoch": 0.6583003793075619, + "grad_norm": 0.694970965385437, + "learning_rate": 0.00012944175274867497, + "loss": 2.4693, + "step": 8157 + }, + { + "epoch": 0.658381083044145, + "grad_norm": 0.7409366965293884, + "learning_rate": 0.0001294266652084922, + "loss": 2.5706, + "step": 8158 + }, + { + "epoch": 0.658461786780728, + "grad_norm": 0.7502163052558899, + "learning_rate": 0.00012941157693492002, + "loss": 2.6137, + "step": 8159 + }, + { + "epoch": 0.658542490517311, + "grad_norm": 0.6627129912376404, + "learning_rate": 0.00012939648792833447, + "loss": 2.5781, + "step": 8160 + }, + { + "epoch": 0.6586231942538939, + "grad_norm": 0.6775660514831543, + "learning_rate": 0.00012938139818911157, + "loss": 2.5441, + "step": 8161 + }, + { + "epoch": 0.6587038979904769, + "grad_norm": 0.7150553464889526, + "learning_rate": 0.00012936630771762748, + "loss": 2.5763, + "step": 8162 + }, + { + "epoch": 0.65878460172706, + "grad_norm": 0.7461466193199158, + "learning_rate": 0.0001293512165142582, + "loss": 2.54, + "step": 8163 + }, + { + "epoch": 0.658865305463643, + "grad_norm": 0.7635199427604675, + "learning_rate": 0.00012933612457937988, + "loss": 2.5763, + "step": 8164 + }, + { + "epoch": 0.658946009200226, + "grad_norm": 0.7360543608665466, + "learning_rate": 0.00012932103191336865, + "loss": 2.5968, + "step": 8165 + }, + { + "epoch": 0.6590267129368089, + "grad_norm": 0.6482167840003967, + "learning_rate": 0.0001293059385166007, + "loss": 2.5704, + "step": 8166 + }, + { + "epoch": 0.659107416673392, + "grad_norm": 0.7024737596511841, + "learning_rate": 0.00012929084438945208, + "loss": 2.6221, + "step": 8167 + }, + { + "epoch": 0.659188120409975, + 
"grad_norm": 0.7192068696022034, + "learning_rate": 0.0001292757495322991, + "loss": 2.5574, + "step": 8168 + }, + { + "epoch": 0.659268824146558, + "grad_norm": 0.6900508403778076, + "learning_rate": 0.0001292606539455179, + "loss": 2.5969, + "step": 8169 + }, + { + "epoch": 0.6593495278831409, + "grad_norm": 0.7522475719451904, + "learning_rate": 0.00012924555762948474, + "loss": 2.592, + "step": 8170 + }, + { + "epoch": 0.659430231619724, + "grad_norm": 0.6610947251319885, + "learning_rate": 0.00012923046058457583, + "loss": 2.5404, + "step": 8171 + }, + { + "epoch": 0.659510935356307, + "grad_norm": 0.667628288269043, + "learning_rate": 0.00012921536281116738, + "loss": 2.5551, + "step": 8172 + }, + { + "epoch": 0.65959163909289, + "grad_norm": 0.7119980454444885, + "learning_rate": 0.00012920026430963578, + "loss": 2.6002, + "step": 8173 + }, + { + "epoch": 0.659672342829473, + "grad_norm": 0.712166428565979, + "learning_rate": 0.00012918516508035724, + "loss": 2.626, + "step": 8174 + }, + { + "epoch": 0.659753046566056, + "grad_norm": 0.6993290185928345, + "learning_rate": 0.0001291700651237081, + "loss": 2.6311, + "step": 8175 + }, + { + "epoch": 0.659833750302639, + "grad_norm": 0.6889405250549316, + "learning_rate": 0.0001291549644400647, + "loss": 2.6483, + "step": 8176 + }, + { + "epoch": 0.659914454039222, + "grad_norm": 0.7120937705039978, + "learning_rate": 0.00012913986302980334, + "loss": 2.5489, + "step": 8177 + }, + { + "epoch": 0.659995157775805, + "grad_norm": 0.7112947106361389, + "learning_rate": 0.00012912476089330043, + "loss": 2.6393, + "step": 8178 + }, + { + "epoch": 0.6600758615123881, + "grad_norm": 0.710342526435852, + "learning_rate": 0.00012910965803093237, + "loss": 2.5897, + "step": 8179 + }, + { + "epoch": 0.660156565248971, + "grad_norm": 0.6506931185722351, + "learning_rate": 0.0001290945544430755, + "loss": 2.6429, + "step": 8180 + }, + { + "epoch": 0.660237268985554, + "grad_norm": 0.7147021293640137, + "learning_rate": 
0.00012907945013010633, + "loss": 2.5521, + "step": 8181 + }, + { + "epoch": 0.660317972722137, + "grad_norm": 0.6802387833595276, + "learning_rate": 0.0001290643450924012, + "loss": 2.581, + "step": 8182 + }, + { + "epoch": 0.6603986764587201, + "grad_norm": 0.7599670886993408, + "learning_rate": 0.00012904923933033664, + "loss": 2.5532, + "step": 8183 + }, + { + "epoch": 0.6604793801953031, + "grad_norm": 0.7105657458305359, + "learning_rate": 0.0001290341328442891, + "loss": 2.5744, + "step": 8184 + }, + { + "epoch": 0.660560083931886, + "grad_norm": 0.6786425113677979, + "learning_rate": 0.00012901902563463506, + "loss": 2.5326, + "step": 8185 + }, + { + "epoch": 0.660640787668469, + "grad_norm": 0.7305583357810974, + "learning_rate": 0.00012900391770175106, + "loss": 2.6103, + "step": 8186 + }, + { + "epoch": 0.6607214914050521, + "grad_norm": 0.6578992605209351, + "learning_rate": 0.00012898880904601363, + "loss": 2.5833, + "step": 8187 + }, + { + "epoch": 0.6608021951416351, + "grad_norm": 0.6498856544494629, + "learning_rate": 0.00012897369966779926, + "loss": 2.6333, + "step": 8188 + }, + { + "epoch": 0.660882898878218, + "grad_norm": 0.7065569162368774, + "learning_rate": 0.00012895858956748458, + "loss": 2.5326, + "step": 8189 + }, + { + "epoch": 0.660963602614801, + "grad_norm": 0.7676446437835693, + "learning_rate": 0.00012894347874544613, + "loss": 2.6233, + "step": 8190 + }, + { + "epoch": 0.6610443063513841, + "grad_norm": 0.6794395446777344, + "learning_rate": 0.00012892836720206056, + "loss": 2.5426, + "step": 8191 + }, + { + "epoch": 0.6611250100879671, + "grad_norm": 0.7448986768722534, + "learning_rate": 0.00012891325493770444, + "loss": 2.5832, + "step": 8192 + }, + { + "epoch": 0.6612057138245501, + "grad_norm": 0.7789760231971741, + "learning_rate": 0.0001288981419527544, + "loss": 2.6393, + "step": 8193 + }, + { + "epoch": 0.661286417561133, + "grad_norm": 0.7425827980041504, + "learning_rate": 0.00012888302824758718, + "loss": 2.6159, + 
"step": 8194 + }, + { + "epoch": 0.6613671212977161, + "grad_norm": 0.6677481532096863, + "learning_rate": 0.00012886791382257936, + "loss": 2.5399, + "step": 8195 + }, + { + "epoch": 0.6614478250342991, + "grad_norm": 0.698397159576416, + "learning_rate": 0.0001288527986781077, + "loss": 2.5443, + "step": 8196 + }, + { + "epoch": 0.6615285287708821, + "grad_norm": 0.6862680315971375, + "learning_rate": 0.00012883768281454885, + "loss": 2.5843, + "step": 8197 + }, + { + "epoch": 0.6616092325074651, + "grad_norm": 0.7421948313713074, + "learning_rate": 0.00012882256623227955, + "loss": 2.5885, + "step": 8198 + }, + { + "epoch": 0.6616899362440481, + "grad_norm": 0.7453073859214783, + "learning_rate": 0.00012880744893167654, + "loss": 2.5821, + "step": 8199 + }, + { + "epoch": 0.6617706399806311, + "grad_norm": 0.668218195438385, + "learning_rate": 0.00012879233091311667, + "loss": 2.5941, + "step": 8200 + }, + { + "epoch": 0.6618513437172141, + "grad_norm": 0.6864587664604187, + "learning_rate": 0.00012877721217697657, + "loss": 2.5321, + "step": 8201 + }, + { + "epoch": 0.6619320474537971, + "grad_norm": 0.6521022319793701, + "learning_rate": 0.00012876209272363317, + "loss": 2.5945, + "step": 8202 + }, + { + "epoch": 0.6620127511903802, + "grad_norm": 0.7564631104469299, + "learning_rate": 0.00012874697255346325, + "loss": 2.5901, + "step": 8203 + }, + { + "epoch": 0.6620934549269631, + "grad_norm": 0.731991171836853, + "learning_rate": 0.00012873185166684356, + "loss": 2.649, + "step": 8204 + }, + { + "epoch": 0.6621741586635461, + "grad_norm": 0.6804815530776978, + "learning_rate": 0.00012871673006415108, + "loss": 2.5417, + "step": 8205 + }, + { + "epoch": 0.6622548624001291, + "grad_norm": 0.6862792372703552, + "learning_rate": 0.0001287016077457626, + "loss": 2.6118, + "step": 8206 + }, + { + "epoch": 0.6623355661367122, + "grad_norm": 0.7013735175132751, + "learning_rate": 0.00012868648471205503, + "loss": 2.6296, + "step": 8207 + }, + { + "epoch": 
0.6624162698732952, + "grad_norm": 0.7284584045410156, + "learning_rate": 0.00012867136096340529, + "loss": 2.6547, + "step": 8208 + }, + { + "epoch": 0.6624969736098781, + "grad_norm": 0.714546799659729, + "learning_rate": 0.00012865623650019025, + "loss": 2.5955, + "step": 8209 + }, + { + "epoch": 0.6625776773464611, + "grad_norm": 0.7645453214645386, + "learning_rate": 0.0001286411113227869, + "loss": 2.6132, + "step": 8210 + }, + { + "epoch": 0.6626583810830441, + "grad_norm": 0.6615093946456909, + "learning_rate": 0.0001286259854315722, + "loss": 2.5701, + "step": 8211 + }, + { + "epoch": 0.6627390848196272, + "grad_norm": 0.6565523147583008, + "learning_rate": 0.0001286108588269231, + "loss": 2.57, + "step": 8212 + }, + { + "epoch": 0.6628197885562102, + "grad_norm": 0.7173478007316589, + "learning_rate": 0.00012859573150921666, + "loss": 2.589, + "step": 8213 + }, + { + "epoch": 0.6629004922927931, + "grad_norm": 0.7069580554962158, + "learning_rate": 0.00012858060347882975, + "loss": 2.6146, + "step": 8214 + }, + { + "epoch": 0.6629811960293761, + "grad_norm": 0.7004678249359131, + "learning_rate": 0.00012856547473613953, + "loss": 2.5735, + "step": 8215 + }, + { + "epoch": 0.6630618997659592, + "grad_norm": 0.6589130163192749, + "learning_rate": 0.00012855034528152305, + "loss": 2.5731, + "step": 8216 + }, + { + "epoch": 0.6631426035025422, + "grad_norm": 0.7223117351531982, + "learning_rate": 0.0001285352151153573, + "loss": 2.5262, + "step": 8217 + }, + { + "epoch": 0.6632233072391251, + "grad_norm": 0.7045131325721741, + "learning_rate": 0.0001285200842380194, + "loss": 2.5789, + "step": 8218 + }, + { + "epoch": 0.6633040109757081, + "grad_norm": 0.7002174854278564, + "learning_rate": 0.00012850495264988645, + "loss": 2.6386, + "step": 8219 + }, + { + "epoch": 0.6633847147122912, + "grad_norm": 0.6844584941864014, + "learning_rate": 0.00012848982035133555, + "loss": 2.5394, + "step": 8220 + }, + { + "epoch": 0.6634654184488742, + "grad_norm": 
0.7154871821403503, + "learning_rate": 0.00012847468734274387, + "loss": 2.5927, + "step": 8221 + }, + { + "epoch": 0.6635461221854572, + "grad_norm": 0.6856776475906372, + "learning_rate": 0.00012845955362448855, + "loss": 2.5694, + "step": 8222 + }, + { + "epoch": 0.6636268259220401, + "grad_norm": 0.7069089412689209, + "learning_rate": 0.00012844441919694676, + "loss": 2.5856, + "step": 8223 + }, + { + "epoch": 0.6637075296586232, + "grad_norm": 0.7084143161773682, + "learning_rate": 0.00012842928406049567, + "loss": 2.6301, + "step": 8224 + }, + { + "epoch": 0.6637882333952062, + "grad_norm": 0.6790862679481506, + "learning_rate": 0.00012841414821551252, + "loss": 2.5586, + "step": 8225 + }, + { + "epoch": 0.6638689371317892, + "grad_norm": 0.6537249684333801, + "learning_rate": 0.00012839901166237453, + "loss": 2.5652, + "step": 8226 + }, + { + "epoch": 0.6639496408683722, + "grad_norm": 0.6670125126838684, + "learning_rate": 0.00012838387440145893, + "loss": 2.5438, + "step": 8227 + }, + { + "epoch": 0.6640303446049552, + "grad_norm": 0.7202955484390259, + "learning_rate": 0.00012836873643314297, + "loss": 2.5632, + "step": 8228 + }, + { + "epoch": 0.6641110483415382, + "grad_norm": 0.6844765543937683, + "learning_rate": 0.00012835359775780394, + "loss": 2.5595, + "step": 8229 + }, + { + "epoch": 0.6641917520781212, + "grad_norm": 0.6557698249816895, + "learning_rate": 0.00012833845837581916, + "loss": 2.5998, + "step": 8230 + }, + { + "epoch": 0.6642724558147042, + "grad_norm": 0.6741784811019897, + "learning_rate": 0.0001283233182875659, + "loss": 2.5591, + "step": 8231 + }, + { + "epoch": 0.6643531595512873, + "grad_norm": 0.6926484704017639, + "learning_rate": 0.00012830817749342154, + "loss": 2.5557, + "step": 8232 + }, + { + "epoch": 0.6644338632878702, + "grad_norm": 0.6866984367370605, + "learning_rate": 0.00012829303599376336, + "loss": 2.5646, + "step": 8233 + }, + { + "epoch": 0.6645145670244532, + "grad_norm": 0.6772707104682922, + 
"learning_rate": 0.0001282778937889688, + "loss": 2.6028, + "step": 8234 + }, + { + "epoch": 0.6645952707610362, + "grad_norm": 0.693236768245697, + "learning_rate": 0.00012826275087941518, + "loss": 2.611, + "step": 8235 + }, + { + "epoch": 0.6646759744976193, + "grad_norm": 0.7181996703147888, + "learning_rate": 0.00012824760726547993, + "loss": 2.6081, + "step": 8236 + }, + { + "epoch": 0.6647566782342023, + "grad_norm": 0.6845484375953674, + "learning_rate": 0.00012823246294754048, + "loss": 2.5544, + "step": 8237 + }, + { + "epoch": 0.6648373819707852, + "grad_norm": 0.7106444239616394, + "learning_rate": 0.00012821731792597425, + "loss": 2.552, + "step": 8238 + }, + { + "epoch": 0.6649180857073682, + "grad_norm": 0.6930601000785828, + "learning_rate": 0.0001282021722011587, + "loss": 2.5401, + "step": 8239 + }, + { + "epoch": 0.6649987894439513, + "grad_norm": 0.6658228039741516, + "learning_rate": 0.00012818702577347129, + "loss": 2.6287, + "step": 8240 + }, + { + "epoch": 0.6650794931805343, + "grad_norm": 0.6919803619384766, + "learning_rate": 0.0001281718786432895, + "loss": 2.6142, + "step": 8241 + }, + { + "epoch": 0.6651601969171173, + "grad_norm": 0.6675698757171631, + "learning_rate": 0.00012815673081099086, + "loss": 2.5325, + "step": 8242 + }, + { + "epoch": 0.6652409006537002, + "grad_norm": 0.6669798493385315, + "learning_rate": 0.0001281415822769529, + "loss": 2.5355, + "step": 8243 + }, + { + "epoch": 0.6653216043902833, + "grad_norm": 0.6449857950210571, + "learning_rate": 0.00012812643304155316, + "loss": 2.5968, + "step": 8244 + }, + { + "epoch": 0.6654023081268663, + "grad_norm": 0.6972789168357849, + "learning_rate": 0.00012811128310516914, + "loss": 2.6133, + "step": 8245 + }, + { + "epoch": 0.6654830118634493, + "grad_norm": 0.7179878354072571, + "learning_rate": 0.0001280961324681785, + "loss": 2.5793, + "step": 8246 + }, + { + "epoch": 0.6655637156000322, + "grad_norm": 0.6736378073692322, + "learning_rate": 0.0001280809811309588, + 
"loss": 2.5543, + "step": 8247 + }, + { + "epoch": 0.6656444193366153, + "grad_norm": 0.7376420497894287, + "learning_rate": 0.00012806582909388763, + "loss": 2.5501, + "step": 8248 + }, + { + "epoch": 0.6657251230731983, + "grad_norm": 0.7163094878196716, + "learning_rate": 0.00012805067635734263, + "loss": 2.5538, + "step": 8249 + }, + { + "epoch": 0.6658058268097813, + "grad_norm": 0.7699353694915771, + "learning_rate": 0.00012803552292170144, + "loss": 2.5925, + "step": 8250 + }, + { + "epoch": 0.6658865305463643, + "grad_norm": 0.6504995822906494, + "learning_rate": 0.00012802036878734177, + "loss": 2.5944, + "step": 8251 + }, + { + "epoch": 0.6659672342829474, + "grad_norm": 0.7150379419326782, + "learning_rate": 0.0001280052139546412, + "loss": 2.5959, + "step": 8252 + }, + { + "epoch": 0.6660479380195303, + "grad_norm": 0.7562555074691772, + "learning_rate": 0.00012799005842397757, + "loss": 2.6041, + "step": 8253 + }, + { + "epoch": 0.6661286417561133, + "grad_norm": 0.7242838740348816, + "learning_rate": 0.00012797490219572846, + "loss": 2.6152, + "step": 8254 + }, + { + "epoch": 0.6662093454926963, + "grad_norm": 0.7062848210334778, + "learning_rate": 0.00012795974527027168, + "loss": 2.596, + "step": 8255 + }, + { + "epoch": 0.6662900492292794, + "grad_norm": 0.8179726004600525, + "learning_rate": 0.00012794458764798497, + "loss": 2.5792, + "step": 8256 + }, + { + "epoch": 0.6663707529658623, + "grad_norm": 0.692166268825531, + "learning_rate": 0.00012792942932924608, + "loss": 2.6025, + "step": 8257 + }, + { + "epoch": 0.6664514567024453, + "grad_norm": 0.6540334224700928, + "learning_rate": 0.0001279142703144328, + "loss": 2.5119, + "step": 8258 + }, + { + "epoch": 0.6665321604390283, + "grad_norm": 0.7087461352348328, + "learning_rate": 0.00012789911060392294, + "loss": 2.5808, + "step": 8259 + }, + { + "epoch": 0.6666128641756114, + "grad_norm": 0.6897622346878052, + "learning_rate": 0.0001278839501980943, + "loss": 2.5811, + "step": 8260 + }, + { + 
"epoch": 0.6666935679121944, + "grad_norm": 0.6653634905815125, + "learning_rate": 0.00012786878909732473, + "loss": 2.5498, + "step": 8261 + }, + { + "epoch": 0.6667742716487773, + "grad_norm": 0.6541483402252197, + "learning_rate": 0.0001278536273019921, + "loss": 2.605, + "step": 8262 + }, + { + "epoch": 0.6668549753853603, + "grad_norm": 0.6748146414756775, + "learning_rate": 0.00012783846481247428, + "loss": 2.5571, + "step": 8263 + }, + { + "epoch": 0.6669356791219433, + "grad_norm": 0.7258282899856567, + "learning_rate": 0.00012782330162914915, + "loss": 2.5562, + "step": 8264 + }, + { + "epoch": 0.6670163828585264, + "grad_norm": 0.6963080167770386, + "learning_rate": 0.00012780813775239457, + "loss": 2.6467, + "step": 8265 + }, + { + "epoch": 0.6670970865951094, + "grad_norm": 0.6627718806266785, + "learning_rate": 0.00012779297318258855, + "loss": 2.5369, + "step": 8266 + }, + { + "epoch": 0.6671777903316923, + "grad_norm": 0.7026168704032898, + "learning_rate": 0.00012777780792010897, + "loss": 2.5639, + "step": 8267 + }, + { + "epoch": 0.6672584940682753, + "grad_norm": 0.6969077587127686, + "learning_rate": 0.0001277626419653338, + "loss": 2.517, + "step": 8268 + }, + { + "epoch": 0.6673391978048584, + "grad_norm": 0.6918485760688782, + "learning_rate": 0.00012774747531864102, + "loss": 2.6388, + "step": 8269 + }, + { + "epoch": 0.6674199015414414, + "grad_norm": 0.6661256551742554, + "learning_rate": 0.00012773230798040862, + "loss": 2.5477, + "step": 8270 + }, + { + "epoch": 0.6675006052780244, + "grad_norm": 0.6778402328491211, + "learning_rate": 0.0001277171399510146, + "loss": 2.6032, + "step": 8271 + }, + { + "epoch": 0.6675813090146073, + "grad_norm": 0.6464864611625671, + "learning_rate": 0.00012770197123083702, + "loss": 2.5396, + "step": 8272 + }, + { + "epoch": 0.6676620127511904, + "grad_norm": 0.7154508233070374, + "learning_rate": 0.0001276868018202539, + "loss": 2.6163, + "step": 8273 + }, + { + "epoch": 0.6677427164877734, + 
"grad_norm": 0.6849631071090698, + "learning_rate": 0.0001276716317196433, + "loss": 2.549, + "step": 8274 + }, + { + "epoch": 0.6678234202243564, + "grad_norm": 0.6696017980575562, + "learning_rate": 0.00012765646092938334, + "loss": 2.5046, + "step": 8275 + }, + { + "epoch": 0.6679041239609393, + "grad_norm": 0.668153703212738, + "learning_rate": 0.00012764128944985203, + "loss": 2.5422, + "step": 8276 + }, + { + "epoch": 0.6679848276975224, + "grad_norm": 0.6600282192230225, + "learning_rate": 0.00012762611728142756, + "loss": 2.6117, + "step": 8277 + }, + { + "epoch": 0.6680655314341054, + "grad_norm": 0.6691608428955078, + "learning_rate": 0.000127610944424488, + "loss": 2.5761, + "step": 8278 + }, + { + "epoch": 0.6681462351706884, + "grad_norm": 0.695142924785614, + "learning_rate": 0.00012759577087941156, + "loss": 2.6123, + "step": 8279 + }, + { + "epoch": 0.6682269389072714, + "grad_norm": 0.6846559643745422, + "learning_rate": 0.00012758059664657635, + "loss": 2.5882, + "step": 8280 + }, + { + "epoch": 0.6683076426438544, + "grad_norm": 0.7616459131240845, + "learning_rate": 0.0001275654217263606, + "loss": 2.5559, + "step": 8281 + }, + { + "epoch": 0.6683883463804374, + "grad_norm": 0.6995570063591003, + "learning_rate": 0.00012755024611914246, + "loss": 2.5336, + "step": 8282 + }, + { + "epoch": 0.6684690501170204, + "grad_norm": 0.7199691534042358, + "learning_rate": 0.0001275350698253002, + "loss": 2.6618, + "step": 8283 + }, + { + "epoch": 0.6685497538536034, + "grad_norm": 0.6938748955726624, + "learning_rate": 0.000127519892845212, + "loss": 2.574, + "step": 8284 + }, + { + "epoch": 0.6686304575901865, + "grad_norm": 0.6827714443206787, + "learning_rate": 0.00012750471517925614, + "loss": 2.5647, + "step": 8285 + }, + { + "epoch": 0.6687111613267694, + "grad_norm": 0.6684606671333313, + "learning_rate": 0.00012748953682781083, + "loss": 2.528, + "step": 8286 + }, + { + "epoch": 0.6687918650633524, + "grad_norm": 0.6842156052589417, + 
"learning_rate": 0.00012747435779125448, + "loss": 2.5521, + "step": 8287 + }, + { + "epoch": 0.6688725687999354, + "grad_norm": 0.7440506219863892, + "learning_rate": 0.0001274591780699653, + "loss": 2.5646, + "step": 8288 + }, + { + "epoch": 0.6689532725365185, + "grad_norm": 0.769922137260437, + "learning_rate": 0.0001274439976643216, + "loss": 2.6104, + "step": 8289 + }, + { + "epoch": 0.6690339762731015, + "grad_norm": 0.7793089747428894, + "learning_rate": 0.00012742881657470175, + "loss": 2.6348, + "step": 8290 + }, + { + "epoch": 0.6691146800096844, + "grad_norm": 0.695060133934021, + "learning_rate": 0.0001274136348014841, + "loss": 2.5797, + "step": 8291 + }, + { + "epoch": 0.6691953837462674, + "grad_norm": 0.7089917659759521, + "learning_rate": 0.00012739845234504697, + "loss": 2.5431, + "step": 8292 + }, + { + "epoch": 0.6692760874828505, + "grad_norm": 0.7542717456817627, + "learning_rate": 0.00012738326920576885, + "loss": 2.6172, + "step": 8293 + }, + { + "epoch": 0.6693567912194335, + "grad_norm": 0.6947969794273376, + "learning_rate": 0.00012736808538402802, + "loss": 2.6026, + "step": 8294 + }, + { + "epoch": 0.6694374949560165, + "grad_norm": 0.6696321368217468, + "learning_rate": 0.00012735290088020302, + "loss": 2.5592, + "step": 8295 + }, + { + "epoch": 0.6695181986925994, + "grad_norm": 0.7001518607139587, + "learning_rate": 0.0001273377156946722, + "loss": 2.5994, + "step": 8296 + }, + { + "epoch": 0.6695989024291825, + "grad_norm": 0.6708101630210876, + "learning_rate": 0.000127322529827814, + "loss": 2.6392, + "step": 8297 + }, + { + "epoch": 0.6696796061657655, + "grad_norm": 0.6282601952552795, + "learning_rate": 0.000127307343280007, + "loss": 2.5762, + "step": 8298 + }, + { + "epoch": 0.6697603099023485, + "grad_norm": 0.6879595518112183, + "learning_rate": 0.0001272921560516296, + "loss": 2.5507, + "step": 8299 + }, + { + "epoch": 0.6698410136389314, + "grad_norm": 0.6108266115188599, + "learning_rate": 0.00012727696814306033, + 
"loss": 2.5865, + "step": 8300 + }, + { + "epoch": 0.6699217173755145, + "grad_norm": 0.6763970851898193, + "learning_rate": 0.0001272617795546777, + "loss": 2.6439, + "step": 8301 + }, + { + "epoch": 0.6700024211120975, + "grad_norm": 0.6997560858726501, + "learning_rate": 0.00012724659028686027, + "loss": 2.5291, + "step": 8302 + }, + { + "epoch": 0.6700831248486805, + "grad_norm": 0.675714910030365, + "learning_rate": 0.0001272314003399866, + "loss": 2.5452, + "step": 8303 + }, + { + "epoch": 0.6701638285852635, + "grad_norm": 0.6847789883613586, + "learning_rate": 0.00012721620971443525, + "loss": 2.6111, + "step": 8304 + }, + { + "epoch": 0.6702445323218466, + "grad_norm": 0.7283920645713806, + "learning_rate": 0.0001272010184105848, + "loss": 2.6322, + "step": 8305 + }, + { + "epoch": 0.6703252360584295, + "grad_norm": 0.7551796436309814, + "learning_rate": 0.00012718582642881382, + "loss": 2.5728, + "step": 8306 + }, + { + "epoch": 0.6704059397950125, + "grad_norm": 0.694526195526123, + "learning_rate": 0.00012717063376950104, + "loss": 2.6241, + "step": 8307 + }, + { + "epoch": 0.6704866435315955, + "grad_norm": 0.6956443190574646, + "learning_rate": 0.00012715544043302504, + "loss": 2.5531, + "step": 8308 + }, + { + "epoch": 0.6705673472681786, + "grad_norm": 0.7649452686309814, + "learning_rate": 0.00012714024641976446, + "loss": 2.5462, + "step": 8309 + }, + { + "epoch": 0.6706480510047615, + "grad_norm": 0.7711065411567688, + "learning_rate": 0.00012712505173009797, + "loss": 2.5878, + "step": 8310 + }, + { + "epoch": 0.6707287547413445, + "grad_norm": 0.68077552318573, + "learning_rate": 0.00012710985636440434, + "loss": 2.5668, + "step": 8311 + }, + { + "epoch": 0.6708094584779275, + "grad_norm": 0.7181024551391602, + "learning_rate": 0.0001270946603230622, + "loss": 2.6104, + "step": 8312 + }, + { + "epoch": 0.6708901622145105, + "grad_norm": 0.7136553525924683, + "learning_rate": 0.0001270794636064503, + "loss": 2.5282, + "step": 8313 + }, + { + 
"epoch": 0.6709708659510936, + "grad_norm": 0.880094587802887, + "learning_rate": 0.00012706426621494736, + "loss": 2.5837, + "step": 8314 + }, + { + "epoch": 0.6710515696876765, + "grad_norm": 0.7438541054725647, + "learning_rate": 0.00012704906814893217, + "loss": 2.5577, + "step": 8315 + }, + { + "epoch": 0.6711322734242595, + "grad_norm": 0.8197470903396606, + "learning_rate": 0.00012703386940878352, + "loss": 2.569, + "step": 8316 + }, + { + "epoch": 0.6712129771608425, + "grad_norm": 0.7728317975997925, + "learning_rate": 0.00012701866999488014, + "loss": 2.6407, + "step": 8317 + }, + { + "epoch": 0.6712936808974256, + "grad_norm": 0.7594823837280273, + "learning_rate": 0.0001270034699076009, + "loss": 2.5789, + "step": 8318 + }, + { + "epoch": 0.6713743846340086, + "grad_norm": 0.7502284646034241, + "learning_rate": 0.0001269882691473246, + "loss": 2.6068, + "step": 8319 + }, + { + "epoch": 0.6714550883705915, + "grad_norm": 0.7355664372444153, + "learning_rate": 0.0001269730677144301, + "loss": 2.6055, + "step": 8320 + }, + { + "epoch": 0.6715357921071745, + "grad_norm": 0.7218407392501831, + "learning_rate": 0.0001269578656092962, + "loss": 2.5953, + "step": 8321 + }, + { + "epoch": 0.6716164958437576, + "grad_norm": 0.6932538747787476, + "learning_rate": 0.00012694266283230185, + "loss": 2.5795, + "step": 8322 + }, + { + "epoch": 0.6716971995803406, + "grad_norm": 0.7337260246276855, + "learning_rate": 0.00012692745938382591, + "loss": 2.5606, + "step": 8323 + }, + { + "epoch": 0.6717779033169236, + "grad_norm": 0.6959026455879211, + "learning_rate": 0.00012691225526424731, + "loss": 2.5688, + "step": 8324 + }, + { + "epoch": 0.6718586070535065, + "grad_norm": 0.7352995872497559, + "learning_rate": 0.00012689705047394493, + "loss": 2.6308, + "step": 8325 + }, + { + "epoch": 0.6719393107900896, + "grad_norm": 0.7023616433143616, + "learning_rate": 0.00012688184501329777, + "loss": 2.6462, + "step": 8326 + }, + { + "epoch": 0.6720200145266726, + 
"grad_norm": 0.6581354737281799, + "learning_rate": 0.00012686663888268474, + "loss": 2.5997, + "step": 8327 + }, + { + "epoch": 0.6721007182632556, + "grad_norm": 0.6332606077194214, + "learning_rate": 0.00012685143208248484, + "loss": 2.6348, + "step": 8328 + }, + { + "epoch": 0.6721814219998385, + "grad_norm": 0.6826457977294922, + "learning_rate": 0.00012683622461307707, + "loss": 2.5092, + "step": 8329 + }, + { + "epoch": 0.6722621257364216, + "grad_norm": 0.7641614079475403, + "learning_rate": 0.00012682101647484042, + "loss": 2.7098, + "step": 8330 + }, + { + "epoch": 0.6723428294730046, + "grad_norm": 0.7153630256652832, + "learning_rate": 0.00012680580766815394, + "loss": 2.5647, + "step": 8331 + }, + { + "epoch": 0.6724235332095876, + "grad_norm": 0.6746379137039185, + "learning_rate": 0.00012679059819339664, + "loss": 2.6187, + "step": 8332 + }, + { + "epoch": 0.6725042369461706, + "grad_norm": 0.6748883128166199, + "learning_rate": 0.00012677538805094764, + "loss": 2.6045, + "step": 8333 + }, + { + "epoch": 0.6725849406827537, + "grad_norm": 0.7366370558738708, + "learning_rate": 0.00012676017724118596, + "loss": 2.5789, + "step": 8334 + }, + { + "epoch": 0.6726656444193366, + "grad_norm": 0.7381749153137207, + "learning_rate": 0.00012674496576449074, + "loss": 2.5958, + "step": 8335 + }, + { + "epoch": 0.6727463481559196, + "grad_norm": 0.7109243869781494, + "learning_rate": 0.00012672975362124103, + "loss": 2.5874, + "step": 8336 + }, + { + "epoch": 0.6728270518925026, + "grad_norm": 0.6904270052909851, + "learning_rate": 0.00012671454081181595, + "loss": 2.5891, + "step": 8337 + }, + { + "epoch": 0.6729077556290857, + "grad_norm": 0.6809365749359131, + "learning_rate": 0.00012669932733659476, + "loss": 2.5904, + "step": 8338 + }, + { + "epoch": 0.6729884593656686, + "grad_norm": 0.7527552843093872, + "learning_rate": 0.00012668411319595647, + "loss": 2.5602, + "step": 8339 + }, + { + "epoch": 0.6730691631022516, + "grad_norm": 0.6746577620506287, + 
"learning_rate": 0.00012666889839028038, + "loss": 2.5468, + "step": 8340 + }, + { + "epoch": 0.6731498668388346, + "grad_norm": 0.6904895305633545, + "learning_rate": 0.00012665368291994562, + "loss": 2.623, + "step": 8341 + }, + { + "epoch": 0.6732305705754177, + "grad_norm": 0.6495908498764038, + "learning_rate": 0.00012663846678533135, + "loss": 2.5843, + "step": 8342 + }, + { + "epoch": 0.6733112743120007, + "grad_norm": 0.6782342195510864, + "learning_rate": 0.00012662324998681692, + "loss": 2.6141, + "step": 8343 + }, + { + "epoch": 0.6733919780485836, + "grad_norm": 0.7090504765510559, + "learning_rate": 0.0001266080325247815, + "loss": 2.6654, + "step": 8344 + }, + { + "epoch": 0.6734726817851666, + "grad_norm": 0.7085515856742859, + "learning_rate": 0.00012659281439960434, + "loss": 2.5394, + "step": 8345 + }, + { + "epoch": 0.6735533855217497, + "grad_norm": 0.6813806295394897, + "learning_rate": 0.00012657759561166473, + "loss": 2.6522, + "step": 8346 + }, + { + "epoch": 0.6736340892583327, + "grad_norm": 0.726378858089447, + "learning_rate": 0.00012656237616134197, + "loss": 2.5922, + "step": 8347 + }, + { + "epoch": 0.6737147929949157, + "grad_norm": 0.6323714256286621, + "learning_rate": 0.00012654715604901534, + "loss": 2.4938, + "step": 8348 + }, + { + "epoch": 0.6737954967314986, + "grad_norm": 0.6925889253616333, + "learning_rate": 0.0001265319352750642, + "loss": 2.635, + "step": 8349 + }, + { + "epoch": 0.6738762004680817, + "grad_norm": 0.6676003932952881, + "learning_rate": 0.00012651671383986788, + "loss": 2.558, + "step": 8350 + }, + { + "epoch": 0.6739569042046647, + "grad_norm": 0.7464616298675537, + "learning_rate": 0.00012650149174380575, + "loss": 2.5777, + "step": 8351 + }, + { + "epoch": 0.6740376079412477, + "grad_norm": 0.6611667275428772, + "learning_rate": 0.00012648626898725715, + "loss": 2.5779, + "step": 8352 + }, + { + "epoch": 0.6741183116778307, + "grad_norm": 0.7391866445541382, + "learning_rate": 0.00012647104557060148, + 
"loss": 2.5624, + "step": 8353 + }, + { + "epoch": 0.6741990154144137, + "grad_norm": 0.7107826471328735, + "learning_rate": 0.00012645582149421817, + "loss": 2.5744, + "step": 8354 + }, + { + "epoch": 0.6742797191509967, + "grad_norm": 0.7385339736938477, + "learning_rate": 0.00012644059675848666, + "loss": 2.5752, + "step": 8355 + }, + { + "epoch": 0.6743604228875797, + "grad_norm": 0.6887345314025879, + "learning_rate": 0.00012642537136378634, + "loss": 2.5794, + "step": 8356 + }, + { + "epoch": 0.6744411266241627, + "grad_norm": 0.6934933662414551, + "learning_rate": 0.00012641014531049666, + "loss": 2.5361, + "step": 8357 + }, + { + "epoch": 0.6745218303607458, + "grad_norm": 0.7437291741371155, + "learning_rate": 0.00012639491859899716, + "loss": 2.5741, + "step": 8358 + }, + { + "epoch": 0.6746025340973287, + "grad_norm": 0.7088494896888733, + "learning_rate": 0.00012637969122966729, + "loss": 2.6449, + "step": 8359 + }, + { + "epoch": 0.6746832378339117, + "grad_norm": 0.7496390342712402, + "learning_rate": 0.00012636446320288654, + "loss": 2.6109, + "step": 8360 + }, + { + "epoch": 0.6747639415704947, + "grad_norm": 0.6949843764305115, + "learning_rate": 0.00012634923451903447, + "loss": 2.5769, + "step": 8361 + }, + { + "epoch": 0.6748446453070778, + "grad_norm": 0.7192673087120056, + "learning_rate": 0.00012633400517849056, + "loss": 2.6053, + "step": 8362 + }, + { + "epoch": 0.6749253490436607, + "grad_norm": 0.7003379464149475, + "learning_rate": 0.00012631877518163442, + "loss": 2.5745, + "step": 8363 + }, + { + "epoch": 0.6750060527802437, + "grad_norm": 0.7499879002571106, + "learning_rate": 0.00012630354452884563, + "loss": 2.6077, + "step": 8364 + }, + { + "epoch": 0.6750867565168267, + "grad_norm": 0.7047405242919922, + "learning_rate": 0.00012628831322050377, + "loss": 2.5955, + "step": 8365 + }, + { + "epoch": 0.6751674602534097, + "grad_norm": 0.7463203072547913, + "learning_rate": 0.00012627308125698838, + "loss": 2.5421, + "step": 8366 + }, 
+ { + "epoch": 0.6752481639899928, + "grad_norm": 0.7377086877822876, + "learning_rate": 0.00012625784863867914, + "loss": 2.5804, + "step": 8367 + }, + { + "epoch": 0.6753288677265757, + "grad_norm": 0.7136400938034058, + "learning_rate": 0.00012624261536595566, + "loss": 2.5673, + "step": 8368 + }, + { + "epoch": 0.6754095714631587, + "grad_norm": 0.6923615336418152, + "learning_rate": 0.0001262273814391976, + "loss": 2.5832, + "step": 8369 + }, + { + "epoch": 0.6754902751997417, + "grad_norm": 0.7495028972625732, + "learning_rate": 0.00012621214685878469, + "loss": 2.5943, + "step": 8370 + }, + { + "epoch": 0.6755709789363248, + "grad_norm": 0.6751434206962585, + "learning_rate": 0.0001261969116250965, + "loss": 2.5495, + "step": 8371 + }, + { + "epoch": 0.6756516826729078, + "grad_norm": 0.7055973410606384, + "learning_rate": 0.00012618167573851284, + "loss": 2.5651, + "step": 8372 + }, + { + "epoch": 0.6757323864094907, + "grad_norm": 0.7479640245437622, + "learning_rate": 0.00012616643919941337, + "loss": 2.653, + "step": 8373 + }, + { + "epoch": 0.6758130901460737, + "grad_norm": 0.7075015902519226, + "learning_rate": 0.00012615120200817778, + "loss": 2.5787, + "step": 8374 + }, + { + "epoch": 0.6758937938826568, + "grad_norm": 0.7513934969902039, + "learning_rate": 0.00012613596416518593, + "loss": 2.6099, + "step": 8375 + }, + { + "epoch": 0.6759744976192398, + "grad_norm": 0.6742326021194458, + "learning_rate": 0.00012612072567081754, + "loss": 2.5335, + "step": 8376 + }, + { + "epoch": 0.6760552013558228, + "grad_norm": 0.7271459698677063, + "learning_rate": 0.00012610548652545239, + "loss": 2.6082, + "step": 8377 + }, + { + "epoch": 0.6761359050924057, + "grad_norm": 0.7481515407562256, + "learning_rate": 0.00012609024672947022, + "loss": 2.5805, + "step": 8378 + }, + { + "epoch": 0.6762166088289888, + "grad_norm": 0.7484803199768066, + "learning_rate": 0.00012607500628325093, + "loss": 2.6099, + "step": 8379 + }, + { + "epoch": 0.6762973125655718, + 
"grad_norm": 0.7462390661239624, + "learning_rate": 0.00012605976518717435, + "loss": 2.6054, + "step": 8380 + }, + { + "epoch": 0.6763780163021548, + "grad_norm": 0.7014410495758057, + "learning_rate": 0.00012604452344162028, + "loss": 2.5614, + "step": 8381 + }, + { + "epoch": 0.6764587200387377, + "grad_norm": 0.6902963519096375, + "learning_rate": 0.0001260292810469686, + "loss": 2.5813, + "step": 8382 + }, + { + "epoch": 0.6765394237753208, + "grad_norm": 0.6646186113357544, + "learning_rate": 0.00012601403800359919, + "loss": 2.545, + "step": 8383 + }, + { + "epoch": 0.6766201275119038, + "grad_norm": 0.7067462801933289, + "learning_rate": 0.00012599879431189197, + "loss": 2.6195, + "step": 8384 + }, + { + "epoch": 0.6767008312484868, + "grad_norm": 0.7263965010643005, + "learning_rate": 0.0001259835499722268, + "loss": 2.5929, + "step": 8385 + }, + { + "epoch": 0.6767815349850698, + "grad_norm": 0.6672000885009766, + "learning_rate": 0.0001259683049849837, + "loss": 2.5561, + "step": 8386 + }, + { + "epoch": 0.6768622387216529, + "grad_norm": 0.6543236374855042, + "learning_rate": 0.0001259530593505425, + "loss": 2.6256, + "step": 8387 + }, + { + "epoch": 0.6769429424582358, + "grad_norm": 0.6532339453697205, + "learning_rate": 0.00012593781306928324, + "loss": 2.5074, + "step": 8388 + }, + { + "epoch": 0.6770236461948188, + "grad_norm": 0.7442833185195923, + "learning_rate": 0.00012592256614158591, + "loss": 2.6124, + "step": 8389 + }, + { + "epoch": 0.6771043499314018, + "grad_norm": 0.786685585975647, + "learning_rate": 0.00012590731856783043, + "loss": 2.6077, + "step": 8390 + }, + { + "epoch": 0.6771850536679849, + "grad_norm": 0.7952337265014648, + "learning_rate": 0.00012589207034839687, + "loss": 2.5894, + "step": 8391 + }, + { + "epoch": 0.6772657574045678, + "grad_norm": 0.7847954034805298, + "learning_rate": 0.00012587682148366524, + "loss": 2.4934, + "step": 8392 + }, + { + "epoch": 0.6773464611411508, + "grad_norm": 0.6769007444381714, + 
"learning_rate": 0.00012586157197401552, + "loss": 2.5695, + "step": 8393 + }, + { + "epoch": 0.6774271648777338, + "grad_norm": 0.6583757996559143, + "learning_rate": 0.00012584632181982788, + "loss": 2.5866, + "step": 8394 + }, + { + "epoch": 0.6775078686143169, + "grad_norm": 0.7375823855400085, + "learning_rate": 0.0001258310710214823, + "loss": 2.5141, + "step": 8395 + }, + { + "epoch": 0.6775885723508999, + "grad_norm": 0.6901078224182129, + "learning_rate": 0.00012581581957935896, + "loss": 2.5732, + "step": 8396 + }, + { + "epoch": 0.6776692760874828, + "grad_norm": 0.687152624130249, + "learning_rate": 0.0001258005674938379, + "loss": 2.5916, + "step": 8397 + }, + { + "epoch": 0.6777499798240658, + "grad_norm": 0.7198586463928223, + "learning_rate": 0.00012578531476529917, + "loss": 2.5626, + "step": 8398 + }, + { + "epoch": 0.6778306835606489, + "grad_norm": 0.7417474985122681, + "learning_rate": 0.00012577006139412309, + "loss": 2.5486, + "step": 8399 + }, + { + "epoch": 0.6779113872972319, + "grad_norm": 0.6588087677955627, + "learning_rate": 0.0001257548073806897, + "loss": 2.6123, + "step": 8400 + }, + { + "epoch": 0.6779920910338149, + "grad_norm": 0.7211382389068604, + "learning_rate": 0.00012573955272537915, + "loss": 2.6402, + "step": 8401 + }, + { + "epoch": 0.6780727947703978, + "grad_norm": 0.7196084856987, + "learning_rate": 0.00012572429742857167, + "loss": 2.51, + "step": 8402 + }, + { + "epoch": 0.6781534985069809, + "grad_norm": 0.6399394273757935, + "learning_rate": 0.00012570904149064748, + "loss": 2.5309, + "step": 8403 + }, + { + "epoch": 0.6782342022435639, + "grad_norm": 0.6969572305679321, + "learning_rate": 0.00012569378491198674, + "loss": 2.5829, + "step": 8404 + }, + { + "epoch": 0.6783149059801469, + "grad_norm": 0.8005492091178894, + "learning_rate": 0.00012567852769296975, + "loss": 2.6277, + "step": 8405 + }, + { + "epoch": 0.6783956097167299, + "grad_norm": 0.6786207556724548, + "learning_rate": 0.0001256632698339767, + 
"loss": 2.5839, + "step": 8406 + }, + { + "epoch": 0.6784763134533129, + "grad_norm": 0.7047130465507507, + "learning_rate": 0.0001256480113353879, + "loss": 2.533, + "step": 8407 + }, + { + "epoch": 0.6785570171898959, + "grad_norm": 0.7640479803085327, + "learning_rate": 0.0001256327521975836, + "loss": 2.5855, + "step": 8408 + }, + { + "epoch": 0.6786377209264789, + "grad_norm": 0.728111207485199, + "learning_rate": 0.00012561749242094412, + "loss": 2.6184, + "step": 8409 + }, + { + "epoch": 0.6787184246630619, + "grad_norm": 0.7842772603034973, + "learning_rate": 0.00012560223200584975, + "loss": 2.5915, + "step": 8410 + }, + { + "epoch": 0.678799128399645, + "grad_norm": 0.7129092812538147, + "learning_rate": 0.00012558697095268085, + "loss": 2.6526, + "step": 8411 + }, + { + "epoch": 0.6788798321362279, + "grad_norm": 0.751103401184082, + "learning_rate": 0.00012557170926181773, + "loss": 2.605, + "step": 8412 + }, + { + "epoch": 0.6789605358728109, + "grad_norm": 0.6850594878196716, + "learning_rate": 0.0001255564469336408, + "loss": 2.6047, + "step": 8413 + }, + { + "epoch": 0.6790412396093939, + "grad_norm": 0.703037679195404, + "learning_rate": 0.00012554118396853036, + "loss": 2.653, + "step": 8414 + }, + { + "epoch": 0.6791219433459769, + "grad_norm": 0.8097915053367615, + "learning_rate": 0.0001255259203668669, + "loss": 2.5937, + "step": 8415 + }, + { + "epoch": 0.67920264708256, + "grad_norm": 0.700351357460022, + "learning_rate": 0.00012551065612903076, + "loss": 2.6089, + "step": 8416 + }, + { + "epoch": 0.6792833508191429, + "grad_norm": 0.6760888695716858, + "learning_rate": 0.00012549539125540236, + "loss": 2.547, + "step": 8417 + }, + { + "epoch": 0.6793640545557259, + "grad_norm": 0.6751723289489746, + "learning_rate": 0.0001254801257463622, + "loss": 2.625, + "step": 8418 + }, + { + "epoch": 0.6794447582923089, + "grad_norm": 0.6928921937942505, + "learning_rate": 0.00012546485960229065, + "loss": 2.5671, + "step": 8419 + }, + { + "epoch": 
0.679525462028892, + "grad_norm": 0.6541565656661987, + "learning_rate": 0.0001254495928235683, + "loss": 2.5837, + "step": 8420 + }, + { + "epoch": 0.679606165765475, + "grad_norm": 0.6228676438331604, + "learning_rate": 0.00012543432541057555, + "loss": 2.5798, + "step": 8421 + }, + { + "epoch": 0.6796868695020579, + "grad_norm": 0.7620853185653687, + "learning_rate": 0.0001254190573636929, + "loss": 2.5885, + "step": 8422 + }, + { + "epoch": 0.6797675732386409, + "grad_norm": 0.7425604462623596, + "learning_rate": 0.0001254037886833009, + "loss": 2.6124, + "step": 8423 + }, + { + "epoch": 0.679848276975224, + "grad_norm": 0.7150974273681641, + "learning_rate": 0.0001253885193697801, + "loss": 2.5423, + "step": 8424 + }, + { + "epoch": 0.679928980711807, + "grad_norm": 0.672649621963501, + "learning_rate": 0.000125373249423511, + "loss": 2.5563, + "step": 8425 + }, + { + "epoch": 0.6800096844483899, + "grad_norm": 0.6913620829582214, + "learning_rate": 0.00012535797884487425, + "loss": 2.5261, + "step": 8426 + }, + { + "epoch": 0.6800903881849729, + "grad_norm": 0.712123692035675, + "learning_rate": 0.00012534270763425034, + "loss": 2.5958, + "step": 8427 + }, + { + "epoch": 0.680171091921556, + "grad_norm": 0.7593061327934265, + "learning_rate": 0.00012532743579201993, + "loss": 2.6036, + "step": 8428 + }, + { + "epoch": 0.680251795658139, + "grad_norm": 0.7108714580535889, + "learning_rate": 0.0001253121633185636, + "loss": 2.6004, + "step": 8429 + }, + { + "epoch": 0.680332499394722, + "grad_norm": 0.7142449021339417, + "learning_rate": 0.00012529689021426198, + "loss": 2.588, + "step": 8430 + }, + { + "epoch": 0.6804132031313049, + "grad_norm": 0.7579841017723083, + "learning_rate": 0.00012528161647949574, + "loss": 2.5927, + "step": 8431 + }, + { + "epoch": 0.680493906867888, + "grad_norm": 0.6522083878517151, + "learning_rate": 0.00012526634211464555, + "loss": 2.5619, + "step": 8432 + }, + { + "epoch": 0.680574610604471, + "grad_norm": 0.7681782245635986, 
+ "learning_rate": 0.00012525106712009203, + "loss": 2.6065, + "step": 8433 + }, + { + "epoch": 0.680655314341054, + "grad_norm": 0.6900169253349304, + "learning_rate": 0.00012523579149621594, + "loss": 2.5507, + "step": 8434 + }, + { + "epoch": 0.680736018077637, + "grad_norm": 0.6907666325569153, + "learning_rate": 0.00012522051524339794, + "loss": 2.5213, + "step": 8435 + }, + { + "epoch": 0.68081672181422, + "grad_norm": 0.7202023267745972, + "learning_rate": 0.0001252052383620188, + "loss": 2.6367, + "step": 8436 + }, + { + "epoch": 0.680897425550803, + "grad_norm": 0.7893621325492859, + "learning_rate": 0.00012518996085245925, + "loss": 2.6066, + "step": 8437 + }, + { + "epoch": 0.680978129287386, + "grad_norm": 0.7693532109260559, + "learning_rate": 0.00012517468271509998, + "loss": 2.5346, + "step": 8438 + }, + { + "epoch": 0.681058833023969, + "grad_norm": 0.7976840734481812, + "learning_rate": 0.0001251594039503218, + "loss": 2.5991, + "step": 8439 + }, + { + "epoch": 0.6811395367605521, + "grad_norm": 0.7671225666999817, + "learning_rate": 0.00012514412455850554, + "loss": 2.5959, + "step": 8440 + }, + { + "epoch": 0.681220240497135, + "grad_norm": 0.7143450975418091, + "learning_rate": 0.00012512884454003194, + "loss": 2.5828, + "step": 8441 + }, + { + "epoch": 0.681300944233718, + "grad_norm": 0.6821861863136292, + "learning_rate": 0.00012511356389528192, + "loss": 2.5908, + "step": 8442 + }, + { + "epoch": 0.681381647970301, + "grad_norm": 0.7279960513114929, + "learning_rate": 0.00012509828262463615, + "loss": 2.578, + "step": 8443 + }, + { + "epoch": 0.6814623517068841, + "grad_norm": 0.6503065824508667, + "learning_rate": 0.0001250830007284756, + "loss": 2.525, + "step": 8444 + }, + { + "epoch": 0.681543055443467, + "grad_norm": 0.7276029586791992, + "learning_rate": 0.00012506771820718112, + "loss": 2.584, + "step": 8445 + }, + { + "epoch": 0.68162375918005, + "grad_norm": 0.7635578513145447, + "learning_rate": 0.00012505243506113356, + "loss": 
2.627, + "step": 8446 + }, + { + "epoch": 0.681704462916633, + "grad_norm": 0.7086981534957886, + "learning_rate": 0.00012503715129071386, + "loss": 2.6164, + "step": 8447 + }, + { + "epoch": 0.6817851666532161, + "grad_norm": 0.7144165635108948, + "learning_rate": 0.00012502186689630285, + "loss": 2.5642, + "step": 8448 + }, + { + "epoch": 0.6818658703897991, + "grad_norm": 0.8135093450546265, + "learning_rate": 0.00012500658187828155, + "loss": 2.6161, + "step": 8449 + }, + { + "epoch": 0.681946574126382, + "grad_norm": 0.7223377227783203, + "learning_rate": 0.00012499129623703086, + "loss": 2.6192, + "step": 8450 + }, + { + "epoch": 0.682027277862965, + "grad_norm": 0.7189127206802368, + "learning_rate": 0.00012497600997293172, + "loss": 2.6086, + "step": 8451 + }, + { + "epoch": 0.6821079815995481, + "grad_norm": 0.6742144823074341, + "learning_rate": 0.00012496072308636514, + "loss": 2.5747, + "step": 8452 + }, + { + "epoch": 0.6821886853361311, + "grad_norm": 0.7432419657707214, + "learning_rate": 0.0001249454355777121, + "loss": 2.5687, + "step": 8453 + }, + { + "epoch": 0.6822693890727141, + "grad_norm": 0.6140317320823669, + "learning_rate": 0.00012493014744735357, + "loss": 2.5371, + "step": 8454 + }, + { + "epoch": 0.682350092809297, + "grad_norm": 0.7215768098831177, + "learning_rate": 0.0001249148586956706, + "loss": 2.6806, + "step": 8455 + }, + { + "epoch": 0.6824307965458801, + "grad_norm": 0.7485790252685547, + "learning_rate": 0.0001248995693230442, + "loss": 2.575, + "step": 8456 + }, + { + "epoch": 0.6825115002824631, + "grad_norm": 0.744349479675293, + "learning_rate": 0.00012488427932985552, + "loss": 2.5961, + "step": 8457 + }, + { + "epoch": 0.6825922040190461, + "grad_norm": 0.6784959435462952, + "learning_rate": 0.0001248689887164855, + "loss": 2.5501, + "step": 8458 + }, + { + "epoch": 0.682672907755629, + "grad_norm": 0.6664010286331177, + "learning_rate": 0.0001248536974833153, + "loss": 2.5741, + "step": 8459 + }, + { + "epoch": 
0.6827536114922121, + "grad_norm": 0.7185953259468079, + "learning_rate": 0.00012483840563072592, + "loss": 2.5875, + "step": 8460 + }, + { + "epoch": 0.6828343152287951, + "grad_norm": 0.6553035378456116, + "learning_rate": 0.00012482311315909864, + "loss": 2.5321, + "step": 8461 + }, + { + "epoch": 0.6829150189653781, + "grad_norm": 0.6713398694992065, + "learning_rate": 0.00012480782006881442, + "loss": 2.6207, + "step": 8462 + }, + { + "epoch": 0.6829957227019611, + "grad_norm": 0.6733734607696533, + "learning_rate": 0.00012479252636025452, + "loss": 2.5746, + "step": 8463 + }, + { + "epoch": 0.6830764264385442, + "grad_norm": 0.7257994413375854, + "learning_rate": 0.00012477723203380004, + "loss": 2.5837, + "step": 8464 + }, + { + "epoch": 0.6831571301751271, + "grad_norm": 0.716242253780365, + "learning_rate": 0.00012476193708983214, + "loss": 2.5611, + "step": 8465 + }, + { + "epoch": 0.6832378339117101, + "grad_norm": 0.6797829866409302, + "learning_rate": 0.0001247466415287321, + "loss": 2.5763, + "step": 8466 + }, + { + "epoch": 0.6833185376482931, + "grad_norm": 0.679931640625, + "learning_rate": 0.000124731345350881, + "loss": 2.606, + "step": 8467 + }, + { + "epoch": 0.6833992413848761, + "grad_norm": 0.6767866611480713, + "learning_rate": 0.00012471604855666016, + "loss": 2.5682, + "step": 8468 + }, + { + "epoch": 0.6834799451214592, + "grad_norm": 0.7297048568725586, + "learning_rate": 0.00012470075114645078, + "loss": 2.5527, + "step": 8469 + }, + { + "epoch": 0.6835606488580421, + "grad_norm": 0.6882644295692444, + "learning_rate": 0.0001246854531206341, + "loss": 2.5712, + "step": 8470 + }, + { + "epoch": 0.6836413525946251, + "grad_norm": 0.7129159569740295, + "learning_rate": 0.00012467015447959143, + "loss": 2.5627, + "step": 8471 + }, + { + "epoch": 0.6837220563312081, + "grad_norm": 0.6671481728553772, + "learning_rate": 0.000124654855223704, + "loss": 2.6226, + "step": 8472 + }, + { + "epoch": 0.6838027600677912, + "grad_norm": 
0.7096946835517883, + "learning_rate": 0.00012463955535335313, + "loss": 2.5373, + "step": 8473 + }, + { + "epoch": 0.6838834638043741, + "grad_norm": 0.6781395077705383, + "learning_rate": 0.00012462425486892012, + "loss": 2.5607, + "step": 8474 + }, + { + "epoch": 0.6839641675409571, + "grad_norm": 0.6777891516685486, + "learning_rate": 0.00012460895377078632, + "loss": 2.5991, + "step": 8475 + }, + { + "epoch": 0.6840448712775401, + "grad_norm": 0.7175275087356567, + "learning_rate": 0.00012459365205933306, + "loss": 2.6006, + "step": 8476 + }, + { + "epoch": 0.6841255750141232, + "grad_norm": 0.6832807660102844, + "learning_rate": 0.00012457834973494174, + "loss": 2.5757, + "step": 8477 + }, + { + "epoch": 0.6842062787507062, + "grad_norm": 0.7002938985824585, + "learning_rate": 0.00012456304679799366, + "loss": 2.554, + "step": 8478 + }, + { + "epoch": 0.6842869824872891, + "grad_norm": 0.7236241698265076, + "learning_rate": 0.00012454774324887027, + "loss": 2.6054, + "step": 8479 + }, + { + "epoch": 0.6843676862238721, + "grad_norm": 0.7327216267585754, + "learning_rate": 0.00012453243908795288, + "loss": 2.6101, + "step": 8480 + }, + { + "epoch": 0.6844483899604552, + "grad_norm": 0.7414156794548035, + "learning_rate": 0.00012451713431562306, + "loss": 2.5505, + "step": 8481 + }, + { + "epoch": 0.6845290936970382, + "grad_norm": 0.697795569896698, + "learning_rate": 0.00012450182893226214, + "loss": 2.539, + "step": 8482 + }, + { + "epoch": 0.6846097974336212, + "grad_norm": 0.7053593397140503, + "learning_rate": 0.00012448652293825158, + "loss": 2.6045, + "step": 8483 + }, + { + "epoch": 0.6846905011702041, + "grad_norm": 0.6710856556892395, + "learning_rate": 0.00012447121633397287, + "loss": 2.554, + "step": 8484 + }, + { + "epoch": 0.6847712049067872, + "grad_norm": 0.754454493522644, + "learning_rate": 0.0001244559091198075, + "loss": 2.5523, + "step": 8485 + }, + { + "epoch": 0.6848519086433702, + "grad_norm": 0.6468656659126282, + "learning_rate": 
0.0001244406012961369, + "loss": 2.5931, + "step": 8486 + }, + { + "epoch": 0.6849326123799532, + "grad_norm": 0.7169063091278076, + "learning_rate": 0.00012442529286334266, + "loss": 2.5743, + "step": 8487 + }, + { + "epoch": 0.6850133161165362, + "grad_norm": 0.6737040877342224, + "learning_rate": 0.00012440998382180627, + "loss": 2.5734, + "step": 8488 + }, + { + "epoch": 0.6850940198531192, + "grad_norm": 0.7026428580284119, + "learning_rate": 0.0001243946741719093, + "loss": 2.4994, + "step": 8489 + }, + { + "epoch": 0.6851747235897022, + "grad_norm": 0.7378512024879456, + "learning_rate": 0.00012437936391403322, + "loss": 2.5611, + "step": 8490 + }, + { + "epoch": 0.6852554273262852, + "grad_norm": 0.7379863262176514, + "learning_rate": 0.0001243640530485597, + "loss": 2.538, + "step": 8491 + }, + { + "epoch": 0.6853361310628682, + "grad_norm": 0.68398118019104, + "learning_rate": 0.00012434874157587027, + "loss": 2.5593, + "step": 8492 + }, + { + "epoch": 0.6854168347994513, + "grad_norm": 0.6780444383621216, + "learning_rate": 0.0001243334294963466, + "loss": 2.5068, + "step": 8493 + }, + { + "epoch": 0.6854975385360342, + "grad_norm": 0.7425427436828613, + "learning_rate": 0.0001243181168103702, + "loss": 2.6607, + "step": 8494 + }, + { + "epoch": 0.6855782422726172, + "grad_norm": 0.7563300132751465, + "learning_rate": 0.0001243028035183228, + "loss": 2.5915, + "step": 8495 + }, + { + "epoch": 0.6856589460092002, + "grad_norm": 0.6746618151664734, + "learning_rate": 0.000124287489620586, + "loss": 2.5399, + "step": 8496 + }, + { + "epoch": 0.6857396497457833, + "grad_norm": 0.7100487947463989, + "learning_rate": 0.00012427217511754146, + "loss": 2.5927, + "step": 8497 + }, + { + "epoch": 0.6858203534823663, + "grad_norm": 0.6487080454826355, + "learning_rate": 0.00012425686000957088, + "loss": 2.5582, + "step": 8498 + }, + { + "epoch": 0.6859010572189492, + "grad_norm": 0.6577199697494507, + "learning_rate": 0.00012424154429705592, + "loss": 2.5589, + 
"step": 8499 + }, + { + "epoch": 0.6859817609555322, + "grad_norm": 0.6748726963996887, + "learning_rate": 0.00012422622798037832, + "loss": 2.5651, + "step": 8500 + }, + { + "epoch": 0.6860624646921153, + "grad_norm": 0.7159377336502075, + "learning_rate": 0.0001242109110599198, + "loss": 2.569, + "step": 8501 + }, + { + "epoch": 0.6861431684286983, + "grad_norm": 0.6772934198379517, + "learning_rate": 0.00012419559353606208, + "loss": 2.5533, + "step": 8502 + }, + { + "epoch": 0.6862238721652812, + "grad_norm": 0.6776062846183777, + "learning_rate": 0.00012418027540918693, + "loss": 2.5704, + "step": 8503 + }, + { + "epoch": 0.6863045759018642, + "grad_norm": 0.7009913921356201, + "learning_rate": 0.00012416495667967608, + "loss": 2.5928, + "step": 8504 + }, + { + "epoch": 0.6863852796384473, + "grad_norm": 0.607571005821228, + "learning_rate": 0.00012414963734791137, + "loss": 2.5459, + "step": 8505 + }, + { + "epoch": 0.6864659833750303, + "grad_norm": 0.6798292398452759, + "learning_rate": 0.00012413431741427458, + "loss": 2.6585, + "step": 8506 + }, + { + "epoch": 0.6865466871116133, + "grad_norm": 0.7892771363258362, + "learning_rate": 0.00012411899687914747, + "loss": 2.5781, + "step": 8507 + }, + { + "epoch": 0.6866273908481962, + "grad_norm": 0.6683816909790039, + "learning_rate": 0.00012410367574291199, + "loss": 2.5598, + "step": 8508 + }, + { + "epoch": 0.6867080945847793, + "grad_norm": 0.7591805458068848, + "learning_rate": 0.00012408835400594983, + "loss": 2.6478, + "step": 8509 + }, + { + "epoch": 0.6867887983213623, + "grad_norm": 0.6896353960037231, + "learning_rate": 0.00012407303166864293, + "loss": 2.5418, + "step": 8510 + }, + { + "epoch": 0.6868695020579453, + "grad_norm": 0.6657233834266663, + "learning_rate": 0.00012405770873137316, + "loss": 2.5753, + "step": 8511 + }, + { + "epoch": 0.6869502057945283, + "grad_norm": 0.6775455474853516, + "learning_rate": 0.00012404238519452237, + "loss": 2.4902, + "step": 8512 + }, + { + "epoch": 
0.6870309095311113, + "grad_norm": 0.6572847962379456, + "learning_rate": 0.00012402706105847254, + "loss": 2.6189, + "step": 8513 + }, + { + "epoch": 0.6871116132676943, + "grad_norm": 0.7159940004348755, + "learning_rate": 0.00012401173632360557, + "loss": 2.5928, + "step": 8514 + }, + { + "epoch": 0.6871923170042773, + "grad_norm": 0.7178850173950195, + "learning_rate": 0.0001239964109903033, + "loss": 2.5342, + "step": 8515 + }, + { + "epoch": 0.6872730207408603, + "grad_norm": 0.6761649250984192, + "learning_rate": 0.00012398108505894774, + "loss": 2.5716, + "step": 8516 + }, + { + "epoch": 0.6873537244774433, + "grad_norm": 0.6831200122833252, + "learning_rate": 0.0001239657585299209, + "loss": 2.5506, + "step": 8517 + }, + { + "epoch": 0.6874344282140263, + "grad_norm": 0.7064316868782043, + "learning_rate": 0.00012395043140360468, + "loss": 2.541, + "step": 8518 + }, + { + "epoch": 0.6875151319506093, + "grad_norm": 0.7269963026046753, + "learning_rate": 0.00012393510368038113, + "loss": 2.541, + "step": 8519 + }, + { + "epoch": 0.6875958356871923, + "grad_norm": 0.6651471257209778, + "learning_rate": 0.00012391977536063218, + "loss": 2.5476, + "step": 8520 + }, + { + "epoch": 0.6876765394237753, + "grad_norm": 0.7649257779121399, + "learning_rate": 0.00012390444644473994, + "loss": 2.601, + "step": 8521 + }, + { + "epoch": 0.6877572431603584, + "grad_norm": 0.6637376546859741, + "learning_rate": 0.0001238891169330864, + "loss": 2.5582, + "step": 8522 + }, + { + "epoch": 0.6878379468969413, + "grad_norm": 0.6609189510345459, + "learning_rate": 0.0001238737868260536, + "loss": 2.5795, + "step": 8523 + }, + { + "epoch": 0.6879186506335243, + "grad_norm": 0.657494843006134, + "learning_rate": 0.00012385845612402363, + "loss": 2.6005, + "step": 8524 + }, + { + "epoch": 0.6879993543701073, + "grad_norm": 0.6780641674995422, + "learning_rate": 0.00012384312482737858, + "loss": 2.514, + "step": 8525 + }, + { + "epoch": 0.6880800581066904, + "grad_norm": 
0.7310795187950134, + "learning_rate": 0.00012382779293650052, + "loss": 2.5707, + "step": 8526 + }, + { + "epoch": 0.6881607618432733, + "grad_norm": 0.6722557544708252, + "learning_rate": 0.0001238124604517716, + "loss": 2.5897, + "step": 8527 + }, + { + "epoch": 0.6882414655798563, + "grad_norm": 0.6502346992492676, + "learning_rate": 0.0001237971273735739, + "loss": 2.5554, + "step": 8528 + }, + { + "epoch": 0.6883221693164393, + "grad_norm": 0.6993897557258606, + "learning_rate": 0.0001237817937022896, + "loss": 2.6328, + "step": 8529 + }, + { + "epoch": 0.6884028730530224, + "grad_norm": 0.7069644331932068, + "learning_rate": 0.00012376645943830083, + "loss": 2.5957, + "step": 8530 + }, + { + "epoch": 0.6884835767896054, + "grad_norm": 0.7193333506584167, + "learning_rate": 0.00012375112458198973, + "loss": 2.6505, + "step": 8531 + }, + { + "epoch": 0.6885642805261883, + "grad_norm": 0.6821088194847107, + "learning_rate": 0.00012373578913373853, + "loss": 2.6129, + "step": 8532 + }, + { + "epoch": 0.6886449842627713, + "grad_norm": 0.6499428749084473, + "learning_rate": 0.00012372045309392947, + "loss": 2.6053, + "step": 8533 + }, + { + "epoch": 0.6887256879993544, + "grad_norm": 0.7469449639320374, + "learning_rate": 0.00012370511646294464, + "loss": 2.6423, + "step": 8534 + }, + { + "epoch": 0.6888063917359374, + "grad_norm": 0.7326325178146362, + "learning_rate": 0.00012368977924116637, + "loss": 2.5708, + "step": 8535 + }, + { + "epoch": 0.6888870954725204, + "grad_norm": 0.7459580302238464, + "learning_rate": 0.00012367444142897686, + "loss": 2.544, + "step": 8536 + }, + { + "epoch": 0.6889677992091033, + "grad_norm": 0.7198929786682129, + "learning_rate": 0.00012365910302675843, + "loss": 2.6295, + "step": 8537 + }, + { + "epoch": 0.6890485029456864, + "grad_norm": 0.8139802813529968, + "learning_rate": 0.0001236437640348933, + "loss": 2.549, + "step": 8538 + }, + { + "epoch": 0.6891292066822694, + "grad_norm": 0.6497162580490112, + "learning_rate": 
0.00012362842445376372, + "loss": 2.5849, + "step": 8539 + }, + { + "epoch": 0.6892099104188524, + "grad_norm": 0.7378165125846863, + "learning_rate": 0.00012361308428375208, + "loss": 2.606, + "step": 8540 + }, + { + "epoch": 0.6892906141554354, + "grad_norm": 0.6807567477226257, + "learning_rate": 0.00012359774352524062, + "loss": 2.5892, + "step": 8541 + }, + { + "epoch": 0.6893713178920184, + "grad_norm": 0.6639370918273926, + "learning_rate": 0.0001235824021786117, + "loss": 2.5249, + "step": 8542 + }, + { + "epoch": 0.6894520216286014, + "grad_norm": 0.7140880823135376, + "learning_rate": 0.00012356706024424773, + "loss": 2.5877, + "step": 8543 + }, + { + "epoch": 0.6895327253651844, + "grad_norm": 0.7079257965087891, + "learning_rate": 0.00012355171772253097, + "loss": 2.6011, + "step": 8544 + }, + { + "epoch": 0.6896134291017674, + "grad_norm": 0.7150856852531433, + "learning_rate": 0.00012353637461384387, + "loss": 2.549, + "step": 8545 + }, + { + "epoch": 0.6896941328383505, + "grad_norm": 0.6896397471427917, + "learning_rate": 0.00012352103091856876, + "loss": 2.5452, + "step": 8546 + }, + { + "epoch": 0.6897748365749334, + "grad_norm": 0.696964681148529, + "learning_rate": 0.00012350568663708808, + "loss": 2.5075, + "step": 8547 + }, + { + "epoch": 0.6898555403115164, + "grad_norm": 0.6926069855690002, + "learning_rate": 0.00012349034176978427, + "loss": 2.5905, + "step": 8548 + }, + { + "epoch": 0.6899362440480994, + "grad_norm": 0.6949423551559448, + "learning_rate": 0.00012347499631703968, + "loss": 2.5284, + "step": 8549 + }, + { + "epoch": 0.6900169477846825, + "grad_norm": 0.6480536460876465, + "learning_rate": 0.0001234596502792369, + "loss": 2.5713, + "step": 8550 + }, + { + "epoch": 0.6900976515212655, + "grad_norm": 0.6990019679069519, + "learning_rate": 0.00012344430365675825, + "loss": 2.5826, + "step": 8551 + }, + { + "epoch": 0.6901783552578484, + "grad_norm": 0.7063903212547302, + "learning_rate": 0.00012342895644998627, + "loss": 2.5271, 
+ "step": 8552 + }, + { + "epoch": 0.6902590589944314, + "grad_norm": 0.7037132978439331, + "learning_rate": 0.0001234136086593035, + "loss": 2.5855, + "step": 8553 + }, + { + "epoch": 0.6903397627310145, + "grad_norm": 0.679701030254364, + "learning_rate": 0.00012339826028509235, + "loss": 2.5577, + "step": 8554 + }, + { + "epoch": 0.6904204664675975, + "grad_norm": 0.7088965773582458, + "learning_rate": 0.0001233829113277354, + "loss": 2.5767, + "step": 8555 + }, + { + "epoch": 0.6905011702041804, + "grad_norm": 0.7115551829338074, + "learning_rate": 0.00012336756178761517, + "loss": 2.5651, + "step": 8556 + }, + { + "epoch": 0.6905818739407634, + "grad_norm": 0.6778836250305176, + "learning_rate": 0.00012335221166511425, + "loss": 2.6388, + "step": 8557 + }, + { + "epoch": 0.6906625776773465, + "grad_norm": 0.6358879804611206, + "learning_rate": 0.00012333686096061515, + "loss": 2.5493, + "step": 8558 + }, + { + "epoch": 0.6907432814139295, + "grad_norm": 0.688197135925293, + "learning_rate": 0.00012332150967450046, + "loss": 2.5707, + "step": 8559 + }, + { + "epoch": 0.6908239851505125, + "grad_norm": 0.6931524872779846, + "learning_rate": 0.0001233061578071528, + "loss": 2.5561, + "step": 8560 + }, + { + "epoch": 0.6909046888870954, + "grad_norm": 0.6684975624084473, + "learning_rate": 0.00012329080535895478, + "loss": 2.6442, + "step": 8561 + }, + { + "epoch": 0.6909853926236785, + "grad_norm": 0.6865811347961426, + "learning_rate": 0.00012327545233028898, + "loss": 2.564, + "step": 8562 + }, + { + "epoch": 0.6910660963602615, + "grad_norm": 0.6999006867408752, + "learning_rate": 0.0001232600987215381, + "loss": 2.5607, + "step": 8563 + }, + { + "epoch": 0.6911468000968445, + "grad_norm": 0.6734526753425598, + "learning_rate": 0.0001232447445330847, + "loss": 2.5261, + "step": 8564 + }, + { + "epoch": 0.6912275038334275, + "grad_norm": 0.7447343468666077, + "learning_rate": 0.00012322938976531153, + "loss": 2.5359, + "step": 8565 + }, + { + "epoch": 
0.6913082075700105, + "grad_norm": 0.6498517394065857, + "learning_rate": 0.00012321403441860126, + "loss": 2.5345, + "step": 8566 + }, + { + "epoch": 0.6913889113065935, + "grad_norm": 0.692933976650238, + "learning_rate": 0.00012319867849333658, + "loss": 2.6293, + "step": 8567 + }, + { + "epoch": 0.6914696150431765, + "grad_norm": 0.728430449962616, + "learning_rate": 0.00012318332198990015, + "loss": 2.618, + "step": 8568 + }, + { + "epoch": 0.6915503187797595, + "grad_norm": 0.7029061913490295, + "learning_rate": 0.00012316796490867478, + "loss": 2.6151, + "step": 8569 + }, + { + "epoch": 0.6916310225163425, + "grad_norm": 0.6692330241203308, + "learning_rate": 0.00012315260725004313, + "loss": 2.5511, + "step": 8570 + }, + { + "epoch": 0.6917117262529255, + "grad_norm": 0.6811983585357666, + "learning_rate": 0.000123137249014388, + "loss": 2.6337, + "step": 8571 + }, + { + "epoch": 0.6917924299895085, + "grad_norm": 0.7387441992759705, + "learning_rate": 0.00012312189020209212, + "loss": 2.5679, + "step": 8572 + }, + { + "epoch": 0.6918731337260915, + "grad_norm": 0.7180185914039612, + "learning_rate": 0.0001231065308135383, + "loss": 2.639, + "step": 8573 + }, + { + "epoch": 0.6919538374626745, + "grad_norm": 0.6997829079627991, + "learning_rate": 0.00012309117084910936, + "loss": 2.5392, + "step": 8574 + }, + { + "epoch": 0.6920345411992576, + "grad_norm": 0.7004552483558655, + "learning_rate": 0.00012307581030918807, + "loss": 2.6033, + "step": 8575 + }, + { + "epoch": 0.6921152449358405, + "grad_norm": 0.7183418273925781, + "learning_rate": 0.00012306044919415724, + "loss": 2.6302, + "step": 8576 + }, + { + "epoch": 0.6921959486724235, + "grad_norm": 0.6645712852478027, + "learning_rate": 0.00012304508750439976, + "loss": 2.5401, + "step": 8577 + }, + { + "epoch": 0.6922766524090065, + "grad_norm": 0.6455898284912109, + "learning_rate": 0.00012302972524029848, + "loss": 2.5084, + "step": 8578 + }, + { + "epoch": 0.6923573561455896, + "grad_norm": 
0.6933849453926086, + "learning_rate": 0.00012301436240223622, + "loss": 2.5734, + "step": 8579 + }, + { + "epoch": 0.6924380598821726, + "grad_norm": 0.7967655658721924, + "learning_rate": 0.00012299899899059587, + "loss": 2.5721, + "step": 8580 + }, + { + "epoch": 0.6925187636187555, + "grad_norm": 0.706730306148529, + "learning_rate": 0.0001229836350057604, + "loss": 2.6216, + "step": 8581 + }, + { + "epoch": 0.6925994673553385, + "grad_norm": 0.7021105885505676, + "learning_rate": 0.0001229682704481126, + "loss": 2.4877, + "step": 8582 + }, + { + "epoch": 0.6926801710919216, + "grad_norm": 0.7197253108024597, + "learning_rate": 0.00012295290531803553, + "loss": 2.6124, + "step": 8583 + }, + { + "epoch": 0.6927608748285046, + "grad_norm": 0.7559605836868286, + "learning_rate": 0.00012293753961591198, + "loss": 2.6391, + "step": 8584 + }, + { + "epoch": 0.6928415785650875, + "grad_norm": 0.7074676752090454, + "learning_rate": 0.00012292217334212505, + "loss": 2.5949, + "step": 8585 + }, + { + "epoch": 0.6929222823016705, + "grad_norm": 0.6843528747558594, + "learning_rate": 0.00012290680649705763, + "loss": 2.4981, + "step": 8586 + }, + { + "epoch": 0.6930029860382536, + "grad_norm": 0.6853117942810059, + "learning_rate": 0.00012289143908109266, + "loss": 2.6352, + "step": 8587 + }, + { + "epoch": 0.6930836897748366, + "grad_norm": 0.6545630097389221, + "learning_rate": 0.00012287607109461325, + "loss": 2.5344, + "step": 8588 + }, + { + "epoch": 0.6931643935114196, + "grad_norm": 0.7377945184707642, + "learning_rate": 0.00012286070253800233, + "loss": 2.5895, + "step": 8589 + }, + { + "epoch": 0.6932450972480025, + "grad_norm": 0.6919971108436584, + "learning_rate": 0.00012284533341164295, + "loss": 2.5825, + "step": 8590 + }, + { + "epoch": 0.6933258009845856, + "grad_norm": 0.6911910176277161, + "learning_rate": 0.00012282996371591816, + "loss": 2.6008, + "step": 8591 + }, + { + "epoch": 0.6934065047211686, + "grad_norm": 0.7486373782157898, + "learning_rate": 
0.00012281459345121095, + "loss": 2.6056, + "step": 8592 + }, + { + "epoch": 0.6934872084577516, + "grad_norm": 0.6829040050506592, + "learning_rate": 0.00012279922261790443, + "loss": 2.5161, + "step": 8593 + }, + { + "epoch": 0.6935679121943346, + "grad_norm": 0.7410104870796204, + "learning_rate": 0.00012278385121638173, + "loss": 2.6114, + "step": 8594 + }, + { + "epoch": 0.6936486159309176, + "grad_norm": 0.7355940937995911, + "learning_rate": 0.00012276847924702587, + "loss": 2.6371, + "step": 8595 + }, + { + "epoch": 0.6937293196675006, + "grad_norm": 0.650641679763794, + "learning_rate": 0.00012275310671022003, + "loss": 2.5568, + "step": 8596 + }, + { + "epoch": 0.6938100234040836, + "grad_norm": 0.661573052406311, + "learning_rate": 0.00012273773360634726, + "loss": 2.5828, + "step": 8597 + }, + { + "epoch": 0.6938907271406666, + "grad_norm": 0.6848435401916504, + "learning_rate": 0.00012272235993579072, + "loss": 2.5226, + "step": 8598 + }, + { + "epoch": 0.6939714308772497, + "grad_norm": 0.7015430927276611, + "learning_rate": 0.0001227069856989336, + "loss": 2.6156, + "step": 8599 + }, + { + "epoch": 0.6940521346138326, + "grad_norm": 0.7058628797531128, + "learning_rate": 0.000122691610896159, + "loss": 2.6007, + "step": 8600 + }, + { + "epoch": 0.6941328383504156, + "grad_norm": 0.6589432954788208, + "learning_rate": 0.0001226762355278502, + "loss": 2.5551, + "step": 8601 + }, + { + "epoch": 0.6942135420869986, + "grad_norm": 0.6875284910202026, + "learning_rate": 0.0001226608595943903, + "loss": 2.5537, + "step": 8602 + }, + { + "epoch": 0.6942942458235817, + "grad_norm": 0.7178356051445007, + "learning_rate": 0.00012264548309616252, + "loss": 2.655, + "step": 8603 + }, + { + "epoch": 0.6943749495601647, + "grad_norm": 0.7327077388763428, + "learning_rate": 0.00012263010603355017, + "loss": 2.5574, + "step": 8604 + }, + { + "epoch": 0.6944556532967476, + "grad_norm": 0.6318337917327881, + "learning_rate": 0.0001226147284069364, + "loss": 2.577, + 
"step": 8605 + }, + { + "epoch": 0.6945363570333306, + "grad_norm": 0.674872875213623, + "learning_rate": 0.00012259935021670444, + "loss": 2.6225, + "step": 8606 + }, + { + "epoch": 0.6946170607699137, + "grad_norm": 0.6554198861122131, + "learning_rate": 0.0001225839714632376, + "loss": 2.5951, + "step": 8607 + }, + { + "epoch": 0.6946977645064967, + "grad_norm": 0.7086453437805176, + "learning_rate": 0.00012256859214691918, + "loss": 2.622, + "step": 8608 + }, + { + "epoch": 0.6947784682430796, + "grad_norm": 0.6609488129615784, + "learning_rate": 0.00012255321226813245, + "loss": 2.5623, + "step": 8609 + }, + { + "epoch": 0.6948591719796626, + "grad_norm": 0.7504609823226929, + "learning_rate": 0.00012253783182726075, + "loss": 2.5264, + "step": 8610 + }, + { + "epoch": 0.6949398757162457, + "grad_norm": 0.6702934503555298, + "learning_rate": 0.00012252245082468733, + "loss": 2.5877, + "step": 8611 + }, + { + "epoch": 0.6950205794528287, + "grad_norm": 0.7116326689720154, + "learning_rate": 0.00012250706926079553, + "loss": 2.5629, + "step": 8612 + }, + { + "epoch": 0.6951012831894117, + "grad_norm": 0.7495368719100952, + "learning_rate": 0.00012249168713596875, + "loss": 2.5731, + "step": 8613 + }, + { + "epoch": 0.6951819869259946, + "grad_norm": 0.7434844970703125, + "learning_rate": 0.0001224763044505904, + "loss": 2.6008, + "step": 8614 + }, + { + "epoch": 0.6952626906625777, + "grad_norm": 0.719667375087738, + "learning_rate": 0.00012246092120504371, + "loss": 2.6051, + "step": 8615 + }, + { + "epoch": 0.6953433943991607, + "grad_norm": 0.7189086079597473, + "learning_rate": 0.00012244553739971216, + "loss": 2.5662, + "step": 8616 + }, + { + "epoch": 0.6954240981357437, + "grad_norm": 0.7222673892974854, + "learning_rate": 0.00012243015303497917, + "loss": 2.609, + "step": 8617 + }, + { + "epoch": 0.6955048018723267, + "grad_norm": 0.7323142290115356, + "learning_rate": 0.00012241476811122813, + "loss": 2.5458, + "step": 8618 + }, + { + "epoch": 
0.6955855056089096, + "grad_norm": 0.7374032735824585, + "learning_rate": 0.00012239938262884246, + "loss": 2.6147, + "step": 8619 + }, + { + "epoch": 0.6956662093454927, + "grad_norm": 0.6707843542098999, + "learning_rate": 0.00012238399658820562, + "loss": 2.6462, + "step": 8620 + }, + { + "epoch": 0.6957469130820757, + "grad_norm": 0.7603243589401245, + "learning_rate": 0.0001223686099897011, + "loss": 2.6295, + "step": 8621 + }, + { + "epoch": 0.6958276168186587, + "grad_norm": 0.6966906785964966, + "learning_rate": 0.00012235322283371232, + "loss": 2.545, + "step": 8622 + }, + { + "epoch": 0.6959083205552417, + "grad_norm": 0.6757891774177551, + "learning_rate": 0.0001223378351206228, + "loss": 2.5548, + "step": 8623 + }, + { + "epoch": 0.6959890242918247, + "grad_norm": 0.6901456713676453, + "learning_rate": 0.00012232244685081605, + "loss": 2.5734, + "step": 8624 + }, + { + "epoch": 0.6960697280284077, + "grad_norm": 0.6942903995513916, + "learning_rate": 0.00012230705802467558, + "loss": 2.5495, + "step": 8625 + }, + { + "epoch": 0.6961504317649907, + "grad_norm": 0.6774815320968628, + "learning_rate": 0.0001222916686425849, + "loss": 2.5076, + "step": 8626 + }, + { + "epoch": 0.6962311355015737, + "grad_norm": 0.8037571310997009, + "learning_rate": 0.00012227627870492754, + "loss": 2.6737, + "step": 8627 + }, + { + "epoch": 0.6963118392381568, + "grad_norm": 0.7027560472488403, + "learning_rate": 0.0001222608882120871, + "loss": 2.5401, + "step": 8628 + }, + { + "epoch": 0.6963925429747397, + "grad_norm": 0.6651299595832825, + "learning_rate": 0.00012224549716444714, + "loss": 2.5835, + "step": 8629 + }, + { + "epoch": 0.6964732467113227, + "grad_norm": 0.7082433104515076, + "learning_rate": 0.00012223010556239124, + "loss": 2.5622, + "step": 8630 + }, + { + "epoch": 0.6965539504479057, + "grad_norm": 0.7993464469909668, + "learning_rate": 0.00012221471340630305, + "loss": 2.655, + "step": 8631 + }, + { + "epoch": 0.6966346541844888, + "grad_norm": 
0.7375298142433167, + "learning_rate": 0.00012219932069656606, + "loss": 2.598, + "step": 8632 + }, + { + "epoch": 0.6967153579210718, + "grad_norm": 0.6915456652641296, + "learning_rate": 0.00012218392743356397, + "loss": 2.5649, + "step": 8633 + }, + { + "epoch": 0.6967960616576547, + "grad_norm": 0.679256021976471, + "learning_rate": 0.00012216853361768045, + "loss": 2.545, + "step": 8634 + }, + { + "epoch": 0.6968767653942377, + "grad_norm": 0.7234694361686707, + "learning_rate": 0.0001221531392492991, + "loss": 2.5863, + "step": 8635 + }, + { + "epoch": 0.6969574691308208, + "grad_norm": 0.7053319811820984, + "learning_rate": 0.00012213774432880364, + "loss": 2.5829, + "step": 8636 + }, + { + "epoch": 0.6970381728674038, + "grad_norm": 0.7584449648857117, + "learning_rate": 0.00012212234885657772, + "loss": 2.5855, + "step": 8637 + }, + { + "epoch": 0.6971188766039867, + "grad_norm": 0.7098579406738281, + "learning_rate": 0.00012210695283300501, + "loss": 2.6057, + "step": 8638 + }, + { + "epoch": 0.6971995803405697, + "grad_norm": 0.7350205779075623, + "learning_rate": 0.00012209155625846928, + "loss": 2.546, + "step": 8639 + }, + { + "epoch": 0.6972802840771528, + "grad_norm": 0.6842331290245056, + "learning_rate": 0.0001220761591333542, + "loss": 2.5602, + "step": 8640 + }, + { + "epoch": 0.6973609878137358, + "grad_norm": 0.6731252074241638, + "learning_rate": 0.00012206076145804354, + "loss": 2.4676, + "step": 8641 + }, + { + "epoch": 0.6974416915503188, + "grad_norm": 0.7271167635917664, + "learning_rate": 0.00012204536323292104, + "loss": 2.5605, + "step": 8642 + }, + { + "epoch": 0.6975223952869017, + "grad_norm": 0.6860780715942383, + "learning_rate": 0.00012202996445837043, + "loss": 2.5041, + "step": 8643 + }, + { + "epoch": 0.6976030990234848, + "grad_norm": 0.7134578824043274, + "learning_rate": 0.00012201456513477554, + "loss": 2.614, + "step": 8644 + }, + { + "epoch": 0.6976838027600678, + "grad_norm": 0.6995248198509216, + "learning_rate": 
0.00012199916526252014, + "loss": 2.5087, + "step": 8645 + }, + { + "epoch": 0.6977645064966508, + "grad_norm": 0.7280197143554688, + "learning_rate": 0.00012198376484198803, + "loss": 2.5723, + "step": 8646 + }, + { + "epoch": 0.6978452102332338, + "grad_norm": 0.6898967623710632, + "learning_rate": 0.00012196836387356306, + "loss": 2.6073, + "step": 8647 + }, + { + "epoch": 0.6979259139698168, + "grad_norm": 0.6670758128166199, + "learning_rate": 0.00012195296235762901, + "loss": 2.5276, + "step": 8648 + }, + { + "epoch": 0.6980066177063998, + "grad_norm": 0.6862780451774597, + "learning_rate": 0.00012193756029456973, + "loss": 2.5363, + "step": 8649 + }, + { + "epoch": 0.6980873214429828, + "grad_norm": 0.6568876504898071, + "learning_rate": 0.00012192215768476916, + "loss": 2.5828, + "step": 8650 + }, + { + "epoch": 0.6981680251795658, + "grad_norm": 0.7237746119499207, + "learning_rate": 0.00012190675452861107, + "loss": 2.6076, + "step": 8651 + }, + { + "epoch": 0.6982487289161489, + "grad_norm": 0.6831536293029785, + "learning_rate": 0.00012189135082647943, + "loss": 2.5199, + "step": 8652 + }, + { + "epoch": 0.6983294326527318, + "grad_norm": 0.6767029166221619, + "learning_rate": 0.00012187594657875805, + "loss": 2.5859, + "step": 8653 + }, + { + "epoch": 0.6984101363893148, + "grad_norm": 0.6977167129516602, + "learning_rate": 0.00012186054178583092, + "loss": 2.5831, + "step": 8654 + }, + { + "epoch": 0.6984908401258978, + "grad_norm": 0.6369525194168091, + "learning_rate": 0.00012184513644808197, + "loss": 2.5839, + "step": 8655 + }, + { + "epoch": 0.6985715438624809, + "grad_norm": 0.6814634203910828, + "learning_rate": 0.00012182973056589508, + "loss": 2.5493, + "step": 8656 + }, + { + "epoch": 0.6986522475990639, + "grad_norm": 0.6895000338554382, + "learning_rate": 0.00012181432413965428, + "loss": 2.5616, + "step": 8657 + }, + { + "epoch": 0.6987329513356468, + "grad_norm": 0.6689717769622803, + "learning_rate": 0.00012179891716974345, + "loss": 
2.5481, + "step": 8658 + }, + { + "epoch": 0.6988136550722298, + "grad_norm": 0.6945160031318665, + "learning_rate": 0.00012178350965654666, + "loss": 2.5781, + "step": 8659 + }, + { + "epoch": 0.6988943588088129, + "grad_norm": 0.7226110696792603, + "learning_rate": 0.00012176810160044785, + "loss": 2.5767, + "step": 8660 + }, + { + "epoch": 0.6989750625453959, + "grad_norm": 0.6810569167137146, + "learning_rate": 0.00012175269300183105, + "loss": 2.5184, + "step": 8661 + }, + { + "epoch": 0.6990557662819789, + "grad_norm": 0.727281928062439, + "learning_rate": 0.0001217372838610803, + "loss": 2.5972, + "step": 8662 + }, + { + "epoch": 0.6991364700185618, + "grad_norm": 0.7111573219299316, + "learning_rate": 0.00012172187417857959, + "loss": 2.6445, + "step": 8663 + }, + { + "epoch": 0.6992171737551449, + "grad_norm": 0.6808965802192688, + "learning_rate": 0.00012170646395471296, + "loss": 2.5191, + "step": 8664 + }, + { + "epoch": 0.6992978774917279, + "grad_norm": 0.7063688635826111, + "learning_rate": 0.00012169105318986455, + "loss": 2.6021, + "step": 8665 + }, + { + "epoch": 0.6993785812283109, + "grad_norm": 0.6522886753082275, + "learning_rate": 0.0001216756418844184, + "loss": 2.5697, + "step": 8666 + }, + { + "epoch": 0.6994592849648938, + "grad_norm": 0.6706095337867737, + "learning_rate": 0.00012166023003875859, + "loss": 2.5706, + "step": 8667 + }, + { + "epoch": 0.6995399887014769, + "grad_norm": 0.6744416356086731, + "learning_rate": 0.00012164481765326923, + "loss": 2.5713, + "step": 8668 + }, + { + "epoch": 0.6996206924380599, + "grad_norm": 0.7385411858558655, + "learning_rate": 0.0001216294047283344, + "loss": 2.5543, + "step": 8669 + }, + { + "epoch": 0.6997013961746429, + "grad_norm": 0.7286678552627563, + "learning_rate": 0.0001216139912643383, + "loss": 2.588, + "step": 8670 + }, + { + "epoch": 0.6997820999112259, + "grad_norm": 0.7065937519073486, + "learning_rate": 0.00012159857726166503, + "loss": 2.5475, + "step": 8671 + }, + { + "epoch": 
0.6998628036478088, + "grad_norm": 0.6609788537025452, + "learning_rate": 0.00012158316272069874, + "loss": 2.5664, + "step": 8672 + }, + { + "epoch": 0.6999435073843919, + "grad_norm": 0.7360579371452332, + "learning_rate": 0.00012156774764182364, + "loss": 2.5822, + "step": 8673 + }, + { + "epoch": 0.7000242111209749, + "grad_norm": 0.6265058517456055, + "learning_rate": 0.00012155233202542384, + "loss": 2.5849, + "step": 8674 + }, + { + "epoch": 0.7001049148575579, + "grad_norm": 0.646976888179779, + "learning_rate": 0.00012153691587188363, + "loss": 2.5839, + "step": 8675 + }, + { + "epoch": 0.7001856185941409, + "grad_norm": 0.6634985208511353, + "learning_rate": 0.0001215214991815872, + "loss": 2.5434, + "step": 8676 + }, + { + "epoch": 0.700266322330724, + "grad_norm": 0.6757560968399048, + "learning_rate": 0.00012150608195491871, + "loss": 2.6186, + "step": 8677 + }, + { + "epoch": 0.7003470260673069, + "grad_norm": 0.7077112197875977, + "learning_rate": 0.00012149066419226247, + "loss": 2.5757, + "step": 8678 + }, + { + "epoch": 0.7004277298038899, + "grad_norm": 0.698226273059845, + "learning_rate": 0.00012147524589400268, + "loss": 2.5307, + "step": 8679 + }, + { + "epoch": 0.7005084335404729, + "grad_norm": 0.6782405376434326, + "learning_rate": 0.00012145982706052361, + "loss": 2.5582, + "step": 8680 + }, + { + "epoch": 0.700589137277056, + "grad_norm": 0.6832882165908813, + "learning_rate": 0.0001214444076922096, + "loss": 2.574, + "step": 8681 + }, + { + "epoch": 0.7006698410136389, + "grad_norm": 0.7182612419128418, + "learning_rate": 0.00012142898778944485, + "loss": 2.6457, + "step": 8682 + }, + { + "epoch": 0.7007505447502219, + "grad_norm": 0.7043644785881042, + "learning_rate": 0.00012141356735261373, + "loss": 2.5244, + "step": 8683 + }, + { + "epoch": 0.7008312484868049, + "grad_norm": 0.6942669749259949, + "learning_rate": 0.00012139814638210054, + "loss": 2.5507, + "step": 8684 + }, + { + "epoch": 0.700911952223388, + "grad_norm": 
0.8412066102027893, + "learning_rate": 0.00012138272487828959, + "loss": 2.6025, + "step": 8685 + }, + { + "epoch": 0.700992655959971, + "grad_norm": 0.6906788945198059, + "learning_rate": 0.00012136730284156525, + "loss": 2.5259, + "step": 8686 + }, + { + "epoch": 0.7010733596965539, + "grad_norm": 0.7258631587028503, + "learning_rate": 0.00012135188027231188, + "loss": 2.6311, + "step": 8687 + }, + { + "epoch": 0.7011540634331369, + "grad_norm": 0.6294744610786438, + "learning_rate": 0.00012133645717091382, + "loss": 2.5969, + "step": 8688 + }, + { + "epoch": 0.70123476716972, + "grad_norm": 0.6994131207466125, + "learning_rate": 0.00012132103353775548, + "loss": 2.5954, + "step": 8689 + }, + { + "epoch": 0.701315470906303, + "grad_norm": 0.671441912651062, + "learning_rate": 0.00012130560937322124, + "loss": 2.5628, + "step": 8690 + }, + { + "epoch": 0.701396174642886, + "grad_norm": 0.6915482878684998, + "learning_rate": 0.00012129018467769555, + "loss": 2.5173, + "step": 8691 + }, + { + "epoch": 0.7014768783794689, + "grad_norm": 0.6810318231582642, + "learning_rate": 0.00012127475945156279, + "loss": 2.6186, + "step": 8692 + }, + { + "epoch": 0.701557582116052, + "grad_norm": 0.7931910157203674, + "learning_rate": 0.00012125933369520741, + "loss": 2.6243, + "step": 8693 + }, + { + "epoch": 0.701638285852635, + "grad_norm": 0.6843162178993225, + "learning_rate": 0.00012124390740901386, + "loss": 2.6072, + "step": 8694 + }, + { + "epoch": 0.701718989589218, + "grad_norm": 0.672115683555603, + "learning_rate": 0.0001212284805933666, + "loss": 2.6027, + "step": 8695 + }, + { + "epoch": 0.7017996933258009, + "grad_norm": 0.65242600440979, + "learning_rate": 0.00012121305324865014, + "loss": 2.5128, + "step": 8696 + }, + { + "epoch": 0.701880397062384, + "grad_norm": 0.7253173589706421, + "learning_rate": 0.00012119762537524893, + "loss": 2.5776, + "step": 8697 + }, + { + "epoch": 0.701961100798967, + "grad_norm": 0.6536431312561035, + "learning_rate": 
0.00012118219697354745, + "loss": 2.5656, + "step": 8698 + }, + { + "epoch": 0.70204180453555, + "grad_norm": 0.7121500372886658, + "learning_rate": 0.00012116676804393028, + "loss": 2.5878, + "step": 8699 + }, + { + "epoch": 0.702122508272133, + "grad_norm": 0.676449716091156, + "learning_rate": 0.00012115133858678191, + "loss": 2.6624, + "step": 8700 + }, + { + "epoch": 0.702203212008716, + "grad_norm": 0.7230382561683655, + "learning_rate": 0.0001211359086024869, + "loss": 2.5461, + "step": 8701 + }, + { + "epoch": 0.702283915745299, + "grad_norm": 0.6679937839508057, + "learning_rate": 0.00012112047809142979, + "loss": 2.5568, + "step": 8702 + }, + { + "epoch": 0.702364619481882, + "grad_norm": 0.6627704501152039, + "learning_rate": 0.0001211050470539952, + "loss": 2.4819, + "step": 8703 + }, + { + "epoch": 0.702445323218465, + "grad_norm": 0.6680646538734436, + "learning_rate": 0.0001210896154905676, + "loss": 2.5722, + "step": 8704 + }, + { + "epoch": 0.7025260269550481, + "grad_norm": 0.7406336665153503, + "learning_rate": 0.00012107418340153167, + "loss": 2.5722, + "step": 8705 + }, + { + "epoch": 0.702606730691631, + "grad_norm": 0.6634557247161865, + "learning_rate": 0.00012105875078727203, + "loss": 2.5747, + "step": 8706 + }, + { + "epoch": 0.702687434428214, + "grad_norm": 0.6521568894386292, + "learning_rate": 0.00012104331764817325, + "loss": 2.555, + "step": 8707 + }, + { + "epoch": 0.702768138164797, + "grad_norm": 0.677606463432312, + "learning_rate": 0.00012102788398461999, + "loss": 2.5544, + "step": 8708 + }, + { + "epoch": 0.7028488419013801, + "grad_norm": 0.6593700051307678, + "learning_rate": 0.0001210124497969969, + "loss": 2.5252, + "step": 8709 + }, + { + "epoch": 0.7029295456379631, + "grad_norm": 0.686903715133667, + "learning_rate": 0.00012099701508568863, + "loss": 2.6513, + "step": 8710 + }, + { + "epoch": 0.703010249374546, + "grad_norm": 0.6395620107650757, + "learning_rate": 0.00012098157985107987, + "loss": 2.5169, + "step": 
8711 + }, + { + "epoch": 0.703090953111129, + "grad_norm": 0.7387555837631226, + "learning_rate": 0.00012096614409355526, + "loss": 2.5741, + "step": 8712 + }, + { + "epoch": 0.7031716568477121, + "grad_norm": 0.665900707244873, + "learning_rate": 0.00012095070781349957, + "loss": 2.5068, + "step": 8713 + }, + { + "epoch": 0.7032523605842951, + "grad_norm": 0.6983458399772644, + "learning_rate": 0.00012093527101129745, + "loss": 2.5028, + "step": 8714 + }, + { + "epoch": 0.703333064320878, + "grad_norm": 0.6250826120376587, + "learning_rate": 0.00012091983368733366, + "loss": 2.5765, + "step": 8715 + }, + { + "epoch": 0.703413768057461, + "grad_norm": 0.7031501531600952, + "learning_rate": 0.00012090439584199294, + "loss": 2.5885, + "step": 8716 + }, + { + "epoch": 0.7034944717940441, + "grad_norm": 0.7140926122665405, + "learning_rate": 0.00012088895747566002, + "loss": 2.6278, + "step": 8717 + }, + { + "epoch": 0.7035751755306271, + "grad_norm": 0.6753602027893066, + "learning_rate": 0.00012087351858871969, + "loss": 2.5664, + "step": 8718 + }, + { + "epoch": 0.7036558792672101, + "grad_norm": 0.7150039076805115, + "learning_rate": 0.0001208580791815567, + "loss": 2.6739, + "step": 8719 + }, + { + "epoch": 0.703736583003793, + "grad_norm": 0.7120389342308044, + "learning_rate": 0.00012084263925455583, + "loss": 2.565, + "step": 8720 + }, + { + "epoch": 0.703817286740376, + "grad_norm": 0.7775784134864807, + "learning_rate": 0.00012082719880810194, + "loss": 2.5861, + "step": 8721 + }, + { + "epoch": 0.7038979904769591, + "grad_norm": 0.6704322695732117, + "learning_rate": 0.0001208117578425798, + "loss": 2.5957, + "step": 8722 + }, + { + "epoch": 0.7039786942135421, + "grad_norm": 0.6761276721954346, + "learning_rate": 0.00012079631635837426, + "loss": 2.5472, + "step": 8723 + }, + { + "epoch": 0.7040593979501251, + "grad_norm": 0.7639868855476379, + "learning_rate": 0.00012078087435587016, + "loss": 2.6053, + "step": 8724 + }, + { + "epoch": 0.704140101686708, + 
"grad_norm": 0.7490074038505554, + "learning_rate": 0.0001207654318354523, + "loss": 2.5517, + "step": 8725 + }, + { + "epoch": 0.7042208054232911, + "grad_norm": 0.7068852782249451, + "learning_rate": 0.00012074998879750566, + "loss": 2.5357, + "step": 8726 + }, + { + "epoch": 0.7043015091598741, + "grad_norm": 0.7273775935173035, + "learning_rate": 0.00012073454524241503, + "loss": 2.6028, + "step": 8727 + }, + { + "epoch": 0.7043822128964571, + "grad_norm": 0.7146363258361816, + "learning_rate": 0.00012071910117056533, + "loss": 2.5982, + "step": 8728 + }, + { + "epoch": 0.7044629166330401, + "grad_norm": 0.7631390690803528, + "learning_rate": 0.00012070365658234149, + "loss": 2.6021, + "step": 8729 + }, + { + "epoch": 0.7045436203696231, + "grad_norm": 0.7065283060073853, + "learning_rate": 0.00012068821147812839, + "loss": 2.5538, + "step": 8730 + }, + { + "epoch": 0.7046243241062061, + "grad_norm": 0.7914319634437561, + "learning_rate": 0.00012067276585831097, + "loss": 2.5617, + "step": 8731 + }, + { + "epoch": 0.7047050278427891, + "grad_norm": 0.7036565542221069, + "learning_rate": 0.0001206573197232742, + "loss": 2.5354, + "step": 8732 + }, + { + "epoch": 0.7047857315793721, + "grad_norm": 0.657116711139679, + "learning_rate": 0.00012064187307340303, + "loss": 2.5084, + "step": 8733 + }, + { + "epoch": 0.7048664353159552, + "grad_norm": 0.7246817946434021, + "learning_rate": 0.00012062642590908242, + "loss": 2.5737, + "step": 8734 + }, + { + "epoch": 0.7049471390525381, + "grad_norm": 0.6895857453346252, + "learning_rate": 0.00012061097823069736, + "loss": 2.5792, + "step": 8735 + }, + { + "epoch": 0.7050278427891211, + "grad_norm": 0.7654988169670105, + "learning_rate": 0.00012059553003863282, + "loss": 2.5302, + "step": 8736 + }, + { + "epoch": 0.7051085465257041, + "grad_norm": 0.7611668109893799, + "learning_rate": 0.00012058008133327387, + "loss": 2.6073, + "step": 8737 + }, + { + "epoch": 0.7051892502622872, + "grad_norm": 0.728729784488678, + 
"learning_rate": 0.00012056463211500546, + "loss": 2.5714, + "step": 8738 + }, + { + "epoch": 0.7052699539988702, + "grad_norm": 0.7251634001731873, + "learning_rate": 0.00012054918238421271, + "loss": 2.627, + "step": 8739 + }, + { + "epoch": 0.7053506577354531, + "grad_norm": 0.827745795249939, + "learning_rate": 0.00012053373214128056, + "loss": 2.6303, + "step": 8740 + }, + { + "epoch": 0.7054313614720361, + "grad_norm": 0.6837510466575623, + "learning_rate": 0.00012051828138659416, + "loss": 2.5837, + "step": 8741 + }, + { + "epoch": 0.7055120652086192, + "grad_norm": 0.6763553619384766, + "learning_rate": 0.00012050283012053856, + "loss": 2.575, + "step": 8742 + }, + { + "epoch": 0.7055927689452022, + "grad_norm": 0.6779605150222778, + "learning_rate": 0.00012048737834349886, + "loss": 2.588, + "step": 8743 + }, + { + "epoch": 0.7056734726817852, + "grad_norm": 0.7207251191139221, + "learning_rate": 0.00012047192605586008, + "loss": 2.6182, + "step": 8744 + }, + { + "epoch": 0.7057541764183681, + "grad_norm": 0.6681165099143982, + "learning_rate": 0.00012045647325800742, + "loss": 2.5595, + "step": 8745 + }, + { + "epoch": 0.7058348801549512, + "grad_norm": 0.7520970702171326, + "learning_rate": 0.00012044101995032594, + "loss": 2.6306, + "step": 8746 + }, + { + "epoch": 0.7059155838915342, + "grad_norm": 0.7148429155349731, + "learning_rate": 0.00012042556613320087, + "loss": 2.5749, + "step": 8747 + }, + { + "epoch": 0.7059962876281172, + "grad_norm": 0.619369626045227, + "learning_rate": 0.00012041011180701729, + "loss": 2.5382, + "step": 8748 + }, + { + "epoch": 0.7060769913647001, + "grad_norm": 0.7450816035270691, + "learning_rate": 0.00012039465697216032, + "loss": 2.5547, + "step": 8749 + }, + { + "epoch": 0.7061576951012832, + "grad_norm": 0.7324537634849548, + "learning_rate": 0.00012037920162901521, + "loss": 2.5756, + "step": 8750 + }, + { + "epoch": 0.7062383988378662, + "grad_norm": 0.7881754636764526, + "learning_rate": 0.00012036374577796715, 
+ "loss": 2.6376, + "step": 8751 + }, + { + "epoch": 0.7063191025744492, + "grad_norm": 0.7095965147018433, + "learning_rate": 0.00012034828941940128, + "loss": 2.5454, + "step": 8752 + }, + { + "epoch": 0.7063998063110322, + "grad_norm": 0.7142949104309082, + "learning_rate": 0.00012033283255370287, + "loss": 2.5738, + "step": 8753 + }, + { + "epoch": 0.7064805100476153, + "grad_norm": 0.6592378616333008, + "learning_rate": 0.0001203173751812571, + "loss": 2.5473, + "step": 8754 + }, + { + "epoch": 0.7065612137841982, + "grad_norm": 0.6964332461357117, + "learning_rate": 0.00012030191730244926, + "loss": 2.5829, + "step": 8755 + }, + { + "epoch": 0.7066419175207812, + "grad_norm": 0.707539975643158, + "learning_rate": 0.00012028645891766455, + "loss": 2.5652, + "step": 8756 + }, + { + "epoch": 0.7067226212573642, + "grad_norm": 0.6991387009620667, + "learning_rate": 0.00012027100002728824, + "loss": 2.5874, + "step": 8757 + }, + { + "epoch": 0.7068033249939473, + "grad_norm": 0.665746808052063, + "learning_rate": 0.00012025554063170566, + "loss": 2.5163, + "step": 8758 + }, + { + "epoch": 0.7068840287305302, + "grad_norm": 0.696130096912384, + "learning_rate": 0.00012024008073130204, + "loss": 2.5748, + "step": 8759 + }, + { + "epoch": 0.7069647324671132, + "grad_norm": 0.698885440826416, + "learning_rate": 0.00012022462032646269, + "loss": 2.5561, + "step": 8760 + }, + { + "epoch": 0.7070454362036962, + "grad_norm": 0.7052211761474609, + "learning_rate": 0.00012020915941757292, + "loss": 2.5979, + "step": 8761 + }, + { + "epoch": 0.7071261399402793, + "grad_norm": 0.7370811104774475, + "learning_rate": 0.00012019369800501808, + "loss": 2.5623, + "step": 8762 + }, + { + "epoch": 0.7072068436768623, + "grad_norm": 0.6699148416519165, + "learning_rate": 0.00012017823608918352, + "loss": 2.5816, + "step": 8763 + }, + { + "epoch": 0.7072875474134452, + "grad_norm": 0.6712930798530579, + "learning_rate": 0.00012016277367045457, + "loss": 2.5495, + "step": 8764 + }, + { 
+ "epoch": 0.7073682511500282, + "grad_norm": 0.7238204479217529, + "learning_rate": 0.00012014731074921659, + "loss": 2.5936, + "step": 8765 + }, + { + "epoch": 0.7074489548866113, + "grad_norm": 0.7303668856620789, + "learning_rate": 0.00012013184732585494, + "loss": 2.6366, + "step": 8766 + }, + { + "epoch": 0.7075296586231943, + "grad_norm": 0.6883132457733154, + "learning_rate": 0.00012011638340075505, + "loss": 2.534, + "step": 8767 + }, + { + "epoch": 0.7076103623597773, + "grad_norm": 0.7057133316993713, + "learning_rate": 0.00012010091897430229, + "loss": 2.6035, + "step": 8768 + }, + { + "epoch": 0.7076910660963602, + "grad_norm": 0.7069352269172668, + "learning_rate": 0.0001200854540468821, + "loss": 2.5047, + "step": 8769 + }, + { + "epoch": 0.7077717698329433, + "grad_norm": 0.7192478775978088, + "learning_rate": 0.00012006998861887985, + "loss": 2.5698, + "step": 8770 + }, + { + "epoch": 0.7078524735695263, + "grad_norm": 0.6992887854576111, + "learning_rate": 0.00012005452269068107, + "loss": 2.5631, + "step": 8771 + }, + { + "epoch": 0.7079331773061093, + "grad_norm": 0.676154613494873, + "learning_rate": 0.00012003905626267114, + "loss": 2.5255, + "step": 8772 + }, + { + "epoch": 0.7080138810426923, + "grad_norm": 0.672269880771637, + "learning_rate": 0.00012002358933523555, + "loss": 2.5766, + "step": 8773 + }, + { + "epoch": 0.7080945847792752, + "grad_norm": 0.7334566712379456, + "learning_rate": 0.00012000812190875976, + "loss": 2.6068, + "step": 8774 + }, + { + "epoch": 0.7081752885158583, + "grad_norm": 0.6599388122558594, + "learning_rate": 0.00011999265398362931, + "loss": 2.6032, + "step": 8775 + }, + { + "epoch": 0.7082559922524413, + "grad_norm": 0.7158498167991638, + "learning_rate": 0.00011997718556022958, + "loss": 2.599, + "step": 8776 + }, + { + "epoch": 0.7083366959890243, + "grad_norm": 0.7470360994338989, + "learning_rate": 0.00011996171663894624, + "loss": 2.58, + "step": 8777 + }, + { + "epoch": 0.7084173997256072, + 
"grad_norm": 0.6251266002655029, + "learning_rate": 0.00011994624722016472, + "loss": 2.5996, + "step": 8778 + }, + { + "epoch": 0.7084981034621903, + "grad_norm": 0.6649689078330994, + "learning_rate": 0.00011993077730427058, + "loss": 2.6025, + "step": 8779 + }, + { + "epoch": 0.7085788071987733, + "grad_norm": 0.7554693818092346, + "learning_rate": 0.00011991530689164939, + "loss": 2.6207, + "step": 8780 + }, + { + "epoch": 0.7086595109353563, + "grad_norm": 0.7941430807113647, + "learning_rate": 0.00011989983598268661, + "loss": 2.584, + "step": 8781 + }, + { + "epoch": 0.7087402146719393, + "grad_norm": 0.7257998585700989, + "learning_rate": 0.00011988436457776799, + "loss": 2.6152, + "step": 8782 + }, + { + "epoch": 0.7088209184085223, + "grad_norm": 0.716354489326477, + "learning_rate": 0.00011986889267727899, + "loss": 2.585, + "step": 8783 + }, + { + "epoch": 0.7089016221451053, + "grad_norm": 0.7094400525093079, + "learning_rate": 0.00011985342028160525, + "loss": 2.5759, + "step": 8784 + }, + { + "epoch": 0.7089823258816883, + "grad_norm": 0.7211421728134155, + "learning_rate": 0.0001198379473911324, + "loss": 2.5645, + "step": 8785 + }, + { + "epoch": 0.7090630296182713, + "grad_norm": 0.7166693806648254, + "learning_rate": 0.000119822474006246, + "loss": 2.5357, + "step": 8786 + }, + { + "epoch": 0.7091437333548544, + "grad_norm": 0.6702254414558411, + "learning_rate": 0.00011980700012733175, + "loss": 2.5353, + "step": 8787 + }, + { + "epoch": 0.7092244370914373, + "grad_norm": 0.6784049868583679, + "learning_rate": 0.0001197915257547753, + "loss": 2.4942, + "step": 8788 + }, + { + "epoch": 0.7093051408280203, + "grad_norm": 0.6914299726486206, + "learning_rate": 0.00011977605088896226, + "loss": 2.5682, + "step": 8789 + }, + { + "epoch": 0.7093858445646033, + "grad_norm": 0.7324358820915222, + "learning_rate": 0.00011976057553027837, + "loss": 2.564, + "step": 8790 + }, + { + "epoch": 0.7094665483011864, + "grad_norm": 0.6927928924560547, + 
"learning_rate": 0.00011974509967910927, + "loss": 2.5728, + "step": 8791 + }, + { + "epoch": 0.7095472520377694, + "grad_norm": 0.6795603036880493, + "learning_rate": 0.00011972962333584066, + "loss": 2.588, + "step": 8792 + }, + { + "epoch": 0.7096279557743523, + "grad_norm": 0.7132226228713989, + "learning_rate": 0.00011971414650085828, + "loss": 2.5759, + "step": 8793 + }, + { + "epoch": 0.7097086595109353, + "grad_norm": 0.737195611000061, + "learning_rate": 0.00011969866917454782, + "loss": 2.5721, + "step": 8794 + }, + { + "epoch": 0.7097893632475184, + "grad_norm": 0.6776021718978882, + "learning_rate": 0.00011968319135729507, + "loss": 2.5794, + "step": 8795 + }, + { + "epoch": 0.7098700669841014, + "grad_norm": 0.7113735675811768, + "learning_rate": 0.0001196677130494857, + "loss": 2.5595, + "step": 8796 + }, + { + "epoch": 0.7099507707206844, + "grad_norm": 0.6277747750282288, + "learning_rate": 0.0001196522342515055, + "loss": 2.5003, + "step": 8797 + }, + { + "epoch": 0.7100314744572673, + "grad_norm": 0.6982879042625427, + "learning_rate": 0.00011963675496374028, + "loss": 2.542, + "step": 8798 + }, + { + "epoch": 0.7101121781938504, + "grad_norm": 0.7019705176353455, + "learning_rate": 0.00011962127518657578, + "loss": 2.5723, + "step": 8799 + }, + { + "epoch": 0.7101928819304334, + "grad_norm": 0.6831088662147522, + "learning_rate": 0.00011960579492039783, + "loss": 2.5676, + "step": 8800 + }, + { + "epoch": 0.7102735856670164, + "grad_norm": 0.6744031310081482, + "learning_rate": 0.0001195903141655922, + "loss": 2.58, + "step": 8801 + }, + { + "epoch": 0.7103542894035993, + "grad_norm": 0.6873177289962769, + "learning_rate": 0.00011957483292254473, + "loss": 2.6289, + "step": 8802 + }, + { + "epoch": 0.7104349931401824, + "grad_norm": 0.6340685486793518, + "learning_rate": 0.00011955935119164125, + "loss": 2.5688, + "step": 8803 + }, + { + "epoch": 0.7105156968767654, + "grad_norm": 0.7147708535194397, + "learning_rate": 0.00011954386897326764, + 
"loss": 2.5471, + "step": 8804 + }, + { + "epoch": 0.7105964006133484, + "grad_norm": 0.699605405330658, + "learning_rate": 0.00011952838626780971, + "loss": 2.6122, + "step": 8805 + }, + { + "epoch": 0.7106771043499314, + "grad_norm": 0.6685385704040527, + "learning_rate": 0.00011951290307565335, + "loss": 2.5423, + "step": 8806 + }, + { + "epoch": 0.7107578080865145, + "grad_norm": 0.6884726881980896, + "learning_rate": 0.00011949741939718439, + "loss": 2.5243, + "step": 8807 + }, + { + "epoch": 0.7108385118230974, + "grad_norm": 0.6991142630577087, + "learning_rate": 0.00011948193523278884, + "loss": 2.6271, + "step": 8808 + }, + { + "epoch": 0.7109192155596804, + "grad_norm": 0.6964353919029236, + "learning_rate": 0.00011946645058285253, + "loss": 2.6296, + "step": 8809 + }, + { + "epoch": 0.7109999192962634, + "grad_norm": 0.7592040300369263, + "learning_rate": 0.00011945096544776136, + "loss": 2.6601, + "step": 8810 + }, + { + "epoch": 0.7110806230328465, + "grad_norm": 0.7146934866905212, + "learning_rate": 0.00011943547982790131, + "loss": 2.54, + "step": 8811 + }, + { + "epoch": 0.7111613267694294, + "grad_norm": 0.6991123557090759, + "learning_rate": 0.00011941999372365827, + "loss": 2.5978, + "step": 8812 + }, + { + "epoch": 0.7112420305060124, + "grad_norm": 0.6835920810699463, + "learning_rate": 0.00011940450713541822, + "loss": 2.6096, + "step": 8813 + }, + { + "epoch": 0.7113227342425954, + "grad_norm": 0.6913917660713196, + "learning_rate": 0.00011938902006356716, + "loss": 2.5624, + "step": 8814 + }, + { + "epoch": 0.7114034379791785, + "grad_norm": 0.6620622873306274, + "learning_rate": 0.00011937353250849102, + "loss": 2.6211, + "step": 8815 + }, + { + "epoch": 0.7114841417157615, + "grad_norm": 0.6738792061805725, + "learning_rate": 0.00011935804447057581, + "loss": 2.5889, + "step": 8816 + }, + { + "epoch": 0.7115648454523444, + "grad_norm": 0.7101936936378479, + "learning_rate": 0.00011934255595020751, + "loss": 2.5846, + "step": 8817 + }, + { 
+ "epoch": 0.7116455491889274, + "grad_norm": 0.6843911409378052, + "learning_rate": 0.00011932706694777216, + "loss": 2.5757, + "step": 8818 + }, + { + "epoch": 0.7117262529255105, + "grad_norm": 0.7217971086502075, + "learning_rate": 0.0001193115774636558, + "loss": 2.6174, + "step": 8819 + }, + { + "epoch": 0.7118069566620935, + "grad_norm": 0.6706245541572571, + "learning_rate": 0.00011929608749824445, + "loss": 2.5893, + "step": 8820 + }, + { + "epoch": 0.7118876603986765, + "grad_norm": 0.7057672739028931, + "learning_rate": 0.00011928059705192413, + "loss": 2.5426, + "step": 8821 + }, + { + "epoch": 0.7119683641352594, + "grad_norm": 0.7354697585105896, + "learning_rate": 0.00011926510612508095, + "loss": 2.5741, + "step": 8822 + }, + { + "epoch": 0.7120490678718424, + "grad_norm": 0.6618186235427856, + "learning_rate": 0.00011924961471810096, + "loss": 2.6007, + "step": 8823 + }, + { + "epoch": 0.7121297716084255, + "grad_norm": 0.6733995676040649, + "learning_rate": 0.00011923412283137028, + "loss": 2.5739, + "step": 8824 + }, + { + "epoch": 0.7122104753450085, + "grad_norm": 0.7324833869934082, + "learning_rate": 0.00011921863046527497, + "loss": 2.5461, + "step": 8825 + }, + { + "epoch": 0.7122911790815915, + "grad_norm": 0.6753048896789551, + "learning_rate": 0.00011920313762020113, + "loss": 2.5066, + "step": 8826 + }, + { + "epoch": 0.7123718828181744, + "grad_norm": 0.7861250638961792, + "learning_rate": 0.00011918764429653489, + "loss": 2.5229, + "step": 8827 + }, + { + "epoch": 0.7124525865547575, + "grad_norm": 0.7037342190742493, + "learning_rate": 0.00011917215049466244, + "loss": 2.5443, + "step": 8828 + }, + { + "epoch": 0.7125332902913405, + "grad_norm": 0.7112773060798645, + "learning_rate": 0.00011915665621496985, + "loss": 2.5656, + "step": 8829 + }, + { + "epoch": 0.7126139940279235, + "grad_norm": 0.6384316682815552, + "learning_rate": 0.00011914116145784333, + "loss": 2.5526, + "step": 8830 + }, + { + "epoch": 0.7126946977645064, + 
"grad_norm": 0.6673600077629089, + "learning_rate": 0.000119125666223669, + "loss": 2.5868, + "step": 8831 + }, + { + "epoch": 0.7127754015010895, + "grad_norm": 0.6927722692489624, + "learning_rate": 0.0001191101705128331, + "loss": 2.6237, + "step": 8832 + }, + { + "epoch": 0.7128561052376725, + "grad_norm": 0.7410106658935547, + "learning_rate": 0.00011909467432572182, + "loss": 2.5652, + "step": 8833 + }, + { + "epoch": 0.7129368089742555, + "grad_norm": 0.6780139803886414, + "learning_rate": 0.0001190791776627213, + "loss": 2.5343, + "step": 8834 + }, + { + "epoch": 0.7130175127108385, + "grad_norm": 0.7147949934005737, + "learning_rate": 0.00011906368052421781, + "loss": 2.5368, + "step": 8835 + }, + { + "epoch": 0.7130982164474216, + "grad_norm": 0.7092324495315552, + "learning_rate": 0.00011904818291059759, + "loss": 2.538, + "step": 8836 + }, + { + "epoch": 0.7131789201840045, + "grad_norm": 0.761763870716095, + "learning_rate": 0.00011903268482224684, + "loss": 2.5984, + "step": 8837 + }, + { + "epoch": 0.7132596239205875, + "grad_norm": 0.7011365294456482, + "learning_rate": 0.00011901718625955182, + "loss": 2.5383, + "step": 8838 + }, + { + "epoch": 0.7133403276571705, + "grad_norm": 0.7982703447341919, + "learning_rate": 0.00011900168722289882, + "loss": 2.5714, + "step": 8839 + }, + { + "epoch": 0.7134210313937536, + "grad_norm": 0.6788253784179688, + "learning_rate": 0.00011898618771267412, + "loss": 2.5675, + "step": 8840 + }, + { + "epoch": 0.7135017351303365, + "grad_norm": 0.6245018243789673, + "learning_rate": 0.00011897068772926397, + "loss": 2.5497, + "step": 8841 + }, + { + "epoch": 0.7135824388669195, + "grad_norm": 0.732109785079956, + "learning_rate": 0.0001189551872730547, + "loss": 2.5043, + "step": 8842 + }, + { + "epoch": 0.7136631426035025, + "grad_norm": 0.7640885710716248, + "learning_rate": 0.0001189396863444326, + "loss": 2.5974, + "step": 8843 + }, + { + "epoch": 0.7137438463400856, + "grad_norm": 0.6806808710098267, + 
"learning_rate": 0.00011892418494378403, + "loss": 2.5911, + "step": 8844 + }, + { + "epoch": 0.7138245500766686, + "grad_norm": 0.6730000376701355, + "learning_rate": 0.00011890868307149528, + "loss": 2.5405, + "step": 8845 + }, + { + "epoch": 0.7139052538132515, + "grad_norm": 0.6881929636001587, + "learning_rate": 0.00011889318072795275, + "loss": 2.6083, + "step": 8846 + }, + { + "epoch": 0.7139859575498345, + "grad_norm": 0.7079598307609558, + "learning_rate": 0.00011887767791354275, + "loss": 2.5743, + "step": 8847 + }, + { + "epoch": 0.7140666612864176, + "grad_norm": 0.6760475635528564, + "learning_rate": 0.00011886217462865166, + "loss": 2.5925, + "step": 8848 + }, + { + "epoch": 0.7141473650230006, + "grad_norm": 0.6851043701171875, + "learning_rate": 0.00011884667087366587, + "loss": 2.5839, + "step": 8849 + }, + { + "epoch": 0.7142280687595836, + "grad_norm": 0.6805267930030823, + "learning_rate": 0.00011883116664897178, + "loss": 2.562, + "step": 8850 + }, + { + "epoch": 0.7143087724961665, + "grad_norm": 0.6720704436302185, + "learning_rate": 0.00011881566195495581, + "loss": 2.5381, + "step": 8851 + }, + { + "epoch": 0.7143894762327496, + "grad_norm": 0.718166172504425, + "learning_rate": 0.00011880015679200436, + "loss": 2.5912, + "step": 8852 + }, + { + "epoch": 0.7144701799693326, + "grad_norm": 0.6643497943878174, + "learning_rate": 0.00011878465116050383, + "loss": 2.5122, + "step": 8853 + }, + { + "epoch": 0.7145508837059156, + "grad_norm": 0.705186665058136, + "learning_rate": 0.00011876914506084074, + "loss": 2.617, + "step": 8854 + }, + { + "epoch": 0.7146315874424986, + "grad_norm": 0.6417848467826843, + "learning_rate": 0.00011875363849340144, + "loss": 2.5552, + "step": 8855 + }, + { + "epoch": 0.7147122911790816, + "grad_norm": 0.6861358880996704, + "learning_rate": 0.00011873813145857249, + "loss": 2.6324, + "step": 8856 + }, + { + "epoch": 0.7147929949156646, + "grad_norm": 0.7134111523628235, + "learning_rate": 0.00011872262395674027, 
+ "loss": 2.5892, + "step": 8857 + }, + { + "epoch": 0.7148736986522476, + "grad_norm": 0.7177506685256958, + "learning_rate": 0.00011870711598829135, + "loss": 2.5677, + "step": 8858 + }, + { + "epoch": 0.7149544023888306, + "grad_norm": 0.6435763835906982, + "learning_rate": 0.00011869160755361219, + "loss": 2.5452, + "step": 8859 + }, + { + "epoch": 0.7150351061254137, + "grad_norm": 0.6443132758140564, + "learning_rate": 0.00011867609865308935, + "loss": 2.5566, + "step": 8860 + }, + { + "epoch": 0.7151158098619966, + "grad_norm": 0.7132347822189331, + "learning_rate": 0.00011866058928710925, + "loss": 2.565, + "step": 8861 + }, + { + "epoch": 0.7151965135985796, + "grad_norm": 0.7803207039833069, + "learning_rate": 0.00011864507945605854, + "loss": 2.556, + "step": 8862 + }, + { + "epoch": 0.7152772173351626, + "grad_norm": 0.7277950644493103, + "learning_rate": 0.00011862956916032367, + "loss": 2.5623, + "step": 8863 + }, + { + "epoch": 0.7153579210717457, + "grad_norm": 0.6812277436256409, + "learning_rate": 0.00011861405840029125, + "loss": 2.6146, + "step": 8864 + }, + { + "epoch": 0.7154386248083286, + "grad_norm": 0.7170509099960327, + "learning_rate": 0.00011859854717634786, + "loss": 2.52, + "step": 8865 + }, + { + "epoch": 0.7155193285449116, + "grad_norm": 0.7282906174659729, + "learning_rate": 0.00011858303548888004, + "loss": 2.5605, + "step": 8866 + }, + { + "epoch": 0.7156000322814946, + "grad_norm": 0.7290246486663818, + "learning_rate": 0.00011856752333827439, + "loss": 2.6292, + "step": 8867 + }, + { + "epoch": 0.7156807360180777, + "grad_norm": 0.6870024800300598, + "learning_rate": 0.00011855201072491752, + "loss": 2.6396, + "step": 8868 + }, + { + "epoch": 0.7157614397546607, + "grad_norm": 0.7336156964302063, + "learning_rate": 0.00011853649764919605, + "loss": 2.6356, + "step": 8869 + }, + { + "epoch": 0.7158421434912436, + "grad_norm": 0.7181294560432434, + "learning_rate": 0.00011852098411149661, + "loss": 2.5163, + "step": 8870 + }, + 
{ + "epoch": 0.7159228472278266, + "grad_norm": 0.7355513572692871, + "learning_rate": 0.00011850547011220583, + "loss": 2.5485, + "step": 8871 + }, + { + "epoch": 0.7160035509644097, + "grad_norm": 0.7005351185798645, + "learning_rate": 0.00011848995565171038, + "loss": 2.5187, + "step": 8872 + }, + { + "epoch": 0.7160842547009927, + "grad_norm": 0.6550194025039673, + "learning_rate": 0.00011847444073039686, + "loss": 2.5174, + "step": 8873 + }, + { + "epoch": 0.7161649584375757, + "grad_norm": 0.6568251252174377, + "learning_rate": 0.00011845892534865202, + "loss": 2.5128, + "step": 8874 + }, + { + "epoch": 0.7162456621741586, + "grad_norm": 0.6359419226646423, + "learning_rate": 0.0001184434095068625, + "loss": 2.5967, + "step": 8875 + }, + { + "epoch": 0.7163263659107416, + "grad_norm": 0.6730023622512817, + "learning_rate": 0.00011842789320541504, + "loss": 2.5243, + "step": 8876 + }, + { + "epoch": 0.7164070696473247, + "grad_norm": 0.6750187277793884, + "learning_rate": 0.00011841237644469625, + "loss": 2.602, + "step": 8877 + }, + { + "epoch": 0.7164877733839077, + "grad_norm": 0.7039143443107605, + "learning_rate": 0.00011839685922509291, + "loss": 2.5345, + "step": 8878 + }, + { + "epoch": 0.7165684771204907, + "grad_norm": 0.6602306962013245, + "learning_rate": 0.00011838134154699177, + "loss": 2.5995, + "step": 8879 + }, + { + "epoch": 0.7166491808570736, + "grad_norm": 0.6744598150253296, + "learning_rate": 0.00011836582341077955, + "loss": 2.6005, + "step": 8880 + }, + { + "epoch": 0.7167298845936567, + "grad_norm": 0.7136051058769226, + "learning_rate": 0.00011835030481684302, + "loss": 2.5424, + "step": 8881 + }, + { + "epoch": 0.7168105883302397, + "grad_norm": 0.7085986137390137, + "learning_rate": 0.00011833478576556889, + "loss": 2.5912, + "step": 8882 + }, + { + "epoch": 0.7168912920668227, + "grad_norm": 0.7635689377784729, + "learning_rate": 0.00011831926625734398, + "loss": 2.5836, + "step": 8883 + }, + { + "epoch": 0.7169719958034056, + 
"grad_norm": 0.6543256640434265, + "learning_rate": 0.00011830374629255508, + "loss": 2.5442, + "step": 8884 + }, + { + "epoch": 0.7170526995399887, + "grad_norm": 0.663840115070343, + "learning_rate": 0.00011828822587158896, + "loss": 2.5529, + "step": 8885 + }, + { + "epoch": 0.7171334032765717, + "grad_norm": 0.6868027448654175, + "learning_rate": 0.00011827270499483247, + "loss": 2.6678, + "step": 8886 + }, + { + "epoch": 0.7172141070131547, + "grad_norm": 0.649172842502594, + "learning_rate": 0.00011825718366267238, + "loss": 2.57, + "step": 8887 + }, + { + "epoch": 0.7172948107497377, + "grad_norm": 0.6818440556526184, + "learning_rate": 0.00011824166187549554, + "loss": 2.5602, + "step": 8888 + }, + { + "epoch": 0.7173755144863208, + "grad_norm": 0.7222314476966858, + "learning_rate": 0.00011822613963368885, + "loss": 2.5526, + "step": 8889 + }, + { + "epoch": 0.7174562182229037, + "grad_norm": 0.7309598922729492, + "learning_rate": 0.00011821061693763909, + "loss": 2.5515, + "step": 8890 + }, + { + "epoch": 0.7175369219594867, + "grad_norm": 0.6935746669769287, + "learning_rate": 0.00011819509378773314, + "loss": 2.5506, + "step": 8891 + }, + { + "epoch": 0.7176176256960697, + "grad_norm": 0.6754423975944519, + "learning_rate": 0.00011817957018435792, + "loss": 2.5621, + "step": 8892 + }, + { + "epoch": 0.7176983294326528, + "grad_norm": 0.7087355852127075, + "learning_rate": 0.00011816404612790026, + "loss": 2.5708, + "step": 8893 + }, + { + "epoch": 0.7177790331692357, + "grad_norm": 0.726820707321167, + "learning_rate": 0.0001181485216187471, + "loss": 2.5741, + "step": 8894 + }, + { + "epoch": 0.7178597369058187, + "grad_norm": 0.6539922952651978, + "learning_rate": 0.00011813299665728532, + "loss": 2.613, + "step": 8895 + }, + { + "epoch": 0.7179404406424017, + "grad_norm": 0.7008066773414612, + "learning_rate": 0.00011811747124390189, + "loss": 2.6029, + "step": 8896 + }, + { + "epoch": 0.7180211443789848, + "grad_norm": 0.6900522708892822, + 
"learning_rate": 0.00011810194537898374, + "loss": 2.5716, + "step": 8897 + }, + { + "epoch": 0.7181018481155678, + "grad_norm": 0.675345242023468, + "learning_rate": 0.00011808641906291776, + "loss": 2.5742, + "step": 8898 + }, + { + "epoch": 0.7181825518521507, + "grad_norm": 0.6697559356689453, + "learning_rate": 0.00011807089229609092, + "loss": 2.5717, + "step": 8899 + }, + { + "epoch": 0.7182632555887337, + "grad_norm": 0.6874344944953918, + "learning_rate": 0.00011805536507889021, + "loss": 2.5394, + "step": 8900 + }, + { + "epoch": 0.7183439593253168, + "grad_norm": 0.6675494313240051, + "learning_rate": 0.00011803983741170263, + "loss": 2.5655, + "step": 8901 + }, + { + "epoch": 0.7184246630618998, + "grad_norm": 0.6937244534492493, + "learning_rate": 0.00011802430929491517, + "loss": 2.5676, + "step": 8902 + }, + { + "epoch": 0.7185053667984828, + "grad_norm": 0.7591496109962463, + "learning_rate": 0.00011800878072891474, + "loss": 2.5849, + "step": 8903 + }, + { + "epoch": 0.7185860705350657, + "grad_norm": 0.6503129005432129, + "learning_rate": 0.00011799325171408846, + "loss": 2.5416, + "step": 8904 + }, + { + "epoch": 0.7186667742716488, + "grad_norm": 0.6450222134590149, + "learning_rate": 0.00011797772225082333, + "loss": 2.5395, + "step": 8905 + }, + { + "epoch": 0.7187474780082318, + "grad_norm": 0.7317619919776917, + "learning_rate": 0.00011796219233950632, + "loss": 2.609, + "step": 8906 + }, + { + "epoch": 0.7188281817448148, + "grad_norm": 0.7585787773132324, + "learning_rate": 0.00011794666198052455, + "loss": 2.5556, + "step": 8907 + }, + { + "epoch": 0.7189088854813978, + "grad_norm": 0.6718214750289917, + "learning_rate": 0.00011793113117426505, + "loss": 2.5914, + "step": 8908 + }, + { + "epoch": 0.7189895892179808, + "grad_norm": 0.6459314823150635, + "learning_rate": 0.00011791559992111487, + "loss": 2.5956, + "step": 8909 + }, + { + "epoch": 0.7190702929545638, + "grad_norm": 0.6592775583267212, + "learning_rate": 
0.00011790006822146113, + "loss": 2.5568, + "step": 8910 + }, + { + "epoch": 0.7191509966911468, + "grad_norm": 0.7277452349662781, + "learning_rate": 0.0001178845360756909, + "loss": 2.5989, + "step": 8911 + }, + { + "epoch": 0.7192317004277298, + "grad_norm": 0.7020131945610046, + "learning_rate": 0.00011786900348419128, + "loss": 2.645, + "step": 8912 + }, + { + "epoch": 0.7193124041643129, + "grad_norm": 0.6746636629104614, + "learning_rate": 0.00011785347044734938, + "loss": 2.5173, + "step": 8913 + }, + { + "epoch": 0.7193931079008958, + "grad_norm": 0.6782798171043396, + "learning_rate": 0.0001178379369655523, + "loss": 2.6007, + "step": 8914 + }, + { + "epoch": 0.7194738116374788, + "grad_norm": 0.705498218536377, + "learning_rate": 0.00011782240303918724, + "loss": 2.5408, + "step": 8915 + }, + { + "epoch": 0.7195545153740618, + "grad_norm": 0.675532341003418, + "learning_rate": 0.00011780686866864128, + "loss": 2.5188, + "step": 8916 + }, + { + "epoch": 0.7196352191106449, + "grad_norm": 0.6552390456199646, + "learning_rate": 0.00011779133385430161, + "loss": 2.5409, + "step": 8917 + }, + { + "epoch": 0.7197159228472279, + "grad_norm": 0.6589654088020325, + "learning_rate": 0.00011777579859655544, + "loss": 2.5447, + "step": 8918 + }, + { + "epoch": 0.7197966265838108, + "grad_norm": 0.7548382878303528, + "learning_rate": 0.00011776026289578985, + "loss": 2.5239, + "step": 8919 + }, + { + "epoch": 0.7198773303203938, + "grad_norm": 0.697325587272644, + "learning_rate": 0.00011774472675239207, + "loss": 2.5887, + "step": 8920 + }, + { + "epoch": 0.7199580340569769, + "grad_norm": 0.734462320804596, + "learning_rate": 0.00011772919016674934, + "loss": 2.5847, + "step": 8921 + }, + { + "epoch": 0.7200387377935599, + "grad_norm": 0.6736955642700195, + "learning_rate": 0.00011771365313924886, + "loss": 2.558, + "step": 8922 + }, + { + "epoch": 0.7201194415301428, + "grad_norm": 0.7157856822013855, + "learning_rate": 0.00011769811567027784, + "loss": 2.6199, + 
"step": 8923 + }, + { + "epoch": 0.7202001452667258, + "grad_norm": 0.7045830488204956, + "learning_rate": 0.0001176825777602235, + "loss": 2.576, + "step": 8924 + }, + { + "epoch": 0.7202808490033088, + "grad_norm": 0.6875419020652771, + "learning_rate": 0.00011766703940947308, + "loss": 2.6045, + "step": 8925 + }, + { + "epoch": 0.7203615527398919, + "grad_norm": 0.7313494086265564, + "learning_rate": 0.00011765150061841387, + "loss": 2.5388, + "step": 8926 + }, + { + "epoch": 0.7204422564764749, + "grad_norm": 0.7223608493804932, + "learning_rate": 0.00011763596138743313, + "loss": 2.5466, + "step": 8927 + }, + { + "epoch": 0.7205229602130578, + "grad_norm": 0.7289614081382751, + "learning_rate": 0.00011762042171691816, + "loss": 2.5862, + "step": 8928 + }, + { + "epoch": 0.7206036639496408, + "grad_norm": 0.7098878026008606, + "learning_rate": 0.00011760488160725617, + "loss": 2.5497, + "step": 8929 + }, + { + "epoch": 0.7206843676862239, + "grad_norm": 0.7096838355064392, + "learning_rate": 0.00011758934105883452, + "loss": 2.558, + "step": 8930 + }, + { + "epoch": 0.7207650714228069, + "grad_norm": 0.7334743738174438, + "learning_rate": 0.00011757380007204055, + "loss": 2.5966, + "step": 8931 + }, + { + "epoch": 0.7208457751593899, + "grad_norm": 0.7192476391792297, + "learning_rate": 0.00011755825864726149, + "loss": 2.5307, + "step": 8932 + }, + { + "epoch": 0.7209264788959728, + "grad_norm": 0.7329632043838501, + "learning_rate": 0.00011754271678488478, + "loss": 2.6453, + "step": 8933 + }, + { + "epoch": 0.7210071826325559, + "grad_norm": 0.6827974915504456, + "learning_rate": 0.00011752717448529766, + "loss": 2.5507, + "step": 8934 + }, + { + "epoch": 0.7210878863691389, + "grad_norm": 0.8292449116706848, + "learning_rate": 0.00011751163174888756, + "loss": 2.6178, + "step": 8935 + }, + { + "epoch": 0.7211685901057219, + "grad_norm": 0.6504058837890625, + "learning_rate": 0.00011749608857604183, + "loss": 2.574, + "step": 8936 + }, + { + "epoch": 
0.7212492938423049, + "grad_norm": 0.6567742824554443, + "learning_rate": 0.00011748054496714785, + "loss": 2.45, + "step": 8937 + }, + { + "epoch": 0.7213299975788879, + "grad_norm": 0.6699101328849792, + "learning_rate": 0.00011746500092259296, + "loss": 2.5827, + "step": 8938 + }, + { + "epoch": 0.7214107013154709, + "grad_norm": 0.7664934992790222, + "learning_rate": 0.0001174494564427646, + "loss": 2.5246, + "step": 8939 + }, + { + "epoch": 0.7214914050520539, + "grad_norm": 0.7276309132575989, + "learning_rate": 0.00011743391152805017, + "loss": 2.6096, + "step": 8940 + }, + { + "epoch": 0.7215721087886369, + "grad_norm": 0.7248005867004395, + "learning_rate": 0.0001174183661788371, + "loss": 2.6362, + "step": 8941 + }, + { + "epoch": 0.72165281252522, + "grad_norm": 0.7773801684379578, + "learning_rate": 0.00011740282039551282, + "loss": 2.547, + "step": 8942 + }, + { + "epoch": 0.7217335162618029, + "grad_norm": 0.7346466779708862, + "learning_rate": 0.00011738727417846476, + "loss": 2.5635, + "step": 8943 + }, + { + "epoch": 0.7218142199983859, + "grad_norm": 0.7042707800865173, + "learning_rate": 0.0001173717275280804, + "loss": 2.5593, + "step": 8944 + }, + { + "epoch": 0.7218949237349689, + "grad_norm": 0.6894899010658264, + "learning_rate": 0.00011735618044474712, + "loss": 2.5272, + "step": 8945 + }, + { + "epoch": 0.721975627471552, + "grad_norm": 0.6643744111061096, + "learning_rate": 0.00011734063292885249, + "loss": 2.6001, + "step": 8946 + }, + { + "epoch": 0.722056331208135, + "grad_norm": 0.7543076276779175, + "learning_rate": 0.00011732508498078396, + "loss": 2.558, + "step": 8947 + }, + { + "epoch": 0.7221370349447179, + "grad_norm": 0.7065596580505371, + "learning_rate": 0.00011730953660092903, + "loss": 2.6255, + "step": 8948 + }, + { + "epoch": 0.7222177386813009, + "grad_norm": 0.6968158483505249, + "learning_rate": 0.0001172939877896752, + "loss": 2.5277, + "step": 8949 + }, + { + "epoch": 0.722298442417884, + "grad_norm": 
0.6918557286262512, + "learning_rate": 0.00011727843854740996, + "loss": 2.5456, + "step": 8950 + }, + { + "epoch": 0.722379146154467, + "grad_norm": 0.7262142300605774, + "learning_rate": 0.00011726288887452088, + "loss": 2.5345, + "step": 8951 + }, + { + "epoch": 0.7224598498910499, + "grad_norm": 0.7423329949378967, + "learning_rate": 0.00011724733877139548, + "loss": 2.6335, + "step": 8952 + }, + { + "epoch": 0.7225405536276329, + "grad_norm": 0.7734495997428894, + "learning_rate": 0.00011723178823842136, + "loss": 2.5951, + "step": 8953 + }, + { + "epoch": 0.722621257364216, + "grad_norm": 0.6792804598808289, + "learning_rate": 0.00011721623727598597, + "loss": 2.5927, + "step": 8954 + }, + { + "epoch": 0.722701961100799, + "grad_norm": 0.7971853017807007, + "learning_rate": 0.00011720068588447697, + "loss": 2.5451, + "step": 8955 + }, + { + "epoch": 0.722782664837382, + "grad_norm": 0.7264395356178284, + "learning_rate": 0.00011718513406428189, + "loss": 2.5769, + "step": 8956 + }, + { + "epoch": 0.7228633685739649, + "grad_norm": 0.6536725759506226, + "learning_rate": 0.0001171695818157884, + "loss": 2.6285, + "step": 8957 + }, + { + "epoch": 0.722944072310548, + "grad_norm": 0.6676235198974609, + "learning_rate": 0.000117154029139384, + "loss": 2.5896, + "step": 8958 + }, + { + "epoch": 0.723024776047131, + "grad_norm": 0.7104088664054871, + "learning_rate": 0.00011713847603545636, + "loss": 2.5606, + "step": 8959 + }, + { + "epoch": 0.723105479783714, + "grad_norm": 0.6646785140037537, + "learning_rate": 0.0001171229225043931, + "loss": 2.5617, + "step": 8960 + }, + { + "epoch": 0.723186183520297, + "grad_norm": 0.7148672342300415, + "learning_rate": 0.00011710736854658186, + "loss": 2.5855, + "step": 8961 + }, + { + "epoch": 0.72326688725688, + "grad_norm": 0.6864955425262451, + "learning_rate": 0.00011709181416241028, + "loss": 2.6098, + "step": 8962 + }, + { + "epoch": 0.723347590993463, + "grad_norm": 0.7049087285995483, + "learning_rate": 
0.00011707625935226602, + "loss": 2.506, + "step": 8963 + }, + { + "epoch": 0.723428294730046, + "grad_norm": 0.6419759392738342, + "learning_rate": 0.00011706070411653672, + "loss": 2.5485, + "step": 8964 + }, + { + "epoch": 0.723508998466629, + "grad_norm": 0.6879174709320068, + "learning_rate": 0.00011704514845561007, + "loss": 2.5373, + "step": 8965 + }, + { + "epoch": 0.7235897022032121, + "grad_norm": 0.6473780274391174, + "learning_rate": 0.00011702959236987378, + "loss": 2.5479, + "step": 8966 + }, + { + "epoch": 0.723670405939795, + "grad_norm": 0.6924241185188293, + "learning_rate": 0.00011701403585971553, + "loss": 2.5679, + "step": 8967 + }, + { + "epoch": 0.723751109676378, + "grad_norm": 0.7452483773231506, + "learning_rate": 0.00011699847892552305, + "loss": 2.5043, + "step": 8968 + }, + { + "epoch": 0.723831813412961, + "grad_norm": 0.7517218589782715, + "learning_rate": 0.00011698292156768402, + "loss": 2.5554, + "step": 8969 + }, + { + "epoch": 0.7239125171495441, + "grad_norm": 0.6492432355880737, + "learning_rate": 0.00011696736378658618, + "loss": 2.6091, + "step": 8970 + }, + { + "epoch": 0.723993220886127, + "grad_norm": 0.740093469619751, + "learning_rate": 0.0001169518055826173, + "loss": 2.5629, + "step": 8971 + }, + { + "epoch": 0.72407392462271, + "grad_norm": 0.7186923027038574, + "learning_rate": 0.00011693624695616509, + "loss": 2.5537, + "step": 8972 + }, + { + "epoch": 0.724154628359293, + "grad_norm": 0.7066059112548828, + "learning_rate": 0.00011692068790761737, + "loss": 2.5115, + "step": 8973 + }, + { + "epoch": 0.7242353320958761, + "grad_norm": 0.7031805515289307, + "learning_rate": 0.00011690512843736185, + "loss": 2.596, + "step": 8974 + }, + { + "epoch": 0.7243160358324591, + "grad_norm": 0.7308956384658813, + "learning_rate": 0.00011688956854578635, + "loss": 2.6311, + "step": 8975 + }, + { + "epoch": 0.724396739569042, + "grad_norm": 0.6926052570343018, + "learning_rate": 0.00011687400823327863, + "loss": 2.5659, + 
"step": 8976 + }, + { + "epoch": 0.724477443305625, + "grad_norm": 0.69638991355896, + "learning_rate": 0.00011685844750022654, + "loss": 2.4792, + "step": 8977 + }, + { + "epoch": 0.724558147042208, + "grad_norm": 0.6858355402946472, + "learning_rate": 0.00011684288634701785, + "loss": 2.5707, + "step": 8978 + }, + { + "epoch": 0.7246388507787911, + "grad_norm": 0.6673639416694641, + "learning_rate": 0.00011682732477404044, + "loss": 2.5627, + "step": 8979 + }, + { + "epoch": 0.7247195545153741, + "grad_norm": 0.7174322605133057, + "learning_rate": 0.00011681176278168206, + "loss": 2.5801, + "step": 8980 + }, + { + "epoch": 0.724800258251957, + "grad_norm": 0.6840930581092834, + "learning_rate": 0.00011679620037033064, + "loss": 2.4994, + "step": 8981 + }, + { + "epoch": 0.72488096198854, + "grad_norm": 0.7179884910583496, + "learning_rate": 0.00011678063754037399, + "loss": 2.6408, + "step": 8982 + }, + { + "epoch": 0.7249616657251231, + "grad_norm": 0.6564825773239136, + "learning_rate": 0.00011676507429219998, + "loss": 2.5412, + "step": 8983 + }, + { + "epoch": 0.7250423694617061, + "grad_norm": 0.7020624876022339, + "learning_rate": 0.00011674951062619652, + "loss": 2.5778, + "step": 8984 + }, + { + "epoch": 0.7251230731982891, + "grad_norm": 0.8061255812644958, + "learning_rate": 0.00011673394654275145, + "loss": 2.5581, + "step": 8985 + }, + { + "epoch": 0.725203776934872, + "grad_norm": 0.7653982043266296, + "learning_rate": 0.00011671838204225267, + "loss": 2.5324, + "step": 8986 + }, + { + "epoch": 0.7252844806714551, + "grad_norm": 0.7168377041816711, + "learning_rate": 0.00011670281712508816, + "loss": 2.6357, + "step": 8987 + }, + { + "epoch": 0.7253651844080381, + "grad_norm": 0.6860470771789551, + "learning_rate": 0.00011668725179164575, + "loss": 2.5367, + "step": 8988 + }, + { + "epoch": 0.7254458881446211, + "grad_norm": 0.7175878286361694, + "learning_rate": 0.00011667168604231342, + "loss": 2.549, + "step": 8989 + }, + { + "epoch": 
0.725526591881204, + "grad_norm": 0.7124783992767334, + "learning_rate": 0.00011665611987747907, + "loss": 2.5566, + "step": 8990 + }, + { + "epoch": 0.7256072956177871, + "grad_norm": 0.6575417518615723, + "learning_rate": 0.00011664055329753067, + "loss": 2.5455, + "step": 8991 + }, + { + "epoch": 0.7256879993543701, + "grad_norm": 0.6576877236366272, + "learning_rate": 0.00011662498630285623, + "loss": 2.5596, + "step": 8992 + }, + { + "epoch": 0.7257687030909531, + "grad_norm": 0.7235110402107239, + "learning_rate": 0.00011660941889384365, + "loss": 2.6199, + "step": 8993 + }, + { + "epoch": 0.7258494068275361, + "grad_norm": 0.6623982787132263, + "learning_rate": 0.00011659385107088092, + "loss": 2.5642, + "step": 8994 + }, + { + "epoch": 0.7259301105641192, + "grad_norm": 0.7113857865333557, + "learning_rate": 0.00011657828283435605, + "loss": 2.5631, + "step": 8995 + }, + { + "epoch": 0.7260108143007021, + "grad_norm": 0.7076124548912048, + "learning_rate": 0.00011656271418465702, + "loss": 2.5141, + "step": 8996 + }, + { + "epoch": 0.7260915180372851, + "grad_norm": 0.7534562349319458, + "learning_rate": 0.00011654714512217188, + "loss": 2.5896, + "step": 8997 + }, + { + "epoch": 0.7261722217738681, + "grad_norm": 0.7393170595169067, + "learning_rate": 0.00011653157564728865, + "loss": 2.5848, + "step": 8998 + }, + { + "epoch": 0.7262529255104512, + "grad_norm": 0.6829591989517212, + "learning_rate": 0.0001165160057603953, + "loss": 2.5439, + "step": 8999 + }, + { + "epoch": 0.7263336292470342, + "grad_norm": 0.6527189016342163, + "learning_rate": 0.00011650043546187995, + "loss": 2.5655, + "step": 9000 + }, + { + "epoch": 0.7263336292470342, + "eval_loss": 2.487652063369751, + "eval_runtime": 845.9129, + "eval_samples_per_second": 3.097, + "eval_steps_per_second": 0.517, + "step": 9000 + }, + { + "epoch": 0.7264143329836171, + "grad_norm": 0.6545615196228027, + "learning_rate": 0.00011648486475213058, + "loss": 2.5366, + "step": 9001 + }, + { + "epoch": 
0.7264950367202001, + "grad_norm": 0.6854971647262573, + "learning_rate": 0.00011646929363153529, + "loss": 2.5832, + "step": 9002 + }, + { + "epoch": 0.7265757404567832, + "grad_norm": 0.7745552062988281, + "learning_rate": 0.00011645372210048218, + "loss": 2.5854, + "step": 9003 + }, + { + "epoch": 0.7266564441933662, + "grad_norm": 0.7159156203269958, + "learning_rate": 0.00011643815015935928, + "loss": 2.614, + "step": 9004 + }, + { + "epoch": 0.7267371479299491, + "grad_norm": 0.700074315071106, + "learning_rate": 0.00011642257780855475, + "loss": 2.6124, + "step": 9005 + }, + { + "epoch": 0.7268178516665321, + "grad_norm": 0.7367869019508362, + "learning_rate": 0.0001164070050484566, + "loss": 2.5512, + "step": 9006 + }, + { + "epoch": 0.7268985554031152, + "grad_norm": 0.6623905897140503, + "learning_rate": 0.00011639143187945301, + "loss": 2.5724, + "step": 9007 + }, + { + "epoch": 0.7269792591396982, + "grad_norm": 0.7111610770225525, + "learning_rate": 0.0001163758583019321, + "loss": 2.547, + "step": 9008 + }, + { + "epoch": 0.7270599628762812, + "grad_norm": 0.6860959529876709, + "learning_rate": 0.00011636028431628199, + "loss": 2.532, + "step": 9009 + }, + { + "epoch": 0.7271406666128641, + "grad_norm": 0.7606309056282043, + "learning_rate": 0.00011634470992289084, + "loss": 2.5214, + "step": 9010 + }, + { + "epoch": 0.7272213703494472, + "grad_norm": 0.6440508365631104, + "learning_rate": 0.00011632913512214677, + "loss": 2.5554, + "step": 9011 + }, + { + "epoch": 0.7273020740860302, + "grad_norm": 0.6770462393760681, + "learning_rate": 0.00011631355991443796, + "loss": 2.5877, + "step": 9012 + }, + { + "epoch": 0.7273827778226132, + "grad_norm": 0.6419155597686768, + "learning_rate": 0.00011629798430015262, + "loss": 2.5337, + "step": 9013 + }, + { + "epoch": 0.7274634815591962, + "grad_norm": 0.6782121658325195, + "learning_rate": 0.00011628240827967891, + "loss": 2.5152, + "step": 9014 + }, + { + "epoch": 0.7275441852957792, + "grad_norm": 
0.6972285509109497, + "learning_rate": 0.00011626683185340501, + "loss": 2.5628, + "step": 9015 + }, + { + "epoch": 0.7276248890323622, + "grad_norm": 0.6823342442512512, + "learning_rate": 0.00011625125502171914, + "loss": 2.5977, + "step": 9016 + }, + { + "epoch": 0.7277055927689452, + "grad_norm": 0.723311722278595, + "learning_rate": 0.0001162356777850095, + "loss": 2.5772, + "step": 9017 + }, + { + "epoch": 0.7277862965055282, + "grad_norm": 0.7395427227020264, + "learning_rate": 0.00011622010014366435, + "loss": 2.6068, + "step": 9018 + }, + { + "epoch": 0.7278670002421113, + "grad_norm": 0.6970974206924438, + "learning_rate": 0.00011620452209807192, + "loss": 2.5577, + "step": 9019 + }, + { + "epoch": 0.7279477039786942, + "grad_norm": 0.6921418309211731, + "learning_rate": 0.0001161889436486204, + "loss": 2.5476, + "step": 9020 + }, + { + "epoch": 0.7280284077152772, + "grad_norm": 0.7243841886520386, + "learning_rate": 0.0001161733647956981, + "loss": 2.579, + "step": 9021 + }, + { + "epoch": 0.7281091114518602, + "grad_norm": 0.7240262627601624, + "learning_rate": 0.0001161577855396933, + "loss": 2.5959, + "step": 9022 + }, + { + "epoch": 0.7281898151884433, + "grad_norm": 0.7215476632118225, + "learning_rate": 0.0001161422058809942, + "loss": 2.5979, + "step": 9023 + }, + { + "epoch": 0.7282705189250263, + "grad_norm": 0.7109708786010742, + "learning_rate": 0.00011612662581998917, + "loss": 2.5912, + "step": 9024 + }, + { + "epoch": 0.7283512226616092, + "grad_norm": 0.6814073920249939, + "learning_rate": 0.00011611104535706645, + "loss": 2.5742, + "step": 9025 + }, + { + "epoch": 0.7284319263981922, + "grad_norm": 0.6788144707679749, + "learning_rate": 0.0001160954644926144, + "loss": 2.5656, + "step": 9026 + }, + { + "epoch": 0.7285126301347752, + "grad_norm": 0.7312989830970764, + "learning_rate": 0.00011607988322702126, + "loss": 2.5877, + "step": 9027 + }, + { + "epoch": 0.7285933338713583, + "grad_norm": 0.6725338697433472, + "learning_rate": 
0.0001160643015606754, + "loss": 2.5261, + "step": 9028 + }, + { + "epoch": 0.7286740376079412, + "grad_norm": 0.7439326047897339, + "learning_rate": 0.00011604871949396516, + "loss": 2.603, + "step": 9029 + }, + { + "epoch": 0.7287547413445242, + "grad_norm": 0.7091783285140991, + "learning_rate": 0.00011603313702727889, + "loss": 2.5227, + "step": 9030 + }, + { + "epoch": 0.7288354450811072, + "grad_norm": 0.7474398016929626, + "learning_rate": 0.00011601755416100492, + "loss": 2.616, + "step": 9031 + }, + { + "epoch": 0.7289161488176903, + "grad_norm": 0.6904098987579346, + "learning_rate": 0.00011600197089553162, + "loss": 2.556, + "step": 9032 + }, + { + "epoch": 0.7289968525542733, + "grad_norm": 0.7305783033370972, + "learning_rate": 0.00011598638723124739, + "loss": 2.5633, + "step": 9033 + }, + { + "epoch": 0.7290775562908562, + "grad_norm": 0.6626651883125305, + "learning_rate": 0.00011597080316854062, + "loss": 2.5862, + "step": 9034 + }, + { + "epoch": 0.7291582600274392, + "grad_norm": 0.683102548122406, + "learning_rate": 0.00011595521870779968, + "loss": 2.5629, + "step": 9035 + }, + { + "epoch": 0.7292389637640223, + "grad_norm": 0.7486757636070251, + "learning_rate": 0.00011593963384941295, + "loss": 2.5831, + "step": 9036 + }, + { + "epoch": 0.7293196675006053, + "grad_norm": 0.8059591054916382, + "learning_rate": 0.00011592404859376888, + "loss": 2.6414, + "step": 9037 + }, + { + "epoch": 0.7294003712371883, + "grad_norm": 0.8371721506118774, + "learning_rate": 0.00011590846294125594, + "loss": 2.643, + "step": 9038 + }, + { + "epoch": 0.7294810749737712, + "grad_norm": 0.7216931581497192, + "learning_rate": 0.00011589287689226246, + "loss": 2.6, + "step": 9039 + }, + { + "epoch": 0.7295617787103543, + "grad_norm": 0.6940354704856873, + "learning_rate": 0.00011587729044717701, + "loss": 2.546, + "step": 9040 + }, + { + "epoch": 0.7296424824469373, + "grad_norm": 0.6888829469680786, + "learning_rate": 0.00011586170360638792, + "loss": 2.5878, + 
"step": 9041 + }, + { + "epoch": 0.7297231861835203, + "grad_norm": 0.6863886117935181, + "learning_rate": 0.00011584611637028373, + "loss": 2.5389, + "step": 9042 + }, + { + "epoch": 0.7298038899201033, + "grad_norm": 0.6670756936073303, + "learning_rate": 0.00011583052873925294, + "loss": 2.5465, + "step": 9043 + }, + { + "epoch": 0.7298845936566863, + "grad_norm": 0.7441220879554749, + "learning_rate": 0.00011581494071368392, + "loss": 2.5679, + "step": 9044 + }, + { + "epoch": 0.7299652973932693, + "grad_norm": 0.7135717272758484, + "learning_rate": 0.0001157993522939653, + "loss": 2.5341, + "step": 9045 + }, + { + "epoch": 0.7300460011298523, + "grad_norm": 0.6837992072105408, + "learning_rate": 0.00011578376348048547, + "loss": 2.5233, + "step": 9046 + }, + { + "epoch": 0.7301267048664353, + "grad_norm": 0.706666886806488, + "learning_rate": 0.00011576817427363302, + "loss": 2.6109, + "step": 9047 + }, + { + "epoch": 0.7302074086030184, + "grad_norm": 0.6856269240379333, + "learning_rate": 0.00011575258467379646, + "loss": 2.5651, + "step": 9048 + }, + { + "epoch": 0.7302881123396013, + "grad_norm": 0.6931480169296265, + "learning_rate": 0.00011573699468136427, + "loss": 2.6031, + "step": 9049 + }, + { + "epoch": 0.7303688160761843, + "grad_norm": 0.6558480858802795, + "learning_rate": 0.00011572140429672508, + "loss": 2.5661, + "step": 9050 + }, + { + "epoch": 0.7304495198127673, + "grad_norm": 0.6468425393104553, + "learning_rate": 0.00011570581352026742, + "loss": 2.5171, + "step": 9051 + }, + { + "epoch": 0.7305302235493504, + "grad_norm": 0.7204702496528625, + "learning_rate": 0.00011569022235237974, + "loss": 2.5861, + "step": 9052 + }, + { + "epoch": 0.7306109272859334, + "grad_norm": 0.7536416053771973, + "learning_rate": 0.00011567463079345078, + "loss": 2.633, + "step": 9053 + }, + { + "epoch": 0.7306916310225163, + "grad_norm": 0.6597960591316223, + "learning_rate": 0.00011565903884386904, + "loss": 2.5327, + "step": 9054 + }, + { + "epoch": 
0.7307723347590993, + "grad_norm": 0.689153254032135, + "learning_rate": 0.0001156434465040231, + "loss": 2.5397, + "step": 9055 + }, + { + "epoch": 0.7308530384956824, + "grad_norm": 0.7664844393730164, + "learning_rate": 0.00011562785377430159, + "loss": 2.4852, + "step": 9056 + }, + { + "epoch": 0.7309337422322654, + "grad_norm": 0.7122881412506104, + "learning_rate": 0.0001156122606550931, + "loss": 2.5401, + "step": 9057 + }, + { + "epoch": 0.7310144459688483, + "grad_norm": 0.6937551498413086, + "learning_rate": 0.00011559666714678627, + "loss": 2.5705, + "step": 9058 + }, + { + "epoch": 0.7310951497054313, + "grad_norm": 0.6504047513008118, + "learning_rate": 0.00011558107324976974, + "loss": 2.5638, + "step": 9059 + }, + { + "epoch": 0.7311758534420144, + "grad_norm": 0.7759538888931274, + "learning_rate": 0.0001155654789644321, + "loss": 2.5864, + "step": 9060 + }, + { + "epoch": 0.7312565571785974, + "grad_norm": 0.719859778881073, + "learning_rate": 0.00011554988429116207, + "loss": 2.519, + "step": 9061 + }, + { + "epoch": 0.7313372609151804, + "grad_norm": 0.7159178853034973, + "learning_rate": 0.00011553428923034826, + "loss": 2.5301, + "step": 9062 + }, + { + "epoch": 0.7314179646517633, + "grad_norm": 0.6584001183509827, + "learning_rate": 0.00011551869378237934, + "loss": 2.4716, + "step": 9063 + }, + { + "epoch": 0.7314986683883464, + "grad_norm": 0.6548463702201843, + "learning_rate": 0.00011550309794764405, + "loss": 2.5637, + "step": 9064 + }, + { + "epoch": 0.7315793721249294, + "grad_norm": 0.73887699842453, + "learning_rate": 0.000115487501726531, + "loss": 2.5813, + "step": 9065 + }, + { + "epoch": 0.7316600758615124, + "grad_norm": 0.7856181859970093, + "learning_rate": 0.00011547190511942893, + "loss": 2.592, + "step": 9066 + }, + { + "epoch": 0.7317407795980954, + "grad_norm": 0.7040740847587585, + "learning_rate": 0.00011545630812672654, + "loss": 2.5324, + "step": 9067 + }, + { + "epoch": 0.7318214833346784, + "grad_norm": 
0.7316064238548279, + "learning_rate": 0.00011544071074881253, + "loss": 2.5487, + "step": 9068 + }, + { + "epoch": 0.7319021870712614, + "grad_norm": 0.7020413279533386, + "learning_rate": 0.00011542511298607568, + "loss": 2.5179, + "step": 9069 + }, + { + "epoch": 0.7319828908078444, + "grad_norm": 0.672605574131012, + "learning_rate": 0.00011540951483890468, + "loss": 2.5367, + "step": 9070 + }, + { + "epoch": 0.7320635945444274, + "grad_norm": 0.7668856382369995, + "learning_rate": 0.00011539391630768828, + "loss": 2.6089, + "step": 9071 + }, + { + "epoch": 0.7321442982810105, + "grad_norm": 0.6641809940338135, + "learning_rate": 0.00011537831739281524, + "loss": 2.5411, + "step": 9072 + }, + { + "epoch": 0.7322250020175934, + "grad_norm": 0.7142000198364258, + "learning_rate": 0.00011536271809467434, + "loss": 2.5469, + "step": 9073 + }, + { + "epoch": 0.7323057057541764, + "grad_norm": 0.7266140580177307, + "learning_rate": 0.00011534711841365435, + "loss": 2.5565, + "step": 9074 + }, + { + "epoch": 0.7323864094907594, + "grad_norm": 0.6763899326324463, + "learning_rate": 0.00011533151835014407, + "loss": 2.551, + "step": 9075 + }, + { + "epoch": 0.7324671132273425, + "grad_norm": 0.6517418026924133, + "learning_rate": 0.00011531591790453224, + "loss": 2.5415, + "step": 9076 + }, + { + "epoch": 0.7325478169639255, + "grad_norm": 0.6602214574813843, + "learning_rate": 0.00011530031707720772, + "loss": 2.593, + "step": 9077 + }, + { + "epoch": 0.7326285207005084, + "grad_norm": 0.7448844313621521, + "learning_rate": 0.00011528471586855931, + "loss": 2.5598, + "step": 9078 + }, + { + "epoch": 0.7327092244370914, + "grad_norm": 0.7197073698043823, + "learning_rate": 0.00011526911427897579, + "loss": 2.5128, + "step": 9079 + }, + { + "epoch": 0.7327899281736744, + "grad_norm": 0.7245968580245972, + "learning_rate": 0.00011525351230884606, + "loss": 2.5016, + "step": 9080 + }, + { + "epoch": 0.7328706319102575, + "grad_norm": 0.6715837717056274, + "learning_rate": 
0.00011523790995855892, + "loss": 2.5469, + "step": 9081 + }, + { + "epoch": 0.7329513356468405, + "grad_norm": 0.7143638730049133, + "learning_rate": 0.00011522230722850325, + "loss": 2.5164, + "step": 9082 + }, + { + "epoch": 0.7330320393834234, + "grad_norm": 0.6809647083282471, + "learning_rate": 0.00011520670411906787, + "loss": 2.6071, + "step": 9083 + }, + { + "epoch": 0.7331127431200064, + "grad_norm": 0.7160956859588623, + "learning_rate": 0.00011519110063064167, + "loss": 2.5346, + "step": 9084 + }, + { + "epoch": 0.7331934468565895, + "grad_norm": 0.6814724802970886, + "learning_rate": 0.00011517549676361357, + "loss": 2.5499, + "step": 9085 + }, + { + "epoch": 0.7332741505931725, + "grad_norm": 0.6914821267127991, + "learning_rate": 0.00011515989251837239, + "loss": 2.5386, + "step": 9086 + }, + { + "epoch": 0.7333548543297554, + "grad_norm": 0.7292554378509521, + "learning_rate": 0.00011514428789530705, + "loss": 2.5642, + "step": 9087 + }, + { + "epoch": 0.7334355580663384, + "grad_norm": 0.6894826292991638, + "learning_rate": 0.00011512868289480647, + "loss": 2.6131, + "step": 9088 + }, + { + "epoch": 0.7335162618029215, + "grad_norm": 0.658770740032196, + "learning_rate": 0.00011511307751725957, + "loss": 2.5594, + "step": 9089 + }, + { + "epoch": 0.7335969655395045, + "grad_norm": 0.7508681416511536, + "learning_rate": 0.0001150974717630553, + "loss": 2.595, + "step": 9090 + }, + { + "epoch": 0.7336776692760875, + "grad_norm": 0.69661545753479, + "learning_rate": 0.00011508186563258256, + "loss": 2.5803, + "step": 9091 + }, + { + "epoch": 0.7337583730126704, + "grad_norm": 0.7277412414550781, + "learning_rate": 0.00011506625912623028, + "loss": 2.5456, + "step": 9092 + }, + { + "epoch": 0.7338390767492535, + "grad_norm": 0.658329963684082, + "learning_rate": 0.00011505065224438745, + "loss": 2.5177, + "step": 9093 + }, + { + "epoch": 0.7339197804858365, + "grad_norm": 0.7277211546897888, + "learning_rate": 0.00011503504498744302, + "loss": 2.553, + 
"step": 9094 + }, + { + "epoch": 0.7340004842224195, + "grad_norm": 0.7240201830863953, + "learning_rate": 0.00011501943735578598, + "loss": 2.5851, + "step": 9095 + }, + { + "epoch": 0.7340811879590025, + "grad_norm": 0.6565662026405334, + "learning_rate": 0.00011500382934980529, + "loss": 2.5865, + "step": 9096 + }, + { + "epoch": 0.7341618916955855, + "grad_norm": 0.658268392086029, + "learning_rate": 0.00011498822096988995, + "loss": 2.5402, + "step": 9097 + }, + { + "epoch": 0.7342425954321685, + "grad_norm": 0.7305087447166443, + "learning_rate": 0.00011497261221642894, + "loss": 2.5483, + "step": 9098 + }, + { + "epoch": 0.7343232991687515, + "grad_norm": 0.7271504402160645, + "learning_rate": 0.00011495700308981134, + "loss": 2.5303, + "step": 9099 + }, + { + "epoch": 0.7344040029053345, + "grad_norm": 0.70429527759552, + "learning_rate": 0.0001149413935904261, + "loss": 2.5878, + "step": 9100 + }, + { + "epoch": 0.7344847066419176, + "grad_norm": 0.7168769836425781, + "learning_rate": 0.00011492578371866229, + "loss": 2.6017, + "step": 9101 + }, + { + "epoch": 0.7345654103785005, + "grad_norm": 0.7131996154785156, + "learning_rate": 0.00011491017347490891, + "loss": 2.5439, + "step": 9102 + }, + { + "epoch": 0.7346461141150835, + "grad_norm": 0.660321056842804, + "learning_rate": 0.00011489456285955504, + "loss": 2.5236, + "step": 9103 + }, + { + "epoch": 0.7347268178516665, + "grad_norm": 0.6742995977401733, + "learning_rate": 0.00011487895187298977, + "loss": 2.5375, + "step": 9104 + }, + { + "epoch": 0.7348075215882496, + "grad_norm": 0.6380610466003418, + "learning_rate": 0.00011486334051560206, + "loss": 2.5173, + "step": 9105 + }, + { + "epoch": 0.7348882253248326, + "grad_norm": 0.6948198080062866, + "learning_rate": 0.0001148477287877811, + "loss": 2.5247, + "step": 9106 + }, + { + "epoch": 0.7349689290614155, + "grad_norm": 0.7088696360588074, + "learning_rate": 0.00011483211668991591, + "loss": 2.587, + "step": 9107 + }, + { + "epoch": 
0.7350496327979985, + "grad_norm": 0.6278921961784363, + "learning_rate": 0.00011481650422239556, + "loss": 2.5652, + "step": 9108 + }, + { + "epoch": 0.7351303365345816, + "grad_norm": 0.6901956796646118, + "learning_rate": 0.00011480089138560926, + "loss": 2.5964, + "step": 9109 + }, + { + "epoch": 0.7352110402711646, + "grad_norm": 0.7264819145202637, + "learning_rate": 0.00011478527817994604, + "loss": 2.5437, + "step": 9110 + }, + { + "epoch": 0.7352917440077475, + "grad_norm": 0.6940708756446838, + "learning_rate": 0.00011476966460579501, + "loss": 2.5761, + "step": 9111 + }, + { + "epoch": 0.7353724477443305, + "grad_norm": 0.689588189125061, + "learning_rate": 0.00011475405066354536, + "loss": 2.5457, + "step": 9112 + }, + { + "epoch": 0.7354531514809136, + "grad_norm": 0.6938436031341553, + "learning_rate": 0.00011473843635358618, + "loss": 2.6026, + "step": 9113 + }, + { + "epoch": 0.7355338552174966, + "grad_norm": 0.7122177481651306, + "learning_rate": 0.00011472282167630663, + "loss": 2.5701, + "step": 9114 + }, + { + "epoch": 0.7356145589540796, + "grad_norm": 0.6667213439941406, + "learning_rate": 0.00011470720663209591, + "loss": 2.5944, + "step": 9115 + }, + { + "epoch": 0.7356952626906625, + "grad_norm": 0.705910861492157, + "learning_rate": 0.00011469159122134314, + "loss": 2.6183, + "step": 9116 + }, + { + "epoch": 0.7357759664272456, + "grad_norm": 0.709937572479248, + "learning_rate": 0.00011467597544443751, + "loss": 2.5153, + "step": 9117 + }, + { + "epoch": 0.7358566701638286, + "grad_norm": 0.6870958805084229, + "learning_rate": 0.00011466035930176822, + "loss": 2.5334, + "step": 9118 + }, + { + "epoch": 0.7359373739004116, + "grad_norm": 0.7274392247200012, + "learning_rate": 0.00011464474279372443, + "loss": 2.5336, + "step": 9119 + }, + { + "epoch": 0.7360180776369946, + "grad_norm": 0.6360952258110046, + "learning_rate": 0.0001146291259206954, + "loss": 2.5604, + "step": 9120 + }, + { + "epoch": 0.7360987813735776, + "grad_norm": 
0.7990559935569763, + "learning_rate": 0.00011461350868307028, + "loss": 2.624, + "step": 9121 + }, + { + "epoch": 0.7361794851101606, + "grad_norm": 0.6670079827308655, + "learning_rate": 0.00011459789108123835, + "loss": 2.5761, + "step": 9122 + }, + { + "epoch": 0.7362601888467436, + "grad_norm": 0.6994437575340271, + "learning_rate": 0.00011458227311558877, + "loss": 2.5679, + "step": 9123 + }, + { + "epoch": 0.7363408925833266, + "grad_norm": 0.7428358197212219, + "learning_rate": 0.00011456665478651087, + "loss": 2.5874, + "step": 9124 + }, + { + "epoch": 0.7364215963199097, + "grad_norm": 0.7079486846923828, + "learning_rate": 0.00011455103609439387, + "loss": 2.5999, + "step": 9125 + }, + { + "epoch": 0.7365023000564926, + "grad_norm": 0.646244466304779, + "learning_rate": 0.00011453541703962695, + "loss": 2.5053, + "step": 9126 + }, + { + "epoch": 0.7365830037930756, + "grad_norm": 0.6671318411827087, + "learning_rate": 0.0001145197976225995, + "loss": 2.5277, + "step": 9127 + }, + { + "epoch": 0.7366637075296586, + "grad_norm": 0.7060399055480957, + "learning_rate": 0.00011450417784370072, + "loss": 2.6092, + "step": 9128 + }, + { + "epoch": 0.7367444112662416, + "grad_norm": 0.741547703742981, + "learning_rate": 0.00011448855770331989, + "loss": 2.6121, + "step": 9129 + }, + { + "epoch": 0.7368251150028247, + "grad_norm": 0.710267961025238, + "learning_rate": 0.00011447293720184636, + "loss": 2.5141, + "step": 9130 + }, + { + "epoch": 0.7369058187394076, + "grad_norm": 0.6914308071136475, + "learning_rate": 0.0001144573163396694, + "loss": 2.5489, + "step": 9131 + }, + { + "epoch": 0.7369865224759906, + "grad_norm": 0.7051414847373962, + "learning_rate": 0.0001144416951171783, + "loss": 2.5925, + "step": 9132 + }, + { + "epoch": 0.7370672262125736, + "grad_norm": 0.6765387058258057, + "learning_rate": 0.00011442607353476245, + "loss": 2.5864, + "step": 9133 + }, + { + "epoch": 0.7371479299491567, + "grad_norm": 0.706672191619873, + "learning_rate": 
0.00011441045159281108, + "loss": 2.4823, + "step": 9134 + }, + { + "epoch": 0.7372286336857397, + "grad_norm": 0.7534066438674927, + "learning_rate": 0.00011439482929171362, + "loss": 2.5728, + "step": 9135 + }, + { + "epoch": 0.7373093374223226, + "grad_norm": 0.6628777384757996, + "learning_rate": 0.00011437920663185939, + "loss": 2.5538, + "step": 9136 + }, + { + "epoch": 0.7373900411589056, + "grad_norm": 0.6575733423233032, + "learning_rate": 0.00011436358361363773, + "loss": 2.4802, + "step": 9137 + }, + { + "epoch": 0.7374707448954887, + "grad_norm": 0.7629329562187195, + "learning_rate": 0.00011434796023743803, + "loss": 2.6169, + "step": 9138 + }, + { + "epoch": 0.7375514486320717, + "grad_norm": 0.7148225903511047, + "learning_rate": 0.00011433233650364965, + "loss": 2.6335, + "step": 9139 + }, + { + "epoch": 0.7376321523686546, + "grad_norm": 0.705210268497467, + "learning_rate": 0.00011431671241266198, + "loss": 2.6261, + "step": 9140 + }, + { + "epoch": 0.7377128561052376, + "grad_norm": 0.7137441635131836, + "learning_rate": 0.00011430108796486441, + "loss": 2.5021, + "step": 9141 + }, + { + "epoch": 0.7377935598418207, + "grad_norm": 0.6979854702949524, + "learning_rate": 0.00011428546316064635, + "loss": 2.5436, + "step": 9142 + }, + { + "epoch": 0.7378742635784037, + "grad_norm": 0.6568784713745117, + "learning_rate": 0.00011426983800039721, + "loss": 2.5882, + "step": 9143 + }, + { + "epoch": 0.7379549673149867, + "grad_norm": 0.666606605052948, + "learning_rate": 0.00011425421248450638, + "loss": 2.5472, + "step": 9144 + }, + { + "epoch": 0.7380356710515696, + "grad_norm": 0.7240840792655945, + "learning_rate": 0.00011423858661336333, + "loss": 2.6057, + "step": 9145 + }, + { + "epoch": 0.7381163747881527, + "grad_norm": 0.7342149615287781, + "learning_rate": 0.0001142229603873575, + "loss": 2.508, + "step": 9146 + }, + { + "epoch": 0.7381970785247357, + "grad_norm": 0.7089941501617432, + "learning_rate": 0.0001142073338068783, + "loss": 2.6115, 
+ "step": 9147 + }, + { + "epoch": 0.7382777822613187, + "grad_norm": 0.6883555054664612, + "learning_rate": 0.00011419170687231519, + "loss": 2.5254, + "step": 9148 + }, + { + "epoch": 0.7383584859979017, + "grad_norm": 0.6819528937339783, + "learning_rate": 0.00011417607958405765, + "loss": 2.5498, + "step": 9149 + }, + { + "epoch": 0.7384391897344847, + "grad_norm": 0.7348979711532593, + "learning_rate": 0.00011416045194249516, + "loss": 2.5547, + "step": 9150 + }, + { + "epoch": 0.7385198934710677, + "grad_norm": 0.6733320355415344, + "learning_rate": 0.00011414482394801719, + "loss": 2.5985, + "step": 9151 + }, + { + "epoch": 0.7386005972076507, + "grad_norm": 0.714771032333374, + "learning_rate": 0.00011412919560101327, + "loss": 2.571, + "step": 9152 + }, + { + "epoch": 0.7386813009442337, + "grad_norm": 0.7010024189949036, + "learning_rate": 0.0001141135669018728, + "loss": 2.5755, + "step": 9153 + }, + { + "epoch": 0.7387620046808168, + "grad_norm": 0.7014826536178589, + "learning_rate": 0.00011409793785098536, + "loss": 2.6033, + "step": 9154 + }, + { + "epoch": 0.7388427084173997, + "grad_norm": 0.7286051511764526, + "learning_rate": 0.0001140823084487405, + "loss": 2.515, + "step": 9155 + }, + { + "epoch": 0.7389234121539827, + "grad_norm": 0.669365406036377, + "learning_rate": 0.00011406667869552768, + "loss": 2.506, + "step": 9156 + }, + { + "epoch": 0.7390041158905657, + "grad_norm": 0.6886852979660034, + "learning_rate": 0.00011405104859173645, + "loss": 2.6123, + "step": 9157 + }, + { + "epoch": 0.7390848196271488, + "grad_norm": 0.6344162225723267, + "learning_rate": 0.00011403541813775635, + "loss": 2.5483, + "step": 9158 + }, + { + "epoch": 0.7391655233637318, + "grad_norm": 0.7043579816818237, + "learning_rate": 0.00011401978733397694, + "loss": 2.5545, + "step": 9159 + }, + { + "epoch": 0.7392462271003147, + "grad_norm": 0.7960262298583984, + "learning_rate": 0.00011400415618078781, + "loss": 2.5666, + "step": 9160 + }, + { + "epoch": 
0.7393269308368977, + "grad_norm": 0.6771546006202698, + "learning_rate": 0.00011398852467857848, + "loss": 2.6016, + "step": 9161 + }, + { + "epoch": 0.7394076345734808, + "grad_norm": 0.6522069573402405, + "learning_rate": 0.00011397289282773855, + "loss": 2.5493, + "step": 9162 + }, + { + "epoch": 0.7394883383100638, + "grad_norm": 0.6804657578468323, + "learning_rate": 0.00011395726062865762, + "loss": 2.5856, + "step": 9163 + }, + { + "epoch": 0.7395690420466468, + "grad_norm": 0.7562841176986694, + "learning_rate": 0.00011394162808172526, + "loss": 2.557, + "step": 9164 + }, + { + "epoch": 0.7396497457832297, + "grad_norm": 0.6464113593101501, + "learning_rate": 0.00011392599518733107, + "loss": 2.5292, + "step": 9165 + }, + { + "epoch": 0.7397304495198128, + "grad_norm": 0.7469549775123596, + "learning_rate": 0.00011391036194586466, + "loss": 2.6168, + "step": 9166 + }, + { + "epoch": 0.7398111532563958, + "grad_norm": 0.7095946669578552, + "learning_rate": 0.00011389472835771572, + "loss": 2.5468, + "step": 9167 + }, + { + "epoch": 0.7398918569929788, + "grad_norm": 0.7376375794410706, + "learning_rate": 0.00011387909442327382, + "loss": 2.5576, + "step": 9168 + }, + { + "epoch": 0.7399725607295617, + "grad_norm": 0.736727774143219, + "learning_rate": 0.00011386346014292859, + "loss": 2.6034, + "step": 9169 + }, + { + "epoch": 0.7400532644661448, + "grad_norm": 0.7026904821395874, + "learning_rate": 0.00011384782551706967, + "loss": 2.5848, + "step": 9170 + }, + { + "epoch": 0.7401339682027278, + "grad_norm": 0.6894888877868652, + "learning_rate": 0.00011383219054608678, + "loss": 2.5475, + "step": 9171 + }, + { + "epoch": 0.7402146719393108, + "grad_norm": 0.6754137277603149, + "learning_rate": 0.00011381655523036954, + "loss": 2.5124, + "step": 9172 + }, + { + "epoch": 0.7402953756758938, + "grad_norm": 0.7935643196105957, + "learning_rate": 0.00011380091957030762, + "loss": 2.5898, + "step": 9173 + }, + { + "epoch": 0.7403760794124769, + "grad_norm": 
0.7017118334770203, + "learning_rate": 0.0001137852835662907, + "loss": 2.6139, + "step": 9174 + }, + { + "epoch": 0.7404567831490598, + "grad_norm": 0.7246189117431641, + "learning_rate": 0.00011376964721870847, + "loss": 2.4627, + "step": 9175 + }, + { + "epoch": 0.7405374868856428, + "grad_norm": 0.6835598349571228, + "learning_rate": 0.00011375401052795064, + "loss": 2.5707, + "step": 9176 + }, + { + "epoch": 0.7406181906222258, + "grad_norm": 0.6439787745475769, + "learning_rate": 0.00011373837349440693, + "loss": 2.5161, + "step": 9177 + }, + { + "epoch": 0.7406988943588089, + "grad_norm": 0.7249091267585754, + "learning_rate": 0.00011372273611846704, + "loss": 2.5054, + "step": 9178 + }, + { + "epoch": 0.7407795980953918, + "grad_norm": 0.7653267979621887, + "learning_rate": 0.0001137070984005207, + "loss": 2.6016, + "step": 9179 + }, + { + "epoch": 0.7408603018319748, + "grad_norm": 0.7195165157318115, + "learning_rate": 0.0001136914603409576, + "loss": 2.5931, + "step": 9180 + }, + { + "epoch": 0.7409410055685578, + "grad_norm": 0.7093746662139893, + "learning_rate": 0.00011367582194016756, + "loss": 2.5567, + "step": 9181 + }, + { + "epoch": 0.7410217093051408, + "grad_norm": 0.6868107318878174, + "learning_rate": 0.00011366018319854026, + "loss": 2.5769, + "step": 9182 + }, + { + "epoch": 0.7411024130417239, + "grad_norm": 0.6870261430740356, + "learning_rate": 0.00011364454411646552, + "loss": 2.5418, + "step": 9183 + }, + { + "epoch": 0.7411831167783068, + "grad_norm": 0.7034662365913391, + "learning_rate": 0.00011362890469433306, + "loss": 2.5798, + "step": 9184 + }, + { + "epoch": 0.7412638205148898, + "grad_norm": 0.7200794816017151, + "learning_rate": 0.00011361326493253264, + "loss": 2.5523, + "step": 9185 + }, + { + "epoch": 0.7413445242514728, + "grad_norm": 0.7034540772438049, + "learning_rate": 0.0001135976248314541, + "loss": 2.5107, + "step": 9186 + }, + { + "epoch": 0.7414252279880559, + "grad_norm": 0.7155053019523621, + "learning_rate": 
0.00011358198439148721, + "loss": 2.5804, + "step": 9187 + }, + { + "epoch": 0.7415059317246389, + "grad_norm": 0.6965398788452148, + "learning_rate": 0.00011356634361302175, + "loss": 2.5532, + "step": 9188 + }, + { + "epoch": 0.7415866354612218, + "grad_norm": 0.65416419506073, + "learning_rate": 0.00011355070249644755, + "loss": 2.5411, + "step": 9189 + }, + { + "epoch": 0.7416673391978048, + "grad_norm": 0.6798486709594727, + "learning_rate": 0.0001135350610421544, + "loss": 2.4957, + "step": 9190 + }, + { + "epoch": 0.7417480429343879, + "grad_norm": 0.6839874386787415, + "learning_rate": 0.00011351941925053218, + "loss": 2.5745, + "step": 9191 + }, + { + "epoch": 0.7418287466709709, + "grad_norm": 0.7374398708343506, + "learning_rate": 0.00011350377712197068, + "loss": 2.4923, + "step": 9192 + }, + { + "epoch": 0.7419094504075538, + "grad_norm": 0.7517396807670593, + "learning_rate": 0.00011348813465685974, + "loss": 2.538, + "step": 9193 + }, + { + "epoch": 0.7419901541441368, + "grad_norm": 0.6670863628387451, + "learning_rate": 0.00011347249185558926, + "loss": 2.5442, + "step": 9194 + }, + { + "epoch": 0.7420708578807199, + "grad_norm": 0.6508080363273621, + "learning_rate": 0.00011345684871854905, + "loss": 2.6665, + "step": 9195 + }, + { + "epoch": 0.7421515616173029, + "grad_norm": 0.6935258507728577, + "learning_rate": 0.00011344120524612898, + "loss": 2.5388, + "step": 9196 + }, + { + "epoch": 0.7422322653538859, + "grad_norm": 0.696067750453949, + "learning_rate": 0.00011342556143871897, + "loss": 2.574, + "step": 9197 + }, + { + "epoch": 0.7423129690904688, + "grad_norm": 0.7486966252326965, + "learning_rate": 0.00011340991729670882, + "loss": 2.5924, + "step": 9198 + }, + { + "epoch": 0.7423936728270519, + "grad_norm": 0.676407516002655, + "learning_rate": 0.00011339427282048854, + "loss": 2.5907, + "step": 9199 + }, + { + "epoch": 0.7424743765636349, + "grad_norm": 0.7241318225860596, + "learning_rate": 0.00011337862801044792, + "loss": 2.5685, + 
"step": 9200 + }, + { + "epoch": 0.7425550803002179, + "grad_norm": 0.7012883424758911, + "learning_rate": 0.00011336298286697692, + "loss": 2.56, + "step": 9201 + }, + { + "epoch": 0.7426357840368009, + "grad_norm": 0.7313060164451599, + "learning_rate": 0.0001133473373904655, + "loss": 2.632, + "step": 9202 + }, + { + "epoch": 0.742716487773384, + "grad_norm": 0.6829206943511963, + "learning_rate": 0.00011333169158130353, + "loss": 2.5006, + "step": 9203 + }, + { + "epoch": 0.7427971915099669, + "grad_norm": 0.7324578166007996, + "learning_rate": 0.00011331604543988093, + "loss": 2.5004, + "step": 9204 + }, + { + "epoch": 0.7428778952465499, + "grad_norm": 0.6761097311973572, + "learning_rate": 0.00011330039896658766, + "loss": 2.5516, + "step": 9205 + }, + { + "epoch": 0.7429585989831329, + "grad_norm": 0.6909754276275635, + "learning_rate": 0.00011328475216181369, + "loss": 2.5273, + "step": 9206 + }, + { + "epoch": 0.743039302719716, + "grad_norm": 0.6420674324035645, + "learning_rate": 0.00011326910502594899, + "loss": 2.5507, + "step": 9207 + }, + { + "epoch": 0.7431200064562989, + "grad_norm": 0.6442455053329468, + "learning_rate": 0.0001132534575593835, + "loss": 2.542, + "step": 9208 + }, + { + "epoch": 0.7432007101928819, + "grad_norm": 0.7053101658821106, + "learning_rate": 0.0001132378097625072, + "loss": 2.5116, + "step": 9209 + }, + { + "epoch": 0.7432814139294649, + "grad_norm": 0.7570765614509583, + "learning_rate": 0.00011322216163571007, + "loss": 2.5576, + "step": 9210 + }, + { + "epoch": 0.743362117666048, + "grad_norm": 0.6937675476074219, + "learning_rate": 0.00011320651317938214, + "loss": 2.6212, + "step": 9211 + }, + { + "epoch": 0.743442821402631, + "grad_norm": 0.6741313934326172, + "learning_rate": 0.00011319086439391333, + "loss": 2.5723, + "step": 9212 + }, + { + "epoch": 0.7435235251392139, + "grad_norm": 0.711358904838562, + "learning_rate": 0.00011317521527969374, + "loss": 2.5713, + "step": 9213 + }, + { + "epoch": 
0.7436042288757969, + "grad_norm": 0.7443268895149231, + "learning_rate": 0.00011315956583711331, + "loss": 2.5301, + "step": 9214 + }, + { + "epoch": 0.74368493261238, + "grad_norm": 0.7001742720603943, + "learning_rate": 0.00011314391606656212, + "loss": 2.5545, + "step": 9215 + }, + { + "epoch": 0.743765636348963, + "grad_norm": 0.7294990420341492, + "learning_rate": 0.00011312826596843019, + "loss": 2.5897, + "step": 9216 + }, + { + "epoch": 0.743846340085546, + "grad_norm": 0.706924319267273, + "learning_rate": 0.00011311261554310753, + "loss": 2.6477, + "step": 9217 + }, + { + "epoch": 0.7439270438221289, + "grad_norm": 0.7065039277076721, + "learning_rate": 0.00011309696479098423, + "loss": 2.5326, + "step": 9218 + }, + { + "epoch": 0.744007747558712, + "grad_norm": 0.6502599716186523, + "learning_rate": 0.00011308131371245037, + "loss": 2.5833, + "step": 9219 + }, + { + "epoch": 0.744088451295295, + "grad_norm": 0.7135158181190491, + "learning_rate": 0.00011306566230789592, + "loss": 2.5686, + "step": 9220 + }, + { + "epoch": 0.744169155031878, + "grad_norm": 0.7239195108413696, + "learning_rate": 0.00011305001057771101, + "loss": 2.6303, + "step": 9221 + }, + { + "epoch": 0.744249858768461, + "grad_norm": 0.6442604660987854, + "learning_rate": 0.00011303435852228574, + "loss": 2.5495, + "step": 9222 + }, + { + "epoch": 0.744330562505044, + "grad_norm": 0.6700316071510315, + "learning_rate": 0.0001130187061420102, + "loss": 2.5575, + "step": 9223 + }, + { + "epoch": 0.744411266241627, + "grad_norm": 0.7532816529273987, + "learning_rate": 0.00011300305343727446, + "loss": 2.5174, + "step": 9224 + }, + { + "epoch": 0.74449196997821, + "grad_norm": 0.7614738941192627, + "learning_rate": 0.00011298740040846862, + "loss": 2.5995, + "step": 9225 + }, + { + "epoch": 0.744572673714793, + "grad_norm": 0.6781208515167236, + "learning_rate": 0.00011297174705598283, + "loss": 2.5225, + "step": 9226 + }, + { + "epoch": 0.744653377451376, + "grad_norm": 
0.680525541305542, + "learning_rate": 0.0001129560933802072, + "loss": 2.5844, + "step": 9227 + }, + { + "epoch": 0.744734081187959, + "grad_norm": 0.7196657657623291, + "learning_rate": 0.00011294043938153185, + "loss": 2.564, + "step": 9228 + }, + { + "epoch": 0.744814784924542, + "grad_norm": 0.6997412443161011, + "learning_rate": 0.00011292478506034694, + "loss": 2.6486, + "step": 9229 + }, + { + "epoch": 0.744895488661125, + "grad_norm": 0.7438939809799194, + "learning_rate": 0.00011290913041704256, + "loss": 2.5667, + "step": 9230 + }, + { + "epoch": 0.744976192397708, + "grad_norm": 0.7391374707221985, + "learning_rate": 0.00011289347545200892, + "loss": 2.5974, + "step": 9231 + }, + { + "epoch": 0.745056896134291, + "grad_norm": 0.7845481634140015, + "learning_rate": 0.0001128778201656362, + "loss": 2.5168, + "step": 9232 + }, + { + "epoch": 0.745137599870874, + "grad_norm": 0.728712797164917, + "learning_rate": 0.00011286216455831449, + "loss": 2.5241, + "step": 9233 + }, + { + "epoch": 0.745218303607457, + "grad_norm": 0.7310191988945007, + "learning_rate": 0.00011284650863043407, + "loss": 2.5777, + "step": 9234 + }, + { + "epoch": 0.74529900734404, + "grad_norm": 0.6661474704742432, + "learning_rate": 0.00011283085238238503, + "loss": 2.5471, + "step": 9235 + }, + { + "epoch": 0.7453797110806231, + "grad_norm": 0.7697983384132385, + "learning_rate": 0.00011281519581455761, + "loss": 2.587, + "step": 9236 + }, + { + "epoch": 0.745460414817206, + "grad_norm": 0.7336567640304565, + "learning_rate": 0.00011279953892734203, + "loss": 2.5756, + "step": 9237 + }, + { + "epoch": 0.745541118553789, + "grad_norm": 0.6192059516906738, + "learning_rate": 0.00011278388172112848, + "loss": 2.5038, + "step": 9238 + }, + { + "epoch": 0.745621822290372, + "grad_norm": 0.7180300354957581, + "learning_rate": 0.00011276822419630719, + "loss": 2.5469, + "step": 9239 + }, + { + "epoch": 0.7457025260269551, + "grad_norm": 0.7583367824554443, + "learning_rate": 
0.00011275256635326837, + "loss": 2.6274, + "step": 9240 + }, + { + "epoch": 0.7457832297635381, + "grad_norm": 0.6848096251487732, + "learning_rate": 0.00011273690819240221, + "loss": 2.5117, + "step": 9241 + }, + { + "epoch": 0.745863933500121, + "grad_norm": 0.6830503344535828, + "learning_rate": 0.00011272124971409907, + "loss": 2.5114, + "step": 9242 + }, + { + "epoch": 0.745944637236704, + "grad_norm": 0.780240535736084, + "learning_rate": 0.0001127055909187491, + "loss": 2.6432, + "step": 9243 + }, + { + "epoch": 0.7460253409732871, + "grad_norm": 0.7421274185180664, + "learning_rate": 0.00011268993180674261, + "loss": 2.5723, + "step": 9244 + }, + { + "epoch": 0.7461060447098701, + "grad_norm": 0.6695685386657715, + "learning_rate": 0.00011267427237846986, + "loss": 2.5335, + "step": 9245 + }, + { + "epoch": 0.746186748446453, + "grad_norm": 0.8390316963195801, + "learning_rate": 0.00011265861263432104, + "loss": 2.5125, + "step": 9246 + }, + { + "epoch": 0.746267452183036, + "grad_norm": 0.7030535936355591, + "learning_rate": 0.00011264295257468658, + "loss": 2.5986, + "step": 9247 + }, + { + "epoch": 0.7463481559196191, + "grad_norm": 0.6754253506660461, + "learning_rate": 0.00011262729219995669, + "loss": 2.5067, + "step": 9248 + }, + { + "epoch": 0.7464288596562021, + "grad_norm": 0.6809592843055725, + "learning_rate": 0.00011261163151052163, + "loss": 2.5359, + "step": 9249 + }, + { + "epoch": 0.7465095633927851, + "grad_norm": 0.6546878218650818, + "learning_rate": 0.00011259597050677178, + "loss": 2.5357, + "step": 9250 + }, + { + "epoch": 0.746590267129368, + "grad_norm": 0.6514731645584106, + "learning_rate": 0.00011258030918909739, + "loss": 2.5591, + "step": 9251 + }, + { + "epoch": 0.7466709708659511, + "grad_norm": 0.6981258392333984, + "learning_rate": 0.0001125646475578888, + "loss": 2.6171, + "step": 9252 + }, + { + "epoch": 0.7467516746025341, + "grad_norm": 0.6763784885406494, + "learning_rate": 0.00011254898561353639, + "loss": 2.5455, + 
"step": 9253 + }, + { + "epoch": 0.7468323783391171, + "grad_norm": 0.6241726279258728, + "learning_rate": 0.00011253332335643043, + "loss": 2.6073, + "step": 9254 + }, + { + "epoch": 0.7469130820757001, + "grad_norm": 0.6810312271118164, + "learning_rate": 0.00011251766078696132, + "loss": 2.5285, + "step": 9255 + }, + { + "epoch": 0.7469937858122832, + "grad_norm": 0.6603971123695374, + "learning_rate": 0.00011250199790551934, + "loss": 2.5985, + "step": 9256 + }, + { + "epoch": 0.7470744895488661, + "grad_norm": 0.69618159532547, + "learning_rate": 0.0001124863347124949, + "loss": 2.5728, + "step": 9257 + }, + { + "epoch": 0.7471551932854491, + "grad_norm": 0.6878889203071594, + "learning_rate": 0.00011247067120827837, + "loss": 2.5459, + "step": 9258 + }, + { + "epoch": 0.7472358970220321, + "grad_norm": 0.6613149046897888, + "learning_rate": 0.00011245500739326011, + "loss": 2.6559, + "step": 9259 + }, + { + "epoch": 0.7473166007586152, + "grad_norm": 0.6397448778152466, + "learning_rate": 0.00011243934326783053, + "loss": 2.5712, + "step": 9260 + }, + { + "epoch": 0.7473973044951981, + "grad_norm": 0.6804259419441223, + "learning_rate": 0.00011242367883237996, + "loss": 2.6143, + "step": 9261 + }, + { + "epoch": 0.7474780082317811, + "grad_norm": 0.8029066324234009, + "learning_rate": 0.00011240801408729884, + "loss": 2.5702, + "step": 9262 + }, + { + "epoch": 0.7475587119683641, + "grad_norm": 0.7086285948753357, + "learning_rate": 0.00011239234903297761, + "loss": 2.6113, + "step": 9263 + }, + { + "epoch": 0.7476394157049472, + "grad_norm": 0.6980452537536621, + "learning_rate": 0.00011237668366980665, + "loss": 2.6355, + "step": 9264 + }, + { + "epoch": 0.7477201194415302, + "grad_norm": 0.6906906962394714, + "learning_rate": 0.00011236101799817636, + "loss": 2.5605, + "step": 9265 + }, + { + "epoch": 0.7478008231781131, + "grad_norm": 0.7412894368171692, + "learning_rate": 0.00011234535201847716, + "loss": 2.6073, + "step": 9266 + }, + { + "epoch": 
0.7478815269146961, + "grad_norm": 0.6949330568313599, + "learning_rate": 0.00011232968573109955, + "loss": 2.5623, + "step": 9267 + }, + { + "epoch": 0.7479622306512792, + "grad_norm": 0.6916515827178955, + "learning_rate": 0.00011231401913643393, + "loss": 2.5348, + "step": 9268 + }, + { + "epoch": 0.7480429343878622, + "grad_norm": 0.7576180696487427, + "learning_rate": 0.0001122983522348708, + "loss": 2.5968, + "step": 9269 + }, + { + "epoch": 0.7481236381244452, + "grad_norm": 0.6734197735786438, + "learning_rate": 0.00011228268502680052, + "loss": 2.5185, + "step": 9270 + }, + { + "epoch": 0.7482043418610281, + "grad_norm": 0.6952544450759888, + "learning_rate": 0.00011226701751261367, + "loss": 2.57, + "step": 9271 + }, + { + "epoch": 0.7482850455976112, + "grad_norm": 0.6504654884338379, + "learning_rate": 0.00011225134969270068, + "loss": 2.5677, + "step": 9272 + }, + { + "epoch": 0.7483657493341942, + "grad_norm": 0.6843643188476562, + "learning_rate": 0.00011223568156745198, + "loss": 2.5686, + "step": 9273 + }, + { + "epoch": 0.7484464530707772, + "grad_norm": 0.6786371469497681, + "learning_rate": 0.00011222001313725816, + "loss": 2.5024, + "step": 9274 + }, + { + "epoch": 0.7485271568073602, + "grad_norm": 0.6431117057800293, + "learning_rate": 0.00011220434440250967, + "loss": 2.5206, + "step": 9275 + }, + { + "epoch": 0.7486078605439432, + "grad_norm": 0.699547290802002, + "learning_rate": 0.000112188675363597, + "loss": 2.5974, + "step": 9276 + }, + { + "epoch": 0.7486885642805262, + "grad_norm": 0.6870436072349548, + "learning_rate": 0.00011217300602091067, + "loss": 2.5303, + "step": 9277 + }, + { + "epoch": 0.7487692680171092, + "grad_norm": 0.7032173871994019, + "learning_rate": 0.0001121573363748412, + "loss": 2.5045, + "step": 9278 + }, + { + "epoch": 0.7488499717536922, + "grad_norm": 0.6890417337417603, + "learning_rate": 0.00011214166642577917, + "loss": 2.5945, + "step": 9279 + }, + { + "epoch": 0.7489306754902753, + "grad_norm": 
0.7257806062698364, + "learning_rate": 0.00011212599617411506, + "loss": 2.6013, + "step": 9280 + }, + { + "epoch": 0.7490113792268582, + "grad_norm": 0.722561240196228, + "learning_rate": 0.0001121103256202394, + "loss": 2.5809, + "step": 9281 + }, + { + "epoch": 0.7490920829634412, + "grad_norm": 0.7360994219779968, + "learning_rate": 0.00011209465476454277, + "loss": 2.5036, + "step": 9282 + }, + { + "epoch": 0.7491727867000242, + "grad_norm": 0.6561676263809204, + "learning_rate": 0.00011207898360741574, + "loss": 2.5302, + "step": 9283 + }, + { + "epoch": 0.7492534904366072, + "grad_norm": 0.7454147338867188, + "learning_rate": 0.00011206331214924887, + "loss": 2.5511, + "step": 9284 + }, + { + "epoch": 0.7493341941731902, + "grad_norm": 0.7085482478141785, + "learning_rate": 0.00011204764039043275, + "loss": 2.5743, + "step": 9285 + }, + { + "epoch": 0.7494148979097732, + "grad_norm": 0.691872775554657, + "learning_rate": 0.0001120319683313579, + "loss": 2.5414, + "step": 9286 + }, + { + "epoch": 0.7494956016463562, + "grad_norm": 0.6661050915718079, + "learning_rate": 0.00011201629597241496, + "loss": 2.5418, + "step": 9287 + }, + { + "epoch": 0.7495763053829392, + "grad_norm": 0.7440990805625916, + "learning_rate": 0.00011200062331399452, + "loss": 2.5543, + "step": 9288 + }, + { + "epoch": 0.7496570091195223, + "grad_norm": 0.6655303835868835, + "learning_rate": 0.00011198495035648715, + "loss": 2.5629, + "step": 9289 + }, + { + "epoch": 0.7497377128561052, + "grad_norm": 0.7550996541976929, + "learning_rate": 0.00011196927710028353, + "loss": 2.5376, + "step": 9290 + }, + { + "epoch": 0.7498184165926882, + "grad_norm": 0.692915678024292, + "learning_rate": 0.00011195360354577422, + "loss": 2.4661, + "step": 9291 + }, + { + "epoch": 0.7498991203292712, + "grad_norm": 0.7572253346443176, + "learning_rate": 0.00011193792969334985, + "loss": 2.5641, + "step": 9292 + }, + { + "epoch": 0.7499798240658543, + "grad_norm": 0.6550531387329102, + "learning_rate": 
0.00011192225554340107, + "loss": 2.5591, + "step": 9293 + }, + { + "epoch": 0.7500605278024373, + "grad_norm": 0.677130401134491, + "learning_rate": 0.0001119065810963185, + "loss": 2.5859, + "step": 9294 + }, + { + "epoch": 0.7501412315390202, + "grad_norm": 0.680673360824585, + "learning_rate": 0.00011189090635249287, + "loss": 2.5343, + "step": 9295 + }, + { + "epoch": 0.7502219352756032, + "grad_norm": 0.7574957609176636, + "learning_rate": 0.00011187523131231472, + "loss": 2.5966, + "step": 9296 + }, + { + "epoch": 0.7503026390121863, + "grad_norm": 0.7099971175193787, + "learning_rate": 0.00011185955597617474, + "loss": 2.5547, + "step": 9297 + }, + { + "epoch": 0.7503833427487693, + "grad_norm": 0.7153162956237793, + "learning_rate": 0.00011184388034446367, + "loss": 2.5986, + "step": 9298 + }, + { + "epoch": 0.7504640464853523, + "grad_norm": 0.7154852747917175, + "learning_rate": 0.00011182820441757212, + "loss": 2.5214, + "step": 9299 + }, + { + "epoch": 0.7505447502219352, + "grad_norm": 0.6899208426475525, + "learning_rate": 0.00011181252819589081, + "loss": 2.5026, + "step": 9300 + }, + { + "epoch": 0.7506254539585183, + "grad_norm": 0.6719048023223877, + "learning_rate": 0.00011179685167981041, + "loss": 2.5915, + "step": 9301 + }, + { + "epoch": 0.7507061576951013, + "grad_norm": 0.6664413213729858, + "learning_rate": 0.00011178117486972164, + "loss": 2.5479, + "step": 9302 + }, + { + "epoch": 0.7507868614316843, + "grad_norm": 0.7433286905288696, + "learning_rate": 0.00011176549776601517, + "loss": 2.5941, + "step": 9303 + }, + { + "epoch": 0.7508675651682672, + "grad_norm": 0.7868518233299255, + "learning_rate": 0.00011174982036908177, + "loss": 2.5537, + "step": 9304 + }, + { + "epoch": 0.7509482689048503, + "grad_norm": 0.7037336826324463, + "learning_rate": 0.0001117341426793121, + "loss": 2.568, + "step": 9305 + }, + { + "epoch": 0.7510289726414333, + "grad_norm": 0.6630405783653259, + "learning_rate": 0.00011171846469709697, + "loss": 2.4906, 
+ "step": 9306 + }, + { + "epoch": 0.7511096763780163, + "grad_norm": 0.7398669719696045, + "learning_rate": 0.00011170278642282701, + "loss": 2.574, + "step": 9307 + }, + { + "epoch": 0.7511903801145993, + "grad_norm": 0.7557641267776489, + "learning_rate": 0.00011168710785689304, + "loss": 2.5237, + "step": 9308 + }, + { + "epoch": 0.7512710838511824, + "grad_norm": 0.6883708238601685, + "learning_rate": 0.00011167142899968581, + "loss": 2.5643, + "step": 9309 + }, + { + "epoch": 0.7513517875877653, + "grad_norm": 0.6623669862747192, + "learning_rate": 0.00011165574985159606, + "loss": 2.5319, + "step": 9310 + }, + { + "epoch": 0.7514324913243483, + "grad_norm": 0.6938778758049011, + "learning_rate": 0.00011164007041301454, + "loss": 2.5083, + "step": 9311 + }, + { + "epoch": 0.7515131950609313, + "grad_norm": 0.718534529209137, + "learning_rate": 0.00011162439068433204, + "loss": 2.4791, + "step": 9312 + }, + { + "epoch": 0.7515938987975144, + "grad_norm": 0.672113299369812, + "learning_rate": 0.00011160871066593934, + "loss": 2.5264, + "step": 9313 + }, + { + "epoch": 0.7516746025340973, + "grad_norm": 0.6854343414306641, + "learning_rate": 0.00011159303035822723, + "loss": 2.5734, + "step": 9314 + }, + { + "epoch": 0.7517553062706803, + "grad_norm": 0.6494589447975159, + "learning_rate": 0.0001115773497615865, + "loss": 2.5564, + "step": 9315 + }, + { + "epoch": 0.7518360100072633, + "grad_norm": 0.7219608426094055, + "learning_rate": 0.00011156166887640793, + "loss": 2.6049, + "step": 9316 + }, + { + "epoch": 0.7519167137438464, + "grad_norm": 0.6892502903938293, + "learning_rate": 0.00011154598770308236, + "loss": 2.5333, + "step": 9317 + }, + { + "epoch": 0.7519974174804294, + "grad_norm": 0.6670175790786743, + "learning_rate": 0.0001115303062420006, + "loss": 2.5882, + "step": 9318 + }, + { + "epoch": 0.7520781212170123, + "grad_norm": 0.7367776036262512, + "learning_rate": 0.00011151462449355347, + "loss": 2.5634, + "step": 9319 + }, + { + "epoch": 
0.7521588249535953, + "grad_norm": 0.6971952319145203, + "learning_rate": 0.00011149894245813182, + "loss": 2.5323, + "step": 9320 + }, + { + "epoch": 0.7522395286901784, + "grad_norm": 0.6555755734443665, + "learning_rate": 0.00011148326013612642, + "loss": 2.5597, + "step": 9321 + }, + { + "epoch": 0.7523202324267614, + "grad_norm": 0.7004384994506836, + "learning_rate": 0.00011146757752792819, + "loss": 2.4761, + "step": 9322 + }, + { + "epoch": 0.7524009361633444, + "grad_norm": 0.7151978015899658, + "learning_rate": 0.00011145189463392791, + "loss": 2.5825, + "step": 9323 + }, + { + "epoch": 0.7524816398999273, + "grad_norm": 0.7176918387413025, + "learning_rate": 0.00011143621145451653, + "loss": 2.6112, + "step": 9324 + }, + { + "epoch": 0.7525623436365104, + "grad_norm": 0.7156146168708801, + "learning_rate": 0.00011142052799008487, + "loss": 2.5293, + "step": 9325 + }, + { + "epoch": 0.7526430473730934, + "grad_norm": 0.7360113263130188, + "learning_rate": 0.00011140484424102375, + "loss": 2.5703, + "step": 9326 + }, + { + "epoch": 0.7527237511096764, + "grad_norm": 0.65630042552948, + "learning_rate": 0.00011138916020772414, + "loss": 2.5224, + "step": 9327 + }, + { + "epoch": 0.7528044548462594, + "grad_norm": 0.7088161110877991, + "learning_rate": 0.00011137347589057687, + "loss": 2.6673, + "step": 9328 + }, + { + "epoch": 0.7528851585828424, + "grad_norm": 0.7335243821144104, + "learning_rate": 0.00011135779128997283, + "loss": 2.5693, + "step": 9329 + }, + { + "epoch": 0.7529658623194254, + "grad_norm": 0.7166211605072021, + "learning_rate": 0.00011134210640630298, + "loss": 2.5612, + "step": 9330 + }, + { + "epoch": 0.7530465660560084, + "grad_norm": 0.7324960231781006, + "learning_rate": 0.00011132642123995816, + "loss": 2.5682, + "step": 9331 + }, + { + "epoch": 0.7531272697925914, + "grad_norm": 0.7133917808532715, + "learning_rate": 0.00011131073579132936, + "loss": 2.6131, + "step": 9332 + }, + { + "epoch": 0.7532079735291743, + "grad_norm": 
0.678741455078125, + "learning_rate": 0.0001112950500608074, + "loss": 2.6109, + "step": 9333 + }, + { + "epoch": 0.7532886772657574, + "grad_norm": 0.7000784277915955, + "learning_rate": 0.0001112793640487833, + "loss": 2.5087, + "step": 9334 + }, + { + "epoch": 0.7533693810023404, + "grad_norm": 0.719976544380188, + "learning_rate": 0.00011126367775564795, + "loss": 2.4665, + "step": 9335 + }, + { + "epoch": 0.7534500847389234, + "grad_norm": 0.7127155065536499, + "learning_rate": 0.00011124799118179232, + "loss": 2.5254, + "step": 9336 + }, + { + "epoch": 0.7535307884755064, + "grad_norm": 0.6306474804878235, + "learning_rate": 0.00011123230432760734, + "loss": 2.5487, + "step": 9337 + }, + { + "epoch": 0.7536114922120895, + "grad_norm": 0.667019784450531, + "learning_rate": 0.00011121661719348397, + "loss": 2.5576, + "step": 9338 + }, + { + "epoch": 0.7536921959486724, + "grad_norm": 0.6869673132896423, + "learning_rate": 0.00011120092977981318, + "loss": 2.544, + "step": 9339 + }, + { + "epoch": 0.7537728996852554, + "grad_norm": 0.6688670516014099, + "learning_rate": 0.00011118524208698596, + "loss": 2.6017, + "step": 9340 + }, + { + "epoch": 0.7538536034218384, + "grad_norm": 0.6717860102653503, + "learning_rate": 0.00011116955411539325, + "loss": 2.5571, + "step": 9341 + }, + { + "epoch": 0.7539343071584215, + "grad_norm": 0.7113999724388123, + "learning_rate": 0.00011115386586542604, + "loss": 2.5684, + "step": 9342 + }, + { + "epoch": 0.7540150108950044, + "grad_norm": 0.6687907576560974, + "learning_rate": 0.00011113817733747536, + "loss": 2.548, + "step": 9343 + }, + { + "epoch": 0.7540957146315874, + "grad_norm": 0.6828920841217041, + "learning_rate": 0.00011112248853193219, + "loss": 2.5544, + "step": 9344 + }, + { + "epoch": 0.7541764183681704, + "grad_norm": 0.6793262362480164, + "learning_rate": 0.00011110679944918749, + "loss": 2.4655, + "step": 9345 + }, + { + "epoch": 0.7542571221047535, + "grad_norm": 0.6812230348587036, + "learning_rate": 
0.00011109111008963235, + "loss": 2.5473, + "step": 9346 + }, + { + "epoch": 0.7543378258413365, + "grad_norm": 0.6838300824165344, + "learning_rate": 0.00011107542045365775, + "loss": 2.5248, + "step": 9347 + }, + { + "epoch": 0.7544185295779194, + "grad_norm": 0.7101932764053345, + "learning_rate": 0.0001110597305416547, + "loss": 2.5235, + "step": 9348 + }, + { + "epoch": 0.7544992333145024, + "grad_norm": 0.7136144042015076, + "learning_rate": 0.0001110440403540143, + "loss": 2.5592, + "step": 9349 + }, + { + "epoch": 0.7545799370510855, + "grad_norm": 0.6673154234886169, + "learning_rate": 0.00011102834989112751, + "loss": 2.4962, + "step": 9350 + }, + { + "epoch": 0.7546606407876685, + "grad_norm": 0.6849049925804138, + "learning_rate": 0.00011101265915338544, + "loss": 2.5793, + "step": 9351 + }, + { + "epoch": 0.7547413445242515, + "grad_norm": 0.7239733338356018, + "learning_rate": 0.0001109969681411791, + "loss": 2.5556, + "step": 9352 + }, + { + "epoch": 0.7548220482608344, + "grad_norm": 0.6738215684890747, + "learning_rate": 0.00011098127685489955, + "loss": 2.6181, + "step": 9353 + }, + { + "epoch": 0.7549027519974175, + "grad_norm": 0.6212114095687866, + "learning_rate": 0.00011096558529493787, + "loss": 2.5509, + "step": 9354 + }, + { + "epoch": 0.7549834557340005, + "grad_norm": 0.6801952123641968, + "learning_rate": 0.00011094989346168517, + "loss": 2.6454, + "step": 9355 + }, + { + "epoch": 0.7550641594705835, + "grad_norm": 0.6605944037437439, + "learning_rate": 0.0001109342013555325, + "loss": 2.5218, + "step": 9356 + }, + { + "epoch": 0.7551448632071665, + "grad_norm": 0.6486438512802124, + "learning_rate": 0.00011091850897687096, + "loss": 2.5431, + "step": 9357 + }, + { + "epoch": 0.7552255669437495, + "grad_norm": 0.6701794266700745, + "learning_rate": 0.0001109028163260916, + "loss": 2.563, + "step": 9358 + }, + { + "epoch": 0.7553062706803325, + "grad_norm": 0.6486446261405945, + "learning_rate": 0.00011088712340358555, + "loss": 2.5147, 
+ "step": 9359 + }, + { + "epoch": 0.7553869744169155, + "grad_norm": 0.695197582244873, + "learning_rate": 0.00011087143020974396, + "loss": 2.5707, + "step": 9360 + }, + { + "epoch": 0.7554676781534985, + "grad_norm": 0.6910821199417114, + "learning_rate": 0.00011085573674495791, + "loss": 2.5797, + "step": 9361 + }, + { + "epoch": 0.7555483818900816, + "grad_norm": 0.7084208726882935, + "learning_rate": 0.00011084004300961852, + "loss": 2.5362, + "step": 9362 + }, + { + "epoch": 0.7556290856266645, + "grad_norm": 0.6750916242599487, + "learning_rate": 0.00011082434900411691, + "loss": 2.5554, + "step": 9363 + }, + { + "epoch": 0.7557097893632475, + "grad_norm": 0.6711466908454895, + "learning_rate": 0.0001108086547288442, + "loss": 2.5577, + "step": 9364 + }, + { + "epoch": 0.7557904930998305, + "grad_norm": 0.7267118096351624, + "learning_rate": 0.00011079296018419163, + "loss": 2.5422, + "step": 9365 + }, + { + "epoch": 0.7558711968364136, + "grad_norm": 0.692730188369751, + "learning_rate": 0.00011077726537055021, + "loss": 2.5281, + "step": 9366 + }, + { + "epoch": 0.7559519005729965, + "grad_norm": 0.7071926593780518, + "learning_rate": 0.00011076157028831122, + "loss": 2.5273, + "step": 9367 + }, + { + "epoch": 0.7560326043095795, + "grad_norm": 0.7662521600723267, + "learning_rate": 0.00011074587493786574, + "loss": 2.5433, + "step": 9368 + }, + { + "epoch": 0.7561133080461625, + "grad_norm": 0.7173436880111694, + "learning_rate": 0.00011073017931960496, + "loss": 2.579, + "step": 9369 + }, + { + "epoch": 0.7561940117827456, + "grad_norm": 0.6401154398918152, + "learning_rate": 0.00011071448343392008, + "loss": 2.5189, + "step": 9370 + }, + { + "epoch": 0.7562747155193286, + "grad_norm": 0.6510714292526245, + "learning_rate": 0.00011069878728120224, + "loss": 2.5682, + "step": 9371 + }, + { + "epoch": 0.7563554192559115, + "grad_norm": 0.7189988493919373, + "learning_rate": 0.00011068309086184269, + "loss": 2.5247, + "step": 9372 + }, + { + "epoch": 
0.7564361229924945, + "grad_norm": 0.678753137588501, + "learning_rate": 0.00011066739417623258, + "loss": 2.5083, + "step": 9373 + }, + { + "epoch": 0.7565168267290776, + "grad_norm": 0.6903115510940552, + "learning_rate": 0.0001106516972247631, + "loss": 2.5658, + "step": 9374 + }, + { + "epoch": 0.7565975304656606, + "grad_norm": 0.6772382855415344, + "learning_rate": 0.0001106360000078255, + "loss": 2.5445, + "step": 9375 + }, + { + "epoch": 0.7566782342022436, + "grad_norm": 0.6655055284500122, + "learning_rate": 0.00011062030252581097, + "loss": 2.5186, + "step": 9376 + }, + { + "epoch": 0.7567589379388265, + "grad_norm": 0.7173851728439331, + "learning_rate": 0.00011060460477911074, + "loss": 2.5297, + "step": 9377 + }, + { + "epoch": 0.7568396416754096, + "grad_norm": 0.6891282200813293, + "learning_rate": 0.00011058890676811606, + "loss": 2.5706, + "step": 9378 + }, + { + "epoch": 0.7569203454119926, + "grad_norm": 0.7053082585334778, + "learning_rate": 0.0001105732084932181, + "loss": 2.5475, + "step": 9379 + }, + { + "epoch": 0.7570010491485756, + "grad_norm": 0.7503373622894287, + "learning_rate": 0.00011055750995480818, + "loss": 2.6438, + "step": 9380 + }, + { + "epoch": 0.7570817528851586, + "grad_norm": 0.6703453660011292, + "learning_rate": 0.0001105418111532775, + "loss": 2.5485, + "step": 9381 + }, + { + "epoch": 0.7571624566217416, + "grad_norm": 0.6651757955551147, + "learning_rate": 0.00011052611208901733, + "loss": 2.6079, + "step": 9382 + }, + { + "epoch": 0.7572431603583246, + "grad_norm": 0.6738902926445007, + "learning_rate": 0.00011051041276241895, + "loss": 2.5279, + "step": 9383 + }, + { + "epoch": 0.7573238640949076, + "grad_norm": 0.6803816556930542, + "learning_rate": 0.00011049471317387357, + "loss": 2.5972, + "step": 9384 + }, + { + "epoch": 0.7574045678314906, + "grad_norm": 0.7127584218978882, + "learning_rate": 0.00011047901332377253, + "loss": 2.5275, + "step": 9385 + }, + { + "epoch": 0.7574852715680735, + "grad_norm": 
0.7655676007270813, + "learning_rate": 0.00011046331321250711, + "loss": 2.6491, + "step": 9386 + }, + { + "epoch": 0.7575659753046566, + "grad_norm": 0.7005762457847595, + "learning_rate": 0.00011044761284046854, + "loss": 2.5266, + "step": 9387 + }, + { + "epoch": 0.7576466790412396, + "grad_norm": 0.701931357383728, + "learning_rate": 0.00011043191220804817, + "loss": 2.5556, + "step": 9388 + }, + { + "epoch": 0.7577273827778226, + "grad_norm": 0.6888757944107056, + "learning_rate": 0.00011041621131563724, + "loss": 2.5654, + "step": 9389 + }, + { + "epoch": 0.7578080865144056, + "grad_norm": 0.7119149565696716, + "learning_rate": 0.00011040051016362711, + "loss": 2.5925, + "step": 9390 + }, + { + "epoch": 0.7578887902509887, + "grad_norm": 0.7378301024436951, + "learning_rate": 0.00011038480875240911, + "loss": 2.5604, + "step": 9391 + }, + { + "epoch": 0.7579694939875716, + "grad_norm": 0.7221272587776184, + "learning_rate": 0.00011036910708237449, + "loss": 2.5293, + "step": 9392 + }, + { + "epoch": 0.7580501977241546, + "grad_norm": 0.6895891427993774, + "learning_rate": 0.00011035340515391465, + "loss": 2.5177, + "step": 9393 + }, + { + "epoch": 0.7581309014607376, + "grad_norm": 0.6812298893928528, + "learning_rate": 0.00011033770296742086, + "loss": 2.6345, + "step": 9394 + }, + { + "epoch": 0.7582116051973207, + "grad_norm": 0.6733750700950623, + "learning_rate": 0.00011032200052328449, + "loss": 2.5548, + "step": 9395 + }, + { + "epoch": 0.7582923089339036, + "grad_norm": 0.7667728066444397, + "learning_rate": 0.00011030629782189692, + "loss": 2.5858, + "step": 9396 + }, + { + "epoch": 0.7583730126704866, + "grad_norm": 0.6809018850326538, + "learning_rate": 0.00011029059486364946, + "loss": 2.6028, + "step": 9397 + }, + { + "epoch": 0.7584537164070696, + "grad_norm": 0.6817305684089661, + "learning_rate": 0.00011027489164893345, + "loss": 2.5594, + "step": 9398 + }, + { + "epoch": 0.7585344201436527, + "grad_norm": 0.6936343908309937, + 
"learning_rate": 0.00011025918817814027, + "loss": 2.4997, + "step": 9399 + }, + { + "epoch": 0.7586151238802357, + "grad_norm": 0.7046801447868347, + "learning_rate": 0.00011024348445166133, + "loss": 2.5199, + "step": 9400 + }, + { + "epoch": 0.7586958276168186, + "grad_norm": 0.7247316241264343, + "learning_rate": 0.00011022778046988798, + "loss": 2.5233, + "step": 9401 + }, + { + "epoch": 0.7587765313534016, + "grad_norm": 0.675652265548706, + "learning_rate": 0.00011021207623321162, + "loss": 2.5213, + "step": 9402 + }, + { + "epoch": 0.7588572350899847, + "grad_norm": 0.6866120100021362, + "learning_rate": 0.0001101963717420236, + "loss": 2.6026, + "step": 9403 + }, + { + "epoch": 0.7589379388265677, + "grad_norm": 0.7168806791305542, + "learning_rate": 0.00011018066699671534, + "loss": 2.5707, + "step": 9404 + }, + { + "epoch": 0.7590186425631507, + "grad_norm": 0.6858265995979309, + "learning_rate": 0.00011016496199767825, + "loss": 2.5313, + "step": 9405 + }, + { + "epoch": 0.7590993462997336, + "grad_norm": 0.7064315676689148, + "learning_rate": 0.00011014925674530375, + "loss": 2.5362, + "step": 9406 + }, + { + "epoch": 0.7591800500363167, + "grad_norm": 0.658385694026947, + "learning_rate": 0.00011013355123998324, + "loss": 2.5773, + "step": 9407 + }, + { + "epoch": 0.7592607537728997, + "grad_norm": 0.7112493515014648, + "learning_rate": 0.00011011784548210813, + "loss": 2.589, + "step": 9408 + }, + { + "epoch": 0.7593414575094827, + "grad_norm": 0.6835871934890747, + "learning_rate": 0.00011010213947206986, + "loss": 2.5952, + "step": 9409 + }, + { + "epoch": 0.7594221612460657, + "grad_norm": 0.6920506358146667, + "learning_rate": 0.00011008643321025989, + "loss": 2.5433, + "step": 9410 + }, + { + "epoch": 0.7595028649826487, + "grad_norm": 0.7239150404930115, + "learning_rate": 0.00011007072669706962, + "loss": 2.5291, + "step": 9411 + }, + { + "epoch": 0.7595835687192317, + "grad_norm": 0.644568145275116, + "learning_rate": 0.00011005501993289052, 
+ "loss": 2.5324, + "step": 9412 + }, + { + "epoch": 0.7596642724558147, + "grad_norm": 0.6604863405227661, + "learning_rate": 0.00011003931291811405, + "loss": 2.561, + "step": 9413 + }, + { + "epoch": 0.7597449761923977, + "grad_norm": 0.7056753635406494, + "learning_rate": 0.00011002360565313164, + "loss": 2.6537, + "step": 9414 + }, + { + "epoch": 0.7598256799289808, + "grad_norm": 0.6712720394134521, + "learning_rate": 0.00011000789813833476, + "loss": 2.5222, + "step": 9415 + }, + { + "epoch": 0.7599063836655637, + "grad_norm": 0.6829253435134888, + "learning_rate": 0.00010999219037411492, + "loss": 2.5156, + "step": 9416 + }, + { + "epoch": 0.7599870874021467, + "grad_norm": 0.7386518120765686, + "learning_rate": 0.00010997648236086359, + "loss": 2.5378, + "step": 9417 + }, + { + "epoch": 0.7600677911387297, + "grad_norm": 0.6711105108261108, + "learning_rate": 0.00010996077409897223, + "loss": 2.4985, + "step": 9418 + }, + { + "epoch": 0.7601484948753128, + "grad_norm": 0.6936883926391602, + "learning_rate": 0.00010994506558883233, + "loss": 2.4912, + "step": 9419 + }, + { + "epoch": 0.7602291986118958, + "grad_norm": 0.6927978992462158, + "learning_rate": 0.00010992935683083541, + "loss": 2.5526, + "step": 9420 + }, + { + "epoch": 0.7603099023484787, + "grad_norm": 0.7661495804786682, + "learning_rate": 0.00010991364782537297, + "loss": 2.5778, + "step": 9421 + }, + { + "epoch": 0.7603906060850617, + "grad_norm": 0.7092108726501465, + "learning_rate": 0.0001098979385728365, + "loss": 2.6557, + "step": 9422 + }, + { + "epoch": 0.7604713098216448, + "grad_norm": 0.696666419506073, + "learning_rate": 0.00010988222907361754, + "loss": 2.4897, + "step": 9423 + }, + { + "epoch": 0.7605520135582278, + "grad_norm": 0.6836280822753906, + "learning_rate": 0.00010986651932810756, + "loss": 2.5146, + "step": 9424 + }, + { + "epoch": 0.7606327172948107, + "grad_norm": 0.7269579768180847, + "learning_rate": 0.00010985080933669815, + "loss": 2.5314, + "step": 9425 + }, + 
{ + "epoch": 0.7607134210313937, + "grad_norm": 0.6862092018127441, + "learning_rate": 0.00010983509909978085, + "loss": 2.5415, + "step": 9426 + }, + { + "epoch": 0.7607941247679768, + "grad_norm": 0.7068747878074646, + "learning_rate": 0.00010981938861774713, + "loss": 2.5919, + "step": 9427 + }, + { + "epoch": 0.7608748285045598, + "grad_norm": 0.699999213218689, + "learning_rate": 0.0001098036778909886, + "loss": 2.5175, + "step": 9428 + }, + { + "epoch": 0.7609555322411428, + "grad_norm": 0.6642772555351257, + "learning_rate": 0.0001097879669198968, + "loss": 2.5721, + "step": 9429 + }, + { + "epoch": 0.7610362359777257, + "grad_norm": 0.7100533843040466, + "learning_rate": 0.00010977225570486323, + "loss": 2.5189, + "step": 9430 + }, + { + "epoch": 0.7611169397143088, + "grad_norm": 0.7289063930511475, + "learning_rate": 0.00010975654424627955, + "loss": 2.6139, + "step": 9431 + }, + { + "epoch": 0.7611976434508918, + "grad_norm": 0.7289659380912781, + "learning_rate": 0.00010974083254453726, + "loss": 2.5201, + "step": 9432 + }, + { + "epoch": 0.7612783471874748, + "grad_norm": 0.7389557957649231, + "learning_rate": 0.000109725120600028, + "loss": 2.559, + "step": 9433 + }, + { + "epoch": 0.7613590509240578, + "grad_norm": 0.7021538615226746, + "learning_rate": 0.00010970940841314327, + "loss": 2.6353, + "step": 9434 + }, + { + "epoch": 0.7614397546606407, + "grad_norm": 0.6614113450050354, + "learning_rate": 0.0001096936959842747, + "loss": 2.54, + "step": 9435 + }, + { + "epoch": 0.7615204583972238, + "grad_norm": 0.6905426979064941, + "learning_rate": 0.00010967798331381392, + "loss": 2.5845, + "step": 9436 + }, + { + "epoch": 0.7616011621338068, + "grad_norm": 0.8183904886245728, + "learning_rate": 0.00010966227040215247, + "loss": 2.5255, + "step": 9437 + }, + { + "epoch": 0.7616818658703898, + "grad_norm": 0.7404630780220032, + "learning_rate": 0.00010964655724968199, + "loss": 2.5726, + "step": 9438 + }, + { + "epoch": 0.7617625696069728, + 
"grad_norm": 0.657127320766449, + "learning_rate": 0.0001096308438567941, + "loss": 2.6233, + "step": 9439 + }, + { + "epoch": 0.7618432733435558, + "grad_norm": 0.7417906522750854, + "learning_rate": 0.00010961513022388039, + "loss": 2.6361, + "step": 9440 + }, + { + "epoch": 0.7619239770801388, + "grad_norm": 0.6930029988288879, + "learning_rate": 0.00010959941635133249, + "loss": 2.5164, + "step": 9441 + }, + { + "epoch": 0.7620046808167218, + "grad_norm": 0.6897261738777161, + "learning_rate": 0.00010958370223954207, + "loss": 2.5626, + "step": 9442 + }, + { + "epoch": 0.7620853845533048, + "grad_norm": 0.6737398505210876, + "learning_rate": 0.00010956798788890072, + "loss": 2.5342, + "step": 9443 + }, + { + "epoch": 0.7621660882898879, + "grad_norm": 0.6550001502037048, + "learning_rate": 0.0001095522732998001, + "loss": 2.5604, + "step": 9444 + }, + { + "epoch": 0.7622467920264708, + "grad_norm": 0.7184637784957886, + "learning_rate": 0.00010953655847263187, + "loss": 2.6006, + "step": 9445 + }, + { + "epoch": 0.7623274957630538, + "grad_norm": 0.6188609600067139, + "learning_rate": 0.00010952084340778766, + "loss": 2.4875, + "step": 9446 + }, + { + "epoch": 0.7624081994996368, + "grad_norm": 0.6550862789154053, + "learning_rate": 0.00010950512810565917, + "loss": 2.5794, + "step": 9447 + }, + { + "epoch": 0.7624889032362199, + "grad_norm": 0.6659231781959534, + "learning_rate": 0.000109489412566638, + "loss": 2.5137, + "step": 9448 + }, + { + "epoch": 0.7625696069728028, + "grad_norm": 0.749376118183136, + "learning_rate": 0.00010947369679111592, + "loss": 2.5923, + "step": 9449 + }, + { + "epoch": 0.7626503107093858, + "grad_norm": 0.6597894430160522, + "learning_rate": 0.0001094579807794845, + "loss": 2.5677, + "step": 9450 + }, + { + "epoch": 0.7627310144459688, + "grad_norm": 0.7194519639015198, + "learning_rate": 0.00010944226453213548, + "loss": 2.5754, + "step": 9451 + }, + { + "epoch": 0.7628117181825519, + "grad_norm": 0.6734583377838135, + 
"learning_rate": 0.00010942654804946057, + "loss": 2.535, + "step": 9452 + }, + { + "epoch": 0.7628924219191349, + "grad_norm": 0.7171904444694519, + "learning_rate": 0.00010941083133185146, + "loss": 2.5431, + "step": 9453 + }, + { + "epoch": 0.7629731256557178, + "grad_norm": 0.6760339736938477, + "learning_rate": 0.00010939511437969978, + "loss": 2.5163, + "step": 9454 + }, + { + "epoch": 0.7630538293923008, + "grad_norm": 0.6720966696739197, + "learning_rate": 0.00010937939719339731, + "loss": 2.5621, + "step": 9455 + }, + { + "epoch": 0.7631345331288839, + "grad_norm": 0.6374503970146179, + "learning_rate": 0.00010936367977333574, + "loss": 2.5007, + "step": 9456 + }, + { + "epoch": 0.7632152368654669, + "grad_norm": 0.6407146453857422, + "learning_rate": 0.00010934796211990684, + "loss": 2.5724, + "step": 9457 + }, + { + "epoch": 0.7632959406020499, + "grad_norm": 0.6685383319854736, + "learning_rate": 0.00010933224423350225, + "loss": 2.501, + "step": 9458 + }, + { + "epoch": 0.7633766443386328, + "grad_norm": 0.664806604385376, + "learning_rate": 0.00010931652611451373, + "loss": 2.6174, + "step": 9459 + }, + { + "epoch": 0.7634573480752159, + "grad_norm": 0.6383369565010071, + "learning_rate": 0.00010930080776333303, + "loss": 2.557, + "step": 9460 + }, + { + "epoch": 0.7635380518117989, + "grad_norm": 0.6747864484786987, + "learning_rate": 0.0001092850891803519, + "loss": 2.5406, + "step": 9461 + }, + { + "epoch": 0.7636187555483819, + "grad_norm": 0.7312811613082886, + "learning_rate": 0.00010926937036596205, + "loss": 2.5903, + "step": 9462 + }, + { + "epoch": 0.7636994592849649, + "grad_norm": 0.645847737789154, + "learning_rate": 0.00010925365132055529, + "loss": 2.5254, + "step": 9463 + }, + { + "epoch": 0.7637801630215479, + "grad_norm": 0.6466063857078552, + "learning_rate": 0.00010923793204452335, + "loss": 2.5322, + "step": 9464 + }, + { + "epoch": 0.7638608667581309, + "grad_norm": 0.6450574994087219, + "learning_rate": 0.000109222212538258, + 
"loss": 2.522, + "step": 9465 + }, + { + "epoch": 0.7639415704947139, + "grad_norm": 0.6491848826408386, + "learning_rate": 0.00010920649280215096, + "loss": 2.5545, + "step": 9466 + }, + { + "epoch": 0.7640222742312969, + "grad_norm": 0.6888336539268494, + "learning_rate": 0.0001091907728365941, + "loss": 2.5217, + "step": 9467 + }, + { + "epoch": 0.76410297796788, + "grad_norm": 0.702557384967804, + "learning_rate": 0.00010917505264197914, + "loss": 2.5351, + "step": 9468 + }, + { + "epoch": 0.7641836817044629, + "grad_norm": 0.6552408933639526, + "learning_rate": 0.0001091593322186979, + "loss": 2.5115, + "step": 9469 + }, + { + "epoch": 0.7642643854410459, + "grad_norm": 0.7514002919197083, + "learning_rate": 0.00010914361156714212, + "loss": 2.5196, + "step": 9470 + }, + { + "epoch": 0.7643450891776289, + "grad_norm": 0.6692500710487366, + "learning_rate": 0.00010912789068770366, + "loss": 2.5639, + "step": 9471 + }, + { + "epoch": 0.764425792914212, + "grad_norm": 0.6567397117614746, + "learning_rate": 0.0001091121695807743, + "loss": 2.5027, + "step": 9472 + }, + { + "epoch": 0.764506496650795, + "grad_norm": 0.6876057982444763, + "learning_rate": 0.00010909644824674587, + "loss": 2.519, + "step": 9473 + }, + { + "epoch": 0.7645872003873779, + "grad_norm": 0.747949481010437, + "learning_rate": 0.00010908072668601017, + "loss": 2.5604, + "step": 9474 + }, + { + "epoch": 0.7646679041239609, + "grad_norm": 0.6371368169784546, + "learning_rate": 0.000109065004898959, + "loss": 2.5853, + "step": 9475 + }, + { + "epoch": 0.764748607860544, + "grad_norm": 0.6472185254096985, + "learning_rate": 0.00010904928288598422, + "loss": 2.5662, + "step": 9476 + }, + { + "epoch": 0.764829311597127, + "grad_norm": 0.7009313702583313, + "learning_rate": 0.00010903356064747765, + "loss": 2.5244, + "step": 9477 + }, + { + "epoch": 0.76491001533371, + "grad_norm": 0.7405661940574646, + "learning_rate": 0.00010901783818383116, + "loss": 2.4963, + "step": 9478 + }, + { + "epoch": 
0.7649907190702929, + "grad_norm": 0.7693421840667725, + "learning_rate": 0.00010900211549543658, + "loss": 2.6018, + "step": 9479 + }, + { + "epoch": 0.765071422806876, + "grad_norm": 0.6965410709381104, + "learning_rate": 0.00010898639258268571, + "loss": 2.627, + "step": 9480 + }, + { + "epoch": 0.765152126543459, + "grad_norm": 0.7167130708694458, + "learning_rate": 0.00010897066944597046, + "loss": 2.5298, + "step": 9481 + }, + { + "epoch": 0.765232830280042, + "grad_norm": 0.7159689664840698, + "learning_rate": 0.00010895494608568268, + "loss": 2.5179, + "step": 9482 + }, + { + "epoch": 0.7653135340166249, + "grad_norm": 0.7329332232475281, + "learning_rate": 0.00010893922250221423, + "loss": 2.6498, + "step": 9483 + }, + { + "epoch": 0.765394237753208, + "grad_norm": 0.6912567019462585, + "learning_rate": 0.000108923498695957, + "loss": 2.5679, + "step": 9484 + }, + { + "epoch": 0.765474941489791, + "grad_norm": 0.7030324935913086, + "learning_rate": 0.00010890777466730285, + "loss": 2.5678, + "step": 9485 + }, + { + "epoch": 0.765555645226374, + "grad_norm": 0.7238864898681641, + "learning_rate": 0.00010889205041664365, + "loss": 2.5525, + "step": 9486 + }, + { + "epoch": 0.765636348962957, + "grad_norm": 0.6623672842979431, + "learning_rate": 0.00010887632594437134, + "loss": 2.4857, + "step": 9487 + }, + { + "epoch": 0.7657170526995399, + "grad_norm": 0.726645827293396, + "learning_rate": 0.00010886060125087776, + "loss": 2.5405, + "step": 9488 + }, + { + "epoch": 0.765797756436123, + "grad_norm": 0.6624459624290466, + "learning_rate": 0.00010884487633655487, + "loss": 2.5538, + "step": 9489 + }, + { + "epoch": 0.765878460172706, + "grad_norm": 0.7198002934455872, + "learning_rate": 0.00010882915120179453, + "loss": 2.5808, + "step": 9490 + }, + { + "epoch": 0.765959163909289, + "grad_norm": 0.7545582056045532, + "learning_rate": 0.00010881342584698862, + "loss": 2.6059, + "step": 9491 + }, + { + "epoch": 0.766039867645872, + "grad_norm": 
0.6748257279396057, + "learning_rate": 0.00010879770027252915, + "loss": 2.5203, + "step": 9492 + }, + { + "epoch": 0.766120571382455, + "grad_norm": 0.7376208901405334, + "learning_rate": 0.00010878197447880796, + "loss": 2.5255, + "step": 9493 + }, + { + "epoch": 0.766201275119038, + "grad_norm": 0.7589401006698608, + "learning_rate": 0.00010876624846621704, + "loss": 2.6304, + "step": 9494 + }, + { + "epoch": 0.766281978855621, + "grad_norm": 0.6963146924972534, + "learning_rate": 0.00010875052223514827, + "loss": 2.5547, + "step": 9495 + }, + { + "epoch": 0.766362682592204, + "grad_norm": 0.6660788059234619, + "learning_rate": 0.00010873479578599361, + "loss": 2.5922, + "step": 9496 + }, + { + "epoch": 0.7664433863287871, + "grad_norm": 0.7506482005119324, + "learning_rate": 0.00010871906911914502, + "loss": 2.5383, + "step": 9497 + }, + { + "epoch": 0.76652409006537, + "grad_norm": 0.7514285445213318, + "learning_rate": 0.00010870334223499443, + "loss": 2.5551, + "step": 9498 + }, + { + "epoch": 0.766604793801953, + "grad_norm": 0.6461809873580933, + "learning_rate": 0.00010868761513393379, + "loss": 2.5367, + "step": 9499 + }, + { + "epoch": 0.766685497538536, + "grad_norm": 0.6328238844871521, + "learning_rate": 0.00010867188781635512, + "loss": 2.5505, + "step": 9500 + }, + { + "epoch": 0.7667662012751191, + "grad_norm": 0.7090224027633667, + "learning_rate": 0.00010865616028265027, + "loss": 2.5921, + "step": 9501 + }, + { + "epoch": 0.766846905011702, + "grad_norm": 0.6404605507850647, + "learning_rate": 0.0001086404325332113, + "loss": 2.5357, + "step": 9502 + }, + { + "epoch": 0.766927608748285, + "grad_norm": 0.652477502822876, + "learning_rate": 0.00010862470456843016, + "loss": 2.5277, + "step": 9503 + }, + { + "epoch": 0.767008312484868, + "grad_norm": 0.7045448422431946, + "learning_rate": 0.00010860897638869887, + "loss": 2.5712, + "step": 9504 + }, + { + "epoch": 0.7670890162214511, + "grad_norm": 0.7024295926094055, + "learning_rate": 
0.00010859324799440936, + "loss": 2.5976, + "step": 9505 + }, + { + "epoch": 0.7671697199580341, + "grad_norm": 0.7165585160255432, + "learning_rate": 0.00010857751938595364, + "loss": 2.5378, + "step": 9506 + }, + { + "epoch": 0.767250423694617, + "grad_norm": 0.7037522196769714, + "learning_rate": 0.0001085617905637237, + "loss": 2.554, + "step": 9507 + }, + { + "epoch": 0.7673311274312, + "grad_norm": 0.738210916519165, + "learning_rate": 0.00010854606152811163, + "loss": 2.5102, + "step": 9508 + }, + { + "epoch": 0.7674118311677831, + "grad_norm": 0.7500020861625671, + "learning_rate": 0.0001085303322795093, + "loss": 2.5908, + "step": 9509 + }, + { + "epoch": 0.7674925349043661, + "grad_norm": 0.7669610977172852, + "learning_rate": 0.00010851460281830883, + "loss": 2.5119, + "step": 9510 + }, + { + "epoch": 0.7675732386409491, + "grad_norm": 0.6619212031364441, + "learning_rate": 0.00010849887314490217, + "loss": 2.5622, + "step": 9511 + }, + { + "epoch": 0.767653942377532, + "grad_norm": 0.7142546772956848, + "learning_rate": 0.00010848314325968136, + "loss": 2.596, + "step": 9512 + }, + { + "epoch": 0.7677346461141151, + "grad_norm": 0.7365403175354004, + "learning_rate": 0.0001084674131630385, + "loss": 2.5695, + "step": 9513 + }, + { + "epoch": 0.7678153498506981, + "grad_norm": 0.7843711972236633, + "learning_rate": 0.00010845168285536555, + "loss": 2.5707, + "step": 9514 + }, + { + "epoch": 0.7678960535872811, + "grad_norm": 0.6391385197639465, + "learning_rate": 0.00010843595233705454, + "loss": 2.5523, + "step": 9515 + }, + { + "epoch": 0.7679767573238641, + "grad_norm": 0.6955631971359253, + "learning_rate": 0.00010842022160849758, + "loss": 2.5072, + "step": 9516 + }, + { + "epoch": 0.7680574610604471, + "grad_norm": 0.7291388511657715, + "learning_rate": 0.00010840449067008665, + "loss": 2.5786, + "step": 9517 + }, + { + "epoch": 0.7681381647970301, + "grad_norm": 0.7988889813423157, + "learning_rate": 0.00010838875952221387, + "loss": 2.5622, + 
"step": 9518 + }, + { + "epoch": 0.7682188685336131, + "grad_norm": 0.726271390914917, + "learning_rate": 0.00010837302816527129, + "loss": 2.5479, + "step": 9519 + }, + { + "epoch": 0.7682995722701961, + "grad_norm": 0.7305205464363098, + "learning_rate": 0.00010835729659965095, + "loss": 2.5946, + "step": 9520 + }, + { + "epoch": 0.7683802760067792, + "grad_norm": 0.7843366265296936, + "learning_rate": 0.00010834156482574493, + "loss": 2.5212, + "step": 9521 + }, + { + "epoch": 0.7684609797433621, + "grad_norm": 0.6988845467567444, + "learning_rate": 0.00010832583284394529, + "loss": 2.5174, + "step": 9522 + }, + { + "epoch": 0.7685416834799451, + "grad_norm": 0.7088077068328857, + "learning_rate": 0.00010831010065464414, + "loss": 2.5253, + "step": 9523 + }, + { + "epoch": 0.7686223872165281, + "grad_norm": 0.7447031140327454, + "learning_rate": 0.00010829436825823358, + "loss": 2.6045, + "step": 9524 + }, + { + "epoch": 0.7687030909531112, + "grad_norm": 0.6865237951278687, + "learning_rate": 0.00010827863565510566, + "loss": 2.558, + "step": 9525 + }, + { + "epoch": 0.7687837946896942, + "grad_norm": 0.7748900651931763, + "learning_rate": 0.0001082629028456525, + "loss": 2.5694, + "step": 9526 + }, + { + "epoch": 0.7688644984262771, + "grad_norm": 0.7031759023666382, + "learning_rate": 0.00010824716983026622, + "loss": 2.5171, + "step": 9527 + }, + { + "epoch": 0.7689452021628601, + "grad_norm": 0.7627702355384827, + "learning_rate": 0.00010823143660933888, + "loss": 2.5715, + "step": 9528 + }, + { + "epoch": 0.7690259058994432, + "grad_norm": 0.707815945148468, + "learning_rate": 0.00010821570318326264, + "loss": 2.5281, + "step": 9529 + }, + { + "epoch": 0.7691066096360262, + "grad_norm": 0.6833841800689697, + "learning_rate": 0.00010819996955242962, + "loss": 2.5702, + "step": 9530 + }, + { + "epoch": 0.7691873133726091, + "grad_norm": 0.7029415369033813, + "learning_rate": 0.00010818423571723189, + "loss": 2.5331, + "step": 9531 + }, + { + "epoch": 
0.7692680171091921, + "grad_norm": 0.6442921161651611, + "learning_rate": 0.00010816850167806161, + "loss": 2.5423, + "step": 9532 + }, + { + "epoch": 0.7693487208457752, + "grad_norm": 0.7259004712104797, + "learning_rate": 0.00010815276743531093, + "loss": 2.6014, + "step": 9533 + }, + { + "epoch": 0.7694294245823582, + "grad_norm": 0.6483473777770996, + "learning_rate": 0.00010813703298937199, + "loss": 2.5268, + "step": 9534 + }, + { + "epoch": 0.7695101283189412, + "grad_norm": 0.6805520057678223, + "learning_rate": 0.00010812129834063691, + "loss": 2.5536, + "step": 9535 + }, + { + "epoch": 0.7695908320555241, + "grad_norm": 0.7120587825775146, + "learning_rate": 0.00010810556348949783, + "loss": 2.518, + "step": 9536 + }, + { + "epoch": 0.7696715357921071, + "grad_norm": 0.7280872464179993, + "learning_rate": 0.00010808982843634692, + "loss": 2.5525, + "step": 9537 + }, + { + "epoch": 0.7697522395286902, + "grad_norm": 0.68332439661026, + "learning_rate": 0.00010807409318157636, + "loss": 2.6318, + "step": 9538 + }, + { + "epoch": 0.7698329432652732, + "grad_norm": 0.655352771282196, + "learning_rate": 0.00010805835772557826, + "loss": 2.5781, + "step": 9539 + }, + { + "epoch": 0.7699136470018562, + "grad_norm": 0.7675400972366333, + "learning_rate": 0.00010804262206874484, + "loss": 2.5542, + "step": 9540 + }, + { + "epoch": 0.7699943507384391, + "grad_norm": 0.6676837205886841, + "learning_rate": 0.00010802688621146826, + "loss": 2.5411, + "step": 9541 + }, + { + "epoch": 0.7700750544750222, + "grad_norm": 0.7378436326980591, + "learning_rate": 0.00010801115015414067, + "loss": 2.5416, + "step": 9542 + }, + { + "epoch": 0.7701557582116052, + "grad_norm": 0.7330371141433716, + "learning_rate": 0.0001079954138971543, + "loss": 2.5154, + "step": 9543 + }, + { + "epoch": 0.7702364619481882, + "grad_norm": 0.6792974472045898, + "learning_rate": 0.00010797967744090131, + "loss": 2.5328, + "step": 9544 + }, + { + "epoch": 0.7703171656847712, + "grad_norm": 
0.7129618525505066, + "learning_rate": 0.00010796394078577392, + "loss": 2.5688, + "step": 9545 + }, + { + "epoch": 0.7703978694213542, + "grad_norm": 0.6900608539581299, + "learning_rate": 0.00010794820393216429, + "loss": 2.5659, + "step": 9546 + }, + { + "epoch": 0.7704785731579372, + "grad_norm": 0.6798564195632935, + "learning_rate": 0.00010793246688046464, + "loss": 2.5746, + "step": 9547 + }, + { + "epoch": 0.7705592768945202, + "grad_norm": 0.7132395505905151, + "learning_rate": 0.00010791672963106715, + "loss": 2.6277, + "step": 9548 + }, + { + "epoch": 0.7706399806311032, + "grad_norm": 0.6762476563453674, + "learning_rate": 0.0001079009921843641, + "loss": 2.5265, + "step": 9549 + }, + { + "epoch": 0.7707206843676863, + "grad_norm": 0.7223351001739502, + "learning_rate": 0.00010788525454074765, + "loss": 2.6255, + "step": 9550 + }, + { + "epoch": 0.7708013881042692, + "grad_norm": 0.7383624315261841, + "learning_rate": 0.00010786951670061008, + "loss": 2.5744, + "step": 9551 + }, + { + "epoch": 0.7708820918408522, + "grad_norm": 0.6677328944206238, + "learning_rate": 0.00010785377866434355, + "loss": 2.5594, + "step": 9552 + }, + { + "epoch": 0.7709627955774352, + "grad_norm": 0.6572195887565613, + "learning_rate": 0.00010783804043234032, + "loss": 2.5582, + "step": 9553 + }, + { + "epoch": 0.7710434993140183, + "grad_norm": 0.6837800741195679, + "learning_rate": 0.00010782230200499265, + "loss": 2.5311, + "step": 9554 + }, + { + "epoch": 0.7711242030506013, + "grad_norm": 0.7232153415679932, + "learning_rate": 0.00010780656338269277, + "loss": 2.5074, + "step": 9555 + }, + { + "epoch": 0.7712049067871842, + "grad_norm": 0.6722296476364136, + "learning_rate": 0.00010779082456583291, + "loss": 2.551, + "step": 9556 + }, + { + "epoch": 0.7712856105237672, + "grad_norm": 0.6461100578308105, + "learning_rate": 0.00010777508555480535, + "loss": 2.5723, + "step": 9557 + }, + { + "epoch": 0.7713663142603503, + "grad_norm": 0.6573290824890137, + "learning_rate": 
0.0001077593463500023, + "loss": 2.4967, + "step": 9558 + }, + { + "epoch": 0.7714470179969333, + "grad_norm": 0.7184738516807556, + "learning_rate": 0.0001077436069518161, + "loss": 2.6703, + "step": 9559 + }, + { + "epoch": 0.7715277217335162, + "grad_norm": 0.7226557731628418, + "learning_rate": 0.00010772786736063895, + "loss": 2.6118, + "step": 9560 + }, + { + "epoch": 0.7716084254700992, + "grad_norm": 0.6800956130027771, + "learning_rate": 0.00010771212757686318, + "loss": 2.578, + "step": 9561 + }, + { + "epoch": 0.7716891292066823, + "grad_norm": 0.6657535433769226, + "learning_rate": 0.00010769638760088099, + "loss": 2.5291, + "step": 9562 + }, + { + "epoch": 0.7717698329432653, + "grad_norm": 0.620527982711792, + "learning_rate": 0.00010768064743308471, + "loss": 2.5518, + "step": 9563 + }, + { + "epoch": 0.7718505366798483, + "grad_norm": 0.693760097026825, + "learning_rate": 0.00010766490707386663, + "loss": 2.52, + "step": 9564 + }, + { + "epoch": 0.7719312404164312, + "grad_norm": 0.6674148440361023, + "learning_rate": 0.000107649166523619, + "loss": 2.5197, + "step": 9565 + }, + { + "epoch": 0.7720119441530143, + "grad_norm": 0.6844033598899841, + "learning_rate": 0.00010763342578273419, + "loss": 2.5842, + "step": 9566 + }, + { + "epoch": 0.7720926478895973, + "grad_norm": 0.6891880035400391, + "learning_rate": 0.00010761768485160442, + "loss": 2.5349, + "step": 9567 + }, + { + "epoch": 0.7721733516261803, + "grad_norm": 0.7157394289970398, + "learning_rate": 0.00010760194373062204, + "loss": 2.5762, + "step": 9568 + }, + { + "epoch": 0.7722540553627633, + "grad_norm": 0.7522526383399963, + "learning_rate": 0.00010758620242017936, + "loss": 2.5348, + "step": 9569 + }, + { + "epoch": 0.7723347590993463, + "grad_norm": 0.6817746162414551, + "learning_rate": 0.00010757046092066869, + "loss": 2.5836, + "step": 9570 + }, + { + "epoch": 0.7724154628359293, + "grad_norm": 0.7274518013000488, + "learning_rate": 0.00010755471923248232, + "loss": 2.5276, + 
"step": 9571 + }, + { + "epoch": 0.7724961665725123, + "grad_norm": 0.6735557913780212, + "learning_rate": 0.00010753897735601264, + "loss": 2.6116, + "step": 9572 + }, + { + "epoch": 0.7725768703090953, + "grad_norm": 0.6626406908035278, + "learning_rate": 0.00010752323529165186, + "loss": 2.5778, + "step": 9573 + }, + { + "epoch": 0.7726575740456784, + "grad_norm": 0.6627367734909058, + "learning_rate": 0.00010750749303979246, + "loss": 2.5839, + "step": 9574 + }, + { + "epoch": 0.7727382777822613, + "grad_norm": 0.6658251881599426, + "learning_rate": 0.0001074917506008267, + "loss": 2.5233, + "step": 9575 + }, + { + "epoch": 0.7728189815188443, + "grad_norm": 0.6969848871231079, + "learning_rate": 0.00010747600797514692, + "loss": 2.5169, + "step": 9576 + }, + { + "epoch": 0.7728996852554273, + "grad_norm": 0.7313554883003235, + "learning_rate": 0.00010746026516314549, + "loss": 2.5528, + "step": 9577 + }, + { + "epoch": 0.7729803889920104, + "grad_norm": 0.6467077136039734, + "learning_rate": 0.00010744452216521472, + "loss": 2.5158, + "step": 9578 + }, + { + "epoch": 0.7730610927285934, + "grad_norm": 0.6808056235313416, + "learning_rate": 0.00010742877898174702, + "loss": 2.5346, + "step": 9579 + }, + { + "epoch": 0.7731417964651763, + "grad_norm": 0.7537400722503662, + "learning_rate": 0.00010741303561313474, + "loss": 2.5621, + "step": 9580 + }, + { + "epoch": 0.7732225002017593, + "grad_norm": 0.6715610027313232, + "learning_rate": 0.00010739729205977021, + "loss": 2.5384, + "step": 9581 + }, + { + "epoch": 0.7733032039383424, + "grad_norm": 0.7129234075546265, + "learning_rate": 0.00010738154832204586, + "loss": 2.5639, + "step": 9582 + }, + { + "epoch": 0.7733839076749254, + "grad_norm": 0.7156025171279907, + "learning_rate": 0.00010736580440035397, + "loss": 2.5427, + "step": 9583 + }, + { + "epoch": 0.7734646114115084, + "grad_norm": 0.7394191026687622, + "learning_rate": 0.00010735006029508703, + "loss": 2.5809, + "step": 9584 + }, + { + "epoch": 
0.7735453151480913, + "grad_norm": 0.7117684483528137, + "learning_rate": 0.00010733431600663737, + "loss": 2.5807, + "step": 9585 + }, + { + "epoch": 0.7736260188846744, + "grad_norm": 0.6622862219810486, + "learning_rate": 0.00010731857153539737, + "loss": 2.5277, + "step": 9586 + }, + { + "epoch": 0.7737067226212574, + "grad_norm": 0.7744547128677368, + "learning_rate": 0.00010730282688175943, + "loss": 2.6119, + "step": 9587 + }, + { + "epoch": 0.7737874263578404, + "grad_norm": 0.6804926991462708, + "learning_rate": 0.00010728708204611597, + "loss": 2.534, + "step": 9588 + }, + { + "epoch": 0.7738681300944233, + "grad_norm": 0.7115367650985718, + "learning_rate": 0.00010727133702885937, + "loss": 2.542, + "step": 9589 + }, + { + "epoch": 0.7739488338310063, + "grad_norm": 0.7623847723007202, + "learning_rate": 0.00010725559183038205, + "loss": 2.587, + "step": 9590 + }, + { + "epoch": 0.7740295375675894, + "grad_norm": 0.6612982153892517, + "learning_rate": 0.00010723984645107641, + "loss": 2.5257, + "step": 9591 + }, + { + "epoch": 0.7741102413041724, + "grad_norm": 0.7553900480270386, + "learning_rate": 0.00010722410089133488, + "loss": 2.6311, + "step": 9592 + }, + { + "epoch": 0.7741909450407554, + "grad_norm": 0.7541414499282837, + "learning_rate": 0.00010720835515154983, + "loss": 2.5978, + "step": 9593 + }, + { + "epoch": 0.7742716487773383, + "grad_norm": 0.6690947413444519, + "learning_rate": 0.00010719260923211376, + "loss": 2.568, + "step": 9594 + }, + { + "epoch": 0.7743523525139214, + "grad_norm": 0.7282151579856873, + "learning_rate": 0.00010717686313341909, + "loss": 2.5375, + "step": 9595 + }, + { + "epoch": 0.7744330562505044, + "grad_norm": 0.6862902045249939, + "learning_rate": 0.00010716111685585821, + "loss": 2.5503, + "step": 9596 + }, + { + "epoch": 0.7745137599870874, + "grad_norm": 0.7076265811920166, + "learning_rate": 0.00010714537039982357, + "loss": 2.4766, + "step": 9597 + }, + { + "epoch": 0.7745944637236704, + "grad_norm": 
0.7063891887664795, + "learning_rate": 0.00010712962376570761, + "loss": 2.5822, + "step": 9598 + }, + { + "epoch": 0.7746751674602534, + "grad_norm": 0.6975609064102173, + "learning_rate": 0.00010711387695390282, + "loss": 2.597, + "step": 9599 + }, + { + "epoch": 0.7747558711968364, + "grad_norm": 0.6790002584457397, + "learning_rate": 0.0001070981299648016, + "loss": 2.5705, + "step": 9600 + }, + { + "epoch": 0.7748365749334194, + "grad_norm": 0.6493679881095886, + "learning_rate": 0.00010708238279879643, + "loss": 2.49, + "step": 9601 + }, + { + "epoch": 0.7749172786700024, + "grad_norm": 0.6741142868995667, + "learning_rate": 0.00010706663545627977, + "loss": 2.6008, + "step": 9602 + }, + { + "epoch": 0.7749979824065855, + "grad_norm": 0.6753309965133667, + "learning_rate": 0.00010705088793764408, + "loss": 2.536, + "step": 9603 + }, + { + "epoch": 0.7750786861431684, + "grad_norm": 0.6879377365112305, + "learning_rate": 0.00010703514024328183, + "loss": 2.5884, + "step": 9604 + }, + { + "epoch": 0.7751593898797514, + "grad_norm": 0.6535949110984802, + "learning_rate": 0.00010701939237358549, + "loss": 2.5489, + "step": 9605 + }, + { + "epoch": 0.7752400936163344, + "grad_norm": 0.7308230400085449, + "learning_rate": 0.00010700364432894756, + "loss": 2.5679, + "step": 9606 + }, + { + "epoch": 0.7753207973529175, + "grad_norm": 0.7016584277153015, + "learning_rate": 0.00010698789610976052, + "loss": 2.5678, + "step": 9607 + }, + { + "epoch": 0.7754015010895005, + "grad_norm": 0.7181541323661804, + "learning_rate": 0.00010697214771641682, + "loss": 2.5004, + "step": 9608 + }, + { + "epoch": 0.7754822048260834, + "grad_norm": 0.6414844989776611, + "learning_rate": 0.00010695639914930895, + "loss": 2.4896, + "step": 9609 + }, + { + "epoch": 0.7755629085626664, + "grad_norm": 0.7288017868995667, + "learning_rate": 0.00010694065040882943, + "loss": 2.5945, + "step": 9610 + }, + { + "epoch": 0.7756436122992495, + "grad_norm": 0.6808066368103027, + "learning_rate": 
0.00010692490149537079, + "loss": 2.5973, + "step": 9611 + }, + { + "epoch": 0.7757243160358325, + "grad_norm": 0.7924454212188721, + "learning_rate": 0.00010690915240932553, + "loss": 2.5448, + "step": 9612 + }, + { + "epoch": 0.7758050197724154, + "grad_norm": 0.6466094851493835, + "learning_rate": 0.00010689340315108606, + "loss": 2.5065, + "step": 9613 + }, + { + "epoch": 0.7758857235089984, + "grad_norm": 0.6775460243225098, + "learning_rate": 0.00010687765372104502, + "loss": 2.5238, + "step": 9614 + }, + { + "epoch": 0.7759664272455815, + "grad_norm": 0.6901230812072754, + "learning_rate": 0.00010686190411959484, + "loss": 2.5109, + "step": 9615 + }, + { + "epoch": 0.7760471309821645, + "grad_norm": 0.7032039165496826, + "learning_rate": 0.00010684615434712808, + "loss": 2.6094, + "step": 9616 + }, + { + "epoch": 0.7761278347187475, + "grad_norm": 0.7008969187736511, + "learning_rate": 0.00010683040440403727, + "loss": 2.5758, + "step": 9617 + }, + { + "epoch": 0.7762085384553304, + "grad_norm": 0.6909677386283875, + "learning_rate": 0.00010681465429071491, + "loss": 2.5373, + "step": 9618 + }, + { + "epoch": 0.7762892421919135, + "grad_norm": 0.699030339717865, + "learning_rate": 0.00010679890400755355, + "loss": 2.577, + "step": 9619 + }, + { + "epoch": 0.7763699459284965, + "grad_norm": 0.7012344598770142, + "learning_rate": 0.00010678315355494575, + "loss": 2.5205, + "step": 9620 + }, + { + "epoch": 0.7764506496650795, + "grad_norm": 0.7693915367126465, + "learning_rate": 0.000106767402933284, + "loss": 2.5947, + "step": 9621 + }, + { + "epoch": 0.7765313534016625, + "grad_norm": 0.7635772228240967, + "learning_rate": 0.00010675165214296093, + "loss": 2.6221, + "step": 9622 + }, + { + "epoch": 0.7766120571382455, + "grad_norm": 0.701411783695221, + "learning_rate": 0.000106735901184369, + "loss": 2.5236, + "step": 9623 + }, + { + "epoch": 0.7766927608748285, + "grad_norm": 0.7283998727798462, + "learning_rate": 0.00010672015005790079, + "loss": 2.5581, + 
"step": 9624 + }, + { + "epoch": 0.7767734646114115, + "grad_norm": 0.7069897055625916, + "learning_rate": 0.0001067043987639489, + "loss": 2.5541, + "step": 9625 + }, + { + "epoch": 0.7768541683479945, + "grad_norm": 0.7419753074645996, + "learning_rate": 0.00010668864730290586, + "loss": 2.5992, + "step": 9626 + }, + { + "epoch": 0.7769348720845776, + "grad_norm": 0.6651501059532166, + "learning_rate": 0.00010667289567516426, + "loss": 2.546, + "step": 9627 + }, + { + "epoch": 0.7770155758211605, + "grad_norm": 0.7265670895576477, + "learning_rate": 0.00010665714388111665, + "loss": 2.611, + "step": 9628 + }, + { + "epoch": 0.7770962795577435, + "grad_norm": 0.6520028114318848, + "learning_rate": 0.00010664139192115559, + "loss": 2.5433, + "step": 9629 + }, + { + "epoch": 0.7771769832943265, + "grad_norm": 0.6990057826042175, + "learning_rate": 0.0001066256397956737, + "loss": 2.5325, + "step": 9630 + }, + { + "epoch": 0.7772576870309096, + "grad_norm": 0.7353312373161316, + "learning_rate": 0.00010660988750506355, + "loss": 2.4707, + "step": 9631 + }, + { + "epoch": 0.7773383907674926, + "grad_norm": 0.6810272932052612, + "learning_rate": 0.00010659413504971774, + "loss": 2.5618, + "step": 9632 + }, + { + "epoch": 0.7774190945040755, + "grad_norm": 0.6480081081390381, + "learning_rate": 0.00010657838243002883, + "loss": 2.4543, + "step": 9633 + }, + { + "epoch": 0.7774997982406585, + "grad_norm": 0.6617380976676941, + "learning_rate": 0.00010656262964638942, + "loss": 2.5628, + "step": 9634 + }, + { + "epoch": 0.7775805019772416, + "grad_norm": 0.6761382222175598, + "learning_rate": 0.00010654687669919212, + "loss": 2.5433, + "step": 9635 + }, + { + "epoch": 0.7776612057138246, + "grad_norm": 0.6733867526054382, + "learning_rate": 0.00010653112358882957, + "loss": 2.5282, + "step": 9636 + }, + { + "epoch": 0.7777419094504076, + "grad_norm": 0.6854631304740906, + "learning_rate": 0.00010651537031569433, + "loss": 2.5997, + "step": 9637 + }, + { + "epoch": 
0.7778226131869905, + "grad_norm": 0.7451226115226746, + "learning_rate": 0.00010649961688017904, + "loss": 2.5058, + "step": 9638 + }, + { + "epoch": 0.7779033169235735, + "grad_norm": 0.6744229197502136, + "learning_rate": 0.0001064838632826763, + "loss": 2.5962, + "step": 9639 + }, + { + "epoch": 0.7779840206601566, + "grad_norm": 0.7568119764328003, + "learning_rate": 0.00010646810952357873, + "loss": 2.5896, + "step": 9640 + }, + { + "epoch": 0.7780647243967396, + "grad_norm": 0.6860085725784302, + "learning_rate": 0.00010645235560327899, + "loss": 2.5675, + "step": 9641 + }, + { + "epoch": 0.7781454281333225, + "grad_norm": 0.6491742134094238, + "learning_rate": 0.00010643660152216965, + "loss": 2.5374, + "step": 9642 + }, + { + "epoch": 0.7782261318699055, + "grad_norm": 0.6664023399353027, + "learning_rate": 0.0001064208472806434, + "loss": 2.4679, + "step": 9643 + }, + { + "epoch": 0.7783068356064886, + "grad_norm": 0.6595140099525452, + "learning_rate": 0.00010640509287909284, + "loss": 2.5045, + "step": 9644 + }, + { + "epoch": 0.7783875393430716, + "grad_norm": 0.6788576245307922, + "learning_rate": 0.0001063893383179106, + "loss": 2.5706, + "step": 9645 + }, + { + "epoch": 0.7784682430796546, + "grad_norm": 0.6741334199905396, + "learning_rate": 0.00010637358359748939, + "loss": 2.5763, + "step": 9646 + }, + { + "epoch": 0.7785489468162375, + "grad_norm": 0.6837517023086548, + "learning_rate": 0.0001063578287182218, + "loss": 2.5484, + "step": 9647 + }, + { + "epoch": 0.7786296505528206, + "grad_norm": 0.6604229211807251, + "learning_rate": 0.00010634207368050048, + "loss": 2.5465, + "step": 9648 + }, + { + "epoch": 0.7787103542894036, + "grad_norm": 0.6528951525688171, + "learning_rate": 0.00010632631848471813, + "loss": 2.5409, + "step": 9649 + }, + { + "epoch": 0.7787910580259866, + "grad_norm": 0.6615377068519592, + "learning_rate": 0.00010631056313126734, + "loss": 2.5545, + "step": 9650 + }, + { + "epoch": 0.7788717617625696, + "grad_norm": 
0.666033923625946, + "learning_rate": 0.00010629480762054089, + "loss": 2.5341, + "step": 9651 + }, + { + "epoch": 0.7789524654991526, + "grad_norm": 0.7022622227668762, + "learning_rate": 0.00010627905195293135, + "loss": 2.5206, + "step": 9652 + }, + { + "epoch": 0.7790331692357356, + "grad_norm": 0.7175850868225098, + "learning_rate": 0.00010626329612883141, + "loss": 2.5912, + "step": 9653 + }, + { + "epoch": 0.7791138729723186, + "grad_norm": 0.6592069268226624, + "learning_rate": 0.00010624754014863379, + "loss": 2.5076, + "step": 9654 + }, + { + "epoch": 0.7791945767089016, + "grad_norm": 0.645893931388855, + "learning_rate": 0.0001062317840127311, + "loss": 2.5124, + "step": 9655 + }, + { + "epoch": 0.7792752804454847, + "grad_norm": 0.6638232469558716, + "learning_rate": 0.00010621602772151607, + "loss": 2.5182, + "step": 9656 + }, + { + "epoch": 0.7793559841820676, + "grad_norm": 0.6718387603759766, + "learning_rate": 0.0001062002712753814, + "loss": 2.4773, + "step": 9657 + }, + { + "epoch": 0.7794366879186506, + "grad_norm": 0.6402876377105713, + "learning_rate": 0.00010618451467471972, + "loss": 2.5557, + "step": 9658 + }, + { + "epoch": 0.7795173916552336, + "grad_norm": 0.6898398399353027, + "learning_rate": 0.00010616875791992382, + "loss": 2.5557, + "step": 9659 + }, + { + "epoch": 0.7795980953918167, + "grad_norm": 0.6718475222587585, + "learning_rate": 0.00010615300101138633, + "loss": 2.5335, + "step": 9660 + }, + { + "epoch": 0.7796787991283997, + "grad_norm": 0.6436911225318909, + "learning_rate": 0.00010613724394949995, + "loss": 2.5214, + "step": 9661 + }, + { + "epoch": 0.7797595028649826, + "grad_norm": 0.7554156184196472, + "learning_rate": 0.00010612148673465743, + "loss": 2.5526, + "step": 9662 + }, + { + "epoch": 0.7798402066015656, + "grad_norm": 0.6728504300117493, + "learning_rate": 0.00010610572936725147, + "loss": 2.5935, + "step": 9663 + }, + { + "epoch": 0.7799209103381487, + "grad_norm": 0.6793323159217834, + "learning_rate": 
0.00010608997184767476, + "loss": 2.5515, + "step": 9664 + }, + { + "epoch": 0.7800016140747317, + "grad_norm": 0.7242898941040039, + "learning_rate": 0.00010607421417631999, + "loss": 2.5332, + "step": 9665 + }, + { + "epoch": 0.7800823178113147, + "grad_norm": 0.6719244718551636, + "learning_rate": 0.00010605845635357996, + "loss": 2.5191, + "step": 9666 + }, + { + "epoch": 0.7801630215478976, + "grad_norm": 0.6836631894111633, + "learning_rate": 0.00010604269837984737, + "loss": 2.6489, + "step": 9667 + }, + { + "epoch": 0.7802437252844807, + "grad_norm": 0.6833824515342712, + "learning_rate": 0.00010602694025551496, + "loss": 2.4906, + "step": 9668 + }, + { + "epoch": 0.7803244290210637, + "grad_norm": 0.7449159026145935, + "learning_rate": 0.0001060111819809754, + "loss": 2.5301, + "step": 9669 + }, + { + "epoch": 0.7804051327576467, + "grad_norm": 0.7149158120155334, + "learning_rate": 0.00010599542355662149, + "loss": 2.5097, + "step": 9670 + }, + { + "epoch": 0.7804858364942296, + "grad_norm": 0.6616973876953125, + "learning_rate": 0.00010597966498284595, + "loss": 2.5928, + "step": 9671 + }, + { + "epoch": 0.7805665402308127, + "grad_norm": 0.6556531190872192, + "learning_rate": 0.00010596390626004154, + "loss": 2.5543, + "step": 9672 + }, + { + "epoch": 0.7806472439673957, + "grad_norm": 0.6585283875465393, + "learning_rate": 0.000105948147388601, + "loss": 2.5244, + "step": 9673 + }, + { + "epoch": 0.7807279477039787, + "grad_norm": 0.6484133005142212, + "learning_rate": 0.00010593238836891704, + "loss": 2.4996, + "step": 9674 + }, + { + "epoch": 0.7808086514405617, + "grad_norm": 0.6681119799613953, + "learning_rate": 0.00010591662920138248, + "loss": 2.5322, + "step": 9675 + }, + { + "epoch": 0.7808893551771448, + "grad_norm": 0.709403395652771, + "learning_rate": 0.00010590086988639005, + "loss": 2.5554, + "step": 9676 + }, + { + "epoch": 0.7809700589137277, + "grad_norm": 0.6734669804573059, + "learning_rate": 0.00010588511042433251, + "loss": 
2.5452, + "step": 9677 + }, + { + "epoch": 0.7810507626503107, + "grad_norm": 0.6800141930580139, + "learning_rate": 0.00010586935081560268, + "loss": 2.5154, + "step": 9678 + }, + { + "epoch": 0.7811314663868937, + "grad_norm": 0.7757244110107422, + "learning_rate": 0.00010585359106059326, + "loss": 2.5935, + "step": 9679 + }, + { + "epoch": 0.7812121701234768, + "grad_norm": 0.7288491725921631, + "learning_rate": 0.00010583783115969699, + "loss": 2.5276, + "step": 9680 + }, + { + "epoch": 0.7812928738600597, + "grad_norm": 0.6785164475440979, + "learning_rate": 0.00010582207111330678, + "loss": 2.5907, + "step": 9681 + }, + { + "epoch": 0.7813735775966427, + "grad_norm": 0.6651367545127869, + "learning_rate": 0.0001058063109218153, + "loss": 2.545, + "step": 9682 + }, + { + "epoch": 0.7814542813332257, + "grad_norm": 0.6657043695449829, + "learning_rate": 0.0001057905505856154, + "loss": 2.5548, + "step": 9683 + }, + { + "epoch": 0.7815349850698088, + "grad_norm": 0.6486692428588867, + "learning_rate": 0.00010577479010509986, + "loss": 2.5589, + "step": 9684 + }, + { + "epoch": 0.7816156888063918, + "grad_norm": 0.700749397277832, + "learning_rate": 0.0001057590294806614, + "loss": 2.6008, + "step": 9685 + }, + { + "epoch": 0.7816963925429747, + "grad_norm": 0.647051215171814, + "learning_rate": 0.00010574326871269289, + "loss": 2.4894, + "step": 9686 + }, + { + "epoch": 0.7817770962795577, + "grad_norm": 0.6932066679000854, + "learning_rate": 0.00010572750780158713, + "loss": 2.5256, + "step": 9687 + }, + { + "epoch": 0.7818578000161408, + "grad_norm": 0.6330733895301819, + "learning_rate": 0.00010571174674773689, + "loss": 2.5242, + "step": 9688 + }, + { + "epoch": 0.7819385037527238, + "grad_norm": 0.6476379036903381, + "learning_rate": 0.00010569598555153499, + "loss": 2.552, + "step": 9689 + }, + { + "epoch": 0.7820192074893068, + "grad_norm": 0.661204993724823, + "learning_rate": 0.00010568022421337424, + "loss": 2.4869, + "step": 9690 + }, + { + "epoch": 
0.7820999112258897, + "grad_norm": 0.6663263440132141, + "learning_rate": 0.00010566446273364746, + "loss": 2.5134, + "step": 9691 + }, + { + "epoch": 0.7821806149624727, + "grad_norm": 0.6982834339141846, + "learning_rate": 0.00010564870111274748, + "loss": 2.5755, + "step": 9692 + }, + { + "epoch": 0.7822613186990558, + "grad_norm": 0.6266167759895325, + "learning_rate": 0.00010563293935106706, + "loss": 2.5413, + "step": 9693 + }, + { + "epoch": 0.7823420224356388, + "grad_norm": 0.6484279632568359, + "learning_rate": 0.0001056171774489991, + "loss": 2.5579, + "step": 9694 + }, + { + "epoch": 0.7824227261722217, + "grad_norm": 0.674933910369873, + "learning_rate": 0.00010560141540693638, + "loss": 2.5364, + "step": 9695 + }, + { + "epoch": 0.7825034299088047, + "grad_norm": 0.7961840033531189, + "learning_rate": 0.00010558565322527174, + "loss": 2.5143, + "step": 9696 + }, + { + "epoch": 0.7825841336453878, + "grad_norm": 0.697158694267273, + "learning_rate": 0.00010556989090439804, + "loss": 2.5341, + "step": 9697 + }, + { + "epoch": 0.7826648373819708, + "grad_norm": 0.6912708282470703, + "learning_rate": 0.00010555412844470806, + "loss": 2.5331, + "step": 9698 + }, + { + "epoch": 0.7827455411185538, + "grad_norm": 0.7078350186347961, + "learning_rate": 0.00010553836584659474, + "loss": 2.5752, + "step": 9699 + }, + { + "epoch": 0.7828262448551367, + "grad_norm": 0.6421065926551819, + "learning_rate": 0.00010552260311045082, + "loss": 2.5393, + "step": 9700 + }, + { + "epoch": 0.7829069485917198, + "grad_norm": 0.644120454788208, + "learning_rate": 0.00010550684023666918, + "loss": 2.5062, + "step": 9701 + }, + { + "epoch": 0.7829876523283028, + "grad_norm": 0.7038589715957642, + "learning_rate": 0.00010549107722564275, + "loss": 2.6074, + "step": 9702 + }, + { + "epoch": 0.7830683560648858, + "grad_norm": 0.6692953109741211, + "learning_rate": 0.00010547531407776427, + "loss": 2.5801, + "step": 9703 + }, + { + "epoch": 0.7831490598014688, + "grad_norm": 
0.7059200406074524, + "learning_rate": 0.00010545955079342669, + "loss": 2.5579, + "step": 9704 + }, + { + "epoch": 0.7832297635380518, + "grad_norm": 0.7126718759536743, + "learning_rate": 0.0001054437873730228, + "loss": 2.5764, + "step": 9705 + }, + { + "epoch": 0.7833104672746348, + "grad_norm": 0.696784257888794, + "learning_rate": 0.0001054280238169455, + "loss": 2.5256, + "step": 9706 + }, + { + "epoch": 0.7833911710112178, + "grad_norm": 0.7473082542419434, + "learning_rate": 0.00010541226012558767, + "loss": 2.5983, + "step": 9707 + }, + { + "epoch": 0.7834718747478008, + "grad_norm": 0.6598967909812927, + "learning_rate": 0.00010539649629934219, + "loss": 2.5267, + "step": 9708 + }, + { + "epoch": 0.7835525784843839, + "grad_norm": 0.7168934345245361, + "learning_rate": 0.00010538073233860188, + "loss": 2.5278, + "step": 9709 + }, + { + "epoch": 0.7836332822209668, + "grad_norm": 0.6848951578140259, + "learning_rate": 0.00010536496824375968, + "loss": 2.5267, + "step": 9710 + }, + { + "epoch": 0.7837139859575498, + "grad_norm": 0.7276272773742676, + "learning_rate": 0.0001053492040152084, + "loss": 2.5706, + "step": 9711 + }, + { + "epoch": 0.7837946896941328, + "grad_norm": 0.6929399371147156, + "learning_rate": 0.00010533343965334101, + "loss": 2.5184, + "step": 9712 + }, + { + "epoch": 0.7838753934307159, + "grad_norm": 0.7497181296348572, + "learning_rate": 0.00010531767515855037, + "loss": 2.5626, + "step": 9713 + }, + { + "epoch": 0.7839560971672989, + "grad_norm": 0.6536200046539307, + "learning_rate": 0.00010530191053122935, + "loss": 2.5909, + "step": 9714 + }, + { + "epoch": 0.7840368009038818, + "grad_norm": 0.6750395894050598, + "learning_rate": 0.00010528614577177087, + "loss": 2.5119, + "step": 9715 + }, + { + "epoch": 0.7841175046404648, + "grad_norm": 0.6284878849983215, + "learning_rate": 0.00010527038088056782, + "loss": 2.5417, + "step": 9716 + }, + { + "epoch": 0.7841982083770479, + "grad_norm": 0.6529444456100464, + "learning_rate": 
0.00010525461585801308, + "loss": 2.5865, + "step": 9717 + }, + { + "epoch": 0.7842789121136309, + "grad_norm": 0.7332968711853027, + "learning_rate": 0.00010523885070449959, + "loss": 2.561, + "step": 9718 + }, + { + "epoch": 0.7843596158502139, + "grad_norm": 0.7054178714752197, + "learning_rate": 0.00010522308542042025, + "loss": 2.623, + "step": 9719 + }, + { + "epoch": 0.7844403195867968, + "grad_norm": 0.6837820410728455, + "learning_rate": 0.00010520732000616798, + "loss": 2.5586, + "step": 9720 + }, + { + "epoch": 0.7845210233233799, + "grad_norm": 0.7339439392089844, + "learning_rate": 0.00010519155446213565, + "loss": 2.5374, + "step": 9721 + }, + { + "epoch": 0.7846017270599629, + "grad_norm": 0.7625028491020203, + "learning_rate": 0.00010517578878871624, + "loss": 2.5663, + "step": 9722 + }, + { + "epoch": 0.7846824307965459, + "grad_norm": 0.6749752759933472, + "learning_rate": 0.00010516002298630263, + "loss": 2.5744, + "step": 9723 + }, + { + "epoch": 0.7847631345331288, + "grad_norm": 0.6702882647514343, + "learning_rate": 0.00010514425705528776, + "loss": 2.6247, + "step": 9724 + }, + { + "epoch": 0.7848438382697119, + "grad_norm": 0.6641737222671509, + "learning_rate": 0.00010512849099606457, + "loss": 2.5792, + "step": 9725 + }, + { + "epoch": 0.7849245420062949, + "grad_norm": 0.7522993683815002, + "learning_rate": 0.00010511272480902597, + "loss": 2.5941, + "step": 9726 + }, + { + "epoch": 0.7850052457428779, + "grad_norm": 0.7507709860801697, + "learning_rate": 0.00010509695849456487, + "loss": 2.5312, + "step": 9727 + }, + { + "epoch": 0.7850859494794609, + "grad_norm": 0.7101978063583374, + "learning_rate": 0.0001050811920530743, + "loss": 2.5833, + "step": 9728 + }, + { + "epoch": 0.785166653216044, + "grad_norm": 0.6814672946929932, + "learning_rate": 0.0001050654254849471, + "loss": 2.5466, + "step": 9729 + }, + { + "epoch": 0.7852473569526269, + "grad_norm": 0.7250106930732727, + "learning_rate": 0.0001050496587905763, + "loss": 2.5144, 
+ "step": 9730 + }, + { + "epoch": 0.7853280606892099, + "grad_norm": 0.7125658392906189, + "learning_rate": 0.00010503389197035474, + "loss": 2.5384, + "step": 9731 + }, + { + "epoch": 0.7854087644257929, + "grad_norm": 0.7076827883720398, + "learning_rate": 0.00010501812502467547, + "loss": 2.4879, + "step": 9732 + }, + { + "epoch": 0.785489468162376, + "grad_norm": 0.632216215133667, + "learning_rate": 0.00010500235795393141, + "loss": 2.5678, + "step": 9733 + }, + { + "epoch": 0.785570171898959, + "grad_norm": 0.7376949191093445, + "learning_rate": 0.00010498659075851551, + "loss": 2.5024, + "step": 9734 + }, + { + "epoch": 0.7856508756355419, + "grad_norm": 0.6730546951293945, + "learning_rate": 0.00010497082343882072, + "loss": 2.5001, + "step": 9735 + }, + { + "epoch": 0.7857315793721249, + "grad_norm": 0.6958187818527222, + "learning_rate": 0.00010495505599524002, + "loss": 2.538, + "step": 9736 + }, + { + "epoch": 0.785812283108708, + "grad_norm": 0.6882508397102356, + "learning_rate": 0.00010493928842816638, + "loss": 2.5247, + "step": 9737 + }, + { + "epoch": 0.785892986845291, + "grad_norm": 0.711086630821228, + "learning_rate": 0.00010492352073799276, + "loss": 2.5721, + "step": 9738 + }, + { + "epoch": 0.7859736905818739, + "grad_norm": 0.7217094898223877, + "learning_rate": 0.00010490775292511214, + "loss": 2.5827, + "step": 9739 + }, + { + "epoch": 0.7860543943184569, + "grad_norm": 0.6812087893486023, + "learning_rate": 0.0001048919849899175, + "loss": 2.532, + "step": 9740 + }, + { + "epoch": 0.7861350980550399, + "grad_norm": 0.7449110150337219, + "learning_rate": 0.00010487621693280176, + "loss": 2.5611, + "step": 9741 + }, + { + "epoch": 0.786215801791623, + "grad_norm": 0.7297104001045227, + "learning_rate": 0.00010486044875415797, + "loss": 2.5173, + "step": 9742 + }, + { + "epoch": 0.786296505528206, + "grad_norm": 0.6741474270820618, + "learning_rate": 0.0001048446804543791, + "loss": 2.5451, + "step": 9743 + }, + { + "epoch": 
0.7863772092647889, + "grad_norm": 0.6450859308242798, + "learning_rate": 0.00010482891203385812, + "loss": 2.551, + "step": 9744 + }, + { + "epoch": 0.7864579130013719, + "grad_norm": 0.6867123246192932, + "learning_rate": 0.00010481314349298805, + "loss": 2.4875, + "step": 9745 + }, + { + "epoch": 0.786538616737955, + "grad_norm": 0.6951552629470825, + "learning_rate": 0.00010479737483216183, + "loss": 2.6253, + "step": 9746 + }, + { + "epoch": 0.786619320474538, + "grad_norm": 0.6786869764328003, + "learning_rate": 0.0001047816060517725, + "loss": 2.5551, + "step": 9747 + }, + { + "epoch": 0.786700024211121, + "grad_norm": 0.698957622051239, + "learning_rate": 0.00010476583715221306, + "loss": 2.5554, + "step": 9748 + }, + { + "epoch": 0.7867807279477039, + "grad_norm": 0.6407502889633179, + "learning_rate": 0.00010475006813387648, + "loss": 2.5112, + "step": 9749 + }, + { + "epoch": 0.786861431684287, + "grad_norm": 0.660418689250946, + "learning_rate": 0.00010473429899715581, + "loss": 2.5557, + "step": 9750 + }, + { + "epoch": 0.78694213542087, + "grad_norm": 0.71445631980896, + "learning_rate": 0.00010471852974244403, + "loss": 2.5169, + "step": 9751 + }, + { + "epoch": 0.787022839157453, + "grad_norm": 0.6620494723320007, + "learning_rate": 0.00010470276037013414, + "loss": 2.5517, + "step": 9752 + }, + { + "epoch": 0.787103542894036, + "grad_norm": 0.6921235918998718, + "learning_rate": 0.00010468699088061917, + "loss": 2.5246, + "step": 9753 + }, + { + "epoch": 0.787184246630619, + "grad_norm": 0.6617140769958496, + "learning_rate": 0.00010467122127429214, + "loss": 2.4941, + "step": 9754 + }, + { + "epoch": 0.787264950367202, + "grad_norm": 0.6549816727638245, + "learning_rate": 0.00010465545155154608, + "loss": 2.5189, + "step": 9755 + }, + { + "epoch": 0.787345654103785, + "grad_norm": 0.7030060887336731, + "learning_rate": 0.00010463968171277396, + "loss": 2.5058, + "step": 9756 + }, + { + "epoch": 0.787426357840368, + "grad_norm": 0.7294049859046936, 
+ "learning_rate": 0.00010462391175836886, + "loss": 2.5166, + "step": 9757 + }, + { + "epoch": 0.787507061576951, + "grad_norm": 0.6407562494277954, + "learning_rate": 0.00010460814168872382, + "loss": 2.5391, + "step": 9758 + }, + { + "epoch": 0.787587765313534, + "grad_norm": 0.8024646639823914, + "learning_rate": 0.0001045923715042318, + "loss": 2.7034, + "step": 9759 + }, + { + "epoch": 0.787668469050117, + "grad_norm": 0.7160943150520325, + "learning_rate": 0.00010457660120528592, + "loss": 2.6016, + "step": 9760 + }, + { + "epoch": 0.7877491727867, + "grad_norm": 0.6987707018852234, + "learning_rate": 0.00010456083079227916, + "loss": 2.5428, + "step": 9761 + }, + { + "epoch": 0.7878298765232831, + "grad_norm": 0.7235369086265564, + "learning_rate": 0.00010454506026560453, + "loss": 2.517, + "step": 9762 + }, + { + "epoch": 0.787910580259866, + "grad_norm": 0.6827502846717834, + "learning_rate": 0.00010452928962565518, + "loss": 2.5777, + "step": 9763 + }, + { + "epoch": 0.787991283996449, + "grad_norm": 0.71755450963974, + "learning_rate": 0.00010451351887282408, + "loss": 2.6004, + "step": 9764 + }, + { + "epoch": 0.788071987733032, + "grad_norm": 0.6988046765327454, + "learning_rate": 0.00010449774800750427, + "loss": 2.6116, + "step": 9765 + }, + { + "epoch": 0.7881526914696151, + "grad_norm": 0.6959548592567444, + "learning_rate": 0.00010448197703008884, + "loss": 2.5856, + "step": 9766 + }, + { + "epoch": 0.7882333952061981, + "grad_norm": 0.687042772769928, + "learning_rate": 0.00010446620594097079, + "loss": 2.5167, + "step": 9767 + }, + { + "epoch": 0.788314098942781, + "grad_norm": 0.6950173377990723, + "learning_rate": 0.00010445043474054325, + "loss": 2.5157, + "step": 9768 + }, + { + "epoch": 0.788394802679364, + "grad_norm": 0.680768609046936, + "learning_rate": 0.00010443466342919926, + "loss": 2.6177, + "step": 9769 + }, + { + "epoch": 0.7884755064159471, + "grad_norm": 0.7790142893791199, + "learning_rate": 0.00010441889200733181, + "loss": 
2.5761, + "step": 9770 + }, + { + "epoch": 0.7885562101525301, + "grad_norm": 0.6207798719406128, + "learning_rate": 0.00010440312047533406, + "loss": 2.5305, + "step": 9771 + }, + { + "epoch": 0.7886369138891131, + "grad_norm": 0.7143635749816895, + "learning_rate": 0.00010438734883359903, + "loss": 2.5922, + "step": 9772 + }, + { + "epoch": 0.788717617625696, + "grad_norm": 0.7234248518943787, + "learning_rate": 0.00010437157708251977, + "loss": 2.6051, + "step": 9773 + }, + { + "epoch": 0.7887983213622791, + "grad_norm": 0.6602753400802612, + "learning_rate": 0.00010435580522248942, + "loss": 2.6002, + "step": 9774 + }, + { + "epoch": 0.7888790250988621, + "grad_norm": 0.6929246783256531, + "learning_rate": 0.00010434003325390101, + "loss": 2.5798, + "step": 9775 + }, + { + "epoch": 0.7889597288354451, + "grad_norm": 0.7355811595916748, + "learning_rate": 0.00010432426117714762, + "loss": 2.5859, + "step": 9776 + }, + { + "epoch": 0.789040432572028, + "grad_norm": 0.7009611129760742, + "learning_rate": 0.00010430848899262233, + "loss": 2.5535, + "step": 9777 + }, + { + "epoch": 0.7891211363086111, + "grad_norm": 0.6699070930480957, + "learning_rate": 0.00010429271670071823, + "loss": 2.5687, + "step": 9778 + }, + { + "epoch": 0.7892018400451941, + "grad_norm": 0.6632630228996277, + "learning_rate": 0.00010427694430182844, + "loss": 2.5359, + "step": 9779 + }, + { + "epoch": 0.7892825437817771, + "grad_norm": 0.7256911993026733, + "learning_rate": 0.000104261171796346, + "loss": 2.5432, + "step": 9780 + }, + { + "epoch": 0.7893632475183601, + "grad_norm": 0.6654312610626221, + "learning_rate": 0.000104245399184664, + "loss": 2.5432, + "step": 9781 + }, + { + "epoch": 0.7894439512549432, + "grad_norm": 0.6808900237083435, + "learning_rate": 0.00010422962646717557, + "loss": 2.4951, + "step": 9782 + }, + { + "epoch": 0.7895246549915261, + "grad_norm": 0.6655945181846619, + "learning_rate": 0.00010421385364427378, + "loss": 2.5152, + "step": 9783 + }, + { + "epoch": 
0.7896053587281091, + "grad_norm": 0.8399274349212646, + "learning_rate": 0.00010419808071635178, + "loss": 2.5688, + "step": 9784 + }, + { + "epoch": 0.7896860624646921, + "grad_norm": 0.6412226557731628, + "learning_rate": 0.00010418230768380262, + "loss": 2.5527, + "step": 9785 + }, + { + "epoch": 0.7897667662012752, + "grad_norm": 0.6505058407783508, + "learning_rate": 0.0001041665345470194, + "loss": 2.5768, + "step": 9786 + }, + { + "epoch": 0.7898474699378581, + "grad_norm": 0.6297653317451477, + "learning_rate": 0.00010415076130639526, + "loss": 2.5372, + "step": 9787 + }, + { + "epoch": 0.7899281736744411, + "grad_norm": 0.6524460315704346, + "learning_rate": 0.00010413498796232331, + "loss": 2.5047, + "step": 9788 + }, + { + "epoch": 0.7900088774110241, + "grad_norm": 0.6637924313545227, + "learning_rate": 0.00010411921451519662, + "loss": 2.508, + "step": 9789 + }, + { + "epoch": 0.7900895811476072, + "grad_norm": 0.6423435211181641, + "learning_rate": 0.00010410344096540836, + "loss": 2.4597, + "step": 9790 + }, + { + "epoch": 0.7901702848841902, + "grad_norm": 0.6361977458000183, + "learning_rate": 0.00010408766731335163, + "loss": 2.5921, + "step": 9791 + }, + { + "epoch": 0.7902509886207731, + "grad_norm": 0.6792182922363281, + "learning_rate": 0.00010407189355941953, + "loss": 2.5543, + "step": 9792 + }, + { + "epoch": 0.7903316923573561, + "grad_norm": 0.6998419761657715, + "learning_rate": 0.00010405611970400519, + "loss": 2.5333, + "step": 9793 + }, + { + "epoch": 0.7904123960939391, + "grad_norm": 0.6730015873908997, + "learning_rate": 0.00010404034574750174, + "loss": 2.596, + "step": 9794 + }, + { + "epoch": 0.7904930998305222, + "grad_norm": 0.7120258808135986, + "learning_rate": 0.00010402457169030235, + "loss": 2.5314, + "step": 9795 + }, + { + "epoch": 0.7905738035671052, + "grad_norm": 0.6553651690483093, + "learning_rate": 0.0001040087975328001, + "loss": 2.4973, + "step": 9796 + }, + { + "epoch": 0.7906545073036881, + "grad_norm": 
0.6506681442260742, + "learning_rate": 0.00010399302327538812, + "loss": 2.588, + "step": 9797 + }, + { + "epoch": 0.7907352110402711, + "grad_norm": 0.6737257242202759, + "learning_rate": 0.00010397724891845957, + "loss": 2.5454, + "step": 9798 + }, + { + "epoch": 0.7908159147768542, + "grad_norm": 0.670120894908905, + "learning_rate": 0.00010396147446240756, + "loss": 2.4926, + "step": 9799 + }, + { + "epoch": 0.7908966185134372, + "grad_norm": 0.7028468251228333, + "learning_rate": 0.00010394569990762529, + "loss": 2.5727, + "step": 9800 + }, + { + "epoch": 0.7909773222500202, + "grad_norm": 0.7084455490112305, + "learning_rate": 0.00010392992525450584, + "loss": 2.547, + "step": 9801 + }, + { + "epoch": 0.7910580259866031, + "grad_norm": 0.732694685459137, + "learning_rate": 0.0001039141505034424, + "loss": 2.5871, + "step": 9802 + }, + { + "epoch": 0.7911387297231862, + "grad_norm": 0.7214515209197998, + "learning_rate": 0.00010389837565482807, + "loss": 2.5672, + "step": 9803 + }, + { + "epoch": 0.7912194334597692, + "grad_norm": 0.6495330333709717, + "learning_rate": 0.00010388260070905604, + "loss": 2.5266, + "step": 9804 + }, + { + "epoch": 0.7913001371963522, + "grad_norm": 0.6930941343307495, + "learning_rate": 0.00010386682566651945, + "loss": 2.5734, + "step": 9805 + }, + { + "epoch": 0.7913808409329351, + "grad_norm": 0.714214563369751, + "learning_rate": 0.00010385105052761148, + "loss": 2.4987, + "step": 9806 + }, + { + "epoch": 0.7914615446695182, + "grad_norm": 0.7525388598442078, + "learning_rate": 0.00010383527529272523, + "loss": 2.5427, + "step": 9807 + }, + { + "epoch": 0.7915422484061012, + "grad_norm": 0.6088642477989197, + "learning_rate": 0.00010381949996225389, + "loss": 2.5018, + "step": 9808 + }, + { + "epoch": 0.7916229521426842, + "grad_norm": 0.6797540187835693, + "learning_rate": 0.00010380372453659066, + "loss": 2.5235, + "step": 9809 + }, + { + "epoch": 0.7917036558792672, + "grad_norm": 0.6754054427146912, + "learning_rate": 
0.00010378794901612865, + "loss": 2.5343, + "step": 9810 + }, + { + "epoch": 0.7917843596158503, + "grad_norm": 0.7375015020370483, + "learning_rate": 0.00010377217340126106, + "loss": 2.6101, + "step": 9811 + }, + { + "epoch": 0.7918650633524332, + "grad_norm": 0.6487904191017151, + "learning_rate": 0.00010375639769238103, + "loss": 2.5408, + "step": 9812 + }, + { + "epoch": 0.7919457670890162, + "grad_norm": 0.7280275821685791, + "learning_rate": 0.00010374062188988176, + "loss": 2.5503, + "step": 9813 + }, + { + "epoch": 0.7920264708255992, + "grad_norm": 0.6944922208786011, + "learning_rate": 0.00010372484599415644, + "loss": 2.5815, + "step": 9814 + }, + { + "epoch": 0.7921071745621823, + "grad_norm": 0.6970139741897583, + "learning_rate": 0.00010370907000559818, + "loss": 2.546, + "step": 9815 + }, + { + "epoch": 0.7921878782987652, + "grad_norm": 0.7338151335716248, + "learning_rate": 0.00010369329392460023, + "loss": 2.5449, + "step": 9816 + }, + { + "epoch": 0.7922685820353482, + "grad_norm": 0.7763465642929077, + "learning_rate": 0.00010367751775155574, + "loss": 2.5331, + "step": 9817 + }, + { + "epoch": 0.7923492857719312, + "grad_norm": 0.6892645955085754, + "learning_rate": 0.00010366174148685786, + "loss": 2.5617, + "step": 9818 + }, + { + "epoch": 0.7924299895085143, + "grad_norm": 0.7388250231742859, + "learning_rate": 0.00010364596513089984, + "loss": 2.5236, + "step": 9819 + }, + { + "epoch": 0.7925106932450973, + "grad_norm": 0.7035132646560669, + "learning_rate": 0.00010363018868407482, + "loss": 2.5711, + "step": 9820 + }, + { + "epoch": 0.7925913969816802, + "grad_norm": 0.7087043523788452, + "learning_rate": 0.00010361441214677603, + "loss": 2.5416, + "step": 9821 + }, + { + "epoch": 0.7926721007182632, + "grad_norm": 0.7173168063163757, + "learning_rate": 0.00010359863551939664, + "loss": 2.529, + "step": 9822 + }, + { + "epoch": 0.7927528044548463, + "grad_norm": 0.7007408738136292, + "learning_rate": 0.00010358285880232983, + "loss": 
2.5287, + "step": 9823 + }, + { + "epoch": 0.7928335081914293, + "grad_norm": 0.7731965780258179, + "learning_rate": 0.0001035670819959688, + "loss": 2.5913, + "step": 9824 + }, + { + "epoch": 0.7929142119280123, + "grad_norm": 0.6625120639801025, + "learning_rate": 0.00010355130510070681, + "loss": 2.5815, + "step": 9825 + }, + { + "epoch": 0.7929949156645952, + "grad_norm": 0.6628395318984985, + "learning_rate": 0.00010353552811693699, + "loss": 2.512, + "step": 9826 + }, + { + "epoch": 0.7930756194011783, + "grad_norm": 0.6565915942192078, + "learning_rate": 0.00010351975104505256, + "loss": 2.54, + "step": 9827 + }, + { + "epoch": 0.7931563231377613, + "grad_norm": 0.6581636667251587, + "learning_rate": 0.00010350397388544672, + "loss": 2.5462, + "step": 9828 + }, + { + "epoch": 0.7932370268743443, + "grad_norm": 0.705668568611145, + "learning_rate": 0.0001034881966385127, + "loss": 2.5241, + "step": 9829 + }, + { + "epoch": 0.7933177306109273, + "grad_norm": 0.7047126293182373, + "learning_rate": 0.00010347241930464373, + "loss": 2.5275, + "step": 9830 + }, + { + "epoch": 0.7933984343475103, + "grad_norm": 0.6285849213600159, + "learning_rate": 0.00010345664188423296, + "loss": 2.518, + "step": 9831 + }, + { + "epoch": 0.7934791380840933, + "grad_norm": 0.697542130947113, + "learning_rate": 0.00010344086437767366, + "loss": 2.5219, + "step": 9832 + }, + { + "epoch": 0.7935598418206763, + "grad_norm": 0.6349283456802368, + "learning_rate": 0.00010342508678535903, + "loss": 2.5277, + "step": 9833 + }, + { + "epoch": 0.7936405455572593, + "grad_norm": 0.7084335088729858, + "learning_rate": 0.00010340930910768225, + "loss": 2.476, + "step": 9834 + }, + { + "epoch": 0.7937212492938424, + "grad_norm": 0.6714156866073608, + "learning_rate": 0.00010339353134503662, + "loss": 2.556, + "step": 9835 + }, + { + "epoch": 0.7938019530304253, + "grad_norm": 0.6687895059585571, + "learning_rate": 0.00010337775349781527, + "loss": 2.5756, + "step": 9836 + }, + { + "epoch": 
0.7938826567670083, + "grad_norm": 0.669784665107727, + "learning_rate": 0.00010336197556641152, + "loss": 2.5545, + "step": 9837 + }, + { + "epoch": 0.7939633605035913, + "grad_norm": 0.6738600134849548, + "learning_rate": 0.0001033461975512185, + "loss": 2.5807, + "step": 9838 + }, + { + "epoch": 0.7940440642401744, + "grad_norm": 0.691443681716919, + "learning_rate": 0.00010333041945262953, + "loss": 2.5279, + "step": 9839 + }, + { + "epoch": 0.7941247679767574, + "grad_norm": 0.6283861398696899, + "learning_rate": 0.0001033146412710378, + "loss": 2.5355, + "step": 9840 + }, + { + "epoch": 0.7942054717133403, + "grad_norm": 0.6491204500198364, + "learning_rate": 0.00010329886300683655, + "loss": 2.5431, + "step": 9841 + }, + { + "epoch": 0.7942861754499233, + "grad_norm": 0.6673988103866577, + "learning_rate": 0.00010328308466041898, + "loss": 2.5845, + "step": 9842 + }, + { + "epoch": 0.7943668791865063, + "grad_norm": 0.6669130325317383, + "learning_rate": 0.00010326730623217837, + "loss": 2.5348, + "step": 9843 + }, + { + "epoch": 0.7944475829230894, + "grad_norm": 0.7003189921379089, + "learning_rate": 0.00010325152772250795, + "loss": 2.5779, + "step": 9844 + }, + { + "epoch": 0.7945282866596723, + "grad_norm": 0.6602177619934082, + "learning_rate": 0.00010323574913180097, + "loss": 2.5527, + "step": 9845 + }, + { + "epoch": 0.7946089903962553, + "grad_norm": 0.7053726315498352, + "learning_rate": 0.00010321997046045066, + "loss": 2.566, + "step": 9846 + }, + { + "epoch": 0.7946896941328383, + "grad_norm": 0.7428076863288879, + "learning_rate": 0.00010320419170885025, + "loss": 2.5348, + "step": 9847 + }, + { + "epoch": 0.7947703978694214, + "grad_norm": 0.7029163837432861, + "learning_rate": 0.00010318841287739303, + "loss": 2.5387, + "step": 9848 + }, + { + "epoch": 0.7948511016060044, + "grad_norm": 0.6159133911132812, + "learning_rate": 0.00010317263396647221, + "loss": 2.5408, + "step": 9849 + }, + { + "epoch": 0.7949318053425873, + "grad_norm": 
0.6748857498168945, + "learning_rate": 0.00010315685497648106, + "loss": 2.5299, + "step": 9850 + }, + { + "epoch": 0.7950125090791703, + "grad_norm": 0.6281898021697998, + "learning_rate": 0.00010314107590781284, + "loss": 2.5202, + "step": 9851 + }, + { + "epoch": 0.7950932128157534, + "grad_norm": 0.6602163910865784, + "learning_rate": 0.00010312529676086078, + "loss": 2.5119, + "step": 9852 + }, + { + "epoch": 0.7951739165523364, + "grad_norm": 0.6665403246879578, + "learning_rate": 0.00010310951753601818, + "loss": 2.5913, + "step": 9853 + }, + { + "epoch": 0.7952546202889194, + "grad_norm": 0.6705873012542725, + "learning_rate": 0.00010309373823367827, + "loss": 2.6039, + "step": 9854 + }, + { + "epoch": 0.7953353240255023, + "grad_norm": 0.6571313738822937, + "learning_rate": 0.0001030779588542343, + "loss": 2.5629, + "step": 9855 + }, + { + "epoch": 0.7954160277620854, + "grad_norm": 0.6597230434417725, + "learning_rate": 0.00010306217939807956, + "loss": 2.5569, + "step": 9856 + }, + { + "epoch": 0.7954967314986684, + "grad_norm": 0.7098817229270935, + "learning_rate": 0.00010304639986560733, + "loss": 2.4736, + "step": 9857 + }, + { + "epoch": 0.7955774352352514, + "grad_norm": 0.628663957118988, + "learning_rate": 0.00010303062025721082, + "loss": 2.5241, + "step": 9858 + }, + { + "epoch": 0.7956581389718343, + "grad_norm": 0.630843460559845, + "learning_rate": 0.00010301484057328333, + "loss": 2.5604, + "step": 9859 + }, + { + "epoch": 0.7957388427084174, + "grad_norm": 0.7457596659660339, + "learning_rate": 0.00010299906081421813, + "loss": 2.5675, + "step": 9860 + }, + { + "epoch": 0.7958195464450004, + "grad_norm": 0.6566091775894165, + "learning_rate": 0.00010298328098040851, + "loss": 2.4918, + "step": 9861 + }, + { + "epoch": 0.7959002501815834, + "grad_norm": 0.657357931137085, + "learning_rate": 0.00010296750107224773, + "loss": 2.5268, + "step": 9862 + }, + { + "epoch": 0.7959809539181664, + "grad_norm": 0.7021927833557129, + "learning_rate": 
0.00010295172109012905, + "loss": 2.528, + "step": 9863 + }, + { + "epoch": 0.7960616576547495, + "grad_norm": 0.662053108215332, + "learning_rate": 0.00010293594103444578, + "loss": 2.5483, + "step": 9864 + }, + { + "epoch": 0.7961423613913324, + "grad_norm": 0.776407778263092, + "learning_rate": 0.00010292016090559118, + "loss": 2.6089, + "step": 9865 + }, + { + "epoch": 0.7962230651279154, + "grad_norm": 0.6499512791633606, + "learning_rate": 0.00010290438070395854, + "loss": 2.5609, + "step": 9866 + }, + { + "epoch": 0.7963037688644984, + "grad_norm": 0.6802246570587158, + "learning_rate": 0.00010288860042994113, + "loss": 2.5217, + "step": 9867 + }, + { + "epoch": 0.7963844726010815, + "grad_norm": 0.6371235847473145, + "learning_rate": 0.00010287282008393224, + "loss": 2.4783, + "step": 9868 + }, + { + "epoch": 0.7964651763376644, + "grad_norm": 0.7070169448852539, + "learning_rate": 0.00010285703966632518, + "loss": 2.5006, + "step": 9869 + }, + { + "epoch": 0.7965458800742474, + "grad_norm": 0.657738208770752, + "learning_rate": 0.00010284125917751323, + "loss": 2.551, + "step": 9870 + }, + { + "epoch": 0.7966265838108304, + "grad_norm": 0.7936853170394897, + "learning_rate": 0.00010282547861788964, + "loss": 2.574, + "step": 9871 + }, + { + "epoch": 0.7967072875474135, + "grad_norm": 0.675715982913971, + "learning_rate": 0.00010280969798784779, + "loss": 2.5288, + "step": 9872 + }, + { + "epoch": 0.7967879912839965, + "grad_norm": 0.6980394124984741, + "learning_rate": 0.00010279391728778092, + "loss": 2.5437, + "step": 9873 + }, + { + "epoch": 0.7968686950205794, + "grad_norm": 0.6580469608306885, + "learning_rate": 0.00010277813651808226, + "loss": 2.5574, + "step": 9874 + }, + { + "epoch": 0.7969493987571624, + "grad_norm": 0.6960238218307495, + "learning_rate": 0.00010276235567914522, + "loss": 2.5477, + "step": 9875 + }, + { + "epoch": 0.7970301024937455, + "grad_norm": 0.704140841960907, + "learning_rate": 0.00010274657477136304, + "loss": 2.5099, + 
"step": 9876 + }, + { + "epoch": 0.7971108062303285, + "grad_norm": 0.7238990068435669, + "learning_rate": 0.00010273079379512906, + "loss": 2.6182, + "step": 9877 + }, + { + "epoch": 0.7971915099669115, + "grad_norm": 0.6527700424194336, + "learning_rate": 0.00010271501275083657, + "loss": 2.5148, + "step": 9878 + }, + { + "epoch": 0.7972722137034944, + "grad_norm": 0.6665365695953369, + "learning_rate": 0.00010269923163887884, + "loss": 2.5624, + "step": 9879 + }, + { + "epoch": 0.7973529174400775, + "grad_norm": 0.7304019927978516, + "learning_rate": 0.0001026834504596492, + "loss": 2.5537, + "step": 9880 + }, + { + "epoch": 0.7974336211766605, + "grad_norm": 0.6645877957344055, + "learning_rate": 0.00010266766921354099, + "loss": 2.5381, + "step": 9881 + }, + { + "epoch": 0.7975143249132435, + "grad_norm": 0.6817314624786377, + "learning_rate": 0.00010265188790094744, + "loss": 2.5399, + "step": 9882 + }, + { + "epoch": 0.7975950286498265, + "grad_norm": 0.7477232217788696, + "learning_rate": 0.00010263610652226194, + "loss": 2.6461, + "step": 9883 + }, + { + "epoch": 0.7976757323864095, + "grad_norm": 0.7087170481681824, + "learning_rate": 0.00010262032507787777, + "loss": 2.5469, + "step": 9884 + }, + { + "epoch": 0.7977564361229925, + "grad_norm": 0.7093435525894165, + "learning_rate": 0.00010260454356818825, + "loss": 2.5606, + "step": 9885 + }, + { + "epoch": 0.7978371398595755, + "grad_norm": 0.6662636399269104, + "learning_rate": 0.00010258876199358672, + "loss": 2.5415, + "step": 9886 + }, + { + "epoch": 0.7979178435961585, + "grad_norm": 0.6829736232757568, + "learning_rate": 0.00010257298035446644, + "loss": 2.5618, + "step": 9887 + }, + { + "epoch": 0.7979985473327416, + "grad_norm": 0.6872264742851257, + "learning_rate": 0.00010255719865122077, + "loss": 2.5629, + "step": 9888 + }, + { + "epoch": 0.7980792510693245, + "grad_norm": 0.6988633871078491, + "learning_rate": 0.00010254141688424303, + "loss": 2.5191, + "step": 9889 + }, + { + "epoch": 
0.7981599548059075, + "grad_norm": 0.6787285804748535, + "learning_rate": 0.00010252563505392654, + "loss": 2.5003, + "step": 9890 + }, + { + "epoch": 0.7982406585424905, + "grad_norm": 0.6703466773033142, + "learning_rate": 0.00010250985316066461, + "loss": 2.5442, + "step": 9891 + }, + { + "epoch": 0.7983213622790736, + "grad_norm": 0.6463642120361328, + "learning_rate": 0.0001024940712048506, + "loss": 2.5236, + "step": 9892 + }, + { + "epoch": 0.7984020660156566, + "grad_norm": 0.6835207939147949, + "learning_rate": 0.0001024782891868778, + "loss": 2.5094, + "step": 9893 + }, + { + "epoch": 0.7984827697522395, + "grad_norm": 0.6621001958847046, + "learning_rate": 0.00010246250710713956, + "loss": 2.5456, + "step": 9894 + }, + { + "epoch": 0.7985634734888225, + "grad_norm": 0.6675469875335693, + "learning_rate": 0.0001024467249660292, + "loss": 2.5312, + "step": 9895 + }, + { + "epoch": 0.7986441772254055, + "grad_norm": 0.7357796430587769, + "learning_rate": 0.00010243094276394007, + "loss": 2.5374, + "step": 9896 + }, + { + "epoch": 0.7987248809619886, + "grad_norm": 0.7005879878997803, + "learning_rate": 0.00010241516050126549, + "loss": 2.5667, + "step": 9897 + }, + { + "epoch": 0.7988055846985715, + "grad_norm": 0.669870913028717, + "learning_rate": 0.0001023993781783988, + "loss": 2.533, + "step": 9898 + }, + { + "epoch": 0.7988862884351545, + "grad_norm": 0.7584091424942017, + "learning_rate": 0.00010238359579573333, + "loss": 2.5995, + "step": 9899 + }, + { + "epoch": 0.7989669921717375, + "grad_norm": 0.6931570172309875, + "learning_rate": 0.00010236781335366239, + "loss": 2.5506, + "step": 9900 + }, + { + "epoch": 0.7990476959083206, + "grad_norm": 0.6810948848724365, + "learning_rate": 0.0001023520308525794, + "loss": 2.5048, + "step": 9901 + }, + { + "epoch": 0.7991283996449036, + "grad_norm": 0.6857194900512695, + "learning_rate": 0.00010233624829287765, + "loss": 2.5559, + "step": 9902 + }, + { + "epoch": 0.7992091033814865, + "grad_norm": 
0.6685707569122314, + "learning_rate": 0.00010232046567495046, + "loss": 2.5661, + "step": 9903 + }, + { + "epoch": 0.7992898071180695, + "grad_norm": 0.6626694202423096, + "learning_rate": 0.00010230468299919121, + "loss": 2.6293, + "step": 9904 + }, + { + "epoch": 0.7993705108546526, + "grad_norm": 0.6407302021980286, + "learning_rate": 0.00010228890026599323, + "loss": 2.5552, + "step": 9905 + }, + { + "epoch": 0.7994512145912356, + "grad_norm": 0.762235701084137, + "learning_rate": 0.00010227311747574986, + "loss": 2.4904, + "step": 9906 + }, + { + "epoch": 0.7995319183278186, + "grad_norm": 0.703507661819458, + "learning_rate": 0.0001022573346288545, + "loss": 2.5684, + "step": 9907 + }, + { + "epoch": 0.7996126220644015, + "grad_norm": 0.82541823387146, + "learning_rate": 0.00010224155172570043, + "loss": 2.521, + "step": 9908 + }, + { + "epoch": 0.7996933258009846, + "grad_norm": 0.6836804747581482, + "learning_rate": 0.00010222576876668104, + "loss": 2.5364, + "step": 9909 + }, + { + "epoch": 0.7997740295375676, + "grad_norm": 0.7388977408409119, + "learning_rate": 0.00010220998575218966, + "loss": 2.5724, + "step": 9910 + }, + { + "epoch": 0.7998547332741506, + "grad_norm": 0.7380896806716919, + "learning_rate": 0.00010219420268261966, + "loss": 2.5918, + "step": 9911 + }, + { + "epoch": 0.7999354370107336, + "grad_norm": 0.7303522825241089, + "learning_rate": 0.00010217841955836442, + "loss": 2.5432, + "step": 9912 + }, + { + "epoch": 0.8000161407473166, + "grad_norm": 0.6859301924705505, + "learning_rate": 0.00010216263637981727, + "loss": 2.5734, + "step": 9913 + }, + { + "epoch": 0.8000968444838996, + "grad_norm": 0.731910228729248, + "learning_rate": 0.00010214685314737154, + "loss": 2.5227, + "step": 9914 + }, + { + "epoch": 0.8001775482204826, + "grad_norm": 0.7105006575584412, + "learning_rate": 0.00010213106986142062, + "loss": 2.5335, + "step": 9915 + }, + { + "epoch": 0.8002582519570656, + "grad_norm": 0.7337056994438171, + "learning_rate": 
0.00010211528652235786, + "loss": 2.6204, + "step": 9916 + }, + { + "epoch": 0.8003389556936487, + "grad_norm": 0.7350614666938782, + "learning_rate": 0.00010209950313057668, + "loss": 2.5264, + "step": 9917 + }, + { + "epoch": 0.8004196594302316, + "grad_norm": 0.6411921977996826, + "learning_rate": 0.00010208371968647036, + "loss": 2.4642, + "step": 9918 + }, + { + "epoch": 0.8005003631668146, + "grad_norm": 0.7601611018180847, + "learning_rate": 0.00010206793619043229, + "loss": 2.6249, + "step": 9919 + }, + { + "epoch": 0.8005810669033976, + "grad_norm": 0.7086012363433838, + "learning_rate": 0.00010205215264285585, + "loss": 2.5508, + "step": 9920 + }, + { + "epoch": 0.8006617706399807, + "grad_norm": 0.7267128825187683, + "learning_rate": 0.00010203636904413443, + "loss": 2.5109, + "step": 9921 + }, + { + "epoch": 0.8007424743765637, + "grad_norm": 0.7606067657470703, + "learning_rate": 0.00010202058539466132, + "loss": 2.5172, + "step": 9922 + }, + { + "epoch": 0.8008231781131466, + "grad_norm": 0.7610498666763306, + "learning_rate": 0.00010200480169483, + "loss": 2.5085, + "step": 9923 + }, + { + "epoch": 0.8009038818497296, + "grad_norm": 0.7604225873947144, + "learning_rate": 0.00010198901794503373, + "loss": 2.5615, + "step": 9924 + }, + { + "epoch": 0.8009845855863127, + "grad_norm": 0.739532470703125, + "learning_rate": 0.00010197323414566596, + "loss": 2.5574, + "step": 9925 + }, + { + "epoch": 0.8010652893228957, + "grad_norm": 0.6913303136825562, + "learning_rate": 0.00010195745029712003, + "loss": 2.5403, + "step": 9926 + }, + { + "epoch": 0.8011459930594786, + "grad_norm": 0.6963592767715454, + "learning_rate": 0.0001019416663997893, + "loss": 2.5615, + "step": 9927 + }, + { + "epoch": 0.8012266967960616, + "grad_norm": 0.681481122970581, + "learning_rate": 0.0001019258824540672, + "loss": 2.5125, + "step": 9928 + }, + { + "epoch": 0.8013074005326447, + "grad_norm": 0.7192744016647339, + "learning_rate": 0.00010191009846034709, + "loss": 2.5952, + 
"step": 9929 + }, + { + "epoch": 0.8013881042692277, + "grad_norm": 0.7030046582221985, + "learning_rate": 0.00010189431441902228, + "loss": 2.5445, + "step": 9930 + }, + { + "epoch": 0.8014688080058107, + "grad_norm": 0.6180598139762878, + "learning_rate": 0.00010187853033048622, + "loss": 2.4902, + "step": 9931 + }, + { + "epoch": 0.8015495117423936, + "grad_norm": 0.7479971051216125, + "learning_rate": 0.0001018627461951323, + "loss": 2.5703, + "step": 9932 + }, + { + "epoch": 0.8016302154789767, + "grad_norm": 0.7339857220649719, + "learning_rate": 0.00010184696201335387, + "loss": 2.5744, + "step": 9933 + }, + { + "epoch": 0.8017109192155597, + "grad_norm": 0.6741397380828857, + "learning_rate": 0.00010183117778554432, + "loss": 2.5777, + "step": 9934 + }, + { + "epoch": 0.8017916229521427, + "grad_norm": 0.6731706857681274, + "learning_rate": 0.00010181539351209699, + "loss": 2.5438, + "step": 9935 + }, + { + "epoch": 0.8018723266887257, + "grad_norm": 0.6929418444633484, + "learning_rate": 0.00010179960919340535, + "loss": 2.5308, + "step": 9936 + }, + { + "epoch": 0.8019530304253087, + "grad_norm": 0.7383175492286682, + "learning_rate": 0.00010178382482986271, + "loss": 2.5623, + "step": 9937 + }, + { + "epoch": 0.8020337341618917, + "grad_norm": 0.6872193217277527, + "learning_rate": 0.00010176804042186252, + "loss": 2.5271, + "step": 9938 + }, + { + "epoch": 0.8021144378984747, + "grad_norm": 0.7354295253753662, + "learning_rate": 0.00010175225596979816, + "loss": 2.5122, + "step": 9939 + }, + { + "epoch": 0.8021951416350577, + "grad_norm": 0.7589237689971924, + "learning_rate": 0.00010173647147406297, + "loss": 2.5529, + "step": 9940 + }, + { + "epoch": 0.8022758453716408, + "grad_norm": 0.6998353004455566, + "learning_rate": 0.00010172068693505037, + "loss": 2.4683, + "step": 9941 + }, + { + "epoch": 0.8023565491082237, + "grad_norm": 0.6816055178642273, + "learning_rate": 0.00010170490235315377, + "loss": 2.567, + "step": 9942 + }, + { + "epoch": 
0.8024372528448067, + "grad_norm": 0.7188318371772766, + "learning_rate": 0.00010168911772876652, + "loss": 2.5631, + "step": 9943 + }, + { + "epoch": 0.8025179565813897, + "grad_norm": 0.6925922632217407, + "learning_rate": 0.00010167333306228209, + "loss": 2.4872, + "step": 9944 + }, + { + "epoch": 0.8025986603179727, + "grad_norm": 0.7081493735313416, + "learning_rate": 0.00010165754835409377, + "loss": 2.5482, + "step": 9945 + }, + { + "epoch": 0.8026793640545558, + "grad_norm": 0.6838935613632202, + "learning_rate": 0.00010164176360459505, + "loss": 2.541, + "step": 9946 + }, + { + "epoch": 0.8027600677911387, + "grad_norm": 0.6959214210510254, + "learning_rate": 0.00010162597881417928, + "loss": 2.4574, + "step": 9947 + }, + { + "epoch": 0.8028407715277217, + "grad_norm": 0.693004310131073, + "learning_rate": 0.00010161019398323986, + "loss": 2.5553, + "step": 9948 + }, + { + "epoch": 0.8029214752643047, + "grad_norm": 0.6683690547943115, + "learning_rate": 0.00010159440911217022, + "loss": 2.5501, + "step": 9949 + }, + { + "epoch": 0.8030021790008878, + "grad_norm": 0.6797001361846924, + "learning_rate": 0.0001015786242013637, + "loss": 2.5731, + "step": 9950 + }, + { + "epoch": 0.8030828827374707, + "grad_norm": 0.6621012091636658, + "learning_rate": 0.00010156283925121375, + "loss": 2.5278, + "step": 9951 + }, + { + "epoch": 0.8031635864740537, + "grad_norm": 0.7024650573730469, + "learning_rate": 0.00010154705426211377, + "loss": 2.5939, + "step": 9952 + }, + { + "epoch": 0.8032442902106367, + "grad_norm": 0.6756548285484314, + "learning_rate": 0.00010153126923445714, + "loss": 2.5797, + "step": 9953 + }, + { + "epoch": 0.8033249939472198, + "grad_norm": 0.6560662984848022, + "learning_rate": 0.00010151548416863732, + "loss": 2.5358, + "step": 9954 + }, + { + "epoch": 0.8034056976838028, + "grad_norm": 0.7172456979751587, + "learning_rate": 0.00010149969906504766, + "loss": 2.5054, + "step": 9955 + }, + { + "epoch": 0.8034864014203857, + "grad_norm": 
0.6379461288452148, + "learning_rate": 0.00010148391392408152, + "loss": 2.5341, + "step": 9956 + }, + { + "epoch": 0.8035671051569687, + "grad_norm": 0.6553892493247986, + "learning_rate": 0.00010146812874613243, + "loss": 2.5618, + "step": 9957 + }, + { + "epoch": 0.8036478088935518, + "grad_norm": 0.6940072178840637, + "learning_rate": 0.00010145234353159372, + "loss": 2.5686, + "step": 9958 + }, + { + "epoch": 0.8037285126301348, + "grad_norm": 0.6641896963119507, + "learning_rate": 0.00010143655828085878, + "loss": 2.5188, + "step": 9959 + }, + { + "epoch": 0.8038092163667178, + "grad_norm": 0.6622887253761292, + "learning_rate": 0.00010142077299432111, + "loss": 2.54, + "step": 9960 + }, + { + "epoch": 0.8038899201033007, + "grad_norm": 0.7216808795928955, + "learning_rate": 0.000101404987672374, + "loss": 2.5775, + "step": 9961 + }, + { + "epoch": 0.8039706238398838, + "grad_norm": 0.6544952988624573, + "learning_rate": 0.00010138920231541095, + "loss": 2.6066, + "step": 9962 + }, + { + "epoch": 0.8040513275764668, + "grad_norm": 0.6869354248046875, + "learning_rate": 0.00010137341692382539, + "loss": 2.5157, + "step": 9963 + }, + { + "epoch": 0.8041320313130498, + "grad_norm": 0.6731898784637451, + "learning_rate": 0.00010135763149801063, + "loss": 2.4369, + "step": 9964 + }, + { + "epoch": 0.8042127350496328, + "grad_norm": 0.6943373084068298, + "learning_rate": 0.00010134184603836017, + "loss": 2.5529, + "step": 9965 + }, + { + "epoch": 0.8042934387862158, + "grad_norm": 0.729928195476532, + "learning_rate": 0.00010132606054526739, + "loss": 2.5814, + "step": 9966 + }, + { + "epoch": 0.8043741425227988, + "grad_norm": 0.6491130590438843, + "learning_rate": 0.00010131027501912571, + "loss": 2.5246, + "step": 9967 + }, + { + "epoch": 0.8044548462593818, + "grad_norm": 0.747756838798523, + "learning_rate": 0.00010129448946032857, + "loss": 2.513, + "step": 9968 + }, + { + "epoch": 0.8045355499959648, + "grad_norm": 0.6449645757675171, + "learning_rate": 
0.00010127870386926935, + "loss": 2.5232, + "step": 9969 + }, + { + "epoch": 0.8046162537325479, + "grad_norm": 0.6425037980079651, + "learning_rate": 0.0001012629182463415, + "loss": 2.5065, + "step": 9970 + }, + { + "epoch": 0.8046969574691308, + "grad_norm": 0.7340624332427979, + "learning_rate": 0.00010124713259193843, + "loss": 2.5325, + "step": 9971 + }, + { + "epoch": 0.8047776612057138, + "grad_norm": 0.7308940291404724, + "learning_rate": 0.00010123134690645352, + "loss": 2.5717, + "step": 9972 + }, + { + "epoch": 0.8048583649422968, + "grad_norm": 0.7128338813781738, + "learning_rate": 0.00010121556119028028, + "loss": 2.5548, + "step": 9973 + }, + { + "epoch": 0.8049390686788799, + "grad_norm": 0.7027677893638611, + "learning_rate": 0.00010119977544381207, + "loss": 2.5311, + "step": 9974 + }, + { + "epoch": 0.8050197724154629, + "grad_norm": 0.7022054195404053, + "learning_rate": 0.00010118398966744229, + "loss": 2.5177, + "step": 9975 + }, + { + "epoch": 0.8051004761520458, + "grad_norm": 0.7382696270942688, + "learning_rate": 0.00010116820386156441, + "loss": 2.532, + "step": 9976 + }, + { + "epoch": 0.8051811798886288, + "grad_norm": 0.6968613862991333, + "learning_rate": 0.00010115241802657181, + "loss": 2.536, + "step": 9977 + }, + { + "epoch": 0.8052618836252119, + "grad_norm": 0.8277899026870728, + "learning_rate": 0.00010113663216285798, + "loss": 2.5963, + "step": 9978 + }, + { + "epoch": 0.8053425873617949, + "grad_norm": 0.677707314491272, + "learning_rate": 0.00010112084627081629, + "loss": 2.5041, + "step": 9979 + }, + { + "epoch": 0.8054232910983778, + "grad_norm": 0.6943314075469971, + "learning_rate": 0.00010110506035084017, + "loss": 2.4776, + "step": 9980 + }, + { + "epoch": 0.8055039948349608, + "grad_norm": 0.6948177218437195, + "learning_rate": 0.00010108927440332306, + "loss": 2.5306, + "step": 9981 + }, + { + "epoch": 0.8055846985715439, + "grad_norm": 0.6873918771743774, + "learning_rate": 0.0001010734884286584, + "loss": 2.5783, 
+ "step": 9982 + }, + { + "epoch": 0.8056654023081269, + "grad_norm": 0.6370649933815002, + "learning_rate": 0.00010105770242723958, + "loss": 2.5584, + "step": 9983 + }, + { + "epoch": 0.8057461060447099, + "grad_norm": 0.7594422698020935, + "learning_rate": 0.00010104191639946008, + "loss": 2.543, + "step": 9984 + }, + { + "epoch": 0.8058268097812928, + "grad_norm": 0.697380542755127, + "learning_rate": 0.00010102613034571327, + "loss": 2.5295, + "step": 9985 + }, + { + "epoch": 0.8059075135178759, + "grad_norm": 0.6597251892089844, + "learning_rate": 0.00010101034426639264, + "loss": 2.5917, + "step": 9986 + }, + { + "epoch": 0.8059882172544589, + "grad_norm": 0.6583479046821594, + "learning_rate": 0.00010099455816189156, + "loss": 2.6206, + "step": 9987 + }, + { + "epoch": 0.8060689209910419, + "grad_norm": 0.6603943705558777, + "learning_rate": 0.00010097877203260349, + "loss": 2.5223, + "step": 9988 + }, + { + "epoch": 0.8061496247276249, + "grad_norm": 0.716454267501831, + "learning_rate": 0.00010096298587892188, + "loss": 2.5572, + "step": 9989 + }, + { + "epoch": 0.806230328464208, + "grad_norm": 0.6511488556861877, + "learning_rate": 0.00010094719970124016, + "loss": 2.5815, + "step": 9990 + }, + { + "epoch": 0.8063110322007909, + "grad_norm": 0.6969261169433594, + "learning_rate": 0.00010093141349995173, + "loss": 2.5902, + "step": 9991 + }, + { + "epoch": 0.8063917359373739, + "grad_norm": 0.7012695074081421, + "learning_rate": 0.00010091562727545001, + "loss": 2.5134, + "step": 9992 + }, + { + "epoch": 0.8064724396739569, + "grad_norm": 0.6368406414985657, + "learning_rate": 0.00010089984102812848, + "loss": 2.568, + "step": 9993 + }, + { + "epoch": 0.80655314341054, + "grad_norm": 0.6552153825759888, + "learning_rate": 0.00010088405475838059, + "loss": 2.5101, + "step": 9994 + }, + { + "epoch": 0.8066338471471229, + "grad_norm": 0.6949633359909058, + "learning_rate": 0.00010086826846659974, + "loss": 2.5427, + "step": 9995 + }, + { + "epoch": 
0.8067145508837059, + "grad_norm": 0.6593093872070312, + "learning_rate": 0.00010085248215317935, + "loss": 2.5551, + "step": 9996 + }, + { + "epoch": 0.8067952546202889, + "grad_norm": 0.6963745355606079, + "learning_rate": 0.00010083669581851287, + "loss": 2.4956, + "step": 9997 + }, + { + "epoch": 0.8068759583568719, + "grad_norm": 0.7093523144721985, + "learning_rate": 0.00010082090946299377, + "loss": 2.5876, + "step": 9998 + }, + { + "epoch": 0.806956662093455, + "grad_norm": 0.6796671152114868, + "learning_rate": 0.00010080512308701544, + "loss": 2.5302, + "step": 9999 + }, + { + "epoch": 0.8070373658300379, + "grad_norm": 0.7170542478561401, + "learning_rate": 0.00010078933669097135, + "loss": 2.5886, + "step": 10000 + }, + { + "epoch": 0.8070373658300379, + "eval_loss": 2.4734926223754883, + "eval_runtime": 788.2594, + "eval_samples_per_second": 3.324, + "eval_steps_per_second": 0.554, + "step": 10000 + }, + { + "epoch": 0.8071180695666209, + "grad_norm": 0.6566126346588135, + "learning_rate": 0.0001007735502752549, + "loss": 2.4441, + "step": 10001 + }, + { + "epoch": 0.8071987733032039, + "grad_norm": 0.6739515662193298, + "learning_rate": 0.00010075776384025957, + "loss": 2.5767, + "step": 10002 + }, + { + "epoch": 0.807279477039787, + "grad_norm": 0.6334208846092224, + "learning_rate": 0.00010074197738637881, + "loss": 2.5321, + "step": 10003 + }, + { + "epoch": 0.80736018077637, + "grad_norm": 0.6764520406723022, + "learning_rate": 0.000100726190914006, + "loss": 2.5144, + "step": 10004 + }, + { + "epoch": 0.8074408845129529, + "grad_norm": 0.7090082764625549, + "learning_rate": 0.00010071040442353464, + "loss": 2.5626, + "step": 10005 + }, + { + "epoch": 0.8075215882495359, + "grad_norm": 0.6915304064750671, + "learning_rate": 0.00010069461791535814, + "loss": 2.5261, + "step": 10006 + }, + { + "epoch": 0.807602291986119, + "grad_norm": 0.6685747504234314, + "learning_rate": 0.00010067883138986991, + "loss": 2.492, + "step": 10007 + }, + { + "epoch": 
0.807682995722702, + "grad_norm": 0.7179074883460999, + "learning_rate": 0.00010066304484746347, + "loss": 2.4601, + "step": 10008 + }, + { + "epoch": 0.807763699459285, + "grad_norm": 0.7032761573791504, + "learning_rate": 0.00010064725828853219, + "loss": 2.578, + "step": 10009 + }, + { + "epoch": 0.8078444031958679, + "grad_norm": 0.710322916507721, + "learning_rate": 0.00010063147171346959, + "loss": 2.5514, + "step": 10010 + }, + { + "epoch": 0.807925106932451, + "grad_norm": 0.6552841067314148, + "learning_rate": 0.00010061568512266903, + "loss": 2.5474, + "step": 10011 + }, + { + "epoch": 0.808005810669034, + "grad_norm": 0.6862452626228333, + "learning_rate": 0.00010059989851652398, + "loss": 2.5772, + "step": 10012 + }, + { + "epoch": 0.808086514405617, + "grad_norm": 0.7123851180076599, + "learning_rate": 0.00010058411189542788, + "loss": 2.4936, + "step": 10013 + }, + { + "epoch": 0.8081672181421999, + "grad_norm": 0.6889944672584534, + "learning_rate": 0.00010056832525977422, + "loss": 2.5041, + "step": 10014 + }, + { + "epoch": 0.808247921878783, + "grad_norm": 0.6986924409866333, + "learning_rate": 0.0001005525386099564, + "loss": 2.5591, + "step": 10015 + }, + { + "epoch": 0.808328625615366, + "grad_norm": 0.6935306787490845, + "learning_rate": 0.00010053675194636787, + "loss": 2.5423, + "step": 10016 + }, + { + "epoch": 0.808409329351949, + "grad_norm": 0.6751969456672668, + "learning_rate": 0.00010052096526940207, + "loss": 2.5666, + "step": 10017 + }, + { + "epoch": 0.808490033088532, + "grad_norm": 0.676909327507019, + "learning_rate": 0.00010050517857945243, + "loss": 2.5394, + "step": 10018 + }, + { + "epoch": 0.808570736825115, + "grad_norm": 0.7439377307891846, + "learning_rate": 0.00010048939187691246, + "loss": 2.5011, + "step": 10019 + }, + { + "epoch": 0.808651440561698, + "grad_norm": 0.6594791412353516, + "learning_rate": 0.00010047360516217554, + "loss": 2.5159, + "step": 10020 + }, + { + "epoch": 0.808732144298281, + "grad_norm": 
0.7013304233551025, + "learning_rate": 0.00010045781843563517, + "loss": 2.5439, + "step": 10021 + }, + { + "epoch": 0.808812848034864, + "grad_norm": 0.7537491917610168, + "learning_rate": 0.00010044203169768476, + "loss": 2.5837, + "step": 10022 + }, + { + "epoch": 0.8088935517714471, + "grad_norm": 0.7273866534233093, + "learning_rate": 0.00010042624494871773, + "loss": 2.5546, + "step": 10023 + }, + { + "epoch": 0.80897425550803, + "grad_norm": 0.6716369986534119, + "learning_rate": 0.0001004104581891276, + "loss": 2.5264, + "step": 10024 + }, + { + "epoch": 0.809054959244613, + "grad_norm": 0.7544769644737244, + "learning_rate": 0.00010039467141930777, + "loss": 2.5502, + "step": 10025 + }, + { + "epoch": 0.809135662981196, + "grad_norm": 0.8713179230690002, + "learning_rate": 0.0001003788846396517, + "loss": 2.5178, + "step": 10026 + }, + { + "epoch": 0.8092163667177791, + "grad_norm": 0.6704887747764587, + "learning_rate": 0.00010036309785055283, + "loss": 2.5136, + "step": 10027 + }, + { + "epoch": 0.809297070454362, + "grad_norm": 0.7308552861213684, + "learning_rate": 0.00010034731105240458, + "loss": 2.4781, + "step": 10028 + }, + { + "epoch": 0.809377774190945, + "grad_norm": 0.7214144468307495, + "learning_rate": 0.00010033152424560049, + "loss": 2.5946, + "step": 10029 + }, + { + "epoch": 0.809458477927528, + "grad_norm": 0.6946821808815002, + "learning_rate": 0.00010031573743053393, + "loss": 2.4937, + "step": 10030 + }, + { + "epoch": 0.8095391816641111, + "grad_norm": 0.7348416447639465, + "learning_rate": 0.00010029995060759833, + "loss": 2.5959, + "step": 10031 + }, + { + "epoch": 0.8096198854006941, + "grad_norm": 0.7482579350471497, + "learning_rate": 0.00010028416377718721, + "loss": 2.6, + "step": 10032 + }, + { + "epoch": 0.809700589137277, + "grad_norm": 0.7114939093589783, + "learning_rate": 0.00010026837693969397, + "loss": 2.5376, + "step": 10033 + }, + { + "epoch": 0.80978129287386, + "grad_norm": 0.6559228897094727, + "learning_rate": 
0.00010025259009551209, + "loss": 2.4961, + "step": 10034 + }, + { + "epoch": 0.8098619966104431, + "grad_norm": 0.7494906187057495, + "learning_rate": 0.00010023680324503501, + "loss": 2.5723, + "step": 10035 + }, + { + "epoch": 0.8099427003470261, + "grad_norm": 0.7207093834877014, + "learning_rate": 0.00010022101638865618, + "loss": 2.5523, + "step": 10036 + }, + { + "epoch": 0.8100234040836091, + "grad_norm": 0.6730504035949707, + "learning_rate": 0.00010020522952676903, + "loss": 2.5135, + "step": 10037 + }, + { + "epoch": 0.810104107820192, + "grad_norm": 0.6805168390274048, + "learning_rate": 0.000100189442659767, + "loss": 2.5598, + "step": 10038 + }, + { + "epoch": 0.8101848115567751, + "grad_norm": 0.6639137268066406, + "learning_rate": 0.00010017365578804358, + "loss": 2.5152, + "step": 10039 + }, + { + "epoch": 0.8102655152933581, + "grad_norm": 0.6604194641113281, + "learning_rate": 0.00010015786891199221, + "loss": 2.5302, + "step": 10040 + }, + { + "epoch": 0.8103462190299411, + "grad_norm": 0.7664934992790222, + "learning_rate": 0.00010014208203200634, + "loss": 2.5437, + "step": 10041 + }, + { + "epoch": 0.8104269227665241, + "grad_norm": 0.7404079437255859, + "learning_rate": 0.00010012629514847942, + "loss": 2.6559, + "step": 10042 + }, + { + "epoch": 0.8105076265031071, + "grad_norm": 0.694006085395813, + "learning_rate": 0.00010011050826180488, + "loss": 2.5571, + "step": 10043 + }, + { + "epoch": 0.8105883302396901, + "grad_norm": 0.7007058262825012, + "learning_rate": 0.00010009472137237616, + "loss": 2.5639, + "step": 10044 + }, + { + "epoch": 0.8106690339762731, + "grad_norm": 0.7331913113594055, + "learning_rate": 0.00010007893448058678, + "loss": 2.5499, + "step": 10045 + }, + { + "epoch": 0.8107497377128561, + "grad_norm": 0.7636487483978271, + "learning_rate": 0.00010006314758683015, + "loss": 2.6068, + "step": 10046 + }, + { + "epoch": 0.810830441449439, + "grad_norm": 0.6505223512649536, + "learning_rate": 0.0001000473606914997, + 
"loss": 2.5313, + "step": 10047 + }, + { + "epoch": 0.8109111451860221, + "grad_norm": 0.6425966620445251, + "learning_rate": 0.00010003157379498886, + "loss": 2.5998, + "step": 10048 + }, + { + "epoch": 0.8109918489226051, + "grad_norm": 0.7163281440734863, + "learning_rate": 0.00010001578689769116, + "loss": 2.5493, + "step": 10049 + }, + { + "epoch": 0.8110725526591881, + "grad_norm": 0.7345306873321533, + "learning_rate": 0.0001, + "loss": 2.5609, + "step": 10050 + }, + { + "epoch": 0.8111532563957711, + "grad_norm": 0.6808427572250366, + "learning_rate": 9.998421310230884e-05, + "loss": 2.4823, + "step": 10051 + }, + { + "epoch": 0.8112339601323542, + "grad_norm": 0.7456082105636597, + "learning_rate": 9.996842620501115e-05, + "loss": 2.4782, + "step": 10052 + }, + { + "epoch": 0.8113146638689371, + "grad_norm": 0.7061728239059448, + "learning_rate": 9.995263930850034e-05, + "loss": 2.4906, + "step": 10053 + }, + { + "epoch": 0.8113953676055201, + "grad_norm": 0.691663920879364, + "learning_rate": 9.993685241316986e-05, + "loss": 2.5842, + "step": 10054 + }, + { + "epoch": 0.8114760713421031, + "grad_norm": 0.6899400353431702, + "learning_rate": 9.992106551941325e-05, + "loss": 2.5628, + "step": 10055 + }, + { + "epoch": 0.8115567750786862, + "grad_norm": 0.6909289360046387, + "learning_rate": 9.990527862762385e-05, + "loss": 2.5173, + "step": 10056 + }, + { + "epoch": 0.8116374788152692, + "grad_norm": 0.6507968306541443, + "learning_rate": 9.988949173819514e-05, + "loss": 2.5763, + "step": 10057 + }, + { + "epoch": 0.8117181825518521, + "grad_norm": 0.6972371339797974, + "learning_rate": 9.98737048515206e-05, + "loss": 2.604, + "step": 10058 + }, + { + "epoch": 0.8117988862884351, + "grad_norm": 0.6500107049942017, + "learning_rate": 9.985791796799368e-05, + "loss": 2.509, + "step": 10059 + }, + { + "epoch": 0.8118795900250182, + "grad_norm": 0.704501211643219, + "learning_rate": 9.98421310880078e-05, + "loss": 2.5773, + "step": 10060 + }, + { + "epoch": 
0.8119602937616012, + "grad_norm": 0.7037203311920166, + "learning_rate": 9.982634421195641e-05, + "loss": 2.5968, + "step": 10061 + }, + { + "epoch": 0.8120409974981841, + "grad_norm": 0.7161232829093933, + "learning_rate": 9.981055734023304e-05, + "loss": 2.5373, + "step": 10062 + }, + { + "epoch": 0.8121217012347671, + "grad_norm": 0.6602928638458252, + "learning_rate": 9.979477047323099e-05, + "loss": 2.5851, + "step": 10063 + }, + { + "epoch": 0.8122024049713502, + "grad_norm": 0.6685947775840759, + "learning_rate": 9.977898361134383e-05, + "loss": 2.5543, + "step": 10064 + }, + { + "epoch": 0.8122831087079332, + "grad_norm": 0.6772760152816772, + "learning_rate": 9.976319675496502e-05, + "loss": 2.5355, + "step": 10065 + }, + { + "epoch": 0.8123638124445162, + "grad_norm": 0.6140885949134827, + "learning_rate": 9.974740990448792e-05, + "loss": 2.489, + "step": 10066 + }, + { + "epoch": 0.8124445161810991, + "grad_norm": 0.6597142219543457, + "learning_rate": 9.973162306030604e-05, + "loss": 2.5619, + "step": 10067 + }, + { + "epoch": 0.8125252199176822, + "grad_norm": 0.6768592000007629, + "learning_rate": 9.971583622281281e-05, + "loss": 2.5107, + "step": 10068 + }, + { + "epoch": 0.8126059236542652, + "grad_norm": 0.682296633720398, + "learning_rate": 9.970004939240168e-05, + "loss": 2.5003, + "step": 10069 + }, + { + "epoch": 0.8126866273908482, + "grad_norm": 0.7356325387954712, + "learning_rate": 9.96842625694661e-05, + "loss": 2.5864, + "step": 10070 + }, + { + "epoch": 0.8127673311274312, + "grad_norm": 0.6818091869354248, + "learning_rate": 9.966847575439956e-05, + "loss": 2.5375, + "step": 10071 + }, + { + "epoch": 0.8128480348640142, + "grad_norm": 0.6954368352890015, + "learning_rate": 9.965268894759543e-05, + "loss": 2.5314, + "step": 10072 + }, + { + "epoch": 0.8129287386005972, + "grad_norm": 0.6759306192398071, + "learning_rate": 9.963690214944721e-05, + "loss": 2.5881, + "step": 10073 + }, + { + "epoch": 0.8130094423371802, + "grad_norm": 
0.6546545624732971, + "learning_rate": 9.962111536034832e-05, + "loss": 2.5264, + "step": 10074 + }, + { + "epoch": 0.8130901460737632, + "grad_norm": 0.6709586977958679, + "learning_rate": 9.960532858069226e-05, + "loss": 2.5906, + "step": 10075 + }, + { + "epoch": 0.8131708498103463, + "grad_norm": 0.7310851812362671, + "learning_rate": 9.958954181087241e-05, + "loss": 2.5134, + "step": 10076 + }, + { + "epoch": 0.8132515535469292, + "grad_norm": 0.6793027520179749, + "learning_rate": 9.957375505128227e-05, + "loss": 2.5387, + "step": 10077 + }, + { + "epoch": 0.8133322572835122, + "grad_norm": 0.6965875029563904, + "learning_rate": 9.955796830231528e-05, + "loss": 2.5649, + "step": 10078 + }, + { + "epoch": 0.8134129610200952, + "grad_norm": 0.6597574353218079, + "learning_rate": 9.954218156436485e-05, + "loss": 2.5281, + "step": 10079 + }, + { + "epoch": 0.8134936647566783, + "grad_norm": 0.7911555171012878, + "learning_rate": 9.952639483782445e-05, + "loss": 2.535, + "step": 10080 + }, + { + "epoch": 0.8135743684932613, + "grad_norm": 0.7405688762664795, + "learning_rate": 9.951060812308757e-05, + "loss": 2.5303, + "step": 10081 + }, + { + "epoch": 0.8136550722298442, + "grad_norm": 0.6961480379104614, + "learning_rate": 9.949482142054758e-05, + "loss": 2.4959, + "step": 10082 + }, + { + "epoch": 0.8137357759664272, + "grad_norm": 0.6761718392372131, + "learning_rate": 9.947903473059797e-05, + "loss": 2.5591, + "step": 10083 + }, + { + "epoch": 0.8138164797030103, + "grad_norm": 0.7383104562759399, + "learning_rate": 9.946324805363218e-05, + "loss": 2.5848, + "step": 10084 + }, + { + "epoch": 0.8138971834395933, + "grad_norm": 0.6495873928070068, + "learning_rate": 9.944746139004364e-05, + "loss": 2.4972, + "step": 10085 + }, + { + "epoch": 0.8139778871761763, + "grad_norm": 0.7247152328491211, + "learning_rate": 9.94316747402258e-05, + "loss": 2.5361, + "step": 10086 + }, + { + "epoch": 0.8140585909127592, + "grad_norm": 0.6965751051902771, + "learning_rate": 
9.941588810457215e-05, + "loss": 2.4997, + "step": 10087 + }, + { + "epoch": 0.8141392946493423, + "grad_norm": 0.7138223648071289, + "learning_rate": 9.940010148347603e-05, + "loss": 2.5226, + "step": 10088 + }, + { + "epoch": 0.8142199983859253, + "grad_norm": 0.6571210622787476, + "learning_rate": 9.938431487733099e-05, + "loss": 2.5388, + "step": 10089 + }, + { + "epoch": 0.8143007021225083, + "grad_norm": 0.6721277832984924, + "learning_rate": 9.936852828653042e-05, + "loss": 2.5219, + "step": 10090 + }, + { + "epoch": 0.8143814058590912, + "grad_norm": 0.647520124912262, + "learning_rate": 9.935274171146782e-05, + "loss": 2.6199, + "step": 10091 + }, + { + "epoch": 0.8144621095956743, + "grad_norm": 0.6892204284667969, + "learning_rate": 9.933695515253654e-05, + "loss": 2.5132, + "step": 10092 + }, + { + "epoch": 0.8145428133322573, + "grad_norm": 0.6979050636291504, + "learning_rate": 9.932116861013008e-05, + "loss": 2.5148, + "step": 10093 + }, + { + "epoch": 0.8146235170688403, + "grad_norm": 0.6682664752006531, + "learning_rate": 9.930538208464189e-05, + "loss": 2.5795, + "step": 10094 + }, + { + "epoch": 0.8147042208054233, + "grad_norm": 0.734121561050415, + "learning_rate": 9.928959557646537e-05, + "loss": 2.5469, + "step": 10095 + }, + { + "epoch": 0.8147849245420064, + "grad_norm": 0.6669620275497437, + "learning_rate": 9.9273809085994e-05, + "loss": 2.5277, + "step": 10096 + }, + { + "epoch": 0.8148656282785893, + "grad_norm": 0.6750600934028625, + "learning_rate": 9.925802261362124e-05, + "loss": 2.5869, + "step": 10097 + }, + { + "epoch": 0.8149463320151723, + "grad_norm": 0.6813061237335205, + "learning_rate": 9.924223615974044e-05, + "loss": 2.585, + "step": 10098 + }, + { + "epoch": 0.8150270357517553, + "grad_norm": 0.6775497794151306, + "learning_rate": 9.92264497247451e-05, + "loss": 2.5353, + "step": 10099 + }, + { + "epoch": 0.8151077394883383, + "grad_norm": 0.6877530813217163, + "learning_rate": 9.92106633090287e-05, + "loss": 2.5349, + 
"step": 10100 + }, + { + "epoch": 0.8151884432249213, + "grad_norm": 0.6984169483184814, + "learning_rate": 9.91948769129846e-05, + "loss": 2.5986, + "step": 10101 + }, + { + "epoch": 0.8152691469615043, + "grad_norm": 0.7144806981086731, + "learning_rate": 9.917909053700626e-05, + "loss": 2.5797, + "step": 10102 + }, + { + "epoch": 0.8153498506980873, + "grad_norm": 0.6494203209877014, + "learning_rate": 9.916330418148715e-05, + "loss": 2.5035, + "step": 10103 + }, + { + "epoch": 0.8154305544346703, + "grad_norm": 0.6669752597808838, + "learning_rate": 9.914751784682069e-05, + "loss": 2.5489, + "step": 10104 + }, + { + "epoch": 0.8155112581712534, + "grad_norm": 0.6557981371879578, + "learning_rate": 9.913173153340029e-05, + "loss": 2.5266, + "step": 10105 + }, + { + "epoch": 0.8155919619078363, + "grad_norm": 0.6633948087692261, + "learning_rate": 9.911594524161941e-05, + "loss": 2.5263, + "step": 10106 + }, + { + "epoch": 0.8156726656444193, + "grad_norm": 0.7191522717475891, + "learning_rate": 9.910015897187154e-05, + "loss": 2.5625, + "step": 10107 + }, + { + "epoch": 0.8157533693810023, + "grad_norm": 0.7089062929153442, + "learning_rate": 9.908437272455001e-05, + "loss": 2.5644, + "step": 10108 + }, + { + "epoch": 0.8158340731175854, + "grad_norm": 0.7662761211395264, + "learning_rate": 9.906858650004831e-05, + "loss": 2.5875, + "step": 10109 + }, + { + "epoch": 0.8159147768541684, + "grad_norm": 0.6658861041069031, + "learning_rate": 9.905280029875988e-05, + "loss": 2.5818, + "step": 10110 + }, + { + "epoch": 0.8159954805907513, + "grad_norm": 0.7229514718055725, + "learning_rate": 9.903701412107815e-05, + "loss": 2.5421, + "step": 10111 + }, + { + "epoch": 0.8160761843273343, + "grad_norm": 0.7295149564743042, + "learning_rate": 9.902122796739652e-05, + "loss": 2.5298, + "step": 10112 + }, + { + "epoch": 0.8161568880639174, + "grad_norm": 0.6805420517921448, + "learning_rate": 9.900544183810849e-05, + "loss": 2.6693, + "step": 10113 + }, + { + "epoch": 
0.8162375918005004, + "grad_norm": 0.6560602188110352, + "learning_rate": 9.898965573360738e-05, + "loss": 2.5445, + "step": 10114 + }, + { + "epoch": 0.8163182955370833, + "grad_norm": 0.690396785736084, + "learning_rate": 9.897386965428674e-05, + "loss": 2.5281, + "step": 10115 + }, + { + "epoch": 0.8163989992736663, + "grad_norm": 0.6905054450035095, + "learning_rate": 9.895808360053998e-05, + "loss": 2.5406, + "step": 10116 + }, + { + "epoch": 0.8164797030102494, + "grad_norm": 0.6905301213264465, + "learning_rate": 9.894229757276045e-05, + "loss": 2.5458, + "step": 10117 + }, + { + "epoch": 0.8165604067468324, + "grad_norm": 0.6827620267868042, + "learning_rate": 9.892651157134162e-05, + "loss": 2.4403, + "step": 10118 + }, + { + "epoch": 0.8166411104834154, + "grad_norm": 0.7614343166351318, + "learning_rate": 9.891072559667697e-05, + "loss": 2.6369, + "step": 10119 + }, + { + "epoch": 0.8167218142199983, + "grad_norm": 0.6913704872131348, + "learning_rate": 9.889493964915985e-05, + "loss": 2.5914, + "step": 10120 + }, + { + "epoch": 0.8168025179565814, + "grad_norm": 0.7026088237762451, + "learning_rate": 9.887915372918372e-05, + "loss": 2.5139, + "step": 10121 + }, + { + "epoch": 0.8168832216931644, + "grad_norm": 0.7064465284347534, + "learning_rate": 9.886336783714203e-05, + "loss": 2.549, + "step": 10122 + }, + { + "epoch": 0.8169639254297474, + "grad_norm": 0.7345553040504456, + "learning_rate": 9.884758197342821e-05, + "loss": 2.5887, + "step": 10123 + }, + { + "epoch": 0.8170446291663304, + "grad_norm": 0.6916251182556152, + "learning_rate": 9.883179613843563e-05, + "loss": 2.5659, + "step": 10124 + }, + { + "epoch": 0.8171253329029134, + "grad_norm": 0.6428200602531433, + "learning_rate": 9.881601033255771e-05, + "loss": 2.5379, + "step": 10125 + }, + { + "epoch": 0.8172060366394964, + "grad_norm": 0.7433571815490723, + "learning_rate": 9.880022455618796e-05, + "loss": 2.5751, + "step": 10126 + }, + { + "epoch": 0.8172867403760794, + "grad_norm": 
0.733256995677948, + "learning_rate": 9.878443880971974e-05, + "loss": 2.4971, + "step": 10127 + }, + { + "epoch": 0.8173674441126624, + "grad_norm": 0.708289384841919, + "learning_rate": 9.876865309354646e-05, + "loss": 2.635, + "step": 10128 + }, + { + "epoch": 0.8174481478492455, + "grad_norm": 0.6877188682556152, + "learning_rate": 9.87528674080616e-05, + "loss": 2.5827, + "step": 10129 + }, + { + "epoch": 0.8175288515858284, + "grad_norm": 0.7108712792396545, + "learning_rate": 9.873708175365852e-05, + "loss": 2.5643, + "step": 10130 + }, + { + "epoch": 0.8176095553224114, + "grad_norm": 0.7435629367828369, + "learning_rate": 9.872129613073065e-05, + "loss": 2.5267, + "step": 10131 + }, + { + "epoch": 0.8176902590589944, + "grad_norm": 0.669913113117218, + "learning_rate": 9.870551053967148e-05, + "loss": 2.5684, + "step": 10132 + }, + { + "epoch": 0.8177709627955775, + "grad_norm": 0.6981424689292908, + "learning_rate": 9.868972498087431e-05, + "loss": 2.592, + "step": 10133 + }, + { + "epoch": 0.8178516665321605, + "grad_norm": 0.6661834716796875, + "learning_rate": 9.867393945473263e-05, + "loss": 2.5082, + "step": 10134 + }, + { + "epoch": 0.8179323702687434, + "grad_norm": 0.6611261367797852, + "learning_rate": 9.865815396163987e-05, + "loss": 2.556, + "step": 10135 + }, + { + "epoch": 0.8180130740053264, + "grad_norm": 0.6732283234596252, + "learning_rate": 9.86423685019894e-05, + "loss": 2.5668, + "step": 10136 + }, + { + "epoch": 0.8180937777419095, + "grad_norm": 0.6768637299537659, + "learning_rate": 9.862658307617465e-05, + "loss": 2.5467, + "step": 10137 + }, + { + "epoch": 0.8181744814784925, + "grad_norm": 0.6943596601486206, + "learning_rate": 9.861079768458904e-05, + "loss": 2.5989, + "step": 10138 + }, + { + "epoch": 0.8182551852150755, + "grad_norm": 0.7369638681411743, + "learning_rate": 9.859501232762601e-05, + "loss": 2.5189, + "step": 10139 + }, + { + "epoch": 0.8183358889516584, + "grad_norm": 0.7443112730979919, + "learning_rate": 
9.857922700567892e-05, + "loss": 2.5979, + "step": 10140 + }, + { + "epoch": 0.8184165926882415, + "grad_norm": 0.6726163029670715, + "learning_rate": 9.85634417191412e-05, + "loss": 2.5451, + "step": 10141 + }, + { + "epoch": 0.8184972964248245, + "grad_norm": 0.720492422580719, + "learning_rate": 9.854765646840632e-05, + "loss": 2.6116, + "step": 10142 + }, + { + "epoch": 0.8185780001614075, + "grad_norm": 0.6998233795166016, + "learning_rate": 9.85318712538676e-05, + "loss": 2.556, + "step": 10143 + }, + { + "epoch": 0.8186587038979904, + "grad_norm": 0.7580110430717468, + "learning_rate": 9.851608607591848e-05, + "loss": 2.5222, + "step": 10144 + }, + { + "epoch": 0.8187394076345735, + "grad_norm": 0.6893007755279541, + "learning_rate": 9.85003009349524e-05, + "loss": 2.4639, + "step": 10145 + }, + { + "epoch": 0.8188201113711565, + "grad_norm": 0.6448441743850708, + "learning_rate": 9.84845158313627e-05, + "loss": 2.5249, + "step": 10146 + }, + { + "epoch": 0.8189008151077395, + "grad_norm": 0.7591872215270996, + "learning_rate": 9.846873076554285e-05, + "loss": 2.5173, + "step": 10147 + }, + { + "epoch": 0.8189815188443225, + "grad_norm": 0.6994685530662537, + "learning_rate": 9.845294573788626e-05, + "loss": 2.5181, + "step": 10148 + }, + { + "epoch": 0.8190622225809054, + "grad_norm": 0.6822378635406494, + "learning_rate": 9.843716074878628e-05, + "loss": 2.5109, + "step": 10149 + }, + { + "epoch": 0.8191429263174885, + "grad_norm": 0.6730359792709351, + "learning_rate": 9.842137579863632e-05, + "loss": 2.5402, + "step": 10150 + }, + { + "epoch": 0.8192236300540715, + "grad_norm": 0.6280627846717834, + "learning_rate": 9.840559088782984e-05, + "loss": 2.4806, + "step": 10151 + }, + { + "epoch": 0.8193043337906545, + "grad_norm": 0.6887876391410828, + "learning_rate": 9.838980601676017e-05, + "loss": 2.5498, + "step": 10152 + }, + { + "epoch": 0.8193850375272375, + "grad_norm": 0.7823790907859802, + "learning_rate": 9.837402118582075e-05, + "loss": 2.467, + 
"step": 10153 + }, + { + "epoch": 0.8194657412638205, + "grad_norm": 0.8109384179115295, + "learning_rate": 9.835823639540496e-05, + "loss": 2.5898, + "step": 10154 + }, + { + "epoch": 0.8195464450004035, + "grad_norm": 0.6883066892623901, + "learning_rate": 9.834245164590624e-05, + "loss": 2.5589, + "step": 10155 + }, + { + "epoch": 0.8196271487369865, + "grad_norm": 0.7291175723075867, + "learning_rate": 9.832666693771794e-05, + "loss": 2.5317, + "step": 10156 + }, + { + "epoch": 0.8197078524735695, + "grad_norm": 0.6819449663162231, + "learning_rate": 9.831088227123346e-05, + "loss": 2.5513, + "step": 10157 + }, + { + "epoch": 0.8197885562101526, + "grad_norm": 0.7038870453834534, + "learning_rate": 9.829509764684626e-05, + "loss": 2.5301, + "step": 10158 + }, + { + "epoch": 0.8198692599467355, + "grad_norm": 0.7483033537864685, + "learning_rate": 9.827931306494965e-05, + "loss": 2.5273, + "step": 10159 + }, + { + "epoch": 0.8199499636833185, + "grad_norm": 0.6998303532600403, + "learning_rate": 9.826352852593705e-05, + "loss": 2.5083, + "step": 10160 + }, + { + "epoch": 0.8200306674199015, + "grad_norm": 0.6865512728691101, + "learning_rate": 9.824774403020188e-05, + "loss": 2.5693, + "step": 10161 + }, + { + "epoch": 0.8201113711564846, + "grad_norm": 0.8144257068634033, + "learning_rate": 9.823195957813749e-05, + "loss": 2.6052, + "step": 10162 + }, + { + "epoch": 0.8201920748930676, + "grad_norm": 0.6920810341835022, + "learning_rate": 9.821617517013729e-05, + "loss": 2.5467, + "step": 10163 + }, + { + "epoch": 0.8202727786296505, + "grad_norm": 0.7538061141967773, + "learning_rate": 9.820039080659469e-05, + "loss": 2.5933, + "step": 10164 + }, + { + "epoch": 0.8203534823662335, + "grad_norm": 0.6744310259819031, + "learning_rate": 9.818460648790302e-05, + "loss": 2.5633, + "step": 10165 + }, + { + "epoch": 0.8204341861028166, + "grad_norm": 0.6943854689598083, + "learning_rate": 9.816882221445571e-05, + "loss": 2.5868, + "step": 10166 + }, + { + "epoch": 
0.8205148898393996, + "grad_norm": 0.6486902832984924, + "learning_rate": 9.815303798664614e-05, + "loss": 2.4983, + "step": 10167 + }, + { + "epoch": 0.8205955935759826, + "grad_norm": 0.6699065566062927, + "learning_rate": 9.813725380486773e-05, + "loss": 2.563, + "step": 10168 + }, + { + "epoch": 0.8206762973125655, + "grad_norm": 0.6547110080718994, + "learning_rate": 9.812146966951379e-05, + "loss": 2.5404, + "step": 10169 + }, + { + "epoch": 0.8207570010491486, + "grad_norm": 0.692592203617096, + "learning_rate": 9.810568558097774e-05, + "loss": 2.5625, + "step": 10170 + }, + { + "epoch": 0.8208377047857316, + "grad_norm": 0.6696702837944031, + "learning_rate": 9.808990153965296e-05, + "loss": 2.5866, + "step": 10171 + }, + { + "epoch": 0.8209184085223146, + "grad_norm": 0.6425998210906982, + "learning_rate": 9.807411754593282e-05, + "loss": 2.5487, + "step": 10172 + }, + { + "epoch": 0.8209991122588975, + "grad_norm": 0.6849769949913025, + "learning_rate": 9.805833360021069e-05, + "loss": 2.5772, + "step": 10173 + }, + { + "epoch": 0.8210798159954806, + "grad_norm": 0.7451414465904236, + "learning_rate": 9.804254970288001e-05, + "loss": 2.5089, + "step": 10174 + }, + { + "epoch": 0.8211605197320636, + "grad_norm": 0.7134390473365784, + "learning_rate": 9.802676585433408e-05, + "loss": 2.541, + "step": 10175 + }, + { + "epoch": 0.8212412234686466, + "grad_norm": 0.7490564584732056, + "learning_rate": 9.801098205496627e-05, + "loss": 2.5299, + "step": 10176 + }, + { + "epoch": 0.8213219272052296, + "grad_norm": 0.6614408493041992, + "learning_rate": 9.799519830517005e-05, + "loss": 2.5252, + "step": 10177 + }, + { + "epoch": 0.8214026309418127, + "grad_norm": 0.761049211025238, + "learning_rate": 9.797941460533869e-05, + "loss": 2.5153, + "step": 10178 + }, + { + "epoch": 0.8214833346783956, + "grad_norm": 0.6352702379226685, + "learning_rate": 9.796363095586561e-05, + "loss": 2.5407, + "step": 10179 + }, + { + "epoch": 0.8215640384149786, + "grad_norm": 
0.684212863445282, + "learning_rate": 9.794784735714417e-05, + "loss": 2.5425, + "step": 10180 + }, + { + "epoch": 0.8216447421515616, + "grad_norm": 0.652987539768219, + "learning_rate": 9.793206380956772e-05, + "loss": 2.5542, + "step": 10181 + }, + { + "epoch": 0.8217254458881447, + "grad_norm": 0.6912897229194641, + "learning_rate": 9.791628031352966e-05, + "loss": 2.5041, + "step": 10182 + }, + { + "epoch": 0.8218061496247276, + "grad_norm": 0.7025408744812012, + "learning_rate": 9.790049686942333e-05, + "loss": 2.5296, + "step": 10183 + }, + { + "epoch": 0.8218868533613106, + "grad_norm": 0.7580777406692505, + "learning_rate": 9.788471347764215e-05, + "loss": 2.578, + "step": 10184 + }, + { + "epoch": 0.8219675570978936, + "grad_norm": 0.7044378519058228, + "learning_rate": 9.78689301385794e-05, + "loss": 2.5093, + "step": 10185 + }, + { + "epoch": 0.8220482608344767, + "grad_norm": 0.7339754700660706, + "learning_rate": 9.785314685262849e-05, + "loss": 2.5202, + "step": 10186 + }, + { + "epoch": 0.8221289645710597, + "grad_norm": 0.6872244477272034, + "learning_rate": 9.783736362018277e-05, + "loss": 2.541, + "step": 10187 + }, + { + "epoch": 0.8222096683076426, + "grad_norm": 0.7052434682846069, + "learning_rate": 9.78215804416356e-05, + "loss": 2.4968, + "step": 10188 + }, + { + "epoch": 0.8222903720442256, + "grad_norm": 0.6739610433578491, + "learning_rate": 9.780579731738033e-05, + "loss": 2.5137, + "step": 10189 + }, + { + "epoch": 0.8223710757808087, + "grad_norm": 0.6842939853668213, + "learning_rate": 9.779001424781035e-05, + "loss": 2.5329, + "step": 10190 + }, + { + "epoch": 0.8224517795173917, + "grad_norm": 0.7057977914810181, + "learning_rate": 9.777423123331898e-05, + "loss": 2.5657, + "step": 10191 + }, + { + "epoch": 0.8225324832539747, + "grad_norm": 0.6748424172401428, + "learning_rate": 9.775844827429958e-05, + "loss": 2.6104, + "step": 10192 + }, + { + "epoch": 0.8226131869905576, + "grad_norm": 0.6492514610290527, + "learning_rate": 
9.774266537114555e-05, + "loss": 2.58, + "step": 10193 + }, + { + "epoch": 0.8226938907271407, + "grad_norm": 0.6987641453742981, + "learning_rate": 9.772688252425016e-05, + "loss": 2.5301, + "step": 10194 + }, + { + "epoch": 0.8227745944637237, + "grad_norm": 0.710921585559845, + "learning_rate": 9.771109973400679e-05, + "loss": 2.6245, + "step": 10195 + }, + { + "epoch": 0.8228552982003067, + "grad_norm": 0.6673738360404968, + "learning_rate": 9.769531700080883e-05, + "loss": 2.5205, + "step": 10196 + }, + { + "epoch": 0.8229360019368896, + "grad_norm": 0.6705252528190613, + "learning_rate": 9.767953432504958e-05, + "loss": 2.4932, + "step": 10197 + }, + { + "epoch": 0.8230167056734727, + "grad_norm": 0.6587076783180237, + "learning_rate": 9.766375170712237e-05, + "loss": 2.5085, + "step": 10198 + }, + { + "epoch": 0.8230974094100557, + "grad_norm": 0.7285338640213013, + "learning_rate": 9.764796914742061e-05, + "loss": 2.5481, + "step": 10199 + }, + { + "epoch": 0.8231781131466387, + "grad_norm": 0.6971831321716309, + "learning_rate": 9.763218664633763e-05, + "loss": 2.6092, + "step": 10200 + }, + { + "epoch": 0.8232588168832217, + "grad_norm": 0.6940265893936157, + "learning_rate": 9.761640420426669e-05, + "loss": 2.5325, + "step": 10201 + }, + { + "epoch": 0.8233395206198046, + "grad_norm": 0.6612978577613831, + "learning_rate": 9.76006218216012e-05, + "loss": 2.5532, + "step": 10202 + }, + { + "epoch": 0.8234202243563877, + "grad_norm": 0.6707638502120972, + "learning_rate": 9.758483949873453e-05, + "loss": 2.512, + "step": 10203 + }, + { + "epoch": 0.8235009280929707, + "grad_norm": 0.6636764407157898, + "learning_rate": 9.756905723605994e-05, + "loss": 2.5446, + "step": 10204 + }, + { + "epoch": 0.8235816318295537, + "grad_norm": 0.6996643543243408, + "learning_rate": 9.755327503397081e-05, + "loss": 2.5504, + "step": 10205 + }, + { + "epoch": 0.8236623355661367, + "grad_norm": 0.604487955570221, + "learning_rate": 9.753749289286046e-05, + "loss": 2.4767, + 
"step": 10206 + }, + { + "epoch": 0.8237430393027197, + "grad_norm": 0.6484553217887878, + "learning_rate": 9.752171081312222e-05, + "loss": 2.5522, + "step": 10207 + }, + { + "epoch": 0.8238237430393027, + "grad_norm": 0.6890987753868103, + "learning_rate": 9.75059287951494e-05, + "loss": 2.5545, + "step": 10208 + }, + { + "epoch": 0.8239044467758857, + "grad_norm": 0.6786034107208252, + "learning_rate": 9.749014683933541e-05, + "loss": 2.591, + "step": 10209 + }, + { + "epoch": 0.8239851505124687, + "grad_norm": 0.751192033290863, + "learning_rate": 9.747436494607349e-05, + "loss": 2.5335, + "step": 10210 + }, + { + "epoch": 0.8240658542490518, + "grad_norm": 0.6611589789390564, + "learning_rate": 9.7458583115757e-05, + "loss": 2.5104, + "step": 10211 + }, + { + "epoch": 0.8241465579856347, + "grad_norm": 0.6602892875671387, + "learning_rate": 9.744280134877926e-05, + "loss": 2.5319, + "step": 10212 + }, + { + "epoch": 0.8242272617222177, + "grad_norm": 0.6856467127799988, + "learning_rate": 9.742701964553359e-05, + "loss": 2.5418, + "step": 10213 + }, + { + "epoch": 0.8243079654588007, + "grad_norm": 0.6810153126716614, + "learning_rate": 9.741123800641332e-05, + "loss": 2.5691, + "step": 10214 + }, + { + "epoch": 0.8243886691953838, + "grad_norm": 0.7044229507446289, + "learning_rate": 9.739545643181175e-05, + "loss": 2.5911, + "step": 10215 + }, + { + "epoch": 0.8244693729319668, + "grad_norm": 0.6689271330833435, + "learning_rate": 9.737967492212225e-05, + "loss": 2.5374, + "step": 10216 + }, + { + "epoch": 0.8245500766685497, + "grad_norm": 0.6558904051780701, + "learning_rate": 9.736389347773807e-05, + "loss": 2.5118, + "step": 10217 + }, + { + "epoch": 0.8246307804051327, + "grad_norm": 0.6900291442871094, + "learning_rate": 9.734811209905255e-05, + "loss": 2.515, + "step": 10218 + }, + { + "epoch": 0.8247114841417158, + "grad_norm": 0.7129492163658142, + "learning_rate": 9.733233078645907e-05, + "loss": 2.5191, + "step": 10219 + }, + { + "epoch": 
0.8247921878782988, + "grad_norm": 0.7031866908073425, + "learning_rate": 9.731654954035082e-05, + "loss": 2.5616, + "step": 10220 + }, + { + "epoch": 0.8248728916148818, + "grad_norm": 0.6418820023536682, + "learning_rate": 9.730076836112118e-05, + "loss": 2.537, + "step": 10221 + }, + { + "epoch": 0.8249535953514647, + "grad_norm": 0.6731035113334656, + "learning_rate": 9.728498724916347e-05, + "loss": 2.5483, + "step": 10222 + }, + { + "epoch": 0.8250342990880478, + "grad_norm": 0.6941342353820801, + "learning_rate": 9.726920620487096e-05, + "loss": 2.5314, + "step": 10223 + }, + { + "epoch": 0.8251150028246308, + "grad_norm": 0.6808927059173584, + "learning_rate": 9.725342522863696e-05, + "loss": 2.5521, + "step": 10224 + }, + { + "epoch": 0.8251957065612138, + "grad_norm": 0.6873155832290649, + "learning_rate": 9.723764432085481e-05, + "loss": 2.5205, + "step": 10225 + }, + { + "epoch": 0.8252764102977967, + "grad_norm": 0.8590287566184998, + "learning_rate": 9.722186348191776e-05, + "loss": 2.5378, + "step": 10226 + }, + { + "epoch": 0.8253571140343798, + "grad_norm": 0.691523015499115, + "learning_rate": 9.720608271221912e-05, + "loss": 2.5062, + "step": 10227 + }, + { + "epoch": 0.8254378177709628, + "grad_norm": 0.6695523262023926, + "learning_rate": 9.719030201215226e-05, + "loss": 2.5164, + "step": 10228 + }, + { + "epoch": 0.8255185215075458, + "grad_norm": 0.745516300201416, + "learning_rate": 9.717452138211037e-05, + "loss": 2.5207, + "step": 10229 + }, + { + "epoch": 0.8255992252441288, + "grad_norm": 0.6628115773200989, + "learning_rate": 9.715874082248679e-05, + "loss": 2.5293, + "step": 10230 + }, + { + "epoch": 0.8256799289807119, + "grad_norm": 0.6531884074211121, + "learning_rate": 9.714296033367482e-05, + "loss": 2.4812, + "step": 10231 + }, + { + "epoch": 0.8257606327172948, + "grad_norm": 0.7444833517074585, + "learning_rate": 9.712717991606777e-05, + "loss": 2.5422, + "step": 10232 + }, + { + "epoch": 0.8258413364538778, + "grad_norm": 
0.7013139128684998, + "learning_rate": 9.711139957005888e-05, + "loss": 2.5117, + "step": 10233 + }, + { + "epoch": 0.8259220401904608, + "grad_norm": 0.6588132977485657, + "learning_rate": 9.709561929604147e-05, + "loss": 2.5257, + "step": 10234 + }, + { + "epoch": 0.8260027439270439, + "grad_norm": 0.7538537383079529, + "learning_rate": 9.707983909440886e-05, + "loss": 2.5225, + "step": 10235 + }, + { + "epoch": 0.8260834476636268, + "grad_norm": Infinity, + "learning_rate": 9.707983909440886e-05, + "loss": 2.5532, + "step": 10236 + }, + { + "epoch": 0.8261641514002098, + "grad_norm": 0.7414929270744324, + "learning_rate": 9.706405896555425e-05, + "loss": 2.5653, + "step": 10237 + }, + { + "epoch": 0.8262448551367928, + "grad_norm": 0.757057785987854, + "learning_rate": 9.704827890987097e-05, + "loss": 2.5732, + "step": 10238 + }, + { + "epoch": 0.8263255588733759, + "grad_norm": 0.730721652507782, + "learning_rate": 9.703249892775232e-05, + "loss": 2.5317, + "step": 10239 + }, + { + "epoch": 0.8264062626099589, + "grad_norm": 0.6943208575248718, + "learning_rate": 9.701671901959151e-05, + "loss": 2.5849, + "step": 10240 + }, + { + "epoch": 0.8264869663465418, + "grad_norm": 0.7111102938652039, + "learning_rate": 9.700093918578188e-05, + "loss": 2.5007, + "step": 10241 + }, + { + "epoch": 0.8265676700831248, + "grad_norm": 0.7240251302719116, + "learning_rate": 9.69851594267167e-05, + "loss": 2.5002, + "step": 10242 + }, + { + "epoch": 0.8266483738197079, + "grad_norm": 0.6624411344528198, + "learning_rate": 9.696937974278922e-05, + "loss": 2.5175, + "step": 10243 + }, + { + "epoch": 0.8267290775562909, + "grad_norm": 0.6972576975822449, + "learning_rate": 9.695360013439269e-05, + "loss": 2.5285, + "step": 10244 + }, + { + "epoch": 0.8268097812928739, + "grad_norm": 0.684446394443512, + "learning_rate": 9.693782060192046e-05, + "loss": 2.57, + "step": 10245 + }, + { + "epoch": 0.8268904850294568, + "grad_norm": 0.6920011639595032, + "learning_rate": 
9.692204114576573e-05, + "loss": 2.5042, + "step": 10246 + }, + { + "epoch": 0.8269711887660399, + "grad_norm": 0.7526013851165771, + "learning_rate": 9.690626176632176e-05, + "loss": 2.5878, + "step": 10247 + }, + { + "epoch": 0.8270518925026229, + "grad_norm": 0.6936177611351013, + "learning_rate": 9.689048246398184e-05, + "loss": 2.5572, + "step": 10248 + }, + { + "epoch": 0.8271325962392059, + "grad_norm": 0.672168493270874, + "learning_rate": 9.687470323913922e-05, + "loss": 2.5127, + "step": 10249 + }, + { + "epoch": 0.8272132999757889, + "grad_norm": 0.6847899556159973, + "learning_rate": 9.685892409218717e-05, + "loss": 2.5443, + "step": 10250 + }, + { + "epoch": 0.8272940037123718, + "grad_norm": 0.6877103447914124, + "learning_rate": 9.684314502351894e-05, + "loss": 2.4924, + "step": 10251 + }, + { + "epoch": 0.8273747074489549, + "grad_norm": 0.6894243359565735, + "learning_rate": 9.682736603352783e-05, + "loss": 2.5107, + "step": 10252 + }, + { + "epoch": 0.8274554111855379, + "grad_norm": 0.7318278551101685, + "learning_rate": 9.681158712260698e-05, + "loss": 2.5276, + "step": 10253 + }, + { + "epoch": 0.8275361149221209, + "grad_norm": 0.6949039101600647, + "learning_rate": 9.679580829114975e-05, + "loss": 2.5128, + "step": 10254 + }, + { + "epoch": 0.8276168186587038, + "grad_norm": 0.6523800492286682, + "learning_rate": 9.678002953954939e-05, + "loss": 2.5584, + "step": 10255 + }, + { + "epoch": 0.8276975223952869, + "grad_norm": 0.6914480328559875, + "learning_rate": 9.676425086819905e-05, + "loss": 2.5597, + "step": 10256 + }, + { + "epoch": 0.8277782261318699, + "grad_norm": 0.7107869982719421, + "learning_rate": 9.674847227749206e-05, + "loss": 2.5009, + "step": 10257 + }, + { + "epoch": 0.8278589298684529, + "grad_norm": 0.7066758275032043, + "learning_rate": 9.673269376782166e-05, + "loss": 2.4599, + "step": 10258 + }, + { + "epoch": 0.8279396336050359, + "grad_norm": 0.7147037982940674, + "learning_rate": 9.671691533958104e-05, + "loss": 
2.4478, + "step": 10259 + }, + { + "epoch": 0.828020337341619, + "grad_norm": 0.666265606880188, + "learning_rate": 9.670113699316347e-05, + "loss": 2.5652, + "step": 10260 + }, + { + "epoch": 0.8281010410782019, + "grad_norm": 0.7026315927505493, + "learning_rate": 9.668535872896225e-05, + "loss": 2.5397, + "step": 10261 + }, + { + "epoch": 0.8281817448147849, + "grad_norm": 0.6611438393592834, + "learning_rate": 9.66695805473705e-05, + "loss": 2.5628, + "step": 10262 + }, + { + "epoch": 0.8282624485513679, + "grad_norm": 0.7211201190948486, + "learning_rate": 9.66538024487815e-05, + "loss": 2.5551, + "step": 10263 + }, + { + "epoch": 0.828343152287951, + "grad_norm": 0.7224553227424622, + "learning_rate": 9.663802443358849e-05, + "loss": 2.5329, + "step": 10264 + }, + { + "epoch": 0.8284238560245339, + "grad_norm": 0.6805843710899353, + "learning_rate": 9.662224650218474e-05, + "loss": 2.5744, + "step": 10265 + }, + { + "epoch": 0.8285045597611169, + "grad_norm": 0.7101335525512695, + "learning_rate": 9.66064686549634e-05, + "loss": 2.5281, + "step": 10266 + }, + { + "epoch": 0.8285852634976999, + "grad_norm": 0.7208443284034729, + "learning_rate": 9.659069089231774e-05, + "loss": 2.5326, + "step": 10267 + }, + { + "epoch": 0.828665967234283, + "grad_norm": 0.747894287109375, + "learning_rate": 9.6574913214641e-05, + "loss": 2.4909, + "step": 10268 + }, + { + "epoch": 0.828746670970866, + "grad_norm": 0.6618027091026306, + "learning_rate": 9.655913562232635e-05, + "loss": 2.6091, + "step": 10269 + }, + { + "epoch": 0.8288273747074489, + "grad_norm": 0.7101535201072693, + "learning_rate": 9.654335811576704e-05, + "loss": 2.5194, + "step": 10270 + }, + { + "epoch": 0.8289080784440319, + "grad_norm": 0.727763831615448, + "learning_rate": 9.652758069535631e-05, + "loss": 2.5767, + "step": 10271 + }, + { + "epoch": 0.828988782180615, + "grad_norm": 0.6936737895011902, + "learning_rate": 9.65118033614873e-05, + "loss": 2.498, + "step": 10272 + }, + { + "epoch": 
0.829069485917198, + "grad_norm": 0.699462354183197, + "learning_rate": 9.64960261145533e-05, + "loss": 2.5033, + "step": 10273 + }, + { + "epoch": 0.829150189653781, + "grad_norm": 0.7024868726730347, + "learning_rate": 9.648024895494749e-05, + "loss": 2.5937, + "step": 10274 + }, + { + "epoch": 0.8292308933903639, + "grad_norm": 0.7028421759605408, + "learning_rate": 9.646447188306305e-05, + "loss": 2.5528, + "step": 10275 + }, + { + "epoch": 0.829311597126947, + "grad_norm": 0.7216476202011108, + "learning_rate": 9.644869489929321e-05, + "loss": 2.5298, + "step": 10276 + }, + { + "epoch": 0.82939230086353, + "grad_norm": 0.6815251111984253, + "learning_rate": 9.643291800403123e-05, + "loss": 2.5138, + "step": 10277 + }, + { + "epoch": 0.829473004600113, + "grad_norm": 0.6961970925331116, + "learning_rate": 9.64171411976702e-05, + "loss": 2.5441, + "step": 10278 + }, + { + "epoch": 0.829553708336696, + "grad_norm": 0.7317311763763428, + "learning_rate": 9.640136448060337e-05, + "loss": 2.5885, + "step": 10279 + }, + { + "epoch": 0.829634412073279, + "grad_norm": 0.729086697101593, + "learning_rate": 9.638558785322396e-05, + "loss": 2.475, + "step": 10280 + }, + { + "epoch": 0.829715115809862, + "grad_norm": 0.7790165543556213, + "learning_rate": 9.636981131592521e-05, + "loss": 2.5538, + "step": 10281 + }, + { + "epoch": 0.829795819546445, + "grad_norm": 0.7066864967346191, + "learning_rate": 9.635403486910018e-05, + "loss": 2.5916, + "step": 10282 + }, + { + "epoch": 0.829876523283028, + "grad_norm": 0.7070252299308777, + "learning_rate": 9.633825851314215e-05, + "loss": 2.5879, + "step": 10283 + }, + { + "epoch": 0.829957227019611, + "grad_norm": 0.7604004740715027, + "learning_rate": 9.63224822484443e-05, + "loss": 2.5298, + "step": 10284 + }, + { + "epoch": 0.830037930756194, + "grad_norm": 0.7548386454582214, + "learning_rate": 9.63067060753998e-05, + "loss": 2.5313, + "step": 10285 + }, + { + "epoch": 0.830118634492777, + "grad_norm": 0.7241540551185608, + 
"learning_rate": 9.629092999440183e-05, + "loss": 2.5498, + "step": 10286 + }, + { + "epoch": 0.83019933822936, + "grad_norm": 0.6748291850090027, + "learning_rate": 9.627515400584361e-05, + "loss": 2.523, + "step": 10287 + }, + { + "epoch": 0.8302800419659431, + "grad_norm": 0.6624683141708374, + "learning_rate": 9.625937811011826e-05, + "loss": 2.568, + "step": 10288 + }, + { + "epoch": 0.830360745702526, + "grad_norm": 0.6681114435195923, + "learning_rate": 9.624360230761899e-05, + "loss": 2.5255, + "step": 10289 + }, + { + "epoch": 0.830441449439109, + "grad_norm": 0.6895325183868408, + "learning_rate": 9.622782659873899e-05, + "loss": 2.5275, + "step": 10290 + }, + { + "epoch": 0.830522153175692, + "grad_norm": 0.7257826924324036, + "learning_rate": 9.621205098387137e-05, + "loss": 2.5102, + "step": 10291 + }, + { + "epoch": 0.8306028569122751, + "grad_norm": 0.6567066311836243, + "learning_rate": 9.619627546340935e-05, + "loss": 2.5721, + "step": 10292 + }, + { + "epoch": 0.8306835606488581, + "grad_norm": 0.6571428179740906, + "learning_rate": 9.61805000377461e-05, + "loss": 2.5014, + "step": 10293 + }, + { + "epoch": 0.830764264385441, + "grad_norm": 0.7807042598724365, + "learning_rate": 9.61647247072748e-05, + "loss": 2.632, + "step": 10294 + }, + { + "epoch": 0.830844968122024, + "grad_norm": 0.6688913702964783, + "learning_rate": 9.614894947238854e-05, + "loss": 2.5457, + "step": 10295 + }, + { + "epoch": 0.8309256718586071, + "grad_norm": 0.7769338488578796, + "learning_rate": 9.613317433348055e-05, + "loss": 2.4775, + "step": 10296 + }, + { + "epoch": 0.8310063755951901, + "grad_norm": 0.7089162468910217, + "learning_rate": 9.611739929094399e-05, + "loss": 2.4887, + "step": 10297 + }, + { + "epoch": 0.8310870793317731, + "grad_norm": 0.6901174783706665, + "learning_rate": 9.610162434517196e-05, + "loss": 2.6127, + "step": 10298 + }, + { + "epoch": 0.831167783068356, + "grad_norm": 0.6862173676490784, + "learning_rate": 9.608584949655764e-05, + "loss": 
2.5432, + "step": 10299 + }, + { + "epoch": 0.8312484868049391, + "grad_norm": 0.6789367198944092, + "learning_rate": 9.607007474549418e-05, + "loss": 2.5135, + "step": 10300 + }, + { + "epoch": 0.8313291905415221, + "grad_norm": 0.6548805832862854, + "learning_rate": 9.605430009237474e-05, + "loss": 2.5466, + "step": 10301 + }, + { + "epoch": 0.8314098942781051, + "grad_norm": 0.6873800158500671, + "learning_rate": 9.603852553759244e-05, + "loss": 2.4954, + "step": 10302 + }, + { + "epoch": 0.831490598014688, + "grad_norm": 0.6816138029098511, + "learning_rate": 9.602275108154046e-05, + "loss": 2.5556, + "step": 10303 + }, + { + "epoch": 0.831571301751271, + "grad_norm": 0.6890314221382141, + "learning_rate": 9.600697672461189e-05, + "loss": 2.5253, + "step": 10304 + }, + { + "epoch": 0.8316520054878541, + "grad_norm": 0.6217427849769592, + "learning_rate": 9.599120246719992e-05, + "loss": 2.53, + "step": 10305 + }, + { + "epoch": 0.8317327092244371, + "grad_norm": 0.6638299226760864, + "learning_rate": 9.59754283096977e-05, + "loss": 2.5323, + "step": 10306 + }, + { + "epoch": 0.8318134129610201, + "grad_norm": 0.6834245920181274, + "learning_rate": 9.595965425249828e-05, + "loss": 2.5339, + "step": 10307 + }, + { + "epoch": 0.831894116697603, + "grad_norm": 0.8013476729393005, + "learning_rate": 9.594388029599484e-05, + "loss": 2.4925, + "step": 10308 + }, + { + "epoch": 0.8319748204341861, + "grad_norm": 0.7677187323570251, + "learning_rate": 9.592810644058049e-05, + "loss": 2.5717, + "step": 10309 + }, + { + "epoch": 0.8320555241707691, + "grad_norm": 0.6558046340942383, + "learning_rate": 9.591233268664841e-05, + "loss": 2.5631, + "step": 10310 + }, + { + "epoch": 0.8321362279073521, + "grad_norm": 0.6648481488227844, + "learning_rate": 9.589655903459165e-05, + "loss": 2.5232, + "step": 10311 + }, + { + "epoch": 0.8322169316439351, + "grad_norm": 0.6907756328582764, + "learning_rate": 9.588078548480338e-05, + "loss": 2.4804, + "step": 10312 + }, + { + 
"epoch": 0.8322976353805182, + "grad_norm": 0.6924928426742554, + "learning_rate": 9.586501203767675e-05, + "loss": 2.4648, + "step": 10313 + }, + { + "epoch": 0.8323783391171011, + "grad_norm": 0.7654799222946167, + "learning_rate": 9.584923869360477e-05, + "loss": 2.6184, + "step": 10314 + }, + { + "epoch": 0.8324590428536841, + "grad_norm": 0.7056179046630859, + "learning_rate": 9.58334654529806e-05, + "loss": 2.5862, + "step": 10315 + }, + { + "epoch": 0.8325397465902671, + "grad_norm": 0.7245064973831177, + "learning_rate": 9.581769231619743e-05, + "loss": 2.4866, + "step": 10316 + }, + { + "epoch": 0.8326204503268502, + "grad_norm": 0.6782355308532715, + "learning_rate": 9.580191928364824e-05, + "loss": 2.5519, + "step": 10317 + }, + { + "epoch": 0.8327011540634331, + "grad_norm": 0.6910805106163025, + "learning_rate": 9.578614635572621e-05, + "loss": 2.542, + "step": 10318 + }, + { + "epoch": 0.8327818578000161, + "grad_norm": 0.6858026385307312, + "learning_rate": 9.577037353282444e-05, + "loss": 2.5601, + "step": 10319 + }, + { + "epoch": 0.8328625615365991, + "grad_norm": 0.6886423230171204, + "learning_rate": 9.5754600815336e-05, + "loss": 2.5817, + "step": 10320 + }, + { + "epoch": 0.8329432652731822, + "grad_norm": 0.7585750818252563, + "learning_rate": 9.573882820365402e-05, + "loss": 2.5153, + "step": 10321 + }, + { + "epoch": 0.8330239690097652, + "grad_norm": 0.7004472613334656, + "learning_rate": 9.57230556981716e-05, + "loss": 2.5456, + "step": 10322 + }, + { + "epoch": 0.8331046727463481, + "grad_norm": 0.6530508399009705, + "learning_rate": 9.570728329928179e-05, + "loss": 2.5453, + "step": 10323 + }, + { + "epoch": 0.8331853764829311, + "grad_norm": 0.6767956614494324, + "learning_rate": 9.569151100737769e-05, + "loss": 2.5311, + "step": 10324 + }, + { + "epoch": 0.8332660802195142, + "grad_norm": 0.6835905909538269, + "learning_rate": 9.56757388228524e-05, + "loss": 2.5417, + "step": 10325 + }, + { + "epoch": 0.8333467839560972, + 
"grad_norm": 0.6582748889923096, + "learning_rate": 9.565996674609901e-05, + "loss": 2.5144, + "step": 10326 + }, + { + "epoch": 0.8334274876926802, + "grad_norm": 0.6815205216407776, + "learning_rate": 9.56441947775106e-05, + "loss": 2.5272, + "step": 10327 + }, + { + "epoch": 0.8335081914292631, + "grad_norm": 0.6810150146484375, + "learning_rate": 9.562842291748022e-05, + "loss": 2.5475, + "step": 10328 + }, + { + "epoch": 0.8335888951658462, + "grad_norm": 0.7220990657806396, + "learning_rate": 9.5612651166401e-05, + "loss": 2.54, + "step": 10329 + }, + { + "epoch": 0.8336695989024292, + "grad_norm": 0.6840164065361023, + "learning_rate": 9.559687952466596e-05, + "loss": 2.5987, + "step": 10330 + }, + { + "epoch": 0.8337503026390122, + "grad_norm": 0.7085031867027283, + "learning_rate": 9.558110799266819e-05, + "loss": 2.5674, + "step": 10331 + }, + { + "epoch": 0.8338310063755952, + "grad_norm": 0.6658117175102234, + "learning_rate": 9.55653365708008e-05, + "loss": 2.5793, + "step": 10332 + }, + { + "epoch": 0.8339117101121782, + "grad_norm": 0.782648503780365, + "learning_rate": 9.554956525945677e-05, + "loss": 2.5463, + "step": 10333 + }, + { + "epoch": 0.8339924138487612, + "grad_norm": 0.6999937891960144, + "learning_rate": 9.553379405902922e-05, + "loss": 2.5961, + "step": 10334 + }, + { + "epoch": 0.8340731175853442, + "grad_norm": 0.6681220531463623, + "learning_rate": 9.55180229699112e-05, + "loss": 2.6055, + "step": 10335 + }, + { + "epoch": 0.8341538213219272, + "grad_norm": 0.7127133011817932, + "learning_rate": 9.550225199249577e-05, + "loss": 2.5571, + "step": 10336 + }, + { + "epoch": 0.8342345250585103, + "grad_norm": 0.6939001679420471, + "learning_rate": 9.548648112717596e-05, + "loss": 2.5653, + "step": 10337 + }, + { + "epoch": 0.8343152287950932, + "grad_norm": 0.7483924031257629, + "learning_rate": 9.547071037434487e-05, + "loss": 2.5316, + "step": 10338 + }, + { + "epoch": 0.8343959325316762, + "grad_norm": 0.7975850105285645, + 
"learning_rate": 9.545493973439548e-05, + "loss": 2.6039, + "step": 10339 + }, + { + "epoch": 0.8344766362682592, + "grad_norm": 0.6893026232719421, + "learning_rate": 9.543916920772087e-05, + "loss": 2.5797, + "step": 10340 + }, + { + "epoch": 0.8345573400048423, + "grad_norm": 0.752869188785553, + "learning_rate": 9.542339879471409e-05, + "loss": 2.5677, + "step": 10341 + }, + { + "epoch": 0.8346380437414253, + "grad_norm": 0.7336339354515076, + "learning_rate": 9.540762849576822e-05, + "loss": 2.5212, + "step": 10342 + }, + { + "epoch": 0.8347187474780082, + "grad_norm": 0.7742713689804077, + "learning_rate": 9.539185831127621e-05, + "loss": 2.5599, + "step": 10343 + }, + { + "epoch": 0.8347994512145912, + "grad_norm": 0.7205352783203125, + "learning_rate": 9.537608824163114e-05, + "loss": 2.5591, + "step": 10344 + }, + { + "epoch": 0.8348801549511743, + "grad_norm": 0.7794787287712097, + "learning_rate": 9.536031828722605e-05, + "loss": 2.5858, + "step": 10345 + }, + { + "epoch": 0.8349608586877573, + "grad_norm": 0.7129528522491455, + "learning_rate": 9.534454844845396e-05, + "loss": 2.5591, + "step": 10346 + }, + { + "epoch": 0.8350415624243402, + "grad_norm": 0.731038510799408, + "learning_rate": 9.532877872570787e-05, + "loss": 2.5774, + "step": 10347 + }, + { + "epoch": 0.8351222661609232, + "grad_norm": 0.7706510424613953, + "learning_rate": 9.531300911938087e-05, + "loss": 2.6102, + "step": 10348 + }, + { + "epoch": 0.8352029698975063, + "grad_norm": 0.6890363097190857, + "learning_rate": 9.52972396298659e-05, + "loss": 2.5393, + "step": 10349 + }, + { + "epoch": 0.8352836736340893, + "grad_norm": 0.6792402863502502, + "learning_rate": 9.528147025755601e-05, + "loss": 2.5607, + "step": 10350 + }, + { + "epoch": 0.8353643773706723, + "grad_norm": 0.7097377777099609, + "learning_rate": 9.526570100284422e-05, + "loss": 2.5681, + "step": 10351 + }, + { + "epoch": 0.8354450811072552, + "grad_norm": 0.7530940771102905, + "learning_rate": 9.524993186612353e-05, 
+ "loss": 2.5405, + "step": 10352 + }, + { + "epoch": 0.8355257848438382, + "grad_norm": 0.714080810546875, + "learning_rate": 9.523416284778696e-05, + "loss": 2.5365, + "step": 10353 + }, + { + "epoch": 0.8356064885804213, + "grad_norm": 0.6745832562446594, + "learning_rate": 9.521839394822752e-05, + "loss": 2.5553, + "step": 10354 + }, + { + "epoch": 0.8356871923170043, + "grad_norm": 0.7163450121879578, + "learning_rate": 9.52026251678382e-05, + "loss": 2.5074, + "step": 10355 + }, + { + "epoch": 0.8357678960535873, + "grad_norm": 0.6876534223556519, + "learning_rate": 9.518685650701197e-05, + "loss": 2.5652, + "step": 10356 + }, + { + "epoch": 0.8358485997901702, + "grad_norm": 0.6424533128738403, + "learning_rate": 9.517108796614187e-05, + "loss": 2.4823, + "step": 10357 + }, + { + "epoch": 0.8359293035267533, + "grad_norm": 0.646802544593811, + "learning_rate": 9.515531954562094e-05, + "loss": 2.5602, + "step": 10358 + }, + { + "epoch": 0.8360100072633363, + "grad_norm": 0.7266993522644043, + "learning_rate": 9.513955124584205e-05, + "loss": 2.5384, + "step": 10359 + }, + { + "epoch": 0.8360907109999193, + "grad_norm": 0.7358742356300354, + "learning_rate": 9.512378306719826e-05, + "loss": 2.5798, + "step": 10360 + }, + { + "epoch": 0.8361714147365022, + "grad_norm": 0.7191498279571533, + "learning_rate": 9.510801501008256e-05, + "loss": 2.5229, + "step": 10361 + }, + { + "epoch": 0.8362521184730853, + "grad_norm": 0.7058876156806946, + "learning_rate": 9.509224707488788e-05, + "loss": 2.5146, + "step": 10362 + }, + { + "epoch": 0.8363328222096683, + "grad_norm": 0.7348346710205078, + "learning_rate": 9.507647926200725e-05, + "loss": 2.5878, + "step": 10363 + }, + { + "epoch": 0.8364135259462513, + "grad_norm": 0.7464115619659424, + "learning_rate": 9.506071157183366e-05, + "loss": 2.6056, + "step": 10364 + }, + { + "epoch": 0.8364942296828343, + "grad_norm": 0.7077332139015198, + "learning_rate": 9.504494400476e-05, + "loss": 2.5161, + "step": 10365 + }, + { 
+ "epoch": 0.8365749334194174, + "grad_norm": 0.7381827235221863, + "learning_rate": 9.502917656117928e-05, + "loss": 2.519, + "step": 10366 + }, + { + "epoch": 0.8366556371560003, + "grad_norm": 0.743180513381958, + "learning_rate": 9.501340924148452e-05, + "loss": 2.6149, + "step": 10367 + }, + { + "epoch": 0.8367363408925833, + "grad_norm": 0.6496078372001648, + "learning_rate": 9.499764204606863e-05, + "loss": 2.4969, + "step": 10368 + }, + { + "epoch": 0.8368170446291663, + "grad_norm": 0.6796541810035706, + "learning_rate": 9.498187497532454e-05, + "loss": 2.5304, + "step": 10369 + }, + { + "epoch": 0.8368977483657494, + "grad_norm": 0.6555948853492737, + "learning_rate": 9.496610802964529e-05, + "loss": 2.6029, + "step": 10370 + }, + { + "epoch": 0.8369784521023323, + "grad_norm": 0.6990405321121216, + "learning_rate": 9.495034120942374e-05, + "loss": 2.5286, + "step": 10371 + }, + { + "epoch": 0.8370591558389153, + "grad_norm": 0.7417613863945007, + "learning_rate": 9.49345745150529e-05, + "loss": 2.5301, + "step": 10372 + }, + { + "epoch": 0.8371398595754983, + "grad_norm": 0.6809872388839722, + "learning_rate": 9.49188079469257e-05, + "loss": 2.5075, + "step": 10373 + }, + { + "epoch": 0.8372205633120814, + "grad_norm": 0.6537099480628967, + "learning_rate": 9.490304150543514e-05, + "loss": 2.5515, + "step": 10374 + }, + { + "epoch": 0.8373012670486644, + "grad_norm": 0.6660431027412415, + "learning_rate": 9.488727519097407e-05, + "loss": 2.549, + "step": 10375 + }, + { + "epoch": 0.8373819707852473, + "grad_norm": 0.7257838249206543, + "learning_rate": 9.487150900393546e-05, + "loss": 2.546, + "step": 10376 + }, + { + "epoch": 0.8374626745218303, + "grad_norm": 0.742085874080658, + "learning_rate": 9.485574294471226e-05, + "loss": 2.5302, + "step": 10377 + }, + { + "epoch": 0.8375433782584134, + "grad_norm": 0.659934401512146, + "learning_rate": 9.48399770136974e-05, + "loss": 2.5553, + "step": 10378 + }, + { + "epoch": 0.8376240819949964, + "grad_norm": 
0.7219613790512085, + "learning_rate": 9.482421121128377e-05, + "loss": 2.6186, + "step": 10379 + }, + { + "epoch": 0.8377047857315794, + "grad_norm": 0.706444263458252, + "learning_rate": 9.480844553786436e-05, + "loss": 2.5082, + "step": 10380 + }, + { + "epoch": 0.8377854894681623, + "grad_norm": 0.7527014017105103, + "learning_rate": 9.479267999383204e-05, + "loss": 2.5625, + "step": 10381 + }, + { + "epoch": 0.8378661932047454, + "grad_norm": 0.7488746643066406, + "learning_rate": 9.477691457957976e-05, + "loss": 2.528, + "step": 10382 + }, + { + "epoch": 0.8379468969413284, + "grad_norm": 0.7394229173660278, + "learning_rate": 9.476114929550045e-05, + "loss": 2.5387, + "step": 10383 + }, + { + "epoch": 0.8380276006779114, + "grad_norm": 0.7490981817245483, + "learning_rate": 9.474538414198695e-05, + "loss": 2.548, + "step": 10384 + }, + { + "epoch": 0.8381083044144944, + "grad_norm": 0.7203173041343689, + "learning_rate": 9.472961911943222e-05, + "loss": 2.5547, + "step": 10385 + }, + { + "epoch": 0.8381890081510774, + "grad_norm": 0.6929850578308105, + "learning_rate": 9.471385422822917e-05, + "loss": 2.4831, + "step": 10386 + }, + { + "epoch": 0.8382697118876604, + "grad_norm": 0.6303263902664185, + "learning_rate": 9.469808946877067e-05, + "loss": 2.4569, + "step": 10387 + }, + { + "epoch": 0.8383504156242434, + "grad_norm": 0.6986981630325317, + "learning_rate": 9.468232484144964e-05, + "loss": 2.5278, + "step": 10388 + }, + { + "epoch": 0.8384311193608264, + "grad_norm": 0.6910964846611023, + "learning_rate": 9.466656034665898e-05, + "loss": 2.5657, + "step": 10389 + }, + { + "epoch": 0.8385118230974095, + "grad_norm": 0.6571134924888611, + "learning_rate": 9.465079598479163e-05, + "loss": 2.6017, + "step": 10390 + }, + { + "epoch": 0.8385925268339924, + "grad_norm": 0.7117733359336853, + "learning_rate": 9.463503175624034e-05, + "loss": 2.56, + "step": 10391 + }, + { + "epoch": 0.8386732305705754, + "grad_norm": 0.7052998542785645, + "learning_rate": 
9.461926766139813e-05, + "loss": 2.4998, + "step": 10392 + }, + { + "epoch": 0.8387539343071584, + "grad_norm": 0.7306597232818604, + "learning_rate": 9.460350370065786e-05, + "loss": 2.5292, + "step": 10393 + }, + { + "epoch": 0.8388346380437415, + "grad_norm": 0.681069552898407, + "learning_rate": 9.458773987441235e-05, + "loss": 2.5469, + "step": 10394 + }, + { + "epoch": 0.8389153417803245, + "grad_norm": 0.6681767702102661, + "learning_rate": 9.45719761830545e-05, + "loss": 2.5476, + "step": 10395 + }, + { + "epoch": 0.8389960455169074, + "grad_norm": 0.6759339570999146, + "learning_rate": 9.455621262697723e-05, + "loss": 2.4806, + "step": 10396 + }, + { + "epoch": 0.8390767492534904, + "grad_norm": 0.695829451084137, + "learning_rate": 9.454044920657333e-05, + "loss": 2.5255, + "step": 10397 + }, + { + "epoch": 0.8391574529900735, + "grad_norm": 0.686568558216095, + "learning_rate": 9.452468592223572e-05, + "loss": 2.5655, + "step": 10398 + }, + { + "epoch": 0.8392381567266565, + "grad_norm": 0.6529035568237305, + "learning_rate": 9.45089227743573e-05, + "loss": 2.5026, + "step": 10399 + }, + { + "epoch": 0.8393188604632394, + "grad_norm": 0.6809061765670776, + "learning_rate": 9.449315976333082e-05, + "loss": 2.5549, + "step": 10400 + }, + { + "epoch": 0.8393995641998224, + "grad_norm": 0.6920269727706909, + "learning_rate": 9.447739688954919e-05, + "loss": 2.517, + "step": 10401 + }, + { + "epoch": 0.8394802679364055, + "grad_norm": 0.6626712083816528, + "learning_rate": 9.446163415340526e-05, + "loss": 2.605, + "step": 10402 + }, + { + "epoch": 0.8395609716729885, + "grad_norm": 0.6912916898727417, + "learning_rate": 9.444587155529195e-05, + "loss": 2.588, + "step": 10403 + }, + { + "epoch": 0.8396416754095715, + "grad_norm": 0.6771352291107178, + "learning_rate": 9.443010909560198e-05, + "loss": 2.5148, + "step": 10404 + }, + { + "epoch": 0.8397223791461544, + "grad_norm": 0.7015509009361267, + "learning_rate": 9.441434677472827e-05, + "loss": 2.5425, + 
"step": 10405 + }, + { + "epoch": 0.8398030828827374, + "grad_norm": 0.6789976358413696, + "learning_rate": 9.439858459306364e-05, + "loss": 2.598, + "step": 10406 + }, + { + "epoch": 0.8398837866193205, + "grad_norm": 0.674391508102417, + "learning_rate": 9.438282255100091e-05, + "loss": 2.5581, + "step": 10407 + }, + { + "epoch": 0.8399644903559035, + "grad_norm": 0.6944772005081177, + "learning_rate": 9.436706064893294e-05, + "loss": 2.5591, + "step": 10408 + }, + { + "epoch": 0.8400451940924865, + "grad_norm": 0.6750832200050354, + "learning_rate": 9.435129888725259e-05, + "loss": 2.533, + "step": 10409 + }, + { + "epoch": 0.8401258978290694, + "grad_norm": 0.6927465200424194, + "learning_rate": 9.433553726635257e-05, + "loss": 2.536, + "step": 10410 + }, + { + "epoch": 0.8402066015656525, + "grad_norm": 0.6399651765823364, + "learning_rate": 9.431977578662578e-05, + "loss": 2.5123, + "step": 10411 + }, + { + "epoch": 0.8402873053022355, + "grad_norm": 0.7588143944740295, + "learning_rate": 9.430401444846505e-05, + "loss": 2.6133, + "step": 10412 + }, + { + "epoch": 0.8403680090388185, + "grad_norm": 0.8010972738265991, + "learning_rate": 9.428825325226313e-05, + "loss": 2.5407, + "step": 10413 + }, + { + "epoch": 0.8404487127754015, + "grad_norm": 0.6847307085990906, + "learning_rate": 9.427249219841288e-05, + "loss": 2.5912, + "step": 10414 + }, + { + "epoch": 0.8405294165119845, + "grad_norm": 0.7005963325500488, + "learning_rate": 9.425673128730716e-05, + "loss": 2.5059, + "step": 10415 + }, + { + "epoch": 0.8406101202485675, + "grad_norm": 0.7383962273597717, + "learning_rate": 9.424097051933862e-05, + "loss": 2.5157, + "step": 10416 + }, + { + "epoch": 0.8406908239851505, + "grad_norm": 0.7078843712806702, + "learning_rate": 9.422520989490018e-05, + "loss": 2.6093, + "step": 10417 + }, + { + "epoch": 0.8407715277217335, + "grad_norm": 0.7449501752853394, + "learning_rate": 9.42094494143846e-05, + "loss": 2.594, + "step": 10418 + }, + { + "epoch": 
0.8408522314583166, + "grad_norm": 0.6823872923851013, + "learning_rate": 9.419368907818473e-05, + "loss": 2.5653, + "step": 10419 + }, + { + "epoch": 0.8409329351948995, + "grad_norm": 0.7403056025505066, + "learning_rate": 9.417792888669325e-05, + "loss": 2.5296, + "step": 10420 + }, + { + "epoch": 0.8410136389314825, + "grad_norm": 0.6858980655670166, + "learning_rate": 9.4162168840303e-05, + "loss": 2.5401, + "step": 10421 + }, + { + "epoch": 0.8410943426680655, + "grad_norm": 0.692348837852478, + "learning_rate": 9.41464089394068e-05, + "loss": 2.4797, + "step": 10422 + }, + { + "epoch": 0.8411750464046486, + "grad_norm": 0.6939836144447327, + "learning_rate": 9.413064918439736e-05, + "loss": 2.505, + "step": 10423 + }, + { + "epoch": 0.8412557501412316, + "grad_norm": 0.7334314584732056, + "learning_rate": 9.411488957566748e-05, + "loss": 2.5792, + "step": 10424 + }, + { + "epoch": 0.8413364538778145, + "grad_norm": 0.6977920532226562, + "learning_rate": 9.409913011360999e-05, + "loss": 2.5204, + "step": 10425 + }, + { + "epoch": 0.8414171576143975, + "grad_norm": 0.7121822834014893, + "learning_rate": 9.408337079861756e-05, + "loss": 2.571, + "step": 10426 + }, + { + "epoch": 0.8414978613509806, + "grad_norm": 0.761476993560791, + "learning_rate": 9.406761163108297e-05, + "loss": 2.5845, + "step": 10427 + }, + { + "epoch": 0.8415785650875636, + "grad_norm": 0.7160221934318542, + "learning_rate": 9.405185261139906e-05, + "loss": 2.5331, + "step": 10428 + }, + { + "epoch": 0.8416592688241465, + "grad_norm": 0.6828827857971191, + "learning_rate": 9.40360937399585e-05, + "loss": 2.5596, + "step": 10429 + }, + { + "epoch": 0.8417399725607295, + "grad_norm": 0.756473183631897, + "learning_rate": 9.402033501715406e-05, + "loss": 2.6107, + "step": 10430 + }, + { + "epoch": 0.8418206762973126, + "grad_norm": 0.7486895322799683, + "learning_rate": 9.400457644337853e-05, + "loss": 2.5388, + "step": 10431 + }, + { + "epoch": 0.8419013800338956, + "grad_norm": 
0.7759146690368652, + "learning_rate": 9.398881801902461e-05, + "loss": 2.5559, + "step": 10432 + }, + { + "epoch": 0.8419820837704786, + "grad_norm": 0.71756911277771, + "learning_rate": 9.397305974448506e-05, + "loss": 2.6109, + "step": 10433 + }, + { + "epoch": 0.8420627875070615, + "grad_norm": 0.7741644382476807, + "learning_rate": 9.395730162015261e-05, + "loss": 2.5664, + "step": 10434 + }, + { + "epoch": 0.8421434912436446, + "grad_norm": 0.7155938744544983, + "learning_rate": 9.394154364642006e-05, + "loss": 2.5693, + "step": 10435 + }, + { + "epoch": 0.8422241949802276, + "grad_norm": 0.6862725019454956, + "learning_rate": 9.392578582368002e-05, + "loss": 2.4942, + "step": 10436 + }, + { + "epoch": 0.8423048987168106, + "grad_norm": 0.6698417067527771, + "learning_rate": 9.391002815232528e-05, + "loss": 2.5258, + "step": 10437 + }, + { + "epoch": 0.8423856024533936, + "grad_norm": 0.7756468057632446, + "learning_rate": 9.389427063274858e-05, + "loss": 2.5008, + "step": 10438 + }, + { + "epoch": 0.8424663061899766, + "grad_norm": 0.6579857468605042, + "learning_rate": 9.387851326534259e-05, + "loss": 2.5335, + "step": 10439 + }, + { + "epoch": 0.8425470099265596, + "grad_norm": 0.7673436403274536, + "learning_rate": 9.386275605050006e-05, + "loss": 2.5646, + "step": 10440 + }, + { + "epoch": 0.8426277136631426, + "grad_norm": 0.7377188205718994, + "learning_rate": 9.384699898861372e-05, + "loss": 2.568, + "step": 10441 + }, + { + "epoch": 0.8427084173997256, + "grad_norm": 0.6502123475074768, + "learning_rate": 9.38312420800762e-05, + "loss": 2.6091, + "step": 10442 + }, + { + "epoch": 0.8427891211363087, + "grad_norm": 0.729852020740509, + "learning_rate": 9.381548532528026e-05, + "loss": 2.4873, + "step": 10443 + }, + { + "epoch": 0.8428698248728916, + "grad_norm": 0.7419102191925049, + "learning_rate": 9.379972872461865e-05, + "loss": 2.4966, + "step": 10444 + }, + { + "epoch": 0.8429505286094746, + "grad_norm": 0.6921093463897705, + "learning_rate": 
9.378397227848395e-05, + "loss": 2.4895, + "step": 10445 + }, + { + "epoch": 0.8430312323460576, + "grad_norm": 0.7697325944900513, + "learning_rate": 9.376821598726892e-05, + "loss": 2.5779, + "step": 10446 + }, + { + "epoch": 0.8431119360826407, + "grad_norm": 0.6441029906272888, + "learning_rate": 9.375245985136626e-05, + "loss": 2.4909, + "step": 10447 + }, + { + "epoch": 0.8431926398192237, + "grad_norm": 0.6962057948112488, + "learning_rate": 9.373670387116861e-05, + "loss": 2.5602, + "step": 10448 + }, + { + "epoch": 0.8432733435558066, + "grad_norm": 0.7030641436576843, + "learning_rate": 9.372094804706867e-05, + "loss": 2.5641, + "step": 10449 + }, + { + "epoch": 0.8433540472923896, + "grad_norm": 0.6969063878059387, + "learning_rate": 9.370519237945912e-05, + "loss": 2.5555, + "step": 10450 + }, + { + "epoch": 0.8434347510289727, + "grad_norm": 0.7169879674911499, + "learning_rate": 9.368943686873267e-05, + "loss": 2.5258, + "step": 10451 + }, + { + "epoch": 0.8435154547655557, + "grad_norm": 0.7198735475540161, + "learning_rate": 9.36736815152819e-05, + "loss": 2.5192, + "step": 10452 + }, + { + "epoch": 0.8435961585021386, + "grad_norm": 0.6613535284996033, + "learning_rate": 9.365792631949951e-05, + "loss": 2.5596, + "step": 10453 + }, + { + "epoch": 0.8436768622387216, + "grad_norm": 0.6377065777778625, + "learning_rate": 9.364217128177824e-05, + "loss": 2.5518, + "step": 10454 + }, + { + "epoch": 0.8437575659753046, + "grad_norm": 0.6670635938644409, + "learning_rate": 9.362641640251063e-05, + "loss": 2.4793, + "step": 10455 + }, + { + "epoch": 0.8438382697118877, + "grad_norm": 0.6556122899055481, + "learning_rate": 9.361066168208939e-05, + "loss": 2.5492, + "step": 10456 + }, + { + "epoch": 0.8439189734484707, + "grad_norm": 0.7262280583381653, + "learning_rate": 9.35949071209072e-05, + "loss": 2.6059, + "step": 10457 + }, + { + "epoch": 0.8439996771850536, + "grad_norm": 0.702953040599823, + "learning_rate": 9.357915271935662e-05, + "loss": 
2.5445, + "step": 10458 + }, + { + "epoch": 0.8440803809216366, + "grad_norm": 0.6619930267333984, + "learning_rate": 9.356339847783036e-05, + "loss": 2.5688, + "step": 10459 + }, + { + "epoch": 0.8441610846582197, + "grad_norm": 0.7038032412528992, + "learning_rate": 9.354764439672106e-05, + "loss": 2.5195, + "step": 10460 + }, + { + "epoch": 0.8442417883948027, + "grad_norm": 0.6615132689476013, + "learning_rate": 9.353189047642129e-05, + "loss": 2.5176, + "step": 10461 + }, + { + "epoch": 0.8443224921313857, + "grad_norm": 0.6524826288223267, + "learning_rate": 9.351613671732372e-05, + "loss": 2.4294, + "step": 10462 + }, + { + "epoch": 0.8444031958679686, + "grad_norm": 0.6526279449462891, + "learning_rate": 9.350038311982099e-05, + "loss": 2.595, + "step": 10463 + }, + { + "epoch": 0.8444838996045517, + "grad_norm": 0.6610859632492065, + "learning_rate": 9.348462968430569e-05, + "loss": 2.5311, + "step": 10464 + }, + { + "epoch": 0.8445646033411347, + "grad_norm": 0.6835470795631409, + "learning_rate": 9.346887641117045e-05, + "loss": 2.5694, + "step": 10465 + }, + { + "epoch": 0.8446453070777177, + "grad_norm": 0.6768551468849182, + "learning_rate": 9.345312330080787e-05, + "loss": 2.6082, + "step": 10466 + }, + { + "epoch": 0.8447260108143007, + "grad_norm": 0.6368672847747803, + "learning_rate": 9.343737035361059e-05, + "loss": 2.5221, + "step": 10467 + }, + { + "epoch": 0.8448067145508837, + "grad_norm": 0.6952844858169556, + "learning_rate": 9.34216175699712e-05, + "loss": 2.5003, + "step": 10468 + }, + { + "epoch": 0.8448874182874667, + "grad_norm": 0.6663931012153625, + "learning_rate": 9.340586495028227e-05, + "loss": 2.5469, + "step": 10469 + }, + { + "epoch": 0.8449681220240497, + "grad_norm": 0.6840688586235046, + "learning_rate": 9.339011249493647e-05, + "loss": 2.5499, + "step": 10470 + }, + { + "epoch": 0.8450488257606327, + "grad_norm": 0.6832869052886963, + "learning_rate": 9.337436020432632e-05, + "loss": 2.5492, + "step": 10471 + }, + { + 
"epoch": 0.8451295294972158, + "grad_norm": 0.7444044947624207, + "learning_rate": 9.335860807884442e-05, + "loss": 2.5791, + "step": 10472 + }, + { + "epoch": 0.8452102332337987, + "grad_norm": 0.6821839809417725, + "learning_rate": 9.334285611888339e-05, + "loss": 2.4772, + "step": 10473 + }, + { + "epoch": 0.8452909369703817, + "grad_norm": 0.6209141612052917, + "learning_rate": 9.332710432483577e-05, + "loss": 2.5656, + "step": 10474 + }, + { + "epoch": 0.8453716407069647, + "grad_norm": 0.6531212329864502, + "learning_rate": 9.331135269709415e-05, + "loss": 2.5285, + "step": 10475 + }, + { + "epoch": 0.8454523444435478, + "grad_norm": 0.6418079137802124, + "learning_rate": 9.329560123605115e-05, + "loss": 2.5503, + "step": 10476 + }, + { + "epoch": 0.8455330481801308, + "grad_norm": 0.6636360287666321, + "learning_rate": 9.327984994209924e-05, + "loss": 2.528, + "step": 10477 + }, + { + "epoch": 0.8456137519167137, + "grad_norm": 0.6196488738059998, + "learning_rate": 9.326409881563102e-05, + "loss": 2.4907, + "step": 10478 + }, + { + "epoch": 0.8456944556532967, + "grad_norm": 0.6339137554168701, + "learning_rate": 9.324834785703913e-05, + "loss": 2.4672, + "step": 10479 + }, + { + "epoch": 0.8457751593898798, + "grad_norm": 0.6803932189941406, + "learning_rate": 9.323259706671602e-05, + "loss": 2.5538, + "step": 10480 + }, + { + "epoch": 0.8458558631264628, + "grad_norm": 0.6815275549888611, + "learning_rate": 9.321684644505429e-05, + "loss": 2.5291, + "step": 10481 + }, + { + "epoch": 0.8459365668630457, + "grad_norm": 0.6497374773025513, + "learning_rate": 9.320109599244646e-05, + "loss": 2.5499, + "step": 10482 + }, + { + "epoch": 0.8460172705996287, + "grad_norm": 0.7966926097869873, + "learning_rate": 9.318534570928512e-05, + "loss": 2.523, + "step": 10483 + }, + { + "epoch": 0.8460979743362118, + "grad_norm": 0.6532156467437744, + "learning_rate": 9.316959559596276e-05, + "loss": 2.5138, + "step": 10484 + }, + { + "epoch": 0.8461786780727948, + 
"grad_norm": 0.7292522192001343, + "learning_rate": 9.315384565287193e-05, + "loss": 2.5413, + "step": 10485 + }, + { + "epoch": 0.8462593818093778, + "grad_norm": 0.7610795497894287, + "learning_rate": 9.313809588040519e-05, + "loss": 2.5071, + "step": 10486 + }, + { + "epoch": 0.8463400855459607, + "grad_norm": 0.7038258910179138, + "learning_rate": 9.312234627895502e-05, + "loss": 2.5568, + "step": 10487 + }, + { + "epoch": 0.8464207892825438, + "grad_norm": 0.7136046290397644, + "learning_rate": 9.310659684891395e-05, + "loss": 2.5372, + "step": 10488 + }, + { + "epoch": 0.8465014930191268, + "grad_norm": 0.7512896060943604, + "learning_rate": 9.309084759067452e-05, + "loss": 2.5821, + "step": 10489 + }, + { + "epoch": 0.8465821967557098, + "grad_norm": 0.7436400651931763, + "learning_rate": 9.307509850462922e-05, + "loss": 2.5489, + "step": 10490 + }, + { + "epoch": 0.8466629004922928, + "grad_norm": 0.6858603954315186, + "learning_rate": 9.305934959117056e-05, + "loss": 2.5622, + "step": 10491 + }, + { + "epoch": 0.8467436042288758, + "grad_norm": 0.707185685634613, + "learning_rate": 9.304360085069107e-05, + "loss": 2.5275, + "step": 10492 + }, + { + "epoch": 0.8468243079654588, + "grad_norm": 0.7207933068275452, + "learning_rate": 9.302785228358322e-05, + "loss": 2.5877, + "step": 10493 + }, + { + "epoch": 0.8469050117020418, + "grad_norm": 0.6470080614089966, + "learning_rate": 9.30121038902395e-05, + "loss": 2.5117, + "step": 10494 + }, + { + "epoch": 0.8469857154386248, + "grad_norm": 0.75248783826828, + "learning_rate": 9.299635567105247e-05, + "loss": 2.5259, + "step": 10495 + }, + { + "epoch": 0.8470664191752079, + "grad_norm": 0.7150708436965942, + "learning_rate": 9.298060762641452e-05, + "loss": 2.551, + "step": 10496 + }, + { + "epoch": 0.8471471229117908, + "grad_norm": 0.6865069270133972, + "learning_rate": 9.296485975671818e-05, + "loss": 2.5184, + "step": 10497 + }, + { + "epoch": 0.8472278266483738, + "grad_norm": 0.7188237309455872, + 
"learning_rate": 9.294911206235593e-05, + "loss": 2.5207, + "step": 10498 + }, + { + "epoch": 0.8473085303849568, + "grad_norm": 0.6907880902290344, + "learning_rate": 9.293336454372026e-05, + "loss": 2.5544, + "step": 10499 + }, + { + "epoch": 0.8473892341215399, + "grad_norm": 0.7626079320907593, + "learning_rate": 9.291761720120358e-05, + "loss": 2.5741, + "step": 10500 + }, + { + "epoch": 0.8474699378581229, + "grad_norm": 0.6731963753700256, + "learning_rate": 9.29018700351984e-05, + "loss": 2.5433, + "step": 10501 + }, + { + "epoch": 0.8475506415947058, + "grad_norm": 0.7256288528442383, + "learning_rate": 9.288612304609723e-05, + "loss": 2.5131, + "step": 10502 + }, + { + "epoch": 0.8476313453312888, + "grad_norm": 0.7129119634628296, + "learning_rate": 9.287037623429242e-05, + "loss": 2.5054, + "step": 10503 + }, + { + "epoch": 0.8477120490678719, + "grad_norm": 0.6711156964302063, + "learning_rate": 9.285462960017644e-05, + "loss": 2.5671, + "step": 10504 + }, + { + "epoch": 0.8477927528044549, + "grad_norm": 0.7268081903457642, + "learning_rate": 9.283888314414184e-05, + "loss": 2.5627, + "step": 10505 + }, + { + "epoch": 0.8478734565410379, + "grad_norm": 0.8635050058364868, + "learning_rate": 9.282313686658094e-05, + "loss": 2.517, + "step": 10506 + }, + { + "epoch": 0.8479541602776208, + "grad_norm": 0.7077138423919678, + "learning_rate": 9.280739076788624e-05, + "loss": 2.5551, + "step": 10507 + }, + { + "epoch": 0.8480348640142038, + "grad_norm": 0.6312204599380493, + "learning_rate": 9.279164484845018e-05, + "loss": 2.5329, + "step": 10508 + }, + { + "epoch": 0.8481155677507869, + "grad_norm": 0.6749829649925232, + "learning_rate": 9.277589910866516e-05, + "loss": 2.5092, + "step": 10509 + }, + { + "epoch": 0.8481962714873699, + "grad_norm": 0.753391683101654, + "learning_rate": 9.27601535489236e-05, + "loss": 2.6244, + "step": 10510 + }, + { + "epoch": 0.8482769752239528, + "grad_norm": 0.7230119109153748, + "learning_rate": 9.2744408169618e-05, + 
"loss": 2.5021, + "step": 10511 + }, + { + "epoch": 0.8483576789605358, + "grad_norm": 0.6759157776832581, + "learning_rate": 9.272866297114067e-05, + "loss": 2.5399, + "step": 10512 + }, + { + "epoch": 0.8484383826971189, + "grad_norm": 0.7049473524093628, + "learning_rate": 9.271291795388406e-05, + "loss": 2.5024, + "step": 10513 + }, + { + "epoch": 0.8485190864337019, + "grad_norm": 0.6579850912094116, + "learning_rate": 9.269717311824058e-05, + "loss": 2.5019, + "step": 10514 + }, + { + "epoch": 0.8485997901702849, + "grad_norm": 0.7091391086578369, + "learning_rate": 9.268142846460265e-05, + "loss": 2.5785, + "step": 10515 + }, + { + "epoch": 0.8486804939068678, + "grad_norm": 0.6612898707389832, + "learning_rate": 9.266568399336266e-05, + "loss": 2.5046, + "step": 10516 + }, + { + "epoch": 0.8487611976434509, + "grad_norm": 0.6348623633384705, + "learning_rate": 9.264993970491298e-05, + "loss": 2.543, + "step": 10517 + }, + { + "epoch": 0.8488419013800339, + "grad_norm": 0.688360869884491, + "learning_rate": 9.263419559964604e-05, + "loss": 2.5294, + "step": 10518 + }, + { + "epoch": 0.8489226051166169, + "grad_norm": 0.6483190059661865, + "learning_rate": 9.261845167795418e-05, + "loss": 2.5623, + "step": 10519 + }, + { + "epoch": 0.8490033088531999, + "grad_norm": 0.689379096031189, + "learning_rate": 9.26027079402298e-05, + "loss": 2.4871, + "step": 10520 + }, + { + "epoch": 0.8490840125897829, + "grad_norm": 0.6627655625343323, + "learning_rate": 9.25869643868653e-05, + "loss": 2.5353, + "step": 10521 + }, + { + "epoch": 0.8491647163263659, + "grad_norm": 0.6701192259788513, + "learning_rate": 9.2571221018253e-05, + "loss": 2.5003, + "step": 10522 + }, + { + "epoch": 0.8492454200629489, + "grad_norm": 0.7413944005966187, + "learning_rate": 9.255547783478529e-05, + "loss": 2.5473, + "step": 10523 + }, + { + "epoch": 0.8493261237995319, + "grad_norm": 0.6490365266799927, + "learning_rate": 9.253973483685455e-05, + "loss": 2.5168, + "step": 10524 + }, + { + 
"epoch": 0.849406827536115, + "grad_norm": 0.7303688526153564, + "learning_rate": 9.25239920248531e-05, + "loss": 2.5953, + "step": 10525 + }, + { + "epoch": 0.8494875312726979, + "grad_norm": 0.7132991552352905, + "learning_rate": 9.250824939917331e-05, + "loss": 2.475, + "step": 10526 + }, + { + "epoch": 0.8495682350092809, + "grad_norm": 0.6935676336288452, + "learning_rate": 9.249250696020753e-05, + "loss": 2.5212, + "step": 10527 + }, + { + "epoch": 0.8496489387458639, + "grad_norm": 0.732961118221283, + "learning_rate": 9.247676470834814e-05, + "loss": 2.5848, + "step": 10528 + }, + { + "epoch": 0.849729642482447, + "grad_norm": 0.6899160146713257, + "learning_rate": 9.246102264398739e-05, + "loss": 2.4551, + "step": 10529 + }, + { + "epoch": 0.84981034621903, + "grad_norm": 0.6941123604774475, + "learning_rate": 9.244528076751766e-05, + "loss": 2.5441, + "step": 10530 + }, + { + "epoch": 0.8498910499556129, + "grad_norm": 0.7351016998291016, + "learning_rate": 9.242953907933134e-05, + "loss": 2.6519, + "step": 10531 + }, + { + "epoch": 0.8499717536921959, + "grad_norm": 0.7156691551208496, + "learning_rate": 9.241379757982065e-05, + "loss": 2.573, + "step": 10532 + }, + { + "epoch": 0.850052457428779, + "grad_norm": 0.7137688994407654, + "learning_rate": 9.239805626937797e-05, + "loss": 2.5688, + "step": 10533 + }, + { + "epoch": 0.850133161165362, + "grad_norm": 0.7018687129020691, + "learning_rate": 9.238231514839559e-05, + "loss": 2.5725, + "step": 10534 + }, + { + "epoch": 0.850213864901945, + "grad_norm": 0.6723659634590149, + "learning_rate": 9.236657421726583e-05, + "loss": 2.5661, + "step": 10535 + }, + { + "epoch": 0.8502945686385279, + "grad_norm": 0.7105850577354431, + "learning_rate": 9.235083347638098e-05, + "loss": 2.5676, + "step": 10536 + }, + { + "epoch": 0.850375272375111, + "grad_norm": 0.682601809501648, + "learning_rate": 9.233509292613341e-05, + "loss": 2.5489, + "step": 10537 + }, + { + "epoch": 0.850455976111694, + "grad_norm": 
0.6703988313674927, + "learning_rate": 9.231935256691531e-05, + "loss": 2.5349, + "step": 10538 + }, + { + "epoch": 0.850536679848277, + "grad_norm": 0.6430882215499878, + "learning_rate": 9.230361239911903e-05, + "loss": 2.4959, + "step": 10539 + }, + { + "epoch": 0.8506173835848599, + "grad_norm": 0.7164519429206848, + "learning_rate": 9.228787242313687e-05, + "loss": 2.4999, + "step": 10540 + }, + { + "epoch": 0.850698087321443, + "grad_norm": 0.7463028430938721, + "learning_rate": 9.227213263936107e-05, + "loss": 2.545, + "step": 10541 + }, + { + "epoch": 0.850778791058026, + "grad_norm": 0.650577187538147, + "learning_rate": 9.22563930481839e-05, + "loss": 2.5707, + "step": 10542 + }, + { + "epoch": 0.850859494794609, + "grad_norm": 0.6808211207389832, + "learning_rate": 9.224065364999768e-05, + "loss": 2.5236, + "step": 10543 + }, + { + "epoch": 0.850940198531192, + "grad_norm": 0.6947758793830872, + "learning_rate": 9.222491444519467e-05, + "loss": 2.555, + "step": 10544 + }, + { + "epoch": 0.851020902267775, + "grad_norm": 0.6805624961853027, + "learning_rate": 9.22091754341671e-05, + "loss": 2.517, + "step": 10545 + }, + { + "epoch": 0.851101606004358, + "grad_norm": 0.6645655035972595, + "learning_rate": 9.219343661730724e-05, + "loss": 2.5237, + "step": 10546 + }, + { + "epoch": 0.851182309740941, + "grad_norm": 0.6912586092948914, + "learning_rate": 9.217769799500738e-05, + "loss": 2.5345, + "step": 10547 + }, + { + "epoch": 0.851263013477524, + "grad_norm": 0.6713781356811523, + "learning_rate": 9.21619595676597e-05, + "loss": 2.56, + "step": 10548 + }, + { + "epoch": 0.8513437172141071, + "grad_norm": 0.7031502723693848, + "learning_rate": 9.214622133565648e-05, + "loss": 2.4885, + "step": 10549 + }, + { + "epoch": 0.85142442095069, + "grad_norm": 0.6616455316543579, + "learning_rate": 9.213048329938997e-05, + "loss": 2.5101, + "step": 10550 + }, + { + "epoch": 0.851505124687273, + "grad_norm": 0.711077094078064, + "learning_rate": 
9.211474545925236e-05, + "loss": 2.6264, + "step": 10551 + }, + { + "epoch": 0.851585828423856, + "grad_norm": 0.7534502744674683, + "learning_rate": 9.209900781563592e-05, + "loss": 2.5417, + "step": 10552 + }, + { + "epoch": 0.8516665321604391, + "grad_norm": 0.7405222058296204, + "learning_rate": 9.208327036893288e-05, + "loss": 2.546, + "step": 10553 + }, + { + "epoch": 0.8517472358970221, + "grad_norm": 0.7014057040214539, + "learning_rate": 9.20675331195354e-05, + "loss": 2.5211, + "step": 10554 + }, + { + "epoch": 0.851827939633605, + "grad_norm": 0.6984074115753174, + "learning_rate": 9.205179606783573e-05, + "loss": 2.5181, + "step": 10555 + }, + { + "epoch": 0.851908643370188, + "grad_norm": 0.7312670350074768, + "learning_rate": 9.203605921422613e-05, + "loss": 2.5345, + "step": 10556 + }, + { + "epoch": 0.851989347106771, + "grad_norm": 0.6861104369163513, + "learning_rate": 9.202032255909871e-05, + "loss": 2.5426, + "step": 10557 + }, + { + "epoch": 0.8520700508433541, + "grad_norm": 0.6989030838012695, + "learning_rate": 9.200458610284571e-05, + "loss": 2.5221, + "step": 10558 + }, + { + "epoch": 0.852150754579937, + "grad_norm": 0.6645115613937378, + "learning_rate": 9.198884984585932e-05, + "loss": 2.4755, + "step": 10559 + }, + { + "epoch": 0.85223145831652, + "grad_norm": 0.6577785015106201, + "learning_rate": 9.197311378853176e-05, + "loss": 2.5491, + "step": 10560 + }, + { + "epoch": 0.852312162053103, + "grad_norm": 0.7311568856239319, + "learning_rate": 9.195737793125517e-05, + "loss": 2.5653, + "step": 10561 + }, + { + "epoch": 0.8523928657896861, + "grad_norm": 0.6469970345497131, + "learning_rate": 9.194164227442174e-05, + "loss": 2.5384, + "step": 10562 + }, + { + "epoch": 0.8524735695262691, + "grad_norm": 0.6562933325767517, + "learning_rate": 9.19259068184237e-05, + "loss": 2.5644, + "step": 10563 + }, + { + "epoch": 0.852554273262852, + "grad_norm": 0.7740273475646973, + "learning_rate": 9.19101715636531e-05, + "loss": 2.5868, + 
"step": 10564 + }, + { + "epoch": 0.852634976999435, + "grad_norm": 0.6461195349693298, + "learning_rate": 9.18944365105022e-05, + "loss": 2.4862, + "step": 10565 + }, + { + "epoch": 0.8527156807360181, + "grad_norm": 0.7230537533760071, + "learning_rate": 9.187870165936313e-05, + "loss": 2.5125, + "step": 10566 + }, + { + "epoch": 0.8527963844726011, + "grad_norm": 0.6858233213424683, + "learning_rate": 9.186296701062805e-05, + "loss": 2.5463, + "step": 10567 + }, + { + "epoch": 0.8528770882091841, + "grad_norm": 0.717407763004303, + "learning_rate": 9.184723256468908e-05, + "loss": 2.5399, + "step": 10568 + }, + { + "epoch": 0.852957791945767, + "grad_norm": 0.7537745237350464, + "learning_rate": 9.18314983219384e-05, + "loss": 2.5164, + "step": 10569 + }, + { + "epoch": 0.8530384956823501, + "grad_norm": 0.7068665027618408, + "learning_rate": 9.181576428276814e-05, + "loss": 2.5747, + "step": 10570 + }, + { + "epoch": 0.8531191994189331, + "grad_norm": 0.8013456463813782, + "learning_rate": 9.18000304475704e-05, + "loss": 2.5401, + "step": 10571 + }, + { + "epoch": 0.8531999031555161, + "grad_norm": 0.6458969712257385, + "learning_rate": 9.178429681673741e-05, + "loss": 2.4781, + "step": 10572 + }, + { + "epoch": 0.8532806068920991, + "grad_norm": 0.7235112190246582, + "learning_rate": 9.176856339066114e-05, + "loss": 2.5753, + "step": 10573 + }, + { + "epoch": 0.8533613106286821, + "grad_norm": 0.6815706491470337, + "learning_rate": 9.175283016973382e-05, + "loss": 2.5526, + "step": 10574 + }, + { + "epoch": 0.8534420143652651, + "grad_norm": 0.739747166633606, + "learning_rate": 9.173709715434751e-05, + "loss": 2.5631, + "step": 10575 + }, + { + "epoch": 0.8535227181018481, + "grad_norm": 0.7325060963630676, + "learning_rate": 9.172136434489437e-05, + "loss": 2.4925, + "step": 10576 + }, + { + "epoch": 0.8536034218384311, + "grad_norm": 0.6505454182624817, + "learning_rate": 9.170563174176645e-05, + "loss": 2.5423, + "step": 10577 + }, + { + "epoch": 
0.8536841255750142, + "grad_norm": 0.7267098426818848, + "learning_rate": 9.168989934535586e-05, + "loss": 2.5687, + "step": 10578 + }, + { + "epoch": 0.8537648293115971, + "grad_norm": 0.7264497876167297, + "learning_rate": 9.167416715605476e-05, + "loss": 2.5165, + "step": 10579 + }, + { + "epoch": 0.8538455330481801, + "grad_norm": 0.7473852634429932, + "learning_rate": 9.165843517425509e-05, + "loss": 2.5837, + "step": 10580 + }, + { + "epoch": 0.8539262367847631, + "grad_norm": 0.7249133586883545, + "learning_rate": 9.164270340034906e-05, + "loss": 2.5805, + "step": 10581 + }, + { + "epoch": 0.8540069405213462, + "grad_norm": 0.7463760375976562, + "learning_rate": 9.162697183472875e-05, + "loss": 2.5067, + "step": 10582 + }, + { + "epoch": 0.8540876442579292, + "grad_norm": 0.7125511169433594, + "learning_rate": 9.161124047778614e-05, + "loss": 2.5093, + "step": 10583 + }, + { + "epoch": 0.8541683479945121, + "grad_norm": 0.7247455716133118, + "learning_rate": 9.159550932991335e-05, + "loss": 2.5356, + "step": 10584 + }, + { + "epoch": 0.8542490517310951, + "grad_norm": 0.7593860030174255, + "learning_rate": 9.157977839150246e-05, + "loss": 2.5477, + "step": 10585 + }, + { + "epoch": 0.8543297554676782, + "grad_norm": 0.6758295297622681, + "learning_rate": 9.156404766294547e-05, + "loss": 2.4748, + "step": 10586 + }, + { + "epoch": 0.8544104592042612, + "grad_norm": 0.7114073634147644, + "learning_rate": 9.154831714463447e-05, + "loss": 2.5479, + "step": 10587 + }, + { + "epoch": 0.8544911629408442, + "grad_norm": 0.6881263256072998, + "learning_rate": 9.153258683696156e-05, + "loss": 2.5471, + "step": 10588 + }, + { + "epoch": 0.8545718666774271, + "grad_norm": 0.6509317755699158, + "learning_rate": 9.151685674031866e-05, + "loss": 2.5239, + "step": 10589 + }, + { + "epoch": 0.8546525704140102, + "grad_norm": 0.7754644751548767, + "learning_rate": 9.150112685509787e-05, + "loss": 2.5572, + "step": 10590 + }, + { + "epoch": 0.8547332741505932, + "grad_norm": 
0.707080602645874, + "learning_rate": 9.148539718169118e-05, + "loss": 2.5572, + "step": 10591 + }, + { + "epoch": 0.8548139778871762, + "grad_norm": 0.6996685266494751, + "learning_rate": 9.146966772049073e-05, + "loss": 2.4968, + "step": 10592 + }, + { + "epoch": 0.8548946816237591, + "grad_norm": 0.6830589771270752, + "learning_rate": 9.145393847188841e-05, + "loss": 2.5795, + "step": 10593 + }, + { + "epoch": 0.8549753853603422, + "grad_norm": 0.7507784366607666, + "learning_rate": 9.143820943627628e-05, + "loss": 2.6135, + "step": 10594 + }, + { + "epoch": 0.8550560890969252, + "grad_norm": 0.673218309879303, + "learning_rate": 9.142248061404638e-05, + "loss": 2.5875, + "step": 10595 + }, + { + "epoch": 0.8551367928335082, + "grad_norm": 0.6861804723739624, + "learning_rate": 9.140675200559065e-05, + "loss": 2.5892, + "step": 10596 + }, + { + "epoch": 0.8552174965700912, + "grad_norm": 0.6928709149360657, + "learning_rate": 9.139102361130114e-05, + "loss": 2.5303, + "step": 10597 + }, + { + "epoch": 0.8552982003066743, + "grad_norm": 0.6958343386650085, + "learning_rate": 9.137529543156986e-05, + "loss": 2.5567, + "step": 10598 + }, + { + "epoch": 0.8553789040432572, + "grad_norm": 0.703845739364624, + "learning_rate": 9.135956746678873e-05, + "loss": 2.5215, + "step": 10599 + }, + { + "epoch": 0.8554596077798402, + "grad_norm": 0.7108649015426636, + "learning_rate": 9.134383971734975e-05, + "loss": 2.5687, + "step": 10600 + }, + { + "epoch": 0.8555403115164232, + "grad_norm": 0.7249850034713745, + "learning_rate": 9.132811218364495e-05, + "loss": 2.565, + "step": 10601 + }, + { + "epoch": 0.8556210152530063, + "grad_norm": 0.7060014009475708, + "learning_rate": 9.131238486606623e-05, + "loss": 2.5366, + "step": 10602 + }, + { + "epoch": 0.8557017189895892, + "grad_norm": 0.6915088891983032, + "learning_rate": 9.129665776500559e-05, + "loss": 2.527, + "step": 10603 + }, + { + "epoch": 0.8557824227261722, + "grad_norm": 0.7226938605308533, + "learning_rate": 
9.128093088085503e-05, + "loss": 2.5999, + "step": 10604 + }, + { + "epoch": 0.8558631264627552, + "grad_norm": 0.6802428364753723, + "learning_rate": 9.126520421400641e-05, + "loss": 2.4788, + "step": 10605 + }, + { + "epoch": 0.8559438301993383, + "grad_norm": 0.7855350375175476, + "learning_rate": 9.124947776485175e-05, + "loss": 2.5349, + "step": 10606 + }, + { + "epoch": 0.8560245339359213, + "grad_norm": 0.6758337020874023, + "learning_rate": 9.123375153378296e-05, + "loss": 2.5874, + "step": 10607 + }, + { + "epoch": 0.8561052376725042, + "grad_norm": 0.675061821937561, + "learning_rate": 9.121802552119206e-05, + "loss": 2.5343, + "step": 10608 + }, + { + "epoch": 0.8561859414090872, + "grad_norm": 0.7044726014137268, + "learning_rate": 9.120229972747087e-05, + "loss": 2.5361, + "step": 10609 + }, + { + "epoch": 0.8562666451456702, + "grad_norm": 0.6324402689933777, + "learning_rate": 9.118657415301137e-05, + "loss": 2.5039, + "step": 10610 + }, + { + "epoch": 0.8563473488822533, + "grad_norm": 0.6621509790420532, + "learning_rate": 9.11708487982055e-05, + "loss": 2.5346, + "step": 10611 + }, + { + "epoch": 0.8564280526188363, + "grad_norm": 0.6709887981414795, + "learning_rate": 9.115512366344516e-05, + "loss": 2.5409, + "step": 10612 + }, + { + "epoch": 0.8565087563554192, + "grad_norm": 0.7237712740898132, + "learning_rate": 9.113939874912223e-05, + "loss": 2.5051, + "step": 10613 + }, + { + "epoch": 0.8565894600920022, + "grad_norm": 0.6646109223365784, + "learning_rate": 9.11236740556287e-05, + "loss": 2.5866, + "step": 10614 + }, + { + "epoch": 0.8566701638285853, + "grad_norm": 0.7131930589675903, + "learning_rate": 9.110794958335637e-05, + "loss": 2.5472, + "step": 10615 + }, + { + "epoch": 0.8567508675651683, + "grad_norm": 0.6662428975105286, + "learning_rate": 9.109222533269715e-05, + "loss": 2.4863, + "step": 10616 + }, + { + "epoch": 0.8568315713017512, + "grad_norm": 0.6527226567268372, + "learning_rate": 9.107650130404304e-05, + "loss": 
2.5594, + "step": 10617 + }, + { + "epoch": 0.8569122750383342, + "grad_norm": 0.6639060378074646, + "learning_rate": 9.106077749778578e-05, + "loss": 2.5519, + "step": 10618 + }, + { + "epoch": 0.8569929787749173, + "grad_norm": 0.7088096737861633, + "learning_rate": 9.104505391431734e-05, + "loss": 2.5404, + "step": 10619 + }, + { + "epoch": 0.8570736825115003, + "grad_norm": 0.7155873775482178, + "learning_rate": 9.102933055402957e-05, + "loss": 2.5636, + "step": 10620 + }, + { + "epoch": 0.8571543862480833, + "grad_norm": 0.6522316932678223, + "learning_rate": 9.101360741731431e-05, + "loss": 2.5216, + "step": 10621 + }, + { + "epoch": 0.8572350899846662, + "grad_norm": 0.6515649557113647, + "learning_rate": 9.099788450456345e-05, + "loss": 2.5804, + "step": 10622 + }, + { + "epoch": 0.8573157937212493, + "grad_norm": 0.6791853904724121, + "learning_rate": 9.098216181616883e-05, + "loss": 2.5353, + "step": 10623 + }, + { + "epoch": 0.8573964974578323, + "grad_norm": 0.6946877241134644, + "learning_rate": 9.096643935252236e-05, + "loss": 2.5492, + "step": 10624 + }, + { + "epoch": 0.8574772011944153, + "grad_norm": 0.7235898375511169, + "learning_rate": 9.095071711401581e-05, + "loss": 2.5178, + "step": 10625 + }, + { + "epoch": 0.8575579049309983, + "grad_norm": 0.6740610003471375, + "learning_rate": 9.093499510104102e-05, + "loss": 2.5699, + "step": 10626 + }, + { + "epoch": 0.8576386086675813, + "grad_norm": 0.7441792488098145, + "learning_rate": 9.091927331398988e-05, + "loss": 2.579, + "step": 10627 + }, + { + "epoch": 0.8577193124041643, + "grad_norm": 0.6986937522888184, + "learning_rate": 9.090355175325416e-05, + "loss": 2.5556, + "step": 10628 + }, + { + "epoch": 0.8578000161407473, + "grad_norm": 0.6960151791572571, + "learning_rate": 9.08878304192257e-05, + "loss": 2.5448, + "step": 10629 + }, + { + "epoch": 0.8578807198773303, + "grad_norm": 0.6376819014549255, + "learning_rate": 9.087210931229636e-05, + "loss": 2.4636, + "step": 10630 + }, + { + 
"epoch": 0.8579614236139134, + "grad_norm": 0.752473771572113, + "learning_rate": 9.08563884328579e-05, + "loss": 2.5451, + "step": 10631 + }, + { + "epoch": 0.8580421273504963, + "grad_norm": 0.6879361867904663, + "learning_rate": 9.084066778130213e-05, + "loss": 2.5365, + "step": 10632 + }, + { + "epoch": 0.8581228310870793, + "grad_norm": 0.6630483865737915, + "learning_rate": 9.082494735802091e-05, + "loss": 2.5085, + "step": 10633 + }, + { + "epoch": 0.8582035348236623, + "grad_norm": 0.689602792263031, + "learning_rate": 9.080922716340594e-05, + "loss": 2.5087, + "step": 10634 + }, + { + "epoch": 0.8582842385602454, + "grad_norm": 0.7333599925041199, + "learning_rate": 9.079350719784905e-05, + "loss": 2.5476, + "step": 10635 + }, + { + "epoch": 0.8583649422968284, + "grad_norm": 0.6895802021026611, + "learning_rate": 9.077778746174204e-05, + "loss": 2.5099, + "step": 10636 + }, + { + "epoch": 0.8584456460334113, + "grad_norm": 0.7202162146568298, + "learning_rate": 9.076206795547668e-05, + "loss": 2.5197, + "step": 10637 + }, + { + "epoch": 0.8585263497699943, + "grad_norm": 0.6454200148582458, + "learning_rate": 9.074634867944472e-05, + "loss": 2.5303, + "step": 10638 + }, + { + "epoch": 0.8586070535065774, + "grad_norm": 0.6842506527900696, + "learning_rate": 9.073062963403795e-05, + "loss": 2.5051, + "step": 10639 + }, + { + "epoch": 0.8586877572431604, + "grad_norm": 0.6979129314422607, + "learning_rate": 9.071491081964815e-05, + "loss": 2.5209, + "step": 10640 + }, + { + "epoch": 0.8587684609797434, + "grad_norm": 0.6851540803909302, + "learning_rate": 9.0699192236667e-05, + "loss": 2.5003, + "step": 10641 + }, + { + "epoch": 0.8588491647163263, + "grad_norm": 0.7528585195541382, + "learning_rate": 9.068347388548627e-05, + "loss": 2.5524, + "step": 10642 + }, + { + "epoch": 0.8589298684529094, + "grad_norm": 0.6297397613525391, + "learning_rate": 9.06677557664978e-05, + "loss": 2.5412, + "step": 10643 + }, + { + "epoch": 0.8590105721894924, + 
"grad_norm": 0.7034026980400085, + "learning_rate": 9.06520378800932e-05, + "loss": 2.4958, + "step": 10644 + }, + { + "epoch": 0.8590912759260754, + "grad_norm": 0.690258800983429, + "learning_rate": 9.063632022666425e-05, + "loss": 2.4894, + "step": 10645 + }, + { + "epoch": 0.8591719796626583, + "grad_norm": 0.6449949145317078, + "learning_rate": 9.06206028066027e-05, + "loss": 2.507, + "step": 10646 + }, + { + "epoch": 0.8592526833992414, + "grad_norm": 0.6328588724136353, + "learning_rate": 9.060488562030023e-05, + "loss": 2.5503, + "step": 10647 + }, + { + "epoch": 0.8593333871358244, + "grad_norm": 0.6570547819137573, + "learning_rate": 9.058916866814858e-05, + "loss": 2.4993, + "step": 10648 + }, + { + "epoch": 0.8594140908724074, + "grad_norm": 0.7689602375030518, + "learning_rate": 9.057345195053945e-05, + "loss": 2.5498, + "step": 10649 + }, + { + "epoch": 0.8594947946089904, + "grad_norm": 0.6727081537246704, + "learning_rate": 9.055773546786454e-05, + "loss": 2.5172, + "step": 10650 + }, + { + "epoch": 0.8595754983455735, + "grad_norm": 0.694722056388855, + "learning_rate": 9.054201922051552e-05, + "loss": 2.5485, + "step": 10651 + }, + { + "epoch": 0.8596562020821564, + "grad_norm": 0.6638815999031067, + "learning_rate": 9.052630320888411e-05, + "loss": 2.5134, + "step": 10652 + }, + { + "epoch": 0.8597369058187394, + "grad_norm": 0.6600833535194397, + "learning_rate": 9.0510587433362e-05, + "loss": 2.5206, + "step": 10653 + }, + { + "epoch": 0.8598176095553224, + "grad_norm": 0.7193894386291504, + "learning_rate": 9.049487189434084e-05, + "loss": 2.5485, + "step": 10654 + }, + { + "epoch": 0.8598983132919055, + "grad_norm": 0.6651753187179565, + "learning_rate": 9.047915659221233e-05, + "loss": 2.5703, + "step": 10655 + }, + { + "epoch": 0.8599790170284884, + "grad_norm": 0.7346364855766296, + "learning_rate": 9.046344152736815e-05, + "loss": 2.5301, + "step": 10656 + }, + { + "epoch": 0.8600597207650714, + "grad_norm": 0.6681811809539795, + 
"learning_rate": 9.04477267001999e-05, + "loss": 2.5124, + "step": 10657 + }, + { + "epoch": 0.8601404245016544, + "grad_norm": 0.6928461790084839, + "learning_rate": 9.043201211109929e-05, + "loss": 2.5153, + "step": 10658 + }, + { + "epoch": 0.8602211282382374, + "grad_norm": 0.6957700252532959, + "learning_rate": 9.041629776045797e-05, + "loss": 2.4697, + "step": 10659 + }, + { + "epoch": 0.8603018319748205, + "grad_norm": 0.6361939311027527, + "learning_rate": 9.040058364866752e-05, + "loss": 2.5162, + "step": 10660 + }, + { + "epoch": 0.8603825357114034, + "grad_norm": 0.6827390193939209, + "learning_rate": 9.038486977611964e-05, + "loss": 2.4856, + "step": 10661 + }, + { + "epoch": 0.8604632394479864, + "grad_norm": 0.6638801097869873, + "learning_rate": 9.036915614320595e-05, + "loss": 2.5224, + "step": 10662 + }, + { + "epoch": 0.8605439431845694, + "grad_norm": 0.7249652743339539, + "learning_rate": 9.035344275031802e-05, + "loss": 2.5461, + "step": 10663 + }, + { + "epoch": 0.8606246469211525, + "grad_norm": 0.6693316102027893, + "learning_rate": 9.033772959784754e-05, + "loss": 2.5676, + "step": 10664 + }, + { + "epoch": 0.8607053506577355, + "grad_norm": 0.6787340641021729, + "learning_rate": 9.032201668618614e-05, + "loss": 2.5374, + "step": 10665 + }, + { + "epoch": 0.8607860543943184, + "grad_norm": 0.6581670641899109, + "learning_rate": 9.030630401572533e-05, + "loss": 2.5052, + "step": 10666 + }, + { + "epoch": 0.8608667581309014, + "grad_norm": 0.6975873112678528, + "learning_rate": 9.029059158685675e-05, + "loss": 2.4823, + "step": 10667 + }, + { + "epoch": 0.8609474618674845, + "grad_norm": 0.6632521748542786, + "learning_rate": 9.027487939997201e-05, + "loss": 2.5992, + "step": 10668 + }, + { + "epoch": 0.8610281656040675, + "grad_norm": 0.6793977618217468, + "learning_rate": 9.025916745546276e-05, + "loss": 2.5308, + "step": 10669 + }, + { + "epoch": 0.8611088693406505, + "grad_norm": 0.6499481797218323, + "learning_rate": 
9.024345575372046e-05, + "loss": 2.4964, + "step": 10670 + }, + { + "epoch": 0.8611895730772334, + "grad_norm": 0.6858868598937988, + "learning_rate": 9.022774429513677e-05, + "loss": 2.5388, + "step": 10671 + }, + { + "epoch": 0.8612702768138165, + "grad_norm": 0.7586160898208618, + "learning_rate": 9.021203308010324e-05, + "loss": 2.5166, + "step": 10672 + }, + { + "epoch": 0.8613509805503995, + "grad_norm": 0.7179701328277588, + "learning_rate": 9.019632210901141e-05, + "loss": 2.5501, + "step": 10673 + }, + { + "epoch": 0.8614316842869825, + "grad_norm": 0.6830369830131531, + "learning_rate": 9.018061138225287e-05, + "loss": 2.4956, + "step": 10674 + }, + { + "epoch": 0.8615123880235654, + "grad_norm": 0.6710512042045593, + "learning_rate": 9.01649009002192e-05, + "loss": 2.5722, + "step": 10675 + }, + { + "epoch": 0.8615930917601485, + "grad_norm": 0.640011727809906, + "learning_rate": 9.014919066330186e-05, + "loss": 2.5197, + "step": 10676 + }, + { + "epoch": 0.8616737954967315, + "grad_norm": 0.6803860664367676, + "learning_rate": 9.013348067189245e-05, + "loss": 2.4794, + "step": 10677 + }, + { + "epoch": 0.8617544992333145, + "grad_norm": 0.6734865307807922, + "learning_rate": 9.011777092638251e-05, + "loss": 2.5831, + "step": 10678 + }, + { + "epoch": 0.8618352029698975, + "grad_norm": 0.6525718569755554, + "learning_rate": 9.010206142716353e-05, + "loss": 2.4925, + "step": 10679 + }, + { + "epoch": 0.8619159067064806, + "grad_norm": 0.6886672377586365, + "learning_rate": 9.008635217462706e-05, + "loss": 2.491, + "step": 10680 + }, + { + "epoch": 0.8619966104430635, + "grad_norm": 0.6397131085395813, + "learning_rate": 9.007064316916461e-05, + "loss": 2.4684, + "step": 10681 + }, + { + "epoch": 0.8620773141796465, + "grad_norm": 0.6308462023735046, + "learning_rate": 9.005493441116768e-05, + "loss": 2.504, + "step": 10682 + }, + { + "epoch": 0.8621580179162295, + "grad_norm": 0.7223808169364929, + "learning_rate": 9.003922590102778e-05, + "loss": 2.5342, 
+ "step": 10683 + }, + { + "epoch": 0.8622387216528126, + "grad_norm": 0.687515914440155, + "learning_rate": 9.002351763913642e-05, + "loss": 2.4822, + "step": 10684 + }, + { + "epoch": 0.8623194253893955, + "grad_norm": 0.6888468265533447, + "learning_rate": 9.00078096258851e-05, + "loss": 2.5497, + "step": 10685 + }, + { + "epoch": 0.8624001291259785, + "grad_norm": 0.7429301738739014, + "learning_rate": 8.999210186166525e-05, + "loss": 2.624, + "step": 10686 + }, + { + "epoch": 0.8624808328625615, + "grad_norm": 0.6901945471763611, + "learning_rate": 8.997639434686839e-05, + "loss": 2.5268, + "step": 10687 + }, + { + "epoch": 0.8625615365991446, + "grad_norm": 0.7396681308746338, + "learning_rate": 8.9960687081886e-05, + "loss": 2.5427, + "step": 10688 + }, + { + "epoch": 0.8626422403357276, + "grad_norm": 0.6825531125068665, + "learning_rate": 8.99449800671095e-05, + "loss": 2.5722, + "step": 10689 + }, + { + "epoch": 0.8627229440723105, + "grad_norm": 0.6719860434532166, + "learning_rate": 8.992927330293039e-05, + "loss": 2.4939, + "step": 10690 + }, + { + "epoch": 0.8628036478088935, + "grad_norm": 0.644567608833313, + "learning_rate": 8.991356678974017e-05, + "loss": 2.5495, + "step": 10691 + }, + { + "epoch": 0.8628843515454766, + "grad_norm": 0.7066643834114075, + "learning_rate": 8.989786052793015e-05, + "loss": 2.5508, + "step": 10692 + }, + { + "epoch": 0.8629650552820596, + "grad_norm": 0.6697196364402771, + "learning_rate": 8.988215451789187e-05, + "loss": 2.5231, + "step": 10693 + }, + { + "epoch": 0.8630457590186426, + "grad_norm": 0.7143658399581909, + "learning_rate": 8.986644876001681e-05, + "loss": 2.5368, + "step": 10694 + }, + { + "epoch": 0.8631264627552255, + "grad_norm": 0.7597684264183044, + "learning_rate": 8.985074325469628e-05, + "loss": 2.5983, + "step": 10695 + }, + { + "epoch": 0.8632071664918086, + "grad_norm": 0.7418014407157898, + "learning_rate": 8.983503800232176e-05, + "loss": 2.5736, + "step": 10696 + }, + { + "epoch": 
0.8632878702283916, + "grad_norm": 0.654435932636261, + "learning_rate": 8.981933300328468e-05, + "loss": 2.5389, + "step": 10697 + }, + { + "epoch": 0.8633685739649746, + "grad_norm": 0.658203661441803, + "learning_rate": 8.980362825797643e-05, + "loss": 2.5204, + "step": 10698 + }, + { + "epoch": 0.8634492777015575, + "grad_norm": 0.7132784724235535, + "learning_rate": 8.97879237667884e-05, + "loss": 2.4982, + "step": 10699 + }, + { + "epoch": 0.8635299814381406, + "grad_norm": 0.6901868581771851, + "learning_rate": 8.9772219530112e-05, + "loss": 2.5599, + "step": 10700 + }, + { + "epoch": 0.8636106851747236, + "grad_norm": 0.6241179704666138, + "learning_rate": 8.975651554833869e-05, + "loss": 2.5185, + "step": 10701 + }, + { + "epoch": 0.8636913889113066, + "grad_norm": 0.693692147731781, + "learning_rate": 8.974081182185974e-05, + "loss": 2.506, + "step": 10702 + }, + { + "epoch": 0.8637720926478896, + "grad_norm": 0.6699246168136597, + "learning_rate": 8.972510835106658e-05, + "loss": 2.557, + "step": 10703 + }, + { + "epoch": 0.8638527963844727, + "grad_norm": 0.7339062094688416, + "learning_rate": 8.970940513635059e-05, + "loss": 2.5614, + "step": 10704 + }, + { + "epoch": 0.8639335001210556, + "grad_norm": 0.7558815479278564, + "learning_rate": 8.969370217810311e-05, + "loss": 2.5949, + "step": 10705 + }, + { + "epoch": 0.8640142038576386, + "grad_norm": 0.6992602348327637, + "learning_rate": 8.96779994767155e-05, + "loss": 2.4755, + "step": 10706 + }, + { + "epoch": 0.8640949075942216, + "grad_norm": 0.6836397647857666, + "learning_rate": 8.966229703257915e-05, + "loss": 2.5172, + "step": 10707 + }, + { + "epoch": 0.8641756113308047, + "grad_norm": 0.7054563760757446, + "learning_rate": 8.964659484608537e-05, + "loss": 2.5186, + "step": 10708 + }, + { + "epoch": 0.8642563150673876, + "grad_norm": 0.7096611261367798, + "learning_rate": 8.963089291762551e-05, + "loss": 2.5157, + "step": 10709 + }, + { + "epoch": 0.8643370188039706, + "grad_norm": 
0.657465934753418, + "learning_rate": 8.961519124759094e-05, + "loss": 2.5332, + "step": 10710 + }, + { + "epoch": 0.8644177225405536, + "grad_norm": 0.7490121126174927, + "learning_rate": 8.959948983637291e-05, + "loss": 2.512, + "step": 10711 + }, + { + "epoch": 0.8644984262771366, + "grad_norm": 0.7074166536331177, + "learning_rate": 8.958378868436279e-05, + "loss": 2.4745, + "step": 10712 + }, + { + "epoch": 0.8645791300137197, + "grad_norm": 0.7496227025985718, + "learning_rate": 8.956808779195188e-05, + "loss": 2.5533, + "step": 10713 + }, + { + "epoch": 0.8646598337503026, + "grad_norm": 0.6624657511711121, + "learning_rate": 8.95523871595315e-05, + "loss": 2.5346, + "step": 10714 + }, + { + "epoch": 0.8647405374868856, + "grad_norm": 0.6829125881195068, + "learning_rate": 8.953668678749292e-05, + "loss": 2.558, + "step": 10715 + }, + { + "epoch": 0.8648212412234686, + "grad_norm": 0.6954498887062073, + "learning_rate": 8.952098667622745e-05, + "loss": 2.5617, + "step": 10716 + }, + { + "epoch": 0.8649019449600517, + "grad_norm": 0.6722636818885803, + "learning_rate": 8.950528682612645e-05, + "loss": 2.5565, + "step": 10717 + }, + { + "epoch": 0.8649826486966347, + "grad_norm": 0.6793767213821411, + "learning_rate": 8.948958723758107e-05, + "loss": 2.5803, + "step": 10718 + }, + { + "epoch": 0.8650633524332176, + "grad_norm": 0.7159373760223389, + "learning_rate": 8.947388791098266e-05, + "loss": 2.5465, + "step": 10719 + }, + { + "epoch": 0.8651440561698006, + "grad_norm": 0.6823835372924805, + "learning_rate": 8.945818884672253e-05, + "loss": 2.5079, + "step": 10720 + }, + { + "epoch": 0.8652247599063837, + "grad_norm": 0.7521452903747559, + "learning_rate": 8.944249004519185e-05, + "loss": 2.5628, + "step": 10721 + }, + { + "epoch": 0.8653054636429667, + "grad_norm": 0.6774886846542358, + "learning_rate": 8.94267915067819e-05, + "loss": 2.6042, + "step": 10722 + }, + { + "epoch": 0.8653861673795497, + "grad_norm": 0.6915935277938843, + "learning_rate": 
8.941109323188398e-05, + "loss": 2.5563, + "step": 10723 + }, + { + "epoch": 0.8654668711161326, + "grad_norm": 0.6609061360359192, + "learning_rate": 8.939539522088927e-05, + "loss": 2.5083, + "step": 10724 + }, + { + "epoch": 0.8655475748527157, + "grad_norm": 0.6457223892211914, + "learning_rate": 8.937969747418903e-05, + "loss": 2.573, + "step": 10725 + }, + { + "epoch": 0.8656282785892987, + "grad_norm": 0.6960360407829285, + "learning_rate": 8.936399999217455e-05, + "loss": 2.516, + "step": 10726 + }, + { + "epoch": 0.8657089823258817, + "grad_norm": 0.7269721627235413, + "learning_rate": 8.934830277523693e-05, + "loss": 2.5932, + "step": 10727 + }, + { + "epoch": 0.8657896860624646, + "grad_norm": 0.7057532668113708, + "learning_rate": 8.933260582376745e-05, + "loss": 2.5022, + "step": 10728 + }, + { + "epoch": 0.8658703897990477, + "grad_norm": 0.6698749661445618, + "learning_rate": 8.931690913815735e-05, + "loss": 2.5357, + "step": 10729 + }, + { + "epoch": 0.8659510935356307, + "grad_norm": 0.6616599559783936, + "learning_rate": 8.930121271879777e-05, + "loss": 2.4776, + "step": 10730 + }, + { + "epoch": 0.8660317972722137, + "grad_norm": 0.7457093000411987, + "learning_rate": 8.928551656607993e-05, + "loss": 2.5799, + "step": 10731 + }, + { + "epoch": 0.8661125010087967, + "grad_norm": 0.7199469804763794, + "learning_rate": 8.926982068039505e-05, + "loss": 2.5278, + "step": 10732 + }, + { + "epoch": 0.8661932047453798, + "grad_norm": 0.7579182386398315, + "learning_rate": 8.925412506213428e-05, + "loss": 2.5227, + "step": 10733 + }, + { + "epoch": 0.8662739084819627, + "grad_norm": 0.687455952167511, + "learning_rate": 8.92384297116888e-05, + "loss": 2.5099, + "step": 10734 + }, + { + "epoch": 0.8663546122185457, + "grad_norm": 0.7616521120071411, + "learning_rate": 8.922273462944978e-05, + "loss": 2.598, + "step": 10735 + }, + { + "epoch": 0.8664353159551287, + "grad_norm": 0.6730697751045227, + "learning_rate": 8.920703981580842e-05, + "loss": 2.5517, 
+ "step": 10736 + }, + { + "epoch": 0.8665160196917118, + "grad_norm": 0.6769895553588867, + "learning_rate": 8.91913452711558e-05, + "loss": 2.5535, + "step": 10737 + }, + { + "epoch": 0.8665967234282947, + "grad_norm": 0.6284549832344055, + "learning_rate": 8.917565099588312e-05, + "loss": 2.4597, + "step": 10738 + }, + { + "epoch": 0.8666774271648777, + "grad_norm": 0.6900805830955505, + "learning_rate": 8.915995699038152e-05, + "loss": 2.5236, + "step": 10739 + }, + { + "epoch": 0.8667581309014607, + "grad_norm": 0.6842896938323975, + "learning_rate": 8.914426325504211e-05, + "loss": 2.5199, + "step": 10740 + }, + { + "epoch": 0.8668388346380438, + "grad_norm": 0.6637243628501892, + "learning_rate": 8.912856979025604e-05, + "loss": 2.5368, + "step": 10741 + }, + { + "epoch": 0.8669195383746268, + "grad_norm": 0.7474464178085327, + "learning_rate": 8.911287659641449e-05, + "loss": 2.4902, + "step": 10742 + }, + { + "epoch": 0.8670002421112097, + "grad_norm": 0.6977849006652832, + "learning_rate": 8.909718367390843e-05, + "loss": 2.5034, + "step": 10743 + }, + { + "epoch": 0.8670809458477927, + "grad_norm": 0.6968807578086853, + "learning_rate": 8.908149102312907e-05, + "loss": 2.5396, + "step": 10744 + }, + { + "epoch": 0.8671616495843758, + "grad_norm": 0.6656209230422974, + "learning_rate": 8.906579864446755e-05, + "loss": 2.5702, + "step": 10745 + }, + { + "epoch": 0.8672423533209588, + "grad_norm": 0.7079079151153564, + "learning_rate": 8.905010653831486e-05, + "loss": 2.5344, + "step": 10746 + }, + { + "epoch": 0.8673230570575418, + "grad_norm": 0.7423387765884399, + "learning_rate": 8.903441470506214e-05, + "loss": 2.5635, + "step": 10747 + }, + { + "epoch": 0.8674037607941247, + "grad_norm": 0.6607224941253662, + "learning_rate": 8.901872314510046e-05, + "loss": 2.54, + "step": 10748 + }, + { + "epoch": 0.8674844645307078, + "grad_norm": 0.6646947860717773, + "learning_rate": 8.900303185882095e-05, + "loss": 2.4661, + "step": 10749 + }, + { + "epoch": 
0.8675651682672908, + "grad_norm": 0.6943496465682983, + "learning_rate": 8.89873408466146e-05, + "loss": 2.5213, + "step": 10750 + }, + { + "epoch": 0.8676458720038738, + "grad_norm": 0.7048123478889465, + "learning_rate": 8.89716501088725e-05, + "loss": 2.5529, + "step": 10751 + }, + { + "epoch": 0.8677265757404568, + "grad_norm": 0.654617428779602, + "learning_rate": 8.895595964598574e-05, + "loss": 2.5535, + "step": 10752 + }, + { + "epoch": 0.8678072794770398, + "grad_norm": 0.672063410282135, + "learning_rate": 8.894026945834531e-05, + "loss": 2.5279, + "step": 10753 + }, + { + "epoch": 0.8678879832136228, + "grad_norm": 0.7134148478507996, + "learning_rate": 8.892457954634225e-05, + "loss": 2.5403, + "step": 10754 + }, + { + "epoch": 0.8679686869502058, + "grad_norm": 0.6457598805427551, + "learning_rate": 8.890888991036768e-05, + "loss": 2.515, + "step": 10755 + }, + { + "epoch": 0.8680493906867888, + "grad_norm": 0.6725220084190369, + "learning_rate": 8.889320055081252e-05, + "loss": 2.4829, + "step": 10756 + }, + { + "epoch": 0.8681300944233719, + "grad_norm": 0.6425862312316895, + "learning_rate": 8.887751146806785e-05, + "loss": 2.4965, + "step": 10757 + }, + { + "epoch": 0.8682107981599548, + "grad_norm": 0.6654682755470276, + "learning_rate": 8.886182266252468e-05, + "loss": 2.48, + "step": 10758 + }, + { + "epoch": 0.8682915018965378, + "grad_norm": 0.7102493643760681, + "learning_rate": 8.884613413457398e-05, + "loss": 2.5415, + "step": 10759 + }, + { + "epoch": 0.8683722056331208, + "grad_norm": 0.6996567249298096, + "learning_rate": 8.883044588460677e-05, + "loss": 2.542, + "step": 10760 + }, + { + "epoch": 0.8684529093697038, + "grad_norm": 0.7011905312538147, + "learning_rate": 8.881475791301405e-05, + "loss": 2.5391, + "step": 10761 + }, + { + "epoch": 0.8685336131062869, + "grad_norm": 0.6508356928825378, + "learning_rate": 8.879907022018686e-05, + "loss": 2.4892, + "step": 10762 + }, + { + "epoch": 0.8686143168428698, + "grad_norm": 
0.7104009985923767, + "learning_rate": 8.878338280651605e-05, + "loss": 2.5152, + "step": 10763 + }, + { + "epoch": 0.8686950205794528, + "grad_norm": 0.6501138210296631, + "learning_rate": 8.876769567239268e-05, + "loss": 2.5767, + "step": 10764 + }, + { + "epoch": 0.8687757243160358, + "grad_norm": 0.6463173031806946, + "learning_rate": 8.875200881820771e-05, + "loss": 2.4758, + "step": 10765 + }, + { + "epoch": 0.8688564280526189, + "grad_norm": 0.6494991779327393, + "learning_rate": 8.873632224435206e-05, + "loss": 2.5364, + "step": 10766 + }, + { + "epoch": 0.8689371317892018, + "grad_norm": 0.6926043033599854, + "learning_rate": 8.872063595121671e-05, + "loss": 2.5288, + "step": 10767 + }, + { + "epoch": 0.8690178355257848, + "grad_norm": 0.7076035737991333, + "learning_rate": 8.870494993919261e-05, + "loss": 2.5118, + "step": 10768 + }, + { + "epoch": 0.8690985392623678, + "grad_norm": 0.6456892490386963, + "learning_rate": 8.868926420867068e-05, + "loss": 2.4957, + "step": 10769 + }, + { + "epoch": 0.8691792429989509, + "grad_norm": 0.6585200428962708, + "learning_rate": 8.867357876004183e-05, + "loss": 2.5049, + "step": 10770 + }, + { + "epoch": 0.8692599467355339, + "grad_norm": 0.6893252730369568, + "learning_rate": 8.865789359369706e-05, + "loss": 2.4808, + "step": 10771 + }, + { + "epoch": 0.8693406504721168, + "grad_norm": 0.6700639724731445, + "learning_rate": 8.864220871002719e-05, + "loss": 2.5475, + "step": 10772 + }, + { + "epoch": 0.8694213542086998, + "grad_norm": 0.6551913619041443, + "learning_rate": 8.862652410942315e-05, + "loss": 2.5063, + "step": 10773 + }, + { + "epoch": 0.8695020579452829, + "grad_norm": 0.6870427131652832, + "learning_rate": 8.86108397922759e-05, + "loss": 2.5785, + "step": 10774 + }, + { + "epoch": 0.8695827616818659, + "grad_norm": 0.6489934325218201, + "learning_rate": 8.859515575897626e-05, + "loss": 2.5584, + "step": 10775 + }, + { + "epoch": 0.8696634654184489, + "grad_norm": 0.6726663112640381, + 
"learning_rate": 8.857947200991517e-05, + "loss": 2.5707, + "step": 10776 + }, + { + "epoch": 0.8697441691550318, + "grad_norm": 0.7696183323860168, + "learning_rate": 8.856378854548347e-05, + "loss": 2.501, + "step": 10777 + }, + { + "epoch": 0.8698248728916149, + "grad_norm": 0.7002642154693604, + "learning_rate": 8.854810536607212e-05, + "loss": 2.5792, + "step": 10778 + }, + { + "epoch": 0.8699055766281979, + "grad_norm": 0.6429435610771179, + "learning_rate": 8.853242247207185e-05, + "loss": 2.5463, + "step": 10779 + }, + { + "epoch": 0.8699862803647809, + "grad_norm": 0.7006216645240784, + "learning_rate": 8.851673986387358e-05, + "loss": 2.5698, + "step": 10780 + }, + { + "epoch": 0.8700669841013638, + "grad_norm": 0.7053292989730835, + "learning_rate": 8.850105754186824e-05, + "loss": 2.5468, + "step": 10781 + }, + { + "epoch": 0.8701476878379469, + "grad_norm": 0.6592122912406921, + "learning_rate": 8.848537550644654e-05, + "loss": 2.5271, + "step": 10782 + }, + { + "epoch": 0.8702283915745299, + "grad_norm": 0.679132342338562, + "learning_rate": 8.846969375799941e-05, + "loss": 2.5281, + "step": 10783 + }, + { + "epoch": 0.8703090953111129, + "grad_norm": 0.6868568062782288, + "learning_rate": 8.845401229691765e-05, + "loss": 2.5415, + "step": 10784 + }, + { + "epoch": 0.8703897990476959, + "grad_norm": 0.7060674428939819, + "learning_rate": 8.843833112359208e-05, + "loss": 2.5649, + "step": 10785 + }, + { + "epoch": 0.870470502784279, + "grad_norm": 0.6663981676101685, + "learning_rate": 8.842265023841352e-05, + "loss": 2.5055, + "step": 10786 + }, + { + "epoch": 0.8705512065208619, + "grad_norm": 0.7095218896865845, + "learning_rate": 8.840696964177282e-05, + "loss": 2.5442, + "step": 10787 + }, + { + "epoch": 0.8706319102574449, + "grad_norm": 0.6884104013442993, + "learning_rate": 8.839128933406069e-05, + "loss": 2.5285, + "step": 10788 + }, + { + "epoch": 0.8707126139940279, + "grad_norm": 0.6427462697029114, + "learning_rate": 8.837560931566798e-05, 
+ "loss": 2.5197, + "step": 10789 + }, + { + "epoch": 0.870793317730611, + "grad_norm": 0.6870493292808533, + "learning_rate": 8.835992958698548e-05, + "loss": 2.4937, + "step": 10790 + }, + { + "epoch": 0.870874021467194, + "grad_norm": 0.7006319761276245, + "learning_rate": 8.834425014840398e-05, + "loss": 2.5148, + "step": 10791 + }, + { + "epoch": 0.8709547252037769, + "grad_norm": 0.690601646900177, + "learning_rate": 8.83285710003142e-05, + "loss": 2.5454, + "step": 10792 + }, + { + "epoch": 0.8710354289403599, + "grad_norm": 0.7205955982208252, + "learning_rate": 8.831289214310695e-05, + "loss": 2.5221, + "step": 10793 + }, + { + "epoch": 0.871116132676943, + "grad_norm": 0.7134295105934143, + "learning_rate": 8.8297213577173e-05, + "loss": 2.5626, + "step": 10794 + }, + { + "epoch": 0.871196836413526, + "grad_norm": 0.6560496091842651, + "learning_rate": 8.828153530290307e-05, + "loss": 2.5408, + "step": 10795 + }, + { + "epoch": 0.8712775401501089, + "grad_norm": 0.7055882215499878, + "learning_rate": 8.82658573206879e-05, + "loss": 2.5173, + "step": 10796 + }, + { + "epoch": 0.8713582438866919, + "grad_norm": 0.6751883029937744, + "learning_rate": 8.825017963091827e-05, + "loss": 2.5378, + "step": 10797 + }, + { + "epoch": 0.871438947623275, + "grad_norm": 0.6794824600219727, + "learning_rate": 8.823450223398485e-05, + "loss": 2.592, + "step": 10798 + }, + { + "epoch": 0.871519651359858, + "grad_norm": 0.675729513168335, + "learning_rate": 8.821882513027838e-05, + "loss": 2.5253, + "step": 10799 + }, + { + "epoch": 0.871600355096441, + "grad_norm": 0.7185894250869751, + "learning_rate": 8.820314832018962e-05, + "loss": 2.5073, + "step": 10800 + }, + { + "epoch": 0.8716810588330239, + "grad_norm": 0.6605187654495239, + "learning_rate": 8.818747180410921e-05, + "loss": 2.5141, + "step": 10801 + }, + { + "epoch": 0.871761762569607, + "grad_norm": 0.6955205798149109, + "learning_rate": 8.817179558242788e-05, + "loss": 2.5313, + "step": 10802 + }, + { + 
"epoch": 0.87184246630619, + "grad_norm": 0.6307928562164307, + "learning_rate": 8.815611965553638e-05, + "loss": 2.4975, + "step": 10803 + }, + { + "epoch": 0.871923170042773, + "grad_norm": 0.7283728122711182, + "learning_rate": 8.814044402382527e-05, + "loss": 2.4623, + "step": 10804 + }, + { + "epoch": 0.872003873779356, + "grad_norm": 0.7019702792167664, + "learning_rate": 8.81247686876853e-05, + "loss": 2.4755, + "step": 10805 + }, + { + "epoch": 0.872084577515939, + "grad_norm": 0.6769137382507324, + "learning_rate": 8.81090936475072e-05, + "loss": 2.59, + "step": 10806 + }, + { + "epoch": 0.872165281252522, + "grad_norm": 0.6185588836669922, + "learning_rate": 8.80934189036815e-05, + "loss": 2.5308, + "step": 10807 + }, + { + "epoch": 0.872245984989105, + "grad_norm": 0.7127000689506531, + "learning_rate": 8.807774445659894e-05, + "loss": 2.5301, + "step": 10808 + }, + { + "epoch": 0.872326688725688, + "grad_norm": 0.7039114236831665, + "learning_rate": 8.806207030665016e-05, + "loss": 2.5176, + "step": 10809 + }, + { + "epoch": 0.8724073924622711, + "grad_norm": 0.6763370633125305, + "learning_rate": 8.804639645422582e-05, + "loss": 2.5324, + "step": 10810 + }, + { + "epoch": 0.872488096198854, + "grad_norm": 0.7546409368515015, + "learning_rate": 8.803072289971648e-05, + "loss": 2.5446, + "step": 10811 + }, + { + "epoch": 0.872568799935437, + "grad_norm": 0.6916004419326782, + "learning_rate": 8.801504964351284e-05, + "loss": 2.5056, + "step": 10812 + }, + { + "epoch": 0.87264950367202, + "grad_norm": 0.7108416557312012, + "learning_rate": 8.799937668600552e-05, + "loss": 2.5966, + "step": 10813 + }, + { + "epoch": 0.872730207408603, + "grad_norm": 0.7146576046943665, + "learning_rate": 8.798370402758506e-05, + "loss": 2.5152, + "step": 10814 + }, + { + "epoch": 0.872810911145186, + "grad_norm": 0.6708142757415771, + "learning_rate": 8.796803166864211e-05, + "loss": 2.5248, + "step": 10815 + }, + { + "epoch": 0.872891614881769, + "grad_norm": 
0.6687600612640381, + "learning_rate": 8.795235960956729e-05, + "loss": 2.4451, + "step": 10816 + }, + { + "epoch": 0.872972318618352, + "grad_norm": 0.724012553691864, + "learning_rate": 8.793668785075114e-05, + "loss": 2.4816, + "step": 10817 + }, + { + "epoch": 0.873053022354935, + "grad_norm": 0.6938769221305847, + "learning_rate": 8.792101639258426e-05, + "loss": 2.5435, + "step": 10818 + }, + { + "epoch": 0.8731337260915181, + "grad_norm": 0.7066235542297363, + "learning_rate": 8.790534523545724e-05, + "loss": 2.5167, + "step": 10819 + }, + { + "epoch": 0.873214429828101, + "grad_norm": 0.7129037380218506, + "learning_rate": 8.788967437976062e-05, + "loss": 2.5079, + "step": 10820 + }, + { + "epoch": 0.873295133564684, + "grad_norm": 0.6949728727340698, + "learning_rate": 8.787400382588497e-05, + "loss": 2.5564, + "step": 10821 + }, + { + "epoch": 0.873375837301267, + "grad_norm": 0.7924233675003052, + "learning_rate": 8.785833357422088e-05, + "loss": 2.5748, + "step": 10822 + }, + { + "epoch": 0.8734565410378501, + "grad_norm": 0.7486331462860107, + "learning_rate": 8.784266362515882e-05, + "loss": 2.565, + "step": 10823 + }, + { + "epoch": 0.8735372447744331, + "grad_norm": 0.7036460638046265, + "learning_rate": 8.782699397908935e-05, + "loss": 2.5101, + "step": 10824 + }, + { + "epoch": 0.873617948511016, + "grad_norm": 0.6691471338272095, + "learning_rate": 8.781132463640302e-05, + "loss": 2.5262, + "step": 10825 + }, + { + "epoch": 0.873698652247599, + "grad_norm": 0.6836682558059692, + "learning_rate": 8.779565559749037e-05, + "loss": 2.5651, + "step": 10826 + }, + { + "epoch": 0.8737793559841821, + "grad_norm": 0.6634507775306702, + "learning_rate": 8.777998686274185e-05, + "loss": 2.5383, + "step": 10827 + }, + { + "epoch": 0.8738600597207651, + "grad_norm": 0.6903105974197388, + "learning_rate": 8.7764318432548e-05, + "loss": 2.5659, + "step": 10828 + }, + { + "epoch": 0.8739407634573481, + "grad_norm": 0.737859308719635, + "learning_rate": 
8.774865030729937e-05, + "loss": 2.5859, + "step": 10829 + }, + { + "epoch": 0.874021467193931, + "grad_norm": 0.696843683719635, + "learning_rate": 8.773298248738633e-05, + "loss": 2.5244, + "step": 10830 + }, + { + "epoch": 0.8741021709305141, + "grad_norm": 0.7342235445976257, + "learning_rate": 8.771731497319946e-05, + "loss": 2.5073, + "step": 10831 + }, + { + "epoch": 0.8741828746670971, + "grad_norm": 0.6676939725875854, + "learning_rate": 8.770164776512926e-05, + "loss": 2.5408, + "step": 10832 + }, + { + "epoch": 0.8742635784036801, + "grad_norm": 0.6957886219024658, + "learning_rate": 8.768598086356608e-05, + "loss": 2.5566, + "step": 10833 + }, + { + "epoch": 0.874344282140263, + "grad_norm": 0.6938990950584412, + "learning_rate": 8.767031426890046e-05, + "loss": 2.517, + "step": 10834 + }, + { + "epoch": 0.8744249858768461, + "grad_norm": 0.8387169241905212, + "learning_rate": 8.765464798152286e-05, + "loss": 2.5507, + "step": 10835 + }, + { + "epoch": 0.8745056896134291, + "grad_norm": 0.6396276354789734, + "learning_rate": 8.763898200182368e-05, + "loss": 2.5063, + "step": 10836 + }, + { + "epoch": 0.8745863933500121, + "grad_norm": 0.7122719883918762, + "learning_rate": 8.762331633019339e-05, + "loss": 2.5816, + "step": 10837 + }, + { + "epoch": 0.8746670970865951, + "grad_norm": 0.6807141304016113, + "learning_rate": 8.760765096702244e-05, + "loss": 2.6004, + "step": 10838 + }, + { + "epoch": 0.8747478008231782, + "grad_norm": 0.6764848232269287, + "learning_rate": 8.759198591270117e-05, + "loss": 2.5303, + "step": 10839 + }, + { + "epoch": 0.8748285045597611, + "grad_norm": 0.718515932559967, + "learning_rate": 8.757632116762006e-05, + "loss": 2.5088, + "step": 10840 + }, + { + "epoch": 0.8749092082963441, + "grad_norm": 0.7084362506866455, + "learning_rate": 8.75606567321695e-05, + "loss": 2.5496, + "step": 10841 + }, + { + "epoch": 0.8749899120329271, + "grad_norm": 0.7191734910011292, + "learning_rate": 8.754499260673991e-05, + "loss": 2.5525, + 
"step": 10842 + }, + { + "epoch": 0.8750706157695102, + "grad_norm": 0.7167977094650269, + "learning_rate": 8.752932879172164e-05, + "loss": 2.5479, + "step": 10843 + }, + { + "epoch": 0.8751513195060932, + "grad_norm": 0.6994979381561279, + "learning_rate": 8.751366528750511e-05, + "loss": 2.4942, + "step": 10844 + }, + { + "epoch": 0.8752320232426761, + "grad_norm": 0.7192725539207458, + "learning_rate": 8.749800209448068e-05, + "loss": 2.5233, + "step": 10845 + }, + { + "epoch": 0.8753127269792591, + "grad_norm": 0.7728807330131531, + "learning_rate": 8.748233921303871e-05, + "loss": 2.5698, + "step": 10846 + }, + { + "epoch": 0.8753934307158422, + "grad_norm": 0.7305434942245483, + "learning_rate": 8.746667664356956e-05, + "loss": 2.5096, + "step": 10847 + }, + { + "epoch": 0.8754741344524252, + "grad_norm": 0.7117629051208496, + "learning_rate": 8.745101438646365e-05, + "loss": 2.5272, + "step": 10848 + }, + { + "epoch": 0.8755548381890081, + "grad_norm": 0.7180361151695251, + "learning_rate": 8.743535244211121e-05, + "loss": 2.4718, + "step": 10849 + }, + { + "epoch": 0.8756355419255911, + "grad_norm": 0.6419457793235779, + "learning_rate": 8.741969081090263e-05, + "loss": 2.5407, + "step": 10850 + }, + { + "epoch": 0.8757162456621742, + "grad_norm": 0.7928328514099121, + "learning_rate": 8.740402949322827e-05, + "loss": 2.488, + "step": 10851 + }, + { + "epoch": 0.8757969493987572, + "grad_norm": 0.7449139952659607, + "learning_rate": 8.738836848947839e-05, + "loss": 2.5943, + "step": 10852 + }, + { + "epoch": 0.8758776531353402, + "grad_norm": 0.7919576168060303, + "learning_rate": 8.737270780004334e-05, + "loss": 2.5556, + "step": 10853 + }, + { + "epoch": 0.8759583568719231, + "grad_norm": 0.6867526769638062, + "learning_rate": 8.735704742531346e-05, + "loss": 2.5395, + "step": 10854 + }, + { + "epoch": 0.8760390606085062, + "grad_norm": 0.7195394039154053, + "learning_rate": 8.734138736567896e-05, + "loss": 2.4404, + "step": 10855 + }, + { + "epoch": 
0.8761197643450892, + "grad_norm": 0.68385910987854, + "learning_rate": 8.732572762153016e-05, + "loss": 2.502, + "step": 10856 + }, + { + "epoch": 0.8762004680816722, + "grad_norm": 0.6957393884658813, + "learning_rate": 8.731006819325739e-05, + "loss": 2.5788, + "step": 10857 + }, + { + "epoch": 0.8762811718182552, + "grad_norm": 0.6973037123680115, + "learning_rate": 8.729440908125092e-05, + "loss": 2.4927, + "step": 10858 + }, + { + "epoch": 0.8763618755548382, + "grad_norm": 0.6535985469818115, + "learning_rate": 8.727875028590095e-05, + "loss": 2.596, + "step": 10859 + }, + { + "epoch": 0.8764425792914212, + "grad_norm": 0.7447848320007324, + "learning_rate": 8.726309180759777e-05, + "loss": 2.5825, + "step": 10860 + }, + { + "epoch": 0.8765232830280042, + "grad_norm": 0.7155942320823669, + "learning_rate": 8.724743364673168e-05, + "loss": 2.5105, + "step": 10861 + }, + { + "epoch": 0.8766039867645872, + "grad_norm": 0.6664694547653198, + "learning_rate": 8.723177580369285e-05, + "loss": 2.5244, + "step": 10862 + }, + { + "epoch": 0.8766846905011701, + "grad_norm": 0.7437852025032043, + "learning_rate": 8.721611827887153e-05, + "loss": 2.534, + "step": 10863 + }, + { + "epoch": 0.8767653942377532, + "grad_norm": 0.6752577424049377, + "learning_rate": 8.7200461072658e-05, + "loss": 2.5025, + "step": 10864 + }, + { + "epoch": 0.8768460979743362, + "grad_norm": 0.7420764565467834, + "learning_rate": 8.718480418544241e-05, + "loss": 2.5261, + "step": 10865 + }, + { + "epoch": 0.8769268017109192, + "grad_norm": 0.669384777545929, + "learning_rate": 8.7169147617615e-05, + "loss": 2.5258, + "step": 10866 + }, + { + "epoch": 0.8770075054475022, + "grad_norm": 0.6649587750434875, + "learning_rate": 8.715349136956599e-05, + "loss": 2.5308, + "step": 10867 + }, + { + "epoch": 0.8770882091840853, + "grad_norm": 0.728922426700592, + "learning_rate": 8.713783544168552e-05, + "loss": 2.5251, + "step": 10868 + }, + { + "epoch": 0.8771689129206682, + "grad_norm": 
0.6957671642303467, + "learning_rate": 8.712217983436384e-05, + "loss": 2.5818, + "step": 10869 + }, + { + "epoch": 0.8772496166572512, + "grad_norm": 0.6796830892562866, + "learning_rate": 8.710652454799108e-05, + "loss": 2.5122, + "step": 10870 + }, + { + "epoch": 0.8773303203938342, + "grad_norm": 0.7230980396270752, + "learning_rate": 8.709086958295746e-05, + "loss": 2.5836, + "step": 10871 + }, + { + "epoch": 0.8774110241304173, + "grad_norm": 0.6992264986038208, + "learning_rate": 8.707521493965309e-05, + "loss": 2.5907, + "step": 10872 + }, + { + "epoch": 0.8774917278670002, + "grad_norm": 0.7066535353660583, + "learning_rate": 8.705956061846816e-05, + "loss": 2.5508, + "step": 10873 + }, + { + "epoch": 0.8775724316035832, + "grad_norm": 0.6559327244758606, + "learning_rate": 8.704390661979283e-05, + "loss": 2.611, + "step": 10874 + }, + { + "epoch": 0.8776531353401662, + "grad_norm": 0.6673287749290466, + "learning_rate": 8.70282529440172e-05, + "loss": 2.5778, + "step": 10875 + }, + { + "epoch": 0.8777338390767493, + "grad_norm": 0.6715971231460571, + "learning_rate": 8.701259959153139e-05, + "loss": 2.5342, + "step": 10876 + }, + { + "epoch": 0.8778145428133323, + "grad_norm": 0.7456488609313965, + "learning_rate": 8.699694656272557e-05, + "loss": 2.5365, + "step": 10877 + }, + { + "epoch": 0.8778952465499152, + "grad_norm": 0.6658159494400024, + "learning_rate": 8.698129385798983e-05, + "loss": 2.4387, + "step": 10878 + }, + { + "epoch": 0.8779759502864982, + "grad_norm": 0.6653816103935242, + "learning_rate": 8.696564147771427e-05, + "loss": 2.5791, + "step": 10879 + }, + { + "epoch": 0.8780566540230813, + "grad_norm": 0.6763200163841248, + "learning_rate": 8.694998942228902e-05, + "loss": 2.5356, + "step": 10880 + }, + { + "epoch": 0.8781373577596643, + "grad_norm": 0.6534504890441895, + "learning_rate": 8.69343376921041e-05, + "loss": 2.5358, + "step": 10881 + }, + { + "epoch": 0.8782180614962473, + "grad_norm": 0.6341667771339417, + "learning_rate": 
8.691868628754967e-05, + "loss": 2.4927, + "step": 10882 + }, + { + "epoch": 0.8782987652328302, + "grad_norm": 0.6215559244155884, + "learning_rate": 8.690303520901579e-05, + "loss": 2.4312, + "step": 10883 + }, + { + "epoch": 0.8783794689694133, + "grad_norm": 0.6705841422080994, + "learning_rate": 8.688738445689248e-05, + "loss": 2.4778, + "step": 10884 + }, + { + "epoch": 0.8784601727059963, + "grad_norm": 0.680275559425354, + "learning_rate": 8.687173403156982e-05, + "loss": 2.5577, + "step": 10885 + }, + { + "epoch": 0.8785408764425793, + "grad_norm": 0.6918728351593018, + "learning_rate": 8.685608393343789e-05, + "loss": 2.5212, + "step": 10886 + }, + { + "epoch": 0.8786215801791623, + "grad_norm": 0.623636782169342, + "learning_rate": 8.68404341628867e-05, + "loss": 2.5131, + "step": 10887 + }, + { + "epoch": 0.8787022839157453, + "grad_norm": 0.7200562357902527, + "learning_rate": 8.682478472030628e-05, + "loss": 2.5517, + "step": 10888 + }, + { + "epoch": 0.8787829876523283, + "grad_norm": 0.6902644634246826, + "learning_rate": 8.680913560608666e-05, + "loss": 2.511, + "step": 10889 + }, + { + "epoch": 0.8788636913889113, + "grad_norm": 0.6855802536010742, + "learning_rate": 8.679348682061792e-05, + "loss": 2.5169, + "step": 10890 + }, + { + "epoch": 0.8789443951254943, + "grad_norm": 0.7229284048080444, + "learning_rate": 8.677783836428995e-05, + "loss": 2.5634, + "step": 10891 + }, + { + "epoch": 0.8790250988620774, + "grad_norm": 0.6350376605987549, + "learning_rate": 8.676219023749281e-05, + "loss": 2.443, + "step": 10892 + }, + { + "epoch": 0.8791058025986603, + "grad_norm": 0.6884307265281677, + "learning_rate": 8.674654244061653e-05, + "loss": 2.524, + "step": 10893 + }, + { + "epoch": 0.8791865063352433, + "grad_norm": 0.6571067571640015, + "learning_rate": 8.673089497405102e-05, + "loss": 2.5322, + "step": 10894 + }, + { + "epoch": 0.8792672100718263, + "grad_norm": 0.7078021764755249, + "learning_rate": 8.67152478381863e-05, + "loss": 2.5317, + 
"step": 10895 + }, + { + "epoch": 0.8793479138084094, + "grad_norm": 0.6809059381484985, + "learning_rate": 8.669960103341236e-05, + "loss": 2.5767, + "step": 10896 + }, + { + "epoch": 0.8794286175449924, + "grad_norm": 0.7399441003799438, + "learning_rate": 8.66839545601191e-05, + "loss": 2.5194, + "step": 10897 + }, + { + "epoch": 0.8795093212815753, + "grad_norm": 0.6762270927429199, + "learning_rate": 8.66683084186965e-05, + "loss": 2.5306, + "step": 10898 + }, + { + "epoch": 0.8795900250181583, + "grad_norm": 0.7394620776176453, + "learning_rate": 8.665266260953455e-05, + "loss": 2.4516, + "step": 10899 + }, + { + "epoch": 0.8796707287547414, + "grad_norm": 0.6775416135787964, + "learning_rate": 8.663701713302309e-05, + "loss": 2.5574, + "step": 10900 + }, + { + "epoch": 0.8797514324913244, + "grad_norm": 0.7630520462989807, + "learning_rate": 8.66213719895521e-05, + "loss": 2.5516, + "step": 10901 + }, + { + "epoch": 0.8798321362279073, + "grad_norm": 0.6555768847465515, + "learning_rate": 8.660572717951149e-05, + "loss": 2.5267, + "step": 10902 + }, + { + "epoch": 0.8799128399644903, + "grad_norm": 0.6899500489234924, + "learning_rate": 8.659008270329119e-05, + "loss": 2.4938, + "step": 10903 + }, + { + "epoch": 0.8799935437010734, + "grad_norm": 0.6939221024513245, + "learning_rate": 8.657443856128107e-05, + "loss": 2.5358, + "step": 10904 + }, + { + "epoch": 0.8800742474376564, + "grad_norm": 0.6454630494117737, + "learning_rate": 8.655879475387102e-05, + "loss": 2.5528, + "step": 10905 + }, + { + "epoch": 0.8801549511742394, + "grad_norm": 0.7142425775527954, + "learning_rate": 8.654315128145099e-05, + "loss": 2.5668, + "step": 10906 + }, + { + "epoch": 0.8802356549108223, + "grad_norm": 0.7512764930725098, + "learning_rate": 8.652750814441075e-05, + "loss": 2.5224, + "step": 10907 + }, + { + "epoch": 0.8803163586474054, + "grad_norm": 0.6599575877189636, + "learning_rate": 8.651186534314026e-05, + "loss": 2.5363, + "step": 10908 + }, + { + "epoch": 
0.8803970623839884, + "grad_norm": 0.6787410974502563, + "learning_rate": 8.649622287802935e-05, + "loss": 2.4587, + "step": 10909 + }, + { + "epoch": 0.8804777661205714, + "grad_norm": 0.7124783396720886, + "learning_rate": 8.648058074946786e-05, + "loss": 2.5842, + "step": 10910 + }, + { + "epoch": 0.8805584698571544, + "grad_norm": 0.6698839664459229, + "learning_rate": 8.646493895784562e-05, + "loss": 2.513, + "step": 10911 + }, + { + "epoch": 0.8806391735937374, + "grad_norm": 0.6660044193267822, + "learning_rate": 8.644929750355249e-05, + "loss": 2.4996, + "step": 10912 + }, + { + "epoch": 0.8807198773303204, + "grad_norm": 0.7060455083847046, + "learning_rate": 8.643365638697828e-05, + "loss": 2.5497, + "step": 10913 + }, + { + "epoch": 0.8808005810669034, + "grad_norm": 0.6835277676582336, + "learning_rate": 8.641801560851281e-05, + "loss": 2.5198, + "step": 10914 + }, + { + "epoch": 0.8808812848034864, + "grad_norm": 0.6994042992591858, + "learning_rate": 8.640237516854595e-05, + "loss": 2.5692, + "step": 10915 + }, + { + "epoch": 0.8809619885400694, + "grad_norm": 0.6583377718925476, + "learning_rate": 8.63867350674674e-05, + "loss": 2.5025, + "step": 10916 + }, + { + "epoch": 0.8810426922766524, + "grad_norm": 0.6882332563400269, + "learning_rate": 8.637109530566698e-05, + "loss": 2.5343, + "step": 10917 + }, + { + "epoch": 0.8811233960132354, + "grad_norm": 0.6329876184463501, + "learning_rate": 8.635545588353449e-05, + "loss": 2.5335, + "step": 10918 + }, + { + "epoch": 0.8812040997498184, + "grad_norm": 0.713196337223053, + "learning_rate": 8.633981680145975e-05, + "loss": 2.4814, + "step": 10919 + }, + { + "epoch": 0.8812848034864014, + "grad_norm": 0.7388820648193359, + "learning_rate": 8.632417805983246e-05, + "loss": 2.4927, + "step": 10920 + }, + { + "epoch": 0.8813655072229845, + "grad_norm": 0.7316160798072815, + "learning_rate": 8.63085396590424e-05, + "loss": 2.508, + "step": 10921 + }, + { + "epoch": 0.8814462109595674, + "grad_norm": 
0.6690139174461365, + "learning_rate": 8.629290159947934e-05, + "loss": 2.5719, + "step": 10922 + }, + { + "epoch": 0.8815269146961504, + "grad_norm": 0.6369553208351135, + "learning_rate": 8.627726388153297e-05, + "loss": 2.5277, + "step": 10923 + }, + { + "epoch": 0.8816076184327334, + "grad_norm": 0.6870365738868713, + "learning_rate": 8.626162650559306e-05, + "loss": 2.4731, + "step": 10924 + }, + { + "epoch": 0.8816883221693165, + "grad_norm": 0.6890872716903687, + "learning_rate": 8.624598947204938e-05, + "loss": 2.5417, + "step": 10925 + }, + { + "epoch": 0.8817690259058995, + "grad_norm": 0.6548230051994324, + "learning_rate": 8.623035278129156e-05, + "loss": 2.4888, + "step": 10926 + }, + { + "epoch": 0.8818497296424824, + "grad_norm": 0.6835262775421143, + "learning_rate": 8.621471643370933e-05, + "loss": 2.531, + "step": 10927 + }, + { + "epoch": 0.8819304333790654, + "grad_norm": 0.6910626292228699, + "learning_rate": 8.619908042969243e-05, + "loss": 2.4864, + "step": 10928 + }, + { + "epoch": 0.8820111371156485, + "grad_norm": 0.6727725267410278, + "learning_rate": 8.618344476963049e-05, + "loss": 2.5063, + "step": 10929 + }, + { + "epoch": 0.8820918408522315, + "grad_norm": 0.7285245656967163, + "learning_rate": 8.616780945391323e-05, + "loss": 2.5036, + "step": 10930 + }, + { + "epoch": 0.8821725445888144, + "grad_norm": 0.6561840176582336, + "learning_rate": 8.615217448293035e-05, + "loss": 2.5152, + "step": 10931 + }, + { + "epoch": 0.8822532483253974, + "grad_norm": 0.6524627208709717, + "learning_rate": 8.613653985707144e-05, + "loss": 2.4827, + "step": 10932 + }, + { + "epoch": 0.8823339520619805, + "grad_norm": 0.6815671920776367, + "learning_rate": 8.612090557672619e-05, + "loss": 2.5385, + "step": 10933 + }, + { + "epoch": 0.8824146557985635, + "grad_norm": 0.7479865550994873, + "learning_rate": 8.610527164228429e-05, + "loss": 2.5311, + "step": 10934 + }, + { + "epoch": 0.8824953595351465, + "grad_norm": 0.699504554271698, + "learning_rate": 
8.608963805413535e-05, + "loss": 2.5332, + "step": 10935 + }, + { + "epoch": 0.8825760632717294, + "grad_norm": 0.7081198692321777, + "learning_rate": 8.607400481266896e-05, + "loss": 2.5636, + "step": 10936 + }, + { + "epoch": 0.8826567670083125, + "grad_norm": 0.7020730972290039, + "learning_rate": 8.605837191827478e-05, + "loss": 2.498, + "step": 10937 + }, + { + "epoch": 0.8827374707448955, + "grad_norm": 0.8004096150398254, + "learning_rate": 8.604273937134242e-05, + "loss": 2.5352, + "step": 10938 + }, + { + "epoch": 0.8828181744814785, + "grad_norm": 0.6399645209312439, + "learning_rate": 8.602710717226147e-05, + "loss": 2.5673, + "step": 10939 + }, + { + "epoch": 0.8828988782180615, + "grad_norm": 0.683195173740387, + "learning_rate": 8.601147532142153e-05, + "loss": 2.4812, + "step": 10940 + }, + { + "epoch": 0.8829795819546445, + "grad_norm": 0.7783642411231995, + "learning_rate": 8.599584381921224e-05, + "loss": 2.4812, + "step": 10941 + }, + { + "epoch": 0.8830602856912275, + "grad_norm": 0.7107423543930054, + "learning_rate": 8.598021266602308e-05, + "loss": 2.5527, + "step": 10942 + }, + { + "epoch": 0.8831409894278105, + "grad_norm": 0.6419345140457153, + "learning_rate": 8.596458186224365e-05, + "loss": 2.5642, + "step": 10943 + }, + { + "epoch": 0.8832216931643935, + "grad_norm": 0.6897309422492981, + "learning_rate": 8.59489514082636e-05, + "loss": 2.5743, + "step": 10944 + }, + { + "epoch": 0.8833023969009766, + "grad_norm": 0.6901495456695557, + "learning_rate": 8.593332130447236e-05, + "loss": 2.5139, + "step": 10945 + }, + { + "epoch": 0.8833831006375595, + "grad_norm": 0.6865388751029968, + "learning_rate": 8.591769155125953e-05, + "loss": 2.5281, + "step": 10946 + }, + { + "epoch": 0.8834638043741425, + "grad_norm": 0.7070403099060059, + "learning_rate": 8.590206214901465e-05, + "loss": 2.4648, + "step": 10947 + }, + { + "epoch": 0.8835445081107255, + "grad_norm": 0.6846395134925842, + "learning_rate": 8.588643309812721e-05, + "loss": 
2.4792, + "step": 10948 + }, + { + "epoch": 0.8836252118473086, + "grad_norm": 0.6875495314598083, + "learning_rate": 8.587080439898675e-05, + "loss": 2.5126, + "step": 10949 + }, + { + "epoch": 0.8837059155838916, + "grad_norm": 0.670098066329956, + "learning_rate": 8.58551760519828e-05, + "loss": 2.4922, + "step": 10950 + }, + { + "epoch": 0.8837866193204745, + "grad_norm": 0.6675527691841125, + "learning_rate": 8.583954805750487e-05, + "loss": 2.499, + "step": 10951 + }, + { + "epoch": 0.8838673230570575, + "grad_norm": 0.6694127321243286, + "learning_rate": 8.582392041594236e-05, + "loss": 2.5286, + "step": 10952 + }, + { + "epoch": 0.8839480267936406, + "grad_norm": 0.7291092872619629, + "learning_rate": 8.580829312768482e-05, + "loss": 2.5705, + "step": 10953 + }, + { + "epoch": 0.8840287305302236, + "grad_norm": 0.709904670715332, + "learning_rate": 8.579266619312174e-05, + "loss": 2.5238, + "step": 10954 + }, + { + "epoch": 0.8841094342668065, + "grad_norm": 0.7037622332572937, + "learning_rate": 8.577703961264254e-05, + "loss": 2.5491, + "step": 10955 + }, + { + "epoch": 0.8841901380033895, + "grad_norm": 0.7553049325942993, + "learning_rate": 8.576141338663668e-05, + "loss": 2.5643, + "step": 10956 + }, + { + "epoch": 0.8842708417399726, + "grad_norm": 0.7177377343177795, + "learning_rate": 8.574578751549364e-05, + "loss": 2.49, + "step": 10957 + }, + { + "epoch": 0.8843515454765556, + "grad_norm": 0.682668149471283, + "learning_rate": 8.573016199960283e-05, + "loss": 2.5221, + "step": 10958 + }, + { + "epoch": 0.8844322492131386, + "grad_norm": 0.7508956789970398, + "learning_rate": 8.571453683935366e-05, + "loss": 2.5766, + "step": 10959 + }, + { + "epoch": 0.8845129529497215, + "grad_norm": 0.6495946645736694, + "learning_rate": 8.569891203513562e-05, + "loss": 2.534, + "step": 10960 + }, + { + "epoch": 0.8845936566863046, + "grad_norm": 0.7362824082374573, + "learning_rate": 8.568328758733806e-05, + "loss": 2.4614, + "step": 10961 + }, + { + "epoch": 
0.8846743604228876, + "grad_norm": 0.6571496725082397, + "learning_rate": 8.566766349635037e-05, + "loss": 2.4393, + "step": 10962 + }, + { + "epoch": 0.8847550641594706, + "grad_norm": 0.7088329195976257, + "learning_rate": 8.5652039762562e-05, + "loss": 2.5476, + "step": 10963 + }, + { + "epoch": 0.8848357678960536, + "grad_norm": 0.6414440274238586, + "learning_rate": 8.56364163863623e-05, + "loss": 2.4668, + "step": 10964 + }, + { + "epoch": 0.8849164716326365, + "grad_norm": 0.7333478331565857, + "learning_rate": 8.562079336814063e-05, + "loss": 2.5151, + "step": 10965 + }, + { + "epoch": 0.8849971753692196, + "grad_norm": 0.638038694858551, + "learning_rate": 8.560517070828638e-05, + "loss": 2.5063, + "step": 10966 + }, + { + "epoch": 0.8850778791058026, + "grad_norm": 0.638921320438385, + "learning_rate": 8.558954840718896e-05, + "loss": 2.4769, + "step": 10967 + }, + { + "epoch": 0.8851585828423856, + "grad_norm": 0.6923465728759766, + "learning_rate": 8.557392646523759e-05, + "loss": 2.5388, + "step": 10968 + }, + { + "epoch": 0.8852392865789686, + "grad_norm": 0.7095212936401367, + "learning_rate": 8.555830488282169e-05, + "loss": 2.4955, + "step": 10969 + }, + { + "epoch": 0.8853199903155516, + "grad_norm": 0.689908504486084, + "learning_rate": 8.554268366033065e-05, + "loss": 2.4998, + "step": 10970 + }, + { + "epoch": 0.8854006940521346, + "grad_norm": 0.6551975011825562, + "learning_rate": 8.552706279815366e-05, + "loss": 2.4965, + "step": 10971 + }, + { + "epoch": 0.8854813977887176, + "grad_norm": 0.7239118218421936, + "learning_rate": 8.551144229668012e-05, + "loss": 2.5785, + "step": 10972 + }, + { + "epoch": 0.8855621015253006, + "grad_norm": 0.6743230819702148, + "learning_rate": 8.549582215629932e-05, + "loss": 2.5146, + "step": 10973 + }, + { + "epoch": 0.8856428052618837, + "grad_norm": 0.6991584300994873, + "learning_rate": 8.548020237740052e-05, + "loss": 2.5524, + "step": 10974 + }, + { + "epoch": 0.8857235089984666, + "grad_norm": 
0.6605305075645447, + "learning_rate": 8.546458296037304e-05, + "loss": 2.5505, + "step": 10975 + }, + { + "epoch": 0.8858042127350496, + "grad_norm": 0.7011568546295166, + "learning_rate": 8.54489639056062e-05, + "loss": 2.4381, + "step": 10976 + }, + { + "epoch": 0.8858849164716326, + "grad_norm": 0.7015339136123657, + "learning_rate": 8.543334521348916e-05, + "loss": 2.5432, + "step": 10977 + }, + { + "epoch": 0.8859656202082157, + "grad_norm": 0.6892278790473938, + "learning_rate": 8.541772688441124e-05, + "loss": 2.5286, + "step": 10978 + }, + { + "epoch": 0.8860463239447987, + "grad_norm": 0.6680187582969666, + "learning_rate": 8.540210891876168e-05, + "loss": 2.439, + "step": 10979 + }, + { + "epoch": 0.8861270276813816, + "grad_norm": 0.7043240666389465, + "learning_rate": 8.538649131692975e-05, + "loss": 2.5558, + "step": 10980 + }, + { + "epoch": 0.8862077314179646, + "grad_norm": 0.6940229535102844, + "learning_rate": 8.537087407930463e-05, + "loss": 2.5219, + "step": 10981 + }, + { + "epoch": 0.8862884351545477, + "grad_norm": 0.6571553945541382, + "learning_rate": 8.535525720627558e-05, + "loss": 2.5054, + "step": 10982 + }, + { + "epoch": 0.8863691388911307, + "grad_norm": 0.6846656203269958, + "learning_rate": 8.533964069823182e-05, + "loss": 2.497, + "step": 10983 + }, + { + "epoch": 0.8864498426277136, + "grad_norm": 0.6838627457618713, + "learning_rate": 8.53240245555625e-05, + "loss": 2.5495, + "step": 10984 + }, + { + "epoch": 0.8865305463642966, + "grad_norm": 0.6825091242790222, + "learning_rate": 8.530840877865687e-05, + "loss": 2.5656, + "step": 10985 + }, + { + "epoch": 0.8866112501008797, + "grad_norm": 0.7368674278259277, + "learning_rate": 8.529279336790414e-05, + "loss": 2.5378, + "step": 10986 + }, + { + "epoch": 0.8866919538374627, + "grad_norm": 0.7333693504333496, + "learning_rate": 8.527717832369338e-05, + "loss": 2.506, + "step": 10987 + }, + { + "epoch": 0.8867726575740457, + "grad_norm": 0.6623306274414062, + "learning_rate": 
8.526156364641384e-05, + "loss": 2.4824, + "step": 10988 + }, + { + "epoch": 0.8868533613106286, + "grad_norm": 0.6863973140716553, + "learning_rate": 8.524594933645468e-05, + "loss": 2.536, + "step": 10989 + }, + { + "epoch": 0.8869340650472117, + "grad_norm": 0.6805100440979004, + "learning_rate": 8.523033539420501e-05, + "loss": 2.4954, + "step": 10990 + }, + { + "epoch": 0.8870147687837947, + "grad_norm": 0.6672216653823853, + "learning_rate": 8.521472182005399e-05, + "loss": 2.4893, + "step": 10991 + }, + { + "epoch": 0.8870954725203777, + "grad_norm": 0.7310158610343933, + "learning_rate": 8.519910861439079e-05, + "loss": 2.5317, + "step": 10992 + }, + { + "epoch": 0.8871761762569607, + "grad_norm": 0.6820743083953857, + "learning_rate": 8.518349577760445e-05, + "loss": 2.4482, + "step": 10993 + }, + { + "epoch": 0.8872568799935437, + "grad_norm": 0.6660269498825073, + "learning_rate": 8.516788331008411e-05, + "loss": 2.5353, + "step": 10994 + }, + { + "epoch": 0.8873375837301267, + "grad_norm": 0.676243007183075, + "learning_rate": 8.51522712122189e-05, + "loss": 2.531, + "step": 10995 + }, + { + "epoch": 0.8874182874667097, + "grad_norm": 0.6677152514457703, + "learning_rate": 8.513665948439796e-05, + "loss": 2.4732, + "step": 10996 + }, + { + "epoch": 0.8874989912032927, + "grad_norm": 0.7341045141220093, + "learning_rate": 8.512104812701027e-05, + "loss": 2.5668, + "step": 10997 + }, + { + "epoch": 0.8875796949398758, + "grad_norm": 0.6475326418876648, + "learning_rate": 8.510543714044496e-05, + "loss": 2.5026, + "step": 10998 + }, + { + "epoch": 0.8876603986764587, + "grad_norm": 0.7335529923439026, + "learning_rate": 8.50898265250911e-05, + "loss": 2.4946, + "step": 10999 + }, + { + "epoch": 0.8877411024130417, + "grad_norm": 0.760108232498169, + "learning_rate": 8.507421628133772e-05, + "loss": 2.5697, + "step": 11000 + }, + { + "epoch": 0.8877411024130417, + "eval_loss": 2.450413465499878, + "eval_runtime": 975.281, + "eval_samples_per_second": 2.686, 
+ "eval_steps_per_second": 0.448, + "step": 11000 + }, + { + "epoch": 0.8878218061496247, + "grad_norm": 0.6420160531997681, + "learning_rate": 8.505860640957391e-05, + "loss": 2.5842, + "step": 11001 + }, + { + "epoch": 0.8879025098862078, + "grad_norm": 0.6625204086303711, + "learning_rate": 8.50429969101887e-05, + "loss": 2.4771, + "step": 11002 + }, + { + "epoch": 0.8879832136227908, + "grad_norm": 0.7430149912834167, + "learning_rate": 8.502738778357107e-05, + "loss": 2.5509, + "step": 11003 + }, + { + "epoch": 0.8880639173593737, + "grad_norm": 0.663624107837677, + "learning_rate": 8.501177903011008e-05, + "loss": 2.504, + "step": 11004 + }, + { + "epoch": 0.8881446210959567, + "grad_norm": 0.6638087630271912, + "learning_rate": 8.499617065019476e-05, + "loss": 2.492, + "step": 11005 + }, + { + "epoch": 0.8882253248325398, + "grad_norm": 0.7321780323982239, + "learning_rate": 8.498056264421406e-05, + "loss": 2.5808, + "step": 11006 + }, + { + "epoch": 0.8883060285691228, + "grad_norm": 0.7108619809150696, + "learning_rate": 8.4964955012557e-05, + "loss": 2.6185, + "step": 11007 + }, + { + "epoch": 0.8883867323057058, + "grad_norm": 0.6745856404304504, + "learning_rate": 8.494934775561258e-05, + "loss": 2.576, + "step": 11008 + }, + { + "epoch": 0.8884674360422887, + "grad_norm": 0.8002225756645203, + "learning_rate": 8.493374087376976e-05, + "loss": 2.5598, + "step": 11009 + }, + { + "epoch": 0.8885481397788718, + "grad_norm": 0.6848840713500977, + "learning_rate": 8.491813436741746e-05, + "loss": 2.5218, + "step": 11010 + }, + { + "epoch": 0.8886288435154548, + "grad_norm": 0.6464105248451233, + "learning_rate": 8.490252823694471e-05, + "loss": 2.5503, + "step": 11011 + }, + { + "epoch": 0.8887095472520378, + "grad_norm": 0.7165790796279907, + "learning_rate": 8.488692248274045e-05, + "loss": 2.5104, + "step": 11012 + }, + { + "epoch": 0.8887902509886207, + "grad_norm": 0.6832898259162903, + "learning_rate": 8.487131710519355e-05, + "loss": 2.5379, + "step": 
11013 + }, + { + "epoch": 0.8888709547252038, + "grad_norm": 0.6992432475090027, + "learning_rate": 8.485571210469296e-05, + "loss": 2.5388, + "step": 11014 + }, + { + "epoch": 0.8889516584617868, + "grad_norm": 0.6410119533538818, + "learning_rate": 8.484010748162765e-05, + "loss": 2.5237, + "step": 11015 + }, + { + "epoch": 0.8890323621983698, + "grad_norm": 0.716248095035553, + "learning_rate": 8.482450323638647e-05, + "loss": 2.4977, + "step": 11016 + }, + { + "epoch": 0.8891130659349528, + "grad_norm": 0.6620567440986633, + "learning_rate": 8.480889936935833e-05, + "loss": 2.5088, + "step": 11017 + }, + { + "epoch": 0.8891937696715357, + "grad_norm": 0.7311015129089355, + "learning_rate": 8.479329588093217e-05, + "loss": 2.5547, + "step": 11018 + }, + { + "epoch": 0.8892744734081188, + "grad_norm": 0.757203996181488, + "learning_rate": 8.477769277149676e-05, + "loss": 2.5681, + "step": 11019 + }, + { + "epoch": 0.8893551771447018, + "grad_norm": 0.6941282153129578, + "learning_rate": 8.476209004144107e-05, + "loss": 2.5078, + "step": 11020 + }, + { + "epoch": 0.8894358808812848, + "grad_norm": 0.6381667256355286, + "learning_rate": 8.474648769115396e-05, + "loss": 2.5371, + "step": 11021 + }, + { + "epoch": 0.8895165846178678, + "grad_norm": 0.7978621125221252, + "learning_rate": 8.473088572102422e-05, + "loss": 2.5384, + "step": 11022 + }, + { + "epoch": 0.8895972883544508, + "grad_norm": 0.7229189872741699, + "learning_rate": 8.471528413144072e-05, + "loss": 2.5469, + "step": 11023 + }, + { + "epoch": 0.8896779920910338, + "grad_norm": 0.705545961856842, + "learning_rate": 8.469968292279231e-05, + "loss": 2.5281, + "step": 11024 + }, + { + "epoch": 0.8897586958276168, + "grad_norm": 0.7259972095489502, + "learning_rate": 8.468408209546777e-05, + "loss": 2.5485, + "step": 11025 + }, + { + "epoch": 0.8898393995641998, + "grad_norm": 0.6859608888626099, + "learning_rate": 8.466848164985594e-05, + "loss": 2.5548, + "step": 11026 + }, + { + "epoch": 
0.8899201033007829, + "grad_norm": 0.7036644816398621, + "learning_rate": 8.465288158634565e-05, + "loss": 2.5159, + "step": 11027 + }, + { + "epoch": 0.8900008070373658, + "grad_norm": 0.6899380087852478, + "learning_rate": 8.463728190532569e-05, + "loss": 2.5037, + "step": 11028 + }, + { + "epoch": 0.8900815107739488, + "grad_norm": 0.7428410649299622, + "learning_rate": 8.462168260718477e-05, + "loss": 2.5074, + "step": 11029 + }, + { + "epoch": 0.8901622145105318, + "grad_norm": 0.6724158525466919, + "learning_rate": 8.460608369231173e-05, + "loss": 2.5544, + "step": 11030 + }, + { + "epoch": 0.8902429182471149, + "grad_norm": 0.6516450643539429, + "learning_rate": 8.459048516109535e-05, + "loss": 2.5152, + "step": 11031 + }, + { + "epoch": 0.8903236219836979, + "grad_norm": 0.7013405561447144, + "learning_rate": 8.457488701392434e-05, + "loss": 2.5116, + "step": 11032 + }, + { + "epoch": 0.8904043257202808, + "grad_norm": 0.7207479476928711, + "learning_rate": 8.455928925118747e-05, + "loss": 2.6041, + "step": 11033 + }, + { + "epoch": 0.8904850294568638, + "grad_norm": 0.69600510597229, + "learning_rate": 8.454369187327348e-05, + "loss": 2.5794, + "step": 11034 + }, + { + "epoch": 0.8905657331934469, + "grad_norm": 0.6831288933753967, + "learning_rate": 8.452809488057108e-05, + "loss": 2.4682, + "step": 11035 + }, + { + "epoch": 0.8906464369300299, + "grad_norm": 0.6978991627693176, + "learning_rate": 8.451249827346901e-05, + "loss": 2.4862, + "step": 11036 + }, + { + "epoch": 0.8907271406666128, + "grad_norm": 0.6772337555885315, + "learning_rate": 8.4496902052356e-05, + "loss": 2.5357, + "step": 11037 + }, + { + "epoch": 0.8908078444031958, + "grad_norm": 0.6735778450965881, + "learning_rate": 8.448130621762067e-05, + "loss": 2.5115, + "step": 11038 + }, + { + "epoch": 0.8908885481397789, + "grad_norm": 0.6695345044136047, + "learning_rate": 8.446571076965177e-05, + "loss": 2.5083, + "step": 11039 + }, + { + "epoch": 0.8909692518763619, + "grad_norm": 
0.685343325138092, + "learning_rate": 8.445011570883796e-05, + "loss": 2.5221, + "step": 11040 + }, + { + "epoch": 0.8910499556129449, + "grad_norm": 0.7030319571495056, + "learning_rate": 8.443452103556792e-05, + "loss": 2.5708, + "step": 11041 + }, + { + "epoch": 0.8911306593495278, + "grad_norm": 0.6910343766212463, + "learning_rate": 8.441892675023029e-05, + "loss": 2.5373, + "step": 11042 + }, + { + "epoch": 0.8912113630861109, + "grad_norm": 0.7207868099212646, + "learning_rate": 8.440333285321374e-05, + "loss": 2.5862, + "step": 11043 + }, + { + "epoch": 0.8912920668226939, + "grad_norm": 0.6780788898468018, + "learning_rate": 8.438773934490692e-05, + "loss": 2.562, + "step": 11044 + }, + { + "epoch": 0.8913727705592769, + "grad_norm": 0.7010074257850647, + "learning_rate": 8.437214622569842e-05, + "loss": 2.4556, + "step": 11045 + }, + { + "epoch": 0.8914534742958599, + "grad_norm": 0.6763667464256287, + "learning_rate": 8.435655349597689e-05, + "loss": 2.5402, + "step": 11046 + }, + { + "epoch": 0.891534178032443, + "grad_norm": 0.6870944499969482, + "learning_rate": 8.4340961156131e-05, + "loss": 2.5307, + "step": 11047 + }, + { + "epoch": 0.8916148817690259, + "grad_norm": 0.7835623025894165, + "learning_rate": 8.432536920654923e-05, + "loss": 2.4974, + "step": 11048 + }, + { + "epoch": 0.8916955855056089, + "grad_norm": 0.7551318407058716, + "learning_rate": 8.430977764762024e-05, + "loss": 2.5206, + "step": 11049 + }, + { + "epoch": 0.8917762892421919, + "grad_norm": 0.6486842632293701, + "learning_rate": 8.429418647973265e-05, + "loss": 2.4909, + "step": 11050 + }, + { + "epoch": 0.891856992978775, + "grad_norm": 0.6894064545631409, + "learning_rate": 8.427859570327494e-05, + "loss": 2.5846, + "step": 11051 + }, + { + "epoch": 0.8919376967153579, + "grad_norm": 0.7597395181655884, + "learning_rate": 8.426300531863571e-05, + "loss": 2.5259, + "step": 11052 + }, + { + "epoch": 0.8920184004519409, + "grad_norm": 0.6784652471542358, + "learning_rate": 
8.42474153262036e-05, + "loss": 2.5048, + "step": 11053 + }, + { + "epoch": 0.8920991041885239, + "grad_norm": 0.7703847885131836, + "learning_rate": 8.4231825726367e-05, + "loss": 2.4962, + "step": 11054 + }, + { + "epoch": 0.892179807925107, + "grad_norm": 0.6646561026573181, + "learning_rate": 8.421623651951454e-05, + "loss": 2.491, + "step": 11055 + }, + { + "epoch": 0.89226051166169, + "grad_norm": 0.6901054978370667, + "learning_rate": 8.420064770603475e-05, + "loss": 2.515, + "step": 11056 + }, + { + "epoch": 0.8923412153982729, + "grad_norm": 0.6789328455924988, + "learning_rate": 8.41850592863161e-05, + "loss": 2.5481, + "step": 11057 + }, + { + "epoch": 0.8924219191348559, + "grad_norm": 0.6211017370223999, + "learning_rate": 8.41694712607471e-05, + "loss": 2.51, + "step": 11058 + }, + { + "epoch": 0.892502622871439, + "grad_norm": 0.6482260823249817, + "learning_rate": 8.415388362971626e-05, + "loss": 2.5418, + "step": 11059 + }, + { + "epoch": 0.892583326608022, + "grad_norm": 0.7627651691436768, + "learning_rate": 8.413829639361209e-05, + "loss": 2.5033, + "step": 11060 + }, + { + "epoch": 0.892664030344605, + "grad_norm": 0.6560852527618408, + "learning_rate": 8.412270955282302e-05, + "loss": 2.5442, + "step": 11061 + }, + { + "epoch": 0.8927447340811879, + "grad_norm": 0.7479087114334106, + "learning_rate": 8.410712310773752e-05, + "loss": 2.5189, + "step": 11062 + }, + { + "epoch": 0.892825437817771, + "grad_norm": 0.6970879435539246, + "learning_rate": 8.409153705874411e-05, + "loss": 2.5418, + "step": 11063 + }, + { + "epoch": 0.892906141554354, + "grad_norm": 0.6514548659324646, + "learning_rate": 8.407595140623113e-05, + "loss": 2.5277, + "step": 11064 + }, + { + "epoch": 0.892986845290937, + "grad_norm": 0.6745554804801941, + "learning_rate": 8.406036615058707e-05, + "loss": 2.5085, + "step": 11065 + }, + { + "epoch": 0.89306754902752, + "grad_norm": 0.7510363459587097, + "learning_rate": 8.404478129220037e-05, + "loss": 2.4941, + "step": 11066 
+ }, + { + "epoch": 0.8931482527641029, + "grad_norm": 0.6531470417976379, + "learning_rate": 8.402919683145941e-05, + "loss": 2.5363, + "step": 11067 + }, + { + "epoch": 0.893228956500686, + "grad_norm": 0.6861493587493896, + "learning_rate": 8.401361276875262e-05, + "loss": 2.6369, + "step": 11068 + }, + { + "epoch": 0.893309660237269, + "grad_norm": 0.6029497981071472, + "learning_rate": 8.39980291044684e-05, + "loss": 2.4953, + "step": 11069 + }, + { + "epoch": 0.893390363973852, + "grad_norm": 0.6831715106964111, + "learning_rate": 8.39824458389951e-05, + "loss": 2.5074, + "step": 11070 + }, + { + "epoch": 0.8934710677104349, + "grad_norm": 0.7076299786567688, + "learning_rate": 8.396686297272112e-05, + "loss": 2.5934, + "step": 11071 + }, + { + "epoch": 0.893551771447018, + "grad_norm": 0.6941438913345337, + "learning_rate": 8.395128050603487e-05, + "loss": 2.5338, + "step": 11072 + }, + { + "epoch": 0.893632475183601, + "grad_norm": 0.6867249011993408, + "learning_rate": 8.393569843932463e-05, + "loss": 2.5311, + "step": 11073 + }, + { + "epoch": 0.893713178920184, + "grad_norm": 0.623991847038269, + "learning_rate": 8.392011677297877e-05, + "loss": 2.5133, + "step": 11074 + }, + { + "epoch": 0.893793882656767, + "grad_norm": 0.6808422803878784, + "learning_rate": 8.390453550738564e-05, + "loss": 2.5398, + "step": 11075 + }, + { + "epoch": 0.89387458639335, + "grad_norm": 0.7136701345443726, + "learning_rate": 8.388895464293357e-05, + "loss": 2.5415, + "step": 11076 + }, + { + "epoch": 0.893955290129933, + "grad_norm": 0.6814287304878235, + "learning_rate": 8.387337418001084e-05, + "loss": 2.4782, + "step": 11077 + }, + { + "epoch": 0.894035993866516, + "grad_norm": 0.8101940155029297, + "learning_rate": 8.385779411900579e-05, + "loss": 2.5292, + "step": 11078 + }, + { + "epoch": 0.894116697603099, + "grad_norm": 0.7106796503067017, + "learning_rate": 8.384221446030676e-05, + "loss": 2.5819, + "step": 11079 + }, + { + "epoch": 0.8941974013396821, + 
"grad_norm": 0.7840015292167664, + "learning_rate": 8.382663520430191e-05, + "loss": 2.5243, + "step": 11080 + }, + { + "epoch": 0.894278105076265, + "grad_norm": 0.7037288546562195, + "learning_rate": 8.381105635137959e-05, + "loss": 2.5606, + "step": 11081 + }, + { + "epoch": 0.894358808812848, + "grad_norm": 0.671558678150177, + "learning_rate": 8.379547790192812e-05, + "loss": 2.4923, + "step": 11082 + }, + { + "epoch": 0.894439512549431, + "grad_norm": 0.6789675951004028, + "learning_rate": 8.377989985633567e-05, + "loss": 2.5281, + "step": 11083 + }, + { + "epoch": 0.8945202162860141, + "grad_norm": 0.6777840852737427, + "learning_rate": 8.37643222149905e-05, + "loss": 2.5159, + "step": 11084 + }, + { + "epoch": 0.8946009200225971, + "grad_norm": 0.6920693516731262, + "learning_rate": 8.374874497828089e-05, + "loss": 2.4952, + "step": 11085 + }, + { + "epoch": 0.89468162375918, + "grad_norm": 0.7394022941589355, + "learning_rate": 8.373316814659502e-05, + "loss": 2.5035, + "step": 11086 + }, + { + "epoch": 0.894762327495763, + "grad_norm": 0.625960648059845, + "learning_rate": 8.37175917203211e-05, + "loss": 2.5324, + "step": 11087 + }, + { + "epoch": 0.8948430312323461, + "grad_norm": 0.6848758459091187, + "learning_rate": 8.370201569984742e-05, + "loss": 2.5312, + "step": 11088 + }, + { + "epoch": 0.8949237349689291, + "grad_norm": 0.7207037210464478, + "learning_rate": 8.368644008556205e-05, + "loss": 2.5807, + "step": 11089 + }, + { + "epoch": 0.895004438705512, + "grad_norm": 0.7582261562347412, + "learning_rate": 8.367086487785326e-05, + "loss": 2.532, + "step": 11090 + }, + { + "epoch": 0.895085142442095, + "grad_norm": 0.6916806101799011, + "learning_rate": 8.36552900771092e-05, + "loss": 2.4772, + "step": 11091 + }, + { + "epoch": 0.8951658461786781, + "grad_norm": 0.6457386016845703, + "learning_rate": 8.363971568371805e-05, + "loss": 2.4952, + "step": 11092 + }, + { + "epoch": 0.8952465499152611, + "grad_norm": 0.7006754279136658, + 
"learning_rate": 8.362414169806792e-05, + "loss": 2.5818, + "step": 11093 + }, + { + "epoch": 0.8953272536518441, + "grad_norm": 0.6939932703971863, + "learning_rate": 8.3608568120547e-05, + "loss": 2.5411, + "step": 11094 + }, + { + "epoch": 0.895407957388427, + "grad_norm": 0.6314546465873718, + "learning_rate": 8.359299495154343e-05, + "loss": 2.5408, + "step": 11095 + }, + { + "epoch": 0.8954886611250101, + "grad_norm": 0.7202826738357544, + "learning_rate": 8.357742219144529e-05, + "loss": 2.4925, + "step": 11096 + }, + { + "epoch": 0.8955693648615931, + "grad_norm": 0.6475295424461365, + "learning_rate": 8.356184984064071e-05, + "loss": 2.5023, + "step": 11097 + }, + { + "epoch": 0.8956500685981761, + "grad_norm": 0.6161238551139832, + "learning_rate": 8.354627789951785e-05, + "loss": 2.5053, + "step": 11098 + }, + { + "epoch": 0.8957307723347591, + "grad_norm": 0.6919825077056885, + "learning_rate": 8.353070636846472e-05, + "loss": 2.5387, + "step": 11099 + }, + { + "epoch": 0.8958114760713421, + "grad_norm": 0.6374878883361816, + "learning_rate": 8.351513524786944e-05, + "loss": 2.5526, + "step": 11100 + }, + { + "epoch": 0.8958921798079251, + "grad_norm": 0.7041093707084656, + "learning_rate": 8.349956453812009e-05, + "loss": 2.5282, + "step": 11101 + }, + { + "epoch": 0.8959728835445081, + "grad_norm": 0.7252324819564819, + "learning_rate": 8.348399423960471e-05, + "loss": 2.5723, + "step": 11102 + }, + { + "epoch": 0.8960535872810911, + "grad_norm": 0.681682825088501, + "learning_rate": 8.346842435271137e-05, + "loss": 2.5284, + "step": 11103 + }, + { + "epoch": 0.8961342910176742, + "grad_norm": 0.7293850183486938, + "learning_rate": 8.34528548778281e-05, + "loss": 2.5014, + "step": 11104 + }, + { + "epoch": 0.8962149947542571, + "grad_norm": 0.7057846188545227, + "learning_rate": 8.343728581534299e-05, + "loss": 2.5502, + "step": 11105 + }, + { + "epoch": 0.8962956984908401, + "grad_norm": 0.6740830540657043, + "learning_rate": 8.342171716564398e-05, + 
"loss": 2.5205, + "step": 11106 + }, + { + "epoch": 0.8963764022274231, + "grad_norm": 0.6917470097541809, + "learning_rate": 8.340614892911907e-05, + "loss": 2.5216, + "step": 11107 + }, + { + "epoch": 0.8964571059640062, + "grad_norm": 0.7495635151863098, + "learning_rate": 8.339058110615638e-05, + "loss": 2.5509, + "step": 11108 + }, + { + "epoch": 0.8965378097005892, + "grad_norm": 0.6687765717506409, + "learning_rate": 8.33750136971438e-05, + "loss": 2.5286, + "step": 11109 + }, + { + "epoch": 0.8966185134371721, + "grad_norm": 0.6901381015777588, + "learning_rate": 8.335944670246931e-05, + "loss": 2.5545, + "step": 11110 + }, + { + "epoch": 0.8966992171737551, + "grad_norm": 0.6645506024360657, + "learning_rate": 8.334388012252094e-05, + "loss": 2.4883, + "step": 11111 + }, + { + "epoch": 0.8967799209103382, + "grad_norm": 0.6427997350692749, + "learning_rate": 8.332831395768662e-05, + "loss": 2.5103, + "step": 11112 + }, + { + "epoch": 0.8968606246469212, + "grad_norm": 0.7224035263061523, + "learning_rate": 8.331274820835425e-05, + "loss": 2.5086, + "step": 11113 + }, + { + "epoch": 0.8969413283835042, + "grad_norm": 0.6918233036994934, + "learning_rate": 8.329718287491188e-05, + "loss": 2.5222, + "step": 11114 + }, + { + "epoch": 0.8970220321200871, + "grad_norm": 0.735583484172821, + "learning_rate": 8.328161795774734e-05, + "loss": 2.5277, + "step": 11115 + }, + { + "epoch": 0.8971027358566702, + "grad_norm": 0.6624864339828491, + "learning_rate": 8.326605345724857e-05, + "loss": 2.532, + "step": 11116 + }, + { + "epoch": 0.8971834395932532, + "grad_norm": 0.6227770447731018, + "learning_rate": 8.325048937380352e-05, + "loss": 2.5386, + "step": 11117 + }, + { + "epoch": 0.8972641433298362, + "grad_norm": 0.6483022570610046, + "learning_rate": 8.323492570780004e-05, + "loss": 2.4958, + "step": 11118 + }, + { + "epoch": 0.8973448470664191, + "grad_norm": 0.7072618007659912, + "learning_rate": 8.321936245962602e-05, + "loss": 2.4931, + "step": 11119 + }, + 
{ + "epoch": 0.8974255508030021, + "grad_norm": 0.6848764419555664, + "learning_rate": 8.320379962966937e-05, + "loss": 2.4549, + "step": 11120 + }, + { + "epoch": 0.8975062545395852, + "grad_norm": 0.6819620132446289, + "learning_rate": 8.318823721831795e-05, + "loss": 2.5156, + "step": 11121 + }, + { + "epoch": 0.8975869582761682, + "grad_norm": 0.6834476590156555, + "learning_rate": 8.31726752259596e-05, + "loss": 2.507, + "step": 11122 + }, + { + "epoch": 0.8976676620127512, + "grad_norm": 0.6785772442817688, + "learning_rate": 8.315711365298214e-05, + "loss": 2.5086, + "step": 11123 + }, + { + "epoch": 0.8977483657493341, + "grad_norm": 0.6303566098213196, + "learning_rate": 8.314155249977351e-05, + "loss": 2.5087, + "step": 11124 + }, + { + "epoch": 0.8978290694859172, + "grad_norm": 0.6544361710548401, + "learning_rate": 8.31259917667214e-05, + "loss": 2.505, + "step": 11125 + }, + { + "epoch": 0.8979097732225002, + "grad_norm": 0.8135818243026733, + "learning_rate": 8.311043145421369e-05, + "loss": 2.5139, + "step": 11126 + }, + { + "epoch": 0.8979904769590832, + "grad_norm": 0.6744341254234314, + "learning_rate": 8.309487156263818e-05, + "loss": 2.4797, + "step": 11127 + }, + { + "epoch": 0.8980711806956662, + "grad_norm": 0.6138790845870972, + "learning_rate": 8.307931209238267e-05, + "loss": 2.5334, + "step": 11128 + }, + { + "epoch": 0.8981518844322492, + "grad_norm": 0.702434241771698, + "learning_rate": 8.306375304383492e-05, + "loss": 2.5343, + "step": 11129 + }, + { + "epoch": 0.8982325881688322, + "grad_norm": 0.6787155270576477, + "learning_rate": 8.304819441738275e-05, + "loss": 2.507, + "step": 11130 + }, + { + "epoch": 0.8983132919054152, + "grad_norm": 0.6963719129562378, + "learning_rate": 8.303263621341386e-05, + "loss": 2.5238, + "step": 11131 + }, + { + "epoch": 0.8983939956419982, + "grad_norm": 0.6623271107673645, + "learning_rate": 8.3017078432316e-05, + "loss": 2.5206, + "step": 11132 + }, + { + "epoch": 0.8984746993785813, + 
"grad_norm": 0.777222752571106, + "learning_rate": 8.300152107447701e-05, + "loss": 2.5004, + "step": 11133 + }, + { + "epoch": 0.8985554031151642, + "grad_norm": 0.6788455247879028, + "learning_rate": 8.29859641402845e-05, + "loss": 2.5735, + "step": 11134 + }, + { + "epoch": 0.8986361068517472, + "grad_norm": 0.6595063209533691, + "learning_rate": 8.297040763012624e-05, + "loss": 2.4988, + "step": 11135 + }, + { + "epoch": 0.8987168105883302, + "grad_norm": 0.7105697989463806, + "learning_rate": 8.295485154438994e-05, + "loss": 2.5531, + "step": 11136 + }, + { + "epoch": 0.8987975143249133, + "grad_norm": 0.6884949803352356, + "learning_rate": 8.29392958834633e-05, + "loss": 2.5158, + "step": 11137 + }, + { + "epoch": 0.8988782180614963, + "grad_norm": 0.7178345322608948, + "learning_rate": 8.2923740647734e-05, + "loss": 2.5836, + "step": 11138 + }, + { + "epoch": 0.8989589217980792, + "grad_norm": 0.7000541687011719, + "learning_rate": 8.290818583758973e-05, + "loss": 2.5345, + "step": 11139 + }, + { + "epoch": 0.8990396255346622, + "grad_norm": 0.6808128952980042, + "learning_rate": 8.289263145341816e-05, + "loss": 2.5227, + "step": 11140 + }, + { + "epoch": 0.8991203292712453, + "grad_norm": 0.7047473788261414, + "learning_rate": 8.287707749560691e-05, + "loss": 2.477, + "step": 11141 + }, + { + "epoch": 0.8992010330078283, + "grad_norm": 0.6654812693595886, + "learning_rate": 8.286152396454365e-05, + "loss": 2.4575, + "step": 11142 + }, + { + "epoch": 0.8992817367444113, + "grad_norm": 0.6690360307693481, + "learning_rate": 8.284597086061603e-05, + "loss": 2.4755, + "step": 11143 + }, + { + "epoch": 0.8993624404809942, + "grad_norm": 0.7270147204399109, + "learning_rate": 8.283041818421164e-05, + "loss": 2.5893, + "step": 11144 + }, + { + "epoch": 0.8994431442175773, + "grad_norm": 0.5977498888969421, + "learning_rate": 8.28148659357181e-05, + "loss": 2.5108, + "step": 11145 + }, + { + "epoch": 0.8995238479541603, + "grad_norm": 0.694593071937561, + 
"learning_rate": 8.279931411552307e-05, + "loss": 2.5036, + "step": 11146 + }, + { + "epoch": 0.8996045516907433, + "grad_norm": 0.7395440936088562, + "learning_rate": 8.278376272401404e-05, + "loss": 2.5244, + "step": 11147 + }, + { + "epoch": 0.8996852554273262, + "grad_norm": 0.6483517289161682, + "learning_rate": 8.276821176157867e-05, + "loss": 2.5619, + "step": 11148 + }, + { + "epoch": 0.8997659591639093, + "grad_norm": 0.6996768116950989, + "learning_rate": 8.275266122860454e-05, + "loss": 2.5275, + "step": 11149 + }, + { + "epoch": 0.8998466629004923, + "grad_norm": 0.661122739315033, + "learning_rate": 8.273711112547914e-05, + "loss": 2.5053, + "step": 11150 + }, + { + "epoch": 0.8999273666370753, + "grad_norm": 0.6919111609458923, + "learning_rate": 8.272156145259006e-05, + "loss": 2.578, + "step": 11151 + }, + { + "epoch": 0.9000080703736583, + "grad_norm": 0.6680958867073059, + "learning_rate": 8.270601221032482e-05, + "loss": 2.4942, + "step": 11152 + }, + { + "epoch": 0.9000887741102414, + "grad_norm": 0.6782989501953125, + "learning_rate": 8.269046339907101e-05, + "loss": 2.5461, + "step": 11153 + }, + { + "epoch": 0.9001694778468243, + "grad_norm": 0.743468165397644, + "learning_rate": 8.267491501921605e-05, + "loss": 2.629, + "step": 11154 + }, + { + "epoch": 0.9002501815834073, + "grad_norm": 0.709562361240387, + "learning_rate": 8.265936707114751e-05, + "loss": 2.566, + "step": 11155 + }, + { + "epoch": 0.9003308853199903, + "grad_norm": 0.7075676918029785, + "learning_rate": 8.264381955525291e-05, + "loss": 2.5409, + "step": 11156 + }, + { + "epoch": 0.9004115890565734, + "grad_norm": 0.7021335959434509, + "learning_rate": 8.262827247191963e-05, + "loss": 2.5606, + "step": 11157 + }, + { + "epoch": 0.9004922927931563, + "grad_norm": 0.6507331132888794, + "learning_rate": 8.261272582153524e-05, + "loss": 2.5557, + "step": 11158 + }, + { + "epoch": 0.9005729965297393, + "grad_norm": 0.7182760238647461, + "learning_rate": 8.25971796044872e-05, + 
"loss": 2.5567, + "step": 11159 + }, + { + "epoch": 0.9006537002663223, + "grad_norm": 0.6632338762283325, + "learning_rate": 8.258163382116291e-05, + "loss": 2.5081, + "step": 11160 + }, + { + "epoch": 0.9007344040029054, + "grad_norm": 0.6889928579330444, + "learning_rate": 8.256608847194983e-05, + "loss": 2.5034, + "step": 11161 + }, + { + "epoch": 0.9008151077394884, + "grad_norm": 0.6374824047088623, + "learning_rate": 8.255054355723542e-05, + "loss": 2.4826, + "step": 11162 + }, + { + "epoch": 0.9008958114760713, + "grad_norm": 0.7100771069526672, + "learning_rate": 8.253499907740706e-05, + "loss": 2.4666, + "step": 11163 + }, + { + "epoch": 0.9009765152126543, + "grad_norm": 0.8141123652458191, + "learning_rate": 8.251945503285218e-05, + "loss": 2.5339, + "step": 11164 + }, + { + "epoch": 0.9010572189492374, + "grad_norm": 0.6621670722961426, + "learning_rate": 8.250391142395822e-05, + "loss": 2.4805, + "step": 11165 + }, + { + "epoch": 0.9011379226858204, + "grad_norm": 0.6624772548675537, + "learning_rate": 8.248836825111245e-05, + "loss": 2.5148, + "step": 11166 + }, + { + "epoch": 0.9012186264224034, + "grad_norm": 0.6783565282821655, + "learning_rate": 8.247282551470235e-05, + "loss": 2.4481, + "step": 11167 + }, + { + "epoch": 0.9012993301589863, + "grad_norm": 0.700089156627655, + "learning_rate": 8.245728321511525e-05, + "loss": 2.5649, + "step": 11168 + }, + { + "epoch": 0.9013800338955693, + "grad_norm": 0.6765339970588684, + "learning_rate": 8.244174135273852e-05, + "loss": 2.5221, + "step": 11169 + }, + { + "epoch": 0.9014607376321524, + "grad_norm": 0.6896056532859802, + "learning_rate": 8.242619992795948e-05, + "loss": 2.4742, + "step": 11170 + }, + { + "epoch": 0.9015414413687354, + "grad_norm": 0.7134374976158142, + "learning_rate": 8.241065894116547e-05, + "loss": 2.5231, + "step": 11171 + }, + { + "epoch": 0.9016221451053184, + "grad_norm": 0.6939442753791809, + "learning_rate": 8.239511839274385e-05, + "loss": 2.5159, + "step": 11172 + }, 
+ { + "epoch": 0.9017028488419013, + "grad_norm": 0.6780345439910889, + "learning_rate": 8.237957828308187e-05, + "loss": 2.5474, + "step": 11173 + }, + { + "epoch": 0.9017835525784844, + "grad_norm": 0.6532382965087891, + "learning_rate": 8.236403861256687e-05, + "loss": 2.4982, + "step": 11174 + }, + { + "epoch": 0.9018642563150674, + "grad_norm": 0.6918137073516846, + "learning_rate": 8.234849938158615e-05, + "loss": 2.4657, + "step": 11175 + }, + { + "epoch": 0.9019449600516504, + "grad_norm": 0.6838762164115906, + "learning_rate": 8.233296059052695e-05, + "loss": 2.5405, + "step": 11176 + }, + { + "epoch": 0.9020256637882333, + "grad_norm": 0.7560290098190308, + "learning_rate": 8.231742223977653e-05, + "loss": 2.5379, + "step": 11177 + }, + { + "epoch": 0.9021063675248164, + "grad_norm": 0.6673319339752197, + "learning_rate": 8.230188432972221e-05, + "loss": 2.4669, + "step": 11178 + }, + { + "epoch": 0.9021870712613994, + "grad_norm": 0.7486294507980347, + "learning_rate": 8.228634686075116e-05, + "loss": 2.526, + "step": 11179 + }, + { + "epoch": 0.9022677749979824, + "grad_norm": 0.7012811303138733, + "learning_rate": 8.227080983325067e-05, + "loss": 2.5544, + "step": 11180 + }, + { + "epoch": 0.9023484787345654, + "grad_norm": 0.6807447075843811, + "learning_rate": 8.225527324760796e-05, + "loss": 2.5139, + "step": 11181 + }, + { + "epoch": 0.9024291824711484, + "grad_norm": 0.7594932317733765, + "learning_rate": 8.223973710421018e-05, + "loss": 2.539, + "step": 11182 + }, + { + "epoch": 0.9025098862077314, + "grad_norm": 0.6764204502105713, + "learning_rate": 8.22242014034446e-05, + "loss": 2.6128, + "step": 11183 + }, + { + "epoch": 0.9025905899443144, + "grad_norm": 0.6499967575073242, + "learning_rate": 8.220866614569837e-05, + "loss": 2.5459, + "step": 11184 + }, + { + "epoch": 0.9026712936808974, + "grad_norm": 0.673076331615448, + "learning_rate": 8.219313133135876e-05, + "loss": 2.5852, + "step": 11185 + }, + { + "epoch": 0.9027519974174805, + 
"grad_norm": 0.784854531288147, + "learning_rate": 8.21775969608128e-05, + "loss": 2.5586, + "step": 11186 + }, + { + "epoch": 0.9028327011540634, + "grad_norm": 0.658963680267334, + "learning_rate": 8.216206303444771e-05, + "loss": 2.4376, + "step": 11187 + }, + { + "epoch": 0.9029134048906464, + "grad_norm": 0.6456249356269836, + "learning_rate": 8.214652955265067e-05, + "loss": 2.5166, + "step": 11188 + }, + { + "epoch": 0.9029941086272294, + "grad_norm": 0.6940007209777832, + "learning_rate": 8.213099651580874e-05, + "loss": 2.4992, + "step": 11189 + }, + { + "epoch": 0.9030748123638125, + "grad_norm": 0.6661425828933716, + "learning_rate": 8.211546392430911e-05, + "loss": 2.5177, + "step": 11190 + }, + { + "epoch": 0.9031555161003955, + "grad_norm": 0.647834300994873, + "learning_rate": 8.20999317785389e-05, + "loss": 2.4666, + "step": 11191 + }, + { + "epoch": 0.9032362198369784, + "grad_norm": 0.7673383355140686, + "learning_rate": 8.208440007888515e-05, + "loss": 2.4852, + "step": 11192 + }, + { + "epoch": 0.9033169235735614, + "grad_norm": 0.7033390998840332, + "learning_rate": 8.206886882573498e-05, + "loss": 2.5549, + "step": 11193 + }, + { + "epoch": 0.9033976273101445, + "grad_norm": 0.6871141195297241, + "learning_rate": 8.205333801947548e-05, + "loss": 2.4585, + "step": 11194 + }, + { + "epoch": 0.9034783310467275, + "grad_norm": 0.7201984524726868, + "learning_rate": 8.20378076604937e-05, + "loss": 2.5271, + "step": 11195 + }, + { + "epoch": 0.9035590347833105, + "grad_norm": 0.704060971736908, + "learning_rate": 8.202227774917671e-05, + "loss": 2.4915, + "step": 11196 + }, + { + "epoch": 0.9036397385198934, + "grad_norm": 0.6833879947662354, + "learning_rate": 8.200674828591156e-05, + "loss": 2.4496, + "step": 11197 + }, + { + "epoch": 0.9037204422564765, + "grad_norm": 0.6564866304397583, + "learning_rate": 8.199121927108527e-05, + "loss": 2.4818, + "step": 11198 + }, + { + "epoch": 0.9038011459930595, + "grad_norm": 0.6970151662826538, + 
"learning_rate": 8.197569070508486e-05, + "loss": 2.5812, + "step": 11199 + }, + { + "epoch": 0.9038818497296425, + "grad_norm": 0.7147194743156433, + "learning_rate": 8.196016258829737e-05, + "loss": 2.5543, + "step": 11200 + }, + { + "epoch": 0.9039625534662254, + "grad_norm": 0.6357648968696594, + "learning_rate": 8.194463492110981e-05, + "loss": 2.5254, + "step": 11201 + }, + { + "epoch": 0.9040432572028085, + "grad_norm": 0.7113756537437439, + "learning_rate": 8.19291077039091e-05, + "loss": 2.5179, + "step": 11202 + }, + { + "epoch": 0.9041239609393915, + "grad_norm": 0.7252987623214722, + "learning_rate": 8.191358093708228e-05, + "loss": 2.5658, + "step": 11203 + }, + { + "epoch": 0.9042046646759745, + "grad_norm": 0.7095803618431091, + "learning_rate": 8.189805462101631e-05, + "loss": 2.583, + "step": 11204 + }, + { + "epoch": 0.9042853684125575, + "grad_norm": 0.7447760105133057, + "learning_rate": 8.188252875609812e-05, + "loss": 2.5608, + "step": 11205 + }, + { + "epoch": 0.9043660721491406, + "grad_norm": 0.6578439474105835, + "learning_rate": 8.186700334271468e-05, + "loss": 2.508, + "step": 11206 + }, + { + "epoch": 0.9044467758857235, + "grad_norm": 0.6776832938194275, + "learning_rate": 8.185147838125296e-05, + "loss": 2.6188, + "step": 11207 + }, + { + "epoch": 0.9045274796223065, + "grad_norm": 0.6559253931045532, + "learning_rate": 8.183595387209976e-05, + "loss": 2.5307, + "step": 11208 + }, + { + "epoch": 0.9046081833588895, + "grad_norm": 0.7078405022621155, + "learning_rate": 8.18204298156421e-05, + "loss": 2.5545, + "step": 11209 + }, + { + "epoch": 0.9046888870954726, + "grad_norm": 0.6790273189544678, + "learning_rate": 8.18049062122669e-05, + "loss": 2.4963, + "step": 11210 + }, + { + "epoch": 0.9047695908320555, + "grad_norm": 0.6888250708580017, + "learning_rate": 8.178938306236095e-05, + "loss": 2.5108, + "step": 11211 + }, + { + "epoch": 0.9048502945686385, + "grad_norm": 0.6438474059104919, + "learning_rate": 8.177386036631119e-05, + 
"loss": 2.4976, + "step": 11212 + }, + { + "epoch": 0.9049309983052215, + "grad_norm": 0.6786646842956543, + "learning_rate": 8.175833812450445e-05, + "loss": 2.4584, + "step": 11213 + }, + { + "epoch": 0.9050117020418046, + "grad_norm": 0.6480324268341064, + "learning_rate": 8.174281633732764e-05, + "loss": 2.5021, + "step": 11214 + }, + { + "epoch": 0.9050924057783876, + "grad_norm": 0.7232171893119812, + "learning_rate": 8.172729500516756e-05, + "loss": 2.4742, + "step": 11215 + }, + { + "epoch": 0.9051731095149705, + "grad_norm": 0.7048845291137695, + "learning_rate": 8.171177412841105e-05, + "loss": 2.518, + "step": 11216 + }, + { + "epoch": 0.9052538132515535, + "grad_norm": 0.6363180875778198, + "learning_rate": 8.169625370744496e-05, + "loss": 2.5154, + "step": 11217 + }, + { + "epoch": 0.9053345169881366, + "grad_norm": 0.7176045179367065, + "learning_rate": 8.168073374265605e-05, + "loss": 2.5182, + "step": 11218 + }, + { + "epoch": 0.9054152207247196, + "grad_norm": 0.7011643052101135, + "learning_rate": 8.166521423443112e-05, + "loss": 2.5615, + "step": 11219 + }, + { + "epoch": 0.9054959244613026, + "grad_norm": 0.6853327751159668, + "learning_rate": 8.164969518315704e-05, + "loss": 2.5057, + "step": 11220 + }, + { + "epoch": 0.9055766281978855, + "grad_norm": 0.6972528696060181, + "learning_rate": 8.163417658922049e-05, + "loss": 2.4949, + "step": 11221 + }, + { + "epoch": 0.9056573319344685, + "grad_norm": 0.6780978441238403, + "learning_rate": 8.161865845300824e-05, + "loss": 2.5601, + "step": 11222 + }, + { + "epoch": 0.9057380356710516, + "grad_norm": 0.6454098224639893, + "learning_rate": 8.160314077490711e-05, + "loss": 2.4203, + "step": 11223 + }, + { + "epoch": 0.9058187394076346, + "grad_norm": 0.7300907969474792, + "learning_rate": 8.158762355530378e-05, + "loss": 2.4818, + "step": 11224 + }, + { + "epoch": 0.9058994431442176, + "grad_norm": 0.682475745677948, + "learning_rate": 8.1572106794585e-05, + "loss": 2.4852, + "step": 11225 + }, + { 
+ "epoch": 0.9059801468808005, + "grad_norm": 0.6666192412376404, + "learning_rate": 8.155659049313754e-05, + "loss": 2.5642, + "step": 11226 + }, + { + "epoch": 0.9060608506173836, + "grad_norm": 0.6873177886009216, + "learning_rate": 8.154107465134801e-05, + "loss": 2.5163, + "step": 11227 + }, + { + "epoch": 0.9061415543539666, + "grad_norm": 0.6704845428466797, + "learning_rate": 8.152555926960315e-05, + "loss": 2.5481, + "step": 11228 + }, + { + "epoch": 0.9062222580905496, + "grad_norm": 0.6340618133544922, + "learning_rate": 8.151004434828963e-05, + "loss": 2.4701, + "step": 11229 + }, + { + "epoch": 0.9063029618271325, + "grad_norm": 0.7886226177215576, + "learning_rate": 8.14945298877942e-05, + "loss": 2.5322, + "step": 11230 + }, + { + "epoch": 0.9063836655637156, + "grad_norm": 0.7086018919944763, + "learning_rate": 8.14790158885034e-05, + "loss": 2.4909, + "step": 11231 + }, + { + "epoch": 0.9064643693002986, + "grad_norm": 0.6791329979896545, + "learning_rate": 8.146350235080396e-05, + "loss": 2.4438, + "step": 11232 + }, + { + "epoch": 0.9065450730368816, + "grad_norm": 0.7070720791816711, + "learning_rate": 8.14479892750825e-05, + "loss": 2.528, + "step": 11233 + }, + { + "epoch": 0.9066257767734646, + "grad_norm": 0.6551348567008972, + "learning_rate": 8.143247666172564e-05, + "loss": 2.4747, + "step": 11234 + }, + { + "epoch": 0.9067064805100477, + "grad_norm": 0.6691645979881287, + "learning_rate": 8.141696451111997e-05, + "loss": 2.5038, + "step": 11235 + }, + { + "epoch": 0.9067871842466306, + "grad_norm": 0.6814864277839661, + "learning_rate": 8.14014528236522e-05, + "loss": 2.5737, + "step": 11236 + }, + { + "epoch": 0.9068678879832136, + "grad_norm": 0.7442377209663391, + "learning_rate": 8.138594159970877e-05, + "loss": 2.5839, + "step": 11237 + }, + { + "epoch": 0.9069485917197966, + "grad_norm": 0.6861338019371033, + "learning_rate": 8.137043083967634e-05, + "loss": 2.567, + "step": 11238 + }, + { + "epoch": 0.9070292954563797, + 
"grad_norm": 0.7056479454040527, + "learning_rate": 8.135492054394151e-05, + "loss": 2.5297, + "step": 11239 + }, + { + "epoch": 0.9071099991929626, + "grad_norm": 0.7166962623596191, + "learning_rate": 8.133941071289076e-05, + "loss": 2.4834, + "step": 11240 + }, + { + "epoch": 0.9071907029295456, + "grad_norm": 0.6285616159439087, + "learning_rate": 8.132390134691068e-05, + "loss": 2.5066, + "step": 11241 + }, + { + "epoch": 0.9072714066661286, + "grad_norm": 0.681915283203125, + "learning_rate": 8.130839244638783e-05, + "loss": 2.5387, + "step": 11242 + }, + { + "epoch": 0.9073521104027117, + "grad_norm": 0.6876898407936096, + "learning_rate": 8.129288401170866e-05, + "loss": 2.4465, + "step": 11243 + }, + { + "epoch": 0.9074328141392947, + "grad_norm": 0.657132625579834, + "learning_rate": 8.127737604325975e-05, + "loss": 2.499, + "step": 11244 + }, + { + "epoch": 0.9075135178758776, + "grad_norm": 0.6678825616836548, + "learning_rate": 8.126186854142752e-05, + "loss": 2.4872, + "step": 11245 + }, + { + "epoch": 0.9075942216124606, + "grad_norm": 0.7296879291534424, + "learning_rate": 8.124636150659858e-05, + "loss": 2.4783, + "step": 11246 + }, + { + "epoch": 0.9076749253490437, + "grad_norm": 0.7087056040763855, + "learning_rate": 8.12308549391593e-05, + "loss": 2.507, + "step": 11247 + }, + { + "epoch": 0.9077556290856267, + "grad_norm": 0.7099738121032715, + "learning_rate": 8.121534883949616e-05, + "loss": 2.5317, + "step": 11248 + }, + { + "epoch": 0.9078363328222097, + "grad_norm": 0.6421170830726624, + "learning_rate": 8.119984320799566e-05, + "loss": 2.5291, + "step": 11249 + }, + { + "epoch": 0.9079170365587926, + "grad_norm": 0.6835018396377563, + "learning_rate": 8.11843380450442e-05, + "loss": 2.5523, + "step": 11250 + }, + { + "epoch": 0.9079977402953757, + "grad_norm": 0.6638229489326477, + "learning_rate": 8.11688333510282e-05, + "loss": 2.5128, + "step": 11251 + }, + { + "epoch": 0.9080784440319587, + "grad_norm": 0.6783459186553955, + 
"learning_rate": 8.115332912633415e-05, + "loss": 2.5485, + "step": 11252 + }, + { + "epoch": 0.9081591477685417, + "grad_norm": 0.65911865234375, + "learning_rate": 8.113782537134838e-05, + "loss": 2.5408, + "step": 11253 + }, + { + "epoch": 0.9082398515051247, + "grad_norm": 0.6844244003295898, + "learning_rate": 8.112232208645729e-05, + "loss": 2.6067, + "step": 11254 + }, + { + "epoch": 0.9083205552417077, + "grad_norm": 0.6896870136260986, + "learning_rate": 8.110681927204729e-05, + "loss": 2.5444, + "step": 11255 + }, + { + "epoch": 0.9084012589782907, + "grad_norm": 0.6693820953369141, + "learning_rate": 8.109131692850473e-05, + "loss": 2.5118, + "step": 11256 + }, + { + "epoch": 0.9084819627148737, + "grad_norm": 0.6401854753494263, + "learning_rate": 8.107581505621599e-05, + "loss": 2.4811, + "step": 11257 + }, + { + "epoch": 0.9085626664514567, + "grad_norm": 0.6861663460731506, + "learning_rate": 8.106031365556743e-05, + "loss": 2.4633, + "step": 11258 + }, + { + "epoch": 0.9086433701880398, + "grad_norm": 0.6631655097007751, + "learning_rate": 8.104481272694533e-05, + "loss": 2.5748, + "step": 11259 + }, + { + "epoch": 0.9087240739246227, + "grad_norm": 0.6499454975128174, + "learning_rate": 8.102931227073604e-05, + "loss": 2.5573, + "step": 11260 + }, + { + "epoch": 0.9088047776612057, + "grad_norm": 0.7214524149894714, + "learning_rate": 8.10138122873259e-05, + "loss": 2.4905, + "step": 11261 + }, + { + "epoch": 0.9088854813977887, + "grad_norm": 0.6481152176856995, + "learning_rate": 8.099831277710122e-05, + "loss": 2.5073, + "step": 11262 + }, + { + "epoch": 0.9089661851343718, + "grad_norm": 0.6666486859321594, + "learning_rate": 8.09828137404482e-05, + "loss": 2.5379, + "step": 11263 + }, + { + "epoch": 0.9090468888709548, + "grad_norm": 0.7186474800109863, + "learning_rate": 8.096731517775319e-05, + "loss": 2.5164, + "step": 11264 + }, + { + "epoch": 0.9091275926075377, + "grad_norm": 0.6838653087615967, + "learning_rate": 8.095181708940245e-05, 
+ "loss": 2.49, + "step": 11265 + }, + { + "epoch": 0.9092082963441207, + "grad_norm": 0.7740866541862488, + "learning_rate": 8.093631947578221e-05, + "loss": 2.5487, + "step": 11266 + }, + { + "epoch": 0.9092890000807038, + "grad_norm": 0.7198607325553894, + "learning_rate": 8.092082233727871e-05, + "loss": 2.4477, + "step": 11267 + }, + { + "epoch": 0.9093697038172868, + "grad_norm": 0.6454673409461975, + "learning_rate": 8.090532567427825e-05, + "loss": 2.523, + "step": 11268 + }, + { + "epoch": 0.9094504075538697, + "grad_norm": 0.6169581413269043, + "learning_rate": 8.088982948716692e-05, + "loss": 2.4924, + "step": 11269 + }, + { + "epoch": 0.9095311112904527, + "grad_norm": 0.7034861445426941, + "learning_rate": 8.0874333776331e-05, + "loss": 2.4756, + "step": 11270 + }, + { + "epoch": 0.9096118150270357, + "grad_norm": 0.7231355309486389, + "learning_rate": 8.085883854215671e-05, + "loss": 2.4963, + "step": 11271 + }, + { + "epoch": 0.9096925187636188, + "grad_norm": 0.6597892045974731, + "learning_rate": 8.084334378503017e-05, + "loss": 2.5617, + "step": 11272 + }, + { + "epoch": 0.9097732225002018, + "grad_norm": 0.7257365584373474, + "learning_rate": 8.082784950533759e-05, + "loss": 2.5293, + "step": 11273 + }, + { + "epoch": 0.9098539262367847, + "grad_norm": 0.7305313944816589, + "learning_rate": 8.081235570346512e-05, + "loss": 2.5355, + "step": 11274 + }, + { + "epoch": 0.9099346299733677, + "grad_norm": 0.6814435720443726, + "learning_rate": 8.07968623797989e-05, + "loss": 2.4842, + "step": 11275 + }, + { + "epoch": 0.9100153337099508, + "grad_norm": 0.7342902421951294, + "learning_rate": 8.078136953472506e-05, + "loss": 2.4817, + "step": 11276 + }, + { + "epoch": 0.9100960374465338, + "grad_norm": 0.6456516981124878, + "learning_rate": 8.076587716862973e-05, + "loss": 2.5119, + "step": 11277 + }, + { + "epoch": 0.9101767411831168, + "grad_norm": 0.7268881797790527, + "learning_rate": 8.075038528189906e-05, + "loss": 2.4614, + "step": 11278 + }, + { 
+ "epoch": 0.9102574449196997, + "grad_norm": 0.6901549696922302, + "learning_rate": 8.073489387491906e-05, + "loss": 2.5411, + "step": 11279 + }, + { + "epoch": 0.9103381486562828, + "grad_norm": 0.6850160956382751, + "learning_rate": 8.071940294807588e-05, + "loss": 2.5078, + "step": 11280 + }, + { + "epoch": 0.9104188523928658, + "grad_norm": 0.6550731658935547, + "learning_rate": 8.070391250175558e-05, + "loss": 2.5502, + "step": 11281 + }, + { + "epoch": 0.9104995561294488, + "grad_norm": 0.7524412274360657, + "learning_rate": 8.068842253634421e-05, + "loss": 2.4699, + "step": 11282 + }, + { + "epoch": 0.9105802598660317, + "grad_norm": 0.6659243702888489, + "learning_rate": 8.067293305222784e-05, + "loss": 2.557, + "step": 11283 + }, + { + "epoch": 0.9106609636026148, + "grad_norm": 0.67015540599823, + "learning_rate": 8.065744404979251e-05, + "loss": 2.5929, + "step": 11284 + }, + { + "epoch": 0.9107416673391978, + "grad_norm": 0.7139000296592712, + "learning_rate": 8.064195552942422e-05, + "loss": 2.5262, + "step": 11285 + }, + { + "epoch": 0.9108223710757808, + "grad_norm": 0.6918016672134399, + "learning_rate": 8.062646749150899e-05, + "loss": 2.5161, + "step": 11286 + }, + { + "epoch": 0.9109030748123638, + "grad_norm": 0.7395541667938232, + "learning_rate": 8.061097993643289e-05, + "loss": 2.5351, + "step": 11287 + }, + { + "epoch": 0.9109837785489469, + "grad_norm": 0.6794499158859253, + "learning_rate": 8.05954928645818e-05, + "loss": 2.4617, + "step": 11288 + }, + { + "epoch": 0.9110644822855298, + "grad_norm": 0.6906577348709106, + "learning_rate": 8.058000627634176e-05, + "loss": 2.5701, + "step": 11289 + }, + { + "epoch": 0.9111451860221128, + "grad_norm": 0.6954079866409302, + "learning_rate": 8.056452017209874e-05, + "loss": 2.5137, + "step": 11290 + }, + { + "epoch": 0.9112258897586958, + "grad_norm": 0.7381381988525391, + "learning_rate": 8.054903455223866e-05, + "loss": 2.6666, + "step": 11291 + }, + { + "epoch": 0.9113065934952789, + 
"grad_norm": 0.6731518507003784, + "learning_rate": 8.053354941714749e-05, + "loss": 2.5173, + "step": 11292 + }, + { + "epoch": 0.9113872972318618, + "grad_norm": 0.6976885795593262, + "learning_rate": 8.051806476721116e-05, + "loss": 2.5089, + "step": 11293 + }, + { + "epoch": 0.9114680009684448, + "grad_norm": 0.6401965618133545, + "learning_rate": 8.050258060281562e-05, + "loss": 2.5295, + "step": 11294 + }, + { + "epoch": 0.9115487047050278, + "grad_norm": 0.7409671545028687, + "learning_rate": 8.048709692434667e-05, + "loss": 2.5074, + "step": 11295 + }, + { + "epoch": 0.9116294084416109, + "grad_norm": 0.6028234958648682, + "learning_rate": 8.04716137321903e-05, + "loss": 2.5437, + "step": 11296 + }, + { + "epoch": 0.9117101121781939, + "grad_norm": 0.727643609046936, + "learning_rate": 8.04561310267324e-05, + "loss": 2.5272, + "step": 11297 + }, + { + "epoch": 0.9117908159147768, + "grad_norm": 0.6912926435470581, + "learning_rate": 8.044064880835876e-05, + "loss": 2.5166, + "step": 11298 + }, + { + "epoch": 0.9118715196513598, + "grad_norm": 0.6971367001533508, + "learning_rate": 8.042516707745528e-05, + "loss": 2.5421, + "step": 11299 + }, + { + "epoch": 0.9119522233879429, + "grad_norm": 0.6722451448440552, + "learning_rate": 8.040968583440783e-05, + "loss": 2.5088, + "step": 11300 + }, + { + "epoch": 0.9120329271245259, + "grad_norm": 0.6469144225120544, + "learning_rate": 8.03942050796022e-05, + "loss": 2.4921, + "step": 11301 + }, + { + "epoch": 0.9121136308611089, + "grad_norm": 0.6709008812904358, + "learning_rate": 8.037872481342423e-05, + "loss": 2.4553, + "step": 11302 + }, + { + "epoch": 0.9121943345976918, + "grad_norm": 0.6540920734405518, + "learning_rate": 8.036324503625977e-05, + "loss": 2.489, + "step": 11303 + }, + { + "epoch": 0.9122750383342749, + "grad_norm": 0.6589755415916443, + "learning_rate": 8.034776574849453e-05, + "loss": 2.5195, + "step": 11304 + }, + { + "epoch": 0.9123557420708579, + "grad_norm": 0.676943838596344, + 
"learning_rate": 8.033228695051434e-05, + "loss": 2.4877, + "step": 11305 + }, + { + "epoch": 0.9124364458074409, + "grad_norm": 0.6509177088737488, + "learning_rate": 8.031680864270498e-05, + "loss": 2.5229, + "step": 11306 + }, + { + "epoch": 0.9125171495440239, + "grad_norm": 0.7480820417404175, + "learning_rate": 8.030133082545219e-05, + "loss": 2.5016, + "step": 11307 + }, + { + "epoch": 0.9125978532806069, + "grad_norm": 0.7130550742149353, + "learning_rate": 8.028585349914174e-05, + "loss": 2.5251, + "step": 11308 + }, + { + "epoch": 0.9126785570171899, + "grad_norm": 0.6959688067436218, + "learning_rate": 8.027037666415934e-05, + "loss": 2.4776, + "step": 11309 + }, + { + "epoch": 0.9127592607537729, + "grad_norm": 0.7540854215621948, + "learning_rate": 8.025490032089076e-05, + "loss": 2.5097, + "step": 11310 + }, + { + "epoch": 0.9128399644903559, + "grad_norm": 0.6921199560165405, + "learning_rate": 8.023942446972165e-05, + "loss": 2.5354, + "step": 11311 + }, + { + "epoch": 0.912920668226939, + "grad_norm": 0.649824857711792, + "learning_rate": 8.022394911103774e-05, + "loss": 2.5398, + "step": 11312 + }, + { + "epoch": 0.9130013719635219, + "grad_norm": 0.6951068639755249, + "learning_rate": 8.020847424522474e-05, + "loss": 2.5302, + "step": 11313 + }, + { + "epoch": 0.9130820757001049, + "grad_norm": 0.6906851530075073, + "learning_rate": 8.019299987266827e-05, + "loss": 2.581, + "step": 11314 + }, + { + "epoch": 0.9131627794366879, + "grad_norm": 0.6758459210395813, + "learning_rate": 8.0177525993754e-05, + "loss": 2.5208, + "step": 11315 + }, + { + "epoch": 0.913243483173271, + "grad_norm": 0.6915175318717957, + "learning_rate": 8.016205260886766e-05, + "loss": 2.5386, + "step": 11316 + }, + { + "epoch": 0.913324186909854, + "grad_norm": 0.7083550691604614, + "learning_rate": 8.014657971839476e-05, + "loss": 2.4895, + "step": 11317 + }, + { + "epoch": 0.9134048906464369, + "grad_norm": 0.7052562832832336, + "learning_rate": 8.013110732272102e-05, + 
"loss": 2.4896, + "step": 11318 + }, + { + "epoch": 0.9134855943830199, + "grad_norm": 0.7811834216117859, + "learning_rate": 8.011563542223206e-05, + "loss": 2.5082, + "step": 11319 + }, + { + "epoch": 0.913566298119603, + "grad_norm": 0.6207153797149658, + "learning_rate": 8.01001640173134e-05, + "loss": 2.4967, + "step": 11320 + }, + { + "epoch": 0.913647001856186, + "grad_norm": 0.7637950778007507, + "learning_rate": 8.008469310835065e-05, + "loss": 2.4907, + "step": 11321 + }, + { + "epoch": 0.913727705592769, + "grad_norm": 0.7263950705528259, + "learning_rate": 8.006922269572947e-05, + "loss": 2.5259, + "step": 11322 + }, + { + "epoch": 0.9138084093293519, + "grad_norm": 0.6965721845626831, + "learning_rate": 8.005375277983531e-05, + "loss": 2.5648, + "step": 11323 + }, + { + "epoch": 0.9138891130659349, + "grad_norm": 0.7146127223968506, + "learning_rate": 8.003828336105377e-05, + "loss": 2.53, + "step": 11324 + }, + { + "epoch": 0.913969816802518, + "grad_norm": 0.7083697319030762, + "learning_rate": 8.00228144397704e-05, + "loss": 2.4923, + "step": 11325 + }, + { + "epoch": 0.914050520539101, + "grad_norm": 0.7259312868118286, + "learning_rate": 8.000734601637074e-05, + "loss": 2.5303, + "step": 11326 + }, + { + "epoch": 0.9141312242756839, + "grad_norm": 0.7072086930274963, + "learning_rate": 7.999187809124025e-05, + "loss": 2.4662, + "step": 11327 + }, + { + "epoch": 0.9142119280122669, + "grad_norm": 0.7216035723686218, + "learning_rate": 7.997641066476445e-05, + "loss": 2.5069, + "step": 11328 + }, + { + "epoch": 0.91429263174885, + "grad_norm": 0.6925712823867798, + "learning_rate": 7.99609437373289e-05, + "loss": 2.5107, + "step": 11329 + }, + { + "epoch": 0.914373335485433, + "grad_norm": 0.6672701835632324, + "learning_rate": 7.994547730931896e-05, + "loss": 2.5248, + "step": 11330 + }, + { + "epoch": 0.914454039222016, + "grad_norm": 0.8058515787124634, + "learning_rate": 7.993001138112016e-05, + "loss": 2.4427, + "step": 11331 + }, + { + 
"epoch": 0.9145347429585989, + "grad_norm": 0.6942592859268188, + "learning_rate": 7.991454595311795e-05, + "loss": 2.6163, + "step": 11332 + }, + { + "epoch": 0.914615446695182, + "grad_norm": 0.7051894068717957, + "learning_rate": 7.989908102569774e-05, + "loss": 2.5327, + "step": 11333 + }, + { + "epoch": 0.914696150431765, + "grad_norm": 0.6824771761894226, + "learning_rate": 7.988361659924496e-05, + "loss": 2.4843, + "step": 11334 + }, + { + "epoch": 0.914776854168348, + "grad_norm": 0.6756488084793091, + "learning_rate": 7.98681526741451e-05, + "loss": 2.5215, + "step": 11335 + }, + { + "epoch": 0.914857557904931, + "grad_norm": 0.6988239288330078, + "learning_rate": 7.985268925078344e-05, + "loss": 2.5153, + "step": 11336 + }, + { + "epoch": 0.914938261641514, + "grad_norm": 0.6446006298065186, + "learning_rate": 7.983722632954544e-05, + "loss": 2.5081, + "step": 11337 + }, + { + "epoch": 0.915018965378097, + "grad_norm": 0.6828100681304932, + "learning_rate": 7.982176391081649e-05, + "loss": 2.5607, + "step": 11338 + }, + { + "epoch": 0.91509966911468, + "grad_norm": 0.659721851348877, + "learning_rate": 7.980630199498193e-05, + "loss": 2.531, + "step": 11339 + }, + { + "epoch": 0.915180372851263, + "grad_norm": 0.6298564076423645, + "learning_rate": 7.979084058242709e-05, + "loss": 2.513, + "step": 11340 + }, + { + "epoch": 0.9152610765878461, + "grad_norm": 0.664299726486206, + "learning_rate": 7.977537967353735e-05, + "loss": 2.5533, + "step": 11341 + }, + { + "epoch": 0.915341780324429, + "grad_norm": 0.7035108804702759, + "learning_rate": 7.975991926869801e-05, + "loss": 2.4868, + "step": 11342 + }, + { + "epoch": 0.915422484061012, + "grad_norm": 0.7428407073020935, + "learning_rate": 7.974445936829438e-05, + "loss": 2.5694, + "step": 11343 + }, + { + "epoch": 0.915503187797595, + "grad_norm": 0.6845505237579346, + "learning_rate": 7.972899997271176e-05, + "loss": 2.5092, + "step": 11344 + }, + { + "epoch": 0.9155838915341781, + "grad_norm": 
0.7135340571403503, + "learning_rate": 7.971354108233551e-05, + "loss": 2.5157, + "step": 11345 + }, + { + "epoch": 0.915664595270761, + "grad_norm": 0.7032433152198792, + "learning_rate": 7.969808269755077e-05, + "loss": 2.5292, + "step": 11346 + }, + { + "epoch": 0.915745299007344, + "grad_norm": 0.6874690651893616, + "learning_rate": 7.96826248187429e-05, + "loss": 2.5312, + "step": 11347 + }, + { + "epoch": 0.915826002743927, + "grad_norm": 0.6497030258178711, + "learning_rate": 7.966716744629718e-05, + "loss": 2.505, + "step": 11348 + }, + { + "epoch": 0.9159067064805101, + "grad_norm": 0.6618520021438599, + "learning_rate": 7.965171058059874e-05, + "loss": 2.5287, + "step": 11349 + }, + { + "epoch": 0.9159874102170931, + "grad_norm": 0.6737041473388672, + "learning_rate": 7.963625422203288e-05, + "loss": 2.5494, + "step": 11350 + }, + { + "epoch": 0.916068113953676, + "grad_norm": 0.705646276473999, + "learning_rate": 7.96207983709848e-05, + "loss": 2.5402, + "step": 11351 + }, + { + "epoch": 0.916148817690259, + "grad_norm": 0.6852068901062012, + "learning_rate": 7.96053430278397e-05, + "loss": 2.51, + "step": 11352 + }, + { + "epoch": 0.9162295214268421, + "grad_norm": 0.7166822552680969, + "learning_rate": 7.958988819298274e-05, + "loss": 2.576, + "step": 11353 + }, + { + "epoch": 0.9163102251634251, + "grad_norm": 0.6349207162857056, + "learning_rate": 7.957443386679913e-05, + "loss": 2.5219, + "step": 11354 + }, + { + "epoch": 0.9163909289000081, + "grad_norm": 0.6504647135734558, + "learning_rate": 7.955898004967406e-05, + "loss": 2.4593, + "step": 11355 + }, + { + "epoch": 0.916471632636591, + "grad_norm": 0.7313871383666992, + "learning_rate": 7.95435267419926e-05, + "loss": 2.5616, + "step": 11356 + }, + { + "epoch": 0.9165523363731741, + "grad_norm": 0.6948587894439697, + "learning_rate": 7.95280739441399e-05, + "loss": 2.4608, + "step": 11357 + }, + { + "epoch": 0.9166330401097571, + "grad_norm": 0.6130328178405762, + "learning_rate": 
7.95126216565012e-05, + "loss": 2.5563, + "step": 11358 + }, + { + "epoch": 0.9167137438463401, + "grad_norm": 0.7149228453636169, + "learning_rate": 7.949716987946145e-05, + "loss": 2.5664, + "step": 11359 + }, + { + "epoch": 0.916794447582923, + "grad_norm": 0.7452285289764404, + "learning_rate": 7.948171861340584e-05, + "loss": 2.525, + "step": 11360 + }, + { + "epoch": 0.9168751513195061, + "grad_norm": 0.6840611100196838, + "learning_rate": 7.946626785871945e-05, + "loss": 2.537, + "step": 11361 + }, + { + "epoch": 0.9169558550560891, + "grad_norm": 0.7269708514213562, + "learning_rate": 7.945081761578732e-05, + "loss": 2.5227, + "step": 11362 + }, + { + "epoch": 0.9170365587926721, + "grad_norm": 0.6521697044372559, + "learning_rate": 7.943536788499452e-05, + "loss": 2.54, + "step": 11363 + }, + { + "epoch": 0.9171172625292551, + "grad_norm": 0.6516863107681274, + "learning_rate": 7.941991866672618e-05, + "loss": 2.4788, + "step": 11364 + }, + { + "epoch": 0.9171979662658382, + "grad_norm": 0.7673580050468445, + "learning_rate": 7.94044699613672e-05, + "loss": 2.4678, + "step": 11365 + }, + { + "epoch": 0.9172786700024211, + "grad_norm": 0.6666994690895081, + "learning_rate": 7.938902176930268e-05, + "loss": 2.5251, + "step": 11366 + }, + { + "epoch": 0.9173593737390041, + "grad_norm": 0.7261863946914673, + "learning_rate": 7.937357409091761e-05, + "loss": 2.4977, + "step": 11367 + }, + { + "epoch": 0.9174400774755871, + "grad_norm": 0.6920679807662964, + "learning_rate": 7.9358126926597e-05, + "loss": 2.5367, + "step": 11368 + }, + { + "epoch": 0.9175207812121702, + "grad_norm": 0.6715712547302246, + "learning_rate": 7.93426802767258e-05, + "loss": 2.4898, + "step": 11369 + }, + { + "epoch": 0.9176014849487532, + "grad_norm": 0.7014333605766296, + "learning_rate": 7.932723414168904e-05, + "loss": 2.4507, + "step": 11370 + }, + { + "epoch": 0.9176821886853361, + "grad_norm": 0.6755761504173279, + "learning_rate": 7.931178852187163e-05, + "loss": 2.5895, + 
"step": 11371 + }, + { + "epoch": 0.9177628924219191, + "grad_norm": 0.6846731305122375, + "learning_rate": 7.929634341765852e-05, + "loss": 2.5002, + "step": 11372 + }, + { + "epoch": 0.9178435961585021, + "grad_norm": 0.6422831416130066, + "learning_rate": 7.928089882943466e-05, + "loss": 2.5326, + "step": 11373 + }, + { + "epoch": 0.9179242998950852, + "grad_norm": 0.7256442308425903, + "learning_rate": 7.9265454757585e-05, + "loss": 2.5706, + "step": 11374 + }, + { + "epoch": 0.9180050036316681, + "grad_norm": 0.6514387130737305, + "learning_rate": 7.925001120249436e-05, + "loss": 2.5349, + "step": 11375 + }, + { + "epoch": 0.9180857073682511, + "grad_norm": 0.7596457600593567, + "learning_rate": 7.923456816454768e-05, + "loss": 2.4767, + "step": 11376 + }, + { + "epoch": 0.9181664111048341, + "grad_norm": 0.673283040523529, + "learning_rate": 7.921912564412988e-05, + "loss": 2.5156, + "step": 11377 + }, + { + "epoch": 0.9182471148414172, + "grad_norm": 0.6964103579521179, + "learning_rate": 7.920368364162575e-05, + "loss": 2.5293, + "step": 11378 + }, + { + "epoch": 0.9183278185780002, + "grad_norm": 0.6765062212944031, + "learning_rate": 7.91882421574202e-05, + "loss": 2.5757, + "step": 11379 + }, + { + "epoch": 0.9184085223145831, + "grad_norm": 0.7039035558700562, + "learning_rate": 7.917280119189811e-05, + "loss": 2.513, + "step": 11380 + }, + { + "epoch": 0.9184892260511661, + "grad_norm": 0.6523976922035217, + "learning_rate": 7.915736074544419e-05, + "loss": 2.4712, + "step": 11381 + }, + { + "epoch": 0.9185699297877492, + "grad_norm": 0.7159552574157715, + "learning_rate": 7.914192081844334e-05, + "loss": 2.4713, + "step": 11382 + }, + { + "epoch": 0.9186506335243322, + "grad_norm": 0.7071694731712341, + "learning_rate": 7.912648141128036e-05, + "loss": 2.5367, + "step": 11383 + }, + { + "epoch": 0.9187313372609152, + "grad_norm": 0.6675183773040771, + "learning_rate": 7.911104252434e-05, + "loss": 2.5372, + "step": 11384 + }, + { + "epoch": 
0.9188120409974981, + "grad_norm": 0.7293995022773743, + "learning_rate": 7.909560415800707e-05, + "loss": 2.5469, + "step": 11385 + }, + { + "epoch": 0.9188927447340812, + "grad_norm": 0.6774035096168518, + "learning_rate": 7.908016631266635e-05, + "loss": 2.5655, + "step": 11386 + }, + { + "epoch": 0.9189734484706642, + "grad_norm": 0.7068144083023071, + "learning_rate": 7.906472898870256e-05, + "loss": 2.5265, + "step": 11387 + }, + { + "epoch": 0.9190541522072472, + "grad_norm": 0.6756324172019958, + "learning_rate": 7.904929218650044e-05, + "loss": 2.4966, + "step": 11388 + }, + { + "epoch": 0.9191348559438302, + "grad_norm": 0.6964625120162964, + "learning_rate": 7.903385590644473e-05, + "loss": 2.5646, + "step": 11389 + }, + { + "epoch": 0.9192155596804132, + "grad_norm": 0.6760976314544678, + "learning_rate": 7.901842014892018e-05, + "loss": 2.5159, + "step": 11390 + }, + { + "epoch": 0.9192962634169962, + "grad_norm": 0.6648714542388916, + "learning_rate": 7.900298491431139e-05, + "loss": 2.5715, + "step": 11391 + }, + { + "epoch": 0.9193769671535792, + "grad_norm": 0.7492914199829102, + "learning_rate": 7.898755020300312e-05, + "loss": 2.5226, + "step": 11392 + }, + { + "epoch": 0.9194576708901622, + "grad_norm": 0.7041164040565491, + "learning_rate": 7.897211601538004e-05, + "loss": 2.5809, + "step": 11393 + }, + { + "epoch": 0.9195383746267453, + "grad_norm": 0.6746383309364319, + "learning_rate": 7.895668235182677e-05, + "loss": 2.5369, + "step": 11394 + }, + { + "epoch": 0.9196190783633282, + "grad_norm": 0.6486156582832336, + "learning_rate": 7.894124921272798e-05, + "loss": 2.5406, + "step": 11395 + }, + { + "epoch": 0.9196997820999112, + "grad_norm": 0.6828807592391968, + "learning_rate": 7.892581659846834e-05, + "loss": 2.5241, + "step": 11396 + }, + { + "epoch": 0.9197804858364942, + "grad_norm": 0.694970428943634, + "learning_rate": 7.891038450943242e-05, + "loss": 2.4402, + "step": 11397 + }, + { + "epoch": 0.9198611895730773, + "grad_norm": 
0.7187039852142334, + "learning_rate": 7.889495294600484e-05, + "loss": 2.5052, + "step": 11398 + }, + { + "epoch": 0.9199418933096603, + "grad_norm": 0.6919832825660706, + "learning_rate": 7.887952190857024e-05, + "loss": 2.5078, + "step": 11399 + }, + { + "epoch": 0.9200225970462432, + "grad_norm": 0.7129504084587097, + "learning_rate": 7.886409139751313e-05, + "loss": 2.5047, + "step": 11400 + }, + { + "epoch": 0.9201033007828262, + "grad_norm": 0.6755272746086121, + "learning_rate": 7.88486614132181e-05, + "loss": 2.4821, + "step": 11401 + }, + { + "epoch": 0.9201840045194093, + "grad_norm": 0.7253937125205994, + "learning_rate": 7.883323195606973e-05, + "loss": 2.5062, + "step": 11402 + }, + { + "epoch": 0.9202647082559923, + "grad_norm": 0.7057155966758728, + "learning_rate": 7.881780302645257e-05, + "loss": 2.5475, + "step": 11403 + }, + { + "epoch": 0.9203454119925752, + "grad_norm": 0.713869571685791, + "learning_rate": 7.880237462475111e-05, + "loss": 2.5335, + "step": 11404 + }, + { + "epoch": 0.9204261157291582, + "grad_norm": 0.769648551940918, + "learning_rate": 7.878694675134987e-05, + "loss": 2.4944, + "step": 11405 + }, + { + "epoch": 0.9205068194657413, + "grad_norm": 0.6444964408874512, + "learning_rate": 7.877151940663343e-05, + "loss": 2.5755, + "step": 11406 + }, + { + "epoch": 0.9205875232023243, + "grad_norm": 0.6811819672584534, + "learning_rate": 7.875609259098618e-05, + "loss": 2.5475, + "step": 11407 + }, + { + "epoch": 0.9206682269389073, + "grad_norm": 0.6959417462348938, + "learning_rate": 7.874066630479259e-05, + "loss": 2.5095, + "step": 11408 + }, + { + "epoch": 0.9207489306754902, + "grad_norm": 0.6721363067626953, + "learning_rate": 7.872524054843724e-05, + "loss": 2.5166, + "step": 11409 + }, + { + "epoch": 0.9208296344120733, + "grad_norm": 0.713122546672821, + "learning_rate": 7.870981532230447e-05, + "loss": 2.5084, + "step": 11410 + }, + { + "epoch": 0.9209103381486563, + "grad_norm": 0.7059469819068909, + "learning_rate": 
7.869439062677876e-05, + "loss": 2.437, + "step": 11411 + }, + { + "epoch": 0.9209910418852393, + "grad_norm": 0.6808314323425293, + "learning_rate": 7.867896646224454e-05, + "loss": 2.5658, + "step": 11412 + }, + { + "epoch": 0.9210717456218223, + "grad_norm": 0.7060894966125488, + "learning_rate": 7.86635428290862e-05, + "loss": 2.515, + "step": 11413 + }, + { + "epoch": 0.9211524493584053, + "grad_norm": 0.7538465857505798, + "learning_rate": 7.864811972768813e-05, + "loss": 2.4448, + "step": 11414 + }, + { + "epoch": 0.9212331530949883, + "grad_norm": 0.6824522018432617, + "learning_rate": 7.863269715843478e-05, + "loss": 2.503, + "step": 11415 + }, + { + "epoch": 0.9213138568315713, + "grad_norm": 0.7068174481391907, + "learning_rate": 7.861727512171044e-05, + "loss": 2.5198, + "step": 11416 + }, + { + "epoch": 0.9213945605681543, + "grad_norm": 0.6742961406707764, + "learning_rate": 7.860185361789948e-05, + "loss": 2.5167, + "step": 11417 + }, + { + "epoch": 0.9214752643047374, + "grad_norm": 0.7643383741378784, + "learning_rate": 7.858643264738628e-05, + "loss": 2.5508, + "step": 11418 + }, + { + "epoch": 0.9215559680413203, + "grad_norm": 0.6737802028656006, + "learning_rate": 7.857101221055518e-05, + "loss": 2.589, + "step": 11419 + }, + { + "epoch": 0.9216366717779033, + "grad_norm": 0.668214738368988, + "learning_rate": 7.855559230779043e-05, + "loss": 2.4747, + "step": 11420 + }, + { + "epoch": 0.9217173755144863, + "grad_norm": 0.6933084726333618, + "learning_rate": 7.854017293947638e-05, + "loss": 2.5171, + "step": 11421 + }, + { + "epoch": 0.9217980792510694, + "grad_norm": 0.6320228576660156, + "learning_rate": 7.852475410599736e-05, + "loss": 2.5213, + "step": 11422 + }, + { + "epoch": 0.9218787829876524, + "grad_norm": 0.6578245759010315, + "learning_rate": 7.850933580773756e-05, + "loss": 2.5085, + "step": 11423 + }, + { + "epoch": 0.9219594867242353, + "grad_norm": 0.6741796135902405, + "learning_rate": 7.849391804508129e-05, + "loss": 2.5294, + 
"step": 11424 + }, + { + "epoch": 0.9220401904608183, + "grad_norm": 0.6875781416893005, + "learning_rate": 7.847850081841285e-05, + "loss": 2.5034, + "step": 11425 + }, + { + "epoch": 0.9221208941974013, + "grad_norm": 0.6515244245529175, + "learning_rate": 7.846308412811638e-05, + "loss": 2.4707, + "step": 11426 + }, + { + "epoch": 0.9222015979339844, + "grad_norm": 0.7326812148094177, + "learning_rate": 7.844766797457615e-05, + "loss": 2.5049, + "step": 11427 + }, + { + "epoch": 0.9222823016705674, + "grad_norm": 0.7539918422698975, + "learning_rate": 7.84322523581764e-05, + "loss": 2.4726, + "step": 11428 + }, + { + "epoch": 0.9223630054071503, + "grad_norm": 0.745468020439148, + "learning_rate": 7.841683727930129e-05, + "loss": 2.5003, + "step": 11429 + }, + { + "epoch": 0.9224437091437333, + "grad_norm": 0.726362943649292, + "learning_rate": 7.840142273833499e-05, + "loss": 2.5056, + "step": 11430 + }, + { + "epoch": 0.9225244128803164, + "grad_norm": 0.7275403738021851, + "learning_rate": 7.838600873566175e-05, + "loss": 2.5188, + "step": 11431 + }, + { + "epoch": 0.9226051166168994, + "grad_norm": 0.6908789873123169, + "learning_rate": 7.837059527166563e-05, + "loss": 2.5349, + "step": 11432 + }, + { + "epoch": 0.9226858203534823, + "grad_norm": 0.7220396399497986, + "learning_rate": 7.835518234673079e-05, + "loss": 2.4863, + "step": 11433 + }, + { + "epoch": 0.9227665240900653, + "grad_norm": 0.6516178846359253, + "learning_rate": 7.833976996124142e-05, + "loss": 2.556, + "step": 11434 + }, + { + "epoch": 0.9228472278266484, + "grad_norm": 0.6958726644515991, + "learning_rate": 7.832435811558163e-05, + "loss": 2.5286, + "step": 11435 + }, + { + "epoch": 0.9229279315632314, + "grad_norm": 0.7734121680259705, + "learning_rate": 7.830894681013546e-05, + "loss": 2.5087, + "step": 11436 + }, + { + "epoch": 0.9230086352998144, + "grad_norm": 0.709064245223999, + "learning_rate": 7.829353604528703e-05, + "loss": 2.4817, + "step": 11437 + }, + { + "epoch": 
0.9230893390363973, + "grad_norm": 0.7224971652030945, + "learning_rate": 7.827812582142045e-05, + "loss": 2.5179, + "step": 11438 + }, + { + "epoch": 0.9231700427729804, + "grad_norm": 0.7139936685562134, + "learning_rate": 7.826271613891973e-05, + "loss": 2.537, + "step": 11439 + }, + { + "epoch": 0.9232507465095634, + "grad_norm": 0.671138346195221, + "learning_rate": 7.824730699816896e-05, + "loss": 2.4865, + "step": 11440 + }, + { + "epoch": 0.9233314502461464, + "grad_norm": 0.6547425389289856, + "learning_rate": 7.823189839955218e-05, + "loss": 2.509, + "step": 11441 + }, + { + "epoch": 0.9234121539827294, + "grad_norm": 0.719765305519104, + "learning_rate": 7.821649034345338e-05, + "loss": 2.591, + "step": 11442 + }, + { + "epoch": 0.9234928577193124, + "grad_norm": 0.7128504514694214, + "learning_rate": 7.820108283025656e-05, + "loss": 2.541, + "step": 11443 + }, + { + "epoch": 0.9235735614558954, + "grad_norm": 0.7711538672447205, + "learning_rate": 7.818567586034577e-05, + "loss": 2.5388, + "step": 11444 + }, + { + "epoch": 0.9236542651924784, + "grad_norm": 0.7151121497154236, + "learning_rate": 7.817026943410494e-05, + "loss": 2.5539, + "step": 11445 + }, + { + "epoch": 0.9237349689290614, + "grad_norm": 0.7009569406509399, + "learning_rate": 7.815486355191805e-05, + "loss": 2.4793, + "step": 11446 + }, + { + "epoch": 0.9238156726656445, + "grad_norm": 0.7251109480857849, + "learning_rate": 7.813945821416909e-05, + "loss": 2.5406, + "step": 11447 + }, + { + "epoch": 0.9238963764022274, + "grad_norm": 0.6907934546470642, + "learning_rate": 7.812405342124196e-05, + "loss": 2.5069, + "step": 11448 + }, + { + "epoch": 0.9239770801388104, + "grad_norm": 0.699207067489624, + "learning_rate": 7.810864917352061e-05, + "loss": 2.4844, + "step": 11449 + }, + { + "epoch": 0.9240577838753934, + "grad_norm": 0.718386173248291, + "learning_rate": 7.809324547138893e-05, + "loss": 2.5666, + "step": 11450 + }, + { + "epoch": 0.9241384876119765, + "grad_norm": 
0.6420444846153259, + "learning_rate": 7.807784231523089e-05, + "loss": 2.506, + "step": 11451 + }, + { + "epoch": 0.9242191913485595, + "grad_norm": 0.6777252554893494, + "learning_rate": 7.806243970543028e-05, + "loss": 2.487, + "step": 11452 + }, + { + "epoch": 0.9242998950851424, + "grad_norm": 0.6907702684402466, + "learning_rate": 7.804703764237102e-05, + "loss": 2.5284, + "step": 11453 + }, + { + "epoch": 0.9243805988217254, + "grad_norm": 0.6383422613143921, + "learning_rate": 7.803163612643698e-05, + "loss": 2.4704, + "step": 11454 + }, + { + "epoch": 0.9244613025583085, + "grad_norm": 0.6879577040672302, + "learning_rate": 7.801623515801198e-05, + "loss": 2.5103, + "step": 11455 + }, + { + "epoch": 0.9245420062948915, + "grad_norm": 0.6856719851493835, + "learning_rate": 7.800083473747986e-05, + "loss": 2.5086, + "step": 11456 + }, + { + "epoch": 0.9246227100314744, + "grad_norm": 0.7463707327842712, + "learning_rate": 7.79854348652245e-05, + "loss": 2.5456, + "step": 11457 + }, + { + "epoch": 0.9247034137680574, + "grad_norm": 0.7352643013000488, + "learning_rate": 7.79700355416296e-05, + "loss": 2.5335, + "step": 11458 + }, + { + "epoch": 0.9247841175046405, + "grad_norm": 0.7525908350944519, + "learning_rate": 7.795463676707897e-05, + "loss": 2.5855, + "step": 11459 + }, + { + "epoch": 0.9248648212412235, + "grad_norm": 0.7323870658874512, + "learning_rate": 7.79392385419565e-05, + "loss": 2.5471, + "step": 11460 + }, + { + "epoch": 0.9249455249778065, + "grad_norm": 0.7443860769271851, + "learning_rate": 7.792384086664582e-05, + "loss": 2.5449, + "step": 11461 + }, + { + "epoch": 0.9250262287143894, + "grad_norm": 0.6928641200065613, + "learning_rate": 7.790844374153073e-05, + "loss": 2.505, + "step": 11462 + }, + { + "epoch": 0.9251069324509725, + "grad_norm": 0.6491222381591797, + "learning_rate": 7.789304716699498e-05, + "loss": 2.5447, + "step": 11463 + }, + { + "epoch": 0.9251876361875555, + "grad_norm": 0.7351166009902954, + "learning_rate": 
7.78776511434223e-05, + "loss": 2.524, + "step": 11464 + }, + { + "epoch": 0.9252683399241385, + "grad_norm": 0.6680036783218384, + "learning_rate": 7.786225567119637e-05, + "loss": 2.5019, + "step": 11465 + }, + { + "epoch": 0.9253490436607215, + "grad_norm": 0.7070801258087158, + "learning_rate": 7.784686075070089e-05, + "loss": 2.5052, + "step": 11466 + }, + { + "epoch": 0.9254297473973045, + "grad_norm": 0.7095211148262024, + "learning_rate": 7.783146638231957e-05, + "loss": 2.4998, + "step": 11467 + }, + { + "epoch": 0.9255104511338875, + "grad_norm": 0.6725812554359436, + "learning_rate": 7.781607256643604e-05, + "loss": 2.4909, + "step": 11468 + }, + { + "epoch": 0.9255911548704705, + "grad_norm": 0.684177577495575, + "learning_rate": 7.780067930343396e-05, + "loss": 2.5636, + "step": 11469 + }, + { + "epoch": 0.9256718586070535, + "grad_norm": 0.703419029712677, + "learning_rate": 7.778528659369702e-05, + "loss": 2.4295, + "step": 11470 + }, + { + "epoch": 0.9257525623436366, + "grad_norm": 0.6850195527076721, + "learning_rate": 7.776989443760877e-05, + "loss": 2.5143, + "step": 11471 + }, + { + "epoch": 0.9258332660802195, + "grad_norm": 0.7322348952293396, + "learning_rate": 7.775450283555286e-05, + "loss": 2.5616, + "step": 11472 + }, + { + "epoch": 0.9259139698168025, + "grad_norm": 0.6924510598182678, + "learning_rate": 7.77391117879129e-05, + "loss": 2.4796, + "step": 11473 + }, + { + "epoch": 0.9259946735533855, + "grad_norm": 0.7006441354751587, + "learning_rate": 7.772372129507249e-05, + "loss": 2.5142, + "step": 11474 + }, + { + "epoch": 0.9260753772899685, + "grad_norm": 0.6379218697547913, + "learning_rate": 7.770833135741513e-05, + "loss": 2.5366, + "step": 11475 + }, + { + "epoch": 0.9261560810265516, + "grad_norm": 0.676163375377655, + "learning_rate": 7.769294197532448e-05, + "loss": 2.4936, + "step": 11476 + }, + { + "epoch": 0.9262367847631345, + "grad_norm": 0.6964210271835327, + "learning_rate": 7.767755314918399e-05, + "loss": 2.429, + 
"step": 11477 + }, + { + "epoch": 0.9263174884997175, + "grad_norm": 0.7017048597335815, + "learning_rate": 7.766216487937722e-05, + "loss": 2.5488, + "step": 11478 + }, + { + "epoch": 0.9263981922363005, + "grad_norm": 0.6742509603500366, + "learning_rate": 7.76467771662877e-05, + "loss": 2.5121, + "step": 11479 + }, + { + "epoch": 0.9264788959728836, + "grad_norm": 0.6751403212547302, + "learning_rate": 7.763139001029893e-05, + "loss": 2.5897, + "step": 11480 + }, + { + "epoch": 0.9265595997094666, + "grad_norm": 0.6639657616615295, + "learning_rate": 7.761600341179439e-05, + "loss": 2.5015, + "step": 11481 + }, + { + "epoch": 0.9266403034460495, + "grad_norm": 0.6332827210426331, + "learning_rate": 7.760061737115756e-05, + "loss": 2.5518, + "step": 11482 + }, + { + "epoch": 0.9267210071826325, + "grad_norm": 0.6751062870025635, + "learning_rate": 7.758523188877192e-05, + "loss": 2.4252, + "step": 11483 + }, + { + "epoch": 0.9268017109192156, + "grad_norm": 0.6763231754302979, + "learning_rate": 7.756984696502084e-05, + "loss": 2.5683, + "step": 11484 + }, + { + "epoch": 0.9268824146557986, + "grad_norm": 0.6480380296707153, + "learning_rate": 7.755446260028784e-05, + "loss": 2.558, + "step": 11485 + }, + { + "epoch": 0.9269631183923815, + "grad_norm": 0.6925072073936462, + "learning_rate": 7.753907879495634e-05, + "loss": 2.5374, + "step": 11486 + }, + { + "epoch": 0.9270438221289645, + "grad_norm": 0.6771834492683411, + "learning_rate": 7.752369554940966e-05, + "loss": 2.5652, + "step": 11487 + }, + { + "epoch": 0.9271245258655476, + "grad_norm": 0.6747026443481445, + "learning_rate": 7.750831286403124e-05, + "loss": 2.5076, + "step": 11488 + }, + { + "epoch": 0.9272052296021306, + "grad_norm": 0.6727211475372314, + "learning_rate": 7.749293073920448e-05, + "loss": 2.4774, + "step": 11489 + }, + { + "epoch": 0.9272859333387136, + "grad_norm": 0.6334055066108704, + "learning_rate": 7.747754917531272e-05, + "loss": 2.5245, + "step": 11490 + }, + { + "epoch": 
0.9273666370752965, + "grad_norm": 0.740700900554657, + "learning_rate": 7.746216817273928e-05, + "loss": 2.5485, + "step": 11491 + }, + { + "epoch": 0.9274473408118796, + "grad_norm": 0.6500691771507263, + "learning_rate": 7.744678773186757e-05, + "loss": 2.5277, + "step": 11492 + }, + { + "epoch": 0.9275280445484626, + "grad_norm": 0.6592985987663269, + "learning_rate": 7.743140785308084e-05, + "loss": 2.5304, + "step": 11493 + }, + { + "epoch": 0.9276087482850456, + "grad_norm": 0.6980452537536621, + "learning_rate": 7.741602853676241e-05, + "loss": 2.544, + "step": 11494 + }, + { + "epoch": 0.9276894520216286, + "grad_norm": 0.643190860748291, + "learning_rate": 7.740064978329555e-05, + "loss": 2.5167, + "step": 11495 + }, + { + "epoch": 0.9277701557582116, + "grad_norm": 0.6789804100990295, + "learning_rate": 7.738527159306366e-05, + "loss": 2.5117, + "step": 11496 + }, + { + "epoch": 0.9278508594947946, + "grad_norm": 0.7109663486480713, + "learning_rate": 7.736989396644987e-05, + "loss": 2.5294, + "step": 11497 + }, + { + "epoch": 0.9279315632313776, + "grad_norm": 0.6752706170082092, + "learning_rate": 7.735451690383746e-05, + "loss": 2.4851, + "step": 11498 + }, + { + "epoch": 0.9280122669679606, + "grad_norm": 0.6947829723358154, + "learning_rate": 7.733914040560972e-05, + "loss": 2.5792, + "step": 11499 + }, + { + "epoch": 0.9280929707045437, + "grad_norm": 0.6701157689094543, + "learning_rate": 7.732376447214981e-05, + "loss": 2.4884, + "step": 11500 + }, + { + "epoch": 0.9281736744411266, + "grad_norm": 0.64533531665802, + "learning_rate": 7.730838910384097e-05, + "loss": 2.4644, + "step": 11501 + }, + { + "epoch": 0.9282543781777096, + "grad_norm": 0.6664395332336426, + "learning_rate": 7.729301430106644e-05, + "loss": 2.5286, + "step": 11502 + }, + { + "epoch": 0.9283350819142926, + "grad_norm": 0.6982395648956299, + "learning_rate": 7.72776400642093e-05, + "loss": 2.5092, + "step": 11503 + }, + { + "epoch": 0.9284157856508757, + "grad_norm": 
0.6656171679496765, + "learning_rate": 7.726226639365278e-05, + "loss": 2.4945, + "step": 11504 + }, + { + "epoch": 0.9284964893874587, + "grad_norm": 0.6213308572769165, + "learning_rate": 7.724689328978001e-05, + "loss": 2.5042, + "step": 11505 + }, + { + "epoch": 0.9285771931240416, + "grad_norm": 0.6855599880218506, + "learning_rate": 7.723152075297414e-05, + "loss": 2.5207, + "step": 11506 + }, + { + "epoch": 0.9286578968606246, + "grad_norm": 0.7724171280860901, + "learning_rate": 7.721614878361828e-05, + "loss": 2.4842, + "step": 11507 + }, + { + "epoch": 0.9287386005972077, + "grad_norm": 0.708634614944458, + "learning_rate": 7.720077738209559e-05, + "loss": 2.58, + "step": 11508 + }, + { + "epoch": 0.9288193043337907, + "grad_norm": 0.6766082644462585, + "learning_rate": 7.718540654878907e-05, + "loss": 2.492, + "step": 11509 + }, + { + "epoch": 0.9289000080703737, + "grad_norm": 0.6856982707977295, + "learning_rate": 7.717003628408187e-05, + "loss": 2.5186, + "step": 11510 + }, + { + "epoch": 0.9289807118069566, + "grad_norm": 0.680647611618042, + "learning_rate": 7.715466658835705e-05, + "loss": 2.5305, + "step": 11511 + }, + { + "epoch": 0.9290614155435397, + "grad_norm": 0.7174721360206604, + "learning_rate": 7.713929746199771e-05, + "loss": 2.4498, + "step": 11512 + }, + { + "epoch": 0.9291421192801227, + "grad_norm": 0.6507031321525574, + "learning_rate": 7.712392890538676e-05, + "loss": 2.5334, + "step": 11513 + }, + { + "epoch": 0.9292228230167057, + "grad_norm": 0.7545748353004456, + "learning_rate": 7.710856091890732e-05, + "loss": 2.505, + "step": 11514 + }, + { + "epoch": 0.9293035267532886, + "grad_norm": 0.6978560090065002, + "learning_rate": 7.709319350294242e-05, + "loss": 2.5243, + "step": 11515 + }, + { + "epoch": 0.9293842304898717, + "grad_norm": 0.6620199084281921, + "learning_rate": 7.707782665787497e-05, + "loss": 2.5114, + "step": 11516 + }, + { + "epoch": 0.9294649342264547, + "grad_norm": 0.7160476446151733, + "learning_rate": 
7.7062460384088e-05, + "loss": 2.5322, + "step": 11517 + }, + { + "epoch": 0.9295456379630377, + "grad_norm": 0.6637005805969238, + "learning_rate": 7.704709468196454e-05, + "loss": 2.456, + "step": 11518 + }, + { + "epoch": 0.9296263416996207, + "grad_norm": 0.6668851375579834, + "learning_rate": 7.703172955188742e-05, + "loss": 2.5251, + "step": 11519 + }, + { + "epoch": 0.9297070454362037, + "grad_norm": 0.6840329170227051, + "learning_rate": 7.701636499423965e-05, + "loss": 2.5068, + "step": 11520 + }, + { + "epoch": 0.9297877491727867, + "grad_norm": 0.695122241973877, + "learning_rate": 7.700100100940415e-05, + "loss": 2.4822, + "step": 11521 + }, + { + "epoch": 0.9298684529093697, + "grad_norm": 0.6784923672676086, + "learning_rate": 7.698563759776382e-05, + "loss": 2.4978, + "step": 11522 + }, + { + "epoch": 0.9299491566459527, + "grad_norm": 0.6949357986450195, + "learning_rate": 7.697027475970154e-05, + "loss": 2.5392, + "step": 11523 + }, + { + "epoch": 0.9300298603825358, + "grad_norm": 0.7128093242645264, + "learning_rate": 7.695491249560025e-05, + "loss": 2.455, + "step": 11524 + }, + { + "epoch": 0.9301105641191187, + "grad_norm": 0.6534962058067322, + "learning_rate": 7.693955080584277e-05, + "loss": 2.5272, + "step": 11525 + }, + { + "epoch": 0.9301912678557017, + "grad_norm": 0.6893511414527893, + "learning_rate": 7.692418969081194e-05, + "loss": 2.5366, + "step": 11526 + }, + { + "epoch": 0.9302719715922847, + "grad_norm": 0.6335335373878479, + "learning_rate": 7.690882915089064e-05, + "loss": 2.5781, + "step": 11527 + }, + { + "epoch": 0.9303526753288677, + "grad_norm": 0.7264769077301025, + "learning_rate": 7.689346918646172e-05, + "loss": 2.5322, + "step": 11528 + }, + { + "epoch": 0.9304333790654508, + "grad_norm": 0.7156329154968262, + "learning_rate": 7.68781097979079e-05, + "loss": 2.5558, + "step": 11529 + }, + { + "epoch": 0.9305140828020337, + "grad_norm": 0.6914563775062561, + "learning_rate": 7.686275098561203e-05, + "loss": 2.5058, + 
"step": 11530 + }, + { + "epoch": 0.9305947865386167, + "grad_norm": 0.6939939260482788, + "learning_rate": 7.684739274995691e-05, + "loss": 2.4764, + "step": 11531 + }, + { + "epoch": 0.9306754902751997, + "grad_norm": 0.7103014588356018, + "learning_rate": 7.683203509132526e-05, + "loss": 2.5062, + "step": 11532 + }, + { + "epoch": 0.9307561940117828, + "grad_norm": 0.6558870077133179, + "learning_rate": 7.681667801009985e-05, + "loss": 2.4869, + "step": 11533 + }, + { + "epoch": 0.9308368977483658, + "grad_norm": 0.7280104160308838, + "learning_rate": 7.680132150666348e-05, + "loss": 2.566, + "step": 11534 + }, + { + "epoch": 0.9309176014849487, + "grad_norm": 0.6814180612564087, + "learning_rate": 7.678596558139875e-05, + "loss": 2.4926, + "step": 11535 + }, + { + "epoch": 0.9309983052215317, + "grad_norm": 0.6916589736938477, + "learning_rate": 7.677061023468846e-05, + "loss": 2.5189, + "step": 11536 + }, + { + "epoch": 0.9310790089581148, + "grad_norm": 0.6527554988861084, + "learning_rate": 7.675525546691533e-05, + "loss": 2.4969, + "step": 11537 + }, + { + "epoch": 0.9311597126946978, + "grad_norm": 0.6458954811096191, + "learning_rate": 7.673990127846196e-05, + "loss": 2.5159, + "step": 11538 + }, + { + "epoch": 0.9312404164312807, + "grad_norm": 0.6704902052879333, + "learning_rate": 7.672454766971105e-05, + "loss": 2.49, + "step": 11539 + }, + { + "epoch": 0.9313211201678637, + "grad_norm": 0.6599698066711426, + "learning_rate": 7.670919464104527e-05, + "loss": 2.4872, + "step": 11540 + }, + { + "epoch": 0.9314018239044468, + "grad_norm": 0.7638888955116272, + "learning_rate": 7.669384219284722e-05, + "loss": 2.5228, + "step": 11541 + }, + { + "epoch": 0.9314825276410298, + "grad_norm": 0.6911981105804443, + "learning_rate": 7.667849032549954e-05, + "loss": 2.4675, + "step": 11542 + }, + { + "epoch": 0.9315632313776128, + "grad_norm": 0.6414669156074524, + "learning_rate": 7.666313903938486e-05, + "loss": 2.5137, + "step": 11543 + }, + { + "epoch": 
0.9316439351141957, + "grad_norm": 0.7552139759063721, + "learning_rate": 7.66477883348858e-05, + "loss": 2.5778, + "step": 11544 + }, + { + "epoch": 0.9317246388507788, + "grad_norm": 0.6738760471343994, + "learning_rate": 7.663243821238484e-05, + "loss": 2.5326, + "step": 11545 + }, + { + "epoch": 0.9318053425873618, + "grad_norm": 0.7406899333000183, + "learning_rate": 7.661708867226459e-05, + "loss": 2.4608, + "step": 11546 + }, + { + "epoch": 0.9318860463239448, + "grad_norm": 0.7261415719985962, + "learning_rate": 7.660173971490769e-05, + "loss": 2.5684, + "step": 11547 + }, + { + "epoch": 0.9319667500605278, + "grad_norm": 0.636542797088623, + "learning_rate": 7.658639134069654e-05, + "loss": 2.5159, + "step": 11548 + }, + { + "epoch": 0.9320474537971108, + "grad_norm": 0.7730209231376648, + "learning_rate": 7.657104355001373e-05, + "loss": 2.487, + "step": 11549 + }, + { + "epoch": 0.9321281575336938, + "grad_norm": 0.6553641557693481, + "learning_rate": 7.655569634324178e-05, + "loss": 2.5105, + "step": 11550 + }, + { + "epoch": 0.9322088612702768, + "grad_norm": 0.7008326649665833, + "learning_rate": 7.654034972076314e-05, + "loss": 2.492, + "step": 11551 + }, + { + "epoch": 0.9322895650068598, + "grad_norm": 0.7074279189109802, + "learning_rate": 7.65250036829603e-05, + "loss": 2.5221, + "step": 11552 + }, + { + "epoch": 0.9323702687434429, + "grad_norm": 0.7235530018806458, + "learning_rate": 7.650965823021578e-05, + "loss": 2.5285, + "step": 11553 + }, + { + "epoch": 0.9324509724800258, + "grad_norm": 0.7601436376571655, + "learning_rate": 7.649431336291194e-05, + "loss": 2.5071, + "step": 11554 + }, + { + "epoch": 0.9325316762166088, + "grad_norm": 0.6446424126625061, + "learning_rate": 7.647896908143127e-05, + "loss": 2.5032, + "step": 11555 + }, + { + "epoch": 0.9326123799531918, + "grad_norm": 0.7032139897346497, + "learning_rate": 7.646362538615614e-05, + "loss": 2.6096, + "step": 11556 + }, + { + "epoch": 0.9326930836897749, + "grad_norm": 
0.6727899312973022, + "learning_rate": 7.644828227746904e-05, + "loss": 2.5041, + "step": 11557 + }, + { + "epoch": 0.9327737874263579, + "grad_norm": 0.6817529201507568, + "learning_rate": 7.643293975575229e-05, + "loss": 2.4474, + "step": 11558 + }, + { + "epoch": 0.9328544911629408, + "grad_norm": 0.6374444365501404, + "learning_rate": 7.641759782138827e-05, + "loss": 2.5204, + "step": 11559 + }, + { + "epoch": 0.9329351948995238, + "grad_norm": 0.6889457702636719, + "learning_rate": 7.640225647475939e-05, + "loss": 2.6344, + "step": 11560 + }, + { + "epoch": 0.9330158986361069, + "grad_norm": 0.6657958626747131, + "learning_rate": 7.638691571624794e-05, + "loss": 2.4672, + "step": 11561 + }, + { + "epoch": 0.9330966023726899, + "grad_norm": 0.6425464749336243, + "learning_rate": 7.637157554623627e-05, + "loss": 2.4756, + "step": 11562 + }, + { + "epoch": 0.9331773061092729, + "grad_norm": 0.7193450927734375, + "learning_rate": 7.635623596510675e-05, + "loss": 2.4969, + "step": 11563 + }, + { + "epoch": 0.9332580098458558, + "grad_norm": 0.6595252156257629, + "learning_rate": 7.634089697324159e-05, + "loss": 2.4647, + "step": 11564 + }, + { + "epoch": 0.9333387135824389, + "grad_norm": 0.6505268812179565, + "learning_rate": 7.632555857102312e-05, + "loss": 2.5059, + "step": 11565 + }, + { + "epoch": 0.9334194173190219, + "grad_norm": 0.6877838969230652, + "learning_rate": 7.631022075883365e-05, + "loss": 2.4855, + "step": 11566 + }, + { + "epoch": 0.9335001210556049, + "grad_norm": 0.6376198530197144, + "learning_rate": 7.629488353705538e-05, + "loss": 2.5024, + "step": 11567 + }, + { + "epoch": 0.9335808247921878, + "grad_norm": 0.6807642579078674, + "learning_rate": 7.627954690607058e-05, + "loss": 2.4954, + "step": 11568 + }, + { + "epoch": 0.9336615285287709, + "grad_norm": 0.6785219311714172, + "learning_rate": 7.62642108662615e-05, + "loss": 2.4854, + "step": 11569 + }, + { + "epoch": 0.9337422322653539, + "grad_norm": 0.8159591555595398, + 
"learning_rate": 7.624887541801032e-05, + "loss": 2.524, + "step": 11570 + }, + { + "epoch": 0.9338229360019369, + "grad_norm": 0.6912592053413391, + "learning_rate": 7.62335405616992e-05, + "loss": 2.5111, + "step": 11571 + }, + { + "epoch": 0.9339036397385199, + "grad_norm": 0.6772454977035522, + "learning_rate": 7.621820629771041e-05, + "loss": 2.5603, + "step": 11572 + }, + { + "epoch": 0.933984343475103, + "grad_norm": 0.6720221638679504, + "learning_rate": 7.620287262642613e-05, + "loss": 2.5016, + "step": 11573 + }, + { + "epoch": 0.9340650472116859, + "grad_norm": 0.651935338973999, + "learning_rate": 7.618753954822841e-05, + "loss": 2.445, + "step": 11574 + }, + { + "epoch": 0.9341457509482689, + "grad_norm": 0.6731166839599609, + "learning_rate": 7.617220706349947e-05, + "loss": 2.4703, + "step": 11575 + }, + { + "epoch": 0.9342264546848519, + "grad_norm": 0.6283879280090332, + "learning_rate": 7.615687517262143e-05, + "loss": 2.5232, + "step": 11576 + }, + { + "epoch": 0.9343071584214349, + "grad_norm": 0.7193455696105957, + "learning_rate": 7.614154387597638e-05, + "loss": 2.5268, + "step": 11577 + }, + { + "epoch": 0.934387862158018, + "grad_norm": 0.6992828845977783, + "learning_rate": 7.61262131739464e-05, + "loss": 2.5834, + "step": 11578 + }, + { + "epoch": 0.9344685658946009, + "grad_norm": 0.6501220464706421, + "learning_rate": 7.611088306691365e-05, + "loss": 2.5146, + "step": 11579 + }, + { + "epoch": 0.9345492696311839, + "grad_norm": 0.7246220111846924, + "learning_rate": 7.60955535552601e-05, + "loss": 2.5665, + "step": 11580 + }, + { + "epoch": 0.9346299733677669, + "grad_norm": 0.7190428376197815, + "learning_rate": 7.608022463936783e-05, + "loss": 2.5061, + "step": 11581 + }, + { + "epoch": 0.93471067710435, + "grad_norm": 0.7144324779510498, + "learning_rate": 7.606489631961893e-05, + "loss": 2.4982, + "step": 11582 + }, + { + "epoch": 0.9347913808409329, + "grad_norm": 0.7144657373428345, + "learning_rate": 7.604956859639535e-05, + 
"loss": 2.5506, + "step": 11583 + }, + { + "epoch": 0.9348720845775159, + "grad_norm": 0.6596626043319702, + "learning_rate": 7.603424147007913e-05, + "loss": 2.4911, + "step": 11584 + }, + { + "epoch": 0.9349527883140989, + "grad_norm": 0.7090883851051331, + "learning_rate": 7.601891494105227e-05, + "loss": 2.5087, + "step": 11585 + }, + { + "epoch": 0.935033492050682, + "grad_norm": 0.6679760217666626, + "learning_rate": 7.600358900969671e-05, + "loss": 2.497, + "step": 11586 + }, + { + "epoch": 0.935114195787265, + "grad_norm": 0.6795344948768616, + "learning_rate": 7.598826367639447e-05, + "loss": 2.4839, + "step": 11587 + }, + { + "epoch": 0.9351948995238479, + "grad_norm": 0.6378790736198425, + "learning_rate": 7.597293894152744e-05, + "loss": 2.4656, + "step": 11588 + }, + { + "epoch": 0.9352756032604309, + "grad_norm": 0.6646658182144165, + "learning_rate": 7.595761480547762e-05, + "loss": 2.4739, + "step": 11589 + }, + { + "epoch": 0.935356306997014, + "grad_norm": 0.6662073731422424, + "learning_rate": 7.594229126862687e-05, + "loss": 2.4872, + "step": 11590 + }, + { + "epoch": 0.935437010733597, + "grad_norm": 0.6698113679885864, + "learning_rate": 7.592696833135708e-05, + "loss": 2.4964, + "step": 11591 + }, + { + "epoch": 0.93551771447018, + "grad_norm": 0.6520004272460938, + "learning_rate": 7.59116459940502e-05, + "loss": 2.5616, + "step": 11592 + }, + { + "epoch": 0.9355984182067629, + "grad_norm": 0.6675869226455688, + "learning_rate": 7.589632425708806e-05, + "loss": 2.4854, + "step": 11593 + }, + { + "epoch": 0.935679121943346, + "grad_norm": 0.6914103031158447, + "learning_rate": 7.588100312085251e-05, + "loss": 2.5252, + "step": 11594 + }, + { + "epoch": 0.935759825679929, + "grad_norm": 0.7283286452293396, + "learning_rate": 7.586568258572546e-05, + "loss": 2.543, + "step": 11595 + }, + { + "epoch": 0.935840529416512, + "grad_norm": 0.6881958246231079, + "learning_rate": 7.585036265208864e-05, + "loss": 2.4499, + "step": 11596 + }, + { + 
"epoch": 0.935921233153095, + "grad_norm": 0.7733677625656128, + "learning_rate": 7.58350433203239e-05, + "loss": 2.5595, + "step": 11597 + }, + { + "epoch": 0.936001936889678, + "grad_norm": 0.672711968421936, + "learning_rate": 7.58197245908131e-05, + "loss": 2.4757, + "step": 11598 + }, + { + "epoch": 0.936082640626261, + "grad_norm": 0.691780686378479, + "learning_rate": 7.580440646393794e-05, + "loss": 2.5134, + "step": 11599 + }, + { + "epoch": 0.936163344362844, + "grad_norm": 0.6935102343559265, + "learning_rate": 7.578908894008021e-05, + "loss": 2.5128, + "step": 11600 + }, + { + "epoch": 0.936244048099427, + "grad_norm": 0.7005696892738342, + "learning_rate": 7.57737720196217e-05, + "loss": 2.5338, + "step": 11601 + }, + { + "epoch": 0.93632475183601, + "grad_norm": 0.6729815602302551, + "learning_rate": 7.575845570294409e-05, + "loss": 2.5373, + "step": 11602 + }, + { + "epoch": 0.936405455572593, + "grad_norm": 0.6694760918617249, + "learning_rate": 7.574313999042913e-05, + "loss": 2.5165, + "step": 11603 + }, + { + "epoch": 0.936486159309176, + "grad_norm": 0.6425337791442871, + "learning_rate": 7.572782488245854e-05, + "loss": 2.5102, + "step": 11604 + }, + { + "epoch": 0.936566863045759, + "grad_norm": 0.6613046526908875, + "learning_rate": 7.571251037941405e-05, + "loss": 2.5108, + "step": 11605 + }, + { + "epoch": 0.9366475667823421, + "grad_norm": 0.7396309971809387, + "learning_rate": 7.569719648167723e-05, + "loss": 2.5261, + "step": 11606 + }, + { + "epoch": 0.936728270518925, + "grad_norm": 0.6783239245414734, + "learning_rate": 7.568188318962981e-05, + "loss": 2.5725, + "step": 11607 + }, + { + "epoch": 0.936808974255508, + "grad_norm": 0.7591684460639954, + "learning_rate": 7.566657050365345e-05, + "loss": 2.5085, + "step": 11608 + }, + { + "epoch": 0.936889677992091, + "grad_norm": 0.6805615425109863, + "learning_rate": 7.565125842412974e-05, + "loss": 2.5598, + "step": 11609 + }, + { + "epoch": 0.9369703817286741, + "grad_norm": 
0.680203378200531, + "learning_rate": 7.563594695144032e-05, + "loss": 2.5072, + "step": 11610 + }, + { + "epoch": 0.9370510854652571, + "grad_norm": 0.7035777568817139, + "learning_rate": 7.56206360859668e-05, + "loss": 2.4882, + "step": 11611 + }, + { + "epoch": 0.93713178920184, + "grad_norm": 0.7457048892974854, + "learning_rate": 7.560532582809075e-05, + "loss": 2.4975, + "step": 11612 + }, + { + "epoch": 0.937212492938423, + "grad_norm": 0.702055037021637, + "learning_rate": 7.559001617819374e-05, + "loss": 2.5522, + "step": 11613 + }, + { + "epoch": 0.9372931966750061, + "grad_norm": 0.7618527412414551, + "learning_rate": 7.557470713665738e-05, + "loss": 2.5503, + "step": 11614 + }, + { + "epoch": 0.9373739004115891, + "grad_norm": 0.8611559867858887, + "learning_rate": 7.555939870386312e-05, + "loss": 2.4866, + "step": 11615 + }, + { + "epoch": 0.937454604148172, + "grad_norm": 0.7285227179527283, + "learning_rate": 7.554409088019254e-05, + "loss": 2.4855, + "step": 11616 + }, + { + "epoch": 0.937535307884755, + "grad_norm": 0.7512121796607971, + "learning_rate": 7.552878366602716e-05, + "loss": 2.5496, + "step": 11617 + }, + { + "epoch": 0.9376160116213381, + "grad_norm": 0.7353625297546387, + "learning_rate": 7.551347706174844e-05, + "loss": 2.5754, + "step": 11618 + }, + { + "epoch": 0.9376967153579211, + "grad_norm": 0.7131205797195435, + "learning_rate": 7.549817106773788e-05, + "loss": 2.4927, + "step": 11619 + }, + { + "epoch": 0.9377774190945041, + "grad_norm": 0.6562477946281433, + "learning_rate": 7.548286568437695e-05, + "loss": 2.5247, + "step": 11620 + }, + { + "epoch": 0.937858122831087, + "grad_norm": 0.7094948887825012, + "learning_rate": 7.546756091204713e-05, + "loss": 2.5084, + "step": 11621 + }, + { + "epoch": 0.9379388265676701, + "grad_norm": 0.6890475153923035, + "learning_rate": 7.545225675112977e-05, + "loss": 2.5178, + "step": 11622 + }, + { + "epoch": 0.9380195303042531, + "grad_norm": 0.6801474094390869, + "learning_rate": 
7.543695320200634e-05, + "loss": 2.5457, + "step": 11623 + }, + { + "epoch": 0.9381002340408361, + "grad_norm": 0.7093712687492371, + "learning_rate": 7.54216502650583e-05, + "loss": 2.6122, + "step": 11624 + }, + { + "epoch": 0.9381809377774191, + "grad_norm": 0.7246927618980408, + "learning_rate": 7.540634794066695e-05, + "loss": 2.5251, + "step": 11625 + }, + { + "epoch": 0.9382616415140022, + "grad_norm": 0.7358111143112183, + "learning_rate": 7.539104622921368e-05, + "loss": 2.5444, + "step": 11626 + }, + { + "epoch": 0.9383423452505851, + "grad_norm": 0.6915993690490723, + "learning_rate": 7.53757451310799e-05, + "loss": 2.448, + "step": 11627 + }, + { + "epoch": 0.9384230489871681, + "grad_norm": 0.6864039301872253, + "learning_rate": 7.536044464664689e-05, + "loss": 2.5267, + "step": 11628 + }, + { + "epoch": 0.9385037527237511, + "grad_norm": 0.664799690246582, + "learning_rate": 7.534514477629602e-05, + "loss": 2.5602, + "step": 11629 + }, + { + "epoch": 0.9385844564603341, + "grad_norm": 0.6770062446594238, + "learning_rate": 7.532984552040862e-05, + "loss": 2.5034, + "step": 11630 + }, + { + "epoch": 0.9386651601969171, + "grad_norm": 0.6961095929145813, + "learning_rate": 7.531454687936592e-05, + "loss": 2.4523, + "step": 11631 + }, + { + "epoch": 0.9387458639335001, + "grad_norm": 0.6776804327964783, + "learning_rate": 7.529924885354924e-05, + "loss": 2.5526, + "step": 11632 + }, + { + "epoch": 0.9388265676700831, + "grad_norm": 0.785796582698822, + "learning_rate": 7.528395144333988e-05, + "loss": 2.5256, + "step": 11633 + }, + { + "epoch": 0.9389072714066661, + "grad_norm": 0.7016655206680298, + "learning_rate": 7.526865464911902e-05, + "loss": 2.4781, + "step": 11634 + }, + { + "epoch": 0.9389879751432492, + "grad_norm": 0.7027767300605774, + "learning_rate": 7.525335847126795e-05, + "loss": 2.5287, + "step": 11635 + }, + { + "epoch": 0.9390686788798321, + "grad_norm": 0.710624098777771, + "learning_rate": 7.523806291016787e-05, + "loss": 2.5486, + 
"step": 11636 + }, + { + "epoch": 0.9391493826164151, + "grad_norm": 0.7029656767845154, + "learning_rate": 7.52227679662e-05, + "loss": 2.5244, + "step": 11637 + }, + { + "epoch": 0.9392300863529981, + "grad_norm": 0.7417333722114563, + "learning_rate": 7.520747363974551e-05, + "loss": 2.5561, + "step": 11638 + }, + { + "epoch": 0.9393107900895812, + "grad_norm": 0.6595067381858826, + "learning_rate": 7.519217993118559e-05, + "loss": 2.617, + "step": 11639 + }, + { + "epoch": 0.9393914938261642, + "grad_norm": 0.6808187365531921, + "learning_rate": 7.517688684090141e-05, + "loss": 2.5279, + "step": 11640 + }, + { + "epoch": 0.9394721975627471, + "grad_norm": 0.6618706583976746, + "learning_rate": 7.516159436927408e-05, + "loss": 2.4976, + "step": 11641 + }, + { + "epoch": 0.9395529012993301, + "grad_norm": 0.6979385018348694, + "learning_rate": 7.514630251668475e-05, + "loss": 2.4542, + "step": 11642 + }, + { + "epoch": 0.9396336050359132, + "grad_norm": 0.6380844116210938, + "learning_rate": 7.513101128351454e-05, + "loss": 2.48, + "step": 11643 + }, + { + "epoch": 0.9397143087724962, + "grad_norm": 0.6390014290809631, + "learning_rate": 7.511572067014452e-05, + "loss": 2.5111, + "step": 11644 + }, + { + "epoch": 0.9397950125090792, + "grad_norm": 0.7592498064041138, + "learning_rate": 7.510043067695578e-05, + "loss": 2.5161, + "step": 11645 + }, + { + "epoch": 0.9398757162456621, + "grad_norm": 0.6269322037696838, + "learning_rate": 7.508514130432945e-05, + "loss": 2.491, + "step": 11646 + }, + { + "epoch": 0.9399564199822452, + "grad_norm": 0.6372053623199463, + "learning_rate": 7.506985255264646e-05, + "loss": 2.4826, + "step": 11647 + }, + { + "epoch": 0.9400371237188282, + "grad_norm": 0.6962460875511169, + "learning_rate": 7.505456442228794e-05, + "loss": 2.5605, + "step": 11648 + }, + { + "epoch": 0.9401178274554112, + "grad_norm": 0.7931656241416931, + "learning_rate": 7.503927691363491e-05, + "loss": 2.4909, + "step": 11649 + }, + { + "epoch": 
0.9401985311919941, + "grad_norm": 0.688792884349823, + "learning_rate": 7.502399002706832e-05, + "loss": 2.4888, + "step": 11650 + }, + { + "epoch": 0.9402792349285772, + "grad_norm": 0.6683691143989563, + "learning_rate": 7.500870376296918e-05, + "loss": 2.5233, + "step": 11651 + }, + { + "epoch": 0.9403599386651602, + "grad_norm": 0.6537527441978455, + "learning_rate": 7.499341812171846e-05, + "loss": 2.5061, + "step": 11652 + }, + { + "epoch": 0.9404406424017432, + "grad_norm": 0.6657658219337463, + "learning_rate": 7.497813310369717e-05, + "loss": 2.4844, + "step": 11653 + }, + { + "epoch": 0.9405213461383262, + "grad_norm": 0.6865110993385315, + "learning_rate": 7.496284870928618e-05, + "loss": 2.4986, + "step": 11654 + }, + { + "epoch": 0.9406020498749093, + "grad_norm": 0.6724923849105835, + "learning_rate": 7.494756493886644e-05, + "loss": 2.4818, + "step": 11655 + }, + { + "epoch": 0.9406827536114922, + "grad_norm": 0.6478626728057861, + "learning_rate": 7.493228179281892e-05, + "loss": 2.5321, + "step": 11656 + }, + { + "epoch": 0.9407634573480752, + "grad_norm": 0.6474425792694092, + "learning_rate": 7.491699927152443e-05, + "loss": 2.5276, + "step": 11657 + }, + { + "epoch": 0.9408441610846582, + "grad_norm": 0.6736220717430115, + "learning_rate": 7.490171737536387e-05, + "loss": 2.4734, + "step": 11658 + }, + { + "epoch": 0.9409248648212413, + "grad_norm": 0.6714746952056885, + "learning_rate": 7.488643610471815e-05, + "loss": 2.5754, + "step": 11659 + }, + { + "epoch": 0.9410055685578242, + "grad_norm": 0.6714532375335693, + "learning_rate": 7.487115545996805e-05, + "loss": 2.4855, + "step": 11660 + }, + { + "epoch": 0.9410862722944072, + "grad_norm": 0.7601683139801025, + "learning_rate": 7.485587544149447e-05, + "loss": 2.4887, + "step": 11661 + }, + { + "epoch": 0.9411669760309902, + "grad_norm": 0.7655646204948425, + "learning_rate": 7.484059604967821e-05, + "loss": 2.4904, + "step": 11662 + }, + { + "epoch": 0.9412476797675733, + "grad_norm": 
0.6841822862625122, + "learning_rate": 7.482531728490006e-05, + "loss": 2.5272, + "step": 11663 + }, + { + "epoch": 0.9413283835041563, + "grad_norm": 0.7683621048927307, + "learning_rate": 7.481003914754078e-05, + "loss": 2.5218, + "step": 11664 + }, + { + "epoch": 0.9414090872407392, + "grad_norm": 0.6597647070884705, + "learning_rate": 7.479476163798124e-05, + "loss": 2.4925, + "step": 11665 + }, + { + "epoch": 0.9414897909773222, + "grad_norm": 0.6573941111564636, + "learning_rate": 7.477948475660208e-05, + "loss": 2.4854, + "step": 11666 + }, + { + "epoch": 0.9415704947139053, + "grad_norm": 0.6639125943183899, + "learning_rate": 7.476420850378407e-05, + "loss": 2.5207, + "step": 11667 + }, + { + "epoch": 0.9416511984504883, + "grad_norm": 0.6770366430282593, + "learning_rate": 7.474893287990796e-05, + "loss": 2.5167, + "step": 11668 + }, + { + "epoch": 0.9417319021870713, + "grad_norm": 0.6908389925956726, + "learning_rate": 7.473365788535447e-05, + "loss": 2.4606, + "step": 11669 + }, + { + "epoch": 0.9418126059236542, + "grad_norm": 0.6625069975852966, + "learning_rate": 7.471838352050427e-05, + "loss": 2.5344, + "step": 11670 + }, + { + "epoch": 0.9418933096602373, + "grad_norm": 0.6690869331359863, + "learning_rate": 7.470310978573803e-05, + "loss": 2.4507, + "step": 11671 + }, + { + "epoch": 0.9419740133968203, + "grad_norm": 0.6741886734962463, + "learning_rate": 7.468783668143645e-05, + "loss": 2.5755, + "step": 11672 + }, + { + "epoch": 0.9420547171334033, + "grad_norm": 0.6876424551010132, + "learning_rate": 7.467256420798009e-05, + "loss": 2.483, + "step": 11673 + }, + { + "epoch": 0.9421354208699863, + "grad_norm": 0.7044318318367004, + "learning_rate": 7.465729236574965e-05, + "loss": 2.5025, + "step": 11674 + }, + { + "epoch": 0.9422161246065693, + "grad_norm": 0.6608660817146301, + "learning_rate": 7.46420211551258e-05, + "loss": 2.5253, + "step": 11675 + }, + { + "epoch": 0.9422968283431523, + "grad_norm": 0.6944260001182556, + "learning_rate": 
7.4626750576489e-05, + "loss": 2.5002, + "step": 11676 + }, + { + "epoch": 0.9423775320797353, + "grad_norm": 0.7304964065551758, + "learning_rate": 7.46114806302199e-05, + "loss": 2.5501, + "step": 11677 + }, + { + "epoch": 0.9424582358163183, + "grad_norm": 0.688525378704071, + "learning_rate": 7.459621131669911e-05, + "loss": 2.5291, + "step": 11678 + }, + { + "epoch": 0.9425389395529012, + "grad_norm": 0.7388432025909424, + "learning_rate": 7.45809426363071e-05, + "loss": 2.5391, + "step": 11679 + }, + { + "epoch": 0.9426196432894843, + "grad_norm": 0.6777819991111755, + "learning_rate": 7.456567458942447e-05, + "loss": 2.5425, + "step": 11680 + }, + { + "epoch": 0.9427003470260673, + "grad_norm": 0.7208845615386963, + "learning_rate": 7.455040717643169e-05, + "loss": 2.5306, + "step": 11681 + }, + { + "epoch": 0.9427810507626503, + "grad_norm": 0.745384693145752, + "learning_rate": 7.453514039770934e-05, + "loss": 2.4695, + "step": 11682 + }, + { + "epoch": 0.9428617544992333, + "grad_norm": 0.7088115215301514, + "learning_rate": 7.451987425363782e-05, + "loss": 2.5413, + "step": 11683 + }, + { + "epoch": 0.9429424582358163, + "grad_norm": 0.7287998795509338, + "learning_rate": 7.450460874459762e-05, + "loss": 2.5773, + "step": 11684 + }, + { + "epoch": 0.9430231619723993, + "grad_norm": 0.6897092461585999, + "learning_rate": 7.448934387096928e-05, + "loss": 2.5255, + "step": 11685 + }, + { + "epoch": 0.9431038657089823, + "grad_norm": 0.6227227449417114, + "learning_rate": 7.447407963313313e-05, + "loss": 2.5027, + "step": 11686 + }, + { + "epoch": 0.9431845694455653, + "grad_norm": 0.6954305768013, + "learning_rate": 7.445881603146964e-05, + "loss": 2.5477, + "step": 11687 + }, + { + "epoch": 0.9432652731821484, + "grad_norm": 0.7860052585601807, + "learning_rate": 7.444355306635924e-05, + "loss": 2.469, + "step": 11688 + }, + { + "epoch": 0.9433459769187313, + "grad_norm": 0.6851965188980103, + "learning_rate": 7.442829073818227e-05, + "loss": 2.4997, + 
"step": 11689 + }, + { + "epoch": 0.9434266806553143, + "grad_norm": 0.7011744379997253, + "learning_rate": 7.441302904731916e-05, + "loss": 2.5399, + "step": 11690 + }, + { + "epoch": 0.9435073843918973, + "grad_norm": 0.703167200088501, + "learning_rate": 7.439776799415028e-05, + "loss": 2.5323, + "step": 11691 + }, + { + "epoch": 0.9435880881284804, + "grad_norm": 0.6747310161590576, + "learning_rate": 7.438250757905591e-05, + "loss": 2.5406, + "step": 11692 + }, + { + "epoch": 0.9436687918650634, + "grad_norm": 0.8631153106689453, + "learning_rate": 7.436724780241642e-05, + "loss": 2.5215, + "step": 11693 + }, + { + "epoch": 0.9437494956016463, + "grad_norm": 0.6919798254966736, + "learning_rate": 7.435198866461214e-05, + "loss": 2.4654, + "step": 11694 + }, + { + "epoch": 0.9438301993382293, + "grad_norm": 0.6747070550918579, + "learning_rate": 7.433673016602332e-05, + "loss": 2.5186, + "step": 11695 + }, + { + "epoch": 0.9439109030748124, + "grad_norm": 0.7368776798248291, + "learning_rate": 7.432147230703026e-05, + "loss": 2.5365, + "step": 11696 + }, + { + "epoch": 0.9439916068113954, + "grad_norm": 0.7443639636039734, + "learning_rate": 7.430621508801325e-05, + "loss": 2.4966, + "step": 11697 + }, + { + "epoch": 0.9440723105479784, + "grad_norm": 0.7371395230293274, + "learning_rate": 7.429095850935255e-05, + "loss": 2.4638, + "step": 11698 + }, + { + "epoch": 0.9441530142845613, + "grad_norm": 0.6917321681976318, + "learning_rate": 7.427570257142832e-05, + "loss": 2.5341, + "step": 11699 + }, + { + "epoch": 0.9442337180211444, + "grad_norm": 0.7704101800918579, + "learning_rate": 7.426044727462085e-05, + "loss": 2.5144, + "step": 11700 + }, + { + "epoch": 0.9443144217577274, + "grad_norm": 0.692197859287262, + "learning_rate": 7.424519261931036e-05, + "loss": 2.5293, + "step": 11701 + }, + { + "epoch": 0.9443951254943104, + "grad_norm": 0.7305885553359985, + "learning_rate": 7.422993860587695e-05, + "loss": 2.5236, + "step": 11702 + }, + { + "epoch": 
0.9444758292308933, + "grad_norm": 0.6955052018165588, + "learning_rate": 7.421468523470081e-05, + "loss": 2.4765, + "step": 11703 + }, + { + "epoch": 0.9445565329674764, + "grad_norm": 0.7394432425498962, + "learning_rate": 7.419943250616216e-05, + "loss": 2.5053, + "step": 11704 + }, + { + "epoch": 0.9446372367040594, + "grad_norm": 0.679044246673584, + "learning_rate": 7.418418042064108e-05, + "loss": 2.5413, + "step": 11705 + }, + { + "epoch": 0.9447179404406424, + "grad_norm": 0.7153440117835999, + "learning_rate": 7.41689289785177e-05, + "loss": 2.4938, + "step": 11706 + }, + { + "epoch": 0.9447986441772254, + "grad_norm": 0.697068452835083, + "learning_rate": 7.415367818017217e-05, + "loss": 2.5157, + "step": 11707 + }, + { + "epoch": 0.9448793479138085, + "grad_norm": 0.664616048336029, + "learning_rate": 7.41384280259845e-05, + "loss": 2.4859, + "step": 11708 + }, + { + "epoch": 0.9449600516503914, + "grad_norm": 0.7275365591049194, + "learning_rate": 7.412317851633479e-05, + "loss": 2.523, + "step": 11709 + }, + { + "epoch": 0.9450407553869744, + "grad_norm": 0.7408944368362427, + "learning_rate": 7.410792965160318e-05, + "loss": 2.4994, + "step": 11710 + }, + { + "epoch": 0.9451214591235574, + "grad_norm": 0.7222678065299988, + "learning_rate": 7.40926814321696e-05, + "loss": 2.5084, + "step": 11711 + }, + { + "epoch": 0.9452021628601405, + "grad_norm": 0.7242292761802673, + "learning_rate": 7.407743385841412e-05, + "loss": 2.5165, + "step": 11712 + }, + { + "epoch": 0.9452828665967234, + "grad_norm": 0.6634014844894409, + "learning_rate": 7.406218693071677e-05, + "loss": 2.4947, + "step": 11713 + }, + { + "epoch": 0.9453635703333064, + "grad_norm": 0.8126605153083801, + "learning_rate": 7.404694064945751e-05, + "loss": 2.5553, + "step": 11714 + }, + { + "epoch": 0.9454442740698894, + "grad_norm": 0.679344654083252, + "learning_rate": 7.403169501501632e-05, + "loss": 2.5475, + "step": 11715 + }, + { + "epoch": 0.9455249778064725, + "grad_norm": 
0.7584314346313477, + "learning_rate": 7.401645002777318e-05, + "loss": 2.5498, + "step": 11716 + }, + { + "epoch": 0.9456056815430555, + "grad_norm": 0.7191590666770935, + "learning_rate": 7.400120568810806e-05, + "loss": 2.5161, + "step": 11717 + }, + { + "epoch": 0.9456863852796384, + "grad_norm": 0.6738762855529785, + "learning_rate": 7.398596199640084e-05, + "loss": 2.4819, + "step": 11718 + }, + { + "epoch": 0.9457670890162214, + "grad_norm": 0.7305885553359985, + "learning_rate": 7.397071895303143e-05, + "loss": 2.4842, + "step": 11719 + }, + { + "epoch": 0.9458477927528045, + "grad_norm": 0.6885291337966919, + "learning_rate": 7.395547655837976e-05, + "loss": 2.5016, + "step": 11720 + }, + { + "epoch": 0.9459284964893875, + "grad_norm": 0.6807307600975037, + "learning_rate": 7.394023481282568e-05, + "loss": 2.4949, + "step": 11721 + }, + { + "epoch": 0.9460092002259705, + "grad_norm": 0.6683849096298218, + "learning_rate": 7.392499371674907e-05, + "loss": 2.4974, + "step": 11722 + }, + { + "epoch": 0.9460899039625534, + "grad_norm": 0.6615697741508484, + "learning_rate": 7.39097532705298e-05, + "loss": 2.4744, + "step": 11723 + }, + { + "epoch": 0.9461706076991365, + "grad_norm": 0.6463690996170044, + "learning_rate": 7.389451347454765e-05, + "loss": 2.478, + "step": 11724 + }, + { + "epoch": 0.9462513114357195, + "grad_norm": 0.6848269701004028, + "learning_rate": 7.387927432918247e-05, + "loss": 2.5491, + "step": 11725 + }, + { + "epoch": 0.9463320151723025, + "grad_norm": 0.7251551747322083, + "learning_rate": 7.386403583481409e-05, + "loss": 2.4936, + "step": 11726 + }, + { + "epoch": 0.9464127189088855, + "grad_norm": 0.6562095284461975, + "learning_rate": 7.384879799182223e-05, + "loss": 2.4895, + "step": 11727 + }, + { + "epoch": 0.9464934226454685, + "grad_norm": 0.6891352534294128, + "learning_rate": 7.383356080058668e-05, + "loss": 2.508, + "step": 11728 + }, + { + "epoch": 0.9465741263820515, + "grad_norm": 0.7220255136489868, + "learning_rate": 
7.381832426148719e-05, + "loss": 2.5181, + "step": 11729 + }, + { + "epoch": 0.9466548301186345, + "grad_norm": 0.7213689088821411, + "learning_rate": 7.38030883749035e-05, + "loss": 2.5136, + "step": 11730 + }, + { + "epoch": 0.9467355338552175, + "grad_norm": 0.6711129546165466, + "learning_rate": 7.378785314121535e-05, + "loss": 2.5463, + "step": 11731 + }, + { + "epoch": 0.9468162375918004, + "grad_norm": 0.6380139589309692, + "learning_rate": 7.377261856080239e-05, + "loss": 2.5092, + "step": 11732 + }, + { + "epoch": 0.9468969413283835, + "grad_norm": 0.66046142578125, + "learning_rate": 7.375738463404437e-05, + "loss": 2.5561, + "step": 11733 + }, + { + "epoch": 0.9469776450649665, + "grad_norm": 0.6857354044914246, + "learning_rate": 7.37421513613209e-05, + "loss": 2.5774, + "step": 11734 + }, + { + "epoch": 0.9470583488015495, + "grad_norm": 0.6811589598655701, + "learning_rate": 7.372691874301163e-05, + "loss": 2.4918, + "step": 11735 + }, + { + "epoch": 0.9471390525381325, + "grad_norm": 0.6401017308235168, + "learning_rate": 7.37116867794963e-05, + "loss": 2.4994, + "step": 11736 + }, + { + "epoch": 0.9472197562747156, + "grad_norm": 0.6967078447341919, + "learning_rate": 7.369645547115438e-05, + "loss": 2.5809, + "step": 11737 + }, + { + "epoch": 0.9473004600112985, + "grad_norm": 0.6695219278335571, + "learning_rate": 7.368122481836557e-05, + "loss": 2.4735, + "step": 11738 + }, + { + "epoch": 0.9473811637478815, + "grad_norm": 0.6540528535842896, + "learning_rate": 7.366599482150944e-05, + "loss": 2.4998, + "step": 11739 + }, + { + "epoch": 0.9474618674844645, + "grad_norm": 0.700683057308197, + "learning_rate": 7.365076548096556e-05, + "loss": 2.5258, + "step": 11740 + }, + { + "epoch": 0.9475425712210476, + "grad_norm": 0.7125419974327087, + "learning_rate": 7.363553679711347e-05, + "loss": 2.4653, + "step": 11741 + }, + { + "epoch": 0.9476232749576305, + "grad_norm": 0.7285346984863281, + "learning_rate": 7.362030877033275e-05, + "loss": 2.5523, + 
"step": 11742 + }, + { + "epoch": 0.9477039786942135, + "grad_norm": 0.7310814261436462, + "learning_rate": 7.360508140100288e-05, + "loss": 2.5027, + "step": 11743 + }, + { + "epoch": 0.9477846824307965, + "grad_norm": 0.746961772441864, + "learning_rate": 7.358985468950335e-05, + "loss": 2.5485, + "step": 11744 + }, + { + "epoch": 0.9478653861673796, + "grad_norm": 0.6880186796188354, + "learning_rate": 7.357462863621369e-05, + "loss": 2.5243, + "step": 11745 + }, + { + "epoch": 0.9479460899039626, + "grad_norm": 0.6406471133232117, + "learning_rate": 7.355940324151339e-05, + "loss": 2.512, + "step": 11746 + }, + { + "epoch": 0.9480267936405455, + "grad_norm": 0.6503005027770996, + "learning_rate": 7.354417850578184e-05, + "loss": 2.5318, + "step": 11747 + }, + { + "epoch": 0.9481074973771285, + "grad_norm": 0.6458879113197327, + "learning_rate": 7.352895442939852e-05, + "loss": 2.5451, + "step": 11748 + }, + { + "epoch": 0.9481882011137116, + "grad_norm": 0.7382936477661133, + "learning_rate": 7.351373101274288e-05, + "loss": 2.5393, + "step": 11749 + }, + { + "epoch": 0.9482689048502946, + "grad_norm": 0.7366087436676025, + "learning_rate": 7.349850825619429e-05, + "loss": 2.5591, + "step": 11750 + }, + { + "epoch": 0.9483496085868776, + "grad_norm": 0.6652588248252869, + "learning_rate": 7.348328616013213e-05, + "loss": 2.5348, + "step": 11751 + }, + { + "epoch": 0.9484303123234605, + "grad_norm": 0.7515435814857483, + "learning_rate": 7.346806472493584e-05, + "loss": 2.5208, + "step": 11752 + }, + { + "epoch": 0.9485110160600436, + "grad_norm": 0.7161263227462769, + "learning_rate": 7.345284395098469e-05, + "loss": 2.5518, + "step": 11753 + }, + { + "epoch": 0.9485917197966266, + "grad_norm": 0.7433953285217285, + "learning_rate": 7.343762383865807e-05, + "loss": 2.5914, + "step": 11754 + }, + { + "epoch": 0.9486724235332096, + "grad_norm": 0.674991250038147, + "learning_rate": 7.342240438833532e-05, + "loss": 2.5566, + "step": 11755 + }, + { + "epoch": 
0.9487531272697926, + "grad_norm": 0.7511670589447021, + "learning_rate": 7.34071856003957e-05, + "loss": 2.5253, + "step": 11756 + }, + { + "epoch": 0.9488338310063756, + "grad_norm": 0.6672492623329163, + "learning_rate": 7.339196747521853e-05, + "loss": 2.4887, + "step": 11757 + }, + { + "epoch": 0.9489145347429586, + "grad_norm": 0.6826158761978149, + "learning_rate": 7.337675001318312e-05, + "loss": 2.5072, + "step": 11758 + }, + { + "epoch": 0.9489952384795416, + "grad_norm": 0.7189450860023499, + "learning_rate": 7.336153321466867e-05, + "loss": 2.5583, + "step": 11759 + }, + { + "epoch": 0.9490759422161246, + "grad_norm": 0.6923015117645264, + "learning_rate": 7.33463170800544e-05, + "loss": 2.5416, + "step": 11760 + }, + { + "epoch": 0.9491566459527077, + "grad_norm": 0.690060555934906, + "learning_rate": 7.333110160971963e-05, + "loss": 2.4931, + "step": 11761 + }, + { + "epoch": 0.9492373496892906, + "grad_norm": 0.6887977719306946, + "learning_rate": 7.331588680404354e-05, + "loss": 2.4676, + "step": 11762 + }, + { + "epoch": 0.9493180534258736, + "grad_norm": 0.8573753237724304, + "learning_rate": 7.330067266340528e-05, + "loss": 2.5074, + "step": 11763 + }, + { + "epoch": 0.9493987571624566, + "grad_norm": 0.6760974526405334, + "learning_rate": 7.328545918818403e-05, + "loss": 2.5395, + "step": 11764 + }, + { + "epoch": 0.9494794608990397, + "grad_norm": 0.6946160197257996, + "learning_rate": 7.327024637875901e-05, + "loss": 2.535, + "step": 11765 + }, + { + "epoch": 0.9495601646356226, + "grad_norm": 0.6851378679275513, + "learning_rate": 7.32550342355093e-05, + "loss": 2.487, + "step": 11766 + }, + { + "epoch": 0.9496408683722056, + "grad_norm": 0.6480168104171753, + "learning_rate": 7.323982275881404e-05, + "loss": 2.513, + "step": 11767 + }, + { + "epoch": 0.9497215721087886, + "grad_norm": 0.6492218971252441, + "learning_rate": 7.322461194905239e-05, + "loss": 2.4532, + "step": 11768 + }, + { + "epoch": 0.9498022758453717, + "grad_norm": 
0.6670051217079163, + "learning_rate": 7.320940180660337e-05, + "loss": 2.5258, + "step": 11769 + }, + { + "epoch": 0.9498829795819547, + "grad_norm": 0.6678066253662109, + "learning_rate": 7.319419233184608e-05, + "loss": 2.5388, + "step": 11770 + }, + { + "epoch": 0.9499636833185376, + "grad_norm": 0.693545937538147, + "learning_rate": 7.31789835251596e-05, + "loss": 2.5304, + "step": 11771 + }, + { + "epoch": 0.9500443870551206, + "grad_norm": 0.680486798286438, + "learning_rate": 7.316377538692297e-05, + "loss": 2.5024, + "step": 11772 + }, + { + "epoch": 0.9501250907917037, + "grad_norm": 0.7271847128868103, + "learning_rate": 7.314856791751518e-05, + "loss": 2.5947, + "step": 11773 + }, + { + "epoch": 0.9502057945282867, + "grad_norm": 0.6889839172363281, + "learning_rate": 7.31333611173153e-05, + "loss": 2.5135, + "step": 11774 + }, + { + "epoch": 0.9502864982648697, + "grad_norm": 0.7431777119636536, + "learning_rate": 7.311815498670226e-05, + "loss": 2.5856, + "step": 11775 + }, + { + "epoch": 0.9503672020014526, + "grad_norm": 0.7168101072311401, + "learning_rate": 7.310294952605508e-05, + "loss": 2.4383, + "step": 11776 + }, + { + "epoch": 0.9504479057380357, + "grad_norm": 0.654803454875946, + "learning_rate": 7.308774473575271e-05, + "loss": 2.4908, + "step": 11777 + }, + { + "epoch": 0.9505286094746187, + "grad_norm": 0.6810718774795532, + "learning_rate": 7.307254061617412e-05, + "loss": 2.5073, + "step": 11778 + }, + { + "epoch": 0.9506093132112017, + "grad_norm": 0.637980043888092, + "learning_rate": 7.305733716769817e-05, + "loss": 2.5686, + "step": 11779 + }, + { + "epoch": 0.9506900169477847, + "grad_norm": 0.6549471020698547, + "learning_rate": 7.30421343907038e-05, + "loss": 2.5502, + "step": 11780 + }, + { + "epoch": 0.9507707206843676, + "grad_norm": 0.7087163329124451, + "learning_rate": 7.302693228556994e-05, + "loss": 2.4773, + "step": 11781 + }, + { + "epoch": 0.9508514244209507, + "grad_norm": 0.6230717897415161, + "learning_rate": 
7.301173085267541e-05, + "loss": 2.4806, + "step": 11782 + }, + { + "epoch": 0.9509321281575337, + "grad_norm": 0.7145688533782959, + "learning_rate": 7.299653009239911e-05, + "loss": 2.5259, + "step": 11783 + }, + { + "epoch": 0.9510128318941167, + "grad_norm": 0.679100513458252, + "learning_rate": 7.298133000511988e-05, + "loss": 2.5012, + "step": 11784 + }, + { + "epoch": 0.9510935356306996, + "grad_norm": 0.7057691216468811, + "learning_rate": 7.29661305912165e-05, + "loss": 2.4826, + "step": 11785 + }, + { + "epoch": 0.9511742393672827, + "grad_norm": 0.65343177318573, + "learning_rate": 7.295093185106782e-05, + "loss": 2.4553, + "step": 11786 + }, + { + "epoch": 0.9512549431038657, + "grad_norm": 0.7948461174964905, + "learning_rate": 7.293573378505268e-05, + "loss": 2.478, + "step": 11787 + }, + { + "epoch": 0.9513356468404487, + "grad_norm": 0.6511468887329102, + "learning_rate": 7.292053639354975e-05, + "loss": 2.4862, + "step": 11788 + }, + { + "epoch": 0.9514163505770317, + "grad_norm": 0.7293919324874878, + "learning_rate": 7.290533967693782e-05, + "loss": 2.5956, + "step": 11789 + }, + { + "epoch": 0.9514970543136148, + "grad_norm": 0.6691277623176575, + "learning_rate": 7.289014363559567e-05, + "loss": 2.5659, + "step": 11790 + }, + { + "epoch": 0.9515777580501977, + "grad_norm": 0.7054625749588013, + "learning_rate": 7.287494826990203e-05, + "loss": 2.5875, + "step": 11791 + }, + { + "epoch": 0.9516584617867807, + "grad_norm": 0.6597220301628113, + "learning_rate": 7.285975358023555e-05, + "loss": 2.5215, + "step": 11792 + }, + { + "epoch": 0.9517391655233637, + "grad_norm": 0.6719489097595215, + "learning_rate": 7.284455956697497e-05, + "loss": 2.4752, + "step": 11793 + }, + { + "epoch": 0.9518198692599468, + "grad_norm": 0.7325637340545654, + "learning_rate": 7.2829366230499e-05, + "loss": 2.5504, + "step": 11794 + }, + { + "epoch": 0.9519005729965297, + "grad_norm": 0.637668788433075, + "learning_rate": 7.281417357118619e-05, + "loss": 2.5105, + 
"step": 11795 + }, + { + "epoch": 0.9519812767331127, + "grad_norm": 0.7815340161323547, + "learning_rate": 7.279898158941525e-05, + "loss": 2.4998, + "step": 11796 + }, + { + "epoch": 0.9520619804696957, + "grad_norm": 0.6555821299552917, + "learning_rate": 7.278379028556481e-05, + "loss": 2.4326, + "step": 11797 + }, + { + "epoch": 0.9521426842062788, + "grad_norm": 0.7298933863639832, + "learning_rate": 7.276859966001344e-05, + "loss": 2.4779, + "step": 11798 + }, + { + "epoch": 0.9522233879428618, + "grad_norm": 0.683455765247345, + "learning_rate": 7.275340971313974e-05, + "loss": 2.4416, + "step": 11799 + }, + { + "epoch": 0.9523040916794447, + "grad_norm": 0.6353151798248291, + "learning_rate": 7.273822044532232e-05, + "loss": 2.4777, + "step": 11800 + }, + { + "epoch": 0.9523847954160277, + "grad_norm": 0.6898894309997559, + "learning_rate": 7.27230318569397e-05, + "loss": 2.5351, + "step": 11801 + }, + { + "epoch": 0.9524654991526108, + "grad_norm": 0.6528690457344055, + "learning_rate": 7.270784394837041e-05, + "loss": 2.5145, + "step": 11802 + }, + { + "epoch": 0.9525462028891938, + "grad_norm": 0.6432619094848633, + "learning_rate": 7.269265671999304e-05, + "loss": 2.5002, + "step": 11803 + }, + { + "epoch": 0.9526269066257768, + "grad_norm": 0.7317861318588257, + "learning_rate": 7.267747017218601e-05, + "loss": 2.5318, + "step": 11804 + }, + { + "epoch": 0.9527076103623597, + "grad_norm": 0.7581185698509216, + "learning_rate": 7.266228430532785e-05, + "loss": 2.5313, + "step": 11805 + }, + { + "epoch": 0.9527883140989428, + "grad_norm": 0.7316486239433289, + "learning_rate": 7.264709911979702e-05, + "loss": 2.5147, + "step": 11806 + }, + { + "epoch": 0.9528690178355258, + "grad_norm": 0.7378978729248047, + "learning_rate": 7.263191461597199e-05, + "loss": 2.5149, + "step": 11807 + }, + { + "epoch": 0.9529497215721088, + "grad_norm": 0.6603738069534302, + "learning_rate": 7.26167307942312e-05, + "loss": 2.4684, + "step": 11808 + }, + { + "epoch": 
0.9530304253086918, + "grad_norm": 0.7566502690315247, + "learning_rate": 7.260154765495302e-05, + "loss": 2.5535, + "step": 11809 + }, + { + "epoch": 0.9531111290452748, + "grad_norm": 0.693067729473114, + "learning_rate": 7.258636519851596e-05, + "loss": 2.5103, + "step": 11810 + }, + { + "epoch": 0.9531918327818578, + "grad_norm": 0.7049208283424377, + "learning_rate": 7.257118342529826e-05, + "loss": 2.5482, + "step": 11811 + }, + { + "epoch": 0.9532725365184408, + "grad_norm": 0.6986998319625854, + "learning_rate": 7.25560023356784e-05, + "loss": 2.4921, + "step": 11812 + }, + { + "epoch": 0.9533532402550238, + "grad_norm": 0.7079482674598694, + "learning_rate": 7.254082193003476e-05, + "loss": 2.5339, + "step": 11813 + }, + { + "epoch": 0.9534339439916069, + "grad_norm": 0.7283922433853149, + "learning_rate": 7.252564220874553e-05, + "loss": 2.5056, + "step": 11814 + }, + { + "epoch": 0.9535146477281898, + "grad_norm": 0.6965533494949341, + "learning_rate": 7.251046317218914e-05, + "loss": 2.5512, + "step": 11815 + }, + { + "epoch": 0.9535953514647728, + "grad_norm": 0.7367159128189087, + "learning_rate": 7.24952848207439e-05, + "loss": 2.5015, + "step": 11816 + }, + { + "epoch": 0.9536760552013558, + "grad_norm": 0.6959818601608276, + "learning_rate": 7.248010715478802e-05, + "loss": 2.4969, + "step": 11817 + }, + { + "epoch": 0.9537567589379389, + "grad_norm": 0.69304358959198, + "learning_rate": 7.246493017469981e-05, + "loss": 2.5098, + "step": 11818 + }, + { + "epoch": 0.9538374626745219, + "grad_norm": 0.6830596327781677, + "learning_rate": 7.244975388085757e-05, + "loss": 2.5206, + "step": 11819 + }, + { + "epoch": 0.9539181664111048, + "grad_norm": 0.7354303598403931, + "learning_rate": 7.243457827363944e-05, + "loss": 2.5223, + "step": 11820 + }, + { + "epoch": 0.9539988701476878, + "grad_norm": 0.7046182751655579, + "learning_rate": 7.241940335342366e-05, + "loss": 2.4931, + "step": 11821 + }, + { + "epoch": 0.9540795738842709, + "grad_norm": 
0.6990540623664856, + "learning_rate": 7.240422912058843e-05, + "loss": 2.4302, + "step": 11822 + }, + { + "epoch": 0.9541602776208539, + "grad_norm": 0.7562115788459778, + "learning_rate": 7.238905557551202e-05, + "loss": 2.5118, + "step": 11823 + }, + { + "epoch": 0.9542409813574368, + "grad_norm": 0.8212862014770508, + "learning_rate": 7.237388271857248e-05, + "loss": 2.5476, + "step": 11824 + }, + { + "epoch": 0.9543216850940198, + "grad_norm": 0.7095397710800171, + "learning_rate": 7.235871055014798e-05, + "loss": 2.5073, + "step": 11825 + }, + { + "epoch": 0.9544023888306029, + "grad_norm": 0.7174660563468933, + "learning_rate": 7.23435390706167e-05, + "loss": 2.4553, + "step": 11826 + }, + { + "epoch": 0.9544830925671859, + "grad_norm": 0.7121314406394958, + "learning_rate": 7.23283682803567e-05, + "loss": 2.5164, + "step": 11827 + }, + { + "epoch": 0.9545637963037689, + "grad_norm": 0.7354126572608948, + "learning_rate": 7.231319817974609e-05, + "loss": 2.5413, + "step": 11828 + }, + { + "epoch": 0.9546445000403518, + "grad_norm": 0.7770543694496155, + "learning_rate": 7.2298028769163e-05, + "loss": 2.5244, + "step": 11829 + }, + { + "epoch": 0.9547252037769349, + "grad_norm": 0.6770393252372742, + "learning_rate": 7.228286004898541e-05, + "loss": 2.4707, + "step": 11830 + }, + { + "epoch": 0.9548059075135179, + "grad_norm": 0.6916880011558533, + "learning_rate": 7.22676920195914e-05, + "loss": 2.506, + "step": 11831 + }, + { + "epoch": 0.9548866112501009, + "grad_norm": 0.6299161314964294, + "learning_rate": 7.225252468135901e-05, + "loss": 2.5042, + "step": 11832 + }, + { + "epoch": 0.9549673149866839, + "grad_norm": 0.7081227898597717, + "learning_rate": 7.223735803466623e-05, + "loss": 2.5537, + "step": 11833 + }, + { + "epoch": 0.9550480187232668, + "grad_norm": 0.6600900888442993, + "learning_rate": 7.222219207989104e-05, + "loss": 2.5329, + "step": 11834 + }, + { + "epoch": 0.9551287224598499, + "grad_norm": 0.6715366244316101, + "learning_rate": 
7.22070268174115e-05, + "loss": 2.5273, + "step": 11835 + }, + { + "epoch": 0.9552094261964329, + "grad_norm": 0.6655930280685425, + "learning_rate": 7.219186224760543e-05, + "loss": 2.4254, + "step": 11836 + }, + { + "epoch": 0.9552901299330159, + "grad_norm": 0.6925715208053589, + "learning_rate": 7.217669837085088e-05, + "loss": 2.5104, + "step": 11837 + }, + { + "epoch": 0.9553708336695989, + "grad_norm": 0.7132978439331055, + "learning_rate": 7.216153518752571e-05, + "loss": 2.5238, + "step": 11838 + }, + { + "epoch": 0.9554515374061819, + "grad_norm": 0.661651611328125, + "learning_rate": 7.214637269800791e-05, + "loss": 2.445, + "step": 11839 + }, + { + "epoch": 0.9555322411427649, + "grad_norm": 0.6635430455207825, + "learning_rate": 7.213121090267528e-05, + "loss": 2.4707, + "step": 11840 + }, + { + "epoch": 0.9556129448793479, + "grad_norm": 0.6303616166114807, + "learning_rate": 7.211604980190571e-05, + "loss": 2.4923, + "step": 11841 + }, + { + "epoch": 0.9556936486159309, + "grad_norm": 0.7027459144592285, + "learning_rate": 7.210088939607708e-05, + "loss": 2.5592, + "step": 11842 + }, + { + "epoch": 0.955774352352514, + "grad_norm": 0.6539996862411499, + "learning_rate": 7.208572968556722e-05, + "loss": 2.5256, + "step": 11843 + }, + { + "epoch": 0.9558550560890969, + "grad_norm": 0.7019872069358826, + "learning_rate": 7.207057067075393e-05, + "loss": 2.488, + "step": 11844 + }, + { + "epoch": 0.9559357598256799, + "grad_norm": 0.6848211288452148, + "learning_rate": 7.205541235201507e-05, + "loss": 2.4883, + "step": 11845 + }, + { + "epoch": 0.9560164635622629, + "grad_norm": 0.7806351184844971, + "learning_rate": 7.204025472972834e-05, + "loss": 2.5563, + "step": 11846 + }, + { + "epoch": 0.956097167298846, + "grad_norm": 0.7327724695205688, + "learning_rate": 7.202509780427156e-05, + "loss": 2.5275, + "step": 11847 + }, + { + "epoch": 0.956177871035429, + "grad_norm": 0.6805681586265564, + "learning_rate": 7.200994157602248e-05, + "loss": 2.4723, + 
"step": 11848 + }, + { + "epoch": 0.9562585747720119, + "grad_norm": 0.7053409814834595, + "learning_rate": 7.19947860453588e-05, + "loss": 2.4471, + "step": 11849 + }, + { + "epoch": 0.9563392785085949, + "grad_norm": 0.6783127188682556, + "learning_rate": 7.197963121265826e-05, + "loss": 2.4586, + "step": 11850 + }, + { + "epoch": 0.956419982245178, + "grad_norm": 0.6639916300773621, + "learning_rate": 7.196447707829857e-05, + "loss": 2.4966, + "step": 11851 + }, + { + "epoch": 0.956500685981761, + "grad_norm": 0.684066891670227, + "learning_rate": 7.194932364265739e-05, + "loss": 2.5676, + "step": 11852 + }, + { + "epoch": 0.9565813897183439, + "grad_norm": 0.7872990965843201, + "learning_rate": 7.193417090611239e-05, + "loss": 2.5101, + "step": 11853 + }, + { + "epoch": 0.9566620934549269, + "grad_norm": 0.7543401122093201, + "learning_rate": 7.19190188690412e-05, + "loss": 2.5503, + "step": 11854 + }, + { + "epoch": 0.95674279719151, + "grad_norm": 0.6514382362365723, + "learning_rate": 7.190386753182152e-05, + "loss": 2.4902, + "step": 11855 + }, + { + "epoch": 0.956823500928093, + "grad_norm": 0.6867108345031738, + "learning_rate": 7.188871689483087e-05, + "loss": 2.5054, + "step": 11856 + }, + { + "epoch": 0.956904204664676, + "grad_norm": 0.6536040306091309, + "learning_rate": 7.187356695844687e-05, + "loss": 2.5462, + "step": 11857 + }, + { + "epoch": 0.9569849084012589, + "grad_norm": 0.690237820148468, + "learning_rate": 7.185841772304711e-05, + "loss": 2.5673, + "step": 11858 + }, + { + "epoch": 0.957065612137842, + "grad_norm": 0.6699091196060181, + "learning_rate": 7.184326918900915e-05, + "loss": 2.4733, + "step": 11859 + }, + { + "epoch": 0.957146315874425, + "grad_norm": 0.6482241153717041, + "learning_rate": 7.18281213567105e-05, + "loss": 2.4897, + "step": 11860 + }, + { + "epoch": 0.957227019611008, + "grad_norm": 0.686130166053772, + "learning_rate": 7.181297422652874e-05, + "loss": 2.4596, + "step": 11861 + }, + { + "epoch": 
0.957307723347591, + "grad_norm": 0.6507205367088318, + "learning_rate": 7.179782779884132e-05, + "loss": 2.5527, + "step": 11862 + }, + { + "epoch": 0.957388427084174, + "grad_norm": 0.6578813195228577, + "learning_rate": 7.178268207402577e-05, + "loss": 2.4975, + "step": 11863 + }, + { + "epoch": 0.957469130820757, + "grad_norm": 0.6931977272033691, + "learning_rate": 7.176753705245956e-05, + "loss": 2.5533, + "step": 11864 + }, + { + "epoch": 0.95754983455734, + "grad_norm": 0.7306256890296936, + "learning_rate": 7.17523927345201e-05, + "loss": 2.534, + "step": 11865 + }, + { + "epoch": 0.957630538293923, + "grad_norm": 0.6337448358535767, + "learning_rate": 7.173724912058483e-05, + "loss": 2.5015, + "step": 11866 + }, + { + "epoch": 0.9577112420305061, + "grad_norm": 0.6561456322669983, + "learning_rate": 7.172210621103124e-05, + "loss": 2.4946, + "step": 11867 + }, + { + "epoch": 0.957791945767089, + "grad_norm": 0.6341130137443542, + "learning_rate": 7.170696400623666e-05, + "loss": 2.5611, + "step": 11868 + }, + { + "epoch": 0.957872649503672, + "grad_norm": 0.7202804088592529, + "learning_rate": 7.169182250657849e-05, + "loss": 2.5209, + "step": 11869 + }, + { + "epoch": 0.957953353240255, + "grad_norm": 0.6620556712150574, + "learning_rate": 7.167668171243408e-05, + "loss": 2.4895, + "step": 11870 + }, + { + "epoch": 0.9580340569768381, + "grad_norm": 0.6842508912086487, + "learning_rate": 7.166154162418087e-05, + "loss": 2.4417, + "step": 11871 + }, + { + "epoch": 0.958114760713421, + "grad_norm": 0.7539907693862915, + "learning_rate": 7.164640224219608e-05, + "loss": 2.5153, + "step": 11872 + }, + { + "epoch": 0.958195464450004, + "grad_norm": 0.6524286270141602, + "learning_rate": 7.163126356685703e-05, + "loss": 2.509, + "step": 11873 + }, + { + "epoch": 0.958276168186587, + "grad_norm": 0.7022691965103149, + "learning_rate": 7.16161255985411e-05, + "loss": 2.5223, + "step": 11874 + }, + { + "epoch": 0.9583568719231701, + "grad_norm": 
0.6659076809883118, + "learning_rate": 7.160098833762549e-05, + "loss": 2.5231, + "step": 11875 + }, + { + "epoch": 0.9584375756597531, + "grad_norm": 0.6756494641304016, + "learning_rate": 7.15858517844875e-05, + "loss": 2.5017, + "step": 11876 + }, + { + "epoch": 0.958518279396336, + "grad_norm": 0.729850709438324, + "learning_rate": 7.157071593950436e-05, + "loss": 2.4583, + "step": 11877 + }, + { + "epoch": 0.958598983132919, + "grad_norm": 0.7155230641365051, + "learning_rate": 7.155558080305326e-05, + "loss": 2.4753, + "step": 11878 + }, + { + "epoch": 0.9586796868695021, + "grad_norm": 0.6553284525871277, + "learning_rate": 7.154044637551147e-05, + "loss": 2.5093, + "step": 11879 + }, + { + "epoch": 0.9587603906060851, + "grad_norm": 0.6516379117965698, + "learning_rate": 7.152531265725617e-05, + "loss": 2.4996, + "step": 11880 + }, + { + "epoch": 0.9588410943426681, + "grad_norm": 0.6871184706687927, + "learning_rate": 7.151017964866449e-05, + "loss": 2.5322, + "step": 11881 + }, + { + "epoch": 0.958921798079251, + "grad_norm": 0.6998933553695679, + "learning_rate": 7.149504735011358e-05, + "loss": 2.5328, + "step": 11882 + }, + { + "epoch": 0.959002501815834, + "grad_norm": 0.7065120935440063, + "learning_rate": 7.147991576198065e-05, + "loss": 2.5251, + "step": 11883 + }, + { + "epoch": 0.9590832055524171, + "grad_norm": 0.6718337535858154, + "learning_rate": 7.146478488464275e-05, + "loss": 2.5596, + "step": 11884 + }, + { + "epoch": 0.9591639092890001, + "grad_norm": 0.6394883990287781, + "learning_rate": 7.144965471847698e-05, + "loss": 2.5022, + "step": 11885 + }, + { + "epoch": 0.9592446130255831, + "grad_norm": 0.6867207288742065, + "learning_rate": 7.143452526386045e-05, + "loss": 2.4927, + "step": 11886 + }, + { + "epoch": 0.959325316762166, + "grad_norm": 0.6710157990455627, + "learning_rate": 7.141939652117026e-05, + "loss": 2.5127, + "step": 11887 + }, + { + "epoch": 0.9594060204987491, + "grad_norm": 0.6286540627479553, + "learning_rate": 
7.14042684907834e-05, + "loss": 2.4966, + "step": 11888 + }, + { + "epoch": 0.9594867242353321, + "grad_norm": 0.7295787334442139, + "learning_rate": 7.13891411730769e-05, + "loss": 2.5127, + "step": 11889 + }, + { + "epoch": 0.9595674279719151, + "grad_norm": 0.646084189414978, + "learning_rate": 7.137401456842784e-05, + "loss": 2.5575, + "step": 11890 + }, + { + "epoch": 0.959648131708498, + "grad_norm": 0.7884495258331299, + "learning_rate": 7.135888867721312e-05, + "loss": 2.4807, + "step": 11891 + }, + { + "epoch": 0.9597288354450811, + "grad_norm": 0.638469934463501, + "learning_rate": 7.134376349980977e-05, + "loss": 2.4989, + "step": 11892 + }, + { + "epoch": 0.9598095391816641, + "grad_norm": 0.6802849769592285, + "learning_rate": 7.132863903659476e-05, + "loss": 2.5139, + "step": 11893 + }, + { + "epoch": 0.9598902429182471, + "grad_norm": 0.6657521724700928, + "learning_rate": 7.131351528794499e-05, + "loss": 2.4488, + "step": 11894 + }, + { + "epoch": 0.9599709466548301, + "grad_norm": 0.6537562012672424, + "learning_rate": 7.129839225423741e-05, + "loss": 2.4664, + "step": 11895 + }, + { + "epoch": 0.9600516503914132, + "grad_norm": 0.689637303352356, + "learning_rate": 7.128326993584897e-05, + "loss": 2.582, + "step": 11896 + }, + { + "epoch": 0.9601323541279961, + "grad_norm": 0.6701640486717224, + "learning_rate": 7.126814833315646e-05, + "loss": 2.4963, + "step": 11897 + }, + { + "epoch": 0.9602130578645791, + "grad_norm": 0.7466658353805542, + "learning_rate": 7.125302744653677e-05, + "loss": 2.5015, + "step": 11898 + }, + { + "epoch": 0.9602937616011621, + "grad_norm": 0.6487225294113159, + "learning_rate": 7.123790727636685e-05, + "loss": 2.5393, + "step": 11899 + }, + { + "epoch": 0.9603744653377452, + "grad_norm": 0.7204654216766357, + "learning_rate": 7.122278782302343e-05, + "loss": 2.4668, + "step": 11900 + }, + { + "epoch": 0.9604551690743282, + "grad_norm": 0.6852861046791077, + "learning_rate": 7.120766908688336e-05, + "loss": 2.5893, + 
"step": 11901 + }, + { + "epoch": 0.9605358728109111, + "grad_norm": 0.6483901739120483, + "learning_rate": 7.119255106832344e-05, + "loss": 2.48, + "step": 11902 + }, + { + "epoch": 0.9606165765474941, + "grad_norm": 0.6670375466346741, + "learning_rate": 7.117743376772049e-05, + "loss": 2.5225, + "step": 11903 + }, + { + "epoch": 0.9606972802840772, + "grad_norm": 0.6805974841117859, + "learning_rate": 7.116231718545118e-05, + "loss": 2.4652, + "step": 11904 + }, + { + "epoch": 0.9607779840206602, + "grad_norm": 0.6700397729873657, + "learning_rate": 7.114720132189232e-05, + "loss": 2.5115, + "step": 11905 + }, + { + "epoch": 0.9608586877572431, + "grad_norm": 0.7167409062385559, + "learning_rate": 7.113208617742066e-05, + "loss": 2.5062, + "step": 11906 + }, + { + "epoch": 0.9609393914938261, + "grad_norm": 0.7337077856063843, + "learning_rate": 7.111697175241286e-05, + "loss": 2.5768, + "step": 11907 + }, + { + "epoch": 0.9610200952304092, + "grad_norm": 0.6681819558143616, + "learning_rate": 7.110185804724558e-05, + "loss": 2.5058, + "step": 11908 + }, + { + "epoch": 0.9611007989669922, + "grad_norm": 0.7235603332519531, + "learning_rate": 7.10867450622956e-05, + "loss": 2.4606, + "step": 11909 + }, + { + "epoch": 0.9611815027035752, + "grad_norm": 0.6931360363960266, + "learning_rate": 7.107163279793947e-05, + "loss": 2.5129, + "step": 11910 + }, + { + "epoch": 0.9612622064401581, + "grad_norm": 0.7331648468971252, + "learning_rate": 7.105652125455388e-05, + "loss": 2.4916, + "step": 11911 + }, + { + "epoch": 0.9613429101767412, + "grad_norm": 0.6538143754005432, + "learning_rate": 7.104141043251545e-05, + "loss": 2.5184, + "step": 11912 + }, + { + "epoch": 0.9614236139133242, + "grad_norm": 0.7018921375274658, + "learning_rate": 7.102630033220077e-05, + "loss": 2.5446, + "step": 11913 + }, + { + "epoch": 0.9615043176499072, + "grad_norm": 0.7528507709503174, + "learning_rate": 7.10111909539864e-05, + "loss": 2.4404, + "step": 11914 + }, + { + "epoch": 
0.9615850213864902, + "grad_norm": 0.7258831858634949, + "learning_rate": 7.099608229824894e-05, + "loss": 2.4758, + "step": 11915 + }, + { + "epoch": 0.9616657251230732, + "grad_norm": 0.6954349875450134, + "learning_rate": 7.098097436536498e-05, + "loss": 2.4894, + "step": 11916 + }, + { + "epoch": 0.9617464288596562, + "grad_norm": 0.691584050655365, + "learning_rate": 7.096586715571092e-05, + "loss": 2.544, + "step": 11917 + }, + { + "epoch": 0.9618271325962392, + "grad_norm": 0.7107009291648865, + "learning_rate": 7.095076066966337e-05, + "loss": 2.4994, + "step": 11918 + }, + { + "epoch": 0.9619078363328222, + "grad_norm": 0.6492058634757996, + "learning_rate": 7.093565490759881e-05, + "loss": 2.5751, + "step": 11919 + }, + { + "epoch": 0.9619885400694053, + "grad_norm": 0.6817753314971924, + "learning_rate": 7.092054986989371e-05, + "loss": 2.5129, + "step": 11920 + }, + { + "epoch": 0.9620692438059882, + "grad_norm": 0.6991822123527527, + "learning_rate": 7.090544555692448e-05, + "loss": 2.5728, + "step": 11921 + }, + { + "epoch": 0.9621499475425712, + "grad_norm": 0.6627625226974487, + "learning_rate": 7.089034196906768e-05, + "loss": 2.4479, + "step": 11922 + }, + { + "epoch": 0.9622306512791542, + "grad_norm": 0.6889652013778687, + "learning_rate": 7.087523910669957e-05, + "loss": 2.5323, + "step": 11923 + }, + { + "epoch": 0.9623113550157373, + "grad_norm": 0.7863786816596985, + "learning_rate": 7.086013697019667e-05, + "loss": 2.5146, + "step": 11924 + }, + { + "epoch": 0.9623920587523203, + "grad_norm": 0.6885324716567993, + "learning_rate": 7.084503555993536e-05, + "loss": 2.5072, + "step": 11925 + }, + { + "epoch": 0.9624727624889032, + "grad_norm": 0.619239091873169, + "learning_rate": 7.082993487629192e-05, + "loss": 2.4622, + "step": 11926 + }, + { + "epoch": 0.9625534662254862, + "grad_norm": 0.6762447953224182, + "learning_rate": 7.081483491964278e-05, + "loss": 2.5155, + "step": 11927 + }, + { + "epoch": 0.9626341699620693, + "grad_norm": 
0.6559715867042542, + "learning_rate": 7.079973569036424e-05, + "loss": 2.4729, + "step": 11928 + }, + { + "epoch": 0.9627148736986523, + "grad_norm": 0.633280873298645, + "learning_rate": 7.078463718883261e-05, + "loss": 2.4715, + "step": 11929 + }, + { + "epoch": 0.9627955774352353, + "grad_norm": 0.7740094065666199, + "learning_rate": 7.07695394154242e-05, + "loss": 2.4871, + "step": 11930 + }, + { + "epoch": 0.9628762811718182, + "grad_norm": 0.7103284597396851, + "learning_rate": 7.075444237051527e-05, + "loss": 2.5299, + "step": 11931 + }, + { + "epoch": 0.9629569849084013, + "grad_norm": 0.6800934076309204, + "learning_rate": 7.073934605448212e-05, + "loss": 2.5919, + "step": 11932 + }, + { + "epoch": 0.9630376886449843, + "grad_norm": 0.6680917143821716, + "learning_rate": 7.072425046770092e-05, + "loss": 2.4942, + "step": 11933 + }, + { + "epoch": 0.9631183923815673, + "grad_norm": 0.7248062491416931, + "learning_rate": 7.070915561054792e-05, + "loss": 2.4956, + "step": 11934 + }, + { + "epoch": 0.9631990961181502, + "grad_norm": 0.6635782122612, + "learning_rate": 7.069406148339936e-05, + "loss": 2.4658, + "step": 11935 + }, + { + "epoch": 0.9632797998547332, + "grad_norm": 0.6751061081886292, + "learning_rate": 7.067896808663137e-05, + "loss": 2.4912, + "step": 11936 + }, + { + "epoch": 0.9633605035913163, + "grad_norm": 0.7476027607917786, + "learning_rate": 7.066387542062013e-05, + "loss": 2.4858, + "step": 11937 + }, + { + "epoch": 0.9634412073278993, + "grad_norm": 0.6770931482315063, + "learning_rate": 7.064878348574183e-05, + "loss": 2.4574, + "step": 11938 + }, + { + "epoch": 0.9635219110644823, + "grad_norm": 0.7105392813682556, + "learning_rate": 7.063369228237255e-05, + "loss": 2.5523, + "step": 11939 + }, + { + "epoch": 0.9636026148010652, + "grad_norm": 0.6806207299232483, + "learning_rate": 7.061860181088842e-05, + "loss": 2.4992, + "step": 11940 + }, + { + "epoch": 0.9636833185376483, + "grad_norm": 0.7059600353240967, + "learning_rate": 
7.060351207166558e-05, + "loss": 2.5778, + "step": 11941 + }, + { + "epoch": 0.9637640222742313, + "grad_norm": 0.6306884288787842, + "learning_rate": 7.058842306508002e-05, + "loss": 2.5389, + "step": 11942 + }, + { + "epoch": 0.9638447260108143, + "grad_norm": 0.6997150778770447, + "learning_rate": 7.057333479150783e-05, + "loss": 2.5077, + "step": 11943 + }, + { + "epoch": 0.9639254297473973, + "grad_norm": 0.7073743343353271, + "learning_rate": 7.05582472513251e-05, + "loss": 2.5274, + "step": 11944 + }, + { + "epoch": 0.9640061334839803, + "grad_norm": 0.6768803596496582, + "learning_rate": 7.054316044490777e-05, + "loss": 2.5155, + "step": 11945 + }, + { + "epoch": 0.9640868372205633, + "grad_norm": 0.6792057752609253, + "learning_rate": 7.052807437263189e-05, + "loss": 2.5509, + "step": 11946 + }, + { + "epoch": 0.9641675409571463, + "grad_norm": 0.6883981823921204, + "learning_rate": 7.051298903487344e-05, + "loss": 2.5176, + "step": 11947 + }, + { + "epoch": 0.9642482446937293, + "grad_norm": 0.6934401392936707, + "learning_rate": 7.049790443200844e-05, + "loss": 2.502, + "step": 11948 + }, + { + "epoch": 0.9643289484303124, + "grad_norm": 0.6882597804069519, + "learning_rate": 7.048282056441269e-05, + "loss": 2.487, + "step": 11949 + }, + { + "epoch": 0.9644096521668953, + "grad_norm": 0.6972896456718445, + "learning_rate": 7.046773743246225e-05, + "loss": 2.5304, + "step": 11950 + }, + { + "epoch": 0.9644903559034783, + "grad_norm": 0.6591988205909729, + "learning_rate": 7.045265503653303e-05, + "loss": 2.4734, + "step": 11951 + }, + { + "epoch": 0.9645710596400613, + "grad_norm": 0.6890063285827637, + "learning_rate": 7.043757337700082e-05, + "loss": 2.5289, + "step": 11952 + }, + { + "epoch": 0.9646517633766444, + "grad_norm": 0.6931065917015076, + "learning_rate": 7.042249245424157e-05, + "loss": 2.484, + "step": 11953 + }, + { + "epoch": 0.9647324671132274, + "grad_norm": 0.6943762898445129, + "learning_rate": 7.040741226863117e-05, + "loss": 2.501, 
+ "step": 11954 + }, + { + "epoch": 0.9648131708498103, + "grad_norm": 0.677154004573822, + "learning_rate": 7.039233282054536e-05, + "loss": 2.4976, + "step": 11955 + }, + { + "epoch": 0.9648938745863933, + "grad_norm": 0.6662883758544922, + "learning_rate": 7.037725411036003e-05, + "loss": 2.4928, + "step": 11956 + }, + { + "epoch": 0.9649745783229764, + "grad_norm": 0.6854663491249084, + "learning_rate": 7.0362176138451e-05, + "loss": 2.4657, + "step": 11957 + }, + { + "epoch": 0.9650552820595594, + "grad_norm": 0.6703238487243652, + "learning_rate": 7.034709890519397e-05, + "loss": 2.4879, + "step": 11958 + }, + { + "epoch": 0.9651359857961423, + "grad_norm": 0.7023652791976929, + "learning_rate": 7.033202241096474e-05, + "loss": 2.4619, + "step": 11959 + }, + { + "epoch": 0.9652166895327253, + "grad_norm": 0.6950454711914062, + "learning_rate": 7.031694665613911e-05, + "loss": 2.5125, + "step": 11960 + }, + { + "epoch": 0.9652973932693084, + "grad_norm": 0.6740411520004272, + "learning_rate": 7.030187164109272e-05, + "loss": 2.436, + "step": 11961 + }, + { + "epoch": 0.9653780970058914, + "grad_norm": 0.6697152256965637, + "learning_rate": 7.028679736620132e-05, + "loss": 2.5513, + "step": 11962 + }, + { + "epoch": 0.9654588007424744, + "grad_norm": 0.6920599937438965, + "learning_rate": 7.027172383184061e-05, + "loss": 2.5264, + "step": 11963 + }, + { + "epoch": 0.9655395044790573, + "grad_norm": 0.6493465304374695, + "learning_rate": 7.025665103838627e-05, + "loss": 2.4834, + "step": 11964 + }, + { + "epoch": 0.9656202082156404, + "grad_norm": 0.684092104434967, + "learning_rate": 7.02415789862139e-05, + "loss": 2.4662, + "step": 11965 + }, + { + "epoch": 0.9657009119522234, + "grad_norm": 0.7161515355110168, + "learning_rate": 7.022650767569921e-05, + "loss": 2.4648, + "step": 11966 + }, + { + "epoch": 0.9657816156888064, + "grad_norm": 0.6994524002075195, + "learning_rate": 7.021143710721778e-05, + "loss": 2.5186, + "step": 11967 + }, + { + "epoch": 
0.9658623194253894, + "grad_norm": 0.7105295062065125, + "learning_rate": 7.019636728114518e-05, + "loss": 2.5132, + "step": 11968 + }, + { + "epoch": 0.9659430231619724, + "grad_norm": 0.7182292938232422, + "learning_rate": 7.018129819785702e-05, + "loss": 2.5469, + "step": 11969 + }, + { + "epoch": 0.9660237268985554, + "grad_norm": 0.7021759152412415, + "learning_rate": 7.016622985772887e-05, + "loss": 2.5477, + "step": 11970 + }, + { + "epoch": 0.9661044306351384, + "grad_norm": 0.6751413941383362, + "learning_rate": 7.015116226113624e-05, + "loss": 2.5174, + "step": 11971 + }, + { + "epoch": 0.9661851343717214, + "grad_norm": 0.6341918110847473, + "learning_rate": 7.013609540845468e-05, + "loss": 2.4778, + "step": 11972 + }, + { + "epoch": 0.9662658381083045, + "grad_norm": 0.7080956697463989, + "learning_rate": 7.012102930005971e-05, + "loss": 2.5304, + "step": 11973 + }, + { + "epoch": 0.9663465418448874, + "grad_norm": 0.6367003321647644, + "learning_rate": 7.010596393632674e-05, + "loss": 2.4857, + "step": 11974 + }, + { + "epoch": 0.9664272455814704, + "grad_norm": 0.6841328740119934, + "learning_rate": 7.009089931763131e-05, + "loss": 2.5365, + "step": 11975 + }, + { + "epoch": 0.9665079493180534, + "grad_norm": 0.6568236351013184, + "learning_rate": 7.00758354443489e-05, + "loss": 2.5286, + "step": 11976 + }, + { + "epoch": 0.9665886530546365, + "grad_norm": 0.7071812152862549, + "learning_rate": 7.006077231685485e-05, + "loss": 2.458, + "step": 11977 + }, + { + "epoch": 0.9666693567912195, + "grad_norm": 0.6997712850570679, + "learning_rate": 7.004570993552462e-05, + "loss": 2.4571, + "step": 11978 + }, + { + "epoch": 0.9667500605278024, + "grad_norm": 0.6920793056488037, + "learning_rate": 7.003064830073359e-05, + "loss": 2.4172, + "step": 11979 + }, + { + "epoch": 0.9668307642643854, + "grad_norm": 0.6823387742042542, + "learning_rate": 7.001558741285718e-05, + "loss": 2.4895, + "step": 11980 + }, + { + "epoch": 0.9669114680009685, + "grad_norm": 
0.7309569716453552, + "learning_rate": 7.000052727227068e-05, + "loss": 2.502, + "step": 11981 + }, + { + "epoch": 0.9669921717375515, + "grad_norm": 0.734708845615387, + "learning_rate": 6.998546787934946e-05, + "loss": 2.4918, + "step": 11982 + }, + { + "epoch": 0.9670728754741345, + "grad_norm": 0.690406084060669, + "learning_rate": 6.997040923446889e-05, + "loss": 2.4994, + "step": 11983 + }, + { + "epoch": 0.9671535792107174, + "grad_norm": 0.7126687169075012, + "learning_rate": 6.995535133800416e-05, + "loss": 2.4824, + "step": 11984 + }, + { + "epoch": 0.9672342829473004, + "grad_norm": 0.7020599246025085, + "learning_rate": 6.994029419033062e-05, + "loss": 2.4889, + "step": 11985 + }, + { + "epoch": 0.9673149866838835, + "grad_norm": 0.7690796852111816, + "learning_rate": 6.992523779182356e-05, + "loss": 2.4997, + "step": 11986 + }, + { + "epoch": 0.9673956904204665, + "grad_norm": 0.6635778546333313, + "learning_rate": 6.991018214285816e-05, + "loss": 2.4989, + "step": 11987 + }, + { + "epoch": 0.9674763941570494, + "grad_norm": 0.7088577747344971, + "learning_rate": 6.989512724380967e-05, + "loss": 2.549, + "step": 11988 + }, + { + "epoch": 0.9675570978936324, + "grad_norm": 0.6420924663543701, + "learning_rate": 6.988007309505333e-05, + "loss": 2.4585, + "step": 11989 + }, + { + "epoch": 0.9676378016302155, + "grad_norm": 0.7902400493621826, + "learning_rate": 6.986501969696428e-05, + "loss": 2.5009, + "step": 11990 + }, + { + "epoch": 0.9677185053667985, + "grad_norm": 0.700907289981842, + "learning_rate": 6.984996704991773e-05, + "loss": 2.4778, + "step": 11991 + }, + { + "epoch": 0.9677992091033815, + "grad_norm": 0.664378821849823, + "learning_rate": 6.983491515428883e-05, + "loss": 2.5116, + "step": 11992 + }, + { + "epoch": 0.9678799128399644, + "grad_norm": 0.6314663887023926, + "learning_rate": 6.981986401045266e-05, + "loss": 2.4588, + "step": 11993 + }, + { + "epoch": 0.9679606165765475, + "grad_norm": 0.6521078944206238, + "learning_rate": 
6.980481361878438e-05, + "loss": 2.5224, + "step": 11994 + }, + { + "epoch": 0.9680413203131305, + "grad_norm": 0.6336014270782471, + "learning_rate": 6.978976397965907e-05, + "loss": 2.4297, + "step": 11995 + }, + { + "epoch": 0.9681220240497135, + "grad_norm": 0.7321500778198242, + "learning_rate": 6.977471509345183e-05, + "loss": 2.5252, + "step": 11996 + }, + { + "epoch": 0.9682027277862965, + "grad_norm": 0.686950147151947, + "learning_rate": 6.97596669605377e-05, + "loss": 2.5188, + "step": 11997 + }, + { + "epoch": 0.9682834315228795, + "grad_norm": 0.729343056678772, + "learning_rate": 6.97446195812917e-05, + "loss": 2.5157, + "step": 11998 + }, + { + "epoch": 0.9683641352594625, + "grad_norm": 0.6447068452835083, + "learning_rate": 6.972957295608889e-05, + "loss": 2.5041, + "step": 11999 + }, + { + "epoch": 0.9684448389960455, + "grad_norm": 0.6847280859947205, + "learning_rate": 6.971452708530423e-05, + "loss": 2.443, + "step": 12000 + }, + { + "epoch": 0.9684448389960455, + "eval_loss": 2.431878089904785, + "eval_runtime": 758.167, + "eval_samples_per_second": 3.456, + "eval_steps_per_second": 0.576, + "step": 12000 + }, + { + "epoch": 0.9685255427326285, + "grad_norm": 0.6440466046333313, + "learning_rate": 6.969948196931272e-05, + "loss": 2.5091, + "step": 12001 + }, + { + "epoch": 0.9686062464692116, + "grad_norm": 0.6570029258728027, + "learning_rate": 6.968443760848937e-05, + "loss": 2.491, + "step": 12002 + }, + { + "epoch": 0.9686869502057945, + "grad_norm": 0.7610877752304077, + "learning_rate": 6.966939400320905e-05, + "loss": 2.4713, + "step": 12003 + }, + { + "epoch": 0.9687676539423775, + "grad_norm": 0.7187781929969788, + "learning_rate": 6.965435115384669e-05, + "loss": 2.4303, + "step": 12004 + }, + { + "epoch": 0.9688483576789605, + "grad_norm": 0.7668420672416687, + "learning_rate": 6.963930906077727e-05, + "loss": 2.5513, + "step": 12005 + }, + { + "epoch": 0.9689290614155436, + "grad_norm": 0.7025619745254517, + "learning_rate": 
6.96242677243756e-05, + "loss": 2.4349, + "step": 12006 + }, + { + "epoch": 0.9690097651521266, + "grad_norm": 0.7066935896873474, + "learning_rate": 6.960922714501657e-05, + "loss": 2.5465, + "step": 12007 + }, + { + "epoch": 0.9690904688887095, + "grad_norm": 0.6758970618247986, + "learning_rate": 6.95941873230751e-05, + "loss": 2.4827, + "step": 12008 + }, + { + "epoch": 0.9691711726252925, + "grad_norm": 0.7108862996101379, + "learning_rate": 6.957914825892591e-05, + "loss": 2.5412, + "step": 12009 + }, + { + "epoch": 0.9692518763618756, + "grad_norm": 0.660784125328064, + "learning_rate": 6.956410995294389e-05, + "loss": 2.5173, + "step": 12010 + }, + { + "epoch": 0.9693325800984586, + "grad_norm": 0.6966561079025269, + "learning_rate": 6.954907240550377e-05, + "loss": 2.5196, + "step": 12011 + }, + { + "epoch": 0.9694132838350416, + "grad_norm": 0.6889416575431824, + "learning_rate": 6.953403561698042e-05, + "loss": 2.5351, + "step": 12012 + }, + { + "epoch": 0.9694939875716245, + "grad_norm": 0.7578341960906982, + "learning_rate": 6.951899958774852e-05, + "loss": 2.5184, + "step": 12013 + }, + { + "epoch": 0.9695746913082076, + "grad_norm": 0.6735317707061768, + "learning_rate": 6.950396431818282e-05, + "loss": 2.4592, + "step": 12014 + }, + { + "epoch": 0.9696553950447906, + "grad_norm": 0.6903232932090759, + "learning_rate": 6.948892980865806e-05, + "loss": 2.5212, + "step": 12015 + }, + { + "epoch": 0.9697360987813736, + "grad_norm": 0.6477165818214417, + "learning_rate": 6.94738960595489e-05, + "loss": 2.4423, + "step": 12016 + }, + { + "epoch": 0.9698168025179565, + "grad_norm": 0.6778751015663147, + "learning_rate": 6.945886307123007e-05, + "loss": 2.547, + "step": 12017 + }, + { + "epoch": 0.9698975062545396, + "grad_norm": 0.690558135509491, + "learning_rate": 6.944383084407623e-05, + "loss": 2.5081, + "step": 12018 + }, + { + "epoch": 0.9699782099911226, + "grad_norm": 0.7210639119148254, + "learning_rate": 6.942879937846196e-05, + "loss": 2.496, + 
"step": 12019 + }, + { + "epoch": 0.9700589137277056, + "grad_norm": 0.7182444930076599, + "learning_rate": 6.941376867476194e-05, + "loss": 2.6138, + "step": 12020 + }, + { + "epoch": 0.9701396174642886, + "grad_norm": 0.6929295063018799, + "learning_rate": 6.939873873335077e-05, + "loss": 2.4828, + "step": 12021 + }, + { + "epoch": 0.9702203212008716, + "grad_norm": 0.6919693350791931, + "learning_rate": 6.938370955460298e-05, + "loss": 2.5123, + "step": 12022 + }, + { + "epoch": 0.9703010249374546, + "grad_norm": 0.6475244164466858, + "learning_rate": 6.93686811388932e-05, + "loss": 2.4992, + "step": 12023 + }, + { + "epoch": 0.9703817286740376, + "grad_norm": 0.6728265881538391, + "learning_rate": 6.935365348659597e-05, + "loss": 2.4486, + "step": 12024 + }, + { + "epoch": 0.9704624324106206, + "grad_norm": 0.6791470646858215, + "learning_rate": 6.933862659808582e-05, + "loss": 2.4657, + "step": 12025 + }, + { + "epoch": 0.9705431361472037, + "grad_norm": 0.7611662745475769, + "learning_rate": 6.932360047373721e-05, + "loss": 2.5243, + "step": 12026 + }, + { + "epoch": 0.9706238398837866, + "grad_norm": 0.6642355918884277, + "learning_rate": 6.930857511392467e-05, + "loss": 2.5308, + "step": 12027 + }, + { + "epoch": 0.9707045436203696, + "grad_norm": 0.7270805239677429, + "learning_rate": 6.92935505190227e-05, + "loss": 2.4708, + "step": 12028 + }, + { + "epoch": 0.9707852473569526, + "grad_norm": 0.6706295013427734, + "learning_rate": 6.927852668940568e-05, + "loss": 2.5136, + "step": 12029 + }, + { + "epoch": 0.9708659510935357, + "grad_norm": 0.6923376321792603, + "learning_rate": 6.92635036254481e-05, + "loss": 2.5238, + "step": 12030 + }, + { + "epoch": 0.9709466548301187, + "grad_norm": 0.7154483199119568, + "learning_rate": 6.924848132752436e-05, + "loss": 2.488, + "step": 12031 + }, + { + "epoch": 0.9710273585667016, + "grad_norm": 0.6675701141357422, + "learning_rate": 6.923345979600884e-05, + "loss": 2.5066, + "step": 12032 + }, + { + "epoch": 
0.9711080623032846, + "grad_norm": 0.7282043695449829, + "learning_rate": 6.921843903127592e-05, + "loss": 2.5096, + "step": 12033 + }, + { + "epoch": 0.9711887660398677, + "grad_norm": 0.663526177406311, + "learning_rate": 6.92034190337e-05, + "loss": 2.5276, + "step": 12034 + }, + { + "epoch": 0.9712694697764507, + "grad_norm": 0.7491087913513184, + "learning_rate": 6.918839980365534e-05, + "loss": 2.5044, + "step": 12035 + }, + { + "epoch": 0.9713501735130337, + "grad_norm": 0.6977766156196594, + "learning_rate": 6.917338134151629e-05, + "loss": 2.6102, + "step": 12036 + }, + { + "epoch": 0.9714308772496166, + "grad_norm": 0.6447446346282959, + "learning_rate": 6.915836364765722e-05, + "loss": 2.5137, + "step": 12037 + }, + { + "epoch": 0.9715115809861996, + "grad_norm": 0.6801442503929138, + "learning_rate": 6.91433467224523e-05, + "loss": 2.5145, + "step": 12038 + }, + { + "epoch": 0.9715922847227827, + "grad_norm": 0.6843627691268921, + "learning_rate": 6.912833056627583e-05, + "loss": 2.6099, + "step": 12039 + }, + { + "epoch": 0.9716729884593657, + "grad_norm": 0.6862856149673462, + "learning_rate": 6.911331517950209e-05, + "loss": 2.5358, + "step": 12040 + }, + { + "epoch": 0.9717536921959486, + "grad_norm": 0.6835047602653503, + "learning_rate": 6.909830056250527e-05, + "loss": 2.5257, + "step": 12041 + }, + { + "epoch": 0.9718343959325316, + "grad_norm": 0.6958080530166626, + "learning_rate": 6.908328671565956e-05, + "loss": 2.5008, + "step": 12042 + }, + { + "epoch": 0.9719150996691147, + "grad_norm": 0.7556219100952148, + "learning_rate": 6.906827363933917e-05, + "loss": 2.5283, + "step": 12043 + }, + { + "epoch": 0.9719958034056977, + "grad_norm": 0.7074917554855347, + "learning_rate": 6.90532613339183e-05, + "loss": 2.4898, + "step": 12044 + }, + { + "epoch": 0.9720765071422807, + "grad_norm": 0.6456350684165955, + "learning_rate": 6.903824979977101e-05, + "loss": 2.4989, + "step": 12045 + }, + { + "epoch": 0.9721572108788636, + "grad_norm": 
0.6609941720962524, + "learning_rate": 6.902323903727146e-05, + "loss": 2.4883, + "step": 12046 + }, + { + "epoch": 0.9722379146154467, + "grad_norm": 0.7132936716079712, + "learning_rate": 6.90082290467938e-05, + "loss": 2.4983, + "step": 12047 + }, + { + "epoch": 0.9723186183520297, + "grad_norm": 0.6686434745788574, + "learning_rate": 6.899321982871206e-05, + "loss": 2.4862, + "step": 12048 + }, + { + "epoch": 0.9723993220886127, + "grad_norm": 0.6792194247245789, + "learning_rate": 6.897821138340033e-05, + "loss": 2.5368, + "step": 12049 + }, + { + "epoch": 0.9724800258251957, + "grad_norm": 0.6829379796981812, + "learning_rate": 6.896320371123268e-05, + "loss": 2.4842, + "step": 12050 + }, + { + "epoch": 0.9725607295617787, + "grad_norm": 0.7459573745727539, + "learning_rate": 6.894819681258312e-05, + "loss": 2.5023, + "step": 12051 + }, + { + "epoch": 0.9726414332983617, + "grad_norm": 0.6700068712234497, + "learning_rate": 6.893319068782566e-05, + "loss": 2.552, + "step": 12052 + }, + { + "epoch": 0.9727221370349447, + "grad_norm": 0.7093638777732849, + "learning_rate": 6.891818533733434e-05, + "loss": 2.445, + "step": 12053 + }, + { + "epoch": 0.9728028407715277, + "grad_norm": 0.703599214553833, + "learning_rate": 6.890318076148304e-05, + "loss": 2.5536, + "step": 12054 + }, + { + "epoch": 0.9728835445081108, + "grad_norm": 0.6214482188224792, + "learning_rate": 6.888817696064578e-05, + "loss": 2.5188, + "step": 12055 + }, + { + "epoch": 0.9729642482446937, + "grad_norm": 0.6893547773361206, + "learning_rate": 6.887317393519645e-05, + "loss": 2.5596, + "step": 12056 + }, + { + "epoch": 0.9730449519812767, + "grad_norm": 0.6282656788825989, + "learning_rate": 6.885817168550903e-05, + "loss": 2.4873, + "step": 12057 + }, + { + "epoch": 0.9731256557178597, + "grad_norm": 0.6979188323020935, + "learning_rate": 6.884317021195737e-05, + "loss": 2.5358, + "step": 12058 + }, + { + "epoch": 0.9732063594544428, + "grad_norm": 0.7925785183906555, + "learning_rate": 
6.882816951491533e-05, + "loss": 2.5358, + "step": 12059 + }, + { + "epoch": 0.9732870631910258, + "grad_norm": 0.6449821591377258, + "learning_rate": 6.881316959475684e-05, + "loss": 2.4784, + "step": 12060 + }, + { + "epoch": 0.9733677669276087, + "grad_norm": 0.7013393044471741, + "learning_rate": 6.879817045185565e-05, + "loss": 2.4804, + "step": 12061 + }, + { + "epoch": 0.9734484706641917, + "grad_norm": 0.8338057398796082, + "learning_rate": 6.878317208658559e-05, + "loss": 2.512, + "step": 12062 + }, + { + "epoch": 0.9735291744007748, + "grad_norm": 0.6815133094787598, + "learning_rate": 6.876817449932054e-05, + "loss": 2.467, + "step": 12063 + }, + { + "epoch": 0.9736098781373578, + "grad_norm": 0.659156858921051, + "learning_rate": 6.87531776904342e-05, + "loss": 2.503, + "step": 12064 + }, + { + "epoch": 0.9736905818739408, + "grad_norm": 0.7149603962898254, + "learning_rate": 6.873818166030033e-05, + "loss": 2.5135, + "step": 12065 + }, + { + "epoch": 0.9737712856105237, + "grad_norm": 0.7010510563850403, + "learning_rate": 6.872318640929272e-05, + "loss": 2.5133, + "step": 12066 + }, + { + "epoch": 0.9738519893471068, + "grad_norm": 0.6247616410255432, + "learning_rate": 6.870819193778504e-05, + "loss": 2.5189, + "step": 12067 + }, + { + "epoch": 0.9739326930836898, + "grad_norm": 0.6938940286636353, + "learning_rate": 6.869319824615101e-05, + "loss": 2.5053, + "step": 12068 + }, + { + "epoch": 0.9740133968202728, + "grad_norm": 0.7636895179748535, + "learning_rate": 6.867820533476436e-05, + "loss": 2.4989, + "step": 12069 + }, + { + "epoch": 0.9740941005568557, + "grad_norm": 0.6489234566688538, + "learning_rate": 6.866321320399869e-05, + "loss": 2.4935, + "step": 12070 + }, + { + "epoch": 0.9741748042934388, + "grad_norm": 0.6752095818519592, + "learning_rate": 6.864822185422764e-05, + "loss": 2.4835, + "step": 12071 + }, + { + "epoch": 0.9742555080300218, + "grad_norm": 0.6947118639945984, + "learning_rate": 6.863323128582486e-05, + "loss": 2.504, + 
"step": 12072 + }, + { + "epoch": 0.9743362117666048, + "grad_norm": 0.6815536618232727, + "learning_rate": 6.861824149916398e-05, + "loss": 2.5369, + "step": 12073 + }, + { + "epoch": 0.9744169155031878, + "grad_norm": 0.6550236344337463, + "learning_rate": 6.860325249461852e-05, + "loss": 2.4753, + "step": 12074 + }, + { + "epoch": 0.9744976192397709, + "grad_norm": 0.6833250522613525, + "learning_rate": 6.858826427256209e-05, + "loss": 2.4687, + "step": 12075 + }, + { + "epoch": 0.9745783229763538, + "grad_norm": 0.6925075650215149, + "learning_rate": 6.857327683336824e-05, + "loss": 2.5363, + "step": 12076 + }, + { + "epoch": 0.9746590267129368, + "grad_norm": 0.6754821538925171, + "learning_rate": 6.855829017741046e-05, + "loss": 2.4696, + "step": 12077 + }, + { + "epoch": 0.9747397304495198, + "grad_norm": 0.7360671162605286, + "learning_rate": 6.854330430506228e-05, + "loss": 2.5144, + "step": 12078 + }, + { + "epoch": 0.9748204341861029, + "grad_norm": 0.6814733743667603, + "learning_rate": 6.852831921669723e-05, + "loss": 2.5059, + "step": 12079 + }, + { + "epoch": 0.9749011379226858, + "grad_norm": 0.7106744647026062, + "learning_rate": 6.851333491268869e-05, + "loss": 2.453, + "step": 12080 + }, + { + "epoch": 0.9749818416592688, + "grad_norm": 0.6623831987380981, + "learning_rate": 6.849835139341015e-05, + "loss": 2.5244, + "step": 12081 + }, + { + "epoch": 0.9750625453958518, + "grad_norm": 0.6723372936248779, + "learning_rate": 6.848336865923506e-05, + "loss": 2.5159, + "step": 12082 + }, + { + "epoch": 0.9751432491324349, + "grad_norm": 0.7256618142127991, + "learning_rate": 6.84683867105368e-05, + "loss": 2.494, + "step": 12083 + }, + { + "epoch": 0.9752239528690179, + "grad_norm": 0.6881731152534485, + "learning_rate": 6.845340554768874e-05, + "loss": 2.4374, + "step": 12084 + }, + { + "epoch": 0.9753046566056008, + "grad_norm": 0.6759666204452515, + "learning_rate": 6.843842517106434e-05, + "loss": 2.5082, + "step": 12085 + }, + { + "epoch": 
0.9753853603421838, + "grad_norm": 0.6983315348625183, + "learning_rate": 6.842344558103684e-05, + "loss": 2.5191, + "step": 12086 + }, + { + "epoch": 0.9754660640787668, + "grad_norm": 0.6805596351623535, + "learning_rate": 6.840846677797959e-05, + "loss": 2.5289, + "step": 12087 + }, + { + "epoch": 0.9755467678153499, + "grad_norm": 0.712942361831665, + "learning_rate": 6.839348876226595e-05, + "loss": 2.5544, + "step": 12088 + }, + { + "epoch": 0.9756274715519329, + "grad_norm": 0.6931124329566956, + "learning_rate": 6.837851153426924e-05, + "loss": 2.5407, + "step": 12089 + }, + { + "epoch": 0.9757081752885158, + "grad_norm": 0.6939486265182495, + "learning_rate": 6.836353509436264e-05, + "loss": 2.5236, + "step": 12090 + }, + { + "epoch": 0.9757888790250988, + "grad_norm": 0.7434083223342896, + "learning_rate": 6.834855944291944e-05, + "loss": 2.4903, + "step": 12091 + }, + { + "epoch": 0.9758695827616819, + "grad_norm": 0.672177255153656, + "learning_rate": 6.833358458031292e-05, + "loss": 2.4995, + "step": 12092 + }, + { + "epoch": 0.9759502864982649, + "grad_norm": 0.6631280779838562, + "learning_rate": 6.831861050691619e-05, + "loss": 2.4689, + "step": 12093 + }, + { + "epoch": 0.9760309902348479, + "grad_norm": 0.7485793232917786, + "learning_rate": 6.830363722310253e-05, + "loss": 2.5526, + "step": 12094 + }, + { + "epoch": 0.9761116939714308, + "grad_norm": 0.6592193245887756, + "learning_rate": 6.828866472924511e-05, + "loss": 2.4425, + "step": 12095 + }, + { + "epoch": 0.9761923977080139, + "grad_norm": 0.6479860544204712, + "learning_rate": 6.827369302571703e-05, + "loss": 2.4637, + "step": 12096 + }, + { + "epoch": 0.9762731014445969, + "grad_norm": 0.6694966554641724, + "learning_rate": 6.825872211289146e-05, + "loss": 2.5256, + "step": 12097 + }, + { + "epoch": 0.9763538051811799, + "grad_norm": 0.675751805305481, + "learning_rate": 6.82437519911415e-05, + "loss": 2.5021, + "step": 12098 + }, + { + "epoch": 0.9764345089177628, + "grad_norm": 
0.7255450487136841, + "learning_rate": 6.822878266084026e-05, + "loss": 2.5275, + "step": 12099 + }, + { + "epoch": 0.9765152126543459, + "grad_norm": 0.7034213542938232, + "learning_rate": 6.821381412236079e-05, + "loss": 2.5432, + "step": 12100 + }, + { + "epoch": 0.9765959163909289, + "grad_norm": 0.6808038949966431, + "learning_rate": 6.819884637607619e-05, + "loss": 2.5044, + "step": 12101 + }, + { + "epoch": 0.9766766201275119, + "grad_norm": 0.6601580381393433, + "learning_rate": 6.818387942235945e-05, + "loss": 2.4602, + "step": 12102 + }, + { + "epoch": 0.9767573238640949, + "grad_norm": 0.7163928151130676, + "learning_rate": 6.816891326158359e-05, + "loss": 2.4785, + "step": 12103 + }, + { + "epoch": 0.976838027600678, + "grad_norm": 0.6616904735565186, + "learning_rate": 6.815394789412164e-05, + "loss": 2.5081, + "step": 12104 + }, + { + "epoch": 0.9769187313372609, + "grad_norm": 0.6476422548294067, + "learning_rate": 6.813898332034657e-05, + "loss": 2.4624, + "step": 12105 + }, + { + "epoch": 0.9769994350738439, + "grad_norm": 0.6468440890312195, + "learning_rate": 6.812401954063131e-05, + "loss": 2.4948, + "step": 12106 + }, + { + "epoch": 0.9770801388104269, + "grad_norm": 0.6988391876220703, + "learning_rate": 6.810905655534878e-05, + "loss": 2.4958, + "step": 12107 + }, + { + "epoch": 0.97716084254701, + "grad_norm": 0.6777953505516052, + "learning_rate": 6.809409436487196e-05, + "loss": 2.5304, + "step": 12108 + }, + { + "epoch": 0.9772415462835929, + "grad_norm": 0.7115550637245178, + "learning_rate": 6.807913296957368e-05, + "loss": 2.5321, + "step": 12109 + }, + { + "epoch": 0.9773222500201759, + "grad_norm": 0.737823486328125, + "learning_rate": 6.806417236982684e-05, + "loss": 2.5121, + "step": 12110 + }, + { + "epoch": 0.9774029537567589, + "grad_norm": 0.6797437071800232, + "learning_rate": 6.804921256600439e-05, + "loss": 2.4783, + "step": 12111 + }, + { + "epoch": 0.977483657493342, + "grad_norm": 0.7240802645683289, + "learning_rate": 
6.803425355847897e-05, + "loss": 2.4949, + "step": 12112 + }, + { + "epoch": 0.977564361229925, + "grad_norm": 0.6433781981468201, + "learning_rate": 6.801929534762357e-05, + "loss": 2.4937, + "step": 12113 + }, + { + "epoch": 0.9776450649665079, + "grad_norm": 0.6935293078422546, + "learning_rate": 6.800433793381095e-05, + "loss": 2.5025, + "step": 12114 + }, + { + "epoch": 0.9777257687030909, + "grad_norm": 0.699780285358429, + "learning_rate": 6.798938131741383e-05, + "loss": 2.5231, + "step": 12115 + }, + { + "epoch": 0.977806472439674, + "grad_norm": 0.6414729952812195, + "learning_rate": 6.7974425498805e-05, + "loss": 2.4422, + "step": 12116 + }, + { + "epoch": 0.977887176176257, + "grad_norm": 0.6733608841896057, + "learning_rate": 6.795947047835722e-05, + "loss": 2.4873, + "step": 12117 + }, + { + "epoch": 0.97796787991284, + "grad_norm": 0.6985765099525452, + "learning_rate": 6.794451625644318e-05, + "loss": 2.4994, + "step": 12118 + }, + { + "epoch": 0.9780485836494229, + "grad_norm": 0.6429893374443054, + "learning_rate": 6.792956283343559e-05, + "loss": 2.4968, + "step": 12119 + }, + { + "epoch": 0.978129287386006, + "grad_norm": 0.7129024267196655, + "learning_rate": 6.79146102097071e-05, + "loss": 2.5457, + "step": 12120 + }, + { + "epoch": 0.978209991122589, + "grad_norm": 0.6811943650245667, + "learning_rate": 6.789965838563047e-05, + "loss": 2.5012, + "step": 12121 + }, + { + "epoch": 0.978290694859172, + "grad_norm": 0.7269948720932007, + "learning_rate": 6.788470736157821e-05, + "loss": 2.5124, + "step": 12122 + }, + { + "epoch": 0.978371398595755, + "grad_norm": 0.7396084666252136, + "learning_rate": 6.786975713792299e-05, + "loss": 2.5631, + "step": 12123 + }, + { + "epoch": 0.978452102332338, + "grad_norm": 0.6880094408988953, + "learning_rate": 6.785480771503745e-05, + "loss": 2.5103, + "step": 12124 + }, + { + "epoch": 0.978532806068921, + "grad_norm": 0.737095057964325, + "learning_rate": 6.783985909329409e-05, + "loss": 2.5062, + "step": 
12125 + }, + { + "epoch": 0.978613509805504, + "grad_norm": 0.6540948152542114, + "learning_rate": 6.782491127306552e-05, + "loss": 2.5568, + "step": 12126 + }, + { + "epoch": 0.978694213542087, + "grad_norm": 0.669706404209137, + "learning_rate": 6.780996425472427e-05, + "loss": 2.5156, + "step": 12127 + }, + { + "epoch": 0.97877491727867, + "grad_norm": 0.6722843647003174, + "learning_rate": 6.779501803864286e-05, + "loss": 2.4784, + "step": 12128 + }, + { + "epoch": 0.978855621015253, + "grad_norm": 0.6545475125312805, + "learning_rate": 6.778007262519377e-05, + "loss": 2.5159, + "step": 12129 + }, + { + "epoch": 0.978936324751836, + "grad_norm": 0.7010136246681213, + "learning_rate": 6.776512801474953e-05, + "loss": 2.5244, + "step": 12130 + }, + { + "epoch": 0.979017028488419, + "grad_norm": 0.6912714242935181, + "learning_rate": 6.775018420768253e-05, + "loss": 2.5223, + "step": 12131 + }, + { + "epoch": 0.9790977322250021, + "grad_norm": 0.6864827275276184, + "learning_rate": 6.773524120436525e-05, + "loss": 2.5027, + "step": 12132 + }, + { + "epoch": 0.979178435961585, + "grad_norm": 0.7586981058120728, + "learning_rate": 6.77202990051701e-05, + "loss": 2.4554, + "step": 12133 + }, + { + "epoch": 0.979259139698168, + "grad_norm": 0.6487839818000793, + "learning_rate": 6.770535761046948e-05, + "loss": 2.5035, + "step": 12134 + }, + { + "epoch": 0.979339843434751, + "grad_norm": 0.7193071246147156, + "learning_rate": 6.769041702063575e-05, + "loss": 2.4669, + "step": 12135 + }, + { + "epoch": 0.9794205471713341, + "grad_norm": 0.7118960618972778, + "learning_rate": 6.76754772360413e-05, + "loss": 2.493, + "step": 12136 + }, + { + "epoch": 0.9795012509079171, + "grad_norm": 0.6617394685745239, + "learning_rate": 6.766053825705847e-05, + "loss": 2.4771, + "step": 12137 + }, + { + "epoch": 0.9795819546445, + "grad_norm": 0.7664859294891357, + "learning_rate": 6.764560008405953e-05, + "loss": 2.5191, + "step": 12138 + }, + { + "epoch": 0.979662658381083, + 
"grad_norm": 0.708063542842865, + "learning_rate": 6.763066271741682e-05, + "loss": 2.5521, + "step": 12139 + }, + { + "epoch": 0.979743362117666, + "grad_norm": 0.6951049566268921, + "learning_rate": 6.761572615750267e-05, + "loss": 2.4708, + "step": 12140 + }, + { + "epoch": 0.9798240658542491, + "grad_norm": 0.6914932727813721, + "learning_rate": 6.760079040468921e-05, + "loss": 2.5101, + "step": 12141 + }, + { + "epoch": 0.9799047695908321, + "grad_norm": 0.6843075752258301, + "learning_rate": 6.758585545934876e-05, + "loss": 2.4932, + "step": 12142 + }, + { + "epoch": 0.979985473327415, + "grad_norm": 0.6567733883857727, + "learning_rate": 6.757092132185354e-05, + "loss": 2.4577, + "step": 12143 + }, + { + "epoch": 0.980066177063998, + "grad_norm": 0.6874415874481201, + "learning_rate": 6.75559879925757e-05, + "loss": 2.4818, + "step": 12144 + }, + { + "epoch": 0.9801468808005811, + "grad_norm": 0.7274627685546875, + "learning_rate": 6.754105547188746e-05, + "loss": 2.523, + "step": 12145 + }, + { + "epoch": 0.9802275845371641, + "grad_norm": 0.6991173028945923, + "learning_rate": 6.7526123760161e-05, + "loss": 2.4864, + "step": 12146 + }, + { + "epoch": 0.980308288273747, + "grad_norm": 0.670078456401825, + "learning_rate": 6.75111928577684e-05, + "loss": 2.4889, + "step": 12147 + }, + { + "epoch": 0.98038899201033, + "grad_norm": 0.6653482913970947, + "learning_rate": 6.749626276508178e-05, + "loss": 2.4652, + "step": 12148 + }, + { + "epoch": 0.9804696957469131, + "grad_norm": 0.7329251766204834, + "learning_rate": 6.748133348247326e-05, + "loss": 2.518, + "step": 12149 + }, + { + "epoch": 0.9805503994834961, + "grad_norm": 0.7792871594429016, + "learning_rate": 6.746640501031495e-05, + "loss": 2.5018, + "step": 12150 + }, + { + "epoch": 0.9806311032200791, + "grad_norm": 0.6962797045707703, + "learning_rate": 6.745147734897883e-05, + "loss": 2.4388, + "step": 12151 + }, + { + "epoch": 0.980711806956662, + "grad_norm": 0.6981272101402283, + "learning_rate": 
6.7436550498837e-05, + "loss": 2.4886, + "step": 12152 + }, + { + "epoch": 0.9807925106932451, + "grad_norm": 0.6696565747261047, + "learning_rate": 6.742162446026146e-05, + "loss": 2.5258, + "step": 12153 + }, + { + "epoch": 0.9808732144298281, + "grad_norm": 0.6922139525413513, + "learning_rate": 6.740669923362417e-05, + "loss": 2.493, + "step": 12154 + }, + { + "epoch": 0.9809539181664111, + "grad_norm": 0.6745694875717163, + "learning_rate": 6.739177481929715e-05, + "loss": 2.5209, + "step": 12155 + }, + { + "epoch": 0.9810346219029941, + "grad_norm": 0.7023215889930725, + "learning_rate": 6.737685121765238e-05, + "loss": 2.4987, + "step": 12156 + }, + { + "epoch": 0.9811153256395772, + "grad_norm": 0.6337805390357971, + "learning_rate": 6.73619284290617e-05, + "loss": 2.4838, + "step": 12157 + }, + { + "epoch": 0.9811960293761601, + "grad_norm": 0.6747817397117615, + "learning_rate": 6.73470064538971e-05, + "loss": 2.4834, + "step": 12158 + }, + { + "epoch": 0.9812767331127431, + "grad_norm": 0.6714580655097961, + "learning_rate": 6.733208529253047e-05, + "loss": 2.4724, + "step": 12159 + }, + { + "epoch": 0.9813574368493261, + "grad_norm": 0.6927861571311951, + "learning_rate": 6.731716494533364e-05, + "loss": 2.495, + "step": 12160 + }, + { + "epoch": 0.9814381405859092, + "grad_norm": 0.6576036214828491, + "learning_rate": 6.73022454126785e-05, + "loss": 2.5415, + "step": 12161 + }, + { + "epoch": 0.9815188443224921, + "grad_norm": 0.6495294570922852, + "learning_rate": 6.728732669493691e-05, + "loss": 2.4889, + "step": 12162 + }, + { + "epoch": 0.9815995480590751, + "grad_norm": 0.6680364012718201, + "learning_rate": 6.72724087924806e-05, + "loss": 2.4733, + "step": 12163 + }, + { + "epoch": 0.9816802517956581, + "grad_norm": 0.6816582083702087, + "learning_rate": 6.725749170568143e-05, + "loss": 2.4688, + "step": 12164 + }, + { + "epoch": 0.9817609555322412, + "grad_norm": 0.6995956897735596, + "learning_rate": 6.724257543491116e-05, + "loss": 2.4962, + 
"step": 12165 + }, + { + "epoch": 0.9818416592688242, + "grad_norm": 0.6728340983390808, + "learning_rate": 6.722765998054157e-05, + "loss": 2.5218, + "step": 12166 + }, + { + "epoch": 0.9819223630054071, + "grad_norm": 0.6835319995880127, + "learning_rate": 6.721274534294433e-05, + "loss": 2.4845, + "step": 12167 + }, + { + "epoch": 0.9820030667419901, + "grad_norm": 0.6969910264015198, + "learning_rate": 6.719783152249119e-05, + "loss": 2.4983, + "step": 12168 + }, + { + "epoch": 0.9820837704785732, + "grad_norm": 0.7327036261558533, + "learning_rate": 6.718291851955383e-05, + "loss": 2.5893, + "step": 12169 + }, + { + "epoch": 0.9821644742151562, + "grad_norm": 0.7092839479446411, + "learning_rate": 6.716800633450393e-05, + "loss": 2.5104, + "step": 12170 + }, + { + "epoch": 0.9822451779517392, + "grad_norm": 0.7384308576583862, + "learning_rate": 6.715309496771311e-05, + "loss": 2.5066, + "step": 12171 + }, + { + "epoch": 0.9823258816883221, + "grad_norm": 0.6744845509529114, + "learning_rate": 6.713818441955308e-05, + "loss": 2.469, + "step": 12172 + }, + { + "epoch": 0.9824065854249052, + "grad_norm": 0.6497980952262878, + "learning_rate": 6.712327469039536e-05, + "loss": 2.4943, + "step": 12173 + }, + { + "epoch": 0.9824872891614882, + "grad_norm": 0.6550357937812805, + "learning_rate": 6.710836578061156e-05, + "loss": 2.5019, + "step": 12174 + }, + { + "epoch": 0.9825679928980712, + "grad_norm": 0.6813549995422363, + "learning_rate": 6.709345769057331e-05, + "loss": 2.4314, + "step": 12175 + }, + { + "epoch": 0.9826486966346542, + "grad_norm": 0.6636531352996826, + "learning_rate": 6.707855042065209e-05, + "loss": 2.5202, + "step": 12176 + }, + { + "epoch": 0.9827294003712372, + "grad_norm": 0.6684894561767578, + "learning_rate": 6.706364397121944e-05, + "loss": 2.4353, + "step": 12177 + }, + { + "epoch": 0.9828101041078202, + "grad_norm": 0.6813677549362183, + "learning_rate": 6.704873834264688e-05, + "loss": 2.4254, + "step": 12178 + }, + { + "epoch": 
0.9828908078444032, + "grad_norm": 0.6584975719451904, + "learning_rate": 6.70338335353059e-05, + "loss": 2.5647, + "step": 12179 + }, + { + "epoch": 0.9829715115809862, + "grad_norm": 0.6959114074707031, + "learning_rate": 6.701892954956796e-05, + "loss": 2.5203, + "step": 12180 + }, + { + "epoch": 0.9830522153175693, + "grad_norm": 0.6399044990539551, + "learning_rate": 6.700402638580452e-05, + "loss": 2.4697, + "step": 12181 + }, + { + "epoch": 0.9831329190541522, + "grad_norm": 0.6838750839233398, + "learning_rate": 6.698912404438702e-05, + "loss": 2.5261, + "step": 12182 + }, + { + "epoch": 0.9832136227907352, + "grad_norm": 0.6286367177963257, + "learning_rate": 6.697422252568679e-05, + "loss": 2.4264, + "step": 12183 + }, + { + "epoch": 0.9832943265273182, + "grad_norm": 0.901637852191925, + "learning_rate": 6.695932183007528e-05, + "loss": 2.4908, + "step": 12184 + }, + { + "epoch": 0.9833750302639013, + "grad_norm": 0.8361458778381348, + "learning_rate": 6.694442195792386e-05, + "loss": 2.5183, + "step": 12185 + }, + { + "epoch": 0.9834557340004842, + "grad_norm": 0.7033401727676392, + "learning_rate": 6.692952290960384e-05, + "loss": 2.5702, + "step": 12186 + }, + { + "epoch": 0.9835364377370672, + "grad_norm": 0.669486939907074, + "learning_rate": 6.691462468548653e-05, + "loss": 2.5143, + "step": 12187 + }, + { + "epoch": 0.9836171414736502, + "grad_norm": 0.7043797969818115, + "learning_rate": 6.689972728594329e-05, + "loss": 2.5638, + "step": 12188 + }, + { + "epoch": 0.9836978452102332, + "grad_norm": 0.6532511115074158, + "learning_rate": 6.688483071134537e-05, + "loss": 2.5227, + "step": 12189 + }, + { + "epoch": 0.9837785489468163, + "grad_norm": 0.7363922595977783, + "learning_rate": 6.6869934962064e-05, + "loss": 2.4953, + "step": 12190 + }, + { + "epoch": 0.9838592526833992, + "grad_norm": 0.6746651530265808, + "learning_rate": 6.685504003847051e-05, + "loss": 2.5021, + "step": 12191 + }, + { + "epoch": 0.9839399564199822, + "grad_norm": 
0.665459930896759, + "learning_rate": 6.684014594093604e-05, + "loss": 2.5126, + "step": 12192 + }, + { + "epoch": 0.9840206601565652, + "grad_norm": 0.6618975400924683, + "learning_rate": 6.682525266983179e-05, + "loss": 2.5046, + "step": 12193 + }, + { + "epoch": 0.9841013638931483, + "grad_norm": 0.6536173224449158, + "learning_rate": 6.6810360225529e-05, + "loss": 2.4222, + "step": 12194 + }, + { + "epoch": 0.9841820676297313, + "grad_norm": 0.6882187724113464, + "learning_rate": 6.679546860839876e-05, + "loss": 2.475, + "step": 12195 + }, + { + "epoch": 0.9842627713663142, + "grad_norm": 0.6941187977790833, + "learning_rate": 6.678057781881224e-05, + "loss": 2.5642, + "step": 12196 + }, + { + "epoch": 0.9843434751028972, + "grad_norm": 0.7057064175605774, + "learning_rate": 6.676568785714057e-05, + "loss": 2.4817, + "step": 12197 + }, + { + "epoch": 0.9844241788394803, + "grad_norm": 0.6455948352813721, + "learning_rate": 6.675079872375487e-05, + "loss": 2.5206, + "step": 12198 + }, + { + "epoch": 0.9845048825760633, + "grad_norm": 0.6559014320373535, + "learning_rate": 6.673591041902613e-05, + "loss": 2.4082, + "step": 12199 + }, + { + "epoch": 0.9845855863126463, + "grad_norm": 0.6732046008110046, + "learning_rate": 6.672102294332542e-05, + "loss": 2.5472, + "step": 12200 + }, + { + "epoch": 0.9846662900492292, + "grad_norm": 0.7074914574623108, + "learning_rate": 6.670613629702391e-05, + "loss": 2.5243, + "step": 12201 + }, + { + "epoch": 0.9847469937858123, + "grad_norm": 0.6780694127082825, + "learning_rate": 6.669125048049246e-05, + "loss": 2.494, + "step": 12202 + }, + { + "epoch": 0.9848276975223953, + "grad_norm": 0.6361132264137268, + "learning_rate": 6.66763654941021e-05, + "loss": 2.4764, + "step": 12203 + }, + { + "epoch": 0.9849084012589783, + "grad_norm": 0.752727210521698, + "learning_rate": 6.666148133822387e-05, + "loss": 2.4942, + "step": 12204 + }, + { + "epoch": 0.9849891049955612, + "grad_norm": 0.7282724976539612, + "learning_rate": 
6.664659801322863e-05, + "loss": 2.471, + "step": 12205 + }, + { + "epoch": 0.9850698087321443, + "grad_norm": 0.6977601051330566, + "learning_rate": 6.663171551948736e-05, + "loss": 2.4695, + "step": 12206 + }, + { + "epoch": 0.9851505124687273, + "grad_norm": 0.6957824230194092, + "learning_rate": 6.661683385737101e-05, + "loss": 2.5096, + "step": 12207 + }, + { + "epoch": 0.9852312162053103, + "grad_norm": 0.6197221279144287, + "learning_rate": 6.660195302725037e-05, + "loss": 2.4199, + "step": 12208 + }, + { + "epoch": 0.9853119199418933, + "grad_norm": 0.747558057308197, + "learning_rate": 6.658707302949638e-05, + "loss": 2.5988, + "step": 12209 + }, + { + "epoch": 0.9853926236784764, + "grad_norm": 0.6593184471130371, + "learning_rate": 6.657219386447989e-05, + "loss": 2.4837, + "step": 12210 + }, + { + "epoch": 0.9854733274150593, + "grad_norm": 0.6795992255210876, + "learning_rate": 6.655731553257169e-05, + "loss": 2.498, + "step": 12211 + }, + { + "epoch": 0.9855540311516423, + "grad_norm": 0.7588422298431396, + "learning_rate": 6.65424380341426e-05, + "loss": 2.444, + "step": 12212 + }, + { + "epoch": 0.9856347348882253, + "grad_norm": 0.7791433930397034, + "learning_rate": 6.652756136956342e-05, + "loss": 2.4893, + "step": 12213 + }, + { + "epoch": 0.9857154386248084, + "grad_norm": 0.6320767998695374, + "learning_rate": 6.651268553920493e-05, + "loss": 2.4831, + "step": 12214 + }, + { + "epoch": 0.9857961423613913, + "grad_norm": 0.6818140745162964, + "learning_rate": 6.649781054343783e-05, + "loss": 2.4316, + "step": 12215 + }, + { + "epoch": 0.9858768460979743, + "grad_norm": 0.7460113763809204, + "learning_rate": 6.648293638263285e-05, + "loss": 2.5335, + "step": 12216 + }, + { + "epoch": 0.9859575498345573, + "grad_norm": 0.714074432849884, + "learning_rate": 6.646806305716079e-05, + "loss": 2.4573, + "step": 12217 + }, + { + "epoch": 0.9860382535711404, + "grad_norm": 0.6815951466560364, + "learning_rate": 6.645319056739217e-05, + "loss": 2.4758, + 
"step": 12218 + }, + { + "epoch": 0.9861189573077234, + "grad_norm": 0.6842799782752991, + "learning_rate": 6.643831891369775e-05, + "loss": 2.4998, + "step": 12219 + }, + { + "epoch": 0.9861996610443063, + "grad_norm": 0.6725212335586548, + "learning_rate": 6.642344809644818e-05, + "loss": 2.5179, + "step": 12220 + }, + { + "epoch": 0.9862803647808893, + "grad_norm": 0.7859417796134949, + "learning_rate": 6.640857811601402e-05, + "loss": 2.5801, + "step": 12221 + }, + { + "epoch": 0.9863610685174724, + "grad_norm": 0.6438577771186829, + "learning_rate": 6.639370897276591e-05, + "loss": 2.4659, + "step": 12222 + }, + { + "epoch": 0.9864417722540554, + "grad_norm": 0.7036609053611755, + "learning_rate": 6.637884066707447e-05, + "loss": 2.5637, + "step": 12223 + }, + { + "epoch": 0.9865224759906384, + "grad_norm": 0.6756969094276428, + "learning_rate": 6.636397319931016e-05, + "loss": 2.5381, + "step": 12224 + }, + { + "epoch": 0.9866031797272213, + "grad_norm": 0.6907589435577393, + "learning_rate": 6.634910656984354e-05, + "loss": 2.4927, + "step": 12225 + }, + { + "epoch": 0.9866838834638044, + "grad_norm": 0.7347010374069214, + "learning_rate": 6.63342407790452e-05, + "loss": 2.5131, + "step": 12226 + }, + { + "epoch": 0.9867645872003874, + "grad_norm": 0.6835876107215881, + "learning_rate": 6.631937582728555e-05, + "loss": 2.4611, + "step": 12227 + }, + { + "epoch": 0.9868452909369704, + "grad_norm": 0.8199172616004944, + "learning_rate": 6.630451171493511e-05, + "loss": 2.5341, + "step": 12228 + }, + { + "epoch": 0.9869259946735534, + "grad_norm": 0.7537188529968262, + "learning_rate": 6.62896484423643e-05, + "loss": 2.5218, + "step": 12229 + }, + { + "epoch": 0.9870066984101364, + "grad_norm": 0.7254310250282288, + "learning_rate": 6.62747860099436e-05, + "loss": 2.4766, + "step": 12230 + }, + { + "epoch": 0.9870874021467194, + "grad_norm": 0.6852995157241821, + "learning_rate": 6.625992441804338e-05, + "loss": 2.548, + "step": 12231 + }, + { + "epoch": 
0.9871681058833024, + "grad_norm": 0.7089388966560364, + "learning_rate": 6.624506366703402e-05, + "loss": 2.5125, + "step": 12232 + }, + { + "epoch": 0.9872488096198854, + "grad_norm": 0.7114216685295105, + "learning_rate": 6.623020375728597e-05, + "loss": 2.5408, + "step": 12233 + }, + { + "epoch": 0.9873295133564685, + "grad_norm": 0.7891978025436401, + "learning_rate": 6.621534468916946e-05, + "loss": 2.5946, + "step": 12234 + }, + { + "epoch": 0.9874102170930514, + "grad_norm": 0.671399712562561, + "learning_rate": 6.620048646305488e-05, + "loss": 2.4732, + "step": 12235 + }, + { + "epoch": 0.9874909208296344, + "grad_norm": 0.6712855696678162, + "learning_rate": 6.618562907931256e-05, + "loss": 2.4376, + "step": 12236 + }, + { + "epoch": 0.9875716245662174, + "grad_norm": 0.7183727025985718, + "learning_rate": 6.617077253831272e-05, + "loss": 2.5406, + "step": 12237 + }, + { + "epoch": 0.9876523283028005, + "grad_norm": 0.6857761144638062, + "learning_rate": 6.615591684042568e-05, + "loss": 2.5279, + "step": 12238 + }, + { + "epoch": 0.9877330320393835, + "grad_norm": 0.7268103957176208, + "learning_rate": 6.614106198602165e-05, + "loss": 2.5283, + "step": 12239 + }, + { + "epoch": 0.9878137357759664, + "grad_norm": 0.6703717708587646, + "learning_rate": 6.612620797547087e-05, + "loss": 2.4254, + "step": 12240 + }, + { + "epoch": 0.9878944395125494, + "grad_norm": 0.7110719680786133, + "learning_rate": 6.611135480914352e-05, + "loss": 2.496, + "step": 12241 + }, + { + "epoch": 0.9879751432491324, + "grad_norm": 0.7268263697624207, + "learning_rate": 6.609650248740983e-05, + "loss": 2.5489, + "step": 12242 + }, + { + "epoch": 0.9880558469857155, + "grad_norm": 0.7413432598114014, + "learning_rate": 6.60816510106399e-05, + "loss": 2.4998, + "step": 12243 + }, + { + "epoch": 0.9881365507222984, + "grad_norm": 0.7443360090255737, + "learning_rate": 6.606680037920389e-05, + "loss": 2.5282, + "step": 12244 + }, + { + "epoch": 0.9882172544588814, + "grad_norm": 
0.7787832021713257, + "learning_rate": 6.605195059347191e-05, + "loss": 2.5221, + "step": 12245 + }, + { + "epoch": 0.9882979581954644, + "grad_norm": 0.6921473741531372, + "learning_rate": 6.603710165381409e-05, + "loss": 2.5434, + "step": 12246 + }, + { + "epoch": 0.9883786619320475, + "grad_norm": 0.737328827381134, + "learning_rate": 6.602225356060044e-05, + "loss": 2.5222, + "step": 12247 + }, + { + "epoch": 0.9884593656686305, + "grad_norm": 0.698823094367981, + "learning_rate": 6.600740631420106e-05, + "loss": 2.528, + "step": 12248 + }, + { + "epoch": 0.9885400694052134, + "grad_norm": 0.6735067963600159, + "learning_rate": 6.599255991498601e-05, + "loss": 2.4942, + "step": 12249 + }, + { + "epoch": 0.9886207731417964, + "grad_norm": 0.659622311592102, + "learning_rate": 6.59777143633252e-05, + "loss": 2.4822, + "step": 12250 + }, + { + "epoch": 0.9887014768783795, + "grad_norm": 0.6973726153373718, + "learning_rate": 6.596286965958872e-05, + "loss": 2.5499, + "step": 12251 + }, + { + "epoch": 0.9887821806149625, + "grad_norm": 0.6771909594535828, + "learning_rate": 6.594802580414651e-05, + "loss": 2.4968, + "step": 12252 + }, + { + "epoch": 0.9888628843515455, + "grad_norm": 0.68080073595047, + "learning_rate": 6.593318279736849e-05, + "loss": 2.5142, + "step": 12253 + }, + { + "epoch": 0.9889435880881284, + "grad_norm": NaN, + "learning_rate": 6.593318279736849e-05, + "loss": 2.466, + "step": 12254 + }, + { + "epoch": 0.9890242918247115, + "grad_norm": 0.6865221858024597, + "learning_rate": 6.591834063962461e-05, + "loss": 2.4894, + "step": 12255 + }, + { + "epoch": 0.9891049955612945, + "grad_norm": 0.7050445079803467, + "learning_rate": 6.590349933128478e-05, + "loss": 2.5733, + "step": 12256 + }, + { + "epoch": 0.9891856992978775, + "grad_norm": 0.6971526741981506, + "learning_rate": 6.588865887271887e-05, + "loss": 2.4997, + "step": 12257 + }, + { + "epoch": 0.9892664030344605, + "grad_norm": 0.6465088725090027, + "learning_rate": 
6.587381926429674e-05, + "loss": 2.5155, + "step": 12258 + }, + { + "epoch": 0.9893471067710435, + "grad_norm": 0.6521422266960144, + "learning_rate": 6.585898050638823e-05, + "loss": 2.4803, + "step": 12259 + }, + { + "epoch": 0.9894278105076265, + "grad_norm": 0.6798849105834961, + "learning_rate": 6.584414259936324e-05, + "loss": 2.5301, + "step": 12260 + }, + { + "epoch": 0.9895085142442095, + "grad_norm": 0.6903446912765503, + "learning_rate": 6.582930554359144e-05, + "loss": 2.4662, + "step": 12261 + }, + { + "epoch": 0.9895892179807925, + "grad_norm": 0.7183516621589661, + "learning_rate": 6.581446933944267e-05, + "loss": 2.4711, + "step": 12262 + }, + { + "epoch": 0.9896699217173756, + "grad_norm": 0.702738344669342, + "learning_rate": 6.579963398728671e-05, + "loss": 2.531, + "step": 12263 + }, + { + "epoch": 0.9897506254539585, + "grad_norm": 0.7187048196792603, + "learning_rate": 6.578479948749325e-05, + "loss": 2.4933, + "step": 12264 + }, + { + "epoch": 0.9898313291905415, + "grad_norm": 0.6988784670829773, + "learning_rate": 6.576996584043202e-05, + "loss": 2.5179, + "step": 12265 + }, + { + "epoch": 0.9899120329271245, + "grad_norm": 0.7434641122817993, + "learning_rate": 6.575513304647276e-05, + "loss": 2.5157, + "step": 12266 + }, + { + "epoch": 0.9899927366637076, + "grad_norm": 0.667881429195404, + "learning_rate": 6.574030110598505e-05, + "loss": 2.5152, + "step": 12267 + }, + { + "epoch": 0.9900734404002905, + "grad_norm": 0.6766676902770996, + "learning_rate": 6.572547001933862e-05, + "loss": 2.5041, + "step": 12268 + }, + { + "epoch": 0.9901541441368735, + "grad_norm": 0.6531797051429749, + "learning_rate": 6.571063978690311e-05, + "loss": 2.5457, + "step": 12269 + }, + { + "epoch": 0.9902348478734565, + "grad_norm": 0.6557255983352661, + "learning_rate": 6.569581040904804e-05, + "loss": 2.5253, + "step": 12270 + }, + { + "epoch": 0.9903155516100396, + "grad_norm": 0.6818893551826477, + "learning_rate": 6.568098188614304e-05, + "loss": 
2.5031, + "step": 12271 + }, + { + "epoch": 0.9903962553466226, + "grad_norm": 0.6644853949546814, + "learning_rate": 6.56661542185577e-05, + "loss": 2.5285, + "step": 12272 + }, + { + "epoch": 0.9904769590832055, + "grad_norm": 0.6035603284835815, + "learning_rate": 6.565132740666155e-05, + "loss": 2.46, + "step": 12273 + }, + { + "epoch": 0.9905576628197885, + "grad_norm": 0.7061343193054199, + "learning_rate": 6.56365014508241e-05, + "loss": 2.4731, + "step": 12274 + }, + { + "epoch": 0.9906383665563716, + "grad_norm": 0.6981248259544373, + "learning_rate": 6.562167635141486e-05, + "loss": 2.4518, + "step": 12275 + }, + { + "epoch": 0.9907190702929546, + "grad_norm": 0.6718073487281799, + "learning_rate": 6.560685210880334e-05, + "loss": 2.4919, + "step": 12276 + }, + { + "epoch": 0.9907997740295376, + "grad_norm": 0.7095392942428589, + "learning_rate": 6.559202872335893e-05, + "loss": 2.5284, + "step": 12277 + }, + { + "epoch": 0.9908804777661205, + "grad_norm": 0.7052092552185059, + "learning_rate": 6.557720619545111e-05, + "loss": 2.4781, + "step": 12278 + }, + { + "epoch": 0.9909611815027036, + "grad_norm": 0.653570830821991, + "learning_rate": 6.556238452544934e-05, + "loss": 2.5293, + "step": 12279 + }, + { + "epoch": 0.9910418852392866, + "grad_norm": 0.6705330610275269, + "learning_rate": 6.554756371372293e-05, + "loss": 2.4437, + "step": 12280 + }, + { + "epoch": 0.9911225889758696, + "grad_norm": 0.6494189500808716, + "learning_rate": 6.553274376064127e-05, + "loss": 2.4833, + "step": 12281 + }, + { + "epoch": 0.9912032927124526, + "grad_norm": 0.6497724652290344, + "learning_rate": 6.551792466657378e-05, + "loss": 2.4803, + "step": 12282 + }, + { + "epoch": 0.9912839964490356, + "grad_norm": 0.7740494608879089, + "learning_rate": 6.550310643188972e-05, + "loss": 2.4907, + "step": 12283 + }, + { + "epoch": 0.9913647001856186, + "grad_norm": 0.699562668800354, + "learning_rate": 6.548828905695843e-05, + "loss": 2.4576, + "step": 12284 + }, + { + 
"epoch": 0.9914454039222016, + "grad_norm": 0.8123162984848022, + "learning_rate": 6.547347254214921e-05, + "loss": 2.5118, + "step": 12285 + }, + { + "epoch": 0.9915261076587846, + "grad_norm": 0.7227715253829956, + "learning_rate": 6.545865688783129e-05, + "loss": 2.4688, + "step": 12286 + }, + { + "epoch": 0.9916068113953677, + "grad_norm": 0.6498493552207947, + "learning_rate": 6.544384209437392e-05, + "loss": 2.477, + "step": 12287 + }, + { + "epoch": 0.9916875151319506, + "grad_norm": 0.6427823901176453, + "learning_rate": 6.542902816214636e-05, + "loss": 2.4388, + "step": 12288 + }, + { + "epoch": 0.9917682188685336, + "grad_norm": 0.6803679466247559, + "learning_rate": 6.541421509151778e-05, + "loss": 2.5095, + "step": 12289 + }, + { + "epoch": 0.9918489226051166, + "grad_norm": 0.7025790810585022, + "learning_rate": 6.539940288285734e-05, + "loss": 2.4881, + "step": 12290 + }, + { + "epoch": 0.9919296263416996, + "grad_norm": 0.6899270415306091, + "learning_rate": 6.538459153653424e-05, + "loss": 2.486, + "step": 12291 + }, + { + "epoch": 0.9920103300782827, + "grad_norm": 0.7379609942436218, + "learning_rate": 6.536978105291762e-05, + "loss": 2.5368, + "step": 12292 + }, + { + "epoch": 0.9920910338148656, + "grad_norm": 0.7279202342033386, + "learning_rate": 6.535497143237657e-05, + "loss": 2.5275, + "step": 12293 + }, + { + "epoch": 0.9921717375514486, + "grad_norm": 0.6810527443885803, + "learning_rate": 6.53401626752802e-05, + "loss": 2.5053, + "step": 12294 + }, + { + "epoch": 0.9922524412880316, + "grad_norm": 0.6578424572944641, + "learning_rate": 6.532535478199759e-05, + "loss": 2.5334, + "step": 12295 + }, + { + "epoch": 0.9923331450246147, + "grad_norm": 0.6819284558296204, + "learning_rate": 6.531054775289778e-05, + "loss": 2.4879, + "step": 12296 + }, + { + "epoch": 0.9924138487611976, + "grad_norm": 0.6524500846862793, + "learning_rate": 6.529574158834977e-05, + "loss": 2.5349, + "step": 12297 + }, + { + "epoch": 0.9924945524977806, + 
"grad_norm": 0.6853352785110474, + "learning_rate": 6.528093628872263e-05, + "loss": 2.4217, + "step": 12298 + }, + { + "epoch": 0.9925752562343636, + "grad_norm": 0.6731893420219421, + "learning_rate": 6.526613185438529e-05, + "loss": 2.4739, + "step": 12299 + }, + { + "epoch": 0.9926559599709467, + "grad_norm": 0.6515606641769409, + "learning_rate": 6.525132828570673e-05, + "loss": 2.5348, + "step": 12300 + }, + { + "epoch": 0.9927366637075297, + "grad_norm": 0.6819963455200195, + "learning_rate": 6.523652558305596e-05, + "loss": 2.5052, + "step": 12301 + }, + { + "epoch": 0.9928173674441126, + "grad_norm": 0.6521475911140442, + "learning_rate": 6.522172374680177e-05, + "loss": 2.5283, + "step": 12302 + }, + { + "epoch": 0.9928980711806956, + "grad_norm": 0.6488186717033386, + "learning_rate": 6.520692277731315e-05, + "loss": 2.4779, + "step": 12303 + }, + { + "epoch": 0.9929787749172787, + "grad_norm": 0.6509760022163391, + "learning_rate": 6.519212267495903e-05, + "loss": 2.5426, + "step": 12304 + }, + { + "epoch": 0.9930594786538617, + "grad_norm": 0.621366560459137, + "learning_rate": 6.517732344010814e-05, + "loss": 2.4804, + "step": 12305 + }, + { + "epoch": 0.9931401823904447, + "grad_norm": 0.6907268166542053, + "learning_rate": 6.516252507312938e-05, + "loss": 2.4883, + "step": 12306 + }, + { + "epoch": 0.9932208861270276, + "grad_norm": 0.7739343643188477, + "learning_rate": 6.514772757439157e-05, + "loss": 2.481, + "step": 12307 + }, + { + "epoch": 0.9933015898636107, + "grad_norm": 0.6794601082801819, + "learning_rate": 6.513293094426352e-05, + "loss": 2.5244, + "step": 12308 + }, + { + "epoch": 0.9933822936001937, + "grad_norm": 0.7189902663230896, + "learning_rate": 6.511813518311394e-05, + "loss": 2.5221, + "step": 12309 + }, + { + "epoch": 0.9934629973367767, + "grad_norm": 0.733318030834198, + "learning_rate": 6.510334029131163e-05, + "loss": 2.521, + "step": 12310 + }, + { + "epoch": 0.9935437010733597, + "grad_norm": 0.7584299445152283, + 
"learning_rate": 6.508854626922531e-05, + "loss": 2.4962, + "step": 12311 + }, + { + "epoch": 0.9936244048099427, + "grad_norm": 0.6442410349845886, + "learning_rate": 6.507375311722366e-05, + "loss": 2.4775, + "step": 12312 + }, + { + "epoch": 0.9937051085465257, + "grad_norm": 0.6609243154525757, + "learning_rate": 6.505896083567536e-05, + "loss": 2.4706, + "step": 12313 + }, + { + "epoch": 0.9937858122831087, + "grad_norm": 0.6527631878852844, + "learning_rate": 6.504416942494914e-05, + "loss": 2.4612, + "step": 12314 + }, + { + "epoch": 0.9938665160196917, + "grad_norm": 0.6798218488693237, + "learning_rate": 6.502937888541357e-05, + "loss": 2.5502, + "step": 12315 + }, + { + "epoch": 0.9939472197562748, + "grad_norm": 0.6573790907859802, + "learning_rate": 6.501458921743728e-05, + "loss": 2.5598, + "step": 12316 + }, + { + "epoch": 0.9940279234928577, + "grad_norm": 0.6945913434028625, + "learning_rate": 6.49998004213889e-05, + "loss": 2.5323, + "step": 12317 + }, + { + "epoch": 0.9941086272294407, + "grad_norm": 0.7609078288078308, + "learning_rate": 6.498501249763697e-05, + "loss": 2.5211, + "step": 12318 + }, + { + "epoch": 0.9941893309660237, + "grad_norm": 0.6878666281700134, + "learning_rate": 6.497022544655006e-05, + "loss": 2.5366, + "step": 12319 + }, + { + "epoch": 0.9942700347026068, + "grad_norm": 0.6675810813903809, + "learning_rate": 6.495543926849674e-05, + "loss": 2.512, + "step": 12320 + }, + { + "epoch": 0.9943507384391898, + "grad_norm": 0.7285950779914856, + "learning_rate": 6.494065396384544e-05, + "loss": 2.4741, + "step": 12321 + }, + { + "epoch": 0.9944314421757727, + "grad_norm": 0.6287158131599426, + "learning_rate": 6.49258695329647e-05, + "loss": 2.4824, + "step": 12322 + }, + { + "epoch": 0.9945121459123557, + "grad_norm": 0.6506727337837219, + "learning_rate": 6.491108597622296e-05, + "loss": 2.5126, + "step": 12323 + }, + { + "epoch": 0.9945928496489388, + "grad_norm": 0.7679052352905273, + "learning_rate": 6.489630329398869e-05, 
+ "loss": 2.5503, + "step": 12324 + }, + { + "epoch": 0.9946735533855218, + "grad_norm": 0.637184202671051, + "learning_rate": 6.488152148663029e-05, + "loss": 2.5098, + "step": 12325 + }, + { + "epoch": 0.9947542571221047, + "grad_norm": 0.6747186779975891, + "learning_rate": 6.486674055451619e-05, + "loss": 2.5154, + "step": 12326 + }, + { + "epoch": 0.9948349608586877, + "grad_norm": 0.7288245558738708, + "learning_rate": 6.485196049801476e-05, + "loss": 2.5077, + "step": 12327 + }, + { + "epoch": 0.9949156645952708, + "grad_norm": 0.6914251446723938, + "learning_rate": 6.483718131749435e-05, + "loss": 2.4877, + "step": 12328 + }, + { + "epoch": 0.9949963683318538, + "grad_norm": 0.7224392294883728, + "learning_rate": 6.48224030133233e-05, + "loss": 2.4862, + "step": 12329 + }, + { + "epoch": 0.9950770720684368, + "grad_norm": 0.7365561723709106, + "learning_rate": 6.480762558586995e-05, + "loss": 2.477, + "step": 12330 + }, + { + "epoch": 0.9951577758050197, + "grad_norm": 0.7673236131668091, + "learning_rate": 6.47928490355025e-05, + "loss": 2.5423, + "step": 12331 + }, + { + "epoch": 0.9952384795416028, + "grad_norm": 0.6638002395629883, + "learning_rate": 6.477807336258931e-05, + "loss": 2.5007, + "step": 12332 + }, + { + "epoch": 0.9953191832781858, + "grad_norm": 0.6415974497795105, + "learning_rate": 6.476329856749864e-05, + "loss": 2.4924, + "step": 12333 + }, + { + "epoch": 0.9953998870147688, + "grad_norm": 0.7129398584365845, + "learning_rate": 6.474852465059864e-05, + "loss": 2.5313, + "step": 12334 + }, + { + "epoch": 0.9954805907513518, + "grad_norm": 0.6896344423294067, + "learning_rate": 6.473375161225756e-05, + "loss": 2.5073, + "step": 12335 + }, + { + "epoch": 0.9955612944879348, + "grad_norm": 0.7009317874908447, + "learning_rate": 6.47189794528436e-05, + "loss": 2.574, + "step": 12336 + }, + { + "epoch": 0.9956419982245178, + "grad_norm": 0.6555172801017761, + "learning_rate": 6.470420817272488e-05, + "loss": 2.4769, + "step": 12337 + }, + { 
+ "epoch": 0.9957227019611008, + "grad_norm": 0.7569532990455627, + "learning_rate": 6.468943777226954e-05, + "loss": 2.4691, + "step": 12338 + }, + { + "epoch": 0.9958034056976838, + "grad_norm": 0.68092280626297, + "learning_rate": 6.467466825184569e-05, + "loss": 2.4793, + "step": 12339 + }, + { + "epoch": 0.9958841094342669, + "grad_norm": 0.6977378726005554, + "learning_rate": 6.465989961182152e-05, + "loss": 2.4678, + "step": 12340 + }, + { + "epoch": 0.9959648131708498, + "grad_norm": 0.6702281832695007, + "learning_rate": 6.4645131852565e-05, + "loss": 2.5398, + "step": 12341 + }, + { + "epoch": 0.9960455169074328, + "grad_norm": 0.7584038972854614, + "learning_rate": 6.46303649744442e-05, + "loss": 2.5355, + "step": 12342 + }, + { + "epoch": 0.9961262206440158, + "grad_norm": 0.6779505610466003, + "learning_rate": 6.461559897782718e-05, + "loss": 2.4828, + "step": 12343 + }, + { + "epoch": 0.9962069243805988, + "grad_norm": 0.6968233585357666, + "learning_rate": 6.460083386308192e-05, + "loss": 2.5108, + "step": 12344 + }, + { + "epoch": 0.9962876281171819, + "grad_norm": 0.7114594578742981, + "learning_rate": 6.45860696305764e-05, + "loss": 2.5236, + "step": 12345 + }, + { + "epoch": 0.9963683318537648, + "grad_norm": 0.6850530505180359, + "learning_rate": 6.457130628067865e-05, + "loss": 2.458, + "step": 12346 + }, + { + "epoch": 0.9964490355903478, + "grad_norm": 0.7135400772094727, + "learning_rate": 6.455654381375651e-05, + "loss": 2.539, + "step": 12347 + }, + { + "epoch": 0.9965297393269308, + "grad_norm": 0.6736366748809814, + "learning_rate": 6.454178223017797e-05, + "loss": 2.4721, + "step": 12348 + }, + { + "epoch": 0.9966104430635139, + "grad_norm": 0.6806206107139587, + "learning_rate": 6.45270215303109e-05, + "loss": 2.5035, + "step": 12349 + }, + { + "epoch": 0.9966911468000968, + "grad_norm": 0.7120711803436279, + "learning_rate": 6.451226171452318e-05, + "loss": 2.5344, + "step": 12350 + }, + { + "epoch": 0.9967718505366798, + "grad_norm": 
0.6865986585617065, + "learning_rate": 6.449750278318264e-05, + "loss": 2.4807, + "step": 12351 + }, + { + "epoch": 0.9968525542732628, + "grad_norm": 0.6461294889450073, + "learning_rate": 6.448274473665717e-05, + "loss": 2.4878, + "step": 12352 + }, + { + "epoch": 0.9969332580098459, + "grad_norm": 0.7090638279914856, + "learning_rate": 6.446798757531454e-05, + "loss": 2.4599, + "step": 12353 + }, + { + "epoch": 0.9970139617464289, + "grad_norm": 0.6933324337005615, + "learning_rate": 6.445323129952252e-05, + "loss": 2.5398, + "step": 12354 + }, + { + "epoch": 0.9970946654830118, + "grad_norm": 0.7018197774887085, + "learning_rate": 6.443847590964888e-05, + "loss": 2.5159, + "step": 12355 + }, + { + "epoch": 0.9971753692195948, + "grad_norm": 0.7292604446411133, + "learning_rate": 6.442372140606145e-05, + "loss": 2.4934, + "step": 12356 + }, + { + "epoch": 0.9972560729561779, + "grad_norm": 0.6686378121376038, + "learning_rate": 6.440896778912783e-05, + "loss": 2.5076, + "step": 12357 + }, + { + "epoch": 0.9973367766927609, + "grad_norm": 0.7194764018058777, + "learning_rate": 6.439421505921576e-05, + "loss": 2.4958, + "step": 12358 + }, + { + "epoch": 0.9974174804293439, + "grad_norm": 0.662467360496521, + "learning_rate": 6.437946321669296e-05, + "loss": 2.5202, + "step": 12359 + }, + { + "epoch": 0.9974981841659268, + "grad_norm": 0.7222515940666199, + "learning_rate": 6.436471226192703e-05, + "loss": 2.5058, + "step": 12360 + }, + { + "epoch": 0.9975788879025099, + "grad_norm": 0.6354855895042419, + "learning_rate": 6.434996219528562e-05, + "loss": 2.4849, + "step": 12361 + }, + { + "epoch": 0.9976595916390929, + "grad_norm": 0.7689539790153503, + "learning_rate": 6.433521301713636e-05, + "loss": 2.4959, + "step": 12362 + }, + { + "epoch": 0.9977402953756759, + "grad_norm": 0.6894338130950928, + "learning_rate": 6.43204647278468e-05, + "loss": 2.5098, + "step": 12363 + }, + { + "epoch": 0.9978209991122589, + "grad_norm": 0.7694165110588074, + "learning_rate": 
6.430571732778451e-05, + "loss": 2.513, + "step": 12364 + }, + { + "epoch": 0.9979017028488419, + "grad_norm": 0.6512044668197632, + "learning_rate": 6.42909708173171e-05, + "loss": 2.4785, + "step": 12365 + }, + { + "epoch": 0.9979824065854249, + "grad_norm": 0.6605672836303711, + "learning_rate": 6.427622519681201e-05, + "loss": 2.4804, + "step": 12366 + }, + { + "epoch": 0.9980631103220079, + "grad_norm": 0.7123624086380005, + "learning_rate": 6.426148046663677e-05, + "loss": 2.4854, + "step": 12367 + }, + { + "epoch": 0.9981438140585909, + "grad_norm": 0.662645697593689, + "learning_rate": 6.424673662715886e-05, + "loss": 2.5314, + "step": 12368 + }, + { + "epoch": 0.998224517795174, + "grad_norm": 0.6482149362564087, + "learning_rate": 6.423199367874573e-05, + "loss": 2.4492, + "step": 12369 + }, + { + "epoch": 0.9983052215317569, + "grad_norm": 0.6545752286911011, + "learning_rate": 6.421725162176482e-05, + "loss": 2.5042, + "step": 12370 + }, + { + "epoch": 0.9983859252683399, + "grad_norm": 0.6698874235153198, + "learning_rate": 6.420251045658353e-05, + "loss": 2.4523, + "step": 12371 + }, + { + "epoch": 0.9984666290049229, + "grad_norm": 0.6961477398872375, + "learning_rate": 6.418777018356929e-05, + "loss": 2.556, + "step": 12372 + }, + { + "epoch": 0.998547332741506, + "grad_norm": 0.67090904712677, + "learning_rate": 6.41730308030894e-05, + "loss": 2.5237, + "step": 12373 + }, + { + "epoch": 0.998628036478089, + "grad_norm": 0.6828685402870178, + "learning_rate": 6.415829231551124e-05, + "loss": 2.453, + "step": 12374 + }, + { + "epoch": 0.9987087402146719, + "grad_norm": 0.6699565649032593, + "learning_rate": 6.414355472120213e-05, + "loss": 2.4632, + "step": 12375 + }, + { + "epoch": 0.9987894439512549, + "grad_norm": 0.6918730735778809, + "learning_rate": 6.412881802052936e-05, + "loss": 2.4532, + "step": 12376 + }, + { + "epoch": 0.998870147687838, + "grad_norm": 0.7222442030906677, + "learning_rate": 6.411408221386021e-05, + "loss": 2.5113, + 
"step": 12377 + }, + { + "epoch": 0.998950851424421, + "grad_norm": 0.7479627132415771, + "learning_rate": 6.409934730156195e-05, + "loss": 2.4857, + "step": 12378 + }, + { + "epoch": 0.999031555161004, + "grad_norm": 0.6552882194519043, + "learning_rate": 6.40846132840018e-05, + "loss": 2.4816, + "step": 12379 + }, + { + "epoch": 0.9991122588975869, + "grad_norm": 0.5990073084831238, + "learning_rate": 6.406988016154694e-05, + "loss": 2.4753, + "step": 12380 + }, + { + "epoch": 0.99919296263417, + "grad_norm": 0.6671901941299438, + "learning_rate": 6.405514793456465e-05, + "loss": 2.5298, + "step": 12381 + }, + { + "epoch": 0.999273666370753, + "grad_norm": 0.6630427241325378, + "learning_rate": 6.4040416603422e-05, + "loss": 2.485, + "step": 12382 + }, + { + "epoch": 0.999354370107336, + "grad_norm": 0.6873636841773987, + "learning_rate": 6.402568616848614e-05, + "loss": 2.4902, + "step": 12383 + }, + { + "epoch": 0.9994350738439189, + "grad_norm": 0.6912413239479065, + "learning_rate": 6.401095663012424e-05, + "loss": 2.5339, + "step": 12384 + }, + { + "epoch": 0.999515777580502, + "grad_norm": 0.6491912603378296, + "learning_rate": 6.39962279887034e-05, + "loss": 2.5367, + "step": 12385 + }, + { + "epoch": 0.999596481317085, + "grad_norm": 0.6668288111686707, + "learning_rate": 6.398150024459065e-05, + "loss": 2.5294, + "step": 12386 + }, + { + "epoch": 0.999677185053668, + "grad_norm": 0.6603856086730957, + "learning_rate": 6.396677339815306e-05, + "loss": 2.4378, + "step": 12387 + }, + { + "epoch": 0.999757888790251, + "grad_norm": 0.6461218595504761, + "learning_rate": 6.395204744975772e-05, + "loss": 2.4835, + "step": 12388 + }, + { + "epoch": 0.999838592526834, + "grad_norm": 0.6621688604354858, + "learning_rate": 6.39373223997715e-05, + "loss": 2.4834, + "step": 12389 + }, + { + "epoch": 0.999919296263417, + "grad_norm": 0.6758724451065063, + "learning_rate": 6.392259824856153e-05, + "loss": 2.4549, + "step": 12390 + }, + { + "epoch": 1.0, + "grad_norm": 
1.1304112672805786, + "learning_rate": 6.390787499649473e-05, + "loss": 2.5547, + "step": 12391 + }, + { + "epoch": 1.000080703736583, + "grad_norm": 0.6919478178024292, + "learning_rate": 6.389315264393801e-05, + "loss": 2.47, + "step": 12392 + }, + { + "epoch": 1.000161407473166, + "grad_norm": 0.6916815638542175, + "learning_rate": 6.38784311912583e-05, + "loss": 2.4636, + "step": 12393 + }, + { + "epoch": 1.000242111209749, + "grad_norm": 0.6627040505409241, + "learning_rate": 6.386371063882252e-05, + "loss": 2.5094, + "step": 12394 + }, + { + "epoch": 1.000322814946332, + "grad_norm": 0.6408648490905762, + "learning_rate": 6.384899098699754e-05, + "loss": 2.426, + "step": 12395 + }, + { + "epoch": 1.000403518682915, + "grad_norm": 0.70432448387146, + "learning_rate": 6.38342722361502e-05, + "loss": 2.4861, + "step": 12396 + }, + { + "epoch": 1.000484222419498, + "grad_norm": 0.7115964889526367, + "learning_rate": 6.381955438664735e-05, + "loss": 2.4824, + "step": 12397 + }, + { + "epoch": 1.000564926156081, + "grad_norm": 0.6547040939331055, + "learning_rate": 6.380483743885574e-05, + "loss": 2.488, + "step": 12398 + }, + { + "epoch": 1.000645629892664, + "grad_norm": 0.6916625499725342, + "learning_rate": 6.379012139314223e-05, + "loss": 2.4864, + "step": 12399 + }, + { + "epoch": 1.0007263336292471, + "grad_norm": 0.6311133503913879, + "learning_rate": 6.377540624987352e-05, + "loss": 2.4672, + "step": 12400 + }, + { + "epoch": 1.00080703736583, + "grad_norm": 0.7115580439567566, + "learning_rate": 6.376069200941642e-05, + "loss": 2.4359, + "step": 12401 + }, + { + "epoch": 1.000887741102413, + "grad_norm": 0.6734051704406738, + "learning_rate": 6.374597867213756e-05, + "loss": 2.4896, + "step": 12402 + }, + { + "epoch": 1.000968444838996, + "grad_norm": 0.6910715699195862, + "learning_rate": 6.373126623840368e-05, + "loss": 2.4502, + "step": 12403 + }, + { + "epoch": 1.001049148575579, + "grad_norm": 0.6807514429092407, + "learning_rate": 
6.37165547085815e-05, + "loss": 2.4791, + "step": 12404 + }, + { + "epoch": 1.0011298523121621, + "grad_norm": 0.679350733757019, + "learning_rate": 6.370184408303759e-05, + "loss": 2.4758, + "step": 12405 + }, + { + "epoch": 1.001210556048745, + "grad_norm": 0.6516300439834595, + "learning_rate": 6.36871343621386e-05, + "loss": 2.4338, + "step": 12406 + }, + { + "epoch": 1.001291259785328, + "grad_norm": 0.7033620476722717, + "learning_rate": 6.367242554625119e-05, + "loss": 2.429, + "step": 12407 + }, + { + "epoch": 1.0013719635219112, + "grad_norm": 0.6750274896621704, + "learning_rate": 6.365771763574186e-05, + "loss": 2.4283, + "step": 12408 + }, + { + "epoch": 1.001452667258494, + "grad_norm": 0.7188721895217896, + "learning_rate": 6.364301063097722e-05, + "loss": 2.4509, + "step": 12409 + }, + { + "epoch": 1.001533370995077, + "grad_norm": 0.6936308741569519, + "learning_rate": 6.362830453232379e-05, + "loss": 2.4469, + "step": 12410 + }, + { + "epoch": 1.00161407473166, + "grad_norm": 0.673060953617096, + "learning_rate": 6.361359934014808e-05, + "loss": 2.4444, + "step": 12411 + }, + { + "epoch": 1.001694778468243, + "grad_norm": 0.7465113997459412, + "learning_rate": 6.359889505481658e-05, + "loss": 2.4376, + "step": 12412 + }, + { + "epoch": 1.0017754822048262, + "grad_norm": 0.7180366516113281, + "learning_rate": 6.358419167669582e-05, + "loss": 2.4223, + "step": 12413 + }, + { + "epoch": 1.001856185941409, + "grad_norm": 0.6582302451133728, + "learning_rate": 6.356948920615214e-05, + "loss": 2.4723, + "step": 12414 + }, + { + "epoch": 1.001936889677992, + "grad_norm": 0.6452654600143433, + "learning_rate": 6.3554787643552e-05, + "loss": 2.4609, + "step": 12415 + }, + { + "epoch": 1.0020175934145752, + "grad_norm": 0.7170321345329285, + "learning_rate": 6.354008698926185e-05, + "loss": 2.5377, + "step": 12416 + }, + { + "epoch": 1.002098297151158, + "grad_norm": 0.6483680605888367, + "learning_rate": 6.352538724364809e-05, + "loss": 2.4349, + "step": 
12417 + }, + { + "epoch": 1.0021790008877411, + "grad_norm": 0.6567494869232178, + "learning_rate": 6.351068840707697e-05, + "loss": 2.4421, + "step": 12418 + }, + { + "epoch": 1.002259704624324, + "grad_norm": 0.7498565912246704, + "learning_rate": 6.349599047991488e-05, + "loss": 2.4212, + "step": 12419 + }, + { + "epoch": 1.002340408360907, + "grad_norm": 0.6894906759262085, + "learning_rate": 6.348129346252816e-05, + "loss": 2.4356, + "step": 12420 + }, + { + "epoch": 1.0024211120974902, + "grad_norm": 0.657361626625061, + "learning_rate": 6.346659735528304e-05, + "loss": 2.4164, + "step": 12421 + }, + { + "epoch": 1.002501815834073, + "grad_norm": 0.6369211673736572, + "learning_rate": 6.345190215854581e-05, + "loss": 2.4229, + "step": 12422 + }, + { + "epoch": 1.0025825195706561, + "grad_norm": 0.7033721208572388, + "learning_rate": 6.343720787268277e-05, + "loss": 2.5052, + "step": 12423 + }, + { + "epoch": 1.0026632233072392, + "grad_norm": 0.7125518918037415, + "learning_rate": 6.342251449806003e-05, + "loss": 2.514, + "step": 12424 + }, + { + "epoch": 1.002743927043822, + "grad_norm": 0.7355595827102661, + "learning_rate": 6.340782203504385e-05, + "loss": 2.4459, + "step": 12425 + }, + { + "epoch": 1.0028246307804052, + "grad_norm": 0.7244594693183899, + "learning_rate": 6.339313048400042e-05, + "loss": 2.452, + "step": 12426 + }, + { + "epoch": 1.002905334516988, + "grad_norm": 0.7112728357315063, + "learning_rate": 6.337843984529585e-05, + "loss": 2.4951, + "step": 12427 + }, + { + "epoch": 1.0029860382535711, + "grad_norm": 0.7235615849494934, + "learning_rate": 6.336375011929628e-05, + "loss": 2.4697, + "step": 12428 + }, + { + "epoch": 1.0030667419901542, + "grad_norm": 0.653865396976471, + "learning_rate": 6.334906130636784e-05, + "loss": 2.4804, + "step": 12429 + }, + { + "epoch": 1.003147445726737, + "grad_norm": 0.7845149636268616, + "learning_rate": 6.33343734068766e-05, + "loss": 2.5415, + "step": 12430 + }, + { + "epoch": 1.0032281494633202, + 
"grad_norm": 0.7356342077255249, + "learning_rate": 6.33196864211886e-05, + "loss": 2.5321, + "step": 12431 + }, + { + "epoch": 1.0033088531999033, + "grad_norm": 0.6828265190124512, + "learning_rate": 6.330500034966991e-05, + "loss": 2.3849, + "step": 12432 + }, + { + "epoch": 1.0033895569364861, + "grad_norm": 0.7226579189300537, + "learning_rate": 6.329031519268658e-05, + "loss": 2.512, + "step": 12433 + }, + { + "epoch": 1.0034702606730692, + "grad_norm": 0.6490235924720764, + "learning_rate": 6.327563095060449e-05, + "loss": 2.487, + "step": 12434 + }, + { + "epoch": 1.003550964409652, + "grad_norm": 0.6889309883117676, + "learning_rate": 6.326094762378969e-05, + "loss": 2.4677, + "step": 12435 + }, + { + "epoch": 1.0036316681462352, + "grad_norm": 0.695854127407074, + "learning_rate": 6.324626521260815e-05, + "loss": 2.4362, + "step": 12436 + }, + { + "epoch": 1.0037123718828183, + "grad_norm": 0.7045256495475769, + "learning_rate": 6.32315837174257e-05, + "loss": 2.4307, + "step": 12437 + }, + { + "epoch": 1.0037930756194011, + "grad_norm": 0.662604570388794, + "learning_rate": 6.321690313860833e-05, + "loss": 2.4271, + "step": 12438 + }, + { + "epoch": 1.0038737793559842, + "grad_norm": 0.7682240009307861, + "learning_rate": 6.320222347652191e-05, + "loss": 2.4617, + "step": 12439 + }, + { + "epoch": 1.0039544830925673, + "grad_norm": 0.6599584817886353, + "learning_rate": 6.318754473153221e-05, + "loss": 2.405, + "step": 12440 + }, + { + "epoch": 1.0040351868291502, + "grad_norm": 0.7423116564750671, + "learning_rate": 6.317286690400515e-05, + "loss": 2.5496, + "step": 12441 + }, + { + "epoch": 1.0041158905657332, + "grad_norm": 0.6928953528404236, + "learning_rate": 6.315818999430654e-05, + "loss": 2.4265, + "step": 12442 + }, + { + "epoch": 1.0041965943023161, + "grad_norm": 0.699990451335907, + "learning_rate": 6.314351400280211e-05, + "loss": 2.4747, + "step": 12443 + }, + { + "epoch": 1.0042772980388992, + "grad_norm": 0.673384964466095, + 
"learning_rate": 6.312883892985765e-05, + "loss": 2.4891, + "step": 12444 + }, + { + "epoch": 1.0043580017754823, + "grad_norm": 0.6668596863746643, + "learning_rate": 6.311416477583893e-05, + "loss": 2.4312, + "step": 12445 + }, + { + "epoch": 1.0044387055120652, + "grad_norm": 0.6931218504905701, + "learning_rate": 6.309949154111163e-05, + "loss": 2.4907, + "step": 12446 + }, + { + "epoch": 1.0045194092486482, + "grad_norm": 0.687683641910553, + "learning_rate": 6.308481922604146e-05, + "loss": 2.4302, + "step": 12447 + }, + { + "epoch": 1.004600112985231, + "grad_norm": 0.6887302398681641, + "learning_rate": 6.30701478309941e-05, + "loss": 2.4749, + "step": 12448 + }, + { + "epoch": 1.0046808167218142, + "grad_norm": 0.6713404655456543, + "learning_rate": 6.305547735633522e-05, + "loss": 2.5046, + "step": 12449 + }, + { + "epoch": 1.0047615204583973, + "grad_norm": 0.7147336006164551, + "learning_rate": 6.304080780243038e-05, + "loss": 2.4578, + "step": 12450 + }, + { + "epoch": 1.0048422241949801, + "grad_norm": 0.87425297498703, + "learning_rate": 6.30261391696452e-05, + "loss": 2.4487, + "step": 12451 + }, + { + "epoch": 1.0049229279315632, + "grad_norm": 0.6641440987586975, + "learning_rate": 6.301147145834534e-05, + "loss": 2.4657, + "step": 12452 + }, + { + "epoch": 1.0050036316681463, + "grad_norm": 0.7311998009681702, + "learning_rate": 6.299680466889626e-05, + "loss": 2.4784, + "step": 12453 + }, + { + "epoch": 1.0050843354047292, + "grad_norm": 0.6722697615623474, + "learning_rate": 6.298213880166354e-05, + "loss": 2.4653, + "step": 12454 + }, + { + "epoch": 1.0051650391413123, + "grad_norm": 0.6886328458786011, + "learning_rate": 6.29674738570127e-05, + "loss": 2.3949, + "step": 12455 + }, + { + "epoch": 1.0052457428778951, + "grad_norm": 0.684688925743103, + "learning_rate": 6.295280983530921e-05, + "loss": 2.4334, + "step": 12456 + }, + { + "epoch": 1.0053264466144782, + "grad_norm": 0.7436798214912415, + "learning_rate": 6.293814673691853e-05, + 
"loss": 2.5316, + "step": 12457 + }, + { + "epoch": 1.0054071503510613, + "grad_norm": 0.7401304244995117, + "learning_rate": 6.292348456220615e-05, + "loss": 2.4556, + "step": 12458 + }, + { + "epoch": 1.0054878540876442, + "grad_norm": 0.7330329418182373, + "learning_rate": 6.290882331153742e-05, + "loss": 2.4321, + "step": 12459 + }, + { + "epoch": 1.0055685578242273, + "grad_norm": 0.8005052208900452, + "learning_rate": 6.289416298527776e-05, + "loss": 2.415, + "step": 12460 + }, + { + "epoch": 1.0056492615608104, + "grad_norm": 0.8047310709953308, + "learning_rate": 6.28795035837926e-05, + "loss": 2.4144, + "step": 12461 + }, + { + "epoch": 1.0057299652973932, + "grad_norm": 0.7384032011032104, + "learning_rate": 6.28648451074472e-05, + "loss": 2.5237, + "step": 12462 + }, + { + "epoch": 1.0058106690339763, + "grad_norm": 0.7240314483642578, + "learning_rate": 6.285018755660695e-05, + "loss": 2.4894, + "step": 12463 + }, + { + "epoch": 1.0058913727705592, + "grad_norm": 0.6901080012321472, + "learning_rate": 6.283553093163712e-05, + "loss": 2.4244, + "step": 12464 + }, + { + "epoch": 1.0059720765071423, + "grad_norm": 0.6572268605232239, + "learning_rate": 6.282087523290304e-05, + "loss": 2.456, + "step": 12465 + }, + { + "epoch": 1.0060527802437254, + "grad_norm": 0.7207481861114502, + "learning_rate": 6.28062204607699e-05, + "loss": 2.4153, + "step": 12466 + }, + { + "epoch": 1.0061334839803082, + "grad_norm": 0.6901980042457581, + "learning_rate": 6.279156661560299e-05, + "loss": 2.4776, + "step": 12467 + }, + { + "epoch": 1.0062141877168913, + "grad_norm": 0.7003545761108398, + "learning_rate": 6.277691369776752e-05, + "loss": 2.4206, + "step": 12468 + }, + { + "epoch": 1.0062948914534744, + "grad_norm": 0.6978366374969482, + "learning_rate": 6.276226170762865e-05, + "loss": 2.3866, + "step": 12469 + }, + { + "epoch": 1.0063755951900573, + "grad_norm": 0.6763097643852234, + "learning_rate": 6.274761064555154e-05, + "loss": 2.5439, + "step": 12470 + }, + { 
+ "epoch": 1.0064562989266403, + "grad_norm": 0.7146836519241333, + "learning_rate": 6.273296051190139e-05, + "loss": 2.5486, + "step": 12471 + }, + { + "epoch": 1.0065370026632232, + "grad_norm": 0.7448136806488037, + "learning_rate": 6.271831130704326e-05, + "loss": 2.4539, + "step": 12472 + }, + { + "epoch": 1.0066177063998063, + "grad_norm": 0.6918472051620483, + "learning_rate": 6.270366303134226e-05, + "loss": 2.4756, + "step": 12473 + }, + { + "epoch": 1.0066984101363894, + "grad_norm": 0.7067514657974243, + "learning_rate": 6.26890156851635e-05, + "loss": 2.4925, + "step": 12474 + }, + { + "epoch": 1.0067791138729723, + "grad_norm": 0.6517517566680908, + "learning_rate": 6.267436926887197e-05, + "loss": 2.4339, + "step": 12475 + }, + { + "epoch": 1.0068598176095553, + "grad_norm": 0.673367977142334, + "learning_rate": 6.265972378283274e-05, + "loss": 2.416, + "step": 12476 + }, + { + "epoch": 1.0069405213461384, + "grad_norm": 0.7190212607383728, + "learning_rate": 6.26450792274108e-05, + "loss": 2.4822, + "step": 12477 + }, + { + "epoch": 1.0070212250827213, + "grad_norm": 0.7568029165267944, + "learning_rate": 6.263043560297112e-05, + "loss": 2.4607, + "step": 12478 + }, + { + "epoch": 1.0071019288193044, + "grad_norm": 0.6860609650611877, + "learning_rate": 6.261579290987866e-05, + "loss": 2.4429, + "step": 12479 + }, + { + "epoch": 1.0071826325558872, + "grad_norm": 0.7066059112548828, + "learning_rate": 6.260115114849839e-05, + "loss": 2.5504, + "step": 12480 + }, + { + "epoch": 1.0072633362924703, + "grad_norm": 0.6857946515083313, + "learning_rate": 6.25865103191952e-05, + "loss": 2.4776, + "step": 12481 + }, + { + "epoch": 1.0073440400290534, + "grad_norm": 0.6879859566688538, + "learning_rate": 6.257187042233396e-05, + "loss": 2.3651, + "step": 12482 + }, + { + "epoch": 1.0074247437656363, + "grad_norm": 0.6900867223739624, + "learning_rate": 6.255723145827954e-05, + "loss": 2.4644, + "step": 12483 + }, + { + "epoch": 1.0075054475022194, + 
"grad_norm": 0.7144716382026672, + "learning_rate": 6.254259342739683e-05, + "loss": 2.4219, + "step": 12484 + }, + { + "epoch": 1.0075861512388025, + "grad_norm": 0.674619197845459, + "learning_rate": 6.252795633005056e-05, + "loss": 2.5038, + "step": 12485 + }, + { + "epoch": 1.0076668549753853, + "grad_norm": 0.7036965489387512, + "learning_rate": 6.251332016660558e-05, + "loss": 2.4784, + "step": 12486 + }, + { + "epoch": 1.0077475587119684, + "grad_norm": 0.7046369910240173, + "learning_rate": 6.249868493742668e-05, + "loss": 2.514, + "step": 12487 + }, + { + "epoch": 1.0078282624485513, + "grad_norm": 0.6933087110519409, + "learning_rate": 6.248405064287854e-05, + "loss": 2.4855, + "step": 12488 + }, + { + "epoch": 1.0079089661851344, + "grad_norm": 0.7210546731948853, + "learning_rate": 6.246941728332594e-05, + "loss": 2.5101, + "step": 12489 + }, + { + "epoch": 1.0079896699217175, + "grad_norm": 0.6738288402557373, + "learning_rate": 6.245478485913361e-05, + "loss": 2.4891, + "step": 12490 + }, + { + "epoch": 1.0080703736583003, + "grad_norm": 0.7023273706436157, + "learning_rate": 6.244015337066611e-05, + "loss": 2.4977, + "step": 12491 + }, + { + "epoch": 1.0081510773948834, + "grad_norm": 0.6761355996131897, + "learning_rate": 6.24255228182882e-05, + "loss": 2.4948, + "step": 12492 + }, + { + "epoch": 1.0082317811314665, + "grad_norm": 0.6427976489067078, + "learning_rate": 6.241089320236448e-05, + "loss": 2.466, + "step": 12493 + }, + { + "epoch": 1.0083124848680494, + "grad_norm": 0.6907719969749451, + "learning_rate": 6.23962645232596e-05, + "loss": 2.437, + "step": 12494 + }, + { + "epoch": 1.0083931886046325, + "grad_norm": 0.709032416343689, + "learning_rate": 6.238163678133807e-05, + "loss": 2.4298, + "step": 12495 + }, + { + "epoch": 1.0084738923412153, + "grad_norm": 0.7395734786987305, + "learning_rate": 6.236700997696448e-05, + "loss": 2.4502, + "step": 12496 + }, + { + "epoch": 1.0085545960777984, + "grad_norm": 0.6535435914993286, + 
"learning_rate": 6.23523841105034e-05, + "loss": 2.4494, + "step": 12497 + }, + { + "epoch": 1.0086352998143815, + "grad_norm": 0.6597761511802673, + "learning_rate": 6.23377591823193e-05, + "loss": 2.4377, + "step": 12498 + }, + { + "epoch": 1.0087160035509644, + "grad_norm": 0.6610515713691711, + "learning_rate": 6.232313519277668e-05, + "loss": 2.4328, + "step": 12499 + }, + { + "epoch": 1.0087967072875474, + "grad_norm": 0.6785424947738647, + "learning_rate": 6.230851214224009e-05, + "loss": 2.457, + "step": 12500 + }, + { + "epoch": 1.0088774110241303, + "grad_norm": 0.6939748525619507, + "learning_rate": 6.229389003107383e-05, + "loss": 2.383, + "step": 12501 + }, + { + "epoch": 1.0089581147607134, + "grad_norm": 0.7592256665229797, + "learning_rate": 6.22792688596424e-05, + "loss": 2.4665, + "step": 12502 + }, + { + "epoch": 1.0090388184972965, + "grad_norm": 0.6751298308372498, + "learning_rate": 6.226464862831023e-05, + "loss": 2.491, + "step": 12503 + }, + { + "epoch": 1.0091195222338794, + "grad_norm": 0.682771623134613, + "learning_rate": 6.225002933744164e-05, + "loss": 2.4275, + "step": 12504 + }, + { + "epoch": 1.0092002259704624, + "grad_norm": 0.7314651608467102, + "learning_rate": 6.223541098740098e-05, + "loss": 2.4489, + "step": 12505 + }, + { + "epoch": 1.0092809297070455, + "grad_norm": 0.7132120132446289, + "learning_rate": 6.222079357855261e-05, + "loss": 2.4819, + "step": 12506 + }, + { + "epoch": 1.0093616334436284, + "grad_norm": 0.6571424007415771, + "learning_rate": 6.220617711126082e-05, + "loss": 2.455, + "step": 12507 + }, + { + "epoch": 1.0094423371802115, + "grad_norm": 0.7675301432609558, + "learning_rate": 6.21915615858899e-05, + "loss": 2.5282, + "step": 12508 + }, + { + "epoch": 1.0095230409167943, + "grad_norm": 0.6907868385314941, + "learning_rate": 6.217694700280408e-05, + "loss": 2.4639, + "step": 12509 + }, + { + "epoch": 1.0096037446533774, + "grad_norm": 0.7223815321922302, + "learning_rate": 6.216233336236764e-05, + 
"loss": 2.4682, + "step": 12510 + }, + { + "epoch": 1.0096844483899605, + "grad_norm": 0.7325109839439392, + "learning_rate": 6.214772066494474e-05, + "loss": 2.4591, + "step": 12511 + }, + { + "epoch": 1.0097651521265434, + "grad_norm": 0.6589400768280029, + "learning_rate": 6.213310891089957e-05, + "loss": 2.4883, + "step": 12512 + }, + { + "epoch": 1.0098458558631265, + "grad_norm": 0.6692262291908264, + "learning_rate": 6.211849810059635e-05, + "loss": 2.4635, + "step": 12513 + }, + { + "epoch": 1.0099265595997096, + "grad_norm": 0.7352520823478699, + "learning_rate": 6.210388823439914e-05, + "loss": 2.4743, + "step": 12514 + }, + { + "epoch": 1.0100072633362924, + "grad_norm": 0.6631996035575867, + "learning_rate": 6.208927931267212e-05, + "loss": 2.4848, + "step": 12515 + }, + { + "epoch": 1.0100879670728755, + "grad_norm": 0.6985767483711243, + "learning_rate": 6.207467133577937e-05, + "loss": 2.5044, + "step": 12516 + }, + { + "epoch": 1.0101686708094584, + "grad_norm": 0.665635347366333, + "learning_rate": 6.206006430408494e-05, + "loss": 2.4718, + "step": 12517 + }, + { + "epoch": 1.0102493745460415, + "grad_norm": 0.6859133243560791, + "learning_rate": 6.204545821795286e-05, + "loss": 2.4702, + "step": 12518 + }, + { + "epoch": 1.0103300782826246, + "grad_norm": 0.6578841805458069, + "learning_rate": 6.203085307774722e-05, + "loss": 2.4614, + "step": 12519 + }, + { + "epoch": 1.0104107820192074, + "grad_norm": 0.717523455619812, + "learning_rate": 6.201624888383194e-05, + "loss": 2.4412, + "step": 12520 + }, + { + "epoch": 1.0104914857557905, + "grad_norm": 0.7333831787109375, + "learning_rate": 6.200164563657103e-05, + "loss": 2.4157, + "step": 12521 + }, + { + "epoch": 1.0105721894923736, + "grad_norm": 0.6968720555305481, + "learning_rate": 6.198704333632845e-05, + "loss": 2.4556, + "step": 12522 + }, + { + "epoch": 1.0106528932289565, + "grad_norm": 0.6533070802688599, + "learning_rate": 6.19724419834681e-05, + "loss": 2.43, + "step": 12523 + }, + { 
+ "epoch": 1.0107335969655395, + "grad_norm": 0.7341824769973755, + "learning_rate": 6.195784157835391e-05, + "loss": 2.5326, + "step": 12524 + }, + { + "epoch": 1.0108143007021224, + "grad_norm": 0.752912163734436, + "learning_rate": 6.194324212134974e-05, + "loss": 2.4282, + "step": 12525 + }, + { + "epoch": 1.0108950044387055, + "grad_norm": 0.6538611650466919, + "learning_rate": 6.192864361281951e-05, + "loss": 2.4135, + "step": 12526 + }, + { + "epoch": 1.0109757081752886, + "grad_norm": 0.6931454539299011, + "learning_rate": 6.191404605312695e-05, + "loss": 2.5097, + "step": 12527 + }, + { + "epoch": 1.0110564119118715, + "grad_norm": 0.6317688822746277, + "learning_rate": 6.18994494426359e-05, + "loss": 2.4977, + "step": 12528 + }, + { + "epoch": 1.0111371156484545, + "grad_norm": 0.6793715953826904, + "learning_rate": 6.188485378171024e-05, + "loss": 2.4619, + "step": 12529 + }, + { + "epoch": 1.0112178193850376, + "grad_norm": 0.6696654558181763, + "learning_rate": 6.187025907071361e-05, + "loss": 2.4658, + "step": 12530 + }, + { + "epoch": 1.0112985231216205, + "grad_norm": 0.6788807511329651, + "learning_rate": 6.185566531000979e-05, + "loss": 2.4793, + "step": 12531 + }, + { + "epoch": 1.0113792268582036, + "grad_norm": 0.6933971643447876, + "learning_rate": 6.184107249996253e-05, + "loss": 2.4772, + "step": 12532 + }, + { + "epoch": 1.0114599305947864, + "grad_norm": 0.6866000294685364, + "learning_rate": 6.182648064093546e-05, + "loss": 2.428, + "step": 12533 + }, + { + "epoch": 1.0115406343313695, + "grad_norm": 0.7013841271400452, + "learning_rate": 6.181188973329229e-05, + "loss": 2.5273, + "step": 12534 + }, + { + "epoch": 1.0116213380679526, + "grad_norm": 0.6569108963012695, + "learning_rate": 6.179729977739669e-05, + "loss": 2.4125, + "step": 12535 + }, + { + "epoch": 1.0117020418045355, + "grad_norm": 0.7503486275672913, + "learning_rate": 6.17827107736122e-05, + "loss": 2.4385, + "step": 12536 + }, + { + "epoch": 1.0117827455411186, + 
"grad_norm": 0.6757314205169678, + "learning_rate": 6.176812272230246e-05, + "loss": 2.4364, + "step": 12537 + }, + { + "epoch": 1.0118634492777017, + "grad_norm": 0.6567254662513733, + "learning_rate": 6.175353562383106e-05, + "loss": 2.4992, + "step": 12538 + }, + { + "epoch": 1.0119441530142845, + "grad_norm": 0.7564988732337952, + "learning_rate": 6.17389494785615e-05, + "loss": 2.4777, + "step": 12539 + }, + { + "epoch": 1.0120248567508676, + "grad_norm": 0.6972391605377197, + "learning_rate": 6.172436428685735e-05, + "loss": 2.5041, + "step": 12540 + }, + { + "epoch": 1.0121055604874505, + "grad_norm": 0.6861580610275269, + "learning_rate": 6.170978004908209e-05, + "loss": 2.4684, + "step": 12541 + }, + { + "epoch": 1.0121862642240336, + "grad_norm": 0.6621903777122498, + "learning_rate": 6.169519676559921e-05, + "loss": 2.4614, + "step": 12542 + }, + { + "epoch": 1.0122669679606167, + "grad_norm": 0.6879795789718628, + "learning_rate": 6.168061443677215e-05, + "loss": 2.4765, + "step": 12543 + }, + { + "epoch": 1.0123476716971995, + "grad_norm": 0.6361081004142761, + "learning_rate": 6.166603306296434e-05, + "loss": 2.4792, + "step": 12544 + }, + { + "epoch": 1.0124283754337826, + "grad_norm": 0.6660729050636292, + "learning_rate": 6.165145264453924e-05, + "loss": 2.489, + "step": 12545 + }, + { + "epoch": 1.0125090791703655, + "grad_norm": 0.6900594234466553, + "learning_rate": 6.163687318186015e-05, + "loss": 2.4543, + "step": 12546 + }, + { + "epoch": 1.0125897829069486, + "grad_norm": 0.7195869088172913, + "learning_rate": 6.162229467529046e-05, + "loss": 2.4137, + "step": 12547 + }, + { + "epoch": 1.0126704866435317, + "grad_norm": 0.7030326128005981, + "learning_rate": 6.16077171251935e-05, + "loss": 2.4657, + "step": 12548 + }, + { + "epoch": 1.0127511903801145, + "grad_norm": 0.6712052822113037, + "learning_rate": 6.15931405319326e-05, + "loss": 2.4718, + "step": 12549 + }, + { + "epoch": 1.0128318941166976, + "grad_norm": 0.7471029162406921, + 
"learning_rate": 6.157856489587102e-05, + "loss": 2.4705, + "step": 12550 + }, + { + "epoch": 1.0129125978532807, + "grad_norm": 0.6813762187957764, + "learning_rate": 6.15639902173721e-05, + "loss": 2.4479, + "step": 12551 + }, + { + "epoch": 1.0129933015898636, + "grad_norm": 0.6657249927520752, + "learning_rate": 6.154941649679894e-05, + "loss": 2.4911, + "step": 12552 + }, + { + "epoch": 1.0130740053264466, + "grad_norm": 0.6700132489204407, + "learning_rate": 6.153484373451483e-05, + "loss": 2.4962, + "step": 12553 + }, + { + "epoch": 1.0131547090630295, + "grad_norm": 0.7058695554733276, + "learning_rate": 6.152027193088302e-05, + "loss": 2.3935, + "step": 12554 + }, + { + "epoch": 1.0132354127996126, + "grad_norm": 0.7390396595001221, + "learning_rate": 6.150570108626658e-05, + "loss": 2.4454, + "step": 12555 + }, + { + "epoch": 1.0133161165361957, + "grad_norm": 0.7251414060592651, + "learning_rate": 6.149113120102869e-05, + "loss": 2.4146, + "step": 12556 + }, + { + "epoch": 1.0133968202727786, + "grad_norm": 0.8262537717819214, + "learning_rate": 6.14765622755325e-05, + "loss": 2.4638, + "step": 12557 + }, + { + "epoch": 1.0134775240093616, + "grad_norm": 0.7184064984321594, + "learning_rate": 6.146199431014106e-05, + "loss": 2.3958, + "step": 12558 + }, + { + "epoch": 1.0135582277459447, + "grad_norm": 0.7544865012168884, + "learning_rate": 6.144742730521746e-05, + "loss": 2.4662, + "step": 12559 + }, + { + "epoch": 1.0136389314825276, + "grad_norm": 0.6866207718849182, + "learning_rate": 6.143286126112475e-05, + "loss": 2.4951, + "step": 12560 + }, + { + "epoch": 1.0137196352191107, + "grad_norm": 0.6566087603569031, + "learning_rate": 6.1418296178226e-05, + "loss": 2.4002, + "step": 12561 + }, + { + "epoch": 1.0138003389556935, + "grad_norm": 0.6999008059501648, + "learning_rate": 6.140373205688411e-05, + "loss": 2.5306, + "step": 12562 + }, + { + "epoch": 1.0138810426922766, + "grad_norm": 0.6682353615760803, + "learning_rate": 6.138916889746212e-05, 
+ "loss": 2.5565, + "step": 12563 + }, + { + "epoch": 1.0139617464288597, + "grad_norm": 0.7443362474441528, + "learning_rate": 6.137460670032298e-05, + "loss": 2.3958, + "step": 12564 + }, + { + "epoch": 1.0140424501654426, + "grad_norm": 0.6542403697967529, + "learning_rate": 6.136004546582958e-05, + "loss": 2.4394, + "step": 12565 + }, + { + "epoch": 1.0141231539020257, + "grad_norm": 0.6524317264556885, + "learning_rate": 6.134548519434488e-05, + "loss": 2.4979, + "step": 12566 + }, + { + "epoch": 1.0142038576386088, + "grad_norm": 0.6605600118637085, + "learning_rate": 6.133092588623174e-05, + "loss": 2.4827, + "step": 12567 + }, + { + "epoch": 1.0142845613751916, + "grad_norm": 0.7114397883415222, + "learning_rate": 6.1316367541853e-05, + "loss": 2.4799, + "step": 12568 + }, + { + "epoch": 1.0143652651117747, + "grad_norm": 0.6607296466827393, + "learning_rate": 6.130181016157148e-05, + "loss": 2.4991, + "step": 12569 + }, + { + "epoch": 1.0144459688483576, + "grad_norm": 0.6750844717025757, + "learning_rate": 6.128725374575005e-05, + "loss": 2.4451, + "step": 12570 + }, + { + "epoch": 1.0145266725849407, + "grad_norm": 0.6978901624679565, + "learning_rate": 6.127269829475141e-05, + "loss": 2.4608, + "step": 12571 + }, + { + "epoch": 1.0146073763215238, + "grad_norm": 0.676343560218811, + "learning_rate": 6.125814380893838e-05, + "loss": 2.4536, + "step": 12572 + }, + { + "epoch": 1.0146880800581066, + "grad_norm": 0.7082604765892029, + "learning_rate": 6.124359028867368e-05, + "loss": 2.45, + "step": 12573 + }, + { + "epoch": 1.0147687837946897, + "grad_norm": 0.7049853205680847, + "learning_rate": 6.122903773432003e-05, + "loss": 2.4378, + "step": 12574 + }, + { + "epoch": 1.0148494875312728, + "grad_norm": 0.6329593062400818, + "learning_rate": 6.121448614624009e-05, + "loss": 2.4386, + "step": 12575 + }, + { + "epoch": 1.0149301912678557, + "grad_norm": 0.7249468564987183, + "learning_rate": 6.119993552479655e-05, + "loss": 2.5191, + "step": 12576 + }, + 
{ + "epoch": 1.0150108950044388, + "grad_norm": 0.7028193473815918, + "learning_rate": 6.118538587035206e-05, + "loss": 2.4376, + "step": 12577 + }, + { + "epoch": 1.0150915987410216, + "grad_norm": 0.697382926940918, + "learning_rate": 6.117083718326917e-05, + "loss": 2.4797, + "step": 12578 + }, + { + "epoch": 1.0151723024776047, + "grad_norm": 0.7386965155601501, + "learning_rate": 6.115628946391055e-05, + "loss": 2.4512, + "step": 12579 + }, + { + "epoch": 1.0152530062141878, + "grad_norm": 0.6614577174186707, + "learning_rate": 6.114174271263875e-05, + "loss": 2.4404, + "step": 12580 + }, + { + "epoch": 1.0153337099507707, + "grad_norm": 0.6927464604377747, + "learning_rate": 6.112719692981627e-05, + "loss": 2.47, + "step": 12581 + }, + { + "epoch": 1.0154144136873537, + "grad_norm": 0.7004262208938599, + "learning_rate": 6.111265211580566e-05, + "loss": 2.4212, + "step": 12582 + }, + { + "epoch": 1.0154951174239368, + "grad_norm": 0.71146559715271, + "learning_rate": 6.109810827096942e-05, + "loss": 2.4431, + "step": 12583 + }, + { + "epoch": 1.0155758211605197, + "grad_norm": 0.6857032775878906, + "learning_rate": 6.108356539567e-05, + "loss": 2.453, + "step": 12584 + }, + { + "epoch": 1.0156565248971028, + "grad_norm": 0.6976168155670166, + "learning_rate": 6.106902349026986e-05, + "loss": 2.4718, + "step": 12585 + }, + { + "epoch": 1.0157372286336857, + "grad_norm": 0.7158414125442505, + "learning_rate": 6.105448255513146e-05, + "loss": 2.425, + "step": 12586 + }, + { + "epoch": 1.0158179323702687, + "grad_norm": 0.6611737608909607, + "learning_rate": 6.103994259061714e-05, + "loss": 2.4563, + "step": 12587 + }, + { + "epoch": 1.0158986361068518, + "grad_norm": 0.7262980937957764, + "learning_rate": 6.102540359708926e-05, + "loss": 2.4538, + "step": 12588 + }, + { + "epoch": 1.0159793398434347, + "grad_norm": 0.7123451828956604, + "learning_rate": 6.10108655749102e-05, + "loss": 2.4677, + "step": 12589 + }, + { + "epoch": 1.0160600435800178, + "grad_norm": 
0.7135589122772217, + "learning_rate": 6.099632852444235e-05, + "loss": 2.4312, + "step": 12590 + }, + { + "epoch": 1.0161407473166009, + "grad_norm": 0.6509461998939514, + "learning_rate": 6.09817924460479e-05, + "loss": 2.4716, + "step": 12591 + }, + { + "epoch": 1.0162214510531837, + "grad_norm": 0.8835915923118591, + "learning_rate": 6.096725734008919e-05, + "loss": 2.4817, + "step": 12592 + }, + { + "epoch": 1.0163021547897668, + "grad_norm": 0.7084136605262756, + "learning_rate": 6.095272320692846e-05, + "loss": 2.483, + "step": 12593 + }, + { + "epoch": 1.0163828585263497, + "grad_norm": 0.6866818070411682, + "learning_rate": 6.0938190046927934e-05, + "loss": 2.4838, + "step": 12594 + }, + { + "epoch": 1.0164635622629328, + "grad_norm": 0.7297510504722595, + "learning_rate": 6.0923657860449824e-05, + "loss": 2.4675, + "step": 12595 + }, + { + "epoch": 1.0165442659995159, + "grad_norm": 0.6735619306564331, + "learning_rate": 6.090912664785633e-05, + "loss": 2.444, + "step": 12596 + }, + { + "epoch": 1.0166249697360987, + "grad_norm": 0.7046451568603516, + "learning_rate": 6.0894596409509565e-05, + "loss": 2.4757, + "step": 12597 + }, + { + "epoch": 1.0167056734726818, + "grad_norm": 0.6646085977554321, + "learning_rate": 6.0880067145771656e-05, + "loss": 2.4772, + "step": 12598 + }, + { + "epoch": 1.0167863772092647, + "grad_norm": 0.7217094302177429, + "learning_rate": 6.086553885700478e-05, + "loss": 2.4589, + "step": 12599 + }, + { + "epoch": 1.0168670809458478, + "grad_norm": 0.647378146648407, + "learning_rate": 6.085101154357093e-05, + "loss": 2.4327, + "step": 12600 + }, + { + "epoch": 1.0169477846824309, + "grad_norm": 0.6907125115394592, + "learning_rate": 6.083648520583223e-05, + "loss": 2.467, + "step": 12601 + }, + { + "epoch": 1.0170284884190137, + "grad_norm": 0.690433919429779, + "learning_rate": 6.0821959844150687e-05, + "loss": 2.488, + "step": 12602 + }, + { + "epoch": 1.0171091921555968, + "grad_norm": 0.6528738737106323, + "learning_rate": 
6.080743545888833e-05, + "loss": 2.5028, + "step": 12603 + }, + { + "epoch": 1.01718989589218, + "grad_norm": 0.6962323784828186, + "learning_rate": 6.079291205040711e-05, + "loss": 2.5381, + "step": 12604 + }, + { + "epoch": 1.0172705996287628, + "grad_norm": 0.7386075854301453, + "learning_rate": 6.077838961906902e-05, + "loss": 2.4445, + "step": 12605 + }, + { + "epoch": 1.0173513033653458, + "grad_norm": 0.7382189631462097, + "learning_rate": 6.0763868165236025e-05, + "loss": 2.4926, + "step": 12606 + }, + { + "epoch": 1.0174320071019287, + "grad_norm": 0.7291865944862366, + "learning_rate": 6.074934768926995e-05, + "loss": 2.4624, + "step": 12607 + }, + { + "epoch": 1.0175127108385118, + "grad_norm": 0.754843533039093, + "learning_rate": 6.073482819153275e-05, + "loss": 2.4291, + "step": 12608 + }, + { + "epoch": 1.017593414575095, + "grad_norm": 0.6827771663665771, + "learning_rate": 6.072030967238628e-05, + "loss": 2.453, + "step": 12609 + }, + { + "epoch": 1.0176741183116778, + "grad_norm": 0.7138541340827942, + "learning_rate": 6.0705792132192355e-05, + "loss": 2.5172, + "step": 12610 + }, + { + "epoch": 1.0177548220482608, + "grad_norm": 0.6539924740791321, + "learning_rate": 6.06912755713128e-05, + "loss": 2.4393, + "step": 12611 + }, + { + "epoch": 1.017835525784844, + "grad_norm": 0.7021273970603943, + "learning_rate": 6.067675999010945e-05, + "loss": 2.4519, + "step": 12612 + }, + { + "epoch": 1.0179162295214268, + "grad_norm": 0.7124225497245789, + "learning_rate": 6.0662245388944004e-05, + "loss": 2.4417, + "step": 12613 + }, + { + "epoch": 1.0179969332580099, + "grad_norm": 0.7214948534965515, + "learning_rate": 6.064773176817823e-05, + "loss": 2.4708, + "step": 12614 + }, + { + "epoch": 1.0180776369945927, + "grad_norm": 0.6738584041595459, + "learning_rate": 6.063321912817386e-05, + "loss": 2.4574, + "step": 12615 + }, + { + "epoch": 1.0181583407311758, + "grad_norm": 0.7215890884399414, + "learning_rate": 6.061870746929257e-05, + "loss": 2.4903, 
+ "step": 12616 + }, + { + "epoch": 1.018239044467759, + "grad_norm": 0.6720155477523804, + "learning_rate": 6.0604196791896016e-05, + "loss": 2.4251, + "step": 12617 + }, + { + "epoch": 1.0183197482043418, + "grad_norm": 0.7046420574188232, + "learning_rate": 6.058968709634587e-05, + "loss": 2.446, + "step": 12618 + }, + { + "epoch": 1.0184004519409249, + "grad_norm": 0.6419540047645569, + "learning_rate": 6.0575178383003764e-05, + "loss": 2.4052, + "step": 12619 + }, + { + "epoch": 1.018481155677508, + "grad_norm": 0.6948695182800293, + "learning_rate": 6.0560670652231235e-05, + "loss": 2.5068, + "step": 12620 + }, + { + "epoch": 1.0185618594140908, + "grad_norm": 0.7274870276451111, + "learning_rate": 6.05461639043899e-05, + "loss": 2.4705, + "step": 12621 + }, + { + "epoch": 1.018642563150674, + "grad_norm": 0.6809766292572021, + "learning_rate": 6.053165813984134e-05, + "loss": 2.3767, + "step": 12622 + }, + { + "epoch": 1.0187232668872568, + "grad_norm": 0.6197625994682312, + "learning_rate": 6.0517153358946985e-05, + "loss": 2.4639, + "step": 12623 + }, + { + "epoch": 1.0188039706238399, + "grad_norm": 0.6613010764122009, + "learning_rate": 6.050264956206837e-05, + "loss": 2.5155, + "step": 12624 + }, + { + "epoch": 1.018884674360423, + "grad_norm": 0.7335553765296936, + "learning_rate": 6.0488146749567e-05, + "loss": 2.5344, + "step": 12625 + }, + { + "epoch": 1.0189653780970058, + "grad_norm": 0.7175146341323853, + "learning_rate": 6.047364492180428e-05, + "loss": 2.4972, + "step": 12626 + }, + { + "epoch": 1.019046081833589, + "grad_norm": 0.6825357675552368, + "learning_rate": 6.045914407914166e-05, + "loss": 2.4356, + "step": 12627 + }, + { + "epoch": 1.019126785570172, + "grad_norm": 0.6369633078575134, + "learning_rate": 6.044464422194056e-05, + "loss": 2.4692, + "step": 12628 + }, + { + "epoch": 1.0192074893067549, + "grad_norm": 0.7407073378562927, + "learning_rate": 6.0430145350562264e-05, + "loss": 2.4565, + "step": 12629 + }, + { + "epoch": 
1.019288193043338, + "grad_norm": 0.6836552619934082, + "learning_rate": 6.041564746536821e-05, + "loss": 2.4357, + "step": 12630 + }, + { + "epoch": 1.0193688967799208, + "grad_norm": 0.6778741478919983, + "learning_rate": 6.040115056671972e-05, + "loss": 2.424, + "step": 12631 + }, + { + "epoch": 1.019449600516504, + "grad_norm": 0.6440724730491638, + "learning_rate": 6.0386654654978035e-05, + "loss": 2.4455, + "step": 12632 + }, + { + "epoch": 1.019530304253087, + "grad_norm": 0.681376039981842, + "learning_rate": 6.0372159730504476e-05, + "loss": 2.4562, + "step": 12633 + }, + { + "epoch": 1.0196110079896699, + "grad_norm": 0.657462477684021, + "learning_rate": 6.035766579366029e-05, + "loss": 2.4315, + "step": 12634 + }, + { + "epoch": 1.019691711726253, + "grad_norm": 0.6540380716323853, + "learning_rate": 6.0343172844806706e-05, + "loss": 2.4789, + "step": 12635 + }, + { + "epoch": 1.019772415462836, + "grad_norm": 0.711883008480072, + "learning_rate": 6.03286808843049e-05, + "loss": 2.4178, + "step": 12636 + }, + { + "epoch": 1.019853119199419, + "grad_norm": 0.6746736168861389, + "learning_rate": 6.031418991251607e-05, + "loss": 2.4351, + "step": 12637 + }, + { + "epoch": 1.019933822936002, + "grad_norm": 0.677237331867218, + "learning_rate": 6.02996999298014e-05, + "loss": 2.4335, + "step": 12638 + }, + { + "epoch": 1.0200145266725849, + "grad_norm": 0.6950497627258301, + "learning_rate": 6.0285210936521955e-05, + "loss": 2.5178, + "step": 12639 + }, + { + "epoch": 1.020095230409168, + "grad_norm": 0.6349243521690369, + "learning_rate": 6.027072293303885e-05, + "loss": 2.4405, + "step": 12640 + }, + { + "epoch": 1.020175934145751, + "grad_norm": 0.744276762008667, + "learning_rate": 6.0256235919713236e-05, + "loss": 2.5156, + "step": 12641 + }, + { + "epoch": 1.020256637882334, + "grad_norm": 0.7697997689247131, + "learning_rate": 6.0241749896906075e-05, + "loss": 2.4393, + "step": 12642 + }, + { + "epoch": 1.020337341618917, + "grad_norm": 
0.7784204483032227, + "learning_rate": 6.022726486497844e-05, + "loss": 2.4565, + "step": 12643 + }, + { + "epoch": 1.0204180453555, + "grad_norm": 0.7434312701225281, + "learning_rate": 6.021278082429136e-05, + "loss": 2.4637, + "step": 12644 + }, + { + "epoch": 1.020498749092083, + "grad_norm": 0.7770118117332458, + "learning_rate": 6.019829777520575e-05, + "loss": 2.4998, + "step": 12645 + }, + { + "epoch": 1.020579452828666, + "grad_norm": 0.7021752595901489, + "learning_rate": 6.01838157180826e-05, + "loss": 2.4661, + "step": 12646 + }, + { + "epoch": 1.0206601565652489, + "grad_norm": 0.6812437176704407, + "learning_rate": 6.0169334653282895e-05, + "loss": 2.4611, + "step": 12647 + }, + { + "epoch": 1.020740860301832, + "grad_norm": 0.757724940776825, + "learning_rate": 6.0154854581167455e-05, + "loss": 2.4427, + "step": 12648 + }, + { + "epoch": 1.020821564038415, + "grad_norm": 0.7386252880096436, + "learning_rate": 6.014037550209718e-05, + "loss": 2.424, + "step": 12649 + }, + { + "epoch": 1.020902267774998, + "grad_norm": 0.7138059735298157, + "learning_rate": 6.012589741643295e-05, + "loss": 2.4951, + "step": 12650 + }, + { + "epoch": 1.020982971511581, + "grad_norm": 0.714022159576416, + "learning_rate": 6.011142032453561e-05, + "loss": 2.4398, + "step": 12651 + }, + { + "epoch": 1.0210636752481639, + "grad_norm": 0.6961550712585449, + "learning_rate": 6.00969442267659e-05, + "loss": 2.4495, + "step": 12652 + }, + { + "epoch": 1.021144378984747, + "grad_norm": 0.7196643948554993, + "learning_rate": 6.008246912348467e-05, + "loss": 2.4449, + "step": 12653 + }, + { + "epoch": 1.02122508272133, + "grad_norm": 0.6163341999053955, + "learning_rate": 6.006799501505268e-05, + "loss": 2.4108, + "step": 12654 + }, + { + "epoch": 1.021305786457913, + "grad_norm": 0.6657030582427979, + "learning_rate": 6.005352190183061e-05, + "loss": 2.4328, + "step": 12655 + }, + { + "epoch": 1.021386490194496, + "grad_norm": 0.7183353900909424, + "learning_rate": 
6.00390497841792e-05, + "loss": 2.4912, + "step": 12656 + }, + { + "epoch": 1.021467193931079, + "grad_norm": 0.6912575364112854, + "learning_rate": 6.002457866245916e-05, + "loss": 2.4597, + "step": 12657 + }, + { + "epoch": 1.021547897667662, + "grad_norm": 0.7395210266113281, + "learning_rate": 6.0010108537031084e-05, + "loss": 2.4823, + "step": 12658 + }, + { + "epoch": 1.021628601404245, + "grad_norm": 0.722618043422699, + "learning_rate": 5.9995639408255636e-05, + "loss": 2.4924, + "step": 12659 + }, + { + "epoch": 1.021709305140828, + "grad_norm": 0.739009439945221, + "learning_rate": 5.998117127649344e-05, + "loss": 2.4454, + "step": 12660 + }, + { + "epoch": 1.021790008877411, + "grad_norm": 0.7017633318901062, + "learning_rate": 5.996670414210506e-05, + "loss": 2.5058, + "step": 12661 + }, + { + "epoch": 1.021870712613994, + "grad_norm": 0.742664635181427, + "learning_rate": 5.9952238005451046e-05, + "loss": 2.436, + "step": 12662 + }, + { + "epoch": 1.021951416350577, + "grad_norm": 0.6865660548210144, + "learning_rate": 5.9937772866892e-05, + "loss": 2.4364, + "step": 12663 + }, + { + "epoch": 1.02203212008716, + "grad_norm": 0.7376219034194946, + "learning_rate": 5.992330872678833e-05, + "loss": 2.4975, + "step": 12664 + }, + { + "epoch": 1.0221128238237431, + "grad_norm": 0.6496078372001648, + "learning_rate": 5.990884558550054e-05, + "loss": 2.4651, + "step": 12665 + }, + { + "epoch": 1.022193527560326, + "grad_norm": 0.7178322076797485, + "learning_rate": 5.989438344338915e-05, + "loss": 2.5015, + "step": 12666 + }, + { + "epoch": 1.022274231296909, + "grad_norm": 0.7084102034568787, + "learning_rate": 5.987992230081459e-05, + "loss": 2.4741, + "step": 12667 + }, + { + "epoch": 1.022354935033492, + "grad_norm": 0.6634935736656189, + "learning_rate": 5.986546215813722e-05, + "loss": 2.4255, + "step": 12668 + }, + { + "epoch": 1.022435638770075, + "grad_norm": 0.6897543668746948, + "learning_rate": 5.985100301571742e-05, + "loss": 2.4682, + "step": 
12669 + }, + { + "epoch": 1.0225163425066581, + "grad_norm": 0.6643948554992676, + "learning_rate": 5.9836544873915614e-05, + "loss": 2.4009, + "step": 12670 + }, + { + "epoch": 1.022597046243241, + "grad_norm": 0.681252658367157, + "learning_rate": 5.982208773309208e-05, + "loss": 2.4542, + "step": 12671 + }, + { + "epoch": 1.022677749979824, + "grad_norm": 0.7608681917190552, + "learning_rate": 5.980763159360714e-05, + "loss": 2.5614, + "step": 12672 + }, + { + "epoch": 1.0227584537164072, + "grad_norm": 0.6855095028877258, + "learning_rate": 5.979317645582112e-05, + "loss": 2.4505, + "step": 12673 + }, + { + "epoch": 1.02283915745299, + "grad_norm": 0.6846089363098145, + "learning_rate": 5.97787223200942e-05, + "loss": 2.4438, + "step": 12674 + }, + { + "epoch": 1.0229198611895731, + "grad_norm": 0.7198090553283691, + "learning_rate": 5.9764269186786684e-05, + "loss": 2.4469, + "step": 12675 + }, + { + "epoch": 1.023000564926156, + "grad_norm": 0.7120245099067688, + "learning_rate": 5.9749817056258764e-05, + "loss": 2.4626, + "step": 12676 + }, + { + "epoch": 1.023081268662739, + "grad_norm": 0.6839897036552429, + "learning_rate": 5.973536592887059e-05, + "loss": 2.4384, + "step": 12677 + }, + { + "epoch": 1.0231619723993222, + "grad_norm": 0.7053773999214172, + "learning_rate": 5.9720915804982356e-05, + "loss": 2.4554, + "step": 12678 + }, + { + "epoch": 1.023242676135905, + "grad_norm": 0.7114294767379761, + "learning_rate": 5.970646668495421e-05, + "loss": 2.3964, + "step": 12679 + }, + { + "epoch": 1.0233233798724881, + "grad_norm": 0.7001516819000244, + "learning_rate": 5.9692018569146224e-05, + "loss": 2.5216, + "step": 12680 + }, + { + "epoch": 1.0234040836090712, + "grad_norm": 0.6715773940086365, + "learning_rate": 5.96775714579185e-05, + "loss": 2.4595, + "step": 12681 + }, + { + "epoch": 1.023484787345654, + "grad_norm": 0.6856278777122498, + "learning_rate": 5.96631253516311e-05, + "loss": 2.4637, + "step": 12682 + }, + { + "epoch": 
1.0235654910822372, + "grad_norm": 0.6785625219345093, + "learning_rate": 5.96486802506441e-05, + "loss": 2.4615, + "step": 12683 + }, + { + "epoch": 1.02364619481882, + "grad_norm": 0.6834213137626648, + "learning_rate": 5.963423615531743e-05, + "loss": 2.4729, + "step": 12684 + }, + { + "epoch": 1.023726898555403, + "grad_norm": 0.6729516386985779, + "learning_rate": 5.961979306601109e-05, + "loss": 2.4013, + "step": 12685 + }, + { + "epoch": 1.0238076022919862, + "grad_norm": 0.6785775423049927, + "learning_rate": 5.960535098308511e-05, + "loss": 2.4825, + "step": 12686 + }, + { + "epoch": 1.023888306028569, + "grad_norm": 0.67277991771698, + "learning_rate": 5.959090990689934e-05, + "loss": 2.4606, + "step": 12687 + }, + { + "epoch": 1.0239690097651521, + "grad_norm": 0.7679588198661804, + "learning_rate": 5.957646983781373e-05, + "loss": 2.5234, + "step": 12688 + }, + { + "epoch": 1.0240497135017352, + "grad_norm": 0.6597407460212708, + "learning_rate": 5.956203077618821e-05, + "loss": 2.4699, + "step": 12689 + }, + { + "epoch": 1.024130417238318, + "grad_norm": 0.6743008494377136, + "learning_rate": 5.9547592722382525e-05, + "loss": 2.4266, + "step": 12690 + }, + { + "epoch": 1.0242111209749012, + "grad_norm": 0.7223396897315979, + "learning_rate": 5.953315567675657e-05, + "loss": 2.5117, + "step": 12691 + }, + { + "epoch": 1.024291824711484, + "grad_norm": 0.6729528307914734, + "learning_rate": 5.951871963967022e-05, + "loss": 2.4586, + "step": 12692 + }, + { + "epoch": 1.0243725284480671, + "grad_norm": 0.6523739695549011, + "learning_rate": 5.950428461148314e-05, + "loss": 2.4408, + "step": 12693 + }, + { + "epoch": 1.0244532321846502, + "grad_norm": 0.6830984950065613, + "learning_rate": 5.9489850592555164e-05, + "loss": 2.4094, + "step": 12694 + }, + { + "epoch": 1.024533935921233, + "grad_norm": 0.6223493814468384, + "learning_rate": 5.9475417583246006e-05, + "loss": 2.4105, + "step": 12695 + }, + { + "epoch": 1.0246146396578162, + "grad_norm": 
0.6506635546684265, + "learning_rate": 5.9460985583915374e-05, + "loss": 2.4451, + "step": 12696 + }, + { + "epoch": 1.024695343394399, + "grad_norm": 0.7626760005950928, + "learning_rate": 5.944655459492293e-05, + "loss": 2.4643, + "step": 12697 + }, + { + "epoch": 1.0247760471309821, + "grad_norm": 0.7074631452560425, + "learning_rate": 5.943212461662837e-05, + "loss": 2.4662, + "step": 12698 + }, + { + "epoch": 1.0248567508675652, + "grad_norm": 0.718083918094635, + "learning_rate": 5.9417695649391346e-05, + "loss": 2.4686, + "step": 12699 + }, + { + "epoch": 1.024937454604148, + "grad_norm": 0.6850628852844238, + "learning_rate": 5.9403267693571384e-05, + "loss": 2.4542, + "step": 12700 + }, + { + "epoch": 1.0250181583407312, + "grad_norm": 0.6662585735321045, + "learning_rate": 5.938884074952812e-05, + "loss": 2.4676, + "step": 12701 + }, + { + "epoch": 1.0250988620773143, + "grad_norm": 0.6806240677833557, + "learning_rate": 5.9374414817621114e-05, + "loss": 2.4243, + "step": 12702 + }, + { + "epoch": 1.0251795658138971, + "grad_norm": 0.6763548851013184, + "learning_rate": 5.9359989898209876e-05, + "loss": 2.4389, + "step": 12703 + }, + { + "epoch": 1.0252602695504802, + "grad_norm": 0.7390143275260925, + "learning_rate": 5.934556599165393e-05, + "loss": 2.4667, + "step": 12704 + }, + { + "epoch": 1.025340973287063, + "grad_norm": 0.6159299612045288, + "learning_rate": 5.933114309831276e-05, + "loss": 2.3832, + "step": 12705 + }, + { + "epoch": 1.0254216770236462, + "grad_norm": 0.6779586672782898, + "learning_rate": 5.931672121854579e-05, + "loss": 2.4615, + "step": 12706 + }, + { + "epoch": 1.0255023807602293, + "grad_norm": 0.643800675868988, + "learning_rate": 5.930230035271247e-05, + "loss": 2.4725, + "step": 12707 + }, + { + "epoch": 1.0255830844968121, + "grad_norm": 0.6605903506278992, + "learning_rate": 5.928788050117227e-05, + "loss": 2.4332, + "step": 12708 + }, + { + "epoch": 1.0256637882333952, + "grad_norm": 0.7046334743499756, + 
"learning_rate": 5.927346166428446e-05, + "loss": 2.4445, + "step": 12709 + }, + { + "epoch": 1.0257444919699783, + "grad_norm": 0.6536325216293335, + "learning_rate": 5.925904384240843e-05, + "loss": 2.4168, + "step": 12710 + }, + { + "epoch": 1.0258251957065612, + "grad_norm": 0.6861097812652588, + "learning_rate": 5.9244627035903564e-05, + "loss": 2.512, + "step": 12711 + }, + { + "epoch": 1.0259058994431443, + "grad_norm": 0.6782278418540955, + "learning_rate": 5.923021124512911e-05, + "loss": 2.4667, + "step": 12712 + }, + { + "epoch": 1.0259866031797271, + "grad_norm": 0.724435031414032, + "learning_rate": 5.921579647044436e-05, + "loss": 2.4828, + "step": 12713 + }, + { + "epoch": 1.0260673069163102, + "grad_norm": 0.6690630316734314, + "learning_rate": 5.9201382712208575e-05, + "loss": 2.4832, + "step": 12714 + }, + { + "epoch": 1.0261480106528933, + "grad_norm": 0.7045348286628723, + "learning_rate": 5.9186969970781015e-05, + "loss": 2.4576, + "step": 12715 + }, + { + "epoch": 1.0262287143894762, + "grad_norm": 0.673321008682251, + "learning_rate": 5.9172558246520796e-05, + "loss": 2.3986, + "step": 12716 + }, + { + "epoch": 1.0263094181260592, + "grad_norm": 0.7184785008430481, + "learning_rate": 5.915814753978717e-05, + "loss": 2.4008, + "step": 12717 + }, + { + "epoch": 1.0263901218626423, + "grad_norm": 0.6971293091773987, + "learning_rate": 5.914373785093931e-05, + "loss": 2.4559, + "step": 12718 + }, + { + "epoch": 1.0264708255992252, + "grad_norm": 0.6941563487052917, + "learning_rate": 5.912932918033626e-05, + "loss": 2.4787, + "step": 12719 + }, + { + "epoch": 1.0265515293358083, + "grad_norm": 0.6276142001152039, + "learning_rate": 5.911492152833715e-05, + "loss": 2.4275, + "step": 12720 + }, + { + "epoch": 1.0266322330723912, + "grad_norm": 0.715928316116333, + "learning_rate": 5.9100514895301106e-05, + "loss": 2.4127, + "step": 12721 + }, + { + "epoch": 1.0267129368089742, + "grad_norm": 0.7004076838493347, + "learning_rate": 
5.908610928158713e-05, + "loss": 2.4651, + "step": 12722 + }, + { + "epoch": 1.0267936405455573, + "grad_norm": 0.6761921048164368, + "learning_rate": 5.907170468755425e-05, + "loss": 2.4245, + "step": 12723 + }, + { + "epoch": 1.0268743442821402, + "grad_norm": 0.7246574759483337, + "learning_rate": 5.9057301113561515e-05, + "loss": 2.4489, + "step": 12724 + }, + { + "epoch": 1.0269550480187233, + "grad_norm": 0.7196606397628784, + "learning_rate": 5.904289855996783e-05, + "loss": 2.4357, + "step": 12725 + }, + { + "epoch": 1.0270357517553064, + "grad_norm": 0.7142692804336548, + "learning_rate": 5.902849702713216e-05, + "loss": 2.4821, + "step": 12726 + }, + { + "epoch": 1.0271164554918892, + "grad_norm": 0.7207832336425781, + "learning_rate": 5.9014096515413454e-05, + "loss": 2.4337, + "step": 12727 + }, + { + "epoch": 1.0271971592284723, + "grad_norm": 0.6865695714950562, + "learning_rate": 5.899969702517063e-05, + "loss": 2.4549, + "step": 12728 + }, + { + "epoch": 1.0272778629650552, + "grad_norm": 0.7136662006378174, + "learning_rate": 5.898529855676249e-05, + "loss": 2.4606, + "step": 12729 + }, + { + "epoch": 1.0273585667016383, + "grad_norm": 0.701885998249054, + "learning_rate": 5.897090111054795e-05, + "loss": 2.4913, + "step": 12730 + }, + { + "epoch": 1.0274392704382214, + "grad_norm": 0.6671354174613953, + "learning_rate": 5.8956504686885805e-05, + "loss": 2.4064, + "step": 12731 + }, + { + "epoch": 1.0275199741748042, + "grad_norm": 0.6720621585845947, + "learning_rate": 5.894210928613484e-05, + "loss": 2.4908, + "step": 12732 + }, + { + "epoch": 1.0276006779113873, + "grad_norm": 0.7530980706214905, + "learning_rate": 5.892771490865383e-05, + "loss": 2.4486, + "step": 12733 + }, + { + "epoch": 1.0276813816479704, + "grad_norm": 0.6771122813224792, + "learning_rate": 5.891332155480158e-05, + "loss": 2.3954, + "step": 12734 + }, + { + "epoch": 1.0277620853845533, + "grad_norm": 0.6779236793518066, + "learning_rate": 5.889892922493671e-05, + "loss": 
2.4404, + "step": 12735 + }, + { + "epoch": 1.0278427891211364, + "grad_norm": 0.7593358755111694, + "learning_rate": 5.8884537919417974e-05, + "loss": 2.4997, + "step": 12736 + }, + { + "epoch": 1.0279234928577192, + "grad_norm": 0.672686755657196, + "learning_rate": 5.8870147638604044e-05, + "loss": 2.5394, + "step": 12737 + }, + { + "epoch": 1.0280041965943023, + "grad_norm": 0.6727546453475952, + "learning_rate": 5.885575838285353e-05, + "loss": 2.4554, + "step": 12738 + }, + { + "epoch": 1.0280849003308854, + "grad_norm": 0.7092764377593994, + "learning_rate": 5.884137015252507e-05, + "loss": 2.4568, + "step": 12739 + }, + { + "epoch": 1.0281656040674683, + "grad_norm": 0.6988070011138916, + "learning_rate": 5.882698294797728e-05, + "loss": 2.4453, + "step": 12740 + }, + { + "epoch": 1.0282463078040514, + "grad_norm": 0.7578697204589844, + "learning_rate": 5.8812596769568676e-05, + "loss": 2.5648, + "step": 12741 + }, + { + "epoch": 1.0283270115406344, + "grad_norm": 0.6523683667182922, + "learning_rate": 5.879821161765782e-05, + "loss": 2.4088, + "step": 12742 + }, + { + "epoch": 1.0284077152772173, + "grad_norm": 0.6797270178794861, + "learning_rate": 5.878382749260323e-05, + "loss": 2.4465, + "step": 12743 + }, + { + "epoch": 1.0284884190138004, + "grad_norm": 0.6823786497116089, + "learning_rate": 5.876944439476345e-05, + "loss": 2.5053, + "step": 12744 + }, + { + "epoch": 1.0285691227503833, + "grad_norm": 0.6840088367462158, + "learning_rate": 5.875506232449686e-05, + "loss": 2.3771, + "step": 12745 + }, + { + "epoch": 1.0286498264869663, + "grad_norm": 0.6985318064689636, + "learning_rate": 5.8740681282161914e-05, + "loss": 2.4456, + "step": 12746 + }, + { + "epoch": 1.0287305302235494, + "grad_norm": 0.7102388739585876, + "learning_rate": 5.872630126811707e-05, + "loss": 2.4802, + "step": 12747 + }, + { + "epoch": 1.0288112339601323, + "grad_norm": 0.7917937636375427, + "learning_rate": 5.871192228272067e-05, + "loss": 2.4606, + "step": 12748 + }, + { 
+ "epoch": 1.0288919376967154, + "grad_norm": 0.683397114276886, + "learning_rate": 5.86975443263311e-05, + "loss": 2.5011, + "step": 12749 + }, + { + "epoch": 1.0289726414332985, + "grad_norm": 0.7543408870697021, + "learning_rate": 5.8683167399306724e-05, + "loss": 2.4705, + "step": 12750 + }, + { + "epoch": 1.0290533451698813, + "grad_norm": 0.6946283578872681, + "learning_rate": 5.866879150200579e-05, + "loss": 2.4986, + "step": 12751 + }, + { + "epoch": 1.0291340489064644, + "grad_norm": 0.6535125374794006, + "learning_rate": 5.8654416634786605e-05, + "loss": 2.4203, + "step": 12752 + }, + { + "epoch": 1.0292147526430473, + "grad_norm": 0.7470195889472961, + "learning_rate": 5.8640042798007455e-05, + "loss": 2.5103, + "step": 12753 + }, + { + "epoch": 1.0292954563796304, + "grad_norm": 0.6782363653182983, + "learning_rate": 5.8625669992026535e-05, + "loss": 2.4087, + "step": 12754 + }, + { + "epoch": 1.0293761601162135, + "grad_norm": 0.7601497173309326, + "learning_rate": 5.861129821720207e-05, + "loss": 2.4752, + "step": 12755 + }, + { + "epoch": 1.0294568638527963, + "grad_norm": 0.6875388026237488, + "learning_rate": 5.859692747389227e-05, + "loss": 2.448, + "step": 12756 + }, + { + "epoch": 1.0295375675893794, + "grad_norm": 0.7153629064559937, + "learning_rate": 5.858255776245525e-05, + "loss": 2.4641, + "step": 12757 + }, + { + "epoch": 1.0296182713259623, + "grad_norm": 0.682954728603363, + "learning_rate": 5.8568189083249145e-05, + "loss": 2.441, + "step": 12758 + }, + { + "epoch": 1.0296989750625454, + "grad_norm": 0.6959100961685181, + "learning_rate": 5.855382143663209e-05, + "loss": 2.4316, + "step": 12759 + }, + { + "epoch": 1.0297796787991285, + "grad_norm": 0.7062023878097534, + "learning_rate": 5.8539454822962167e-05, + "loss": 2.4287, + "step": 12760 + }, + { + "epoch": 1.0298603825357113, + "grad_norm": 0.706523597240448, + "learning_rate": 5.852508924259736e-05, + "loss": 2.4596, + "step": 12761 + }, + { + "epoch": 1.0299410862722944, + 
"grad_norm": 0.6908385753631592, + "learning_rate": 5.851072469589578e-05, + "loss": 2.4428, + "step": 12762 + }, + { + "epoch": 1.0300217900088775, + "grad_norm": 0.6810726523399353, + "learning_rate": 5.8496361183215386e-05, + "loss": 2.4902, + "step": 12763 + }, + { + "epoch": 1.0301024937454604, + "grad_norm": 0.661613941192627, + "learning_rate": 5.8481998704914156e-05, + "loss": 2.4256, + "step": 12764 + }, + { + "epoch": 1.0301831974820435, + "grad_norm": 0.6633132100105286, + "learning_rate": 5.846763726135005e-05, + "loss": 2.4512, + "step": 12765 + }, + { + "epoch": 1.0302639012186263, + "grad_norm": 0.6991820335388184, + "learning_rate": 5.8453276852881025e-05, + "loss": 2.3747, + "step": 12766 + }, + { + "epoch": 1.0303446049552094, + "grad_norm": 0.7392076253890991, + "learning_rate": 5.843891747986487e-05, + "loss": 2.438, + "step": 12767 + }, + { + "epoch": 1.0304253086917925, + "grad_norm": 0.6371724605560303, + "learning_rate": 5.842455914265958e-05, + "loss": 2.4627, + "step": 12768 + }, + { + "epoch": 1.0305060124283754, + "grad_norm": 0.6475048661231995, + "learning_rate": 5.841020184162298e-05, + "loss": 2.4883, + "step": 12769 + }, + { + "epoch": 1.0305867161649584, + "grad_norm": 0.6848995685577393, + "learning_rate": 5.839584557711283e-05, + "loss": 2.4452, + "step": 12770 + }, + { + "epoch": 1.0306674199015415, + "grad_norm": 0.7345505952835083, + "learning_rate": 5.838149034948697e-05, + "loss": 2.5121, + "step": 12771 + }, + { + "epoch": 1.0307481236381244, + "grad_norm": 0.715373158454895, + "learning_rate": 5.836713615910318e-05, + "loss": 2.4549, + "step": 12772 + }, + { + "epoch": 1.0308288273747075, + "grad_norm": 0.7371035814285278, + "learning_rate": 5.8352783006319166e-05, + "loss": 2.4633, + "step": 12773 + }, + { + "epoch": 1.0309095311112904, + "grad_norm": 0.6843077540397644, + "learning_rate": 5.833843089149267e-05, + "loss": 2.4067, + "step": 12774 + }, + { + "epoch": 1.0309902348478734, + "grad_norm": 0.7398965954780579, + 
"learning_rate": 5.832407981498136e-05, + "loss": 2.5199, + "step": 12775 + }, + { + "epoch": 1.0310709385844565, + "grad_norm": 0.6860283017158508, + "learning_rate": 5.830972977714294e-05, + "loss": 2.4564, + "step": 12776 + }, + { + "epoch": 1.0311516423210394, + "grad_norm": 0.683893084526062, + "learning_rate": 5.829538077833503e-05, + "loss": 2.4635, + "step": 12777 + }, + { + "epoch": 1.0312323460576225, + "grad_norm": 0.6412089467048645, + "learning_rate": 5.828103281891525e-05, + "loss": 2.4806, + "step": 12778 + }, + { + "epoch": 1.0313130497942056, + "grad_norm": 0.646393895149231, + "learning_rate": 5.826668589924123e-05, + "loss": 2.4674, + "step": 12779 + }, + { + "epoch": 1.0313937535307884, + "grad_norm": 0.6805605292320251, + "learning_rate": 5.825234001967044e-05, + "loss": 2.5145, + "step": 12780 + }, + { + "epoch": 1.0314744572673715, + "grad_norm": 0.681532084941864, + "learning_rate": 5.8237995180560455e-05, + "loss": 2.5041, + "step": 12781 + }, + { + "epoch": 1.0315551610039544, + "grad_norm": 0.6971312165260315, + "learning_rate": 5.8223651382268865e-05, + "loss": 2.5324, + "step": 12782 + }, + { + "epoch": 1.0316358647405375, + "grad_norm": 0.6634463667869568, + "learning_rate": 5.8209308625153026e-05, + "loss": 2.5086, + "step": 12783 + }, + { + "epoch": 1.0317165684771206, + "grad_norm": 0.6752117276191711, + "learning_rate": 5.819496690957047e-05, + "loss": 2.4805, + "step": 12784 + }, + { + "epoch": 1.0317972722137034, + "grad_norm": 0.7242109775543213, + "learning_rate": 5.818062623587861e-05, + "loss": 2.4205, + "step": 12785 + }, + { + "epoch": 1.0318779759502865, + "grad_norm": 0.7338563203811646, + "learning_rate": 5.816628660443486e-05, + "loss": 2.4277, + "step": 12786 + }, + { + "epoch": 1.0319586796868696, + "grad_norm": 0.6764293313026428, + "learning_rate": 5.81519480155966e-05, + "loss": 2.5096, + "step": 12787 + }, + { + "epoch": 1.0320393834234525, + "grad_norm": 0.6757099032402039, + "learning_rate": 
5.813761046972124e-05, + "loss": 2.468, + "step": 12788 + }, + { + "epoch": 1.0321200871600356, + "grad_norm": 0.7072502374649048, + "learning_rate": 5.8123273967166017e-05, + "loss": 2.4642, + "step": 12789 + }, + { + "epoch": 1.0322007908966184, + "grad_norm": 0.6470256447792053, + "learning_rate": 5.810893850828827e-05, + "loss": 2.4146, + "step": 12790 + }, + { + "epoch": 1.0322814946332015, + "grad_norm": 0.7403351068496704, + "learning_rate": 5.809460409344527e-05, + "loss": 2.512, + "step": 12791 + }, + { + "epoch": 1.0323621983697846, + "grad_norm": 0.6711490154266357, + "learning_rate": 5.808027072299432e-05, + "loss": 2.4602, + "step": 12792 + }, + { + "epoch": 1.0324429021063675, + "grad_norm": 0.7920248508453369, + "learning_rate": 5.806593839729258e-05, + "loss": 2.4512, + "step": 12793 + }, + { + "epoch": 1.0325236058429506, + "grad_norm": 0.6442045569419861, + "learning_rate": 5.805160711669725e-05, + "loss": 2.4165, + "step": 12794 + }, + { + "epoch": 1.0326043095795336, + "grad_norm": 0.6681340932846069, + "learning_rate": 5.803727688156553e-05, + "loss": 2.4296, + "step": 12795 + }, + { + "epoch": 1.0326850133161165, + "grad_norm": 0.6653337478637695, + "learning_rate": 5.802294769225457e-05, + "loss": 2.5165, + "step": 12796 + }, + { + "epoch": 1.0327657170526996, + "grad_norm": 0.6444782018661499, + "learning_rate": 5.8008619549121476e-05, + "loss": 2.4266, + "step": 12797 + }, + { + "epoch": 1.0328464207892825, + "grad_norm": 0.6741451621055603, + "learning_rate": 5.7994292452523394e-05, + "loss": 2.4837, + "step": 12798 + }, + { + "epoch": 1.0329271245258655, + "grad_norm": 0.6629341840744019, + "learning_rate": 5.797996640281731e-05, + "loss": 2.4368, + "step": 12799 + }, + { + "epoch": 1.0330078282624486, + "grad_norm": 0.6755850315093994, + "learning_rate": 5.796564140036029e-05, + "loss": 2.4834, + "step": 12800 + }, + { + "epoch": 1.0330885319990315, + "grad_norm": 0.7271782755851746, + "learning_rate": 5.795131744550942e-05, + "loss": 
2.5025, + "step": 12801 + }, + { + "epoch": 1.0331692357356146, + "grad_norm": 0.6870545744895935, + "learning_rate": 5.7936994538621605e-05, + "loss": 2.4443, + "step": 12802 + }, + { + "epoch": 1.0332499394721975, + "grad_norm": 0.7231935858726501, + "learning_rate": 5.792267268005382e-05, + "loss": 2.4917, + "step": 12803 + }, + { + "epoch": 1.0333306432087805, + "grad_norm": 0.6905832290649414, + "learning_rate": 5.790835187016307e-05, + "loss": 2.4902, + "step": 12804 + }, + { + "epoch": 1.0334113469453636, + "grad_norm": 0.711814284324646, + "learning_rate": 5.789403210930613e-05, + "loss": 2.4579, + "step": 12805 + }, + { + "epoch": 1.0334920506819465, + "grad_norm": 0.6982280015945435, + "learning_rate": 5.787971339784004e-05, + "loss": 2.5275, + "step": 12806 + }, + { + "epoch": 1.0335727544185296, + "grad_norm": 0.6871493458747864, + "learning_rate": 5.7865395736121575e-05, + "loss": 2.4401, + "step": 12807 + }, + { + "epoch": 1.0336534581551127, + "grad_norm": 0.6898353099822998, + "learning_rate": 5.785107912450763e-05, + "loss": 2.4005, + "step": 12808 + }, + { + "epoch": 1.0337341618916955, + "grad_norm": 0.6264411807060242, + "learning_rate": 5.7836763563354946e-05, + "loss": 2.4497, + "step": 12809 + }, + { + "epoch": 1.0338148656282786, + "grad_norm": 0.6997092962265015, + "learning_rate": 5.782244905302032e-05, + "loss": 2.4388, + "step": 12810 + }, + { + "epoch": 1.0338955693648615, + "grad_norm": 0.6834601759910583, + "learning_rate": 5.7808135593860555e-05, + "loss": 2.4298, + "step": 12811 + }, + { + "epoch": 1.0339762731014446, + "grad_norm": 0.664315402507782, + "learning_rate": 5.77938231862323e-05, + "loss": 2.4289, + "step": 12812 + }, + { + "epoch": 1.0340569768380277, + "grad_norm": 0.6660603284835815, + "learning_rate": 5.7779511830492306e-05, + "loss": 2.4772, + "step": 12813 + }, + { + "epoch": 1.0341376805746105, + "grad_norm": 0.6457028388977051, + "learning_rate": 5.776520152699728e-05, + "loss": 2.4408, + "step": 12814 + }, + { + 
"epoch": 1.0342183843111936, + "grad_norm": 0.7132207155227661, + "learning_rate": 5.7750892276103794e-05, + "loss": 2.4953, + "step": 12815 + }, + { + "epoch": 1.0342990880477767, + "grad_norm": 0.7397382259368896, + "learning_rate": 5.773658407816848e-05, + "loss": 2.4396, + "step": 12816 + }, + { + "epoch": 1.0343797917843596, + "grad_norm": 0.6951746344566345, + "learning_rate": 5.7722276933548034e-05, + "loss": 2.5021, + "step": 12817 + }, + { + "epoch": 1.0344604955209427, + "grad_norm": 0.6789736151695251, + "learning_rate": 5.7707970842598935e-05, + "loss": 2.4883, + "step": 12818 + }, + { + "epoch": 1.0345411992575255, + "grad_norm": 0.7231541872024536, + "learning_rate": 5.7693665805677747e-05, + "loss": 2.4761, + "step": 12819 + }, + { + "epoch": 1.0346219029941086, + "grad_norm": 0.685943603515625, + "learning_rate": 5.767936182314104e-05, + "loss": 2.4489, + "step": 12820 + }, + { + "epoch": 1.0347026067306917, + "grad_norm": 0.7081817984580994, + "learning_rate": 5.7665058895345236e-05, + "loss": 2.4329, + "step": 12821 + }, + { + "epoch": 1.0347833104672746, + "grad_norm": 0.6700818538665771, + "learning_rate": 5.7650757022646804e-05, + "loss": 2.4252, + "step": 12822 + }, + { + "epoch": 1.0348640142038577, + "grad_norm": 0.6712214946746826, + "learning_rate": 5.763645620540223e-05, + "loss": 2.419, + "step": 12823 + }, + { + "epoch": 1.0349447179404407, + "grad_norm": 0.6732817888259888, + "learning_rate": 5.762215644396793e-05, + "loss": 2.3928, + "step": 12824 + }, + { + "epoch": 1.0350254216770236, + "grad_norm": 0.6689301133155823, + "learning_rate": 5.760785773870024e-05, + "loss": 2.3981, + "step": 12825 + }, + { + "epoch": 1.0351061254136067, + "grad_norm": 0.6822957992553711, + "learning_rate": 5.759356008995556e-05, + "loss": 2.5265, + "step": 12826 + }, + { + "epoch": 1.0351868291501896, + "grad_norm": 0.7316287755966187, + "learning_rate": 5.7579263498090194e-05, + "loss": 2.4132, + "step": 12827 + }, + { + "epoch": 1.0352675328867726, + 
"grad_norm": 0.6688703894615173, + "learning_rate": 5.756496796346047e-05, + "loss": 2.4195, + "step": 12828 + }, + { + "epoch": 1.0353482366233557, + "grad_norm": 0.6894570589065552, + "learning_rate": 5.755067348642268e-05, + "loss": 2.4897, + "step": 12829 + }, + { + "epoch": 1.0354289403599386, + "grad_norm": 0.7635753750801086, + "learning_rate": 5.753638006733311e-05, + "loss": 2.4643, + "step": 12830 + }, + { + "epoch": 1.0355096440965217, + "grad_norm": 0.6353672742843628, + "learning_rate": 5.75220877065479e-05, + "loss": 2.4533, + "step": 12831 + }, + { + "epoch": 1.0355903478331048, + "grad_norm": 0.6725208759307861, + "learning_rate": 5.750779640442332e-05, + "loss": 2.4958, + "step": 12832 + }, + { + "epoch": 1.0356710515696876, + "grad_norm": 0.7350767254829407, + "learning_rate": 5.749350616131556e-05, + "loss": 2.4192, + "step": 12833 + }, + { + "epoch": 1.0357517553062707, + "grad_norm": 0.7322222590446472, + "learning_rate": 5.7479216977580695e-05, + "loss": 2.4719, + "step": 12834 + }, + { + "epoch": 1.0358324590428536, + "grad_norm": 0.7233425974845886, + "learning_rate": 5.7464928853574904e-05, + "loss": 2.4707, + "step": 12835 + }, + { + "epoch": 1.0359131627794367, + "grad_norm": 0.7117420434951782, + "learning_rate": 5.745064178965427e-05, + "loss": 2.4463, + "step": 12836 + }, + { + "epoch": 1.0359938665160198, + "grad_norm": 0.7615050077438354, + "learning_rate": 5.743635578617486e-05, + "loss": 2.4256, + "step": 12837 + }, + { + "epoch": 1.0360745702526026, + "grad_norm": 0.7056093215942383, + "learning_rate": 5.7422070843492734e-05, + "loss": 2.4628, + "step": 12838 + }, + { + "epoch": 1.0361552739891857, + "grad_norm": 0.685989499092102, + "learning_rate": 5.740778696196389e-05, + "loss": 2.4271, + "step": 12839 + }, + { + "epoch": 1.0362359777257688, + "grad_norm": 0.7286686301231384, + "learning_rate": 5.739350414194439e-05, + "loss": 2.4984, + "step": 12840 + }, + { + "epoch": 1.0363166814623517, + "grad_norm": 0.6939802765846252, + 
"learning_rate": 5.737922238379009e-05, + "loss": 2.4601, + "step": 12841 + }, + { + "epoch": 1.0363973851989348, + "grad_norm": 0.7077060341835022, + "learning_rate": 5.736494168785698e-05, + "loss": 2.4264, + "step": 12842 + }, + { + "epoch": 1.0364780889355176, + "grad_norm": 0.667086124420166, + "learning_rate": 5.7350662054501016e-05, + "loss": 2.4733, + "step": 12843 + }, + { + "epoch": 1.0365587926721007, + "grad_norm": 0.6531338691711426, + "learning_rate": 5.7336383484078004e-05, + "loss": 2.4709, + "step": 12844 + }, + { + "epoch": 1.0366394964086838, + "grad_norm": 0.7141630053520203, + "learning_rate": 5.732210597694383e-05, + "loss": 2.4747, + "step": 12845 + }, + { + "epoch": 1.0367202001452667, + "grad_norm": 0.7186396718025208, + "learning_rate": 5.730782953345435e-05, + "loss": 2.4401, + "step": 12846 + }, + { + "epoch": 1.0368009038818498, + "grad_norm": 0.6709686517715454, + "learning_rate": 5.7293554153965345e-05, + "loss": 2.456, + "step": 12847 + }, + { + "epoch": 1.0368816076184326, + "grad_norm": 0.6867267489433289, + "learning_rate": 5.727927983883261e-05, + "loss": 2.4522, + "step": 12848 + }, + { + "epoch": 1.0369623113550157, + "grad_norm": 0.7016724348068237, + "learning_rate": 5.7265006588411926e-05, + "loss": 2.4348, + "step": 12849 + }, + { + "epoch": 1.0370430150915988, + "grad_norm": 0.6764764785766602, + "learning_rate": 5.725073440305896e-05, + "loss": 2.4241, + "step": 12850 + }, + { + "epoch": 1.0371237188281817, + "grad_norm": 0.6965062618255615, + "learning_rate": 5.7236463283129435e-05, + "loss": 2.4559, + "step": 12851 + }, + { + "epoch": 1.0372044225647647, + "grad_norm": 0.6878135800361633, + "learning_rate": 5.7222193228979037e-05, + "loss": 2.4874, + "step": 12852 + }, + { + "epoch": 1.0372851263013478, + "grad_norm": 0.6576557755470276, + "learning_rate": 5.720792424096344e-05, + "loss": 2.4273, + "step": 12853 + }, + { + "epoch": 1.0373658300379307, + "grad_norm": 0.7463123798370361, + "learning_rate": 
5.719365631943818e-05, + "loss": 2.4933, + "step": 12854 + }, + { + "epoch": 1.0374465337745138, + "grad_norm": 0.6920896768569946, + "learning_rate": 5.7179389464758914e-05, + "loss": 2.4799, + "step": 12855 + }, + { + "epoch": 1.0375272375110969, + "grad_norm": 0.7330591082572937, + "learning_rate": 5.71651236772812e-05, + "loss": 2.469, + "step": 12856 + }, + { + "epoch": 1.0376079412476797, + "grad_norm": 0.6766076683998108, + "learning_rate": 5.715085895736057e-05, + "loss": 2.4787, + "step": 12857 + }, + { + "epoch": 1.0376886449842628, + "grad_norm": 0.724278450012207, + "learning_rate": 5.713659530535255e-05, + "loss": 2.4524, + "step": 12858 + }, + { + "epoch": 1.0377693487208457, + "grad_norm": 0.6816281676292419, + "learning_rate": 5.712233272161265e-05, + "loss": 2.4993, + "step": 12859 + }, + { + "epoch": 1.0378500524574288, + "grad_norm": 0.7186439633369446, + "learning_rate": 5.710807120649626e-05, + "loss": 2.4108, + "step": 12860 + }, + { + "epoch": 1.0379307561940119, + "grad_norm": 0.6616777181625366, + "learning_rate": 5.709381076035887e-05, + "loss": 2.4797, + "step": 12861 + }, + { + "epoch": 1.0380114599305947, + "grad_norm": 0.6956895589828491, + "learning_rate": 5.7079551383555906e-05, + "loss": 2.4017, + "step": 12862 + }, + { + "epoch": 1.0380921636671778, + "grad_norm": 0.6650584936141968, + "learning_rate": 5.706529307644268e-05, + "loss": 2.4808, + "step": 12863 + }, + { + "epoch": 1.0381728674037607, + "grad_norm": 0.6362698674201965, + "learning_rate": 5.705103583937458e-05, + "loss": 2.4077, + "step": 12864 + }, + { + "epoch": 1.0382535711403438, + "grad_norm": 0.6962565183639526, + "learning_rate": 5.703677967270697e-05, + "loss": 2.4715, + "step": 12865 + }, + { + "epoch": 1.0383342748769269, + "grad_norm": 0.6927294135093689, + "learning_rate": 5.702252457679509e-05, + "loss": 2.4983, + "step": 12866 + }, + { + "epoch": 1.0384149786135097, + "grad_norm": 0.7107497453689575, + "learning_rate": 5.70082705519942e-05, + "loss": 
2.4198, + "step": 12867 + }, + { + "epoch": 1.0384956823500928, + "grad_norm": 0.6459221243858337, + "learning_rate": 5.6994017598659634e-05, + "loss": 2.4423, + "step": 12868 + }, + { + "epoch": 1.038576386086676, + "grad_norm": 0.705563485622406, + "learning_rate": 5.697976571714658e-05, + "loss": 2.5346, + "step": 12869 + }, + { + "epoch": 1.0386570898232588, + "grad_norm": 0.7424784898757935, + "learning_rate": 5.696551490781021e-05, + "loss": 2.4824, + "step": 12870 + }, + { + "epoch": 1.0387377935598419, + "grad_norm": 0.6820988059043884, + "learning_rate": 5.695126517100569e-05, + "loss": 2.4965, + "step": 12871 + }, + { + "epoch": 1.0388184972964247, + "grad_norm": 0.8209595680236816, + "learning_rate": 5.6937016507088225e-05, + "loss": 2.475, + "step": 12872 + }, + { + "epoch": 1.0388992010330078, + "grad_norm": 0.7407695055007935, + "learning_rate": 5.6922768916412815e-05, + "loss": 2.4683, + "step": 12873 + }, + { + "epoch": 1.038979904769591, + "grad_norm": 0.7335677742958069, + "learning_rate": 5.690852239933462e-05, + "loss": 2.4621, + "step": 12874 + }, + { + "epoch": 1.0390606085061738, + "grad_norm": 0.6731325387954712, + "learning_rate": 5.689427695620873e-05, + "loss": 2.4882, + "step": 12875 + }, + { + "epoch": 1.0391413122427569, + "grad_norm": 0.7256175875663757, + "learning_rate": 5.68800325873901e-05, + "loss": 2.4827, + "step": 12876 + }, + { + "epoch": 1.03922201597934, + "grad_norm": 0.711928129196167, + "learning_rate": 5.686578929323377e-05, + "loss": 2.4447, + "step": 12877 + }, + { + "epoch": 1.0393027197159228, + "grad_norm": 0.6445996165275574, + "learning_rate": 5.685154707409473e-05, + "loss": 2.453, + "step": 12878 + }, + { + "epoch": 1.039383423452506, + "grad_norm": 0.6656066179275513, + "learning_rate": 5.6837305930327923e-05, + "loss": 2.4863, + "step": 12879 + }, + { + "epoch": 1.0394641271890888, + "grad_norm": 0.6844663619995117, + "learning_rate": 5.682306586228828e-05, + "loss": 2.4524, + "step": 12880 + }, + { + 
"epoch": 1.0395448309256718, + "grad_norm": 0.6436383724212646, + "learning_rate": 5.6808826870330746e-05, + "loss": 2.4137, + "step": 12881 + }, + { + "epoch": 1.039625534662255, + "grad_norm": 0.6731196641921997, + "learning_rate": 5.6794588954810104e-05, + "loss": 2.4176, + "step": 12882 + }, + { + "epoch": 1.0397062383988378, + "grad_norm": 0.6994587779045105, + "learning_rate": 5.678035211608125e-05, + "loss": 2.4651, + "step": 12883 + }, + { + "epoch": 1.0397869421354209, + "grad_norm": 0.6912599205970764, + "learning_rate": 5.6766116354499e-05, + "loss": 2.3918, + "step": 12884 + }, + { + "epoch": 1.039867645872004, + "grad_norm": 0.7627033591270447, + "learning_rate": 5.6751881670418185e-05, + "loss": 2.4278, + "step": 12885 + }, + { + "epoch": 1.0399483496085868, + "grad_norm": 0.7107213139533997, + "learning_rate": 5.6737648064193485e-05, + "loss": 2.5249, + "step": 12886 + }, + { + "epoch": 1.04002905334517, + "grad_norm": 0.7254211902618408, + "learning_rate": 5.672341553617968e-05, + "loss": 2.4454, + "step": 12887 + }, + { + "epoch": 1.0401097570817528, + "grad_norm": 0.6776205897331238, + "learning_rate": 5.670918408673149e-05, + "loss": 2.4333, + "step": 12888 + }, + { + "epoch": 1.0401904608183359, + "grad_norm": 0.6824465394020081, + "learning_rate": 5.669495371620359e-05, + "loss": 2.427, + "step": 12889 + }, + { + "epoch": 1.040271164554919, + "grad_norm": 0.6633001565933228, + "learning_rate": 5.668072442495066e-05, + "loss": 2.4874, + "step": 12890 + }, + { + "epoch": 1.0403518682915018, + "grad_norm": 0.6655289530754089, + "learning_rate": 5.666649621332735e-05, + "loss": 2.5023, + "step": 12891 + }, + { + "epoch": 1.040432572028085, + "grad_norm": 0.6892853379249573, + "learning_rate": 5.665226908168818e-05, + "loss": 2.4505, + "step": 12892 + }, + { + "epoch": 1.040513275764668, + "grad_norm": 0.7154649496078491, + "learning_rate": 5.6638043030387774e-05, + "loss": 2.4916, + "step": 12893 + }, + { + "epoch": 1.0405939795012509, + 
"grad_norm": 0.6780592799186707, + "learning_rate": 5.662381805978074e-05, + "loss": 2.4116, + "step": 12894 + }, + { + "epoch": 1.040674683237834, + "grad_norm": 0.6737352013587952, + "learning_rate": 5.66095941702215e-05, + "loss": 2.3903, + "step": 12895 + }, + { + "epoch": 1.0407553869744168, + "grad_norm": 0.7623820304870605, + "learning_rate": 5.659537136206461e-05, + "loss": 2.4334, + "step": 12896 + }, + { + "epoch": 1.040836090711, + "grad_norm": 0.7043081521987915, + "learning_rate": 5.65811496356645e-05, + "loss": 2.4403, + "step": 12897 + }, + { + "epoch": 1.040916794447583, + "grad_norm": 0.6704873442649841, + "learning_rate": 5.6566928991375654e-05, + "loss": 2.4416, + "step": 12898 + }, + { + "epoch": 1.0409974981841659, + "grad_norm": 0.6556837558746338, + "learning_rate": 5.6552709429552474e-05, + "loss": 2.4904, + "step": 12899 + }, + { + "epoch": 1.041078201920749, + "grad_norm": 0.6926451325416565, + "learning_rate": 5.653849095054935e-05, + "loss": 2.4889, + "step": 12900 + }, + { + "epoch": 1.041158905657332, + "grad_norm": 0.6407613158226013, + "learning_rate": 5.6524273554720674e-05, + "loss": 2.3951, + "step": 12901 + }, + { + "epoch": 1.041239609393915, + "grad_norm": 0.7812615633010864, + "learning_rate": 5.651005724242071e-05, + "loss": 2.4535, + "step": 12902 + }, + { + "epoch": 1.041320313130498, + "grad_norm": 0.6868990659713745, + "learning_rate": 5.6495842014003796e-05, + "loss": 2.4373, + "step": 12903 + }, + { + "epoch": 1.0414010168670809, + "grad_norm": 0.6467776894569397, + "learning_rate": 5.648162786982427e-05, + "loss": 2.4929, + "step": 12904 + }, + { + "epoch": 1.041481720603664, + "grad_norm": 0.6588063836097717, + "learning_rate": 5.64674148102363e-05, + "loss": 2.4445, + "step": 12905 + }, + { + "epoch": 1.041562424340247, + "grad_norm": 0.6880654096603394, + "learning_rate": 5.6453202835594136e-05, + "loss": 2.4298, + "step": 12906 + }, + { + "epoch": 1.04164312807683, + "grad_norm": 0.7471407055854797, + 
"learning_rate": 5.6438991946251996e-05, + "loss": 2.4669, + "step": 12907 + }, + { + "epoch": 1.041723831813413, + "grad_norm": 0.7069533467292786, + "learning_rate": 5.6424782142564034e-05, + "loss": 2.4498, + "step": 12908 + }, + { + "epoch": 1.0418045355499959, + "grad_norm": 0.7013602256774902, + "learning_rate": 5.641057342488443e-05, + "loss": 2.4993, + "step": 12909 + }, + { + "epoch": 1.041885239286579, + "grad_norm": 0.6870697736740112, + "learning_rate": 5.6396365793567305e-05, + "loss": 2.5338, + "step": 12910 + }, + { + "epoch": 1.041965943023162, + "grad_norm": 0.6569130420684814, + "learning_rate": 5.638215924896669e-05, + "loss": 2.4538, + "step": 12911 + }, + { + "epoch": 1.042046646759745, + "grad_norm": 0.6900331377983093, + "learning_rate": 5.636795379143669e-05, + "loss": 2.4013, + "step": 12912 + }, + { + "epoch": 1.042127350496328, + "grad_norm": 0.6800071001052856, + "learning_rate": 5.635374942133136e-05, + "loss": 2.4733, + "step": 12913 + }, + { + "epoch": 1.042208054232911, + "grad_norm": 0.703601598739624, + "learning_rate": 5.6339546139004663e-05, + "loss": 2.432, + "step": 12914 + }, + { + "epoch": 1.042288757969494, + "grad_norm": 0.6781988739967346, + "learning_rate": 5.6325343944810594e-05, + "loss": 2.4418, + "step": 12915 + }, + { + "epoch": 1.042369461706077, + "grad_norm": 0.7247167825698853, + "learning_rate": 5.6311142839103125e-05, + "loss": 2.5133, + "step": 12916 + }, + { + "epoch": 1.04245016544266, + "grad_norm": 0.7738155126571655, + "learning_rate": 5.629694282223619e-05, + "loss": 2.5137, + "step": 12917 + }, + { + "epoch": 1.042530869179243, + "grad_norm": 0.74723219871521, + "learning_rate": 5.628274389456367e-05, + "loss": 2.3996, + "step": 12918 + }, + { + "epoch": 1.042611572915826, + "grad_norm": 0.7245466709136963, + "learning_rate": 5.6268546056439456e-05, + "loss": 2.4213, + "step": 12919 + }, + { + "epoch": 1.042692276652409, + "grad_norm": 0.6307608485221863, + "learning_rate": 5.625434930821742e-05, + 
"loss": 2.4195, + "step": 12920 + }, + { + "epoch": 1.042772980388992, + "grad_norm": 0.7138007879257202, + "learning_rate": 5.6240153650251326e-05, + "loss": 2.463, + "step": 12921 + }, + { + "epoch": 1.042853684125575, + "grad_norm": 0.779659628868103, + "learning_rate": 5.622595908289498e-05, + "loss": 2.4898, + "step": 12922 + }, + { + "epoch": 1.042934387862158, + "grad_norm": 0.7144278287887573, + "learning_rate": 5.621176560650221e-05, + "loss": 2.4083, + "step": 12923 + }, + { + "epoch": 1.043015091598741, + "grad_norm": 0.7724754214286804, + "learning_rate": 5.619757322142667e-05, + "loss": 2.3917, + "step": 12924 + }, + { + "epoch": 1.043095795335324, + "grad_norm": 0.7667245268821716, + "learning_rate": 5.618338192802208e-05, + "loss": 2.4943, + "step": 12925 + }, + { + "epoch": 1.043176499071907, + "grad_norm": 0.6528030037879944, + "learning_rate": 5.616919172664221e-05, + "loss": 2.4323, + "step": 12926 + }, + { + "epoch": 1.04325720280849, + "grad_norm": 0.6790263652801514, + "learning_rate": 5.6155002617640615e-05, + "loss": 2.4304, + "step": 12927 + }, + { + "epoch": 1.043337906545073, + "grad_norm": 0.7554369568824768, + "learning_rate": 5.614081460137097e-05, + "loss": 2.4637, + "step": 12928 + }, + { + "epoch": 1.043418610281656, + "grad_norm": 0.7126293182373047, + "learning_rate": 5.612662767818686e-05, + "loss": 2.4765, + "step": 12929 + }, + { + "epoch": 1.0434993140182391, + "grad_norm": 0.6705749034881592, + "learning_rate": 5.611244184844189e-05, + "loss": 2.4746, + "step": 12930 + }, + { + "epoch": 1.043580017754822, + "grad_norm": 0.6595145463943481, + "learning_rate": 5.609825711248958e-05, + "loss": 2.463, + "step": 12931 + }, + { + "epoch": 1.043660721491405, + "grad_norm": 0.6942049860954285, + "learning_rate": 5.6084073470683476e-05, + "loss": 2.5101, + "step": 12932 + }, + { + "epoch": 1.043741425227988, + "grad_norm": 0.7285810708999634, + "learning_rate": 5.6069890923377087e-05, + "loss": 2.467, + "step": 12933 + }, + { + 
"epoch": 1.043822128964571, + "grad_norm": 0.7702928185462952, + "learning_rate": 5.605570947092382e-05, + "loss": 2.4998, + "step": 12934 + }, + { + "epoch": 1.0439028327011541, + "grad_norm": 0.6631895899772644, + "learning_rate": 5.604152911367713e-05, + "loss": 2.4277, + "step": 12935 + }, + { + "epoch": 1.043983536437737, + "grad_norm": 0.6447882652282715, + "learning_rate": 5.6027349851990494e-05, + "loss": 2.4868, + "step": 12936 + }, + { + "epoch": 1.04406424017432, + "grad_norm": 0.695160448551178, + "learning_rate": 5.6013171686217205e-05, + "loss": 2.3917, + "step": 12937 + }, + { + "epoch": 1.0441449439109032, + "grad_norm": 0.6579271554946899, + "learning_rate": 5.5998994616710656e-05, + "loss": 2.4245, + "step": 12938 + }, + { + "epoch": 1.044225647647486, + "grad_norm": 0.7053574323654175, + "learning_rate": 5.598481864382419e-05, + "loss": 2.4809, + "step": 12939 + }, + { + "epoch": 1.0443063513840691, + "grad_norm": 0.7008736729621887, + "learning_rate": 5.5970643767911105e-05, + "loss": 2.4481, + "step": 12940 + }, + { + "epoch": 1.044387055120652, + "grad_norm": 0.6577918529510498, + "learning_rate": 5.5956469989324644e-05, + "loss": 2.4211, + "step": 12941 + }, + { + "epoch": 1.044467758857235, + "grad_norm": 0.6662739515304565, + "learning_rate": 5.594229730841815e-05, + "loss": 2.4607, + "step": 12942 + }, + { + "epoch": 1.0445484625938182, + "grad_norm": 0.6637060046195984, + "learning_rate": 5.592812572554471e-05, + "loss": 2.4388, + "step": 12943 + }, + { + "epoch": 1.044629166330401, + "grad_norm": 0.7282097935676575, + "learning_rate": 5.5913955241057605e-05, + "loss": 2.4536, + "step": 12944 + }, + { + "epoch": 1.0447098700669841, + "grad_norm": 0.6470810174942017, + "learning_rate": 5.589978585530997e-05, + "loss": 2.4032, + "step": 12945 + }, + { + "epoch": 1.0447905738035672, + "grad_norm": 0.6958881616592407, + "learning_rate": 5.588561756865498e-05, + "loss": 2.4577, + "step": 12946 + }, + { + "epoch": 1.04487127754015, + 
"grad_norm": 0.6999812722206116, + "learning_rate": 5.587145038144569e-05, + "loss": 2.454, + "step": 12947 + }, + { + "epoch": 1.0449519812767332, + "grad_norm": 0.6919988989830017, + "learning_rate": 5.58572842940352e-05, + "loss": 2.4505, + "step": 12948 + }, + { + "epoch": 1.045032685013316, + "grad_norm": 0.6813084483146667, + "learning_rate": 5.584311930677659e-05, + "loss": 2.4873, + "step": 12949 + }, + { + "epoch": 1.0451133887498991, + "grad_norm": 0.6587427854537964, + "learning_rate": 5.582895542002286e-05, + "loss": 2.4658, + "step": 12950 + }, + { + "epoch": 1.0451940924864822, + "grad_norm": 0.6942041516304016, + "learning_rate": 5.581479263412703e-05, + "loss": 2.47, + "step": 12951 + }, + { + "epoch": 1.045274796223065, + "grad_norm": 0.7330117225646973, + "learning_rate": 5.58006309494421e-05, + "loss": 2.4826, + "step": 12952 + }, + { + "epoch": 1.0453554999596482, + "grad_norm": 0.7197144031524658, + "learning_rate": 5.578647036632096e-05, + "loss": 2.4425, + "step": 12953 + }, + { + "epoch": 1.045436203696231, + "grad_norm": 0.7442573308944702, + "learning_rate": 5.577231088511654e-05, + "loss": 2.4946, + "step": 12954 + }, + { + "epoch": 1.0455169074328141, + "grad_norm": 0.7039753198623657, + "learning_rate": 5.575815250618179e-05, + "loss": 2.4188, + "step": 12955 + }, + { + "epoch": 1.0455976111693972, + "grad_norm": 0.7374606728553772, + "learning_rate": 5.574399522986951e-05, + "loss": 2.3916, + "step": 12956 + }, + { + "epoch": 1.04567831490598, + "grad_norm": 0.6358140707015991, + "learning_rate": 5.572983905653253e-05, + "loss": 2.4502, + "step": 12957 + }, + { + "epoch": 1.0457590186425632, + "grad_norm": 0.712858259677887, + "learning_rate": 5.5715683986523694e-05, + "loss": 2.4746, + "step": 12958 + }, + { + "epoch": 1.0458397223791462, + "grad_norm": 0.6757933497428894, + "learning_rate": 5.5701530020195756e-05, + "loss": 2.4836, + "step": 12959 + }, + { + "epoch": 1.045920426115729, + "grad_norm": 0.7509831786155701, + 
"learning_rate": 5.568737715790151e-05, + "loss": 2.4061, + "step": 12960 + }, + { + "epoch": 1.0460011298523122, + "grad_norm": 0.7120335102081299, + "learning_rate": 5.5673225399993646e-05, + "loss": 2.4772, + "step": 12961 + }, + { + "epoch": 1.046081833588895, + "grad_norm": 0.7213751673698425, + "learning_rate": 5.5659074746824924e-05, + "loss": 2.4637, + "step": 12962 + }, + { + "epoch": 1.0461625373254781, + "grad_norm": 0.7161290645599365, + "learning_rate": 5.5644925198747934e-05, + "loss": 2.4552, + "step": 12963 + }, + { + "epoch": 1.0462432410620612, + "grad_norm": 0.7303922772407532, + "learning_rate": 5.563077675611534e-05, + "loss": 2.5091, + "step": 12964 + }, + { + "epoch": 1.046323944798644, + "grad_norm": 0.7051636576652527, + "learning_rate": 5.561662941927981e-05, + "loss": 2.3717, + "step": 12965 + }, + { + "epoch": 1.0464046485352272, + "grad_norm": 0.6880733370780945, + "learning_rate": 5.5602483188593866e-05, + "loss": 2.4205, + "step": 12966 + }, + { + "epoch": 1.0464853522718103, + "grad_norm": 0.6942360401153564, + "learning_rate": 5.558833806441008e-05, + "loss": 2.4601, + "step": 12967 + }, + { + "epoch": 1.0465660560083931, + "grad_norm": 0.7264992594718933, + "learning_rate": 5.5574194047081016e-05, + "loss": 2.4612, + "step": 12968 + }, + { + "epoch": 1.0466467597449762, + "grad_norm": 0.7502472996711731, + "learning_rate": 5.5560051136959166e-05, + "loss": 2.4099, + "step": 12969 + }, + { + "epoch": 1.046727463481559, + "grad_norm": 0.691694438457489, + "learning_rate": 5.5545909334397004e-05, + "loss": 2.5071, + "step": 12970 + }, + { + "epoch": 1.0468081672181422, + "grad_norm": 0.7120653986930847, + "learning_rate": 5.5531768639747026e-05, + "loss": 2.4066, + "step": 12971 + }, + { + "epoch": 1.0468888709547253, + "grad_norm": 0.6501363515853882, + "learning_rate": 5.551762905336159e-05, + "loss": 2.4186, + "step": 12972 + }, + { + "epoch": 1.0469695746913081, + "grad_norm": 0.6924965977668762, + "learning_rate": 
5.5503490575593095e-05, + "loss": 2.4864, + "step": 12973 + }, + { + "epoch": 1.0470502784278912, + "grad_norm": 0.6772900819778442, + "learning_rate": 5.548935320679398e-05, + "loss": 2.4101, + "step": 12974 + }, + { + "epoch": 1.0471309821644743, + "grad_norm": 0.6950967311859131, + "learning_rate": 5.54752169473165e-05, + "loss": 2.4893, + "step": 12975 + }, + { + "epoch": 1.0472116859010572, + "grad_norm": 0.6663516163825989, + "learning_rate": 5.5461081797512994e-05, + "loss": 2.4136, + "step": 12976 + }, + { + "epoch": 1.0472923896376403, + "grad_norm": 0.7337449789047241, + "learning_rate": 5.5446947757735754e-05, + "loss": 2.473, + "step": 12977 + }, + { + "epoch": 1.0473730933742231, + "grad_norm": 0.6808840036392212, + "learning_rate": 5.543281482833709e-05, + "loss": 2.4473, + "step": 12978 + }, + { + "epoch": 1.0474537971108062, + "grad_norm": 0.6472508907318115, + "learning_rate": 5.5418683009669124e-05, + "loss": 2.4077, + "step": 12979 + }, + { + "epoch": 1.0475345008473893, + "grad_norm": 0.6904192566871643, + "learning_rate": 5.540455230208409e-05, + "loss": 2.482, + "step": 12980 + }, + { + "epoch": 1.0476152045839722, + "grad_norm": 0.6781610250473022, + "learning_rate": 5.5390422705934264e-05, + "loss": 2.4458, + "step": 12981 + }, + { + "epoch": 1.0476959083205553, + "grad_norm": 0.7130050659179688, + "learning_rate": 5.5376294221571666e-05, + "loss": 2.5136, + "step": 12982 + }, + { + "epoch": 1.0477766120571383, + "grad_norm": 0.7727184891700745, + "learning_rate": 5.536216684934846e-05, + "loss": 2.5346, + "step": 12983 + }, + { + "epoch": 1.0478573157937212, + "grad_norm": 0.7177208662033081, + "learning_rate": 5.534804058961679e-05, + "loss": 2.4153, + "step": 12984 + }, + { + "epoch": 1.0479380195303043, + "grad_norm": 0.7333023548126221, + "learning_rate": 5.5333915442728634e-05, + "loss": 2.4171, + "step": 12985 + }, + { + "epoch": 1.0480187232668872, + "grad_norm": 0.658423125743866, + "learning_rate": 5.5319791409036046e-05, + "loss": 
2.446, + "step": 12986 + }, + { + "epoch": 1.0480994270034703, + "grad_norm": 0.8305184841156006, + "learning_rate": 5.5305668488891114e-05, + "loss": 2.5026, + "step": 12987 + }, + { + "epoch": 1.0481801307400533, + "grad_norm": 0.7083305716514587, + "learning_rate": 5.52915466826457e-05, + "loss": 2.5366, + "step": 12988 + }, + { + "epoch": 1.0482608344766362, + "grad_norm": 0.7924454212188721, + "learning_rate": 5.5277425990651824e-05, + "loss": 2.528, + "step": 12989 + }, + { + "epoch": 1.0483415382132193, + "grad_norm": 0.633376955986023, + "learning_rate": 5.5263306413261384e-05, + "loss": 2.4442, + "step": 12990 + }, + { + "epoch": 1.0484222419498024, + "grad_norm": 0.7387240529060364, + "learning_rate": 5.5249187950826295e-05, + "loss": 2.4761, + "step": 12991 + }, + { + "epoch": 1.0485029456863852, + "grad_norm": 0.6796224117279053, + "learning_rate": 5.523507060369843e-05, + "loss": 2.4828, + "step": 12992 + }, + { + "epoch": 1.0485836494229683, + "grad_norm": 0.6925581097602844, + "learning_rate": 5.5220954372229604e-05, + "loss": 2.4861, + "step": 12993 + }, + { + "epoch": 1.0486643531595512, + "grad_norm": 0.6854318380355835, + "learning_rate": 5.5206839256771704e-05, + "loss": 2.473, + "step": 12994 + }, + { + "epoch": 1.0487450568961343, + "grad_norm": 0.706375241279602, + "learning_rate": 5.519272525767643e-05, + "loss": 2.4284, + "step": 12995 + }, + { + "epoch": 1.0488257606327174, + "grad_norm": 0.6917428374290466, + "learning_rate": 5.517861237529556e-05, + "loss": 2.4702, + "step": 12996 + }, + { + "epoch": 1.0489064643693002, + "grad_norm": 0.6903818845748901, + "learning_rate": 5.516450060998086e-05, + "loss": 2.4679, + "step": 12997 + }, + { + "epoch": 1.0489871681058833, + "grad_norm": 0.6403356194496155, + "learning_rate": 5.515038996208398e-05, + "loss": 2.396, + "step": 12998 + }, + { + "epoch": 1.0490678718424662, + "grad_norm": 0.6491792798042297, + "learning_rate": 5.513628043195662e-05, + "loss": 2.4543, + "step": 12999 + }, + { + 
"epoch": 1.0491485755790493, + "grad_norm": 0.687303900718689, + "learning_rate": 5.512217201995043e-05, + "loss": 2.4716, + "step": 13000 + }, + { + "epoch": 1.0491485755790493, + "eval_loss": 2.4177169799804688, + "eval_runtime": 763.9215, + "eval_samples_per_second": 3.43, + "eval_steps_per_second": 0.572, + "step": 13000 + }, + { + "epoch": 1.0492292793156324, + "grad_norm": 0.7020761370658875, + "learning_rate": 5.510806472641701e-05, + "loss": 2.3591, + "step": 13001 + }, + { + "epoch": 1.0493099830522152, + "grad_norm": 0.6978075504302979, + "learning_rate": 5.509395855170798e-05, + "loss": 2.4585, + "step": 13002 + }, + { + "epoch": 1.0493906867887983, + "grad_norm": 0.7327752113342285, + "learning_rate": 5.5079853496174925e-05, + "loss": 2.5265, + "step": 13003 + }, + { + "epoch": 1.0494713905253814, + "grad_norm": 0.7552505135536194, + "learning_rate": 5.50657495601693e-05, + "loss": 2.4821, + "step": 13004 + }, + { + "epoch": 1.0495520942619643, + "grad_norm": 0.7100770473480225, + "learning_rate": 5.5051646744042664e-05, + "loss": 2.4566, + "step": 13005 + }, + { + "epoch": 1.0496327979985474, + "grad_norm": 0.7008209824562073, + "learning_rate": 5.503754504814651e-05, + "loss": 2.4476, + "step": 13006 + }, + { + "epoch": 1.0497135017351304, + "grad_norm": 0.640724241733551, + "learning_rate": 5.502344447283223e-05, + "loss": 2.437, + "step": 13007 + }, + { + "epoch": 1.0497942054717133, + "grad_norm": 0.7064981460571289, + "learning_rate": 5.5009345018451297e-05, + "loss": 2.5129, + "step": 13008 + }, + { + "epoch": 1.0498749092082964, + "grad_norm": 0.6729782223701477, + "learning_rate": 5.49952466853551e-05, + "loss": 2.4867, + "step": 13009 + }, + { + "epoch": 1.0499556129448793, + "grad_norm": 0.7245302200317383, + "learning_rate": 5.4981149473894966e-05, + "loss": 2.4485, + "step": 13010 + }, + { + "epoch": 1.0500363166814624, + "grad_norm": 0.6686248779296875, + "learning_rate": 5.4967053384422294e-05, + "loss": 2.4314, + "step": 13011 + }, + { + 
"epoch": 1.0501170204180454, + "grad_norm": 0.6790863871574402, + "learning_rate": 5.495295841728836e-05, + "loss": 2.4847, + "step": 13012 + }, + { + "epoch": 1.0501977241546283, + "grad_norm": 0.6516931653022766, + "learning_rate": 5.49388645728445e-05, + "loss": 2.4306, + "step": 13013 + }, + { + "epoch": 1.0502784278912114, + "grad_norm": 0.6967600584030151, + "learning_rate": 5.492477185144189e-05, + "loss": 2.4942, + "step": 13014 + }, + { + "epoch": 1.0503591316277943, + "grad_norm": 0.696246325969696, + "learning_rate": 5.491068025343178e-05, + "loss": 2.4647, + "step": 13015 + }, + { + "epoch": 1.0504398353643774, + "grad_norm": 0.6962751150131226, + "learning_rate": 5.489658977916543e-05, + "loss": 2.5095, + "step": 13016 + }, + { + "epoch": 1.0505205391009604, + "grad_norm": 0.6982631087303162, + "learning_rate": 5.488250042899392e-05, + "loss": 2.4327, + "step": 13017 + }, + { + "epoch": 1.0506012428375433, + "grad_norm": 0.6932644844055176, + "learning_rate": 5.486841220326845e-05, + "loss": 2.4777, + "step": 13018 + }, + { + "epoch": 1.0506819465741264, + "grad_norm": 0.6923339366912842, + "learning_rate": 5.485432510234012e-05, + "loss": 2.4321, + "step": 13019 + }, + { + "epoch": 1.0507626503107095, + "grad_norm": 0.7445859313011169, + "learning_rate": 5.4840239126560015e-05, + "loss": 2.4425, + "step": 13020 + }, + { + "epoch": 1.0508433540472923, + "grad_norm": 0.7122324705123901, + "learning_rate": 5.48261542762792e-05, + "loss": 2.4545, + "step": 13021 + }, + { + "epoch": 1.0509240577838754, + "grad_norm": 0.734779417514801, + "learning_rate": 5.4812070551848736e-05, + "loss": 2.4764, + "step": 13022 + }, + { + "epoch": 1.0510047615204583, + "grad_norm": 0.6544109582901001, + "learning_rate": 5.4797987953619566e-05, + "loss": 2.4492, + "step": 13023 + }, + { + "epoch": 1.0510854652570414, + "grad_norm": 0.6366097331047058, + "learning_rate": 5.4783906481942704e-05, + "loss": 2.4695, + "step": 13024 + }, + { + "epoch": 1.0511661689936245, + 
"grad_norm": 0.6966270804405212, + "learning_rate": 5.476982613716908e-05, + "loss": 2.4505, + "step": 13025 + }, + { + "epoch": 1.0512468727302073, + "grad_norm": 0.7010120153427124, + "learning_rate": 5.4755746919649665e-05, + "loss": 2.4545, + "step": 13026 + }, + { + "epoch": 1.0513275764667904, + "grad_norm": 0.6704719662666321, + "learning_rate": 5.474166882973526e-05, + "loss": 2.3899, + "step": 13027 + }, + { + "epoch": 1.0514082802033735, + "grad_norm": 0.757152259349823, + "learning_rate": 5.472759186777679e-05, + "loss": 2.5112, + "step": 13028 + }, + { + "epoch": 1.0514889839399564, + "grad_norm": 0.6668868660926819, + "learning_rate": 5.471351603412509e-05, + "loss": 2.4797, + "step": 13029 + }, + { + "epoch": 1.0515696876765395, + "grad_norm": 0.7919496893882751, + "learning_rate": 5.4699441329130887e-05, + "loss": 2.4874, + "step": 13030 + }, + { + "epoch": 1.0516503914131223, + "grad_norm": 0.7595484852790833, + "learning_rate": 5.468536775314506e-05, + "loss": 2.4621, + "step": 13031 + }, + { + "epoch": 1.0517310951497054, + "grad_norm": 0.6575995683670044, + "learning_rate": 5.467129530651835e-05, + "loss": 2.4474, + "step": 13032 + }, + { + "epoch": 1.0518117988862885, + "grad_norm": 0.6817733645439148, + "learning_rate": 5.4657223989601425e-05, + "loss": 2.4329, + "step": 13033 + }, + { + "epoch": 1.0518925026228714, + "grad_norm": 0.722882091999054, + "learning_rate": 5.464315380274501e-05, + "loss": 2.4544, + "step": 13034 + }, + { + "epoch": 1.0519732063594545, + "grad_norm": 0.6957377791404724, + "learning_rate": 5.4629084746299796e-05, + "loss": 2.5669, + "step": 13035 + }, + { + "epoch": 1.0520539100960375, + "grad_norm": 0.6749420166015625, + "learning_rate": 5.461501682061636e-05, + "loss": 2.5053, + "step": 13036 + }, + { + "epoch": 1.0521346138326204, + "grad_norm": 0.8158369064331055, + "learning_rate": 5.4600950026045326e-05, + "loss": 2.429, + "step": 13037 + }, + { + "epoch": 1.0522153175692035, + "grad_norm": 0.6960736513137817, + 
"learning_rate": 5.458688436293735e-05, + "loss": 2.4731, + "step": 13038 + }, + { + "epoch": 1.0522960213057864, + "grad_norm": 0.6686301231384277, + "learning_rate": 5.457281983164287e-05, + "loss": 2.4495, + "step": 13039 + }, + { + "epoch": 1.0523767250423695, + "grad_norm": 0.6691476106643677, + "learning_rate": 5.455875643251248e-05, + "loss": 2.4329, + "step": 13040 + }, + { + "epoch": 1.0524574287789525, + "grad_norm": 0.7737297415733337, + "learning_rate": 5.454469416589666e-05, + "loss": 2.4664, + "step": 13041 + }, + { + "epoch": 1.0525381325155354, + "grad_norm": 0.7848188281059265, + "learning_rate": 5.453063303214588e-05, + "loss": 2.4799, + "step": 13042 + }, + { + "epoch": 1.0526188362521185, + "grad_norm": 0.7831119894981384, + "learning_rate": 5.45165730316106e-05, + "loss": 2.5076, + "step": 13043 + }, + { + "epoch": 1.0526995399887016, + "grad_norm": 0.691635012626648, + "learning_rate": 5.4502514164641196e-05, + "loss": 2.4866, + "step": 13044 + }, + { + "epoch": 1.0527802437252844, + "grad_norm": 0.6667110919952393, + "learning_rate": 5.4488456431588106e-05, + "loss": 2.4162, + "step": 13045 + }, + { + "epoch": 1.0528609474618675, + "grad_norm": 0.7201905846595764, + "learning_rate": 5.447439983280163e-05, + "loss": 2.498, + "step": 13046 + }, + { + "epoch": 1.0529416511984504, + "grad_norm": 0.8538106083869934, + "learning_rate": 5.44603443686321e-05, + "loss": 2.4477, + "step": 13047 + }, + { + "epoch": 1.0530223549350335, + "grad_norm": 0.6661962270736694, + "learning_rate": 5.444629003942987e-05, + "loss": 2.5253, + "step": 13048 + }, + { + "epoch": 1.0531030586716166, + "grad_norm": 0.7239834666252136, + "learning_rate": 5.4432236845545146e-05, + "loss": 2.4786, + "step": 13049 + }, + { + "epoch": 1.0531837624081994, + "grad_norm": 0.7328412532806396, + "learning_rate": 5.4418184787328186e-05, + "loss": 2.4841, + "step": 13050 + }, + { + "epoch": 1.0532644661447825, + "grad_norm": 0.6395559310913086, + "learning_rate": 
5.440413386512922e-05, + "loss": 2.3544, + "step": 13051 + }, + { + "epoch": 1.0533451698813656, + "grad_norm": 0.6632471084594727, + "learning_rate": 5.43900840792984e-05, + "loss": 2.4753, + "step": 13052 + }, + { + "epoch": 1.0534258736179485, + "grad_norm": 0.7262828350067139, + "learning_rate": 5.4376035430185935e-05, + "loss": 2.4162, + "step": 13053 + }, + { + "epoch": 1.0535065773545316, + "grad_norm": 0.7897952198982239, + "learning_rate": 5.436198791814196e-05, + "loss": 2.4571, + "step": 13054 + }, + { + "epoch": 1.0535872810911144, + "grad_norm": 0.7281489372253418, + "learning_rate": 5.434794154351651e-05, + "loss": 2.4531, + "step": 13055 + }, + { + "epoch": 1.0536679848276975, + "grad_norm": 0.7322356700897217, + "learning_rate": 5.4333896306659694e-05, + "loss": 2.4102, + "step": 13056 + }, + { + "epoch": 1.0537486885642806, + "grad_norm": 0.7657945156097412, + "learning_rate": 5.4319852207921554e-05, + "loss": 2.4526, + "step": 13057 + }, + { + "epoch": 1.0538293923008635, + "grad_norm": 0.6732973456382751, + "learning_rate": 5.430580924765214e-05, + "loss": 2.4516, + "step": 13058 + }, + { + "epoch": 1.0539100960374466, + "grad_norm": 0.663398027420044, + "learning_rate": 5.429176742620137e-05, + "loss": 2.4437, + "step": 13059 + }, + { + "epoch": 1.0539907997740294, + "grad_norm": 0.6363258957862854, + "learning_rate": 5.4277726743919244e-05, + "loss": 2.414, + "step": 13060 + }, + { + "epoch": 1.0540715035106125, + "grad_norm": 0.6600647568702698, + "learning_rate": 5.426368720115568e-05, + "loss": 2.4319, + "step": 13061 + }, + { + "epoch": 1.0541522072471956, + "grad_norm": 0.6941983699798584, + "learning_rate": 5.4249648798260574e-05, + "loss": 2.5247, + "step": 13062 + }, + { + "epoch": 1.0542329109837785, + "grad_norm": 0.7419719099998474, + "learning_rate": 5.423561153558383e-05, + "loss": 2.5088, + "step": 13063 + }, + { + "epoch": 1.0543136147203616, + "grad_norm": 0.708073079586029, + "learning_rate": 5.4221575413475326e-05, + "loss": 
2.4037, + "step": 13064 + }, + { + "epoch": 1.0543943184569446, + "grad_norm": 0.7081628441810608, + "learning_rate": 5.4207540432284764e-05, + "loss": 2.4556, + "step": 13065 + }, + { + "epoch": 1.0544750221935275, + "grad_norm": 0.7058689594268799, + "learning_rate": 5.419350659236201e-05, + "loss": 2.4244, + "step": 13066 + }, + { + "epoch": 1.0545557259301106, + "grad_norm": 0.6858707070350647, + "learning_rate": 5.417947389405684e-05, + "loss": 2.4431, + "step": 13067 + }, + { + "epoch": 1.0546364296666935, + "grad_norm": 0.6769983768463135, + "learning_rate": 5.416544233771893e-05, + "loss": 2.4257, + "step": 13068 + }, + { + "epoch": 1.0547171334032766, + "grad_norm": 0.7128089070320129, + "learning_rate": 5.4151411923698e-05, + "loss": 2.4558, + "step": 13069 + }, + { + "epoch": 1.0547978371398596, + "grad_norm": 0.6419198513031006, + "learning_rate": 5.413738265234374e-05, + "loss": 2.4421, + "step": 13070 + }, + { + "epoch": 1.0548785408764425, + "grad_norm": 0.760848879814148, + "learning_rate": 5.4123354524005784e-05, + "loss": 2.4427, + "step": 13071 + }, + { + "epoch": 1.0549592446130256, + "grad_norm": 0.6749173998832703, + "learning_rate": 5.410932753903377e-05, + "loss": 2.4902, + "step": 13072 + }, + { + "epoch": 1.0550399483496087, + "grad_norm": 0.6908800601959229, + "learning_rate": 5.4095301697777265e-05, + "loss": 2.4219, + "step": 13073 + }, + { + "epoch": 1.0551206520861915, + "grad_norm": 0.6779965758323669, + "learning_rate": 5.408127700058587e-05, + "loss": 2.4533, + "step": 13074 + }, + { + "epoch": 1.0552013558227746, + "grad_norm": 0.6832355260848999, + "learning_rate": 5.406725344780906e-05, + "loss": 2.418, + "step": 13075 + }, + { + "epoch": 1.0552820595593575, + "grad_norm": 0.6766698956489563, + "learning_rate": 5.4053231039796357e-05, + "loss": 2.4493, + "step": 13076 + }, + { + "epoch": 1.0553627632959406, + "grad_norm": 0.7256276607513428, + "learning_rate": 5.4039209776897285e-05, + "loss": 2.4126, + "step": 13077 + }, + { + 
"epoch": 1.0554434670325237, + "grad_norm": 0.6687275171279907, + "learning_rate": 5.4025189659461196e-05, + "loss": 2.435, + "step": 13078 + }, + { + "epoch": 1.0555241707691065, + "grad_norm": 0.6800444722175598, + "learning_rate": 5.401117068783758e-05, + "loss": 2.4608, + "step": 13079 + }, + { + "epoch": 1.0556048745056896, + "grad_norm": 0.6947116851806641, + "learning_rate": 5.399715286237583e-05, + "loss": 2.4908, + "step": 13080 + }, + { + "epoch": 1.0556855782422727, + "grad_norm": 0.6907915472984314, + "learning_rate": 5.398313618342521e-05, + "loss": 2.4805, + "step": 13081 + }, + { + "epoch": 1.0557662819788556, + "grad_norm": 0.7429100275039673, + "learning_rate": 5.396912065133516e-05, + "loss": 2.458, + "step": 13082 + }, + { + "epoch": 1.0558469857154387, + "grad_norm": 0.7186924815177917, + "learning_rate": 5.3955106266454994e-05, + "loss": 2.4924, + "step": 13083 + }, + { + "epoch": 1.0559276894520215, + "grad_norm": 0.7017999887466431, + "learning_rate": 5.394109302913391e-05, + "loss": 2.4103, + "step": 13084 + }, + { + "epoch": 1.0560083931886046, + "grad_norm": 0.7318955659866333, + "learning_rate": 5.392708093972117e-05, + "loss": 2.4424, + "step": 13085 + }, + { + "epoch": 1.0560890969251877, + "grad_norm": 0.6278600692749023, + "learning_rate": 5.391306999856602e-05, + "loss": 2.4433, + "step": 13086 + }, + { + "epoch": 1.0561698006617706, + "grad_norm": 0.6895800232887268, + "learning_rate": 5.389906020601767e-05, + "loss": 2.4275, + "step": 13087 + }, + { + "epoch": 1.0562505043983537, + "grad_norm": 0.7197345495223999, + "learning_rate": 5.388505156242522e-05, + "loss": 2.4309, + "step": 13088 + }, + { + "epoch": 1.0563312081349367, + "grad_norm": 0.636433482170105, + "learning_rate": 5.3871044068137824e-05, + "loss": 2.4258, + "step": 13089 + }, + { + "epoch": 1.0564119118715196, + "grad_norm": 0.6884748339653015, + "learning_rate": 5.3857037723504634e-05, + "loss": 2.4543, + "step": 13090 + }, + { + "epoch": 1.0564926156081027, + 
"grad_norm": 0.7277036309242249, + "learning_rate": 5.384303252887464e-05, + "loss": 2.4911, + "step": 13091 + }, + { + "epoch": 1.0565733193446856, + "grad_norm": 0.6940809488296509, + "learning_rate": 5.38290284845969e-05, + "loss": 2.4112, + "step": 13092 + }, + { + "epoch": 1.0566540230812687, + "grad_norm": 0.6729177236557007, + "learning_rate": 5.3815025591020526e-05, + "loss": 2.4394, + "step": 13093 + }, + { + "epoch": 1.0567347268178517, + "grad_norm": 0.6941854357719421, + "learning_rate": 5.3801023848494416e-05, + "loss": 2.4263, + "step": 13094 + }, + { + "epoch": 1.0568154305544346, + "grad_norm": 0.7046812772750854, + "learning_rate": 5.3787023257367554e-05, + "loss": 2.5196, + "step": 13095 + }, + { + "epoch": 1.0568961342910177, + "grad_norm": 0.6896177530288696, + "learning_rate": 5.377302381798891e-05, + "loss": 2.4178, + "step": 13096 + }, + { + "epoch": 1.0569768380276008, + "grad_norm": 0.6693699955940247, + "learning_rate": 5.375902553070731e-05, + "loss": 2.4908, + "step": 13097 + }, + { + "epoch": 1.0570575417641837, + "grad_norm": 0.6751677989959717, + "learning_rate": 5.3745028395871674e-05, + "loss": 2.4222, + "step": 13098 + }, + { + "epoch": 1.0571382455007667, + "grad_norm": 0.7666265368461609, + "learning_rate": 5.373103241383088e-05, + "loss": 2.4965, + "step": 13099 + }, + { + "epoch": 1.0572189492373496, + "grad_norm": 0.8069329857826233, + "learning_rate": 5.3717037584933674e-05, + "loss": 2.4988, + "step": 13100 + }, + { + "epoch": 1.0572996529739327, + "grad_norm": 0.7160749435424805, + "learning_rate": 5.370304390952887e-05, + "loss": 2.4311, + "step": 13101 + }, + { + "epoch": 1.0573803567105158, + "grad_norm": 0.6936448812484741, + "learning_rate": 5.368905138796523e-05, + "loss": 2.4877, + "step": 13102 + }, + { + "epoch": 1.0574610604470986, + "grad_norm": 0.7202793955802917, + "learning_rate": 5.3675060020591494e-05, + "loss": 2.4841, + "step": 13103 + }, + { + "epoch": 1.0575417641836817, + "grad_norm": 
0.7750168442726135, + "learning_rate": 5.366106980775636e-05, + "loss": 2.4828, + "step": 13104 + }, + { + "epoch": 1.0576224679202646, + "grad_norm": 0.7079972624778748, + "learning_rate": 5.364708074980849e-05, + "loss": 2.4912, + "step": 13105 + }, + { + "epoch": 1.0577031716568477, + "grad_norm": 0.704066276550293, + "learning_rate": 5.363309284709657e-05, + "loss": 2.4731, + "step": 13106 + }, + { + "epoch": 1.0577838753934308, + "grad_norm": 0.7040490508079529, + "learning_rate": 5.361910609996915e-05, + "loss": 2.3811, + "step": 13107 + }, + { + "epoch": 1.0578645791300136, + "grad_norm": 0.6669453978538513, + "learning_rate": 5.360512050877484e-05, + "loss": 2.5372, + "step": 13108 + }, + { + "epoch": 1.0579452828665967, + "grad_norm": 0.7197996973991394, + "learning_rate": 5.359113607386226e-05, + "loss": 2.4612, + "step": 13109 + }, + { + "epoch": 1.0580259866031798, + "grad_norm": 0.7192320823669434, + "learning_rate": 5.3577152795579824e-05, + "loss": 2.4636, + "step": 13110 + }, + { + "epoch": 1.0581066903397627, + "grad_norm": 0.6907937526702881, + "learning_rate": 5.35631706742761e-05, + "loss": 2.4791, + "step": 13111 + }, + { + "epoch": 1.0581873940763458, + "grad_norm": 0.687035083770752, + "learning_rate": 5.354918971029954e-05, + "loss": 2.4706, + "step": 13112 + }, + { + "epoch": 1.0582680978129286, + "grad_norm": 0.6666533350944519, + "learning_rate": 5.353520990399861e-05, + "loss": 2.4789, + "step": 13113 + }, + { + "epoch": 1.0583488015495117, + "grad_norm": 0.6261809468269348, + "learning_rate": 5.35212312557217e-05, + "loss": 2.4485, + "step": 13114 + }, + { + "epoch": 1.0584295052860948, + "grad_norm": 0.6740814447402954, + "learning_rate": 5.350725376581725e-05, + "loss": 2.47, + "step": 13115 + }, + { + "epoch": 1.0585102090226777, + "grad_norm": 0.7634154558181763, + "learning_rate": 5.3493277434633526e-05, + "loss": 2.4685, + "step": 13116 + }, + { + "epoch": 1.0585909127592608, + "grad_norm": 0.6674611568450928, + "learning_rate": 
5.34793022625189e-05, + "loss": 2.4362, + "step": 13117 + }, + { + "epoch": 1.0586716164958438, + "grad_norm": 0.7584757804870605, + "learning_rate": 5.346532824982167e-05, + "loss": 2.499, + "step": 13118 + }, + { + "epoch": 1.0587523202324267, + "grad_norm": 0.6453456282615662, + "learning_rate": 5.345135539689015e-05, + "loss": 2.4341, + "step": 13119 + }, + { + "epoch": 1.0588330239690098, + "grad_norm": 0.70013427734375, + "learning_rate": 5.343738370407247e-05, + "loss": 2.3448, + "step": 13120 + }, + { + "epoch": 1.0589137277055927, + "grad_norm": 0.6763362884521484, + "learning_rate": 5.342341317171693e-05, + "loss": 2.4234, + "step": 13121 + }, + { + "epoch": 1.0589944314421758, + "grad_norm": 0.6896576881408691, + "learning_rate": 5.3409443800171664e-05, + "loss": 2.4753, + "step": 13122 + }, + { + "epoch": 1.0590751351787588, + "grad_norm": 0.6984997987747192, + "learning_rate": 5.339547558978486e-05, + "loss": 2.4581, + "step": 13123 + }, + { + "epoch": 1.0591558389153417, + "grad_norm": 0.7276118993759155, + "learning_rate": 5.338150854090462e-05, + "loss": 2.4765, + "step": 13124 + }, + { + "epoch": 1.0592365426519248, + "grad_norm": 0.6943252086639404, + "learning_rate": 5.336754265387911e-05, + "loss": 2.4514, + "step": 13125 + }, + { + "epoch": 1.0593172463885079, + "grad_norm": 0.7070014476776123, + "learning_rate": 5.335357792905628e-05, + "loss": 2.4365, + "step": 13126 + }, + { + "epoch": 1.0593979501250907, + "grad_norm": 0.6887189149856567, + "learning_rate": 5.333961436678422e-05, + "loss": 2.4834, + "step": 13127 + }, + { + "epoch": 1.0594786538616738, + "grad_norm": 0.8150162696838379, + "learning_rate": 5.332565196741098e-05, + "loss": 2.4474, + "step": 13128 + }, + { + "epoch": 1.0595593575982567, + "grad_norm": 0.6681316494941711, + "learning_rate": 5.331169073128447e-05, + "loss": 2.4888, + "step": 13129 + }, + { + "epoch": 1.0596400613348398, + "grad_norm": 0.6696690320968628, + "learning_rate": 5.329773065875267e-05, + "loss": 
2.3874, + "step": 13130 + }, + { + "epoch": 1.0597207650714229, + "grad_norm": 0.729807436466217, + "learning_rate": 5.32837717501635e-05, + "loss": 2.4442, + "step": 13131 + }, + { + "epoch": 1.0598014688080057, + "grad_norm": 0.6959047913551331, + "learning_rate": 5.326981400586486e-05, + "loss": 2.4697, + "step": 13132 + }, + { + "epoch": 1.0598821725445888, + "grad_norm": 0.667294442653656, + "learning_rate": 5.3255857426204606e-05, + "loss": 2.3986, + "step": 13133 + }, + { + "epoch": 1.059962876281172, + "grad_norm": 0.6953842639923096, + "learning_rate": 5.3241902011530566e-05, + "loss": 2.396, + "step": 13134 + }, + { + "epoch": 1.0600435800177548, + "grad_norm": 0.6544597148895264, + "learning_rate": 5.32279477621906e-05, + "loss": 2.426, + "step": 13135 + }, + { + "epoch": 1.0601242837543379, + "grad_norm": 0.708017885684967, + "learning_rate": 5.321399467853241e-05, + "loss": 2.4931, + "step": 13136 + }, + { + "epoch": 1.0602049874909207, + "grad_norm": 0.6669809818267822, + "learning_rate": 5.3200042760903764e-05, + "loss": 2.4354, + "step": 13137 + }, + { + "epoch": 1.0602856912275038, + "grad_norm": 1.0144098997116089, + "learning_rate": 5.3186092009652435e-05, + "loss": 2.4803, + "step": 13138 + }, + { + "epoch": 1.060366394964087, + "grad_norm": 0.7213768362998962, + "learning_rate": 5.317214242512601e-05, + "loss": 2.4318, + "step": 13139 + }, + { + "epoch": 1.0604470987006698, + "grad_norm": 0.6429069638252258, + "learning_rate": 5.315819400767223e-05, + "loss": 2.458, + "step": 13140 + }, + { + "epoch": 1.0605278024372529, + "grad_norm": 0.6480485796928406, + "learning_rate": 5.3144246757638714e-05, + "loss": 2.4586, + "step": 13141 + }, + { + "epoch": 1.060608506173836, + "grad_norm": 0.7037697434425354, + "learning_rate": 5.3130300675373035e-05, + "loss": 2.4698, + "step": 13142 + }, + { + "epoch": 1.0606892099104188, + "grad_norm": 0.7307559251785278, + "learning_rate": 5.3116355761222725e-05, + "loss": 2.4027, + "step": 13143 + }, + { + 
"epoch": 1.060769913647002, + "grad_norm": 0.6684615612030029, + "learning_rate": 5.310241201553547e-05, + "loss": 2.478, + "step": 13144 + }, + { + "epoch": 1.0608506173835848, + "grad_norm": 0.7018016576766968, + "learning_rate": 5.308846943865866e-05, + "loss": 2.4229, + "step": 13145 + }, + { + "epoch": 1.0609313211201679, + "grad_norm": 0.7538621425628662, + "learning_rate": 5.307452803093982e-05, + "loss": 2.5201, + "step": 13146 + }, + { + "epoch": 1.061012024856751, + "grad_norm": 0.6957963109016418, + "learning_rate": 5.306058779272645e-05, + "loss": 2.4233, + "step": 13147 + }, + { + "epoch": 1.0610927285933338, + "grad_norm": 0.6280590295791626, + "learning_rate": 5.304664872436588e-05, + "loss": 2.5117, + "step": 13148 + }, + { + "epoch": 1.061173432329917, + "grad_norm": 0.6937280297279358, + "learning_rate": 5.3032710826205564e-05, + "loss": 2.4889, + "step": 13149 + }, + { + "epoch": 1.0612541360664998, + "grad_norm": 0.6750391125679016, + "learning_rate": 5.3018774098592884e-05, + "loss": 2.4472, + "step": 13150 + }, + { + "epoch": 1.0613348398030829, + "grad_norm": 0.6931902766227722, + "learning_rate": 5.300483854187519e-05, + "loss": 2.3883, + "step": 13151 + }, + { + "epoch": 1.061415543539666, + "grad_norm": 0.6982774138450623, + "learning_rate": 5.2990904156399726e-05, + "loss": 2.4688, + "step": 13152 + }, + { + "epoch": 1.0614962472762488, + "grad_norm": 0.6873522996902466, + "learning_rate": 5.297697094251382e-05, + "loss": 2.4818, + "step": 13153 + }, + { + "epoch": 1.061576951012832, + "grad_norm": 0.635377049446106, + "learning_rate": 5.296303890056471e-05, + "loss": 2.3906, + "step": 13154 + }, + { + "epoch": 1.061657654749415, + "grad_norm": 0.6368159651756287, + "learning_rate": 5.294910803089963e-05, + "loss": 2.4714, + "step": 13155 + }, + { + "epoch": 1.0617383584859978, + "grad_norm": 0.7147238254547119, + "learning_rate": 5.293517833386576e-05, + "loss": 2.4746, + "step": 13156 + }, + { + "epoch": 1.061819062222581, + 
"grad_norm": 0.742189884185791, + "learning_rate": 5.2921249809810326e-05, + "loss": 2.3913, + "step": 13157 + }, + { + "epoch": 1.061899765959164, + "grad_norm": 0.6665734648704529, + "learning_rate": 5.290732245908038e-05, + "loss": 2.4263, + "step": 13158 + }, + { + "epoch": 1.0619804696957469, + "grad_norm": 0.6894757747650146, + "learning_rate": 5.2893396282023055e-05, + "loss": 2.4204, + "step": 13159 + }, + { + "epoch": 1.06206117343233, + "grad_norm": 0.6394561529159546, + "learning_rate": 5.287947127898546e-05, + "loss": 2.4183, + "step": 13160 + }, + { + "epoch": 1.0621418771689128, + "grad_norm": 0.7422548532485962, + "learning_rate": 5.2865547450314576e-05, + "loss": 2.4454, + "step": 13161 + }, + { + "epoch": 1.062222580905496, + "grad_norm": 0.7486133575439453, + "learning_rate": 5.285162479635748e-05, + "loss": 2.4856, + "step": 13162 + }, + { + "epoch": 1.062303284642079, + "grad_norm": 0.6743031144142151, + "learning_rate": 5.283770331746112e-05, + "loss": 2.4318, + "step": 13163 + }, + { + "epoch": 1.0623839883786619, + "grad_norm": 0.6461686491966248, + "learning_rate": 5.282378301397248e-05, + "loss": 2.4133, + "step": 13164 + }, + { + "epoch": 1.062464692115245, + "grad_norm": 0.6745431423187256, + "learning_rate": 5.28098638862385e-05, + "loss": 2.4463, + "step": 13165 + }, + { + "epoch": 1.0625453958518278, + "grad_norm": 0.6646310687065125, + "learning_rate": 5.279594593460606e-05, + "loss": 2.4211, + "step": 13166 + }, + { + "epoch": 1.062626099588411, + "grad_norm": 0.6789249777793884, + "learning_rate": 5.278202915942207e-05, + "loss": 2.4832, + "step": 13167 + }, + { + "epoch": 1.062706803324994, + "grad_norm": 0.7082679867744446, + "learning_rate": 5.2768113561033326e-05, + "loss": 2.4303, + "step": 13168 + }, + { + "epoch": 1.0627875070615769, + "grad_norm": 0.6875587701797485, + "learning_rate": 5.275419913978664e-05, + "loss": 2.4601, + "step": 13169 + }, + { + "epoch": 1.06286821079816, + "grad_norm": 0.6556203961372375, + 
"learning_rate": 5.274028589602886e-05, + "loss": 2.4359, + "step": 13170 + }, + { + "epoch": 1.062948914534743, + "grad_norm": 0.7280015349388123, + "learning_rate": 5.272637383010666e-05, + "loss": 2.4999, + "step": 13171 + }, + { + "epoch": 1.063029618271326, + "grad_norm": 0.664654016494751, + "learning_rate": 5.271246294236678e-05, + "loss": 2.3951, + "step": 13172 + }, + { + "epoch": 1.063110322007909, + "grad_norm": 0.6941719055175781, + "learning_rate": 5.2698553233155945e-05, + "loss": 2.45, + "step": 13173 + }, + { + "epoch": 1.0631910257444919, + "grad_norm": 0.7212931513786316, + "learning_rate": 5.268464470282082e-05, + "loss": 2.4615, + "step": 13174 + }, + { + "epoch": 1.063271729481075, + "grad_norm": 0.6877106428146362, + "learning_rate": 5.2670737351708014e-05, + "loss": 2.4495, + "step": 13175 + }, + { + "epoch": 1.063352433217658, + "grad_norm": 0.737718939781189, + "learning_rate": 5.26568311801642e-05, + "loss": 2.4971, + "step": 13176 + }, + { + "epoch": 1.063433136954241, + "grad_norm": 0.6909129619598389, + "learning_rate": 5.264292618853587e-05, + "loss": 2.4889, + "step": 13177 + }, + { + "epoch": 1.063513840690824, + "grad_norm": 0.6750304102897644, + "learning_rate": 5.262902237716961e-05, + "loss": 2.4779, + "step": 13178 + }, + { + "epoch": 1.063594544427407, + "grad_norm": 0.7256019115447998, + "learning_rate": 5.2615119746411954e-05, + "loss": 2.4904, + "step": 13179 + }, + { + "epoch": 1.06367524816399, + "grad_norm": 0.7335983514785767, + "learning_rate": 5.26012182966094e-05, + "loss": 2.4357, + "step": 13180 + }, + { + "epoch": 1.063755951900573, + "grad_norm": 0.6534200310707092, + "learning_rate": 5.258731802810837e-05, + "loss": 2.4213, + "step": 13181 + }, + { + "epoch": 1.063836655637156, + "grad_norm": 0.6899768114089966, + "learning_rate": 5.257341894125529e-05, + "loss": 2.4963, + "step": 13182 + }, + { + "epoch": 1.063917359373739, + "grad_norm": 0.7016159892082214, + "learning_rate": 5.25595210363966e-05, + "loss": 
2.4583, + "step": 13183 + }, + { + "epoch": 1.063998063110322, + "grad_norm": 0.6868152022361755, + "learning_rate": 5.2545624313878636e-05, + "loss": 2.4523, + "step": 13184 + }, + { + "epoch": 1.064078766846905, + "grad_norm": 0.7442622184753418, + "learning_rate": 5.2531728774047785e-05, + "loss": 2.425, + "step": 13185 + }, + { + "epoch": 1.064159470583488, + "grad_norm": 0.6900869011878967, + "learning_rate": 5.251783441725037e-05, + "loss": 2.459, + "step": 13186 + }, + { + "epoch": 1.0642401743200711, + "grad_norm": 0.6910288333892822, + "learning_rate": 5.25039412438326e-05, + "loss": 2.4882, + "step": 13187 + }, + { + "epoch": 1.064320878056654, + "grad_norm": 0.7644359469413757, + "learning_rate": 5.249004925414076e-05, + "loss": 2.4663, + "step": 13188 + }, + { + "epoch": 1.064401581793237, + "grad_norm": 0.6703082919120789, + "learning_rate": 5.247615844852114e-05, + "loss": 2.4309, + "step": 13189 + }, + { + "epoch": 1.06448228552982, + "grad_norm": 0.6449835896492004, + "learning_rate": 5.246226882731983e-05, + "loss": 2.4307, + "step": 13190 + }, + { + "epoch": 1.064562989266403, + "grad_norm": 0.7332713603973389, + "learning_rate": 5.244838039088305e-05, + "loss": 2.3763, + "step": 13191 + }, + { + "epoch": 1.0646436930029861, + "grad_norm": 0.7626641988754272, + "learning_rate": 5.2434493139556974e-05, + "loss": 2.4167, + "step": 13192 + }, + { + "epoch": 1.064724396739569, + "grad_norm": 0.6924002170562744, + "learning_rate": 5.2420607073687614e-05, + "loss": 2.4751, + "step": 13193 + }, + { + "epoch": 1.064805100476152, + "grad_norm": 0.6815003156661987, + "learning_rate": 5.2406722193621074e-05, + "loss": 2.4731, + "step": 13194 + }, + { + "epoch": 1.064885804212735, + "grad_norm": 0.7632609009742737, + "learning_rate": 5.239283849970347e-05, + "loss": 2.4562, + "step": 13195 + }, + { + "epoch": 1.064966507949318, + "grad_norm": 0.7157592177391052, + "learning_rate": 5.23789559922808e-05, + "loss": 2.4507, + "step": 13196 + }, + { + "epoch": 
1.065047211685901, + "grad_norm": 0.7035543918609619, + "learning_rate": 5.2365074671699e-05, + "loss": 2.4616, + "step": 13197 + }, + { + "epoch": 1.065127915422484, + "grad_norm": 0.7566644549369812, + "learning_rate": 5.235119453830406e-05, + "loss": 2.4751, + "step": 13198 + }, + { + "epoch": 1.065208619159067, + "grad_norm": 0.7030916213989258, + "learning_rate": 5.233731559244194e-05, + "loss": 2.381, + "step": 13199 + }, + { + "epoch": 1.0652893228956501, + "grad_norm": 0.7663755416870117, + "learning_rate": 5.232343783445847e-05, + "loss": 2.4822, + "step": 13200 + }, + { + "epoch": 1.065370026632233, + "grad_norm": 0.717767596244812, + "learning_rate": 5.230956126469955e-05, + "loss": 2.4807, + "step": 13201 + }, + { + "epoch": 1.065450730368816, + "grad_norm": 0.6920818090438843, + "learning_rate": 5.229568588351108e-05, + "loss": 2.4643, + "step": 13202 + }, + { + "epoch": 1.0655314341053992, + "grad_norm": 0.6812553405761719, + "learning_rate": 5.228181169123877e-05, + "loss": 2.4443, + "step": 13203 + }, + { + "epoch": 1.065612137841982, + "grad_norm": 0.7241889834403992, + "learning_rate": 5.226793868822846e-05, + "loss": 2.4581, + "step": 13204 + }, + { + "epoch": 1.0656928415785651, + "grad_norm": 0.7254642248153687, + "learning_rate": 5.225406687482588e-05, + "loss": 2.4999, + "step": 13205 + }, + { + "epoch": 1.065773545315148, + "grad_norm": 0.7316950559616089, + "learning_rate": 5.2240196251376764e-05, + "loss": 2.4493, + "step": 13206 + }, + { + "epoch": 1.065854249051731, + "grad_norm": 0.7208307385444641, + "learning_rate": 5.22263268182268e-05, + "loss": 2.5083, + "step": 13207 + }, + { + "epoch": 1.0659349527883142, + "grad_norm": 0.6552214622497559, + "learning_rate": 5.22124585757217e-05, + "loss": 2.4662, + "step": 13208 + }, + { + "epoch": 1.066015656524897, + "grad_norm": 0.7949681878089905, + "learning_rate": 5.219859152420701e-05, + "loss": 2.4584, + "step": 13209 + }, + { + "epoch": 1.0660963602614801, + "grad_norm": 
0.7012154459953308, + "learning_rate": 5.2184725664028366e-05, + "loss": 2.4702, + "step": 13210 + }, + { + "epoch": 1.066177063998063, + "grad_norm": 0.7431927919387817, + "learning_rate": 5.217086099553136e-05, + "loss": 2.4422, + "step": 13211 + }, + { + "epoch": 1.066257767734646, + "grad_norm": 0.7235366702079773, + "learning_rate": 5.2156997519061554e-05, + "loss": 2.4173, + "step": 13212 + }, + { + "epoch": 1.0663384714712292, + "grad_norm": 0.7475029826164246, + "learning_rate": 5.214313523496439e-05, + "loss": 2.4924, + "step": 13213 + }, + { + "epoch": 1.066419175207812, + "grad_norm": 0.6326786875724792, + "learning_rate": 5.212927414358542e-05, + "loss": 2.4154, + "step": 13214 + }, + { + "epoch": 1.0664998789443951, + "grad_norm": 0.6755837798118591, + "learning_rate": 5.211541424527004e-05, + "loss": 2.4248, + "step": 13215 + }, + { + "epoch": 1.0665805826809782, + "grad_norm": 0.645395040512085, + "learning_rate": 5.210155554036373e-05, + "loss": 2.4078, + "step": 13216 + }, + { + "epoch": 1.066661286417561, + "grad_norm": 0.799913763999939, + "learning_rate": 5.208769802921185e-05, + "loss": 2.5067, + "step": 13217 + }, + { + "epoch": 1.0667419901541442, + "grad_norm": 0.7056344747543335, + "learning_rate": 5.207384171215983e-05, + "loss": 2.4817, + "step": 13218 + }, + { + "epoch": 1.0668226938907273, + "grad_norm": 0.7082187533378601, + "learning_rate": 5.205998658955291e-05, + "loss": 2.4495, + "step": 13219 + }, + { + "epoch": 1.0669033976273101, + "grad_norm": 0.6948464512825012, + "learning_rate": 5.204613266173646e-05, + "loss": 2.4584, + "step": 13220 + }, + { + "epoch": 1.0669841013638932, + "grad_norm": 0.7812542915344238, + "learning_rate": 5.203227992905575e-05, + "loss": 2.4803, + "step": 13221 + }, + { + "epoch": 1.067064805100476, + "grad_norm": 0.6892200708389282, + "learning_rate": 5.201842839185598e-05, + "loss": 2.4424, + "step": 13222 + }, + { + "epoch": 1.0671455088370592, + "grad_norm": 0.6982070803642273, + "learning_rate": 
5.20045780504824e-05, + "loss": 2.4654, + "step": 13223 + }, + { + "epoch": 1.0672262125736423, + "grad_norm": 0.6799101233482361, + "learning_rate": 5.1990728905280205e-05, + "loss": 2.4748, + "step": 13224 + }, + { + "epoch": 1.0673069163102251, + "grad_norm": 0.6703687906265259, + "learning_rate": 5.1976880956594544e-05, + "loss": 2.4459, + "step": 13225 + }, + { + "epoch": 1.0673876200468082, + "grad_norm": 0.6821435689926147, + "learning_rate": 5.196303420477053e-05, + "loss": 2.4517, + "step": 13226 + }, + { + "epoch": 1.067468323783391, + "grad_norm": 0.6369695067405701, + "learning_rate": 5.194918865015328e-05, + "loss": 2.4388, + "step": 13227 + }, + { + "epoch": 1.0675490275199742, + "grad_norm": 0.6465736627578735, + "learning_rate": 5.1935344293087885e-05, + "loss": 2.3839, + "step": 13228 + }, + { + "epoch": 1.0676297312565572, + "grad_norm": 0.6745415329933167, + "learning_rate": 5.192150113391933e-05, + "loss": 2.4676, + "step": 13229 + }, + { + "epoch": 1.0677104349931401, + "grad_norm": 0.7605211138725281, + "learning_rate": 5.190765917299263e-05, + "loss": 2.4764, + "step": 13230 + }, + { + "epoch": 1.0677911387297232, + "grad_norm": 0.7040959596633911, + "learning_rate": 5.1893818410652825e-05, + "loss": 2.4727, + "step": 13231 + }, + { + "epoch": 1.0678718424663063, + "grad_norm": 0.6718928813934326, + "learning_rate": 5.1879978847244785e-05, + "loss": 2.4308, + "step": 13232 + }, + { + "epoch": 1.0679525462028892, + "grad_norm": 0.6788188219070435, + "learning_rate": 5.1866140483113445e-05, + "loss": 2.4278, + "step": 13233 + }, + { + "epoch": 1.0680332499394722, + "grad_norm": 0.7310218811035156, + "learning_rate": 5.185230331860371e-05, + "loss": 2.4585, + "step": 13234 + }, + { + "epoch": 1.068113953676055, + "grad_norm": 0.8092277646064758, + "learning_rate": 5.183846735406044e-05, + "loss": 2.4128, + "step": 13235 + }, + { + "epoch": 1.0681946574126382, + "grad_norm": 0.6469862461090088, + "learning_rate": 5.182463258982846e-05, + "loss": 
2.4315, + "step": 13236 + }, + { + "epoch": 1.0682753611492213, + "grad_norm": 0.7948115468025208, + "learning_rate": 5.181079902625261e-05, + "loss": 2.5127, + "step": 13237 + }, + { + "epoch": 1.0683560648858041, + "grad_norm": 0.6988852620124817, + "learning_rate": 5.179696666367757e-05, + "loss": 2.432, + "step": 13238 + }, + { + "epoch": 1.0684367686223872, + "grad_norm": 0.6914555430412292, + "learning_rate": 5.1783135502448124e-05, + "loss": 2.4748, + "step": 13239 + }, + { + "epoch": 1.0685174723589703, + "grad_norm": 0.7586313486099243, + "learning_rate": 5.176930554290902e-05, + "loss": 2.4522, + "step": 13240 + }, + { + "epoch": 1.0685981760955532, + "grad_norm": 0.6763948798179626, + "learning_rate": 5.175547678540487e-05, + "loss": 2.4477, + "step": 13241 + }, + { + "epoch": 1.0686788798321363, + "grad_norm": 0.7625983357429504, + "learning_rate": 5.1741649230280334e-05, + "loss": 2.4725, + "step": 13242 + }, + { + "epoch": 1.0687595835687191, + "grad_norm": 0.6574710011482239, + "learning_rate": 5.172782287788005e-05, + "loss": 2.4212, + "step": 13243 + }, + { + "epoch": 1.0688402873053022, + "grad_norm": 0.770062267780304, + "learning_rate": 5.1713997728548615e-05, + "loss": 2.5065, + "step": 13244 + }, + { + "epoch": 1.0689209910418853, + "grad_norm": 0.7719037532806396, + "learning_rate": 5.170017378263057e-05, + "loss": 2.5082, + "step": 13245 + }, + { + "epoch": 1.0690016947784682, + "grad_norm": 0.7106119394302368, + "learning_rate": 5.168635104047046e-05, + "loss": 2.4922, + "step": 13246 + }, + { + "epoch": 1.0690823985150513, + "grad_norm": 0.711815595626831, + "learning_rate": 5.167252950241281e-05, + "loss": 2.498, + "step": 13247 + }, + { + "epoch": 1.0691631022516344, + "grad_norm": 0.6926038265228271, + "learning_rate": 5.165870916880201e-05, + "loss": 2.4464, + "step": 13248 + }, + { + "epoch": 1.0692438059882172, + "grad_norm": 0.6959360241889954, + "learning_rate": 5.164489003998254e-05, + "loss": 2.4668, + "step": 13249 + }, + { + 
"epoch": 1.0693245097248003, + "grad_norm": 0.7165184617042542, + "learning_rate": 5.1631072116298875e-05, + "loss": 2.4198, + "step": 13250 + }, + { + "epoch": 1.0694052134613832, + "grad_norm": 0.7133236527442932, + "learning_rate": 5.161725539809527e-05, + "loss": 2.4691, + "step": 13251 + }, + { + "epoch": 1.0694859171979663, + "grad_norm": 0.7057758569717407, + "learning_rate": 5.160343988571613e-05, + "loss": 2.466, + "step": 13252 + }, + { + "epoch": 1.0695666209345494, + "grad_norm": 0.6808326244354248, + "learning_rate": 5.158962557950583e-05, + "loss": 2.4248, + "step": 13253 + }, + { + "epoch": 1.0696473246711322, + "grad_norm": 0.7166025638580322, + "learning_rate": 5.1575812479808563e-05, + "loss": 2.4753, + "step": 13254 + }, + { + "epoch": 1.0697280284077153, + "grad_norm": 0.7395358085632324, + "learning_rate": 5.156200058696863e-05, + "loss": 2.485, + "step": 13255 + }, + { + "epoch": 1.0698087321442982, + "grad_norm": 0.681106686592102, + "learning_rate": 5.154818990133026e-05, + "loss": 2.5077, + "step": 13256 + }, + { + "epoch": 1.0698894358808813, + "grad_norm": 0.7517002820968628, + "learning_rate": 5.153438042323766e-05, + "loss": 2.5093, + "step": 13257 + }, + { + "epoch": 1.0699701396174643, + "grad_norm": 0.6516926288604736, + "learning_rate": 5.152057215303499e-05, + "loss": 2.4416, + "step": 13258 + }, + { + "epoch": 1.0700508433540472, + "grad_norm": 0.6930893063545227, + "learning_rate": 5.150676509106638e-05, + "loss": 2.506, + "step": 13259 + }, + { + "epoch": 1.0701315470906303, + "grad_norm": 0.7737041115760803, + "learning_rate": 5.1492959237675986e-05, + "loss": 2.4355, + "step": 13260 + }, + { + "epoch": 1.0702122508272134, + "grad_norm": 0.7274872660636902, + "learning_rate": 5.14791545932078e-05, + "loss": 2.5552, + "step": 13261 + }, + { + "epoch": 1.0702929545637963, + "grad_norm": 0.7112408876419067, + "learning_rate": 5.146535115800593e-05, + "loss": 2.4041, + "step": 13262 + }, + { + "epoch": 1.0703736583003793, + 
"grad_norm": 0.6822024583816528, + "learning_rate": 5.1451548932414415e-05, + "loss": 2.4346, + "step": 13263 + }, + { + "epoch": 1.0704543620369624, + "grad_norm": 0.6590598225593567, + "learning_rate": 5.1437747916777165e-05, + "loss": 2.3946, + "step": 13264 + }, + { + "epoch": 1.0705350657735453, + "grad_norm": 0.643014132976532, + "learning_rate": 5.142394811143818e-05, + "loss": 2.4455, + "step": 13265 + }, + { + "epoch": 1.0706157695101284, + "grad_norm": 0.6480194926261902, + "learning_rate": 5.141014951674139e-05, + "loss": 2.4304, + "step": 13266 + }, + { + "epoch": 1.0706964732467112, + "grad_norm": 0.6933526992797852, + "learning_rate": 5.139635213303069e-05, + "loss": 2.4627, + "step": 13267 + }, + { + "epoch": 1.0707771769832943, + "grad_norm": 0.6832638382911682, + "learning_rate": 5.138255596064995e-05, + "loss": 2.4645, + "step": 13268 + }, + { + "epoch": 1.0708578807198774, + "grad_norm": 0.6579757928848267, + "learning_rate": 5.1368760999943034e-05, + "loss": 2.3928, + "step": 13269 + }, + { + "epoch": 1.0709385844564603, + "grad_norm": 0.6658132672309875, + "learning_rate": 5.1354967251253684e-05, + "loss": 2.4732, + "step": 13270 + }, + { + "epoch": 1.0710192881930434, + "grad_norm": 0.7610828876495361, + "learning_rate": 5.13411747149257e-05, + "loss": 2.4781, + "step": 13271 + }, + { + "epoch": 1.0710999919296262, + "grad_norm": 0.682858943939209, + "learning_rate": 5.1327383391302895e-05, + "loss": 2.4545, + "step": 13272 + }, + { + "epoch": 1.0711806956662093, + "grad_norm": 0.7461360692977905, + "learning_rate": 5.131359328072887e-05, + "loss": 2.4647, + "step": 13273 + }, + { + "epoch": 1.0712613994027924, + "grad_norm": 0.6767961382865906, + "learning_rate": 5.129980438354738e-05, + "loss": 2.4562, + "step": 13274 + }, + { + "epoch": 1.0713421031393753, + "grad_norm": 0.6768184304237366, + "learning_rate": 5.1286016700102066e-05, + "loss": 2.4662, + "step": 13275 + }, + { + "epoch": 1.0714228068759584, + "grad_norm": 0.7022743225097656, 
+ "learning_rate": 5.1272230230736554e-05, + "loss": 2.4321, + "step": 13276 + }, + { + "epoch": 1.0715035106125415, + "grad_norm": 0.725488007068634, + "learning_rate": 5.125844497579444e-05, + "loss": 2.457, + "step": 13277 + }, + { + "epoch": 1.0715842143491243, + "grad_norm": 0.7542931437492371, + "learning_rate": 5.124466093561928e-05, + "loss": 2.4302, + "step": 13278 + }, + { + "epoch": 1.0716649180857074, + "grad_norm": 0.6598316431045532, + "learning_rate": 5.123087811055467e-05, + "loss": 2.4552, + "step": 13279 + }, + { + "epoch": 1.0717456218222903, + "grad_norm": 0.7533490061759949, + "learning_rate": 5.1217096500944017e-05, + "loss": 2.4778, + "step": 13280 + }, + { + "epoch": 1.0718263255588734, + "grad_norm": 0.6890795826911926, + "learning_rate": 5.1203316107130825e-05, + "loss": 2.4349, + "step": 13281 + }, + { + "epoch": 1.0719070292954564, + "grad_norm": 0.7004082202911377, + "learning_rate": 5.118953692945862e-05, + "loss": 2.4645, + "step": 13282 + }, + { + "epoch": 1.0719877330320393, + "grad_norm": 0.7409259676933289, + "learning_rate": 5.117575896827068e-05, + "loss": 2.4734, + "step": 13283 + }, + { + "epoch": 1.0720684367686224, + "grad_norm": 0.7035481929779053, + "learning_rate": 5.116198222391046e-05, + "loss": 2.5027, + "step": 13284 + }, + { + "epoch": 1.0721491405052055, + "grad_norm": 0.7146698236465454, + "learning_rate": 5.114820669672132e-05, + "loss": 2.4623, + "step": 13285 + }, + { + "epoch": 1.0722298442417884, + "grad_norm": 0.7813882231712341, + "learning_rate": 5.113443238704656e-05, + "loss": 2.4644, + "step": 13286 + }, + { + "epoch": 1.0723105479783714, + "grad_norm": 0.6592430472373962, + "learning_rate": 5.1120659295229486e-05, + "loss": 2.4682, + "step": 13287 + }, + { + "epoch": 1.0723912517149543, + "grad_norm": 0.7047967910766602, + "learning_rate": 5.1106887421613395e-05, + "loss": 2.4368, + "step": 13288 + }, + { + "epoch": 1.0724719554515374, + "grad_norm": 0.700977087020874, + "learning_rate": 
5.109311676654143e-05, + "loss": 2.4471, + "step": 13289 + }, + { + "epoch": 1.0725526591881205, + "grad_norm": 0.6821093559265137, + "learning_rate": 5.107934733035684e-05, + "loss": 2.433, + "step": 13290 + }, + { + "epoch": 1.0726333629247033, + "grad_norm": 0.6579930186271667, + "learning_rate": 5.1065579113402794e-05, + "loss": 2.4527, + "step": 13291 + }, + { + "epoch": 1.0727140666612864, + "grad_norm": 0.658514678478241, + "learning_rate": 5.105181211602248e-05, + "loss": 2.4443, + "step": 13292 + }, + { + "epoch": 1.0727947703978695, + "grad_norm": 0.6963977217674255, + "learning_rate": 5.103804633855891e-05, + "loss": 2.4699, + "step": 13293 + }, + { + "epoch": 1.0728754741344524, + "grad_norm": 0.6670787334442139, + "learning_rate": 5.102428178135522e-05, + "loss": 2.4672, + "step": 13294 + }, + { + "epoch": 1.0729561778710355, + "grad_norm": 0.6959822773933411, + "learning_rate": 5.1010518444754454e-05, + "loss": 2.4338, + "step": 13295 + }, + { + "epoch": 1.0730368816076183, + "grad_norm": 0.6534817218780518, + "learning_rate": 5.0996756329099614e-05, + "loss": 2.4491, + "step": 13296 + }, + { + "epoch": 1.0731175853442014, + "grad_norm": 0.7265146970748901, + "learning_rate": 5.098299543473371e-05, + "loss": 2.4718, + "step": 13297 + }, + { + "epoch": 1.0731982890807845, + "grad_norm": 0.6554745435714722, + "learning_rate": 5.0969235761999746e-05, + "loss": 2.4286, + "step": 13298 + }, + { + "epoch": 1.0732789928173674, + "grad_norm": 0.7003172039985657, + "learning_rate": 5.095547731124053e-05, + "loss": 2.4182, + "step": 13299 + }, + { + "epoch": 1.0733596965539505, + "grad_norm": 0.6700341105461121, + "learning_rate": 5.094172008279904e-05, + "loss": 2.428, + "step": 13300 + }, + { + "epoch": 1.0734404002905333, + "grad_norm": 0.7290289402008057, + "learning_rate": 5.0927964077018164e-05, + "loss": 2.4324, + "step": 13301 + }, + { + "epoch": 1.0735211040271164, + "grad_norm": 0.6999204158782959, + "learning_rate": 5.0914209294240644e-05, + "loss": 
2.5386, + "step": 13302 + }, + { + "epoch": 1.0736018077636995, + "grad_norm": 0.7008000612258911, + "learning_rate": 5.090045573480935e-05, + "loss": 2.5295, + "step": 13303 + }, + { + "epoch": 1.0736825115002824, + "grad_norm": 0.7023071646690369, + "learning_rate": 5.088670339906705e-05, + "loss": 2.4418, + "step": 13304 + }, + { + "epoch": 1.0737632152368655, + "grad_norm": 0.627174437046051, + "learning_rate": 5.0872952287356525e-05, + "loss": 2.3782, + "step": 13305 + }, + { + "epoch": 1.0738439189734486, + "grad_norm": 0.6992766857147217, + "learning_rate": 5.0859202400020364e-05, + "loss": 2.4698, + "step": 13306 + }, + { + "epoch": 1.0739246227100314, + "grad_norm": 0.7189817428588867, + "learning_rate": 5.084545373740138e-05, + "loss": 2.5248, + "step": 13307 + }, + { + "epoch": 1.0740053264466145, + "grad_norm": 0.6849164962768555, + "learning_rate": 5.0831706299842216e-05, + "loss": 2.4084, + "step": 13308 + }, + { + "epoch": 1.0740860301831976, + "grad_norm": 0.6985825300216675, + "learning_rate": 5.0817960087685424e-05, + "loss": 2.4893, + "step": 13309 + }, + { + "epoch": 1.0741667339197805, + "grad_norm": 0.6519783139228821, + "learning_rate": 5.080421510127362e-05, + "loss": 2.5144, + "step": 13310 + }, + { + "epoch": 1.0742474376563635, + "grad_norm": 0.6605731248855591, + "learning_rate": 5.079047134094941e-05, + "loss": 2.4487, + "step": 13311 + }, + { + "epoch": 1.0743281413929464, + "grad_norm": 0.7236705422401428, + "learning_rate": 5.077672880705526e-05, + "loss": 2.4578, + "step": 13312 + }, + { + "epoch": 1.0744088451295295, + "grad_norm": 0.7126381397247314, + "learning_rate": 5.07629874999337e-05, + "loss": 2.4528, + "step": 13313 + }, + { + "epoch": 1.0744895488661126, + "grad_norm": 0.7247878313064575, + "learning_rate": 5.0749247419927236e-05, + "loss": 2.563, + "step": 13314 + }, + { + "epoch": 1.0745702526026955, + "grad_norm": 0.728349506855011, + "learning_rate": 5.0735508567378234e-05, + "loss": 2.4229, + "step": 13315 + }, + { + 
"epoch": 1.0746509563392785, + "grad_norm": 0.6593719124794006, + "learning_rate": 5.072177094262913e-05, + "loss": 2.4853, + "step": 13316 + }, + { + "epoch": 1.0747316600758614, + "grad_norm": 0.6519735455513, + "learning_rate": 5.070803454602231e-05, + "loss": 2.4507, + "step": 13317 + }, + { + "epoch": 1.0748123638124445, + "grad_norm": 0.6660017371177673, + "learning_rate": 5.0694299377900115e-05, + "loss": 2.4286, + "step": 13318 + }, + { + "epoch": 1.0748930675490276, + "grad_norm": 0.7506695985794067, + "learning_rate": 5.0680565438604876e-05, + "loss": 2.4841, + "step": 13319 + }, + { + "epoch": 1.0749737712856104, + "grad_norm": 0.6855955719947815, + "learning_rate": 5.0666832728478863e-05, + "loss": 2.3817, + "step": 13320 + }, + { + "epoch": 1.0750544750221935, + "grad_norm": 0.7151634693145752, + "learning_rate": 5.065310124786438e-05, + "loss": 2.3984, + "step": 13321 + }, + { + "epoch": 1.0751351787587766, + "grad_norm": 0.6551649570465088, + "learning_rate": 5.063937099710356e-05, + "loss": 2.4574, + "step": 13322 + }, + { + "epoch": 1.0752158824953595, + "grad_norm": 0.7443479895591736, + "learning_rate": 5.062564197653865e-05, + "loss": 2.52, + "step": 13323 + }, + { + "epoch": 1.0752965862319426, + "grad_norm": 0.7554972767829895, + "learning_rate": 5.061191418651186e-05, + "loss": 2.483, + "step": 13324 + }, + { + "epoch": 1.0753772899685254, + "grad_norm": 0.7661007642745972, + "learning_rate": 5.059818762736521e-05, + "loss": 2.566, + "step": 13325 + }, + { + "epoch": 1.0754579937051085, + "grad_norm": 0.7416480183601379, + "learning_rate": 5.058446229944087e-05, + "loss": 2.465, + "step": 13326 + }, + { + "epoch": 1.0755386974416916, + "grad_norm": 0.6997848749160767, + "learning_rate": 5.057073820308089e-05, + "loss": 2.4936, + "step": 13327 + }, + { + "epoch": 1.0756194011782745, + "grad_norm": 0.7570235133171082, + "learning_rate": 5.0557015338627345e-05, + "loss": 2.519, + "step": 13328 + }, + { + "epoch": 1.0757001049148576, + 
"grad_norm": 0.7910803556442261, + "learning_rate": 5.0543293706422214e-05, + "loss": 2.4932, + "step": 13329 + }, + { + "epoch": 1.0757808086514407, + "grad_norm": 0.7068312168121338, + "learning_rate": 5.052957330680752e-05, + "loss": 2.4489, + "step": 13330 + }, + { + "epoch": 1.0758615123880235, + "grad_norm": 0.7818215489387512, + "learning_rate": 5.051585414012514e-05, + "loss": 2.4467, + "step": 13331 + }, + { + "epoch": 1.0759422161246066, + "grad_norm": 0.7359446287155151, + "learning_rate": 5.0502136206717046e-05, + "loss": 2.4348, + "step": 13332 + }, + { + "epoch": 1.0760229198611895, + "grad_norm": 0.694726824760437, + "learning_rate": 5.0488419506925124e-05, + "loss": 2.4554, + "step": 13333 + }, + { + "epoch": 1.0761036235977726, + "grad_norm": 0.6776530742645264, + "learning_rate": 5.047470404109118e-05, + "loss": 2.4206, + "step": 13334 + }, + { + "epoch": 1.0761843273343557, + "grad_norm": 0.6977556943893433, + "learning_rate": 5.0460989809557066e-05, + "loss": 2.4748, + "step": 13335 + }, + { + "epoch": 1.0762650310709385, + "grad_norm": 0.6888061761856079, + "learning_rate": 5.044727681266459e-05, + "loss": 2.4129, + "step": 13336 + }, + { + "epoch": 1.0763457348075216, + "grad_norm": 0.744110643863678, + "learning_rate": 5.043356505075549e-05, + "loss": 2.4815, + "step": 13337 + }, + { + "epoch": 1.0764264385441047, + "grad_norm": 0.6726455688476562, + "learning_rate": 5.041985452417154e-05, + "loss": 2.4299, + "step": 13338 + }, + { + "epoch": 1.0765071422806876, + "grad_norm": 0.6755545735359192, + "learning_rate": 5.040614523325441e-05, + "loss": 2.4188, + "step": 13339 + }, + { + "epoch": 1.0765878460172706, + "grad_norm": 0.7152739763259888, + "learning_rate": 5.039243717834582e-05, + "loss": 2.4366, + "step": 13340 + }, + { + "epoch": 1.0766685497538535, + "grad_norm": 0.7253085374832153, + "learning_rate": 5.037873035978733e-05, + "loss": 2.4681, + "step": 13341 + }, + { + "epoch": 1.0767492534904366, + "grad_norm": 0.6780266165733337, + 
"learning_rate": 5.03650247779206e-05, + "loss": 2.5163, + "step": 13342 + }, + { + "epoch": 1.0768299572270197, + "grad_norm": 0.7440996170043945, + "learning_rate": 5.035132043308722e-05, + "loss": 2.4831, + "step": 13343 + }, + { + "epoch": 1.0769106609636026, + "grad_norm": 0.6619833111763, + "learning_rate": 5.0337617325628695e-05, + "loss": 2.433, + "step": 13344 + }, + { + "epoch": 1.0769913647001856, + "grad_norm": 0.7518059015274048, + "learning_rate": 5.032391545588656e-05, + "loss": 2.4241, + "step": 13345 + }, + { + "epoch": 1.0770720684367687, + "grad_norm": 0.6592784523963928, + "learning_rate": 5.031021482420231e-05, + "loss": 2.4902, + "step": 13346 + }, + { + "epoch": 1.0771527721733516, + "grad_norm": 0.7192299365997314, + "learning_rate": 5.029651543091739e-05, + "loss": 2.4445, + "step": 13347 + }, + { + "epoch": 1.0772334759099347, + "grad_norm": 0.7376793622970581, + "learning_rate": 5.028281727637323e-05, + "loss": 2.4532, + "step": 13348 + }, + { + "epoch": 1.0773141796465175, + "grad_norm": 0.7344524264335632, + "learning_rate": 5.026912036091127e-05, + "loss": 2.4193, + "step": 13349 + }, + { + "epoch": 1.0773948833831006, + "grad_norm": 0.7343986630439758, + "learning_rate": 5.0255424684872785e-05, + "loss": 2.4912, + "step": 13350 + }, + { + "epoch": 1.0774755871196837, + "grad_norm": 0.7103631496429443, + "learning_rate": 5.024173024859916e-05, + "loss": 2.4611, + "step": 13351 + }, + { + "epoch": 1.0775562908562666, + "grad_norm": 0.7554094791412354, + "learning_rate": 5.022803705243169e-05, + "loss": 2.4875, + "step": 13352 + }, + { + "epoch": 1.0776369945928497, + "grad_norm": 0.6754978895187378, + "learning_rate": 5.0214345096711655e-05, + "loss": 2.4585, + "step": 13353 + }, + { + "epoch": 1.0777176983294328, + "grad_norm": 0.690747857093811, + "learning_rate": 5.020065438178026e-05, + "loss": 2.4751, + "step": 13354 + }, + { + "epoch": 1.0777984020660156, + "grad_norm": 0.7012028694152832, + "learning_rate": 5.018696490797874e-05, 
+ "loss": 2.4443, + "step": 13355 + }, + { + "epoch": 1.0778791058025987, + "grad_norm": 0.6788459420204163, + "learning_rate": 5.017327667564831e-05, + "loss": 2.4135, + "step": 13356 + }, + { + "epoch": 1.0779598095391816, + "grad_norm": 0.6662794351577759, + "learning_rate": 5.015958968512997e-05, + "loss": 2.3801, + "step": 13357 + }, + { + "epoch": 1.0780405132757647, + "grad_norm": 0.7873939275741577, + "learning_rate": 5.0145903936764994e-05, + "loss": 2.4629, + "step": 13358 + }, + { + "epoch": 1.0781212170123478, + "grad_norm": 0.7484980225563049, + "learning_rate": 5.0132219430894455e-05, + "loss": 2.4307, + "step": 13359 + }, + { + "epoch": 1.0782019207489306, + "grad_norm": 0.7559076547622681, + "learning_rate": 5.011853616785932e-05, + "loss": 2.4846, + "step": 13360 + }, + { + "epoch": 1.0782826244855137, + "grad_norm": 0.6822710633277893, + "learning_rate": 5.010485414800066e-05, + "loss": 2.4448, + "step": 13361 + }, + { + "epoch": 1.0783633282220966, + "grad_norm": 0.6665955185890198, + "learning_rate": 5.0091173371659496e-05, + "loss": 2.4562, + "step": 13362 + }, + { + "epoch": 1.0784440319586797, + "grad_norm": 0.6645659804344177, + "learning_rate": 5.0077493839176714e-05, + "loss": 2.4545, + "step": 13363 + }, + { + "epoch": 1.0785247356952627, + "grad_norm": 0.6648181080818176, + "learning_rate": 5.0063815550893276e-05, + "loss": 2.4565, + "step": 13364 + }, + { + "epoch": 1.0786054394318456, + "grad_norm": 0.6679299473762512, + "learning_rate": 5.005013850715014e-05, + "loss": 2.4301, + "step": 13365 + }, + { + "epoch": 1.0786861431684287, + "grad_norm": 0.7116484642028809, + "learning_rate": 5.003646270828808e-05, + "loss": 2.4174, + "step": 13366 + }, + { + "epoch": 1.0787668469050118, + "grad_norm": 0.6850735545158386, + "learning_rate": 5.002278815464798e-05, + "loss": 2.4386, + "step": 13367 + }, + { + "epoch": 1.0788475506415947, + "grad_norm": 0.6613513827323914, + "learning_rate": 5.00091148465706e-05, + "loss": 2.4038, + "step": 
13368 + }, + { + "epoch": 1.0789282543781777, + "grad_norm": 0.659635603427887, + "learning_rate": 4.9995442784396827e-05, + "loss": 2.4346, + "step": 13369 + }, + { + "epoch": 1.0790089581147608, + "grad_norm": 0.6775132417678833, + "learning_rate": 4.998177196846731e-05, + "loss": 2.4853, + "step": 13370 + }, + { + "epoch": 1.0790896618513437, + "grad_norm": 0.719860851764679, + "learning_rate": 4.996810239912277e-05, + "loss": 2.4018, + "step": 13371 + }, + { + "epoch": 1.0791703655879268, + "grad_norm": 0.7316389083862305, + "learning_rate": 4.9954434076703946e-05, + "loss": 2.424, + "step": 13372 + }, + { + "epoch": 1.0792510693245096, + "grad_norm": 0.6779622435569763, + "learning_rate": 4.99407670015514e-05, + "loss": 2.4743, + "step": 13373 + }, + { + "epoch": 1.0793317730610927, + "grad_norm": 0.7357139587402344, + "learning_rate": 4.992710117400581e-05, + "loss": 2.4385, + "step": 13374 + }, + { + "epoch": 1.0794124767976758, + "grad_norm": 0.671441912651062, + "learning_rate": 4.9913436594407784e-05, + "loss": 2.3988, + "step": 13375 + }, + { + "epoch": 1.0794931805342587, + "grad_norm": 0.7205149531364441, + "learning_rate": 4.9899773263097804e-05, + "loss": 2.4594, + "step": 13376 + }, + { + "epoch": 1.0795738842708418, + "grad_norm": 0.702910840511322, + "learning_rate": 4.988611118041644e-05, + "loss": 2.4831, + "step": 13377 + }, + { + "epoch": 1.0796545880074246, + "grad_norm": 0.6977962255477905, + "learning_rate": 4.987245034670418e-05, + "loss": 2.422, + "step": 13378 + }, + { + "epoch": 1.0797352917440077, + "grad_norm": 0.7106757760047913, + "learning_rate": 4.985879076230149e-05, + "loss": 2.4073, + "step": 13379 + }, + { + "epoch": 1.0798159954805908, + "grad_norm": 0.7046806812286377, + "learning_rate": 4.9845132427548814e-05, + "loss": 2.4065, + "step": 13380 + }, + { + "epoch": 1.0798966992171737, + "grad_norm": 0.7476605772972107, + "learning_rate": 4.9831475342786574e-05, + "loss": 2.4886, + "step": 13381 + }, + { + "epoch": 
1.0799774029537568, + "grad_norm": 0.696977972984314, + "learning_rate": 4.981781950835508e-05, + "loss": 2.4732, + "step": 13382 + }, + { + "epoch": 1.0800581066903399, + "grad_norm": 0.6596804857254028, + "learning_rate": 4.98041649245947e-05, + "loss": 2.4497, + "step": 13383 + }, + { + "epoch": 1.0801388104269227, + "grad_norm": 0.7216050028800964, + "learning_rate": 4.979051159184573e-05, + "loss": 2.4745, + "step": 13384 + }, + { + "epoch": 1.0802195141635058, + "grad_norm": 0.6636630296707153, + "learning_rate": 4.977685951044852e-05, + "loss": 2.4904, + "step": 13385 + }, + { + "epoch": 1.0803002179000887, + "grad_norm": 0.7030208110809326, + "learning_rate": 4.97632086807432e-05, + "loss": 2.4302, + "step": 13386 + }, + { + "epoch": 1.0803809216366718, + "grad_norm": 0.7158327102661133, + "learning_rate": 4.974955910307004e-05, + "loss": 2.4735, + "step": 13387 + }, + { + "epoch": 1.0804616253732549, + "grad_norm": 0.6736464500427246, + "learning_rate": 4.9735910777769234e-05, + "loss": 2.4334, + "step": 13388 + }, + { + "epoch": 1.0805423291098377, + "grad_norm": 0.6913403272628784, + "learning_rate": 4.972226370518092e-05, + "loss": 2.468, + "step": 13389 + }, + { + "epoch": 1.0806230328464208, + "grad_norm": 0.7006524205207825, + "learning_rate": 4.970861788564522e-05, + "loss": 2.4598, + "step": 13390 + }, + { + "epoch": 1.080703736583004, + "grad_norm": 0.6892947554588318, + "learning_rate": 4.969497331950227e-05, + "loss": 2.4297, + "step": 13391 + }, + { + "epoch": 1.0807844403195868, + "grad_norm": 0.7270283699035645, + "learning_rate": 4.968133000709203e-05, + "loss": 2.5344, + "step": 13392 + }, + { + "epoch": 1.0808651440561698, + "grad_norm": 0.735342264175415, + "learning_rate": 4.9667687948754594e-05, + "loss": 2.4431, + "step": 13393 + }, + { + "epoch": 1.0809458477927527, + "grad_norm": 0.6869279146194458, + "learning_rate": 4.9654047144829974e-05, + "loss": 2.5581, + "step": 13394 + }, + { + "epoch": 1.0810265515293358, + "grad_norm": 
0.6975715160369873, + "learning_rate": 4.964040759565808e-05, + "loss": 2.4328, + "step": 13395 + }, + { + "epoch": 1.0811072552659189, + "grad_norm": 0.7312532067298889, + "learning_rate": 4.9626769301578856e-05, + "loss": 2.4686, + "step": 13396 + }, + { + "epoch": 1.0811879590025018, + "grad_norm": 0.7824496626853943, + "learning_rate": 4.9613132262932215e-05, + "loss": 2.4564, + "step": 13397 + }, + { + "epoch": 1.0812686627390848, + "grad_norm": 0.7337941527366638, + "learning_rate": 4.959949648005805e-05, + "loss": 2.4752, + "step": 13398 + }, + { + "epoch": 1.081349366475668, + "grad_norm": 0.7450836300849915, + "learning_rate": 4.958586195329617e-05, + "loss": 2.4457, + "step": 13399 + }, + { + "epoch": 1.0814300702122508, + "grad_norm": 0.6990504860877991, + "learning_rate": 4.9572228682986385e-05, + "loss": 2.4172, + "step": 13400 + }, + { + "epoch": 1.0815107739488339, + "grad_norm": 0.7293999791145325, + "learning_rate": 4.955859666946853e-05, + "loss": 2.5295, + "step": 13401 + }, + { + "epoch": 1.0815914776854167, + "grad_norm": 0.6872537136077881, + "learning_rate": 4.9544965913082264e-05, + "loss": 2.5029, + "step": 13402 + }, + { + "epoch": 1.0816721814219998, + "grad_norm": 0.6821706891059875, + "learning_rate": 4.953133641416733e-05, + "loss": 2.4738, + "step": 13403 + }, + { + "epoch": 1.081752885158583, + "grad_norm": 0.6811527609825134, + "learning_rate": 4.951770817306346e-05, + "loss": 2.4323, + "step": 13404 + }, + { + "epoch": 1.0818335888951658, + "grad_norm": 0.7138943076133728, + "learning_rate": 4.950408119011023e-05, + "loss": 2.5155, + "step": 13405 + }, + { + "epoch": 1.0819142926317489, + "grad_norm": 0.6777952909469604, + "learning_rate": 4.949045546564729e-05, + "loss": 2.4414, + "step": 13406 + }, + { + "epoch": 1.0819949963683317, + "grad_norm": 0.7065548896789551, + "learning_rate": 4.9476831000014276e-05, + "loss": 2.4913, + "step": 13407 + }, + { + "epoch": 1.0820757001049148, + "grad_norm": 0.7286355495452881, + 
"learning_rate": 4.9463207793550626e-05, + "loss": 2.4171, + "step": 13408 + }, + { + "epoch": 1.082156403841498, + "grad_norm": 0.6703049540519714, + "learning_rate": 4.944958584659597e-05, + "loss": 2.4387, + "step": 13409 + }, + { + "epoch": 1.0822371075780808, + "grad_norm": 0.6572019457817078, + "learning_rate": 4.943596515948983e-05, + "loss": 2.4324, + "step": 13410 + }, + { + "epoch": 1.0823178113146639, + "grad_norm": 0.6722360849380493, + "learning_rate": 4.942234573257156e-05, + "loss": 2.4802, + "step": 13411 + }, + { + "epoch": 1.082398515051247, + "grad_norm": 0.7122535109519958, + "learning_rate": 4.9408727566180655e-05, + "loss": 2.4531, + "step": 13412 + }, + { + "epoch": 1.0824792187878298, + "grad_norm": 0.6769903898239136, + "learning_rate": 4.9395110660656505e-05, + "loss": 2.4549, + "step": 13413 + }, + { + "epoch": 1.082559922524413, + "grad_norm": 0.766251266002655, + "learning_rate": 4.938149501633852e-05, + "loss": 2.4416, + "step": 13414 + }, + { + "epoch": 1.082640626260996, + "grad_norm": 0.6677987575531006, + "learning_rate": 4.936788063356596e-05, + "loss": 2.4578, + "step": 13415 + }, + { + "epoch": 1.0827213299975789, + "grad_norm": 0.7461380362510681, + "learning_rate": 4.9354267512678156e-05, + "loss": 2.4776, + "step": 13416 + }, + { + "epoch": 1.082802033734162, + "grad_norm": 0.6681976914405823, + "learning_rate": 4.934065565401443e-05, + "loss": 2.5044, + "step": 13417 + }, + { + "epoch": 1.0828827374707448, + "grad_norm": 0.6809324622154236, + "learning_rate": 4.932704505791397e-05, + "loss": 2.4651, + "step": 13418 + }, + { + "epoch": 1.082963441207328, + "grad_norm": 0.6926563382148743, + "learning_rate": 4.931343572471596e-05, + "loss": 2.4633, + "step": 13419 + }, + { + "epoch": 1.083044144943911, + "grad_norm": 0.6451820135116577, + "learning_rate": 4.929982765475971e-05, + "loss": 2.474, + "step": 13420 + }, + { + "epoch": 1.0831248486804939, + "grad_norm": 0.7088493704795837, + "learning_rate": 4.9286220848384247e-05, 
+ "loss": 2.462, + "step": 13421 + }, + { + "epoch": 1.083205552417077, + "grad_norm": 0.7819172739982605, + "learning_rate": 4.9272615305928725e-05, + "loss": 2.4534, + "step": 13422 + }, + { + "epoch": 1.0832862561536598, + "grad_norm": 0.6579666137695312, + "learning_rate": 4.925901102773227e-05, + "loss": 2.4101, + "step": 13423 + }, + { + "epoch": 1.083366959890243, + "grad_norm": 0.6999555230140686, + "learning_rate": 4.924540801413385e-05, + "loss": 2.4534, + "step": 13424 + }, + { + "epoch": 1.083447663626826, + "grad_norm": 0.7034400105476379, + "learning_rate": 4.9231806265472555e-05, + "loss": 2.4741, + "step": 13425 + }, + { + "epoch": 1.0835283673634089, + "grad_norm": 0.6595034599304199, + "learning_rate": 4.921820578208739e-05, + "loss": 2.4011, + "step": 13426 + }, + { + "epoch": 1.083609071099992, + "grad_norm": 0.666419267654419, + "learning_rate": 4.920460656431723e-05, + "loss": 2.4399, + "step": 13427 + }, + { + "epoch": 1.083689774836575, + "grad_norm": 0.7058294415473938, + "learning_rate": 4.919100861250108e-05, + "loss": 2.434, + "step": 13428 + }, + { + "epoch": 1.083770478573158, + "grad_norm": 0.7045806050300598, + "learning_rate": 4.917741192697779e-05, + "loss": 2.4616, + "step": 13429 + }, + { + "epoch": 1.083851182309741, + "grad_norm": 0.6565639972686768, + "learning_rate": 4.916381650808626e-05, + "loss": 2.3864, + "step": 13430 + }, + { + "epoch": 1.0839318860463238, + "grad_norm": 0.6939674615859985, + "learning_rate": 4.9150222356165295e-05, + "loss": 2.4217, + "step": 13431 + }, + { + "epoch": 1.084012589782907, + "grad_norm": 0.7240599989891052, + "learning_rate": 4.913662947155373e-05, + "loss": 2.447, + "step": 13432 + }, + { + "epoch": 1.08409329351949, + "grad_norm": 0.7369012832641602, + "learning_rate": 4.9123037854590336e-05, + "loss": 2.4588, + "step": 13433 + }, + { + "epoch": 1.0841739972560729, + "grad_norm": 0.714269757270813, + "learning_rate": 4.9109447505613803e-05, + "loss": 2.4921, + "step": 13434 + }, + { + 
"epoch": 1.084254700992656, + "grad_norm": 0.7541659474372864, + "learning_rate": 4.909585842496287e-05, + "loss": 2.4191, + "step": 13435 + }, + { + "epoch": 1.084335404729239, + "grad_norm": 0.7245596051216125, + "learning_rate": 4.9082270612976243e-05, + "loss": 2.4904, + "step": 13436 + }, + { + "epoch": 1.084416108465822, + "grad_norm": 0.7301090359687805, + "learning_rate": 4.90686840699925e-05, + "loss": 2.4461, + "step": 13437 + }, + { + "epoch": 1.084496812202405, + "grad_norm": 0.7404102683067322, + "learning_rate": 4.905509879635028e-05, + "loss": 2.4826, + "step": 13438 + }, + { + "epoch": 1.0845775159389879, + "grad_norm": 0.7053710222244263, + "learning_rate": 4.9041514792388175e-05, + "loss": 2.4231, + "step": 13439 + }, + { + "epoch": 1.084658219675571, + "grad_norm": 0.6171362400054932, + "learning_rate": 4.9027932058444724e-05, + "loss": 2.4472, + "step": 13440 + }, + { + "epoch": 1.084738923412154, + "grad_norm": 0.7367038130760193, + "learning_rate": 4.901435059485845e-05, + "loss": 2.4847, + "step": 13441 + }, + { + "epoch": 1.084819627148737, + "grad_norm": 0.754828691482544, + "learning_rate": 4.900077040196788e-05, + "loss": 2.4731, + "step": 13442 + }, + { + "epoch": 1.08490033088532, + "grad_norm": 0.7380684018135071, + "learning_rate": 4.8987191480111386e-05, + "loss": 2.4227, + "step": 13443 + }, + { + "epoch": 1.084981034621903, + "grad_norm": 0.6711444854736328, + "learning_rate": 4.897361382962742e-05, + "loss": 2.4744, + "step": 13444 + }, + { + "epoch": 1.085061738358486, + "grad_norm": 0.7709227204322815, + "learning_rate": 4.896003745085438e-05, + "loss": 2.5422, + "step": 13445 + }, + { + "epoch": 1.085142442095069, + "grad_norm": 0.6778519153594971, + "learning_rate": 4.8946462344130675e-05, + "loss": 2.4757, + "step": 13446 + }, + { + "epoch": 1.085223145831652, + "grad_norm": 0.7390698194503784, + "learning_rate": 4.893288850979454e-05, + "loss": 2.4214, + "step": 13447 + }, + { + "epoch": 1.085303849568235, + "grad_norm": 
0.6632684469223022, + "learning_rate": 4.891931594818432e-05, + "loss": 2.4689, + "step": 13448 + }, + { + "epoch": 1.085384553304818, + "grad_norm": 0.68693608045578, + "learning_rate": 4.890574465963827e-05, + "loss": 2.4788, + "step": 13449 + }, + { + "epoch": 1.085465257041401, + "grad_norm": 0.6910344362258911, + "learning_rate": 4.8892174644494625e-05, + "loss": 2.4611, + "step": 13450 + }, + { + "epoch": 1.085545960777984, + "grad_norm": 0.6935380101203918, + "learning_rate": 4.887860590309158e-05, + "loss": 2.4481, + "step": 13451 + }, + { + "epoch": 1.085626664514567, + "grad_norm": 0.7086954712867737, + "learning_rate": 4.886503843576735e-05, + "loss": 2.4583, + "step": 13452 + }, + { + "epoch": 1.08570736825115, + "grad_norm": 0.7447777986526489, + "learning_rate": 4.8851472242859994e-05, + "loss": 2.5035, + "step": 13453 + }, + { + "epoch": 1.085788071987733, + "grad_norm": 0.6896036267280579, + "learning_rate": 4.8837907324707656e-05, + "loss": 2.4622, + "step": 13454 + }, + { + "epoch": 1.085868775724316, + "grad_norm": 0.7261155247688293, + "learning_rate": 4.882434368164843e-05, + "loss": 2.4958, + "step": 13455 + }, + { + "epoch": 1.085949479460899, + "grad_norm": 0.6868197321891785, + "learning_rate": 4.881078131402031e-05, + "loss": 2.4952, + "step": 13456 + }, + { + "epoch": 1.0860301831974821, + "grad_norm": 0.6338867545127869, + "learning_rate": 4.879722022216132e-05, + "loss": 2.4553, + "step": 13457 + }, + { + "epoch": 1.086110886934065, + "grad_norm": 0.7214454412460327, + "learning_rate": 4.878366040640946e-05, + "loss": 2.4433, + "step": 13458 + }, + { + "epoch": 1.086191590670648, + "grad_norm": 0.6871301531791687, + "learning_rate": 4.877010186710266e-05, + "loss": 2.4118, + "step": 13459 + }, + { + "epoch": 1.0862722944072312, + "grad_norm": 0.6845650672912598, + "learning_rate": 4.875654460457883e-05, + "loss": 2.4684, + "step": 13460 + }, + { + "epoch": 1.086352998143814, + "grad_norm": 0.7027513980865479, + "learning_rate": 
4.8742988619175865e-05, + "loss": 2.4569, + "step": 13461 + }, + { + "epoch": 1.0864337018803971, + "grad_norm": 0.6428621411323547, + "learning_rate": 4.8729433911231646e-05, + "loss": 2.4211, + "step": 13462 + }, + { + "epoch": 1.08651440561698, + "grad_norm": 0.6921488046646118, + "learning_rate": 4.8715880481083934e-05, + "loss": 2.4668, + "step": 13463 + }, + { + "epoch": 1.086595109353563, + "grad_norm": 0.7001025676727295, + "learning_rate": 4.870232832907051e-05, + "loss": 2.4685, + "step": 13464 + }, + { + "epoch": 1.0866758130901462, + "grad_norm": 0.7460644245147705, + "learning_rate": 4.868877745552922e-05, + "loss": 2.3922, + "step": 13465 + }, + { + "epoch": 1.086756516826729, + "grad_norm": 0.7418891191482544, + "learning_rate": 4.867522786079768e-05, + "loss": 2.3777, + "step": 13466 + }, + { + "epoch": 1.0868372205633121, + "grad_norm": 0.6430083513259888, + "learning_rate": 4.8661679545213625e-05, + "loss": 2.4385, + "step": 13467 + }, + { + "epoch": 1.086917924299895, + "grad_norm": 0.6963593363761902, + "learning_rate": 4.864813250911475e-05, + "loss": 2.4083, + "step": 13468 + }, + { + "epoch": 1.086998628036478, + "grad_norm": 0.6796097159385681, + "learning_rate": 4.8634586752838606e-05, + "loss": 2.4984, + "step": 13469 + }, + { + "epoch": 1.0870793317730612, + "grad_norm": 0.6845307946205139, + "learning_rate": 4.862104227672281e-05, + "loss": 2.4168, + "step": 13470 + }, + { + "epoch": 1.087160035509644, + "grad_norm": 0.705348014831543, + "learning_rate": 4.8607499081105e-05, + "loss": 2.4216, + "step": 13471 + }, + { + "epoch": 1.087240739246227, + "grad_norm": 0.6906474828720093, + "learning_rate": 4.8593957166322636e-05, + "loss": 2.4955, + "step": 13472 + }, + { + "epoch": 1.0873214429828102, + "grad_norm": 0.696489691734314, + "learning_rate": 4.858041653271323e-05, + "loss": 2.4186, + "step": 13473 + }, + { + "epoch": 1.087402146719393, + "grad_norm": 0.6997761726379395, + "learning_rate": 4.856687718061429e-05, + "loss": 2.441, + 
"step": 13474 + }, + { + "epoch": 1.0874828504559761, + "grad_norm": 0.6515649557113647, + "learning_rate": 4.8553339110363184e-05, + "loss": 2.3997, + "step": 13475 + }, + { + "epoch": 1.087563554192559, + "grad_norm": 0.6902725696563721, + "learning_rate": 4.853980232229734e-05, + "loss": 2.4765, + "step": 13476 + }, + { + "epoch": 1.087644257929142, + "grad_norm": 0.6832055449485779, + "learning_rate": 4.852626681675415e-05, + "loss": 2.411, + "step": 13477 + }, + { + "epoch": 1.0877249616657252, + "grad_norm": 0.668520987033844, + "learning_rate": 4.8512732594070984e-05, + "loss": 2.4742, + "step": 13478 + }, + { + "epoch": 1.087805665402308, + "grad_norm": 0.7019832134246826, + "learning_rate": 4.849919965458507e-05, + "loss": 2.4638, + "step": 13479 + }, + { + "epoch": 1.0878863691388911, + "grad_norm": 0.6986027359962463, + "learning_rate": 4.8485667998633724e-05, + "loss": 2.4866, + "step": 13480 + }, + { + "epoch": 1.0879670728754742, + "grad_norm": 0.659037709236145, + "learning_rate": 4.8472137626554195e-05, + "loss": 2.4821, + "step": 13481 + }, + { + "epoch": 1.088047776612057, + "grad_norm": 0.6506801247596741, + "learning_rate": 4.8458608538683694e-05, + "loss": 2.4686, + "step": 13482 + }, + { + "epoch": 1.0881284803486402, + "grad_norm": 0.7136878967285156, + "learning_rate": 4.844508073535939e-05, + "loss": 2.4523, + "step": 13483 + }, + { + "epoch": 1.088209184085223, + "grad_norm": 0.6663414239883423, + "learning_rate": 4.843155421691848e-05, + "loss": 2.4287, + "step": 13484 + }, + { + "epoch": 1.0882898878218061, + "grad_norm": 0.7192783355712891, + "learning_rate": 4.8418028983698006e-05, + "loss": 2.4433, + "step": 13485 + }, + { + "epoch": 1.0883705915583892, + "grad_norm": 0.6620980501174927, + "learning_rate": 4.8404505036035086e-05, + "loss": 2.4823, + "step": 13486 + }, + { + "epoch": 1.088451295294972, + "grad_norm": 0.6282123327255249, + "learning_rate": 4.83909823742668e-05, + "loss": 2.4641, + "step": 13487 + }, + { + "epoch": 
1.0885319990315552, + "grad_norm": 0.6384354829788208, + "learning_rate": 4.837746099873012e-05, + "loss": 2.4234, + "step": 13488 + }, + { + "epoch": 1.0886127027681383, + "grad_norm": 0.6550076603889465, + "learning_rate": 4.836394090976204e-05, + "loss": 2.4743, + "step": 13489 + }, + { + "epoch": 1.0886934065047211, + "grad_norm": 0.6987888216972351, + "learning_rate": 4.8350422107699545e-05, + "loss": 2.4263, + "step": 13490 + }, + { + "epoch": 1.0887741102413042, + "grad_norm": 0.7012613415718079, + "learning_rate": 4.833690459287953e-05, + "loss": 2.4801, + "step": 13491 + }, + { + "epoch": 1.088854813977887, + "grad_norm": 0.6986923217773438, + "learning_rate": 4.832338836563891e-05, + "loss": 2.426, + "step": 13492 + }, + { + "epoch": 1.0889355177144702, + "grad_norm": 0.6936241984367371, + "learning_rate": 4.830987342631453e-05, + "loss": 2.4361, + "step": 13493 + }, + { + "epoch": 1.0890162214510533, + "grad_norm": 0.6612359881401062, + "learning_rate": 4.8296359775243275e-05, + "loss": 2.4385, + "step": 13494 + }, + { + "epoch": 1.0890969251876361, + "grad_norm": 0.6927692294120789, + "learning_rate": 4.828284741276183e-05, + "loss": 2.4692, + "step": 13495 + }, + { + "epoch": 1.0891776289242192, + "grad_norm": 0.6710225343704224, + "learning_rate": 4.8269336339207036e-05, + "loss": 2.4078, + "step": 13496 + }, + { + "epoch": 1.0892583326608023, + "grad_norm": 0.639076828956604, + "learning_rate": 4.825582655491564e-05, + "loss": 2.4368, + "step": 13497 + }, + { + "epoch": 1.0893390363973852, + "grad_norm": 0.7050483226776123, + "learning_rate": 4.824231806022426e-05, + "loss": 2.4308, + "step": 13498 + }, + { + "epoch": 1.0894197401339683, + "grad_norm": 0.7097769975662231, + "learning_rate": 4.822881085546962e-05, + "loss": 2.4378, + "step": 13499 + }, + { + "epoch": 1.0895004438705511, + "grad_norm": 0.6939458847045898, + "learning_rate": 4.821530494098834e-05, + "loss": 2.4678, + "step": 13500 + }, + { + "epoch": 1.0895811476071342, + "grad_norm": 
0.6797441840171814, + "learning_rate": 4.8201800317117016e-05, + "loss": 2.4837, + "step": 13501 + }, + { + "epoch": 1.0896618513437173, + "grad_norm": 0.7451521158218384, + "learning_rate": 4.818829698419225e-05, + "loss": 2.4651, + "step": 13502 + }, + { + "epoch": 1.0897425550803002, + "grad_norm": 0.6749109625816345, + "learning_rate": 4.8174794942550585e-05, + "loss": 2.4569, + "step": 13503 + }, + { + "epoch": 1.0898232588168832, + "grad_norm": 0.6321636438369751, + "learning_rate": 4.8161294192528474e-05, + "loss": 2.4049, + "step": 13504 + }, + { + "epoch": 1.0899039625534663, + "grad_norm": 0.7002367377281189, + "learning_rate": 4.8147794734462415e-05, + "loss": 2.4489, + "step": 13505 + }, + { + "epoch": 1.0899846662900492, + "grad_norm": 0.758057713508606, + "learning_rate": 4.813429656868889e-05, + "loss": 2.436, + "step": 13506 + }, + { + "epoch": 1.0900653700266323, + "grad_norm": 0.6665529012680054, + "learning_rate": 4.812079969554424e-05, + "loss": 2.3805, + "step": 13507 + }, + { + "epoch": 1.0901460737632152, + "grad_norm": 0.6962547898292542, + "learning_rate": 4.810730411536487e-05, + "loss": 2.4203, + "step": 13508 + }, + { + "epoch": 1.0902267774997982, + "grad_norm": 0.6860647201538086, + "learning_rate": 4.809380982848712e-05, + "loss": 2.4482, + "step": 13509 + }, + { + "epoch": 1.0903074812363813, + "grad_norm": 0.7045090198516846, + "learning_rate": 4.808031683524733e-05, + "loss": 2.4155, + "step": 13510 + }, + { + "epoch": 1.0903881849729642, + "grad_norm": 0.6609304547309875, + "learning_rate": 4.806682513598176e-05, + "loss": 2.4295, + "step": 13511 + }, + { + "epoch": 1.0904688887095473, + "grad_norm": 0.7647323608398438, + "learning_rate": 4.8053334731026665e-05, + "loss": 2.4704, + "step": 13512 + }, + { + "epoch": 1.0905495924461301, + "grad_norm": 0.677449643611908, + "learning_rate": 4.803984562071829e-05, + "loss": 2.4501, + "step": 13513 + }, + { + "epoch": 1.0906302961827132, + "grad_norm": 0.645866334438324, + 
"learning_rate": 4.8026357805392754e-05, + "loss": 2.427, + "step": 13514 + }, + { + "epoch": 1.0907109999192963, + "grad_norm": 0.6968488097190857, + "learning_rate": 4.801287128538624e-05, + "loss": 2.3933, + "step": 13515 + }, + { + "epoch": 1.0907917036558792, + "grad_norm": 0.7137444615364075, + "learning_rate": 4.799938606103491e-05, + "loss": 2.4611, + "step": 13516 + }, + { + "epoch": 1.0908724073924623, + "grad_norm": 0.6860007047653198, + "learning_rate": 4.7985902132674765e-05, + "loss": 2.4252, + "step": 13517 + }, + { + "epoch": 1.0909531111290454, + "grad_norm": 0.726290762424469, + "learning_rate": 4.797241950064192e-05, + "loss": 2.44, + "step": 13518 + }, + { + "epoch": 1.0910338148656282, + "grad_norm": 0.6833362579345703, + "learning_rate": 4.795893816527241e-05, + "loss": 2.4199, + "step": 13519 + }, + { + "epoch": 1.0911145186022113, + "grad_norm": 0.7412242293357849, + "learning_rate": 4.794545812690212e-05, + "loss": 2.5412, + "step": 13520 + }, + { + "epoch": 1.0911952223387944, + "grad_norm": 0.6882274150848389, + "learning_rate": 4.793197938586712e-05, + "loss": 2.473, + "step": 13521 + }, + { + "epoch": 1.0912759260753773, + "grad_norm": 0.7334007024765015, + "learning_rate": 4.791850194250335e-05, + "loss": 2.4357, + "step": 13522 + }, + { + "epoch": 1.0913566298119604, + "grad_norm": 0.6564081311225891, + "learning_rate": 4.790502579714661e-05, + "loss": 2.4425, + "step": 13523 + }, + { + "epoch": 1.0914373335485432, + "grad_norm": 0.7045762538909912, + "learning_rate": 4.78915509501328e-05, + "loss": 2.4929, + "step": 13524 + }, + { + "epoch": 1.0915180372851263, + "grad_norm": 0.7512505650520325, + "learning_rate": 4.787807740179776e-05, + "loss": 2.4187, + "step": 13525 + }, + { + "epoch": 1.0915987410217094, + "grad_norm": 0.6592997908592224, + "learning_rate": 4.786460515247732e-05, + "loss": 2.4344, + "step": 13526 + }, + { + "epoch": 1.0916794447582923, + "grad_norm": 0.6721770763397217, + "learning_rate": 4.785113420250715e-05, 
+ "loss": 2.4415, + "step": 13527 + }, + { + "epoch": 1.0917601484948753, + "grad_norm": 0.7544431686401367, + "learning_rate": 4.783766455222305e-05, + "loss": 2.4831, + "step": 13528 + }, + { + "epoch": 1.0918408522314582, + "grad_norm": 0.7226355671882629, + "learning_rate": 4.782419620196073e-05, + "loss": 2.4807, + "step": 13529 + }, + { + "epoch": 1.0919215559680413, + "grad_norm": 0.6386340260505676, + "learning_rate": 4.78107291520558e-05, + "loss": 2.4062, + "step": 13530 + }, + { + "epoch": 1.0920022597046244, + "grad_norm": 0.6670595407485962, + "learning_rate": 4.7797263402843926e-05, + "loss": 2.4009, + "step": 13531 + }, + { + "epoch": 1.0920829634412073, + "grad_norm": 0.6600756049156189, + "learning_rate": 4.778379895466071e-05, + "loss": 2.4321, + "step": 13532 + }, + { + "epoch": 1.0921636671777903, + "grad_norm": 0.7190701961517334, + "learning_rate": 4.77703358078417e-05, + "loss": 2.4229, + "step": 13533 + }, + { + "epoch": 1.0922443709143734, + "grad_norm": 0.6554828882217407, + "learning_rate": 4.775687396272247e-05, + "loss": 2.442, + "step": 13534 + }, + { + "epoch": 1.0923250746509563, + "grad_norm": 0.6720205545425415, + "learning_rate": 4.774341341963853e-05, + "loss": 2.4994, + "step": 13535 + }, + { + "epoch": 1.0924057783875394, + "grad_norm": 0.7161003947257996, + "learning_rate": 4.7729954178925295e-05, + "loss": 2.4666, + "step": 13536 + }, + { + "epoch": 1.0924864821241222, + "grad_norm": 0.6817156672477722, + "learning_rate": 4.771649624091824e-05, + "loss": 2.4203, + "step": 13537 + }, + { + "epoch": 1.0925671858607053, + "grad_norm": 0.7167035937309265, + "learning_rate": 4.770303960595277e-05, + "loss": 2.4214, + "step": 13538 + }, + { + "epoch": 1.0926478895972884, + "grad_norm": 0.6373945474624634, + "learning_rate": 4.768958427436429e-05, + "loss": 2.485, + "step": 13539 + }, + { + "epoch": 1.0927285933338713, + "grad_norm": 0.7361387014389038, + "learning_rate": 4.767613024648808e-05, + "loss": 2.5192, + "step": 13540 + }, 
+ { + "epoch": 1.0928092970704544, + "grad_norm": 0.7034375667572021, + "learning_rate": 4.766267752265947e-05, + "loss": 2.4324, + "step": 13541 + }, + { + "epoch": 1.0928900008070375, + "grad_norm": 0.7355689406394958, + "learning_rate": 4.7649226103213765e-05, + "loss": 2.5048, + "step": 13542 + }, + { + "epoch": 1.0929707045436203, + "grad_norm": 0.7120445966720581, + "learning_rate": 4.7635775988486176e-05, + "loss": 2.449, + "step": 13543 + }, + { + "epoch": 1.0930514082802034, + "grad_norm": 0.695888876914978, + "learning_rate": 4.7622327178811935e-05, + "loss": 2.4974, + "step": 13544 + }, + { + "epoch": 1.0931321120167863, + "grad_norm": 0.6953639984130859, + "learning_rate": 4.760887967452625e-05, + "loss": 2.3927, + "step": 13545 + }, + { + "epoch": 1.0932128157533694, + "grad_norm": 0.6457183957099915, + "learning_rate": 4.759543347596421e-05, + "loss": 2.4501, + "step": 13546 + }, + { + "epoch": 1.0932935194899525, + "grad_norm": 0.7259296774864197, + "learning_rate": 4.7581988583460946e-05, + "loss": 2.4896, + "step": 13547 + }, + { + "epoch": 1.0933742232265353, + "grad_norm": 0.6897724270820618, + "learning_rate": 4.7568544997351586e-05, + "loss": 2.4181, + "step": 13548 + }, + { + "epoch": 1.0934549269631184, + "grad_norm": 0.6723688840866089, + "learning_rate": 4.755510271797111e-05, + "loss": 2.5097, + "step": 13549 + }, + { + "epoch": 1.0935356306997015, + "grad_norm": 0.7353307604789734, + "learning_rate": 4.754166174565456e-05, + "loss": 2.4548, + "step": 13550 + }, + { + "epoch": 1.0936163344362844, + "grad_norm": 0.7334069013595581, + "learning_rate": 4.752822208073693e-05, + "loss": 2.5113, + "step": 13551 + }, + { + "epoch": 1.0936970381728675, + "grad_norm": 0.6581420302391052, + "learning_rate": 4.751478372355317e-05, + "loss": 2.4546, + "step": 13552 + }, + { + "epoch": 1.0937777419094503, + "grad_norm": 0.7890802621841431, + "learning_rate": 4.75013466744382e-05, + "loss": 2.4092, + "step": 13553 + }, + { + "epoch": 1.0938584456460334, 
+ "grad_norm": 0.7226595282554626, + "learning_rate": 4.7487910933726895e-05, + "loss": 2.457, + "step": 13554 + }, + { + "epoch": 1.0939391493826165, + "grad_norm": 0.7108014225959778, + "learning_rate": 4.7474476501754165e-05, + "loss": 2.471, + "step": 13555 + }, + { + "epoch": 1.0940198531191994, + "grad_norm": 0.6864863038063049, + "learning_rate": 4.746104337885473e-05, + "loss": 2.4778, + "step": 13556 + }, + { + "epoch": 1.0941005568557824, + "grad_norm": 0.6890624165534973, + "learning_rate": 4.744761156536345e-05, + "loss": 2.456, + "step": 13557 + }, + { + "epoch": 1.0941812605923653, + "grad_norm": 0.7052781581878662, + "learning_rate": 4.743418106161509e-05, + "loss": 2.4796, + "step": 13558 + }, + { + "epoch": 1.0942619643289484, + "grad_norm": 0.6569164991378784, + "learning_rate": 4.742075186794431e-05, + "loss": 2.469, + "step": 13559 + }, + { + "epoch": 1.0943426680655315, + "grad_norm": 0.7302874326705933, + "learning_rate": 4.7407323984685836e-05, + "loss": 2.4543, + "step": 13560 + }, + { + "epoch": 1.0944233718021144, + "grad_norm": 0.6499345898628235, + "learning_rate": 4.7393897412174335e-05, + "loss": 2.4037, + "step": 13561 + }, + { + "epoch": 1.0945040755386974, + "grad_norm": 0.6643944382667542, + "learning_rate": 4.7380472150744416e-05, + "loss": 2.4067, + "step": 13562 + }, + { + "epoch": 1.0945847792752805, + "grad_norm": 0.7491872906684875, + "learning_rate": 4.736704820073069e-05, + "loss": 2.4277, + "step": 13563 + }, + { + "epoch": 1.0946654830118634, + "grad_norm": 0.7319512367248535, + "learning_rate": 4.735362556246773e-05, + "loss": 2.4588, + "step": 13564 + }, + { + "epoch": 1.0947461867484465, + "grad_norm": 0.7404350638389587, + "learning_rate": 4.734020423629001e-05, + "loss": 2.432, + "step": 13565 + }, + { + "epoch": 1.0948268904850296, + "grad_norm": 0.6462193727493286, + "learning_rate": 4.732678422253206e-05, + "loss": 2.4417, + "step": 13566 + }, + { + "epoch": 1.0949075942216124, + "grad_norm": 0.6711323857307434, + 
"learning_rate": 4.731336552152836e-05, + "loss": 2.4023, + "step": 13567 + }, + { + "epoch": 1.0949882979581955, + "grad_norm": 0.658261239528656, + "learning_rate": 4.729994813361329e-05, + "loss": 2.4132, + "step": 13568 + }, + { + "epoch": 1.0950690016947784, + "grad_norm": 0.8081904053688049, + "learning_rate": 4.728653205912127e-05, + "loss": 2.4412, + "step": 13569 + }, + { + "epoch": 1.0951497054313615, + "grad_norm": 0.6620786786079407, + "learning_rate": 4.727311729838666e-05, + "loss": 2.4357, + "step": 13570 + }, + { + "epoch": 1.0952304091679446, + "grad_norm": 0.7026848793029785, + "learning_rate": 4.725970385174381e-05, + "loss": 2.4159, + "step": 13571 + }, + { + "epoch": 1.0953111129045274, + "grad_norm": 0.7017392516136169, + "learning_rate": 4.7246291719526995e-05, + "loss": 2.4253, + "step": 13572 + }, + { + "epoch": 1.0953918166411105, + "grad_norm": 0.710172712802887, + "learning_rate": 4.7232880902070483e-05, + "loss": 2.4057, + "step": 13573 + }, + { + "epoch": 1.0954725203776934, + "grad_norm": 0.7208876013755798, + "learning_rate": 4.721947139970856e-05, + "loss": 2.4803, + "step": 13574 + }, + { + "epoch": 1.0955532241142765, + "grad_norm": 0.693219006061554, + "learning_rate": 4.720606321277534e-05, + "loss": 2.3611, + "step": 13575 + }, + { + "epoch": 1.0956339278508596, + "grad_norm": 0.737206757068634, + "learning_rate": 4.7192656341605026e-05, + "loss": 2.3873, + "step": 13576 + }, + { + "epoch": 1.0957146315874424, + "grad_norm": 0.6605268120765686, + "learning_rate": 4.717925078653179e-05, + "loss": 2.4155, + "step": 13577 + }, + { + "epoch": 1.0957953353240255, + "grad_norm": 0.7143047451972961, + "learning_rate": 4.716584654788967e-05, + "loss": 2.4526, + "step": 13578 + }, + { + "epoch": 1.0958760390606086, + "grad_norm": 0.6980953216552734, + "learning_rate": 4.715244362601277e-05, + "loss": 2.4422, + "step": 13579 + }, + { + "epoch": 1.0959567427971915, + "grad_norm": 0.6852009892463684, + "learning_rate": 
4.713904202123515e-05, + "loss": 2.4599, + "step": 13580 + }, + { + "epoch": 1.0960374465337746, + "grad_norm": 0.7436656355857849, + "learning_rate": 4.712564173389074e-05, + "loss": 2.4441, + "step": 13581 + }, + { + "epoch": 1.0961181502703574, + "grad_norm": 0.7090624570846558, + "learning_rate": 4.711224276431352e-05, + "loss": 2.4741, + "step": 13582 + }, + { + "epoch": 1.0961988540069405, + "grad_norm": 0.6611043810844421, + "learning_rate": 4.709884511283753e-05, + "loss": 2.4589, + "step": 13583 + }, + { + "epoch": 1.0962795577435236, + "grad_norm": 0.6932426691055298, + "learning_rate": 4.708544877979658e-05, + "loss": 2.4199, + "step": 13584 + }, + { + "epoch": 1.0963602614801065, + "grad_norm": 0.7629422545433044, + "learning_rate": 4.707205376552456e-05, + "loss": 2.4588, + "step": 13585 + }, + { + "epoch": 1.0964409652166895, + "grad_norm": 0.8116739392280579, + "learning_rate": 4.705866007035531e-05, + "loss": 2.472, + "step": 13586 + }, + { + "epoch": 1.0965216689532726, + "grad_norm": 0.6711297631263733, + "learning_rate": 4.704526769462269e-05, + "loss": 2.4086, + "step": 13587 + }, + { + "epoch": 1.0966023726898555, + "grad_norm": 0.716015636920929, + "learning_rate": 4.703187663866037e-05, + "loss": 2.4411, + "step": 13588 + }, + { + "epoch": 1.0966830764264386, + "grad_norm": 0.6982430219650269, + "learning_rate": 4.701848690280215e-05, + "loss": 2.4438, + "step": 13589 + }, + { + "epoch": 1.0967637801630215, + "grad_norm": 0.7183159589767456, + "learning_rate": 4.7005098487381785e-05, + "loss": 2.4464, + "step": 13590 + }, + { + "epoch": 1.0968444838996045, + "grad_norm": 0.6983399391174316, + "learning_rate": 4.699171139273284e-05, + "loss": 2.4354, + "step": 13591 + }, + { + "epoch": 1.0969251876361876, + "grad_norm": 0.7157938480377197, + "learning_rate": 4.697832561918901e-05, + "loss": 2.4393, + "step": 13592 + }, + { + "epoch": 1.0970058913727705, + "grad_norm": 0.6991363763809204, + "learning_rate": 4.696494116708392e-05, + "loss": 
2.4723, + "step": 13593 + }, + { + "epoch": 1.0970865951093536, + "grad_norm": 0.6722309589385986, + "learning_rate": 4.695155803675112e-05, + "loss": 2.447, + "step": 13594 + }, + { + "epoch": 1.0971672988459367, + "grad_norm": 0.6492688655853271, + "learning_rate": 4.6938176228524175e-05, + "loss": 2.4213, + "step": 13595 + }, + { + "epoch": 1.0972480025825195, + "grad_norm": 0.6941642165184021, + "learning_rate": 4.6924795742736616e-05, + "loss": 2.4714, + "step": 13596 + }, + { + "epoch": 1.0973287063191026, + "grad_norm": 0.7506042122840881, + "learning_rate": 4.691141657972185e-05, + "loss": 2.4563, + "step": 13597 + }, + { + "epoch": 1.0974094100556855, + "grad_norm": 0.7032836675643921, + "learning_rate": 4.6898038739813356e-05, + "loss": 2.4824, + "step": 13598 + }, + { + "epoch": 1.0974901137922686, + "grad_norm": 0.6908734440803528, + "learning_rate": 4.6884662223344575e-05, + "loss": 2.4486, + "step": 13599 + }, + { + "epoch": 1.0975708175288517, + "grad_norm": 0.714971661567688, + "learning_rate": 4.687128703064883e-05, + "loss": 2.4372, + "step": 13600 + }, + { + "epoch": 1.0976515212654345, + "grad_norm": 0.6989198327064514, + "learning_rate": 4.6857913162059486e-05, + "loss": 2.395, + "step": 13601 + }, + { + "epoch": 1.0977322250020176, + "grad_norm": 0.7163406014442444, + "learning_rate": 4.684454061790987e-05, + "loss": 2.4868, + "step": 13602 + }, + { + "epoch": 1.0978129287386005, + "grad_norm": 0.6600626707077026, + "learning_rate": 4.6831169398533245e-05, + "loss": 2.5134, + "step": 13603 + }, + { + "epoch": 1.0978936324751836, + "grad_norm": 0.6657080054283142, + "learning_rate": 4.681779950426286e-05, + "loss": 2.4701, + "step": 13604 + }, + { + "epoch": 1.0979743362117667, + "grad_norm": 0.665860116481781, + "learning_rate": 4.680443093543194e-05, + "loss": 2.4593, + "step": 13605 + }, + { + "epoch": 1.0980550399483495, + "grad_norm": 0.7000327110290527, + "learning_rate": 4.679106369237368e-05, + "loss": 2.4523, + "step": 13606 + }, + { + 
"epoch": 1.0981357436849326, + "grad_norm": 0.6969157457351685, + "learning_rate": 4.677769777542118e-05, + "loss": 2.4935, + "step": 13607 + }, + { + "epoch": 1.0982164474215157, + "grad_norm": 0.6864836812019348, + "learning_rate": 4.676433318490757e-05, + "loss": 2.457, + "step": 13608 + }, + { + "epoch": 1.0982971511580986, + "grad_norm": 0.7331364750862122, + "learning_rate": 4.675096992116598e-05, + "loss": 2.4253, + "step": 13609 + }, + { + "epoch": 1.0983778548946816, + "grad_norm": 0.75, + "learning_rate": 4.673760798452936e-05, + "loss": 2.4147, + "step": 13610 + }, + { + "epoch": 1.0984585586312647, + "grad_norm": 0.6589440703392029, + "learning_rate": 4.6724247375330786e-05, + "loss": 2.4718, + "step": 13611 + }, + { + "epoch": 1.0985392623678476, + "grad_norm": 0.7032667994499207, + "learning_rate": 4.671088809390324e-05, + "loss": 2.4724, + "step": 13612 + }, + { + "epoch": 1.0986199661044307, + "grad_norm": 0.7544135451316833, + "learning_rate": 4.6697530140579646e-05, + "loss": 2.4804, + "step": 13613 + }, + { + "epoch": 1.0987006698410136, + "grad_norm": 0.6503081917762756, + "learning_rate": 4.668417351569295e-05, + "loss": 2.3829, + "step": 13614 + }, + { + "epoch": 1.0987813735775966, + "grad_norm": 0.6928786039352417, + "learning_rate": 4.667081821957605e-05, + "loss": 2.5678, + "step": 13615 + }, + { + "epoch": 1.0988620773141797, + "grad_norm": 0.6652864217758179, + "learning_rate": 4.665746425256173e-05, + "loss": 2.4585, + "step": 13616 + }, + { + "epoch": 1.0989427810507626, + "grad_norm": 0.700265109539032, + "learning_rate": 4.664411161498283e-05, + "loss": 2.4785, + "step": 13617 + }, + { + "epoch": 1.0990234847873457, + "grad_norm": 0.7443608045578003, + "learning_rate": 4.663076030717216e-05, + "loss": 2.4869, + "step": 13618 + }, + { + "epoch": 1.0991041885239285, + "grad_norm": 0.7037705779075623, + "learning_rate": 4.6617410329462477e-05, + "loss": 2.4518, + "step": 13619 + }, + { + "epoch": 1.0991848922605116, + "grad_norm": 
0.7528365850448608, + "learning_rate": 4.660406168218643e-05, + "loss": 2.4616, + "step": 13620 + }, + { + "epoch": 1.0992655959970947, + "grad_norm": 0.7149221301078796, + "learning_rate": 4.659071436567676e-05, + "loss": 2.4661, + "step": 13621 + }, + { + "epoch": 1.0993462997336776, + "grad_norm": 0.7212862968444824, + "learning_rate": 4.657736838026608e-05, + "loss": 2.4424, + "step": 13622 + }, + { + "epoch": 1.0994270034702607, + "grad_norm": 0.6934216022491455, + "learning_rate": 4.6564023726287045e-05, + "loss": 2.4633, + "step": 13623 + }, + { + "epoch": 1.0995077072068438, + "grad_norm": 0.7244036793708801, + "learning_rate": 4.655068040407221e-05, + "loss": 2.409, + "step": 13624 + }, + { + "epoch": 1.0995884109434266, + "grad_norm": 0.6911318898200989, + "learning_rate": 4.653733841395419e-05, + "loss": 2.5117, + "step": 13625 + }, + { + "epoch": 1.0996691146800097, + "grad_norm": 0.7579816579818726, + "learning_rate": 4.65239977562654e-05, + "loss": 2.4927, + "step": 13626 + }, + { + "epoch": 1.0997498184165928, + "grad_norm": 0.7699651122093201, + "learning_rate": 4.651065843133837e-05, + "loss": 2.4083, + "step": 13627 + }, + { + "epoch": 1.0998305221531757, + "grad_norm": 0.6669431328773499, + "learning_rate": 4.649732043950561e-05, + "loss": 2.4402, + "step": 13628 + }, + { + "epoch": 1.0999112258897588, + "grad_norm": 0.7134940028190613, + "learning_rate": 4.6483983781099426e-05, + "loss": 2.4275, + "step": 13629 + }, + { + "epoch": 1.0999919296263416, + "grad_norm": 0.7107651233673096, + "learning_rate": 4.647064845645227e-05, + "loss": 2.4654, + "step": 13630 + }, + { + "epoch": 1.1000726333629247, + "grad_norm": 0.7101391553878784, + "learning_rate": 4.645731446589652e-05, + "loss": 2.4357, + "step": 13631 + }, + { + "epoch": 1.1001533370995078, + "grad_norm": 0.7511606216430664, + "learning_rate": 4.6443981809764405e-05, + "loss": 2.5016, + "step": 13632 + }, + { + "epoch": 1.1002340408360907, + "grad_norm": 0.7315953373908997, + 
"learning_rate": 4.6430650488388226e-05, + "loss": 2.4541, + "step": 13633 + }, + { + "epoch": 1.1003147445726738, + "grad_norm": 0.6701769232749939, + "learning_rate": 4.6417320502100316e-05, + "loss": 2.4071, + "step": 13634 + }, + { + "epoch": 1.1003954483092566, + "grad_norm": 0.7164294123649597, + "learning_rate": 4.6403991851232876e-05, + "loss": 2.478, + "step": 13635 + }, + { + "epoch": 1.1004761520458397, + "grad_norm": 0.7003894448280334, + "learning_rate": 4.639066453611802e-05, + "loss": 2.4686, + "step": 13636 + }, + { + "epoch": 1.1005568557824228, + "grad_norm": 0.6855250000953674, + "learning_rate": 4.6377338557087957e-05, + "loss": 2.4531, + "step": 13637 + }, + { + "epoch": 1.1006375595190057, + "grad_norm": 0.6581299901008606, + "learning_rate": 4.6364013914474816e-05, + "loss": 2.4511, + "step": 13638 + }, + { + "epoch": 1.1007182632555887, + "grad_norm": 0.7599080204963684, + "learning_rate": 4.6350690608610604e-05, + "loss": 2.5143, + "step": 13639 + }, + { + "epoch": 1.1007989669921718, + "grad_norm": 0.7029981017112732, + "learning_rate": 4.633736863982744e-05, + "loss": 2.4541, + "step": 13640 + }, + { + "epoch": 1.1008796707287547, + "grad_norm": 0.7378708720207214, + "learning_rate": 4.6324048008457357e-05, + "loss": 2.4319, + "step": 13641 + }, + { + "epoch": 1.1009603744653378, + "grad_norm": 0.7087826728820801, + "learning_rate": 4.631072871483226e-05, + "loss": 2.4148, + "step": 13642 + }, + { + "epoch": 1.1010410782019207, + "grad_norm": 0.7000819444656372, + "learning_rate": 4.629741075928415e-05, + "loss": 2.4692, + "step": 13643 + }, + { + "epoch": 1.1011217819385037, + "grad_norm": 0.7363965511322021, + "learning_rate": 4.628409414214496e-05, + "loss": 2.4584, + "step": 13644 + }, + { + "epoch": 1.1012024856750868, + "grad_norm": 0.6691753268241882, + "learning_rate": 4.627077886374656e-05, + "loss": 2.4356, + "step": 13645 + }, + { + "epoch": 1.1012831894116697, + "grad_norm": 0.6864185929298401, + "learning_rate": 
4.625746492442078e-05, + "loss": 2.4713, + "step": 13646 + }, + { + "epoch": 1.1013638931482528, + "grad_norm": 0.714318573474884, + "learning_rate": 4.624415232449947e-05, + "loss": 2.4482, + "step": 13647 + }, + { + "epoch": 1.1014445968848359, + "grad_norm": 0.6383495330810547, + "learning_rate": 4.623084106431444e-05, + "loss": 2.4248, + "step": 13648 + }, + { + "epoch": 1.1015253006214187, + "grad_norm": 0.7014495730400085, + "learning_rate": 4.6217531144197365e-05, + "loss": 2.4393, + "step": 13649 + }, + { + "epoch": 1.1016060043580018, + "grad_norm": 0.8128634095191956, + "learning_rate": 4.620422256448e-05, + "loss": 2.4741, + "step": 13650 + }, + { + "epoch": 1.1016867080945847, + "grad_norm": 0.7333208322525024, + "learning_rate": 4.619091532549408e-05, + "loss": 2.4288, + "step": 13651 + }, + { + "epoch": 1.1017674118311678, + "grad_norm": 0.7023218274116516, + "learning_rate": 4.617760942757117e-05, + "loss": 2.5025, + "step": 13652 + }, + { + "epoch": 1.1018481155677509, + "grad_norm": 0.6420873403549194, + "learning_rate": 4.616430487104292e-05, + "loss": 2.4165, + "step": 13653 + }, + { + "epoch": 1.1019288193043337, + "grad_norm": 0.6767684817314148, + "learning_rate": 4.615100165624092e-05, + "loss": 2.4642, + "step": 13654 + }, + { + "epoch": 1.1020095230409168, + "grad_norm": 0.7361159920692444, + "learning_rate": 4.613769978349672e-05, + "loss": 2.5343, + "step": 13655 + }, + { + "epoch": 1.1020902267775, + "grad_norm": 0.6642624735832214, + "learning_rate": 4.6124399253141846e-05, + "loss": 2.3769, + "step": 13656 + }, + { + "epoch": 1.1021709305140828, + "grad_norm": 0.6912256479263306, + "learning_rate": 4.611110006550781e-05, + "loss": 2.455, + "step": 13657 + }, + { + "epoch": 1.1022516342506659, + "grad_norm": 0.7419310212135315, + "learning_rate": 4.609780222092599e-05, + "loss": 2.4171, + "step": 13658 + }, + { + "epoch": 1.1023323379872487, + "grad_norm": 0.718953549861908, + "learning_rate": 4.6084505719727835e-05, + "loss": 2.4791, + 
"step": 13659 + }, + { + "epoch": 1.1024130417238318, + "grad_norm": 0.7904248237609863, + "learning_rate": 4.607121056224477e-05, + "loss": 2.4429, + "step": 13660 + }, + { + "epoch": 1.102493745460415, + "grad_norm": 0.6743534803390503, + "learning_rate": 4.605791674880808e-05, + "loss": 2.4481, + "step": 13661 + }, + { + "epoch": 1.1025744491969978, + "grad_norm": 0.6829143166542053, + "learning_rate": 4.6044624279749106e-05, + "loss": 2.4078, + "step": 13662 + }, + { + "epoch": 1.1026551529335809, + "grad_norm": 0.6803167462348938, + "learning_rate": 4.6031333155399136e-05, + "loss": 2.4509, + "step": 13663 + }, + { + "epoch": 1.1027358566701637, + "grad_norm": 0.7474592328071594, + "learning_rate": 4.601804337608943e-05, + "loss": 2.4563, + "step": 13664 + }, + { + "epoch": 1.1028165604067468, + "grad_norm": 0.6753630042076111, + "learning_rate": 4.6004754942151174e-05, + "loss": 2.4285, + "step": 13665 + }, + { + "epoch": 1.10289726414333, + "grad_norm": 0.7990161180496216, + "learning_rate": 4.599146785391558e-05, + "loss": 2.4907, + "step": 13666 + }, + { + "epoch": 1.1029779678799128, + "grad_norm": 0.8161290287971497, + "learning_rate": 4.597818211171383e-05, + "loss": 2.4599, + "step": 13667 + }, + { + "epoch": 1.1030586716164958, + "grad_norm": 0.6813610792160034, + "learning_rate": 4.596489771587695e-05, + "loss": 2.4484, + "step": 13668 + }, + { + "epoch": 1.103139375353079, + "grad_norm": 0.6598966121673584, + "learning_rate": 4.5951614666736076e-05, + "loss": 2.4326, + "step": 13669 + }, + { + "epoch": 1.1032200790896618, + "grad_norm": 0.7084827423095703, + "learning_rate": 4.593833296462228e-05, + "loss": 2.4188, + "step": 13670 + }, + { + "epoch": 1.1033007828262449, + "grad_norm": 0.6876685619354248, + "learning_rate": 4.59250526098665e-05, + "loss": 2.4482, + "step": 13671 + }, + { + "epoch": 1.103381486562828, + "grad_norm": 0.7292699813842773, + "learning_rate": 4.591177360279978e-05, + "loss": 2.4452, + "step": 13672 + }, + { + "epoch": 
1.1034621902994108, + "grad_norm": 0.7057675123214722, + "learning_rate": 4.589849594375304e-05, + "loss": 2.4336, + "step": 13673 + }, + { + "epoch": 1.103542894035994, + "grad_norm": 0.7684180736541748, + "learning_rate": 4.5885219633057196e-05, + "loss": 2.4453, + "step": 13674 + }, + { + "epoch": 1.1036235977725768, + "grad_norm": 0.7107112407684326, + "learning_rate": 4.5871944671043154e-05, + "loss": 2.4116, + "step": 13675 + }, + { + "epoch": 1.1037043015091599, + "grad_norm": 0.659501314163208, + "learning_rate": 4.585867105804177e-05, + "loss": 2.4907, + "step": 13676 + }, + { + "epoch": 1.103785005245743, + "grad_norm": 0.7553967833518982, + "learning_rate": 4.5845398794383786e-05, + "loss": 2.3982, + "step": 13677 + }, + { + "epoch": 1.1038657089823258, + "grad_norm": 0.6861104965209961, + "learning_rate": 4.583212788040003e-05, + "loss": 2.416, + "step": 13678 + }, + { + "epoch": 1.103946412718909, + "grad_norm": 0.6546811461448669, + "learning_rate": 4.5818858316421254e-05, + "loss": 2.4506, + "step": 13679 + }, + { + "epoch": 1.1040271164554918, + "grad_norm": 0.7012909650802612, + "learning_rate": 4.58055901027782e-05, + "loss": 2.439, + "step": 13680 + }, + { + "epoch": 1.1041078201920749, + "grad_norm": 0.7594780325889587, + "learning_rate": 4.5792323239801446e-05, + "loss": 2.4437, + "step": 13681 + }, + { + "epoch": 1.104188523928658, + "grad_norm": 0.6576492190361023, + "learning_rate": 4.577905772782172e-05, + "loss": 2.443, + "step": 13682 + }, + { + "epoch": 1.1042692276652408, + "grad_norm": 0.6751925349235535, + "learning_rate": 4.576579356716963e-05, + "loss": 2.507, + "step": 13683 + }, + { + "epoch": 1.104349931401824, + "grad_norm": 0.7206710577011108, + "learning_rate": 4.575253075817567e-05, + "loss": 2.4236, + "step": 13684 + }, + { + "epoch": 1.104430635138407, + "grad_norm": 0.7736170291900635, + "learning_rate": 4.5739269301170485e-05, + "loss": 2.4095, + "step": 13685 + }, + { + "epoch": 1.1045113388749899, + "grad_norm": 
0.6901736855506897, + "learning_rate": 4.572600919648457e-05, + "loss": 2.4519, + "step": 13686 + }, + { + "epoch": 1.104592042611573, + "grad_norm": 0.7762539982795715, + "learning_rate": 4.571275044444836e-05, + "loss": 2.5018, + "step": 13687 + }, + { + "epoch": 1.1046727463481558, + "grad_norm": 0.7231423854827881, + "learning_rate": 4.569949304539232e-05, + "loss": 2.4553, + "step": 13688 + }, + { + "epoch": 1.104753450084739, + "grad_norm": 0.7713531255722046, + "learning_rate": 4.568623699964688e-05, + "loss": 2.49, + "step": 13689 + }, + { + "epoch": 1.104834153821322, + "grad_norm": 0.7355079650878906, + "learning_rate": 4.5672982307542354e-05, + "loss": 2.5191, + "step": 13690 + }, + { + "epoch": 1.1049148575579049, + "grad_norm": 0.6916452050209045, + "learning_rate": 4.565972896940913e-05, + "loss": 2.3867, + "step": 13691 + }, + { + "epoch": 1.104995561294488, + "grad_norm": 0.6622549295425415, + "learning_rate": 4.5646476985577544e-05, + "loss": 2.4364, + "step": 13692 + }, + { + "epoch": 1.105076265031071, + "grad_norm": 0.6683297157287598, + "learning_rate": 4.563322635637779e-05, + "loss": 2.43, + "step": 13693 + }, + { + "epoch": 1.105156968767654, + "grad_norm": 0.6857880353927612, + "learning_rate": 4.561997708214015e-05, + "loss": 2.4515, + "step": 13694 + }, + { + "epoch": 1.105237672504237, + "grad_norm": 0.7473817467689514, + "learning_rate": 4.5606729163194807e-05, + "loss": 2.442, + "step": 13695 + }, + { + "epoch": 1.1053183762408199, + "grad_norm": 0.6988846063613892, + "learning_rate": 4.559348259987203e-05, + "loss": 2.3886, + "step": 13696 + }, + { + "epoch": 1.105399079977403, + "grad_norm": 0.6450650691986084, + "learning_rate": 4.5580237392501836e-05, + "loss": 2.4647, + "step": 13697 + }, + { + "epoch": 1.105479783713986, + "grad_norm": 0.7669623494148254, + "learning_rate": 4.556699354141439e-05, + "loss": 2.4362, + "step": 13698 + }, + { + "epoch": 1.105560487450569, + "grad_norm": 0.7019730806350708, + "learning_rate": 
4.55537510469398e-05, + "loss": 2.49, + "step": 13699 + }, + { + "epoch": 1.105641191187152, + "grad_norm": 0.6736636757850647, + "learning_rate": 4.5540509909408e-05, + "loss": 2.43, + "step": 13700 + }, + { + "epoch": 1.105721894923735, + "grad_norm": 0.6872034668922424, + "learning_rate": 4.552727012914907e-05, + "loss": 2.4507, + "step": 13701 + }, + { + "epoch": 1.105802598660318, + "grad_norm": 0.6726621985435486, + "learning_rate": 4.5514031706492986e-05, + "loss": 2.4193, + "step": 13702 + }, + { + "epoch": 1.105883302396901, + "grad_norm": 0.7345453500747681, + "learning_rate": 4.550079464176963e-05, + "loss": 2.4257, + "step": 13703 + }, + { + "epoch": 1.105964006133484, + "grad_norm": 0.6764804124832153, + "learning_rate": 4.548755893530894e-05, + "loss": 2.4656, + "step": 13704 + }, + { + "epoch": 1.106044709870067, + "grad_norm": 0.6915058493614197, + "learning_rate": 4.5474324587440766e-05, + "loss": 2.4148, + "step": 13705 + }, + { + "epoch": 1.10612541360665, + "grad_norm": 0.7960236668586731, + "learning_rate": 4.5461091598494954e-05, + "loss": 2.4148, + "step": 13706 + }, + { + "epoch": 1.106206117343233, + "grad_norm": 0.7058970928192139, + "learning_rate": 4.544785996880131e-05, + "loss": 2.4795, + "step": 13707 + }, + { + "epoch": 1.106286821079816, + "grad_norm": 0.6979549527168274, + "learning_rate": 4.5434629698689634e-05, + "loss": 2.4329, + "step": 13708 + }, + { + "epoch": 1.1063675248163989, + "grad_norm": 0.6805241107940674, + "learning_rate": 4.5421400788489586e-05, + "loss": 2.4303, + "step": 13709 + }, + { + "epoch": 1.106448228552982, + "grad_norm": 0.7566354274749756, + "learning_rate": 4.5408173238530905e-05, + "loss": 2.4769, + "step": 13710 + }, + { + "epoch": 1.106528932289565, + "grad_norm": 0.647773802280426, + "learning_rate": 4.539494704914324e-05, + "loss": 2.4037, + "step": 13711 + }, + { + "epoch": 1.106609636026148, + "grad_norm": 0.7248135209083557, + "learning_rate": 4.538172222065628e-05, + "loss": 2.4366, + "step": 
13712 + }, + { + "epoch": 1.106690339762731, + "grad_norm": 0.6861057281494141, + "learning_rate": 4.536849875339953e-05, + "loss": 2.456, + "step": 13713 + }, + { + "epoch": 1.106771043499314, + "grad_norm": 0.7386166453361511, + "learning_rate": 4.5355276647702605e-05, + "loss": 2.4806, + "step": 13714 + }, + { + "epoch": 1.106851747235897, + "grad_norm": 0.664402961730957, + "learning_rate": 4.534205590389503e-05, + "loss": 2.4846, + "step": 13715 + }, + { + "epoch": 1.10693245097248, + "grad_norm": 0.8123969435691833, + "learning_rate": 4.5328836522306296e-05, + "loss": 2.4945, + "step": 13716 + }, + { + "epoch": 1.1070131547090631, + "grad_norm": 0.7375624775886536, + "learning_rate": 4.5315618503265865e-05, + "loss": 2.4533, + "step": 13717 + }, + { + "epoch": 1.107093858445646, + "grad_norm": 0.70960932970047, + "learning_rate": 4.53024018471032e-05, + "loss": 2.4351, + "step": 13718 + }, + { + "epoch": 1.107174562182229, + "grad_norm": 0.7170885801315308, + "learning_rate": 4.5289186554147645e-05, + "loss": 2.4654, + "step": 13719 + }, + { + "epoch": 1.107255265918812, + "grad_norm": 0.6986895203590393, + "learning_rate": 4.5275972624728556e-05, + "loss": 2.4079, + "step": 13720 + }, + { + "epoch": 1.107335969655395, + "grad_norm": 0.6948813796043396, + "learning_rate": 4.526276005917532e-05, + "loss": 2.4981, + "step": 13721 + }, + { + "epoch": 1.1074166733919781, + "grad_norm": 0.7719457149505615, + "learning_rate": 4.524954885781717e-05, + "loss": 2.4853, + "step": 13722 + }, + { + "epoch": 1.107497377128561, + "grad_norm": 0.652686357498169, + "learning_rate": 4.5236339020983363e-05, + "loss": 2.3672, + "step": 13723 + }, + { + "epoch": 1.107578080865144, + "grad_norm": 0.7517427802085876, + "learning_rate": 4.5223130549003144e-05, + "loss": 2.3947, + "step": 13724 + }, + { + "epoch": 1.107658784601727, + "grad_norm": 0.6755498647689819, + "learning_rate": 4.5209923442205705e-05, + "loss": 2.4173, + "step": 13725 + }, + { + "epoch": 1.10773948833831, + 
"grad_norm": 0.6801806688308716, + "learning_rate": 4.519671770092019e-05, + "loss": 2.4366, + "step": 13726 + }, + { + "epoch": 1.1078201920748931, + "grad_norm": 0.6665045619010925, + "learning_rate": 4.5183513325475724e-05, + "loss": 2.4797, + "step": 13727 + }, + { + "epoch": 1.107900895811476, + "grad_norm": 0.7303451299667358, + "learning_rate": 4.517031031620145e-05, + "loss": 2.4487, + "step": 13728 + }, + { + "epoch": 1.107981599548059, + "grad_norm": 0.7241206765174866, + "learning_rate": 4.515710867342632e-05, + "loss": 2.4632, + "step": 13729 + }, + { + "epoch": 1.1080623032846422, + "grad_norm": 0.738835334777832, + "learning_rate": 4.514390839747941e-05, + "loss": 2.3937, + "step": 13730 + }, + { + "epoch": 1.108143007021225, + "grad_norm": 0.7062843441963196, + "learning_rate": 4.5130709488689726e-05, + "loss": 2.4576, + "step": 13731 + }, + { + "epoch": 1.1082237107578081, + "grad_norm": 0.7074100971221924, + "learning_rate": 4.511751194738616e-05, + "loss": 2.4843, + "step": 13732 + }, + { + "epoch": 1.108304414494391, + "grad_norm": 0.751742959022522, + "learning_rate": 4.510431577389765e-05, + "loss": 2.4607, + "step": 13733 + }, + { + "epoch": 1.108385118230974, + "grad_norm": 0.7370054125785828, + "learning_rate": 4.50911209685531e-05, + "loss": 2.4877, + "step": 13734 + }, + { + "epoch": 1.1084658219675572, + "grad_norm": 0.6410251259803772, + "learning_rate": 4.507792753168135e-05, + "loss": 2.4254, + "step": 13735 + }, + { + "epoch": 1.10854652570414, + "grad_norm": 0.7141317129135132, + "learning_rate": 4.506473546361121e-05, + "loss": 2.4962, + "step": 13736 + }, + { + "epoch": 1.1086272294407231, + "grad_norm": 0.6903412342071533, + "learning_rate": 4.50515447646715e-05, + "loss": 2.4315, + "step": 13737 + }, + { + "epoch": 1.1087079331773062, + "grad_norm": 0.7068564891815186, + "learning_rate": 4.50383554351909e-05, + "loss": 2.5795, + "step": 13738 + }, + { + "epoch": 1.108788636913889, + "grad_norm": 0.6880627274513245, + 
"learning_rate": 4.5025167475498154e-05, + "loss": 2.4399, + "step": 13739 + }, + { + "epoch": 1.1088693406504722, + "grad_norm": 0.6721192598342896, + "learning_rate": 4.5011980885921965e-05, + "loss": 2.4651, + "step": 13740 + }, + { + "epoch": 1.108950044387055, + "grad_norm": 0.7084259986877441, + "learning_rate": 4.499879566679093e-05, + "loss": 2.4121, + "step": 13741 + }, + { + "epoch": 1.109030748123638, + "grad_norm": 0.6809335947036743, + "learning_rate": 4.498561181843368e-05, + "loss": 2.4714, + "step": 13742 + }, + { + "epoch": 1.1091114518602212, + "grad_norm": 0.690416693687439, + "learning_rate": 4.497242934117879e-05, + "loss": 2.4744, + "step": 13743 + }, + { + "epoch": 1.109192155596804, + "grad_norm": 0.728522002696991, + "learning_rate": 4.495924823535483e-05, + "loss": 2.4374, + "step": 13744 + }, + { + "epoch": 1.1092728593333872, + "grad_norm": 0.7000796794891357, + "learning_rate": 4.494606850129026e-05, + "loss": 2.4635, + "step": 13745 + }, + { + "epoch": 1.1093535630699702, + "grad_norm": 0.824645459651947, + "learning_rate": 4.493289013931353e-05, + "loss": 2.3724, + "step": 13746 + }, + { + "epoch": 1.109434266806553, + "grad_norm": 0.6561198830604553, + "learning_rate": 4.491971314975321e-05, + "loss": 2.3726, + "step": 13747 + }, + { + "epoch": 1.1095149705431362, + "grad_norm": 0.7067599892616272, + "learning_rate": 4.490653753293757e-05, + "loss": 2.4285, + "step": 13748 + }, + { + "epoch": 1.109595674279719, + "grad_norm": 0.6954898834228516, + "learning_rate": 4.489336328919503e-05, + "loss": 2.4252, + "step": 13749 + }, + { + "epoch": 1.1096763780163021, + "grad_norm": 0.6683667302131653, + "learning_rate": 4.4880190418853974e-05, + "loss": 2.4815, + "step": 13750 + }, + { + "epoch": 1.1097570817528852, + "grad_norm": 0.7554971575737, + "learning_rate": 4.486701892224261e-05, + "loss": 2.5036, + "step": 13751 + }, + { + "epoch": 1.109837785489468, + "grad_norm": 0.7043242454528809, + "learning_rate": 4.485384879968926e-05, + 
"loss": 2.3757, + "step": 13752 + }, + { + "epoch": 1.1099184892260512, + "grad_norm": 0.8016893863677979, + "learning_rate": 4.4840680051522186e-05, + "loss": 2.4655, + "step": 13753 + }, + { + "epoch": 1.1099991929626343, + "grad_norm": 0.7022131085395813, + "learning_rate": 4.4827512678069515e-05, + "loss": 2.475, + "step": 13754 + }, + { + "epoch": 1.1100798966992171, + "grad_norm": 0.6963247656822205, + "learning_rate": 4.4814346679659455e-05, + "loss": 2.4866, + "step": 13755 + }, + { + "epoch": 1.1101606004358002, + "grad_norm": 0.6980907917022705, + "learning_rate": 4.4801182056620125e-05, + "loss": 2.4322, + "step": 13756 + }, + { + "epoch": 1.110241304172383, + "grad_norm": 0.68063884973526, + "learning_rate": 4.478801880927964e-05, + "loss": 2.426, + "step": 13757 + }, + { + "epoch": 1.1103220079089662, + "grad_norm": 0.7454195618629456, + "learning_rate": 4.477485693796605e-05, + "loss": 2.5042, + "step": 13758 + }, + { + "epoch": 1.1104027116455493, + "grad_norm": 0.685975193977356, + "learning_rate": 4.476169644300737e-05, + "loss": 2.4874, + "step": 13759 + }, + { + "epoch": 1.1104834153821321, + "grad_norm": 0.7060961723327637, + "learning_rate": 4.4748537324731664e-05, + "loss": 2.4126, + "step": 13760 + }, + { + "epoch": 1.1105641191187152, + "grad_norm": 0.6794416904449463, + "learning_rate": 4.4735379583466795e-05, + "loss": 2.4112, + "step": 13761 + }, + { + "epoch": 1.1106448228552983, + "grad_norm": 0.6854961514472961, + "learning_rate": 4.472222321954073e-05, + "loss": 2.4909, + "step": 13762 + }, + { + "epoch": 1.1107255265918812, + "grad_norm": 0.7660776972770691, + "learning_rate": 4.470906823328139e-05, + "loss": 2.5021, + "step": 13763 + }, + { + "epoch": 1.1108062303284643, + "grad_norm": 0.7027743458747864, + "learning_rate": 4.4695914625016564e-05, + "loss": 2.4375, + "step": 13764 + }, + { + "epoch": 1.1108869340650471, + "grad_norm": 0.6896719336509705, + "learning_rate": 4.468276239507413e-05, + "loss": 2.4574, + "step": 13765 + 
}, + { + "epoch": 1.1109676378016302, + "grad_norm": 0.685141384601593, + "learning_rate": 4.4669611543781844e-05, + "loss": 2.4311, + "step": 13766 + }, + { + "epoch": 1.1110483415382133, + "grad_norm": 0.7108263373374939, + "learning_rate": 4.465646207146746e-05, + "loss": 2.4565, + "step": 13767 + }, + { + "epoch": 1.1111290452747962, + "grad_norm": 0.63578861951828, + "learning_rate": 4.464331397845873e-05, + "loss": 2.449, + "step": 13768 + }, + { + "epoch": 1.1112097490113793, + "grad_norm": 0.6917306780815125, + "learning_rate": 4.463016726508335e-05, + "loss": 2.4681, + "step": 13769 + }, + { + "epoch": 1.1112904527479621, + "grad_norm": 0.7328054308891296, + "learning_rate": 4.4617021931668914e-05, + "loss": 2.404, + "step": 13770 + }, + { + "epoch": 1.1113711564845452, + "grad_norm": 0.6501660943031311, + "learning_rate": 4.460387797854305e-05, + "loss": 2.4228, + "step": 13771 + }, + { + "epoch": 1.1114518602211283, + "grad_norm": 0.6656771302223206, + "learning_rate": 4.459073540603336e-05, + "loss": 2.4814, + "step": 13772 + }, + { + "epoch": 1.1115325639577112, + "grad_norm": 0.671017587184906, + "learning_rate": 4.457759421446742e-05, + "loss": 2.4605, + "step": 13773 + }, + { + "epoch": 1.1116132676942942, + "grad_norm": 0.6715343594551086, + "learning_rate": 4.456445440417267e-05, + "loss": 2.424, + "step": 13774 + }, + { + "epoch": 1.1116939714308773, + "grad_norm": 0.7051515579223633, + "learning_rate": 4.4551315975476626e-05, + "loss": 2.4358, + "step": 13775 + }, + { + "epoch": 1.1117746751674602, + "grad_norm": 0.7810437679290771, + "learning_rate": 4.453817892870673e-05, + "loss": 2.4718, + "step": 13776 + }, + { + "epoch": 1.1118553789040433, + "grad_norm": 0.7072561383247375, + "learning_rate": 4.4525043264190405e-05, + "loss": 2.4429, + "step": 13777 + }, + { + "epoch": 1.1119360826406264, + "grad_norm": 0.7949702143669128, + "learning_rate": 4.4511908982255e-05, + "loss": 2.4413, + "step": 13778 + }, + { + "epoch": 1.1120167863772092, + 
"grad_norm": 0.6716235876083374, + "learning_rate": 4.449877608322792e-05, + "loss": 2.427, + "step": 13779 + }, + { + "epoch": 1.1120974901137923, + "grad_norm": 0.7332563996315002, + "learning_rate": 4.448564456743638e-05, + "loss": 2.4567, + "step": 13780 + }, + { + "epoch": 1.1121781938503752, + "grad_norm": 0.7264607548713684, + "learning_rate": 4.447251443520769e-05, + "loss": 2.4844, + "step": 13781 + }, + { + "epoch": 1.1122588975869583, + "grad_norm": 0.7819967865943909, + "learning_rate": 4.4459385686869136e-05, + "loss": 2.5129, + "step": 13782 + }, + { + "epoch": 1.1123396013235414, + "grad_norm": 0.7587651610374451, + "learning_rate": 4.4446258322747824e-05, + "loss": 2.4714, + "step": 13783 + }, + { + "epoch": 1.1124203050601242, + "grad_norm": 0.6392871141433716, + "learning_rate": 4.443313234317099e-05, + "loss": 2.462, + "step": 13784 + }, + { + "epoch": 1.1125010087967073, + "grad_norm": 0.6609585881233215, + "learning_rate": 4.442000774846574e-05, + "loss": 2.4566, + "step": 13785 + }, + { + "epoch": 1.1125817125332902, + "grad_norm": 0.762924075126648, + "learning_rate": 4.440688453895919e-05, + "loss": 2.4613, + "step": 13786 + }, + { + "epoch": 1.1126624162698733, + "grad_norm": 0.7096089124679565, + "learning_rate": 4.4393762714978394e-05, + "loss": 2.4195, + "step": 13787 + }, + { + "epoch": 1.1127431200064564, + "grad_norm": 0.6663284301757812, + "learning_rate": 4.438064227685039e-05, + "loss": 2.422, + "step": 13788 + }, + { + "epoch": 1.1128238237430392, + "grad_norm": 0.6653628945350647, + "learning_rate": 4.436752322490221e-05, + "loss": 2.4477, + "step": 13789 + }, + { + "epoch": 1.1129045274796223, + "grad_norm": 0.6527605056762695, + "learning_rate": 4.435440555946073e-05, + "loss": 2.3874, + "step": 13790 + }, + { + "epoch": 1.1129852312162054, + "grad_norm": 0.6801275014877319, + "learning_rate": 4.4341289280852935e-05, + "loss": 2.4474, + "step": 13791 + }, + { + "epoch": 1.1130659349527883, + "grad_norm": 0.729905366897583, + 
"learning_rate": 4.432817438940574e-05, + "loss": 2.4711, + "step": 13792 + }, + { + "epoch": 1.1131466386893714, + "grad_norm": 0.7074751853942871, + "learning_rate": 4.431506088544593e-05, + "loss": 2.451, + "step": 13793 + }, + { + "epoch": 1.1132273424259542, + "grad_norm": 0.7241154313087463, + "learning_rate": 4.430194876930035e-05, + "loss": 2.4883, + "step": 13794 + }, + { + "epoch": 1.1133080461625373, + "grad_norm": 0.6549142003059387, + "learning_rate": 4.428883804129586e-05, + "loss": 2.4243, + "step": 13795 + }, + { + "epoch": 1.1133887498991204, + "grad_norm": 0.7046780586242676, + "learning_rate": 4.427572870175907e-05, + "loss": 2.4143, + "step": 13796 + }, + { + "epoch": 1.1134694536357033, + "grad_norm": 0.6563952565193176, + "learning_rate": 4.426262075101682e-05, + "loss": 2.416, + "step": 13797 + }, + { + "epoch": 1.1135501573722864, + "grad_norm": 0.7002081871032715, + "learning_rate": 4.4249514189395803e-05, + "loss": 2.3673, + "step": 13798 + }, + { + "epoch": 1.1136308611088694, + "grad_norm": 0.6766571998596191, + "learning_rate": 4.423640901722259e-05, + "loss": 2.4941, + "step": 13799 + }, + { + "epoch": 1.1137115648454523, + "grad_norm": 0.7404381632804871, + "learning_rate": 4.422330523482383e-05, + "loss": 2.4794, + "step": 13800 + }, + { + "epoch": 1.1137922685820354, + "grad_norm": 0.6670998930931091, + "learning_rate": 4.421020284252614e-05, + "loss": 2.5131, + "step": 13801 + }, + { + "epoch": 1.1138729723186183, + "grad_norm": 0.803720235824585, + "learning_rate": 4.4197101840655995e-05, + "loss": 2.4751, + "step": 13802 + }, + { + "epoch": 1.1139536760552013, + "grad_norm": 0.6532074809074402, + "learning_rate": 4.4184002229539947e-05, + "loss": 2.4147, + "step": 13803 + }, + { + "epoch": 1.1140343797917844, + "grad_norm": 0.6548035144805908, + "learning_rate": 4.417090400950447e-05, + "loss": 2.4601, + "step": 13804 + }, + { + "epoch": 1.1141150835283673, + "grad_norm": 0.6971763968467712, + "learning_rate": 
4.415780718087603e-05, + "loss": 2.4752, + "step": 13805 + }, + { + "epoch": 1.1141957872649504, + "grad_norm": 0.6624024510383606, + "learning_rate": 4.414471174398098e-05, + "loss": 2.4183, + "step": 13806 + }, + { + "epoch": 1.1142764910015335, + "grad_norm": 0.6571507453918457, + "learning_rate": 4.4131617699145714e-05, + "loss": 2.4747, + "step": 13807 + }, + { + "epoch": 1.1143571947381163, + "grad_norm": 0.7165808081626892, + "learning_rate": 4.411852504669658e-05, + "loss": 2.453, + "step": 13808 + }, + { + "epoch": 1.1144378984746994, + "grad_norm": 0.6708057522773743, + "learning_rate": 4.410543378695988e-05, + "loss": 2.4858, + "step": 13809 + }, + { + "epoch": 1.1145186022112823, + "grad_norm": 0.889302134513855, + "learning_rate": 4.409234392026187e-05, + "loss": 2.4333, + "step": 13810 + }, + { + "epoch": 1.1145993059478654, + "grad_norm": 0.7440677881240845, + "learning_rate": 4.407925544692884e-05, + "loss": 2.49, + "step": 13811 + }, + { + "epoch": 1.1146800096844485, + "grad_norm": 0.6688372492790222, + "learning_rate": 4.406616836728691e-05, + "loss": 2.4663, + "step": 13812 + }, + { + "epoch": 1.1147607134210313, + "grad_norm": 0.7108204364776611, + "learning_rate": 4.4053082681662264e-05, + "loss": 2.4843, + "step": 13813 + }, + { + "epoch": 1.1148414171576144, + "grad_norm": 0.7270475029945374, + "learning_rate": 4.4039998390381087e-05, + "loss": 2.4158, + "step": 13814 + }, + { + "epoch": 1.1149221208941973, + "grad_norm": 0.7243396639823914, + "learning_rate": 4.402691549376939e-05, + "loss": 2.3969, + "step": 13815 + }, + { + "epoch": 1.1150028246307804, + "grad_norm": 0.6687803268432617, + "learning_rate": 4.4013833992153285e-05, + "loss": 2.42, + "step": 13816 + }, + { + "epoch": 1.1150835283673635, + "grad_norm": 0.6892626285552979, + "learning_rate": 4.400075388585877e-05, + "loss": 2.4086, + "step": 13817 + }, + { + "epoch": 1.1151642321039463, + "grad_norm": 0.7556231021881104, + "learning_rate": 4.398767517521186e-05, + "loss": 
2.4201, + "step": 13818 + }, + { + "epoch": 1.1152449358405294, + "grad_norm": 0.6872838735580444, + "learning_rate": 4.397459786053851e-05, + "loss": 2.4143, + "step": 13819 + }, + { + "epoch": 1.1153256395771125, + "grad_norm": 0.6681817770004272, + "learning_rate": 4.396152194216463e-05, + "loss": 2.4404, + "step": 13820 + }, + { + "epoch": 1.1154063433136954, + "grad_norm": 0.7107201218605042, + "learning_rate": 4.394844742041614e-05, + "loss": 2.4503, + "step": 13821 + }, + { + "epoch": 1.1154870470502785, + "grad_norm": 0.706541121006012, + "learning_rate": 4.3935374295618824e-05, + "loss": 2.5106, + "step": 13822 + }, + { + "epoch": 1.1155677507868615, + "grad_norm": 0.6659905910491943, + "learning_rate": 4.392230256809854e-05, + "loss": 2.3839, + "step": 13823 + }, + { + "epoch": 1.1156484545234444, + "grad_norm": 0.7125810980796814, + "learning_rate": 4.3909232238181095e-05, + "loss": 2.4463, + "step": 13824 + }, + { + "epoch": 1.1157291582600275, + "grad_norm": 0.6581901907920837, + "learning_rate": 4.389616330619217e-05, + "loss": 2.4004, + "step": 13825 + }, + { + "epoch": 1.1158098619966104, + "grad_norm": 0.7660872340202332, + "learning_rate": 4.388309577245752e-05, + "loss": 2.4685, + "step": 13826 + }, + { + "epoch": 1.1158905657331935, + "grad_norm": 0.699526846408844, + "learning_rate": 4.387002963730281e-05, + "loss": 2.4131, + "step": 13827 + }, + { + "epoch": 1.1159712694697765, + "grad_norm": 0.7031015753746033, + "learning_rate": 4.3856964901053685e-05, + "loss": 2.4476, + "step": 13828 + }, + { + "epoch": 1.1160519732063594, + "grad_norm": 0.6876828074455261, + "learning_rate": 4.384390156403575e-05, + "loss": 2.4402, + "step": 13829 + }, + { + "epoch": 1.1161326769429425, + "grad_norm": 0.7188935279846191, + "learning_rate": 4.3830839626574626e-05, + "loss": 2.4473, + "step": 13830 + }, + { + "epoch": 1.1162133806795254, + "grad_norm": 0.6825287938117981, + "learning_rate": 4.381777908899577e-05, + "loss": 2.4757, + "step": 13831 + }, + { + 
"epoch": 1.1162940844161084, + "grad_norm": 0.718267560005188, + "learning_rate": 4.380471995162472e-05, + "loss": 2.483, + "step": 13832 + }, + { + "epoch": 1.1163747881526915, + "grad_norm": 0.6526767611503601, + "learning_rate": 4.379166221478697e-05, + "loss": 2.4161, + "step": 13833 + }, + { + "epoch": 1.1164554918892744, + "grad_norm": 0.7541480660438538, + "learning_rate": 4.37786058788079e-05, + "loss": 2.4876, + "step": 13834 + }, + { + "epoch": 1.1165361956258575, + "grad_norm": 0.7144232988357544, + "learning_rate": 4.376555094401294e-05, + "loss": 2.4153, + "step": 13835 + }, + { + "epoch": 1.1166168993624406, + "grad_norm": 0.7544882297515869, + "learning_rate": 4.3752497410727445e-05, + "loss": 2.4634, + "step": 13836 + }, + { + "epoch": 1.1166976030990234, + "grad_norm": 0.7263267040252686, + "learning_rate": 4.373944527927674e-05, + "loss": 2.5189, + "step": 13837 + }, + { + "epoch": 1.1167783068356065, + "grad_norm": 0.7709252834320068, + "learning_rate": 4.3726394549986135e-05, + "loss": 2.5036, + "step": 13838 + }, + { + "epoch": 1.1168590105721894, + "grad_norm": 0.6849128007888794, + "learning_rate": 4.3713345223180866e-05, + "loss": 2.414, + "step": 13839 + }, + { + "epoch": 1.1169397143087725, + "grad_norm": 0.6807512044906616, + "learning_rate": 4.3700297299186224e-05, + "loss": 2.4924, + "step": 13840 + }, + { + "epoch": 1.1170204180453556, + "grad_norm": 0.6894977688789368, + "learning_rate": 4.3687250778327294e-05, + "loss": 2.4183, + "step": 13841 + }, + { + "epoch": 1.1171011217819384, + "grad_norm": 0.6657617092132568, + "learning_rate": 4.367420566092928e-05, + "loss": 2.448, + "step": 13842 + }, + { + "epoch": 1.1171818255185215, + "grad_norm": 0.7104446291923523, + "learning_rate": 4.366116194731733e-05, + "loss": 2.4862, + "step": 13843 + }, + { + "epoch": 1.1172625292551046, + "grad_norm": 0.7485257387161255, + "learning_rate": 4.3648119637816465e-05, + "loss": 2.4253, + "step": 13844 + }, + { + "epoch": 1.1173432329916875, + 
"grad_norm": 0.7079899907112122, + "learning_rate": 4.363507873275177e-05, + "loss": 2.4235, + "step": 13845 + }, + { + "epoch": 1.1174239367282706, + "grad_norm": 0.6891573667526245, + "learning_rate": 4.3622039232448274e-05, + "loss": 2.4382, + "step": 13846 + }, + { + "epoch": 1.1175046404648534, + "grad_norm": 0.6886103749275208, + "learning_rate": 4.360900113723086e-05, + "loss": 2.5115, + "step": 13847 + }, + { + "epoch": 1.1175853442014365, + "grad_norm": 0.7511457800865173, + "learning_rate": 4.35959644474246e-05, + "loss": 2.4071, + "step": 13848 + }, + { + "epoch": 1.1176660479380196, + "grad_norm": 0.6526182293891907, + "learning_rate": 4.358292916335437e-05, + "loss": 2.4242, + "step": 13849 + }, + { + "epoch": 1.1177467516746025, + "grad_norm": 0.7385138273239136, + "learning_rate": 4.356989528534499e-05, + "loss": 2.4459, + "step": 13850 + }, + { + "epoch": 1.1178274554111856, + "grad_norm": 0.6668610572814941, + "learning_rate": 4.355686281372132e-05, + "loss": 2.4188, + "step": 13851 + }, + { + "epoch": 1.1179081591477686, + "grad_norm": 0.6950691342353821, + "learning_rate": 4.354383174880818e-05, + "loss": 2.4339, + "step": 13852 + }, + { + "epoch": 1.1179888628843515, + "grad_norm": 0.7017496824264526, + "learning_rate": 4.3530802090930375e-05, + "loss": 2.4733, + "step": 13853 + }, + { + "epoch": 1.1180695666209346, + "grad_norm": 0.8118221759796143, + "learning_rate": 4.351777384041254e-05, + "loss": 2.4826, + "step": 13854 + }, + { + "epoch": 1.1181502703575175, + "grad_norm": 0.7233164310455322, + "learning_rate": 4.350474699757945e-05, + "loss": 2.4637, + "step": 13855 + }, + { + "epoch": 1.1182309740941005, + "grad_norm": 0.6354575157165527, + "learning_rate": 4.349172156275576e-05, + "loss": 2.4487, + "step": 13856 + }, + { + "epoch": 1.1183116778306836, + "grad_norm": 0.6776937246322632, + "learning_rate": 4.347869753626606e-05, + "loss": 2.4292, + "step": 13857 + }, + { + "epoch": 1.1183923815672665, + "grad_norm": 0.6656864881515503, + 
"learning_rate": 4.3465674918434953e-05, + "loss": 2.484, + "step": 13858 + }, + { + "epoch": 1.1184730853038496, + "grad_norm": 0.7659650444984436, + "learning_rate": 4.345265370958702e-05, + "loss": 2.4181, + "step": 13859 + }, + { + "epoch": 1.1185537890404325, + "grad_norm": 0.6546063423156738, + "learning_rate": 4.3439633910046764e-05, + "loss": 2.4657, + "step": 13860 + }, + { + "epoch": 1.1186344927770155, + "grad_norm": 0.6869762539863586, + "learning_rate": 4.342661552013869e-05, + "loss": 2.513, + "step": 13861 + }, + { + "epoch": 1.1187151965135986, + "grad_norm": 0.6633490324020386, + "learning_rate": 4.3413598540187275e-05, + "loss": 2.4716, + "step": 13862 + }, + { + "epoch": 1.1187959002501815, + "grad_norm": 0.7238267660140991, + "learning_rate": 4.340058297051687e-05, + "loss": 2.4353, + "step": 13863 + }, + { + "epoch": 1.1188766039867646, + "grad_norm": 0.67429119348526, + "learning_rate": 4.3387568811451875e-05, + "loss": 2.4808, + "step": 13864 + }, + { + "epoch": 1.1189573077233477, + "grad_norm": 0.6901153326034546, + "learning_rate": 4.33745560633167e-05, + "loss": 2.4785, + "step": 13865 + }, + { + "epoch": 1.1190380114599305, + "grad_norm": 0.7227689027786255, + "learning_rate": 4.336154472643556e-05, + "loss": 2.4414, + "step": 13866 + }, + { + "epoch": 1.1191187151965136, + "grad_norm": 0.713793933391571, + "learning_rate": 4.33485348011328e-05, + "loss": 2.5136, + "step": 13867 + }, + { + "epoch": 1.1191994189330967, + "grad_norm": 0.6495655179023743, + "learning_rate": 4.333552628773263e-05, + "loss": 2.4267, + "step": 13868 + }, + { + "epoch": 1.1192801226696796, + "grad_norm": 0.7265790104866028, + "learning_rate": 4.3322519186559274e-05, + "loss": 2.4406, + "step": 13869 + }, + { + "epoch": 1.1193608264062627, + "grad_norm": 0.6700571179389954, + "learning_rate": 4.330951349793688e-05, + "loss": 2.4457, + "step": 13870 + }, + { + "epoch": 1.1194415301428455, + "grad_norm": 0.7112334966659546, + "learning_rate": 
4.3296509222189616e-05, + "loss": 2.4788, + "step": 13871 + }, + { + "epoch": 1.1195222338794286, + "grad_norm": 0.7056662440299988, + "learning_rate": 4.32835063596416e-05, + "loss": 2.5195, + "step": 13872 + }, + { + "epoch": 1.1196029376160117, + "grad_norm": 0.7198836207389832, + "learning_rate": 4.327050491061683e-05, + "loss": 2.4827, + "step": 13873 + }, + { + "epoch": 1.1196836413525946, + "grad_norm": 0.7384079694747925, + "learning_rate": 4.325750487543936e-05, + "loss": 2.4556, + "step": 13874 + }, + { + "epoch": 1.1197643450891777, + "grad_norm": 0.7315430641174316, + "learning_rate": 4.324450625443324e-05, + "loss": 2.4302, + "step": 13875 + }, + { + "epoch": 1.1198450488257605, + "grad_norm": 0.6692587733268738, + "learning_rate": 4.323150904792234e-05, + "loss": 2.5283, + "step": 13876 + }, + { + "epoch": 1.1199257525623436, + "grad_norm": 0.7407168745994568, + "learning_rate": 4.321851325623063e-05, + "loss": 2.4757, + "step": 13877 + }, + { + "epoch": 1.1200064562989267, + "grad_norm": 0.7387246489524841, + "learning_rate": 4.3205518879682e-05, + "loss": 2.5025, + "step": 13878 + }, + { + "epoch": 1.1200871600355096, + "grad_norm": 0.8058405518531799, + "learning_rate": 4.319252591860031e-05, + "loss": 2.4951, + "step": 13879 + }, + { + "epoch": 1.1201678637720927, + "grad_norm": 0.6964818835258484, + "learning_rate": 4.317953437330936e-05, + "loss": 2.4462, + "step": 13880 + }, + { + "epoch": 1.1202485675086757, + "grad_norm": 0.6904557347297668, + "learning_rate": 4.316654424413294e-05, + "loss": 2.3981, + "step": 13881 + }, + { + "epoch": 1.1203292712452586, + "grad_norm": 0.6555196046829224, + "learning_rate": 4.315355553139485e-05, + "loss": 2.418, + "step": 13882 + }, + { + "epoch": 1.1204099749818417, + "grad_norm": 0.7745094299316406, + "learning_rate": 4.3140568235418724e-05, + "loss": 2.4635, + "step": 13883 + }, + { + "epoch": 1.1204906787184246, + "grad_norm": 0.686676025390625, + "learning_rate": 4.312758235652825e-05, + "loss": 
2.4847, + "step": 13884 + }, + { + "epoch": 1.1205713824550076, + "grad_norm": 0.6937002539634705, + "learning_rate": 4.311459789504714e-05, + "loss": 2.4632, + "step": 13885 + }, + { + "epoch": 1.1206520861915907, + "grad_norm": 0.7024590373039246, + "learning_rate": 4.310161485129891e-05, + "loss": 2.4268, + "step": 13886 + }, + { + "epoch": 1.1207327899281736, + "grad_norm": 0.6848484873771667, + "learning_rate": 4.308863322560717e-05, + "loss": 2.4895, + "step": 13887 + }, + { + "epoch": 1.1208134936647567, + "grad_norm": 0.7071602940559387, + "learning_rate": 4.307565301829546e-05, + "loss": 2.4348, + "step": 13888 + }, + { + "epoch": 1.1208941974013398, + "grad_norm": 0.6868199706077576, + "learning_rate": 4.3062674229687274e-05, + "loss": 2.4613, + "step": 13889 + }, + { + "epoch": 1.1209749011379226, + "grad_norm": 0.7283496260643005, + "learning_rate": 4.304969686010608e-05, + "loss": 2.478, + "step": 13890 + }, + { + "epoch": 1.1210556048745057, + "grad_norm": 0.6907255053520203, + "learning_rate": 4.303672090987535e-05, + "loss": 2.4431, + "step": 13891 + }, + { + "epoch": 1.1211363086110886, + "grad_norm": 0.675089418888092, + "learning_rate": 4.302374637931841e-05, + "loss": 2.4398, + "step": 13892 + }, + { + "epoch": 1.1212170123476717, + "grad_norm": 0.6929863095283508, + "learning_rate": 4.301077326875863e-05, + "loss": 2.3909, + "step": 13893 + }, + { + "epoch": 1.1212977160842548, + "grad_norm": 0.6746132969856262, + "learning_rate": 4.29978015785194e-05, + "loss": 2.4726, + "step": 13894 + }, + { + "epoch": 1.1213784198208376, + "grad_norm": 0.720781147480011, + "learning_rate": 4.298483130892392e-05, + "loss": 2.4445, + "step": 13895 + }, + { + "epoch": 1.1214591235574207, + "grad_norm": 0.6624416708946228, + "learning_rate": 4.297186246029549e-05, + "loss": 2.3868, + "step": 13896 + }, + { + "epoch": 1.1215398272940038, + "grad_norm": 0.7849127054214478, + "learning_rate": 4.295889503295731e-05, + "loss": 2.4479, + "step": 13897 + }, + { + 
"epoch": 1.1216205310305867, + "grad_norm": 0.6655337810516357, + "learning_rate": 4.294592902723259e-05, + "loss": 2.5093, + "step": 13898 + }, + { + "epoch": 1.1217012347671698, + "grad_norm": 0.7055402398109436, + "learning_rate": 4.293296444344445e-05, + "loss": 2.4385, + "step": 13899 + }, + { + "epoch": 1.1217819385037526, + "grad_norm": 0.7388767600059509, + "learning_rate": 4.2920001281916e-05, + "loss": 2.4863, + "step": 13900 + }, + { + "epoch": 1.1218626422403357, + "grad_norm": 0.6915223002433777, + "learning_rate": 4.2907039542970373e-05, + "loss": 2.4218, + "step": 13901 + }, + { + "epoch": 1.1219433459769188, + "grad_norm": 0.7124893665313721, + "learning_rate": 4.289407922693053e-05, + "loss": 2.4514, + "step": 13902 + }, + { + "epoch": 1.1220240497135017, + "grad_norm": 0.6552406549453735, + "learning_rate": 4.28811203341195e-05, + "loss": 2.4558, + "step": 13903 + }, + { + "epoch": 1.1221047534500848, + "grad_norm": 0.6641791462898254, + "learning_rate": 4.286816286486031e-05, + "loss": 2.4277, + "step": 13904 + }, + { + "epoch": 1.1221854571866678, + "grad_norm": 0.677733838558197, + "learning_rate": 4.285520681947579e-05, + "loss": 2.4861, + "step": 13905 + }, + { + "epoch": 1.1222661609232507, + "grad_norm": 0.6572888493537903, + "learning_rate": 4.284225219828891e-05, + "loss": 2.4657, + "step": 13906 + }, + { + "epoch": 1.1223468646598338, + "grad_norm": 0.6923860907554626, + "learning_rate": 4.2829299001622546e-05, + "loss": 2.4857, + "step": 13907 + }, + { + "epoch": 1.1224275683964167, + "grad_norm": 0.6971977949142456, + "learning_rate": 4.281634722979947e-05, + "loss": 2.4434, + "step": 13908 + }, + { + "epoch": 1.1225082721329998, + "grad_norm": 0.6828060746192932, + "learning_rate": 4.2803396883142456e-05, + "loss": 2.4342, + "step": 13909 + }, + { + "epoch": 1.1225889758695828, + "grad_norm": 0.7001270651817322, + "learning_rate": 4.279044796197438e-05, + "loss": 2.5222, + "step": 13910 + }, + { + "epoch": 1.1226696796061657, + 
"grad_norm": 0.6425578594207764, + "learning_rate": 4.277750046661785e-05, + "loss": 2.42, + "step": 13911 + }, + { + "epoch": 1.1227503833427488, + "grad_norm": 0.6498209834098816, + "learning_rate": 4.2764554397395585e-05, + "loss": 2.4448, + "step": 13912 + }, + { + "epoch": 1.1228310870793319, + "grad_norm": 0.6894031763076782, + "learning_rate": 4.275160975463025e-05, + "loss": 2.4508, + "step": 13913 + }, + { + "epoch": 1.1229117908159147, + "grad_norm": 0.7286608219146729, + "learning_rate": 4.273866653864448e-05, + "loss": 2.4557, + "step": 13914 + }, + { + "epoch": 1.1229924945524978, + "grad_norm": 0.753826379776001, + "learning_rate": 4.272572474976079e-05, + "loss": 2.4635, + "step": 13915 + }, + { + "epoch": 1.1230731982890807, + "grad_norm": 0.6715937256813049, + "learning_rate": 4.271278438830174e-05, + "loss": 2.5107, + "step": 13916 + }, + { + "epoch": 1.1231539020256638, + "grad_norm": 0.6833200454711914, + "learning_rate": 4.26998454545899e-05, + "loss": 2.4883, + "step": 13917 + }, + { + "epoch": 1.1232346057622469, + "grad_norm": 0.6763597130775452, + "learning_rate": 4.2686907948947666e-05, + "loss": 2.4178, + "step": 13918 + }, + { + "epoch": 1.1233153094988297, + "grad_norm": 0.7336227297782898, + "learning_rate": 4.26739718716975e-05, + "loss": 2.4542, + "step": 13919 + }, + { + "epoch": 1.1233960132354128, + "grad_norm": 0.6583260297775269, + "learning_rate": 4.2661037223161806e-05, + "loss": 2.3998, + "step": 13920 + }, + { + "epoch": 1.1234767169719957, + "grad_norm": 0.6444356441497803, + "learning_rate": 4.264810400366295e-05, + "loss": 2.4354, + "step": 13921 + }, + { + "epoch": 1.1235574207085788, + "grad_norm": 0.6786002516746521, + "learning_rate": 4.2635172213523255e-05, + "loss": 2.3989, + "step": 13922 + }, + { + "epoch": 1.1236381244451619, + "grad_norm": 0.6838372349739075, + "learning_rate": 4.262224185306507e-05, + "loss": 2.4431, + "step": 13923 + }, + { + "epoch": 1.1237188281817447, + "grad_norm": 0.7516793012619019, + 
"learning_rate": 4.260931292261056e-05, + "loss": 2.4373, + "step": 13924 + }, + { + "epoch": 1.1237995319183278, + "grad_norm": 0.6860260367393494, + "learning_rate": 4.2596385422481985e-05, + "loss": 2.4457, + "step": 13925 + }, + { + "epoch": 1.123880235654911, + "grad_norm": 0.6556448936462402, + "learning_rate": 4.2583459353001595e-05, + "loss": 2.4165, + "step": 13926 + }, + { + "epoch": 1.1239609393914938, + "grad_norm": 0.729131281375885, + "learning_rate": 4.257053471449144e-05, + "loss": 2.4124, + "step": 13927 + }, + { + "epoch": 1.1240416431280769, + "grad_norm": 0.6941910982131958, + "learning_rate": 4.2557611507273684e-05, + "loss": 2.4095, + "step": 13928 + }, + { + "epoch": 1.12412234686466, + "grad_norm": 0.6390536427497864, + "learning_rate": 4.25446897316704e-05, + "loss": 2.4221, + "step": 13929 + }, + { + "epoch": 1.1242030506012428, + "grad_norm": 0.7034881114959717, + "learning_rate": 4.253176938800365e-05, + "loss": 2.4685, + "step": 13930 + }, + { + "epoch": 1.124283754337826, + "grad_norm": 0.6975526809692383, + "learning_rate": 4.251885047659542e-05, + "loss": 2.4771, + "step": 13931 + }, + { + "epoch": 1.1243644580744088, + "grad_norm": 0.7020023465156555, + "learning_rate": 4.2505932997767695e-05, + "loss": 2.4746, + "step": 13932 + }, + { + "epoch": 1.1244451618109919, + "grad_norm": 0.7207093238830566, + "learning_rate": 4.2493016951842444e-05, + "loss": 2.4707, + "step": 13933 + }, + { + "epoch": 1.124525865547575, + "grad_norm": 0.7711251974105835, + "learning_rate": 4.24801023391415e-05, + "loss": 2.5104, + "step": 13934 + }, + { + "epoch": 1.1246065692841578, + "grad_norm": 0.7324040532112122, + "learning_rate": 4.246718915998677e-05, + "loss": 2.4257, + "step": 13935 + }, + { + "epoch": 1.124687273020741, + "grad_norm": 0.6532757878303528, + "learning_rate": 4.2454277414700116e-05, + "loss": 2.3708, + "step": 13936 + }, + { + "epoch": 1.1247679767573238, + "grad_norm": 0.6933012008666992, + "learning_rate": 4.244136710360325e-05, 
+ "loss": 2.4985, + "step": 13937 + }, + { + "epoch": 1.1248486804939068, + "grad_norm": 0.6787589192390442, + "learning_rate": 4.242845822701798e-05, + "loss": 2.402, + "step": 13938 + }, + { + "epoch": 1.12492938423049, + "grad_norm": 0.6567786931991577, + "learning_rate": 4.241555078526602e-05, + "loss": 2.4295, + "step": 13939 + }, + { + "epoch": 1.1250100879670728, + "grad_norm": 0.6962547302246094, + "learning_rate": 4.2402644778669074e-05, + "loss": 2.4006, + "step": 13940 + }, + { + "epoch": 1.125090791703656, + "grad_norm": 0.7152721285820007, + "learning_rate": 4.238974020754877e-05, + "loss": 2.4757, + "step": 13941 + }, + { + "epoch": 1.125171495440239, + "grad_norm": 0.6869861483573914, + "learning_rate": 4.237683707222677e-05, + "loss": 2.3877, + "step": 13942 + }, + { + "epoch": 1.1252521991768218, + "grad_norm": 0.6951470971107483, + "learning_rate": 4.236393537302459e-05, + "loss": 2.3755, + "step": 13943 + }, + { + "epoch": 1.125332902913405, + "grad_norm": 0.6997567415237427, + "learning_rate": 4.2351035110263805e-05, + "loss": 2.4731, + "step": 13944 + }, + { + "epoch": 1.125413606649988, + "grad_norm": 0.6765854358673096, + "learning_rate": 4.23381362842659e-05, + "loss": 2.4004, + "step": 13945 + }, + { + "epoch": 1.1254943103865709, + "grad_norm": 0.7046722173690796, + "learning_rate": 4.2325238895352426e-05, + "loss": 2.4379, + "step": 13946 + }, + { + "epoch": 1.125575014123154, + "grad_norm": 0.6862985491752625, + "learning_rate": 4.231234294384472e-05, + "loss": 2.4614, + "step": 13947 + }, + { + "epoch": 1.1256557178597368, + "grad_norm": 0.6637778282165527, + "learning_rate": 4.229944843006422e-05, + "loss": 2.4412, + "step": 13948 + }, + { + "epoch": 1.12573642159632, + "grad_norm": 0.7042228579521179, + "learning_rate": 4.228655535433231e-05, + "loss": 2.4296, + "step": 13949 + }, + { + "epoch": 1.1258171253329028, + "grad_norm": 0.6767764687538147, + "learning_rate": 4.227366371697029e-05, + "loss": 2.409, + "step": 13950 + }, + { + 
"epoch": 1.1258978290694859, + "grad_norm": 0.6886798143386841, + "learning_rate": 4.226077351829948e-05, + "loss": 2.4786, + "step": 13951 + }, + { + "epoch": 1.125978532806069, + "grad_norm": 0.7723653316497803, + "learning_rate": 4.224788475864115e-05, + "loss": 2.4111, + "step": 13952 + }, + { + "epoch": 1.1260592365426518, + "grad_norm": 0.7614055275917053, + "learning_rate": 4.2234997438316473e-05, + "loss": 2.5055, + "step": 13953 + }, + { + "epoch": 1.126139940279235, + "grad_norm": 0.7195241451263428, + "learning_rate": 4.222211155764665e-05, + "loss": 2.411, + "step": 13954 + }, + { + "epoch": 1.126220644015818, + "grad_norm": 0.7130021452903748, + "learning_rate": 4.220922711695288e-05, + "loss": 2.4819, + "step": 13955 + }, + { + "epoch": 1.1263013477524009, + "grad_norm": 0.6972241401672363, + "learning_rate": 4.2196344116556194e-05, + "loss": 2.4611, + "step": 13956 + }, + { + "epoch": 1.126382051488984, + "grad_norm": 0.7023231387138367, + "learning_rate": 4.218346255677772e-05, + "loss": 2.4509, + "step": 13957 + }, + { + "epoch": 1.126462755225567, + "grad_norm": 0.6959301829338074, + "learning_rate": 4.2170582437938534e-05, + "loss": 2.4441, + "step": 13958 + }, + { + "epoch": 1.12654345896215, + "grad_norm": 0.7423149347305298, + "learning_rate": 4.2157703760359555e-05, + "loss": 2.4452, + "step": 13959 + }, + { + "epoch": 1.126624162698733, + "grad_norm": 0.6587820053100586, + "learning_rate": 4.214482652436177e-05, + "loss": 2.3936, + "step": 13960 + }, + { + "epoch": 1.1267048664353159, + "grad_norm": 0.6601768136024475, + "learning_rate": 4.213195073026618e-05, + "loss": 2.453, + "step": 13961 + }, + { + "epoch": 1.126785570171899, + "grad_norm": 0.6986891031265259, + "learning_rate": 4.2119076378393676e-05, + "loss": 2.452, + "step": 13962 + }, + { + "epoch": 1.126866273908482, + "grad_norm": 0.7207025289535522, + "learning_rate": 4.2106203469065055e-05, + "loss": 2.4048, + "step": 13963 + }, + { + "epoch": 1.126946977645065, + "grad_norm": 
0.6731177568435669, + "learning_rate": 4.2093332002601184e-05, + "loss": 2.4573, + "step": 13964 + }, + { + "epoch": 1.127027681381648, + "grad_norm": 0.7330070734024048, + "learning_rate": 4.208046197932288e-05, + "loss": 2.4274, + "step": 13965 + }, + { + "epoch": 1.1271083851182309, + "grad_norm": 0.7008770704269409, + "learning_rate": 4.206759339955084e-05, + "loss": 2.4933, + "step": 13966 + }, + { + "epoch": 1.127189088854814, + "grad_norm": 0.8309584259986877, + "learning_rate": 4.20547262636058e-05, + "loss": 2.3857, + "step": 13967 + }, + { + "epoch": 1.127269792591397, + "grad_norm": 0.6705843210220337, + "learning_rate": 4.204186057180849e-05, + "loss": 2.4303, + "step": 13968 + }, + { + "epoch": 1.12735049632798, + "grad_norm": 0.7526851296424866, + "learning_rate": 4.202899632447949e-05, + "loss": 2.455, + "step": 13969 + }, + { + "epoch": 1.127431200064563, + "grad_norm": 0.6690995097160339, + "learning_rate": 4.201613352193943e-05, + "loss": 2.4398, + "step": 13970 + }, + { + "epoch": 1.127511903801146, + "grad_norm": 0.6946840286254883, + "learning_rate": 4.20032721645089e-05, + "loss": 2.4032, + "step": 13971 + }, + { + "epoch": 1.127592607537729, + "grad_norm": 0.7438863515853882, + "learning_rate": 4.1990412252508426e-05, + "loss": 2.4644, + "step": 13972 + }, + { + "epoch": 1.127673311274312, + "grad_norm": 0.6975359916687012, + "learning_rate": 4.197755378625852e-05, + "loss": 2.3991, + "step": 13973 + }, + { + "epoch": 1.1277540150108951, + "grad_norm": 0.6799279451370239, + "learning_rate": 4.196469676607968e-05, + "loss": 2.4328, + "step": 13974 + }, + { + "epoch": 1.127834718747478, + "grad_norm": 0.7014481425285339, + "learning_rate": 4.1951841192292274e-05, + "loss": 2.5045, + "step": 13975 + }, + { + "epoch": 1.127915422484061, + "grad_norm": 0.7074011564254761, + "learning_rate": 4.1938987065216716e-05, + "loss": 2.4583, + "step": 13976 + }, + { + "epoch": 1.127996126220644, + "grad_norm": 0.7246339917182922, + "learning_rate": 
4.192613438517338e-05, + "loss": 2.447, + "step": 13977 + }, + { + "epoch": 1.128076829957227, + "grad_norm": 0.6757462620735168, + "learning_rate": 4.191328315248262e-05, + "loss": 2.4181, + "step": 13978 + }, + { + "epoch": 1.12815753369381, + "grad_norm": 0.6758493185043335, + "learning_rate": 4.1900433367464644e-05, + "loss": 2.4837, + "step": 13979 + }, + { + "epoch": 1.128238237430393, + "grad_norm": 0.6782165765762329, + "learning_rate": 4.1887585030439736e-05, + "loss": 2.3946, + "step": 13980 + }, + { + "epoch": 1.128318941166976, + "grad_norm": 0.7176415324211121, + "learning_rate": 4.187473814172812e-05, + "loss": 2.4538, + "step": 13981 + }, + { + "epoch": 1.128399644903559, + "grad_norm": 0.6636224985122681, + "learning_rate": 4.186189270164997e-05, + "loss": 2.4493, + "step": 13982 + }, + { + "epoch": 1.128480348640142, + "grad_norm": 0.6613143086433411, + "learning_rate": 4.184904871052544e-05, + "loss": 2.4994, + "step": 13983 + }, + { + "epoch": 1.128561052376725, + "grad_norm": 0.7148364186286926, + "learning_rate": 4.183620616867465e-05, + "loss": 2.4673, + "step": 13984 + }, + { + "epoch": 1.128641756113308, + "grad_norm": 0.6657952070236206, + "learning_rate": 4.1823365076417606e-05, + "loss": 2.3915, + "step": 13985 + }, + { + "epoch": 1.128722459849891, + "grad_norm": 0.7135687470436096, + "learning_rate": 4.181052543407439e-05, + "loss": 2.4961, + "step": 13986 + }, + { + "epoch": 1.1288031635864741, + "grad_norm": 0.7245377898216248, + "learning_rate": 4.179768724196501e-05, + "loss": 2.4519, + "step": 13987 + }, + { + "epoch": 1.128883867323057, + "grad_norm": 0.6832938194274902, + "learning_rate": 4.1784850500409376e-05, + "loss": 2.4471, + "step": 13988 + }, + { + "epoch": 1.12896457105964, + "grad_norm": 0.7303032279014587, + "learning_rate": 4.177201520972746e-05, + "loss": 2.3906, + "step": 13989 + }, + { + "epoch": 1.1290452747962232, + "grad_norm": 0.698581874370575, + "learning_rate": 4.175918137023911e-05, + "loss": 2.4667, + 
"step": 13990 + }, + { + "epoch": 1.129125978532806, + "grad_norm": 0.69133061170578, + "learning_rate": 4.174634898226422e-05, + "loss": 2.4285, + "step": 13991 + }, + { + "epoch": 1.1292066822693891, + "grad_norm": 0.7029501795768738, + "learning_rate": 4.1733518046122576e-05, + "loss": 2.4839, + "step": 13992 + }, + { + "epoch": 1.129287386005972, + "grad_norm": 0.7566521167755127, + "learning_rate": 4.172068856213398e-05, + "loss": 2.5019, + "step": 13993 + }, + { + "epoch": 1.129368089742555, + "grad_norm": 0.697998046875, + "learning_rate": 4.1707860530618204e-05, + "loss": 2.4305, + "step": 13994 + }, + { + "epoch": 1.1294487934791382, + "grad_norm": 0.674194872379303, + "learning_rate": 4.169503395189489e-05, + "loss": 2.4361, + "step": 13995 + }, + { + "epoch": 1.129529497215721, + "grad_norm": 0.6936436891555786, + "learning_rate": 4.168220882628373e-05, + "loss": 2.518, + "step": 13996 + }, + { + "epoch": 1.1296102009523041, + "grad_norm": 0.6831670999526978, + "learning_rate": 4.166938515410442e-05, + "loss": 2.4197, + "step": 13997 + }, + { + "epoch": 1.129690904688887, + "grad_norm": 0.7323662638664246, + "learning_rate": 4.165656293567647e-05, + "loss": 2.4555, + "step": 13998 + }, + { + "epoch": 1.12977160842547, + "grad_norm": 0.7699782848358154, + "learning_rate": 4.164374217131948e-05, + "loss": 2.4456, + "step": 13999 + }, + { + "epoch": 1.1298523121620532, + "grad_norm": 0.7009051442146301, + "learning_rate": 4.163092286135297e-05, + "loss": 2.4429, + "step": 14000 + }, + { + "epoch": 1.1298523121620532, + "eval_loss": 2.4034411907196045, + "eval_runtime": 771.1158, + "eval_samples_per_second": 3.398, + "eval_steps_per_second": 0.567, + "step": 14000 + }, + { + "epoch": 1.129933015898636, + "grad_norm": 0.674665093421936, + "learning_rate": 4.1618105006096456e-05, + "loss": 2.4127, + "step": 14001 + }, + { + "epoch": 1.1300137196352191, + "grad_norm": 0.7332403659820557, + "learning_rate": 4.1605288605869365e-05, + "loss": 2.4854, + "step": 
14002 + }, + { + "epoch": 1.1300944233718022, + "grad_norm": 0.70233553647995, + "learning_rate": 4.159247366099117e-05, + "loss": 2.4433, + "step": 14003 + }, + { + "epoch": 1.130175127108385, + "grad_norm": 0.6259445548057556, + "learning_rate": 4.157966017178118e-05, + "loss": 2.3605, + "step": 14004 + }, + { + "epoch": 1.1302558308449682, + "grad_norm": 0.717408299446106, + "learning_rate": 4.1566848138558755e-05, + "loss": 2.4378, + "step": 14005 + }, + { + "epoch": 1.130336534581551, + "grad_norm": 0.6973297595977783, + "learning_rate": 4.155403756164323e-05, + "loss": 2.4363, + "step": 14006 + }, + { + "epoch": 1.1304172383181341, + "grad_norm": 0.7204940915107727, + "learning_rate": 4.154122844135391e-05, + "loss": 2.4814, + "step": 14007 + }, + { + "epoch": 1.1304979420547172, + "grad_norm": 0.8976696133613586, + "learning_rate": 4.1528420778009935e-05, + "loss": 2.4654, + "step": 14008 + }, + { + "epoch": 1.1305786457913, + "grad_norm": 0.7270354628562927, + "learning_rate": 4.151561457193057e-05, + "loss": 2.4088, + "step": 14009 + }, + { + "epoch": 1.1306593495278832, + "grad_norm": 0.7200367450714111, + "learning_rate": 4.1502809823434985e-05, + "loss": 2.4412, + "step": 14010 + }, + { + "epoch": 1.130740053264466, + "grad_norm": 0.7593986392021179, + "learning_rate": 4.149000653284227e-05, + "loss": 2.5058, + "step": 14011 + }, + { + "epoch": 1.1308207570010491, + "grad_norm": 0.7322795987129211, + "learning_rate": 4.147720470047155e-05, + "loss": 2.4899, + "step": 14012 + }, + { + "epoch": 1.1309014607376322, + "grad_norm": 0.6649030447006226, + "learning_rate": 4.1464404326641905e-05, + "loss": 2.4358, + "step": 14013 + }, + { + "epoch": 1.130982164474215, + "grad_norm": 0.7258814573287964, + "learning_rate": 4.145160541167228e-05, + "loss": 2.4732, + "step": 14014 + }, + { + "epoch": 1.1310628682107982, + "grad_norm": 0.7414976358413696, + "learning_rate": 4.1438807955881695e-05, + "loss": 2.4157, + "step": 14015 + }, + { + "epoch": 
1.1311435719473812, + "grad_norm": 0.6813236474990845, + "learning_rate": 4.142601195958914e-05, + "loss": 2.3966, + "step": 14016 + }, + { + "epoch": 1.131224275683964, + "grad_norm": 0.6715923547744751, + "learning_rate": 4.141321742311344e-05, + "loss": 2.4358, + "step": 14017 + }, + { + "epoch": 1.1313049794205472, + "grad_norm": 0.7174912691116333, + "learning_rate": 4.14004243467735e-05, + "loss": 2.4838, + "step": 14018 + }, + { + "epoch": 1.1313856831571303, + "grad_norm": 0.6945109963417053, + "learning_rate": 4.138763273088821e-05, + "loss": 2.4674, + "step": 14019 + }, + { + "epoch": 1.1314663868937131, + "grad_norm": 0.6759494543075562, + "learning_rate": 4.137484257577629e-05, + "loss": 2.4659, + "step": 14020 + }, + { + "epoch": 1.1315470906302962, + "grad_norm": 0.7077876925468445, + "learning_rate": 4.1362053881756534e-05, + "loss": 2.4731, + "step": 14021 + }, + { + "epoch": 1.131627794366879, + "grad_norm": 0.6769500970840454, + "learning_rate": 4.1349266649147654e-05, + "loss": 2.3606, + "step": 14022 + }, + { + "epoch": 1.1317084981034622, + "grad_norm": 0.7104208469390869, + "learning_rate": 4.1336480878268424e-05, + "loss": 2.4626, + "step": 14023 + }, + { + "epoch": 1.1317892018400453, + "grad_norm": 0.7102686762809753, + "learning_rate": 4.132369656943741e-05, + "loss": 2.4545, + "step": 14024 + }, + { + "epoch": 1.1318699055766281, + "grad_norm": 0.7773897647857666, + "learning_rate": 4.1310913722973256e-05, + "loss": 2.5107, + "step": 14025 + }, + { + "epoch": 1.1319506093132112, + "grad_norm": 0.6427130103111267, + "learning_rate": 4.1298132339194585e-05, + "loss": 2.4349, + "step": 14026 + }, + { + "epoch": 1.132031313049794, + "grad_norm": 0.6725162863731384, + "learning_rate": 4.128535241841987e-05, + "loss": 2.4566, + "step": 14027 + }, + { + "epoch": 1.1321120167863772, + "grad_norm": 0.7182251214981079, + "learning_rate": 4.127257396096764e-05, + "loss": 2.4472, + "step": 14028 + }, + { + "epoch": 1.1321927205229603, + "grad_norm": 
0.6712302565574646, + "learning_rate": 4.1259796967156426e-05, + "loss": 2.4326, + "step": 14029 + }, + { + "epoch": 1.1322734242595431, + "grad_norm": 0.7726041078567505, + "learning_rate": 4.124702143730459e-05, + "loss": 2.4994, + "step": 14030 + }, + { + "epoch": 1.1323541279961262, + "grad_norm": 0.651899516582489, + "learning_rate": 4.123424737173056e-05, + "loss": 2.4244, + "step": 14031 + }, + { + "epoch": 1.1324348317327093, + "grad_norm": 0.6646261215209961, + "learning_rate": 4.12214747707527e-05, + "loss": 2.5027, + "step": 14032 + }, + { + "epoch": 1.1325155354692922, + "grad_norm": 0.729098916053772, + "learning_rate": 4.120870363468933e-05, + "loss": 2.5117, + "step": 14033 + }, + { + "epoch": 1.1325962392058753, + "grad_norm": 0.7056638598442078, + "learning_rate": 4.119593396385876e-05, + "loss": 2.4279, + "step": 14034 + }, + { + "epoch": 1.1326769429424584, + "grad_norm": 0.7051844000816345, + "learning_rate": 4.1183165758579255e-05, + "loss": 2.3844, + "step": 14035 + }, + { + "epoch": 1.1327576466790412, + "grad_norm": 0.6954311728477478, + "learning_rate": 4.1170399019168984e-05, + "loss": 2.4041, + "step": 14036 + }, + { + "epoch": 1.1328383504156243, + "grad_norm": 0.650044858455658, + "learning_rate": 4.1157633745946135e-05, + "loss": 2.4397, + "step": 14037 + }, + { + "epoch": 1.1329190541522072, + "grad_norm": 0.6974380016326904, + "learning_rate": 4.114486993922888e-05, + "loss": 2.4391, + "step": 14038 + }, + { + "epoch": 1.1329997578887903, + "grad_norm": 0.7252807021141052, + "learning_rate": 4.113210759933536e-05, + "loss": 2.4471, + "step": 14039 + }, + { + "epoch": 1.1330804616253733, + "grad_norm": 0.7001414895057678, + "learning_rate": 4.111934672658354e-05, + "loss": 2.402, + "step": 14040 + }, + { + "epoch": 1.1331611653619562, + "grad_norm": 0.7420533895492554, + "learning_rate": 4.110658732129153e-05, + "loss": 2.4987, + "step": 14041 + }, + { + "epoch": 1.1332418690985393, + "grad_norm": 0.6850644946098328, + 
"learning_rate": 4.1093829383777315e-05, + "loss": 2.4355, + "step": 14042 + }, + { + "epoch": 1.1333225728351222, + "grad_norm": 0.6905977725982666, + "learning_rate": 4.108107291435885e-05, + "loss": 2.4818, + "step": 14043 + }, + { + "epoch": 1.1334032765717053, + "grad_norm": 0.6555112600326538, + "learning_rate": 4.106831791335407e-05, + "loss": 2.425, + "step": 14044 + }, + { + "epoch": 1.1334839803082883, + "grad_norm": 0.6570355892181396, + "learning_rate": 4.105556438108089e-05, + "loss": 2.4232, + "step": 14045 + }, + { + "epoch": 1.1335646840448712, + "grad_norm": 0.7910747528076172, + "learning_rate": 4.104281231785708e-05, + "loss": 2.484, + "step": 14046 + }, + { + "epoch": 1.1336453877814543, + "grad_norm": 0.6581952571868896, + "learning_rate": 4.103006172400052e-05, + "loss": 2.4102, + "step": 14047 + }, + { + "epoch": 1.1337260915180374, + "grad_norm": 0.6834773421287537, + "learning_rate": 4.1017312599828994e-05, + "loss": 2.4602, + "step": 14048 + }, + { + "epoch": 1.1338067952546202, + "grad_norm": 0.7588350772857666, + "learning_rate": 4.1004564945660195e-05, + "loss": 2.5059, + "step": 14049 + }, + { + "epoch": 1.1338874989912033, + "grad_norm": 0.6604699492454529, + "learning_rate": 4.099181876181185e-05, + "loss": 2.4403, + "step": 14050 + }, + { + "epoch": 1.1339682027277862, + "grad_norm": 0.6957669258117676, + "learning_rate": 4.097907404860163e-05, + "loss": 2.4218, + "step": 14051 + }, + { + "epoch": 1.1340489064643693, + "grad_norm": 0.7091849446296692, + "learning_rate": 4.0966330806347166e-05, + "loss": 2.4396, + "step": 14052 + }, + { + "epoch": 1.1341296102009524, + "grad_norm": 0.6637482047080994, + "learning_rate": 4.095358903536605e-05, + "loss": 2.4514, + "step": 14053 + }, + { + "epoch": 1.1342103139375352, + "grad_norm": 0.7485960125923157, + "learning_rate": 4.0940848735975846e-05, + "loss": 2.4401, + "step": 14054 + }, + { + "epoch": 1.1342910176741183, + "grad_norm": 0.6509774327278137, + "learning_rate": 
4.092810990849411e-05, + "loss": 2.4575, + "step": 14055 + }, + { + "epoch": 1.1343717214107012, + "grad_norm": 0.7151626348495483, + "learning_rate": 4.091537255323825e-05, + "loss": 2.45, + "step": 14056 + }, + { + "epoch": 1.1344524251472843, + "grad_norm": 0.7536267042160034, + "learning_rate": 4.0902636670525764e-05, + "loss": 2.497, + "step": 14057 + }, + { + "epoch": 1.1345331288838674, + "grad_norm": 0.7779545783996582, + "learning_rate": 4.0889902260674086e-05, + "loss": 2.412, + "step": 14058 + }, + { + "epoch": 1.1346138326204502, + "grad_norm": 0.7211748957633972, + "learning_rate": 4.087716932400052e-05, + "loss": 2.4727, + "step": 14059 + }, + { + "epoch": 1.1346945363570333, + "grad_norm": 0.6710701584815979, + "learning_rate": 4.086443786082245e-05, + "loss": 2.4318, + "step": 14060 + }, + { + "epoch": 1.1347752400936164, + "grad_norm": 0.7072857022285461, + "learning_rate": 4.085170787145717e-05, + "loss": 2.4672, + "step": 14061 + }, + { + "epoch": 1.1348559438301993, + "grad_norm": 0.6475152969360352, + "learning_rate": 4.083897935622194e-05, + "loss": 2.4104, + "step": 14062 + }, + { + "epoch": 1.1349366475667824, + "grad_norm": 0.7408067584037781, + "learning_rate": 4.0826252315433986e-05, + "loss": 2.4129, + "step": 14063 + }, + { + "epoch": 1.1350173513033655, + "grad_norm": 0.732540488243103, + "learning_rate": 4.081352674941056e-05, + "loss": 2.4209, + "step": 14064 + }, + { + "epoch": 1.1350980550399483, + "grad_norm": 0.6933332681655884, + "learning_rate": 4.080080265846872e-05, + "loss": 2.3797, + "step": 14065 + }, + { + "epoch": 1.1351787587765314, + "grad_norm": 0.6507896780967712, + "learning_rate": 4.078808004292561e-05, + "loss": 2.4372, + "step": 14066 + }, + { + "epoch": 1.1352594625131143, + "grad_norm": 0.729292094707489, + "learning_rate": 4.0775358903098384e-05, + "loss": 2.5513, + "step": 14067 + }, + { + "epoch": 1.1353401662496974, + "grad_norm": 0.692757248878479, + "learning_rate": 4.076263923930398e-05, + "loss": 
2.4228, + "step": 14068 + }, + { + "epoch": 1.1354208699862804, + "grad_norm": 0.7028260231018066, + "learning_rate": 4.074992105185946e-05, + "loss": 2.4478, + "step": 14069 + }, + { + "epoch": 1.1355015737228633, + "grad_norm": 0.65067058801651, + "learning_rate": 4.073720434108179e-05, + "loss": 2.3729, + "step": 14070 + }, + { + "epoch": 1.1355822774594464, + "grad_norm": 0.6884061098098755, + "learning_rate": 4.0724489107287933e-05, + "loss": 2.3693, + "step": 14071 + }, + { + "epoch": 1.1356629811960293, + "grad_norm": 0.70686936378479, + "learning_rate": 4.071177535079472e-05, + "loss": 2.4989, + "step": 14072 + }, + { + "epoch": 1.1357436849326124, + "grad_norm": 0.6792482733726501, + "learning_rate": 4.0699063071919016e-05, + "loss": 2.393, + "step": 14073 + }, + { + "epoch": 1.1358243886691954, + "grad_norm": 0.7231085896492004, + "learning_rate": 4.0686352270977745e-05, + "loss": 2.4597, + "step": 14074 + }, + { + "epoch": 1.1359050924057783, + "grad_norm": 0.8024532198905945, + "learning_rate": 4.067364294828758e-05, + "loss": 2.4409, + "step": 14075 + }, + { + "epoch": 1.1359857961423614, + "grad_norm": 0.6761424541473389, + "learning_rate": 4.066093510416532e-05, + "loss": 2.4598, + "step": 14076 + }, + { + "epoch": 1.1360664998789445, + "grad_norm": 0.7075559496879578, + "learning_rate": 4.064822873892771e-05, + "loss": 2.4649, + "step": 14077 + }, + { + "epoch": 1.1361472036155273, + "grad_norm": 0.6292272806167603, + "learning_rate": 4.063552385289134e-05, + "loss": 2.445, + "step": 14078 + }, + { + "epoch": 1.1362279073521104, + "grad_norm": 0.6435273885726929, + "learning_rate": 4.06228204463729e-05, + "loss": 2.4105, + "step": 14079 + }, + { + "epoch": 1.1363086110886935, + "grad_norm": 0.7135637402534485, + "learning_rate": 4.061011851968903e-05, + "loss": 2.3907, + "step": 14080 + }, + { + "epoch": 1.1363893148252764, + "grad_norm": 0.7424013614654541, + "learning_rate": 4.059741807315621e-05, + "loss": 2.4405, + "step": 14081 + }, + { + 
"epoch": 1.1364700185618595, + "grad_norm": 0.6649916768074036, + "learning_rate": 4.0584719107091016e-05, + "loss": 2.4314, + "step": 14082 + }, + { + "epoch": 1.1365507222984423, + "grad_norm": 0.6700563430786133, + "learning_rate": 4.0572021621809944e-05, + "loss": 2.4093, + "step": 14083 + }, + { + "epoch": 1.1366314260350254, + "grad_norm": 0.6740709543228149, + "learning_rate": 4.055932561762942e-05, + "loss": 2.4301, + "step": 14084 + }, + { + "epoch": 1.1367121297716085, + "grad_norm": 0.7039555907249451, + "learning_rate": 4.0546631094865895e-05, + "loss": 2.4427, + "step": 14085 + }, + { + "epoch": 1.1367928335081914, + "grad_norm": 0.7461164593696594, + "learning_rate": 4.053393805383573e-05, + "loss": 2.3865, + "step": 14086 + }, + { + "epoch": 1.1368735372447745, + "grad_norm": 0.6808290481567383, + "learning_rate": 4.0521246494855316e-05, + "loss": 2.3738, + "step": 14087 + }, + { + "epoch": 1.1369542409813573, + "grad_norm": 0.6942760944366455, + "learning_rate": 4.0508556418240875e-05, + "loss": 2.4351, + "step": 14088 + }, + { + "epoch": 1.1370349447179404, + "grad_norm": 0.7615510821342468, + "learning_rate": 4.049586782430872e-05, + "loss": 2.3968, + "step": 14089 + }, + { + "epoch": 1.1371156484545235, + "grad_norm": 0.7240662574768066, + "learning_rate": 4.048318071337512e-05, + "loss": 2.4046, + "step": 14090 + }, + { + "epoch": 1.1371963521911064, + "grad_norm": 0.7286471128463745, + "learning_rate": 4.047049508575621e-05, + "loss": 2.4039, + "step": 14091 + }, + { + "epoch": 1.1372770559276895, + "grad_norm": 0.7031459212303162, + "learning_rate": 4.045781094176816e-05, + "loss": 2.4494, + "step": 14092 + }, + { + "epoch": 1.1373577596642725, + "grad_norm": 0.7116301655769348, + "learning_rate": 4.0445128281727116e-05, + "loss": 2.3991, + "step": 14093 + }, + { + "epoch": 1.1374384634008554, + "grad_norm": 0.6719788312911987, + "learning_rate": 4.043244710594914e-05, + "loss": 2.4823, + "step": 14094 + }, + { + "epoch": 1.1375191671374385, + 
"grad_norm": 0.6770508885383606, + "learning_rate": 4.041976741475031e-05, + "loss": 2.4362, + "step": 14095 + }, + { + "epoch": 1.1375998708740216, + "grad_norm": 0.6808609962463379, + "learning_rate": 4.040708920844666e-05, + "loss": 2.435, + "step": 14096 + }, + { + "epoch": 1.1376805746106045, + "grad_norm": 0.7445514798164368, + "learning_rate": 4.0394412487354074e-05, + "loss": 2.4749, + "step": 14097 + }, + { + "epoch": 1.1377612783471875, + "grad_norm": 0.7024775743484497, + "learning_rate": 4.038173725178854e-05, + "loss": 2.4354, + "step": 14098 + }, + { + "epoch": 1.1378419820837704, + "grad_norm": 0.6925685405731201, + "learning_rate": 4.0369063502066e-05, + "loss": 2.4462, + "step": 14099 + }, + { + "epoch": 1.1379226858203535, + "grad_norm": 0.6970539689064026, + "learning_rate": 4.035639123850223e-05, + "loss": 2.3842, + "step": 14100 + }, + { + "epoch": 1.1380033895569364, + "grad_norm": 0.6571836471557617, + "learning_rate": 4.0343720461413107e-05, + "loss": 2.4213, + "step": 14101 + }, + { + "epoch": 1.1380840932935194, + "grad_norm": 0.7264918684959412, + "learning_rate": 4.033105117111441e-05, + "loss": 2.4697, + "step": 14102 + }, + { + "epoch": 1.1381647970301025, + "grad_norm": 0.6929560899734497, + "learning_rate": 4.03183833679219e-05, + "loss": 2.461, + "step": 14103 + }, + { + "epoch": 1.1382455007666854, + "grad_norm": 0.6533559560775757, + "learning_rate": 4.030571705215128e-05, + "loss": 2.4336, + "step": 14104 + }, + { + "epoch": 1.1383262045032685, + "grad_norm": 0.7372364401817322, + "learning_rate": 4.0293052224118234e-05, + "loss": 2.4396, + "step": 14105 + }, + { + "epoch": 1.1384069082398516, + "grad_norm": 0.6736310720443726, + "learning_rate": 4.028038888413844e-05, + "loss": 2.4123, + "step": 14106 + }, + { + "epoch": 1.1384876119764344, + "grad_norm": 0.6898338794708252, + "learning_rate": 4.026772703252742e-05, + "loss": 2.431, + "step": 14107 + }, + { + "epoch": 1.1385683157130175, + "grad_norm": 0.7933369278907776, + 
"learning_rate": 4.02550666696008e-05, + "loss": 2.4669, + "step": 14108 + }, + { + "epoch": 1.1386490194496006, + "grad_norm": 0.7218122482299805, + "learning_rate": 4.024240779567412e-05, + "loss": 2.3761, + "step": 14109 + }, + { + "epoch": 1.1387297231861835, + "grad_norm": 0.7018248438835144, + "learning_rate": 4.022975041106281e-05, + "loss": 2.4011, + "step": 14110 + }, + { + "epoch": 1.1388104269227666, + "grad_norm": 0.6709668040275574, + "learning_rate": 4.0217094516082364e-05, + "loss": 2.426, + "step": 14111 + }, + { + "epoch": 1.1388911306593494, + "grad_norm": 0.7241504192352295, + "learning_rate": 4.0204440111048195e-05, + "loss": 2.4085, + "step": 14112 + }, + { + "epoch": 1.1389718343959325, + "grad_norm": 0.731347382068634, + "learning_rate": 4.0191787196275675e-05, + "loss": 2.502, + "step": 14113 + }, + { + "epoch": 1.1390525381325156, + "grad_norm": 0.6630167365074158, + "learning_rate": 4.0179135772080166e-05, + "loss": 2.3999, + "step": 14114 + }, + { + "epoch": 1.1391332418690985, + "grad_norm": 0.7094748616218567, + "learning_rate": 4.016648583877698e-05, + "loss": 2.4666, + "step": 14115 + }, + { + "epoch": 1.1392139456056816, + "grad_norm": 0.7262436151504517, + "learning_rate": 4.0153837396681395e-05, + "loss": 2.4369, + "step": 14116 + }, + { + "epoch": 1.1392946493422644, + "grad_norm": 0.6796039938926697, + "learning_rate": 4.014119044610859e-05, + "loss": 2.4607, + "step": 14117 + }, + { + "epoch": 1.1393753530788475, + "grad_norm": 0.6690036058425903, + "learning_rate": 4.0128544987373785e-05, + "loss": 2.4145, + "step": 14118 + }, + { + "epoch": 1.1394560568154306, + "grad_norm": 0.6987181305885315, + "learning_rate": 4.011590102079219e-05, + "loss": 2.4294, + "step": 14119 + }, + { + "epoch": 1.1395367605520135, + "grad_norm": 0.6756789684295654, + "learning_rate": 4.0103258546678836e-05, + "loss": 2.396, + "step": 14120 + }, + { + "epoch": 1.1396174642885966, + "grad_norm": 0.7027772068977356, + "learning_rate": 
4.009061756534885e-05, + "loss": 2.3971, + "step": 14121 + }, + { + "epoch": 1.1396981680251796, + "grad_norm": 0.6872174143791199, + "learning_rate": 4.007797807711732e-05, + "loss": 2.4297, + "step": 14122 + }, + { + "epoch": 1.1397788717617625, + "grad_norm": 0.7213007211685181, + "learning_rate": 4.006534008229914e-05, + "loss": 2.4792, + "step": 14123 + }, + { + "epoch": 1.1398595754983456, + "grad_norm": 0.6771649122238159, + "learning_rate": 4.0052703581209395e-05, + "loss": 2.4397, + "step": 14124 + }, + { + "epoch": 1.1399402792349287, + "grad_norm": 0.6577184796333313, + "learning_rate": 4.0040068574163013e-05, + "loss": 2.4113, + "step": 14125 + }, + { + "epoch": 1.1400209829715116, + "grad_norm": 0.7493160367012024, + "learning_rate": 4.002743506147483e-05, + "loss": 2.4454, + "step": 14126 + }, + { + "epoch": 1.1401016867080946, + "grad_norm": 0.6820357441902161, + "learning_rate": 4.0014803043459726e-05, + "loss": 2.4126, + "step": 14127 + }, + { + "epoch": 1.1401823904446775, + "grad_norm": 0.7177188992500305, + "learning_rate": 4.000217252043258e-05, + "loss": 2.4355, + "step": 14128 + }, + { + "epoch": 1.1402630941812606, + "grad_norm": 0.654371440410614, + "learning_rate": 3.998954349270808e-05, + "loss": 2.4932, + "step": 14129 + }, + { + "epoch": 1.1403437979178437, + "grad_norm": 0.7029837965965271, + "learning_rate": 3.997691596060104e-05, + "loss": 2.4341, + "step": 14130 + }, + { + "epoch": 1.1404245016544265, + "grad_norm": 0.7971171140670776, + "learning_rate": 3.996428992442615e-05, + "loss": 2.4466, + "step": 14131 + }, + { + "epoch": 1.1405052053910096, + "grad_norm": 0.6941849589347839, + "learning_rate": 3.9951665384498114e-05, + "loss": 2.4861, + "step": 14132 + }, + { + "epoch": 1.1405859091275925, + "grad_norm": 0.6657733917236328, + "learning_rate": 3.993904234113153e-05, + "loss": 2.4266, + "step": 14133 + }, + { + "epoch": 1.1406666128641756, + "grad_norm": 0.6780329346656799, + "learning_rate": 3.9926420794641e-05, + "loss": 
2.458, + "step": 14134 + }, + { + "epoch": 1.1407473166007587, + "grad_norm": 0.7070702910423279, + "learning_rate": 3.991380074534109e-05, + "loss": 2.368, + "step": 14135 + }, + { + "epoch": 1.1408280203373415, + "grad_norm": 0.7186575531959534, + "learning_rate": 3.990118219354635e-05, + "loss": 2.4611, + "step": 14136 + }, + { + "epoch": 1.1409087240739246, + "grad_norm": 0.7171763777732849, + "learning_rate": 3.988856513957123e-05, + "loss": 2.4315, + "step": 14137 + }, + { + "epoch": 1.1409894278105077, + "grad_norm": 0.7090228796005249, + "learning_rate": 3.987594958373025e-05, + "loss": 2.4668, + "step": 14138 + }, + { + "epoch": 1.1410701315470906, + "grad_norm": 0.6523951888084412, + "learning_rate": 3.986333552633773e-05, + "loss": 2.4392, + "step": 14139 + }, + { + "epoch": 1.1411508352836737, + "grad_norm": 0.706000804901123, + "learning_rate": 3.98507229677081e-05, + "loss": 2.4382, + "step": 14140 + }, + { + "epoch": 1.1412315390202568, + "grad_norm": 0.6537537574768066, + "learning_rate": 3.983811190815571e-05, + "loss": 2.456, + "step": 14141 + }, + { + "epoch": 1.1413122427568396, + "grad_norm": 0.7509549856185913, + "learning_rate": 3.982550234799479e-05, + "loss": 2.4744, + "step": 14142 + }, + { + "epoch": 1.1413929464934227, + "grad_norm": 0.7188650965690613, + "learning_rate": 3.981289428753967e-05, + "loss": 2.4632, + "step": 14143 + }, + { + "epoch": 1.1414736502300056, + "grad_norm": 0.7563674449920654, + "learning_rate": 3.9800287727104544e-05, + "loss": 2.5063, + "step": 14144 + }, + { + "epoch": 1.1415543539665887, + "grad_norm": 0.8374128341674805, + "learning_rate": 3.978768266700361e-05, + "loss": 2.4942, + "step": 14145 + }, + { + "epoch": 1.1416350577031718, + "grad_norm": 0.7020177841186523, + "learning_rate": 3.9775079107551027e-05, + "loss": 2.4404, + "step": 14146 + }, + { + "epoch": 1.1417157614397546, + "grad_norm": 0.7326170802116394, + "learning_rate": 3.9762477049060895e-05, + "loss": 2.4127, + "step": 14147 + }, + { + 
"epoch": 1.1417964651763377, + "grad_norm": 0.6661173105239868, + "learning_rate": 3.974987649184734e-05, + "loss": 2.4649, + "step": 14148 + }, + { + "epoch": 1.1418771689129206, + "grad_norm": 0.7186033129692078, + "learning_rate": 3.973727743622432e-05, + "loss": 2.4275, + "step": 14149 + }, + { + "epoch": 1.1419578726495037, + "grad_norm": 0.7193881869316101, + "learning_rate": 3.972467988250588e-05, + "loss": 2.4997, + "step": 14150 + }, + { + "epoch": 1.1420385763860867, + "grad_norm": 0.7139542102813721, + "learning_rate": 3.971208383100601e-05, + "loss": 2.4211, + "step": 14151 + }, + { + "epoch": 1.1421192801226696, + "grad_norm": 0.6840166449546814, + "learning_rate": 3.969948928203856e-05, + "loss": 2.4504, + "step": 14152 + }, + { + "epoch": 1.1421999838592527, + "grad_norm": 0.8261072039604187, + "learning_rate": 3.968689623591747e-05, + "loss": 2.4901, + "step": 14153 + }, + { + "epoch": 1.1422806875958358, + "grad_norm": 0.7636086940765381, + "learning_rate": 3.96743046929566e-05, + "loss": 2.4202, + "step": 14154 + }, + { + "epoch": 1.1423613913324187, + "grad_norm": 0.7477976679801941, + "learning_rate": 3.966171465346973e-05, + "loss": 2.492, + "step": 14155 + }, + { + "epoch": 1.1424420950690017, + "grad_norm": 0.7516389489173889, + "learning_rate": 3.9649126117770665e-05, + "loss": 2.4512, + "step": 14156 + }, + { + "epoch": 1.1425227988055846, + "grad_norm": 0.6987521648406982, + "learning_rate": 3.9636539086173174e-05, + "loss": 2.4005, + "step": 14157 + }, + { + "epoch": 1.1426035025421677, + "grad_norm": 0.7242532968521118, + "learning_rate": 3.962395355899088e-05, + "loss": 2.4414, + "step": 14158 + }, + { + "epoch": 1.1426842062787508, + "grad_norm": 0.6616180539131165, + "learning_rate": 3.961136953653749e-05, + "loss": 2.4442, + "step": 14159 + }, + { + "epoch": 1.1427649100153336, + "grad_norm": 0.7165415287017822, + "learning_rate": 3.959878701912667e-05, + "loss": 2.4658, + "step": 14160 + }, + { + "epoch": 1.1428456137519167, + 
"grad_norm": 0.6619318127632141, + "learning_rate": 3.9586206007071926e-05, + "loss": 2.3803, + "step": 14161 + }, + { + "epoch": 1.1429263174884996, + "grad_norm": 0.6654838919639587, + "learning_rate": 3.957362650068684e-05, + "loss": 2.4584, + "step": 14162 + }, + { + "epoch": 1.1430070212250827, + "grad_norm": 0.6947140097618103, + "learning_rate": 3.956104850028496e-05, + "loss": 2.4236, + "step": 14163 + }, + { + "epoch": 1.1430877249616658, + "grad_norm": 0.6510412096977234, + "learning_rate": 3.954847200617973e-05, + "loss": 2.3589, + "step": 14164 + }, + { + "epoch": 1.1431684286982486, + "grad_norm": 0.7550667524337769, + "learning_rate": 3.95358970186846e-05, + "loss": 2.419, + "step": 14165 + }, + { + "epoch": 1.1432491324348317, + "grad_norm": 0.7898361682891846, + "learning_rate": 3.9523323538112975e-05, + "loss": 2.4549, + "step": 14166 + }, + { + "epoch": 1.1433298361714148, + "grad_norm": 0.7162390947341919, + "learning_rate": 3.9510751564778246e-05, + "loss": 2.4493, + "step": 14167 + }, + { + "epoch": 1.1434105399079977, + "grad_norm": 0.8251990079879761, + "learning_rate": 3.949818109899367e-05, + "loss": 2.4474, + "step": 14168 + }, + { + "epoch": 1.1434912436445808, + "grad_norm": 0.6739209890365601, + "learning_rate": 3.948561214107258e-05, + "loss": 2.4564, + "step": 14169 + }, + { + "epoch": 1.1435719473811639, + "grad_norm": 0.6606340408325195, + "learning_rate": 3.9473044691328254e-05, + "loss": 2.3838, + "step": 14170 + }, + { + "epoch": 1.1436526511177467, + "grad_norm": 0.7297452092170715, + "learning_rate": 3.946047875007384e-05, + "loss": 2.4673, + "step": 14171 + }, + { + "epoch": 1.1437333548543298, + "grad_norm": 0.7382420301437378, + "learning_rate": 3.9447914317622546e-05, + "loss": 2.4279, + "step": 14172 + }, + { + "epoch": 1.1438140585909127, + "grad_norm": 0.6947354674339294, + "learning_rate": 3.9435351394287546e-05, + "loss": 2.4553, + "step": 14173 + }, + { + "epoch": 1.1438947623274958, + "grad_norm": 0.670369565486908, 
+ "learning_rate": 3.942278998038183e-05, + "loss": 2.4285, + "step": 14174 + }, + { + "epoch": 1.1439754660640788, + "grad_norm": 0.7097954154014587, + "learning_rate": 3.941023007621859e-05, + "loss": 2.477, + "step": 14175 + }, + { + "epoch": 1.1440561698006617, + "grad_norm": 0.6490213871002197, + "learning_rate": 3.9397671682110826e-05, + "loss": 2.3943, + "step": 14176 + }, + { + "epoch": 1.1441368735372448, + "grad_norm": 0.6505936980247498, + "learning_rate": 3.938511479837147e-05, + "loss": 2.4188, + "step": 14177 + }, + { + "epoch": 1.1442175772738277, + "grad_norm": 0.6696773767471313, + "learning_rate": 3.9372559425313496e-05, + "loss": 2.4377, + "step": 14178 + }, + { + "epoch": 1.1442982810104108, + "grad_norm": 0.6747034192085266, + "learning_rate": 3.936000556324982e-05, + "loss": 2.4111, + "step": 14179 + }, + { + "epoch": 1.1443789847469938, + "grad_norm": 0.7766546607017517, + "learning_rate": 3.934745321249336e-05, + "loss": 2.3873, + "step": 14180 + }, + { + "epoch": 1.1444596884835767, + "grad_norm": 0.7608100175857544, + "learning_rate": 3.933490237335688e-05, + "loss": 2.4567, + "step": 14181 + }, + { + "epoch": 1.1445403922201598, + "grad_norm": 0.7724356055259705, + "learning_rate": 3.9322353046153205e-05, + "loss": 2.4729, + "step": 14182 + }, + { + "epoch": 1.1446210959567429, + "grad_norm": 0.6908414363861084, + "learning_rate": 3.930980523119515e-05, + "loss": 2.41, + "step": 14183 + }, + { + "epoch": 1.1447017996933257, + "grad_norm": 0.7209733128547668, + "learning_rate": 3.9297258928795356e-05, + "loss": 2.4629, + "step": 14184 + }, + { + "epoch": 1.1447825034299088, + "grad_norm": 0.7116519212722778, + "learning_rate": 3.928471413926651e-05, + "loss": 2.5081, + "step": 14185 + }, + { + "epoch": 1.144863207166492, + "grad_norm": 0.6704578995704651, + "learning_rate": 3.9272170862921365e-05, + "loss": 2.494, + "step": 14186 + }, + { + "epoch": 1.1449439109030748, + "grad_norm": 0.6914607882499695, + "learning_rate": 
3.9259629100072435e-05, + "loss": 2.3979, + "step": 14187 + }, + { + "epoch": 1.1450246146396579, + "grad_norm": 0.7413245439529419, + "learning_rate": 3.924708885103233e-05, + "loss": 2.4534, + "step": 14188 + }, + { + "epoch": 1.1451053183762407, + "grad_norm": 0.7411661744117737, + "learning_rate": 3.923455011611362e-05, + "loss": 2.4191, + "step": 14189 + }, + { + "epoch": 1.1451860221128238, + "grad_norm": 0.6581972241401672, + "learning_rate": 3.9222012895628716e-05, + "loss": 2.4494, + "step": 14190 + }, + { + "epoch": 1.145266725849407, + "grad_norm": 0.6628647446632385, + "learning_rate": 3.920947718989013e-05, + "loss": 2.4483, + "step": 14191 + }, + { + "epoch": 1.1453474295859898, + "grad_norm": 0.7068151831626892, + "learning_rate": 3.9196942999210316e-05, + "loss": 2.4549, + "step": 14192 + }, + { + "epoch": 1.1454281333225729, + "grad_norm": 0.6727713942527771, + "learning_rate": 3.918441032390159e-05, + "loss": 2.4261, + "step": 14193 + }, + { + "epoch": 1.1455088370591557, + "grad_norm": 0.6680718660354614, + "learning_rate": 3.9171879164276334e-05, + "loss": 2.4705, + "step": 14194 + }, + { + "epoch": 1.1455895407957388, + "grad_norm": 0.710096538066864, + "learning_rate": 3.915934952064685e-05, + "loss": 2.474, + "step": 14195 + }, + { + "epoch": 1.145670244532322, + "grad_norm": 0.6927496790885925, + "learning_rate": 3.9146821393325414e-05, + "loss": 2.3979, + "step": 14196 + }, + { + "epoch": 1.1457509482689048, + "grad_norm": 0.6887550354003906, + "learning_rate": 3.913429478262427e-05, + "loss": 2.4588, + "step": 14197 + }, + { + "epoch": 1.1458316520054879, + "grad_norm": 0.6847062706947327, + "learning_rate": 3.912176968885559e-05, + "loss": 2.4602, + "step": 14198 + }, + { + "epoch": 1.145912355742071, + "grad_norm": 0.6832349300384521, + "learning_rate": 3.91092461123316e-05, + "loss": 2.4672, + "step": 14199 + }, + { + "epoch": 1.1459930594786538, + "grad_norm": 0.6789066791534424, + "learning_rate": 3.909672405336432e-05, + "loss": 
2.5029, + "step": 14200 + }, + { + "epoch": 1.146073763215237, + "grad_norm": 0.6953951120376587, + "learning_rate": 3.9084203512265885e-05, + "loss": 2.4223, + "step": 14201 + }, + { + "epoch": 1.1461544669518198, + "grad_norm": 0.6629688739776611, + "learning_rate": 3.907168448934836e-05, + "loss": 2.4028, + "step": 14202 + }, + { + "epoch": 1.1462351706884029, + "grad_norm": 0.6661216020584106, + "learning_rate": 3.90591669849237e-05, + "loss": 2.4668, + "step": 14203 + }, + { + "epoch": 1.146315874424986, + "grad_norm": 0.6814442276954651, + "learning_rate": 3.9046650999303894e-05, + "loss": 2.4273, + "step": 14204 + }, + { + "epoch": 1.1463965781615688, + "grad_norm": 0.6678626537322998, + "learning_rate": 3.903413653280088e-05, + "loss": 2.444, + "step": 14205 + }, + { + "epoch": 1.146477281898152, + "grad_norm": 0.6703703999519348, + "learning_rate": 3.902162358572655e-05, + "loss": 2.4273, + "step": 14206 + }, + { + "epoch": 1.1465579856347348, + "grad_norm": 0.7052578926086426, + "learning_rate": 3.900911215839276e-05, + "loss": 2.4397, + "step": 14207 + }, + { + "epoch": 1.1466386893713179, + "grad_norm": 0.6792036294937134, + "learning_rate": 3.899660225111136e-05, + "loss": 2.439, + "step": 14208 + }, + { + "epoch": 1.146719393107901, + "grad_norm": 0.6995401978492737, + "learning_rate": 3.898409386419407e-05, + "loss": 2.5002, + "step": 14209 + }, + { + "epoch": 1.1468000968444838, + "grad_norm": 0.6527338027954102, + "learning_rate": 3.897158699795265e-05, + "loss": 2.4523, + "step": 14210 + }, + { + "epoch": 1.146880800581067, + "grad_norm": 0.7509400248527527, + "learning_rate": 3.8959081652698814e-05, + "loss": 2.4193, + "step": 14211 + }, + { + "epoch": 1.14696150431765, + "grad_norm": 0.6985350251197815, + "learning_rate": 3.894657782874426e-05, + "loss": 2.4251, + "step": 14212 + }, + { + "epoch": 1.1470422080542328, + "grad_norm": 0.6831483840942383, + "learning_rate": 3.893407552640055e-05, + "loss": 2.4172, + "step": 14213 + }, + { + "epoch": 
1.147122911790816, + "grad_norm": 0.7281469702720642, + "learning_rate": 3.892157474597929e-05, + "loss": 2.4451, + "step": 14214 + }, + { + "epoch": 1.147203615527399, + "grad_norm": 0.7326027750968933, + "learning_rate": 3.8909075487792066e-05, + "loss": 2.3926, + "step": 14215 + }, + { + "epoch": 1.1472843192639819, + "grad_norm": 0.7030496597290039, + "learning_rate": 3.889657775215036e-05, + "loss": 2.435, + "step": 14216 + }, + { + "epoch": 1.147365023000565, + "grad_norm": 0.6915596127510071, + "learning_rate": 3.888408153936568e-05, + "loss": 2.4622, + "step": 14217 + }, + { + "epoch": 1.1474457267371478, + "grad_norm": 0.678600013256073, + "learning_rate": 3.8871586849749474e-05, + "loss": 2.4264, + "step": 14218 + }, + { + "epoch": 1.147526430473731, + "grad_norm": 0.7487786412239075, + "learning_rate": 3.885909368361308e-05, + "loss": 2.4038, + "step": 14219 + }, + { + "epoch": 1.147607134210314, + "grad_norm": 0.6658064723014832, + "learning_rate": 3.8846602041267886e-05, + "loss": 2.4079, + "step": 14220 + }, + { + "epoch": 1.1476878379468969, + "grad_norm": 0.6985111832618713, + "learning_rate": 3.883411192302527e-05, + "loss": 2.481, + "step": 14221 + }, + { + "epoch": 1.14776854168348, + "grad_norm": 0.7056208848953247, + "learning_rate": 3.8821623329196445e-05, + "loss": 2.4409, + "step": 14222 + }, + { + "epoch": 1.1478492454200628, + "grad_norm": 0.7107830047607422, + "learning_rate": 3.880913626009268e-05, + "loss": 2.4578, + "step": 14223 + }, + { + "epoch": 1.147929949156646, + "grad_norm": 0.6678555607795715, + "learning_rate": 3.87966507160252e-05, + "loss": 2.4548, + "step": 14224 + }, + { + "epoch": 1.148010652893229, + "grad_norm": 0.6699830293655396, + "learning_rate": 3.8784166697305157e-05, + "loss": 2.3763, + "step": 14225 + }, + { + "epoch": 1.1480913566298119, + "grad_norm": 0.7695464491844177, + "learning_rate": 3.8771684204243716e-05, + "loss": 2.4774, + "step": 14226 + }, + { + "epoch": 1.148172060366395, + "grad_norm": 
0.7801330089569092, + "learning_rate": 3.8759203237151954e-05, + "loss": 2.4598, + "step": 14227 + }, + { + "epoch": 1.148252764102978, + "grad_norm": 0.7029622793197632, + "learning_rate": 3.8746723796340955e-05, + "loss": 2.3901, + "step": 14228 + }, + { + "epoch": 1.148333467839561, + "grad_norm": 0.7472359538078308, + "learning_rate": 3.873424588212169e-05, + "loss": 2.4724, + "step": 14229 + }, + { + "epoch": 1.148414171576144, + "grad_norm": 0.6621725559234619, + "learning_rate": 3.872176949480517e-05, + "loss": 2.4523, + "step": 14230 + }, + { + "epoch": 1.148494875312727, + "grad_norm": 0.722658634185791, + "learning_rate": 3.8709294634702376e-05, + "loss": 2.4032, + "step": 14231 + }, + { + "epoch": 1.14857557904931, + "grad_norm": 0.7743202447891235, + "learning_rate": 3.869682130212413e-05, + "loss": 2.4373, + "step": 14232 + }, + { + "epoch": 1.148656282785893, + "grad_norm": 0.6906178593635559, + "learning_rate": 3.868434949738136e-05, + "loss": 2.4765, + "step": 14233 + }, + { + "epoch": 1.148736986522476, + "grad_norm": 0.6708275675773621, + "learning_rate": 3.86718792207849e-05, + "loss": 2.4263, + "step": 14234 + }, + { + "epoch": 1.148817690259059, + "grad_norm": 0.6992776989936829, + "learning_rate": 3.8659410472645494e-05, + "loss": 2.378, + "step": 14235 + }, + { + "epoch": 1.148898393995642, + "grad_norm": 0.7229011058807373, + "learning_rate": 3.864694325327389e-05, + "loss": 2.4075, + "step": 14236 + }, + { + "epoch": 1.148979097732225, + "grad_norm": 0.6622509956359863, + "learning_rate": 3.863447756298091e-05, + "loss": 2.3954, + "step": 14237 + }, + { + "epoch": 1.149059801468808, + "grad_norm": 0.7233534455299377, + "learning_rate": 3.862201340207712e-05, + "loss": 2.4506, + "step": 14238 + }, + { + "epoch": 1.149140505205391, + "grad_norm": 0.716869056224823, + "learning_rate": 3.860955077087321e-05, + "loss": 2.4304, + "step": 14239 + }, + { + "epoch": 1.149221208941974, + "grad_norm": 0.6550257205963135, + "learning_rate": 
3.8597089669679766e-05, + "loss": 2.4261, + "step": 14240 + }, + { + "epoch": 1.149301912678557, + "grad_norm": 0.6981741786003113, + "learning_rate": 3.858463009880738e-05, + "loss": 2.4115, + "step": 14241 + }, + { + "epoch": 1.14938261641514, + "grad_norm": 0.6792196035385132, + "learning_rate": 3.8572172058566534e-05, + "loss": 2.4195, + "step": 14242 + }, + { + "epoch": 1.149463320151723, + "grad_norm": 0.7278807163238525, + "learning_rate": 3.855971554926773e-05, + "loss": 2.418, + "step": 14243 + }, + { + "epoch": 1.1495440238883061, + "grad_norm": 0.6451076865196228, + "learning_rate": 3.8547260571221456e-05, + "loss": 2.4591, + "step": 14244 + }, + { + "epoch": 1.149624727624889, + "grad_norm": 0.7052451968193054, + "learning_rate": 3.853480712473805e-05, + "loss": 2.4023, + "step": 14245 + }, + { + "epoch": 1.149705431361472, + "grad_norm": 0.7016182541847229, + "learning_rate": 3.852235521012793e-05, + "loss": 2.4959, + "step": 14246 + }, + { + "epoch": 1.1497861350980552, + "grad_norm": 0.7287492156028748, + "learning_rate": 3.850990482770141e-05, + "loss": 2.3884, + "step": 14247 + }, + { + "epoch": 1.149866838834638, + "grad_norm": 0.6648508310317993, + "learning_rate": 3.84974559777688e-05, + "loss": 2.4632, + "step": 14248 + }, + { + "epoch": 1.1499475425712211, + "grad_norm": 0.7387828230857849, + "learning_rate": 3.848500866064036e-05, + "loss": 2.4053, + "step": 14249 + }, + { + "epoch": 1.150028246307804, + "grad_norm": 0.7230356931686401, + "learning_rate": 3.847256287662635e-05, + "loss": 2.5128, + "step": 14250 + }, + { + "epoch": 1.150108950044387, + "grad_norm": 0.7209547162055969, + "learning_rate": 3.846011862603686e-05, + "loss": 2.4626, + "step": 14251 + }, + { + "epoch": 1.1501896537809702, + "grad_norm": 0.7177916765213013, + "learning_rate": 3.844767590918209e-05, + "loss": 2.4469, + "step": 14252 + }, + { + "epoch": 1.150270357517553, + "grad_norm": 0.7850151658058167, + "learning_rate": 3.843523472637216e-05, + "loss": 2.4731, + 
"step": 14253 + }, + { + "epoch": 1.150351061254136, + "grad_norm": 0.7051519155502319, + "learning_rate": 3.8422795077917084e-05, + "loss": 2.3696, + "step": 14254 + }, + { + "epoch": 1.150431764990719, + "grad_norm": 0.7434025406837463, + "learning_rate": 3.841035696412692e-05, + "loss": 2.444, + "step": 14255 + }, + { + "epoch": 1.150512468727302, + "grad_norm": 0.7404719591140747, + "learning_rate": 3.839792038531166e-05, + "loss": 2.4415, + "step": 14256 + }, + { + "epoch": 1.1505931724638851, + "grad_norm": 0.6883764266967773, + "learning_rate": 3.838548534178125e-05, + "loss": 2.4887, + "step": 14257 + }, + { + "epoch": 1.150673876200468, + "grad_norm": 0.6697155237197876, + "learning_rate": 3.83730518338456e-05, + "loss": 2.3721, + "step": 14258 + }, + { + "epoch": 1.150754579937051, + "grad_norm": 0.68825763463974, + "learning_rate": 3.836061986181459e-05, + "loss": 2.4712, + "step": 14259 + }, + { + "epoch": 1.1508352836736342, + "grad_norm": 0.6810611486434937, + "learning_rate": 3.8348189425998114e-05, + "loss": 2.3995, + "step": 14260 + }, + { + "epoch": 1.150915987410217, + "grad_norm": 0.6718329787254333, + "learning_rate": 3.8335760526705866e-05, + "loss": 2.4068, + "step": 14261 + }, + { + "epoch": 1.1509966911468001, + "grad_norm": 0.694618284702301, + "learning_rate": 3.832333316424767e-05, + "loss": 2.458, + "step": 14262 + }, + { + "epoch": 1.151077394883383, + "grad_norm": 0.6824250817298889, + "learning_rate": 3.8310907338933266e-05, + "loss": 2.4623, + "step": 14263 + }, + { + "epoch": 1.151158098619966, + "grad_norm": 0.6875178217887878, + "learning_rate": 3.8298483051072264e-05, + "loss": 2.4827, + "step": 14264 + }, + { + "epoch": 1.1512388023565492, + "grad_norm": 0.7868281602859497, + "learning_rate": 3.828606030097437e-05, + "loss": 2.4638, + "step": 14265 + }, + { + "epoch": 1.151319506093132, + "grad_norm": 0.7003639936447144, + "learning_rate": 3.8273639088949165e-05, + "loss": 2.4885, + "step": 14266 + }, + { + "epoch": 
1.1514002098297151, + "grad_norm": 0.6965197920799255, + "learning_rate": 3.826121941530623e-05, + "loss": 2.3983, + "step": 14267 + }, + { + "epoch": 1.151480913566298, + "grad_norm": 0.7241101264953613, + "learning_rate": 3.824880128035509e-05, + "loss": 2.4598, + "step": 14268 + }, + { + "epoch": 1.151561617302881, + "grad_norm": 0.700764536857605, + "learning_rate": 3.823638468440528e-05, + "loss": 2.3627, + "step": 14269 + }, + { + "epoch": 1.1516423210394642, + "grad_norm": 0.6889846324920654, + "learning_rate": 3.822396962776619e-05, + "loss": 2.4442, + "step": 14270 + }, + { + "epoch": 1.151723024776047, + "grad_norm": 0.6660009026527405, + "learning_rate": 3.8211556110747245e-05, + "loss": 2.403, + "step": 14271 + }, + { + "epoch": 1.1518037285126301, + "grad_norm": 0.6537240743637085, + "learning_rate": 3.819914413365785e-05, + "loss": 2.4358, + "step": 14272 + }, + { + "epoch": 1.1518844322492132, + "grad_norm": 0.6852741837501526, + "learning_rate": 3.818673369680735e-05, + "loss": 2.4272, + "step": 14273 + }, + { + "epoch": 1.151965135985796, + "grad_norm": 0.701874852180481, + "learning_rate": 3.817432480050501e-05, + "loss": 2.4419, + "step": 14274 + }, + { + "epoch": 1.1520458397223792, + "grad_norm": 0.7089500427246094, + "learning_rate": 3.816191744506011e-05, + "loss": 2.4537, + "step": 14275 + }, + { + "epoch": 1.1521265434589623, + "grad_norm": 0.698564887046814, + "learning_rate": 3.8149511630781866e-05, + "loss": 2.3991, + "step": 14276 + }, + { + "epoch": 1.1522072471955451, + "grad_norm": 0.6940335035324097, + "learning_rate": 3.813710735797947e-05, + "loss": 2.5022, + "step": 14277 + }, + { + "epoch": 1.1522879509321282, + "grad_norm": 0.6916826367378235, + "learning_rate": 3.812470462696208e-05, + "loss": 2.4449, + "step": 14278 + }, + { + "epoch": 1.152368654668711, + "grad_norm": 0.7115256190299988, + "learning_rate": 3.811230343803882e-05, + "loss": 2.4371, + "step": 14279 + }, + { + "epoch": 1.1524493584052942, + "grad_norm": 
0.6857369542121887, + "learning_rate": 3.80999037915187e-05, + "loss": 2.4426, + "step": 14280 + }, + { + "epoch": 1.1525300621418773, + "grad_norm": 0.7605363130569458, + "learning_rate": 3.808750568771079e-05, + "loss": 2.4999, + "step": 14281 + }, + { + "epoch": 1.1526107658784601, + "grad_norm": 0.6604358553886414, + "learning_rate": 3.8075109126924115e-05, + "loss": 2.419, + "step": 14282 + }, + { + "epoch": 1.1526914696150432, + "grad_norm": 0.6945412755012512, + "learning_rate": 3.806271410946756e-05, + "loss": 2.4555, + "step": 14283 + }, + { + "epoch": 1.152772173351626, + "grad_norm": 0.7205908894538879, + "learning_rate": 3.805032063565007e-05, + "loss": 2.4745, + "step": 14284 + }, + { + "epoch": 1.1528528770882092, + "grad_norm": 0.7198025584220886, + "learning_rate": 3.8037928705780554e-05, + "loss": 2.4358, + "step": 14285 + }, + { + "epoch": 1.1529335808247922, + "grad_norm": 0.7231044769287109, + "learning_rate": 3.802553832016781e-05, + "loss": 2.4713, + "step": 14286 + }, + { + "epoch": 1.1530142845613751, + "grad_norm": 0.6878815293312073, + "learning_rate": 3.80131494791206e-05, + "loss": 2.4479, + "step": 14287 + }, + { + "epoch": 1.1530949882979582, + "grad_norm": 0.6930533647537231, + "learning_rate": 3.800076218294779e-05, + "loss": 2.3912, + "step": 14288 + }, + { + "epoch": 1.1531756920345413, + "grad_norm": 0.703521192073822, + "learning_rate": 3.798837643195808e-05, + "loss": 2.451, + "step": 14289 + }, + { + "epoch": 1.1532563957711242, + "grad_norm": 0.7099746465682983, + "learning_rate": 3.79759922264601e-05, + "loss": 2.4957, + "step": 14290 + }, + { + "epoch": 1.1533370995077072, + "grad_norm": 0.7268218398094177, + "learning_rate": 3.7963609566762527e-05, + "loss": 2.4242, + "step": 14291 + }, + { + "epoch": 1.1534178032442903, + "grad_norm": 0.7465239763259888, + "learning_rate": 3.7951228453174004e-05, + "loss": 2.3867, + "step": 14292 + }, + { + "epoch": 1.1534985069808732, + "grad_norm": 0.704584002494812, + "learning_rate": 
3.793884888600302e-05, + "loss": 2.5009, + "step": 14293 + }, + { + "epoch": 1.1535792107174563, + "grad_norm": 0.7057262063026428, + "learning_rate": 3.792647086555816e-05, + "loss": 2.4381, + "step": 14294 + }, + { + "epoch": 1.1536599144540391, + "grad_norm": 0.7045955061912537, + "learning_rate": 3.791409439214794e-05, + "loss": 2.4456, + "step": 14295 + }, + { + "epoch": 1.1537406181906222, + "grad_norm": 0.705476701259613, + "learning_rate": 3.790171946608074e-05, + "loss": 2.466, + "step": 14296 + }, + { + "epoch": 1.1538213219272053, + "grad_norm": 0.7128286957740784, + "learning_rate": 3.788934608766503e-05, + "loss": 2.4891, + "step": 14297 + }, + { + "epoch": 1.1539020256637882, + "grad_norm": 0.678144633769989, + "learning_rate": 3.787697425720918e-05, + "loss": 2.4453, + "step": 14298 + }, + { + "epoch": 1.1539827294003713, + "grad_norm": 0.754216730594635, + "learning_rate": 3.786460397502151e-05, + "loss": 2.4331, + "step": 14299 + }, + { + "epoch": 1.1540634331369541, + "grad_norm": 0.6881092190742493, + "learning_rate": 3.7852235241410325e-05, + "loss": 2.3692, + "step": 14300 + }, + { + "epoch": 1.1541441368735372, + "grad_norm": 0.7498507499694824, + "learning_rate": 3.783986805668395e-05, + "loss": 2.4556, + "step": 14301 + }, + { + "epoch": 1.1542248406101203, + "grad_norm": 0.6312216520309448, + "learning_rate": 3.7827502421150496e-05, + "loss": 2.4727, + "step": 14302 + }, + { + "epoch": 1.1543055443467032, + "grad_norm": 0.7156404256820679, + "learning_rate": 3.781513833511822e-05, + "loss": 2.4003, + "step": 14303 + }, + { + "epoch": 1.1543862480832863, + "grad_norm": 0.6589376926422119, + "learning_rate": 3.7802775798895226e-05, + "loss": 2.4461, + "step": 14304 + }, + { + "epoch": 1.1544669518198694, + "grad_norm": 0.7259865999221802, + "learning_rate": 3.77904148127897e-05, + "loss": 2.4021, + "step": 14305 + }, + { + "epoch": 1.1545476555564522, + "grad_norm": 0.7248456478118896, + "learning_rate": 3.777805537710961e-05, + "loss": 
2.4784, + "step": 14306 + }, + { + "epoch": 1.1546283592930353, + "grad_norm": 0.7085593342781067, + "learning_rate": 3.7765697492163034e-05, + "loss": 2.4394, + "step": 14307 + }, + { + "epoch": 1.1547090630296182, + "grad_norm": 0.7394313216209412, + "learning_rate": 3.775334115825796e-05, + "loss": 2.5055, + "step": 14308 + }, + { + "epoch": 1.1547897667662013, + "grad_norm": 0.7231999039649963, + "learning_rate": 3.7740986375702336e-05, + "loss": 2.4551, + "step": 14309 + }, + { + "epoch": 1.1548704705027844, + "grad_norm": 0.6875953078269958, + "learning_rate": 3.7728633144804084e-05, + "loss": 2.4641, + "step": 14310 + }, + { + "epoch": 1.1549511742393672, + "grad_norm": 0.7477203607559204, + "learning_rate": 3.7716281465871094e-05, + "loss": 2.4929, + "step": 14311 + }, + { + "epoch": 1.1550318779759503, + "grad_norm": 0.6653971076011658, + "learning_rate": 3.770393133921115e-05, + "loss": 2.4819, + "step": 14312 + }, + { + "epoch": 1.1551125817125332, + "grad_norm": 0.7267318964004517, + "learning_rate": 3.769158276513209e-05, + "loss": 2.4568, + "step": 14313 + }, + { + "epoch": 1.1551932854491163, + "grad_norm": 0.6675654053688049, + "learning_rate": 3.76792357439417e-05, + "loss": 2.4789, + "step": 14314 + }, + { + "epoch": 1.1552739891856993, + "grad_norm": 0.6847487688064575, + "learning_rate": 3.7666890275947616e-05, + "loss": 2.4034, + "step": 14315 + }, + { + "epoch": 1.1553546929222822, + "grad_norm": 0.811553418636322, + "learning_rate": 3.765454636145758e-05, + "loss": 2.5051, + "step": 14316 + }, + { + "epoch": 1.1554353966588653, + "grad_norm": 0.690026581287384, + "learning_rate": 3.7642204000779204e-05, + "loss": 2.4477, + "step": 14317 + }, + { + "epoch": 1.1555161003954484, + "grad_norm": 0.695810079574585, + "learning_rate": 3.762986319422013e-05, + "loss": 2.4516, + "step": 14318 + }, + { + "epoch": 1.1555968041320313, + "grad_norm": 0.6869217753410339, + "learning_rate": 3.7617523942087886e-05, + "loss": 2.3802, + "step": 14319 + }, + { 
+ "epoch": 1.1556775078686143, + "grad_norm": 0.7109078764915466, + "learning_rate": 3.7605186244690016e-05, + "loss": 2.4306, + "step": 14320 + }, + { + "epoch": 1.1557582116051974, + "grad_norm": 0.7385044693946838, + "learning_rate": 3.759285010233404e-05, + "loss": 2.4288, + "step": 14321 + }, + { + "epoch": 1.1558389153417803, + "grad_norm": 0.6775605082511902, + "learning_rate": 3.7580515515327355e-05, + "loss": 2.4155, + "step": 14322 + }, + { + "epoch": 1.1559196190783634, + "grad_norm": 0.7325694561004639, + "learning_rate": 3.7568182483977375e-05, + "loss": 2.5035, + "step": 14323 + }, + { + "epoch": 1.1560003228149462, + "grad_norm": 0.6896799206733704, + "learning_rate": 3.7555851008591526e-05, + "loss": 2.4739, + "step": 14324 + }, + { + "epoch": 1.1560810265515293, + "grad_norm": 0.7086506485939026, + "learning_rate": 3.7543521089477065e-05, + "loss": 2.4815, + "step": 14325 + }, + { + "epoch": 1.1561617302881124, + "grad_norm": 0.6886687874794006, + "learning_rate": 3.753119272694132e-05, + "loss": 2.4261, + "step": 14326 + }, + { + "epoch": 1.1562424340246953, + "grad_norm": 0.675136148929596, + "learning_rate": 3.751886592129155e-05, + "loss": 2.3946, + "step": 14327 + }, + { + "epoch": 1.1563231377612784, + "grad_norm": 0.706729531288147, + "learning_rate": 3.7506540672834964e-05, + "loss": 2.4199, + "step": 14328 + }, + { + "epoch": 1.1564038414978612, + "grad_norm": 0.6790904998779297, + "learning_rate": 3.749421698187875e-05, + "loss": 2.4419, + "step": 14329 + }, + { + "epoch": 1.1564845452344443, + "grad_norm": 0.6688171029090881, + "learning_rate": 3.748189484873007e-05, + "loss": 2.4516, + "step": 14330 + }, + { + "epoch": 1.1565652489710274, + "grad_norm": 0.6782420873641968, + "learning_rate": 3.746957427369596e-05, + "loss": 2.4586, + "step": 14331 + }, + { + "epoch": 1.1566459527076103, + "grad_norm": 0.7633399367332458, + "learning_rate": 3.7457255257083514e-05, + "loss": 2.3776, + "step": 14332 + }, + { + "epoch": 1.1567266564441934, 
+ "grad_norm": 0.680000364780426, + "learning_rate": 3.744493779919976e-05, + "loss": 2.4978, + "step": 14333 + }, + { + "epoch": 1.1568073601807765, + "grad_norm": 0.6993350386619568, + "learning_rate": 3.743262190035171e-05, + "loss": 2.3974, + "step": 14334 + }, + { + "epoch": 1.1568880639173593, + "grad_norm": 0.7316375374794006, + "learning_rate": 3.7420307560846234e-05, + "loss": 2.4423, + "step": 14335 + }, + { + "epoch": 1.1569687676539424, + "grad_norm": 0.7384842038154602, + "learning_rate": 3.7407994780990285e-05, + "loss": 2.4604, + "step": 14336 + }, + { + "epoch": 1.1570494713905255, + "grad_norm": 0.6980708837509155, + "learning_rate": 3.739568356109072e-05, + "loss": 2.4408, + "step": 14337 + }, + { + "epoch": 1.1571301751271084, + "grad_norm": 0.6510182619094849, + "learning_rate": 3.738337390145438e-05, + "loss": 2.4076, + "step": 14338 + }, + { + "epoch": 1.1572108788636915, + "grad_norm": 0.7458614706993103, + "learning_rate": 3.737106580238804e-05, + "loss": 2.4976, + "step": 14339 + }, + { + "epoch": 1.1572915826002743, + "grad_norm": 0.6663469672203064, + "learning_rate": 3.735875926419849e-05, + "loss": 2.4414, + "step": 14340 + }, + { + "epoch": 1.1573722863368574, + "grad_norm": 0.6611858606338501, + "learning_rate": 3.7346454287192355e-05, + "loss": 2.3783, + "step": 14341 + }, + { + "epoch": 1.1574529900734405, + "grad_norm": 0.6605291366577148, + "learning_rate": 3.7334150871676364e-05, + "loss": 2.4291, + "step": 14342 + }, + { + "epoch": 1.1575336938100234, + "grad_norm": 0.6879985928535461, + "learning_rate": 3.7321849017957186e-05, + "loss": 2.4229, + "step": 14343 + }, + { + "epoch": 1.1576143975466064, + "grad_norm": 0.7466493844985962, + "learning_rate": 3.7309548726341334e-05, + "loss": 2.4278, + "step": 14344 + }, + { + "epoch": 1.1576951012831893, + "grad_norm": 0.7476457357406616, + "learning_rate": 3.72972499971354e-05, + "loss": 2.4944, + "step": 14345 + }, + { + "epoch": 1.1577758050197724, + "grad_norm": 
0.6339364647865295, + "learning_rate": 3.728495283064594e-05, + "loss": 2.3753, + "step": 14346 + }, + { + "epoch": 1.1578565087563555, + "grad_norm": 0.6885230541229248, + "learning_rate": 3.7272657227179355e-05, + "loss": 2.4519, + "step": 14347 + }, + { + "epoch": 1.1579372124929384, + "grad_norm": 0.7561741471290588, + "learning_rate": 3.7260363187042126e-05, + "loss": 2.4808, + "step": 14348 + }, + { + "epoch": 1.1580179162295214, + "grad_norm": 0.8007705211639404, + "learning_rate": 3.724807071054062e-05, + "loss": 2.4649, + "step": 14349 + }, + { + "epoch": 1.1580986199661045, + "grad_norm": 0.6920937895774841, + "learning_rate": 3.72357797979813e-05, + "loss": 2.4145, + "step": 14350 + }, + { + "epoch": 1.1581793237026874, + "grad_norm": 0.7310675978660583, + "learning_rate": 3.7223490449670364e-05, + "loss": 2.4475, + "step": 14351 + }, + { + "epoch": 1.1582600274392705, + "grad_norm": 0.6600463390350342, + "learning_rate": 3.7211202665914155e-05, + "loss": 2.3938, + "step": 14352 + }, + { + "epoch": 1.1583407311758536, + "grad_norm": 0.690258800983429, + "learning_rate": 3.719891644701894e-05, + "loss": 2.3944, + "step": 14353 + }, + { + "epoch": 1.1584214349124364, + "grad_norm": 0.7075135111808777, + "learning_rate": 3.718663179329085e-05, + "loss": 2.3931, + "step": 14354 + }, + { + "epoch": 1.1585021386490195, + "grad_norm": 0.7416332960128784, + "learning_rate": 3.71743487050361e-05, + "loss": 2.4566, + "step": 14355 + }, + { + "epoch": 1.1585828423856024, + "grad_norm": 0.7459710836410522, + "learning_rate": 3.7162067182560846e-05, + "loss": 2.4232, + "step": 14356 + }, + { + "epoch": 1.1586635461221855, + "grad_norm": 0.7265400886535645, + "learning_rate": 3.71497872261711e-05, + "loss": 2.4798, + "step": 14357 + }, + { + "epoch": 1.1587442498587683, + "grad_norm": 0.7142636775970459, + "learning_rate": 3.713750883617294e-05, + "loss": 2.4576, + "step": 14358 + }, + { + "epoch": 1.1588249535953514, + "grad_norm": 0.7279871702194214, + 
"learning_rate": 3.712523201287239e-05, + "loss": 2.439, + "step": 14359 + }, + { + "epoch": 1.1589056573319345, + "grad_norm": 0.7151274681091309, + "learning_rate": 3.7112956756575414e-05, + "loss": 2.4684, + "step": 14360 + }, + { + "epoch": 1.1589863610685174, + "grad_norm": 0.7142657041549683, + "learning_rate": 3.7100683067587946e-05, + "loss": 2.4582, + "step": 14361 + }, + { + "epoch": 1.1590670648051005, + "grad_norm": 0.7716035842895508, + "learning_rate": 3.7088410946215914e-05, + "loss": 2.5038, + "step": 14362 + }, + { + "epoch": 1.1591477685416836, + "grad_norm": 0.7232338190078735, + "learning_rate": 3.707614039276509e-05, + "loss": 2.4558, + "step": 14363 + }, + { + "epoch": 1.1592284722782664, + "grad_norm": 0.7388719916343689, + "learning_rate": 3.706387140754134e-05, + "loss": 2.4535, + "step": 14364 + }, + { + "epoch": 1.1593091760148495, + "grad_norm": 0.7022652626037598, + "learning_rate": 3.7051603990850425e-05, + "loss": 2.4479, + "step": 14365 + }, + { + "epoch": 1.1593898797514326, + "grad_norm": 0.7861798405647278, + "learning_rate": 3.703933814299813e-05, + "loss": 2.4219, + "step": 14366 + }, + { + "epoch": 1.1594705834880155, + "grad_norm": 0.6928723454475403, + "learning_rate": 3.7027073864290074e-05, + "loss": 2.4401, + "step": 14367 + }, + { + "epoch": 1.1595512872245985, + "grad_norm": 0.6312821507453918, + "learning_rate": 3.701481115503194e-05, + "loss": 2.3975, + "step": 14368 + }, + { + "epoch": 1.1596319909611814, + "grad_norm": 0.7008257508277893, + "learning_rate": 3.700255001552937e-05, + "loss": 2.4988, + "step": 14369 + }, + { + "epoch": 1.1597126946977645, + "grad_norm": 0.6664693355560303, + "learning_rate": 3.699029044608792e-05, + "loss": 2.4123, + "step": 14370 + }, + { + "epoch": 1.1597933984343476, + "grad_norm": 0.6613842844963074, + "learning_rate": 3.6978032447013145e-05, + "loss": 2.4802, + "step": 14371 + }, + { + "epoch": 1.1598741021709305, + "grad_norm": 0.707788348197937, + "learning_rate": 
3.696577601861057e-05, + "loss": 2.4432, + "step": 14372 + }, + { + "epoch": 1.1599548059075135, + "grad_norm": 0.6547604203224182, + "learning_rate": 3.695352116118561e-05, + "loss": 2.412, + "step": 14373 + }, + { + "epoch": 1.1600355096440964, + "grad_norm": 0.7238109707832336, + "learning_rate": 3.69412678750437e-05, + "loss": 2.4858, + "step": 14374 + }, + { + "epoch": 1.1601162133806795, + "grad_norm": 0.8156580328941345, + "learning_rate": 3.692901616049026e-05, + "loss": 2.4063, + "step": 14375 + }, + { + "epoch": 1.1601969171172626, + "grad_norm": 0.7035481333732605, + "learning_rate": 3.6916766017830585e-05, + "loss": 2.4586, + "step": 14376 + }, + { + "epoch": 1.1602776208538454, + "grad_norm": 0.7523401379585266, + "learning_rate": 3.690451744736999e-05, + "loss": 2.4262, + "step": 14377 + }, + { + "epoch": 1.1603583245904285, + "grad_norm": 0.6740732192993164, + "learning_rate": 3.689227044941376e-05, + "loss": 2.5215, + "step": 14378 + }, + { + "epoch": 1.1604390283270116, + "grad_norm": 0.6502695083618164, + "learning_rate": 3.6880025024267115e-05, + "loss": 2.4292, + "step": 14379 + }, + { + "epoch": 1.1605197320635945, + "grad_norm": 0.7000409364700317, + "learning_rate": 3.686778117223524e-05, + "loss": 2.4323, + "step": 14380 + }, + { + "epoch": 1.1606004358001776, + "grad_norm": 0.7415478229522705, + "learning_rate": 3.68555388936233e-05, + "loss": 2.4515, + "step": 14381 + }, + { + "epoch": 1.1606811395367607, + "grad_norm": 0.6890547871589661, + "learning_rate": 3.684329818873641e-05, + "loss": 2.4115, + "step": 14382 + }, + { + "epoch": 1.1607618432733435, + "grad_norm": 0.8238685727119446, + "learning_rate": 3.68310590578796e-05, + "loss": 2.4666, + "step": 14383 + }, + { + "epoch": 1.1608425470099266, + "grad_norm": 0.8098889589309692, + "learning_rate": 3.681882150135791e-05, + "loss": 2.4667, + "step": 14384 + }, + { + "epoch": 1.1609232507465095, + "grad_norm": 0.6932713985443115, + "learning_rate": 3.680658551947639e-05, + "loss": 
2.4574, + "step": 14385 + }, + { + "epoch": 1.1610039544830926, + "grad_norm": 0.7062943577766418, + "learning_rate": 3.6794351112539915e-05, + "loss": 2.4408, + "step": 14386 + }, + { + "epoch": 1.1610846582196757, + "grad_norm": 0.7859255075454712, + "learning_rate": 3.678211828085343e-05, + "loss": 2.3946, + "step": 14387 + }, + { + "epoch": 1.1611653619562585, + "grad_norm": 0.674609899520874, + "learning_rate": 3.676988702472181e-05, + "loss": 2.4456, + "step": 14388 + }, + { + "epoch": 1.1612460656928416, + "grad_norm": 0.7068402171134949, + "learning_rate": 3.675765734444989e-05, + "loss": 2.4393, + "step": 14389 + }, + { + "epoch": 1.1613267694294245, + "grad_norm": 0.7276526689529419, + "learning_rate": 3.674542924034246e-05, + "loss": 2.456, + "step": 14390 + }, + { + "epoch": 1.1614074731660076, + "grad_norm": 0.7670585513114929, + "learning_rate": 3.673320271270433e-05, + "loss": 2.3774, + "step": 14391 + }, + { + "epoch": 1.1614881769025907, + "grad_norm": 0.702173113822937, + "learning_rate": 3.672097776184013e-05, + "loss": 2.3974, + "step": 14392 + }, + { + "epoch": 1.1615688806391735, + "grad_norm": 0.6922066807746887, + "learning_rate": 3.670875438805457e-05, + "loss": 2.4035, + "step": 14393 + }, + { + "epoch": 1.1616495843757566, + "grad_norm": 0.6675707697868347, + "learning_rate": 3.6696532591652335e-05, + "loss": 2.4369, + "step": 14394 + }, + { + "epoch": 1.1617302881123397, + "grad_norm": 0.6939712762832642, + "learning_rate": 3.668431237293796e-05, + "loss": 2.4265, + "step": 14395 + }, + { + "epoch": 1.1618109918489226, + "grad_norm": 0.719510018825531, + "learning_rate": 3.667209373221602e-05, + "loss": 2.4686, + "step": 14396 + }, + { + "epoch": 1.1618916955855056, + "grad_norm": 0.7167489528656006, + "learning_rate": 3.665987666979104e-05, + "loss": 2.5077, + "step": 14397 + }, + { + "epoch": 1.1619723993220887, + "grad_norm": 0.6539514064788818, + "learning_rate": 3.664766118596754e-05, + "loss": 2.4476, + "step": 14398 + }, + { + 
"epoch": 1.1620531030586716, + "grad_norm": 0.6926440596580505, + "learning_rate": 3.6635447281049876e-05, + "loss": 2.4336, + "step": 14399 + }, + { + "epoch": 1.1621338067952547, + "grad_norm": 0.7124993205070496, + "learning_rate": 3.662323495534252e-05, + "loss": 2.3938, + "step": 14400 + }, + { + "epoch": 1.1622145105318376, + "grad_norm": 0.7073954939842224, + "learning_rate": 3.661102420914986e-05, + "loss": 2.4232, + "step": 14401 + }, + { + "epoch": 1.1622952142684206, + "grad_norm": 0.7491076588630676, + "learning_rate": 3.659881504277613e-05, + "loss": 2.5047, + "step": 14402 + }, + { + "epoch": 1.1623759180050037, + "grad_norm": 0.6698675155639648, + "learning_rate": 3.658660745652568e-05, + "loss": 2.4164, + "step": 14403 + }, + { + "epoch": 1.1624566217415866, + "grad_norm": 0.6576815843582153, + "learning_rate": 3.657440145070276e-05, + "loss": 2.4368, + "step": 14404 + }, + { + "epoch": 1.1625373254781697, + "grad_norm": 0.8236953020095825, + "learning_rate": 3.6562197025611524e-05, + "loss": 2.5041, + "step": 14405 + }, + { + "epoch": 1.1626180292147525, + "grad_norm": 0.7391532063484192, + "learning_rate": 3.6549994181556157e-05, + "loss": 2.4556, + "step": 14406 + }, + { + "epoch": 1.1626987329513356, + "grad_norm": 0.6529936790466309, + "learning_rate": 3.653779291884084e-05, + "loss": 2.4559, + "step": 14407 + }, + { + "epoch": 1.1627794366879187, + "grad_norm": 0.7101796269416809, + "learning_rate": 3.652559323776957e-05, + "loss": 2.3937, + "step": 14408 + }, + { + "epoch": 1.1628601404245016, + "grad_norm": 0.6890308260917664, + "learning_rate": 3.651339513864645e-05, + "loss": 2.4694, + "step": 14409 + }, + { + "epoch": 1.1629408441610847, + "grad_norm": 0.6919918060302734, + "learning_rate": 3.650119862177548e-05, + "loss": 2.4793, + "step": 14410 + }, + { + "epoch": 1.1630215478976678, + "grad_norm": 0.6553575992584229, + "learning_rate": 3.6489003687460624e-05, + "loss": 2.454, + "step": 14411 + }, + { + "epoch": 1.1631022516342506, + 
"grad_norm": 0.7095460891723633, + "learning_rate": 3.6476810336005804e-05, + "loss": 2.4672, + "step": 14412 + }, + { + "epoch": 1.1631829553708337, + "grad_norm": 0.738301694393158, + "learning_rate": 3.6464618567714935e-05, + "loss": 2.4369, + "step": 14413 + }, + { + "epoch": 1.1632636591074166, + "grad_norm": 0.7574542760848999, + "learning_rate": 3.645242838289189e-05, + "loss": 2.4981, + "step": 14414 + }, + { + "epoch": 1.1633443628439997, + "grad_norm": 0.6780585646629333, + "learning_rate": 3.64402397818404e-05, + "loss": 2.4811, + "step": 14415 + }, + { + "epoch": 1.1634250665805828, + "grad_norm": 0.7050060629844666, + "learning_rate": 3.6428052764864287e-05, + "loss": 2.4607, + "step": 14416 + }, + { + "epoch": 1.1635057703171656, + "grad_norm": 0.6946923136711121, + "learning_rate": 3.6415867332267316e-05, + "loss": 2.4482, + "step": 14417 + }, + { + "epoch": 1.1635864740537487, + "grad_norm": 0.7202015519142151, + "learning_rate": 3.64036834843531e-05, + "loss": 2.4764, + "step": 14418 + }, + { + "epoch": 1.1636671777903316, + "grad_norm": 0.7845996618270874, + "learning_rate": 3.639150122142534e-05, + "loss": 2.4926, + "step": 14419 + }, + { + "epoch": 1.1637478815269147, + "grad_norm": 0.6924630403518677, + "learning_rate": 3.6379320543787645e-05, + "loss": 2.4664, + "step": 14420 + }, + { + "epoch": 1.1638285852634978, + "grad_norm": 0.7225920557975769, + "learning_rate": 3.636714145174358e-05, + "loss": 2.4638, + "step": 14421 + }, + { + "epoch": 1.1639092890000806, + "grad_norm": 0.6587103605270386, + "learning_rate": 3.63549639455967e-05, + "loss": 2.3629, + "step": 14422 + }, + { + "epoch": 1.1639899927366637, + "grad_norm": 0.7537658214569092, + "learning_rate": 3.634278802565051e-05, + "loss": 2.4971, + "step": 14423 + }, + { + "epoch": 1.1640706964732468, + "grad_norm": 0.6881381273269653, + "learning_rate": 3.633061369220841e-05, + "loss": 2.3737, + "step": 14424 + }, + { + "epoch": 1.1641514002098297, + "grad_norm": 0.693779468536377, + 
"learning_rate": 3.6318440945573864e-05, + "loss": 2.4346, + "step": 14425 + }, + { + "epoch": 1.1642321039464127, + "grad_norm": 0.777563750743866, + "learning_rate": 3.6306269786050265e-05, + "loss": 2.4288, + "step": 14426 + }, + { + "epoch": 1.1643128076829958, + "grad_norm": 0.6786738634109497, + "learning_rate": 3.629410021394087e-05, + "loss": 2.4094, + "step": 14427 + }, + { + "epoch": 1.1643935114195787, + "grad_norm": 0.7478442788124084, + "learning_rate": 3.628193222954904e-05, + "loss": 2.4163, + "step": 14428 + }, + { + "epoch": 1.1644742151561618, + "grad_norm": 0.6530766487121582, + "learning_rate": 3.626976583317803e-05, + "loss": 2.4328, + "step": 14429 + }, + { + "epoch": 1.1645549188927447, + "grad_norm": 0.6665371060371399, + "learning_rate": 3.6257601025131026e-05, + "loss": 2.4006, + "step": 14430 + }, + { + "epoch": 1.1646356226293277, + "grad_norm": 0.7184741497039795, + "learning_rate": 3.624543780571125e-05, + "loss": 2.462, + "step": 14431 + }, + { + "epoch": 1.1647163263659108, + "grad_norm": 0.7039462327957153, + "learning_rate": 3.6233276175221794e-05, + "loss": 2.4321, + "step": 14432 + }, + { + "epoch": 1.1647970301024937, + "grad_norm": 0.7039144039154053, + "learning_rate": 3.622111613396584e-05, + "loss": 2.4399, + "step": 14433 + }, + { + "epoch": 1.1648777338390768, + "grad_norm": 0.6690253615379333, + "learning_rate": 3.620895768224635e-05, + "loss": 2.3976, + "step": 14434 + }, + { + "epoch": 1.1649584375756596, + "grad_norm": 0.7048032879829407, + "learning_rate": 3.6196800820366384e-05, + "loss": 2.4848, + "step": 14435 + }, + { + "epoch": 1.1650391413122427, + "grad_norm": 0.668971836566925, + "learning_rate": 3.618464554862896e-05, + "loss": 2.4614, + "step": 14436 + }, + { + "epoch": 1.1651198450488258, + "grad_norm": 0.704858660697937, + "learning_rate": 3.617249186733695e-05, + "loss": 2.3962, + "step": 14437 + }, + { + "epoch": 1.1652005487854087, + "grad_norm": 0.692435085773468, + "learning_rate": 
3.6160339776793296e-05, + "loss": 2.4059, + "step": 14438 + }, + { + "epoch": 1.1652812525219918, + "grad_norm": 0.6774182319641113, + "learning_rate": 3.614818927730085e-05, + "loss": 2.4975, + "step": 14439 + }, + { + "epoch": 1.1653619562585749, + "grad_norm": 0.6507411003112793, + "learning_rate": 3.613604036916243e-05, + "loss": 2.5029, + "step": 14440 + }, + { + "epoch": 1.1654426599951577, + "grad_norm": 0.7223206162452698, + "learning_rate": 3.612389305268084e-05, + "loss": 2.4599, + "step": 14441 + }, + { + "epoch": 1.1655233637317408, + "grad_norm": 0.6523364186286926, + "learning_rate": 3.611174732815883e-05, + "loss": 2.4521, + "step": 14442 + }, + { + "epoch": 1.165604067468324, + "grad_norm": 0.6668452024459839, + "learning_rate": 3.6099603195899046e-05, + "loss": 2.4082, + "step": 14443 + }, + { + "epoch": 1.1656847712049068, + "grad_norm": 0.6878299117088318, + "learning_rate": 3.60874606562042e-05, + "loss": 2.4144, + "step": 14444 + }, + { + "epoch": 1.1657654749414899, + "grad_norm": 0.6662277579307556, + "learning_rate": 3.6075319709376895e-05, + "loss": 2.438, + "step": 14445 + }, + { + "epoch": 1.1658461786780727, + "grad_norm": 0.721422553062439, + "learning_rate": 3.606318035571976e-05, + "loss": 2.4414, + "step": 14446 + }, + { + "epoch": 1.1659268824146558, + "grad_norm": 0.6739782691001892, + "learning_rate": 3.6051042595535264e-05, + "loss": 2.4093, + "step": 14447 + }, + { + "epoch": 1.166007586151239, + "grad_norm": 0.6890884637832642, + "learning_rate": 3.603890642912596e-05, + "loss": 2.4385, + "step": 14448 + }, + { + "epoch": 1.1660882898878218, + "grad_norm": 0.6503998637199402, + "learning_rate": 3.602677185679433e-05, + "loss": 2.4498, + "step": 14449 + }, + { + "epoch": 1.1661689936244048, + "grad_norm": 0.6748046875, + "learning_rate": 3.601463887884271e-05, + "loss": 2.3739, + "step": 14450 + }, + { + "epoch": 1.1662496973609877, + "grad_norm": 0.6843422651290894, + "learning_rate": 3.600250749557358e-05, + "loss": 2.4323, + 
"step": 14451 + }, + { + "epoch": 1.1663304010975708, + "grad_norm": 0.7061208486557007, + "learning_rate": 3.599037770728929e-05, + "loss": 2.4611, + "step": 14452 + }, + { + "epoch": 1.166411104834154, + "grad_norm": 0.6614537239074707, + "learning_rate": 3.597824951429208e-05, + "loss": 2.4656, + "step": 14453 + }, + { + "epoch": 1.1664918085707368, + "grad_norm": 0.6620328426361084, + "learning_rate": 3.596612291688424e-05, + "loss": 2.415, + "step": 14454 + }, + { + "epoch": 1.1665725123073198, + "grad_norm": 0.6936565041542053, + "learning_rate": 3.595399791536804e-05, + "loss": 2.4655, + "step": 14455 + }, + { + "epoch": 1.166653216043903, + "grad_norm": 0.6766063570976257, + "learning_rate": 3.594187451004559e-05, + "loss": 2.4628, + "step": 14456 + }, + { + "epoch": 1.1667339197804858, + "grad_norm": 0.6588734984397888, + "learning_rate": 3.592975270121909e-05, + "loss": 2.4503, + "step": 14457 + }, + { + "epoch": 1.1668146235170689, + "grad_norm": 0.7290894985198975, + "learning_rate": 3.591763248919062e-05, + "loss": 2.5075, + "step": 14458 + }, + { + "epoch": 1.1668953272536517, + "grad_norm": 0.6952784657478333, + "learning_rate": 3.590551387426231e-05, + "loss": 2.4258, + "step": 14459 + }, + { + "epoch": 1.1669760309902348, + "grad_norm": 0.6737042665481567, + "learning_rate": 3.5893396856736096e-05, + "loss": 2.4459, + "step": 14460 + }, + { + "epoch": 1.167056734726818, + "grad_norm": 0.6616976857185364, + "learning_rate": 3.588128143691397e-05, + "loss": 2.4726, + "step": 14461 + }, + { + "epoch": 1.1671374384634008, + "grad_norm": 0.7017171382904053, + "learning_rate": 3.5869167615098e-05, + "loss": 2.375, + "step": 14462 + }, + { + "epoch": 1.1672181421999839, + "grad_norm": 0.7153809666633606, + "learning_rate": 3.585705539158997e-05, + "loss": 2.4271, + "step": 14463 + }, + { + "epoch": 1.1672988459365667, + "grad_norm": 0.749196469783783, + "learning_rate": 3.584494476669179e-05, + "loss": 2.4713, + "step": 14464 + }, + { + "epoch": 
1.1673795496731498, + "grad_norm": 0.6593676209449768, + "learning_rate": 3.583283574070533e-05, + "loss": 2.4276, + "step": 14465 + }, + { + "epoch": 1.167460253409733, + "grad_norm": 0.6949084401130676, + "learning_rate": 3.5820728313932295e-05, + "loss": 2.4128, + "step": 14466 + }, + { + "epoch": 1.1675409571463158, + "grad_norm": 0.6795482039451599, + "learning_rate": 3.5808622486674484e-05, + "loss": 2.485, + "step": 14467 + }, + { + "epoch": 1.1676216608828989, + "grad_norm": 0.6763483881950378, + "learning_rate": 3.5796518259233625e-05, + "loss": 2.4063, + "step": 14468 + }, + { + "epoch": 1.167702364619482, + "grad_norm": 0.665687620639801, + "learning_rate": 3.578441563191133e-05, + "loss": 2.437, + "step": 14469 + }, + { + "epoch": 1.1677830683560648, + "grad_norm": 0.6338435411453247, + "learning_rate": 3.577231460500926e-05, + "loss": 2.3747, + "step": 14470 + }, + { + "epoch": 1.167863772092648, + "grad_norm": 0.7031865119934082, + "learning_rate": 3.5760215178829e-05, + "loss": 2.3952, + "step": 14471 + }, + { + "epoch": 1.167944475829231, + "grad_norm": 0.7544599771499634, + "learning_rate": 3.5748117353672106e-05, + "loss": 2.3941, + "step": 14472 + }, + { + "epoch": 1.1680251795658139, + "grad_norm": 0.7271532416343689, + "learning_rate": 3.5736021129840083e-05, + "loss": 2.4371, + "step": 14473 + }, + { + "epoch": 1.168105883302397, + "grad_norm": 0.709048867225647, + "learning_rate": 3.572392650763441e-05, + "loss": 2.482, + "step": 14474 + }, + { + "epoch": 1.1681865870389798, + "grad_norm": 0.6894589066505432, + "learning_rate": 3.571183348735653e-05, + "loss": 2.4347, + "step": 14475 + }, + { + "epoch": 1.168267290775563, + "grad_norm": 0.6680620908737183, + "learning_rate": 3.5699742069307774e-05, + "loss": 2.3995, + "step": 14476 + }, + { + "epoch": 1.168347994512146, + "grad_norm": 0.701669454574585, + "learning_rate": 3.568765225378954e-05, + "loss": 2.4045, + "step": 14477 + }, + { + "epoch": 1.1684286982487289, + "grad_norm": 
0.7102392911911011, + "learning_rate": 3.567556404110315e-05, + "loss": 2.4695, + "step": 14478 + }, + { + "epoch": 1.168509401985312, + "grad_norm": 0.6820430755615234, + "learning_rate": 3.566347743154982e-05, + "loss": 2.4155, + "step": 14479 + }, + { + "epoch": 1.1685901057218948, + "grad_norm": 0.6611022353172302, + "learning_rate": 3.565139242543081e-05, + "loss": 2.3992, + "step": 14480 + }, + { + "epoch": 1.168670809458478, + "grad_norm": 0.6844382882118225, + "learning_rate": 3.5639309023047306e-05, + "loss": 2.4345, + "step": 14481 + }, + { + "epoch": 1.168751513195061, + "grad_norm": 0.7557988166809082, + "learning_rate": 3.5627227224700464e-05, + "loss": 2.4454, + "step": 14482 + }, + { + "epoch": 1.1688322169316439, + "grad_norm": 0.6652555465698242, + "learning_rate": 3.5615147030691384e-05, + "loss": 2.3749, + "step": 14483 + }, + { + "epoch": 1.168912920668227, + "grad_norm": 0.6912989020347595, + "learning_rate": 3.56030684413212e-05, + "loss": 2.4737, + "step": 14484 + }, + { + "epoch": 1.16899362440481, + "grad_norm": 0.735103964805603, + "learning_rate": 3.559099145689083e-05, + "loss": 2.4098, + "step": 14485 + }, + { + "epoch": 1.169074328141393, + "grad_norm": 0.6873028874397278, + "learning_rate": 3.557891607770133e-05, + "loss": 2.4247, + "step": 14486 + }, + { + "epoch": 1.169155031877976, + "grad_norm": 0.7364680171012878, + "learning_rate": 3.556684230405367e-05, + "loss": 2.4314, + "step": 14487 + }, + { + "epoch": 1.169235735614559, + "grad_norm": 0.679122269153595, + "learning_rate": 3.55547701362487e-05, + "loss": 2.4196, + "step": 14488 + }, + { + "epoch": 1.169316439351142, + "grad_norm": 0.6783872246742249, + "learning_rate": 3.554269957458731e-05, + "loss": 2.4212, + "step": 14489 + }, + { + "epoch": 1.169397143087725, + "grad_norm": 0.7434942126274109, + "learning_rate": 3.553063061937034e-05, + "loss": 2.4139, + "step": 14490 + }, + { + "epoch": 1.1694778468243079, + "grad_norm": 0.6799852252006531, + "learning_rate": 
3.55185632708986e-05, + "loss": 2.4252, + "step": 14491 + }, + { + "epoch": 1.169558550560891, + "grad_norm": 0.7040107250213623, + "learning_rate": 3.5506497529472795e-05, + "loss": 2.3937, + "step": 14492 + }, + { + "epoch": 1.169639254297474, + "grad_norm": 0.7350315451622009, + "learning_rate": 3.549443339539368e-05, + "loss": 2.4063, + "step": 14493 + }, + { + "epoch": 1.169719958034057, + "grad_norm": 0.694521963596344, + "learning_rate": 3.548237086896192e-05, + "loss": 2.4715, + "step": 14494 + }, + { + "epoch": 1.16980066177064, + "grad_norm": 0.6648221015930176, + "learning_rate": 3.5470309950478096e-05, + "loss": 2.4365, + "step": 14495 + }, + { + "epoch": 1.1698813655072229, + "grad_norm": 0.688024640083313, + "learning_rate": 3.545825064024284e-05, + "loss": 2.449, + "step": 14496 + }, + { + "epoch": 1.169962069243806, + "grad_norm": 0.6743311882019043, + "learning_rate": 3.544619293855672e-05, + "loss": 2.4283, + "step": 14497 + }, + { + "epoch": 1.170042772980389, + "grad_norm": 0.669119119644165, + "learning_rate": 3.543413684572019e-05, + "loss": 2.4363, + "step": 14498 + }, + { + "epoch": 1.170123476716972, + "grad_norm": 0.6998667120933533, + "learning_rate": 3.5422082362033745e-05, + "loss": 2.425, + "step": 14499 + }, + { + "epoch": 1.170204180453555, + "grad_norm": 0.7681630253791809, + "learning_rate": 3.5410029487797845e-05, + "loss": 2.4382, + "step": 14500 + }, + { + "epoch": 1.170284884190138, + "grad_norm": 0.6925049424171448, + "learning_rate": 3.539797822331279e-05, + "loss": 2.4261, + "step": 14501 + }, + { + "epoch": 1.170365587926721, + "grad_norm": 0.7145542502403259, + "learning_rate": 3.538592856887901e-05, + "loss": 2.4681, + "step": 14502 + }, + { + "epoch": 1.170446291663304, + "grad_norm": 0.6441611647605896, + "learning_rate": 3.537388052479684e-05, + "loss": 2.4187, + "step": 14503 + }, + { + "epoch": 1.1705269953998871, + "grad_norm": 0.6622560620307922, + "learning_rate": 3.5361834091366466e-05, + "loss": 2.4615, + 
"step": 14504 + }, + { + "epoch": 1.17060769913647, + "grad_norm": 0.6987677812576294, + "learning_rate": 3.5349789268888144e-05, + "loss": 2.413, + "step": 14505 + }, + { + "epoch": 1.170688402873053, + "grad_norm": 0.668358325958252, + "learning_rate": 3.533774605766207e-05, + "loss": 2.5146, + "step": 14506 + }, + { + "epoch": 1.170769106609636, + "grad_norm": 0.7514958381652832, + "learning_rate": 3.532570445798844e-05, + "loss": 2.4474, + "step": 14507 + }, + { + "epoch": 1.170849810346219, + "grad_norm": 0.6454465389251709, + "learning_rate": 3.5313664470167276e-05, + "loss": 2.3911, + "step": 14508 + }, + { + "epoch": 1.170930514082802, + "grad_norm": 0.6653602719306946, + "learning_rate": 3.5301626094498674e-05, + "loss": 2.4223, + "step": 14509 + }, + { + "epoch": 1.171011217819385, + "grad_norm": 0.6782815456390381, + "learning_rate": 3.5289589331282715e-05, + "loss": 2.457, + "step": 14510 + }, + { + "epoch": 1.171091921555968, + "grad_norm": 0.720973014831543, + "learning_rate": 3.527755418081932e-05, + "loss": 2.4541, + "step": 14511 + }, + { + "epoch": 1.171172625292551, + "grad_norm": 0.6300156712532043, + "learning_rate": 3.526552064340841e-05, + "loss": 2.4451, + "step": 14512 + }, + { + "epoch": 1.171253329029134, + "grad_norm": 0.7660964727401733, + "learning_rate": 3.5253488719350026e-05, + "loss": 2.5031, + "step": 14513 + }, + { + "epoch": 1.1713340327657171, + "grad_norm": 0.6931602358818054, + "learning_rate": 3.5241458408943905e-05, + "loss": 2.4249, + "step": 14514 + }, + { + "epoch": 1.1714147365023, + "grad_norm": 0.6863045692443848, + "learning_rate": 3.522942971248993e-05, + "loss": 2.4429, + "step": 14515 + }, + { + "epoch": 1.171495440238883, + "grad_norm": 0.6993531584739685, + "learning_rate": 3.521740263028791e-05, + "loss": 2.3864, + "step": 14516 + }, + { + "epoch": 1.1715761439754662, + "grad_norm": 0.807991087436676, + "learning_rate": 3.520537716263753e-05, + "loss": 2.459, + "step": 14517 + }, + { + "epoch": 
1.171656847712049, + "grad_norm": 0.6722908020019531, + "learning_rate": 3.519335330983852e-05, + "loss": 2.4426, + "step": 14518 + }, + { + "epoch": 1.1717375514486321, + "grad_norm": 0.6934377551078796, + "learning_rate": 3.5181331072190585e-05, + "loss": 2.4326, + "step": 14519 + }, + { + "epoch": 1.171818255185215, + "grad_norm": 0.6532938480377197, + "learning_rate": 3.516931044999329e-05, + "loss": 2.3778, + "step": 14520 + }, + { + "epoch": 1.171898958921798, + "grad_norm": 0.6779183745384216, + "learning_rate": 3.5157291443546247e-05, + "loss": 2.4089, + "step": 14521 + }, + { + "epoch": 1.1719796626583812, + "grad_norm": 0.687005877494812, + "learning_rate": 3.514527405314899e-05, + "loss": 2.4669, + "step": 14522 + }, + { + "epoch": 1.172060366394964, + "grad_norm": 0.6804830431938171, + "learning_rate": 3.5133258279101045e-05, + "loss": 2.4789, + "step": 14523 + }, + { + "epoch": 1.1721410701315471, + "grad_norm": 0.8345538973808289, + "learning_rate": 3.512124412170187e-05, + "loss": 2.4506, + "step": 14524 + }, + { + "epoch": 1.17222177386813, + "grad_norm": 0.6571901440620422, + "learning_rate": 3.510923158125088e-05, + "loss": 2.4911, + "step": 14525 + }, + { + "epoch": 1.172302477604713, + "grad_norm": 0.6607047915458679, + "learning_rate": 3.5097220658047504e-05, + "loss": 2.4882, + "step": 14526 + }, + { + "epoch": 1.1723831813412962, + "grad_norm": 0.6883669495582581, + "learning_rate": 3.508521135239101e-05, + "loss": 2.4083, + "step": 14527 + }, + { + "epoch": 1.172463885077879, + "grad_norm": 0.6792941689491272, + "learning_rate": 3.5073203664580746e-05, + "loss": 2.368, + "step": 14528 + }, + { + "epoch": 1.172544588814462, + "grad_norm": 0.6675198674201965, + "learning_rate": 3.506119759491598e-05, + "loss": 2.4193, + "step": 14529 + }, + { + "epoch": 1.1726252925510452, + "grad_norm": 0.7267464399337769, + "learning_rate": 3.504919314369591e-05, + "loss": 2.3906, + "step": 14530 + }, + { + "epoch": 1.172705996287628, + "grad_norm": 
0.6927710175514221, + "learning_rate": 3.503719031121973e-05, + "loss": 2.4082, + "step": 14531 + }, + { + "epoch": 1.1727867000242111, + "grad_norm": 0.7231000065803528, + "learning_rate": 3.502518909778656e-05, + "loss": 2.4845, + "step": 14532 + }, + { + "epoch": 1.1728674037607942, + "grad_norm": 0.7087520360946655, + "learning_rate": 3.5013189503695544e-05, + "loss": 2.4622, + "step": 14533 + }, + { + "epoch": 1.172948107497377, + "grad_norm": 0.6669846177101135, + "learning_rate": 3.5001191529245716e-05, + "loss": 2.4151, + "step": 14534 + }, + { + "epoch": 1.1730288112339602, + "grad_norm": 0.7338447570800781, + "learning_rate": 3.4989195174736134e-05, + "loss": 2.4274, + "step": 14535 + }, + { + "epoch": 1.173109514970543, + "grad_norm": 0.7032054662704468, + "learning_rate": 3.497720044046572e-05, + "loss": 2.4066, + "step": 14536 + }, + { + "epoch": 1.1731902187071261, + "grad_norm": 0.6571083068847656, + "learning_rate": 3.496520732673344e-05, + "loss": 2.4581, + "step": 14537 + }, + { + "epoch": 1.1732709224437092, + "grad_norm": 0.6618444919586182, + "learning_rate": 3.495321583383819e-05, + "loss": 2.3675, + "step": 14538 + }, + { + "epoch": 1.173351626180292, + "grad_norm": 0.6597652435302734, + "learning_rate": 3.4941225962078885e-05, + "loss": 2.416, + "step": 14539 + }, + { + "epoch": 1.1734323299168752, + "grad_norm": 0.682634711265564, + "learning_rate": 3.492923771175425e-05, + "loss": 2.5081, + "step": 14540 + }, + { + "epoch": 1.173513033653458, + "grad_norm": 0.7046132683753967, + "learning_rate": 3.49172510831631e-05, + "loss": 2.4439, + "step": 14541 + }, + { + "epoch": 1.1735937373900411, + "grad_norm": 0.6734833717346191, + "learning_rate": 3.4905266076604196e-05, + "loss": 2.4348, + "step": 14542 + }, + { + "epoch": 1.1736744411266242, + "grad_norm": 0.6624744534492493, + "learning_rate": 3.4893282692376214e-05, + "loss": 2.4364, + "step": 14543 + }, + { + "epoch": 1.173755144863207, + "grad_norm": 0.8425754308700562, + "learning_rate": 
3.4881300930777815e-05, + "loss": 2.4803, + "step": 14544 + }, + { + "epoch": 1.1738358485997902, + "grad_norm": 0.6438888311386108, + "learning_rate": 3.486932079210766e-05, + "loss": 2.3973, + "step": 14545 + }, + { + "epoch": 1.1739165523363733, + "grad_norm": 0.650399923324585, + "learning_rate": 3.485734227666424e-05, + "loss": 2.4183, + "step": 14546 + }, + { + "epoch": 1.1739972560729561, + "grad_norm": 0.6857002973556519, + "learning_rate": 3.4845365384746144e-05, + "loss": 2.4061, + "step": 14547 + }, + { + "epoch": 1.1740779598095392, + "grad_norm": 0.6680994629859924, + "learning_rate": 3.483339011665189e-05, + "loss": 2.421, + "step": 14548 + }, + { + "epoch": 1.1741586635461223, + "grad_norm": 0.6440950632095337, + "learning_rate": 3.482141647267987e-05, + "loss": 2.3914, + "step": 14549 + }, + { + "epoch": 1.1742393672827052, + "grad_norm": 0.7329740524291992, + "learning_rate": 3.480944445312853e-05, + "loss": 2.4805, + "step": 14550 + }, + { + "epoch": 1.1743200710192883, + "grad_norm": 0.6848189234733582, + "learning_rate": 3.4797474058296245e-05, + "loss": 2.3611, + "step": 14551 + }, + { + "epoch": 1.1744007747558711, + "grad_norm": 0.6994072794914246, + "learning_rate": 3.478550528848134e-05, + "loss": 2.5106, + "step": 14552 + }, + { + "epoch": 1.1744814784924542, + "grad_norm": 0.6826444268226624, + "learning_rate": 3.477353814398212e-05, + "loss": 2.467, + "step": 14553 + }, + { + "epoch": 1.1745621822290373, + "grad_norm": 0.6658408045768738, + "learning_rate": 3.476157262509683e-05, + "loss": 2.423, + "step": 14554 + }, + { + "epoch": 1.1746428859656202, + "grad_norm": 0.6963697075843811, + "learning_rate": 3.474960873212372e-05, + "loss": 2.457, + "step": 14555 + }, + { + "epoch": 1.1747235897022033, + "grad_norm": 0.7574479579925537, + "learning_rate": 3.4737646465360894e-05, + "loss": 2.4292, + "step": 14556 + }, + { + "epoch": 1.1748042934387861, + "grad_norm": 0.7494931817054749, + "learning_rate": 3.472568582510652e-05, + "loss": 
2.4395, + "step": 14557 + }, + { + "epoch": 1.1748849971753692, + "grad_norm": 0.7062687873840332, + "learning_rate": 3.471372681165872e-05, + "loss": 2.4561, + "step": 14558 + }, + { + "epoch": 1.1749657009119523, + "grad_norm": 0.6875349879264832, + "learning_rate": 3.4701769425315465e-05, + "loss": 2.4728, + "step": 14559 + }, + { + "epoch": 1.1750464046485352, + "grad_norm": 0.7009960412979126, + "learning_rate": 3.46898136663748e-05, + "loss": 2.5364, + "step": 14560 + }, + { + "epoch": 1.1751271083851182, + "grad_norm": 0.673791766166687, + "learning_rate": 3.467785953513475e-05, + "loss": 2.4611, + "step": 14561 + }, + { + "epoch": 1.1752078121217013, + "grad_norm": 0.7166882753372192, + "learning_rate": 3.4665907031893164e-05, + "loss": 2.4451, + "step": 14562 + }, + { + "epoch": 1.1752885158582842, + "grad_norm": 0.6868429780006409, + "learning_rate": 3.465395615694791e-05, + "loss": 2.4282, + "step": 14563 + }, + { + "epoch": 1.1753692195948673, + "grad_norm": 0.7212893962860107, + "learning_rate": 3.464200691059697e-05, + "loss": 2.4239, + "step": 14564 + }, + { + "epoch": 1.1754499233314502, + "grad_norm": 0.7213432192802429, + "learning_rate": 3.463005929313802e-05, + "loss": 2.4872, + "step": 14565 + }, + { + "epoch": 1.1755306270680332, + "grad_norm": 0.6805179119110107, + "learning_rate": 3.461811330486887e-05, + "loss": 2.4192, + "step": 14566 + }, + { + "epoch": 1.1756113308046163, + "grad_norm": 0.6746333241462708, + "learning_rate": 3.460616894608725e-05, + "loss": 2.3911, + "step": 14567 + }, + { + "epoch": 1.1756920345411992, + "grad_norm": 0.7388630509376526, + "learning_rate": 3.459422621709088e-05, + "loss": 2.4758, + "step": 14568 + }, + { + "epoch": 1.1757727382777823, + "grad_norm": 0.7730274200439453, + "learning_rate": 3.458228511817731e-05, + "loss": 2.4159, + "step": 14569 + }, + { + "epoch": 1.1758534420143651, + "grad_norm": 0.721075177192688, + "learning_rate": 3.457034564964422e-05, + "loss": 2.4673, + "step": 14570 + }, + { + 
"epoch": 1.1759341457509482, + "grad_norm": 0.6647645235061646, + "learning_rate": 3.4558407811789184e-05, + "loss": 2.395, + "step": 14571 + }, + { + "epoch": 1.1760148494875313, + "grad_norm": 0.7155466675758362, + "learning_rate": 3.454647160490965e-05, + "loss": 2.503, + "step": 14572 + }, + { + "epoch": 1.1760955532241142, + "grad_norm": 0.6789268851280212, + "learning_rate": 3.453453702930314e-05, + "loss": 2.401, + "step": 14573 + }, + { + "epoch": 1.1761762569606973, + "grad_norm": 0.7488093376159668, + "learning_rate": 3.4522604085267105e-05, + "loss": 2.4434, + "step": 14574 + }, + { + "epoch": 1.1762569606972804, + "grad_norm": 0.7954889535903931, + "learning_rate": 3.451067277309893e-05, + "loss": 2.5302, + "step": 14575 + }, + { + "epoch": 1.1763376644338632, + "grad_norm": 0.7008484601974487, + "learning_rate": 3.4498743093095975e-05, + "loss": 2.3935, + "step": 14576 + }, + { + "epoch": 1.1764183681704463, + "grad_norm": 0.6725437641143799, + "learning_rate": 3.448681504555561e-05, + "loss": 2.399, + "step": 14577 + }, + { + "epoch": 1.1764990719070294, + "grad_norm": 0.6778931617736816, + "learning_rate": 3.4474888630775026e-05, + "loss": 2.4178, + "step": 14578 + }, + { + "epoch": 1.1765797756436123, + "grad_norm": 0.7043762803077698, + "learning_rate": 3.44629638490515e-05, + "loss": 2.5581, + "step": 14579 + }, + { + "epoch": 1.1766604793801954, + "grad_norm": 0.6848085522651672, + "learning_rate": 3.445104070068227e-05, + "loss": 2.436, + "step": 14580 + }, + { + "epoch": 1.1767411831167782, + "grad_norm": 0.7504082322120667, + "learning_rate": 3.443911918596441e-05, + "loss": 2.4138, + "step": 14581 + }, + { + "epoch": 1.1768218868533613, + "grad_norm": 0.7441161870956421, + "learning_rate": 3.442719930519508e-05, + "loss": 2.4333, + "step": 14582 + }, + { + "epoch": 1.1769025905899444, + "grad_norm": 0.663894772529602, + "learning_rate": 3.4415281058671354e-05, + "loss": 2.4672, + "step": 14583 + }, + { + "epoch": 1.1769832943265273, + 
"grad_norm": 0.6814345121383667, + "learning_rate": 3.440336444669027e-05, + "loss": 2.4196, + "step": 14584 + }, + { + "epoch": 1.1770639980631104, + "grad_norm": 0.7566598057746887, + "learning_rate": 3.439144946954881e-05, + "loss": 2.4586, + "step": 14585 + }, + { + "epoch": 1.1771447017996932, + "grad_norm": 0.7324996590614319, + "learning_rate": 3.4379536127543934e-05, + "loss": 2.4286, + "step": 14586 + }, + { + "epoch": 1.1772254055362763, + "grad_norm": 0.6632608771324158, + "learning_rate": 3.436762442097259e-05, + "loss": 2.4713, + "step": 14587 + }, + { + "epoch": 1.1773061092728594, + "grad_norm": 0.7246156930923462, + "learning_rate": 3.4355714350131564e-05, + "loss": 2.4374, + "step": 14588 + }, + { + "epoch": 1.1773868130094423, + "grad_norm": 0.7096351981163025, + "learning_rate": 3.4343805915317737e-05, + "loss": 2.4649, + "step": 14589 + }, + { + "epoch": 1.1774675167460253, + "grad_norm": 0.7090620398521423, + "learning_rate": 3.433189911682793e-05, + "loss": 2.396, + "step": 14590 + }, + { + "epoch": 1.1775482204826084, + "grad_norm": 0.7782440185546875, + "learning_rate": 3.431999395495882e-05, + "loss": 2.4506, + "step": 14591 + }, + { + "epoch": 1.1776289242191913, + "grad_norm": 0.6933457851409912, + "learning_rate": 3.4308090430007155e-05, + "loss": 2.3985, + "step": 14592 + }, + { + "epoch": 1.1777096279557744, + "grad_norm": 0.6935414671897888, + "learning_rate": 3.429618854226959e-05, + "loss": 2.4372, + "step": 14593 + }, + { + "epoch": 1.1777903316923575, + "grad_norm": 0.6971156597137451, + "learning_rate": 3.428428829204276e-05, + "loss": 2.4837, + "step": 14594 + }, + { + "epoch": 1.1778710354289403, + "grad_norm": 0.6460022926330566, + "learning_rate": 3.427238967962325e-05, + "loss": 2.3742, + "step": 14595 + }, + { + "epoch": 1.1779517391655234, + "grad_norm": 0.6941941976547241, + "learning_rate": 3.426049270530763e-05, + "loss": 2.4706, + "step": 14596 + }, + { + "epoch": 1.1780324429021063, + "grad_norm": 0.7062166333198547, 
+ "learning_rate": 3.424859736939236e-05, + "loss": 2.3893, + "step": 14597 + }, + { + "epoch": 1.1781131466386894, + "grad_norm": 0.6586433053016663, + "learning_rate": 3.42367036721739e-05, + "loss": 2.4385, + "step": 14598 + }, + { + "epoch": 1.1781938503752725, + "grad_norm": 0.6781242489814758, + "learning_rate": 3.422481161394869e-05, + "loss": 2.3876, + "step": 14599 + }, + { + "epoch": 1.1782745541118553, + "grad_norm": 0.710127592086792, + "learning_rate": 3.421292119501316e-05, + "loss": 2.4067, + "step": 14600 + }, + { + "epoch": 1.1783552578484384, + "grad_norm": 0.6856096982955933, + "learning_rate": 3.420103241566357e-05, + "loss": 2.4855, + "step": 14601 + }, + { + "epoch": 1.1784359615850213, + "grad_norm": 0.7173380851745605, + "learning_rate": 3.4189145276196245e-05, + "loss": 2.4871, + "step": 14602 + }, + { + "epoch": 1.1785166653216044, + "grad_norm": 0.6895382404327393, + "learning_rate": 3.417725977690745e-05, + "loss": 2.4066, + "step": 14603 + }, + { + "epoch": 1.1785973690581875, + "grad_norm": 0.7417690753936768, + "learning_rate": 3.416537591809341e-05, + "loss": 2.3779, + "step": 14604 + }, + { + "epoch": 1.1786780727947703, + "grad_norm": 0.7258411049842834, + "learning_rate": 3.4153493700050286e-05, + "loss": 2.4334, + "step": 14605 + }, + { + "epoch": 1.1787587765313534, + "grad_norm": 0.65704345703125, + "learning_rate": 3.414161312307427e-05, + "loss": 2.4531, + "step": 14606 + }, + { + "epoch": 1.1788394802679365, + "grad_norm": 0.6937118172645569, + "learning_rate": 3.4129734187461374e-05, + "loss": 2.4562, + "step": 14607 + }, + { + "epoch": 1.1789201840045194, + "grad_norm": 0.7331998348236084, + "learning_rate": 3.411785689350768e-05, + "loss": 2.4418, + "step": 14608 + }, + { + "epoch": 1.1790008877411025, + "grad_norm": 0.666582465171814, + "learning_rate": 3.410598124150924e-05, + "loss": 2.4154, + "step": 14609 + }, + { + "epoch": 1.1790815914776853, + "grad_norm": 0.6684321165084839, + "learning_rate": 
3.409410723176197e-05, + "loss": 2.4155, + "step": 14610 + }, + { + "epoch": 1.1791622952142684, + "grad_norm": 0.6413382291793823, + "learning_rate": 3.408223486456184e-05, + "loss": 2.3924, + "step": 14611 + }, + { + "epoch": 1.1792429989508515, + "grad_norm": 0.7081305384635925, + "learning_rate": 3.407036414020475e-05, + "loss": 2.3811, + "step": 14612 + }, + { + "epoch": 1.1793237026874344, + "grad_norm": 0.7550063133239746, + "learning_rate": 3.405849505898645e-05, + "loss": 2.4425, + "step": 14613 + }, + { + "epoch": 1.1794044064240174, + "grad_norm": 0.677200198173523, + "learning_rate": 3.404662762120288e-05, + "loss": 2.5182, + "step": 14614 + }, + { + "epoch": 1.1794851101606003, + "grad_norm": 0.6829770803451538, + "learning_rate": 3.4034761827149745e-05, + "loss": 2.5068, + "step": 14615 + }, + { + "epoch": 1.1795658138971834, + "grad_norm": 0.7069409489631653, + "learning_rate": 3.4022897677122815e-05, + "loss": 2.4449, + "step": 14616 + }, + { + "epoch": 1.1796465176337665, + "grad_norm": 0.6604448556900024, + "learning_rate": 3.4011035171417696e-05, + "loss": 2.3996, + "step": 14617 + }, + { + "epoch": 1.1797272213703494, + "grad_norm": 0.6577324271202087, + "learning_rate": 3.3999174310330084e-05, + "loss": 2.4723, + "step": 14618 + }, + { + "epoch": 1.1798079251069324, + "grad_norm": 0.8159187436103821, + "learning_rate": 3.398731509415561e-05, + "loss": 2.4655, + "step": 14619 + }, + { + "epoch": 1.1798886288435155, + "grad_norm": 0.7170652747154236, + "learning_rate": 3.397545752318977e-05, + "loss": 2.5095, + "step": 14620 + }, + { + "epoch": 1.1799693325800984, + "grad_norm": 0.6865009665489197, + "learning_rate": 3.396360159772812e-05, + "loss": 2.4358, + "step": 14621 + }, + { + "epoch": 1.1800500363166815, + "grad_norm": 0.6485020518302917, + "learning_rate": 3.3951747318066175e-05, + "loss": 2.4576, + "step": 14622 + }, + { + "epoch": 1.1801307400532646, + "grad_norm": 0.6626582145690918, + "learning_rate": 3.39398946844993e-05, + "loss": 
2.4824, + "step": 14623 + }, + { + "epoch": 1.1802114437898474, + "grad_norm": 0.718588650226593, + "learning_rate": 3.392804369732293e-05, + "loss": 2.4211, + "step": 14624 + }, + { + "epoch": 1.1802921475264305, + "grad_norm": 0.7449582815170288, + "learning_rate": 3.391619435683243e-05, + "loss": 2.444, + "step": 14625 + }, + { + "epoch": 1.1803728512630134, + "grad_norm": 0.6988492012023926, + "learning_rate": 3.3904346663323115e-05, + "loss": 2.4262, + "step": 14626 + }, + { + "epoch": 1.1804535549995965, + "grad_norm": 0.6779490113258362, + "learning_rate": 3.389250061709025e-05, + "loss": 2.4751, + "step": 14627 + }, + { + "epoch": 1.1805342587361796, + "grad_norm": 0.6883673667907715, + "learning_rate": 3.388065621842912e-05, + "loss": 2.4995, + "step": 14628 + }, + { + "epoch": 1.1806149624727624, + "grad_norm": 0.7112017273902893, + "learning_rate": 3.386881346763483e-05, + "loss": 2.4181, + "step": 14629 + }, + { + "epoch": 1.1806956662093455, + "grad_norm": 0.6960459351539612, + "learning_rate": 3.385697236500258e-05, + "loss": 2.4888, + "step": 14630 + }, + { + "epoch": 1.1807763699459284, + "grad_norm": 0.6874156594276428, + "learning_rate": 3.3845132910827484e-05, + "loss": 2.4175, + "step": 14631 + }, + { + "epoch": 1.1808570736825115, + "grad_norm": 0.7075642347335815, + "learning_rate": 3.383329510540463e-05, + "loss": 2.4315, + "step": 14632 + }, + { + "epoch": 1.1809377774190946, + "grad_norm": 0.674907386302948, + "learning_rate": 3.3821458949028995e-05, + "loss": 2.4216, + "step": 14633 + }, + { + "epoch": 1.1810184811556774, + "grad_norm": 0.7008463740348816, + "learning_rate": 3.380962444199559e-05, + "loss": 2.4114, + "step": 14634 + }, + { + "epoch": 1.1810991848922605, + "grad_norm": 0.6784217953681946, + "learning_rate": 3.379779158459937e-05, + "loss": 2.3663, + "step": 14635 + }, + { + "epoch": 1.1811798886288436, + "grad_norm": 0.7174829244613647, + "learning_rate": 3.378596037713525e-05, + "loss": 2.4582, + "step": 14636 + }, + { + 
"epoch": 1.1812605923654265, + "grad_norm": 0.7106035947799683, + "learning_rate": 3.3774130819898065e-05, + "loss": 2.5095, + "step": 14637 + }, + { + "epoch": 1.1813412961020096, + "grad_norm": 0.809107780456543, + "learning_rate": 3.3762302913182696e-05, + "loss": 2.4942, + "step": 14638 + }, + { + "epoch": 1.1814219998385926, + "grad_norm": 0.7150272727012634, + "learning_rate": 3.375047665728386e-05, + "loss": 2.378, + "step": 14639 + }, + { + "epoch": 1.1815027035751755, + "grad_norm": 0.7016271352767944, + "learning_rate": 3.373865205249632e-05, + "loss": 2.4393, + "step": 14640 + }, + { + "epoch": 1.1815834073117586, + "grad_norm": 0.6387282013893127, + "learning_rate": 3.372682909911481e-05, + "loss": 2.4399, + "step": 14641 + }, + { + "epoch": 1.1816641110483415, + "grad_norm": 0.834181010723114, + "learning_rate": 3.371500779743393e-05, + "loss": 2.4312, + "step": 14642 + }, + { + "epoch": 1.1817448147849245, + "grad_norm": 0.6690472960472107, + "learning_rate": 3.370318814774832e-05, + "loss": 2.407, + "step": 14643 + }, + { + "epoch": 1.1818255185215076, + "grad_norm": 0.6594302654266357, + "learning_rate": 3.369137015035256e-05, + "loss": 2.4275, + "step": 14644 + }, + { + "epoch": 1.1819062222580905, + "grad_norm": 0.7284699082374573, + "learning_rate": 3.3679553805541194e-05, + "loss": 2.3981, + "step": 14645 + }, + { + "epoch": 1.1819869259946736, + "grad_norm": 0.7109572291374207, + "learning_rate": 3.366773911360871e-05, + "loss": 2.4345, + "step": 14646 + }, + { + "epoch": 1.1820676297312565, + "grad_norm": 0.6874241828918457, + "learning_rate": 3.3655926074849566e-05, + "loss": 2.4488, + "step": 14647 + }, + { + "epoch": 1.1821483334678395, + "grad_norm": 0.6698973178863525, + "learning_rate": 3.364411468955819e-05, + "loss": 2.42, + "step": 14648 + }, + { + "epoch": 1.1822290372044226, + "grad_norm": 0.7816089391708374, + "learning_rate": 3.3632304958028915e-05, + "loss": 2.4638, + "step": 14649 + }, + { + "epoch": 1.1823097409410055, + 
"grad_norm": 0.6718220710754395, + "learning_rate": 3.3620496880556075e-05, + "loss": 2.413, + "step": 14650 + }, + { + "epoch": 1.1823904446775886, + "grad_norm": 0.753463089466095, + "learning_rate": 3.360869045743401e-05, + "loss": 2.3772, + "step": 14651 + }, + { + "epoch": 1.1824711484141717, + "grad_norm": 0.7031456828117371, + "learning_rate": 3.359688568895689e-05, + "loss": 2.4198, + "step": 14652 + }, + { + "epoch": 1.1825518521507545, + "grad_norm": 0.7857323288917542, + "learning_rate": 3.358508257541897e-05, + "loss": 2.4223, + "step": 14653 + }, + { + "epoch": 1.1826325558873376, + "grad_norm": 0.7779297828674316, + "learning_rate": 3.357328111711439e-05, + "loss": 2.5266, + "step": 14654 + }, + { + "epoch": 1.1827132596239207, + "grad_norm": 0.7382386326789856, + "learning_rate": 3.356148131433728e-05, + "loss": 2.4673, + "step": 14655 + }, + { + "epoch": 1.1827939633605036, + "grad_norm": 0.7868054509162903, + "learning_rate": 3.354968316738174e-05, + "loss": 2.4285, + "step": 14656 + }, + { + "epoch": 1.1828746670970867, + "grad_norm": 0.7007591724395752, + "learning_rate": 3.353788667654183e-05, + "loss": 2.4054, + "step": 14657 + }, + { + "epoch": 1.1829553708336695, + "grad_norm": 0.6627741456031799, + "learning_rate": 3.352609184211148e-05, + "loss": 2.4224, + "step": 14658 + }, + { + "epoch": 1.1830360745702526, + "grad_norm": 0.6865360736846924, + "learning_rate": 3.351429866438469e-05, + "loss": 2.4084, + "step": 14659 + }, + { + "epoch": 1.1831167783068357, + "grad_norm": 0.7572095990180969, + "learning_rate": 3.3502507143655404e-05, + "loss": 2.4339, + "step": 14660 + }, + { + "epoch": 1.1831974820434186, + "grad_norm": 0.6907969117164612, + "learning_rate": 3.349071728021743e-05, + "loss": 2.4578, + "step": 14661 + }, + { + "epoch": 1.1832781857800017, + "grad_norm": 0.6618743538856506, + "learning_rate": 3.347892907436465e-05, + "loss": 2.4131, + "step": 14662 + }, + { + "epoch": 1.1833588895165845, + "grad_norm": 0.777159571647644, + 
"learning_rate": 3.346714252639084e-05, + "loss": 2.419, + "step": 14663 + }, + { + "epoch": 1.1834395932531676, + "grad_norm": 0.666344165802002, + "learning_rate": 3.345535763658975e-05, + "loss": 2.4155, + "step": 14664 + }, + { + "epoch": 1.1835202969897507, + "grad_norm": 0.708848774433136, + "learning_rate": 3.3443574405255095e-05, + "loss": 2.4794, + "step": 14665 + }, + { + "epoch": 1.1836010007263336, + "grad_norm": 0.7247438430786133, + "learning_rate": 3.3431792832680555e-05, + "loss": 2.4445, + "step": 14666 + }, + { + "epoch": 1.1836817044629167, + "grad_norm": 0.6870034337043762, + "learning_rate": 3.342001291915978e-05, + "loss": 2.4309, + "step": 14667 + }, + { + "epoch": 1.1837624081994997, + "grad_norm": 0.7088049650192261, + "learning_rate": 3.340823466498629e-05, + "loss": 2.4456, + "step": 14668 + }, + { + "epoch": 1.1838431119360826, + "grad_norm": 0.695148229598999, + "learning_rate": 3.3396458070453676e-05, + "loss": 2.4018, + "step": 14669 + }, + { + "epoch": 1.1839238156726657, + "grad_norm": 0.7947117686271667, + "learning_rate": 3.3384683135855444e-05, + "loss": 2.4099, + "step": 14670 + }, + { + "epoch": 1.1840045194092486, + "grad_norm": 0.7268195748329163, + "learning_rate": 3.337290986148502e-05, + "loss": 2.3955, + "step": 14671 + }, + { + "epoch": 1.1840852231458316, + "grad_norm": 0.6932024955749512, + "learning_rate": 3.336113824763585e-05, + "loss": 2.4046, + "step": 14672 + }, + { + "epoch": 1.1841659268824147, + "grad_norm": 0.7408114671707153, + "learning_rate": 3.3349368294601334e-05, + "loss": 2.4186, + "step": 14673 + }, + { + "epoch": 1.1842466306189976, + "grad_norm": 0.6678428053855896, + "learning_rate": 3.3337600002674765e-05, + "loss": 2.4324, + "step": 14674 + }, + { + "epoch": 1.1843273343555807, + "grad_norm": 0.7221381664276123, + "learning_rate": 3.3325833372149416e-05, + "loss": 2.4474, + "step": 14675 + }, + { + "epoch": 1.1844080380921636, + "grad_norm": 0.6971224546432495, + "learning_rate": 
3.3314068403318654e-05, + "loss": 2.4197, + "step": 14676 + }, + { + "epoch": 1.1844887418287466, + "grad_norm": 0.65053391456604, + "learning_rate": 3.3302305096475604e-05, + "loss": 2.4169, + "step": 14677 + }, + { + "epoch": 1.1845694455653297, + "grad_norm": 0.7231155633926392, + "learning_rate": 3.3290543451913457e-05, + "loss": 2.4222, + "step": 14678 + }, + { + "epoch": 1.1846501493019126, + "grad_norm": 0.6458824872970581, + "learning_rate": 3.3278783469925345e-05, + "loss": 2.422, + "step": 14679 + }, + { + "epoch": 1.1847308530384957, + "grad_norm": 0.6783488392829895, + "learning_rate": 3.32670251508044e-05, + "loss": 2.4231, + "step": 14680 + }, + { + "epoch": 1.1848115567750788, + "grad_norm": 0.6742293238639832, + "learning_rate": 3.3255268494843586e-05, + "loss": 2.409, + "step": 14681 + }, + { + "epoch": 1.1848922605116616, + "grad_norm": 0.7455186247825623, + "learning_rate": 3.3243513502335956e-05, + "loss": 2.4121, + "step": 14682 + }, + { + "epoch": 1.1849729642482447, + "grad_norm": 0.7042234539985657, + "learning_rate": 3.323176017357451e-05, + "loss": 2.4574, + "step": 14683 + }, + { + "epoch": 1.1850536679848278, + "grad_norm": 0.7897992134094238, + "learning_rate": 3.3220008508852094e-05, + "loss": 2.4796, + "step": 14684 + }, + { + "epoch": 1.1851343717214107, + "grad_norm": 0.6894058585166931, + "learning_rate": 3.3208258508461644e-05, + "loss": 2.4125, + "step": 14685 + }, + { + "epoch": 1.1852150754579938, + "grad_norm": 0.7574072480201721, + "learning_rate": 3.319651017269597e-05, + "loss": 2.4714, + "step": 14686 + }, + { + "epoch": 1.1852957791945766, + "grad_norm": 0.7457531094551086, + "learning_rate": 3.3184763501847905e-05, + "loss": 2.4793, + "step": 14687 + }, + { + "epoch": 1.1853764829311597, + "grad_norm": 0.6819709539413452, + "learning_rate": 3.317301849621018e-05, + "loss": 2.4563, + "step": 14688 + }, + { + "epoch": 1.1854571866677428, + "grad_norm": 0.6998026371002197, + "learning_rate": 3.316127515607555e-05, + "loss": 
2.4548, + "step": 14689 + }, + { + "epoch": 1.1855378904043257, + "grad_norm": 0.7148768305778503, + "learning_rate": 3.314953348173664e-05, + "loss": 2.4897, + "step": 14690 + }, + { + "epoch": 1.1856185941409088, + "grad_norm": 0.6581987738609314, + "learning_rate": 3.31377934734861e-05, + "loss": 2.4683, + "step": 14691 + }, + { + "epoch": 1.1856992978774916, + "grad_norm": 0.7493093609809875, + "learning_rate": 3.312605513161653e-05, + "loss": 2.4564, + "step": 14692 + }, + { + "epoch": 1.1857800016140747, + "grad_norm": 0.7095562219619751, + "learning_rate": 3.311431845642051e-05, + "loss": 2.4595, + "step": 14693 + }, + { + "epoch": 1.1858607053506578, + "grad_norm": 0.8045323491096497, + "learning_rate": 3.310258344819047e-05, + "loss": 2.5044, + "step": 14694 + }, + { + "epoch": 1.1859414090872407, + "grad_norm": 0.7381219267845154, + "learning_rate": 3.3090850107218943e-05, + "loss": 2.415, + "step": 14695 + }, + { + "epoch": 1.1860221128238237, + "grad_norm": 0.6859883069992065, + "learning_rate": 3.307911843379832e-05, + "loss": 2.4314, + "step": 14696 + }, + { + "epoch": 1.1861028165604068, + "grad_norm": 0.7084196209907532, + "learning_rate": 3.306738842822099e-05, + "loss": 2.4404, + "step": 14697 + }, + { + "epoch": 1.1861835202969897, + "grad_norm": 0.6964806318283081, + "learning_rate": 3.305566009077932e-05, + "loss": 2.4391, + "step": 14698 + }, + { + "epoch": 1.1862642240335728, + "grad_norm": 0.7272049188613892, + "learning_rate": 3.304393342176562e-05, + "loss": 2.4395, + "step": 14699 + }, + { + "epoch": 1.1863449277701559, + "grad_norm": 0.6651458144187927, + "learning_rate": 3.303220842147209e-05, + "loss": 2.4059, + "step": 14700 + }, + { + "epoch": 1.1864256315067387, + "grad_norm": 0.7599130868911743, + "learning_rate": 3.302048509019099e-05, + "loss": 2.5044, + "step": 14701 + }, + { + "epoch": 1.1865063352433218, + "grad_norm": 0.6694391965866089, + "learning_rate": 3.3008763428214505e-05, + "loss": 2.4817, + "step": 14702 + }, + { + 
"epoch": 1.1865870389799047, + "grad_norm": 0.7176856398582458, + "learning_rate": 3.299704343583473e-05, + "loss": 2.4702, + "step": 14703 + }, + { + "epoch": 1.1866677427164878, + "grad_norm": 0.7133145332336426, + "learning_rate": 3.298532511334378e-05, + "loss": 2.4685, + "step": 14704 + }, + { + "epoch": 1.1867484464530709, + "grad_norm": 0.7170277833938599, + "learning_rate": 3.297360846103371e-05, + "loss": 2.4203, + "step": 14705 + }, + { + "epoch": 1.1868291501896537, + "grad_norm": 0.6853376626968384, + "learning_rate": 3.296189347919652e-05, + "loss": 2.4067, + "step": 14706 + }, + { + "epoch": 1.1869098539262368, + "grad_norm": 0.7269156575202942, + "learning_rate": 3.2950180168124175e-05, + "loss": 2.4211, + "step": 14707 + }, + { + "epoch": 1.1869905576628197, + "grad_norm": 0.8649005889892578, + "learning_rate": 3.2938468528108626e-05, + "loss": 2.4611, + "step": 14708 + }, + { + "epoch": 1.1870712613994028, + "grad_norm": 0.7256221771240234, + "learning_rate": 3.292675855944177e-05, + "loss": 2.4618, + "step": 14709 + }, + { + "epoch": 1.1871519651359859, + "grad_norm": 0.6854279637336731, + "learning_rate": 3.291505026241539e-05, + "loss": 2.4466, + "step": 14710 + }, + { + "epoch": 1.1872326688725687, + "grad_norm": 0.7182712554931641, + "learning_rate": 3.2903343637321316e-05, + "loss": 2.4847, + "step": 14711 + }, + { + "epoch": 1.1873133726091518, + "grad_norm": 0.6795300841331482, + "learning_rate": 3.289163868445134e-05, + "loss": 2.4407, + "step": 14712 + }, + { + "epoch": 1.187394076345735, + "grad_norm": 0.685146689414978, + "learning_rate": 3.287993540409713e-05, + "loss": 2.4537, + "step": 14713 + }, + { + "epoch": 1.1874747800823178, + "grad_norm": 0.7891005873680115, + "learning_rate": 3.2868233796550375e-05, + "loss": 2.4085, + "step": 14714 + }, + { + "epoch": 1.1875554838189009, + "grad_norm": 0.6521769762039185, + "learning_rate": 3.2856533862102724e-05, + "loss": 2.4174, + "step": 14715 + }, + { + "epoch": 1.1876361875554837, + 
"grad_norm": 0.7486612200737, + "learning_rate": 3.284483560104575e-05, + "loss": 2.4072, + "step": 14716 + }, + { + "epoch": 1.1877168912920668, + "grad_norm": 0.6895913481712341, + "learning_rate": 3.283313901367103e-05, + "loss": 2.4398, + "step": 14717 + }, + { + "epoch": 1.18779759502865, + "grad_norm": 0.6595678329467773, + "learning_rate": 3.282144410027009e-05, + "loss": 2.4407, + "step": 14718 + }, + { + "epoch": 1.1878782987652328, + "grad_norm": 0.7724249958992004, + "learning_rate": 3.280975086113435e-05, + "loss": 2.464, + "step": 14719 + }, + { + "epoch": 1.1879590025018159, + "grad_norm": 0.659472644329071, + "learning_rate": 3.279805929655524e-05, + "loss": 2.4774, + "step": 14720 + }, + { + "epoch": 1.1880397062383987, + "grad_norm": 0.7187919020652771, + "learning_rate": 3.27863694068242e-05, + "loss": 2.4767, + "step": 14721 + }, + { + "epoch": 1.1881204099749818, + "grad_norm": 0.7740198373794556, + "learning_rate": 3.2774681192232506e-05, + "loss": 2.4762, + "step": 14722 + }, + { + "epoch": 1.188201113711565, + "grad_norm": 0.700591504573822, + "learning_rate": 3.2762994653071464e-05, + "loss": 2.448, + "step": 14723 + }, + { + "epoch": 1.1882818174481478, + "grad_norm": 0.7168558239936829, + "learning_rate": 3.275130978963237e-05, + "loss": 2.4084, + "step": 14724 + }, + { + "epoch": 1.1883625211847308, + "grad_norm": 0.8039551973342896, + "learning_rate": 3.273962660220646e-05, + "loss": 2.3849, + "step": 14725 + }, + { + "epoch": 1.188443224921314, + "grad_norm": 0.6453016400337219, + "learning_rate": 3.27279450910848e-05, + "loss": 2.3856, + "step": 14726 + }, + { + "epoch": 1.1885239286578968, + "grad_norm": 0.7194651365280151, + "learning_rate": 3.2716265256558644e-05, + "loss": 2.4337, + "step": 14727 + }, + { + "epoch": 1.1886046323944799, + "grad_norm": 0.7298597097396851, + "learning_rate": 3.270458709891906e-05, + "loss": 2.4491, + "step": 14728 + }, + { + "epoch": 1.188685336131063, + "grad_norm": 0.7127524614334106, + 
"learning_rate": 3.269291061845705e-05, + "loss": 2.4319, + "step": 14729 + }, + { + "epoch": 1.1887660398676458, + "grad_norm": 0.6782705783843994, + "learning_rate": 3.2681235815463654e-05, + "loss": 2.4375, + "step": 14730 + }, + { + "epoch": 1.188846743604229, + "grad_norm": 0.7418326735496521, + "learning_rate": 3.266956269022987e-05, + "loss": 2.4149, + "step": 14731 + }, + { + "epoch": 1.1889274473408118, + "grad_norm": 0.7442455291748047, + "learning_rate": 3.265789124304654e-05, + "loss": 2.3935, + "step": 14732 + }, + { + "epoch": 1.1890081510773949, + "grad_norm": 0.7238253951072693, + "learning_rate": 3.264622147420461e-05, + "loss": 2.4592, + "step": 14733 + }, + { + "epoch": 1.189088854813978, + "grad_norm": 0.6488127708435059, + "learning_rate": 3.2634553383994925e-05, + "loss": 2.3468, + "step": 14734 + }, + { + "epoch": 1.1891695585505608, + "grad_norm": 0.7182446718215942, + "learning_rate": 3.2622886972708246e-05, + "loss": 2.4457, + "step": 14735 + }, + { + "epoch": 1.189250262287144, + "grad_norm": 0.6885523796081543, + "learning_rate": 3.261122224063534e-05, + "loss": 2.3943, + "step": 14736 + }, + { + "epoch": 1.1893309660237268, + "grad_norm": 0.653367817401886, + "learning_rate": 3.259955918806693e-05, + "loss": 2.4188, + "step": 14737 + }, + { + "epoch": 1.1894116697603099, + "grad_norm": 0.6968675851821899, + "learning_rate": 3.2587897815293686e-05, + "loss": 2.4276, + "step": 14738 + }, + { + "epoch": 1.189492373496893, + "grad_norm": 0.6827409267425537, + "learning_rate": 3.257623812260626e-05, + "loss": 2.4417, + "step": 14739 + }, + { + "epoch": 1.1895730772334758, + "grad_norm": 0.6807438731193542, + "learning_rate": 3.256458011029523e-05, + "loss": 2.4495, + "step": 14740 + }, + { + "epoch": 1.189653780970059, + "grad_norm": 0.6692882180213928, + "learning_rate": 3.255292377865116e-05, + "loss": 2.3789, + "step": 14741 + }, + { + "epoch": 1.189734484706642, + "grad_norm": 0.6581685543060303, + "learning_rate": 
3.2541269127964515e-05, + "loss": 2.4073, + "step": 14742 + }, + { + "epoch": 1.1898151884432249, + "grad_norm": 0.6458544731140137, + "learning_rate": 3.252961615852578e-05, + "loss": 2.4657, + "step": 14743 + }, + { + "epoch": 1.189895892179808, + "grad_norm": 0.6971322298049927, + "learning_rate": 3.251796487062541e-05, + "loss": 2.4404, + "step": 14744 + }, + { + "epoch": 1.189976595916391, + "grad_norm": 0.6770374178886414, + "learning_rate": 3.2506315264553724e-05, + "loss": 2.4329, + "step": 14745 + }, + { + "epoch": 1.190057299652974, + "grad_norm": 0.7634715437889099, + "learning_rate": 3.2494667340601085e-05, + "loss": 2.4234, + "step": 14746 + }, + { + "epoch": 1.190138003389557, + "grad_norm": 0.7717967629432678, + "learning_rate": 3.24830210990578e-05, + "loss": 2.5009, + "step": 14747 + }, + { + "epoch": 1.1902187071261399, + "grad_norm": 0.7133559584617615, + "learning_rate": 3.2471376540214124e-05, + "loss": 2.4272, + "step": 14748 + }, + { + "epoch": 1.190299410862723, + "grad_norm": 0.7273291349411011, + "learning_rate": 3.245973366436027e-05, + "loss": 2.4174, + "step": 14749 + }, + { + "epoch": 1.190380114599306, + "grad_norm": 0.6955052614212036, + "learning_rate": 3.244809247178643e-05, + "loss": 2.3605, + "step": 14750 + }, + { + "epoch": 1.190460818335889, + "grad_norm": 0.7072615027427673, + "learning_rate": 3.2436452962782685e-05, + "loss": 2.4897, + "step": 14751 + }, + { + "epoch": 1.190541522072472, + "grad_norm": 0.7095344662666321, + "learning_rate": 3.242481513763913e-05, + "loss": 2.4172, + "step": 14752 + }, + { + "epoch": 1.1906222258090549, + "grad_norm": 0.7260944247245789, + "learning_rate": 3.2413178996645864e-05, + "loss": 2.4272, + "step": 14753 + }, + { + "epoch": 1.190702929545638, + "grad_norm": 0.6601141691207886, + "learning_rate": 3.2401544540092824e-05, + "loss": 2.4072, + "step": 14754 + }, + { + "epoch": 1.190783633282221, + "grad_norm": 0.6684936881065369, + "learning_rate": 3.238991176827e-05, + "loss": 2.3968, + 
"step": 14755 + }, + { + "epoch": 1.190864337018804, + "grad_norm": 0.7264483571052551, + "learning_rate": 3.23782806814673e-05, + "loss": 2.4263, + "step": 14756 + }, + { + "epoch": 1.190945040755387, + "grad_norm": 0.6927621960639954, + "learning_rate": 3.2366651279974614e-05, + "loss": 2.4495, + "step": 14757 + }, + { + "epoch": 1.19102574449197, + "grad_norm": 0.7007272243499756, + "learning_rate": 3.2355023564081775e-05, + "loss": 2.4373, + "step": 14758 + }, + { + "epoch": 1.191106448228553, + "grad_norm": 0.6756663918495178, + "learning_rate": 3.234339753407857e-05, + "loss": 2.4148, + "step": 14759 + }, + { + "epoch": 1.191187151965136, + "grad_norm": 0.6741094589233398, + "learning_rate": 3.233177319025479e-05, + "loss": 2.3976, + "step": 14760 + }, + { + "epoch": 1.1912678557017191, + "grad_norm": 0.7098578810691833, + "learning_rate": 3.2320150532900085e-05, + "loss": 2.4326, + "step": 14761 + }, + { + "epoch": 1.191348559438302, + "grad_norm": 0.750271737575531, + "learning_rate": 3.230852956230413e-05, + "loss": 2.4766, + "step": 14762 + }, + { + "epoch": 1.191429263174885, + "grad_norm": 0.68764728307724, + "learning_rate": 3.229691027875661e-05, + "loss": 2.4128, + "step": 14763 + }, + { + "epoch": 1.191509966911468, + "grad_norm": 0.656295657157898, + "learning_rate": 3.228529268254702e-05, + "loss": 2.3928, + "step": 14764 + }, + { + "epoch": 1.191590670648051, + "grad_norm": 0.6690353155136108, + "learning_rate": 3.2273676773964955e-05, + "loss": 2.408, + "step": 14765 + }, + { + "epoch": 1.1916713743846339, + "grad_norm": 0.8111640214920044, + "learning_rate": 3.22620625532999e-05, + "loss": 2.4644, + "step": 14766 + }, + { + "epoch": 1.191752078121217, + "grad_norm": 0.7329768538475037, + "learning_rate": 3.2250450020841316e-05, + "loss": 2.4235, + "step": 14767 + }, + { + "epoch": 1.1918327818578, + "grad_norm": 0.6902688145637512, + "learning_rate": 3.223883917687861e-05, + "loss": 2.3883, + "step": 14768 + }, + { + "epoch": 1.191913485594383, 
+ "grad_norm": 0.797249972820282, + "learning_rate": 3.2227230021701205e-05, + "loss": 2.523, + "step": 14769 + }, + { + "epoch": 1.191994189330966, + "grad_norm": 0.6294408440589905, + "learning_rate": 3.221562255559834e-05, + "loss": 2.4156, + "step": 14770 + }, + { + "epoch": 1.192074893067549, + "grad_norm": 0.7326164245605469, + "learning_rate": 3.220401677885936e-05, + "loss": 2.3828, + "step": 14771 + }, + { + "epoch": 1.192155596804132, + "grad_norm": 0.783747673034668, + "learning_rate": 3.219241269177351e-05, + "loss": 2.4321, + "step": 14772 + }, + { + "epoch": 1.192236300540715, + "grad_norm": 0.7415335178375244, + "learning_rate": 3.2180810294630005e-05, + "loss": 2.4446, + "step": 14773 + }, + { + "epoch": 1.1923170042772981, + "grad_norm": 0.7125591039657593, + "learning_rate": 3.2169209587717966e-05, + "loss": 2.3914, + "step": 14774 + }, + { + "epoch": 1.192397708013881, + "grad_norm": 0.6714075207710266, + "learning_rate": 3.215761057132652e-05, + "loss": 2.3918, + "step": 14775 + }, + { + "epoch": 1.192478411750464, + "grad_norm": 0.7147830724716187, + "learning_rate": 3.214601324574481e-05, + "loss": 2.4389, + "step": 14776 + }, + { + "epoch": 1.192559115487047, + "grad_norm": 0.6780480146408081, + "learning_rate": 3.2134417611261755e-05, + "loss": 2.4119, + "step": 14777 + }, + { + "epoch": 1.19263981922363, + "grad_norm": 0.7473881840705872, + "learning_rate": 3.212282366816645e-05, + "loss": 2.4547, + "step": 14778 + }, + { + "epoch": 1.1927205229602131, + "grad_norm": 0.7418377995491028, + "learning_rate": 3.211123141674784e-05, + "loss": 2.4156, + "step": 14779 + }, + { + "epoch": 1.192801226696796, + "grad_norm": 0.687524139881134, + "learning_rate": 3.209964085729477e-05, + "loss": 2.4309, + "step": 14780 + }, + { + "epoch": 1.192881930433379, + "grad_norm": 0.6965883374214172, + "learning_rate": 3.208805199009615e-05, + "loss": 2.4028, + "step": 14781 + }, + { + "epoch": 1.192962634169962, + "grad_norm": 0.7024682760238647, + 
"learning_rate": 3.207646481544082e-05, + "loss": 2.4482, + "step": 14782 + }, + { + "epoch": 1.193043337906545, + "grad_norm": 0.6835834383964539, + "learning_rate": 3.2064879333617514e-05, + "loss": 2.3898, + "step": 14783 + }, + { + "epoch": 1.1931240416431281, + "grad_norm": 0.7002003788948059, + "learning_rate": 3.2053295544915e-05, + "loss": 2.487, + "step": 14784 + }, + { + "epoch": 1.193204745379711, + "grad_norm": 0.7128168940544128, + "learning_rate": 3.2041713449622e-05, + "loss": 2.4591, + "step": 14785 + }, + { + "epoch": 1.193285449116294, + "grad_norm": 0.6897242665290833, + "learning_rate": 3.203013304802712e-05, + "loss": 2.4458, + "step": 14786 + }, + { + "epoch": 1.1933661528528772, + "grad_norm": 0.7281817197799683, + "learning_rate": 3.2018554340419004e-05, + "loss": 2.3772, + "step": 14787 + }, + { + "epoch": 1.19344685658946, + "grad_norm": 0.6956086754798889, + "learning_rate": 3.200697732708619e-05, + "loss": 2.4316, + "step": 14788 + }, + { + "epoch": 1.1935275603260431, + "grad_norm": 0.7679805159568787, + "learning_rate": 3.199540200831729e-05, + "loss": 2.4464, + "step": 14789 + }, + { + "epoch": 1.1936082640626262, + "grad_norm": 0.6993041634559631, + "learning_rate": 3.19838283844007e-05, + "loss": 2.3881, + "step": 14790 + }, + { + "epoch": 1.193688967799209, + "grad_norm": 0.689618706703186, + "learning_rate": 3.197225645562493e-05, + "loss": 2.4184, + "step": 14791 + }, + { + "epoch": 1.1937696715357922, + "grad_norm": 0.6896520853042603, + "learning_rate": 3.1960686222278354e-05, + "loss": 2.4484, + "step": 14792 + }, + { + "epoch": 1.193850375272375, + "grad_norm": 0.6743811368942261, + "learning_rate": 3.1949117684649334e-05, + "loss": 2.4636, + "step": 14793 + }, + { + "epoch": 1.1939310790089581, + "grad_norm": 0.7028046250343323, + "learning_rate": 3.1937550843026163e-05, + "loss": 2.4576, + "step": 14794 + }, + { + "epoch": 1.1940117827455412, + "grad_norm": 0.7219679951667786, + "learning_rate": 3.192598569769718e-05, + 
"loss": 2.4495, + "step": 14795 + }, + { + "epoch": 1.194092486482124, + "grad_norm": 0.731438159942627, + "learning_rate": 3.191442224895056e-05, + "loss": 2.4699, + "step": 14796 + }, + { + "epoch": 1.1941731902187072, + "grad_norm": 0.6731431484222412, + "learning_rate": 3.19028604970745e-05, + "loss": 2.4292, + "step": 14797 + }, + { + "epoch": 1.19425389395529, + "grad_norm": 0.6720147728919983, + "learning_rate": 3.1891300442357174e-05, + "loss": 2.4482, + "step": 14798 + }, + { + "epoch": 1.1943345976918731, + "grad_norm": 0.7504273653030396, + "learning_rate": 3.187974208508667e-05, + "loss": 2.4233, + "step": 14799 + }, + { + "epoch": 1.1944153014284562, + "grad_norm": 0.6882641315460205, + "learning_rate": 3.186818542555108e-05, + "loss": 2.4633, + "step": 14800 + }, + { + "epoch": 1.194496005165039, + "grad_norm": 0.7337899208068848, + "learning_rate": 3.1856630464038385e-05, + "loss": 2.4257, + "step": 14801 + }, + { + "epoch": 1.1945767089016222, + "grad_norm": 0.7026493549346924, + "learning_rate": 3.1845077200836636e-05, + "loss": 2.482, + "step": 14802 + }, + { + "epoch": 1.1946574126382052, + "grad_norm": 0.763351321220398, + "learning_rate": 3.1833525636233675e-05, + "loss": 2.4428, + "step": 14803 + }, + { + "epoch": 1.194738116374788, + "grad_norm": 0.6568076610565186, + "learning_rate": 3.182197577051745e-05, + "loss": 2.4373, + "step": 14804 + }, + { + "epoch": 1.1948188201113712, + "grad_norm": 0.6954717040061951, + "learning_rate": 3.1810427603975844e-05, + "loss": 2.4582, + "step": 14805 + }, + { + "epoch": 1.1948995238479543, + "grad_norm": 0.7130215167999268, + "learning_rate": 3.179888113689661e-05, + "loss": 2.443, + "step": 14806 + }, + { + "epoch": 1.1949802275845371, + "grad_norm": 0.6789865493774414, + "learning_rate": 3.178733636956752e-05, + "loss": 2.4138, + "step": 14807 + }, + { + "epoch": 1.1950609313211202, + "grad_norm": 0.7725361585617065, + "learning_rate": 3.177579330227633e-05, + "loss": 2.4783, + "step": 14808 + }, + { 
+ "epoch": 1.195141635057703, + "grad_norm": 0.6952371001243591, + "learning_rate": 3.17642519353107e-05, + "loss": 2.4571, + "step": 14809 + }, + { + "epoch": 1.1952223387942862, + "grad_norm": 0.7541885375976562, + "learning_rate": 3.1752712268958275e-05, + "loss": 2.4075, + "step": 14810 + }, + { + "epoch": 1.1953030425308693, + "grad_norm": 0.6974624395370483, + "learning_rate": 3.174117430350671e-05, + "loss": 2.4525, + "step": 14811 + }, + { + "epoch": 1.1953837462674521, + "grad_norm": 0.7293709516525269, + "learning_rate": 3.172963803924347e-05, + "loss": 2.4646, + "step": 14812 + }, + { + "epoch": 1.1954644500040352, + "grad_norm": 0.6944144368171692, + "learning_rate": 3.1718103476456106e-05, + "loss": 2.462, + "step": 14813 + }, + { + "epoch": 1.195545153740618, + "grad_norm": 0.6415363550186157, + "learning_rate": 3.170657061543214e-05, + "loss": 2.4086, + "step": 14814 + }, + { + "epoch": 1.1956258574772012, + "grad_norm": 0.6511349081993103, + "learning_rate": 3.169503945645892e-05, + "loss": 2.4376, + "step": 14815 + }, + { + "epoch": 1.1957065612137843, + "grad_norm": 0.7420210242271423, + "learning_rate": 3.1683509999823854e-05, + "loss": 2.4317, + "step": 14816 + }, + { + "epoch": 1.1957872649503671, + "grad_norm": 0.7291967272758484, + "learning_rate": 3.1671982245814316e-05, + "loss": 2.4369, + "step": 14817 + }, + { + "epoch": 1.1958679686869502, + "grad_norm": 0.685743510723114, + "learning_rate": 3.166045619471758e-05, + "loss": 2.465, + "step": 14818 + }, + { + "epoch": 1.1959486724235333, + "grad_norm": 0.7130060195922852, + "learning_rate": 3.164893184682093e-05, + "loss": 2.4305, + "step": 14819 + }, + { + "epoch": 1.1960293761601162, + "grad_norm": 0.694508969783783, + "learning_rate": 3.163740920241156e-05, + "loss": 2.4278, + "step": 14820 + }, + { + "epoch": 1.1961100798966993, + "grad_norm": 0.6478514075279236, + "learning_rate": 3.162588826177669e-05, + "loss": 2.4721, + "step": 14821 + }, + { + "epoch": 1.1961907836332821, + 
"grad_norm": 0.6586465835571289, + "learning_rate": 3.1614369025203386e-05, + "loss": 2.4716, + "step": 14822 + }, + { + "epoch": 1.1962714873698652, + "grad_norm": 0.7558106184005737, + "learning_rate": 3.160285149297876e-05, + "loss": 2.4656, + "step": 14823 + }, + { + "epoch": 1.1963521911064483, + "grad_norm": 0.7208340764045715, + "learning_rate": 3.1591335665389896e-05, + "loss": 2.4374, + "step": 14824 + }, + { + "epoch": 1.1964328948430312, + "grad_norm": 0.70301353931427, + "learning_rate": 3.157982154272375e-05, + "loss": 2.397, + "step": 14825 + }, + { + "epoch": 1.1965135985796143, + "grad_norm": 0.6857609152793884, + "learning_rate": 3.15683091252673e-05, + "loss": 2.4258, + "step": 14826 + }, + { + "epoch": 1.1965943023161971, + "grad_norm": 0.6954602003097534, + "learning_rate": 3.155679841330747e-05, + "loss": 2.4566, + "step": 14827 + }, + { + "epoch": 1.1966750060527802, + "grad_norm": 0.6923913955688477, + "learning_rate": 3.154528940713113e-05, + "loss": 2.4, + "step": 14828 + }, + { + "epoch": 1.1967557097893633, + "grad_norm": 0.6641134023666382, + "learning_rate": 3.1533782107025124e-05, + "loss": 2.4721, + "step": 14829 + }, + { + "epoch": 1.1968364135259462, + "grad_norm": 0.7470134496688843, + "learning_rate": 3.152227651327627e-05, + "loss": 2.4253, + "step": 14830 + }, + { + "epoch": 1.1969171172625293, + "grad_norm": 0.7234545350074768, + "learning_rate": 3.151077262617126e-05, + "loss": 2.4109, + "step": 14831 + }, + { + "epoch": 1.1969978209991123, + "grad_norm": 0.7814013957977295, + "learning_rate": 3.149927044599682e-05, + "loss": 2.4522, + "step": 14832 + }, + { + "epoch": 1.1970785247356952, + "grad_norm": 0.6825435161590576, + "learning_rate": 3.1487769973039624e-05, + "loss": 2.4728, + "step": 14833 + }, + { + "epoch": 1.1971592284722783, + "grad_norm": 0.7091361880302429, + "learning_rate": 3.147627120758634e-05, + "loss": 2.4615, + "step": 14834 + }, + { + "epoch": 1.1972399322088614, + "grad_norm": 0.7271433472633362, + 
"learning_rate": 3.146477414992346e-05, + "loss": 2.4154, + "step": 14835 + }, + { + "epoch": 1.1973206359454442, + "grad_norm": 0.6557306051254272, + "learning_rate": 3.145327880033756e-05, + "loss": 2.4348, + "step": 14836 + }, + { + "epoch": 1.1974013396820273, + "grad_norm": 0.6667891144752502, + "learning_rate": 3.1441785159115166e-05, + "loss": 2.4123, + "step": 14837 + }, + { + "epoch": 1.1974820434186102, + "grad_norm": 0.6755266189575195, + "learning_rate": 3.143029322654266e-05, + "loss": 2.4287, + "step": 14838 + }, + { + "epoch": 1.1975627471551933, + "grad_norm": 0.7647396922111511, + "learning_rate": 3.1418803002906475e-05, + "loss": 2.4343, + "step": 14839 + }, + { + "epoch": 1.1976434508917764, + "grad_norm": 0.7288243174552917, + "learning_rate": 3.140731448849305e-05, + "loss": 2.4536, + "step": 14840 + }, + { + "epoch": 1.1977241546283592, + "grad_norm": 0.6126244068145752, + "learning_rate": 3.1395827683588605e-05, + "loss": 2.4187, + "step": 14841 + }, + { + "epoch": 1.1978048583649423, + "grad_norm": 0.6773896217346191, + "learning_rate": 3.138434258847948e-05, + "loss": 2.3916, + "step": 14842 + }, + { + "epoch": 1.1978855621015252, + "grad_norm": 0.724413275718689, + "learning_rate": 3.1372859203451934e-05, + "loss": 2.4614, + "step": 14843 + }, + { + "epoch": 1.1979662658381083, + "grad_norm": 0.7043039798736572, + "learning_rate": 3.136137752879209e-05, + "loss": 2.4343, + "step": 14844 + }, + { + "epoch": 1.1980469695746914, + "grad_norm": 0.7543383240699768, + "learning_rate": 3.134989756478615e-05, + "loss": 2.4345, + "step": 14845 + }, + { + "epoch": 1.1981276733112742, + "grad_norm": 0.7193408608436584, + "learning_rate": 3.1338419311720244e-05, + "loss": 2.4728, + "step": 14846 + }, + { + "epoch": 1.1982083770478573, + "grad_norm": 0.8090186715126038, + "learning_rate": 3.132694276988038e-05, + "loss": 2.4246, + "step": 14847 + }, + { + "epoch": 1.1982890807844404, + "grad_norm": 0.7154600620269775, + "learning_rate": 
3.131546793955261e-05, + "loss": 2.4061, + "step": 14848 + }, + { + "epoch": 1.1983697845210233, + "grad_norm": 0.6987032890319824, + "learning_rate": 3.130399482102293e-05, + "loss": 2.4525, + "step": 14849 + }, + { + "epoch": 1.1984504882576064, + "grad_norm": 0.7123507261276245, + "learning_rate": 3.129252341457727e-05, + "loss": 2.4017, + "step": 14850 + }, + { + "epoch": 1.1985311919941894, + "grad_norm": 0.6475987434387207, + "learning_rate": 3.128105372050153e-05, + "loss": 2.4617, + "step": 14851 + }, + { + "epoch": 1.1986118957307723, + "grad_norm": 0.6799046993255615, + "learning_rate": 3.126958573908156e-05, + "loss": 2.4337, + "step": 14852 + }, + { + "epoch": 1.1986925994673554, + "grad_norm": 0.6910607218742371, + "learning_rate": 3.125811947060322e-05, + "loss": 2.415, + "step": 14853 + }, + { + "epoch": 1.1987733032039383, + "grad_norm": 0.6879963278770447, + "learning_rate": 3.124665491535219e-05, + "loss": 2.4912, + "step": 14854 + }, + { + "epoch": 1.1988540069405214, + "grad_norm": 0.7038810849189758, + "learning_rate": 3.123519207361425e-05, + "loss": 2.4528, + "step": 14855 + }, + { + "epoch": 1.1989347106771044, + "grad_norm": 0.6771957278251648, + "learning_rate": 3.1223730945675104e-05, + "loss": 2.4524, + "step": 14856 + }, + { + "epoch": 1.1990154144136873, + "grad_norm": 0.7529320120811462, + "learning_rate": 3.1212271531820336e-05, + "loss": 2.4667, + "step": 14857 + }, + { + "epoch": 1.1990961181502704, + "grad_norm": 0.6498474478721619, + "learning_rate": 3.1200813832335574e-05, + "loss": 2.3863, + "step": 14858 + }, + { + "epoch": 1.1991768218868533, + "grad_norm": 0.7587705850601196, + "learning_rate": 3.1189357847506383e-05, + "loss": 2.4962, + "step": 14859 + }, + { + "epoch": 1.1992575256234363, + "grad_norm": 0.674013078212738, + "learning_rate": 3.117790357761825e-05, + "loss": 2.3939, + "step": 14860 + }, + { + "epoch": 1.1993382293600194, + "grad_norm": 0.6546844840049744, + "learning_rate": 3.116645102295668e-05, + "loss": 
2.4775, + "step": 14861 + }, + { + "epoch": 1.1994189330966023, + "grad_norm": 0.7558320760726929, + "learning_rate": 3.11550001838071e-05, + "loss": 2.3918, + "step": 14862 + }, + { + "epoch": 1.1994996368331854, + "grad_norm": 0.7074883580207825, + "learning_rate": 3.114355106045486e-05, + "loss": 2.3969, + "step": 14863 + }, + { + "epoch": 1.1995803405697685, + "grad_norm": 0.706078290939331, + "learning_rate": 3.1132103653185305e-05, + "loss": 2.5028, + "step": 14864 + }, + { + "epoch": 1.1996610443063513, + "grad_norm": 0.6883544921875, + "learning_rate": 3.1120657962283764e-05, + "loss": 2.4407, + "step": 14865 + }, + { + "epoch": 1.1997417480429344, + "grad_norm": 0.6905466914176941, + "learning_rate": 3.110921398803551e-05, + "loss": 2.3893, + "step": 14866 + }, + { + "epoch": 1.1998224517795173, + "grad_norm": 0.6584910154342651, + "learning_rate": 3.109777173072569e-05, + "loss": 2.4515, + "step": 14867 + }, + { + "epoch": 1.1999031555161004, + "grad_norm": 0.6957471370697021, + "learning_rate": 3.108633119063951e-05, + "loss": 2.4483, + "step": 14868 + }, + { + "epoch": 1.1999838592526835, + "grad_norm": 0.6716276407241821, + "learning_rate": 3.1074892368062095e-05, + "loss": 2.4298, + "step": 14869 + }, + { + "epoch": 1.2000645629892663, + "grad_norm": 0.7350820302963257, + "learning_rate": 3.1063455263278543e-05, + "loss": 2.4088, + "step": 14870 + }, + { + "epoch": 1.2001452667258494, + "grad_norm": 0.7409771680831909, + "learning_rate": 3.105201987657388e-05, + "loss": 2.4089, + "step": 14871 + }, + { + "epoch": 1.2002259704624323, + "grad_norm": 0.7273266911506653, + "learning_rate": 3.104058620823315e-05, + "loss": 2.5149, + "step": 14872 + }, + { + "epoch": 1.2003066741990154, + "grad_norm": 0.6793962717056274, + "learning_rate": 3.102915425854124e-05, + "loss": 2.4422, + "step": 14873 + }, + { + "epoch": 1.2003873779355985, + "grad_norm": 0.72386234998703, + "learning_rate": 3.101772402778309e-05, + "loss": 2.4756, + "step": 14874 + }, + { + 
"epoch": 1.2004680816721813, + "grad_norm": 0.6530055999755859, + "learning_rate": 3.1006295516243625e-05, + "loss": 2.4145, + "step": 14875 + }, + { + "epoch": 1.2005487854087644, + "grad_norm": 0.7288365960121155, + "learning_rate": 3.099486872420758e-05, + "loss": 2.4565, + "step": 14876 + }, + { + "epoch": 1.2006294891453475, + "grad_norm": 0.6982102394104004, + "learning_rate": 3.09834436519598e-05, + "loss": 2.4788, + "step": 14877 + }, + { + "epoch": 1.2007101928819304, + "grad_norm": 0.7208256125450134, + "learning_rate": 3.0972020299785007e-05, + "loss": 2.4186, + "step": 14878 + }, + { + "epoch": 1.2007908966185135, + "grad_norm": 0.6928278803825378, + "learning_rate": 3.096059866796791e-05, + "loss": 2.4177, + "step": 14879 + }, + { + "epoch": 1.2008716003550965, + "grad_norm": 0.7145438194274902, + "learning_rate": 3.094917875679317e-05, + "loss": 2.4796, + "step": 14880 + }, + { + "epoch": 1.2009523040916794, + "grad_norm": 0.7126322388648987, + "learning_rate": 3.093776056654539e-05, + "loss": 2.4926, + "step": 14881 + }, + { + "epoch": 1.2010330078282625, + "grad_norm": 0.7775046825408936, + "learning_rate": 3.092634409750919e-05, + "loss": 2.4386, + "step": 14882 + }, + { + "epoch": 1.2011137115648454, + "grad_norm": 0.6387330889701843, + "learning_rate": 3.091492934996901e-05, + "loss": 2.4302, + "step": 14883 + }, + { + "epoch": 1.2011944153014285, + "grad_norm": 0.6883525252342224, + "learning_rate": 3.090351632420939e-05, + "loss": 2.4644, + "step": 14884 + }, + { + "epoch": 1.2012751190380115, + "grad_norm": 0.6698900461196899, + "learning_rate": 3.0892105020514795e-05, + "loss": 2.414, + "step": 14885 + }, + { + "epoch": 1.2013558227745944, + "grad_norm": 0.7124409079551697, + "learning_rate": 3.088069543916956e-05, + "loss": 2.4275, + "step": 14886 + }, + { + "epoch": 1.2014365265111775, + "grad_norm": 0.6996601223945618, + "learning_rate": 3.0869287580458076e-05, + "loss": 2.4725, + "step": 14887 + }, + { + "epoch": 1.2015172302477604, + 
"grad_norm": 0.653087317943573, + "learning_rate": 3.085788144466468e-05, + "loss": 2.383, + "step": 14888 + }, + { + "epoch": 1.2015979339843434, + "grad_norm": 0.7426899671554565, + "learning_rate": 3.0846477032073554e-05, + "loss": 2.4064, + "step": 14889 + }, + { + "epoch": 1.2016786377209265, + "grad_norm": 0.6417646408081055, + "learning_rate": 3.083507434296903e-05, + "loss": 2.3964, + "step": 14890 + }, + { + "epoch": 1.2017593414575094, + "grad_norm": 0.6301923394203186, + "learning_rate": 3.0823673377635274e-05, + "loss": 2.4285, + "step": 14891 + }, + { + "epoch": 1.2018400451940925, + "grad_norm": 0.7621259093284607, + "learning_rate": 3.081227413635638e-05, + "loss": 2.4731, + "step": 14892 + }, + { + "epoch": 1.2019207489306756, + "grad_norm": 0.6637598872184753, + "learning_rate": 3.080087661941648e-05, + "loss": 2.4126, + "step": 14893 + }, + { + "epoch": 1.2020014526672584, + "grad_norm": 0.6820287108421326, + "learning_rate": 3.078948082709964e-05, + "loss": 2.4108, + "step": 14894 + }, + { + "epoch": 1.2020821564038415, + "grad_norm": 0.7090989351272583, + "learning_rate": 3.077808675968983e-05, + "loss": 2.4678, + "step": 14895 + }, + { + "epoch": 1.2021628601404246, + "grad_norm": 0.7242181897163391, + "learning_rate": 3.076669441747105e-05, + "loss": 2.5346, + "step": 14896 + }, + { + "epoch": 1.2022435638770075, + "grad_norm": 0.7790088653564453, + "learning_rate": 3.075530380072722e-05, + "loss": 2.4436, + "step": 14897 + }, + { + "epoch": 1.2023242676135906, + "grad_norm": 0.6828821301460266, + "learning_rate": 3.074391490974225e-05, + "loss": 2.3767, + "step": 14898 + }, + { + "epoch": 1.2024049713501734, + "grad_norm": 0.709815502166748, + "learning_rate": 3.0732527744799945e-05, + "loss": 2.4139, + "step": 14899 + }, + { + "epoch": 1.2024856750867565, + "grad_norm": 0.6561180353164673, + "learning_rate": 3.07211423061841e-05, + "loss": 2.399, + "step": 14900 + }, + { + "epoch": 1.2025663788233396, + "grad_norm": 0.7122004628181458, + 
"learning_rate": 3.0709758594178495e-05, + "loss": 2.4314, + "step": 14901 + }, + { + "epoch": 1.2026470825599225, + "grad_norm": 0.6817516684532166, + "learning_rate": 3.0698376609066825e-05, + "loss": 2.4241, + "step": 14902 + }, + { + "epoch": 1.2027277862965056, + "grad_norm": 0.6848475337028503, + "learning_rate": 3.068699635113277e-05, + "loss": 2.4583, + "step": 14903 + }, + { + "epoch": 1.2028084900330884, + "grad_norm": 0.6567823886871338, + "learning_rate": 3.067561782065999e-05, + "loss": 2.3818, + "step": 14904 + }, + { + "epoch": 1.2028891937696715, + "grad_norm": 0.7373961806297302, + "learning_rate": 3.066424101793198e-05, + "loss": 2.4075, + "step": 14905 + }, + { + "epoch": 1.2029698975062546, + "grad_norm": 0.6968079209327698, + "learning_rate": 3.0652865943232346e-05, + "loss": 2.4701, + "step": 14906 + }, + { + "epoch": 1.2030506012428375, + "grad_norm": 0.7356292009353638, + "learning_rate": 3.064149259684459e-05, + "loss": 2.4188, + "step": 14907 + }, + { + "epoch": 1.2031313049794206, + "grad_norm": 0.7144857048988342, + "learning_rate": 3.063012097905211e-05, + "loss": 2.4411, + "step": 14908 + }, + { + "epoch": 1.2032120087160036, + "grad_norm": 0.734531044960022, + "learning_rate": 3.0618751090138365e-05, + "loss": 2.4595, + "step": 14909 + }, + { + "epoch": 1.2032927124525865, + "grad_norm": 0.6658234000205994, + "learning_rate": 3.060738293038669e-05, + "loss": 2.4206, + "step": 14910 + }, + { + "epoch": 1.2033734161891696, + "grad_norm": 0.678424596786499, + "learning_rate": 3.059601650008044e-05, + "loss": 2.4704, + "step": 14911 + }, + { + "epoch": 1.2034541199257527, + "grad_norm": 0.6852440237998962, + "learning_rate": 3.058465179950287e-05, + "loss": 2.46, + "step": 14912 + }, + { + "epoch": 1.2035348236623356, + "grad_norm": 0.702881395816803, + "learning_rate": 3.057328882893724e-05, + "loss": 2.4372, + "step": 14913 + }, + { + "epoch": 1.2036155273989186, + "grad_norm": 0.6978999972343445, + "learning_rate": 
3.056192758866676e-05, + "loss": 2.401, + "step": 14914 + }, + { + "epoch": 1.2036962311355015, + "grad_norm": 0.7070993185043335, + "learning_rate": 3.055056807897454e-05, + "loss": 2.3967, + "step": 14915 + }, + { + "epoch": 1.2037769348720846, + "grad_norm": 0.7159305810928345, + "learning_rate": 3.0539210300143693e-05, + "loss": 2.4388, + "step": 14916 + }, + { + "epoch": 1.2038576386086675, + "grad_norm": 0.6920869946479797, + "learning_rate": 3.0527854252457333e-05, + "loss": 2.441, + "step": 14917 + }, + { + "epoch": 1.2039383423452505, + "grad_norm": 0.7014884352684021, + "learning_rate": 3.0516499936198417e-05, + "loss": 2.4115, + "step": 14918 + }, + { + "epoch": 1.2040190460818336, + "grad_norm": 0.6754150986671448, + "learning_rate": 3.0505147351649955e-05, + "loss": 2.3722, + "step": 14919 + }, + { + "epoch": 1.2040997498184165, + "grad_norm": 0.7681791186332703, + "learning_rate": 3.0493796499094874e-05, + "loss": 2.4331, + "step": 14920 + }, + { + "epoch": 1.2041804535549996, + "grad_norm": 0.7265221476554871, + "learning_rate": 3.0482447378816082e-05, + "loss": 2.4806, + "step": 14921 + }, + { + "epoch": 1.2042611572915827, + "grad_norm": 0.6841520667076111, + "learning_rate": 3.047109999109642e-05, + "loss": 2.3896, + "step": 14922 + }, + { + "epoch": 1.2043418610281655, + "grad_norm": 0.746347963809967, + "learning_rate": 3.0459754336218737e-05, + "loss": 2.4081, + "step": 14923 + }, + { + "epoch": 1.2044225647647486, + "grad_norm": 0.6679818034172058, + "learning_rate": 3.0448410414465712e-05, + "loss": 2.4206, + "step": 14924 + }, + { + "epoch": 1.2045032685013317, + "grad_norm": 0.7122265100479126, + "learning_rate": 3.0437068226120114e-05, + "loss": 2.4217, + "step": 14925 + }, + { + "epoch": 1.2045839722379146, + "grad_norm": 0.7023499011993408, + "learning_rate": 3.0425727771464618e-05, + "loss": 2.4597, + "step": 14926 + }, + { + "epoch": 1.2046646759744977, + "grad_norm": 0.7304259538650513, + "learning_rate": 3.0414389050781876e-05, + 
"loss": 2.4915, + "step": 14927 + }, + { + "epoch": 1.2047453797110805, + "grad_norm": 0.7209908962249756, + "learning_rate": 3.0403052064354442e-05, + "loss": 2.4163, + "step": 14928 + }, + { + "epoch": 1.2048260834476636, + "grad_norm": 0.7367275953292847, + "learning_rate": 3.0391716812464865e-05, + "loss": 2.4192, + "step": 14929 + }, + { + "epoch": 1.2049067871842467, + "grad_norm": 0.6576591730117798, + "learning_rate": 3.0380383295395674e-05, + "loss": 2.4606, + "step": 14930 + }, + { + "epoch": 1.2049874909208296, + "grad_norm": 0.7082500457763672, + "learning_rate": 3.0369051513429315e-05, + "loss": 2.4079, + "step": 14931 + }, + { + "epoch": 1.2050681946574127, + "grad_norm": 0.6770346760749817, + "learning_rate": 3.03577214668482e-05, + "loss": 2.45, + "step": 14932 + }, + { + "epoch": 1.2051488983939955, + "grad_norm": 0.6979790925979614, + "learning_rate": 3.034639315593476e-05, + "loss": 2.3966, + "step": 14933 + }, + { + "epoch": 1.2052296021305786, + "grad_norm": 0.6863394975662231, + "learning_rate": 3.033506658097124e-05, + "loss": 2.4637, + "step": 14934 + }, + { + "epoch": 1.2053103058671617, + "grad_norm": 0.7522799372673035, + "learning_rate": 3.0323741742239963e-05, + "loss": 2.4585, + "step": 14935 + }, + { + "epoch": 1.2053910096037446, + "grad_norm": 0.7119878530502319, + "learning_rate": 3.031241864002321e-05, + "loss": 2.4473, + "step": 14936 + }, + { + "epoch": 1.2054717133403277, + "grad_norm": 0.690861701965332, + "learning_rate": 3.030109727460312e-05, + "loss": 2.4564, + "step": 14937 + }, + { + "epoch": 1.2055524170769107, + "grad_norm": 0.6825447082519531, + "learning_rate": 3.0289777646261886e-05, + "loss": 2.4511, + "step": 14938 + }, + { + "epoch": 1.2056331208134936, + "grad_norm": 0.7404600977897644, + "learning_rate": 3.027845975528164e-05, + "loss": 2.4461, + "step": 14939 + }, + { + "epoch": 1.2057138245500767, + "grad_norm": 0.6871766448020935, + "learning_rate": 3.026714360194437e-05, + "loss": 2.4486, + "step": 14940 + 
}, + { + "epoch": 1.2057945282866598, + "grad_norm": 0.6646476984024048, + "learning_rate": 3.02558291865322e-05, + "loss": 2.378, + "step": 14941 + }, + { + "epoch": 1.2058752320232426, + "grad_norm": 0.6998385787010193, + "learning_rate": 3.024451650932707e-05, + "loss": 2.4646, + "step": 14942 + }, + { + "epoch": 1.2059559357598257, + "grad_norm": 0.6763097047805786, + "learning_rate": 3.023320557061098e-05, + "loss": 2.3971, + "step": 14943 + }, + { + "epoch": 1.2060366394964086, + "grad_norm": 0.7409633994102478, + "learning_rate": 3.0221896370665736e-05, + "loss": 2.4405, + "step": 14944 + }, + { + "epoch": 1.2061173432329917, + "grad_norm": 0.6972076892852783, + "learning_rate": 3.0210588909773242e-05, + "loss": 2.3935, + "step": 14945 + }, + { + "epoch": 1.2061980469695748, + "grad_norm": 0.6898512840270996, + "learning_rate": 3.0199283188215333e-05, + "loss": 2.4173, + "step": 14946 + }, + { + "epoch": 1.2062787507061576, + "grad_norm": 0.6878097057342529, + "learning_rate": 3.0187979206273707e-05, + "loss": 2.44, + "step": 14947 + }, + { + "epoch": 1.2063594544427407, + "grad_norm": 0.6629695296287537, + "learning_rate": 3.0176676964230143e-05, + "loss": 2.3836, + "step": 14948 + }, + { + "epoch": 1.2064401581793236, + "grad_norm": 0.717654824256897, + "learning_rate": 3.0165376462366336e-05, + "loss": 2.415, + "step": 14949 + }, + { + "epoch": 1.2065208619159067, + "grad_norm": 0.7526129484176636, + "learning_rate": 3.0154077700963867e-05, + "loss": 2.4985, + "step": 14950 + }, + { + "epoch": 1.2066015656524898, + "grad_norm": 0.6867300271987915, + "learning_rate": 3.014278068030435e-05, + "loss": 2.395, + "step": 14951 + }, + { + "epoch": 1.2066822693890726, + "grad_norm": 0.7321466207504272, + "learning_rate": 3.0131485400669356e-05, + "loss": 2.4503, + "step": 14952 + }, + { + "epoch": 1.2067629731256557, + "grad_norm": 0.6915534734725952, + "learning_rate": 3.0120191862340387e-05, + "loss": 2.398, + "step": 14953 + }, + { + "epoch": 
1.2068436768622388, + "grad_norm": 0.7017377018928528, + "learning_rate": 3.01089000655989e-05, + "loss": 2.4367, + "step": 14954 + }, + { + "epoch": 1.2069243805988217, + "grad_norm": 0.7032245397567749, + "learning_rate": 3.0097610010726353e-05, + "loss": 2.4078, + "step": 14955 + }, + { + "epoch": 1.2070050843354048, + "grad_norm": 0.6795478463172913, + "learning_rate": 3.008632169800406e-05, + "loss": 2.3508, + "step": 14956 + }, + { + "epoch": 1.2070857880719879, + "grad_norm": 0.7149559855461121, + "learning_rate": 3.007503512771339e-05, + "loss": 2.4023, + "step": 14957 + }, + { + "epoch": 1.2071664918085707, + "grad_norm": 0.724756121635437, + "learning_rate": 3.006375030013563e-05, + "loss": 2.4439, + "step": 14958 + }, + { + "epoch": 1.2072471955451538, + "grad_norm": 0.7233348488807678, + "learning_rate": 3.005246721555205e-05, + "loss": 2.3819, + "step": 14959 + }, + { + "epoch": 1.2073278992817367, + "grad_norm": 0.700322151184082, + "learning_rate": 3.0041185874243815e-05, + "loss": 2.4222, + "step": 14960 + }, + { + "epoch": 1.2074086030183198, + "grad_norm": 0.7268145680427551, + "learning_rate": 3.002990627649209e-05, + "loss": 2.4698, + "step": 14961 + }, + { + "epoch": 1.2074893067549028, + "grad_norm": 0.6885111331939697, + "learning_rate": 3.001862842257801e-05, + "loss": 2.4505, + "step": 14962 + }, + { + "epoch": 1.2075700104914857, + "grad_norm": 0.7237974405288696, + "learning_rate": 3.0007352312782632e-05, + "loss": 2.422, + "step": 14963 + }, + { + "epoch": 1.2076507142280688, + "grad_norm": 0.7214741110801697, + "learning_rate": 2.9996077947387015e-05, + "loss": 2.4428, + "step": 14964 + }, + { + "epoch": 1.2077314179646517, + "grad_norm": 0.7264460921287537, + "learning_rate": 2.998480532667215e-05, + "loss": 2.4669, + "step": 14965 + }, + { + "epoch": 1.2078121217012348, + "grad_norm": 0.7055517435073853, + "learning_rate": 2.9973534450918928e-05, + "loss": 2.5082, + "step": 14966 + }, + { + "epoch": 1.2078928254378178, + "grad_norm": 
0.6886781454086304, + "learning_rate": 2.9962265320408268e-05, + "loss": 2.4697, + "step": 14967 + }, + { + "epoch": 1.2079735291744007, + "grad_norm": 0.6875878572463989, + "learning_rate": 2.9950997935421076e-05, + "loss": 2.4384, + "step": 14968 + }, + { + "epoch": 1.2080542329109838, + "grad_norm": 0.7586886882781982, + "learning_rate": 2.99397322962381e-05, + "loss": 2.4088, + "step": 14969 + }, + { + "epoch": 1.2081349366475669, + "grad_norm": 0.6744365096092224, + "learning_rate": 2.992846840314013e-05, + "loss": 2.4109, + "step": 14970 + }, + { + "epoch": 1.2082156403841497, + "grad_norm": 0.6589661240577698, + "learning_rate": 2.9917206256407893e-05, + "loss": 2.4386, + "step": 14971 + }, + { + "epoch": 1.2082963441207328, + "grad_norm": 0.6787264943122864, + "learning_rate": 2.990594585632208e-05, + "loss": 2.401, + "step": 14972 + }, + { + "epoch": 1.2083770478573157, + "grad_norm": 0.710517406463623, + "learning_rate": 2.9894687203163317e-05, + "loss": 2.4813, + "step": 14973 + }, + { + "epoch": 1.2084577515938988, + "grad_norm": 0.676110029220581, + "learning_rate": 2.988343029721221e-05, + "loss": 2.4654, + "step": 14974 + }, + { + "epoch": 1.2085384553304819, + "grad_norm": 0.6940518617630005, + "learning_rate": 2.9872175138749336e-05, + "loss": 2.4188, + "step": 14975 + }, + { + "epoch": 1.2086191590670647, + "grad_norm": 0.6849910020828247, + "learning_rate": 2.9860921728055147e-05, + "loss": 2.384, + "step": 14976 + }, + { + "epoch": 1.2086998628036478, + "grad_norm": 0.6902467608451843, + "learning_rate": 2.9849670065410128e-05, + "loss": 2.4364, + "step": 14977 + }, + { + "epoch": 1.2087805665402307, + "grad_norm": 0.6742224097251892, + "learning_rate": 2.9838420151094747e-05, + "loss": 2.5085, + "step": 14978 + }, + { + "epoch": 1.2088612702768138, + "grad_norm": 0.6635094285011292, + "learning_rate": 2.9827171985389303e-05, + "loss": 2.3635, + "step": 14979 + }, + { + "epoch": 1.2089419740133969, + "grad_norm": 0.7189158201217651, + 
"learning_rate": 2.9815925568574165e-05, + "loss": 2.458, + "step": 14980 + }, + { + "epoch": 1.2090226777499797, + "grad_norm": 0.7370143532752991, + "learning_rate": 2.9804680900929628e-05, + "loss": 2.4543, + "step": 14981 + }, + { + "epoch": 1.2091033814865628, + "grad_norm": 0.7410217523574829, + "learning_rate": 2.979343798273593e-05, + "loss": 2.4537, + "step": 14982 + }, + { + "epoch": 1.209184085223146, + "grad_norm": 0.7525770664215088, + "learning_rate": 2.9782196814273277e-05, + "loss": 2.5147, + "step": 14983 + }, + { + "epoch": 1.2092647889597288, + "grad_norm": 0.7302291393280029, + "learning_rate": 2.9770957395821863e-05, + "loss": 2.4711, + "step": 14984 + }, + { + "epoch": 1.2093454926963119, + "grad_norm": 0.7154920101165771, + "learning_rate": 2.975971972766175e-05, + "loss": 2.5224, + "step": 14985 + }, + { + "epoch": 1.209426196432895, + "grad_norm": 0.6827684640884399, + "learning_rate": 2.9748483810073025e-05, + "loss": 2.4477, + "step": 14986 + }, + { + "epoch": 1.2095069001694778, + "grad_norm": 0.7753484845161438, + "learning_rate": 2.973724964333575e-05, + "loss": 2.4257, + "step": 14987 + }, + { + "epoch": 1.209587603906061, + "grad_norm": 0.7146809101104736, + "learning_rate": 2.9726017227729862e-05, + "loss": 2.3953, + "step": 14988 + }, + { + "epoch": 1.2096683076426438, + "grad_norm": 0.7360730767250061, + "learning_rate": 2.9714786563535313e-05, + "loss": 2.3774, + "step": 14989 + }, + { + "epoch": 1.2097490113792269, + "grad_norm": 0.7159923911094666, + "learning_rate": 2.970355765103201e-05, + "loss": 2.4068, + "step": 14990 + }, + { + "epoch": 1.20982971511581, + "grad_norm": 0.6732171773910522, + "learning_rate": 2.969233049049982e-05, + "loss": 2.4215, + "step": 14991 + }, + { + "epoch": 1.2099104188523928, + "grad_norm": 0.749812126159668, + "learning_rate": 2.968110508221853e-05, + "loss": 2.4415, + "step": 14992 + }, + { + "epoch": 1.209991122588976, + "grad_norm": 0.7185530662536621, + "learning_rate": 
2.9669881426467916e-05, + "loss": 2.4536, + "step": 14993 + }, + { + "epoch": 1.2100718263255588, + "grad_norm": 0.6757143139839172, + "learning_rate": 2.9658659523527733e-05, + "loss": 2.3892, + "step": 14994 + }, + { + "epoch": 1.2101525300621419, + "grad_norm": 0.7187495231628418, + "learning_rate": 2.96474393736776e-05, + "loss": 2.434, + "step": 14995 + }, + { + "epoch": 1.210233233798725, + "grad_norm": 0.7016372680664062, + "learning_rate": 2.9636220977197182e-05, + "loss": 2.4903, + "step": 14996 + }, + { + "epoch": 1.2103139375353078, + "grad_norm": 0.7528983950614929, + "learning_rate": 2.9625004334366103e-05, + "loss": 2.3829, + "step": 14997 + }, + { + "epoch": 1.210394641271891, + "grad_norm": 0.6735692024230957, + "learning_rate": 2.9613789445463837e-05, + "loss": 2.3844, + "step": 14998 + }, + { + "epoch": 1.210475345008474, + "grad_norm": 0.6825322508811951, + "learning_rate": 2.9602576310769935e-05, + "loss": 2.4691, + "step": 14999 + }, + { + "epoch": 1.2105560487450568, + "grad_norm": 0.7507675290107727, + "learning_rate": 2.959136493056389e-05, + "loss": 2.4605, + "step": 15000 + }, + { + "epoch": 1.2105560487450568, + "eval_loss": 2.3882925510406494, + "eval_runtime": 1014.0781, + "eval_samples_per_second": 2.584, + "eval_steps_per_second": 0.431, + "step": 15000 + }, + { + "epoch": 1.21063675248164, + "grad_norm": 0.6937146782875061, + "learning_rate": 2.9580155305125044e-05, + "loss": 2.4444, + "step": 15001 + }, + { + "epoch": 1.210717456218223, + "grad_norm": 0.6572179794311523, + "learning_rate": 2.9568947434732775e-05, + "loss": 2.4373, + "step": 15002 + }, + { + "epoch": 1.2107981599548059, + "grad_norm": 0.7420738935470581, + "learning_rate": 2.955774131966651e-05, + "loss": 2.4046, + "step": 15003 + }, + { + "epoch": 1.210878863691389, + "grad_norm": 0.7952237129211426, + "learning_rate": 2.954653696020543e-05, + "loss": 2.4082, + "step": 15004 + }, + { + "epoch": 1.2109595674279718, + "grad_norm": 0.6640750765800476, + 
"learning_rate": 2.9535334356628817e-05, + "loss": 2.4109, + "step": 15005 + }, + { + "epoch": 1.211040271164555, + "grad_norm": 0.6968019008636475, + "learning_rate": 2.952413350921588e-05, + "loss": 2.3991, + "step": 15006 + }, + { + "epoch": 1.211120974901138, + "grad_norm": 0.7174221277236938, + "learning_rate": 2.9512934418245787e-05, + "loss": 2.3909, + "step": 15007 + }, + { + "epoch": 1.2112016786377209, + "grad_norm": 0.6854268908500671, + "learning_rate": 2.9501737083997595e-05, + "loss": 2.4321, + "step": 15008 + }, + { + "epoch": 1.211282382374304, + "grad_norm": 0.6705672740936279, + "learning_rate": 2.949054150675039e-05, + "loss": 2.4749, + "step": 15009 + }, + { + "epoch": 1.2113630861108868, + "grad_norm": 0.7871068716049194, + "learning_rate": 2.9479347686783244e-05, + "loss": 2.424, + "step": 15010 + }, + { + "epoch": 1.21144378984747, + "grad_norm": 0.8194620609283447, + "learning_rate": 2.946815562437506e-05, + "loss": 2.461, + "step": 15011 + }, + { + "epoch": 1.211524493584053, + "grad_norm": 0.673367977142334, + "learning_rate": 2.9456965319804818e-05, + "loss": 2.4212, + "step": 15012 + }, + { + "epoch": 1.2116051973206359, + "grad_norm": 0.6630001068115234, + "learning_rate": 2.9445776773351397e-05, + "loss": 2.4393, + "step": 15013 + }, + { + "epoch": 1.211685901057219, + "grad_norm": 0.676170825958252, + "learning_rate": 2.943458998529365e-05, + "loss": 2.3889, + "step": 15014 + }, + { + "epoch": 1.211766604793802, + "grad_norm": 0.6951417326927185, + "learning_rate": 2.942340495591037e-05, + "loss": 2.4088, + "step": 15015 + }, + { + "epoch": 1.211847308530385, + "grad_norm": 0.6909857988357544, + "learning_rate": 2.941222168548037e-05, + "loss": 2.4282, + "step": 15016 + }, + { + "epoch": 1.211928012266968, + "grad_norm": 0.653264045715332, + "learning_rate": 2.9401040174282292e-05, + "loss": 2.4369, + "step": 15017 + }, + { + "epoch": 1.2120087160035509, + "grad_norm": 0.6994543075561523, + "learning_rate": 2.938986042259484e-05, + 
"loss": 2.419, + "step": 15018 + }, + { + "epoch": 1.212089419740134, + "grad_norm": 0.709015965461731, + "learning_rate": 2.9378682430696668e-05, + "loss": 2.4747, + "step": 15019 + }, + { + "epoch": 1.212170123476717, + "grad_norm": 0.6899579167366028, + "learning_rate": 2.9367506198866313e-05, + "loss": 2.4134, + "step": 15020 + }, + { + "epoch": 1.2122508272133, + "grad_norm": 0.6811912059783936, + "learning_rate": 2.9356331727382337e-05, + "loss": 2.449, + "step": 15021 + }, + { + "epoch": 1.212331530949883, + "grad_norm": 0.8119748830795288, + "learning_rate": 2.9345159016523237e-05, + "loss": 2.4463, + "step": 15022 + }, + { + "epoch": 1.2124122346864659, + "grad_norm": 0.7323578000068665, + "learning_rate": 2.9333988066567463e-05, + "loss": 2.4305, + "step": 15023 + }, + { + "epoch": 1.212492938423049, + "grad_norm": 0.6639837622642517, + "learning_rate": 2.9322818877793436e-05, + "loss": 2.4237, + "step": 15024 + }, + { + "epoch": 1.212573642159632, + "grad_norm": 0.669623076915741, + "learning_rate": 2.9311651450479516e-05, + "loss": 2.4436, + "step": 15025 + }, + { + "epoch": 1.212654345896215, + "grad_norm": 0.7200437784194946, + "learning_rate": 2.9300485784904054e-05, + "loss": 2.4399, + "step": 15026 + }, + { + "epoch": 1.212735049632798, + "grad_norm": 0.7015525102615356, + "learning_rate": 2.9289321881345254e-05, + "loss": 2.4696, + "step": 15027 + }, + { + "epoch": 1.212815753369381, + "grad_norm": 0.74539715051651, + "learning_rate": 2.9278159740081402e-05, + "loss": 2.4204, + "step": 15028 + }, + { + "epoch": 1.212896457105964, + "grad_norm": 0.6373662352561951, + "learning_rate": 2.9266999361390713e-05, + "loss": 2.4273, + "step": 15029 + }, + { + "epoch": 1.212977160842547, + "grad_norm": 0.8213370442390442, + "learning_rate": 2.9255840745551256e-05, + "loss": 2.4166, + "step": 15030 + }, + { + "epoch": 1.2130578645791301, + "grad_norm": 0.7386181354522705, + "learning_rate": 2.9244683892841185e-05, + "loss": 2.3973, + "step": 15031 + }, + { + 
"epoch": 1.213138568315713, + "grad_norm": 0.7939273118972778, + "learning_rate": 2.9233528803538534e-05, + "loss": 2.5593, + "step": 15032 + }, + { + "epoch": 1.213219272052296, + "grad_norm": 0.7580689191818237, + "learning_rate": 2.9222375477921347e-05, + "loss": 2.4255, + "step": 15033 + }, + { + "epoch": 1.213299975788879, + "grad_norm": 0.7680409550666809, + "learning_rate": 2.9211223916267573e-05, + "loss": 2.4447, + "step": 15034 + }, + { + "epoch": 1.213380679525462, + "grad_norm": 0.6998565196990967, + "learning_rate": 2.9200074118855135e-05, + "loss": 2.4061, + "step": 15035 + }, + { + "epoch": 1.2134613832620451, + "grad_norm": 0.6673001050949097, + "learning_rate": 2.9188926085961954e-05, + "loss": 2.3989, + "step": 15036 + }, + { + "epoch": 1.213542086998628, + "grad_norm": 0.683215320110321, + "learning_rate": 2.9177779817865815e-05, + "loss": 2.4078, + "step": 15037 + }, + { + "epoch": 1.213622790735211, + "grad_norm": 0.696967363357544, + "learning_rate": 2.9166635314844527e-05, + "loss": 2.4224, + "step": 15038 + }, + { + "epoch": 1.213703494471794, + "grad_norm": 0.6930364370346069, + "learning_rate": 2.915549257717588e-05, + "loss": 2.4112, + "step": 15039 + }, + { + "epoch": 1.213784198208377, + "grad_norm": 0.7387405633926392, + "learning_rate": 2.914435160513752e-05, + "loss": 2.4458, + "step": 15040 + }, + { + "epoch": 1.21386490194496, + "grad_norm": 0.6615941524505615, + "learning_rate": 2.913321239900714e-05, + "loss": 2.4406, + "step": 15041 + }, + { + "epoch": 1.213945605681543, + "grad_norm": 0.7520569562911987, + "learning_rate": 2.912207495906235e-05, + "loss": 2.3991, + "step": 15042 + }, + { + "epoch": 1.214026309418126, + "grad_norm": 0.6952454447746277, + "learning_rate": 2.911093928558072e-05, + "loss": 2.4404, + "step": 15043 + }, + { + "epoch": 1.2141070131547091, + "grad_norm": 0.7595344185829163, + "learning_rate": 2.9099805378839794e-05, + "loss": 2.551, + "step": 15044 + }, + { + "epoch": 1.214187716891292, + "grad_norm": 
0.6645220518112183, + "learning_rate": 2.9088673239117094e-05, + "loss": 2.4167, + "step": 15045 + }, + { + "epoch": 1.214268420627875, + "grad_norm": 0.6433377861976624, + "learning_rate": 2.907754286668998e-05, + "loss": 2.3873, + "step": 15046 + }, + { + "epoch": 1.2143491243644582, + "grad_norm": 0.6806936860084534, + "learning_rate": 2.9066414261835894e-05, + "loss": 2.3868, + "step": 15047 + }, + { + "epoch": 1.214429828101041, + "grad_norm": 0.7261343598365784, + "learning_rate": 2.905528742483222e-05, + "loss": 2.4785, + "step": 15048 + }, + { + "epoch": 1.2145105318376241, + "grad_norm": 0.6495440602302551, + "learning_rate": 2.9044162355956196e-05, + "loss": 2.4167, + "step": 15049 + }, + { + "epoch": 1.214591235574207, + "grad_norm": 0.6816607117652893, + "learning_rate": 2.9033039055485135e-05, + "loss": 2.459, + "step": 15050 + }, + { + "epoch": 1.21467193931079, + "grad_norm": 0.6624214053153992, + "learning_rate": 2.902191752369624e-05, + "loss": 2.4498, + "step": 15051 + }, + { + "epoch": 1.2147526430473732, + "grad_norm": 0.6800024509429932, + "learning_rate": 2.9010797760866737e-05, + "loss": 2.4442, + "step": 15052 + }, + { + "epoch": 1.214833346783956, + "grad_norm": 0.711705207824707, + "learning_rate": 2.8999679767273667e-05, + "loss": 2.422, + "step": 15053 + }, + { + "epoch": 1.2149140505205391, + "grad_norm": 0.6854784488677979, + "learning_rate": 2.898856354319419e-05, + "loss": 2.4567, + "step": 15054 + }, + { + "epoch": 1.214994754257122, + "grad_norm": 0.6676114797592163, + "learning_rate": 2.8977449088905373e-05, + "loss": 2.3913, + "step": 15055 + }, + { + "epoch": 1.215075457993705, + "grad_norm": 0.6893348693847656, + "learning_rate": 2.8966336404684145e-05, + "loss": 2.4407, + "step": 15056 + }, + { + "epoch": 1.2151561617302882, + "grad_norm": 0.6749289035797119, + "learning_rate": 2.8955225490807514e-05, + "loss": 2.409, + "step": 15057 + }, + { + "epoch": 1.215236865466871, + "grad_norm": 0.6998956203460693, + "learning_rate": 
2.8944116347552387e-05, + "loss": 2.4297, + "step": 15058 + }, + { + "epoch": 1.2153175692034541, + "grad_norm": 0.7040024399757385, + "learning_rate": 2.8933008975195596e-05, + "loss": 2.4262, + "step": 15059 + }, + { + "epoch": 1.2153982729400372, + "grad_norm": 0.6638362407684326, + "learning_rate": 2.8921903374014005e-05, + "loss": 2.4355, + "step": 15060 + }, + { + "epoch": 1.21547897667662, + "grad_norm": 0.6864547729492188, + "learning_rate": 2.8910799544284407e-05, + "loss": 2.4493, + "step": 15061 + }, + { + "epoch": 1.2155596804132032, + "grad_norm": 0.707383394241333, + "learning_rate": 2.8899697486283474e-05, + "loss": 2.4604, + "step": 15062 + }, + { + "epoch": 1.2156403841497863, + "grad_norm": 0.7121397852897644, + "learning_rate": 2.888859720028795e-05, + "loss": 2.4272, + "step": 15063 + }, + { + "epoch": 1.2157210878863691, + "grad_norm": 0.7600439786911011, + "learning_rate": 2.8877498686574455e-05, + "loss": 2.4499, + "step": 15064 + }, + { + "epoch": 1.2158017916229522, + "grad_norm": 0.6654962301254272, + "learning_rate": 2.886640194541962e-05, + "loss": 2.4632, + "step": 15065 + }, + { + "epoch": 1.215882495359535, + "grad_norm": 0.7138063311576843, + "learning_rate": 2.8855306977099994e-05, + "loss": 2.4321, + "step": 15066 + }, + { + "epoch": 1.2159631990961182, + "grad_norm": 0.672604501247406, + "learning_rate": 2.884421378189208e-05, + "loss": 2.4026, + "step": 15067 + }, + { + "epoch": 1.2160439028327013, + "grad_norm": 0.6894693970680237, + "learning_rate": 2.8833122360072405e-05, + "loss": 2.4213, + "step": 15068 + }, + { + "epoch": 1.2161246065692841, + "grad_norm": 0.6784985065460205, + "learning_rate": 2.8822032711917325e-05, + "loss": 2.4207, + "step": 15069 + }, + { + "epoch": 1.2162053103058672, + "grad_norm": 0.6569294929504395, + "learning_rate": 2.8810944837703248e-05, + "loss": 2.4142, + "step": 15070 + }, + { + "epoch": 1.21628601404245, + "grad_norm": 0.7240702509880066, + "learning_rate": 2.879985873770654e-05, + "loss": 
2.4173, + "step": 15071 + }, + { + "epoch": 1.2163667177790332, + "grad_norm": 0.6935575604438782, + "learning_rate": 2.8788774412203444e-05, + "loss": 2.4487, + "step": 15072 + }, + { + "epoch": 1.2164474215156162, + "grad_norm": 0.6903246641159058, + "learning_rate": 2.8777691861470234e-05, + "loss": 2.4193, + "step": 15073 + }, + { + "epoch": 1.216528125252199, + "grad_norm": 0.7982182502746582, + "learning_rate": 2.8766611085783123e-05, + "loss": 2.492, + "step": 15074 + }, + { + "epoch": 1.2166088289887822, + "grad_norm": 0.6958058476448059, + "learning_rate": 2.875553208541827e-05, + "loss": 2.4198, + "step": 15075 + }, + { + "epoch": 1.2166895327253653, + "grad_norm": 0.6869969964027405, + "learning_rate": 2.8744454860651794e-05, + "loss": 2.3768, + "step": 15076 + }, + { + "epoch": 1.2167702364619482, + "grad_norm": 0.7263007760047913, + "learning_rate": 2.8733379411759796e-05, + "loss": 2.386, + "step": 15077 + }, + { + "epoch": 1.2168509401985312, + "grad_norm": 0.7010302543640137, + "learning_rate": 2.872230573901825e-05, + "loss": 2.4417, + "step": 15078 + }, + { + "epoch": 1.216931643935114, + "grad_norm": 0.818980872631073, + "learning_rate": 2.8711233842703156e-05, + "loss": 2.433, + "step": 15079 + }, + { + "epoch": 1.2170123476716972, + "grad_norm": 0.6937929391860962, + "learning_rate": 2.87001637230905e-05, + "loss": 2.379, + "step": 15080 + }, + { + "epoch": 1.2170930514082803, + "grad_norm": 0.6954175233840942, + "learning_rate": 2.868909538045612e-05, + "loss": 2.4296, + "step": 15081 + }, + { + "epoch": 1.2171737551448631, + "grad_norm": 0.7177354097366333, + "learning_rate": 2.8678028815075887e-05, + "loss": 2.3978, + "step": 15082 + }, + { + "epoch": 1.2172544588814462, + "grad_norm": 0.7100846171379089, + "learning_rate": 2.8666964027225607e-05, + "loss": 2.4566, + "step": 15083 + }, + { + "epoch": 1.217335162618029, + "grad_norm": 0.6909635066986084, + "learning_rate": 2.8655901017181064e-05, + "loss": 2.4772, + "step": 15084 + }, + { + 
"epoch": 1.2174158663546122, + "grad_norm": 0.7319501638412476, + "learning_rate": 2.8644839785217947e-05, + "loss": 2.4402, + "step": 15085 + }, + { + "epoch": 1.2174965700911953, + "grad_norm": 0.6691421270370483, + "learning_rate": 2.8633780331611958e-05, + "loss": 2.4465, + "step": 15086 + }, + { + "epoch": 1.2175772738277781, + "grad_norm": 0.7028824687004089, + "learning_rate": 2.8622722656638745e-05, + "loss": 2.4765, + "step": 15087 + }, + { + "epoch": 1.2176579775643612, + "grad_norm": 0.7428398728370667, + "learning_rate": 2.861166676057383e-05, + "loss": 2.441, + "step": 15088 + }, + { + "epoch": 1.2177386813009443, + "grad_norm": 0.6715269684791565, + "learning_rate": 2.8600612643692803e-05, + "loss": 2.4621, + "step": 15089 + }, + { + "epoch": 1.2178193850375272, + "grad_norm": 0.6768512725830078, + "learning_rate": 2.8589560306271168e-05, + "loss": 2.4257, + "step": 15090 + }, + { + "epoch": 1.2179000887741103, + "grad_norm": 0.7442535758018494, + "learning_rate": 2.8578509748584326e-05, + "loss": 2.424, + "step": 15091 + }, + { + "epoch": 1.2179807925106934, + "grad_norm": 0.7275974154472351, + "learning_rate": 2.8567460970907722e-05, + "loss": 2.4698, + "step": 15092 + }, + { + "epoch": 1.2180614962472762, + "grad_norm": 0.7050346732139587, + "learning_rate": 2.8556413973516727e-05, + "loss": 2.4734, + "step": 15093 + }, + { + "epoch": 1.2181421999838593, + "grad_norm": 0.7325939536094666, + "learning_rate": 2.854536875668664e-05, + "loss": 2.4166, + "step": 15094 + }, + { + "epoch": 1.2182229037204422, + "grad_norm": 0.6764184236526489, + "learning_rate": 2.8534325320692746e-05, + "loss": 2.4742, + "step": 15095 + }, + { + "epoch": 1.2183036074570253, + "grad_norm": 0.7405500411987305, + "learning_rate": 2.8523283665810318e-05, + "loss": 2.3959, + "step": 15096 + }, + { + "epoch": 1.2183843111936083, + "grad_norm": 0.6714199185371399, + "learning_rate": 2.8512243792314465e-05, + "loss": 2.4571, + "step": 15097 + }, + { + "epoch": 
1.2184650149301912, + "grad_norm": 0.6779391169548035, + "learning_rate": 2.8501205700480372e-05, + "loss": 2.3745, + "step": 15098 + }, + { + "epoch": 1.2185457186667743, + "grad_norm": 0.6876079440116882, + "learning_rate": 2.8490169390583134e-05, + "loss": 2.4432, + "step": 15099 + }, + { + "epoch": 1.2186264224033572, + "grad_norm": 0.7092362642288208, + "learning_rate": 2.8479134862897826e-05, + "loss": 2.4716, + "step": 15100 + }, + { + "epoch": 1.2187071261399403, + "grad_norm": 0.6901989579200745, + "learning_rate": 2.8468102117699414e-05, + "loss": 2.417, + "step": 15101 + }, + { + "epoch": 1.2187878298765233, + "grad_norm": 0.7011592984199524, + "learning_rate": 2.8457071155262884e-05, + "loss": 2.4439, + "step": 15102 + }, + { + "epoch": 1.2188685336131062, + "grad_norm": 0.6923472285270691, + "learning_rate": 2.8446041975863146e-05, + "loss": 2.4247, + "step": 15103 + }, + { + "epoch": 1.2189492373496893, + "grad_norm": 0.6948748230934143, + "learning_rate": 2.843501457977509e-05, + "loss": 2.3902, + "step": 15104 + }, + { + "epoch": 1.2190299410862724, + "grad_norm": 0.7034386396408081, + "learning_rate": 2.842398896727354e-05, + "loss": 2.4277, + "step": 15105 + }, + { + "epoch": 1.2191106448228552, + "grad_norm": 0.7965617775917053, + "learning_rate": 2.8412965138633318e-05, + "loss": 2.435, + "step": 15106 + }, + { + "epoch": 1.2191913485594383, + "grad_norm": 0.7371121644973755, + "learning_rate": 2.8401943094129112e-05, + "loss": 2.3928, + "step": 15107 + }, + { + "epoch": 1.2192720522960214, + "grad_norm": 0.7079561352729797, + "learning_rate": 2.839092283403564e-05, + "loss": 2.4706, + "step": 15108 + }, + { + "epoch": 1.2193527560326043, + "grad_norm": 0.6711337566375732, + "learning_rate": 2.8379904358627584e-05, + "loss": 2.4272, + "step": 15109 + }, + { + "epoch": 1.2194334597691874, + "grad_norm": 0.6840410828590393, + "learning_rate": 2.836888766817951e-05, + "loss": 2.4174, + "step": 15110 + }, + { + "epoch": 1.2195141635057702, + 
"grad_norm": 0.700366199016571, + "learning_rate": 2.8357872762965986e-05, + "loss": 2.4667, + "step": 15111 + }, + { + "epoch": 1.2195948672423533, + "grad_norm": 0.7090682983398438, + "learning_rate": 2.8346859643261593e-05, + "loss": 2.3748, + "step": 15112 + }, + { + "epoch": 1.2196755709789364, + "grad_norm": 0.7965148687362671, + "learning_rate": 2.8335848309340717e-05, + "loss": 2.5138, + "step": 15113 + }, + { + "epoch": 1.2197562747155193, + "grad_norm": 0.7845773696899414, + "learning_rate": 2.8324838761477833e-05, + "loss": 2.4274, + "step": 15114 + }, + { + "epoch": 1.2198369784521024, + "grad_norm": 0.6545087099075317, + "learning_rate": 2.831383099994731e-05, + "loss": 2.4311, + "step": 15115 + }, + { + "epoch": 1.2199176821886852, + "grad_norm": 0.6846331357955933, + "learning_rate": 2.830282502502356e-05, + "loss": 2.4239, + "step": 15116 + }, + { + "epoch": 1.2199983859252683, + "grad_norm": 0.7062236070632935, + "learning_rate": 2.8291820836980798e-05, + "loss": 2.4429, + "step": 15117 + }, + { + "epoch": 1.2200790896618514, + "grad_norm": 0.7526285648345947, + "learning_rate": 2.8280818436093315e-05, + "loss": 2.4882, + "step": 15118 + }, + { + "epoch": 1.2201597933984343, + "grad_norm": 0.6853364109992981, + "learning_rate": 2.8269817822635337e-05, + "loss": 2.3803, + "step": 15119 + }, + { + "epoch": 1.2202404971350174, + "grad_norm": 0.7796143293380737, + "learning_rate": 2.8258818996880964e-05, + "loss": 2.4157, + "step": 15120 + }, + { + "epoch": 1.2203212008716005, + "grad_norm": 0.7202157378196716, + "learning_rate": 2.824782195910437e-05, + "loss": 2.5101, + "step": 15121 + }, + { + "epoch": 1.2204019046081833, + "grad_norm": 0.6730707287788391, + "learning_rate": 2.8236826709579644e-05, + "loss": 2.4397, + "step": 15122 + }, + { + "epoch": 1.2204826083447664, + "grad_norm": 0.7840865850448608, + "learning_rate": 2.8225833248580745e-05, + "loss": 2.4452, + "step": 15123 + }, + { + "epoch": 1.2205633120813493, + "grad_norm": 
0.8323497772216797, + "learning_rate": 2.821484157638171e-05, + "loss": 2.4775, + "step": 15124 + }, + { + "epoch": 1.2206440158179324, + "grad_norm": 0.6699438691139221, + "learning_rate": 2.8203851693256466e-05, + "loss": 2.3958, + "step": 15125 + }, + { + "epoch": 1.2207247195545154, + "grad_norm": 0.6711557507514954, + "learning_rate": 2.8192863599478923e-05, + "loss": 2.477, + "step": 15126 + }, + { + "epoch": 1.2208054232910983, + "grad_norm": 0.6255797743797302, + "learning_rate": 2.8181877295322922e-05, + "loss": 2.4222, + "step": 15127 + }, + { + "epoch": 1.2208861270276814, + "grad_norm": 0.7313731908798218, + "learning_rate": 2.8170892781062297e-05, + "loss": 2.4343, + "step": 15128 + }, + { + "epoch": 1.2209668307642643, + "grad_norm": 0.6611476540565491, + "learning_rate": 2.815991005697076e-05, + "loss": 2.3844, + "step": 15129 + }, + { + "epoch": 1.2210475345008474, + "grad_norm": 0.7293661236763, + "learning_rate": 2.8148929123322065e-05, + "loss": 2.3912, + "step": 15130 + }, + { + "epoch": 1.2211282382374304, + "grad_norm": 0.7150777578353882, + "learning_rate": 2.8137949980389866e-05, + "loss": 2.4227, + "step": 15131 + }, + { + "epoch": 1.2212089419740133, + "grad_norm": 0.7001000642776489, + "learning_rate": 2.8126972628447845e-05, + "loss": 2.4751, + "step": 15132 + }, + { + "epoch": 1.2212896457105964, + "grad_norm": 0.7106043100357056, + "learning_rate": 2.8115997067769505e-05, + "loss": 2.4127, + "step": 15133 + }, + { + "epoch": 1.2213703494471795, + "grad_norm": 0.6969115138053894, + "learning_rate": 2.810502329862842e-05, + "loss": 2.4073, + "step": 15134 + }, + { + "epoch": 1.2214510531837623, + "grad_norm": 0.7493317127227783, + "learning_rate": 2.8094051321298098e-05, + "loss": 2.4541, + "step": 15135 + }, + { + "epoch": 1.2215317569203454, + "grad_norm": 0.6499322652816772, + "learning_rate": 2.808308113605198e-05, + "loss": 2.4057, + "step": 15136 + }, + { + "epoch": 1.2216124606569285, + "grad_norm": 0.6716788411140442, + 
"learning_rate": 2.807211274316347e-05, + "loss": 2.3856, + "step": 15137 + }, + { + "epoch": 1.2216931643935114, + "grad_norm": 0.7724741101264954, + "learning_rate": 2.8061146142905958e-05, + "loss": 2.4652, + "step": 15138 + }, + { + "epoch": 1.2217738681300945, + "grad_norm": 0.7014325261116028, + "learning_rate": 2.8050181335552718e-05, + "loss": 2.4506, + "step": 15139 + }, + { + "epoch": 1.2218545718666773, + "grad_norm": 0.6705317497253418, + "learning_rate": 2.8039218321377026e-05, + "loss": 2.4581, + "step": 15140 + }, + { + "epoch": 1.2219352756032604, + "grad_norm": 0.709973931312561, + "learning_rate": 2.8028257100652156e-05, + "loss": 2.427, + "step": 15141 + }, + { + "epoch": 1.2220159793398435, + "grad_norm": 0.7021297812461853, + "learning_rate": 2.801729767365122e-05, + "loss": 2.3784, + "step": 15142 + }, + { + "epoch": 1.2220966830764264, + "grad_norm": 0.7431899905204773, + "learning_rate": 2.8006340040647393e-05, + "loss": 2.4135, + "step": 15143 + }, + { + "epoch": 1.2221773868130095, + "grad_norm": 0.6724472045898438, + "learning_rate": 2.7995384201913765e-05, + "loss": 2.3966, + "step": 15144 + }, + { + "epoch": 1.2222580905495923, + "grad_norm": 0.7381375432014465, + "learning_rate": 2.7984430157723384e-05, + "loss": 2.4853, + "step": 15145 + }, + { + "epoch": 1.2223387942861754, + "grad_norm": 0.6809988617897034, + "learning_rate": 2.7973477908349255e-05, + "loss": 2.408, + "step": 15146 + }, + { + "epoch": 1.2224194980227585, + "grad_norm": 0.7042898535728455, + "learning_rate": 2.7962527454064337e-05, + "loss": 2.3981, + "step": 15147 + }, + { + "epoch": 1.2225002017593414, + "grad_norm": 0.7096118330955505, + "learning_rate": 2.7951578795141576e-05, + "loss": 2.4175, + "step": 15148 + }, + { + "epoch": 1.2225809054959245, + "grad_norm": 0.7271720767021179, + "learning_rate": 2.794063193185378e-05, + "loss": 2.4193, + "step": 15149 + }, + { + "epoch": 1.2226616092325076, + "grad_norm": 0.7000352740287781, + "learning_rate": 
2.7929686864473792e-05, + "loss": 2.422, + "step": 15150 + }, + { + "epoch": 1.2227423129690904, + "grad_norm": 0.6983076333999634, + "learning_rate": 2.791874359327443e-05, + "loss": 2.4613, + "step": 15151 + }, + { + "epoch": 1.2228230167056735, + "grad_norm": 0.7520100474357605, + "learning_rate": 2.7907802118528383e-05, + "loss": 2.4147, + "step": 15152 + }, + { + "epoch": 1.2229037204422566, + "grad_norm": 0.7056650519371033, + "learning_rate": 2.789686244050834e-05, + "loss": 2.4568, + "step": 15153 + }, + { + "epoch": 1.2229844241788395, + "grad_norm": 0.7092614769935608, + "learning_rate": 2.7885924559486975e-05, + "loss": 2.4758, + "step": 15154 + }, + { + "epoch": 1.2230651279154225, + "grad_norm": 0.702521562576294, + "learning_rate": 2.7874988475736885e-05, + "loss": 2.4893, + "step": 15155 + }, + { + "epoch": 1.2231458316520054, + "grad_norm": 0.7454921007156372, + "learning_rate": 2.786405418953061e-05, + "loss": 2.4277, + "step": 15156 + }, + { + "epoch": 1.2232265353885885, + "grad_norm": 0.659503161907196, + "learning_rate": 2.7853121701140694e-05, + "loss": 2.4664, + "step": 15157 + }, + { + "epoch": 1.2233072391251716, + "grad_norm": 0.6368914842605591, + "learning_rate": 2.7842191010839556e-05, + "loss": 2.3728, + "step": 15158 + }, + { + "epoch": 1.2233879428617545, + "grad_norm": 0.7076737880706787, + "learning_rate": 2.783126211889965e-05, + "loss": 2.4204, + "step": 15159 + }, + { + "epoch": 1.2234686465983375, + "grad_norm": 0.718100905418396, + "learning_rate": 2.7820335025593325e-05, + "loss": 2.478, + "step": 15160 + }, + { + "epoch": 1.2235493503349204, + "grad_norm": 0.6804678440093994, + "learning_rate": 2.7809409731192972e-05, + "loss": 2.3755, + "step": 15161 + }, + { + "epoch": 1.2236300540715035, + "grad_norm": 0.7068643569946289, + "learning_rate": 2.77984862359708e-05, + "loss": 2.3713, + "step": 15162 + }, + { + "epoch": 1.2237107578080866, + "grad_norm": 0.7047072052955627, + "learning_rate": 2.7787564540199097e-05, + "loss": 
2.4264, + "step": 15163 + }, + { + "epoch": 1.2237914615446694, + "grad_norm": 0.6985021829605103, + "learning_rate": 2.7776644644150076e-05, + "loss": 2.4101, + "step": 15164 + }, + { + "epoch": 1.2238721652812525, + "grad_norm": 0.7543687224388123, + "learning_rate": 2.776572654809583e-05, + "loss": 2.3722, + "step": 15165 + }, + { + "epoch": 1.2239528690178356, + "grad_norm": 0.7199926972389221, + "learning_rate": 2.7754810252308473e-05, + "loss": 2.3819, + "step": 15166 + }, + { + "epoch": 1.2240335727544185, + "grad_norm": 0.696756899356842, + "learning_rate": 2.7743895757060156e-05, + "loss": 2.4245, + "step": 15167 + }, + { + "epoch": 1.2241142764910016, + "grad_norm": 0.7848933339118958, + "learning_rate": 2.773298306262281e-05, + "loss": 2.4725, + "step": 15168 + }, + { + "epoch": 1.2241949802275847, + "grad_norm": 0.6819389462471008, + "learning_rate": 2.7722072169268432e-05, + "loss": 2.4338, + "step": 15169 + }, + { + "epoch": 1.2242756839641675, + "grad_norm": 0.7185801267623901, + "learning_rate": 2.7711163077268977e-05, + "loss": 2.4745, + "step": 15170 + }, + { + "epoch": 1.2243563877007506, + "grad_norm": 0.7645030617713928, + "learning_rate": 2.7700255786896278e-05, + "loss": 2.4677, + "step": 15171 + }, + { + "epoch": 1.2244370914373335, + "grad_norm": 0.6559275388717651, + "learning_rate": 2.7689350298422202e-05, + "loss": 2.386, + "step": 15172 + }, + { + "epoch": 1.2245177951739166, + "grad_norm": 0.6965066194534302, + "learning_rate": 2.767844661211856e-05, + "loss": 2.4022, + "step": 15173 + }, + { + "epoch": 1.2245984989104994, + "grad_norm": 0.6618858575820923, + "learning_rate": 2.7667544728257057e-05, + "loss": 2.3541, + "step": 15174 + }, + { + "epoch": 1.2246792026470825, + "grad_norm": 0.6635501980781555, + "learning_rate": 2.765664464710941e-05, + "loss": 2.3984, + "step": 15175 + }, + { + "epoch": 1.2247599063836656, + "grad_norm": 0.6987191438674927, + "learning_rate": 2.764574636894729e-05, + "loss": 2.4637, + "step": 15176 + }, + 
{ + "epoch": 1.2248406101202485, + "grad_norm": 0.7289232611656189, + "learning_rate": 2.7634849894042303e-05, + "loss": 2.4033, + "step": 15177 + }, + { + "epoch": 1.2249213138568316, + "grad_norm": 0.7245565056800842, + "learning_rate": 2.762395522266602e-05, + "loss": 2.4281, + "step": 15178 + }, + { + "epoch": 1.2250020175934146, + "grad_norm": 0.6946065425872803, + "learning_rate": 2.761306235508997e-05, + "loss": 2.3869, + "step": 15179 + }, + { + "epoch": 1.2250827213299975, + "grad_norm": 0.6381784677505493, + "learning_rate": 2.7602171291585666e-05, + "loss": 2.404, + "step": 15180 + }, + { + "epoch": 1.2251634250665806, + "grad_norm": 0.6893685460090637, + "learning_rate": 2.759128203242446e-05, + "loss": 2.4807, + "step": 15181 + }, + { + "epoch": 1.2252441288031637, + "grad_norm": 0.6640260815620422, + "learning_rate": 2.7580394577877787e-05, + "loss": 2.4036, + "step": 15182 + }, + { + "epoch": 1.2253248325397466, + "grad_norm": 0.7125177979469299, + "learning_rate": 2.7569508928217026e-05, + "loss": 2.3869, + "step": 15183 + }, + { + "epoch": 1.2254055362763296, + "grad_norm": 0.657865583896637, + "learning_rate": 2.7558625083713397e-05, + "loss": 2.3869, + "step": 15184 + }, + { + "epoch": 1.2254862400129125, + "grad_norm": 0.6776065230369568, + "learning_rate": 2.7547743044638197e-05, + "loss": 2.4128, + "step": 15185 + }, + { + "epoch": 1.2255669437494956, + "grad_norm": 0.7126299738883972, + "learning_rate": 2.753686281126263e-05, + "loss": 2.4465, + "step": 15186 + }, + { + "epoch": 1.2256476474860787, + "grad_norm": 0.6918273568153381, + "learning_rate": 2.7525984383857873e-05, + "loss": 2.428, + "step": 15187 + }, + { + "epoch": 1.2257283512226615, + "grad_norm": 0.7742759585380554, + "learning_rate": 2.7515107762695025e-05, + "loss": 2.4299, + "step": 15188 + }, + { + "epoch": 1.2258090549592446, + "grad_norm": 0.7194607853889465, + "learning_rate": 2.7504232948045205e-05, + "loss": 2.4315, + "step": 15189 + }, + { + "epoch": 
1.2258897586958275, + "grad_norm": 0.6962646245956421, + "learning_rate": 2.7493359940179363e-05, + "loss": 2.4494, + "step": 15190 + }, + { + "epoch": 1.2259704624324106, + "grad_norm": 0.6681686639785767, + "learning_rate": 2.7482488739368538e-05, + "loss": 2.427, + "step": 15191 + }, + { + "epoch": 1.2260511661689937, + "grad_norm": 0.6589877009391785, + "learning_rate": 2.747161934588366e-05, + "loss": 2.4333, + "step": 15192 + }, + { + "epoch": 1.2261318699055765, + "grad_norm": 0.7415218949317932, + "learning_rate": 2.746075175999564e-05, + "loss": 2.4203, + "step": 15193 + }, + { + "epoch": 1.2262125736421596, + "grad_norm": 0.7371910214424133, + "learning_rate": 2.7449885981975276e-05, + "loss": 2.4684, + "step": 15194 + }, + { + "epoch": 1.2262932773787427, + "grad_norm": 0.7010802626609802, + "learning_rate": 2.7439022012093407e-05, + "loss": 2.4625, + "step": 15195 + }, + { + "epoch": 1.2263739811153256, + "grad_norm": 0.7125125527381897, + "learning_rate": 2.7428159850620773e-05, + "loss": 2.4075, + "step": 15196 + }, + { + "epoch": 1.2264546848519087, + "grad_norm": 0.701133668422699, + "learning_rate": 2.7417299497828107e-05, + "loss": 2.4525, + "step": 15197 + }, + { + "epoch": 1.2265353885884918, + "grad_norm": 0.7543410658836365, + "learning_rate": 2.7406440953986078e-05, + "loss": 2.474, + "step": 15198 + }, + { + "epoch": 1.2266160923250746, + "grad_norm": 0.69012051820755, + "learning_rate": 2.7395584219365323e-05, + "loss": 2.4853, + "step": 15199 + }, + { + "epoch": 1.2266967960616577, + "grad_norm": 0.6559048295021057, + "learning_rate": 2.7384729294236378e-05, + "loss": 2.4252, + "step": 15200 + }, + { + "epoch": 1.2267774997982406, + "grad_norm": 0.6603518128395081, + "learning_rate": 2.7373876178869794e-05, + "loss": 2.4047, + "step": 15201 + }, + { + "epoch": 1.2268582035348237, + "grad_norm": 0.7159265279769897, + "learning_rate": 2.736302487353609e-05, + "loss": 2.4352, + "step": 15202 + }, + { + "epoch": 1.2269389072714068, + 
"grad_norm": 0.6784560084342957, + "learning_rate": 2.735217537850565e-05, + "loss": 2.3933, + "step": 15203 + }, + { + "epoch": 1.2270196110079896, + "grad_norm": 0.7341950535774231, + "learning_rate": 2.7341327694048903e-05, + "loss": 2.4514, + "step": 15204 + }, + { + "epoch": 1.2271003147445727, + "grad_norm": 0.726046621799469, + "learning_rate": 2.7330481820436204e-05, + "loss": 2.4427, + "step": 15205 + }, + { + "epoch": 1.2271810184811556, + "grad_norm": 0.6897192001342773, + "learning_rate": 2.7319637757937854e-05, + "loss": 2.4587, + "step": 15206 + }, + { + "epoch": 1.2272617222177387, + "grad_norm": 0.6981058716773987, + "learning_rate": 2.7308795506824124e-05, + "loss": 2.4297, + "step": 15207 + }, + { + "epoch": 1.2273424259543217, + "grad_norm": 0.694583535194397, + "learning_rate": 2.729795506736522e-05, + "loss": 2.3608, + "step": 15208 + }, + { + "epoch": 1.2274231296909046, + "grad_norm": 0.710192084312439, + "learning_rate": 2.728711643983136e-05, + "loss": 2.3733, + "step": 15209 + }, + { + "epoch": 1.2275038334274877, + "grad_norm": 0.7203633785247803, + "learning_rate": 2.7276279624492595e-05, + "loss": 2.389, + "step": 15210 + }, + { + "epoch": 1.2275845371640708, + "grad_norm": 0.7298668622970581, + "learning_rate": 2.726544462161905e-05, + "loss": 2.3981, + "step": 15211 + }, + { + "epoch": 1.2276652409006537, + "grad_norm": 0.6640039682388306, + "learning_rate": 2.725461143148078e-05, + "loss": 2.4073, + "step": 15212 + }, + { + "epoch": 1.2277459446372367, + "grad_norm": 0.7203015685081482, + "learning_rate": 2.724378005434772e-05, + "loss": 2.4901, + "step": 15213 + }, + { + "epoch": 1.2278266483738198, + "grad_norm": 0.6668895483016968, + "learning_rate": 2.723295049048985e-05, + "loss": 2.4482, + "step": 15214 + }, + { + "epoch": 1.2279073521104027, + "grad_norm": 0.7551584839820862, + "learning_rate": 2.7222122740177103e-05, + "loss": 2.4877, + "step": 15215 + }, + { + "epoch": 1.2279880558469858, + "grad_norm": 0.707202672958374, + 
"learning_rate": 2.721129680367923e-05, + "loss": 2.4577, + "step": 15216 + }, + { + "epoch": 1.2280687595835686, + "grad_norm": 0.685153603553772, + "learning_rate": 2.7200472681266155e-05, + "loss": 2.476, + "step": 15217 + }, + { + "epoch": 1.2281494633201517, + "grad_norm": 0.6843041181564331, + "learning_rate": 2.718965037320762e-05, + "loss": 2.4164, + "step": 15218 + }, + { + "epoch": 1.2282301670567348, + "grad_norm": 0.6548978686332703, + "learning_rate": 2.7178829879773306e-05, + "loss": 2.4187, + "step": 15219 + }, + { + "epoch": 1.2283108707933177, + "grad_norm": 0.7037245035171509, + "learning_rate": 2.7168011201232902e-05, + "loss": 2.3621, + "step": 15220 + }, + { + "epoch": 1.2283915745299008, + "grad_norm": 0.6540676951408386, + "learning_rate": 2.7157194337856074e-05, + "loss": 2.4542, + "step": 15221 + }, + { + "epoch": 1.2284722782664836, + "grad_norm": 0.7699899673461914, + "learning_rate": 2.7146379289912338e-05, + "loss": 2.4639, + "step": 15222 + }, + { + "epoch": 1.2285529820030667, + "grad_norm": 0.7178743481636047, + "learning_rate": 2.713556605767128e-05, + "loss": 2.4222, + "step": 15223 + }, + { + "epoch": 1.2286336857396498, + "grad_norm": 0.6749793887138367, + "learning_rate": 2.7124754641402383e-05, + "loss": 2.4323, + "step": 15224 + }, + { + "epoch": 1.2287143894762327, + "grad_norm": 0.7035594582557678, + "learning_rate": 2.711394504137513e-05, + "loss": 2.4466, + "step": 15225 + }, + { + "epoch": 1.2287950932128158, + "grad_norm": 0.6518487930297852, + "learning_rate": 2.7103137257858868e-05, + "loss": 2.4969, + "step": 15226 + }, + { + "epoch": 1.2288757969493989, + "grad_norm": 0.6739057898521423, + "learning_rate": 2.7092331291122974e-05, + "loss": 2.406, + "step": 15227 + }, + { + "epoch": 1.2289565006859817, + "grad_norm": 0.6584770083427429, + "learning_rate": 2.7081527141436767e-05, + "loss": 2.4304, + "step": 15228 + }, + { + "epoch": 1.2290372044225648, + "grad_norm": 0.6846301555633545, + "learning_rate": 
2.7070724809069514e-05, + "loss": 2.3995, + "step": 15229 + }, + { + "epoch": 1.2291179081591477, + "grad_norm": 0.6778364777565002, + "learning_rate": 2.705992429429044e-05, + "loss": 2.38, + "step": 15230 + }, + { + "epoch": 1.2291986118957308, + "grad_norm": 0.6957302689552307, + "learning_rate": 2.7049125597368753e-05, + "loss": 2.3973, + "step": 15231 + }, + { + "epoch": 1.2292793156323139, + "grad_norm": 0.730269193649292, + "learning_rate": 2.7038328718573514e-05, + "loss": 2.4829, + "step": 15232 + }, + { + "epoch": 1.2293600193688967, + "grad_norm": 0.7114049196243286, + "learning_rate": 2.702753365817384e-05, + "loss": 2.3902, + "step": 15233 + }, + { + "epoch": 1.2294407231054798, + "grad_norm": 0.7137531638145447, + "learning_rate": 2.7016740416438823e-05, + "loss": 2.3957, + "step": 15234 + }, + { + "epoch": 1.2295214268420627, + "grad_norm": 0.7178330421447754, + "learning_rate": 2.7005948993637386e-05, + "loss": 2.4429, + "step": 15235 + }, + { + "epoch": 1.2296021305786458, + "grad_norm": 0.6767767071723938, + "learning_rate": 2.6995159390038506e-05, + "loss": 2.4009, + "step": 15236 + }, + { + "epoch": 1.2296828343152288, + "grad_norm": 0.7713541984558105, + "learning_rate": 2.6984371605911086e-05, + "loss": 2.4326, + "step": 15237 + }, + { + "epoch": 1.2297635380518117, + "grad_norm": 0.7218228578567505, + "learning_rate": 2.6973585641523992e-05, + "loss": 2.4358, + "step": 15238 + }, + { + "epoch": 1.2298442417883948, + "grad_norm": 0.6782575249671936, + "learning_rate": 2.696280149714604e-05, + "loss": 2.3844, + "step": 15239 + }, + { + "epoch": 1.2299249455249779, + "grad_norm": 0.6825734972953796, + "learning_rate": 2.6952019173045982e-05, + "loss": 2.4621, + "step": 15240 + }, + { + "epoch": 1.2300056492615608, + "grad_norm": 0.6587522625923157, + "learning_rate": 2.6941238669492608e-05, + "loss": 2.4465, + "step": 15241 + }, + { + "epoch": 1.2300863529981438, + "grad_norm": 0.6898796558380127, + "learning_rate": 2.6930459986754498e-05, + 
"loss": 2.4469, + "step": 15242 + }, + { + "epoch": 1.230167056734727, + "grad_norm": 0.6764062643051147, + "learning_rate": 2.6919683125100338e-05, + "loss": 2.4476, + "step": 15243 + }, + { + "epoch": 1.2302477604713098, + "grad_norm": 0.6647047400474548, + "learning_rate": 2.6908908084798733e-05, + "loss": 2.3677, + "step": 15244 + }, + { + "epoch": 1.2303284642078929, + "grad_norm": 0.7091608047485352, + "learning_rate": 2.6898134866118174e-05, + "loss": 2.4605, + "step": 15245 + }, + { + "epoch": 1.2304091679444757, + "grad_norm": 0.691007137298584, + "learning_rate": 2.6887363469327188e-05, + "loss": 2.4397, + "step": 15246 + }, + { + "epoch": 1.2304898716810588, + "grad_norm": 0.6685532927513123, + "learning_rate": 2.6876593894694214e-05, + "loss": 2.4279, + "step": 15247 + }, + { + "epoch": 1.230570575417642, + "grad_norm": 0.684474766254425, + "learning_rate": 2.686582614248767e-05, + "loss": 2.4162, + "step": 15248 + }, + { + "epoch": 1.2306512791542248, + "grad_norm": 0.657293975353241, + "learning_rate": 2.6855060212975915e-05, + "loss": 2.4337, + "step": 15249 + }, + { + "epoch": 1.2307319828908079, + "grad_norm": 0.7136504650115967, + "learning_rate": 2.684429610642729e-05, + "loss": 2.4156, + "step": 15250 + }, + { + "epoch": 1.2308126866273907, + "grad_norm": 0.6564410924911499, + "learning_rate": 2.6833533823110013e-05, + "loss": 2.5101, + "step": 15251 + }, + { + "epoch": 1.2308933903639738, + "grad_norm": 0.6628747582435608, + "learning_rate": 2.682277336329233e-05, + "loss": 2.3933, + "step": 15252 + }, + { + "epoch": 1.230974094100557, + "grad_norm": 0.7362595796585083, + "learning_rate": 2.681201472724244e-05, + "loss": 2.4541, + "step": 15253 + }, + { + "epoch": 1.2310547978371398, + "grad_norm": 0.7604697346687317, + "learning_rate": 2.680125791522844e-05, + "loss": 2.4383, + "step": 15254 + }, + { + "epoch": 1.2311355015737229, + "grad_norm": 0.7128429412841797, + "learning_rate": 2.6790502927518434e-05, + "loss": 2.4492, + "step": 15255 + 
}, + { + "epoch": 1.231216205310306, + "grad_norm": 0.6761955618858337, + "learning_rate": 2.677974976438047e-05, + "loss": 2.4355, + "step": 15256 + }, + { + "epoch": 1.2312969090468888, + "grad_norm": 0.6687077879905701, + "learning_rate": 2.6768998426082538e-05, + "loss": 2.4317, + "step": 15257 + }, + { + "epoch": 1.231377612783472, + "grad_norm": 0.7423825860023499, + "learning_rate": 2.675824891289259e-05, + "loss": 2.4216, + "step": 15258 + }, + { + "epoch": 1.231458316520055, + "grad_norm": 0.671130359172821, + "learning_rate": 2.6747501225078542e-05, + "loss": 2.4775, + "step": 15259 + }, + { + "epoch": 1.2315390202566379, + "grad_norm": 0.7421461939811707, + "learning_rate": 2.6736755362908273e-05, + "loss": 2.4042, + "step": 15260 + }, + { + "epoch": 1.231619723993221, + "grad_norm": 0.7084131240844727, + "learning_rate": 2.6726011326649547e-05, + "loss": 2.4506, + "step": 15261 + }, + { + "epoch": 1.2317004277298038, + "grad_norm": 0.641852855682373, + "learning_rate": 2.671526911657015e-05, + "loss": 2.4261, + "step": 15262 + }, + { + "epoch": 1.231781131466387, + "grad_norm": 0.7627724409103394, + "learning_rate": 2.670452873293785e-05, + "loss": 2.4647, + "step": 15263 + }, + { + "epoch": 1.23186183520297, + "grad_norm": 0.6638163924217224, + "learning_rate": 2.669379017602026e-05, + "loss": 2.4208, + "step": 15264 + }, + { + "epoch": 1.2319425389395529, + "grad_norm": 0.6815361380577087, + "learning_rate": 2.668305344608505e-05, + "loss": 2.4404, + "step": 15265 + }, + { + "epoch": 1.232023242676136, + "grad_norm": 0.6466485857963562, + "learning_rate": 2.6672318543399823e-05, + "loss": 2.4327, + "step": 15266 + }, + { + "epoch": 1.2321039464127188, + "grad_norm": 0.7119305729866028, + "learning_rate": 2.6661585468232042e-05, + "loss": 2.4266, + "step": 15267 + }, + { + "epoch": 1.232184650149302, + "grad_norm": 0.7245718836784363, + "learning_rate": 2.6650854220849286e-05, + "loss": 2.4484, + "step": 15268 + }, + { + "epoch": 1.232265353885885, + 
"grad_norm": 0.7050287127494812, + "learning_rate": 2.6640124801518972e-05, + "loss": 2.4441, + "step": 15269 + }, + { + "epoch": 1.2323460576224678, + "grad_norm": 0.6906494498252869, + "learning_rate": 2.6629397210508556e-05, + "loss": 2.4297, + "step": 15270 + }, + { + "epoch": 1.232426761359051, + "grad_norm": 0.7224171757698059, + "learning_rate": 2.661867144808532e-05, + "loss": 2.4279, + "step": 15271 + }, + { + "epoch": 1.232507465095634, + "grad_norm": 0.688804030418396, + "learning_rate": 2.6607947514516606e-05, + "loss": 2.4741, + "step": 15272 + }, + { + "epoch": 1.232588168832217, + "grad_norm": 0.6462350487709045, + "learning_rate": 2.6597225410069726e-05, + "loss": 2.4499, + "step": 15273 + }, + { + "epoch": 1.2326688725688, + "grad_norm": 0.6860110759735107, + "learning_rate": 2.658650513501184e-05, + "loss": 2.4488, + "step": 15274 + }, + { + "epoch": 1.2327495763053828, + "grad_norm": 0.7158305644989014, + "learning_rate": 2.6575786689610138e-05, + "loss": 2.4318, + "step": 15275 + }, + { + "epoch": 1.232830280041966, + "grad_norm": 0.7740959525108337, + "learning_rate": 2.6565070074131804e-05, + "loss": 2.4824, + "step": 15276 + }, + { + "epoch": 1.232910983778549, + "grad_norm": 0.7573856711387634, + "learning_rate": 2.6554355288843847e-05, + "loss": 2.4034, + "step": 15277 + }, + { + "epoch": 1.2329916875151319, + "grad_norm": 0.6809369921684265, + "learning_rate": 2.654364233401332e-05, + "loss": 2.5085, + "step": 15278 + }, + { + "epoch": 1.233072391251715, + "grad_norm": 0.6695643067359924, + "learning_rate": 2.6532931209907307e-05, + "loss": 2.4697, + "step": 15279 + }, + { + "epoch": 1.2331530949882978, + "grad_norm": 0.7218750715255737, + "learning_rate": 2.6522221916792655e-05, + "loss": 2.4753, + "step": 15280 + }, + { + "epoch": 1.233233798724881, + "grad_norm": 0.8171822428703308, + "learning_rate": 2.6511514454936314e-05, + "loss": 2.45, + "step": 15281 + }, + { + "epoch": 1.233314502461464, + "grad_norm": 0.7234573364257812, + 
"learning_rate": 2.6500808824605162e-05, + "loss": 2.3963, + "step": 15282 + }, + { + "epoch": 1.2333952061980469, + "grad_norm": 0.6993409395217896, + "learning_rate": 2.6490105026065948e-05, + "loss": 2.4449, + "step": 15283 + }, + { + "epoch": 1.23347590993463, + "grad_norm": 0.7984449863433838, + "learning_rate": 2.6479403059585472e-05, + "loss": 2.4322, + "step": 15284 + }, + { + "epoch": 1.233556613671213, + "grad_norm": 0.683971107006073, + "learning_rate": 2.6468702925430466e-05, + "loss": 2.4125, + "step": 15285 + }, + { + "epoch": 1.233637317407796, + "grad_norm": 0.6739822626113892, + "learning_rate": 2.6458004623867617e-05, + "loss": 2.4487, + "step": 15286 + }, + { + "epoch": 1.233718021144379, + "grad_norm": 0.7003912925720215, + "learning_rate": 2.644730815516351e-05, + "loss": 2.4437, + "step": 15287 + }, + { + "epoch": 1.233798724880962, + "grad_norm": 0.7011744379997253, + "learning_rate": 2.643661351958474e-05, + "loss": 2.4798, + "step": 15288 + }, + { + "epoch": 1.233879428617545, + "grad_norm": 0.7003397941589355, + "learning_rate": 2.6425920717397867e-05, + "loss": 2.4554, + "step": 15289 + }, + { + "epoch": 1.233960132354128, + "grad_norm": 0.6682165265083313, + "learning_rate": 2.6415229748869374e-05, + "loss": 2.4252, + "step": 15290 + }, + { + "epoch": 1.234040836090711, + "grad_norm": 0.6712457537651062, + "learning_rate": 2.6404540614265715e-05, + "loss": 2.4225, + "step": 15291 + }, + { + "epoch": 1.234121539827294, + "grad_norm": 0.654464602470398, + "learning_rate": 2.63938533138533e-05, + "loss": 2.4462, + "step": 15292 + }, + { + "epoch": 1.234202243563877, + "grad_norm": 0.7311797738075256, + "learning_rate": 2.638316784789845e-05, + "loss": 2.502, + "step": 15293 + }, + { + "epoch": 1.23428294730046, + "grad_norm": 0.6836559176445007, + "learning_rate": 2.6372484216667492e-05, + "loss": 2.5134, + "step": 15294 + }, + { + "epoch": 1.234363651037043, + "grad_norm": 0.6961826086044312, + "learning_rate": 2.636180242042672e-05, + 
"loss": 2.4479, + "step": 15295 + }, + { + "epoch": 1.234444354773626, + "grad_norm": 0.6824259161949158, + "learning_rate": 2.635112245944229e-05, + "loss": 2.4299, + "step": 15296 + }, + { + "epoch": 1.234525058510209, + "grad_norm": 0.7594609260559082, + "learning_rate": 2.634044433398042e-05, + "loss": 2.4469, + "step": 15297 + }, + { + "epoch": 1.234605762246792, + "grad_norm": 0.7044653296470642, + "learning_rate": 2.632976804430721e-05, + "loss": 2.447, + "step": 15298 + }, + { + "epoch": 1.234686465983375, + "grad_norm": 0.6986916065216064, + "learning_rate": 2.631909359068876e-05, + "loss": 2.4705, + "step": 15299 + }, + { + "epoch": 1.234767169719958, + "grad_norm": 0.7025431990623474, + "learning_rate": 2.630842097339111e-05, + "loss": 2.3951, + "step": 15300 + }, + { + "epoch": 1.2348478734565411, + "grad_norm": 0.6533786058425903, + "learning_rate": 2.6297750192680237e-05, + "loss": 2.3769, + "step": 15301 + }, + { + "epoch": 1.234928577193124, + "grad_norm": 0.6575472354888916, + "learning_rate": 2.628708124882212e-05, + "loss": 2.4293, + "step": 15302 + }, + { + "epoch": 1.235009280929707, + "grad_norm": 0.6712046265602112, + "learning_rate": 2.6276414142082584e-05, + "loss": 2.4819, + "step": 15303 + }, + { + "epoch": 1.2350899846662902, + "grad_norm": 0.6947652101516724, + "learning_rate": 2.6265748872727535e-05, + "loss": 2.449, + "step": 15304 + }, + { + "epoch": 1.235170688402873, + "grad_norm": 0.6881443858146667, + "learning_rate": 2.62550854410228e-05, + "loss": 2.3991, + "step": 15305 + }, + { + "epoch": 1.2352513921394561, + "grad_norm": 0.6681519746780396, + "learning_rate": 2.624442384723407e-05, + "loss": 2.4005, + "step": 15306 + }, + { + "epoch": 1.235332095876039, + "grad_norm": 0.6728120446205139, + "learning_rate": 2.62337640916271e-05, + "loss": 2.4242, + "step": 15307 + }, + { + "epoch": 1.235412799612622, + "grad_norm": 0.707360029220581, + "learning_rate": 2.622310617446755e-05, + "loss": 2.4385, + "step": 15308 + }, + { + 
"epoch": 1.2354935033492052, + "grad_norm": 0.6890079975128174, + "learning_rate": 2.6212450096021058e-05, + "loss": 2.443, + "step": 15309 + }, + { + "epoch": 1.235574207085788, + "grad_norm": 0.7022379636764526, + "learning_rate": 2.620179585655318e-05, + "loss": 2.3982, + "step": 15310 + }, + { + "epoch": 1.235654910822371, + "grad_norm": 0.7283182740211487, + "learning_rate": 2.61911434563295e-05, + "loss": 2.4197, + "step": 15311 + }, + { + "epoch": 1.235735614558954, + "grad_norm": 0.6721852421760559, + "learning_rate": 2.6180492895615426e-05, + "loss": 2.4356, + "step": 15312 + }, + { + "epoch": 1.235816318295537, + "grad_norm": 0.6817916631698608, + "learning_rate": 2.616984417467645e-05, + "loss": 2.4325, + "step": 15313 + }, + { + "epoch": 1.2358970220321202, + "grad_norm": 0.6826596260070801, + "learning_rate": 2.6159197293777972e-05, + "loss": 2.4043, + "step": 15314 + }, + { + "epoch": 1.235977725768703, + "grad_norm": 0.7135530114173889, + "learning_rate": 2.6148552253185288e-05, + "loss": 2.4269, + "step": 15315 + }, + { + "epoch": 1.236058429505286, + "grad_norm": 0.7027753591537476, + "learning_rate": 2.6137909053163722e-05, + "loss": 2.4266, + "step": 15316 + }, + { + "epoch": 1.2361391332418692, + "grad_norm": 0.6597041487693787, + "learning_rate": 2.6127267693978552e-05, + "loss": 2.4073, + "step": 15317 + }, + { + "epoch": 1.236219836978452, + "grad_norm": 0.6450026631355286, + "learning_rate": 2.6116628175894974e-05, + "loss": 2.4299, + "step": 15318 + }, + { + "epoch": 1.2363005407150351, + "grad_norm": 0.7740476727485657, + "learning_rate": 2.6105990499178156e-05, + "loss": 2.4088, + "step": 15319 + }, + { + "epoch": 1.2363812444516182, + "grad_norm": 0.6460183262825012, + "learning_rate": 2.609535466409322e-05, + "loss": 2.4311, + "step": 15320 + }, + { + "epoch": 1.236461948188201, + "grad_norm": 0.6514838337898254, + "learning_rate": 2.608472067090525e-05, + "loss": 2.4069, + "step": 15321 + }, + { + "epoch": 1.2365426519247842, + 
"grad_norm": 0.7281234860420227, + "learning_rate": 2.6074088519879237e-05, + "loss": 2.4245, + "step": 15322 + }, + { + "epoch": 1.236623355661367, + "grad_norm": 0.752983570098877, + "learning_rate": 2.606345821128018e-05, + "loss": 2.4149, + "step": 15323 + }, + { + "epoch": 1.2367040593979501, + "grad_norm": 0.6912856101989746, + "learning_rate": 2.6052829745373054e-05, + "loss": 2.4489, + "step": 15324 + }, + { + "epoch": 1.236784763134533, + "grad_norm": 0.6719293594360352, + "learning_rate": 2.604220312242267e-05, + "loss": 2.457, + "step": 15325 + }, + { + "epoch": 1.236865466871116, + "grad_norm": 0.7440586090087891, + "learning_rate": 2.6031578342693918e-05, + "loss": 2.4657, + "step": 15326 + }, + { + "epoch": 1.2369461706076992, + "grad_norm": 0.694442629814148, + "learning_rate": 2.602095540645162e-05, + "loss": 2.4422, + "step": 15327 + }, + { + "epoch": 1.237026874344282, + "grad_norm": 0.7186843752861023, + "learning_rate": 2.601033431396046e-05, + "loss": 2.4229, + "step": 15328 + }, + { + "epoch": 1.2371075780808651, + "grad_norm": 0.7401825785636902, + "learning_rate": 2.5999715065485153e-05, + "loss": 2.45, + "step": 15329 + }, + { + "epoch": 1.2371882818174482, + "grad_norm": 0.6710138916969299, + "learning_rate": 2.598909766129045e-05, + "loss": 2.4074, + "step": 15330 + }, + { + "epoch": 1.237268985554031, + "grad_norm": 0.7867769598960876, + "learning_rate": 2.5978482101640867e-05, + "loss": 2.4709, + "step": 15331 + }, + { + "epoch": 1.2373496892906142, + "grad_norm": 0.7076219916343689, + "learning_rate": 2.5967868386801e-05, + "loss": 2.4887, + "step": 15332 + }, + { + "epoch": 1.2374303930271973, + "grad_norm": 0.7277626991271973, + "learning_rate": 2.5957256517035378e-05, + "loss": 2.4295, + "step": 15333 + }, + { + "epoch": 1.2375110967637801, + "grad_norm": 0.7339804768562317, + "learning_rate": 2.5946646492608506e-05, + "loss": 2.4624, + "step": 15334 + }, + { + "epoch": 1.2375918005003632, + "grad_norm": 0.6707656383514404, + 
"learning_rate": 2.593603831378475e-05, + "loss": 2.4159, + "step": 15335 + }, + { + "epoch": 1.237672504236946, + "grad_norm": 0.7118813991546631, + "learning_rate": 2.592543198082852e-05, + "loss": 2.4496, + "step": 15336 + }, + { + "epoch": 1.2377532079735292, + "grad_norm": 0.675167977809906, + "learning_rate": 2.591482749400419e-05, + "loss": 2.4519, + "step": 15337 + }, + { + "epoch": 1.2378339117101123, + "grad_norm": 0.8245306611061096, + "learning_rate": 2.5904224853575986e-05, + "loss": 2.4732, + "step": 15338 + }, + { + "epoch": 1.2379146154466951, + "grad_norm": 0.7411863207817078, + "learning_rate": 2.5893624059808184e-05, + "loss": 2.4458, + "step": 15339 + }, + { + "epoch": 1.2379953191832782, + "grad_norm": 0.6864522695541382, + "learning_rate": 2.5883025112964997e-05, + "loss": 2.4264, + "step": 15340 + }, + { + "epoch": 1.238076022919861, + "grad_norm": 0.6585919260978699, + "learning_rate": 2.5872428013310567e-05, + "loss": 2.3904, + "step": 15341 + }, + { + "epoch": 1.2381567266564442, + "grad_norm": 0.6605508327484131, + "learning_rate": 2.5861832761108995e-05, + "loss": 2.4828, + "step": 15342 + }, + { + "epoch": 1.2382374303930272, + "grad_norm": 0.7353223562240601, + "learning_rate": 2.5851239356624392e-05, + "loss": 2.4335, + "step": 15343 + }, + { + "epoch": 1.2383181341296101, + "grad_norm": 0.6907783150672913, + "learning_rate": 2.5840647800120688e-05, + "loss": 2.4394, + "step": 15344 + }, + { + "epoch": 1.2383988378661932, + "grad_norm": 0.7239590287208557, + "learning_rate": 2.5830058091861896e-05, + "loss": 2.4221, + "step": 15345 + }, + { + "epoch": 1.2384795416027763, + "grad_norm": 0.7001412510871887, + "learning_rate": 2.5819470232111975e-05, + "loss": 2.4521, + "step": 15346 + }, + { + "epoch": 1.2385602453393592, + "grad_norm": 0.6983658671379089, + "learning_rate": 2.580888422113473e-05, + "loss": 2.4839, + "step": 15347 + }, + { + "epoch": 1.2386409490759422, + "grad_norm": 0.7829005718231201, + "learning_rate": 
2.5798300059194037e-05, + "loss": 2.4546, + "step": 15348 + }, + { + "epoch": 1.2387216528125253, + "grad_norm": 0.7248061299324036, + "learning_rate": 2.5787717746553664e-05, + "loss": 2.4341, + "step": 15349 + }, + { + "epoch": 1.2388023565491082, + "grad_norm": 0.7921163439750671, + "learning_rate": 2.577713728347736e-05, + "loss": 2.475, + "step": 15350 + }, + { + "epoch": 1.2388830602856913, + "grad_norm": 0.6571238040924072, + "learning_rate": 2.5766558670228813e-05, + "loss": 2.4636, + "step": 15351 + }, + { + "epoch": 1.2389637640222741, + "grad_norm": 0.7436683177947998, + "learning_rate": 2.575598190707168e-05, + "loss": 2.4868, + "step": 15352 + }, + { + "epoch": 1.2390444677588572, + "grad_norm": 0.6471900939941406, + "learning_rate": 2.5745406994269573e-05, + "loss": 2.4349, + "step": 15353 + }, + { + "epoch": 1.2391251714954403, + "grad_norm": 0.6612011194229126, + "learning_rate": 2.5734833932086012e-05, + "loss": 2.4088, + "step": 15354 + }, + { + "epoch": 1.2392058752320232, + "grad_norm": 0.6882977485656738, + "learning_rate": 2.572426272078451e-05, + "loss": 2.4344, + "step": 15355 + }, + { + "epoch": 1.2392865789686063, + "grad_norm": 0.6836830973625183, + "learning_rate": 2.5713693360628565e-05, + "loss": 2.4325, + "step": 15356 + }, + { + "epoch": 1.2393672827051891, + "grad_norm": 0.712127149105072, + "learning_rate": 2.5703125851881536e-05, + "loss": 2.4505, + "step": 15357 + }, + { + "epoch": 1.2394479864417722, + "grad_norm": 0.7162468433380127, + "learning_rate": 2.5692560194806837e-05, + "loss": 2.4167, + "step": 15358 + }, + { + "epoch": 1.2395286901783553, + "grad_norm": 0.7770177125930786, + "learning_rate": 2.568199638966777e-05, + "loss": 2.4072, + "step": 15359 + }, + { + "epoch": 1.2396093939149382, + "grad_norm": 0.7049651741981506, + "learning_rate": 2.5671434436727636e-05, + "loss": 2.434, + "step": 15360 + }, + { + "epoch": 1.2396900976515213, + "grad_norm": 0.7793349027633667, + "learning_rate": 2.566087433624964e-05, + 
"loss": 2.4762, + "step": 15361 + }, + { + "epoch": 1.2397708013881044, + "grad_norm": 0.6776690483093262, + "learning_rate": 2.5650316088497018e-05, + "loss": 2.402, + "step": 15362 + }, + { + "epoch": 1.2398515051246872, + "grad_norm": 0.7207701802253723, + "learning_rate": 2.5639759693732834e-05, + "loss": 2.4398, + "step": 15363 + }, + { + "epoch": 1.2399322088612703, + "grad_norm": 0.759787917137146, + "learning_rate": 2.5629205152220215e-05, + "loss": 2.4268, + "step": 15364 + }, + { + "epoch": 1.2400129125978534, + "grad_norm": 0.6906142830848694, + "learning_rate": 2.5618652464222215e-05, + "loss": 2.4075, + "step": 15365 + }, + { + "epoch": 1.2400936163344363, + "grad_norm": 0.7002954483032227, + "learning_rate": 2.560810163000187e-05, + "loss": 2.4516, + "step": 15366 + }, + { + "epoch": 1.2401743200710194, + "grad_norm": 0.7287559509277344, + "learning_rate": 2.5597552649822053e-05, + "loss": 2.4975, + "step": 15367 + }, + { + "epoch": 1.2402550238076022, + "grad_norm": 0.6523926854133606, + "learning_rate": 2.558700552394572e-05, + "loss": 2.4085, + "step": 15368 + }, + { + "epoch": 1.2403357275441853, + "grad_norm": 0.7289387583732605, + "learning_rate": 2.5576460252635727e-05, + "loss": 2.4789, + "step": 15369 + }, + { + "epoch": 1.2404164312807684, + "grad_norm": 0.6613432765007019, + "learning_rate": 2.5565916836154878e-05, + "loss": 2.4263, + "step": 15370 + }, + { + "epoch": 1.2404971350173513, + "grad_norm": 0.7275245785713196, + "learning_rate": 2.555537527476597e-05, + "loss": 2.4652, + "step": 15371 + }, + { + "epoch": 1.2405778387539343, + "grad_norm": 0.6726976037025452, + "learning_rate": 2.554483556873173e-05, + "loss": 2.4092, + "step": 15372 + }, + { + "epoch": 1.2406585424905172, + "grad_norm": 0.6908233761787415, + "learning_rate": 2.5534297718314794e-05, + "loss": 2.3678, + "step": 15373 + }, + { + "epoch": 1.2407392462271003, + "grad_norm": 0.6893147826194763, + "learning_rate": 2.5523761723777806e-05, + "loss": 2.4625, + "step": 
15374 + }, + { + "epoch": 1.2408199499636834, + "grad_norm": 0.7640267014503479, + "learning_rate": 2.551322758538339e-05, + "loss": 2.446, + "step": 15375 + }, + { + "epoch": 1.2409006537002663, + "grad_norm": 0.7187458276748657, + "learning_rate": 2.550269530339402e-05, + "loss": 2.4215, + "step": 15376 + }, + { + "epoch": 1.2409813574368493, + "grad_norm": 0.8041789531707764, + "learning_rate": 2.5492164878072234e-05, + "loss": 2.5085, + "step": 15377 + }, + { + "epoch": 1.2410620611734324, + "grad_norm": 0.6582188010215759, + "learning_rate": 2.5481636309680445e-05, + "loss": 2.467, + "step": 15378 + }, + { + "epoch": 1.2411427649100153, + "grad_norm": 0.705731213092804, + "learning_rate": 2.5471109598481112e-05, + "loss": 2.3764, + "step": 15379 + }, + { + "epoch": 1.2412234686465984, + "grad_norm": 0.6918940544128418, + "learning_rate": 2.5460584744736495e-05, + "loss": 2.4513, + "step": 15380 + }, + { + "epoch": 1.2413041723831812, + "grad_norm": 0.7402673959732056, + "learning_rate": 2.5450061748708975e-05, + "loss": 2.5133, + "step": 15381 + }, + { + "epoch": 1.2413848761197643, + "grad_norm": 0.6740667223930359, + "learning_rate": 2.543954061066083e-05, + "loss": 2.4649, + "step": 15382 + }, + { + "epoch": 1.2414655798563474, + "grad_norm": 0.6665407419204712, + "learning_rate": 2.5429021330854197e-05, + "loss": 2.4321, + "step": 15383 + }, + { + "epoch": 1.2415462835929303, + "grad_norm": 0.7324530482292175, + "learning_rate": 2.5418503909551296e-05, + "loss": 2.3574, + "step": 15384 + }, + { + "epoch": 1.2416269873295134, + "grad_norm": 0.7117868661880493, + "learning_rate": 2.5407988347014255e-05, + "loss": 2.4552, + "step": 15385 + }, + { + "epoch": 1.2417076910660962, + "grad_norm": 0.7162930965423584, + "learning_rate": 2.5397474643505103e-05, + "loss": 2.4135, + "step": 15386 + }, + { + "epoch": 1.2417883948026793, + "grad_norm": 0.7301257848739624, + "learning_rate": 2.5386962799285895e-05, + "loss": 2.4277, + "step": 15387 + }, + { + "epoch": 
1.2418690985392624, + "grad_norm": 0.7404977679252625, + "learning_rate": 2.5376452814618645e-05, + "loss": 2.478, + "step": 15388 + }, + { + "epoch": 1.2419498022758453, + "grad_norm": 0.6546272039413452, + "learning_rate": 2.536594468976522e-05, + "loss": 2.4879, + "step": 15389 + }, + { + "epoch": 1.2420305060124284, + "grad_norm": 0.6501599550247192, + "learning_rate": 2.5355438424987565e-05, + "loss": 2.3964, + "step": 15390 + }, + { + "epoch": 1.2421112097490115, + "grad_norm": 0.6711748242378235, + "learning_rate": 2.5344934020547496e-05, + "loss": 2.4123, + "step": 15391 + }, + { + "epoch": 1.2421919134855943, + "grad_norm": 0.6803534030914307, + "learning_rate": 2.5334431476706823e-05, + "loss": 2.4271, + "step": 15392 + }, + { + "epoch": 1.2422726172221774, + "grad_norm": 0.7407296299934387, + "learning_rate": 2.5323930793727302e-05, + "loss": 2.49, + "step": 15393 + }, + { + "epoch": 1.2423533209587605, + "grad_norm": 0.701870858669281, + "learning_rate": 2.5313431971870617e-05, + "loss": 2.4534, + "step": 15394 + }, + { + "epoch": 1.2424340246953434, + "grad_norm": 0.6658090353012085, + "learning_rate": 2.5302935011398475e-05, + "loss": 2.4581, + "step": 15395 + }, + { + "epoch": 1.2425147284319265, + "grad_norm": 0.6616473197937012, + "learning_rate": 2.529243991257243e-05, + "loss": 2.4169, + "step": 15396 + }, + { + "epoch": 1.2425954321685093, + "grad_norm": 0.6714773178100586, + "learning_rate": 2.5281946675654067e-05, + "loss": 2.4159, + "step": 15397 + }, + { + "epoch": 1.2426761359050924, + "grad_norm": 0.6789337396621704, + "learning_rate": 2.5271455300904935e-05, + "loss": 2.4211, + "step": 15398 + }, + { + "epoch": 1.2427568396416755, + "grad_norm": 0.6793739795684814, + "learning_rate": 2.5260965788586456e-05, + "loss": 2.4337, + "step": 15399 + }, + { + "epoch": 1.2428375433782584, + "grad_norm": 0.6432294249534607, + "learning_rate": 2.5250478138960076e-05, + "loss": 2.4268, + "step": 15400 + }, + { + "epoch": 1.2429182471148414, + 
"grad_norm": 0.6960669159889221, + "learning_rate": 2.523999235228718e-05, + "loss": 2.3535, + "step": 15401 + }, + { + "epoch": 1.2429989508514243, + "grad_norm": 0.6724488735198975, + "learning_rate": 2.5229508428829096e-05, + "loss": 2.4294, + "step": 15402 + }, + { + "epoch": 1.2430796545880074, + "grad_norm": 0.636105477809906, + "learning_rate": 2.521902636884711e-05, + "loss": 2.4438, + "step": 15403 + }, + { + "epoch": 1.2431603583245905, + "grad_norm": 0.6865580677986145, + "learning_rate": 2.52085461726025e-05, + "loss": 2.4473, + "step": 15404 + }, + { + "epoch": 1.2432410620611734, + "grad_norm": 0.6740261316299438, + "learning_rate": 2.5198067840356398e-05, + "loss": 2.4642, + "step": 15405 + }, + { + "epoch": 1.2433217657977564, + "grad_norm": 0.7241789698600769, + "learning_rate": 2.518759137236998e-05, + "loss": 2.4294, + "step": 15406 + }, + { + "epoch": 1.2434024695343395, + "grad_norm": 0.6839794516563416, + "learning_rate": 2.5177116768904373e-05, + "loss": 2.4697, + "step": 15407 + }, + { + "epoch": 1.2434831732709224, + "grad_norm": 0.677390992641449, + "learning_rate": 2.5166644030220578e-05, + "loss": 2.4411, + "step": 15408 + }, + { + "epoch": 1.2435638770075055, + "grad_norm": 0.709065854549408, + "learning_rate": 2.515617315657962e-05, + "loss": 2.4392, + "step": 15409 + }, + { + "epoch": 1.2436445807440886, + "grad_norm": 0.6735498905181885, + "learning_rate": 2.514570414824249e-05, + "loss": 2.3924, + "step": 15410 + }, + { + "epoch": 1.2437252844806714, + "grad_norm": 0.6729374527931213, + "learning_rate": 2.513523700547007e-05, + "loss": 2.4464, + "step": 15411 + }, + { + "epoch": 1.2438059882172545, + "grad_norm": 0.7232720851898193, + "learning_rate": 2.5124771728523244e-05, + "loss": 2.3975, + "step": 15412 + }, + { + "epoch": 1.2438866919538374, + "grad_norm": 0.7467584609985352, + "learning_rate": 2.5114308317662837e-05, + "loss": 2.4191, + "step": 15413 + }, + { + "epoch": 1.2439673956904205, + "grad_norm": 0.6951141953468323, + 
"learning_rate": 2.5103846773149642e-05, + "loss": 2.4207, + "step": 15414 + }, + { + "epoch": 1.2440480994270036, + "grad_norm": 0.6427489519119263, + "learning_rate": 2.5093387095244336e-05, + "loss": 2.3539, + "step": 15415 + }, + { + "epoch": 1.2441288031635864, + "grad_norm": 0.729580283164978, + "learning_rate": 2.5082929284207644e-05, + "loss": 2.4464, + "step": 15416 + }, + { + "epoch": 1.2442095069001695, + "grad_norm": 0.7247009873390198, + "learning_rate": 2.5072473340300207e-05, + "loss": 2.4294, + "step": 15417 + }, + { + "epoch": 1.2442902106367524, + "grad_norm": 0.7037674784660339, + "learning_rate": 2.5062019263782577e-05, + "loss": 2.4294, + "step": 15418 + }, + { + "epoch": 1.2443709143733355, + "grad_norm": 0.6997841596603394, + "learning_rate": 2.5051567054915303e-05, + "loss": 2.4976, + "step": 15419 + }, + { + "epoch": 1.2444516181099186, + "grad_norm": 0.7001172304153442, + "learning_rate": 2.504111671395891e-05, + "loss": 2.371, + "step": 15420 + }, + { + "epoch": 1.2445323218465014, + "grad_norm": 0.6781473159790039, + "learning_rate": 2.5030668241173827e-05, + "loss": 2.4124, + "step": 15421 + }, + { + "epoch": 1.2446130255830845, + "grad_norm": 0.7053182125091553, + "learning_rate": 2.5020221636820463e-05, + "loss": 2.4109, + "step": 15422 + }, + { + "epoch": 1.2446937293196676, + "grad_norm": 0.68635493516922, + "learning_rate": 2.50097769011592e-05, + "loss": 2.4548, + "step": 15423 + }, + { + "epoch": 1.2447744330562505, + "grad_norm": 0.7015564441680908, + "learning_rate": 2.4999334034450293e-05, + "loss": 2.4537, + "step": 15424 + }, + { + "epoch": 1.2448551367928335, + "grad_norm": 0.694054901599884, + "learning_rate": 2.4988893036954043e-05, + "loss": 2.4396, + "step": 15425 + }, + { + "epoch": 1.2449358405294164, + "grad_norm": 0.702518880367279, + "learning_rate": 2.4978453908930665e-05, + "loss": 2.4015, + "step": 15426 + }, + { + "epoch": 1.2450165442659995, + "grad_norm": 0.7237387895584106, + "learning_rate": 
2.4968016650640348e-05, + "loss": 2.4257, + "step": 15427 + }, + { + "epoch": 1.2450972480025826, + "grad_norm": 0.7133163809776306, + "learning_rate": 2.4957581262343154e-05, + "loss": 2.4532, + "step": 15428 + }, + { + "epoch": 1.2451779517391655, + "grad_norm": 0.8339287042617798, + "learning_rate": 2.4947147744299203e-05, + "loss": 2.4621, + "step": 15429 + }, + { + "epoch": 1.2452586554757485, + "grad_norm": 0.7620034217834473, + "learning_rate": 2.493671609676852e-05, + "loss": 2.365, + "step": 15430 + }, + { + "epoch": 1.2453393592123314, + "grad_norm": 0.7445465922355652, + "learning_rate": 2.4926286320011094e-05, + "loss": 2.4764, + "step": 15431 + }, + { + "epoch": 1.2454200629489145, + "grad_norm": 0.7366160154342651, + "learning_rate": 2.4915858414286852e-05, + "loss": 2.4597, + "step": 15432 + }, + { + "epoch": 1.2455007666854976, + "grad_norm": 0.7098437547683716, + "learning_rate": 2.490543237985572e-05, + "loss": 2.4202, + "step": 15433 + }, + { + "epoch": 1.2455814704220805, + "grad_norm": 0.6483333706855774, + "learning_rate": 2.4895008216977478e-05, + "loss": 2.4108, + "step": 15434 + }, + { + "epoch": 1.2456621741586635, + "grad_norm": 0.6797904968261719, + "learning_rate": 2.4884585925911963e-05, + "loss": 2.4414, + "step": 15435 + }, + { + "epoch": 1.2457428778952466, + "grad_norm": 0.6853424310684204, + "learning_rate": 2.4874165506918957e-05, + "loss": 2.4226, + "step": 15436 + }, + { + "epoch": 1.2458235816318295, + "grad_norm": 0.6861590147018433, + "learning_rate": 2.4863746960258094e-05, + "loss": 2.3748, + "step": 15437 + }, + { + "epoch": 1.2459042853684126, + "grad_norm": 0.7360263466835022, + "learning_rate": 2.4853330286189058e-05, + "loss": 2.4441, + "step": 15438 + }, + { + "epoch": 1.2459849891049957, + "grad_norm": 0.6894183158874512, + "learning_rate": 2.4842915484971496e-05, + "loss": 2.3495, + "step": 15439 + }, + { + "epoch": 1.2460656928415785, + "grad_norm": 0.7570669651031494, + "learning_rate": 2.4832502556864923e-05, + 
"loss": 2.4622, + "step": 15440 + }, + { + "epoch": 1.2461463965781616, + "grad_norm": 0.6986069083213806, + "learning_rate": 2.4822091502128876e-05, + "loss": 2.3647, + "step": 15441 + }, + { + "epoch": 1.2462271003147445, + "grad_norm": 0.681450366973877, + "learning_rate": 2.481168232102279e-05, + "loss": 2.3872, + "step": 15442 + }, + { + "epoch": 1.2463078040513276, + "grad_norm": 0.7241837978363037, + "learning_rate": 2.480127501380618e-05, + "loss": 2.4692, + "step": 15443 + }, + { + "epoch": 1.2463885077879107, + "grad_norm": 0.6575295329093933, + "learning_rate": 2.479086958073834e-05, + "loss": 2.5057, + "step": 15444 + }, + { + "epoch": 1.2464692115244935, + "grad_norm": 0.7289770841598511, + "learning_rate": 2.478046602207864e-05, + "loss": 2.4164, + "step": 15445 + }, + { + "epoch": 1.2465499152610766, + "grad_norm": 0.6682024598121643, + "learning_rate": 2.4770064338086374e-05, + "loss": 2.4466, + "step": 15446 + }, + { + "epoch": 1.2466306189976595, + "grad_norm": 0.7238918542861938, + "learning_rate": 2.475966452902072e-05, + "loss": 2.4367, + "step": 15447 + }, + { + "epoch": 1.2467113227342426, + "grad_norm": 0.6825705170631409, + "learning_rate": 2.4749266595140918e-05, + "loss": 2.4337, + "step": 15448 + }, + { + "epoch": 1.2467920264708257, + "grad_norm": 0.7352269887924194, + "learning_rate": 2.4738870536706126e-05, + "loss": 2.4103, + "step": 15449 + }, + { + "epoch": 1.2468727302074085, + "grad_norm": 0.658930778503418, + "learning_rate": 2.4728476353975394e-05, + "loss": 2.4281, + "step": 15450 + }, + { + "epoch": 1.2469534339439916, + "grad_norm": 0.6933601498603821, + "learning_rate": 2.4718084047207778e-05, + "loss": 2.4502, + "step": 15451 + }, + { + "epoch": 1.2470341376805747, + "grad_norm": 0.6901879906654358, + "learning_rate": 2.4707693616662308e-05, + "loss": 2.4057, + "step": 15452 + }, + { + "epoch": 1.2471148414171576, + "grad_norm": 0.7648913860321045, + "learning_rate": 2.469730506259792e-05, + "loss": 2.4163, + "step": 15453 
+ }, + { + "epoch": 1.2471955451537406, + "grad_norm": 0.6496175527572632, + "learning_rate": 2.4686918385273537e-05, + "loss": 2.4373, + "step": 15454 + }, + { + "epoch": 1.2472762488903237, + "grad_norm": 0.6949105858802795, + "learning_rate": 2.4676533584948048e-05, + "loss": 2.4108, + "step": 15455 + }, + { + "epoch": 1.2473569526269066, + "grad_norm": 0.7018688321113586, + "learning_rate": 2.4666150661880206e-05, + "loss": 2.4589, + "step": 15456 + }, + { + "epoch": 1.2474376563634897, + "grad_norm": 0.7141219973564148, + "learning_rate": 2.4655769616328827e-05, + "loss": 2.4022, + "step": 15457 + }, + { + "epoch": 1.2475183601000726, + "grad_norm": 0.7276743054389954, + "learning_rate": 2.4645390448552608e-05, + "loss": 2.4443, + "step": 15458 + }, + { + "epoch": 1.2475990638366556, + "grad_norm": 0.6861153244972229, + "learning_rate": 2.463501315881027e-05, + "loss": 2.4478, + "step": 15459 + }, + { + "epoch": 1.2476797675732387, + "grad_norm": 0.7252256274223328, + "learning_rate": 2.462463774736038e-05, + "loss": 2.446, + "step": 15460 + }, + { + "epoch": 1.2477604713098216, + "grad_norm": 0.6914857625961304, + "learning_rate": 2.4614264214461557e-05, + "loss": 2.4294, + "step": 15461 + }, + { + "epoch": 1.2478411750464047, + "grad_norm": 0.6815036535263062, + "learning_rate": 2.460389256037232e-05, + "loss": 2.4389, + "step": 15462 + }, + { + "epoch": 1.2479218787829875, + "grad_norm": 0.7420194745063782, + "learning_rate": 2.4593522785351176e-05, + "loss": 2.4932, + "step": 15463 + }, + { + "epoch": 1.2480025825195706, + "grad_norm": 0.6622182130813599, + "learning_rate": 2.4583154889656556e-05, + "loss": 2.4327, + "step": 15464 + }, + { + "epoch": 1.2480832862561537, + "grad_norm": 0.6527934074401855, + "learning_rate": 2.457278887354689e-05, + "loss": 2.3857, + "step": 15465 + }, + { + "epoch": 1.2481639899927366, + "grad_norm": 0.6942344903945923, + "learning_rate": 2.4562424737280465e-05, + "loss": 2.4181, + "step": 15466 + }, + { + "epoch": 
1.2482446937293197, + "grad_norm": 0.7449823021888733, + "learning_rate": 2.45520624811156e-05, + "loss": 2.4575, + "step": 15467 + }, + { + "epoch": 1.2483253974659028, + "grad_norm": 0.6905208826065063, + "learning_rate": 2.4541702105310605e-05, + "loss": 2.3858, + "step": 15468 + }, + { + "epoch": 1.2484061012024856, + "grad_norm": 0.6928502917289734, + "learning_rate": 2.4531343610123603e-05, + "loss": 2.4212, + "step": 15469 + }, + { + "epoch": 1.2484868049390687, + "grad_norm": 0.7182145118713379, + "learning_rate": 2.45209869958128e-05, + "loss": 2.4063, + "step": 15470 + }, + { + "epoch": 1.2485675086756518, + "grad_norm": 0.7379452586174011, + "learning_rate": 2.4510632262636314e-05, + "loss": 2.4612, + "step": 15471 + }, + { + "epoch": 1.2486482124122347, + "grad_norm": 0.6663349270820618, + "learning_rate": 2.450027941085219e-05, + "loss": 2.4583, + "step": 15472 + }, + { + "epoch": 1.2487289161488178, + "grad_norm": 0.7266560792922974, + "learning_rate": 2.4489928440718467e-05, + "loss": 2.4483, + "step": 15473 + }, + { + "epoch": 1.2488096198854006, + "grad_norm": 0.7046550512313843, + "learning_rate": 2.447957935249311e-05, + "loss": 2.4087, + "step": 15474 + }, + { + "epoch": 1.2488903236219837, + "grad_norm": 0.684248685836792, + "learning_rate": 2.4469232146434084e-05, + "loss": 2.4352, + "step": 15475 + }, + { + "epoch": 1.2489710273585668, + "grad_norm": 0.6864973902702332, + "learning_rate": 2.4458886822799198e-05, + "loss": 2.3872, + "step": 15476 + }, + { + "epoch": 1.2490517310951497, + "grad_norm": 0.6964752674102783, + "learning_rate": 2.444854338184631e-05, + "loss": 2.437, + "step": 15477 + }, + { + "epoch": 1.2491324348317328, + "grad_norm": 0.6755973100662231, + "learning_rate": 2.4438201823833252e-05, + "loss": 2.4302, + "step": 15478 + }, + { + "epoch": 1.2492131385683156, + "grad_norm": 0.6434857249259949, + "learning_rate": 2.44278621490177e-05, + "loss": 2.406, + "step": 15479 + }, + { + "epoch": 1.2492938423048987, + "grad_norm": 
0.7342328429222107, + "learning_rate": 2.441752435765736e-05, + "loss": 2.451, + "step": 15480 + }, + { + "epoch": 1.2493745460414818, + "grad_norm": 0.7486860752105713, + "learning_rate": 2.44071884500099e-05, + "loss": 2.4536, + "step": 15481 + }, + { + "epoch": 1.2494552497780647, + "grad_norm": 0.7274537086486816, + "learning_rate": 2.4396854426332903e-05, + "loss": 2.4599, + "step": 15482 + }, + { + "epoch": 1.2495359535146477, + "grad_norm": 0.7580124735832214, + "learning_rate": 2.4386522286883918e-05, + "loss": 2.4038, + "step": 15483 + }, + { + "epoch": 1.2496166572512308, + "grad_norm": 0.6776975393295288, + "learning_rate": 2.4376192031920488e-05, + "loss": 2.4246, + "step": 15484 + }, + { + "epoch": 1.2496973609878137, + "grad_norm": 0.6899511814117432, + "learning_rate": 2.4365863661699996e-05, + "loss": 2.3922, + "step": 15485 + }, + { + "epoch": 1.2497780647243968, + "grad_norm": 0.7487930059432983, + "learning_rate": 2.4355537176479903e-05, + "loss": 2.4573, + "step": 15486 + }, + { + "epoch": 1.2498587684609797, + "grad_norm": 0.7306599617004395, + "learning_rate": 2.4345212576517575e-05, + "loss": 2.4745, + "step": 15487 + }, + { + "epoch": 1.2499394721975627, + "grad_norm": 0.7152543067932129, + "learning_rate": 2.43348898620703e-05, + "loss": 2.4768, + "step": 15488 + }, + { + "epoch": 1.2500201759341458, + "grad_norm": 0.6576277017593384, + "learning_rate": 2.432456903339535e-05, + "loss": 2.4289, + "step": 15489 + }, + { + "epoch": 1.2501008796707287, + "grad_norm": 0.6974572539329529, + "learning_rate": 2.4314250090749956e-05, + "loss": 2.4218, + "step": 15490 + }, + { + "epoch": 1.2501815834073118, + "grad_norm": 0.7869577407836914, + "learning_rate": 2.4303933034391323e-05, + "loss": 2.3899, + "step": 15491 + }, + { + "epoch": 1.2502622871438946, + "grad_norm": 0.6723129749298096, + "learning_rate": 2.42936178645765e-05, + "loss": 2.4238, + "step": 15492 + }, + { + "epoch": 1.2503429908804777, + "grad_norm": 0.6839526891708374, + 
"learning_rate": 2.428330458156265e-05, + "loss": 2.4037, + "step": 15493 + }, + { + "epoch": 1.2504236946170608, + "grad_norm": 0.6866093277931213, + "learning_rate": 2.4272993185606796e-05, + "loss": 2.4228, + "step": 15494 + }, + { + "epoch": 1.2505043983536437, + "grad_norm": 0.6992947459220886, + "learning_rate": 2.426268367696588e-05, + "loss": 2.4248, + "step": 15495 + }, + { + "epoch": 1.2505851020902268, + "grad_norm": 0.6836698651313782, + "learning_rate": 2.4252376055896862e-05, + "loss": 2.5387, + "step": 15496 + }, + { + "epoch": 1.2506658058268099, + "grad_norm": 0.6990752816200256, + "learning_rate": 2.4242070322656663e-05, + "loss": 2.4438, + "step": 15497 + }, + { + "epoch": 1.2507465095633927, + "grad_norm": 0.7143029570579529, + "learning_rate": 2.4231766477502082e-05, + "loss": 2.4, + "step": 15498 + }, + { + "epoch": 1.2508272132999758, + "grad_norm": 0.6585043668746948, + "learning_rate": 2.422146452068994e-05, + "loss": 2.4256, + "step": 15499 + }, + { + "epoch": 1.250907917036559, + "grad_norm": 0.739107072353363, + "learning_rate": 2.421116445247702e-05, + "loss": 2.428, + "step": 15500 + }, + { + "epoch": 1.2509886207731418, + "grad_norm": 0.6675287485122681, + "learning_rate": 2.420086627311997e-05, + "loss": 2.5095, + "step": 15501 + }, + { + "epoch": 1.2510693245097249, + "grad_norm": 0.7133405804634094, + "learning_rate": 2.4190569982875467e-05, + "loss": 2.4719, + "step": 15502 + }, + { + "epoch": 1.2511500282463077, + "grad_norm": 0.710904061794281, + "learning_rate": 2.4180275582000134e-05, + "loss": 2.4449, + "step": 15503 + }, + { + "epoch": 1.2512307319828908, + "grad_norm": 0.7088729739189148, + "learning_rate": 2.4169983070750525e-05, + "loss": 2.4059, + "step": 15504 + }, + { + "epoch": 1.2513114357194737, + "grad_norm": 0.7187358736991882, + "learning_rate": 2.4159692449383152e-05, + "loss": 2.4577, + "step": 15505 + }, + { + "epoch": 1.2513921394560568, + "grad_norm": 0.7531955242156982, + "learning_rate": 
2.4149403718154497e-05, + "loss": 2.4101, + "step": 15506 + }, + { + "epoch": 1.2514728431926398, + "grad_norm": 0.7565199136734009, + "learning_rate": 2.413911687732101e-05, + "loss": 2.4805, + "step": 15507 + }, + { + "epoch": 1.2515535469292227, + "grad_norm": 0.706471860408783, + "learning_rate": 2.4128831927139008e-05, + "loss": 2.4494, + "step": 15508 + }, + { + "epoch": 1.2516342506658058, + "grad_norm": 0.7022314667701721, + "learning_rate": 2.4118548867864832e-05, + "loss": 2.4442, + "step": 15509 + }, + { + "epoch": 1.251714954402389, + "grad_norm": 0.6885591745376587, + "learning_rate": 2.4108267699754806e-05, + "loss": 2.4186, + "step": 15510 + }, + { + "epoch": 1.2517956581389718, + "grad_norm": 0.6963610649108887, + "learning_rate": 2.409798842306511e-05, + "loss": 2.4209, + "step": 15511 + }, + { + "epoch": 1.2518763618755548, + "grad_norm": 0.7117185592651367, + "learning_rate": 2.4087711038051942e-05, + "loss": 2.4106, + "step": 15512 + }, + { + "epoch": 1.251957065612138, + "grad_norm": 0.6944519281387329, + "learning_rate": 2.407743554497146e-05, + "loss": 2.4493, + "step": 15513 + }, + { + "epoch": 1.2520377693487208, + "grad_norm": 0.689818263053894, + "learning_rate": 2.406716194407974e-05, + "loss": 2.4358, + "step": 15514 + }, + { + "epoch": 1.2521184730853039, + "grad_norm": 0.8132768273353577, + "learning_rate": 2.4056890235632846e-05, + "loss": 2.4574, + "step": 15515 + }, + { + "epoch": 1.252199176821887, + "grad_norm": 0.6855002045631409, + "learning_rate": 2.4046620419886777e-05, + "loss": 2.4118, + "step": 15516 + }, + { + "epoch": 1.2522798805584698, + "grad_norm": 0.6616373658180237, + "learning_rate": 2.4036352497097458e-05, + "loss": 2.4332, + "step": 15517 + }, + { + "epoch": 1.252360584295053, + "grad_norm": 0.6657225489616394, + "learning_rate": 2.4026086467520803e-05, + "loss": 2.3989, + "step": 15518 + }, + { + "epoch": 1.2524412880316358, + "grad_norm": 0.6796447038650513, + "learning_rate": 2.4015822331412664e-05, + "loss": 
2.4269, + "step": 15519 + }, + { + "epoch": 1.2525219917682189, + "grad_norm": 0.7168079614639282, + "learning_rate": 2.400556008902889e-05, + "loss": 2.4263, + "step": 15520 + }, + { + "epoch": 1.2526026955048017, + "grad_norm": 0.6985058188438416, + "learning_rate": 2.3995299740625186e-05, + "loss": 2.437, + "step": 15521 + }, + { + "epoch": 1.2526833992413848, + "grad_norm": 0.7078086137771606, + "learning_rate": 2.3985041286457287e-05, + "loss": 2.3996, + "step": 15522 + }, + { + "epoch": 1.252764102977968, + "grad_norm": 0.6989054083824158, + "learning_rate": 2.3974784726780865e-05, + "loss": 2.4717, + "step": 15523 + }, + { + "epoch": 1.2528448067145508, + "grad_norm": 0.747606098651886, + "learning_rate": 2.396453006185153e-05, + "loss": 2.4228, + "step": 15524 + }, + { + "epoch": 1.2529255104511339, + "grad_norm": 0.7500887513160706, + "learning_rate": 2.3954277291924876e-05, + "loss": 2.4636, + "step": 15525 + }, + { + "epoch": 1.253006214187717, + "grad_norm": 0.7710712552070618, + "learning_rate": 2.3944026417256437e-05, + "loss": 2.4405, + "step": 15526 + }, + { + "epoch": 1.2530869179242998, + "grad_norm": 0.7278285622596741, + "learning_rate": 2.3933777438101657e-05, + "loss": 2.4279, + "step": 15527 + }, + { + "epoch": 1.253167621660883, + "grad_norm": 0.6979010701179504, + "learning_rate": 2.3923530354715973e-05, + "loss": 2.4272, + "step": 15528 + }, + { + "epoch": 1.253248325397466, + "grad_norm": 0.7330336570739746, + "learning_rate": 2.3913285167354804e-05, + "loss": 2.3861, + "step": 15529 + }, + { + "epoch": 1.2533290291340489, + "grad_norm": 0.675499677658081, + "learning_rate": 2.3903041876273436e-05, + "loss": 2.3987, + "step": 15530 + }, + { + "epoch": 1.253409732870632, + "grad_norm": 0.6854682564735413, + "learning_rate": 2.3892800481727186e-05, + "loss": 2.4085, + "step": 15531 + }, + { + "epoch": 1.253490436607215, + "grad_norm": 0.713810384273529, + "learning_rate": 2.388256098397129e-05, + "loss": 2.3897, + "step": 15532 + }, + { + 
"epoch": 1.253571140343798, + "grad_norm": 0.683214545249939, + "learning_rate": 2.3872323383260953e-05, + "loss": 2.4526, + "step": 15533 + }, + { + "epoch": 1.253651844080381, + "grad_norm": 0.6718357801437378, + "learning_rate": 2.3862087679851318e-05, + "loss": 2.4612, + "step": 15534 + }, + { + "epoch": 1.2537325478169639, + "grad_norm": 0.722283124923706, + "learning_rate": 2.3851853873997488e-05, + "loss": 2.4163, + "step": 15535 + }, + { + "epoch": 1.253813251553547, + "grad_norm": 0.689393162727356, + "learning_rate": 2.384162196595453e-05, + "loss": 2.3984, + "step": 15536 + }, + { + "epoch": 1.2538939552901298, + "grad_norm": 0.7146410346031189, + "learning_rate": 2.3831391955977412e-05, + "loss": 2.4442, + "step": 15537 + }, + { + "epoch": 1.253974659026713, + "grad_norm": 0.6651021838188171, + "learning_rate": 2.3821163844321104e-05, + "loss": 2.4064, + "step": 15538 + }, + { + "epoch": 1.254055362763296, + "grad_norm": 0.7088985443115234, + "learning_rate": 2.381093763124056e-05, + "loss": 2.4831, + "step": 15539 + }, + { + "epoch": 1.2541360664998789, + "grad_norm": 0.661375105381012, + "learning_rate": 2.3800713316990588e-05, + "loss": 2.3657, + "step": 15540 + }, + { + "epoch": 1.254216770236462, + "grad_norm": 0.6870979070663452, + "learning_rate": 2.3790490901826012e-05, + "loss": 2.4208, + "step": 15541 + }, + { + "epoch": 1.254297473973045, + "grad_norm": 0.6256219148635864, + "learning_rate": 2.3780270386001657e-05, + "loss": 2.4182, + "step": 15542 + }, + { + "epoch": 1.254378177709628, + "grad_norm": 0.7070638537406921, + "learning_rate": 2.377005176977215e-05, + "loss": 2.3758, + "step": 15543 + }, + { + "epoch": 1.254458881446211, + "grad_norm": 0.6571370363235474, + "learning_rate": 2.3759835053392242e-05, + "loss": 2.3927, + "step": 15544 + }, + { + "epoch": 1.254539585182794, + "grad_norm": 0.644263744354248, + "learning_rate": 2.3749620237116565e-05, + "loss": 2.3992, + "step": 15545 + }, + { + "epoch": 1.254620288919377, + 
"grad_norm": 0.7127394676208496, + "learning_rate": 2.3739407321199648e-05, + "loss": 2.3942, + "step": 15546 + }, + { + "epoch": 1.25470099265596, + "grad_norm": 0.7274866104125977, + "learning_rate": 2.372919630589605e-05, + "loss": 2.5232, + "step": 15547 + }, + { + "epoch": 1.2547816963925431, + "grad_norm": 0.690138041973114, + "learning_rate": 2.3718987191460274e-05, + "loss": 2.4371, + "step": 15548 + }, + { + "epoch": 1.254862400129126, + "grad_norm": 0.6990681886672974, + "learning_rate": 2.3708779978146724e-05, + "loss": 2.4568, + "step": 15549 + }, + { + "epoch": 1.254943103865709, + "grad_norm": 0.7430790662765503, + "learning_rate": 2.3698574666209793e-05, + "loss": 2.423, + "step": 15550 + }, + { + "epoch": 1.255023807602292, + "grad_norm": 0.6991416215896606, + "learning_rate": 2.3688371255903828e-05, + "loss": 2.4529, + "step": 15551 + }, + { + "epoch": 1.255104511338875, + "grad_norm": 0.6733322739601135, + "learning_rate": 2.367816974748317e-05, + "loss": 2.4531, + "step": 15552 + }, + { + "epoch": 1.2551852150754579, + "grad_norm": 0.7460463047027588, + "learning_rate": 2.3667970141202e-05, + "loss": 2.4267, + "step": 15553 + }, + { + "epoch": 1.255265918812041, + "grad_norm": 0.6784021854400635, + "learning_rate": 2.3657772437314517e-05, + "loss": 2.4996, + "step": 15554 + }, + { + "epoch": 1.255346622548624, + "grad_norm": 0.7499529719352722, + "learning_rate": 2.3647576636074975e-05, + "loss": 2.4749, + "step": 15555 + }, + { + "epoch": 1.255427326285207, + "grad_norm": 0.6698335409164429, + "learning_rate": 2.3637382737737368e-05, + "loss": 2.4499, + "step": 15556 + }, + { + "epoch": 1.25550803002179, + "grad_norm": 0.6644846200942993, + "learning_rate": 2.3627190742555806e-05, + "loss": 2.397, + "step": 15557 + }, + { + "epoch": 1.255588733758373, + "grad_norm": 0.7041488289833069, + "learning_rate": 2.3617000650784315e-05, + "loss": 2.4012, + "step": 15558 + }, + { + "epoch": 1.255669437494956, + "grad_norm": 0.72523033618927, + 
"learning_rate": 2.3606812462676798e-05, + "loss": 2.4151, + "step": 15559 + }, + { + "epoch": 1.255750141231539, + "grad_norm": 0.77669757604599, + "learning_rate": 2.3596626178487225e-05, + "loss": 2.4478, + "step": 15560 + }, + { + "epoch": 1.2558308449681221, + "grad_norm": 0.6919559836387634, + "learning_rate": 2.3586441798469462e-05, + "loss": 2.4548, + "step": 15561 + }, + { + "epoch": 1.255911548704705, + "grad_norm": 0.7613349556922913, + "learning_rate": 2.3576259322877292e-05, + "loss": 2.4475, + "step": 15562 + }, + { + "epoch": 1.255992252441288, + "grad_norm": 0.6738333106040955, + "learning_rate": 2.3566078751964515e-05, + "loss": 2.4242, + "step": 15563 + }, + { + "epoch": 1.256072956177871, + "grad_norm": 0.7242118716239929, + "learning_rate": 2.355590008598486e-05, + "loss": 2.4047, + "step": 15564 + }, + { + "epoch": 1.256153659914454, + "grad_norm": 0.7117685675621033, + "learning_rate": 2.354572332519199e-05, + "loss": 2.4473, + "step": 15565 + }, + { + "epoch": 1.256234363651037, + "grad_norm": 0.7466531991958618, + "learning_rate": 2.3535548469839564e-05, + "loss": 2.453, + "step": 15566 + }, + { + "epoch": 1.25631506738762, + "grad_norm": 0.6750668883323669, + "learning_rate": 2.3525375520181136e-05, + "loss": 2.4367, + "step": 15567 + }, + { + "epoch": 1.256395771124203, + "grad_norm": 0.7640851736068726, + "learning_rate": 2.35152044764703e-05, + "loss": 2.5014, + "step": 15568 + }, + { + "epoch": 1.256476474860786, + "grad_norm": 0.7198928594589233, + "learning_rate": 2.3505035338960456e-05, + "loss": 2.5138, + "step": 15569 + }, + { + "epoch": 1.256557178597369, + "grad_norm": 0.7079946398735046, + "learning_rate": 2.349486810790511e-05, + "loss": 2.4172, + "step": 15570 + }, + { + "epoch": 1.2566378823339521, + "grad_norm": 0.7477186918258667, + "learning_rate": 2.3484702783557655e-05, + "loss": 2.4224, + "step": 15571 + }, + { + "epoch": 1.256718586070535, + "grad_norm": 0.6875394582748413, + "learning_rate": 2.3474539366171388e-05, + 
"loss": 2.4621, + "step": 15572 + }, + { + "epoch": 1.256799289807118, + "grad_norm": 0.7164824604988098, + "learning_rate": 2.346437785599964e-05, + "loss": 2.4416, + "step": 15573 + }, + { + "epoch": 1.2568799935437012, + "grad_norm": 0.7031935453414917, + "learning_rate": 2.3454218253295668e-05, + "loss": 2.3943, + "step": 15574 + }, + { + "epoch": 1.256960697280284, + "grad_norm": 0.6739614009857178, + "learning_rate": 2.3444060558312665e-05, + "loss": 2.4114, + "step": 15575 + }, + { + "epoch": 1.2570414010168671, + "grad_norm": 0.6710866689682007, + "learning_rate": 2.3433904771303794e-05, + "loss": 2.4077, + "step": 15576 + }, + { + "epoch": 1.2571221047534502, + "grad_norm": 0.6589750051498413, + "learning_rate": 2.342375089252219e-05, + "loss": 2.3494, + "step": 15577 + }, + { + "epoch": 1.257202808490033, + "grad_norm": 0.7018333077430725, + "learning_rate": 2.3413598922220857e-05, + "loss": 2.459, + "step": 15578 + }, + { + "epoch": 1.2572835122266162, + "grad_norm": 0.7735301852226257, + "learning_rate": 2.3403448860652842e-05, + "loss": 2.4524, + "step": 15579 + }, + { + "epoch": 1.257364215963199, + "grad_norm": 0.7009726762771606, + "learning_rate": 2.339330070807113e-05, + "loss": 2.4244, + "step": 15580 + }, + { + "epoch": 1.2574449196997821, + "grad_norm": 0.671521008014679, + "learning_rate": 2.3383154464728595e-05, + "loss": 2.3808, + "step": 15581 + }, + { + "epoch": 1.257525623436365, + "grad_norm": 0.7736711502075195, + "learning_rate": 2.3373010130878126e-05, + "loss": 2.4936, + "step": 15582 + }, + { + "epoch": 1.257606327172948, + "grad_norm": 0.6987056136131287, + "learning_rate": 2.336286770677255e-05, + "loss": 2.4484, + "step": 15583 + }, + { + "epoch": 1.2576870309095312, + "grad_norm": 0.6337067484855652, + "learning_rate": 2.3352727192664635e-05, + "loss": 2.4196, + "step": 15584 + }, + { + "epoch": 1.257767734646114, + "grad_norm": 0.6832795143127441, + "learning_rate": 2.3342588588807123e-05, + "loss": 2.3681, + "step": 15585 + }, 
+ { + "epoch": 1.257848438382697, + "grad_norm": 0.7208079695701599, + "learning_rate": 2.3332451895452688e-05, + "loss": 2.4436, + "step": 15586 + }, + { + "epoch": 1.2579291421192802, + "grad_norm": 0.6607621312141418, + "learning_rate": 2.3322317112853986e-05, + "loss": 2.4088, + "step": 15587 + }, + { + "epoch": 1.258009845855863, + "grad_norm": 0.7261247038841248, + "learning_rate": 2.331218424126356e-05, + "loss": 2.4389, + "step": 15588 + }, + { + "epoch": 1.2580905495924462, + "grad_norm": 0.6187729239463806, + "learning_rate": 2.3302053280933954e-05, + "loss": 2.3568, + "step": 15589 + }, + { + "epoch": 1.2581712533290292, + "grad_norm": 0.6196430921554565, + "learning_rate": 2.3291924232117713e-05, + "loss": 2.4285, + "step": 15590 + }, + { + "epoch": 1.258251957065612, + "grad_norm": 0.7271853685379028, + "learning_rate": 2.3281797095067193e-05, + "loss": 2.4058, + "step": 15591 + }, + { + "epoch": 1.2583326608021952, + "grad_norm": 0.7141130566596985, + "learning_rate": 2.327167187003484e-05, + "loss": 2.3971, + "step": 15592 + }, + { + "epoch": 1.2584133645387783, + "grad_norm": 0.680743932723999, + "learning_rate": 2.3261548557273027e-05, + "loss": 2.4387, + "step": 15593 + }, + { + "epoch": 1.2584940682753611, + "grad_norm": 0.718173086643219, + "learning_rate": 2.3251427157033955e-05, + "loss": 2.43, + "step": 15594 + }, + { + "epoch": 1.2585747720119442, + "grad_norm": 0.7600045800209045, + "learning_rate": 2.324130766956998e-05, + "loss": 2.4584, + "step": 15595 + }, + { + "epoch": 1.258655475748527, + "grad_norm": 0.7432500123977661, + "learning_rate": 2.3231190095133294e-05, + "loss": 2.4717, + "step": 15596 + }, + { + "epoch": 1.2587361794851102, + "grad_norm": 0.6603000164031982, + "learning_rate": 2.3221074433975988e-05, + "loss": 2.3952, + "step": 15597 + }, + { + "epoch": 1.258816883221693, + "grad_norm": 0.7020140290260315, + "learning_rate": 2.3210960686350213e-05, + "loss": 2.4064, + "step": 15598 + }, + { + "epoch": 1.2588975869582761, 
+ "grad_norm": 0.7434887290000916, + "learning_rate": 2.320084885250804e-05, + "loss": 2.4708, + "step": 15599 + }, + { + "epoch": 1.2589782906948592, + "grad_norm": 0.6626797318458557, + "learning_rate": 2.3190738932701482e-05, + "loss": 2.4503, + "step": 15600 + }, + { + "epoch": 1.259058994431442, + "grad_norm": 0.7880598902702332, + "learning_rate": 2.3180630927182466e-05, + "loss": 2.384, + "step": 15601 + }, + { + "epoch": 1.2591396981680252, + "grad_norm": 0.7766147255897522, + "learning_rate": 2.3170524836202933e-05, + "loss": 2.4019, + "step": 15602 + }, + { + "epoch": 1.2592204019046083, + "grad_norm": 0.7817980051040649, + "learning_rate": 2.3160420660014792e-05, + "loss": 2.4729, + "step": 15603 + }, + { + "epoch": 1.2593011056411911, + "grad_norm": 0.6915614604949951, + "learning_rate": 2.3150318398869787e-05, + "loss": 2.4028, + "step": 15604 + }, + { + "epoch": 1.2593818093777742, + "grad_norm": 0.690882682800293, + "learning_rate": 2.3140218053019714e-05, + "loss": 2.4386, + "step": 15605 + }, + { + "epoch": 1.2594625131143573, + "grad_norm": 0.6670350432395935, + "learning_rate": 2.3130119622716382e-05, + "loss": 2.4224, + "step": 15606 + }, + { + "epoch": 1.2595432168509402, + "grad_norm": 0.6680006980895996, + "learning_rate": 2.3120023108211375e-05, + "loss": 2.3475, + "step": 15607 + }, + { + "epoch": 1.2596239205875233, + "grad_norm": 0.7003577947616577, + "learning_rate": 2.310992850975636e-05, + "loss": 2.4198, + "step": 15608 + }, + { + "epoch": 1.2597046243241061, + "grad_norm": 0.7444167733192444, + "learning_rate": 2.3099835827602944e-05, + "loss": 2.3756, + "step": 15609 + }, + { + "epoch": 1.2597853280606892, + "grad_norm": 0.6757989525794983, + "learning_rate": 2.3089745062002612e-05, + "loss": 2.3955, + "step": 15610 + }, + { + "epoch": 1.259866031797272, + "grad_norm": 0.6955820322036743, + "learning_rate": 2.3079656213206878e-05, + "loss": 2.4031, + "step": 15611 + }, + { + "epoch": 1.2599467355338552, + "grad_norm": 
0.6646408438682556, + "learning_rate": 2.3069569281467184e-05, + "loss": 2.4246, + "step": 15612 + }, + { + "epoch": 1.2600274392704383, + "grad_norm": 0.6922882199287415, + "learning_rate": 2.3059484267034958e-05, + "loss": 2.4157, + "step": 15613 + }, + { + "epoch": 1.2601081430070211, + "grad_norm": 0.8092310428619385, + "learning_rate": 2.3049401170161468e-05, + "loss": 2.4137, + "step": 15614 + }, + { + "epoch": 1.2601888467436042, + "grad_norm": 0.7024559378623962, + "learning_rate": 2.3039319991098063e-05, + "loss": 2.4497, + "step": 15615 + }, + { + "epoch": 1.2602695504801873, + "grad_norm": 0.7096099853515625, + "learning_rate": 2.302924073009597e-05, + "loss": 2.4045, + "step": 15616 + }, + { + "epoch": 1.2603502542167702, + "grad_norm": 0.6777564287185669, + "learning_rate": 2.3019163387406406e-05, + "loss": 2.4607, + "step": 15617 + }, + { + "epoch": 1.2604309579533532, + "grad_norm": 0.7564159035682678, + "learning_rate": 2.300908796328052e-05, + "loss": 2.4985, + "step": 15618 + }, + { + "epoch": 1.2605116616899363, + "grad_norm": 0.7432986497879028, + "learning_rate": 2.2999014457969447e-05, + "loss": 2.4326, + "step": 15619 + }, + { + "epoch": 1.2605923654265192, + "grad_norm": 0.7178141474723816, + "learning_rate": 2.2988942871724182e-05, + "loss": 2.4118, + "step": 15620 + }, + { + "epoch": 1.2606730691631023, + "grad_norm": 0.7074497938156128, + "learning_rate": 2.2978873204795782e-05, + "loss": 2.4163, + "step": 15621 + }, + { + "epoch": 1.2607537728996854, + "grad_norm": 0.670200765132904, + "learning_rate": 2.2968805457435217e-05, + "loss": 2.4081, + "step": 15622 + }, + { + "epoch": 1.2608344766362682, + "grad_norm": 0.7258187532424927, + "learning_rate": 2.2958739629893355e-05, + "loss": 2.4889, + "step": 15623 + }, + { + "epoch": 1.2609151803728513, + "grad_norm": 0.6999781727790833, + "learning_rate": 2.2948675722421086e-05, + "loss": 2.3945, + "step": 15624 + }, + { + "epoch": 1.2609958841094342, + "grad_norm": 0.7030084133148193, + 
"learning_rate": 2.2938613735269243e-05, + "loss": 2.4509, + "step": 15625 + }, + { + "epoch": 1.2610765878460173, + "grad_norm": 0.6875420212745667, + "learning_rate": 2.292855366868858e-05, + "loss": 2.3658, + "step": 15626 + }, + { + "epoch": 1.2611572915826001, + "grad_norm": 0.7375235557556152, + "learning_rate": 2.2918495522929817e-05, + "loss": 2.4308, + "step": 15627 + }, + { + "epoch": 1.2612379953191832, + "grad_norm": 0.7021106481552124, + "learning_rate": 2.2908439298243644e-05, + "loss": 2.4046, + "step": 15628 + }, + { + "epoch": 1.2613186990557663, + "grad_norm": 0.76661616563797, + "learning_rate": 2.2898384994880716e-05, + "loss": 2.5156, + "step": 15629 + }, + { + "epoch": 1.2613994027923492, + "grad_norm": 0.6684869527816772, + "learning_rate": 2.2888332613091558e-05, + "loss": 2.4342, + "step": 15630 + }, + { + "epoch": 1.2614801065289323, + "grad_norm": 0.6878669261932373, + "learning_rate": 2.2878282153126706e-05, + "loss": 2.4544, + "step": 15631 + }, + { + "epoch": 1.2615608102655154, + "grad_norm": 0.6659132838249207, + "learning_rate": 2.2868233615236702e-05, + "loss": 2.4341, + "step": 15632 + }, + { + "epoch": 1.2616415140020982, + "grad_norm": 0.657474160194397, + "learning_rate": 2.2858186999671905e-05, + "loss": 2.3515, + "step": 15633 + }, + { + "epoch": 1.2617222177386813, + "grad_norm": 0.7245650291442871, + "learning_rate": 2.284814230668274e-05, + "loss": 2.3983, + "step": 15634 + }, + { + "epoch": 1.2618029214752644, + "grad_norm": 0.6400195360183716, + "learning_rate": 2.2838099536519554e-05, + "loss": 2.3535, + "step": 15635 + }, + { + "epoch": 1.2618836252118473, + "grad_norm": 0.6719450950622559, + "learning_rate": 2.282805868943262e-05, + "loss": 2.3906, + "step": 15636 + }, + { + "epoch": 1.2619643289484304, + "grad_norm": 0.682746946811676, + "learning_rate": 2.2818019765672207e-05, + "loss": 2.4045, + "step": 15637 + }, + { + "epoch": 1.2620450326850134, + "grad_norm": 0.6631760597229004, + "learning_rate": 
2.2807982765488513e-05, + "loss": 2.4896, + "step": 15638 + }, + { + "epoch": 1.2621257364215963, + "grad_norm": 0.782202422618866, + "learning_rate": 2.279794768913164e-05, + "loss": 2.4628, + "step": 15639 + }, + { + "epoch": 1.2622064401581794, + "grad_norm": 0.7579823732376099, + "learning_rate": 2.278791453685173e-05, + "loss": 2.4635, + "step": 15640 + }, + { + "epoch": 1.2622871438947623, + "grad_norm": 0.665096640586853, + "learning_rate": 2.277788330889884e-05, + "loss": 2.4899, + "step": 15641 + }, + { + "epoch": 1.2623678476313454, + "grad_norm": 0.7635685205459595, + "learning_rate": 2.2767854005522936e-05, + "loss": 2.4146, + "step": 15642 + }, + { + "epoch": 1.2624485513679282, + "grad_norm": 0.7579118609428406, + "learning_rate": 2.2757826626974e-05, + "loss": 2.3692, + "step": 15643 + }, + { + "epoch": 1.2625292551045113, + "grad_norm": 0.6772074699401855, + "learning_rate": 2.2747801173501938e-05, + "loss": 2.3954, + "step": 15644 + }, + { + "epoch": 1.2626099588410944, + "grad_norm": 0.7028382420539856, + "learning_rate": 2.2737777645356606e-05, + "loss": 2.4799, + "step": 15645 + }, + { + "epoch": 1.2626906625776773, + "grad_norm": 0.7152617573738098, + "learning_rate": 2.2727756042787818e-05, + "loss": 2.4095, + "step": 15646 + }, + { + "epoch": 1.2627713663142603, + "grad_norm": 0.7286608219146729, + "learning_rate": 2.271773636604535e-05, + "loss": 2.4496, + "step": 15647 + }, + { + "epoch": 1.2628520700508434, + "grad_norm": 0.7006896734237671, + "learning_rate": 2.2707718615378935e-05, + "loss": 2.4128, + "step": 15648 + }, + { + "epoch": 1.2629327737874263, + "grad_norm": 0.6856697797775269, + "learning_rate": 2.2697702791038177e-05, + "loss": 2.4169, + "step": 15649 + }, + { + "epoch": 1.2630134775240094, + "grad_norm": 0.7582918405532837, + "learning_rate": 2.268768889327275e-05, + "loss": 2.4007, + "step": 15650 + }, + { + "epoch": 1.2630941812605925, + "grad_norm": 0.664633572101593, + "learning_rate": 2.2677676922332237e-05, + "loss": 
2.3876, + "step": 15651 + }, + { + "epoch": 1.2631748849971753, + "grad_norm": 0.7283070087432861, + "learning_rate": 2.266766687846611e-05, + "loss": 2.4175, + "step": 15652 + }, + { + "epoch": 1.2632555887337584, + "grad_norm": 0.7309537529945374, + "learning_rate": 2.2657658761923863e-05, + "loss": 2.3998, + "step": 15653 + }, + { + "epoch": 1.2633362924703415, + "grad_norm": 0.6386510133743286, + "learning_rate": 2.2647652572954968e-05, + "loss": 2.3723, + "step": 15654 + }, + { + "epoch": 1.2634169962069244, + "grad_norm": 0.6805689930915833, + "learning_rate": 2.263764831180876e-05, + "loss": 2.3989, + "step": 15655 + }, + { + "epoch": 1.2634976999435072, + "grad_norm": 0.7147208452224731, + "learning_rate": 2.2627645978734536e-05, + "loss": 2.4748, + "step": 15656 + }, + { + "epoch": 1.2635784036800903, + "grad_norm": 0.6835155487060547, + "learning_rate": 2.2617645573981683e-05, + "loss": 2.4266, + "step": 15657 + }, + { + "epoch": 1.2636591074166734, + "grad_norm": 0.7631552219390869, + "learning_rate": 2.2607647097799368e-05, + "loss": 2.4152, + "step": 15658 + }, + { + "epoch": 1.2637398111532563, + "grad_norm": 0.6793624758720398, + "learning_rate": 2.2597650550436777e-05, + "loss": 2.3491, + "step": 15659 + }, + { + "epoch": 1.2638205148898394, + "grad_norm": 0.6465637683868408, + "learning_rate": 2.2587655932143083e-05, + "loss": 2.3774, + "step": 15660 + }, + { + "epoch": 1.2639012186264225, + "grad_norm": 0.6920284628868103, + "learning_rate": 2.2577663243167368e-05, + "loss": 2.4321, + "step": 15661 + }, + { + "epoch": 1.2639819223630053, + "grad_norm": 0.6922522783279419, + "learning_rate": 2.256767248375866e-05, + "loss": 2.4242, + "step": 15662 + }, + { + "epoch": 1.2640626260995884, + "grad_norm": 0.6811214089393616, + "learning_rate": 2.255768365416595e-05, + "loss": 2.4101, + "step": 15663 + }, + { + "epoch": 1.2641433298361715, + "grad_norm": 0.6704947352409363, + "learning_rate": 2.2547696754638238e-05, + "loss": 2.4792, + "step": 15664 + 
}, + { + "epoch": 1.2642240335727544, + "grad_norm": 0.6814701557159424, + "learning_rate": 2.2537711785424354e-05, + "loss": 2.4429, + "step": 15665 + }, + { + "epoch": 1.2643047373093375, + "grad_norm": 0.6778244972229004, + "learning_rate": 2.252772874677318e-05, + "loss": 2.3882, + "step": 15666 + }, + { + "epoch": 1.2643854410459205, + "grad_norm": 0.6570093035697937, + "learning_rate": 2.2517747638933518e-05, + "loss": 2.4162, + "step": 15667 + }, + { + "epoch": 1.2644661447825034, + "grad_norm": 0.6973466873168945, + "learning_rate": 2.2507768462154133e-05, + "loss": 2.3646, + "step": 15668 + }, + { + "epoch": 1.2645468485190865, + "grad_norm": 0.7258623242378235, + "learning_rate": 2.2497791216683715e-05, + "loss": 2.404, + "step": 15669 + }, + { + "epoch": 1.2646275522556694, + "grad_norm": 0.7462170124053955, + "learning_rate": 2.248781590277097e-05, + "loss": 2.5076, + "step": 15670 + }, + { + "epoch": 1.2647082559922525, + "grad_norm": 0.7070441246032715, + "learning_rate": 2.247784252066444e-05, + "loss": 2.3817, + "step": 15671 + }, + { + "epoch": 1.2647889597288353, + "grad_norm": 0.7150183916091919, + "learning_rate": 2.246787107061272e-05, + "loss": 2.461, + "step": 15672 + }, + { + "epoch": 1.2648696634654184, + "grad_norm": 0.668436586856842, + "learning_rate": 2.2457901552864347e-05, + "loss": 2.466, + "step": 15673 + }, + { + "epoch": 1.2649503672020015, + "grad_norm": 0.7011097073554993, + "learning_rate": 2.2447933967667745e-05, + "loss": 2.4582, + "step": 15674 + }, + { + "epoch": 1.2650310709385844, + "grad_norm": 0.7149096727371216, + "learning_rate": 2.243796831527134e-05, + "loss": 2.4461, + "step": 15675 + }, + { + "epoch": 1.2651117746751674, + "grad_norm": 0.6810914278030396, + "learning_rate": 2.2428004595923525e-05, + "loss": 2.4043, + "step": 15676 + }, + { + "epoch": 1.2651924784117505, + "grad_norm": 0.7700765132904053, + "learning_rate": 2.241804280987261e-05, + "loss": 2.4197, + "step": 15677 + }, + { + "epoch": 
1.2652731821483334, + "grad_norm": 0.6897448897361755, + "learning_rate": 2.240808295736686e-05, + "loss": 2.4052, + "step": 15678 + }, + { + "epoch": 1.2653538858849165, + "grad_norm": 0.7092932462692261, + "learning_rate": 2.2398125038654515e-05, + "loss": 2.4088, + "step": 15679 + }, + { + "epoch": 1.2654345896214996, + "grad_norm": 0.6930294632911682, + "learning_rate": 2.2388169053983777e-05, + "loss": 2.4504, + "step": 15680 + }, + { + "epoch": 1.2655152933580824, + "grad_norm": 0.7056782245635986, + "learning_rate": 2.237821500360271e-05, + "loss": 2.3975, + "step": 15681 + }, + { + "epoch": 1.2655959970946655, + "grad_norm": 0.651772141456604, + "learning_rate": 2.236826288775944e-05, + "loss": 2.3941, + "step": 15682 + }, + { + "epoch": 1.2656767008312486, + "grad_norm": 0.7254980206489563, + "learning_rate": 2.2358312706702012e-05, + "loss": 2.4149, + "step": 15683 + }, + { + "epoch": 1.2657574045678315, + "grad_norm": 0.6553635597229004, + "learning_rate": 2.2348364460678373e-05, + "loss": 2.4099, + "step": 15684 + }, + { + "epoch": 1.2658381083044146, + "grad_norm": 0.6952616572380066, + "learning_rate": 2.233841814993646e-05, + "loss": 2.384, + "step": 15685 + }, + { + "epoch": 1.2659188120409974, + "grad_norm": 0.72947096824646, + "learning_rate": 2.2328473774724178e-05, + "loss": 2.5033, + "step": 15686 + }, + { + "epoch": 1.2659995157775805, + "grad_norm": 0.7419683933258057, + "learning_rate": 2.231853133528937e-05, + "loss": 2.4881, + "step": 15687 + }, + { + "epoch": 1.2660802195141634, + "grad_norm": 0.7125211358070374, + "learning_rate": 2.2308590831879827e-05, + "loss": 2.4334, + "step": 15688 + }, + { + "epoch": 1.2661609232507465, + "grad_norm": 0.6668617129325867, + "learning_rate": 2.2298652264743315e-05, + "loss": 2.4144, + "step": 15689 + }, + { + "epoch": 1.2662416269873296, + "grad_norm": 0.8075512051582336, + "learning_rate": 2.2288715634127465e-05, + "loss": 2.421, + "step": 15690 + }, + { + "epoch": 1.2663223307239124, + 
"grad_norm": 0.6894629001617432, + "learning_rate": 2.2278780940279965e-05, + "loss": 2.4142, + "step": 15691 + }, + { + "epoch": 1.2664030344604955, + "grad_norm": 0.7418074011802673, + "learning_rate": 2.226884818344841e-05, + "loss": 2.4214, + "step": 15692 + }, + { + "epoch": 1.2664837381970786, + "grad_norm": 0.6724219918251038, + "learning_rate": 2.225891736388037e-05, + "loss": 2.4455, + "step": 15693 + }, + { + "epoch": 1.2665644419336615, + "grad_norm": 0.7202882766723633, + "learning_rate": 2.224898848182331e-05, + "loss": 2.4017, + "step": 15694 + }, + { + "epoch": 1.2666451456702446, + "grad_norm": 0.7671259641647339, + "learning_rate": 2.2239061537524698e-05, + "loss": 2.4386, + "step": 15695 + }, + { + "epoch": 1.2667258494068276, + "grad_norm": 0.7154317498207092, + "learning_rate": 2.222913653123194e-05, + "loss": 2.3754, + "step": 15696 + }, + { + "epoch": 1.2668065531434105, + "grad_norm": 0.7203264236450195, + "learning_rate": 2.221921346319239e-05, + "loss": 2.3926, + "step": 15697 + }, + { + "epoch": 1.2668872568799936, + "grad_norm": 0.7104187607765198, + "learning_rate": 2.2209292333653365e-05, + "loss": 2.4528, + "step": 15698 + }, + { + "epoch": 1.2669679606165767, + "grad_norm": 0.7650138139724731, + "learning_rate": 2.2199373142862158e-05, + "loss": 2.4372, + "step": 15699 + }, + { + "epoch": 1.2670486643531595, + "grad_norm": 0.6796044111251831, + "learning_rate": 2.2189455891065903e-05, + "loss": 2.415, + "step": 15700 + }, + { + "epoch": 1.2671293680897426, + "grad_norm": 0.6749297380447388, + "learning_rate": 2.2179540578511813e-05, + "loss": 2.4337, + "step": 15701 + }, + { + "epoch": 1.2672100718263255, + "grad_norm": 0.7330272793769836, + "learning_rate": 2.216962720544703e-05, + "loss": 2.4322, + "step": 15702 + }, + { + "epoch": 1.2672907755629086, + "grad_norm": 0.6793510913848877, + "learning_rate": 2.215971577211855e-05, + "loss": 2.4473, + "step": 15703 + }, + { + "epoch": 1.2673714792994915, + "grad_norm": 
0.7477267384529114, + "learning_rate": 2.2149806278773433e-05, + "loss": 2.4699, + "step": 15704 + }, + { + "epoch": 1.2674521830360745, + "grad_norm": 0.7048643827438354, + "learning_rate": 2.213989872565867e-05, + "loss": 2.4341, + "step": 15705 + }, + { + "epoch": 1.2675328867726576, + "grad_norm": 0.647433340549469, + "learning_rate": 2.2129993113021108e-05, + "loss": 2.423, + "step": 15706 + }, + { + "epoch": 1.2676135905092405, + "grad_norm": 0.6886507272720337, + "learning_rate": 2.2120089441107706e-05, + "loss": 2.4185, + "step": 15707 + }, + { + "epoch": 1.2676942942458236, + "grad_norm": 0.6720516085624695, + "learning_rate": 2.2110187710165242e-05, + "loss": 2.4587, + "step": 15708 + }, + { + "epoch": 1.2677749979824067, + "grad_norm": 0.676665723323822, + "learning_rate": 2.2100287920440543e-05, + "loss": 2.4241, + "step": 15709 + }, + { + "epoch": 1.2678557017189895, + "grad_norm": 0.6939559578895569, + "learning_rate": 2.209039007218028e-05, + "loss": 2.3974, + "step": 15710 + }, + { + "epoch": 1.2679364054555726, + "grad_norm": 0.6485786437988281, + "learning_rate": 2.2080494165631137e-05, + "loss": 2.4041, + "step": 15711 + }, + { + "epoch": 1.2680171091921557, + "grad_norm": 0.668319582939148, + "learning_rate": 2.2070600201039802e-05, + "loss": 2.4705, + "step": 15712 + }, + { + "epoch": 1.2680978129287386, + "grad_norm": 0.6837478280067444, + "learning_rate": 2.206070817865279e-05, + "loss": 2.4474, + "step": 15713 + }, + { + "epoch": 1.2681785166653217, + "grad_norm": 0.7000131011009216, + "learning_rate": 2.2050818098716664e-05, + "loss": 2.4463, + "step": 15714 + }, + { + "epoch": 1.2682592204019045, + "grad_norm": 0.7063068151473999, + "learning_rate": 2.204092996147794e-05, + "loss": 2.4226, + "step": 15715 + }, + { + "epoch": 1.2683399241384876, + "grad_norm": 0.6497172117233276, + "learning_rate": 2.2031043767183003e-05, + "loss": 2.3678, + "step": 15716 + }, + { + "epoch": 1.2684206278750705, + "grad_norm": 0.6558645963668823, + 
"learning_rate": 2.2021159516078262e-05, + "loss": 2.4021, + "step": 15717 + }, + { + "epoch": 1.2685013316116536, + "grad_norm": 0.7411713600158691, + "learning_rate": 2.2011277208410062e-05, + "loss": 2.4346, + "step": 15718 + }, + { + "epoch": 1.2685820353482367, + "grad_norm": 0.7275578379631042, + "learning_rate": 2.2001396844424714e-05, + "loss": 2.4262, + "step": 15719 + }, + { + "epoch": 1.2686627390848195, + "grad_norm": 0.7010936141014099, + "learning_rate": 2.199151842436844e-05, + "loss": 2.4774, + "step": 15720 + }, + { + "epoch": 1.2687434428214026, + "grad_norm": 0.7551137208938599, + "learning_rate": 2.1981641948487462e-05, + "loss": 2.5286, + "step": 15721 + }, + { + "epoch": 1.2688241465579857, + "grad_norm": 0.6510799527168274, + "learning_rate": 2.1971767417027888e-05, + "loss": 2.3813, + "step": 15722 + }, + { + "epoch": 1.2689048502945686, + "grad_norm": 0.636050283908844, + "learning_rate": 2.196189483023584e-05, + "loss": 2.4226, + "step": 15723 + }, + { + "epoch": 1.2689855540311517, + "grad_norm": 0.6939265131950378, + "learning_rate": 2.1952024188357368e-05, + "loss": 2.4516, + "step": 15724 + }, + { + "epoch": 1.2690662577677347, + "grad_norm": 0.6715239882469177, + "learning_rate": 2.1942155491638494e-05, + "loss": 2.4358, + "step": 15725 + }, + { + "epoch": 1.2691469615043176, + "grad_norm": 0.740680456161499, + "learning_rate": 2.1932288740325123e-05, + "loss": 2.4135, + "step": 15726 + }, + { + "epoch": 1.2692276652409007, + "grad_norm": 0.6969335079193115, + "learning_rate": 2.1922423934663193e-05, + "loss": 2.43, + "step": 15727 + }, + { + "epoch": 1.2693083689774838, + "grad_norm": 0.6390758156776428, + "learning_rate": 2.1912561074898554e-05, + "loss": 2.4492, + "step": 15728 + }, + { + "epoch": 1.2693890727140666, + "grad_norm": 0.7129701375961304, + "learning_rate": 2.190270016127701e-05, + "loss": 2.3799, + "step": 15729 + }, + { + "epoch": 1.2694697764506497, + "grad_norm": 0.7309553027153015, + "learning_rate": 
2.1892841194044332e-05, + "loss": 2.4955, + "step": 15730 + }, + { + "epoch": 1.2695504801872326, + "grad_norm": 0.7257225513458252, + "learning_rate": 2.1882984173446252e-05, + "loss": 2.4184, + "step": 15731 + }, + { + "epoch": 1.2696311839238157, + "grad_norm": 0.7434510588645935, + "learning_rate": 2.1873129099728384e-05, + "loss": 2.453, + "step": 15732 + }, + { + "epoch": 1.2697118876603986, + "grad_norm": 0.6643160581588745, + "learning_rate": 2.1863275973136356e-05, + "loss": 2.3619, + "step": 15733 + }, + { + "epoch": 1.2697925913969816, + "grad_norm": 0.6677344441413879, + "learning_rate": 2.1853424793915778e-05, + "loss": 2.406, + "step": 15734 + }, + { + "epoch": 1.2698732951335647, + "grad_norm": 0.760028064250946, + "learning_rate": 2.1843575562312092e-05, + "loss": 2.5479, + "step": 15735 + }, + { + "epoch": 1.2699539988701476, + "grad_norm": 0.6668389439582825, + "learning_rate": 2.183372827857082e-05, + "loss": 2.4104, + "step": 15736 + }, + { + "epoch": 1.2700347026067307, + "grad_norm": 0.651155412197113, + "learning_rate": 2.182388294293736e-05, + "loss": 2.3738, + "step": 15737 + }, + { + "epoch": 1.2701154063433138, + "grad_norm": 0.736907958984375, + "learning_rate": 2.1814039555657084e-05, + "loss": 2.4179, + "step": 15738 + }, + { + "epoch": 1.2701961100798966, + "grad_norm": 0.7068225741386414, + "learning_rate": 2.180419811697534e-05, + "loss": 2.3911, + "step": 15739 + }, + { + "epoch": 1.2702768138164797, + "grad_norm": 0.6959261894226074, + "learning_rate": 2.1794358627137368e-05, + "loss": 2.452, + "step": 15740 + }, + { + "epoch": 1.2703575175530628, + "grad_norm": 0.6886181235313416, + "learning_rate": 2.1784521086388442e-05, + "loss": 2.4166, + "step": 15741 + }, + { + "epoch": 1.2704382212896457, + "grad_norm": 0.6494541168212891, + "learning_rate": 2.177468549497369e-05, + "loss": 2.3589, + "step": 15742 + }, + { + "epoch": 1.2705189250262288, + "grad_norm": 0.7008326649665833, + "learning_rate": 2.1764851853138247e-05, + "loss": 
2.3697, + "step": 15743 + }, + { + "epoch": 1.2705996287628119, + "grad_norm": 0.6800456643104553, + "learning_rate": 2.1755020161127238e-05, + "loss": 2.4162, + "step": 15744 + }, + { + "epoch": 1.2706803324993947, + "grad_norm": 0.6836018562316895, + "learning_rate": 2.1745190419185634e-05, + "loss": 2.3977, + "step": 15745 + }, + { + "epoch": 1.2707610362359778, + "grad_norm": 0.6489691138267517, + "learning_rate": 2.173536262755844e-05, + "loss": 2.464, + "step": 15746 + }, + { + "epoch": 1.2708417399725607, + "grad_norm": 0.7309786677360535, + "learning_rate": 2.172553678649061e-05, + "loss": 2.4065, + "step": 15747 + }, + { + "epoch": 1.2709224437091438, + "grad_norm": 0.6752686500549316, + "learning_rate": 2.1715712896227004e-05, + "loss": 2.3935, + "step": 15748 + }, + { + "epoch": 1.2710031474457266, + "grad_norm": 0.7039850354194641, + "learning_rate": 2.1705890957012465e-05, + "loss": 2.4605, + "step": 15749 + }, + { + "epoch": 1.2710838511823097, + "grad_norm": 0.6904652714729309, + "learning_rate": 2.169607096909182e-05, + "loss": 2.4264, + "step": 15750 + }, + { + "epoch": 1.2711645549188928, + "grad_norm": 0.7104331254959106, + "learning_rate": 2.168625293270974e-05, + "loss": 2.378, + "step": 15751 + }, + { + "epoch": 1.2712452586554757, + "grad_norm": 0.6732800602912903, + "learning_rate": 2.167643684811096e-05, + "loss": 2.4216, + "step": 15752 + }, + { + "epoch": 1.2713259623920588, + "grad_norm": 0.7207335829734802, + "learning_rate": 2.166662271554011e-05, + "loss": 2.3861, + "step": 15753 + }, + { + "epoch": 1.2714066661286418, + "grad_norm": 0.7561055421829224, + "learning_rate": 2.1656810535241813e-05, + "loss": 2.4753, + "step": 15754 + }, + { + "epoch": 1.2714873698652247, + "grad_norm": 0.7018210887908936, + "learning_rate": 2.1647000307460564e-05, + "loss": 2.401, + "step": 15755 + }, + { + "epoch": 1.2715680736018078, + "grad_norm": 0.6908013224601746, + "learning_rate": 2.163719203244089e-05, + "loss": 2.4451, + "step": 15756 + }, + { 
+ "epoch": 1.2716487773383909, + "grad_norm": 0.734909176826477, + "learning_rate": 2.162738571042723e-05, + "loss": 2.4221, + "step": 15757 + }, + { + "epoch": 1.2717294810749737, + "grad_norm": 0.7047279477119446, + "learning_rate": 2.1617581341663973e-05, + "loss": 2.4149, + "step": 15758 + }, + { + "epoch": 1.2718101848115568, + "grad_norm": 0.6875640749931335, + "learning_rate": 2.1607778926395496e-05, + "loss": 2.3874, + "step": 15759 + }, + { + "epoch": 1.2718908885481397, + "grad_norm": 0.7300851345062256, + "learning_rate": 2.159797846486611e-05, + "loss": 2.4706, + "step": 15760 + }, + { + "epoch": 1.2719715922847228, + "grad_norm": 0.733775794506073, + "learning_rate": 2.1588179957320022e-05, + "loss": 2.4208, + "step": 15761 + }, + { + "epoch": 1.2720522960213057, + "grad_norm": 0.8375213742256165, + "learning_rate": 2.1578383404001458e-05, + "loss": 2.4672, + "step": 15762 + }, + { + "epoch": 1.2721329997578887, + "grad_norm": 0.7276780009269714, + "learning_rate": 2.15685888051546e-05, + "loss": 2.4536, + "step": 15763 + }, + { + "epoch": 1.2722137034944718, + "grad_norm": 0.7765224575996399, + "learning_rate": 2.1558796161023508e-05, + "loss": 2.3671, + "step": 15764 + }, + { + "epoch": 1.2722944072310547, + "grad_norm": 0.7225642204284668, + "learning_rate": 2.1549005471852256e-05, + "loss": 2.4316, + "step": 15765 + }, + { + "epoch": 1.2723751109676378, + "grad_norm": 0.6959484219551086, + "learning_rate": 2.1539216737884904e-05, + "loss": 2.4581, + "step": 15766 + }, + { + "epoch": 1.2724558147042209, + "grad_norm": 0.6943621039390564, + "learning_rate": 2.1529429959365332e-05, + "loss": 2.4372, + "step": 15767 + }, + { + "epoch": 1.2725365184408037, + "grad_norm": 0.7067148089408875, + "learning_rate": 2.151964513653746e-05, + "loss": 2.431, + "step": 15768 + }, + { + "epoch": 1.2726172221773868, + "grad_norm": 0.8317076563835144, + "learning_rate": 2.150986226964521e-05, + "loss": 2.4177, + "step": 15769 + }, + { + "epoch": 1.27269792591397, + 
"grad_norm": 0.7390087246894836, + "learning_rate": 2.150008135893239e-05, + "loss": 2.4711, + "step": 15770 + }, + { + "epoch": 1.2727786296505528, + "grad_norm": 0.6829150915145874, + "learning_rate": 2.1490302404642725e-05, + "loss": 2.4477, + "step": 15771 + }, + { + "epoch": 1.2728593333871359, + "grad_norm": 0.7355613708496094, + "learning_rate": 2.148052540701995e-05, + "loss": 2.493, + "step": 15772 + }, + { + "epoch": 1.272940037123719, + "grad_norm": 0.6872289776802063, + "learning_rate": 2.1470750366307747e-05, + "loss": 2.4363, + "step": 15773 + }, + { + "epoch": 1.2730207408603018, + "grad_norm": 0.7753220796585083, + "learning_rate": 2.1460977282749705e-05, + "loss": 2.4376, + "step": 15774 + }, + { + "epoch": 1.273101444596885, + "grad_norm": 0.6717056632041931, + "learning_rate": 2.145120615658942e-05, + "loss": 2.4383, + "step": 15775 + }, + { + "epoch": 1.2731821483334678, + "grad_norm": 0.7441569566726685, + "learning_rate": 2.1441436988070428e-05, + "loss": 2.462, + "step": 15776 + }, + { + "epoch": 1.2732628520700509, + "grad_norm": 0.6824371814727783, + "learning_rate": 2.143166977743615e-05, + "loss": 2.4173, + "step": 15777 + }, + { + "epoch": 1.2733435558066337, + "grad_norm": 0.7310225963592529, + "learning_rate": 2.1421904524930038e-05, + "loss": 2.4222, + "step": 15778 + }, + { + "epoch": 1.2734242595432168, + "grad_norm": 0.7198066115379333, + "learning_rate": 2.141214123079548e-05, + "loss": 2.4262, + "step": 15779 + }, + { + "epoch": 1.2735049632798, + "grad_norm": 0.7081776857376099, + "learning_rate": 2.1402379895275783e-05, + "loss": 2.4473, + "step": 15780 + }, + { + "epoch": 1.2735856670163828, + "grad_norm": 0.6909368634223938, + "learning_rate": 2.1392620518614235e-05, + "loss": 2.4528, + "step": 15781 + }, + { + "epoch": 1.2736663707529658, + "grad_norm": 0.7170675992965698, + "learning_rate": 2.1382863101054107e-05, + "loss": 2.4214, + "step": 15782 + }, + { + "epoch": 1.273747074489549, + "grad_norm": 0.6992846727371216, + 
"learning_rate": 2.1373107642838497e-05, + "loss": 2.4397, + "step": 15783 + }, + { + "epoch": 1.2738277782261318, + "grad_norm": 0.7245237231254578, + "learning_rate": 2.1363354144210578e-05, + "loss": 2.373, + "step": 15784 + }, + { + "epoch": 1.273908481962715, + "grad_norm": 0.6929232478141785, + "learning_rate": 2.1353602605413435e-05, + "loss": 2.4297, + "step": 15785 + }, + { + "epoch": 1.273989185699298, + "grad_norm": 0.7243950366973877, + "learning_rate": 2.134385302669013e-05, + "loss": 2.3856, + "step": 15786 + }, + { + "epoch": 1.2740698894358808, + "grad_norm": 0.6712679266929626, + "learning_rate": 2.133410540828359e-05, + "loss": 2.3818, + "step": 15787 + }, + { + "epoch": 1.274150593172464, + "grad_norm": 0.7433474063873291, + "learning_rate": 2.1324359750436774e-05, + "loss": 2.4148, + "step": 15788 + }, + { + "epoch": 1.274231296909047, + "grad_norm": 0.7225894927978516, + "learning_rate": 2.1314616053392577e-05, + "loss": 2.395, + "step": 15789 + }, + { + "epoch": 1.2743120006456299, + "grad_norm": 0.7026889324188232, + "learning_rate": 2.130487431739383e-05, + "loss": 2.4693, + "step": 15790 + }, + { + "epoch": 1.274392704382213, + "grad_norm": 0.6898565292358398, + "learning_rate": 2.1295134542683325e-05, + "loss": 2.3643, + "step": 15791 + }, + { + "epoch": 1.2744734081187958, + "grad_norm": 0.7212820649147034, + "learning_rate": 2.1285396729503826e-05, + "loss": 2.4178, + "step": 15792 + }, + { + "epoch": 1.274554111855379, + "grad_norm": 0.7149149179458618, + "learning_rate": 2.127566087809798e-05, + "loss": 2.4023, + "step": 15793 + }, + { + "epoch": 1.2746348155919618, + "grad_norm": 0.7039671540260315, + "learning_rate": 2.126592698870846e-05, + "loss": 2.4667, + "step": 15794 + }, + { + "epoch": 1.2747155193285449, + "grad_norm": 0.806849479675293, + "learning_rate": 2.1256195061577877e-05, + "loss": 2.4741, + "step": 15795 + }, + { + "epoch": 1.274796223065128, + "grad_norm": 0.7544776797294617, + "learning_rate": 
2.124646509694872e-05, + "loss": 2.4258, + "step": 15796 + }, + { + "epoch": 1.2748769268017108, + "grad_norm": 0.6946810483932495, + "learning_rate": 2.1236737095063518e-05, + "loss": 2.4088, + "step": 15797 + }, + { + "epoch": 1.274957630538294, + "grad_norm": 0.7714219093322754, + "learning_rate": 2.1227011056164714e-05, + "loss": 2.4705, + "step": 15798 + }, + { + "epoch": 1.275038334274877, + "grad_norm": 0.6789658665657043, + "learning_rate": 2.121728698049471e-05, + "loss": 2.4692, + "step": 15799 + }, + { + "epoch": 1.2751190380114599, + "grad_norm": 0.7003477215766907, + "learning_rate": 2.120756486829586e-05, + "loss": 2.4437, + "step": 15800 + }, + { + "epoch": 1.275199741748043, + "grad_norm": 0.6802948117256165, + "learning_rate": 2.1197844719810455e-05, + "loss": 2.4002, + "step": 15801 + }, + { + "epoch": 1.275280445484626, + "grad_norm": 0.67823326587677, + "learning_rate": 2.1188126535280773e-05, + "loss": 2.5119, + "step": 15802 + }, + { + "epoch": 1.275361149221209, + "grad_norm": 0.6580843925476074, + "learning_rate": 2.1178410314948972e-05, + "loss": 2.3814, + "step": 15803 + }, + { + "epoch": 1.275441852957792, + "grad_norm": 0.681642472743988, + "learning_rate": 2.1168696059057226e-05, + "loss": 2.4206, + "step": 15804 + }, + { + "epoch": 1.275522556694375, + "grad_norm": 0.7483543753623962, + "learning_rate": 2.1158983767847674e-05, + "loss": 2.4633, + "step": 15805 + }, + { + "epoch": 1.275603260430958, + "grad_norm": 0.6565235257148743, + "learning_rate": 2.11492734415623e-05, + "loss": 2.4145, + "step": 15806 + }, + { + "epoch": 1.275683964167541, + "grad_norm": 0.6606764793395996, + "learning_rate": 2.1139565080443157e-05, + "loss": 2.3935, + "step": 15807 + }, + { + "epoch": 1.275764667904124, + "grad_norm": 0.7915800213813782, + "learning_rate": 2.1129858684732206e-05, + "loss": 2.4288, + "step": 15808 + }, + { + "epoch": 1.275845371640707, + "grad_norm": 0.6763594746589661, + "learning_rate": 2.112015425467133e-05, + "loss": 2.4147, + 
"step": 15809 + }, + { + "epoch": 1.2759260753772899, + "grad_norm": 0.6886053085327148, + "learning_rate": 2.1110451790502405e-05, + "loss": 2.3798, + "step": 15810 + }, + { + "epoch": 1.276006779113873, + "grad_norm": 0.686122715473175, + "learning_rate": 2.110075129246728e-05, + "loss": 2.3896, + "step": 15811 + }, + { + "epoch": 1.276087482850456, + "grad_norm": 0.6989614367485046, + "learning_rate": 2.109105276080764e-05, + "loss": 2.4533, + "step": 15812 + }, + { + "epoch": 1.276168186587039, + "grad_norm": 0.6818450689315796, + "learning_rate": 2.1081356195765232e-05, + "loss": 2.4012, + "step": 15813 + }, + { + "epoch": 1.276248890323622, + "grad_norm": 0.7492663860321045, + "learning_rate": 2.107166159758176e-05, + "loss": 2.4269, + "step": 15814 + }, + { + "epoch": 1.276329594060205, + "grad_norm": 0.6752359867095947, + "learning_rate": 2.1061968966498767e-05, + "loss": 2.4478, + "step": 15815 + }, + { + "epoch": 1.276410297796788, + "grad_norm": 0.6784162521362305, + "learning_rate": 2.1052278302757854e-05, + "loss": 2.4853, + "step": 15816 + }, + { + "epoch": 1.276491001533371, + "grad_norm": 0.7273215651512146, + "learning_rate": 2.104258960660055e-05, + "loss": 2.4365, + "step": 15817 + }, + { + "epoch": 1.2765717052699541, + "grad_norm": 0.7021621465682983, + "learning_rate": 2.1032902878268323e-05, + "loss": 2.4665, + "step": 15818 + }, + { + "epoch": 1.276652409006537, + "grad_norm": 0.666828989982605, + "learning_rate": 2.102321811800253e-05, + "loss": 2.3922, + "step": 15819 + }, + { + "epoch": 1.27673311274312, + "grad_norm": 0.6780487298965454, + "learning_rate": 2.1013535326044608e-05, + "loss": 2.4072, + "step": 15820 + }, + { + "epoch": 1.276813816479703, + "grad_norm": 0.6474688053131104, + "learning_rate": 2.1003854502635888e-05, + "loss": 2.4145, + "step": 15821 + }, + { + "epoch": 1.276894520216286, + "grad_norm": 0.6712753772735596, + "learning_rate": 2.0994175648017587e-05, + "loss": 2.4349, + "step": 15822 + }, + { + "epoch": 
1.2769752239528689, + "grad_norm": 0.6705189943313599, + "learning_rate": 2.098449876243096e-05, + "loss": 2.4376, + "step": 15823 + }, + { + "epoch": 1.277055927689452, + "grad_norm": 0.6794685125350952, + "learning_rate": 2.0974823846117197e-05, + "loss": 2.3717, + "step": 15824 + }, + { + "epoch": 1.277136631426035, + "grad_norm": 0.7145677804946899, + "learning_rate": 2.0965150899317364e-05, + "loss": 2.3829, + "step": 15825 + }, + { + "epoch": 1.277217335162618, + "grad_norm": 0.7043245434761047, + "learning_rate": 2.095547992227257e-05, + "loss": 2.405, + "step": 15826 + }, + { + "epoch": 1.277298038899201, + "grad_norm": 0.7969205379486084, + "learning_rate": 2.0945810915223873e-05, + "loss": 2.4115, + "step": 15827 + }, + { + "epoch": 1.277378742635784, + "grad_norm": 0.657482385635376, + "learning_rate": 2.0936143878412186e-05, + "loss": 2.372, + "step": 15828 + }, + { + "epoch": 1.277459446372367, + "grad_norm": 0.7315167784690857, + "learning_rate": 2.0926478812078466e-05, + "loss": 2.4372, + "step": 15829 + }, + { + "epoch": 1.27754015010895, + "grad_norm": 0.6985061764717102, + "learning_rate": 2.09168157164636e-05, + "loss": 2.3901, + "step": 15830 + }, + { + "epoch": 1.2776208538455331, + "grad_norm": 0.6906184554100037, + "learning_rate": 2.0907154591808408e-05, + "loss": 2.4562, + "step": 15831 + }, + { + "epoch": 1.277701557582116, + "grad_norm": 0.655094563961029, + "learning_rate": 2.0897495438353676e-05, + "loss": 2.451, + "step": 15832 + }, + { + "epoch": 1.277782261318699, + "grad_norm": 0.7663134932518005, + "learning_rate": 2.0887838256340143e-05, + "loss": 2.4634, + "step": 15833 + }, + { + "epoch": 1.2778629650552822, + "grad_norm": 0.7164491415023804, + "learning_rate": 2.087818304600849e-05, + "loss": 2.4624, + "step": 15834 + }, + { + "epoch": 1.277943668791865, + "grad_norm": 0.6962822079658508, + "learning_rate": 2.0868529807599336e-05, + "loss": 2.4325, + "step": 15835 + }, + { + "epoch": 1.2780243725284481, + "grad_norm": 
0.702985405921936, + "learning_rate": 2.0858878541353255e-05, + "loss": 2.4219, + "step": 15836 + }, + { + "epoch": 1.278105076265031, + "grad_norm": 0.7605595588684082, + "learning_rate": 2.0849229247510826e-05, + "loss": 2.4201, + "step": 15837 + }, + { + "epoch": 1.278185780001614, + "grad_norm": 0.8479344248771667, + "learning_rate": 2.083958192631249e-05, + "loss": 2.4689, + "step": 15838 + }, + { + "epoch": 1.278266483738197, + "grad_norm": 0.7241235375404358, + "learning_rate": 2.082993657799869e-05, + "loss": 2.4861, + "step": 15839 + }, + { + "epoch": 1.27834718747478, + "grad_norm": 0.7069835066795349, + "learning_rate": 2.0820293202809827e-05, + "loss": 2.3759, + "step": 15840 + }, + { + "epoch": 1.2784278912113631, + "grad_norm": 0.6606370210647583, + "learning_rate": 2.0810651800986237e-05, + "loss": 2.4444, + "step": 15841 + }, + { + "epoch": 1.278508594947946, + "grad_norm": 0.6608174443244934, + "learning_rate": 2.08010123727682e-05, + "loss": 2.4339, + "step": 15842 + }, + { + "epoch": 1.278589298684529, + "grad_norm": 0.751000702381134, + "learning_rate": 2.0791374918396e-05, + "loss": 2.4327, + "step": 15843 + }, + { + "epoch": 1.2786700024211122, + "grad_norm": 0.7223808765411377, + "learning_rate": 2.0781739438109748e-05, + "loss": 2.3573, + "step": 15844 + }, + { + "epoch": 1.278750706157695, + "grad_norm": 0.6872109770774841, + "learning_rate": 2.0772105932149642e-05, + "loss": 2.3973, + "step": 15845 + }, + { + "epoch": 1.2788314098942781, + "grad_norm": 0.6967385411262512, + "learning_rate": 2.0762474400755762e-05, + "loss": 2.4622, + "step": 15846 + }, + { + "epoch": 1.2789121136308612, + "grad_norm": 0.7289159893989563, + "learning_rate": 2.0752844844168163e-05, + "loss": 2.4507, + "step": 15847 + }, + { + "epoch": 1.278992817367444, + "grad_norm": 0.7735978364944458, + "learning_rate": 2.0743217262626802e-05, + "loss": 2.4341, + "step": 15848 + }, + { + "epoch": 1.2790735211040272, + "grad_norm": 0.7209177017211914, + "learning_rate": 
2.0733591656371655e-05, + "loss": 2.4024, + "step": 15849 + }, + { + "epoch": 1.2791542248406103, + "grad_norm": 0.6789259314537048, + "learning_rate": 2.0723968025642604e-05, + "loss": 2.3809, + "step": 15850 + }, + { + "epoch": 1.2792349285771931, + "grad_norm": 0.6972812414169312, + "learning_rate": 2.0714346370679495e-05, + "loss": 2.3986, + "step": 15851 + }, + { + "epoch": 1.2793156323137762, + "grad_norm": 0.7144166827201843, + "learning_rate": 2.070472669172213e-05, + "loss": 2.4241, + "step": 15852 + }, + { + "epoch": 1.279396336050359, + "grad_norm": 0.7325223088264465, + "learning_rate": 2.0695108989010282e-05, + "loss": 2.452, + "step": 15853 + }, + { + "epoch": 1.2794770397869422, + "grad_norm": 0.6900116205215454, + "learning_rate": 2.0685493262783608e-05, + "loss": 2.4091, + "step": 15854 + }, + { + "epoch": 1.279557743523525, + "grad_norm": 0.6846197843551636, + "learning_rate": 2.0675879513281758e-05, + "loss": 2.4337, + "step": 15855 + }, + { + "epoch": 1.2796384472601081, + "grad_norm": 0.6901541352272034, + "learning_rate": 2.0666267740744372e-05, + "loss": 2.4586, + "step": 15856 + }, + { + "epoch": 1.2797191509966912, + "grad_norm": 0.6842665672302246, + "learning_rate": 2.0656657945410953e-05, + "loss": 2.4383, + "step": 15857 + }, + { + "epoch": 1.279799854733274, + "grad_norm": 0.7450493574142456, + "learning_rate": 2.0647050127521028e-05, + "loss": 2.4308, + "step": 15858 + }, + { + "epoch": 1.2798805584698572, + "grad_norm": 0.6928436160087585, + "learning_rate": 2.0637444287314033e-05, + "loss": 2.4726, + "step": 15859 + }, + { + "epoch": 1.2799612622064402, + "grad_norm": 0.6539968252182007, + "learning_rate": 2.06278404250294e-05, + "loss": 2.3983, + "step": 15860 + }, + { + "epoch": 1.280041965943023, + "grad_norm": 0.7183163166046143, + "learning_rate": 2.0618238540906444e-05, + "loss": 2.4172, + "step": 15861 + }, + { + "epoch": 1.2801226696796062, + "grad_norm": 0.7070814371109009, + "learning_rate": 2.0608638635184507e-05, + 
"loss": 2.4018, + "step": 15862 + }, + { + "epoch": 1.2802033734161893, + "grad_norm": 0.7589142918586731, + "learning_rate": 2.0599040708102847e-05, + "loss": 2.4175, + "step": 15863 + }, + { + "epoch": 1.2802840771527721, + "grad_norm": 0.6945414543151855, + "learning_rate": 2.0589444759900613e-05, + "loss": 2.4093, + "step": 15864 + }, + { + "epoch": 1.2803647808893552, + "grad_norm": 0.685482919216156, + "learning_rate": 2.0579850790817003e-05, + "loss": 2.4388, + "step": 15865 + }, + { + "epoch": 1.280445484625938, + "grad_norm": 0.7089706063270569, + "learning_rate": 2.0570258801091148e-05, + "loss": 2.3779, + "step": 15866 + }, + { + "epoch": 1.2805261883625212, + "grad_norm": 0.6994217038154602, + "learning_rate": 2.0560668790962046e-05, + "loss": 2.3757, + "step": 15867 + }, + { + "epoch": 1.280606892099104, + "grad_norm": 0.7170232534408569, + "learning_rate": 2.055108076066874e-05, + "loss": 2.4087, + "step": 15868 + }, + { + "epoch": 1.2806875958356871, + "grad_norm": 0.7008751034736633, + "learning_rate": 2.0541494710450206e-05, + "loss": 2.4384, + "step": 15869 + }, + { + "epoch": 1.2807682995722702, + "grad_norm": 0.6795800924301147, + "learning_rate": 2.053191064054527e-05, + "loss": 2.415, + "step": 15870 + }, + { + "epoch": 1.280849003308853, + "grad_norm": 0.6650210022926331, + "learning_rate": 2.0522328551192882e-05, + "loss": 2.4421, + "step": 15871 + }, + { + "epoch": 1.2809297070454362, + "grad_norm": 0.7045374512672424, + "learning_rate": 2.0512748442631858e-05, + "loss": 2.4285, + "step": 15872 + }, + { + "epoch": 1.2810104107820193, + "grad_norm": 0.6585350632667542, + "learning_rate": 2.0503170315100883e-05, + "loss": 2.3806, + "step": 15873 + }, + { + "epoch": 1.2810911145186021, + "grad_norm": 0.7833496332168579, + "learning_rate": 2.0493594168838725e-05, + "loss": 2.4557, + "step": 15874 + }, + { + "epoch": 1.2811718182551852, + "grad_norm": 0.7237457036972046, + "learning_rate": 2.0484020004084048e-05, + "loss": 2.3966, + "step": 
15875 + }, + { + "epoch": 1.2812525219917683, + "grad_norm": 0.7416609525680542, + "learning_rate": 2.0474447821075426e-05, + "loss": 2.3729, + "step": 15876 + }, + { + "epoch": 1.2813332257283512, + "grad_norm": 0.7148095369338989, + "learning_rate": 2.046487762005146e-05, + "loss": 2.4163, + "step": 15877 + }, + { + "epoch": 1.2814139294649343, + "grad_norm": 0.670281171798706, + "learning_rate": 2.0455309401250632e-05, + "loss": 2.383, + "step": 15878 + }, + { + "epoch": 1.2814946332015174, + "grad_norm": 0.6968950629234314, + "learning_rate": 2.0445743164911457e-05, + "loss": 2.3967, + "step": 15879 + }, + { + "epoch": 1.2815753369381002, + "grad_norm": 0.783441960811615, + "learning_rate": 2.0436178911272298e-05, + "loss": 2.455, + "step": 15880 + }, + { + "epoch": 1.2816560406746833, + "grad_norm": 0.709032416343689, + "learning_rate": 2.0426616640571518e-05, + "loss": 2.4207, + "step": 15881 + }, + { + "epoch": 1.2817367444112662, + "grad_norm": 0.6727990508079529, + "learning_rate": 2.0417056353047504e-05, + "loss": 2.4115, + "step": 15882 + }, + { + "epoch": 1.2818174481478493, + "grad_norm": 0.7336034774780273, + "learning_rate": 2.0407498048938445e-05, + "loss": 2.43, + "step": 15883 + }, + { + "epoch": 1.2818981518844321, + "grad_norm": 0.7649042010307312, + "learning_rate": 2.0397941728482604e-05, + "loss": 2.4655, + "step": 15884 + }, + { + "epoch": 1.2819788556210152, + "grad_norm": 0.7218052744865417, + "learning_rate": 2.038838739191816e-05, + "loss": 2.4872, + "step": 15885 + }, + { + "epoch": 1.2820595593575983, + "grad_norm": 0.7192350625991821, + "learning_rate": 2.0378835039483178e-05, + "loss": 2.4751, + "step": 15886 + }, + { + "epoch": 1.2821402630941812, + "grad_norm": 0.7059212923049927, + "learning_rate": 2.0369284671415768e-05, + "loss": 2.43, + "step": 15887 + }, + { + "epoch": 1.2822209668307643, + "grad_norm": 0.7387098073959351, + "learning_rate": 2.0359736287953956e-05, + "loss": 2.4281, + "step": 15888 + }, + { + "epoch": 
1.2823016705673473, + "grad_norm": 0.7454321980476379, + "learning_rate": 2.035018988933568e-05, + "loss": 2.4372, + "step": 15889 + }, + { + "epoch": 1.2823823743039302, + "grad_norm": 0.6822765469551086, + "learning_rate": 2.034064547579888e-05, + "loss": 2.3728, + "step": 15890 + }, + { + "epoch": 1.2824630780405133, + "grad_norm": 0.6917527914047241, + "learning_rate": 2.0331103047581412e-05, + "loss": 2.3997, + "step": 15891 + }, + { + "epoch": 1.2825437817770964, + "grad_norm": 0.6734376549720764, + "learning_rate": 2.032156260492113e-05, + "loss": 2.4495, + "step": 15892 + }, + { + "epoch": 1.2826244855136792, + "grad_norm": 0.7222443222999573, + "learning_rate": 2.0312024148055776e-05, + "loss": 2.3466, + "step": 15893 + }, + { + "epoch": 1.2827051892502623, + "grad_norm": 0.703714907169342, + "learning_rate": 2.030248767722309e-05, + "loss": 2.4599, + "step": 15894 + }, + { + "epoch": 1.2827858929868454, + "grad_norm": 0.655161440372467, + "learning_rate": 2.029295319266078e-05, + "loss": 2.3896, + "step": 15895 + }, + { + "epoch": 1.2828665967234283, + "grad_norm": 0.6449242234230042, + "learning_rate": 2.028342069460639e-05, + "loss": 2.3511, + "step": 15896 + }, + { + "epoch": 1.2829473004600114, + "grad_norm": 0.6578382849693298, + "learning_rate": 2.027389018329755e-05, + "loss": 2.3678, + "step": 15897 + }, + { + "epoch": 1.2830280041965942, + "grad_norm": 0.7047572731971741, + "learning_rate": 2.0264361658971797e-05, + "loss": 2.4522, + "step": 15898 + }, + { + "epoch": 1.2831087079331773, + "grad_norm": 0.7310267090797424, + "learning_rate": 2.0254835121866554e-05, + "loss": 2.4117, + "step": 15899 + }, + { + "epoch": 1.2831894116697602, + "grad_norm": 0.7020776867866516, + "learning_rate": 2.024531057221927e-05, + "loss": 2.4033, + "step": 15900 + }, + { + "epoch": 1.2832701154063433, + "grad_norm": 0.6967746615409851, + "learning_rate": 2.023578801026733e-05, + "loss": 2.3491, + "step": 15901 + }, + { + "epoch": 1.2833508191429264, + "grad_norm": 
0.7062339782714844, + "learning_rate": 2.022626743624807e-05, + "loss": 2.4598, + "step": 15902 + }, + { + "epoch": 1.2834315228795092, + "grad_norm": 0.730625331401825, + "learning_rate": 2.0216748850398748e-05, + "loss": 2.4995, + "step": 15903 + }, + { + "epoch": 1.2835122266160923, + "grad_norm": 0.6634403467178345, + "learning_rate": 2.020723225295662e-05, + "loss": 2.3843, + "step": 15904 + }, + { + "epoch": 1.2835929303526754, + "grad_norm": 0.6924816966056824, + "learning_rate": 2.019771764415883e-05, + "loss": 2.4258, + "step": 15905 + }, + { + "epoch": 1.2836736340892583, + "grad_norm": 0.7127227187156677, + "learning_rate": 2.018820502424251e-05, + "loss": 2.4038, + "step": 15906 + }, + { + "epoch": 1.2837543378258414, + "grad_norm": 0.7108431458473206, + "learning_rate": 2.0178694393444785e-05, + "loss": 2.4571, + "step": 15907 + }, + { + "epoch": 1.2838350415624245, + "grad_norm": 0.7478229999542236, + "learning_rate": 2.016918575200262e-05, + "loss": 2.4526, + "step": 15908 + }, + { + "epoch": 1.2839157452990073, + "grad_norm": 0.65651935338974, + "learning_rate": 2.015967910015303e-05, + "loss": 2.434, + "step": 15909 + }, + { + "epoch": 1.2839964490355904, + "grad_norm": 0.7285312414169312, + "learning_rate": 2.015017443813294e-05, + "loss": 2.3857, + "step": 15910 + }, + { + "epoch": 1.2840771527721733, + "grad_norm": 0.6947231292724609, + "learning_rate": 2.014067176617923e-05, + "loss": 2.4294, + "step": 15911 + }, + { + "epoch": 1.2841578565087564, + "grad_norm": 0.6965867877006531, + "learning_rate": 2.0131171084528744e-05, + "loss": 2.4514, + "step": 15912 + }, + { + "epoch": 1.2842385602453392, + "grad_norm": 0.6962311863899231, + "learning_rate": 2.0121672393418246e-05, + "loss": 2.4391, + "step": 15913 + }, + { + "epoch": 1.2843192639819223, + "grad_norm": 0.6687992215156555, + "learning_rate": 2.01121756930845e-05, + "loss": 2.4266, + "step": 15914 + }, + { + "epoch": 1.2843999677185054, + "grad_norm": 0.7118954658508301, + 
"learning_rate": 2.0102680983764145e-05, + "loss": 2.3436, + "step": 15915 + }, + { + "epoch": 1.2844806714550883, + "grad_norm": 0.6866199970245361, + "learning_rate": 2.009318826569382e-05, + "loss": 2.3719, + "step": 15916 + }, + { + "epoch": 1.2845613751916714, + "grad_norm": 0.6701404452323914, + "learning_rate": 2.008369753911016e-05, + "loss": 2.4875, + "step": 15917 + }, + { + "epoch": 1.2846420789282544, + "grad_norm": 0.7020917534828186, + "learning_rate": 2.007420880424963e-05, + "loss": 2.3871, + "step": 15918 + }, + { + "epoch": 1.2847227826648373, + "grad_norm": 0.6865704655647278, + "learning_rate": 2.006472206134875e-05, + "loss": 2.3815, + "step": 15919 + }, + { + "epoch": 1.2848034864014204, + "grad_norm": 0.7106871008872986, + "learning_rate": 2.0055237310643948e-05, + "loss": 2.4276, + "step": 15920 + }, + { + "epoch": 1.2848841901380035, + "grad_norm": 0.6891976594924927, + "learning_rate": 2.004575455237161e-05, + "loss": 2.3641, + "step": 15921 + }, + { + "epoch": 1.2849648938745863, + "grad_norm": 0.6385056972503662, + "learning_rate": 2.0036273786768067e-05, + "loss": 2.3898, + "step": 15922 + }, + { + "epoch": 1.2850455976111694, + "grad_norm": 0.7038321495056152, + "learning_rate": 2.0026795014069633e-05, + "loss": 2.4688, + "step": 15923 + }, + { + "epoch": 1.2851263013477525, + "grad_norm": 0.6310208439826965, + "learning_rate": 2.0017318234512494e-05, + "loss": 2.3821, + "step": 15924 + }, + { + "epoch": 1.2852070050843354, + "grad_norm": 0.6989426016807556, + "learning_rate": 2.0007843448332865e-05, + "loss": 2.434, + "step": 15925 + }, + { + "epoch": 1.2852877088209185, + "grad_norm": 0.6666426658630371, + "learning_rate": 1.9998370655766886e-05, + "loss": 2.4687, + "step": 15926 + }, + { + "epoch": 1.2853684125575013, + "grad_norm": 0.6421633958816528, + "learning_rate": 1.9988899857050648e-05, + "loss": 2.4269, + "step": 15927 + }, + { + "epoch": 1.2854491162940844, + "grad_norm": 0.7229343056678772, + "learning_rate": 
1.997943105242016e-05, + "loss": 2.4139, + "step": 15928 + }, + { + "epoch": 1.2855298200306673, + "grad_norm": 0.7168964743614197, + "learning_rate": 1.9969964242111427e-05, + "loss": 2.405, + "step": 15929 + }, + { + "epoch": 1.2856105237672504, + "grad_norm": 0.6824480891227722, + "learning_rate": 1.99604994263604e-05, + "loss": 2.3955, + "step": 15930 + }, + { + "epoch": 1.2856912275038335, + "grad_norm": 0.670956552028656, + "learning_rate": 1.995103660540294e-05, + "loss": 2.3743, + "step": 15931 + }, + { + "epoch": 1.2857719312404163, + "grad_norm": 0.7057971954345703, + "learning_rate": 1.9941575779474864e-05, + "loss": 2.4496, + "step": 15932 + }, + { + "epoch": 1.2858526349769994, + "grad_norm": 0.7802264094352722, + "learning_rate": 1.9932116948812052e-05, + "loss": 2.4231, + "step": 15933 + }, + { + "epoch": 1.2859333387135825, + "grad_norm": 0.7151160836219788, + "learning_rate": 1.992266011365016e-05, + "loss": 2.4319, + "step": 15934 + }, + { + "epoch": 1.2860140424501654, + "grad_norm": 0.7078769207000732, + "learning_rate": 1.991320527422489e-05, + "loss": 2.4037, + "step": 15935 + }, + { + "epoch": 1.2860947461867485, + "grad_norm": 0.7483938336372375, + "learning_rate": 1.9903752430771927e-05, + "loss": 2.4946, + "step": 15936 + }, + { + "epoch": 1.2861754499233315, + "grad_norm": 0.7774620056152344, + "learning_rate": 1.9894301583526808e-05, + "loss": 2.4536, + "step": 15937 + }, + { + "epoch": 1.2862561536599144, + "grad_norm": 0.7311348915100098, + "learning_rate": 1.988485273272509e-05, + "loss": 2.4178, + "step": 15938 + }, + { + "epoch": 1.2863368573964975, + "grad_norm": 0.6821309328079224, + "learning_rate": 1.9875405878602282e-05, + "loss": 2.4851, + "step": 15939 + }, + { + "epoch": 1.2864175611330806, + "grad_norm": 0.7081651091575623, + "learning_rate": 1.9865961021393785e-05, + "loss": 2.4377, + "step": 15940 + }, + { + "epoch": 1.2864982648696635, + "grad_norm": 0.8093439340591431, + "learning_rate": 1.9856518161335014e-05, + 
"loss": 2.4681, + "step": 15941 + }, + { + "epoch": 1.2865789686062465, + "grad_norm": 0.6769521832466125, + "learning_rate": 1.984707729866131e-05, + "loss": 2.4231, + "step": 15942 + }, + { + "epoch": 1.2866596723428294, + "grad_norm": 0.6973356604576111, + "learning_rate": 1.983763843360795e-05, + "loss": 2.4144, + "step": 15943 + }, + { + "epoch": 1.2867403760794125, + "grad_norm": 0.7814682722091675, + "learning_rate": 1.9828201566410197e-05, + "loss": 2.3935, + "step": 15944 + }, + { + "epoch": 1.2868210798159954, + "grad_norm": 0.7545498609542847, + "learning_rate": 1.9818766697303236e-05, + "loss": 2.4136, + "step": 15945 + }, + { + "epoch": 1.2869017835525784, + "grad_norm": 0.7165581583976746, + "learning_rate": 1.9809333826522225e-05, + "loss": 2.3757, + "step": 15946 + }, + { + "epoch": 1.2869824872891615, + "grad_norm": 0.6812456846237183, + "learning_rate": 1.9799902954302208e-05, + "loss": 2.4143, + "step": 15947 + }, + { + "epoch": 1.2870631910257444, + "grad_norm": 0.7231366634368896, + "learning_rate": 1.9790474080878262e-05, + "loss": 2.4837, + "step": 15948 + }, + { + "epoch": 1.2871438947623275, + "grad_norm": 0.690916121006012, + "learning_rate": 1.9781047206485393e-05, + "loss": 2.4513, + "step": 15949 + }, + { + "epoch": 1.2872245984989106, + "grad_norm": 0.6608129143714905, + "learning_rate": 1.9771622331358485e-05, + "loss": 2.3908, + "step": 15950 + }, + { + "epoch": 1.2873053022354934, + "grad_norm": 0.7194501161575317, + "learning_rate": 1.976219945573249e-05, + "loss": 2.38, + "step": 15951 + }, + { + "epoch": 1.2873860059720765, + "grad_norm": 0.7315083146095276, + "learning_rate": 1.9752778579842213e-05, + "loss": 2.4351, + "step": 15952 + }, + { + "epoch": 1.2874667097086596, + "grad_norm": 0.7313492298126221, + "learning_rate": 1.974335970392246e-05, + "loss": 2.3531, + "step": 15953 + }, + { + "epoch": 1.2875474134452425, + "grad_norm": 0.6982418894767761, + "learning_rate": 1.9733942828207985e-05, + "loss": 2.4319, + "step": 
15954 + }, + { + "epoch": 1.2876281171818256, + "grad_norm": 0.6664792895317078, + "learning_rate": 1.972452795293347e-05, + "loss": 2.3981, + "step": 15955 + }, + { + "epoch": 1.2877088209184087, + "grad_norm": 0.6849696040153503, + "learning_rate": 1.9715115078333578e-05, + "loss": 2.3952, + "step": 15956 + }, + { + "epoch": 1.2877895246549915, + "grad_norm": 0.7355225086212158, + "learning_rate": 1.9705704204642873e-05, + "loss": 2.4556, + "step": 15957 + }, + { + "epoch": 1.2878702283915746, + "grad_norm": 0.6850876808166504, + "learning_rate": 1.9696295332095906e-05, + "loss": 2.3873, + "step": 15958 + }, + { + "epoch": 1.2879509321281575, + "grad_norm": 0.6449069976806641, + "learning_rate": 1.9686888460927198e-05, + "loss": 2.4226, + "step": 15959 + }, + { + "epoch": 1.2880316358647406, + "grad_norm": 0.7517794966697693, + "learning_rate": 1.967748359137114e-05, + "loss": 2.377, + "step": 15960 + }, + { + "epoch": 1.2881123396013234, + "grad_norm": 0.6861303448677063, + "learning_rate": 1.9668080723662162e-05, + "loss": 2.4451, + "step": 15961 + }, + { + "epoch": 1.2881930433379065, + "grad_norm": 0.7025154829025269, + "learning_rate": 1.9658679858034602e-05, + "loss": 2.3856, + "step": 15962 + }, + { + "epoch": 1.2882737470744896, + "grad_norm": 0.6775577068328857, + "learning_rate": 1.964928099472275e-05, + "loss": 2.4383, + "step": 15963 + }, + { + "epoch": 1.2883544508110725, + "grad_norm": 0.6889605522155762, + "learning_rate": 1.963988413396086e-05, + "loss": 2.3766, + "step": 15964 + }, + { + "epoch": 1.2884351545476556, + "grad_norm": 0.6697166562080383, + "learning_rate": 1.9630489275983156e-05, + "loss": 2.44, + "step": 15965 + }, + { + "epoch": 1.2885158582842386, + "grad_norm": 0.6895437836647034, + "learning_rate": 1.96210964210237e-05, + "loss": 2.4242, + "step": 15966 + }, + { + "epoch": 1.2885965620208215, + "grad_norm": 0.6955164670944214, + "learning_rate": 1.9611705569316652e-05, + "loss": 2.3915, + "step": 15967 + }, + { + "epoch": 
1.2886772657574046, + "grad_norm": 0.7133461236953735, + "learning_rate": 1.960231672109605e-05, + "loss": 2.4307, + "step": 15968 + }, + { + "epoch": 1.2887579694939877, + "grad_norm": 0.6874761581420898, + "learning_rate": 1.9592929876595857e-05, + "loss": 2.4371, + "step": 15969 + }, + { + "epoch": 1.2888386732305706, + "grad_norm": 0.7168406248092651, + "learning_rate": 1.9583545036050044e-05, + "loss": 2.4681, + "step": 15970 + }, + { + "epoch": 1.2889193769671536, + "grad_norm": 0.701874852180481, + "learning_rate": 1.9574162199692492e-05, + "loss": 2.4746, + "step": 15971 + }, + { + "epoch": 1.2890000807037365, + "grad_norm": 0.7118390202522278, + "learning_rate": 1.9564781367757058e-05, + "loss": 2.4139, + "step": 15972 + }, + { + "epoch": 1.2890807844403196, + "grad_norm": 0.6597239971160889, + "learning_rate": 1.955540254047753e-05, + "loss": 2.4346, + "step": 15973 + }, + { + "epoch": 1.2891614881769025, + "grad_norm": 0.7461068630218506, + "learning_rate": 1.9546025718087645e-05, + "loss": 2.4331, + "step": 15974 + }, + { + "epoch": 1.2892421919134855, + "grad_norm": 0.6992977857589722, + "learning_rate": 1.953665090082115e-05, + "loss": 2.424, + "step": 15975 + }, + { + "epoch": 1.2893228956500686, + "grad_norm": 0.6674031615257263, + "learning_rate": 1.9527278088911617e-05, + "loss": 2.4545, + "step": 15976 + }, + { + "epoch": 1.2894035993866515, + "grad_norm": 0.7377402782440186, + "learning_rate": 1.9517907282592662e-05, + "loss": 2.4625, + "step": 15977 + }, + { + "epoch": 1.2894843031232346, + "grad_norm": 0.720579206943512, + "learning_rate": 1.950853848209788e-05, + "loss": 2.4073, + "step": 15978 + }, + { + "epoch": 1.2895650068598177, + "grad_norm": 0.7221893668174744, + "learning_rate": 1.9499171687660688e-05, + "loss": 2.4056, + "step": 15979 + }, + { + "epoch": 1.2896457105964005, + "grad_norm": 0.7409725189208984, + "learning_rate": 1.9489806899514574e-05, + "loss": 2.3899, + "step": 15980 + }, + { + "epoch": 1.2897264143329836, + 
"grad_norm": 0.6946583986282349, + "learning_rate": 1.948044411789296e-05, + "loss": 2.4832, + "step": 15981 + }, + { + "epoch": 1.2898071180695667, + "grad_norm": 0.7031306028366089, + "learning_rate": 1.9471083343029096e-05, + "loss": 2.4265, + "step": 15982 + }, + { + "epoch": 1.2898878218061496, + "grad_norm": 0.660093367099762, + "learning_rate": 1.946172457515637e-05, + "loss": 2.4883, + "step": 15983 + }, + { + "epoch": 1.2899685255427327, + "grad_norm": 0.700641930103302, + "learning_rate": 1.945236781450802e-05, + "loss": 2.4096, + "step": 15984 + }, + { + "epoch": 1.2900492292793158, + "grad_norm": 0.7350760698318481, + "learning_rate": 1.9443013061317205e-05, + "loss": 2.4161, + "step": 15985 + }, + { + "epoch": 1.2901299330158986, + "grad_norm": 0.7567386031150818, + "learning_rate": 1.9433660315817072e-05, + "loss": 2.3978, + "step": 15986 + }, + { + "epoch": 1.2902106367524817, + "grad_norm": 0.7471369504928589, + "learning_rate": 1.9424309578240717e-05, + "loss": 2.4079, + "step": 15987 + }, + { + "epoch": 1.2902913404890646, + "grad_norm": 0.6630815267562866, + "learning_rate": 1.941496084882124e-05, + "loss": 2.4223, + "step": 15988 + }, + { + "epoch": 1.2903720442256477, + "grad_norm": 0.687224268913269, + "learning_rate": 1.940561412779155e-05, + "loss": 2.4413, + "step": 15989 + }, + { + "epoch": 1.2904527479622305, + "grad_norm": 0.6989685297012329, + "learning_rate": 1.9396269415384637e-05, + "loss": 2.3651, + "step": 15990 + }, + { + "epoch": 1.2905334516988136, + "grad_norm": 0.7256720066070557, + "learning_rate": 1.938692671183342e-05, + "loss": 2.4526, + "step": 15991 + }, + { + "epoch": 1.2906141554353967, + "grad_norm": 0.692032516002655, + "learning_rate": 1.9377586017370685e-05, + "loss": 2.3936, + "step": 15992 + }, + { + "epoch": 1.2906948591719796, + "grad_norm": 0.6733511686325073, + "learning_rate": 1.936824733222925e-05, + "loss": 2.4691, + "step": 15993 + }, + { + "epoch": 1.2907755629085627, + "grad_norm": 0.6698563098907471, + 
"learning_rate": 1.935891065664187e-05, + "loss": 2.3904, + "step": 15994 + }, + { + "epoch": 1.2908562666451457, + "grad_norm": 0.660521388053894, + "learning_rate": 1.934957599084123e-05, + "loss": 2.4647, + "step": 15995 + }, + { + "epoch": 1.2909369703817286, + "grad_norm": 0.6714615821838379, + "learning_rate": 1.9340243335059982e-05, + "loss": 2.403, + "step": 15996 + }, + { + "epoch": 1.2910176741183117, + "grad_norm": 0.726099967956543, + "learning_rate": 1.9330912689530746e-05, + "loss": 2.4101, + "step": 15997 + }, + { + "epoch": 1.2910983778548948, + "grad_norm": 0.6585896015167236, + "learning_rate": 1.932158405448601e-05, + "loss": 2.3813, + "step": 15998 + }, + { + "epoch": 1.2911790815914777, + "grad_norm": 0.7967908382415771, + "learning_rate": 1.9312257430158286e-05, + "loss": 2.4188, + "step": 15999 + }, + { + "epoch": 1.2912597853280607, + "grad_norm": 0.7340367436408997, + "learning_rate": 1.9302932816780063e-05, + "loss": 2.4642, + "step": 16000 + }, + { + "epoch": 1.2912597853280607, + "eval_loss": 2.3791537284851074, + "eval_runtime": 780.6124, + "eval_samples_per_second": 3.356, + "eval_steps_per_second": 0.56, + "step": 16000 + }, + { + "epoch": 1.2913404890646438, + "grad_norm": 0.6778663992881775, + "learning_rate": 1.929361021458367e-05, + "loss": 2.4057, + "step": 16001 + }, + { + "epoch": 1.2914211928012267, + "grad_norm": 0.6982381343841553, + "learning_rate": 1.9284289623801477e-05, + "loss": 2.4376, + "step": 16002 + }, + { + "epoch": 1.2915018965378098, + "grad_norm": 0.6956612467765808, + "learning_rate": 1.927497104466578e-05, + "loss": 2.4485, + "step": 16003 + }, + { + "epoch": 1.2915826002743926, + "grad_norm": 0.6780211925506592, + "learning_rate": 1.9265654477408825e-05, + "loss": 2.4233, + "step": 16004 + }, + { + "epoch": 1.2916633040109757, + "grad_norm": 0.6869028806686401, + "learning_rate": 1.92563399222628e-05, + "loss": 2.4156, + "step": 16005 + }, + { + "epoch": 1.2917440077475586, + "grad_norm": 0.6402696967124939, 
+ "learning_rate": 1.9247027379459848e-05, + "loss": 2.4208, + "step": 16006 + }, + { + "epoch": 1.2918247114841417, + "grad_norm": 0.6868177652359009, + "learning_rate": 1.92377168492321e-05, + "loss": 2.4067, + "step": 16007 + }, + { + "epoch": 1.2919054152207248, + "grad_norm": 0.7152438759803772, + "learning_rate": 1.922840833181152e-05, + "loss": 2.3944, + "step": 16008 + }, + { + "epoch": 1.2919861189573076, + "grad_norm": 0.6467335820198059, + "learning_rate": 1.921910182743015e-05, + "loss": 2.4064, + "step": 16009 + }, + { + "epoch": 1.2920668226938907, + "grad_norm": 0.6918551325798035, + "learning_rate": 1.9209797336319956e-05, + "loss": 2.4457, + "step": 16010 + }, + { + "epoch": 1.2921475264304738, + "grad_norm": 0.7308588027954102, + "learning_rate": 1.920049485871278e-05, + "loss": 2.3785, + "step": 16011 + }, + { + "epoch": 1.2922282301670567, + "grad_norm": 0.6918718814849854, + "learning_rate": 1.9191194394840472e-05, + "loss": 2.4645, + "step": 16012 + }, + { + "epoch": 1.2923089339036398, + "grad_norm": 0.7048078775405884, + "learning_rate": 1.9181895944934848e-05, + "loss": 2.4082, + "step": 16013 + }, + { + "epoch": 1.2923896376402229, + "grad_norm": 0.7175794839859009, + "learning_rate": 1.917259950922763e-05, + "loss": 2.4521, + "step": 16014 + }, + { + "epoch": 1.2924703413768057, + "grad_norm": 0.6895543932914734, + "learning_rate": 1.916330508795051e-05, + "loss": 2.4058, + "step": 16015 + }, + { + "epoch": 1.2925510451133888, + "grad_norm": 0.6951895952224731, + "learning_rate": 1.9154012681335176e-05, + "loss": 2.4274, + "step": 16016 + }, + { + "epoch": 1.2926317488499717, + "grad_norm": 0.6807428598403931, + "learning_rate": 1.9144722289613148e-05, + "loss": 2.4008, + "step": 16017 + }, + { + "epoch": 1.2927124525865548, + "grad_norm": 0.6643410325050354, + "learning_rate": 1.9135433913015997e-05, + "loss": 2.4036, + "step": 16018 + }, + { + "epoch": 1.2927931563231376, + "grad_norm": 0.7283294796943665, + "learning_rate": 
1.912614755177522e-05, + "loss": 2.4118, + "step": 16019 + }, + { + "epoch": 1.2928738600597207, + "grad_norm": 0.7516021132469177, + "learning_rate": 1.911686320612227e-05, + "loss": 2.3983, + "step": 16020 + }, + { + "epoch": 1.2929545637963038, + "grad_norm": 0.7314203381538391, + "learning_rate": 1.91075808762885e-05, + "loss": 2.4352, + "step": 16021 + }, + { + "epoch": 1.2930352675328867, + "grad_norm": 0.6904106736183167, + "learning_rate": 1.9098300562505266e-05, + "loss": 2.3734, + "step": 16022 + }, + { + "epoch": 1.2931159712694698, + "grad_norm": 0.6936709880828857, + "learning_rate": 1.9089022265003863e-05, + "loss": 2.4356, + "step": 16023 + }, + { + "epoch": 1.2931966750060528, + "grad_norm": 0.6753442883491516, + "learning_rate": 1.9079745984015528e-05, + "loss": 2.4713, + "step": 16024 + }, + { + "epoch": 1.2932773787426357, + "grad_norm": 0.7185340523719788, + "learning_rate": 1.9070471719771445e-05, + "loss": 2.4021, + "step": 16025 + }, + { + "epoch": 1.2933580824792188, + "grad_norm": 0.7486871480941772, + "learning_rate": 1.9061199472502798e-05, + "loss": 2.4144, + "step": 16026 + }, + { + "epoch": 1.2934387862158019, + "grad_norm": 0.6790735721588135, + "learning_rate": 1.90519292424406e-05, + "loss": 2.413, + "step": 16027 + }, + { + "epoch": 1.2935194899523847, + "grad_norm": 0.7104402780532837, + "learning_rate": 1.9042661029815922e-05, + "loss": 2.452, + "step": 16028 + }, + { + "epoch": 1.2936001936889678, + "grad_norm": 0.6975364685058594, + "learning_rate": 1.9033394834859796e-05, + "loss": 2.4169, + "step": 16029 + }, + { + "epoch": 1.293680897425551, + "grad_norm": 0.7619667649269104, + "learning_rate": 1.9024130657803085e-05, + "loss": 2.4106, + "step": 16030 + }, + { + "epoch": 1.2937616011621338, + "grad_norm": 0.6600254774093628, + "learning_rate": 1.9014868498876716e-05, + "loss": 2.3955, + "step": 16031 + }, + { + "epoch": 1.2938423048987169, + "grad_norm": 0.6790784597396851, + "learning_rate": 1.9005608358311533e-05, + 
"loss": 2.437, + "step": 16032 + }, + { + "epoch": 1.2939230086352997, + "grad_norm": 0.7085568308830261, + "learning_rate": 1.899635023633828e-05, + "loss": 2.4729, + "step": 16033 + }, + { + "epoch": 1.2940037123718828, + "grad_norm": 0.6940603256225586, + "learning_rate": 1.8987094133187732e-05, + "loss": 2.4099, + "step": 16034 + }, + { + "epoch": 1.2940844161084657, + "grad_norm": 0.7387171387672424, + "learning_rate": 1.897784004909058e-05, + "loss": 2.4509, + "step": 16035 + }, + { + "epoch": 1.2941651198450488, + "grad_norm": 0.8263981938362122, + "learning_rate": 1.8968587984277463e-05, + "loss": 2.4208, + "step": 16036 + }, + { + "epoch": 1.2942458235816319, + "grad_norm": 0.7393552660942078, + "learning_rate": 1.8959337938978937e-05, + "loss": 2.4458, + "step": 16037 + }, + { + "epoch": 1.2943265273182147, + "grad_norm": 0.652787983417511, + "learning_rate": 1.895008991342555e-05, + "loss": 2.3593, + "step": 16038 + }, + { + "epoch": 1.2944072310547978, + "grad_norm": 0.6533015370368958, + "learning_rate": 1.8940843907847817e-05, + "loss": 2.4538, + "step": 16039 + }, + { + "epoch": 1.294487934791381, + "grad_norm": 0.6723785400390625, + "learning_rate": 1.8931599922476106e-05, + "loss": 2.4528, + "step": 16040 + }, + { + "epoch": 1.2945686385279638, + "grad_norm": 0.693242073059082, + "learning_rate": 1.892235795754085e-05, + "loss": 2.4006, + "step": 16041 + }, + { + "epoch": 1.2946493422645469, + "grad_norm": 0.6849604845046997, + "learning_rate": 1.8913118013272403e-05, + "loss": 2.3758, + "step": 16042 + }, + { + "epoch": 1.29473004600113, + "grad_norm": 0.7252739667892456, + "learning_rate": 1.8903880089900983e-05, + "loss": 2.4101, + "step": 16043 + }, + { + "epoch": 1.2948107497377128, + "grad_norm": 0.720431923866272, + "learning_rate": 1.8894644187656864e-05, + "loss": 2.4241, + "step": 16044 + }, + { + "epoch": 1.294891453474296, + "grad_norm": 0.6936169862747192, + "learning_rate": 1.8885410306770225e-05, + "loss": 2.4225, + "step": 16045 + 
}, + { + "epoch": 1.294972157210879, + "grad_norm": 0.7698646187782288, + "learning_rate": 1.8876178447471193e-05, + "loss": 2.4031, + "step": 16046 + }, + { + "epoch": 1.2950528609474619, + "grad_norm": 0.6800495982170105, + "learning_rate": 1.8866948609989854e-05, + "loss": 2.3679, + "step": 16047 + }, + { + "epoch": 1.295133564684045, + "grad_norm": 0.7348111867904663, + "learning_rate": 1.8857720794556267e-05, + "loss": 2.4263, + "step": 16048 + }, + { + "epoch": 1.2952142684206278, + "grad_norm": 0.6614782214164734, + "learning_rate": 1.8848495001400356e-05, + "loss": 2.4396, + "step": 16049 + }, + { + "epoch": 1.295294972157211, + "grad_norm": 0.6683650612831116, + "learning_rate": 1.8839271230752075e-05, + "loss": 2.4189, + "step": 16050 + }, + { + "epoch": 1.2953756758937938, + "grad_norm": 0.711040198802948, + "learning_rate": 1.8830049482841328e-05, + "loss": 2.3974, + "step": 16051 + }, + { + "epoch": 1.2954563796303769, + "grad_norm": 0.6663193702697754, + "learning_rate": 1.882082975789795e-05, + "loss": 2.4196, + "step": 16052 + }, + { + "epoch": 1.29553708336696, + "grad_norm": 0.6551210284233093, + "learning_rate": 1.881161205615166e-05, + "loss": 2.3793, + "step": 16053 + }, + { + "epoch": 1.2956177871035428, + "grad_norm": 0.6849039793014526, + "learning_rate": 1.8802396377832243e-05, + "loss": 2.3941, + "step": 16054 + }, + { + "epoch": 1.295698490840126, + "grad_norm": 0.7642949223518372, + "learning_rate": 1.8793182723169357e-05, + "loss": 2.4296, + "step": 16055 + }, + { + "epoch": 1.295779194576709, + "grad_norm": 0.7104716897010803, + "learning_rate": 1.878397109239263e-05, + "loss": 2.4124, + "step": 16056 + }, + { + "epoch": 1.2958598983132918, + "grad_norm": 0.6822344064712524, + "learning_rate": 1.877476148573164e-05, + "loss": 2.4072, + "step": 16057 + }, + { + "epoch": 1.295940602049875, + "grad_norm": 0.6824066042900085, + "learning_rate": 1.8765553903415956e-05, + "loss": 2.4137, + "step": 16058 + }, + { + "epoch": 1.296021305786458, 
+ "grad_norm": 0.7083307504653931, + "learning_rate": 1.875634834567498e-05, + "loss": 2.4423, + "step": 16059 + }, + { + "epoch": 1.2961020095230409, + "grad_norm": 0.7301077246665955, + "learning_rate": 1.874714481273818e-05, + "loss": 2.3926, + "step": 16060 + }, + { + "epoch": 1.296182713259624, + "grad_norm": 0.685656726360321, + "learning_rate": 1.873794330483496e-05, + "loss": 2.4409, + "step": 16061 + }, + { + "epoch": 1.296263416996207, + "grad_norm": 0.6916719675064087, + "learning_rate": 1.8728743822194584e-05, + "loss": 2.4141, + "step": 16062 + }, + { + "epoch": 1.29634412073279, + "grad_norm": 0.7188845276832581, + "learning_rate": 1.871954636504636e-05, + "loss": 2.4186, + "step": 16063 + }, + { + "epoch": 1.2964248244693728, + "grad_norm": 0.6637440919876099, + "learning_rate": 1.8710350933619504e-05, + "loss": 2.4526, + "step": 16064 + }, + { + "epoch": 1.2965055282059559, + "grad_norm": 0.7000349760055542, + "learning_rate": 1.87011575281432e-05, + "loss": 2.4096, + "step": 16065 + }, + { + "epoch": 1.296586231942539, + "grad_norm": 0.693513810634613, + "learning_rate": 1.8691966148846573e-05, + "loss": 2.3931, + "step": 16066 + }, + { + "epoch": 1.2966669356791218, + "grad_norm": 0.6928985118865967, + "learning_rate": 1.8682776795958678e-05, + "loss": 2.4384, + "step": 16067 + }, + { + "epoch": 1.296747639415705, + "grad_norm": 0.6474096179008484, + "learning_rate": 1.8673589469708585e-05, + "loss": 2.3985, + "step": 16068 + }, + { + "epoch": 1.296828343152288, + "grad_norm": 0.6827313899993896, + "learning_rate": 1.866440417032521e-05, + "loss": 2.4607, + "step": 16069 + }, + { + "epoch": 1.2969090468888709, + "grad_norm": 0.7183445692062378, + "learning_rate": 1.8655220898037485e-05, + "loss": 2.4396, + "step": 16070 + }, + { + "epoch": 1.296989750625454, + "grad_norm": 0.6997376680374146, + "learning_rate": 1.8646039653074333e-05, + "loss": 2.4627, + "step": 16071 + }, + { + "epoch": 1.297070454362037, + "grad_norm": 0.7358444333076477, + 
"learning_rate": 1.8636860435664493e-05, + "loss": 2.4165, + "step": 16072 + }, + { + "epoch": 1.29715115809862, + "grad_norm": 0.8126270771026611, + "learning_rate": 1.8627683246036787e-05, + "loss": 2.4681, + "step": 16073 + }, + { + "epoch": 1.297231861835203, + "grad_norm": 0.7364177107810974, + "learning_rate": 1.8618508084419918e-05, + "loss": 2.44, + "step": 16074 + }, + { + "epoch": 1.297312565571786, + "grad_norm": 0.7480010390281677, + "learning_rate": 1.8609334951042567e-05, + "loss": 2.4759, + "step": 16075 + }, + { + "epoch": 1.297393269308369, + "grad_norm": 0.6563693284988403, + "learning_rate": 1.8600163846133335e-05, + "loss": 2.3865, + "step": 16076 + }, + { + "epoch": 1.297473973044952, + "grad_norm": 0.6961230039596558, + "learning_rate": 1.8590994769920832e-05, + "loss": 2.3851, + "step": 16077 + }, + { + "epoch": 1.297554676781535, + "grad_norm": 0.7137415409088135, + "learning_rate": 1.8581827722633527e-05, + "loss": 2.4115, + "step": 16078 + }, + { + "epoch": 1.297635380518118, + "grad_norm": 0.6579335331916809, + "learning_rate": 1.85726627044999e-05, + "loss": 2.4464, + "step": 16079 + }, + { + "epoch": 1.2977160842547009, + "grad_norm": 0.7069905400276184, + "learning_rate": 1.8563499715748366e-05, + "loss": 2.4057, + "step": 16080 + }, + { + "epoch": 1.297796787991284, + "grad_norm": 0.771925687789917, + "learning_rate": 1.8554338756607325e-05, + "loss": 2.4696, + "step": 16081 + }, + { + "epoch": 1.297877491727867, + "grad_norm": 0.7268456816673279, + "learning_rate": 1.8545179827305048e-05, + "loss": 2.3949, + "step": 16082 + }, + { + "epoch": 1.29795819546445, + "grad_norm": 0.7049130797386169, + "learning_rate": 1.8536022928069796e-05, + "loss": 2.4448, + "step": 16083 + }, + { + "epoch": 1.298038899201033, + "grad_norm": 0.6716888546943665, + "learning_rate": 1.852686805912982e-05, + "loss": 2.3356, + "step": 16084 + }, + { + "epoch": 1.298119602937616, + "grad_norm": 0.666386604309082, + "learning_rate": 1.851771522071325e-05, + 
"loss": 2.4226, + "step": 16085 + }, + { + "epoch": 1.298200306674199, + "grad_norm": 0.7084901332855225, + "learning_rate": 1.8508564413048223e-05, + "loss": 2.4452, + "step": 16086 + }, + { + "epoch": 1.298281010410782, + "grad_norm": 0.6615412831306458, + "learning_rate": 1.8499415636362815e-05, + "loss": 2.4193, + "step": 16087 + }, + { + "epoch": 1.2983617141473651, + "grad_norm": 0.7143606543540955, + "learning_rate": 1.849026889088499e-05, + "loss": 2.4513, + "step": 16088 + }, + { + "epoch": 1.298442417883948, + "grad_norm": 0.7241482734680176, + "learning_rate": 1.8481124176842723e-05, + "loss": 2.458, + "step": 16089 + }, + { + "epoch": 1.298523121620531, + "grad_norm": 0.6762149930000305, + "learning_rate": 1.8471981494463963e-05, + "loss": 2.4386, + "step": 16090 + }, + { + "epoch": 1.2986038253571142, + "grad_norm": 0.6672768592834473, + "learning_rate": 1.8462840843976525e-05, + "loss": 2.375, + "step": 16091 + }, + { + "epoch": 1.298684529093697, + "grad_norm": 0.6871693134307861, + "learning_rate": 1.8453702225608226e-05, + "loss": 2.4342, + "step": 16092 + }, + { + "epoch": 1.2987652328302801, + "grad_norm": 0.6771275401115417, + "learning_rate": 1.8444565639586864e-05, + "loss": 2.402, + "step": 16093 + }, + { + "epoch": 1.298845936566863, + "grad_norm": 0.6627403497695923, + "learning_rate": 1.8435431086140077e-05, + "loss": 2.4667, + "step": 16094 + }, + { + "epoch": 1.298926640303446, + "grad_norm": 0.7001610398292542, + "learning_rate": 1.8426298565495538e-05, + "loss": 2.4396, + "step": 16095 + }, + { + "epoch": 1.299007344040029, + "grad_norm": 0.7574489712715149, + "learning_rate": 1.8417168077880908e-05, + "loss": 2.4601, + "step": 16096 + }, + { + "epoch": 1.299088047776612, + "grad_norm": 0.7771055698394775, + "learning_rate": 1.840803962352372e-05, + "loss": 2.4371, + "step": 16097 + }, + { + "epoch": 1.299168751513195, + "grad_norm": 0.6738649606704712, + "learning_rate": 1.8398913202651457e-05, + "loss": 2.3921, + "step": 16098 + }, + 
{ + "epoch": 1.299249455249778, + "grad_norm": 0.7014862895011902, + "learning_rate": 1.8389788815491583e-05, + "loss": 2.451, + "step": 16099 + }, + { + "epoch": 1.299330158986361, + "grad_norm": 0.7026070952415466, + "learning_rate": 1.8380666462271523e-05, + "loss": 2.4583, + "step": 16100 + }, + { + "epoch": 1.2994108627229441, + "grad_norm": 0.6904535293579102, + "learning_rate": 1.8371546143218588e-05, + "loss": 2.4453, + "step": 16101 + }, + { + "epoch": 1.299491566459527, + "grad_norm": 0.6974804997444153, + "learning_rate": 1.8362427858560093e-05, + "loss": 2.4291, + "step": 16102 + }, + { + "epoch": 1.29957227019611, + "grad_norm": 0.6826989650726318, + "learning_rate": 1.8353311608523326e-05, + "loss": 2.4183, + "step": 16103 + }, + { + "epoch": 1.2996529739326932, + "grad_norm": 0.6804787516593933, + "learning_rate": 1.8344197393335448e-05, + "loss": 2.434, + "step": 16104 + }, + { + "epoch": 1.299733677669276, + "grad_norm": 0.7144587635993958, + "learning_rate": 1.8335085213223613e-05, + "loss": 2.4296, + "step": 16105 + }, + { + "epoch": 1.2998143814058591, + "grad_norm": 0.7228755354881287, + "learning_rate": 1.8325975068414924e-05, + "loss": 2.3987, + "step": 16106 + }, + { + "epoch": 1.2998950851424422, + "grad_norm": 0.7417716383934021, + "learning_rate": 1.8316866959136438e-05, + "loss": 2.4076, + "step": 16107 + }, + { + "epoch": 1.299975788879025, + "grad_norm": 0.6737387776374817, + "learning_rate": 1.8307760885615154e-05, + "loss": 2.4175, + "step": 16108 + }, + { + "epoch": 1.3000564926156082, + "grad_norm": 0.7294918298721313, + "learning_rate": 1.8298656848078035e-05, + "loss": 2.4022, + "step": 16109 + }, + { + "epoch": 1.300137196352191, + "grad_norm": 0.7200861573219299, + "learning_rate": 1.828955484675193e-05, + "loss": 2.4018, + "step": 16110 + }, + { + "epoch": 1.3002179000887741, + "grad_norm": 0.7704176306724548, + "learning_rate": 1.8280454881863718e-05, + "loss": 2.4539, + "step": 16111 + }, + { + "epoch": 1.300298603825357, + 
"grad_norm": 0.6790730953216553, + "learning_rate": 1.8271356953640184e-05, + "loss": 2.4196, + "step": 16112 + }, + { + "epoch": 1.30037930756194, + "grad_norm": 0.7165740132331848, + "learning_rate": 1.8262261062308096e-05, + "loss": 2.4234, + "step": 16113 + }, + { + "epoch": 1.3004600112985232, + "grad_norm": 0.7716830372810364, + "learning_rate": 1.82531672080941e-05, + "loss": 2.4255, + "step": 16114 + }, + { + "epoch": 1.300540715035106, + "grad_norm": 0.6525317430496216, + "learning_rate": 1.824407539122488e-05, + "loss": 2.4482, + "step": 16115 + }, + { + "epoch": 1.3006214187716891, + "grad_norm": 0.7397769093513489, + "learning_rate": 1.8234985611927003e-05, + "loss": 2.33, + "step": 16116 + }, + { + "epoch": 1.3007021225082722, + "grad_norm": 0.7106032967567444, + "learning_rate": 1.822589787042702e-05, + "loss": 2.485, + "step": 16117 + }, + { + "epoch": 1.300782826244855, + "grad_norm": 0.7030045390129089, + "learning_rate": 1.8216812166951425e-05, + "loss": 2.454, + "step": 16118 + }, + { + "epoch": 1.3008635299814382, + "grad_norm": 0.7075662612915039, + "learning_rate": 1.8207728501726683e-05, + "loss": 2.4589, + "step": 16119 + }, + { + "epoch": 1.3009442337180213, + "grad_norm": 0.6700533032417297, + "learning_rate": 1.819864687497912e-05, + "loss": 2.4398, + "step": 16120 + }, + { + "epoch": 1.3010249374546041, + "grad_norm": 0.6951712369918823, + "learning_rate": 1.8189567286935117e-05, + "loss": 2.3998, + "step": 16121 + }, + { + "epoch": 1.3011056411911872, + "grad_norm": 0.708344578742981, + "learning_rate": 1.818048973782097e-05, + "loss": 2.4142, + "step": 16122 + }, + { + "epoch": 1.30118634492777, + "grad_norm": 0.7078592777252197, + "learning_rate": 1.817141422786287e-05, + "loss": 2.451, + "step": 16123 + }, + { + "epoch": 1.3012670486643532, + "grad_norm": 0.7111849784851074, + "learning_rate": 1.816234075728703e-05, + "loss": 2.4762, + "step": 16124 + }, + { + "epoch": 1.301347752400936, + "grad_norm": 0.6716348528862, + 
"learning_rate": 1.8153269326319588e-05, + "loss": 2.4373, + "step": 16125 + }, + { + "epoch": 1.3014284561375191, + "grad_norm": 0.6592512130737305, + "learning_rate": 1.8144199935186623e-05, + "loss": 2.412, + "step": 16126 + }, + { + "epoch": 1.3015091598741022, + "grad_norm": 0.6958334445953369, + "learning_rate": 1.8135132584114167e-05, + "loss": 2.4077, + "step": 16127 + }, + { + "epoch": 1.301589863610685, + "grad_norm": 0.6911341547966003, + "learning_rate": 1.8126067273328207e-05, + "loss": 2.409, + "step": 16128 + }, + { + "epoch": 1.3016705673472682, + "grad_norm": 0.676114022731781, + "learning_rate": 1.8117004003054693e-05, + "loss": 2.4463, + "step": 16129 + }, + { + "epoch": 1.3017512710838512, + "grad_norm": 0.6493322849273682, + "learning_rate": 1.810794277351947e-05, + "loss": 2.4377, + "step": 16130 + }, + { + "epoch": 1.3018319748204341, + "grad_norm": 0.6938454508781433, + "learning_rate": 1.8098883584948367e-05, + "loss": 2.4298, + "step": 16131 + }, + { + "epoch": 1.3019126785570172, + "grad_norm": 0.69407719373703, + "learning_rate": 1.8089826437567214e-05, + "loss": 2.4107, + "step": 16132 + }, + { + "epoch": 1.3019933822936003, + "grad_norm": 0.6898862719535828, + "learning_rate": 1.8080771331601664e-05, + "loss": 2.4182, + "step": 16133 + }, + { + "epoch": 1.3020740860301832, + "grad_norm": 0.7377758026123047, + "learning_rate": 1.807171826727744e-05, + "loss": 2.4112, + "step": 16134 + }, + { + "epoch": 1.3021547897667662, + "grad_norm": 0.674057126045227, + "learning_rate": 1.8062667244820154e-05, + "loss": 2.4276, + "step": 16135 + }, + { + "epoch": 1.3022354935033493, + "grad_norm": 0.7087522745132446, + "learning_rate": 1.8053618264455384e-05, + "loss": 2.4338, + "step": 16136 + }, + { + "epoch": 1.3023161972399322, + "grad_norm": 0.70958411693573, + "learning_rate": 1.8044571326408667e-05, + "loss": 2.4369, + "step": 16137 + }, + { + "epoch": 1.3023969009765153, + "grad_norm": 0.7023837566375732, + "learning_rate": 
1.803552643090548e-05, + "loss": 2.4185, + "step": 16138 + }, + { + "epoch": 1.3024776047130981, + "grad_norm": 0.708543598651886, + "learning_rate": 1.8026483578171216e-05, + "loss": 2.4053, + "step": 16139 + }, + { + "epoch": 1.3025583084496812, + "grad_norm": 0.748601496219635, + "learning_rate": 1.8017442768431257e-05, + "loss": 2.3948, + "step": 16140 + }, + { + "epoch": 1.302639012186264, + "grad_norm": 0.6626949310302734, + "learning_rate": 1.800840400191096e-05, + "loss": 2.4636, + "step": 16141 + }, + { + "epoch": 1.3027197159228472, + "grad_norm": 0.7079617977142334, + "learning_rate": 1.7999367278835534e-05, + "loss": 2.4091, + "step": 16142 + }, + { + "epoch": 1.3028004196594303, + "grad_norm": 0.7025624513626099, + "learning_rate": 1.7990332599430225e-05, + "loss": 2.3732, + "step": 16143 + }, + { + "epoch": 1.3028811233960131, + "grad_norm": 0.7365758419036865, + "learning_rate": 1.7981299963920205e-05, + "loss": 2.4725, + "step": 16144 + }, + { + "epoch": 1.3029618271325962, + "grad_norm": 0.7511963248252869, + "learning_rate": 1.7972269372530615e-05, + "loss": 2.4304, + "step": 16145 + }, + { + "epoch": 1.3030425308691793, + "grad_norm": 0.7055985331535339, + "learning_rate": 1.796324082548644e-05, + "loss": 2.4259, + "step": 16146 + }, + { + "epoch": 1.3031232346057622, + "grad_norm": 0.691162645816803, + "learning_rate": 1.7954214323012775e-05, + "loss": 2.4262, + "step": 16147 + }, + { + "epoch": 1.3032039383423453, + "grad_norm": 0.7179710268974304, + "learning_rate": 1.7945189865334587e-05, + "loss": 2.4301, + "step": 16148 + }, + { + "epoch": 1.3032846420789284, + "grad_norm": 0.7391623258590698, + "learning_rate": 1.7936167452676744e-05, + "loss": 2.4302, + "step": 16149 + }, + { + "epoch": 1.3033653458155112, + "grad_norm": 0.7297981381416321, + "learning_rate": 1.7927147085264117e-05, + "loss": 2.3911, + "step": 16150 + }, + { + "epoch": 1.3034460495520943, + "grad_norm": 0.7571932673454285, + "learning_rate": 1.7918128763321552e-05, + 
"loss": 2.4348, + "step": 16151 + }, + { + "epoch": 1.3035267532886774, + "grad_norm": 0.7074765563011169, + "learning_rate": 1.7909112487073754e-05, + "loss": 2.4164, + "step": 16152 + }, + { + "epoch": 1.3036074570252603, + "grad_norm": 0.7534131407737732, + "learning_rate": 1.7900098256745467e-05, + "loss": 2.3784, + "step": 16153 + }, + { + "epoch": 1.3036881607618434, + "grad_norm": 0.675398588180542, + "learning_rate": 1.789108607256136e-05, + "loss": 2.4305, + "step": 16154 + }, + { + "epoch": 1.3037688644984262, + "grad_norm": 0.7099249362945557, + "learning_rate": 1.7882075934746002e-05, + "loss": 2.4053, + "step": 16155 + }, + { + "epoch": 1.3038495682350093, + "grad_norm": 0.6914681196212769, + "learning_rate": 1.787306784352397e-05, + "loss": 2.3902, + "step": 16156 + }, + { + "epoch": 1.3039302719715922, + "grad_norm": 0.6956958770751953, + "learning_rate": 1.786406179911977e-05, + "loss": 2.4026, + "step": 16157 + }, + { + "epoch": 1.3040109757081753, + "grad_norm": 0.6873000860214233, + "learning_rate": 1.7855057801757857e-05, + "loss": 2.4082, + "step": 16158 + }, + { + "epoch": 1.3040916794447583, + "grad_norm": 0.7340587377548218, + "learning_rate": 1.7846055851662625e-05, + "loss": 2.4894, + "step": 16159 + }, + { + "epoch": 1.3041723831813412, + "grad_norm": 0.6956963539123535, + "learning_rate": 1.7837055949058444e-05, + "loss": 2.3976, + "step": 16160 + }, + { + "epoch": 1.3042530869179243, + "grad_norm": 0.7654300332069397, + "learning_rate": 1.782805809416962e-05, + "loss": 2.4272, + "step": 16161 + }, + { + "epoch": 1.3043337906545074, + "grad_norm": 0.7735971212387085, + "learning_rate": 1.7819062287220368e-05, + "loss": 2.4513, + "step": 16162 + }, + { + "epoch": 1.3044144943910903, + "grad_norm": 0.6897203326225281, + "learning_rate": 1.7810068528434908e-05, + "loss": 2.3974, + "step": 16163 + }, + { + "epoch": 1.3044951981276733, + "grad_norm": 0.7328432202339172, + "learning_rate": 1.780107681803741e-05, + "loss": 2.4455, + "step": 
16164 + }, + { + "epoch": 1.3045759018642564, + "grad_norm": 0.7098489999771118, + "learning_rate": 1.7792087156251924e-05, + "loss": 2.4173, + "step": 16165 + }, + { + "epoch": 1.3046566056008393, + "grad_norm": 0.6593194007873535, + "learning_rate": 1.7783099543302518e-05, + "loss": 2.4102, + "step": 16166 + }, + { + "epoch": 1.3047373093374224, + "grad_norm": 0.7329291105270386, + "learning_rate": 1.7774113979413188e-05, + "loss": 2.4856, + "step": 16167 + }, + { + "epoch": 1.3048180130740052, + "grad_norm": 0.7033355236053467, + "learning_rate": 1.776513046480788e-05, + "loss": 2.4503, + "step": 16168 + }, + { + "epoch": 1.3048987168105883, + "grad_norm": 0.7063608765602112, + "learning_rate": 1.7756148999710486e-05, + "loss": 2.4523, + "step": 16169 + }, + { + "epoch": 1.3049794205471712, + "grad_norm": 0.6905883550643921, + "learning_rate": 1.774716958434487e-05, + "loss": 2.4149, + "step": 16170 + }, + { + "epoch": 1.3050601242837543, + "grad_norm": 0.694551408290863, + "learning_rate": 1.7738192218934778e-05, + "loss": 2.437, + "step": 16171 + }, + { + "epoch": 1.3051408280203374, + "grad_norm": 0.7173176407814026, + "learning_rate": 1.772921690370396e-05, + "loss": 2.4817, + "step": 16172 + }, + { + "epoch": 1.3052215317569202, + "grad_norm": 0.7197130918502808, + "learning_rate": 1.7720243638876153e-05, + "loss": 2.4481, + "step": 16173 + }, + { + "epoch": 1.3053022354935033, + "grad_norm": 0.710811197757721, + "learning_rate": 1.771127242467493e-05, + "loss": 2.397, + "step": 16174 + }, + { + "epoch": 1.3053829392300864, + "grad_norm": 0.9194550514221191, + "learning_rate": 1.7702303261323894e-05, + "loss": 2.5206, + "step": 16175 + }, + { + "epoch": 1.3054636429666693, + "grad_norm": 0.7003832459449768, + "learning_rate": 1.769333614904659e-05, + "loss": 2.4175, + "step": 16176 + }, + { + "epoch": 1.3055443467032524, + "grad_norm": 0.7161554098129272, + "learning_rate": 1.768437108806651e-05, + "loss": 2.3892, + "step": 16177 + }, + { + "epoch": 
1.3056250504398355, + "grad_norm": 0.6516181826591492, + "learning_rate": 1.767540807860707e-05, + "loss": 2.4361, + "step": 16178 + }, + { + "epoch": 1.3057057541764183, + "grad_norm": 0.7518061399459839, + "learning_rate": 1.7666447120891662e-05, + "loss": 2.4572, + "step": 16179 + }, + { + "epoch": 1.3057864579130014, + "grad_norm": 0.735388994216919, + "learning_rate": 1.7657488215143637e-05, + "loss": 2.3965, + "step": 16180 + }, + { + "epoch": 1.3058671616495845, + "grad_norm": 0.6994282007217407, + "learning_rate": 1.764853136158622e-05, + "loss": 2.4052, + "step": 16181 + }, + { + "epoch": 1.3059478653861674, + "grad_norm": 0.7095311880111694, + "learning_rate": 1.7639576560442684e-05, + "loss": 2.4818, + "step": 16182 + }, + { + "epoch": 1.3060285691227504, + "grad_norm": 0.6527207493782043, + "learning_rate": 1.7630623811936208e-05, + "loss": 2.3962, + "step": 16183 + }, + { + "epoch": 1.3061092728593333, + "grad_norm": 0.6668451428413391, + "learning_rate": 1.7621673116289882e-05, + "loss": 2.4514, + "step": 16184 + }, + { + "epoch": 1.3061899765959164, + "grad_norm": 0.7119911909103394, + "learning_rate": 1.7612724473726795e-05, + "loss": 2.4313, + "step": 16185 + }, + { + "epoch": 1.3062706803324993, + "grad_norm": 0.706249475479126, + "learning_rate": 1.7603777884469984e-05, + "loss": 2.4131, + "step": 16186 + }, + { + "epoch": 1.3063513840690824, + "grad_norm": 0.6634086966514587, + "learning_rate": 1.759483334874241e-05, + "loss": 2.3532, + "step": 16187 + }, + { + "epoch": 1.3064320878056654, + "grad_norm": 0.8096393942832947, + "learning_rate": 1.7585890866766995e-05, + "loss": 2.4485, + "step": 16188 + }, + { + "epoch": 1.3065127915422483, + "grad_norm": 0.675308883190155, + "learning_rate": 1.7576950438766615e-05, + "loss": 2.388, + "step": 16189 + }, + { + "epoch": 1.3065934952788314, + "grad_norm": 0.738275408744812, + "learning_rate": 1.756801206496411e-05, + "loss": 2.4485, + "step": 16190 + }, + { + "epoch": 1.3066741990154145, + 
"grad_norm": 0.7045620083808899, + "learning_rate": 1.755907574558221e-05, + "loss": 2.3985, + "step": 16191 + }, + { + "epoch": 1.3067549027519973, + "grad_norm": 0.6499879360198975, + "learning_rate": 1.755014148084363e-05, + "loss": 2.3992, + "step": 16192 + }, + { + "epoch": 1.3068356064885804, + "grad_norm": 0.7101179361343384, + "learning_rate": 1.7541209270971083e-05, + "loss": 2.4217, + "step": 16193 + }, + { + "epoch": 1.3069163102251635, + "grad_norm": 0.6865181922912598, + "learning_rate": 1.7532279116187124e-05, + "loss": 2.4805, + "step": 16194 + }, + { + "epoch": 1.3069970139617464, + "grad_norm": 0.7710141539573669, + "learning_rate": 1.752335101671434e-05, + "loss": 2.3654, + "step": 16195 + }, + { + "epoch": 1.3070777176983295, + "grad_norm": 0.695936381816864, + "learning_rate": 1.7514424972775244e-05, + "loss": 2.4315, + "step": 16196 + }, + { + "epoch": 1.3071584214349126, + "grad_norm": 0.6781535148620605, + "learning_rate": 1.7505500984592304e-05, + "loss": 2.4238, + "step": 16197 + }, + { + "epoch": 1.3072391251714954, + "grad_norm": 0.6549252271652222, + "learning_rate": 1.7496579052387918e-05, + "loss": 2.3766, + "step": 16198 + }, + { + "epoch": 1.3073198289080785, + "grad_norm": 0.6599059700965881, + "learning_rate": 1.7487659176384474e-05, + "loss": 2.4613, + "step": 16199 + }, + { + "epoch": 1.3074005326446614, + "grad_norm": 0.6742514967918396, + "learning_rate": 1.7478741356804228e-05, + "loss": 2.3917, + "step": 16200 + }, + { + "epoch": 1.3074812363812445, + "grad_norm": 0.6542397141456604, + "learning_rate": 1.746982559386946e-05, + "loss": 2.44, + "step": 16201 + }, + { + "epoch": 1.3075619401178273, + "grad_norm": 0.7200478315353394, + "learning_rate": 1.74609118878024e-05, + "loss": 2.4324, + "step": 16202 + }, + { + "epoch": 1.3076426438544104, + "grad_norm": 0.717628002166748, + "learning_rate": 1.745200023882515e-05, + "loss": 2.3996, + "step": 16203 + }, + { + "epoch": 1.3077233475909935, + "grad_norm": 0.7350025177001953, + 
"learning_rate": 1.744309064715983e-05, + "loss": 2.4812, + "step": 16204 + }, + { + "epoch": 1.3078040513275764, + "grad_norm": 0.7253599762916565, + "learning_rate": 1.74341831130285e-05, + "loss": 2.4454, + "step": 16205 + }, + { + "epoch": 1.3078847550641595, + "grad_norm": 0.7537909746170044, + "learning_rate": 1.7425277636653193e-05, + "loss": 2.4247, + "step": 16206 + }, + { + "epoch": 1.3079654588007426, + "grad_norm": 0.7563284039497375, + "learning_rate": 1.7416374218255783e-05, + "loss": 2.3893, + "step": 16207 + }, + { + "epoch": 1.3080461625373254, + "grad_norm": 0.7118926048278809, + "learning_rate": 1.740747285805818e-05, + "loss": 2.4146, + "step": 16208 + }, + { + "epoch": 1.3081268662739085, + "grad_norm": 0.7805569171905518, + "learning_rate": 1.7398573556282304e-05, + "loss": 2.396, + "step": 16209 + }, + { + "epoch": 1.3082075700104916, + "grad_norm": 0.7357630133628845, + "learning_rate": 1.738967631314987e-05, + "loss": 2.5405, + "step": 16210 + }, + { + "epoch": 1.3082882737470745, + "grad_norm": 0.6670438647270203, + "learning_rate": 1.7380781128882652e-05, + "loss": 2.4452, + "step": 16211 + }, + { + "epoch": 1.3083689774836575, + "grad_norm": 0.7374427318572998, + "learning_rate": 1.7371888003702353e-05, + "loss": 2.5143, + "step": 16212 + }, + { + "epoch": 1.3084496812202406, + "grad_norm": 0.672207236289978, + "learning_rate": 1.736299693783058e-05, + "loss": 2.4178, + "step": 16213 + }, + { + "epoch": 1.3085303849568235, + "grad_norm": 0.6926576495170593, + "learning_rate": 1.735410793148894e-05, + "loss": 2.3466, + "step": 16214 + }, + { + "epoch": 1.3086110886934066, + "grad_norm": 0.6928917169570923, + "learning_rate": 1.734522098489899e-05, + "loss": 2.4654, + "step": 16215 + }, + { + "epoch": 1.3086917924299895, + "grad_norm": 0.6536242961883545, + "learning_rate": 1.733633609828217e-05, + "loss": 2.3761, + "step": 16216 + }, + { + "epoch": 1.3087724961665725, + "grad_norm": 0.6993953585624695, + "learning_rate": 
1.732745327185994e-05, + "loss": 2.3963, + "step": 16217 + }, + { + "epoch": 1.3088531999031554, + "grad_norm": 0.6851957440376282, + "learning_rate": 1.731857250585368e-05, + "loss": 2.4253, + "step": 16218 + }, + { + "epoch": 1.3089339036397385, + "grad_norm": 0.6620005965232849, + "learning_rate": 1.7309693800484728e-05, + "loss": 2.4302, + "step": 16219 + }, + { + "epoch": 1.3090146073763216, + "grad_norm": 0.6704410314559937, + "learning_rate": 1.7300817155974356e-05, + "loss": 2.4065, + "step": 16220 + }, + { + "epoch": 1.3090953111129044, + "grad_norm": 0.6882327198982239, + "learning_rate": 1.7291942572543807e-05, + "loss": 2.4526, + "step": 16221 + }, + { + "epoch": 1.3091760148494875, + "grad_norm": 0.6971533298492432, + "learning_rate": 1.7283070050414275e-05, + "loss": 2.4076, + "step": 16222 + }, + { + "epoch": 1.3092567185860706, + "grad_norm": 0.6662544012069702, + "learning_rate": 1.7274199589806827e-05, + "loss": 2.3678, + "step": 16223 + }, + { + "epoch": 1.3093374223226535, + "grad_norm": 0.6342894434928894, + "learning_rate": 1.726533119094258e-05, + "loss": 2.3424, + "step": 16224 + }, + { + "epoch": 1.3094181260592366, + "grad_norm": 0.6808488965034485, + "learning_rate": 1.7256464854042577e-05, + "loss": 2.4286, + "step": 16225 + }, + { + "epoch": 1.3094988297958197, + "grad_norm": 0.6417922973632812, + "learning_rate": 1.7247600579327738e-05, + "loss": 2.3677, + "step": 16226 + }, + { + "epoch": 1.3095795335324025, + "grad_norm": 0.7267102599143982, + "learning_rate": 1.7238738367019002e-05, + "loss": 2.3974, + "step": 16227 + }, + { + "epoch": 1.3096602372689856, + "grad_norm": 0.6915002465248108, + "learning_rate": 1.722987821733725e-05, + "loss": 2.4429, + "step": 16228 + }, + { + "epoch": 1.3097409410055685, + "grad_norm": 0.6930112242698669, + "learning_rate": 1.7221020130503296e-05, + "loss": 2.4272, + "step": 16229 + }, + { + "epoch": 1.3098216447421516, + "grad_norm": 0.7049465179443359, + "learning_rate": 1.7212164106737904e-05, + 
"loss": 2.4089, + "step": 16230 + }, + { + "epoch": 1.3099023484787344, + "grad_norm": 0.7230044603347778, + "learning_rate": 1.720331014626182e-05, + "loss": 2.4313, + "step": 16231 + }, + { + "epoch": 1.3099830522153175, + "grad_norm": 0.6513530015945435, + "learning_rate": 1.7194458249295665e-05, + "loss": 2.3293, + "step": 16232 + }, + { + "epoch": 1.3100637559519006, + "grad_norm": 0.6880534291267395, + "learning_rate": 1.718560841606005e-05, + "loss": 2.4556, + "step": 16233 + }, + { + "epoch": 1.3101444596884835, + "grad_norm": 0.7075292468070984, + "learning_rate": 1.717676064677559e-05, + "loss": 2.4747, + "step": 16234 + }, + { + "epoch": 1.3102251634250666, + "grad_norm": 0.7713594436645508, + "learning_rate": 1.7167914941662723e-05, + "loss": 2.4135, + "step": 16235 + }, + { + "epoch": 1.3103058671616497, + "grad_norm": 0.7883979082107544, + "learning_rate": 1.7159071300941943e-05, + "loss": 2.418, + "step": 16236 + }, + { + "epoch": 1.3103865708982325, + "grad_norm": 0.6588975787162781, + "learning_rate": 1.7150229724833655e-05, + "loss": 2.3295, + "step": 16237 + }, + { + "epoch": 1.3104672746348156, + "grad_norm": 0.679086446762085, + "learning_rate": 1.7141390213558217e-05, + "loss": 2.413, + "step": 16238 + }, + { + "epoch": 1.3105479783713987, + "grad_norm": 0.6803067326545715, + "learning_rate": 1.713255276733592e-05, + "loss": 2.4338, + "step": 16239 + }, + { + "epoch": 1.3106286821079816, + "grad_norm": 0.7041650414466858, + "learning_rate": 1.712371738638704e-05, + "loss": 2.469, + "step": 16240 + }, + { + "epoch": 1.3107093858445646, + "grad_norm": 0.6560962796211243, + "learning_rate": 1.711488407093178e-05, + "loss": 2.4353, + "step": 16241 + }, + { + "epoch": 1.3107900895811477, + "grad_norm": 0.6637921333312988, + "learning_rate": 1.7106052821190244e-05, + "loss": 2.3996, + "step": 16242 + }, + { + "epoch": 1.3108707933177306, + "grad_norm": 0.8131709098815918, + "learning_rate": 1.7097223637382565e-05, + "loss": 2.466, + "step": 16243 + 
}, + { + "epoch": 1.3109514970543137, + "grad_norm": 0.6637253165245056, + "learning_rate": 1.708839651972881e-05, + "loss": 2.3811, + "step": 16244 + }, + { + "epoch": 1.3110322007908966, + "grad_norm": 0.71912682056427, + "learning_rate": 1.7079571468448917e-05, + "loss": 2.4175, + "step": 16245 + }, + { + "epoch": 1.3111129045274796, + "grad_norm": 0.7028010487556458, + "learning_rate": 1.7070748483762854e-05, + "loss": 2.41, + "step": 16246 + }, + { + "epoch": 1.3111936082640625, + "grad_norm": 0.7241945862770081, + "learning_rate": 1.7061927565890522e-05, + "loss": 2.4171, + "step": 16247 + }, + { + "epoch": 1.3112743120006456, + "grad_norm": 0.7039221525192261, + "learning_rate": 1.705310871505177e-05, + "loss": 2.4154, + "step": 16248 + }, + { + "epoch": 1.3113550157372287, + "grad_norm": 0.672444760799408, + "learning_rate": 1.704429193146636e-05, + "loss": 2.4025, + "step": 16249 + }, + { + "epoch": 1.3114357194738115, + "grad_norm": 0.7240859866142273, + "learning_rate": 1.7035477215354068e-05, + "loss": 2.3864, + "step": 16250 + }, + { + "epoch": 1.3115164232103946, + "grad_norm": 0.7379294633865356, + "learning_rate": 1.7026664566934536e-05, + "loss": 2.4663, + "step": 16251 + }, + { + "epoch": 1.3115971269469777, + "grad_norm": 0.6928708553314209, + "learning_rate": 1.7017853986427425e-05, + "loss": 2.4407, + "step": 16252 + }, + { + "epoch": 1.3116778306835606, + "grad_norm": 0.6304093599319458, + "learning_rate": 1.7009045474052298e-05, + "loss": 2.4755, + "step": 16253 + }, + { + "epoch": 1.3117585344201437, + "grad_norm": 0.6945829391479492, + "learning_rate": 1.700023903002872e-05, + "loss": 2.3817, + "step": 16254 + }, + { + "epoch": 1.3118392381567268, + "grad_norm": 0.6899009346961975, + "learning_rate": 1.6991434654576133e-05, + "loss": 2.3989, + "step": 16255 + }, + { + "epoch": 1.3119199418933096, + "grad_norm": 0.7359157204627991, + "learning_rate": 1.6982632347913985e-05, + "loss": 2.3788, + "step": 16256 + }, + { + "epoch": 
1.3120006456298927, + "grad_norm": 0.6562486886978149, + "learning_rate": 1.6973832110261658e-05, + "loss": 2.3955, + "step": 16257 + }, + { + "epoch": 1.3120813493664758, + "grad_norm": 0.6772989630699158, + "learning_rate": 1.696503394183846e-05, + "loss": 2.4788, + "step": 16258 + }, + { + "epoch": 1.3121620531030587, + "grad_norm": 0.7214391231536865, + "learning_rate": 1.695623784286363e-05, + "loss": 2.3836, + "step": 16259 + }, + { + "epoch": 1.3122427568396418, + "grad_norm": 0.7041679620742798, + "learning_rate": 1.6947443813556495e-05, + "loss": 2.4547, + "step": 16260 + }, + { + "epoch": 1.3123234605762246, + "grad_norm": 0.6819555163383484, + "learning_rate": 1.6938651854136135e-05, + "loss": 2.468, + "step": 16261 + }, + { + "epoch": 1.3124041643128077, + "grad_norm": 0.6466858983039856, + "learning_rate": 1.6929861964821693e-05, + "loss": 2.4572, + "step": 16262 + }, + { + "epoch": 1.3124848680493906, + "grad_norm": 0.688709557056427, + "learning_rate": 1.6921074145832248e-05, + "loss": 2.3891, + "step": 16263 + }, + { + "epoch": 1.3125655717859737, + "grad_norm": 0.6896470785140991, + "learning_rate": 1.69122883973868e-05, + "loss": 2.3825, + "step": 16264 + }, + { + "epoch": 1.3126462755225567, + "grad_norm": 0.8242524266242981, + "learning_rate": 1.690350471970431e-05, + "loss": 2.4804, + "step": 16265 + }, + { + "epoch": 1.3127269792591396, + "grad_norm": 0.7506044507026672, + "learning_rate": 1.689472311300373e-05, + "loss": 2.4671, + "step": 16266 + }, + { + "epoch": 1.3128076829957227, + "grad_norm": 0.6776263117790222, + "learning_rate": 1.688594357750386e-05, + "loss": 2.4646, + "step": 16267 + }, + { + "epoch": 1.3128883867323058, + "grad_norm": 0.6843759417533875, + "learning_rate": 1.6877166113423548e-05, + "loss": 2.4147, + "step": 16268 + }, + { + "epoch": 1.3129690904688887, + "grad_norm": 0.6650474667549133, + "learning_rate": 1.686839072098153e-05, + "loss": 2.4379, + "step": 16269 + }, + { + "epoch": 1.3130497942054717, + 
"grad_norm": 0.6636466383934021, + "learning_rate": 1.6859617400396533e-05, + "loss": 2.4334, + "step": 16270 + }, + { + "epoch": 1.3131304979420548, + "grad_norm": 0.649217963218689, + "learning_rate": 1.685084615188719e-05, + "loss": 2.319, + "step": 16271 + }, + { + "epoch": 1.3132112016786377, + "grad_norm": 0.7343039512634277, + "learning_rate": 1.6842076975672126e-05, + "loss": 2.3844, + "step": 16272 + }, + { + "epoch": 1.3132919054152208, + "grad_norm": 0.6916847825050354, + "learning_rate": 1.6833309871969894e-05, + "loss": 2.4544, + "step": 16273 + }, + { + "epoch": 1.3133726091518036, + "grad_norm": 0.6762102842330933, + "learning_rate": 1.6824544840998967e-05, + "loss": 2.3912, + "step": 16274 + }, + { + "epoch": 1.3134533128883867, + "grad_norm": 0.7327221035957336, + "learning_rate": 1.68157818829778e-05, + "loss": 2.4403, + "step": 16275 + }, + { + "epoch": 1.3135340166249696, + "grad_norm": 0.7362363338470459, + "learning_rate": 1.6807020998124812e-05, + "loss": 2.5169, + "step": 16276 + }, + { + "epoch": 1.3136147203615527, + "grad_norm": 0.6882300972938538, + "learning_rate": 1.679826218665832e-05, + "loss": 2.4139, + "step": 16277 + }, + { + "epoch": 1.3136954240981358, + "grad_norm": 0.7146984934806824, + "learning_rate": 1.6789505448796615e-05, + "loss": 2.4738, + "step": 16278 + }, + { + "epoch": 1.3137761278347186, + "grad_norm": 0.6581223607063293, + "learning_rate": 1.6780750784757947e-05, + "loss": 2.4617, + "step": 16279 + }, + { + "epoch": 1.3138568315713017, + "grad_norm": 0.7729318141937256, + "learning_rate": 1.6771998194760518e-05, + "loss": 2.4541, + "step": 16280 + }, + { + "epoch": 1.3139375353078848, + "grad_norm": 0.7617159485816956, + "learning_rate": 1.6763247679022442e-05, + "loss": 2.4727, + "step": 16281 + }, + { + "epoch": 1.3140182390444677, + "grad_norm": 0.6640555262565613, + "learning_rate": 1.6754499237761844e-05, + "loss": 2.4717, + "step": 16282 + }, + { + "epoch": 1.3140989427810508, + "grad_norm": 
0.7289882898330688, + "learning_rate": 1.6745752871196707e-05, + "loss": 2.4515, + "step": 16283 + }, + { + "epoch": 1.3141796465176339, + "grad_norm": 0.7075887322425842, + "learning_rate": 1.6737008579545043e-05, + "loss": 2.4586, + "step": 16284 + }, + { + "epoch": 1.3142603502542167, + "grad_norm": 0.7152252197265625, + "learning_rate": 1.672826636302477e-05, + "loss": 2.512, + "step": 16285 + }, + { + "epoch": 1.3143410539907998, + "grad_norm": 0.6875295639038086, + "learning_rate": 1.6719526221853808e-05, + "loss": 2.4049, + "step": 16286 + }, + { + "epoch": 1.314421757727383, + "grad_norm": 0.6812484860420227, + "learning_rate": 1.671078815624991e-05, + "loss": 2.3705, + "step": 16287 + }, + { + "epoch": 1.3145024614639658, + "grad_norm": 0.664282500743866, + "learning_rate": 1.6702052166430904e-05, + "loss": 2.3776, + "step": 16288 + }, + { + "epoch": 1.3145831652005489, + "grad_norm": 0.7460842728614807, + "learning_rate": 1.66933182526145e-05, + "loss": 2.4525, + "step": 16289 + }, + { + "epoch": 1.3146638689371317, + "grad_norm": 0.6555477380752563, + "learning_rate": 1.6684586415018366e-05, + "loss": 2.3902, + "step": 16290 + }, + { + "epoch": 1.3147445726737148, + "grad_norm": 0.7191921472549438, + "learning_rate": 1.6675856653860135e-05, + "loss": 2.4957, + "step": 16291 + }, + { + "epoch": 1.3148252764102977, + "grad_norm": 0.738667368888855, + "learning_rate": 1.666712896935738e-05, + "loss": 2.4182, + "step": 16292 + }, + { + "epoch": 1.3149059801468808, + "grad_norm": 0.6764421463012695, + "learning_rate": 1.6658403361727593e-05, + "loss": 2.4179, + "step": 16293 + }, + { + "epoch": 1.3149866838834638, + "grad_norm": 0.6981594562530518, + "learning_rate": 1.6649679831188247e-05, + "loss": 2.4288, + "step": 16294 + }, + { + "epoch": 1.3150673876200467, + "grad_norm": 0.6657801866531372, + "learning_rate": 1.6640958377956784e-05, + "loss": 2.3716, + "step": 16295 + }, + { + "epoch": 1.3151480913566298, + "grad_norm": 0.7238973379135132, + 
"learning_rate": 1.6632239002250505e-05, + "loss": 2.438, + "step": 16296 + }, + { + "epoch": 1.3152287950932129, + "grad_norm": 0.6727766990661621, + "learning_rate": 1.6623521704286772e-05, + "loss": 2.4406, + "step": 16297 + }, + { + "epoch": 1.3153094988297958, + "grad_norm": 0.6741603016853333, + "learning_rate": 1.661480648428282e-05, + "loss": 2.4379, + "step": 16298 + }, + { + "epoch": 1.3153902025663788, + "grad_norm": 0.7174610495567322, + "learning_rate": 1.6606093342455865e-05, + "loss": 2.4368, + "step": 16299 + }, + { + "epoch": 1.315470906302962, + "grad_norm": 0.6604920029640198, + "learning_rate": 1.6597382279023057e-05, + "loss": 2.4431, + "step": 16300 + }, + { + "epoch": 1.3155516100395448, + "grad_norm": 0.6930821537971497, + "learning_rate": 1.6588673294201494e-05, + "loss": 2.4064, + "step": 16301 + }, + { + "epoch": 1.3156323137761279, + "grad_norm": 0.6489799618721008, + "learning_rate": 1.657996638820826e-05, + "loss": 2.4256, + "step": 16302 + }, + { + "epoch": 1.315713017512711, + "grad_norm": 0.6781083345413208, + "learning_rate": 1.65712615612603e-05, + "loss": 2.4731, + "step": 16303 + }, + { + "epoch": 1.3157937212492938, + "grad_norm": 0.6710748076438904, + "learning_rate": 1.656255881357458e-05, + "loss": 2.4065, + "step": 16304 + }, + { + "epoch": 1.315874424985877, + "grad_norm": 0.7099822163581848, + "learning_rate": 1.655385814536804e-05, + "loss": 2.3978, + "step": 16305 + }, + { + "epoch": 1.3159551287224598, + "grad_norm": 0.7215133905410767, + "learning_rate": 1.6545159556857447e-05, + "loss": 2.4655, + "step": 16306 + }, + { + "epoch": 1.3160358324590429, + "grad_norm": 0.7705253958702087, + "learning_rate": 1.6536463048259643e-05, + "loss": 2.4576, + "step": 16307 + }, + { + "epoch": 1.3161165361956257, + "grad_norm": 0.6232311725616455, + "learning_rate": 1.6527768619791372e-05, + "loss": 2.3923, + "step": 16308 + }, + { + "epoch": 1.3161972399322088, + "grad_norm": 0.6599528789520264, + "learning_rate": 
1.6519076271669264e-05, + "loss": 2.4236, + "step": 16309 + }, + { + "epoch": 1.316277943668792, + "grad_norm": 0.6598034501075745, + "learning_rate": 1.6510386004110023e-05, + "loss": 2.368, + "step": 16310 + }, + { + "epoch": 1.3163586474053748, + "grad_norm": 0.6949655413627625, + "learning_rate": 1.650169781733022e-05, + "loss": 2.4277, + "step": 16311 + }, + { + "epoch": 1.3164393511419579, + "grad_norm": 0.6838186383247375, + "learning_rate": 1.6493011711546358e-05, + "loss": 2.4413, + "step": 16312 + }, + { + "epoch": 1.316520054878541, + "grad_norm": 0.7026765942573547, + "learning_rate": 1.6484327686974933e-05, + "loss": 2.4628, + "step": 16313 + }, + { + "epoch": 1.3166007586151238, + "grad_norm": 0.745360791683197, + "learning_rate": 1.647564574383237e-05, + "loss": 2.4358, + "step": 16314 + }, + { + "epoch": 1.316681462351707, + "grad_norm": 0.676225483417511, + "learning_rate": 1.6466965882335083e-05, + "loss": 2.4119, + "step": 16315 + }, + { + "epoch": 1.31676216608829, + "grad_norm": 0.6767755150794983, + "learning_rate": 1.6458288102699325e-05, + "loss": 2.4322, + "step": 16316 + }, + { + "epoch": 1.3168428698248729, + "grad_norm": 0.6957309246063232, + "learning_rate": 1.6449612405141424e-05, + "loss": 2.4327, + "step": 16317 + }, + { + "epoch": 1.316923573561456, + "grad_norm": 0.6773050427436829, + "learning_rate": 1.64409387898776e-05, + "loss": 2.4207, + "step": 16318 + }, + { + "epoch": 1.3170042772980388, + "grad_norm": 0.7319278717041016, + "learning_rate": 1.6432267257123978e-05, + "loss": 2.445, + "step": 16319 + }, + { + "epoch": 1.317084981034622, + "grad_norm": 0.7531326413154602, + "learning_rate": 1.6423597807096714e-05, + "loss": 2.3948, + "step": 16320 + }, + { + "epoch": 1.3171656847712048, + "grad_norm": 0.6741669178009033, + "learning_rate": 1.6414930440011854e-05, + "loss": 2.4177, + "step": 16321 + }, + { + "epoch": 1.3172463885077879, + "grad_norm": 0.6814963221549988, + "learning_rate": 1.640626515608543e-05, + "loss": 
2.4419, + "step": 16322 + }, + { + "epoch": 1.317327092244371, + "grad_norm": 0.6740893721580505, + "learning_rate": 1.6397601955533392e-05, + "loss": 2.3516, + "step": 16323 + }, + { + "epoch": 1.3174077959809538, + "grad_norm": 0.7172163724899292, + "learning_rate": 1.6388940838571675e-05, + "loss": 2.4665, + "step": 16324 + }, + { + "epoch": 1.317488499717537, + "grad_norm": 0.6690489053726196, + "learning_rate": 1.6380281805416085e-05, + "loss": 2.3957, + "step": 16325 + }, + { + "epoch": 1.31756920345412, + "grad_norm": 0.7182994484901428, + "learning_rate": 1.6371624856282462e-05, + "loss": 2.4456, + "step": 16326 + }, + { + "epoch": 1.3176499071907029, + "grad_norm": 0.6324366927146912, + "learning_rate": 1.636296999138659e-05, + "loss": 2.4111, + "step": 16327 + }, + { + "epoch": 1.317730610927286, + "grad_norm": 0.6740162372589111, + "learning_rate": 1.6354317210944093e-05, + "loss": 2.451, + "step": 16328 + }, + { + "epoch": 1.317811314663869, + "grad_norm": 0.6964122653007507, + "learning_rate": 1.6345666515170665e-05, + "loss": 2.4269, + "step": 16329 + }, + { + "epoch": 1.317892018400452, + "grad_norm": 0.7093058824539185, + "learning_rate": 1.6337017904281915e-05, + "loss": 2.4686, + "step": 16330 + }, + { + "epoch": 1.317972722137035, + "grad_norm": 0.693233072757721, + "learning_rate": 1.6328371378493367e-05, + "loss": 2.4149, + "step": 16331 + }, + { + "epoch": 1.318053425873618, + "grad_norm": 0.6418019533157349, + "learning_rate": 1.631972693802052e-05, + "loss": 2.4268, + "step": 16332 + }, + { + "epoch": 1.318134129610201, + "grad_norm": 0.6815310120582581, + "learning_rate": 1.631108458307883e-05, + "loss": 2.4274, + "step": 16333 + }, + { + "epoch": 1.318214833346784, + "grad_norm": 0.6774280071258545, + "learning_rate": 1.630244431388369e-05, + "loss": 2.3927, + "step": 16334 + }, + { + "epoch": 1.3182955370833669, + "grad_norm": 0.688090443611145, + "learning_rate": 1.6293806130650413e-05, + "loss": 2.4013, + "step": 16335 + }, + { + 
"epoch": 1.31837624081995, + "grad_norm": 0.7300553321838379, + "learning_rate": 1.6285170033594288e-05, + "loss": 2.4716, + "step": 16336 + }, + { + "epoch": 1.3184569445565328, + "grad_norm": 0.6798286437988281, + "learning_rate": 1.627653602293059e-05, + "loss": 2.3893, + "step": 16337 + }, + { + "epoch": 1.318537648293116, + "grad_norm": 0.6699275970458984, + "learning_rate": 1.6267904098874442e-05, + "loss": 2.4446, + "step": 16338 + }, + { + "epoch": 1.318618352029699, + "grad_norm": 0.7632322311401367, + "learning_rate": 1.6259274261641e-05, + "loss": 2.4434, + "step": 16339 + }, + { + "epoch": 1.3186990557662819, + "grad_norm": 0.7156099677085876, + "learning_rate": 1.6250646511445343e-05, + "loss": 2.4142, + "step": 16340 + }, + { + "epoch": 1.318779759502865, + "grad_norm": 0.7525599598884583, + "learning_rate": 1.6242020848502505e-05, + "loss": 2.3543, + "step": 16341 + }, + { + "epoch": 1.318860463239448, + "grad_norm": 0.7063113451004028, + "learning_rate": 1.623339727302745e-05, + "loss": 2.4754, + "step": 16342 + }, + { + "epoch": 1.318941166976031, + "grad_norm": 0.7138137221336365, + "learning_rate": 1.6224775785235123e-05, + "loss": 2.4223, + "step": 16343 + }, + { + "epoch": 1.319021870712614, + "grad_norm": 0.6976706981658936, + "learning_rate": 1.6216156385340352e-05, + "loss": 2.4878, + "step": 16344 + }, + { + "epoch": 1.319102574449197, + "grad_norm": 0.6931003332138062, + "learning_rate": 1.6207539073557974e-05, + "loss": 2.39, + "step": 16345 + }, + { + "epoch": 1.31918327818578, + "grad_norm": 0.6919357180595398, + "learning_rate": 1.6198923850102765e-05, + "loss": 2.4197, + "step": 16346 + }, + { + "epoch": 1.319263981922363, + "grad_norm": 0.7453805804252625, + "learning_rate": 1.619031071518945e-05, + "loss": 2.4226, + "step": 16347 + }, + { + "epoch": 1.3193446856589461, + "grad_norm": 0.6990562677383423, + "learning_rate": 1.6181699669032658e-05, + "loss": 2.3925, + "step": 16348 + }, + { + "epoch": 1.319425389395529, + "grad_norm": 
0.6974303126335144, + "learning_rate": 1.6173090711847006e-05, + "loss": 2.445, + "step": 16349 + }, + { + "epoch": 1.319506093132112, + "grad_norm": 0.7278286814689636, + "learning_rate": 1.6164483843847057e-05, + "loss": 2.3869, + "step": 16350 + }, + { + "epoch": 1.319586796868695, + "grad_norm": 0.7282646298408508, + "learning_rate": 1.6155879065247326e-05, + "loss": 2.3694, + "step": 16351 + }, + { + "epoch": 1.319667500605278, + "grad_norm": 0.7329844832420349, + "learning_rate": 1.6147276376262255e-05, + "loss": 2.4369, + "step": 16352 + }, + { + "epoch": 1.319748204341861, + "grad_norm": 0.6499385833740234, + "learning_rate": 1.613867577710627e-05, + "loss": 2.441, + "step": 16353 + }, + { + "epoch": 1.319828908078444, + "grad_norm": 0.7026061415672302, + "learning_rate": 1.6130077267993683e-05, + "loss": 2.4117, + "step": 16354 + }, + { + "epoch": 1.319909611815027, + "grad_norm": 0.7007814049720764, + "learning_rate": 1.6121480849138803e-05, + "loss": 2.4287, + "step": 16355 + }, + { + "epoch": 1.31999031555161, + "grad_norm": 0.6525697708129883, + "learning_rate": 1.611288652075591e-05, + "loss": 2.3969, + "step": 16356 + }, + { + "epoch": 1.320071019288193, + "grad_norm": 0.7268216609954834, + "learning_rate": 1.610429428305914e-05, + "loss": 2.4227, + "step": 16357 + }, + { + "epoch": 1.3201517230247761, + "grad_norm": 0.6665107011795044, + "learning_rate": 1.6095704136262668e-05, + "loss": 2.3694, + "step": 16358 + }, + { + "epoch": 1.320232426761359, + "grad_norm": 0.6832399368286133, + "learning_rate": 1.60871160805806e-05, + "loss": 2.4001, + "step": 16359 + }, + { + "epoch": 1.320313130497942, + "grad_norm": 0.6788592338562012, + "learning_rate": 1.6078530116226897e-05, + "loss": 2.4294, + "step": 16360 + }, + { + "epoch": 1.3203938342345252, + "grad_norm": 0.7147449254989624, + "learning_rate": 1.6069946243415625e-05, + "loss": 2.3904, + "step": 16361 + }, + { + "epoch": 1.320474537971108, + "grad_norm": 0.7014418840408325, + "learning_rate": 
1.6061364462360683e-05, + "loss": 2.4026, + "step": 16362 + }, + { + "epoch": 1.3205552417076911, + "grad_norm": 0.6867612600326538, + "learning_rate": 1.6052784773275987e-05, + "loss": 2.4092, + "step": 16363 + }, + { + "epoch": 1.3206359454442742, + "grad_norm": 0.6588961482048035, + "learning_rate": 1.6044207176375303e-05, + "loss": 2.4588, + "step": 16364 + }, + { + "epoch": 1.320716649180857, + "grad_norm": 0.688671350479126, + "learning_rate": 1.6035631671872444e-05, + "loss": 2.3957, + "step": 16365 + }, + { + "epoch": 1.3207973529174402, + "grad_norm": 0.7548064589500427, + "learning_rate": 1.6027058259981154e-05, + "loss": 2.4168, + "step": 16366 + }, + { + "epoch": 1.320878056654023, + "grad_norm": 0.7251972556114197, + "learning_rate": 1.6018486940915044e-05, + "loss": 2.4704, + "step": 16367 + }, + { + "epoch": 1.3209587603906061, + "grad_norm": 0.73149174451828, + "learning_rate": 1.6009917714887778e-05, + "loss": 2.4597, + "step": 16368 + }, + { + "epoch": 1.321039464127189, + "grad_norm": 0.6741003394126892, + "learning_rate": 1.600135058211294e-05, + "loss": 2.3876, + "step": 16369 + }, + { + "epoch": 1.321120167863772, + "grad_norm": 0.6891310214996338, + "learning_rate": 1.5992785542804e-05, + "loss": 2.4229, + "step": 16370 + }, + { + "epoch": 1.3212008716003552, + "grad_norm": 0.7529458403587341, + "learning_rate": 1.5984222597174415e-05, + "loss": 2.45, + "step": 16371 + }, + { + "epoch": 1.321281575336938, + "grad_norm": 0.708134651184082, + "learning_rate": 1.5975661745437664e-05, + "loss": 2.454, + "step": 16372 + }, + { + "epoch": 1.321362279073521, + "grad_norm": 0.7511130571365356, + "learning_rate": 1.596710298780705e-05, + "loss": 2.4201, + "step": 16373 + }, + { + "epoch": 1.3214429828101042, + "grad_norm": 0.6599537134170532, + "learning_rate": 1.595854632449588e-05, + "loss": 2.3982, + "step": 16374 + }, + { + "epoch": 1.321523686546687, + "grad_norm": 0.6821228861808777, + "learning_rate": 1.5949991755717453e-05, + "loss": 2.4525, + 
"step": 16375 + }, + { + "epoch": 1.3216043902832701, + "grad_norm": 0.6872302293777466, + "learning_rate": 1.5941439281684923e-05, + "loss": 2.3631, + "step": 16376 + }, + { + "epoch": 1.3216850940198532, + "grad_norm": 0.6650066375732422, + "learning_rate": 1.5932888902611453e-05, + "loss": 2.3718, + "step": 16377 + }, + { + "epoch": 1.321765797756436, + "grad_norm": 0.6620016694068909, + "learning_rate": 1.5924340618710143e-05, + "loss": 2.4076, + "step": 16378 + }, + { + "epoch": 1.3218465014930192, + "grad_norm": 0.694807231426239, + "learning_rate": 1.5915794430194066e-05, + "loss": 2.4369, + "step": 16379 + }, + { + "epoch": 1.321927205229602, + "grad_norm": 0.6810131669044495, + "learning_rate": 1.590725033727616e-05, + "loss": 2.4151, + "step": 16380 + }, + { + "epoch": 1.3220079089661851, + "grad_norm": 0.768846333026886, + "learning_rate": 1.58987083401694e-05, + "loss": 2.4991, + "step": 16381 + }, + { + "epoch": 1.322088612702768, + "grad_norm": 0.6581698656082153, + "learning_rate": 1.5890168439086672e-05, + "loss": 2.4263, + "step": 16382 + }, + { + "epoch": 1.322169316439351, + "grad_norm": 0.7267034649848938, + "learning_rate": 1.5881630634240818e-05, + "loss": 2.4219, + "step": 16383 + }, + { + "epoch": 1.3222500201759342, + "grad_norm": 0.7391555905342102, + "learning_rate": 1.5873094925844612e-05, + "loss": 2.427, + "step": 16384 + }, + { + "epoch": 1.322330723912517, + "grad_norm": 0.6612021923065186, + "learning_rate": 1.5864561314110815e-05, + "loss": 2.4108, + "step": 16385 + }, + { + "epoch": 1.3224114276491001, + "grad_norm": 0.7118437886238098, + "learning_rate": 1.585602979925206e-05, + "loss": 2.3839, + "step": 16386 + }, + { + "epoch": 1.3224921313856832, + "grad_norm": 0.6663616299629211, + "learning_rate": 1.5847500381480997e-05, + "loss": 2.4302, + "step": 16387 + }, + { + "epoch": 1.322572835122266, + "grad_norm": 0.6848715543746948, + "learning_rate": 1.583897306101022e-05, + "loss": 2.4228, + "step": 16388 + }, + { + "epoch": 
1.3226535388588492, + "grad_norm": 0.680895209312439, + "learning_rate": 1.5830447838052208e-05, + "loss": 2.4457, + "step": 16389 + }, + { + "epoch": 1.3227342425954323, + "grad_norm": 0.683276891708374, + "learning_rate": 1.582192471281946e-05, + "loss": 2.4412, + "step": 16390 + }, + { + "epoch": 1.3228149463320151, + "grad_norm": 0.7311880588531494, + "learning_rate": 1.5813403685524396e-05, + "loss": 2.4604, + "step": 16391 + }, + { + "epoch": 1.3228956500685982, + "grad_norm": 0.6769095659255981, + "learning_rate": 1.580488475637937e-05, + "loss": 2.4311, + "step": 16392 + }, + { + "epoch": 1.3229763538051813, + "grad_norm": 0.6683096289634705, + "learning_rate": 1.579636792559671e-05, + "loss": 2.445, + "step": 16393 + }, + { + "epoch": 1.3230570575417642, + "grad_norm": 0.7268782258033752, + "learning_rate": 1.5787853193388667e-05, + "loss": 2.4176, + "step": 16394 + }, + { + "epoch": 1.3231377612783473, + "grad_norm": 0.6878541707992554, + "learning_rate": 1.5779340559967494e-05, + "loss": 2.4615, + "step": 16395 + }, + { + "epoch": 1.3232184650149301, + "grad_norm": 0.7031291127204895, + "learning_rate": 1.577083002554527e-05, + "loss": 2.3726, + "step": 16396 + }, + { + "epoch": 1.3232991687515132, + "grad_norm": 0.7738708853721619, + "learning_rate": 1.5762321590334138e-05, + "loss": 2.5046, + "step": 16397 + }, + { + "epoch": 1.323379872488096, + "grad_norm": 0.6660913228988647, + "learning_rate": 1.575381525454619e-05, + "loss": 2.3759, + "step": 16398 + }, + { + "epoch": 1.3234605762246792, + "grad_norm": 0.6534021496772766, + "learning_rate": 1.574531101839335e-05, + "loss": 2.3983, + "step": 16399 + }, + { + "epoch": 1.3235412799612623, + "grad_norm": 0.6645511388778687, + "learning_rate": 1.5736808882087606e-05, + "loss": 2.3958, + "step": 16400 + }, + { + "epoch": 1.3236219836978451, + "grad_norm": 0.6723225712776184, + "learning_rate": 1.5728308845840855e-05, + "loss": 2.4248, + "step": 16401 + }, + { + "epoch": 1.3237026874344282, + 
"grad_norm": 0.6609976887702942, + "learning_rate": 1.5719810909864942e-05, + "loss": 2.3888, + "step": 16402 + }, + { + "epoch": 1.3237833911710113, + "grad_norm": 0.6713845729827881, + "learning_rate": 1.5711315074371635e-05, + "loss": 2.4474, + "step": 16403 + }, + { + "epoch": 1.3238640949075942, + "grad_norm": 0.701438307762146, + "learning_rate": 1.5702821339572726e-05, + "loss": 2.4673, + "step": 16404 + }, + { + "epoch": 1.3239447986441772, + "grad_norm": 0.7235428094863892, + "learning_rate": 1.5694329705679834e-05, + "loss": 2.3825, + "step": 16405 + }, + { + "epoch": 1.3240255023807603, + "grad_norm": 0.6785053610801697, + "learning_rate": 1.568584017290462e-05, + "loss": 2.4668, + "step": 16406 + }, + { + "epoch": 1.3241062061173432, + "grad_norm": 0.6918929815292358, + "learning_rate": 1.5677352741458705e-05, + "loss": 2.4329, + "step": 16407 + }, + { + "epoch": 1.3241869098539263, + "grad_norm": 0.7194826006889343, + "learning_rate": 1.5668867411553544e-05, + "loss": 2.3717, + "step": 16408 + }, + { + "epoch": 1.3242676135905094, + "grad_norm": 0.7299134731292725, + "learning_rate": 1.5660384183400658e-05, + "loss": 2.4695, + "step": 16409 + }, + { + "epoch": 1.3243483173270922, + "grad_norm": 0.7047600746154785, + "learning_rate": 1.565190305721147e-05, + "loss": 2.4525, + "step": 16410 + }, + { + "epoch": 1.3244290210636753, + "grad_norm": 0.685001015663147, + "learning_rate": 1.5643424033197328e-05, + "loss": 2.322, + "step": 16411 + }, + { + "epoch": 1.3245097248002582, + "grad_norm": 0.7696635127067566, + "learning_rate": 1.5634947111569588e-05, + "loss": 2.4464, + "step": 16412 + }, + { + "epoch": 1.3245904285368413, + "grad_norm": 0.7066066265106201, + "learning_rate": 1.5626472292539485e-05, + "loss": 2.4315, + "step": 16413 + }, + { + "epoch": 1.3246711322734241, + "grad_norm": 0.6553033590316772, + "learning_rate": 1.5617999576318276e-05, + "loss": 2.4296, + "step": 16414 + }, + { + "epoch": 1.3247518360100072, + "grad_norm": 
0.7031354308128357, + "learning_rate": 1.560952896311707e-05, + "loss": 2.4565, + "step": 16415 + }, + { + "epoch": 1.3248325397465903, + "grad_norm": 0.7826353311538696, + "learning_rate": 1.560106045314701e-05, + "loss": 2.4275, + "step": 16416 + }, + { + "epoch": 1.3249132434831732, + "grad_norm": 0.6408981084823608, + "learning_rate": 1.559259404661916e-05, + "loss": 2.3869, + "step": 16417 + }, + { + "epoch": 1.3249939472197563, + "grad_norm": 0.7487547993659973, + "learning_rate": 1.558412974374448e-05, + "loss": 2.3678, + "step": 16418 + }, + { + "epoch": 1.3250746509563394, + "grad_norm": 0.7163991332054138, + "learning_rate": 1.5575667544733963e-05, + "loss": 2.397, + "step": 16419 + }, + { + "epoch": 1.3251553546929222, + "grad_norm": 0.6933553814888, + "learning_rate": 1.5567207449798515e-05, + "loss": 2.424, + "step": 16420 + }, + { + "epoch": 1.3252360584295053, + "grad_norm": 0.687406063079834, + "learning_rate": 1.5558749459148945e-05, + "loss": 2.4346, + "step": 16421 + }, + { + "epoch": 1.3253167621660884, + "grad_norm": 0.6781243681907654, + "learning_rate": 1.5550293572996054e-05, + "loss": 2.4526, + "step": 16422 + }, + { + "epoch": 1.3253974659026713, + "grad_norm": 0.6632506847381592, + "learning_rate": 1.5541839791550616e-05, + "loss": 2.4559, + "step": 16423 + }, + { + "epoch": 1.3254781696392544, + "grad_norm": 0.668396532535553, + "learning_rate": 1.5533388115023327e-05, + "loss": 2.4463, + "step": 16424 + }, + { + "epoch": 1.3255588733758372, + "grad_norm": 0.6853309869766235, + "learning_rate": 1.552493854362479e-05, + "loss": 2.429, + "step": 16425 + }, + { + "epoch": 1.3256395771124203, + "grad_norm": 0.7443413138389587, + "learning_rate": 1.5516491077565597e-05, + "loss": 2.4091, + "step": 16426 + }, + { + "epoch": 1.3257202808490032, + "grad_norm": 0.690170168876648, + "learning_rate": 1.550804571705632e-05, + "loss": 2.3942, + "step": 16427 + }, + { + "epoch": 1.3258009845855863, + "grad_norm": NaN, + "learning_rate": 
1.550804571705632e-05, + "loss": 2.3788, + "step": 16428 + }, + { + "epoch": 1.3258816883221693, + "grad_norm": 0.6901132464408875, + "learning_rate": 1.5499602462307373e-05, + "loss": 2.3859, + "step": 16429 + }, + { + "epoch": 1.3259623920587522, + "grad_norm": 0.6639334559440613, + "learning_rate": 1.5491161313529223e-05, + "loss": 2.4271, + "step": 16430 + }, + { + "epoch": 1.3260430957953353, + "grad_norm": 0.7121936678886414, + "learning_rate": 1.548272227093227e-05, + "loss": 2.3818, + "step": 16431 + }, + { + "epoch": 1.3261237995319184, + "grad_norm": 0.6863218545913696, + "learning_rate": 1.5474285334726778e-05, + "loss": 2.3744, + "step": 16432 + }, + { + "epoch": 1.3262045032685013, + "grad_norm": 0.6697081327438354, + "learning_rate": 1.5465850505123057e-05, + "loss": 2.4001, + "step": 16433 + }, + { + "epoch": 1.3262852070050843, + "grad_norm": 0.7258912324905396, + "learning_rate": 1.5457417782331308e-05, + "loss": 2.4556, + "step": 16434 + }, + { + "epoch": 1.3263659107416674, + "grad_norm": 0.6930057406425476, + "learning_rate": 1.5448987166561712e-05, + "loss": 2.4979, + "step": 16435 + }, + { + "epoch": 1.3264466144782503, + "grad_norm": 0.6475574970245361, + "learning_rate": 1.5440558658024363e-05, + "loss": 2.3821, + "step": 16436 + }, + { + "epoch": 1.3265273182148334, + "grad_norm": 0.7489237785339355, + "learning_rate": 1.5432132256929367e-05, + "loss": 2.465, + "step": 16437 + }, + { + "epoch": 1.3266080219514165, + "grad_norm": 0.704391360282898, + "learning_rate": 1.5423707963486667e-05, + "loss": 2.433, + "step": 16438 + }, + { + "epoch": 1.3266887256879993, + "grad_norm": 0.669452965259552, + "learning_rate": 1.5415285777906253e-05, + "loss": 2.3981, + "step": 16439 + }, + { + "epoch": 1.3267694294245824, + "grad_norm": 0.6961604356765747, + "learning_rate": 1.540686570039802e-05, + "loss": 2.4684, + "step": 16440 + }, + { + "epoch": 1.3268501331611653, + "grad_norm": 0.6613924503326416, + "learning_rate": 1.539844773117185e-05, + 
"loss": 2.3711, + "step": 16441 + }, + { + "epoch": 1.3269308368977484, + "grad_norm": 0.7019763588905334, + "learning_rate": 1.5390031870437492e-05, + "loss": 2.3716, + "step": 16442 + }, + { + "epoch": 1.3270115406343312, + "grad_norm": 0.700176477432251, + "learning_rate": 1.5381618118404707e-05, + "loss": 2.4305, + "step": 16443 + }, + { + "epoch": 1.3270922443709143, + "grad_norm": 0.6716598272323608, + "learning_rate": 1.5373206475283197e-05, + "loss": 2.3835, + "step": 16444 + }, + { + "epoch": 1.3271729481074974, + "grad_norm": 0.6449697017669678, + "learning_rate": 1.53647969412826e-05, + "loss": 2.3707, + "step": 16445 + }, + { + "epoch": 1.3272536518440803, + "grad_norm": 0.7276685237884521, + "learning_rate": 1.535638951661249e-05, + "loss": 2.4313, + "step": 16446 + }, + { + "epoch": 1.3273343555806634, + "grad_norm": 0.7144705057144165, + "learning_rate": 1.5347984201482456e-05, + "loss": 2.4122, + "step": 16447 + }, + { + "epoch": 1.3274150593172465, + "grad_norm": 0.660225510597229, + "learning_rate": 1.53395809961019e-05, + "loss": 2.4282, + "step": 16448 + }, + { + "epoch": 1.3274957630538293, + "grad_norm": 0.7431676983833313, + "learning_rate": 1.5331179900680293e-05, + "loss": 2.3863, + "step": 16449 + }, + { + "epoch": 1.3275764667904124, + "grad_norm": 0.6670290231704712, + "learning_rate": 1.5322780915427036e-05, + "loss": 2.4266, + "step": 16450 + }, + { + "epoch": 1.3276571705269955, + "grad_norm": 0.711098313331604, + "learning_rate": 1.531438404055141e-05, + "loss": 2.4431, + "step": 16451 + }, + { + "epoch": 1.3277378742635784, + "grad_norm": 0.6908091902732849, + "learning_rate": 1.5305989276262688e-05, + "loss": 2.4153, + "step": 16452 + }, + { + "epoch": 1.3278185780001615, + "grad_norm": 0.7458107471466064, + "learning_rate": 1.5297596622770115e-05, + "loss": 2.4076, + "step": 16453 + }, + { + "epoch": 1.3278992817367445, + "grad_norm": 0.7406951189041138, + "learning_rate": 1.528920608028285e-05, + "loss": 2.3585, + "step": 16454 + 
}, + { + "epoch": 1.3279799854733274, + "grad_norm": 0.718824565410614, + "learning_rate": 1.5280817649010005e-05, + "loss": 2.4092, + "step": 16455 + }, + { + "epoch": 1.3280606892099105, + "grad_norm": 0.7163959741592407, + "learning_rate": 1.527243132916064e-05, + "loss": 2.4344, + "step": 16456 + }, + { + "epoch": 1.3281413929464934, + "grad_norm": 0.6695916652679443, + "learning_rate": 1.5264047120943793e-05, + "loss": 2.4144, + "step": 16457 + }, + { + "epoch": 1.3282220966830764, + "grad_norm": 0.6858509182929993, + "learning_rate": 1.5255665024568366e-05, + "loss": 2.4345, + "step": 16458 + }, + { + "epoch": 1.3283028004196593, + "grad_norm": 0.7277235388755798, + "learning_rate": 1.5247285040243297e-05, + "loss": 2.4219, + "step": 16459 + }, + { + "epoch": 1.3283835041562424, + "grad_norm": 0.6481949090957642, + "learning_rate": 1.5238907168177441e-05, + "loss": 2.4483, + "step": 16460 + }, + { + "epoch": 1.3284642078928255, + "grad_norm": 0.6956833600997925, + "learning_rate": 1.5230531408579574e-05, + "loss": 2.4241, + "step": 16461 + }, + { + "epoch": 1.3285449116294084, + "grad_norm": 0.7266185879707336, + "learning_rate": 1.522215776165845e-05, + "loss": 2.4577, + "step": 16462 + }, + { + "epoch": 1.3286256153659914, + "grad_norm": 0.725574254989624, + "learning_rate": 1.5213786227622773e-05, + "loss": 2.4451, + "step": 16463 + }, + { + "epoch": 1.3287063191025745, + "grad_norm": 0.7550850510597229, + "learning_rate": 1.5205416806681172e-05, + "loss": 2.4262, + "step": 16464 + }, + { + "epoch": 1.3287870228391574, + "grad_norm": 0.6391028761863708, + "learning_rate": 1.5197049499042237e-05, + "loss": 2.4116, + "step": 16465 + }, + { + "epoch": 1.3288677265757405, + "grad_norm": 0.6899027824401855, + "learning_rate": 1.5188684304914524e-05, + "loss": 2.3754, + "step": 16466 + }, + { + "epoch": 1.3289484303123236, + "grad_norm": 0.696681022644043, + "learning_rate": 1.518032122450649e-05, + "loss": 2.471, + "step": 16467 + }, + { + "epoch": 
1.3290291340489064, + "grad_norm": 0.7090939283370972, + "learning_rate": 1.5171960258026551e-05, + "loss": 2.4153, + "step": 16468 + }, + { + "epoch": 1.3291098377854895, + "grad_norm": 0.7125746607780457, + "learning_rate": 1.5163601405683148e-05, + "loss": 2.4102, + "step": 16469 + }, + { + "epoch": 1.3291905415220726, + "grad_norm": 0.7407518029212952, + "learning_rate": 1.5155244667684531e-05, + "loss": 2.429, + "step": 16470 + }, + { + "epoch": 1.3292712452586555, + "grad_norm": 0.7401885390281677, + "learning_rate": 1.5146890044239004e-05, + "loss": 2.4577, + "step": 16471 + }, + { + "epoch": 1.3293519489952383, + "grad_norm": 0.7625757455825806, + "learning_rate": 1.5138537535554786e-05, + "loss": 2.3813, + "step": 16472 + }, + { + "epoch": 1.3294326527318214, + "grad_norm": 0.7423396706581116, + "learning_rate": 1.5130187141840057e-05, + "loss": 2.3797, + "step": 16473 + }, + { + "epoch": 1.3295133564684045, + "grad_norm": 0.7029228806495667, + "learning_rate": 1.5121838863302884e-05, + "loss": 2.4203, + "step": 16474 + }, + { + "epoch": 1.3295940602049874, + "grad_norm": 0.8062863349914551, + "learning_rate": 1.5113492700151378e-05, + "loss": 2.3743, + "step": 16475 + }, + { + "epoch": 1.3296747639415705, + "grad_norm": 0.7113343477249146, + "learning_rate": 1.5105148652593548e-05, + "loss": 2.3837, + "step": 16476 + }, + { + "epoch": 1.3297554676781536, + "grad_norm": 0.6733126044273376, + "learning_rate": 1.5096806720837309e-05, + "loss": 2.4677, + "step": 16477 + }, + { + "epoch": 1.3298361714147364, + "grad_norm": 0.6936657428741455, + "learning_rate": 1.5088466905090593e-05, + "loss": 2.3677, + "step": 16478 + }, + { + "epoch": 1.3299168751513195, + "grad_norm": 0.746746301651001, + "learning_rate": 1.5080129205561255e-05, + "loss": 2.423, + "step": 16479 + }, + { + "epoch": 1.3299975788879026, + "grad_norm": 0.6879116296768188, + "learning_rate": 1.5071793622457065e-05, + "loss": 2.4867, + "step": 16480 + }, + { + "epoch": 1.3300782826244855, + 
"grad_norm": 0.6841214299201965, + "learning_rate": 1.5063460155985776e-05, + "loss": 2.5015, + "step": 16481 + }, + { + "epoch": 1.3301589863610686, + "grad_norm": 0.6955111622810364, + "learning_rate": 1.5055128806355123e-05, + "loss": 2.3975, + "step": 16482 + }, + { + "epoch": 1.3302396900976516, + "grad_norm": 0.7084987163543701, + "learning_rate": 1.5046799573772673e-05, + "loss": 2.4511, + "step": 16483 + }, + { + "epoch": 1.3303203938342345, + "grad_norm": 0.6905840039253235, + "learning_rate": 1.5038472458446051e-05, + "loss": 2.3542, + "step": 16484 + }, + { + "epoch": 1.3304010975708176, + "grad_norm": 0.7182672023773193, + "learning_rate": 1.5030147460582788e-05, + "loss": 2.3673, + "step": 16485 + }, + { + "epoch": 1.3304818013074005, + "grad_norm": 0.6805183291435242, + "learning_rate": 1.5021824580390353e-05, + "loss": 2.3751, + "step": 16486 + }, + { + "epoch": 1.3305625050439835, + "grad_norm": 0.6278836727142334, + "learning_rate": 1.5013503818076202e-05, + "loss": 2.3508, + "step": 16487 + }, + { + "epoch": 1.3306432087805664, + "grad_norm": 0.664000391960144, + "learning_rate": 1.500518517384768e-05, + "loss": 2.4039, + "step": 16488 + }, + { + "epoch": 1.3307239125171495, + "grad_norm": 0.6906681060791016, + "learning_rate": 1.4996868647912155e-05, + "loss": 2.4068, + "step": 16489 + }, + { + "epoch": 1.3308046162537326, + "grad_norm": 0.6756102442741394, + "learning_rate": 1.4988554240476826e-05, + "loss": 2.4423, + "step": 16490 + }, + { + "epoch": 1.3308853199903155, + "grad_norm": 0.7013095021247864, + "learning_rate": 1.4980241951748964e-05, + "loss": 2.3536, + "step": 16491 + }, + { + "epoch": 1.3309660237268985, + "grad_norm": 0.6689851880073547, + "learning_rate": 1.4971931781935732e-05, + "loss": 2.4192, + "step": 16492 + }, + { + "epoch": 1.3310467274634816, + "grad_norm": 0.6411572694778442, + "learning_rate": 1.4963623731244202e-05, + "loss": 2.4012, + "step": 16493 + }, + { + "epoch": 1.3311274312000645, + "grad_norm": 
0.7209812998771667, + "learning_rate": 1.4955317799881453e-05, + "loss": 2.378, + "step": 16494 + }, + { + "epoch": 1.3312081349366476, + "grad_norm": 0.7041119933128357, + "learning_rate": 1.4947013988054504e-05, + "loss": 2.4047, + "step": 16495 + }, + { + "epoch": 1.3312888386732307, + "grad_norm": 0.6928852796554565, + "learning_rate": 1.4938712295970292e-05, + "loss": 2.4489, + "step": 16496 + }, + { + "epoch": 1.3313695424098135, + "grad_norm": 0.6923524141311646, + "learning_rate": 1.4930412723835718e-05, + "loss": 2.3752, + "step": 16497 + }, + { + "epoch": 1.3314502461463966, + "grad_norm": 0.7034686803817749, + "learning_rate": 1.4922115271857662e-05, + "loss": 2.3898, + "step": 16498 + }, + { + "epoch": 1.3315309498829797, + "grad_norm": 0.6717320084571838, + "learning_rate": 1.4913819940242856e-05, + "loss": 2.3629, + "step": 16499 + }, + { + "epoch": 1.3316116536195626, + "grad_norm": 0.6885079741477966, + "learning_rate": 1.4905526729198083e-05, + "loss": 2.4321, + "step": 16500 + }, + { + "epoch": 1.3316923573561457, + "grad_norm": 0.662452757358551, + "learning_rate": 1.489723563893004e-05, + "loss": 2.4532, + "step": 16501 + }, + { + "epoch": 1.3317730610927285, + "grad_norm": 0.6650903224945068, + "learning_rate": 1.4888946669645332e-05, + "loss": 2.4347, + "step": 16502 + }, + { + "epoch": 1.3318537648293116, + "grad_norm": 0.7217590808868408, + "learning_rate": 1.4880659821550546e-05, + "loss": 2.4641, + "step": 16503 + }, + { + "epoch": 1.3319344685658945, + "grad_norm": 0.7063763737678528, + "learning_rate": 1.4872375094852232e-05, + "loss": 2.4365, + "step": 16504 + }, + { + "epoch": 1.3320151723024776, + "grad_norm": 0.7366454005241394, + "learning_rate": 1.4864092489756853e-05, + "loss": 2.4223, + "step": 16505 + }, + { + "epoch": 1.3320958760390607, + "grad_norm": 0.7132206559181213, + "learning_rate": 1.4855812006470838e-05, + "loss": 2.4404, + "step": 16506 + }, + { + "epoch": 1.3321765797756435, + "grad_norm": 0.665553867816925, + 
"learning_rate": 1.484753364520055e-05, + "loss": 2.3818, + "step": 16507 + }, + { + "epoch": 1.3322572835122266, + "grad_norm": 0.7854028344154358, + "learning_rate": 1.483925740615234e-05, + "loss": 2.4111, + "step": 16508 + }, + { + "epoch": 1.3323379872488097, + "grad_norm": 0.7331317663192749, + "learning_rate": 1.4830983289532418e-05, + "loss": 2.4446, + "step": 16509 + }, + { + "epoch": 1.3324186909853926, + "grad_norm": 0.670315146446228, + "learning_rate": 1.4822711295547042e-05, + "loss": 2.4017, + "step": 16510 + }, + { + "epoch": 1.3324993947219756, + "grad_norm": 0.7242144346237183, + "learning_rate": 1.481444142440237e-05, + "loss": 2.4281, + "step": 16511 + }, + { + "epoch": 1.3325800984585587, + "grad_norm": 0.7108538746833801, + "learning_rate": 1.4806173676304468e-05, + "loss": 2.4331, + "step": 16512 + }, + { + "epoch": 1.3326608021951416, + "grad_norm": 0.658989667892456, + "learning_rate": 1.479790805145943e-05, + "loss": 2.4321, + "step": 16513 + }, + { + "epoch": 1.3327415059317247, + "grad_norm": 0.6596404314041138, + "learning_rate": 1.4789644550073233e-05, + "loss": 2.3817, + "step": 16514 + }, + { + "epoch": 1.3328222096683078, + "grad_norm": 0.6922028064727783, + "learning_rate": 1.4781383172351837e-05, + "loss": 2.399, + "step": 16515 + }, + { + "epoch": 1.3329029134048906, + "grad_norm": 0.750747799873352, + "learning_rate": 1.4773123918501141e-05, + "loss": 2.4502, + "step": 16516 + }, + { + "epoch": 1.3329836171414737, + "grad_norm": 0.6887632608413696, + "learning_rate": 1.4764866788727006e-05, + "loss": 2.3636, + "step": 16517 + }, + { + "epoch": 1.3330643208780566, + "grad_norm": 0.6751166582107544, + "learning_rate": 1.4756611783235163e-05, + "loss": 2.3956, + "step": 16518 + }, + { + "epoch": 1.3331450246146397, + "grad_norm": 0.679040253162384, + "learning_rate": 1.4748358902231395e-05, + "loss": 2.4044, + "step": 16519 + }, + { + "epoch": 1.3332257283512225, + "grad_norm": 0.6396780610084534, + "learning_rate": 
1.4740108145921373e-05, + "loss": 2.4114, + "step": 16520 + }, + { + "epoch": 1.3333064320878056, + "grad_norm": 0.6686230301856995, + "learning_rate": 1.4731859514510738e-05, + "loss": 2.4535, + "step": 16521 + }, + { + "epoch": 1.3333871358243887, + "grad_norm": 0.6693681478500366, + "learning_rate": 1.472361300820505e-05, + "loss": 2.3885, + "step": 16522 + }, + { + "epoch": 1.3334678395609716, + "grad_norm": 0.7700718641281128, + "learning_rate": 1.4715368627209836e-05, + "loss": 2.3939, + "step": 16523 + }, + { + "epoch": 1.3335485432975547, + "grad_norm": 0.7203121781349182, + "learning_rate": 1.4707126371730561e-05, + "loss": 2.4644, + "step": 16524 + }, + { + "epoch": 1.3336292470341378, + "grad_norm": 0.7798308730125427, + "learning_rate": 1.4698886241972665e-05, + "loss": 2.4293, + "step": 16525 + }, + { + "epoch": 1.3337099507707206, + "grad_norm": 0.7017160654067993, + "learning_rate": 1.4690648238141503e-05, + "loss": 2.4327, + "step": 16526 + }, + { + "epoch": 1.3337906545073037, + "grad_norm": 0.6522603631019592, + "learning_rate": 1.468241236044241e-05, + "loss": 2.3955, + "step": 16527 + }, + { + "epoch": 1.3338713582438868, + "grad_norm": 0.766222357749939, + "learning_rate": 1.4674178609080602e-05, + "loss": 2.4652, + "step": 16528 + }, + { + "epoch": 1.3339520619804697, + "grad_norm": 0.7351565361022949, + "learning_rate": 1.4665946984261303e-05, + "loss": 2.4607, + "step": 16529 + }, + { + "epoch": 1.3340327657170528, + "grad_norm": 0.6817728281021118, + "learning_rate": 1.4657717486189693e-05, + "loss": 2.3687, + "step": 16530 + }, + { + "epoch": 1.3341134694536356, + "grad_norm": 0.7401643395423889, + "learning_rate": 1.464949011507083e-05, + "loss": 2.4179, + "step": 16531 + }, + { + "epoch": 1.3341941731902187, + "grad_norm": 0.7783530354499817, + "learning_rate": 1.4641264871109784e-05, + "loss": 2.4088, + "step": 16532 + }, + { + "epoch": 1.3342748769268016, + "grad_norm": 0.6761943697929382, + "learning_rate": 1.4633041754511534e-05, + 
"loss": 2.4141, + "step": 16533 + }, + { + "epoch": 1.3343555806633847, + "grad_norm": 0.6842260360717773, + "learning_rate": 1.4624820765481073e-05, + "loss": 2.4918, + "step": 16534 + }, + { + "epoch": 1.3344362843999678, + "grad_norm": 0.6906094551086426, + "learning_rate": 1.4616601904223225e-05, + "loss": 2.4576, + "step": 16535 + }, + { + "epoch": 1.3345169881365506, + "grad_norm": 0.6549125909805298, + "learning_rate": 1.4608385170942829e-05, + "loss": 2.3748, + "step": 16536 + }, + { + "epoch": 1.3345976918731337, + "grad_norm": 0.6603896617889404, + "learning_rate": 1.4600170565844728e-05, + "loss": 2.3739, + "step": 16537 + }, + { + "epoch": 1.3346783956097168, + "grad_norm": 0.6413096189498901, + "learning_rate": 1.4591958089133606e-05, + "loss": 2.3979, + "step": 16538 + }, + { + "epoch": 1.3347590993462997, + "grad_norm": 0.7085204720497131, + "learning_rate": 1.4583747741014142e-05, + "loss": 2.4185, + "step": 16539 + }, + { + "epoch": 1.3348398030828827, + "grad_norm": 0.6517937183380127, + "learning_rate": 1.4575539521690983e-05, + "loss": 2.3938, + "step": 16540 + }, + { + "epoch": 1.3349205068194658, + "grad_norm": 0.6326449513435364, + "learning_rate": 1.4567333431368658e-05, + "loss": 2.4613, + "step": 16541 + }, + { + "epoch": 1.3350012105560487, + "grad_norm": 0.8046317100524902, + "learning_rate": 1.4559129470251708e-05, + "loss": 2.4547, + "step": 16542 + }, + { + "epoch": 1.3350819142926318, + "grad_norm": 0.6661570072174072, + "learning_rate": 1.455092763854462e-05, + "loss": 2.3636, + "step": 16543 + }, + { + "epoch": 1.3351626180292149, + "grad_norm": 0.6806541085243225, + "learning_rate": 1.454272793645176e-05, + "loss": 2.4309, + "step": 16544 + }, + { + "epoch": 1.3352433217657977, + "grad_norm": 0.651836097240448, + "learning_rate": 1.45345303641775e-05, + "loss": 2.3862, + "step": 16545 + }, + { + "epoch": 1.3353240255023808, + "grad_norm": 0.7448983192443848, + "learning_rate": 1.4526334921926165e-05, + "loss": 2.4654, + "step": 
16546 + }, + { + "epoch": 1.3354047292389637, + "grad_norm": 0.6885285973548889, + "learning_rate": 1.4518141609901992e-05, + "loss": 2.3943, + "step": 16547 + }, + { + "epoch": 1.3354854329755468, + "grad_norm": 0.7204004526138306, + "learning_rate": 1.450995042830917e-05, + "loss": 2.4117, + "step": 16548 + }, + { + "epoch": 1.3355661367121296, + "grad_norm": 0.6551961898803711, + "learning_rate": 1.4501761377351864e-05, + "loss": 2.4269, + "step": 16549 + }, + { + "epoch": 1.3356468404487127, + "grad_norm": 0.7191253304481506, + "learning_rate": 1.4493574457234182e-05, + "loss": 2.3472, + "step": 16550 + }, + { + "epoch": 1.3357275441852958, + "grad_norm": 0.6793580651283264, + "learning_rate": 1.4485389668160121e-05, + "loss": 2.4264, + "step": 16551 + }, + { + "epoch": 1.3358082479218787, + "grad_norm": 0.704250693321228, + "learning_rate": 1.4477207010333682e-05, + "loss": 2.5236, + "step": 16552 + }, + { + "epoch": 1.3358889516584618, + "grad_norm": 0.6826470494270325, + "learning_rate": 1.4469026483958837e-05, + "loss": 2.4473, + "step": 16553 + }, + { + "epoch": 1.3359696553950449, + "grad_norm": 0.6646167039871216, + "learning_rate": 1.4460848089239399e-05, + "loss": 2.4232, + "step": 16554 + }, + { + "epoch": 1.3360503591316277, + "grad_norm": 0.7604451179504395, + "learning_rate": 1.4452671826379227e-05, + "loss": 2.4208, + "step": 16555 + }, + { + "epoch": 1.3361310628682108, + "grad_norm": 0.7129300236701965, + "learning_rate": 1.4444497695582093e-05, + "loss": 2.4304, + "step": 16556 + }, + { + "epoch": 1.336211766604794, + "grad_norm": 0.6769927740097046, + "learning_rate": 1.4436325697051733e-05, + "loss": 2.3467, + "step": 16557 + }, + { + "epoch": 1.3362924703413768, + "grad_norm": 0.6568608283996582, + "learning_rate": 1.4428155830991797e-05, + "loss": 2.4285, + "step": 16558 + }, + { + "epoch": 1.3363731740779599, + "grad_norm": 0.7687276005744934, + "learning_rate": 1.4419988097605919e-05, + "loss": 2.4815, + "step": 16559 + }, + { + "epoch": 
1.336453877814543, + "grad_norm": 0.7001463770866394, + "learning_rate": 1.4411822497097638e-05, + "loss": 2.4629, + "step": 16560 + }, + { + "epoch": 1.3365345815511258, + "grad_norm": 0.7211995720863342, + "learning_rate": 1.4403659029670458e-05, + "loss": 2.4323, + "step": 16561 + }, + { + "epoch": 1.336615285287709, + "grad_norm": 0.7371769547462463, + "learning_rate": 1.439549769552787e-05, + "loss": 2.3962, + "step": 16562 + }, + { + "epoch": 1.3366959890242918, + "grad_norm": 0.7475463151931763, + "learning_rate": 1.4387338494873237e-05, + "loss": 2.3593, + "step": 16563 + }, + { + "epoch": 1.3367766927608749, + "grad_norm": 0.7215834856033325, + "learning_rate": 1.4379181427909916e-05, + "loss": 2.3687, + "step": 16564 + }, + { + "epoch": 1.3368573964974577, + "grad_norm": 0.7160200476646423, + "learning_rate": 1.4371026494841211e-05, + "loss": 2.3652, + "step": 16565 + }, + { + "epoch": 1.3369381002340408, + "grad_norm": 0.6636231541633606, + "learning_rate": 1.436287369587036e-05, + "loss": 2.4628, + "step": 16566 + }, + { + "epoch": 1.337018803970624, + "grad_norm": 0.657774806022644, + "learning_rate": 1.4354723031200556e-05, + "loss": 2.4082, + "step": 16567 + }, + { + "epoch": 1.3370995077072068, + "grad_norm": 0.7020300626754761, + "learning_rate": 1.4346574501034936e-05, + "loss": 2.3821, + "step": 16568 + }, + { + "epoch": 1.3371802114437898, + "grad_norm": 0.6800786256790161, + "learning_rate": 1.4338428105576595e-05, + "loss": 2.3839, + "step": 16569 + }, + { + "epoch": 1.337260915180373, + "grad_norm": 0.7176932692527771, + "learning_rate": 1.4330283845028536e-05, + "loss": 2.4614, + "step": 16570 + }, + { + "epoch": 1.3373416189169558, + "grad_norm": 0.7233355641365051, + "learning_rate": 1.432214171959374e-05, + "loss": 2.4048, + "step": 16571 + }, + { + "epoch": 1.3374223226535389, + "grad_norm": 0.7721874117851257, + "learning_rate": 1.4314001729475157e-05, + "loss": 2.4169, + "step": 16572 + }, + { + "epoch": 1.337503026390122, + 
"grad_norm": 0.7123380303382874, + "learning_rate": 1.4305863874875613e-05, + "loss": 2.3799, + "step": 16573 + }, + { + "epoch": 1.3375837301267048, + "grad_norm": 0.7297765016555786, + "learning_rate": 1.4297728155997958e-05, + "loss": 2.4655, + "step": 16574 + }, + { + "epoch": 1.337664433863288, + "grad_norm": 0.6806401610374451, + "learning_rate": 1.428959457304493e-05, + "loss": 2.4102, + "step": 16575 + }, + { + "epoch": 1.3377451375998708, + "grad_norm": 0.6811275482177734, + "learning_rate": 1.4281463126219264e-05, + "loss": 2.4298, + "step": 16576 + }, + { + "epoch": 1.3378258413364539, + "grad_norm": 0.6900678277015686, + "learning_rate": 1.427333381572361e-05, + "loss": 2.4745, + "step": 16577 + }, + { + "epoch": 1.3379065450730367, + "grad_norm": 0.7815307974815369, + "learning_rate": 1.4265206641760587e-05, + "loss": 2.3624, + "step": 16578 + }, + { + "epoch": 1.3379872488096198, + "grad_norm": 0.6948800683021545, + "learning_rate": 1.4257081604532708e-05, + "loss": 2.4142, + "step": 16579 + }, + { + "epoch": 1.338067952546203, + "grad_norm": 0.7387657165527344, + "learning_rate": 1.4248958704242488e-05, + "loss": 2.4241, + "step": 16580 + }, + { + "epoch": 1.3381486562827858, + "grad_norm": 0.7158597111701965, + "learning_rate": 1.4240837941092367e-05, + "loss": 2.4473, + "step": 16581 + }, + { + "epoch": 1.3382293600193689, + "grad_norm": 0.758674144744873, + "learning_rate": 1.423271931528477e-05, + "loss": 2.4504, + "step": 16582 + }, + { + "epoch": 1.338310063755952, + "grad_norm": 0.6904417872428894, + "learning_rate": 1.4224602827021982e-05, + "loss": 2.4288, + "step": 16583 + }, + { + "epoch": 1.3383907674925348, + "grad_norm": 0.6988760828971863, + "learning_rate": 1.4216488476506307e-05, + "loss": 2.3874, + "step": 16584 + }, + { + "epoch": 1.338471471229118, + "grad_norm": 0.6969872117042542, + "learning_rate": 1.4208376263940003e-05, + "loss": 2.3388, + "step": 16585 + }, + { + "epoch": 1.338552174965701, + "grad_norm": 0.687179684638977, 
+ "learning_rate": 1.420026618952518e-05, + "loss": 2.431, + "step": 16586 + }, + { + "epoch": 1.3386328787022839, + "grad_norm": 0.6319810152053833, + "learning_rate": 1.4192158253464038e-05, + "loss": 2.4415, + "step": 16587 + }, + { + "epoch": 1.338713582438867, + "grad_norm": 0.7554977536201477, + "learning_rate": 1.4184052455958629e-05, + "loss": 2.3863, + "step": 16588 + }, + { + "epoch": 1.33879428617545, + "grad_norm": 0.7025974988937378, + "learning_rate": 1.4175948797210936e-05, + "loss": 2.3957, + "step": 16589 + }, + { + "epoch": 1.338874989912033, + "grad_norm": 0.7270370721817017, + "learning_rate": 1.4167847277422952e-05, + "loss": 2.4309, + "step": 16590 + }, + { + "epoch": 1.338955693648616, + "grad_norm": 0.7017608284950256, + "learning_rate": 1.4159747896796593e-05, + "loss": 2.4142, + "step": 16591 + }, + { + "epoch": 1.3390363973851989, + "grad_norm": 0.7114055156707764, + "learning_rate": 1.4151650655533687e-05, + "loss": 2.473, + "step": 16592 + }, + { + "epoch": 1.339117101121782, + "grad_norm": 0.6420357823371887, + "learning_rate": 1.4143555553836063e-05, + "loss": 2.3671, + "step": 16593 + }, + { + "epoch": 1.3391978048583648, + "grad_norm": 0.7067350745201111, + "learning_rate": 1.413546259190548e-05, + "loss": 2.4422, + "step": 16594 + }, + { + "epoch": 1.339278508594948, + "grad_norm": 0.7376763224601746, + "learning_rate": 1.4127371769943598e-05, + "loss": 2.4443, + "step": 16595 + }, + { + "epoch": 1.339359212331531, + "grad_norm": 0.646515965461731, + "learning_rate": 1.4119283088152092e-05, + "loss": 2.3949, + "step": 16596 + }, + { + "epoch": 1.3394399160681139, + "grad_norm": 0.6896061301231384, + "learning_rate": 1.411119654673254e-05, + "loss": 2.4535, + "step": 16597 + }, + { + "epoch": 1.339520619804697, + "grad_norm": 0.6992611289024353, + "learning_rate": 1.4103112145886489e-05, + "loss": 2.3983, + "step": 16598 + }, + { + "epoch": 1.33960132354128, + "grad_norm": 0.7176348567008972, + "learning_rate": 
1.4095029885815426e-05, + "loss": 2.4671, + "step": 16599 + }, + { + "epoch": 1.339682027277863, + "grad_norm": 0.6635856628417969, + "learning_rate": 1.4086949766720759e-05, + "loss": 2.4235, + "step": 16600 + }, + { + "epoch": 1.339762731014446, + "grad_norm": 0.673332154750824, + "learning_rate": 1.4078871788803915e-05, + "loss": 2.4328, + "step": 16601 + }, + { + "epoch": 1.339843434751029, + "grad_norm": 0.6738821864128113, + "learning_rate": 1.407079595226617e-05, + "loss": 2.4786, + "step": 16602 + }, + { + "epoch": 1.339924138487612, + "grad_norm": 0.690605103969574, + "learning_rate": 1.4062722257308803e-05, + "loss": 2.4025, + "step": 16603 + }, + { + "epoch": 1.340004842224195, + "grad_norm": 0.7186758518218994, + "learning_rate": 1.4054650704133066e-05, + "loss": 2.4793, + "step": 16604 + }, + { + "epoch": 1.3400855459607781, + "grad_norm": 0.6484951376914978, + "learning_rate": 1.4046581292940075e-05, + "loss": 2.3855, + "step": 16605 + }, + { + "epoch": 1.340166249697361, + "grad_norm": 0.6993771195411682, + "learning_rate": 1.403851402393096e-05, + "loss": 2.3872, + "step": 16606 + }, + { + "epoch": 1.340246953433944, + "grad_norm": 0.7446531653404236, + "learning_rate": 1.403044889730678e-05, + "loss": 2.4253, + "step": 16607 + }, + { + "epoch": 1.340327657170527, + "grad_norm": 0.6873160004615784, + "learning_rate": 1.4022385913268542e-05, + "loss": 2.464, + "step": 16608 + }, + { + "epoch": 1.34040836090711, + "grad_norm": 0.6570948362350464, + "learning_rate": 1.4014325072017198e-05, + "loss": 2.4063, + "step": 16609 + }, + { + "epoch": 1.3404890646436929, + "grad_norm": 0.7209224104881287, + "learning_rate": 1.4006266373753651e-05, + "loss": 2.4827, + "step": 16610 + }, + { + "epoch": 1.340569768380276, + "grad_norm": 0.7283413410186768, + "learning_rate": 1.3998209818678732e-05, + "loss": 2.4009, + "step": 16611 + }, + { + "epoch": 1.340650472116859, + "grad_norm": 0.6650960445404053, + "learning_rate": 1.3990155406993221e-05, + "loss": 2.3576, 
+ "step": 16612 + }, + { + "epoch": 1.340731175853442, + "grad_norm": 0.6857860088348389, + "learning_rate": 1.3982103138897873e-05, + "loss": 2.4686, + "step": 16613 + }, + { + "epoch": 1.340811879590025, + "grad_norm": 0.7065873146057129, + "learning_rate": 1.3974053014593402e-05, + "loss": 2.3999, + "step": 16614 + }, + { + "epoch": 1.340892583326608, + "grad_norm": 0.8093010783195496, + "learning_rate": 1.3966005034280372e-05, + "loss": 2.4273, + "step": 16615 + }, + { + "epoch": 1.340973287063191, + "grad_norm": 0.649132251739502, + "learning_rate": 1.3957959198159387e-05, + "loss": 2.3418, + "step": 16616 + }, + { + "epoch": 1.341053990799774, + "grad_norm": 0.7114978432655334, + "learning_rate": 1.3949915506430976e-05, + "loss": 2.4393, + "step": 16617 + }, + { + "epoch": 1.3411346945363571, + "grad_norm": 0.7989282608032227, + "learning_rate": 1.3941873959295615e-05, + "loss": 2.4044, + "step": 16618 + }, + { + "epoch": 1.34121539827294, + "grad_norm": 0.7373676896095276, + "learning_rate": 1.3933834556953707e-05, + "loss": 2.4758, + "step": 16619 + }, + { + "epoch": 1.341296102009523, + "grad_norm": 0.7076435089111328, + "learning_rate": 1.3925797299605647e-05, + "loss": 2.4429, + "step": 16620 + }, + { + "epoch": 1.3413768057461062, + "grad_norm": 0.6739028692245483, + "learning_rate": 1.39177621874517e-05, + "loss": 2.4275, + "step": 16621 + }, + { + "epoch": 1.341457509482689, + "grad_norm": 0.7134198546409607, + "learning_rate": 1.3909729220692125e-05, + "loss": 2.4541, + "step": 16622 + }, + { + "epoch": 1.3415382132192721, + "grad_norm": 0.6770301461219788, + "learning_rate": 1.3901698399527175e-05, + "loss": 2.4143, + "step": 16623 + }, + { + "epoch": 1.341618916955855, + "grad_norm": 0.7146373987197876, + "learning_rate": 1.3893669724156943e-05, + "loss": 2.4886, + "step": 16624 + }, + { + "epoch": 1.341699620692438, + "grad_norm": 0.6801536083221436, + "learning_rate": 1.3885643194781539e-05, + "loss": 2.4154, + "step": 16625 + }, + { + "epoch": 
1.341780324429021, + "grad_norm": 0.7350363731384277, + "learning_rate": 1.3877618811601024e-05, + "loss": 2.3918, + "step": 16626 + }, + { + "epoch": 1.341861028165604, + "grad_norm": 0.7088882327079773, + "learning_rate": 1.3869596574815358e-05, + "loss": 2.412, + "step": 16627 + }, + { + "epoch": 1.3419417319021871, + "grad_norm": 0.7199791669845581, + "learning_rate": 1.3861576484624506e-05, + "loss": 2.3912, + "step": 16628 + }, + { + "epoch": 1.34202243563877, + "grad_norm": 0.692971408367157, + "learning_rate": 1.3853558541228328e-05, + "loss": 2.3826, + "step": 16629 + }, + { + "epoch": 1.342103139375353, + "grad_norm": 0.7524722814559937, + "learning_rate": 1.3845542744826679e-05, + "loss": 2.4227, + "step": 16630 + }, + { + "epoch": 1.3421838431119362, + "grad_norm": 0.6624585390090942, + "learning_rate": 1.3837529095619307e-05, + "loss": 2.3649, + "step": 16631 + }, + { + "epoch": 1.342264546848519, + "grad_norm": 0.6884489059448242, + "learning_rate": 1.3829517593805929e-05, + "loss": 2.3687, + "step": 16632 + }, + { + "epoch": 1.3423452505851021, + "grad_norm": 0.6766197085380554, + "learning_rate": 1.3821508239586246e-05, + "loss": 2.4191, + "step": 16633 + }, + { + "epoch": 1.3424259543216852, + "grad_norm": 0.6744453310966492, + "learning_rate": 1.3813501033159837e-05, + "loss": 2.4254, + "step": 16634 + }, + { + "epoch": 1.342506658058268, + "grad_norm": 0.6906216144561768, + "learning_rate": 1.3805495974726267e-05, + "loss": 2.4763, + "step": 16635 + }, + { + "epoch": 1.3425873617948512, + "grad_norm": 0.7052608132362366, + "learning_rate": 1.3797493064485078e-05, + "loss": 2.4307, + "step": 16636 + }, + { + "epoch": 1.342668065531434, + "grad_norm": 0.6701127290725708, + "learning_rate": 1.3789492302635653e-05, + "loss": 2.4529, + "step": 16637 + }, + { + "epoch": 1.3427487692680171, + "grad_norm": 0.7440397143363953, + "learning_rate": 1.3781493689377455e-05, + "loss": 2.4471, + "step": 16638 + }, + { + "epoch": 1.3428294730046, + "grad_norm": 
0.7340207695960999, + "learning_rate": 1.3773497224909848e-05, + "loss": 2.4434, + "step": 16639 + }, + { + "epoch": 1.342910176741183, + "grad_norm": 0.6836793422698975, + "learning_rate": 1.376550290943205e-05, + "loss": 2.4072, + "step": 16640 + }, + { + "epoch": 1.3429908804777662, + "grad_norm": 0.6820472478866577, + "learning_rate": 1.3757510743143342e-05, + "loss": 2.4078, + "step": 16641 + }, + { + "epoch": 1.343071584214349, + "grad_norm": 0.6608061194419861, + "learning_rate": 1.3749520726242938e-05, + "loss": 2.3995, + "step": 16642 + }, + { + "epoch": 1.3431522879509321, + "grad_norm": 0.6582421064376831, + "learning_rate": 1.3741532858929906e-05, + "loss": 2.3768, + "step": 16643 + }, + { + "epoch": 1.3432329916875152, + "grad_norm": 0.7032744288444519, + "learning_rate": 1.3733547141403358e-05, + "loss": 2.4367, + "step": 16644 + }, + { + "epoch": 1.343313695424098, + "grad_norm": 0.7149307727813721, + "learning_rate": 1.3725563573862321e-05, + "loss": 2.4425, + "step": 16645 + }, + { + "epoch": 1.3433943991606812, + "grad_norm": 0.7375392913818359, + "learning_rate": 1.3717582156505793e-05, + "loss": 2.409, + "step": 16646 + }, + { + "epoch": 1.3434751028972642, + "grad_norm": 0.8422170877456665, + "learning_rate": 1.3709602889532624e-05, + "loss": 2.4758, + "step": 16647 + }, + { + "epoch": 1.343555806633847, + "grad_norm": 0.6542177796363831, + "learning_rate": 1.3701625773141712e-05, + "loss": 2.4199, + "step": 16648 + }, + { + "epoch": 1.3436365103704302, + "grad_norm": 0.6639342904090881, + "learning_rate": 1.3693650807531898e-05, + "loss": 2.4366, + "step": 16649 + }, + { + "epoch": 1.3437172141070133, + "grad_norm": 0.7270925045013428, + "learning_rate": 1.3685677992901901e-05, + "loss": 2.3745, + "step": 16650 + }, + { + "epoch": 1.3437979178435961, + "grad_norm": 0.7325547337532043, + "learning_rate": 1.367770732945044e-05, + "loss": 2.5053, + "step": 16651 + }, + { + "epoch": 1.3438786215801792, + "grad_norm": 0.7752320766448975, + 
"learning_rate": 1.3669738817376177e-05, + "loss": 2.4505, + "step": 16652 + }, + { + "epoch": 1.343959325316762, + "grad_norm": 0.6538182497024536, + "learning_rate": 1.3661772456877675e-05, + "loss": 2.4164, + "step": 16653 + }, + { + "epoch": 1.3440400290533452, + "grad_norm": 0.6886051297187805, + "learning_rate": 1.3653808248153487e-05, + "loss": 2.4156, + "step": 16654 + }, + { + "epoch": 1.344120732789928, + "grad_norm": 0.6990679502487183, + "learning_rate": 1.3645846191402134e-05, + "loss": 2.418, + "step": 16655 + }, + { + "epoch": 1.3442014365265111, + "grad_norm": 0.7006608247756958, + "learning_rate": 1.3637886286821999e-05, + "loss": 2.3987, + "step": 16656 + }, + { + "epoch": 1.3442821402630942, + "grad_norm": 0.6858758926391602, + "learning_rate": 1.3629928534611502e-05, + "loss": 2.3571, + "step": 16657 + }, + { + "epoch": 1.344362843999677, + "grad_norm": 0.7273774147033691, + "learning_rate": 1.3621972934968951e-05, + "loss": 2.4141, + "step": 16658 + }, + { + "epoch": 1.3444435477362602, + "grad_norm": 0.6770352721214294, + "learning_rate": 1.3614019488092633e-05, + "loss": 2.4602, + "step": 16659 + }, + { + "epoch": 1.3445242514728433, + "grad_norm": 0.7473095655441284, + "learning_rate": 1.3606068194180766e-05, + "loss": 2.3884, + "step": 16660 + }, + { + "epoch": 1.3446049552094261, + "grad_norm": 0.7271387577056885, + "learning_rate": 1.3598119053431512e-05, + "loss": 2.4705, + "step": 16661 + }, + { + "epoch": 1.3446856589460092, + "grad_norm": 0.658349335193634, + "learning_rate": 1.3590172066043006e-05, + "loss": 2.4271, + "step": 16662 + }, + { + "epoch": 1.3447663626825923, + "grad_norm": 0.6479319930076599, + "learning_rate": 1.3582227232213273e-05, + "loss": 2.3428, + "step": 16663 + }, + { + "epoch": 1.3448470664191752, + "grad_norm": 0.700951874256134, + "learning_rate": 1.3574284552140337e-05, + "loss": 2.4926, + "step": 16664 + }, + { + "epoch": 1.3449277701557583, + "grad_norm": 0.6699960231781006, + "learning_rate": 
1.3566344026022171e-05, + "loss": 2.4372, + "step": 16665 + }, + { + "epoch": 1.3450084738923413, + "grad_norm": 0.6743033528327942, + "learning_rate": 1.3558405654056617e-05, + "loss": 2.4142, + "step": 16666 + }, + { + "epoch": 1.3450891776289242, + "grad_norm": 0.6619464755058289, + "learning_rate": 1.355046943644157e-05, + "loss": 2.4099, + "step": 16667 + }, + { + "epoch": 1.3451698813655073, + "grad_norm": 0.668084442615509, + "learning_rate": 1.3542535373374798e-05, + "loss": 2.3895, + "step": 16668 + }, + { + "epoch": 1.3452505851020902, + "grad_norm": 0.7954626679420471, + "learning_rate": 1.3534603465054052e-05, + "loss": 2.479, + "step": 16669 + }, + { + "epoch": 1.3453312888386733, + "grad_norm": 0.6742919683456421, + "learning_rate": 1.3526673711677008e-05, + "loss": 2.4289, + "step": 16670 + }, + { + "epoch": 1.3454119925752561, + "grad_norm": 0.6564723253250122, + "learning_rate": 1.3518746113441316e-05, + "loss": 2.404, + "step": 16671 + }, + { + "epoch": 1.3454926963118392, + "grad_norm": 0.6955705881118774, + "learning_rate": 1.3510820670544521e-05, + "loss": 2.4274, + "step": 16672 + }, + { + "epoch": 1.3455734000484223, + "grad_norm": 0.6687749028205872, + "learning_rate": 1.3502897383184154e-05, + "loss": 2.4564, + "step": 16673 + }, + { + "epoch": 1.3456541037850052, + "grad_norm": 0.7984250783920288, + "learning_rate": 1.34949762515577e-05, + "loss": 2.3426, + "step": 16674 + }, + { + "epoch": 1.3457348075215882, + "grad_norm": 0.7334223389625549, + "learning_rate": 1.348705727586258e-05, + "loss": 2.4712, + "step": 16675 + }, + { + "epoch": 1.3458155112581713, + "grad_norm": 0.6732765436172485, + "learning_rate": 1.3479140456296114e-05, + "loss": 2.424, + "step": 16676 + }, + { + "epoch": 1.3458962149947542, + "grad_norm": 0.7944334149360657, + "learning_rate": 1.3471225793055641e-05, + "loss": 2.3951, + "step": 16677 + }, + { + "epoch": 1.3459769187313373, + "grad_norm": 0.6829007863998413, + "learning_rate": 1.3463313286338408e-05, + 
"loss": 2.4158, + "step": 16678 + }, + { + "epoch": 1.3460576224679204, + "grad_norm": 0.7019640207290649, + "learning_rate": 1.345540293634161e-05, + "loss": 2.4093, + "step": 16679 + }, + { + "epoch": 1.3461383262045032, + "grad_norm": 0.6839374303817749, + "learning_rate": 1.3447494743262412e-05, + "loss": 2.3959, + "step": 16680 + }, + { + "epoch": 1.3462190299410863, + "grad_norm": 0.7211155295372009, + "learning_rate": 1.3439588707297911e-05, + "loss": 2.4052, + "step": 16681 + }, + { + "epoch": 1.3462997336776692, + "grad_norm": 0.73811274766922, + "learning_rate": 1.3431684828645109e-05, + "loss": 2.4179, + "step": 16682 + }, + { + "epoch": 1.3463804374142523, + "grad_norm": 0.6634721159934998, + "learning_rate": 1.3423783107501009e-05, + "loss": 2.379, + "step": 16683 + }, + { + "epoch": 1.3464611411508352, + "grad_norm": 0.6884057521820068, + "learning_rate": 1.3415883544062579e-05, + "loss": 2.4144, + "step": 16684 + }, + { + "epoch": 1.3465418448874182, + "grad_norm": 0.7239587306976318, + "learning_rate": 1.340798613852664e-05, + "loss": 2.3856, + "step": 16685 + }, + { + "epoch": 1.3466225486240013, + "grad_norm": 0.7201077342033386, + "learning_rate": 1.3400090891090033e-05, + "loss": 2.4552, + "step": 16686 + }, + { + "epoch": 1.3467032523605842, + "grad_norm": 0.7049584984779358, + "learning_rate": 1.3392197801949558e-05, + "loss": 2.4424, + "step": 16687 + }, + { + "epoch": 1.3467839560971673, + "grad_norm": 0.7240790128707886, + "learning_rate": 1.3384306871301877e-05, + "loss": 2.4156, + "step": 16688 + }, + { + "epoch": 1.3468646598337504, + "grad_norm": 0.7276458740234375, + "learning_rate": 1.337641809934369e-05, + "loss": 2.3882, + "step": 16689 + }, + { + "epoch": 1.3469453635703332, + "grad_norm": 0.6650896072387695, + "learning_rate": 1.3368531486271607e-05, + "loss": 2.396, + "step": 16690 + }, + { + "epoch": 1.3470260673069163, + "grad_norm": 0.6946447491645813, + "learning_rate": 1.3360647032282203e-05, + "loss": 2.3779, + "step": 
16691 + }, + { + "epoch": 1.3471067710434994, + "grad_norm": 0.7507699728012085, + "learning_rate": 1.3352764737571932e-05, + "loss": 2.4378, + "step": 16692 + }, + { + "epoch": 1.3471874747800823, + "grad_norm": 0.6548876762390137, + "learning_rate": 1.334488460233725e-05, + "loss": 2.4181, + "step": 16693 + }, + { + "epoch": 1.3472681785166654, + "grad_norm": 0.7000874280929565, + "learning_rate": 1.3337006626774595e-05, + "loss": 2.4463, + "step": 16694 + }, + { + "epoch": 1.3473488822532484, + "grad_norm": 0.6487517356872559, + "learning_rate": 1.3329130811080249e-05, + "loss": 2.3703, + "step": 16695 + }, + { + "epoch": 1.3474295859898313, + "grad_norm": 0.6447827219963074, + "learning_rate": 1.3321257155450517e-05, + "loss": 2.3779, + "step": 16696 + }, + { + "epoch": 1.3475102897264144, + "grad_norm": 0.6309572458267212, + "learning_rate": 1.3313385660081667e-05, + "loss": 2.4443, + "step": 16697 + }, + { + "epoch": 1.3475909934629973, + "grad_norm": 0.6366227865219116, + "learning_rate": 1.330551632516982e-05, + "loss": 2.3418, + "step": 16698 + }, + { + "epoch": 1.3476716971995804, + "grad_norm": 0.6864019632339478, + "learning_rate": 1.3297649150911117e-05, + "loss": 2.4416, + "step": 16699 + }, + { + "epoch": 1.3477524009361632, + "grad_norm": 0.6807940006256104, + "learning_rate": 1.3289784137501671e-05, + "loss": 2.4465, + "step": 16700 + }, + { + "epoch": 1.3478331046727463, + "grad_norm": 0.6991185545921326, + "learning_rate": 1.3281921285137455e-05, + "loss": 2.3929, + "step": 16701 + }, + { + "epoch": 1.3479138084093294, + "grad_norm": 0.691908061504364, + "learning_rate": 1.3274060594014437e-05, + "loss": 2.4237, + "step": 16702 + }, + { + "epoch": 1.3479945121459123, + "grad_norm": 0.6909685730934143, + "learning_rate": 1.3266202064328548e-05, + "loss": 2.3695, + "step": 16703 + }, + { + "epoch": 1.3480752158824953, + "grad_norm": 0.6473715901374817, + "learning_rate": 1.325834569627562e-05, + "loss": 2.384, + "step": 16704 + }, + { + "epoch": 
1.3481559196190784, + "grad_norm": 0.7433453798294067, + "learning_rate": 1.3250491490051454e-05, + "loss": 2.4546, + "step": 16705 + }, + { + "epoch": 1.3482366233556613, + "grad_norm": 0.7432501316070557, + "learning_rate": 1.3242639445851812e-05, + "loss": 2.4204, + "step": 16706 + }, + { + "epoch": 1.3483173270922444, + "grad_norm": 0.6661228537559509, + "learning_rate": 1.3234789563872397e-05, + "loss": 2.4454, + "step": 16707 + }, + { + "epoch": 1.3483980308288275, + "grad_norm": 0.7481260895729065, + "learning_rate": 1.3226941844308816e-05, + "loss": 2.4348, + "step": 16708 + }, + { + "epoch": 1.3484787345654103, + "grad_norm": 0.6986531019210815, + "learning_rate": 1.3219096287356669e-05, + "loss": 2.3622, + "step": 16709 + }, + { + "epoch": 1.3485594383019934, + "grad_norm": 0.7457645535469055, + "learning_rate": 1.321125289321149e-05, + "loss": 2.4399, + "step": 16710 + }, + { + "epoch": 1.3486401420385765, + "grad_norm": 0.6710307598114014, + "learning_rate": 1.3203411662068754e-05, + "loss": 2.3857, + "step": 16711 + }, + { + "epoch": 1.3487208457751594, + "grad_norm": 0.767304539680481, + "learning_rate": 1.3195572594123884e-05, + "loss": 2.4666, + "step": 16712 + }, + { + "epoch": 1.3488015495117425, + "grad_norm": 0.6720963716506958, + "learning_rate": 1.3187735689572289e-05, + "loss": 2.3952, + "step": 16713 + }, + { + "epoch": 1.3488822532483253, + "grad_norm": 0.6381734609603882, + "learning_rate": 1.3179900948609213e-05, + "loss": 2.3632, + "step": 16714 + }, + { + "epoch": 1.3489629569849084, + "grad_norm": 0.6697315573692322, + "learning_rate": 1.317206837142997e-05, + "loss": 2.4117, + "step": 16715 + }, + { + "epoch": 1.3490436607214913, + "grad_norm": 0.723676323890686, + "learning_rate": 1.3164237958229764e-05, + "loss": 2.3772, + "step": 16716 + }, + { + "epoch": 1.3491243644580744, + "grad_norm": 0.7021055817604065, + "learning_rate": 1.3156409709203732e-05, + "loss": 2.3808, + "step": 16717 + }, + { + "epoch": 1.3492050681946575, + 
"grad_norm": 0.7128920555114746, + "learning_rate": 1.3148583624546962e-05, + "loss": 2.3854, + "step": 16718 + }, + { + "epoch": 1.3492857719312403, + "grad_norm": 0.6684797406196594, + "learning_rate": 1.314075970445453e-05, + "loss": 2.3722, + "step": 16719 + }, + { + "epoch": 1.3493664756678234, + "grad_norm": 0.6710386276245117, + "learning_rate": 1.3132937949121426e-05, + "loss": 2.412, + "step": 16720 + }, + { + "epoch": 1.3494471794044065, + "grad_norm": 0.7207252979278564, + "learning_rate": 1.3125118358742572e-05, + "loss": 2.4506, + "step": 16721 + }, + { + "epoch": 1.3495278831409894, + "grad_norm": 0.685516893863678, + "learning_rate": 1.3117300933512865e-05, + "loss": 2.435, + "step": 16722 + }, + { + "epoch": 1.3496085868775725, + "grad_norm": 0.71708744764328, + "learning_rate": 1.3109485673627154e-05, + "loss": 2.4735, + "step": 16723 + }, + { + "epoch": 1.3496892906141555, + "grad_norm": 0.7293861508369446, + "learning_rate": 1.3101672579280166e-05, + "loss": 2.4545, + "step": 16724 + }, + { + "epoch": 1.3497699943507384, + "grad_norm": 0.6448976993560791, + "learning_rate": 1.3093861650666661e-05, + "loss": 2.386, + "step": 16725 + }, + { + "epoch": 1.3498506980873215, + "grad_norm": 0.8111226558685303, + "learning_rate": 1.3086052887981315e-05, + "loss": 2.4733, + "step": 16726 + }, + { + "epoch": 1.3499314018239044, + "grad_norm": 0.7673875093460083, + "learning_rate": 1.3078246291418706e-05, + "loss": 2.4119, + "step": 16727 + }, + { + "epoch": 1.3500121055604875, + "grad_norm": 0.7296731472015381, + "learning_rate": 1.307044186117341e-05, + "loss": 2.3724, + "step": 16728 + }, + { + "epoch": 1.3500928092970703, + "grad_norm": 0.6947155594825745, + "learning_rate": 1.306263959743994e-05, + "loss": 2.3989, + "step": 16729 + }, + { + "epoch": 1.3501735130336534, + "grad_norm": 0.6781659722328186, + "learning_rate": 1.3054839500412753e-05, + "loss": 2.429, + "step": 16730 + }, + { + "epoch": 1.3502542167702365, + "grad_norm": 0.7498819231987, + 
"learning_rate": 1.3047041570286244e-05, + "loss": 2.459, + "step": 16731 + }, + { + "epoch": 1.3503349205068194, + "grad_norm": 0.6651057004928589, + "learning_rate": 1.3039245807254774e-05, + "loss": 2.4049, + "step": 16732 + }, + { + "epoch": 1.3504156242434024, + "grad_norm": 0.6998507380485535, + "learning_rate": 1.3031452211512596e-05, + "loss": 2.4083, + "step": 16733 + }, + { + "epoch": 1.3504963279799855, + "grad_norm": 0.6522402167320251, + "learning_rate": 1.3023660783253966e-05, + "loss": 2.3987, + "step": 16734 + }, + { + "epoch": 1.3505770317165684, + "grad_norm": 0.6618130207061768, + "learning_rate": 1.3015871522673096e-05, + "loss": 2.4514, + "step": 16735 + }, + { + "epoch": 1.3506577354531515, + "grad_norm": 0.7139489650726318, + "learning_rate": 1.300808442996405e-05, + "loss": 2.484, + "step": 16736 + }, + { + "epoch": 1.3507384391897346, + "grad_norm": 0.6582522988319397, + "learning_rate": 1.3000299505320956e-05, + "loss": 2.4463, + "step": 16737 + }, + { + "epoch": 1.3508191429263174, + "grad_norm": 0.7115446329116821, + "learning_rate": 1.2992516748937811e-05, + "loss": 2.4795, + "step": 16738 + }, + { + "epoch": 1.3508998466629005, + "grad_norm": 0.7243752479553223, + "learning_rate": 1.2984736161008581e-05, + "loss": 2.4151, + "step": 16739 + }, + { + "epoch": 1.3509805503994836, + "grad_norm": 0.758084774017334, + "learning_rate": 1.297695774172719e-05, + "loss": 2.4028, + "step": 16740 + }, + { + "epoch": 1.3510612541360665, + "grad_norm": 0.6555618643760681, + "learning_rate": 1.2969181491287496e-05, + "loss": 2.4184, + "step": 16741 + }, + { + "epoch": 1.3511419578726496, + "grad_norm": 0.6657842993736267, + "learning_rate": 1.2961407409883331e-05, + "loss": 2.375, + "step": 16742 + }, + { + "epoch": 1.3512226616092324, + "grad_norm": 0.6355723142623901, + "learning_rate": 1.2953635497708382e-05, + "loss": 2.4202, + "step": 16743 + }, + { + "epoch": 1.3513033653458155, + "grad_norm": 0.7384408116340637, + "learning_rate": 
1.2945865754956377e-05, + "loss": 2.4298, + "step": 16744 + }, + { + "epoch": 1.3513840690823984, + "grad_norm": 0.7300455570220947, + "learning_rate": 1.2938098181820979e-05, + "loss": 2.3842, + "step": 16745 + }, + { + "epoch": 1.3514647728189815, + "grad_norm": 0.7378895282745361, + "learning_rate": 1.2930332778495735e-05, + "loss": 2.4025, + "step": 16746 + }, + { + "epoch": 1.3515454765555646, + "grad_norm": 0.6542565822601318, + "learning_rate": 1.2922569545174212e-05, + "loss": 2.3995, + "step": 16747 + }, + { + "epoch": 1.3516261802921474, + "grad_norm": 0.669829249382019, + "learning_rate": 1.291480848204989e-05, + "loss": 2.3843, + "step": 16748 + }, + { + "epoch": 1.3517068840287305, + "grad_norm": 0.6747604608535767, + "learning_rate": 1.2907049589316167e-05, + "loss": 2.4108, + "step": 16749 + }, + { + "epoch": 1.3517875877653136, + "grad_norm": 0.7003559470176697, + "learning_rate": 1.2899292867166402e-05, + "loss": 2.4233, + "step": 16750 + }, + { + "epoch": 1.3518682915018965, + "grad_norm": 0.7365099191665649, + "learning_rate": 1.2891538315793994e-05, + "loss": 2.3592, + "step": 16751 + }, + { + "epoch": 1.3519489952384796, + "grad_norm": 0.6849377751350403, + "learning_rate": 1.2883785935392123e-05, + "loss": 2.3943, + "step": 16752 + }, + { + "epoch": 1.3520296989750626, + "grad_norm": 0.7263002395629883, + "learning_rate": 1.2876035726154045e-05, + "loss": 2.4078, + "step": 16753 + }, + { + "epoch": 1.3521104027116455, + "grad_norm": 0.7341182827949524, + "learning_rate": 1.2868287688272884e-05, + "loss": 2.3568, + "step": 16754 + }, + { + "epoch": 1.3521911064482286, + "grad_norm": 0.7281078100204468, + "learning_rate": 1.2860541821941796e-05, + "loss": 2.4073, + "step": 16755 + }, + { + "epoch": 1.3522718101848117, + "grad_norm": 0.6302868127822876, + "learning_rate": 1.285279812735376e-05, + "loss": 2.3946, + "step": 16756 + }, + { + "epoch": 1.3523525139213946, + "grad_norm": 0.7333062887191772, + "learning_rate": 1.28450566047018e-05, + 
"loss": 2.3892, + "step": 16757 + }, + { + "epoch": 1.3524332176579776, + "grad_norm": 0.74838787317276, + "learning_rate": 1.2837317254178882e-05, + "loss": 2.4844, + "step": 16758 + }, + { + "epoch": 1.3525139213945605, + "grad_norm": 0.7085757255554199, + "learning_rate": 1.2829580075977843e-05, + "loss": 2.3583, + "step": 16759 + }, + { + "epoch": 1.3525946251311436, + "grad_norm": 0.7182579040527344, + "learning_rate": 1.2821845070291527e-05, + "loss": 2.4326, + "step": 16760 + }, + { + "epoch": 1.3526753288677265, + "grad_norm": 0.6857885718345642, + "learning_rate": 1.2814112237312714e-05, + "loss": 2.4406, + "step": 16761 + }, + { + "epoch": 1.3527560326043095, + "grad_norm": 0.7629652619361877, + "learning_rate": 1.2806381577234139e-05, + "loss": 2.4839, + "step": 16762 + }, + { + "epoch": 1.3528367363408926, + "grad_norm": 0.6940319538116455, + "learning_rate": 1.2798653090248458e-05, + "loss": 2.3918, + "step": 16763 + }, + { + "epoch": 1.3529174400774755, + "grad_norm": 0.6825633645057678, + "learning_rate": 1.2790926776548318e-05, + "loss": 2.3828, + "step": 16764 + }, + { + "epoch": 1.3529981438140586, + "grad_norm": 0.6830280423164368, + "learning_rate": 1.278320263632622e-05, + "loss": 2.3727, + "step": 16765 + }, + { + "epoch": 1.3530788475506417, + "grad_norm": 0.6782984733581543, + "learning_rate": 1.2775480669774698e-05, + "loss": 2.3984, + "step": 16766 + }, + { + "epoch": 1.3531595512872245, + "grad_norm": 0.6939808130264282, + "learning_rate": 1.276776087708621e-05, + "loss": 2.3724, + "step": 16767 + }, + { + "epoch": 1.3532402550238076, + "grad_norm": 0.7562546133995056, + "learning_rate": 1.276004325845317e-05, + "loss": 2.4178, + "step": 16768 + }, + { + "epoch": 1.3533209587603907, + "grad_norm": 0.6692922115325928, + "learning_rate": 1.2752327814067877e-05, + "loss": 2.4072, + "step": 16769 + }, + { + "epoch": 1.3534016624969736, + "grad_norm": 0.6783415079116821, + "learning_rate": 1.2744614544122635e-05, + "loss": 2.3993, + "step": 
16770 + }, + { + "epoch": 1.3534823662335567, + "grad_norm": 0.6608997583389282, + "learning_rate": 1.27369034488097e-05, + "loss": 2.3883, + "step": 16771 + }, + { + "epoch": 1.3535630699701398, + "grad_norm": 0.6849228739738464, + "learning_rate": 1.2729194528321231e-05, + "loss": 2.4009, + "step": 16772 + }, + { + "epoch": 1.3536437737067226, + "grad_norm": 0.7059305906295776, + "learning_rate": 1.2721487782849362e-05, + "loss": 2.508, + "step": 16773 + }, + { + "epoch": 1.3537244774433057, + "grad_norm": 0.6471492052078247, + "learning_rate": 1.2713783212586183e-05, + "loss": 2.3813, + "step": 16774 + }, + { + "epoch": 1.3538051811798886, + "grad_norm": 0.7108949422836304, + "learning_rate": 1.2706080817723687e-05, + "loss": 2.4189, + "step": 16775 + }, + { + "epoch": 1.3538858849164717, + "grad_norm": 0.6623945236206055, + "learning_rate": 1.269838059845383e-05, + "loss": 2.4128, + "step": 16776 + }, + { + "epoch": 1.3539665886530545, + "grad_norm": 0.6595518589019775, + "learning_rate": 1.269068255496857e-05, + "loss": 2.3984, + "step": 16777 + }, + { + "epoch": 1.3540472923896376, + "grad_norm": 0.6932248473167419, + "learning_rate": 1.2682986687459708e-05, + "loss": 2.3951, + "step": 16778 + }, + { + "epoch": 1.3541279961262207, + "grad_norm": 0.6914867162704468, + "learning_rate": 1.2675292996119059e-05, + "loss": 2.4602, + "step": 16779 + }, + { + "epoch": 1.3542086998628036, + "grad_norm": 0.6633034348487854, + "learning_rate": 1.266760148113838e-05, + "loss": 2.43, + "step": 16780 + }, + { + "epoch": 1.3542894035993867, + "grad_norm": 0.6987594366073608, + "learning_rate": 1.2659912142709363e-05, + "loss": 2.3962, + "step": 16781 + }, + { + "epoch": 1.3543701073359697, + "grad_norm": 0.7429597973823547, + "learning_rate": 1.2652224981023652e-05, + "loss": 2.4838, + "step": 16782 + }, + { + "epoch": 1.3544508110725526, + "grad_norm": 0.6402504444122314, + "learning_rate": 1.2644539996272808e-05, + "loss": 2.43, + "step": 16783 + }, + { + "epoch": 
1.3545315148091357, + "grad_norm": 0.6763156652450562, + "learning_rate": 1.263685718864841e-05, + "loss": 2.4911, + "step": 16784 + }, + { + "epoch": 1.3546122185457188, + "grad_norm": 0.8133900165557861, + "learning_rate": 1.2629176558341881e-05, + "loss": 2.45, + "step": 16785 + }, + { + "epoch": 1.3546929222823016, + "grad_norm": 0.6946277022361755, + "learning_rate": 1.262149810554465e-05, + "loss": 2.43, + "step": 16786 + }, + { + "epoch": 1.3547736260188847, + "grad_norm": 0.7667170166969299, + "learning_rate": 1.2613821830448125e-05, + "loss": 2.4464, + "step": 16787 + }, + { + "epoch": 1.3548543297554676, + "grad_norm": 0.672662615776062, + "learning_rate": 1.2606147733243567e-05, + "loss": 2.3653, + "step": 16788 + }, + { + "epoch": 1.3549350334920507, + "grad_norm": 0.6856412291526794, + "learning_rate": 1.2598475814122258e-05, + "loss": 2.3924, + "step": 16789 + }, + { + "epoch": 1.3550157372286336, + "grad_norm": 0.6966650485992432, + "learning_rate": 1.2590806073275407e-05, + "loss": 2.4039, + "step": 16790 + }, + { + "epoch": 1.3550964409652166, + "grad_norm": 0.7397874593734741, + "learning_rate": 1.2583138510894143e-05, + "loss": 2.4769, + "step": 16791 + }, + { + "epoch": 1.3551771447017997, + "grad_norm": 0.6960996985435486, + "learning_rate": 1.2575473127169591e-05, + "loss": 2.4342, + "step": 16792 + }, + { + "epoch": 1.3552578484383826, + "grad_norm": 0.7324376702308655, + "learning_rate": 1.2567809922292795e-05, + "loss": 2.4779, + "step": 16793 + }, + { + "epoch": 1.3553385521749657, + "grad_norm": 0.6891930103302002, + "learning_rate": 1.2560148896454704e-05, + "loss": 2.4228, + "step": 16794 + }, + { + "epoch": 1.3554192559115488, + "grad_norm": 0.6919474601745605, + "learning_rate": 1.2552490049846278e-05, + "loss": 2.4178, + "step": 16795 + }, + { + "epoch": 1.3554999596481316, + "grad_norm": 0.7067604660987854, + "learning_rate": 1.2544833382658405e-05, + "loss": 2.457, + "step": 16796 + }, + { + "epoch": 1.3555806633847147, + 
"grad_norm": 0.7667992115020752, + "learning_rate": 1.253717889508188e-05, + "loss": 2.3951, + "step": 16797 + }, + { + "epoch": 1.3556613671212978, + "grad_norm": 0.6337998509407043, + "learning_rate": 1.2529526587307482e-05, + "loss": 2.3788, + "step": 16798 + }, + { + "epoch": 1.3557420708578807, + "grad_norm": 0.6591900587081909, + "learning_rate": 1.2521876459525927e-05, + "loss": 2.4101, + "step": 16799 + }, + { + "epoch": 1.3558227745944638, + "grad_norm": 0.7115298509597778, + "learning_rate": 1.2514228511927895e-05, + "loss": 2.4417, + "step": 16800 + }, + { + "epoch": 1.3559034783310469, + "grad_norm": 0.6851321458816528, + "learning_rate": 1.2506582744703965e-05, + "loss": 2.4081, + "step": 16801 + }, + { + "epoch": 1.3559841820676297, + "grad_norm": 0.7469603419303894, + "learning_rate": 1.249893915804471e-05, + "loss": 2.3703, + "step": 16802 + }, + { + "epoch": 1.3560648858042128, + "grad_norm": 0.6972614526748657, + "learning_rate": 1.2491297752140641e-05, + "loss": 2.3549, + "step": 16803 + }, + { + "epoch": 1.3561455895407957, + "grad_norm": 0.6669485569000244, + "learning_rate": 1.2483658527182151e-05, + "loss": 2.4261, + "step": 16804 + }, + { + "epoch": 1.3562262932773788, + "grad_norm": 0.7516919374465942, + "learning_rate": 1.247602148335968e-05, + "loss": 2.4323, + "step": 16805 + }, + { + "epoch": 1.3563069970139616, + "grad_norm": 0.7191836833953857, + "learning_rate": 1.2468386620863548e-05, + "loss": 2.4242, + "step": 16806 + }, + { + "epoch": 1.3563877007505447, + "grad_norm": 0.660237729549408, + "learning_rate": 1.2460753939884017e-05, + "loss": 2.4154, + "step": 16807 + }, + { + "epoch": 1.3564684044871278, + "grad_norm": 0.749531626701355, + "learning_rate": 1.2453123440611325e-05, + "loss": 2.4138, + "step": 16808 + }, + { + "epoch": 1.3565491082237107, + "grad_norm": 0.6808986067771912, + "learning_rate": 1.2445495123235673e-05, + "loss": 2.3918, + "step": 16809 + }, + { + "epoch": 1.3566298119602938, + "grad_norm": 
0.686183750629425, + "learning_rate": 1.2437868987947133e-05, + "loss": 2.4172, + "step": 16810 + }, + { + "epoch": 1.3567105156968768, + "grad_norm": 0.6487868428230286, + "learning_rate": 1.2430245034935784e-05, + "loss": 2.4199, + "step": 16811 + }, + { + "epoch": 1.3567912194334597, + "grad_norm": 0.7352244257926941, + "learning_rate": 1.242262326439163e-05, + "loss": 2.3779, + "step": 16812 + }, + { + "epoch": 1.3568719231700428, + "grad_norm": 0.7250565886497498, + "learning_rate": 1.2415003676504644e-05, + "loss": 2.4106, + "step": 16813 + }, + { + "epoch": 1.3569526269066259, + "grad_norm": 0.6843926906585693, + "learning_rate": 1.2407386271464716e-05, + "loss": 2.3725, + "step": 16814 + }, + { + "epoch": 1.3570333306432087, + "grad_norm": 0.686326801776886, + "learning_rate": 1.2399771049461684e-05, + "loss": 2.3709, + "step": 16815 + }, + { + "epoch": 1.3571140343797918, + "grad_norm": 0.6796969771385193, + "learning_rate": 1.2392158010685373e-05, + "loss": 2.4545, + "step": 16816 + }, + { + "epoch": 1.357194738116375, + "grad_norm": 0.6469466090202332, + "learning_rate": 1.2384547155325466e-05, + "loss": 2.4263, + "step": 16817 + }, + { + "epoch": 1.3572754418529578, + "grad_norm": 0.7089909911155701, + "learning_rate": 1.2376938483571688e-05, + "loss": 2.378, + "step": 16818 + }, + { + "epoch": 1.3573561455895409, + "grad_norm": 0.7313235402107239, + "learning_rate": 1.2369331995613665e-05, + "loss": 2.46, + "step": 16819 + }, + { + "epoch": 1.3574368493261237, + "grad_norm": 0.7555651664733887, + "learning_rate": 1.2361727691640934e-05, + "loss": 2.531, + "step": 16820 + }, + { + "epoch": 1.3575175530627068, + "grad_norm": 0.7563485503196716, + "learning_rate": 1.2354125571843033e-05, + "loss": 2.4205, + "step": 16821 + }, + { + "epoch": 1.3575982567992897, + "grad_norm": 0.7996519804000854, + "learning_rate": 1.2346525636409434e-05, + "loss": 2.4223, + "step": 16822 + }, + { + "epoch": 1.3576789605358728, + "grad_norm": 0.7141731977462769, + 
"learning_rate": 1.233892788552955e-05, + "loss": 2.4554, + "step": 16823 + }, + { + "epoch": 1.3577596642724559, + "grad_norm": 0.6715070605278015, + "learning_rate": 1.233133231939273e-05, + "loss": 2.4386, + "step": 16824 + }, + { + "epoch": 1.3578403680090387, + "grad_norm": 0.6893020272254944, + "learning_rate": 1.2323738938188301e-05, + "loss": 2.4065, + "step": 16825 + }, + { + "epoch": 1.3579210717456218, + "grad_norm": 0.7542821764945984, + "learning_rate": 1.2316147742105454e-05, + "loss": 2.3974, + "step": 16826 + }, + { + "epoch": 1.358001775482205, + "grad_norm": 0.7177664041519165, + "learning_rate": 1.230855873133343e-05, + "loss": 2.4306, + "step": 16827 + }, + { + "epoch": 1.3580824792187878, + "grad_norm": 0.7056576013565063, + "learning_rate": 1.2300971906061354e-05, + "loss": 2.4238, + "step": 16828 + }, + { + "epoch": 1.3581631829553709, + "grad_norm": 0.686903715133667, + "learning_rate": 1.2293387266478296e-05, + "loss": 2.3902, + "step": 16829 + }, + { + "epoch": 1.358243886691954, + "grad_norm": 0.7377725839614868, + "learning_rate": 1.2285804812773293e-05, + "loss": 2.4294, + "step": 16830 + }, + { + "epoch": 1.3583245904285368, + "grad_norm": 0.6537891030311584, + "learning_rate": 1.227822454513532e-05, + "loss": 2.374, + "step": 16831 + }, + { + "epoch": 1.35840529416512, + "grad_norm": 0.684699296951294, + "learning_rate": 1.2270646463753288e-05, + "loss": 2.4105, + "step": 16832 + }, + { + "epoch": 1.3584859979017028, + "grad_norm": 0.7042316794395447, + "learning_rate": 1.2263070568816081e-05, + "loss": 2.4246, + "step": 16833 + }, + { + "epoch": 1.3585667016382859, + "grad_norm": 0.7610476613044739, + "learning_rate": 1.2255496860512505e-05, + "loss": 2.4581, + "step": 16834 + }, + { + "epoch": 1.3586474053748687, + "grad_norm": 0.6620839834213257, + "learning_rate": 1.224792533903134e-05, + "loss": 2.4138, + "step": 16835 + }, + { + "epoch": 1.3587281091114518, + "grad_norm": 0.6861035823822021, + "learning_rate": 
1.2240356004561227e-05, + "loss": 2.4195, + "step": 16836 + }, + { + "epoch": 1.358808812848035, + "grad_norm": 0.7186882495880127, + "learning_rate": 1.2232788857290855e-05, + "loss": 2.404, + "step": 16837 + }, + { + "epoch": 1.3588895165846178, + "grad_norm": 0.7219386696815491, + "learning_rate": 1.2225223897408833e-05, + "loss": 2.3778, + "step": 16838 + }, + { + "epoch": 1.3589702203212009, + "grad_norm": 0.6935911774635315, + "learning_rate": 1.2217661125103663e-05, + "loss": 2.4617, + "step": 16839 + }, + { + "epoch": 1.359050924057784, + "grad_norm": 0.7885910272598267, + "learning_rate": 1.2210100540563828e-05, + "loss": 2.4467, + "step": 16840 + }, + { + "epoch": 1.3591316277943668, + "grad_norm": 0.6690255403518677, + "learning_rate": 1.220254214397778e-05, + "loss": 2.381, + "step": 16841 + }, + { + "epoch": 1.35921233153095, + "grad_norm": 0.7592741847038269, + "learning_rate": 1.2194985935533887e-05, + "loss": 2.4459, + "step": 16842 + }, + { + "epoch": 1.359293035267533, + "grad_norm": 0.827460527420044, + "learning_rate": 1.2187431915420466e-05, + "loss": 2.3842, + "step": 16843 + }, + { + "epoch": 1.3593737390041158, + "grad_norm": 0.7313764691352844, + "learning_rate": 1.2179880083825811e-05, + "loss": 2.3938, + "step": 16844 + }, + { + "epoch": 1.359454442740699, + "grad_norm": 0.7093486189842224, + "learning_rate": 1.2172330440938084e-05, + "loss": 2.4316, + "step": 16845 + }, + { + "epoch": 1.359535146477282, + "grad_norm": 0.6805742383003235, + "learning_rate": 1.2164782986945467e-05, + "loss": 2.4372, + "step": 16846 + }, + { + "epoch": 1.3596158502138649, + "grad_norm": 0.7525961399078369, + "learning_rate": 1.2157237722036064e-05, + "loss": 2.3867, + "step": 16847 + }, + { + "epoch": 1.359696553950448, + "grad_norm": 0.723896861076355, + "learning_rate": 1.2149694646397947e-05, + "loss": 2.4685, + "step": 16848 + }, + { + "epoch": 1.3597772576870308, + "grad_norm": 0.704448938369751, + "learning_rate": 1.2142153760219055e-05, + "loss": 
2.4463, + "step": 16849 + }, + { + "epoch": 1.359857961423614, + "grad_norm": 0.7207927703857422, + "learning_rate": 1.2134615063687349e-05, + "loss": 2.3549, + "step": 16850 + }, + { + "epoch": 1.3599386651601968, + "grad_norm": 0.7106234431266785, + "learning_rate": 1.2127078556990724e-05, + "loss": 2.4145, + "step": 16851 + }, + { + "epoch": 1.3600193688967799, + "grad_norm": 0.7740694284439087, + "learning_rate": 1.2119544240316993e-05, + "loss": 2.3999, + "step": 16852 + }, + { + "epoch": 1.360100072633363, + "grad_norm": 0.6696181297302246, + "learning_rate": 1.2112012113853954e-05, + "loss": 2.4046, + "step": 16853 + }, + { + "epoch": 1.3601807763699458, + "grad_norm": 0.6758043169975281, + "learning_rate": 1.2104482177789334e-05, + "loss": 2.4021, + "step": 16854 + }, + { + "epoch": 1.360261480106529, + "grad_norm": 0.6659380793571472, + "learning_rate": 1.2096954432310758e-05, + "loss": 2.4145, + "step": 16855 + }, + { + "epoch": 1.360342183843112, + "grad_norm": 0.6889290809631348, + "learning_rate": 1.2089428877605858e-05, + "loss": 2.3486, + "step": 16856 + }, + { + "epoch": 1.3604228875796949, + "grad_norm": 0.6755563020706177, + "learning_rate": 1.2081905513862201e-05, + "loss": 2.4294, + "step": 16857 + }, + { + "epoch": 1.360503591316278, + "grad_norm": 0.7662243843078613, + "learning_rate": 1.2074384341267276e-05, + "loss": 2.414, + "step": 16858 + }, + { + "epoch": 1.360584295052861, + "grad_norm": 0.7432721853256226, + "learning_rate": 1.2066865360008517e-05, + "loss": 2.4314, + "step": 16859 + }, + { + "epoch": 1.360664998789444, + "grad_norm": 0.6465074419975281, + "learning_rate": 1.2059348570273366e-05, + "loss": 2.3349, + "step": 16860 + }, + { + "epoch": 1.360745702526027, + "grad_norm": 0.6940968632698059, + "learning_rate": 1.2051833972249105e-05, + "loss": 2.4539, + "step": 16861 + }, + { + "epoch": 1.36082640626261, + "grad_norm": 0.7211138010025024, + "learning_rate": 1.2044321566123019e-05, + "loss": 2.4041, + "step": 16862 + }, + { + 
"epoch": 1.360907109999193, + "grad_norm": 0.6746649146080017, + "learning_rate": 1.2036811352082367e-05, + "loss": 2.4329, + "step": 16863 + }, + { + "epoch": 1.360987813735776, + "grad_norm": 0.7502184510231018, + "learning_rate": 1.2029303330314345e-05, + "loss": 2.407, + "step": 16864 + }, + { + "epoch": 1.361068517472359, + "grad_norm": 0.7192596793174744, + "learning_rate": 1.2021797501006027e-05, + "loss": 2.3907, + "step": 16865 + }, + { + "epoch": 1.361149221208942, + "grad_norm": 0.6682254672050476, + "learning_rate": 1.2014293864344483e-05, + "loss": 2.391, + "step": 16866 + }, + { + "epoch": 1.3612299249455249, + "grad_norm": 0.680969774723053, + "learning_rate": 1.2006792420516755e-05, + "loss": 2.3479, + "step": 16867 + }, + { + "epoch": 1.361310628682108, + "grad_norm": 0.682671308517456, + "learning_rate": 1.1999293169709757e-05, + "loss": 2.4097, + "step": 16868 + }, + { + "epoch": 1.361391332418691, + "grad_norm": 0.7030573487281799, + "learning_rate": 1.199179611211041e-05, + "loss": 2.4514, + "step": 16869 + }, + { + "epoch": 1.361472036155274, + "grad_norm": 0.670630693435669, + "learning_rate": 1.1984301247905582e-05, + "loss": 2.3982, + "step": 16870 + }, + { + "epoch": 1.361552739891857, + "grad_norm": 0.6993644833564758, + "learning_rate": 1.1976808577282017e-05, + "loss": 2.4297, + "step": 16871 + }, + { + "epoch": 1.36163344362844, + "grad_norm": 0.7448122501373291, + "learning_rate": 1.1969318100426486e-05, + "loss": 2.3612, + "step": 16872 + }, + { + "epoch": 1.361714147365023, + "grad_norm": 0.7014498114585876, + "learning_rate": 1.1961829817525649e-05, + "loss": 2.3451, + "step": 16873 + }, + { + "epoch": 1.361794851101606, + "grad_norm": 0.7140750885009766, + "learning_rate": 1.195434372876616e-05, + "loss": 2.4231, + "step": 16874 + }, + { + "epoch": 1.3618755548381891, + "grad_norm": 0.7377427816390991, + "learning_rate": 1.1946859834334567e-05, + "loss": 2.4055, + "step": 16875 + }, + { + "epoch": 1.361956258574772, + "grad_norm": 
0.7969191670417786, + "learning_rate": 1.1939378134417433e-05, + "loss": 2.3503, + "step": 16876 + }, + { + "epoch": 1.362036962311355, + "grad_norm": 0.6821554899215698, + "learning_rate": 1.1931898629201155e-05, + "loss": 2.4259, + "step": 16877 + }, + { + "epoch": 1.3621176660479382, + "grad_norm": 0.6598221659660339, + "learning_rate": 1.1924421318872182e-05, + "loss": 2.3833, + "step": 16878 + }, + { + "epoch": 1.362198369784521, + "grad_norm": 0.8031432628631592, + "learning_rate": 1.1916946203616863e-05, + "loss": 2.5077, + "step": 16879 + }, + { + "epoch": 1.362279073521104, + "grad_norm": 0.7247405648231506, + "learning_rate": 1.190947328362152e-05, + "loss": 2.426, + "step": 16880 + }, + { + "epoch": 1.362359777257687, + "grad_norm": 0.7256691455841064, + "learning_rate": 1.1902002559072344e-05, + "loss": 2.474, + "step": 16881 + }, + { + "epoch": 1.36244048099427, + "grad_norm": 0.7382180094718933, + "learning_rate": 1.1894534030155558e-05, + "loss": 2.4487, + "step": 16882 + }, + { + "epoch": 1.362521184730853, + "grad_norm": 0.700179398059845, + "learning_rate": 1.1887067697057297e-05, + "loss": 2.3836, + "step": 16883 + }, + { + "epoch": 1.362601888467436, + "grad_norm": 0.706106424331665, + "learning_rate": 1.1879603559963638e-05, + "loss": 2.4304, + "step": 16884 + }, + { + "epoch": 1.362682592204019, + "grad_norm": 0.7514815926551819, + "learning_rate": 1.1872141619060606e-05, + "loss": 2.4895, + "step": 16885 + }, + { + "epoch": 1.362763295940602, + "grad_norm": 0.6605612635612488, + "learning_rate": 1.1864681874534201e-05, + "loss": 2.3569, + "step": 16886 + }, + { + "epoch": 1.362843999677185, + "grad_norm": 0.6366496682167053, + "learning_rate": 1.1857224326570283e-05, + "loss": 2.3919, + "step": 16887 + }, + { + "epoch": 1.3629247034137681, + "grad_norm": 0.8100820183753967, + "learning_rate": 1.1849768975354736e-05, + "loss": 2.5063, + "step": 16888 + }, + { + "epoch": 1.363005407150351, + "grad_norm": 0.685127854347229, + "learning_rate": 
1.1842315821073403e-05, + "loss": 2.4647, + "step": 16889 + }, + { + "epoch": 1.363086110886934, + "grad_norm": 0.696172833442688, + "learning_rate": 1.1834864863911987e-05, + "loss": 2.4224, + "step": 16890 + }, + { + "epoch": 1.3631668146235172, + "grad_norm": 0.6558032035827637, + "learning_rate": 1.1827416104056199e-05, + "loss": 2.3619, + "step": 16891 + }, + { + "epoch": 1.3632475183601, + "grad_norm": 0.744687020778656, + "learning_rate": 1.1819969541691689e-05, + "loss": 2.4669, + "step": 16892 + }, + { + "epoch": 1.3633282220966831, + "grad_norm": 0.6925212740898132, + "learning_rate": 1.1812525177004052e-05, + "loss": 2.3967, + "step": 16893 + }, + { + "epoch": 1.363408925833266, + "grad_norm": 0.6861244440078735, + "learning_rate": 1.1805083010178797e-05, + "loss": 2.3979, + "step": 16894 + }, + { + "epoch": 1.363489629569849, + "grad_norm": 0.6987108588218689, + "learning_rate": 1.179764304140143e-05, + "loss": 2.4263, + "step": 16895 + }, + { + "epoch": 1.363570333306432, + "grad_norm": 0.6940091848373413, + "learning_rate": 1.179020527085738e-05, + "loss": 2.4328, + "step": 16896 + }, + { + "epoch": 1.363651037043015, + "grad_norm": 0.6831968426704407, + "learning_rate": 1.1782769698731966e-05, + "loss": 2.427, + "step": 16897 + }, + { + "epoch": 1.3637317407795981, + "grad_norm": 0.7370985746383667, + "learning_rate": 1.177533632521054e-05, + "loss": 2.3711, + "step": 16898 + }, + { + "epoch": 1.363812444516181, + "grad_norm": 0.8176774978637695, + "learning_rate": 1.1767905150478376e-05, + "loss": 2.4337, + "step": 16899 + }, + { + "epoch": 1.363893148252764, + "grad_norm": 0.786318302154541, + "learning_rate": 1.1760476174720637e-05, + "loss": 2.5099, + "step": 16900 + }, + { + "epoch": 1.3639738519893472, + "grad_norm": 0.7309854626655579, + "learning_rate": 1.1753049398122495e-05, + "loss": 2.46, + "step": 16901 + }, + { + "epoch": 1.36405455572593, + "grad_norm": 0.7410863637924194, + "learning_rate": 1.1745624820869039e-05, + "loss": 2.4249, + 
"step": 16902 + }, + { + "epoch": 1.3641352594625131, + "grad_norm": 0.7059988379478455, + "learning_rate": 1.1738202443145308e-05, + "loss": 2.4964, + "step": 16903 + }, + { + "epoch": 1.3642159631990962, + "grad_norm": 0.7351845502853394, + "learning_rate": 1.1730782265136287e-05, + "loss": 2.4694, + "step": 16904 + }, + { + "epoch": 1.364296666935679, + "grad_norm": 0.6928153038024902, + "learning_rate": 1.1723364287026938e-05, + "loss": 2.426, + "step": 16905 + }, + { + "epoch": 1.3643773706722622, + "grad_norm": 0.759920060634613, + "learning_rate": 1.1715948509002083e-05, + "loss": 2.4359, + "step": 16906 + }, + { + "epoch": 1.3644580744088453, + "grad_norm": 0.6655696630477905, + "learning_rate": 1.1708534931246573e-05, + "loss": 2.4118, + "step": 16907 + }, + { + "epoch": 1.3645387781454281, + "grad_norm": 0.6912528872489929, + "learning_rate": 1.170112355394517e-05, + "loss": 2.4257, + "step": 16908 + }, + { + "epoch": 1.3646194818820112, + "grad_norm": 0.6612871289253235, + "learning_rate": 1.1693714377282604e-05, + "loss": 2.4192, + "step": 16909 + }, + { + "epoch": 1.364700185618594, + "grad_norm": 0.6548018455505371, + "learning_rate": 1.1686307401443486e-05, + "loss": 2.4054, + "step": 16910 + }, + { + "epoch": 1.3647808893551772, + "grad_norm": 0.7749961018562317, + "learning_rate": 1.1678902626612443e-05, + "loss": 2.44, + "step": 16911 + }, + { + "epoch": 1.36486159309176, + "grad_norm": 0.7187496423721313, + "learning_rate": 1.1671500052974039e-05, + "loss": 2.4033, + "step": 16912 + }, + { + "epoch": 1.3649422968283431, + "grad_norm": 0.7002814412117004, + "learning_rate": 1.1664099680712715e-05, + "loss": 2.4442, + "step": 16913 + }, + { + "epoch": 1.3650230005649262, + "grad_norm": 0.6852529644966125, + "learning_rate": 1.1656701510012946e-05, + "loss": 2.4253, + "step": 16914 + }, + { + "epoch": 1.365103704301509, + "grad_norm": 0.6922035813331604, + "learning_rate": 1.1649305541059142e-05, + "loss": 2.4406, + "step": 16915 + }, + { + "epoch": 
1.3651844080380922, + "grad_norm": 0.6883397698402405, + "learning_rate": 1.1641911774035563e-05, + "loss": 2.4064, + "step": 16916 + }, + { + "epoch": 1.3652651117746752, + "grad_norm": 0.7101531624794006, + "learning_rate": 1.163452020912652e-05, + "loss": 2.4068, + "step": 16917 + }, + { + "epoch": 1.365345815511258, + "grad_norm": 0.728369951248169, + "learning_rate": 1.1627130846516231e-05, + "loss": 2.4319, + "step": 16918 + }, + { + "epoch": 1.3654265192478412, + "grad_norm": 0.6765053272247314, + "learning_rate": 1.161974368638884e-05, + "loss": 2.3922, + "step": 16919 + }, + { + "epoch": 1.3655072229844243, + "grad_norm": 0.6909242868423462, + "learning_rate": 1.1612358728928475e-05, + "loss": 2.4124, + "step": 16920 + }, + { + "epoch": 1.3655879267210072, + "grad_norm": 0.735650897026062, + "learning_rate": 1.1604975974319177e-05, + "loss": 2.5137, + "step": 16921 + }, + { + "epoch": 1.3656686304575902, + "grad_norm": 0.6587653756141663, + "learning_rate": 1.1597595422744934e-05, + "loss": 2.4163, + "step": 16922 + }, + { + "epoch": 1.3657493341941733, + "grad_norm": 0.700282096862793, + "learning_rate": 1.159021707438971e-05, + "loss": 2.4272, + "step": 16923 + }, + { + "epoch": 1.3658300379307562, + "grad_norm": 0.7175682783126831, + "learning_rate": 1.1582840929437365e-05, + "loss": 2.4598, + "step": 16924 + }, + { + "epoch": 1.3659107416673393, + "grad_norm": 0.6725881695747375, + "learning_rate": 1.157546698807176e-05, + "loss": 2.4064, + "step": 16925 + }, + { + "epoch": 1.3659914454039221, + "grad_norm": 0.7130467295646667, + "learning_rate": 1.1568095250476651e-05, + "loss": 2.3851, + "step": 16926 + }, + { + "epoch": 1.3660721491405052, + "grad_norm": 0.6859269142150879, + "learning_rate": 1.1560725716835785e-05, + "loss": 2.3577, + "step": 16927 + }, + { + "epoch": 1.366152852877088, + "grad_norm": 0.7037541270256042, + "learning_rate": 1.1553358387332824e-05, + "loss": 2.4402, + "step": 16928 + }, + { + "epoch": 1.3662335566136712, + 
"grad_norm": 0.7094031572341919, + "learning_rate": 1.1545993262151366e-05, + "loss": 2.4036, + "step": 16929 + }, + { + "epoch": 1.3663142603502543, + "grad_norm": 0.6953302025794983, + "learning_rate": 1.1538630341474965e-05, + "loss": 2.4192, + "step": 16930 + }, + { + "epoch": 1.3663949640868371, + "grad_norm": 0.7012252807617188, + "learning_rate": 1.1531269625487163e-05, + "loss": 2.4207, + "step": 16931 + }, + { + "epoch": 1.3664756678234202, + "grad_norm": 0.6616495847702026, + "learning_rate": 1.1523911114371366e-05, + "loss": 2.4187, + "step": 16932 + }, + { + "epoch": 1.3665563715600033, + "grad_norm": 0.6819868087768555, + "learning_rate": 1.1516554808310975e-05, + "loss": 2.448, + "step": 16933 + }, + { + "epoch": 1.3666370752965862, + "grad_norm": 0.6869969964027405, + "learning_rate": 1.1509200707489343e-05, + "loss": 2.4134, + "step": 16934 + }, + { + "epoch": 1.3667177790331693, + "grad_norm": 0.6600778698921204, + "learning_rate": 1.1501848812089733e-05, + "loss": 2.4159, + "step": 16935 + }, + { + "epoch": 1.3667984827697524, + "grad_norm": 0.668712317943573, + "learning_rate": 1.1494499122295398e-05, + "loss": 2.41, + "step": 16936 + }, + { + "epoch": 1.3668791865063352, + "grad_norm": 0.767365574836731, + "learning_rate": 1.1487151638289518e-05, + "loss": 2.3856, + "step": 16937 + }, + { + "epoch": 1.3669598902429183, + "grad_norm": 0.721546471118927, + "learning_rate": 1.1479806360255174e-05, + "loss": 2.4038, + "step": 16938 + }, + { + "epoch": 1.3670405939795012, + "grad_norm": 0.6796963810920715, + "learning_rate": 1.1472463288375456e-05, + "loss": 2.3698, + "step": 16939 + }, + { + "epoch": 1.3671212977160843, + "grad_norm": 0.7340671420097351, + "learning_rate": 1.1465122422833363e-05, + "loss": 2.4296, + "step": 16940 + }, + { + "epoch": 1.3672020014526671, + "grad_norm": 0.7173369526863098, + "learning_rate": 1.145778376381187e-05, + "loss": 2.3923, + "step": 16941 + }, + { + "epoch": 1.3672827051892502, + "grad_norm": 
0.6683956980705261, + "learning_rate": 1.1450447311493839e-05, + "loss": 2.4092, + "step": 16942 + }, + { + "epoch": 1.3673634089258333, + "grad_norm": 0.6457851529121399, + "learning_rate": 1.1443113066062129e-05, + "loss": 2.3467, + "step": 16943 + }, + { + "epoch": 1.3674441126624162, + "grad_norm": 0.6870608925819397, + "learning_rate": 1.1435781027699532e-05, + "loss": 2.3766, + "step": 16944 + }, + { + "epoch": 1.3675248163989993, + "grad_norm": 0.6496049165725708, + "learning_rate": 1.1428451196588775e-05, + "loss": 2.4464, + "step": 16945 + }, + { + "epoch": 1.3676055201355823, + "grad_norm": 0.7554739117622375, + "learning_rate": 1.1421123572912551e-05, + "loss": 2.4243, + "step": 16946 + }, + { + "epoch": 1.3676862238721652, + "grad_norm": 0.7208122611045837, + "learning_rate": 1.1413798156853495e-05, + "loss": 2.3699, + "step": 16947 + }, + { + "epoch": 1.3677669276087483, + "grad_norm": 0.7072176337242126, + "learning_rate": 1.1406474948594126e-05, + "loss": 2.4011, + "step": 16948 + }, + { + "epoch": 1.3678476313453314, + "grad_norm": 0.7316476106643677, + "learning_rate": 1.1399153948316999e-05, + "loss": 2.4508, + "step": 16949 + }, + { + "epoch": 1.3679283350819142, + "grad_norm": 0.8518069386482239, + "learning_rate": 1.1391835156204577e-05, + "loss": 2.4197, + "step": 16950 + }, + { + "epoch": 1.3680090388184973, + "grad_norm": 0.6700364947319031, + "learning_rate": 1.1384518572439228e-05, + "loss": 2.4272, + "step": 16951 + }, + { + "epoch": 1.3680897425550804, + "grad_norm": 0.7007749676704407, + "learning_rate": 1.1377204197203317e-05, + "loss": 2.3777, + "step": 16952 + }, + { + "epoch": 1.3681704462916633, + "grad_norm": 0.6792053580284119, + "learning_rate": 1.1369892030679141e-05, + "loss": 2.4487, + "step": 16953 + }, + { + "epoch": 1.3682511500282464, + "grad_norm": 0.6913022398948669, + "learning_rate": 1.1362582073048932e-05, + "loss": 2.3757, + "step": 16954 + }, + { + "epoch": 1.3683318537648292, + "grad_norm": 0.648248016834259, + 
"learning_rate": 1.135527432449488e-05, + "loss": 2.3482, + "step": 16955 + }, + { + "epoch": 1.3684125575014123, + "grad_norm": 0.6711798906326294, + "learning_rate": 1.1347968785199115e-05, + "loss": 2.4096, + "step": 16956 + }, + { + "epoch": 1.3684932612379952, + "grad_norm": 0.6932381987571716, + "learning_rate": 1.1340665455343724e-05, + "loss": 2.3834, + "step": 16957 + }, + { + "epoch": 1.3685739649745783, + "grad_norm": 0.6890178918838501, + "learning_rate": 1.1333364335110697e-05, + "loss": 2.4182, + "step": 16958 + }, + { + "epoch": 1.3686546687111614, + "grad_norm": 0.6612519025802612, + "learning_rate": 1.1326065424681997e-05, + "loss": 2.3691, + "step": 16959 + }, + { + "epoch": 1.3687353724477442, + "grad_norm": 0.7123190760612488, + "learning_rate": 1.131876872423957e-05, + "loss": 2.3919, + "step": 16960 + }, + { + "epoch": 1.3688160761843273, + "grad_norm": 0.6615463495254517, + "learning_rate": 1.1311474233965214e-05, + "loss": 2.4266, + "step": 16961 + }, + { + "epoch": 1.3688967799209104, + "grad_norm": 0.7320190668106079, + "learning_rate": 1.130418195404076e-05, + "loss": 2.4268, + "step": 16962 + }, + { + "epoch": 1.3689774836574933, + "grad_norm": 0.6845116019248962, + "learning_rate": 1.1296891884647965e-05, + "loss": 2.3972, + "step": 16963 + }, + { + "epoch": 1.3690581873940764, + "grad_norm": 0.70455002784729, + "learning_rate": 1.1289604025968448e-05, + "loss": 2.4183, + "step": 16964 + }, + { + "epoch": 1.3691388911306595, + "grad_norm": 0.6952407956123352, + "learning_rate": 1.128231837818392e-05, + "loss": 2.4276, + "step": 16965 + }, + { + "epoch": 1.3692195948672423, + "grad_norm": 0.7939464449882507, + "learning_rate": 1.1275034941475938e-05, + "loss": 2.4072, + "step": 16966 + }, + { + "epoch": 1.3693002986038254, + "grad_norm": 0.6974930763244629, + "learning_rate": 1.1267753716026007e-05, + "loss": 2.4133, + "step": 16967 + }, + { + "epoch": 1.3693810023404085, + "grad_norm": 0.7187508344650269, + "learning_rate": 
1.126047470201559e-05, + "loss": 2.3588, + "step": 16968 + }, + { + "epoch": 1.3694617060769914, + "grad_norm": 0.6887609958648682, + "learning_rate": 1.1253197899626134e-05, + "loss": 2.4322, + "step": 16969 + }, + { + "epoch": 1.3695424098135744, + "grad_norm": 0.679957389831543, + "learning_rate": 1.1245923309038964e-05, + "loss": 2.3907, + "step": 16970 + }, + { + "epoch": 1.3696231135501573, + "grad_norm": 0.7540870308876038, + "learning_rate": 1.1238650930435378e-05, + "loss": 2.4752, + "step": 16971 + }, + { + "epoch": 1.3697038172867404, + "grad_norm": 0.7697634100914001, + "learning_rate": 1.1231380763996635e-05, + "loss": 2.4366, + "step": 16972 + }, + { + "epoch": 1.3697845210233233, + "grad_norm": 0.6836850643157959, + "learning_rate": 1.1224112809903954e-05, + "loss": 2.3511, + "step": 16973 + }, + { + "epoch": 1.3698652247599064, + "grad_norm": 0.6904506683349609, + "learning_rate": 1.1216847068338421e-05, + "loss": 2.4109, + "step": 16974 + }, + { + "epoch": 1.3699459284964894, + "grad_norm": 0.6579318046569824, + "learning_rate": 1.1209583539481127e-05, + "loss": 2.4391, + "step": 16975 + }, + { + "epoch": 1.3700266322330723, + "grad_norm": 0.7107192277908325, + "learning_rate": 1.120232222351314e-05, + "loss": 2.399, + "step": 16976 + }, + { + "epoch": 1.3701073359696554, + "grad_norm": 0.7581583261489868, + "learning_rate": 1.119506312061539e-05, + "loss": 2.4817, + "step": 16977 + }, + { + "epoch": 1.3701880397062385, + "grad_norm": 0.6836642622947693, + "learning_rate": 1.11878062309688e-05, + "loss": 2.4415, + "step": 16978 + }, + { + "epoch": 1.3702687434428213, + "grad_norm": 0.6842699646949768, + "learning_rate": 1.118055155475426e-05, + "loss": 2.4045, + "step": 16979 + }, + { + "epoch": 1.3703494471794044, + "grad_norm": 0.7630519270896912, + "learning_rate": 1.1173299092152534e-05, + "loss": 2.4314, + "step": 16980 + }, + { + "epoch": 1.3704301509159875, + "grad_norm": 0.7334303259849548, + "learning_rate": 1.116604884334439e-05, + 
"loss": 2.3564, + "step": 16981 + }, + { + "epoch": 1.3705108546525704, + "grad_norm": 0.6929439306259155, + "learning_rate": 1.1158800808510538e-05, + "loss": 2.4258, + "step": 16982 + }, + { + "epoch": 1.3705915583891535, + "grad_norm": 0.6387187838554382, + "learning_rate": 1.1151554987831591e-05, + "loss": 2.3263, + "step": 16983 + }, + { + "epoch": 1.3706722621257363, + "grad_norm": 0.7279032468795776, + "learning_rate": 1.1144311381488136e-05, + "loss": 2.4074, + "step": 16984 + }, + { + "epoch": 1.3707529658623194, + "grad_norm": 0.7066916227340698, + "learning_rate": 1.113706998966072e-05, + "loss": 2.4358, + "step": 16985 + }, + { + "epoch": 1.3708336695989023, + "grad_norm": 0.6753098964691162, + "learning_rate": 1.1129830812529807e-05, + "loss": 2.4195, + "step": 16986 + }, + { + "epoch": 1.3709143733354854, + "grad_norm": 0.6728894114494324, + "learning_rate": 1.112259385027582e-05, + "loss": 2.3712, + "step": 16987 + }, + { + "epoch": 1.3709950770720685, + "grad_norm": 0.7251775860786438, + "learning_rate": 1.1115359103079115e-05, + "loss": 2.4063, + "step": 16988 + }, + { + "epoch": 1.3710757808086513, + "grad_norm": 0.6797254085540771, + "learning_rate": 1.1108126571120036e-05, + "loss": 2.395, + "step": 16989 + }, + { + "epoch": 1.3711564845452344, + "grad_norm": 0.7505605220794678, + "learning_rate": 1.1100896254578786e-05, + "loss": 2.4044, + "step": 16990 + }, + { + "epoch": 1.3712371882818175, + "grad_norm": 0.7126416563987732, + "learning_rate": 1.1093668153635594e-05, + "loss": 2.4043, + "step": 16991 + }, + { + "epoch": 1.3713178920184004, + "grad_norm": 0.6550771594047546, + "learning_rate": 1.1086442268470609e-05, + "loss": 2.3515, + "step": 16992 + }, + { + "epoch": 1.3713985957549835, + "grad_norm": 0.7253621816635132, + "learning_rate": 1.1079218599263874e-05, + "loss": 2.4109, + "step": 16993 + }, + { + "epoch": 1.3714792994915666, + "grad_norm": 0.7272186875343323, + "learning_rate": 1.1071997146195468e-05, + "loss": 2.3531, + "step": 
16994 + }, + { + "epoch": 1.3715600032281494, + "grad_norm": 0.6841129660606384, + "learning_rate": 1.1064777909445345e-05, + "loss": 2.4031, + "step": 16995 + }, + { + "epoch": 1.3716407069647325, + "grad_norm": 0.692945659160614, + "learning_rate": 1.1057560889193441e-05, + "loss": 2.3858, + "step": 16996 + }, + { + "epoch": 1.3717214107013156, + "grad_norm": 0.721182644367218, + "learning_rate": 1.1050346085619612e-05, + "loss": 2.3871, + "step": 16997 + }, + { + "epoch": 1.3718021144378985, + "grad_norm": 0.722960889339447, + "learning_rate": 1.1043133498903702e-05, + "loss": 2.3452, + "step": 16998 + }, + { + "epoch": 1.3718828181744815, + "grad_norm": 0.7148451805114746, + "learning_rate": 1.1035923129225412e-05, + "loss": 2.3905, + "step": 16999 + }, + { + "epoch": 1.3719635219110644, + "grad_norm": 0.7118532061576843, + "learning_rate": 1.1028714976764486e-05, + "loss": 2.3894, + "step": 17000 + }, + { + "epoch": 1.3719635219110644, + "eval_loss": 2.3730249404907227, + "eval_runtime": 769.4165, + "eval_samples_per_second": 3.405, + "eval_steps_per_second": 0.568, + "step": 17000 + }, + { + "epoch": 1.3720442256476475, + "grad_norm": 0.6933719515800476, + "learning_rate": 1.1021509041700539e-05, + "loss": 2.394, + "step": 17001 + }, + { + "epoch": 1.3721249293842304, + "grad_norm": 0.7330136895179749, + "learning_rate": 1.1014305324213215e-05, + "loss": 2.4466, + "step": 17002 + }, + { + "epoch": 1.3722056331208135, + "grad_norm": 0.6614598631858826, + "learning_rate": 1.1007103824481979e-05, + "loss": 2.4441, + "step": 17003 + }, + { + "epoch": 1.3722863368573965, + "grad_norm": 0.8030059933662415, + "learning_rate": 1.0999904542686356e-05, + "loss": 2.4284, + "step": 17004 + }, + { + "epoch": 1.3723670405939794, + "grad_norm": 0.6881710886955261, + "learning_rate": 1.099270747900576e-05, + "loss": 2.4433, + "step": 17005 + }, + { + "epoch": 1.3724477443305625, + "grad_norm": 0.661325216293335, + "learning_rate": 1.0985512633619555e-05, + "loss": 2.4144, + 
"step": 17006 + }, + { + "epoch": 1.3725284480671456, + "grad_norm": 0.6896070241928101, + "learning_rate": 1.0978320006707065e-05, + "loss": 2.3972, + "step": 17007 + }, + { + "epoch": 1.3726091518037284, + "grad_norm": 0.7043858766555786, + "learning_rate": 1.0971129598447561e-05, + "loss": 2.4082, + "step": 17008 + }, + { + "epoch": 1.3726898555403115, + "grad_norm": 0.7162652611732483, + "learning_rate": 1.0963941409020217e-05, + "loss": 2.3696, + "step": 17009 + }, + { + "epoch": 1.3727705592768946, + "grad_norm": 0.6809261441230774, + "learning_rate": 1.0956755438604194e-05, + "loss": 2.4392, + "step": 17010 + }, + { + "epoch": 1.3728512630134775, + "grad_norm": 0.6897100806236267, + "learning_rate": 1.0949571687378602e-05, + "loss": 2.4942, + "step": 17011 + }, + { + "epoch": 1.3729319667500606, + "grad_norm": 0.6903488039970398, + "learning_rate": 1.0942390155522442e-05, + "loss": 2.3936, + "step": 17012 + }, + { + "epoch": 1.3730126704866437, + "grad_norm": 0.676643431186676, + "learning_rate": 1.0935210843214727e-05, + "loss": 2.3972, + "step": 17013 + }, + { + "epoch": 1.3730933742232265, + "grad_norm": 0.6523454189300537, + "learning_rate": 1.092803375063437e-05, + "loss": 2.4914, + "step": 17014 + }, + { + "epoch": 1.3731740779598096, + "grad_norm": 0.7250776886940002, + "learning_rate": 1.092085887796026e-05, + "loss": 2.4493, + "step": 17015 + }, + { + "epoch": 1.3732547816963925, + "grad_norm": 0.6791245937347412, + "learning_rate": 1.091368622537119e-05, + "loss": 2.4553, + "step": 17016 + }, + { + "epoch": 1.3733354854329756, + "grad_norm": 0.8086698651313782, + "learning_rate": 1.0906515793045934e-05, + "loss": 2.457, + "step": 17017 + }, + { + "epoch": 1.3734161891695584, + "grad_norm": 0.6653520464897156, + "learning_rate": 1.0899347581163221e-05, + "loss": 2.3974, + "step": 17018 + }, + { + "epoch": 1.3734968929061415, + "grad_norm": 0.6596232056617737, + "learning_rate": 1.0892181589901651e-05, + "loss": 2.3771, + "step": 17019 + }, + { + 
"epoch": 1.3735775966427246, + "grad_norm": 0.7042080760002136, + "learning_rate": 1.0885017819439858e-05, + "loss": 2.4493, + "step": 17020 + }, + { + "epoch": 1.3736583003793075, + "grad_norm": 0.6882427930831909, + "learning_rate": 1.0877856269956377e-05, + "loss": 2.4293, + "step": 17021 + }, + { + "epoch": 1.3737390041158906, + "grad_norm": 0.6881027221679688, + "learning_rate": 1.0870696941629676e-05, + "loss": 2.4503, + "step": 17022 + }, + { + "epoch": 1.3738197078524736, + "grad_norm": 0.7282640337944031, + "learning_rate": 1.086353983463818e-05, + "loss": 2.4173, + "step": 17023 + }, + { + "epoch": 1.3739004115890565, + "grad_norm": 0.7281018495559692, + "learning_rate": 1.0856384949160314e-05, + "loss": 2.4514, + "step": 17024 + }, + { + "epoch": 1.3739811153256396, + "grad_norm": 0.7185690402984619, + "learning_rate": 1.0849232285374323e-05, + "loss": 2.4244, + "step": 17025 + }, + { + "epoch": 1.3740618190622227, + "grad_norm": 0.7732044458389282, + "learning_rate": 1.0842081843458496e-05, + "loss": 2.4855, + "step": 17026 + }, + { + "epoch": 1.3741425227988056, + "grad_norm": 0.6599788665771484, + "learning_rate": 1.0834933623591093e-05, + "loss": 2.4339, + "step": 17027 + }, + { + "epoch": 1.3742232265353886, + "grad_norm": 0.7193527817726135, + "learning_rate": 1.0827787625950192e-05, + "loss": 2.4284, + "step": 17028 + }, + { + "epoch": 1.3743039302719717, + "grad_norm": 0.7255674004554749, + "learning_rate": 1.082064385071393e-05, + "loss": 2.4056, + "step": 17029 + }, + { + "epoch": 1.3743846340085546, + "grad_norm": 0.7823398113250732, + "learning_rate": 1.0813502298060363e-05, + "loss": 2.4268, + "step": 17030 + }, + { + "epoch": 1.3744653377451377, + "grad_norm": 0.6839333176612854, + "learning_rate": 1.0806362968167427e-05, + "loss": 2.4415, + "step": 17031 + }, + { + "epoch": 1.3745460414817205, + "grad_norm": 0.798973560333252, + "learning_rate": 1.079922586121308e-05, + "loss": 2.4251, + "step": 17032 + }, + { + "epoch": 
1.3746267452183036, + "grad_norm": 0.7234559655189514, + "learning_rate": 1.0792090977375203e-05, + "loss": 2.3821, + "step": 17033 + }, + { + "epoch": 1.3747074489548865, + "grad_norm": 0.6686646938323975, + "learning_rate": 1.0784958316831628e-05, + "loss": 2.4123, + "step": 17034 + }, + { + "epoch": 1.3747881526914696, + "grad_norm": 0.6656081676483154, + "learning_rate": 1.0777827879760084e-05, + "loss": 2.3527, + "step": 17035 + }, + { + "epoch": 1.3748688564280527, + "grad_norm": 0.6609933972358704, + "learning_rate": 1.0770699666338303e-05, + "loss": 2.4128, + "step": 17036 + }, + { + "epoch": 1.3749495601646355, + "grad_norm": 0.710719108581543, + "learning_rate": 1.0763573676743921e-05, + "loss": 2.4634, + "step": 17037 + }, + { + "epoch": 1.3750302639012186, + "grad_norm": 0.6638451814651489, + "learning_rate": 1.0756449911154554e-05, + "loss": 2.3828, + "step": 17038 + }, + { + "epoch": 1.3751109676378017, + "grad_norm": 0.7525094151496887, + "learning_rate": 1.0749328369747746e-05, + "loss": 2.4078, + "step": 17039 + }, + { + "epoch": 1.3751916713743846, + "grad_norm": 0.7343288064002991, + "learning_rate": 1.0742209052701002e-05, + "loss": 2.4731, + "step": 17040 + }, + { + "epoch": 1.3752723751109677, + "grad_norm": 0.7966243624687195, + "learning_rate": 1.0735091960191701e-05, + "loss": 2.3501, + "step": 17041 + }, + { + "epoch": 1.3753530788475508, + "grad_norm": 0.6693055033683777, + "learning_rate": 1.0727977092397256e-05, + "loss": 2.4214, + "step": 17042 + }, + { + "epoch": 1.3754337825841336, + "grad_norm": 0.6831601858139038, + "learning_rate": 1.0720864449494994e-05, + "loss": 2.4029, + "step": 17043 + }, + { + "epoch": 1.3755144863207167, + "grad_norm": 0.7081588506698608, + "learning_rate": 1.0713754031662149e-05, + "loss": 2.4532, + "step": 17044 + }, + { + "epoch": 1.3755951900572996, + "grad_norm": 0.698469877243042, + "learning_rate": 1.0706645839075957e-05, + "loss": 2.4181, + "step": 17045 + }, + { + "epoch": 1.3756758937938827, + 
"grad_norm": 0.652568519115448, + "learning_rate": 1.0699539871913556e-05, + "loss": 2.4761, + "step": 17046 + }, + { + "epoch": 1.3757565975304655, + "grad_norm": 0.7698256969451904, + "learning_rate": 1.0692436130352068e-05, + "loss": 2.4742, + "step": 17047 + }, + { + "epoch": 1.3758373012670486, + "grad_norm": 0.7192606329917908, + "learning_rate": 1.068533461456851e-05, + "loss": 2.401, + "step": 17048 + }, + { + "epoch": 1.3759180050036317, + "grad_norm": 0.6296666860580444, + "learning_rate": 1.0678235324739894e-05, + "loss": 2.4628, + "step": 17049 + }, + { + "epoch": 1.3759987087402146, + "grad_norm": 0.7048724293708801, + "learning_rate": 1.0671138261043156e-05, + "loss": 2.4799, + "step": 17050 + }, + { + "epoch": 1.3760794124767977, + "grad_norm": 0.6724091172218323, + "learning_rate": 1.0664043423655146e-05, + "loss": 2.4108, + "step": 17051 + }, + { + "epoch": 1.3761601162133807, + "grad_norm": 0.6380212306976318, + "learning_rate": 1.0656950812752709e-05, + "loss": 2.3943, + "step": 17052 + }, + { + "epoch": 1.3762408199499636, + "grad_norm": 0.7005279660224915, + "learning_rate": 1.0649860428512604e-05, + "loss": 2.3623, + "step": 17053 + }, + { + "epoch": 1.3763215236865467, + "grad_norm": 0.719219982624054, + "learning_rate": 1.0642772271111534e-05, + "loss": 2.3873, + "step": 17054 + }, + { + "epoch": 1.3764022274231298, + "grad_norm": 0.7318363785743713, + "learning_rate": 1.063568634072616e-05, + "loss": 2.4335, + "step": 17055 + }, + { + "epoch": 1.3764829311597127, + "grad_norm": 0.7131830453872681, + "learning_rate": 1.062860263753308e-05, + "loss": 2.3829, + "step": 17056 + }, + { + "epoch": 1.3765636348962957, + "grad_norm": 0.7030664086341858, + "learning_rate": 1.0621521161708836e-05, + "loss": 2.3216, + "step": 17057 + }, + { + "epoch": 1.3766443386328788, + "grad_norm": 0.738999605178833, + "learning_rate": 1.0614441913429929e-05, + "loss": 2.4951, + "step": 17058 + }, + { + "epoch": 1.3767250423694617, + "grad_norm": 
0.6926800012588501, + "learning_rate": 1.0607364892872806e-05, + "loss": 2.3977, + "step": 17059 + }, + { + "epoch": 1.3768057461060448, + "grad_norm": 0.6439639925956726, + "learning_rate": 1.0600290100213805e-05, + "loss": 2.4049, + "step": 17060 + }, + { + "epoch": 1.3768864498426276, + "grad_norm": 0.7035220265388489, + "learning_rate": 1.0593217535629264e-05, + "loss": 2.4212, + "step": 17061 + }, + { + "epoch": 1.3769671535792107, + "grad_norm": 0.705183207988739, + "learning_rate": 1.0586147199295482e-05, + "loss": 2.4244, + "step": 17062 + }, + { + "epoch": 1.3770478573157936, + "grad_norm": 0.7036949396133423, + "learning_rate": 1.057907909138861e-05, + "loss": 2.4254, + "step": 17063 + }, + { + "epoch": 1.3771285610523767, + "grad_norm": 0.7137075066566467, + "learning_rate": 1.0572013212084841e-05, + "loss": 2.4135, + "step": 17064 + }, + { + "epoch": 1.3772092647889598, + "grad_norm": 0.6973327398300171, + "learning_rate": 1.0564949561560267e-05, + "loss": 2.4568, + "step": 17065 + }, + { + "epoch": 1.3772899685255426, + "grad_norm": 0.7157370448112488, + "learning_rate": 1.0557888139990946e-05, + "loss": 2.3877, + "step": 17066 + }, + { + "epoch": 1.3773706722621257, + "grad_norm": 0.6622396111488342, + "learning_rate": 1.0550828947552848e-05, + "loss": 2.3636, + "step": 17067 + }, + { + "epoch": 1.3774513759987088, + "grad_norm": 0.7295750975608826, + "learning_rate": 1.0543771984421913e-05, + "loss": 2.4192, + "step": 17068 + }, + { + "epoch": 1.3775320797352917, + "grad_norm": 0.7245587110519409, + "learning_rate": 1.0536717250774053e-05, + "loss": 2.3575, + "step": 17069 + }, + { + "epoch": 1.3776127834718748, + "grad_norm": 0.6923871040344238, + "learning_rate": 1.052966474678503e-05, + "loss": 2.4547, + "step": 17070 + }, + { + "epoch": 1.3776934872084579, + "grad_norm": 0.6754410862922668, + "learning_rate": 1.0522614472630632e-05, + "loss": 2.4469, + "step": 17071 + }, + { + "epoch": 1.3777741909450407, + "grad_norm": 0.6979227662086487, + 
"learning_rate": 1.0515566428486612e-05, + "loss": 2.407, + "step": 17072 + }, + { + "epoch": 1.3778548946816238, + "grad_norm": 0.7050029635429382, + "learning_rate": 1.050852061452856e-05, + "loss": 2.3937, + "step": 17073 + }, + { + "epoch": 1.377935598418207, + "grad_norm": 0.676030158996582, + "learning_rate": 1.0501477030932117e-05, + "loss": 2.4144, + "step": 17074 + }, + { + "epoch": 1.3780163021547898, + "grad_norm": 0.6984726786613464, + "learning_rate": 1.0494435677872827e-05, + "loss": 2.4541, + "step": 17075 + }, + { + "epoch": 1.3780970058913729, + "grad_norm": 0.6987836956977844, + "learning_rate": 1.0487396555526141e-05, + "loss": 2.3984, + "step": 17076 + }, + { + "epoch": 1.3781777096279557, + "grad_norm": 0.7071307897567749, + "learning_rate": 1.0480359664067529e-05, + "loss": 2.3861, + "step": 17077 + }, + { + "epoch": 1.3782584133645388, + "grad_norm": 0.6713467836380005, + "learning_rate": 1.0473325003672384e-05, + "loss": 2.4029, + "step": 17078 + }, + { + "epoch": 1.3783391171011217, + "grad_norm": 0.7389634847640991, + "learning_rate": 1.046629257451599e-05, + "loss": 2.415, + "step": 17079 + }, + { + "epoch": 1.3784198208377048, + "grad_norm": 0.7122809886932373, + "learning_rate": 1.0459262376773627e-05, + "loss": 2.4278, + "step": 17080 + }, + { + "epoch": 1.3785005245742878, + "grad_norm": 0.7036066651344299, + "learning_rate": 1.045223441062051e-05, + "loss": 2.4276, + "step": 17081 + }, + { + "epoch": 1.3785812283108707, + "grad_norm": 0.7709795236587524, + "learning_rate": 1.0445208676231811e-05, + "loss": 2.4398, + "step": 17082 + }, + { + "epoch": 1.3786619320474538, + "grad_norm": 0.7131057977676392, + "learning_rate": 1.0438185173782589e-05, + "loss": 2.4414, + "step": 17083 + }, + { + "epoch": 1.3787426357840369, + "grad_norm": 0.7172132730484009, + "learning_rate": 1.0431163903447904e-05, + "loss": 2.4574, + "step": 17084 + }, + { + "epoch": 1.3788233395206198, + "grad_norm": 0.6760988831520081, + "learning_rate": 
1.0424144865402774e-05, + "loss": 2.442, + "step": 17085 + }, + { + "epoch": 1.3789040432572028, + "grad_norm": 0.701665997505188, + "learning_rate": 1.041712805982209e-05, + "loss": 2.4012, + "step": 17086 + }, + { + "epoch": 1.378984746993786, + "grad_norm": 0.661851167678833, + "learning_rate": 1.0410113486880746e-05, + "loss": 2.3591, + "step": 17087 + }, + { + "epoch": 1.3790654507303688, + "grad_norm": 0.6929948925971985, + "learning_rate": 1.0403101146753569e-05, + "loss": 2.4285, + "step": 17088 + }, + { + "epoch": 1.3791461544669519, + "grad_norm": 0.703576922416687, + "learning_rate": 1.0396091039615308e-05, + "loss": 2.4643, + "step": 17089 + }, + { + "epoch": 1.3792268582035347, + "grad_norm": 0.6697961688041687, + "learning_rate": 1.038908316564069e-05, + "loss": 2.4046, + "step": 17090 + }, + { + "epoch": 1.3793075619401178, + "grad_norm": 0.7338510155677795, + "learning_rate": 1.0382077525004396e-05, + "loss": 2.3507, + "step": 17091 + }, + { + "epoch": 1.3793882656767007, + "grad_norm": 0.6967883110046387, + "learning_rate": 1.0375074117880956e-05, + "loss": 2.4458, + "step": 17092 + }, + { + "epoch": 1.3794689694132838, + "grad_norm": 0.7204736471176147, + "learning_rate": 1.0368072944444962e-05, + "loss": 2.427, + "step": 17093 + }, + { + "epoch": 1.3795496731498669, + "grad_norm": 0.7665053606033325, + "learning_rate": 1.0361074004870907e-05, + "loss": 2.3985, + "step": 17094 + }, + { + "epoch": 1.3796303768864497, + "grad_norm": 0.7157881855964661, + "learning_rate": 1.0354077299333187e-05, + "loss": 2.4229, + "step": 17095 + }, + { + "epoch": 1.3797110806230328, + "grad_norm": 0.6643819808959961, + "learning_rate": 1.0347082828006194e-05, + "loss": 2.357, + "step": 17096 + }, + { + "epoch": 1.379791784359616, + "grad_norm": 0.6965252757072449, + "learning_rate": 1.0340090591064255e-05, + "loss": 2.42, + "step": 17097 + }, + { + "epoch": 1.3798724880961988, + "grad_norm": 0.767876923084259, + "learning_rate": 1.0333100588681633e-05, + "loss": 
2.4019, + "step": 17098 + }, + { + "epoch": 1.3799531918327819, + "grad_norm": 0.6687513589859009, + "learning_rate": 1.0326112821032541e-05, + "loss": 2.3515, + "step": 17099 + }, + { + "epoch": 1.380033895569365, + "grad_norm": 0.674007773399353, + "learning_rate": 1.031912728829112e-05, + "loss": 2.4281, + "step": 17100 + }, + { + "epoch": 1.3801145993059478, + "grad_norm": 0.6486735939979553, + "learning_rate": 1.0312143990631495e-05, + "loss": 2.4324, + "step": 17101 + }, + { + "epoch": 1.380195303042531, + "grad_norm": 0.7174487709999084, + "learning_rate": 1.0305162928227674e-05, + "loss": 2.4445, + "step": 17102 + }, + { + "epoch": 1.380276006779114, + "grad_norm": 0.6515870690345764, + "learning_rate": 1.029818410125365e-05, + "loss": 2.4078, + "step": 17103 + }, + { + "epoch": 1.3803567105156969, + "grad_norm": 0.697830080986023, + "learning_rate": 1.0291207509883383e-05, + "loss": 2.4024, + "step": 17104 + }, + { + "epoch": 1.38043741425228, + "grad_norm": 0.7636575102806091, + "learning_rate": 1.0284233154290711e-05, + "loss": 2.3912, + "step": 17105 + }, + { + "epoch": 1.3805181179888628, + "grad_norm": 0.6910358667373657, + "learning_rate": 1.0277261034649466e-05, + "loss": 2.4099, + "step": 17106 + }, + { + "epoch": 1.380598821725446, + "grad_norm": 0.6778038740158081, + "learning_rate": 1.0270291151133415e-05, + "loss": 2.4111, + "step": 17107 + }, + { + "epoch": 1.3806795254620288, + "grad_norm": 0.6927553415298462, + "learning_rate": 1.0263323503916255e-05, + "loss": 2.4239, + "step": 17108 + }, + { + "epoch": 1.3807602291986119, + "grad_norm": 0.6654019355773926, + "learning_rate": 1.0256358093171658e-05, + "loss": 2.4374, + "step": 17109 + }, + { + "epoch": 1.380840932935195, + "grad_norm": 0.7174705266952515, + "learning_rate": 1.0249394919073219e-05, + "loss": 2.4142, + "step": 17110 + }, + { + "epoch": 1.3809216366717778, + "grad_norm": 0.7386046648025513, + "learning_rate": 1.0242433981794463e-05, + "loss": 2.4453, + "step": 17111 + }, + { + 
"epoch": 1.381002340408361, + "grad_norm": 0.6723792552947998, + "learning_rate": 1.0235475281508866e-05, + "loss": 2.4595, + "step": 17112 + }, + { + "epoch": 1.381083044144944, + "grad_norm": 0.7069140672683716, + "learning_rate": 1.0228518818389887e-05, + "loss": 2.4434, + "step": 17113 + }, + { + "epoch": 1.3811637478815268, + "grad_norm": 0.7239270210266113, + "learning_rate": 1.0221564592610888e-05, + "loss": 2.5121, + "step": 17114 + }, + { + "epoch": 1.38124445161811, + "grad_norm": 0.6907179951667786, + "learning_rate": 1.0214612604345175e-05, + "loss": 2.3673, + "step": 17115 + }, + { + "epoch": 1.381325155354693, + "grad_norm": 0.6908708810806274, + "learning_rate": 1.020766285376602e-05, + "loss": 2.4419, + "step": 17116 + }, + { + "epoch": 1.381405859091276, + "grad_norm": 0.6947401165962219, + "learning_rate": 1.0200715341046618e-05, + "loss": 2.4566, + "step": 17117 + }, + { + "epoch": 1.381486562827859, + "grad_norm": 0.687776505947113, + "learning_rate": 1.019377006636012e-05, + "loss": 2.4631, + "step": 17118 + }, + { + "epoch": 1.381567266564442, + "grad_norm": 0.7059805989265442, + "learning_rate": 1.0186827029879642e-05, + "loss": 2.3892, + "step": 17119 + }, + { + "epoch": 1.381647970301025, + "grad_norm": 0.685351550579071, + "learning_rate": 1.0179886231778224e-05, + "loss": 2.4041, + "step": 17120 + }, + { + "epoch": 1.381728674037608, + "grad_norm": 0.6662759184837341, + "learning_rate": 1.0172947672228817e-05, + "loss": 2.4254, + "step": 17121 + }, + { + "epoch": 1.3818093777741909, + "grad_norm": 0.6769386529922485, + "learning_rate": 1.0166011351404358e-05, + "loss": 2.5057, + "step": 17122 + }, + { + "epoch": 1.381890081510774, + "grad_norm": 0.8168340921401978, + "learning_rate": 1.0159077269477746e-05, + "loss": 2.4936, + "step": 17123 + }, + { + "epoch": 1.3819707852473568, + "grad_norm": 0.6659611463546753, + "learning_rate": 1.0152145426621751e-05, + "loss": 2.4062, + "step": 17124 + }, + { + "epoch": 1.38205148898394, + 
"grad_norm": 0.7131680846214294, + "learning_rate": 1.0145215823009158e-05, + "loss": 2.3767, + "step": 17125 + }, + { + "epoch": 1.382132192720523, + "grad_norm": 0.7241190075874329, + "learning_rate": 1.0138288458812673e-05, + "loss": 2.4082, + "step": 17126 + }, + { + "epoch": 1.3822128964571059, + "grad_norm": 0.6905619502067566, + "learning_rate": 1.0131363334204947e-05, + "loss": 2.3859, + "step": 17127 + }, + { + "epoch": 1.382293600193689, + "grad_norm": 0.7163190245628357, + "learning_rate": 1.0124440449358551e-05, + "loss": 2.4238, + "step": 17128 + }, + { + "epoch": 1.382374303930272, + "grad_norm": 0.6857485175132751, + "learning_rate": 1.0117519804446041e-05, + "loss": 2.4076, + "step": 17129 + }, + { + "epoch": 1.382455007666855, + "grad_norm": 0.6817807555198669, + "learning_rate": 1.0110601399639918e-05, + "loss": 2.4226, + "step": 17130 + }, + { + "epoch": 1.382535711403438, + "grad_norm": 0.714421808719635, + "learning_rate": 1.0103685235112558e-05, + "loss": 2.3581, + "step": 17131 + }, + { + "epoch": 1.382616415140021, + "grad_norm": 0.7885473370552063, + "learning_rate": 1.0096771311036357e-05, + "loss": 2.3821, + "step": 17132 + }, + { + "epoch": 1.382697118876604, + "grad_norm": 0.6432569026947021, + "learning_rate": 1.0089859627583642e-05, + "loss": 2.3899, + "step": 17133 + }, + { + "epoch": 1.382777822613187, + "grad_norm": 0.6620168089866638, + "learning_rate": 1.0082950184926632e-05, + "loss": 2.4503, + "step": 17134 + }, + { + "epoch": 1.38285852634977, + "grad_norm": 0.6495606303215027, + "learning_rate": 1.0076042983237544e-05, + "loss": 2.3606, + "step": 17135 + }, + { + "epoch": 1.382939230086353, + "grad_norm": 0.7192469835281372, + "learning_rate": 1.006913802268855e-05, + "loss": 2.425, + "step": 17136 + }, + { + "epoch": 1.3830199338229359, + "grad_norm": 0.6835115551948547, + "learning_rate": 1.0062235303451706e-05, + "loss": 2.3605, + "step": 17137 + }, + { + "epoch": 1.383100637559519, + "grad_norm": 0.7469161748886108, + 
"learning_rate": 1.0055334825699059e-05, + "loss": 2.4811, + "step": 17138 + }, + { + "epoch": 1.383181341296102, + "grad_norm": 0.7641372084617615, + "learning_rate": 1.0048436589602572e-05, + "loss": 2.4317, + "step": 17139 + }, + { + "epoch": 1.383262045032685, + "grad_norm": 0.7059566378593445, + "learning_rate": 1.0041540595334186e-05, + "loss": 2.4677, + "step": 17140 + }, + { + "epoch": 1.383342748769268, + "grad_norm": 0.7218295931816101, + "learning_rate": 1.0034646843065777e-05, + "loss": 2.3889, + "step": 17141 + }, + { + "epoch": 1.383423452505851, + "grad_norm": 0.7059688568115234, + "learning_rate": 1.0027755332969124e-05, + "loss": 2.4276, + "step": 17142 + }, + { + "epoch": 1.383504156242434, + "grad_norm": 0.7444838285446167, + "learning_rate": 1.0020866065216017e-05, + "loss": 2.4647, + "step": 17143 + }, + { + "epoch": 1.383584859979017, + "grad_norm": 0.662229597568512, + "learning_rate": 1.0013979039978127e-05, + "loss": 2.3913, + "step": 17144 + }, + { + "epoch": 1.3836655637156001, + "grad_norm": 0.6696064472198486, + "learning_rate": 1.0007094257427097e-05, + "loss": 2.3904, + "step": 17145 + }, + { + "epoch": 1.383746267452183, + "grad_norm": 0.7516316175460815, + "learning_rate": 1.0000211717734541e-05, + "loss": 2.3621, + "step": 17146 + }, + { + "epoch": 1.383826971188766, + "grad_norm": 0.6833345293998718, + "learning_rate": 9.993331421071961e-06, + "loss": 2.4113, + "step": 17147 + }, + { + "epoch": 1.3839076749253492, + "grad_norm": 0.675074577331543, + "learning_rate": 9.986453367610827e-06, + "loss": 2.398, + "step": 17148 + }, + { + "epoch": 1.383988378661932, + "grad_norm": 0.7046546936035156, + "learning_rate": 9.979577557522579e-06, + "loss": 2.4441, + "step": 17149 + }, + { + "epoch": 1.3840690823985151, + "grad_norm": 0.7228004336357117, + "learning_rate": 9.972703990978582e-06, + "loss": 2.4451, + "step": 17150 + }, + { + "epoch": 1.384149786135098, + "grad_norm": 0.6642273664474487, + "learning_rate": 9.965832668150132e-06, 
+ "loss": 2.3809, + "step": 17151 + }, + { + "epoch": 1.384230489871681, + "grad_norm": 0.7238738536834717, + "learning_rate": 9.958963589208493e-06, + "loss": 2.4283, + "step": 17152 + }, + { + "epoch": 1.384311193608264, + "grad_norm": 0.7356482744216919, + "learning_rate": 9.952096754324847e-06, + "loss": 2.4666, + "step": 17153 + }, + { + "epoch": 1.384391897344847, + "grad_norm": 0.7092667818069458, + "learning_rate": 9.945232163670327e-06, + "loss": 2.5028, + "step": 17154 + }, + { + "epoch": 1.38447260108143, + "grad_norm": 0.6972974538803101, + "learning_rate": 9.938369817416049e-06, + "loss": 2.4223, + "step": 17155 + }, + { + "epoch": 1.384553304818013, + "grad_norm": 0.7163854837417603, + "learning_rate": 9.931509715733e-06, + "loss": 2.4256, + "step": 17156 + }, + { + "epoch": 1.384634008554596, + "grad_norm": 0.7319930195808411, + "learning_rate": 9.924651858792166e-06, + "loss": 2.4208, + "step": 17157 + }, + { + "epoch": 1.3847147122911792, + "grad_norm": 0.6813424825668335, + "learning_rate": 9.917796246764466e-06, + "loss": 2.3794, + "step": 17158 + }, + { + "epoch": 1.384795416027762, + "grad_norm": 0.7059821486473083, + "learning_rate": 9.910942879820761e-06, + "loss": 2.4462, + "step": 17159 + }, + { + "epoch": 1.384876119764345, + "grad_norm": 0.726754903793335, + "learning_rate": 9.904091758131862e-06, + "loss": 2.4037, + "step": 17160 + }, + { + "epoch": 1.3849568235009282, + "grad_norm": 0.6972840428352356, + "learning_rate": 9.897242881868508e-06, + "loss": 2.4275, + "step": 17161 + }, + { + "epoch": 1.385037527237511, + "grad_norm": 0.6906942129135132, + "learning_rate": 9.890396251201405e-06, + "loss": 2.4547, + "step": 17162 + }, + { + "epoch": 1.3851182309740941, + "grad_norm": 0.6928840279579163, + "learning_rate": 9.883551866301165e-06, + "loss": 2.4622, + "step": 17163 + }, + { + "epoch": 1.3851989347106772, + "grad_norm": 0.6840118169784546, + "learning_rate": 9.876709727338374e-06, + "loss": 2.4546, + "step": 17164 + }, + { + 
"epoch": 1.38527963844726, + "grad_norm": 0.6800721287727356, + "learning_rate": 9.86986983448358e-06, + "loss": 2.508, + "step": 17165 + }, + { + "epoch": 1.3853603421838432, + "grad_norm": 0.678666353225708, + "learning_rate": 9.863032187907217e-06, + "loss": 2.383, + "step": 17166 + }, + { + "epoch": 1.385441045920426, + "grad_norm": 0.7311298251152039, + "learning_rate": 9.856196787779714e-06, + "loss": 2.4111, + "step": 17167 + }, + { + "epoch": 1.3855217496570091, + "grad_norm": 0.6527237296104431, + "learning_rate": 9.849363634271425e-06, + "loss": 2.3592, + "step": 17168 + }, + { + "epoch": 1.385602453393592, + "grad_norm": 0.7478907108306885, + "learning_rate": 9.842532727552645e-06, + "loss": 2.4321, + "step": 17169 + }, + { + "epoch": 1.385683157130175, + "grad_norm": 0.6855963468551636, + "learning_rate": 9.835704067793628e-06, + "loss": 2.3966, + "step": 17170 + }, + { + "epoch": 1.3857638608667582, + "grad_norm": 0.7468744516372681, + "learning_rate": 9.828877655164571e-06, + "loss": 2.3695, + "step": 17171 + }, + { + "epoch": 1.385844564603341, + "grad_norm": 0.7127626538276672, + "learning_rate": 9.82205348983558e-06, + "loss": 2.4718, + "step": 17172 + }, + { + "epoch": 1.3859252683399241, + "grad_norm": 0.6831564903259277, + "learning_rate": 9.815231571976735e-06, + "loss": 2.373, + "step": 17173 + }, + { + "epoch": 1.3860059720765072, + "grad_norm": 0.7020923495292664, + "learning_rate": 9.808411901758075e-06, + "loss": 2.4516, + "step": 17174 + }, + { + "epoch": 1.38608667581309, + "grad_norm": 0.8129574060440063, + "learning_rate": 9.801594479349563e-06, + "loss": 2.4157, + "step": 17175 + }, + { + "epoch": 1.3861673795496732, + "grad_norm": 0.6603944301605225, + "learning_rate": 9.794779304921087e-06, + "loss": 2.386, + "step": 17176 + }, + { + "epoch": 1.3862480832862563, + "grad_norm": 0.669863224029541, + "learning_rate": 9.78796637864251e-06, + "loss": 2.4273, + "step": 17177 + }, + { + "epoch": 1.3863287870228391, + "grad_norm": 
0.7654524445533752, + "learning_rate": 9.78115570068362e-06, + "loss": 2.4868, + "step": 17178 + }, + { + "epoch": 1.3864094907594222, + "grad_norm": 0.7104062438011169, + "learning_rate": 9.774347271214169e-06, + "loss": 2.4684, + "step": 17179 + }, + { + "epoch": 1.3864901944960053, + "grad_norm": 0.6499059796333313, + "learning_rate": 9.767541090403831e-06, + "loss": 2.4131, + "step": 17180 + }, + { + "epoch": 1.3865708982325882, + "grad_norm": 0.7515703439712524, + "learning_rate": 9.760737158422262e-06, + "loss": 2.4484, + "step": 17181 + }, + { + "epoch": 1.3866516019691713, + "grad_norm": 0.7019369006156921, + "learning_rate": 9.753935475438991e-06, + "loss": 2.4393, + "step": 17182 + }, + { + "epoch": 1.3867323057057541, + "grad_norm": 0.7191709280014038, + "learning_rate": 9.747136041623562e-06, + "loss": 2.4533, + "step": 17183 + }, + { + "epoch": 1.3868130094423372, + "grad_norm": 0.6970816254615784, + "learning_rate": 9.740338857145438e-06, + "loss": 2.4886, + "step": 17184 + }, + { + "epoch": 1.38689371317892, + "grad_norm": 0.6682983636856079, + "learning_rate": 9.733543922173982e-06, + "loss": 2.3896, + "step": 17185 + }, + { + "epoch": 1.3869744169155032, + "grad_norm": 0.735559344291687, + "learning_rate": 9.726751236878584e-06, + "loss": 2.4777, + "step": 17186 + }, + { + "epoch": 1.3870551206520862, + "grad_norm": 0.790460467338562, + "learning_rate": 9.71996080142854e-06, + "loss": 2.3773, + "step": 17187 + }, + { + "epoch": 1.3871358243886691, + "grad_norm": 0.6593269109725952, + "learning_rate": 9.713172615993038e-06, + "loss": 2.461, + "step": 17188 + }, + { + "epoch": 1.3872165281252522, + "grad_norm": 0.7211339473724365, + "learning_rate": 9.706386680741275e-06, + "loss": 2.4155, + "step": 17189 + }, + { + "epoch": 1.3872972318618353, + "grad_norm": 0.7158735990524292, + "learning_rate": 9.699602995842406e-06, + "loss": 2.4214, + "step": 17190 + }, + { + "epoch": 1.3873779355984182, + "grad_norm": 0.7172560095787048, + "learning_rate": 
9.692821561465493e-06, + "loss": 2.3617, + "step": 17191 + }, + { + "epoch": 1.3874586393350012, + "grad_norm": 0.721144437789917, + "learning_rate": 9.686042377779513e-06, + "loss": 2.3984, + "step": 17192 + }, + { + "epoch": 1.3875393430715843, + "grad_norm": 0.7066751718521118, + "learning_rate": 9.679265444953444e-06, + "loss": 2.4735, + "step": 17193 + }, + { + "epoch": 1.3876200468081672, + "grad_norm": 0.7111334204673767, + "learning_rate": 9.672490763156194e-06, + "loss": 2.4336, + "step": 17194 + }, + { + "epoch": 1.3877007505447503, + "grad_norm": 0.6845266222953796, + "learning_rate": 9.665718332556584e-06, + "loss": 2.466, + "step": 17195 + }, + { + "epoch": 1.3877814542813331, + "grad_norm": 0.6982793807983398, + "learning_rate": 9.6589481533234e-06, + "loss": 2.3819, + "step": 17196 + }, + { + "epoch": 1.3878621580179162, + "grad_norm": 0.8404912352561951, + "learning_rate": 9.652180225625407e-06, + "loss": 2.4329, + "step": 17197 + }, + { + "epoch": 1.387942861754499, + "grad_norm": 0.7335420250892639, + "learning_rate": 9.645414549631227e-06, + "loss": 2.4368, + "step": 17198 + }, + { + "epoch": 1.3880235654910822, + "grad_norm": 0.7425113916397095, + "learning_rate": 9.638651125509513e-06, + "loss": 2.41, + "step": 17199 + }, + { + "epoch": 1.3881042692276653, + "grad_norm": 0.6818472146987915, + "learning_rate": 9.631889953428818e-06, + "loss": 2.4227, + "step": 17200 + }, + { + "epoch": 1.3881849729642481, + "grad_norm": 0.6991598010063171, + "learning_rate": 9.625131033557655e-06, + "loss": 2.422, + "step": 17201 + }, + { + "epoch": 1.3882656767008312, + "grad_norm": 0.6927391886711121, + "learning_rate": 9.618374366064465e-06, + "loss": 2.4092, + "step": 17202 + }, + { + "epoch": 1.3883463804374143, + "grad_norm": 0.6987093687057495, + "learning_rate": 9.611619951117657e-06, + "loss": 2.419, + "step": 17203 + }, + { + "epoch": 1.3884270841739972, + "grad_norm": 0.7766227722167969, + "learning_rate": 9.604867788885552e-06, + "loss": 2.4174, + 
"step": 17204 + }, + { + "epoch": 1.3885077879105803, + "grad_norm": 0.77024245262146, + "learning_rate": 9.598117879536427e-06, + "loss": 2.3851, + "step": 17205 + }, + { + "epoch": 1.3885884916471634, + "grad_norm": 0.7106937170028687, + "learning_rate": 9.591370223238515e-06, + "loss": 2.3322, + "step": 17206 + }, + { + "epoch": 1.3886691953837462, + "grad_norm": 0.7056468725204468, + "learning_rate": 9.584624820160016e-06, + "loss": 2.4496, + "step": 17207 + }, + { + "epoch": 1.3887498991203293, + "grad_norm": 0.6738306879997253, + "learning_rate": 9.57788167046899e-06, + "loss": 2.3853, + "step": 17208 + }, + { + "epoch": 1.3888306028569124, + "grad_norm": 0.6830081343650818, + "learning_rate": 9.57114077433352e-06, + "loss": 2.3974, + "step": 17209 + }, + { + "epoch": 1.3889113065934953, + "grad_norm": 0.6968281865119934, + "learning_rate": 9.564402131921612e-06, + "loss": 2.4349, + "step": 17210 + }, + { + "epoch": 1.3889920103300784, + "grad_norm": 0.720506489276886, + "learning_rate": 9.55766574340119e-06, + "loss": 2.386, + "step": 17211 + }, + { + "epoch": 1.3890727140666612, + "grad_norm": 0.7361373901367188, + "learning_rate": 9.550931608940161e-06, + "loss": 2.4303, + "step": 17212 + }, + { + "epoch": 1.3891534178032443, + "grad_norm": 0.6967737674713135, + "learning_rate": 9.544199728706383e-06, + "loss": 2.4073, + "step": 17213 + }, + { + "epoch": 1.3892341215398272, + "grad_norm": 0.6645474433898926, + "learning_rate": 9.537470102867573e-06, + "loss": 2.4236, + "step": 17214 + }, + { + "epoch": 1.3893148252764103, + "grad_norm": 0.7314795851707458, + "learning_rate": 9.53074273159148e-06, + "loss": 2.4362, + "step": 17215 + }, + { + "epoch": 1.3893955290129933, + "grad_norm": 0.7935917377471924, + "learning_rate": 9.524017615045789e-06, + "loss": 2.3982, + "step": 17216 + }, + { + "epoch": 1.3894762327495762, + "grad_norm": 0.7083787322044373, + "learning_rate": 9.517294753398064e-06, + "loss": 2.4095, + "step": 17217 + }, + { + "epoch": 
1.3895569364861593, + "grad_norm": 0.6737664937973022, + "learning_rate": 9.510574146815876e-06, + "loss": 2.457, + "step": 17218 + }, + { + "epoch": 1.3896376402227424, + "grad_norm": 0.6705507040023804, + "learning_rate": 9.50385579546672e-06, + "loss": 2.3893, + "step": 17219 + }, + { + "epoch": 1.3897183439593253, + "grad_norm": 0.6711611151695251, + "learning_rate": 9.497139699518042e-06, + "loss": 2.3982, + "step": 17220 + }, + { + "epoch": 1.3897990476959083, + "grad_norm": 0.7133504748344421, + "learning_rate": 9.490425859137219e-06, + "loss": 2.4178, + "step": 17221 + }, + { + "epoch": 1.3898797514324914, + "grad_norm": 0.6962296366691589, + "learning_rate": 9.483714274491572e-06, + "loss": 2.4126, + "step": 17222 + }, + { + "epoch": 1.3899604551690743, + "grad_norm": 0.7658503651618958, + "learning_rate": 9.477004945748402e-06, + "loss": 2.3047, + "step": 17223 + }, + { + "epoch": 1.3900411589056574, + "grad_norm": 0.706066370010376, + "learning_rate": 9.470297873074885e-06, + "loss": 2.4055, + "step": 17224 + }, + { + "epoch": 1.3901218626422405, + "grad_norm": 0.6563149094581604, + "learning_rate": 9.463593056638187e-06, + "loss": 2.4425, + "step": 17225 + }, + { + "epoch": 1.3902025663788233, + "grad_norm": 0.7133740782737732, + "learning_rate": 9.45689049660543e-06, + "loss": 2.3917, + "step": 17226 + }, + { + "epoch": 1.3902832701154064, + "grad_norm": 0.6759207248687744, + "learning_rate": 9.450190193143626e-06, + "loss": 2.4261, + "step": 17227 + }, + { + "epoch": 1.3903639738519893, + "grad_norm": 0.7461724877357483, + "learning_rate": 9.443492146419786e-06, + "loss": 2.4121, + "step": 17228 + }, + { + "epoch": 1.3904446775885724, + "grad_norm": 0.6825011372566223, + "learning_rate": 9.436796356600842e-06, + "loss": 2.3746, + "step": 17229 + }, + { + "epoch": 1.3905253813251552, + "grad_norm": 0.7314637303352356, + "learning_rate": 9.430102823853659e-06, + "loss": 2.4246, + "step": 17230 + }, + { + "epoch": 1.3906060850617383, + "grad_norm": 
0.6963483095169067, + "learning_rate": 9.423411548345063e-06, + "loss": 2.3504, + "step": 17231 + }, + { + "epoch": 1.3906867887983214, + "grad_norm": 0.7879536747932434, + "learning_rate": 9.41672253024185e-06, + "loss": 2.4454, + "step": 17232 + }, + { + "epoch": 1.3907674925349043, + "grad_norm": 0.6961038708686829, + "learning_rate": 9.410035769710668e-06, + "loss": 2.4107, + "step": 17233 + }, + { + "epoch": 1.3908481962714874, + "grad_norm": 0.6528958082199097, + "learning_rate": 9.403351266918215e-06, + "loss": 2.4131, + "step": 17234 + }, + { + "epoch": 1.3909289000080705, + "grad_norm": 0.8091046810150146, + "learning_rate": 9.396669022031057e-06, + "loss": 2.4143, + "step": 17235 + }, + { + "epoch": 1.3910096037446533, + "grad_norm": 0.7430968880653381, + "learning_rate": 9.389989035215774e-06, + "loss": 2.4197, + "step": 17236 + }, + { + "epoch": 1.3910903074812364, + "grad_norm": 0.7089489102363586, + "learning_rate": 9.383311306638797e-06, + "loss": 2.4179, + "step": 17237 + }, + { + "epoch": 1.3911710112178195, + "grad_norm": 0.7121657729148865, + "learning_rate": 9.376635836466574e-06, + "loss": 2.4136, + "step": 17238 + }, + { + "epoch": 1.3912517149544024, + "grad_norm": 0.6793569326400757, + "learning_rate": 9.369962624865503e-06, + "loss": 2.4029, + "step": 17239 + }, + { + "epoch": 1.3913324186909855, + "grad_norm": 0.7534452080726624, + "learning_rate": 9.363291672001828e-06, + "loss": 2.421, + "step": 17240 + }, + { + "epoch": 1.3914131224275683, + "grad_norm": 0.6758937239646912, + "learning_rate": 9.356622978041873e-06, + "loss": 2.378, + "step": 17241 + }, + { + "epoch": 1.3914938261641514, + "grad_norm": 0.7330620288848877, + "learning_rate": 9.349956543151839e-06, + "loss": 2.3983, + "step": 17242 + }, + { + "epoch": 1.3915745299007343, + "grad_norm": 0.7044413089752197, + "learning_rate": 9.343292367497835e-06, + "loss": 2.4204, + "step": 17243 + }, + { + "epoch": 1.3916552336373174, + "grad_norm": 0.7051666975021362, + "learning_rate": 
9.336630451245954e-06, + "loss": 2.3994, + "step": 17244 + }, + { + "epoch": 1.3917359373739004, + "grad_norm": 0.721764326095581, + "learning_rate": 9.32997079456227e-06, + "loss": 2.4127, + "step": 17245 + }, + { + "epoch": 1.3918166411104833, + "grad_norm": 0.7074810862541199, + "learning_rate": 9.323313397612698e-06, + "loss": 2.4449, + "step": 17246 + }, + { + "epoch": 1.3918973448470664, + "grad_norm": 0.7203366160392761, + "learning_rate": 9.316658260563193e-06, + "loss": 2.3564, + "step": 17247 + }, + { + "epoch": 1.3919780485836495, + "grad_norm": 0.6879156827926636, + "learning_rate": 9.310005383579623e-06, + "loss": 2.3568, + "step": 17248 + }, + { + "epoch": 1.3920587523202324, + "grad_norm": 0.6491550803184509, + "learning_rate": 9.303354766827776e-06, + "loss": 2.421, + "step": 17249 + }, + { + "epoch": 1.3921394560568154, + "grad_norm": 0.683704674243927, + "learning_rate": 9.29670641047341e-06, + "loss": 2.4633, + "step": 17250 + }, + { + "epoch": 1.3922201597933985, + "grad_norm": 0.6716236472129822, + "learning_rate": 9.290060314682203e-06, + "loss": 2.4423, + "step": 17251 + }, + { + "epoch": 1.3923008635299814, + "grad_norm": 0.7086344957351685, + "learning_rate": 9.283416479619844e-06, + "loss": 2.3877, + "step": 17252 + }, + { + "epoch": 1.3923815672665645, + "grad_norm": 0.6638349294662476, + "learning_rate": 9.276774905451869e-06, + "loss": 2.4499, + "step": 17253 + }, + { + "epoch": 1.3924622710031476, + "grad_norm": 0.7091326713562012, + "learning_rate": 9.27013559234381e-06, + "loss": 2.4659, + "step": 17254 + }, + { + "epoch": 1.3925429747397304, + "grad_norm": 0.6906822323799133, + "learning_rate": 9.263498540461157e-06, + "loss": 2.4195, + "step": 17255 + }, + { + "epoch": 1.3926236784763135, + "grad_norm": 0.7003819942474365, + "learning_rate": 9.256863749969302e-06, + "loss": 2.4156, + "step": 17256 + }, + { + "epoch": 1.3927043822128964, + "grad_norm": 0.7270472645759583, + "learning_rate": 9.250231221033601e-06, + "loss": 2.4197, + 
"step": 17257 + }, + { + "epoch": 1.3927850859494795, + "grad_norm": 0.7070592641830444, + "learning_rate": 9.243600953819376e-06, + "loss": 2.4296, + "step": 17258 + }, + { + "epoch": 1.3928657896860623, + "grad_norm": 0.6560600996017456, + "learning_rate": 9.23697294849184e-06, + "loss": 2.4441, + "step": 17259 + }, + { + "epoch": 1.3929464934226454, + "grad_norm": 0.6654617190361023, + "learning_rate": 9.230347205216194e-06, + "loss": 2.3406, + "step": 17260 + }, + { + "epoch": 1.3930271971592285, + "grad_norm": 0.7147239446640015, + "learning_rate": 9.223723724157563e-06, + "loss": 2.4203, + "step": 17261 + }, + { + "epoch": 1.3931079008958114, + "grad_norm": 0.7148180603981018, + "learning_rate": 9.217102505481046e-06, + "loss": 2.4525, + "step": 17262 + }, + { + "epoch": 1.3931886046323945, + "grad_norm": 0.6779814958572388, + "learning_rate": 9.210483549351623e-06, + "loss": 2.4051, + "step": 17263 + }, + { + "epoch": 1.3932693083689776, + "grad_norm": 0.6880484223365784, + "learning_rate": 9.203866855934307e-06, + "loss": 2.4492, + "step": 17264 + }, + { + "epoch": 1.3933500121055604, + "grad_norm": 0.7845660448074341, + "learning_rate": 9.197252425393954e-06, + "loss": 2.4448, + "step": 17265 + }, + { + "epoch": 1.3934307158421435, + "grad_norm": 0.7001363635063171, + "learning_rate": 9.190640257895433e-06, + "loss": 2.4226, + "step": 17266 + }, + { + "epoch": 1.3935114195787266, + "grad_norm": 0.7282695770263672, + "learning_rate": 9.184030353603524e-06, + "loss": 2.4354, + "step": 17267 + }, + { + "epoch": 1.3935921233153095, + "grad_norm": 0.7547619342803955, + "learning_rate": 9.177422712683003e-06, + "loss": 2.456, + "step": 17268 + }, + { + "epoch": 1.3936728270518925, + "grad_norm": 0.7191921472549438, + "learning_rate": 9.170817335298499e-06, + "loss": 2.3923, + "step": 17269 + }, + { + "epoch": 1.3937535307884756, + "grad_norm": 0.6578717827796936, + "learning_rate": 9.164214221614654e-06, + "loss": 2.4354, + "step": 17270 + }, + { + "epoch": 
1.3938342345250585, + "grad_norm": 0.7156858444213867, + "learning_rate": 9.157613371796036e-06, + "loss": 2.3983, + "step": 17271 + }, + { + "epoch": 1.3939149382616416, + "grad_norm": 0.6779402494430542, + "learning_rate": 9.151014786007162e-06, + "loss": 2.435, + "step": 17272 + }, + { + "epoch": 1.3939956419982245, + "grad_norm": 0.7038381099700928, + "learning_rate": 9.144418464412486e-06, + "loss": 2.3848, + "step": 17273 + }, + { + "epoch": 1.3940763457348075, + "grad_norm": 0.7381990551948547, + "learning_rate": 9.13782440717641e-06, + "loss": 2.3693, + "step": 17274 + }, + { + "epoch": 1.3941570494713904, + "grad_norm": 0.6982381939888, + "learning_rate": 9.131232614463247e-06, + "loss": 2.4095, + "step": 17275 + }, + { + "epoch": 1.3942377532079735, + "grad_norm": 0.6968829035758972, + "learning_rate": 9.124643086437312e-06, + "loss": 2.3802, + "step": 17276 + }, + { + "epoch": 1.3943184569445566, + "grad_norm": 0.7584258317947388, + "learning_rate": 9.118055823262828e-06, + "loss": 2.4153, + "step": 17277 + }, + { + "epoch": 1.3943991606811394, + "grad_norm": 0.7331502437591553, + "learning_rate": 9.11147082510395e-06, + "loss": 2.4404, + "step": 17278 + }, + { + "epoch": 1.3944798644177225, + "grad_norm": 0.7939555048942566, + "learning_rate": 9.104888092124796e-06, + "loss": 2.4568, + "step": 17279 + }, + { + "epoch": 1.3945605681543056, + "grad_norm": 0.6752094626426697, + "learning_rate": 9.098307624489443e-06, + "loss": 2.3298, + "step": 17280 + }, + { + "epoch": 1.3946412718908885, + "grad_norm": 0.682428240776062, + "learning_rate": 9.091729422361872e-06, + "loss": 2.4449, + "step": 17281 + }, + { + "epoch": 1.3947219756274716, + "grad_norm": 0.7422902584075928, + "learning_rate": 9.085153485906051e-06, + "loss": 2.4, + "step": 17282 + }, + { + "epoch": 1.3948026793640547, + "grad_norm": 0.7528017163276672, + "learning_rate": 9.07857981528586e-06, + "loss": 2.4045, + "step": 17283 + }, + { + "epoch": 1.3948833831006375, + "grad_norm": 
0.622075080871582, + "learning_rate": 9.072008410665133e-06, + "loss": 2.3865, + "step": 17284 + }, + { + "epoch": 1.3949640868372206, + "grad_norm": 0.7127060890197754, + "learning_rate": 9.065439272207642e-06, + "loss": 2.4108, + "step": 17285 + }, + { + "epoch": 1.3950447905738037, + "grad_norm": 0.7381206750869751, + "learning_rate": 9.0588724000771e-06, + "loss": 2.4459, + "step": 17286 + }, + { + "epoch": 1.3951254943103866, + "grad_norm": 0.7453467845916748, + "learning_rate": 9.05230779443721e-06, + "loss": 2.4144, + "step": 17287 + }, + { + "epoch": 1.3952061980469694, + "grad_norm": 0.6772522330284119, + "learning_rate": 9.045745455451527e-06, + "loss": 2.4373, + "step": 17288 + }, + { + "epoch": 1.3952869017835525, + "grad_norm": 0.7005482316017151, + "learning_rate": 9.039185383283622e-06, + "loss": 2.3991, + "step": 17289 + }, + { + "epoch": 1.3953676055201356, + "grad_norm": 0.7172494530677795, + "learning_rate": 9.032627578096986e-06, + "loss": 2.4535, + "step": 17290 + }, + { + "epoch": 1.3954483092567185, + "grad_norm": 0.6911814212799072, + "learning_rate": 9.026072040055067e-06, + "loss": 2.3586, + "step": 17291 + }, + { + "epoch": 1.3955290129933016, + "grad_norm": 0.6708523035049438, + "learning_rate": 9.019518769321245e-06, + "loss": 2.4189, + "step": 17292 + }, + { + "epoch": 1.3956097167298847, + "grad_norm": 0.6716340780258179, + "learning_rate": 9.012967766058855e-06, + "loss": 2.3982, + "step": 17293 + }, + { + "epoch": 1.3956904204664675, + "grad_norm": 0.7001132965087891, + "learning_rate": 9.006419030431135e-06, + "loss": 2.3722, + "step": 17294 + }, + { + "epoch": 1.3957711242030506, + "grad_norm": 0.6912658214569092, + "learning_rate": 8.999872562601308e-06, + "loss": 2.371, + "step": 17295 + }, + { + "epoch": 1.3958518279396337, + "grad_norm": 0.7627947330474854, + "learning_rate": 8.993328362732545e-06, + "loss": 2.4123, + "step": 17296 + }, + { + "epoch": 1.3959325316762166, + "grad_norm": 0.6897323131561279, + "learning_rate": 
8.986786430987926e-06, + "loss": 2.4466, + "step": 17297 + }, + { + "epoch": 1.3960132354127996, + "grad_norm": 0.7040663361549377, + "learning_rate": 8.980246767530498e-06, + "loss": 2.4008, + "step": 17298 + }, + { + "epoch": 1.3960939391493827, + "grad_norm": 0.7423021197319031, + "learning_rate": 8.973709372523254e-06, + "loss": 2.421, + "step": 17299 + }, + { + "epoch": 1.3961746428859656, + "grad_norm": 0.7053872346878052, + "learning_rate": 8.967174246129128e-06, + "loss": 2.4217, + "step": 17300 + }, + { + "epoch": 1.3962553466225487, + "grad_norm": 0.7772163152694702, + "learning_rate": 8.960641388510959e-06, + "loss": 2.3686, + "step": 17301 + }, + { + "epoch": 1.3963360503591316, + "grad_norm": 0.7254317402839661, + "learning_rate": 8.954110799831582e-06, + "loss": 2.3974, + "step": 17302 + }, + { + "epoch": 1.3964167540957146, + "grad_norm": 0.6462311744689941, + "learning_rate": 8.94758248025378e-06, + "loss": 2.3506, + "step": 17303 + }, + { + "epoch": 1.3964974578322975, + "grad_norm": 0.693526029586792, + "learning_rate": 8.94105642994023e-06, + "loss": 2.3774, + "step": 17304 + }, + { + "epoch": 1.3965781615688806, + "grad_norm": 0.6220893263816833, + "learning_rate": 8.934532649053585e-06, + "loss": 2.3588, + "step": 17305 + }, + { + "epoch": 1.3966588653054637, + "grad_norm": 0.6866275668144226, + "learning_rate": 8.928011137756443e-06, + "loss": 2.4001, + "step": 17306 + }, + { + "epoch": 1.3967395690420465, + "grad_norm": 0.7290368676185608, + "learning_rate": 8.92149189621132e-06, + "loss": 2.3936, + "step": 17307 + }, + { + "epoch": 1.3968202727786296, + "grad_norm": 0.6699230670928955, + "learning_rate": 8.914974924580688e-06, + "loss": 2.3656, + "step": 17308 + }, + { + "epoch": 1.3969009765152127, + "grad_norm": 0.6863143444061279, + "learning_rate": 8.908460223027016e-06, + "loss": 2.4157, + "step": 17309 + }, + { + "epoch": 1.3969816802517956, + "grad_norm": 0.7856658697128296, + "learning_rate": 8.901947791712594e-06, + "loss": 2.3927, 
+ "step": 17310 + }, + { + "epoch": 1.3970623839883787, + "grad_norm": 0.692934513092041, + "learning_rate": 8.895437630799775e-06, + "loss": 2.4089, + "step": 17311 + }, + { + "epoch": 1.3971430877249618, + "grad_norm": 0.6908941268920898, + "learning_rate": 8.888929740450802e-06, + "loss": 2.3907, + "step": 17312 + }, + { + "epoch": 1.3972237914615446, + "grad_norm": 0.662405788898468, + "learning_rate": 8.88242412082786e-06, + "loss": 2.4287, + "step": 17313 + }, + { + "epoch": 1.3973044951981277, + "grad_norm": 0.6889618635177612, + "learning_rate": 8.875920772093094e-06, + "loss": 2.3815, + "step": 17314 + }, + { + "epoch": 1.3973851989347108, + "grad_norm": 0.6734819412231445, + "learning_rate": 8.869419694408586e-06, + "loss": 2.4046, + "step": 17315 + }, + { + "epoch": 1.3974659026712937, + "grad_norm": 0.6958059668540955, + "learning_rate": 8.862920887936378e-06, + "loss": 2.4449, + "step": 17316 + }, + { + "epoch": 1.3975466064078768, + "grad_norm": 0.6793306469917297, + "learning_rate": 8.856424352838389e-06, + "loss": 2.4023, + "step": 17317 + }, + { + "epoch": 1.3976273101444596, + "grad_norm": 0.6622069478034973, + "learning_rate": 8.84993008927656e-06, + "loss": 2.4098, + "step": 17318 + }, + { + "epoch": 1.3977080138810427, + "grad_norm": 0.6999792456626892, + "learning_rate": 8.843438097412771e-06, + "loss": 2.4205, + "step": 17319 + }, + { + "epoch": 1.3977887176176256, + "grad_norm": 0.693848192691803, + "learning_rate": 8.83694837740876e-06, + "loss": 2.4284, + "step": 17320 + }, + { + "epoch": 1.3978694213542087, + "grad_norm": 0.6813297271728516, + "learning_rate": 8.830460929426299e-06, + "loss": 2.3887, + "step": 17321 + }, + { + "epoch": 1.3979501250907918, + "grad_norm": 0.6795780658721924, + "learning_rate": 8.823975753627079e-06, + "loss": 2.4428, + "step": 17322 + }, + { + "epoch": 1.3980308288273746, + "grad_norm": 0.7395818829536438, + "learning_rate": 8.817492850172703e-06, + "loss": 2.4842, + "step": 17323 + }, + { + "epoch": 
1.3981115325639577, + "grad_norm": 0.6772391200065613, + "learning_rate": 8.811012219224778e-06, + "loss": 2.4555, + "step": 17324 + }, + { + "epoch": 1.3981922363005408, + "grad_norm": 0.66059809923172, + "learning_rate": 8.804533860944808e-06, + "loss": 2.3565, + "step": 17325 + }, + { + "epoch": 1.3982729400371237, + "grad_norm": 0.7336263656616211, + "learning_rate": 8.798057775494229e-06, + "loss": 2.4575, + "step": 17326 + }, + { + "epoch": 1.3983536437737067, + "grad_norm": 0.7758119702339172, + "learning_rate": 8.791583963034444e-06, + "loss": 2.4239, + "step": 17327 + }, + { + "epoch": 1.3984343475102898, + "grad_norm": 0.7417536377906799, + "learning_rate": 8.785112423726827e-06, + "loss": 2.4547, + "step": 17328 + }, + { + "epoch": 1.3985150512468727, + "grad_norm": 0.6901140213012695, + "learning_rate": 8.778643157732636e-06, + "loss": 2.4253, + "step": 17329 + }, + { + "epoch": 1.3985957549834558, + "grad_norm": 0.6766345500946045, + "learning_rate": 8.772176165213109e-06, + "loss": 2.4312, + "step": 17330 + }, + { + "epoch": 1.3986764587200389, + "grad_norm": 0.7406117916107178, + "learning_rate": 8.765711446329427e-06, + "loss": 2.4223, + "step": 17331 + }, + { + "epoch": 1.3987571624566217, + "grad_norm": 0.7236598134040833, + "learning_rate": 8.759249001242697e-06, + "loss": 2.4078, + "step": 17332 + }, + { + "epoch": 1.3988378661932048, + "grad_norm": 0.7009963393211365, + "learning_rate": 8.752788830114e-06, + "loss": 2.3573, + "step": 17333 + }, + { + "epoch": 1.3989185699297877, + "grad_norm": 0.7128826975822449, + "learning_rate": 8.746330933104319e-06, + "loss": 2.4039, + "step": 17334 + }, + { + "epoch": 1.3989992736663708, + "grad_norm": 0.6832678914070129, + "learning_rate": 8.739875310374635e-06, + "loss": 2.3917, + "step": 17335 + }, + { + "epoch": 1.3990799774029536, + "grad_norm": 0.6790578961372375, + "learning_rate": 8.733421962085786e-06, + "loss": 2.3908, + "step": 17336 + }, + { + "epoch": 1.3991606811395367, + "grad_norm": 
0.7215133905410767, + "learning_rate": 8.726970888398644e-06, + "loss": 2.3494, + "step": 17337 + }, + { + "epoch": 1.3992413848761198, + "grad_norm": 0.677761435508728, + "learning_rate": 8.720522089473992e-06, + "loss": 2.3747, + "step": 17338 + }, + { + "epoch": 1.3993220886127027, + "grad_norm": 0.6423436403274536, + "learning_rate": 8.714075565472513e-06, + "loss": 2.3386, + "step": 17339 + }, + { + "epoch": 1.3994027923492858, + "grad_norm": 0.798370897769928, + "learning_rate": 8.707631316554909e-06, + "loss": 2.3901, + "step": 17340 + }, + { + "epoch": 1.3994834960858689, + "grad_norm": 0.6572564840316772, + "learning_rate": 8.701189342881767e-06, + "loss": 2.4311, + "step": 17341 + }, + { + "epoch": 1.3995641998224517, + "grad_norm": 0.721610426902771, + "learning_rate": 8.694749644613642e-06, + "loss": 2.4158, + "step": 17342 + }, + { + "epoch": 1.3996449035590348, + "grad_norm": 0.8007451891899109, + "learning_rate": 8.688312221911022e-06, + "loss": 2.3931, + "step": 17343 + }, + { + "epoch": 1.399725607295618, + "grad_norm": 0.7181806564331055, + "learning_rate": 8.681877074934363e-06, + "loss": 2.4062, + "step": 17344 + }, + { + "epoch": 1.3998063110322008, + "grad_norm": 0.6630976796150208, + "learning_rate": 8.675444203844053e-06, + "loss": 2.3936, + "step": 17345 + }, + { + "epoch": 1.3998870147687839, + "grad_norm": 0.7093006372451782, + "learning_rate": 8.66901360880038e-06, + "loss": 2.4065, + "step": 17346 + }, + { + "epoch": 1.3999677185053667, + "grad_norm": 0.6685216426849365, + "learning_rate": 8.662585289963621e-06, + "loss": 2.4589, + "step": 17347 + }, + { + "epoch": 1.4000484222419498, + "grad_norm": 0.7227702140808105, + "learning_rate": 8.656159247494023e-06, + "loss": 2.3946, + "step": 17348 + }, + { + "epoch": 1.4001291259785327, + "grad_norm": 0.7459855079650879, + "learning_rate": 8.64973548155169e-06, + "loss": 2.4766, + "step": 17349 + }, + { + "epoch": 1.4002098297151158, + "grad_norm": 0.713190495967865, + "learning_rate": 
8.643313992296743e-06, + "loss": 2.3974, + "step": 17350 + }, + { + "epoch": 1.4002905334516988, + "grad_norm": 0.6921802759170532, + "learning_rate": 8.636894779889237e-06, + "loss": 2.4483, + "step": 17351 + }, + { + "epoch": 1.4003712371882817, + "grad_norm": 0.7517138719558716, + "learning_rate": 8.630477844489116e-06, + "loss": 2.402, + "step": 17352 + }, + { + "epoch": 1.4004519409248648, + "grad_norm": 0.728131115436554, + "learning_rate": 8.624063186256326e-06, + "loss": 2.4363, + "step": 17353 + }, + { + "epoch": 1.400532644661448, + "grad_norm": 0.6918095350265503, + "learning_rate": 8.617650805350763e-06, + "loss": 2.4424, + "step": 17354 + }, + { + "epoch": 1.4006133483980308, + "grad_norm": 0.6802886128425598, + "learning_rate": 8.6112407019322e-06, + "loss": 2.4133, + "step": 17355 + }, + { + "epoch": 1.4006940521346138, + "grad_norm": 0.6760320663452148, + "learning_rate": 8.604832876160418e-06, + "loss": 2.4187, + "step": 17356 + }, + { + "epoch": 1.400774755871197, + "grad_norm": 0.7422602772712708, + "learning_rate": 8.598427328195124e-06, + "loss": 2.4051, + "step": 17357 + }, + { + "epoch": 1.4008554596077798, + "grad_norm": 0.7278845906257629, + "learning_rate": 8.592024058195925e-06, + "loss": 2.4256, + "step": 17358 + }, + { + "epoch": 1.4009361633443629, + "grad_norm": 0.7399848699569702, + "learning_rate": 8.585623066322435e-06, + "loss": 2.4045, + "step": 17359 + }, + { + "epoch": 1.401016867080946, + "grad_norm": 0.703372061252594, + "learning_rate": 8.579224352734184e-06, + "loss": 2.404, + "step": 17360 + }, + { + "epoch": 1.4010975708175288, + "grad_norm": 0.6849603056907654, + "learning_rate": 8.572827917590642e-06, + "loss": 2.3808, + "step": 17361 + }, + { + "epoch": 1.401178274554112, + "grad_norm": 0.6907341480255127, + "learning_rate": 8.566433761051207e-06, + "loss": 2.3777, + "step": 17362 + }, + { + "epoch": 1.4012589782906948, + "grad_norm": 0.7436221837997437, + "learning_rate": 8.560041883275261e-06, + "loss": 2.4027, + 
"step": 17363 + }, + { + "epoch": 1.4013396820272779, + "grad_norm": 0.6975259780883789, + "learning_rate": 8.553652284422088e-06, + "loss": 2.4235, + "step": 17364 + }, + { + "epoch": 1.4014203857638607, + "grad_norm": 0.7692399024963379, + "learning_rate": 8.547264964650948e-06, + "loss": 2.4615, + "step": 17365 + }, + { + "epoch": 1.4015010895004438, + "grad_norm": 0.7096135020256042, + "learning_rate": 8.540879924121025e-06, + "loss": 2.3972, + "step": 17366 + }, + { + "epoch": 1.401581793237027, + "grad_norm": 0.6851587891578674, + "learning_rate": 8.534497162991473e-06, + "loss": 2.3697, + "step": 17367 + }, + { + "epoch": 1.4016624969736098, + "grad_norm": 0.6977655291557312, + "learning_rate": 8.528116681421317e-06, + "loss": 2.4413, + "step": 17368 + }, + { + "epoch": 1.4017432007101929, + "grad_norm": 0.715307354927063, + "learning_rate": 8.521738479569618e-06, + "loss": 2.4006, + "step": 17369 + }, + { + "epoch": 1.401823904446776, + "grad_norm": 0.7282734513282776, + "learning_rate": 8.51536255759533e-06, + "loss": 2.4418, + "step": 17370 + }, + { + "epoch": 1.4019046081833588, + "grad_norm": 0.6996017098426819, + "learning_rate": 8.508988915657334e-06, + "loss": 2.435, + "step": 17371 + }, + { + "epoch": 1.401985311919942, + "grad_norm": 0.7084866762161255, + "learning_rate": 8.502617553914494e-06, + "loss": 2.4314, + "step": 17372 + }, + { + "epoch": 1.402066015656525, + "grad_norm": 0.7217462658882141, + "learning_rate": 8.496248472525603e-06, + "loss": 2.4811, + "step": 17373 + }, + { + "epoch": 1.4021467193931079, + "grad_norm": 0.7414960265159607, + "learning_rate": 8.489881671649391e-06, + "loss": 2.4016, + "step": 17374 + }, + { + "epoch": 1.402227423129691, + "grad_norm": 0.7439210414886475, + "learning_rate": 8.483517151444532e-06, + "loss": 2.4711, + "step": 17375 + }, + { + "epoch": 1.402308126866274, + "grad_norm": 0.7277424335479736, + "learning_rate": 8.477154912069663e-06, + "loss": 2.4095, + "step": 17376 + }, + { + "epoch": 
1.402388830602857, + "grad_norm": 0.7506297826766968, + "learning_rate": 8.470794953683347e-06, + "loss": 2.4187, + "step": 17377 + }, + { + "epoch": 1.40246953433944, + "grad_norm": 0.7137917280197144, + "learning_rate": 8.464437276444059e-06, + "loss": 2.4069, + "step": 17378 + }, + { + "epoch": 1.4025502380760229, + "grad_norm": 0.6610304117202759, + "learning_rate": 8.458081880510282e-06, + "loss": 2.4709, + "step": 17379 + }, + { + "epoch": 1.402630941812606, + "grad_norm": 0.7147911190986633, + "learning_rate": 8.451728766040411e-06, + "loss": 2.4147, + "step": 17380 + }, + { + "epoch": 1.4027116455491888, + "grad_norm": 0.7196649312973022, + "learning_rate": 8.445377933192745e-06, + "loss": 2.4611, + "step": 17381 + }, + { + "epoch": 1.402792349285772, + "grad_norm": 0.6550390124320984, + "learning_rate": 8.439029382125596e-06, + "loss": 2.4229, + "step": 17382 + }, + { + "epoch": 1.402873053022355, + "grad_norm": 0.6517959833145142, + "learning_rate": 8.432683112997175e-06, + "loss": 2.421, + "step": 17383 + }, + { + "epoch": 1.4029537567589379, + "grad_norm": 0.6660284399986267, + "learning_rate": 8.426339125965643e-06, + "loss": 2.3918, + "step": 17384 + }, + { + "epoch": 1.403034460495521, + "grad_norm": 0.696163535118103, + "learning_rate": 8.41999742118913e-06, + "loss": 2.4334, + "step": 17385 + }, + { + "epoch": 1.403115164232104, + "grad_norm": 0.7146298885345459, + "learning_rate": 8.413657998825674e-06, + "loss": 2.3984, + "step": 17386 + }, + { + "epoch": 1.403195867968687, + "grad_norm": 0.7084376215934753, + "learning_rate": 8.407320859033262e-06, + "loss": 2.4098, + "step": 17387 + }, + { + "epoch": 1.40327657170527, + "grad_norm": 0.7499445080757141, + "learning_rate": 8.400986001969846e-06, + "loss": 2.4315, + "step": 17388 + }, + { + "epoch": 1.403357275441853, + "grad_norm": 0.6822247505187988, + "learning_rate": 8.394653427793308e-06, + "loss": 2.3816, + "step": 17389 + }, + { + "epoch": 1.403437979178436, + "grad_norm": 
0.6859664916992188, + "learning_rate": 8.388323136661458e-06, + "loss": 2.3772, + "step": 17390 + }, + { + "epoch": 1.403518682915019, + "grad_norm": 0.6771109104156494, + "learning_rate": 8.381995128732057e-06, + "loss": 2.4295, + "step": 17391 + }, + { + "epoch": 1.4035993866516019, + "grad_norm": 0.7589800357818604, + "learning_rate": 8.375669404162845e-06, + "loss": 2.3806, + "step": 17392 + }, + { + "epoch": 1.403680090388185, + "grad_norm": 0.665472149848938, + "learning_rate": 8.369345963111453e-06, + "loss": 2.383, + "step": 17393 + }, + { + "epoch": 1.4037607941247678, + "grad_norm": 0.6658698916435242, + "learning_rate": 8.363024805735475e-06, + "loss": 2.3682, + "step": 17394 + }, + { + "epoch": 1.403841497861351, + "grad_norm": 0.7445670366287231, + "learning_rate": 8.356705932192477e-06, + "loss": 2.5224, + "step": 17395 + }, + { + "epoch": 1.403922201597934, + "grad_norm": 0.6812258362770081, + "learning_rate": 8.35038934263993e-06, + "loss": 2.426, + "step": 17396 + }, + { + "epoch": 1.4040029053345169, + "grad_norm": 0.6613782644271851, + "learning_rate": 8.344075037235243e-06, + "loss": 2.3756, + "step": 17397 + }, + { + "epoch": 1.4040836090711, + "grad_norm": 0.6314469575881958, + "learning_rate": 8.337763016135792e-06, + "loss": 2.3703, + "step": 17398 + }, + { + "epoch": 1.404164312807683, + "grad_norm": 0.6611869931221008, + "learning_rate": 8.331453279498914e-06, + "loss": 2.3951, + "step": 17399 + }, + { + "epoch": 1.404245016544266, + "grad_norm": 0.6668544411659241, + "learning_rate": 8.325145827481828e-06, + "loss": 2.4732, + "step": 17400 + }, + { + "epoch": 1.404325720280849, + "grad_norm": 0.7428251504898071, + "learning_rate": 8.318840660241755e-06, + "loss": 2.391, + "step": 17401 + }, + { + "epoch": 1.404406424017432, + "grad_norm": 0.7163440585136414, + "learning_rate": 8.312537777935836e-06, + "loss": 2.4379, + "step": 17402 + }, + { + "epoch": 1.404487127754015, + "grad_norm": 0.7152317762374878, + "learning_rate": 
8.306237180721121e-06, + "loss": 2.426, + "step": 17403 + }, + { + "epoch": 1.404567831490598, + "grad_norm": 0.7675083875656128, + "learning_rate": 8.299938868754686e-06, + "loss": 2.4014, + "step": 17404 + }, + { + "epoch": 1.4046485352271811, + "grad_norm": 0.7118947505950928, + "learning_rate": 8.293642842193494e-06, + "loss": 2.3998, + "step": 17405 + }, + { + "epoch": 1.404729238963764, + "grad_norm": 0.713556706905365, + "learning_rate": 8.28734910119442e-06, + "loss": 2.4134, + "step": 17406 + }, + { + "epoch": 1.404809942700347, + "grad_norm": 0.7631849646568298, + "learning_rate": 8.281057645914359e-06, + "loss": 2.4866, + "step": 17407 + }, + { + "epoch": 1.40489064643693, + "grad_norm": 0.7348508834838867, + "learning_rate": 8.274768476510087e-06, + "loss": 2.4067, + "step": 17408 + }, + { + "epoch": 1.404971350173513, + "grad_norm": 0.7371857762336731, + "learning_rate": 8.268481593138377e-06, + "loss": 2.429, + "step": 17409 + }, + { + "epoch": 1.405052053910096, + "grad_norm": 0.674980640411377, + "learning_rate": 8.262196995955874e-06, + "loss": 2.3897, + "step": 17410 + }, + { + "epoch": 1.405132757646679, + "grad_norm": 0.6975973844528198, + "learning_rate": 8.255914685119237e-06, + "loss": 2.445, + "step": 17411 + }, + { + "epoch": 1.405213461383262, + "grad_norm": 0.6854067444801331, + "learning_rate": 8.249634660785033e-06, + "loss": 2.3528, + "step": 17412 + }, + { + "epoch": 1.405294165119845, + "grad_norm": 0.6678418517112732, + "learning_rate": 8.243356923109768e-06, + "loss": 2.4078, + "step": 17413 + }, + { + "epoch": 1.405374868856428, + "grad_norm": 0.6600239276885986, + "learning_rate": 8.237081472249885e-06, + "loss": 2.3719, + "step": 17414 + }, + { + "epoch": 1.4054555725930111, + "grad_norm": 0.7209253907203674, + "learning_rate": 8.230808308361815e-06, + "loss": 2.4203, + "step": 17415 + }, + { + "epoch": 1.405536276329594, + "grad_norm": 0.6849339604377747, + "learning_rate": 8.224537431601886e-06, + "loss": 2.3898, + "step": 
17416 + }, + { + "epoch": 1.405616980066177, + "grad_norm": 0.718558132648468, + "learning_rate": 8.218268842126387e-06, + "loss": 2.4063, + "step": 17417 + }, + { + "epoch": 1.4056976838027602, + "grad_norm": 0.7118551731109619, + "learning_rate": 8.212002540091567e-06, + "loss": 2.3942, + "step": 17418 + }, + { + "epoch": 1.405778387539343, + "grad_norm": 0.7138789892196655, + "learning_rate": 8.205738525653562e-06, + "loss": 2.4614, + "step": 17419 + }, + { + "epoch": 1.4058590912759261, + "grad_norm": 0.7254295349121094, + "learning_rate": 8.199476798968508e-06, + "loss": 2.4126, + "step": 17420 + }, + { + "epoch": 1.4059397950125092, + "grad_norm": 0.691965639591217, + "learning_rate": 8.193217360192473e-06, + "loss": 2.4233, + "step": 17421 + }, + { + "epoch": 1.406020498749092, + "grad_norm": 0.7132619619369507, + "learning_rate": 8.186960209481431e-06, + "loss": 2.3764, + "step": 17422 + }, + { + "epoch": 1.4061012024856752, + "grad_norm": 0.6838160753250122, + "learning_rate": 8.180705346991346e-06, + "loss": 2.3927, + "step": 17423 + }, + { + "epoch": 1.406181906222258, + "grad_norm": 0.6755721569061279, + "learning_rate": 8.174452772878094e-06, + "loss": 2.435, + "step": 17424 + }, + { + "epoch": 1.4062626099588411, + "grad_norm": 0.774718701839447, + "learning_rate": 8.168202487297527e-06, + "loss": 2.4811, + "step": 17425 + }, + { + "epoch": 1.406343313695424, + "grad_norm": 0.6601200699806213, + "learning_rate": 8.161954490405388e-06, + "loss": 2.3494, + "step": 17426 + }, + { + "epoch": 1.406424017432007, + "grad_norm": 0.6854710578918457, + "learning_rate": 8.155708782357419e-06, + "loss": 2.4214, + "step": 17427 + }, + { + "epoch": 1.4065047211685902, + "grad_norm": 0.7471936345100403, + "learning_rate": 8.149465363309294e-06, + "loss": 2.3702, + "step": 17428 + }, + { + "epoch": 1.406585424905173, + "grad_norm": 0.7129673957824707, + "learning_rate": 8.143224233416569e-06, + "loss": 2.4078, + "step": 17429 + }, + { + "epoch": 1.406666128641756, + 
"grad_norm": 0.7168975472450256, + "learning_rate": 8.136985392834807e-06, + "loss": 2.4265, + "step": 17430 + }, + { + "epoch": 1.4067468323783392, + "grad_norm": 0.709699809551239, + "learning_rate": 8.130748841719526e-06, + "loss": 2.4069, + "step": 17431 + }, + { + "epoch": 1.406827536114922, + "grad_norm": 0.7571663856506348, + "learning_rate": 8.124514580226105e-06, + "loss": 2.3949, + "step": 17432 + }, + { + "epoch": 1.4069082398515051, + "grad_norm": 0.6844212412834167, + "learning_rate": 8.118282608509952e-06, + "loss": 2.4156, + "step": 17433 + }, + { + "epoch": 1.4069889435880882, + "grad_norm": 0.6632293462753296, + "learning_rate": 8.112052926726376e-06, + "loss": 2.3973, + "step": 17434 + }, + { + "epoch": 1.407069647324671, + "grad_norm": 0.6375966668128967, + "learning_rate": 8.105825535030643e-06, + "loss": 2.4168, + "step": 17435 + }, + { + "epoch": 1.4071503510612542, + "grad_norm": 0.6997824907302856, + "learning_rate": 8.099600433577947e-06, + "loss": 2.3279, + "step": 17436 + }, + { + "epoch": 1.4072310547978373, + "grad_norm": 0.7491862177848816, + "learning_rate": 8.093377622523458e-06, + "loss": 2.403, + "step": 17437 + }, + { + "epoch": 1.4073117585344201, + "grad_norm": 0.6938888430595398, + "learning_rate": 8.087157102022235e-06, + "loss": 2.3965, + "step": 17438 + }, + { + "epoch": 1.4073924622710032, + "grad_norm": 0.708043098449707, + "learning_rate": 8.080938872229304e-06, + "loss": 2.4429, + "step": 17439 + }, + { + "epoch": 1.407473166007586, + "grad_norm": 0.6587165594100952, + "learning_rate": 8.074722933299673e-06, + "loss": 2.3951, + "step": 17440 + }, + { + "epoch": 1.4075538697441692, + "grad_norm": 0.6987459659576416, + "learning_rate": 8.068509285388248e-06, + "loss": 2.41, + "step": 17441 + }, + { + "epoch": 1.407634573480752, + "grad_norm": 0.6864002346992493, + "learning_rate": 8.062297928649865e-06, + "loss": 2.3867, + "step": 17442 + }, + { + "epoch": 1.4077152772173351, + "grad_norm": 0.6478279829025269, + 
"learning_rate": 8.056088863239342e-06, + "loss": 2.391, + "step": 17443 + }, + { + "epoch": 1.4077959809539182, + "grad_norm": 0.658235490322113, + "learning_rate": 8.049882089311433e-06, + "loss": 2.3646, + "step": 17444 + }, + { + "epoch": 1.407876684690501, + "grad_norm": 0.6664391160011292, + "learning_rate": 8.043677607020828e-06, + "loss": 2.4101, + "step": 17445 + }, + { + "epoch": 1.4079573884270842, + "grad_norm": 0.6662336587905884, + "learning_rate": 8.037475416522144e-06, + "loss": 2.4461, + "step": 17446 + }, + { + "epoch": 1.4080380921636673, + "grad_norm": 0.6629661321640015, + "learning_rate": 8.031275517969982e-06, + "loss": 2.4191, + "step": 17447 + }, + { + "epoch": 1.4081187959002501, + "grad_norm": 0.6586340665817261, + "learning_rate": 8.02507791151883e-06, + "loss": 2.4213, + "step": 17448 + }, + { + "epoch": 1.4081994996368332, + "grad_norm": 0.692555844783783, + "learning_rate": 8.018882597323163e-06, + "loss": 2.4148, + "step": 17449 + }, + { + "epoch": 1.4082802033734163, + "grad_norm": 0.6890958547592163, + "learning_rate": 8.012689575537402e-06, + "loss": 2.4121, + "step": 17450 + }, + { + "epoch": 1.4083609071099992, + "grad_norm": 0.7425588965415955, + "learning_rate": 8.006498846315846e-06, + "loss": 2.4426, + "step": 17451 + }, + { + "epoch": 1.4084416108465823, + "grad_norm": 0.6801562309265137, + "learning_rate": 8.000310409812828e-06, + "loss": 2.3786, + "step": 17452 + }, + { + "epoch": 1.4085223145831651, + "grad_norm": 0.7273206114768982, + "learning_rate": 7.994124266182568e-06, + "loss": 2.3635, + "step": 17453 + }, + { + "epoch": 1.4086030183197482, + "grad_norm": 0.6684201955795288, + "learning_rate": 7.987940415579209e-06, + "loss": 2.4565, + "step": 17454 + }, + { + "epoch": 1.408683722056331, + "grad_norm": 0.7803860902786255, + "learning_rate": 7.981758858156908e-06, + "loss": 2.3957, + "step": 17455 + }, + { + "epoch": 1.4087644257929142, + "grad_norm": 0.7033873200416565, + "learning_rate": 7.975579594069727e-06, + 
"loss": 2.3273, + "step": 17456 + }, + { + "epoch": 1.4088451295294973, + "grad_norm": 0.7338894009590149, + "learning_rate": 7.969402623471656e-06, + "loss": 2.4657, + "step": 17457 + }, + { + "epoch": 1.4089258332660801, + "grad_norm": 0.6912354230880737, + "learning_rate": 7.963227946516637e-06, + "loss": 2.4329, + "step": 17458 + }, + { + "epoch": 1.4090065370026632, + "grad_norm": 0.7227259278297424, + "learning_rate": 7.957055563358561e-06, + "loss": 2.4043, + "step": 17459 + }, + { + "epoch": 1.4090872407392463, + "grad_norm": 0.7320930361747742, + "learning_rate": 7.950885474151281e-06, + "loss": 2.3889, + "step": 17460 + }, + { + "epoch": 1.4091679444758292, + "grad_norm": 0.6754814982414246, + "learning_rate": 7.944717679048542e-06, + "loss": 2.4199, + "step": 17461 + }, + { + "epoch": 1.4092486482124122, + "grad_norm": 0.6574978828430176, + "learning_rate": 7.938552178204061e-06, + "loss": 2.3846, + "step": 17462 + }, + { + "epoch": 1.4093293519489953, + "grad_norm": 0.6976850628852844, + "learning_rate": 7.932388971771543e-06, + "loss": 2.4647, + "step": 17463 + }, + { + "epoch": 1.4094100556855782, + "grad_norm": 0.7376202344894409, + "learning_rate": 7.926228059904529e-06, + "loss": 2.4279, + "step": 17464 + }, + { + "epoch": 1.4094907594221613, + "grad_norm": 0.6907104253768921, + "learning_rate": 7.920069442756584e-06, + "loss": 2.4238, + "step": 17465 + }, + { + "epoch": 1.4095714631587444, + "grad_norm": 0.7079440951347351, + "learning_rate": 7.913913120481243e-06, + "loss": 2.4173, + "step": 17466 + }, + { + "epoch": 1.4096521668953272, + "grad_norm": 0.7188387513160706, + "learning_rate": 7.907759093231882e-06, + "loss": 2.4134, + "step": 17467 + }, + { + "epoch": 1.4097328706319103, + "grad_norm": 0.6877745389938354, + "learning_rate": 7.901607361161889e-06, + "loss": 2.4098, + "step": 17468 + }, + { + "epoch": 1.4098135743684932, + "grad_norm": 0.6914156079292297, + "learning_rate": 7.8954579244246e-06, + "loss": 2.4244, + "step": 17469 + }, + 
{ + "epoch": 1.4098942781050763, + "grad_norm": 0.6616036295890808, + "learning_rate": 7.889310783173277e-06, + "loss": 2.4617, + "step": 17470 + }, + { + "epoch": 1.4099749818416591, + "grad_norm": 0.7090594172477722, + "learning_rate": 7.883165937561088e-06, + "loss": 2.4234, + "step": 17471 + }, + { + "epoch": 1.4100556855782422, + "grad_norm": 0.7596384286880493, + "learning_rate": 7.8770233877412e-06, + "loss": 2.39, + "step": 17472 + }, + { + "epoch": 1.4101363893148253, + "grad_norm": 0.7311475872993469, + "learning_rate": 7.870883133866725e-06, + "loss": 2.418, + "step": 17473 + }, + { + "epoch": 1.4102170930514082, + "grad_norm": 0.6628947854042053, + "learning_rate": 7.86474517609065e-06, + "loss": 2.4177, + "step": 17474 + }, + { + "epoch": 1.4102977967879913, + "grad_norm": 0.7169137597084045, + "learning_rate": 7.858609514565974e-06, + "loss": 2.4359, + "step": 17475 + }, + { + "epoch": 1.4103785005245744, + "grad_norm": 0.7364529371261597, + "learning_rate": 7.852476149445598e-06, + "loss": 2.45, + "step": 17476 + }, + { + "epoch": 1.4104592042611572, + "grad_norm": 0.7494707703590393, + "learning_rate": 7.8463450808824e-06, + "loss": 2.403, + "step": 17477 + }, + { + "epoch": 1.4105399079977403, + "grad_norm": 0.6723065376281738, + "learning_rate": 7.84021630902917e-06, + "loss": 2.4089, + "step": 17478 + }, + { + "epoch": 1.4106206117343234, + "grad_norm": 0.7032917141914368, + "learning_rate": 7.83408983403867e-06, + "loss": 2.4285, + "step": 17479 + }, + { + "epoch": 1.4107013154709063, + "grad_norm": 0.6634184718132019, + "learning_rate": 7.827965656063573e-06, + "loss": 2.3701, + "step": 17480 + }, + { + "epoch": 1.4107820192074894, + "grad_norm": 0.6645818948745728, + "learning_rate": 7.821843775256498e-06, + "loss": 2.3891, + "step": 17481 + }, + { + "epoch": 1.4108627229440724, + "grad_norm": 0.6750596165657043, + "learning_rate": 7.815724191770058e-06, + "loss": 2.4043, + "step": 17482 + }, + { + "epoch": 1.4109434266806553, + "grad_norm": 
0.7519060969352722, + "learning_rate": 7.809606905756727e-06, + "loss": 2.4287, + "step": 17483 + }, + { + "epoch": 1.4110241304172384, + "grad_norm": 0.69886714220047, + "learning_rate": 7.803491917368977e-06, + "loss": 2.4565, + "step": 17484 + }, + { + "epoch": 1.4111048341538213, + "grad_norm": 0.6600854992866516, + "learning_rate": 7.797379226759216e-06, + "loss": 2.3743, + "step": 17485 + }, + { + "epoch": 1.4111855378904044, + "grad_norm": 0.65254807472229, + "learning_rate": 7.791268834079779e-06, + "loss": 2.435, + "step": 17486 + }, + { + "epoch": 1.4112662416269872, + "grad_norm": 0.6900071501731873, + "learning_rate": 7.785160739482955e-06, + "loss": 2.4073, + "step": 17487 + }, + { + "epoch": 1.4113469453635703, + "grad_norm": 0.6831900477409363, + "learning_rate": 7.779054943120989e-06, + "loss": 2.4325, + "step": 17488 + }, + { + "epoch": 1.4114276491001534, + "grad_norm": 0.7446292042732239, + "learning_rate": 7.772951445146049e-06, + "loss": 2.4693, + "step": 17489 + }, + { + "epoch": 1.4115083528367363, + "grad_norm": 0.6620200872421265, + "learning_rate": 7.766850245710233e-06, + "loss": 2.4345, + "step": 17490 + }, + { + "epoch": 1.4115890565733193, + "grad_norm": 0.7509312629699707, + "learning_rate": 7.76075134496561e-06, + "loss": 2.3596, + "step": 17491 + }, + { + "epoch": 1.4116697603099024, + "grad_norm": 0.7003920078277588, + "learning_rate": 7.754654743064194e-06, + "loss": 2.4016, + "step": 17492 + }, + { + "epoch": 1.4117504640464853, + "grad_norm": 0.6603164076805115, + "learning_rate": 7.748560440157892e-06, + "loss": 2.4031, + "step": 17493 + }, + { + "epoch": 1.4118311677830684, + "grad_norm": 0.7125976085662842, + "learning_rate": 7.742468436398608e-06, + "loss": 2.4199, + "step": 17494 + }, + { + "epoch": 1.4119118715196515, + "grad_norm": 0.7279991507530212, + "learning_rate": 7.736378731938187e-06, + "loss": 2.4263, + "step": 17495 + }, + { + "epoch": 1.4119925752562343, + "grad_norm": 0.7445220351219177, + "learning_rate": 
7.730291326928385e-06, + "loss": 2.4256, + "step": 17496 + }, + { + "epoch": 1.4120732789928174, + "grad_norm": 0.7625001072883606, + "learning_rate": 7.724206221520913e-06, + "loss": 2.4307, + "step": 17497 + }, + { + "epoch": 1.4121539827294003, + "grad_norm": 0.7109429240226746, + "learning_rate": 7.71812341586745e-06, + "loss": 2.4157, + "step": 17498 + }, + { + "epoch": 1.4122346864659834, + "grad_norm": 0.7360411882400513, + "learning_rate": 7.712042910119566e-06, + "loss": 2.3855, + "step": 17499 + }, + { + "epoch": 1.4123153902025662, + "grad_norm": 0.6878146529197693, + "learning_rate": 7.705964704428815e-06, + "loss": 2.4059, + "step": 17500 + }, + { + "epoch": 1.4123960939391493, + "grad_norm": 0.7399710416793823, + "learning_rate": 7.699888798946674e-06, + "loss": 2.4234, + "step": 17501 + }, + { + "epoch": 1.4124767976757324, + "grad_norm": 0.6825466156005859, + "learning_rate": 7.693815193824605e-06, + "loss": 2.4428, + "step": 17502 + }, + { + "epoch": 1.4125575014123153, + "grad_norm": 0.6567744016647339, + "learning_rate": 7.687743889213938e-06, + "loss": 2.3609, + "step": 17503 + }, + { + "epoch": 1.4126382051488984, + "grad_norm": 0.7361522316932678, + "learning_rate": 7.681674885265989e-06, + "loss": 2.4006, + "step": 17504 + }, + { + "epoch": 1.4127189088854815, + "grad_norm": 0.7350279688835144, + "learning_rate": 7.675608182132033e-06, + "loss": 2.4395, + "step": 17505 + }, + { + "epoch": 1.4127996126220643, + "grad_norm": 0.6630931496620178, + "learning_rate": 7.669543779963262e-06, + "loss": 2.4451, + "step": 17506 + }, + { + "epoch": 1.4128803163586474, + "grad_norm": 0.6845518350601196, + "learning_rate": 7.6634816789108e-06, + "loss": 2.436, + "step": 17507 + }, + { + "epoch": 1.4129610200952305, + "grad_norm": 0.6736167073249817, + "learning_rate": 7.657421879125782e-06, + "loss": 2.3628, + "step": 17508 + }, + { + "epoch": 1.4130417238318134, + "grad_norm": 0.6932296752929688, + "learning_rate": 7.651364380759163e-06, + "loss": 2.4353, 
+ "step": 17509 + }, + { + "epoch": 1.4131224275683965, + "grad_norm": 0.7034411430358887, + "learning_rate": 7.645309183961947e-06, + "loss": 2.3853, + "step": 17510 + }, + { + "epoch": 1.4132031313049795, + "grad_norm": 0.6912705898284912, + "learning_rate": 7.639256288885065e-06, + "loss": 2.2978, + "step": 17511 + }, + { + "epoch": 1.4132838350415624, + "grad_norm": 0.6716031432151794, + "learning_rate": 7.633205695679336e-06, + "loss": 2.3602, + "step": 17512 + }, + { + "epoch": 1.4133645387781455, + "grad_norm": 0.707477331161499, + "learning_rate": 7.6271574044955664e-06, + "loss": 2.434, + "step": 17513 + }, + { + "epoch": 1.4134452425147284, + "grad_norm": 0.7031993269920349, + "learning_rate": 7.621111415484517e-06, + "loss": 2.3718, + "step": 17514 + }, + { + "epoch": 1.4135259462513114, + "grad_norm": 0.6708939671516418, + "learning_rate": 7.615067728796832e-06, + "loss": 2.4218, + "step": 17515 + }, + { + "epoch": 1.4136066499878943, + "grad_norm": 0.7508932948112488, + "learning_rate": 7.609026344583148e-06, + "loss": 2.4273, + "step": 17516 + }, + { + "epoch": 1.4136873537244774, + "grad_norm": 0.6981049180030823, + "learning_rate": 7.602987262994055e-06, + "loss": 2.3941, + "step": 17517 + }, + { + "epoch": 1.4137680574610605, + "grad_norm": 0.7662717700004578, + "learning_rate": 7.5969504841800544e-06, + "loss": 2.3875, + "step": 17518 + }, + { + "epoch": 1.4138487611976434, + "grad_norm": 0.688423752784729, + "learning_rate": 7.590916008291582e-06, + "loss": 2.4091, + "step": 17519 + }, + { + "epoch": 1.4139294649342264, + "grad_norm": 0.6867286562919617, + "learning_rate": 7.584883835479039e-06, + "loss": 2.3983, + "step": 17520 + }, + { + "epoch": 1.4140101686708095, + "grad_norm": 0.7491776943206787, + "learning_rate": 7.578853965892785e-06, + "loss": 2.4151, + "step": 17521 + }, + { + "epoch": 1.4140908724073924, + "grad_norm": 0.6946732997894287, + "learning_rate": 7.572826399683064e-06, + "loss": 2.4196, + "step": 17522 + }, + { + "epoch": 
1.4141715761439755, + "grad_norm": 0.6638106107711792, + "learning_rate": 7.566801137000123e-06, + "loss": 2.441, + "step": 17523 + }, + { + "epoch": 1.4142522798805586, + "grad_norm": 0.7190408110618591, + "learning_rate": 7.5607781779941325e-06, + "loss": 2.4026, + "step": 17524 + }, + { + "epoch": 1.4143329836171414, + "grad_norm": 0.708963930606842, + "learning_rate": 7.55475752281517e-06, + "loss": 2.3842, + "step": 17525 + }, + { + "epoch": 1.4144136873537245, + "grad_norm": 0.6763237118721008, + "learning_rate": 7.548739171613306e-06, + "loss": 2.4259, + "step": 17526 + }, + { + "epoch": 1.4144943910903076, + "grad_norm": 0.7374435067176819, + "learning_rate": 7.542723124538531e-06, + "loss": 2.4603, + "step": 17527 + }, + { + "epoch": 1.4145750948268905, + "grad_norm": 0.7165411114692688, + "learning_rate": 7.5367093817407805e-06, + "loss": 2.4103, + "step": 17528 + }, + { + "epoch": 1.4146557985634736, + "grad_norm": 0.7794588804244995, + "learning_rate": 7.530697943369935e-06, + "loss": 2.3912, + "step": 17529 + }, + { + "epoch": 1.4147365023000564, + "grad_norm": 0.691405713558197, + "learning_rate": 7.5246888095758305e-06, + "loss": 2.4357, + "step": 17530 + }, + { + "epoch": 1.4148172060366395, + "grad_norm": 0.6955364346504211, + "learning_rate": 7.518681980508191e-06, + "loss": 2.3645, + "step": 17531 + }, + { + "epoch": 1.4148979097732224, + "grad_norm": 0.6848856210708618, + "learning_rate": 7.512677456316753e-06, + "loss": 2.4145, + "step": 17532 + }, + { + "epoch": 1.4149786135098055, + "grad_norm": 0.668624997138977, + "learning_rate": 7.506675237151151e-06, + "loss": 2.4367, + "step": 17533 + }, + { + "epoch": 1.4150593172463886, + "grad_norm": 0.7547643780708313, + "learning_rate": 7.50067532316101e-06, + "loss": 2.437, + "step": 17534 + }, + { + "epoch": 1.4151400209829714, + "grad_norm": 0.6710182428359985, + "learning_rate": 7.494677714495812e-06, + "loss": 2.3596, + "step": 17535 + }, + { + "epoch": 1.4152207247195545, + "grad_norm": 
0.7603517770767212, + "learning_rate": 7.488682411305048e-06, + "loss": 2.4277, + "step": 17536 + }, + { + "epoch": 1.4153014284561376, + "grad_norm": 0.7142195105552673, + "learning_rate": 7.482689413738153e-06, + "loss": 2.386, + "step": 17537 + }, + { + "epoch": 1.4153821321927205, + "grad_norm": 0.6910836100578308, + "learning_rate": 7.4766987219444865e-06, + "loss": 2.4394, + "step": 17538 + }, + { + "epoch": 1.4154628359293036, + "grad_norm": 0.7568751573562622, + "learning_rate": 7.470710336073339e-06, + "loss": 2.4621, + "step": 17539 + }, + { + "epoch": 1.4155435396658866, + "grad_norm": 0.7378259301185608, + "learning_rate": 7.46472425627398e-06, + "loss": 2.3677, + "step": 17540 + }, + { + "epoch": 1.4156242434024695, + "grad_norm": 0.7365754842758179, + "learning_rate": 7.458740482695569e-06, + "loss": 2.3881, + "step": 17541 + }, + { + "epoch": 1.4157049471390526, + "grad_norm": 0.6753227114677429, + "learning_rate": 7.452759015487254e-06, + "loss": 2.3997, + "step": 17542 + }, + { + "epoch": 1.4157856508756355, + "grad_norm": 0.6384701728820801, + "learning_rate": 7.446779854798114e-06, + "loss": 2.4029, + "step": 17543 + }, + { + "epoch": 1.4158663546122185, + "grad_norm": 0.6766810417175293, + "learning_rate": 7.4408030007771416e-06, + "loss": 2.4083, + "step": 17544 + }, + { + "epoch": 1.4159470583488014, + "grad_norm": 0.6948650479316711, + "learning_rate": 7.434828453573317e-06, + "loss": 2.3521, + "step": 17545 + }, + { + "epoch": 1.4160277620853845, + "grad_norm": 0.7690626978874207, + "learning_rate": 7.428856213335533e-06, + "loss": 2.4318, + "step": 17546 + }, + { + "epoch": 1.4161084658219676, + "grad_norm": 0.7151117920875549, + "learning_rate": 7.422886280212626e-06, + "loss": 2.4261, + "step": 17547 + }, + { + "epoch": 1.4161891695585505, + "grad_norm": 0.6966549754142761, + "learning_rate": 7.4169186543534e-06, + "loss": 2.4112, + "step": 17548 + }, + { + "epoch": 1.4162698732951335, + "grad_norm": 0.6930578947067261, + "learning_rate": 
7.410953335906578e-06, + "loss": 2.4155, + "step": 17549 + }, + { + "epoch": 1.4163505770317166, + "grad_norm": 0.7319084405899048, + "learning_rate": 7.404990325020844e-06, + "loss": 2.4015, + "step": 17550 + }, + { + "epoch": 1.4164312807682995, + "grad_norm": 0.6913621425628662, + "learning_rate": 7.399029621844778e-06, + "loss": 2.4474, + "step": 17551 + }, + { + "epoch": 1.4165119845048826, + "grad_norm": 0.7726523280143738, + "learning_rate": 7.3930712265269595e-06, + "loss": 2.4815, + "step": 17552 + }, + { + "epoch": 1.4165926882414657, + "grad_norm": 0.6549103856086731, + "learning_rate": 7.387115139215894e-06, + "loss": 2.378, + "step": 17553 + }, + { + "epoch": 1.4166733919780485, + "grad_norm": 0.6902545094490051, + "learning_rate": 7.381161360059996e-06, + "loss": 2.3993, + "step": 17554 + }, + { + "epoch": 1.4167540957146316, + "grad_norm": 0.6871094107627869, + "learning_rate": 7.375209889207668e-06, + "loss": 2.4211, + "step": 17555 + }, + { + "epoch": 1.4168347994512147, + "grad_norm": 0.7043696641921997, + "learning_rate": 7.369260726807226e-06, + "loss": 2.4395, + "step": 17556 + }, + { + "epoch": 1.4169155031877976, + "grad_norm": 0.6889273524284363, + "learning_rate": 7.363313873006949e-06, + "loss": 2.4014, + "step": 17557 + }, + { + "epoch": 1.4169962069243807, + "grad_norm": 0.6670657992362976, + "learning_rate": 7.3573693279550545e-06, + "loss": 2.3943, + "step": 17558 + }, + { + "epoch": 1.4170769106609635, + "grad_norm": 0.7316192984580994, + "learning_rate": 7.3514270917996895e-06, + "loss": 2.3763, + "step": 17559 + }, + { + "epoch": 1.4171576143975466, + "grad_norm": 0.6922768950462341, + "learning_rate": 7.345487164688947e-06, + "loss": 2.4102, + "step": 17560 + }, + { + "epoch": 1.4172383181341295, + "grad_norm": 0.7255418300628662, + "learning_rate": 7.339549546770852e-06, + "loss": 2.4874, + "step": 17561 + }, + { + "epoch": 1.4173190218707126, + "grad_norm": 0.7474549412727356, + "learning_rate": 7.3336142381934206e-06, + "loss": 
2.4817, + "step": 17562 + }, + { + "epoch": 1.4173997256072957, + "grad_norm": 0.6574866771697998, + "learning_rate": 7.327681239104534e-06, + "loss": 2.4504, + "step": 17563 + }, + { + "epoch": 1.4174804293438785, + "grad_norm": 0.751109778881073, + "learning_rate": 7.321750549652084e-06, + "loss": 2.482, + "step": 17564 + }, + { + "epoch": 1.4175611330804616, + "grad_norm": 0.6917319297790527, + "learning_rate": 7.315822169983866e-06, + "loss": 2.426, + "step": 17565 + }, + { + "epoch": 1.4176418368170447, + "grad_norm": 0.7236911058425903, + "learning_rate": 7.309896100247671e-06, + "loss": 2.4222, + "step": 17566 + }, + { + "epoch": 1.4177225405536276, + "grad_norm": 0.7382739186286926, + "learning_rate": 7.3039723405911145e-06, + "loss": 2.4673, + "step": 17567 + }, + { + "epoch": 1.4178032442902107, + "grad_norm": 0.6394448280334473, + "learning_rate": 7.2980508911618895e-06, + "loss": 2.4301, + "step": 17568 + }, + { + "epoch": 1.4178839480267937, + "grad_norm": 0.7402171492576599, + "learning_rate": 7.292131752107589e-06, + "loss": 2.4345, + "step": 17569 + }, + { + "epoch": 1.4179646517633766, + "grad_norm": 0.6540209054946899, + "learning_rate": 7.286214923575685e-06, + "loss": 2.4025, + "step": 17570 + }, + { + "epoch": 1.4180453554999597, + "grad_norm": 0.7361408472061157, + "learning_rate": 7.280300405713658e-06, + "loss": 2.4383, + "step": 17571 + }, + { + "epoch": 1.4181260592365428, + "grad_norm": 0.7483302354812622, + "learning_rate": 7.274388198668936e-06, + "loss": 2.3909, + "step": 17572 + }, + { + "epoch": 1.4182067629731256, + "grad_norm": 0.7666492462158203, + "learning_rate": 7.268478302588833e-06, + "loss": 2.3646, + "step": 17573 + }, + { + "epoch": 1.4182874667097087, + "grad_norm": 0.7461634278297424, + "learning_rate": 7.262570717620642e-06, + "loss": 2.4247, + "step": 17574 + }, + { + "epoch": 1.4183681704462916, + "grad_norm": 0.6593511700630188, + "learning_rate": 7.256665443911637e-06, + "loss": 2.4373, + "step": 17575 + }, + { + 
"epoch": 1.4184488741828747, + "grad_norm": 0.6628448963165283, + "learning_rate": 7.250762481608941e-06, + "loss": 2.4028, + "step": 17576 + }, + { + "epoch": 1.4185295779194576, + "grad_norm": 0.7371554970741272, + "learning_rate": 7.244861830859695e-06, + "loss": 2.3893, + "step": 17577 + }, + { + "epoch": 1.4186102816560406, + "grad_norm": 0.6896550059318542, + "learning_rate": 7.238963491810935e-06, + "loss": 2.4039, + "step": 17578 + }, + { + "epoch": 1.4186909853926237, + "grad_norm": 0.6840630173683167, + "learning_rate": 7.233067464609722e-06, + "loss": 2.3658, + "step": 17579 + }, + { + "epoch": 1.4187716891292066, + "grad_norm": 0.7413774728775024, + "learning_rate": 7.227173749402949e-06, + "loss": 2.4429, + "step": 17580 + }, + { + "epoch": 1.4188523928657897, + "grad_norm": 0.7088857889175415, + "learning_rate": 7.22128234633751e-06, + "loss": 2.4487, + "step": 17581 + }, + { + "epoch": 1.4189330966023728, + "grad_norm": 0.7451753616333008, + "learning_rate": 7.215393255560265e-06, + "loss": 2.43, + "step": 17582 + }, + { + "epoch": 1.4190138003389556, + "grad_norm": 0.7113354802131653, + "learning_rate": 7.209506477217942e-06, + "loss": 2.4079, + "step": 17583 + }, + { + "epoch": 1.4190945040755387, + "grad_norm": 0.6877462863922119, + "learning_rate": 7.203622011457268e-06, + "loss": 2.4638, + "step": 17584 + }, + { + "epoch": 1.4191752078121218, + "grad_norm": 0.6908687353134155, + "learning_rate": 7.1977398584249345e-06, + "loss": 2.4117, + "step": 17585 + }, + { + "epoch": 1.4192559115487047, + "grad_norm": 0.7053657174110413, + "learning_rate": 7.191860018267482e-06, + "loss": 2.4128, + "step": 17586 + }, + { + "epoch": 1.4193366152852878, + "grad_norm": 0.6886352896690369, + "learning_rate": 7.185982491131493e-06, + "loss": 2.4201, + "step": 17587 + }, + { + "epoch": 1.4194173190218708, + "grad_norm": 0.7148453593254089, + "learning_rate": 7.180107277163428e-06, + "loss": 2.456, + "step": 17588 + }, + { + "epoch": 1.4194980227584537, + 
"grad_norm": 0.7405968904495239, + "learning_rate": 7.174234376509725e-06, + "loss": 2.371, + "step": 17589 + }, + { + "epoch": 1.4195787264950368, + "grad_norm": 0.6733896136283875, + "learning_rate": 7.168363789316757e-06, + "loss": 2.439, + "step": 17590 + }, + { + "epoch": 1.4196594302316197, + "grad_norm": 0.7196522355079651, + "learning_rate": 7.162495515730838e-06, + "loss": 2.4666, + "step": 17591 + }, + { + "epoch": 1.4197401339682028, + "grad_norm": 0.7885043025016785, + "learning_rate": 7.156629555898198e-06, + "loss": 2.3704, + "step": 17592 + }, + { + "epoch": 1.4198208377047856, + "grad_norm": 0.7290148735046387, + "learning_rate": 7.15076590996504e-06, + "loss": 2.4693, + "step": 17593 + }, + { + "epoch": 1.4199015414413687, + "grad_norm": 0.7527376413345337, + "learning_rate": 7.144904578077505e-06, + "loss": 2.5135, + "step": 17594 + }, + { + "epoch": 1.4199822451779518, + "grad_norm": 0.740208625793457, + "learning_rate": 7.139045560381697e-06, + "loss": 2.4153, + "step": 17595 + }, + { + "epoch": 1.4200629489145347, + "grad_norm": 0.7285439968109131, + "learning_rate": 7.133188857023599e-06, + "loss": 2.391, + "step": 17596 + }, + { + "epoch": 1.4201436526511177, + "grad_norm": 0.6705127358436584, + "learning_rate": 7.1273344681491824e-06, + "loss": 2.4037, + "step": 17597 + }, + { + "epoch": 1.4202243563877008, + "grad_norm": 0.7113380432128906, + "learning_rate": 7.121482393904366e-06, + "loss": 2.4395, + "step": 17598 + }, + { + "epoch": 1.4203050601242837, + "grad_norm": 0.6606113314628601, + "learning_rate": 7.1156326344349985e-06, + "loss": 2.4618, + "step": 17599 + }, + { + "epoch": 1.4203857638608668, + "grad_norm": 0.6471076607704163, + "learning_rate": 7.109785189886864e-06, + "loss": 2.4263, + "step": 17600 + }, + { + "epoch": 1.4204664675974499, + "grad_norm": 0.7686622142791748, + "learning_rate": 7.103940060405712e-06, + "loss": 2.3989, + "step": 17601 + }, + { + "epoch": 1.4205471713340327, + "grad_norm": 0.6636856198310852, + 
"learning_rate": 7.0980972461372035e-06, + "loss": 2.4012, + "step": 17602 + }, + { + "epoch": 1.4206278750706158, + "grad_norm": 0.719194769859314, + "learning_rate": 7.0922567472269444e-06, + "loss": 2.4121, + "step": 17603 + }, + { + "epoch": 1.4207085788071987, + "grad_norm": 0.6569145321846008, + "learning_rate": 7.0864185638205404e-06, + "loss": 2.368, + "step": 17604 + }, + { + "epoch": 1.4207892825437818, + "grad_norm": 0.6548880338668823, + "learning_rate": 7.080582696063442e-06, + "loss": 2.4081, + "step": 17605 + }, + { + "epoch": 1.4208699862803646, + "grad_norm": 0.6192221641540527, + "learning_rate": 7.074749144101112e-06, + "loss": 2.3765, + "step": 17606 + }, + { + "epoch": 1.4209506900169477, + "grad_norm": 0.733065128326416, + "learning_rate": 7.068917908078942e-06, + "loss": 2.4429, + "step": 17607 + }, + { + "epoch": 1.4210313937535308, + "grad_norm": 0.7430265545845032, + "learning_rate": 7.063088988142275e-06, + "loss": 2.4041, + "step": 17608 + }, + { + "epoch": 1.4211120974901137, + "grad_norm": 0.7140394449234009, + "learning_rate": 7.0572623844363584e-06, + "loss": 2.3897, + "step": 17609 + }, + { + "epoch": 1.4211928012266968, + "grad_norm": 0.7149982452392578, + "learning_rate": 7.051438097106422e-06, + "loss": 2.4124, + "step": 17610 + }, + { + "epoch": 1.4212735049632799, + "grad_norm": 0.7337482571601868, + "learning_rate": 7.045616126297638e-06, + "loss": 2.4636, + "step": 17611 + }, + { + "epoch": 1.4213542086998627, + "grad_norm": 0.6936220526695251, + "learning_rate": 7.039796472155058e-06, + "loss": 2.4287, + "step": 17612 + }, + { + "epoch": 1.4214349124364458, + "grad_norm": 0.7598823308944702, + "learning_rate": 7.033979134823765e-06, + "loss": 2.3592, + "step": 17613 + }, + { + "epoch": 1.421515616173029, + "grad_norm": 0.7291054725646973, + "learning_rate": 7.028164114448732e-06, + "loss": 2.4433, + "step": 17614 + }, + { + "epoch": 1.4215963199096118, + "grad_norm": 0.7178683876991272, + "learning_rate": 
7.022351411174866e-06, + "loss": 2.4615, + "step": 17615 + }, + { + "epoch": 1.4216770236461949, + "grad_norm": 0.6711047887802124, + "learning_rate": 7.01654102514705e-06, + "loss": 2.3828, + "step": 17616 + }, + { + "epoch": 1.421757727382778, + "grad_norm": 0.7782542705535889, + "learning_rate": 7.010732956510091e-06, + "loss": 2.3609, + "step": 17617 + }, + { + "epoch": 1.4218384311193608, + "grad_norm": 0.7100348472595215, + "learning_rate": 7.004927205408751e-06, + "loss": 2.4107, + "step": 17618 + }, + { + "epoch": 1.421919134855944, + "grad_norm": 0.7031453251838684, + "learning_rate": 6.9991237719877145e-06, + "loss": 2.3806, + "step": 17619 + }, + { + "epoch": 1.4219998385925268, + "grad_norm": 0.6231544613838196, + "learning_rate": 6.993322656391632e-06, + "loss": 2.3515, + "step": 17620 + }, + { + "epoch": 1.4220805423291099, + "grad_norm": 0.7339803576469421, + "learning_rate": 6.987523858765055e-06, + "loss": 2.4218, + "step": 17621 + }, + { + "epoch": 1.4221612460656927, + "grad_norm": 0.6874008774757385, + "learning_rate": 6.9817273792525224e-06, + "loss": 2.4308, + "step": 17622 + }, + { + "epoch": 1.4222419498022758, + "grad_norm": 0.692850649356842, + "learning_rate": 6.97593321799851e-06, + "loss": 2.4159, + "step": 17623 + }, + { + "epoch": 1.422322653538859, + "grad_norm": 0.7120705842971802, + "learning_rate": 6.970141375147398e-06, + "loss": 2.4639, + "step": 17624 + }, + { + "epoch": 1.4224033572754418, + "grad_norm": 0.6556580662727356, + "learning_rate": 6.9643518508435425e-06, + "loss": 2.425, + "step": 17625 + }, + { + "epoch": 1.4224840610120248, + "grad_norm": 0.6515032052993774, + "learning_rate": 6.958564645231225e-06, + "loss": 2.3712, + "step": 17626 + }, + { + "epoch": 1.422564764748608, + "grad_norm": 0.6835498213768005, + "learning_rate": 6.95277975845472e-06, + "loss": 2.4274, + "step": 17627 + }, + { + "epoch": 1.4226454684851908, + "grad_norm": 0.7465600967407227, + "learning_rate": 6.9469971906581555e-06, + "loss": 2.4905, 
+ "step": 17628 + }, + { + "epoch": 1.4227261722217739, + "grad_norm": 0.7540421485900879, + "learning_rate": 6.94121694198564e-06, + "loss": 2.4636, + "step": 17629 + }, + { + "epoch": 1.422806875958357, + "grad_norm": 0.8491081595420837, + "learning_rate": 6.935439012581291e-06, + "loss": 2.345, + "step": 17630 + }, + { + "epoch": 1.4228875796949398, + "grad_norm": 0.6806172728538513, + "learning_rate": 6.92966340258906e-06, + "loss": 2.3937, + "step": 17631 + }, + { + "epoch": 1.422968283431523, + "grad_norm": 0.7586994171142578, + "learning_rate": 6.9238901121529085e-06, + "loss": 2.3645, + "step": 17632 + }, + { + "epoch": 1.423048987168106, + "grad_norm": 0.6934102773666382, + "learning_rate": 6.918119141416735e-06, + "loss": 2.3861, + "step": 17633 + }, + { + "epoch": 1.4231296909046889, + "grad_norm": 0.7167627215385437, + "learning_rate": 6.912350490524322e-06, + "loss": 2.4044, + "step": 17634 + }, + { + "epoch": 1.423210394641272, + "grad_norm": 0.6630876660346985, + "learning_rate": 6.906584159619478e-06, + "loss": 2.4214, + "step": 17635 + }, + { + "epoch": 1.4232910983778548, + "grad_norm": 0.7125325798988342, + "learning_rate": 6.9008201488459325e-06, + "loss": 2.4516, + "step": 17636 + }, + { + "epoch": 1.423371802114438, + "grad_norm": 0.6531164050102234, + "learning_rate": 6.895058458347281e-06, + "loss": 2.4223, + "step": 17637 + }, + { + "epoch": 1.4234525058510208, + "grad_norm": 0.727008581161499, + "learning_rate": 6.889299088267154e-06, + "loss": 2.446, + "step": 17638 + }, + { + "epoch": 1.4235332095876039, + "grad_norm": 0.7188040614128113, + "learning_rate": 6.883542038749091e-06, + "loss": 2.4109, + "step": 17639 + }, + { + "epoch": 1.423613913324187, + "grad_norm": 0.73248291015625, + "learning_rate": 6.877787309936568e-06, + "loss": 2.4398, + "step": 17640 + }, + { + "epoch": 1.4236946170607698, + "grad_norm": 0.7350964546203613, + "learning_rate": 6.872034901973012e-06, + "loss": 2.4766, + "step": 17641 + }, + { + "epoch": 
1.423775320797353, + "grad_norm": 0.7280460596084595, + "learning_rate": 6.866284815001777e-06, + "loss": 2.4588, + "step": 17642 + }, + { + "epoch": 1.423856024533936, + "grad_norm": 0.68912672996521, + "learning_rate": 6.860537049166205e-06, + "loss": 2.353, + "step": 17643 + }, + { + "epoch": 1.4239367282705189, + "grad_norm": 0.6742156147956848, + "learning_rate": 6.85479160460949e-06, + "loss": 2.4123, + "step": 17644 + }, + { + "epoch": 1.424017432007102, + "grad_norm": 0.6858388185501099, + "learning_rate": 6.849048481474863e-06, + "loss": 2.4243, + "step": 17645 + }, + { + "epoch": 1.424098135743685, + "grad_norm": 0.7317911386489868, + "learning_rate": 6.8433076799054644e-06, + "loss": 2.3713, + "step": 17646 + }, + { + "epoch": 1.424178839480268, + "grad_norm": 0.6934579014778137, + "learning_rate": 6.837569200044325e-06, + "loss": 2.4667, + "step": 17647 + }, + { + "epoch": 1.424259543216851, + "grad_norm": 0.7017713189125061, + "learning_rate": 6.831833042034497e-06, + "loss": 2.3543, + "step": 17648 + }, + { + "epoch": 1.4243402469534339, + "grad_norm": 0.7379886507987976, + "learning_rate": 6.8260992060189325e-06, + "loss": 2.4392, + "step": 17649 + }, + { + "epoch": 1.424420950690017, + "grad_norm": 0.6645724177360535, + "learning_rate": 6.820367692140539e-06, + "loss": 2.4329, + "step": 17650 + }, + { + "epoch": 1.4245016544265998, + "grad_norm": 0.642423689365387, + "learning_rate": 6.814638500542159e-06, + "loss": 2.4157, + "step": 17651 + }, + { + "epoch": 1.424582358163183, + "grad_norm": 0.6720073819160461, + "learning_rate": 6.808911631366588e-06, + "loss": 2.44, + "step": 17652 + }, + { + "epoch": 1.424663061899766, + "grad_norm": 0.6966024041175842, + "learning_rate": 6.803187084756524e-06, + "loss": 2.4087, + "step": 17653 + }, + { + "epoch": 1.4247437656363489, + "grad_norm": 0.6998239755630493, + "learning_rate": 6.797464860854652e-06, + "loss": 2.4335, + "step": 17654 + }, + { + "epoch": 1.424824469372932, + "grad_norm": 
0.6885339617729187, + "learning_rate": 6.791744959803614e-06, + "loss": 2.4327, + "step": 17655 + }, + { + "epoch": 1.424905173109515, + "grad_norm": 0.6395631432533264, + "learning_rate": 6.7860273817459294e-06, + "loss": 2.3941, + "step": 17656 + }, + { + "epoch": 1.424985876846098, + "grad_norm": 0.7010350823402405, + "learning_rate": 6.7803121268240956e-06, + "loss": 2.4118, + "step": 17657 + }, + { + "epoch": 1.425066580582681, + "grad_norm": 0.6954346895217896, + "learning_rate": 6.774599195180565e-06, + "loss": 2.416, + "step": 17658 + }, + { + "epoch": 1.425147284319264, + "grad_norm": 0.6685010194778442, + "learning_rate": 6.768888586957722e-06, + "loss": 2.4246, + "step": 17659 + }, + { + "epoch": 1.425227988055847, + "grad_norm": 0.7244373559951782, + "learning_rate": 6.7631803022978776e-06, + "loss": 2.4385, + "step": 17660 + }, + { + "epoch": 1.42530869179243, + "grad_norm": 0.6633989810943604, + "learning_rate": 6.757474341343306e-06, + "loss": 2.413, + "step": 17661 + }, + { + "epoch": 1.4253893955290131, + "grad_norm": 0.6696286797523499, + "learning_rate": 6.751770704236226e-06, + "loss": 2.4586, + "step": 17662 + }, + { + "epoch": 1.425470099265596, + "grad_norm": 0.7322936654090881, + "learning_rate": 6.746069391118759e-06, + "loss": 2.414, + "step": 17663 + }, + { + "epoch": 1.425550803002179, + "grad_norm": 0.6786227226257324, + "learning_rate": 6.740370402133012e-06, + "loss": 2.3964, + "step": 17664 + }, + { + "epoch": 1.425631506738762, + "grad_norm": 0.6408207416534424, + "learning_rate": 6.734673737421027e-06, + "loss": 2.4064, + "step": 17665 + }, + { + "epoch": 1.425712210475345, + "grad_norm": 0.7589663863182068, + "learning_rate": 6.728979397124768e-06, + "loss": 2.3765, + "step": 17666 + }, + { + "epoch": 1.4257929142119279, + "grad_norm": 0.6696135401725769, + "learning_rate": 6.723287381386145e-06, + "loss": 2.4317, + "step": 17667 + }, + { + "epoch": 1.425873617948511, + "grad_norm": 0.6599292159080505, + "learning_rate": 
6.7175976903470325e-06, + "loss": 2.3867, + "step": 17668 + }, + { + "epoch": 1.425954321685094, + "grad_norm": 0.692328929901123, + "learning_rate": 6.711910324149228e-06, + "loss": 2.3996, + "step": 17669 + }, + { + "epoch": 1.426035025421677, + "grad_norm": 0.7615126371383667, + "learning_rate": 6.706225282934475e-06, + "loss": 2.4436, + "step": 17670 + }, + { + "epoch": 1.42611572915826, + "grad_norm": 0.7187603712081909, + "learning_rate": 6.70054256684447e-06, + "loss": 2.4128, + "step": 17671 + }, + { + "epoch": 1.426196432894843, + "grad_norm": 0.6679204702377319, + "learning_rate": 6.694862176020822e-06, + "loss": 2.423, + "step": 17672 + }, + { + "epoch": 1.426277136631426, + "grad_norm": 0.759952962398529, + "learning_rate": 6.689184110605106e-06, + "loss": 2.4279, + "step": 17673 + }, + { + "epoch": 1.426357840368009, + "grad_norm": 0.6619845628738403, + "learning_rate": 6.683508370738845e-06, + "loss": 2.4219, + "step": 17674 + }, + { + "epoch": 1.4264385441045921, + "grad_norm": 0.6806942224502563, + "learning_rate": 6.6778349565635005e-06, + "loss": 2.4214, + "step": 17675 + }, + { + "epoch": 1.426519247841175, + "grad_norm": 0.6780219674110413, + "learning_rate": 6.672163868220449e-06, + "loss": 2.4404, + "step": 17676 + }, + { + "epoch": 1.426599951577758, + "grad_norm": 0.7276327013969421, + "learning_rate": 6.6664951058510224e-06, + "loss": 2.4088, + "step": 17677 + }, + { + "epoch": 1.4266806553143412, + "grad_norm": 0.7608953714370728, + "learning_rate": 6.66082866959653e-06, + "loss": 2.4102, + "step": 17678 + }, + { + "epoch": 1.426761359050924, + "grad_norm": 0.6784111261367798, + "learning_rate": 6.6551645595981485e-06, + "loss": 2.4823, + "step": 17679 + }, + { + "epoch": 1.4268420627875071, + "grad_norm": 0.6937912106513977, + "learning_rate": 6.649502775997096e-06, + "loss": 2.4118, + "step": 17680 + }, + { + "epoch": 1.42692276652409, + "grad_norm": 0.7426064014434814, + "learning_rate": 6.643843318934462e-06, + "loss": 2.4407, + 
"step": 17681 + }, + { + "epoch": 1.427003470260673, + "grad_norm": 0.6722440719604492, + "learning_rate": 6.638186188551277e-06, + "loss": 2.3981, + "step": 17682 + }, + { + "epoch": 1.427084173997256, + "grad_norm": 0.6830718517303467, + "learning_rate": 6.632531384988538e-06, + "loss": 2.4076, + "step": 17683 + }, + { + "epoch": 1.427164877733839, + "grad_norm": 0.6521410942077637, + "learning_rate": 6.626878908387202e-06, + "loss": 2.4311, + "step": 17684 + }, + { + "epoch": 1.4272455814704221, + "grad_norm": 0.7150115966796875, + "learning_rate": 6.6212287588880985e-06, + "loss": 2.4776, + "step": 17685 + }, + { + "epoch": 1.427326285207005, + "grad_norm": 0.6741146445274353, + "learning_rate": 6.615580936632082e-06, + "loss": 2.4134, + "step": 17686 + }, + { + "epoch": 1.427406988943588, + "grad_norm": 0.6979508996009827, + "learning_rate": 6.6099354417599064e-06, + "loss": 2.4022, + "step": 17687 + }, + { + "epoch": 1.4274876926801712, + "grad_norm": 0.7078632712364197, + "learning_rate": 6.604292274412249e-06, + "loss": 2.4259, + "step": 17688 + }, + { + "epoch": 1.427568396416754, + "grad_norm": 0.6485830545425415, + "learning_rate": 6.598651434729764e-06, + "loss": 2.3641, + "step": 17689 + }, + { + "epoch": 1.4276491001533371, + "grad_norm": 0.7130312919616699, + "learning_rate": 6.593012922853048e-06, + "loss": 2.3965, + "step": 17690 + }, + { + "epoch": 1.4277298038899202, + "grad_norm": 0.6736258268356323, + "learning_rate": 6.587376738922613e-06, + "loss": 2.3729, + "step": 17691 + }, + { + "epoch": 1.427810507626503, + "grad_norm": 0.6798346638679504, + "learning_rate": 6.581742883078923e-06, + "loss": 2.4479, + "step": 17692 + }, + { + "epoch": 1.4278912113630862, + "grad_norm": 0.6962637901306152, + "learning_rate": 6.576111355462411e-06, + "loss": 2.4433, + "step": 17693 + }, + { + "epoch": 1.4279719150996693, + "grad_norm": 0.6981319785118103, + "learning_rate": 6.570482156213431e-06, + "loss": 2.4564, + "step": 17694 + }, + { + "epoch": 
1.4280526188362521, + "grad_norm": 0.6484888195991516, + "learning_rate": 6.564855285472238e-06, + "loss": 2.3709, + "step": 17695 + }, + { + "epoch": 1.428133322572835, + "grad_norm": 0.6646093726158142, + "learning_rate": 6.5592307433791074e-06, + "loss": 2.3716, + "step": 17696 + }, + { + "epoch": 1.428214026309418, + "grad_norm": 0.7607010006904602, + "learning_rate": 6.5536085300742065e-06, + "loss": 2.4029, + "step": 17697 + }, + { + "epoch": 1.4282947300460012, + "grad_norm": 0.7242185473442078, + "learning_rate": 6.547988645697644e-06, + "loss": 2.4091, + "step": 17698 + }, + { + "epoch": 1.428375433782584, + "grad_norm": 0.7394922375679016, + "learning_rate": 6.542371090389487e-06, + "loss": 2.4288, + "step": 17699 + }, + { + "epoch": 1.4284561375191671, + "grad_norm": 0.6763161420822144, + "learning_rate": 6.536755864289745e-06, + "loss": 2.3556, + "step": 17700 + }, + { + "epoch": 1.4285368412557502, + "grad_norm": 0.6837669610977173, + "learning_rate": 6.531142967538362e-06, + "loss": 2.4312, + "step": 17701 + }, + { + "epoch": 1.428617544992333, + "grad_norm": 0.6702602505683899, + "learning_rate": 6.525532400275225e-06, + "loss": 2.4144, + "step": 17702 + }, + { + "epoch": 1.4286982487289162, + "grad_norm": 0.7338566780090332, + "learning_rate": 6.519924162640167e-06, + "loss": 2.4536, + "step": 17703 + }, + { + "epoch": 1.4287789524654992, + "grad_norm": 0.7169400453567505, + "learning_rate": 6.514318254772967e-06, + "loss": 2.4236, + "step": 17704 + }, + { + "epoch": 1.428859656202082, + "grad_norm": 0.7129381895065308, + "learning_rate": 6.508714676813321e-06, + "loss": 2.393, + "step": 17705 + }, + { + "epoch": 1.4289403599386652, + "grad_norm": 0.7212249636650085, + "learning_rate": 6.503113428900898e-06, + "loss": 2.3907, + "step": 17706 + }, + { + "epoch": 1.4290210636752483, + "grad_norm": 0.7539047002792358, + "learning_rate": 6.497514511175296e-06, + "loss": 2.434, + "step": 17707 + }, + { + "epoch": 1.4291017674118311, + "grad_norm": 
0.6876792907714844, + "learning_rate": 6.491917923776048e-06, + "loss": 2.4172, + "step": 17708 + }, + { + "epoch": 1.4291824711484142, + "grad_norm": 0.6665194034576416, + "learning_rate": 6.486323666842631e-06, + "loss": 2.4277, + "step": 17709 + }, + { + "epoch": 1.429263174884997, + "grad_norm": 0.7311907410621643, + "learning_rate": 6.4807317405144675e-06, + "loss": 2.4201, + "step": 17710 + }, + { + "epoch": 1.4293438786215802, + "grad_norm": 0.6492041349411011, + "learning_rate": 6.475142144930946e-06, + "loss": 2.425, + "step": 17711 + }, + { + "epoch": 1.429424582358163, + "grad_norm": 0.7610225677490234, + "learning_rate": 6.469554880231343e-06, + "loss": 2.4694, + "step": 17712 + }, + { + "epoch": 1.4295052860947461, + "grad_norm": 0.7112852931022644, + "learning_rate": 6.463969946554948e-06, + "loss": 2.4431, + "step": 17713 + }, + { + "epoch": 1.4295859898313292, + "grad_norm": 0.6712578535079956, + "learning_rate": 6.458387344040917e-06, + "loss": 2.4067, + "step": 17714 + }, + { + "epoch": 1.429666693567912, + "grad_norm": 0.6936217546463013, + "learning_rate": 6.452807072828393e-06, + "loss": 2.4229, + "step": 17715 + }, + { + "epoch": 1.4297473973044952, + "grad_norm": 0.6615330576896667, + "learning_rate": 6.4472291330564535e-06, + "loss": 2.3567, + "step": 17716 + }, + { + "epoch": 1.4298281010410783, + "grad_norm": 0.7209796905517578, + "learning_rate": 6.441653524864111e-06, + "loss": 2.3577, + "step": 17717 + }, + { + "epoch": 1.4299088047776611, + "grad_norm": 0.7022082805633545, + "learning_rate": 6.436080248390319e-06, + "loss": 2.3681, + "step": 17718 + }, + { + "epoch": 1.4299895085142442, + "grad_norm": 0.6859815120697021, + "learning_rate": 6.430509303773991e-06, + "loss": 2.4193, + "step": 17719 + }, + { + "epoch": 1.4300702122508273, + "grad_norm": 0.7126015424728394, + "learning_rate": 6.424940691153969e-06, + "loss": 2.3746, + "step": 17720 + }, + { + "epoch": 1.4301509159874102, + "grad_norm": 0.6499980092048645, + "learning_rate": 
6.419374410669021e-06, + "loss": 2.445, + "step": 17721 + }, + { + "epoch": 1.4302316197239933, + "grad_norm": 0.6867473125457764, + "learning_rate": 6.413810462457892e-06, + "loss": 2.3323, + "step": 17722 + }, + { + "epoch": 1.4303123234605764, + "grad_norm": 0.7272062301635742, + "learning_rate": 6.4082488466592596e-06, + "loss": 2.4058, + "step": 17723 + }, + { + "epoch": 1.4303930271971592, + "grad_norm": 0.7681101560592651, + "learning_rate": 6.40268956341169e-06, + "loss": 2.4534, + "step": 17724 + }, + { + "epoch": 1.4304737309337423, + "grad_norm": 0.8149757981300354, + "learning_rate": 6.397132612853773e-06, + "loss": 2.4165, + "step": 17725 + }, + { + "epoch": 1.4305544346703252, + "grad_norm": 0.6749057769775391, + "learning_rate": 6.39157799512401e-06, + "loss": 2.364, + "step": 17726 + }, + { + "epoch": 1.4306351384069083, + "grad_norm": 0.716894268989563, + "learning_rate": 6.386025710360799e-06, + "loss": 2.4379, + "step": 17727 + }, + { + "epoch": 1.4307158421434911, + "grad_norm": 0.738310694694519, + "learning_rate": 6.380475758702531e-06, + "loss": 2.3938, + "step": 17728 + }, + { + "epoch": 1.4307965458800742, + "grad_norm": 0.7101424336433411, + "learning_rate": 6.3749281402875505e-06, + "loss": 2.4629, + "step": 17729 + }, + { + "epoch": 1.4308772496166573, + "grad_norm": 0.6945566534996033, + "learning_rate": 6.369382855254069e-06, + "loss": 2.4235, + "step": 17730 + }, + { + "epoch": 1.4309579533532402, + "grad_norm": 0.7886360287666321, + "learning_rate": 6.363839903740332e-06, + "loss": 2.4284, + "step": 17731 + }, + { + "epoch": 1.4310386570898233, + "grad_norm": 0.7391656637191772, + "learning_rate": 6.358299285884495e-06, + "loss": 2.379, + "step": 17732 + }, + { + "epoch": 1.4311193608264063, + "grad_norm": 0.6601181626319885, + "learning_rate": 6.352761001824603e-06, + "loss": 2.3646, + "step": 17733 + }, + { + "epoch": 1.4312000645629892, + "grad_norm": 0.7043817043304443, + "learning_rate": 6.347225051698702e-06, + "loss": 2.4055, 
+ "step": 17734 + }, + { + "epoch": 1.4312807682995723, + "grad_norm": 0.7078529000282288, + "learning_rate": 6.341691435644759e-06, + "loss": 2.3811, + "step": 17735 + }, + { + "epoch": 1.4313614720361554, + "grad_norm": 0.7172150015830994, + "learning_rate": 6.336160153800707e-06, + "loss": 2.3854, + "step": 17736 + }, + { + "epoch": 1.4314421757727382, + "grad_norm": 0.6997926235198975, + "learning_rate": 6.330631206304383e-06, + "loss": 2.3534, + "step": 17737 + }, + { + "epoch": 1.4315228795093213, + "grad_norm": 0.7089913487434387, + "learning_rate": 6.325104593293563e-06, + "loss": 2.4508, + "step": 17738 + }, + { + "epoch": 1.4316035832459044, + "grad_norm": 0.7183980345726013, + "learning_rate": 6.319580314906037e-06, + "loss": 2.3972, + "step": 17739 + }, + { + "epoch": 1.4316842869824873, + "grad_norm": 0.6621310710906982, + "learning_rate": 6.3140583712794295e-06, + "loss": 2.3512, + "step": 17740 + }, + { + "epoch": 1.4317649907190704, + "grad_norm": 0.7076746821403503, + "learning_rate": 6.308538762551386e-06, + "loss": 2.4544, + "step": 17741 + }, + { + "epoch": 1.4318456944556532, + "grad_norm": 0.7050352692604065, + "learning_rate": 6.303021488859462e-06, + "loss": 2.3314, + "step": 17742 + }, + { + "epoch": 1.4319263981922363, + "grad_norm": 0.7305126190185547, + "learning_rate": 6.297506550341181e-06, + "loss": 2.4232, + "step": 17743 + }, + { + "epoch": 1.4320071019288192, + "grad_norm": 0.7779221534729004, + "learning_rate": 6.291993947133967e-06, + "loss": 2.4861, + "step": 17744 + }, + { + "epoch": 1.4320878056654023, + "grad_norm": 0.7207643389701843, + "learning_rate": 6.286483679375244e-06, + "loss": 2.4184, + "step": 17745 + }, + { + "epoch": 1.4321685094019854, + "grad_norm": 0.7540406584739685, + "learning_rate": 6.280975747202289e-06, + "loss": 2.4741, + "step": 17746 + }, + { + "epoch": 1.4322492131385682, + "grad_norm": 0.7011128067970276, + "learning_rate": 6.275470150752416e-06, + "loss": 2.3661, + "step": 17747 + }, + { + "epoch": 
1.4323299168751513, + "grad_norm": 0.666495680809021, + "learning_rate": 6.269966890162837e-06, + "loss": 2.4294, + "step": 17748 + }, + { + "epoch": 1.4324106206117344, + "grad_norm": 0.7928789854049683, + "learning_rate": 6.264465965570676e-06, + "loss": 2.3722, + "step": 17749 + }, + { + "epoch": 1.4324913243483173, + "grad_norm": 0.778322160243988, + "learning_rate": 6.258967377113056e-06, + "loss": 2.4365, + "step": 17750 + }, + { + "epoch": 1.4325720280849004, + "grad_norm": 0.7157254815101624, + "learning_rate": 6.2534711249270015e-06, + "loss": 2.4222, + "step": 17751 + }, + { + "epoch": 1.4326527318214834, + "grad_norm": 0.752855122089386, + "learning_rate": 6.247977209149514e-06, + "loss": 2.4195, + "step": 17752 + }, + { + "epoch": 1.4327334355580663, + "grad_norm": 0.6898384690284729, + "learning_rate": 6.242485629917494e-06, + "loss": 2.372, + "step": 17753 + }, + { + "epoch": 1.4328141392946494, + "grad_norm": 0.6400893330574036, + "learning_rate": 6.236996387367822e-06, + "loss": 2.3678, + "step": 17754 + }, + { + "epoch": 1.4328948430312323, + "grad_norm": 0.6957802176475525, + "learning_rate": 6.23150948163731e-06, + "loss": 2.4423, + "step": 17755 + }, + { + "epoch": 1.4329755467678154, + "grad_norm": 0.6983963251113892, + "learning_rate": 6.226024912862683e-06, + "loss": 2.3467, + "step": 17756 + }, + { + "epoch": 1.4330562505043982, + "grad_norm": 0.697910487651825, + "learning_rate": 6.220542681180652e-06, + "loss": 2.3676, + "step": 17757 + }, + { + "epoch": 1.4331369542409813, + "grad_norm": 0.6732818484306335, + "learning_rate": 6.215062786727843e-06, + "loss": 2.4259, + "step": 17758 + }, + { + "epoch": 1.4332176579775644, + "grad_norm": 0.6379408240318298, + "learning_rate": 6.209585229640813e-06, + "loss": 2.409, + "step": 17759 + }, + { + "epoch": 1.4332983617141473, + "grad_norm": 0.6726407408714294, + "learning_rate": 6.2041100100560856e-06, + "loss": 2.3732, + "step": 17760 + }, + { + "epoch": 1.4333790654507303, + "grad_norm": 
0.7126357555389404, + "learning_rate": 6.19863712811013e-06, + "loss": 2.4324, + "step": 17761 + }, + { + "epoch": 1.4334597691873134, + "grad_norm": 0.7055345773696899, + "learning_rate": 6.193166583939336e-06, + "loss": 2.463, + "step": 17762 + }, + { + "epoch": 1.4335404729238963, + "grad_norm": 0.6864510774612427, + "learning_rate": 6.18769837768004e-06, + "loss": 2.4155, + "step": 17763 + }, + { + "epoch": 1.4336211766604794, + "grad_norm": 0.7269968390464783, + "learning_rate": 6.182232509468544e-06, + "loss": 2.4197, + "step": 17764 + }, + { + "epoch": 1.4337018803970625, + "grad_norm": 0.7829548716545105, + "learning_rate": 6.176768979441039e-06, + "loss": 2.4054, + "step": 17765 + }, + { + "epoch": 1.4337825841336453, + "grad_norm": 0.6840609312057495, + "learning_rate": 6.171307787733704e-06, + "loss": 2.4177, + "step": 17766 + }, + { + "epoch": 1.4338632878702284, + "grad_norm": 0.7106159925460815, + "learning_rate": 6.165848934482654e-06, + "loss": 2.4039, + "step": 17767 + }, + { + "epoch": 1.4339439916068115, + "grad_norm": 0.6945303082466125, + "learning_rate": 6.160392419823957e-06, + "loss": 2.45, + "step": 17768 + }, + { + "epoch": 1.4340246953433944, + "grad_norm": 0.6924156546592712, + "learning_rate": 6.15493824389356e-06, + "loss": 2.4059, + "step": 17769 + }, + { + "epoch": 1.4341053990799775, + "grad_norm": 0.6932214498519897, + "learning_rate": 6.149486406827409e-06, + "loss": 2.4046, + "step": 17770 + }, + { + "epoch": 1.4341861028165603, + "grad_norm": 0.6683449149131775, + "learning_rate": 6.144036908761386e-06, + "loss": 2.4074, + "step": 17771 + }, + { + "epoch": 1.4342668065531434, + "grad_norm": 0.7230218052864075, + "learning_rate": 6.138589749831314e-06, + "loss": 2.3718, + "step": 17772 + }, + { + "epoch": 1.4343475102897263, + "grad_norm": 0.68938809633255, + "learning_rate": 6.133144930172929e-06, + "loss": 2.3776, + "step": 17773 + }, + { + "epoch": 1.4344282140263094, + "grad_norm": 0.6659870743751526, + "learning_rate": 
6.127702449921968e-06, + "loss": 2.3779, + "step": 17774 + }, + { + "epoch": 1.4345089177628925, + "grad_norm": 0.7351429462432861, + "learning_rate": 6.122262309214033e-06, + "loss": 2.334, + "step": 17775 + }, + { + "epoch": 1.4345896214994753, + "grad_norm": 0.6995889544487, + "learning_rate": 6.116824508184715e-06, + "loss": 2.4139, + "step": 17776 + }, + { + "epoch": 1.4346703252360584, + "grad_norm": 0.6568582653999329, + "learning_rate": 6.111389046969551e-06, + "loss": 2.4348, + "step": 17777 + }, + { + "epoch": 1.4347510289726415, + "grad_norm": 0.7047903537750244, + "learning_rate": 6.1059559257039985e-06, + "loss": 2.3877, + "step": 17778 + }, + { + "epoch": 1.4348317327092244, + "grad_norm": 0.7299826145172119, + "learning_rate": 6.10052514452345e-06, + "loss": 2.4533, + "step": 17779 + }, + { + "epoch": 1.4349124364458075, + "grad_norm": 0.6617172956466675, + "learning_rate": 6.095096703563296e-06, + "loss": 2.4276, + "step": 17780 + }, + { + "epoch": 1.4349931401823905, + "grad_norm": 0.7248536944389343, + "learning_rate": 6.089670602958775e-06, + "loss": 2.4145, + "step": 17781 + }, + { + "epoch": 1.4350738439189734, + "grad_norm": 0.7404766082763672, + "learning_rate": 6.084246842845154e-06, + "loss": 2.4556, + "step": 17782 + }, + { + "epoch": 1.4351545476555565, + "grad_norm": 0.6808308362960815, + "learning_rate": 6.0788254233576035e-06, + "loss": 2.3648, + "step": 17783 + }, + { + "epoch": 1.4352352513921396, + "grad_norm": 0.6631487011909485, + "learning_rate": 6.073406344631249e-06, + "loss": 2.4064, + "step": 17784 + }, + { + "epoch": 1.4353159551287225, + "grad_norm": 0.6690654158592224, + "learning_rate": 6.067989606801128e-06, + "loss": 2.4749, + "step": 17785 + }, + { + "epoch": 1.4353966588653055, + "grad_norm": 0.6438129544258118, + "learning_rate": 6.062575210002241e-06, + "loss": 2.424, + "step": 17786 + }, + { + "epoch": 1.4354773626018884, + "grad_norm": 0.710590124130249, + "learning_rate": 6.05716315436955e-06, + "loss": 2.4419, + 
"step": 17787 + }, + { + "epoch": 1.4355580663384715, + "grad_norm": 0.72870272397995, + "learning_rate": 6.0517534400378995e-06, + "loss": 2.4341, + "step": 17788 + }, + { + "epoch": 1.4356387700750544, + "grad_norm": 0.6548538208007812, + "learning_rate": 6.04634606714215e-06, + "loss": 2.3721, + "step": 17789 + }, + { + "epoch": 1.4357194738116374, + "grad_norm": 0.7368030548095703, + "learning_rate": 6.040941035817061e-06, + "loss": 2.461, + "step": 17790 + }, + { + "epoch": 1.4358001775482205, + "grad_norm": 0.7763129472732544, + "learning_rate": 6.035538346197311e-06, + "loss": 2.4701, + "step": 17791 + }, + { + "epoch": 1.4358808812848034, + "grad_norm": 0.7631728649139404, + "learning_rate": 6.030137998417573e-06, + "loss": 2.4796, + "step": 17792 + }, + { + "epoch": 1.4359615850213865, + "grad_norm": 0.7032707929611206, + "learning_rate": 6.024739992612449e-06, + "loss": 2.4119, + "step": 17793 + }, + { + "epoch": 1.4360422887579696, + "grad_norm": 0.701252818107605, + "learning_rate": 6.019344328916454e-06, + "loss": 2.4501, + "step": 17794 + }, + { + "epoch": 1.4361229924945524, + "grad_norm": 0.7271695733070374, + "learning_rate": 6.013951007464058e-06, + "loss": 2.4136, + "step": 17795 + }, + { + "epoch": 1.4362036962311355, + "grad_norm": 0.6560700535774231, + "learning_rate": 6.0085600283897095e-06, + "loss": 2.3737, + "step": 17796 + }, + { + "epoch": 1.4362843999677186, + "grad_norm": 0.6831890344619751, + "learning_rate": 6.003171391827722e-06, + "loss": 2.3986, + "step": 17797 + }, + { + "epoch": 1.4363651037043015, + "grad_norm": 0.6875705718994141, + "learning_rate": 5.997785097912412e-06, + "loss": 2.4159, + "step": 17798 + }, + { + "epoch": 1.4364458074408846, + "grad_norm": 0.704727053642273, + "learning_rate": 5.992401146778026e-06, + "loss": 2.3833, + "step": 17799 + }, + { + "epoch": 1.4365265111774674, + "grad_norm": 0.6632246971130371, + "learning_rate": 5.987019538558758e-06, + "loss": 2.3907, + "step": 17800 + }, + { + "epoch": 
1.4366072149140505, + "grad_norm": 0.7065477967262268, + "learning_rate": 5.981640273388689e-06, + "loss": 2.3473, + "step": 17801 + }, + { + "epoch": 1.4366879186506334, + "grad_norm": 0.6765400171279907, + "learning_rate": 5.976263351401923e-06, + "loss": 2.4051, + "step": 17802 + }, + { + "epoch": 1.4367686223872165, + "grad_norm": 0.6867364645004272, + "learning_rate": 5.9708887727324525e-06, + "loss": 2.3452, + "step": 17803 + }, + { + "epoch": 1.4368493261237996, + "grad_norm": 0.644715428352356, + "learning_rate": 5.965516537514215e-06, + "loss": 2.3826, + "step": 17804 + }, + { + "epoch": 1.4369300298603824, + "grad_norm": 0.7649596333503723, + "learning_rate": 5.9601466458811265e-06, + "loss": 2.436, + "step": 17805 + }, + { + "epoch": 1.4370107335969655, + "grad_norm": 0.699653148651123, + "learning_rate": 5.954779097967023e-06, + "loss": 2.3694, + "step": 17806 + }, + { + "epoch": 1.4370914373335486, + "grad_norm": 0.7054964900016785, + "learning_rate": 5.949413893905642e-06, + "loss": 2.4194, + "step": 17807 + }, + { + "epoch": 1.4371721410701315, + "grad_norm": 0.7534568309783936, + "learning_rate": 5.944051033830722e-06, + "loss": 2.4175, + "step": 17808 + }, + { + "epoch": 1.4372528448067146, + "grad_norm": 0.7056108117103577, + "learning_rate": 5.9386905178759225e-06, + "loss": 2.4232, + "step": 17809 + }, + { + "epoch": 1.4373335485432976, + "grad_norm": 0.6868974566459656, + "learning_rate": 5.933332346174825e-06, + "loss": 2.3799, + "step": 17810 + }, + { + "epoch": 1.4374142522798805, + "grad_norm": 0.7155748009681702, + "learning_rate": 5.927976518860978e-06, + "loss": 2.4151, + "step": 17811 + }, + { + "epoch": 1.4374949560164636, + "grad_norm": 0.7482681274414062, + "learning_rate": 5.922623036067853e-06, + "loss": 2.4568, + "step": 17812 + }, + { + "epoch": 1.4375756597530467, + "grad_norm": 0.6348850727081299, + "learning_rate": 5.917271897928889e-06, + "loss": 2.4202, + "step": 17813 + }, + { + "epoch": 1.4376563634896296, + "grad_norm": 
0.7463829517364502, + "learning_rate": 5.911923104577455e-06, + "loss": 2.4288, + "step": 17814 + }, + { + "epoch": 1.4377370672262126, + "grad_norm": 0.7019917964935303, + "learning_rate": 5.9065766561468335e-06, + "loss": 2.475, + "step": 17815 + }, + { + "epoch": 1.4378177709627955, + "grad_norm": 0.7005626559257507, + "learning_rate": 5.9012325527702975e-06, + "loss": 2.3869, + "step": 17816 + }, + { + "epoch": 1.4378984746993786, + "grad_norm": 0.7216863632202148, + "learning_rate": 5.895890794581016e-06, + "loss": 2.4224, + "step": 17817 + }, + { + "epoch": 1.4379791784359615, + "grad_norm": 0.7037425637245178, + "learning_rate": 5.890551381712128e-06, + "loss": 2.4347, + "step": 17818 + }, + { + "epoch": 1.4380598821725445, + "grad_norm": 0.7240646481513977, + "learning_rate": 5.8852143142967055e-06, + "loss": 2.4275, + "step": 17819 + }, + { + "epoch": 1.4381405859091276, + "grad_norm": 0.6970441937446594, + "learning_rate": 5.879879592467763e-06, + "loss": 2.4526, + "step": 17820 + }, + { + "epoch": 1.4382212896457105, + "grad_norm": 0.6941537857055664, + "learning_rate": 5.8745472163582395e-06, + "loss": 2.4882, + "step": 17821 + }, + { + "epoch": 1.4383019933822936, + "grad_norm": 0.668228030204773, + "learning_rate": 5.86921718610105e-06, + "loss": 2.3824, + "step": 17822 + }, + { + "epoch": 1.4383826971188767, + "grad_norm": 0.6851341128349304, + "learning_rate": 5.863889501829034e-06, + "loss": 2.3931, + "step": 17823 + }, + { + "epoch": 1.4384634008554595, + "grad_norm": 0.6785841584205627, + "learning_rate": 5.858564163674962e-06, + "loss": 2.4268, + "step": 17824 + }, + { + "epoch": 1.4385441045920426, + "grad_norm": 0.7137345671653748, + "learning_rate": 5.853241171771573e-06, + "loss": 2.3509, + "step": 17825 + }, + { + "epoch": 1.4386248083286257, + "grad_norm": 0.7188790440559387, + "learning_rate": 5.847920526251505e-06, + "loss": 2.422, + "step": 17826 + }, + { + "epoch": 1.4387055120652086, + "grad_norm": 0.6798515915870667, + 
"learning_rate": 5.842602227247374e-06, + "loss": 2.3917, + "step": 17827 + }, + { + "epoch": 1.4387862158017917, + "grad_norm": 0.7113839387893677, + "learning_rate": 5.837286274891718e-06, + "loss": 2.4119, + "step": 17828 + }, + { + "epoch": 1.4388669195383748, + "grad_norm": 0.6735878586769104, + "learning_rate": 5.831972669317054e-06, + "loss": 2.3973, + "step": 17829 + }, + { + "epoch": 1.4389476232749576, + "grad_norm": 0.6665332913398743, + "learning_rate": 5.8266614106557645e-06, + "loss": 2.3567, + "step": 17830 + }, + { + "epoch": 1.4390283270115407, + "grad_norm": 0.6652774214744568, + "learning_rate": 5.821352499040256e-06, + "loss": 2.4022, + "step": 17831 + }, + { + "epoch": 1.4391090307481236, + "grad_norm": 0.672563910484314, + "learning_rate": 5.8160459346028205e-06, + "loss": 2.4142, + "step": 17832 + }, + { + "epoch": 1.4391897344847067, + "grad_norm": 0.6333127021789551, + "learning_rate": 5.8107417174757205e-06, + "loss": 2.3679, + "step": 17833 + }, + { + "epoch": 1.4392704382212895, + "grad_norm": 0.7484139204025269, + "learning_rate": 5.80543984779115e-06, + "loss": 2.408, + "step": 17834 + }, + { + "epoch": 1.4393511419578726, + "grad_norm": 0.687872052192688, + "learning_rate": 5.800140325681269e-06, + "loss": 2.3956, + "step": 17835 + }, + { + "epoch": 1.4394318456944557, + "grad_norm": 0.716371476650238, + "learning_rate": 5.794843151278107e-06, + "loss": 2.4134, + "step": 17836 + }, + { + "epoch": 1.4395125494310386, + "grad_norm": 0.7058377265930176, + "learning_rate": 5.789548324713711e-06, + "loss": 2.3758, + "step": 17837 + }, + { + "epoch": 1.4395932531676217, + "grad_norm": 0.6678213477134705, + "learning_rate": 5.784255846120057e-06, + "loss": 2.437, + "step": 17838 + }, + { + "epoch": 1.4396739569042047, + "grad_norm": 0.659657895565033, + "learning_rate": 5.778965715629015e-06, + "loss": 2.4551, + "step": 17839 + }, + { + "epoch": 1.4397546606407876, + "grad_norm": 0.7233473062515259, + "learning_rate": 5.773677933372445e-06, 
+ "loss": 2.422, + "step": 17840 + }, + { + "epoch": 1.4398353643773707, + "grad_norm": 0.6661399006843567, + "learning_rate": 5.768392499482144e-06, + "loss": 2.4354, + "step": 17841 + }, + { + "epoch": 1.4399160681139538, + "grad_norm": 0.700758695602417, + "learning_rate": 5.763109414089807e-06, + "loss": 2.4248, + "step": 17842 + }, + { + "epoch": 1.4399967718505366, + "grad_norm": 0.7119004130363464, + "learning_rate": 5.757828677327104e-06, + "loss": 2.4281, + "step": 17843 + }, + { + "epoch": 1.4400774755871197, + "grad_norm": 0.6928756237030029, + "learning_rate": 5.752550289325687e-06, + "loss": 2.431, + "step": 17844 + }, + { + "epoch": 1.4401581793237028, + "grad_norm": 0.7062112092971802, + "learning_rate": 5.747274250217094e-06, + "loss": 2.3986, + "step": 17845 + }, + { + "epoch": 1.4402388830602857, + "grad_norm": 0.7257757782936096, + "learning_rate": 5.742000560132787e-06, + "loss": 2.398, + "step": 17846 + }, + { + "epoch": 1.4403195867968688, + "grad_norm": 0.7206892371177673, + "learning_rate": 5.736729219204218e-06, + "loss": 2.4126, + "step": 17847 + }, + { + "epoch": 1.4404002905334516, + "grad_norm": 0.6752306818962097, + "learning_rate": 5.73146022756278e-06, + "loss": 2.3732, + "step": 17848 + }, + { + "epoch": 1.4404809942700347, + "grad_norm": 0.6507758498191833, + "learning_rate": 5.726193585339756e-06, + "loss": 2.42, + "step": 17849 + }, + { + "epoch": 1.4405616980066176, + "grad_norm": 0.6858177781105042, + "learning_rate": 5.7209292926664325e-06, + "loss": 2.3956, + "step": 17850 + }, + { + "epoch": 1.4406424017432007, + "grad_norm": 0.7283064723014832, + "learning_rate": 5.715667349674003e-06, + "loss": 2.4295, + "step": 17851 + }, + { + "epoch": 1.4407231054797838, + "grad_norm": 0.7306254506111145, + "learning_rate": 5.710407756493597e-06, + "loss": 2.4017, + "step": 17852 + }, + { + "epoch": 1.4408038092163666, + "grad_norm": 0.6728531122207642, + "learning_rate": 5.7051505132562965e-06, + "loss": 2.3767, + "step": 17853 + }, + 
{ + "epoch": 1.4408845129529497, + "grad_norm": 0.6739331483840942, + "learning_rate": 5.699895620093143e-06, + "loss": 2.4215, + "step": 17854 + }, + { + "epoch": 1.4409652166895328, + "grad_norm": 0.6646329760551453, + "learning_rate": 5.6946430771350975e-06, + "loss": 2.3565, + "step": 17855 + }, + { + "epoch": 1.4410459204261157, + "grad_norm": 0.7297715544700623, + "learning_rate": 5.6893928845130565e-06, + "loss": 2.4182, + "step": 17856 + }, + { + "epoch": 1.4411266241626988, + "grad_norm": 0.7202762961387634, + "learning_rate": 5.684145042357891e-06, + "loss": 2.4061, + "step": 17857 + }, + { + "epoch": 1.4412073278992819, + "grad_norm": 0.6860011219978333, + "learning_rate": 5.678899550800354e-06, + "loss": 2.4116, + "step": 17858 + }, + { + "epoch": 1.4412880316358647, + "grad_norm": 0.8249632120132446, + "learning_rate": 5.6736564099712064e-06, + "loss": 2.44, + "step": 17859 + }, + { + "epoch": 1.4413687353724478, + "grad_norm": 0.6403428912162781, + "learning_rate": 5.668415620001111e-06, + "loss": 2.4067, + "step": 17860 + }, + { + "epoch": 1.4414494391090307, + "grad_norm": 0.7119578123092651, + "learning_rate": 5.663177181020696e-06, + "loss": 2.4161, + "step": 17861 + }, + { + "epoch": 1.4415301428456138, + "grad_norm": 0.6670625805854797, + "learning_rate": 5.65794109316049e-06, + "loss": 2.4548, + "step": 17862 + }, + { + "epoch": 1.4416108465821966, + "grad_norm": 0.7028807997703552, + "learning_rate": 5.652707356551001e-06, + "loss": 2.4008, + "step": 17863 + }, + { + "epoch": 1.4416915503187797, + "grad_norm": 0.7150121331214905, + "learning_rate": 5.64747597132268e-06, + "loss": 2.3776, + "step": 17864 + }, + { + "epoch": 1.4417722540553628, + "grad_norm": 0.6778405904769897, + "learning_rate": 5.642246937605888e-06, + "loss": 2.4485, + "step": 17865 + }, + { + "epoch": 1.4418529577919457, + "grad_norm": 0.7118825316429138, + "learning_rate": 5.637020255530967e-06, + "loss": 2.3808, + "step": 17866 + }, + { + "epoch": 1.4419336615285288, + 
"grad_norm": 0.7020435929298401, + "learning_rate": 5.631795925228178e-06, + "loss": 2.3947, + "step": 17867 + }, + { + "epoch": 1.4420143652651118, + "grad_norm": 0.6727933287620544, + "learning_rate": 5.626573946827696e-06, + "loss": 2.3789, + "step": 17868 + }, + { + "epoch": 1.4420950690016947, + "grad_norm": 0.7938553690910339, + "learning_rate": 5.621354320459693e-06, + "loss": 2.4262, + "step": 17869 + }, + { + "epoch": 1.4421757727382778, + "grad_norm": 0.6903455853462219, + "learning_rate": 5.616137046254255e-06, + "loss": 2.3382, + "step": 17870 + }, + { + "epoch": 1.4422564764748609, + "grad_norm": 0.6873618960380554, + "learning_rate": 5.6109221243414e-06, + "loss": 2.3795, + "step": 17871 + }, + { + "epoch": 1.4423371802114437, + "grad_norm": 0.667328953742981, + "learning_rate": 5.60570955485109e-06, + "loss": 2.4353, + "step": 17872 + }, + { + "epoch": 1.4424178839480268, + "grad_norm": 0.7091758847236633, + "learning_rate": 5.600499337913256e-06, + "loss": 2.3897, + "step": 17873 + }, + { + "epoch": 1.44249858768461, + "grad_norm": 0.6954033374786377, + "learning_rate": 5.5952914736577375e-06, + "loss": 2.4334, + "step": 17874 + }, + { + "epoch": 1.4425792914211928, + "grad_norm": 0.692724347114563, + "learning_rate": 5.590085962214331e-06, + "loss": 2.3355, + "step": 17875 + }, + { + "epoch": 1.4426599951577759, + "grad_norm": 0.7159389853477478, + "learning_rate": 5.584882803712777e-06, + "loss": 2.4425, + "step": 17876 + }, + { + "epoch": 1.4427406988943587, + "grad_norm": 0.7154572606086731, + "learning_rate": 5.579681998282759e-06, + "loss": 2.4353, + "step": 17877 + }, + { + "epoch": 1.4428214026309418, + "grad_norm": 0.6575120687484741, + "learning_rate": 5.574483546053866e-06, + "loss": 2.4038, + "step": 17878 + }, + { + "epoch": 1.4429021063675247, + "grad_norm": 0.7108171582221985, + "learning_rate": 5.56928744715568e-06, + "loss": 2.3661, + "step": 17879 + }, + { + "epoch": 1.4429828101041078, + "grad_norm": 0.7755489349365234, + 
"learning_rate": 5.564093701717698e-06, + "loss": 2.4026, + "step": 17880 + }, + { + "epoch": 1.4430635138406909, + "grad_norm": 0.7044881582260132, + "learning_rate": 5.5589023098693625e-06, + "loss": 2.433, + "step": 17881 + }, + { + "epoch": 1.4431442175772737, + "grad_norm": 0.6959014534950256, + "learning_rate": 5.553713271740035e-06, + "loss": 2.3399, + "step": 17882 + }, + { + "epoch": 1.4432249213138568, + "grad_norm": 0.6273486614227295, + "learning_rate": 5.5485265874590685e-06, + "loss": 2.4085, + "step": 17883 + }, + { + "epoch": 1.44330562505044, + "grad_norm": 0.711344301700592, + "learning_rate": 5.5433422571557145e-06, + "loss": 2.5058, + "step": 17884 + }, + { + "epoch": 1.4433863287870228, + "grad_norm": 0.7118481397628784, + "learning_rate": 5.5381602809591815e-06, + "loss": 2.4213, + "step": 17885 + }, + { + "epoch": 1.4434670325236059, + "grad_norm": 0.6486421227455139, + "learning_rate": 5.5329806589986435e-06, + "loss": 2.4225, + "step": 17886 + }, + { + "epoch": 1.443547736260189, + "grad_norm": 0.6768030524253845, + "learning_rate": 5.527803391403141e-06, + "loss": 2.4155, + "step": 17887 + }, + { + "epoch": 1.4436284399967718, + "grad_norm": 0.6921476721763611, + "learning_rate": 5.522628478301739e-06, + "loss": 2.4487, + "step": 17888 + }, + { + "epoch": 1.443709143733355, + "grad_norm": 0.6598425507545471, + "learning_rate": 5.517455919823411e-06, + "loss": 2.3929, + "step": 17889 + }, + { + "epoch": 1.443789847469938, + "grad_norm": 0.6784876585006714, + "learning_rate": 5.512285716097043e-06, + "loss": 2.4357, + "step": 17890 + }, + { + "epoch": 1.4438705512065209, + "grad_norm": 0.6828306913375854, + "learning_rate": 5.507117867251521e-06, + "loss": 2.3931, + "step": 17891 + }, + { + "epoch": 1.443951254943104, + "grad_norm": 0.708244800567627, + "learning_rate": 5.5019523734156195e-06, + "loss": 2.3955, + "step": 17892 + }, + { + "epoch": 1.4440319586796868, + "grad_norm": 0.7499315142631531, + "learning_rate": 5.496789234718081e-06, 
+ "loss": 2.4862, + "step": 17893 + }, + { + "epoch": 1.44411266241627, + "grad_norm": 0.6969838738441467, + "learning_rate": 5.491628451287601e-06, + "loss": 2.4367, + "step": 17894 + }, + { + "epoch": 1.4441933661528528, + "grad_norm": 0.6904775500297546, + "learning_rate": 5.486470023252777e-06, + "loss": 2.4772, + "step": 17895 + }, + { + "epoch": 1.4442740698894359, + "grad_norm": 0.7058213949203491, + "learning_rate": 5.481313950742195e-06, + "loss": 2.4059, + "step": 17896 + }, + { + "epoch": 1.444354773626019, + "grad_norm": 0.6824650764465332, + "learning_rate": 5.4761602338843425e-06, + "loss": 2.4058, + "step": 17897 + }, + { + "epoch": 1.4444354773626018, + "grad_norm": 0.6874315738677979, + "learning_rate": 5.471008872807648e-06, + "loss": 2.4055, + "step": 17898 + }, + { + "epoch": 1.444516181099185, + "grad_norm": 0.7096625566482544, + "learning_rate": 5.465859867640544e-06, + "loss": 2.4319, + "step": 17899 + }, + { + "epoch": 1.444596884835768, + "grad_norm": 0.6456719636917114, + "learning_rate": 5.460713218511304e-06, + "loss": 2.3403, + "step": 17900 + }, + { + "epoch": 1.4446775885723508, + "grad_norm": 0.6711640357971191, + "learning_rate": 5.4555689255482156e-06, + "loss": 2.4333, + "step": 17901 + }, + { + "epoch": 1.444758292308934, + "grad_norm": 0.6594802737236023, + "learning_rate": 5.450426988879509e-06, + "loss": 2.4027, + "step": 17902 + }, + { + "epoch": 1.444838996045517, + "grad_norm": 0.6931496858596802, + "learning_rate": 5.445287408633304e-06, + "loss": 2.4085, + "step": 17903 + }, + { + "epoch": 1.4449196997820999, + "grad_norm": 0.6932462453842163, + "learning_rate": 5.440150184937709e-06, + "loss": 2.3989, + "step": 17904 + }, + { + "epoch": 1.445000403518683, + "grad_norm": 0.7502899765968323, + "learning_rate": 5.435015317920744e-06, + "loss": 2.4083, + "step": 17905 + }, + { + "epoch": 1.4450811072552658, + "grad_norm": 0.6513844132423401, + "learning_rate": 5.429882807710396e-06, + "loss": 2.3895, + "step": 17906 + }, + { 
+ "epoch": 1.445161810991849, + "grad_norm": 0.6809015274047852, + "learning_rate": 5.4247526544345835e-06, + "loss": 2.3957, + "step": 17907 + }, + { + "epoch": 1.4452425147284318, + "grad_norm": 0.6784202456474304, + "learning_rate": 5.419624858221151e-06, + "loss": 2.3735, + "step": 17908 + }, + { + "epoch": 1.4453232184650149, + "grad_norm": 0.8005407452583313, + "learning_rate": 5.414499419197916e-06, + "loss": 2.3888, + "step": 17909 + }, + { + "epoch": 1.445403922201598, + "grad_norm": 0.7133296728134155, + "learning_rate": 5.409376337492589e-06, + "loss": 2.4347, + "step": 17910 + }, + { + "epoch": 1.4454846259381808, + "grad_norm": 0.6852008104324341, + "learning_rate": 5.404255613232867e-06, + "loss": 2.4154, + "step": 17911 + }, + { + "epoch": 1.445565329674764, + "grad_norm": 0.7864294648170471, + "learning_rate": 5.399137246546393e-06, + "loss": 2.4104, + "step": 17912 + }, + { + "epoch": 1.445646033411347, + "grad_norm": 0.7150406837463379, + "learning_rate": 5.394021237560687e-06, + "loss": 2.4423, + "step": 17913 + }, + { + "epoch": 1.4457267371479299, + "grad_norm": 0.6756410598754883, + "learning_rate": 5.388907586403269e-06, + "loss": 2.4038, + "step": 17914 + }, + { + "epoch": 1.445807440884513, + "grad_norm": 0.662440836429596, + "learning_rate": 5.383796293201604e-06, + "loss": 2.3529, + "step": 17915 + }, + { + "epoch": 1.445888144621096, + "grad_norm": 0.7391942739486694, + "learning_rate": 5.378687358083057e-06, + "loss": 2.4062, + "step": 17916 + }, + { + "epoch": 1.445968848357679, + "grad_norm": 0.762143611907959, + "learning_rate": 5.373580781174958e-06, + "loss": 2.4344, + "step": 17917 + }, + { + "epoch": 1.446049552094262, + "grad_norm": 0.7365298867225647, + "learning_rate": 5.368476562604608e-06, + "loss": 2.4144, + "step": 17918 + }, + { + "epoch": 1.446130255830845, + "grad_norm": 0.7313491702079773, + "learning_rate": 5.3633747024991685e-06, + "loss": 2.3671, + "step": 17919 + }, + { + "epoch": 1.446210959567428, + "grad_norm": 
0.7121514081954956, + "learning_rate": 5.358275200985818e-06, + "loss": 2.3573, + "step": 17920 + }, + { + "epoch": 1.446291663304011, + "grad_norm": 0.6716858744621277, + "learning_rate": 5.353178058191643e-06, + "loss": 2.4398, + "step": 17921 + }, + { + "epoch": 1.446372367040594, + "grad_norm": 0.7036706805229187, + "learning_rate": 5.348083274243687e-06, + "loss": 2.3913, + "step": 17922 + }, + { + "epoch": 1.446453070777177, + "grad_norm": 0.7855868935585022, + "learning_rate": 5.342990849268914e-06, + "loss": 2.4195, + "step": 17923 + }, + { + "epoch": 1.4465337745137599, + "grad_norm": 0.627890408039093, + "learning_rate": 5.337900783394245e-06, + "loss": 2.3954, + "step": 17924 + }, + { + "epoch": 1.446614478250343, + "grad_norm": 0.7047661542892456, + "learning_rate": 5.332813076746535e-06, + "loss": 2.5015, + "step": 17925 + }, + { + "epoch": 1.446695181986926, + "grad_norm": 0.6752549409866333, + "learning_rate": 5.327727729452592e-06, + "loss": 2.4384, + "step": 17926 + }, + { + "epoch": 1.446775885723509, + "grad_norm": 0.8034621477127075, + "learning_rate": 5.322644741639138e-06, + "loss": 2.444, + "step": 17927 + }, + { + "epoch": 1.446856589460092, + "grad_norm": 0.7055982947349548, + "learning_rate": 5.317564113432882e-06, + "loss": 2.4228, + "step": 17928 + }, + { + "epoch": 1.446937293196675, + "grad_norm": 0.7311068177223206, + "learning_rate": 5.312485844960424e-06, + "loss": 2.3979, + "step": 17929 + }, + { + "epoch": 1.447017996933258, + "grad_norm": 0.7067704796791077, + "learning_rate": 5.307409936348329e-06, + "loss": 2.3724, + "step": 17930 + }, + { + "epoch": 1.447098700669841, + "grad_norm": 0.7303062677383423, + "learning_rate": 5.302336387723128e-06, + "loss": 2.444, + "step": 17931 + }, + { + "epoch": 1.4471794044064241, + "grad_norm": 0.7445392608642578, + "learning_rate": 5.297265199211232e-06, + "loss": 2.4629, + "step": 17932 + }, + { + "epoch": 1.447260108143007, + "grad_norm": 0.6778857707977295, + "learning_rate": 
5.2921963709390394e-06, + "loss": 2.3836, + "step": 17933 + }, + { + "epoch": 1.44734081187959, + "grad_norm": 0.6575925350189209, + "learning_rate": 5.287129903032873e-06, + "loss": 2.3851, + "step": 17934 + }, + { + "epoch": 1.4474215156161732, + "grad_norm": 0.736710250377655, + "learning_rate": 5.282065795619029e-06, + "loss": 2.4644, + "step": 17935 + }, + { + "epoch": 1.447502219352756, + "grad_norm": 0.6607224941253662, + "learning_rate": 5.277004048823686e-06, + "loss": 2.3838, + "step": 17936 + }, + { + "epoch": 1.4475829230893391, + "grad_norm": 0.6364536881446838, + "learning_rate": 5.271944662773021e-06, + "loss": 2.3929, + "step": 17937 + }, + { + "epoch": 1.447663626825922, + "grad_norm": 0.7810595631599426, + "learning_rate": 5.266887637593121e-06, + "loss": 2.3823, + "step": 17938 + }, + { + "epoch": 1.447744330562505, + "grad_norm": 0.6959996819496155, + "learning_rate": 5.261832973410008e-06, + "loss": 2.4392, + "step": 17939 + }, + { + "epoch": 1.447825034299088, + "grad_norm": 0.7112187147140503, + "learning_rate": 5.256780670349659e-06, + "loss": 2.356, + "step": 17940 + }, + { + "epoch": 1.447905738035671, + "grad_norm": 0.7003504633903503, + "learning_rate": 5.251730728538018e-06, + "loss": 2.4182, + "step": 17941 + }, + { + "epoch": 1.447986441772254, + "grad_norm": 0.7685346603393555, + "learning_rate": 5.246683148100906e-06, + "loss": 2.3814, + "step": 17942 + }, + { + "epoch": 1.448067145508837, + "grad_norm": 0.6874574422836304, + "learning_rate": 5.2416379291641336e-06, + "loss": 2.5082, + "step": 17943 + }, + { + "epoch": 1.44814784924542, + "grad_norm": 0.6901064515113831, + "learning_rate": 5.236595071853456e-06, + "loss": 2.484, + "step": 17944 + }, + { + "epoch": 1.4482285529820031, + "grad_norm": 0.7325465083122253, + "learning_rate": 5.231554576294528e-06, + "loss": 2.3479, + "step": 17945 + }, + { + "epoch": 1.448309256718586, + "grad_norm": 0.6547845005989075, + "learning_rate": 5.226516442612994e-06, + "loss": 2.4001, + 
"step": 17946 + }, + { + "epoch": 1.448389960455169, + "grad_norm": 0.7091573476791382, + "learning_rate": 5.221480670934431e-06, + "loss": 2.3743, + "step": 17947 + }, + { + "epoch": 1.4484706641917522, + "grad_norm": 0.6750717163085938, + "learning_rate": 5.216447261384306e-06, + "loss": 2.3841, + "step": 17948 + }, + { + "epoch": 1.448551367928335, + "grad_norm": 0.682778537273407, + "learning_rate": 5.2114162140880715e-06, + "loss": 2.3735, + "step": 17949 + }, + { + "epoch": 1.4486320716649181, + "grad_norm": 0.702796995639801, + "learning_rate": 5.206387529171153e-06, + "loss": 2.397, + "step": 17950 + }, + { + "epoch": 1.448712775401501, + "grad_norm": 0.7154842615127563, + "learning_rate": 5.2013612067588254e-06, + "loss": 2.4072, + "step": 17951 + }, + { + "epoch": 1.448793479138084, + "grad_norm": 0.7017061710357666, + "learning_rate": 5.1963372469763905e-06, + "loss": 2.3638, + "step": 17952 + }, + { + "epoch": 1.448874182874667, + "grad_norm": 0.7153539657592773, + "learning_rate": 5.191315649949047e-06, + "loss": 2.4159, + "step": 17953 + }, + { + "epoch": 1.44895488661125, + "grad_norm": 0.7425200939178467, + "learning_rate": 5.1862964158019615e-06, + "loss": 2.3536, + "step": 17954 + }, + { + "epoch": 1.4490355903478331, + "grad_norm": 0.6961267590522766, + "learning_rate": 5.1812795446602115e-06, + "loss": 2.4257, + "step": 17955 + }, + { + "epoch": 1.449116294084416, + "grad_norm": 0.6912462115287781, + "learning_rate": 5.176265036648808e-06, + "loss": 2.4573, + "step": 17956 + }, + { + "epoch": 1.449196997820999, + "grad_norm": 0.7435596585273743, + "learning_rate": 5.171252891892786e-06, + "loss": 2.4134, + "step": 17957 + }, + { + "epoch": 1.4492777015575822, + "grad_norm": 0.7270591259002686, + "learning_rate": 5.166243110517011e-06, + "loss": 2.3162, + "step": 17958 + }, + { + "epoch": 1.449358405294165, + "grad_norm": 0.6728709936141968, + "learning_rate": 5.161235692646349e-06, + "loss": 2.3991, + "step": 17959 + }, + { + "epoch": 
1.4494391090307481, + "grad_norm": 0.6676486134529114, + "learning_rate": 5.156230638405624e-06, + "loss": 2.4215, + "step": 17960 + }, + { + "epoch": 1.4495198127673312, + "grad_norm": 0.7242336869239807, + "learning_rate": 5.1512279479195455e-06, + "loss": 2.4144, + "step": 17961 + }, + { + "epoch": 1.449600516503914, + "grad_norm": 0.6936756372451782, + "learning_rate": 5.146227621312804e-06, + "loss": 2.3752, + "step": 17962 + }, + { + "epoch": 1.4496812202404972, + "grad_norm": 0.7574671506881714, + "learning_rate": 5.141229658710034e-06, + "loss": 2.4536, + "step": 17963 + }, + { + "epoch": 1.4497619239770803, + "grad_norm": 0.6585906147956848, + "learning_rate": 5.136234060235767e-06, + "loss": 2.4192, + "step": 17964 + }, + { + "epoch": 1.4498426277136631, + "grad_norm": 0.7344881296157837, + "learning_rate": 5.131240826014516e-06, + "loss": 2.375, + "step": 17965 + }, + { + "epoch": 1.4499233314502462, + "grad_norm": 0.6896358132362366, + "learning_rate": 5.126249956170748e-06, + "loss": 2.3417, + "step": 17966 + }, + { + "epoch": 1.450004035186829, + "grad_norm": 0.7076104283332825, + "learning_rate": 5.1212614508288185e-06, + "loss": 2.4131, + "step": 17967 + }, + { + "epoch": 1.4500847389234122, + "grad_norm": 0.6901896595954895, + "learning_rate": 5.116275310113083e-06, + "loss": 2.4232, + "step": 17968 + }, + { + "epoch": 1.450165442659995, + "grad_norm": 0.7986876964569092, + "learning_rate": 5.111291534147788e-06, + "loss": 2.4545, + "step": 17969 + }, + { + "epoch": 1.4502461463965781, + "grad_norm": 0.723733127117157, + "learning_rate": 5.106310123057167e-06, + "loss": 2.3816, + "step": 17970 + }, + { + "epoch": 1.4503268501331612, + "grad_norm": 0.6440990567207336, + "learning_rate": 5.101331076965332e-06, + "loss": 2.3819, + "step": 17971 + }, + { + "epoch": 1.450407553869744, + "grad_norm": 0.718396782875061, + "learning_rate": 5.096354395996405e-06, + "loss": 2.406, + "step": 17972 + }, + { + "epoch": 1.4504882576063272, + "grad_norm": 
0.6515427231788635, + "learning_rate": 5.0913800802744105e-06, + "loss": 2.4555, + "step": 17973 + }, + { + "epoch": 1.4505689613429102, + "grad_norm": 0.7006518244743347, + "learning_rate": 5.0864081299233035e-06, + "loss": 2.3532, + "step": 17974 + }, + { + "epoch": 1.4506496650794931, + "grad_norm": 0.6596084237098694, + "learning_rate": 5.081438545067019e-06, + "loss": 2.3521, + "step": 17975 + }, + { + "epoch": 1.4507303688160762, + "grad_norm": 0.7091804146766663, + "learning_rate": 5.076471325829413e-06, + "loss": 2.397, + "step": 17976 + }, + { + "epoch": 1.4508110725526593, + "grad_norm": 0.6768068671226501, + "learning_rate": 5.071506472334264e-06, + "loss": 2.3692, + "step": 17977 + }, + { + "epoch": 1.4508917762892422, + "grad_norm": 0.6937921643257141, + "learning_rate": 5.066543984705318e-06, + "loss": 2.4674, + "step": 17978 + }, + { + "epoch": 1.4509724800258252, + "grad_norm": 0.6987953186035156, + "learning_rate": 5.061583863066266e-06, + "loss": 2.388, + "step": 17979 + }, + { + "epoch": 1.4510531837624083, + "grad_norm": 0.7390346527099609, + "learning_rate": 5.056626107540708e-06, + "loss": 2.4279, + "step": 17980 + }, + { + "epoch": 1.4511338874989912, + "grad_norm": 0.6433011889457703, + "learning_rate": 5.05167071825221e-06, + "loss": 2.3897, + "step": 17981 + }, + { + "epoch": 1.4512145912355743, + "grad_norm": 0.6530279517173767, + "learning_rate": 5.046717695324288e-06, + "loss": 2.3794, + "step": 17982 + }, + { + "epoch": 1.4512952949721571, + "grad_norm": 0.7322575449943542, + "learning_rate": 5.041767038880363e-06, + "loss": 2.3391, + "step": 17983 + }, + { + "epoch": 1.4513759987087402, + "grad_norm": 0.7013799548149109, + "learning_rate": 5.036818749043825e-06, + "loss": 2.417, + "step": 17984 + }, + { + "epoch": 1.451456702445323, + "grad_norm": 0.6833368539810181, + "learning_rate": 5.031872825937989e-06, + "loss": 2.4109, + "step": 17985 + }, + { + "epoch": 1.4515374061819062, + "grad_norm": 0.6758227348327637, + "learning_rate": 
5.026929269686143e-06, + "loss": 2.3913, + "step": 17986 + }, + { + "epoch": 1.4516181099184893, + "grad_norm": 0.6799556016921997, + "learning_rate": 5.021988080411477e-06, + "loss": 2.3963, + "step": 17987 + }, + { + "epoch": 1.4516988136550721, + "grad_norm": 0.670512318611145, + "learning_rate": 5.01704925823715e-06, + "loss": 2.4372, + "step": 17988 + }, + { + "epoch": 1.4517795173916552, + "grad_norm": 0.7226561903953552, + "learning_rate": 5.01211280328625e-06, + "loss": 2.3723, + "step": 17989 + }, + { + "epoch": 1.4518602211282383, + "grad_norm": 0.7119970917701721, + "learning_rate": 5.007178715681793e-06, + "loss": 2.454, + "step": 17990 + }, + { + "epoch": 1.4519409248648212, + "grad_norm": 0.670310378074646, + "learning_rate": 5.002246995546744e-06, + "loss": 2.4751, + "step": 17991 + }, + { + "epoch": 1.4520216286014043, + "grad_norm": 0.6663460731506348, + "learning_rate": 4.9973176430040515e-06, + "loss": 2.4779, + "step": 17992 + }, + { + "epoch": 1.4521023323379874, + "grad_norm": 0.72465980052948, + "learning_rate": 4.992390658176526e-06, + "loss": 2.429, + "step": 17993 + }, + { + "epoch": 1.4521830360745702, + "grad_norm": 0.7189087867736816, + "learning_rate": 4.987466041186972e-06, + "loss": 2.4086, + "step": 17994 + }, + { + "epoch": 1.4522637398111533, + "grad_norm": 0.6699924468994141, + "learning_rate": 4.982543792158134e-06, + "loss": 2.3932, + "step": 17995 + }, + { + "epoch": 1.4523444435477364, + "grad_norm": 0.6420440077781677, + "learning_rate": 4.977623911212681e-06, + "loss": 2.4164, + "step": 17996 + }, + { + "epoch": 1.4524251472843193, + "grad_norm": 0.6452329754829407, + "learning_rate": 4.972706398473237e-06, + "loss": 2.3391, + "step": 17997 + }, + { + "epoch": 1.4525058510209023, + "grad_norm": 0.6906129121780396, + "learning_rate": 4.967791254062359e-06, + "loss": 2.4345, + "step": 17998 + }, + { + "epoch": 1.4525865547574852, + "grad_norm": 0.6918602585792542, + "learning_rate": 4.96287847810254e-06, + "loss": 2.3304, + 
"step": 17999 + }, + { + "epoch": 1.4526672584940683, + "grad_norm": 0.727873682975769, + "learning_rate": 4.957968070716201e-06, + "loss": 2.417, + "step": 18000 + }, + { + "epoch": 1.4526672584940683, + "eval_loss": 2.3678998947143555, + "eval_runtime": 764.534, + "eval_samples_per_second": 3.427, + "eval_steps_per_second": 0.572, + "step": 18000 + }, + { + "epoch": 1.4527479622306512, + "grad_norm": 0.6551083922386169, + "learning_rate": 4.953060032025747e-06, + "loss": 2.3777, + "step": 18001 + }, + { + "epoch": 1.4528286659672343, + "grad_norm": 0.6975324153900146, + "learning_rate": 4.948154362153512e-06, + "loss": 2.4277, + "step": 18002 + }, + { + "epoch": 1.4529093697038173, + "grad_norm": 0.6673024892807007, + "learning_rate": 4.943251061221721e-06, + "loss": 2.3652, + "step": 18003 + }, + { + "epoch": 1.4529900734404002, + "grad_norm": 0.713287889957428, + "learning_rate": 4.938350129352587e-06, + "loss": 2.3868, + "step": 18004 + }, + { + "epoch": 1.4530707771769833, + "grad_norm": 0.6872570514678955, + "learning_rate": 4.9334515666682905e-06, + "loss": 2.3639, + "step": 18005 + }, + { + "epoch": 1.4531514809135664, + "grad_norm": 0.7270746827125549, + "learning_rate": 4.928555373290844e-06, + "loss": 2.4394, + "step": 18006 + }, + { + "epoch": 1.4532321846501493, + "grad_norm": 0.7313820123672485, + "learning_rate": 4.9236615493423395e-06, + "loss": 2.4312, + "step": 18007 + }, + { + "epoch": 1.4533128883867323, + "grad_norm": 0.7104899287223816, + "learning_rate": 4.918770094944736e-06, + "loss": 2.4121, + "step": 18008 + }, + { + "epoch": 1.4533935921233154, + "grad_norm": 0.6785389184951782, + "learning_rate": 4.913881010219912e-06, + "loss": 2.4871, + "step": 18009 + }, + { + "epoch": 1.4534742958598983, + "grad_norm": 0.71209716796875, + "learning_rate": 4.908994295289726e-06, + "loss": 2.4822, + "step": 18010 + }, + { + "epoch": 1.4535549995964814, + "grad_norm": 0.7160407900810242, + "learning_rate": 4.904109950275992e-06, + "loss": 2.4656, + 
"step": 18011 + }, + { + "epoch": 1.4536357033330642, + "grad_norm": 0.7023136615753174, + "learning_rate": 4.899227975300402e-06, + "loss": 2.4387, + "step": 18012 + }, + { + "epoch": 1.4537164070696473, + "grad_norm": 0.7554822564125061, + "learning_rate": 4.8943483704846475e-06, + "loss": 2.4355, + "step": 18013 + }, + { + "epoch": 1.4537971108062302, + "grad_norm": 0.685516893863678, + "learning_rate": 4.889471135950352e-06, + "loss": 2.4362, + "step": 18014 + }, + { + "epoch": 1.4538778145428133, + "grad_norm": 0.6651094555854797, + "learning_rate": 4.884596271819053e-06, + "loss": 2.4479, + "step": 18015 + }, + { + "epoch": 1.4539585182793964, + "grad_norm": 0.7710262537002563, + "learning_rate": 4.879723778212242e-06, + "loss": 2.4509, + "step": 18016 + }, + { + "epoch": 1.4540392220159792, + "grad_norm": 0.7243364453315735, + "learning_rate": 4.874853655251365e-06, + "loss": 2.4253, + "step": 18017 + }, + { + "epoch": 1.4541199257525623, + "grad_norm": 0.7639968395233154, + "learning_rate": 4.869985903057783e-06, + "loss": 2.3748, + "step": 18018 + }, + { + "epoch": 1.4542006294891454, + "grad_norm": 0.7307243347167969, + "learning_rate": 4.865120521752842e-06, + "loss": 2.4043, + "step": 18019 + }, + { + "epoch": 1.4542813332257283, + "grad_norm": 0.6940774321556091, + "learning_rate": 4.860257511457767e-06, + "loss": 2.3836, + "step": 18020 + }, + { + "epoch": 1.4543620369623114, + "grad_norm": 0.6808940172195435, + "learning_rate": 4.855396872293794e-06, + "loss": 2.4482, + "step": 18021 + }, + { + "epoch": 1.4544427406988945, + "grad_norm": 0.6618911027908325, + "learning_rate": 4.8505386043820265e-06, + "loss": 2.4141, + "step": 18022 + }, + { + "epoch": 1.4545234444354773, + "grad_norm": 0.7657433748245239, + "learning_rate": 4.845682707843569e-06, + "loss": 2.3576, + "step": 18023 + }, + { + "epoch": 1.4546041481720604, + "grad_norm": 0.7346564531326294, + "learning_rate": 4.840829182799434e-06, + "loss": 2.4335, + "step": 18024 + }, + { + "epoch": 
1.4546848519086435, + "grad_norm": 0.6671693325042725, + "learning_rate": 4.83597802937058e-06, + "loss": 2.3965, + "step": 18025 + }, + { + "epoch": 1.4547655556452264, + "grad_norm": 0.7164655327796936, + "learning_rate": 4.831129247677913e-06, + "loss": 2.3631, + "step": 18026 + }, + { + "epoch": 1.4548462593818094, + "grad_norm": 0.6799946427345276, + "learning_rate": 4.826282837842278e-06, + "loss": 2.4018, + "step": 18027 + }, + { + "epoch": 1.4549269631183923, + "grad_norm": 0.6891220211982727, + "learning_rate": 4.821438799984457e-06, + "loss": 2.3942, + "step": 18028 + }, + { + "epoch": 1.4550076668549754, + "grad_norm": 0.6948480010032654, + "learning_rate": 4.816597134225187e-06, + "loss": 2.4359, + "step": 18029 + }, + { + "epoch": 1.4550883705915583, + "grad_norm": 0.7973241209983826, + "learning_rate": 4.8117578406851385e-06, + "loss": 2.4464, + "step": 18030 + }, + { + "epoch": 1.4551690743281414, + "grad_norm": 0.7553974390029907, + "learning_rate": 4.806920919484903e-06, + "loss": 2.3943, + "step": 18031 + }, + { + "epoch": 1.4552497780647244, + "grad_norm": 0.6626315116882324, + "learning_rate": 4.8020863707450185e-06, + "loss": 2.3603, + "step": 18032 + }, + { + "epoch": 1.4553304818013073, + "grad_norm": 0.6878045797348022, + "learning_rate": 4.79725419458601e-06, + "loss": 2.4646, + "step": 18033 + }, + { + "epoch": 1.4554111855378904, + "grad_norm": 0.7127307057380676, + "learning_rate": 4.792424391128292e-06, + "loss": 2.3914, + "step": 18034 + }, + { + "epoch": 1.4554918892744735, + "grad_norm": 0.6839823722839355, + "learning_rate": 4.787596960492224e-06, + "loss": 2.4282, + "step": 18035 + }, + { + "epoch": 1.4555725930110563, + "grad_norm": 0.6685464978218079, + "learning_rate": 4.782771902798122e-06, + "loss": 2.427, + "step": 18036 + }, + { + "epoch": 1.4556532967476394, + "grad_norm": 0.7302927374839783, + "learning_rate": 4.777949218166256e-06, + "loss": 2.4019, + "step": 18037 + }, + { + "epoch": 1.4557340004842225, + "grad_norm": 
0.6756429672241211, + "learning_rate": 4.773128906716795e-06, + "loss": 2.4271, + "step": 18038 + }, + { + "epoch": 1.4558147042208054, + "grad_norm": 0.6744102835655212, + "learning_rate": 4.768310968569889e-06, + "loss": 2.4165, + "step": 18039 + }, + { + "epoch": 1.4558954079573885, + "grad_norm": 0.7034773826599121, + "learning_rate": 4.76349540384563e-06, + "loss": 2.4079, + "step": 18040 + }, + { + "epoch": 1.4559761116939716, + "grad_norm": 0.6483279466629028, + "learning_rate": 4.758682212664012e-06, + "loss": 2.3873, + "step": 18041 + }, + { + "epoch": 1.4560568154305544, + "grad_norm": 0.6655837893486023, + "learning_rate": 4.753871395144982e-06, + "loss": 2.4022, + "step": 18042 + }, + { + "epoch": 1.4561375191671375, + "grad_norm": 0.7327212691307068, + "learning_rate": 4.749062951408467e-06, + "loss": 2.4068, + "step": 18043 + }, + { + "epoch": 1.4562182229037204, + "grad_norm": 0.6827791333198547, + "learning_rate": 4.744256881574283e-06, + "loss": 2.4941, + "step": 18044 + }, + { + "epoch": 1.4562989266403035, + "grad_norm": 0.7078829407691956, + "learning_rate": 4.739453185762221e-06, + "loss": 2.4065, + "step": 18045 + }, + { + "epoch": 1.4563796303768863, + "grad_norm": 0.7201517820358276, + "learning_rate": 4.734651864091999e-06, + "loss": 2.3617, + "step": 18046 + }, + { + "epoch": 1.4564603341134694, + "grad_norm": 0.6765565872192383, + "learning_rate": 4.729852916683275e-06, + "loss": 2.4026, + "step": 18047 + }, + { + "epoch": 1.4565410378500525, + "grad_norm": 0.6781981587409973, + "learning_rate": 4.725056343655654e-06, + "loss": 2.4638, + "step": 18048 + }, + { + "epoch": 1.4566217415866354, + "grad_norm": 0.7230713367462158, + "learning_rate": 4.720262145128684e-06, + "loss": 2.382, + "step": 18049 + }, + { + "epoch": 1.4567024453232185, + "grad_norm": 0.918341338634491, + "learning_rate": 4.71547032122186e-06, + "loss": 2.447, + "step": 18050 + }, + { + "epoch": 1.4567831490598016, + "grad_norm": 0.683489978313446, + "learning_rate": 
4.710680872054574e-06, + "loss": 2.4175, + "step": 18051 + }, + { + "epoch": 1.4568638527963844, + "grad_norm": 0.6769242882728577, + "learning_rate": 4.7058937977462085e-06, + "loss": 2.4192, + "step": 18052 + }, + { + "epoch": 1.4569445565329675, + "grad_norm": 0.681427001953125, + "learning_rate": 4.701109098416079e-06, + "loss": 2.4194, + "step": 18053 + }, + { + "epoch": 1.4570252602695506, + "grad_norm": 0.8209199905395508, + "learning_rate": 4.6963267741834235e-06, + "loss": 2.4703, + "step": 18054 + }, + { + "epoch": 1.4571059640061335, + "grad_norm": 0.6629942059516907, + "learning_rate": 4.691546825167425e-06, + "loss": 2.4278, + "step": 18055 + }, + { + "epoch": 1.4571866677427165, + "grad_norm": 0.6706543564796448, + "learning_rate": 4.686769251487233e-06, + "loss": 2.4137, + "step": 18056 + }, + { + "epoch": 1.4572673714792994, + "grad_norm": 0.6950179934501648, + "learning_rate": 4.6819940532618735e-06, + "loss": 2.4491, + "step": 18057 + }, + { + "epoch": 1.4573480752158825, + "grad_norm": 0.6982719898223877, + "learning_rate": 4.677221230610407e-06, + "loss": 2.3487, + "step": 18058 + }, + { + "epoch": 1.4574287789524654, + "grad_norm": 0.7230788469314575, + "learning_rate": 4.672450783651772e-06, + "loss": 2.433, + "step": 18059 + }, + { + "epoch": 1.4575094826890485, + "grad_norm": 0.6349153518676758, + "learning_rate": 4.6676827125048394e-06, + "loss": 2.4531, + "step": 18060 + }, + { + "epoch": 1.4575901864256315, + "grad_norm": 0.6164267659187317, + "learning_rate": 4.662917017288449e-06, + "loss": 2.3774, + "step": 18061 + }, + { + "epoch": 1.4576708901622144, + "grad_norm": 0.660593569278717, + "learning_rate": 4.658153698121382e-06, + "loss": 2.4419, + "step": 18062 + }, + { + "epoch": 1.4577515938987975, + "grad_norm": 0.7083500027656555, + "learning_rate": 4.653392755122365e-06, + "loss": 2.453, + "step": 18063 + }, + { + "epoch": 1.4578322976353806, + "grad_norm": 0.6704061627388, + "learning_rate": 4.648634188410028e-06, + "loss": 
2.3893, + "step": 18064 + }, + { + "epoch": 1.4579130013719634, + "grad_norm": 0.6892523765563965, + "learning_rate": 4.643877998102985e-06, + "loss": 2.344, + "step": 18065 + }, + { + "epoch": 1.4579937051085465, + "grad_norm": null, + "learning_rate": 4.643877998102985e-06, + "loss": 2.4214, + "step": 18066 + }, + { + "epoch": 1.4580744088451296, + "grad_norm": 0.6861626505851746, + "learning_rate": 4.639124184319765e-06, + "loss": 2.4126, + "step": 18067 + }, + { + "epoch": 1.4581551125817125, + "grad_norm": 0.7208431363105774, + "learning_rate": 4.63437274717885e-06, + "loss": 2.4176, + "step": 18068 + }, + { + "epoch": 1.4582358163182956, + "grad_norm": 0.692640483379364, + "learning_rate": 4.629623686798623e-06, + "loss": 2.4041, + "step": 18069 + }, + { + "epoch": 1.4583165200548787, + "grad_norm": 0.7293663620948792, + "learning_rate": 4.624877003297512e-06, + "loss": 2.4739, + "step": 18070 + }, + { + "epoch": 1.4583972237914615, + "grad_norm": 0.7625227570533752, + "learning_rate": 4.6201326967937665e-06, + "loss": 2.44, + "step": 18071 + }, + { + "epoch": 1.4584779275280446, + "grad_norm": 0.6759201884269714, + "learning_rate": 4.615390767405636e-06, + "loss": 2.4204, + "step": 18072 + }, + { + "epoch": 1.4585586312646275, + "grad_norm": 0.6490656137466431, + "learning_rate": 4.610651215251316e-06, + "loss": 2.3858, + "step": 18073 + }, + { + "epoch": 1.4586393350012106, + "grad_norm": 0.7280056476593018, + "learning_rate": 4.605914040448911e-06, + "loss": 2.4262, + "step": 18074 + }, + { + "epoch": 1.4587200387377934, + "grad_norm": 0.78135746717453, + "learning_rate": 4.6011792431164826e-06, + "loss": 2.4533, + "step": 18075 + }, + { + "epoch": 1.4588007424743765, + "grad_norm": 0.7509358525276184, + "learning_rate": 4.596446823372058e-06, + "loss": 2.4183, + "step": 18076 + }, + { + "epoch": 1.4588814462109596, + "grad_norm": 0.7389116883277893, + "learning_rate": 4.591716781333555e-06, + "loss": 2.4201, + "step": 18077 + }, + { + "epoch": 
1.4589621499475425, + "grad_norm": 0.7294317483901978, + "learning_rate": 4.586989117118867e-06, + "loss": 2.4412, + "step": 18078 + }, + { + "epoch": 1.4590428536841256, + "grad_norm": 0.8043732047080994, + "learning_rate": 4.582263830845834e-06, + "loss": 2.4385, + "step": 18079 + }, + { + "epoch": 1.4591235574207087, + "grad_norm": 0.6626152396202087, + "learning_rate": 4.5775409226321955e-06, + "loss": 2.3706, + "step": 18080 + }, + { + "epoch": 1.4592042611572915, + "grad_norm": 0.7048769593238831, + "learning_rate": 4.572820392595678e-06, + "loss": 2.3855, + "step": 18081 + }, + { + "epoch": 1.4592849648938746, + "grad_norm": 0.6663374304771423, + "learning_rate": 4.568102240853933e-06, + "loss": 2.4205, + "step": 18082 + }, + { + "epoch": 1.4593656686304577, + "grad_norm": 0.7204031944274902, + "learning_rate": 4.563386467524544e-06, + "loss": 2.4484, + "step": 18083 + }, + { + "epoch": 1.4594463723670406, + "grad_norm": 0.7225900888442993, + "learning_rate": 4.55867307272504e-06, + "loss": 2.3677, + "step": 18084 + }, + { + "epoch": 1.4595270761036236, + "grad_norm": 0.7384055852890015, + "learning_rate": 4.55396205657288e-06, + "loss": 2.414, + "step": 18085 + }, + { + "epoch": 1.4596077798402067, + "grad_norm": 0.7159018516540527, + "learning_rate": 4.5492534191854955e-06, + "loss": 2.4265, + "step": 18086 + }, + { + "epoch": 1.4596884835767896, + "grad_norm": 0.7001106142997742, + "learning_rate": 4.544547160680213e-06, + "loss": 2.407, + "step": 18087 + }, + { + "epoch": 1.4597691873133727, + "grad_norm": 0.7521629929542542, + "learning_rate": 4.539843281174339e-06, + "loss": 2.42, + "step": 18088 + }, + { + "epoch": 1.4598498910499556, + "grad_norm": 0.6956350207328796, + "learning_rate": 4.535141780785102e-06, + "loss": 2.4639, + "step": 18089 + }, + { + "epoch": 1.4599305947865386, + "grad_norm": 0.7860763072967529, + "learning_rate": 4.530442659629686e-06, + "loss": 2.3979, + "step": 18090 + }, + { + "epoch": 1.4600112985231215, + "grad_norm": 
0.69307541847229, + "learning_rate": 4.5257459178251974e-06, + "loss": 2.3511, + "step": 18091 + }, + { + "epoch": 1.4600920022597046, + "grad_norm": 0.6837919354438782, + "learning_rate": 4.521051555488709e-06, + "loss": 2.3985, + "step": 18092 + }, + { + "epoch": 1.4601727059962877, + "grad_norm": 0.7990331053733826, + "learning_rate": 4.516359572737183e-06, + "loss": 2.4066, + "step": 18093 + }, + { + "epoch": 1.4602534097328705, + "grad_norm": 0.6431984901428223, + "learning_rate": 4.511669969687571e-06, + "loss": 2.4111, + "step": 18094 + }, + { + "epoch": 1.4603341134694536, + "grad_norm": 0.6853081583976746, + "learning_rate": 4.506982746456756e-06, + "loss": 2.3837, + "step": 18095 + }, + { + "epoch": 1.4604148172060367, + "grad_norm": 0.6754196882247925, + "learning_rate": 4.502297903161568e-06, + "loss": 2.357, + "step": 18096 + }, + { + "epoch": 1.4604955209426196, + "grad_norm": 0.7235881686210632, + "learning_rate": 4.497615439918734e-06, + "loss": 2.4749, + "step": 18097 + }, + { + "epoch": 1.4605762246792027, + "grad_norm": 0.7340710163116455, + "learning_rate": 4.4929353568449735e-06, + "loss": 2.4776, + "step": 18098 + }, + { + "epoch": 1.4606569284157858, + "grad_norm": 0.7013822793960571, + "learning_rate": 4.488257654056915e-06, + "loss": 2.4716, + "step": 18099 + }, + { + "epoch": 1.4607376321523686, + "grad_norm": 0.7052991986274719, + "learning_rate": 4.483582331671143e-06, + "loss": 2.4296, + "step": 18100 + }, + { + "epoch": 1.4608183358889517, + "grad_norm": 0.710962176322937, + "learning_rate": 4.478909389804187e-06, + "loss": 2.401, + "step": 18101 + }, + { + "epoch": 1.4608990396255348, + "grad_norm": 0.670494019985199, + "learning_rate": 4.474238828572519e-06, + "loss": 2.4259, + "step": 18102 + }, + { + "epoch": 1.4609797433621177, + "grad_norm": 0.7328322529792786, + "learning_rate": 4.4695706480925136e-06, + "loss": 2.4196, + "step": 18103 + }, + { + "epoch": 1.4610604470987005, + "grad_norm": 0.6856482028961182, + "learning_rate": 
4.464904848480523e-06, + "loss": 2.3896, + "step": 18104 + }, + { + "epoch": 1.4611411508352836, + "grad_norm": 0.6747605204582214, + "learning_rate": 4.4602414298528405e-06, + "loss": 2.3924, + "step": 18105 + }, + { + "epoch": 1.4612218545718667, + "grad_norm": 0.7371439337730408, + "learning_rate": 4.455580392325687e-06, + "loss": 2.3831, + "step": 18106 + }, + { + "epoch": 1.4613025583084496, + "grad_norm": 0.6863524317741394, + "learning_rate": 4.450921736015212e-06, + "loss": 2.4224, + "step": 18107 + }, + { + "epoch": 1.4613832620450327, + "grad_norm": 0.6699609160423279, + "learning_rate": 4.4462654610375465e-06, + "loss": 2.4119, + "step": 18108 + }, + { + "epoch": 1.4614639657816157, + "grad_norm": 0.6912252306938171, + "learning_rate": 4.441611567508719e-06, + "loss": 2.3899, + "step": 18109 + }, + { + "epoch": 1.4615446695181986, + "grad_norm": 0.7110146284103394, + "learning_rate": 4.436960055544726e-06, + "loss": 2.4768, + "step": 18110 + }, + { + "epoch": 1.4616253732547817, + "grad_norm": 0.7201465368270874, + "learning_rate": 4.432310925261496e-06, + "loss": 2.3887, + "step": 18111 + }, + { + "epoch": 1.4617060769913648, + "grad_norm": 0.6860183477401733, + "learning_rate": 4.4276641767749035e-06, + "loss": 2.37, + "step": 18112 + }, + { + "epoch": 1.4617867807279477, + "grad_norm": 0.6903096437454224, + "learning_rate": 4.4230198102007344e-06, + "loss": 2.4226, + "step": 18113 + }, + { + "epoch": 1.4618674844645307, + "grad_norm": 0.72129225730896, + "learning_rate": 4.418377825654752e-06, + "loss": 2.4313, + "step": 18114 + }, + { + "epoch": 1.4619481882011138, + "grad_norm": 0.686478316783905, + "learning_rate": 4.4137382232526615e-06, + "loss": 2.4766, + "step": 18115 + }, + { + "epoch": 1.4620288919376967, + "grad_norm": 0.69380784034729, + "learning_rate": 4.409101003110061e-06, + "loss": 2.4182, + "step": 18116 + }, + { + "epoch": 1.4621095956742798, + "grad_norm": 0.7099065184593201, + "learning_rate": 4.404466165342547e-06, + "loss": 
2.4172, + "step": 18117 + }, + { + "epoch": 1.4621902994108626, + "grad_norm": 0.7571132779121399, + "learning_rate": 4.399833710065637e-06, + "loss": 2.4231, + "step": 18118 + }, + { + "epoch": 1.4622710031474457, + "grad_norm": 0.7232388854026794, + "learning_rate": 4.3952036373947625e-06, + "loss": 2.4282, + "step": 18119 + }, + { + "epoch": 1.4623517068840286, + "grad_norm": 0.6481829881668091, + "learning_rate": 4.390575947445308e-06, + "loss": 2.3985, + "step": 18120 + }, + { + "epoch": 1.4624324106206117, + "grad_norm": 0.6784008741378784, + "learning_rate": 4.385950640332659e-06, + "loss": 2.4314, + "step": 18121 + }, + { + "epoch": 1.4625131143571948, + "grad_norm": 0.6858715415000916, + "learning_rate": 4.381327716172046e-06, + "loss": 2.4649, + "step": 18122 + }, + { + "epoch": 1.4625938180937776, + "grad_norm": 0.6565954089164734, + "learning_rate": 4.376707175078687e-06, + "loss": 2.377, + "step": 18123 + }, + { + "epoch": 1.4626745218303607, + "grad_norm": 0.6645387411117554, + "learning_rate": 4.372089017167769e-06, + "loss": 2.4133, + "step": 18124 + }, + { + "epoch": 1.4627552255669438, + "grad_norm": 0.7109405398368835, + "learning_rate": 4.367473242554343e-06, + "loss": 2.4048, + "step": 18125 + }, + { + "epoch": 1.4628359293035267, + "grad_norm": 0.6737244129180908, + "learning_rate": 4.362859851353473e-06, + "loss": 2.4009, + "step": 18126 + }, + { + "epoch": 1.4629166330401098, + "grad_norm": 0.7147111892700195, + "learning_rate": 4.358248843680135e-06, + "loss": 2.402, + "step": 18127 + }, + { + "epoch": 1.4629973367766929, + "grad_norm": 0.7494312524795532, + "learning_rate": 4.353640219649269e-06, + "loss": 2.3841, + "step": 18128 + }, + { + "epoch": 1.4630780405132757, + "grad_norm": 0.6915758848190308, + "learning_rate": 4.349033979375683e-06, + "loss": 2.4388, + "step": 18129 + }, + { + "epoch": 1.4631587442498588, + "grad_norm": 0.7709435820579529, + "learning_rate": 4.344430122974208e-06, + "loss": 2.5079, + "step": 18130 + }, + { + 
"epoch": 1.463239447986442, + "grad_norm": 0.6913777589797974, + "learning_rate": 4.3398286505595854e-06, + "loss": 2.4108, + "step": 18131 + }, + { + "epoch": 1.4633201517230248, + "grad_norm": 0.7236559987068176, + "learning_rate": 4.33522956224649e-06, + "loss": 2.4486, + "step": 18132 + }, + { + "epoch": 1.4634008554596079, + "grad_norm": 0.7122974395751953, + "learning_rate": 4.330632858149541e-06, + "loss": 2.377, + "step": 18133 + }, + { + "epoch": 1.4634815591961907, + "grad_norm": 0.713534951210022, + "learning_rate": 4.326038538383315e-06, + "loss": 2.4272, + "step": 18134 + }, + { + "epoch": 1.4635622629327738, + "grad_norm": 0.7163103222846985, + "learning_rate": 4.3214466030622955e-06, + "loss": 2.4787, + "step": 18135 + }, + { + "epoch": 1.4636429666693567, + "grad_norm": 0.6943918466567993, + "learning_rate": 4.316857052300927e-06, + "loss": 2.3893, + "step": 18136 + }, + { + "epoch": 1.4637236704059398, + "grad_norm": 0.6980963945388794, + "learning_rate": 4.312269886213615e-06, + "loss": 2.3745, + "step": 18137 + }, + { + "epoch": 1.4638043741425228, + "grad_norm": 0.6529614925384521, + "learning_rate": 4.3076851049146605e-06, + "loss": 2.4438, + "step": 18138 + }, + { + "epoch": 1.4638850778791057, + "grad_norm": 0.7353845238685608, + "learning_rate": 4.303102708518325e-06, + "loss": 2.4655, + "step": 18139 + }, + { + "epoch": 1.4639657816156888, + "grad_norm": 0.6540514826774597, + "learning_rate": 4.29852269713883e-06, + "loss": 2.389, + "step": 18140 + }, + { + "epoch": 1.4640464853522719, + "grad_norm": 0.6866925954818726, + "learning_rate": 4.293945070890315e-06, + "loss": 2.4197, + "step": 18141 + }, + { + "epoch": 1.4641271890888548, + "grad_norm": 0.701850175857544, + "learning_rate": 4.289369829886869e-06, + "loss": 2.4213, + "step": 18142 + }, + { + "epoch": 1.4642078928254378, + "grad_norm": 0.700334906578064, + "learning_rate": 4.284796974242511e-06, + "loss": 2.3587, + "step": 18143 + }, + { + "epoch": 1.464288596562021, + 
"grad_norm": 0.7060009241104126, + "learning_rate": 4.2802265040712275e-06, + "loss": 2.4579, + "step": 18144 + }, + { + "epoch": 1.4643693002986038, + "grad_norm": 0.6994202136993408, + "learning_rate": 4.2756584194869055e-06, + "loss": 2.4344, + "step": 18145 + }, + { + "epoch": 1.4644500040351869, + "grad_norm": 0.6504814624786377, + "learning_rate": 4.271092720603409e-06, + "loss": 2.3715, + "step": 18146 + }, + { + "epoch": 1.46453070777177, + "grad_norm": 0.6882978677749634, + "learning_rate": 4.266529407534514e-06, + "loss": 2.4387, + "step": 18147 + }, + { + "epoch": 1.4646114115083528, + "grad_norm": 0.6723669767379761, + "learning_rate": 4.261968480393963e-06, + "loss": 2.4423, + "step": 18148 + }, + { + "epoch": 1.464692115244936, + "grad_norm": 0.6500051021575928, + "learning_rate": 4.257409939295409e-06, + "loss": 2.4027, + "step": 18149 + }, + { + "epoch": 1.4647728189815188, + "grad_norm": 0.7253198623657227, + "learning_rate": 4.252853784352473e-06, + "loss": 2.4454, + "step": 18150 + }, + { + "epoch": 1.4648535227181019, + "grad_norm": 0.6945883631706238, + "learning_rate": 4.248300015678696e-06, + "loss": 2.4018, + "step": 18151 + }, + { + "epoch": 1.4649342264546847, + "grad_norm": 0.6615251302719116, + "learning_rate": 4.243748633387601e-06, + "loss": 2.367, + "step": 18152 + }, + { + "epoch": 1.4650149301912678, + "grad_norm": 0.7132222056388855, + "learning_rate": 4.239199637592595e-06, + "loss": 2.3724, + "step": 18153 + }, + { + "epoch": 1.465095633927851, + "grad_norm": 0.7064909338951111, + "learning_rate": 4.234653028407054e-06, + "loss": 2.3697, + "step": 18154 + }, + { + "epoch": 1.4651763376644338, + "grad_norm": 0.6656587719917297, + "learning_rate": 4.2301088059442884e-06, + "loss": 2.358, + "step": 18155 + }, + { + "epoch": 1.4652570414010169, + "grad_norm": 0.6481126546859741, + "learning_rate": 4.225566970317552e-06, + "loss": 2.4053, + "step": 18156 + }, + { + "epoch": 1.4653377451376, + "grad_norm": 0.7085857391357422, + 
"learning_rate": 4.221027521640064e-06, + "loss": 2.4376, + "step": 18157 + }, + { + "epoch": 1.4654184488741828, + "grad_norm": 0.6920461058616638, + "learning_rate": 4.216490460024914e-06, + "loss": 2.4671, + "step": 18158 + }, + { + "epoch": 1.465499152610766, + "grad_norm": 0.7046825885772705, + "learning_rate": 4.21195578558522e-06, + "loss": 2.4248, + "step": 18159 + }, + { + "epoch": 1.465579856347349, + "grad_norm": 0.7101480960845947, + "learning_rate": 4.2074234984339715e-06, + "loss": 2.4433, + "step": 18160 + }, + { + "epoch": 1.4656605600839319, + "grad_norm": 0.7143067121505737, + "learning_rate": 4.202893598684132e-06, + "loss": 2.4073, + "step": 18161 + }, + { + "epoch": 1.465741263820515, + "grad_norm": 0.7557536959648132, + "learning_rate": 4.198366086448602e-06, + "loss": 2.4053, + "step": 18162 + }, + { + "epoch": 1.4658219675570978, + "grad_norm": 0.6909283399581909, + "learning_rate": 4.193840961840223e-06, + "loss": 2.3831, + "step": 18163 + }, + { + "epoch": 1.465902671293681, + "grad_norm": 0.7262178659439087, + "learning_rate": 4.189318224971761e-06, + "loss": 2.4886, + "step": 18164 + }, + { + "epoch": 1.4659833750302638, + "grad_norm": 0.699925422668457, + "learning_rate": 4.184797875955937e-06, + "loss": 2.4073, + "step": 18165 + }, + { + "epoch": 1.4660640787668469, + "grad_norm": 0.6438626050949097, + "learning_rate": 4.180279914905439e-06, + "loss": 2.3531, + "step": 18166 + }, + { + "epoch": 1.46614478250343, + "grad_norm": 0.729622483253479, + "learning_rate": 4.175764341932809e-06, + "loss": 2.4312, + "step": 18167 + }, + { + "epoch": 1.4662254862400128, + "grad_norm": 0.6617357730865479, + "learning_rate": 4.1712511571506354e-06, + "loss": 2.3947, + "step": 18168 + }, + { + "epoch": 1.466306189976596, + "grad_norm": 0.7361389994621277, + "learning_rate": 4.166740360671384e-06, + "loss": 2.3975, + "step": 18169 + }, + { + "epoch": 1.466386893713179, + "grad_norm": 0.711264967918396, + "learning_rate": 4.1622319526074645e-06, + 
"loss": 2.4527, + "step": 18170 + }, + { + "epoch": 1.4664675974497619, + "grad_norm": 0.71773362159729, + "learning_rate": 4.157725933071233e-06, + "loss": 2.4529, + "step": 18171 + }, + { + "epoch": 1.466548301186345, + "grad_norm": 0.7069514393806458, + "learning_rate": 4.153222302175019e-06, + "loss": 2.4674, + "step": 18172 + }, + { + "epoch": 1.466629004922928, + "grad_norm": 0.795305073261261, + "learning_rate": 4.148721060031069e-06, + "loss": 2.4234, + "step": 18173 + }, + { + "epoch": 1.466709708659511, + "grad_norm": 0.6819591522216797, + "learning_rate": 4.144222206751524e-06, + "loss": 2.3764, + "step": 18174 + }, + { + "epoch": 1.466790412396094, + "grad_norm": 0.6816638112068176, + "learning_rate": 4.139725742448541e-06, + "loss": 2.447, + "step": 18175 + }, + { + "epoch": 1.466871116132677, + "grad_norm": 0.7039487361907959, + "learning_rate": 4.135231667234185e-06, + "loss": 2.3506, + "step": 18176 + }, + { + "epoch": 1.46695181986926, + "grad_norm": 0.6754382252693176, + "learning_rate": 4.130739981220433e-06, + "loss": 2.409, + "step": 18177 + }, + { + "epoch": 1.467032523605843, + "grad_norm": 0.7245250344276428, + "learning_rate": 4.12625068451924e-06, + "loss": 2.4222, + "step": 18178 + }, + { + "epoch": 1.4671132273424259, + "grad_norm": 0.7069350481033325, + "learning_rate": 4.121763777242515e-06, + "loss": 2.4346, + "step": 18179 + }, + { + "epoch": 1.467193931079009, + "grad_norm": 0.7400095462799072, + "learning_rate": 4.117279259502061e-06, + "loss": 2.4172, + "step": 18180 + }, + { + "epoch": 1.4672746348155918, + "grad_norm": 0.7178627252578735, + "learning_rate": 4.11279713140964e-06, + "loss": 2.3841, + "step": 18181 + }, + { + "epoch": 1.467355338552175, + "grad_norm": 0.6641840934753418, + "learning_rate": 4.108317393076966e-06, + "loss": 2.4728, + "step": 18182 + }, + { + "epoch": 1.467436042288758, + "grad_norm": 0.6809187531471252, + "learning_rate": 4.103840044615681e-06, + "loss": 2.4372, + "step": 18183 + }, + { + "epoch": 
1.4675167460253409, + "grad_norm": 0.6674811244010925, + "learning_rate": 4.099365086137385e-06, + "loss": 2.3998, + "step": 18184 + }, + { + "epoch": 1.467597449761924, + "grad_norm": 0.7920583486557007, + "learning_rate": 4.094892517753601e-06, + "loss": 2.4203, + "step": 18185 + }, + { + "epoch": 1.467678153498507, + "grad_norm": 0.6881268620491028, + "learning_rate": 4.090422339575795e-06, + "loss": 2.3943, + "step": 18186 + }, + { + "epoch": 1.46775885723509, + "grad_norm": 0.6778728365898132, + "learning_rate": 4.085954551715365e-06, + "loss": 2.4208, + "step": 18187 + }, + { + "epoch": 1.467839560971673, + "grad_norm": 0.6784557104110718, + "learning_rate": 4.081489154283669e-06, + "loss": 2.4067, + "step": 18188 + }, + { + "epoch": 1.467920264708256, + "grad_norm": 0.6981526017189026, + "learning_rate": 4.0770261473920155e-06, + "loss": 2.42, + "step": 18189 + }, + { + "epoch": 1.468000968444839, + "grad_norm": 0.6901406645774841, + "learning_rate": 4.072565531151595e-06, + "loss": 2.4133, + "step": 18190 + }, + { + "epoch": 1.468081672181422, + "grad_norm": 0.6496356129646301, + "learning_rate": 4.068107305673608e-06, + "loss": 2.38, + "step": 18191 + }, + { + "epoch": 1.4681623759180051, + "grad_norm": 0.7348635792732239, + "learning_rate": 4.063651471069152e-06, + "loss": 2.4665, + "step": 18192 + }, + { + "epoch": 1.468243079654588, + "grad_norm": 0.8344720005989075, + "learning_rate": 4.059198027449274e-06, + "loss": 2.3849, + "step": 18193 + }, + { + "epoch": 1.468323783391171, + "grad_norm": 0.7210039496421814, + "learning_rate": 4.0547469749249835e-06, + "loss": 2.4604, + "step": 18194 + }, + { + "epoch": 1.468404487127754, + "grad_norm": 0.7330215573310852, + "learning_rate": 4.050298313607203e-06, + "loss": 2.4768, + "step": 18195 + }, + { + "epoch": 1.468485190864337, + "grad_norm": 0.759384274482727, + "learning_rate": 4.045852043606801e-06, + "loss": 2.3686, + "step": 18196 + }, + { + "epoch": 1.46856589460092, + "grad_norm": 0.7119100689888, + 
"learning_rate": 4.041408165034588e-06, + "loss": 2.401, + "step": 18197 + }, + { + "epoch": 1.468646598337503, + "grad_norm": 0.7030404210090637, + "learning_rate": 4.036966678001342e-06, + "loss": 2.4242, + "step": 18198 + }, + { + "epoch": 1.468727302074086, + "grad_norm": 0.6559282541275024, + "learning_rate": 4.032527582617718e-06, + "loss": 2.4567, + "step": 18199 + }, + { + "epoch": 1.468808005810669, + "grad_norm": 0.7369895577430725, + "learning_rate": 4.028090878994361e-06, + "loss": 2.3795, + "step": 18200 + }, + { + "epoch": 1.468888709547252, + "grad_norm": 0.6997527480125427, + "learning_rate": 4.0236565672418624e-06, + "loss": 2.4247, + "step": 18201 + }, + { + "epoch": 1.4689694132838351, + "grad_norm": 0.6773854494094849, + "learning_rate": 4.0192246474707205e-06, + "loss": 2.4022, + "step": 18202 + }, + { + "epoch": 1.469050117020418, + "grad_norm": 0.6483170390129089, + "learning_rate": 4.014795119791404e-06, + "loss": 2.4022, + "step": 18203 + }, + { + "epoch": 1.469130820757001, + "grad_norm": 0.6568546295166016, + "learning_rate": 4.0103679843142895e-06, + "loss": 2.3802, + "step": 18204 + }, + { + "epoch": 1.4692115244935842, + "grad_norm": 0.6876521706581116, + "learning_rate": 4.005943241149746e-06, + "loss": 2.4521, + "step": 18205 + }, + { + "epoch": 1.469292228230167, + "grad_norm": 0.7165477275848389, + "learning_rate": 4.001520890408017e-06, + "loss": 2.3683, + "step": 18206 + }, + { + "epoch": 1.4693729319667501, + "grad_norm": 0.7466868162155151, + "learning_rate": 3.997100932199327e-06, + "loss": 2.4073, + "step": 18207 + }, + { + "epoch": 1.469453635703333, + "grad_norm": 0.6731385588645935, + "learning_rate": 3.992683366633842e-06, + "loss": 2.4025, + "step": 18208 + }, + { + "epoch": 1.469534339439916, + "grad_norm": 0.7291627526283264, + "learning_rate": 3.988268193821654e-06, + "loss": 2.4205, + "step": 18209 + }, + { + "epoch": 1.469615043176499, + "grad_norm": 0.6596493721008301, + "learning_rate": 3.983855413872795e-06, + 
"loss": 2.3996, + "step": 18210 + }, + { + "epoch": 1.469695746913082, + "grad_norm": 0.7010817527770996, + "learning_rate": 3.979445026897244e-06, + "loss": 2.4094, + "step": 18211 + }, + { + "epoch": 1.4697764506496651, + "grad_norm": 0.715941309928894, + "learning_rate": 3.975037033004925e-06, + "loss": 2.4256, + "step": 18212 + }, + { + "epoch": 1.469857154386248, + "grad_norm": 0.728072464466095, + "learning_rate": 3.970631432305694e-06, + "loss": 2.4084, + "step": 18213 + }, + { + "epoch": 1.469937858122831, + "grad_norm": 0.7201817035675049, + "learning_rate": 3.966228224909363e-06, + "loss": 2.3945, + "step": 18214 + }, + { + "epoch": 1.4700185618594142, + "grad_norm": 0.70964115858078, + "learning_rate": 3.961827410925644e-06, + "loss": 2.3664, + "step": 18215 + }, + { + "epoch": 1.470099265595997, + "grad_norm": 0.692813515663147, + "learning_rate": 3.957428990464229e-06, + "loss": 2.3622, + "step": 18216 + }, + { + "epoch": 1.47017996933258, + "grad_norm": 0.6732754707336426, + "learning_rate": 3.953032963634762e-06, + "loss": 2.3618, + "step": 18217 + }, + { + "epoch": 1.4702606730691632, + "grad_norm": 0.726357638835907, + "learning_rate": 3.9486393305467775e-06, + "loss": 2.4024, + "step": 18218 + }, + { + "epoch": 1.470341376805746, + "grad_norm": 0.7013699412345886, + "learning_rate": 3.944248091309765e-06, + "loss": 2.4343, + "step": 18219 + }, + { + "epoch": 1.4704220805423291, + "grad_norm": 0.6978548169136047, + "learning_rate": 3.939859246033195e-06, + "loss": 2.4206, + "step": 18220 + }, + { + "epoch": 1.4705027842789122, + "grad_norm": 0.8108847141265869, + "learning_rate": 3.935472794826434e-06, + "loss": 2.3756, + "step": 18221 + }, + { + "epoch": 1.470583488015495, + "grad_norm": 0.6821001768112183, + "learning_rate": 3.931088737798805e-06, + "loss": 2.4323, + "step": 18222 + }, + { + "epoch": 1.4706641917520782, + "grad_norm": 0.688704252243042, + "learning_rate": 3.9267070750595654e-06, + "loss": 2.412, + "step": 18223 + }, + { + 
"epoch": 1.470744895488661, + "grad_norm": 0.7279560565948486, + "learning_rate": 3.92232780671794e-06, + "loss": 2.3503, + "step": 18224 + }, + { + "epoch": 1.4708255992252441, + "grad_norm": 0.6519368886947632, + "learning_rate": 3.917950932883052e-06, + "loss": 2.4106, + "step": 18225 + }, + { + "epoch": 1.470906302961827, + "grad_norm": 0.7112751603126526, + "learning_rate": 3.91357645366397e-06, + "loss": 2.4313, + "step": 18226 + }, + { + "epoch": 1.47098700669841, + "grad_norm": 0.7301532626152039, + "learning_rate": 3.909204369169761e-06, + "loss": 2.4218, + "step": 18227 + }, + { + "epoch": 1.4710677104349932, + "grad_norm": 0.7091543078422546, + "learning_rate": 3.90483467950935e-06, + "loss": 2.4016, + "step": 18228 + }, + { + "epoch": 1.471148414171576, + "grad_norm": 0.6589071750640869, + "learning_rate": 3.900467384791651e-06, + "loss": 2.4202, + "step": 18229 + }, + { + "epoch": 1.4712291179081591, + "grad_norm": 0.6887986063957214, + "learning_rate": 3.896102485125519e-06, + "loss": 2.3689, + "step": 18230 + }, + { + "epoch": 1.4713098216447422, + "grad_norm": 0.6951364278793335, + "learning_rate": 3.891739980619724e-06, + "loss": 2.4067, + "step": 18231 + }, + { + "epoch": 1.471390525381325, + "grad_norm": 0.6578256487846375, + "learning_rate": 3.887379871383001e-06, + "loss": 2.4535, + "step": 18232 + }, + { + "epoch": 1.4714712291179082, + "grad_norm": 0.6622738838195801, + "learning_rate": 3.883022157524008e-06, + "loss": 2.4566, + "step": 18233 + }, + { + "epoch": 1.4715519328544913, + "grad_norm": 0.699840784072876, + "learning_rate": 3.878666839151357e-06, + "loss": 2.4145, + "step": 18234 + }, + { + "epoch": 1.4716326365910741, + "grad_norm": 0.7405043840408325, + "learning_rate": 3.874313916373595e-06, + "loss": 2.3819, + "step": 18235 + }, + { + "epoch": 1.4717133403276572, + "grad_norm": 0.6740127801895142, + "learning_rate": 3.869963389299203e-06, + "loss": 2.4152, + "step": 18236 + }, + { + "epoch": 1.4717940440642403, + "grad_norm": 
0.6735069155693054, + "learning_rate": 3.865615258036615e-06, + "loss": 2.4111, + "step": 18237 + }, + { + "epoch": 1.4718747478008232, + "grad_norm": 0.654137134552002, + "learning_rate": 3.861269522694188e-06, + "loss": 2.4427, + "step": 18238 + }, + { + "epoch": 1.4719554515374063, + "grad_norm": 0.6844269633293152, + "learning_rate": 3.856926183380227e-06, + "loss": 2.4553, + "step": 18239 + }, + { + "epoch": 1.4720361552739891, + "grad_norm": 0.6604157090187073, + "learning_rate": 3.85258524020301e-06, + "loss": 2.3749, + "step": 18240 + }, + { + "epoch": 1.4721168590105722, + "grad_norm": 0.6837483048439026, + "learning_rate": 3.848246693270674e-06, + "loss": 2.384, + "step": 18241 + }, + { + "epoch": 1.472197562747155, + "grad_norm": 0.6852267384529114, + "learning_rate": 3.8439105426913865e-06, + "loss": 2.4112, + "step": 18242 + }, + { + "epoch": 1.4722782664837382, + "grad_norm": 0.6974645256996155, + "learning_rate": 3.839576788573196e-06, + "loss": 2.3884, + "step": 18243 + }, + { + "epoch": 1.4723589702203213, + "grad_norm": 0.6737220287322998, + "learning_rate": 3.835245431024126e-06, + "loss": 2.424, + "step": 18244 + }, + { + "epoch": 1.4724396739569041, + "grad_norm": 0.695035457611084, + "learning_rate": 3.8309164701521016e-06, + "loss": 2.3358, + "step": 18245 + }, + { + "epoch": 1.4725203776934872, + "grad_norm": 0.6795023679733276, + "learning_rate": 3.826589906065048e-06, + "loss": 2.3569, + "step": 18246 + }, + { + "epoch": 1.4726010814300703, + "grad_norm": 0.6965143084526062, + "learning_rate": 3.8222657388707675e-06, + "loss": 2.4078, + "step": 18247 + }, + { + "epoch": 1.4726817851666532, + "grad_norm": 0.6551299095153809, + "learning_rate": 3.817943968677029e-06, + "loss": 2.3622, + "step": 18248 + }, + { + "epoch": 1.4727624889032362, + "grad_norm": 0.7963354587554932, + "learning_rate": 3.8136245955915582e-06, + "loss": 2.4108, + "step": 18249 + }, + { + "epoch": 1.4728431926398193, + "grad_norm": 0.6898682117462158, + "learning_rate": 
3.8093076197219913e-06, + "loss": 2.405, + "step": 18250 + }, + { + "epoch": 1.4729238963764022, + "grad_norm": 0.7282465100288391, + "learning_rate": 3.8049930411759195e-06, + "loss": 2.3696, + "step": 18251 + }, + { + "epoch": 1.4730046001129853, + "grad_norm": 0.7880160212516785, + "learning_rate": 3.800680860060879e-06, + "loss": 2.4156, + "step": 18252 + }, + { + "epoch": 1.4730853038495684, + "grad_norm": 0.7149094343185425, + "learning_rate": 3.7963710764843397e-06, + "loss": 2.415, + "step": 18253 + }, + { + "epoch": 1.4731660075861512, + "grad_norm": 0.7015249133110046, + "learning_rate": 3.7920636905537155e-06, + "loss": 2.3672, + "step": 18254 + }, + { + "epoch": 1.4732467113227343, + "grad_norm": 0.6848294138908386, + "learning_rate": 3.787758702376343e-06, + "loss": 2.4225, + "step": 18255 + }, + { + "epoch": 1.4733274150593172, + "grad_norm": 0.6866233348846436, + "learning_rate": 3.7834561120595467e-06, + "loss": 2.4119, + "step": 18256 + }, + { + "epoch": 1.4734081187959003, + "grad_norm": 0.7697205543518066, + "learning_rate": 3.7791559197105197e-06, + "loss": 2.3943, + "step": 18257 + }, + { + "epoch": 1.4734888225324831, + "grad_norm": 0.6798329949378967, + "learning_rate": 3.7748581254364533e-06, + "loss": 2.4329, + "step": 18258 + }, + { + "epoch": 1.4735695262690662, + "grad_norm": 0.6995163559913635, + "learning_rate": 3.7705627293444732e-06, + "loss": 2.3561, + "step": 18259 + }, + { + "epoch": 1.4736502300056493, + "grad_norm": 0.6825453042984009, + "learning_rate": 3.766269731541594e-06, + "loss": 2.3887, + "step": 18260 + }, + { + "epoch": 1.4737309337422322, + "grad_norm": 0.7159842848777771, + "learning_rate": 3.7619791321348407e-06, + "loss": 2.4096, + "step": 18261 + }, + { + "epoch": 1.4738116374788153, + "grad_norm": 0.6697775721549988, + "learning_rate": 3.757690931231139e-06, + "loss": 2.4095, + "step": 18262 + }, + { + "epoch": 1.4738923412153984, + "grad_norm": 0.71161949634552, + "learning_rate": 3.7534051289373486e-06, + 
"loss": 2.4444, + "step": 18263 + }, + { + "epoch": 1.4739730449519812, + "grad_norm": 0.6437444090843201, + "learning_rate": 3.749121725360294e-06, + "loss": 2.386, + "step": 18264 + }, + { + "epoch": 1.4740537486885643, + "grad_norm": 0.7008254528045654, + "learning_rate": 3.744840720606746e-06, + "loss": 2.417, + "step": 18265 + }, + { + "epoch": 1.4741344524251474, + "grad_norm": 0.6326326727867126, + "learning_rate": 3.7405621147833634e-06, + "loss": 2.3346, + "step": 18266 + }, + { + "epoch": 1.4742151561617303, + "grad_norm": 0.6802831888198853, + "learning_rate": 3.736285907996806e-06, + "loss": 2.3794, + "step": 18267 + }, + { + "epoch": 1.4742958598983134, + "grad_norm": 0.6425875425338745, + "learning_rate": 3.7320121003536323e-06, + "loss": 2.3931, + "step": 18268 + }, + { + "epoch": 1.4743765636348962, + "grad_norm": 0.6619433760643005, + "learning_rate": 3.7277406919603797e-06, + "loss": 2.4162, + "step": 18269 + }, + { + "epoch": 1.4744572673714793, + "grad_norm": 0.6600280404090881, + "learning_rate": 3.723471682923474e-06, + "loss": 2.4334, + "step": 18270 + }, + { + "epoch": 1.4745379711080622, + "grad_norm": 0.7603200078010559, + "learning_rate": 3.719205073349319e-06, + "loss": 2.4413, + "step": 18271 + }, + { + "epoch": 1.4746186748446453, + "grad_norm": 0.6581423878669739, + "learning_rate": 3.714940863344263e-06, + "loss": 2.4117, + "step": 18272 + }, + { + "epoch": 1.4746993785812283, + "grad_norm": 0.6989814043045044, + "learning_rate": 3.710679053014565e-06, + "loss": 2.377, + "step": 18273 + }, + { + "epoch": 1.4747800823178112, + "grad_norm": 0.6707834005355835, + "learning_rate": 3.7064196424664522e-06, + "loss": 2.3407, + "step": 18274 + }, + { + "epoch": 1.4748607860543943, + "grad_norm": 0.7205011248588562, + "learning_rate": 3.702162631806083e-06, + "loss": 2.4182, + "step": 18275 + }, + { + "epoch": 1.4749414897909774, + "grad_norm": 0.7529718279838562, + "learning_rate": 3.69790802113954e-06, + "loss": 2.3434, + "step": 18276 + }, 
+ { + "epoch": 1.4750221935275603, + "grad_norm": 0.6794082522392273, + "learning_rate": 3.69365581057286e-06, + "loss": 2.4157, + "step": 18277 + }, + { + "epoch": 1.4751028972641433, + "grad_norm": 0.7068135738372803, + "learning_rate": 3.689406000212037e-06, + "loss": 2.3516, + "step": 18278 + }, + { + "epoch": 1.4751836010007264, + "grad_norm": 0.7128797769546509, + "learning_rate": 3.6851585901629736e-06, + "loss": 2.3809, + "step": 18279 + }, + { + "epoch": 1.4752643047373093, + "grad_norm": 0.7014521956443787, + "learning_rate": 3.68091358053152e-06, + "loss": 2.4091, + "step": 18280 + }, + { + "epoch": 1.4753450084738924, + "grad_norm": 0.7495442628860474, + "learning_rate": 3.6766709714234793e-06, + "loss": 2.3977, + "step": 18281 + }, + { + "epoch": 1.4754257122104755, + "grad_norm": 0.6657838225364685, + "learning_rate": 3.6724307629446007e-06, + "loss": 2.3892, + "step": 18282 + }, + { + "epoch": 1.4755064159470583, + "grad_norm": 0.688546895980835, + "learning_rate": 3.668192955200522e-06, + "loss": 2.4159, + "step": 18283 + }, + { + "epoch": 1.4755871196836414, + "grad_norm": 0.6888083219528198, + "learning_rate": 3.6639575482969034e-06, + "loss": 2.3221, + "step": 18284 + }, + { + "epoch": 1.4756678234202243, + "grad_norm": 0.8717848658561707, + "learning_rate": 3.6597245423393046e-06, + "loss": 2.4453, + "step": 18285 + }, + { + "epoch": 1.4757485271568074, + "grad_norm": 0.6860103011131287, + "learning_rate": 3.6554939374331963e-06, + "loss": 2.4251, + "step": 18286 + }, + { + "epoch": 1.4758292308933902, + "grad_norm": 0.6638378500938416, + "learning_rate": 3.6512657336840174e-06, + "loss": 2.4506, + "step": 18287 + }, + { + "epoch": 1.4759099346299733, + "grad_norm": 0.6854584813117981, + "learning_rate": 3.6470399311971716e-06, + "loss": 2.3474, + "step": 18288 + }, + { + "epoch": 1.4759906383665564, + "grad_norm": 0.6957666873931885, + "learning_rate": 3.6428165300779526e-06, + "loss": 2.3452, + "step": 18289 + }, + { + "epoch": 
1.4760713421031393, + "grad_norm": 0.646803081035614, + "learning_rate": 3.638595530431621e-06, + "loss": 2.3617, + "step": 18290 + }, + { + "epoch": 1.4761520458397224, + "grad_norm": 0.6761566996574402, + "learning_rate": 3.6343769323633924e-06, + "loss": 2.4115, + "step": 18291 + }, + { + "epoch": 1.4762327495763055, + "grad_norm": 0.7071232795715332, + "learning_rate": 3.6301607359783827e-06, + "loss": 2.4088, + "step": 18292 + }, + { + "epoch": 1.4763134533128883, + "grad_norm": 0.6781535148620605, + "learning_rate": 3.625946941381675e-06, + "loss": 2.3733, + "step": 18293 + }, + { + "epoch": 1.4763941570494714, + "grad_norm": 0.6833710670471191, + "learning_rate": 3.6217355486782957e-06, + "loss": 2.4711, + "step": 18294 + }, + { + "epoch": 1.4764748607860545, + "grad_norm": 0.7589881420135498, + "learning_rate": 3.6175265579732055e-06, + "loss": 2.3845, + "step": 18295 + }, + { + "epoch": 1.4765555645226374, + "grad_norm": 0.6896101236343384, + "learning_rate": 3.6133199693712983e-06, + "loss": 2.3758, + "step": 18296 + }, + { + "epoch": 1.4766362682592205, + "grad_norm": 0.6634401082992554, + "learning_rate": 3.6091157829774127e-06, + "loss": 2.369, + "step": 18297 + }, + { + "epoch": 1.4767169719958035, + "grad_norm": 0.6652467846870422, + "learning_rate": 3.604913998896342e-06, + "loss": 2.4098, + "step": 18298 + }, + { + "epoch": 1.4767976757323864, + "grad_norm": 0.7705509662628174, + "learning_rate": 3.600714617232781e-06, + "loss": 2.395, + "step": 18299 + }, + { + "epoch": 1.4768783794689695, + "grad_norm": 0.6642572283744812, + "learning_rate": 3.5965176380914122e-06, + "loss": 2.4144, + "step": 18300 + }, + { + "epoch": 1.4769590832055524, + "grad_norm": 0.7557141184806824, + "learning_rate": 3.59232306157683e-06, + "loss": 2.3329, + "step": 18301 + }, + { + "epoch": 1.4770397869421354, + "grad_norm": 0.715446949005127, + "learning_rate": 3.5881308877935504e-06, + "loss": 2.4349, + "step": 18302 + }, + { + "epoch": 1.4771204906787183, + 
"grad_norm": 0.7579060196876526, + "learning_rate": 3.583941116846079e-06, + "loss": 2.4296, + "step": 18303 + }, + { + "epoch": 1.4772011944153014, + "grad_norm": 0.6764013767242432, + "learning_rate": 3.5797537488388323e-06, + "loss": 2.4128, + "step": 18304 + }, + { + "epoch": 1.4772818981518845, + "grad_norm": 0.7495453953742981, + "learning_rate": 3.57556878387616e-06, + "loss": 2.5065, + "step": 18305 + }, + { + "epoch": 1.4773626018884674, + "grad_norm": 0.7046003341674805, + "learning_rate": 3.5713862220623785e-06, + "loss": 2.4498, + "step": 18306 + }, + { + "epoch": 1.4774433056250504, + "grad_norm": 0.6819034814834595, + "learning_rate": 3.567206063501727e-06, + "loss": 2.4052, + "step": 18307 + }, + { + "epoch": 1.4775240093616335, + "grad_norm": 0.6607410907745361, + "learning_rate": 3.5630283082983663e-06, + "loss": 2.396, + "step": 18308 + }, + { + "epoch": 1.4776047130982164, + "grad_norm": 0.7284536957740784, + "learning_rate": 3.5588529565564244e-06, + "loss": 2.4311, + "step": 18309 + }, + { + "epoch": 1.4776854168347995, + "grad_norm": 0.7704942226409912, + "learning_rate": 3.554680008379985e-06, + "loss": 2.4481, + "step": 18310 + }, + { + "epoch": 1.4777661205713826, + "grad_norm": 0.7008868455886841, + "learning_rate": 3.5505094638730083e-06, + "loss": 2.4954, + "step": 18311 + }, + { + "epoch": 1.4778468243079654, + "grad_norm": 0.6746332049369812, + "learning_rate": 3.546341323139468e-06, + "loss": 2.3946, + "step": 18312 + }, + { + "epoch": 1.4779275280445485, + "grad_norm": 0.6415507197380066, + "learning_rate": 3.5421755862832253e-06, + "loss": 2.3786, + "step": 18313 + }, + { + "epoch": 1.4780082317811314, + "grad_norm": 0.7158175110816956, + "learning_rate": 3.5380122534081184e-06, + "loss": 2.4348, + "step": 18314 + }, + { + "epoch": 1.4780889355177145, + "grad_norm": 0.7158238887786865, + "learning_rate": 3.5338513246178985e-06, + "loss": 2.385, + "step": 18315 + }, + { + "epoch": 1.4781696392542973, + "grad_norm": 
0.6766643524169922, + "learning_rate": 3.529692800016271e-06, + "loss": 2.4401, + "step": 18316 + }, + { + "epoch": 1.4782503429908804, + "grad_norm": 0.7073598504066467, + "learning_rate": 3.525536679706887e-06, + "loss": 2.4669, + "step": 18317 + }, + { + "epoch": 1.4783310467274635, + "grad_norm": 0.7213411927223206, + "learning_rate": 3.521382963793296e-06, + "loss": 2.4186, + "step": 18318 + }, + { + "epoch": 1.4784117504640464, + "grad_norm": 0.7676820755004883, + "learning_rate": 3.5172316523790384e-06, + "loss": 2.4653, + "step": 18319 + }, + { + "epoch": 1.4784924542006295, + "grad_norm": 0.8283714056015015, + "learning_rate": 3.5130827455675975e-06, + "loss": 2.3896, + "step": 18320 + }, + { + "epoch": 1.4785731579372126, + "grad_norm": 0.685022234916687, + "learning_rate": 3.508936243462335e-06, + "loss": 2.3726, + "step": 18321 + }, + { + "epoch": 1.4786538616737954, + "grad_norm": 0.6866634488105774, + "learning_rate": 3.5047921461666135e-06, + "loss": 2.4511, + "step": 18322 + }, + { + "epoch": 1.4787345654103785, + "grad_norm": 0.6487671732902527, + "learning_rate": 3.500650453783716e-06, + "loss": 2.4113, + "step": 18323 + }, + { + "epoch": 1.4788152691469616, + "grad_norm": 0.6886214017868042, + "learning_rate": 3.4965111664168604e-06, + "loss": 2.4272, + "step": 18324 + }, + { + "epoch": 1.4788959728835445, + "grad_norm": 0.6808422207832336, + "learning_rate": 3.4923742841692085e-06, + "loss": 2.3936, + "step": 18325 + }, + { + "epoch": 1.4789766766201276, + "grad_norm": 0.713890552520752, + "learning_rate": 3.4882398071438783e-06, + "loss": 2.3742, + "step": 18326 + }, + { + "epoch": 1.4790573803567106, + "grad_norm": 0.6884218454360962, + "learning_rate": 3.4841077354438758e-06, + "loss": 2.3663, + "step": 18327 + }, + { + "epoch": 1.4791380840932935, + "grad_norm": 0.6903060674667358, + "learning_rate": 3.4799780691722074e-06, + "loss": 2.4586, + "step": 18328 + }, + { + "epoch": 1.4792187878298766, + "grad_norm": 0.7081164717674255, + 
"learning_rate": 3.475850808431791e-06, + "loss": 2.3848, + "step": 18329 + }, + { + "epoch": 1.4792994915664595, + "grad_norm": 0.7136076092720032, + "learning_rate": 3.4717259533254997e-06, + "loss": 2.4092, + "step": 18330 + }, + { + "epoch": 1.4793801953030425, + "grad_norm": 0.6860584616661072, + "learning_rate": 3.4676035039561182e-06, + "loss": 2.4348, + "step": 18331 + }, + { + "epoch": 1.4794608990396254, + "grad_norm": 0.6885141730308533, + "learning_rate": 3.4634834604263978e-06, + "loss": 2.4029, + "step": 18332 + }, + { + "epoch": 1.4795416027762085, + "grad_norm": 0.6577363610267639, + "learning_rate": 3.4593658228390223e-06, + "loss": 2.3659, + "step": 18333 + }, + { + "epoch": 1.4796223065127916, + "grad_norm": 0.6664844155311584, + "learning_rate": 3.4552505912965884e-06, + "loss": 2.3532, + "step": 18334 + }, + { + "epoch": 1.4797030102493745, + "grad_norm": 0.7257712483406067, + "learning_rate": 3.451137765901702e-06, + "loss": 2.4117, + "step": 18335 + }, + { + "epoch": 1.4797837139859575, + "grad_norm": 0.7410221099853516, + "learning_rate": 3.447027346756837e-06, + "loss": 2.4092, + "step": 18336 + }, + { + "epoch": 1.4798644177225406, + "grad_norm": 0.7233858108520508, + "learning_rate": 3.442919333964445e-06, + "loss": 2.3718, + "step": 18337 + }, + { + "epoch": 1.4799451214591235, + "grad_norm": 0.704576849937439, + "learning_rate": 3.4388137276268996e-06, + "loss": 2.4513, + "step": 18338 + }, + { + "epoch": 1.4800258251957066, + "grad_norm": 0.662105143070221, + "learning_rate": 3.434710527846552e-06, + "loss": 2.3803, + "step": 18339 + }, + { + "epoch": 1.4801065289322897, + "grad_norm": 0.6548754572868347, + "learning_rate": 3.4306097347256207e-06, + "loss": 2.3922, + "step": 18340 + }, + { + "epoch": 1.4801872326688725, + "grad_norm": 0.6719009280204773, + "learning_rate": 3.4265113483663238e-06, + "loss": 2.3943, + "step": 18341 + }, + { + "epoch": 1.4802679364054556, + "grad_norm": 0.7208795547485352, + "learning_rate": 
3.422415368870835e-06, + "loss": 2.362, + "step": 18342 + }, + { + "epoch": 1.4803486401420387, + "grad_norm": 0.7121373414993286, + "learning_rate": 3.4183217963411953e-06, + "loss": 2.4508, + "step": 18343 + }, + { + "epoch": 1.4804293438786216, + "grad_norm": 0.651792585849762, + "learning_rate": 3.4142306308794334e-06, + "loss": 2.3923, + "step": 18344 + }, + { + "epoch": 1.4805100476152047, + "grad_norm": 0.6823711395263672, + "learning_rate": 3.4101418725875245e-06, + "loss": 2.4156, + "step": 18345 + }, + { + "epoch": 1.4805907513517875, + "grad_norm": 0.6949301362037659, + "learning_rate": 3.406055521567386e-06, + "loss": 2.3666, + "step": 18346 + }, + { + "epoch": 1.4806714550883706, + "grad_norm": 0.723517894744873, + "learning_rate": 3.401971577920826e-06, + "loss": 2.4534, + "step": 18347 + }, + { + "epoch": 1.4807521588249535, + "grad_norm": 0.6967771053314209, + "learning_rate": 3.3978900417496516e-06, + "loss": 2.4345, + "step": 18348 + }, + { + "epoch": 1.4808328625615366, + "grad_norm": 0.6820134520530701, + "learning_rate": 3.393810913155593e-06, + "loss": 2.3905, + "step": 18349 + }, + { + "epoch": 1.4809135662981197, + "grad_norm": 0.7566741704940796, + "learning_rate": 3.3897341922402794e-06, + "loss": 2.457, + "step": 18350 + }, + { + "epoch": 1.4809942700347025, + "grad_norm": 0.732586145401001, + "learning_rate": 3.3856598791053297e-06, + "loss": 2.395, + "step": 18351 + }, + { + "epoch": 1.4810749737712856, + "grad_norm": 0.7377440333366394, + "learning_rate": 3.3815879738523073e-06, + "loss": 2.412, + "step": 18352 + }, + { + "epoch": 1.4811556775078687, + "grad_norm": 0.6709005832672119, + "learning_rate": 3.3775184765826527e-06, + "loss": 2.4048, + "step": 18353 + }, + { + "epoch": 1.4812363812444516, + "grad_norm": 0.6626690030097961, + "learning_rate": 3.373451387397819e-06, + "loss": 2.3663, + "step": 18354 + }, + { + "epoch": 1.4813170849810346, + "grad_norm": 0.671341598033905, + "learning_rate": 3.369386706399158e-06, + "loss": 
2.4147, + "step": 18355 + }, + { + "epoch": 1.4813977887176177, + "grad_norm": 0.7172929048538208, + "learning_rate": 3.3653244336879773e-06, + "loss": 2.4042, + "step": 18356 + }, + { + "epoch": 1.4814784924542006, + "grad_norm": 0.6489603519439697, + "learning_rate": 3.361264569365519e-06, + "loss": 2.3811, + "step": 18357 + }, + { + "epoch": 1.4815591961907837, + "grad_norm": 0.7350562214851379, + "learning_rate": 3.3572071135329786e-06, + "loss": 2.4428, + "step": 18358 + }, + { + "epoch": 1.4816398999273666, + "grad_norm": 0.6472034454345703, + "learning_rate": 3.3531520662914428e-06, + "loss": 2.3613, + "step": 18359 + }, + { + "epoch": 1.4817206036639496, + "grad_norm": 0.6974912285804749, + "learning_rate": 3.3490994277419975e-06, + "loss": 2.3932, + "step": 18360 + }, + { + "epoch": 1.4818013074005325, + "grad_norm": 0.7560031414031982, + "learning_rate": 3.34504919798565e-06, + "loss": 2.3633, + "step": 18361 + }, + { + "epoch": 1.4818820111371156, + "grad_norm": 0.6837224364280701, + "learning_rate": 3.341001377123343e-06, + "loss": 2.4298, + "step": 18362 + }, + { + "epoch": 1.4819627148736987, + "grad_norm": 0.6952646970748901, + "learning_rate": 3.336955965255939e-06, + "loss": 2.4155, + "step": 18363 + }, + { + "epoch": 1.4820434186102815, + "grad_norm": 0.6897403597831726, + "learning_rate": 3.332912962484269e-06, + "loss": 2.3911, + "step": 18364 + }, + { + "epoch": 1.4821241223468646, + "grad_norm": 0.7033999562263489, + "learning_rate": 3.3288723689090973e-06, + "loss": 2.503, + "step": 18365 + }, + { + "epoch": 1.4822048260834477, + "grad_norm": 0.6422268152236938, + "learning_rate": 3.3248341846311317e-06, + "loss": 2.4095, + "step": 18366 + }, + { + "epoch": 1.4822855298200306, + "grad_norm": 0.7891619205474854, + "learning_rate": 3.3207984097510024e-06, + "loss": 2.4561, + "step": 18367 + }, + { + "epoch": 1.4823662335566137, + "grad_norm": 0.8084300756454468, + "learning_rate": 3.3167650443693186e-06, + "loss": 2.3867, + "step": 18368 + }, + 
{ + "epoch": 1.4824469372931968, + "grad_norm": 0.6958054900169373, + "learning_rate": 3.3127340885865666e-06, + "loss": 2.4479, + "step": 18369 + }, + { + "epoch": 1.4825276410297796, + "grad_norm": 0.6672516465187073, + "learning_rate": 3.308705542503232e-06, + "loss": 2.3908, + "step": 18370 + }, + { + "epoch": 1.4826083447663627, + "grad_norm": 0.6914852261543274, + "learning_rate": 3.3046794062197127e-06, + "loss": 2.3808, + "step": 18371 + }, + { + "epoch": 1.4826890485029458, + "grad_norm": 0.661186695098877, + "learning_rate": 3.3006556798363284e-06, + "loss": 2.4348, + "step": 18372 + }, + { + "epoch": 1.4827697522395287, + "grad_norm": 0.6800875067710876, + "learning_rate": 3.296634363453388e-06, + "loss": 2.4277, + "step": 18373 + }, + { + "epoch": 1.4828504559761118, + "grad_norm": 0.7118602395057678, + "learning_rate": 3.292615457171111e-06, + "loss": 2.3708, + "step": 18374 + }, + { + "epoch": 1.4829311597126946, + "grad_norm": 0.8216844797134399, + "learning_rate": 3.2885989610896395e-06, + "loss": 2.4083, + "step": 18375 + }, + { + "epoch": 1.4830118634492777, + "grad_norm": 0.6696308851242065, + "learning_rate": 3.2845848753090935e-06, + "loss": 2.4329, + "step": 18376 + }, + { + "epoch": 1.4830925671858606, + "grad_norm": 0.7084461450576782, + "learning_rate": 3.280573199929515e-06, + "loss": 2.4084, + "step": 18377 + }, + { + "epoch": 1.4831732709224437, + "grad_norm": 0.6815770268440247, + "learning_rate": 3.2765639350508802e-06, + "loss": 2.4622, + "step": 18378 + }, + { + "epoch": 1.4832539746590268, + "grad_norm": 0.7094982862472534, + "learning_rate": 3.2725570807730975e-06, + "loss": 2.4213, + "step": 18379 + }, + { + "epoch": 1.4833346783956096, + "grad_norm": 0.6778813004493713, + "learning_rate": 3.2685526371960538e-06, + "loss": 2.4003, + "step": 18380 + }, + { + "epoch": 1.4834153821321927, + "grad_norm": 0.6944702863693237, + "learning_rate": 3.2645506044195363e-06, + "loss": 2.3931, + "step": 18381 + }, + { + "epoch": 
1.4834960858687758, + "grad_norm": 0.7213063836097717, + "learning_rate": 3.2605509825432755e-06, + "loss": 2.465, + "step": 18382 + }, + { + "epoch": 1.4835767896053587, + "grad_norm": 0.6559615731239319, + "learning_rate": 3.2565537716669703e-06, + "loss": 2.4149, + "step": 18383 + }, + { + "epoch": 1.4836574933419417, + "grad_norm": 0.7576823830604553, + "learning_rate": 3.2525589718902515e-06, + "loss": 2.4355, + "step": 18384 + }, + { + "epoch": 1.4837381970785248, + "grad_norm": 0.6799216866493225, + "learning_rate": 3.248566583312629e-06, + "loss": 2.3853, + "step": 18385 + }, + { + "epoch": 1.4838189008151077, + "grad_norm": 0.6761351823806763, + "learning_rate": 3.244576606033656e-06, + "loss": 2.4577, + "step": 18386 + }, + { + "epoch": 1.4838996045516908, + "grad_norm": 0.6876667141914368, + "learning_rate": 3.240589040152764e-06, + "loss": 2.3898, + "step": 18387 + }, + { + "epoch": 1.4839803082882739, + "grad_norm": 0.6555415987968445, + "learning_rate": 3.236603885769307e-06, + "loss": 2.3773, + "step": 18388 + }, + { + "epoch": 1.4840610120248567, + "grad_norm": 0.782966673374176, + "learning_rate": 3.232621142982628e-06, + "loss": 2.3987, + "step": 18389 + }, + { + "epoch": 1.4841417157614398, + "grad_norm": 0.6703657507896423, + "learning_rate": 3.228640811891992e-06, + "loss": 2.3617, + "step": 18390 + }, + { + "epoch": 1.4842224194980227, + "grad_norm": 0.7010387778282166, + "learning_rate": 3.224662892596586e-06, + "loss": 2.396, + "step": 18391 + }, + { + "epoch": 1.4843031232346058, + "grad_norm": 0.7821521162986755, + "learning_rate": 3.2206873851955535e-06, + "loss": 2.4362, + "step": 18392 + }, + { + "epoch": 1.4843838269711886, + "grad_norm": 0.7236925959587097, + "learning_rate": 3.21671428978797e-06, + "loss": 2.4813, + "step": 18393 + }, + { + "epoch": 1.4844645307077717, + "grad_norm": 0.6522866487503052, + "learning_rate": 3.2127436064728788e-06, + "loss": 2.3885, + "step": 18394 + }, + { + "epoch": 1.4845452344443548, + "grad_norm": 
0.7148615121841431, + "learning_rate": 3.2087753353492013e-06, + "loss": 2.4546, + "step": 18395 + }, + { + "epoch": 1.4846259381809377, + "grad_norm": 0.6313709020614624, + "learning_rate": 3.2048094765158463e-06, + "loss": 2.3596, + "step": 18396 + }, + { + "epoch": 1.4847066419175208, + "grad_norm": 0.7160886526107788, + "learning_rate": 3.2008460300716914e-06, + "loss": 2.3852, + "step": 18397 + }, + { + "epoch": 1.4847873456541039, + "grad_norm": 0.6922785043716431, + "learning_rate": 3.196884996115479e-06, + "loss": 2.4601, + "step": 18398 + }, + { + "epoch": 1.4848680493906867, + "grad_norm": 0.7803853154182434, + "learning_rate": 3.1929263747459414e-06, + "loss": 2.3588, + "step": 18399 + }, + { + "epoch": 1.4849487531272698, + "grad_norm": 0.7317460179328918, + "learning_rate": 3.1889701660617333e-06, + "loss": 2.4515, + "step": 18400 + }, + { + "epoch": 1.485029456863853, + "grad_norm": 0.6729404330253601, + "learning_rate": 3.1850163701614533e-06, + "loss": 2.4158, + "step": 18401 + }, + { + "epoch": 1.4851101606004358, + "grad_norm": 0.7266910672187805, + "learning_rate": 3.181064987143645e-06, + "loss": 2.4365, + "step": 18402 + }, + { + "epoch": 1.4851908643370189, + "grad_norm": 0.6553283333778381, + "learning_rate": 3.177116017106785e-06, + "loss": 2.4125, + "step": 18403 + }, + { + "epoch": 1.485271568073602, + "grad_norm": 0.7099964618682861, + "learning_rate": 3.1731694601492833e-06, + "loss": 2.3994, + "step": 18404 + }, + { + "epoch": 1.4853522718101848, + "grad_norm": 0.7573987245559692, + "learning_rate": 3.1692253163695173e-06, + "loss": 2.4295, + "step": 18405 + }, + { + "epoch": 1.485432975546768, + "grad_norm": 0.6570815443992615, + "learning_rate": 3.165283585865764e-06, + "loss": 2.4129, + "step": 18406 + }, + { + "epoch": 1.4855136792833508, + "grad_norm": 0.6884456276893616, + "learning_rate": 3.1613442687362772e-06, + "loss": 2.4729, + "step": 18407 + }, + { + "epoch": 1.4855943830199339, + "grad_norm": 0.6423753499984741, + 
"learning_rate": 3.1574073650792234e-06, + "loss": 2.433, + "step": 18408 + }, + { + "epoch": 1.4856750867565167, + "grad_norm": 0.7291930913925171, + "learning_rate": 3.1534728749927358e-06, + "loss": 2.4329, + "step": 18409 + }, + { + "epoch": 1.4857557904930998, + "grad_norm": 0.6597060561180115, + "learning_rate": 3.149540798574868e-06, + "loss": 2.4525, + "step": 18410 + }, + { + "epoch": 1.485836494229683, + "grad_norm": 0.6662060618400574, + "learning_rate": 3.1456111359235986e-06, + "loss": 2.3624, + "step": 18411 + }, + { + "epoch": 1.4859171979662658, + "grad_norm": 0.710584282875061, + "learning_rate": 3.1416838871368924e-06, + "loss": 2.4229, + "step": 18412 + }, + { + "epoch": 1.4859979017028488, + "grad_norm": 0.7081347107887268, + "learning_rate": 3.1377590523126165e-06, + "loss": 2.3889, + "step": 18413 + }, + { + "epoch": 1.486078605439432, + "grad_norm": 0.6779326796531677, + "learning_rate": 3.1338366315485703e-06, + "loss": 2.4074, + "step": 18414 + }, + { + "epoch": 1.4861593091760148, + "grad_norm": 0.6911298036575317, + "learning_rate": 3.1299166249425305e-06, + "loss": 2.4258, + "step": 18415 + }, + { + "epoch": 1.4862400129125979, + "grad_norm": 0.670421302318573, + "learning_rate": 3.1259990325921973e-06, + "loss": 2.4211, + "step": 18416 + }, + { + "epoch": 1.486320716649181, + "grad_norm": 0.6860554814338684, + "learning_rate": 3.1220838545951925e-06, + "loss": 2.3699, + "step": 18417 + }, + { + "epoch": 1.4864014203857638, + "grad_norm": 0.7171792984008789, + "learning_rate": 3.1181710910490935e-06, + "loss": 2.395, + "step": 18418 + }, + { + "epoch": 1.486482124122347, + "grad_norm": 0.6713120341300964, + "learning_rate": 3.1142607420514446e-06, + "loss": 2.4065, + "step": 18419 + }, + { + "epoch": 1.4865628278589298, + "grad_norm": 0.6774618625640869, + "learning_rate": 3.1103528076996568e-06, + "loss": 2.3923, + "step": 18420 + }, + { + "epoch": 1.4866435315955129, + "grad_norm": 0.6554906368255615, + "learning_rate": 
3.1064472880911632e-06, + "loss": 2.4161, + "step": 18421 + }, + { + "epoch": 1.4867242353320957, + "grad_norm": 0.6858103275299072, + "learning_rate": 3.102544183323275e-06, + "loss": 2.4297, + "step": 18422 + }, + { + "epoch": 1.4868049390686788, + "grad_norm": 0.727878212928772, + "learning_rate": 3.0986434934932916e-06, + "loss": 2.3525, + "step": 18423 + }, + { + "epoch": 1.486885642805262, + "grad_norm": 0.6654942035675049, + "learning_rate": 3.094745218698403e-06, + "loss": 2.4297, + "step": 18424 + }, + { + "epoch": 1.4869663465418448, + "grad_norm": 0.658942461013794, + "learning_rate": 3.0908493590357856e-06, + "loss": 2.3723, + "step": 18425 + }, + { + "epoch": 1.4870470502784279, + "grad_norm": 0.6851345896720886, + "learning_rate": 3.0869559146025185e-06, + "loss": 2.4382, + "step": 18426 + }, + { + "epoch": 1.487127754015011, + "grad_norm": 0.6994932889938354, + "learning_rate": 3.0830648854956347e-06, + "loss": 2.3655, + "step": 18427 + }, + { + "epoch": 1.4872084577515938, + "grad_norm": 0.6469771862030029, + "learning_rate": 3.079176271812134e-06, + "loss": 2.4389, + "step": 18428 + }, + { + "epoch": 1.487289161488177, + "grad_norm": 0.7069564461708069, + "learning_rate": 3.0752900736489178e-06, + "loss": 2.3458, + "step": 18429 + }, + { + "epoch": 1.48736986522476, + "grad_norm": 0.7221277952194214, + "learning_rate": 3.0714062911028184e-06, + "loss": 2.4314, + "step": 18430 + }, + { + "epoch": 1.4874505689613429, + "grad_norm": 0.6999499201774597, + "learning_rate": 3.0675249242706593e-06, + "loss": 2.4113, + "step": 18431 + }, + { + "epoch": 1.487531272697926, + "grad_norm": 0.7012192606925964, + "learning_rate": 3.0636459732491628e-06, + "loss": 2.4281, + "step": 18432 + }, + { + "epoch": 1.487611976434509, + "grad_norm": 0.6578752994537354, + "learning_rate": 3.059769438135007e-06, + "loss": 2.391, + "step": 18433 + }, + { + "epoch": 1.487692680171092, + "grad_norm": 0.6541566848754883, + "learning_rate": 3.055895319024782e-06, + "loss": 
2.4021, + "step": 18434 + }, + { + "epoch": 1.487773383907675, + "grad_norm": 0.6928902864456177, + "learning_rate": 3.052023616015076e-06, + "loss": 2.3543, + "step": 18435 + }, + { + "epoch": 1.4878540876442579, + "grad_norm": 0.6487705111503601, + "learning_rate": 3.048154329202357e-06, + "loss": 2.4147, + "step": 18436 + }, + { + "epoch": 1.487934791380841, + "grad_norm": 0.6711629629135132, + "learning_rate": 3.0442874586830705e-06, + "loss": 2.4273, + "step": 18437 + }, + { + "epoch": 1.4880154951174238, + "grad_norm": 0.6932334899902344, + "learning_rate": 3.0404230045535942e-06, + "loss": 2.3515, + "step": 18438 + }, + { + "epoch": 1.488096198854007, + "grad_norm": 0.7008633017539978, + "learning_rate": 3.036560966910229e-06, + "loss": 2.4667, + "step": 18439 + }, + { + "epoch": 1.48817690259059, + "grad_norm": 0.6920375823974609, + "learning_rate": 3.0327013458492203e-06, + "loss": 2.3934, + "step": 18440 + }, + { + "epoch": 1.4882576063271729, + "grad_norm": 0.7152913808822632, + "learning_rate": 3.028844141466769e-06, + "loss": 2.4049, + "step": 18441 + }, + { + "epoch": 1.488338310063756, + "grad_norm": 0.7209664583206177, + "learning_rate": 3.0249893538590202e-06, + "loss": 2.3956, + "step": 18442 + }, + { + "epoch": 1.488419013800339, + "grad_norm": 0.7767702341079712, + "learning_rate": 3.0211369831220305e-06, + "loss": 2.449, + "step": 18443 + }, + { + "epoch": 1.488499717536922, + "grad_norm": 0.7306828498840332, + "learning_rate": 3.017287029351801e-06, + "loss": 2.4244, + "step": 18444 + }, + { + "epoch": 1.488580421273505, + "grad_norm": 0.7171465158462524, + "learning_rate": 3.01343949264431e-06, + "loss": 2.4145, + "step": 18445 + }, + { + "epoch": 1.488661125010088, + "grad_norm": 0.6547496914863586, + "learning_rate": 3.0095943730954146e-06, + "loss": 2.3829, + "step": 18446 + }, + { + "epoch": 1.488741828746671, + "grad_norm": 0.68947833776474, + "learning_rate": 3.00575167080096e-06, + "loss": 2.3469, + "step": 18447 + }, + { + "epoch": 
1.488822532483254, + "grad_norm": 0.6359937191009521, + "learning_rate": 3.001911385856737e-06, + "loss": 2.419, + "step": 18448 + }, + { + "epoch": 1.4889032362198371, + "grad_norm": 0.7035027146339417, + "learning_rate": 2.998073518358424e-06, + "loss": 2.4082, + "step": 18449 + }, + { + "epoch": 1.48898393995642, + "grad_norm": 0.7352398037910461, + "learning_rate": 2.994238068401689e-06, + "loss": 2.3677, + "step": 18450 + }, + { + "epoch": 1.489064643693003, + "grad_norm": 0.6598670482635498, + "learning_rate": 2.9904050360821222e-06, + "loss": 2.3775, + "step": 18451 + }, + { + "epoch": 1.489145347429586, + "grad_norm": 0.698826014995575, + "learning_rate": 2.9865744214952472e-06, + "loss": 2.4086, + "step": 18452 + }, + { + "epoch": 1.489226051166169, + "grad_norm": 0.6918448209762573, + "learning_rate": 2.982746224736521e-06, + "loss": 2.4418, + "step": 18453 + }, + { + "epoch": 1.4893067549027519, + "grad_norm": 0.7679443359375, + "learning_rate": 2.9789204459013785e-06, + "loss": 2.4279, + "step": 18454 + }, + { + "epoch": 1.489387458639335, + "grad_norm": 0.6985172033309937, + "learning_rate": 2.9750970850851544e-06, + "loss": 2.3943, + "step": 18455 + }, + { + "epoch": 1.489468162375918, + "grad_norm": 0.705737829208374, + "learning_rate": 2.971276142383128e-06, + "loss": 2.3632, + "step": 18456 + }, + { + "epoch": 1.489548866112501, + "grad_norm": 0.68868488073349, + "learning_rate": 2.9674576178905343e-06, + "loss": 2.4607, + "step": 18457 + }, + { + "epoch": 1.489629569849084, + "grad_norm": 0.6910532712936401, + "learning_rate": 2.9636415117025416e-06, + "loss": 2.3732, + "step": 18458 + }, + { + "epoch": 1.489710273585667, + "grad_norm": 0.6957756280899048, + "learning_rate": 2.959827823914263e-06, + "loss": 2.3696, + "step": 18459 + }, + { + "epoch": 1.48979097732225, + "grad_norm": 0.698004961013794, + "learning_rate": 2.956016554620744e-06, + "loss": 2.3999, + "step": 18460 + }, + { + "epoch": 1.489871681058833, + "grad_norm": 
0.6441684365272522, + "learning_rate": 2.952207703916965e-06, + "loss": 2.3946, + "step": 18461 + }, + { + "epoch": 1.4899523847954161, + "grad_norm": 0.68703693151474, + "learning_rate": 2.9484012718978605e-06, + "loss": 2.4102, + "step": 18462 + }, + { + "epoch": 1.490033088531999, + "grad_norm": 0.6793025732040405, + "learning_rate": 2.944597258658277e-06, + "loss": 2.4356, + "step": 18463 + }, + { + "epoch": 1.490113792268582, + "grad_norm": 0.6771492958068848, + "learning_rate": 2.9407956642930613e-06, + "loss": 2.3779, + "step": 18464 + }, + { + "epoch": 1.490194496005165, + "grad_norm": 0.8017939925193787, + "learning_rate": 2.9369964888969147e-06, + "loss": 2.4128, + "step": 18465 + }, + { + "epoch": 1.490275199741748, + "grad_norm": 0.7499281764030457, + "learning_rate": 2.93319973256454e-06, + "loss": 2.4646, + "step": 18466 + }, + { + "epoch": 1.490355903478331, + "grad_norm": 0.7264615297317505, + "learning_rate": 2.929405395390561e-06, + "loss": 2.42, + "step": 18467 + }, + { + "epoch": 1.490436607214914, + "grad_norm": 0.6842880845069885, + "learning_rate": 2.9256134774695464e-06, + "loss": 2.3864, + "step": 18468 + }, + { + "epoch": 1.490517310951497, + "grad_norm": 0.7287806272506714, + "learning_rate": 2.9218239788959987e-06, + "loss": 2.4208, + "step": 18469 + }, + { + "epoch": 1.49059801468808, + "grad_norm": 0.683708906173706, + "learning_rate": 2.9180368997643646e-06, + "loss": 2.379, + "step": 18470 + }, + { + "epoch": 1.490678718424663, + "grad_norm": 0.7012128233909607, + "learning_rate": 2.9142522401690353e-06, + "loss": 2.4046, + "step": 18471 + }, + { + "epoch": 1.4907594221612461, + "grad_norm": 0.7036008834838867, + "learning_rate": 2.9104700002043128e-06, + "loss": 2.4177, + "step": 18472 + }, + { + "epoch": 1.490840125897829, + "grad_norm": 0.6707095503807068, + "learning_rate": 2.9066901799644776e-06, + "loss": 2.4333, + "step": 18473 + }, + { + "epoch": 1.490920829634412, + "grad_norm": 0.6534161567687988, + "learning_rate": 
2.9029127795437317e-06, + "loss": 2.4293, + "step": 18474 + }, + { + "epoch": 1.4910015333709952, + "grad_norm": 0.7266476154327393, + "learning_rate": 2.8991377990362e-06, + "loss": 2.4023, + "step": 18475 + }, + { + "epoch": 1.491082237107578, + "grad_norm": 0.68699049949646, + "learning_rate": 2.8953652385359852e-06, + "loss": 2.4531, + "step": 18476 + }, + { + "epoch": 1.4911629408441611, + "grad_norm": 0.710686206817627, + "learning_rate": 2.891595098137101e-06, + "loss": 2.3729, + "step": 18477 + }, + { + "epoch": 1.4912436445807442, + "grad_norm": 0.7585535049438477, + "learning_rate": 2.8878273779335165e-06, + "loss": 2.4254, + "step": 18478 + }, + { + "epoch": 1.491324348317327, + "grad_norm": 0.7347260117530823, + "learning_rate": 2.884062078019123e-06, + "loss": 2.3753, + "step": 18479 + }, + { + "epoch": 1.4914050520539102, + "grad_norm": 0.662326455116272, + "learning_rate": 2.880299198487779e-06, + "loss": 2.4229, + "step": 18480 + }, + { + "epoch": 1.491485755790493, + "grad_norm": 0.7223392128944397, + "learning_rate": 2.8765387394332323e-06, + "loss": 2.4101, + "step": 18481 + }, + { + "epoch": 1.4915664595270761, + "grad_norm": 0.6733242869377136, + "learning_rate": 2.8727807009492293e-06, + "loss": 2.4009, + "step": 18482 + }, + { + "epoch": 1.491647163263659, + "grad_norm": 0.6901989579200745, + "learning_rate": 2.8690250831294398e-06, + "loss": 2.3742, + "step": 18483 + }, + { + "epoch": 1.491727867000242, + "grad_norm": 0.734670877456665, + "learning_rate": 2.8652718860674333e-06, + "loss": 2.374, + "step": 18484 + }, + { + "epoch": 1.4918085707368252, + "grad_norm": 0.6870261430740356, + "learning_rate": 2.8615211098567686e-06, + "loss": 2.4386, + "step": 18485 + }, + { + "epoch": 1.491889274473408, + "grad_norm": 0.7317399382591248, + "learning_rate": 2.8577727545909148e-06, + "loss": 2.3601, + "step": 18486 + }, + { + "epoch": 1.491969978209991, + "grad_norm": 0.7105548977851868, + "learning_rate": 2.854026820363298e-06, + "loss": 2.4112, + 
"step": 18487 + }, + { + "epoch": 1.4920506819465742, + "grad_norm": 0.7378930449485779, + "learning_rate": 2.8502833072672763e-06, + "loss": 2.4487, + "step": 18488 + }, + { + "epoch": 1.492131385683157, + "grad_norm": 0.69692462682724, + "learning_rate": 2.8465422153961418e-06, + "loss": 2.4672, + "step": 18489 + }, + { + "epoch": 1.4922120894197402, + "grad_norm": 0.6905173063278198, + "learning_rate": 2.8428035448431534e-06, + "loss": 2.3586, + "step": 18490 + }, + { + "epoch": 1.4922927931563232, + "grad_norm": 0.6969714760780334, + "learning_rate": 2.8390672957014586e-06, + "loss": 2.4488, + "step": 18491 + }, + { + "epoch": 1.492373496892906, + "grad_norm": 0.6935562491416931, + "learning_rate": 2.835333468064183e-06, + "loss": 2.4342, + "step": 18492 + }, + { + "epoch": 1.4924542006294892, + "grad_norm": 0.7018017768859863, + "learning_rate": 2.831602062024408e-06, + "loss": 2.3931, + "step": 18493 + }, + { + "epoch": 1.4925349043660723, + "grad_norm": 0.7257668375968933, + "learning_rate": 2.8278730776750917e-06, + "loss": 2.4752, + "step": 18494 + }, + { + "epoch": 1.4926156081026551, + "grad_norm": 0.7172815799713135, + "learning_rate": 2.824146515109194e-06, + "loss": 2.4264, + "step": 18495 + }, + { + "epoch": 1.4926963118392382, + "grad_norm": 0.6975371241569519, + "learning_rate": 2.8204223744195958e-06, + "loss": 2.4833, + "step": 18496 + }, + { + "epoch": 1.492777015575821, + "grad_norm": 0.741058886051178, + "learning_rate": 2.8167006556990893e-06, + "loss": 2.4557, + "step": 18497 + }, + { + "epoch": 1.4928577193124042, + "grad_norm": 0.7467125654220581, + "learning_rate": 2.8129813590404342e-06, + "loss": 2.4586, + "step": 18498 + }, + { + "epoch": 1.492938423048987, + "grad_norm": 0.7192440032958984, + "learning_rate": 2.809264484536356e-06, + "loss": 2.3789, + "step": 18499 + }, + { + "epoch": 1.4930191267855701, + "grad_norm": 0.7029628753662109, + "learning_rate": 2.805550032279458e-06, + "loss": 2.3833, + "step": 18500 + }, + { + "epoch": 
1.4930998305221532, + "grad_norm": 0.8207079172134399, + "learning_rate": 2.8018380023623116e-06, + "loss": 2.4767, + "step": 18501 + }, + { + "epoch": 1.493180534258736, + "grad_norm": 0.6775376796722412, + "learning_rate": 2.7981283948774527e-06, + "loss": 2.4166, + "step": 18502 + }, + { + "epoch": 1.4932612379953192, + "grad_norm": 0.7079663276672363, + "learning_rate": 2.7944212099173194e-06, + "loss": 2.4247, + "step": 18503 + }, + { + "epoch": 1.4933419417319023, + "grad_norm": 0.7320355772972107, + "learning_rate": 2.7907164475743043e-06, + "loss": 2.4352, + "step": 18504 + }, + { + "epoch": 1.4934226454684851, + "grad_norm": 0.6638190150260925, + "learning_rate": 2.7870141079407442e-06, + "loss": 2.4045, + "step": 18505 + }, + { + "epoch": 1.4935033492050682, + "grad_norm": 0.6977740526199341, + "learning_rate": 2.7833141911089213e-06, + "loss": 2.3973, + "step": 18506 + }, + { + "epoch": 1.4935840529416513, + "grad_norm": 0.6586610078811646, + "learning_rate": 2.7796166971710167e-06, + "loss": 2.4308, + "step": 18507 + }, + { + "epoch": 1.4936647566782342, + "grad_norm": 0.6625449657440186, + "learning_rate": 2.7759216262192133e-06, + "loss": 2.4498, + "step": 18508 + }, + { + "epoch": 1.4937454604148173, + "grad_norm": 0.760132908821106, + "learning_rate": 2.772228978345581e-06, + "loss": 2.4554, + "step": 18509 + }, + { + "epoch": 1.4938261641514003, + "grad_norm": 0.7072888612747192, + "learning_rate": 2.7685387536421582e-06, + "loss": 2.3822, + "step": 18510 + }, + { + "epoch": 1.4939068678879832, + "grad_norm": 0.7946352362632751, + "learning_rate": 2.764850952200915e-06, + "loss": 2.3972, + "step": 18511 + }, + { + "epoch": 1.493987571624566, + "grad_norm": 0.6885955929756165, + "learning_rate": 2.7611655741137775e-06, + "loss": 2.4101, + "step": 18512 + }, + { + "epoch": 1.4940682753611492, + "grad_norm": 0.7515766620635986, + "learning_rate": 2.7574826194725622e-06, + "loss": 2.4282, + "step": 18513 + }, + { + "epoch": 1.4941489790977323, + 
"grad_norm": 0.6854525804519653, + "learning_rate": 2.7538020883690727e-06, + "loss": 2.3898, + "step": 18514 + }, + { + "epoch": 1.4942296828343151, + "grad_norm": 0.6416916251182556, + "learning_rate": 2.7501239808950473e-06, + "loss": 2.3419, + "step": 18515 + }, + { + "epoch": 1.4943103865708982, + "grad_norm": 0.6626073122024536, + "learning_rate": 2.746448297142157e-06, + "loss": 2.4021, + "step": 18516 + }, + { + "epoch": 1.4943910903074813, + "grad_norm": 0.6947335004806519, + "learning_rate": 2.7427750372019833e-06, + "loss": 2.4233, + "step": 18517 + }, + { + "epoch": 1.4944717940440642, + "grad_norm": 0.7005210518836975, + "learning_rate": 2.739104201166087e-06, + "loss": 2.3649, + "step": 18518 + }, + { + "epoch": 1.4945524977806472, + "grad_norm": 0.7207785248756409, + "learning_rate": 2.735435789125962e-06, + "loss": 2.4612, + "step": 18519 + }, + { + "epoch": 1.4946332015172303, + "grad_norm": 0.6695407629013062, + "learning_rate": 2.731769801173023e-06, + "loss": 2.4302, + "step": 18520 + }, + { + "epoch": 1.4947139052538132, + "grad_norm": 0.6625963449478149, + "learning_rate": 2.728106237398642e-06, + "loss": 2.4016, + "step": 18521 + }, + { + "epoch": 1.4947946089903963, + "grad_norm": 0.6939513087272644, + "learning_rate": 2.724445097894135e-06, + "loss": 2.3906, + "step": 18522 + }, + { + "epoch": 1.4948753127269794, + "grad_norm": 0.6693980097770691, + "learning_rate": 2.7207863827507395e-06, + "loss": 2.3769, + "step": 18523 + }, + { + "epoch": 1.4949560164635622, + "grad_norm": 0.7011690735816956, + "learning_rate": 2.717130092059628e-06, + "loss": 2.3497, + "step": 18524 + }, + { + "epoch": 1.4950367202001453, + "grad_norm": 0.7054407596588135, + "learning_rate": 2.7134762259119373e-06, + "loss": 2.4087, + "step": 18525 + }, + { + "epoch": 1.4951174239367282, + "grad_norm": 0.7248849272727966, + "learning_rate": 2.709824784398729e-06, + "loss": 2.4658, + "step": 18526 + }, + { + "epoch": 1.4951981276733113, + "grad_norm": 
0.6783565282821655, + "learning_rate": 2.706175767611008e-06, + "loss": 2.4486, + "step": 18527 + }, + { + "epoch": 1.4952788314098941, + "grad_norm": 0.7590169310569763, + "learning_rate": 2.702529175639712e-06, + "loss": 2.415, + "step": 18528 + }, + { + "epoch": 1.4953595351464772, + "grad_norm": 0.6909342408180237, + "learning_rate": 2.6988850085757244e-06, + "loss": 2.4161, + "step": 18529 + }, + { + "epoch": 1.4954402388830603, + "grad_norm": 0.7009775638580322, + "learning_rate": 2.6952432665098724e-06, + "loss": 2.4345, + "step": 18530 + }, + { + "epoch": 1.4955209426196432, + "grad_norm": 0.6565183997154236, + "learning_rate": 2.691603949532917e-06, + "loss": 2.4248, + "step": 18531 + }, + { + "epoch": 1.4956016463562263, + "grad_norm": 0.6656069755554199, + "learning_rate": 2.687967057735563e-06, + "loss": 2.3897, + "step": 18532 + }, + { + "epoch": 1.4956823500928094, + "grad_norm": 0.6860701441764832, + "learning_rate": 2.6843325912084383e-06, + "loss": 2.435, + "step": 18533 + }, + { + "epoch": 1.4957630538293922, + "grad_norm": 0.7380251288414001, + "learning_rate": 2.6807005500421256e-06, + "loss": 2.4544, + "step": 18534 + }, + { + "epoch": 1.4958437575659753, + "grad_norm": 0.7232703566551208, + "learning_rate": 2.677070934327175e-06, + "loss": 2.4701, + "step": 18535 + }, + { + "epoch": 1.4959244613025584, + "grad_norm": 0.6819149851799011, + "learning_rate": 2.673443744154003e-06, + "loss": 2.3664, + "step": 18536 + }, + { + "epoch": 1.4960051650391413, + "grad_norm": 0.7755081057548523, + "learning_rate": 2.669818979613026e-06, + "loss": 2.4371, + "step": 18537 + }, + { + "epoch": 1.4960858687757244, + "grad_norm": 0.7655733823776245, + "learning_rate": 2.6661966407945826e-06, + "loss": 2.4068, + "step": 18538 + }, + { + "epoch": 1.4961665725123074, + "grad_norm": 0.711729884147644, + "learning_rate": 2.6625767277889567e-06, + "loss": 2.4384, + "step": 18539 + }, + { + "epoch": 1.4962472762488903, + "grad_norm": 0.7411779761314392, + 
"learning_rate": 2.658959240686354e-06, + "loss": 2.3928, + "step": 18540 + }, + { + "epoch": 1.4963279799854734, + "grad_norm": 0.7470163106918335, + "learning_rate": 2.6553441795769574e-06, + "loss": 2.4121, + "step": 18541 + }, + { + "epoch": 1.4964086837220563, + "grad_norm": 0.6805182695388794, + "learning_rate": 2.6517315445508285e-06, + "loss": 2.4439, + "step": 18542 + }, + { + "epoch": 1.4964893874586394, + "grad_norm": 0.6465758085250854, + "learning_rate": 2.6481213356980285e-06, + "loss": 2.3996, + "step": 18543 + }, + { + "epoch": 1.4965700911952222, + "grad_norm": 0.7103277444839478, + "learning_rate": 2.6445135531085297e-06, + "loss": 2.4107, + "step": 18544 + }, + { + "epoch": 1.4966507949318053, + "grad_norm": 0.7064812779426575, + "learning_rate": 2.640908196872227e-06, + "loss": 2.437, + "step": 18545 + }, + { + "epoch": 1.4967314986683884, + "grad_norm": 0.7219479084014893, + "learning_rate": 2.6373052670790043e-06, + "loss": 2.3647, + "step": 18546 + }, + { + "epoch": 1.4968122024049713, + "grad_norm": 0.655364453792572, + "learning_rate": 2.633704763818634e-06, + "loss": 2.4055, + "step": 18547 + }, + { + "epoch": 1.4968929061415543, + "grad_norm": 0.7051714658737183, + "learning_rate": 2.6301066871808668e-06, + "loss": 2.4221, + "step": 18548 + }, + { + "epoch": 1.4969736098781374, + "grad_norm": 0.6792117953300476, + "learning_rate": 2.626511037255364e-06, + "loss": 2.4437, + "step": 18549 + }, + { + "epoch": 1.4970543136147203, + "grad_norm": 0.7968631982803345, + "learning_rate": 2.6229178141317314e-06, + "loss": 2.3948, + "step": 18550 + }, + { + "epoch": 1.4971350173513034, + "grad_norm": 0.8141141533851624, + "learning_rate": 2.6193270178995644e-06, + "loss": 2.4079, + "step": 18551 + }, + { + "epoch": 1.4972157210878865, + "grad_norm": 0.7343787550926208, + "learning_rate": 2.6157386486483027e-06, + "loss": 2.3716, + "step": 18552 + }, + { + "epoch": 1.4972964248244693, + "grad_norm": 0.7314772009849548, + "learning_rate": 
2.612152706467397e-06, + "loss": 2.4201, + "step": 18553 + }, + { + "epoch": 1.4973771285610524, + "grad_norm": 0.6845466494560242, + "learning_rate": 2.6085691914462306e-06, + "loss": 2.4698, + "step": 18554 + }, + { + "epoch": 1.4974578322976355, + "grad_norm": 0.7247948050498962, + "learning_rate": 2.6049881036741e-06, + "loss": 2.4039, + "step": 18555 + }, + { + "epoch": 1.4975385360342184, + "grad_norm": 0.6975938081741333, + "learning_rate": 2.601409443240255e-06, + "loss": 2.4121, + "step": 18556 + }, + { + "epoch": 1.4976192397708015, + "grad_norm": 0.7096135020256042, + "learning_rate": 2.597833210233891e-06, + "loss": 2.3661, + "step": 18557 + }, + { + "epoch": 1.4976999435073843, + "grad_norm": 0.7084534168243408, + "learning_rate": 2.594259404744137e-06, + "loss": 2.4388, + "step": 18558 + }, + { + "epoch": 1.4977806472439674, + "grad_norm": 0.7675961852073669, + "learning_rate": 2.5906880268600442e-06, + "loss": 2.4495, + "step": 18559 + }, + { + "epoch": 1.4978613509805503, + "grad_norm": 0.6656114459037781, + "learning_rate": 2.5871190766706632e-06, + "loss": 2.3662, + "step": 18560 + }, + { + "epoch": 1.4979420547171334, + "grad_norm": 0.7376806139945984, + "learning_rate": 2.583552554264901e-06, + "loss": 2.4522, + "step": 18561 + }, + { + "epoch": 1.4980227584537165, + "grad_norm": 0.6656897664070129, + "learning_rate": 2.5799884597316527e-06, + "loss": 2.3719, + "step": 18562 + }, + { + "epoch": 1.4981034621902993, + "grad_norm": 0.686014711856842, + "learning_rate": 2.5764267931597586e-06, + "loss": 2.3807, + "step": 18563 + }, + { + "epoch": 1.4981841659268824, + "grad_norm": 0.739297091960907, + "learning_rate": 2.572867554637981e-06, + "loss": 2.4135, + "step": 18564 + }, + { + "epoch": 1.4982648696634655, + "grad_norm": 0.6836863160133362, + "learning_rate": 2.569310744255016e-06, + "loss": 2.4243, + "step": 18565 + }, + { + "epoch": 1.4983455734000484, + "grad_norm": 0.6839776039123535, + "learning_rate": 2.565756362099503e-06, + "loss": 
2.3698, + "step": 18566 + }, + { + "epoch": 1.4984262771366315, + "grad_norm": 0.717965841293335, + "learning_rate": 2.5622044082600604e-06, + "loss": 2.4255, + "step": 18567 + }, + { + "epoch": 1.4985069808732145, + "grad_norm": 0.7073249816894531, + "learning_rate": 2.5586548828251733e-06, + "loss": 2.3958, + "step": 18568 + }, + { + "epoch": 1.4985876846097974, + "grad_norm": 0.6807124018669128, + "learning_rate": 2.555107785883315e-06, + "loss": 2.3746, + "step": 18569 + }, + { + "epoch": 1.4986683883463805, + "grad_norm": 0.6823258996009827, + "learning_rate": 2.5515631175229037e-06, + "loss": 2.4117, + "step": 18570 + }, + { + "epoch": 1.4987490920829634, + "grad_norm": 0.6415054202079773, + "learning_rate": 2.548020877832269e-06, + "loss": 2.3362, + "step": 18571 + }, + { + "epoch": 1.4988297958195465, + "grad_norm": 0.6377396583557129, + "learning_rate": 2.5444810668996956e-06, + "loss": 2.3808, + "step": 18572 + }, + { + "epoch": 1.4989104995561293, + "grad_norm": 0.6864121556282043, + "learning_rate": 2.5409436848134127e-06, + "loss": 2.4115, + "step": 18573 + }, + { + "epoch": 1.4989912032927124, + "grad_norm": 0.6817963719367981, + "learning_rate": 2.5374087316615726e-06, + "loss": 2.4278, + "step": 18574 + }, + { + "epoch": 1.4990719070292955, + "grad_norm": 0.7278866171836853, + "learning_rate": 2.533876207532271e-06, + "loss": 2.3838, + "step": 18575 + }, + { + "epoch": 1.4991526107658784, + "grad_norm": 0.6872361898422241, + "learning_rate": 2.5303461125135596e-06, + "loss": 2.3583, + "step": 18576 + }, + { + "epoch": 1.4992333145024614, + "grad_norm": 0.7112752795219421, + "learning_rate": 2.526818446693402e-06, + "loss": 2.3556, + "step": 18577 + }, + { + "epoch": 1.4993140182390445, + "grad_norm": 0.6485861539840698, + "learning_rate": 2.5232932101597273e-06, + "loss": 2.4051, + "step": 18578 + }, + { + "epoch": 1.4993947219756274, + "grad_norm": 0.796795129776001, + "learning_rate": 2.519770403000399e-06, + "loss": 2.4487, + "step": 18579 + }, + 
{ + "epoch": 1.4994754257122105, + "grad_norm": 0.6965582370758057, + "learning_rate": 2.5162500253032016e-06, + "loss": 2.4096, + "step": 18580 + }, + { + "epoch": 1.4995561294487936, + "grad_norm": 0.6711980104446411, + "learning_rate": 2.5127320771558772e-06, + "loss": 2.3684, + "step": 18581 + }, + { + "epoch": 1.4996368331853764, + "grad_norm": 0.6734749674797058, + "learning_rate": 2.50921655864611e-06, + "loss": 2.4111, + "step": 18582 + }, + { + "epoch": 1.4997175369219595, + "grad_norm": 0.6705273389816284, + "learning_rate": 2.505703469861509e-06, + "loss": 2.4486, + "step": 18583 + }, + { + "epoch": 1.4997982406585426, + "grad_norm": 0.6863572597503662, + "learning_rate": 2.5021928108896365e-06, + "loss": 2.3861, + "step": 18584 + }, + { + "epoch": 1.4998789443951255, + "grad_norm": 0.7196049094200134, + "learning_rate": 2.498684581817967e-06, + "loss": 2.4723, + "step": 18585 + }, + { + "epoch": 1.4999596481317086, + "grad_norm": 0.6990470290184021, + "learning_rate": 2.4951787827339644e-06, + "loss": 2.4122, + "step": 18586 + }, + { + "epoch": 1.5000403518682917, + "grad_norm": 0.6765878796577454, + "learning_rate": 2.49167541372497e-06, + "loss": 2.4416, + "step": 18587 + }, + { + "epoch": 1.5001210556048745, + "grad_norm": 0.695720911026001, + "learning_rate": 2.488174474878324e-06, + "loss": 2.4378, + "step": 18588 + }, + { + "epoch": 1.5002017593414574, + "grad_norm": 0.6874660849571228, + "learning_rate": 2.484675966281269e-06, + "loss": 2.4061, + "step": 18589 + }, + { + "epoch": 1.5002824630780405, + "grad_norm": 0.7196346521377563, + "learning_rate": 2.4811798880209903e-06, + "loss": 2.4147, + "step": 18590 + }, + { + "epoch": 1.5003631668146236, + "grad_norm": 0.7235828042030334, + "learning_rate": 2.477686240184629e-06, + "loss": 2.3971, + "step": 18591 + }, + { + "epoch": 1.5004438705512064, + "grad_norm": 0.690998911857605, + "learning_rate": 2.47419502285926e-06, + "loss": 2.4617, + "step": 18592 + }, + { + "epoch": 1.5005245742877895, + 
"grad_norm": 0.704179048538208, + "learning_rate": 2.47070623613187e-06, + "loss": 2.3694, + "step": 18593 + }, + { + "epoch": 1.5006052780243726, + "grad_norm": 0.6459659934043884, + "learning_rate": 2.467219880089433e-06, + "loss": 2.3735, + "step": 18594 + }, + { + "epoch": 1.5006859817609555, + "grad_norm": 0.6891184449195862, + "learning_rate": 2.463735954818824e-06, + "loss": 2.4479, + "step": 18595 + }, + { + "epoch": 1.5007666854975386, + "grad_norm": 0.7227807641029358, + "learning_rate": 2.460254460406897e-06, + "loss": 2.3642, + "step": 18596 + }, + { + "epoch": 1.5008473892341216, + "grad_norm": 0.7072375416755676, + "learning_rate": 2.4567753969403807e-06, + "loss": 2.385, + "step": 18597 + }, + { + "epoch": 1.5009280929707045, + "grad_norm": 0.7210230231285095, + "learning_rate": 2.453298764506007e-06, + "loss": 2.4116, + "step": 18598 + }, + { + "epoch": 1.5010087967072876, + "grad_norm": 0.7932078242301941, + "learning_rate": 2.449824563190417e-06, + "loss": 2.4631, + "step": 18599 + }, + { + "epoch": 1.5010895004438707, + "grad_norm": 0.6900286078453064, + "learning_rate": 2.4463527930801977e-06, + "loss": 2.4342, + "step": 18600 + }, + { + "epoch": 1.5011702041804535, + "grad_norm": 0.6741199493408203, + "learning_rate": 2.4428834542618796e-06, + "loss": 2.4389, + "step": 18601 + }, + { + "epoch": 1.5012509079170364, + "grad_norm": 0.6513713002204895, + "learning_rate": 2.4394165468219264e-06, + "loss": 2.3851, + "step": 18602 + }, + { + "epoch": 1.5013316116536197, + "grad_norm": 0.7287545204162598, + "learning_rate": 2.4359520708467255e-06, + "loss": 2.4199, + "step": 18603 + }, + { + "epoch": 1.5014123153902026, + "grad_norm": 0.6606385111808777, + "learning_rate": 2.4324900264226403e-06, + "loss": 2.4127, + "step": 18604 + }, + { + "epoch": 1.5014930191267855, + "grad_norm": 0.6798221468925476, + "learning_rate": 2.4290304136359575e-06, + "loss": 2.429, + "step": 18605 + }, + { + "epoch": 1.5015737228633685, + "grad_norm": 0.6801900863647461, 
+ "learning_rate": 2.425573232572875e-06, + "loss": 2.4403, + "step": 18606 + }, + { + "epoch": 1.5016544265999516, + "grad_norm": 0.6709669232368469, + "learning_rate": 2.422118483319569e-06, + "loss": 2.4102, + "step": 18607 + }, + { + "epoch": 1.5017351303365345, + "grad_norm": 0.6942405700683594, + "learning_rate": 2.418666165962158e-06, + "loss": 2.3717, + "step": 18608 + }, + { + "epoch": 1.5018158340731176, + "grad_norm": 0.7532398700714111, + "learning_rate": 2.415216280586652e-06, + "loss": 2.3848, + "step": 18609 + }, + { + "epoch": 1.5018965378097007, + "grad_norm": 0.7056287527084351, + "learning_rate": 2.4117688272790373e-06, + "loss": 2.4101, + "step": 18610 + }, + { + "epoch": 1.5019772415462835, + "grad_norm": 0.7303447723388672, + "learning_rate": 2.4083238061252567e-06, + "loss": 2.4206, + "step": 18611 + }, + { + "epoch": 1.5020579452828666, + "grad_norm": 0.7364635467529297, + "learning_rate": 2.404881217211152e-06, + "loss": 2.4063, + "step": 18612 + }, + { + "epoch": 1.5021386490194497, + "grad_norm": 0.6893425583839417, + "learning_rate": 2.4014410606225225e-06, + "loss": 2.4183, + "step": 18613 + }, + { + "epoch": 1.5022193527560326, + "grad_norm": 0.6890718936920166, + "learning_rate": 2.3980033364451094e-06, + "loss": 2.4023, + "step": 18614 + }, + { + "epoch": 1.5023000564926154, + "grad_norm": 0.6982435584068298, + "learning_rate": 2.394568044764589e-06, + "loss": 2.3832, + "step": 18615 + }, + { + "epoch": 1.5023807602291988, + "grad_norm": 0.7023438811302185, + "learning_rate": 2.391135185666571e-06, + "loss": 2.401, + "step": 18616 + }, + { + "epoch": 1.5024614639657816, + "grad_norm": 0.7713298201560974, + "learning_rate": 2.3877047592366195e-06, + "loss": 2.3814, + "step": 18617 + }, + { + "epoch": 1.5025421677023645, + "grad_norm": 0.6758377552032471, + "learning_rate": 2.384276765560234e-06, + "loss": 2.3654, + "step": 18618 + }, + { + "epoch": 1.5026228714389476, + "grad_norm": 0.7223884463310242, + "learning_rate": 
2.3808512047228227e-06, + "loss": 2.4036, + "step": 18619 + }, + { + "epoch": 1.5027035751755307, + "grad_norm": 0.6677948832511902, + "learning_rate": 2.3774280768097843e-06, + "loss": 2.454, + "step": 18620 + }, + { + "epoch": 1.5027842789121135, + "grad_norm": 0.6792545914649963, + "learning_rate": 2.374007381906429e-06, + "loss": 2.4515, + "step": 18621 + }, + { + "epoch": 1.5028649826486966, + "grad_norm": 0.6737624406814575, + "learning_rate": 2.3705891200980103e-06, + "loss": 2.3978, + "step": 18622 + }, + { + "epoch": 1.5029456863852797, + "grad_norm": 0.6470539569854736, + "learning_rate": 2.367173291469704e-06, + "loss": 2.4051, + "step": 18623 + }, + { + "epoch": 1.5030263901218626, + "grad_norm": 0.6720410585403442, + "learning_rate": 2.3637598961066655e-06, + "loss": 2.3405, + "step": 18624 + }, + { + "epoch": 1.5031070938584457, + "grad_norm": 0.6465243101119995, + "learning_rate": 2.3603489340939588e-06, + "loss": 2.3998, + "step": 18625 + }, + { + "epoch": 1.5031877975950287, + "grad_norm": 0.7025001645088196, + "learning_rate": 2.3569404055165836e-06, + "loss": 2.4181, + "step": 18626 + }, + { + "epoch": 1.5032685013316116, + "grad_norm": 0.72223961353302, + "learning_rate": 2.353534310459493e-06, + "loss": 2.3888, + "step": 18627 + }, + { + "epoch": 1.5033492050681947, + "grad_norm": 0.7461752891540527, + "learning_rate": 2.350130649007587e-06, + "loss": 2.3983, + "step": 18628 + }, + { + "epoch": 1.5034299088047778, + "grad_norm": 0.7365756034851074, + "learning_rate": 2.346729421245675e-06, + "loss": 2.4019, + "step": 18629 + }, + { + "epoch": 1.5035106125413606, + "grad_norm": 0.6703508496284485, + "learning_rate": 2.343330627258533e-06, + "loss": 2.3518, + "step": 18630 + }, + { + "epoch": 1.5035913162779435, + "grad_norm": 0.7155243158340454, + "learning_rate": 2.3399342671308722e-06, + "loss": 2.4097, + "step": 18631 + }, + { + "epoch": 1.5036720200145268, + "grad_norm": 0.7172690629959106, + "learning_rate": 2.336540340947324e-06, + "loss": 
2.4041, + "step": 18632 + }, + { + "epoch": 1.5037527237511097, + "grad_norm": 0.7039667367935181, + "learning_rate": 2.333148848792499e-06, + "loss": 2.3767, + "step": 18633 + }, + { + "epoch": 1.5038334274876926, + "grad_norm": 0.6833097338676453, + "learning_rate": 2.329759790750907e-06, + "loss": 2.4188, + "step": 18634 + }, + { + "epoch": 1.5039141312242756, + "grad_norm": 0.6812809109687805, + "learning_rate": 2.3263731669070145e-06, + "loss": 2.443, + "step": 18635 + }, + { + "epoch": 1.5039948349608587, + "grad_norm": 0.6669073104858398, + "learning_rate": 2.3229889773452195e-06, + "loss": 2.4097, + "step": 18636 + }, + { + "epoch": 1.5040755386974416, + "grad_norm": 0.6794682145118713, + "learning_rate": 2.3196072221498778e-06, + "loss": 2.4558, + "step": 18637 + }, + { + "epoch": 1.5041562424340247, + "grad_norm": 0.6677505970001221, + "learning_rate": 2.3162279014052547e-06, + "loss": 2.4204, + "step": 18638 + }, + { + "epoch": 1.5042369461706078, + "grad_norm": 0.6727068424224854, + "learning_rate": 2.312851015195583e-06, + "loss": 2.3996, + "step": 18639 + }, + { + "epoch": 1.5043176499071906, + "grad_norm": 0.6639944911003113, + "learning_rate": 2.3094765636050177e-06, + "loss": 2.384, + "step": 18640 + }, + { + "epoch": 1.5043983536437737, + "grad_norm": 0.7160700559616089, + "learning_rate": 2.306104546717658e-06, + "loss": 2.4036, + "step": 18641 + }, + { + "epoch": 1.5044790573803568, + "grad_norm": 0.7650535702705383, + "learning_rate": 2.3027349646175588e-06, + "loss": 2.4178, + "step": 18642 + }, + { + "epoch": 1.5045597611169397, + "grad_norm": 0.7348201870918274, + "learning_rate": 2.299367817388676e-06, + "loss": 2.4216, + "step": 18643 + }, + { + "epoch": 1.5046404648535228, + "grad_norm": 0.7645912170410156, + "learning_rate": 2.2960031051149524e-06, + "loss": 2.4465, + "step": 18644 + }, + { + "epoch": 1.5047211685901059, + "grad_norm": 0.7808031439781189, + "learning_rate": 2.2926408278802327e-06, + "loss": 2.4039, + "step": 18645 + }, + 
{ + "epoch": 1.5048018723266887, + "grad_norm": 0.8323469161987305, + "learning_rate": 2.2892809857683053e-06, + "loss": 2.4223, + "step": 18646 + }, + { + "epoch": 1.5048825760632716, + "grad_norm": 0.7380712032318115, + "learning_rate": 2.285923578862914e-06, + "loss": 2.3822, + "step": 18647 + }, + { + "epoch": 1.504963279799855, + "grad_norm": 0.734913170337677, + "learning_rate": 2.282568607247737e-06, + "loss": 2.4136, + "step": 18648 + }, + { + "epoch": 1.5050439835364378, + "grad_norm": 0.6847864389419556, + "learning_rate": 2.2792160710063846e-06, + "loss": 2.4458, + "step": 18649 + }, + { + "epoch": 1.5051246872730206, + "grad_norm": 0.7042723298072815, + "learning_rate": 2.2758659702224127e-06, + "loss": 2.4205, + "step": 18650 + }, + { + "epoch": 1.5052053910096037, + "grad_norm": 0.7443733811378479, + "learning_rate": 2.2725183049793096e-06, + "loss": 2.4135, + "step": 18651 + }, + { + "epoch": 1.5052860947461868, + "grad_norm": 0.6596884727478027, + "learning_rate": 2.26917307536052e-06, + "loss": 2.4134, + "step": 18652 + }, + { + "epoch": 1.5053667984827697, + "grad_norm": 0.6547135710716248, + "learning_rate": 2.2658302814494103e-06, + "loss": 2.3842, + "step": 18653 + }, + { + "epoch": 1.5054475022193528, + "grad_norm": 0.7708645462989807, + "learning_rate": 2.2624899233292806e-06, + "loss": 2.4263, + "step": 18654 + }, + { + "epoch": 1.5055282059559358, + "grad_norm": 0.7285633087158203, + "learning_rate": 2.2591520010833978e-06, + "loss": 2.4192, + "step": 18655 + }, + { + "epoch": 1.5056089096925187, + "grad_norm": 0.7440153956413269, + "learning_rate": 2.255816514794928e-06, + "loss": 2.4419, + "step": 18656 + }, + { + "epoch": 1.5056896134291018, + "grad_norm": 0.7068066596984863, + "learning_rate": 2.2524834645470395e-06, + "loss": 2.4174, + "step": 18657 + }, + { + "epoch": 1.5057703171656849, + "grad_norm": 0.7280914187431335, + "learning_rate": 2.249152850422764e-06, + "loss": 2.4275, + "step": 18658 + }, + { + "epoch": 
1.5058510209022677, + "grad_norm": 0.6725744009017944, + "learning_rate": 2.245824672505126e-06, + "loss": 2.3799, + "step": 18659 + }, + { + "epoch": 1.5059317246388508, + "grad_norm": 0.6966879367828369, + "learning_rate": 2.2424989308770796e-06, + "loss": 2.4448, + "step": 18660 + }, + { + "epoch": 1.506012428375434, + "grad_norm": 0.6617816090583801, + "learning_rate": 2.2391756256214813e-06, + "loss": 2.3881, + "step": 18661 + }, + { + "epoch": 1.5060931321120168, + "grad_norm": 0.6595850586891174, + "learning_rate": 2.2358547568211873e-06, + "loss": 2.3878, + "step": 18662 + }, + { + "epoch": 1.5061738358485997, + "grad_norm": 0.769210696220398, + "learning_rate": 2.2325363245589535e-06, + "loss": 2.3398, + "step": 18663 + }, + { + "epoch": 1.5062545395851827, + "grad_norm": 0.6378950476646423, + "learning_rate": 2.2292203289174695e-06, + "loss": 2.3622, + "step": 18664 + }, + { + "epoch": 1.5063352433217658, + "grad_norm": 0.7006397843360901, + "learning_rate": 2.225906769979402e-06, + "loss": 2.454, + "step": 18665 + }, + { + "epoch": 1.5064159470583487, + "grad_norm": 0.7044196128845215, + "learning_rate": 2.222595647827319e-06, + "loss": 2.4629, + "step": 18666 + }, + { + "epoch": 1.5064966507949318, + "grad_norm": 0.7604904770851135, + "learning_rate": 2.219286962543743e-06, + "loss": 2.4704, + "step": 18667 + }, + { + "epoch": 1.5065773545315149, + "grad_norm": 0.6727971434593201, + "learning_rate": 2.215980714211141e-06, + "loss": 2.4113, + "step": 18668 + }, + { + "epoch": 1.5066580582680977, + "grad_norm": 0.7251582741737366, + "learning_rate": 2.2126769029119143e-06, + "loss": 2.441, + "step": 18669 + }, + { + "epoch": 1.5067387620046808, + "grad_norm": 0.7177818417549133, + "learning_rate": 2.209375528728386e-06, + "loss": 2.3668, + "step": 18670 + }, + { + "epoch": 1.506819465741264, + "grad_norm": 0.7172769904136658, + "learning_rate": 2.206076591742845e-06, + "loss": 2.4247, + "step": 18671 + }, + { + "epoch": 1.5069001694778468, + "grad_norm": 
0.6539075374603271, + "learning_rate": 2.202780092037504e-06, + "loss": 2.3896, + "step": 18672 + }, + { + "epoch": 1.5069808732144299, + "grad_norm": 0.7096640467643738, + "learning_rate": 2.199486029694553e-06, + "loss": 2.4369, + "step": 18673 + }, + { + "epoch": 1.507061576951013, + "grad_norm": 0.64681476354599, + "learning_rate": 2.196194404796048e-06, + "loss": 2.3674, + "step": 18674 + }, + { + "epoch": 1.5071422806875958, + "grad_norm": 0.6609311699867249, + "learning_rate": 2.192905217424035e-06, + "loss": 2.4007, + "step": 18675 + }, + { + "epoch": 1.5072229844241787, + "grad_norm": 0.7324950098991394, + "learning_rate": 2.1896184676605145e-06, + "loss": 2.42, + "step": 18676 + }, + { + "epoch": 1.507303688160762, + "grad_norm": 0.686190128326416, + "learning_rate": 2.186334155587366e-06, + "loss": 2.4413, + "step": 18677 + }, + { + "epoch": 1.5073843918973449, + "grad_norm": 0.7591853141784668, + "learning_rate": 2.183052281286457e-06, + "loss": 2.408, + "step": 18678 + }, + { + "epoch": 1.5074650956339277, + "grad_norm": 0.681408703327179, + "learning_rate": 2.1797728448395893e-06, + "loss": 2.4814, + "step": 18679 + }, + { + "epoch": 1.5075457993705108, + "grad_norm": 0.695336639881134, + "learning_rate": 2.1764958463284855e-06, + "loss": 2.3995, + "step": 18680 + }, + { + "epoch": 1.507626503107094, + "grad_norm": 0.7404937148094177, + "learning_rate": 2.1732212858348143e-06, + "loss": 2.4041, + "step": 18681 + }, + { + "epoch": 1.5077072068436768, + "grad_norm": 0.7484709620475769, + "learning_rate": 2.169949163440188e-06, + "loss": 2.4133, + "step": 18682 + }, + { + "epoch": 1.5077879105802598, + "grad_norm": 0.6750720143318176, + "learning_rate": 2.1666794792261524e-06, + "loss": 2.387, + "step": 18683 + }, + { + "epoch": 1.507868614316843, + "grad_norm": 0.6828570365905762, + "learning_rate": 2.1634122332742093e-06, + "loss": 2.3908, + "step": 18684 + }, + { + "epoch": 1.5079493180534258, + "grad_norm": 0.7603326439857483, + "learning_rate": 
2.1601474256657927e-06, + "loss": 2.4337, + "step": 18685 + }, + { + "epoch": 1.508030021790009, + "grad_norm": 0.7744943499565125, + "learning_rate": 2.15688505648225e-06, + "loss": 2.4279, + "step": 18686 + }, + { + "epoch": 1.508110725526592, + "grad_norm": 0.6829258799552917, + "learning_rate": 2.153625125804892e-06, + "loss": 2.4895, + "step": 18687 + }, + { + "epoch": 1.5081914292631748, + "grad_norm": 0.6903569102287292, + "learning_rate": 2.150367633714978e-06, + "loss": 2.4086, + "step": 18688 + }, + { + "epoch": 1.508272132999758, + "grad_norm": 0.6580927968025208, + "learning_rate": 2.1471125802936863e-06, + "loss": 2.3969, + "step": 18689 + }, + { + "epoch": 1.508352836736341, + "grad_norm": 0.7075905203819275, + "learning_rate": 2.1438599656221303e-06, + "loss": 2.4096, + "step": 18690 + }, + { + "epoch": 1.5084335404729239, + "grad_norm": 0.6775155067443848, + "learning_rate": 2.1406097897813783e-06, + "loss": 2.4142, + "step": 18691 + }, + { + "epoch": 1.5085142442095067, + "grad_norm": 0.6592757701873779, + "learning_rate": 2.137362052852443e-06, + "loss": 2.4354, + "step": 18692 + }, + { + "epoch": 1.50859494794609, + "grad_norm": 0.6985810399055481, + "learning_rate": 2.13411675491626e-06, + "loss": 2.403, + "step": 18693 + }, + { + "epoch": 1.508675651682673, + "grad_norm": 0.6725364327430725, + "learning_rate": 2.130873896053709e-06, + "loss": 2.3974, + "step": 18694 + }, + { + "epoch": 1.5087563554192558, + "grad_norm": 0.8433510661125183, + "learning_rate": 2.127633476345625e-06, + "loss": 2.499, + "step": 18695 + }, + { + "epoch": 1.5088370591558389, + "grad_norm": 0.7117698788642883, + "learning_rate": 2.124395495872744e-06, + "loss": 2.4069, + "step": 18696 + }, + { + "epoch": 1.508917762892422, + "grad_norm": 0.6914052367210388, + "learning_rate": 2.121159954715779e-06, + "loss": 2.414, + "step": 18697 + }, + { + "epoch": 1.5089984666290048, + "grad_norm": 0.6826418042182922, + "learning_rate": 2.117926852955365e-06, + "loss": 2.3616, + 
"step": 18698 + }, + { + "epoch": 1.509079170365588, + "grad_norm": 0.687097430229187, + "learning_rate": 2.114696190672083e-06, + "loss": 2.4434, + "step": 18699 + }, + { + "epoch": 1.509159874102171, + "grad_norm": 0.7137446403503418, + "learning_rate": 2.1114679679464454e-06, + "loss": 2.4431, + "step": 18700 + }, + { + "epoch": 1.5092405778387539, + "grad_norm": 0.7330455780029297, + "learning_rate": 2.1082421848588996e-06, + "loss": 2.4451, + "step": 18701 + }, + { + "epoch": 1.509321281575337, + "grad_norm": 0.701392650604248, + "learning_rate": 2.1050188414898584e-06, + "loss": 2.4038, + "step": 18702 + }, + { + "epoch": 1.50940198531192, + "grad_norm": 0.6891985535621643, + "learning_rate": 2.1017979379196474e-06, + "loss": 2.3863, + "step": 18703 + }, + { + "epoch": 1.509482689048503, + "grad_norm": 0.6793761849403381, + "learning_rate": 2.098579474228546e-06, + "loss": 2.4171, + "step": 18704 + }, + { + "epoch": 1.509563392785086, + "grad_norm": 0.7276668548583984, + "learning_rate": 2.095363450496757e-06, + "loss": 2.4207, + "step": 18705 + }, + { + "epoch": 1.509644096521669, + "grad_norm": 0.6547731757164001, + "learning_rate": 2.0921498668044383e-06, + "loss": 2.4113, + "step": 18706 + }, + { + "epoch": 1.509724800258252, + "grad_norm": 0.6921097636222839, + "learning_rate": 2.0889387232316703e-06, + "loss": 2.4162, + "step": 18707 + }, + { + "epoch": 1.5098055039948348, + "grad_norm": 0.7069120407104492, + "learning_rate": 2.085730019858512e-06, + "loss": 2.3696, + "step": 18708 + }, + { + "epoch": 1.5098862077314181, + "grad_norm": 0.6641648411750793, + "learning_rate": 2.082523756764898e-06, + "loss": 2.3926, + "step": 18709 + }, + { + "epoch": 1.509966911468001, + "grad_norm": 0.658637523651123, + "learning_rate": 2.0793199340307433e-06, + "loss": 2.3748, + "step": 18710 + }, + { + "epoch": 1.5100476152045839, + "grad_norm": 0.695314884185791, + "learning_rate": 2.076118551735906e-06, + "loss": 2.4386, + "step": 18711 + }, + { + "epoch": 
1.510128318941167, + "grad_norm": 0.8113142848014832, + "learning_rate": 2.072919609960178e-06, + "loss": 2.4162, + "step": 18712 + }, + { + "epoch": 1.51020902267775, + "grad_norm": 0.677663266658783, + "learning_rate": 2.0697231087832724e-06, + "loss": 2.4099, + "step": 18713 + }, + { + "epoch": 1.510289726414333, + "grad_norm": 0.8038804531097412, + "learning_rate": 2.0665290482848597e-06, + "loss": 2.4721, + "step": 18714 + }, + { + "epoch": 1.510370430150916, + "grad_norm": 0.7014409303665161, + "learning_rate": 2.0633374285445427e-06, + "loss": 2.3641, + "step": 18715 + }, + { + "epoch": 1.510451133887499, + "grad_norm": 0.7066230773925781, + "learning_rate": 2.060148249641869e-06, + "loss": 2.4361, + "step": 18716 + }, + { + "epoch": 1.510531837624082, + "grad_norm": 0.6830186247825623, + "learning_rate": 2.056961511656319e-06, + "loss": 2.3958, + "step": 18717 + }, + { + "epoch": 1.510612541360665, + "grad_norm": 0.7098764181137085, + "learning_rate": 2.0537772146673182e-06, + "loss": 2.4474, + "step": 18718 + }, + { + "epoch": 1.5106932450972481, + "grad_norm": 0.6630643010139465, + "learning_rate": 2.050595358754215e-06, + "loss": 2.3363, + "step": 18719 + }, + { + "epoch": 1.510773948833831, + "grad_norm": 0.7090222835540771, + "learning_rate": 2.0474159439963115e-06, + "loss": 2.3895, + "step": 18720 + }, + { + "epoch": 1.5108546525704138, + "grad_norm": 0.6796701550483704, + "learning_rate": 2.044238970472867e-06, + "loss": 2.3925, + "step": 18721 + }, + { + "epoch": 1.5109353563069972, + "grad_norm": 0.7596279978752136, + "learning_rate": 2.0410644382630408e-06, + "loss": 2.4606, + "step": 18722 + }, + { + "epoch": 1.51101606004358, + "grad_norm": 0.6724212765693665, + "learning_rate": 2.0378923474459466e-06, + "loss": 2.4033, + "step": 18723 + }, + { + "epoch": 1.5110967637801629, + "grad_norm": 0.6791815161705017, + "learning_rate": 2.034722698100666e-06, + "loss": 2.4433, + "step": 18724 + }, + { + "epoch": 1.511177467516746, + "grad_norm": 
0.686861515045166, + "learning_rate": 2.0315554903061697e-06, + "loss": 2.3319, + "step": 18725 + }, + { + "epoch": 1.511258171253329, + "grad_norm": 0.671930730342865, + "learning_rate": 2.0283907241414047e-06, + "loss": 2.3423, + "step": 18726 + }, + { + "epoch": 1.511338874989912, + "grad_norm": 0.6657836437225342, + "learning_rate": 2.025228399685253e-06, + "loss": 2.3696, + "step": 18727 + }, + { + "epoch": 1.511419578726495, + "grad_norm": 0.7551192045211792, + "learning_rate": 2.0220685170165067e-06, + "loss": 2.3879, + "step": 18728 + }, + { + "epoch": 1.511500282463078, + "grad_norm": 0.7677510380744934, + "learning_rate": 2.018911076213936e-06, + "loss": 2.4264, + "step": 18729 + }, + { + "epoch": 1.511580986199661, + "grad_norm": 0.7070802450180054, + "learning_rate": 2.0157560773562346e-06, + "loss": 2.4055, + "step": 18730 + }, + { + "epoch": 1.511661689936244, + "grad_norm": 0.7047102451324463, + "learning_rate": 2.012603520522005e-06, + "loss": 2.4127, + "step": 18731 + }, + { + "epoch": 1.5117423936728271, + "grad_norm": 0.7608091235160828, + "learning_rate": 2.0094534057898517e-06, + "loss": 2.4461, + "step": 18732 + }, + { + "epoch": 1.51182309740941, + "grad_norm": 0.69472336769104, + "learning_rate": 2.006305733238256e-06, + "loss": 2.3927, + "step": 18733 + }, + { + "epoch": 1.511903801145993, + "grad_norm": 0.7638588547706604, + "learning_rate": 2.0031605029456892e-06, + "loss": 2.4585, + "step": 18734 + }, + { + "epoch": 1.5119845048825762, + "grad_norm": 0.7421556711196899, + "learning_rate": 2.0000177149905208e-06, + "loss": 2.4123, + "step": 18735 + }, + { + "epoch": 1.512065208619159, + "grad_norm": 0.7327919602394104, + "learning_rate": 1.9968773694511e-06, + "loss": 2.416, + "step": 18736 + }, + { + "epoch": 1.512145912355742, + "grad_norm": 0.6789775490760803, + "learning_rate": 1.9937394664056753e-06, + "loss": 2.4116, + "step": 18737 + }, + { + "epoch": 1.5122266160923252, + "grad_norm": 0.8124228715896606, + "learning_rate": 
1.9906040059324504e-06, + "loss": 2.3691, + "step": 18738 + }, + { + "epoch": 1.512307319828908, + "grad_norm": 0.7483124136924744, + "learning_rate": 1.987470988109563e-06, + "loss": 2.3636, + "step": 18739 + }, + { + "epoch": 1.512388023565491, + "grad_norm": 0.7223673462867737, + "learning_rate": 1.9843404130151176e-06, + "loss": 2.3638, + "step": 18740 + }, + { + "epoch": 1.512468727302074, + "grad_norm": 0.6911413669586182, + "learning_rate": 1.9812122807271293e-06, + "loss": 2.3337, + "step": 18741 + }, + { + "epoch": 1.5125494310386571, + "grad_norm": 0.7634989619255066, + "learning_rate": 1.978086591323536e-06, + "loss": 2.393, + "step": 18742 + }, + { + "epoch": 1.51263013477524, + "grad_norm": 0.747278094291687, + "learning_rate": 1.9749633448822748e-06, + "loss": 2.4688, + "step": 18743 + }, + { + "epoch": 1.512710838511823, + "grad_norm": 0.6391082406044006, + "learning_rate": 1.9718425414811502e-06, + "loss": 2.3856, + "step": 18744 + }, + { + "epoch": 1.5127915422484062, + "grad_norm": 0.7871484756469727, + "learning_rate": 1.968724181197967e-06, + "loss": 2.3737, + "step": 18745 + }, + { + "epoch": 1.512872245984989, + "grad_norm": 0.6946254968643188, + "learning_rate": 1.965608264110441e-06, + "loss": 2.3711, + "step": 18746 + }, + { + "epoch": 1.5129529497215721, + "grad_norm": 0.6642282009124756, + "learning_rate": 1.9624947902962098e-06, + "loss": 2.4034, + "step": 18747 + }, + { + "epoch": 1.5130336534581552, + "grad_norm": 0.6511447429656982, + "learning_rate": 1.959383759832889e-06, + "loss": 2.4114, + "step": 18748 + }, + { + "epoch": 1.513114357194738, + "grad_norm": 0.6886571049690247, + "learning_rate": 1.9562751727979943e-06, + "loss": 2.3954, + "step": 18749 + }, + { + "epoch": 1.5131950609313212, + "grad_norm": 0.7461123466491699, + "learning_rate": 1.9531690292690308e-06, + "loss": 2.4607, + "step": 18750 + }, + { + "epoch": 1.5132757646679043, + "grad_norm": 0.6922837495803833, + "learning_rate": 1.9500653293233808e-06, + "loss": 
2.4126, + "step": 18751 + }, + { + "epoch": 1.5133564684044871, + "grad_norm": 0.736294150352478, + "learning_rate": 1.9469640730384042e-06, + "loss": 2.4562, + "step": 18752 + }, + { + "epoch": 1.51343717214107, + "grad_norm": 0.6553577780723572, + "learning_rate": 1.9438652604913955e-06, + "loss": 2.3973, + "step": 18753 + }, + { + "epoch": 1.5135178758776533, + "grad_norm": 0.7067225575447083, + "learning_rate": 1.9407688917595925e-06, + "loss": 2.4333, + "step": 18754 + }, + { + "epoch": 1.5135985796142362, + "grad_norm": 0.7250834107398987, + "learning_rate": 1.9376749669201553e-06, + "loss": 2.4195, + "step": 18755 + }, + { + "epoch": 1.513679283350819, + "grad_norm": 0.7244740724563599, + "learning_rate": 1.934583486050201e-06, + "loss": 2.4422, + "step": 18756 + }, + { + "epoch": 1.5137599870874021, + "grad_norm": 0.6884569525718689, + "learning_rate": 1.931494449226756e-06, + "loss": 2.3681, + "step": 18757 + }, + { + "epoch": 1.5138406908239852, + "grad_norm": 0.7152425646781921, + "learning_rate": 1.9284078565268373e-06, + "loss": 2.4023, + "step": 18758 + }, + { + "epoch": 1.513921394560568, + "grad_norm": 0.6469550132751465, + "learning_rate": 1.92532370802736e-06, + "loss": 2.4102, + "step": 18759 + }, + { + "epoch": 1.5140020982971512, + "grad_norm": 0.6262938380241394, + "learning_rate": 1.9222420038051747e-06, + "loss": 2.3668, + "step": 18760 + }, + { + "epoch": 1.5140828020337342, + "grad_norm": 0.6930738091468811, + "learning_rate": 1.9191627439370974e-06, + "loss": 2.4345, + "step": 18761 + }, + { + "epoch": 1.514163505770317, + "grad_norm": 0.6779739260673523, + "learning_rate": 1.9160859284998777e-06, + "loss": 2.4353, + "step": 18762 + }, + { + "epoch": 1.5142442095069002, + "grad_norm": 0.7086219191551208, + "learning_rate": 1.913011557570177e-06, + "loss": 2.3804, + "step": 18763 + }, + { + "epoch": 1.5143249132434833, + "grad_norm": 0.6894867420196533, + "learning_rate": 1.909939631224644e-06, + "loss": 2.3749, + "step": 18764 + }, + { + 
"epoch": 1.5144056169800661, + "grad_norm": 0.6909998059272766, + "learning_rate": 1.906870149539819e-06, + "loss": 2.4083, + "step": 18765 + }, + { + "epoch": 1.514486320716649, + "grad_norm": 0.6844708323478699, + "learning_rate": 1.9038031125922174e-06, + "loss": 2.4039, + "step": 18766 + }, + { + "epoch": 1.5145670244532323, + "grad_norm": 0.6927101016044617, + "learning_rate": 1.900738520458256e-06, + "loss": 2.3549, + "step": 18767 + }, + { + "epoch": 1.5146477281898152, + "grad_norm": 0.6853668093681335, + "learning_rate": 1.8976763732143298e-06, + "loss": 2.4001, + "step": 18768 + }, + { + "epoch": 1.514728431926398, + "grad_norm": 0.7288877367973328, + "learning_rate": 1.8946166709367553e-06, + "loss": 2.4295, + "step": 18769 + }, + { + "epoch": 1.5148091356629811, + "grad_norm": 0.6837958097457886, + "learning_rate": 1.891559413701771e-06, + "loss": 2.3687, + "step": 18770 + }, + { + "epoch": 1.5148898393995642, + "grad_norm": 0.7109480500221252, + "learning_rate": 1.8885046015855946e-06, + "loss": 2.4561, + "step": 18771 + }, + { + "epoch": 1.514970543136147, + "grad_norm": 0.6929563283920288, + "learning_rate": 1.8854522346643533e-06, + "loss": 2.3597, + "step": 18772 + }, + { + "epoch": 1.5150512468727302, + "grad_norm": 0.6835468411445618, + "learning_rate": 1.8824023130140978e-06, + "loss": 2.4212, + "step": 18773 + }, + { + "epoch": 1.5151319506093133, + "grad_norm": 0.6762038469314575, + "learning_rate": 1.8793548367108671e-06, + "loss": 2.3742, + "step": 18774 + }, + { + "epoch": 1.5152126543458961, + "grad_norm": 0.6824073195457458, + "learning_rate": 1.8763098058306118e-06, + "loss": 2.4822, + "step": 18775 + }, + { + "epoch": 1.5152933580824792, + "grad_norm": 0.7239061594009399, + "learning_rate": 1.873267220449204e-06, + "loss": 2.4036, + "step": 18776 + }, + { + "epoch": 1.5153740618190623, + "grad_norm": 0.6647765040397644, + "learning_rate": 1.8702270806424837e-06, + "loss": 2.4164, + "step": 18777 + }, + { + "epoch": 1.5154547655556452, + 
"grad_norm": 0.6472916007041931, + "learning_rate": 1.8671893864862345e-06, + "loss": 2.3915, + "step": 18778 + }, + { + "epoch": 1.5155354692922283, + "grad_norm": 0.7041392922401428, + "learning_rate": 1.864154138056129e-06, + "loss": 2.4124, + "step": 18779 + }, + { + "epoch": 1.5156161730288114, + "grad_norm": 0.6630376577377319, + "learning_rate": 1.86112133542784e-06, + "loss": 2.36, + "step": 18780 + }, + { + "epoch": 1.5156968767653942, + "grad_norm": 0.6880913972854614, + "learning_rate": 1.8580909786769406e-06, + "loss": 2.3711, + "step": 18781 + }, + { + "epoch": 1.515777580501977, + "grad_norm": 0.6794038414955139, + "learning_rate": 1.8550630678789705e-06, + "loss": 2.4399, + "step": 18782 + }, + { + "epoch": 1.5158582842385604, + "grad_norm": 0.7231845259666443, + "learning_rate": 1.8520376031093688e-06, + "loss": 2.4661, + "step": 18783 + }, + { + "epoch": 1.5159389879751433, + "grad_norm": 0.640635073184967, + "learning_rate": 1.8490145844435646e-06, + "loss": 2.3447, + "step": 18784 + }, + { + "epoch": 1.5160196917117261, + "grad_norm": 0.6949231624603271, + "learning_rate": 1.8459940119568753e-06, + "loss": 2.413, + "step": 18785 + }, + { + "epoch": 1.5161003954483092, + "grad_norm": 0.7331423759460449, + "learning_rate": 1.8429758857245849e-06, + "loss": 2.3968, + "step": 18786 + }, + { + "epoch": 1.5161810991848923, + "grad_norm": 0.7337766289710999, + "learning_rate": 1.8399602058219334e-06, + "loss": 2.3721, + "step": 18787 + }, + { + "epoch": 1.5162618029214752, + "grad_norm": 0.6949995160102844, + "learning_rate": 1.8369469723240717e-06, + "loss": 2.3815, + "step": 18788 + }, + { + "epoch": 1.5163425066580583, + "grad_norm": 0.6975441575050354, + "learning_rate": 1.8339361853060843e-06, + "loss": 2.4681, + "step": 18789 + }, + { + "epoch": 1.5164232103946413, + "grad_norm": 0.682364284992218, + "learning_rate": 1.8309278448430111e-06, + "loss": 2.3789, + "step": 18790 + }, + { + "epoch": 1.5165039141312242, + "grad_norm": 0.795218288898468, 
+ "learning_rate": 1.8279219510098478e-06, + "loss": 2.4204, + "step": 18791 + }, + { + "epoch": 1.5165846178678073, + "grad_norm": 0.6837748885154724, + "learning_rate": 1.8249185038814786e-06, + "loss": 2.4165, + "step": 18792 + }, + { + "epoch": 1.5166653216043904, + "grad_norm": 0.7043229341506958, + "learning_rate": 1.8219175035327773e-06, + "loss": 2.4357, + "step": 18793 + }, + { + "epoch": 1.5167460253409732, + "grad_norm": 0.7295538187026978, + "learning_rate": 1.8189189500385283e-06, + "loss": 2.4108, + "step": 18794 + }, + { + "epoch": 1.5168267290775563, + "grad_norm": 0.7195125222206116, + "learning_rate": 1.8159228434734722e-06, + "loss": 2.4056, + "step": 18795 + }, + { + "epoch": 1.5169074328141394, + "grad_norm": 0.679076075553894, + "learning_rate": 1.812929183912271e-06, + "loss": 2.3591, + "step": 18796 + }, + { + "epoch": 1.5169881365507223, + "grad_norm": 0.7039214372634888, + "learning_rate": 1.8099379714295427e-06, + "loss": 2.4075, + "step": 18797 + }, + { + "epoch": 1.5170688402873052, + "grad_norm": 0.7246118783950806, + "learning_rate": 1.8069492060998393e-06, + "loss": 2.3952, + "step": 18798 + }, + { + "epoch": 1.5171495440238885, + "grad_norm": 0.740473747253418, + "learning_rate": 1.8039628879976233e-06, + "loss": 2.3529, + "step": 18799 + }, + { + "epoch": 1.5172302477604713, + "grad_norm": 0.8230307102203369, + "learning_rate": 1.8009790171973462e-06, + "loss": 2.3789, + "step": 18800 + }, + { + "epoch": 1.5173109514970542, + "grad_norm": 0.6905292868614197, + "learning_rate": 1.7979975937733706e-06, + "loss": 2.3314, + "step": 18801 + }, + { + "epoch": 1.5173916552336373, + "grad_norm": 0.7145891189575195, + "learning_rate": 1.7950186177999928e-06, + "loss": 2.3905, + "step": 18802 + }, + { + "epoch": 1.5174723589702204, + "grad_norm": 0.7292607426643372, + "learning_rate": 1.7920420893514645e-06, + "loss": 2.4806, + "step": 18803 + }, + { + "epoch": 1.5175530627068032, + "grad_norm": 0.6705700159072876, + "learning_rate": 
1.7890680085019595e-06, + "loss": 2.4328, + "step": 18804 + }, + { + "epoch": 1.5176337664433863, + "grad_norm": 0.7559483051300049, + "learning_rate": 1.7860963753256077e-06, + "loss": 2.3555, + "step": 18805 + }, + { + "epoch": 1.5177144701799694, + "grad_norm": 0.703779399394989, + "learning_rate": 1.783127189896472e-06, + "loss": 2.4989, + "step": 18806 + }, + { + "epoch": 1.5177951739165523, + "grad_norm": 0.6725503206253052, + "learning_rate": 1.7801604522885596e-06, + "loss": 2.4035, + "step": 18807 + }, + { + "epoch": 1.5178758776531354, + "grad_norm": 0.7030585408210754, + "learning_rate": 1.7771961625757782e-06, + "loss": 2.4594, + "step": 18808 + }, + { + "epoch": 1.5179565813897185, + "grad_norm": 0.7017019987106323, + "learning_rate": 1.7742343208320355e-06, + "loss": 2.4053, + "step": 18809 + }, + { + "epoch": 1.5180372851263013, + "grad_norm": 0.6798418760299683, + "learning_rate": 1.771274927131139e-06, + "loss": 2.3945, + "step": 18810 + }, + { + "epoch": 1.5181179888628844, + "grad_norm": 0.7820610404014587, + "learning_rate": 1.7683179815468408e-06, + "loss": 2.4243, + "step": 18811 + }, + { + "epoch": 1.5181986925994675, + "grad_norm": 0.780927300453186, + "learning_rate": 1.7653634841528377e-06, + "loss": 2.3786, + "step": 18812 + }, + { + "epoch": 1.5182793963360504, + "grad_norm": 0.6910156011581421, + "learning_rate": 1.7624114350227595e-06, + "loss": 2.3687, + "step": 18813 + }, + { + "epoch": 1.5183601000726332, + "grad_norm": 0.74334716796875, + "learning_rate": 1.7594618342301917e-06, + "loss": 2.4245, + "step": 18814 + }, + { + "epoch": 1.5184408038092165, + "grad_norm": 0.7189802527427673, + "learning_rate": 1.7565146818486311e-06, + "loss": 2.4617, + "step": 18815 + }, + { + "epoch": 1.5185215075457994, + "grad_norm": 0.6682239770889282, + "learning_rate": 1.7535699779515412e-06, + "loss": 2.3924, + "step": 18816 + }, + { + "epoch": 1.5186022112823823, + "grad_norm": 0.7187373638153076, + "learning_rate": 1.750627722612308e-06, + 
"loss": 2.3686, + "step": 18817 + }, + { + "epoch": 1.5186829150189654, + "grad_norm": 0.6907529830932617, + "learning_rate": 1.7476879159042503e-06, + "loss": 2.3942, + "step": 18818 + }, + { + "epoch": 1.5187636187555484, + "grad_norm": 0.7133082747459412, + "learning_rate": 1.744750557900654e-06, + "loss": 2.495, + "step": 18819 + }, + { + "epoch": 1.5188443224921313, + "grad_norm": 0.666289210319519, + "learning_rate": 1.7418156486747162e-06, + "loss": 2.3726, + "step": 18820 + }, + { + "epoch": 1.5189250262287144, + "grad_norm": 0.7055099010467529, + "learning_rate": 1.7388831882995782e-06, + "loss": 2.4071, + "step": 18821 + }, + { + "epoch": 1.5190057299652975, + "grad_norm": 0.6810482740402222, + "learning_rate": 1.7359531768483261e-06, + "loss": 2.4183, + "step": 18822 + }, + { + "epoch": 1.5190864337018803, + "grad_norm": 0.7321486473083496, + "learning_rate": 1.7330256143939905e-06, + "loss": 2.4529, + "step": 18823 + }, + { + "epoch": 1.5191671374384634, + "grad_norm": 0.7226361036300659, + "learning_rate": 1.7301005010095128e-06, + "loss": 2.4364, + "step": 18824 + }, + { + "epoch": 1.5192478411750465, + "grad_norm": 0.6732020974159241, + "learning_rate": 1.7271778367678237e-06, + "loss": 2.4198, + "step": 18825 + }, + { + "epoch": 1.5193285449116294, + "grad_norm": 0.6751465201377869, + "learning_rate": 1.7242576217417538e-06, + "loss": 2.4273, + "step": 18826 + }, + { + "epoch": 1.5194092486482123, + "grad_norm": 0.7088303565979004, + "learning_rate": 1.7213398560040783e-06, + "loss": 2.3857, + "step": 18827 + }, + { + "epoch": 1.5194899523847956, + "grad_norm": 0.7239326238632202, + "learning_rate": 1.7184245396275056e-06, + "loss": 2.3681, + "step": 18828 + }, + { + "epoch": 1.5195706561213784, + "grad_norm": 0.7118703722953796, + "learning_rate": 1.7155116726847109e-06, + "loss": 2.4401, + "step": 18829 + }, + { + "epoch": 1.5196513598579613, + "grad_norm": 0.6479594111442566, + "learning_rate": 1.7126012552482917e-06, + "loss": 2.3794, + "step": 
18830 + }, + { + "epoch": 1.5197320635945444, + "grad_norm": 0.6913226842880249, + "learning_rate": 1.7096932873907679e-06, + "loss": 2.3875, + "step": 18831 + }, + { + "epoch": 1.5198127673311275, + "grad_norm": 0.6577833890914917, + "learning_rate": 1.7067877691846258e-06, + "loss": 2.4328, + "step": 18832 + }, + { + "epoch": 1.5198934710677103, + "grad_norm": 0.7346724271774292, + "learning_rate": 1.703884700702274e-06, + "loss": 2.4161, + "step": 18833 + }, + { + "epoch": 1.5199741748042934, + "grad_norm": 0.7034791111946106, + "learning_rate": 1.700984082016055e-06, + "loss": 2.4166, + "step": 18834 + }, + { + "epoch": 1.5200548785408765, + "grad_norm": 0.69721919298172, + "learning_rate": 1.6980859131982662e-06, + "loss": 2.3892, + "step": 18835 + }, + { + "epoch": 1.5201355822774594, + "grad_norm": 0.77543705701828, + "learning_rate": 1.69519019432115e-06, + "loss": 2.4424, + "step": 18836 + }, + { + "epoch": 1.5202162860140425, + "grad_norm": 0.6738883852958679, + "learning_rate": 1.69229692545686e-06, + "loss": 2.4521, + "step": 18837 + }, + { + "epoch": 1.5202969897506255, + "grad_norm": 0.7213564515113831, + "learning_rate": 1.6894061066775158e-06, + "loss": 2.3824, + "step": 18838 + }, + { + "epoch": 1.5203776934872084, + "grad_norm": 0.6511073112487793, + "learning_rate": 1.68651773805516e-06, + "loss": 2.4027, + "step": 18839 + }, + { + "epoch": 1.5204583972237915, + "grad_norm": 0.707277774810791, + "learning_rate": 1.6836318196617684e-06, + "loss": 2.4513, + "step": 18840 + }, + { + "epoch": 1.5205391009603746, + "grad_norm": 0.7205690741539001, + "learning_rate": 1.6807483515692724e-06, + "loss": 2.3609, + "step": 18841 + }, + { + "epoch": 1.5206198046969575, + "grad_norm": 0.7299683690071106, + "learning_rate": 1.6778673338495476e-06, + "loss": 2.4653, + "step": 18842 + }, + { + "epoch": 1.5207005084335403, + "grad_norm": 0.6780205368995667, + "learning_rate": 1.6749887665743703e-06, + "loss": 2.4108, + "step": 18843 + }, + { + "epoch": 
1.5207812121701236, + "grad_norm": 0.6702545285224915, + "learning_rate": 1.6721126498155048e-06, + "loss": 2.3838, + "step": 18844 + }, + { + "epoch": 1.5208619159067065, + "grad_norm": 0.7097615003585815, + "learning_rate": 1.6692389836446165e-06, + "loss": 2.4273, + "step": 18845 + }, + { + "epoch": 1.5209426196432894, + "grad_norm": 0.6766102910041809, + "learning_rate": 1.6663677681333368e-06, + "loss": 2.4357, + "step": 18846 + }, + { + "epoch": 1.5210233233798724, + "grad_norm": 0.7652571797370911, + "learning_rate": 1.6634990033532194e-06, + "loss": 2.4562, + "step": 18847 + }, + { + "epoch": 1.5211040271164555, + "grad_norm": 0.6772809624671936, + "learning_rate": 1.6606326893757628e-06, + "loss": 2.4173, + "step": 18848 + }, + { + "epoch": 1.5211847308530384, + "grad_norm": 0.7474905848503113, + "learning_rate": 1.65776882627241e-06, + "loss": 2.3759, + "step": 18849 + }, + { + "epoch": 1.5212654345896215, + "grad_norm": 0.7467244267463684, + "learning_rate": 1.6549074141145149e-06, + "loss": 2.3935, + "step": 18850 + }, + { + "epoch": 1.5213461383262046, + "grad_norm": 0.7091644406318665, + "learning_rate": 1.6520484529734092e-06, + "loss": 2.3507, + "step": 18851 + }, + { + "epoch": 1.5214268420627874, + "grad_norm": 0.7161739468574524, + "learning_rate": 1.6491919429203473e-06, + "loss": 2.4125, + "step": 18852 + }, + { + "epoch": 1.5215075457993705, + "grad_norm": 0.6733263731002808, + "learning_rate": 1.6463378840264941e-06, + "loss": 2.4026, + "step": 18853 + }, + { + "epoch": 1.5215882495359536, + "grad_norm": 0.6848629713058472, + "learning_rate": 1.6434862763630155e-06, + "loss": 2.3753, + "step": 18854 + }, + { + "epoch": 1.5216689532725365, + "grad_norm": 0.840535044670105, + "learning_rate": 1.640637120000954e-06, + "loss": 2.4067, + "step": 18855 + }, + { + "epoch": 1.5217496570091196, + "grad_norm": 0.7456166744232178, + "learning_rate": 1.637790415011342e-06, + "loss": 2.384, + "step": 18856 + }, + { + "epoch": 1.5218303607457027, + 
"grad_norm": 0.7038760781288147, + "learning_rate": 1.6349461614651008e-06, + "loss": 2.3857, + "step": 18857 + }, + { + "epoch": 1.5219110644822855, + "grad_norm": 0.6688199639320374, + "learning_rate": 1.6321043594331399e-06, + "loss": 2.4, + "step": 18858 + }, + { + "epoch": 1.5219917682188684, + "grad_norm": 0.7367751598358154, + "learning_rate": 1.6292650089862694e-06, + "loss": 2.458, + "step": 18859 + }, + { + "epoch": 1.5220724719554517, + "grad_norm": 0.7959186434745789, + "learning_rate": 1.626428110195266e-06, + "loss": 2.463, + "step": 18860 + }, + { + "epoch": 1.5221531756920346, + "grad_norm": 0.6830917596817017, + "learning_rate": 1.6235936631308179e-06, + "loss": 2.3843, + "step": 18861 + }, + { + "epoch": 1.5222338794286174, + "grad_norm": 0.6762063503265381, + "learning_rate": 1.6207616678635795e-06, + "loss": 2.4006, + "step": 18862 + }, + { + "epoch": 1.5223145831652005, + "grad_norm": 0.7410191893577576, + "learning_rate": 1.6179321244641277e-06, + "loss": 2.3894, + "step": 18863 + }, + { + "epoch": 1.5223952869017836, + "grad_norm": 0.6335217952728271, + "learning_rate": 1.6151050330029726e-06, + "loss": 2.3622, + "step": 18864 + }, + { + "epoch": 1.5224759906383665, + "grad_norm": 0.6569252014160156, + "learning_rate": 1.6122803935505804e-06, + "loss": 2.4683, + "step": 18865 + }, + { + "epoch": 1.5225566943749496, + "grad_norm": 0.755725085735321, + "learning_rate": 1.60945820617735e-06, + "loss": 2.3681, + "step": 18866 + }, + { + "epoch": 1.5226373981115326, + "grad_norm": 0.7522092461585999, + "learning_rate": 1.6066384709536253e-06, + "loss": 2.4316, + "step": 18867 + }, + { + "epoch": 1.5227181018481155, + "grad_norm": 0.7349351048469543, + "learning_rate": 1.6038211879496723e-06, + "loss": 2.4419, + "step": 18868 + }, + { + "epoch": 1.5227988055846986, + "grad_norm": 0.7310368418693542, + "learning_rate": 1.6010063572357014e-06, + "loss": 2.3956, + "step": 18869 + }, + { + "epoch": 1.5228795093212817, + "grad_norm": 0.7016099691390991, 
+ "learning_rate": 1.5981939788818678e-06, + "loss": 2.3434, + "step": 18870 + }, + { + "epoch": 1.5229602130578646, + "grad_norm": 0.7399678230285645, + "learning_rate": 1.5953840529582708e-06, + "loss": 2.4468, + "step": 18871 + }, + { + "epoch": 1.5230409167944474, + "grad_norm": 0.7483804225921631, + "learning_rate": 1.5925765795349213e-06, + "loss": 2.4589, + "step": 18872 + }, + { + "epoch": 1.5231216205310307, + "grad_norm": 0.7376934885978699, + "learning_rate": 1.5897715586818185e-06, + "loss": 2.4414, + "step": 18873 + }, + { + "epoch": 1.5232023242676136, + "grad_norm": 0.6889188289642334, + "learning_rate": 1.5869689904688401e-06, + "loss": 2.3904, + "step": 18874 + }, + { + "epoch": 1.5232830280041965, + "grad_norm": 0.7198030948638916, + "learning_rate": 1.5841688749658634e-06, + "loss": 2.3654, + "step": 18875 + }, + { + "epoch": 1.5233637317407795, + "grad_norm": 0.7398289442062378, + "learning_rate": 1.581371212242655e-06, + "loss": 2.3903, + "step": 18876 + }, + { + "epoch": 1.5234444354773626, + "grad_norm": 0.6917053461074829, + "learning_rate": 1.5785760023689366e-06, + "loss": 2.4462, + "step": 18877 + }, + { + "epoch": 1.5235251392139455, + "grad_norm": 0.707867443561554, + "learning_rate": 1.5757832454143972e-06, + "loss": 2.4399, + "step": 18878 + }, + { + "epoch": 1.5236058429505286, + "grad_norm": 0.6719911098480225, + "learning_rate": 1.5729929414486144e-06, + "loss": 2.3984, + "step": 18879 + }, + { + "epoch": 1.5236865466871117, + "grad_norm": 0.7843443155288696, + "learning_rate": 1.5702050905411326e-06, + "loss": 2.3631, + "step": 18880 + }, + { + "epoch": 1.5237672504236945, + "grad_norm": 0.7120097279548645, + "learning_rate": 1.5674196927614516e-06, + "loss": 2.3608, + "step": 18881 + }, + { + "epoch": 1.5238479541602776, + "grad_norm": 0.7455726861953735, + "learning_rate": 1.5646367481789604e-06, + "loss": 2.4499, + "step": 18882 + }, + { + "epoch": 1.5239286578968607, + "grad_norm": 0.720418393611908, + "learning_rate": 
1.561856256863048e-06, + "loss": 2.421, + "step": 18883 + }, + { + "epoch": 1.5240093616334436, + "grad_norm": 0.6765218377113342, + "learning_rate": 1.5590782188829923e-06, + "loss": 2.3552, + "step": 18884 + }, + { + "epoch": 1.5240900653700267, + "grad_norm": 0.6665711402893066, + "learning_rate": 1.5563026343080378e-06, + "loss": 2.4116, + "step": 18885 + }, + { + "epoch": 1.5241707691066098, + "grad_norm": 0.6785176992416382, + "learning_rate": 1.5535295032073405e-06, + "loss": 2.3543, + "step": 18886 + }, + { + "epoch": 1.5242514728431926, + "grad_norm": 0.692261278629303, + "learning_rate": 1.550758825650045e-06, + "loss": 2.4613, + "step": 18887 + }, + { + "epoch": 1.5243321765797755, + "grad_norm": 0.7043518424034119, + "learning_rate": 1.547990601705185e-06, + "loss": 2.3802, + "step": 18888 + }, + { + "epoch": 1.5244128803163588, + "grad_norm": 0.677109956741333, + "learning_rate": 1.5452248314417605e-06, + "loss": 2.4045, + "step": 18889 + }, + { + "epoch": 1.5244935840529417, + "grad_norm": 0.7338987588882446, + "learning_rate": 1.5424615149286835e-06, + "loss": 2.3944, + "step": 18890 + }, + { + "epoch": 1.5245742877895245, + "grad_norm": 0.7003028392791748, + "learning_rate": 1.5397006522348546e-06, + "loss": 2.4482, + "step": 18891 + }, + { + "epoch": 1.5246549915261076, + "grad_norm": 0.679331362247467, + "learning_rate": 1.5369422434290515e-06, + "loss": 2.435, + "step": 18892 + }, + { + "epoch": 1.5247356952626907, + "grad_norm": 0.7156202793121338, + "learning_rate": 1.5341862885800307e-06, + "loss": 2.4535, + "step": 18893 + }, + { + "epoch": 1.5248163989992736, + "grad_norm": 0.6846185922622681, + "learning_rate": 1.5314327877564926e-06, + "loss": 2.4047, + "step": 18894 + }, + { + "epoch": 1.5248971027358567, + "grad_norm": 0.7099572420120239, + "learning_rate": 1.5286817410270382e-06, + "loss": 2.4283, + "step": 18895 + }, + { + "epoch": 1.5249778064724397, + "grad_norm": 0.7120501399040222, + "learning_rate": 1.5259331484602345e-06, + 
"loss": 2.4255, + "step": 18896 + }, + { + "epoch": 1.5250585102090226, + "grad_norm": 0.7055281400680542, + "learning_rate": 1.5231870101245937e-06, + "loss": 2.3463, + "step": 18897 + }, + { + "epoch": 1.5251392139456057, + "grad_norm": 0.6632781624794006, + "learning_rate": 1.5204433260885608e-06, + "loss": 2.3487, + "step": 18898 + }, + { + "epoch": 1.5252199176821888, + "grad_norm": 0.6453731656074524, + "learning_rate": 1.5177020964205034e-06, + "loss": 2.3545, + "step": 18899 + }, + { + "epoch": 1.5253006214187717, + "grad_norm": 0.8149442672729492, + "learning_rate": 1.514963321188756e-06, + "loss": 2.431, + "step": 18900 + }, + { + "epoch": 1.5253813251553547, + "grad_norm": 0.730827271938324, + "learning_rate": 1.5122270004615525e-06, + "loss": 2.3812, + "step": 18901 + }, + { + "epoch": 1.5254620288919378, + "grad_norm": 0.6867875456809998, + "learning_rate": 1.5094931343071051e-06, + "loss": 2.4262, + "step": 18902 + }, + { + "epoch": 1.5255427326285207, + "grad_norm": 0.7112615704536438, + "learning_rate": 1.5067617227935593e-06, + "loss": 2.4221, + "step": 18903 + }, + { + "epoch": 1.5256234363651036, + "grad_norm": 0.7412725687026978, + "learning_rate": 1.5040327659889608e-06, + "loss": 2.3338, + "step": 18904 + }, + { + "epoch": 1.5257041401016869, + "grad_norm": 0.7514991164207458, + "learning_rate": 1.501306263961333e-06, + "loss": 2.45, + "step": 18905 + }, + { + "epoch": 1.5257848438382697, + "grad_norm": 0.7420109510421753, + "learning_rate": 1.4985822167786323e-06, + "loss": 2.342, + "step": 18906 + }, + { + "epoch": 1.5258655475748526, + "grad_norm": 0.6807692050933838, + "learning_rate": 1.4958606245087602e-06, + "loss": 2.4438, + "step": 18907 + }, + { + "epoch": 1.5259462513114357, + "grad_norm": 0.6926922798156738, + "learning_rate": 1.493141487219518e-06, + "loss": 2.3726, + "step": 18908 + }, + { + "epoch": 1.5260269550480188, + "grad_norm": 0.7947930693626404, + "learning_rate": 1.490424804978696e-06, + "loss": 2.3887, + "step": 18909 
+ }, + { + "epoch": 1.5261076587846016, + "grad_norm": 0.6710916757583618, + "learning_rate": 1.4877105778540069e-06, + "loss": 2.3674, + "step": 18910 + }, + { + "epoch": 1.5261883625211847, + "grad_norm": 0.7039839029312134, + "learning_rate": 1.4849988059130738e-06, + "loss": 2.4165, + "step": 18911 + }, + { + "epoch": 1.5262690662577678, + "grad_norm": 0.7044761180877686, + "learning_rate": 1.4822894892234874e-06, + "loss": 2.4431, + "step": 18912 + }, + { + "epoch": 1.5263497699943507, + "grad_norm": 0.7750450372695923, + "learning_rate": 1.4795826278527824e-06, + "loss": 2.3867, + "step": 18913 + }, + { + "epoch": 1.5264304737309338, + "grad_norm": 0.6689462661743164, + "learning_rate": 1.4768782218684052e-06, + "loss": 2.4665, + "step": 18914 + }, + { + "epoch": 1.5265111774675169, + "grad_norm": 0.7244156002998352, + "learning_rate": 1.4741762713377682e-06, + "loss": 2.4075, + "step": 18915 + }, + { + "epoch": 1.5265918812040997, + "grad_norm": 0.659988284111023, + "learning_rate": 1.4714767763282067e-06, + "loss": 2.3702, + "step": 18916 + }, + { + "epoch": 1.5266725849406828, + "grad_norm": 0.6512012481689453, + "learning_rate": 1.468779736907e-06, + "loss": 2.447, + "step": 18917 + }, + { + "epoch": 1.526753288677266, + "grad_norm": 0.7002681493759155, + "learning_rate": 1.4660851531413722e-06, + "loss": 2.3993, + "step": 18918 + }, + { + "epoch": 1.5268339924138488, + "grad_norm": 0.7057614922523499, + "learning_rate": 1.4633930250984695e-06, + "loss": 2.4794, + "step": 18919 + }, + { + "epoch": 1.5269146961504316, + "grad_norm": 0.6431131362915039, + "learning_rate": 1.4607033528453829e-06, + "loss": 2.4572, + "step": 18920 + }, + { + "epoch": 1.5269953998870147, + "grad_norm": 0.7665689587593079, + "learning_rate": 1.4580161364491584e-06, + "loss": 2.3644, + "step": 18921 + }, + { + "epoch": 1.5270761036235978, + "grad_norm": 0.7558016180992126, + "learning_rate": 1.455331375976765e-06, + "loss": 2.4114, + "step": 18922 + }, + { + "epoch": 
1.5271568073601807, + "grad_norm": 0.738858699798584, + "learning_rate": 1.4526490714951158e-06, + "loss": 2.4036, + "step": 18923 + }, + { + "epoch": 1.5272375110967638, + "grad_norm": 0.6631876230239868, + "learning_rate": 1.4499692230710459e-06, + "loss": 2.3717, + "step": 18924 + }, + { + "epoch": 1.5273182148333468, + "grad_norm": 0.661270022392273, + "learning_rate": 1.4472918307713579e-06, + "loss": 2.3438, + "step": 18925 + }, + { + "epoch": 1.5273989185699297, + "grad_norm": 0.6621153354644775, + "learning_rate": 1.4446168946627757e-06, + "loss": 2.3787, + "step": 18926 + }, + { + "epoch": 1.5274796223065128, + "grad_norm": 0.7466804385185242, + "learning_rate": 1.4419444148119798e-06, + "loss": 2.4215, + "step": 18927 + }, + { + "epoch": 1.5275603260430959, + "grad_norm": 0.6980069279670715, + "learning_rate": 1.43927439128555e-06, + "loss": 2.3845, + "step": 18928 + }, + { + "epoch": 1.5276410297796787, + "grad_norm": 0.716249406337738, + "learning_rate": 1.4366068241500442e-06, + "loss": 2.4502, + "step": 18929 + }, + { + "epoch": 1.5277217335162618, + "grad_norm": 0.754284679889679, + "learning_rate": 1.4339417134719536e-06, + "loss": 2.3767, + "step": 18930 + }, + { + "epoch": 1.527802437252845, + "grad_norm": 0.6864803433418274, + "learning_rate": 1.4312790593176807e-06, + "loss": 2.3783, + "step": 18931 + }, + { + "epoch": 1.5278831409894278, + "grad_norm": 0.7305008769035339, + "learning_rate": 1.4286188617535945e-06, + "loss": 2.4186, + "step": 18932 + }, + { + "epoch": 1.5279638447260107, + "grad_norm": 0.7028940320014954, + "learning_rate": 1.4259611208459979e-06, + "loss": 2.4659, + "step": 18933 + }, + { + "epoch": 1.528044548462594, + "grad_norm": 0.7353081703186035, + "learning_rate": 1.4233058366611151e-06, + "loss": 2.4355, + "step": 18934 + }, + { + "epoch": 1.5281252521991768, + "grad_norm": 0.6228030323982239, + "learning_rate": 1.4206530092651494e-06, + "loss": 2.3496, + "step": 18935 + }, + { + "epoch": 1.5282059559357597, + 
"grad_norm": 0.7117124795913696, + "learning_rate": 1.4180026387241918e-06, + "loss": 2.4108, + "step": 18936 + }, + { + "epoch": 1.5282866596723428, + "grad_norm": 0.7654587030410767, + "learning_rate": 1.415354725104301e-06, + "loss": 2.3717, + "step": 18937 + }, + { + "epoch": 1.5283673634089259, + "grad_norm": 0.6835399866104126, + "learning_rate": 1.4127092684714683e-06, + "loss": 2.3403, + "step": 18938 + }, + { + "epoch": 1.5284480671455087, + "grad_norm": 0.7172822952270508, + "learning_rate": 1.410066268891641e-06, + "loss": 2.3928, + "step": 18939 + }, + { + "epoch": 1.5285287708820918, + "grad_norm": 0.6987513303756714, + "learning_rate": 1.407425726430678e-06, + "loss": 2.3965, + "step": 18940 + }, + { + "epoch": 1.528609474618675, + "grad_norm": 0.7663477063179016, + "learning_rate": 1.4047876411543925e-06, + "loss": 2.4411, + "step": 18941 + }, + { + "epoch": 1.5286901783552578, + "grad_norm": 0.6900299191474915, + "learning_rate": 1.4021520131285216e-06, + "loss": 2.4464, + "step": 18942 + }, + { + "epoch": 1.5287708820918409, + "grad_norm": 0.6860430836677551, + "learning_rate": 1.3995188424187676e-06, + "loss": 2.3512, + "step": 18943 + }, + { + "epoch": 1.528851585828424, + "grad_norm": 0.6658843755722046, + "learning_rate": 1.3968881290907453e-06, + "loss": 2.4124, + "step": 18944 + }, + { + "epoch": 1.5289322895650068, + "grad_norm": 0.6960515975952148, + "learning_rate": 1.3942598732100243e-06, + "loss": 2.3591, + "step": 18945 + }, + { + "epoch": 1.52901299330159, + "grad_norm": 0.7546302676200867, + "learning_rate": 1.3916340748420963e-06, + "loss": 2.407, + "step": 18946 + }, + { + "epoch": 1.529093697038173, + "grad_norm": 0.7384806871414185, + "learning_rate": 1.3890107340524205e-06, + "loss": 2.3563, + "step": 18947 + }, + { + "epoch": 1.5291744007747559, + "grad_norm": 0.6989250779151917, + "learning_rate": 1.3863898509063555e-06, + "loss": 2.4044, + "step": 18948 + }, + { + "epoch": 1.5292551045113387, + "grad_norm": 0.6974141597747803, 
+ "learning_rate": 1.383771425469249e-06, + "loss": 2.4159, + "step": 18949 + }, + { + "epoch": 1.529335808247922, + "grad_norm": 0.7042572498321533, + "learning_rate": 1.381155457806338e-06, + "loss": 2.3826, + "step": 18950 + }, + { + "epoch": 1.529416511984505, + "grad_norm": 0.737964391708374, + "learning_rate": 1.3785419479828255e-06, + "loss": 2.4146, + "step": 18951 + }, + { + "epoch": 1.5294972157210878, + "grad_norm": 0.698883593082428, + "learning_rate": 1.3759308960638484e-06, + "loss": 2.4203, + "step": 18952 + }, + { + "epoch": 1.5295779194576709, + "grad_norm": 0.6545951962471008, + "learning_rate": 1.373322302114477e-06, + "loss": 2.3445, + "step": 18953 + }, + { + "epoch": 1.529658623194254, + "grad_norm": 0.663454532623291, + "learning_rate": 1.370716166199726e-06, + "loss": 2.3787, + "step": 18954 + }, + { + "epoch": 1.5297393269308368, + "grad_norm": 0.7036040425300598, + "learning_rate": 1.3681124883845543e-06, + "loss": 2.4202, + "step": 18955 + }, + { + "epoch": 1.52982003066742, + "grad_norm": 0.6702279448509216, + "learning_rate": 1.3655112687338434e-06, + "loss": 2.4357, + "step": 18956 + }, + { + "epoch": 1.529900734404003, + "grad_norm": 0.722159206867218, + "learning_rate": 1.3629125073124193e-06, + "loss": 2.4469, + "step": 18957 + }, + { + "epoch": 1.5299814381405858, + "grad_norm": 0.6717368364334106, + "learning_rate": 1.3603162041850636e-06, + "loss": 2.3563, + "step": 18958 + }, + { + "epoch": 1.530062141877169, + "grad_norm": 0.6880894899368286, + "learning_rate": 1.357722359416469e-06, + "loss": 2.3829, + "step": 18959 + }, + { + "epoch": 1.530142845613752, + "grad_norm": 0.6776503920555115, + "learning_rate": 1.3551309730712835e-06, + "loss": 2.3516, + "step": 18960 + }, + { + "epoch": 1.5302235493503349, + "grad_norm": 0.6807117462158203, + "learning_rate": 1.3525420452141002e-06, + "loss": 2.4042, + "step": 18961 + }, + { + "epoch": 1.530304253086918, + "grad_norm": 0.7218049764633179, + "learning_rate": 1.349955575909434e-06, 
+ "loss": 2.4208, + "step": 18962 + }, + { + "epoch": 1.530384956823501, + "grad_norm": 0.6765930652618408, + "learning_rate": 1.3473715652217556e-06, + "loss": 2.4686, + "step": 18963 + }, + { + "epoch": 1.530465660560084, + "grad_norm": 0.7073772549629211, + "learning_rate": 1.3447900132154578e-06, + "loss": 2.3915, + "step": 18964 + }, + { + "epoch": 1.5305463642966668, + "grad_norm": 0.7247893810272217, + "learning_rate": 1.3422109199548672e-06, + "loss": 2.4182, + "step": 18965 + }, + { + "epoch": 1.53062706803325, + "grad_norm": 0.6569304466247559, + "learning_rate": 1.3396342855042876e-06, + "loss": 2.4686, + "step": 18966 + }, + { + "epoch": 1.530707771769833, + "grad_norm": 0.7075461745262146, + "learning_rate": 1.3370601099279122e-06, + "loss": 2.4054, + "step": 18967 + }, + { + "epoch": 1.5307884755064158, + "grad_norm": 0.6850137114524841, + "learning_rate": 1.334488393289912e-06, + "loss": 2.4359, + "step": 18968 + }, + { + "epoch": 1.530869179242999, + "grad_norm": 0.7391964793205261, + "learning_rate": 1.3319191356543691e-06, + "loss": 2.4276, + "step": 18969 + }, + { + "epoch": 1.530949882979582, + "grad_norm": 0.7017062902450562, + "learning_rate": 1.3293523370853211e-06, + "loss": 2.3984, + "step": 18970 + }, + { + "epoch": 1.5310305867161649, + "grad_norm": 0.7009238600730896, + "learning_rate": 1.3267879976467612e-06, + "loss": 2.4359, + "step": 18971 + }, + { + "epoch": 1.531111290452748, + "grad_norm": 0.6929598450660706, + "learning_rate": 1.3242261174025606e-06, + "loss": 2.4326, + "step": 18972 + }, + { + "epoch": 1.531191994189331, + "grad_norm": 0.7422237992286682, + "learning_rate": 1.3216666964165902e-06, + "loss": 2.3896, + "step": 18973 + }, + { + "epoch": 1.531272697925914, + "grad_norm": 0.7049415111541748, + "learning_rate": 1.3191097347526328e-06, + "loss": 2.4069, + "step": 18974 + }, + { + "epoch": 1.531353401662497, + "grad_norm": 0.7242603302001953, + "learning_rate": 1.3165552324744145e-06, + "loss": 2.3738, + "step": 18975 + 
}, + { + "epoch": 1.53143410539908, + "grad_norm": 0.6795815825462341, + "learning_rate": 1.3140031896456073e-06, + "loss": 2.4512, + "step": 18976 + }, + { + "epoch": 1.531514809135663, + "grad_norm": 0.6888797283172607, + "learning_rate": 1.3114536063297932e-06, + "loss": 2.4532, + "step": 18977 + }, + { + "epoch": 1.5315955128722458, + "grad_norm": 0.6484637260437012, + "learning_rate": 1.3089064825905438e-06, + "loss": 2.3946, + "step": 18978 + }, + { + "epoch": 1.5316762166088291, + "grad_norm": 0.7018564939498901, + "learning_rate": 1.3063618184913196e-06, + "loss": 2.3645, + "step": 18979 + }, + { + "epoch": 1.531756920345412, + "grad_norm": 0.673145055770874, + "learning_rate": 1.3038196140955584e-06, + "loss": 2.395, + "step": 18980 + }, + { + "epoch": 1.5318376240819949, + "grad_norm": 0.7300434112548828, + "learning_rate": 1.3012798694665873e-06, + "loss": 2.4325, + "step": 18981 + }, + { + "epoch": 1.531918327818578, + "grad_norm": 0.706119954586029, + "learning_rate": 1.2987425846677337e-06, + "loss": 2.4204, + "step": 18982 + }, + { + "epoch": 1.531999031555161, + "grad_norm": 0.7130329608917236, + "learning_rate": 1.2962077597622247e-06, + "loss": 2.4483, + "step": 18983 + }, + { + "epoch": 1.532079735291744, + "grad_norm": 0.716433584690094, + "learning_rate": 1.2936753948132318e-06, + "loss": 2.4296, + "step": 18984 + }, + { + "epoch": 1.532160439028327, + "grad_norm": 0.7647578120231628, + "learning_rate": 1.2911454898838714e-06, + "loss": 2.4262, + "step": 18985 + }, + { + "epoch": 1.53224114276491, + "grad_norm": 0.6844768524169922, + "learning_rate": 1.2886180450371822e-06, + "loss": 2.3896, + "step": 18986 + }, + { + "epoch": 1.532321846501493, + "grad_norm": 0.6975526809692383, + "learning_rate": 1.2860930603361686e-06, + "loss": 2.4362, + "step": 18987 + }, + { + "epoch": 1.532402550238076, + "grad_norm": 0.7288907170295715, + "learning_rate": 1.2835705358437588e-06, + "loss": 2.3775, + "step": 18988 + }, + { + "epoch": 1.5324832539746591, + 
"grad_norm": 0.718291163444519, + "learning_rate": 1.2810504716228245e-06, + "loss": 2.3863, + "step": 18989 + }, + { + "epoch": 1.532563957711242, + "grad_norm": 0.7351683974266052, + "learning_rate": 1.2785328677361597e-06, + "loss": 2.4136, + "step": 18990 + }, + { + "epoch": 1.532644661447825, + "grad_norm": 0.665600061416626, + "learning_rate": 1.2760177242465254e-06, + "loss": 2.3741, + "step": 18991 + }, + { + "epoch": 1.5327253651844082, + "grad_norm": 0.7038269639015198, + "learning_rate": 1.2735050412165827e-06, + "loss": 2.3613, + "step": 18992 + }, + { + "epoch": 1.532806068920991, + "grad_norm": 0.6893567442893982, + "learning_rate": 1.2709948187089814e-06, + "loss": 2.3785, + "step": 18993 + }, + { + "epoch": 1.532886772657574, + "grad_norm": 0.7487246990203857, + "learning_rate": 1.2684870567862605e-06, + "loss": 2.414, + "step": 18994 + }, + { + "epoch": 1.5329674763941572, + "grad_norm": 0.6581461429595947, + "learning_rate": 1.2659817555109367e-06, + "loss": 2.3777, + "step": 18995 + }, + { + "epoch": 1.53304818013074, + "grad_norm": 0.7202548384666443, + "learning_rate": 1.2634789149454374e-06, + "loss": 2.4328, + "step": 18996 + }, + { + "epoch": 1.533128883867323, + "grad_norm": 0.7678282260894775, + "learning_rate": 1.2609785351521352e-06, + "loss": 2.452, + "step": 18997 + }, + { + "epoch": 1.533209587603906, + "grad_norm": 0.7092801332473755, + "learning_rate": 1.2584806161933582e-06, + "loss": 2.3806, + "step": 18998 + }, + { + "epoch": 1.533290291340489, + "grad_norm": 0.6543184518814087, + "learning_rate": 1.2559851581313565e-06, + "loss": 2.4002, + "step": 18999 + }, + { + "epoch": 1.533370995077072, + "grad_norm": 0.7272716164588928, + "learning_rate": 1.2534921610283356e-06, + "loss": 2.4519, + "step": 19000 + }, + { + "epoch": 1.533370995077072, + "eval_loss": 2.36470365524292, + "eval_runtime": 766.3392, + "eval_samples_per_second": 3.419, + "eval_steps_per_second": 0.57, + "step": 19000 + }, + { + "epoch": 1.533451698813655, + 
"grad_norm": 0.6988897919654846, + "learning_rate": 1.251001624946402e-06, + "loss": 2.373, + "step": 19001 + }, + { + "epoch": 1.5335324025502381, + "grad_norm": 0.7422608137130737, + "learning_rate": 1.2485135499476498e-06, + "loss": 2.4027, + "step": 19002 + }, + { + "epoch": 1.533613106286821, + "grad_norm": 0.7004046440124512, + "learning_rate": 1.2460279360940742e-06, + "loss": 2.4616, + "step": 19003 + }, + { + "epoch": 1.533693810023404, + "grad_norm": 0.6326462030410767, + "learning_rate": 1.2435447834476255e-06, + "loss": 2.3633, + "step": 19004 + }, + { + "epoch": 1.5337745137599872, + "grad_norm": 0.6574158668518066, + "learning_rate": 1.2410640920701987e-06, + "loss": 2.4259, + "step": 19005 + }, + { + "epoch": 1.53385521749657, + "grad_norm": 0.7163352370262146, + "learning_rate": 1.2385858620236223e-06, + "loss": 2.4401, + "step": 19006 + }, + { + "epoch": 1.5339359212331531, + "grad_norm": 0.7343004941940308, + "learning_rate": 1.2361100933696356e-06, + "loss": 2.4501, + "step": 19007 + }, + { + "epoch": 1.5340166249697362, + "grad_norm": 0.7254945039749146, + "learning_rate": 1.233636786169956e-06, + "loss": 2.4383, + "step": 19008 + }, + { + "epoch": 1.534097328706319, + "grad_norm": 0.6400811076164246, + "learning_rate": 1.231165940486234e-06, + "loss": 2.376, + "step": 19009 + }, + { + "epoch": 1.534178032442902, + "grad_norm": 0.7108075618743896, + "learning_rate": 1.2286975563800317e-06, + "loss": 2.3788, + "step": 19010 + }, + { + "epoch": 1.5342587361794853, + "grad_norm": 0.6801196336746216, + "learning_rate": 1.2262316339128776e-06, + "loss": 2.4431, + "step": 19011 + }, + { + "epoch": 1.5343394399160681, + "grad_norm": 0.7298370003700256, + "learning_rate": 1.2237681731462448e-06, + "loss": 2.3462, + "step": 19012 + }, + { + "epoch": 1.534420143652651, + "grad_norm": 0.6715682744979858, + "learning_rate": 1.221307174141495e-06, + "loss": 2.4468, + "step": 19013 + }, + { + "epoch": 1.534500847389234, + "grad_norm": 0.7428280711174011, + 
"learning_rate": 1.2188486369599904e-06, + "loss": 2.4265, + "step": 19014 + }, + { + "epoch": 1.5345815511258172, + "grad_norm": 0.715877890586853, + "learning_rate": 1.2163925616629824e-06, + "loss": 2.3859, + "step": 19015 + }, + { + "epoch": 1.5346622548624, + "grad_norm": 0.6486028432846069, + "learning_rate": 1.2139389483117102e-06, + "loss": 2.3371, + "step": 19016 + }, + { + "epoch": 1.5347429585989831, + "grad_norm": 0.730654776096344, + "learning_rate": 1.2114877969673033e-06, + "loss": 2.4176, + "step": 19017 + }, + { + "epoch": 1.5348236623355662, + "grad_norm": 0.748631477355957, + "learning_rate": 1.2090391076908569e-06, + "loss": 2.4022, + "step": 19018 + }, + { + "epoch": 1.534904366072149, + "grad_norm": 0.6438515782356262, + "learning_rate": 1.206592880543389e-06, + "loss": 2.4418, + "step": 19019 + }, + { + "epoch": 1.5349850698087322, + "grad_norm": 0.6952247023582458, + "learning_rate": 1.2041491155858842e-06, + "loss": 2.3744, + "step": 19020 + }, + { + "epoch": 1.5350657735453153, + "grad_norm": 0.70782470703125, + "learning_rate": 1.201707812879238e-06, + "loss": 2.4041, + "step": 19021 + }, + { + "epoch": 1.5351464772818981, + "grad_norm": 0.7378403544425964, + "learning_rate": 1.1992689724842909e-06, + "loss": 2.4067, + "step": 19022 + }, + { + "epoch": 1.535227181018481, + "grad_norm": 0.7623130083084106, + "learning_rate": 1.1968325944618386e-06, + "loss": 2.4227, + "step": 19023 + }, + { + "epoch": 1.5353078847550643, + "grad_norm": 0.6920705437660217, + "learning_rate": 1.194398678872577e-06, + "loss": 2.3782, + "step": 19024 + }, + { + "epoch": 1.5353885884916472, + "grad_norm": 0.7110825181007385, + "learning_rate": 1.1919672257771908e-06, + "loss": 2.3578, + "step": 19025 + }, + { + "epoch": 1.53546929222823, + "grad_norm": 0.7839763164520264, + "learning_rate": 1.189538235236265e-06, + "loss": 2.4345, + "step": 19026 + }, + { + "epoch": 1.5355499959648131, + "grad_norm": 0.6758056879043579, + "learning_rate": 1.18711170731034e-06, 
+ "loss": 2.4482, + "step": 19027 + }, + { + "epoch": 1.5356306997013962, + "grad_norm": 0.7417055368423462, + "learning_rate": 1.1846876420598896e-06, + "loss": 2.446, + "step": 19028 + }, + { + "epoch": 1.535711403437979, + "grad_norm": 0.681891679763794, + "learning_rate": 1.1822660395453321e-06, + "loss": 2.3934, + "step": 19029 + }, + { + "epoch": 1.5357921071745622, + "grad_norm": 0.7401404976844788, + "learning_rate": 1.1798468998270086e-06, + "loss": 2.3605, + "step": 19030 + }, + { + "epoch": 1.5358728109111452, + "grad_norm": 0.6908402442932129, + "learning_rate": 1.1774302229652257e-06, + "loss": 2.3534, + "step": 19031 + }, + { + "epoch": 1.5359535146477281, + "grad_norm": 0.6733761429786682, + "learning_rate": 1.1750160090202133e-06, + "loss": 2.4347, + "step": 19032 + }, + { + "epoch": 1.5360342183843112, + "grad_norm": 0.7412551045417786, + "learning_rate": 1.1726042580521234e-06, + "loss": 2.4262, + "step": 19033 + }, + { + "epoch": 1.5361149221208943, + "grad_norm": 0.794582724571228, + "learning_rate": 1.1701949701210747e-06, + "loss": 2.4297, + "step": 19034 + }, + { + "epoch": 1.5361956258574772, + "grad_norm": 0.653629720211029, + "learning_rate": 1.1677881452871187e-06, + "loss": 2.4007, + "step": 19035 + }, + { + "epoch": 1.5362763295940602, + "grad_norm": 0.6217665672302246, + "learning_rate": 1.1653837836102189e-06, + "loss": 2.3701, + "step": 19036 + }, + { + "epoch": 1.5363570333306433, + "grad_norm": 0.7155484557151794, + "learning_rate": 1.1629818851503161e-06, + "loss": 2.4533, + "step": 19037 + }, + { + "epoch": 1.5364377370672262, + "grad_norm": 0.6588923931121826, + "learning_rate": 1.1605824499672734e-06, + "loss": 2.4332, + "step": 19038 + }, + { + "epoch": 1.536518440803809, + "grad_norm": 0.6829205751419067, + "learning_rate": 1.1581854781208767e-06, + "loss": 2.4407, + "step": 19039 + }, + { + "epoch": 1.5365991445403924, + "grad_norm": 0.6711968779563904, + "learning_rate": 1.155790969670878e-06, + "loss": 2.4412, + "step": 
19040 + }, + { + "epoch": 1.5366798482769752, + "grad_norm": 0.6866469383239746, + "learning_rate": 1.1533989246769516e-06, + "loss": 2.351, + "step": 19041 + }, + { + "epoch": 1.536760552013558, + "grad_norm": 0.7236056923866272, + "learning_rate": 1.1510093431987057e-06, + "loss": 2.3898, + "step": 19042 + }, + { + "epoch": 1.5368412557501412, + "grad_norm": 0.6912446022033691, + "learning_rate": 1.1486222252957036e-06, + "loss": 2.4024, + "step": 19043 + }, + { + "epoch": 1.5369219594867243, + "grad_norm": 0.6832977533340454, + "learning_rate": 1.146237571027442e-06, + "loss": 2.4422, + "step": 19044 + }, + { + "epoch": 1.5370026632233071, + "grad_norm": 0.7664303183555603, + "learning_rate": 1.1438553804533292e-06, + "loss": 2.422, + "step": 19045 + }, + { + "epoch": 1.5370833669598902, + "grad_norm": 0.771228551864624, + "learning_rate": 1.141475653632762e-06, + "loss": 2.3873, + "step": 19046 + }, + { + "epoch": 1.5371640706964733, + "grad_norm": 0.718014657497406, + "learning_rate": 1.1390983906250486e-06, + "loss": 2.4258, + "step": 19047 + }, + { + "epoch": 1.5372447744330562, + "grad_norm": 0.7383404970169067, + "learning_rate": 1.1367235914894192e-06, + "loss": 2.4257, + "step": 19048 + }, + { + "epoch": 1.5373254781696393, + "grad_norm": 0.7850503325462341, + "learning_rate": 1.1343512562850712e-06, + "loss": 2.4281, + "step": 19049 + }, + { + "epoch": 1.5374061819062224, + "grad_norm": 0.6942077875137329, + "learning_rate": 1.1319813850711237e-06, + "loss": 2.3916, + "step": 19050 + }, + { + "epoch": 1.5374868856428052, + "grad_norm": 0.662036120891571, + "learning_rate": 1.1296139779066517e-06, + "loss": 2.4179, + "step": 19051 + }, + { + "epoch": 1.5375675893793883, + "grad_norm": 0.6748930811882019, + "learning_rate": 1.1272490348506415e-06, + "loss": 2.3663, + "step": 19052 + }, + { + "epoch": 1.5376482931159714, + "grad_norm": 0.6629074215888977, + "learning_rate": 1.1248865559620459e-06, + "loss": 2.3791, + "step": 19053 + }, + { + "epoch": 
1.5377289968525543, + "grad_norm": 0.6888332366943359, + "learning_rate": 1.122526541299751e-06, + "loss": 2.3949, + "step": 19054 + }, + { + "epoch": 1.5378097005891371, + "grad_norm": 0.720655083656311, + "learning_rate": 1.1201689909225432e-06, + "loss": 2.3701, + "step": 19055 + }, + { + "epoch": 1.5378904043257204, + "grad_norm": 0.6749783754348755, + "learning_rate": 1.117813904889209e-06, + "loss": 2.313, + "step": 19056 + }, + { + "epoch": 1.5379711080623033, + "grad_norm": 0.7315804362297058, + "learning_rate": 1.1154612832584344e-06, + "loss": 2.4083, + "step": 19057 + }, + { + "epoch": 1.5380518117988862, + "grad_norm": 0.7733453512191772, + "learning_rate": 1.1131111260888395e-06, + "loss": 2.4855, + "step": 19058 + }, + { + "epoch": 1.5381325155354693, + "grad_norm": 0.727128267288208, + "learning_rate": 1.1107634334390217e-06, + "loss": 2.3989, + "step": 19059 + }, + { + "epoch": 1.5382132192720523, + "grad_norm": 0.6917513608932495, + "learning_rate": 1.1084182053674674e-06, + "loss": 2.4218, + "step": 19060 + }, + { + "epoch": 1.5382939230086352, + "grad_norm": 0.6856530904769897, + "learning_rate": 1.1060754419326413e-06, + "loss": 2.3849, + "step": 19061 + }, + { + "epoch": 1.5383746267452183, + "grad_norm": 0.7059873938560486, + "learning_rate": 1.10373514319293e-06, + "loss": 2.4535, + "step": 19062 + }, + { + "epoch": 1.5384553304818014, + "grad_norm": 0.7123165726661682, + "learning_rate": 1.101397309206642e-06, + "loss": 2.4122, + "step": 19063 + }, + { + "epoch": 1.5385360342183843, + "grad_norm": 0.7211580872535706, + "learning_rate": 1.0990619400320756e-06, + "loss": 2.3733, + "step": 19064 + }, + { + "epoch": 1.5386167379549673, + "grad_norm": 0.6998704075813293, + "learning_rate": 1.0967290357274063e-06, + "loss": 2.4054, + "step": 19065 + }, + { + "epoch": 1.5386974416915504, + "grad_norm": 0.7167361378669739, + "learning_rate": 1.0943985963507875e-06, + "loss": 2.4315, + "step": 19066 + }, + { + "epoch": 1.5387781454281333, + 
"grad_norm": 0.6754775643348694, + "learning_rate": 1.0920706219603062e-06, + "loss": 2.3876, + "step": 19067 + }, + { + "epoch": 1.5388588491647164, + "grad_norm": 0.715957522392273, + "learning_rate": 1.0897451126139603e-06, + "loss": 2.4604, + "step": 19068 + }, + { + "epoch": 1.5389395529012995, + "grad_norm": 0.7188153266906738, + "learning_rate": 1.0874220683697146e-06, + "loss": 2.4955, + "step": 19069 + }, + { + "epoch": 1.5390202566378823, + "grad_norm": 0.6485354900360107, + "learning_rate": 1.0851014892854783e-06, + "loss": 2.4263, + "step": 19070 + }, + { + "epoch": 1.5391009603744652, + "grad_norm": 0.7650482654571533, + "learning_rate": 1.082783375419083e-06, + "loss": 2.4205, + "step": 19071 + }, + { + "epoch": 1.5391816641110483, + "grad_norm": 0.6637241840362549, + "learning_rate": 1.0804677268282937e-06, + "loss": 2.4509, + "step": 19072 + }, + { + "epoch": 1.5392623678476314, + "grad_norm": 0.6775830388069153, + "learning_rate": 1.0781545435708306e-06, + "loss": 2.4104, + "step": 19073 + }, + { + "epoch": 1.5393430715842142, + "grad_norm": 0.6755654811859131, + "learning_rate": 1.0758438257043369e-06, + "loss": 2.4281, + "step": 19074 + }, + { + "epoch": 1.5394237753207973, + "grad_norm": 0.720555305480957, + "learning_rate": 1.0735355732864106e-06, + "loss": 2.4013, + "step": 19075 + }, + { + "epoch": 1.5395044790573804, + "grad_norm": 0.7110928297042847, + "learning_rate": 1.0712297863745724e-06, + "loss": 2.4725, + "step": 19076 + }, + { + "epoch": 1.5395851827939633, + "grad_norm": 0.6953117251396179, + "learning_rate": 1.0689264650262875e-06, + "loss": 2.4452, + "step": 19077 + }, + { + "epoch": 1.5396658865305464, + "grad_norm": 0.6691006422042847, + "learning_rate": 1.0666256092989657e-06, + "loss": 2.4429, + "step": 19078 + }, + { + "epoch": 1.5397465902671295, + "grad_norm": 0.7241020202636719, + "learning_rate": 1.0643272192499498e-06, + "loss": 2.3877, + "step": 19079 + }, + { + "epoch": 1.5398272940037123, + "grad_norm": 
0.7464954257011414, + "learning_rate": 1.0620312949365162e-06, + "loss": 2.3952, + "step": 19080 + }, + { + "epoch": 1.5399079977402954, + "grad_norm": 0.6667360663414001, + "learning_rate": 1.059737836415886e-06, + "loss": 2.4421, + "step": 19081 + }, + { + "epoch": 1.5399887014768785, + "grad_norm": 0.8315143585205078, + "learning_rate": 1.0574468437452245e-06, + "loss": 2.442, + "step": 19082 + }, + { + "epoch": 1.5400694052134614, + "grad_norm": 0.7573503255844116, + "learning_rate": 1.0551583169816304e-06, + "loss": 2.4042, + "step": 19083 + }, + { + "epoch": 1.5401501089500442, + "grad_norm": 0.7037340998649597, + "learning_rate": 1.0528722561821359e-06, + "loss": 2.4142, + "step": 19084 + }, + { + "epoch": 1.5402308126866275, + "grad_norm": 0.7367774248123169, + "learning_rate": 1.0505886614037063e-06, + "loss": 2.4282, + "step": 19085 + }, + { + "epoch": 1.5403115164232104, + "grad_norm": 0.7041850090026855, + "learning_rate": 1.0483075327032743e-06, + "loss": 2.4311, + "step": 19086 + }, + { + "epoch": 1.5403922201597933, + "grad_norm": 0.681599497795105, + "learning_rate": 1.046028870137672e-06, + "loss": 2.3064, + "step": 19087 + }, + { + "epoch": 1.5404729238963764, + "grad_norm": 0.7551162838935852, + "learning_rate": 1.0437526737636983e-06, + "loss": 2.4679, + "step": 19088 + }, + { + "epoch": 1.5405536276329594, + "grad_norm": 0.6397513747215271, + "learning_rate": 1.0414789436380857e-06, + "loss": 2.3815, + "step": 19089 + }, + { + "epoch": 1.5406343313695423, + "grad_norm": 0.7688049077987671, + "learning_rate": 1.0392076798174998e-06, + "loss": 2.4293, + "step": 19090 + }, + { + "epoch": 1.5407150351061254, + "grad_norm": 0.7162747383117676, + "learning_rate": 1.0369388823585401e-06, + "loss": 2.3973, + "step": 19091 + }, + { + "epoch": 1.5407957388427085, + "grad_norm": 0.7172191143035889, + "learning_rate": 1.0346725513177613e-06, + "loss": 2.391, + "step": 19092 + }, + { + "epoch": 1.5408764425792913, + "grad_norm": 0.7141976356506348, + 
"learning_rate": 1.0324086867516403e-06, + "loss": 2.4456, + "step": 19093 + }, + { + "epoch": 1.5409571463158744, + "grad_norm": 0.7529257535934448, + "learning_rate": 1.0301472887165987e-06, + "loss": 2.4612, + "step": 19094 + }, + { + "epoch": 1.5410378500524575, + "grad_norm": 0.818497359752655, + "learning_rate": 1.0278883572689911e-06, + "loss": 2.3922, + "step": 19095 + }, + { + "epoch": 1.5411185537890404, + "grad_norm": 0.6952725052833557, + "learning_rate": 1.0256318924651287e-06, + "loss": 2.4069, + "step": 19096 + }, + { + "epoch": 1.5411992575256235, + "grad_norm": 0.7355678677558899, + "learning_rate": 1.0233778943612327e-06, + "loss": 2.4272, + "step": 19097 + }, + { + "epoch": 1.5412799612622066, + "grad_norm": 0.7497351765632629, + "learning_rate": 1.0211263630134916e-06, + "loss": 2.4334, + "step": 19098 + }, + { + "epoch": 1.5413606649987894, + "grad_norm": 0.6627529263496399, + "learning_rate": 1.018877298478027e-06, + "loss": 2.4338, + "step": 19099 + }, + { + "epoch": 1.5414413687353723, + "grad_norm": 0.6831768751144409, + "learning_rate": 1.01663070081085e-06, + "loss": 2.3938, + "step": 19100 + }, + { + "epoch": 1.5415220724719556, + "grad_norm": 0.7275303602218628, + "learning_rate": 1.0143865700680044e-06, + "loss": 2.4299, + "step": 19101 + }, + { + "epoch": 1.5416027762085385, + "grad_norm": 0.6843026876449585, + "learning_rate": 1.01214490630539e-06, + "loss": 2.4028, + "step": 19102 + }, + { + "epoch": 1.5416834799451213, + "grad_norm": 0.7204479575157166, + "learning_rate": 1.0099057095788845e-06, + "loss": 2.4505, + "step": 19103 + }, + { + "epoch": 1.5417641836817044, + "grad_norm": 0.6450038552284241, + "learning_rate": 1.0076689799442873e-06, + "loss": 2.4176, + "step": 19104 + }, + { + "epoch": 1.5418448874182875, + "grad_norm": 0.7244459390640259, + "learning_rate": 1.0054347174573542e-06, + "loss": 2.3998, + "step": 19105 + }, + { + "epoch": 1.5419255911548704, + "grad_norm": 0.663318932056427, + "learning_rate": 
1.0032029221737517e-06, + "loss": 2.3686, + "step": 19106 + }, + { + "epoch": 1.5420062948914535, + "grad_norm": 0.6830315589904785, + "learning_rate": 1.0009735941491127e-06, + "loss": 2.4351, + "step": 19107 + }, + { + "epoch": 1.5420869986280366, + "grad_norm": 0.7720602750778198, + "learning_rate": 9.987467334390043e-07, + "loss": 2.4385, + "step": 19108 + }, + { + "epoch": 1.5421677023646194, + "grad_norm": 0.7093452215194702, + "learning_rate": 9.965223400989265e-07, + "loss": 2.4008, + "step": 19109 + }, + { + "epoch": 1.5422484061012025, + "grad_norm": 0.6942614316940308, + "learning_rate": 9.943004141843016e-07, + "loss": 2.3717, + "step": 19110 + }, + { + "epoch": 1.5423291098377856, + "grad_norm": 0.647497296333313, + "learning_rate": 9.920809557505072e-07, + "loss": 2.3905, + "step": 19111 + }, + { + "epoch": 1.5424098135743685, + "grad_norm": 0.7235120534896851, + "learning_rate": 9.898639648528662e-07, + "loss": 2.4056, + "step": 19112 + }, + { + "epoch": 1.5424905173109515, + "grad_norm": 0.7509769797325134, + "learning_rate": 9.876494415466342e-07, + "loss": 2.4641, + "step": 19113 + }, + { + "epoch": 1.5425712210475346, + "grad_norm": 0.6972876191139221, + "learning_rate": 9.854373858870003e-07, + "loss": 2.4242, + "step": 19114 + }, + { + "epoch": 1.5426519247841175, + "grad_norm": 0.7343918085098267, + "learning_rate": 9.832277979290983e-07, + "loss": 2.3774, + "step": 19115 + }, + { + "epoch": 1.5427326285207004, + "grad_norm": 0.6731958389282227, + "learning_rate": 9.810206777279841e-07, + "loss": 2.4674, + "step": 19116 + }, + { + "epoch": 1.5428133322572837, + "grad_norm": 0.7607294321060181, + "learning_rate": 9.788160253386803e-07, + "loss": 2.3957, + "step": 19117 + }, + { + "epoch": 1.5428940359938665, + "grad_norm": 0.6569436192512512, + "learning_rate": 9.766138408161208e-07, + "loss": 2.4259, + "step": 19118 + }, + { + "epoch": 1.5429747397304494, + "grad_norm": 0.695393443107605, + "learning_rate": 9.74414124215195e-07, + "loss": 
2.3873, + "step": 19119 + }, + { + "epoch": 1.5430554434670325, + "grad_norm": 0.7301076054573059, + "learning_rate": 9.722168755907258e-07, + "loss": 2.4636, + "step": 19120 + }, + { + "epoch": 1.5431361472036156, + "grad_norm": 0.7235682010650635, + "learning_rate": 9.700220949974693e-07, + "loss": 2.4285, + "step": 19121 + }, + { + "epoch": 1.5432168509401984, + "grad_norm": 0.6985684633255005, + "learning_rate": 9.678297824901262e-07, + "loss": 2.4293, + "step": 19122 + }, + { + "epoch": 1.5432975546767815, + "grad_norm": 0.7281288504600525, + "learning_rate": 9.656399381233417e-07, + "loss": 2.4055, + "step": 19123 + }, + { + "epoch": 1.5433782584133646, + "grad_norm": 0.6899799108505249, + "learning_rate": 9.634525619516832e-07, + "loss": 2.4348, + "step": 19124 + }, + { + "epoch": 1.5434589621499475, + "grad_norm": 0.7520057559013367, + "learning_rate": 9.61267654029685e-07, + "loss": 2.3532, + "step": 19125 + }, + { + "epoch": 1.5435396658865306, + "grad_norm": 0.6671153903007507, + "learning_rate": 9.5908521441177e-07, + "loss": 2.4307, + "step": 19126 + }, + { + "epoch": 1.5436203696231137, + "grad_norm": 0.6772165894508362, + "learning_rate": 9.569052431523617e-07, + "loss": 2.3792, + "step": 19127 + }, + { + "epoch": 1.5437010733596965, + "grad_norm": 0.690838098526001, + "learning_rate": 9.547277403057719e-07, + "loss": 2.4338, + "step": 19128 + }, + { + "epoch": 1.5437817770962794, + "grad_norm": 0.7338915467262268, + "learning_rate": 9.525527059262684e-07, + "loss": 2.4477, + "step": 19129 + }, + { + "epoch": 1.5438624808328627, + "grad_norm": 0.7767740488052368, + "learning_rate": 9.503801400680634e-07, + "loss": 2.4653, + "step": 19130 + }, + { + "epoch": 1.5439431845694456, + "grad_norm": 0.721861720085144, + "learning_rate": 9.482100427853136e-07, + "loss": 2.4199, + "step": 19131 + }, + { + "epoch": 1.5440238883060284, + "grad_norm": 0.7570068836212158, + "learning_rate": 9.460424141320867e-07, + "loss": 2.3977, + "step": 19132 + }, + { + 
"epoch": 1.5441045920426115, + "grad_norm": 0.788520872592926, + "learning_rate": 9.438772541624063e-07, + "loss": 2.3567, + "step": 19133 + }, + { + "epoch": 1.5441852957791946, + "grad_norm": 0.6396552324295044, + "learning_rate": 9.417145629302515e-07, + "loss": 2.4542, + "step": 19134 + }, + { + "epoch": 1.5442659995157775, + "grad_norm": 0.7046605944633484, + "learning_rate": 9.395543404895013e-07, + "loss": 2.4377, + "step": 19135 + }, + { + "epoch": 1.5443467032523606, + "grad_norm": 0.6729561686515808, + "learning_rate": 9.373965868940127e-07, + "loss": 2.4159, + "step": 19136 + }, + { + "epoch": 1.5444274069889437, + "grad_norm": 0.684211254119873, + "learning_rate": 9.352413021975537e-07, + "loss": 2.4083, + "step": 19137 + }, + { + "epoch": 1.5445081107255265, + "grad_norm": 0.6997072696685791, + "learning_rate": 9.330884864538369e-07, + "loss": 2.3983, + "step": 19138 + }, + { + "epoch": 1.5445888144621096, + "grad_norm": 0.6781525015830994, + "learning_rate": 9.309381397165085e-07, + "loss": 2.4391, + "step": 19139 + }, + { + "epoch": 1.5446695181986927, + "grad_norm": 0.6652973890304565, + "learning_rate": 9.287902620391808e-07, + "loss": 2.4354, + "step": 19140 + }, + { + "epoch": 1.5447502219352756, + "grad_norm": 0.6931048631668091, + "learning_rate": 9.266448534753669e-07, + "loss": 2.4016, + "step": 19141 + }, + { + "epoch": 1.5448309256718586, + "grad_norm": 0.7639850974082947, + "learning_rate": 9.245019140785461e-07, + "loss": 2.4453, + "step": 19142 + }, + { + "epoch": 1.5449116294084417, + "grad_norm": 0.683496356010437, + "learning_rate": 9.223614439021311e-07, + "loss": 2.4202, + "step": 19143 + }, + { + "epoch": 1.5449923331450246, + "grad_norm": 0.7046312689781189, + "learning_rate": 9.202234429994572e-07, + "loss": 2.3661, + "step": 19144 + }, + { + "epoch": 1.5450730368816075, + "grad_norm": 0.6588082909584045, + "learning_rate": 9.18087911423815e-07, + "loss": 2.3788, + "step": 19145 + }, + { + "epoch": 1.5451537406181908, + 
"grad_norm": 0.695152223110199, + "learning_rate": 9.159548492284176e-07, + "loss": 2.4174, + "step": 19146 + }, + { + "epoch": 1.5452344443547736, + "grad_norm": 0.7124348282814026, + "learning_rate": 9.138242564664446e-07, + "loss": 2.4365, + "step": 19147 + }, + { + "epoch": 1.5453151480913565, + "grad_norm": 0.7370025515556335, + "learning_rate": 9.116961331909757e-07, + "loss": 2.4111, + "step": 19148 + }, + { + "epoch": 1.5453958518279396, + "grad_norm": 0.8131148219108582, + "learning_rate": 9.095704794550574e-07, + "loss": 2.4039, + "step": 19149 + }, + { + "epoch": 1.5454765555645227, + "grad_norm": 0.750605046749115, + "learning_rate": 9.074472953116697e-07, + "loss": 2.4331, + "step": 19150 + }, + { + "epoch": 1.5455572593011055, + "grad_norm": 0.63783198595047, + "learning_rate": 9.053265808137257e-07, + "loss": 2.4068, + "step": 19151 + }, + { + "epoch": 1.5456379630376886, + "grad_norm": 0.65320885181427, + "learning_rate": 9.032083360140719e-07, + "loss": 2.3655, + "step": 19152 + }, + { + "epoch": 1.5457186667742717, + "grad_norm": 0.6764385104179382, + "learning_rate": 9.010925609655219e-07, + "loss": 2.3951, + "step": 19153 + }, + { + "epoch": 1.5457993705108546, + "grad_norm": 0.6856040358543396, + "learning_rate": 8.989792557207887e-07, + "loss": 2.356, + "step": 19154 + }, + { + "epoch": 1.5458800742474377, + "grad_norm": 0.6802964210510254, + "learning_rate": 8.968684203325417e-07, + "loss": 2.3085, + "step": 19155 + }, + { + "epoch": 1.5459607779840208, + "grad_norm": 0.7369034290313721, + "learning_rate": 8.947600548533941e-07, + "loss": 2.387, + "step": 19156 + }, + { + "epoch": 1.5460414817206036, + "grad_norm": 0.6967560052871704, + "learning_rate": 8.926541593358928e-07, + "loss": 2.4407, + "step": 19157 + }, + { + "epoch": 1.5461221854571867, + "grad_norm": 0.7036765217781067, + "learning_rate": 8.905507338325181e-07, + "loss": 2.4206, + "step": 19158 + }, + { + "epoch": 1.5462028891937698, + "grad_norm": 0.690966010093689, + 
"learning_rate": 8.884497783956836e-07, + "loss": 2.4362, + "step": 19159 + }, + { + "epoch": 1.5462835929303527, + "grad_norm": 0.7242603302001953, + "learning_rate": 8.863512930777806e-07, + "loss": 2.4075, + "step": 19160 + }, + { + "epoch": 1.5463642966669355, + "grad_norm": 0.6484987139701843, + "learning_rate": 8.842552779310675e-07, + "loss": 2.361, + "step": 19161 + }, + { + "epoch": 1.5464450004035188, + "grad_norm": 0.6532461047172546, + "learning_rate": 8.821617330078136e-07, + "loss": 2.4182, + "step": 19162 + }, + { + "epoch": 1.5465257041401017, + "grad_norm": 0.700127363204956, + "learning_rate": 8.800706583601881e-07, + "loss": 2.4575, + "step": 19163 + }, + { + "epoch": 1.5466064078766846, + "grad_norm": 0.7675496339797974, + "learning_rate": 8.77982054040305e-07, + "loss": 2.4084, + "step": 19164 + }, + { + "epoch": 1.5466871116132677, + "grad_norm": 0.6848316192626953, + "learning_rate": 8.758959201002115e-07, + "loss": 2.4783, + "step": 19165 + }, + { + "epoch": 1.5467678153498507, + "grad_norm": 0.6221129894256592, + "learning_rate": 8.738122565919104e-07, + "loss": 2.411, + "step": 19166 + }, + { + "epoch": 1.5468485190864336, + "grad_norm": 0.7287629842758179, + "learning_rate": 8.71731063567316e-07, + "loss": 2.3618, + "step": 19167 + }, + { + "epoch": 1.5469292228230167, + "grad_norm": 0.6609008312225342, + "learning_rate": 8.696523410783198e-07, + "loss": 2.3889, + "step": 19168 + }, + { + "epoch": 1.5470099265595998, + "grad_norm": 0.8017542958259583, + "learning_rate": 8.675760891767138e-07, + "loss": 2.4837, + "step": 19169 + }, + { + "epoch": 1.5470906302961827, + "grad_norm": 0.6606761813163757, + "learning_rate": 8.655023079142343e-07, + "loss": 2.3846, + "step": 19170 + }, + { + "epoch": 1.5471713340327657, + "grad_norm": 0.6829126477241516, + "learning_rate": 8.634309973425847e-07, + "loss": 2.3925, + "step": 19171 + }, + { + "epoch": 1.5472520377693488, + "grad_norm": 0.7086566090583801, + "learning_rate": 8.613621575133901e-07, + 
"loss": 2.4643, + "step": 19172 + }, + { + "epoch": 1.5473327415059317, + "grad_norm": 0.7239049673080444, + "learning_rate": 8.592957884781871e-07, + "loss": 2.4309, + "step": 19173 + }, + { + "epoch": 1.5474134452425146, + "grad_norm": 0.6763661503791809, + "learning_rate": 8.572318902884902e-07, + "loss": 2.4485, + "step": 19174 + }, + { + "epoch": 1.5474941489790979, + "grad_norm": 0.6714745759963989, + "learning_rate": 8.551704629957469e-07, + "loss": 2.3743, + "step": 19175 + }, + { + "epoch": 1.5475748527156807, + "grad_norm": 0.6859524846076965, + "learning_rate": 8.531115066513162e-07, + "loss": 2.39, + "step": 19176 + }, + { + "epoch": 1.5476555564522636, + "grad_norm": 0.703208327293396, + "learning_rate": 8.510550213065238e-07, + "loss": 2.4002, + "step": 19177 + }, + { + "epoch": 1.5477362601888467, + "grad_norm": 0.7372791171073914, + "learning_rate": 8.490010070126175e-07, + "loss": 2.4585, + "step": 19178 + }, + { + "epoch": 1.5478169639254298, + "grad_norm": 0.6707181930541992, + "learning_rate": 8.469494638207898e-07, + "loss": 2.357, + "step": 19179 + }, + { + "epoch": 1.5478976676620126, + "grad_norm": 0.6646085381507874, + "learning_rate": 8.449003917821663e-07, + "loss": 2.405, + "step": 19180 + }, + { + "epoch": 1.5479783713985957, + "grad_norm": 0.7051934599876404, + "learning_rate": 8.428537909478174e-07, + "loss": 2.3968, + "step": 19181 + }, + { + "epoch": 1.5480590751351788, + "grad_norm": 0.7081723809242249, + "learning_rate": 8.408096613687578e-07, + "loss": 2.3406, + "step": 19182 + }, + { + "epoch": 1.5481397788717617, + "grad_norm": 0.7205994725227356, + "learning_rate": 8.387680030959244e-07, + "loss": 2.4192, + "step": 19183 + }, + { + "epoch": 1.5482204826083448, + "grad_norm": 0.6869010329246521, + "learning_rate": 8.36728816180199e-07, + "loss": 2.4202, + "step": 19184 + }, + { + "epoch": 1.5483011863449279, + "grad_norm": 0.6632729172706604, + "learning_rate": 8.346921006724073e-07, + "loss": 2.4214, + "step": 19185 + }, + { + 
"epoch": 1.5483818900815107, + "grad_norm": 0.6682900786399841, + "learning_rate": 8.32657856623309e-07, + "loss": 2.4151, + "step": 19186 + }, + { + "epoch": 1.5484625938180938, + "grad_norm": 0.6790106892585754, + "learning_rate": 8.306260840836078e-07, + "loss": 2.4119, + "step": 19187 + }, + { + "epoch": 1.548543297554677, + "grad_norm": 0.7497449517250061, + "learning_rate": 8.285967831039299e-07, + "loss": 2.4036, + "step": 19188 + }, + { + "epoch": 1.5486240012912598, + "grad_norm": 0.6842204928398132, + "learning_rate": 8.265699537348681e-07, + "loss": 2.3892, + "step": 19189 + }, + { + "epoch": 1.5487047050278426, + "grad_norm": 0.6980693340301514, + "learning_rate": 8.245455960269156e-07, + "loss": 2.4142, + "step": 19190 + }, + { + "epoch": 1.548785408764426, + "grad_norm": 0.7071843147277832, + "learning_rate": 8.225237100305316e-07, + "loss": 2.4066, + "step": 19191 + }, + { + "epoch": 1.5488661125010088, + "grad_norm": 0.7079197764396667, + "learning_rate": 8.205042957961096e-07, + "loss": 2.4186, + "step": 19192 + }, + { + "epoch": 1.5489468162375917, + "grad_norm": 0.684631884098053, + "learning_rate": 8.184873533739867e-07, + "loss": 2.3962, + "step": 19193 + }, + { + "epoch": 1.5490275199741748, + "grad_norm": 0.6830846667289734, + "learning_rate": 8.164728828144119e-07, + "loss": 2.4338, + "step": 19194 + }, + { + "epoch": 1.5491082237107578, + "grad_norm": 0.7033431529998779, + "learning_rate": 8.144608841676116e-07, + "loss": 2.3611, + "step": 19195 + }, + { + "epoch": 1.5491889274473407, + "grad_norm": 0.6961686015129089, + "learning_rate": 8.124513574837122e-07, + "loss": 2.3882, + "step": 19196 + }, + { + "epoch": 1.5492696311839238, + "grad_norm": 0.7209047675132751, + "learning_rate": 8.104443028128073e-07, + "loss": 2.4177, + "step": 19197 + }, + { + "epoch": 1.5493503349205069, + "grad_norm": 0.7201551795005798, + "learning_rate": 8.084397202049121e-07, + "loss": 2.3988, + "step": 19198 + }, + { + "epoch": 1.5494310386570898, + 
"grad_norm": 0.6855216026306152, + "learning_rate": 8.064376097099979e-07, + "loss": 2.4572, + "step": 19199 + }, + { + "epoch": 1.5495117423936728, + "grad_norm": 0.6492244601249695, + "learning_rate": 8.04437971377936e-07, + "loss": 2.429, + "step": 19200 + }, + { + "epoch": 1.549592446130256, + "grad_norm": 0.6999516487121582, + "learning_rate": 8.024408052585864e-07, + "loss": 2.3809, + "step": 19201 + }, + { + "epoch": 1.5496731498668388, + "grad_norm": 0.7052421569824219, + "learning_rate": 8.004461114017203e-07, + "loss": 2.4315, + "step": 19202 + }, + { + "epoch": 1.5497538536034219, + "grad_norm": 0.6808926463127136, + "learning_rate": 7.984538898570426e-07, + "loss": 2.4391, + "step": 19203 + }, + { + "epoch": 1.549834557340005, + "grad_norm": 0.6847327351570129, + "learning_rate": 7.964641406742135e-07, + "loss": 2.4252, + "step": 19204 + }, + { + "epoch": 1.5499152610765878, + "grad_norm": 0.6767359375953674, + "learning_rate": 7.944768639028266e-07, + "loss": 2.3823, + "step": 19205 + }, + { + "epoch": 1.5499959648131707, + "grad_norm": 0.6847020387649536, + "learning_rate": 7.924920595923868e-07, + "loss": 2.4168, + "step": 19206 + }, + { + "epoch": 1.550076668549754, + "grad_norm": 0.6742147207260132, + "learning_rate": 7.905097277923879e-07, + "loss": 2.4206, + "step": 19207 + }, + { + "epoch": 1.5501573722863369, + "grad_norm": 0.7320376634597778, + "learning_rate": 7.885298685522235e-07, + "loss": 2.391, + "step": 19208 + }, + { + "epoch": 1.5502380760229197, + "grad_norm": 0.6892278790473938, + "learning_rate": 7.865524819212211e-07, + "loss": 2.3863, + "step": 19209 + }, + { + "epoch": 1.5503187797595028, + "grad_norm": 0.7395393252372742, + "learning_rate": 7.845775679486855e-07, + "loss": 2.38, + "step": 19210 + }, + { + "epoch": 1.550399483496086, + "grad_norm": 0.6716200113296509, + "learning_rate": 7.826051266838441e-07, + "loss": 2.455, + "step": 19211 + }, + { + "epoch": 1.5504801872326688, + "grad_norm": 0.664772093296051, + 
"learning_rate": 7.806351581758242e-07, + "loss": 2.3437, + "step": 19212 + }, + { + "epoch": 1.5505608909692519, + "grad_norm": 0.6904496550559998, + "learning_rate": 7.78667662473731e-07, + "loss": 2.4147, + "step": 19213 + }, + { + "epoch": 1.550641594705835, + "grad_norm": 0.6697477102279663, + "learning_rate": 7.767026396266142e-07, + "loss": 2.4178, + "step": 19214 + }, + { + "epoch": 1.5507222984424178, + "grad_norm": 0.893882155418396, + "learning_rate": 7.747400896834456e-07, + "loss": 2.4976, + "step": 19215 + }, + { + "epoch": 1.550803002179001, + "grad_norm": 0.6615902185440063, + "learning_rate": 7.727800126931195e-07, + "loss": 2.3952, + "step": 19216 + }, + { + "epoch": 1.550883705915584, + "grad_norm": 0.7881698608398438, + "learning_rate": 7.70822408704508e-07, + "loss": 2.484, + "step": 19217 + }, + { + "epoch": 1.5509644096521669, + "grad_norm": 0.7452302575111389, + "learning_rate": 7.688672777663941e-07, + "loss": 2.4517, + "step": 19218 + }, + { + "epoch": 1.55104511338875, + "grad_norm": 0.6759300827980042, + "learning_rate": 7.669146199274946e-07, + "loss": 2.3662, + "step": 19219 + }, + { + "epoch": 1.551125817125333, + "grad_norm": 0.7412277460098267, + "learning_rate": 7.649644352364705e-07, + "loss": 2.4281, + "step": 19220 + }, + { + "epoch": 1.551206520861916, + "grad_norm": 0.6926305294036865, + "learning_rate": 7.630167237419494e-07, + "loss": 2.4882, + "step": 19221 + }, + { + "epoch": 1.5512872245984988, + "grad_norm": 0.667733907699585, + "learning_rate": 7.610714854924595e-07, + "loss": 2.4451, + "step": 19222 + }, + { + "epoch": 1.551367928335082, + "grad_norm": 0.7715731263160706, + "learning_rate": 7.591287205364727e-07, + "loss": 2.3961, + "step": 19223 + }, + { + "epoch": 1.551448632071665, + "grad_norm": 0.6897944808006287, + "learning_rate": 7.571884289224174e-07, + "loss": 2.461, + "step": 19224 + }, + { + "epoch": 1.5515293358082478, + "grad_norm": 0.707567572593689, + "learning_rate": 7.552506106986546e-07, + "loss": 
2.3926, + "step": 19225 + }, + { + "epoch": 1.551610039544831, + "grad_norm": 0.6644080877304077, + "learning_rate": 7.533152659134679e-07, + "loss": 2.4393, + "step": 19226 + }, + { + "epoch": 1.551690743281414, + "grad_norm": 0.6877533793449402, + "learning_rate": 7.513823946151077e-07, + "loss": 2.3582, + "step": 19227 + }, + { + "epoch": 1.5517714470179969, + "grad_norm": 0.6568461656570435, + "learning_rate": 7.494519968517355e-07, + "loss": 2.385, + "step": 19228 + }, + { + "epoch": 1.55185215075458, + "grad_norm": 0.6575854420661926, + "learning_rate": 7.475240726714572e-07, + "loss": 2.3772, + "step": 19229 + }, + { + "epoch": 1.551932854491163, + "grad_norm": 0.7378003001213074, + "learning_rate": 7.455986221223343e-07, + "loss": 2.4352, + "step": 19230 + }, + { + "epoch": 1.552013558227746, + "grad_norm": 0.7200499773025513, + "learning_rate": 7.436756452523507e-07, + "loss": 2.3977, + "step": 19231 + }, + { + "epoch": 1.552094261964329, + "grad_norm": 0.6984114646911621, + "learning_rate": 7.417551421094127e-07, + "loss": 2.4035, + "step": 19232 + }, + { + "epoch": 1.552174965700912, + "grad_norm": 0.7811709046363831, + "learning_rate": 7.39837112741415e-07, + "loss": 2.3964, + "step": 19233 + }, + { + "epoch": 1.552255669437495, + "grad_norm": 0.6950088739395142, + "learning_rate": 7.379215571961306e-07, + "loss": 2.3653, + "step": 19234 + }, + { + "epoch": 1.5523363731740778, + "grad_norm": 0.6969046592712402, + "learning_rate": 7.360084755213325e-07, + "loss": 2.3522, + "step": 19235 + }, + { + "epoch": 1.552417076910661, + "grad_norm": 0.747194230556488, + "learning_rate": 7.340978677646715e-07, + "loss": 2.4443, + "step": 19236 + }, + { + "epoch": 1.552497780647244, + "grad_norm": 0.7345097064971924, + "learning_rate": 7.32189733973776e-07, + "loss": 2.4142, + "step": 19237 + }, + { + "epoch": 1.5525784843838268, + "grad_norm": 0.7270597815513611, + "learning_rate": 7.302840741961969e-07, + "loss": 2.441, + "step": 19238 + }, + { + "epoch": 
1.55265918812041, + "grad_norm": 0.7429140210151672, + "learning_rate": 7.283808884794408e-07, + "loss": 2.4393, + "step": 19239 + }, + { + "epoch": 1.552739891856993, + "grad_norm": 0.7116754055023193, + "learning_rate": 7.264801768709361e-07, + "loss": 2.3961, + "step": 19240 + }, + { + "epoch": 1.5528205955935759, + "grad_norm": 0.703602135181427, + "learning_rate": 7.245819394180342e-07, + "loss": 2.346, + "step": 19241 + }, + { + "epoch": 1.552901299330159, + "grad_norm": 0.6723247766494751, + "learning_rate": 7.226861761680636e-07, + "loss": 2.445, + "step": 19242 + }, + { + "epoch": 1.552982003066742, + "grad_norm": 0.678294837474823, + "learning_rate": 7.207928871682757e-07, + "loss": 2.4373, + "step": 19243 + }, + { + "epoch": 1.553062706803325, + "grad_norm": 0.6648022532463074, + "learning_rate": 7.189020724658325e-07, + "loss": 2.3954, + "step": 19244 + }, + { + "epoch": 1.553143410539908, + "grad_norm": 0.7083376049995422, + "learning_rate": 7.170137321078851e-07, + "loss": 2.4005, + "step": 19245 + }, + { + "epoch": 1.553224114276491, + "grad_norm": 0.6918483376502991, + "learning_rate": 7.151278661414851e-07, + "loss": 2.3876, + "step": 19246 + }, + { + "epoch": 1.553304818013074, + "grad_norm": 0.6656081676483154, + "learning_rate": 7.13244474613628e-07, + "loss": 2.3763, + "step": 19247 + }, + { + "epoch": 1.553385521749657, + "grad_norm": 0.7443872094154358, + "learning_rate": 7.113635575712541e-07, + "loss": 2.4226, + "step": 19248 + }, + { + "epoch": 1.5534662254862401, + "grad_norm": 0.661173403263092, + "learning_rate": 7.094851150612369e-07, + "loss": 2.419, + "step": 19249 + }, + { + "epoch": 1.553546929222823, + "grad_norm": 0.6634771227836609, + "learning_rate": 7.07609147130417e-07, + "loss": 2.3717, + "step": 19250 + }, + { + "epoch": 1.5536276329594059, + "grad_norm": 0.6785014271736145, + "learning_rate": 7.057356538255122e-07, + "loss": 2.4568, + "step": 19251 + }, + { + "epoch": 1.5537083366959892, + "grad_norm": 0.7533704042434692, 
+ "learning_rate": 7.03864635193241e-07, + "loss": 2.485, + "step": 19252 + }, + { + "epoch": 1.553789040432572, + "grad_norm": 0.7393310070037842, + "learning_rate": 7.019960912802215e-07, + "loss": 2.4075, + "step": 19253 + }, + { + "epoch": 1.553869744169155, + "grad_norm": 0.6767510771751404, + "learning_rate": 7.001300221330387e-07, + "loss": 2.3841, + "step": 19254 + }, + { + "epoch": 1.553950447905738, + "grad_norm": 0.7293907999992371, + "learning_rate": 6.982664277981776e-07, + "loss": 2.426, + "step": 19255 + }, + { + "epoch": 1.554031151642321, + "grad_norm": 0.701337993144989, + "learning_rate": 6.964053083221011e-07, + "loss": 2.4659, + "step": 19256 + }, + { + "epoch": 1.554111855378904, + "grad_norm": 0.6693280935287476, + "learning_rate": 6.945466637511833e-07, + "loss": 2.4395, + "step": 19257 + }, + { + "epoch": 1.554192559115487, + "grad_norm": 0.7023574709892273, + "learning_rate": 6.926904941317425e-07, + "loss": 2.3703, + "step": 19258 + }, + { + "epoch": 1.5542732628520701, + "grad_norm": 0.7118813991546631, + "learning_rate": 6.90836799510064e-07, + "loss": 2.3914, + "step": 19259 + }, + { + "epoch": 1.554353966588653, + "grad_norm": 0.7429333329200745, + "learning_rate": 6.889855799323108e-07, + "loss": 2.3396, + "step": 19260 + }, + { + "epoch": 1.554434670325236, + "grad_norm": 0.6991192698478699, + "learning_rate": 6.871368354446461e-07, + "loss": 2.3973, + "step": 19261 + }, + { + "epoch": 1.5545153740618192, + "grad_norm": 0.6381178498268127, + "learning_rate": 6.852905660931441e-07, + "loss": 2.3941, + "step": 19262 + }, + { + "epoch": 1.554596077798402, + "grad_norm": 0.7152952551841736, + "learning_rate": 6.834467719238014e-07, + "loss": 2.4428, + "step": 19263 + }, + { + "epoch": 1.5546767815349851, + "grad_norm": 0.6674736142158508, + "learning_rate": 6.81605452982581e-07, + "loss": 2.4169, + "step": 19264 + }, + { + "epoch": 1.5547574852715682, + "grad_norm": 0.7325178384780884, + "learning_rate": 6.797666093153798e-07, + "loss": 
2.366, + "step": 19265 + }, + { + "epoch": 1.554838189008151, + "grad_norm": 0.6610640287399292, + "learning_rate": 6.779302409680277e-07, + "loss": 2.3988, + "step": 19266 + }, + { + "epoch": 1.554918892744734, + "grad_norm": 0.7417724132537842, + "learning_rate": 6.760963479862659e-07, + "loss": 2.392, + "step": 19267 + }, + { + "epoch": 1.5549995964813172, + "grad_norm": 0.652363657951355, + "learning_rate": 6.742649304158355e-07, + "loss": 2.4063, + "step": 19268 + }, + { + "epoch": 1.5550803002179001, + "grad_norm": 0.6984983086585999, + "learning_rate": 6.724359883023556e-07, + "loss": 2.3751, + "step": 19269 + }, + { + "epoch": 1.555161003954483, + "grad_norm": 0.7256860136985779, + "learning_rate": 6.706095216914121e-07, + "loss": 2.4451, + "step": 19270 + }, + { + "epoch": 1.555241707691066, + "grad_norm": 0.7346799373626709, + "learning_rate": 6.687855306285351e-07, + "loss": 2.4998, + "step": 19271 + }, + { + "epoch": 1.5553224114276492, + "grad_norm": 0.6899287700653076, + "learning_rate": 6.669640151591771e-07, + "loss": 2.3768, + "step": 19272 + }, + { + "epoch": 1.555403115164232, + "grad_norm": 0.6981001496315002, + "learning_rate": 6.651449753287354e-07, + "loss": 2.4342, + "step": 19273 + }, + { + "epoch": 1.555483818900815, + "grad_norm": 0.751319944858551, + "learning_rate": 6.633284111825399e-07, + "loss": 2.3897, + "step": 19274 + }, + { + "epoch": 1.5555645226373982, + "grad_norm": 0.6876060366630554, + "learning_rate": 6.615143227658771e-07, + "loss": 2.4301, + "step": 19275 + }, + { + "epoch": 1.555645226373981, + "grad_norm": 0.648356556892395, + "learning_rate": 6.597027101239439e-07, + "loss": 2.3858, + "step": 19276 + }, + { + "epoch": 1.5557259301105641, + "grad_norm": 0.7137950658798218, + "learning_rate": 6.578935733018932e-07, + "loss": 2.4338, + "step": 19277 + }, + { + "epoch": 1.5558066338471472, + "grad_norm": 0.7018558979034424, + "learning_rate": 6.560869123448332e-07, + "loss": 2.4361, + "step": 19278 + }, + { + "epoch": 
1.55588733758373, + "grad_norm": 0.6574081182479858, + "learning_rate": 6.542827272977615e-07, + "loss": 2.446, + "step": 19279 + }, + { + "epoch": 1.555968041320313, + "grad_norm": 0.7260562181472778, + "learning_rate": 6.524810182056529e-07, + "loss": 2.3803, + "step": 19280 + }, + { + "epoch": 1.5560487450568963, + "grad_norm": 0.6866613626480103, + "learning_rate": 6.506817851134272e-07, + "loss": 2.3643, + "step": 19281 + }, + { + "epoch": 1.5561294487934791, + "grad_norm": 0.672102153301239, + "learning_rate": 6.488850280659042e-07, + "loss": 2.3943, + "step": 19282 + }, + { + "epoch": 1.556210152530062, + "grad_norm": 0.7725780010223389, + "learning_rate": 6.470907471078813e-07, + "loss": 2.4248, + "step": 19283 + }, + { + "epoch": 1.556290856266645, + "grad_norm": 0.7249971628189087, + "learning_rate": 6.452989422840561e-07, + "loss": 2.4227, + "step": 19284 + }, + { + "epoch": 1.5563715600032282, + "grad_norm": 0.756390392780304, + "learning_rate": 6.435096136391039e-07, + "loss": 2.4247, + "step": 19285 + }, + { + "epoch": 1.556452263739811, + "grad_norm": 0.6976982951164246, + "learning_rate": 6.417227612176114e-07, + "loss": 2.4012, + "step": 19286 + }, + { + "epoch": 1.5565329674763941, + "grad_norm": 0.6979532837867737, + "learning_rate": 6.399383850641205e-07, + "loss": 2.333, + "step": 19287 + }, + { + "epoch": 1.5566136712129772, + "grad_norm": 0.7074969410896301, + "learning_rate": 6.381564852230848e-07, + "loss": 2.3925, + "step": 19288 + }, + { + "epoch": 1.55669437494956, + "grad_norm": 0.7873122096061707, + "learning_rate": 6.363770617389353e-07, + "loss": 2.5208, + "step": 19289 + }, + { + "epoch": 1.5567750786861432, + "grad_norm": 0.6984712481498718, + "learning_rate": 6.346001146559921e-07, + "loss": 2.387, + "step": 19290 + }, + { + "epoch": 1.5568557824227263, + "grad_norm": 0.7126075625419617, + "learning_rate": 6.328256440185754e-07, + "loss": 2.4446, + "step": 19291 + }, + { + "epoch": 1.5569364861593091, + "grad_norm": 
0.7251972556114197, + "learning_rate": 6.31053649870883e-07, + "loss": 2.4556, + "step": 19292 + }, + { + "epoch": 1.5570171898958922, + "grad_norm": 0.681929886341095, + "learning_rate": 6.292841322570797e-07, + "loss": 2.466, + "step": 19293 + }, + { + "epoch": 1.5570978936324753, + "grad_norm": 0.7257016897201538, + "learning_rate": 6.275170912212857e-07, + "loss": 2.41, + "step": 19294 + }, + { + "epoch": 1.5571785973690582, + "grad_norm": 0.7186483144760132, + "learning_rate": 6.257525268075215e-07, + "loss": 2.4098, + "step": 19295 + }, + { + "epoch": 1.557259301105641, + "grad_norm": 0.6800422668457031, + "learning_rate": 6.23990439059774e-07, + "loss": 2.4023, + "step": 19296 + }, + { + "epoch": 1.5573400048422243, + "grad_norm": 0.7078402042388916, + "learning_rate": 6.222308280219524e-07, + "loss": 2.3742, + "step": 19297 + }, + { + "epoch": 1.5574207085788072, + "grad_norm": 0.7318849563598633, + "learning_rate": 6.204736937379219e-07, + "loss": 2.4333, + "step": 19298 + }, + { + "epoch": 1.55750141231539, + "grad_norm": 0.7419871687889099, + "learning_rate": 6.187190362514584e-07, + "loss": 2.4375, + "step": 19299 + }, + { + "epoch": 1.5575821160519732, + "grad_norm": 0.6832826137542725, + "learning_rate": 6.169668556063046e-07, + "loss": 2.4445, + "step": 19300 + }, + { + "epoch": 1.5576628197885563, + "grad_norm": 0.6925410628318787, + "learning_rate": 6.152171518461369e-07, + "loss": 2.3245, + "step": 19301 + }, + { + "epoch": 1.5577435235251391, + "grad_norm": 0.7662268877029419, + "learning_rate": 6.134699250145426e-07, + "loss": 2.4055, + "step": 19302 + }, + { + "epoch": 1.5578242272617222, + "grad_norm": 0.6765877604484558, + "learning_rate": 6.117251751550757e-07, + "loss": 2.3561, + "step": 19303 + }, + { + "epoch": 1.5579049309983053, + "grad_norm": 0.7231481671333313, + "learning_rate": 6.099829023112235e-07, + "loss": 2.3405, + "step": 19304 + }, + { + "epoch": 1.5579856347348882, + "grad_norm": 0.6975328922271729, + "learning_rate": 
6.082431065263961e-07, + "loss": 2.3995, + "step": 19305 + }, + { + "epoch": 1.5580663384714712, + "grad_norm": 0.6499672532081604, + "learning_rate": 6.065057878439806e-07, + "loss": 2.4575, + "step": 19306 + }, + { + "epoch": 1.5581470422080543, + "grad_norm": 0.6954952478408813, + "learning_rate": 6.047709463072538e-07, + "loss": 2.398, + "step": 19307 + }, + { + "epoch": 1.5582277459446372, + "grad_norm": 0.6801657676696777, + "learning_rate": 6.030385819594475e-07, + "loss": 2.3674, + "step": 19308 + }, + { + "epoch": 1.5583084496812203, + "grad_norm": 0.6733521819114685, + "learning_rate": 6.013086948437496e-07, + "loss": 2.4226, + "step": 19309 + }, + { + "epoch": 1.5583891534178034, + "grad_norm": 0.6511983871459961, + "learning_rate": 5.9958128500327e-07, + "loss": 2.4003, + "step": 19310 + }, + { + "epoch": 1.5584698571543862, + "grad_norm": 0.6741353869438171, + "learning_rate": 5.978563524810632e-07, + "loss": 2.3824, + "step": 19311 + }, + { + "epoch": 1.558550560890969, + "grad_norm": 0.741064727306366, + "learning_rate": 5.96133897320117e-07, + "loss": 2.4386, + "step": 19312 + }, + { + "epoch": 1.5586312646275524, + "grad_norm": 0.6985476613044739, + "learning_rate": 5.944139195633525e-07, + "loss": 2.4689, + "step": 19313 + }, + { + "epoch": 1.5587119683641353, + "grad_norm": 0.6882340908050537, + "learning_rate": 5.926964192536466e-07, + "loss": 2.3866, + "step": 19314 + }, + { + "epoch": 1.5587926721007181, + "grad_norm": 0.6650036573410034, + "learning_rate": 5.909813964337985e-07, + "loss": 2.4276, + "step": 19315 + }, + { + "epoch": 1.5588733758373012, + "grad_norm": 0.6756288409233093, + "learning_rate": 5.892688511465516e-07, + "loss": 2.415, + "step": 19316 + }, + { + "epoch": 1.5589540795738843, + "grad_norm": 0.6614066958427429, + "learning_rate": 5.875587834345942e-07, + "loss": 2.3999, + "step": 19317 + }, + { + "epoch": 1.5590347833104672, + "grad_norm": 0.7137793898582458, + "learning_rate": 5.858511933405253e-07, + "loss": 2.4241, + 
"step": 19318 + }, + { + "epoch": 1.5591154870470503, + "grad_norm": 0.6698580384254456, + "learning_rate": 5.841460809069221e-07, + "loss": 2.4266, + "step": 19319 + }, + { + "epoch": 1.5591961907836334, + "grad_norm": 0.714978814125061, + "learning_rate": 5.824434461762729e-07, + "loss": 2.4227, + "step": 19320 + }, + { + "epoch": 1.5592768945202162, + "grad_norm": 0.6798544526100159, + "learning_rate": 5.807432891910214e-07, + "loss": 2.3981, + "step": 19321 + }, + { + "epoch": 1.5593575982567993, + "grad_norm": 0.7036132216453552, + "learning_rate": 5.790456099935227e-07, + "loss": 2.4763, + "step": 19322 + }, + { + "epoch": 1.5594383019933824, + "grad_norm": 0.654386579990387, + "learning_rate": 5.773504086260983e-07, + "loss": 2.4226, + "step": 19323 + }, + { + "epoch": 1.5595190057299653, + "grad_norm": 0.6939975619316101, + "learning_rate": 5.756576851309925e-07, + "loss": 2.4292, + "step": 19324 + }, + { + "epoch": 1.5595997094665484, + "grad_norm": 0.6557560563087463, + "learning_rate": 5.739674395503935e-07, + "loss": 2.3817, + "step": 19325 + }, + { + "epoch": 1.5596804132031314, + "grad_norm": 0.7088754177093506, + "learning_rate": 5.722796719264345e-07, + "loss": 2.4012, + "step": 19326 + }, + { + "epoch": 1.5597611169397143, + "grad_norm": 0.72486811876297, + "learning_rate": 5.705943823011705e-07, + "loss": 2.4403, + "step": 19327 + }, + { + "epoch": 1.5598418206762972, + "grad_norm": 0.9082531929016113, + "learning_rate": 5.689115707166015e-07, + "loss": 2.4072, + "step": 19328 + }, + { + "epoch": 1.5599225244128803, + "grad_norm": 0.6311739087104797, + "learning_rate": 5.672312372146826e-07, + "loss": 2.3898, + "step": 19329 + }, + { + "epoch": 1.5600032281494634, + "grad_norm": 0.6943666338920593, + "learning_rate": 5.655533818372693e-07, + "loss": 2.4152, + "step": 19330 + }, + { + "epoch": 1.5600839318860462, + "grad_norm": 0.6921557188034058, + "learning_rate": 5.638780046261949e-07, + "loss": 2.368, + "step": 19331 + }, + { + "epoch": 
1.5601646356226293, + "grad_norm": 0.7003421783447266, + "learning_rate": 5.622051056232147e-07, + "loss": 2.3899, + "step": 19332 + }, + { + "epoch": 1.5602453393592124, + "grad_norm": 0.7134489417076111, + "learning_rate": 5.605346848700177e-07, + "loss": 2.4222, + "step": 19333 + }, + { + "epoch": 1.5603260430957953, + "grad_norm": 0.6547747254371643, + "learning_rate": 5.588667424082262e-07, + "loss": 2.4282, + "step": 19334 + }, + { + "epoch": 1.5604067468323783, + "grad_norm": 0.7284405827522278, + "learning_rate": 5.572012782794068e-07, + "loss": 2.4133, + "step": 19335 + }, + { + "epoch": 1.5604874505689614, + "grad_norm": 0.6927350759506226, + "learning_rate": 5.555382925250929e-07, + "loss": 2.4243, + "step": 19336 + }, + { + "epoch": 1.5605681543055443, + "grad_norm": 0.7317484021186829, + "learning_rate": 5.538777851867072e-07, + "loss": 2.4306, + "step": 19337 + }, + { + "epoch": 1.5606488580421274, + "grad_norm": 0.715969979763031, + "learning_rate": 5.522197563056497e-07, + "loss": 2.4241, + "step": 19338 + }, + { + "epoch": 1.5607295617787105, + "grad_norm": 0.6449747085571289, + "learning_rate": 5.505642059232319e-07, + "loss": 2.3962, + "step": 19339 + }, + { + "epoch": 1.5608102655152933, + "grad_norm": 0.7388099431991577, + "learning_rate": 5.489111340807208e-07, + "loss": 2.374, + "step": 19340 + }, + { + "epoch": 1.5608909692518762, + "grad_norm": 0.6541383862495422, + "learning_rate": 5.472605408193055e-07, + "loss": 2.3732, + "step": 19341 + }, + { + "epoch": 1.5609716729884595, + "grad_norm": 0.7107636332511902, + "learning_rate": 5.45612426180131e-07, + "loss": 2.4275, + "step": 19342 + }, + { + "epoch": 1.5610523767250424, + "grad_norm": 0.6995171904563904, + "learning_rate": 5.439667902042756e-07, + "loss": 2.4379, + "step": 19343 + }, + { + "epoch": 1.5611330804616252, + "grad_norm": 0.693679690361023, + "learning_rate": 5.423236329327397e-07, + "loss": 2.4298, + "step": 19344 + }, + { + "epoch": 1.5612137841982083, + "grad_norm": 
0.6931268572807312, + "learning_rate": 5.406829544064907e-07, + "loss": 2.3195, + "step": 19345 + }, + { + "epoch": 1.5612944879347914, + "grad_norm": 0.7049005627632141, + "learning_rate": 5.390447546664069e-07, + "loss": 2.4159, + "step": 19346 + }, + { + "epoch": 1.5613751916713743, + "grad_norm": 0.6754768490791321, + "learning_rate": 5.374090337533333e-07, + "loss": 2.3703, + "step": 19347 + }, + { + "epoch": 1.5614558954079574, + "grad_norm": 0.7043792009353638, + "learning_rate": 5.357757917080153e-07, + "loss": 2.4343, + "step": 19348 + }, + { + "epoch": 1.5615365991445405, + "grad_norm": 0.7013272047042847, + "learning_rate": 5.341450285711647e-07, + "loss": 2.4089, + "step": 19349 + }, + { + "epoch": 1.5616173028811233, + "grad_norm": 0.6953254342079163, + "learning_rate": 5.325167443834267e-07, + "loss": 2.3838, + "step": 19350 + }, + { + "epoch": 1.5616980066177064, + "grad_norm": 0.6658989191055298, + "learning_rate": 5.308909391853911e-07, + "loss": 2.3713, + "step": 19351 + }, + { + "epoch": 1.5617787103542895, + "grad_norm": 0.7743833661079407, + "learning_rate": 5.292676130175589e-07, + "loss": 2.4047, + "step": 19352 + }, + { + "epoch": 1.5618594140908724, + "grad_norm": 0.7156519293785095, + "learning_rate": 5.276467659203976e-07, + "loss": 2.3867, + "step": 19353 + }, + { + "epoch": 1.5619401178274555, + "grad_norm": 0.6272128224372864, + "learning_rate": 5.260283979343084e-07, + "loss": 2.4146, + "step": 19354 + }, + { + "epoch": 1.5620208215640385, + "grad_norm": 0.7026631236076355, + "learning_rate": 5.244125090996143e-07, + "loss": 2.4047, + "step": 19355 + }, + { + "epoch": 1.5621015253006214, + "grad_norm": 0.648259699344635, + "learning_rate": 5.227990994565835e-07, + "loss": 2.3784, + "step": 19356 + }, + { + "epoch": 1.5621822290372043, + "grad_norm": 0.644374668598175, + "learning_rate": 5.2118816904545e-07, + "loss": 2.3629, + "step": 19357 + }, + { + "epoch": 1.5622629327737876, + "grad_norm": 0.7909294366836548, + "learning_rate": 
5.195797179063377e-07, + "loss": 2.477, + "step": 19358 + }, + { + "epoch": 1.5623436365103704, + "grad_norm": 0.6520152688026428, + "learning_rate": 5.179737460793587e-07, + "loss": 2.3993, + "step": 19359 + }, + { + "epoch": 1.5624243402469533, + "grad_norm": 0.6525024771690369, + "learning_rate": 5.163702536045034e-07, + "loss": 2.3719, + "step": 19360 + }, + { + "epoch": 1.5625050439835364, + "grad_norm": 0.7684478759765625, + "learning_rate": 5.147692405217619e-07, + "loss": 2.4154, + "step": 19361 + }, + { + "epoch": 1.5625857477201195, + "grad_norm": 0.7000769972801208, + "learning_rate": 5.131707068710356e-07, + "loss": 2.3755, + "step": 19362 + }, + { + "epoch": 1.5626664514567024, + "grad_norm": 0.6674086451530457, + "learning_rate": 5.115746526921484e-07, + "loss": 2.4133, + "step": 19363 + }, + { + "epoch": 1.5627471551932854, + "grad_norm": 0.6620389819145203, + "learning_rate": 5.099810780248792e-07, + "loss": 2.4445, + "step": 19364 + }, + { + "epoch": 1.5628278589298685, + "grad_norm": 0.735074520111084, + "learning_rate": 5.083899829089633e-07, + "loss": 2.3781, + "step": 19365 + }, + { + "epoch": 1.5629085626664514, + "grad_norm": 0.6440105438232422, + "learning_rate": 5.068013673840355e-07, + "loss": 2.3778, + "step": 19366 + }, + { + "epoch": 1.5629892664030345, + "grad_norm": 0.7031756639480591, + "learning_rate": 5.052152314896974e-07, + "loss": 2.3867, + "step": 19367 + }, + { + "epoch": 1.5630699701396176, + "grad_norm": 0.7151559591293335, + "learning_rate": 5.036315752654841e-07, + "loss": 2.398, + "step": 19368 + }, + { + "epoch": 1.5631506738762004, + "grad_norm": 0.6854581236839294, + "learning_rate": 5.020503987508529e-07, + "loss": 2.4132, + "step": 19369 + }, + { + "epoch": 1.5632313776127835, + "grad_norm": 0.744924783706665, + "learning_rate": 5.004717019852168e-07, + "loss": 2.4317, + "step": 19370 + }, + { + "epoch": 1.5633120813493666, + "grad_norm": 0.6753507256507874, + "learning_rate": 4.988954850079219e-07, + "loss": 2.4185, 
+ "step": 19371 + }, + { + "epoch": 1.5633927850859495, + "grad_norm": 0.6601449847221375, + "learning_rate": 4.973217478582482e-07, + "loss": 2.4039, + "step": 19372 + }, + { + "epoch": 1.5634734888225323, + "grad_norm": 0.7005258202552795, + "learning_rate": 4.957504905754196e-07, + "loss": 2.4412, + "step": 19373 + }, + { + "epoch": 1.5635541925591157, + "grad_norm": 0.6714457273483276, + "learning_rate": 4.941817131985938e-07, + "loss": 2.3907, + "step": 19374 + }, + { + "epoch": 1.5636348962956985, + "grad_norm": 0.6480659246444702, + "learning_rate": 4.926154157668728e-07, + "loss": 2.3902, + "step": 19375 + }, + { + "epoch": 1.5637156000322814, + "grad_norm": 0.6795184016227722, + "learning_rate": 4.910515983192809e-07, + "loss": 2.4245, + "step": 19376 + }, + { + "epoch": 1.5637963037688645, + "grad_norm": 0.7324832081794739, + "learning_rate": 4.894902608948093e-07, + "loss": 2.3878, + "step": 19377 + }, + { + "epoch": 1.5638770075054476, + "grad_norm": 0.7051582932472229, + "learning_rate": 4.879314035323712e-07, + "loss": 2.3985, + "step": 19378 + }, + { + "epoch": 1.5639577112420304, + "grad_norm": 0.6758540868759155, + "learning_rate": 4.863750262708022e-07, + "loss": 2.3593, + "step": 19379 + }, + { + "epoch": 1.5640384149786135, + "grad_norm": 0.7647781372070312, + "learning_rate": 4.848211291488936e-07, + "loss": 2.4286, + "step": 19380 + }, + { + "epoch": 1.5641191187151966, + "grad_norm": 0.6838028430938721, + "learning_rate": 4.83269712205392e-07, + "loss": 2.3802, + "step": 19381 + }, + { + "epoch": 1.5641998224517795, + "grad_norm": 0.7877007722854614, + "learning_rate": 4.817207754789332e-07, + "loss": 2.3517, + "step": 19382 + }, + { + "epoch": 1.5642805261883626, + "grad_norm": 0.7278311252593994, + "learning_rate": 4.80174319008142e-07, + "loss": 2.4342, + "step": 19383 + }, + { + "epoch": 1.5643612299249456, + "grad_norm": 0.6646801233291626, + "learning_rate": 4.786303428315653e-07, + "loss": 2.4213, + "step": 19384 + }, + { + "epoch": 
1.5644419336615285, + "grad_norm": 0.707219660282135, + "learning_rate": 4.770888469876611e-07, + "loss": 2.3905, + "step": 19385 + }, + { + "epoch": 1.5645226373981114, + "grad_norm": 0.6646847128868103, + "learning_rate": 4.7554983151485433e-07, + "loss": 2.4051, + "step": 19386 + }, + { + "epoch": 1.5646033411346947, + "grad_norm": 0.6625415086746216, + "learning_rate": 4.7401329645150317e-07, + "loss": 2.3614, + "step": 19387 + }, + { + "epoch": 1.5646840448712775, + "grad_norm": 0.6784572601318359, + "learning_rate": 4.724792418359103e-07, + "loss": 2.4383, + "step": 19388 + }, + { + "epoch": 1.5647647486078604, + "grad_norm": 0.7001104950904846, + "learning_rate": 4.7094766770631184e-07, + "loss": 2.4096, + "step": 19389 + }, + { + "epoch": 1.5648454523444435, + "grad_norm": 0.7197737693786621, + "learning_rate": 4.69418574100855e-07, + "loss": 2.4343, + "step": 19390 + }, + { + "epoch": 1.5649261560810266, + "grad_norm": 0.6619700789451599, + "learning_rate": 4.6789196105767596e-07, + "loss": 2.3904, + "step": 19391 + }, + { + "epoch": 1.5650068598176095, + "grad_norm": 0.7257784605026245, + "learning_rate": 4.663678286147999e-07, + "loss": 2.4097, + "step": 19392 + }, + { + "epoch": 1.5650875635541925, + "grad_norm": 0.6869077682495117, + "learning_rate": 4.648461768102186e-07, + "loss": 2.4053, + "step": 19393 + }, + { + "epoch": 1.5651682672907756, + "grad_norm": 0.6906449794769287, + "learning_rate": 4.633270056818684e-07, + "loss": 2.4225, + "step": 19394 + }, + { + "epoch": 1.5652489710273585, + "grad_norm": 0.6462907195091248, + "learning_rate": 4.6181031526758566e-07, + "loss": 2.3832, + "step": 19395 + }, + { + "epoch": 1.5653296747639416, + "grad_norm": 0.738439679145813, + "learning_rate": 4.6029610560519574e-07, + "loss": 2.3559, + "step": 19396 + }, + { + "epoch": 1.5654103785005247, + "grad_norm": 0.7454297542572021, + "learning_rate": 4.58784376732424e-07, + "loss": 2.4224, + "step": 19397 + }, + { + "epoch": 1.5654910822371075, + "grad_norm": 
0.6787495613098145, + "learning_rate": 4.5727512868695143e-07, + "loss": 2.418, + "step": 19398 + }, + { + "epoch": 1.5655717859736906, + "grad_norm": 0.7003183364868164, + "learning_rate": 4.557683615063812e-07, + "loss": 2.401, + "step": 19399 + }, + { + "epoch": 1.5656524897102737, + "grad_norm": 0.6190437078475952, + "learning_rate": 4.542640752282834e-07, + "loss": 2.3909, + "step": 19400 + }, + { + "epoch": 1.5657331934468566, + "grad_norm": 0.7409480214118958, + "learning_rate": 4.5276226989013905e-07, + "loss": 2.3938, + "step": 19401 + }, + { + "epoch": 1.5658138971834394, + "grad_norm": 0.7195636034011841, + "learning_rate": 4.5126294552937375e-07, + "loss": 2.4542, + "step": 19402 + }, + { + "epoch": 1.5658946009200228, + "grad_norm": 0.6671004891395569, + "learning_rate": 4.497661021833688e-07, + "loss": 2.3968, + "step": 19403 + }, + { + "epoch": 1.5659753046566056, + "grad_norm": 0.6861428022384644, + "learning_rate": 4.482717398894165e-07, + "loss": 2.4474, + "step": 19404 + }, + { + "epoch": 1.5660560083931885, + "grad_norm": 0.811326265335083, + "learning_rate": 4.467798586847538e-07, + "loss": 2.4755, + "step": 19405 + }, + { + "epoch": 1.5661367121297716, + "grad_norm": 0.7222245335578918, + "learning_rate": 4.4529045860657316e-07, + "loss": 2.4168, + "step": 19406 + }, + { + "epoch": 1.5662174158663547, + "grad_norm": 0.639931321144104, + "learning_rate": 4.438035396920004e-07, + "loss": 2.4041, + "step": 19407 + }, + { + "epoch": 1.5662981196029375, + "grad_norm": 0.6859482526779175, + "learning_rate": 4.423191019780837e-07, + "loss": 2.456, + "step": 19408 + }, + { + "epoch": 1.5663788233395206, + "grad_norm": 0.739364743232727, + "learning_rate": 4.4083714550181566e-07, + "loss": 2.4402, + "step": 19409 + }, + { + "epoch": 1.5664595270761037, + "grad_norm": 0.7241039872169495, + "learning_rate": 4.3935767030014454e-07, + "loss": 2.3805, + "step": 19410 + }, + { + "epoch": 1.5665402308126866, + "grad_norm": 0.7182741761207581, + 
"learning_rate": 4.378806764099297e-07, + "loss": 2.4325, + "step": 19411 + }, + { + "epoch": 1.5666209345492697, + "grad_norm": 0.6885257959365845, + "learning_rate": 4.3640616386798614e-07, + "loss": 2.3886, + "step": 19412 + }, + { + "epoch": 1.5667016382858527, + "grad_norm": 0.8143306970596313, + "learning_rate": 4.349341327110734e-07, + "loss": 2.3952, + "step": 19413 + }, + { + "epoch": 1.5667823420224356, + "grad_norm": 0.6972131729125977, + "learning_rate": 4.33464582975851e-07, + "loss": 2.4445, + "step": 19414 + }, + { + "epoch": 1.5668630457590187, + "grad_norm": 0.6729097366333008, + "learning_rate": 4.3199751469896744e-07, + "loss": 2.3993, + "step": 19415 + }, + { + "epoch": 1.5669437494956018, + "grad_norm": 0.7196716666221619, + "learning_rate": 4.305329279169823e-07, + "loss": 2.4489, + "step": 19416 + }, + { + "epoch": 1.5670244532321846, + "grad_norm": 0.7489563822746277, + "learning_rate": 4.2907082266638865e-07, + "loss": 2.4317, + "step": 19417 + }, + { + "epoch": 1.5671051569687675, + "grad_norm": 0.7010371685028076, + "learning_rate": 4.276111989836351e-07, + "loss": 2.4637, + "step": 19418 + }, + { + "epoch": 1.5671858607053508, + "grad_norm": 0.7744003534317017, + "learning_rate": 4.261540569050926e-07, + "loss": 2.4563, + "step": 19419 + }, + { + "epoch": 1.5672665644419337, + "grad_norm": 0.6580430269241333, + "learning_rate": 4.2469939646708755e-07, + "loss": 2.4245, + "step": 19420 + }, + { + "epoch": 1.5673472681785166, + "grad_norm": 0.6745176911354065, + "learning_rate": 4.2324721770585775e-07, + "loss": 2.3566, + "step": 19421 + }, + { + "epoch": 1.5674279719150996, + "grad_norm": 0.6980976462364197, + "learning_rate": 4.2179752065760746e-07, + "loss": 2.3522, + "step": 19422 + }, + { + "epoch": 1.5675086756516827, + "grad_norm": 0.6869443655014038, + "learning_rate": 4.203503053584634e-07, + "loss": 2.3774, + "step": 19423 + }, + { + "epoch": 1.5675893793882656, + "grad_norm": 0.6886731386184692, + "learning_rate": 
4.1890557184448567e-07, + "loss": 2.4127, + "step": 19424 + }, + { + "epoch": 1.5676700831248487, + "grad_norm": 0.6952071785926819, + "learning_rate": 4.1746332015170085e-07, + "loss": 2.4798, + "step": 19425 + }, + { + "epoch": 1.5677507868614318, + "grad_norm": 0.6766799688339233, + "learning_rate": 4.160235503160359e-07, + "loss": 2.4064, + "step": 19426 + }, + { + "epoch": 1.5678314905980146, + "grad_norm": 0.7062126994132996, + "learning_rate": 4.145862623733732e-07, + "loss": 2.427, + "step": 19427 + }, + { + "epoch": 1.5679121943345977, + "grad_norm": 0.6548494696617126, + "learning_rate": 4.1315145635953955e-07, + "loss": 2.402, + "step": 19428 + }, + { + "epoch": 1.5679928980711808, + "grad_norm": 0.711933434009552, + "learning_rate": 4.1171913231030646e-07, + "loss": 2.4373, + "step": 19429 + }, + { + "epoch": 1.5680736018077637, + "grad_norm": 0.6692190170288086, + "learning_rate": 4.102892902613453e-07, + "loss": 2.3734, + "step": 19430 + }, + { + "epoch": 1.5681543055443465, + "grad_norm": 0.6544106602668762, + "learning_rate": 4.0886193024831653e-07, + "loss": 2.4182, + "step": 19431 + }, + { + "epoch": 1.5682350092809298, + "grad_norm": 0.7023935317993164, + "learning_rate": 4.074370523067805e-07, + "loss": 2.4095, + "step": 19432 + }, + { + "epoch": 1.5683157130175127, + "grad_norm": 0.7151557207107544, + "learning_rate": 4.060146564722422e-07, + "loss": 2.4187, + "step": 19433 + }, + { + "epoch": 1.5683964167540956, + "grad_norm": 0.6406722664833069, + "learning_rate": 4.045947427801733e-07, + "loss": 2.3751, + "step": 19434 + }, + { + "epoch": 1.5684771204906787, + "grad_norm": 0.6845474243164062, + "learning_rate": 4.031773112659343e-07, + "loss": 2.3617, + "step": 19435 + }, + { + "epoch": 1.5685578242272618, + "grad_norm": 0.7414054274559021, + "learning_rate": 4.017623619648747e-07, + "loss": 2.3801, + "step": 19436 + }, + { + "epoch": 1.5686385279638446, + "grad_norm": 0.6821498274803162, + "learning_rate": 4.0034989491224416e-07, + "loss": 
2.3937, + "step": 19437 + }, + { + "epoch": 1.5687192317004277, + "grad_norm": 0.7180706858634949, + "learning_rate": 3.9893991014325894e-07, + "loss": 2.3953, + "step": 19438 + }, + { + "epoch": 1.5687999354370108, + "grad_norm": 0.7036407589912415, + "learning_rate": 3.9753240769304647e-07, + "loss": 2.4191, + "step": 19439 + }, + { + "epoch": 1.5688806391735937, + "grad_norm": 0.6791162490844727, + "learning_rate": 3.961273875966787e-07, + "loss": 2.3925, + "step": 19440 + }, + { + "epoch": 1.5689613429101767, + "grad_norm": 0.7595280408859253, + "learning_rate": 3.947248498891942e-07, + "loss": 2.3638, + "step": 19441 + }, + { + "epoch": 1.5690420466467598, + "grad_norm": 0.7401887774467468, + "learning_rate": 3.9332479460554296e-07, + "loss": 2.3654, + "step": 19442 + }, + { + "epoch": 1.5691227503833427, + "grad_norm": 0.6966845989227295, + "learning_rate": 3.91927221780608e-07, + "loss": 2.4382, + "step": 19443 + }, + { + "epoch": 1.5692034541199258, + "grad_norm": 0.694014847278595, + "learning_rate": 3.905321314492172e-07, + "loss": 2.4224, + "step": 19444 + }, + { + "epoch": 1.5692841578565089, + "grad_norm": 0.730681300163269, + "learning_rate": 3.891395236461537e-07, + "loss": 2.4657, + "step": 19445 + }, + { + "epoch": 1.5693648615930917, + "grad_norm": 0.7140524983406067, + "learning_rate": 3.8774939840612313e-07, + "loss": 2.3938, + "step": 19446 + }, + { + "epoch": 1.5694455653296746, + "grad_norm": 0.7233960628509521, + "learning_rate": 3.8636175576375334e-07, + "loss": 2.4509, + "step": 19447 + }, + { + "epoch": 1.569526269066258, + "grad_norm": 0.6955321431159973, + "learning_rate": 3.8497659575365e-07, + "loss": 2.4199, + "step": 19448 + }, + { + "epoch": 1.5696069728028408, + "grad_norm": 0.7585558295249939, + "learning_rate": 3.835939184103299e-07, + "loss": 2.3564, + "step": 19449 + }, + { + "epoch": 1.5696876765394236, + "grad_norm": 0.7005706429481506, + "learning_rate": 3.8221372376824325e-07, + "loss": 2.4001, + "step": 19450 + }, + { + 
"epoch": 1.5697683802760067, + "grad_norm": 0.7237557768821716, + "learning_rate": 3.8083601186179593e-07, + "loss": 2.3816, + "step": 19451 + }, + { + "epoch": 1.5698490840125898, + "grad_norm": 0.6831969022750854, + "learning_rate": 3.7946078272532714e-07, + "loss": 2.3896, + "step": 19452 + }, + { + "epoch": 1.5699297877491727, + "grad_norm": 0.6717517375946045, + "learning_rate": 3.780880363930983e-07, + "loss": 2.4294, + "step": 19453 + }, + { + "epoch": 1.5700104914857558, + "grad_norm": 0.720720648765564, + "learning_rate": 3.767177728993265e-07, + "loss": 2.3999, + "step": 19454 + }, + { + "epoch": 1.5700911952223389, + "grad_norm": 0.6734107732772827, + "learning_rate": 3.7534999227817336e-07, + "loss": 2.3898, + "step": 19455 + }, + { + "epoch": 1.5701718989589217, + "grad_norm": 0.6925922632217407, + "learning_rate": 3.7398469456372266e-07, + "loss": 2.3917, + "step": 19456 + }, + { + "epoch": 1.5702526026955048, + "grad_norm": 0.6596848368644714, + "learning_rate": 3.7262187978999163e-07, + "loss": 2.4322, + "step": 19457 + }, + { + "epoch": 1.570333306432088, + "grad_norm": 0.7231408357620239, + "learning_rate": 3.7126154799095317e-07, + "loss": 2.3759, + "step": 19458 + }, + { + "epoch": 1.5704140101686708, + "grad_norm": 0.6516187787055969, + "learning_rate": 3.6990369920051337e-07, + "loss": 2.4131, + "step": 19459 + }, + { + "epoch": 1.5704947139052539, + "grad_norm": 0.6565192937850952, + "learning_rate": 3.685483334525008e-07, + "loss": 2.3759, + "step": 19460 + }, + { + "epoch": 1.570575417641837, + "grad_norm": 0.6889563202857971, + "learning_rate": 3.671954507807107e-07, + "loss": 2.3841, + "step": 19461 + }, + { + "epoch": 1.5706561213784198, + "grad_norm": 0.7231041193008423, + "learning_rate": 3.658450512188494e-07, + "loss": 2.4119, + "step": 19462 + }, + { + "epoch": 1.5707368251150027, + "grad_norm": 0.7025059461593628, + "learning_rate": 3.644971348005788e-07, + "loss": 2.4188, + "step": 19463 + }, + { + "epoch": 1.570817528851586, + 
"grad_norm": 0.6800994873046875, + "learning_rate": 3.631517015594943e-07, + "loss": 2.3831, + "step": 19464 + }, + { + "epoch": 1.5708982325881689, + "grad_norm": 0.6947640180587769, + "learning_rate": 3.6180875152911353e-07, + "loss": 2.3606, + "step": 19465 + }, + { + "epoch": 1.5709789363247517, + "grad_norm": 0.69830322265625, + "learning_rate": 3.604682847429208e-07, + "loss": 2.4203, + "step": 19466 + }, + { + "epoch": 1.5710596400613348, + "grad_norm": 0.6469851136207581, + "learning_rate": 3.591303012343117e-07, + "loss": 2.393, + "step": 19467 + }, + { + "epoch": 1.571140343797918, + "grad_norm": 0.7026922106742859, + "learning_rate": 3.577948010366372e-07, + "loss": 2.4128, + "step": 19468 + }, + { + "epoch": 1.5712210475345008, + "grad_norm": 0.678066611289978, + "learning_rate": 3.5646178418319297e-07, + "loss": 2.3764, + "step": 19469 + }, + { + "epoch": 1.5713017512710838, + "grad_norm": 0.7004178166389465, + "learning_rate": 3.5513125070718576e-07, + "loss": 2.4657, + "step": 19470 + }, + { + "epoch": 1.571382455007667, + "grad_norm": 0.7196616530418396, + "learning_rate": 3.5380320064178905e-07, + "loss": 2.3922, + "step": 19471 + }, + { + "epoch": 1.5714631587442498, + "grad_norm": 0.7095093727111816, + "learning_rate": 3.524776340200875e-07, + "loss": 2.3599, + "step": 19472 + }, + { + "epoch": 1.5715438624808329, + "grad_norm": 0.6695747971534729, + "learning_rate": 3.5115455087513237e-07, + "loss": 2.4018, + "step": 19473 + }, + { + "epoch": 1.571624566217416, + "grad_norm": 0.7534603476524353, + "learning_rate": 3.498339512398863e-07, + "loss": 2.4521, + "step": 19474 + }, + { + "epoch": 1.5717052699539988, + "grad_norm": 0.7905675768852234, + "learning_rate": 3.4851583514726725e-07, + "loss": 2.4272, + "step": 19475 + }, + { + "epoch": 1.571785973690582, + "grad_norm": 0.6776765584945679, + "learning_rate": 3.4720020263012684e-07, + "loss": 2.3222, + "step": 19476 + }, + { + "epoch": 1.571866677427165, + "grad_norm": 0.7125731706619263, + 
"learning_rate": 3.4588705372124997e-07, + "loss": 2.3751, + "step": 19477 + }, + { + "epoch": 1.5719473811637479, + "grad_norm": 0.7129982709884644, + "learning_rate": 3.445763884533548e-07, + "loss": 2.3238, + "step": 19478 + }, + { + "epoch": 1.5720280849003307, + "grad_norm": 0.6839120984077454, + "learning_rate": 3.432682068591375e-07, + "loss": 2.4365, + "step": 19479 + }, + { + "epoch": 1.5721087886369138, + "grad_norm": 0.7290100455284119, + "learning_rate": 3.41962508971172e-07, + "loss": 2.3426, + "step": 19480 + }, + { + "epoch": 1.572189492373497, + "grad_norm": 0.6621668338775635, + "learning_rate": 3.4065929482200997e-07, + "loss": 2.3758, + "step": 19481 + }, + { + "epoch": 1.5722701961100798, + "grad_norm": 0.6673764586448669, + "learning_rate": 3.393585644441255e-07, + "loss": 2.436, + "step": 19482 + }, + { + "epoch": 1.5723508998466629, + "grad_norm": 0.6899733543395996, + "learning_rate": 3.380603178699482e-07, + "loss": 2.396, + "step": 19483 + }, + { + "epoch": 1.572431603583246, + "grad_norm": 0.6511523127555847, + "learning_rate": 3.3676455513182994e-07, + "loss": 2.3683, + "step": 19484 + }, + { + "epoch": 1.5725123073198288, + "grad_norm": 0.7350820899009705, + "learning_rate": 3.35471276262056e-07, + "loss": 2.4538, + "step": 19485 + }, + { + "epoch": 1.572593011056412, + "grad_norm": 0.710725724697113, + "learning_rate": 3.341804812928673e-07, + "loss": 2.4344, + "step": 19486 + }, + { + "epoch": 1.572673714792995, + "grad_norm": 0.7406159043312073, + "learning_rate": 3.3289217025642694e-07, + "loss": 2.3888, + "step": 19487 + }, + { + "epoch": 1.5727544185295779, + "grad_norm": 0.6357181668281555, + "learning_rate": 3.316063431848426e-07, + "loss": 2.4259, + "step": 19488 + }, + { + "epoch": 1.572835122266161, + "grad_norm": 0.720058798789978, + "learning_rate": 3.3032300011016647e-07, + "loss": 2.3656, + "step": 19489 + }, + { + "epoch": 1.572915826002744, + "grad_norm": 0.7195632457733154, + "learning_rate": 3.29042141064384e-07, + 
"loss": 2.4153, + "step": 19490 + }, + { + "epoch": 1.572996529739327, + "grad_norm": 0.6918438673019409, + "learning_rate": 3.277637660794142e-07, + "loss": 2.4136, + "step": 19491 + }, + { + "epoch": 1.5730772334759098, + "grad_norm": 0.6733240485191345, + "learning_rate": 3.2648787518710924e-07, + "loss": 2.3959, + "step": 19492 + }, + { + "epoch": 1.573157937212493, + "grad_norm": 0.682981014251709, + "learning_rate": 3.2521446841927707e-07, + "loss": 2.3819, + "step": 19493 + }, + { + "epoch": 1.573238640949076, + "grad_norm": 0.7159077525138855, + "learning_rate": 3.2394354580765896e-07, + "loss": 2.3915, + "step": 19494 + }, + { + "epoch": 1.5733193446856588, + "grad_norm": 0.6678228378295898, + "learning_rate": 3.226751073839185e-07, + "loss": 2.325, + "step": 19495 + }, + { + "epoch": 1.573400048422242, + "grad_norm": 0.7392027974128723, + "learning_rate": 3.214091531796859e-07, + "loss": 2.3972, + "step": 19496 + }, + { + "epoch": 1.573480752158825, + "grad_norm": 0.7228594422340393, + "learning_rate": 3.201456832264915e-07, + "loss": 2.397, + "step": 19497 + }, + { + "epoch": 1.5735614558954079, + "grad_norm": 0.733490526676178, + "learning_rate": 3.1888469755582127e-07, + "loss": 2.3571, + "step": 19498 + }, + { + "epoch": 1.573642159631991, + "grad_norm": 0.6712044477462769, + "learning_rate": 3.176261961991278e-07, + "loss": 2.4145, + "step": 19499 + }, + { + "epoch": 1.573722863368574, + "grad_norm": 0.7613135576248169, + "learning_rate": 3.1637017918775267e-07, + "loss": 2.4786, + "step": 19500 + }, + { + "epoch": 1.573803567105157, + "grad_norm": 0.7119792103767395, + "learning_rate": 3.151166465530153e-07, + "loss": 2.4149, + "step": 19501 + }, + { + "epoch": 1.57388427084174, + "grad_norm": 0.6982532143592834, + "learning_rate": 3.1386559832614624e-07, + "loss": 2.4376, + "step": 19502 + }, + { + "epoch": 1.573964974578323, + "grad_norm": 0.7028994560241699, + "learning_rate": 3.126170345383317e-07, + "loss": 2.4994, + "step": 19503 + }, + { + 
"epoch": 1.574045678314906, + "grad_norm": 0.8555117249488831, + "learning_rate": 3.1137095522068007e-07, + "loss": 2.4314, + "step": 19504 + }, + { + "epoch": 1.574126382051489, + "grad_norm": 0.709402322769165, + "learning_rate": 3.1012736040425536e-07, + "loss": 2.4049, + "step": 19505 + }, + { + "epoch": 1.5742070857880721, + "grad_norm": 0.7311747074127197, + "learning_rate": 3.088862501200551e-07, + "loss": 2.4286, + "step": 19506 + }, + { + "epoch": 1.574287789524655, + "grad_norm": 0.6722131967544556, + "learning_rate": 3.07647624398999e-07, + "loss": 2.3318, + "step": 19507 + }, + { + "epoch": 1.5743684932612378, + "grad_norm": 0.7237920165061951, + "learning_rate": 3.0641148327196225e-07, + "loss": 2.3734, + "step": 19508 + }, + { + "epoch": 1.5744491969978212, + "grad_norm": 0.6654006838798523, + "learning_rate": 3.051778267697536e-07, + "loss": 2.3997, + "step": 19509 + }, + { + "epoch": 1.574529900734404, + "grad_norm": 0.6787348985671997, + "learning_rate": 3.0394665492312626e-07, + "loss": 2.3286, + "step": 19510 + }, + { + "epoch": 1.5746106044709869, + "grad_norm": 0.7186006307601929, + "learning_rate": 3.027179677627445e-07, + "loss": 2.484, + "step": 19511 + }, + { + "epoch": 1.57469130820757, + "grad_norm": 0.6698916554450989, + "learning_rate": 3.014917653192506e-07, + "loss": 2.3831, + "step": 19512 + }, + { + "epoch": 1.574772011944153, + "grad_norm": 0.6446281671524048, + "learning_rate": 3.0026804762318674e-07, + "loss": 2.3444, + "step": 19513 + }, + { + "epoch": 1.574852715680736, + "grad_norm": 0.7469406723976135, + "learning_rate": 2.990468147050729e-07, + "loss": 2.4062, + "step": 19514 + }, + { + "epoch": 1.574933419417319, + "grad_norm": 0.6877521872520447, + "learning_rate": 2.9782806659532926e-07, + "loss": 2.3793, + "step": 19515 + }, + { + "epoch": 1.575014123153902, + "grad_norm": 0.6991806626319885, + "learning_rate": 2.966118033243315e-07, + "loss": 2.4549, + "step": 19516 + }, + { + "epoch": 1.575094826890485, + "grad_norm": 
0.710056722164154, + "learning_rate": 2.9539802492239976e-07, + "loss": 2.3942, + "step": 19517 + }, + { + "epoch": 1.575175530627068, + "grad_norm": 0.6914008855819702, + "learning_rate": 2.941867314197655e-07, + "loss": 2.4122, + "step": 19518 + }, + { + "epoch": 1.5752562343636511, + "grad_norm": 0.6838723421096802, + "learning_rate": 2.929779228466489e-07, + "loss": 2.4037, + "step": 19519 + }, + { + "epoch": 1.575336938100234, + "grad_norm": 0.7394174337387085, + "learning_rate": 2.9177159923314826e-07, + "loss": 2.4017, + "step": 19520 + }, + { + "epoch": 1.575417641836817, + "grad_norm": 0.7108052968978882, + "learning_rate": 2.905677606093393e-07, + "loss": 2.3879, + "step": 19521 + }, + { + "epoch": 1.5754983455734002, + "grad_norm": 0.7046779990196228, + "learning_rate": 2.8936640700523153e-07, + "loss": 2.4276, + "step": 19522 + }, + { + "epoch": 1.575579049309983, + "grad_norm": 0.7244623303413391, + "learning_rate": 2.881675384507565e-07, + "loss": 2.436, + "step": 19523 + }, + { + "epoch": 1.575659753046566, + "grad_norm": 0.677969217300415, + "learning_rate": 2.869711549758014e-07, + "loss": 2.3511, + "step": 19524 + }, + { + "epoch": 1.5757404567831492, + "grad_norm": 0.6743468642234802, + "learning_rate": 2.8577725661017576e-07, + "loss": 2.3664, + "step": 19525 + }, + { + "epoch": 1.575821160519732, + "grad_norm": 0.671424388885498, + "learning_rate": 2.845858433836335e-07, + "loss": 2.3577, + "step": 19526 + }, + { + "epoch": 1.575901864256315, + "grad_norm": 0.7217385768890381, + "learning_rate": 2.83396915325862e-07, + "loss": 2.3871, + "step": 19527 + }, + { + "epoch": 1.575982567992898, + "grad_norm": 0.6964769959449768, + "learning_rate": 2.822104724665153e-07, + "loss": 2.3994, + "step": 19528 + }, + { + "epoch": 1.5760632717294811, + "grad_norm": 0.6757654547691345, + "learning_rate": 2.8102651483513656e-07, + "loss": 2.4076, + "step": 19529 + }, + { + "epoch": 1.576143975466064, + "grad_norm": 0.7008597254753113, + "learning_rate": 
2.798450424612464e-07, + "loss": 2.4416, + "step": 19530 + }, + { + "epoch": 1.576224679202647, + "grad_norm": 0.7653341889381409, + "learning_rate": 2.7866605537429923e-07, + "loss": 2.4954, + "step": 19531 + }, + { + "epoch": 1.5763053829392302, + "grad_norm": 0.7089917659759521, + "learning_rate": 2.7748955360366037e-07, + "loss": 2.4125, + "step": 19532 + }, + { + "epoch": 1.576386086675813, + "grad_norm": 0.737715482711792, + "learning_rate": 2.763155371786619e-07, + "loss": 2.3948, + "step": 19533 + }, + { + "epoch": 1.5764667904123961, + "grad_norm": 0.7243852019309998, + "learning_rate": 2.7514400612855815e-07, + "loss": 2.3866, + "step": 19534 + }, + { + "epoch": 1.5765474941489792, + "grad_norm": 0.6988227367401123, + "learning_rate": 2.7397496048255923e-07, + "loss": 2.4224, + "step": 19535 + }, + { + "epoch": 1.576628197885562, + "grad_norm": 0.6965763568878174, + "learning_rate": 2.7280840026977506e-07, + "loss": 2.4266, + "step": 19536 + }, + { + "epoch": 1.576708901622145, + "grad_norm": 0.7066041827201843, + "learning_rate": 2.7164432551930466e-07, + "loss": 2.4081, + "step": 19537 + }, + { + "epoch": 1.5767896053587283, + "grad_norm": 0.811836838722229, + "learning_rate": 2.7048273626014695e-07, + "loss": 2.3538, + "step": 19538 + }, + { + "epoch": 1.5768703090953111, + "grad_norm": 0.7499393224716187, + "learning_rate": 2.693236325212567e-07, + "loss": 2.4264, + "step": 19539 + }, + { + "epoch": 1.576951012831894, + "grad_norm": 0.6594284772872925, + "learning_rate": 2.681670143315218e-07, + "loss": 2.4291, + "step": 19540 + }, + { + "epoch": 1.577031716568477, + "grad_norm": 0.7405110001564026, + "learning_rate": 2.670128817197637e-07, + "loss": 2.4041, + "step": 19541 + }, + { + "epoch": 1.5771124203050602, + "grad_norm": 0.7367891669273376, + "learning_rate": 2.658612347147482e-07, + "loss": 2.4091, + "step": 19542 + }, + { + "epoch": 1.577193124041643, + "grad_norm": 0.7289342284202576, + "learning_rate": 2.6471207334517466e-07, + "loss": 
2.3708, + "step": 19543 + }, + { + "epoch": 1.5772738277782261, + "grad_norm": 0.7146711349487305, + "learning_rate": 2.635653976396979e-07, + "loss": 2.4101, + "step": 19544 + }, + { + "epoch": 1.5773545315148092, + "grad_norm": 0.6830505132675171, + "learning_rate": 2.624212076268839e-07, + "loss": 2.3988, + "step": 19545 + }, + { + "epoch": 1.577435235251392, + "grad_norm": 0.7075647711753845, + "learning_rate": 2.612795033352433e-07, + "loss": 2.4166, + "step": 19546 + }, + { + "epoch": 1.5775159389879752, + "grad_norm": 0.7065207958221436, + "learning_rate": 2.6014028479324214e-07, + "loss": 2.4092, + "step": 19547 + }, + { + "epoch": 1.5775966427245582, + "grad_norm": 0.6712577939033508, + "learning_rate": 2.5900355202925773e-07, + "loss": 2.38, + "step": 19548 + }, + { + "epoch": 1.577677346461141, + "grad_norm": 0.6984275579452515, + "learning_rate": 2.578693050716452e-07, + "loss": 2.3574, + "step": 19549 + }, + { + "epoch": 1.5777580501977242, + "grad_norm": 0.6909767985343933, + "learning_rate": 2.5673754394864857e-07, + "loss": 2.3682, + "step": 19550 + }, + { + "epoch": 1.5778387539343073, + "grad_norm": 0.6264119744300842, + "learning_rate": 2.5560826868847866e-07, + "loss": 2.3966, + "step": 19551 + }, + { + "epoch": 1.5779194576708901, + "grad_norm": 0.736967146396637, + "learning_rate": 2.544814793193018e-07, + "loss": 2.3294, + "step": 19552 + }, + { + "epoch": 1.578000161407473, + "grad_norm": 0.7749661803245544, + "learning_rate": 2.533571758691733e-07, + "loss": 2.3475, + "step": 19553 + }, + { + "epoch": 1.5780808651440563, + "grad_norm": 0.6409062743186951, + "learning_rate": 2.522353583661263e-07, + "loss": 2.387, + "step": 19554 + }, + { + "epoch": 1.5781615688806392, + "grad_norm": 0.6904575824737549, + "learning_rate": 2.511160268381163e-07, + "loss": 2.4217, + "step": 19555 + }, + { + "epoch": 1.578242272617222, + "grad_norm": 0.681140124797821, + "learning_rate": 2.4999918131304314e-07, + "loss": 2.4251, + "step": 19556 + }, + { + 
"epoch": 1.5783229763538051, + "grad_norm": 0.750292956829071, + "learning_rate": 2.4888482181874004e-07, + "loss": 2.489, + "step": 19557 + }, + { + "epoch": 1.5784036800903882, + "grad_norm": 0.6662226319313049, + "learning_rate": 2.477729483829738e-07, + "loss": 2.3494, + "step": 19558 + }, + { + "epoch": 1.578484383826971, + "grad_norm": 0.6624183058738708, + "learning_rate": 2.4666356103346666e-07, + "loss": 2.379, + "step": 19559 + }, + { + "epoch": 1.5785650875635542, + "grad_norm": 0.662188708782196, + "learning_rate": 2.4555665979785203e-07, + "loss": 2.4224, + "step": 19560 + }, + { + "epoch": 1.5786457913001373, + "grad_norm": 0.6776434183120728, + "learning_rate": 2.444522447037301e-07, + "loss": 2.3909, + "step": 19561 + }, + { + "epoch": 1.5787264950367201, + "grad_norm": 0.7576977610588074, + "learning_rate": 2.4335031577862323e-07, + "loss": 2.3842, + "step": 19562 + }, + { + "epoch": 1.5788071987733032, + "grad_norm": 0.7654067277908325, + "learning_rate": 2.422508730499984e-07, + "loss": 2.4492, + "step": 19563 + }, + { + "epoch": 1.5788879025098863, + "grad_norm": 0.6901730298995972, + "learning_rate": 2.4115391654524477e-07, + "loss": 2.4851, + "step": 19564 + }, + { + "epoch": 1.5789686062464692, + "grad_norm": 0.6742538809776306, + "learning_rate": 2.400594462917183e-07, + "loss": 2.4569, + "step": 19565 + }, + { + "epoch": 1.5790493099830523, + "grad_norm": 0.6856924891471863, + "learning_rate": 2.389674623166749e-07, + "loss": 2.4009, + "step": 19566 + }, + { + "epoch": 1.5791300137196354, + "grad_norm": 0.701468288898468, + "learning_rate": 2.3787796464733724e-07, + "loss": 2.3977, + "step": 19567 + }, + { + "epoch": 1.5792107174562182, + "grad_norm": 0.6824967861175537, + "learning_rate": 2.3679095331087253e-07, + "loss": 2.4038, + "step": 19568 + }, + { + "epoch": 1.579291421192801, + "grad_norm": 0.7547240257263184, + "learning_rate": 2.3570642833435908e-07, + "loss": 2.4731, + "step": 19569 + }, + { + "epoch": 1.5793721249293844, + 
"grad_norm": 0.7479576468467712, + "learning_rate": 2.3462438974481971e-07, + "loss": 2.4525, + "step": 19570 + }, + { + "epoch": 1.5794528286659673, + "grad_norm": 0.6773137450218201, + "learning_rate": 2.3354483756923283e-07, + "loss": 2.4292, + "step": 19571 + }, + { + "epoch": 1.5795335324025501, + "grad_norm": 0.6966623067855835, + "learning_rate": 2.324677718344992e-07, + "loss": 2.3735, + "step": 19572 + }, + { + "epoch": 1.5796142361391332, + "grad_norm": 0.6873766779899597, + "learning_rate": 2.3139319256747504e-07, + "loss": 2.4378, + "step": 19573 + }, + { + "epoch": 1.5796949398757163, + "grad_norm": 0.7327106595039368, + "learning_rate": 2.303210997949168e-07, + "loss": 2.4593, + "step": 19574 + }, + { + "epoch": 1.5797756436122992, + "grad_norm": 0.6556729674339294, + "learning_rate": 2.2925149354356966e-07, + "loss": 2.4909, + "step": 19575 + }, + { + "epoch": 1.5798563473488823, + "grad_norm": 0.7356710433959961, + "learning_rate": 2.2818437384006796e-07, + "loss": 2.4006, + "step": 19576 + }, + { + "epoch": 1.5799370510854653, + "grad_norm": 0.7129979133605957, + "learning_rate": 2.271197407110237e-07, + "loss": 2.4479, + "step": 19577 + }, + { + "epoch": 1.5800177548220482, + "grad_norm": 0.6787149906158447, + "learning_rate": 2.2605759418296013e-07, + "loss": 2.4006, + "step": 19578 + }, + { + "epoch": 1.5800984585586313, + "grad_norm": 0.6970722079277039, + "learning_rate": 2.2499793428235604e-07, + "loss": 2.3607, + "step": 19579 + }, + { + "epoch": 1.5801791622952144, + "grad_norm": 0.7457042336463928, + "learning_rate": 2.2394076103561256e-07, + "loss": 2.407, + "step": 19580 + }, + { + "epoch": 1.5802598660317972, + "grad_norm": 0.675424337387085, + "learning_rate": 2.2288607446908637e-07, + "loss": 2.3951, + "step": 19581 + }, + { + "epoch": 1.58034056976838, + "grad_norm": 0.6848486661911011, + "learning_rate": 2.2183387460906758e-07, + "loss": 2.4268, + "step": 19582 + }, + { + "epoch": 1.5804212735049634, + "grad_norm": 
0.7659448385238647, + "learning_rate": 2.207841614817574e-07, + "loss": 2.3834, + "step": 19583 + }, + { + "epoch": 1.5805019772415463, + "grad_norm": 0.6831560730934143, + "learning_rate": 2.1973693511334604e-07, + "loss": 2.4377, + "step": 19584 + }, + { + "epoch": 1.5805826809781292, + "grad_norm": 0.674228847026825, + "learning_rate": 2.186921955299015e-07, + "loss": 2.3551, + "step": 19585 + }, + { + "epoch": 1.5806633847147122, + "grad_norm": 0.6938178539276123, + "learning_rate": 2.1764994275749185e-07, + "loss": 2.4079, + "step": 19586 + }, + { + "epoch": 1.5807440884512953, + "grad_norm": 0.6617661714553833, + "learning_rate": 2.1661017682207407e-07, + "loss": 2.3951, + "step": 19587 + }, + { + "epoch": 1.5808247921878782, + "grad_norm": 0.7516794204711914, + "learning_rate": 2.155728977495719e-07, + "loss": 2.4094, + "step": 19588 + }, + { + "epoch": 1.5809054959244613, + "grad_norm": 0.6833170056343079, + "learning_rate": 2.1453810556583132e-07, + "loss": 2.3941, + "step": 19589 + }, + { + "epoch": 1.5809861996610444, + "grad_norm": 0.6640968322753906, + "learning_rate": 2.135058002966317e-07, + "loss": 2.4015, + "step": 19590 + }, + { + "epoch": 1.5810669033976272, + "grad_norm": 0.6701916456222534, + "learning_rate": 2.1247598196771911e-07, + "loss": 2.4166, + "step": 19591 + }, + { + "epoch": 1.5811476071342103, + "grad_norm": 0.7246288657188416, + "learning_rate": 2.1144865060475082e-07, + "loss": 2.3837, + "step": 19592 + }, + { + "epoch": 1.5812283108707934, + "grad_norm": 0.7187030911445618, + "learning_rate": 2.1042380623333967e-07, + "loss": 2.4908, + "step": 19593 + }, + { + "epoch": 1.5813090146073763, + "grad_norm": 0.7366796731948853, + "learning_rate": 2.094014488790097e-07, + "loss": 2.4194, + "step": 19594 + }, + { + "epoch": 1.5813897183439594, + "grad_norm": 0.7130455374717712, + "learning_rate": 2.083815785672627e-07, + "loss": 2.4128, + "step": 19595 + }, + { + "epoch": 1.5814704220805424, + "grad_norm": 0.7512035369873047, + 
"learning_rate": 2.073641953235006e-07, + "loss": 2.4157, + "step": 19596 + }, + { + "epoch": 1.5815511258171253, + "grad_norm": 0.7330604791641235, + "learning_rate": 2.0634929917306978e-07, + "loss": 2.3879, + "step": 19597 + }, + { + "epoch": 1.5816318295537082, + "grad_norm": 0.6850903630256653, + "learning_rate": 2.0533689014129442e-07, + "loss": 2.3769, + "step": 19598 + }, + { + "epoch": 1.5817125332902915, + "grad_norm": 0.6630103588104248, + "learning_rate": 2.043269682533877e-07, + "loss": 2.3661, + "step": 19599 + }, + { + "epoch": 1.5817932370268744, + "grad_norm": 0.7248691916465759, + "learning_rate": 2.033195335345184e-07, + "loss": 2.4059, + "step": 19600 + }, + { + "epoch": 1.5818739407634572, + "grad_norm": 0.6642807722091675, + "learning_rate": 2.0231458600978858e-07, + "loss": 2.4022, + "step": 19601 + }, + { + "epoch": 1.5819546445000403, + "grad_norm": 0.6879783868789673, + "learning_rate": 2.0131212570426716e-07, + "loss": 2.3774, + "step": 19602 + }, + { + "epoch": 1.5820353482366234, + "grad_norm": 0.7324118614196777, + "learning_rate": 2.0031215264291193e-07, + "loss": 2.4126, + "step": 19603 + }, + { + "epoch": 1.5821160519732063, + "grad_norm": 0.6727913022041321, + "learning_rate": 1.9931466685065847e-07, + "loss": 2.4062, + "step": 19604 + }, + { + "epoch": 1.5821967557097893, + "grad_norm": 0.7737346887588501, + "learning_rate": 1.983196683523758e-07, + "loss": 2.4138, + "step": 19605 + }, + { + "epoch": 1.5822774594463724, + "grad_norm": 0.6715213656425476, + "learning_rate": 1.973271571728441e-07, + "loss": 2.4082, + "step": 19606 + }, + { + "epoch": 1.5823581631829553, + "grad_norm": 0.7665833830833435, + "learning_rate": 1.9633713333681026e-07, + "loss": 2.3876, + "step": 19607 + }, + { + "epoch": 1.5824388669195384, + "grad_norm": 0.7304366230964661, + "learning_rate": 1.953495968689434e-07, + "loss": 2.4405, + "step": 19608 + }, + { + "epoch": 1.5825195706561215, + "grad_norm": 0.6973900198936462, + "learning_rate": 
1.9436454779385715e-07, + "loss": 2.4172, + "step": 19609 + }, + { + "epoch": 1.5826002743927043, + "grad_norm": 0.6609316468238831, + "learning_rate": 1.933819861360875e-07, + "loss": 2.4235, + "step": 19610 + }, + { + "epoch": 1.5826809781292874, + "grad_norm": 0.6351159811019897, + "learning_rate": 1.9240191192014812e-07, + "loss": 2.3914, + "step": 19611 + }, + { + "epoch": 1.5827616818658705, + "grad_norm": 0.7079761028289795, + "learning_rate": 1.9142432517045283e-07, + "loss": 2.4148, + "step": 19612 + }, + { + "epoch": 1.5828423856024534, + "grad_norm": 0.7295036911964417, + "learning_rate": 1.9044922591135995e-07, + "loss": 2.4108, + "step": 19613 + }, + { + "epoch": 1.5829230893390362, + "grad_norm": 0.707464873790741, + "learning_rate": 1.894766141671833e-07, + "loss": 2.4361, + "step": 19614 + }, + { + "epoch": 1.5830037930756196, + "grad_norm": 0.692263126373291, + "learning_rate": 1.8850648996215913e-07, + "loss": 2.3676, + "step": 19615 + }, + { + "epoch": 1.5830844968122024, + "grad_norm": 0.6685823202133179, + "learning_rate": 1.8753885332045696e-07, + "loss": 2.4127, + "step": 19616 + }, + { + "epoch": 1.5831652005487853, + "grad_norm": 0.7150568962097168, + "learning_rate": 1.8657370426620191e-07, + "loss": 2.4514, + "step": 19617 + }, + { + "epoch": 1.5832459042853684, + "grad_norm": 0.7067859768867493, + "learning_rate": 1.8561104282344144e-07, + "loss": 2.4512, + "step": 19618 + }, + { + "epoch": 1.5833266080219515, + "grad_norm": 0.6928460001945496, + "learning_rate": 1.8465086901617857e-07, + "loss": 2.4167, + "step": 19619 + }, + { + "epoch": 1.5834073117585343, + "grad_norm": 0.6788341999053955, + "learning_rate": 1.836931828683275e-07, + "loss": 2.3735, + "step": 19620 + }, + { + "epoch": 1.5834880154951174, + "grad_norm": 0.6936364769935608, + "learning_rate": 1.827379844037691e-07, + "loss": 2.3681, + "step": 19621 + }, + { + "epoch": 1.5835687192317005, + "grad_norm": 0.6555269360542297, + "learning_rate": 1.817852736463066e-07, + 
"loss": 2.4058, + "step": 19622 + }, + { + "epoch": 1.5836494229682834, + "grad_norm": 0.6922528743743896, + "learning_rate": 1.8083505061967653e-07, + "loss": 2.3614, + "step": 19623 + }, + { + "epoch": 1.5837301267048665, + "grad_norm": 0.6790033578872681, + "learning_rate": 1.7988731534757108e-07, + "loss": 2.4374, + "step": 19624 + }, + { + "epoch": 1.5838108304414495, + "grad_norm": 0.7672970294952393, + "learning_rate": 1.7894206785360467e-07, + "loss": 2.4746, + "step": 19625 + }, + { + "epoch": 1.5838915341780324, + "grad_norm": 0.7184334993362427, + "learning_rate": 1.7799930816134735e-07, + "loss": 2.3736, + "step": 19626 + }, + { + "epoch": 1.5839722379146155, + "grad_norm": 0.7583862543106079, + "learning_rate": 1.7705903629426922e-07, + "loss": 2.3634, + "step": 19627 + }, + { + "epoch": 1.5840529416511986, + "grad_norm": 0.716771125793457, + "learning_rate": 1.7612125227582932e-07, + "loss": 2.396, + "step": 19628 + }, + { + "epoch": 1.5841336453877815, + "grad_norm": 0.7561047077178955, + "learning_rate": 1.751859561293867e-07, + "loss": 2.3666, + "step": 19629 + }, + { + "epoch": 1.5842143491243643, + "grad_norm": 0.721318781375885, + "learning_rate": 1.7425314787825609e-07, + "loss": 2.3836, + "step": 19630 + }, + { + "epoch": 1.5842950528609476, + "grad_norm": 0.6694928407669067, + "learning_rate": 1.733228275456744e-07, + "loss": 2.435, + "step": 19631 + }, + { + "epoch": 1.5843757565975305, + "grad_norm": 0.6873437166213989, + "learning_rate": 1.7239499515484536e-07, + "loss": 2.3934, + "step": 19632 + }, + { + "epoch": 1.5844564603341134, + "grad_norm": 0.7650922536849976, + "learning_rate": 1.7146965072888378e-07, + "loss": 2.4093, + "step": 19633 + }, + { + "epoch": 1.5845371640706964, + "grad_norm": 0.6611059904098511, + "learning_rate": 1.7054679429086007e-07, + "loss": 2.3429, + "step": 19634 + }, + { + "epoch": 1.5846178678072795, + "grad_norm": 0.6984875798225403, + "learning_rate": 1.6962642586376698e-07, + "loss": 2.4266, + "step": 
19635 + }, + { + "epoch": 1.5846985715438624, + "grad_norm": 0.7174743413925171, + "learning_rate": 1.687085454705306e-07, + "loss": 2.4751, + "step": 19636 + }, + { + "epoch": 1.5847792752804455, + "grad_norm": 0.7151217460632324, + "learning_rate": 1.6779315313404376e-07, + "loss": 2.4593, + "step": 19637 + }, + { + "epoch": 1.5848599790170286, + "grad_norm": 0.7106527090072632, + "learning_rate": 1.668802488771215e-07, + "loss": 2.3759, + "step": 19638 + }, + { + "epoch": 1.5849406827536114, + "grad_norm": 0.693871796131134, + "learning_rate": 1.659698327225012e-07, + "loss": 2.4639, + "step": 19639 + }, + { + "epoch": 1.5850213864901945, + "grad_norm": 0.7400439381599426, + "learning_rate": 1.6506190469288695e-07, + "loss": 2.4036, + "step": 19640 + }, + { + "epoch": 1.5851020902267776, + "grad_norm": 0.665549635887146, + "learning_rate": 1.6415646481090508e-07, + "loss": 2.3489, + "step": 19641 + }, + { + "epoch": 1.5851827939633605, + "grad_norm": 0.7490979433059692, + "learning_rate": 1.632535130991042e-07, + "loss": 2.4464, + "step": 19642 + }, + { + "epoch": 1.5852634976999433, + "grad_norm": 0.7252774834632874, + "learning_rate": 1.6235304958001075e-07, + "loss": 2.3991, + "step": 19643 + }, + { + "epoch": 1.5853442014365267, + "grad_norm": 0.6748854517936707, + "learning_rate": 1.6145507427606232e-07, + "loss": 2.4315, + "step": 19644 + }, + { + "epoch": 1.5854249051731095, + "grad_norm": 0.6639416813850403, + "learning_rate": 1.6055958720962995e-07, + "loss": 2.4333, + "step": 19645 + }, + { + "epoch": 1.5855056089096924, + "grad_norm": 0.7251947522163391, + "learning_rate": 1.5966658840302906e-07, + "loss": 2.4187, + "step": 19646 + }, + { + "epoch": 1.5855863126462755, + "grad_norm": 0.7625366449356079, + "learning_rate": 1.5877607787853077e-07, + "loss": 2.4645, + "step": 19647 + }, + { + "epoch": 1.5856670163828586, + "grad_norm": 0.7019835710525513, + "learning_rate": 1.5788805565831732e-07, + "loss": 2.3942, + "step": 19648 + }, + { + "epoch": 
1.5857477201194414, + "grad_norm": 0.673926055431366, + "learning_rate": 1.5700252176452657e-07, + "loss": 2.4484, + "step": 19649 + }, + { + "epoch": 1.5858284238560245, + "grad_norm": 0.7518368363380432, + "learning_rate": 1.5611947621921864e-07, + "loss": 2.4045, + "step": 19650 + }, + { + "epoch": 1.5859091275926076, + "grad_norm": 0.6897228956222534, + "learning_rate": 1.5523891904440922e-07, + "loss": 2.3706, + "step": 19651 + }, + { + "epoch": 1.5859898313291905, + "grad_norm": 0.6625255942344666, + "learning_rate": 1.5436085026204748e-07, + "loss": 2.3907, + "step": 19652 + }, + { + "epoch": 1.5860705350657736, + "grad_norm": 0.7099746465682983, + "learning_rate": 1.534852698940048e-07, + "loss": 2.3813, + "step": 19653 + }, + { + "epoch": 1.5861512388023566, + "grad_norm": 0.6962074041366577, + "learning_rate": 1.5261217796211923e-07, + "loss": 2.3744, + "step": 19654 + }, + { + "epoch": 1.5862319425389395, + "grad_norm": 0.715735137462616, + "learning_rate": 1.5174157448814008e-07, + "loss": 2.3937, + "step": 19655 + }, + { + "epoch": 1.5863126462755226, + "grad_norm": 0.6551645994186401, + "learning_rate": 1.5087345949376108e-07, + "loss": 2.3155, + "step": 19656 + }, + { + "epoch": 1.5863933500121057, + "grad_norm": 0.6945969462394714, + "learning_rate": 1.500078330006316e-07, + "loss": 2.4504, + "step": 19657 + }, + { + "epoch": 1.5864740537486886, + "grad_norm": 0.7456083297729492, + "learning_rate": 1.4914469503031214e-07, + "loss": 2.4163, + "step": 19658 + }, + { + "epoch": 1.5865547574852714, + "grad_norm": 0.7366841435432434, + "learning_rate": 1.4828404560431884e-07, + "loss": 2.4717, + "step": 19659 + }, + { + "epoch": 1.5866354612218547, + "grad_norm": 0.693014919757843, + "learning_rate": 1.4742588474410123e-07, + "loss": 2.4306, + "step": 19660 + }, + { + "epoch": 1.5867161649584376, + "grad_norm": 0.686577320098877, + "learning_rate": 1.4657021247105328e-07, + "loss": 2.4021, + "step": 19661 + }, + { + "epoch": 1.5867968686950205, + 
"grad_norm": 0.6772071719169617, + "learning_rate": 1.457170288064913e-07, + "loss": 2.3534, + "step": 19662 + }, + { + "epoch": 1.5868775724316035, + "grad_norm": 0.7997143268585205, + "learning_rate": 1.4486633377168713e-07, + "loss": 2.3741, + "step": 19663 + }, + { + "epoch": 1.5869582761681866, + "grad_norm": 0.7072343826293945, + "learning_rate": 1.4401812738782383e-07, + "loss": 2.4005, + "step": 19664 + }, + { + "epoch": 1.5870389799047695, + "grad_norm": 0.7292042374610901, + "learning_rate": 1.4317240967607338e-07, + "loss": 2.4108, + "step": 19665 + }, + { + "epoch": 1.5871196836413526, + "grad_norm": 0.6923854947090149, + "learning_rate": 1.4232918065748558e-07, + "loss": 2.425, + "step": 19666 + }, + { + "epoch": 1.5872003873779357, + "grad_norm": 0.7188506722450256, + "learning_rate": 1.4148844035308805e-07, + "loss": 2.4278, + "step": 19667 + }, + { + "epoch": 1.5872810911145185, + "grad_norm": 0.6729195713996887, + "learning_rate": 1.4065018878383074e-07, + "loss": 2.4572, + "step": 19668 + }, + { + "epoch": 1.5873617948511016, + "grad_norm": 0.6851118803024292, + "learning_rate": 1.398144259705969e-07, + "loss": 2.4157, + "step": 19669 + }, + { + "epoch": 1.5874424985876847, + "grad_norm": 0.6716967225074768, + "learning_rate": 1.3898115193423656e-07, + "loss": 2.3568, + "step": 19670 + }, + { + "epoch": 1.5875232023242676, + "grad_norm": 0.8128048181533813, + "learning_rate": 1.3815036669549974e-07, + "loss": 2.4371, + "step": 19671 + }, + { + "epoch": 1.5876039060608507, + "grad_norm": 0.7035068869590759, + "learning_rate": 1.3732207027510323e-07, + "loss": 2.3997, + "step": 19672 + }, + { + "epoch": 1.5876846097974338, + "grad_norm": 0.7085563540458679, + "learning_rate": 1.36496262693675e-07, + "loss": 2.4039, + "step": 19673 + }, + { + "epoch": 1.5877653135340166, + "grad_norm": 0.6786942481994629, + "learning_rate": 1.3567294397180964e-07, + "loss": 2.4101, + "step": 19674 + }, + { + "epoch": 1.5878460172705995, + "grad_norm": 
0.6832700967788696, + "learning_rate": 1.3485211413002407e-07, + "loss": 2.3382, + "step": 19675 + }, + { + "epoch": 1.5879267210071828, + "grad_norm": 0.7460437417030334, + "learning_rate": 1.3403377318877973e-07, + "loss": 2.414, + "step": 19676 + }, + { + "epoch": 1.5880074247437657, + "grad_norm": 0.7320219278335571, + "learning_rate": 1.3321792116846032e-07, + "loss": 2.5066, + "step": 19677 + }, + { + "epoch": 1.5880881284803485, + "grad_norm": 0.6574358940124512, + "learning_rate": 1.324045580894051e-07, + "loss": 2.3876, + "step": 19678 + }, + { + "epoch": 1.5881688322169316, + "grad_norm": 0.7185317277908325, + "learning_rate": 1.3159368397188676e-07, + "loss": 2.4454, + "step": 19679 + }, + { + "epoch": 1.5882495359535147, + "grad_norm": 0.7073564529418945, + "learning_rate": 1.3078529883611134e-07, + "loss": 2.4151, + "step": 19680 + }, + { + "epoch": 1.5883302396900976, + "grad_norm": 0.7120694518089294, + "learning_rate": 1.2997940270221832e-07, + "loss": 2.3603, + "step": 19681 + }, + { + "epoch": 1.5884109434266807, + "grad_norm": 0.666560709476471, + "learning_rate": 1.291759955903138e-07, + "loss": 2.3887, + "step": 19682 + }, + { + "epoch": 1.5884916471632637, + "grad_norm": 0.6261511445045471, + "learning_rate": 1.2837507752040402e-07, + "loss": 2.3984, + "step": 19683 + }, + { + "epoch": 1.5885723508998466, + "grad_norm": 0.637839138507843, + "learning_rate": 1.2757664851245078e-07, + "loss": 2.3901, + "step": 19684 + }, + { + "epoch": 1.5886530546364297, + "grad_norm": 0.6959593892097473, + "learning_rate": 1.2678070858636039e-07, + "loss": 2.4234, + "step": 19685 + }, + { + "epoch": 1.5887337583730128, + "grad_norm": 0.6666539311408997, + "learning_rate": 1.259872577619614e-07, + "loss": 2.3921, + "step": 19686 + }, + { + "epoch": 1.5888144621095956, + "grad_norm": 0.7251231670379639, + "learning_rate": 1.2519629605903803e-07, + "loss": 2.4364, + "step": 19687 + }, + { + "epoch": 1.5888951658461785, + "grad_norm": 0.6676712036132812, + 
"learning_rate": 1.2440782349729673e-07, + "loss": 2.395, + "step": 19688 + }, + { + "epoch": 1.5889758695827618, + "grad_norm": 0.7133972644805908, + "learning_rate": 1.2362184009638845e-07, + "loss": 2.4477, + "step": 19689 + }, + { + "epoch": 1.5890565733193447, + "grad_norm": 0.7005056142807007, + "learning_rate": 1.2283834587590858e-07, + "loss": 2.369, + "step": 19690 + }, + { + "epoch": 1.5891372770559276, + "grad_norm": 0.6613330841064453, + "learning_rate": 1.220573408553749e-07, + "loss": 2.3645, + "step": 19691 + }, + { + "epoch": 1.5892179807925106, + "grad_norm": 0.6670987606048584, + "learning_rate": 1.212788250542607e-07, + "loss": 2.4223, + "step": 19692 + }, + { + "epoch": 1.5892986845290937, + "grad_norm": 0.6678892970085144, + "learning_rate": 1.2050279849195045e-07, + "loss": 2.3824, + "step": 19693 + }, + { + "epoch": 1.5893793882656766, + "grad_norm": 0.781054675579071, + "learning_rate": 1.1972926118780647e-07, + "loss": 2.3699, + "step": 19694 + }, + { + "epoch": 1.5894600920022597, + "grad_norm": 0.6755295395851135, + "learning_rate": 1.1895821316110223e-07, + "loss": 2.4048, + "step": 19695 + }, + { + "epoch": 1.5895407957388428, + "grad_norm": 0.6501383185386658, + "learning_rate": 1.1818965443105568e-07, + "loss": 2.4089, + "step": 19696 + }, + { + "epoch": 1.5896214994754256, + "grad_norm": 0.6801205277442932, + "learning_rate": 1.1742358501680706e-07, + "loss": 2.3916, + "step": 19697 + }, + { + "epoch": 1.5897022032120087, + "grad_norm": 0.677849292755127, + "learning_rate": 1.1666000493746332e-07, + "loss": 2.466, + "step": 19698 + }, + { + "epoch": 1.5897829069485918, + "grad_norm": 0.7178460955619812, + "learning_rate": 1.1589891421204258e-07, + "loss": 2.449, + "step": 19699 + }, + { + "epoch": 1.5898636106851747, + "grad_norm": 0.7549518346786499, + "learning_rate": 1.1514031285952964e-07, + "loss": 2.452, + "step": 19700 + }, + { + "epoch": 1.5899443144217578, + "grad_norm": 0.6895601749420166, + "learning_rate": 
1.1438420089880941e-07, + "loss": 2.4449, + "step": 19701 + }, + { + "epoch": 1.5900250181583409, + "grad_norm": 0.6945269107818604, + "learning_rate": 1.1363057834875568e-07, + "loss": 2.3642, + "step": 19702 + }, + { + "epoch": 1.5901057218949237, + "grad_norm": 0.7265799045562744, + "learning_rate": 1.1287944522812011e-07, + "loss": 2.4142, + "step": 19703 + }, + { + "epoch": 1.5901864256315066, + "grad_norm": 0.7272506356239319, + "learning_rate": 1.1213080155564326e-07, + "loss": 2.4715, + "step": 19704 + }, + { + "epoch": 1.59026712936809, + "grad_norm": 0.7248110175132751, + "learning_rate": 1.1138464734996579e-07, + "loss": 2.4022, + "step": 19705 + }, + { + "epoch": 1.5903478331046728, + "grad_norm": 0.6912270784378052, + "learning_rate": 1.1064098262970612e-07, + "loss": 2.3729, + "step": 19706 + }, + { + "epoch": 1.5904285368412556, + "grad_norm": 0.6900086998939514, + "learning_rate": 1.098998074133828e-07, + "loss": 2.4207, + "step": 19707 + }, + { + "epoch": 1.5905092405778387, + "grad_norm": 0.6722525358200073, + "learning_rate": 1.091611217194699e-07, + "loss": 2.4129, + "step": 19708 + }, + { + "epoch": 1.5905899443144218, + "grad_norm": 0.6852079033851624, + "learning_rate": 1.0842492556638606e-07, + "loss": 2.47, + "step": 19709 + }, + { + "epoch": 1.5906706480510047, + "grad_norm": 0.6785573363304138, + "learning_rate": 1.0769121897246103e-07, + "loss": 2.383, + "step": 19710 + }, + { + "epoch": 1.5907513517875878, + "grad_norm": 0.6966723799705505, + "learning_rate": 1.0696000195600242e-07, + "loss": 2.388, + "step": 19711 + }, + { + "epoch": 1.5908320555241708, + "grad_norm": 0.6843985915184021, + "learning_rate": 1.0623127453521787e-07, + "loss": 2.4639, + "step": 19712 + }, + { + "epoch": 1.5909127592607537, + "grad_norm": 0.749025821685791, + "learning_rate": 1.0550503672828171e-07, + "loss": 2.4147, + "step": 19713 + }, + { + "epoch": 1.5909934629973368, + "grad_norm": 0.671472430229187, + "learning_rate": 1.0478128855327952e-07, + "loss": 
2.423, + "step": 19714 + }, + { + "epoch": 1.5910741667339199, + "grad_norm": 0.7129948735237122, + "learning_rate": 1.0406003002825237e-07, + "loss": 2.3335, + "step": 19715 + }, + { + "epoch": 1.5911548704705027, + "grad_norm": 0.7866218686103821, + "learning_rate": 1.0334126117118592e-07, + "loss": 2.4536, + "step": 19716 + }, + { + "epoch": 1.5912355742070858, + "grad_norm": 0.65444016456604, + "learning_rate": 1.0262498199998804e-07, + "loss": 2.3961, + "step": 19717 + }, + { + "epoch": 1.591316277943669, + "grad_norm": 0.7422659397125244, + "learning_rate": 1.0191119253251114e-07, + "loss": 2.4429, + "step": 19718 + }, + { + "epoch": 1.5913969816802518, + "grad_norm": 0.6993165612220764, + "learning_rate": 1.0119989278654096e-07, + "loss": 2.3837, + "step": 19719 + }, + { + "epoch": 1.5914776854168347, + "grad_norm": 0.7320719957351685, + "learning_rate": 1.0049108277980778e-07, + "loss": 2.4295, + "step": 19720 + }, + { + "epoch": 1.591558389153418, + "grad_norm": 0.6608186364173889, + "learning_rate": 9.978476252998636e-08, + "loss": 2.3268, + "step": 19721 + }, + { + "epoch": 1.5916390928900008, + "grad_norm": 0.7910242676734924, + "learning_rate": 9.908093205465152e-08, + "loss": 2.4129, + "step": 19722 + }, + { + "epoch": 1.5917197966265837, + "grad_norm": 0.7058143615722656, + "learning_rate": 9.837959137137809e-08, + "loss": 2.3774, + "step": 19723 + }, + { + "epoch": 1.5918005003631668, + "grad_norm": 0.6841233968734741, + "learning_rate": 9.768074049762988e-08, + "loss": 2.4198, + "step": 19724 + }, + { + "epoch": 1.5918812040997499, + "grad_norm": 0.6511690616607666, + "learning_rate": 9.698437945081518e-08, + "loss": 2.3443, + "step": 19725 + }, + { + "epoch": 1.5919619078363327, + "grad_norm": 0.7132717967033386, + "learning_rate": 9.629050824830898e-08, + "loss": 2.4319, + "step": 19726 + }, + { + "epoch": 1.5920426115729158, + "grad_norm": 0.7754331231117249, + "learning_rate": 9.559912690738638e-08, + "loss": 2.4258, + "step": 19727 + }, + { + 
"epoch": 1.592123315309499, + "grad_norm": 0.7054601311683655, + "learning_rate": 9.49102354452891e-08, + "loss": 2.4351, + "step": 19728 + }, + { + "epoch": 1.5922040190460818, + "grad_norm": 0.6809507608413696, + "learning_rate": 9.422383387919231e-08, + "loss": 2.3725, + "step": 19729 + }, + { + "epoch": 1.5922847227826649, + "grad_norm": 0.6348769664764404, + "learning_rate": 9.353992222618236e-08, + "loss": 2.3682, + "step": 19730 + }, + { + "epoch": 1.592365426519248, + "grad_norm": 0.687200665473938, + "learning_rate": 9.285850050332335e-08, + "loss": 2.4013, + "step": 19731 + }, + { + "epoch": 1.5924461302558308, + "grad_norm": 0.6773887276649475, + "learning_rate": 9.21795687275795e-08, + "loss": 2.3799, + "step": 19732 + }, + { + "epoch": 1.592526833992414, + "grad_norm": 0.7059040665626526, + "learning_rate": 9.15031269158928e-08, + "loss": 2.3709, + "step": 19733 + }, + { + "epoch": 1.592607537728997, + "grad_norm": 0.690894365310669, + "learning_rate": 9.082917508510536e-08, + "loss": 2.4334, + "step": 19734 + }, + { + "epoch": 1.5926882414655799, + "grad_norm": 0.7484365105628967, + "learning_rate": 9.015771325202593e-08, + "loss": 2.4706, + "step": 19735 + }, + { + "epoch": 1.5927689452021627, + "grad_norm": 0.6529715657234192, + "learning_rate": 8.94887414333745e-08, + "loss": 2.4326, + "step": 19736 + }, + { + "epoch": 1.5928496489387458, + "grad_norm": 0.666780948638916, + "learning_rate": 8.882225964583767e-08, + "loss": 2.4062, + "step": 19737 + }, + { + "epoch": 1.592930352675329, + "grad_norm": 0.8028662204742432, + "learning_rate": 8.815826790602444e-08, + "loss": 2.3685, + "step": 19738 + }, + { + "epoch": 1.5930110564119118, + "grad_norm": 0.7198097109794617, + "learning_rate": 8.749676623047709e-08, + "loss": 2.3769, + "step": 19739 + }, + { + "epoch": 1.5930917601484949, + "grad_norm": 0.6754295825958252, + "learning_rate": 8.683775463568245e-08, + "loss": 2.4256, + "step": 19740 + }, + { + "epoch": 1.593172463885078, + "grad_norm": 
0.6798133850097656, + "learning_rate": 8.61812331380718e-08, + "loss": 2.4007, + "step": 19741 + }, + { + "epoch": 1.5932531676216608, + "grad_norm": 0.6824161410331726, + "learning_rate": 8.552720175398765e-08, + "loss": 2.3722, + "step": 19742 + }, + { + "epoch": 1.593333871358244, + "grad_norm": 0.7088763117790222, + "learning_rate": 8.487566049976137e-08, + "loss": 2.38, + "step": 19743 + }, + { + "epoch": 1.593414575094827, + "grad_norm": 0.6472757458686829, + "learning_rate": 8.42266093916022e-08, + "loss": 2.3979, + "step": 19744 + }, + { + "epoch": 1.5934952788314098, + "grad_norm": 0.7028400897979736, + "learning_rate": 8.358004844570833e-08, + "loss": 2.4049, + "step": 19745 + }, + { + "epoch": 1.593575982567993, + "grad_norm": 0.6774114370346069, + "learning_rate": 8.293597767817795e-08, + "loss": 2.4047, + "step": 19746 + }, + { + "epoch": 1.593656686304576, + "grad_norm": 0.7507948875427246, + "learning_rate": 8.229439710506493e-08, + "loss": 2.3845, + "step": 19747 + }, + { + "epoch": 1.5937373900411589, + "grad_norm": 0.7092359662055969, + "learning_rate": 8.165530674237865e-08, + "loss": 2.3936, + "step": 19748 + }, + { + "epoch": 1.5938180937777418, + "grad_norm": 0.7118118405342102, + "learning_rate": 8.101870660601752e-08, + "loss": 2.44, + "step": 19749 + }, + { + "epoch": 1.593898797514325, + "grad_norm": 0.676643431186676, + "learning_rate": 8.038459671186882e-08, + "loss": 2.4159, + "step": 19750 + }, + { + "epoch": 1.593979501250908, + "grad_norm": 0.6885136961936951, + "learning_rate": 7.975297707571994e-08, + "loss": 2.4018, + "step": 19751 + }, + { + "epoch": 1.5940602049874908, + "grad_norm": 0.7201601266860962, + "learning_rate": 7.912384771332493e-08, + "loss": 2.4428, + "step": 19752 + }, + { + "epoch": 1.5941409087240739, + "grad_norm": 0.7055841684341431, + "learning_rate": 7.849720864036014e-08, + "loss": 2.4325, + "step": 19753 + }, + { + "epoch": 1.594221612460657, + "grad_norm": 0.6576136350631714, + "learning_rate": 
7.787305987243532e-08, + "loss": 2.3689, + "step": 19754 + }, + { + "epoch": 1.5943023161972398, + "grad_norm": 0.6765410900115967, + "learning_rate": 7.725140142512688e-08, + "loss": 2.3832, + "step": 19755 + }, + { + "epoch": 1.594383019933823, + "grad_norm": 0.6923627257347107, + "learning_rate": 7.663223331390024e-08, + "loss": 2.4441, + "step": 19756 + }, + { + "epoch": 1.594463723670406, + "grad_norm": 0.6518635153770447, + "learning_rate": 7.60155555542097e-08, + "loss": 2.4416, + "step": 19757 + }, + { + "epoch": 1.5945444274069889, + "grad_norm": 0.6843993067741394, + "learning_rate": 7.540136816140963e-08, + "loss": 2.4035, + "step": 19758 + }, + { + "epoch": 1.594625131143572, + "grad_norm": 0.7025246024131775, + "learning_rate": 7.478967115082113e-08, + "loss": 2.4468, + "step": 19759 + }, + { + "epoch": 1.594705834880155, + "grad_norm": 0.6707848310470581, + "learning_rate": 7.418046453767646e-08, + "loss": 2.3669, + "step": 19760 + }, + { + "epoch": 1.594786538616738, + "grad_norm": 0.7015649080276489, + "learning_rate": 7.357374833716346e-08, + "loss": 2.3723, + "step": 19761 + }, + { + "epoch": 1.594867242353321, + "grad_norm": 0.6933848857879639, + "learning_rate": 7.296952256440337e-08, + "loss": 2.413, + "step": 19762 + }, + { + "epoch": 1.594947946089904, + "grad_norm": 0.73984295129776, + "learning_rate": 7.236778723445082e-08, + "loss": 2.3928, + "step": 19763 + }, + { + "epoch": 1.595028649826487, + "grad_norm": 0.8195617198944092, + "learning_rate": 7.1768542362316e-08, + "loss": 2.445, + "step": 19764 + }, + { + "epoch": 1.5951093535630698, + "grad_norm": 0.6890015602111816, + "learning_rate": 7.117178796292035e-08, + "loss": 2.4329, + "step": 19765 + }, + { + "epoch": 1.5951900572996531, + "grad_norm": 0.6562687754631042, + "learning_rate": 7.057752405112972e-08, + "loss": 2.4014, + "step": 19766 + }, + { + "epoch": 1.595270761036236, + "grad_norm": 0.7748126983642578, + "learning_rate": 6.99857506417767e-08, + "loss": 2.4012, + "step": 
19767 + }, + { + "epoch": 1.5953514647728189, + "grad_norm": 0.7711277008056641, + "learning_rate": 6.939646774959396e-08, + "loss": 2.3535, + "step": 19768 + }, + { + "epoch": 1.595432168509402, + "grad_norm": 0.709509551525116, + "learning_rate": 6.880967538928085e-08, + "loss": 2.4027, + "step": 19769 + }, + { + "epoch": 1.595512872245985, + "grad_norm": 0.6528860330581665, + "learning_rate": 6.82253735754479e-08, + "loss": 2.3679, + "step": 19770 + }, + { + "epoch": 1.595593575982568, + "grad_norm": 0.6697279810905457, + "learning_rate": 6.764356232265013e-08, + "loss": 2.4203, + "step": 19771 + }, + { + "epoch": 1.595674279719151, + "grad_norm": 0.7303707599639893, + "learning_rate": 6.706424164542035e-08, + "loss": 2.3478, + "step": 19772 + }, + { + "epoch": 1.595754983455734, + "grad_norm": 0.7086525559425354, + "learning_rate": 6.648741155816929e-08, + "loss": 2.4677, + "step": 19773 + }, + { + "epoch": 1.595835687192317, + "grad_norm": 0.6876404881477356, + "learning_rate": 6.591307207527431e-08, + "loss": 2.3579, + "step": 19774 + }, + { + "epoch": 1.5959163909289, + "grad_norm": 0.6423486471176147, + "learning_rate": 6.534122321106839e-08, + "loss": 2.3597, + "step": 19775 + }, + { + "epoch": 1.5959970946654831, + "grad_norm": 0.6969172954559326, + "learning_rate": 6.47718649797846e-08, + "loss": 2.4367, + "step": 19776 + }, + { + "epoch": 1.596077798402066, + "grad_norm": 0.7247799038887024, + "learning_rate": 6.420499739561158e-08, + "loss": 2.4043, + "step": 19777 + }, + { + "epoch": 1.596158502138649, + "grad_norm": 0.6570133566856384, + "learning_rate": 6.36406204726936e-08, + "loss": 2.4044, + "step": 19778 + }, + { + "epoch": 1.5962392058752322, + "grad_norm": 0.696624219417572, + "learning_rate": 6.307873422508603e-08, + "loss": 2.389, + "step": 19779 + }, + { + "epoch": 1.596319909611815, + "grad_norm": 0.6321461200714111, + "learning_rate": 6.251933866679993e-08, + "loss": 2.4229, + "step": 19780 + }, + { + "epoch": 1.596400613348398, + 
"grad_norm": 0.700206458568573, + "learning_rate": 6.196243381175748e-08, + "loss": 2.366, + "step": 19781 + }, + { + "epoch": 1.5964813170849812, + "grad_norm": 0.678466260433197, + "learning_rate": 6.140801967386977e-08, + "loss": 2.3961, + "step": 19782 + }, + { + "epoch": 1.596562020821564, + "grad_norm": 0.6987680196762085, + "learning_rate": 6.085609626692579e-08, + "loss": 2.4302, + "step": 19783 + }, + { + "epoch": 1.596642724558147, + "grad_norm": 0.7178279757499695, + "learning_rate": 6.030666360469228e-08, + "loss": 2.4161, + "step": 19784 + }, + { + "epoch": 1.59672342829473, + "grad_norm": 0.6797524094581604, + "learning_rate": 5.975972170085831e-08, + "loss": 2.4289, + "step": 19785 + }, + { + "epoch": 1.596804132031313, + "grad_norm": 0.6914597749710083, + "learning_rate": 5.9215270569057405e-08, + "loss": 2.4263, + "step": 19786 + }, + { + "epoch": 1.596884835767896, + "grad_norm": 0.6576448678970337, + "learning_rate": 5.8673310222867595e-08, + "loss": 2.3702, + "step": 19787 + }, + { + "epoch": 1.596965539504479, + "grad_norm": 0.6253501772880554, + "learning_rate": 5.8133840675789195e-08, + "loss": 2.3987, + "step": 19788 + }, + { + "epoch": 1.5970462432410621, + "grad_norm": 0.7034394145011902, + "learning_rate": 5.759686194125591e-08, + "loss": 2.4215, + "step": 19789 + }, + { + "epoch": 1.597126946977645, + "grad_norm": 0.6561691761016846, + "learning_rate": 5.706237403266812e-08, + "loss": 2.3779, + "step": 19790 + }, + { + "epoch": 1.597207650714228, + "grad_norm": 0.718203604221344, + "learning_rate": 5.6530376963337405e-08, + "loss": 2.3534, + "step": 19791 + }, + { + "epoch": 1.5972883544508112, + "grad_norm": 0.6715928316116333, + "learning_rate": 5.600087074653093e-08, + "loss": 2.4229, + "step": 19792 + }, + { + "epoch": 1.597369058187394, + "grad_norm": 0.6797523498535156, + "learning_rate": 5.547385539542704e-08, + "loss": 2.3933, + "step": 19793 + }, + { + "epoch": 1.597449761923977, + "grad_norm": 0.6607221961021423, + 
"learning_rate": 5.494933092318189e-08, + "loss": 2.3821, + "step": 19794 + }, + { + "epoch": 1.5975304656605602, + "grad_norm": 0.7215580344200134, + "learning_rate": 5.442729734285168e-08, + "loss": 2.467, + "step": 19795 + }, + { + "epoch": 1.597611169397143, + "grad_norm": 0.7169375419616699, + "learning_rate": 5.3907754667459345e-08, + "loss": 2.366, + "step": 19796 + }, + { + "epoch": 1.597691873133726, + "grad_norm": 0.6919476389884949, + "learning_rate": 5.339070290993897e-08, + "loss": 2.399, + "step": 19797 + }, + { + "epoch": 1.597772576870309, + "grad_norm": 0.6628741025924683, + "learning_rate": 5.287614208320246e-08, + "loss": 2.4328, + "step": 19798 + }, + { + "epoch": 1.5978532806068921, + "grad_norm": 0.680820882320404, + "learning_rate": 5.2364072200039584e-08, + "loss": 2.4267, + "step": 19799 + }, + { + "epoch": 1.597933984343475, + "grad_norm": 0.7319220304489136, + "learning_rate": 5.18544932732401e-08, + "loss": 2.3948, + "step": 19800 + }, + { + "epoch": 1.598014688080058, + "grad_norm": 0.7617683410644531, + "learning_rate": 5.1347405315482765e-08, + "loss": 2.3755, + "step": 19801 + }, + { + "epoch": 1.5980953918166412, + "grad_norm": 0.7148268222808838, + "learning_rate": 5.084280833942412e-08, + "loss": 2.4107, + "step": 19802 + }, + { + "epoch": 1.598176095553224, + "grad_norm": 0.6958122253417969, + "learning_rate": 5.034070235763189e-08, + "loss": 2.3612, + "step": 19803 + }, + { + "epoch": 1.5982567992898071, + "grad_norm": 0.7199648022651672, + "learning_rate": 4.9841087382618276e-08, + "loss": 2.3829, + "step": 19804 + }, + { + "epoch": 1.5983375030263902, + "grad_norm": 0.6674304604530334, + "learning_rate": 4.934396342684e-08, + "loss": 2.3915, + "step": 19805 + }, + { + "epoch": 1.598418206762973, + "grad_norm": 0.710919976234436, + "learning_rate": 4.884933050268714e-08, + "loss": 2.3751, + "step": 19806 + }, + { + "epoch": 1.5984989104995562, + "grad_norm": 0.6998607516288757, + "learning_rate": 4.835718862248317e-08, + 
"loss": 2.3827, + "step": 19807 + }, + { + "epoch": 1.5985796142361393, + "grad_norm": 0.6749023199081421, + "learning_rate": 4.786753779848496e-08, + "loss": 2.3848, + "step": 19808 + }, + { + "epoch": 1.5986603179727221, + "grad_norm": 0.6873534917831421, + "learning_rate": 4.738037804291606e-08, + "loss": 2.3888, + "step": 19809 + }, + { + "epoch": 1.598741021709305, + "grad_norm": 0.7397543787956238, + "learning_rate": 4.689570936790011e-08, + "loss": 2.4239, + "step": 19810 + }, + { + "epoch": 1.5988217254458883, + "grad_norm": 0.677931547164917, + "learning_rate": 4.641353178552743e-08, + "loss": 2.3958, + "step": 19811 + }, + { + "epoch": 1.5989024291824712, + "grad_norm": 0.6794888377189636, + "learning_rate": 4.593384530781064e-08, + "loss": 2.4118, + "step": 19812 + }, + { + "epoch": 1.598983132919054, + "grad_norm": 0.7329216599464417, + "learning_rate": 4.545664994669574e-08, + "loss": 2.3594, + "step": 19813 + }, + { + "epoch": 1.5990638366556371, + "grad_norm": 0.708848774433136, + "learning_rate": 4.498194571409542e-08, + "loss": 2.3726, + "step": 19814 + }, + { + "epoch": 1.5991445403922202, + "grad_norm": 0.701494574546814, + "learning_rate": 4.450973262182245e-08, + "loss": 2.3946, + "step": 19815 + }, + { + "epoch": 1.599225244128803, + "grad_norm": 0.772100031375885, + "learning_rate": 4.404001068165631e-08, + "loss": 2.4029, + "step": 19816 + }, + { + "epoch": 1.5993059478653862, + "grad_norm": 0.7124289274215698, + "learning_rate": 4.3572779905309835e-08, + "loss": 2.4313, + "step": 19817 + }, + { + "epoch": 1.5993866516019692, + "grad_norm": 0.8094269633293152, + "learning_rate": 4.310804030440707e-08, + "loss": 2.4505, + "step": 19818 + }, + { + "epoch": 1.599467355338552, + "grad_norm": 0.7688892483711243, + "learning_rate": 4.264579189054985e-08, + "loss": 2.4265, + "step": 19819 + }, + { + "epoch": 1.5995480590751352, + "grad_norm": 0.6754326224327087, + "learning_rate": 4.218603467524007e-08, + "loss": 2.3887, + "step": 19820 + }, + { + 
"epoch": 1.5996287628117183, + "grad_norm": 0.6846565008163452, + "learning_rate": 4.1728768669957455e-08, + "loss": 2.4376, + "step": 19821 + }, + { + "epoch": 1.5997094665483012, + "grad_norm": 0.7059599161148071, + "learning_rate": 4.127399388608177e-08, + "loss": 2.4636, + "step": 19822 + }, + { + "epoch": 1.5997901702848842, + "grad_norm": 0.7053768634796143, + "learning_rate": 4.082171033494841e-08, + "loss": 2.4225, + "step": 19823 + }, + { + "epoch": 1.5998708740214673, + "grad_norm": 0.6539292931556702, + "learning_rate": 4.037191802783724e-08, + "loss": 2.3774, + "step": 19824 + }, + { + "epoch": 1.5999515777580502, + "grad_norm": 0.6501747369766235, + "learning_rate": 3.9924616975961505e-08, + "loss": 2.4072, + "step": 19825 + }, + { + "epoch": 1.600032281494633, + "grad_norm": 0.6677641272544861, + "learning_rate": 3.947980719045674e-08, + "loss": 2.3932, + "step": 19826 + }, + { + "epoch": 1.6001129852312164, + "grad_norm": 0.7477465867996216, + "learning_rate": 3.903748868241408e-08, + "loss": 2.4657, + "step": 19827 + }, + { + "epoch": 1.6001936889677992, + "grad_norm": 0.7863909602165222, + "learning_rate": 3.859766146285804e-08, + "loss": 2.4582, + "step": 19828 + }, + { + "epoch": 1.600274392704382, + "grad_norm": 0.7084606885910034, + "learning_rate": 3.8160325542757616e-08, + "loss": 2.4559, + "step": 19829 + }, + { + "epoch": 1.6003550964409652, + "grad_norm": 0.7032583951950073, + "learning_rate": 3.77254809330041e-08, + "loss": 2.4157, + "step": 19830 + }, + { + "epoch": 1.6004358001775483, + "grad_norm": 0.7207308411598206, + "learning_rate": 3.729312764442216e-08, + "loss": 2.3756, + "step": 19831 + }, + { + "epoch": 1.6005165039141311, + "grad_norm": 0.7856607437133789, + "learning_rate": 3.6863265687814284e-08, + "loss": 2.4033, + "step": 19832 + }, + { + "epoch": 1.6005972076507142, + "grad_norm": 0.7099567651748657, + "learning_rate": 3.6435895073871906e-08, + "loss": 2.5035, + "step": 19833 + }, + { + "epoch": 1.6006779113872973, + 
"grad_norm": 0.7611396908760071, + "learning_rate": 3.6011015813253166e-08, + "loss": 2.4207, + "step": 19834 + }, + { + "epoch": 1.6007586151238802, + "grad_norm": 0.6844400763511658, + "learning_rate": 3.55886279165607e-08, + "loss": 2.4022, + "step": 19835 + }, + { + "epoch": 1.6008393188604633, + "grad_norm": 0.6863864660263062, + "learning_rate": 3.516873139429722e-08, + "loss": 2.3927, + "step": 19836 + }, + { + "epoch": 1.6009200225970464, + "grad_norm": 0.6733689308166504, + "learning_rate": 3.475132625694322e-08, + "loss": 2.3953, + "step": 19837 + }, + { + "epoch": 1.6010007263336292, + "grad_norm": 0.7231999039649963, + "learning_rate": 3.43364125149015e-08, + "loss": 2.4221, + "step": 19838 + }, + { + "epoch": 1.601081430070212, + "grad_norm": 0.6673304438591003, + "learning_rate": 3.392399017849712e-08, + "loss": 2.3616, + "step": 19839 + }, + { + "epoch": 1.6011621338067954, + "grad_norm": 0.7725997567176819, + "learning_rate": 3.351405925803297e-08, + "loss": 2.4349, + "step": 19840 + }, + { + "epoch": 1.6012428375433783, + "grad_norm": 0.8134771585464478, + "learning_rate": 3.3106619763711985e-08, + "loss": 2.4511, + "step": 19841 + }, + { + "epoch": 1.6013235412799611, + "grad_norm": 0.7164833545684814, + "learning_rate": 3.270167170569272e-08, + "loss": 2.3485, + "step": 19842 + }, + { + "epoch": 1.6014042450165442, + "grad_norm": 0.6800829172134399, + "learning_rate": 3.229921509405598e-08, + "loss": 2.4109, + "step": 19843 + }, + { + "epoch": 1.6014849487531273, + "grad_norm": 0.7247059941291809, + "learning_rate": 3.18992499388493e-08, + "loss": 2.4349, + "step": 19844 + }, + { + "epoch": 1.6015656524897102, + "grad_norm": 0.6944572925567627, + "learning_rate": 3.1501776250020265e-08, + "loss": 2.4046, + "step": 19845 + }, + { + "epoch": 1.6016463562262933, + "grad_norm": 0.6986684203147888, + "learning_rate": 3.1106794037505384e-08, + "loss": 2.4338, + "step": 19846 + }, + { + "epoch": 1.6017270599628763, + "grad_norm": 0.7396299242973328, + 
"learning_rate": 3.071430331111902e-08, + "loss": 2.451, + "step": 19847 + }, + { + "epoch": 1.6018077636994592, + "grad_norm": 0.7358464002609253, + "learning_rate": 3.0324304080653344e-08, + "loss": 2.4081, + "step": 19848 + }, + { + "epoch": 1.6018884674360423, + "grad_norm": 0.6887704730033875, + "learning_rate": 2.993679635583391e-08, + "loss": 2.4256, + "step": 19849 + }, + { + "epoch": 1.6019691711726254, + "grad_norm": 0.6982260942459106, + "learning_rate": 2.955178014630855e-08, + "loss": 2.4146, + "step": 19850 + }, + { + "epoch": 1.6020498749092082, + "grad_norm": 0.7813087105751038, + "learning_rate": 2.91692554616807e-08, + "loss": 2.4343, + "step": 19851 + }, + { + "epoch": 1.6021305786457913, + "grad_norm": 0.7883115410804749, + "learning_rate": 2.8789222311487173e-08, + "loss": 2.3405, + "step": 19852 + }, + { + "epoch": 1.6022112823823744, + "grad_norm": 0.6593830585479736, + "learning_rate": 2.841168070518707e-08, + "loss": 2.4479, + "step": 19853 + }, + { + "epoch": 1.6022919861189573, + "grad_norm": 0.7203704714775085, + "learning_rate": 2.8036630652206187e-08, + "loss": 2.4445, + "step": 19854 + }, + { + "epoch": 1.6023726898555402, + "grad_norm": 0.6729990839958191, + "learning_rate": 2.766407216187039e-08, + "loss": 2.3589, + "step": 19855 + }, + { + "epoch": 1.6024533935921235, + "grad_norm": 0.699585497379303, + "learning_rate": 2.7294005243483357e-08, + "loss": 2.4108, + "step": 19856 + }, + { + "epoch": 1.6025340973287063, + "grad_norm": 0.6901966333389282, + "learning_rate": 2.692642990624883e-08, + "loss": 2.3353, + "step": 19857 + }, + { + "epoch": 1.6026148010652892, + "grad_norm": 0.7188943028450012, + "learning_rate": 2.6561346159348354e-08, + "loss": 2.4629, + "step": 19858 + }, + { + "epoch": 1.6026955048018723, + "grad_norm": 0.7001612782478333, + "learning_rate": 2.619875401187466e-08, + "loss": 2.3743, + "step": 19859 + }, + { + "epoch": 1.6027762085384554, + "grad_norm": 0.7024890184402466, + "learning_rate": 
2.583865347286496e-08, + "loss": 2.3786, + "step": 19860 + }, + { + "epoch": 1.6028569122750382, + "grad_norm": 0.6670389771461487, + "learning_rate": 2.5481044551289858e-08, + "loss": 2.3996, + "step": 19861 + }, + { + "epoch": 1.6029376160116213, + "grad_norm": 0.7063748240470886, + "learning_rate": 2.5125927256053338e-08, + "loss": 2.4022, + "step": 19862 + }, + { + "epoch": 1.6030183197482044, + "grad_norm": 0.7233209609985352, + "learning_rate": 2.477330159602609e-08, + "loss": 2.5039, + "step": 19863 + }, + { + "epoch": 1.6030990234847873, + "grad_norm": 0.7208409905433655, + "learning_rate": 2.4423167579978868e-08, + "loss": 2.4297, + "step": 19864 + }, + { + "epoch": 1.6031797272213704, + "grad_norm": 0.6710116863250732, + "learning_rate": 2.407552521664913e-08, + "loss": 2.3989, + "step": 19865 + }, + { + "epoch": 1.6032604309579535, + "grad_norm": 0.6510246396064758, + "learning_rate": 2.3730374514696617e-08, + "loss": 2.4467, + "step": 19866 + }, + { + "epoch": 1.6033411346945363, + "grad_norm": 0.7394194006919861, + "learning_rate": 2.3387715482725558e-08, + "loss": 2.4619, + "step": 19867 + }, + { + "epoch": 1.6034218384311194, + "grad_norm": 0.7016871571540833, + "learning_rate": 2.3047548129273567e-08, + "loss": 2.4757, + "step": 19868 + }, + { + "epoch": 1.6035025421677025, + "grad_norm": 0.6663894653320312, + "learning_rate": 2.2709872462811644e-08, + "loss": 2.4247, + "step": 19869 + }, + { + "epoch": 1.6035832459042854, + "grad_norm": 0.6780986785888672, + "learning_rate": 2.237468849176638e-08, + "loss": 2.4089, + "step": 19870 + }, + { + "epoch": 1.6036639496408682, + "grad_norm": 0.6934168338775635, + "learning_rate": 2.2041996224497763e-08, + "loss": 2.3592, + "step": 19871 + }, + { + "epoch": 1.6037446533774515, + "grad_norm": 0.6614354848861694, + "learning_rate": 2.1711795669276946e-08, + "loss": 2.4332, + "step": 19872 + }, + { + "epoch": 1.6038253571140344, + "grad_norm": 0.6910732984542847, + "learning_rate": 2.138408683434179e-08, + 
"loss": 2.4563, + "step": 19873 + }, + { + "epoch": 1.6039060608506173, + "grad_norm": 0.7352861762046814, + "learning_rate": 2.105886972787463e-08, + "loss": 2.4558, + "step": 19874 + }, + { + "epoch": 1.6039867645872004, + "grad_norm": 0.7062209844589233, + "learning_rate": 2.0736144357957898e-08, + "loss": 2.4051, + "step": 19875 + }, + { + "epoch": 1.6040674683237834, + "grad_norm": 0.6468757390975952, + "learning_rate": 2.0415910732651812e-08, + "loss": 2.3635, + "step": 19876 + }, + { + "epoch": 1.6041481720603663, + "grad_norm": 0.7655599117279053, + "learning_rate": 2.009816885992777e-08, + "loss": 2.3981, + "step": 19877 + }, + { + "epoch": 1.6042288757969494, + "grad_norm": 0.6625165343284607, + "learning_rate": 1.978291874770166e-08, + "loss": 2.4005, + "step": 19878 + }, + { + "epoch": 1.6043095795335325, + "grad_norm": 0.630001962184906, + "learning_rate": 1.9470160403844974e-08, + "loss": 2.3878, + "step": 19879 + }, + { + "epoch": 1.6043902832701153, + "grad_norm": 0.6760852336883545, + "learning_rate": 1.9159893836140365e-08, + "loss": 2.3839, + "step": 19880 + }, + { + "epoch": 1.6044709870066984, + "grad_norm": 0.737600564956665, + "learning_rate": 1.8852119052314985e-08, + "loss": 2.3664, + "step": 19881 + }, + { + "epoch": 1.6045516907432815, + "grad_norm": 0.7165957093238831, + "learning_rate": 1.8546836060062687e-08, + "loss": 2.4542, + "step": 19882 + }, + { + "epoch": 1.6046323944798644, + "grad_norm": 0.6909278035163879, + "learning_rate": 1.824404486696629e-08, + "loss": 2.3794, + "step": 19883 + }, + { + "epoch": 1.6047130982164475, + "grad_norm": 0.6891295313835144, + "learning_rate": 1.7943745480586417e-08, + "loss": 2.4331, + "step": 19884 + }, + { + "epoch": 1.6047938019530306, + "grad_norm": 0.668829083442688, + "learning_rate": 1.764593790840596e-08, + "loss": 2.3972, + "step": 19885 + }, + { + "epoch": 1.6048745056896134, + "grad_norm": 0.6895432472229004, + "learning_rate": 1.7350622157841224e-08, + "loss": 2.3786, + "step": 19886 
+ }, + { + "epoch": 1.6049552094261963, + "grad_norm": 0.7383646368980408, + "learning_rate": 1.7057798236264076e-08, + "loss": 2.3376, + "step": 19887 + }, + { + "epoch": 1.6050359131627794, + "grad_norm": 0.6685279011726379, + "learning_rate": 1.676746615095759e-08, + "loss": 2.3596, + "step": 19888 + }, + { + "epoch": 1.6051166168993625, + "grad_norm": 0.7290921211242676, + "learning_rate": 1.6479625909171515e-08, + "loss": 2.4666, + "step": 19889 + }, + { + "epoch": 1.6051973206359453, + "grad_norm": 0.7422195672988892, + "learning_rate": 1.6194277518066793e-08, + "loss": 2.3733, + "step": 19890 + }, + { + "epoch": 1.6052780243725284, + "grad_norm": 0.650917649269104, + "learning_rate": 1.591142098475995e-08, + "loss": 2.3986, + "step": 19891 + }, + { + "epoch": 1.6053587281091115, + "grad_norm": 0.7405489087104797, + "learning_rate": 1.563105631631201e-08, + "loss": 2.4273, + "step": 19892 + }, + { + "epoch": 1.6054394318456944, + "grad_norm": 0.6961483359336853, + "learning_rate": 1.5353183519684065e-08, + "loss": 2.4364, + "step": 19893 + }, + { + "epoch": 1.6055201355822775, + "grad_norm": 0.678555428981781, + "learning_rate": 1.5077802601826118e-08, + "loss": 2.396, + "step": 19894 + }, + { + "epoch": 1.6056008393188606, + "grad_norm": 0.6907315850257874, + "learning_rate": 1.4804913569599344e-08, + "loss": 2.3782, + "step": 19895 + }, + { + "epoch": 1.6056815430554434, + "grad_norm": 0.6808211207389832, + "learning_rate": 1.4534516429787204e-08, + "loss": 2.4279, + "step": 19896 + }, + { + "epoch": 1.6057622467920265, + "grad_norm": 0.6447044014930725, + "learning_rate": 1.4266611189139856e-08, + "loss": 2.3801, + "step": 19897 + }, + { + "epoch": 1.6058429505286096, + "grad_norm": 0.733194887638092, + "learning_rate": 1.4001197854340842e-08, + "loss": 2.4229, + "step": 19898 + }, + { + "epoch": 1.6059236542651925, + "grad_norm": 0.7144802212715149, + "learning_rate": 1.3738276431995989e-08, + "loss": 2.421, + "step": 19899 + }, + { + "epoch": 
1.6060043580017753, + "grad_norm": 0.694912314414978, + "learning_rate": 1.3477846928655613e-08, + "loss": 2.4229, + "step": 19900 + }, + { + "epoch": 1.6060850617383586, + "grad_norm": 0.6931365728378296, + "learning_rate": 1.3219909350814518e-08, + "loss": 2.3555, + "step": 19901 + }, + { + "epoch": 1.6061657654749415, + "grad_norm": 0.7068983912467957, + "learning_rate": 1.2964463704900898e-08, + "loss": 2.4501, + "step": 19902 + }, + { + "epoch": 1.6062464692115244, + "grad_norm": 0.6837992668151855, + "learning_rate": 1.271150999728743e-08, + "loss": 2.4597, + "step": 19903 + }, + { + "epoch": 1.6063271729481075, + "grad_norm": 0.7375256419181824, + "learning_rate": 1.2461048234269079e-08, + "loss": 2.422, + "step": 19904 + }, + { + "epoch": 1.6064078766846905, + "grad_norm": 0.6658474802970886, + "learning_rate": 1.2213078422096403e-08, + "loss": 2.4338, + "step": 19905 + }, + { + "epoch": 1.6064885804212734, + "grad_norm": 0.6418828964233398, + "learning_rate": 1.1967600566942238e-08, + "loss": 2.3637, + "step": 19906 + }, + { + "epoch": 1.6065692841578565, + "grad_norm": 0.7329421043395996, + "learning_rate": 1.1724614674923918e-08, + "loss": 2.454, + "step": 19907 + }, + { + "epoch": 1.6066499878944396, + "grad_norm": 0.7373830676078796, + "learning_rate": 1.1484120752103256e-08, + "loss": 2.4516, + "step": 19908 + }, + { + "epoch": 1.6067306916310224, + "grad_norm": 0.7151721119880676, + "learning_rate": 1.1246118804464357e-08, + "loss": 2.4417, + "step": 19909 + }, + { + "epoch": 1.6068113953676055, + "grad_norm": 0.7841477394104004, + "learning_rate": 1.1010608837946912e-08, + "loss": 2.4339, + "step": 19910 + }, + { + "epoch": 1.6068920991041886, + "grad_norm": 0.6786444187164307, + "learning_rate": 1.0777590858424002e-08, + "loss": 2.4362, + "step": 19911 + }, + { + "epoch": 1.6069728028407715, + "grad_norm": 0.7415705919265747, + "learning_rate": 1.0547064871702094e-08, + "loss": 2.4605, + "step": 19912 + }, + { + "epoch": 1.6070535065773546, + 
"grad_norm": 0.6903213262557983, + "learning_rate": 1.0319030883509939e-08, + "loss": 2.3597, + "step": 19913 + }, + { + "epoch": 1.6071342103139377, + "grad_norm": 0.7123509049415588, + "learning_rate": 1.0093488899554082e-08, + "loss": 2.3859, + "step": 19914 + }, + { + "epoch": 1.6072149140505205, + "grad_norm": 0.6534863114356995, + "learning_rate": 9.87043892544115e-09, + "loss": 2.3951, + "step": 19915 + }, + { + "epoch": 1.6072956177871034, + "grad_norm": 0.7245554327964783, + "learning_rate": 9.64988096673336e-09, + "loss": 2.4287, + "step": 19916 + }, + { + "epoch": 1.6073763215236867, + "grad_norm": 0.6860896944999695, + "learning_rate": 9.431815028926316e-09, + "loss": 2.4538, + "step": 19917 + }, + { + "epoch": 1.6074570252602696, + "grad_norm": 0.7019451260566711, + "learning_rate": 9.21624111744901e-09, + "loss": 2.3711, + "step": 19918 + }, + { + "epoch": 1.6075377289968524, + "grad_norm": 0.738824725151062, + "learning_rate": 9.003159237697123e-09, + "loss": 2.3955, + "step": 19919 + }, + { + "epoch": 1.6076184327334355, + "grad_norm": 0.7687380313873291, + "learning_rate": 8.79256939495532e-09, + "loss": 2.3904, + "step": 19920 + }, + { + "epoch": 1.6076991364700186, + "grad_norm": 0.6834751963615417, + "learning_rate": 8.584471594486054e-09, + "loss": 2.4659, + "step": 19921 + }, + { + "epoch": 1.6077798402066015, + "grad_norm": 0.7464317083358765, + "learning_rate": 8.37886584147407e-09, + "loss": 2.4608, + "step": 19922 + }, + { + "epoch": 1.6078605439431846, + "grad_norm": 0.7654271125793457, + "learning_rate": 8.175752141037496e-09, + "loss": 2.4507, + "step": 19923 + }, + { + "epoch": 1.6079412476797676, + "grad_norm": 0.8260121941566467, + "learning_rate": 7.975130498238948e-09, + "loss": 2.3833, + "step": 19924 + }, + { + "epoch": 1.6080219514163505, + "grad_norm": 0.7304602861404419, + "learning_rate": 7.777000918096633e-09, + "loss": 2.3867, + "step": 19925 + }, + { + "epoch": 1.6081026551529336, + "grad_norm": 0.6921819448471069, + 
"learning_rate": 7.581363405517738e-09, + "loss": 2.3206, + "step": 19926 + }, + { + "epoch": 1.6081833588895167, + "grad_norm": 0.7634371519088745, + "learning_rate": 7.388217965398347e-09, + "loss": 2.4225, + "step": 19927 + }, + { + "epoch": 1.6082640626260996, + "grad_norm": 0.6708488464355469, + "learning_rate": 7.197564602556828e-09, + "loss": 2.3769, + "step": 19928 + }, + { + "epoch": 1.6083447663626826, + "grad_norm": 0.6929381489753723, + "learning_rate": 7.0094033217227295e-09, + "loss": 2.3522, + "step": 19929 + }, + { + "epoch": 1.6084254700992657, + "grad_norm": 0.8143155574798584, + "learning_rate": 6.823734127603398e-09, + "loss": 2.3403, + "step": 19930 + }, + { + "epoch": 1.6085061738358486, + "grad_norm": 0.7155687212944031, + "learning_rate": 6.640557024828465e-09, + "loss": 2.3845, + "step": 19931 + }, + { + "epoch": 1.6085868775724315, + "grad_norm": 0.6936028003692627, + "learning_rate": 6.459872017949842e-09, + "loss": 2.3977, + "step": 19932 + }, + { + "epoch": 1.6086675813090148, + "grad_norm": 0.6897889375686646, + "learning_rate": 6.2816791114750365e-09, + "loss": 2.4076, + "step": 19933 + }, + { + "epoch": 1.6087482850455976, + "grad_norm": 0.7102131247520447, + "learning_rate": 6.105978309856042e-09, + "loss": 2.3981, + "step": 19934 + }, + { + "epoch": 1.6088289887821805, + "grad_norm": 0.7192089557647705, + "learning_rate": 5.932769617456036e-09, + "loss": 2.3555, + "step": 19935 + }, + { + "epoch": 1.6089096925187636, + "grad_norm": 0.6901637315750122, + "learning_rate": 5.762053038593784e-09, + "loss": 2.3939, + "step": 19936 + }, + { + "epoch": 1.6089903962553467, + "grad_norm": 0.6547083854675293, + "learning_rate": 5.593828577532545e-09, + "loss": 2.3438, + "step": 19937 + }, + { + "epoch": 1.6090710999919295, + "grad_norm": 0.6387443542480469, + "learning_rate": 5.428096238457858e-09, + "loss": 2.3747, + "step": 19938 + }, + { + "epoch": 1.6091518037285126, + "grad_norm": 0.686191201210022, + "learning_rate": 
5.2648560255108555e-09, + "loss": 2.4753, + "step": 19939 + }, + { + "epoch": 1.6092325074650957, + "grad_norm": 0.6696913242340088, + "learning_rate": 5.104107942754954e-09, + "loss": 2.4139, + "step": 19940 + }, + { + "epoch": 1.6093132112016786, + "grad_norm": 0.6929105520248413, + "learning_rate": 4.945851994186956e-09, + "loss": 2.4193, + "step": 19941 + }, + { + "epoch": 1.6093939149382617, + "grad_norm": 0.7959415912628174, + "learning_rate": 4.790088183759256e-09, + "loss": 2.3996, + "step": 19942 + }, + { + "epoch": 1.6094746186748448, + "grad_norm": 0.7224646806716919, + "learning_rate": 4.636816515346531e-09, + "loss": 2.3907, + "step": 19943 + }, + { + "epoch": 1.6095553224114276, + "grad_norm": 0.6729363799095154, + "learning_rate": 4.486036992790155e-09, + "loss": 2.3771, + "step": 19944 + }, + { + "epoch": 1.6096360261480105, + "grad_norm": 0.7519870400428772, + "learning_rate": 4.337749619820475e-09, + "loss": 2.4174, + "step": 19945 + }, + { + "epoch": 1.6097167298845938, + "grad_norm": 0.6507316827774048, + "learning_rate": 4.191954400145637e-09, + "loss": 2.3158, + "step": 19946 + }, + { + "epoch": 1.6097974336211767, + "grad_norm": 0.6579844355583191, + "learning_rate": 4.048651337407172e-09, + "loss": 2.3824, + "step": 19947 + }, + { + "epoch": 1.6098781373577595, + "grad_norm": 0.6911383867263794, + "learning_rate": 3.907840435157795e-09, + "loss": 2.3687, + "step": 19948 + }, + { + "epoch": 1.6099588410943426, + "grad_norm": 0.6734700798988342, + "learning_rate": 3.769521696928013e-09, + "loss": 2.415, + "step": 19949 + }, + { + "epoch": 1.6100395448309257, + "grad_norm": 0.846986711025238, + "learning_rate": 3.6336951261484175e-09, + "loss": 2.4273, + "step": 19950 + }, + { + "epoch": 1.6101202485675086, + "grad_norm": 0.6638641357421875, + "learning_rate": 3.5003607262051876e-09, + "loss": 2.35, + "step": 19951 + }, + { + "epoch": 1.6102009523040917, + "grad_norm": 0.6984385251998901, + "learning_rate": 3.369518500440094e-09, + "loss": 
2.4349, + "step": 19952 + }, + { + "epoch": 1.6102816560406747, + "grad_norm": 0.8217600584030151, + "learning_rate": 3.2411684520949894e-09, + "loss": 2.409, + "step": 19953 + }, + { + "epoch": 1.6103623597772576, + "grad_norm": 0.6829268932342529, + "learning_rate": 3.115310584367315e-09, + "loss": 2.3689, + "step": 19954 + }, + { + "epoch": 1.6104430635138407, + "grad_norm": 0.7052109241485596, + "learning_rate": 2.991944900410104e-09, + "loss": 2.3992, + "step": 19955 + }, + { + "epoch": 1.6105237672504238, + "grad_norm": 0.7001661062240601, + "learning_rate": 2.871071403276471e-09, + "loss": 2.3523, + "step": 19956 + }, + { + "epoch": 1.6106044709870067, + "grad_norm": 0.6726387739181519, + "learning_rate": 2.7526900959973234e-09, + "loss": 2.3957, + "step": 19957 + }, + { + "epoch": 1.6106851747235897, + "grad_norm": 0.6479758620262146, + "learning_rate": 2.6368009815258555e-09, + "loss": 2.4014, + "step": 19958 + }, + { + "epoch": 1.6107658784601728, + "grad_norm": 0.6617366671562195, + "learning_rate": 2.523404062726442e-09, + "loss": 2.3944, + "step": 19959 + }, + { + "epoch": 1.6108465821967557, + "grad_norm": 0.7203007936477661, + "learning_rate": 2.412499342452357e-09, + "loss": 2.4776, + "step": 19960 + }, + { + "epoch": 1.6109272859333386, + "grad_norm": 0.7482421398162842, + "learning_rate": 2.3040868234458503e-09, + "loss": 2.456, + "step": 19961 + }, + { + "epoch": 1.6110079896699219, + "grad_norm": 0.7188587784767151, + "learning_rate": 2.1981665084158664e-09, + "loss": 2.4594, + "step": 19962 + }, + { + "epoch": 1.6110886934065047, + "grad_norm": 0.6892440915107727, + "learning_rate": 2.094738400015839e-09, + "loss": 2.4013, + "step": 19963 + }, + { + "epoch": 1.6111693971430876, + "grad_norm": 0.6665911674499512, + "learning_rate": 1.99380250079928e-09, + "loss": 2.3949, + "step": 19964 + }, + { + "epoch": 1.6112501008796707, + "grad_norm": 0.716108500957489, + "learning_rate": 1.8953588133086007e-09, + "loss": 2.4163, + "step": 19965 + }, + { + 
"epoch": 1.6113308046162538, + "grad_norm": 0.7255630493164062, + "learning_rate": 1.7994073399751898e-09, + "loss": 2.4512, + "step": 19966 + }, + { + "epoch": 1.6114115083528366, + "grad_norm": 0.679620087146759, + "learning_rate": 1.7059480831971286e-09, + "loss": 2.4005, + "step": 19967 + }, + { + "epoch": 1.6114922120894197, + "grad_norm": 0.6881493330001831, + "learning_rate": 1.6149810453058855e-09, + "loss": 2.4047, + "step": 19968 + }, + { + "epoch": 1.6115729158260028, + "grad_norm": 0.6086495518684387, + "learning_rate": 1.5265062285663156e-09, + "loss": 2.3924, + "step": 19969 + }, + { + "epoch": 1.6116536195625857, + "grad_norm": 0.6974717974662781, + "learning_rate": 1.4405236351877626e-09, + "loss": 2.3822, + "step": 19970 + }, + { + "epoch": 1.6117343232991688, + "grad_norm": 0.6754374504089355, + "learning_rate": 1.357033267312957e-09, + "loss": 2.4096, + "step": 19971 + }, + { + "epoch": 1.6118150270357519, + "grad_norm": 0.7444424033164978, + "learning_rate": 1.276035127018016e-09, + "loss": 2.4317, + "step": 19972 + }, + { + "epoch": 1.6118957307723347, + "grad_norm": 0.7187811732292175, + "learning_rate": 1.1975292163235452e-09, + "loss": 2.4373, + "step": 19973 + }, + { + "epoch": 1.6119764345089178, + "grad_norm": 0.6829712986946106, + "learning_rate": 1.1215155371835373e-09, + "loss": 2.4154, + "step": 19974 + }, + { + "epoch": 1.612057138245501, + "grad_norm": 0.6749333143234253, + "learning_rate": 1.0479940914964736e-09, + "loss": 2.4117, + "step": 19975 + }, + { + "epoch": 1.6121378419820838, + "grad_norm": 0.6506801247596741, + "learning_rate": 9.769648810942222e-10, + "loss": 2.3538, + "step": 19976 + }, + { + "epoch": 1.6122185457186666, + "grad_norm": 0.752211332321167, + "learning_rate": 9.084279077420377e-10, + "loss": 2.4069, + "step": 19977 + }, + { + "epoch": 1.61229924945525, + "grad_norm": 0.7003064751625061, + "learning_rate": 8.423831731607657e-10, + "loss": 2.4264, + "step": 19978 + }, + { + "epoch": 1.6123799531918328, + 
"grad_norm": 0.6748045086860657, + "learning_rate": 7.78830678982434e-10, + "loss": 2.3839, + "step": 19979 + }, + { + "epoch": 1.6124606569284157, + "grad_norm": 0.7079327702522278, + "learning_rate": 7.177704267946616e-10, + "loss": 2.4435, + "step": 19980 + }, + { + "epoch": 1.6125413606649988, + "grad_norm": 0.7959599494934082, + "learning_rate": 6.592024181184542e-10, + "loss": 2.4284, + "step": 19981 + }, + { + "epoch": 1.6126220644015818, + "grad_norm": 0.7553056478500366, + "learning_rate": 6.031266544193059e-10, + "loss": 2.4191, + "step": 19982 + }, + { + "epoch": 1.6127027681381647, + "grad_norm": 0.7239125370979309, + "learning_rate": 5.49543137096098e-10, + "loss": 2.3927, + "step": 19983 + }, + { + "epoch": 1.6127834718747478, + "grad_norm": 0.7169431447982788, + "learning_rate": 4.984518674699956e-10, + "loss": 2.4823, + "step": 19984 + }, + { + "epoch": 1.6128641756113309, + "grad_norm": 0.7063155770301819, + "learning_rate": 4.498528468288576e-10, + "loss": 2.3995, + "step": 19985 + }, + { + "epoch": 1.6129448793479138, + "grad_norm": 0.6616485714912415, + "learning_rate": 4.037460763828271e-10, + "loss": 2.4175, + "step": 19986 + }, + { + "epoch": 1.6130255830844968, + "grad_norm": 0.7302383780479431, + "learning_rate": 3.601315572754338e-10, + "loss": 2.4386, + "step": 19987 + }, + { + "epoch": 1.61310628682108, + "grad_norm": 0.7047107219696045, + "learning_rate": 3.1900929058359397e-10, + "loss": 2.351, + "step": 19988 + }, + { + "epoch": 1.6131869905576628, + "grad_norm": 0.7069158554077148, + "learning_rate": 2.803792773620195e-10, + "loss": 2.3593, + "step": 19989 + }, + { + "epoch": 1.6132676942942457, + "grad_norm": 0.7108876705169678, + "learning_rate": 2.442415185432978e-10, + "loss": 2.3987, + "step": 19990 + }, + { + "epoch": 1.613348398030829, + "grad_norm": 0.668897807598114, + "learning_rate": 2.1059601503781167e-10, + "loss": 2.4408, + "step": 19991 + }, + { + "epoch": 1.6134291017674118, + "grad_norm": 0.7062950730323792, + 
"learning_rate": 1.794427677004329e-10, + "loss": 2.3961, + "step": 19992 + }, + { + "epoch": 1.6135098055039947, + "grad_norm": 0.6716587543487549, + "learning_rate": 1.507817772861131e-10, + "loss": 2.4186, + "step": 19993 + }, + { + "epoch": 1.6135905092405778, + "grad_norm": 0.7225587368011475, + "learning_rate": 1.2461304450539502e-10, + "loss": 2.3972, + "step": 19994 + }, + { + "epoch": 1.6136712129771609, + "grad_norm": 0.7363900542259216, + "learning_rate": 1.0093657003551472e-10, + "loss": 2.411, + "step": 19995 + }, + { + "epoch": 1.6137519167137437, + "grad_norm": 0.6568713188171387, + "learning_rate": 7.975235445378814e-11, + "loss": 2.3883, + "step": 19996 + }, + { + "epoch": 1.6138326204503268, + "grad_norm": 0.7733542919158936, + "learning_rate": 6.106039828202015e-11, + "loss": 2.4071, + "step": 19997 + }, + { + "epoch": 1.61391332418691, + "grad_norm": 0.6876296401023865, + "learning_rate": 4.4860701986504386e-11, + "loss": 2.3631, + "step": 19998 + }, + { + "epoch": 1.6139940279234928, + "grad_norm": 0.6861765384674072, + "learning_rate": 3.115326597802337e-11, + "loss": 2.3928, + "step": 19999 + }, + { + "epoch": 1.6140747316600759, + "grad_norm": 0.7239798903465271, + "learning_rate": 1.993809060074625e-11, + "loss": 2.4262, + "step": 20000 + }, + { + "epoch": 1.6140747316600759, + "eval_loss": 2.3642282485961914, + "eval_runtime": 771.388, + "eval_samples_per_second": 3.396, + "eval_steps_per_second": 0.567, + "step": 20000 + } + ], + "logging_steps": 1, + "max_steps": 20000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.29892329677312e+17, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +} diff --git a/out/checkpoint-20000/training_args.bin 
b/out/checkpoint-20000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ae4a8b118e2a671c30e37a5d24a42d8090b49055 --- /dev/null +++ b/out/checkpoint-20000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c2928f4418c9a306cbe65ca0c1b156ae660c125ec9122008a9f527a50891704 +size 5112